Diffstat (limited to 'src/jit')
-rw-r--r--src/jit/.clang-format80
-rw-r--r--src/jit/.gitmirror1
-rw-r--r--src/jit/CMakeLists.txt212
-rw-r--r--src/jit/ClrJit.PAL.exports3
-rw-r--r--src/jit/ClrJit.exports3
-rw-r--r--src/jit/DIRS.proj50
-rw-r--r--src/jit/Native.rc8
-rwxr-xr-xsrc/jit/_typeinfo.h764
-rw-r--r--src/jit/alloc.cpp590
-rw-r--r--src/jit/alloc.h99
-rw-r--r--src/jit/arraystack.h146
-rw-r--r--src/jit/assertionprop.cpp5142
-rw-r--r--src/jit/bitset.cpp185
-rw-r--r--src/jit/bitset.h452
-rw-r--r--src/jit/bitsetasshortlong.h792
-rw-r--r--src/jit/bitsetasuint64.h236
-rw-r--r--src/jit/bitsetasuint64inclass.h500
-rw-r--r--src/jit/bitsetops.h34
-rw-r--r--src/jit/bitvec.h56
-rw-r--r--src/jit/block.cpp771
-rw-r--r--src/jit/block.h1313
-rw-r--r--src/jit/blockset.h77
-rwxr-xr-xsrc/jit/codegen.h967
-rw-r--r--src/jit/codegenarm.cpp2106
-rw-r--r--src/jit/codegenarm64.cpp9723
-rw-r--r--src/jit/codegenclassic.h606
-rwxr-xr-xsrc/jit/codegencommon.cpp11779
-rw-r--r--src/jit/codegeninterface.h440
-rw-r--r--src/jit/codegenlegacy.cpp22057
-rw-r--r--src/jit/codegenlinear.h224
-rw-r--r--src/jit/codegenxarch.cpp9388
-rw-r--r--src/jit/compiler.cpp10380
-rw-r--r--src/jit/compiler.h9301
-rw-r--r--src/jit/compiler.hpp4742
-rw-r--r--src/jit/compilerbitsettraits.h130
-rw-r--r--src/jit/compilerbitsettraits.hpp181
-rw-r--r--src/jit/compmemkind.h56
-rw-r--r--src/jit/compphases.h91
-rw-r--r--src/jit/conventions.txt81
-rw-r--r--src/jit/copyprop.cpp463
-rw-r--r--src/jit/cpp.hint27
-rw-r--r--src/jit/crossgen/.gitmirror1
-rw-r--r--src/jit/crossgen/CMakeLists.txt7
-rw-r--r--src/jit/crossgen/jit_crossgen.nativeproj20
-rw-r--r--src/jit/dataflow.h81
-rw-r--r--src/jit/decomposelongs.cpp1028
-rw-r--r--src/jit/decomposelongs.h67
-rw-r--r--src/jit/delayload.cpp10
-rw-r--r--src/jit/disasm.cpp1568
-rw-r--r--src/jit/disasm.h226
-rw-r--r--src/jit/dll/.gitmirror1
-rw-r--r--src/jit/dll/CMakeLists.txt35
-rw-r--r--src/jit/dll/clrjit.def7
-rw-r--r--src/jit/dll/jit.nativeproj84
-rw-r--r--src/jit/earlyprop.cpp671
-rwxr-xr-xsrc/jit/ee_il_dll.cpp1552
-rw-r--r--src/jit/ee_il_dll.hpp204
-rw-r--r--src/jit/eeinterface.cpp212
-rw-r--r--src/jit/emit.cpp7158
-rw-r--r--src/jit/emit.h2742
-rw-r--r--src/jit/emitarm.cpp7623
-rw-r--r--src/jit/emitarm.h414
-rw-r--r--src/jit/emitarm64.cpp11167
-rw-r--r--src/jit/emitarm64.h909
-rw-r--r--src/jit/emitdef.h22
-rw-r--r--src/jit/emitfmts.h14
-rw-r--r--src/jit/emitfmtsarm.h153
-rw-r--r--src/jit/emitfmtsarm64.h210
-rw-r--r--src/jit/emitfmtsxarch.h240
-rw-r--r--src/jit/emitinl.h508
-rw-r--r--src/jit/emitjmps.h58
-rw-r--r--src/jit/emitpub.h162
-rw-r--r--src/jit/emitxarch.cpp11398
-rw-r--r--src/jit/emitxarch.h437
-rw-r--r--src/jit/error.cpp536
-rw-r--r--src/jit/error.h295
-rw-r--r--src/jit/flowgraph.cpp22276
-rw-r--r--src/jit/fp.h73
-rw-r--r--src/jit/gcdecode.cpp15
-rw-r--r--src/jit/gcencode.cpp4725
-rw-r--r--src/jit/gcinfo.cpp867
-rw-r--r--src/jit/gentree.cpp16748
-rw-r--r--src/jit/gentree.h5124
-rw-r--r--src/jit/gschecks.cpp583
-rw-r--r--src/jit/gtlist.h255
-rw-r--r--src/jit/gtstructs.h112
-rw-r--r--src/jit/hashbv.cpp2028
-rw-r--r--src/jit/hashbv.h363
-rw-r--r--src/jit/host.h68
-rw-r--r--src/jit/hostallocator.cpp40
-rw-r--r--src/jit/hostallocator.h22
-rw-r--r--src/jit/importer.cpp17997
-rw-r--r--src/jit/inline.cpp1640
-rw-r--r--src/jit/inline.def176
-rw-r--r--src/jit/inline.h894
-rw-r--r--src/jit/inlinepolicy.cpp2857
-rw-r--r--src/jit/inlinepolicy.h479
-rw-r--r--src/jit/instr.cpp4086
-rw-r--r--src/jit/instr.h301
-rw-r--r--src/jit/instrs.h13
-rw-r--r--src/jit/instrsarm.h557
-rw-r--r--src/jit/instrsarm64.h954
-rw-r--r--src/jit/instrsxarch.h540
-rw-r--r--src/jit/jit.h891
-rw-r--r--src/jit/jit.settings.targets136
-rw-r--r--src/jit/jitconfig.cpp344
-rw-r--r--src/jit/jitconfig.h97
-rw-r--r--src/jit/jitconfigvalues.h255
-rw-r--r--src/jit/jiteh.cpp4056
-rw-r--r--src/jit/jiteh.h180
-rw-r--r--src/jit/jitgcinfo.h452
-rw-r--r--src/jit/jitpch.cpp6
-rw-r--r--src/jit/jitpch.h36
-rw-r--r--src/jit/jitstd.h10
-rw-r--r--src/jit/jitstd/.gitmirror1
-rw-r--r--src/jit/jitstd/algorithm.h49
-rw-r--r--src/jit/jitstd/allocator.h211
-rw-r--r--src/jit/jitstd/functional.h62
-rw-r--r--src/jit/jitstd/hash.h103
-rw-r--r--src/jit/jitstd/hashtable.h822
-rw-r--r--src/jit/jitstd/iterator.h144
-rw-r--r--src/jit/jitstd/jitstd.cpp34
-rw-r--r--src/jit/jitstd/jitstd.sln20
-rw-r--r--src/jit/jitstd/jitstd.vcxproj103
-rw-r--r--src/jit/jitstd/list.h1243
-rw-r--r--src/jit/jitstd/new.h16
-rw-r--r--src/jit/jitstd/pair.h57
-rw-r--r--src/jit/jitstd/stdafx.cpp14
-rw-r--r--src/jit/jitstd/stdafx.h20
-rw-r--r--src/jit/jitstd/targetver.h14
-rw-r--r--src/jit/jitstd/type_traits.h196
-rw-r--r--src/jit/jitstd/unordered_map.h179
-rw-r--r--src/jit/jitstd/unordered_set.h156
-rw-r--r--src/jit/jitstd/utility.h108
-rw-r--r--src/jit/jitstd/vector.h1254
-rw-r--r--src/jit/jittelemetry.cpp390
-rw-r--r--src/jit/jittelemetry.h78
-rw-r--r--src/jit/lclvars.cpp6788
-rw-r--r--src/jit/lir.cpp1640
-rw-r--r--src/jit/lir.h310
-rw-r--r--src/jit/liveness.cpp3133
-rw-r--r--src/jit/loopcloning.cpp845
-rw-r--r--src/jit/loopcloning.h667
-rw-r--r--src/jit/loopcloningopts.h16
-rw-r--r--src/jit/lower.cpp4196
-rw-r--r--src/jit/lower.h280
-rw-r--r--src/jit/lowerarm.cpp71
-rw-r--r--src/jit/lowerarm64.cpp2063
-rw-r--r--src/jit/lowerxarch.cpp4192
-rw-r--r--src/jit/lsra.cpp11578
-rw-r--r--src/jit/lsra.h1608
-rw-r--r--src/jit/lsra_reftypes.h23
-rw-r--r--src/jit/morph.cpp18245
-rw-r--r--src/jit/nodeinfo.h161
-rw-r--r--src/jit/objectalloc.cpp207
-rw-r--r--src/jit/objectalloc.h82
-rw-r--r--src/jit/opcode.h29
-rw-r--r--src/jit/optcse.cpp2582
-rw-r--r--src/jit/optimizer.cpp8540
-rw-r--r--src/jit/phase.h77
-rw-r--r--src/jit/protojit/.gitmirror1
-rw-r--r--src/jit/protojit/CMakeLists.txt51
-rw-r--r--src/jit/protojit/SOURCES10
-rw-r--r--src/jit/protojit/makefile7
-rw-r--r--src/jit/protojit/protojit.def7
-rw-r--r--src/jit/protojit/protojit.nativeproj88
-rw-r--r--src/jit/rangecheck.cpp1388
-rw-r--r--src/jit/rangecheck.h603
-rw-r--r--src/jit/rationalize.cpp1056
-rw-r--r--src/jit/rationalize.h67
-rw-r--r--src/jit/regalloc.cpp6841
-rw-r--r--src/jit/regalloc.h111
-rw-r--r--src/jit/register.h124
-rw-r--r--src/jit/register_arg_convention.cpp123
-rw-r--r--src/jit/register_arg_convention.h111
-rw-r--r--src/jit/registerarm.h86
-rw-r--r--src/jit/registerarm64.h114
-rw-r--r--src/jit/registerfp.cpp1522
-rw-r--r--src/jit/registerfp.h26
-rw-r--r--src/jit/registerxmm.h48
-rw-r--r--src/jit/reglist.h18
-rw-r--r--src/jit/regpair.h357
-rw-r--r--src/jit/regset.cpp3777
-rw-r--r--src/jit/regset.h460
-rw-r--r--src/jit/scopeinfo.cpp1271
-rw-r--r--src/jit/sharedfloat.cpp498
-rw-r--r--src/jit/sideeffects.cpp549
-rw-r--r--src/jit/sideeffects.h158
-rw-r--r--src/jit/simd.cpp2556
-rw-r--r--src/jit/simd.h43
-rw-r--r--src/jit/simdcodegenxarch.cpp2143
-rw-r--r--src/jit/simdintrinsiclist.h145
-rw-r--r--src/jit/sm.cpp190
-rw-r--r--src/jit/sm.h75
-rw-r--r--src/jit/smallhash.h592
-rw-r--r--src/jit/smcommon.cpp166
-rw-r--r--src/jit/smcommon.h50
-rw-r--r--src/jit/smdata.cpp705
-rw-r--r--src/jit/smopcode.def205
-rw-r--r--src/jit/smopcodemap.def323
-rw-r--r--src/jit/smopenum.h17
-rw-r--r--src/jit/smweights.cpp274
-rw-r--r--src/jit/ssabuilder.cpp1903
-rw-r--r--src/jit/ssabuilder.h212
-rw-r--r--src/jit/ssaconfig.h49
-rw-r--r--src/jit/ssarenamestate.cpp244
-rw-r--r--src/jit/ssarenamestate.h129
-rw-r--r--src/jit/stackfp.cpp4494
-rw-r--r--src/jit/standalone/.gitmirror1
-rw-r--r--src/jit/standalone/CMakeLists.txt58
-rw-r--r--src/jit/target.h2320
-rw-r--r--src/jit/targetamd64.cpp19
-rw-r--r--src/jit/targetarm.cpp19
-rw-r--r--src/jit/targetarm64.cpp19
-rw-r--r--src/jit/targetx86.cpp19
-rw-r--r--src/jit/tinyarray.h79
-rw-r--r--src/jit/titypes.h15
-rw-r--r--src/jit/typeinfo.cpp405
-rw-r--r--src/jit/typelist.h81
-rw-r--r--src/jit/unwind.cpp171
-rw-r--r--src/jit/unwind.h852
-rw-r--r--src/jit/unwindamd64.cpp1056
-rw-r--r--src/jit/unwindarm.cpp2320
-rw-r--r--src/jit/unwindarm64.cpp802
-rw-r--r--src/jit/utils.cpp1767
-rw-r--r--src/jit/utils.h710
-rw-r--r--src/jit/valuenum.cpp7518
-rw-r--r--src/jit/valuenum.h1378
-rw-r--r--src/jit/valuenumfuncs.h141
-rw-r--r--src/jit/valuenumtype.h101
-rw-r--r--src/jit/varset.h211
-rw-r--r--src/jit/vartype.h285
-rw-r--r--src/jit/x86_instrs.h10
233 files changed, 361283 insertions, 0 deletions
diff --git a/src/jit/.clang-format b/src/jit/.clang-format
new file mode 100644
index 0000000000..1e3930f737
--- /dev/null
+++ b/src/jit/.clang-format
@@ -0,0 +1,80 @@
+---
+Language: Cpp
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: true
+AlignConsecutiveDeclarations: true
+AlignEscapedNewlinesLeft: false
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: false
+BraceWrapping:
+ AfterClass: true
+ AfterControlStatement: true
+ AfterEnum: false
+ AfterFunction: true
+ AfterNamespace: false
+ AfterObjCDeclaration: false
+ AfterStruct: true
+ AfterUnion: true
+ BeforeCatch: true
+ BeforeElse: true
+ IndentBraces: false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Allman
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: true
+ColumnLimit: 120
+CommentPragmas: '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+ForEachMacros: [ ]
+IndentCaseLabels: true
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 400
+PenaltyBreakComment: 50
+PenaltyBreakFirstLessLess: 500
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 100000
+PointerAlignment: Left
+ReflowComments: true
+SortIncludes: false
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp11
+TabWidth: 4
+UseTab: Never
+...
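
For reference, code formatted under the settings above comes out roughly like the following sketch (Allman braces, 4-space indents, left-aligned pointers, aligned consecutive declarations); the function and variable names are purely illustrative and are not taken from the JIT sources:

    // Illustrative only: shows the shape clang-format produces with the configuration above.
    static int computeTotal(const int* values, int count)
    {
        int total = 0; // AlignConsecutiveDeclarations/Assignments line up names and '='
        int index = 0;

        while (index < count)
        {
            total += values[index];
            index += 1;
        }

        return total;
    }
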
diff --git a/src/jit/.gitmirror b/src/jit/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror.
\ No newline at end of file
diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt
new file mode 100644
index 0000000000..6372e37852
--- /dev/null
+++ b/src/jit/CMakeLists.txt
@@ -0,0 +1,212 @@
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+include_directories("./jitstd")
+include_directories("../inc")
+
+# Enable the following for UNIX altjit on Windows
+# add_definitions(-DALT_JIT)
+
+if (CLR_CMAKE_TARGET_ARCH_AMD64)
+ add_definitions(-DFEATURE_SIMD)
+ add_definitions(-DFEATURE_AVX_SUPPORT)
+endif ()
+
+
+if(WIN32)
+ set(JIT_RESOURCES Native.rc)
+endif(WIN32)
+
+set( JIT_SOURCES
+ alloc.cpp
+ assertionprop.cpp
+ bitset.cpp
+ block.cpp
+ codegencommon.cpp
+ compiler.cpp
+ copyprop.cpp
+ disasm.cpp
+ earlyprop.cpp
+ ee_il_dll.cpp
+ eeinterface.cpp
+ emit.cpp
+ error.cpp
+ flowgraph.cpp
+ gcdecode.cpp
+ gcencode.cpp
+ gcinfo.cpp
+ gentree.cpp
+ gschecks.cpp
+ hashbv.cpp
+ hostallocator.cpp
+ importer.cpp
+ inline.cpp
+ inlinepolicy.cpp
+ instr.cpp
+ jitconfig.cpp
+ jiteh.cpp
+ jittelemetry.cpp
+ lclvars.cpp
+ lir.cpp
+ liveness.cpp
+ loopcloning.cpp
+ lower.cpp
+ lsra.cpp
+ morph.cpp
+ objectalloc.cpp
+ optcse.cpp
+ optimizer.cpp
+ rangecheck.cpp
+ rationalize.cpp
+ regalloc.cpp
+ register_arg_convention.cpp
+ regset.cpp
+ scopeinfo.cpp
+ sharedfloat.cpp
+ sideeffects.cpp
+ sm.cpp
+ smdata.cpp
+ smweights.cpp
+ ssabuilder.cpp
+ ssarenamestate.cpp
+ typeinfo.cpp
+ unwind.cpp
+ utils.cpp
+ valuenum.cpp
+)
+
+if(CLR_CMAKE_TARGET_ARCH_AMD64)
+ set( ARCH_SOURCES
+ codegenxarch.cpp
+ emitxarch.cpp
+ lowerxarch.cpp
+ simd.cpp
+ simdcodegenxarch.cpp
+ targetamd64.cpp
+ unwindamd64.cpp
+ )
+elseif(CLR_CMAKE_TARGET_ARCH_ARM)
+ set( ARCH_SOURCES
+ codegenarm.cpp
+ decomposelongs.cpp
+ emitarm.cpp
+ lowerarm.cpp
+ targetarm.cpp
+ unwindarm.cpp
+ )
+elseif(CLR_CMAKE_TARGET_ARCH_I386)
+ set( ARCH_SOURCES
+ codegenxarch.cpp
+ decomposelongs.cpp
+ emitxarch.cpp
+ lowerxarch.cpp
+ simd.cpp
+ simdcodegenxarch.cpp
+ targetx86.cpp
+ )
+elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
+ set( ARCH_SOURCES
+ codegenarm64.cpp
+ emitarm64.cpp
+ lowerarm64.cpp
+ targetarm64.cpp
+ unwindarm.cpp
+ unwindarm64.cpp
+ )
+else()
+ clr_unknown_arch()
+endif()
+
+# The following defines all the source files used by the "legacy" back-end (#ifdef LEGACY_BACKEND).
+# It is always safe to include both legacy and non-legacy files in the build, as everything is properly
+# #ifdef'ed, though it makes the build slightly slower to do so. Note there is only a legacy backend for
+# x86 and ARM.
+
+if(CLR_CMAKE_TARGET_ARCH_AMD64)
+ set( ARCH_LEGACY_SOURCES
+ )
+elseif(CLR_CMAKE_TARGET_ARCH_ARM)
+ set( ARCH_LEGACY_SOURCES
+ codegenlegacy.cpp
+ registerfp.cpp
+ )
+elseif(CLR_CMAKE_TARGET_ARCH_I386)
+ set( ARCH_LEGACY_SOURCES
+ codegenlegacy.cpp
+ stackfp.cpp
+ )
+elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
+ set( ARCH_LEGACY_SOURCES
+ )
+else()
+ clr_unknown_arch()
+endif()
+
+set( SOURCES
+ ${JIT_SOURCES}
+ ${ARCH_SOURCES}
+ ${ARCH_LEGACY_SOURCES}
+ ${JIT_RESOURCES}
+)
+
+convert_to_absolute_path(SOURCES ${SOURCES})
+
+if(WIN32)
+ add_precompiled_header(jitpch.h ../jitpch.cpp SOURCES)
+
+    # Create .def file containing a list of exports preceded by
+ # 'EXPORTS'. The file "ClrJit.exports" already contains the list, so we
+ # massage it into the correct format here to create "ClrJit.exports.def".
+ set(JIT_EXPORTS_FILE ${CMAKE_CURRENT_BINARY_DIR}/ClrJit.exports.def)
+ set(JIT_EXPORTS_FILE_TEMP ${JIT_EXPORTS_FILE}.txt)
+ file(READ "ClrJit.exports" exports_list)
+ file(WRITE ${JIT_EXPORTS_FILE_TEMP} "LIBRARY CLRJIT\n")
+ file(APPEND ${JIT_EXPORTS_FILE_TEMP} "EXPORTS\n")
+ file(APPEND ${JIT_EXPORTS_FILE_TEMP} ${exports_list})
+
+ # Copy the file only if it has changed.
+ execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ ${JIT_EXPORTS_FILE_TEMP} ${JIT_EXPORTS_FILE})
+
+ set(SHARED_LIB_SOURCES ${SOURCES} ${JIT_EXPORTS_FILE})
+else()
+ set(JIT_EXPORTS_IN_FILE ${CMAKE_CURRENT_BINARY_DIR}/clrjit.exports.in)
+ file(READ "${CMAKE_CURRENT_LIST_DIR}/ClrJit.exports" jit_exports)
+ file(READ "${CMAKE_CURRENT_LIST_DIR}/ClrJit.PAL.exports" pal_exports)
+ file(WRITE ${JIT_EXPORTS_IN_FILE} ${jit_exports})
+ file(APPEND ${JIT_EXPORTS_IN_FILE} "\n")
+ file(APPEND ${JIT_EXPORTS_IN_FILE} ${pal_exports})
+
+ set(JIT_EXPORTS_FILE ${CMAKE_CURRENT_BINARY_DIR}/clrjit.exports)
+ generate_exports_file(${JIT_EXPORTS_IN_FILE} ${JIT_EXPORTS_FILE})
+
+ if(CMAKE_SYSTEM_NAME STREQUAL Linux OR CMAKE_SYSTEM_NAME STREQUAL FreeBSD OR CMAKE_SYSTEM_NAME STREQUAL NetBSD)
+ # This is required to force using our own PAL, not one that we are loaded with.
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Xlinker -Bsymbolic -Bsymbolic-functions")
+
+ set(JIT_EXPORTS_LINKER_OPTION -Wl,--version-script=${JIT_EXPORTS_FILE})
+ elseif(CMAKE_SYSTEM_NAME STREQUAL Darwin)
+ set(JIT_EXPORTS_LINKER_OPTION -Wl,-exported_symbols_list,${JIT_EXPORTS_FILE})
+ endif()
+
+ set(SHARED_LIB_SOURCES ${SOURCES})
+endif()
+
+add_custom_target(jit_exports DEPENDS ${JIT_EXPORTS_FILE})
+
+set(JIT_BASE_NAME clrjit)
+if (CLR_BUILD_JIT32)
+ set(JIT_BASE_NAME ryujit)
+endif()
+
+if(WIN32)
+ add_definitions(-DFX_VER_INTERNALNAME_STR=${JIT_BASE_NAME}.dll)
+endif(WIN32)
+
+add_subdirectory(dll)
+add_subdirectory(crossgen)
+add_subdirectory(standalone)
+
+if (CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM)
+ add_subdirectory(protojit)
+endif (CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM)
diff --git a/src/jit/ClrJit.PAL.exports b/src/jit/ClrJit.PAL.exports
new file mode 100644
index 0000000000..c6b4e8ec57
--- /dev/null
+++ b/src/jit/ClrJit.PAL.exports
@@ -0,0 +1,3 @@
+DllMain
+PAL_RegisterModule
+PAL_UnregisterModule
diff --git a/src/jit/ClrJit.exports b/src/jit/ClrJit.exports
new file mode 100644
index 0000000000..0126e63b4d
--- /dev/null
+++ b/src/jit/ClrJit.exports
@@ -0,0 +1,3 @@
+getJit
+jitStartup
+sxsJitStartup
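
Taken together with the CMakeLists.txt logic above, on Windows this export list would be massaged into a ClrJit.exports.def module-definition file along the lines of the following (a sketch of the expected generated output, not a file that is part of this diff):

    LIBRARY CLRJIT
    EXPORTS
    getJit
    jitStartup
    sxsJitStartup

On the non-Windows path, the same list is instead concatenated with ClrJit.PAL.exports and passed to generate_exports_file to produce a linker version script (Linux/FreeBSD/NetBSD) or an exported-symbols list (Darwin).
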
diff --git a/src/jit/DIRS.proj b/src/jit/DIRS.proj
new file mode 100644
index 0000000000..6d1c06d3f0
--- /dev/null
+++ b/src/jit/DIRS.proj
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="dogfood">
+ <!--Import the settings-->
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\clr.props" />
+
+ <PropertyGroup>
+ <BuildInPhase1>true</BuildInPhase1>
+ <BuildInPhaseDefault>false</BuildInPhaseDefault>
+ <BuildCoreBinaries>true</BuildCoreBinaries>
+ </PropertyGroup>
+
+ <PropertyGroup Condition="'$(BuildProjectName)' != 'CoreSys' and '$(BuildArchitecture)' == 'amd64'">
+ <BuildSysBinaries>false</BuildSysBinaries>
+ </PropertyGroup>
+
+ <PropertyGroup Condition="'$(BuildProjectName)' == 'CoreSys' or '$(BuildArchitecture)' != 'amd64'">
+ <BuildSysBinaries>true</BuildSysBinaries>
+ </PropertyGroup>
+
+ <ItemGroup Condition="'$(BuildExePhase)' == '1'">
+ <!-- x86 and ARM clrjit.dll are built in the JIT32 directory; we build FrankenJit here -->
+ <ProjectFile Condition="'$(BuildArchitecture)' != 'i386' and '$(BuildArchitecture)' != 'arm'" Include="dll\jit.nativeproj" />
+ </ItemGroup>
+
+ <!-- Only the main JIT gets built for CoreSys. The other jits (e.g., altjits) do not. -->
+ <ItemGroup Condition="'$(BuildExePhase)' == '1' and '$(BuildProjectName)' != 'CoreSys'">
+
+ <!-- Build the "FrankenJit" (RyuJIT front-end, legacy back-end) and "FrankenAltjit". These can't conflict with the names of the JIT32 directory outputs. -->
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'i386' or '$(BuildArchitecture)' == 'arm'" Include="frankenjit\frankenjit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="frankenaltjit\frankenaltjit.nativeproj" />
+
+ <!-- This might be useful, to help make sure JIT devs build all configurations of the JIT (including crossgen), but
+ it appears to cause problems with the build system, and it slows down normal JIT developer productivity by adding a seldom-useful build.
+ <ProjectFile Include="crossgen\jit_crossgen.nativeproj" />
+ -->
+
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'arm'" Include="protojit\protojit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'amd64'" Include="protojit\protojit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'amd64'" Include="ctp\ctpjit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'amd64'" Include="arm64altjit\arm64altjit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="protojit\protojit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="protononjit\protononjit.nativeproj" />
+
+ <!-- We could build skipjit for all architectures, but we only need it for x86 currently -->
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="skipjit\skipjit.nativeproj" />
+ </ItemGroup>
+
+ <!--Import the targets-->
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\tools\Microsoft.DevDiv.Traversal.targets" />
+</Project>
diff --git a/src/jit/Native.rc b/src/jit/Native.rc
new file mode 100644
index 0000000000..9e01bcd6cc
--- /dev/null
+++ b/src/jit/Native.rc
@@ -0,0 +1,8 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#define FX_VER_FILEDESCRIPTION_STR "Microsoft .NET Runtime Just-In-Time Compiler\0"
+
+#include <fxver.h>
+#include <fxver.rc>
diff --git a/src/jit/_typeinfo.h b/src/jit/_typeinfo.h
new file mode 100755
index 0000000000..08273adc8d
--- /dev/null
+++ b/src/jit/_typeinfo.h
@@ -0,0 +1,764 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX _typeInfo XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+ This header file is named _typeInfo.h to be distinguished from typeinfo.h
+ in the NT SDK
+******************************************************************************/
+
+/*****************************************************************************/
+#ifndef _TYPEINFO_H_
+#define _TYPEINFO_H_
+/*****************************************************************************/
+
+enum ti_types
+{
+#define DEF_TI(ti, nm) ti,
+#include "titypes.h"
+#undef DEF_TI
+ TI_ONLY_ENUM = TI_METHOD, // Enum values above this are completely described by the enumeration
+ TI_COUNT
+};
+
+#if defined(_TARGET_64BIT_)
+#define TI_I_IMPL TI_LONG
+#else
+#define TI_I_IMPL TI_INT
+#endif
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+#define TI_DUMP_PADDING " "
+#ifdef _MSC_VER
+namespace
+{
+#endif // _MSC_VER
+SELECTANY const char* g_ti_type_names_map[] = {
+#define DEF_TI(ti, nm) nm,
+#include "titypes.h"
+#undef DEF_TI
+};
+#ifdef _MSC_VER
+}
+#endif // _MSC_VER
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+#ifdef _MSC_VER
+namespace
+{
+#endif // _MSC_VER
+SELECTANY const ti_types g_jit_types_map[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) verType,
+#include "typelist.h"
+#undef DEF_TP
+};
+#ifdef _MSC_VER
+}
+#endif // _MSC_VER
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+inline const char* tiType2Str(ti_types type)
+{
+ return g_ti_type_names_map[type];
+}
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+// typeInfo does not care about the distinction between signed/unsigned
+// This routine converts all unsigned types to signed ones
+inline ti_types varType2tiType(var_types type)
+{
+ assert(g_jit_types_map[TYP_BYTE] == TI_BYTE);
+ assert(g_jit_types_map[TYP_INT] == TI_INT);
+ assert(g_jit_types_map[TYP_UINT] == TI_INT);
+ assert(g_jit_types_map[TYP_FLOAT] == TI_FLOAT);
+ assert(g_jit_types_map[TYP_BYREF] == TI_ERROR);
+ assert(g_jit_types_map[type] != TI_ERROR);
+ return g_jit_types_map[type];
+}
+
+#ifdef _MSC_VER
+namespace
+{
+#endif // _MSC_VER
+SELECTANY const ti_types g_ti_types_map[CORINFO_TYPE_COUNT] = {
+ // see the definition of enum CorInfoType in file inc/corinfo.h
+ TI_ERROR, // CORINFO_TYPE_UNDEF = 0x0,
+ TI_ERROR, // CORINFO_TYPE_VOID = 0x1,
+ TI_BYTE, // CORINFO_TYPE_BOOL = 0x2,
+ TI_SHORT, // CORINFO_TYPE_CHAR = 0x3,
+ TI_BYTE, // CORINFO_TYPE_BYTE = 0x4,
+ TI_BYTE, // CORINFO_TYPE_UBYTE = 0x5,
+ TI_SHORT, // CORINFO_TYPE_SHORT = 0x6,
+ TI_SHORT, // CORINFO_TYPE_USHORT = 0x7,
+ TI_INT, // CORINFO_TYPE_INT = 0x8,
+ TI_INT, // CORINFO_TYPE_UINT = 0x9,
+ TI_LONG, // CORINFO_TYPE_LONG = 0xa,
+ TI_LONG, // CORINFO_TYPE_ULONG = 0xb,
+ TI_I_IMPL, // CORINFO_TYPE_NATIVEINT = 0xc,
+ TI_I_IMPL, // CORINFO_TYPE_NATIVEUINT = 0xd,
+ TI_FLOAT, // CORINFO_TYPE_FLOAT = 0xe,
+ TI_DOUBLE, // CORINFO_TYPE_DOUBLE = 0xf,
+ TI_REF, // CORINFO_TYPE_STRING = 0x10,
+ TI_ERROR, // CORINFO_TYPE_PTR = 0x11,
+ TI_ERROR, // CORINFO_TYPE_BYREF = 0x12,
+ TI_STRUCT, // CORINFO_TYPE_VALUECLASS = 0x13,
+ TI_REF, // CORINFO_TYPE_CLASS = 0x14,
+ TI_STRUCT, // CORINFO_TYPE_REFANY = 0x15,
+ TI_REF, // CORINFO_TYPE_VAR = 0x16,
+};
+#ifdef _MSC_VER
+}
+#endif // _MSC_VER
+
+// Convert the type returned from the VM to a ti_type.
+
+inline ti_types JITtype2tiType(CorInfoType type)
+{
+ // spot check to make certain enumerations have not changed
+
+ assert(g_ti_types_map[CORINFO_TYPE_CLASS] == TI_REF);
+ assert(g_ti_types_map[CORINFO_TYPE_BYREF] == TI_ERROR);
+ assert(g_ti_types_map[CORINFO_TYPE_DOUBLE] == TI_DOUBLE);
+ assert(g_ti_types_map[CORINFO_TYPE_VALUECLASS] == TI_STRUCT);
+ assert(g_ti_types_map[CORINFO_TYPE_STRING] == TI_REF);
+
+ type = CorInfoType(type & CORINFO_TYPE_MASK); // strip off modifiers
+
+ assert(type < CORINFO_TYPE_COUNT);
+
+ assert(g_ti_types_map[type] != TI_ERROR || type == CORINFO_TYPE_VOID);
+ return g_ti_types_map[type];
+};
+
+/*****************************************************************************
+ * Declares the typeInfo class, which represents the type of an entity on the
+ * stack, in a local variable or an argument.
+ *
+ * Flags: LLLLLLLLLLLLLLLLffffffffffTTTTTT
+ *
+ * L = local var # or instance field #
+ * x = unused
+ * f = flags
+ * T = type
+ *
+ * The lower bits are used to store the type component, and may be one of:
+ *
+ * TI_* (primitive)   - see typelist.h for enumeration (BYTE, SHORT, INT..)
+ * TI_REF - OBJREF / ARRAY use m_cls for the type
+ * (including arrays and null objref)
+ * TI_STRUCT - VALUE type, use m_cls for the actual type
+ *
+ * NOTE carefully that BYREF info is not stored here. You will never see a
+ * TI_BYREF in this component. For example, the type component
+ * of a "byref TI_INT" is TI_FLAG_BYREF | TI_INT.
+ *
+ * NOTE carefully that Generic Type Variable info is
+ * only stored here in part. Values of type "T" (e.g "!0" in ILASM syntax),
+ * i.e. some generic variable type, appear only when verifying generic
+ * code. They come in two flavours: unboxed and boxed. Unboxed
+ * is the norm, e.g. a local, field or argument of type T. Boxed
+ * values arise from an IL instruction such as "box !0".
+ * The EE provides type handles for each different type
+ * variable and the EE's "canCast" operation decides casting
+ * for boxed type variable. Thus:
+ *
+ * (TI_REF, <type-variable-type-handle>) == boxed type variable
+ *
+ * (TI_REF, <type-variable-type-handle>)
+ * + TI_FLAG_GENERIC_TYPE_VAR == unboxed type variable
+ *
+ * Using TI_REF for these may seem odd but using TI_STRUCT means the
+ * code-generation parts of the importer get confused when they
+ * can't work out the size, GC-ness etc. of the "struct". So using TI_REF
+ * just tricks these backend parts into generating pseudo-trees for
+ * the generic code we're verifying. These trees then get thrown away
+ * anyway as we do verification of generic code in import-only mode.
+ *
+ */
+
+// TI_COUNT is less than or equal to TI_FLAG_DATA_MASK
+
+#define TI_FLAG_DATA_BITS 6
+#define TI_FLAG_DATA_MASK ((1 << TI_FLAG_DATA_BITS) - 1)
+
+// Flag indicating this item is uninitialized
+// Note that if UNINIT and BYREF are both set,
+// it means byref (uninit x) - i.e. we are pointing to an uninit <something>
+
+#define TI_FLAG_UNINIT_OBJREF 0x00000040
+
+// Flag indicating this item is a byref <something>
+
+#define TI_FLAG_BYREF 0x00000080
+
+// This item is a byref generated using the readonly. prefix
+// to a ldelema or Address function on an array type. The
+// runtime type check is ignored in these cases, but the
+// resulting byref can only be used in order to perform a
+// constraint call.
+
+#define TI_FLAG_BYREF_READONLY 0x00000100
+
+// This item is the MSIL 'I' type which is pointer-sized
+// (different size depending on platform) but which on ALL platforms
+// is implicitly convertible with a 32-bit int but not with a 64-bit one.
+
+// Note: this flag is currently used only in 64-bit systems to annotate
+// native int types. In 32 bits, since you can transparently coalesce int32
+// and native-int and both are the same size, JIT32 had no need to model
+// native-ints as a separate entity. For 64-bit though, since they have
+// different sizes, it's important to discern between a long and a native int
+// since conversions between them are not verifiable.
+#define TI_FLAG_NATIVE_INT 0x00000200
+
+// This item contains the 'this' pointer (used for tracking)
+
+#define TI_FLAG_THIS_PTR 0x00001000
+
+// This item is a byref to something which has a permanent home
+// (e.g. a static field, or instance field of an object in GC heap, as
+// opposed to the stack or a local variable). TI_FLAG_BYREF must also be
+// set. This information is useful for tail calls and return byrefs.
+//
+// Instructions that generate a permanent home byref:
+//
+// ldelema
+// ldflda of a ref object or another permanent home byref
+// array element address Get() helper
+// call or calli to a method that returns a byref and is verifiable or SkipVerify
+// dup
+// unbox
+
+#define TI_FLAG_BYREF_PERMANENT_HOME 0x00002000
+
+// This is for use when verifying generic code.
+// This indicates that the type handle is really an unboxed
+// generic type variable (e.g. the result of loading an argument
+// of type T in a class List<T>). Without this flag
+// the same type handle indicates a boxed generic value,
+// e.g. the result of a "box T" instruction.
+#define TI_FLAG_GENERIC_TYPE_VAR 0x00004000
+
+// Number of bits local var # is shifted
+
+#define TI_FLAG_LOCAL_VAR_SHIFT 16
+#define TI_FLAG_LOCAL_VAR_MASK 0xFFFF0000
+
+// Field info uses the same space as the local info
+
+#define TI_FLAG_FIELD_SHIFT TI_FLAG_LOCAL_VAR_SHIFT
+#define TI_FLAG_FIELD_MASK TI_FLAG_LOCAL_VAR_MASK
+
+#define TI_ALL_BYREF_FLAGS (TI_FLAG_BYREF | TI_FLAG_BYREF_READONLY | TI_FLAG_BYREF_PERMANENT_HOME)
+
+/*****************************************************************************
+ * A typeInfo can be one of several types:
+ * - A primitive type (I4,I8,R4,R8,I)
+ * - A type (ref, array, value type) (m_cls describes the type)
+ * - An array (m_cls describes the array type)
+ * - A byref (byref flag set, otherwise the same as the above),
+ * - A Function Pointer (m_method)
+ * - A byref local variable (byref and byref local flags set), can be
+ * uninitialized
+ *
+ * The reason that there can be 2 types of byrefs (general byrefs, and byref
+ * locals) is that byref locals initially point to uninitialized items.
+ * Therefore these byrefs must be tracked specially.
+ */
+
+class typeInfo
+{
+
+private:
+ union {
+ struct
+ {
+ ti_types type : 6;
+ unsigned uninitobj : 1; // used
+ unsigned byref : 1; // used
+ unsigned byref_readonly : 1; // used
+ unsigned nativeInt : 1; // used
+ unsigned : 2; // unused
+ unsigned thisPtr : 1; // used
+ unsigned thisPermHome : 1; // used
+ unsigned generic_type_var : 1; // used
+ } m_bits;
+
+ DWORD m_flags;
+ };
+
+ union {
+ CORINFO_CLASS_HANDLE m_cls;
+ // Valid only for type TI_METHOD
+ CORINFO_METHOD_HANDLE m_method;
+ };
+
+ template <typename T>
+ static bool isInvalidHandle(const T handle)
+ {
+ static_assert(std::is_same<T, CORINFO_CLASS_HANDLE>::value || std::is_same<T, CORINFO_METHOD_HANDLE>::value,
+ "");
+#ifdef _HOST_64BIT_
+ return handle == reinterpret_cast<T>(0xcccccccccccccccc);
+#else
+ return handle == reinterpret_cast<T>(0xcccccccc);
+#endif
+ }
+
+public:
+ typeInfo() : m_flags(TI_ERROR)
+ {
+ m_cls = NO_CLASS_HANDLE;
+ }
+
+ typeInfo(ti_types tiType)
+ {
+ assert((tiType >= TI_BYTE) && (tiType <= TI_NULL));
+ assert(tiType <= TI_FLAG_DATA_MASK);
+
+ m_flags = (DWORD)tiType;
+ m_cls = NO_CLASS_HANDLE;
+ }
+
+ typeInfo(var_types varType)
+ {
+ m_flags = (DWORD)varType2tiType(varType);
+ m_cls = NO_CLASS_HANDLE;
+ }
+
+ static typeInfo nativeInt()
+ {
+ typeInfo result = typeInfo(TI_I_IMPL);
+#ifdef _TARGET_64BIT_
+ result.m_flags |= TI_FLAG_NATIVE_INT;
+#endif
+ return result;
+ }
+
+ typeInfo(ti_types tiType, CORINFO_CLASS_HANDLE cls, bool typeVar = false)
+ {
+ assert(tiType == TI_STRUCT || tiType == TI_REF);
+ assert(cls != nullptr && !isInvalidHandle(cls));
+ m_flags = tiType;
+ if (typeVar)
+ {
+ m_flags |= TI_FLAG_GENERIC_TYPE_VAR;
+ }
+ m_cls = cls;
+ }
+
+ typeInfo(CORINFO_METHOD_HANDLE method)
+ {
+ assert(method != nullptr && !isInvalidHandle(method));
+ m_flags = TI_METHOD;
+ m_method = method;
+ }
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+ void Dump() const;
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+public:
+ // Note that we specifically ignore the permanent byref here. The rationale is that
+    // the type system doesn't know about this (it's jit only), i.e., signatures don't specify if
+    // a byref is safe, so they are fully equivalent for the jit, except for the RET instruction,
+    // instructions that load safe byrefs, and the stack merging logic, which need to know about
+    // the bit.
+ static bool AreEquivalent(const typeInfo& li, const typeInfo& ti)
+ {
+ DWORD allFlags = TI_FLAG_DATA_MASK | TI_FLAG_BYREF | TI_FLAG_BYREF_READONLY | TI_FLAG_GENERIC_TYPE_VAR |
+ TI_FLAG_UNINIT_OBJREF;
+#ifdef _TARGET_64BIT_
+ allFlags |= TI_FLAG_NATIVE_INT;
+#endif // _TARGET_64BIT_
+
+ if ((li.m_flags & allFlags) != (ti.m_flags & allFlags))
+ {
+ return false;
+ }
+
+ unsigned type = li.m_flags & TI_FLAG_DATA_MASK;
+ assert(TI_ERROR <
+ TI_ONLY_ENUM); // TI_ERROR looks like it needs more than enum. This optimises the success case a bit
+ if (type > TI_ONLY_ENUM)
+ {
+ return true;
+ }
+ if (type == TI_ERROR)
+ {
+ return false; // TI_ERROR != TI_ERROR
+ }
+ assert(li.m_cls != NO_CLASS_HANDLE && ti.m_cls != NO_CLASS_HANDLE);
+ return li.m_cls == ti.m_cls;
+ }
+
+#ifdef DEBUG
+ // On 64-bit systems, nodes whose "proper" type is "native int" get labeled TYP_LONG.
+ // In the verification type system, we always transform "native int" to "TI_LONG" with the
+ // native int flag set.
+ // Ideally, we would keep track of which nodes labeled "TYP_LONG" are really "native int", but
+ // attempts to do that have proved too difficult. So in situations where we try to compare the
+ // verification type system and the node type system, we use this method, which allows the specific
+ // mismatch where "verTi" is TI_LONG with the native int flag and "nodeTi" is TI_LONG without the
+ // native int flag set.
+ static bool AreEquivalentModuloNativeInt(const typeInfo& verTi, const typeInfo& nodeTi)
+ {
+ if (AreEquivalent(verTi, nodeTi))
+ {
+ return true;
+ }
+#ifdef _TARGET_64BIT_
+ return (nodeTi.IsType(TI_I_IMPL) && tiCompatibleWith(nullptr, verTi, typeInfo::nativeInt(), true)) ||
+ (verTi.IsType(TI_I_IMPL) && tiCompatibleWith(nullptr, typeInfo::nativeInt(), nodeTi, true));
+#else // _TARGET_64BIT_
+ return false;
+#endif // !_TARGET_64BIT_
+ }
+#endif // DEBUG
+
+ static BOOL tiMergeToCommonParent(COMP_HANDLE CompHnd, typeInfo* pDest, const typeInfo* pSrc, bool* changed);
+ static BOOL tiCompatibleWith(COMP_HANDLE CompHnd,
+ const typeInfo& child,
+ const typeInfo& parent,
+ bool normalisedForStack);
+
+ static BOOL tiMergeCompatibleWith(COMP_HANDLE CompHnd,
+ const typeInfo& child,
+ const typeInfo& parent,
+ bool normalisedForStack);
+
+ /////////////////////////////////////////////////////////////////////////
+ // Operations
+ /////////////////////////////////////////////////////////////////////////
+
+ void SetIsThisPtr()
+ {
+ m_flags |= TI_FLAG_THIS_PTR;
+ assert(m_bits.thisPtr);
+ }
+
+ void ClearThisPtr()
+ {
+ m_flags &= ~(TI_FLAG_THIS_PTR);
+ }
+
+ void SetIsPermanentHomeByRef()
+ {
+ assert(IsByRef());
+ m_flags |= TI_FLAG_BYREF_PERMANENT_HOME;
+ }
+
+ void SetIsReadonlyByRef()
+ {
+ assert(IsByRef());
+ m_flags |= TI_FLAG_BYREF_READONLY;
+ }
+
+ // Set that this item is uninitialized.
+ void SetUninitialisedObjRef()
+ {
+ assert((IsObjRef() && IsThisPtr()));
+ // For now, this is used only to track uninit this ptrs in ctors
+
+ m_flags |= TI_FLAG_UNINIT_OBJREF;
+ assert(m_bits.uninitobj);
+ }
+
+ // Set that this item is initialised.
+ void SetInitialisedObjRef()
+ {
+ assert((IsObjRef() && IsThisPtr()));
+ // For now, this is used only to track uninit this ptrs in ctors
+
+ m_flags &= ~TI_FLAG_UNINIT_OBJREF;
+ }
+
+ typeInfo& DereferenceByRef()
+ {
+ if (!IsByRef())
+ {
+ m_flags = TI_ERROR;
+ INDEBUG(m_cls = NO_CLASS_HANDLE);
+ }
+ m_flags &= ~(TI_FLAG_THIS_PTR | TI_ALL_BYREF_FLAGS);
+ return *this;
+ }
+
+ typeInfo& MakeByRef()
+ {
+ assert(!IsByRef());
+ m_flags &= ~(TI_FLAG_THIS_PTR);
+ m_flags |= TI_FLAG_BYREF;
+ return *this;
+ }
+
+ // I1,I2 --> I4
+ // FLOAT --> DOUBLE
+ // objref, arrays, byrefs, value classes are unchanged
+ //
+ typeInfo& NormaliseForStack()
+ {
+ switch (GetType())
+ {
+ case TI_BYTE:
+ case TI_SHORT:
+ m_flags = TI_INT;
+ break;
+
+ case TI_FLOAT:
+ m_flags = TI_DOUBLE;
+ break;
+ default:
+ break;
+ }
+ return (*this);
+ }
+
+ /////////////////////////////////////////////////////////////////////////
+ // Getters
+ /////////////////////////////////////////////////////////////////////////
+
+ CORINFO_CLASS_HANDLE GetClassHandle() const
+ {
+ return m_cls;
+ }
+
+ CORINFO_CLASS_HANDLE GetClassHandleForValueClass() const
+ {
+ assert(IsType(TI_STRUCT));
+ assert(m_cls != NO_CLASS_HANDLE);
+ return m_cls;
+ }
+
+ CORINFO_CLASS_HANDLE GetClassHandleForObjRef() const
+ {
+ assert(IsType(TI_REF));
+ assert(m_cls != NO_CLASS_HANDLE);
+ return m_cls;
+ }
+
+ CORINFO_METHOD_HANDLE GetMethod() const
+ {
+ assert(GetType() == TI_METHOD);
+ return m_method;
+ }
+
+ // If FEATURE_CORECLR is enabled, GetMethod can be called
+ // before the pointer type is known to be a method pointer type.
+ CORINFO_METHOD_HANDLE GetMethod2() const
+ {
+ return m_method;
+ }
+
+ // Get this item's type
+ // If primitive, returns the primitive type (TI_*)
+ // If not primitive, returns:
+ // - TI_ERROR if a byref anything
+ // - TI_REF if a class or array or null or a generic type variable
+ // - TI_STRUCT if a value class
+ ti_types GetType() const
+ {
+ if (m_flags & TI_FLAG_BYREF)
+ {
+ return TI_ERROR;
+ }
+
+ // objref/array/null (objref), value class, ptr, primitive
+ return (ti_types)(m_flags & TI_FLAG_DATA_MASK);
+ }
+
+ BOOL IsType(ti_types type) const
+ {
+ assert(type != TI_ERROR);
+ return (m_flags & (TI_FLAG_DATA_MASK | TI_FLAG_BYREF | TI_FLAG_BYREF_READONLY | TI_FLAG_BYREF_PERMANENT_HOME |
+ TI_FLAG_GENERIC_TYPE_VAR)) == DWORD(type);
+ }
+
+ // Returns whether this is an objref
+ BOOL IsObjRef() const
+ {
+ return IsType(TI_REF) || IsType(TI_NULL);
+ }
+
+ // Returns whether this is a by-ref
+ BOOL IsByRef() const
+ {
+ return (m_flags & TI_FLAG_BYREF);
+ }
+
+ // Returns whether this is the this pointer
+ BOOL IsThisPtr() const
+ {
+ return (m_flags & TI_FLAG_THIS_PTR);
+ }
+
+ BOOL IsUnboxedGenericTypeVar() const
+ {
+ return !IsByRef() && (m_flags & TI_FLAG_GENERIC_TYPE_VAR);
+ }
+
+ BOOL IsReadonlyByRef() const
+ {
+ return IsByRef() && (m_flags & TI_FLAG_BYREF_READONLY);
+ }
+
+ BOOL IsPermanentHomeByRef() const
+ {
+ return IsByRef() && (m_flags & TI_FLAG_BYREF_PERMANENT_HOME);
+ }
+
+ // Returns whether this is a method desc
+ BOOL IsMethod() const
+ {
+ return (GetType() == TI_METHOD);
+ }
+
+ BOOL IsStruct() const
+ {
+ return IsType(TI_STRUCT);
+ }
+
+ // A byref value class is NOT a value class
+ BOOL IsValueClass() const
+ {
+ return (IsStruct() || IsPrimitiveType());
+ }
+
+ // Does not return true for primitives. Will return true for value types that behave
+ // as primitives
+ BOOL IsValueClassWithClsHnd() const
+ {
+ if ((GetType() == TI_STRUCT) ||
+ (m_cls && GetType() != TI_REF && GetType() != TI_METHOD &&
+ GetType() != TI_ERROR)) // necessary because if byref bit is set, we return TI_ERROR)
+ {
+ return TRUE;
+ }
+ else
+ {
+ return FALSE;
+ }
+ }
+
+ // Returns whether this is an integer or real number
+ // NOTE: Use NormaliseToPrimitiveType() if you think you may have a
+ // System.Int32 etc., because those types are not considered number
+ // types by this function.
+ BOOL IsNumberType() const
+ {
+ ti_types Type = GetType();
+
+ // I1, I2, Boolean, character etc. cannot exist plainly -
+ // everything is at least an I4
+
+ return (Type == TI_INT || Type == TI_LONG || Type == TI_DOUBLE);
+ }
+
+ // Returns whether this is an integer
+ // NOTE: Use NormaliseToPrimitiveType() if you think you may have a
+ // System.Int32 etc., because those types are not considered number
+ // types by this function.
+ BOOL IsIntegerType() const
+ {
+ ti_types Type = GetType();
+
+ // I1, I2, Boolean, character etc. cannot exist plainly -
+ // everything is at least an I4
+
+ return (Type == TI_INT || Type == TI_LONG);
+ }
+
+    // Returns whether this is an integer or a native int.
+ BOOL IsIntOrNativeIntType() const
+ {
+#ifdef _TARGET_64BIT_
+ return (GetType() == TI_INT) || AreEquivalent(*this, nativeInt());
+#else
+ return IsType(TI_INT);
+#endif
+ }
+
+ BOOL IsNativeIntType() const
+ {
+ return AreEquivalent(*this, nativeInt());
+ }
+
+ // Returns whether this is a primitive type (not a byref, objref,
+ // array, null, value class, invalid value)
+ // May Need to normalise first (m/r/I4 --> I4)
+ BOOL IsPrimitiveType() const
+ {
+ DWORD Type = GetType();
+
+ // boolean, char, u1,u2 never appear on the operand stack
+ return (Type == TI_BYTE || Type == TI_SHORT || Type == TI_INT || Type == TI_LONG || Type == TI_FLOAT ||
+ Type == TI_DOUBLE);
+ }
+
+ // Returns whether this is the null objref
+ BOOL IsNullObjRef() const
+ {
+ return (IsType(TI_NULL));
+ }
+
+ // must be for a local which is an object type (i.e. has a slot >= 0)
+ // for primitive locals, use the liveness bitmap instead
+ // Note that this works if the error is 'Byref'
+ BOOL IsDead() const
+ {
+ return (m_flags & (TI_FLAG_DATA_MASK)) == TI_ERROR;
+ }
+
+ BOOL IsUninitialisedObjRef() const
+ {
+ return (m_flags & TI_FLAG_UNINIT_OBJREF);
+ }
+
+private:
+ // used to make functions that return typeinfo efficient.
+ typeInfo(DWORD flags, CORINFO_CLASS_HANDLE cls)
+ {
+ m_cls = cls;
+ m_flags = flags;
+ }
+
+ friend typeInfo ByRef(const typeInfo& ti);
+ friend typeInfo DereferenceByRef(const typeInfo& ti);
+ friend typeInfo NormaliseForStack(const typeInfo& ti);
+};
+
+inline typeInfo NormaliseForStack(const typeInfo& ti)
+{
+ return typeInfo(ti).NormaliseForStack();
+}
+
+// given ti make a byref to that type.
+inline typeInfo ByRef(const typeInfo& ti)
+{
+ return typeInfo(ti).MakeByRef();
+}
+
+// given ti which is a byref, return the type it points at
+inline typeInfo DereferenceByRef(const typeInfo& ti)
+{
+ return typeInfo(ti).DereferenceByRef();
+}
+/*****************************************************************************/
+#endif // _TYPEINFO_H_
+/*****************************************************************************/
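
To make the flag encoding described in this header concrete, here is a small usage sketch built only from the helpers declared above; it assumes the TI_* enumerators supplied by titypes.h (not part of this diff) and is illustrative rather than code from the JIT:

    // Sketch: composing the typeInfo flag encoding with the helpers declared above.
    void typeInfoFlagSketch()
    {
        typeInfo intTi(TI_INT);          // type component TI_INT, no flag bits set
        typeInfo byrefTi = ByRef(intTi); // adds TI_FLAG_BYREF; GetType() now reports TI_ERROR

        assert(byrefTi.IsByRef());
        assert(byrefTi.GetType() == TI_ERROR);
        assert(DereferenceByRef(byrefTi).IsType(TI_INT)); // stripping the byref recovers TI_INT

        typeInfo byteTi(TI_BYTE);
        assert(NormaliseForStack(byteTi).IsType(TI_INT)); // I1/I2 widen to I4 on the IL stack
    }
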
diff --git a/src/jit/alloc.cpp b/src/jit/alloc.cpp
new file mode 100644
index 0000000000..5c5f712a3f
--- /dev/null
+++ b/src/jit/alloc.cpp
@@ -0,0 +1,590 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+
+#if defined(_MSC_VER)
+#pragma hdrstop
+#endif // defined(_MSC_VER)
+
+//------------------------------------------------------------------------
+// PooledAllocator:
+// This subclass of `ArenaAllocator` is a singleton that always keeps
+// a single default-sized page allocated. We try to use the singleton
+// allocator as often as possible (i.e. for all non-concurrent
+// method compilations).
+class PooledAllocator : public ArenaAllocator
+{
+private:
+ enum
+ {
+ POOLED_ALLOCATOR_NOTINITIALIZED = 0,
+ POOLED_ALLOCATOR_IN_USE = 1,
+ POOLED_ALLOCATOR_AVAILABLE = 2,
+ POOLED_ALLOCATOR_SHUTDOWN = 3,
+ };
+
+ static PooledAllocator s_pooledAllocator;
+ static LONG s_pooledAllocatorState;
+
+ PooledAllocator() : ArenaAllocator()
+ {
+ }
+ PooledAllocator(IEEMemoryManager* memoryManager);
+
+ PooledAllocator(const PooledAllocator& other) = delete;
+ PooledAllocator& operator=(const PooledAllocator& other) = delete;
+
+public:
+ PooledAllocator& operator=(PooledAllocator&& other);
+
+ void destroy() override;
+
+ static void shutdown();
+
+ static ArenaAllocator* getPooledAllocator(IEEMemoryManager* memoryManager);
+};
+
+size_t ArenaAllocator::s_defaultPageSize = 0;
+
+//------------------------------------------------------------------------
+// ArenaAllocator::bypassHostAllocator:
+// Indicates whether or not the ArenaAllocator should bypass the JIT
+// host when allocating memory for arena pages.
+//
+// Return Value:
+// True if the JIT should bypass the JIT host; false otherwise.
+bool ArenaAllocator::bypassHostAllocator()
+{
+#if defined(DEBUG)
+    // When JitDirectAlloc is set, all JIT allocation requests are forwarded
+ // directly to the OS. This allows taking advantage of pageheap and other gflag
+ // knobs for ensuring that we do not have buffer overruns in the JIT.
+
+ return JitConfig.JitDirectAlloc() != 0;
+#else // defined(DEBUG)
+ return false;
+#endif // !defined(DEBUG)
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::getDefaultPageSize:
+// Returns the default size of an arena page.
+//
+// Return Value:
+// The default size of an arena page.
+size_t ArenaAllocator::getDefaultPageSize()
+{
+ return s_defaultPageSize;
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::ArenaAllocator:
+// Default-constructs an arena allocator.
+ArenaAllocator::ArenaAllocator()
+ : m_memoryManager(nullptr)
+ , m_firstPage(nullptr)
+ , m_lastPage(nullptr)
+ , m_nextFreeByte(nullptr)
+ , m_lastFreeByte(nullptr)
+{
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::ArenaAllocator:
+// Constructs an arena allocator.
+//
+// Arguments:
+// memoryManager - The `IEEMemoryManager` instance that will be used to
+// allocate memory for arena pages.
+ArenaAllocator::ArenaAllocator(IEEMemoryManager* memoryManager)
+ : m_memoryManager(memoryManager)
+ , m_firstPage(nullptr)
+ , m_lastPage(nullptr)
+ , m_nextFreeByte(nullptr)
+ , m_lastFreeByte(nullptr)
+{
+ assert(getDefaultPageSize() != 0);
+ assert(isInitialized());
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::operator=:
+// Move-assigns a `ArenaAllocator`.
+ArenaAllocator& ArenaAllocator::operator=(ArenaAllocator&& other)
+{
+ assert(!isInitialized());
+
+ m_memoryManager = other.m_memoryManager;
+ m_firstPage = other.m_firstPage;
+ m_lastPage = other.m_lastPage;
+ m_nextFreeByte = other.m_nextFreeByte;
+ m_lastFreeByte = other.m_lastFreeByte;
+
+ other.m_memoryManager = nullptr;
+ other.m_firstPage = nullptr;
+ other.m_lastPage = nullptr;
+ other.m_nextFreeByte = nullptr;
+ other.m_lastFreeByte = nullptr;
+
+ return *this;
+}
+
+bool ArenaAllocator::isInitialized()
+{
+ return m_memoryManager != nullptr;
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::allocateNewPage:
+// Allocates a new arena page.
+//
+// Arguments:
+// size - The number of bytes that were requested by the allocation
+// that triggered this request to allocate a new arena page.
+//
+// Return Value:
+// A pointer to the first usable byte of the newly allocated page.
+void* ArenaAllocator::allocateNewPage(size_t size, bool canThrow)
+{
+ assert(isInitialized());
+
+ size_t pageSize = sizeof(PageDescriptor) + size;
+
+ // Check for integer overflow
+ if (pageSize < size)
+ {
+ if (canThrow)
+ {
+ NOMEM();
+ }
+
+ return nullptr;
+ }
+
+ // If the current page is now full, update a few statistics
+ if (m_lastPage != nullptr)
+ {
+ // Undo the "+=" done in allocateMemory()
+ m_nextFreeByte -= size;
+
+ // Save the actual used size of the page
+ m_lastPage->m_usedBytes = m_nextFreeByte - m_lastPage->m_contents;
+ }
+
+ // Round up to a default-sized page if necessary
+ if (pageSize <= s_defaultPageSize)
+ {
+ pageSize = s_defaultPageSize;
+ }
+
+ // Round to the nearest multiple of OS page size if necessary
+ if (!bypassHostAllocator())
+ {
+ pageSize = roundUp(pageSize, DEFAULT_PAGE_SIZE);
+ }
+
+ // Allocate the new page
+ PageDescriptor* newPage = (PageDescriptor*)allocateHostMemory(pageSize);
+ if (newPage == nullptr)
+ {
+ if (canThrow)
+ {
+ NOMEM();
+ }
+
+ return nullptr;
+ }
+
+ // Append the new page to the end of the list
+ newPage->m_next = nullptr;
+ newPage->m_pageBytes = pageSize;
+ newPage->m_previous = m_lastPage;
+ newPage->m_usedBytes = 0; // m_usedBytes is meaningless until a new page is allocated.
+                              // Instead of letting it contain garbage (which could confuse us),
+ // set it to zero.
+
+ if (m_lastPage != nullptr)
+ {
+ m_lastPage->m_next = newPage;
+ }
+ else
+ {
+ m_firstPage = newPage;
+ }
+
+ m_lastPage = newPage;
+
+ // Adjust the next/last free byte pointers
+ m_nextFreeByte = newPage->m_contents + size;
+ m_lastFreeByte = (BYTE*)newPage + pageSize;
+ assert((m_lastFreeByte - m_nextFreeByte) >= 0);
+
+ return newPage->m_contents;
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::destroy:
+// Performs any necessary teardown for an `ArenaAllocator`.
+void ArenaAllocator::destroy()
+{
+ assert(isInitialized());
+
+ // Free all of the allocated pages
+ for (PageDescriptor *page = m_firstPage, *next; page != nullptr; page = next)
+ {
+ next = page->m_next;
+ freeHostMemory(page);
+ }
+
+ // Clear out the allocator's fields
+ m_memoryManager = nullptr;
+ m_firstPage = nullptr;
+ m_lastPage = nullptr;
+ m_nextFreeByte = nullptr;
+ m_lastFreeByte = nullptr;
+}
+
+// The debug version of the allocator may allocate directly from the
+// OS rather than going through the hosting APIs. In order to do so,
+// it must undef the macros that are usually in place to prevent
+// accidental uses of the OS allocator.
+#if defined(DEBUG)
+#undef GetProcessHeap
+#undef HeapAlloc
+#undef HeapFree
+#endif
+
+//------------------------------------------------------------------------
+// ArenaAllocator::allocateHostMemory:
+// Allocates memory from the host (or the OS if `bypassHostAllocator()`
+// returns `true`).
+//
+// Arguments:
+// size - The number of bytes to allocate.
+//
+// Return Value:
+// A pointer to the allocated memory.
+void* ArenaAllocator::allocateHostMemory(size_t size)
+{
+ assert(isInitialized());
+
+#if defined(DEBUG)
+ if (bypassHostAllocator())
+ {
+ return ::HeapAlloc(GetProcessHeap(), 0, size);
+ }
+ else
+ {
+ return ClrAllocInProcessHeap(0, S_SIZE_T(size));
+ }
+#else // defined(DEBUG)
+ return m_memoryManager->ClrVirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_READWRITE);
+#endif // !defined(DEBUG)
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::freeHostMemory:
+// Frees memory allocated by a previous call to `allocateHostMemory`.
+//
+// Arguments:
+// block - A pointer to the memory to free.
+void ArenaAllocator::freeHostMemory(void* block)
+{
+ assert(isInitialized());
+
+#if defined(DEBUG)
+ if (bypassHostAllocator())
+ {
+ ::HeapFree(GetProcessHeap(), 0, block);
+ }
+ else
+ {
+ ClrFreeInProcessHeap(0, block);
+ }
+#else // defined(DEBUG)
+ m_memoryManager->ClrVirtualFree(block, 0, MEM_RELEASE);
+#endif // !defined(DEBUG)
+}
+
+#if defined(DEBUG)
+//------------------------------------------------------------------------
+// ArenaAllocator::allocateMemory:
+// Allocates memory using an `ArenaAllocator`.
+//
+// Arguments:
+// size - The number of bytes to allocate.
+//
+// Return Value:
+// A pointer to the allocated memory.
+//
+// Note:
+// This is the DEBUG-only version of `allocateMemory`; the release
+// version of this method is defined in the corresponding header file.
+// This version of the method has some abilities that the release
+//    version does not: it may inject faults into the allocator, and it
+//    seeds all allocations with a specified pattern to help catch
+// use-before-init problems.
+void* ArenaAllocator::allocateMemory(size_t size)
+{
+ assert(isInitialized());
+ assert(size != 0 && (size & (sizeof(int) - 1)) == 0);
+
+ // Ensure that we always allocate in pointer sized increments.
+ size = (size_t)roundUp(size, sizeof(size_t));
+
+ if (JitConfig.ShouldInjectFault() != 0)
+ {
+        // Force the underlying memory allocator (either the OS or the CLR host)
+ // to allocate the memory. Any fault injection will kick in.
+ void* p = ClrAllocInProcessHeap(0, S_SIZE_T(1));
+ if (p != nullptr)
+ {
+ ClrFreeInProcessHeap(0, p);
+ }
+ else
+ {
+ NOMEM(); // Throw!
+ }
+ }
+
+ void* block = m_nextFreeByte;
+ m_nextFreeByte += size;
+
+ if (m_nextFreeByte > m_lastFreeByte)
+ {
+ block = allocateNewPage(size, true);
+ }
+
+ memset(block, UninitializedWord<char>(), size);
+ return block;
+}
+#endif // defined(DEBUG)
+
+//------------------------------------------------------------------------
+// ArenaAllocator::getTotalBytesAllocated:
+// Gets the total number of bytes allocated for all of the arena pages
+// for an `ArenaAllocator`.
+//
+// Return Value:
+// See above.
+size_t ArenaAllocator::getTotalBytesAllocated()
+{
+ assert(isInitialized());
+
+ size_t bytes = 0;
+ for (PageDescriptor* page = m_firstPage; page != nullptr; page = page->m_next)
+ {
+ bytes += page->m_pageBytes;
+ }
+
+ return bytes;
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::getTotalBytesUsed:
+// Gets the total number of bytes used in all of the arena pages for
+// an `ArenaAllocator`.
+//
+// Return Value:
+// See above.
+//
+// Notes:
+// An arena page may have unused space at the very end. This happens
+// when an allocation request comes in (via a call to `allocateMemory`)
+// that will not fit in the remaining bytes for the current page.
+// Another way to understand this method is as returning the total
+// number of bytes allocated for arena pages minus the number of bytes
+//    that are unused across all arena pages.
+size_t ArenaAllocator::getTotalBytesUsed()
+{
+ assert(isInitialized());
+
+ if (m_lastPage != nullptr)
+ {
+ m_lastPage->m_usedBytes = m_nextFreeByte - m_lastPage->m_contents;
+ }
+
+ size_t bytes = 0;
+ for (PageDescriptor* page = m_firstPage; page != nullptr; page = page->m_next)
+ {
+ bytes += page->m_usedBytes;
+ }
+
+ return bytes;
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::startup:
+// Performs any necessary initialization for the arena allocator
+// subsystem.
+void ArenaAllocator::startup()
+{
+ s_defaultPageSize = bypassHostAllocator() ? (size_t)MIN_PAGE_SIZE : (size_t)DEFAULT_PAGE_SIZE;
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::shutdown:
+// Performs any necessary teardown for the arena allocator subsystem.
+void ArenaAllocator::shutdown()
+{
+ PooledAllocator::shutdown();
+}
+
+PooledAllocator PooledAllocator::s_pooledAllocator;
+LONG PooledAllocator::s_pooledAllocatorState = POOLED_ALLOCATOR_NOTINITIALIZED;
+
+//------------------------------------------------------------------------
+// PooledAllocator::PooledAllocator:
+// Constructs a `PooledAllocator`.
+PooledAllocator::PooledAllocator(IEEMemoryManager* memoryManager) : ArenaAllocator(memoryManager)
+{
+}
+
+//------------------------------------------------------------------------
+// PooledAllocator::operator=:
+// Move-assigns a `PooledAllocator`.
+PooledAllocator& PooledAllocator::operator=(PooledAllocator&& other)
+{
+ *((ArenaAllocator*)this) = std::move((ArenaAllocator &&)other);
+ return *this;
+}
+
+//------------------------------------------------------------------------
+// PooledAllocator::shutdown:
+// Performs any necessary teardown for the pooled allocator.
+//
+// Notes:
+// If the allocator has been initialized and is in use when this method is called,
+// it is up to whatever is using the pooled allocator to call `destroy` in order
+// to free its memory.
+void PooledAllocator::shutdown()
+{
+ LONG oldState = InterlockedExchange(&s_pooledAllocatorState, POOLED_ALLOCATOR_SHUTDOWN);
+ switch (oldState)
+ {
+ case POOLED_ALLOCATOR_NOTINITIALIZED:
+ case POOLED_ALLOCATOR_SHUTDOWN:
+ case POOLED_ALLOCATOR_IN_USE:
+ return;
+
+ case POOLED_ALLOCATOR_AVAILABLE:
+ // The pooled allocator was initialized and not in use; we must destroy it.
+ s_pooledAllocator.destroy();
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// PooledAllocator::getPooledAllocator:
+// Returns the pooled allocator if it is not already in use.
+//
+// Arguments:
+// memoryManager: The `IEEMemoryManager` instance in use by the caller.
+//
+// Return Value:
+// A pointer to the pooled allocator if it is available or `nullptr`
+// if it is already in use.
+//
+// Notes:
+// Calling `destroy` on the returned allocator will return it to the
+// pool.
+ArenaAllocator* PooledAllocator::getPooledAllocator(IEEMemoryManager* memoryManager)
+{
+ LONG oldState = InterlockedExchange(&s_pooledAllocatorState, POOLED_ALLOCATOR_IN_USE);
+ switch (oldState)
+ {
+ case POOLED_ALLOCATOR_IN_USE:
+ case POOLED_ALLOCATOR_SHUTDOWN:
+ // Either the allocator is in use or this call raced with a call to `shutdown`.
+ // Return `nullptr`.
+ return nullptr;
+
+ case POOLED_ALLOCATOR_AVAILABLE:
+ if (s_pooledAllocator.m_memoryManager != memoryManager)
+ {
+ // The allocator is available, but it was initialized with a different
+ // memory manager. Release it and return `nullptr`.
+ InterlockedExchange(&s_pooledAllocatorState, POOLED_ALLOCATOR_AVAILABLE);
+ return nullptr;
+ }
+
+ return &s_pooledAllocator;
+
+ case POOLED_ALLOCATOR_NOTINITIALIZED:
+ {
+ PooledAllocator allocator(memoryManager);
+ if (allocator.allocateNewPage(0, false) == nullptr)
+ {
+ // Failed to grab the initial memory page.
+ InterlockedExchange(&s_pooledAllocatorState, POOLED_ALLOCATOR_NOTINITIALIZED);
+ return nullptr;
+ }
+
+ s_pooledAllocator = std::move(allocator);
+ }
+
+ return &s_pooledAllocator;
+
+ default:
+ assert(!"Unknown pooled allocator state");
+ unreached();
+ }
+}
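+
+// A minimal caller sketch (illustrative only; `memoryManager` is assumed to be a
+// valid IEEMemoryManager* supplied by the EE):
+//
+//     ArenaAllocator* allocator = PooledAllocator::getPooledAllocator(memoryManager);
+//     if (allocator == nullptr)
+//     {
+//         // The pool is in use, shut down, or bound to a different memory manager;
+//         // the caller must fall back to an allocator of its own.
+//     }
+//     else
+//     {
+//         void* memory = allocator->allocateMemory(64); // bump-pointer allocation
+//         allocator->destroy();                         // returns the allocator to the pool
+//     }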
+
+//------------------------------------------------------------------------
+// PooledAllocator::destroy:
+// Performs any necessary teardown for a `PooledAllocator` and returns the allocator
+// to the pool.
+void PooledAllocator::destroy()
+{
+ assert(isInitialized());
+ assert(this == &s_pooledAllocator);
+ assert(s_pooledAllocatorState == POOLED_ALLOCATOR_IN_USE || s_pooledAllocatorState == POOLED_ALLOCATOR_SHUTDOWN);
+ assert(m_firstPage != nullptr);
+
+ // Free all but the first allocated page
+ for (PageDescriptor *page = m_firstPage->m_next, *next; page != nullptr; page = next)
+ {
+ next = page->m_next;
+ freeHostMemory(page);
+ }
+
+ // Reset the relevant state to point back to the first byte of the first page
+ m_firstPage->m_next = nullptr;
+ m_lastPage = m_firstPage;
+ m_nextFreeByte = m_firstPage->m_contents;
+ m_lastFreeByte = (BYTE*)m_firstPage + m_firstPage->m_pageBytes;
+
+ assert(getTotalBytesAllocated() == s_defaultPageSize);
+
+ // If we've already been shut down, free the first page. Otherwise, return the allocator to the pool.
+ if (s_pooledAllocatorState == POOLED_ALLOCATOR_SHUTDOWN)
+ {
+ ArenaAllocator::destroy();
+ }
+ else
+ {
+ InterlockedExchange(&s_pooledAllocatorState, POOLED_ALLOCATOR_AVAILABLE);
+ }
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::getPooledAllocator:
+// Returns the pooled allocator if it is not already in use.
+//
+// Arguments:
+// memoryManager: The `IEEMemoryManager` instance in use by the caller.
+//
+// Return Value:
+// A pointer to the pooled allocator if it is available or `nullptr`
+// if it is already in use.
+//
+// Notes:
+// Calling `destroy` on the returned allocator will return it to the
+// pool.
+ArenaAllocator* ArenaAllocator::getPooledAllocator(IEEMemoryManager* memoryManager)
+{
+ return PooledAllocator::getPooledAllocator(memoryManager);
+}
diff --git a/src/jit/alloc.h b/src/jit/alloc.h
new file mode 100644
index 0000000000..a769341378
--- /dev/null
+++ b/src/jit/alloc.h
@@ -0,0 +1,99 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _ALLOC_H_
+#define _ALLOC_H_
+
+#if !defined(_HOST_H_)
+#include "host.h"
+#endif // defined(_HOST_H_)
+
+class ArenaAllocator
+{
+private:
+ ArenaAllocator(const ArenaAllocator& other) = delete;
+ ArenaAllocator& operator=(const ArenaAllocator& other) = delete;
+
+protected:
+ struct PageDescriptor
+ {
+ PageDescriptor* m_next;
+ PageDescriptor* m_previous;
+
+ size_t m_pageBytes; // # of bytes allocated
+ size_t m_usedBytes; // # of bytes actually used. (This is only valid when we've allocated a new page.)
+ // See ArenaAllocator::allocateNewPage.
+
+ BYTE m_contents[];
+ };
+
+ // Anything less than 64K leaves VM holes since the OS allocates address space in this size.
+ // Thus if we want to make this smaller, we need to do a reserve / commit scheme
+ enum
+ {
+ DEFAULT_PAGE_SIZE = 16 * OS_page_size,
+ MIN_PAGE_SIZE = sizeof(PageDescriptor)
+ };
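+
+ // For example, with the usual 4 KB OS page size, DEFAULT_PAGE_SIZE is
+ // 16 * 4 KB = 64 KB, matching the 64K allocation granularity noted above.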
+
+ static size_t s_defaultPageSize;
+
+ IEEMemoryManager* m_memoryManager;
+
+ PageDescriptor* m_firstPage;
+ PageDescriptor* m_lastPage;
+
+ // These two pointers (when non-null) will always point into 'm_lastPage'.
+ BYTE* m_nextFreeByte;
+ BYTE* m_lastFreeByte;
+
+ bool isInitialized();
+
+ void* allocateNewPage(size_t size, bool canThrow);
+
+ void* allocateHostMemory(size_t size);
+ void freeHostMemory(void* block);
+
+public:
+ ArenaAllocator();
+ ArenaAllocator(IEEMemoryManager* memoryManager);
+ ArenaAllocator& operator=(ArenaAllocator&& other);
+
+ // NOTE: it would be nice to have a destructor on this type to ensure that any value that
+ // goes out of scope is either uninitialized or has been torn down via a call to
+ // destroy(), but this interacts badly in methods that use SEH. #3058 tracks
+ // revisiting EH in the JIT; such a destructor could be added if SEH is removed
+ // as part of that work.
+
+ virtual void destroy();
+
+#if defined(DEBUG)
+ void* allocateMemory(size_t sz);
+#else // defined(DEBUG)
+ inline void* allocateMemory(size_t size)
+ {
+ void* block = m_nextFreeByte;
+ m_nextFreeByte += size;
+
+ if (m_nextFreeByte > m_lastFreeByte)
+ {
+ block = allocateNewPage(size, true);
+ }
+
+ return block;
+ }
+#endif // !defined(DEBUG)
+
+ size_t getTotalBytesAllocated();
+ size_t getTotalBytesUsed();
+
+ static bool bypassHostAllocator();
+ static size_t getDefaultPageSize();
+
+ static void startup();
+ static void shutdown();
+
+ static ArenaAllocator* getPooledAllocator(IEEMemoryManager* memoryManager);
+};
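+
+// A rough lifecycle sketch (illustrative only; assumes a valid IEEMemoryManager*
+// and that the one-argument constructor leaves the allocator ready for use):
+//
+//     ArenaAllocator::startup();                   // once per process: picks the default page size
+//
+//     ArenaAllocator allocator(memoryManager);
+//     void* block = allocator.allocateMemory(128); // bump-pointer allocation; grows by whole pages
+//     size_t used  = allocator.getTotalBytesUsed();
+//     allocator.destroy();                         // releases all pages
+//
+//     ArenaAllocator::shutdown();                  // once per process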
+
+#endif // _ALLOC_H_
diff --git a/src/jit/arraystack.h b/src/jit/arraystack.h
new file mode 100644
index 0000000000..1692294fcb
--- /dev/null
+++ b/src/jit/arraystack.h
@@ -0,0 +1,146 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// ArrayStack: A stack, implemented as a growable array
+
+template <class T>
+class ArrayStack
+{
+ static const int builtinSize = 8;
+
+public:
+ ArrayStack(Compiler* comp, int initialSize = builtinSize)
+ {
+ compiler = comp;
+
+ if (initialSize > builtinSize)
+ {
+ maxIndex = initialSize;
+ data = new (compiler, CMK_ArrayStack) T[initialSize];
+ }
+ else
+ {
+ maxIndex = builtinSize;
+ data = builtinData;
+ }
+
+ tosIndex = 0;
+ }
+
+ void Push(T item)
+ {
+ if (tosIndex == maxIndex)
+ {
+ Realloc();
+ }
+
+ data[tosIndex] = item;
+ tosIndex++;
+ }
+
+ void Realloc()
+ {
+ // get a new chunk 2x the size of the old one
+ // and copy over
+ T* oldData = data;
+ noway_assert(maxIndex * 2 > maxIndex);
+ data = new (compiler, CMK_ArrayStack) T[maxIndex * 2];
+ for (int i = 0; i < maxIndex; i++)
+ {
+ data[i] = oldData[i];
+ }
+ maxIndex *= 2;
+ }
+
+ // reverse the top N in the stack
+ void ReverseTop(int number)
+ {
+ if (number < 2)
+ {
+ return;
+ }
+
+ assert(number <= tosIndex);
+
+ int start = tosIndex - number;
+ int offset = 0;
+ while (offset < number / 2)
+ {
+ T temp;
+ int index = start + offset;
+ int otherIndex = tosIndex - 1 - offset;
+ temp = data[index];
+ data[index] = data[otherIndex];
+ data[otherIndex] = temp;
+
+ offset++;
+ }
+ }
+
+ T Pop()
+ {
+ assert(tosIndex > 0);
+ tosIndex--;
+ return data[tosIndex];
+ }
+
+ T Top()
+ {
+ assert(tosIndex > 0);
+ return data[tosIndex - 1];
+ }
+
+ T& TopRef()
+ {
+ assert(tosIndex > 0);
+ return data[tosIndex - 1];
+ }
+
+ // return the i'th from the top
+ T Index(int idx)
+ {
+ assert(tosIndex > idx);
+ return data[tosIndex - 1 - idx];
+ }
+
+ // return a reference to the i'th from the top
+ T& IndexRef(int idx)
+ {
+ assert(tosIndex > idx);
+ return data[tosIndex - 1 - idx];
+ }
+
+ int Height()
+ {
+ return tosIndex;
+ }
+
+ // return the bottom of the stack
+ T Bottom()
+ {
+ assert(tosIndex > 0);
+ return data[0];
+ }
+
+ // return the i'th from the bottom
+ T Bottom(int indx)
+ {
+ assert(tosIndex > indx);
+ return data[indx];
+ }
+
+ void Reset()
+ {
+ tosIndex = 0;
+ }
+
+private:
+ Compiler* compiler; // needed for allocation
+ int tosIndex; // first free location
+ int maxIndex;
+ T* data;
+ // initial allocation
+ T builtinData[builtinSize];
+};
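+
+// A usage sketch (illustrative only; `comp` is assumed to be a valid Compiler*):
+//
+//     ArrayStack<GenTreePtr> stack(comp);
+//     stack.Push(tree1);
+//     stack.Push(tree2);
+//     assert(stack.Height() == 2);
+//     GenTreePtr top = stack.Top(); // tree2, without removing it
+//     GenTreePtr t   = stack.Pop(); // removes and returns tree2
+//     stack.Reset();                // empties the stack without shrinking its storage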
diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp
new file mode 100644
index 0000000000..fe35c3b780
--- /dev/null
+++ b/src/jit/assertionprop.cpp
@@ -0,0 +1,5142 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX AssertionProp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/*****************************************************************************
+ *
+ * Helper passed to Compiler::fgWalkTreePre() to find the Asgn node for optAddCopies()
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::optAddCopiesCallback(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ if (tree->OperKind() & GTK_ASGOP)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ Compiler* comp = data->compiler;
+
+ if ((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == comp->optAddCopyLclNum))
+ {
+ comp->optAddCopyAsgnNode = tree;
+ return WALK_ABORT;
+ }
+ }
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Add new copies before Assertion Prop.
+ */
+
+void Compiler::optAddCopies()
+{
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In optAddCopies()\n\n");
+ }
+ if (verboseTrees)
+ {
+ printf("Blocks/Trees at start of phase\n");
+ fgDispBasicBlocks(true);
+ }
+#endif
+
+ // Don't add any copies if we have reached the tracking limit.
+ if (lvaHaveManyLocals())
+ {
+ return;
+ }
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ var_types typ = varDsc->TypeGet();
+
+ // We only add copies for non temp local variables
+ // that have a single def and that can possibly be enregistered
+
+ if (varDsc->lvIsTemp || !varDsc->lvSingleDef || !varTypeCanReg(typ))
+ {
+ continue;
+ }
+
+ /* For lvNormalizeOnLoad(), we need to add a cast to the copy-assignment
+ like "copyLclNum = int(varDsc)" and optAssertionGen() only
+ tracks simple assignments. The same goes for lvNormalizeOnStore() as
+ the cast is generated in fgMorphSmpOpAsg. This boils down to not having
+ a copy until optAssertionGen handles this. */
+ if (varDsc->lvNormalizeOnLoad() || varDsc->lvNormalizeOnStore() || typ == TYP_BOOL)
+ {
+ continue;
+ }
+
+ if (varTypeIsSmall(varDsc->TypeGet()) || typ == TYP_BOOL)
+ {
+ continue;
+ }
+
+ // If locals must be initialized to zero, that initialization counts as a second definition.
+ // VB in particular allows usage of variables not explicitly initialized.
+ // Note that this effectively disables this optimization for all local variables
+ // as C# sets InitLocals all the time starting in Whidbey.
+
+ if (!varDsc->lvIsParam && info.compInitMem)
+ {
+ continue;
+ }
+
+ // On x86 we may want to add a copy for an incoming double parameter
+ // because we can ensure that the copy we make is double aligned
+ // whereas we can never ensure the alignment of an incoming double parameter
+ //
+ // On all other platforms we will never need to make a copy
+ // for an incoming double parameter
+
+ bool isFloatParam = false;
+
+#ifdef _TARGET_X86_
+ isFloatParam = varDsc->lvIsParam && varTypeIsFloating(typ);
+#endif
+
+ if (!isFloatParam && !varDsc->lvVolatileHint)
+ {
+ continue;
+ }
+
+ // We don't want to add a copy for a variable that is part of a struct
+ if (varDsc->lvIsStructField)
+ {
+ continue;
+ }
+
+ // We require that the weighted ref count be significant.
+ if (varDsc->lvRefCntWtd <= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT / 2))
+ {
+ continue;
+ }
+
+ // For parameters, we only want to add a copy for the heavier-than-average
+ // uses instead of adding a copy to cover every single use.
+ // 'paramImportantUseDom' is the set of blocks that dominate the
+ // heavier-than-average uses of a parameter.
+ // Initial value is all blocks.
+
+ BlockSet BLOCKSET_INIT_NOCOPY(paramImportantUseDom, BlockSetOps::MakeFull(this));
+
+ // This will be the threshold for determining heavier-than-average uses
+ unsigned paramAvgWtdRefDiv2 = (varDsc->lvRefCntWtd + varDsc->lvRefCnt / 2) / (varDsc->lvRefCnt * 2);
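+
+ // Worked example (illustrative numbers): with lvRefCntWtd = 800 and lvRefCnt = 4,
+ // paramAvgWtdRefDiv2 = (800 + 2) / 8 = 100, i.e. half of the average weighted
+ // ref count of 200.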
+
+ bool paramFoundImportantUse = false;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Trying to add a copy for V%02u %s, avg_wtd = %s\n", lclNum,
+ varDsc->lvIsParam ? "an arg" : "a local", refCntWtd2str(paramAvgWtdRefDiv2));
+ }
+#endif
+
+ //
+ // We must have a ref in a block that is dominated only by the entry block
+ //
+
+ if (BlockSetOps::MayBeUninit(varDsc->lvRefBlks))
+ {
+ // No references
+ continue;
+ }
+
+ bool isDominatedByFirstBB = false;
+
+ BLOCKSET_ITER_INIT(this, iter, varDsc->lvRefBlks, blkNum);
+ while (iter.NextElem(this, &blkNum))
+ {
+ /* Find the block 'blkNum' */
+ BasicBlock* block = fgFirstBB;
+ while (block && (block->bbNum != blkNum))
+ {
+ block = block->bbNext;
+ }
+ noway_assert(block && (block->bbNum == blkNum));
+
+ bool importantUseInBlock = (varDsc->lvIsParam) && (block->getBBWeight(this) > paramAvgWtdRefDiv2);
+ bool isPreHeaderBlock = ((block->bbFlags & BBF_LOOP_PREHEADER) != 0);
+ BlockSet BLOCKSET_INIT_NOCOPY(blockDom, BlockSetOps::UninitVal());
+ BlockSet BLOCKSET_INIT_NOCOPY(blockDomSub0, BlockSetOps::UninitVal());
+
+ if (block->bbIDom == nullptr && isPreHeaderBlock)
+ {
+ // Loop preheader blocks that we insert will have bbIDom set to nullptr,
+ // but we can instead use the bbNext successor block's dominator information
+ noway_assert(block->bbNext != nullptr);
+ BlockSetOps::AssignNoCopy(this, blockDom, fgGetDominatorSet(block->bbNext));
+ }
+ else
+ {
+ BlockSetOps::AssignNoCopy(this, blockDom, fgGetDominatorSet(block));
+ }
+
+ if (!BlockSetOps::IsEmpty(this, blockDom))
+ {
+ BlockSetOps::Assign(this, blockDomSub0, blockDom);
+ if (isPreHeaderBlock)
+ {
+ // We must clear bbNext block number from the dominator set
+ BlockSetOps::RemoveElemD(this, blockDomSub0, block->bbNext->bbNum);
+ }
+ /* Is this block dominated by fgFirstBB? */
+ if (BlockSetOps::IsMember(this, blockDomSub0, fgFirstBB->bbNum))
+ {
+ isDominatedByFirstBB = true;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Referenced in BB%02u, bbWeight is %s", blkNum, refCntWtd2str(block->getBBWeight(this)));
+
+ if (isDominatedByFirstBB)
+ {
+ printf(", which is dominated by BB01");
+ }
+
+ if (importantUseInBlock)
+ {
+ printf(", ImportantUse");
+ }
+
+ printf("\n");
+ }
+#endif
+
+ /* If this is a heavier-than-average block, then track which
+ blocks dominate this use of the parameter. */
+ if (importantUseInBlock)
+ {
+ paramFoundImportantUse = true;
+ BlockSetOps::IntersectionD(this, paramImportantUseDom,
+ blockDomSub0); // Clear blocks that do not dominate
+ }
+ }
+
+ // We should have found at least one block whose weight exceeds paramAvgWtdRefDiv2.
+ if (varDsc->lvIsParam)
+ {
+ if (!paramFoundImportantUse)
+ {
+ continue;
+ }
+ }
+
+ // For us to add a new copy:
+ // we require that we have a floating point parameter
+ // or a lvVolatile variable that is always reached from the first BB
+ // and we have at least one block available in paramImportantUseDom
+ //
+ bool doCopy = (isFloatParam || (isDominatedByFirstBB && varDsc->lvVolatileHint)) &&
+ !BlockSetOps::IsEmpty(this, paramImportantUseDom);
+
+ // Under stress mode we expand the number of candidates
+ // to include parameters of any type
+ // or any variable that is always reached from the first BB
+ //
+ if (compStressCompile(STRESS_GENERIC_VARN, 30))
+ {
+ // Ensure that we preserve the invariants required by the subsequent code.
+ if (varDsc->lvIsParam || isDominatedByFirstBB)
+ {
+ doCopy = true;
+ }
+ }
+
+ if (!doCopy)
+ {
+ continue;
+ }
+
+ GenTreePtr stmt;
+ unsigned copyLclNum = lvaGrabTemp(false DEBUGARG("optAddCopies"));
+
+ // Because lvaGrabTemp may have reallocated the lvaTable, ensure varDsc
+ // is still in sync with lvaTable[lclNum];
+ varDsc = &lvaTable[lclNum];
+
+ // Set lvType on the new Temp Lcl Var
+ lvaTable[copyLclNum].lvType = typ;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n Finding the best place to insert the assignment V%02i=V%02i\n", copyLclNum, lclNum);
+ }
+#endif
+
+ if (varDsc->lvIsParam)
+ {
+ noway_assert(varDsc->lvDefStmt == nullptr || varDsc->lvIsStructField);
+
+ // Create a new copy assignment tree
+ GenTreePtr copyAsgn = gtNewTempAssign(copyLclNum, gtNewLclvNode(lclNum, typ));
+
+ /* Find the best block to insert the new assignment */
+ /* We will choose the lowest weighted block, and within */
+ /* those blocks, the highest numbered block which */
+ /* dominates all the uses of the local variable */
+
+ /* Our default is to use the first block */
+ BasicBlock* bestBlock = fgFirstBB;
+ unsigned bestWeight = bestBlock->getBBWeight(this);
+ BasicBlock* block = bestBlock;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Starting at BB%02u, bbWeight is %s", block->bbNum,
+ refCntWtd2str(block->getBBWeight(this)));
+
+ printf(", bestWeight is %s\n", refCntWtd2str(bestWeight));
+ }
+#endif
+
+ /* We have already calculated paramImportantUseDom above. */
+
+ BLOCKSET_ITER_INIT(this, iter, paramImportantUseDom, blkNum);
+ while (iter.NextElem(this, &blkNum))
+ {
+ /* Advance block to point to 'blkNum' */
+ /* This assumes that the iterator returns block numbers in increasing lexical order. */
+ while (block && (block->bbNum != blkNum))
+ {
+ block = block->bbNext;
+ }
+ noway_assert(block && (block->bbNum == blkNum));
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Considering BB%02u, bbWeight is %s", block->bbNum,
+ refCntWtd2str(block->getBBWeight(this)));
+
+ printf(", bestWeight is %s\n", refCntWtd2str(bestWeight));
+ }
+#endif
+
+ // Does this block have a smaller bbWeight value?
+ if (block->getBBWeight(this) > bestWeight)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("bbWeight too high\n");
+ }
+#endif
+ continue;
+ }
+
+ // Don't use blocks that are exception handlers because
+ // inserting a new first statement will interfere with
+ // the CATCHARG
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Catch block\n");
+ }
+#endif
+ continue;
+ }
+
+ // Don't use the BBJ_ALWAYS block marked with BBF_KEEP_BBJ_ALWAYS. These
+ // are used by EH code. The JIT cannot generate code for such a block.
+
+ if (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+#if FEATURE_EH_FUNCLETS
+ // With funclets, this is only used for BBJ_CALLFINALLY/BBJ_ALWAYS pairs. For x86, it is also used
+ // as the "final step" block for leaving finallys.
+ assert((block->bbPrev != nullptr) && block->bbPrev->isBBCallAlwaysPair());
+#endif // FEATURE_EH_FUNCLETS
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Internal EH BBJ_ALWAYS block\n");
+ }
+#endif
+ continue;
+ }
+
+ // This block will be the new candidate for the insert point
+ // for the new assignment
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("new bestBlock\n");
+ }
+#endif
+
+ bestBlock = block;
+ bestWeight = block->getBBWeight(this);
+ }
+
+ // If there is a use of the variable in this block
+ // then we insert the assignment at the beginning
+ // otherwise we insert the statement at the end
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Insert copy at the %s of BB%02u\n",
+ (BlockSetOps::IsEmpty(this, paramImportantUseDom) ||
+ BlockSetOps::IsMember(this, varDsc->lvRefBlks, bestBlock->bbNum))
+ ? "start"
+ : "end",
+ bestBlock->bbNum);
+ }
+#endif
+
+ if (BlockSetOps::IsEmpty(this, paramImportantUseDom) ||
+ BlockSetOps::IsMember(this, varDsc->lvRefBlks, bestBlock->bbNum))
+ {
+ stmt = fgInsertStmtAtBeg(bestBlock, copyAsgn);
+ }
+ else
+ {
+ stmt = fgInsertStmtNearEnd(bestBlock, copyAsgn);
+ }
+
+ /* Increment its lvRefCnt and lvRefCntWtd */
+ lvaTable[lclNum].incRefCnts(fgFirstBB->getBBWeight(this), this);
+
+ /* Increment its lvRefCnt and lvRefCntWtd */
+ lvaTable[copyLclNum].incRefCnts(fgFirstBB->getBBWeight(this), this);
+ }
+ else
+ {
+ noway_assert(varDsc->lvDefStmt != nullptr);
+
+ /* Locate the assignment to varDsc in the lvDefStmt */
+ stmt = varDsc->lvDefStmt;
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ optAddCopyLclNum = lclNum; // in
+ optAddCopyAsgnNode = nullptr; // out
+
+ fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, Compiler::optAddCopiesCallback, (void*)this, false);
+
+ noway_assert(optAddCopyAsgnNode);
+
+ GenTreePtr tree = optAddCopyAsgnNode;
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ noway_assert(tree && op1 && (tree->OperKind() & GTK_ASGOP) && (op1->gtOper == GT_LCL_VAR) &&
+ (op1->gtLclVarCommon.gtLclNum == lclNum));
+
+ /* TODO-Review: BB_UNITY_WEIGHT is not the correct block weight */
+ unsigned blockWeight = BB_UNITY_WEIGHT;
+
+ /* Increment its lvRefCnt and lvRefCntWtd twice */
+ lvaTable[copyLclNum].incRefCnts(blockWeight, this);
+ lvaTable[copyLclNum].incRefCnts(blockWeight, this);
+
+ /* Assign the old expression into the new temp */
+
+ GenTreePtr newAsgn = gtNewTempAssign(copyLclNum, tree->gtOp.gtOp2);
+
+ /* Copy the new temp to op1 */
+
+ GenTreePtr copyAsgn = gtNewAssignNode(op1, gtNewLclvNode(copyLclNum, typ));
+
+ /* Change the tree to a GT_COMMA with the two assignments as child nodes */
+
+ tree->gtBashToNOP();
+ tree->ChangeOper(GT_COMMA);
+
+ tree->gtOp.gtOp1 = newAsgn;
+ tree->gtOp.gtOp2 = copyAsgn;
+
+ tree->gtFlags |= (newAsgn->gtFlags & GTF_ALL_EFFECT);
+ tree->gtFlags |= (copyAsgn->gtFlags & GTF_ALL_EFFECT);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nIntroducing a new copy for V%02u\n", lclNum);
+ gtDispTree(stmt->gtStmt.gtStmtExpr);
+ printf("\n");
+ }
+#endif
+ }
+}
+
+//------------------------------------------------------------------------------
+// GetAssertionDep: Retrieve the assertions on this local variable
+//
+// Arguments:
+// lclNum - The local var id.
+//
+// Return Value:
+// The dependent assertions (assertions using the value of the local var)
+// of the local var.
+//
+
+ASSERT_TP& Compiler::GetAssertionDep(unsigned lclNum)
+{
+ ExpandArray<ASSERT_TP>& dep = *optAssertionDep;
+ if (dep[lclNum] == nullptr)
+ {
+ dep[lclNum] = optNewEmptyAssertSet();
+ }
+ return dep[lclNum];
+}
+
+/*****************************************************************************
+ *
+ * Initialize the assertion prop bitset traits and the default bitsets.
+ */
+
+void Compiler::optAssertionTraitsInit(AssertionIndex assertionCount)
+{
+ apTraits = new (getAllocator()) BitVecTraits(assertionCount, this);
+ apFull = BitVecOps::UninitVal();
+ apEmpty = BitVecOps::UninitVal();
+ BitVecOps::AssignNoCopy(apTraits, apFull, BitVecOps::MakeFull(apTraits));
+ BitVecOps::AssignNoCopy(apTraits, apEmpty, BitVecOps::MakeEmpty(apTraits));
+}
+
+/*****************************************************************************
+ *
+ * Initialize the assertion prop tracking logic.
+ */
+
+void Compiler::optAssertionInit(bool isLocalProp)
+{
+ // Use the countFunc table to determine a proper maximum assertion count for the
+ // method being compiled. The count grows with the IL size for small and
+ // moderate methods. For large methods, considering throughput impact, we track no
+ // more than 64 assertions.
+ // Note that this tracks at most 256 assertions.
+ static const AssertionIndex countFunc[] = {64, 128, 256, 64};
+ static const unsigned lowerBound = 0;
+ static const unsigned upperBound = sizeof(countFunc) / sizeof(countFunc[0]) - 1;
+ const unsigned codeSize = info.compILCodeSize / 512;
+ optMaxAssertionCount = countFunc[isLocalProp ? lowerBound : min(upperBound, codeSize)];
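+
+ // Worked example: for global assertion prop, an IL size below 512 bytes maps to
+ // countFunc[0] = 64 assertions, 512-1023 bytes to 128, 1024-1535 bytes to 256,
+ // and anything larger clamps to countFunc[3] = 64. Local assertion prop always
+ // uses countFunc[0] = 64.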
+
+ optLocalAssertionProp = isLocalProp;
+ optAssertionTabPrivate = new (getAllocator()) AssertionDsc[optMaxAssertionCount];
+ optComplementaryAssertionMap =
+ new (getAllocator()) AssertionIndex[optMaxAssertionCount](); // zero-inited (NO_ASSERTION_INDEX.)
+ assert(NO_ASSERTION_INDEX == 0);
+
+ if (!isLocalProp)
+ {
+ optValueNumToAsserts = new (getAllocator()) ValueNumToAssertsMap(getAllocator());
+ }
+
+ if (optAssertionDep == nullptr)
+ {
+ optAssertionDep = new (getAllocator()) ExpandArray<ASSERT_TP>(getAllocator(), max(1, lvaCount));
+ }
+
+ optAssertionTraitsInit(optMaxAssertionCount);
+ optAssertionCount = 0;
+ optAssertionPropagated = false;
+ bbJtrueAssertionOut = nullptr;
+}
+
+#ifdef DEBUG
+void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex assertionIndex /* =0 */)
+{
+ if (curAssertion->op1.kind == O1K_EXACT_TYPE)
+ {
+ printf("Type ");
+ }
+ else if (curAssertion->op1.kind == O1K_ARR_BND)
+ {
+ printf("ArrBnds ");
+ }
+ else if (curAssertion->op1.kind == O1K_SUBTYPE)
+ {
+ printf("Subtype ");
+ }
+ else if (curAssertion->op2.kind == O2K_LCLVAR_COPY)
+ {
+ printf("Copy ");
+ }
+ else if ((curAssertion->op2.kind == O2K_CONST_INT) || (curAssertion->op2.kind == O2K_CONST_LONG) ||
+ (curAssertion->op2.kind == O2K_CONST_DOUBLE))
+ {
+ printf("Constant ");
+ }
+ else if (curAssertion->op2.kind == O2K_SUBRANGE)
+ {
+ printf("Subrange ");
+ }
+ else
+ {
+ printf("?assertion classification? ");
+ }
+ printf("Assertion: ");
+ if (!optLocalAssertionProp)
+ {
+ printf("(%d, %d) ", curAssertion->op1.vn, curAssertion->op2.vn);
+ printf("(" STR_VN "%x," STR_VN "%x) ", curAssertion->op1.vn, curAssertion->op2.vn);
+ }
+
+ if ((curAssertion->op1.kind == O1K_LCLVAR) || (curAssertion->op1.kind == O1K_EXACT_TYPE) ||
+ (curAssertion->op1.kind == O1K_SUBTYPE))
+ {
+ printf("V%02u", curAssertion->op1.lcl.lclNum);
+ if (curAssertion->op1.lcl.ssaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+ printf(".%02u", curAssertion->op1.lcl.ssaNum);
+ }
+ }
+ else if (curAssertion->op1.kind == O1K_ARR_BND)
+ {
+ printf("[idx:");
+ vnStore->vnDump(this, curAssertion->op1.bnd.vnIdx);
+ printf(";len:");
+ vnStore->vnDump(this, curAssertion->op1.bnd.vnLen);
+ printf("]");
+ }
+ else if (curAssertion->op1.kind == O1K_ARRLEN_OPER_BND)
+ {
+ printf("Oper_Bnd");
+ vnStore->vnDump(this, curAssertion->op1.vn);
+ }
+ else if (curAssertion->op1.kind == O1K_ARRLEN_LOOP_BND)
+ {
+ printf("Loop_Bnd");
+ vnStore->vnDump(this, curAssertion->op1.vn);
+ }
+ else if (curAssertion->op1.kind == O1K_CONSTANT_LOOP_BND)
+ {
+ printf("Loop_Bnd");
+ vnStore->vnDump(this, curAssertion->op1.vn);
+ }
+ else if (curAssertion->op1.kind == O1K_VALUE_NUMBER)
+ {
+ printf("Value_Number");
+ vnStore->vnDump(this, curAssertion->op1.vn);
+ }
+ else
+ {
+ printf("?op1.kind?");
+ }
+
+ if (curAssertion->assertionKind == OAK_SUBRANGE)
+ {
+ printf(" in ");
+ }
+ else if (curAssertion->assertionKind == OAK_EQUAL)
+ {
+ if (curAssertion->op1.kind == O1K_LCLVAR)
+ {
+ printf(" == ");
+ }
+ else
+ {
+ printf(" is ");
+ }
+ }
+ else if (curAssertion->assertionKind == OAK_NO_THROW)
+ {
+ printf(" in range ");
+ }
+ else if (curAssertion->assertionKind == OAK_NOT_EQUAL)
+ {
+ if (curAssertion->op1.kind == O1K_LCLVAR)
+ {
+ printf(" != ");
+ }
+ else
+ {
+ printf(" is not ");
+ }
+ }
+ else
+ {
+ printf(" ?assertionKind? ");
+ }
+
+ if (curAssertion->op1.kind != O1K_ARR_BND)
+ {
+ switch (curAssertion->op2.kind)
+ {
+ case O2K_LCLVAR_COPY:
+ printf("V%02u", curAssertion->op2.lcl.lclNum);
+ if (curAssertion->op1.lcl.ssaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+ printf(".%02u", curAssertion->op1.lcl.ssaNum);
+ }
+ break;
+
+ case O2K_CONST_INT:
+ case O2K_IND_CNS_INT:
+ if (curAssertion->op1.kind == O1K_EXACT_TYPE)
+ {
+ printf("Exact Type MT(%08X)", dspPtr(curAssertion->op2.u1.iconVal));
+ assert(curAssertion->op2.u1.iconFlags != 0);
+ }
+ else if (curAssertion->op1.kind == O1K_SUBTYPE)
+ {
+ printf("MT(%08X)", dspPtr(curAssertion->op2.u1.iconVal));
+ assert(curAssertion->op2.u1.iconFlags != 0);
+ }
+ else if (curAssertion->op1.kind == O1K_ARRLEN_OPER_BND)
+ {
+ assert(!optLocalAssertionProp);
+ vnStore->vnDump(this, curAssertion->op2.vn);
+ }
+ else if (curAssertion->op1.kind == O1K_ARRLEN_LOOP_BND)
+ {
+ assert(!optLocalAssertionProp);
+ vnStore->vnDump(this, curAssertion->op2.vn);
+ }
+ else if (curAssertion->op1.kind == O1K_CONSTANT_LOOP_BND)
+ {
+ assert(!optLocalAssertionProp);
+ vnStore->vnDump(this, curAssertion->op2.vn);
+ }
+ else
+ {
+ var_types op1Type;
+
+ if (curAssertion->op1.kind == O1K_VALUE_NUMBER)
+ {
+ op1Type = vnStore->TypeOfVN(curAssertion->op1.vn);
+ }
+ else
+ {
+ unsigned lclNum = curAssertion->op1.lcl.lclNum;
+ assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+ op1Type = varDsc->lvType;
+ }
+
+ if (op1Type == TYP_REF)
+ {
+ assert(curAssertion->op2.u1.iconVal == 0);
+ printf("null");
+ }
+ else
+ {
+ if ((curAssertion->op2.u1.iconFlags & GTF_ICON_HDL_MASK) != 0)
+ {
+ printf("[%08p]", dspPtr(curAssertion->op2.u1.iconVal));
+ }
+ else
+ {
+ printf("%d", curAssertion->op2.u1.iconVal);
+ }
+ }
+ }
+ break;
+
+ case O2K_CONST_LONG:
+ printf("0x%016llx", curAssertion->op2.lconVal);
+ break;
+
+ case O2K_CONST_DOUBLE:
+ if (*((__int64*)&curAssertion->op2.dconVal) == (__int64)I64(0x8000000000000000))
+ {
+ printf("-0.00000");
+ }
+ else
+ {
+ printf("%#lg", curAssertion->op2.dconVal);
+ }
+ break;
+
+ case O2K_SUBRANGE:
+ printf("[%d..%d]", curAssertion->op2.u2.loBound, curAssertion->op2.u2.hiBound);
+ break;
+
+ default:
+ printf("?op2.kind?");
+ break;
+ }
+ }
+
+ if (assertionIndex > 0)
+ {
+ printf(" index=#%02u, mask=", assertionIndex);
+
+ // This is a hack to reuse a known empty set in order to display
+ // a single bit mask.
+ BitVecOps::AddElemD(apTraits, apEmpty, assertionIndex - 1);
+ printf("%s", BitVecOps::ToString(apTraits, apEmpty));
+ BitVecOps::RemoveElemD(apTraits, apEmpty, assertionIndex - 1);
+ }
+ printf("\n");
+}
+#endif // DEBUG
+
+/******************************************************************************
+ *
+ * Helper to retrieve the "assertIndex" assertion. Note that assertIndex 0
+ * is NO_ASSERTION_INDEX and "optAssertionCount" is the last valid index.
+ *
+ */
+Compiler::AssertionDsc* Compiler::optGetAssertion(AssertionIndex assertIndex)
+{
+ assert(NO_ASSERTION_INDEX == 0);
+ noway_assert(assertIndex != NO_ASSERTION_INDEX);
+ noway_assert(assertIndex <= optAssertionCount);
+ AssertionDsc* assertion = &optAssertionTabPrivate[assertIndex - 1];
+#ifdef DEBUG
+ optDebugCheckAssertion(assertion);
+#endif
+
+ return assertion;
+}
+
+/*****************************************************************************
+ *
+ * A simple helper routine so that not all callers need to supply an AssertionDsc*
+ * if they don't care about it. Refer overloaded method optCreateAssertion.
+ *
+ */
+Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAssertionKind assertionKind)
+{
+ AssertionDsc assertionDsc;
+ return optCreateAssertion(op1, op2, assertionKind, &assertionDsc);
+}
+
+/*****************************************************************************
+ *
+ * We attempt to create the following assertion:
+ *
+ * op1 assertionKind op2
+ *
+ * If we can create the assertion then we update 'assertion'; if we are
+ * unsuccessful, assertion->assertionKind will be OAK_INVALID. If we are
+ * successful in creating the assertion we call optAddAssertion, which adds
+ * the assertion to our assertion table.
+ *
+ * If we are able to create the assertion, the return value is the
+ * assertionIndex for this assertion; otherwise the return value is
+ * NO_ASSERTION_INDEX and we could not create the assertion.
+ *
+ */
+Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1,
+ GenTreePtr op2,
+ optAssertionKind assertionKind,
+ AssertionDsc* assertion)
+{
+ memset(assertion, 0, sizeof(AssertionDsc));
+ //
+ // If we cannot create an assertion using op1 and op2 then the assertionKind
+ // must be OAK_INVALID, so we initialize it to OAK_INVALID and only change it
+ // to a valid assertion when everything is good.
+ //
+ assertion->assertionKind = OAK_INVALID;
+ bool haveArgs = false;
+ var_types toType;
+
+ if (op1->gtOper == GT_ARR_BOUNDS_CHECK)
+ {
+ if (assertionKind == OAK_NO_THROW)
+ {
+ GenTreeBoundsChk* arrBndsChk = op1->AsBoundsChk();
+ assertion->assertionKind = assertionKind;
+ assertion->op1.kind = O1K_ARR_BND;
+ assertion->op1.bnd.vnIdx = arrBndsChk->gtIndex->gtVNPair.GetConservative();
+ assertion->op1.bnd.vnLen = arrBndsChk->gtArrLen->gtVNPair.GetConservative();
+ goto DONE_ASSERTION;
+ }
+ }
+
+ //
+ // Did we receive Helper call args?
+ //
+ if (op1->gtOper == GT_LIST)
+ {
+ if (op2->gtOper != GT_LIST)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+ op1 = op1->gtOp.gtOp1;
+ op2 = op2->gtOp.gtOp1;
+ haveArgs = true;
+ }
+
+ //
+ // Are we trying to make a non-null assertion?
+ //
+ if (op2 == nullptr)
+ {
+ assert(haveArgs == false);
+ //
+ // Must be an OAK_NOT_EQUAL assertion
+ //
+ noway_assert(assertionKind == OAK_NOT_EQUAL);
+
+ //
+ // Set op1 to the instance pointer of the indirection
+ //
+
+ ssize_t offset = 0;
+ while ((op1->gtOper == GT_ADD) && (op1->gtType == TYP_BYREF))
+ {
+ if (op1->gtGetOp2()->IsCnsIntOrI())
+ {
+ offset += op1->gtGetOp2()->gtIntCon.gtIconVal;
+ op1 = op1->gtGetOp1();
+ }
+ else if (op1->gtGetOp1()->IsCnsIntOrI())
+ {
+ offset += op1->gtGetOp1()->gtIntCon.gtIconVal;
+ op1 = op1->gtGetOp2();
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ if (fgIsBigOffset(offset) || op1->gtOper != GT_LCL_VAR)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* lclVar = &lvaTable[lclNum];
+
+ ValueNum vn;
+
+ //
+ // We only perform null-checks on GC refs
+ // so only make non-null assertions about GC refs
+ //
+ if (lclVar->TypeGet() != TYP_REF)
+ {
+ if (optLocalAssertionProp || (lclVar->TypeGet() != TYP_BYREF))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ vn = op1->gtVNPair.GetConservative();
+ VNFuncApp funcAttr;
+
+ // Try to get value number corresponding to the GC ref of the indirection
+ while (vnStore->GetVNFunc(vn, &funcAttr) && (funcAttr.m_func == (VNFunc)GT_ADD) &&
+ (vnStore->TypeOfVN(vn) == TYP_BYREF))
+ {
+ if (vnStore->IsVNConstant(funcAttr.m_args[1]))
+ {
+ offset += vnStore->CoercedConstantValue<ssize_t>(funcAttr.m_args[1]);
+ vn = funcAttr.m_args[0];
+ }
+ else if (vnStore->IsVNConstant(funcAttr.m_args[0]))
+ {
+ offset += vnStore->CoercedConstantValue<ssize_t>(funcAttr.m_args[0]);
+ vn = funcAttr.m_args[1];
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ if (fgIsBigOffset(offset) || (vnStore->TypeOfVN(vn) != TYP_REF))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ assertion->op1.kind = O1K_VALUE_NUMBER;
+ }
+ else
+ {
+ // If the local variable has its address exposed then bail
+ if (lclVar->lvAddrExposed)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ assertion->op1.kind = O1K_LCLVAR;
+ assertion->op1.lcl.lclNum = lclNum;
+ assertion->op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();
+ vn = op1->gtVNPair.GetConservative();
+ }
+
+ assertion->op1.vn = vn;
+ assertion->assertionKind = assertionKind;
+ assertion->op2.kind = O2K_CONST_INT;
+ assertion->op2.vn = ValueNumStore::VNForNull();
+ assertion->op2.u1.iconVal = 0;
+ assertion->op2.u1.iconFlags = 0;
+#ifdef _TARGET_64BIT_
+ assertion->op2.u1.iconFlags |= 1; // Signify that this is really TYP_LONG
+#endif // _TARGET_64BIT_
+ }
+ //
+ // Are we making an assertion about a local variable?
+ //
+ else if (op1->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* lclVar = &lvaTable[lclNum];
+
+ // If the local variable has its address exposed then bail
+ if (lclVar->lvAddrExposed)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ if (haveArgs)
+ {
+ //
+ // Must either be an OAK_EQUAL or an OAK_NOT_EQUAL assertion
+ //
+ if ((assertionKind != OAK_EQUAL) && (assertionKind != OAK_NOT_EQUAL))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ if (op2->gtOper == GT_IND)
+ {
+ op2 = op2->gtOp.gtOp1;
+ assertion->op2.kind = O2K_IND_CNS_INT;
+ }
+ else
+ {
+ assertion->op2.kind = O2K_CONST_INT;
+ }
+
+ if (op2->gtOper != GT_CNS_INT)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ //
+ // TODO-CQ: Check for Sealed class and change kind to O1K_EXACT_TYPE
+ // And consider the special cases, like CORINFO_FLG_SHAREDINST or CORINFO_FLG_VARIANCE
+ // where a class can be sealed, but they don't behave as exact types because casts to
+ // non-base types sometimes still succeed.
+ //
+ assertion->op1.kind = O1K_SUBTYPE;
+ assertion->op1.lcl.lclNum = lclNum;
+ assertion->op1.vn = op1->gtVNPair.GetConservative();
+ assertion->op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();
+ assertion->op2.u1.iconVal = op2->gtIntCon.gtIconVal;
+ assertion->op2.vn = op2->gtVNPair.GetConservative();
+ assertion->op2.u1.iconFlags = op2->GetIconHandleFlag();
+
+ //
+ // Ok everything has been set and the assertion looks good
+ //
+ assertion->assertionKind = assertionKind;
+ }
+ else // !haveArgs
+ {
+ /* Skip over a GT_COMMA node(s), if necessary */
+ while (op2->gtOper == GT_COMMA)
+ {
+ op2 = op2->gtOp.gtOp2;
+ }
+
+ assertion->op1.kind = O1K_LCLVAR;
+ assertion->op1.lcl.lclNum = lclNum;
+ assertion->op1.vn = op1->gtVNPair.GetConservative();
+ assertion->op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();
+
+ switch (op2->gtOper)
+ {
+ optOp2Kind op2Kind;
+ //
+ // No Assertion
+ //
+ default:
+ goto DONE_ASSERTION; // Don't make an assertion
+
+ //
+ // Constant Assertions
+ //
+ case GT_CNS_INT:
+ op2Kind = O2K_CONST_INT;
+ goto CNS_COMMON;
+
+ case GT_CNS_LNG:
+ op2Kind = O2K_CONST_LONG;
+ goto CNS_COMMON;
+
+ case GT_CNS_DBL:
+ op2Kind = O2K_CONST_DOUBLE;
+ goto CNS_COMMON;
+
+ CNS_COMMON:
+ {
+ // TODO-1stClassStructs: handle constant propagation to struct types.
+ if (varTypeIsStruct(lclVar))
+ {
+ goto DONE_ASSERTION;
+ }
+ //
+ // Must either be an OAK_EQUAL or an OAK_NOT_EQUAL assertion
+ //
+ if ((assertionKind != OAK_EQUAL) && (assertionKind != OAK_NOT_EQUAL))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ // If the LclVar is a TYP_LONG then we only make
+ // assertions where op2 is also TYP_LONG
+ //
+ if ((lclVar->TypeGet() == TYP_LONG) && (op2->TypeGet() != TYP_LONG))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ assertion->op2.kind = op2Kind;
+ assertion->op2.lconVal = 0;
+ assertion->op2.vn = op2->gtVNPair.GetConservative();
+
+ if (op2->gtOper == GT_CNS_INT)
+ {
+#ifdef _TARGET_ARM_
+ // Do not Constant-Prop large constants for ARM
+ if (!codeGen->validImmForMov(op2->gtIntCon.gtIconVal))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+#endif // _TARGET_ARM_
+ assertion->op2.u1.iconVal = op2->gtIntCon.gtIconVal;
+ assertion->op2.u1.iconFlags = op2->GetIconHandleFlag();
+#ifdef _TARGET_64BIT_
+ if (op2->TypeGet() == TYP_LONG || op2->TypeGet() == TYP_BYREF)
+ {
+ assertion->op2.u1.iconFlags |= 1; // Signify that this is really TYP_LONG
+ }
+#endif // _TARGET_64BIT_
+ }
+ else if (op2->gtOper == GT_CNS_LNG)
+ {
+ assertion->op2.lconVal = op2->gtLngCon.gtLconVal;
+ }
+ else
+ {
+ noway_assert(op2->gtOper == GT_CNS_DBL);
+ /* If we have a NaN value then don't record it */
+ if (_isnan(op2->gtDblCon.gtDconVal))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+ assertion->op2.dconVal = op2->gtDblCon.gtDconVal;
+ }
+
+ //
+ // Ok everything has been set and the assertion looks good
+ //
+ assertion->assertionKind = assertionKind;
+ }
+ break;
+
+ //
+ // Copy Assertions
+ //
+ case GT_LCL_VAR:
+ {
+ //
+ // Must either be an OAK_EQUAL or an OAK_NOT_EQUAL assertion
+ //
+ if ((assertionKind != OAK_EQUAL) && (assertionKind != OAK_NOT_EQUAL))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ unsigned lclNum2 = op2->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum2 < lvaCount);
+ LclVarDsc* lclVar2 = &lvaTable[lclNum2];
+
+ // If the two locals are the same then bail
+ if (lclNum == lclNum2)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ // If the types are different then bail
+ if (lclVar->lvType != lclVar2->lvType)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ // If the local variable has its address exposed then bail
+ if (lclVar2->lvAddrExposed)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ assertion->op2.kind = O2K_LCLVAR_COPY;
+ assertion->op2.lcl.lclNum = lclNum2;
+ assertion->op2.vn = op2->gtVNPair.GetConservative();
+ assertion->op2.lcl.ssaNum = op2->AsLclVarCommon()->GetSsaNum();
+
+ //
+ // Ok everything has been set and the assertion looks good
+ //
+ assertion->assertionKind = assertionKind;
+ }
+ break;
+
+ // Subrange Assertions
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GT:
+ case GT_GE:
+
+ /* Assigning the result of a RELOP, we can add a boolean subrange assertion */
+
+ toType = TYP_BOOL;
+ goto SUBRANGE_COMMON;
+
+ case GT_CLS_VAR:
+
+ /* Assigning the result of an indirection into a LCL_VAR, see if we can add a subrange assertion */
+
+ toType = op2->gtType;
+ goto SUBRANGE_COMMON;
+
+ case GT_ARR_ELEM:
+
+ /* Assigning the result of an indirection into a LCL_VAR, see if we can add a subrange assertion */
+
+ toType = op2->gtType;
+ goto SUBRANGE_COMMON;
+
+ case GT_LCL_FLD:
+
+ /* Assigning the result of an indirection into a LCL_VAR, see if we can add a subrange assertion */
+
+ toType = op2->gtType;
+ goto SUBRANGE_COMMON;
+
+ case GT_IND:
+
+ /* Assigning the result of an indirection into a LCL_VAR, see if we can add a subrange assertion */
+
+ toType = op2->gtType;
+ goto SUBRANGE_COMMON;
+
+ case GT_CAST:
+ {
+ if (lvaTable[lclNum].lvIsStructField && lvaTable[lclNum].lvNormalizeOnLoad())
+ {
+ // Keep the cast on small struct fields.
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ toType = op2->CastToType();
+ SUBRANGE_COMMON:
+ if ((assertionKind != OAK_SUBRANGE) && (assertionKind != OAK_EQUAL))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ // We don't make assertions on a cast from floating point
+ goto DONE_ASSERTION;
+ }
+
+ switch (toType)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_SHORT:
+ case TYP_USHORT:
+ case TYP_CHAR:
+#ifdef _TARGET_64BIT_
+ case TYP_UINT:
+ case TYP_INT:
+#endif // _TARGET_64BIT_
+ assertion->op2.u2.loBound = AssertionDsc::GetLowerBoundForIntegralType(toType);
+ assertion->op2.u2.hiBound = AssertionDsc::GetUpperBoundForIntegralType(toType);
+ break;
+
+ default:
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+ assertion->op2.kind = O2K_SUBRANGE;
+ assertion->assertionKind = OAK_SUBRANGE;
+ }
+ break;
+ }
+ } // else // !haveArgs
+ } // if (op1->gtOper == GT_LCL_VAR)
+
+ //
+ // Are we making an IsType assertion?
+ //
+ else if (op1->gtOper == GT_IND)
+ {
+ op1 = op1->gtOp.gtOp1;
+ //
+ // Is this an indirection of a local variable?
+ //
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* lclVar = &lvaTable[lclNum];
+
+ // If the local variable is excluded from SSA then bail
+ if (fgExcludeFromSsa(lclNum))
+ {
+ goto DONE_ASSERTION;
+ }
+
+ // If we have a typeHnd indirection then op1 must be a TYP_REF
+ // and the indirection must produce a TYP_I
+ //
+ if (op1->gtType != TYP_REF)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ assertion->op1.kind = O1K_EXACT_TYPE;
+ assertion->op1.lcl.lclNum = lclNum;
+ assertion->op1.vn = op1->gtVNPair.GetConservative();
+ assertion->op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();
+ assert(assertion->op1.lcl.ssaNum == SsaConfig::RESERVED_SSA_NUM ||
+ assertion->op1.vn ==
+ lvaTable[lclNum].GetPerSsaData(assertion->op1.lcl.ssaNum)->m_vnPair.GetConservative());
+
+ ssize_t cnsValue = 0;
+ unsigned iconFlags = 0;
+ // Ngen case
+ if (op2->gtOper == GT_IND)
+ {
+ if (!optIsTreeKnownIntValue(!optLocalAssertionProp, op2->gtOp.gtOp1, &cnsValue, &iconFlags))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ assertion->assertionKind = assertionKind;
+ assertion->op2.kind = O2K_IND_CNS_INT;
+ assertion->op2.u1.iconVal = cnsValue;
+ assertion->op2.vn = op2->gtOp.gtOp1->gtVNPair.GetConservative();
+ /* iconFlags should only contain bits in GTF_ICON_HDL_MASK */
+ assert((iconFlags & ~GTF_ICON_HDL_MASK) == 0);
+ assertion->op2.u1.iconFlags = iconFlags;
+#ifdef _TARGET_64BIT_
+ if (op2->gtOp.gtOp1->TypeGet() == TYP_LONG)
+ {
+ assertion->op2.u1.iconFlags |= 1; // Signify that this is really TYP_LONG
+ }
+#endif // _TARGET_64BIT_
+ }
+ // JIT case
+ else if (optIsTreeKnownIntValue(!optLocalAssertionProp, op2, &cnsValue, &iconFlags))
+ {
+ assertion->assertionKind = assertionKind;
+ assertion->op2.kind = O2K_IND_CNS_INT;
+ assertion->op2.u1.iconVal = cnsValue;
+ assertion->op2.vn = op2->gtVNPair.GetConservative();
+ /* iconFlags should only contain bits in GTF_ICON_HDL_MASK */
+ assert((iconFlags & ~GTF_ICON_HDL_MASK) == 0);
+ assertion->op2.u1.iconFlags = iconFlags;
+#ifdef _TARGET_64BIT_
+ if (op2->TypeGet() == TYP_LONG)
+ {
+ assertion->op2.u1.iconFlags |= 1; // Signify that this is really TYP_LONG
+ }
+#endif // _TARGET_64BIT_
+ }
+ else
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+ }
+ }
+
+DONE_ASSERTION:
+ if (assertion->assertionKind == OAK_INVALID)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ if (!optLocalAssertionProp)
+ {
+ if ((assertion->op1.vn == ValueNumStore::NoVN) || (assertion->op2.vn == ValueNumStore::NoVN) ||
+ (assertion->op1.vn == ValueNumStore::VNForVoid()) || (assertion->op2.vn == ValueNumStore::VNForVoid()))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ // TODO: only copy assertions rely on valid SSA number so we could generate more assertions here
+ if ((assertion->op1.kind != O1K_VALUE_NUMBER) && (assertion->op1.lcl.ssaNum == SsaConfig::RESERVED_SSA_NUM))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+ }
+
+ // Now add the assertion to our assertion table
+ noway_assert(assertion->op1.kind != O1K_INVALID);
+ noway_assert(assertion->op1.kind == O1K_ARR_BND || assertion->op2.kind != O2K_INVALID);
+ return optAddAssertion(assertion);
+}
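+
+// For example (illustrative): for an assignment whose op1 is a GT_LCL_VAR "V03" and
+// whose op2 is a GT_CNS_INT 0, an OAK_EQUAL request produces the constant assertion
+// "V03 == 0", while a null check (op2 == nullptr with OAK_NOT_EQUAL) on a TYP_REF
+// local produces the non-null assertion "V03 != null".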
+
+/*****************************************************************************
+ *
+ * If tree is a constant node holding an integral value, retrieve the value in
+ * pConstant. If the method returns true, pConstant holds the appropriate
+ * constant. Set "vnBased" to true to indicate local or global assertion prop.
+ * "pFlags" indicates if the constant is a handle marked by GTF_ICON_HDL_MASK.
+ */
+bool Compiler::optIsTreeKnownIntValue(bool vnBased, GenTreePtr tree, ssize_t* pConstant, unsigned* pFlags)
+{
+ // Is Local assertion prop?
+ if (!vnBased)
+ {
+ if (tree->OperGet() == GT_CNS_INT)
+ {
+ *pConstant = tree->gtIntCon.IconValue();
+ *pFlags = tree->GetIconHandleFlag();
+ return true;
+ }
+#ifdef _TARGET_64BIT_
+ // Just to be clear, get it from gtLconVal rather than
+ // overlapping gtIconVal.
+ else if (tree->OperGet() == GT_CNS_LNG)
+ {
+ *pConstant = tree->gtLngCon.gtLconVal;
+ *pFlags = tree->GetIconHandleFlag();
+ return true;
+ }
+#endif
+ return false;
+ }
+
+ // Global assertion prop
+ if (!vnStore->IsVNConstant(tree->gtVNPair.GetConservative()))
+ {
+ return false;
+ }
+
+ ValueNum vn = tree->gtVNPair.GetConservative();
+ var_types vnType = vnStore->TypeOfVN(vn);
+ if (vnType == TYP_INT)
+ {
+ *pConstant = vnStore->ConstantValue<int>(vn);
+ *pFlags = vnStore->IsVNHandle(vn) ? vnStore->GetHandleFlags(vn) : 0;
+ return true;
+ }
+#ifdef _TARGET_64BIT_
+ else if (vnType == TYP_LONG)
+ {
+ *pConstant = vnStore->ConstantValue<INT64>(vn);
+ *pFlags = vnStore->IsVNHandle(vn) ? vnStore->GetHandleFlags(vn) : 0;
+ return true;
+ }
+#endif
+ return false;
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Print the assertions related to a VN for all VNs.
+ *
+ */
+void Compiler::optPrintVnAssertionMapping()
+{
+ printf("\nVN Assertion Mapping\n");
+ printf("---------------------\n");
+ for (ValueNumToAssertsMap::KeyIterator ki = optValueNumToAsserts->Begin(); !ki.Equal(optValueNumToAsserts->End());
+ ++ki)
+ {
+ printf("(%d => ", ki.Get());
+ printf("%s)\n", BitVecOps::ToString(apTraits, ki.GetValue()));
+ }
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Maintain a map "optValueNumToAsserts" i.e., vn -> to set of assertions
+ * about that VN. Given "assertions" about a "vn" add it to the previously
+ * mapped assertions about that "vn."
+ */
+void Compiler::optAddVnAssertionMapping(ValueNum vn, AssertionIndex index)
+{
+ ASSERT_TP cur;
+ if (!optValueNumToAsserts->Lookup(vn, &cur))
+ {
+ cur = optNewEmptyAssertSet();
+ optValueNumToAsserts->Set(vn, cur);
+ }
+ BitVecOps::AddElemD(apTraits, cur, index - 1);
+}
+
+/*****************************************************************************
+ * If we know statically that this assertion's VN involves a NaN, don't bother
+ * wasting an assertion table slot.
+ */
+bool Compiler::optAssertionVnInvolvesNan(AssertionDsc* assertion)
+{
+ if (optLocalAssertionProp)
+ {
+ return false;
+ }
+
+ static const int SZ = 2;
+ ValueNum vns[SZ] = {assertion->op1.vn, assertion->op2.vn};
+ for (int i = 0; i < SZ; ++i)
+ {
+ if (vnStore->IsVNConstant(vns[i]))
+ {
+ var_types type = vnStore->TypeOfVN(vns[i]);
+ if ((type == TYP_FLOAT && _isnan(vnStore->ConstantValue<float>(vns[i])) != 0) ||
+ (type == TYP_DOUBLE && _isnan(vnStore->ConstantValue<double>(vns[i])) != 0))
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Given an assertion add it to the assertion table
+ *
+ * If it is already in the assertion table, return the assertionIndex that
+ * we use to refer to this element.
+ * Otherwise add it to the assertion table and return the assertionIndex that
+ * we use to refer to this element.
+ * If we need to add to the table and the table is full, return the value zero.
+ */
+Compiler::AssertionIndex Compiler::optAddAssertion(AssertionDsc* newAssertion)
+{
+ noway_assert(newAssertion->assertionKind != OAK_INVALID);
+
+ // Even though the propagation step takes care of NaN, just a check
+ // to make sure there is no slot involving a NaN.
+ if (optAssertionVnInvolvesNan(newAssertion))
+ {
+ JITDUMP("Assertion involved Nan not adding\n");
+ return NO_ASSERTION_INDEX;
+ }
+
+ // Check if exists already, so we can skip adding new one. Search backwards.
+ for (AssertionIndex index = optAssertionCount; index >= 1; index--)
+ {
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ if (curAssertion->Equals(newAssertion, !optLocalAssertionProp))
+ {
+ return index;
+ }
+ }
+
+ // Check if we are within max count.
+ if (optAssertionCount >= optMaxAssertionCount)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ optAssertionTabPrivate[optAssertionCount] = *newAssertion;
+ optAssertionCount++;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("GenTreeNode creates assertion:\n");
+ gtDispTree(optAssertionPropCurrentTree, nullptr, nullptr, true);
+ printf(optLocalAssertionProp ? "In BB%02u New Local " : "In BB%02u New Global ", compCurBB->bbNum);
+ optPrintAssertion(newAssertion, optAssertionCount);
+ }
+#endif // DEBUG
+
+ // Assertion mask bit n corresponds to assertion index n + 1 (the bit used is index - 1).
+ if (optLocalAssertionProp)
+ {
+ assert(newAssertion->op1.kind == O1K_LCLVAR);
+
+ // Mark the variables this index depends on
+ unsigned lclNum = newAssertion->op1.lcl.lclNum;
+ BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), optAssertionCount - 1);
+ if (newAssertion->op2.kind == O2K_LCLVAR_COPY)
+ {
+ lclNum = newAssertion->op2.lcl.lclNum;
+ BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), optAssertionCount - 1);
+ }
+ }
+ else
+ // If global assertion prop, then add it to the dependents map.
+ {
+ optAddVnAssertionMapping(newAssertion->op1.vn, optAssertionCount);
+ if (newAssertion->op2.kind == O2K_LCLVAR_COPY)
+ {
+ optAddVnAssertionMapping(newAssertion->op2.vn, optAssertionCount);
+ }
+ }
+
+#ifdef DEBUG
+ optDebugCheckAssertions(optAssertionCount);
+#endif
+ return optAssertionCount;
+}
+
+#ifdef DEBUG
+void Compiler::optDebugCheckAssertion(AssertionDsc* assertion)
+{
+ assert(assertion->assertionKind < OAK_COUNT);
+ assert(assertion->op1.kind < O1K_COUNT);
+ assert(assertion->op2.kind < O2K_COUNT);
+ // It would be good to check that op1.vn and op2.vn are valid value numbers.
+
+ switch (assertion->op1.kind)
+ {
+ case O1K_LCLVAR:
+ case O1K_EXACT_TYPE:
+ case O1K_SUBTYPE:
+ assert(assertion->op1.lcl.lclNum < lvaCount);
+ assert(optLocalAssertionProp || ((assertion->op1.lcl.ssaNum - SsaConfig::UNINIT_SSA_NUM) <
+ lvaTable[assertion->op1.lcl.lclNum].lvNumSsaNames));
+ break;
+ case O1K_ARR_BND:
+ // It would be good to check that bnd.vnIdx and bnd.vnLen are valid value numbers.
+ break;
+ case O1K_ARRLEN_OPER_BND:
+ case O1K_ARRLEN_LOOP_BND:
+ case O1K_CONSTANT_LOOP_BND:
+ case O1K_VALUE_NUMBER:
+ assert(!optLocalAssertionProp);
+ break;
+ default:
+ break;
+ }
+ switch (assertion->op2.kind)
+ {
+ case O2K_IND_CNS_INT:
+ case O2K_CONST_INT:
+ {
+ // The only flags that can be set are those in the GTF_ICON_HDL_MASK, or bit 0, which is
+ // used to indicate a long constant.
+ assert((assertion->op2.u1.iconFlags & ~(GTF_ICON_HDL_MASK | 1)) == 0);
+ switch (assertion->op1.kind)
+ {
+ case O1K_EXACT_TYPE:
+ case O1K_SUBTYPE:
+ assert(assertion->op2.u1.iconFlags != 0);
+ break;
+ case O1K_LCLVAR:
+ case O1K_ARR_BND:
+ assert((lvaTable[assertion->op1.lcl.lclNum].lvType != TYP_REF) || (assertion->op2.u1.iconVal == 0));
+ break;
+ case O1K_VALUE_NUMBER:
+ assert((vnStore->TypeOfVN(assertion->op1.vn) != TYP_REF) || (assertion->op2.u1.iconVal == 0));
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+
+ default:
+ // for all other 'assertion->op2.kind' values we don't check anything
+ break;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Verify that assertion prop related assumptions are valid. If "index"
+ * is 0 (i.e., NO_ASSERTION_INDEX) then verify all assertions in the table.
+ * If "index" is between 1 and optAssertionCount, then verify the assertion
+ * desc corresponding to "index."
+ */
+void Compiler::optDebugCheckAssertions(AssertionIndex index)
+{
+ AssertionIndex start = (index == NO_ASSERTION_INDEX) ? 1 : index;
+ AssertionIndex end = (index == NO_ASSERTION_INDEX) ? optAssertionCount : index;
+ for (AssertionIndex ind = start; ind <= end; ++ind)
+ {
+ AssertionDsc* assertion = optGetAssertion(ind);
+ optDebugCheckAssertion(assertion);
+ }
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Given a "candidateAssertion", and the assertion operands op1 and op2,
+ * create a complementary assertion and add it to the assertion table,
+ * which can be retrieved using optFindComplementary(index)
+ *
+ */
+
+void Compiler::optCreateComplementaryAssertion(AssertionIndex assertionIndex, GenTreePtr op1, GenTreePtr op2)
+{
+ if (assertionIndex == NO_ASSERTION_INDEX)
+ {
+ return;
+ }
+
+ AssertionDsc& candidateAssertion = *optGetAssertion(assertionIndex);
+ if (candidateAssertion.op1.kind == O1K_ARRLEN_OPER_BND || candidateAssertion.op1.kind == O1K_ARRLEN_LOOP_BND ||
+ candidateAssertion.op1.kind == O1K_CONSTANT_LOOP_BND)
+ {
+ AssertionDsc dsc = candidateAssertion;
+ dsc.assertionKind = dsc.assertionKind == OAK_EQUAL ? OAK_NOT_EQUAL : OAK_EQUAL;
+ optAddAssertion(&dsc);
+ return;
+ }
+
+ if (candidateAssertion.assertionKind == OAK_EQUAL)
+ {
+ AssertionIndex index = optCreateAssertion(op1, op2, OAK_NOT_EQUAL);
+ optMapComplementary(index, assertionIndex);
+ }
+ else if (candidateAssertion.assertionKind == OAK_NOT_EQUAL)
+ {
+ AssertionIndex index = optCreateAssertion(op1, op2, OAK_EQUAL);
+ optMapComplementary(index, assertionIndex);
+ }
+
+ // Are we making a subtype or exact type assertion?
+ if ((candidateAssertion.op1.kind == O1K_SUBTYPE) || (candidateAssertion.op1.kind == O1K_EXACT_TYPE))
+ {
+ // Did we receive helper call args?
+ if (op1->gtOper == GT_LIST)
+ {
+ op1 = op1->gtOp.gtOp1;
+ }
+ optCreateAssertion(op1, nullptr, OAK_NOT_EQUAL);
+ }
+}
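+
+// For example (illustrative): if the candidate assertion is "V02 == 5" (OAK_EQUAL),
+// a complementary "V02 != 5" (OAK_NOT_EQUAL) assertion is created and the pair is
+// recorded via optMapComplementary so that optFindComplementary can retrieve it.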
+
+/*****************************************************************************
+ *
+ * Create assertions for jtrue operands. Given operands "op1" and "op2" that
+ * are used in a conditional evaluation of a jtrue stmt, create assertions
+ * for the operands.
+ */
+
+Compiler::AssertionIndex Compiler::optCreateJtrueAssertions(GenTreePtr op1,
+ GenTreePtr op2,
+ Compiler::optAssertionKind assertionKind)
+{
+ AssertionDsc candidateAssertion;
+ AssertionIndex assertionIndex = optCreateAssertion(op1, op2, assertionKind, &candidateAssertion);
+    // Don't bother if we don't have an assertion on the JTrue False path. The current implementation
+    // allows for a complementary assertion only if there is an assertion on the False path (tree->HasAssertion()).
+ if (assertionIndex != NO_ASSERTION_INDEX)
+ {
+ optCreateComplementaryAssertion(assertionIndex, op1, op2);
+ }
+ return assertionIndex;
+}
+
+Compiler::AssertionIndex Compiler::optCreateJTrueBoundsAssertion(GenTreePtr tree)
+{
+ GenTreePtr relop = tree->gtGetOp1();
+ if ((relop->OperKind() & GTK_RELOP) == 0)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+ GenTreePtr op1 = relop->gtGetOp1();
+ GenTreePtr op2 = relop->gtGetOp2();
+
+ ValueNum vn = op1->gtVNPair.GetConservative();
+ // Cases where op1 holds the condition with array arithmetic and op2 is 0.
+ // Loop condition like: "i < a.len +/-k == 0"
+ // Assertion: "i < a.len +/- k == 0"
+ if (vnStore->IsVNArrLenArithBound(vn) &&
+ op2->gtVNPair.GetConservative() == vnStore->VNZeroForType(op2->TypeGet()) &&
+ (relop->gtOper == GT_EQ || relop->gtOper == GT_NE))
+ {
+ AssertionDsc dsc;
+ dsc.assertionKind = relop->gtOper == GT_EQ ? OAK_EQUAL : OAK_NOT_EQUAL;
+ dsc.op1.kind = O1K_ARRLEN_OPER_BND;
+ dsc.op1.vn = vn;
+ dsc.op2.kind = O2K_CONST_INT;
+ dsc.op2.vn = vnStore->VNZeroForType(op2->TypeGet());
+ dsc.op2.u1.iconVal = 0;
+ dsc.op2.u1.iconFlags = 0;
+ AssertionIndex index = optAddAssertion(&dsc);
+ optCreateComplementaryAssertion(index, nullptr, nullptr);
+ return index;
+ }
+    // Cases where op1 holds the array length condition and op2 is 0.
+ // Loop condition like: "i < a.len == 0"
+ // Assertion: "i < a.len == false"
+ else if (vnStore->IsVNArrLenBound(vn) &&
+ (op2->gtVNPair.GetConservative() == vnStore->VNZeroForType(op2->TypeGet())) &&
+ (relop->gtOper == GT_EQ || relop->gtOper == GT_NE))
+ {
+ AssertionDsc dsc;
+ dsc.assertionKind = relop->gtOper == GT_EQ ? OAK_EQUAL : OAK_NOT_EQUAL;
+ dsc.op1.kind = O1K_ARRLEN_LOOP_BND;
+ dsc.op1.vn = vn;
+ dsc.op2.kind = O2K_CONST_INT;
+ dsc.op2.vn = vnStore->VNZeroForType(op2->TypeGet());
+ dsc.op2.u1.iconVal = 0;
+ dsc.op2.u1.iconFlags = 0;
+ AssertionIndex index = optAddAssertion(&dsc);
+ optCreateComplementaryAssertion(index, nullptr, nullptr);
+ return index;
+ }
+    // Cases where op1 holds the lhs of the condition and op2 holds the rhs.
+ // Loop condition like "i < a.len"
+ // Assertion: "i < a.len != 0"
+ else if (vnStore->IsVNArrLenBound(relop->gtVNPair.GetConservative()))
+ {
+ AssertionDsc dsc;
+ dsc.assertionKind = OAK_NOT_EQUAL;
+ dsc.op1.kind = O1K_ARRLEN_LOOP_BND;
+ dsc.op1.vn = relop->gtVNPair.GetConservative();
+ dsc.op2.kind = O2K_CONST_INT;
+ dsc.op2.vn = vnStore->VNZeroForType(TYP_INT);
+ dsc.op2.u1.iconVal = 0;
+ dsc.op2.u1.iconFlags = 0;
+ AssertionIndex index = optAddAssertion(&dsc);
+ optCreateComplementaryAssertion(index, nullptr, nullptr);
+ return index;
+ }
+    // Cases where op1 holds the constant bound condition and op2 is 0.
+ // Loop condition like: "i < 100 == 0"
+ // Assertion: "i < 100 == false"
+ else if (vnStore->IsVNConstantBound(vn) &&
+ (op2->gtVNPair.GetConservative() == vnStore->VNZeroForType(op2->TypeGet())) &&
+ (relop->gtOper == GT_EQ || relop->gtOper == GT_NE))
+ {
+ AssertionDsc dsc;
+ dsc.assertionKind = relop->gtOper == GT_EQ ? OAK_EQUAL : OAK_NOT_EQUAL;
+ dsc.op1.kind = O1K_CONSTANT_LOOP_BND;
+ dsc.op1.vn = vn;
+ dsc.op2.kind = O2K_CONST_INT;
+ dsc.op2.vn = vnStore->VNZeroForType(op2->TypeGet());
+ dsc.op2.u1.iconVal = 0;
+ dsc.op2.u1.iconFlags = 0;
+ AssertionIndex index = optAddAssertion(&dsc);
+ optCreateComplementaryAssertion(index, nullptr, nullptr);
+ return index;
+ }
+    // Cases where op1 holds the lhs of the condition and op2 holds the rhs.
+ // Loop condition like "i < 100"
+ // Assertion: "i < 100 != 0"
+ else if (vnStore->IsVNConstantBound(relop->gtVNPair.GetConservative()))
+ {
+ AssertionDsc dsc;
+ dsc.assertionKind = OAK_NOT_EQUAL;
+ dsc.op1.kind = O1K_CONSTANT_LOOP_BND;
+ dsc.op1.vn = relop->gtVNPair.GetConservative();
+ dsc.op2.kind = O2K_CONST_INT;
+ dsc.op2.vn = vnStore->VNZeroForType(TYP_INT);
+ dsc.op2.u1.iconVal = 0;
+ dsc.op2.u1.iconFlags = 0;
+ AssertionIndex index = optAddAssertion(&dsc);
+ optCreateComplementaryAssertion(index, nullptr, nullptr);
+ return index;
+ }
+
+ return NO_ASSERTION_INDEX;
+}
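+
+// Rough shape of the JTRUE trees handled above, using conservative VNs
+// (the loop variable and bounds are illustrative):
+//
+//   JTRUE(EQ/NE("i < a.len +/- k", 0))  ->  O1K_ARRLEN_OPER_BND on the VN of "i < a.len +/- k"
+//   JTRUE(EQ/NE("i < a.len", 0))        ->  O1K_ARRLEN_LOOP_BND on the VN of "i < a.len"
+//   JTRUE("i < a.len")                  ->  O1K_ARRLEN_LOOP_BND asserting "relop != 0"
+//   JTRUE(EQ/NE("i < 100", 0))          ->  O1K_CONSTANT_LOOP_BND on the VN of "i < 100"
+//   JTRUE("i < 100")                    ->  O1K_CONSTANT_LOOP_BND asserting "relop != 0"
+//
+// In each case op2 is the integer constant 0, and a complementary assertion is
+// added so that both edges of the JTRUE carry usable information.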
+
+/*****************************************************************************
+ *
+ * Compute assertions for the JTrue node.
+ */
+Compiler::AssertionIndex Compiler::optAssertionGenJtrue(GenTreePtr tree)
+{
+ // Only create assertions for JTRUE when we are in the global phase
+ if (optLocalAssertionProp)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ GenTreePtr relop = tree->gtOp.gtOp1;
+ if ((relop->OperKind() & GTK_RELOP) == 0)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ Compiler::optAssertionKind assertionKind = OAK_INVALID;
+
+ GenTreePtr op1 = relop->gtOp.gtOp1;
+ GenTreePtr op2 = relop->gtOp.gtOp2;
+
+ AssertionIndex index = optCreateJTrueBoundsAssertion(tree);
+ if (index != NO_ASSERTION_INDEX)
+ {
+ return index;
+ }
+
+ // Find assertion kind.
+ switch (relop->gtOper)
+ {
+ case GT_EQ:
+ assertionKind = OAK_EQUAL;
+ break;
+ case GT_NE:
+ assertionKind = OAK_NOT_EQUAL;
+ break;
+ default:
+ // TODO-CQ: add other relop operands. Disabled for now to measure perf
+ // and not occupy assertion table slots. We'll add them when used.
+ return NO_ASSERTION_INDEX;
+ }
+
+ // Check for op1 or op2 to be lcl var and if so, keep it in op1.
+ if ((op1->gtOper != GT_LCL_VAR) && (op2->gtOper == GT_LCL_VAR))
+ {
+ jitstd::swap(op1, op2);
+ }
+ // If op1 is lcl and op2 is const or lcl, create assertion.
+ if ((op1->gtOper == GT_LCL_VAR) &&
+ ((op2->OperKind() & GTK_CONST) || (op2->gtOper == GT_LCL_VAR))) // Fix for Dev10 851483
+ {
+ return optCreateJtrueAssertions(op1, op2, assertionKind);
+ }
+
+ // Check op1 and op2 for an indirection of a GT_LCL_VAR and keep it in op1.
+ if (((op1->gtOper != GT_IND) || (op1->gtOp.gtOp1->gtOper != GT_LCL_VAR)) &&
+ ((op2->gtOper == GT_IND) && (op2->gtOp.gtOp1->gtOper == GT_LCL_VAR)))
+ {
+ jitstd::swap(op1, op2);
+ }
+    // If op1 is an indirection of a GT_LCL_VAR, create the assertions.
+ if ((op1->gtOper == GT_IND) && (op1->gtOp.gtOp1->gtOper == GT_LCL_VAR))
+ {
+ return optCreateJtrueAssertions(op1, op2, assertionKind);
+ }
+
+ // Look for a call to an IsInstanceOf helper compared to a nullptr
+ if ((op2->gtOper != GT_CNS_INT) && (op1->gtOper == GT_CNS_INT))
+ {
+ jitstd::swap(op1, op2);
+ }
+ // Validate op1 and op2
+ if ((op1->gtOper != GT_CALL) || (op1->gtCall.gtCallType != CT_HELPER) || (op1->TypeGet() != TYP_REF) || // op1
+ (op2->gtOper != GT_CNS_INT) || (op2->gtIntCon.gtIconVal != 0)) // op2
+ {
+ return NO_ASSERTION_INDEX;
+ }
+ if (op1->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_ISINSTANCEOFINTERFACE) &&
+ op1->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_ISINSTANCEOFARRAY) &&
+ op1->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_ISINSTANCEOFCLASS) &&
+ op1->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_ISINSTANCEOFANY))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ op2 = op1->gtCall.gtCallLateArgs->gtOp.gtOp2;
+ op1 = op1->gtCall.gtCallLateArgs;
+
+ // Reverse the assertion
+ assert(assertionKind == OAK_EQUAL || assertionKind == OAK_NOT_EQUAL);
+ assertionKind = (assertionKind == OAK_EQUAL) ? OAK_NOT_EQUAL : OAK_EQUAL;
+
+ if (op1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ return optCreateJtrueAssertions(op1, op2, assertionKind);
+ }
+
+ return NO_ASSERTION_INDEX;
+}
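+
+// Illustrative example of the isinst pattern recognized above (helper and
+// local names are hypothetical):
+//
+//   JTRUE(NE(CALL(CORINFO_HELP_ISINSTANCEOFCLASS, clsHnd, obj), 0))
+//
+// op1/op2 are re-pointed at the helper call's late args (the object and the
+// class handle), and the assertion kind is reversed relative to the compare:
+// on the path where the helper result is non-null we can record an OAK_EQUAL
+// subtype assertion for the object, and OAK_NOT_EQUAL on the other path.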
+
+/*****************************************************************************
+ *
+ * Create an assertion on the phi node if some information can be gleaned
+ * from all of the constituent phi operands.
+ *
+ */
+Compiler::AssertionIndex Compiler::optAssertionGenPhiDefn(GenTreePtr tree)
+{
+ if (!tree->IsPhiDefn())
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ GenTreePtr phi = tree->gtOp.gtOp2;
+
+ // Try to find if all phi arguments are known to be non-null.
+ bool isNonNull = true;
+ for (GenTreeArgList* args = phi->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ if (!vnStore->IsKnownNonNull(args->Current()->gtVNPair.GetConservative()))
+ {
+ isNonNull = false;
+ break;
+ }
+ }
+
+ // All phi arguments are non-null implies phi rhs is non-null.
+ if (isNonNull)
+ {
+ return optCreateAssertion(tree->gtOp.gtOp1, nullptr, OAK_NOT_EQUAL);
+ }
+ return NO_ASSERTION_INDEX;
+}
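+
+// For example (SSA names are illustrative): for a phi definition
+// "V05d3 = phi(V05d1, V05d2)", if the conservative VNs of both V05d1 and
+// V05d2 are known non-null, an OAK_NOT_EQUAL (non-null) assertion is created
+// for the defined local on the left-hand side of the phi assignment.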
+
+/*****************************************************************************
+ *
+ * If this statement creates a value assignment or assertion
+ * then assign an index to the given value assignment by adding
+ * it to the lookup table, if necessary.
+ */
+void Compiler::optAssertionGen(GenTreePtr tree)
+{
+ tree->ClearAssertion();
+
+ if (tree->gtFlags & GTF_COLON_COND)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ optAssertionPropCurrentTree = tree;
+#endif
+
+ // For most of the assertions that we create below
+ // the assertion is true after the tree is processed
+ bool assertionProven = true;
+ AssertionIndex assertionIndex = NO_ASSERTION_INDEX;
+ switch (tree->gtOper)
+ {
+ case GT_ASG:
+ // VN takes care of non local assertions for assignments and data flow.
+ // TODO-1stClassStructs: Enable assertion prop for struct types.
+ if (varTypeIsStruct(tree))
+ {
+ // Do nothing.
+ }
+ else if (optLocalAssertionProp)
+ {
+ assertionIndex = optCreateAssertion(tree->gtOp.gtOp1, tree->gtOp.gtOp2, OAK_EQUAL);
+ }
+ else
+ {
+ assertionIndex = optAssertionGenPhiDefn(tree);
+ }
+ break;
+
+ case GT_OBJ:
+ case GT_BLK:
+ case GT_DYN_BLK:
+ // TODO-1stClassStructs: These should always be considered to create a non-null
+ // assertion, but previously, when these indirections were implicit due to a block
+ // copy or init, they were not being considered to do so.
+ break;
+ case GT_IND:
+ // TODO-1stClassStructs: All indirections should be considered to create a non-null
+ // assertion, but previously, when these indirections were implicit due to a block
+ // copy or init, they were not being considered to do so.
+ if (tree->gtType == TYP_STRUCT)
+ {
+ GenTree* parent = tree->gtGetParent(nullptr);
+ if ((parent != nullptr) && (parent->gtOper == GT_ASG))
+ {
+ break;
+ }
+ }
+ case GT_NULLCHECK:
+ case GT_ARR_LENGTH:
+ // An array length can create a non-null assertion
+ assertionIndex = optCreateAssertion(tree->gtOp.gtOp1, nullptr, OAK_NOT_EQUAL);
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ if (!optLocalAssertionProp)
+ {
+ assertionIndex = optCreateAssertion(tree, nullptr, OAK_NO_THROW);
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ // An array element reference can create a non-null assertion
+ assertionIndex = optCreateAssertion(tree->gtArrElem.gtArrObj, nullptr, OAK_NOT_EQUAL);
+ break;
+
+ case GT_CALL:
+ // A virtual call can create a non-null assertion. We transform some virtual calls into non-virtual calls
+ // with a GTF_CALL_NULLCHECK flag set.
+ if ((tree->gtFlags & GTF_CALL_NULLCHECK) || ((tree->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT))
+ {
+ // Retrieve the 'this' arg
+ GenTreePtr thisArg = gtGetThisArg(tree);
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM_)
+ if (thisArg == nullptr)
+ {
+ // For tail calls we lose the this pointer in the argument list but that's OK because a null check
+ // was made explicit, so we get the assertion when we walk the GT_IND in the argument list.
+ noway_assert(tree->gtCall.IsTailCall());
+ break;
+ }
+#endif // _TARGET_X86_ || _TARGET_AMD64_ || _TARGET_ARM_
+ noway_assert(thisArg != nullptr);
+ assertionIndex = optCreateAssertion(thisArg, nullptr, OAK_NOT_EQUAL);
+ }
+ break;
+
+ case GT_CAST:
+ // We only create this assertion for global assertion prop
+ if (!optLocalAssertionProp)
+ {
+                // This represents an assertion that we would like to prove to be true. It is not actually a true
+ // assertion.
+ // If we can prove this assertion true then we can eliminate this cast.
+ assertionIndex = optCreateAssertion(tree->gtOp.gtOp1, tree, OAK_SUBRANGE);
+ assertionProven = false;
+ }
+ break;
+
+ case GT_JTRUE:
+ assertionIndex = optAssertionGenJtrue(tree);
+ break;
+
+ default:
+ // All other gtOper node kinds, leave 'assertionIndex' = NO_ASSERTION_INDEX
+ break;
+ }
+
+ // For global assertion prop we must store the assertion number in the tree node
+ if ((assertionIndex != NO_ASSERTION_INDEX) && assertionProven && !optLocalAssertionProp)
+ {
+ tree->SetAssertion(assertionIndex);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Maps a complementary assertion to its original assertion so it can be
+ * retrieved faster.
+ */
+void Compiler::optMapComplementary(AssertionIndex assertionIndex, AssertionIndex index)
+{
+ if (assertionIndex == NO_ASSERTION_INDEX || index == NO_ASSERTION_INDEX)
+ {
+ return;
+ }
+ optComplementaryAssertionMap[assertionIndex] = index;
+ optComplementaryAssertionMap[index] = assertionIndex;
+}
+
+/*****************************************************************************
+ *
+ * Given an assertion index, return the assertion index of the complementary
+ * assertion or 0 if one does not exist.
+ */
+Compiler::AssertionIndex Compiler::optFindComplementary(AssertionIndex assertIndex)
+{
+ if (assertIndex == NO_ASSERTION_INDEX)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+ AssertionDsc* inputAssertion = optGetAssertion(assertIndex);
+
+ // Must be an equal or not equal assertion.
+ if (inputAssertion->assertionKind != OAK_EQUAL && inputAssertion->assertionKind != OAK_NOT_EQUAL)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ AssertionIndex index = optComplementaryAssertionMap[assertIndex];
+ if (index != NO_ASSERTION_INDEX && index <= optAssertionCount)
+ {
+ return index;
+ }
+
+ optAssertionKind complementaryAssertionKind =
+ (inputAssertion->assertionKind == OAK_EQUAL) ? OAK_NOT_EQUAL : OAK_EQUAL;
+ for (AssertionIndex index = 1; index <= optAssertionCount; ++index)
+ {
+ // Make sure assertion kinds are complementary and op1, op2 kinds match.
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ if (curAssertion->Complementary(inputAssertion, !optLocalAssertionProp))
+ {
+ optMapComplementary(assertIndex, index);
+ return index;
+ }
+ }
+ return NO_ASSERTION_INDEX;
+}
+
+/*****************************************************************************
+ *
+ * Given a lclNum and a toType, return the index of an assertion that
+ * claims that the variable's value is always a valid subrange of toType.
+ * Thus we can discard or omit a cast to toType. Returns NO_ASSERTION_INDEX
+ * if no such assertion can be found in "assertions."
+ */
+
+Compiler::AssertionIndex Compiler::optAssertionIsSubrange(GenTreePtr tree,
+ var_types toType,
+ ASSERT_VALARG_TP assertions)
+{
+ if (!optLocalAssertionProp && BitVecOps::IsEmpty(apTraits, assertions))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ for (AssertionIndex index = 1; index <= optAssertionCount; index++)
+ {
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ if ((optLocalAssertionProp ||
+ BitVecOps::IsMember(apTraits, assertions, index - 1)) && // either local prop or use propagated assertions
+ (curAssertion->assertionKind == OAK_SUBRANGE) &&
+ (curAssertion->op1.kind == O1K_LCLVAR))
+ {
+ // For local assertion prop use comparison on locals, and use comparison on vns for global prop.
+ bool isEqual = optLocalAssertionProp ? (curAssertion->op1.lcl.lclNum == tree->AsLclVarCommon()->GetLclNum())
+ : (curAssertion->op1.vn == tree->gtVNPair.GetConservative());
+ if (!isEqual)
+ {
+ continue;
+ }
+
+ // Make sure the toType is within current assertion's bounds.
+ switch (toType)
+ {
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_SHORT:
+ case TYP_USHORT:
+ case TYP_CHAR:
+ if ((curAssertion->op2.u2.loBound < AssertionDsc::GetLowerBoundForIntegralType(toType)) ||
+ (curAssertion->op2.u2.hiBound > AssertionDsc::GetUpperBoundForIntegralType(toType)))
+ {
+ continue;
+ }
+ break;
+
+ case TYP_UINT:
+ if (curAssertion->op2.u2.loBound < AssertionDsc::GetLowerBoundForIntegralType(toType))
+ {
+ continue;
+ }
+ break;
+
+ case TYP_INT:
+ break;
+
+ default:
+ continue;
+ }
+ return index;
+ }
+ }
+ return NO_ASSERTION_INDEX;
+}
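+
+// Illustrative queries (local and range are hypothetical): given an
+// OAK_SUBRANGE assertion recording that V07 always lies in [0..200], a query
+// with toType == TYP_UBYTE succeeds (the range fits in [0..255]) and the cast
+// can be dropped, while a query with toType == TYP_BYTE fails (200 > 127) and
+// NO_ASSERTION_INDEX is returned if no other assertion matches.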
+
+/**********************************************************************************
+ *
+ * Given a "tree" that is usually arg1 of a isinst/cast kind of GT_CALL (a class
+ * handle), and "methodTableArg" which is a const int (a class handle), then search
+ * if there is an assertion in "assertions", that asserts the equality of the two
+ * class handles and then returns the index of the assertion. If one such assertion
+ * could not be found, then it returns NO_ASSERTION_INDEX.
+ *
+ */
+Compiler::AssertionIndex Compiler::optAssertionIsSubtype(GenTreePtr tree,
+ GenTreePtr methodTableArg,
+ ASSERT_VALARG_TP assertions)
+{
+ if (!optLocalAssertionProp && BitVecOps::IsEmpty(apTraits, assertions))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+ for (AssertionIndex index = 1; index <= optAssertionCount; index++)
+ {
+ if (!optLocalAssertionProp && !BitVecOps::IsMember(apTraits, assertions, index - 1))
+ {
+ continue;
+ }
+
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ if (curAssertion->assertionKind != OAK_EQUAL ||
+ (curAssertion->op1.kind != O1K_SUBTYPE && curAssertion->op1.kind != O1K_EXACT_TYPE))
+ {
+ continue;
+ }
+
+ // If local assertion prop use "lcl" based comparison, if global assertion prop use vn based comparison.
+ if ((optLocalAssertionProp) ? (curAssertion->op1.lcl.lclNum != tree->AsLclVarCommon()->GetLclNum())
+ : (curAssertion->op1.vn != tree->gtVNPair.GetConservative()))
+ {
+ continue;
+ }
+
+ if (curAssertion->op2.kind == O2K_IND_CNS_INT)
+ {
+ if (methodTableArg->gtOper != GT_IND)
+ {
+ continue;
+ }
+ methodTableArg = methodTableArg->gtOp.gtOp1;
+ }
+ else if (curAssertion->op2.kind != O2K_CONST_INT)
+ {
+ continue;
+ }
+
+ ssize_t methodTableVal = 0;
+ unsigned iconFlags = 0;
+ if (!optIsTreeKnownIntValue(!optLocalAssertionProp, methodTableArg, &methodTableVal, &iconFlags))
+ {
+ continue;
+ }
+
+ if (curAssertion->op2.u1.iconVal == methodTableVal)
+ {
+ return index;
+ }
+ }
+ return NO_ASSERTION_INDEX;
+}
+
+//------------------------------------------------------------------------------
+// optVNConstantPropOnTree: Substitutes tree with an evaluated constant while
+// managing ref-counts and side-effects.
+//
+// Arguments:
+// block - The block containing the tree.
+// stmt - The statement in the block containing the tree.
+// tree - The tree node whose value is known at compile time.
+// The tree should have a constant value number.
+//
+// Return Value:
+// Returns a potentially new or a transformed tree node.
+// Returns nullptr when no transformation is possible.
+//
+// Description:
+// Transforms a tree node if its result evaluates to a constant. The
+// transformation can be a "ChangeOper" to a constant or a new constant node
+// with extracted side-effects.
+//
+// Before replacing or substituting the "tree" with a constant, extracts any
+// side effects from the "tree" and creates a comma separated side effect list
+// and then appends the transformed node at the end of the list.
+// This comma separated list is then returned.
+//
+// For JTrue nodes, side effects are not put into a comma separated list. If
+// the relop will evaluate to "true" or "false" statically, then the side-effects
+// will be put into new statements, presuming the JTrue will be folded away.
+//
+// The ref-counts of any variables in the tree being replaced, will be
+// appropriately decremented. The ref-counts of variables in the side-effect
+// nodes will be retained.
+//
+GenTreePtr Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree)
+{
+ if (tree->OperGet() == GT_JTRUE)
+ {
+ // Treat JTRUE separately to extract side effects into respective statements rather
+ // than using a COMMA separated op1.
+ return optVNConstantPropOnJTrue(block, stmt, tree);
+ }
+ // If relop is part of JTRUE, this should be optimized as part of the parent JTRUE.
+ // Or if relop is part of QMARK or anything else, we simply bail here.
+ else if (tree->OperIsCompare() && (tree->gtFlags & GTF_RELOP_JMP_USED))
+ {
+ return nullptr;
+ }
+
+ ValueNum vnCns = tree->gtVNPair.GetConservative();
+ ValueNum vnLib = tree->gtVNPair.GetLiberal();
+
+ // Check if node evaluates to a constant.
+ if (!vnStore->IsVNConstant(vnCns))
+ {
+ return nullptr;
+ }
+
+ GenTreePtr newTree = tree;
+ GenTreePtr sideEffList = nullptr;
+ switch (vnStore->TypeOfVN(vnCns))
+ {
+ case TYP_FLOAT:
+ {
+ float value = vnStore->ConstantValue<float>(vnCns);
+
+ if (tree->TypeGet() == TYP_INT)
+ {
+ // Same sized reinterpretation of bits to integer
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtIntCon.gtIconVal = *(reinterpret_cast<int*>(&value));
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ }
+ else
+ {
+ // Implicit assignment conversion to float or double
+ assert(varTypeIsFloating(tree->TypeGet()));
+
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_DBL);
+ tree->gtDblCon.gtDconVal = value;
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ }
+ break;
+ }
+
+ case TYP_DOUBLE:
+ {
+ double value = vnStore->ConstantValue<double>(vnCns);
+
+ if (tree->TypeGet() == TYP_LONG)
+ {
+ // Same sized reinterpretation of bits to long
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_NATIVELONG);
+ tree->gtIntConCommon.SetLngValue(*(reinterpret_cast<INT64*>(&value)));
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ }
+ else
+ {
+ // Implicit assignment conversion to float or double
+ assert(varTypeIsFloating(tree->TypeGet()));
+
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_DBL);
+ tree->gtDblCon.gtDconVal = value;
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ }
+ break;
+ }
+
+ case TYP_LONG:
+ {
+ INT64 value = vnStore->ConstantValue<INT64>(vnCns);
+#ifdef _TARGET_64BIT_
+ if (vnStore->IsVNHandle(vnCns))
+ {
+#ifdef RELOC_SUPPORT
+ // Don't perform constant folding that involves a handle that needs
+ // to be recorded as a relocation with the VM.
+ if (!opts.compReloc)
+#endif
+ {
+ newTree = gtNewIconHandleNode(value, vnStore->GetHandleFlags(vnCns));
+ newTree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ newTree = optPrepareTreeForReplacement(tree, newTree);
+ }
+ }
+ else
+#endif
+ {
+ switch (tree->TypeGet())
+ {
+ case TYP_INT:
+ // Implicit assignment conversion to smaller integer
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtIntCon.gtIconVal = (int)value;
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ case TYP_LONG:
+ // Same type no conversion required
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_NATIVELONG);
+ tree->gtIntConCommon.SetLngValue(value);
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ case TYP_FLOAT:
+ // No implicit conversions from long to float and value numbering will
+ // not propagate through memory reinterpretations of different size.
+ unreached();
+ break;
+
+ case TYP_DOUBLE:
+ // Same sized reinterpretation of bits to double
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_DBL);
+ tree->gtDblCon.gtDconVal = *(reinterpret_cast<double*>(&value));
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ default:
+ return nullptr;
+ }
+ }
+ }
+ break;
+
+ case TYP_REF:
+ if (tree->TypeGet() != TYP_REF)
+ {
+ return nullptr;
+ }
+
+ assert(vnStore->ConstantValue<size_t>(vnCns) == 0);
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtIntCon.gtIconVal = 0;
+ tree->ClearIconHandleMask();
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ case TYP_INT:
+ {
+ int value = vnStore->ConstantValue<int>(vnCns);
+#ifndef _TARGET_64BIT_
+ if (vnStore->IsVNHandle(vnCns))
+ {
+#ifdef RELOC_SUPPORT
+ // Don't perform constant folding that involves a handle that needs
+ // to be recorded as a relocation with the VM.
+ if (!opts.compReloc)
+#endif
+ {
+ newTree = gtNewIconHandleNode(value, vnStore->GetHandleFlags(vnCns));
+ newTree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ newTree = optPrepareTreeForReplacement(tree, newTree);
+ }
+ }
+ else
+#endif
+ {
+ switch (tree->TypeGet())
+ {
+ case TYP_REF:
+ case TYP_INT:
+ // Same type no conversion required
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtIntCon.gtIconVal = value;
+ tree->ClearIconHandleMask();
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ case TYP_LONG:
+ // Implicit assignment conversion to larger integer
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_NATIVELONG);
+ tree->gtIntConCommon.SetLngValue(value);
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ case TYP_FLOAT:
+ // Same sized reinterpretation of bits to float
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_DBL);
+ tree->gtDblCon.gtDconVal = *(reinterpret_cast<float*>(&value));
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ case TYP_DOUBLE:
+ // No implicit conversions from int to double and value numbering will
+ // not propagate through memory reinterpretations of different size.
+ unreached();
+ break;
+
+ default:
+ return nullptr;
+ }
+ }
+ }
+ break;
+
+ default:
+ return nullptr;
+ }
+ return newTree;
+}
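+
+// Worked example of the bit-reinterpretation cases above (values illustrative):
+// a TYP_INT tree whose conservative VN is the float constant 1.0f becomes
+// GT_CNS_INT with gtIconVal == 0x3f800000 (the IEEE-754 bits of 1.0f), and a
+// TYP_DOUBLE tree whose VN is a TYP_LONG constant becomes GT_CNS_DBL carrying
+// the same 64 bits reinterpreted; no numeric conversion is performed, and any
+// side effects of the original tree are preserved in a leading comma list.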
+
+/*******************************************************************************************************
+ *
+ * Perform constant propagation on a tree given the "curAssertion" is true at the point of the "tree."
+ *
+ */
+GenTreePtr Compiler::optConstantAssertionProp(AssertionDsc* curAssertion,
+ GenTreePtr tree,
+ GenTreePtr stmt DEBUGARG(AssertionIndex index))
+{
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ if (lclNumIsCSE(lclNum))
+ {
+ return nullptr;
+ }
+
+ GenTreePtr newTree = tree;
+
+ // Update 'newTree' with the new value from our table
+ // Typically newTree == tree and we are updating the node in place
+ switch (curAssertion->op2.kind)
+ {
+ case O2K_CONST_DOUBLE:
+ // There could be a positive zero and a negative zero, so don't propagate zeroes.
+ if (curAssertion->op2.dconVal == 0.0)
+ {
+ return nullptr;
+ }
+ newTree->ChangeOperConst(GT_CNS_DBL);
+ newTree->gtDblCon.gtDconVal = curAssertion->op2.dconVal;
+ break;
+
+ case O2K_CONST_LONG:
+ if (newTree->gtType == TYP_LONG)
+ {
+ newTree->ChangeOperConst(GT_CNS_NATIVELONG);
+ newTree->gtIntConCommon.SetLngValue(curAssertion->op2.lconVal);
+ }
+ else
+ {
+ newTree->ChangeOperConst(GT_CNS_INT);
+ newTree->gtIntCon.gtIconVal = (int)curAssertion->op2.lconVal;
+ newTree->gtType = TYP_INT;
+ }
+ break;
+
+ case O2K_CONST_INT:
+ if (curAssertion->op2.u1.iconFlags & GTF_ICON_HDL_MASK)
+ {
+ // Here we have to allocate a new 'large' node to replace the old one
+ newTree = gtNewIconHandleNode(curAssertion->op2.u1.iconVal,
+ curAssertion->op2.u1.iconFlags & GTF_ICON_HDL_MASK);
+ }
+ else
+ {
+ bool isArrIndex = ((tree->gtFlags & GTF_VAR_ARR_INDEX) != 0);
+ newTree->ChangeOperConst(GT_CNS_INT);
+ newTree->gtIntCon.gtIconVal = curAssertion->op2.u1.iconVal;
+ newTree->ClearIconHandleMask();
+ // If we're doing an array index address, assume any constant propagated contributes to the index.
+ if (isArrIndex)
+ {
+ newTree->gtIntCon.gtFieldSeq =
+ GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
+ }
+ newTree->gtFlags &= ~GTF_VAR_ARR_INDEX;
+ }
+
+ // Constant ints are of type TYP_INT, not any of the short forms.
+ if (varTypeIsIntegral(newTree->TypeGet()))
+ {
+#ifdef _TARGET_64BIT_
+ var_types newType = (var_types)((curAssertion->op2.u1.iconFlags & 1) ? TYP_LONG : TYP_INT);
+ if (newTree->TypeGet() != newType)
+ {
+ noway_assert(newTree->gtType != TYP_REF);
+ newTree->gtType = newType;
+ }
+#else
+ if (newTree->TypeGet() != TYP_INT)
+ {
+ noway_assert(newTree->gtType != TYP_REF && newTree->gtType != TYP_LONG);
+ newTree->gtType = TYP_INT;
+ }
+#endif
+ }
+ break;
+
+ default:
+ return nullptr;
+ }
+
+ if (!optLocalAssertionProp)
+ {
+ assert(newTree->OperIsConst()); // We should have a simple Constant node for newTree
+ assert(vnStore->IsVNConstant(curAssertion->op2.vn)); // The value number stored for op2 should be a valid
+ // VN representing the constant
+ newTree->gtVNPair.SetBoth(curAssertion->op2.vn); // Set the ValueNumPair to the constant VN from op2
+ // of the assertion
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAssertion prop in BB%02u:\n", compCurBB->bbNum);
+ optPrintAssertion(curAssertion, index);
+ gtDispTree(newTree, nullptr, nullptr, true);
+ }
+#endif
+ if (lvaLocalVarRefCounted)
+ {
+ lvaTable[lclNum].decRefCnts(compCurBB->getBBWeight(this), this);
+ }
+
+ return optAssertionProp_Update(newTree, tree, stmt);
+}
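+
+// Illustrative substitution (local and value hypothetical): with an assertion
+// "V06 == 42" available, a use of GT_LCL_VAR V06 is rewritten in place to
+// GT_CNS_INT 42. For a handle constant a fresh "large" icon node is allocated
+// instead, and under global assertion prop the new constant's ValueNumPair is
+// set to the constant VN recorded in op2 of the assertion.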
+
+/*******************************************************************************************************
+ *
+ * Called in the context of an existing copy assertion which makes an "==" assertion on "lclVar" and
+ * "copyVar." Before substituting "copyVar" for "lclVar", we make sure using "copy" doesn't widen access.
+ *
+ */
+bool Compiler::optAssertionProp_LclVarTypeCheck(GenTreePtr tree, LclVarDsc* lclVarDsc, LclVarDsc* copyVarDsc)
+{
+ /*
+ Small struct field locals are stored using the exact width and loaded widened
+ (i.e. lvNormalizeOnStore==false lvNormalizeOnLoad==true),
+ because the field locals might end up embedded in the parent struct local with the exact width.
+
+    In other words, a store to a short field local should always be done using an exact width store.
+
+ [00254538] 0x0009 ------------ const int 0x1234
+ [002545B8] 0x000B -A--G--NR--- = short
+ [00254570] 0x000A D------N---- lclVar short V43 tmp40
+
+ mov word ptr [L_043], 0x1234
+
+ Now, if we copy prop, say a short field local V43, to another short local V34
+ for the following tree:
+
+ [04E18650] 0x0001 ------------ lclVar int V34 tmp31
+ [04E19714] 0x0002 -A---------- = int
+ [04E196DC] 0x0001 D------N---- lclVar int V36 tmp33
+
+ We will end with this tree:
+
+ [04E18650] 0x0001 ------------ lclVar int V43 tmp40
+ [04E19714] 0x0002 -A-----NR--- = int
+ [04E196DC] 0x0001 D------N---- lclVar int V36 tmp33 EAX
+
+    And eventually causing a 4-byte fetch from [L_043] :(
+ mov EAX, dword ptr [L_043]
+
+ The following check is to make sure we only perform the copy prop
+ when we don't retrieve the wider value.
+ */
+
+ if (copyVarDsc->lvIsStructField)
+ {
+ var_types varType = (var_types)copyVarDsc->lvType;
+ // Make sure we don't retrieve the wider value.
+ return !varTypeIsSmall(varType) || (varType == tree->TypeGet());
+ }
+ // Called in the context of a single copy assertion, so the types should have been
+    // taken care of by the assertion gen logic for other cases. Just return true.
+ return true;
+}
+
+/**********************************************************************************
+ *
+ * Perform copy assertion propagation when the lclNum and ssaNum of the "tree" match
+ * the "curAssertion."
+ *
+ */
+GenTreePtr Compiler::optCopyAssertionProp(AssertionDsc* curAssertion,
+ GenTreePtr tree,
+ GenTreePtr stmt DEBUGARG(AssertionIndex index))
+{
+ const AssertionDsc::AssertionDscOp1& op1 = curAssertion->op1;
+ const AssertionDsc::AssertionDscOp2& op2 = curAssertion->op2;
+
+ noway_assert(op1.lcl.lclNum != op2.lcl.lclNum);
+
+ unsigned lclNum = tree->gtLclVarCommon.GetLclNum();
+
+ // Make sure one of the lclNum of the assertion matches with that of the tree.
+ if (op1.lcl.lclNum != lclNum && op2.lcl.lclNum != lclNum)
+ {
+ return nullptr;
+ }
+
+ // Extract the matching lclNum and ssaNum.
+ unsigned copyLclNum = (op1.lcl.lclNum == lclNum) ? op2.lcl.lclNum : op1.lcl.lclNum;
+ unsigned copySsaNum = BAD_VAR_NUM;
+ if (!optLocalAssertionProp)
+ {
+ // Extract the ssaNum of the matching lclNum.
+ unsigned ssaNum = (op1.lcl.lclNum == lclNum) ? op1.lcl.ssaNum : op2.lcl.ssaNum;
+ copySsaNum = (op1.lcl.lclNum == lclNum) ? op2.lcl.ssaNum : op1.lcl.ssaNum;
+
+ if (ssaNum != tree->AsLclVarCommon()->GetSsaNum())
+ {
+ return nullptr;
+ }
+ }
+
+ LclVarDsc* copyVarDsc = &lvaTable[copyLclNum];
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+
+ // Make sure the types are compatible.
+ if (!optAssertionProp_LclVarTypeCheck(tree, lclVarDsc, copyVarDsc))
+ {
+ return nullptr;
+ }
+
+ // Make sure we can perform this copy prop.
+ if (optCopyProp_LclVarScore(lclVarDsc, copyVarDsc, curAssertion->op1.lcl.lclNum == lclNum) <= 0)
+ {
+ return nullptr;
+ }
+
+ // If global assertion prop, by now we should have ref counts, fix them.
+ if (lvaLocalVarRefCounted)
+ {
+ lvaTable[lclNum].decRefCnts(compCurBB->getBBWeight(this), this);
+ lvaTable[copyLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ tree->gtLclVarCommon.SetSsaNum(copySsaNum);
+ }
+ tree->gtLclVarCommon.SetLclNum(copyLclNum);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAssertion prop in BB%02u:\n", compCurBB->bbNum);
+ optPrintAssertion(curAssertion, index);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+
+ // Update and morph the tree.
+ return optAssertionProp_Update(tree, tree, stmt);
+}
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of just a LclVar and a set of available assertions
+ * we try to propagate an assertion and modify the LclVar tree if we can.
+ * We pass in the root of the tree via 'stmt', for local copy prop 'stmt' will
+ * be nullptr. Returns the modified tree, or nullptr if no assertion prop took place.
+ */
+
+GenTreePtr Compiler::optAssertionProp_LclVar(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ assert(tree->gtOper == GT_LCL_VAR);
+    // If we have a var definition then bail, or
+    // if this is the address of the var then it will have the GTF_DONT_CSE
+    // flag set and we don't want to do assertion prop on it.
+ if (tree->gtFlags & (GTF_VAR_DEF | GTF_DONT_CSE))
+ {
+ return nullptr;
+ }
+
+ BitVecOps::Iter iter(apTraits, assertions);
+ unsigned index = 0;
+ while (iter.NextElem(apTraits, &index))
+ {
+ index++;
+ if (index > optAssertionCount)
+ {
+ break;
+ }
+ // See if the variable is equal to a constant or another variable.
+ AssertionDsc* curAssertion = optGetAssertion((AssertionIndex)index);
+ if (curAssertion->assertionKind != OAK_EQUAL || curAssertion->op1.kind != O1K_LCLVAR)
+ {
+ continue;
+ }
+
+ // Copy prop.
+ if (curAssertion->op2.kind == O2K_LCLVAR_COPY)
+ {
+            // Cannot do copy prop during global assertion prop because we have no knowledge
+            // of kill sets. We will still make "a == b" copy assertions during the global phase to allow
+ // for any implied assertions that can be retrieved. Because implied assertions look for
+ // matching SSA numbers (i.e., if a0 == b1 and b1 == c0 then a0 == c0) they don't need kill sets.
+ if (optLocalAssertionProp)
+ {
+ // Perform copy assertion prop.
+ GenTreePtr newTree = optCopyAssertionProp(curAssertion, tree, stmt DEBUGARG((AssertionIndex)index));
+ if (newTree == nullptr)
+ {
+ // Skip and try next assertion.
+ continue;
+ }
+ return newTree;
+ }
+ }
+ // Constant prop (for local assertion prop.)
+ // The case where the tree type could be different than the LclVar type is caused by
+ // gtFoldExpr, specifically the case of a cast, where the fold operation changes the type of the LclVar
+        // node. In such a case it is not safe to perform the substitution since later on the JIT will assert mismatching
+ // types between trees.
+ else if (curAssertion->op1.lcl.lclNum == tree->gtLclVarCommon.GetLclNum() &&
+ tree->gtType == lvaTable[tree->gtLclVarCommon.GetLclNum()].lvType)
+ {
+            // If local assertion prop, just perform constant prop.
+ if (optLocalAssertionProp)
+ {
+ return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG((AssertionIndex)index));
+ }
+ // If global assertion, perform constant propagation only if the VN's match and the lcl is non-CSE.
+ else if (curAssertion->op1.vn == tree->gtVNPair.GetConservative())
+ {
+#if FEATURE_ANYCSE
+ // Don't perform constant prop for CSE LclVars
+ if (!lclNumIsCSE(tree->AsLclVarCommon()->GetLclNum()))
+#endif
+ {
+ return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG((AssertionIndex)index));
+ }
+ }
+ }
+ }
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Given a set of "assertions" to search, find an assertion that matches
+ * op1Kind and lclNum, op2Kind and the constant value, and is either an equal
+ * or a not-equal assertion.
+ */
+Compiler::AssertionIndex Compiler::optLocalAssertionIsEqualOrNotEqual(
+ optOp1Kind op1Kind, unsigned lclNum, optOp2Kind op2Kind, ssize_t cnsVal, ASSERT_VALARG_TP assertions)
+{
+ noway_assert((op1Kind == O1K_LCLVAR) || (op1Kind == O1K_EXACT_TYPE) || (op1Kind == O1K_SUBTYPE));
+ noway_assert((op2Kind == O2K_CONST_INT) || (op2Kind == O2K_IND_CNS_INT));
+ if (!optLocalAssertionProp && BitVecOps::IsEmpty(apTraits, assertions))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ for (AssertionIndex index = 1; index <= optAssertionCount; ++index)
+ {
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ if (optLocalAssertionProp || BitVecOps::IsMember(apTraits, assertions, index - 1))
+ {
+ if ((curAssertion->assertionKind != OAK_EQUAL) && (curAssertion->assertionKind != OAK_NOT_EQUAL))
+ {
+ continue;
+ }
+
+ if ((curAssertion->op1.kind == op1Kind) && (curAssertion->op1.lcl.lclNum == lclNum) &&
+ (curAssertion->op2.kind == op2Kind))
+ {
+ bool constantIsEqual = (curAssertion->op2.u1.iconVal == cnsVal);
+ bool assertionIsEqual = (curAssertion->assertionKind == OAK_EQUAL);
+
+ if (constantIsEqual || assertionIsEqual)
+ {
+ return index;
+ }
+ }
+ }
+ }
+ return NO_ASSERTION_INDEX;
+}
+
+/*****************************************************************************
+ *
+ * Given a set of "assertions" to search for, find an assertion that is either
+ * "op1" == "op2" or "op1" != "op2." Does a value number based comparison.
+ *
+ */
+Compiler::AssertionIndex Compiler::optGlobalAssertionIsEqualOrNotEqual(ASSERT_VALARG_TP assertions,
+ GenTreePtr op1,
+ GenTreePtr op2)
+{
+ if (BitVecOps::IsEmpty(apTraits, assertions))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+ BitVecOps::Iter iter(apTraits, assertions);
+ unsigned index = 0;
+ while (iter.NextElem(apTraits, &index))
+ {
+ index++;
+ if (index > optAssertionCount)
+ {
+ break;
+ }
+ AssertionDsc* curAssertion = optGetAssertion((AssertionIndex)index);
+ if ((curAssertion->assertionKind != OAK_EQUAL && curAssertion->assertionKind != OAK_NOT_EQUAL))
+ {
+ continue;
+ }
+
+ if (curAssertion->op1.vn == op1->gtVNPair.GetConservative() &&
+ curAssertion->op2.vn == op2->gtVNPair.GetConservative())
+ {
+ return (AssertionIndex)index;
+ }
+ }
+ return NO_ASSERTION_INDEX;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of a RelOp and a set of available assertions
+ * we try to propagate an assertion and modify the RelOp tree if we can.
+ * We pass in the root of the tree via 'stmt'; for local copy prop 'stmt' will be nullptr.
+ * Returns the modified tree, or nullptr if no assertion prop took place.
+ */
+
+GenTreePtr Compiler::optAssertionProp_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ assert(tree->OperKind() & GTK_RELOP);
+
+ //
+ // Currently only GT_EQ or GT_NE are supported Relops for AssertionProp
+ //
+ if ((tree->gtOper != GT_EQ) && (tree->gtOper != GT_NE))
+ {
+ return nullptr;
+ }
+
+ if (!optLocalAssertionProp)
+ {
+ // If global assertion prop then use value numbering.
+ return optAssertionPropGlobal_RelOp(assertions, tree, stmt);
+ }
+ else
+ {
+ // If local assertion prop then use variable based prop.
+ return optAssertionPropLocal_RelOp(assertions, tree, stmt);
+ }
+}
+
+/*************************************************************************************
+ *
+ * Given the set of "assertions" to look up a relop assertion about the relop "tree",
+ * perform Value numbering based relop assertion propagation on the tree.
+ *
+ */
+GenTreePtr Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions,
+ const GenTreePtr tree,
+ const GenTreePtr stmt)
+{
+ assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE);
+
+ GenTreePtr newTree = tree;
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ if (op1->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+ // Find an equal or not equal assertion involving "op1" and "op2".
+ AssertionIndex index = optGlobalAssertionIsEqualOrNotEqual(assertions, op1, op2);
+ if (index == NO_ASSERTION_INDEX)
+ {
+ return nullptr;
+ }
+
+ AssertionDsc* curAssertion = optGetAssertion(index);
+
+    // Whether or not to allow reversing the condition for OAK_NOT_EQUAL assertions.
+ bool allowReverse = true;
+
+ // If the assertion involves "op2" and it is a constant, then check if "op1" also has a constant value.
+ if (vnStore->IsVNConstant(op2->gtVNPair.GetConservative()))
+ {
+ ValueNum vnCns = op2->gtVNPair.GetConservative();
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nVN relop based constant assertion prop in BB%02u:\n", compCurBB->bbNum);
+ printf("Assertion index=#%02u: ", index);
+ printTreeID(op1);
+ printf(" %s ", (curAssertion->assertionKind == OAK_EQUAL) ? "==" : "!=");
+ if (genActualType(op1->TypeGet()) == TYP_INT)
+ {
+ printf("%d\n", vnStore->ConstantValue<int>(vnCns));
+ }
+ else if (op1->TypeGet() == TYP_LONG)
+ {
+ printf("%I64d\n", vnStore->ConstantValue<INT64>(vnCns));
+ }
+ else if (op1->TypeGet() == TYP_DOUBLE)
+ {
+ printf("%f\n", vnStore->ConstantValue<double>(vnCns));
+ }
+ else if (op1->TypeGet() == TYP_FLOAT)
+ {
+ printf("%f\n", vnStore->ConstantValue<float>(vnCns));
+ }
+ else if (op1->TypeGet() == TYP_REF)
+ {
+ // The only constant of TYP_REF that ValueNumbering supports is 'null'
+ assert(vnStore->ConstantValue<size_t>(vnCns) == 0);
+ printf("null\n");
+ }
+ else
+ {
+ printf("??unknown\n");
+ }
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ // Decrement the ref counts, before we change the oper.
+ lvaTable[op1->gtLclVar.gtLclNum].decRefCnts(compCurBB->getBBWeight(this), this);
+
+ // Change the oper to const.
+ if (genActualType(op1->TypeGet()) == TYP_INT)
+ {
+ op1->ChangeOperConst(GT_CNS_INT);
+ op1->gtIntCon.gtIconVal = vnStore->ConstantValue<int>(vnCns);
+ }
+ else if (op1->TypeGet() == TYP_LONG)
+ {
+ op1->ChangeOperConst(GT_CNS_NATIVELONG);
+ op1->gtIntConCommon.SetLngValue(vnStore->ConstantValue<INT64>(vnCns));
+ }
+ else if (op1->TypeGet() == TYP_DOUBLE)
+ {
+ double constant = vnStore->ConstantValue<double>(vnCns);
+ op1->ChangeOperConst(GT_CNS_DBL);
+ op1->gtDblCon.gtDconVal = constant;
+
+ // Nothing can be equal to NaN. So if IL had "op1 == NaN", then we already made op1 NaN,
+ // which will yield a false correctly. Instead if IL had "op1 != NaN", then we already
+ // made op1 NaN which will yield a true correctly. Note that this is irrespective of the
+ // assertion we have made.
+ allowReverse = (_isnan(constant) == 0);
+ }
+ else if (op1->TypeGet() == TYP_FLOAT)
+ {
+ float constant = vnStore->ConstantValue<float>(vnCns);
+ op1->ChangeOperConst(GT_CNS_DBL);
+ op1->gtDblCon.gtDconVal = constant;
+ // See comments for TYP_DOUBLE.
+ allowReverse = (_isnan(constant) == 0);
+ }
+ else if (op1->TypeGet() == TYP_REF)
+ {
+ op1->ChangeOperConst(GT_CNS_INT);
+ // The only constant of TYP_REF that ValueNumbering supports is 'null'
+ noway_assert(vnStore->ConstantValue<size_t>(vnCns) == 0);
+ op1->gtIntCon.gtIconVal = 0;
+ }
+ else
+ {
+ noway_assert(!"unknown type in Global_RelOp");
+ }
+
+ op1->gtVNPair.SetBoth(vnCns); // Preserve the ValueNumPair, as ChangeOperConst/SetOper will clear it.
+ }
+ // If the assertion involves "op2" and "op1" is also a local var, then just morph the tree.
+ else if (op2->gtOper == GT_LCL_VAR)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nVN relop based copy assertion prop in BB%02u:\n", compCurBB->bbNum);
+ printf("Assertion index=#%02u: V%02d.%02d %s V%02d.%02d\n", index, op1->gtLclVar.gtLclNum,
+ op1->gtLclVar.gtSsaNum, (curAssertion->assertionKind == OAK_EQUAL) ? "==" : "!=",
+ op2->gtLclVar.gtLclNum, op2->gtLclVar.gtSsaNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ lvaTable[op1->gtLclVar.gtLclNum].decRefCnts(compCurBB->getBBWeight(this), this);
+
+ // If floating point, don't just substitute op1 with op2, this won't work if
+ // op2 is NaN. Just turn it into a "true" or "false" yielding expression.
+ if (op1->TypeGet() == TYP_DOUBLE || op1->TypeGet() == TYP_FLOAT)
+ {
+ // Note we can't trust the OAK_EQUAL as the value could end up being a NaN
+ // violating the assertion. However, we create OAK_EQUAL assertions for floating
+ // point only on JTrue nodes, so if the condition held earlier, it will hold
+ // now. We don't create OAK_EQUAL assertion on floating point from GT_ASG
+ // because we depend on value num which would constant prop the NaN.
+ lvaTable[op2->gtLclVar.gtLclNum].decRefCnts(compCurBB->getBBWeight(this), this);
+ op1->ChangeOperConst(GT_CNS_DBL);
+ op1->gtDblCon.gtDconVal = 0;
+ op2->ChangeOperConst(GT_CNS_DBL);
+ op2->gtDblCon.gtDconVal = 0;
+ }
+ // Change the op1 LclVar to the op2 LclVar
+ else
+ {
+ noway_assert(varTypeIsIntegralOrI(op1->TypeGet()));
+ lvaTable[op2->gtLclVar.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ op1->AsLclVarCommon()->SetLclNum(op2->AsLclVarCommon()->GetLclNum());
+ op1->AsLclVarCommon()->SetSsaNum(op2->AsLclVarCommon()->GetSsaNum());
+ }
+ }
+ else
+ {
+ return nullptr;
+ }
+
+ // Finally reverse the condition, if we have a not equal assertion.
+ if (allowReverse && curAssertion->assertionKind == OAK_NOT_EQUAL)
+ {
+ gtReverseCond(tree);
+ }
+
+ newTree = fgMorphTree(tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ gtDispTree(newTree, nullptr, nullptr, true);
+ }
+#endif
+
+ return optAssertionProp_Update(newTree, tree, stmt);
+}
+
+/*************************************************************************************
+ *
+ * Given the set of "assertions" to look up a relop assertion about the relop "tree",
+ * perform local variable name based relop assertion propagation on the tree.
+ *
+ */
+GenTreePtr Compiler::optAssertionPropLocal_RelOp(ASSERT_VALARG_TP assertions,
+ const GenTreePtr tree,
+ const GenTreePtr stmt)
+{
+ assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+    // For Local AssertionProp we can only fold when op1 is a GT_LCL_VAR
+ if (op1->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+    // For Local AssertionProp we can only fold when op2 is a GT_CNS_INT
+ if (op2->gtOper != GT_CNS_INT)
+ {
+ return nullptr;
+ }
+
+ optOp1Kind op1Kind = O1K_LCLVAR;
+ optOp2Kind op2Kind = O2K_CONST_INT;
+ ssize_t cnsVal = op2->gtIntCon.gtIconVal;
+ var_types cmpType = op1->TypeGet();
+
+ // Don't try to fold/optimize Floating Compares; there are multiple zero values.
+ if (varTypeIsFloating(cmpType))
+ {
+ return nullptr;
+ }
+
+ // Find an equal or not equal assertion about op1 var.
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ AssertionIndex index = optLocalAssertionIsEqualOrNotEqual(op1Kind, lclNum, op2Kind, cnsVal, assertions);
+
+ if (index == NO_ASSERTION_INDEX)
+ {
+ return nullptr;
+ }
+
+ AssertionDsc* curAssertion = optGetAssertion(index);
+
+ bool assertionKindIsEqual = (curAssertion->assertionKind == OAK_EQUAL);
+ bool constantIsEqual = false;
+
+ if (genTypeSize(cmpType) == TARGET_POINTER_SIZE)
+ {
+ constantIsEqual = (curAssertion->op2.u1.iconVal == cnsVal);
+ }
+#ifdef _TARGET_64BIT_
+ else if (genTypeSize(cmpType) == sizeof(INT32))
+ {
+ // Compare the low 32-bits only
+ constantIsEqual = (((INT32)curAssertion->op2.u1.iconVal) == ((INT32)cnsVal));
+ }
+#endif
+ else
+ {
+        // We currently don't fold/optimize when the GT_LCL_VAR has been cast to a small type.
+ return nullptr;
+ }
+
+ noway_assert(constantIsEqual || assertionKindIsEqual);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAssertion prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+
+ // Return either CNS_INT 0 or CNS_INT 1.
+ bool foldResult = (constantIsEqual == assertionKindIsEqual);
+ if (tree->gtOper == GT_NE)
+ {
+ foldResult = !foldResult;
+ }
+
+ op2->gtIntCon.gtIconVal = foldResult;
+ op2->gtType = TYP_INT;
+
+ return optAssertionProp_Update(op2, tree, stmt);
+}
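+
+// Illustrative folds (locals and values hypothetical): with "V02 == 3"
+// available, GT_EQ(V02, 3) folds to 1 and GT_NE(V02, 3) folds to 0; with
+// "V02 != 3" available, GT_EQ(V02, 3) folds to 0. The relop is replaced by
+// op2 rewritten as a TYP_INT 0/1 constant via optAssertionProp_Update.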
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of a Cast and a set of available assertions
+ * we try to propagate an assertion and modify the Cast tree if we can.
+ * We pass in the root of the tree via 'stmt', for local copy prop 'stmt'
+ * will be nullptr.
+ *
+ * Returns the modified tree, or nullptr if no assertion prop took place.
+ */
+GenTreePtr Compiler::optAssertionProp_Cast(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ assert(tree->gtOper == GT_CAST);
+
+ var_types toType = tree->gtCast.gtCastType;
+ GenTreePtr op1 = tree->gtCast.CastOp();
+
+ // If we have a cast involving floating point types, then bail.
+ if (varTypeIsFloating(toType) || varTypeIsFloating(op1->TypeGet()))
+ {
+ return nullptr;
+ }
+
+ // Skip over a GT_COMMA node(s), if necessary to get to the lcl.
+ GenTreePtr lcl = op1;
+ while (lcl->gtOper == GT_COMMA)
+ {
+ lcl = lcl->gtOp.gtOp2;
+ }
+
+ // If we don't have a cast of a LCL_VAR then bail.
+ if (lcl->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+ unsigned index = optAssertionIsSubrange(lcl, toType, assertions);
+ if (index != NO_ASSERTION_INDEX)
+ {
+ LclVarDsc* varDsc = &lvaTable[lcl->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvNormalizeOnLoad() || varTypeIsLong(varDsc->TypeGet()))
+ {
+            // For normalize on load variables, the cast must be a narrowing cast in order to be removed.
+ if (genTypeSize(toType) > genTypeSize(varDsc->TypeGet()))
+ {
+ // Can we just remove the GTF_OVERFLOW flag?
+ if ((tree->gtFlags & GTF_OVERFLOW) == 0)
+ {
+ return nullptr;
+ }
+ else
+ {
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSubrange prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ tree->gtFlags &= ~GTF_OVERFLOW; // This cast cannot overflow
+ return optAssertionProp_Update(tree, tree, stmt);
+ }
+ }
+
+ // GT_CAST long -> uint -> int
+ // |
+ // GT_LCL_VAR long
+ //
+ // Where the lclvar is known to be in the range of [0..MAX_UINT]
+ //
+ // A load of a 32-bit unsigned int is the same as a load of a 32-bit signed int
+ //
+ if (toType == TYP_UINT)
+ {
+ toType = TYP_INT;
+ }
+
+ // Change the "lcl" type to match what the cast wanted, by propagating the type
+ // change down the comma nodes leading to the "lcl", if we skipped them earlier.
+ GenTreePtr tmp = op1;
+ while (tmp->gtOper == GT_COMMA)
+ {
+ tmp->gtType = toType;
+ tmp = tmp->gtOp.gtOp2;
+ }
+ noway_assert(tmp == lcl);
+ tmp->gtType = toType;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSubrange prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ return optAssertionProp_Update(op1, tree, stmt);
+ }
+ return nullptr;
+}
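+
+// Illustrative case (local and range hypothetical): for
+//
+//   GT_CAST(int <- long)      with a subrange assertion that V09 is in [0..10]
+//      |
+//   GT_LCL_VAR long V09
+//
+// the cast is removed and the lclvar (plus any interposed GT_COMMA nodes) is
+// retyped to TYP_INT. For a widening cast of a normalize-on-load variable the
+// cast must stay, but a redundant GTF_OVERFLOW flag can still be cleared.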
+
+/*****************************************************************************
+ *
+ * Given a tree with an array bounds check node, eliminate the check because
+ * it has already been performed earlier in the program.
+ */
+GenTreePtr Compiler::optAssertionProp_Comma(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+    // Remove the bounds check as part of the GT_COMMA node since we need the parent pointer to remove nodes.
+    // When the processing visits the bounds check, it sets the throw kind to None if the check is redundant.
+ if ((tree->gtGetOp1()->OperGet() == GT_ARR_BOUNDS_CHECK) &&
+ ((tree->gtGetOp1()->gtFlags & GTF_ARR_BOUND_INBND) != 0))
+ {
+ optRemoveRangeCheck(tree, stmt, true, GTF_ASG, true /* force remove */);
+ return optAssertionProp_Update(tree, tree, stmt);
+ }
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of a Ind and a set of available assertions, we try
+ * to propagate an assertion and modify the Ind tree if we can. We pass in the
+ * root of the tree via 'stmt', for local copy prop 'stmt' will be nullptr.
+ *
+ * Returns the modified tree, or nullptr if no assertion prop took place.
+ *
+ */
+
+GenTreePtr Compiler::optAssertionProp_Ind(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ assert(tree->OperIsIndir());
+
+ // TODO-1stClassStructs: All indirections should be handled here, but
+ // previously, when these indirections were GT_OBJ, or implicit due to a block
+ // copy or init, they were not being handled.
+ if (tree->TypeGet() == TYP_STRUCT)
+ {
+ if (tree->OperIsBlk())
+ {
+ return nullptr;
+ }
+ else
+ {
+ GenTree* parent = tree->gtGetParent(nullptr);
+ if ((parent != nullptr) && parent->OperIsBlkOp())
+ {
+ return nullptr;
+ }
+ }
+ }
+
+ if (!(tree->gtFlags & GTF_EXCEPT))
+ {
+ return nullptr;
+ }
+
+ // Check for add of a constant.
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ if ((op1->gtOper == GT_ADD) && (op1->gtOp.gtOp2->gtOper == GT_CNS_INT))
+ {
+ op1 = op1->gtOp.gtOp1;
+ }
+
+ if (op1->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+
+#ifdef DEBUG
+ bool vnBased = false;
+ AssertionIndex index = NO_ASSERTION_INDEX;
+#endif
+ if (optAssertionIsNonNull(op1, assertions DEBUGARG(&vnBased) DEBUGARG(&index)))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ (vnBased) ? printf("\nVN based non-null prop in BB%02u:\n", compCurBB->bbNum)
+ : printf("\nNon-null prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ tree->gtFlags &= ~GTF_EXCEPT;
+
+ // Set this flag to prevent reordering
+ tree->gtFlags |= GTF_ORDER_SIDEEFF;
+
+ return optAssertionProp_Update(tree, tree, stmt);
+ }
+
+ return nullptr;
+}
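+
+// For example (field names hypothetical): once "a.f = 1;" has established a
+// non-null assertion for 'a', the indirection in a following "x = a.g;" can
+// have GTF_EXCEPT cleared; GTF_ORDER_SIDEEFF is set so the now non-faulting
+// load is still not reordered past other side effects.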
+
+/*****************************************************************************
+ * Check if a non-null assertion can be made about the input operand "op"
+ * from the set of "assertions," or implicitly from the value number on "op."
+ *
+ * Sets "pVnBased" if the assertion is value number based. If no matching
+ * assertions are found from the table, then returns "NO_ASSERTION_INDEX."
+ *
+ * Note: If both VN and assertion table yield a matching assertion, "pVnBased"
+ * is only set and the return value is "NO_ASSERTION_INDEX."
+ */
+bool Compiler::optAssertionIsNonNull(GenTreePtr op,
+ ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased)
+ DEBUGARG(AssertionIndex* pIndex))
+{
+ bool vnBased = (!optLocalAssertionProp && vnStore->IsKnownNonNull(op->gtVNPair.GetConservative()));
+#ifdef DEBUG
+ *pVnBased = vnBased;
+#endif
+
+ if (vnBased)
+ {
+#ifdef DEBUG
+ *pIndex = NO_ASSERTION_INDEX;
+#endif
+ return true;
+ }
+
+ AssertionIndex index = optAssertionIsNonNullInternal(op, assertions);
+#ifdef DEBUG
+ *pIndex = index;
+#endif
+ return index != NO_ASSERTION_INDEX;
+}
+
+/*****************************************************************************
+ * Check if a non-null assertion can be made about the input operand "op"
+ * from the set of "assertions."
+ *
+ */
+Compiler::AssertionIndex Compiler::optAssertionIsNonNullInternal(GenTreePtr op, ASSERT_VALARG_TP assertions)
+{
+ // If local assertion prop use lcl comparison, else use VN comparison.
+ if (!optLocalAssertionProp)
+ {
+ ValueNum vn = op->gtVNPair.GetConservative();
+
+ if (BitVecOps::IsEmpty(apTraits, assertions))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ // Check each assertion to find if we have a vn == or != null assertion.
+ BitVecOps::Iter iter(apTraits, assertions);
+ unsigned index = 0;
+ while (iter.NextElem(apTraits, &index))
+ {
+ index++;
+ if (index > optAssertionCount)
+ {
+ break;
+ }
+ AssertionDsc* curAssertion = optGetAssertion((AssertionIndex)index);
+ if (curAssertion->assertionKind != OAK_NOT_EQUAL)
+ {
+ continue;
+ }
+ if (curAssertion->op1.vn != vn || curAssertion->op2.vn != ValueNumStore::VNForNull())
+ {
+ continue;
+ }
+ return (AssertionIndex)index;
+ }
+ }
+ else
+ {
+ unsigned lclNum = op->AsLclVarCommon()->GetLclNum();
+ // Check each assertion to find if we have a variable == or != null assertion.
+ for (AssertionIndex index = 1; index <= optAssertionCount; index++)
+ {
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ if ((curAssertion->assertionKind == OAK_NOT_EQUAL) && // kind
+ (curAssertion->op1.kind == O1K_LCLVAR) && // op1
+ (curAssertion->op2.kind == O2K_CONST_INT) && // op2
+ (curAssertion->op1.lcl.lclNum == lclNum) && (curAssertion->op2.u1.iconVal == 0))
+ {
+ return index;
+ }
+ }
+ }
+ return NO_ASSERTION_INDEX;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of a call and a set of available assertions, we
+ * try to propagate a non-null assertion and modify the Call tree if we can.
+ * Returns the modified tree, or nullptr if no assertion prop took place.
+ *
+ */
+GenTreePtr Compiler::optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions,
+ const GenTreePtr tree,
+ const GenTreePtr stmt)
+{
+ assert(tree->gtOper == GT_CALL);
+ if ((tree->gtFlags & GTF_CALL_NULLCHECK) == 0)
+ {
+ return nullptr;
+ }
+ GenTreePtr op1 = gtGetThisArg(tree);
+ noway_assert(op1 != nullptr);
+ if (op1->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+#ifdef DEBUG
+ bool vnBased = false;
+ AssertionIndex index = NO_ASSERTION_INDEX;
+#endif
+ if (optAssertionIsNonNull(op1, assertions DEBUGARG(&vnBased) DEBUGARG(&index)))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ (vnBased) ? printf("\nVN based non-null prop in BB%02u:\n", compCurBB->bbNum)
+ : printf("\nNon-null prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ tree->gtFlags &= ~GTF_CALL_NULLCHECK;
+ tree->gtFlags &= ~GTF_EXCEPT;
+ noway_assert(tree->gtFlags & GTF_SIDE_EFFECT);
+ return tree;
+ }
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of a call and a set of available assertions, we
+ * try to propagate an assertion and modify the call tree if we can. Our
+ * current modifications are limited to removing the GTF_CALL_NULLCHECK flag
+ * from the call and to replacing redundant cast/isinst helper calls.
+ * We pass in the root of the tree via 'stmt'; for local assertion prop 'stmt'
+ * will be nullptr. Returns the modified tree, or nullptr if no assertion prop
+ * took place.
+ *
+ */
+
+GenTreePtr Compiler::optAssertionProp_Call(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ assert(tree->gtOper == GT_CALL);
+
+ if (optNonNullAssertionProp_Call(assertions, tree, stmt))
+ {
+ return optAssertionProp_Update(tree, tree, stmt);
+ }
+ else if (!optLocalAssertionProp && (tree->gtCall.gtCallType == CT_HELPER))
+ {
+ if (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFINTERFACE) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFARRAY) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFCLASS) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFANY) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTINTERFACE) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTARRAY) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTCLASS) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTANY) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTCLASS_SPECIAL))
+ {
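+ // For these helpers (with the usual signature of class handle, object), argument 1
+ // is the object being tested and argument 0 is the class handle. The object must be
+ // a local variable so that a subtype assertion can be matched against it; when one
+ // matches, the helper call is redundant and is replaced by the object itself plus
+ // any side effects extracted below.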
+ GenTreePtr arg1 = gtArgEntryByArgNum(tree->AsCall(), 1)->node;
+ if (arg1->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+ GenTreePtr arg2 = gtArgEntryByArgNum(tree->AsCall(), 0)->node;
+
+ unsigned index = optAssertionIsSubtype(arg1, arg2, assertions);
+ if (index != NO_ASSERTION_INDEX)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nDid VN based subtype prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ GenTreePtr list = nullptr;
+ gtExtractSideEffList(tree, &list, GTF_SIDE_EFFECT, true);
+ if (list != nullptr)
+ {
+ arg1 = gtNewOperNode(GT_COMMA, tree->TypeGet(), list, arg1);
+ fgSetTreeSeq(arg1);
+ }
+
+ return optAssertionProp_Update(arg1, tree, stmt);
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of a comma node with a bounds check, remove any
+ * redundant bounds check that has already been checked in the program flow.
+ */
+GenTreePtr Compiler::optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ if (optLocalAssertionProp)
+ {
+ return nullptr;
+ }
+
+ assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
+
+ BitVecOps::Iter iter(apTraits, assertions);
+ unsigned index = 0;
+ while (iter.NextElem(apTraits, &index))
+ {
+ index++;
+ if (index > optAssertionCount)
+ {
+ break;
+ }
+ // If it is not a nothrow assertion, skip.
+ AssertionDsc* curAssertion = optGetAssertion((AssertionIndex)index);
+ if (!curAssertion->IsBoundsCheckNoThrow())
+ {
+ continue;
+ }
+
+ GenTreeBoundsChk* arrBndsChk = tree->AsBoundsChk();
+
+ // Set 'isRedundant' to true if we can determine that 'arrBndsChk' can be
+ // classified as a redundant bounds check using 'curAssertion'
+ bool isRedundant = false;
+#ifdef DEBUG
+ const char* dbgMsg = "Not Set";
+#endif
+
+ // Do we have a previous range check involving the same 'vnLen' upper bound?
+ if (curAssertion->op1.bnd.vnLen == arrBndsChk->gtArrLen->gtVNPair.GetConservative())
+ {
+ ValueNum vnCurIdx = arrBndsChk->gtIndex->gtVNPair.GetConservative();
+
+ // Do we have the exact same lower bound 'vnIdx'?
+ // a[i] followed by a[i]
+ if (curAssertion->op1.bnd.vnIdx == vnCurIdx)
+ {
+ isRedundant = true;
+#ifdef DEBUG
+ dbgMsg = "a[i] followed by a[i]";
+#endif
+ }
+ // Are we using zero as the index?
+ // It can always be considered as redundant with any previous value
+ // a[*] followed by a[0]
+ else if (vnCurIdx == vnStore->VNZeroForType(arrBndsChk->gtIndex->TypeGet()))
+ {
+ isRedundant = true;
+#ifdef DEBUG
+ dbgMsg = "a[*] followed by a[0]";
+#endif
+ }
+ // Do we have two constant indexes?
+ else if (vnStore->IsVNConstant(curAssertion->op1.bnd.vnIdx) && vnStore->IsVNConstant(vnCurIdx))
+ {
+ // Make sure the types match.
+ var_types type1 = vnStore->TypeOfVN(curAssertion->op1.bnd.vnIdx);
+ var_types type2 = vnStore->TypeOfVN(vnCurIdx);
+
+ if (type1 == type2 && type1 == TYP_INT)
+ {
+ int index1 = vnStore->ConstantValue<int>(curAssertion->op1.bnd.vnIdx);
+ int index2 = vnStore->ConstantValue<int>(vnCurIdx);
+
+ // the case where index1 == index2 should have been handled above
+ assert(index1 != index2);
+
+ // It can always be considered as redundant with any previous higher constant value
+ // a[K1] followed by a[K2], with K2 >= 0 and K1 >= K2
+ if (index2 >= 0 && index1 >= index2)
+ {
+ isRedundant = true;
+#ifdef DEBUG
+ dbgMsg = "a[K1] followed by a[K2], with K2 >= 0 and K1 >= K2";
+#endif
+ }
+ }
+ }
+ // Extend this to remove additional redundant bounds checks:
+ // i.e. a[i+1] followed by a[i] by using the VN(i+1) >= VN(i)
+ // a[i] followed by a[j] when j is known to be >= i
+ // a[i] followed by a[5] when i is known to be >= 5
+ }
+
+ if (!isRedundant)
+ {
+ continue;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nVN based redundant (%s) bounds check assertion prop for index #%02u in BB%02u:\n", dbgMsg, index,
+ compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+
+ // Defer actually removing the tree until processing reaches its parent comma, since
+ // optRemoveRangeCheck needs to rewrite the whole comma tree.
+ arrBndsChk->gtFlags |= GTF_ARR_BOUND_INBND;
+ return nullptr;
+ }
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Called when we have successfully performed an assertion prop and have the
+ * newTree in hand. This method replaces the existing tree in the stmt with
+ * the newTree.
+ *
+ */
+
+GenTreePtr Compiler::optAssertionProp_Update(const GenTreePtr newTree, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ noway_assert(newTree != nullptr);
+
+ if (stmt == nullptr)
+ {
+ noway_assert(optLocalAssertionProp);
+ }
+ else
+ {
+ noway_assert(!optLocalAssertionProp);
+
+ // If newTree == tree then we modified the tree in-place otherwise we have to
+ // locate our parent node and update it so that it points to newTree
+ if (newTree != tree)
+ {
+ GenTreePtr* link = gtFindLink(stmt, tree);
+#ifdef DEBUG
+ if (link == nullptr)
+ {
+ noway_assert(!"gtFindLink failed!");
+ printf("\nCould not find parent of:\n");
+ gtDispTree(tree);
+ printf("\nIn this stmt:\n");
+ gtDispTree(stmt);
+ }
+#endif
+ noway_assert(link != nullptr);
+ noway_assert(tree != nullptr);
+ if (link != nullptr)
+ {
+ // Replace the old operand with the newTree
+ *link = newTree;
+
+ // We only need to ensure that the gtNext field is set as it is used to traverse
+ // to the next node in the tree. We will re-morph this entire statement in
+ // optAssertionPropMain(). It will reset the gtPrev and gtNext links for all nodes.
+
+ newTree->gtNext = tree->gtNext;
+ }
+ }
+ }
+
+ // Record that we propagated the assertion.
+ optAssertionPropagated = true;
+ optAssertionPropagatedCurrentStmt = true;
+
+ return newTree;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree and a set of available assertions, we try to propagate an
+ * assertion and modify 'tree' if we can. We pass in the root of the tree
+ * via 'stmt'; for local assertion prop 'stmt' will be nullptr.
+ *
+ * Returns the modified tree, or nullptr if no assertion prop took place.
+ */
+
+GenTreePtr Compiler::optAssertionProp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ switch (tree->gtOper)
+ {
+ case GT_LCL_VAR:
+ return optAssertionProp_LclVar(assertions, tree, stmt);
+
+ case GT_OBJ:
+ case GT_BLK:
+ case GT_DYN_BLK:
+ case GT_IND:
+ case GT_NULLCHECK:
+ return optAssertionProp_Ind(assertions, tree, stmt);
+
+ case GT_ARR_BOUNDS_CHECK:
+ return optAssertionProp_BndsChk(assertions, tree, stmt);
+
+ case GT_COMMA:
+ return optAssertionProp_Comma(assertions, tree, stmt);
+
+ case GT_CAST:
+ return optAssertionProp_Cast(assertions, tree, stmt);
+
+ case GT_CALL:
+ return optAssertionProp_Call(assertions, tree, stmt);
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GT:
+ case GT_GE:
+
+ return optAssertionProp_RelOp(assertions, tree, stmt);
+
+ default:
+ return nullptr;
+ }
+}
+
+//------------------------------------------------------------------------
+// optImpliedAssertions: Given an assertion that has just become true, this
+//                       method computes the set of implied assertions that
+//                       are also true and adds them to "activeAssertions".
+//
+// Arguments:
+//      assertionIndex   : The id of the assertion.
+//      activeAssertions : The assertions that are already true at this point;
+//                         implied assertions are added to this set.
+
+void Compiler::optImpliedAssertions(AssertionIndex assertionIndex, ASSERT_TP& activeAssertions)
+{
+ noway_assert(!optLocalAssertionProp);
+ noway_assert(assertionIndex != 0);
+ noway_assert(assertionIndex <= optAssertionCount);
+
+ AssertionDsc* curAssertion = optGetAssertion(assertionIndex);
+ if (!BitVecOps::IsEmpty(apTraits, activeAssertions))
+ {
+ const ASSERT_TP mappedAssertions = optGetVnMappedAssertions(curAssertion->op1.vn);
+ if (mappedAssertions == nullptr)
+ {
+ return;
+ }
+
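+ // Work on a private copy of the VN-mapped set so that the union/intersection below
+ // does not disturb the set stored in the VN-to-assertions map.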
+ ASSERT_TP chkAssertions = BitVecOps::MakeCopy(apTraits, mappedAssertions);
+
+ if (curAssertion->op2.kind == O2K_LCLVAR_COPY)
+ {
+ const ASSERT_TP op2Assertions = optGetVnMappedAssertions(curAssertion->op2.vn);
+ if (op2Assertions != nullptr)
+ {
+ BitVecOps::UnionD(apTraits, chkAssertions, op2Assertions);
+ }
+ }
+ BitVecOps::IntersectionD(apTraits, chkAssertions, activeAssertions);
+
+ if (BitVecOps::IsEmpty(apTraits, chkAssertions))
+ {
+ return;
+ }
+
+ // Check each assertion in chkAssertions to see if it can be applied to curAssertion
+ BitVecOps::Iter chkIter(apTraits, chkAssertions);
+ unsigned chkIndex = 0;
+ while (chkIter.NextElem(apTraits, &chkIndex))
+ {
+ chkIndex++;
+ if (chkIndex > optAssertionCount)
+ {
+ break;
+ }
+ if (chkIndex == assertionIndex)
+ {
+ continue;
+ }
+
+ // Determine which one is a copy assertion and use the other to check for implied assertions.
+ AssertionDsc* iterAssertion = optGetAssertion((AssertionIndex)chkIndex);
+ if (curAssertion->IsCopyAssertion())
+ {
+ optImpliedByCopyAssertion(curAssertion, iterAssertion, activeAssertions);
+ }
+ else if (iterAssertion->IsCopyAssertion())
+ {
+ optImpliedByCopyAssertion(iterAssertion, curAssertion, activeAssertions);
+ }
+ }
+ }
+ // Is curAssertion a constant assignment of a 32-bit integer?
+ // (i.e. GT_LCL_VAR X == GT_CNS_INT)
+ else if ((curAssertion->assertionKind == OAK_EQUAL) && (curAssertion->op1.kind == O1K_LCLVAR) &&
+ (curAssertion->op2.kind == O2K_CONST_INT))
+ {
+ optImpliedByConstAssertion(curAssertion, activeAssertions);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Given a set of active assertions, this method computes the set of
+ * implied non-null assertions that are also true and adds them to the set.
+ */
+
+void Compiler::optImpliedByTypeOfAssertions(ASSERT_TP& activeAssertions)
+{
+ if (BitVecOps::IsEmpty(apTraits, activeAssertions))
+ {
+ return;
+ }
+
+ // Check each assertion in activeAssertions to see if it is an exact-type or subtype equality assertion
+ BitVecOps::Iter chkIter(apTraits, activeAssertions);
+ unsigned chkIndex = 0;
+ while (chkIter.NextElem(apTraits, &chkIndex))
+ {
+ chkIndex++;
+ if (chkIndex > optAssertionCount)
+ {
+ break;
+ }
+ // chkAssertion must be an OAK_EQUAL assertion of kind exact-type or subtype
+ AssertionDsc* chkAssertion = optGetAssertion((AssertionIndex)chkIndex);
+ if ((chkAssertion->op1.kind != O1K_SUBTYPE && chkAssertion->op1.kind != O1K_EXACT_TYPE) ||
+ (chkAssertion->assertionKind != OAK_EQUAL))
+ {
+ continue;
+ }
+
+ // Search the assertion table for a non-null assertion on op1 that matches chkAssertion
+ for (unsigned impIndex = 1; impIndex <= optAssertionCount; impIndex++)
+ {
+ AssertionDsc* impAssertion = optGetAssertion((AssertionIndex)impIndex);
+
+ // The impAssertion must be different from the chkAssertion
+ if (impIndex == chkIndex)
+ {
+ continue;
+ }
+
+ // impAssertion must be a non-null assertion on the same value number as chkAssertion's op1
+ if ((impAssertion->assertionKind != OAK_NOT_EQUAL) ||
+ ((impAssertion->op1.kind != O1K_LCLVAR) && (impAssertion->op1.kind != O1K_VALUE_NUMBER)) ||
+ (impAssertion->op2.kind != O2K_CONST_INT) || (impAssertion->op1.vn != chkAssertion->op1.vn))
+ {
+ continue;
+ }
+
+ // The bit may already be in the result set
+ if (!BitVecOps::IsMember(apTraits, activeAssertions, impIndex - 1))
+ {
+ BitVecOps::AddElemD(apTraits, activeAssertions, impIndex - 1);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCompiler::optImpliedByTypeOfAssertions: %s Assertion #%02d, implies assertion #%02d",
+ (chkAssertion->op1.kind == O1K_SUBTYPE) ? "Subtype" : "Exact-type", chkIndex, impIndex);
+ }
+#endif
+ }
+
+ // There is at most one non-null assertion that is implied by the current chkIndex assertion
+ break;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// optGetVnMappedAssertions: Given a value number, get the assertions
+// we have about the value number.
+//
+// Arguments:
+// vn - The given value number.
+//
+// Return Value:
+//    The assertions we have about the value number, or BitVecOps::UninitVal() if there are none.
+//
+
+ASSERT_VALRET_TP Compiler::optGetVnMappedAssertions(ValueNum vn)
+{
+ ASSERT_TP set = BitVecOps::UninitVal();
+ if (optValueNumToAsserts->Lookup(vn, &set))
+ {
+ return set;
+ }
+ return BitVecOps::UninitVal();
+}
+
+/*****************************************************************************
+ *
+ * Given a const assertion, this method computes the set of implied assertions
+ * that are also true.
+ */
+
+void Compiler::optImpliedByConstAssertion(AssertionDsc* constAssertion, ASSERT_TP& result)
+{
+ noway_assert(constAssertion->assertionKind == OAK_EQUAL);
+ noway_assert(constAssertion->op1.kind == O1K_LCLVAR);
+ noway_assert(constAssertion->op2.kind == O2K_CONST_INT);
+
+ ssize_t iconVal = constAssertion->op2.u1.iconVal;
+
+ const ASSERT_TP chkAssertions = optGetVnMappedAssertions(constAssertion->op1.vn);
+ if (chkAssertions == nullptr || BitVecOps::IsEmpty(apTraits, chkAssertions))
+ {
+ return;
+ }
+
+ // Check each assertion in chkAssertions to see if it can be applied to constAssertion
+ BitVecOps::Iter chkIter(apTraits, chkAssertions);
+ unsigned chkIndex = 0;
+ while (chkIter.NextElem(apTraits, &chkIndex))
+ {
+ chkIndex++;
+ if (chkIndex > optAssertionCount)
+ {
+ break;
+ }
+ // The impAssertion must be different from the const assertion.
+ AssertionDsc* impAssertion = optGetAssertion((AssertionIndex)chkIndex);
+ if (impAssertion == constAssertion)
+ {
+ continue;
+ }
+
+ // The impAssertion must be an assertion about the same local var.
+ if (impAssertion->op1.vn != constAssertion->op1.vn)
+ {
+ continue;
+ }
+
+ bool usable = false;
+ switch (impAssertion->op2.kind)
+ {
+ case O2K_SUBRANGE:
+ // Is the const assertion's constant within the implied assertion's bounds?
+ usable = ((iconVal >= impAssertion->op2.u2.loBound) && (iconVal <= impAssertion->op2.u2.hiBound));
+ break;
+
+ case O2K_CONST_INT:
+ // Is the const assertion's constant equal/not equal to the implied assertion?
+ usable = ((impAssertion->assertionKind == OAK_EQUAL) && (impAssertion->op2.u1.iconVal == iconVal)) ||
+ ((impAssertion->assertionKind == OAK_NOT_EQUAL) && (impAssertion->op2.u1.iconVal != iconVal));
+ break;
+
+ default:
+ // leave 'usable' = false;
+ break;
+ }
+
+ if (usable)
+ {
+ BitVecOps::AddElemD(apTraits, result, chkIndex - 1);
+#ifdef DEBUG
+ if (verbose)
+ {
+ AssertionDsc* firstAssertion = optGetAssertion(1);
+ printf("\nCompiler::optImpliedByConstAssertion: constAssertion #%02d , implies assertion #%02d",
+ (constAssertion - firstAssertion) + 1, (impAssertion - firstAssertion) + 1);
+ }
+#endif
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Given a copy assertion and a dependent assertion, this method computes the
+ * set of implied assertions that are also true.
+ * For copy assertions the exact SSA num and lcl num must match, because we
+ * don't have kill sets and we depend on their value numbers for dataflow.
+ */
+
+void Compiler::optImpliedByCopyAssertion(AssertionDsc* copyAssertion, AssertionDsc* depAssertion, ASSERT_TP& result)
+{
+ noway_assert(copyAssertion->IsCopyAssertion());
+
+ // Get the copyAssert's lcl/ssa nums.
+ unsigned copyAssertLclNum = BAD_VAR_NUM;
+ unsigned copyAssertSsaNum = SsaConfig::RESERVED_SSA_NUM;
+
+ // Check if copyAssertion's op1 or op2 matches the depAssertion's op1.
+ if (depAssertion->op1.lcl.lclNum == copyAssertion->op1.lcl.lclNum)
+ {
+ copyAssertLclNum = copyAssertion->op2.lcl.lclNum;
+ copyAssertSsaNum = copyAssertion->op2.lcl.ssaNum;
+ }
+ else if (depAssertion->op1.lcl.lclNum == copyAssertion->op2.lcl.lclNum)
+ {
+ copyAssertLclNum = copyAssertion->op1.lcl.lclNum;
+ copyAssertSsaNum = copyAssertion->op1.lcl.ssaNum;
+ }
+ // Check if copyAssertion's op1 or op2 matches the depAssertion's op2.
+ else if (depAssertion->op2.kind == O2K_LCLVAR_COPY)
+ {
+ if (depAssertion->op2.lcl.lclNum == copyAssertion->op1.lcl.lclNum)
+ {
+ copyAssertLclNum = copyAssertion->op2.lcl.lclNum;
+ copyAssertSsaNum = copyAssertion->op2.lcl.ssaNum;
+ }
+ else if (depAssertion->op2.lcl.lclNum == copyAssertion->op2.lcl.lclNum)
+ {
+ copyAssertLclNum = copyAssertion->op1.lcl.lclNum;
+ copyAssertSsaNum = copyAssertion->op1.lcl.ssaNum;
+ }
+ }
+
+ if (copyAssertLclNum == BAD_VAR_NUM || copyAssertSsaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ return;
+ }
+
+ // Get the depAssert's lcl/ssa nums.
+ unsigned depAssertLclNum = BAD_VAR_NUM;
+ unsigned depAssertSsaNum = SsaConfig::RESERVED_SSA_NUM;
+ if ((depAssertion->op1.kind == O1K_LCLVAR) && (depAssertion->op2.kind == O2K_LCLVAR_COPY))
+ {
+ if ((depAssertion->op1.lcl.lclNum == copyAssertion->op1.lcl.lclNum) ||
+ (depAssertion->op1.lcl.lclNum == copyAssertion->op2.lcl.lclNum))
+ {
+ depAssertLclNum = depAssertion->op2.lcl.lclNum;
+ depAssertSsaNum = depAssertion->op2.lcl.ssaNum;
+ }
+ else if ((depAssertion->op2.lcl.lclNum == copyAssertion->op1.lcl.lclNum) ||
+ (depAssertion->op2.lcl.lclNum == copyAssertion->op2.lcl.lclNum))
+ {
+ depAssertLclNum = depAssertion->op1.lcl.lclNum;
+ depAssertSsaNum = depAssertion->op1.lcl.ssaNum;
+ }
+ }
+
+ if (depAssertLclNum == BAD_VAR_NUM || depAssertSsaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ return;
+ }
+
+ // Is depAssertion a constant assignment of a 32-bit integer?
+ // (i.e. GT_LCL_VAR X == GT_CNS_INT)
+ bool depIsConstAssertion = ((depAssertion->assertionKind == OAK_EQUAL) && (depAssertion->op1.kind == O1K_LCLVAR) &&
+ (depAssertion->op2.kind == O2K_CONST_INT));
+
+ // Search the assertion table for an assertion on op1 that matches depAssertion
+ // The matching assertion is the implied assertion.
+ for (AssertionIndex impIndex = 1; impIndex <= optAssertionCount; impIndex++)
+ {
+ AssertionDsc* impAssertion = optGetAssertion(impIndex);
+
+ // The impAssertion must be different from the copy and dependent assertions
+ if (impAssertion == copyAssertion || impAssertion == depAssertion)
+ {
+ continue;
+ }
+
+ if (!AssertionDsc::SameKind(depAssertion, impAssertion))
+ {
+ continue;
+ }
+
+ bool op1MatchesCopy =
+ (copyAssertLclNum == impAssertion->op1.lcl.lclNum) && (copyAssertSsaNum == impAssertion->op1.lcl.ssaNum);
+
+ bool usable = false;
+ switch (impAssertion->op2.kind)
+ {
+ case O2K_SUBRANGE:
+ usable = op1MatchesCopy && ((impAssertion->op2.u2.loBound <= depAssertion->op2.u2.loBound) &&
+ (impAssertion->op2.u2.hiBound >= depAssertion->op2.u2.hiBound));
+ break;
+
+ case O2K_CONST_LONG:
+ usable = op1MatchesCopy && (impAssertion->op2.lconVal == depAssertion->op2.lconVal);
+ break;
+
+ case O2K_CONST_DOUBLE:
+ // Exact memory match because of positive and negative zero
+ usable = op1MatchesCopy &&
+ (memcmp(&impAssertion->op2.dconVal, &depAssertion->op2.dconVal, sizeof(double)) == 0);
+ break;
+
+ case O2K_IND_CNS_INT:
+ // This is the ngen case where we have an indirection of an address.
+ noway_assert((impAssertion->op1.kind == O1K_EXACT_TYPE) || (impAssertion->op1.kind == O1K_SUBTYPE));
+
+ __fallthrough;
+
+ case O2K_CONST_INT:
+ usable = op1MatchesCopy && (impAssertion->op2.u1.iconVal == depAssertion->op2.u1.iconVal);
+ break;
+
+ case O2K_LCLVAR_COPY:
+ // Check if op1 of impAssertion matches copyAssertion and also op2 of impAssertion matches depAssertion.
+ if (op1MatchesCopy && (depAssertLclNum == impAssertion->op2.lcl.lclNum &&
+ depAssertSsaNum == impAssertion->op2.lcl.ssaNum))
+ {
+ usable = true;
+ }
+ else
+ {
+ // Otherwise, op2 of impAssertion should match copyAssertion and also op1 of impAssertion matches
+ // depAssertion.
+ usable = ((copyAssertLclNum == impAssertion->op2.lcl.lclNum &&
+ copyAssertSsaNum == impAssertion->op2.lcl.ssaNum) &&
+ (depAssertLclNum == impAssertion->op1.lcl.lclNum &&
+ depAssertSsaNum == impAssertion->op1.lcl.ssaNum));
+ }
+ break;
+
+ default:
+ // leave 'usable' = false;
+ break;
+ }
+
+ if (usable)
+ {
+ BitVecOps::AddElemD(apTraits, result, impIndex - 1);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ AssertionDsc* firstAssertion = optGetAssertion(1);
+ printf("\nCompiler::optImpliedByCopyAssertion: copyAssertion #%02d and depAssertion #%02d, implies "
+ "assertion #%02d",
+ (copyAssertion - firstAssertion) + 1, (depAssertion - firstAssertion) + 1,
+ (impAssertion - firstAssertion) + 1);
+ }
+#endif
+ // If the depAssertion is a const assertion then any other assertions that it implies could also imply a
+ // subrange assertion.
+ if (depIsConstAssertion)
+ {
+ optImpliedByConstAssertion(impAssertion, result);
+ }
+ }
+ }
+}
+
+#include "dataflow.h"
+
+/*****************************************************************************
+ *
+ * Dataflow visitor-like callback, so that all dataflow logic is in a single place.
+ *
+ */
+class AssertionPropFlowCallback
+{
+private:
+ ASSERT_TP preMergeOut;
+ ASSERT_TP preMergeJumpDestOut;
+
+ ASSERT_TP* mJumpDestOut;
+ ASSERT_TP* mJumpDestGen;
+
+ Compiler* m_pCompiler;
+ BitVecTraits* apTraits;
+
+public:
+ AssertionPropFlowCallback(Compiler* pCompiler, ASSERT_TP* jumpDestOut, ASSERT_TP* jumpDestGen)
+ : preMergeOut(BitVecOps::UninitVal())
+ , preMergeJumpDestOut(BitVecOps::UninitVal())
+ , mJumpDestOut(jumpDestOut)
+ , mJumpDestGen(jumpDestGen)
+ , m_pCompiler(pCompiler)
+ , apTraits(pCompiler->apTraits)
+ {
+ }
+
+ // At the start of the merge function of the dataflow equations, initialize the premerge state (used to detect changes).
+ void StartMerge(BasicBlock* block)
+ {
+ JITDUMP("AssertionPropCallback::StartMerge: BB%02d in -> %s\n", block->bbNum,
+ BitVecOps::ToString(apTraits, block->bbAssertionIn));
+ BitVecOps::Assign(apTraits, preMergeOut, block->bbAssertionOut);
+ BitVecOps::Assign(apTraits, preMergeJumpDestOut, mJumpDestOut[block->bbNum]);
+ }
+
+ // During merge, merge in the dataflow flags of each predecessor (predecessors, since this is a forward analysis).
+ void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds)
+ {
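+ // If the predecessor reaches this block via its conditional jump, use the OUT set
+ // computed for that jump edge; otherwise use the predecessor's ordinary OUT set.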
+ ASSERT_TP pAssertionOut = ((predBlock->bbJumpKind == BBJ_COND) && (predBlock->bbJumpDest == block))
+ ? mJumpDestOut[predBlock->bbNum]
+ : predBlock->bbAssertionOut;
+ JITDUMP("AssertionPropCallback::Merge : BB%02d in -> %s, predBlock BB%02d out -> %s\n", block->bbNum,
+ BitVecOps::ToString(apTraits, block->bbAssertionIn), predBlock->bbNum,
+ BitVecOps::ToString(apTraits, predBlock->bbAssertionOut));
+ BitVecOps::IntersectionD(apTraits, block->bbAssertionIn, pAssertionOut);
+ }
+
+ // At the end of the merge, store the results of the dataflow equations in the postmerge state.
+ bool EndMerge(BasicBlock* block)
+ {
+ JITDUMP("AssertionPropCallback::EndMerge : BB%02d in -> %s\n\n", block->bbNum,
+ BitVecOps::ToString(apTraits, block->bbAssertionIn));
+
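+ // Apply the forward dataflow equations: out &= (in | gen), and similarly
+ // jumpDestOut &= (in | jumpDestGen) for the conditional-jump edge.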
+ // PERF: eliminate this tmp by passing in an OperationTree (AST) to the bitset,
+ // so the expression tree is evaluated at the bit level. See "expression templates."
+ ASSERT_TP tmp = BitVecOps::MakeCopy(apTraits, block->bbAssertionIn);
+ BitVecOps::UnionD(apTraits, tmp, block->bbAssertionGen);
+ BitVecOps::IntersectionD(apTraits, block->bbAssertionOut, tmp);
+
+ BitVecOps::Assign(apTraits, tmp, block->bbAssertionIn);
+ BitVecOps::UnionD(apTraits, tmp, mJumpDestGen[block->bbNum]);
+ BitVecOps::IntersectionD(apTraits, mJumpDestOut[block->bbNum], tmp);
+
+ bool changed = (!BitVecOps::Equal(apTraits, preMergeOut, block->bbAssertionOut) ||
+ !BitVecOps::Equal(apTraits, preMergeJumpDestOut, mJumpDestOut[block->bbNum]));
+
+ if (changed)
+ {
+ JITDUMP("AssertionPropCallback::Changed : BB%02d before out -> %s; after out -> %s;\n"
+ "\t\tjumpDest before out -> %s; jumpDest after out -> %s;\n\n",
+ block->bbNum, BitVecOps::ToString(apTraits, preMergeOut),
+ BitVecOps::ToString(apTraits, block->bbAssertionOut),
+ BitVecOps::ToString(apTraits, preMergeJumpDestOut),
+ BitVecOps::ToString(apTraits, mJumpDestOut[block->bbNum]));
+ }
+ else
+ {
+ JITDUMP("AssertionPropCallback::Unchanged : BB%02d out -> %s; \t\tjumpDest out -> %s\n\n", block->bbNum,
+ BitVecOps::ToString(apTraits, block->bbAssertionOut),
+ BitVecOps::ToString(apTraits, mJumpDestOut[block->bbNum]));
+ }
+
+ return changed;
+ }
+};
+
+ASSERT_VALRET_TP Compiler::optNewFullAssertSet()
+{
+ return BitVecOps::MakeCopy(apTraits, apFull);
+}
+
+ASSERT_VALRET_TP Compiler::optNewEmptyAssertSet()
+{
+ return BitVecOps::MakeCopy(apTraits, apEmpty);
+}
+
+/*****************************************************************************
+ *
+ * Compute the assertions generated by each block.
+ */
+ASSERT_TP* Compiler::optComputeAssertionGen()
+{
+ ASSERT_TP* jumpDestGen = fgAllocateTypeForEachBlk<ASSERT_TP>();
+
+ ASSERT_TP valueGen = BitVecOps::MakeEmpty(apTraits);
+ ASSERT_TP jumpDestValueGen = BitVecOps::MakeEmpty(apTraits);
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ jumpDestGen[block->bbNum] = BitVecOps::MakeEmpty(apTraits);
+
+ BitVecOps::ClearD(apTraits, valueGen);
+ BitVecOps::ClearD(apTraits, jumpDestValueGen);
+
+ // Walk the statement trees in this basic block.
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ // Store whatever we have accumulated into jumpDest edge's valueGen.
+ if (tree->gtOper == GT_JTRUE)
+ {
+ BitVecOps::Assign(apTraits, jumpDestValueGen, valueGen);
+ }
+ if (!tree->HasAssertion())
+ {
+ continue;
+ }
+
+ // For regular trees, just update valueGen. For GT_JTRUE, the false (fall-through)
+ // part updates valueGen and the true (taken) part updates jumpDestValueGen.
+ AssertionIndex assertionIndex[2] = {(AssertionIndex)tree->GetAssertion(),
+ (tree->OperGet() == GT_JTRUE)
+ ? optFindComplementary((AssertionIndex)tree->GetAssertion())
+ : 0};
+
+ for (unsigned i = 0; i < 2; ++i)
+ {
+ if (assertionIndex[i] > 0)
+ {
+ // For GT_JTRUE, the true part (i == 0) uses jumpDestValueGen.
+ ASSERT_TP& gen = (i == 0 && tree->OperGet() == GT_JTRUE) ? jumpDestValueGen : valueGen;
+ optImpliedAssertions(assertionIndex[i], gen);
+ BitVecOps::AddElemD(apTraits, gen, assertionIndex[i] - 1);
+ }
+ }
+ }
+ }
+
+ BitVecOps::Assign(apTraits, block->bbAssertionGen, valueGen);
+ BitVecOps::Assign(apTraits, jumpDestGen[block->bbNum], jumpDestValueGen);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nBB%02u valueGen = %s", block->bbNum, BitVecOps::ToString(apTraits, valueGen));
+ if (block->bbJumpKind == BBJ_COND)
+ {
+ printf(" => BB%02u valueGen = %s,", block->bbJumpDest->bbNum,
+ BitVecOps::ToString(apTraits, jumpDestValueGen));
+ }
+ }
+#endif
+ }
+ return jumpDestGen;
+}
+
+/*****************************************************************************
+ *
+ * Initialize the assertion data flow flags that will be propagated.
+ */
+
+ASSERT_TP* Compiler::optInitAssertionDataflowFlags()
+{
+ ASSERT_TP* jumpDestOut = fgAllocateTypeForEachBlk<ASSERT_TP>();
+
+ // The local assertion gen phase may have created unreachable blocks.
+ // They will never be visited in the dataflow propagation phase, so they need to
+ // be initialized correctly. This means that instead of setting their sets to
+ // apFull (i.e. all possible bits set), we need to set the bits only for valid
+ // assertions (note that at this point we are not creating any new assertions).
+ // Also note that assertion indices start from 1.
+ ASSERT_TP apValidFull = optNewEmptyAssertSet();
+ for (int i = 1; i <= optAssertionCount; i++)
+ {
+ BitVecOps::AddElemD(apTraits, apValidFull, i - 1);
+ }
+
+ // Initially estimate the OUT sets to everything except killed expressions.
+ // Also set the IN sets to all ones, so that we can perform the intersection.
+ // Finally, zero out the flags for handler blocks, as we could be in the
+ // handler due to an exception bypassing the regular program flow which
+ // actually generates assertions along the bbAssertionOut/jumpDestOut
+ // edges.
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ block->bbAssertionIn = optNewEmptyAssertSet();
+ if (!bbIsHandlerBeg(block))
+ {
+ BitVecOps::Assign(apTraits, block->bbAssertionIn, apValidFull);
+ }
+ block->bbAssertionGen = optNewEmptyAssertSet();
+ block->bbAssertionOut = optNewEmptyAssertSet();
+ BitVecOps::Assign(apTraits, block->bbAssertionOut, apValidFull);
+ jumpDestOut[block->bbNum] = optNewEmptyAssertSet();
+ BitVecOps::Assign(apTraits, jumpDestOut[block->bbNum], apValidFull);
+ }
+ // Compute the data flow values for all tracked expressions
+ // IN and OUT never change for the initial basic block B1
+ BitVecOps::Assign(apTraits, fgFirstBB->bbAssertionIn, apEmpty);
+ return jumpDestOut;
+}
+
+// Callback data for the VN based constant prop visitor.
+struct VNAssertionPropVisitorInfo
+{
+ Compiler* pThis;
+ GenTreePtr stmt;
+ BasicBlock* block;
+ VNAssertionPropVisitorInfo(Compiler* pThis, BasicBlock* block, GenTreePtr stmt)
+ : pThis(pThis), stmt(stmt), block(block)
+ {
+ }
+};
+
+//------------------------------------------------------------------------------
+// optPrepareTreeForReplacement
+// Updates ref counts and extracts side effects from a tree so it can be
+// replaced with a comma separated list of side effects + a new tree.
+//
+// Note:
+// The old and new trees may be the same. In this case, the tree will be
+// appended to the side-effect list (if present) and returned.
+//
+// Arguments:
+// oldTree - The tree node to be dropped from the stmt expr.
+// newTree - The tree node to append to the side effect list from "oldTree".
+//
+// Return Value:
+// Returns a comma separated list of side-effects present in the "oldTree".
+// When "newTree" is non-null:
+// 1. When side-effects are present in oldTree, newTree will be appended to the
+// comma separated list.
+// 2. When no side effects are present, then returns the "newTree" without
+// any list.
+// When "newTree" is null:
+// 1. Returns the extracted side-effects from "oldTree"
+// 2. When no side-effects are present, returns null.
+//
+// Description:
+// Decrements ref counts for the "oldTree" that is going to be replaced. If there
+// are side effects in the tree, then ref counts for variables in the side effects
+// are incremented because they need to be kept in the stmt expr.
+//
+// Either the "newTree" is returned when no side effects are present or a comma
+// separated side effect list with "newTree" is returned.
+//
+GenTreePtr Compiler::optPrepareTreeForReplacement(GenTreePtr oldTree, GenTreePtr newTree)
+{
+ // If we have side effects, extract them and append newTree to the list.
+ GenTreePtr sideEffList = nullptr;
+ if (oldTree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS)
+ {
+ gtExtractSideEffList(oldTree, &sideEffList, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE);
+ }
+ if (sideEffList)
+ {
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+
+ // Increment the ref counts as we want to keep the side effects.
+ lvaRecursiveIncRefCounts(sideEffList);
+
+ if (newTree)
+ {
+ newTree = gtNewOperNode(GT_COMMA, newTree->TypeGet(), sideEffList, newTree);
+ }
+ else
+ {
+ newTree = sideEffList;
+ }
+ }
+
+ // Decrement the ref counts as the oldTree is going to be dropped.
+ lvaRecursiveDecRefCounts(oldTree);
+ return newTree;
+}
+
+//------------------------------------------------------------------------------
+// optVNConstantPropOnJTrue
+// Constant propagate on the JTrue node by extracting side effects and moving
+// them into their own statements. The relop node is then modified to yield
+// true or false, so the branch can be folded.
+//
+// Arguments:
+// block - The block that contains the JTrue.
+// stmt - The JTrue stmt which can be evaluated to a constant.
+// tree - The JTrue node whose relop evaluates to 0 or non-zero value.
+//
+// Return Value:
+// The jmpTrue tree node that has relop of the form "0 =/!= 0".
+// If "tree" evaluates to "true" relop is "0 == 0". Else relop is "0 != 0".
+//
+// Description:
+// Special treatment for JTRUE nodes' constant propagation. This is because
+// for JTRUE(1) or JTRUE(0), if there are side effects they need to be put
+// in separate statements. This is to prevent relop's constant
+//  propagation from doing a simple-minded conversion from
+// (1) STMT(JTRUE(RELOP(COMMA(sideEffect, OP1), OP2)), S.T. op1 =/!= op2 to
+// (2) STMT(JTRUE(COMMA(sideEffect, 1/0)).
+//
+//  fgFoldConditional doesn't fold (2), a JTRUE whose op1 carries side effects. So here
+//  we convert (1) into two statements: STMT(sideEffect), STMT(JTRUE(1/0)),
+// so that the JTRUE will get folded by fgFoldConditional.
+//
+// Note: fgFoldConditional is called from other places as well, which may be
+// sensitive to adding new statements. Hence the change is not made directly
+// into fgFoldConditional.
+//
+GenTreePtr Compiler::optVNConstantPropOnJTrue(BasicBlock* block, GenTreePtr stmt, GenTreePtr test)
+{
+ GenTreePtr relop = test->gtGetOp1();
+
+ // VN based assertion non-null on this relop has been performed.
+ if (!relop->OperIsCompare())
+ {
+ return nullptr;
+ }
+
+ //
+ // Make sure GTF_RELOP_JMP_USED flag is set so that we can later skip constant
+ // prop'ing a JTRUE's relop child node for a second time in the pre-order
+ // tree walk.
+ //
+ assert((relop->gtFlags & GTF_RELOP_JMP_USED) != 0);
+
+ if (!vnStore->IsVNConstant(relop->gtVNPair.GetConservative()))
+ {
+ return nullptr;
+ }
+
+ // Prepare the tree for replacement so any side effects can be extracted.
+ GenTreePtr sideEffList = optPrepareTreeForReplacement(test, nullptr);
+
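+ // Peel the comma-chained side effects off one at a time, inserting each as its own
+ // statement near the end of the block, so the JTRUE left behind is free of side effects.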
+ while (sideEffList)
+ {
+ GenTreePtr newStmt;
+ if (sideEffList->OperGet() == GT_COMMA)
+ {
+ newStmt = fgInsertStmtNearEnd(block, sideEffList->gtGetOp1());
+ sideEffList = sideEffList->gtGetOp2();
+ }
+ else
+ {
+ newStmt = fgInsertStmtNearEnd(block, sideEffList);
+ sideEffList = nullptr;
+ }
+ fgMorphBlockStmt(block, newStmt DEBUGARG(__FUNCTION__));
+ gtSetStmtInfo(newStmt);
+ fgSetStmtSeq(newStmt);
+ }
+
+ // Transform the relop's operands to be both zeroes.
+ ValueNum vnZero = vnStore->VNZeroForType(TYP_INT);
+ relop->gtOp.gtOp1 = gtNewIconNode(0);
+ relop->gtOp.gtOp1->gtVNPair = ValueNumPair(vnZero, vnZero);
+ relop->gtOp.gtOp2 = gtNewIconNode(0);
+ relop->gtOp.gtOp2->gtVNPair = ValueNumPair(vnZero, vnZero);
+
+ // Update the oper and restore the value numbers.
+ ValueNum vnCns = relop->gtVNPair.GetConservative();
+ ValueNum vnLib = relop->gtVNPair.GetLiberal();
+ bool evalsToTrue = vnStore->CoercedConstantValue<INT64>(vnCns) != 0;
+ relop->SetOper(evalsToTrue ? GT_EQ : GT_NE);
+ relop->gtVNPair = ValueNumPair(vnLib, vnCns);
+
+ return test;
+}
+
+//------------------------------------------------------------------------------
+// optVNConstantPropCurStmt
+// Performs constant prop on the current statement's tree nodes.
+//
+// Assumption:
+// This function is called as part of a pre-order tree walk.
+//
+// Arguments:
+// tree - The currently visited tree node.
+// stmt - The statement node in which the "tree" is present.
+// block - The block that contains the statement that contains the tree.
+//
+// Return Value:
+// Returns the standard visitor walk result.
+//
+// Description:
+// Checks if a node is an R-value and evaluates to a constant. If the node
+// evaluates to constant, then the tree is replaced by its side effects and
+// the constant node.
+//
+Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree)
+{
+ // Don't propagate floating-point constants into a TYP_STRUCT LclVar
+ // This can occur for HFA return values (see hfa_sf3E_r.exe)
+ if (tree->TypeGet() == TYP_STRUCT)
+ {
+ return WALK_CONTINUE;
+ }
+
+ switch (tree->OperGet())
+ {
+ // Make sure we have an R-value.
+ case GT_ADD:
+ case GT_SUB:
+ case GT_DIV:
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ case GT_MULHI:
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_NEG:
+ case GT_CHS:
+ case GT_CAST:
+ case GT_INTRINSIC:
+ break;
+
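+ // GT_JTRUE is allowed through so the branch can be folded when its condition evaluates
+ // to a constant; see optVNConstantPropOnJTrue above for the special side-effect handling.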
+ case GT_JTRUE:
+ break;
+
+ case GT_MUL:
+ // Don't transform long multiplies.
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ {
+ return WALK_SKIP_SUBTREES;
+ }
+ break;
+
+ case GT_LCL_VAR:
+ // Make sure the local variable is an R-value.
+ if ((tree->gtFlags & (GTF_VAR_DEF | GTF_DONT_CSE)))
+ {
+ return WALK_CONTINUE;
+ }
+#if FEATURE_ANYCSE
+ // Let's not conflict with CSE (to save the movw/movt).
+ if (lclNumIsCSE(tree->AsLclVarCommon()->GetLclNum()))
+ {
+ return WALK_CONTINUE;
+ }
+#endif
+ break;
+
+ default:
+ // Unknown node, continue to walk.
+ return WALK_CONTINUE;
+ }
+
+ // Perform the constant propagation
+ GenTreePtr newTree = optVNConstantPropOnTree(block, stmt, tree);
+ if (newTree == nullptr)
+ {
+ // Not propagated, keep going.
+ return WALK_CONTINUE;
+ }
+
+ // Successful propagation, mark as assertion propagated and skip
+ // sub-tree (with side-effects) visits.
+ optAssertionProp_Update(newTree, tree, stmt);
+
+ JITDUMP("After constant propagation on [%06u]:\n", tree->gtTreeID);
+ DBEXEC(VERBOSE, gtDispTree(stmt));
+
+ return WALK_SKIP_SUBTREES;
+}
+
+//------------------------------------------------------------------------------
+// optVnNonNullPropCurStmt
+// Performs VN based non-null propagation on the tree node.
+//
+// Assumption:
+// This function is called as part of a pre-order tree walk.
+//
+// Arguments:
+// block - The block that contains the statement that contains the tree.
+// stmt - The statement node in which the "tree" is present.
+// tree - The currently visited tree node.
+//
+// Return Value:
+// None.
+//
+// Description:
+// Performs value number based non-null propagation on GT_CALL and
+// indirections. This is different from flow based assertions and helps
+// unify VN based constant prop and non-null prop in a single pre-order walk.
+//
+void Compiler::optVnNonNullPropCurStmt(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree)
+{
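+ // Pass an empty assertion set: with no flow-based assertions available, the callees
+ // below can only draw non-null conclusions from value numbers, which is the intent
+ // of this pre-order walk.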
+ ASSERT_TP empty = BitVecOps::MakeEmpty(apTraits);
+ GenTreePtr newTree = nullptr;
+ if (tree->OperGet() == GT_CALL)
+ {
+ newTree = optNonNullAssertionProp_Call(empty, tree, stmt);
+ }
+ else if (tree->OperIsIndir())
+ {
+ newTree = optAssertionProp_Ind(empty, tree, stmt);
+ }
+ if (newTree)
+ {
+ assert(newTree == tree);
+ optAssertionProp_Update(newTree, tree, stmt);
+ }
+}
+
+//------------------------------------------------------------------------------
+// optVNAssertionPropCurStmtVisitor
+// Unified Value Numbering based assertion propagation visitor.
+//
+// Assumption:
+// This function is called as part of a pre-order tree walk.
+//
+// Return Value:
+// WALK_RESULTs.
+//
+// Description:
+// An unified value numbering based assertion prop visitor that
+// performs non-null and constant assertion propagation based on
+// value numbers.
+//
+/* static */
+Compiler::fgWalkResult Compiler::optVNAssertionPropCurStmtVisitor(GenTreePtr* ppTree, fgWalkData* data)
+{
+ VNAssertionPropVisitorInfo* pData = (VNAssertionPropVisitorInfo*)data->pCallbackData;
+ Compiler* pThis = pData->pThis;
+
+ pThis->optVnNonNullPropCurStmt(pData->block, pData->stmt, *ppTree);
+
+ return pThis->optVNConstantPropCurStmt(pData->block, pData->stmt, *ppTree);
+}
+
+/*****************************************************************************
+ *
+ * Perform VN based (as opposed to flow based) assertion prop on the current
+ * statement first, because even if we don't generate any new control flow
+ * assertions, we still want to propagate these value number facts.
+ *
+ * Returns the skipped next stmt if the current statement or next few
+ * statements got removed, else just returns the incoming stmt.
+ */
+GenTreePtr Compiler::optVNAssertionPropCurStmt(BasicBlock* block, GenTreePtr stmt)
+{
+ // TODO-Review: EH successor/predecessor iteration seems broken.
+ // See: SELF_HOST_TESTS_ARM\jit\Directed\ExcepFilters\fault\fault.exe
+ if (block->bbCatchTyp == BBCT_FAULT)
+ {
+ return stmt;
+ }
+
+ // Preserve the prev link before the propagation and morph.
+ GenTreePtr prev = (stmt == block->firstStmt()) ? nullptr : stmt->gtPrev;
+
+ // Perform VN based assertion prop first, in case we don't find
+ // anything in assertion gen.
+ optAssertionPropagatedCurrentStmt = false;
+
+ VNAssertionPropVisitorInfo data(this, block, stmt);
+ fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, Compiler::optVNAssertionPropCurStmtVisitor, &data);
+
+ if (optAssertionPropagatedCurrentStmt)
+ {
+ fgMorphBlockStmt(block, stmt DEBUGARG("optVNAssertionPropCurStmt"));
+ gtSetStmtInfo(stmt);
+ fgSetStmtSeq(stmt);
+ }
+
+ // Check if propagation removed statements starting from current stmt.
+ // If so, advance to the next good statement.
+ GenTreePtr nextStmt = (prev == nullptr) ? block->firstStmt() : prev->gtNext;
+ return nextStmt;
+}
+
+/*****************************************************************************
+ *
+ * The entry point for assertion propagation
+ */
+
+void Compiler::optAssertionPropMain()
+{
+ if (fgSsaPassesCompleted == 0)
+ {
+ return;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optAssertionPropMain()\n");
+ printf("Blocks/Trees at start of phase\n");
+ fgDispBasicBlocks(true);
+ }
+#endif
+
+ optAssertionInit(false);
+
+ noway_assert(optAssertionCount == 0);
+
+ // First discover all value assignments and record them in the table.
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ compCurBB = block;
+
+ fgRemoveRestOfBlock = false;
+
+ GenTreePtr stmt = block->bbTreeList;
+ while (stmt)
+ {
+ // We need to remove the rest of the block.
+ if (fgRemoveRestOfBlock)
+ {
+ fgRemoveStmt(block, stmt);
+ stmt = stmt->gtNext;
+ continue;
+ }
+ else
+ {
+ // Perform VN based assertion prop before assertion gen.
+ GenTreePtr nextStmt = optVNAssertionPropCurStmt(block, stmt);
+
+ // Propagation requested removal of the remaining stmts; advance so the check at the top of the loop removes them.
+ if (fgRemoveRestOfBlock)
+ {
+ stmt = stmt->gtNext;
+ continue;
+ }
+
+ // Propagation removed the current stmt or next few stmts, so skip them.
+ if (stmt != nextStmt)
+ {
+ stmt = nextStmt;
+ continue;
+ }
+ }
+
+ // Perform assertion gen for control flow based assertions.
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ optAssertionGen(tree);
+ }
+
+ // Advance the iterator
+ stmt = stmt->gtNext;
+ }
+ }
+
+ if (!optAssertionCount)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ fgDebugCheckLinks();
+#endif
+
+ // Allocate the bits for the predicate sensitive dataflow analysis
+ bbJtrueAssertionOut = optInitAssertionDataflowFlags();
+ ASSERT_TP* jumpDestGen = optComputeAssertionGen();
+
+ // Modified dataflow algorithm for available expressions.
+ DataFlow flow(this);
+ AssertionPropFlowCallback ap(this, bbJtrueAssertionOut, jumpDestGen);
+ flow.ForwardAnalysis(ap);
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ // Compute any implied non-Null assertions for block->bbAssertionIn
+ optImpliedByTypeOfAssertions(block->bbAssertionIn);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n");
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ printf("\nBB%02u", block->bbNum);
+ printf(" valueIn = %s", BitVecOps::ToString(apTraits, block->bbAssertionIn));
+ printf(" valueOut = %s", BitVecOps::ToString(apTraits, block->bbAssertionOut));
+ if (block->bbJumpKind == BBJ_COND)
+ {
+ printf(" => BB%02u", block->bbJumpDest->bbNum);
+ printf(" valueOut= %s", BitVecOps::ToString(apTraits, bbJtrueAssertionOut[block->bbNum]));
+ }
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Perform assertion propagation (and constant folding)
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ ASSERT_TP assertions = BitVecOps::MakeCopy(apTraits, block->bbAssertionIn);
+
+ // TODO-Review: EH successor/predecessor iteration seems broken.
+ // SELF_HOST_TESTS_ARM\jit\Directed\ExcepFilters\fault\fault.exe
+ if (block->bbCatchTyp == BBCT_FAULT)
+ {
+ continue;
+ }
+
+ // Make the current basic block address available globally.
+ compCurBB = block;
+ fgRemoveRestOfBlock = false;
+
+ // Walk the statement trees in this basic block
+ GenTreePtr stmt = block->FirstNonPhiDef();
+ while (stmt)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ // Propagation tells us to remove the rest of the block. Remove it.
+ if (fgRemoveRestOfBlock)
+ {
+ fgRemoveStmt(block, stmt);
+ stmt = stmt->gtNext;
+ continue;
+ }
+
+ // Preserve the prev link before the propagation and morph, to check if propagation
+ // removes the current stmt.
+ GenTreePtr prev = (stmt == block->firstStmt()) ? nullptr : stmt->gtPrev;
+
+ optAssertionPropagatedCurrentStmt = false; // set to true if an assertion propagation took place
+ // and thus we must morph, set order, re-link
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ JITDUMP("Propagating %s assertions for BB%02d, stmt [%06d], tree [%06d], tree -> %d\n",
+ BitVecOps::ToString(apTraits, assertions), block->bbNum, dspTreeID(stmt), dspTreeID(tree),
+ tree->GetAssertion());
+
+ GenTreePtr newTree = optAssertionProp(assertions, tree, stmt);
+ if (newTree)
+ {
+ assert(optAssertionPropagatedCurrentStmt == true);
+ tree = newTree;
+ }
+
+ // Is this an assignment to a local variable
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+
+ // If this tree makes an assertion - make it available.
+ if (tree->HasAssertion())
+ {
+ BitVecOps::AddElemD(apTraits, assertions, tree->GetAssertion() - 1);
+
+ // Also include any implied assertions for the tree node.
+ optImpliedAssertions((AssertionIndex)tree->GetAssertion(), assertions);
+ }
+ }
+
+ if (optAssertionPropagatedCurrentStmt)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Re-morphing this stmt:\n");
+ gtDispTree(stmt);
+ printf("\n");
+ }
+#endif
+ // Re-morph the statement.
+ fgMorphBlockStmt(block, stmt DEBUGARG("optAssertionPropMain"));
+
+ // Recalculate the gtCostSz, etc...
+ gtSetStmtInfo(stmt);
+
+ // Re-thread the nodes
+ fgSetStmtSeq(stmt);
+ }
+
+ // Check if propagation removed statements starting from current stmt.
+ // If so, advance to the next good statement.
+ GenTreePtr nextStmt = (prev == nullptr) ? block->firstStmt() : prev->gtNext;
+ stmt = (stmt == nextStmt) ? stmt->gtNext : nextStmt;
+ }
+ optAssertionPropagatedCurrentStmt = false; // clear it back as we are done with stmts.
+ }
+
+#ifdef DEBUG
+ fgDebugCheckBBlist();
+ fgDebugCheckLinks();
+#endif
+
+ // Assertion propagation may have changed the reference counts.
+ // We need to re-sort the variable table.
+
+ if (optAssertionPropagated)
+ {
+ lvaSortAgain = true;
+ }
+}
diff --git a/src/jit/bitset.cpp b/src/jit/bitset.cpp
new file mode 100644
index 0000000000..90ef253199
--- /dev/null
+++ b/src/jit/bitset.cpp
@@ -0,0 +1,185 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "bitset.h"
+#include "bitsetasuint64.h"
+#include "bitsetasshortlong.h"
+#include "bitsetasuint64inclass.h"
+
+// clang-format off
+unsigned BitSetSupport::BitCountTable[16] = { 0, 1, 1, 2,
+ 1, 2, 2, 3,
+ 1, 2, 2, 3,
+ 2, 3, 3, 4 };
+// clang-format on
+
+#ifdef DEBUG
+template <typename BitSetType, unsigned Uniq, typename Env, typename BitSetTraits>
+void BitSetSupport::RunTests(Env env)
+{
+
+ typedef BitSetOps<BitSetType, Uniq, Env, BitSetTraits> LclBitSetOps;
+
+ // The tests require that the Size is at least 52...
+ assert(BitSetTraits::GetSize(env) > 51);
+
+ BitSetType bs1;
+ LclBitSetOps::AssignNoCopy(env, bs1, LclBitSetOps::MakeEmpty(env));
+ unsigned bs1bits[] = {0, 10, 44, 45};
+ LclBitSetOps::AddElemD(env, bs1, bs1bits[0]);
+ LclBitSetOps::AddElemD(env, bs1, bs1bits[1]);
+ LclBitSetOps::AddElemD(env, bs1, bs1bits[2]);
+ LclBitSetOps::AddElemD(env, bs1, bs1bits[3]);
+
+ typename LclBitSetOps::Iter bsi(env, bs1);
+ unsigned bitNum = 0;
+ unsigned k = 0;
+ while (bsi.NextElem(env, &bitNum))
+ {
+ assert(bitNum == bs1bits[k]);
+ k++;
+ }
+ assert(k == 4);
+
+ assert(LclBitSetOps::Equal(env, bs1, LclBitSetOps::Union(env, bs1, bs1)));
+ assert(LclBitSetOps::Equal(env, bs1, LclBitSetOps::Intersection(env, bs1, bs1)));
+ assert(LclBitSetOps::IsSubset(env, bs1, bs1));
+
+ BitSetType bs2;
+ LclBitSetOps::AssignNoCopy(env, bs2, LclBitSetOps::MakeEmpty(env));
+ unsigned bs2bits[] = {0, 10, 50, 51};
+ LclBitSetOps::AddElemD(env, bs2, bs2bits[0]);
+ LclBitSetOps::AddElemD(env, bs2, bs2bits[1]);
+ LclBitSetOps::AddElemD(env, bs2, bs2bits[2]);
+ LclBitSetOps::AddElemD(env, bs2, bs2bits[3]);
+
+ unsigned unionBits[] = {0, 10, 44, 45, 50, 51};
+ BitSetType bsU12;
+ LclBitSetOps::AssignNoCopy(env, bsU12, LclBitSetOps::Union(env, bs1, bs2));
+ k = 0;
+ bsi = typename LclBitSetOps::Iter(env, bsU12);
+ bitNum = 0;
+ while (bsi.NextElem(env, &bitNum))
+ {
+ assert(bitNum == unionBits[k]);
+ k++;
+ }
+ assert(k == 6);
+
+ k = 0;
+ typename LclBitSetOps::Iter bsiL = typename LclBitSetOps::Iter(env, bsU12);
+ bitNum = 0;
+ while (bsiL.NextElem(env, &bitNum))
+ {
+ assert(bitNum == unionBits[k]);
+ k++;
+ }
+ assert(k == 6);
+
+ unsigned intersectionBits[] = {0, 10};
+ BitSetType bsI12;
+ LclBitSetOps::AssignNoCopy(env, bsI12, LclBitSetOps::Intersection(env, bs1, bs2));
+ k = 0;
+ bsi = typename LclBitSetOps::Iter(env, bsI12);
+ bitNum = 0;
+ while (bsi.NextElem(env, &bitNum))
+ {
+ assert(bitNum == intersectionBits[k]);
+ k++;
+ }
+ assert(k == 2);
+}
+
+class TestBitSetTraits
+{
+public:
+ static IAllocator* GetAllocator(IAllocator* alloc)
+ {
+ return alloc;
+ }
+ static unsigned GetSize(IAllocator* alloc)
+ {
+ return 64;
+ }
+ static unsigned GetArrSize(IAllocator* alloc, unsigned elemSize)
+ {
+ assert(elemSize == sizeof(size_t));
+ return (64 / 8) / sizeof(size_t);
+ }
+ static unsigned GetEpoch(IAllocator* alloc)
+ {
+ return 0;
+ }
+};
+
+void BitSetSupport::TestSuite(IAllocator* env)
+{
+ BitSetSupport::RunTests<UINT64, BSUInt64, IAllocator*, TestBitSetTraits>(env);
+ BitSetSupport::RunTests<BitSetShortLongRep, BSShortLong, IAllocator*, TestBitSetTraits>(env);
+ BitSetSupport::RunTests<BitSetUint64<IAllocator*, TestBitSetTraits>, BSUInt64Class, IAllocator*, TestBitSetTraits>(
+ env);
+}
+#endif
+
+const char* BitSetSupport::OpNames[BitSetSupport::BSOP_NUMOPS] = {
+#define BSOPNAME(x) #x,
+#include "bitsetops.h"
+#undef BSOPNAME
+};
+
+void BitSetSupport::BitSetOpCounter::RecordOp(BitSetSupport::Operation op)
+{
+ OpCounts[op]++;
+ TotalOps++;
+
+ if ((TotalOps % 1000000) == 0)
+ {
+ if (OpOutputFile == nullptr)
+ {
+ OpOutputFile = fopen(m_fileName, "a");
+ }
+ fprintf(OpOutputFile, "@ %d total ops.\n", TotalOps);
+
+ unsigned OpOrder[BSOP_NUMOPS];
+ bool OpOrdered[BSOP_NUMOPS];
+
+ // First sort by total operations (into an index permutation array, using a simple n^2 sort).
+ for (unsigned k = 0; k < BitSetSupport::BSOP_NUMOPS; k++)
+ {
+ OpOrdered[k] = false;
+ }
+ for (unsigned k = 0; k < BitSetSupport::BSOP_NUMOPS; k++)
+ {
+ bool candSet = false;
+ unsigned cand = 0;
+ unsigned candInd = 0;
+ for (unsigned j = 0; j < BitSetSupport::BSOP_NUMOPS; j++)
+ {
+ if (OpOrdered[j])
+ {
+ continue;
+ }
+ if (!candSet || OpCounts[j] > cand)
+ {
+ candInd = j;
+ cand = OpCounts[j];
+ candSet = true;
+ }
+ }
+ assert(candSet);
+ OpOrder[k] = candInd;
+ OpOrdered[candInd] = true;
+ }
+
+ for (unsigned ii = 0; ii < BitSetSupport::BSOP_NUMOPS; ii++)
+ {
+ unsigned i = OpOrder[ii];
+ fprintf(OpOutputFile, " Op %40s: %8d\n", OpNames[i], OpCounts[i]);
+ }
+ }
+}
diff --git a/src/jit/bitset.h b/src/jit/bitset.h
new file mode 100644
index 0000000000..4ecb2fc0d4
--- /dev/null
+++ b/src/jit/bitset.h
@@ -0,0 +1,452 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// A set of integers in the range [0..N], for some given N.
+
+/*****************************************************************************/
+#ifndef _BITSET_H_
+#define _BITSET_H_
+/*****************************************************************************/
+
+// This class provides some constant declarations and some static utility methods useful
+// for bitset implementations.
+class BitSetSupport
+{
+#ifdef DEBUG
+ template <typename BitSetType, unsigned Brand, typename Env, typename BitSetTraits>
+ static void RunTests(Env env);
+#endif
+
+public:
+ static const unsigned BitsInByte = 8;
+
+ // This maps 4-bit ("nibble") values into the number of 1 bits they contain.
+ static unsigned BitCountTable[16];
+
+ // Returns the number of 1 bits in the binary representation of "u".
+ template <typename T>
+ static unsigned CountBitsInIntegral(T u)
+ {
+ unsigned res = 0;
+ // We process "u" in 4-bit nibbles, hence the "*2" below.
+ for (int i = 0; i < sizeof(T) * 2; i++)
+ {
+ res += BitCountTable[u & 0xf];
+ u >>= 4;
+ }
+ return res;
+ }
+
+#ifdef DEBUG
+ // This runs the "TestSuite" method for a few important instantiations of BitSet.
+ static void TestSuite(IAllocator* env);
+#endif
+
+ enum Operation
+ {
+#define BSOPNAME(x) x,
+#include "bitsetops.h"
+#undef BSOPNAME
+ BSOP_NUMOPS
+ };
+ static const char* OpNames[BSOP_NUMOPS];
+
+ class BitSetOpCounter
+ {
+ unsigned TotalOps;
+ unsigned OpCounts[BSOP_NUMOPS];
+ const char* m_fileName;
+ FILE* OpOutputFile;
+
+ public:
+ BitSetOpCounter(const char* fileName) : TotalOps(0), m_fileName(fileName), OpOutputFile(nullptr)
+ {
+ for (unsigned i = 0; i < BSOP_NUMOPS; i++)
+ {
+ OpCounts[i] = 0;
+ }
+ }
+
+ void RecordOp(Operation op);
+ };
+};
+
+template <>
+FORCEINLINE unsigned BitSetSupport::CountBitsInIntegral<unsigned>(unsigned c)
+{
+ // Make sure we're 32 bit.
+ assert(sizeof(unsigned) == 4);
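+ // Classic parallel ("SWAR") popcount: each step sums adjacent groups of bits in place,
+ // doubling the group width (1 -> 2 -> 4 -> 8 -> 16 bits) until the full count remains.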
+ c = (c & 0x55555555) + ((c >> 1) & 0x55555555);
+ c = (c & 0x33333333) + ((c >> 2) & 0x33333333);
+ c = (c & 0x0f0f0f0f) + ((c >> 4) & 0x0f0f0f0f);
+ c = (c & 0x00ff00ff) + ((c >> 8) & 0x00ff00ff);
+ c = (c & 0x0000ffff) + ((c >> 16) & 0x0000ffff);
+ return c;
+}
+
+// A "BitSet" represents a set of integers from a "universe" [0..N-1]. This implementation assumes that "N"
+// (the "Size") is provided by the "Env" template argument type discussed below, and accessed from the Env
+// via a static method of the BitSetTraits type discussed below. The intent of "BitSet" is that the set is
+// represented as a bit array. Various binary operations therefore only make sense if the operands are
+// subsets of the same universe. Further, the integers in the set that the BitSet represents may have
+// different interpretations at a higher level, so even if the range of the universe stays the same,
+// the higher-level meaning of those bits may change. For these reasons, we assume the Env can provide
+// (again, via static methods of the BitSetTraits) the current "epoch" number. The Env must keep the
+// Size the same while the epoch has a given value; a BitSet implementation may legally stamp BitSets
+// with the current epoch, and assert that BitSets from different epochs are not intermixed.
+
+// Some implementations may use a representation that (at least sometimes) is a pointer to a
+// heap-allocated data structure. (The operations of BitSetOps are static methods, rather than
+// declaring a BitSet class type with multiple subtypes, to allow maximally efficient raw
+// primitive type representations.) Therefore, we must be careful about assignment and
+// initialization. We often want to reason about BitSets as immutable values, and just copying
+// the representation would introduce sharing in the indirect case, which is usually not what's
+// desired. On the other hand, there are many cases in which the RHS value has just been
+// created functionally, and the initialization/assignment is obviously its last use. In these
+// cases, allocating a new indirect representation for the lhs (if it does not already have one)
+// would be unnecessary and wasteful. Thus, for assignment, we have a "normal" assignment
+// function, which makes a copy of the referent data structure in the indirect case, and an
+// "AssignNoCopy" version, which does not, and instead introduces sharing in the indirect case.
+// Obviously, the latter should be used with care.
+//
+// (Orthogonally, there are also further versions of assignment that differ in whether the "rhs"
+// argument may be uninitialized. The normal assignment operation requires the "rhs" argument not be
+// uninitialized; "AssignNoCopy" has the same requirement. The "AssignAllowUninitRhs" version allows
+// the "rhs" to be the uninit value, and sets the "lhs" to be uninitialized in that case.)
+
+// This class has static methods that provide the operations on BitSets.
+//
+// An instantiation requires:
+// typename BitSetType: the representation type of this kind of BitSet.
+//
+// unsigned Brand: an integer constant. This is unused by the implementation; it exists
+// *only* to ensure that we can have, if desired, multiple distinct BitSetOps
+// implementations for the same BitSetType, by instantiating these with different
+// values for Brand (thus "branding" them so that they are distinct from one another.)
+//
+// typename Env: a type that determines the (current) size of the given BitSet type, as well
+// as an allocation function, and the current epoch (integer that changes when
+// "universe" of the BitSet changes) -- all via static methods of the "BitSetTraits"
+// type.
+//
+// typename BitSetTraits:
+// An "adapter" class that provides methods that retrieves things from the Env:
+// static IAllocator* GetAllococator(Env): yields an "IAllocator*" that the BitSet implementation can use.
+// static unsigned GetSize(Env): the current size (= # of bits) of this bitset type.
+// static unsigned GetArrSize(Env, unsigned elemSize): The number of "elemSize" chunks sufficient to hold
+// "GetSize". A given BitSet implementation must call
+// this with only one constant value. Thus, an "Env"
+// may compute this result when GetSize changes.
+//
+// static unsigned GetEpoch(Env): the current epoch.
+//
+// (For many instantiations, BitSetValueArgType and BitSetValueRetType will be the same as BitSetType; in cases where
+// BitSetType is a class type, BitSetValueArgType may need to be "const BitSetType&", for example.)
+//
+// In addition to implementing the method signatures here, an instantiation of BitSetOps must also export a
+// BitSetOps::Iter type, which supports the following operations:
+// Iter(BitSetValueArgType): a constructor
+// bool NextElem(unsigned* pElem): returns true if the iteration is not complete, and sets *pElem to the next
+// yielded member.
+//
+// Finally, it should export two further types:
+//
+// ValArgType: the type used to pass a BitSet as a by-value argument.
+// RetValType: the type that should be used to return a BitSet.
+//
+// For many instantiations, these can be identical to BitSetType. When the representation type is a class,
+// however, ValArgType may need to be "const BitSetType&", and RetValType may need to be a helper class, if the
+// class hides default copy constructors and assignment operators to detect erroneous usage.
+//
+template <typename BitSetType, unsigned Brand, typename Env, typename BitSetTraits>
+class BitSetOps
+{
+#if 0
+ // Below are the set of methods that an instantiation of BitSetOps should provide. This is
+ // #if'd out because it doesn't make any difference; C++ has no mechanism for checking that
+ // the methods of an instantiation are consistent with these signatures, other than the expectations
+ // embodied in the program that uses the instantiation(s). But it's useful documentation, and
+ // we should try to keep it up to date.
+
+ public:
+
+ // The uninitialized value -- not a real bitset (if possible).
+ static BitSetValueRetType UninitVal();
+
+ // Returns "true" iff "bs" may be the uninit value.
+ static bool MayBeUninit(BitSetValueArgType bs);
+
+ // Returns a new BitSet that is empty. Uses the Allocator of "env" to allocate memory for
+ // the representation, if necessary.
+ static BitSetValueRetType MakeEmpty(Env env);
+
+ // Returns the a new BitSet that is "full" -- represents all the integers in the current range.
+ // Uses the Allocator of "env" to allocate memory for the representation, if necessary.
+ static BitSetValueRetType MakeFull(Env env);
+
+ // Returns the set containing the single element "bitNum" (which is required to be within the
+ // BitSet's current range). Uses the Allocator of "env" to allocate memory for the representation,
+ // if necessary.
+ static BitSetValueRetType MakeSingleton(Env env, unsigned bitNum);
+
+ // Assign "rhs" to "lhs". "rhs" must not be the uninitialized value. "lhs" may be, in which case
+ // "rhs" will be copied if necessary.
+ static void Assign(Env env, BitSetType& lhs, BitSetValueArgType rhs);
+
+ // Assign "rhs" to "lhs"...*even* if "rhs" is the uninitialized value.
+ static void AssignAllowUninitRhs(Env env, BitSetType& lhs, BitSetValueArgType rhs);
+
+ // This is a "destructive" assignment -- it should only be used if the rhs is "dead" after the assignment.
+ // In particular, if the rhs has a level of indirection to a heap-allocated data structure, that pointer will
+ // be copied into the lhs.
+ static void AssignNoCopy(Env env, BitSetType& lhs, BitSetValueArgType rhs);
+
+ // Destructively set "bs" to be the empty set. This method is unique, in that it does *not*
+ // require "bs" to be a bitset of the current epoch. It ensures that it is after, however.
+ // (If the representation is indirect, this requires allocating a new, empty representation.
+ // If this is a performance issue, we could provide a new version of ClearD that assumes/asserts
+ // that the rep is for the current epoch -- this would be useful if a given bitset were repeatedly
+ // cleared within an epoch.)
+ static void ClearD(Env env, BitSetType& bs);
+
+ // Returns a copy of "bs". If the representation of "bs" involves a level of indirection, the data
+ // structure is copied and a pointer to the copy is returned.
+ static BitSetValueRetType MakeCopy(Env env, BitSetValueArgType bs);
+
+ // Returns "true" iff ""bs" represents the empty set.
+ static bool IsEmpty(Env env, BitSetValueArgType bs);
+
+ // Returns the number of members in "bs".
+ static unsigned Count(Env env, BitSetValueArgType bs);
+
+ // Returns "true" iff "i" is a member of "bs".
+ static bool IsMember(Env env, const BitSetValueArgType bs, unsigned i);
+
+ // Destructively modify "bs" to ensure that "i" is a member.
+ static void AddElemD(Env env, BitSetType& bs, unsigned i);
+ // Returns a BitSet that is a copy of "bs" with "i" added.
+ static BitSetValueRetType AddElem(Env env, BitSetValueArgType bs, unsigned i);
+
+ // Destructively modify "bs" to ensure that "i" is not a member.
+ static void RemoveElemD(Env env, BitSetType& bs, unsigned i);
+ // Returns a BitSet that is a copy of "bs" with "i" removed.
+ static BitSetValueRetType RemoveElem(Env env, BitSetValueArgType bs1, unsigned i);
+
+ // Destructively modify "bs1" to be the union of "bs1" and "bs2".
+ static void UnionD(Env env, BitSetType& bs1, BitSetValueArgType bs2);
+ // Returns a new BitSet that is the union of "bs1" and "bs2".
+ static BitSetValueRetType Union(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2);
+
+ // Destructively modify "bs1" to be the intersection of "bs1" and "bs2".
+ static void IntersectionD(Env env, BitSetType& bs1, BitSetValueArgType bs2);
+ // Returns a new BitSet that is the intersection of "bs1" and "bs2".
+ static BitSetValueRetType Intersection(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2);
+
+ // Returns true iff "bs1" and "bs2" have an empty intersection.
+ static bool IsEmptyIntersection(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2);
+
+ // Destructively modify "bs1" to be the set difference of "bs1" and "bs2".
+ static void DiffD(Env env, BitSetType& bs1, BitSetValueArgType bs2);
+ // Returns a new BitSet that is the set difference of "bs1" and "bs2".
+ static BitSetValueRetType Diff(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2);
+
+ // Returns true iff "bs2" is a subset of "bs1."
+ static bool IsSubset(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2);
+
+ // Returns true iff "bs1" and "bs2" are equal.
+ static bool Equal(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2);
+
+#ifdef DEBUG
+ // Returns a string representing the contents of "bs". Allocates memory for the representation
+ // using the Allocator of "env".
+ static const char* ToString(Env env, BitSetValueArgType bs);
+#endif
+
+ // Declare this as a type -- will be a real class in real instantiations.
+ class Iter {
+ public:
+ Iter(Env env, BitSetValueArgType bs) {}
+ bool NextElem(Env env, unsigned* pElem) { return false; }
+ };
+
+ typename ValArgType;
+ typename RetValType;
+#endif // 0 -- the above is #if'd out, since it's really just an extended comment on what an instantiation
+ // should provide.
+};
+
+template <typename BitSetType,
+ unsigned Brand,
+ typename Env,
+ typename BitSetTraits,
+ typename BitSetValueArgType,
+ typename BitSetValueRetType,
+ typename BaseIter>
+class BitSetOpsWithCounter
+{
+ typedef BitSetOps<BitSetType, Brand, Env, BitSetTraits> BSO;
+
+public:
+ static BitSetValueRetType UninitVal()
+ {
+ return BSO::UninitVal();
+ }
+ static bool MayBeUninit(BitSetValueArgType bs)
+ {
+ return BSO::MayBeUninit(bs);
+ }
+ static BitSetValueRetType MakeEmpty(Env env)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_MakeEmpty);
+ return BSO::MakeEmpty(env);
+ }
+ static BitSetValueRetType MakeFull(Env env)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_MakeFull);
+ return BSO::MakeFull(env);
+ }
+ static BitSetValueRetType MakeSingleton(Env env, unsigned bitNum)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_MakeSingleton);
+ return BSO::MakeSingleton(env, bitNum);
+ }
+ static void Assign(Env env, BitSetType& lhs, BitSetValueArgType rhs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_Assign);
+ BSO::Assign(env, lhs, rhs);
+ }
+ static void AssignAllowUninitRhs(Env env, BitSetType& lhs, BitSetValueArgType rhs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_AssignAllowUninitRhs);
+ BSO::AssignAllowUninitRhs(env, lhs, rhs);
+ }
+ static void AssignNoCopy(Env env, BitSetType& lhs, BitSetValueArgType rhs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_AssignNocopy);
+ BSO::AssignNoCopy(env, lhs, rhs);
+ }
+ static void ClearD(Env env, BitSetType& bs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_ClearD);
+ BSO::ClearD(env, bs);
+ }
+ static BitSetValueRetType MakeCopy(Env env, BitSetValueArgType bs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_MakeCopy);
+ return BSO::MakeCopy(env, bs);
+ }
+ static bool IsEmpty(Env env, BitSetValueArgType bs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_IsEmpty);
+ return BSO::IsEmpty(env, bs);
+ }
+ static unsigned Count(Env env, BitSetValueArgType bs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_Count);
+ return BSO::Count(env, bs);
+ }
+ static bool IsMember(Env env, const BitSetValueArgType bs, unsigned i)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_IsMember);
+ return BSO::IsMember(env, bs, i);
+ }
+ static void AddElemD(Env env, BitSetType& bs, unsigned i)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_AddElemD);
+ BSO::AddElemD(env, bs, i);
+ }
+ static BitSetValueRetType AddElem(Env env, BitSetValueArgType bs, unsigned i)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_AddElem);
+ return BSO::AddElem(env, bs, i);
+ }
+ static void RemoveElemD(Env env, BitSetType& bs, unsigned i)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_RemoveElemD);
+ BSO::RemoveElemD(env, bs, i);
+ }
+ static BitSetValueRetType RemoveElem(Env env, BitSetValueArgType bs1, unsigned i)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_RemoveElem);
+ return BSO::RemoveElem(env, bs1, i);
+ }
+ static void UnionD(Env env, BitSetType& bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_UnionD);
+ BSO::UnionD(env, bs1, bs2);
+ }
+ static BitSetValueRetType Union(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_Union);
+ return BSO::Union(env, bs1, bs2);
+ }
+ static void IntersectionD(Env env, BitSetType& bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_IntersectionD);
+ BSO::IntersectionD(env, bs1, bs2);
+ }
+ static BitSetValueRetType Intersection(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_Intersection);
+ return BSO::Intersection(env, bs1, bs2);
+ }
+ static bool IsEmptyIntersection(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_IsEmptyIntersection);
+ return BSO::IsEmptyIntersection(env, bs1, bs2);
+ }
+ static void DiffD(Env env, BitSetType& bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_DiffD);
+ BSO::DiffD(env, bs1, bs2);
+ }
+ static BitSetValueRetType Diff(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_Diff);
+ return BSO::Diff(env, bs1, bs2);
+ }
+ static bool IsSubset(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_IsSubset);
+ return BSO::IsSubset(env, bs1, bs2);
+ }
+ static bool Equal(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_Equal);
+ return BSO::Equal(env, bs1, bs2);
+ }
+#ifdef DEBUG
+ static const char* ToString(Env env, BitSetValueArgType bs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_ToString);
+ return BSO::ToString(env, bs);
+ }
+#endif
+
+ class Iter
+ {
+ BaseIter m_iter;
+
+ public:
+ Iter(Env env, BitSetValueArgType bs) : m_iter(env, bs)
+ {
+ }
+
+ bool NextElem(Env env, unsigned* pElem)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_NextBit);
+ return m_iter.NextElem(env, pElem);
+ }
+ };
+};
+
+// We define symbolic names for the various bitset implementations available, to allow choices between them.
+
+#define BSUInt64 0
+#define BSShortLong 1
+#define BSUInt64Class 2
+
+/*****************************************************************************/
+#endif // _BITSET_H_
+/*****************************************************************************/
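A quick sanity check of the bit-counting helpers above: the generic nibble-table template and the 32-bit parallel-add specialization must agree on every input. A minimal sketch, not part of the patch itself, assuming the usual JIT build environment so that bitset.h compiles (only the specialized unsigned overload is exercised, so no table definition is needed):

    #include "bitset.h"

    void CheckPopCount()
    {
        // 0xF0F00001 has 4 + 4 + 1 = 9 set bits.
        assert(BitSetSupport::CountBitsInIntegral<unsigned>(0x00000000u) == 0);
        assert(BitSetSupport::CountBitsInIntegral<unsigned>(0xF0F00001u) == 9);
        assert(BitSetSupport::CountBitsInIntegral<unsigned>(0xFFFFFFFFu) == 32);
    }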
diff --git a/src/jit/bitsetasshortlong.h b/src/jit/bitsetasshortlong.h
new file mode 100644
index 0000000000..ec437e189c
--- /dev/null
+++ b/src/jit/bitsetasshortlong.h
@@ -0,0 +1,792 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// A set of integers in the range [0..N], for some N defined by the "Env" (via "BitSetTraits").
+//
+// Represented as a pointer-sized item. If N bits can fit in this item, the representation is "direct"; otherwise,
+// the item is a pointer to an array of K size_t's, where K is the number of size_t's necessary to hold N bits.
+
+#ifndef bitSetAsShortLong_DEFINED
+#define bitSetAsShortLong_DEFINED 1
+
+#include "bitset.h"
+#include "compilerbitsettraits.h"
+
+typedef size_t* BitSetShortLongRep;
+
+template <typename Env, typename BitSetTraits>
+class BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>
+{
+public:
+ typedef BitSetShortLongRep Rep;
+
+private:
+ static const unsigned BitsInSizeT = sizeof(size_t) * BitSetSupport::BitsInByte;
+
+ inline static bool IsShort(Env env)
+ {
+ return BitSetTraits::GetArrSize(env, sizeof(size_t)) <= 1;
+ }
+
+ // The operations on the "long" (pointer-to-array-of-size_t) versions of the representation.
+ static void AssignLong(Env env, BitSetShortLongRep& lhs, BitSetShortLongRep rhs);
+ static BitSetShortLongRep MakeSingletonLong(Env env, unsigned bitNum);
+ static BitSetShortLongRep MakeCopyLong(Env env, BitSetShortLongRep bs);
+ static bool IsEmptyLong(Env env, BitSetShortLongRep bs);
+ static unsigned CountLong(Env env, BitSetShortLongRep bs);
+ static void UnionDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
+ static void DiffDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
+ static void AddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
+ static void RemoveElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
+ static void ClearDLong(Env env, BitSetShortLongRep& bs);
+ static BitSetShortLongRep MakeUninitArrayBits(Env env);
+ static BitSetShortLongRep MakeEmptyArrayBits(Env env);
+ static BitSetShortLongRep MakeFullArrayBits(Env env);
+ static bool IsMemberLong(Env env, BitSetShortLongRep bs, unsigned i);
+ static bool EqualLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
+ static bool IsSubsetLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
+ static bool IsEmptyIntersectionLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
+ static void IntersectionDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
+#ifdef DEBUG
+ static const char* ToStringLong(Env env, BitSetShortLongRep bs);
+#endif
+
+public:
+ inline static BitSetShortLongRep UninitVal()
+ {
+ return nullptr;
+ }
+
+ static bool MayBeUninit(BitSetShortLongRep bs)
+ {
+ return bs == UninitVal();
+ }
+
+ static void Assign(Env env, BitSetShortLongRep& lhs, BitSetShortLongRep rhs)
+ {
+ // We can't assert that rhs != UninitVal in the Short case, because in that
+ // case it's a legal value.
+ if (IsShort(env))
+ {
+ // Both are short.
+ lhs = rhs;
+ }
+ else if (lhs == UninitVal())
+ {
+ assert(rhs != UninitVal());
+ lhs = MakeCopy(env, rhs);
+ }
+ else
+ {
+ AssignLong(env, lhs, rhs);
+ }
+ }
+
+ static void AssignAllowUninitRhs(Env env, BitSetShortLongRep& lhs, BitSetShortLongRep rhs)
+ {
+ if (IsShort(env))
+ {
+ // Both are short.
+ lhs = rhs;
+ }
+ else if (rhs == UninitVal())
+ {
+ lhs = rhs;
+ }
+ else if (lhs == UninitVal())
+ {
+ lhs = MakeCopy(env, rhs);
+ }
+ else
+ {
+ AssignLong(env, lhs, rhs);
+ }
+ }
+
+ static void AssignNoCopy(Env env, BitSetShortLongRep& lhs, BitSetShortLongRep rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void ClearD(Env env, BitSetShortLongRep& bs)
+ {
+ if (IsShort(env))
+ {
+ bs = (BitSetShortLongRep) nullptr;
+ }
+ else
+ {
+ assert(bs != UninitVal());
+ ClearDLong(env, bs);
+ }
+ }
+
+ static BitSetShortLongRep MakeSingleton(Env env, unsigned bitNum)
+ {
+ assert(bitNum < BitSetTraits::GetSize(env));
+ if (IsShort(env))
+ {
+ return BitSetShortLongRep(((size_t)1) << bitNum);
+ }
+ else
+ {
+ return MakeSingletonLong(env, bitNum);
+ }
+ }
+
+ static BitSetShortLongRep MakeCopy(Env env, BitSetShortLongRep bs)
+ {
+ if (IsShort(env))
+ {
+ return bs;
+ }
+ else
+ {
+ return MakeCopyLong(env, bs);
+ }
+ }
+
+ static bool IsEmpty(Env env, BitSetShortLongRep bs)
+ {
+ if (IsShort(env))
+ {
+ return bs == nullptr;
+ }
+ else
+ {
+ assert(bs != UninitVal());
+ return IsEmptyLong(env, bs);
+ }
+ }
+
+ static unsigned Count(Env env, BitSetShortLongRep bs)
+ {
+ if (IsShort(env))
+ {
+ return BitSetSupport::CountBitsInIntegral(size_t(bs));
+ }
+ else
+ {
+ assert(bs != UninitVal());
+ return CountLong(env, bs);
+ }
+ }
+
+ static void UnionD(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2)
+ {
+ if (IsShort(env))
+ {
+ bs1 = (BitSetShortLongRep)(((size_t)bs1) | ((size_t)bs2));
+ }
+ else
+ {
+ UnionDLong(env, bs1, bs2);
+ }
+ }
+ static BitSetShortLongRep Union(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+ {
+ BitSetShortLongRep res = MakeCopy(env, bs1);
+ UnionD(env, res, bs2);
+ return res;
+ }
+
+ static void DiffD(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2)
+ {
+ if (IsShort(env))
+ {
+ bs1 = (BitSetShortLongRep)(((size_t)bs1) & (~(size_t)bs2));
+ }
+ else
+ {
+ DiffDLong(env, bs1, bs2);
+ }
+ }
+ static BitSetShortLongRep Diff(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+ {
+ BitSetShortLongRep res = MakeCopy(env, bs1);
+ DiffD(env, res, bs2);
+ return res;
+ }
+
+ static void RemoveElemD(Env env, BitSetShortLongRep& bs, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ if (IsShort(env))
+ {
+ size_t mask = ((size_t)1) << i;
+ mask = ~mask;
+ bs = (BitSetShortLongRep)(((size_t)bs) & mask);
+ }
+ else
+ {
+ assert(bs != UninitVal());
+ RemoveElemDLong(env, bs, i);
+ }
+ }
+ static BitSetShortLongRep RemoveElem(Env env, BitSetShortLongRep bs, unsigned i)
+ {
+ BitSetShortLongRep res = MakeCopy(env, bs);
+ RemoveElemD(env, res, i);
+ return res;
+ }
+
+ static void AddElemD(Env env, BitSetShortLongRep& bs, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ if (IsShort(env))
+ {
+ size_t mask = ((size_t)1) << i;
+ bs = (BitSetShortLongRep)(((size_t)bs) | mask);
+ }
+ else
+ {
+ AddElemDLong(env, bs, i);
+ }
+ }
+ static BitSetShortLongRep AddElem(Env env, BitSetShortLongRep bs, unsigned i)
+ {
+ BitSetShortLongRep res = MakeCopy(env, bs);
+ AddElemD(env, res, i);
+ return res;
+ }
+
+ static bool IsMember(Env env, const BitSetShortLongRep bs, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ if (IsShort(env))
+ {
+ size_t mask = ((size_t)1) << i;
+ return (((size_t)bs) & mask) != 0;
+ }
+ else
+ {
+ assert(bs != UninitVal());
+ return IsMemberLong(env, bs, i);
+ }
+ }
+
+ static void IntersectionD(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2)
+ {
+ if (IsShort(env))
+ {
+ (size_t&)bs1 &= (size_t)bs2;
+ }
+ else
+ {
+ IntersectionDLong(env, bs1, bs2);
+ }
+ }
+
+ static BitSetShortLongRep Intersection(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+ {
+ BitSetShortLongRep res = MakeCopy(env, bs1);
+ IntersectionD(env, res, bs2);
+ return res;
+ }
+ static bool IsEmptyIntersection(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+ {
+ if (IsShort(env))
+ {
+ return (((size_t)bs1) & ((size_t)bs2)) == 0;
+ }
+ else
+ {
+ return IsEmptyIntersectionLong(env, bs1, bs2);
+ }
+ }
+
+ static bool IsSubset(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+ {
+ if (IsShort(env))
+ {
+ size_t u1 = (size_t)bs1;
+ size_t u2 = (size_t)bs2;
+ return (u1 & u2) == u1;
+ }
+ else
+ {
+ return IsSubsetLong(env, bs1, bs2);
+ }
+ }
+
+ static bool Equal(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+ {
+ if (IsShort(env))
+ {
+ return (size_t)bs1 == (size_t)bs2;
+ }
+ else
+ {
+ return EqualLong(env, bs1, bs2);
+ }
+ }
+
+#ifdef DEBUG
+ // Returns a string valid until the allocator releases the memory.
+ static const char* ToString(Env env, BitSetShortLongRep bs)
+ {
+ if (IsShort(env))
+ {
+ assert(sizeof(BitSetShortLongRep) == sizeof(size_t));
+ IAllocator* alloc = BitSetTraits::GetDebugOnlyAllocator(env);
+ const int CharsForSizeT = sizeof(size_t) * 2;
+ char* res = nullptr;
+ const int ShortAllocSize = CharsForSizeT + 4;
+ res = (char*)alloc->Alloc(ShortAllocSize);
+ size_t bits = (size_t)bs;
+ unsigned remaining = ShortAllocSize;
+ char* ptr = res;
+ if (sizeof(size_t) == sizeof(int64_t))
+ {
+ sprintf_s(ptr, remaining, "%016llX", bits);
+ }
+ else
+ {
+ assert(sizeof(size_t) == sizeof(int));
+ sprintf_s(ptr, remaining, "%08X", bits);
+ }
+ return res;
+ }
+ else
+ {
+ return ToStringLong(env, bs);
+ }
+ }
+#endif
+
+ static BitSetShortLongRep MakeEmpty(Env env)
+ {
+ if (IsShort(env))
+ {
+ return nullptr;
+ }
+ else
+ {
+ return MakeEmptyArrayBits(env);
+ }
+ }
+
+ static BitSetShortLongRep MakeFull(Env env)
+ {
+ if (IsShort(env))
+ {
+ // Can't just shift by numBits, since that might equal BitsInSizeT (and, e.g., (1 << 32) == 1 for a 32-bit unsigned).
+ unsigned numBits = BitSetTraits::GetSize(env);
+ if (numBits == BitsInSizeT)
+ {
+ // Can't use the implementation below to get all 1's...
+ return BitSetShortLongRep(size_t(-1));
+ }
+ else
+ {
+ return BitSetShortLongRep((size_t(1) << numBits) - 1);
+ }
+ }
+ else
+ {
+ return MakeFullArrayBits(env);
+ }
+ }
+
+ class Iter
+ {
+ BitSetShortLongRep m_bs; // The BitSet that we're iterating over.
+ size_t m_bits; // The "current" bits remaining to be iterated over.
+ // In the "short" case, these are all the remaining bits.
+ // In the "long" case, these are remaining bits in element "m_index";
+ // these and the bits in the remaining elements comprise the remaining bits.
+ unsigned m_index; // If "m_bs" uses the long (indirect) representation, the current index in the array.
+ // the index of the element in A(bs) that is currently being iterated.
+ unsigned m_bitNum; // The number of bits that have already been iterated over (set or clear). If you
+ // add this to the bit number of the next bit in "m_bits", you get the proper bit number of that
+ // bit in "m_bs".
+
+ public:
+ Iter(Env env, const BitSetShortLongRep& bs) : m_bs(bs), m_bitNum(0)
+ {
+ if (BitSetOps::IsShort(env))
+ {
+ m_index = 0;
+ m_bits = (size_t)bs;
+ }
+ else
+ {
+ assert(bs != BitSetOps::UninitVal());
+ m_index = 0;
+ m_bits = bs[0];
+ }
+ }
+
+ bool NextElem(Env env, unsigned* pElem)
+ {
+#if BITSET_TRACK_OPCOUNTS
+ BitSetStaticsImpl::RecordOp(BitSetStaticsImpl::BSOP_NextBit);
+#endif
+ for (;;)
+ {
+ DWORD nextBit;
+ BOOL hasBit;
+#ifdef _HOST_64BIT_
+ static_assert_no_msg(sizeof(size_t) == 8);
+ hasBit = BitScanForward64(&nextBit, m_bits);
+#else
+ static_assert_no_msg(sizeof(size_t) == 4);
+ hasBit = BitScanForward(&nextBit, m_bits);
+#endif
+
+ // If there's a bit, doesn't matter if we're short or long.
+ if (hasBit)
+ {
+ *pElem = m_bitNum + nextBit;
+ m_bitNum += nextBit + 1;
+ m_bits >>= nextBit;
+ m_bits >>= 1; // Have to do these separately -- if we have 0x80000000, nextBit == 31, and shifting
+ // by 32 bits does nothing.
+ return true;
+ }
+ else
+ {
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ if (len <= 1)
+ {
+ return false;
+ }
+ else
+ {
+ m_index++;
+ if (m_index == len)
+ {
+ return false;
+ }
+ // Otherwise...
+ m_bitNum = m_index * sizeof(size_t) * BitSetSupport::BitsInByte;
+ m_bits = m_bs[m_index];
+ continue;
+ }
+ }
+ }
+ }
+ };
+
+ friend class Iter;
+
+ typedef size_t* ValArgType;
+ typedef size_t* RetValType;
+};
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::AssignLong(Env env, BitSetShortLongRep& lhs, BitSetShortLongRep rhs)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ lhs[i] = rhs[i];
+ }
+}
+
+template <typename Env, typename BitSetTraits>
+BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::MakeSingletonLong(Env env, unsigned bitNum)
+{
+ assert(!IsShort(env));
+ BitSetShortLongRep res = MakeEmptyArrayBits(env);
+ unsigned index = bitNum / BitsInSizeT;
+ res[index] = ((size_t)1) << (bitNum % BitsInSizeT);
+ return res;
+}
+
+template <typename Env, typename BitSetTraits>
+BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::MakeCopyLong(Env env, BitSetShortLongRep bs)
+{
+ assert(!IsShort(env));
+ BitSetShortLongRep res = MakeUninitArrayBits(env);
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ res[i] = bs[i];
+ }
+ return res;
+}
+
+template <typename Env, typename BitSetTraits>
+bool BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::IsEmptyLong(Env env, BitSetShortLongRep bs)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ if (bs[i] != 0)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+template <typename Env, typename BitSetTraits>
+unsigned BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::CountLong(Env env, BitSetShortLongRep bs)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ unsigned res = 0;
+ for (unsigned i = 0; i < len; i++)
+ {
+ res += BitSetSupport::CountBitsInIntegral(bs[i]);
+ }
+ return res;
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::UnionDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ bs1[i] |= bs2[i];
+ }
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::DiffDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ bs1[i] &= ~bs2[i];
+ }
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::AddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i)
+{
+ assert(!IsShort(env));
+ unsigned index = i / BitsInSizeT;
+ size_t mask = ((size_t)1) << (i % BitsInSizeT);
+ bs[index] |= mask;
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::RemoveElemDLong(Env env, BitSetShortLongRep& bs, unsigned i)
+{
+ assert(!IsShort(env));
+ unsigned index = i / BitsInSizeT;
+ size_t mask = ((size_t)1) << (i % BitsInSizeT);
+ mask = ~mask;
+ bs[index] &= mask;
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::ClearDLong(Env env, BitSetShortLongRep& bs)
+{
+ assert(!IsShort(env));
+ // Recall that ClearD does *not* require "bs" to be of the current epoch.
+ // Therefore, we must allocate a new representation.
+ bs = MakeEmptyArrayBits(env);
+}
+
+template <typename Env, typename BitSetTraits>
+BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::MakeUninitArrayBits(Env env)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ assert(len > 1); // Or else would not require an array.
+ return (BitSetShortLongRep)(BitSetTraits::GetAllocator(env)->Alloc(len * sizeof(size_t)));
+}
+
+template <typename Env, typename BitSetTraits>
+BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::MakeEmptyArrayBits(Env env)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ assert(len > 1); // Or else would not require an array.
+ BitSetShortLongRep res = (BitSetShortLongRep)(BitSetTraits::GetAllocator(env)->Alloc(len * sizeof(size_t)));
+ for (unsigned i = 0; i < len; i++)
+ {
+ res[i] = 0;
+ }
+ return res;
+}
+
+template <typename Env, typename BitSetTraits>
+BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::MakeFullArrayBits(Env env)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ assert(len > 1); // Or else would not require an array.
+ BitSetShortLongRep res = (BitSetShortLongRep)(BitSetTraits::GetAllocator(env)->Alloc(len * sizeof(size_t)));
+ for (unsigned i = 0; i < len - 1; i++)
+ {
+ res[i] = size_t(-1);
+ }
+ // Start with all ones, shift in zeros in the last elem.
+ unsigned lastElemBits = (BitSetTraits::GetSize(env) - 1) % BitsInSizeT + 1;
+ res[len - 1] = (size_t(-1) >> (BitsInSizeT - lastElemBits));
+ return res;
+}
+
+template <typename Env, typename BitSetTraits>
+bool BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::IsMemberLong(Env env, BitSetShortLongRep bs, unsigned i)
+{
+ assert(!IsShort(env));
+ unsigned index = i / BitsInSizeT;
+ unsigned bitInElem = (i % BitsInSizeT);
+ size_t mask = ((size_t)1) << bitInElem;
+ return (bs[index] & mask) != 0;
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::IntersectionDLong(Env env,
+ BitSetShortLongRep& bs1,
+ BitSetShortLongRep bs2)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ bs1[i] &= bs2[i];
+ }
+}
+
+template <typename Env, typename BitSetTraits>
+bool BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::IsEmptyIntersectionLong(Env env,
+ BitSetShortLongRep bs1,
+ BitSetShortLongRep bs2)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ if ((bs1[i] & bs2[i]) != 0)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+template <typename Env, typename BitSetTraits>
+bool BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::EqualLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ if (bs1[i] != bs2[i])
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+template <typename Env, typename BitSetTraits>
+bool BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::IsSubsetLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ if ((bs1[i] & bs2[i]) != bs1[i])
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+#ifdef DEBUG
+template <typename Env, typename BitSetTraits>
+const char* BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::ToStringLong(Env env, BitSetShortLongRep bs)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ const int CharsForSizeT = sizeof(size_t) * 2;
+ unsigned allocSz = len * CharsForSizeT + 4;
+ unsigned remaining = allocSz;
+ IAllocator* alloc = BitSetTraits::GetDebugOnlyAllocator(env);
+ char* res = (char*)alloc->Alloc(allocSz);
+ char* temp = res;
+ for (unsigned i = len; 0 < i; i--)
+ {
+ size_t bits = bs[i - 1];
+ for (unsigned bytesDone = 0; bytesDone < sizeof(size_t); bytesDone += sizeof(unsigned))
+ {
+ unsigned bits0 = (unsigned)bits;
+ sprintf_s(temp, remaining, "%08X", bits0);
+ temp += 8;
+ remaining -= 8;
+ bytesDone += 4;
+ assert(sizeof(unsigned) == 4);
+ // Doing this twice by 16, rather than once by 32, avoids warnings when size_t == unsigned.
+ bits = bits >> 16;
+ bits = bits >> 16;
+ }
+ }
+ return res;
+}
+#endif
+
+#endif // bitSetAsShortLong_DEFINED
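The short/long split above is driven entirely by BitSetTraits::GetArrSize: if one size_t chunk suffices, the set is stored directly in the pointer-sized value and no allocation ever happens; otherwise the value points to an allocated size_t array. A minimal sketch of a traits adapter satisfying that contract, with invented names (DummyEnv, DummyTraits); the JIT's real adapters live in compilerbitsettraits.h:

    struct DummyEnv
    {
        IAllocator* m_alloc; // used only for the "long" (array) representation
        unsigned    m_size;  // number of bits in the universe
    };

    struct DummyTraits
    {
        static IAllocator* GetAllocator(DummyEnv* env)
        {
            return env->m_alloc;
        }
        static unsigned GetSize(DummyEnv* env)
        {
            return env->m_size;
        }
        static unsigned GetArrSize(DummyEnv* env, unsigned elemSize)
        {
            // Number of "elemSize"-byte chunks needed to hold GetSize() bits.
            return (env->m_size + elemSize * 8 - 1) / (elemSize * 8);
        }
        static unsigned GetEpoch(DummyEnv* env)
        {
            return 0; // the universe never changes in this sketch
        }
    };

    // With m_size == 32 this instantiation stays "short" (IsShort is true, sets live in the
    // pointer value itself); with m_size == 200 on a 64-bit host every set is a 4-element array.
    typedef BitSetOps<BitSetShortLongRep, BSShortLong, DummyEnv*, DummyTraits> DummyBitSetOps;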
diff --git a/src/jit/bitsetasuint64.h b/src/jit/bitsetasuint64.h
new file mode 100644
index 0000000000..150f7e9d61
--- /dev/null
+++ b/src/jit/bitsetasuint64.h
@@ -0,0 +1,236 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef bitSetAsUint64_DEFINED
+#define bitSetAsUint64_DEFINED 1
+
+#include "bitset.h"
+
+template <typename Env, typename BitSetTraits>
+class BitSetOps</*BitSetType*/ UINT64,
+ /*Brand*/ BSUInt64,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>
+{
+public:
+ typedef UINT64 Rep;
+
+private:
+ static UINT64 Singleton(unsigned bitNum)
+ {
+ assert(bitNum < sizeof(UINT64) * BitSetSupport::BitsInByte);
+ return (UINT64)1 << bitNum;
+ }
+
+public:
+ static void Assign(Env env, UINT64& lhs, UINT64 rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void AssignNouninit(Env env, UINT64& lhs, UINT64 rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void AssignAllowUninitRhs(Env env, UINT64& lhs, UINT64 rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void AssignNoCopy(Env env, UINT64& lhs, UINT64 rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void ClearD(Env env, UINT64& bs)
+ {
+ bs = 0;
+ }
+
+ static UINT64 MakeSingleton(Env env, unsigned bitNum)
+ {
+ assert(bitNum < BitSetTraits::GetSize(env));
+ return Singleton(bitNum);
+ }
+
+ static UINT64 MakeCopy(Env env, UINT64 bs)
+ {
+ return bs;
+ }
+
+ static bool IsEmpty(Env env, UINT64 bs)
+ {
+ return bs == 0;
+ }
+
+ static unsigned Count(Env env, UINT64 bs)
+ {
+ return BitSetSupport::CountBitsInIntegral(bs);
+ }
+
+ static void UnionD(Env env, UINT64& bs1, UINT64 bs2)
+ {
+ bs1 |= bs2;
+ }
+
+ static UINT64 Union(Env env, UINT64& bs1, UINT64 bs2)
+ {
+ return bs1 | bs2;
+ }
+
+ static void DiffD(Env env, UINT64& bs1, UINT64 bs2)
+ {
+ bs1 = bs1 & ~bs2;
+ }
+
+ static UINT64 Diff(Env env, UINT64 bs1, UINT64 bs2)
+ {
+ return bs1 & ~bs2;
+ }
+
+ static void RemoveElemD(Env env, UINT64& bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ bs1 &= ~Singleton(i);
+ }
+
+ static UINT64 RemoveElem(Env env, UINT64 bs1, unsigned i)
+ {
+ return bs1 & ~Singleton(i);
+ }
+
+ static void AddElemD(Env env, UINT64& bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ bs1 |= Singleton(i);
+ }
+
+ static UINT64 AddElem(Env env, UINT64 bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ return bs1 | Singleton(i);
+ }
+
+ static bool IsMember(Env env, const UINT64 bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ return (bs1 & Singleton(i)) != 0;
+ }
+
+ static void IntersectionD(Env env, UINT64& bs1, UINT64 bs2)
+ {
+ bs1 &= bs2;
+ }
+
+ static UINT64 Intersection(Env env, UINT64 bs1, UINT64 bs2)
+ {
+ return bs1 & bs2;
+ }
+
+ static bool IsEmptyIntersection(Env env, UINT64 bs1, UINT64 bs2)
+ {
+ return (bs1 & bs2) == 0;
+ }
+
+ static bool IsSubset(Env env, UINT64 bs1, UINT64 bs2)
+ {
+ return ((bs1 & bs2) == bs1);
+ }
+
+ static bool Equal(Env env, UINT64 bs1, UINT64 bs2)
+ {
+ return bs1 == bs2;
+ }
+
+ static UINT64 MakeEmpty(Env env)
+ {
+ return 0;
+ }
+
+ static UINT64 MakeFull(Env env)
+ {
+ unsigned sz = BitSetTraits::GetSize(env);
+ if (sz == sizeof(UINT64) * 8)
+ {
+ return UINT64(-1);
+ }
+ else
+ {
+ return (UINT64(1) << sz) - 1;
+ }
+ }
+
+#ifdef DEBUG
+ static const char* ToString(Env env, UINT64 bs)
+ {
+ IAllocator* alloc = BitSetTraits::GetDebugOnlyAllocator(env);
+ const int CharsForUINT64 = sizeof(UINT64) * 2;
+ char* res = NULL;
+ const int AllocSize = CharsForUINT64 + 4;
+ res = (char*)alloc->Alloc(AllocSize);
+ UINT64 bits = bs;
+ unsigned remaining = AllocSize;
+ char* ptr = res;
+ for (unsigned bytesDone = 0; bytesDone < sizeof(UINT64); bytesDone += sizeof(unsigned))
+ {
+ unsigned bits0 = (unsigned)bits;
+ sprintf_s(ptr, remaining, "%08X", bits0);
+ ptr += 8;
+ remaining -= 8;
+ bytesDone += 4;
+ assert(sizeof(unsigned) == 4);
+ // Doing this twice by 16, rather than once by 32, avoids warnings when size_t == unsigned.
+ bits = bits >> 16;
+ bits = bits >> 16;
+ }
+ return res;
+ }
+#endif
+
+ static UINT64 UninitVal()
+ {
+ return 0;
+ }
+
+ static bool MayBeUninit(UINT64 bs)
+ {
+ return bs == UninitVal();
+ }
+
+ class Iter
+ {
+ UINT64 m_bits;
+
+ public:
+ Iter(Env env, const UINT64& bits) : m_bits(bits)
+ {
+ }
+
+ bool NextElem(Env env, unsigned* pElem)
+ {
+ if (m_bits)
+ {
+ unsigned bitNum = *pElem;
+ while ((m_bits & 0x1) == 0)
+ {
+ bitNum++;
+ m_bits >>= 1;
+ }
+ *pElem = bitNum;
+ m_bits &= ~0x1;
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ };
+
+ typedef UINT64 ValArgType;
+ typedef UINT64 RetValType;
+};
+
+#endif // bitSetAsUint64_DEFINED
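For universes of at most 64 bits the raw-UINT64 implementation above can be used directly. A minimal sketch, reusing the hypothetical DummyEnv/DummyTraits adapter from the bitsetasshortlong.h notes (with m_size set to 64 so that bit 63 is legal):

    typedef BitSetOps<UINT64, BSUInt64, DummyEnv*, DummyTraits> Dummy64Ops;

    void Demo64(DummyEnv* env)
    {
        UINT64 s = Dummy64Ops::MakeEmpty(env);
        Dummy64Ops::AddElemD(env, s, 5);
        Dummy64Ops::AddElemD(env, s, 63);
        assert(Dummy64Ops::IsMember(env, s, 63));
        assert(Dummy64Ops::Count(env, s) == 2);
        assert(Dummy64Ops::IsSubset(env, s, Dummy64Ops::MakeFull(env))); // s is a subset of the full set
    }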
diff --git a/src/jit/bitsetasuint64inclass.h b/src/jit/bitsetasuint64inclass.h
new file mode 100644
index 0000000000..be92624613
--- /dev/null
+++ b/src/jit/bitsetasuint64inclass.h
@@ -0,0 +1,500 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef bitSetAsUint64InClass_DEFINED
+#define bitSetAsUint64InClass_DEFINED 1
+
+#include "bitset.h"
+#include "bitsetasuint64.h"
+#include "stdmacros.h"
+
+template <typename Env, typename BitSetTraits>
+class BitSetUint64ValueRetType;
+
+template <typename Env, typename BitSetTraits>
+class BitSetUint64Iter;
+
+template <typename Env, typename BitSetTraits>
+class BitSetUint64
+{
+public:
+ typedef BitSetUint64<Env, BitSetTraits> Rep;
+
+private:
+ friend class BitSetOps</*BitSetType*/ BitSetUint64<Env, BitSetTraits>,
+ /*Brand*/ BSUInt64Class,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>;
+
+ friend class BitSetUint64ValueRetType<Env, BitSetTraits>;
+ friend class BitSetUint64Iter<Env, BitSetTraits>;
+
+ UINT64 m_bits;
+
+#ifdef DEBUG
+ unsigned m_epoch;
+#endif
+
+ typedef BitSetOps<UINT64, BSUInt64, Env, BitSetTraits> Uint64BitSetOps;
+
+ void CheckEpoch(Env env) const
+ {
+#ifdef DEBUG
+ assert(m_epoch == BitSetTraits::GetEpoch(env));
+#endif
+ }
+
+#ifdef DEBUG
+ // In debug, make sure we don't have any public assignment, by making this private.
+ BitSetUint64& operator=(const BitSetUint64& bs)
+ {
+ m_bits = bs.m_bits;
+ m_epoch = bs.m_epoch;
+ return (*this);
+ }
+#endif // DEBUG
+
+ bool operator==(const BitSetUint64& bs) const
+ {
+ return m_bits == bs.m_bits
+#ifdef DEBUG
+ && m_epoch == bs.m_epoch
+#endif
+ ;
+ }
+
+#ifndef DEBUG
+ // In debug we also want the default copy constructor to be private, to make inadvertent
+ // copies via it illegal. Debug builds therefore arrange to use the
+ // non-default constructor defined below that takes an extra argument where one would
+ // otherwise use a copy constructor. In non-debug builds, we don't pass the extra dummy
+ // int argument, and just make the copy constructor defined here visible.
+public:
+#endif
+ BitSetUint64(const BitSetUint64& bs)
+ : m_bits(bs.m_bits)
+#ifdef DEBUG
+ , m_epoch(bs.m_epoch)
+#endif
+ {
+ }
+
+#ifdef DEBUG
+public:
+ // But we add a public constructor that's *almost* the copy constructor (it takes an extra dummy argument).
+ BitSetUint64(const BitSetUint64& bs, int xxx) : m_bits(bs.m_bits), m_epoch(bs.m_epoch)
+ {
+ }
+#endif
+
+private:
+ // Return the number of bits set in the BitSet.
+ inline unsigned Count(Env env) const
+ {
+ CheckEpoch(env);
+ return Uint64BitSetOps::Count(env, m_bits);
+ }
+
+ inline void DiffD(Env env, const BitSetUint64& bs2)
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ Uint64BitSetOps::DiffD(env, m_bits, bs2.m_bits);
+ }
+
+ inline BitSetUint64 Diff(Env env, const BitSetUint64& bs2) const
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ BitSetUint64 res(*this);
+ Uint64BitSetOps::DiffD(env, res.m_bits, bs2.m_bits);
+ return res;
+ }
+
+ inline void RemoveElemD(Env env, unsigned i)
+ {
+ CheckEpoch(env);
+ Uint64BitSetOps::RemoveElemD(env, m_bits, i);
+ }
+
+ inline BitSetUint64 RemoveElem(Env env, unsigned i) const
+ {
+ CheckEpoch(env);
+ BitSetUint64 res(*this);
+ Uint64BitSetOps::RemoveElemD(env, res.m_bits, i);
+ return res;
+ }
+
+ inline void AddElemD(Env env, unsigned i)
+ {
+ CheckEpoch(env);
+ Uint64BitSetOps::AddElemD(env, m_bits, i);
+ }
+
+ inline BitSetUint64 AddElem(Env env, unsigned i) const
+ {
+ CheckEpoch(env);
+ BitSetUint64 res(*this);
+ Uint64BitSetOps::AddElemD(env, res.m_bits, i);
+ return res;
+ }
+
+ inline bool IsMember(Env env, unsigned i) const
+ {
+ CheckEpoch(env);
+ return Uint64BitSetOps::IsMember(env, m_bits, i);
+ }
+
+ inline void IntersectionD(Env env, const BitSetUint64& bs2)
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ m_bits = m_bits & bs2.m_bits;
+ }
+
+ inline BitSetUint64 Intersection(Env env, const BitSetUint64& bs2) const
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ BitSetUint64 res(*this);
+ Uint64BitSetOps::IntersectionD(env, res.m_bits, bs2.m_bits);
+ return res;
+ }
+
+ inline void UnionD(Env env, const BitSetUint64& bs2)
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ Uint64BitSetOps::UnionD(env, m_bits, bs2.m_bits);
+ }
+
+ inline BitSetUint64 Union(Env env, const BitSetUint64& bs2) const
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ BitSetUint64 res(*this);
+ Uint64BitSetOps::UnionD(env, res.m_bits, bs2.m_bits);
+ return res;
+ }
+
+ inline void ClearD(Env env)
+ {
+ // Recall that ClearD does *not* require "*this" to be of the current epoch.
+ Uint64BitSetOps::ClearD(env, m_bits);
+#ifdef DEBUG
+ // But it does update "m_epoch" to the current epoch.
+ m_epoch = BitSetTraits::GetEpoch(env);
+#endif
+ }
+
+ inline bool IsEmpty(Env env) const
+ {
+ CheckEpoch(env);
+ return Uint64BitSetOps::IsEmpty(env, m_bits);
+ }
+
+ inline bool IsSubset(Env env, const BitSetUint64& bs2) const
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ return Uint64BitSetOps::IsSubset(env, m_bits, bs2.m_bits);
+ }
+
+ inline bool IsEmptyIntersection(Env env, const BitSetUint64& bs2) const
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ return Uint64BitSetOps::IsEmptyIntersection(env, m_bits, bs2.m_bits);
+ }
+
+ inline bool Equal(Env env, const BitSetUint64& bs2) const
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ return Uint64BitSetOps::Equal(env, m_bits, bs2.m_bits);
+ }
+
+ const char* ToString(Env env) const
+ {
+ return Uint64BitSetOps::ToString(env, m_bits);
+ }
+
+public:
+ // Uninit: constructs the uninitialized value.
+ BitSetUint64()
+ : m_bits(0)
+#ifdef DEBUG
+ , m_epoch(UINT32_MAX) // Undefined.
+#endif
+ {
+ }
+
+ BitSetUint64(Env env, bool full = false)
+ : m_bits(0)
+#ifdef DEBUG
+ , m_epoch(BitSetTraits::GetEpoch(env))
+#endif
+ {
+ if (full)
+ {
+ m_bits = Uint64BitSetOps::MakeFull(env);
+ }
+ }
+
+ inline BitSetUint64(const BitSetUint64ValueRetType<Env, BitSetTraits>& rt);
+
+ BitSetUint64(Env env, unsigned bitNum)
+ : m_bits(Uint64BitSetOps::MakeSingleton(env, bitNum))
+#ifdef DEBUG
+ , m_epoch(BitSetTraits::GetEpoch(env))
+#endif
+ {
+ assert(bitNum < BitSetTraits::GetSize(env));
+ }
+};
+
+template <typename Env, typename BitSetTraits>
+class BitSetUint64ValueRetType
+{
+ friend class BitSetUint64<Env, BitSetTraits>;
+
+ BitSetUint64<Env, BitSetTraits> m_bs;
+
+public:
+ BitSetUint64ValueRetType(const BitSetUint64<Env, BitSetTraits>& bs) : m_bs(bs)
+ {
+ }
+};
+
+template <typename Env, typename BitSetTraits>
+BitSetUint64<Env, BitSetTraits>::BitSetUint64(const BitSetUint64ValueRetType<Env, BitSetTraits>& rt)
+ : m_bits(rt.m_bs.m_bits)
+#ifdef DEBUG
+ , m_epoch(rt.m_bs.m_epoch)
+#endif
+{
+}
+
+// You *can* clear a bit after it's been iterated. But you shouldn't otherwise mutate the
+// bitset during bit iteration.
+template <typename Env, typename BitSetTraits>
+class BitSetUint64Iter
+{
+ UINT64 m_bits;
+ unsigned m_bitNum;
+
+public:
+ BitSetUint64Iter(Env env, const BitSetUint64<Env, BitSetTraits>& bs) : m_bits(bs.m_bits), m_bitNum(0)
+ {
+ }
+
+ bool NextElem(Env env, unsigned* pElem)
+ {
+ static const unsigned UINT64_SIZE = 64;
+
+ if ((m_bits & 0x1) != 0)
+ {
+ *pElem = m_bitNum;
+ m_bitNum++;
+ m_bits >>= 1;
+ return true;
+ }
+ else
+ {
+ // Skip groups of 4 zeros -- an optimization for sparse bitsets.
+ while (m_bitNum < UINT64_SIZE && (m_bits & 0xf) == 0)
+ {
+ m_bitNum += 4;
+ m_bits >>= 4;
+ }
+ while (m_bitNum < UINT64_SIZE && (m_bits & 0x1) == 0)
+ {
+ m_bitNum += 1;
+ m_bits >>= 1;
+ }
+ if (m_bitNum < UINT64_SIZE)
+ {
+ *pElem = m_bitNum;
+ m_bitNum++;
+ m_bits >>= 1;
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+};
+
+template <typename Env, typename BitSetTraits>
+class BitSetOps</*BitSetType*/ BitSetUint64<Env, BitSetTraits>,
+ /*Brand*/ BSUInt64Class,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>
+{
+ typedef BitSetUint64<Env, BitSetTraits> BST;
+ typedef const BitSetUint64<Env, BitSetTraits>& BSTValArg;
+ typedef BitSetUint64ValueRetType<Env, BitSetTraits> BSTRetVal;
+
+public:
+ static BSTRetVal UninitVal()
+ {
+ return BitSetUint64<Env, BitSetTraits>();
+ }
+
+ static bool MayBeUninit(BSTValArg bs)
+ {
+ return bs == UninitVal();
+ }
+
+ static void Assign(Env env, BST& lhs, BSTValArg rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void AssignNouninit(Env env, BST& lhs, BSTValArg rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void AssignAllowUninitRhs(Env env, BST& lhs, BSTValArg rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void AssignNoCopy(Env env, BST& lhs, BSTValArg rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void ClearD(Env env, BST& bs)
+ {
+ bs.ClearD(env);
+ }
+
+ static BSTRetVal MakeSingleton(Env env, unsigned bitNum)
+ {
+ assert(bitNum < BitSetTraits::GetSize(env));
+ return BST(env, bitNum);
+ }
+
+ static BSTRetVal MakeCopy(Env env, BSTValArg bs)
+ {
+ return bs;
+ }
+
+ static bool IsEmpty(Env env, BSTValArg bs)
+ {
+ return bs.IsEmpty(env);
+ }
+
+ static unsigned Count(Env env, BSTValArg bs)
+ {
+ return bs.Count(env);
+ }
+
+ static void UnionD(Env env, BST& bs1, BSTValArg bs2)
+ {
+ bs1.UnionD(env, bs2);
+ }
+
+ static BSTRetVal Union(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return bs1.Union(env, bs2);
+ }
+
+ static void DiffD(Env env, BST& bs1, BSTValArg bs2)
+ {
+ bs1.DiffD(env, bs2);
+ }
+
+ static BSTRetVal Diff(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return bs1.Diff(env, bs2);
+ }
+
+ static void RemoveElemD(Env env, BST& bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ bs1.RemoveElemD(env, i);
+ }
+
+ static BSTRetVal RemoveElem(Env env, BSTValArg bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ return bs1.RemoveElem(env, i);
+ }
+
+ static void AddElemD(Env env, BST& bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ bs1.AddElemD(env, i);
+ }
+
+ static BSTRetVal AddElem(Env env, BSTValArg bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ return bs1.AddElem(env, i);
+ }
+
+ static bool IsMember(Env env, BSTValArg bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ return bs1.IsMember(env, i);
+ }
+
+ static void IntersectionD(Env env, BST& bs1, BSTValArg bs2)
+ {
+ bs1.IntersectionD(env, bs2);
+ }
+
+ static BSTRetVal Intersection(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return bs1.Intersection(env, bs2);
+ }
+
+ static bool IsEmptyIntersection(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return bs1.IsEmptyIntersection(env, bs2);
+ }
+
+ static bool IsSubset(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return bs1.IsSubset(env, bs2);
+ }
+
+ static bool Equal(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return bs1.Equal(env, bs2);
+ }
+
+ static bool NotEqual(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return !bs1.Equal(env, bs2);
+ }
+
+ static BSTRetVal MakeEmpty(Env env)
+ {
+ return BST(env);
+ }
+
+ static BSTRetVal MakeFull(Env env)
+ {
+ return BST(env, /*full*/ true);
+ }
+
+#ifdef DEBUG
+ static const char* ToString(Env env, BSTValArg bs)
+ {
+ return bs.ToString(env);
+ }
+#endif
+
+ typedef BitSetUint64Iter<Env, BitSetTraits> Iter;
+
+ typedef const BitSetUint64<Env, BitSetTraits>& ValArgType;
+ typedef BitSetUint64ValueRetType<Env, BitSetTraits> RetValType;
+};
+#endif // bitSetAsUint64InClass_DEFINED
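The main payoff of this class-wrapped variant is the debug-only epoch stamp: mixing bitsets created against different universes trips the CheckEpoch assert instead of silently computing garbage. A hypothetical misuse it would catch -- MyEnv, MyTraits, and BumpUniverse are invented names standing in for any Env/Traits pair whose GetEpoch changes when the universe does:

    typedef BitSetUint64<MyEnv*, MyTraits>                       MyBitSet;
    typedef BitSetOps<MyBitSet, BSUInt64Class, MyEnv*, MyTraits> MyBitSetOps;

    void EpochMisuse(MyEnv* env)
    {
        MyBitSet before(env);                    // stamped with the current epoch
        BumpUniverse(env);                       // hypothetical: grows the universe, bumps GetEpoch
        MyBitSet after(env);                     // stamped with the new epoch
        MyBitSetOps::UnionD(env, after, before); // DEBUG: CheckEpoch asserts; release: unchecked
    }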
diff --git a/src/jit/bitsetops.h b/src/jit/bitsetops.h
new file mode 100644
index 0000000000..edf39eaf56
--- /dev/null
+++ b/src/jit/bitsetops.h
@@ -0,0 +1,34 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+BSOPNAME(BSOP_Assign)
+BSOPNAME(BSOP_AssignAllowUninitRhs)
+BSOPNAME(BSOP_AssignNocopy)
+BSOPNAME(BSOP_ClearD)
+BSOPNAME(BSOP_MakeSingleton)
+BSOPNAME(BSOP_MakeEmpty)
+BSOPNAME(BSOP_MakeFull)
+BSOPNAME(BSOP_MakeCopy)
+BSOPNAME(BSOP_IsEmpty)
+BSOPNAME(BSOP_Count)
+BSOPNAME(BSOP_RemoveElemD)
+BSOPNAME(BSOP_RemoveElem)
+BSOPNAME(BSOP_AddElemD)
+BSOPNAME(BSOP_AddElem)
+BSOPNAME(BSOP_UnionD)
+BSOPNAME(BSOP_Union)
+BSOPNAME(BSOP_IntersectionD)
+BSOPNAME(BSOP_Intersection)
+BSOPNAME(BSOP_IsEmptyIntersection)
+BSOPNAME(BSOP_DiffD)
+BSOPNAME(BSOP_Diff)
+BSOPNAME(BSOP_IsMember)
+BSOPNAME(BSOP_IsNotMember)
+BSOPNAME(BSOP_NoBitsAbove)
+BSOPNAME(BSOP_LeftShiftSingletonByOneD)
+BSOPNAME(BSOP_IsSubset)
+BSOPNAME(BSOP_Equal)
+BSOPNAME(BSOP_NotEqual)
+BSOPNAME(BSOP_NextBit)
+BSOPNAME(BSOP_ToString)
diff --git a/src/jit/bitvec.h b/src/jit/bitvec.h
new file mode 100644
index 0000000000..4db211ba0a
--- /dev/null
+++ b/src/jit/bitvec.h
@@ -0,0 +1,56 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This include file determines how BitVec is implemented.
+//
+#ifndef _BITVEC_INCLUDED_
+#define _BITVEC_INCLUDED_ 1
+
+// This class simplifies creation and usage of "ShortLong" bitsets.
+//
+// Create new bitsets like so:
+//
+// BitVecTraits traits(size, pCompiler);
+// BitVec bitvec = BitVecOps::MakeEmpty(&traits);
+//
+// and call functions like so:
+//
+// BitVecOps::AddElemD(&traits, bitvec, 10);
+// BitVecOps::IsMember(&traits, bitvec, 10));
+//
+
+#include "bitset.h"
+#include "compilerbitsettraits.h"
+#include "bitsetasshortlong.h"
+
+typedef BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ BitVecTraits*,
+ /*BitSetTraits*/ BitVecTraits>
+ BitVecOps;
+
+typedef BitSetShortLongRep BitVec;
+
+// These types should be used as the types for BitVec arguments and return values, respectively.
+typedef BitVecOps::ValArgType BitVec_ValArg_T;
+typedef BitVecOps::RetValType BitVec_ValRet_T;
+
+// Initialize "_varName" to "_initVal." Copies contents, not references; if "_varName" is uninitialized, allocates a
+// set for it (using "_traits" for any necessary allocation), and copies the contents of "_initVal" into it.
+#define BITVEC_INIT(_traits, _varName, _initVal) _varName(BitVecOps::MakeCopy(_traits, _initVal))
+
+// Initializes "_varName" to "_initVal", without copying: if "_initVal" is an indirect representation, copies its
+// pointer into "_varName".
+#define BITVEC_INIT_NOCOPY(_varName, _initVal) _varName(_initVal)
+
+// The iterator pattern.
+
+// Use this to initialize an iterator "_iterName" to iterate over a BitVec "_bitVec".
+// "_bitNum" will be an unsigned variable to which we assign the elements of "_bitVec".
+#define BITVEC_ITER_INIT(_traits, _iterName, _bitVec, _bitNum) \
+ unsigned _bitNum = 0; \
+ BitVecOps::Iter _iterName(_traits, _bitVec)
+
+#endif // _BITVEC_INCLUDED_
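Putting this header's pieces together, a typical create/populate/iterate sequence looks roughly like the sketch below. It assumes a Compiler* is at hand and uses the two-argument BitVecTraits constructor shown in the comment at the top of the file; the function and variable names are illustrative only.

    void BitVecDemo(Compiler* pCompiler, unsigned size)
    {
        BitVecTraits traits(size, pCompiler);
        BitVec       vec = BitVecOps::MakeEmpty(&traits);

        BitVecOps::AddElemD(&traits, vec, 3);
        BitVecOps::AddElemD(&traits, vec, 10);
        assert(BitVecOps::Count(&traits, vec) == 2);

        BITVEC_ITER_INIT(&traits, iter, vec, elemNum);
        while (iter.NextElem(&traits, &elemNum))
        {
            // Visits members in increasing order: 3, then 10.
        }
    }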
diff --git a/src/jit/block.cpp b/src/jit/block.cpp
new file mode 100644
index 0000000000..2d37754ec5
--- /dev/null
+++ b/src/jit/block.cpp
@@ -0,0 +1,771 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX BasicBlock XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifdef DEBUG
+flowList* ShuffleHelper(unsigned hash, flowList* res)
+{
+ flowList* head = res;
+ for (flowList *prev = nullptr; res != nullptr; prev = res, res = res->flNext)
+ {
+ unsigned blkHash = (hash ^ (res->flBlock->bbNum << 16) ^ res->flBlock->bbNum);
+ if (((blkHash % 1879) & 1) && prev != nullptr)
+ {
+ // Swap res with head.
+ prev->flNext = head;
+ jitstd::swap(head->flNext, res->flNext);
+ jitstd::swap(head, res);
+ }
+ }
+ return head;
+}
+
+unsigned SsaStressHashHelper()
+{
+ // hash = 0: turned off, hash = 1: use method hash, hash = *: use custom hash.
+ unsigned hash = JitConfig.JitSsaStress();
+
+ if (hash == 0)
+ {
+ return hash;
+ }
+ if (hash == 1)
+ {
+ return JitTls::GetCompiler()->info.compMethodHash();
+ }
+ return ((hash >> 16) == 0) ? ((hash << 16) | hash) : hash;
+}
+#endif
+
+EHSuccessorIter::EHSuccessorIter(Compiler* comp, BasicBlock* block)
+ : m_comp(comp)
+ , m_block(block)
+ , m_curRegSucc(nullptr)
+ , m_curTry(comp->ehGetBlockExnFlowDsc(block))
+ , m_remainingRegSuccs(block->NumSucc(comp))
+{
+ // If "block" is a "leave helper" block (the empty BBJ_ALWAYS block that pairs with a
+ // preceding BBJ_CALLFINALLY block to implement a "leave" IL instruction), then no exceptions
+ // can occur within it, so clear m_curTry if it's non-null.
+ if (m_curTry != nullptr)
+ {
+ BasicBlock* beforeBlock = block->bbPrev;
+ if (beforeBlock != nullptr && beforeBlock->isBBCallAlwaysPair())
+ {
+ m_curTry = nullptr;
+ }
+ }
+
+ if (m_curTry == nullptr && m_remainingRegSuccs > 0)
+ {
+ // Examine the successors to see if any are the start of try blocks.
+ FindNextRegSuccTry();
+ }
+}
+
+void EHSuccessorIter::FindNextRegSuccTry()
+{
+ assert(m_curTry == nullptr);
+
+ // Must now consider the next regular successor, if any.
+ while (m_remainingRegSuccs > 0)
+ {
+ m_remainingRegSuccs--;
+ m_curRegSucc = m_block->GetSucc(m_remainingRegSuccs, m_comp);
+ if (m_comp->bbIsTryBeg(m_curRegSucc))
+ {
+ assert(m_curRegSucc->hasTryIndex()); // Since it is a try begin.
+ unsigned newTryIndex = m_curRegSucc->getTryIndex();
+
+ // If the try region started by "m_curRegSucc" (represented by newTryIndex) contains m_block,
+ // we've already yielded its handler, as one of the EH handler successors of m_block itself.
+ if (m_comp->bbInExnFlowRegions(newTryIndex, m_block))
+ {
+ continue;
+ }
+
+ // Otherwise, consider this try.
+ m_curTry = m_comp->ehGetDsc(newTryIndex);
+ break;
+ }
+ }
+}
+
+void EHSuccessorIter::operator++(void)
+{
+ assert(m_curTry != nullptr);
+ if (m_curTry->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ m_curTry = m_comp->ehGetDsc(m_curTry->ebdEnclosingTryIndex);
+
+ // If we've gone over into considering tries containing successors,
+ // then the enclosing try must have the successor as its first block.
+ if (m_curRegSucc == nullptr || m_curTry->ebdTryBeg == m_curRegSucc)
+ {
+ return;
+ }
+
+ // Otherwise, give up, try the next regular successor.
+ m_curTry = nullptr;
+ }
+ else
+ {
+ m_curTry = nullptr;
+ }
+
+ // We've exhausted all try blocks.
+ // See if there are any remaining regular successors that start try blocks.
+ FindNextRegSuccTry();
+}
+
+BasicBlock* EHSuccessorIter::operator*()
+{
+ assert(m_curTry != nullptr);
+ return m_curTry->ExFlowBlock();
+}
+
+flowList* Compiler::BlockPredsWithEH(BasicBlock* blk)
+{
+ BlockToFlowListMap* ehPreds = GetBlockToEHPreds();
+ flowList* res;
+ if (ehPreds->Lookup(blk, &res))
+ {
+ return res;
+ }
+
+ res = blk->bbPreds;
+ unsigned tryIndex;
+ if (bbIsExFlowBlock(blk, &tryIndex))
+ {
+ // Find the first block of the try.
+ EHblkDsc* ehblk = ehGetDsc(tryIndex);
+ BasicBlock* tryStart = ehblk->ebdTryBeg;
+ for (flowList* tryStartPreds = tryStart->bbPreds; tryStartPreds != nullptr;
+ tryStartPreds = tryStartPreds->flNext)
+ {
+ res = new (this, CMK_FlowList) flowList(tryStartPreds->flBlock, res);
+
+#if MEASURE_BLOCK_SIZE
+ genFlowNodeCnt += 1;
+ genFlowNodeSize += sizeof(flowList);
+#endif // MEASURE_BLOCK_SIZE
+ }
+
+ // Now add all blocks handled by this handler (except for second blocks of BBJ_CALLFINALLY/BBJ_ALWAYS pairs;
+ // these cannot cause transfer to the handler...)
+ BasicBlock* prevBB = nullptr;
+
+ // TODO-Throughput: It would be nice if we could iterate just over the blocks in the try, via
+ // something like:
+ // for (BasicBlock* bb = ehblk->ebdTryBeg; bb != ehblk->ebdTryLast->bbNext; bb = bb->bbNext)
+ // (plus adding in any filter blocks outside the try whose exceptions are handled here).
+ // That doesn't work, however: funclets have caused us to sometimes split the body of a try into
+ // more than one sequence of contiguous blocks. We need to find a better way to do this.
+ for (BasicBlock *bb = fgFirstBB; bb != nullptr; prevBB = bb, bb = bb->bbNext)
+ {
+ if (bbInExnFlowRegions(tryIndex, bb) && (prevBB == nullptr || !prevBB->isBBCallAlwaysPair()))
+ {
+ res = new (this, CMK_FlowList) flowList(bb, res);
+
+#if MEASURE_BLOCK_SIZE
+ genFlowNodeCnt += 1;
+ genFlowNodeSize += sizeof(flowList);
+#endif // MEASURE_BLOCK_SIZE
+ }
+ }
+
+#ifdef DEBUG
+ unsigned hash = SsaStressHashHelper();
+ if (hash != 0)
+ {
+ res = ShuffleHelper(hash, res);
+ }
+#endif // DEBUG
+
+ ehPreds->Set(blk, res);
+ }
+ return res;
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// dspBlockILRange(): Display the block's IL range as [XXX...YYY), where XXX and YYY might be "???" for BAD_IL_OFFSET.
+//
+void BasicBlock::dspBlockILRange()
+{
+ if (bbCodeOffs != BAD_IL_OFFSET)
+ {
+ printf("[%03X..", bbCodeOffs);
+ }
+ else
+ {
+ printf("[???"
+ "..");
+ }
+
+ if (bbCodeOffsEnd != BAD_IL_OFFSET)
+ {
+ // brace-matching editor workaround for following line: (
+ printf("%03X)", bbCodeOffsEnd);
+ }
+ else
+ {
+ // brace-matching editor workaround for following line: (
+ printf("???"
+ ")");
+ }
+}
+
+//------------------------------------------------------------------------
+// dspFlags: Print out the block's flags
+//
+void BasicBlock::dspFlags()
+{
+ if (bbFlags & BBF_VISITED)
+ {
+ printf("v ");
+ }
+ if (bbFlags & BBF_MARKED)
+ {
+ printf("m ");
+ }
+ if (bbFlags & BBF_CHANGED)
+ {
+ printf("! ");
+ }
+ if (bbFlags & BBF_REMOVED)
+ {
+ printf("del ");
+ }
+ if (bbFlags & BBF_DONT_REMOVE)
+ {
+ printf("keep ");
+ }
+ if (bbFlags & BBF_IMPORTED)
+ {
+ printf("i ");
+ }
+ if (bbFlags & BBF_INTERNAL)
+ {
+ printf("internal ");
+ }
+ if (bbFlags & BBF_FAILED_VERIFICATION)
+ {
+ printf("failV ");
+ }
+ if (bbFlags & BBF_TRY_BEG)
+ {
+ printf("try ");
+ }
+ if (bbFlags & BBF_NEEDS_GCPOLL)
+ {
+ printf("poll ");
+ }
+ if (bbFlags & BBF_RUN_RARELY)
+ {
+ printf("rare ");
+ }
+ if (bbFlags & BBF_LOOP_HEAD)
+ {
+ printf("Loop ");
+ }
+ if (bbFlags & BBF_LOOP_CALL0)
+ {
+ printf("Loop0 ");
+ }
+ if (bbFlags & BBF_LOOP_CALL1)
+ {
+ printf("Loop1 ");
+ }
+ if (bbFlags & BBF_HAS_LABEL)
+ {
+ printf("label ");
+ }
+ if (bbFlags & BBF_JMP_TARGET)
+ {
+ printf("target ");
+ }
+ if (bbFlags & BBF_HAS_JMP)
+ {
+ printf("jmp ");
+ }
+ if (bbFlags & BBF_GC_SAFE_POINT)
+ {
+ printf("gcsafe ");
+ }
+ if (bbFlags & BBF_FUNCLET_BEG)
+ {
+ printf("flet ");
+ }
+ if (bbFlags & BBF_HAS_IDX_LEN)
+ {
+ printf("idxlen ");
+ }
+ if (bbFlags & BBF_HAS_NEWARRAY)
+ {
+ printf("new[] ");
+ }
+ if (bbFlags & BBF_HAS_NEWOBJ)
+ {
+ printf("newobj ");
+ }
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (bbFlags & BBF_FINALLY_TARGET)
+ {
+ printf("ftarget ");
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (bbFlags & BBF_BACKWARD_JUMP)
+ {
+ printf("bwd ");
+ }
+ if (bbFlags & BBF_RETLESS_CALL)
+ {
+ printf("retless ");
+ }
+ if (bbFlags & BBF_LOOP_PREHEADER)
+ {
+ printf("LoopPH ");
+ }
+ if (bbFlags & BBF_COLD)
+ {
+ printf("cold ");
+ }
+ if (bbFlags & BBF_PROF_WEIGHT)
+ {
+ printf("IBC ");
+ }
+#ifdef LEGACY_BACKEND
+ if (bbFlags & BBF_FORWARD_SWITCH)
+ {
+ printf("fswitch ");
+ }
+#else // !LEGACY_BACKEND
+ if (bbFlags & BBF_IS_LIR)
+ {
+ printf("LIR ");
+ }
+#endif // LEGACY_BACKEND
+ if (bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ printf("KEEP ");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display the bbPreds basic block list (the block predecessors).
+ * Returns the number of characters printed.
+ */
+
+unsigned BasicBlock::dspPreds()
+{
+ unsigned count = 0;
+ for (flowList* pred = bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ if (count != 0)
+ {
+ printf(",");
+ count += 1;
+ }
+ printf("BB%02u", pred->flBlock->bbNum);
+ count += 4;
+
+ // Account for %02u only handling 2 digits, but we can display more than that.
+ unsigned digits = CountDigits(pred->flBlock->bbNum);
+ if (digits > 2)
+ {
+ count += digits - 2;
+ }
+
+ // Does this predecessor have an interesting dup count? If so, display it.
+ if (pred->flDupCount > 1)
+ {
+ printf("(%u)", pred->flDupCount);
+ count += 2 + CountDigits(pred->flDupCount);
+ }
+ }
+ return count;
+}
+
+/*****************************************************************************
+ *
+ * Display the bbCheapPreds basic block list (the block predecessors).
+ * Returns the number of characters printed.
+ */
+
+unsigned BasicBlock::dspCheapPreds()
+{
+ unsigned count = 0;
+ for (BasicBlockList* pred = bbCheapPreds; pred != nullptr; pred = pred->next)
+ {
+ if (count != 0)
+ {
+ printf(",");
+ count += 1;
+ }
+ printf("BB%02u", pred->block->bbNum);
+ count += 4;
+
+ // Account for %02u only handling 2 digits, but we can display more than that.
+ unsigned digits = CountDigits(pred->block->bbNum);
+ if (digits > 2)
+ {
+ count += digits - 2;
+ }
+ }
+ return count;
+}
+
+/*****************************************************************************
+ *
+ * Display the basic block successors.
+ * Returns the count of successors.
+ */
+
+unsigned BasicBlock::dspSuccs(Compiler* compiler)
+{
+ unsigned numSuccs = NumSucc(compiler);
+ unsigned count = 0;
+ for (unsigned i = 0; i < numSuccs; i++)
+ {
+ printf("%s", (count == 0) ? "" : ",");
+ printf("BB%02u", GetSucc(i, compiler)->bbNum);
+ count++;
+ }
+ return count;
+}
+
+// Display a compact representation of the bbJumpKind, that is, where this block branches.
+// This is similar to code in Compiler::fgTableDispBasicBlock(), but doesn't have that code's requirements to align
+// things strictly.
+void BasicBlock::dspJumpKind()
+{
+ switch (bbJumpKind)
+ {
+ case BBJ_EHFINALLYRET:
+ printf(" (finret)");
+ break;
+
+ case BBJ_EHFILTERRET:
+ printf(" (fltret)");
+ break;
+
+ case BBJ_EHCATCHRET:
+ printf(" -> BB%02u (cret)", bbJumpDest->bbNum);
+ break;
+
+ case BBJ_THROW:
+ printf(" (throw)");
+ break;
+
+ case BBJ_RETURN:
+ printf(" (return)");
+ break;
+
+ case BBJ_NONE:
+ // For fall-through blocks, print nothing.
+ break;
+
+ case BBJ_ALWAYS:
+ if (bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ printf(" -> BB%02u (ALWAYS)", bbJumpDest->bbNum);
+ }
+ else
+ {
+ printf(" -> BB%02u (always)", bbJumpDest->bbNum);
+ }
+ break;
+
+ case BBJ_LEAVE:
+ printf(" -> BB%02u (leave)", bbJumpDest->bbNum);
+ break;
+
+ case BBJ_CALLFINALLY:
+ printf(" -> BB%02u (callf)", bbJumpDest->bbNum);
+ break;
+
+ case BBJ_COND:
+ printf(" -> BB%02u (cond)", bbJumpDest->bbNum);
+ break;
+
+ case BBJ_SWITCH:
+ printf(" ->");
+
+ unsigned jumpCnt;
+ jumpCnt = bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = bbJumpSwt->bbsDstTab;
+ do
+ {
+ printf("%cBB%02u", (jumpTab == bbJumpSwt->bbsDstTab) ? ' ' : ',', (*jumpTab)->bbNum);
+ } while (++jumpTab, --jumpCnt);
+
+ printf(" (switch)");
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+}
+
+void BasicBlock::dspBlockHeader(Compiler* compiler,
+ bool showKind /*= true*/,
+ bool showFlags /*= false*/,
+ bool showPreds /*= true*/)
+{
+ printf("BB%02u ", bbNum);
+ dspBlockILRange();
+ if (showKind)
+ {
+ dspJumpKind();
+ }
+ if (showPreds)
+ {
+ printf(", preds={");
+ if (compiler->fgCheapPredsValid)
+ {
+ dspCheapPreds();
+ }
+ else
+ {
+ dspPreds();
+ }
+ printf("} succs={");
+ dspSuccs(compiler);
+ printf("}");
+ }
+ if (showFlags)
+ {
+ printf(" flags=0x%08x: ", bbFlags);
+ dspFlags();
+ }
+ printf("\n");
+}
+
+#endif // DEBUG
+
+// Allocation function for HeapPhiArg.
+void* BasicBlock::HeapPhiArg::operator new(size_t sz, Compiler* comp)
+{
+ return comp->compGetMem(sz, CMK_HeapPhiArg);
+}
+
+void BasicBlock::CloneBlockState(Compiler* compiler, BasicBlock* to, const BasicBlock* from)
+{
+ assert(to->bbTreeList == nullptr);
+
+ to->bbFlags = from->bbFlags;
+ to->bbWeight = from->bbWeight;
+ BlockSetOps::AssignAllowUninitRhs(compiler, to->bbReach, from->bbReach);
+ to->copyEHRegion(from);
+ to->bbCatchTyp = from->bbCatchTyp;
+ to->bbRefs = from->bbRefs;
+ to->bbStkTempsIn = from->bbStkTempsIn;
+ to->bbStkTempsOut = from->bbStkTempsOut;
+ to->bbStkDepth = from->bbStkDepth;
+ to->bbCodeOffs = from->bbCodeOffs;
+ to->bbCodeOffsEnd = from->bbCodeOffsEnd;
+ VarSetOps::AssignAllowUninitRhs(compiler, to->bbScope, from->bbScope);
+#if FEATURE_STACK_FP_X87
+ to->bbFPStateX87 = from->bbFPStateX87;
+#endif // FEATURE_STACK_FP_X87
+ to->bbNatLoopNum = from->bbNatLoopNum;
+#ifdef DEBUG
+ to->bbLoopNum = from->bbLoopNum;
+ to->bbTgtStkDepth = from->bbTgtStkDepth;
+#endif // DEBUG
+
+ for (GenTreePtr fromStmt = from->bbTreeList; fromStmt != nullptr; fromStmt = fromStmt->gtNext)
+ {
+ compiler->fgInsertStmtAtEnd(to,
+ compiler->fgNewStmtFromTree(compiler->gtCloneExpr(fromStmt->gtStmt.gtStmtExpr)));
+ }
+}
+
+// LIR helpers
+void BasicBlock::MakeLIR(GenTree* firstNode, GenTree* lastNode)
+{
+#ifdef LEGACY_BACKEND
+ unreached();
+#else // !LEGACY_BACKEND
+ assert(!IsLIR());
+ assert((firstNode == nullptr) == (lastNode == nullptr));
+ assert((firstNode == lastNode) || firstNode->Precedes(lastNode));
+
+ m_firstNode = firstNode;
+ m_lastNode = lastNode;
+ bbFlags |= BBF_IS_LIR;
+#endif // LEGACY_BACKEND
+}
+
+bool BasicBlock::IsLIR()
+{
+#ifdef LEGACY_BACKEND
+ return false;
+#else // !LEGACY_BACKEND
+ const bool isLIR = (bbFlags & BBF_IS_LIR) != 0;
+ assert((bbTreeList == nullptr) || ((isLIR) == !bbTreeList->IsStatement()));
+ return isLIR;
+#endif // LEGACY_BACKEND
+}
+
+//------------------------------------------------------------------------
+// firstStmt: Returns the first statement in the block
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// The first statement in the block's bbTreeList.
+//
+GenTreeStmt* BasicBlock::firstStmt()
+{
+ if (bbTreeList == nullptr)
+ {
+ return nullptr;
+ }
+
+ return bbTreeList->AsStmt();
+}
+
+//------------------------------------------------------------------------
+// lastStmt: Returns the last statement in the block
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// The last statement in the block's bbTreeList.
+//
+GenTreeStmt* BasicBlock::lastStmt()
+{
+ if (bbTreeList == nullptr)
+ {
+ return nullptr;
+ }
+
+ GenTree* result = bbTreeList->gtPrev;
+ assert(result && result->gtNext == nullptr);
+ return result->AsStmt();
+}
+
+//------------------------------------------------------------------------
+// BasicBlock::firstNode: Returns the first node in the block.
+//
+GenTree* BasicBlock::firstNode()
+{
+ return IsLIR() ? bbTreeList : Compiler::fgGetFirstNode(firstStmt()->gtStmtExpr);
+}
+
+//------------------------------------------------------------------------
+// BasicBlock::lastNode: Returns the last node in the block.
+//
+GenTree* BasicBlock::lastNode()
+{
+ return IsLIR() ? m_lastNode : lastStmt()->gtStmtExpr;
+}
+
+//------------------------------------------------------------------------
+// GetUniquePred: Returns the unique predecessor of a block, if one exists.
+// The predecessor lists must be accurate.
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// The unique predecessor of a block, or nullptr if there is no unique predecessor.
+//
+// Notes:
+// If the first block has a predecessor (which it may have, if it is the target of
+// a backedge), we never want to consider it "unique" because the prolog is an
+// implicit predecessor.
+
+BasicBlock* BasicBlock::GetUniquePred(Compiler* compiler)
+{
+ if ((bbPreds == nullptr) || (bbPreds->flNext != nullptr) || (this == compiler->fgFirstBB))
+ {
+ return nullptr;
+ }
+ else
+ {
+ return bbPreds->flBlock;
+ }
+}
+
+//------------------------------------------------------------------------
+// GetUniqueSucc: Returns the unique successor of a block, if one exists.
+// Only considers BBJ_ALWAYS and BBJ_NONE block types.
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// The unique successor of a block, or nullptr if there is no unique successor.
+
+BasicBlock* BasicBlock::GetUniqueSucc()
+{
+ if (bbJumpKind == BBJ_ALWAYS)
+ {
+ return bbJumpDest;
+ }
+ else if (bbJumpKind == BBJ_NONE)
+ {
+ return bbNext;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+// Static vars.
+BasicBlock::HeapPhiArg* BasicBlock::EmptyHeapPhiDef = (BasicBlock::HeapPhiArg*)0x1;
+
+unsigned PtrKeyFuncs<BasicBlock>::GetHashCode(const BasicBlock* ptr)
+{
+#ifdef DEBUG
+ unsigned hash = SsaStressHashHelper();
+ if (hash != 0)
+ {
+ return (hash ^ (ptr->bbNum << 16) ^ ptr->bbNum);
+ }
+#endif
+ return ptr->bbNum;
+}
+
+bool BasicBlock::isEmpty()
+{
+ if (!IsLIR())
+ {
+ return (this->FirstNonPhiDef() == nullptr);
+ }
+
+ for (GenTree* node : LIR::AsRange(this).NonPhiNodes())
+ {
+ if (node->OperGet() != GT_IL_OFFSET)
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/src/jit/block.h b/src/jit/block.h
new file mode 100644
index 0000000000..ecfbb620a1
--- /dev/null
+++ b/src/jit/block.h
@@ -0,0 +1,1313 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX BasicBlock XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef _BLOCK_H_
+#define _BLOCK_H_
+/*****************************************************************************/
+
+#include "vartype.h" // For "var_types.h"
+#include "_typeinfo.h"
+/*****************************************************************************/
+
+// Defines VARSET_TP
+#include "varset.h"
+
+#include "blockset.h"
+#include "jitstd.h"
+#include "bitvec.h"
+#include "simplerhash.h"
+
+/*****************************************************************************/
+
+#if LARGE_EXPSET
+typedef unsigned __int64 EXPSET_TP;
+#define EXPSET_SZ 64
+#else
+typedef unsigned int EXPSET_TP;
+#define EXPSET_SZ 32
+#endif
+
+#define EXPSET_ALL ((EXPSET_TP)0 - 1)
+
+typedef BitVec ASSERT_TP;
+typedef BitVec_ValArg_T ASSERT_VALARG_TP;
+typedef BitVec_ValRet_T ASSERT_VALRET_TP;
+
+/*****************************************************************************
+ *
+ * Each basic block ends with a jump which is described as a value
+ * of the following enumeration.
+ */
+
+DECLARE_TYPED_ENUM(BBjumpKinds, BYTE)
+{
+ BBJ_EHFINALLYRET, // block ends with 'endfinally' (for finally or fault)
+ BBJ_EHFILTERRET, // block ends with 'endfilter'
+ BBJ_EHCATCHRET, // block ends with a leave out of a catch (only #if FEATURE_EH_FUNCLETS)
+ BBJ_THROW, // block ends with 'throw'
+ BBJ_RETURN, // block ends with 'ret'
+
+ BBJ_NONE, // block flows into the next one (no jump)
+
+ BBJ_ALWAYS, // block always jumps to the target
+ BBJ_LEAVE, // block always jumps to the target, maybe out of guarded
+ // region. Used temporarily until importing
+ BBJ_CALLFINALLY, // block always calls the target finally
+ BBJ_COND, // block conditionally jumps to the target
+ BBJ_SWITCH, // block ends with a switch statement
+
+ BBJ_COUNT
+}
+END_DECLARE_TYPED_ENUM(BBjumpKinds, BYTE)
+
+struct GenTree;
+struct GenTreeStmt;
+struct BasicBlock;
+class Compiler;
+class typeInfo;
+struct BasicBlockList;
+struct flowList;
+struct EHblkDsc;
+
+#if FEATURE_STACK_FP_X87
+struct FlatFPStateX87;
+#endif
+
+/*****************************************************************************
+ *
+ * The following describes a switch block.
+ *
+ * Things to know:
+ * 1. If bbsHasDefault is true, the default case is the last one in the array of basic block addresses
+ * namely bbsDstTab[bbsCount - 1].
+ * 2. bbsCount must be at least 1, for the default case. bbsCount cannot be zero. It appears that the ECMA spec
+ * allows for a degenerate switch with zero cases. Normally, the optimizer will optimize degenerate
+ * switches with just a default case to a BBJ_ALWAYS branch, and a switch with just two cases to a BBJ_COND.
+ * However, in debuggable code, we might not do that, so bbsCount might be 1.
+ */
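+// A sketch of how the default case is typically fetched (using the accessors defined just below;
+// "block" is assumed to be a BBJ_SWITCH block):
+//
+//     if (block->bbJumpSwt->bbsHasDefault)
+//     {
+//         BasicBlock* defaultDst = block->bbJumpSwt->getDefault(); // == bbsDstTab[bbsCount - 1]
+//     }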
+struct BBswtDesc
+{
+ unsigned bbsCount; // count of cases (includes 'default' if bbsHasDefault)
+ BasicBlock** bbsDstTab; // case label table address
+ bool bbsHasDefault;
+
+ BBswtDesc() : bbsHasDefault(true)
+ {
+ }
+
+ void removeDefault()
+ {
+ assert(bbsHasDefault);
+ assert(bbsCount > 0);
+ bbsHasDefault = false;
+ bbsCount--;
+ }
+
+ BasicBlock* getDefault()
+ {
+ assert(bbsHasDefault);
+ assert(bbsCount > 0);
+ return bbsDstTab[bbsCount - 1];
+ }
+};
+
+struct StackEntry
+{
+ GenTree* val;
+ typeInfo seTypeInfo;
+};
+/*****************************************************************************/
+
+enum ThisInitState
+{
+ TIS_Bottom, // We don't know anything about the 'this' pointer.
+ TIS_Uninit, // The 'this' pointer for this constructor is known to be uninitialized.
+ TIS_Init, // The 'this' pointer for this constructor is known to be initialized.
+ TIS_Top, // This results from merging the state of two blocks, one with TIS_Uninit and the other with TIS_Init.
+ // We use this in fault blocks to prevent us from accessing the 'this' pointer, while otherwise
+ // allowing the fault block to generate code.
+};
+
+struct EntryState
+{
+ ThisInitState thisInitialized : 8; // used to track whether the this ptr is initialized (we could use
+ // fewer bits here)
+ unsigned esStackDepth : 24; // size of esStack
+ StackEntry* esStack; // ptr to stack
+};
+
+// This encapsulates the "exception handling" successors of a block. That is,
+// if a basic block BB1 occurs in a try block, we consider the first basic block
+// BB2 of the corresponding handler to be an "EH successor" of BB1. Because we
+// make the conservative assumption that control flow can jump from a try block
+// to its handler at any time, the immediate (regular control flow)
+// predecessor(s) of the first block of a try block are also considered to
+// have the first block of the handler as an EH successor. This makes variables that
+// are "live-in" to the handler become "live-out" for these try-predecessor blocks,
+// so that they become live-in to the try -- which we require.
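+//
+// A sketch of a typical traversal (assuming the caller already has a Compiler* "comp" and a
+// BasicBlock* "block" in hand), using the EHSuccs collection returned by BasicBlock::GetEHSuccs()
+// further below:
+//
+//     for (BasicBlock* ehSucc : block->GetEHSuccs(comp))
+//     {
+//         // ... ehSucc is the exception-flow block (handler or filter entry) of one EH successor ...
+//     }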
+class EHSuccessorIter
+{
+ // The current compilation.
+ Compiler* m_comp;
+
+ // The block whose EH successors we are iterating over.
+ BasicBlock* m_block;
+
+ // The current "regular" successor of "m_block" that we're considering.
+ BasicBlock* m_curRegSucc;
+
+ // The current try block. If non-null, the EH successor currently being yielded is the first
+ // block of this try's handler (and, when "m_curRegSucc" is non-null, "m_curRegSucc" is the
+ // first block of this try). While this try has enclosing tries that also start with
+ // "m_curRegSucc", the corresponding handlers will be further EH successors.
+ EHblkDsc* m_curTry;
+
+ // The number of "regular" (i.e., non-exceptional) successors that remain to
+ // be considered. If BB1 has successor BB2, and BB2 is the first block of a
+ // try block, then we consider the catch block of BB2's try to be an EH
+ // successor of BB1. This captures the iteration over the successors of BB1
+ // for this purpose. (In reverse order; we're done when this field is 0).
+ int m_remainingRegSuccs;
+
+ // Requires that "m_curTry" is NULL. Determines whether there is, as
+ // discussed just above, a regular successor that's the first block of a
+ // try; if so, sets "m_curTry" to that try block. (As noted above, selecting
+ // the try containing the current regular successor as the "current try" may cause
+ // multiple first-blocks of catches to be yielded as EH successors: tries enclosing
+ // the current try are also included if they also start with the current EH successor.)
+ void FindNextRegSuccTry();
+
+public:
+ // Returns the standard "end" iterator.
+ EHSuccessorIter()
+ : m_comp(nullptr), m_block(nullptr), m_curRegSucc(nullptr), m_curTry(nullptr), m_remainingRegSuccs(0)
+ {
+ }
+
+ // Initializes the iterator to represent the EH successors of "block".
+ EHSuccessorIter(Compiler* comp, BasicBlock* block);
+
+ // Go on to the next EH successor.
+ void operator++(void);
+
+ // Requires that "this" is not equal to the standard "end" iterator. Returns the
+ // current EH successor.
+ BasicBlock* operator*();
+
+ // Returns "true" iff "*this" is equal to "ehsi" -- ignoring the "m_comp"
+ // and "m_block" fields.
+ bool operator==(const EHSuccessorIter& ehsi)
+ {
+ // Ignore the compiler; we'll assume that's the same.
+ return m_curTry == ehsi.m_curTry && m_remainingRegSuccs == ehsi.m_remainingRegSuccs;
+ }
+
+ bool operator!=(const EHSuccessorIter& ehsi)
+ {
+ return !((*this) == ehsi);
+ }
+};
+
+// Yields both normal and EH successors (in that order) in one iteration.
+class AllSuccessorIter
+{
+ // Normal succ state.
+ Compiler* m_comp;
+ BasicBlock* m_blk;
+ unsigned m_normSucc;
+ unsigned m_numNormSuccs;
+ EHSuccessorIter m_ehIter;
+
+ // True iff m_blk is a BBJ_CALLFINALLY block, and the current try block of m_ehIter,
+ // the first block of whose handler would be next yielded, is the jump target of m_blk.
+ inline bool CurTryIsBlkCallFinallyTarget();
+
+public:
+ inline AllSuccessorIter()
+ {
+ }
+
+ // Initializes "this" to iterate over all successors of "block."
+ inline AllSuccessorIter(Compiler* comp, BasicBlock* block);
+
+ // Used for constructing an appropriate "end" iter. Should be called with
+ // the number of normal successors of the block being iterated.
+ AllSuccessorIter(unsigned numSuccs) : m_normSucc(numSuccs), m_numNormSuccs(numSuccs), m_ehIter()
+ {
+ }
+
+ // Go on to the next successor.
+ inline void operator++(void);
+
+ // Requires that "this" is not equal to the standard "end" iterator. Returns the
+ // current successor.
+ inline BasicBlock* operator*();
+
+ // Returns "true" iff "*this" is equal to "asi" -- ignoring the "m_comp"
+ // and "m_block" fields.
+ bool operator==(const AllSuccessorIter& asi)
+ {
+ return m_normSucc == asi.m_normSucc && m_ehIter == asi.m_ehIter;
+ }
+
+ bool operator!=(const AllSuccessorIter& asi)
+ {
+ return !((*this) == asi);
+ }
+};
+
+//------------------------------------------------------------------------
+// BasicBlock: describes a basic block in the flowgraph.
+//
+// Note that this type derives from LIR::Range in order to make the LIR
+// utilities that are polymorphic over basic block and scratch ranges
+// faster and simpler.
+//
+struct BasicBlock : private LIR::Range
+{
+ friend class LIR;
+
+ BasicBlock* bbNext; // next BB in ascending PC offset order
+ BasicBlock* bbPrev;
+
+ void setNext(BasicBlock* next)
+ {
+ bbNext = next;
+ if (next)
+ {
+ next->bbPrev = this;
+ }
+ }
+
+ unsigned bbNum; // the block's number
+
+ unsigned bbPostOrderNum; // the block's post order number in the graph.
+ unsigned bbRefs; // number of blocks that can reach here, either by fall-through or a branch. If this falls to zero,
+ // the block is unreachable.
+
+ unsigned bbFlags; // see BBF_xxxx below
+
+#define BBF_VISITED 0x00000001 // BB visited during optimizations
+#define BBF_MARKED 0x00000002 // BB marked during optimizations
+#define BBF_CHANGED 0x00000004 // input/output of this block has changed
+#define BBF_REMOVED 0x00000008 // BB has been removed from bb-list
+
+#define BBF_DONT_REMOVE 0x00000010 // BB should not be removed during flow graph optimizations
+#define BBF_IMPORTED 0x00000020 // BB byte-code has been imported
+#define BBF_INTERNAL 0x00000040 // BB has been added by the compiler
+#define BBF_FAILED_VERIFICATION 0x00000080 // BB has verification exception
+
+#define BBF_TRY_BEG 0x00000100 // BB starts a 'try' block
+#define BBF_FUNCLET_BEG 0x00000200 // BB is the beginning of a funclet
+#define BBF_HAS_NULLCHECK 0x00000400 // BB contains a null check
+#define BBF_NEEDS_GCPOLL 0x00000800 // This BB is the source of a back edge and needs a GC Poll
+
+#define BBF_RUN_RARELY 0x00001000 // BB is rarely run (catch clauses, blocks with throws etc)
+#define BBF_LOOP_HEAD 0x00002000 // BB is the head of a loop
+#define BBF_LOOP_CALL0 0x00004000 // BB starts a loop that sometimes won't call
+#define BBF_LOOP_CALL1 0x00008000 // BB starts a loop that will always call
+
+#define BBF_HAS_LABEL 0x00010000 // BB needs a label
+#define BBF_JMP_TARGET 0x00020000 // BB is a target of an implicit/explicit jump
+#define BBF_HAS_JMP 0x00040000 // BB executes a JMP instruction (instead of return)
+#define BBF_GC_SAFE_POINT 0x00080000 // BB has a GC safe point (a call). More abstractly, BB does not
+ // require a (further) poll -- this may be because this BB has a
+ // call, or, in some cases, because the BB occurs in a loop, and
+ // we've determined that all paths in the loop body leading to BB
+ // include a call.
+#define BBF_HAS_VTABREF 0x00100000 // BB contains reference of vtable
+#define BBF_HAS_IDX_LEN 0x00200000 // BB contains simple index or length expressions on an array local var.
+#define BBF_HAS_NEWARRAY 0x00400000 // BB contains 'new' of an array
+#define BBF_HAS_NEWOBJ 0x00800000 // BB contains 'new' of an object type.
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+#define BBF_FINALLY_TARGET 0x01000000 // BB is the target of a finally return: where a finally will return during
+ // non-exceptional flow. Because the ARM calling sequence for calling a
+ // finally explicitly sets the return address to the finally target and jumps
+ // to the finally, instead of using a call instruction, ARM needs this to
+ // generate correct code at the finally target, to allow for proper stack
+ // unwind from within a non-exceptional call to a finally.
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+#define BBF_BACKWARD_JUMP 0x02000000 // BB is surrounded by a backward jump/switch arc
+#define BBF_RETLESS_CALL 0x04000000 // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired
+ // BBJ_ALWAYS); see isBBCallAlwaysPair().
+#define BBF_LOOP_PREHEADER 0x08000000 // BB is a loop preheader block
+
+#define BBF_COLD 0x10000000 // BB is cold
+#define BBF_PROF_WEIGHT 0x20000000 // BB weight is computed from profile data
+#ifdef LEGACY_BACKEND
+#define BBF_FORWARD_SWITCH 0x40000000 // Aux flag used in FP codegen to know if a jmptable entry has been forwarded
+#else // !LEGACY_BACKEND
+#define BBF_IS_LIR 0x40000000 // Set if the basic block contains LIR (as opposed to HIR)
+#endif // LEGACY_BACKEND
+#define BBF_KEEP_BBJ_ALWAYS 0x80000000 // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind
+ // as BBJ_ALWAYS. Used for the paired BBJ_ALWAYS block following the
+ // BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a
+ // finally.
+
+ bool isRunRarely()
+ {
+ return ((bbFlags & BBF_RUN_RARELY) != 0);
+ }
+ bool isLoopHead()
+ {
+ return ((bbFlags & BBF_LOOP_HEAD) != 0);
+ }
+
+// Flags to update when two blocks are compacted
+
+#define BBF_COMPACT_UPD \
+ (BBF_CHANGED | BBF_GC_SAFE_POINT | BBF_HAS_JMP | BBF_NEEDS_GCPOLL | BBF_HAS_IDX_LEN | BBF_BACKWARD_JUMP | \
+ BBF_HAS_NEWARRAY | BBF_HAS_NEWOBJ)
+
+// Flags that a block should not have before it is split.
+
+#ifdef LEGACY_BACKEND
+#define BBF_SPLIT_NONEXIST \
+ (BBF_CHANGED | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1 | BBF_RETLESS_CALL | BBF_LOOP_PREHEADER | \
+ BBF_COLD | BBF_FORWARD_SWITCH)
+#else // !LEGACY_BACKEND
+#define BBF_SPLIT_NONEXIST \
+ (BBF_CHANGED | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1 | BBF_RETLESS_CALL | BBF_LOOP_PREHEADER | BBF_COLD)
+#endif // LEGACY_BACKEND
+
+// Flags lost by the top block when a block is split.
+// Note, this is a conservative guess.
+// For example, the top block might or might not have BBF_GC_SAFE_POINT,
+// but we assume it does not have BBF_GC_SAFE_POINT any more.
+
+#define BBF_SPLIT_LOST (BBF_GC_SAFE_POINT | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS)
+
+// Flags gained by the bottom block when a block is split.
+// Note, this is a conservative guess.
+// For example, the bottom block might or might not have BBF_HAS_NEWARRAY,
+// but we assume it has BBF_HAS_NEWARRAY.
+
+// TODO: Should BBF_RUN_RARELY be added to BBF_SPLIT_GAINED ?
+
+#define BBF_SPLIT_GAINED \
+ (BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_NEWARRAY | \
+ BBF_PROF_WEIGHT | BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS)
+
+#ifndef __GNUC__ // GCC doesn't like C_ASSERT at global scope
+ static_assert_no_msg((BBF_SPLIT_NONEXIST & BBF_SPLIT_LOST) == 0);
+ static_assert_no_msg((BBF_SPLIT_NONEXIST & BBF_SPLIT_GAINED) == 0);
+#endif
+
+#ifdef DEBUG
+ void dspFlags(); // Print the flags
+ unsigned dspCheapPreds(); // Print the predecessors (bbCheapPreds)
+ unsigned dspPreds(); // Print the predecessors (bbPreds)
+ unsigned dspSuccs(Compiler* compiler); // Print the successors. The 'compiler' argument determines whether EH
+ // regions are printed: see NumSucc() for details.
+ void dspJumpKind(); // Print the block jump kind (e.g., BBJ_NONE, BBJ_COND, etc.).
+ void dspBlockHeader(Compiler* compiler,
+ bool showKind = true,
+ bool showFlags = false,
+ bool showPreds = true); // Print a simple basic block header for various output, including a
+ // list of predecessors and successors.
+#endif // DEBUG
+
+ typedef unsigned weight_t; // Type used to hold block and edge weights
+ // Note that for CLR v2.0 and earlier our
+ // block weights were stored using unsigned shorts
+
+#define BB_UNITY_WEIGHT 100 // how much a normal execute-once block weighs
+#define BB_LOOP_WEIGHT 8 // how much more loops are weighted
+#define BB_ZERO_WEIGHT 0
+#define BB_MAX_WEIGHT ULONG_MAX // we're using an 'unsigned' for the weight
+#define BB_VERY_HOT_WEIGHT 256 // how many average hits a BB has (per BBT scenario run) for this block
+ // to be considered as very hot
+
+ weight_t bbWeight; // The dynamic execution weight of this block
+
+ // getBBWeight -- get the normalized weight of this block
+ unsigned getBBWeight(Compiler* comp);
+
+ // setBBWeight -- if the block weight is not derived from a profile, then set the weight to the input
+ // weight, but make sure to not overflow BB_MAX_WEIGHT
+ void setBBWeight(unsigned weight)
+ {
+ if (!(this->bbFlags & BBF_PROF_WEIGHT))
+ {
+ this->bbWeight = min(weight, BB_MAX_WEIGHT);
+ }
+ }
+
+ // modifyBBWeight -- same as setBBWeight, but also make sure that if the block is rarely run, it stays that
+ // way, and if it's not rarely run then its weight never drops below 1.
+ void modifyBBWeight(unsigned weight)
+ {
+ if (this->bbWeight != BB_ZERO_WEIGHT)
+ {
+ setBBWeight(max(weight, 1));
+ }
+ }
+
+ // setBBProfileWeight -- Set the profile-derived weight for a basic block
+ void setBBProfileWeight(unsigned weight)
+ {
+ this->bbFlags |= BBF_PROF_WEIGHT;
+ // Check if the multiplication by BB_UNITY_WEIGHT will overflow.
+ this->bbWeight = (weight <= BB_MAX_WEIGHT / BB_UNITY_WEIGHT) ? weight * BB_UNITY_WEIGHT : BB_MAX_WEIGHT;
+ }
+
+ // this block will inherit the same weight and relevant bbFlags as bSrc
+ void inheritWeight(BasicBlock* bSrc)
+ {
+ this->bbWeight = bSrc->bbWeight;
+
+ if (bSrc->bbFlags & BBF_PROF_WEIGHT)
+ {
+ this->bbFlags |= BBF_PROF_WEIGHT;
+ }
+ else
+ {
+ this->bbFlags &= ~BBF_PROF_WEIGHT;
+ }
+
+ if (this->bbWeight == 0)
+ {
+ this->bbFlags |= BBF_RUN_RARELY;
+ }
+ else
+ {
+ this->bbFlags &= ~BBF_RUN_RARELY;
+ }
+ }
+
+ // Similar to inheritWeight(), but we're splitting a block (such as creating blocks for qmark removal).
+ // So, specify a percentage (0 to 99; if it's 100, just use inheritWeight()) of the weight that we're
+ // going to inherit. Since the number isn't exact, clear the BBF_PROF_WEIGHT flag.
+ void inheritWeightPercentage(BasicBlock* bSrc, unsigned percentage)
+ {
+ assert(0 <= percentage && percentage < 100);
+
+ // Check for overflow
+ if (bSrc->bbWeight * 100 <= bSrc->bbWeight)
+ {
+ this->bbWeight = bSrc->bbWeight;
+ }
+ else
+ {
+ this->bbWeight = bSrc->bbWeight * percentage / 100;
+ }
+
+ this->bbFlags &= ~BBF_PROF_WEIGHT;
+
+ if (this->bbWeight == 0)
+ {
+ this->bbFlags |= BBF_RUN_RARELY;
+ }
+ else
+ {
+ this->bbFlags &= ~BBF_RUN_RARELY;
+ }
+ }
+
+ // makeBlockHot()
+ // This is used to override any profiling data
+ // and force a block to be in the hot region.
+ // We only call this method for handler entry point
+ // and only when HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION is 1.
+ // Doing this helps fgReorderBlocks() by telling
+ // it to try to move these blocks into the hot region.
+ // Note that we do this strictly as an optimization,
+ // not for correctness. fgDetermineFirstColdBlock()
+ // will find all handler entry points and ensure that
+ // for now we don't place them in the cold section.
+ //
+ void makeBlockHot()
+ {
+ if (this->bbWeight == BB_ZERO_WEIGHT)
+ {
+ this->bbFlags &= ~BBF_RUN_RARELY; // Clear any RarelyRun flag
+ this->bbFlags &= ~BBF_PROF_WEIGHT; // Clear any profile-derived flag
+ this->bbWeight = 1;
+ }
+ }
+
+ bool isMaxBBWeight()
+ {
+ return (bbWeight == BB_MAX_WEIGHT);
+ }
+
+ // Returns "true" if the block is empty. Empty here means there are no statement
+ // trees *except* PHI definitions.
+ bool isEmpty();
+
+ // Returns "true" iff "this" is the first block of a BBJ_CALLFINALLY/BBJ_ALWAYS pair --
+ // a block corresponding to an exit from the try of a try/finally. In the flow graph,
+ // this becomes a block that calls the finally, and a second, immediately
+ // following empty block (in the bbNext chain) to which the finally will return, and which
+ // branches unconditionally to the next block to be executed outside the try/finally.
+ // Note that code is often generated differently than this description. For example, on ARM,
+ // the target of the BBJ_ALWAYS is loaded in LR (the return register), and a direct jump is
+ // made to the 'finally'. The effect is that the 'finally' returns directly to the target of
+ // the BBJ_ALWAYS. A "retless" BBJ_CALLFINALLY is one that has no corresponding BBJ_ALWAYS.
+ // This can happen if the finally is known to not return (e.g., it contains a 'throw'). In
+ // that case, the BBJ_CALLFINALLY block's flags have BBF_RETLESS_CALL set. Note that ARM never has
+ // "retless" BBJ_CALLFINALLY blocks due to a requirement to use the BBJ_ALWAYS for
+ // generating code.
+ bool isBBCallAlwaysPair()
+ {
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (this->bbJumpKind == BBJ_CALLFINALLY)
+#else
+ if ((this->bbJumpKind == BBJ_CALLFINALLY) && !(this->bbFlags & BBF_RETLESS_CALL))
+#endif
+ {
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // On ARM, there are no retless BBJ_CALLFINALLY.
+ assert(!(this->bbFlags & BBF_RETLESS_CALL));
+#endif
+ // Assert that the next block is a BBJ_ALWAYS of the proper form.
+ assert(this->bbNext != nullptr);
+ assert(this->bbNext->bbJumpKind == BBJ_ALWAYS);
+ assert(this->bbNext->bbFlags & BBF_KEEP_BBJ_ALWAYS);
+ assert(this->bbNext->isEmpty());
+
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ BBjumpKinds bbJumpKind; // jump (if any) at the end of this block
+
+ /* The following union describes the jump target(s) of this block */
+ union {
+ unsigned bbJumpOffs; // PC offset (temporary only)
+ BasicBlock* bbJumpDest; // basic block
+ BBswtDesc* bbJumpSwt; // switch descriptor
+ };
+
+ // NumSucc() gives the number of successors, and GetSucc() allows one to iterate over them.
+ //
+ // The behavior of both for blocks that end in BBJ_EHFINALLYRET (a return from a finally or fault block)
+ // depends on whether "comp" is non-null. If it is null, then the block is considered to have no
+ // successor. If it is non-null, we figure out the actual successors. Some cases will want one behavior,
+ // other cases the other. For example, IL verification requires that these blocks end in an empty operand
+ // stack, and since the dataflow analysis of IL verification is concerned only with the contents of the
+ // operand stack, we can consider the finally block to have no successors. But a more general dataflow
+ // analysis that is tracking the contents of local variables might want to consider *all* successors,
+ // and would pass the current Compiler object.
+ //
+ // Similarly, BBJ_EHFILTERRET blocks are assumed to have no successors if "comp" is null; if non-null,
+ // NumSucc/GetSucc yields the first block of the try block's handler.
+ //
+ // Also, the behavior for switches changes depending on the value of "comp". If it is null, then all
+ // switch successors are returned. If it is non-null, then only unique switch successors are returned;
+ // the duplicate successors are omitted.
+ //
+ // Note that for BBJ_COND, which has two successors (fall through and condition true branch target),
+ // only the unique targets are returned. Thus, if both targets are the same, NumSucc() will only return 1
+ // instead of 2.
+ //
+ // Returns the number of successors of "this".
+ unsigned NumSucc(Compiler* comp = nullptr);
+
+ // Returns the "i"th successor. Requires (0 <= i < NumSucc()).
+ BasicBlock* GetSucc(unsigned i, Compiler* comp = nullptr);
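+
+ // A sketch of the usual way these two are combined (mirroring BasicBlock::dspSuccs() in block.cpp;
+ // pass nullptr for "comp" to get the comp-independent behavior described above):
+ //
+ //     unsigned numSuccs = block->NumSucc(comp);
+ //     for (unsigned i = 0; i < numSuccs; i++)
+ //     {
+ //         BasicBlock* succ = block->GetSucc(i, comp);
+ //         // ... process succ ...
+ //     }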
+
+ BasicBlock* GetUniquePred(Compiler* comp);
+
+ BasicBlock* GetUniqueSucc();
+
+ unsigned countOfInEdges() const
+ {
+ return bbRefs;
+ }
+
+ __declspec(property(get = getBBTreeList, put = setBBTreeList)) GenTree* bbTreeList; // the body of the block.
+
+ GenTree* getBBTreeList() const
+ {
+ return m_firstNode;
+ }
+
+ void setBBTreeList(GenTree* tree)
+ {
+ m_firstNode = tree;
+ }
+
+ EntryState* bbEntryState; // verifier tracked state of all entries in stack.
+
+#define NO_BASE_TMP UINT_MAX // base# to use when we have none
+ unsigned bbStkTempsIn; // base# for input stack temps
+ unsigned bbStkTempsOut; // base# for output stack temps
+
+#define MAX_XCPTN_INDEX (USHRT_MAX - 1)
+
+ // It would be nice to make bbTryIndex and bbHndIndex private, but there is still code that uses them directly,
+ // especially Compiler::fgNewBBinRegion() and friends.
+
+ // index, into the compHndBBtab table, of innermost 'try' clause containing the BB (used for raising exceptions).
+ // Stored as index + 1; 0 means "no try index".
+ unsigned short bbTryIndex;
+
+ // index, into the compHndBBtab table, of innermost handler (filter, catch, fault/finally) containing the BB.
+ // Stored as index + 1; 0 means "no handler index".
+ unsigned short bbHndIndex;
+
+ // Given two EH indices that are either bbTryIndex or bbHndIndex (or related), determine if index1 might be more
+ // deeply nested than index2. Both index1 and index2 are in the range [0..compHndBBtabCount], where 0 means
+ // "main function" and otherwise the value is an index into compHndBBtab[]. Note that "sibling" EH regions will
+ // have a numeric index relationship that doesn't indicate nesting, whereas a more deeply nested region must have
+ // a lower index than the region it is nested within. Note that if you compare a single block's bbTryIndex and
+ // bbHndIndex, there is guaranteed to be a nesting relationship, since that block can't be simultaneously in two
+ // sibling EH regions. In that case, "maybe" is actually "definitely".
+ static bool ehIndexMaybeMoreNested(unsigned index1, unsigned index2)
+ {
+ if (index1 == 0)
+ {
+ // index1 is in the main method. It can't be more deeply nested than index2.
+ return false;
+ }
+ else if (index2 == 0)
+ {
+ // index1 represents an EH region, whereas index2 is the main method. Thus, index1 is more deeply nested.
+ assert(index1 > 0);
+ return true;
+ }
+ else
+ {
+ // If index1 has a smaller index, it might be more deeply nested than index2.
+ assert(index1 > 0);
+ assert(index2 > 0);
+ return index1 < index2;
+ }
+ }
+
+ // catch type: class token of handler, or one of BBCT_*. Only set on first block of catch handler.
+ unsigned bbCatchTyp;
+
+ bool hasTryIndex() const
+ {
+ return bbTryIndex != 0;
+ }
+ bool hasHndIndex() const
+ {
+ return bbHndIndex != 0;
+ }
+ unsigned getTryIndex() const
+ {
+ assert(bbTryIndex != 0);
+ return bbTryIndex - 1;
+ }
+ unsigned getHndIndex() const
+ {
+ assert(bbHndIndex != 0);
+ return bbHndIndex - 1;
+ }
+ void setTryIndex(unsigned val)
+ {
+ bbTryIndex = (unsigned short)(val + 1);
+ assert(bbTryIndex != 0);
+ }
+ void setHndIndex(unsigned val)
+ {
+ bbHndIndex = (unsigned short)(val + 1);
+ assert(bbHndIndex != 0);
+ }
+ void clearTryIndex()
+ {
+ bbTryIndex = 0;
+ }
+ void clearHndIndex()
+ {
+ bbHndIndex = 0;
+ }
+
+ void copyEHRegion(const BasicBlock* from)
+ {
+ bbTryIndex = from->bbTryIndex;
+ bbHndIndex = from->bbHndIndex;
+ }
+
+ static bool sameTryRegion(const BasicBlock* blk1, const BasicBlock* blk2)
+ {
+ return blk1->bbTryIndex == blk2->bbTryIndex;
+ }
+ static bool sameHndRegion(const BasicBlock* blk1, const BasicBlock* blk2)
+ {
+ return blk1->bbHndIndex == blk2->bbHndIndex;
+ }
+ static bool sameEHRegion(const BasicBlock* blk1, const BasicBlock* blk2)
+ {
+ return sameTryRegion(blk1, blk2) && sameHndRegion(blk1, blk2);
+ }
+
+// Some non-zero value that will not collide with real tokens for bbCatchTyp
+#define BBCT_NONE 0x00000000
+#define BBCT_FAULT 0xFFFFFFFC
+#define BBCT_FINALLY 0xFFFFFFFD
+#define BBCT_FILTER 0xFFFFFFFE
+#define BBCT_FILTER_HANDLER 0xFFFFFFFF
+#define handlerGetsXcptnObj(hndTyp) ((hndTyp) != BBCT_NONE && (hndTyp) != BBCT_FAULT && (hndTyp) != BBCT_FINALLY)
+
+ // TODO-Cleanup: Get rid of bbStkDepth and use bbStackDepthOnEntry() instead
+ union {
+ unsigned short bbStkDepth; // stack depth on entry
+ unsigned short bbFPinVars; // number of inner enregistered FP vars
+ };
+
+ // Basic block predecessor lists. Early in compilation, some phases might need to compute "cheap" predecessor
+ // lists. These are stored in bbCheapPreds, computed by fgComputeCheapPreds(). If bbCheapPreds is valid,
+ // 'fgCheapPredsValid' will be 'true'. Later, the "full" predecessor lists are created by fgComputePreds(), stored
+ // in 'bbPreds', and then maintained throughout compilation. 'fgComputePredsDone' will be 'true' after the
+ // full predecessor lists are created. See the comment at fgComputeCheapPreds() to see how those differ from
+ // the "full" variant.
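+ //
+ // A sketch of walking whichever predecessor list is currently valid (this mirrors what
+ // dspBlockHeader()/dspPreds()/dspCheapPreds() do in block.cpp; "compiler" stands for whatever
+ // Compiler* the caller has):
+ //
+ //     if (compiler->fgCheapPredsValid)
+ //         for (BasicBlockList* p = block->bbCheapPreds; p != nullptr; p = p->next) { /* p->block */ }
+ //     else
+ //         for (flowList* p = block->bbPreds; p != nullptr; p = p->flNext) { /* p->flBlock */ }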
+ union {
+ BasicBlockList* bbCheapPreds; // ptr to list of cheap predecessors (used before normal preds are computed)
+ flowList* bbPreds; // ptr to list of predecessors
+ };
+
+ BlockSet bbReach; // Set of all blocks that can reach this one
+ BasicBlock* bbIDom; // Represent the closest dominator to this block (called the Immediate
+ // Dominator) used to compute the dominance tree.
+ unsigned bbDfsNum; // The index of this block in DFS reverse post order
+ // relative to the flow graph.
+
+#if ASSERTION_PROP
+ // A set of blocks which dominate this one *except* the normal entry block. This is lazily initialized
+ // and used only by Assertion Prop, intersected with fgEnterBlks!
+ BlockSet bbDoms;
+#endif
+
+ IL_OFFSET bbCodeOffs; // IL offset of the beginning of the block
+ IL_OFFSET bbCodeOffsEnd; // IL offset past the end of the block. Thus, the [bbCodeOffs..bbCodeOffsEnd)
+ // range is not inclusive of the end offset. The count of IL bytes in the block
+ // is bbCodeOffsEnd - bbCodeOffs, assuming neither are BAD_IL_OFFSET.
+
+#ifdef DEBUG
+ void dspBlockILRange(); // Display the block's IL range as [XXX...YYY), where XXX and YYY might be "???" for
+ // BAD_IL_OFFSET.
+#endif // DEBUG
+
+ VARSET_TP bbVarUse; // variables used by block (before an assignment)
+ VARSET_TP bbVarDef; // variables assigned by block (before a use)
+ VARSET_TP bbVarTmp; // TEMP: only used by FP enregistering code!
+
+ VARSET_TP bbLiveIn; // variables live on entry
+ VARSET_TP bbLiveOut; // variables live on exit
+
+ // Use, def, live in/out information for the implicit "Heap" variable.
+ unsigned bbHeapUse : 1;
+ unsigned bbHeapDef : 1;
+ unsigned bbHeapLiveIn : 1;
+ unsigned bbHeapLiveOut : 1;
+ unsigned bbHeapHavoc : 1; // If true, at some point the block does an operation that leaves the heap
+ // in an unknown state. (E.g., unanalyzed call, store through unknown
+ // pointer...)
+
+ // We want to make phi functions for the special implicit var "Heap". But since this is not a real
+ // lclVar, and thus has no local #, we can't use a GenTreePhiArg. Instead, we use this struct.
+ struct HeapPhiArg
+ {
+ bool m_isSsaNum; // If true, the phi arg is an SSA # for an internal try block heap state, being
+ // added to the phi of a catch block. If false, it's a pred block.
+ union {
+ BasicBlock* m_predBB; // Predecessor block from which the SSA # flows.
+ unsigned m_ssaNum; // SSA# for internal block heap state.
+ };
+ HeapPhiArg* m_nextArg; // Next arg in the list, else NULL.
+
+ unsigned GetSsaNum()
+ {
+ if (m_isSsaNum)
+ {
+ return m_ssaNum;
+ }
+ else
+ {
+ assert(m_predBB != nullptr);
+ return m_predBB->bbHeapSsaNumOut;
+ }
+ }
+
+ HeapPhiArg(BasicBlock* predBB, HeapPhiArg* nextArg = nullptr)
+ : m_isSsaNum(false), m_predBB(predBB), m_nextArg(nextArg)
+ {
+ }
+ HeapPhiArg(unsigned ssaNum, HeapPhiArg* nextArg = nullptr)
+ : m_isSsaNum(true), m_ssaNum(ssaNum), m_nextArg(nextArg)
+ {
+ }
+
+ void* operator new(size_t sz, class Compiler* comp);
+ };
+ static HeapPhiArg* EmptyHeapPhiDef; // Special value (0x1, FWIW) to represent a to-be-filled in Phi arg list
+ // for Heap.
+ HeapPhiArg* bbHeapSsaPhiFunc; // If the "in" Heap SSA var is not a phi definition, this value is NULL.
+ // Otherwise, it is either the special value EmptyHeapPhiDef, to indicate
+ // that Heap needs a phi definition on entry, or else it is the linked list
+ // of the phi arguments.
+ unsigned bbHeapSsaNumIn; // The SSA # of "Heap" on entry to the block.
+ unsigned bbHeapSsaNumOut; // The SSA # of "Heap" on exit from the block.
+
+#ifdef DEBUGGING_SUPPORT
+ VARSET_TP bbScope; // variables in scope over the block
+#endif
+
+ void InitVarSets(class Compiler* comp);
+
+ /* The following are the standard bit sets for dataflow analysis.
+ * We perform CSE and range-checks at the same time
+ * and assertion propagation separately,
+ * thus we can union them since the two operations are completely disjoint.
+ */
+
+ union {
+ EXPSET_TP bbCseGen; // CSEs computed by block
+#if ASSERTION_PROP
+ ASSERT_TP bbAssertionGen; // value assignments computed by block
+#endif
+ };
+
+ union {
+#if ASSERTION_PROP
+ ASSERT_TP bbAssertionKill; // value assignments killed by block
+#endif
+ };
+
+ union {
+ EXPSET_TP bbCseIn; // CSEs available on entry
+#if ASSERTION_PROP
+ ASSERT_TP bbAssertionIn; // value assignments available on entry
+#endif
+ };
+
+ union {
+ EXPSET_TP bbCseOut; // CSEs available on exit
+#if ASSERTION_PROP
+ ASSERT_TP bbAssertionOut; // value assignments available on exit
+#endif
+ };
+
+ void* bbEmitCookie;
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ void* bbUnwindNopEmitCookie;
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+#ifdef VERIFIER
+ stackDesc bbStackIn; // stack descriptor for input
+ stackDesc bbStackOut; // stack descriptor for output
+
+ verTypeVal* bbTypesIn; // list of variable types on input
+ verTypeVal* bbTypesOut; // list of variable types on output
+#endif // VERIFIER
+
+#if FEATURE_STACK_FP_X87
+ FlatFPStateX87* bbFPStateX87; // State of FP stack on entry to the basic block
+#endif // FEATURE_STACK_FP_X87
+
+ /* The following fields used for loop detection */
+ /* The following fields are used for loop detection */
+ typedef unsigned char loopNumber;
+ static const unsigned NOT_IN_LOOP = UCHAR_MAX;
+
+#ifdef DEBUG
+ // This is the label a loop gets as part of the second, reachability-based
+ // loop discovery mechanism. This is apparently only used for debugging.
+ // We hope we'll eventually just have one loop-discovery mechanism, and this will go away.
+ loopNumber bbLoopNum; // set to 'n' for a loop #n header
+#endif // DEBUG
+
+ loopNumber bbNatLoopNum; // Index, in optLoopTable, of most-nested loop that contains this block,
+ // or else NOT_IN_LOOP if this block is not in a loop.
+
+#define MAX_LOOP_NUM 16 // we're using a 'short' for the mask
+#define LOOP_MASK_TP unsigned // must be big enough for a mask
+
+//-------------------------------------------------------------------------
+
+#if MEASURE_BLOCK_SIZE
+ static size_t s_Size;
+ static size_t s_Count;
+#endif // MEASURE_BLOCK_SIZE
+
+ bool bbFallsThrough();
+
+ // Our slop fraction is 1/128 of the block weight rounded off
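+ // (for example, a block of weight BB_UNITY_WEIGHT == 100 gets a slop of (100 + 64) / 128 == 1)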
+ static weight_t GetSlopFraction(weight_t weightBlk)
+ {
+ return ((weightBlk + 64) / 128);
+ }
+
+ // Given an edge b1 -> b2, calculate the slop fraction by
+ // using the higher of the two block weights
+ static weight_t GetSlopFraction(BasicBlock* b1, BasicBlock* b2)
+ {
+ return GetSlopFraction(max(b1->bbWeight, b2->bbWeight));
+ }
+
+#ifdef DEBUG
+ unsigned bbTgtStkDepth; // Native stack depth on entry (for throw-blocks)
+ static unsigned s_nMaxTrees; // The max # of tree nodes in any BB
+
+ unsigned bbStmtNum; // The statement number of the first stmt in this block
+
+ // This is used in integrity checks. We semi-randomly pick a traversal stamp, label all blocks
+ // in the BB list with that stamp (in this field); then we can tell if (e.g.) predecessors are
+ // still in the BB list by whether they have the same stamp (with high probability).
+ unsigned bbTraversalStamp;
+#endif // DEBUG
+
+ ThisInitState bbThisOnEntry();
+ unsigned bbStackDepthOnEntry();
+ void bbSetStack(void* stackBuffer);
+ StackEntry* bbStackOnEntry();
+ void bbSetRunRarely();
+
+ // "bbNum" is one-based (for unknown reasons); it is sometimes useful to have the corresponding
+ // zero-based number for use as an array index.
+ unsigned bbInd()
+ {
+ assert(bbNum > 0);
+ return bbNum - 1;
+ }
+
+ GenTreeStmt* firstStmt();
+ GenTreeStmt* lastStmt();
+ GenTreeStmt* lastTopLevelStmt();
+
+ GenTree* firstNode();
+ GenTree* lastNode();
+
+ bool containsStatement(GenTree* statement);
+
+ bool endsWithJmpMethod(Compiler* comp);
+
+ bool endsWithTailCall(Compiler* comp,
+ bool fastTailCallsOnly,
+ bool tailCallsConvertibleToLoopOnly,
+ GenTree** tailCall);
+
+ bool endsWithTailCallOrJmp(Compiler* comp, bool fastTailCallsOnly = false);
+
+ bool endsWithTailCallConvertibleToLoop(Compiler* comp, GenTree** tailCall);
+
+ // Returns the first statement in the statement list of "this" that is
+ // not an SSA definition (a lcl = phi(...) assignment).
+ GenTreeStmt* FirstNonPhiDef();
+ GenTree* FirstNonPhiDefOrCatchArgAsg();
+
+ BasicBlock()
+ :
+#if ASSERTION_PROP
+ BLOCKSET_INIT_NOCOPY(bbDoms, BlockSetOps::UninitVal())
+ ,
+#endif // ASSERTION_PROP
+ VARSET_INIT_NOCOPY(bbLiveIn, VarSetOps::UninitVal())
+ , VARSET_INIT_NOCOPY(bbLiveOut, VarSetOps::UninitVal())
+ {
+ }
+
+private:
+ EHSuccessorIter StartEHSuccs(Compiler* comp)
+ {
+ return EHSuccessorIter(comp, this);
+ }
+ EHSuccessorIter EndEHSuccs()
+ {
+ return EHSuccessorIter();
+ }
+
+ friend struct EHSuccs;
+
+ AllSuccessorIter StartAllSuccs(Compiler* comp)
+ {
+ return AllSuccessorIter(comp, this);
+ }
+ AllSuccessorIter EndAllSuccs(Compiler* comp)
+ {
+ return AllSuccessorIter(NumSucc(comp));
+ }
+
+ friend struct AllSuccs;
+
+public:
+ // Iteratable collection of the EH successors of a block.
+ class EHSuccs
+ {
+ Compiler* m_comp;
+ BasicBlock* m_block;
+
+ public:
+ EHSuccs(Compiler* comp, BasicBlock* block) : m_comp(comp), m_block(block)
+ {
+ }
+
+ EHSuccessorIter begin()
+ {
+ return m_block->StartEHSuccs(m_comp);
+ }
+ EHSuccessorIter end()
+ {
+ return EHSuccessorIter();
+ }
+ };
+
+ EHSuccs GetEHSuccs(Compiler* comp)
+ {
+ return EHSuccs(comp, this);
+ }
+
+ class AllSuccs
+ {
+ Compiler* m_comp;
+ BasicBlock* m_block;
+
+ public:
+ AllSuccs(Compiler* comp, BasicBlock* block) : m_comp(comp), m_block(block)
+ {
+ }
+
+ AllSuccessorIter begin()
+ {
+ return m_block->StartAllSuccs(m_comp);
+ }
+ AllSuccessorIter end()
+ {
+ return AllSuccessorIter(m_block->NumSucc(m_comp));
+ }
+ };
+
+ AllSuccs GetAllSuccs(Compiler* comp)
+ {
+ return AllSuccs(comp, this);
+ }
+
+ // Clone block state and statements from 'from' block to 'to' block.
+ // Assumes that "to" is an empty block.
+ static void CloneBlockState(Compiler* compiler, BasicBlock* to, const BasicBlock* from);
+
+ void MakeLIR(GenTree* firstNode, GenTree* lastNode);
+ bool IsLIR();
+};
+
+template <>
+struct PtrKeyFuncs<BasicBlock> : public KeyFuncsDefEquals<const BasicBlock*>
+{
+public:
+ // Make sure the hash is deterministic and does not depend on the pointer value "ptr".
+ static unsigned GetHashCode(const BasicBlock* ptr);
+};
+
+// A set of blocks.
+typedef SimplerHashTable<BasicBlock*, PtrKeyFuncs<BasicBlock>, bool, JitSimplerHashBehavior> BlkSet;
+
+// A map of block -> set of blocks, can be used as sparse block trees.
+typedef SimplerHashTable<BasicBlock*, PtrKeyFuncs<BasicBlock>, BlkSet*, JitSimplerHashBehavior> BlkToBlkSetMap;
+
+// Map from Block to Block. Used for a variety of purposes.
+typedef SimplerHashTable<BasicBlock*, PtrKeyFuncs<BasicBlock>, BasicBlock*, JitSimplerHashBehavior> BlockToBlockMap;
+
+// In compiler terminology the control flow between two BasicBlocks
+// is typically referred to as an "edge". Most well known are the
+// backward branches for loops, which are often called "back-edges".
+//
+// "struct flowList" is the type that represents our control flow edges.
+// This type is a linked list of zero or more "edges".
+// (The list of zero edges is represented by NULL.)
+// Every BasicBlock has a field called bbPreds of this type. This field
+// represents the list of "edges" that flow into this BasicBlock.
+// The flowList type only stores the BasicBlock* of the source for the
+// control flow edge. The destination block for the control flow edge
+// is implied to be the block which contained the bbPreds field.
+//
+// For a switch branch target there may be multiple "edges" that have
+// the same source block (and destination block). We need to count the
+// number of these edges so that during optimization we will know when
+// we have zero of them. Rather than have extra flowList entries we
+// increment the flDupCount field.
+//
+// When we have Profile weight for the BasicBlocks we can usually compute
+// the number of times each edge was executed by examining the adjacent
+// BasicBlock weights. As we are doing for BasicBlocks, we call the number
+// of times that a control flow edge was executed the "edge weight".
+// In order to compute the edge weights we need to use a bounded range
+// for every edge weight. These two fields, 'flEdgeWeightMin' and 'flEdgeWeightMax'
+// are used to hold a bounded range. Most often these will converge such
+// that both values are the same and that value is the exact edge weight.
+// Sometimes we are left with a range of possible values between [Min..Max]
+// which represents an inexact edge weight.
+//
+// The bbPreds list is initially created by Compiler::fgComputePreds()
+// and is incrementally kept up to date.
+//
+// The edge weights are computed by Compiler::fgComputeEdgeWeights().
+// The edge weights are used by Compiler::fgReorderBlocks() to straighten
+// conditional branches.
+//
+// We have a simpler struct, BasicBlockList, which is simply a singly-linked
+// list of blocks. This is used for various purposes, but one is as a "cheap"
+// predecessor list, computed by fgComputeCheapPreds(), and stored as a list
+// on BasicBlock pointed to by bbCheapPreds.
+
+struct BasicBlockList
+{
+ BasicBlockList* next; // The next BasicBlock in the list, nullptr for end of list.
+ BasicBlock* block; // The BasicBlock of interest.
+
+ BasicBlockList() : next(nullptr), block(nullptr)
+ {
+ }
+
+ BasicBlockList(BasicBlock* blk, BasicBlockList* rest) : next(rest), block(blk)
+ {
+ }
+};
+
+struct flowList
+{
+ flowList* flNext; // The next flowList entry in the list, nullptr for end of list.
+ BasicBlock* flBlock; // The BasicBlock of interest.
+
+ BasicBlock::weight_t flEdgeWeightMin;
+ BasicBlock::weight_t flEdgeWeightMax;
+
+ unsigned flDupCount; // The count of duplicate "edges" (use only for switch stmts)
+
+ // These two methods are used to set new values for flEdgeWeightMin and flEdgeWeightMax;
+ // they are used only during the computation of the edge weights.
+ // They return false if the newWeight is not within the current [min..max] range;
+ // when slop is non-zero we allow for the case where our weights might be off by 'slop'.
+ //
+ bool setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop);
+ bool setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop);
+
+ flowList() : flNext(nullptr), flBlock(nullptr), flEdgeWeightMin(0), flEdgeWeightMax(0), flDupCount(0)
+ {
+ }
+
+ flowList(BasicBlock* blk, flowList* rest)
+ : flNext(rest), flBlock(blk), flEdgeWeightMin(0), flEdgeWeightMax(0), flDupCount(0)
+ {
+ }
+};
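+
+// A minimal sketch of walking the predecessor edges of a block (illustrative only;
+// 'block' is assumed to be a BasicBlock* whose bbPreds list has already been built
+// by Compiler::fgComputePreds()):
+//
+//     for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+//     {
+//         BasicBlock* predBlock = pred->flBlock;    // source block of the edge predBlock -> block
+//         unsigned    dupCount  = pred->flDupCount; // > 1 only for duplicated switch edges
+//     }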
+
+// This enum represents a pre/post-visit action state to emulate a depth-first
+// spanning tree traversal of a tree or graph.
+enum DfsStackState
+{
+ DSS_Invalid, // The initialized, invalid error state
+ DSS_Pre, // The DFS pre-order (first visit) traversal state
+ DSS_Post // The DFS post-order (last visit) traversal state
+};
+
+// These structs represent an entry in a stack used to emulate a non-recursive
+// depth-first spanning tree traversal of a graph. The entry contains either a
+// block pointer or a block number depending on which is more useful.
+struct DfsBlockEntry
+{
+ DfsStackState dfsStackState; // The pre/post traversal action for this entry
+ BasicBlock* dfsBlock; // The corresponding block for the action
+
+ DfsBlockEntry() : dfsStackState(DSS_Invalid), dfsBlock(nullptr)
+ {
+ }
+
+ DfsBlockEntry(DfsStackState state, BasicBlock* basicBlock) : dfsStackState(state), dfsBlock(basicBlock)
+ {
+ }
+};
+
+struct DfsNumEntry
+{
+ DfsStackState dfsStackState; // The pre/post traversal action for this entry
+ unsigned dfsNum; // The corresponding block number for the action
+
+ DfsNumEntry() : dfsStackState(DSS_Invalid), dfsNum(0)
+ {
+ }
+
+ DfsNumEntry(DfsStackState state, unsigned bbNum) : dfsStackState(state), dfsNum(bbNum)
+ {
+ }
+};
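+
+// A minimal sketch of how such entries can drive a non-recursive DFS (illustrative only;
+// 'stack' is assumed to be some LIFO container of DfsBlockEntry offering Push/Pop/Empty,
+// and 'root' an arbitrary starting BasicBlock*):
+//
+//     stack.Push(DfsBlockEntry(DSS_Pre, root));
+//     while (!stack.Empty())
+//     {
+//         DfsBlockEntry entry = stack.Pop();
+//         if (entry.dfsStackState == DSS_Pre)
+//         {
+//             // first (pre-order) visit: schedule the matching post-visit,
+//             // then push DSS_Pre entries for the not-yet-visited successors
+//             stack.Push(DfsBlockEntry(DSS_Post, entry.dfsBlock));
+//         }
+//         else
+//         {
+//             // last (post-order) visit: all successors of entry.dfsBlock are done
+//         }
+//     }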
+
+/*****************************************************************************/
+
+extern BasicBlock* __cdecl verAllocBasicBlock();
+
+#ifdef DEBUG
+extern void __cdecl verDispBasicBlocks();
+#endif
+
+/*****************************************************************************
+ *
+ * The following call-back is supplied by the client; it is used by the code
+ * emitter to convert a basic block to its corresponding emitter cookie.
+ */
+
+void* emitCodeGetCookie(BasicBlock* block);
+
+AllSuccessorIter::AllSuccessorIter(Compiler* comp, BasicBlock* block)
+ : m_comp(comp), m_blk(block), m_normSucc(0), m_numNormSuccs(block->NumSucc(comp)), m_ehIter(comp, block)
+{
+ if (CurTryIsBlkCallFinallyTarget())
+ {
+ ++m_ehIter;
+ }
+}
+
+bool AllSuccessorIter::CurTryIsBlkCallFinallyTarget()
+{
+ return (m_blk->bbJumpKind == BBJ_CALLFINALLY) && (m_ehIter != EHSuccessorIter()) &&
+ (m_blk->bbJumpDest == (*m_ehIter));
+}
+
+void AllSuccessorIter::operator++(void)
+{
+ if (m_normSucc < m_numNormSuccs)
+ {
+ m_normSucc++;
+ }
+ else
+ {
+ ++m_ehIter;
+
+ // If the original block whose successors we're iterating over
+ // is a BBJ_CALLFINALLY, that finally clause's first block
+ // will be yielded as a normal successor. Don't also yield as
+ // an exceptional successor.
+ if (CurTryIsBlkCallFinallyTarget())
+ {
+ ++m_ehIter;
+ }
+ }
+}
+
+// Requires that "this" is not equal to the standard "end" iterator. Returns the
+// current successor.
+BasicBlock* AllSuccessorIter::operator*()
+{
+ if (m_normSucc < m_numNormSuccs)
+ {
+ return m_blk->GetSucc(m_normSucc, m_comp);
+ }
+ else
+ {
+ return *m_ehIter;
+ }
+}
+/*****************************************************************************/
+#endif // _BLOCK_H_
+/*****************************************************************************/
diff --git a/src/jit/blockset.h b/src/jit/blockset.h
new file mode 100644
index 0000000000..c8e27eabe8
--- /dev/null
+++ b/src/jit/blockset.h
@@ -0,0 +1,77 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This include file determines how BlockSet is implemented.
+//
+#ifndef _BLOCKSET_INCLUDED_
+#define _BLOCKSET_INCLUDED_ 1
+
+// A BlockSet is a set of BasicBlocks, represented by the BasicBlock number (bbNum).
+// Unlike VARSET_TP, we only support a single implementation: the bitset "shortlong"
+// implementation.
+//
+// Note that BasicBlocks in the JIT are numbered starting at 1. We always just waste the
+// 0th bit to avoid having to do "bbNum - 1" calculations everywhere (at the BlockSet call
+// sites). This makes reading the code easier, and avoids potential problems of forgetting
+// to do a "- 1" somewhere.
+//
+// Basic blocks can be renumbered during compilation, so it is important to not mix
+// BlockSets created before and after a renumbering. Each renumbering of the blocks creates
+// a different "epoch", during which the basic block numbers are stable.
+
+#include "bitset.h"
+#include "compilerbitsettraits.h"
+#include "bitsetasshortlong.h"
+
+class BlockSetOps : public BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ BasicBlockBitSetTraits>
+{
+public:
+ // Specialize BlockSetOps::MakeFull(). Since we number basic blocks from one, we remove bit zero from
+ // the block set. Otherwise, IsEmpty() would never return true.
+ static BitSetShortLongRep MakeFull(Compiler* env)
+ {
+ BitSetShortLongRep retval;
+
+ // First, make a full set using the BitSetOps::MakeFull
+
+ retval = BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ BasicBlockBitSetTraits>::MakeFull(env);
+
+ // Now, remove element zero, since we number basic blocks starting at one, and index the set with the
+ // basic block number. If we left this, then IsEmpty() would never return true.
+ BlockSetOps::RemoveElemD(env, retval, 0);
+
+ return retval;
+ }
+};
+
+typedef BitSetShortLongRep BlockSet;
+
+// These types should be used as the types for BlockSet arguments and return values, respectively.
+typedef BlockSetOps::ValArgType BlockSet_ValArg_T;
+typedef BlockSetOps::RetValType BlockSet_ValRet_T;
+
+// Initialize "_varName" to "_initVal." Copies contents, not references; if "_varName" is uninitialized, allocates a
+// var set for it (using "_comp" for any necessary allocation), and copies the contents of "_initVal" into it.
+#define BLOCKSET_INIT(_comp, _varName, _initVal) _varName(BlockSetOps::MakeCopy(_comp, _initVal))
+
+// Initializes "_varName" to "_initVal", without copying: if "_initVal" is an indirect representation, copies its
+// pointer into "_varName".
+#define BLOCKSET_INIT_NOCOPY(_varName, _initVal) _varName(_initVal)
+
+// The iterator pattern.
+
+// Use this to initialize an iterator "_iterName" to iterate over a BlockSet "_blockSet".
+// "_blockNum" will be an unsigned variable to which we assign the elements of "_blockSet".
+#define BLOCKSET_ITER_INIT(_comp, _iterName, _blockSet, _blockNum) \
+ unsigned _blockNum = 0; \
+ BlockSetOps::Iter _iterName(_comp, _blockSet)
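+
+// A minimal usage sketch (illustrative only; 'comp' and 'blockSet' are assumed to be a
+// Compiler* and an initialized BlockSet already in scope):
+//
+//     BLOCKSET_ITER_INIT(comp, iter, blockSet, blkNum);
+//     while (iter.NextElem(comp, &blkNum))
+//     {
+//         // 'blkNum' is the bbNum of the next block in the set
+//     }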
+
+#endif // _BLOCKSET_INCLUDED_
diff --git a/src/jit/codegen.h b/src/jit/codegen.h
new file mode 100755
index 0000000000..0c4a311186
--- /dev/null
+++ b/src/jit/codegen.h
@@ -0,0 +1,967 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This class contains all the data & functionality for code generation
+// of a method, except for the target-specific elements, which are
+// primarily in the Target class.
+//
+
+#ifndef _CODEGEN_H_
+#define _CODEGEN_H_
+#include "compiler.h" // temporary??
+#include "codegeninterface.h"
+#include "regset.h"
+#include "jitgcinfo.h"
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || defined(_TARGET_ARM_)
+#define FOREACH_REGISTER_FILE(file) \
+ for ((file) = &(this->intRegState); (file) != NULL; \
+ (file) = ((file) == &(this->intRegState)) ? &(this->floatRegState) : NULL)
+#else
+#define FOREACH_REGISTER_FILE(file) (file) = &(this->intRegState);
+#endif
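+
+// A minimal usage sketch (illustrative only; must appear inside a CodeGen member function,
+// since the macro refers to 'this'): visit the integer register state and, on the targets
+// listed above, the floating-point register state as well:
+//
+//     RegState* regState;
+//     FOREACH_REGISTER_FILE(regState)
+//     {
+//         // inspect or update *regState here
+//     }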
+
+class CodeGen : public CodeGenInterface
+{
+ friend class emitter;
+ friend class DisAssembler;
+
+public:
+ // This could use further abstraction
+ CodeGen(Compiler* theCompiler);
+
+ virtual void genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode);
+ // TODO-Cleanup: Abstract out the part of this that finds the addressing mode, and
+ // move it to Lower
+ virtual bool genCreateAddrMode(GenTreePtr addr,
+ int mode,
+ bool fold,
+ regMaskTP regMask,
+ bool* revPtr,
+ GenTreePtr* rv1Ptr,
+ GenTreePtr* rv2Ptr,
+#if SCALED_ADDR_MODES
+ unsigned* mulPtr,
+#endif
+ unsigned* cnsPtr,
+ bool nogen = false);
+
+
+private:
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // Bit masks used in negating a float or double number.
+ // The below gentrees encapsulate the data offset to the bitmasks as GT_CLS_VAR nodes.
+ // This is to avoid creating more than one data constant for these bitmasks when a
+ // method has more than one GT_NEG operation on floating point values.
+ GenTreePtr negBitmaskFlt;
+ GenTreePtr negBitmaskDbl;
+
+ // Bit masks used in computing Math.Abs() of a float or double number.
+ GenTreePtr absBitmaskFlt;
+ GenTreePtr absBitmaskDbl;
+
+ // Bit mask used in U8 -> double conversion to adjust the result.
+ GenTreePtr u8ToDblBitmask;
+
+ // Generates SSE2 code for the given tree as "Operand BitWiseOp BitMask"
+ void genSSE2BitwiseOp(GenTreePtr treeNode);
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+ void genPrepForCompiler();
+
+ void genPrepForEHCodegen();
+
+ inline RegState* regStateForType(var_types t)
+ {
+ return varTypeIsFloating(t) ? &floatRegState : &intRegState;
+ }
+ inline RegState* regStateForReg(regNumber reg)
+ {
+ return genIsValidFloatReg(reg) ? &floatRegState : &intRegState;
+ }
+
+ regNumber genFramePointerReg()
+ {
+ if (isFramePointerUsed())
+ {
+ return REG_FPBASE;
+ }
+ else
+ {
+ return REG_SPBASE;
+ }
+ }
+
+ enum CompareKind
+ {
+ CK_SIGNED,
+ CK_UNSIGNED,
+ CK_LOGICAL
+ };
+ static emitJumpKind genJumpKindForOper(genTreeOps cmp, CompareKind compareKind);
+
+ // For a given compare oper tree, returns the conditions to use with jmp/set in 'jmpKind' array.
+ // The corresponding elements of jmpToTrueLabel indicate whether the target of the jump is to the
+ // 'true' label or a 'false' label.
+ //
+ // The 'true' label corresponds to the jump target of the current basic block, i.e. the target to
+ // branch to when the compare condition is true. The 'false' label corresponds to the target to
+ // branch to when the condition is false.
+ static void genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2]);
+
+#if !defined(_TARGET_64BIT_)
+ static void genJumpKindsForTreeLongHi(GenTreePtr cmpTree, emitJumpKind jmpKind[2]);
+#endif //! defined(_TARGET_64BIT_)
+
+ static bool genShouldRoundFP();
+
+ GenTreeIndir indirForm(var_types type, GenTree* base);
+
+ GenTreeIntCon intForm(var_types type, ssize_t value);
+
+ void genRangeCheck(GenTree* node);
+
+ void genLockedInstructions(GenTree* node);
+
+ //-------------------------------------------------------------------------
+ // Register-related methods
+
+ void rsInit();
+
+#ifdef REG_OPT_RSVD
+ // On some targets such as the ARM we may need to have an extra reserved register
+ // that is used when addressing stack based locals and stack based temps.
+ // This method returns the regNumber that should be used when an extra register
+ // is needed to access the stack based locals and stack based temps.
+ //
+ regNumber rsGetRsvdReg()
+ {
+ // We should have already added this register to the mask
+ // of reserved registers in regSet.rsMaskResvd
+ noway_assert((regSet.rsMaskResvd & RBM_OPT_RSVD) != 0);
+
+ return REG_OPT_RSVD;
+ }
+#endif // REG_OPT_RSVD
+
+ regNumber findStkLclInReg(unsigned lclNum)
+ {
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+#endif
+ return regTracker.rsLclIsInReg(lclNum);
+ }
+
+ //-------------------------------------------------------------------------
+
+ bool genUseBlockInit; // true if we plan to block-initialize the local stack frame
+ unsigned genInitStkLclCnt; // The count of local variables that we need to zero init
+
+ // Keeps track of how many bytes we've pushed on the processor's stack.
+ //
+ unsigned genStackLevel;
+
+#if STACK_PROBES
+ // Stack Probes
+ bool genNeedPrologStackProbe;
+
+ void genGenerateStackProbe();
+#endif
+
+#ifdef LEGACY_BACKEND
+ regMaskTP genNewLiveRegMask(GenTreePtr first, GenTreePtr second);
+
+ // During codegen, determine the LiveSet after tree.
+ // Preconditions: must be called during codegen, when compCurLife and
+ // compCurLifeTree are being maintained, and tree must occur in the current
+ // statement.
+ VARSET_VALRET_TP genUpdateLiveSetForward(GenTreePtr tree);
+#endif
+
+ //-------------------------------------------------------------------------
+
+ void genReportEH();
+
+ // Allocates storage for the GC info, writes the GC info into that storage, records the address of the
+ // GC info of the method with the EE, and returns a pointer to the "info" portion (just post-header) of
+ // the GC info. Requires "codeSize" to be the size of the generated code, "prologSize" and "epilogSize"
+ // to be the sizes of the prolog and epilog, respectively. In DEBUG, makes a check involving the
+ // "codePtr", assumed to be a pointer to the start of the generated code.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef JIT32_GCENCODER
+ void* genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr));
+ void* genCreateAndStoreGCInfoJIT32(unsigned codeSize,
+ unsigned prologSize,
+ unsigned epilogSize DEBUGARG(void* codePtr));
+#else // !JIT32_GCENCODER
+ void genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr));
+ void genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr));
+#endif // !JIT32_GCENCODER
+
+ /**************************************************************************
+ * PROTECTED
+ *************************************************************************/
+
+protected:
+ // the current (pending) label ref, a label which has been referenced but not yet seen
+ BasicBlock* genPendingCallLabel;
+
+#ifdef DEBUG
+ // Last instr we have displayed for dspInstrs
+ unsigned genCurDispOffset;
+
+ static const char* genInsName(instruction ins);
+#endif // DEBUG
+
+ //-------------------------------------------------------------------------
+
+ // JIT-time constants for use in multi-dimensional array code generation.
+ unsigned genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension);
+ unsigned genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension);
+
+#ifdef DEBUG
+ static const char* genSizeStr(emitAttr size);
+
+ void genStressRegs(GenTreePtr tree);
+#endif // DEBUG
+
+ void genCodeForBBlist();
+
+public:
+#ifndef LEGACY_BACKEND
+ // genSpillVar is called by compUpdateLifeVar in the !LEGACY_BACKEND case
+ void genSpillVar(GenTreePtr tree);
+#endif // !LEGACY_BACKEND
+
+protected:
+#ifndef LEGACY_BACKEND
+ void genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTarget = REG_NA);
+#else
+ void genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize);
+#endif
+
+ void genGCWriteBarrier(GenTreePtr tree, GCInfo::WriteBarrierForm wbf);
+
+ BasicBlock* genCreateTempLabel();
+
+ void genDefineTempLabel(BasicBlock* label);
+
+ void genAdjustSP(ssize_t delta);
+
+ void genExitCode(BasicBlock* block);
+
+ //-------------------------------------------------------------------------
+
+ GenTreePtr genMakeConst(const void* cnsAddr, var_types cnsType, GenTreePtr cnsTree, bool dblAlign);
+
+ //-------------------------------------------------------------------------
+
+ void genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, GenTreePtr failBlk = nullptr);
+
+ void genCheckOverflow(GenTreePtr tree);
+
+ //-------------------------------------------------------------------------
+ //
+ // Prolog/epilog generation
+ //
+ //-------------------------------------------------------------------------
+
+ //
+ // Prolog functions and data (there are a few exceptions for more generally used things)
+ //
+
+ void genEstablishFramePointer(int delta, bool reportUnwindData);
+ void genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState);
+ void genEnregisterIncomingStackArgs();
+ void genCheckUseBlockInit();
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ void genClearStackVec3ArgUpperBits();
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING && FEATURE_SIMD
+
+#if defined(_TARGET_ARM64_)
+ bool genInstrWithConstant(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ ssize_t imm,
+ regNumber tmpReg,
+ bool inUnwindRegion = false);
+
+ void genStackPointerAdjustment(ssize_t spAdjustment, regNumber tmpReg, bool* pTmpRegIsZero);
+
+ void genPrologSaveRegPair(regNumber reg1,
+ regNumber reg2,
+ int spOffset,
+ int spDelta,
+ bool lastSavedWasPreviousPair,
+ regNumber tmpReg,
+ bool* pTmpRegIsZero);
+
+ void genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero);
+
+ void genEpilogRestoreRegPair(
+ regNumber reg1, regNumber reg2, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero);
+
+ void genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero);
+
+ void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta);
+
+ void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta);
+
+ void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed);
+#else
+ void genPushCalleeSavedRegisters();
+#endif
+
+ void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn);
+
+#if defined(_TARGET_ARM_)
+
+ void genPushFltRegs(regMaskTP regMask);
+ void genPopFltRegs(regMaskTP regMask);
+ regMaskTP genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat);
+
+ regMaskTP genJmpCallArgMask();
+
+ void genFreeLclFrame(unsigned frameSize,
+ /* IN OUT */ bool* pUnwindStarted,
+ bool jmpEpilog);
+
+ bool genUsedPopToReturn; // True if we use the pop into PC to return;
+ // false if we do not and must branch to LR to return.
+
+ // A set of information that is used by funclet prolog and epilog generation. It is collected once, before
+ // funclet prologs and epilogs are generated, and used by all funclet prologs and epilogs, which must all be the
+ // same.
+ struct FuncletFrameInfoDsc
+ {
+ regMaskTP fiSaveRegs; // Set of registers saved in the funclet prolog (includes LR)
+ unsigned fiFunctionCallerSPtoFPdelta; // Delta between caller SP and the frame pointer
+ unsigned fiSpDelta; // Stack pointer delta
+ unsigned fiPSP_slot_SP_offset; // PSP slot offset from SP
+ int fiPSP_slot_CallerSP_offset; // PSP slot offset from Caller SP
+ };
+
+ FuncletFrameInfoDsc genFuncletInfo;
+
+#elif defined(_TARGET_ARM64_)
+
+ // A set of information that is used by funclet prolog and epilog generation. It is collected once, before
+ // funclet prologs and epilogs are generated, and used by all funclet prologs and epilogs, which must all be the
+ // same.
+ struct FuncletFrameInfoDsc
+ {
+ regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes LR)
+ int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function
+ // (negative)
+ int fiSP_to_FPLR_save_delta; // FP/LR register save offset from SP (positive)
+ int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive)
+ int fiSP_to_CalleeSave_delta; // First callee-saved register slot offset from SP (positive)
+ int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative)
+ int fiFrameType; // Funclet frame types are numbered. See genFuncletProlog() for details.
+ int fiSpDelta1; // Stack pointer delta 1 (negative)
+ int fiSpDelta2; // Stack pointer delta 2 (negative)
+ };
+
+ FuncletFrameInfoDsc genFuncletInfo;
+
+#elif defined(_TARGET_AMD64_)
+
+ // A set of information that is used by funclet prolog and epilog generation. It is collected once, before
+ // funclet prologs and epilogs are generated, and used by all funclet prologs and epilogs, which must all be the
+ // same.
+ struct FuncletFrameInfoDsc
+ {
+ unsigned fiFunction_InitialSP_to_FP_delta; // Delta between Initial-SP and the frame pointer
+ unsigned fiSpDelta; // Stack pointer delta
+ int fiPSP_slot_InitialSP_offset; // PSP slot offset from Initial-SP
+ };
+
+ FuncletFrameInfoDsc genFuncletInfo;
+
+#endif // _TARGET_AMD64_
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+ // Save/Restore callee saved float regs to stack
+ void genPreserveCalleeSavedFltRegs(unsigned lclFrameSize);
+ void genRestoreCalleeSavedFltRegs(unsigned lclFrameSize);
+
+#endif // _TARGET_XARCH_ && !FEATURE_STACK_FP_X87
+
+#if !FEATURE_STACK_FP_X87
+ void genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg);
+#endif // !FEATURE_STACK_FP_X87
+
+ regNumber genGetZeroReg(regNumber initReg, bool* pInitRegZeroed);
+
+ void genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed);
+
+ void genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed);
+
+ void genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed);
+
+ void genFinalizeFrame();
+
+#ifdef PROFILING_SUPPORTED
+ void genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed);
+ void genProfilingLeaveCallback(unsigned helper = CORINFO_HELP_PROF_FCN_LEAVE);
+#endif // PROFILING_SUPPORTED
+
+ void genPrologPadForReJit();
+
+ void genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize),
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset,
+ regNumber base = REG_NA,
+ bool isJump = false,
+ bool isNoGC = false);
+
+ void genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize),
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset);
+
+ //
+ // Epilog functions
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM_)
+ bool genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog);
+#endif
+
+#if defined(_TARGET_ARM64_)
+
+ void genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog);
+
+#else // !defined(_TARGET_ARM64_)
+
+ void genPopCalleeSavedRegisters(bool jmpEpilog = false);
+
+#endif // !defined(_TARGET_ARM64_)
+
+ //
+ // Common or driving functions
+ //
+
+ void genReserveProlog(BasicBlock* block); // currently unused
+ void genReserveEpilog(BasicBlock* block);
+ void genFnProlog();
+ void genFnEpilog(BasicBlock* block);
+
+#if FEATURE_EH_FUNCLETS
+
+ void genReserveFuncletProlog(BasicBlock* block);
+ void genReserveFuncletEpilog(BasicBlock* block);
+ void genFuncletProlog(BasicBlock* block);
+ void genFuncletEpilog();
+ void genCaptureFuncletPrologEpilogInfo();
+
+ void genSetPSPSym(regNumber initReg, bool* pInitRegZeroed);
+
+ void genUpdateCurrentFunclet(BasicBlock* block);
+
+#else // FEATURE_EH_FUNCLETS
+
+ // This is a no-op when there are no funclets!
+ void genUpdateCurrentFunclet(BasicBlock* block)
+ {
+ return;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ void genGeneratePrologsAndEpilogs();
+
+#if defined(DEBUG) && defined(_TARGET_ARM64_)
+ void genArm64EmitterUnitTests();
+#endif
+
+#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+ void genAmd64EmitterUnitTests();
+#endif
+
+//-------------------------------------------------------------------------
+//
+// End prolog/epilog generation
+//
+//-------------------------------------------------------------------------
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+#ifdef DEBUG
+ void genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping);
+ void genIPmappingListDisp();
+#endif // DEBUG
+
+ void genIPmappingAdd(IL_OFFSETX offset, bool isLabel);
+ void genIPmappingAddToFront(IL_OFFSETX offset);
+ void genIPmappingGen();
+
+ void genEnsureCodeEmitted(IL_OFFSETX offsx);
+
+ //-------------------------------------------------------------------------
+ // scope info for the variables
+
+ void genSetScopeInfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& loc);
+
+ void genSetScopeInfo();
+
+ void genRemoveBBsection(BasicBlock* head, BasicBlock* tail);
+
+protected:
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX ScopeInfo XX
+ XX XX
+ XX Keeps track of the scopes during code-generation. XX
+ XX This is used to translate the local-variable debugging information XX
+ XX from IL offsets to native code offsets. XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+ /*****************************************************************************/
+ /*****************************************************************************
+ * ScopeInfo
+ *
+ * This class is called during code gen at block-boundaries, and when the
+ * set of live variables changes. It keeps track of the scope of the variables
+ * in terms of the native code PC.
+ */
+
+public:
+ void siInit();
+
+ void siBeginBlock(BasicBlock* block);
+
+ void siEndBlock(BasicBlock* block);
+
+ virtual void siUpdate();
+
+ void siCheckVarScope(unsigned varNum, IL_OFFSET offs);
+
+ void siCloseAllOpenScopes();
+
+#ifdef DEBUG
+ void siDispOpenScopes();
+#endif
+
+ /**************************************************************************
+ * PROTECTED
+ *************************************************************************/
+
+protected:
+ struct siScope
+ {
+ emitLocation scStartLoc; // emitter location of start of scope
+ emitLocation scEndLoc; // emitter location of end of scope
+
+ unsigned scVarNum; // index into lvaTable
+ unsigned scLVnum; // 'which' in eeGetLVinfo()
+
+ unsigned scStackLevel; // Only for stk-vars
+ bool scAvailable : 1; // It has a home / Home recycled - TODO-Cleanup: it appears this is unused (always true)
+
+ siScope* scPrev;
+ siScope* scNext;
+ };
+
+ siScope siOpenScopeList, siScopeList, *siOpenScopeLast, *siScopeLast;
+
+ unsigned siScopeCnt;
+
+ VARSET_TP siLastLife; // Life at last call to siUpdate()
+
+ // Tracks the last entry for each tracked register variable
+
+ siScope* siLatestTrackedScopes[lclMAX_TRACKED];
+
+ IL_OFFSET siLastEndOffs; // IL offset of the (exclusive) end of the last block processed
+
+#if FEATURE_EH_FUNCLETS
+ bool siInFuncletRegion; // Have we seen the start of the funclet region?
+#endif // FEATURE_EH_FUNCLETS
+
+ // Functions
+
+ siScope* siNewScope(unsigned LVnum, unsigned varNum);
+
+ void siRemoveFromOpenScopeList(siScope* scope);
+
+ void siEndTrackedScope(unsigned varIndex);
+
+ void siEndScope(unsigned varNum);
+
+ void siEndScope(siScope* scope);
+
+#ifdef DEBUG
+ bool siVerifyLocalVarTab();
+#endif
+
+#ifdef LATE_DISASM
+public:
+ /* virtual */
+ const char* siRegVarName(size_t offs, size_t size, unsigned reg);
+
+ /* virtual */
+ const char* siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs);
+#endif // LATE_DISASM
+
+public:
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX PrologScopeInfo XX
+ XX XX
+ XX We need special handling in the prolog block, as the parameter variables XX
+ XX may not be in the same position described by genLclVarTable - they all XX
+ XX start out on the stack XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ void psiBegProlog();
+
+ void psiAdjustStackLevel(unsigned size);
+
+ void psiMoveESPtoEBP();
+
+ void psiMoveToReg(unsigned varNum, regNumber reg = REG_NA, regNumber otherReg = REG_NA);
+
+ void psiMoveToStack(unsigned varNum);
+
+ void psiEndProlog();
+
+ /**************************************************************************
+ * PROTECTED
+ *************************************************************************/
+
+protected:
+ struct psiScope
+ {
+ emitLocation scStartLoc; // emitter location of start of scope
+ emitLocation scEndLoc; // emitter location of end of scope
+
+ unsigned scSlotNum; // index into lclVarTab
+ unsigned scLVnum; // 'which' in eeGetLVinfo()
+
+ bool scRegister;
+
+ union {
+ struct
+ {
+ regNumberSmall scRegNum;
+
+ // Used for:
+ // - "other half" of long var on architectures with 32 bit size registers - x86.
+ // - for System V structs it stores the second register
+ // used to pass a register passed struct.
+ regNumberSmall scOtherReg;
+ } u1;
+
+ struct
+ {
+ regNumberSmall scBaseReg;
+ NATIVE_OFFSET scOffset;
+ } u2;
+ };
+
+ psiScope* scPrev;
+ psiScope* scNext;
+ };
+
+ psiScope psiOpenScopeList, psiScopeList, *psiOpenScopeLast, *psiScopeLast;
+
+ unsigned psiScopeCnt;
+
+ // Implementation Functions
+
+ psiScope* psiNewPrologScope(unsigned LVnum, unsigned slotNum);
+
+ void psiEndPrologScope(psiScope* scope);
+
+ void psSetScopeOffset(psiScope* newScope, LclVarDsc* lclVarDsc1);
+
+/*****************************************************************************
+ * TrnslLocalVarInfo
+ *
+ * This struct holds the LocalVarInfo in terms of the generated native code
+ * after a call to genSetScopeInfo()
+ */
+
+#ifdef DEBUG
+
+ struct TrnslLocalVarInfo
+ {
+ unsigned tlviVarNum;
+ unsigned tlviLVnum;
+ VarName tlviName;
+ UNATIVE_OFFSET tlviStartPC;
+ size_t tlviLength;
+ bool tlviAvailable;
+ Compiler::siVarLoc tlviVarLoc;
+ };
+
+ // Array of scopes of LocalVars in terms of native code
+
+ TrnslLocalVarInfo* genTrnslLocalVarInfo;
+ unsigned genTrnslLocalVarCount;
+#endif
+
+/*****************************************************************************/
+#endif // DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+#ifndef LEGACY_BACKEND
+#include "codegenlinear.h"
+#else // LEGACY_BACKEND
+#include "codegenclassic.h"
+#endif // LEGACY_BACKEND
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX Instruction XX
+ XX XX
+ XX The interface to generate a machine-instruction. XX
+ XX Currently specific to x86 XX
+ XX TODO-Cleanup: Consider factoring this out of CodeGen XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ void instInit();
+
+ regNumber genGetZeroRegister();
+
+ void instGen(instruction ins);
+#ifdef _TARGET_XARCH_
+ void instNop(unsigned size);
+#endif
+
+ void inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock);
+
+ void inst_SET(emitJumpKind condition, regNumber reg);
+
+ void inst_RV(instruction ins, regNumber reg, var_types type, emitAttr size = EA_UNKNOWN);
+
+ void inst_RV_RV(instruction ins,
+ regNumber reg1,
+ regNumber reg2,
+ var_types type = TYP_I_IMPL,
+ emitAttr size = EA_UNKNOWN,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_RV_RV_RV(instruction ins,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ emitAttr size,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_IV(instruction ins, int val);
+ void inst_IV_handle(instruction ins, int val);
+ void inst_FS(instruction ins, unsigned stk = 0);
+
+ void inst_RV_IV(instruction ins, regNumber reg, ssize_t val, emitAttr size, insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_ST_RV(instruction ins, TempDsc* tmp, unsigned ofs, regNumber reg, var_types type);
+ void inst_ST_IV(instruction ins, TempDsc* tmp, unsigned ofs, int val, var_types type);
+
+ void inst_SA_RV(instruction ins, unsigned ofs, regNumber reg, var_types type);
+ void inst_SA_IV(instruction ins, unsigned ofs, int val, var_types type);
+
+ void inst_RV_ST(
+ instruction ins, regNumber reg, TempDsc* tmp, unsigned ofs, var_types type, emitAttr size = EA_UNKNOWN);
+ void inst_FS_ST(instruction ins, emitAttr size, TempDsc* tmp, unsigned ofs);
+
+ void instEmit_indCall(GenTreePtr call,
+ size_t argSize,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize));
+
+ void instEmit_RM(instruction ins, GenTreePtr tree, GenTreePtr addr, unsigned offs);
+
+ void instEmit_RM_RV(instruction ins, emitAttr size, GenTreePtr tree, regNumber reg, unsigned offs);
+
+ void instEmit_RV_RM(instruction ins, emitAttr size, regNumber reg, GenTreePtr tree, unsigned offs);
+
+ void instEmit_RV_RIA(instruction ins, regNumber reg1, regNumber reg2, unsigned offs);
+
+ void inst_TT(instruction ins, GenTreePtr tree, unsigned offs = 0, int shfv = 0, emitAttr size = EA_UNKNOWN);
+
+ void inst_TT_RV(instruction ins,
+ GenTreePtr tree,
+ regNumber reg,
+ unsigned offs = 0,
+ emitAttr size = EA_UNKNOWN,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_TT_IV(instruction ins,
+ GenTreePtr tree,
+ ssize_t val,
+ unsigned offs = 0,
+ emitAttr size = EA_UNKNOWN,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_RV_AT(instruction ins,
+ emitAttr size,
+ var_types type,
+ regNumber reg,
+ GenTreePtr tree,
+ unsigned offs = 0,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_AT_IV(instruction ins, emitAttr size, GenTreePtr baseTree, int icon, unsigned offs = 0);
+
+ void inst_RV_TT(instruction ins,
+ regNumber reg,
+ GenTreePtr tree,
+ unsigned offs = 0,
+ emitAttr size = EA_UNKNOWN,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_RV_TT_IV(instruction ins, regNumber reg, GenTreePtr tree, int val);
+
+ void inst_FS_TT(instruction ins, GenTreePtr tree);
+
+ void inst_RV_SH(instruction ins, emitAttr size, regNumber reg, unsigned val, insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_TT_SH(instruction ins, GenTreePtr tree, unsigned val, unsigned offs = 0);
+
+ void inst_RV_CL(instruction ins, regNumber reg, var_types type = TYP_I_IMPL);
+
+ void inst_TT_CL(instruction ins, GenTreePtr tree, unsigned offs = 0);
+
+#if defined(_TARGET_XARCH_)
+ void inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regNumber reg2, unsigned ival);
+#endif
+
+ void inst_RV_RR(instruction ins, emitAttr size, regNumber reg1, regNumber reg2);
+
+ void inst_RV_ST(instruction ins, emitAttr size, regNumber reg, GenTreePtr tree);
+
+ void inst_mov_RV_ST(regNumber reg, GenTreePtr tree);
+
+ void instGetAddrMode(GenTreePtr addr, regNumber* baseReg, unsigned* indScale, regNumber* indReg, unsigned* cns);
+
+ void inst_set_SV_var(GenTreePtr tree);
+
+#ifdef _TARGET_ARM_
+ bool arm_Valid_Imm_For_Instr(instruction ins, ssize_t imm, insFlags flags);
+ bool arm_Valid_Disp_For_LdSt(ssize_t disp, var_types type);
+ bool arm_Valid_Imm_For_Alu(ssize_t imm);
+ bool arm_Valid_Imm_For_Mov(ssize_t imm);
+ bool arm_Valid_Imm_For_Small_Mov(regNumber reg, ssize_t imm, insFlags flags);
+ bool arm_Valid_Imm_For_Add(ssize_t imm, insFlags flag);
+ bool arm_Valid_Imm_For_Add_SP(ssize_t imm);
+ bool arm_Valid_Imm_For_BL(ssize_t addr);
+
+ bool ins_Writes_Dest(instruction ins);
+#endif
+
+ bool isMoveIns(instruction ins);
+ instruction ins_Move_Extend(var_types srcType, bool srcInReg);
+
+ instruction ins_Copy(var_types dstType);
+ instruction ins_CopyIntToFloat(var_types srcType, var_types dstTyp);
+ instruction ins_CopyFloatToInt(var_types srcType, var_types dstTyp);
+ static instruction ins_FloatStore(var_types type = TYP_DOUBLE);
+ static instruction ins_FloatCopy(var_types type = TYP_DOUBLE);
+ instruction ins_FloatConv(var_types to, var_types from);
+ instruction ins_FloatCompare(var_types type);
+ instruction ins_MathOp(genTreeOps oper, var_types type);
+ instruction ins_FloatSqrt(var_types type);
+
+ void instGen_Return(unsigned stkArgSize);
+
+ void instGen_MemoryBarrier();
+
+ void instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void instGen_Compare_Reg_To_Zero(emitAttr size, regNumber reg);
+
+ void instGen_Compare_Reg_To_Reg(emitAttr size, regNumber reg1, regNumber reg2);
+
+ void instGen_Compare_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm);
+
+ void instGen_Load_Reg_From_Lcl(var_types srcType, regNumber dstReg, int varNum, int offs);
+
+ void instGen_Store_Reg_Into_Lcl(var_types dstType, regNumber srcReg, int varNum, int offs);
+
+ void instGen_Store_Imm_Into_Lcl(
+ var_types dstType, emitAttr sizeAttr, ssize_t imm, int varNum, int offs, regNumber regToUse = REG_NA);
+
+#ifdef DEBUG
+ void __cdecl instDisp(instruction ins, bool noNL, const char* fmt, ...);
+#endif
+
+#ifdef _TARGET_XARCH_
+ instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue);
+#endif // _TARGET_XARCH_
+};
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Instruction XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifdef _TARGET_XARCH_
+/*****************************************************************************
+ *
+ * Generate a floating-point instruction that has one operand given by
+ * a tree (which has been made addressable).
+ */
+
+inline void CodeGen::inst_FS_TT(instruction ins, GenTreePtr tree)
+{
+ assert(instIsFP(ins));
+
+ assert(varTypeIsFloating(tree->gtType));
+
+ inst_TT(ins, tree, 0);
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Generate a "shift reg, cl" instruction.
+ */
+
+inline void CodeGen::inst_RV_CL(instruction ins, regNumber reg, var_types type)
+{
+ inst_RV(ins, reg, type);
+}
+
+#endif // _CODEGEN_H_
diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp
new file mode 100644
index 0000000000..4ce82307f9
--- /dev/null
+++ b/src/jit/codegenarm.cpp
@@ -0,0 +1,2106 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX ARM Code Generator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARM_
+#include "codegen.h"
+#include "lower.h"
+#include "gcinfo.h"
+#include "emit.h"
+
+#ifndef JIT32_GCENCODER
+#include "gcinfoencoder.h"
+#endif
+
+// Get the register assigned to the given node
+
+regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
+{
+ return tree->gtRegNum;
+}
+
+//------------------------------------------------------------------------
+// genSpillVar: Spill a local variable
+//
+// Arguments:
+// tree - the lclVar node for the variable being spilled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The lclVar must be a register candidate (lvRegCandidate)
+
+void CodeGen::genSpillVar(GenTreePtr tree)
+{
+ regMaskTP regMask;
+ unsigned varNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ // We don't actually need to spill if it is already living in memory
+ bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
+ if (needsSpill)
+ {
+ bool restoreRegVar = false;
+ if (tree->gtOper == GT_REG_VAR)
+ {
+ tree->SetOper(GT_LCL_VAR);
+ restoreRegVar = true;
+ }
+
+ // mask off the flag to generate the right spill code, then bring it back
+ tree->gtFlags &= ~GTF_REG_VAL;
+
+ instruction storeIns = ins_Store(tree->TypeGet());
+
+ if (varTypeIsMultiReg(tree))
+ {
+ assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
+ assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
+ regNumber regLo = genRegPairLo(tree->gtRegPair);
+ regNumber regHi = genRegPairHi(tree->gtRegPair);
+ inst_TT_RV(storeIns, tree, regLo);
+ inst_TT_RV(storeIns, tree, regHi, 4);
+ }
+ else
+ {
+ assert(varDsc->lvRegNum == tree->gtRegNum);
+ inst_TT_RV(storeIns, tree, tree->gtRegNum);
+ }
+ tree->gtFlags |= GTF_REG_VAL;
+
+ if (restoreRegVar)
+ {
+ tree->SetOper(GT_REG_VAR);
+ }
+
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
+ gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
+
+ if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
+ {
+#ifdef DEBUG
+ if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
+ }
+#endif
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+
+ tree->gtFlags &= ~GTF_SPILL;
+ varDsc->lvRegNum = REG_STK;
+ if (varTypeIsMultiReg(tree))
+ {
+ varDsc->lvOtherReg = REG_STK;
+ }
+}
+
+// inline
+void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
+{
+ assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
+ varDsc->lvRegNum = tree->gtRegNum;
+}
+
+/*****************************************************************************
+ *
+ * Generate code that will set the given register to the integer constant.
+ */
+
+void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
+{
+ // Reg cannot be a FP reg
+ assert(!genIsValidFloatReg(reg));
+
+ // The only TYP_REF constant that can come this path is a managed 'null' since it is not
+ // relocatable. Other ref type constants (e.g. string objects) go through a different
+ // code path.
+ noway_assert(type != TYP_REF || val == 0);
+
+ if (val == 0)
+ {
+ instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
+ }
+ else
+ {
+ // TODO-CQ: needs all the optimized cases
+ getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code to check that the GS cookie wasn't trashed by a buffer
+ * overrun. If pushReg is true, preserve all registers around the code sequence.
+ * Otherwise, ECX may be modified.
+ */
+void CodeGen::genEmitGSCookieCheck(bool pushReg)
+{
+ NYI("ARM genEmitGSCookieCheck is not yet implemented for protojit");
+}
+
+/*****************************************************************************
+ *
+ * Generate code for all the basic blocks in the function.
+ */
+
+void CodeGen::genCodeForBBlist()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ unsigned savedStkLvl;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+
+ // You have to be careful if you create basic blocks from now on
+ compiler->fgSafeBasicBlockCreation = false;
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnCall)
+ {
+ compiler->opts.compStackCheckOnCall = false;
+ }
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnRet)
+ {
+ compiler->opts.compStackCheckOnRet = false;
+ }
+#endif
+
+ // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
+ genPrepForEHCodegen();
+
+ assert(!compiler->fgFirstBBScratch ||
+ compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+
+ /* Initialize the spill tracking logic */
+
+ regSet.rsSpillBeg();
+
+#ifdef DEBUGGING_SUPPORT
+ /* Initialize the line# tracking logic */
+
+ if (compiler->opts.compScopeInfo)
+ {
+ siInit();
+ }
+#endif
+
+ if (compiler->opts.compDbgEnC)
+ {
+ noway_assert(isFramePointerUsed());
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
+ /* If we have any pinvoke calls, we might potentially trash everything */
+ if (compiler->info.compCallUnmanaged)
+ {
+ noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
+ genPendingCallLabel = nullptr;
+
+ /* Initialize the pointer tracking code */
+
+ gcInfo.gcRegPtrSetInit();
+ gcInfo.gcVarPtrSetInit();
+
+ /* If any arguments live in registers, mark those regs as such */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter assigned to a register? */
+
+ if (!varDsc->lvIsParam || !varDsc->lvRegister)
+ continue;
+
+ /* Is the argument live on entry to the method? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ continue;
+
+ /* Is this a floating-point argument? */
+
+ if (varDsc->IsFloatRegType())
+ continue;
+
+ noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
+
+ /* Mark the register as holding the variable */
+
+ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
+ }
+
+ unsigned finallyNesting = 0;
+
+ // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
+ // allocation at the start of each basic block.
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks and generate code for each one
+ *
+ */
+
+ BasicBlock* block;
+ BasicBlock* lblk; /* previous block */
+
+ for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n=============== Generating ");
+ block->dspBlockHeader(compiler, true, true);
+ compiler->fgDispBBLiveness(block);
+ }
+#endif // DEBUG
+
+ /* Figure out which registers hold variables on entry to this block */
+
+ regSet.ClearMaskVars();
+ gcInfo.gcRegGCrefSetCur = RBM_NONE;
+ gcInfo.gcRegByrefSetCur = RBM_NONE;
+
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
+
+ genUpdateLife(block->bbLiveIn);
+
+ // Even if liveness didn't change, we need to update the registers containing GC references.
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
+
+ regMaskTP newLiveRegSet = RBM_NONE;
+ regMaskTP newRegGCrefSet = RBM_NONE;
+ regMaskTP newRegByrefSet = RBM_NONE;
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ if (varDsc->lvIsInReg())
+ {
+ newLiveRegSet |= varDsc->lvRegMask();
+ if (varDsc->lvType == TYP_REF)
+ {
+ newRegGCrefSet |= varDsc->lvRegMask();
+ }
+ else if (varDsc->lvType == TYP_BYREF)
+ {
+ newRegByrefSet |= varDsc->lvRegMask();
+ }
+ }
+ else if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
+ {
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ }
+
+ regSet.rsMaskVars = newLiveRegSet;
+ gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
+ gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
+
+ /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
+ represent the exception object (TYP_REF).
+ We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
+ to the block, it will be the first thing evaluated
+ (thanks to GTF_ORDER_SIDEEFF).
+ */
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ for (GenTree* node : LIR::AsRange(block))
+ {
+ if (node->OperGet() == GT_CATCH_ARG)
+ {
+ gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
+ break;
+ }
+ }
+ }
+
+ /* Start a new code output block */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+#if defined(_TARGET_ARM_)
+ // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
+ // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
+ // calls the funclet during non-exceptional control flow.
+ if (block->bbFlags & BBF_FINALLY_TARGET)
+ {
+ assert(block->bbFlags & BBF_JMP_TARGET);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
+ }
+#endif
+ // Create a label that we'll use for computing the start of an EH region, if this block is
+ // at the beginning of such a region. If we used the existing bbEmitCookie as is for
+ // determining the EH regions, then this NOP would end up outside of the region, if this
+ // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
+ // would be executed, which we would prefer not to do.
+
+ block->bbUnwindNopEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+
+ instGen(INS_nop);
+ }
+#endif // defined(_TARGET_ARM_)
+
+ genUpdateCurrentFunclet(block);
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef _TARGET_XARCH_
+ if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
+ {
+ getEmitter()->emitLoopAlign();
+ }
+#endif
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
+#endif
+
+ block->bbEmitCookie = NULL;
+
+ if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
+ {
+ /* Mark a label and update the current set of live GC refs */
+
+ block->bbEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET);
+ }
+
+ if (block == compiler->fgFirstColdBlock)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nThis is the start of the cold region of the method\n");
+ }
+#endif
+ // We should never have a block that falls through into the Cold section
+ noway_assert(!lblk->bbFallsThrough());
+
+ // We require the block that starts the Cold section to have a label
+ noway_assert(block->bbEmitCookie);
+ getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
+ }
+
+ /* Both stacks are always empty on entry to a basic block */
+
+ genStackLevel = 0;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ /* Check for inserted throw blocks and adjust genStackLevel */
+
+ if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
+ {
+ noway_assert(block->bbFlags & BBF_JMP_TARGET);
+
+ genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int);
+
+ if (genStackLevel)
+ {
+ NYI("Need emitMarkStackLvl()");
+ }
+ }
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+ savedStkLvl = genStackLevel;
+
+ /* Tell everyone which basic block we're working on */
+
+ compiler->compCurBB = block;
+
+#ifdef DEBUGGING_SUPPORT
+ siBeginBlock(block);
+
+ // BBF_INTERNAL blocks don't correspond to any single IL instruction.
+ if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
+
+ bool firstMapping = true;
+#endif // DEBUGGING_SUPPORT
+
+ /*---------------------------------------------------------------------
+ *
+ * Generate code for each statement-tree in the block
+ *
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ genReserveFuncletProlog(block);
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // Clear compCurStmt and compCurLifeTree.
+ compiler->compCurStmt = nullptr;
+ compiler->compCurLifeTree = nullptr;
+
+#ifdef DEBUG
+ bool pastProfileUpdate = false;
+#endif
+
+// Traverse the block in linear order, generating code for each node as we
+// encounter it.
+#ifdef DEBUGGING_SUPPORT
+ IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
+#endif
+ for (GenTree* node : LIR::AsRange(block))
+ {
+#ifdef DEBUGGING_SUPPORT
+ // Do we have a new IL offset?
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ genEnsureCodeEmitted(currentILOffset);
+
+ currentILOffset = node->gtStmt.gtStmtILoffsx;
+
+ genIPmappingAdd(currentILOffset, firstMapping);
+ firstMapping = false;
+ }
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
+ node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
+
+ if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
+ node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
+ {
+ while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
+ {
+ genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
+ }
+ }
+ }
+#endif // DEBUG
+
+ genCodeForTreeNode(node);
+ if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
+ {
+ genConsumeReg(node);
+ }
+
+#ifdef DEBUG
+ regSet.rsSpillChk();
+
+ assert((node->gtFlags & GTF_SPILL) == 0);
+
+ /* Make sure we didn't bungle pointer register tracking */
+
+ regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
+ regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
+
+ // If return is a GC-type, clear it. Note that if a common
+ // epilog is generated (genReturnBB) it has a void return
+ // even though we might return a ref. We can't use the compRetType
+ // as the determiner because something we are tracking as a byref
+ // might be used as a return value of an int function (which is legal)
+ if (node->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
+ (node->gtOp.gtOp1 != 0 && varTypeIsGC(node->gtOp.gtOp1->TypeGet()))))
+ {
+ nonVarPtrRegs &= ~RBM_INTRET;
+ }
+
+ // When profiling, the first few nodes in a catch block will be an update of
+ // the profile count (does not interfere with the exception object).
+ if (((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) != 0) && handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ pastProfileUpdate = pastProfileUpdate || node->OperGet() == GT_CATCH_ARG;
+ if (!pastProfileUpdate)
+ {
+ nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
+ }
+ }
+
+ if (nonVarPtrRegs)
+ {
+ printf("Regset after node=");
+ Compiler::printTreeID(node);
+ printf(" BB%02u gcr=", block->bbNum);
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ printf(", byr=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ printf(", regVars=");
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf("\n");
+ }
+
+ noway_assert(nonVarPtrRegs == 0);
+#endif // DEBUG
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ // It is possible to reach the end of the block without generating code for the current IL offset.
+ // For example, if the following IR ends the current block, no code will have been generated for
+ // offset 21:
+ //
+ // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
+ //
+ // N001 ( 0, 0) [000039] ------------ nop void
+ //
+ // This can lead to problems when debugging the generated code. To prevent these issues, make sure
+ // we've generated code for the last IL offset we saw in the block.
+ genEnsureCodeEmitted(currentILOffset);
+
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ siEndBlock(block);
+
+ /* Is this the last block, and are there any open scopes left ? */
+
+ bool isLastBlockProcessed = (block->bbNext == NULL);
+ if (block->isBBCallAlwaysPair())
+ {
+ isLastBlockProcessed = (block->bbNext->bbNext == NULL);
+ }
+
+ if (isLastBlockProcessed && siOpenScopeList.scNext)
+ {
+ /* This assert no longer holds, because we may insert a throw
+ block to demarcate the end of a try or finally region when they
+ are at the end of the method. It would be nice if we could fix
+ our code so that this throw block will no longer be necessary. */
+
+ // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
+
+ siCloseAllOpenScopes();
+ }
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ genStackLevel -= savedStkLvl;
+
+#ifdef DEBUG
+ // compCurLife should be equal to the liveOut set, except that we don't keep
+ // it up to date for vars that are not register candidates
+ // (it would be nice to have an xor set function)
+
+ VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
+ VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
+ VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
+ while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ assert(!varDsc->lvIsRegCandidate());
+ }
+#endif
+
+ /* Both stacks should always be empty on exit from a basic block */
+
+ noway_assert(genStackLevel == 0);
+
+#ifdef _TARGET_AMD64_
+ // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
+ // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
+ // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
+ // The document "X64 and ARM ABIs.docx" has more details. The situations:
+ // 1. If the call instruction is in a different EH region from the instruction that follows it.
+ // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
+ // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
+ // here.)
+ // We handle case #1 here, and case #2 in the emitter.
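+ // For illustration only (hypothetical layout for case #1):
+ //
+ //     call FOO         ; last instruction of a try region
+ //   L_next:            ; first instruction of a following, different EH region
+ //
+ // The return address pushed by the call is the address of L_next, so a stack walk started
+ // inside FOO would attribute this frame to the wrong EH region; emitting a NOP after the
+ // call keeps the return address inside the call's region.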
+ if (getEmitter()->emitIsLastInsCall())
+ {
+ // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
+ // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
+ // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
+ // generated before the OS epilog starts, such as a GS cookie check.
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ // We only need the NOP if we're not going to generate any more code as part of the block end.
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_THROW:
+ case BBJ_CALLFINALLY:
+ case BBJ_EHCATCHRET:
+ // We're going to generate more code below anyway, so no need for the NOP.
+
+ case BBJ_RETURN:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ // These are the "epilog follows" case, handled in the emitter.
+
+ break;
+
+ case BBJ_NONE:
+ if (block->bbNext == nullptr)
+ {
+ // Call immediately before the end of the code; we should never get here.
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ else
+ {
+ // We need the NOP
+ instGen(INS_nop);
+ }
+ break;
+
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ // These can't have a call as the last instruction!
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+ }
+#endif //_TARGET_AMD64_
+
+ /* Do we need to generate a jump or return? */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+ break;
+
+ case BBJ_RETURN:
+ genExitCode(block);
+ break;
+
+ case BBJ_THROW:
+ // If we have a throw at the end of a function or funclet, we need to emit another instruction
+ // afterwards to help the OS unwinder determine the correct context during unwind.
+ // We insert an unexecuted breakpoint instruction in several situations
+ // following a throw instruction:
+ // 1. If the throw is the last instruction of the function or funclet. This helps
+ // the OS unwinder determine the correct context during an unwind from the
+ // thrown exception.
+ // 2. If this is the last block of the hot section.
+ // 3. If the subsequent block is a special throw block.
+ // 4. On AMD64, if the next block is in a different EH region.
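+ // For illustration only (hypothetical code), if a throw helper call is the last
+ // instruction of a funclet:
+ //
+ //     call ThrowHelper     ; last instruction of the funclet
+ //     breakpoint           ; never executed (INS_BREAKPOINT below); keeps the unwinder's
+ //                          ; reported instruction pointer inside the funclet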
+ if ((block->bbNext == NULL)
+#if FEATURE_EH_FUNCLETS
+ || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
+#endif // FEATURE_EH_FUNCLETS
+#ifdef _TARGET_AMD64_
+ || !BasicBlock::sameEHRegion(block, block->bbNext)
+#endif // _TARGET_AMD64_
+ || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
+ block->bbNext == compiler->fgFirstColdBlock)
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+
+ break;
+
+ case BBJ_CALLFINALLY:
+
+ // Now set REG_LR to the address of where the finally funclet should
+ // return to directly.
+
+ BasicBlock* bbFinallyRet;
+ bbFinallyRet = NULL;
+
+ // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
+ // we would have otherwise created retless calls.
+ assert(block->isBBCallAlwaysPair());
+
+ assert(block->bbNext != NULL);
+ assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
+ assert(block->bbNext->bbJumpDest != NULL);
+ assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
+
+ bbFinallyRet = block->bbNext->bbJumpDest;
+ bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
+
+#if 0
+ // TODO-ARM-CQ:
+ // We don't know the address of the finally funclet yet, and adr requires that the offset
+ // from the current IP to the finally funclet be within 4095 bytes. So this code is disabled
+ // for now.
+ getEmitter()->emitIns_J_R (INS_adr,
+ EA_4BYTE,
+ bbFinallyRet,
+ REG_LR);
+#else // !0
+ // Load the address where the finally funclet should return into LR.
+ // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
+ // the return.
+ getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
+ getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
+#endif // !0
+
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ lblk = block;
+ block = block->bbNext;
+ }
+ break;
+
+#ifdef _TARGET_ARM_
+
+ case BBJ_EHCATCHRET:
+ // set r0 to the address the VM should return to after the catch
+ getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
+ getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
+
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+#elif defined(_TARGET_AMD64_)
+
+ case BBJ_EHCATCHRET:
+ // Set EAX to the address the VM should return to after the catch.
+ // Generate a RIP-relative
+ // lea reg, [rip + disp32] ; the RIP is implicit
+ // which will be position-independent.
+ // TODO-ARM-Bug?: For ngen, we need to generate a reloc for the displacement (maybe EA_PTR_DSP_RELOC).
+ getEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, block->bbJumpDest, REG_INTRET);
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+#endif // _TARGET_AMD64_
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->compCurBB = 0;
+#endif
+
+ } //------------------ END-FOR each block of the method -------------------
+
+ /* Nothing is live at this point */
+ genUpdateLife(VarSetOps::MakeEmpty(compiler));
+
+ /* Finalize the spill tracking logic */
+
+ regSet.rsSpillEnd();
+
+ /* Finalize the temp tracking logic */
+
+ compiler->tmpEnd();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n# ");
+ printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate, compiler->compSizeEstimate);
+ printf("%s\n", compiler->info.compFullName);
+ }
+#endif
+}
+
+// Return the child that has the same register as the dst (if any).
+// The other child is returned through the 'other' out parameter.
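+//
+// A hypothetical usage sketch (illustrative, not an actual call site):
+//
+//     GenTree* other;
+//     GenTree* same = sameRegAsDst(tree, other);
+//     if (same != nullptr)
+//     {
+//         // dst already holds 'same', so a two-operand form "ins dstReg, other" suffices
+//     }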
+GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
+{
+ if (tree->gtRegNum == REG_NA)
+ {
+ other = nullptr;
+ return NULL;
+ }
+
+ GenTreePtr op1 = tree->gtOp.gtOp1->gtEffectiveVal();
+ GenTreePtr op2 = tree->gtOp.gtOp2->gtEffectiveVal();
+ if (op1->gtRegNum == tree->gtRegNum)
+ {
+ other = op2;
+ return op1;
+ }
+ if (op2->gtRegNum == tree->gtRegNum)
+ {
+ other = op1;
+ return op2;
+ }
+ else
+ {
+ other = nullptr;
+ return NULL;
+ }
+}
+
+// move an immediate value into an integer register
+
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
+{
+ // reg cannot be a FP register
+ assert(!genIsValidFloatReg(reg));
+
+ if (!compiler->opts.compReloc)
+ {
+ size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
+ }
+
+ if ((imm == 0) && !EA_IS_RELOC(size))
+ {
+ instGen_Set_Reg_To_Zero(size, reg, flags);
+ }
+ else
+ {
+#ifdef _TARGET_AMD64_
+ if (AddrShouldUsePCRel(imm))
+ {
+ getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm);
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
+ }
+ }
+ regTracker.rsTrackRegIntCns(reg, imm);
+}
+
+/*****************************************************************************
+ *
+ * Generate code to set a register 'targetReg' of type 'targetType' to the constant
+ * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
+ * genProduceReg() on the target register.
+ */
+void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
+{
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+ {
+ // relocatable values tend to come down as a CNS_INT of native int type
+ // so the line between these two opcodes is kind of blurry
+ GenTreeIntConCommon* con = tree->AsIntConCommon();
+ ssize_t cnsVal = con->IconValue();
+
+ bool needReloc = compiler->opts.compReloc && tree->IsIconHandle();
+ if (needReloc)
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
+ regTracker.rsTrackRegTrash(targetReg);
+ }
+ else
+ {
+ genSetRegToIcon(targetReg, cnsVal, targetType);
+ }
+ }
+ break;
+
+ case GT_CNS_DBL:
+ {
+ NYI("GT_CNS_DBL");
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a single node in the tree.
+ * Preconditions: All operands have been evaluated
+ *
+ */
+void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
+{
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+ JITDUMP("Generating: ");
+ DISPNODE(treeNode);
+
+ // contained nodes are part of their parents for codegen purposes
+ // ex : immediates, most LEAs
+ if (treeNode->isContained())
+ {
+ return;
+ }
+
+ switch (treeNode->gtOper)
+ {
+ case GT_CNS_INT:
+ case GT_CNS_DBL:
+ genSetRegToConst(targetReg, targetType, treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_NEG:
+ case GT_NOT:
+ {
+ NYI("GT_NEG and GT_NOT");
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ assert(varTypeIsIntegralOrI(treeNode));
+ __fallthrough;
+
+ case GT_ADD:
+ case GT_SUB:
+ {
+ const genTreeOps oper = treeNode->OperGet();
+ if ((oper == GT_ADD || oper == GT_SUB) && treeNode->gtOverflow())
+ {
+ // This is also checked in the importer.
+ NYI("Overflow not yet implemented");
+ }
+
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ GenTreePtr op2 = treeNode->gtGetOp2();
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ noway_assert(targetReg != REG_NA);
+
+ regNumber op1reg = op1->gtRegNum;
+ regNumber op2reg = op2->gtRegNum;
+
+ GenTreePtr dst;
+ GenTreePtr src;
+
+ genConsumeIfReg(op1);
+ genConsumeIfReg(op2);
+
+ // This is the case of reg1 = reg1 op reg2
+ // We're ready to emit the instruction without any moves
+ if (op1reg == targetReg)
+ {
+ dst = op1;
+ src = op2;
+ }
+ // We have reg1 = reg2 op reg1
+ // In order for this operation to be correct
+ // we need that op is a commutative operation so
+ // we can convert it into reg1 = reg1 op reg2 and emit
+ // the same code as above
+ else if (op2reg == targetReg)
+ {
+ noway_assert(GenTree::OperIsCommutative(treeNode->OperGet()));
+ dst = op2;
+ src = op1;
+ }
+ // dest, op1 and op2 registers are different:
+ // reg3 = reg1 op reg2
+ // We can implement this by issuing a mov:
+ // reg3 = reg1
+ // reg3 = reg3 op reg2
+ else
+ {
+ inst_RV_RV(ins_Move_Extend(targetType, true), targetReg, op1reg, op1->gtType);
+ regTracker.rsTrackRegCopy(targetReg, op1reg);
+ gcInfo.gcMarkRegPtrVal(targetReg, targetType);
+ dst = treeNode;
+ src = op2;
+ }
+
+ regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
+ noway_assert(r == targetReg);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ genCodeForShift(treeNode);
+ // genCodeForShift() calls genProduceReg()
+ break;
+
+ case GT_CAST:
+ // Cast is never contained (?)
+ noway_assert(targetReg != REG_NA);
+
+ // Overflow conversions from float/double --> int types go through helper calls.
+ if (treeNode->gtOverflow() && !varTypeIsFloating(treeNode->gtOp.gtOp1))
+ NYI("Unimplmented GT_CAST:int <--> int with overflow");
+
+ if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double <--> double/float
+ genFloatToFloatCast(treeNode);
+ }
+ else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double --> int32/int64
+ genFloatToIntCast(treeNode);
+ }
+ else if (varTypeIsFloating(targetType))
+ {
+ // Casts int32/uint32/int64/uint64 --> float/double
+ genIntToFloatCast(treeNode);
+ }
+ else
+ {
+ // Casts int <--> int
+ genIntToIntCast(treeNode);
+ }
+ // The per-case functions call genProduceReg()
+ break;
+
+ case GT_LCL_VAR:
+ {
+ GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
+ // lcl_vars are not defs
+ assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
+
+ bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
+
+ if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
+ {
+ assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
+ }
+
+ // If this is a register candidate that has been spilled, genConsumeReg() will
+ // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
+
+ if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
+ {
+ assert(!isRegCandidate);
+ emit->emitIns_R_S(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode->gtRegNum,
+ lcl->gtLclNum, 0);
+ genProduceReg(treeNode);
+ }
+ }
+ break;
+
+ case GT_LCL_FLD_ADDR:
+ case GT_LCL_VAR_ADDR:
+ {
+ // Address of a local var. This by itself should never be allocated a register.
+ // If it is worth storing the address in a register then it should be cse'ed into
+ // a temp and that would be allocated a register.
+ noway_assert(targetType == TYP_BYREF);
+ noway_assert(!treeNode->InReg());
+
+ inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_LCL_FLD:
+ {
+ NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: struct load local field not supported");
+ NYI_IF(treeNode->gtRegNum == REG_NA, "GT_LCL_FLD: load local field not into a register is not supported");
+
+ emitAttr size = emitTypeSize(targetType);
+ unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_STORE_LCL_FLD:
+ {
+ NYI_IF(targetType == TYP_STRUCT, "GT_STORE_LCL_FLD: struct store local field not supported");
+ noway_assert(!treeNode->InReg());
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ genConsumeIfReg(op1);
+ emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ {
+ NYI_IF(targetType == TYP_STRUCT, "struct store local not supported");
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ genConsumeIfReg(op1);
+ if (treeNode->gtRegNum == REG_NA)
+ {
+ // stack store
+ emit->emitInsMov(ins_Store(targetType), emitTypeSize(treeNode), treeNode);
+ compiler->lvaTable[treeNode->AsLclVarCommon()->gtLclNum].lvRegNum = REG_STK;
+ }
+ else if (op1->isContained())
+ {
+ // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
+ // must be a constant. However, in the future we might want to support a contained memory op.
+ // This is a bit tricky because we have to decide it's contained before register allocation,
+ // and this would be a case where, once that's done, we need to mark that node as always
+ // requiring a register - which we always assume now anyway, but once we "optimize" that
+ // we'll have to take cases like this into account.
+ assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
+ genSetRegToConst(treeNode->gtRegNum, targetType, op1);
+ }
+ else if (op1->gtRegNum != treeNode->gtRegNum)
+ {
+ assert(op1->gtRegNum != REG_NA);
+ emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
+ }
+ if (treeNode->gtRegNum != REG_NA)
+ genProduceReg(treeNode);
+ }
+ break;
+
+ case GT_RETFILT:
+ // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
+ // the return register, if it's not already there. The processing is the same as GT_RETURN.
+ if (targetType != TYP_VOID)
+ {
+ // For filters, the IL spec says the result is type int32. Further, the only specified legal values
+ // are 0 or 1, with the use of other values "undefined".
+ assert(targetType == TYP_INT);
+ }
+
+ __fallthrough;
+
+ case GT_RETURN:
+ {
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ if (targetType == TYP_VOID)
+ {
+ assert(op1 == nullptr);
+ break;
+ }
+ assert(op1 != nullptr);
+ op1 = op1->gtEffectiveVal();
+
+ NYI_IF(op1->gtRegNum == REG_NA, "GT_RETURN: return of a value not in register");
+ genConsumeReg(op1);
+
+ regNumber retReg = varTypeIsFloating(op1) ? REG_FLOATRET : REG_INTRET;
+ if (op1->gtRegNum != retReg)
+ {
+ inst_RV_RV(ins_Move_Extend(targetType, true), retReg, op1->gtRegNum, targetType);
+ }
+ }
+ break;
+
+ case GT_LEA:
+ {
+ // if we are here, it is the case where there is an LEA that cannot
+ // be folded into a parent instruction
+ GenTreeAddrMode* lea = treeNode->AsAddrMode();
+ genLeaInstruction(lea);
+ }
+ // genLeaInstruction calls genProduceReg()
+ break;
+
+ case GT_IND:
+ emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MUL:
+ {
+ NYI("GT_MUL");
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a
+ // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD
+ // on float/double args.
+ noway_assert(!varTypeIsFloating(treeNode));
+ __fallthrough;
+
+ case GT_DIV:
+ {
+ NYI("GT_DIV");
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_INTRINSIC:
+ {
+ NYI("GT_INTRINSIC");
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ {
+ // TODO-ARM-CQ: Check if we can use the currently set flags.
+ // TODO-ARM-CQ: Check for the case where we can simply transfer the carry bit to a register
+ // (signed < or >= where targetReg != REG_NA)
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1->gtEffectiveVal();
+ GenTreePtr op2 = tree->gtOp2->gtEffectiveVal();
+
+ genConsumeIfReg(op1);
+ genConsumeIfReg(op2);
+
+ instruction ins = INS_cmp;
+ emitAttr cmpAttr;
+ if (varTypeIsFloating(op1))
+ {
+ NYI("Floating point compare");
+
+ bool isUnordered = ((treeNode->gtFlags & GTF_RELOP_NAN_UN) != 0);
+ switch (tree->OperGet())
+ {
+ case GT_EQ:
+ ins = INS_beq;
+ break;
+ case GT_NE:
+ ins = INS_bne;
+ break;
+ case GT_LT:
+ ins = isUnordered ? INS_blt : INS_blo;
+ break;
+ case GT_LE:
+ ins = isUnordered ? INS_ble : INS_bls;
+ break;
+ case GT_GE:
+ ins = isUnordered ? INS_bpl : INS_bge;
+ break;
+ case GT_GT:
+ ins = isUnordered ? INS_bhi : INS_bgt;
+ break;
+ default:
+ unreached();
+ }
+ }
+ else
+ {
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+ assert(!varTypeIsFloating(op2Type));
+ ins = INS_cmp;
+ if (op1Type == op2Type)
+ {
+ cmpAttr = emitTypeSize(op1Type);
+ }
+ else
+ {
+ var_types cmpType = TYP_INT;
+ bool op1Is64Bit = (varTypeIsLong(op1Type) || op1Type == TYP_REF);
+ bool op2Is64Bit = (varTypeIsLong(op2Type) || op2Type == TYP_REF);
+ NYI_IF(op1Is64Bit || op2Is64Bit, "Long compare");
+ assert(!op1->isContainedMemoryOp() || op1Type == op2Type);
+ assert(!op2->isContainedMemoryOp() || op1Type == op2Type);
+ cmpAttr = emitTypeSize(cmpType);
+ }
+ }
+ emit->emitInsBinary(ins, cmpAttr, op1, op2);
+
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+ }
+ break;
+
+ case GT_JTRUE:
+ {
+ GenTree* cmp = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ assert(cmp->OperIsCompare());
+ assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+ // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
+ // is governed by a flag NOT by the inherent type of the node
+ // TODO-ARM-CQ: Check if we can use the currently set flags.
+ CompareKind compareKind = ((cmp->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+
+ emitJumpKind jmpKind = genJumpKindForOper(cmp->gtOper, compareKind);
+ BasicBlock* jmpTarget = compiler->compCurBB->bbJumpDest;
+
+ inst_JMP(jmpKind, jmpTarget);
+ }
+ break;
+
+ case GT_RETURNTRAP:
+ {
+ // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
+ // based on the contents of 'data'
+
+ GenTree* data = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ genConsumeIfReg(data);
+ GenTreeIntCon cns = intForm(TYP_INT, 0);
+ emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns);
+
+ BasicBlock* skipLabel = genCreateTempLabel();
+
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, skipLabel);
+ // emit the call to the EE-helper that stops for GC (or other reasons)
+
+ genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN);
+ genDefineTempLabel(skipLabel);
+ }
+ break;
+
+ case GT_STOREIND:
+ {
+ NYI("GT_STOREIND");
+ }
+ break;
+
+ case GT_COPY:
+ {
+ assert(treeNode->gtOp.gtOp1->IsLocal());
+ GenTreeLclVarCommon* lcl = treeNode->gtOp.gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+ inst_RV_RV(ins_Move_Extend(targetType, true), targetReg, genConsumeReg(treeNode->gtOp.gtOp1), targetType,
+ emitTypeSize(targetType));
+
+ // The old location is dying
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(treeNode->gtOp.gtOp1));
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(treeNode->gtOp.gtOp1->gtRegNum));
+
+ genUpdateVarReg(varDsc, treeNode);
+
+ // The new location is going live
+ genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_LIST:
+ case GT_ARGPLACE:
+ // Nothing to do
+ break;
+
+ case GT_PUTARG_STK:
+ {
+ NYI_IF(targetType == TYP_STRUCT, "GT_PUTARG_STK: struct support not implemented");
+
+ // Get argument offset on stack.
+ // Here we cross-check that the argument offset hasn't changed from lowering to codegen,
+ // since the arg slot number is stored in the GT_PUTARG_STK node during lowering.
+ int argOffset = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE;
+#ifdef DEBUG
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
+ assert(curArgTabEntry);
+ assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
+#endif
+
+ GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ if (data->isContained())
+ {
+ emit->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), compiler->lvaOutgoingArgSpaceVar,
+ argOffset, (int)data->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ genConsumeReg(data);
+ emit->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum,
+ compiler->lvaOutgoingArgSpaceVar, argOffset);
+ }
+ }
+ break;
+
+ case GT_PUTARG_REG:
+ {
+ NYI_IF(targetType == TYP_STRUCT, "GT_PUTARG_REG: struct support not implemented");
+
+ // commas show up here commonly, as part of a nullchk operation
+ GenTree* op1 = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ // If child node is not already in the register we need, move it
+ genConsumeReg(op1);
+ if (treeNode->gtRegNum != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Move_Extend(targetType, true), treeNode->gtRegNum, op1->gtRegNum, targetType);
+ }
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_CALL:
+ genCallInstruction(treeNode);
+ break;
+
+ case GT_LOCKADD:
+ case GT_XCHG:
+ case GT_XADD:
+ genLockedInstructions(treeNode);
+ break;
+
+ case GT_CMPXCHG:
+ {
+ NYI("GT_CMPXCHG");
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_RELOAD:
+ // do nothing - reload is just a marker.
+ // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
+ // into the register specified in this node.
+ break;
+
+ case GT_NOP:
+ break;
+
+ case GT_NO_OP:
+ NYI("GT_NO_OP");
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ genRangeCheck(treeNode);
+ break;
+
+ case GT_PHYSREG:
+ if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg)
+ {
+ inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType);
+
+ genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg);
+ }
+ break;
+
+ case GT_PHYSREGDST:
+ break;
+
+ case GT_NULLCHECK:
+ {
+ assert(!treeNode->gtOp.gtOp1->isContained());
+ regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
+ emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0);
+ }
+ break;
+
+ case GT_CATCH_ARG:
+
+ noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
+
+ /* Catch arguments get passed in a register. genCodeForBBlist()
+ would have marked it as holding a GC object, but not as used. */
+
+ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
+ genConsumeReg(treeNode);
+ break;
+
+ case GT_PINVOKE_PROLOG:
+ noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
+
+ // the runtime side requires the codegen here to be consistent
+ emit->emitDisableRandomNops();
+ break;
+
+ case GT_LABEL:
+ genPendingCallLabel = genCreateTempLabel();
+ treeNode->gtLabel.gtLabBB = genPendingCallLabel;
+ emit->emitIns_R_L(INS_lea, EA_PTRSIZE, genPendingCallLabel, treeNode->gtRegNum);
+ break;
+
+ default:
+ {
+#ifdef DEBUG
+ char message[256];
+ sprintf(message, "NYI: Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
+ notYetImplemented(message, __FILE__, __LINE__);
+#else
+ NYI("unimplemented node");
+#endif
+ }
+ break;
+ }
+}
+
+// generate code for the locked operations:
+// GT_LOCKADD, GT_XCHG, GT_XADD
+void CodeGen::genLockedInstructions(GenTree* treeNode)
+{
+ NYI("genLockedInstructions");
+}
+
+// generate code for GT_ARR_BOUNDS_CHECK node
+void CodeGen::genRangeCheck(GenTreePtr oper)
+{
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
+ GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
+
+ GenTreePtr arrLen = bndsChk->gtArrLen->gtEffectiveVal();
+ GenTreePtr arrIdx = bndsChk->gtIndex->gtEffectiveVal();
+ GenTreePtr arrRef = NULL;
+ int lenOffset = 0;
+
+ GenTree * src1, *src2;
+ emitJumpKind jmpKind;
+
+ if (arrIdx->isContainedIntOrIImmed())
+ {
+ // To encode using a cmp immediate, we place the
+ // constant operand in the second position
+ src1 = arrLen;
+ src2 = arrIdx;
+ jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED);
+ }
+ else
+ {
+ src1 = arrIdx;
+ src2 = arrLen;
+ jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ }
+
+ genConsumeIfReg(src1);
+ genConsumeIfReg(src2);
+
+ getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), src1, src2);
+ genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+}
+
+// make a temporary indir we can feed to pattern matching routines
+// in cases where we don't want to instantiate all the indirs that happen
+//
+GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
+{
+ GenTreeIndir i(GT_IND, type, base, nullptr);
+ i.gtRegNum = REG_NA;
+ // has to be nonnull (because contained nodes can't be the last in block)
+ // but don't want it to be a valid pointer
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+// make a temporary int we can feed to pattern matching routines
+// in cases where we don't want to instantiate
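+// For example, the GT_RETURNTRAP handling above builds "GenTreeIntCon cns = intForm(TYP_INT, 0)"
+// on the stack and passes "&cns" to emitInsBinary for the compare, without allocating a node.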
+//
+GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
+{
+ GenTreeIntCon i(type, value);
+ i.gtRegNum = REG_NA;
+ // has to be nonnull (because contained nodes can't be the last in block)
+ // but don't want it to be a valid pointer
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
+{
+ instruction ins;
+
+ if (varTypeIsFloating(type))
+ return CodeGen::ins_MathOp(oper, type);
+
+ switch (oper)
+ {
+ case GT_ADD:
+ ins = INS_add;
+ break;
+ case GT_AND:
+ ins = INS_AND;
+ break;
+ case GT_MUL:
+ ins = INS_MUL;
+ break;
+ case GT_LSH:
+ ins = INS_SHIFT_LEFT_LOGICAL;
+ break;
+ case GT_NEG:
+ ins = INS_rsb;
+ break;
+ case GT_NOT:
+ ins = INS_NOT;
+ break;
+ case GT_OR:
+ ins = INS_OR;
+ break;
+ case GT_RSH:
+ ins = INS_SHIFT_RIGHT_ARITHM;
+ break;
+ case GT_RSZ:
+ ins = INS_SHIFT_RIGHT_LOGICAL;
+ break;
+ case GT_SUB:
+ ins = INS_sub;
+ break;
+ case GT_XOR:
+ ins = INS_XOR;
+ break;
+ default:
+ unreached();
+ break;
+ }
+ return ins;
+}
+
+//------------------------------------------------------------------------
+// genCodeForShift: Generates the code sequence for a GenTree node that
+// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
+//
+// Arguments:
+// tree - the bit shift node (that specifies the type of bit shift to perform).
+//
+// Assumptions:
+// a) All GenTrees are register allocated.
+//
+void CodeGen::genCodeForShift(GenTreePtr tree)
+{
+ NYI("genCodeForShift");
+}
+
+void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
+{
+ regNumber dstReg = tree->gtRegNum;
+
+ GenTree* unspillTree = tree;
+ if (tree->gtOper == GT_RELOAD)
+ {
+ unspillTree = tree->gtOp.gtOp1;
+ }
+ if (unspillTree->gtFlags & GTF_SPILLED)
+ {
+ if (genIsRegCandidateLocal(unspillTree))
+ {
+ // Reset spilled flag, since we are going to load a local variable from its home location.
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+
+ // Load local variable from its home location.
+ inst_RV_TT(ins_Load(unspillTree->gtType), dstReg, unspillTree);
+
+ unspillTree->SetInReg();
+
+ GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ // TODO-Review: We would like to call:
+ // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
+ // instead of the following code, but this ends up hitting this assert:
+ // assert((regSet.rsMaskVars & regMask) == 0);
+ // due to issues with LSRA resolution moves.
+ // So, just force it for now. This probably indicates a condition that creates a GC hole!
+ //
+ // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
+ // because the variable is not really going live or dead, but that method is somewhat poorly
+ // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
+ // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp.
+
+ genUpdateVarReg(varDsc, tree);
+#ifdef DEBUG
+ if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
+ varDsc->PrintVarReg();
+ printf(" is becoming live ");
+ Compiler::printTreeID(unspillTree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ regSet.AddMaskVars(genGetRegMask(varDsc));
+ }
+ else
+ {
+ TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
+ compiler->tmpRlsTemp(t);
+ getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->gtType), dstReg,
+ t->tdTempNum(), 0);
+
+ unspillTree->SetInReg();
+ }
+
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+}
+
+// do liveness update for a subnode that is being consumed by codegen
+regNumber CodeGen::genConsumeReg(GenTree* tree)
+{
+ genUnspillRegIfNeeded(tree);
+
+ // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
+ genUpdateLife(tree);
+ assert(tree->gtRegNum != REG_NA);
+
+ // there are three cases where consuming a reg means clearing the bit in the live mask
+ // 1. it was not produced by a local
+ // 2. it was produced by a local that is going dead
+ // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
+
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+
+ if (varDsc->lvRegNum == tree->gtRegNum && ((tree->gtFlags & GTF_VAR_DEATH) != 0))
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+ else if (!varDsc->lvLRACandidate)
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+
+ return tree->gtRegNum;
+}
+
+// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
+void CodeGen::genConsumeAddress(GenTree* addr)
+{
+ if (addr->OperGet() == GT_LEA)
+ {
+ genConsumeAddrMode(addr->AsAddrMode());
+ }
+ else
+ {
+ assert(!addr->isContained());
+ genConsumeReg(addr);
+ }
+}
+
+// do liveness update for a subnode that is being consumed by codegen
+void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
+{
+ if (addr->Base())
+ genConsumeReg(addr->Base());
+ if (addr->Index())
+ genConsumeReg(addr->Index());
+}
+
+// do liveness update for register produced by the current node in codegen
+void CodeGen::genProduceReg(GenTree* tree)
+{
+ if (tree->gtFlags & GTF_SPILL)
+ {
+ if (genIsRegCandidateLocal(tree))
+ {
+ // Store local variable to its home location.
+ tree->gtFlags &= ~GTF_REG_VAL;
+ inst_TT_RV(ins_Store(tree->gtType), tree, tree->gtRegNum);
+ }
+ else
+ {
+ tree->SetInReg();
+ regSet.rsSpillTree(tree->gtRegNum, tree);
+ tree->gtFlags |= GTF_SPILLED;
+ tree->gtFlags &= ~GTF_SPILL;
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ return;
+ }
+ }
+
+ genUpdateLife(tree);
+
+ // If we've produced a register, mark it as a pointer, as needed.
+ // Except in the case of a dead definition of a lclVar.
+ if (tree->gtHasReg() && (!tree->IsLocal() || (tree->gtFlags & GTF_VAR_DEATH) == 0))
+ {
+ gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
+ }
+ tree->SetInReg();
+}
+
+// transfer gc/byref status of src reg to dst reg
+void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
+{
+ regMaskTP srcMask = genRegMask(src);
+ regMaskTP dstMask = genRegMask(dst);
+
+ if (gcInfo.gcRegGCrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetGCref(dstMask);
+ }
+ else if (gcInfo.gcRegByrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetByref(dstMask);
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(dstMask);
+ }
+}
+
+// Produce code for a GT_CALL node
+void CodeGen::genCallInstruction(GenTreePtr node)
+{
+ NYI("Call not implemented");
+}
+
+// produce code for a GT_LEA subnode
+void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
+{
+ if (lea->Base() && lea->Index())
+ {
+ regNumber baseReg = genConsumeReg(lea->Base());
+ regNumber indexReg = genConsumeReg(lea->Index());
+ getEmitter()->emitIns_R_ARX(INS_lea, EA_BYREF, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset);
+ }
+ else if (lea->Base())
+ {
+ getEmitter()->emitIns_R_AR(INS_lea, EA_BYREF, lea->gtRegNum, genConsumeReg(lea->Base()), lea->gtOffset);
+ }
+
+ genProduceReg(lea);
+}
+
+// Generate code to materialize a condition into a register
+// (the condition codes must already have been appropriately set)
+
+void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
+{
+ NYI("genSetRegToCond");
+}
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer cast
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The treeNode must have an assigned register.
+// For a signed convert from byte, the source must be in a byte-addressable register.
+// Neither the source nor target type can be a floating point type.
+//
+void CodeGen::genIntToIntCast(GenTreePtr treeNode)
+{
+ NYI("Cast");
+}
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// The cast is between float and double.
+//
+void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
+{
+ NYI("Cast");
+}
+
+//------------------------------------------------------------------------
+// genIntToFloatCast: Generate code to cast an int/long to float/double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType= int32/uint32/int64/uint64 and DstType=float/double.
+//
+void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
+{
+ NYI("Cast");
+}
+
+//------------------------------------------------------------------------
+// genFloatToIntCast: Generate code to cast float/double to int/long
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType=float/double and DstType= int32/uint32/int64/uint64
+//
+void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
+{
+ NYI("Cast");
+}
+
+/*****************************************************************************
+ *
+ * Create and record GC Info for the function.
+ */
+#ifdef JIT32_GCENCODER
+void*
+#else
+void
+#endif
+CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
+{
+#ifdef JIT32_GCENCODER
+ return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
+#else
+ genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
+#endif
+}
+
+// TODO-ARM-Cleanup: It seems that the ARM JIT (classic and otherwise) uses this method, so it seems to be
+// inappropriately named?
+
+void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
+{
+ IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
+ GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
+ GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+ assert(gcInfoEncoder);
+
+ // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+ gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+ // First we figure out the encoder ID's for the stack slots and registers.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+ // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+ gcInfoEncoder->FinalizeSlotIds();
+ // Now we can actually use those slot ID's to declare live ranges.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
+
+ gcInfoEncoder->Build();
+
+ // The GC encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t).
+ // Let's save the values anyway for debugging purposes.
+ compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+ compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+
+/*****************************************************************************
+ * Emit a call to a helper function.
+ */
+
+void CodeGen::genEmitHelperCall(unsigned helper,
+ int argSize,
+ emitAttr retSize
+#ifndef LEGACY_BACKEND
+ ,
+ regNumber callTargetReg /*= REG_NA */
+#endif // !LEGACY_BACKEND
+ )
+{
+ NYI("Helper call");
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * Called for every scope info piece to record by the main genSetScopeInfo()
+ */
+
+void CodeGen::genSetScopeInfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& varLoc)
+{
+ /* We need to do some mapping while reporting back these variables */
+
+ unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
+ noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+
+ VarName name = nullptr;
+
+#ifdef DEBUG
+
+ for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
+ {
+ if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
+ {
+ name = compiler->info.compVarScopes[scopeNum].vsdName;
+ }
+ }
+
+ // Hang on to this info.
+
+ TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
+
+ tlvi.tlviVarNum = ilVarNum;
+ tlvi.tlviLVnum = LVnum;
+ tlvi.tlviName = name;
+ tlvi.tlviStartPC = startOffs;
+ tlvi.tlviLength = length;
+ tlvi.tlviAvailable = avail;
+ tlvi.tlviVarLoc = varLoc;
+
+#endif // DEBUG
+
+ compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
+}
+#endif // DEBUGGING_SUPPORT
+
+#endif // _TARGET_ARM_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
new file mode 100644
index 0000000000..ca0df53a34
--- /dev/null
+++ b/src/jit/codegenarm64.cpp
@@ -0,0 +1,9723 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Arm64 Code Generator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARM64_
+#include "emit.h"
+#include "codegen.h"
+#include "lower.h"
+#include "gcinfo.h"
+#include "gcinfoencoder.h"
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Prolog / Epilog XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+//------------------------------------------------------------------------
+// genInstrWithConstant: we will typically generate one instruction
+//
+// ins reg1, reg2, imm
+//
+// However the imm might not fit as a directly encodable immediate,
+// when it doesn't fit, we generate extra instruction(s) that set up
+// the 'regTmp' with the proper immediate value.
+//
+// mov regTmp, imm
+// ins reg1, reg2, regTmp
+//
+// Arguments:
+// ins - instruction
+// attr - operation size and GC attribute
+// reg1, reg2 - first and second register operands
+// imm - immediate value (third operand when it fits)
+// tmpReg - temp register to use when the 'imm' doesn't fit
+// inUnwindRegion - true if we are in a prolog/epilog region with unwind codes
+//
+// Return Value:
+// returns true if the immediate was too large and tmpReg was used and modified.
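+//
+// For illustration only (hypothetical operands): a small immediate encodes directly,
+//
+//     add reg1, reg2, #0x10
+//
+// while an immediate such as 0x12345678 does not, so we first materialize it,
+//
+//     mov tmpReg, #0x12345678    ; via instGen_Set_Reg_To_Imm (may take several instructions)
+//     add reg1, reg2, tmpReg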
+//
+bool CodeGen::genInstrWithConstant(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ ssize_t imm,
+ regNumber tmpReg,
+ bool inUnwindRegion /* = false */)
+{
+ bool immFitsInIns = false;
+ emitAttr size = EA_SIZE(attr);
+
+ // reg1 is usually a dest register
+ // reg2 is always source register
+ assert(tmpReg != reg2); // regTmp can not match any source register
+
+ switch (ins)
+ {
+ case INS_add:
+ case INS_sub:
+ if (imm < 0)
+ {
+ imm = -imm;
+ ins = (ins == INS_add) ? INS_sub : INS_add;
+ }
+ immFitsInIns = emitter::emitIns_valid_imm_for_add(imm, size);
+ break;
+
+ case INS_strb:
+ case INS_strh:
+ case INS_str:
+ // reg1 is a source register for store instructions
+ assert(tmpReg != reg1); // regTmp can not match any source register
+ immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size);
+ break;
+
+ case INS_ldrsb:
+ case INS_ldrsh:
+ case INS_ldrsw:
+ case INS_ldrb:
+ case INS_ldrh:
+ case INS_ldr:
+ immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size);
+ break;
+
+ default:
+ assert(!"Unexpected instruction in genInstrWithConstant");
+ break;
+ }
+
+ if (immFitsInIns)
+ {
+ // generate a single instruction that encodes the immediate directly
+ getEmitter()->emitIns_R_R_I(ins, attr, reg1, reg2, imm);
+ }
+ else
+ {
+ // caller can specify REG_NA for tmpReg, when it "knows" that the immediate will always fit
+ assert(tmpReg != REG_NA);
+
+ // generate two or more instructions
+
+ // first we load the immediate into tmpReg
+ instGen_Set_Reg_To_Imm(size, tmpReg, imm);
+ regTracker.rsTrackRegTrash(tmpReg);
+
+ // when we are in an unwind code region
+ // we record the extra instructions using unwindPadding()
+ if (inUnwindRegion)
+ {
+ compiler->unwindPadding();
+ }
+
+ // generate the instruction using a three register encoding with the immediate in tmpReg
+ getEmitter()->emitIns_R_R_R(ins, attr, reg1, reg2, tmpReg);
+ }
+ return immFitsInIns;
+}
+
+//------------------------------------------------------------------------
+// genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog
+// or the epilog. The unwind codes for the generated instructions are produced. An available temporary
+// register is required to be specified, in case the constant is too large to encode in an "add"
+// instruction (or "sub" instruction if we choose to use one), such that we need to load the constant
+// into a register first, before using it.
+//
+// Arguments:
+// spDelta - the value to add to SP (can be negative)
+// tmpReg - an available temporary register
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
+// Return Value:
+// None.
+
+void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
+{
+ // Even though INS_add is specified here, the encoder will choose either
+ // an INS_add or an INS_sub and encode the immediate as a positive value
+ //
+ if (genInstrWithConstant(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true))
+ {
+ if (pTmpRegIsZero != nullptr)
+ {
+ *pTmpRegIsZero = false;
+ }
+ }
+
+ // spDelta is negative in the prolog, positive in the epilog, but we always tell the unwind codes the positive
+ // value.
+ ssize_t spDeltaAbs = abs(spDelta);
+ unsigned unwindSpDelta = (unsigned)spDeltaAbs;
+ assert((ssize_t)unwindSpDelta == spDeltaAbs); // make sure that it fits in an unsigned
+
+ compiler->unwindAllocStack(unwindSpDelta);
+}
+
+//------------------------------------------------------------------------
+// genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet
+// prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction.
+// The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that
+// instruction.
+//
+// Arguments:
+// reg1 - First register of pair to save.
+// reg2 - Second register of pair to save.
+// spOffset - The offset from SP to store reg1 (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
+// zero).
+// lastSavedWasPreviousPair - True if the last prolog instruction was to save the previous register pair. This
+// allows us to emit the "save_next" unwind code.
+// tmpReg - An available temporary register. Needed for the case of large frames.
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
+// Return Value:
+// None.
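+//
+// For illustration only (hypothetical registers and offsets): a prolog that establishes a
+// 48-byte save area and saves two register pairs might emit
+//
+//     stp fp, lr, [sp, #-48]!      ; pre-indexed: the SP adjustment folds into the first save
+//     stp x19, x20, [sp, #16]      ; reported via unwindSaveRegPair (or unwindSaveNext)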
+
+void CodeGen::genPrologSaveRegPair(regNumber reg1,
+ regNumber reg2,
+ int spOffset,
+ int spDelta,
+ bool lastSavedWasPreviousPair,
+ regNumber tmpReg,
+ bool* pTmpRegIsZero)
+{
+ assert(spOffset >= 0);
+ assert(spDelta <= 0);
+ assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
+ assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both
+ // FP/SIMD
+
+ bool needToSaveRegs = true;
+ if (spDelta != 0)
+ {
+ if ((spOffset == 0) && (spDelta >= -512))
+ {
+ // We can use pre-indexed addressing.
+ // stp REG, REG + 1, [SP, #spDelta]!
+ // 64-bit STP offset range: -512 to 504, multiple of 8.
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX);
+ compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta);
+
+ needToSaveRegs = false;
+ }
+ else // (spDelta < -512)
+ {
+ // We need to do SP adjustment separately from the store; we can't fold in a pre-indexed addressing and the
+ // non-zero offset.
+
+ // generate sub SP,SP,imm
+ genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
+ }
+ }
+
+ if (needToSaveRegs)
+ {
+ // stp REG, REG + 1, [SP, #offset]
+ // 64-bit STP offset range: -512 to 504, multiple of 8.
+ assert(spOffset <= 504);
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
+
+ if (lastSavedWasPreviousPair)
+ {
+ // This works as long as we've only been saving pairs, in order, and we've saved the previous one just
+ // before this one.
+ compiler->unwindSaveNext();
+ }
+ else
+ {
+ compiler->unwindSaveRegPair(reg1, reg2, spOffset);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or
+// floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0),
+// then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not
+// below it, which we currently don't support. This restriction could be loosened if the callers change to handle it
+// (and this function changes to support using pre-indexed STR addressing). The caller must ensure that we can use the
+// STR instruction, and that spOffset will be in the legal range for that instruction.
+//
+// Arguments:
+// reg1 - Register to save.
+// spOffset - The offset from SP to store reg1 (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
+// zero).
+// tmpReg - An available temporary register. Needed for the case of large frames.
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
+// Return Value:
+// None.
+
+void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
+{
+ assert(spOffset >= 0);
+ assert(spDelta <= 0);
+ assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
+
+ if (spDelta != 0)
+ {
+ // generate sub SP,SP,imm
+ genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
+ }
+
+ // str REG, [SP, #offset]
+ // 64-bit STR offset range: 0 to 32760, multiple of 8.
+ getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
+ compiler->unwindSaveReg(reg1, spOffset);
+}
+
+//------------------------------------------------------------------------
+// genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog.
+// The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing.
+// The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that
+// instruction.
+//
+// Arguments:
+// reg1 - First register of pair to restore.
+// reg2 - Second register of pair to restore.
+// spOffset - The offset from SP to load reg1 (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
+// zero).
+// tmpReg - An available temporary register. Needed for the case of large frames.
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
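+// For illustration (registers and sizes are arbitrary): restoring x19/x20 while folding a +64 SP adjustment
+// (spOffset 0, spDelta 64) emits
+//     ldp x19, x20, [sp], #64
+// whereas a delta that can't be folded uses a separate add after the load:
+//     ldp x19, x20, [sp,#offset]
+//     add sp, sp, #spDelta
+//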
+// Return Value:
+// None.
+
+void CodeGen::genEpilogRestoreRegPair(
+ regNumber reg1, regNumber reg2, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
+{
+ assert(spOffset >= 0);
+ assert(spDelta >= 0);
+ assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
+
+ if (spDelta != 0)
+ {
+ if ((spOffset == 0) && (spDelta <= 504))
+ {
+ // Fold the SP change into this instruction.
+ // ldp reg1, reg2, [SP], #spDelta
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX);
+ compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta);
+ }
+ else // (spOffset != 0) || (spDelta > 504)
+ {
+ // Can't fold in the SP change; need to use a separate ADD instruction.
+
+ // ldp reg1, reg2, [SP, #offset]
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
+ compiler->unwindSaveRegPair(reg1, reg2, spOffset);
+
+ // generate add SP,SP,imm
+ genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
+ }
+ }
+ else
+ {
+ // ldp reg1, reg2, [SP, #offset]
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
+ compiler->unwindSaveRegPair(reg1, reg2, spOffset);
+ }
+}
+
+//------------------------------------------------------------------------
+// genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog.
+//
+// Arguments:
+// reg1 - Register to restore.
+// spOffset - The offset from SP to restore reg1 (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
+// zero).
+// tmpReg - An available temporary register. Needed for the case of large frames.
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
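+// For illustration (register and sizes are arbitrary): restoring x27 from spOffset 8 and then releasing a
+// 96-byte frame (spDelta 96) emits:
+//     ldr x27, [sp,#8]
+//     add sp, sp, #96
+//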
+// Return Value:
+// None.
+
+void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
+{
+ assert(spOffset >= 0);
+ assert(spDelta >= 0);
+ assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
+
+ // ldr reg1, [SP, #offset]
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
+ compiler->unwindSaveReg(reg1, spOffset);
+
+ if (spDelta != 0)
+ {
+ // generate add SP,SP,imm
+ genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
+ }
+}
+
+//------------------------------------------------------------------------
+// genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame
+// in the function or funclet prolog. The save set does not contain FP, since that is
+// guaranteed to be saved separately, so we can set up chaining. We can only use the instructions
+// that are allowed by the unwind codes. Integer registers are stored at lower addresses,
+// FP/SIMD registers are stored at higher addresses. There are no gaps. The caller ensures that
+// there is enough space on the frame to store these registers, and that the store instructions
+// we need to use (STR or STP) are encodable with the stack-pointer immediate offsets we need to
+// use. Note that the save set can contain LR if this is a frame without a frame pointer, in
+// which case LR is saved along with the other callee-saved registers. The caller can tell us
+// to fold in a stack pointer adjustment, which we will do with the first instruction. Note that
+// the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the
+// stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved
+// registers, though, we will have an empty alignment slot somewhere. It turns out we will put
+// it below (at a lower address than) the callee-saved registers, as that is currently how we
+// do frame layout. This means that the first stack offset will be 8, and the stack pointer
+// adjustment must be done by a SUB rather than folded into a pre-indexed store.
+//
+// Arguments:
+// regsToSaveMask - The mask of callee-saved registers to save. If empty, this function does nothing.
+// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that
+// if non-zero spDelta, then this is the offset of the first save *after* that
+// SP adjustment.
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
+// zero).
+//
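+// Here's an illustrative save sequence (the mirror of the restore example shown for
+// genRestoreCalleeSavedRegistersHelp; actual registers and offsets depend on the save mask and frame layout):
+//      stp x19, x20, [sp,#32]
+//      stp x21, x22, [sp,#48]
+//      stp x23, x24, [sp,#64]
+//      stp x25, x26, [sp,#80]
+//      stp x27, x28, [sp,#96]
+//
+// For the case of non-zero spDelta with an even count of saved registers, the first save folds the SP
+// adjustment, for example:
+//      stp x19, x20, [sp,#-80]!
+//      stp x21, x22, [sp,#16]
+//      stp x23, x24, [sp,#32]
+//      stp x25, x26, [sp,#48]
+//      stp x27, x28, [sp,#64]
+//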
+// Return Value:
+// None.
+
+void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta)
+{
+ assert(spDelta <= 0);
+ unsigned regsToSaveCount = genCountBits(regsToSaveMask);
+ if (regsToSaveCount == 0)
+ {
+ if (spDelta != 0)
+ {
+ // Currently this is the case for varargs only
+ // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
+ genStackPointerAdjustment(spDelta, REG_NA, nullptr);
+ }
+ return;
+ }
+
+ assert((spDelta % 16) == 0);
+ assert((regsToSaveMask & RBM_FP) == 0); // we never save FP here
+ assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in
+ // RBM_CALLEE_SAVED.
+
+ regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
+ regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat;
+
+ int spOffset = lowestCalleeSavedOffset; // this is the offset *after* we change SP.
+
+ unsigned intRegsToSaveCount = genCountBits(maskSaveRegsInt);
+ unsigned floatRegsToSaveCount = genCountBits(maskSaveRegsFloat);
+ bool isPairSave = false;
+#ifdef DEBUG
+ bool isRegsToSaveCountOdd = ((intRegsToSaveCount + floatRegsToSaveCount) % 2 != 0);
+#endif
+
+ // Save the integer registers
+
+ bool lastSavedWasPair = false;
+
+ while (maskSaveRegsInt != RBM_NONE)
+ {
+ // If this is the first store that needs to change SP (spDelta != 0),
+ // then the offset must be 8 to account for alignment for the odd count
+ // or it must be 0 for the even count.
+ assert((spDelta == 0) || (isRegsToSaveCountOdd && spOffset == REGSIZE_BYTES) ||
+ (!isRegsToSaveCountOdd && spOffset == 0));
+
+ isPairSave = (intRegsToSaveCount >= 2);
+ regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsInt);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskSaveRegsInt &= ~reg1Mask;
+ intRegsToSaveCount -= 1;
+
+ if (isPairSave)
+ {
+ // We can use a STP instruction.
+
+ regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsInt);
+ regNumber reg2 = genRegNumFromMask(reg2Mask);
+ assert((reg2 == REG_NEXT(reg1)) || (reg2 == REG_LR));
+ maskSaveRegsInt &= ~reg2Mask;
+ intRegsToSaveCount -= 1;
+
+ genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
+
+ // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
+ // this epilog, to get the codes to match. Turn this off until that is better understood.
+ // lastSavedWasPair = true;
+
+ spOffset += 2 * REGSIZE_BYTES;
+ }
+ else
+ {
+ // No register pair; we use a STR instruction.
+
+ genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr);
+
+ lastSavedWasPair = false;
+ spOffset += REGSIZE_BYTES;
+ }
+
+ spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
+ }
+
+ assert(intRegsToSaveCount == 0);
+
+ // Save the floating-point/SIMD registers
+
+ lastSavedWasPair = false;
+
+ while (maskSaveRegsFloat != RBM_NONE)
+ {
+ // If this is the first store that needs to change SP (spDelta != 0),
+ // then the offset must be 8 to account for alignment for the odd count
+ // or it must be 0 for the even count.
+ assert((spDelta == 0) || (isRegsToSaveCountOdd && spOffset == REGSIZE_BYTES) ||
+ (!isRegsToSaveCountOdd && spOffset == 0));
+
+ isPairSave = (floatRegsToSaveCount >= 2);
+ regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsFloat);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskSaveRegsFloat &= ~reg1Mask;
+ floatRegsToSaveCount -= 1;
+
+ if (isPairSave)
+ {
+ // We can use a STP instruction.
+
+ regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsFloat);
+ regNumber reg2 = genRegNumFromMask(reg2Mask);
+ assert(reg2 == REG_NEXT(reg1));
+ maskSaveRegsFloat &= ~reg2Mask;
+ floatRegsToSaveCount -= 1;
+
+ genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
+
+ // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
+ // this epilog, to get the codes to match. Turn this off until that is better understood.
+ // lastSavedWasPair = true;
+
+ spOffset += 2 * FPSAVE_REGSIZE_BYTES;
+ }
+ else
+ {
+ // No register pair; we use a STR instruction.
+
+ genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr);
+
+ lastSavedWasPair = false;
+ spOffset += FPSAVE_REGSIZE_BYTES;
+ }
+
+ spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
+ }
+
+ assert(floatRegsToSaveCount == 0);
+}
+
+//------------------------------------------------------------------------
+// genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame
+// in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp().
+//
+// Arguments:
+// regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing.
+// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area.
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
+// zero).
+//
+// Here's an example restore sequence:
+// ldp x27, x28, [sp,#96]
+// ldp x25, x26, [sp,#80]
+// ldp x23, x24, [sp,#64]
+// ldp x21, x22, [sp,#48]
+// ldp x19, x20, [sp,#32]
+//
+// For the case of non-zero spDelta, we assume the base of the callee-save registers to restore is at SP, and
+// the last restore adjusts SP by the specified amount. For example:
+// ldp x27, x28, [sp,#64]
+// ldp x25, x26, [sp,#48]
+// ldp x23, x24, [sp,#32]
+// ldp x21, x22, [sp,#16]
+// ldp x19, x20, [sp], #80
+//
+// Note that the unwind functions are called with the prolog operation that is being undone. So, for example, when
+// generating a post-indexed load, we call the unwind function for the corresponding pre-indexed store.
+//
+// Return Value:
+// None.
+
+void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta)
+{
+ assert(spDelta >= 0);
+ unsigned regsToRestoreCount = genCountBits(regsToRestoreMask);
+ if (regsToRestoreCount == 0)
+ {
+ if (spDelta != 0)
+ {
+ // Currently this is the case for varargs only
+ // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
+ genStackPointerAdjustment(spDelta, REG_NA, nullptr);
+ }
+ return;
+ }
+
+ assert((spDelta % 16) == 0);
+ assert((regsToRestoreMask & RBM_FP) == 0); // we never restore FP here
+ assert(regsToRestoreCount <=
+ genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in RBM_CALLEE_SAVED.
+
+ regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT;
+ regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat;
+
+ assert(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);
+ int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; // Point past the end, to start. We
+ // predecrement to find the offset to
+ // load from.
+
+ unsigned floatRegsToRestoreCount = genCountBits(maskRestoreRegsFloat);
+ unsigned intRegsToRestoreCount = genCountBits(maskRestoreRegsInt);
+ int stackDelta = 0;
+ bool isPairRestore = false;
+ bool thisIsTheLastRestoreInstruction = false;
+#ifdef DEBUG
+ bool isRegsToRestoreCountOdd = ((floatRegsToRestoreCount + intRegsToRestoreCount) % 2 != 0);
+#endif
+
+ // We want to restore in the opposite order we saved, so the unwind codes match. Be careful to handle odd numbers of
+ // callee-saved registers properly.
+
+ // Restore the floating-point/SIMD registers
+
+ while (maskRestoreRegsFloat != RBM_NONE)
+ {
+ thisIsTheLastRestoreInstruction = (floatRegsToRestoreCount <= 2) && (maskRestoreRegsInt == RBM_NONE);
+ isPairRestore = (floatRegsToRestoreCount % 2) == 0;
+
+ // Update stack delta only if it is the last restore (the first save).
+ if (thisIsTheLastRestoreInstruction)
+ {
+ assert(stackDelta == 0);
+ stackDelta = spDelta;
+ }
+
+ // Update stack offset.
+ if (isPairRestore)
+ {
+ spOffset -= 2 * FPSAVE_REGSIZE_BYTES;
+ }
+ else
+ {
+ spOffset -= FPSAVE_REGSIZE_BYTES;
+ }
+
+ // If this is the last restore (the first save) that needs to change SP (stackDelta != 0),
+ // then the offset must be 8 to account for alignment for the odd count
+ // or it must be 0 for the even count.
+ assert((stackDelta == 0) || (isRegsToRestoreCountOdd && spOffset == FPSAVE_REGSIZE_BYTES) ||
+ (!isRegsToRestoreCountOdd && spOffset == 0));
+
+ regMaskTP reg2Mask = genFindHighestBit(maskRestoreRegsFloat);
+ regNumber reg2 = genRegNumFromMask(reg2Mask);
+ maskRestoreRegsFloat &= ~reg2Mask;
+ floatRegsToRestoreCount -= 1;
+
+ if (isPairRestore)
+ {
+ regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsFloat);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskRestoreRegsFloat &= ~reg1Mask;
+ floatRegsToRestoreCount -= 1;
+
+ genEpilogRestoreRegPair(reg1, reg2, spOffset, stackDelta, REG_IP0, nullptr);
+ }
+ else
+ {
+ genEpilogRestoreReg(reg2, spOffset, stackDelta, REG_IP0, nullptr);
+ }
+ }
+
+ assert(floatRegsToRestoreCount == 0);
+
+ // Restore the integer registers
+
+ while (maskRestoreRegsInt != RBM_NONE)
+ {
+ thisIsTheLastRestoreInstruction = (intRegsToRestoreCount <= 2);
+ isPairRestore = (intRegsToRestoreCount % 2) == 0;
+
+ // Update stack delta only if it is the last restore (the first save).
+ if (thisIsTheLastRestoreInstruction)
+ {
+ assert(stackDelta == 0);
+ stackDelta = spDelta;
+ }
+
+ // Update stack offset.
+ spOffset -= REGSIZE_BYTES;
+ if (isPairRestore)
+ {
+ spOffset -= REGSIZE_BYTES;
+ }
+
+ // If this is the last restore (the first save) that needs to change SP (stackDelta != 0),
+ // then the offset must be 8 to account for alignment for the odd count
+ // or it must be 0 for the even count.
+ assert((stackDelta == 0) || (isRegsToRestoreCountOdd && spOffset == REGSIZE_BYTES) ||
+ (!isRegsToRestoreCountOdd && spOffset == 0));
+
+ regMaskTP reg2Mask = genFindHighestBit(maskRestoreRegsInt);
+ regNumber reg2 = genRegNumFromMask(reg2Mask);
+ maskRestoreRegsInt &= ~reg2Mask;
+ intRegsToRestoreCount -= 1;
+
+ if (isPairRestore)
+ {
+ regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsInt);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskRestoreRegsInt &= ~reg1Mask;
+ intRegsToRestoreCount -= 1;
+
+ genEpilogRestoreRegPair(reg1, reg2, spOffset, stackDelta, REG_IP0, nullptr);
+ }
+ else
+ {
+ genEpilogRestoreReg(reg2, spOffset, stackDelta, REG_IP0, nullptr);
+ }
+ }
+
+ assert(intRegsToRestoreCount == 0);
+}
+
+// clang-format off
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ *
+ * Funclets have the following incoming arguments:
+ *
+ * catch: x0 = the exception object that was caught (see GT_CATCH_ARG)
+ * filter: x0 = the exception object to filter (see GT_CATCH_ARG), x1 = CallerSP of the containing function
+ * finally/fault: none
+ *
+ * Funclets set the following registers on exit:
+ *
+ * catch: x0 = the address at which execution should resume (see BBJ_EHCATCHRET)
+ * filter: x0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
+ * finally/fault: none
+ *
+ * The ARM64 funclet prolog sequence is one of the following (Note: #framesz is total funclet frame size,
+ * including everything; #outsz is outgoing argument space. #framesz must be a multiple of 16):
+ *
+ * Frame type 1:
+ * For #outsz == 0 and #framesz <= 512:
+ * stp fp,lr,[sp,-#framesz]! ; establish the frame, save FP/LR
+ * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
+ * |-----------------------|
+ * | Saved FP, LR | // 16 bytes
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * Frame type 2:
+ * For #outsz != 0 and #framesz <= 512:
+ * sub sp,sp,#framesz ; establish the frame
+ * stp fp,lr,[sp,#outsz] ; save FP/LR.
+ * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
+ * |-----------------------|
+ * | Saved FP, LR | // 16 bytes
+ * |-----------------------|
+ * | Outgoing arg space | // multiple of 8 bytes
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * Frame type 3:
+ * For #framesz > 512:
+ * stp fp,lr,[sp,- (#framesz - #outsz)]! ; establish the frame, save FP/LR: note that it is guaranteed here that (#framesz - #outsz) <= 168
+ * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
+ * sub sp,sp,#outsz ; create space for outgoing argument space
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned
+ * |-----------------------|
+ * | Saved FP, LR | // 16 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space).
+ * |-----------------------|
+ * | Outgoing arg space | // multiple of 8 bytes
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3,
+ * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack
+ * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 168 bytes:
+ * (1 PSP slot + 12 integer registers + 8 FP/SIMD registers) * 8 bytes. The outgoing argument size, however, can be very large, if we call a
+ * function that takes a large number of arguments (note that we currently use the same outgoing argument space size in the funclet as for the main
+ * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of outgoing arguments for any call).
+ * In that case, we need to 16-byte align the initial change to SP, before saving off the callee-saved registers and establishing the PSPsym,
+ * so we can use the limited immediate offset encodings we have available, before doing another 16-byte aligned SP adjustment to create the
+ * outgoing argument space. Both changes to SP might need to add alignment padding.
+ *
+ * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP
+ * as in the main function.
+ *
+ * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters.
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog.
+ *
+ * if (this is a filter funclet)
+ * {
+ * // x1 on entry to a filter funclet is CallerSP of the containing function:
+ * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
+ * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
+ * // a funclet. Consider:
+ * //
+ * // try {
+ * // try {
+ * // throw new Exception();
+ * // } catch(Exception) {
+ * // throw new Exception(); // The exception thrown here ...
+ * // }
+ * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack
+ * // } filter-handler {
+ * // }
+ * //
+ * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will
+ * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always
+ * // create a main function PSP for any function with a filter.
+ *
+ * ldr x1, [x1, #CallerSP_to_PSP_slot_delta] ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
+ * str x1, [sp, #SP_to_PSP_slot_delta] ; store the PSP
+ * add fp, x1, #Function_CallerSP_to_FP_delta ; re-establish the frame pointer
+ * }
+ * else
+ * {
+ * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
+ * // TODO-ARM64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction.
+ *
+ * add x3, fp, #Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. x3 is scratch.
+ * str x3, [sp, #SP_to_PSP_slot_delta] ; store the PSP
+ * }
+ *
+ * An example epilog sequence is then:
+ *
+ * add sp,sp,#outsz ; if any outgoing argument space
+ * ... ; restore callee-saved registers
+ * ldp x19,x20,[sp,#xxx]
+ * ldp fp,lr,[sp],#framesz
+ * ret lr
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes
+ * |-----------------------|
+ * | Saved FP, LR | // 16 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
+ * |-----------------------|
+ * | Outgoing arg space | // multiple of 8 bytes
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ */
+// clang-format on
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFuncletProlog()\n");
+#endif
+
+ assert(block != NULL);
+ assert((block->bbFlags & BBF_FUNCLET_BEG) != 0);
+
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ gcInfo.gcResetForBB();
+
+ compiler->unwindBegProlog();
+
+ regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
+ regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat;
+
+ // Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
+ assert((maskSaveRegsInt & RBM_LR) != 0);
+ assert((maskSaveRegsInt & RBM_FP) != 0);
+
+ bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
+
+ regMaskTP maskArgRegsLiveIn;
+ if (isFilter)
+ {
+ maskArgRegsLiveIn = RBM_R0 | RBM_R1;
+ }
+ else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
+ {
+ maskArgRegsLiveIn = RBM_NONE;
+ }
+ else
+ {
+ maskArgRegsLiveIn = RBM_R0;
+ }
+
+ int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
+
+ if (genFuncletInfo.fiFrameType == 1)
+ {
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
+ INS_OPTS_PRE_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+ assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
+ }
+ else if (genFuncletInfo.fiFrameType == 2)
+ {
+ // fiFrameType==2 constraints:
+ assert(genFuncletInfo.fiSpDelta1 < 0);
+ assert(genFuncletInfo.fiSpDelta1 >= -512);
+
+ // generate sub SP,SP,imm
+ genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_NA, nullptr);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
+ genFuncletInfo.fiSP_to_FPLR_save_delta);
+ compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);
+ }
+ else
+ {
+ assert(genFuncletInfo.fiFrameType == 3);
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
+ INS_OPTS_PRE_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
+
+ lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2; // We haven't done the second adjustment of SP yet.
+ }
+ maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now
+
+ genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, lowestCalleeSavedOffset, 0);
+
+ if (genFuncletInfo.fiFrameType == 3)
+ {
+ // Note that genFuncletInfo.fiSpDelta2 is always a negative value
+ assert(genFuncletInfo.fiSpDelta2 < 0);
+
+ // generate sub SP,SP,imm
+ genStackPointerAdjustment(genFuncletInfo.fiSpDelta2, REG_R2, nullptr);
+ }
+
+ // This is the end of the OS-reported prolog for purposes of unwinding
+ compiler->unwindEndProlog();
+
+ if (isFilter)
+ {
+ // This is the first block of a filter
+ // Note that register x1 = CallerSP of the containing function
+ // X1 is overwritten by the first Load (new callerSP)
+ // X2 is scratch when we have a large constant offset
+
+ // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
+ genInstrWithConstant(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
+ genFuncletInfo.fiCallerSP_to_PSP_slot_delta, REG_R2, false);
+ regTracker.rsTrackRegTrash(REG_R1);
+
+ // Store the PSP value (aka CallerSP)
+ genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
+ genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
+
+ // re-establish the frame pointer
+ genInstrWithConstant(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1, genFuncletInfo.fiFunction_CallerSP_to_FP_delta,
+ REG_R2, false);
+ }
+ else // This is a non-filter funclet
+ {
+ // X3 is scratch, X2 can also become scratch
+
+ // compute the CallerSP, given the frame pointer. x3 is scratch.
+ genInstrWithConstant(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, -genFuncletInfo.fiFunction_CallerSP_to_FP_delta,
+ REG_R2, false);
+ regTracker.rsTrackRegTrash(REG_R3);
+
+ genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
+ genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFuncletEpilog()\n");
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ bool unwindStarted = false;
+
+ if (!unwindStarted)
+ {
+ // We can delay this until we know we'll generate an unwindable instruction, if necessary.
+ compiler->unwindBegEpilog();
+ unwindStarted = true;
+ }
+
+ regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
+ regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat;
+
+ // Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
+ assert((maskRestoreRegsInt & RBM_LR) != 0);
+ assert((maskRestoreRegsInt & RBM_FP) != 0);
+
+ maskRestoreRegsInt &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end
+
+ int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
+
+ if (genFuncletInfo.fiFrameType == 3)
+ {
+ // Note that genFuncletInfo.fiSpDelta2 is always a negative value
+ assert(genFuncletInfo.fiSpDelta2 < 0);
+
+ // generate add SP,SP,imm
+ genStackPointerAdjustment(-genFuncletInfo.fiSpDelta2, REG_R2, nullptr);
+
+ lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2;
+ }
+
+ regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat;
+ genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0);
+
+ if (genFuncletInfo.fiFrameType == 1)
+ {
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
+ INS_OPTS_POST_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+ assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
+ }
+ else if (genFuncletInfo.fiFrameType == 2)
+ {
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
+ genFuncletInfo.fiSP_to_FPLR_save_delta);
+ compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);
+
+ // fiFrameType==2 constraints:
+ assert(genFuncletInfo.fiSpDelta1 < 0);
+ assert(genFuncletInfo.fiSpDelta1 >= -512);
+
+ // generate add SP,SP,imm
+ genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_NA, nullptr);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+ }
+ else
+ {
+ assert(genFuncletInfo.fiFrameType == 3);
+
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
+ INS_OPTS_POST_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
+ }
+
+ inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
+ compiler->unwindReturn(REG_LR);
+
+ compiler->unwindEndEpilog();
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ * Note that all funclet prologs are identical, and all funclet epilogs are
+ * identical (per type: filters are identical, and non-filters are identical).
+ * Thus, we compute the data used for these just once.
+ *
+ * See genFuncletProlog() for more information about the prolog/epilog sequences.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (!compiler->ehAnyFunclets())
+ return;
+
+ assert(isFramePointerUsed());
+ assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be
+ // finalized
+
+ genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta();
+
+ regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
+ assert((rsMaskSaveRegs & RBM_LR) != 0);
+ assert((rsMaskSaveRegs & RBM_FP) != 0);
+
+ unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
+ unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + /* PSPSym */ REGSIZE_BYTES;
+ if (compiler->info.compIsVarArgs)
+ {
+ // For varargs we always save all of the integer register arguments
+ // so that they are contiguous with the incoming stack arguments.
+ saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES;
+ }
+ unsigned saveRegsPlusPSPSizeAligned = (unsigned)roundUp(saveRegsPlusPSPSize, STACK_ALIGN);
+
+ assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
+ unsigned outgoingArgSpaceAligned = (unsigned)roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN);
+
+ unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned;
+ assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0);
+
+ int SP_to_FPLR_save_delta;
+ int SP_to_PSP_slot_delta;
+ int CallerSP_to_PSP_slot_delta;
+
+ if (maxFuncletFrameSizeAligned <= 512)
+ {
+ unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize;
+ unsigned funcletFrameSizeAligned = (unsigned)roundUp(funcletFrameSize, STACK_ALIGN);
+ assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned);
+
+ unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
+ assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES));
+
+ SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize;
+ SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + funcletFrameAlignmentPad;
+ CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES);
+
+ if (compiler->lvaOutgoingArgSpaceSize == 0)
+ {
+ genFuncletInfo.fiFrameType = 1;
+ }
+ else
+ {
+ genFuncletInfo.fiFrameType = 2;
+ }
+ genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned;
+ genFuncletInfo.fiSpDelta2 = 0;
+
+ assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)funcletFrameSizeAligned);
+ }
+ else
+ {
+ unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize;
+ assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES));
+
+ SP_to_FPLR_save_delta = outgoingArgSpaceAligned;
+ SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad;
+ CallerSP_to_PSP_slot_delta =
+ -(int)(saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - saveRegsPlusPSPAlignmentPad);
+
+ genFuncletInfo.fiFrameType = 3;
+ genFuncletInfo.fiSpDelta1 = -(int)saveRegsPlusPSPSizeAligned;
+ genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned;
+
+ assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)maxFuncletFrameSizeAligned);
+ }
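+
+ // Illustrative numbers for the "maxFuncletFrameSizeAligned <= 512" path above: with 10 saved registers
+ // (including FP/LR) and no varargs, saveRegsPlusPSPSize = 10*8 + 8 = 88, which rounds up to 96. With
+ // lvaOutgoingArgSpaceSize = 32, maxFuncletFrameSizeAligned = 96 + 32 = 128 <= 512, so we get frame type 2
+ // with fiSpDelta1 = -128 (the 120-byte frame rounded up to 16), SP_to_FPLR_save_delta = 32,
+ // SP_to_PSP_slot_delta = 32 + 16 + 8 = 56 (including the 8-byte alignment pad), and
+ // CallerSP_to_PSP_slot_delta = -(88 - 16) = -72.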
+
+ /* Now save it for future use */
+
+ genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
+ genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta;
+ genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta;
+ genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + REGSIZE_BYTES;
+ genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n");
+ printf("Funclet prolog / epilog info\n");
+ printf(" Save regs: ");
+ dspRegMask(genFuncletInfo.fiSaveRegs);
+ printf("\n");
+ printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta);
+ printf(" SP to FP/LR save location delta: %d\n", genFuncletInfo.fiSP_to_FPLR_save_delta);
+ printf(" SP to PSP slot delta: %d\n", genFuncletInfo.fiSP_to_PSP_slot_delta);
+ printf(" SP to callee-saved area delta: %d\n", genFuncletInfo.fiSP_to_CalleeSave_delta);
+ printf(" Caller SP to PSP slot delta: %d\n", genFuncletInfo.fiCallerSP_to_PSP_slot_delta);
+ printf(" Frame type: %d\n", genFuncletInfo.fiFrameType);
+ printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1);
+ printf(" SP delta 2: %d\n", genFuncletInfo.fiSpDelta2);
+
+ if (CallerSP_to_PSP_slot_delta != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
+ {
+ printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
+ }
+ }
+#endif // DEBUG
+
+ assert(genFuncletInfo.fiSP_to_FPLR_save_delta >= 0);
+ assert(genFuncletInfo.fiSP_to_PSP_slot_delta >= 0);
+ assert(genFuncletInfo.fiSP_to_CalleeSave_delta >= 0);
+ assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta <= 0);
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM);
+ assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta ==
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
+ // funclet!
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX End Prolog / Epilog XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+// Get the register assigned to the given node
+
+regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
+{
+ return tree->gtRegNum;
+}
+
+//------------------------------------------------------------------------
+// genSpillVar: Spill a local variable
+//
+// Arguments:
+// tree - the lclVar node for the variable being spilled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The lclVar must be a register candidate (lvRegCandidate)
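+//
+// Notes:
+// For illustration (register and frame offset are hypothetical): spilling V03 of type TYP_LONG that currently
+// lives in x5 emits a store of x5 to V03's stack home, e.g.
+//     str x5, [fp,#0x18]
+// after which liveness/GC tracking is updated and lvRegNum is set to REG_STK.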
+
+void CodeGen::genSpillVar(GenTreePtr tree)
+{
+ unsigned varNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ assert(varDsc->lvIsRegCandidate());
+
+ // We don't actually need to spill if it is already living in memory
+ bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
+ if (needsSpill)
+ {
+ var_types lclTyp = varDsc->TypeGet();
+ if (varDsc->lvNormalizeOnStore())
+ lclTyp = genActualType(lclTyp);
+ emitAttr size = emitTypeSize(lclTyp);
+
+ bool restoreRegVar = false;
+ if (tree->gtOper == GT_REG_VAR)
+ {
+ tree->SetOper(GT_LCL_VAR);
+ restoreRegVar = true;
+ }
+
+ // mask off the flag to generate the right spill code, then bring it back
+ tree->gtFlags &= ~GTF_REG_VAL;
+
+ instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
+
+ assert(varDsc->lvRegNum == tree->gtRegNum);
+ inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
+
+ tree->gtFlags |= GTF_REG_VAL;
+
+ if (restoreRegVar)
+ {
+ tree->SetOper(GT_REG_VAR);
+ }
+
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
+ gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
+
+ if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
+ {
+#ifdef DEBUG
+ if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
+ }
+#endif
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+
+ tree->gtFlags &= ~GTF_SPILL;
+ varDsc->lvRegNum = REG_STK;
+ if (varTypeIsMultiReg(tree))
+ {
+ varDsc->lvOtherReg = REG_STK;
+ }
+}
+
+// inline
+void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
+{
+ assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
+ varDsc->lvRegNum = tree->gtRegNum;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Generate code that will set the given register to the integer constant.
+ */
+
+void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
+{
+ // Reg cannot be a FP reg
+ assert(!genIsValidFloatReg(reg));
+
+ // The only TYP_REF constant that can come this path is a managed 'null' since it is not
+ // relocatable. Other ref type constants (e.g. string objects) go through a different
+ // code path.
+ noway_assert(type != TYP_REF || val == 0);
+
+ instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
+}
+
+/*****************************************************************************
+ *
+ * Generate code to check that the GS cookie wasn't thrashed by a buffer
+ * overrun. On ARM64 we always use REG_TMP_0 and REG_TMP_1 as temp registers
+ * and this works fine in the case of tail calls
+ * Implementation Note: pushReg = true, in case of tail calls.
+ */
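+// A sketch of the sequence this emits for the non-ngen case (the label name is illustrative, and materializing
+// the cookie constant may take more than one instruction):
+//     mov REG_TMP_0, #gsGlobalSecurityCookieVal
+//     ldr REG_TMP_1, [<cookie slot on the frame>]
+//     cmp REG_TMP_0, REG_TMP_1
+//     b.eq L_cookieOk
+//     bl CORINFO_HELP_FAIL_FAST
+//   L_cookieOk: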
+void CodeGen::genEmitGSCookieCheck(bool pushReg)
+{
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while
+ // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0).
+ if (!pushReg && (compiler->info.compRetType == TYP_REF))
+ gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
+
+ regNumber regGSConst = REG_TMP_0;
+ regNumber regGSValue = REG_TMP_1;
+
+ if (compiler->gsGlobalSecurityCookieAddr == nullptr)
+ {
+ // load the GS cookie constant into a reg
+ //
+ genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
+ }
+ else
+ {
+ // Ngen case - GS cookie constant needs to be accessed through an indirection.
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0);
+ }
+ // Load this method's GS value from the stack frame
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
+ // Compare with the GC cookie constant
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue);
+
+ BasicBlock* gsCheckBlk = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, gsCheckBlk);
+ genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
+ genDefineTempLabel(gsCheckBlk);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for all the basic blocks in the function.
+ */
+
+void CodeGen::genCodeForBBlist()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ unsigned savedStkLvl;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+
+ // You have to be careful if you create basic blocks from now on
+ compiler->fgSafeBasicBlockCreation = false;
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnCall)
+ {
+ compiler->opts.compStackCheckOnCall = false;
+ }
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnRet)
+ {
+ compiler->opts.compStackCheckOnRet = false;
+ }
+#endif // DEBUG
+
+ // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
+ genPrepForEHCodegen();
+
+ assert(!compiler->fgFirstBBScratch ||
+ compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+
+ /* Initialize the spill tracking logic */
+
+ regSet.rsSpillBeg();
+
+#ifdef DEBUGGING_SUPPORT
+ /* Initialize the line# tracking logic */
+
+ if (compiler->opts.compScopeInfo)
+ {
+ siInit();
+ }
+#endif
+
+ // The current implementation of switch tables requires the first block to have a label so it
+ // can generate offsets to the switch label targets.
+ // TODO-ARM64-CQ: remove this when switches have been re-implemented to not use this.
+ if (compiler->fgHasSwitch)
+ {
+ compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ }
+
+ genPendingCallLabel = nullptr;
+
+ /* Initialize the pointer tracking code */
+
+ gcInfo.gcRegPtrSetInit();
+ gcInfo.gcVarPtrSetInit();
+
+ /* If any arguments live in registers, mark those regs as such */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter assigned to a register? */
+
+ if (!varDsc->lvIsParam || !varDsc->lvRegister)
+ continue;
+
+ /* Is the argument live on entry to the method? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ continue;
+
+ /* Is this a floating-point argument? */
+
+ if (varDsc->IsFloatRegType())
+ continue;
+
+ noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
+
+ /* Mark the register as holding the variable */
+
+ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
+ }
+
+ unsigned finallyNesting = 0;
+
+ // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
+ // allocation at the start of each basic block.
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks and generate code for each one
+ *
+ */
+
+ BasicBlock* block;
+ BasicBlock* lblk; /* previous block */
+
+ for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n=============== Generating ");
+ block->dspBlockHeader(compiler, true, true);
+ compiler->fgDispBBLiveness(block);
+ }
+#endif // DEBUG
+
+ /* Figure out which registers hold variables on entry to this block */
+
+ regSet.ClearMaskVars();
+ gcInfo.gcRegGCrefSetCur = RBM_NONE;
+ gcInfo.gcRegByrefSetCur = RBM_NONE;
+
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
+
+ genUpdateLife(block->bbLiveIn);
+
+ // Even if liveness didn't change, we need to update the registers containing GC references.
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
+
+ regMaskTP newLiveRegSet = RBM_NONE;
+ regMaskTP newRegGCrefSet = RBM_NONE;
+ regMaskTP newRegByrefSet = RBM_NONE;
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
+#endif
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ if (varDsc->lvIsInReg())
+ {
+ newLiveRegSet |= varDsc->lvRegMask();
+ if (varDsc->lvType == TYP_REF)
+ {
+ newRegGCrefSet |= varDsc->lvRegMask();
+ }
+ else if (varDsc->lvType == TYP_BYREF)
+ {
+ newRegByrefSet |= varDsc->lvRegMask();
+ }
+#ifdef DEBUG
+ if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ else if (compiler->lvaIsGCTracked(varDsc))
+ {
+#ifdef DEBUG
+ if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ }
+
+ regSet.rsMaskVars = newLiveRegSet;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ if (!VarSetOps::IsEmpty(compiler, addedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tAdded GCVars: ");
+ dumpConvertedVarSet(compiler, addedGCVars);
+ printf("\n");
+ }
+ if (!VarSetOps::IsEmpty(compiler, removedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tRemoved GCVars: ");
+ dumpConvertedVarSet(compiler, removedGCVars);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
+ gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
+
+ /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
+ represent the exception object (TYP_REF).
+ We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
+ to the block, it will be the first thing evaluated
+ (thanks to GTF_ORDER_SIDEEFF).
+ */
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ for (GenTree* node : LIR::AsRange(block))
+ {
+ if (node->OperGet() == GT_CATCH_ARG)
+ {
+ gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
+ break;
+ }
+ }
+ }
+
+ /* Start a new code output block */
+
+ genUpdateCurrentFunclet(block);
+
+#ifdef _TARGET_XARCH_
+ if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
+ {
+ getEmitter()->emitLoopAlign();
+ }
+#endif
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
+#endif
+
+ block->bbEmitCookie = NULL;
+
+ if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
+ {
+ /* Mark a label and update the current set of live GC refs */
+
+ block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, FALSE);
+ }
+
+ if (block == compiler->fgFirstColdBlock)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nThis is the start of the cold region of the method\n");
+ }
+#endif
+ // We should never have a block that falls through into the Cold section
+ noway_assert(!lblk->bbFallsThrough());
+
+ // We require the block that starts the Cold section to have a label
+ noway_assert(block->bbEmitCookie);
+ getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
+ }
+
+ /* Both stacks are always empty on entry to a basic block */
+
+ genStackLevel = 0;
+
+ savedStkLvl = genStackLevel;
+
+ /* Tell everyone which basic block we're working on */
+
+ compiler->compCurBB = block;
+
+#ifdef DEBUGGING_SUPPORT
+ siBeginBlock(block);
+
+ // BBF_INTERNAL blocks don't correspond to any single IL instruction.
+ if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) &&
+ !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to
+ // emit a NO_MAPPING entry, immediately after the prolog.
+ {
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
+ }
+
+ bool firstMapping = true;
+#endif // DEBUGGING_SUPPORT
+
+ /*---------------------------------------------------------------------
+ *
+ * Generate code for each statement-tree in the block
+ *
+ */
+
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ genReserveFuncletProlog(block);
+ }
+
+ // Clear compCurStmt and compCurLifeTree.
+ compiler->compCurStmt = nullptr;
+ compiler->compCurLifeTree = nullptr;
+
+ // Traverse the block in linear order, generating code for each node as we encounter it.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUGGING_SUPPORT
+ IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
+#endif
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+#ifdef DEBUGGING_SUPPORT
+ // Do we have a new IL offset?
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ genEnsureCodeEmitted(currentILOffset);
+ currentILOffset = node->gtStmt.gtStmtILoffsx;
+ genIPmappingAdd(currentILOffset, firstMapping);
+ firstMapping = false;
+ }
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
+ node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
+
+ if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
+ node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
+ {
+ while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
+ {
+ genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
+ }
+ }
+ }
+#endif // DEBUG
+
+ genCodeForTreeNode(node);
+ if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
+ {
+ genConsumeReg(node);
+ }
+ } // end for each node in block
+
+#ifdef DEBUG
+ // The following set of register spill checks and GC pointer tracking checks used to be
+ // performed at statement boundaries. Now, with LIR, there are no statements, so they are
+ // performed at the end of each block.
+ // TODO: could these checks be performed more frequently? E.g., at each location where
+ // the register allocator says there are no live non-variable registers. Perhaps this could
+ // be done by (a) keeping a running count of live non-variable registers by using
+ // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively,
+ // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA
+ // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node,
+ // there will be no live non-variable registers.
+
+ regSet.rsSpillChk();
+
+ /* Make sure we didn't bungle pointer register tracking */
+
+ regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
+ regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
+
+ // If return is a GC-type, clear it. Note that if a common
+ // epilog is generated (genReturnBB) it has a void return
+ // even though we might return a ref. We can't use the compRetType
+ // as the determiner because something we are tracking as a byref
+ // might be used as a return value of an int function (which is legal)
+ GenTree* blockLastNode = block->lastNode();
+ if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) &&
+ (varTypeIsGC(compiler->info.compRetType) ||
+ (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet()))))
+ {
+ nonVarPtrRegs &= ~RBM_INTRET;
+ }
+
+ if (nonVarPtrRegs)
+ {
+ printf("Regset after BB%02u gcr=", block->bbNum);
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ printf(", byr=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ printf(", regVars=");
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf("\n");
+ }
+
+ noway_assert(nonVarPtrRegs == RBM_NONE);
+#endif // DEBUG
+
+#if defined(DEBUG) && defined(_TARGET_ARM64_)
+ if (block->bbNext == nullptr)
+ {
+ // Unit testing of the ARM64 emitter: generate a bunch of instructions into the last block
+ // (it's as good as any, but better than the prolog, which can only be a single instruction
+ // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler
+ // thinks the instructions are the same as we do.
+ genArm64EmitterUnitTests();
+ }
+#endif // defined(DEBUG) && defined(_TARGET_ARM64_)
+
+#ifdef DEBUGGING_SUPPORT
+ // It is possible to reach the end of the block without generating code for the current IL offset.
+ // For example, if the following IR ends the current block, no code will have been generated for
+ // offset 21:
+ //
+ // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
+ //
+ // N001 ( 0, 0) [000039] ------------ nop void
+ //
+ // This can lead to problems when debugging the generated code. To prevent these issues, make sure
+ // we've generated code for the last IL offset we saw in the block.
+ genEnsureCodeEmitted(currentILOffset);
+
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ siEndBlock(block);
+
+ /* Is this the last block, and are there any open scopes left ? */
+
+ bool isLastBlockProcessed = (block->bbNext == NULL);
+ if (block->isBBCallAlwaysPair())
+ {
+ isLastBlockProcessed = (block->bbNext->bbNext == NULL);
+ }
+
+ if (isLastBlockProcessed && siOpenScopeList.scNext)
+ {
+ /* This assert no longer holds, because we may insert a throw
+ block to demarcate the end of a try or finally region when they
+ are at the end of the method. It would be nice if we could fix
+ our code so that this throw block will no longer be necessary. */
+
+ // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
+
+ siCloseAllOpenScopes();
+ }
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ genStackLevel -= savedStkLvl;
+
+#ifdef DEBUG
+ // compCurLife should be equal to the liveOut set, except that we don't keep
+ // it up to date for vars that are not register candidates
+ // (it would be nice to have a xor set function)
+
+ VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
+ VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
+ VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
+ while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ assert(!varDsc->lvIsRegCandidate());
+ }
+#endif
+
+ /* Both stacks should always be empty on exit from a basic block */
+
+ noway_assert(genStackLevel == 0);
+
+#if 0
+ // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
+ // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
+ // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
+ // The document "X64 and ARM ABIs.docx" has more details. The situations:
+ // 1. If the call instruction is in a different EH region as the instruction that follows it.
+ // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
+ // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters here.)
+ // We handle case #1 here, and case #2 in the emitter.
+ if (getEmitter()->emitIsLastInsCall())
+ {
+ // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
+ // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
+ // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
+ // generated before the OS epilog starts, such as a GS cookie check.
+ if ((block->bbNext == nullptr) ||
+ !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ // We only need the NOP if we're not going to generate any more code as part of the block end.
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_THROW:
+ case BBJ_CALLFINALLY:
+ case BBJ_EHCATCHRET:
+ // We're going to generate more code below anyway, so no need for the NOP.
+
+ case BBJ_RETURN:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ // These are the "epilog follows" case, handled in the emitter.
+
+ break;
+
+ case BBJ_NONE:
+ if (block->bbNext == nullptr)
+ {
+ // Call immediately before the end of the code; we should never get here.
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ else
+ {
+ // We need the NOP
+ instGen(INS_nop);
+ }
+ break;
+
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ // These can't have a call as the last instruction!
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+ }
+#endif // 0
+
+ /* Do we need to generate a jump or return? */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+ break;
+
+ case BBJ_RETURN:
+ genExitCode(block);
+ break;
+
+ case BBJ_THROW:
+ // If we have a throw at the end of a function or funclet, we need to emit another instruction
+ // afterwards to help the OS unwinder determine the correct context during unwind.
+ // We insert an unexecuted breakpoint instruction in several situations
+ // following a throw instruction:
+ // 1. If the throw is the last instruction of the function or funclet. This helps
+ // the OS unwinder determine the correct context during an unwind from the
+ // thrown exception.
+ // 2. If this is the last block of the hot section.
+ // 3. If the subsequent block is a special throw block.
+ // 4. On AMD64, if the next block is in a different EH region.
+ if ((block->bbNext == NULL) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) ||
+ !BasicBlock::sameEHRegion(block, block->bbNext) ||
+ (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
+ block->bbNext == compiler->fgFirstColdBlock)
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+
+ break;
+
+ case BBJ_CALLFINALLY:
+
+ // Generate a call to the finally, like this:
+ // mov x0,qword ptr [fp + 10H] // Load x0 with PSPSym
+ // bl finally-funclet
+ // b finally-return // Only for non-retless finally calls
+ // The 'b' can be a NOP if we're going to the next block.
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0);
+ getEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest);
+
+ if (block->bbFlags & BBF_RETLESS_CALL)
+ {
+ // We have a retless call, and the last instruction generated was a call.
+ // If the next block is in a different EH region (or is the end of the code
+ // block), then we need to generate a breakpoint here (since it will never
+ // get executed) to get proper unwind behavior.
+
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ }
+ else
+ {
+ // Because of the way the flowgraph is connected, the liveness info for this one instruction
+ // after the call is not (cannot be) correct in cases where a variable has a last use in the
+ // handler. So turn off GC reporting for this single instruction.
+ getEmitter()->emitDisableGC();
+
+ // Now go to where the finally funclet needs to return to.
+ if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
+ {
+ // Fall-through.
+ // TODO-ARM64-CQ: Can we get rid of this instruction, and just have the call return directly
+ // to the next instruction? This would depend on stack walking from within the finally
+ // handler working without this instruction being in this special EH region.
+ instGen(INS_nop);
+ }
+ else
+ {
+ inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
+ }
+
+ getEmitter()->emitEnableGC();
+ }
+
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ lblk = block;
+ block = block->bbNext;
+ }
+ break;
+
+ case BBJ_EHCATCHRET:
+ // For long address (default): `adrp + add` will be emitted.
+ // For short address (proven later): `adr` will be emitted.
+ getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, block->bbJumpDest, REG_INTRET);
+
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->compCurBB = 0;
+#endif
+
+ } //------------------ END-FOR each block of the method -------------------
+
+ /* Nothing is live at this point */
+ genUpdateLife(VarSetOps::MakeEmpty(compiler));
+
+ /* Finalize the spill tracking logic */
+
+ regSet.rsSpillEnd();
+
+ /* Finalize the temp tracking logic */
+
+ compiler->tmpEnd();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n# ");
+ printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate,
+ compiler->compSizeEstimate);
+ printf("%s\n", compiler->info.compFullName);
+ }
+#endif
+}
+
+// Returns the child that uses the same register as the dst (if any);
+// the other child is returned through the 'other' out parameter.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
+{
+ if (tree->gtRegNum == REG_NA)
+ {
+ other = nullptr;
+ return nullptr;
+ }
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ if (op1->gtRegNum == tree->gtRegNum)
+ {
+ other = op2;
+ return op1;
+ }
+ if (op2->gtRegNum == tree->gtRegNum)
+ {
+ other = op1;
+ return op2;
+ }
+ else
+ {
+ other = nullptr;
+ return nullptr;
+ }
+}
+
+// Move an immediate value into an integer register.
+
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
+{
+ // reg cannot be a FP register
+ assert(!genIsValidFloatReg(reg));
+ if (!compiler->opts.compReloc)
+ {
+ size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
+ }
+
+ if (EA_IS_RELOC(size))
+ {
+ // This emits a pair of adrp/add (two instructions) with fix-ups.
+ getEmitter()->emitIns_R_AI(INS_adrp, size, reg, imm);
+ }
+ else if (imm == 0)
+ {
+ instGen_Set_Reg_To_Zero(size, reg, flags);
+ }
+ else
+ {
+ if (emitter::emitIns_valid_imm_for_mov(imm, size))
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, (imm & 0xffff));
+ getEmitter()->emitIns_R_I_I(INS_movk, size, reg, ((imm >> 16) & 0xffff), 16, INS_OPTS_LSL);
+
+ if ((size == EA_8BYTE) &&
+ ((imm >> 32) != 0)) // Sometimes the upper 32 bits are zero and the first mov has zeroed them
+ {
+ getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 32) & 0xffff), 32, INS_OPTS_LSL);
+ if ((imm >> 48) != 0) // Frequently the upper 16 bits are zero and the first mov has zeroed them
+ {
+ getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 48) & 0xffff), 48, INS_OPTS_LSL);
+ }
+ }
+ }
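+ // Illustrative example (hypothetical immediate): for a 64-bit imm of 0x00000003456789AB with
+ // size EA_8BYTE, the sequence above emits roughly:
+ //   mov  reg, #0x89AB
+ //   movk reg, #0x4567, lsl #16
+ //   movk reg, #0x0003, lsl #32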
+ // The caller may have requested that the flags be set on this mov (rarely/never)
+ if (flags == INS_FLAGS_SET)
+ {
+ getEmitter()->emitIns_R_I(INS_tst, size, reg, 0);
+ }
+ }
+
+ regTracker.rsTrackRegIntCns(reg, imm);
+}
+
+/***********************************************************************************
+ *
+ * Generate code to set a register 'targetReg' of type 'targetType' to the constant
+ * specified by the GT_CNS_INT or GT_CNS_DBL node in 'tree'. This does not call
+ * genProduceReg() on the target register.
+ */
+void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
+{
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+ {
+ // relocatable values tend to come down as a CNS_INT of native int type
+ // so the line between these two opcodes is kind of blurry
+ GenTreeIntConCommon* con = tree->AsIntConCommon();
+ ssize_t cnsVal = con->IconValue();
+
+ bool needReloc = compiler->opts.compReloc && tree->IsIconHandle();
+ if (needReloc)
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
+ regTracker.rsTrackRegTrash(targetReg);
+ }
+ else
+ {
+ genSetRegToIcon(targetReg, cnsVal, targetType);
+ }
+ }
+ break;
+
+ case GT_CNS_DBL:
+ {
+ emitter* emit = getEmitter();
+ emitAttr size = emitTypeSize(tree);
+ GenTreeDblCon* dblConst = tree->AsDblCon();
+ double constValue = dblConst->gtDblCon.gtDconVal;
+
+ // Make sure we use "movi reg, 0x00" only for positive zero (0.0) and not for negative zero (-0.0)
+ if (*(__int64*)&constValue == 0)
+ {
+ // A faster/smaller way to generate 0.0
+ // We will just zero out the entire vector register for both float and double
+ emit->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B);
+ }
+ else if (emitter::emitIns_valid_imm_for_fmov(constValue))
+ {
+ // We can load the FP constant using the fmov FP-immediate for this constValue
+ emit->emitIns_R_F(INS_fmov, size, targetReg, constValue);
+ }
+ else
+ {
+ // Get a temp integer register to compute long address.
+ regMaskTP addrRegMask = tree->gtRsvdRegs;
+ regNumber addrReg = genRegNumFromMask(addrRegMask);
+ noway_assert(addrReg != REG_NA);
+
+ // We must load the FP constant from the constant pool
+ // Emit a data section constant for the float or double constant.
+ CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(dblConst);
+ // For long address (default): `adrp + ldr + fmov` will be emitted.
+ // For short address (proven later), `ldr` will be emitted.
+ emit->emitIns_R_C(INS_ldr, size, targetReg, addrReg, hnd, 0);
+ }
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+// Generate code to get the high N bits of a N*N=2N bit multiplication result
+void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
+{
+ assert(!(treeNode->gtFlags & GTF_UNSIGNED));
+ assert(!treeNode->gtOverflowEx());
+
+#if 0
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter *emit = getEmitter();
+ emitAttr size = emitTypeSize(treeNode);
+ GenTree *op1 = treeNode->gtOp.gtOp1;
+ GenTree *op2 = treeNode->gtOp.gtOp2;
+
+ // to get the high bits of the multiply, we are constrained to using the
+ // 1-op form: RDX:RAX = RAX * rm
+ // The 3-op form (Rx=Ry*Rz) does not support it.
+
+ genConsumeOperands(treeNode->AsOp());
+
+ GenTree* regOp = op1;
+ GenTree* rmOp = op2;
+
+ // Set rmOp to the contained memory operand (if any)
+ //
+ if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
+ {
+ regOp = op2;
+ rmOp = op1;
+ }
+ assert(!regOp->isContained());
+
+ // Setup targetReg when neither of the source operands was a matching register
+ if (regOp->gtRegNum != targetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
+ }
+
+ emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp);
+
+ // Move the result to the desired register, if necessary
+ if (targetReg != REG_RDX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
+ }
+#else // !0
+ NYI("genCodeForMulHi");
+#endif // !0
+}
+
+// generate code for a DIV or MOD operation
+//
+void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
+{
+ // unused on ARM64
+}
+
+// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR
+// The caller is expected to have called genConsumeOperands() before calling this method.
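+// For example (illustrative): a 32-bit GT_ADD with both operands in registers typically becomes
+// a single 'add wD, wN, wM' produced by emitInsTernary below.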
+void CodeGen::genCodeForBinary(GenTree* treeNode)
+{
+ const genTreeOps oper = treeNode->OperGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+ assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV || oper == GT_UDIV || oper == GT_AND ||
+ oper == GT_OR || oper == GT_XOR);
+
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ GenTreePtr op2 = treeNode->gtGetOp2();
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ noway_assert(targetReg != REG_NA);
+
+ regNumber r = emit->emitInsTernary(ins, emitTypeSize(treeNode), treeNode, op1, op2);
+ noway_assert(r == targetReg);
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// isStructReturn: Returns whether the 'treeNode' is returning a struct.
+//
+// Arguments:
+// treeNode - The tree node to evaluate whether is a struct return.
+//
+// Return Value:
+// Returns true if the 'treeNode' is a GT_RETURN node of type struct.
+// Otherwise returns false.
+//
+bool CodeGen::isStructReturn(GenTreePtr treeNode)
+{
+ // This method could be called for a 'treeNode' of GT_RETFILT or GT_RETURN.
+ // For GT_RETFILT, the return is always
+ // a bool or a void, marking the end of a finally block.
+ noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
+
+ return varTypeIsStruct(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genStructReturn: Generates code for returning a struct.
+//
+// Arguments:
+// treeNode - The GT_RETURN tree node.
+//
+// Return Value:
+// None
+//
+// Assumption:
+// op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
+void CodeGen::genStructReturn(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_RETURN);
+ assert(isStructReturn(treeNode));
+ GenTreePtr op1 = treeNode->gtGetOp1();
+
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVar->gtLclNum]);
+ var_types lclType = genActualType(varDsc->TypeGet());
+
+ // Currently only multireg TYP_STRUCT types such as HFAs and 16-byte structs are supported
+ // In the future we could have FEATURE_SIMD types like TYP_SIMD16
+ assert(lclType == TYP_STRUCT);
+ assert(varDsc->lvIsMultiRegRet);
+
+ ReturnTypeDesc retTypeDesc;
+ unsigned regCount;
+
+ retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
+ regCount = retTypeDesc.GetReturnRegCount();
+
+ assert(regCount >= 2);
+ assert(op1->isContained());
+
+ // Copy var on stack into ABI return registers
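+ // For example (illustrative): a 16-byte non-HFA struct is returned in {x0, x1}, so two
+ // 8-byte loads are emitted below from the local's stack home at offsets 0 and 8.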
+ int offset = 0;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc.GetReturnRegType(i);
+ regNumber reg = retTypeDesc.GetABIReturnReg(i);
+ getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
+ offset += genTypeSize(type);
+ }
+ }
+ else // op1 must be multi-reg GT_CALL
+ {
+ assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());
+
+ genConsumeRegs(op1);
+
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+
+ ReturnTypeDesc* pRetTypeDesc;
+ unsigned regCount;
+ unsigned matchingCount = 0;
+
+ pRetTypeDesc = call->GetReturnTypeDesc();
+ regCount = pRetTypeDesc->GetReturnRegCount();
+
+ var_types regType[MAX_RET_REG_COUNT];
+ regNumber returnReg[MAX_RET_REG_COUNT];
+ regNumber allocatedReg[MAX_RET_REG_COUNT];
+ regMaskTP srcRegsMask = 0;
+ regMaskTP dstRegsMask = 0;
+ bool needToShuffleRegs = false; // Set to true if we have to move any registers
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ regType[i] = pRetTypeDesc->GetReturnRegType(i);
+ returnReg[i] = pRetTypeDesc->GetABIReturnReg(i);
+
+ regNumber reloadReg = REG_NA;
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+ }
+
+ if (reloadReg != REG_NA)
+ {
+ allocatedReg[i] = reloadReg;
+ }
+ else
+ {
+ allocatedReg[i] = call->GetRegNumByIdx(i);
+ }
+
+ if (returnReg[i] == allocatedReg[i])
+ {
+ matchingCount++;
+ }
+ else // We need to move this value
+ {
+ // We want to move the value from allocatedReg[i] into returnReg[i]
+ // so record these two registers in the src and dst masks
+ //
+ srcRegsMask |= genRegMask(allocatedReg[i]);
+ dstRegsMask |= genRegMask(returnReg[i]);
+
+ needToShuffleRegs = true;
+ }
+ }
+
+ if (needToShuffleRegs)
+ {
+ assert(matchingCount < regCount);
+
+ unsigned remainingRegCount = regCount - matchingCount;
+ regMaskTP extraRegMask = treeNode->gtRsvdRegs;
+
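+ // Illustrative example (hypothetical registers): if the call produced its two results in
+ // {x1, x0} but the ABI return registers are {x0, x1}, then dstRegsMask == srcRegsMask
+ // (a circular dependency); the loop below first parks the value currently in x0 in the
+ // reserved 'extra' register, then moves x1 -> x0, and finally moves the parked value into x1.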
+ while (remainingRegCount > 0)
+ {
+ // set 'available' to the 'dst' registers that are not currently holding 'src' registers
+ //
+ regMaskTP availableMask = dstRegsMask & ~srcRegsMask;
+
+ regMaskTP dstMask;
+ regNumber srcReg;
+ regNumber dstReg;
+ var_types curType = TYP_UNKNOWN;
+ regNumber freeUpReg = REG_NA;
+
+ if (availableMask == 0)
+ {
+ // Circular register dependencies
+ // So just free up the lowest register in dstRegsMask by moving it to the 'extra' register
+
+ assert(dstRegsMask == srcRegsMask); // this has to be true for us to reach here
+ assert(extraRegMask != 0); // we require an 'extra' register
+ assert((extraRegMask & ~dstRegsMask) != 0); // it can't be part of dstRegsMask
+
+ availableMask = extraRegMask & ~dstRegsMask;
+
+ regMaskTP srcMask = genFindLowestBit(srcRegsMask);
+ freeUpReg = genRegNumFromMask(srcMask);
+ }
+
+ dstMask = genFindLowestBit(availableMask);
+ dstReg = genRegNumFromMask(dstMask);
+ srcReg = REG_NA;
+
+ if (freeUpReg != REG_NA)
+ {
+ // We will free up the srcReg by moving it to dstReg which is an extra register
+ //
+ srcReg = freeUpReg;
+
+ // Find the 'srcReg' and set 'curType', change allocatedReg[] to dstReg
+ // and add the new register mask bit to srcRegsMask
+ //
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ if (allocatedReg[i] == srcReg)
+ {
+ curType = regType[i];
+ allocatedReg[i] = dstReg;
+ srcRegsMask |= genRegMask(dstReg);
+ }
+ }
+ }
+ else // The normal case
+ {
+ // Find the 'srcReg' and set 'curType'
+ //
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ if (returnReg[i] == dstReg)
+ {
+ srcReg = allocatedReg[i];
+ curType = regType[i];
+ }
+ }
+ // After we perform this move we will have one fewer register to set up
+ remainingRegCount--;
+ }
+ assert(curType != TYP_UNKNOWN);
+
+ inst_RV_RV(ins_Copy(curType), dstReg, srcReg, curType);
+
+ // Clear the appropriate bits in srcRegsMask and dstRegsMask
+ srcRegsMask &= ~genRegMask(srcReg);
+ dstRegsMask &= ~genRegMask(dstReg);
+
+ } // while (remainingRegCount > 0)
+
+ } // (needToShuffleRegs)
+
+ } // op1 must be multi-reg GT_CALL
+}
+
+//------------------------------------------------------------------------
+// genReturn: Generates code for return statement.
+// In case of struct return, delegates to the genStructReturn method.
+//
+// Arguments:
+// treeNode - The GT_RETURN or GT_RETFILT tree node.
+//
+// Return Value:
+// None
+//
+void CodeGen::genReturn(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ var_types targetType = treeNode->TypeGet();
+
+#ifdef DEBUG
+ if (targetType == TYP_VOID)
+ {
+ assert(op1 == nullptr);
+ }
+#endif
+
+ if (isStructReturn(treeNode))
+ {
+ genStructReturn(treeNode);
+ }
+ else if (targetType != TYP_VOID)
+ {
+ assert(op1 != nullptr);
+ noway_assert(op1->gtRegNum != REG_NA);
+
+ genConsumeReg(op1);
+
+ regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
+
+ bool movRequired = (op1->gtRegNum != retReg);
+
+ if (!movRequired)
+ {
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
+ bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
+ if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0))
+ {
+ assert(op1->InReg());
+
+ // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR
+
+ unsigned lclNum = lcl->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ var_types op1Type = genActualType(op1->TypeGet());
+ var_types lclType = genActualType(varDsc->TypeGet());
+
+ if (genTypeSize(op1Type) < genTypeSize(lclType))
+ {
+ movRequired = true;
+ }
+ }
+ }
+ }
+
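+ // For example (illustrative): returning a TYP_INT value that lives in a TYP_LONG local causes
+ // movRequired to be set back to true, so a 32-bit 'mov' (e.g. 'mov w0, w0') is emitted below;
+ // on ARM64 a 32-bit mov zero-extends into the upper half of the register.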
+ if (movRequired)
+ {
+ emitAttr movSize = EA_ATTR(genTypeSize(targetType));
+ getEmitter()->emitIns_R_R(INS_mov, movSize, retReg, op1->gtRegNum);
+ }
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // There will be a single return block while generating profiler ELT callbacks.
+ //
+ // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
+ // In flowgraph and other places assert that the last node of a block marked as
+ // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
+ // maintain such an invariant irrespective of whether profiler hook needed or not.
+ // Also, there is not much to be gained by materializing it as an explicit node.
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ genProfilingLeaveCallback();
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a single node in the tree.
+ * Preconditions: All operands have been evaluated
+ *
+ */
+void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
+{
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
+ printf("Generating: ");
+ compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ }
+#endif // DEBUG
+
+ // Is this a node whose value is already in a register? LSRA denotes this by
+ // setting the GTF_REUSE_REG_VAL flag.
+ if (treeNode->IsReuseRegVal())
+ {
+ // For now, this is only used for constant nodes.
+ assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL));
+ JITDUMP(" TreeNode is marked ReuseReg\n");
+ return;
+ }
+
+ // contained nodes are part of their parents for codegen purposes
+ // ex : immediates, most LEAs
+ if (treeNode->isContained())
+ {
+ return;
+ }
+
+ switch (treeNode->gtOper)
+ {
+ case GT_START_NONGC:
+ getEmitter()->emitDisableGC();
+ break;
+
+ case GT_PROF_HOOK:
+ // We should be seeing this only if profiler hook is needed
+ noway_assert(compiler->compIsProfilerHookNeeded());
+
+#ifdef PROFILING_SUPPORTED
+ // Right now this node is used only for tail calls. In future if
+ // we intend to use it for Enter or Leave hooks, add a data member
+ // to this node indicating the kind of profiler hook. For example,
+ // helper number can be used.
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif // PROFILING_SUPPORTED
+ break;
+
+ case GT_LCLHEAP:
+ genLclHeap(treeNode);
+ break;
+
+ case GT_CNS_INT:
+ case GT_CNS_DBL:
+ genSetRegToConst(targetReg, targetType, treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_NOT:
+ assert(!varTypeIsFloating(targetType));
+
+ __fallthrough;
+
+ case GT_NEG:
+ {
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ assert(!treeNode->isContained());
+ // The dst can only be a register.
+ assert(targetReg != REG_NA);
+
+ GenTreePtr operand = treeNode->gtGetOp1();
+ assert(!operand->isContained());
+ // The src must be a register.
+ regNumber operandReg = genConsumeReg(operand);
+
+ getEmitter()->emitIns_R_R(ins, emitTypeSize(treeNode), targetReg, operandReg);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_DIV:
+ case GT_UDIV:
+ genConsumeOperands(treeNode->AsOp());
+
+ if (varTypeIsFloating(targetType))
+ {
+ // Floating point divide never raises an exception
+ genCodeForBinary(treeNode);
+ }
+ else // an integer divide operation
+ {
+ GenTreePtr divisorOp = treeNode->gtGetOp2();
+ emitAttr size = EA_ATTR(genTypeSize(genActualType(treeNode->TypeGet())));
+
+ if (divisorOp->IsIntegralConst(0))
+ {
+ // We unconditionally throw a divide by zero exception
+ genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO);
+
+ // We still need to call genProduceReg
+ genProduceReg(treeNode);
+ }
+ else // the divisor is not the constant zero
+ {
+ regNumber divisorReg = divisorOp->gtRegNum;
+
+ // Generate the required runtime checks for GT_DIV or GT_UDIV
+ if (treeNode->gtOper == GT_DIV)
+ {
+ BasicBlock* sdivLabel = genCreateTempLabel();
+
+ // Two possible exceptions:
+ // (AnyVal / 0) => DivideByZeroException
+ // (MinInt / -1) => ArithmeticException
+ //
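+ // Illustrative sketch (hypothetical labels) of the checks emitted below when the divisor
+ // is not a constant:
+ //   cmp  divisorReg, #0
+ //   beq  throwDivByZero
+ //   cmp  divisorReg, #-1
+ //   bne  sdivLabel
+ //   adds zr, dividendReg, dividendReg // Z and V both set only when dividend == MinInt
+ //   bne  sdivLabel
+ //   bvs  throwArithExcpn
+ // sdivLabel:
+ //   sdiv ...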
+ bool checkDividend = true;
+
+ // Do we have an immediate for the 'divisorOp'?
+ //
+ if (divisorOp->IsCnsIntOrI())
+ {
+ GenTreeIntConCommon* intConstTree = divisorOp->AsIntConCommon();
+ ssize_t intConstValue = intConstTree->IconValue();
+ assert(intConstValue != 0); // already checked above by the IsIntegralConst(0) test
+ if (intConstValue != -1)
+ {
+ checkDividend = false; // We statically know that the divisor is not -1
+ }
+ }
+ else // insert check for division by zero
+ {
+ // If the divisor is zero, throw a DivideByZeroException
+ emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
+ }
+
+ if (checkDividend)
+ {
+ // If the divisor is not -1, branch to 'sdivLabel'
+ emit->emitIns_R_I(INS_cmp, size, divisorReg, -1);
+
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ inst_JMP(jmpNotEqual, sdivLabel);
+ // If control flow continues past here the 'divisorReg' is known to be -1
+
+ regNumber dividendReg = treeNode->gtGetOp1()->gtRegNum;
+ // At this point the divisor is known to be -1
+ //
+ // Issue the 'adds zr, dividendReg, dividendReg' instruction
+ // this will set both the Z and V flags only when dividendReg is MinInt
+ //
+ emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg);
+ inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear
+ genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flag is set, throw
+ // ArithmeticException
+
+ genDefineTempLabel(sdivLabel);
+ }
+ genCodeForBinary(treeNode); // Generate the sdiv instruction
+ }
+ else // (treeNode->gtOper == GT_UDIV)
+ {
+ // Only one possible exception
+ // (AnyVal / 0) => DivideByZeroException
+ //
+ // Note that division by the constant 0 was already checked for above by the
+ // op2->IsIntegralConst(0) check
+ //
+ if (!divisorOp->IsCnsIntOrI())
+ {
+ // divisorOp is not a constant, so it could be zero
+ //
+ emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
+ }
+ genCodeForBinary(treeNode);
+ }
+ }
+ }
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ assert(varTypeIsIntegralOrI(treeNode));
+ __fallthrough;
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ genConsumeOperands(treeNode->AsOp());
+ genCodeForBinary(treeNode);
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROR:
+ genCodeForShift(treeNode);
+ // genCodeForShift() calls genProduceReg()
+ break;
+
+ case GT_CAST:
+ if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double <--> double/float
+ genFloatToFloatCast(treeNode);
+ }
+ else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double --> int32/int64
+ genFloatToIntCast(treeNode);
+ }
+ else if (varTypeIsFloating(targetType))
+ {
+ // Casts int32/uint32/int64/uint64 --> float/double
+ genIntToFloatCast(treeNode);
+ }
+ else
+ {
+ // Casts int <--> int
+ genIntToIntCast(treeNode);
+ }
+ // The per-case functions call genProduceReg()
+ break;
+
+ case GT_LCL_FLD_ADDR:
+ case GT_LCL_VAR_ADDR:
+ // Address of a local var. This by itself should never be allocated a register.
+ // If it is worth storing the address in a register then it should be cse'ed into
+ // a temp and that would be allocated a register.
+ noway_assert(targetType == TYP_BYREF);
+ noway_assert(!treeNode->InReg());
+
+ inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_LCL_FLD:
+ {
+ GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
+ assert(varNode->gtLclNum < compiler->lvaCount);
+ unsigned varNum = varNode->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ if (targetType == TYP_STRUCT)
+ {
+ NYI("GT_LCL_FLD with TYP_STRUCT");
+ }
+ emitAttr size = emitTypeSize(targetType);
+
+ noway_assert(targetType != TYP_STRUCT);
+ noway_assert(targetReg != REG_NA);
+
+ unsigned offset = treeNode->gtLclFld.gtLclOffs;
+
+ if (varTypeIsFloating(targetType))
+ {
+ if (treeNode->InReg())
+ {
+ NYI("GT_LCL_FLD with register to register Floating point move");
+ }
+ else
+ {
+ emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offset);
+ }
+ }
+ else
+ {
+ size = EA_SET_SIZE(size, EA_8BYTE);
+ emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offset);
+ }
+ genProduceReg(treeNode);
+ }
+ break;
+
+ case GT_LCL_VAR:
+ {
+ GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
+
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ bool isRegCandidate = varDsc->lvIsRegCandidate();
+
+ // lcl_vars are not defs
+ assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
+
+ if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
+ {
+ assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
+ }
+
+ // If this is a register candidate that has been spilled, genConsumeReg() will
+ // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
+
+ if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
+ {
+ assert(!isRegCandidate);
+
+ // targetType must be a normal scalar type and not a TYP_STRUCT
+ assert(targetType != TYP_STRUCT);
+
+ instruction ins = ins_Load(targetType);
+ emitAttr attr = emitTypeSize(targetType);
+
+ attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
+
+ emit->emitIns_R_S(ins, attr, targetReg, varNum, 0);
+ genProduceReg(treeNode);
+ }
+ }
+ break;
+
+ case GT_STORE_LCL_FLD:
+ {
+ noway_assert(targetType != TYP_STRUCT);
+
+ // record the offset
+ unsigned offset = treeNode->gtLclFld.gtLclOffs;
+
+ // We must have a stack store with GT_STORE_LCL_FLD
+ noway_assert(!treeNode->InReg());
+ noway_assert(targetReg == REG_NA);
+
+ GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
+
+ GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ genConsumeRegs(data);
+
+ regNumber dataReg = REG_NA;
+ if (data->isContainedIntOrIImmed())
+ {
+ assert(data->IsIntegralConst(0));
+ dataReg = REG_ZR;
+ }
+ else
+ {
+ assert(!data->isContained());
+ genConsumeReg(data);
+ dataReg = data->gtRegNum;
+ }
+ assert(dataReg != REG_NA);
+
+ instruction ins = ins_Store(targetType);
+
+ emitAttr attr = emitTypeSize(targetType);
+
+ attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
+
+ emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);
+
+ genUpdateLife(varNode);
+
+ varDsc->lvRegNum = REG_STK;
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ {
+ GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
+
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ unsigned offset = 0;
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
+
+ GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal();
+
+ // var = call, where call returns a multi-reg return value
+ // case is handled separately.
+ if (data->gtSkipReloadOrCopy()->IsMultiRegCall())
+ {
+ genMultiRegCallStoreToLocal(treeNode);
+ }
+ else
+ {
+ genConsumeRegs(data);
+
+ regNumber dataReg = REG_NA;
+ if (data->isContainedIntOrIImmed())
+ {
+ assert(data->IsIntegralConst(0));
+ dataReg = REG_ZR;
+ }
+ else
+ {
+ assert(!data->isContained());
+ genConsumeReg(data);
+ dataReg = data->gtRegNum;
+ }
+ assert(dataReg != REG_NA);
+
+ if (targetReg == REG_NA) // store into stack based LclVar
+ {
+ inst_set_SV_var(varNode);
+
+ instruction ins = ins_Store(targetType);
+ emitAttr attr = emitTypeSize(targetType);
+
+ attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
+
+ emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);
+
+ genUpdateLife(varNode);
+
+ varDsc->lvRegNum = REG_STK;
+ }
+ else // store into register (i.e move into register)
+ {
+ if (dataReg != targetReg)
+ {
+ // Assign into targetReg when dataReg (from op1) is not the same register
+ inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType);
+ }
+ genProduceReg(treeNode);
+ }
+ }
+ }
+ break;
+
+ case GT_RETFILT:
+ // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
+ // the return register, if it's not already there. The processing is the same as GT_RETURN.
+ if (targetType != TYP_VOID)
+ {
+ // For filters, the IL spec says the result is type int32. Further, the only specified legal values
+ // are 0 or 1, with the use of other values "undefined".
+ assert(targetType == TYP_INT);
+ }
+
+ __fallthrough;
+
+ case GT_RETURN:
+ genReturn(treeNode);
+ break;
+
+ case GT_LEA:
+ {
+ // if we are here, it is the case where there is an LEA that cannot
+ // be folded into a parent instruction
+ GenTreeAddrMode* lea = treeNode->AsAddrMode();
+ genLeaInstruction(lea);
+ }
+ // genLeaInstruction calls genProduceReg()
+ break;
+
+ case GT_IND:
+ genConsumeAddress(treeNode->AsIndir()->Addr());
+ emit->emitInsLoadStoreOp(ins_Load(targetType), emitTypeSize(treeNode), targetReg, treeNode->AsIndir());
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MULHI:
+ genCodeForMulHi(treeNode->AsOp());
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MOD:
+ case GT_UMOD:
+ // Integer MOD should have been morphed into a sequence of sub, mul, div in fgMorph.
+ //
+ // We shouldn't be seeing GT_MOD on float/double as it is morphed into a helper call by front-end.
+ noway_assert(!"Codegen for GT_MOD/GT_UMOD");
+ break;
+
+ case GT_INTRINSIC:
+ genIntrinsic(treeNode);
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ genSIMDIntrinsic(treeNode->AsSIMD());
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_CKFINITE:
+ genCkfinite(treeNode);
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ {
+ // TODO-ARM64-CQ: Check if we can use the currently set flags.
+ // TODO-ARM64-CQ: Check for the case where we can simply transfer the carry bit to a register
+ // (signed < or >= where targetReg != REG_NA)
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+ assert(!op1->isContainedMemoryOp());
+ assert(!op2->isContainedMemoryOp());
+
+ genConsumeOperands(tree);
+
+ emitAttr cmpSize = EA_UNKNOWN;
+
+ if (varTypeIsFloating(op1Type))
+ {
+ assert(varTypeIsFloating(op2Type));
+ assert(!op1->isContained());
+ assert(op1Type == op2Type);
+ cmpSize = EA_ATTR(genTypeSize(op1Type));
+
+ if (op2->IsIntegralConst(0))
+ {
+ emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0);
+ }
+ else
+ {
+ assert(!op2->isContained());
+ emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum);
+ }
+ }
+ else
+ {
+ assert(!varTypeIsFloating(op2Type));
+ // We don't support swapping op1 and op2 to generate cmp reg, imm
+ assert(!op1->isContainedIntOrIImmed());
+
+ // TODO-ARM64-CQ: the second register argument of a CMP can be sign/zero
+ // extended as part of the instruction (using "CMP (extended register)").
+ // We should use that if possible, swapping operands
+ // (and reversing the condition) if necessary.
+ unsigned op1Size = genTypeSize(op1Type);
+ unsigned op2Size = genTypeSize(op2Type);
+
+ if ((op1Size < 4) || (op1Size < op2Size))
+ {
+ // We need to sign/zero extend op1 up to 32 or 64 bits.
+ instruction ins = ins_Move_Extend(op1Type, true);
+ inst_RV_RV(ins, op1->gtRegNum, op1->gtRegNum);
+ }
+
+ if (!op2->isContainedIntOrIImmed())
+ {
+ if ((op2Size < 4) || (op2Size < op1Size))
+ {
+ // We need to sign/zero extend op2 up to 32 or 64 bits.
+ instruction ins = ins_Move_Extend(op2Type, true);
+ inst_RV_RV(ins, op2->gtRegNum, op2->gtRegNum);
+ }
+ }
+ cmpSize = EA_4BYTE;
+ if ((op1Size == EA_8BYTE) || (op2Size == EA_8BYTE))
+ {
+ cmpSize = EA_8BYTE;
+ }
+
+ if (op2->isContainedIntOrIImmed())
+ {
+ GenTreeIntConCommon* intConst = op2->AsIntConCommon();
+ emit->emitIns_R_I(INS_cmp, cmpSize, op1->gtRegNum, intConst->IconValue());
+ }
+ else
+ {
+ emit->emitIns_R_R(INS_cmp, cmpSize, op1->gtRegNum, op2->gtRegNum);
+ }
+ }
+
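+ // Illustrative (hypothetical registers): a signed 32-bit GT_LT evaluated into a register
+ // ends up as 'cmp w1, w2' (emitted above) followed by a conditional set such as 'cset w0, lt'
+ // produced by genSetRegToCond below.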
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+ }
+ break;
+
+ case GT_JTRUE:
+ {
+ GenTree* cmp = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ assert(cmp->OperIsCompare());
+ assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+ // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
+ // is governed by a flag NOT by the inherent type of the node
+ emitJumpKind jumpKind[2];
+ bool branchToTrueLabel[2];
+ genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
+ assert(jumpKind[0] != EJ_NONE);
+
+ // On Arm64 the branches will always branch to the true label
+ assert(branchToTrueLabel[0]);
+ inst_JMP(jumpKind[0], compiler->compCurBB->bbJumpDest);
+
+ if (jumpKind[1] != EJ_NONE)
+ {
+ // the second conditional branch always has to be to the true label
+ assert(branchToTrueLabel[1]);
+ inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
+ }
+ }
+ break;
+
+ case GT_RETURNTRAP:
+ {
+ // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
+ // based on the contents of 'data'
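+ //
+ // Illustrative shape of the code emitted below (hypothetical register):
+ //   cmp wData, #0
+ //   beq skipLabel
+ //   bl  CORINFO_HELP_STOP_FOR_GC
+ // skipLabel: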
+
+ GenTree* data = treeNode->gtOp.gtOp1;
+ genConsumeRegs(data);
+ emit->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0);
+
+ BasicBlock* skipLabel = genCreateTempLabel();
+
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, skipLabel);
+ // emit the call to the EE-helper that stops for GC (or other reasons)
+
+ genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN);
+ genDefineTempLabel(skipLabel);
+ }
+ break;
+
+ case GT_STOREIND:
+ {
+ GenTree* data = treeNode->gtOp.gtOp2;
+ GenTree* addr = treeNode->gtOp.gtOp1;
+ GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data);
+ if (writeBarrierForm != GCInfo::WBF_NoBarrier)
+ {
+ // data and addr must be in registers.
+ // Consume both registers so that any copies of interfering
+ // registers are taken care of.
+ genConsumeOperands(treeNode->AsOp());
+
+#if NOGC_WRITE_BARRIERS
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF,
+ // as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF);
+
+ // 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
+ if (addr->gtRegNum != REG_WRITE_BARRIER_DST_BYREF)
+ {
+ inst_RV_RV(INS_mov, REG_WRITE_BARRIER_DST_BYREF, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // 'data' goes into x15 (REG_WRITE_BARRIER)
+ if (data->gtRegNum != REG_WRITE_BARRIER)
+ {
+ inst_RV_RV(INS_mov, REG_WRITE_BARRIER, data->gtRegNum, data->TypeGet());
+ }
+#else
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_ARG_0,
+ // as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_ARG_0);
+
+ // addr goes in REG_ARG_0
+ if (addr->gtRegNum != REG_ARG_0)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // data goes in REG_ARG_1
+ if (data->gtRegNum != REG_ARG_1)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
+ }
+#endif // NOGC_WRITE_BARRIERS
+
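+ // Illustrative sketch (NOGC write-barrier path; helper name omitted on purpose): after the
+ // moves above, the code emitted here looks roughly like:
+ //   mov x14, <addr> // REG_WRITE_BARRIER_DST_BYREF
+ //   mov x15, <data> // REG_WRITE_BARRIER
+ //   bl  <write-barrier helper>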
+ genGCWriteBarrier(treeNode, writeBarrierForm);
+ }
+ else // A normal store, not a WriteBarrier store
+ {
+ bool reverseOps = ((treeNode->gtFlags & GTF_REVERSE_OPS) != 0);
+ bool dataIsUnary = false;
+ GenTree* nonRMWsrc = nullptr;
+ // We must consume the operands in the proper execution order,
+ // so that liveness is updated appropriately.
+ if (!reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ if (!data->isContained())
+ {
+ genConsumeRegs(data);
+ }
+
+ if (reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ regNumber dataReg = REG_NA;
+ if (data->isContainedIntOrIImmed())
+ {
+ assert(data->IsIntegralConst(0));
+ dataReg = REG_ZR;
+ }
+ else // data is not contained, so evaluate it into a register
+ {
+ assert(!data->isContained());
+ dataReg = data->gtRegNum;
+ }
+
+ emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(treeNode), dataReg, treeNode->AsIndir());
+ }
+ }
+ break;
+
+ case GT_COPY:
+ // This is handled at the time we call genConsumeReg() on the GT_COPY
+ break;
+
+ case GT_SWAP:
+ {
+ // Swap is only supported for lclVar operands that are enregistered
+ // We do not consume or produce any registers. Both operands remain enregistered.
+ // However, the gc-ness may change.
+ assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2));
+
+ GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
+ var_types type1 = varDsc1->TypeGet();
+ GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon();
+ LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
+ var_types type2 = varDsc2->TypeGet();
+
+ // We must have both int or both fp regs
+ assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
+
+ // FP swap is not yet implemented (and should have NYI'd in LSRA)
+ assert(!varTypeIsFloating(type1));
+
+ regNumber oldOp1Reg = lcl1->gtRegNum;
+ regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
+ regNumber oldOp2Reg = lcl2->gtRegNum;
+ regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
+
+ // We don't call genUpdateVarReg because we don't have a tree node with the new register.
+ varDsc1->lvRegNum = oldOp2Reg;
+ varDsc2->lvRegNum = oldOp1Reg;
+
+ // Do the xchg
+ emitAttr size = EA_PTRSIZE;
+ if (varTypeGCtype(type1) != varTypeGCtype(type2))
+ {
+ // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
+ // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
+ size = EA_GCREF;
+ }
+
+ NYI("register swap");
+ // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
+
+ // Update the gcInfo.
+ // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
+ gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
+ gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
+
+ // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
+ // It will also dump the updates.
+ gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
+ gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
+ }
+ break;
+
+ case GT_LIST:
+ case GT_ARGPLACE:
+ // Nothing to do
+ break;
+
+ case GT_PUTARG_STK:
+ genPutArgStk(treeNode);
+ break;
+
+ case GT_PUTARG_REG:
+ assert(targetType != TYP_STRUCT); // Any TYP_STRUCT register args should have been removed by
+ // fgMorphMultiregStructArg
+ // We have a normal non-Struct targetType
+ {
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+ // If child node is not already in the register we need, move it
+ genConsumeReg(op1);
+ if (targetReg != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
+ }
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_CALL:
+ genCallInstruction(treeNode);
+ break;
+
+ case GT_JMP:
+ genJmpMethod(treeNode);
+ break;
+
+ case GT_LOCKADD:
+ case GT_XCHG:
+ case GT_XADD:
+ genLockedInstructions(treeNode);
+ break;
+
+ case GT_MEMORYBARRIER:
+ instGen_MemoryBarrier();
+ break;
+
+ case GT_CMPXCHG:
+ NYI("GT_CMPXCHG");
+ break;
+
+ case GT_RELOAD:
+ // do nothing - reload is just a marker.
+ // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
+ // into the register specified in this node.
+ break;
+
+ case GT_NOP:
+ break;
+
+ case GT_NO_OP:
+ if (treeNode->gtFlags & GTF_NO_OP_NO)
+ {
+ noway_assert(!"GTF_NO_OP_NO should not be set");
+ }
+ else
+ {
+ instGen(INS_nop);
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ genRangeCheck(treeNode);
+ break;
+
+ case GT_PHYSREG:
+ if (targetReg != treeNode->AsPhysReg()->gtSrcReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, treeNode->AsPhysReg()->gtSrcReg, targetType);
+
+ genTransferRegGCState(targetReg, treeNode->AsPhysReg()->gtSrcReg);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_PHYSREGDST:
+ break;
+
+ case GT_NULLCHECK:
+ {
+ assert(!treeNode->gtOp.gtOp1->isContained());
+ regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
+ emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, reg, 0);
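+ // This emits 'ldr wzr, [reg]', which faults if 'reg' is null.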
+ }
+ break;
+
+ case GT_CATCH_ARG:
+
+ noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
+
+ /* Catch arguments get passed in a register. genCodeForBBlist()
+ would have marked it as holding a GC object, but not used. */
+
+ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
+ genConsumeReg(treeNode);
+ break;
+
+ case GT_PINVOKE_PROLOG:
+ noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
+
+ // the runtime side requires the codegen here to be consistent
+ emit->emitDisableRandomNops();
+ break;
+
+ case GT_LABEL:
+ genPendingCallLabel = genCreateTempLabel();
+ treeNode->gtLabel.gtLabBB = genPendingCallLabel;
+
+ // For long address (default): `adrp + add` will be emitted.
+ // For short address (proven later): `adr` will be emitted.
+ emit->emitIns_R_L(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg);
+ break;
+
+ case GT_STORE_OBJ:
+ if (treeNode->OperIsCopyBlkOp())
+ {
+ assert(treeNode->AsObj()->gtGcPtrCount != 0);
+ genCodeForCpObj(treeNode->AsObj());
+ break;
+ }
+ __fallthrough;
+
+ case GT_STORE_DYN_BLK:
+ case GT_STORE_BLK:
+ {
+ GenTreeBlk* blkOp = treeNode->AsBlk();
+ if (blkOp->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitDisableGC();
+ }
+ bool isCopyBlk = blkOp->OperIsCopyBlkOp();
+
+ switch (blkOp->gtBlkOpKind)
+ {
+ case GenTreeBlk::BlkOpKindHelper:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlk(blkOp);
+ }
+ else
+ {
+ genCodeForInitBlk(blkOp);
+ }
+ break;
+ case GenTreeBlk::BlkOpKindUnroll:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlkUnroll(blkOp);
+ }
+ else
+ {
+ genCodeForInitBlkUnroll(blkOp);
+ }
+ break;
+ default:
+ unreached();
+ }
+ if (blkOp->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitEnableGC();
+ }
+ }
+ break;
+
+ case GT_JMPTABLE:
+ genJumpTable(treeNode);
+ break;
+
+ case GT_SWITCH_TABLE:
+ genTableBasedSwitch(treeNode);
+ break;
+
+ case GT_ARR_INDEX:
+ genCodeForArrIndex(treeNode->AsArrIndex());
+ break;
+
+ case GT_ARR_OFFSET:
+ genCodeForArrOffset(treeNode->AsArrOffs());
+ break;
+
+ case GT_CLS_VAR_ADDR:
+ NYI("GT_CLS_VAR_ADDR");
+ break;
+
+ case GT_IL_OFFSET:
+ // Do nothing; these nodes are simply markers for debug info.
+ break;
+
+ default:
+ {
+#ifdef DEBUG
+ char message[256];
+ sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
+#endif
+ assert(!"Unknown node in codegen");
+ }
+ break;
+ }
+}
+
+//----------------------------------------------------------------------------------
+// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
+//
+// Arguments:
+// treeNode - Gentree of GT_STORE_LCL_VAR
+//
+// Return Value:
+// None
+//
+// Assumption:
+// The child of store is a multi-reg call node.
+// genProduceReg() on treeNode is made by caller of this routine.
+//
+void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
+
+ // On ARM64, structs of size >= 9 and <= 16 bytes are returned in two return registers, as are HFAs.
+ assert(varTypeIsStruct(treeNode));
+
+ // Assumption: current ARM64 implementation requires that a multi-reg struct
+ // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
+ // being struct promoted.
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ noway_assert(varDsc->lvIsMultiRegRet);
+
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+ assert(call->HasMultiRegRetVal());
+
+ genConsumeRegs(op1);
+
+ ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = pRetTypeDesc->GetReturnRegCount();
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ // Right now the only enregistrable structs supported are SIMD types.
+ assert(varTypeIsSIMD(treeNode));
+ NYI("GT_STORE_LCL_VAR of a SIMD enregisterable struct");
+ }
+ else
+ {
+ // Stack store
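+ // Illustrative example (hypothetical 16-byte non-HFA struct): the call returns in {x0, x1},
+ // so two 8-byte stores are emitted to the local's stack home at offsets 0 and 8.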
+ int offset = 0;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = pRetTypeDesc->GetReturnRegType(i);
+ regNumber reg = call->GetRegNumByIdx(i);
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+ if (reloadReg != REG_NA)
+ {
+ reg = reloadReg;
+ }
+ }
+
+ assert(reg != REG_NA);
+ getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
+ offset += genTypeSize(type);
+ }
+
+ varDsc->lvRegNum = REG_STK;
+ }
+}
+
+/***********************************************************************************************
+ * Generate code for localloc
+ */
+void CodeGen::genLclHeap(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_LCLHEAP);
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
+
+ regNumber targetReg = tree->gtRegNum;
+ regMaskTP tmpRegsMask = tree->gtRsvdRegs;
+ regNumber regCnt = REG_NA;
+ regNumber pspSymReg = REG_NA;
+ var_types type = genActualType(size->gtType);
+ emitAttr easz = emitTypeSize(type);
+ BasicBlock* endLabel = nullptr;
+ BasicBlock* loop = nullptr;
+ unsigned stackAdjustment = 0;
+
+#ifdef DEBUG
+ // Verify ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+
+ noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
+ noway_assert(genStackLevel == 0); // Can't have anything on the stack
+
+ // Whether method has PSPSym.
+ bool hasPspSym;
+#if FEATURE_EH_FUNCLETS
+ hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
+#else
+ hasPspSym = false;
+#endif
+
+ // Compute the amount of memory to allocate, properly rounded up to STACK_ALIGN.
+ size_t amount = 0;
+ if (size->IsCnsIntOrI())
+ {
+ // If size is a constant, then it must be contained.
+ assert(size->isContained());
+
+ // If amount is zero then return null in targetReg
+ amount = size->gtIntCon.gtIconVal;
+ if (amount == 0)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
+ goto BAILOUT;
+ }
+
+ // 'amount' is the total number of bytes to localloc, rounded up to be properly STACK_ALIGNed
+ amount = AlignUp(amount, STACK_ALIGN);
+ }
+ else
+ {
+ // If the size is 0, bail out by returning null in targetReg
+ genConsumeRegAndCopy(size, targetReg);
+ endLabel = genCreateTempLabel();
+ getEmitter()->emitIns_R_R(INS_TEST, easz, targetReg, targetReg);
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, endLabel);
+
+ // Compute the size of the block to allocate and perform alignment.
+ // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt,
+ // since we don't need any internal registers.
+ if (!hasPspSym && compiler->info.compInitMem)
+ {
+ assert(genCountBits(tmpRegsMask) == 0);
+ regCnt = targetReg;
+ }
+ else
+ {
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~regCntMask;
+ regCnt = genRegNumFromMask(regCntMask);
+ if (regCnt != targetReg)
+ inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
+ }
+
+ // Align to STACK_ALIGN
+ // regCnt will be the total number of bytes to localloc
+ inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
+ inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
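+ // For example (illustrative, assuming STACK_ALIGN == 16): a requested size of 24 becomes
+ // (24 + 15) & ~15 == 32.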
+ }
+
+ stackAdjustment = 0;
+#if FEATURE_EH_FUNCLETS
+ // If we have PSPsym, then need to re-locate it after localloc.
+ if (hasPspSym)
+ {
+ stackAdjustment += STACK_ALIGN;
+
+ // Save a copy of PSPSym
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP pspSymRegMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~pspSymRegMask;
+ pspSymReg = genRegNumFromMask(pspSymRegMask);
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
+ }
+#endif
+
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must adjust the SP by popping off the
+ // outgoing arg area. We will restore it right before we return from this method.
+ //
+ // Localloc is supposed to return stack space that is STACK_ALIGN'ed. The following
+ // are the cases that need to be handled:
+ // i) Method has PSPSym + out-going arg area.
+ // It is guaranteed that size of out-going arg area is STACK_ALIGNED (see fgMorphArgs).
+ // Therefore, we will pop off SP up to the out-going arg area before locallocating.
+ // We need to add padding to ensure SP is STACK_ALIGN'ed while re-locating PSPSym + arg area.
+ // ii) Method has no PSPSym but has an out-going arg area.
+ // Almost the same case as above, without the requirement to pad for the final SP to be STACK_ALIGN'ed.
+ // iii) Method has PSPSym but no out-going arg area.
+ // Nothing to pop off from the stack, but PSPSym needs to be relocated with SP padded.
+ // iv) Method has neither PSPSym nor out-going arg area.
+ // Nothing needs to be popped off from the stack nor relocated.
+ if (compiler->lvaOutgoingArgSpaceSize > 0)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
+ // aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
+ }
+#endif
+
+ if (size->IsCnsIntOrI())
+ {
+ // We should reach here only for non-zero, constant size allocations.
+ assert(amount > 0);
+
+ // For small allocations we will generate up to four stp instructions
+ size_t cntStackAlignedWidthItems = (amount >> STACK_ALIGN_SHIFT);
+ if (cntStackAlignedWidthItems <= 4)
+ {
+ while (cntStackAlignedWidthItems != 0)
+ {
+ // We can use pre-indexed addressing.
+ // stp ZR, ZR, [SP, #-16]!
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX);
+ cntStackAlignedWidthItems -= 1;
+ }
+
+ goto ALLOC_DONE;
+ }
+ else if (!compiler->info.compInitMem && (amount < compiler->eeGetPageSize())) // must be < not <=
+ {
+ // Since the size is a page or less, simply adjust the SP value
+ // The SP might already be in the guard page, must touch it BEFORE
+ // the alloc, not after.
+ // ldr wz, [SP, #0]
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SP, 0);
+
+ inst_RV_IV(INS_sub, REG_SP, amount, EA_PTRSIZE);
+
+ goto ALLOC_DONE;
+ }
+
+ // else, "mov regCnt, amount"
+ // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt.
+ // Since size is a constant, regCnt is not yet initialized.
+ assert(regCnt == REG_NA);
+ if (!hasPspSym && compiler->info.compInitMem)
+ {
+ assert(genCountBits(tmpRegsMask) == 0);
+ regCnt = targetReg;
+ }
+ else
+ {
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~regCntMask;
+ regCnt = genRegNumFromMask(regCntMask);
+ }
+ genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
+ }
+
+ if (compiler->info.compInitMem)
+ {
+ BasicBlock* loop = genCreateTempLabel();
+
+ // At this point 'regCnt' is set to the total number of bytes to locAlloc.
+ // Since we have to zero out the allocated memory AND ensure that RSP is always valid
+ // by tickling the pages, we will just push 0's on the stack.
+ //
+ // Note: regCnt is guaranteed to be even on Arm64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2
+ // and localloc size is a multiple of STACK_ALIGN.
+
+ // Loop:
+ genDefineTempLabel(loop);
+
+ // We can use pre-indexed addressing.
+ // stp ZR, ZR, [SP, #-16]!
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX);
+
+ // If not done, loop
+ // Note that regCnt is the number of bytes to stack allocate.
+ // Therefore we need to subtract 16 from regcnt here.
+ assert(genIsValidIntReg(regCnt));
+ inst_RV_IV(INS_subs, regCnt, 16, emitActualTypeSize(type));
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ inst_JMP(jmpNotEqual, loop);
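+ // Because SP moves down only 16 bytes per store and each store touches the newly
+ // allocated memory, this loop also serves as the stack probe for the compInitMem case.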
+ }
+ else
+ {
+ // At this point 'regCnt' is set to the total number of bytes to locAlloc.
+ //
+ // We don't need to zero out the allocated memory. However, we do have
+ // to tickle the pages to ensure that SP is always valid and is
+ // in sync with the "stack guard page". Note that in the worst
+ // case SP is on the last byte of the guard page. Thus you must
+ // touch SP+0 first, not SP-0x1000 (one page below).
+ //
+ // Another subtlety is that you don't want SP to be exactly on the
+ // boundary of the guard page because PUSH is predecrement, thus
+ // call setup would not touch the guard page but just beyond it
+ //
+ // Note that we go through a few hoops so that SP never points to
+ // illegal pages at any time during the ticking process
+ //
+ // subs regCnt, SP, regCnt // regCnt now holds ultimate SP
+ // jb Loop // result is smaller than original SP (no wrap around)
+ // mov regCnt, #0 // Overflow, pick lowest possible value
+ //
+ // Loop:
+ // ldr wzr, [SP + 0] // tickle the page - read from the page
+ // sub regTmp, SP, PAGE_SIZE // decrement SP by PAGE_SIZE
+ // cmp regTmp, regCnt
+ // jb Done
+ // mov SP, regTmp
+ // j Loop
+ //
+ // Done:
+ // mov SP, regCnt
+ //
+
+ // Setup the regTmp
+ assert(tmpRegsMask != RBM_NONE);
+ assert(genCountBits(tmpRegsMask) == 1);
+ regNumber regTmp = genRegNumFromMask(tmpRegsMask);
+
+ BasicBlock* loop = genCreateTempLabel();
+ BasicBlock* done = genCreateTempLabel();
+
+ // subs regCnt, SP, regCnt // regCnt now holds ultimate SP
+ getEmitter()->emitIns_R_R_R(INS_subs, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt);
+
+ inst_JMP(EJ_vc, loop); // branch if the V flag is not set
+
+ // Overflow, set regCnt to lowest possible value
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+
+ genDefineTempLabel(loop);
+
+ // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, 0);
+
+ // decrement SP by PAGE_SIZE
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, regTmp, REG_SPBASE, compiler->eeGetPageSize());
+
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regCnt);
+ emitJumpKind jmpLTU = genJumpKindForOper(GT_LT, CK_UNSIGNED);
+ inst_JMP(jmpLTU, done);
+
+ // Update SP to be at the next page of stack that we will tickle
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt);
+
+ // Jump to loop and tickle new stack address
+ inst_JMP(EJ_jmp, loop);
+
+ // Done with stack tickle loop
+ genDefineTempLabel(done);
+
+ // Now just move the final value to SP
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt);
+ }
+
+ALLOC_DONE:
+ // Re-adjust SP to allocate PSPSym and out-going arg area
+ if (stackAdjustment != 0)
+ {
+ assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
+ assert(stackAdjustment > 0);
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, (int)stackAdjustment);
+
+#if FEATURE_EH_FUNCLETS
+ // Write PSPSym to its new location.
+ if (hasPspSym)
+ {
+ assert(genIsValidIntReg(pspSymReg));
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
+ }
+#endif
+ // Return the stackalloc'ed address in result register.
+ // TargetReg = RSP + stackAdjustment.
+ //
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, targetReg, REG_SPBASE, (int)stackAdjustment);
+ }
+ else // stackAdjustment == 0
+ {
+ // Move the final value of SP to targetReg
+ inst_RV_RV(INS_mov, targetReg, REG_SPBASE);
+ }
+
+BAILOUT:
+ if (endLabel != nullptr)
+ genDefineTempLabel(endLabel);
+
+ // Write the lvaLocAllocSPvar stack frame slot
+ noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaLocAllocSPvar, 0);
+
+#if STACK_PROBES
+ if (compiler->opts.compNeedStackProbes)
+ {
+ genGenerateStackProbe();
+ }
+#endif
+
+#ifdef DEBUG
+ // Save the new SP value for the stack check on return
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaReturnEspCheck, 0);
+ }
+#endif
+
+ genProduceReg(tree);
+}
+
+// Generate code for InitBlk by performing a loop unroll
+// Preconditions:
+// a) Both the size and fill byte value are integer constants.
+// b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
+void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
+{
+#if 0
+ // Make sure we got the arguments of the initblk/initobj operation in the right registers
+ unsigned size = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+ assert(size != 0);
+ assert(size <= INITBLK_UNROLL_LIMIT);
+ assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
+
+ emitter *emit = getEmitter();
+
+ genConsumeReg(initVal);
+ genConsumeReg(dstAddr);
+
+ // If the initVal was moved, or spilled and reloaded to a different register,
+ // get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
+ // which needs to be the new register.
+ regNumber valReg = initVal->gtRegNum;
+ initVal = initVal->gtSkipReloadOrCopy();
+#else // !0
+ NYI("genCodeForInitBlkUnroll");
+#endif // !0
+}
+
+ // Generates code for InitBlk by calling the VM memset helper function.
+ // Preconditions (one of):
+ // a) The size argument of the InitBlk is not an integer constant, or
+ // b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
+void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
+{
+ // Make sure we got the arguments of the initblk operation in the right registers
+ unsigned size = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+ assert(initBlkNode->gtRsvdRegs == RBM_ARG_2);
+
+ if (size == 0)
+ {
+ noway_assert(initBlkNode->gtOper == GT_DYN_BLK);
+ genConsumeRegAndCopy(initBlkNode->AsDynBlk()->gtDynamicSize, REG_ARG_2);
+ }
+ else
+ {
+// TODO-ARM64-CQ: When initblk loop unrolling is implemented
+// put this assert back on.
+#if 0
+ assert(size >= INITBLK_UNROLL_LIMIT);
+#endif // 0
+ genSetRegToIcon(REG_ARG_2, size);
+ }
+ genConsumeRegAndCopy(initVal, REG_ARG_1);
+ genConsumeRegAndCopy(dstAddr, REG_ARG_0);
+
+ genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+}
+
+// Generate code for a load from some address + offset
+// base: tree node which can be either a local address or arbitrary node
+// offset: distance from the base from which to load
+void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset)
+{
+ emitter* emit = getEmitter();
+
+ if (base->OperIsLocalAddr())
+ {
+ if (base->gtOper == GT_LCL_FLD_ADDR)
+ offset += base->gtLclFld.gtLclOffs;
+ emit->emitIns_R_S(ins, size, dst, base->gtLclVarCommon.gtLclNum, offset);
+ }
+ else
+ {
+ emit->emitIns_R_R_I(ins, size, dst, base->gtRegNum, offset);
+ }
+}
+
+// Generate code for a store to some address + offset
+// base: tree node which can be either a local address or arbitrary node
+ // offset: distance from the base at which to store
+void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset)
+{
+#if 0
+ emitter *emit = getEmitter();
+
+ if (base->OperIsLocalAddr())
+ {
+ if (base->gtOper == GT_LCL_FLD_ADDR)
+ offset += base->gtLclFld.gtLclOffs;
+ emit->emitIns_S_R(ins, size, src, base->gtLclVarCommon.gtLclNum, offset);
+ }
+ else
+ {
+ emit->emitIns_AR_R(ins, size, src, base->gtRegNum, offset);
+ }
+#else // !0
+ NYI("genCodeForStoreOffset");
+#endif // !0
+}
+
+// Generates CpBlk code by performing a loop unroll
+// Preconditions:
+// The size argument of the CpBlk node is a constant and <= 64 bytes.
+// This may seem small but covers >95% of the cases in several framework assemblies.
+void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
+{
+#if 0
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned size = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ GenTreePtr source = cpBlkNode->Data();
+ noway_assert(source->gtOper == GT_IND);
+ GenTreePtr srcAddr = source->gtGetOp1();
+
+ assert((size != 0 ) && (size <= CPBLK_UNROLL_LIMIT));
+
+ emitter *emit = getEmitter();
+
+ if (!srcAddr->isContained())
+ genConsumeReg(srcAddr);
+
+ if (!dstAddr->isContained())
+ genConsumeReg(dstAddr);
+
+ unsigned offset = 0;
+
+ // If the size of this struct is larger than 16 bytes
+ // let's use SSE2 to be able to do 16 byte at a time
+ // loads and stores.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ assert(cpBlkNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(cpBlkNode->gtRsvdRegs) == 1);
+ regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs);
+ assert(genIsValidFloatReg(xmmReg));
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ while (slots-- > 0)
+ {
+ // Load
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
+ // Store
+ genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (15 bytes or less) if there's one.
+ if ((size & 0xf) != 0)
+ {
+ // Grab the integer temp register to emit the remaining loads and stores.
+ regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT);
+
+ if ((size & 8) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
+ offset += 8;
+ }
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ offset += 4;
+ }
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
+ offset += 2;
+ }
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
+ }
+ }
+#else // !0
+ NYI("genCodeForCpBlkUnroll");
+#endif // !0
+}
+
+ // Generate code for CpObj nodes which copy structs that have interleaved
+// GC pointers.
+// For this case we'll generate a sequence of loads/stores in the case of struct
+// slots that don't contain GC pointers. The generated code will look like:
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+//
+// In the case of a GC-Pointer we'll call the ByRef write barrier helper
+ // which happens to use the same registers as the previous call to maintain
+// the same register requirements and register killsets:
+// bl CORINFO_HELP_ASSIGN_BYREF
+//
+// So finally an example would look like this:
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+// bl CORINFO_HELP_ASSIGN_BYREF
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+// bl CORINFO_HELP_ASSIGN_BYREF
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
+{
+ // Make sure we got the arguments of the cpobj operation in the right registers
+ GenTreePtr dstAddr = cpObjNode->Addr();
+ GenTreePtr source = cpObjNode->Data();
+ noway_assert(source->gtOper == GT_IND);
+ GenTreePtr srcAddr = source->gtGetOp1();
+
+ bool dstOnStack = dstAddr->OperIsLocalAddr();
+
+#ifdef DEBUG
+ assert(!dstAddr->isContained());
+ assert(!srcAddr->isContained());
+
+ // This GenTree node has data about GC pointers, which means we're dealing
+ // with CpObj.
+ assert(cpObjNode->gtGcPtrCount > 0);
+#endif // DEBUG
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ genConsumeRegAndCopy(srcAddr, REG_WRITE_BARRIER_SRC_BYREF);
+ gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddr->TypeGet());
+
+ genConsumeRegAndCopy(dstAddr, REG_WRITE_BARRIER_DST_BYREF);
+ gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet());
+
+ // Temp register used to perform the sequence of loads and stores.
+ regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs);
+
+#ifdef DEBUG
+ assert(cpObjNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(cpObjNode->gtRsvdRegs) == 1);
+ assert(genIsValidIntReg(tmpReg));
+#endif // DEBUG
+
+ unsigned slots = cpObjNode->gtSlots;
+ emitter* emit = getEmitter();
+
+ // If we can prove it's on the stack we don't need to use the write barrier.
+ if (dstOnStack)
+ {
+ // TODO-ARM64-CQ: Consider using LDP/STP to save codesize.
+ while (slots > 0)
+ {
+ emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
+ INS_OPTS_POST_INDEX);
+ emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
+ INS_OPTS_POST_INDEX);
+ slots--;
+ }
+ }
+ else
+ {
+ BYTE* gcPtrs = cpObjNode->gtGcPtrs;
+ unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
+
+ unsigned i = 0;
+ while (i < slots)
+ {
+ switch (gcPtrs[i])
+ {
+ case TYPE_GC_NONE:
+ // TODO-ARM64-CQ: Consider using LDP/STP to save codesize in case of contiguous NON-GC slots.
+ emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
+ INS_OPTS_POST_INDEX);
+ emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
+ INS_OPTS_POST_INDEX);
+ break;
+
+ default:
+ // We have a GC pointer; call the byref write barrier helper.
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
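+ // The byref write barrier helper copies the GC slot and advances both
+ // byref registers, so no explicit post-increment is needed here.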
+ gcPtrCount--;
+ break;
+ }
+ ++i;
+ }
+ assert(gcPtrCount == 0);
+ }
+
+ // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF.
+ // While we normally update GC info prior to the last instruction that uses them,
+ // these actually live into the helper call.
+ gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF);
+}
+
+ // Generate code for a CpBlk node by means of the VM memcpy helper call
+ // Preconditions (one of):
+ // a) The size argument of the CpBlk is not an integer constant, or
+ // b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
+void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
+{
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned blockSize = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ GenTreePtr source = cpBlkNode->Data();
+ noway_assert(source->gtOper == GT_IND);
+ GenTreePtr srcAddr = source->gtGetOp1();
+
+ assert(!dstAddr->isContained());
+ assert(!srcAddr->isContained());
+ assert(cpBlkNode->gtRsvdRegs == RBM_ARG_2);
+
+ if (blockSize != 0)
+ {
+#if 0
+ // Enable this when we support cpblk loop unrolling.
+
+ assert(blockSize->gtIntCon.gtIconVal >= CPBLK_UNROLL_LIMIT);
+
+#endif // 0
+ genSetRegToIcon(REG_ARG_2, blockSize);
+ }
+ else
+ {
+ noway_assert(cpBlkNode->gtOper == GT_DYN_BLK);
+ genConsumeRegAndCopy(cpBlkNode->AsDynBlk()->gtDynamicSize, REG_ARG_2);
+ }
+ genConsumeRegAndCopy(srcAddr, REG_ARG_1);
+ genConsumeRegAndCopy(dstAddr, REG_ARG_0);
+
+ genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
+}
+
+ // Generate code to do a switch statement based on a table of ip-relative offsets
+void CodeGen::genTableBasedSwitch(GenTree* treeNode)
+{
+ genConsumeOperands(treeNode->AsOp());
+ regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
+ regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
+
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ // load the ip-relative offset (which is relative to start of fgFirstBB)
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, baseReg, baseReg, idxReg, INS_OPTS_LSL);
+
+ // add it to the absolute address of fgFirstBB
+ compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, compiler->fgFirstBB, tmpReg);
+ getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, baseReg, baseReg, tmpReg);
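+ // baseReg now holds the absolute address of the switch target.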
+
+ // br baseReg
+ getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), baseReg);
+}
+
+// emits the table and an instruction to get the address of the first element
+void CodeGen::genJumpTable(GenTree* treeNode)
+{
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
+ assert(treeNode->OperGet() == GT_JMPTABLE);
+
+ unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
+ unsigned jmpTabOffs;
+ unsigned jmpTabBase;
+
+ jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
+
+ jmpTabOffs = 0;
+
+ JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
+
+ for (unsigned i = 0; i < jumpCount; i++)
+ {
+ BasicBlock* target = *jumpTable++;
+ noway_assert(target->bbFlags & BBF_JMP_TARGET);
+
+ JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
+
+ getEmitter()->emitDataGenData(i, target);
+ }
+
+ getEmitter()->emitDataGenEnd();
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+ getEmitter()->emitIns_R_C(INS_adr, emitTypeSize(TYP_I_IMPL), treeNode->gtRegNum, REG_NA,
+ compiler->eeFindJitDataOffs(jmpTabBase), 0);
+ genProduceReg(treeNode);
+}
+
+// generate code for the locked operations:
+// GT_LOCKADD, GT_XCHG, GT_XADD
+void CodeGen::genLockedInstructions(GenTree* treeNode)
+{
+#if 0
+ GenTree* data = treeNode->gtOp.gtOp2;
+ GenTree* addr = treeNode->gtOp.gtOp1;
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber dataReg = data->gtRegNum;
+ regNumber addrReg = addr->gtRegNum;
+ instruction ins;
+
+ // all of these nodes implicitly do an indirection on op1
+ // so create a temporary node to feed into the pattern matching
+ GenTreeIndir i = indirForm(data->TypeGet(), addr);
+ genConsumeReg(addr);
+
+ // The register allocator should have extended the lifetime of the address
+ // so that it is not used as the target.
+ noway_assert(addrReg != targetReg);
+
+ // If data is a lclVar that's not a last use, we'd better have allocated a register
+ // for the result (except in the case of GT_LOCKADD which does not produce a register result).
+ assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) || (data->gtFlags & GTF_VAR_DEATH) != 0);
+
+ genConsumeIfReg(data);
+ if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
+ data->gtRegNum = targetReg;
+
+ // TODO-ARM64-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
+ // original gtRegNum on data, after calling emitInsBinary below.
+ }
+ switch (treeNode->OperGet())
+ {
+ case GT_LOCKADD:
+ instGen(INS_lock);
+ ins = INS_add;
+ break;
+ case GT_XCHG:
+ // lock is implied by xchg
+ ins = INS_xchg;
+ break;
+ case GT_XADD:
+ instGen(INS_lock);
+ ins = INS_xadd;
+ break;
+ default:
+ unreached();
+ }
+ getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data);
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ genProduceReg(treeNode);
+ }
+#else // !0
+ NYI("genLockedInstructions");
+#endif // !0
+}
+
+// generate code for BoundsCheck nodes
+void CodeGen::genRangeCheck(GenTreePtr oper)
+{
+#ifdef FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
+#else // !FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
+#endif // !FEATURE_SIMD
+
+ GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
+
+ GenTreePtr arrLen = bndsChk->gtArrLen;
+ GenTreePtr arrIndex = bndsChk->gtIndex;
+ GenTreePtr arrRef = NULL;
+ int lenOffset = 0;
+
+ GenTree * src1, *src2;
+ emitJumpKind jmpKind;
+
+ genConsumeRegs(arrLen);
+ genConsumeRegs(arrIndex);
+
+ if (arrIndex->isContainedIntOrIImmed())
+ {
+ // To encode using a cmp immediate, we place the
+ // constant operand in the second position
+ src1 = arrLen;
+ src2 = arrIndex;
+ jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED);
+ }
+ else
+ {
+ src1 = arrIndex;
+ src2 = arrLen;
+ jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ }
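+ // In both cases we take the throw path when the (unsigned) index is not strictly less
+ // than the array length; swapping the operand order for the contained-constant case is
+ // what requires flipping GE to LE.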
+
+ GenTreeIntConCommon* intConst = nullptr;
+ if (src2->isContainedIntOrIImmed())
+ {
+ intConst = src2->AsIntConCommon();
+ }
+
+ if (intConst != nullptr)
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, src1->gtRegNum, intConst->IconValue());
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, src1->gtRegNum, src2->gtRegNum);
+ }
+
+ genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+}
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
+// lower bound for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+// dimension - the dimension for which the lower bound offset will be returned.
+//
+// Return Value:
+// The offset.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+
+// static
+unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
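+ // Together with genOffsetOfMDArrayDimensionSize below, this layout places the 'rank'
+ // dimension sizes first, followed by the 'rank' lower bounds; e.g., for rank == 2 the
+ // lower bound of dimension 0 lives at the array data offset + 4 * 2.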
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
+}
+
+//------------------------------------------------------------------------
+ // genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
+// size for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+ // dimension - the dimension for which the size offset will be returned.
+//
+// Return Value:
+// The offset.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+
+// static
+unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
+}
+
+//------------------------------------------------------------------------
+// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
+// producing the effective index by subtracting the lower bound.
+//
+// Arguments:
+// arrIndex - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+
+void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
+{
+ emitter* emit = getEmitter();
+ GenTreePtr arrObj = arrIndex->ArrObj();
+ GenTreePtr indexNode = arrIndex->IndexExpr();
+ regNumber arrReg = genConsumeReg(arrObj);
+ regNumber indexReg = genConsumeReg(indexNode);
+ regNumber tgtReg = arrIndex->gtRegNum;
+ noway_assert(tgtReg != REG_NA);
+
+ // We will use a temp register to load the lower bound and dimension size values
+ //
+ regMaskTP tmpRegsMask = arrIndex->gtRsvdRegs; // there will be two bits set
+ tmpRegsMask &= ~genRegMask(tgtReg); // remove the bit for 'tgtReg' from 'tmpRegsMask'
+
+ regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask
+ noway_assert(tmpReg != REG_NA);
+
+ assert(tgtReg != tmpReg);
+
+ unsigned dim = arrIndex->gtCurrDim;
+ unsigned rank = arrIndex->gtArrRank;
+ var_types elemType = arrIndex->gtArrElemType;
+ unsigned offset;
+
+ offset = genOffsetOfMDArrayLowerBound(elemType, rank, dim);
+ emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
+ emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tgtReg, indexReg, tmpReg);
+
+ offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
+ emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
+ emit->emitIns_R_R(INS_cmp, EA_4BYTE, tgtReg, tmpReg);
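+ // tgtReg now holds the zero-based index; it must be (unsigned) less than the
+ // dimension size in tmpReg, otherwise we throw.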
+
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
+
+ genProduceReg(arrIndex);
+}
+
+//------------------------------------------------------------------------
+// genCodeForArrOffset: Generates code to compute the flattened array offset for
+// one dimension of an array reference:
+// result = (prevDimOffset * dimSize) + effectiveIndex
+// where dimSize is obtained from the arrObj operand
+//
+// Arguments:
+// arrOffset - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+// Notes:
+// dimSize and effectiveIndex are always non-negative, the former by design,
+// and the latter because it has been normalized to be zero-based.
+
+void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
+{
+ GenTreePtr offsetNode = arrOffset->gtOffset;
+ GenTreePtr indexNode = arrOffset->gtIndex;
+ regNumber tgtReg = arrOffset->gtRegNum;
+
+ noway_assert(tgtReg != REG_NA);
+
+ if (!offsetNode->IsIntegralConst(0))
+ {
+ emitter* emit = getEmitter();
+ GenTreePtr arrObj = arrOffset->gtArrObj;
+ regNumber arrReg = genConsumeReg(arrObj);
+ noway_assert(arrReg != REG_NA);
+ regNumber offsetReg = genConsumeReg(offsetNode);
+ noway_assert(offsetReg != REG_NA);
+ regNumber indexReg = genConsumeReg(indexNode);
+ noway_assert(indexReg != REG_NA);
+ regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ noway_assert(tmpReg != REG_NA);
+ unsigned dim = arrOffset->gtCurrDim;
+ unsigned rank = arrOffset->gtArrRank;
+ var_types elemType = arrOffset->gtArrElemType;
+ unsigned offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
+
+ // Load tmpReg with the dimension size
+ emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
+
+ // Evaluate tgtReg = offsetReg*dim_size + indexReg.
+ emit->emitIns_R_R_R_R(INS_madd, EA_4BYTE, tgtReg, tmpReg, offsetReg, indexReg);
+ }
+ else
+ {
+ regNumber indexReg = genConsumeReg(indexNode);
+ if (indexReg != tgtReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
+ }
+ }
+ genProduceReg(arrOffset);
+}
+
+ // make a temporary indir that we can feed to the pattern matching routines
+ // in cases where we don't want to instantiate all the indirs that would otherwise be created
+//
+// TODO-Cleanup: move to CodeGenCommon.cpp
+GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
+{
+ GenTreeIndir i(GT_IND, type, base, nullptr);
+ i.gtRegNum = REG_NA;
+ // has to be nonnull (because contained nodes can't be the last in block)
+ // but don't want it to be a valid pointer
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+ // make a temporary int constant that we can feed to the pattern matching routines
+ // in cases where we don't want to instantiate an actual node
+//
+// TODO-Cleanup: move to CodeGenCommon.cpp
+GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
+{
+ GenTreeIntCon i(type, value);
+ i.gtRegNum = REG_NA;
+ // has to be nonnull (because contained nodes can't be the last in block)
+ // but don't want it to be a valid pointer
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
+{
+ instruction ins = INS_brk;
+
+ if (varTypeIsFloating(type))
+ {
+ switch (oper)
+ {
+ case GT_ADD:
+ ins = INS_fadd;
+ break;
+ case GT_SUB:
+ ins = INS_fsub;
+ break;
+ case GT_MUL:
+ ins = INS_fmul;
+ break;
+ case GT_DIV:
+ ins = INS_fdiv;
+ break;
+ case GT_NEG:
+ ins = INS_fneg;
+ break;
+
+ default:
+ NYI("Unhandled oper in genGetInsForOper() - float");
+ unreached();
+ break;
+ }
+ }
+ else
+ {
+ switch (oper)
+ {
+ case GT_ADD:
+ ins = INS_add;
+ break;
+ case GT_AND:
+ ins = INS_and;
+ break;
+ case GT_DIV:
+ ins = INS_sdiv;
+ break;
+ case GT_UDIV:
+ ins = INS_udiv;
+ break;
+ case GT_MUL:
+ ins = INS_mul;
+ break;
+ case GT_LSH:
+ ins = INS_lsl;
+ break;
+ case GT_NEG:
+ ins = INS_neg;
+ break;
+ case GT_NOT:
+ ins = INS_mvn;
+ break;
+ case GT_OR:
+ ins = INS_orr;
+ break;
+ case GT_ROR:
+ ins = INS_ror;
+ break;
+ case GT_RSH:
+ ins = INS_asr;
+ break;
+ case GT_RSZ:
+ ins = INS_lsr;
+ break;
+ case GT_SUB:
+ ins = INS_sub;
+ break;
+ case GT_XOR:
+ ins = INS_eor;
+ break;
+
+ default:
+ NYI("Unhandled oper in genGetInsForOper() - integer");
+ unreached();
+ break;
+ }
+ }
+ return ins;
+}
+
+//------------------------------------------------------------------------
+// genCodeForShift: Generates the code sequence for a GenTree node that
+// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
+//
+// Arguments:
+// tree - the bit shift node (that specifies the type of bit shift to perform).
+//
+// Assumptions:
+// a) All GenTrees are register allocated.
+//
+void CodeGen::genCodeForShift(GenTreePtr tree)
+{
+ var_types targetType = tree->TypeGet();
+ genTreeOps oper = tree->OperGet();
+ instruction ins = genGetInsForOper(oper, targetType);
+ emitAttr size = emitTypeSize(tree);
+
+ assert(tree->gtRegNum != REG_NA);
+
+ GenTreePtr operand = tree->gtGetOp1();
+ genConsumeReg(operand);
+
+ GenTreePtr shiftBy = tree->gtGetOp2();
+ if (!shiftBy->IsCnsIntOrI())
+ {
+ genConsumeReg(shiftBy);
+ getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
+ }
+ else
+ {
+ unsigned immWidth = emitter::getBitWidth(size); // immWidth will be set to 32 or 64
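+ // Mask the shift amount down to the 0..immWidth-1 range that the immediate form encodes.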
+ ssize_t shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1);
+
+ getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm);
+ }
+
+ genProduceReg(tree);
+}
+
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
+{
+ regNumber dstReg = tree->gtRegNum;
+
+ GenTree* unspillTree = tree;
+ if (tree->gtOper == GT_RELOAD)
+ {
+ unspillTree = tree->gtOp.gtOp1;
+ }
+
+ if (unspillTree->gtFlags & GTF_SPILLED)
+ {
+ if (genIsRegCandidateLocal(unspillTree))
+ {
+ // Reset spilled flag, since we are going to load a local variable from its home location.
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+
+ GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ var_types targetType = unspillTree->gtType;
+ instruction ins = ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum));
+ emitAttr attr = emitTypeSize(targetType);
+ emitter* emit = getEmitter();
+
+ // Fixes Issue #3326
+ attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
+
+ // Load local variable from its home location.
+ inst_RV_TT(ins, dstReg, unspillTree, 0, attr);
+
+ unspillTree->SetInReg();
+
+ // TODO-Review: We would like to call:
+ // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
+ // instead of the following code, but this ends up hitting this assert:
+ // assert((regSet.rsMaskVars & regMask) == 0);
+ // due to issues with LSRA resolution moves.
+ // So, just force it for now. This probably indicates a condition that creates a GC hole!
+ //
+ // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
+ // because the variable is not really going live or dead, but that method is somewhat poorly
+ // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
+ // This code exists in other CodeGen*.cpp files.
+
+ // Don't update the variable's location if we are just re-spilling it again.
+
+ if ((unspillTree->gtFlags & GTF_SPILL) == 0)
+ {
+ genUpdateVarReg(varDsc, tree);
+#ifdef DEBUG
+ if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
+ varDsc->PrintVarReg();
+ printf(" is becoming live ");
+ compiler->printTreeID(unspillTree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ regSet.AddMaskVars(genGetRegMask(varDsc));
+ }
+
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+ else if (unspillTree->IsMultiRegCall())
+ {
+ GenTreeCall* call = unspillTree->AsCall();
+ ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = pRetTypeDesc->GetReturnRegCount();
+ GenTreeCopyOrReload* reloadTree = nullptr;
+ if (tree->OperGet() == GT_RELOAD)
+ {
+ reloadTree = tree->AsCopyOrReload();
+ }
+
+ // In case of multi-reg call node, GTF_SPILLED flag on it indicates that
+ // one or more of its result regs are spilled. Call node needs to be
+ // queried to know which specific result regs to be unspilled.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ unsigned flags = call->GetRegSpillFlagByIdx(i);
+ if ((flags & GTF_SPILLED) != 0)
+ {
+ var_types dstType = pRetTypeDesc->GetReturnRegType(i);
+ regNumber unspillTreeReg = call->GetRegNumByIdx(i);
+
+ if (reloadTree != nullptr)
+ {
+ dstReg = reloadTree->GetRegNumByIdx(i);
+ if (dstReg == REG_NA)
+ {
+ dstReg = unspillTreeReg;
+ }
+ }
+ else
+ {
+ dstReg = unspillTreeReg;
+ }
+
+ TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i);
+ getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(),
+ 0);
+ compiler->tmpRlsTemp(t);
+ gcInfo.gcMarkRegPtrVal(dstReg, dstType);
+ }
+ }
+
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+ unspillTree->SetInReg();
+ }
+ else
+ {
+ TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
+ getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg,
+ t->tdTempNum(), 0);
+ compiler->tmpRlsTemp(t);
+
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+ unspillTree->SetInReg();
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+ }
+}
+
+ // Do a liveness update for a subnode that is being consumed by codegen,
+ // including the logic to reload it if needed, and also take care of
+ // placing the value in the desired register.
+void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg)
+{
+ regNumber treeReg = genConsumeReg(tree);
+ if (treeReg != needReg)
+ {
+ var_types targetType = tree->TypeGet();
+ inst_RV_RV(ins_Copy(targetType), needReg, treeReg, targetType);
+ }
+}
+
+void CodeGen::genRegCopy(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_COPY);
+
+ var_types targetType = treeNode->TypeGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(targetReg != REG_NA);
+
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+
+ // Check whether this node and the node from which we're copying the value have the same
+ // register type.
+ // This can happen if (currently iff) we have a SIMD vector type that fits in an integer
+ // register, in which case it is passed as an argument, or returned from a call,
+ // in an integer register and must be copied if it's in a floating point register.
+
+ if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1))
+ {
+ inst_RV_RV(INS_fmov, targetReg, genConsumeReg(op1), targetType);
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
+ }
+
+ if (op1->IsLocal())
+ {
+ // The lclVar will never be a def.
+ // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
+ // appropriately set the gcInfo for the copied value.
+ // If not, there are two cases we need to handle:
+ // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
+ // will remain live in its original register.
+ // genProduceReg() will appropriately set the gcInfo for the copied value,
+ // and genConsumeReg will reset it.
+ // - Otherwise, we need to update register info for the lclVar.
+
+ GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
+ assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
+
+ if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ // If we didn't just spill it (in genConsumeReg, above), then update the register info
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ // The old location is dying
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
+
+ genUpdateVarReg(varDsc, treeNode);
+
+ // The new location is going live
+ genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
+ }
+ }
+ }
+ genProduceReg(treeNode);
+}
+
+// Do liveness update for a subnode that is being consumed by codegen.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+regNumber CodeGen::genConsumeReg(GenTree* tree)
+{
+ if (tree->OperGet() == GT_COPY)
+ {
+ genRegCopy(tree);
+ }
+ // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
+ // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
+ // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
+ // always using GT_COPY to make the lclVar location explicit.
+ // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
+ // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
+ // the lclVar (normally when a lclVar is spilled it is then used from its former register
+ // location, which matches the gtRegNum on the node).
+ // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
+ // because if it's on the stack it will always get reloaded into tree->gtRegNum).
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ if ((varDsc->lvRegNum != REG_STK) && (varDsc->lvRegNum != tree->gtRegNum))
+ {
+ inst_RV_RV(ins_Copy(tree->TypeGet()), tree->gtRegNum, varDsc->lvRegNum);
+ }
+ }
+
+ genUnspillRegIfNeeded(tree);
+
+ // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
+ genUpdateLife(tree);
+ assert(tree->gtRegNum != REG_NA);
+
+ // there are three cases where consuming a reg means clearing the bit in the live mask
+ // 1. it was not produced by a local
+ // 2. it was produced by a local that is going dead
+ // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
+
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ assert(varDsc->lvLRACandidate);
+
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
+ }
+ else if (varDsc->lvRegNum == REG_STK)
+ {
+ // We have loaded this into a register only temporarily
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+
+ return tree->gtRegNum;
+}
+
+// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genConsumeAddress(GenTree* addr)
+{
+ if (addr->OperGet() == GT_LEA)
+ {
+ genConsumeAddrMode(addr->AsAddrMode());
+ }
+ else if (!addr->isContained())
+ {
+ genConsumeReg(addr);
+ }
+}
+
+// do liveness update for a subnode that is being consumed by codegen
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
+{
+ if (addr->Base())
+ genConsumeReg(addr->Base());
+ if (addr->Index())
+ genConsumeReg(addr->Index());
+}
+
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genConsumeRegs(GenTree* tree)
+{
+ if (tree->isContained())
+ {
+ if (tree->isIndir())
+ {
+ genConsumeAddress(tree->AsIndir()->Addr());
+ }
+ else if (tree->OperGet() == GT_AND)
+ {
+ // This is the special contained GT_AND that we created in Lowering::LowerCmp()
+ // Now we need to consume the operands of the GT_AND node.
+ genConsumeOperands(tree->AsOp());
+ }
+ else
+ {
+ assert(tree->OperIsLeaf());
+ }
+ }
+ else
+ {
+ genConsumeReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genConsumeOperands: Do liveness update for the operands of a unary or binary tree
+//
+// Arguments:
+// tree - the GenTreeOp whose operands will have their liveness updated.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Note that this logic is localized here because we must do the liveness update in
+// the correct execution order. This is important because we may have two operands
+// that involve the same lclVar, and if one is marked "lastUse" we must handle it
+// after the first.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+
+void CodeGen::genConsumeOperands(GenTreeOp* tree)
+{
+ GenTree* firstOp = tree->gtOp1;
+ GenTree* secondOp = tree->gtOp2;
+ if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
+ {
+ assert(secondOp != nullptr);
+ firstOp = secondOp;
+ secondOp = tree->gtOp1;
+ }
+ if (firstOp != nullptr)
+ {
+ genConsumeRegs(firstOp);
+ }
+ if (secondOp != nullptr)
+ {
+ genConsumeRegs(secondOp);
+ }
+}
+
+// do liveness update for register produced by the current node in codegen
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genProduceReg(GenTree* tree)
+{
+ if (tree->gtFlags & GTF_SPILL)
+ {
+ if (genIsRegCandidateLocal(tree))
+ {
+ // Store local variable to its home location.
+ tree->gtFlags &= ~GTF_REG_VAL;
+ inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(tree->gtLclVarCommon.gtLclNum)), tree,
+ tree->gtRegNum);
+ }
+ else
+ {
+ tree->SetInReg();
+ regSet.rsSpillTree(tree->gtRegNum, tree);
+ tree->gtFlags |= GTF_SPILLED;
+ tree->gtFlags &= ~GTF_SPILL;
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ return;
+ }
+ }
+
+ genUpdateLife(tree);
+
+ // If we've produced a register, mark it as a pointer, as needed.
+ if (tree->gtHasReg())
+ {
+ // We only mark the register in the following cases:
+ // 1. It is not a register candidate local. In this case, we're producing a
+ // register from a local, but the local is not a register candidate. Thus,
+ // we must be loading it as a temp register, and any "last use" flag on
+ // the register wouldn't be relevant.
+ // 2. The register candidate local is going dead. There's no point to mark
+ // the register as live, with a GC pointer, if the variable is dead.
+ if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0))
+ {
+ gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
+ }
+ }
+ tree->SetInReg();
+}
+
+// transfer gc/byref status of src reg to dst reg
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
+{
+ regMaskTP srcMask = genRegMask(src);
+ regMaskTP dstMask = genRegMask(dst);
+
+ if (gcInfo.gcRegGCrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetGCref(dstMask);
+ }
+ else if (gcInfo.gcRegByrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetByref(dstMask);
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(dstMask);
+ }
+}
+
+// generates an ip-relative call or indirect call via reg ('call reg')
+ // pass in 'addr' for a relative call or 'base' for an indirect register call
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr,
+ emitAttr retSize,
+ emitAttr secondRetSize,
+ IL_OFFSETX ilOffset,
+ regNumber base,
+ bool isJump,
+ bool isNoGC)
+{
+
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, 0,
+ retSize, secondRetSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump,
+ emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
+}
+
+// generates an indirect call via addressing mode (call []) given an indir node
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir,
+ emitAttr retSize,
+ emitAttr secondRetSize,
+ IL_OFFSETX ilOffset)
+{
+ genConsumeAddress(indir->Addr());
+
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr, 0,
+ retSize, secondRetSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, indir->Base() ? indir->Base()->gtRegNum : REG_NA,
+ indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset());
+}
+
+// Produce code for a GT_CALL node
+void CodeGen::genCallInstruction(GenTreePtr node)
+{
+ GenTreeCall* call = node->AsCall();
+
+ assert(call->gtOper == GT_CALL);
+
+ gtCallTypes callType = (gtCallTypes)call->gtCallType;
+
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET;
+
+ // all virtuals should have been expanded into a control expression
+ assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
+
+ // Consume all the arg regs
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ continue;
+
+ // Deal with multi register passed struct args.
+ if (argNode->OperGet() == GT_LIST)
+ {
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ regNumber argReg = curArgTabEntry->regNum;
+ for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+ genConsumeReg(putArgRegNode);
+
+ if (putArgRegNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
+ putArgRegNode->gtRegNum);
+ }
+
+ argReg = genRegArgNext(argReg);
+ }
+ }
+ else
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ genConsumeReg(argNode);
+ if (argNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ }
+ }
+
+ // In the case of a varargs call,
+ // the ABI dictates that if we have floating point args,
+ // we must pass the enregistered arguments in both the
+ // integer and floating point registers, so let's do that.
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ NYI_ARM64("CodeGen - IsVarargs");
+ }
+ }
+
+ // Insert a null check on "this" pointer if asked.
+ if (call->NeedsNullCheck())
+ {
+ const regNumber regThis = genGetThisArgReg(call);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, regThis, 0);
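+ // Loading into the zero register discards the value; the load simply faults if 'this' is null.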
+ }
+
+ // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
+ CORINFO_METHOD_HANDLE methHnd;
+ GenTree* target = call->gtControlExpr;
+ if (callType == CT_INDIRECT)
+ {
+ assert(target == nullptr);
+ target = call->gtCall.gtCallAddr;
+ methHnd = nullptr;
+ }
+ else
+ {
+ methHnd = call->gtCallMethHnd;
+ }
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+#ifdef DEBUG
+ // Pass the call signature information down into the emitter so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ if (callType != CT_HELPER)
+ {
+ sigInfo = call->callSig;
+ }
+#endif // DEBUG
+
+ // If fast tail call, then we are done. In this case we setup the args (both reg args
+ // and stack args in incoming arg area) and call target in IP0. Epilog sequence would
+ // generate "br IP0".
+ if (call->IsFastTailCall())
+ {
+ // Don't support fast tail calling JIT helpers
+ assert(callType != CT_HELPER);
+
+ // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
+ assert(target != nullptr);
+
+ genConsumeReg(target);
+
+ if (target->gtRegNum != REG_IP0)
+ {
+ inst_RV_RV(INS_mov, REG_IP0, target->gtRegNum);
+ }
+ return;
+ }
+
+ // For a pinvoke to unmanaged code we emit a label to clear
+ // the GC pointer state before the callsite.
+ // We can't utilize the typical lazy killing of GC pointers
+ // at (or inside) the callsite.
+ if (call->IsUnmanaged())
+ {
+ genDefineTempLabel(genCreateTempLabel());
+ }
+
+ // Determine return value size(s).
+ ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
+ emitAttr retSize = EA_PTRSIZE;
+ emitAttr secondRetSize = EA_UNKNOWN;
+
+ if (call->HasMultiRegRetVal())
+ {
+ retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0));
+ secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1));
+ }
+ else
+ {
+ assert(!varTypeIsStruct(call));
+
+ if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
+ {
+ retSize = EA_GCREF;
+ }
+ else if (call->gtType == TYP_BYREF)
+ {
+ retSize = EA_BYREF;
+ }
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ // We need to propagate the IL offset information to the call instruction, so we can emit
+ // an IL to native mapping record for the call, to support managed return value debugging.
+ // We don't want tail call helper calls that were converted from normal calls to get a record,
+ // so we skip this hash table lookup logic in that case.
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
+ {
+ (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
+ }
+#endif // DEBUGGING_SUPPORT
+
+ if (target != nullptr)
+ {
+ // For Arm64 a call target can not be a contained indirection
+ assert(!target->isContainedIndir());
+
+ // We have already generated code for gtControlExpr evaluating it into a register.
+ // We just need to emit "call reg" in this case.
+ //
+ assert(genIsValidIntReg(target->gtRegNum));
+
+ genEmitCall(emitter::EC_INDIR_R, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
+ retSize, secondRetSize, ilOffset, genConsumeReg(target));
+ }
+ else
+ {
+ // Generate a direct call to a non-virtual user defined or helper method
+ assert(callType == CT_HELPER || callType == CT_USER_FUNC);
+
+ void* addr = nullptr;
+ if (callType == CT_HELPER)
+ {
+ // Direct call to a helper method.
+ CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ void* pAddr = nullptr;
+ addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+
+ if (addr == nullptr)
+ {
+ addr = pAddr;
+ }
+ }
+ else
+ {
+ // Direct call to a non-virtual user function.
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
+ if (call->IsSameThis())
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
+ }
+
+ if ((call->NeedsNullCheck()) == 0)
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
+ }
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
+
+ addr = addrInfo.addr;
+ }
+#if 0
+ // Use this path if you want to load an absolute call target using
+ // a sequence of movs followed by an indirect call (blr instruction)
+
+ // Load the call target address in x16
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_IP0, (ssize_t) addr);
+
+ // indirect call to constant address in IP0
+ genEmitCall(emitter::EC_INDIR_R,
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ nullptr, //addr
+ retSize,
+ secondRetSize,
+ ilOffset,
+ REG_IP0);
+#else
+ // Non-virtual direct call to known addresses
+ genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, retSize, secondRetSize,
+ ilOffset);
+#endif
+ }
+
+ // if it was a pinvoke we may have needed to get the address of a label
+ if (genPendingCallLabel)
+ {
+ assert(call->IsUnmanaged());
+ genDefineTempLabel(genPendingCallLabel);
+ genPendingCallLabel = nullptr;
+ }
+
+ // Update GC info:
+ // All Callee arg registers are trashed and no longer contain any GC pointers.
+ // TODO-ARM64-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
+ // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
+ // registers from RBM_CALLEE_TRASH
+ assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
+ gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
+
+ var_types returnType = call->TypeGet();
+ if (returnType != TYP_VOID)
+ {
+ regNumber returnReg;
+
+ if (call->HasMultiRegRetVal())
+ {
+ assert(pRetTypeDesc != nullptr);
+ unsigned regCount = pRetTypeDesc->GetReturnRegCount();
+
+ // If regs allocated to call node are different from ABI return
+ // regs in which the call has returned its result, move the result
+ // to regs allocated to call node.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types regType = pRetTypeDesc->GetReturnRegType(i);
+ returnReg = pRetTypeDesc->GetABIReturnReg(i);
+ regNumber allocatedReg = call->GetRegNumByIdx(i);
+ if (returnReg != allocatedReg)
+ {
+ inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
+ }
+ }
+ }
+ else
+ {
+ if (varTypeIsFloating(returnType))
+ {
+ returnReg = REG_FLOATRET;
+ }
+ else
+ {
+ returnReg = REG_INTRET;
+ }
+
+ if (call->gtRegNum != returnReg)
+ {
+ inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
+ }
+ }
+
+ genProduceReg(call);
+ }
+
+ // If there is nothing next, that means the result is thrown away, so this value is not live.
+ // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
+ if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
+ {
+ gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ }
+}
+
+// Produce code for a GT_JMP node.
+ // The arguments of the caller need to be transferred to the callee before exiting the caller.
+ // The actual jump to the callee is generated as part of the caller's epilog sequence.
+ // Therefore the codegen for GT_JMP ensures that the callee arguments are correctly set up.
+void CodeGen::genJmpMethod(GenTreePtr jmp)
+{
+ assert(jmp->OperGet() == GT_JMP);
+ assert(compiler->compJmpOpUsed);
+
+ // If no arguments, nothing to do
+ if (compiler->info.compArgsCount == 0)
+ {
+ return;
+ }
+
+ // Make sure register arguments are in their initial registers
+ // and stack arguments are put back as well.
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ // First move any enregistered stack arguments back to the stack.
+ // At the same time, any reg arg not in its correct register is moved back to its stack location.
+ //
+ // We are not strictly required to spill reg args that are not in the desired register for a jmp call,
+ // but that would require us to deal with circularity while moving values around. Spilling
+ // to the stack keeps the implementation simple, which is not a bad trade-off given that jmp calls
+ // are not frequent.
+ for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+ {
+ varDsc = compiler->lvaTable + varNum;
+
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = compiler->lvaTable + fieldVarNum;
+ }
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
+ {
+            // Skip reg args that are already in the right register for the jmp call.
+ // If not, we will spill such args to their stack locations.
+ //
+ // If we need to generate a tail call profiler hook, then spill all
+ // arg regs to free them up for the callback.
+ if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
+ continue;
+ }
+ else if (varDsc->lvRegNum == REG_STK)
+ {
+            // Skip args which are currently living on the stack.
+ continue;
+ }
+
+ // If we came here it means either a reg argument not in the right register or
+ // a stack argument currently living in a register. In either case the following
+ // assert should hold.
+ assert(varDsc->lvRegNum != REG_STK);
+ assert(varDsc->TypeGet() != TYP_STRUCT);
+ var_types storeType = genActualType(varDsc->TypeGet());
+ emitAttr storeSize = emitActualTypeSize(storeType);
+
+ getEmitter()->emitIns_S_R(ins_Store(storeType), storeSize, varDsc->lvRegNum, varNum, 0);
+
+ // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of varDsc->lvRegNum.
+ regMaskTP tempMask = genRegMask(varDsc->lvRegNum);
+ regSet.RemoveMaskVars(tempMask);
+ gcInfo.gcMarkRegSetNpt(tempMask);
+ if (compiler->lvaIsGCTracked(varDsc))
+ {
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+ }
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // At this point all arg regs are free.
+ // Emit tail call profiler callback.
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif
+
+    // Next, move any register arguments that are not currently in their argument registers back into them.
+ regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
+ unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
+ for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+ {
+ varDsc = compiler->lvaTable + varNum;
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = compiler->lvaTable + fieldVarNum;
+ }
+ noway_assert(varDsc->lvIsParam);
+
+ // Skip if arg not passed in a register.
+ if (!varDsc->lvIsRegArg)
+ continue;
+
+ // Register argument
+ noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+
+ // Is register argument already in the right register?
+ // If not load it from its stack location.
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+        regNumber argRegNext = REG_NA; // second register of a multi-reg struct arg, if used
+
+ if (varDsc->lvRegNum != argReg)
+ {
+ var_types loadType = TYP_UNDEF;
+ if (varTypeIsStruct(varDsc))
+ {
+ // Must be <= 16 bytes or else it wouldn't be passed in registers
+ noway_assert(EA_SIZE_IN_BYTES(varDsc->lvSize()) <= MAX_PASS_MULTIREG_BYTES);
+ loadType = compiler->getJitGCType(varDsc->lvGcLayout[0]);
+ }
+ else
+ {
+ loadType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
+ }
+ emitAttr loadSize = emitActualTypeSize(loadType);
+ getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argReg, varNum, 0);
+
+ // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.AddMaskVars(genRegMask(argReg));
+ gcInfo.gcMarkRegPtrVal(argReg, loadType);
+
+ if (compiler->lvaIsMultiregStruct(varDsc))
+ {
+ if (varDsc->lvIsHfa())
+ {
+ NYI_ARM64("CodeGen::genJmpMethod with multireg HFA arg");
+ }
+
+ // Restore the second register.
+ argRegNext = genRegArgNext(argReg);
+
+ loadType = compiler->getJitGCType(varDsc->lvGcLayout[1]);
+ loadSize = emitActualTypeSize(loadType);
+ getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argRegNext, varNum, TARGET_POINTER_SIZE);
+
+ regSet.AddMaskVars(genRegMask(argRegNext));
+ gcInfo.gcMarkRegPtrVal(argRegNext, loadType);
+ }
+
+ if (compiler->lvaIsGCTracked(varDsc))
+ {
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+ }
+ }
+
+ // In case of a jmp call to a vararg method ensure only integer registers are passed.
+ if (compiler->info.compIsVarArgs)
+ {
+ assert((genRegMask(argReg) & RBM_ARG_REGS) != RBM_NONE);
+
+ fixedIntArgMask |= genRegMask(argReg);
+
+ if (compiler->lvaIsMultiregStruct(varDsc))
+ {
+ assert(argRegNext != REG_NA);
+ fixedIntArgMask |= genRegMask(argRegNext);
+ }
+
+ if (argReg == REG_ARG_0)
+ {
+ assert(firstArgVarNum == BAD_VAR_NUM);
+ firstArgVarNum = varNum;
+ }
+ }
+ }
+
+    // Jmp call to a vararg method - if the method has fewer than 8 fixed arguments,
+    // load the remaining integer arg registers from the corresponding
+    // shadow stack slots. This is because we don't know the number and type
+    // of non-fixed params passed by the caller, so we have to assume the worst case
+    // of the caller passing all 8 integer arg regs.
+    //
+    // The caller could have passed gc-ref/byref type var args. Since these are var args,
+    // the callee has no way of knowing their gc-ness. Therefore, mark the region that loads
+    // the remaining arg registers from shadow stack slots as non-gc interruptible.
+ if (fixedIntArgMask != RBM_NONE)
+ {
+ assert(compiler->info.compIsVarArgs);
+ assert(firstArgVarNum != BAD_VAR_NUM);
+
+ regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
+ if (remainingIntArgMask != RBM_NONE)
+ {
+ getEmitter()->emitDisableGC();
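+            // Walk all of the integer arg registers; any register that did not carry a fixed
+            // arg is reloaded from its shadow stack slot relative to the first argument.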
+ for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
+ {
+ regNumber argReg = intArgRegs[argNum];
+ regMaskTP argRegMask = genRegMask(argReg);
+
+ if ((remainingIntArgMask & argRegMask) != 0)
+ {
+ remainingIntArgMask &= ~argRegMask;
+ getEmitter()->emitIns_R_S(INS_ldr, EA_8BYTE, argReg, firstArgVarNum, argOffset);
+ }
+
+ argOffset += REGSIZE_BYTES;
+ }
+ getEmitter()->emitEnableGC();
+ }
+ }
+}
+
+// Produce code for a GT_LEA subnode.
+void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
+{
+ genConsumeOperands(lea);
+ emitter* emit = getEmitter();
+ emitAttr size = emitTypeSize(lea);
+ unsigned offset = lea->gtOffset;
+
+ // In ARM64 we can only load addresses of the form:
+ //
+ // [Base + index*scale]
+ // [Base + Offset]
+ // [Literal] (PC-Relative)
+ //
+ // So for the case of a LEA node of the form [Base + Index*Scale + Offset] we will generate:
+ // destReg = baseReg + indexReg * scale;
+ // destReg = destReg + offset;
+ //
+ // TODO-ARM64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture
+ // addressing mode instruction. Currently we're 'cheating' by producing one or more
+ // instructions to generate the addressing mode so we need to modify lowering to
+ // produce LEAs that are a 1:1 relationship to the ARM64 architecture.
+ if (lea->Base() && lea->Index())
+ {
+ GenTree* memBase = lea->Base();
+ GenTree* index = lea->Index();
+ unsigned offset = lea->gtOffset;
+
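+        // The scale must be a power of two; its log2 becomes the left-shift (lsl) amount
+        // used by the scaled-register form of the add instruction below.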
+ DWORD lsl;
+
+ assert(isPow2(lea->gtScale));
+ BitScanForward(&lsl, lea->gtScale);
+
+ assert(lsl <= 4);
+
+ if (offset != 0)
+ {
+ regMaskTP tmpRegMask = lea->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ noway_assert(tmpReg != REG_NA);
+
+ if (emitter::emitIns_valid_imm_for_add(offset, EA_8BYTE))
+ {
+ if (lsl > 0)
+ {
+ // Generate code to set tmpReg = base + index*scale
+ emit->emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl,
+ INS_OPTS_LSL);
+ }
+ else // no scale
+ {
+ // Generate code to set tmpReg = base + index
+ emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum);
+ }
+
+ // Then compute target reg from [tmpReg + offset]
+ emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, tmpReg, offset);
+ }
+ else // large offset
+ {
+ // First load/store tmpReg with the large offset constant
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+ // Then add the base register
+ // rd = rd + base
+ emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, tmpReg, memBase->gtRegNum);
+
+ noway_assert(tmpReg != index->gtRegNum);
+
+ // Then compute target reg from [tmpReg + index*scale]
+ emit->emitIns_R_R_R_I(INS_add, size, lea->gtRegNum, tmpReg, index->gtRegNum, lsl, INS_OPTS_LSL);
+ }
+ }
+ else
+ {
+ if (lsl > 0)
+ {
+ // Then compute target reg from [base + index*scale]
+ emit->emitIns_R_R_R_I(INS_add, size, lea->gtRegNum, memBase->gtRegNum, index->gtRegNum, lsl,
+ INS_OPTS_LSL);
+ }
+ else
+ {
+ // Then compute target reg from [base + index]
+ emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, memBase->gtRegNum, index->gtRegNum);
+ }
+ }
+ }
+ else if (lea->Base())
+ {
+ GenTree* memBase = lea->Base();
+
+ if (emitter::emitIns_valid_imm_for_add(offset, EA_8BYTE))
+ {
+ if (offset != 0)
+ {
+ // Then compute target reg from [memBase + offset]
+ emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, memBase->gtRegNum, offset);
+ }
+ else // offset is zero
+ {
+ emit->emitIns_R_R(INS_mov, size, lea->gtRegNum, memBase->gtRegNum);
+ }
+ }
+ else
+ {
+ // We require a tmpReg to hold the offset
+ regMaskTP tmpRegMask = lea->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ noway_assert(tmpReg != REG_NA);
+
+ // First load tmpReg with the large offset constant
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+
+ // Then compute target reg from [memBase + tmpReg]
+ emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, memBase->gtRegNum, tmpReg);
+ }
+ }
+ else if (lea->Index())
+ {
+        // If we encounter a GT_LEA node without a base it means it came about while
+        // attempting to optimize an arbitrary arithmetic expression during lowering.
+        // This is currently disabled in ARM64 since we need to adjust lowering to account
+        // for the simpler instructions ARM64 supports.
+ // TODO-ARM64-CQ: Fix this and let LEA optimize arithmetic trees too.
+ assert(!"We shouldn't see a baseless address computation during CodeGen for ARM64");
+ }
+
+ genProduceReg(lea);
+}
+
+//-------------------------------------------------------------------------------------------
+// genJumpKindsForTree: Determine the number and kinds of conditional branches
+// necessary to implement the given GT_CMP node
+//
+// Arguments:
+//    cmpTree           - (input) The GenTree Relop node that is used to set the Condition codes
+// jmpKind[2] - (output) One or two conditional branch instructions
+// jmpToTrueLabel[2] - (output) On Arm64 both branches will always branch to the true label
+//
+// Return Value:
+// Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[]
+//
+// Assumptions:
+// At least one conditional branch instruction will be returned.
+// Typically only one conditional branch is needed
+// and the second jmpKind[] value is set to EJ_NONE
+//-------------------------------------------------------------------------------------------
+
+// static
+void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
+{
+ // On Arm64 both branches will always branch to the true label
+ jmpToTrueLabel[0] = true;
+ jmpToTrueLabel[1] = true;
+
+ // For integer comparisons just use genJumpKindForOper
+ if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal()))
+ {
+ CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+ jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind);
+ jmpKind[1] = EJ_NONE;
+ }
+ else // We have a Floating Point Compare operation
+ {
+ assert(cmpTree->OperIsCompare());
+
+ // For details on this mapping, see the ARM64 Condition Code
+ // table at section C1.2.3 in the ARMV8 architecture manual
+ //
+
+ // We must check the GTF_RELOP_NAN_UN to find out
+ // if we need to branch when we have a NaN operand.
+ //
+ if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
+ {
+            // Must branch if we have a NaN, unordered
+ switch (cmpTree->gtOper)
+ {
+ case GT_EQ:
+ jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's)
+ jmpKind[1] = EJ_vs; // branch or set when we have a NaN
+ break;
+
+ case GT_NE:
+ jmpKind[0] = EJ_ne; // branch or set when not equal (or have NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_LT:
+ jmpKind[0] = EJ_lt; // branch or set when less than (or have NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_LE:
+ jmpKind[0] = EJ_le; // branch or set when less than or equal (or have NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_GT:
+ jmpKind[0] = EJ_hi; // branch or set when greater than (or have NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_GE:
+ jmpKind[0] = EJ_hs; // branch or set when greater than or equal (or have NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0)
+ {
+            // Do not branch if we have a NaN, unordered
+ switch (cmpTree->gtOper)
+ {
+ case GT_EQ:
+ jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_NE:
+ jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's)
+ jmpKind[1] = EJ_lo; // branch or set when less than (and no NaN's)
+ break;
+
+ case GT_LT:
+ jmpKind[0] = EJ_lo; // branch or set when less than (and no NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_LE:
+ jmpKind[0] = EJ_ls; // branch or set when less than or equal (and no NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_GT:
+ jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_GE:
+ jmpKind[0] = EJ_ge; // branch or set when greater than or equal (and no NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ }
+}
+
+//-------------------------------------------------------------------------------------------
+// genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
+// corresponding to a binary Relational operator result.
+//
+// Arguments:
+// dstReg - The target register to set to 1 or 0
+// tree - The GenTree Relop node that was used to set the Condition codes
+//
+// Return Value: none
+//
+// Notes:
+// A full 64-bit value of either 1 or 0 is setup in the 'dstReg'
+//-------------------------------------------------------------------------------------------
+
+void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
+{
+ emitJumpKind jumpKind[2];
+ bool branchToTrueLabel[2];
+ genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);
+ assert(jumpKind[0] != EJ_NONE);
+
+ // Set the reg according to the flags
+ inst_SET(jumpKind[0], dstReg);
+
+    // Do we need to use two operations to set the flags?
+ //
+ if (jumpKind[1] != EJ_NONE)
+ {
+ emitter* emit = getEmitter();
+ bool ordered = ((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
+ insCond secondCond;
+
+ // The only ones that require two operations are the
+ // floating point compare operations of BEQ or BNE.UN
+ //
+ if (tree->gtOper == GT_EQ)
+ {
+ // This must be an ordered comparison.
+ assert(ordered);
+ assert(jumpKind[1] == EJ_vs); // We complement this value
+ secondCond = INS_COND_VC; // for the secondCond
+ }
+ else // gtOper == GT_NE
+ {
+ // This must be BNE.UN (unordered comparison)
+ assert((tree->gtOper == GT_NE) && !ordered);
+ assert(jumpKind[1] == EJ_lo); // We complement this value
+ secondCond = INS_COND_HS; // for the secondCond
+ }
+
+        // The second instruction is a 'csinc' that either keeps the previous dstReg value
+        // (when the second condition holds) or selects ZR incremented by one, which produces a 1.
+
+ emit->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, dstReg, dstReg, REG_ZR, secondCond);
+ }
+}
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer cast
+// This method handles integer overflow checking casts
+// as well as ordinary integer casts.
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The treeNode is not a contained node and must have an assigned register.
+// For a signed convert from byte, the source must be in a byte-addressable register.
+// Neither the source nor target type can be a floating point type.
+//
+// TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register.
+//
+void CodeGen::genIntToIntCast(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_CAST);
+
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+ emitter* emit = getEmitter();
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = genActualType(castOp->TypeGet());
+ emitAttr movSize = emitActualTypeSize(dstType);
+ bool movRequired = false;
+
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber sourceReg = castOp->gtRegNum;
+
+ // For Long to Int conversion we will have a reserved integer register to hold the immediate mask
+ regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ assert(genIsValidIntReg(targetReg));
+ assert(genIsValidIntReg(sourceReg));
+
+ instruction ins = INS_invalid;
+
+ genConsumeReg(castOp);
+ Lowering::CastInfo castInfo;
+
+ // Get information about the cast.
+ Lowering::getCastDescription(treeNode, &castInfo);
+
+ if (castInfo.requiresOverflowCheck)
+ {
+
+ emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
+
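+        // For overflow-checking casts we verify that the source value fits in the destination
+        // type (via a sign test, a bit-mask test, or min/max compares) and branch to the
+        // overflow throw helper block if it does not.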
+ if (castInfo.signCheckOnly)
+ {
+ // We only need to check for a negative value in sourceReg
+ emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, 0);
+ emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
+ noway_assert(genTypeSize(srcType) == 4 || genTypeSize(srcType) == 8);
+            // This is the only interesting case for ensuring the upper bits are zero.
+ if ((srcType == TYP_INT) && (dstType == TYP_ULONG))
+ {
+ // cast to TYP_ULONG:
+ // We use a mov with size=EA_4BYTE
+ // which will zero out the upper bits
+ movSize = EA_4BYTE;
+ movRequired = true;
+ }
+ }
+ else if (castInfo.unsignedSource || castInfo.unsignedDest)
+ {
+ // When we are converting from/to unsigned,
+ // we only have to check for any bits set in 'typeMask'
+
+ noway_assert(castInfo.typeMask != 0);
+ emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask);
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+ }
+ else
+ {
+ // For a narrowing signed cast
+ //
+ // We must check the value is in a signed range.
+
+ // Compare with the MAX
+
+ noway_assert((castInfo.typeMin != 0) && (castInfo.typeMax != 0));
+
+ if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize))
+ {
+ emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMax);
+ }
+ else
+ {
+ noway_assert(tmpReg != REG_NA);
+ instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMax);
+ emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
+ }
+
+ emitJumpKind jmpGT = genJumpKindForOper(GT_GT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpGT, SCK_OVERFLOW);
+
+ // Compare with the MIN
+
+ if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize))
+ {
+ emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMin);
+ }
+ else
+ {
+ noway_assert(tmpReg != REG_NA);
+ instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMin);
+ emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
+ }
+
+ emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
+ }
+ ins = INS_mov;
+ }
+ else // Non-overflow checking cast.
+ {
+ if (genTypeSize(srcType) == genTypeSize(dstType))
+ {
+ ins = INS_mov;
+ }
+ else
+ {
+ var_types extendType = TYP_UNKNOWN;
+
+ // If we need to treat a signed type as unsigned
+ if ((treeNode->gtFlags & GTF_UNSIGNED) != 0)
+ {
+ extendType = genUnsignedType(srcType);
+ movSize = emitTypeSize(extendType);
+ movRequired = true;
+ }
+ else
+ {
+ if (genTypeSize(srcType) < genTypeSize(dstType))
+ {
+ extendType = srcType;
+ if (srcType == TYP_UINT)
+ {
+ // If we are casting from a smaller type to
+ // a larger type, then we need to make sure the
+                        // upper 4 bytes are zero to guarantee the correct value.
+                        // Therefore using a mov with EA_4BYTE in place of EA_8BYTE
+                        // will zero the upper bits.
+ movSize = EA_4BYTE;
+ movRequired = true;
+ }
+ }
+ else // (genTypeSize(srcType) > genTypeSize(dstType))
+ {
+ extendType = dstType;
+ if (dstType == TYP_INT)
+ {
+ movSize = EA_8BYTE; // a sxtw instruction requires EA_8BYTE
+ }
+ }
+ }
+
+ ins = ins_Move_Extend(extendType, castOp->InReg());
+ }
+ }
+
+ // We should never be generating a load from memory instruction here!
+ assert(!emit->emitInsIsLoad(ins));
+
+ if ((ins != INS_mov) || movRequired || (targetReg != sourceReg))
+ {
+ emit->emitIns_R_R(ins, movSize, targetReg, sourceReg);
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// The cast is between float and double or vice versa.
+//
+void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
+{
+ // float <--> double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+ genConsumeOperands(treeNode->AsOp());
+
+ // treeNode must be a reg
+ assert(!treeNode->isContained());
+
+ if (srcType != dstType)
+ {
+ insOpts cvtOption = (srcType == TYP_FLOAT) ? INS_OPTS_S_TO_D // convert Single to Double
+ : INS_OPTS_D_TO_S; // convert Double to Single
+
+ getEmitter()->emitIns_R_R(INS_fcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
+ }
+ else if (treeNode->gtRegNum != op1->gtRegNum)
+ {
+ // If double to double cast or float to float cast. Emit a move instruction.
+ getEmitter()->emitIns_R_R(INS_mov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genIntToFloatCast: Generate code to cast an int/long to float/double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType= int32/uint32/int64/uint64 and DstType=float/double.
+//
+void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
+{
+ // int type --> float/double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidIntReg(op1->gtRegNum)); // Must be a valid int reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+ // force the srcType to unsigned if GT_UNSIGNED flag is set
+ if (treeNode->gtFlags & GTF_UNSIGNED)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+    // We should never see a srcType whose size is neither EA_4BYTE nor EA_8BYTE.
+ // For conversions from small types (byte/sbyte/int16/uint16) to float/double,
+ // we expect the front-end or lowering phase to have generated two levels of cast.
+ //
+ emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
+ noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE));
+
+ instruction ins = varTypeIsUnsigned(srcType) ? INS_ucvtf : INS_scvtf;
+ insOpts cvtOption = INS_OPTS_NONE; // invalid value
+
+ if (dstType == TYP_DOUBLE)
+ {
+ if (srcSize == EA_4BYTE)
+ {
+ cvtOption = INS_OPTS_4BYTE_TO_D;
+ }
+ else
+ {
+ assert(srcSize == EA_8BYTE);
+ cvtOption = INS_OPTS_8BYTE_TO_D;
+ }
+ }
+ else
+ {
+ assert(dstType == TYP_FLOAT);
+ if (srcSize == EA_4BYTE)
+ {
+ cvtOption = INS_OPTS_4BYTE_TO_S;
+ }
+ else
+ {
+ assert(srcSize == EA_8BYTE);
+ cvtOption = INS_OPTS_8BYTE_TO_S;
+ }
+ }
+
+ genConsumeOperands(treeNode->AsOp());
+
+ getEmitter()->emitIns_R_R(ins, emitTypeSize(dstType), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genFloatToIntCast: Generate code to cast float/double to int/long
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType=float/double and DstType= int32/uint32/int64/uint64
+//
+void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
+{
+    // We don't expect to see overflow-detecting float/double --> int type conversions here,
+    // as they should have been converted into helper calls by the front-end.
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidIntReg(targetReg)); // Must be a valid int reg.
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
+
+    // We should never see a dstType whose size is neither EA_4BYTE nor EA_8BYTE.
+ // For conversions to small types (byte/sbyte/int16/uint16) from float/double,
+ // we expect the front-end or lowering phase to have generated two levels of cast.
+ //
+ emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
+ noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE));
+
+ instruction ins = INS_fcvtzs; // default to sign converts
+ insOpts cvtOption = INS_OPTS_NONE; // invalid value
+
+ if (varTypeIsUnsigned(dstType))
+ {
+ ins = INS_fcvtzu; // use unsigned converts
+ }
+
+ if (srcType == TYP_DOUBLE)
+ {
+ if (dstSize == EA_4BYTE)
+ {
+ cvtOption = INS_OPTS_D_TO_4BYTE;
+ }
+ else
+ {
+ assert(dstSize == EA_8BYTE);
+ cvtOption = INS_OPTS_D_TO_8BYTE;
+ }
+ }
+ else
+ {
+ assert(srcType == TYP_FLOAT);
+ if (dstSize == EA_4BYTE)
+ {
+ cvtOption = INS_OPTS_S_TO_4BYTE;
+ }
+ else
+ {
+ assert(dstSize == EA_8BYTE);
+ cvtOption = INS_OPTS_S_TO_8BYTE;
+ }
+ }
+
+ genConsumeOperands(treeNode->AsOp());
+
+ getEmitter()->emitIns_R_R(ins, dstSize, treeNode->gtRegNum, op1->gtRegNum, cvtOption);
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCkfinite: Generate code for ckfinite opcode.
+//
+// Arguments:
+// treeNode - The GT_CKFINITE node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// GT_CKFINITE node has reserved an internal register.
+//
+// TODO-ARM64-CQ - mark the operand as contained if known to be in
+// memory (e.g. field or an array element).
+//
+void CodeGen::genCkfinite(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_CKFINITE);
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ var_types targetType = treeNode->TypeGet();
+ int expMask = (targetType == TYP_FLOAT) ? 0x7F8 : 0x7FF; // Bit mask to extract exponent.
+ int shiftAmount = targetType == TYP_FLOAT ? 20 : 52;
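+    // Shifting the raw bits right by 20 (float) or 52 (double) and masking with expMask
+    // isolates the biased exponent; an all-ones exponent means the value is a NaN or infinity.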
+
+ emitter* emit = getEmitter();
+
+ // Extract exponent into a register.
+ regNumber intReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+ regNumber fpReg = genConsumeReg(op1);
+ assert(intReg != REG_NA);
+
+ emit->emitIns_R_R(ins_Copy(targetType), emitTypeSize(treeNode), intReg, fpReg);
+ emit->emitIns_R_R_I(INS_lsr, emitTypeSize(targetType), intReg, intReg, shiftAmount);
+
+    // Mask off the exponent bits and check whether the exponent is all 1's
+ emit->emitIns_R_R_I(INS_and, EA_4BYTE, intReg, intReg, expMask);
+ emit->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, expMask);
+
+ // If exponent is all 1's, throw ArithmeticException
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpEqual, SCK_ARITH_EXCPN);
+
+    // If it is a finite value, copy it to targetReg.
+ if (treeNode->gtRegNum != fpReg)
+ {
+ emit->emitIns_R_R(ins_Copy(targetType), emitTypeSize(treeNode), treeNode->gtRegNum, fpReg);
+ }
+ genProduceReg(treeNode);
+}
+
+int CodeGenInterface::genSPtoFPdelta()
+{
+ int delta;
+
+ // We place the saved frame pointer immediately above the outgoing argument space.
+ delta = (int)compiler->lvaOutgoingArgSpaceSize;
+
+ assert(delta >= 0);
+ return delta;
+}
+
+//---------------------------------------------------------------------
+// genTotalFrameSize - return the total size of the stack frame, including local size,
+// callee-saved register size, etc.
+//
+// Return value:
+// Total frame size
+//
+
+int CodeGenInterface::genTotalFrameSize()
+{
+ // For varargs functions, we home all the incoming register arguments. They are not
+ // included in the compCalleeRegsPushed count. This is like prespill on ARM32, but
+ // since we don't use "push" instructions to save them, we don't have to do the
+ // save of these varargs register arguments as the first thing in the prolog.
+
+ assert(!IsUninitialized(compiler->compCalleeRegsPushed));
+
+ int totalFrameSize = (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) +
+ compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
+
+ assert(totalFrameSize >= 0);
+ return totalFrameSize;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
+// This number is going to be negative, since the Caller-SP is at a higher
+// address than the frame pointer.
+//
+// There must be a frame pointer to call this function!
+
+int CodeGenInterface::genCallerSPtoFPdelta()
+{
+ assert(isFramePointerUsed());
+ int callerSPtoFPdelta;
+
+ callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
+
+ assert(callerSPtoFPdelta <= 0);
+ return callerSPtoFPdelta;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
+//
+// This number will be negative.
+
+int CodeGenInterface::genCallerSPtoInitialSPdelta()
+{
+ int callerSPtoSPdelta = 0;
+
+ callerSPtoSPdelta -= genTotalFrameSize();
+
+ assert(callerSPtoSPdelta <= 0);
+ return callerSPtoSPdelta;
+}
+
+//---------------------------------------------------------------------
+// genIntrinsic - generate code for a given intrinsic
+//
+// Arguments
+// treeNode - the GT_INTRINSIC node
+//
+// Return value:
+// None
+//
+void CodeGen::genIntrinsic(GenTreePtr treeNode)
+{
+ // Both operand and its result must be of the same floating point type.
+ GenTreePtr srcNode = treeNode->gtOp.gtOp1;
+ assert(varTypeIsFloating(srcNode));
+ assert(srcNode->TypeGet() == treeNode->TypeGet());
+
+ // Right now only Abs/Round/Sqrt are treated as math intrinsics.
+ //
+ switch (treeNode->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Abs:
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_fabs, emitTypeSize(treeNode), treeNode, srcNode);
+ break;
+
+ case CORINFO_INTRINSIC_Round:
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_frintn, emitTypeSize(treeNode), treeNode, srcNode);
+ break;
+
+ case CORINFO_INTRINSIC_Sqrt:
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_fsqrt, emitTypeSize(treeNode), treeNode, srcNode);
+ break;
+
+ default:
+ assert(!"genIntrinsic: Unsupported intrinsic");
+ unreached();
+ }
+
+ genProduceReg(treeNode);
+}
+
+//---------------------------------------------------------------------
+// genPutArgStk - generate code for a GT_PUTARG_STK node
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+//
+// Return value:
+// None
+//
+void CodeGen::genPutArgStk(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_PUTARG_STK);
+ var_types targetType = treeNode->TypeGet();
+ GenTreePtr source = treeNode->gtOp.gtOp1;
+ emitter* emit = getEmitter();
+
+ // This is the varNum for our store operations,
+ // typically this is the varNum for the Outgoing arg space
+ // When we are generating a tail call it will be the varNum for arg0
+ unsigned varNumOut;
+ unsigned argOffsetMax; // Records the maximum size of this area for assert checks
+
+ // This is the varNum for our load operations,
+ // only used when we have a multireg struct with a LclVar source
+ unsigned varNumInp = BAD_VAR_NUM;
+
+ // Get argument offset to use with 'varNumOut'
+    // Here we cross-check that the argument offset hasn't changed from lowering to codegen, since
+    // we store the arg slot number in the GT_PUTARG_STK node during the lowering phase.
+ unsigned argOffsetOut = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE;
+
+#ifdef DEBUG
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
+ assert(curArgTabEntry);
+ assert(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE));
+#endif // DEBUG
+
+#if FEATURE_FASTTAILCALL
+ bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
+#else
+ const bool putInIncomingArgArea = false;
+#endif
+    // Decide whether to set up the stk arg in the incoming or the outgoing arg area.
+    // Fast tail calls are implemented as epilog+jmp, so the stk arg is set up in the incoming arg area.
+    // All other calls - the stk arg is set up in the outgoing arg area.
+ if (putInIncomingArgArea)
+ {
+ varNumOut = getFirstArgWithStackSlot();
+ argOffsetMax = compiler->compArgSize;
+#if FEATURE_FASTTAILCALL
+ // This must be a fast tail call.
+ assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
+
+        // Since it is a fast tail call, the existence of the first incoming arg is guaranteed
+        // because a fast tail call requires that the caller's incoming arg area is >= the outgoing
+        // arg area required for the tail call.
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]);
+ assert(varDsc != nullptr);
+#endif // FEATURE_FASTTAILCALL
+ }
+ else
+ {
+ varNumOut = compiler->lvaOutgoingArgSpaceVar;
+ argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
+ }
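+    // A struct argument is identified either by its TYP_STRUCT type or by a GT_LIST source
+    // (the multi-reg struct case).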
+ bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_LIST);
+
+ if (!isStruct) // a normal non-Struct argument
+ {
+ instruction storeIns = ins_Store(targetType);
+ emitAttr storeAttr = emitTypeSize(targetType);
+
+ // If it is contained then source must be the integer constant zero
+ if (source->isContained())
+ {
+ assert(source->OperGet() == GT_CNS_INT);
+ assert(source->AsIntConCommon()->IconValue() == 0);
+ emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNumOut, argOffsetOut);
+ }
+ else
+ {
+ genConsumeReg(source);
+ emit->emitIns_S_R(storeIns, storeAttr, source->gtRegNum, varNumOut, argOffsetOut);
+ }
+ argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
+        assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ else // We have some kind of a struct argument
+ {
+ assert(source->isContained()); // We expect that this node was marked as contained in LowerArm64
+
+ if (source->OperGet() == GT_LIST)
+ {
+ // Deal with the multi register passed struct args.
+ GenTreeArgList* argListPtr = source->AsArgList();
+
+ // Evaluate each of the GT_LIST items into their register
+ // and store their register into the outgoing argument area
+ for (; argListPtr != nullptr; argListPtr = argListPtr->Rest())
+ {
+ GenTreePtr nextArgNode = argListPtr->gtOp.gtOp1;
+ genConsumeReg(nextArgNode);
+
+ regNumber reg = nextArgNode->gtRegNum;
+ var_types type = nextArgNode->TypeGet();
+ emitAttr attr = emitTypeSize(type);
+
+ // Emit store instructions to store the registers produced by the GT_LIST into the outgoing argument
+ // area
+ emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut);
+ argOffsetOut += EA_SIZE_IN_BYTES(attr);
+                assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ }
+ else // We must have a GT_OBJ or a GT_LCL_VAR
+ {
+ noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ));
+
+ var_types targetType = source->TypeGet();
+ noway_assert(varTypeIsStruct(targetType));
+
+ // We will copy this struct to the stack, possibly using a ldp instruction
+            // Set up loReg and hiReg from the internal registers that we reserved in lowering.
+ //
+ regNumber loReg = REG_NA;
+ regNumber hiReg = REG_NA;
+ regNumber addrReg = REG_NA;
+
+ // In lowerArm64/TreeNodeInfoInitPutArgStk we have reserved two internal integer registers
+ genGetRegPairFromMask(treeNode->gtRsvdRegs, &loReg, &hiReg);
+
+ GenTreeLclVarCommon* varNode = nullptr;
+ GenTreePtr addrNode = nullptr;
+
+ if (source->OperGet() == GT_LCL_VAR)
+ {
+ varNode = source->AsLclVarCommon();
+ }
+ else // we must have a GT_OBJ
+ {
+ assert(source->OperGet() == GT_OBJ);
+
+ addrNode = source->gtOp.gtOp1;
+
+ // addrNode can either be a GT_LCL_VAR_ADDR or an address expression
+ //
+ if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ // We have a GT_OBJ(GT_LCL_VAR_ADDR)
+ //
+                    // We will treat this case the same as above
+                    // (i.e. if we just had this GT_LCL_VAR directly as the source),
+                    // so set 'varNode' to this GT_LCL_VAR_ADDR node
+                    // and continue to the codegen for the LCL_VAR node below.
+ //
+ varNode = addrNode->AsLclVarCommon();
+ addrNode = nullptr;
+ }
+ }
+
+            // Either varNode or addrNode must have been set up above;
+            // the xor ensures that only one of the two is set up, not both.
+ assert((varNode != nullptr) ^ (addrNode != nullptr));
+
+ BYTE gcPtrs[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0
+ BYTE* structGcLayout = &gcPtrs[0]; // The GC layout for the struct
+ unsigned gcPtrCount; // The count of GC pointers in the struct
+ int structSize;
+ bool isHfa;
+
+            // Set up structSize, isHfa, and gcPtrCount.
+ if (varNode != nullptr)
+ {
+ varNumInp = varNode->gtLclNum;
+ assert(varNumInp < compiler->lvaCount);
+ LclVarDsc* varDsc = &compiler->lvaTable[varNumInp];
+
+ assert(varDsc->lvType == TYP_STRUCT);
+ assert(varDsc->lvOnFrame); // This struct also must live in the stack frame
+ assert(!varDsc->lvRegister); // And it can't live in a register (SIMD)
+
+ structSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine
+ // as that is how much stack is allocated for this LclVar
+ isHfa = varDsc->lvIsHfa();
+ gcPtrCount = varDsc->lvStructGcCount;
+ structGcLayout = varDsc->lvGcLayout;
+ }
+ else // addrNode is used
+ {
+ assert(addrNode != nullptr);
+
+ // Generate code to load the address that we need into a register
+ genConsumeAddress(addrNode);
+ addrReg = addrNode->gtRegNum;
+
+ CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass;
+
+ structSize = compiler->info.compCompHnd->getClassSize(objClass);
+ isHfa = compiler->IsHfa(objClass);
+ gcPtrCount = compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
+ }
+
+ bool hasGCpointers = (gcPtrCount > 0); // true if there are any GC pointers in the struct
+
+            // If we have an HFA we can't have any GC pointers;
+            // if not, then the max size for the struct is 16 bytes.
+ if (isHfa)
+ {
+ noway_assert(gcPtrCount == 0);
+ }
+ else
+ {
+ noway_assert(structSize <= 2 * TARGET_POINTER_SIZE);
+ }
+
+ noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES);
+
+ // For a 16-byte structSize with GC pointers we will use two ldr and two str instructions
+ // ldr x2, [x0]
+ // ldr x3, [x0, #8]
+ // str x2, [sp, #16]
+ // str x3, [sp, #24]
+ //
+ // For a 16-byte structSize with no GC pointers we will use a ldp and two str instructions
+ // ldp x2, x3, [x0]
+ // str x2, [sp, #16]
+ // str x3, [sp, #24]
+ //
+ // For a 32-byte structSize with no GC pointers we will use two ldp and four str instructions
+ // ldp x2, x3, [x0]
+ // str x2, [sp, #16]
+ // str x3, [sp, #24]
+ // ldp x2, x3, [x0]
+ // str x2, [sp, #32]
+ // str x3, [sp, #40]
+ //
+ // Note that when loading from a varNode we currently can't use the ldp instruction
+ // TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S)
+ //
+
+ int remainingSize = structSize;
+ unsigned structOffset = 0;
+ unsigned nextIndex = 0;
+
+ while (remainingSize >= 2 * TARGET_POINTER_SIZE)
+ {
+ var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]);
+ var_types type1 = compiler->getJitGCType(gcPtrs[nextIndex + 1]);
+
+ if (hasGCpointers)
+ {
+ // We have GC pointers, so use two ldr instructions
+ //
+ // We must do it this way because we can't currently pass or track
+ // two different emitAttr values for a ldp instruction.
+
+ // Make sure that the first load instruction does not overwrite the addrReg.
+ //
+ if (loReg != addrReg)
+ {
+ if (varNode != nullptr)
+ {
+                            // Load from our varNumInp source
+ emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0);
+ emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp,
+ TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ // Load from our address expression source
+ emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset);
+ emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg,
+ structOffset + TARGET_POINTER_SIZE);
+ }
+ }
+ else // loReg == addrReg
+ {
+ assert(varNode == nullptr); // because addrReg is REG_NA when varNode is non-null
+ assert(hiReg != addrReg);
+ // Load from our address expression source
+ emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg,
+ structOffset + TARGET_POINTER_SIZE);
+ emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset);
+ }
+ }
+ else // our struct has no GC pointers
+ {
+ if (varNode != nullptr)
+ {
+                        // Load from our varNumInp source, currently we can't use a ldp instruction to do this
+ emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0);
+ emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ // Use a ldp instruction
+
+ // Load from our address expression source
+ emit->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, loReg, hiReg, addrReg, structOffset);
+ }
+ }
+
+ // Emit two store instructions to store the two registers into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), loReg, varNumOut, argOffsetOut);
+ emit->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), hiReg, varNumOut,
+ argOffsetOut + TARGET_POINTER_SIZE);
+                argOffsetOut += (2 * TARGET_POINTER_SIZE); // We stored 16 bytes of the struct
+                assert(argOffsetOut <= argOffsetMax);      // We can't write beyond the outgoing arg area
+
+                remainingSize -= (2 * TARGET_POINTER_SIZE); // We loaded 16 bytes of the struct
+ structOffset += (2 * TARGET_POINTER_SIZE);
+ nextIndex += 2;
+ }
+
+            // For a 12-byte structSize we will generate two load instructions
+ // ldr x2, [x0]
+ // ldr w3, [x0, #8]
+ // str x2, [sp, #16]
+ // str w3, [sp, #24]
+ //
+ // When the first instruction has a loReg that is the same register as the addrReg,
+            // we set deferLoad to true and issue the instructions in the reverse order
+ // ldr x3, [x2, #8]
+ // ldr x2, [x2]
+ // str x2, [sp, #16]
+ // str x3, [sp, #24]
+ //
+
+ var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
+ emitAttr nextAttr = emitTypeSize(nextType);
+ regNumber curReg = loReg;
+
+ bool deferLoad = false;
+ var_types deferType = TYP_UNKNOWN;
+ emitAttr deferAttr = EA_PTRSIZE;
+ int deferOffset = 0;
+
+ while (remainingSize > 0)
+ {
+ if (remainingSize >= TARGET_POINTER_SIZE)
+ {
+ remainingSize -= TARGET_POINTER_SIZE;
+
+ if ((curReg == addrReg) && (remainingSize != 0))
+ {
+ deferLoad = true;
+ deferType = nextType;
+ deferAttr = emitTypeSize(nextType);
+ deferOffset = structOffset;
+ }
+ else // the typical case
+ {
+ if (varNode != nullptr)
+ {
+                                // Load from our varNumInp source
+ emit->emitIns_R_S(ins_Load(nextType), nextAttr, curReg, varNumInp, structOffset);
+ }
+ else
+ {
+ // Load from our address expression source
+ emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, curReg, addrReg, structOffset);
+ }
+ // Emit a store instruction to store the register into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut);
+ argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
+                        assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ curReg = hiReg;
+ structOffset += TARGET_POINTER_SIZE;
+ nextIndex++;
+ nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
+ nextAttr = emitTypeSize(nextType);
+ }
+ else // (remainingSize < TARGET_POINTER_SIZE)
+ {
+ int loadSize = remainingSize;
+ remainingSize = 0;
+
+ // We should never have to do a non-pointer sized load when we have a LclVar source
+ assert(varNode == nullptr);
+
+ // the left over size is smaller than a pointer and thus can never be a GC type
+ assert(varTypeIsGC(nextType) == false);
+
+ var_types loadType = TYP_UINT;
+ if (loadSize == 1)
+ {
+ loadType = TYP_UBYTE;
+ }
+ else if (loadSize == 2)
+ {
+ loadType = TYP_USHORT;
+ }
+ else
+ {
+ // Need to handle additional loadSize cases here
+ noway_assert(loadSize == 4);
+ }
+
+ instruction loadIns = ins_Load(loadType);
+ emitAttr loadAttr = emitAttr(loadSize);
+
+ // When deferLoad is false, curReg can be the same as addrReg
+ // because the last instruction is allowed to overwrite addrReg.
+ //
+ noway_assert(!deferLoad || (curReg != addrReg));
+
+ emit->emitIns_R_R_I(loadIns, loadAttr, curReg, addrReg, structOffset);
+
+ // Emit a store instruction to store the register into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(loadType), loadAttr, curReg, varNumOut, argOffsetOut);
+ argOffsetOut += EA_SIZE_IN_BYTES(loadAttr);
+                    assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ }
+
+ if (deferLoad)
+ {
+ // We should never have to do a deferred load when we have a LclVar source
+ assert(varNode == nullptr);
+
+ curReg = addrReg;
+
+ // Load from our address expression source
+ emit->emitIns_R_R_I(ins_Load(deferType), deferAttr, curReg, addrReg, deferOffset);
+
+ // Emit a store instruction to store the register into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut);
+ argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
+                assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Create and record GC Info for the function.
+ */
+void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
+ unsigned prologSize,
+ unsigned epilogSize DEBUGARG(void* codePtr))
+{
+ genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
+}
+
+void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
+{
+ IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
+ GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
+ GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+ assert(gcInfoEncoder != nullptr);
+
+ // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+ gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+ // First we figure out the encoder ID's for the stack slots and registers.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+
+ // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+ gcInfoEncoder->FinalizeSlotIds();
+
+ // Now we can actually use those slot ID's to declare live ranges.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
+
+#if defined(DEBUGGING_SUPPORT)
+ if (compiler->opts.compDbgEnC)
+ {
+ // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
+ // which is:
+ // -return address
+ // -saved off RBP
+ // -saved 'this' pointer and bool for synchronized methods
+
+ // 4 slots for RBP + return address + RSI + RDI
+ int preservedAreaSize = 4 * REGSIZE_BYTES;
+
+ if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
+ preservedAreaSize += REGSIZE_BYTES;
+
+ preservedAreaSize += 1; // bool for synchronized methods
+ }
+
+ // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
+ // frame
+ gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
+ }
+#endif
+
+ gcInfoEncoder->Build();
+
+ // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
+ // let's save the values anyway for debugging purposes
+ compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+ compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+
+/*****************************************************************************
+ * Emit a call to a helper function.
+ *
+ */
+
+void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */)
+{
+ void* addr = nullptr;
+ void* pAddr = nullptr;
+
+ emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
+ addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
+ regNumber callTarget = REG_NA;
+
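+    // If the VM returned a direct address we can emit a direct call; otherwise pAddr is the
+    // address of an indirection cell, so load the target into a register and call indirectly.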
+ if (addr == nullptr)
+ {
+ // This is call to a runtime helper.
+ // adrp x, [reloc:rel page addr]
+ // add x, x, [reloc:page offset]
+ // ldr x, [x]
+ // br x
+
+ if (callTargetReg == REG_NA)
+ {
+ // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
+ // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
+ callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
+ }
+
+ regMaskTP callTargetMask = genRegMask(callTargetReg);
+ regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
+
+ // assert that all registers in callTargetMask are in the callKillSet
+ noway_assert((callTargetMask & callKillSet) == callTargetMask);
+
+ callTarget = callTargetReg;
+
+ // adrp + add with relocations will be emitted
+ getEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
+ getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTarget, callTarget);
+ callType = emitter::EC_INDIR_R;
+ }
+
+ getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize,
+ retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, /* IL offset */
+ callTarget, /* ireg */
+ REG_NA, 0, 0, /* xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helper));
+
+ regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
+ regTracker.rsTrashRegSet(killMask);
+ regTracker.rsTrashRegsForGCInterruptability();
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * Called for every scope info piece to record by the main genSetScopeInfo()
+ */
+
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genSetScopeInfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& varLoc)
+{
+ /* We need to do some mapping while reporting back these variables */
+
+ unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
+ noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+
+ VarName name = nullptr;
+
+#ifdef DEBUG
+
+ for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
+ {
+ if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
+ {
+ name = compiler->info.compVarScopes[scopeNum].vsdName;
+ }
+ }
+
+    // Hang on to this info.
+
+ TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
+
+ tlvi.tlviVarNum = ilVarNum;
+ tlvi.tlviLVnum = LVnum;
+ tlvi.tlviName = name;
+ tlvi.tlviStartPC = startOffs;
+ tlvi.tlviLength = length;
+ tlvi.tlviAvailable = avail;
+ tlvi.tlviVarLoc = varLoc;
+
+#endif // DEBUG
+
+ compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
+}
+#endif // DEBUGGING_SUPPORT
+
+/*****************************************************************************
+ * Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog
+ * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
+ * disassembler thinks the instructions are the same as we do.
+ */
+
+// Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here.
+// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
+//#define ALL_ARM64_EMITTER_UNIT_TESTS
+
+#if defined(DEBUG)
+void CodeGen::genArm64EmitterUnitTests()
+{
+ if (!verbose)
+ {
+ return;
+ }
+
+ if (!compiler->opts.altJit)
+ {
+ // No point doing this in a "real" JIT.
+ return;
+ }
+
+ // Mark the "fake" instructions in the output.
+ printf("*************** In genArm64EmitterUnitTests()\n");
+
+ emitter* theEmitter = getEmitter();
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ // We use this:
+ // genDefineTempLabel(genCreateTempLabel());
+ // to create artificial labels to help separate groups of tests.
+
+ //
+ // Loads/Stores basic general register
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // ldr/str Xt, [reg]
+ theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9);
+
+ // ldr/str Wt, [reg]
+ theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9);
+
+ theEmitter->emitIns_R_R(INS_ldrsb, EA_4BYTE, REG_R8, REG_R9); // target Wt
+ theEmitter->emitIns_R_R(INS_ldrsh, EA_4BYTE, REG_R8, REG_R9); // target Wt
+ theEmitter->emitIns_R_R(INS_ldrsb, EA_8BYTE, REG_R8, REG_R9); // target Xt
+ theEmitter->emitIns_R_R(INS_ldrsh, EA_8BYTE, REG_R8, REG_R9); // target Xt
+ theEmitter->emitIns_R_R(INS_ldrsw, EA_8BYTE, REG_R8, REG_R9); // target Xt
+
+ theEmitter->emitIns_R_R_I(INS_ldurb, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldurh, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_sturb, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_sturh, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursb, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursb, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursh, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursh, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursw, EA_8BYTE, REG_R8, REG_R9, 1);
+
+ // SP and ZR tests
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_SP, 1);
+ theEmitter->emitIns_R_R_I(INS_ldurb, EA_8BYTE, REG_ZR, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldurh, EA_8BYTE, REG_ZR, REG_SP, 1);
+
+ // scaled
+ theEmitter->emitIns_R_R_I(INS_ldrb, EA_1BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldrh, EA_2BYTE, REG_R8, REG_R9, 2);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 4);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 8);
+
+ // pre-/post-indexed (unscaled)
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // Compares
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // cmp reg, reg
+ theEmitter->emitIns_R_R(INS_cmp, EA_8BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_cmn, EA_8BYTE, REG_R8, REG_R9);
+
+ // cmp reg, imm
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 1 << 12);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095 << 12);
+
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 1 << 12);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095 << 12);
+
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -1);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -0xfff);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -1);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -0xfff);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xffffffffff800000LL);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_R1, REG_R12);
+ theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_R2, REG_R13);
+ theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_R3, REG_R14);
+ theEmitter->emitIns_R_R(INS_rev, EA_8BYTE, REG_R4, REG_R15);
+ theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_R5, REG_R0);
+ theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_R6, REG_R1);
+
+ theEmitter->emitIns_R_R(INS_cls, EA_4BYTE, REG_R7, REG_R2);
+ theEmitter->emitIns_R_R(INS_clz, EA_4BYTE, REG_R8, REG_R3);
+ theEmitter->emitIns_R_R(INS_rbit, EA_4BYTE, REG_R9, REG_R4);
+ theEmitter->emitIns_R_R(INS_rev, EA_4BYTE, REG_R10, REG_R5);
+ theEmitter->emitIns_R_R(INS_rev16, EA_4BYTE, REG_R11, REG_R6);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_I
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // mov reg, imm(i16,hw)
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000000001234);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000043210000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000567800000000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765000000000000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFFFFFF1234);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFF4321FFFF);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFF5678FFFFFFFF);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765FFFFFFFFFFFF);
+
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00001234);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x87650000);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xFFFF1234);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x4567FFFF);
+
+ // mov reg, imm(N,r,s)
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x00FFFFF000000000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x6666666666666666);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_SP, 0x7FFF00007FFF0000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x5555555555555555);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xE003E003E003E003);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0707070707070707);
+
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00FFFFF0);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x66666666);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x03FFC000);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x55555555);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xE003E003);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x07070707);
+
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0xE003E003E003E003);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x00FFFFF000000000);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x6666666666666666);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x0707070707070707);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x7FFF00007FFF0000);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x5555555555555555);
+
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xE003E003);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x00FFFFF0);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x66666666);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x07070707);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xFFF00000);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x55555555);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // tst reg, reg
+ theEmitter->emitIns_R_R(INS_tst, EA_8BYTE, REG_R7, REG_R10);
+
+ // mov reg, reg
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R7, REG_R10);
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R8, REG_SP);
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_SP, REG_R9);
+
+ theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_R5, REG_R11);
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_R4, REG_R12);
+ theEmitter->emitIns_R_R(INS_negs, EA_8BYTE, REG_R3, REG_R13);
+
+ theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R7, REG_R10);
+ theEmitter->emitIns_R_R(INS_mvn, EA_4BYTE, REG_R5, REG_R11);
+ theEmitter->emitIns_R_R(INS_neg, EA_4BYTE, REG_R4, REG_R12);
+ theEmitter->emitIns_R_R(INS_negs, EA_4BYTE, REG_R3, REG_R13);
+
+ theEmitter->emitIns_R_R(INS_sxtb, EA_8BYTE, REG_R7, REG_R10);
+ theEmitter->emitIns_R_R(INS_sxth, EA_8BYTE, REG_R5, REG_R11);
+ theEmitter->emitIns_R_R(INS_sxtw, EA_8BYTE, REG_R4, REG_R12);
+ theEmitter->emitIns_R_R(INS_uxtb, EA_8BYTE, REG_R3, REG_R13); // map to Wt
+ theEmitter->emitIns_R_R(INS_uxth, EA_8BYTE, REG_R2, REG_R14); // map to Wt
+
+ theEmitter->emitIns_R_R(INS_sxtb, EA_4BYTE, REG_R7, REG_R10);
+ theEmitter->emitIns_R_R(INS_sxth, EA_4BYTE, REG_R5, REG_R11);
+ theEmitter->emitIns_R_R(INS_uxtb, EA_4BYTE, REG_R3, REG_R13);
+ theEmitter->emitIns_R_R(INS_uxth, EA_4BYTE, REG_R2, REG_R14);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_I_I
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // mov reg, imm(i16,hw)
+ theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x1234, 0, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
+
+ theEmitter->emitIns_R_I_I(INS_movk, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_movn, EA_8BYTE, REG_R8, 0x5678, 32, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_movz, EA_8BYTE, REG_R8, 0x8765, 48, INS_OPTS_LSL);
+
+ theEmitter->emitIns_R_I_I(INS_movk, EA_4BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_movn, EA_4BYTE, REG_R8, 0x5678, 16, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_movz, EA_4BYTE, REG_R8, 0x8765, 16, INS_OPTS_LSL);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_I
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_I(INS_lsl, EA_8BYTE, REG_R0, REG_R0, 1);
+ theEmitter->emitIns_R_R_I(INS_lsl, EA_4BYTE, REG_R9, REG_R3, 18);
+ theEmitter->emitIns_R_R_I(INS_lsr, EA_8BYTE, REG_R7, REG_R0, 37);
+ theEmitter->emitIns_R_R_I(INS_lsr, EA_4BYTE, REG_R0, REG_R1, 2);
+ theEmitter->emitIns_R_R_I(INS_asr, EA_8BYTE, REG_R2, REG_R3, 53);
+ theEmitter->emitIns_R_R_I(INS_asr, EA_4BYTE, REG_R9, REG_R3, 18);
+
+ theEmitter->emitIns_R_R_I(INS_and, EA_8BYTE, REG_R2, REG_R3, 0x5555555555555555);
+ theEmitter->emitIns_R_R_I(INS_ands, EA_8BYTE, REG_R1, REG_R5, 0x6666666666666666);
+ theEmitter->emitIns_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, 0x0707070707070707);
+ theEmitter->emitIns_R_R_I(INS_orr, EA_8BYTE, REG_SP, REG_R3, 0xFFFC000000000000);
+ theEmitter->emitIns_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, 0xE003E003);
+
+ theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 31);
+ theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 32);
+ theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 63);
+
+ theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 31);
+
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_I cmp/tst
+ //
+
+ // cmp
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0);
+
+ // CMP (shifted register)
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR);
+
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR);
+
+ // TST (shifted register)
+ theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 34, INS_OPTS_ROR);
+
+ theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 24, INS_OPTS_ROR);
+
+ // CMP (extended register)
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTW); // "cmp x8, x9, UXTW"; msdis
+ // disassembles this "cmp x8,x9",
+ // which looks like an msdis issue.
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTX);
+
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTX);
+
+ // CMP 64-bit (extended register) and left shift
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTX);
+
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTX);
+
+ // CMP 32-bit (extended register) and left shift
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTW);
+
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTW);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_lsl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lsr, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_asr, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_ror, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_adc, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_adcs, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sbc, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sbcs, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_udiv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sdiv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_mneg, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smull, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smulh, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umull, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umulh, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lslv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lsrv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_asrv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_rorv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+
+ theEmitter->emitIns_R_R_R(INS_lsl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lsr, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_asr, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_ror, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_adc, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_adcs, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sbc, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sbcs, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_udiv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sdiv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_mneg, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smull, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smulh, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umull, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umulh, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lslv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lsrv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_asrv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_rorv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_I_I
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_8BYTE, REG_R2, REG_R3, 4, 39);
+ theEmitter->emitIns_R_R_I_I(INS_bfm, EA_8BYTE, REG_R1, REG_R5, 20, 23);
+ theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_8BYTE, REG_R8, REG_R9, 36, 7);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_8BYTE, REG_R2, REG_R3, 7, 37);
+ theEmitter->emitIns_R_R_I_I(INS_bfi, EA_8BYTE, REG_R1, REG_R5, 23, 21);
+ theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_8BYTE, REG_R8, REG_R9, 39, 5);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_8BYTE, REG_R2, REG_R3, 10, 24);
+ theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_8BYTE, REG_R1, REG_R5, 26, 16);
+ theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_8BYTE, REG_R8, REG_R9, 42, 8);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_4BYTE, REG_R2, REG_R3, 4, 19);
+ theEmitter->emitIns_R_R_I_I(INS_bfm, EA_4BYTE, REG_R1, REG_R5, 10, 13);
+ theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_4BYTE, REG_R8, REG_R9, 16, 7);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_4BYTE, REG_R2, REG_R3, 5, 17);
+ theEmitter->emitIns_R_R_I_I(INS_bfi, EA_4BYTE, REG_R1, REG_R5, 13, 11);
+ theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_4BYTE, REG_R8, REG_R9, 19, 5);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_4BYTE, REG_R2, REG_R3, 3, 14);
+ theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_4BYTE, REG_R1, REG_R5, 11, 9);
+ theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_4BYTE, REG_R8, REG_R9, 22, 8);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R_I
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // ADD (extended register)
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX);
+
+ // ADD (extended register) and left shift
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX);
+
+ // ADD (shifted register)
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 33, INS_OPTS_ASR);
+
+ // EXTR (extract field from register pair)
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1);
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31);
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32);
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 63);
+
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1);
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 31);
+
+ // SUB (extended register)
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX);
+
+ // SUB (extended register) and left shift
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX);
+
+ // SUB (shifted register)
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 27, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 28, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 29, INS_OPTS_ASR);
+
+ // bit operations
+ theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+
+ theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR);
+ theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR);
+
+ theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+
+ theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR);
+ theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R_I -- load/store pair
+ //
+
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
+
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8);
+
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+
+ // SP and ZR tests
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_SP, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_R8, 16, INS_OPTS_PRE_INDEX);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R_Ext -- load/store shifted/extend
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // LDR (register)
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+
+ // STR (register)
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R_R
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R_R(INS_madd, EA_4BYTE, REG_R0, REG_R12, REG_R27, REG_R10);
+ theEmitter->emitIns_R_R_R_R(INS_msub, EA_4BYTE, REG_R1, REG_R13, REG_R28, REG_R11);
+ theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_4BYTE, REG_R2, REG_R14, REG_R0, REG_R12);
+ theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_4BYTE, REG_R3, REG_R15, REG_R1, REG_R13);
+ theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_4BYTE, REG_R4, REG_R19, REG_R2, REG_R14);
+ theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_4BYTE, REG_R5, REG_R20, REG_R3, REG_R15);
+
+ theEmitter->emitIns_R_R_R_R(INS_madd, EA_8BYTE, REG_R6, REG_R21, REG_R4, REG_R19);
+ theEmitter->emitIns_R_R_R_R(INS_msub, EA_8BYTE, REG_R7, REG_R22, REG_R5, REG_R20);
+ theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_8BYTE, REG_R8, REG_R23, REG_R6, REG_R21);
+ theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_8BYTE, REG_R9, REG_R24, REG_R7, REG_R22);
+ theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_8BYTE, REG_R10, REG_R25, REG_R8, REG_R23);
+ theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_8BYTE, REG_R11, REG_R26, REG_R9, REG_R24);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_COND
+ //
+
+ // cset reg, cond
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R9, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R8, INS_COND_NE); // ne
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R7, INS_COND_HS); // hs
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R6, INS_COND_LO); // lo
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R5, INS_COND_MI); // mi
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R4, INS_COND_PL); // pl
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R3, INS_COND_VS); // vs
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R2, INS_COND_VC); // vc
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R1, INS_COND_HI); // hi
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R0, INS_COND_LS); // ls
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R9, INS_COND_GE); // ge
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R8, INS_COND_LT); // lt
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R7, INS_COND_GT); // gt
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R6, INS_COND_LE); // le
+
+ // csetm reg, cond
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R9, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R8, INS_COND_NE); // ne
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R7, INS_COND_HS); // hs
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R6, INS_COND_LO); // lo
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R5, INS_COND_MI); // mi
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R4, INS_COND_PL); // pl
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R3, INS_COND_VS); // vs
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R2, INS_COND_VC); // vc
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R1, INS_COND_HI); // hi
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R0, INS_COND_LS); // ls
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R9, INS_COND_GE); // ge
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R8, INS_COND_LT); // lt
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R7, INS_COND_GT); // gt
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R6, INS_COND_LE); // le
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_COND
+ //
+
+ // cinc reg, reg, cond
+ // cinv reg, reg, cond
+ // cneg reg, reg, cond
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R0, REG_R4, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R1, REG_R5, INS_COND_NE); // ne
+ theEmitter->emitIns_R_R_COND(INS_cneg, EA_4BYTE, REG_R2, REG_R6, INS_COND_HS); // hs
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R3, REG_R7, INS_COND_LO); // lo
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R4, REG_R8, INS_COND_MI); // mi
+ theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R5, REG_R9, INS_COND_PL); // pl
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R6, REG_R0, INS_COND_VS); // vs
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R7, REG_R1, INS_COND_VC); // vc
+ theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R8, REG_R2, INS_COND_HI); // hi
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R9, REG_R3, INS_COND_LS); // ls
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R0, REG_R4, INS_COND_GE); // ge
+ theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R2, REG_R5, INS_COND_LT); // lt
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R2, REG_R6, INS_COND_GT); // gt
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_8BYTE, REG_R3, REG_R7, INS_COND_LE); // le
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R_COND
+ //
+
+ // csel reg, reg, reg, cond
+ // csinc reg, reg, reg, cond
+ // csinv reg, reg, reg, cond
+ // csneg reg, reg, reg, cond
+ theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R0, REG_R4, REG_R8, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R1, REG_R5, REG_R9, INS_COND_NE); // ne
+ theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_HS); // hs
+ theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LO); // lo
+ theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R4, REG_R8, REG_R2, INS_COND_MI); // mi
+ theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R5, REG_R9, REG_R3, INS_COND_PL); // pl
+ theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_8BYTE, REG_R6, REG_R0, REG_R4, INS_COND_VS); // vs
+ theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_4BYTE, REG_R7, REG_R1, REG_R5, INS_COND_VC); // vc
+ theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R8, REG_R2, REG_R6, INS_COND_HI); // hi
+ theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R9, REG_R3, REG_R7, INS_COND_LS); // ls
+ theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R0, REG_R4, REG_R8, INS_COND_GE); // ge
+ theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R2, REG_R5, REG_R9, INS_COND_LT); // lt
+ theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_GT); // gt
+ theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LE); // le
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_FLAGS_COND
+ //
+
+ // ccmp reg1, reg2, nzcv, cond
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+ // ccmp reg1, imm, nzcv, cond
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+ // ccmp reg1, imm, nzcv, cond -- encoded as ccmn
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, -3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, -2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, -1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, -5, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, -31, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, -28, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, -25, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, -22, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, -19, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, -16, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, -13, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, -10, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, -7, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, -4, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+ // ccmn reg1, reg2, nzcv, cond
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+ // ccmn reg1, imm, nzcv, cond
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // Branch to register
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8);
+ theEmitter->emitIns_R(INS_blr, EA_PTRSIZE, REG_R9);
+ theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8);
+ theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // Misc
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0);
+ theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 65535);
+
+ theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_OSHLD);
+ theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_OSHST);
+ theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_OSH);
+
+ theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_NSHLD);
+ theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_NSHST);
+ theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_NSH);
+
+ theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_ISHLD);
+ theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_ISHST);
+ theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ISH);
+
+ theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_LD);
+ theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ST);
+ theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_SY);
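+ // dmb/dsb accept the full barrier option set (SY/LD/ST crossed with the inner-, outer-
+ // and non-shareable domains); isb architecturally defines only SY, so the non-SY isb
+ // forms above presumably just exercise the option encoder.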
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ ////////////////////////////////////////////////////////////////////////////////
+ //
+ // SIMD and Floating point
+ //
+ ////////////////////////////////////////////////////////////////////////////////
+
+ //
+ // Load/Stores vector register
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // ldr/str Vt, [reg]
+ theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_V1, REG_R9);
+ theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_V2, REG_R8);
+ theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_V3, REG_R7);
+ theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_V4, REG_R6);
+ theEmitter->emitIns_R_R(INS_ldr, EA_2BYTE, REG_V5, REG_R5);
+ theEmitter->emitIns_R_R(INS_str, EA_2BYTE, REG_V6, REG_R4);
+ theEmitter->emitIns_R_R(INS_ldr, EA_1BYTE, REG_V7, REG_R3);
+ theEmitter->emitIns_R_R(INS_str, EA_1BYTE, REG_V8, REG_R2);
+ theEmitter->emitIns_R_R(INS_ldr, EA_16BYTE, REG_V9, REG_R1);
+ theEmitter->emitIns_R_R(INS_str, EA_16BYTE, REG_V10, REG_R0);
+
+ // ldr/str Vt, [reg+cns] -- scaled
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 2);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 4);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 8);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 16);
+
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V7, REG_R10, 1);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V7, REG_R10, 2);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V7, REG_R10, 4);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V7, REG_R10, 8);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V7, REG_R10, 16);
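+ // The unsigned-offset form scales the immediate by the access size, so each offset
+ // above (1, 2, 4, 8, 16) is the element size times a scaled index of 1.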
+
+ // ldr/str Vt, [reg],cns -- post-indexed (unscaled)
+ // ldr/str Vt, [reg+cns]! -- pre-indexed (unscaled)
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+
+ theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_1BYTE, REG_V8, REG_R9, 2);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_2BYTE, REG_V8, REG_R9, 3);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_V8, REG_R9, 5);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_V8, REG_R9, 9);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_16BYTE, REG_V8, REG_R9, 17);
+
+ theEmitter->emitIns_R_R_I(INS_stur, EA_1BYTE, REG_V7, REG_R10, 2);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_2BYTE, REG_V7, REG_R10, 3);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_V7, REG_R10, 5);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_V7, REG_R10, 9);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_16BYTE, REG_V7, REG_R10, 17);
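+ // ldur/stur take a signed 9-bit unscaled offset (-256..255), so unlike the scaled
+ // ldr/str form the immediate need not be a multiple of the access size.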
+
+ // load/store pair
+ theEmitter->emitIns_R_R_R(INS_ldnp, EA_8BYTE, REG_V0, REG_V1, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V1, REG_V2, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_V2, REG_V3, REG_R10, 8);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 24);
+
+ theEmitter->emitIns_R_R_R(INS_ldnp, EA_4BYTE, REG_V4, REG_V5, REG_SP);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 4);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V7, REG_V8, REG_SP, 12);
+
+ theEmitter->emitIns_R_R_R(INS_ldnp, EA_16BYTE, REG_V8, REG_V9, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V9, REG_V10, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_16BYTE, REG_V10, REG_V11, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V11, REG_V12, REG_R10, 48);
+
+ theEmitter->emitIns_R_R_R(INS_ldp, EA_8BYTE, REG_V0, REG_V1, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V1, REG_V2, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V2, REG_V3, REG_SP, 8);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V4, REG_V5, REG_R10, 24, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V5, REG_V6, REG_SP, 32, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V6, REG_V7, REG_SP, 40, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V7, REG_V8, REG_R10, 48, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_R(INS_ldp, EA_4BYTE, REG_V0, REG_V1, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V1, REG_V2, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V2, REG_V3, REG_SP, 4);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V3, REG_V4, REG_R10, 8);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V4, REG_V5, REG_R10, 12, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 20, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V7, REG_V8, REG_R10, 24, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_R(INS_ldp, EA_16BYTE, REG_V0, REG_V1, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V1, REG_V2, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V2, REG_V3, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V3, REG_V4, REG_R10, 32);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V4, REG_V5, REG_R10, 48, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V5, REG_V6, REG_SP, 64, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V6, REG_V7, REG_SP, 80, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V7, REG_V8, REG_R10, 96, INS_OPTS_PRE_INDEX);
+
+ // LDR (register)
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V1, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V1, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V1, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 4);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 4);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 4);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 4);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 4);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V1, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V1, REG_R7, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V2, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V4, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_UXTX);
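+ // For these register-offset forms an explicit shift amount must equal log2 of the
+ // access size (1, 2, 3 or 4 above), which is presumably why the byte-sized accesses
+ // are emitted with extend options only and no shift.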
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R mov and its aliases
+ //
+
+ // mov vector to vector
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_mov, EA_16BYTE, REG_V2, REG_V3);
+
+ theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V12, REG_V13);
+ theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V14, REG_V15);
+ theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V16, REG_V17);
+
+ // mov vector to general
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R0, REG_V4);
+ theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R1, REG_V5);
+ theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_R2, REG_V6);
+ theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_R3, REG_V7);
+
+ // mov general to vector
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V8, REG_R4);
+ theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V9, REG_R5);
+ theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V10, REG_R6);
+ theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V11, REG_R7);
+
+ // mov vector[index] to vector
+ theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V2, REG_V3, 3);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V4, REG_V5, 7);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V6, REG_V7, 15);
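+ // The indices 1, 3, 7 and 15 are the largest valid lane numbers for 8-, 4-, 2- and
+ // 1-byte elements in a 128-bit vector register.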
+
+ // mov to general from vector[index]
+ theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_R8, REG_V16, 1);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_R9, REG_V17, 2);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_R10, REG_V18, 3);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_R11, REG_V19, 4);
+
+ // mov to vector[index] from general
+ theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V20, REG_R12, 1);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V21, REG_R13, 2);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V22, REG_R14, 6);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V23, REG_R15, 8);
+
+ // mov vector[index] to vector[index2]
+ theEmitter->emitIns_R_R_I_I(INS_mov, EA_8BYTE, REG_V8, REG_V9, 1, 0);
+ theEmitter->emitIns_R_R_I_I(INS_mov, EA_4BYTE, REG_V10, REG_V11, 2, 1);
+ theEmitter->emitIns_R_R_I_I(INS_mov, EA_2BYTE, REG_V12, REG_V13, 5, 2);
+ theEmitter->emitIns_R_R_I_I(INS_mov, EA_1BYTE, REG_V14, REG_V15, 12, 3);
+
+ //////////////////////////////////////////////////////////////////////////////////
+
+ // mov/dup scalar
+ theEmitter->emitIns_R_R_I(INS_dup, EA_8BYTE, REG_V24, REG_V25, 1);
+ theEmitter->emitIns_R_R_I(INS_dup, EA_4BYTE, REG_V26, REG_V27, 3);
+ theEmitter->emitIns_R_R_I(INS_dup, EA_2BYTE, REG_V28, REG_V29, 7);
+ theEmitter->emitIns_R_R_I(INS_dup, EA_1BYTE, REG_V30, REG_V31, 15);
+
+ // mov/ins vector element
+ theEmitter->emitIns_R_R_I_I(INS_ins, EA_8BYTE, REG_V0, REG_V1, 0, 1);
+ theEmitter->emitIns_R_R_I_I(INS_ins, EA_4BYTE, REG_V2, REG_V3, 2, 2);
+ theEmitter->emitIns_R_R_I_I(INS_ins, EA_2BYTE, REG_V4, REG_V5, 4, 3);
+ theEmitter->emitIns_R_R_I_I(INS_ins, EA_1BYTE, REG_V6, REG_V7, 8, 4);
+
+ // umov to general from vector element
+ theEmitter->emitIns_R_R_I(INS_umov, EA_8BYTE, REG_R0, REG_V8, 1);
+ theEmitter->emitIns_R_R_I(INS_umov, EA_4BYTE, REG_R1, REG_V9, 2);
+ theEmitter->emitIns_R_R_I(INS_umov, EA_2BYTE, REG_R2, REG_V10, 4);
+ theEmitter->emitIns_R_R_I(INS_umov, EA_1BYTE, REG_R3, REG_V11, 8);
+
+ // ins to vector element from general
+ theEmitter->emitIns_R_R_I(INS_ins, EA_8BYTE, REG_V12, REG_R4, 1);
+ theEmitter->emitIns_R_R_I(INS_ins, EA_4BYTE, REG_V13, REG_R5, 3);
+ theEmitter->emitIns_R_R_I(INS_ins, EA_2BYTE, REG_V14, REG_R6, 7);
+ theEmitter->emitIns_R_R_I(INS_ins, EA_1BYTE, REG_V15, REG_R7, 15);
+
+ // smov to general from vector element
+ theEmitter->emitIns_R_R_I(INS_smov, EA_4BYTE, REG_R5, REG_V17, 2);
+ theEmitter->emitIns_R_R_I(INS_smov, EA_2BYTE, REG_R6, REG_V18, 4);
+ theEmitter->emitIns_R_R_I(INS_smov, EA_1BYTE, REG_R7, REG_V19, 8);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_I movi and mvni
+ //
+
+ // movi imm8 (vector)
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V0, 0x00, INS_OPTS_8B);
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V1, 0xFF, INS_OPTS_8B);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V2, 0x00, INS_OPTS_16B);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V3, 0xFF, INS_OPTS_16B);
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V4, 0x007F, INS_OPTS_4H);
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V5, 0x7F00, INS_OPTS_4H); // LSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V6, 0x003F, INS_OPTS_8H);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V7, 0x3F00, INS_OPTS_8H); // LSL 8
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V8, 0x1F, INS_OPTS_2S);
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V9, 0x1F00, INS_OPTS_2S); // LSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V10, 0x1F0000, INS_OPTS_2S); // LSL 16
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V11, 0x1F000000, INS_OPTS_2S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V12, 0x1FFF, INS_OPTS_2S); // MSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V13, 0x1FFFFF, INS_OPTS_2S); // MSL 16
+
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V14, 0x37, INS_OPTS_4S);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V15, 0x3700, INS_OPTS_4S); // LSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V16, 0x370000, INS_OPTS_4S); // LSL 16
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V17, 0x37000000, INS_OPTS_4S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V18, 0x37FF, INS_OPTS_4S); // MSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V19, 0x37FFFF, INS_OPTS_4S); // MSL 16
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V20, 0xFF80, INS_OPTS_4H); // mvni
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V21, 0xFFC0, INS_OPTS_8H); // mvni
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V22, 0xFFFFFFE0, INS_OPTS_2S); // mvni
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V23, 0xFFFFF0FF, INS_OPTS_4S); // mvni LSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V24, 0xFFF8FFFF, INS_OPTS_2S); // mvni LSL 16
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V25, 0xFCFFFFFF, INS_OPTS_4S); // mvni LSL 24
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V26, 0xFFFFFE00, INS_OPTS_2S); // mvni MSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V27, 0xFFFC0000, INS_OPTS_4S); // mvni MSL 16
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V28, 0x00FF00FF00FF00FF, INS_OPTS_1D);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V29, 0x00FFFF0000FFFF00, INS_OPTS_2D);
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V30, 0xFF000000FF000000);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V31, 0x0, INS_OPTS_2D);
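+ // The 64-bit (1D/2D) movi form can only materialize immediates in which every byte
+ // is 0x00 or 0xFF, as in the four values above.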
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V8, 0x42FF, INS_OPTS_2S); // MSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V9, 0x42FFFF, INS_OPTS_2S); // MSL 16
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V14, 0x5DFF, INS_OPTS_4S); // MSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V15, 0x5DFFFF, INS_OPTS_4S); // MSL 16
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_I orr/bic vector immediate
+ //
+
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
+
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
+
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_F fmov/fcmp immediate
+ //
+
+ // fmov imm8 (scalar)
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V14, 1.0);
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V15, -1.0);
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V0, 2.0); // encodes imm8 == 0
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V16, 10.0);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V17, -10.0);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V18, 31); // Largest encodable value
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V19, -31);
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V20, 1.25);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V21, -1.25);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V22, 0.125); // Smallest encodable value
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V23, -0.125);
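+ // The fmov imm8 form encodes +/-(16..31)/16 * 2^e with e in -3..4, which gives the
+ // 0.125 .. 31.0 magnitude range noted above; 0.0 itself is not representable as imm8.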
+
+ // fmov imm8 (vector)
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V0, 2.0, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V24, 1.0, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V25, 1.0, INS_OPTS_4S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V26, 1.0, INS_OPTS_2D);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V27, -10.0, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V28, -10.0, INS_OPTS_4S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V29, -10.0, INS_OPTS_2D);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V30, 31.0, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V31, 31.0, INS_OPTS_4S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V0, 31.0, INS_OPTS_2D);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V1, -0.125, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V2, -0.125, INS_OPTS_4S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V3, -0.125, INS_OPTS_2D);
+
+ // fcmp with 0.0
+ theEmitter->emitIns_R_F(INS_fcmp, EA_8BYTE, REG_V12, 0.0);
+ theEmitter->emitIns_R_F(INS_fcmp, EA_4BYTE, REG_V13, 0.0);
+ theEmitter->emitIns_R_F(INS_fcmpe, EA_8BYTE, REG_V14, 0.0);
+ theEmitter->emitIns_R_F(INS_fcmpe, EA_4BYTE, REG_V15, 0.0);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R fmov/fcmp/fcvt
+ //
+
+ // fmov vector to vector
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V0, REG_V2);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V1, REG_V3);
+
+ // fmov vector to general
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R0, REG_V4);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R1, REG_V5);
+ // using the optional conversion specifier
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_D_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R3, REG_V7, INS_OPTS_S_TO_4BYTE);
+
+ // fmov general to vector
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V8, REG_R4);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V9, REG_R5);
+ // using the optional conversion specifier
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V10, REG_R6, INS_OPTS_8BYTE_TO_D);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V11, REG_R7, INS_OPTS_4BYTE_TO_S);
+
+ // fcmp/fcmpe
+ theEmitter->emitIns_R_R(INS_fcmp, EA_8BYTE, REG_V8, REG_V16);
+ theEmitter->emitIns_R_R(INS_fcmp, EA_4BYTE, REG_V9, REG_V17);
+ theEmitter->emitIns_R_R(INS_fcmpe, EA_8BYTE, REG_V10, REG_V18);
+ theEmitter->emitIns_R_R(INS_fcmpe, EA_4BYTE, REG_V11, REG_V19);
+
+ // fcvt
+ theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V24, REG_V25, INS_OPTS_S_TO_D); // Single to Double
+ theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V26, REG_V27, INS_OPTS_D_TO_S); // Double to Single
+
+ theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V1, REG_V2, INS_OPTS_H_TO_S);
+ theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V3, REG_V4, INS_OPTS_H_TO_D);
+
+ theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V5, REG_V6, INS_OPTS_S_TO_H);
+ theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V7, REG_V8, INS_OPTS_D_TO_H);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R floating point conversions
+ //
+
+ // fcvtas scalar
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtas scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtas vector
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtau scalar
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtau scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtau vector
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // fcvtms scalar
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtms scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtms vector
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtmu scalar
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtmu scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtmu vector
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // fcvtns scalar
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtns scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtns vector
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtnu scalar
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtnu scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtnu vector
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // fcvtps scalar
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtps scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtps vector
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtpu scalar
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtpu scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtpu vector
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // fcvtzs scalar
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtzs scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtzs vector
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtzu scalar
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtzu scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtzu vector
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // scvtf scalar
+ theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V2, REG_V3);
+
+ // scvtf scalar from general
+ theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D);
+
+ // scvtf vector
+ theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // ucvtf scalar
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V2, REG_V3);
+
+ // ucvtf scalar from general
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D);
+
+ // ucvtf vector
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R floating point operations, one dest, one source
+ //
+
+ // fabs scalar
+ theEmitter->emitIns_R_R(INS_fabs, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V2, REG_V3);
+
+ // fabs vector
+ theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // fneg scalar
+ theEmitter->emitIns_R_R(INS_fneg, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V2, REG_V3);
+
+ // fneg vector
+ theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // fsqrt scalar
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V2, REG_V3);
+
+ // fsqrt vector
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // abs scalar
+ theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V2, REG_V3);
+
+ // abs vector
+ theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D);
+
+ // neg scalar
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V2, REG_V3);
+
+ // neg vector
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D);
+
+ // mvn vector
+ theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V4, REG_V5);
+ theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V6, REG_V7, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V8, REG_V9);
+ theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_16B);
+
+ // cnt vector
+ theEmitter->emitIns_R_R(INS_cnt, EA_8BYTE, REG_V22, REG_V23, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_cnt, EA_16BYTE, REG_V24, REG_V25, INS_OPTS_16B);
+
+ // not vector (the same encoding as mvn)
+ theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V12, REG_V13);
+ theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V14, REG_V15, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V16, REG_V17);
+ theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V18, REG_V19, INS_OPTS_16B);
+
+ // cls vector
+ theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+
+ // clz vector
+ theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+
+ // rbit vector
+ theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_rbit, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
+
+ // rev16 vector
+ theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_rev16, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
+
+ // rev32 vector
+ theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+
+ // rev64 vector
+ theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R floating point round to int, one dest, one source
+ //
+
+ // frinta scalar
+ theEmitter->emitIns_R_R(INS_frinta, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V2, REG_V3);
+
+ // frinta vector
+ theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frinti scalar
+ theEmitter->emitIns_R_R(INS_frinti, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V2, REG_V3);
+
+ // frinti vector
+ theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintm scalar
+ theEmitter->emitIns_R_R(INS_frintm, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintm vector
+ theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintn scalar
+ theEmitter->emitIns_R_R(INS_frintn, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintn vector
+ theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintp scalar
+ theEmitter->emitIns_R_R(INS_frintp, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintp vector
+ theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintx scalar
+ theEmitter->emitIns_R_R(INS_frintx, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintx vector
+ theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintz scalar
+ theEmitter->emitIns_R_R(INS_frintz, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintz vector
+ theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R floating point operations, one dest, two sources
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ // fabd
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
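+ // For the by-element forms the trailing immediate is the lane index of the last
+ // source operand and must stay within the lane count for the element size
+ // (0..3 for S elements, 0..1 for D elements).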
+
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fnmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fnmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_I vector operations, one dest, one source reg, one immed
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // 'sshr' scalar
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'sshr' vector
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
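+ // The immediate right shifts (sshr/ssra/srshr/srsra and their unsigned forms) accept
+ // shift amounts from 1 up to the element size in bits, while shl (further below)
+ // accepts 0 up to the element size minus one; the shift counts above stay within
+ // those per-element-size limits.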
+
+ // 'ssra' scalar
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'ssra' vector
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'srshr' scalar
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'srshr' vector
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'srsra' scalar
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'srsra' vector
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'shl' scalar
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'shl' vector
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'ushr' scalar
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'ushr' vector
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'usra' scalar
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'usra' vector
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'urshr' scalar
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'urshr' vector
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'ursra' scalar
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'ursra' vector
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'sri' scalar
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'sri' vector
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'sli' scalar
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'sli' vector
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'sshll' vector
+ theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+
+ // 'ushll' vector
+ theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+
+ // 'shrn' vector
+ theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+
+ // 'rshrn' vector
+ theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+
+ // 'sxtl' vector
+ theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+
+ // 'uxtl' vector
+ theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R vector operations, one dest, two source
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // Specifying an Arrangement is optional
+ //
+ theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8);
+ theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11);
+ theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14);
+ theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17);
+ theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20);
+ theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23);
+ theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26);
+ theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29);
+ theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0);
+ theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3);
+
+ theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6);
+ theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9);
+ theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12);
+ theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15);
+ theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18);
+ theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21);
+
+ // Default Arrangement as per the ARM64 manual
+ //
+ theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3, INS_OPTS_16B);
+
+ theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_16B);
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V0, REG_V1, REG_V2); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V1, REG_V2, REG_V3); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V22, REG_V23, REG_V24, INS_OPTS_2D);
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // saba vector
+ theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // sabd vector
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uaba vector
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uabd vector
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R vector multiply
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ theEmitter->emitIns_R_R_R(INS_pmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_pmul, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B);
+
+ // 'mul' vector by elem
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
+
+ // 'mla' vector by elem
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
+
+ // 'mls' vector by elem
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R floating point operations, one source/dest, and two source
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_fmla, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fmls, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R_R floating point operations, one dest, and three source
+ //
+
+ theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_4BYTE, REG_V0, REG_V8, REG_V16, REG_V24);
+ theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_4BYTE, REG_V1, REG_V9, REG_V17, REG_V25);
+ theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_4BYTE, REG_V2, REG_V10, REG_V18, REG_V26);
+ theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_4BYTE, REG_V3, REG_V11, REG_V19, REG_V27);
+
+ theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_8BYTE, REG_V4, REG_V12, REG_V20, REG_V28);
+ theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_8BYTE, REG_V5, REG_V13, REG_V21, REG_V29);
+ theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_8BYTE, REG_V6, REG_V14, REG_V22, REG_V30);
+ theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_8BYTE, REG_V7, REG_V15, REG_V23, REG_V31);
+
+#endif
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ BasicBlock* label = genCreateTempLabel();
+ genDefineTempLabel(label);
+ instGen(INS_nop);
+ instGen(INS_nop);
+ instGen(INS_nop);
+ instGen(INS_nop);
+ theEmitter->emitIns_R_L(INS_adr, EA_4BYTE_DSP_RELOC, label, REG_R0);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ printf("*************** End of genArm64EmitterUnitTests()\n");
+}
+#endif // defined(DEBUG)
+
+#endif // _TARGET_ARM64_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/codegenclassic.h b/src/jit/codegenclassic.h
new file mode 100644
index 0000000000..81b7b34194
--- /dev/null
+++ b/src/jit/codegenclassic.h
@@ -0,0 +1,606 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This file contains the members of CodeGen that are defined and used
+// only by the "classic" JIT backend. It is included by CodeGen.h in the
+// definition of the CodeGen class.
+//
+
+#ifndef _CODEGENCLASSIC_H_
+#define _CODEGENCLASSIC_H_
+
+#ifdef LEGACY_BACKEND // Not necessary (it's this way in the #include location), but helpful to IntelliSense
+
+public:
+regNumber genIsEnregisteredIntVariable(GenTreePtr tree);
+
+void sched_AM(instruction ins,
+ emitAttr size,
+ regNumber ireg,
+ bool rdst,
+ GenTreePtr tree,
+ unsigned offs,
+ bool cons = false,
+ int cval = 0,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+protected:
+#if FEATURE_STACK_FP_X87
+VARSET_TP genFPregVars; // mask corresponding to genFPregCnt
+unsigned genFPdeadRegCnt; // The dead unpopped part of genFPregCnt
+#endif // FEATURE_STACK_FP_X87
+
+//-------------------------------------------------------------------------
+
+void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFlags flags = INS_FLAGS_DONT_CARE);
+
+regNumber genGetRegSetToIcon(ssize_t val, regMaskTP regBest = 0, var_types type = TYP_INT);
+void genDecRegBy(regNumber reg, ssize_t ival, GenTreePtr tree);
+void genIncRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType = TYP_INT, bool ovfl = false);
+
+void genMulRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType = TYP_INT, bool ovfl = false);
+
+//-------------------------------------------------------------------------
+
+bool genRegTrashable(regNumber reg, GenTreePtr tree);
+
+//
+// Prolog functions and data (there are a few exceptions for more generally used things)
+//
+
+regMaskTP genPInvokeMethodProlog(regMaskTP initRegs);
+
+void genPInvokeMethodEpilog();
+
+regNumber genPInvokeCallProlog(LclVarDsc* varDsc,
+ int argSize,
+ CORINFO_METHOD_HANDLE methodToken,
+ BasicBlock* returnLabel);
+
+void genPInvokeCallEpilog(LclVarDsc* varDsc, regMaskTP retVal);
+
+regNumber genLclHeap(GenTreePtr size);
+
+void genSinglePush();
+
+void genSinglePop();
+
+void genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet);
+
+bool genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum);
+
+void genComputeReg(
+ GenTreePtr tree, regMaskTP needReg, RegSet::ExactReg mustReg, RegSet::KeepReg keepReg, bool freeOnly = false);
+
+void genCompIntoFreeReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg);
+
+void genReleaseReg(GenTreePtr tree);
+
+void genRecoverReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg);
+
+void genMoveRegPairHalf(GenTreePtr tree, regNumber dst, regNumber src, int off = 0);
+
+void genMoveRegPair(GenTreePtr tree, regMaskTP needReg, regPairNo newPair);
+
+void genComputeRegPair(
+ GenTreePtr tree, regPairNo needRegPair, regMaskTP avoidReg, RegSet::KeepReg keepReg, bool freeOnly = false);
+
+void genCompIntoFreeRegPair(GenTreePtr tree, regMaskTP avoidReg, RegSet::KeepReg keepReg);
+
+void genComputeAddressable(GenTreePtr tree,
+ regMaskTP addrReg,
+ RegSet::KeepReg keptReg,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool freeOnly = false);
+
+void genReleaseRegPair(GenTreePtr tree);
+
+void genRecoverRegPair(GenTreePtr tree, regPairNo regPair, RegSet::KeepReg keepReg);
+
+void genEvalIntoFreeRegPair(GenTreePtr tree, regPairNo regPair, regMaskTP avoidReg);
+
+void genMakeRegPairAvailable(regPairNo regPair);
+
+bool genMakeIndAddrMode(GenTreePtr addr,
+ GenTreePtr oper,
+ bool forLea,
+ regMaskTP regMask,
+ RegSet::KeepReg keepReg,
+ regMaskTP* useMaskPtr,
+ bool deferOp = false);
+
+regMaskTP genMakeRvalueAddressable(
+ GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool forLoadStore, bool smallOK = false);
+
+regMaskTP genMakeAddressable(
+ GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool smallOK = false, bool deferOK = false);
+
+regMaskTP genMakeAddrArrElem(GenTreePtr arrElem, GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg);
+
+regMaskTP genMakeAddressable2(GenTreePtr tree,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool forLoadStore,
+ bool smallOK = false,
+ bool deferOK = false,
+ bool evalSideEffs = false);
+
+bool genStillAddressable(GenTreePtr tree);
+
+regMaskTP genRestoreAddrMode(GenTreePtr addr, GenTreePtr tree, bool lockPhase);
+
+regMaskTP genRestAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP lockMask);
+
+regMaskTP genKeepAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP avoidMask = RBM_NONE);
+
+void genDoneAddressable(GenTreePtr tree, regMaskTP addrReg, RegSet::KeepReg keptReg);
+
+GenTreePtr genMakeAddrOrFPstk(GenTreePtr tree, regMaskTP* regMaskPtr, bool roundResult);
+
+void genEmitGSCookieCheck(bool pushReg);
+
+void genEvalSideEffects(GenTreePtr tree);
+
+void genCondJump(GenTreePtr cond, BasicBlock* destTrue = NULL, BasicBlock* destFalse = NULL, bool bStackFPFixup = true);
+
+emitJumpKind genCondSetFlags(GenTreePtr cond);
+
+void genJCC(genTreeOps cmp, BasicBlock* block, var_types type);
+
+void genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool unsOper = false);
+
+void genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse);
+
+void genCondJumpLng(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bFPTransition = false);
+
+bool genUse_fcomip();
+
+void genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab);
+
+regMaskTP WriteBarrier(GenTreePtr tgt, GenTreePtr assignVal, regMaskTP addrReg);
+
+void genCodeForTreeConst(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg = RBM_NONE);
+
+void genCodeForTreeLeaf(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg = RBM_NONE);
+
+// If "tree" is a comma node, generates code for the left comma arguments,
+// in order, returning the first right argument in the list that is not
+// a comma node.
+GenTreePtr genCodeForCommaTree(GenTreePtr tree);
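+// Illustrative sketch (not from the original source): for a tree of the form
+// GT_COMMA(s1, GT_COMMA(s2, x)), genCodeForCommaTree generates code for s1 and s2
+// in order and returns x, which the caller then evaluates as usual.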
+
+void genCodeForTreeLeaf_GT_JMP(GenTreePtr tree);
+
+static Compiler::fgWalkPreFn fgIsVarAssignedTo;
+
+void genCodeForQmark(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+bool genCodeForQmarkWithCMOV(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+#ifdef _TARGET_XARCH_
+void genCodeForMultEAX(GenTreePtr tree);
+#endif
+#ifdef _TARGET_ARM_
+void genCodeForMult64(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+#endif
+
+void genCodeForTreeSmpBinArithLogOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForTreeSmpBinArithLogAsgOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForUnsignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForSignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForUnsignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForSignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForGeneralDivide(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForAsgShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForRelop(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForCopyObj(GenTreePtr tree, regMaskTP destReg);
+
+void genCodeForBlkOp(GenTreePtr tree, regMaskTP destReg);
+
+void genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg = RBM_NONE);
+
+regNumber genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg);
+
+void genCodeForNumericCast(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg = RBM_NONE);
+
+void genCodeForTreeSmpOpAsg(GenTreePtr tree);
+
+void genCodeForTreeSmpOpAsg_DONE_ASSG(GenTreePtr tree, regMaskTP addrReg, regNumber reg, bool ovfl);
+
+void genCodeForTreeSpecialOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg = RBM_NONE);
+
+void genCodeForTree(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg = RBM_NONE);
+
+void genCodeForTree_DONE_LIFE(GenTreePtr tree, regNumber reg)
+{
+ /* We've computed the value of 'tree' into 'reg' */
+
+ assert(reg != 0xFEEFFAAFu);
+ assert(!IsUninitialized(reg));
+
+ genMarkTreeInReg(tree, reg);
+}
+
+void genCodeForTree_DONE_LIFE(GenTreePtr tree, regPairNo regPair)
+{
+ /* We've computed the value of 'tree' into 'regPair' */
+
+ genMarkTreeInRegPair(tree, regPair);
+}
+
+void genCodeForTree_DONE(GenTreePtr tree, regNumber reg)
+{
+ /* Check whether this subtree has freed up any variables */
+
+ genUpdateLife(tree);
+
+ genCodeForTree_DONE_LIFE(tree, reg);
+}
+
+void genCodeForTree_REG_VAR1(GenTreePtr tree)
+{
+ /* Value is already in a register */
+
+ regNumber reg = tree->gtRegNum;
+
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+void genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP avoidReg);
+
+regPairNo genCodeForLongModInt(GenTreePtr tree, regMaskTP needReg);
+
+unsigned genRegCountForLiveIntEnregVars(GenTreePtr tree);
+
+#ifdef _TARGET_ARM_
+void genStoreFromFltRetRegs(GenTreePtr tree);
+void genLoadIntoFltRetRegs(GenTreePtr tree);
+void genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFltReg);
+#endif
+
+#if CPU_HAS_FP_SUPPORT
+void genRoundFpExpression(GenTreePtr op, var_types type = TYP_UNDEF);
+void genCodeForTreeFlt(GenTreePtr tree, regMaskTP needReg = RBM_ALLFLOAT, regMaskTP bestReg = RBM_NONE);
+#endif
+
+// FP stuff
+#include "fp.h"
+
+void genCodeForJumpTable(GenTreePtr tree);
+void genCodeForSwitchTable(GenTreePtr tree);
+void genCodeForSwitch(GenTreePtr tree);
+
+regMaskTP genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs);
+void genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs);
+
+size_t genPushArgList(GenTreePtr call);
+
+#ifdef _TARGET_ARM_
+// We are generating code for a promoted struct local variable. Fill the next slot (register or
+// 4-byte stack slot) with one or more field variables of the promoted struct local -- or 2 such slots
+// if the next field is a 64-bit value.
+// The arguments are:
+// "arg" is the current argument node.
+//
+// "curArgTabEntry" arg table entry pointer for "arg".
+//
+// "promotedStructLocalVarDesc" describes the struct local being copied, assumed non-NULL.
+//
+// "fieldSize" is somewhat misnamed; it must be the element in the struct's GC layout describing the next slot
+// of the struct -- it will be EA_4BYTE, EA_GCREF, or EA_BYREF.
+//
+// "*pNextPromotedStructFieldVar" must be the the local variable number of the next field variable to copy;
+// this location will be updated by the call to reflect the bytes that are copied.
+//
+// "*pBytesOfNextSlotOfCurPromotedStruct" must be the number of bytes within the struct local at which the next
+// slot to be copied starts. This location will be updated by the call to reflect the bytes that are copied.
+//
+// "*pCurRegNum" must be the current argument register number, and will be updated if argument registers are filled.
+//
+// "argOffset" must be the offset of the next slot to be filled in the outgoing argument area, if the argument is to
+// be
+// put in the outgoing arg area of the stack (or else should be INT_MAX if the next slot to be filled is a
+// register).
+// (Strictly speaking, after the addition of "argOffsetOfFirstStackSlot", this arg is redundant, and is only used
+// in assertions, and could be removed.)
+//
+// "fieldOffsetOfFirstStackSlot" must be the offset within the promoted struct local of the first slot that should be
+// copied to the outgoing argument area -- non-zero only in the case of a struct that spans registers and stack
+// slots.
+//
+// "argOffsetOfFirstStackSlot" must be the 4-byte-aligned offset of the first offset in the outgoing argument area
+// which could
+// contain part of the struct. (Explicit alignment may mean it doesn't actually contain part of the struct.)
+//
+// "*deadFieldVarRegs" is an out parameter, the set of registers containing promoted field variables that become dead
+// after
+// this (implicit) use.
+//
+// "*pRegTmp" -- if a temporary register is needed, and this is not REG_STK, uses that register. Otherwise, if it is
+// REG_STK,
+// allocates a register, uses it, and sets "*pRegTmp" to the allocated register.
+//
+// Returns "true" iff it filled two slots with an 8-byte value.
+bool genFillSlotFromPromotedStruct(GenTreePtr arg,
+ fgArgTabEntryPtr curArgTabEntry,
+ LclVarDsc* promotedStructLocalVarDesc,
+ emitAttr fieldSize,
+ unsigned* pNextPromotedStructFieldVar, // IN/OUT
+ unsigned* pBytesOfNextSlotOfCurPromotedStruct, // IN/OUT
+ regNumber* pCurRegNum, // IN/OUT
+ int argOffset,
+ int fieldOffsetOfFirstStackSlot,
+ int argOffsetOfFirstStackSlot,
+ regMaskTP* deadFieldVarRegs, // OUT
+ regNumber* pRegTmp); // IN/OUT
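+// Illustrative call pattern (an assumption, not taken from the source): for a promoted
+// struct local { int f0; int f1; int f2; } whose first two slots are passed in r2/r3 and
+// whose last slot goes to the stack, three successive calls would copy f0 into r2, f1 into
+// r3 (updating *pCurRegNum each time), and f2 into the first outgoing stack slot, with
+// *pNextPromotedStructFieldVar and *pBytesOfNextSlotOfCurPromotedStruct advancing by one
+// field variable / 4 bytes per call.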
+
+#endif // _TARGET_ARM_
+// Requires that "curr" is a cpblk. If the RHS is a promoted struct local,
+// then returns a regMaskTP representing the set of registers holding
+// fieldVars of the RHS that go dead with this use (as determined by the live set
+// of cpBlk).
+regMaskTP genFindDeadFieldRegs(GenTreePtr cpBlk);
+
+void SetupLateArgs(GenTreePtr call);
+
+#ifdef _TARGET_ARM_
+void PushMkRefAnyArg(GenTreePtr mkRefAnyTree, fgArgTabEntryPtr curArgTabEntry, regMaskTP regNeedMask);
+#endif // _TARGET_ARM_
+
+regMaskTP genLoadIndirectCallTarget(GenTreePtr call);
+
+regMaskTP genCodeForCall(GenTreePtr call, bool valUsed);
+
+GenTreePtr genGetAddrModeBase(GenTreePtr tree);
+
+GenTreePtr genIsAddrMode(GenTreePtr tree, GenTreePtr* indxPtr);
+
+private:
+bool genIsLocalLastUse(GenTreePtr tree);
+
+bool genIsRegCandidateLocal(GenTreePtr tree);
+
+//=========================================================================
+// Debugging support
+//=========================================================================
+
+#if FEATURE_STACK_FP_X87
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Flat FP model XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+bool StackFPIsSameAsFloat(double d);
+bool FlatFPSameRegisters(FlatFPStateX87* pState, regMaskTP mask);
+
+// The FlatFPX87_ functions are the actual verbs for doing things
+// like performing a transition, loading a register, etc. They are also
+// responsible for emitting the x87 code to do so. We keep
+// them in Compiler because we don't want to store a pointer to the
+// emitter.
+void FlatFPX87_Kill(FlatFPStateX87* pState, unsigned iVirtual);
+void FlatFPX87_PushVirtual(FlatFPStateX87* pState, unsigned iRegister, bool bEmitCode = true);
+unsigned FlatFPX87_Pop(FlatFPStateX87* pState, bool bEmitCode = true);
+unsigned FlatFPX87_Top(FlatFPStateX87* pState, bool bEmitCode = true);
+void FlatFPX87_Unload(FlatFPStateX87* pState, unsigned iVirtual, bool bEmitCode = true);
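+// Illustrative flow (an assumption, not from the source): to evaluate virtual FP register
+// iVirtual, codegen would call FlatFPX87_PushVirtual(pState, iVirtual) to model (and emit)
+// the load onto the x87 stack top, and later FlatFPX87_Pop(pState) or
+// FlatFPX87_Unload(pState, iVirtual) once the value has been consumed, keeping pState in
+// sync with the physical x87 stack.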
+#endif
+
+// Codegen functions. This is the API that codegen will use
+regMaskTP genPushArgumentStackFP(GenTreePtr arg);
+void genRoundFpExpressionStackFP(GenTreePtr op, var_types type = TYP_UNDEF);
+void genCodeForTreeStackFP_Const(GenTreePtr tree);
+void genCodeForTreeStackFP_Leaf(GenTreePtr tree);
+void genCodeForTreeStackFP_SmpOp(GenTreePtr tree);
+void genCodeForTreeStackFP_Special(GenTreePtr tree);
+void genCodeForTreeStackFP_Cast(GenTreePtr tree);
+void genCodeForTreeStackFP(GenTreePtr tree);
+void genCondJumpFltStackFP(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bDoTransition = true);
+void genCondJumpFloat(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse);
+void genCondJumpLngStackFP(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse);
+
+void genFloatConst(GenTree* tree, RegSet::RegisterPreference* pref);
+void genFloatLeaf(GenTree* tree, RegSet::RegisterPreference* pref);
+void genFloatSimple(GenTree* tree, RegSet::RegisterPreference* pref);
+void genFloatMath(GenTree* tree, RegSet::RegisterPreference* pref);
+void genFloatCheckFinite(GenTree* tree, RegSet::RegisterPreference* pref);
+void genLoadFloat(GenTreePtr tree, regNumber reg);
+void genFloatAssign(GenTree* tree);
+void genFloatArith(GenTree* tree, RegSet::RegisterPreference* pref);
+void genFloatAsgArith(GenTree* tree);
+
+regNumber genAssignArithFloat(genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg);
+
+GenTreePtr genMakeAddressableFloat(GenTreePtr tree,
+ regMaskTP* regMaskIntPtr,
+ regMaskTP* regMaskFltPtr,
+ bool bCollapseConstantDoubles = true);
+
+void genCodeForTreeFloat(GenTreePtr tree, RegSet::RegisterPreference* pref = NULL);
+
+void genCodeForTreeFloat(GenTreePtr tree, regMaskTP needReg, regMaskTP bestReg);
+
+regNumber genArithmFloat(
+ genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg, bool bReverse);
+void genCodeForTreeCastFloat(GenTreePtr tree, RegSet::RegisterPreference* pref);
+void genCodeForTreeCastToFloat(GenTreePtr tree, RegSet::RegisterPreference* pref);
+void genCodeForTreeCastFromFloat(GenTreePtr tree, RegSet::RegisterPreference* pref);
+void genKeepAddressableFloat(GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr);
+void genDoneAddressableFloat(GenTreePtr tree, regMaskTP addrRegInt, regMaskTP addrRegFlt, RegSet::KeepReg keptReg);
+void genComputeAddressableFloat(GenTreePtr tree,
+ regMaskTP addrRegInt,
+ regMaskTP addrRegFlt,
+ RegSet::KeepReg keptReg,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool freeOnly = false);
+void genRoundFloatExpression(GenTreePtr op, var_types type);
+
+#if FEATURE_STACK_FP_X87
+// Assumes then block will be generated before else block.
+struct QmarkStateStackFP
+{
+ FlatFPStateX87 stackState;
+};
+
+void genQMarkRegVarTransition(GenTreePtr nextNode, VARSET_VALARG_TP liveset);
+void genQMarkBeforeElseStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTreePtr nextNode);
+void genQMarkAfterElseBlockStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTreePtr nextNode);
+void genQMarkAfterThenBlockStackFP(QmarkStateStackFP* pState);
+
+#endif
+
+GenTreePtr genMakeAddressableStackFP(GenTreePtr tree,
+ regMaskTP* regMaskIntPtr,
+ regMaskTP* regMaskFltPtr,
+ bool bCollapseConstantDoubles = true);
+void genKeepAddressableStackFP(GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr);
+void genDoneAddressableStackFP(GenTreePtr tree, regMaskTP addrRegInt, regMaskTP addrRegFlt, RegSet::KeepReg keptReg);
+
+void genCodeForTreeStackFP_Asg(GenTreePtr tree);
+void genCodeForTreeStackFP_AsgArithm(GenTreePtr tree);
+void genCodeForTreeStackFP_Arithm(GenTreePtr tree);
+void genCodeForTreeStackFP_DONE(GenTreePtr tree, regNumber reg);
+void genCodeForTreeFloat_DONE(GenTreePtr tree, regNumber reg);
+
+void genSetupStateStackFP(BasicBlock* block);
+regMaskTP genRegMaskFromLivenessStackFP(VARSET_VALARG_TP varset);
+
+// bReverse means make op1 addressable and generate code for op2.
+// If op1 or op2 is a comma expression, this will generate code for its non-last comma parts
+// and set op1 and op2 to the remaining non-comma expressions.
+void genSetupForOpStackFP(
+ GenTreePtr& op1, GenTreePtr& op2, bool bReverse, bool bMakeOp1Addressable, bool bOp1ReadOnly, bool bOp2ReadOnly);
+
+#if FEATURE_STACK_FP_X87
+
+#ifdef DEBUG
+bool ConsistentAfterStatementStackFP();
+#endif
+
+private:
+void SpillTempsStackFP(regMaskTP canSpillMask);
+void SpillForCallStackFP();
+void UnspillRegVarsStackFp();
+
+// Transition API. Takes care of the stack matching of basicblock boundaries
+void genCodeForPrologStackFP();
+void genCodeForEndBlockTransitionStackFP(BasicBlock* block);
+
+void genCodeForBBTransitionStackFP(BasicBlock* pDst);
+void genCodeForTransitionStackFP(FlatFPStateX87* pSrc, FlatFPStateX87* pDst);
+void genCodeForTransitionFromMask(FlatFPStateX87* pSrc, regMaskTP mask, bool bEmitCode = true);
+BasicBlock* genTransitionBlockStackFP(FlatFPStateX87* pState, BasicBlock* pFrom, BasicBlock* pTarget);
+
+// This is the API codegen will use to emit virtual fp code. In theory, nobody above this API
+// should know about x87 instructions.
+
+int genNumberTemps();
+void genDiscardStackFP(GenTreePtr tree);
+void genRegRenameWithMasks(regNumber dstReg, regNumber srcReg);
+void genRegVarBirthStackFP(GenTreePtr tree);
+void genRegVarBirthStackFP(LclVarDsc* varDsc);
+void genRegVarDeathStackFP(GenTreePtr tree);
+void genRegVarDeathStackFP(LclVarDsc* varDsc);
+void genLoadStackFP(GenTreePtr tree, regNumber reg);
+void genMovStackFP(GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg);
+bool genCompInsStackFP(GenTreePtr tos, GenTreePtr other);
+regNumber genArithmStackFP(
+ genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg, bool bReverse);
+regNumber genAsgArithmStackFP(genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg);
+void genCondJmpInsStackFP(emitJumpKind jumpKind,
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse,
+ bool bDoTransition = true);
+void genTableSwitchStackFP(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab);
+
+void JitDumpFPState();
+#else // !FEATURE_STACK_FP_X87
+void SpillForCallRegisterFP(regMaskTP noSpillMask);
+#endif // !FEATURE_STACK_FP_X87
+
+// When bOnlyNoMemAccess = true, the load will be generated only for constant loads that don't
+// involve memory accesses (i.e., fldz for positive zero, or fld1 for 1). Returns true if the
+// function did the load.
+bool genConstantLoadStackFP(GenTreePtr tree, bool bOnlyNoMemAccess = false);
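+// For example (illustrative only): with bOnlyNoMemAccess == true, the preceding
+// genConstantLoadStackFP can load a GT_CNS_DBL of +0.0 with 'fldz' and return true,
+// whereas a constant such as 2.5 would require a memory load, so nothing is emitted
+// and false is returned.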
+void genEndOfStatement();
+
+#if FEATURE_STACK_FP_X87
+struct genRegVarDiesInSubTreeData
+{
+ regNumber reg;
+ bool result;
+};
+static Compiler::fgWalkPreFn genRegVarDiesInSubTreeWorker;
+bool genRegVarDiesInSubTree(GenTreePtr tree, regNumber reg);
+#endif // FEATURE_STACK_FP_X87
+
+// Float spill
+void UnspillFloat(RegSet::SpillDsc* spillDsc);
+void UnspillFloat(GenTreePtr tree);
+void UnspillFloat(LclVarDsc* varDsc);
+void UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc);
+void UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc, bool useSameReg);
+void RemoveSpillDsc(RegSet::SpillDsc* spillDsc);
+
+protected:
+struct genLivenessSet
+{
+ VARSET_TP liveSet;
+ VARSET_TP varPtrSet;
+ regMaskSmall maskVars;
+ regMaskSmall gcRefRegs;
+ regMaskSmall byRefRegs;
+
+ genLivenessSet()
+ : VARSET_INIT_NOCOPY(liveSet, VarSetOps::UninitVal()), VARSET_INIT_NOCOPY(varPtrSet, VarSetOps::UninitVal())
+ {
+ }
+};
+
+void saveLiveness(genLivenessSet* ls);
+void restoreLiveness(genLivenessSet* ls);
+void checkLiveness(genLivenessSet* ls);
+void unspillLiveness(genLivenessSet* ls);
+
+//-------------------------------------------------------------------------
+//
+// If we know that the flags register is set to a value that corresponds
+// to the current value of a register or variable, the following values
+// record that information.
+//
+
+emitLocation genFlagsEqLoc;
+regNumber genFlagsEqReg;
+unsigned genFlagsEqVar;
+
+void genFlagsEqualToNone();
+void genFlagsEqualToReg(GenTreePtr tree, regNumber reg);
+void genFlagsEqualToVar(GenTreePtr tree, unsigned var);
+bool genFlagsAreReg(regNumber reg);
+bool genFlagsAreVar(unsigned var);
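+// Illustrative example (x86, an assumption rather than source text): after emitting
+// 'add eax, 4' for a local V02 that lives in EAX, genFlagsEqualToVar records that the
+// flags reflect V02 at genFlagsEqLoc; if the next compare tests V02 against zero and
+// nothing has clobbered the flags, genFlagsAreVar lets codegen skip a redundant
+// 'test eax, eax'.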
+
+#endif // LEGACY_BACKEND
+
+#endif // _CODEGENCLASSIC_H_
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
new file mode 100755
index 0000000000..2710447ade
--- /dev/null
+++ b/src/jit/codegencommon.cpp
@@ -0,0 +1,11779 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Code Generator Common: XX
+XX Methods common to all architectures and register allocation strategies XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+// TODO-Cleanup: There are additional methods in CodeGen*.cpp that are almost
+// identical, and which should probably be moved here.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "codegen.h"
+
+#include "gcinfo.h"
+#include "emit.h"
+
+#ifndef JIT32_GCENCODER
+#include "gcinfoencoder.h"
+#endif
+
+/*****************************************************************************/
+
+const BYTE genTypeSizes[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sz,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+const BYTE genTypeAlignments[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) al,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+const BYTE genTypeStSzs[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) st,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+const BYTE genActualTypes[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) jitType,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+void CodeGenInterface::setFramePointerRequiredEH(bool value)
+{
+ m_cgFramePointerRequired = value;
+
+#ifndef JIT32_GCENCODER
+ if (value)
+ {
+ // EnumGcRefs will only enumerate slots in aborted frames
+ // if they are fully-interruptible. So if we have a catch
+ // or finally that will keep frame-vars alive, we need to
+ // force fully-interruptible.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Method has EH, marking method as fully interruptible\n");
+ }
+#endif
+
+ m_cgInterruptible = true;
+ }
+#endif // JIT32_GCENCODER
+}
+
+/*****************************************************************************/
+CodeGenInterface* getCodeGenerator(Compiler* comp)
+{
+ return new (comp, CMK_Codegen) CodeGen(comp);
+}
+
+// CodeGenInterface constructor
+CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
+ : gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler)
+{
+}
+
+/*****************************************************************************/
+
+CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
+{
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ negBitmaskFlt = nullptr;
+ negBitmaskDbl = nullptr;
+ absBitmaskFlt = nullptr;
+ absBitmaskDbl = nullptr;
+ u8ToDblBitmask = nullptr;
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+ regTracker.rsTrackInit(compiler, &regSet);
+ gcInfo.regSet = &regSet;
+ m_cgEmitter = new (compiler->getAllocator()) emitter();
+ m_cgEmitter->codeGen = this;
+ m_cgEmitter->gcInfo = &gcInfo;
+
+#ifdef DEBUG
+ setVerbose(compiler->verbose);
+#endif // DEBUG
+
+ compiler->tmpInit();
+
+#ifdef DEBUG
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+ // This appears to be x86-specific. It's attempting to make sure all offsets to temps
+ // are large. For ARM, this doesn't interact well with our decision about whether to use
+ // R10 or not as a reserved register.
+ if (regSet.rsStressRegs())
+ compiler->tmpIntSpillMax = (SCHAR_MAX / sizeof(int));
+#endif // defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+#endif // DEBUG
+
+ instInit();
+
+#ifdef LEGACY_BACKEND
+ // TODO-Cleanup: These used to be set in rsInit() - should they be moved to RegSet??
+ // They are also accessed by the register allocators and fgMorphLclVar().
+ intRegState.rsCurRegArgNum = 0;
+ floatRegState.rsCurRegArgNum = 0;
+#endif // LEGACY_BACKEND
+
+#ifdef LATE_DISASM
+ getDisAssembler().disInit(compiler);
+#endif
+
+#ifdef DEBUG
+ genTempLiveChg = true;
+ genTrnslLocalVarCount = 0;
+
+ // Shouldn't be used before it is set in genFnProlog()
+ compiler->compCalleeRegsPushed = UninitializedWord<unsigned>();
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // Shouldn't be used before it is set in genFnProlog()
+ compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1;
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+#endif // DEBUG
+
+#ifdef _TARGET_AMD64_
+ // This will be set before final frame layout.
+ compiler->compVSQuirkStackPaddingNeeded = 0;
+
+ // Set to true if we perform the Quirk that fixes the PPP issue
+ compiler->compQuirkForPPPflag = false;
+#endif // _TARGET_AMD64_
+
+#ifdef LEGACY_BACKEND
+ genFlagsEqualToNone();
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUGGING_SUPPORT
+ // Initialize the IP-mapping logic.
+ compiler->genIPmappingList = nullptr;
+ compiler->genIPmappingLast = nullptr;
+ compiler->genCallSite2ILOffsetMap = nullptr;
+#endif
+
+ /* Assume that we are not fully interruptible */
+
+ genInterruptible = false;
+#ifdef DEBUG
+ genInterruptibleUsed = false;
+ genCurDispOffset = (unsigned)-1;
+#endif
+}
+
+void CodeGenInterface::genMarkTreeInReg(GenTreePtr tree, regNumber reg)
+{
+ tree->gtRegNum = reg;
+ tree->gtFlags |= GTF_REG_VAL;
+}
+
+#if CPU_LONG_USES_REGPAIR
+void CodeGenInterface::genMarkTreeInRegPair(GenTreePtr tree, regPairNo regPair)
+{
+ tree->gtRegPair = regPair;
+ tree->gtFlags |= GTF_REG_VAL;
+}
+#endif
+
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+
+//---------------------------------------------------------------------
+// genTotalFrameSize - return the "total" size of the stack frame, including local size
+// and callee-saved register size. There are a few things "missing" depending on the
+// platform. The function genCallerSPtoInitialSPdelta() includes those things.
+//
+// For ARM, this doesn't include the prespilled registers.
+//
+// For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true.
+// It also doesn't include the pushed return address.
+//
+// Return value:
+// Frame size
+
+int CodeGenInterface::genTotalFrameSize()
+{
+ assert(!IsUninitialized(compiler->compCalleeRegsPushed));
+
+ int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
+
+ assert(totalFrameSize >= 0);
+ return totalFrameSize;
+}
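+
+// Worked example (illustrative, x86 assumptions): with 3 callee-saved registers pushed
+// (REGSIZE_BYTES == 4) and compLclFrameSize == 0x40, genTotalFrameSize() returns
+// 3 * 4 + 0x40 = 0x4C; per the notes above, the pushed return address and the saved
+// frame pointer (when one is used) are not part of this total.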
+
+//---------------------------------------------------------------------
+// genSPtoFPdelta - return the offset from SP to the frame pointer.
+// This number is going to be positive, since SP must be at the lowest
+// address.
+//
+// There must be a frame pointer to call this function!
+
+int CodeGenInterface::genSPtoFPdelta()
+{
+ assert(isFramePointerUsed());
+
+ int delta;
+
+ delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta();
+
+ assert(delta >= 0);
+ return delta;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
+// This number is going to be negative, since the Caller-SP is at a higher
+// address than the frame pointer.
+//
+// There must be a frame pointer to call this function!
+
+int CodeGenInterface::genCallerSPtoFPdelta()
+{
+ assert(isFramePointerUsed());
+ int callerSPtoFPdelta = 0;
+
+#if defined(_TARGET_ARM_)
+ // On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11.
+ callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+ callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
+#elif defined(_TARGET_X86_)
+ // Thanks to ebp chaining, the difference between ebp-based addresses
+ // and caller-SP-relative addresses is just the 2 pointers:
+ // return address
+ // pushed ebp
+ callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
+#else
+#error "Unknown _TARGET_"
+#endif // _TARGET_*
+
+ assert(callerSPtoFPdelta <= 0);
+ return callerSPtoFPdelta;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
+//
+// This number will be negative.
+
+int CodeGenInterface::genCallerSPtoInitialSPdelta()
+{
+ int callerSPtoSPdelta = 0;
+
+#if defined(_TARGET_ARM_)
+ callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+ callerSPtoSPdelta -= genTotalFrameSize();
+#elif defined(_TARGET_X86_)
+ callerSPtoSPdelta -= genTotalFrameSize();
+ callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
+
+ // compCalleeRegsPushed does not account for the frame pointer
+ // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
+ if (isFramePointerUsed())
+ {
+ callerSPtoSPdelta -= REGSIZE_BYTES;
+ }
+#else
+#error "Unknown _TARGET_"
+#endif // _TARGET_*
+
+ assert(callerSPtoSPdelta <= 0);
+ return callerSPtoSPdelta;
+}
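+
+// Worked example tying the deltas together (illustrative, x86 assumptions): with an EBP
+// frame, 2 callee-saved registers pushed besides EBP, and 0x20 bytes of locals:
+// genTotalFrameSize() = 2 * 4 + 0x20 = 0x28,
+// genCallerSPtoFPdelta() = -8 (return address and pushed EBP), and
+// genCallerSPtoInitialSPdelta() = -(0x28 + 4 + 4) = -0x30, so
+// genSPtoFPdelta() = 0x30 + (-8) = 0x28.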
+
+#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+
+/*****************************************************************************
+ * Should we round simple operations (assignments, arithmetic operations, etc.)?
+ */
+
+// inline
+// static
+bool CodeGen::genShouldRoundFP()
+{
+ RoundLevel roundLevel = getRoundFloatLevel();
+
+ switch (roundLevel)
+ {
+ case ROUND_NEVER:
+ case ROUND_CMP_CONST:
+ case ROUND_CMP:
+ return false;
+
+ default:
+ assert(roundLevel == ROUND_ALWAYS);
+ return true;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Initialize some global variables.
+ */
+
+void CodeGen::genPrepForCompiler()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ /* Figure out which non-register variables hold pointers */
+
+ VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler));
+
+ // Figure out which variables live in registers.
+ // Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live
+ // in a register (i.e. they live on the stack for all or part of their lifetime).
+ // Note that lvRegister indicates that a lclVar is in a register for its entire lifetime.
+
+ VarSetOps::AssignNoCopy(compiler, compiler->raRegVarsMask, VarSetOps::MakeEmpty(compiler));
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->lvTracked
+#ifndef LEGACY_BACKEND
+ || varDsc->lvIsRegCandidate()
+#endif // !LEGACY_BACKEND
+ )
+ {
+ if (varDsc->lvRegister
+#if FEATURE_STACK_FP_X87
+ && !varDsc->IsFloatRegType()
+#endif
+ )
+ {
+ VarSetOps::AddElemD(compiler, compiler->raRegVarsMask, varDsc->lvVarIndex);
+ }
+ else if (compiler->lvaIsGCTracked(varDsc) && (!varDsc->lvIsParam || varDsc->lvIsRegArg))
+ {
+ VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
+ }
+ }
+ }
+ VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
+ genLastLiveMask = RBM_NONE;
+#ifdef DEBUG
+ compiler->fgBBcountAtCodegen = compiler->fgBBcount;
+#endif
+}
+
+/*****************************************************************************
+ * To report exception handling information to the VM, we need the size of the exception
+ * handling regions. To compute that, we need to emit labels for the beginning block of
+ * an EH region, and the block that immediately follows a region. Go through the EH
+ * table and mark all these blocks with BBF_HAS_LABEL to make this happen.
+ *
+ * The beginning blocks of the EH regions should already have this flag set.
+ *
+ * No blocks should be added or removed after this.
+ *
+ * This code is closely coupled with genReportEH() in the sense that any block
+ * that this procedure determines needs a label has to be selected
+ * using the same logic both here and in genReportEH(); so basically, any time there is
+ * a change in the way we handle EH reporting, we have to keep the logic of these two
+ * methods 'in sync'.
+ */
+
+void CodeGen::genPrepForEHCodegen()
+{
+ assert(!compiler->fgSafeBasicBlockCreation);
+
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ bool anyFinallys = false;
+
+ for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
+ HBtab < HBtabEnd; HBtab++)
+ {
+ assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL);
+ assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL);
+
+ if (HBtab->ebdTryLast->bbNext != nullptr)
+ {
+ HBtab->ebdTryLast->bbNext->bbFlags |= BBF_HAS_LABEL;
+ }
+
+ if (HBtab->ebdHndLast->bbNext != nullptr)
+ {
+ HBtab->ebdHndLast->bbNext->bbFlags |= BBF_HAS_LABEL;
+ }
+
+ if (HBtab->HasFilter())
+ {
+ assert(HBtab->ebdFilter->bbFlags & BBF_HAS_LABEL);
+ // The block after the last block of the filter is
+ // the handler begin block, which we already asserted
+ // has BBF_HAS_LABEL set.
+ }
+
+#ifdef _TARGET_AMD64_
+ if (HBtab->HasFinallyHandler())
+ {
+ anyFinallys = true;
+ }
+#endif // _TARGET_AMD64_
+ }
+
+#ifdef _TARGET_AMD64_
+ if (anyFinallys)
+ {
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ BasicBlock* bbToLabel = block->bbNext;
+ if (block->isBBCallAlwaysPair())
+ {
+ bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS
+ }
+ if (bbToLabel != nullptr)
+ {
+ bbToLabel->bbFlags |= BBF_HAS_LABEL;
+ }
+ } // block is BBJ_CALLFINALLY
+ } // for each block
+ } // if (anyFinallys)
+#endif // _TARGET_AMD64_
+}
+
+void CodeGenInterface::genUpdateLife(GenTreePtr tree)
+{
+ compiler->compUpdateLife</*ForCodeGen*/ true>(tree);
+}
+
+void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
+{
+ compiler->compUpdateLife</*ForCodeGen*/ true>(newLife);
+}
+
+#ifdef LEGACY_BACKEND
+// Returns the liveSet after tree has executed.
+// "tree" MUST occur in the current statement, AFTER the most recent
+// update of compiler->compCurLifeTree and compiler->compCurLife.
+//
+VARSET_VALRET_TP CodeGen::genUpdateLiveSetForward(GenTreePtr tree)
+{
+ VARSET_TP VARSET_INIT(compiler, startLiveSet, compiler->compCurLife);
+ GenTreePtr startNode;
+ assert(tree != compiler->compCurLifeTree);
+ if (compiler->compCurLifeTree == nullptr)
+ {
+ assert(compiler->compCurStmt != nullptr);
+ startNode = compiler->compCurStmt->gtStmt.gtStmtList;
+ }
+ else
+ {
+ startNode = compiler->compCurLifeTree->gtNext;
+ }
+ return compiler->fgUpdateLiveSet(startLiveSet, startNode, tree);
+}
+
+// Determine the registers that are live after "second" has been evaluated,
+// but which are not live after "first".
+// PRECONDITIONS:
+// 1. "first" must occur after compiler->compCurLifeTree in execution order for the current statement
+// 2. "second" must occur after "first" in the current statement
+//
+regMaskTP CodeGen::genNewLiveRegMask(GenTreePtr first, GenTreePtr second)
+{
+ // First, compute the liveset after "first"
+ VARSET_TP firstLiveSet = genUpdateLiveSetForward(first);
+ // Now, update the set forward from "first" to "second"
+ VARSET_TP secondLiveSet = compiler->fgUpdateLiveSet(firstLiveSet, first->gtNext, second);
+ regMaskTP newLiveMask = genLiveMask(VarSetOps::Diff(compiler, secondLiveSet, firstLiveSet));
+ return newLiveMask;
+}
+#endif
+
+// Return the register mask for the given register variable
+// inline
+regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
+{
+ regMaskTP regMask = RBM_NONE;
+
+ assert(varDsc->lvIsInReg());
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ regMask = genRegMask(varDsc->lvRegNum);
+ if (isRegPairType(varDsc->lvType))
+ {
+ regMask |= genRegMask(varDsc->lvOtherReg);
+ }
+ }
+ return regMask;
+}
+
+// Return the register mask for the given lclVar or regVar tree node
+// inline
+regMaskTP CodeGenInterface::genGetRegMask(GenTreePtr tree)
+{
+ assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_REG_VAR);
+
+ regMaskTP regMask = RBM_NONE;
+ const LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
+ if (varDsc->lvPromoted)
+ {
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ noway_assert(compiler->lvaTable[i].lvIsStructField);
+ if (compiler->lvaTable[i].lvIsInReg())
+ {
+ regMask |= genGetRegMask(&compiler->lvaTable[i]);
+ }
+ }
+ }
+ else if (varDsc->lvIsInReg())
+ {
+ regMask = genGetRegMask(varDsc);
+ }
+ return regMask;
+}
+
+//------------------------------------------------------------------------
+// getRegistersFromMask: Given a register mask return the two registers
+// specified by the mask.
+//
+// Arguments:
+// regPairMask: a register mask that has exactly two bits set
+// Return values:
+// pLoReg: the address of where to write the first register
+// pHiReg: the address of where to write the second register
+//
+void CodeGenInterface::genGetRegPairFromMask(regMaskTP regPairMask, regNumber* pLoReg, regNumber* pHiReg)
+{
+ assert(genCountBits(regPairMask) == 2);
+
+ regMaskTP loMask = genFindLowestBit(regPairMask); // set loMask to a one-bit mask
+ regMaskTP hiMask = regPairMask - loMask; // set hiMask to the other bit that was in regPairMask
+
+ regNumber loReg = genRegNumFromMask(loMask); // set loReg from loMask
+ regNumber hiReg = genRegNumFromMask(hiMask); // set hiReg from hiMask
+
+ *pLoReg = loReg;
+ *pHiReg = hiReg;
+}
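+
+// For example, given a regPairMask with exactly two bits set, *pLoReg receives the register
+// corresponding to the lower-numbered bit and *pHiReg receives the other one.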
+
+// The given lclVar is either going live (being born) or dying.
+// It might be both going live and dying (that is, it is a dead store) under MinOpts.
+// Update regSet.rsMaskVars accordingly.
+// inline
+void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTreePtr tree))
+{
+#if FEATURE_STACK_FP_X87
+ // The stack fp reg vars are handled elsewhere
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ return;
+#endif
+
+ regMaskTP regMask = genGetRegMask(varDsc);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u in reg ", (varDsc - compiler->lvaTable));
+ varDsc->PrintVarReg();
+ printf(" is becoming %s ", (isDying) ? "dead" : "live");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ if (isDying)
+ {
+ // We'd like to be able to assert the following, however if we are walking
+ // through a qmark/colon tree, we may encounter multiple last-use nodes.
+ // assert((regSet.rsMaskVars & regMask) == regMask);
+ regSet.RemoveMaskVars(regMask);
+ }
+ else
+ {
+ assert((regSet.rsMaskVars & regMask) == 0);
+ regSet.AddMaskVars(regMask);
+ }
+}
+
+// Gets a register mask that represents the kill set for a helper call since
+// not all JIT Helper calls follow the standard ABI on the target architecture.
+//
+// TODO-CQ: Currently this list is incomplete (not all helper calls are
+// enumerated) and not 100% accurate (some killsets are bigger than
+// the set of registers the helpers actually trash).
+// There's some work to be done in several places in the JIT to
+// accurately track the registers that are getting killed by
+// helper calls:
+// a) LSRA needs several changes to accommodate more precise killsets
+// for every helper call it sees (both explicitly [easy] and
+// implicitly [hard])
+// b) Currently for AMD64, when we generate code for a helper call
+// we're independently over-pessimizing the killsets of the call
+// (independently from LSRA) and this needs changes
+// both in CodeGenAmd64.cpp and emitx86.cpp.
+//
+// The best solution for this problem would be to try to centralize
+// the killset information in a single place but then make the
+// corresponding changes so every code generation phase is in sync
+// about this.
+//
+// The interim solution is to only add known helper calls that don't
+// follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
+regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
+{
+ switch (helper)
+ {
+ case CORINFO_HELP_ASSIGN_BYREF:
+#if defined(_TARGET_AMD64_)
+ return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH;
+#elif defined(_TARGET_ARM64_)
+ return RBM_CALLEE_TRASH_NOGC;
+#else
+ NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch");
+ return RBM_CALLEE_TRASH;
+#endif
+
+ case CORINFO_HELP_PROF_FCN_ENTER:
+#ifdef _TARGET_AMD64_
+ return RBM_PROFILER_ENTER_TRASH;
+#else
+ unreached();
+#endif
+ case CORINFO_HELP_PROF_FCN_LEAVE:
+ case CORINFO_HELP_PROF_FCN_TAILCALL:
+#ifdef _TARGET_AMD64_
+ return RBM_PROFILER_LEAVE_TRASH;
+#else
+ unreached();
+#endif
+
+ case CORINFO_HELP_STOP_FOR_GC:
+ return RBM_STOP_FOR_GC_TRASH;
+
+ case CORINFO_HELP_INIT_PINVOKE_FRAME:
+ return RBM_INIT_PINVOKE_FRAME_TRASH;
+
+ default:
+ return RBM_CALLEE_TRASH;
+ }
+}
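+
+// For example, on AMD64 a call to CORINFO_HELP_ASSIGN_BYREF is modeled above as killing RSI, RDI
+// and the usual caller-trashed registers, so code generation must not keep a value live in RSI or
+// RDI across that helper call.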
+
+//
+// Gets a register mask that represents the kill set for "NO GC" helper calls since
+// not all JIT Helper calls follow the standard ABI on the target architecture.
+//
+// Note: This list may not be complete; helpers not listed below fall back to the default NOGC kill set.
+//
+regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper)
+{
+ assert(emitter::emitNoGChelper(helper));
+#ifdef _TARGET_AMD64_
+ switch (helper)
+ {
+ case CORINFO_HELP_PROF_FCN_ENTER:
+ return RBM_PROFILER_ENTER_TRASH;
+
+ case CORINFO_HELP_PROF_FCN_LEAVE:
+ case CORINFO_HELP_PROF_FCN_TAILCALL:
+ return RBM_PROFILER_LEAVE_TRASH;
+
+ case CORINFO_HELP_ASSIGN_BYREF:
+ // this helper doesn't trash RSI and RDI
+ return RBM_CALLEE_TRASH_NOGC & ~(RBM_RSI | RBM_RDI);
+
+ default:
+ return RBM_CALLEE_TRASH_NOGC;
+ }
+#else
+ return RBM_CALLEE_TRASH_NOGC;
+#endif
+}
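+
+// For example, in the no-GC kill set above CORINFO_HELP_ASSIGN_BYREF is modeled as preserving
+// RSI and RDI, in contrast to the full kill set returned by compHelperCallKillSet.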
+
+// Update liveness (always var liveness, i.e., compCurLife, and also, if "ForCodeGen" is true, reg liveness, i.e.,
+// regSet.rsMaskVars as well)
+// if the given lclVar (or indir(addr(local)))/regVar node is going live (being born) or dying.
+template <bool ForCodeGen>
+void Compiler::compUpdateLifeVar(GenTreePtr tree, VARSET_TP* pLastUseVars)
+{
+ GenTreePtr indirAddrLocal = fgIsIndirOfAddrOfLocal(tree);
+ assert(tree->OperIsNonPhiLocal() || indirAddrLocal != nullptr);
+
+ // Get the local var tree -- if "tree" is "Ldobj(addr(x))", or "ind(addr(x))" this is "x", else it's "tree".
+ GenTreePtr lclVarTree = indirAddrLocal;
+ if (lclVarTree == nullptr)
+ {
+ lclVarTree = tree;
+ }
+ unsigned int lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+#ifdef DEBUG
+#if !defined(_TARGET_AMD64_)
+ // There are no addr nodes on ARM and we are experimenting with encountering vars in 'random' order.
+ // Struct fields are not traversed in a consistent order, so ignore them when
+ // verifying that we see the var nodes in execution order
+ if (ForCodeGen)
+ {
+ if (tree->OperIsIndir())
+ {
+ assert(indirAddrLocal != NULL);
+ }
+ else if (tree->gtNext != NULL && tree->gtNext->gtOper == GT_ADDR &&
+ ((tree->gtNext->gtNext == NULL || !tree->gtNext->gtNext->OperIsIndir())))
+ {
+ assert(tree->IsLocal()); // Can only take the address of a local.
+ // The ADDR might occur in a context where the address it contributes is eventually
+ // dereferenced, so we can't say that this is not a use or def.
+ }
+#if 0
+ // TODO-ARM64-Bug?: These asserts don't seem right for ARM64: I don't understand why we have to assert
+ // two consecutive lclvars (in execution order) can only be observed if the first one is a struct field.
+ // It seems to me this is code only applicable to the legacy JIT and not RyuJIT (and therefore why it was
+ // ifdef'ed out for AMD64).
+ else if (!varDsc->lvIsStructField)
+ {
+ GenTreePtr prevTree;
+ for (prevTree = tree->gtPrev;
+ prevTree != NULL && prevTree != compCurLifeTree;
+ prevTree = prevTree->gtPrev)
+ {
+ if ((prevTree->gtOper == GT_LCL_VAR) || (prevTree->gtOper == GT_REG_VAR))
+ {
+ LclVarDsc * prevVarDsc = lvaTable + prevTree->gtLclVarCommon.gtLclNum;
+
+ // These are the only things for which this method MUST be called
+ assert(prevVarDsc->lvIsStructField);
+ }
+ }
+ assert(prevTree == compCurLifeTree);
+ }
+#endif // 0
+ }
+#endif // !_TARGET_AMD64_
+#endif // DEBUG
+
+ compCurLifeTree = tree;
+ VARSET_TP VARSET_INIT(this, newLife, compCurLife);
+
+ // By codegen, a struct may not be TYP_STRUCT, so we have to
+ // check lvPromoted, for the case where the fields are being
+ // tracked.
+ if (!varDsc->lvTracked && !varDsc->lvPromoted)
+ {
+ return;
+ }
+
+ bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0); // if it's "x <op>=
+ // ..." then variable
+ // "x" must have had a
+ // previous, original,
+ // site to be born.
+ bool isDying = ((tree->gtFlags & GTF_VAR_DEATH) != 0);
+#ifndef LEGACY_BACKEND
+ bool spill = ((tree->gtFlags & GTF_SPILL) != 0);
+#endif // !LEGACY_BACKEND
+
+#ifndef LEGACY_BACKEND
+ // For RyuJIT backend, since all tracked vars are register candidates, but not all are in registers at all times,
+ // we maintain two separate sets of variables - the total set of variables that are either
+ // born or dying here, and the subset of those that are on the stack
+ VARSET_TP VARSET_INIT_NOCOPY(stackVarDeltaSet, VarSetOps::MakeEmpty(this));
+#endif // !LEGACY_BACKEND
+
+ if (isBorn || isDying)
+ {
+ bool hasDeadTrackedFieldVars = false; // If this is true, then, for a LDOBJ(ADDR(<promoted struct local>)),
+ VARSET_TP* deadTrackedFieldVars =
+ nullptr; // *deadTrackedFieldVars indicates which tracked field vars are dying.
+ VARSET_TP VARSET_INIT_NOCOPY(varDeltaSet, VarSetOps::MakeEmpty(this));
+
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(this, varDeltaSet, varDsc->lvVarIndex);
+ if (ForCodeGen)
+ {
+#ifndef LEGACY_BACKEND
+ if (isBorn && varDsc->lvIsRegCandidate() && tree->gtHasReg())
+ {
+ codeGen->genUpdateVarReg(varDsc, tree);
+ }
+#endif // !LEGACY_BACKEND
+ if (varDsc->lvIsInReg()
+#ifndef LEGACY_BACKEND
+ && tree->gtRegNum != REG_NA
+#endif // !LEGACY_BACKEND
+ )
+ {
+ codeGen->genUpdateRegLife(varDsc, isBorn, isDying DEBUGARG(tree));
+ }
+#ifndef LEGACY_BACKEND
+ else
+ {
+ VarSetOps::AddElemD(this, stackVarDeltaSet, varDsc->lvVarIndex);
+ }
+#endif // !LEGACY_BACKEND
+ }
+ }
+ else if (varDsc->lvPromoted)
+ {
+ if (indirAddrLocal != nullptr && isDying)
+ {
+ assert(!isBorn); // GTF_VAR_DEATH only set for LDOBJ last use.
+ hasDeadTrackedFieldVars = GetPromotedStructDeathVars()->Lookup(indirAddrLocal, &deadTrackedFieldVars);
+ if (hasDeadTrackedFieldVars)
+ {
+ VarSetOps::Assign(this, varDeltaSet, *deadTrackedFieldVars);
+ }
+ }
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ LclVarDsc* fldVarDsc = &(lvaTable[i]);
+ noway_assert(fldVarDsc->lvIsStructField);
+ if (fldVarDsc->lvTracked)
+ {
+ unsigned fldVarIndex = fldVarDsc->lvVarIndex;
+ noway_assert(fldVarIndex < lvaTrackedCount);
+ if (!hasDeadTrackedFieldVars)
+ {
+ VarSetOps::AddElemD(this, varDeltaSet, fldVarIndex);
+ if (ForCodeGen)
+ {
+ // We repeat this call here and below to avoid the VarSetOps::IsMember
+ // test in this, the common case, where we have no deadTrackedFieldVars.
+ if (fldVarDsc->lvIsInReg())
+ {
+#ifndef LEGACY_BACKEND
+ if (isBorn)
+ {
+ codeGen->genUpdateVarReg(fldVarDsc, tree);
+ }
+#endif // !LEGACY_BACKEND
+ codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree));
+ }
+#ifndef LEGACY_BACKEND
+ else
+ {
+ VarSetOps::AddElemD(this, stackVarDeltaSet, fldVarIndex);
+ }
+#endif // !LEGACY_BACKEND
+ }
+ }
+ else if (ForCodeGen && VarSetOps::IsMember(this, varDeltaSet, fldVarIndex))
+ {
+ if (lvaTable[i].lvIsInReg())
+ {
+#ifndef LEGACY_BACKEND
+ if (isBorn)
+ {
+ codeGen->genUpdateVarReg(fldVarDsc, tree);
+ }
+#endif // !LEGACY_BACKEND
+ codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree));
+ }
+#ifndef LEGACY_BACKEND
+ else
+ {
+ VarSetOps::AddElemD(this, stackVarDeltaSet, fldVarIndex);
+ }
+#endif // !LEGACY_BACKEND
+ }
+ }
+ }
+ }
+
+ // First, update the live set
+ if (isDying)
+ {
+ // We'd like to be able to assert the following, however if we are walking
+ // through a qmark/colon tree, we may encounter multiple last-use nodes.
+ // assert (VarSetOps::IsSubset(compiler, regVarDeltaSet, newLife));
+ VarSetOps::DiffD(this, newLife, varDeltaSet);
+ if (pLastUseVars != nullptr)
+ {
+ VarSetOps::Assign(this, *pLastUseVars, varDeltaSet);
+ }
+ }
+ else
+ {
+ // This shouldn't be in newLife, unless this is debug code, in which
+ // case we keep vars live everywhere, OR the variable is address-exposed,
+ // OR this block is part of a try block, in which case it may be live at the handler.
+ // Could add a check that, if it's in newLife, it's also in
+ // fgGetHandlerLiveVars(compCurBB), but that seems excessive.
+ //
+ // For a dead store, it can be the case that we set both isBorn and isDying to true.
+ // (We don't eliminate dead stores under MinOpts, so we can't assume they're always
+ // eliminated.) If it's both, we handled it above.
+ VarSetOps::UnionD(this, newLife, varDeltaSet);
+ }
+ }
+
+ if (!VarSetOps::Equal(this, compCurLife, newLife))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\t\t\t\t\t\t\tLive vars: ");
+ dumpConvertedVarSet(this, compCurLife);
+ printf(" => ");
+ dumpConvertedVarSet(this, newLife);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ VarSetOps::Assign(this, compCurLife, newLife);
+
+ if (ForCodeGen)
+ {
+#ifndef LEGACY_BACKEND
+
+ // Only add vars to the gcInfo.gcVarPtrSetCur if they are currently on stack, since the
+ // gcInfo.gcTrkStkPtrLcls
+ // includes all TRACKED vars that EVER live on the stack (i.e. are not always in a register).
+ VARSET_TP VARSET_INIT_NOCOPY(gcTrkStkDeltaSet,
+ VarSetOps::Intersection(this, codeGen->gcInfo.gcTrkStkPtrLcls,
+ stackVarDeltaSet));
+ if (!VarSetOps::IsEmpty(this, gcTrkStkDeltaSet))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\t\t\t\t\t\t\tGCvars: ");
+ dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
+ printf(" => ");
+ }
+#endif // DEBUG
+
+ if (isBorn)
+ {
+ VarSetOps::UnionD(this, codeGen->gcInfo.gcVarPtrSetCur, gcTrkStkDeltaSet);
+ }
+ else
+ {
+ VarSetOps::DiffD(this, codeGen->gcInfo.gcVarPtrSetCur, gcTrkStkDeltaSet);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+
+#else // LEGACY_BACKEND
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(gcVarPtrSetNew,
+ VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
+ if (!VarSetOps::Equal(this, codeGen->gcInfo.gcVarPtrSetCur, gcVarPtrSetNew))
+ {
+ printf("\t\t\t\t\t\t\tGCvars: ");
+ dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
+ printf(" => ");
+ dumpConvertedVarSet(this, gcVarPtrSetNew);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ VarSetOps::AssignNoCopy(this, codeGen->gcInfo.gcVarPtrSetCur,
+ VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
+
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUGGING_SUPPORT
+ codeGen->siUpdate();
+#endif
+ }
+ }
+
+#ifndef LEGACY_BACKEND
+ if (ForCodeGen && spill)
+ {
+ assert(!varDsc->lvPromoted);
+ codeGen->genSpillVar(tree);
+ if (VarSetOps::IsMember(this, codeGen->gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
+ {
+ if (!VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\t\t\t\t\t\t\tVar V%02u becoming live\n", varDsc - lvaTable);
+ }
+#endif // DEBUG
+ }
+ }
+ }
+#endif // !LEGACY_BACKEND
+}
+
+// Need an explicit instantiation.
+template void Compiler::compUpdateLifeVar<false>(GenTreePtr tree, VARSET_TP* pLastUseVars);
+
+template <bool ForCodeGen>
+void Compiler::compChangeLife(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree))
+{
+ LclVarDsc* varDsc;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (tree != nullptr)
+ {
+ Compiler::printTreeID(tree);
+ }
+ printf("Change life %s ", VarSetOps::ToString(this, compCurLife));
+ dumpConvertedVarSet(this, compCurLife);
+ printf(" -> %s ", VarSetOps::ToString(this, newLife));
+ dumpConvertedVarSet(this, newLife);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* We should only be called when the live set has actually changed */
+
+ noway_assert(!VarSetOps::Equal(this, compCurLife, newLife));
+
+ if (!ForCodeGen)
+ {
+ VarSetOps::Assign(this, compCurLife, newLife);
+ return;
+ }
+
+ /* Figure out which variables are becoming live/dead at this point */
+
+ // deadSet = compCurLife - newLife
+ VARSET_TP VARSET_INIT(this, deadSet, compCurLife);
+ VarSetOps::DiffD(this, deadSet, newLife);
+
+ // bornSet = newLife - compCurLife
+ VARSET_TP VARSET_INIT(this, bornSet, newLife);
+ VarSetOps::DiffD(this, bornSet, compCurLife);
+
+ /* Can't simultaneously become live and dead at the same time */
+
+ // (deadSet UNION bornSet) != EMPTY
+ noway_assert(!VarSetOps::IsEmpty(this, VarSetOps::Union(this, deadSet, bornSet)));
+ // (deadSet INTERSECTION bornSet) == EMPTY
+ noway_assert(VarSetOps::IsEmpty(this, VarSetOps::Intersection(this, deadSet, bornSet)));
+
+#ifdef LEGACY_BACKEND
+ // In the LEGACY_BACKEND case, we only consider variables that are fully enregistered,
+ // and there may be none.
+ VarSetOps::IntersectionD(this, deadSet, raRegVarsMask);
+ VarSetOps::IntersectionD(this, bornSet, raRegVarsMask);
+ // And all gcTrkStkPtrLcls that are now live will be on the stack
+ VarSetOps::AssignNoCopy(this, codeGen->gcInfo.gcVarPtrSetCur,
+ VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
+#endif // LEGACY_BACKEND
+
+ VarSetOps::Assign(this, compCurLife, newLife);
+
+ // Handle the dying vars first, then the newly live vars.
+ // This is because, in the RyuJIT backend case, they may occupy registers that
+ // will be occupied by another var that is newly live.
+ VARSET_ITER_INIT(this, deadIter, deadSet, deadVarIndex);
+ while (deadIter.NextElem(this, &deadVarIndex))
+ {
+ unsigned varNum = lvaTrackedToVarNum[deadVarIndex];
+ varDsc = lvaTable + varNum;
+ bool isGCRef = (varDsc->TypeGet() == TYP_REF);
+ bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
+
+ if (varDsc->lvIsInReg())
+ {
+ // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
+ // gc sets
+ regMaskTP regMask = varDsc->lvRegMask();
+ if (isGCRef)
+ {
+ codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
+ }
+ else if (isByRef)
+ {
+ codeGen->gcInfo.gcRegByrefSetCur &= ~regMask;
+ }
+ codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(tree));
+ }
+#ifndef LEGACY_BACKEND
+ // This isn't in a register, so update the gcVarPtrSetCur.
+ // (Note that in the LEGACY_BACKEND case gcVarPtrSetCur is updated above unconditionally
+ // for all gcTrkStkPtrLcls in newLife, because none of them ever live in a register.)
+ else if (isGCRef || isByRef)
+ {
+ VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex);
+ JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum);
+ }
+#endif // !LEGACY_BACKEND
+ }
+
+ VARSET_ITER_INIT(this, bornIter, bornSet, bornVarIndex);
+ while (bornIter.NextElem(this, &bornVarIndex))
+ {
+ unsigned varNum = lvaTrackedToVarNum[bornVarIndex];
+ varDsc = lvaTable + varNum;
+ bool isGCRef = (varDsc->TypeGet() == TYP_REF);
+ bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
+
+ if (varDsc->lvIsInReg())
+ {
+#ifndef LEGACY_BACKEND
+#ifdef DEBUG
+ if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
+#endif // !LEGACY_BACKEND
+ codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(tree));
+ regMaskTP regMask = varDsc->lvRegMask();
+ if (isGCRef)
+ {
+ codeGen->gcInfo.gcRegGCrefSetCur |= regMask;
+ }
+ else if (isByRef)
+ {
+ codeGen->gcInfo.gcRegByrefSetCur |= regMask;
+ }
+ }
+#ifndef LEGACY_BACKEND
+ // This isn't in a register, so update the gcVarPtrSetCur
+ else if (lvaIsGCTracked(varDsc))
+ {
+ VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
+ JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum);
+ }
+#endif // !LEGACY_BACKEND
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ codeGen->siUpdate();
+#endif
+}
+
+// Need an explicit instantiation.
+template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree));
+
+#ifdef LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Get the mask of integer registers that contain 'live' enregistered
+ * local variables after "tree".
+ *
+ * The output is the mask of integer registers that are currently
+ * alive and holding the enregistered local variables.
+ */
+regMaskTP CodeGenInterface::genLiveMask(GenTreePtr tree)
+{
+ regMaskTP liveMask = regSet.rsMaskVars;
+
+ GenTreePtr nextNode;
+ if (compiler->compCurLifeTree == nullptr)
+ {
+ assert(compiler->compCurStmt != nullptr);
+ nextNode = compiler->compCurStmt->gtStmt.gtStmtList;
+ }
+ else
+ {
+ nextNode = compiler->compCurLifeTree->gtNext;
+ }
+
+ // Theoretically, we should always be able to find "tree" by walking
+ // forward in execution order. But unfortunately, there is at least
+ // one case (addressing) where a node may be evaluated out of order.
+ // So, we have to handle that case.
+ bool outOfOrder = false;
+ for (; nextNode != tree->gtNext; nextNode = nextNode->gtNext)
+ {
+ if (nextNode == nullptr)
+ {
+ outOfOrder = true;
+ break;
+ }
+ if (nextNode->gtOper == GT_LCL_VAR || nextNode->gtOper == GT_REG_VAR)
+ {
+ bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0);
+ bool isDying = ((nextNode->gtFlags & GTF_VAR_DEATH) != 0);
+ if (isBorn || isDying)
+ {
+ regMaskTP regMask = genGetRegMask(nextNode);
+ if (regMask != RBM_NONE)
+ {
+ if (isBorn)
+ {
+ liveMask |= regMask;
+ }
+ else
+ {
+ liveMask &= ~(regMask);
+ }
+ }
+ }
+ }
+ }
+ if (outOfOrder)
+ {
+ assert(compiler->compCurLifeTree != nullptr);
+ liveMask = regSet.rsMaskVars;
+ // We were unable to find "tree" by traversing forward. We must now go
+ // backward from compiler->compCurLifeTree instead. We have to start with compiler->compCurLifeTree,
+ // since regSet.rsMaskVars reflects its completed execution
+ for (nextNode = compiler->compCurLifeTree; nextNode != tree; nextNode = nextNode->gtPrev)
+ {
+ assert(nextNode != nullptr);
+
+ if (nextNode->gtOper == GT_LCL_VAR || nextNode->gtOper == GT_REG_VAR)
+ {
+ bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0);
+ bool isDying = ((nextNode->gtFlags & GTF_VAR_DEATH) != 0);
+ if (isBorn || isDying)
+ {
+ regMaskTP regMask = genGetRegMask(nextNode);
+ if (regMask != RBM_NONE)
+ {
+ // We're going backward - so things born are removed
+ // and vice versa
+ if (isBorn)
+ {
+ liveMask &= ~(regMask);
+ }
+ else
+ {
+ liveMask |= regMask;
+ }
+ }
+ }
+ }
+ }
+ }
+ return liveMask;
+}
+
+/*****************************************************************************
+ *
+ * Get the mask of integer registers that contain 'live' enregistered
+ * local variables.
+ *
+ * The input is a liveSet which contains a set of local
+ * variables that are currently alive.
+ *
+ * The output is the mask of x86 integer registers that are currently
+ * alive and holding the enregistered local variables.
+ */
+
+regMaskTP CodeGenInterface::genLiveMask(VARSET_VALARG_TP liveSet)
+{
+ // Check for the zero LiveSet mask
+ if (VarSetOps::IsEmpty(compiler, liveSet))
+ {
+ return RBM_NONE;
+ }
+
+ // Check if our liveSet matches the one we have cached: genLastLiveSet -> genLastLiveMask
+ if (VarSetOps::Equal(compiler, liveSet, genLastLiveSet))
+ {
+ return genLastLiveMask;
+ }
+
+ regMaskTP liveMask = 0;
+
+ VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+
+ // If the variable is not enregistered, then it can't contribute to the liveMask
+ if (!VarSetOps::IsMember(compiler, compiler->raRegVarsMask, varIndex))
+ {
+ continue;
+ }
+
+ // Find the variable in compiler->lvaTable
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+#if !FEATURE_FP_REGALLOC
+ // If the variable is a floating point type, then it can't contribute to the liveMask
+ if (varDsc->IsFloatRegType())
+ {
+ continue;
+ }
+#endif
+
+ noway_assert(compiler->lvaTable[varNum].lvRegister);
+ regMaskTP regBit;
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ regBit = genRegMask(varDsc->lvRegNum);
+
+ // For longs we may have two regs
+ if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
+ {
+ regBit |= genRegMask(varDsc->lvOtherReg);
+ }
+ }
+
+ noway_assert(regBit != 0);
+
+ // We should not already have any of these bits set
+ noway_assert((liveMask & regBit) == 0);
+
+ // Update the liveMask with the register bits that are live
+ liveMask |= regBit;
+ }
+
+ // cache the last mapping between gtLiveSet -> liveMask
+ VarSetOps::Assign(compiler, genLastLiveSet, liveSet);
+ genLastLiveMask = liveMask;
+
+ return liveMask;
+}
+
+#endif
+
+/*****************************************************************************
+ *
+ * Generate a spill.
+ */
+void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg)
+{
+ getEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
+}
+
+/*****************************************************************************
+ *
+ * Generate a reload.
+ */
+void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg)
+{
+ getEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
+}
+
+#ifdef LEGACY_BACKEND
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+void CodeGenInterface::reloadFloatReg(var_types type, TempDsc* tmp, regNumber reg)
+{
+ var_types tmpType = tmp->tdTempType();
+ getEmitter()->emitIns_R_S(ins_FloatLoad(type), emitActualTypeSize(tmpType), reg, tmp->tdTempNum(), 0);
+}
+#endif
+#endif // LEGACY_BACKEND
+
+// inline
+regNumber CodeGenInterface::genGetThisArgReg(GenTreePtr call)
+{
+ noway_assert(call->IsCall());
+ return REG_ARG_0;
+}
+
+//----------------------------------------------------------------------
+// getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
+//
+// Arguments:
+// tree - spilled GenTree node
+//
+// Return Value:
+// TempDsc corresponding to tree
+TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
+{
+ // tree must be in spilled state.
+ assert((tree->gtFlags & GTF_SPILLED) != 0);
+
+ // Get the tree's SpillDsc.
+ RegSet::SpillDsc* prevDsc;
+ RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc);
+ assert(spillDsc != nullptr);
+
+ // Get the temp desc.
+ TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc);
+ return temp;
+}
+
+#ifdef _TARGET_XARCH_
+
+#ifdef _TARGET_AMD64_
+// Returns relocation type hint for an addr.
+// Note that there are no reloc hints on x86.
+//
+// Arguments
+// addr - data address
+//
+// Returns
+// relocation type hint
+//
+unsigned short CodeGenInterface::genAddrRelocTypeHint(size_t addr)
+{
+ return compiler->eeGetRelocTypeHint((void*)addr);
+}
+#endif //_TARGET_AMD64_
+
+// Return true if an absolute indirect data address can be encoded as IP-relative
+// offset. Note that this method should be used only when the caller knows that
+// the address is an icon value that VM has given and there is no GenTree node
+// representing it. Otherwise, one should always use FitsInAddrBase().
+//
+// Arguments
+// addr - an absolute indirect data address
+//
+// Returns
+// true if indir data addr could be encoded as IP-relative offset.
+//
+bool CodeGenInterface::genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
+{
+#ifdef _TARGET_AMD64_
+ return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
+#else
+ // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
+ return false;
+#endif
+}
+
+// Return true if an indirect code address can be encoded as IP-relative offset.
+// Note that this method should be used only when the caller knows that the
+// address is an icon value that VM has given and there is no GenTree node
+// representing it. Otherwise, one should always use FitsInAddrBase().
+//
+// Arguments
+// addr - an absolute indirect code address
+//
+// Returns
+// true if indir code addr could be encoded as IP-relative offset.
+//
+bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
+{
+#ifdef _TARGET_AMD64_
+ return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
+#else
+ // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
+ return true;
+#endif
+}
+
+// Return true if an indirect code address can be encoded as 32-bit displacement
+// relative to zero. Note that this method should be used only when the caller
+// knows that the address is an icon value that VM has given and there is no
+// GenTree node representing it. Otherwise, one should always use FitsInAddrBase().
+//
+// Arguments
+// addr - absolute indirect code address
+//
+// Returns
+// true if absolute indir code addr could be encoded as 32-bit displacement relative to zero.
+//
+bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr)
+{
+ return GenTreeIntConCommon::FitsInI32((ssize_t)addr);
+}
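+
+// For example, on AMD64 an address such as 0x7FFF0000 fits in a signed 32-bit displacement and
+// can be encoded relative to zero, whereas an address such as 0x100000000 cannot.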
+
+// Return true if an absolute indirect code address needs a relocation recorded with VM.
+//
+// Arguments
+// addr - an absolute indirect code address
+//
+// Returns
+// true if indir code addr needs a relocation recorded with VM
+//
+bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr)
+{
+ // If generating relocatable ngen code, then all code addr should go through relocation
+ if (compiler->opts.compReloc)
+ {
+ return true;
+ }
+
+#ifdef _TARGET_AMD64_
+ // If code addr could be encoded as 32-bit offset relative to IP, we need to record a relocation.
+ if (genCodeIndirAddrCanBeEncodedAsPCRelOffset(addr))
+ {
+ return true;
+ }
+
+ // It could be possible that the code indir addr could be encoded as 32-bit displacement relative
+ // to zero. But we don't need to emit a relocation in that case.
+ return false;
+#else //_TARGET_X86_
+ // On x86 there is no need for recording relocations during jitting,
+ // because all addrs fit within 32-bits.
+ return false;
+#endif //_TARGET_X86_
+}
+
+// Return true if a direct code address needs to be marked as relocatable.
+//
+// Arguments
+// addr - absolute direct code address
+//
+// Returns
+// true if direct code addr needs a relocation recorded with VM
+//
+bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr)
+{
+ // If generating relocatable ngen code, then all code addr should go through relocation
+ if (compiler->opts.compReloc)
+ {
+ return true;
+ }
+
+#ifdef _TARGET_AMD64_
+ // By default all direct code addresses go through relocation so that VM will setup
+ // a jump stub if addr cannot be encoded as pc-relative offset.
+ return true;
+#else //_TARGET_X86_
+ // On x86 there is no need for recording relocations during jitting,
+ // because all addrs fit within 32-bits.
+ return false;
+#endif //_TARGET_X86_
+}
+#endif //_TARGET_XARCH_
+
+/*****************************************************************************
+ *
+ * The following can be used to create basic blocks that serve as labels for
+ * the emitter. Use with caution - these are not real basic blocks!
+ *
+ */
+
+// inline
+BasicBlock* CodeGen::genCreateTempLabel()
+{
+#ifdef DEBUG
+ // These blocks don't affect FP
+ compiler->fgSafeBasicBlockCreation = true;
+#endif
+
+ BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE);
+
+#ifdef DEBUG
+ compiler->fgSafeBasicBlockCreation = false;
+#endif
+
+ block->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ // Use coldness of current block, as this label will
+ // be contained in it.
+ block->bbFlags |= (compiler->compCurBB->bbFlags & BBF_COLD);
+
+#ifdef DEBUG
+ block->bbTgtStkDepth = genStackLevel / sizeof(int);
+#endif
+ return block;
+}
+
+// inline
+void CodeGen::genDefineTempLabel(BasicBlock* label)
+{
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, label->bbNum);
+ }
+#endif
+
+ label->bbEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+
+ /* gcInfo.gcRegGCrefSetCur does not account for redundant load-suppression
+ of GC vars, and the emitter will not know about them */
+
+ regTracker.rsTrackRegClrPtr();
+}
+
+/*****************************************************************************
+ *
+ * Adjust the stack pointer by the given value; assumes that this follows
+ * a call so only callee-saved registers (and registers that may hold a
+ * return value) are used at this point.
+ */
+
+void CodeGen::genAdjustSP(ssize_t delta)
+{
+#ifdef _TARGET_X86_
+ if (delta == sizeof(int))
+ inst_RV(INS_pop, REG_ECX, TYP_INT);
+ else
+#endif
+ inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE);
+}
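+
+// For example, on x86 genAdjustSP(4) after a call emits "pop ecx" rather than "add esp, 4",
+// presumably because the pop form has a smaller encoding; any other delta uses the add form.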
+
+#ifdef _TARGET_ARM_
+// return size
+// alignmentWB is out param
+unsigned CodeGenInterface::InferOpSizeAlign(GenTreePtr op, unsigned* alignmentWB)
+{
+ unsigned alignment = 0;
+ unsigned opSize = 0;
+
+ if (op->gtType == TYP_STRUCT || op->OperIsCopyBlkOp())
+ {
+ opSize = InferStructOpSizeAlign(op, &alignment);
+ }
+ else
+ {
+ alignment = genTypeAlignments[op->TypeGet()];
+ opSize = genTypeSizes[op->TypeGet()];
+ }
+
+ assert(opSize != 0);
+ assert(alignment != 0);
+
+ (*alignmentWB) = alignment;
+ return opSize;
+}
+// return size
+// alignmentWB is out param
+unsigned CodeGenInterface::InferStructOpSizeAlign(GenTreePtr op, unsigned* alignmentWB)
+{
+ unsigned alignment = 0;
+ unsigned opSize = 0;
+
+ while (op->gtOper == GT_COMMA)
+ {
+ op = op->gtOp.gtOp2;
+ }
+
+ if (op->gtOper == GT_OBJ)
+ {
+ CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->gtClass;
+ opSize = compiler->info.compCompHnd->getClassSize(clsHnd);
+ alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
+ }
+ else if (op->gtOper == GT_LCL_VAR)
+ {
+ unsigned varNum = op->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ assert(varDsc->lvType == TYP_STRUCT);
+ opSize = varDsc->lvSize();
+ if (varDsc->lvStructDoubleAlign)
+ {
+ alignment = TARGET_POINTER_SIZE * 2;
+ }
+ else
+ {
+ alignment = TARGET_POINTER_SIZE;
+ }
+ }
+ else if (op->OperIsCopyBlkOp())
+ {
+ GenTreePtr op2 = op->gtOp.gtOp2;
+
+ if (op2->OperGet() == GT_CNS_INT)
+ {
+ if (op2->IsIconHandle(GTF_ICON_CLASS_HDL))
+ {
+ CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
+ opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
+ alignment =
+ roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ opSize = op2->gtIntCon.gtIconVal;
+ GenTreePtr op1 = op->gtOp.gtOp1;
+ assert(op1->OperGet() == GT_LIST);
+ GenTreePtr dstAddr = op1->gtOp.gtOp1;
+ if (dstAddr->OperGet() == GT_ADDR)
+ {
+ InferStructOpSizeAlign(dstAddr->gtOp.gtOp1, &alignment);
+ }
+ else
+ {
+ assert(!"Unhandle dstAddr node");
+ alignment = TARGET_POINTER_SIZE;
+ }
+ }
+ }
+ else
+ {
+ noway_assert(!"Variable sized COPYBLK register arg!");
+ opSize = 0;
+ alignment = TARGET_POINTER_SIZE;
+ }
+ }
+ else if (op->gtOper == GT_MKREFANY)
+ {
+ opSize = TARGET_POINTER_SIZE * 2;
+ alignment = TARGET_POINTER_SIZE;
+ }
+ else if (op->IsArgPlaceHolderNode())
+ {
+ CORINFO_CLASS_HANDLE clsHnd = op->gtArgPlace.gtArgPlaceClsHnd;
+ assert(clsHnd != 0);
+ opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
+ alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ assert(!"Unhandled gtOper");
+ opSize = TARGET_POINTER_SIZE;
+ alignment = TARGET_POINTER_SIZE;
+ }
+
+ assert(opSize != 0);
+ assert(alignment != 0);
+
+ (*alignmentWB) = alignment;
+ return opSize;
+}
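+
+// For example, a GT_MKREFANY operand is reported above as two pointers in size with pointer-size
+// alignment, while a GT_OBJ operand gets its size and alignment from the VM's class handle queries.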
+
+#endif // _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Take an address expression and try to find the best set of components to
+ * form an address mode; returns non-zero if this is successful.
+ *
+ * TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
+ * Refactor this code so that the underlying analysis can be used in
+ * the RyuJIT Backend to do lowering, instead of having to call this method with the
+ * option to not generate the code.
+ *
+ * 'fold' specifies if it is OK to fold the array index which hangs off
+ * a GT_NOP node.
+ *
+ * If successful, the parameters will be set to the following values:
+ *
+ * *rv1Ptr ... base operand
+ * *rv2Ptr ... optional operand
+ * *revPtr ... true if rv2 is before rv1 in the evaluation order
+ * #if SCALED_ADDR_MODES
+ * *mulPtr ... optional multiplier (2/4/8) for rv2
+ * Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
+ * #endif
+ * *cnsPtr ... integer constant [optional]
+ *
+ * The 'mode' parameter may have one of the following values:
+ *
+ * #if LEA_AVAILABLE
+ * +1 ... we're trying to compute a value via 'LEA'
+ * #endif
+ *
+ * 0 ... we're trying to form an address mode
+ *
+ * -1 ... we're generating code for an address mode,
+ * and thus the address must already form an
+ * address mode (without any further work)
+ *
+ * IMPORTANT NOTE: This routine doesn't generate any code, it merely
+ * identifies the components that might be used to
+ * form an address mode later on.
+ */
+
+bool CodeGen::genCreateAddrMode(GenTreePtr addr,
+ int mode,
+ bool fold,
+ regMaskTP regMask,
+ bool* revPtr,
+ GenTreePtr* rv1Ptr,
+ GenTreePtr* rv2Ptr,
+#if SCALED_ADDR_MODES
+ unsigned* mulPtr,
+#endif
+ unsigned* cnsPtr,
+ bool nogen)
+{
+#ifndef LEGACY_BACKEND
+ assert(nogen == true);
+#endif // !LEGACY_BACKEND
+
+ /*
+ The following indirections are valid address modes on x86/x64:
+
+ [ icon] * not handled here
+ [reg ] * not handled here
+ [reg + icon]
+ [reg2 + reg1 ]
+ [reg2 + reg1 + icon]
+ [reg2 + 2 * reg1 ]
+ [reg2 + 4 * reg1 ]
+ [reg2 + 8 * reg1 ]
+ [ 2 * reg1 + icon]
+ [ 4 * reg1 + icon]
+ [ 8 * reg1 + icon]
+ [reg2 + 2 * reg1 + icon]
+ [reg2 + 4 * reg1 + icon]
+ [reg2 + 8 * reg1 + icon]
+
+ The following indirections are valid address modes on arm64:
+
+ [reg]
+ [reg + icon]
+ [reg2 + reg1]
+ [reg2 + reg1 * natural-scale]
+
+ */
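+
+ // For example, the x86/x64 indirection [reg2 + 4*reg1 + icon] would typically be returned as
+ // *rv1Ptr == the reg2 tree, *rv2Ptr == the reg1 tree, *mulPtr == 4 and *cnsPtr == icon.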
+
+ /* All indirect address modes require the address to be an addition */
+
+ if (addr->gtOper != GT_ADD)
+ {
+ return false;
+ }
+
+ // Can't use indirect addressing mode as we need to check for overflow.
+ // Also, can't use 'lea' as it doesn't set the flags.
+
+ if (addr->gtOverflow())
+ {
+ return false;
+ }
+
+ GenTreePtr rv1 = nullptr;
+ GenTreePtr rv2 = nullptr;
+
+ GenTreePtr op1;
+ GenTreePtr op2;
+
+ ssize_t cns;
+#if SCALED_ADDR_MODES
+ unsigned mul;
+#endif
+
+ GenTreePtr tmp;
+
+ /* What order are the sub-operands to be evaluated */
+
+ if (addr->gtFlags & GTF_REVERSE_OPS)
+ {
+ op1 = addr->gtOp.gtOp2;
+ op2 = addr->gtOp.gtOp1;
+ }
+ else
+ {
+ op1 = addr->gtOp.gtOp1;
+ op2 = addr->gtOp.gtOp2;
+ }
+
+ bool rev = false; // Is op2 first in the evaluation order?
+
+ /*
+ A complex address mode can combine the following operands:
+
+ op1 ... base address
+ op2 ... optional scaled index
+#if SCALED_ADDR_MODES
+ mul ... optional multiplier (2/4/8) for op2
+#endif
+ cns ... optional displacement
+
+ Here we try to find such a set of operands and arrange for these
+ to sit in registers.
+ */
+
+ cns = 0;
+#if SCALED_ADDR_MODES
+ mul = 0;
+#endif
+
+AGAIN:
+ /* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
+ constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
+ here if we find a scaled index.
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if SCALED_ADDR_MODES
+ assert(mul == 0);
+#endif
+
+#ifdef LEGACY_BACKEND
+ /* Check both operands as far as being register variables */
+
+ if (mode != -1)
+ {
+ if (op1->gtOper == GT_LCL_VAR)
+ genMarkLclVar(op1);
+ if (op2->gtOper == GT_LCL_VAR)
+ genMarkLclVar(op2);
+ }
+#endif // LEGACY_BACKEND
+
+ /* Special case: keep constants as 'op2' */
+
+ if (op1->IsCnsIntOrI())
+ {
+ // Presumably op2 is assumed to not be a constant (shouldn't happen if we've done constant folding)?
+ tmp = op1;
+ op1 = op2;
+ op2 = tmp;
+ }
+
+ /* Check for an addition of a constant */
+
+ if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->gtIntConCommon.IconValue()))
+ {
+ /* We're adding a constant */
+
+ cns += op2->gtIntConCommon.IconValue();
+
+#ifdef LEGACY_BACKEND
+ /* Can (and should) we use "add reg, icon" ? */
+
+ if ((op1->gtFlags & GTF_REG_VAL) && mode == 1 && !nogen)
+ {
+ regNumber reg1 = op1->gtRegNum;
+
+ if ((regMask == 0 || (regMask & genRegMask(reg1))) && genRegTrashable(reg1, addr))
+ {
+ // In case genMarkLclVar(op1) bashed it above and it is
+ // the last use of the variable.
+
+ genUpdateLife(op1);
+
+ /* 'reg1' is trashable, so add "icon" into it */
+
+ genIncRegBy(reg1, cns, addr, addr->TypeGet());
+
+ genUpdateLife(addr);
+ return true;
+ }
+ }
+#endif // LEGACY_BACKEND
+
+#ifdef _TARGET_ARM64_
+ if (cns == 0)
+#endif
+ {
+ /* Inspect the operand the constant is being added to */
+
+ switch (op1->gtOper)
+ {
+ case GT_ADD:
+
+ if (op1->gtOverflow())
+ {
+ break;
+ }
+
+ op2 = op1->gtOp.gtOp2;
+ op1 = op1->gtOp.gtOp1;
+
+ goto AGAIN;
+
+#if SCALED_ADDR_MODES && !defined(_TARGET_ARM64_)
+ // TODO-ARM64-CQ: For now we don't try to create a scaled index on ARM64.
+ case GT_MUL:
+ if (op1->gtOverflow())
+ {
+ return false; // Need overflow check
+ }
+
+ __fallthrough;
+
+ case GT_LSH:
+
+ mul = op1->GetScaledIndex();
+ if (mul)
+ {
+ /* We can use "[mul*rv2 + icon]" */
+
+ rv1 = nullptr;
+ rv2 = op1->gtOp.gtOp1;
+
+ goto FOUND_AM;
+ }
+ break;
+#endif
+
+ default:
+ break;
+ }
+ }
+
+ /* The best we can do is "[rv1 + icon]" */
+
+ rv1 = op1;
+ rv2 = nullptr;
+
+ goto FOUND_AM;
+ }
+
+ /* op2 is not a constant. So keep on trying.
+ Does op1 or op2 already sit in a register? */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ /* op1 is sitting in a register */
+ }
+ else if (op2->gtFlags & GTF_REG_VAL)
+ {
+ /* op2 is sitting in a register. Keep the enregistered value as op1 */
+
+ tmp = op1;
+ op1 = op2;
+ op2 = tmp;
+
+ noway_assert(rev == false);
+ rev = true;
+ }
+ else
+ {
+ /* Neither op1 nor op2 are sitting in a register right now */
+
+ switch (op1->gtOper)
+ {
+#ifndef _TARGET_ARM64_
+ // TODO-ARM64-CQ: For now we don't try to create a scaled index on ARM64.
+ case GT_ADD:
+
+ if (op1->gtOverflow())
+ {
+ break;
+ }
+
+ if (op1->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op1->gtOp.gtOp2->gtIntCon.gtIconVal))
+ {
+ cns += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op1 = op1->gtOp.gtOp1;
+
+ goto AGAIN;
+ }
+
+ break;
+
+#if SCALED_ADDR_MODES
+
+ case GT_MUL:
+
+ if (op1->gtOverflow())
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_LSH:
+
+ mul = op1->GetScaledIndex();
+ if (mul)
+ {
+ /* 'op1' is a scaled value */
+
+ rv1 = op2;
+ rv2 = op1->gtOp.gtOp1;
+
+ int argScale;
+ while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
+ {
+ if (jitIsScaleIndexMul(argScale * mul))
+ {
+ mul = mul * argScale;
+ rv2 = rv2->gtOp.gtOp1;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ noway_assert(rev == false);
+ rev = true;
+
+ goto FOUND_AM;
+ }
+ break;
+
+#endif // SCALED_ADDR_MODES
+#endif // !_TARGET_ARM64_
+
+ case GT_NOP:
+
+ if (!nogen)
+ {
+ break;
+ }
+
+ op1 = op1->gtOp.gtOp1;
+ goto AGAIN;
+
+ case GT_COMMA:
+
+ if (!nogen)
+ {
+ break;
+ }
+
+ op1 = op1->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ break;
+ }
+
+ noway_assert(op2);
+ switch (op2->gtOper)
+ {
+#ifndef _TARGET_ARM64_
+ // TODO-ARM64-CQ: For now we don't try to create a scaled index on ARM64.
+ case GT_ADD:
+
+ if (op2->gtOverflow())
+ {
+ break;
+ }
+
+ if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
+ {
+ cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op2 = op2->gtOp.gtOp1;
+
+ goto AGAIN;
+ }
+
+ break;
+
+#if SCALED_ADDR_MODES
+
+ case GT_MUL:
+
+ if (op2->gtOverflow())
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_LSH:
+
+ mul = op2->GetScaledIndex();
+ if (mul)
+ {
+ // 'op2' is a scaled value... is its argument also scaled?
+ int argScale;
+ rv2 = op2->gtOp.gtOp1;
+ while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
+ {
+ if (jitIsScaleIndexMul(argScale * mul))
+ {
+ mul = mul * argScale;
+ rv2 = rv2->gtOp.gtOp1;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ rv1 = op1;
+
+ goto FOUND_AM;
+ }
+ break;
+
+#endif // SCALED_ADDR_MODES
+#endif // !_TARGET_ARM64_
+
+ case GT_NOP:
+
+ if (!nogen)
+ {
+ break;
+ }
+
+ op2 = op2->gtOp.gtOp1;
+ goto AGAIN;
+
+ case GT_COMMA:
+
+ if (!nogen)
+ {
+ break;
+ }
+
+ op2 = op2->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ break;
+ }
+
+ goto ADD_OP12;
+ }
+
+ /* op1 is in a register.
+ Is op2 an addition or a scaled value? */
+
+ noway_assert(op2);
+
+#ifndef _TARGET_ARM64_
+ // TODO-ARM64-CQ: For now we don't try to create a scaled index on ARM64.
+ switch (op2->gtOper)
+ {
+ case GT_ADD:
+
+ if (op2->gtOverflow())
+ {
+ break;
+ }
+
+ if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
+ {
+ cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op2 = op2->gtOp.gtOp1;
+ goto AGAIN;
+ }
+
+ break;
+
+#if SCALED_ADDR_MODES
+
+ case GT_MUL:
+
+ if (op2->gtOverflow())
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_LSH:
+
+ mul = op2->GetScaledIndex();
+ if (mul)
+ {
+ rv1 = op1;
+ rv2 = op2->gtOp.gtOp1;
+ int argScale;
+ while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
+ {
+ if (jitIsScaleIndexMul(argScale * mul))
+ {
+ mul = mul * argScale;
+ rv2 = rv2->gtOp.gtOp1;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ goto FOUND_AM;
+ }
+ break;
+
+#endif // SCALED_ADDR_MODES
+
+ default:
+ break;
+ }
+#endif // !_TARGET_ARM64_
+
+ADD_OP12:
+
+ /* The best we can do is "[rv1 + rv2]" or "[rv1 + rv2 + cns]" */
+
+ rv1 = op1;
+ rv2 = op2;
+#ifdef _TARGET_ARM64_
+ assert(cns == 0);
+#endif
+
+FOUND_AM:
+
+#ifdef LEGACY_BACKEND
+ /* Check for register variables */
+
+ if (mode != -1)
+ {
+ if (rv1 && rv1->gtOper == GT_LCL_VAR)
+ genMarkLclVar(rv1);
+ if (rv2 && rv2->gtOper == GT_LCL_VAR)
+ genMarkLclVar(rv2);
+ }
+#endif // LEGACY_BACKEND
+
+ if (rv2)
+ {
+ /* Make sure a GC address doesn't end up in 'rv2' */
+
+ if (varTypeIsGC(rv2->TypeGet()))
+ {
+ noway_assert(rv1 && !varTypeIsGC(rv1->TypeGet()));
+
+ tmp = rv1;
+ rv1 = rv2;
+ rv2 = tmp;
+
+ rev = !rev;
+ }
+
+ /* Special case: constant array index (that is range-checked) */
+
+ if (fold)
+ {
+ ssize_t tmpMul;
+ GenTreePtr index;
+
+ if ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (rv2->gtOp.gtOp2->IsCnsIntOrI()))
+ {
+ /* For valuetype arrays where we can't use the scaled address
+ mode, rv2 will point to the scaled index. So we have to do
+ more work */
+
+ tmpMul = compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false));
+ if (mul)
+ {
+ tmpMul *= mul;
+ }
+ }
+ else
+ {
+ /* May be a simple array. rv2 points to the actual index */
+
+ index = rv2;
+ tmpMul = mul;
+ }
+
+ /* Get hold of the array index and see if it's a constant */
+ if (index->IsIntCnsFitsInI32())
+ {
+ /* Get hold of the index value */
+ ssize_t ixv = index->AsIntConCommon()->IconValue();
+
+#if SCALED_ADDR_MODES
+ /* Scale the index if necessary */
+ if (tmpMul)
+ {
+ ixv *= tmpMul;
+ }
+#endif
+
+ if (FitsIn<INT32>(cns + ixv))
+ {
+ /* Add the scaled index to the offset value */
+
+ cns += ixv;
+
+#if SCALED_ADDR_MODES
+ /* There is no scaled operand any more */
+ mul = 0;
+#endif
+ rv2 = nullptr;
+ }
+ }
+ }
+ }
+
+ // We shouldn't have [rv2*1 + cns] - this is equivalent to [rv1 + cns]
+ noway_assert(rv1 || mul != 1);
+
+ noway_assert(FitsIn<INT32>(cns));
+
+ /* Success - return the various components to the caller */
+
+ *revPtr = rev;
+ *rv1Ptr = rv1;
+ *rv2Ptr = rv2;
+#if SCALED_ADDR_MODES
+ *mulPtr = mul;
+#endif
+ *cnsPtr = (unsigned)cns;
+
+ return true;
+}
+
+/*****************************************************************************
+* The condition to use for (the jmp/set for) the given type of operation
+*
+* On amd64, this routine should be used when there is no gentree available
+* and one needs to generate jumps based on integer comparisons. When a gentree is
+* available, always use its overloaded version.
+*
+*/
+
+// static
+emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind)
+{
+ const static BYTE genJCCinsSigned[] = {
+#if defined(_TARGET_XARCH_)
+ EJ_je, // GT_EQ
+ EJ_jne, // GT_NE
+ EJ_jl, // GT_LT
+ EJ_jle, // GT_LE
+ EJ_jge, // GT_GE
+ EJ_jg, // GT_GT
+#elif defined(_TARGET_ARMARCH_)
+ EJ_eq, // GT_EQ
+ EJ_ne, // GT_NE
+ EJ_lt, // GT_LT
+ EJ_le, // GT_LE
+ EJ_ge, // GT_GE
+ EJ_gt, // GT_GT
+#endif
+ };
+
+ const static BYTE genJCCinsUnsigned[] = /* unsigned comparison */
+ {
+#if defined(_TARGET_XARCH_)
+ EJ_je, // GT_EQ
+ EJ_jne, // GT_NE
+ EJ_jb, // GT_LT
+ EJ_jbe, // GT_LE
+ EJ_jae, // GT_GE
+ EJ_ja, // GT_GT
+#elif defined(_TARGET_ARMARCH_)
+ EJ_eq, // GT_EQ
+ EJ_ne, // GT_NE
+ EJ_lo, // GT_LT
+ EJ_ls, // GT_LE
+ EJ_hs, // GT_GE
+ EJ_hi, // GT_GT
+#endif
+ };
+
+ const static BYTE genJCCinsLogical[] = /* logical operation */
+ {
+#if defined(_TARGET_XARCH_)
+ EJ_je, // GT_EQ (Z == 1)
+ EJ_jne, // GT_NE (Z == 0)
+ EJ_js, // GT_LT (S == 1)
+ EJ_NONE, // GT_LE
+ EJ_jns, // GT_GE (S == 0)
+ EJ_NONE, // GT_GT
+#elif defined(_TARGET_ARMARCH_)
+ EJ_eq, // GT_EQ (Z == 1)
+ EJ_ne, // GT_NE (Z == 0)
+ EJ_mi, // GT_LT (N == 1)
+ EJ_NONE, // GT_LE
+ EJ_pl, // GT_GE (N == 0)
+ EJ_NONE, // GT_GT
+#endif
+ };
+
+#if defined(_TARGET_XARCH_)
+ assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_je);
+ assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_jne);
+ assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_jl);
+ assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle);
+ assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge);
+ assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg);
+
+ assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je);
+ assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne);
+ assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_jb);
+ assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe);
+ assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae);
+ assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja);
+
+ assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je);
+ assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne);
+ assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_js);
+ assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_jns);
+#elif defined(_TARGET_ARMARCH_)
+ assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_eq);
+ assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_ne);
+ assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_lt);
+ assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_le);
+ assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_ge);
+ assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_gt);
+
+ assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_eq);
+ assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_ne);
+ assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_lo);
+ assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_ls);
+ assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_hs);
+ assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_hi);
+
+ assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_eq);
+ assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_ne);
+ assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_mi);
+ assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_pl);
+#else
+ assert(!"unknown arch");
+#endif
+ assert(GenTree::OperIsCompare(cmp));
+
+ emitJumpKind result = EJ_COUNT;
+
+ if (compareKind == CK_UNSIGNED)
+ {
+ result = (emitJumpKind)genJCCinsUnsigned[cmp - GT_EQ];
+ }
+ else if (compareKind == CK_SIGNED)
+ {
+ result = (emitJumpKind)genJCCinsSigned[cmp - GT_EQ];
+ }
+ else if (compareKind == CK_LOGICAL)
+ {
+ result = (emitJumpKind)genJCCinsLogical[cmp - GT_EQ];
+ }
+ assert(result != EJ_COUNT);
+ return result;
+}
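+
+// Editor's note: hedged usage sketch, not part of the original change. A caller that has just
+// emitted an integer compare and has no GenTree at hand might select and emit the jump like:
+//     emitJumpKind jmpKind = genJumpKindForOper(GT_LT, CK_UNSIGNED); // EJ_jb on xarch, EJ_lo on arm
+//     inst_JMP(jmpKind, skipBlk);
+// where 'skipBlk' is a hypothetical label created via genCreateTempLabel().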
+
+/*****************************************************************************
+ *
+ * Generate an exit sequence for a return from a method (note: when compiling
+ * for speed there might be multiple exit points).
+ */
+
+void CodeGen::genExitCode(BasicBlock* block)
+{
+#ifdef DEBUGGING_SUPPORT
+ /* We just wrote the first instruction of the epilog - inform the debugger.
+ Note that this may result in a duplicate IPmapping entry, and
+ that this is ok. */
+
+ // For non-optimized debuggable code, there is only one epilog.
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
+#endif // DEBUGGING_SUPPORT
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+ if (compiler->getNeedsGSSecurityCookie())
+ {
+ genEmitGSCookieCheck(jmpEpilog);
+
+ if (jmpEpilog)
+ {
+ // Dev10 642944 -
+ // The GS cookie check created a temp label that has no live
+ // incoming GC registers, we need to fix that
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ /* Figure out which register parameters hold pointers */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg;
+ varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+
+ gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, varDsc->TypeGet());
+ }
+
+ getEmitter()->emitThisGCrefRegs = getEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur;
+ getEmitter()->emitThisByrefRegs = getEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur;
+ }
+ }
+
+ genReserveEpilog(block);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for an out-of-line exception.
+ * For debuggable code, we generate the 'throw' inline.
+ * For non-dbg code, we share the helper blocks created by fgAddCodeRef().
+ */
+
+void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, GenTreePtr failBlk)
+{
+ if (!compiler->opts.compDbgCode)
+ {
+ /* For non-debuggable code, find and use the helper block for
+ raising the exception. The block may be shared by other trees too. */
+
+ BasicBlock* tgtBlk;
+
+ if (failBlk)
+ {
+ /* We already know which block to jump to. Use that. */
+
+ noway_assert(failBlk->gtOper == GT_LABEL);
+ tgtBlk = failBlk->gtLabel.gtLabBB;
+ noway_assert(
+ tgtBlk ==
+ compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB))->acdDstBlk);
+ }
+ else
+ {
+ /* Find the helper-block which raises the exception. */
+
+ Compiler::AddCodeDsc* add =
+ compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
+ PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
+ tgtBlk = add->acdDstBlk;
+ }
+
+ noway_assert(tgtBlk);
+
+ // Jump to the exception-throwing block on error.
+
+ inst_JMP(jumpKind, tgtBlk);
+ }
+ else
+ {
+ /* The code to throw the exception will be generated inline, and
+ we will jump around it in the normal non-exception case */
+
+ BasicBlock* tgtBlk = nullptr;
+ emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind);
+ if (reverseJumpKind != jumpKind)
+ {
+ tgtBlk = genCreateTempLabel();
+ inst_JMP(reverseJumpKind, tgtBlk);
+ }
+
+ genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN);
+
+ /* Define the spot for the normal non-exception case to jump to */
+ if (tgtBlk != nullptr)
+ {
+ assert(reverseJumpKind != jumpKind);
+ genDefineTempLabel(tgtBlk);
+ }
+ }
+}
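+
+// Editor's note: illustrative sketch only; the instruction sequence below is an example. For a
+// range check whose failure condition is "unsigned index >= length" (jumpKind == EJ_jae),
+// optimized code emits a single "jae <shared SCK_RNGCHK_FAIL block>", while debuggable code
+// reverses the jump around an inline throw:
+//     jb   L_ok                      ; normal path skips the inline throw
+//     call CORINFO_HELP_RNGCHKFAIL   ; emitted via genEmitHelperCall(compiler->acdHelper(codeKind), ...)
+//   L_ok: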
+
+/*****************************************************************************
+ *
+ * The last operation done was generating code for "tree" and that would
+ * have set the flags. Check if the operation caused an overflow.
+ */
+
+// inline
+void CodeGen::genCheckOverflow(GenTreePtr tree)
+{
+ // Overflow-check should be asked for this tree
+ noway_assert(tree->gtOverflow());
+
+ const var_types type = tree->TypeGet();
+
+ // Overflow checks can only occur for the non-small types: (i.e. TYP_INT,TYP_LONG)
+ noway_assert(!varTypeIsSmall(type));
+
+ emitJumpKind jumpKind;
+
+#ifdef _TARGET_ARM64_
+ if (tree->OperGet() == GT_MUL)
+ {
+ jumpKind = EJ_ne;
+ }
+ else
+#endif
+ {
+ bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != 0);
+
+#if defined(_TARGET_XARCH_)
+
+ jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo;
+
+#elif defined(_TARGET_ARMARCH_)
+
+ jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs;
+
+ if (jumpKind == EJ_lo)
+ {
+ if ((tree->OperGet() != GT_SUB) && (tree->gtOper != GT_ASG_SUB))
+ {
+ jumpKind = EJ_hs;
+ }
+ }
+
+#endif // defined(_TARGET_ARMARCH_)
+ }
+
+ // Jump to the block which will throw the exception.
+
+ genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW);
+}
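+
+// Editor's note: illustrative sketch, not from the original change. On xarch a signed 32-bit
+// overflow-checked add reduces to something like
+//     add eax, edx              ; sets OF on signed overflow
+//     jo  <SCK_OVERFLOW block>  ; EJ_jo chosen above, routed through genJumpToThrowHlpBlk
+// whereas the unsigned variant tests the carry flag via EJ_jb instead.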
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Update the current funclet when 'block' begins a new funclet (BBF_FUNCLET_BEG).
+ * For non-BBF_FUNCLET_BEG blocks, assert that the current funclet
+ * is up-to-date.
+ *
+ */
+
+void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
+{
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
+ if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
+ {
+ assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block);
+ }
+ else
+ {
+ // We shouldn't see FUNC_ROOT
+ assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
+ assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block);
+ }
+ }
+ else
+ {
+ assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
+ if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
+ {
+ assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
+ }
+ else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
+ {
+ assert(!block->hasHndIndex());
+ }
+ else
+ {
+ assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
+ assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block));
+ }
+ }
+}
+#endif // FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Generate code for the function.
+ */
+
+void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genGenerateCode()\n");
+ compiler->fgDispBasicBlocks(compiler->verboseTrees);
+ }
+#endif
+
+ unsigned codeSize;
+ unsigned prologSize;
+ unsigned epilogSize;
+
+ void* consPtr;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+
+#if STACK_PROBES
+ genNeedPrologStackProbe = false;
+#endif
+
+ compiler->fgDebugCheckBBlist();
+#endif // DEBUG
+
+ /* This is the real thing */
+
+ genPrepForCompiler();
+
+ /* Prepare the emitter */
+ getEmitter()->Init();
+#ifdef DEBUG
+ VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
+#endif
+
+#ifdef DEBUG
+ if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
+ {
+ compiler->opts.disAsm = true;
+ }
+
+ if (compiler->opts.disAsm)
+ {
+ printf("; Assembly listing for method %s\n", compiler->info.compFullName);
+
+ printf("; Emitting ");
+
+ if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
+ {
+ printf("SMALL_CODE");
+ }
+ else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
+ {
+ printf("FAST_CODE");
+ }
+ else
+ {
+ printf("BLENDED_CODE");
+ }
+
+ printf(" for ");
+
+ if (compiler->info.genCPU == CPU_X86)
+ {
+ printf("generic X86 CPU");
+ }
+ else if (compiler->info.genCPU == CPU_X86_PENTIUM_4)
+ {
+ printf("Pentium 4");
+ }
+ else if (compiler->info.genCPU == CPU_X64)
+ {
+ if (compiler->canUseAVX())
+ {
+ printf("X64 CPU with AVX");
+ }
+ else
+ {
+ printf("X64 CPU with SSE2");
+ }
+ }
+
+ else if (compiler->info.genCPU == CPU_ARM)
+ {
+ printf("generic ARM CPU");
+ }
+
+ printf("\n");
+
+ if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
+ {
+ printf("; optimized code\n");
+ }
+ else if (compiler->opts.compDbgCode)
+ {
+ printf("; debuggable code\n");
+ }
+ else if (compiler->opts.MinOpts())
+ {
+ printf("; compiler->opts.MinOpts() is true\n");
+ }
+ else
+ {
+ printf("; unknown optimization flags\n");
+ }
+
+#if DOUBLE_ALIGN
+ if (compiler->genDoubleAlign())
+ printf("; double-aligned frame\n");
+ else
+#endif
+ printf("; %s based frame\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
+
+ if (genInterruptible)
+ {
+ printf("; fully interruptible\n");
+ }
+ else
+ {
+ printf("; partially interruptible\n");
+ }
+
+ if (compiler->fgHaveProfileData())
+ {
+ printf("; with IBC profile data\n");
+ }
+
+ if (compiler->fgProfileData_ILSizeMismatch)
+ {
+ printf("; discarded IBC profile data due to mismatch in ILSize\n");
+ }
+ }
+#endif // DEBUG
+
+#ifndef LEGACY_BACKEND
+
+ // For RyuJIT backend, we compute the final frame layout before code generation. This is because LSRA
+ // has already computed exactly the maximum concurrent number of spill temps of each type that are
+ // required during code generation. So, there is nothing left to estimate: we can be precise in the frame
+ // layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of
+ // memory from the VM.
+
+ genFinalizeFrame();
+
+ unsigned maxTmpSize = compiler->tmpSize; // This is precise after LSRA has pre-allocated the temps.
+
+#else // LEGACY_BACKEND
+
+ // Estimate the frame size: first, estimate the number of spill temps needed by taking the register
+ // predictor spill temp estimates and stress levels into consideration. Then, compute the tentative
+ // frame layout using conservative callee-save register estimation (namely, guess they'll all be used
+ // and thus saved on the frame).
+
+ // Compute the maximum estimated spill temp size.
+ unsigned maxTmpSize = sizeof(double) + sizeof(float) + sizeof(__int64) + sizeof(void*);
+
+ maxTmpSize += (compiler->tmpDoubleSpillMax * sizeof(double)) + (compiler->tmpIntSpillMax * sizeof(int));
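+ // Editor's worked example (illustrative only): assuming a 32-bit target (4-byte void*) with
+ // tmpDoubleSpillMax == 2 and tmpIntSpillMax == 3, this estimate is
+ // (8 + 4 + 8 + 4) + (2 * 8) + (3 * 4) = 24 + 16 + 12 = 52 bytes.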
+
+#ifdef DEBUG
+
+ /* When StressRegs is >=1, there will be a bunch of spills not predicted by
+ the predictor (see logic in rsPickReg). It will be very hard to teach
+ the predictor about the behavior of rsPickReg for StressRegs >= 1, so
+ instead let's make maxTmpSize large enough so that we won't be wrong.
+ This means that at StressRegs >= 1, we will not be testing the logic
+ that sets the maxTmpSize size.
+ */
+
+ if (regSet.rsStressRegs() >= 1)
+ {
+ maxTmpSize += (REG_TMP_ORDER_COUNT * REGSIZE_BYTES);
+ }
+
+ // The JIT uses 2 passes when assigning stack variable (i.e. args, temps, and locals) locations in
+ // varDsc->lvStkOffs.
+ // During the 1st pass (in genGenerateCode), it estimates the maximum possible size for stack temps
+ // and puts it in maxTmpSize. Then it calculates varDsc->lvStkOffs for each variable based on this estimate.
+ // However, during stress mode we might spill more temps on the stack, which might grow the
+ // size of the temp area.
+ // This might cause varDsc->lvStkOffs to change during the 2nd pass (in emitEndCodeGen).
+ // If the change of varDsc->lvStkOffs crosses the threshold for the instruction size,
+ // we will then have a mismatch between the estimated code size (from the 1st pass) and the actual
+ // emitted code size (from the 2nd pass).
+ // Also, if STRESS_UNSAFE_BUFFER_CHECKS is turned on, we might reorder the stack variable locations,
+ // which could cause the mismatch too.
+ //
+ // The following code simply bumps maxTmpSize up to at least BYTE_MAX+1 during stress mode, so that
+ // we don't run into code size problems during stress.
+
+ if (getJitStressLevel() != 0)
+ {
+ if (maxTmpSize < BYTE_MAX + 1)
+ {
+ maxTmpSize = BYTE_MAX + 1;
+ }
+ }
+#endif // DEBUG
+
+ /* Estimate the offsets of locals/arguments and size of frame */
+
+ unsigned lclSize = compiler->lvaFrameSize(Compiler::TENTATIVE_FRAME_LAYOUT);
+
+#ifdef DEBUG
+ //
+ // Display the local frame offsets that we have tentatively decided upon
+ //
+ if (verbose)
+ {
+ compiler->lvaTableDump();
+ }
+#endif // DEBUG
+
+#endif // LEGACY_BACKEND
+
+ getEmitter()->emitBegFN(isFramePointerUsed()
+#if defined(DEBUG)
+ ,
+ (compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
+ !(compiler->opts.eeFlags & CORJIT_FLG_PREJIT)
+#endif
+#ifdef LEGACY_BACKEND
+ ,
+ lclSize
+#endif // LEGACY_BACKEND
+ ,
+ maxTmpSize);
+
+ /* Now generate code for the function */
+ genCodeForBBlist();
+
+#ifndef LEGACY_BACKEND
+#ifdef DEBUG
+ // After code generation, dump the frame layout again. It should be the same as before code generation, if code
+ // generation hasn't touched it (it shouldn't!).
+ if (verbose)
+ {
+ compiler->lvaTableDump();
+ }
+#endif // DEBUG
+#endif // !LEGACY_BACKEND
+
+ /* We can now generate the function prolog and epilog */
+
+ genGeneratePrologsAndEpilogs();
+
+ /* Bind jump distances */
+
+ getEmitter()->emitJumpDistBind();
+
+ /* The code is now complete and final; it should not change after this. */
+
+ /* Compute the size of the code sections that we are going to ask the VM
+ to allocate. Note that this might not be precisely the size of the
+ code we emit, though it's fatal if we emit more code than the size we
+ compute here.
+ (Note: an example of a case where we emit less code would be useful.)
+ */
+
+ getEmitter()->emitComputeCodeSizes();
+
+#ifdef DEBUG
+
+ // Code to test or stress our ability to run a fallback compile.
+ // We trigger the fallback here, before asking the VM for any memory,
+ // because if not, we will leak mem, as the current codebase can't free
+ // the mem after the emitter asks the VM for it. As this is only a stress
+ // mode, we only want the functionality, and don't care about the relative
+ // ugliness of having the failure here.
+ if (!compiler->jitFallbackCompile)
+ {
+ // Use COMPlus_JitNoForceFallback=1 to prevent NOWAY assert testing from happening,
+ // especially the testing caused by enabling JIT stress.
+ if (!JitConfig.JitNoForceFallback())
+ {
+ if (JitConfig.JitForceFallback() || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
+ {
+ NO_WAY_NOASSERT("Stress failure");
+ }
+ }
+ }
+
+#endif // DEBUG
+
+ /* We've finished collecting all the unwind information for the function. Now reserve
+ space for it from the VM.
+ */
+
+ compiler->unwindReserve();
+
+#if DISPLAY_SIZES
+
+ size_t dataSize = getEmitter()->emitDataSize();
+
+#endif // DISPLAY_SIZES
+
+ void* coldCodePtr;
+
+ bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ?
+
+#ifdef _TARGET_AMD64_
+ trackedStackPtrsContig = false;
+#elif defined(_TARGET_ARM_)
+ // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous
+ trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
+#elif defined(_TARGET_ARM64_)
+ // Incoming vararg registers are homed on the top of the stack. Tracked vars may not be contiguous.
+ trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->info.compIsVarArgs;
+#else
+ trackedStackPtrsContig = !compiler->opts.compDbgEnC;
+#endif
+
+#ifdef DEBUG
+ /* We're done generating code for this function */
+ compiler->compCodeGenDone = true;
+#endif
+
+ compiler->EndPhase(PHASE_GENERATE_CODE);
+
+ codeSize = getEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, genInterruptible, genFullPtrRegMap,
+ (compiler->info.compRetType == TYP_REF), compiler->compHndBBtabCount,
+ &prologSize, &epilogSize, codePtr, &coldCodePtr, &consPtr);
+
+ compiler->EndPhase(PHASE_EMIT_CODE);
+
+#ifdef DEBUG
+ if (compiler->opts.disAsm)
+ {
+ printf("; Total bytes of code %d, prolog size %d for method %s\n", codeSize, prologSize,
+ compiler->info.compFullName);
+ printf("; ============================================================\n");
+ printf(""); // in our logic this causes a flush
+ }
+
+ if (verbose)
+ {
+ printf("*************** After end code gen, before unwindEmit()\n");
+ getEmitter()->emitDispIGlist(true);
+ }
+#endif
+
+#if EMIT_TRACK_STACK_DEPTH
+ /* Check our max stack level. Needed for fgAddCodeRef().
+ We need to relax the assert as our estimation won't include code-gen
+ stack changes (which we know don't affect fgAddCodeRef()) */
+ noway_assert(getEmitter()->emitMaxStackDepth <=
+ (compiler->fgPtrArgCntMax + compiler->compHndBBtabCount + // Return address for locally-called finallys
+ genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
+ (compiler->compTailCallUsed ? 4 : 0))); // CORINFO_HELP_TAILCALL args
+#endif
+
+ *nativeSizeOfCode = codeSize;
+ compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
+
+ // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
+
+ // Make sure that the x86 alignment and cache prefetch optimization rules
+ // were obeyed.
+
+ // Don't start a method in the last 7 bytes of a 16-byte alignment area
+ // unless we are generating SMALL_CODE
+ // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
+
+ /* Now that the code is issued, we can finalize and emit the unwind data */
+
+ compiler->unwindEmit(*codePtr, coldCodePtr);
+
+#ifdef DEBUGGING_SUPPORT
+
+ /* Finalize the line # tracking logic after we know the exact block sizes/offsets */
+
+ genIPmappingGen();
+
+ /* Finalize the Local Var info in terms of generated code */
+
+ genSetScopeInfo();
+
+#endif // DEBUGGING_SUPPORT
+
+#ifdef LATE_DISASM
+ unsigned finalHotCodeSize;
+ unsigned finalColdCodeSize;
+ if (compiler->fgFirstColdBlock != nullptr)
+ {
+ // We did some hot/cold splitting. The hot section is always padded out to the
+ // size we thought it would be, but the cold section is not.
+ assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize);
+ assert(compiler->info.compTotalHotCodeSize > 0);
+ assert(compiler->info.compTotalColdCodeSize > 0);
+ finalHotCodeSize = compiler->info.compTotalHotCodeSize;
+ finalColdCodeSize = codeSize - finalHotCodeSize;
+ }
+ else
+ {
+ // No hot/cold splitting
+ assert(codeSize <= compiler->info.compTotalHotCodeSize);
+ assert(compiler->info.compTotalHotCodeSize > 0);
+ assert(compiler->info.compTotalColdCodeSize == 0);
+ finalHotCodeSize = codeSize;
+ finalColdCodeSize = 0;
+ }
+ getDisAssembler().disAsmCode((BYTE*)*codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize);
+#endif // LATE_DISASM
+
+ /* Report any exception handlers to the VM */
+
+ genReportEH();
+
+#ifdef JIT32_GCENCODER
+#ifdef DEBUG
+ void* infoPtr =
+#endif // DEBUG
+#endif
+ // Create and store the GC info for this method.
+ genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
+
+#ifdef DEBUG
+ FILE* dmpf = jitstdout;
+
+ compiler->opts.dmpHex = false;
+ if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for>"))
+ {
+ FILE* codf;
+ errno_t ec = fopen_s(&codf, "C:\\JIT.COD", "at"); // NOTE: file append mode
+ if (ec == 0) // fopen_s returns 0 on success
+ {
+ assert(codf);
+ dmpf = codf;
+ compiler->opts.dmpHex = true;
+ }
+ }
+ if (compiler->opts.dmpHex)
+ {
+ size_t consSize = getEmitter()->emitDataSize();
+ size_t infoSize = compiler->compInfoBlkSize;
+
+ fprintf(dmpf, "Generated code for %s:\n", compiler->info.compFullName);
+ fprintf(dmpf, "\n");
+
+ if (codeSize)
+ {
+ fprintf(dmpf, " Code at %p [%04X bytes]\n", dspPtr(*codePtr), codeSize);
+ }
+ if (consSize)
+ {
+ fprintf(dmpf, " Const at %p [%04X bytes]\n", dspPtr(consPtr), consSize);
+ }
+#ifdef JIT32_GCENCODER
+ if (infoSize)
+ fprintf(dmpf, " Info at %p [%04X bytes]\n", dspPtr(infoPtr), infoSize);
+#endif // JIT32_GCENCODER
+
+ fprintf(dmpf, "\n");
+
+ if (codeSize)
+ {
+ hexDump(dmpf, "Code", (BYTE*)*codePtr, codeSize);
+ }
+ if (consSize)
+ {
+ hexDump(dmpf, "Const", (BYTE*)consPtr, consSize);
+ }
+#ifdef JIT32_GCENCODER
+ if (infoSize)
+ hexDump(dmpf, "Info", (BYTE*)infoPtr, infoSize);
+#endif // JIT32_GCENCODER
+
+ fflush(dmpf);
+ }
+
+ if (dmpf != jitstdout)
+ {
+ fclose(dmpf);
+ }
+
+#endif // DEBUG
+
+ /* Tell the emitter that we're done with this function */
+
+ getEmitter()->emitEndFN();
+
+ /* Shut down the spill logic */
+
+ regSet.rsSpillDone();
+
+ /* Shut down the temp logic */
+
+ compiler->tmpDone();
+
+#if DISPLAY_SIZES
+
+ grossVMsize += compiler->info.compILCodeSize;
+ totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize;
+ grossNCsize += codeSize + dataSize;
+
+#endif // DISPLAY_SIZES
+
+ compiler->EndPhase(PHASE_EMIT_GCEH);
+}
+
+/*****************************************************************************
+ *
+ * Report EH clauses to the VM
+ */
+
+void CodeGen::genReportEH()
+{
+ if (compiler->compHndBBtabCount == 0)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (compiler->opts.dspEHTable)
+ {
+ printf("*************** EH table for %s\n", compiler->info.compFullName);
+ }
+#endif // DEBUG
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ unsigned EHCount = compiler->compHndBBtabCount;
+
+#if FEATURE_EH_FUNCLETS
+ // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the
+ // VM.
+ unsigned duplicateClauseCount = 0;
+ unsigned enclosingTryIndex;
+ for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
+ {
+ for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
+ // ignoring 'mutual protect' trys
+ enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
+ enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
+ {
+ ++duplicateClauseCount;
+ }
+ }
+ EHCount += duplicateClauseCount;
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ unsigned clonedFinallyCount = 0;
+
+ // We don't keep track of how many cloned finallys there are. So, go through and count.
+ // We do a quick pass first through the EH table to see if there are any try/finally
+ // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
+
+ bool anyFinallys = false;
+ for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
+ HBtab < HBtabEnd; HBtab++)
+ {
+ if (HBtab->HasFinallyHandler())
+ {
+ anyFinallys = true;
+ break;
+ }
+ }
+ if (anyFinallys)
+ {
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ ++clonedFinallyCount;
+ }
+ }
+
+ EHCount += clonedFinallyCount;
+ }
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef DEBUG
+ if (compiler->opts.dspEHTable)
+ {
+#if FEATURE_EH_FUNCLETS
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
+ compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
+ assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
+#else // !FEATURE_EH_CALLFINALLY_THUNKS
+ printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
+ compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
+ assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
+#endif // !FEATURE_EH_CALLFINALLY_THUNKS
+#else // !FEATURE_EH_FUNCLETS
+ printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
+ assert(compiler->compHndBBtabCount == EHCount);
+#endif // !FEATURE_EH_FUNCLETS
+ }
+#endif // DEBUG
+
+ // Tell the VM how many EH clauses to expect.
+ compiler->eeSetEHcount(EHCount);
+
+ XTnum = 0; // This is the index we pass to the VM
+
+ for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
+ HBtab < HBtabEnd; HBtab++)
+ {
+ UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
+
+ tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg);
+ hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg);
+
+ tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
+ : compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext);
+ hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
+ : compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext);
+
+ if (HBtab->HasFilter())
+ {
+ hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter);
+ }
+ else
+ {
+ hndTyp = HBtab->ebdTyp;
+ }
+
+ CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
+
+ // Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
+ // the fields aren't accurate.
+
+ CORINFO_EH_CLAUSE clause;
+ clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
+ clause.Flags = flags;
+ clause.TryOffset = tryBeg;
+ clause.TryLength = tryEnd;
+ clause.HandlerOffset = hndBeg;
+ clause.HandlerLength = hndEnd;
+
+ assert(XTnum < EHCount);
+
+ // Tell the VM about this EH clause.
+ compiler->eeSetEHinfo(XTnum, &clause);
+
+ ++XTnum;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ // Now output duplicated clauses.
+ //
+ // If a funclet has been created by moving a handler out of a try region that it was originally nested
+ // within, then we need to report a "duplicate" clause representing the fact that an exception in that
+ // handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region
+ // descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is
+ // no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler
+ // region as the enclosing try region's handler region. This is the sense in which it is duplicated:
+ // there is now a "duplicate" clause with the same handler region as another, but a different 'try'
+ // region.
+ //
+ // For example, consider this (capital letters represent an unknown code sequence, numbers identify a
+ // try or handler region):
+ //
+ // A
+ // try (1) {
+ // B
+ // try (2) {
+ // C
+ // } catch (3) {
+ // D
+ // } catch (4) {
+ // E
+ // }
+ // F
+ // } catch (5) {
+ // G
+ // }
+ // H
+ //
+ // Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected
+ // by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D".
+ // This is an example of 'mutually protect' regions. First, we move handlers (3) and (4)
+ // to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again
+ // note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that.
+ // The code "D" and "E" won't be contiguous with the protected region for try (1) (which
+ // will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor
+ // representing try (1) protecting the new funclets catch (3) and (4).
+ // The code will be generated as follows:
+ //
+ // ABCFH // "main" code
+ // D // funclet
+ // E // funclet
+ // G // funclet
+ //
+ // The EH regions are:
+ //
+ // C -> D
+ // C -> E
+ // BCF -> G
+ // D -> G // "duplicate" clause
+ // E -> G // "duplicate" clause
+ //
+ // Note that we actually need to generate one of these additional "duplicate" clauses for every
+ // region the funclet is nested in. Take this example:
+ //
+ // A
+ // try (1) {
+ // B
+ // try (2,3) {
+ // C
+ // try (4) {
+ // D
+ // try (5,6) {
+ // E
+ // } catch {
+ // F
+ // } catch {
+ // G
+ // }
+ // H
+ // } catch {
+ // I
+ // }
+ // J
+ // } catch {
+ // K
+ // } catch {
+ // L
+ // }
+ // M
+ // } catch {
+ // N
+ // }
+ // O
+ //
+ // When we pull out funclets, we get the following generated code:
+ //
+ // ABCDEHJMO // "main" function
+ // F // funclet
+ // G // funclet
+ // I // funclet
+ // K // funclet
+ // L // funclet
+ // N // funclet
+ //
+ // And the EH regions we report to the VM are (in order; main clauses
+ // first in most-to-least nested order, funclets ("duplicated clauses")
+ // last, in most-to-least nested) are:
+ //
+ // E -> F
+ // E -> G
+ // DEH -> I
+ // CDEHJ -> K
+ // CDEHJ -> L
+ // BCDEHJM -> N
+ // F -> I // funclet clause #1 for F
+ // F -> K // funclet clause #2 for F
+ // F -> L // funclet clause #3 for F
+ // F -> N // funclet clause #4 for F
+ // G -> I // funclet clause #1 for G
+ // G -> K // funclet clause #2 for G
+ // G -> L // funclet clause #3 for G
+ // G -> N // funclet clause #4 for G
+ // I -> K // funclet clause #1 for I
+ // I -> L // funclet clause #2 for I
+ // I -> N // funclet clause #3 for I
+ // K -> N // funclet clause #1 for K
+ // L -> N // funclet clause #1 for L
+ //
+ // So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM.
+ // Note that due to the nature of 'mutually protect' clauses, it would be incorrect
+ // to add a clause "F -> G" because F is NOT protected by G, but we still have
+ // both "F -> K" and "F -> L" because F IS protected by both of those handlers.
+ //
+ // The overall ordering of the clauses is still the same most-to-least nesting
+ // after front-to-back start offset. Because we place the funclets at the end
+ // these new clauses should also go at the end by this ordering.
+ //
+
+ if (duplicateClauseCount > 0)
+ {
+ unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported?
+ unsigned XTnum2;
+ for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++)
+ {
+ unsigned enclosingTryIndex;
+
+ EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2);
+
+ for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index,
+ // ignoring 'mutual protect' trys
+ enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
+ enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
+ {
+ // The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet
+ // that will have the enclosing try protecting the funclet.
+
+ noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a
+ // greater EH table index
+
+ EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex);
+
+ // The try region is the handler of the funclet. Note that for filters, we don't protect the
+ // filter region, only the filter handler region. This is because exceptions in filters never
+ // escape; the VM swallows them.
+
+ BasicBlock* bbTryBeg = fletTab->ebdHndBeg;
+ BasicBlock* bbTryLast = fletTab->ebdHndLast;
+
+ BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try
+ BasicBlock* bbHndLast = encTab->ebdHndLast;
+
+ UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
+
+ tryBeg = compiler->ehCodeOffset(bbTryBeg);
+ hndBeg = compiler->ehCodeOffset(bbHndBeg);
+
+ tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
+ : compiler->ehCodeOffset(bbTryLast->bbNext);
+ hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
+ : compiler->ehCodeOffset(bbHndLast->bbNext);
+
+ if (encTab->HasFilter())
+ {
+ hndTyp = compiler->ehCodeOffset(encTab->ebdFilter);
+ }
+ else
+ {
+ hndTyp = encTab->ebdTyp;
+ }
+
+ CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
+
+ // Tell the VM this is an extra clause caused by moving funclets out of line.
+ // It seems odd that this flag comes from the CorExceptionFlag enum in corhdr.h,
+ // not the CORINFO_EH_CLAUSE_FLAGS enum in corinfo.h.
+ flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | COR_ILEXCEPTION_CLAUSE_DUPLICATED);
+
+ // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
+ // the fields aren't really accurate. For example, we set "TryLength" to the offset of the
+ // instruction immediately after the 'try' body. So, it really could be more accurately named
+ // "TryEndOffset".
+
+ CORINFO_EH_CLAUSE clause;
+ clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
+ clause.Flags = flags;
+ clause.TryOffset = tryBeg;
+ clause.TryLength = tryEnd;
+ clause.HandlerOffset = hndBeg;
+ clause.HandlerLength = hndEnd;
+
+ assert(XTnum < EHCount);
+
+ // Tell the VM about this EH clause (a duplicated clause).
+ compiler->eeSetEHinfo(XTnum, &clause);
+
+ ++XTnum;
+ ++reportedDuplicateClauseCount;
+
+#ifndef DEBUG
+ if (duplicateClauseCount == reportedDuplicateClauseCount)
+ {
+ break; // we've reported all of them; no need to continue looking
+ }
+#endif // !DEBUG
+
+ } // for each 'true' enclosing 'try'
+ } // for each EH table entry
+
+ assert(duplicateClauseCount == reportedDuplicateClauseCount);
+ } // if (duplicateClauseCount > 0)
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ if (anyFinallys)
+ {
+ unsigned reportedClonedFinallyCount = 0;
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ UNATIVE_OFFSET hndBeg, hndEnd;
+
+ hndBeg = compiler->ehCodeOffset(block);
+
+ // How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be
+ // a label or jump target, since the BBJ_CALLFINALLY doesn't fall through.
+ BasicBlock* bbLabel = block->bbNext;
+ if (block->isBBCallAlwaysPair())
+ {
+ bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS
+ }
+ if (bbLabel == nullptr)
+ {
+ hndEnd = compiler->info.compNativeCodeSize;
+ }
+ else
+ {
+ assert(bbLabel->bbEmitCookie != nullptr);
+ hndEnd = compiler->ehCodeOffset(bbLabel);
+ }
+
+ CORINFO_EH_CLAUSE clause;
+ clause.ClassToken = 0; // unused
+ clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | COR_ILEXCEPTION_CLAUSE_DUPLICATED);
+ clause.TryOffset = hndBeg;
+ clause.TryLength = hndBeg;
+ clause.HandlerOffset = hndBeg;
+ clause.HandlerLength = hndEnd;
+
+ assert(XTnum < EHCount);
+
+ // Tell the VM about this EH clause (a cloned finally clause).
+ compiler->eeSetEHinfo(XTnum, &clause);
+
+ ++XTnum;
+ ++reportedClonedFinallyCount;
+
+#ifndef DEBUG
+ if (clonedFinallyCount == reportedClonedFinallyCount)
+ {
+ break; // we're done; no need to keep looking
+ }
+#endif // !DEBUG
+ } // block is BBJ_CALLFINALLY
+ } // for each block
+
+ assert(clonedFinallyCount == reportedClonedFinallyCount);
+ } // if (anyFinallys)
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+#endif // FEATURE_EH_FUNCLETS
+
+ assert(XTnum == EHCount);
+}
+
+void CodeGen::genGCWriteBarrier(GenTreePtr tgt, GCInfo::WriteBarrierForm wbf)
+{
+#ifndef LEGACY_BACKEND
+ noway_assert(tgt->gtOper == GT_STOREIND);
+#else // LEGACY_BACKEND
+ noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR); // enforced by gcIsWriteBarrierCandidate
+#endif // LEGACY_BACKEND
+
+ /* Call the proper vm helper */
+ int helper = CORINFO_HELP_ASSIGN_REF;
+#ifdef DEBUG
+ if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
+ {
+ helper = CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP;
+ }
+ else
+#endif
+ if (tgt->gtOper != GT_CLS_VAR)
+ {
+ if (wbf != GCInfo::WBF_BarrierUnchecked) // This overrides the tests below.
+ {
+ if (tgt->gtFlags & GTF_IND_TGTANYWHERE)
+ {
+ helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
+ }
+ else if (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)
+ {
+ helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
+ }
+ }
+ }
+ assert(((helper == CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP) && (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)) ||
+ ((helper == CORINFO_HELP_CHECKED_ASSIGN_REF) &&
+ (wbf == GCInfo::WBF_BarrierChecked || wbf == GCInfo::WBF_BarrierUnknown)) ||
+ ((helper == CORINFO_HELP_ASSIGN_REF) &&
+ (wbf == GCInfo::WBF_BarrierUnchecked || wbf == GCInfo::WBF_BarrierUnknown)));
+
+#ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
+ // We classify the "tgt" trees as follows:
+ // If "tgt" is of the form (where [ x ] indicates an optional x, and { x1, ..., xn } means "one of the x_i forms"):
+ // IND [-> ADDR -> IND] -> { GT_LCL_VAR, GT_REG_VAR, ADD({GT_LCL_VAR, GT_REG_VAR}, X), ADD(X, (GT_LCL_VAR,
+ // GT_REG_VAR)) }
+ // then let "v" be the GT_LCL_VAR or GT_REG_VAR.
+ // * If "v" is the return buffer argument, classify as CWBKind_RetBuf.
+ // * If "v" is another by-ref argument, classify as CWBKind_ByRefArg.
+ // * Otherwise, classify as CWBKind_OtherByRefLocal.
+ // If "tgt" is of the form IND -> ADDR -> GT_LCL_VAR, clasify as CWBKind_AddrOfLocal.
+ // Otherwise, classify as CWBKind_Unclassified.
+
+ CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified;
+ if (tgt->gtOper == GT_IND)
+ {
+ GenTreePtr lcl = NULL;
+
+ GenTreePtr indArg = tgt->gtOp.gtOp1;
+ if (indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_IND)
+ {
+ indArg = indArg->gtOp.gtOp1->gtOp.gtOp1;
+ }
+ if (indArg->gtOper == GT_LCL_VAR || indArg->gtOper == GT_REG_VAR)
+ {
+ lcl = indArg;
+ }
+ else if (indArg->gtOper == GT_ADD)
+ {
+ if (indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR || indArg->gtOp.gtOp1->gtOper == GT_REG_VAR)
+ {
+ lcl = indArg->gtOp.gtOp1;
+ }
+ else if (indArg->gtOp.gtOp2->gtOper == GT_LCL_VAR || indArg->gtOp.gtOp2->gtOper == GT_REG_VAR)
+ {
+ lcl = indArg->gtOp.gtOp2;
+ }
+ }
+ if (lcl != NULL)
+ {
+ wbKind = CWBKind_OtherByRefLocal; // Unclassified local variable.
+ unsigned lclNum = 0;
+ if (lcl->gtOper == GT_LCL_VAR)
+ lclNum = lcl->gtLclVarCommon.gtLclNum;
+ else
+ {
+ assert(lcl->gtOper == GT_REG_VAR);
+ lclNum = lcl->gtRegVar.gtLclNum;
+ }
+ if (lclNum == compiler->info.compRetBuffArg)
+ {
+ wbKind = CWBKind_RetBuf; // Ret buff. Can happen if the struct exceeds the size limit.
+ }
+ else
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
+ if (varDsc->lvIsParam && varDsc->lvType == TYP_BYREF)
+ {
+ wbKind = CWBKind_ByRefArg; // Out (or in/out) arg
+ }
+ }
+ }
+ else
+ {
+ // We should have eliminated the barrier for this case.
+ assert(!(indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR));
+ }
+ }
+
+ if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF)
+ {
+#if 0
+#ifdef DEBUG
+ // Enable this to sample the unclassified trees.
+ static int unclassifiedBarrierSite = 0;
+ if (wbKind == CWBKind_Unclassified)
+ {
+ unclassifiedBarrierSite++;
+ printf("unclassifiedBarrierSite = %d:\n", unclassifiedBarrierSite); compiler->gtDispTree(tgt); printf(""); printf("\n");
+ }
+#endif // DEBUG
+#endif // 0
+ genStackLevel += 4;
+ inst_IV(INS_push, wbKind);
+ genEmitHelperCall(helper,
+ 4, // argSize
+ EA_PTRSIZE); // retSize
+ genStackLevel -= 4;
+ }
+ else
+ {
+ genEmitHelperCall(helper,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+ }
+
+#else // !FEATURE_COUNT_GC_WRITE_BARRIERS
+ genEmitHelperCall(helper,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+#endif // !FEATURE_COUNT_GC_WRITE_BARRIERS
+}
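+
+// Editor's note: illustrative summary of the helper selection above, not part of the original
+// change. A GT_STOREIND whose address is TYP_I_IMPL (a raw native int), or which is flagged
+// GTF_IND_TGTANYWHERE, calls CORINFO_HELP_CHECKED_ASSIGN_REF; a store whose address is a
+// TYP_BYREF without that flag keeps the default CORINFO_HELP_ASSIGN_REF.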
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Prolog / Epilog XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+ *
+ * Generates code for moving incoming register arguments to their
+ * assigned location, in the function prolog.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
+ }
+#endif
+
+#ifdef _TARGET_ARM64_
+ if (compiler->info.compIsVarArgs)
+ {
+ // We've already saved all int registers at the top of the stack in the prolog.
+ // No further action is needed.
+ return;
+ }
+#endif
+
+ unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg)
+ unsigned argNum; // current argNum, always in [0..argMax-1]
+ unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64)
+ unsigned regArgNum; // index into the regArgTab[] table
+ regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn;
+ bool doingFloat = regState->rsIsFloat;
+
+ // We should be generating the prolog block when we are called
+ assert(compiler->compGeneratingProlog);
+
+ // We expect to have some registers of the type we are processing that are LiveIn; otherwise we don't need to be called.
+ noway_assert(regArgMaskLive != 0);
+
+ // If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
+ // If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid indexes are 0-8
+ //
+ // The regArgTab can always have unused entries:
+ // for example, if an architecture always increments the arg register number but uses either
+ // an integer register or a floating point register to hold the next argument,
+ // then with a mix of float and integer args you could have:
+ //
+ // sampleMethod(int i, float x, int j, float y, int k, float z);
+ // r0, r2 and r4 as valid integer arguments with argMax as 5
+ // and f1, f3 and f5 as valid floating point arguments with argMax as 6
+ // The first one is doingFloat==false and the second one is doingFloat==true
+ //
+ // If a fixed return buffer (in r8) was also present then the first one would become:
+ // r0, r2, r4 and r8 as valid integer arguments with argMax as 9
+ //
+
+ argMax = regState->rsCalleeRegArgCount;
+ fixedRetBufIndex = (unsigned)-1; // Invalid value
+
+ // If necessary we will select a correct xtraReg for circular floating point args later.
+ if (doingFloat)
+ {
+ xtraReg = REG_NA;
+ noway_assert(argMax <= MAX_FLOAT_REG_ARG);
+ }
+ else // we are doing the integer registers
+ {
+ noway_assert(argMax <= MAX_REG_ARG);
+ if (hasFixedRetBuffReg())
+ {
+ fixedRetBufIndex = theFixedRetBuffArgNum();
+ // We have an additional integer register argument when hasFixedRetBuffReg() is true
+ argMax = fixedRetBufIndex + 1;
+ assert(argMax == (MAX_REG_ARG + 1));
+ }
+ }
+
+ //
+ // Construct a table with the register arguments, for detecting circular and
+ // non-circular dependencies between the register arguments. A dependency is when
+ // an argument register Rn needs to be moved to register Rm that is also an argument
+ // register. The table is constructed in the order the arguments are passed in
+ // registers: the first register argument is in regArgTab[0], the second in
+ // regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting
+ // at an even index. The regArgTab is indexed from 0 to argMax - 1.
+ // Note that due to an extra argument register for ARM64 (i.e. theFixedRetBuffReg())
+ // we have increased the allocated size of the regArgTab[] by one.
+ //
+ struct regArgElem
+ {
+ unsigned varNum; // index into compiler->lvaTable[] for this register argument
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ var_types type; // the Jit type of this regArgTab entry
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
+ // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
+ // argument register number 'x'. Only used when circular = true.
+ char slot; // 0 means the register is not used for a register argument
+ // 1 means the first part of a register argument
+ // 2, 3 or 4 means the second,third or fourth part of a multireg argument
+ bool stackArg; // true if the argument gets homed to the stack
+ bool processed; // true after we've processed the argument (and it is in its final location)
+ bool circular; // true if this register participates in a circular dependency loop.
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // For UNIX AMD64 struct passing, the type of the register argument slot can differ from
+ // the type of the lclVar in ways that are not ascertainable from lvType.
+ // So, for that case we retain the type of the register in the regArgTab.
+
+ var_types getRegType(Compiler* compiler)
+ {
+ return type; // UNIX_AMD64 implementation
+ }
+
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // In other cases, we simply use the type of the lclVar to determine the type of the register.
+ var_types getRegType(Compiler* compiler)
+ {
+ LclVarDsc varDsc = compiler->lvaTable[varNum];
+ // Check if this is an HFA register arg and return the HFA type
+ if (varDsc.lvIsHfaRegArg())
+ {
+ return varDsc.GetHfaType();
+ }
+ return varDsc.lvType;
+ }
+
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ // Is this variable a register arg?
+ if (!varDsc->lvIsParam)
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+ // When we have a promoted struct we have two possible LclVars that can represent the incoming argument
+ // in the regArgTab[]: either the original TYP_STRUCT argument or the introduced lvStructField.
+ // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field; otherwise we
+ // use the original TYP_STRUCT argument.
+ //
+ if (varDsc->lvPromoted || varDsc->lvIsStructField)
+ {
+ LclVarDsc* parentVarDsc = varDsc;
+ if (varDsc->lvIsStructField)
+ {
+ assert(!varDsc->lvPromoted);
+ parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl];
+ }
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc);
+
+ if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT)
+ {
+ noway_assert(parentVarDsc->lvFieldCnt == 1); // We only handle one field here
+
+ // For register arguments that are independent promoted structs we put the promoted field varNum in the
+ // regArgTab[]
+ if (varDsc->lvPromoted)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ // For register arguments that are not independent promoted structs we put the parent struct varNum in
+ // the regArgTab[]
+ if (varDsc->lvIsStructField)
+ {
+ continue;
+ }
+ }
+ }
+
+ var_types regType = varDsc->TypeGet();
+ // Change regType to the HFA type when we have a HFA argument
+ if (varDsc->lvIsHfaRegArg())
+ {
+ regType = varDsc->GetHfaType();
+ }
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (!varTypeIsStruct(regType))
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // A struct might be passed partially in XMM register for System V calls.
+ // So a single arg might use both register files.
+ if (isFloatRegType(regType) != doingFloat)
+ {
+ continue;
+ }
+ }
+
+ int slots = 0;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varTypeIsStruct(varDsc))
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ if (!structDesc.passedInRegisters)
+ {
+ // The var is not passed in registers.
+ continue;
+ }
+
+ unsigned firstRegSlot = 0;
+ for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
+ {
+ regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
+ var_types regType;
+
+#ifdef FEATURE_SIMD
+ // Assumption 1:
+ // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
+ // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
+ // reading and writing purposes. Hence while homing a Vector3 type arg on stack we should
+ // home entire 16-bytes so that the upper-most 4-bytes will be zeroed when written to stack.
+ //
+ // Assumption 2:
+ // RyuJit backend is making another implicit assumption that Vector3 type args when passed in
+ // registers or on stack, the upper most 4-bytes will be zero.
+ //
+ // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
+ // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
+ // invalid.
+ //
+ // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
+ // bytes. In case of Vector3 returns, the caller allocates a zero initialized Vector3 local and
+ // passes it as the retBuf arg, and the callee method writes only 12 bytes to retBuf. For this
+ // reason, there is no need to clear the upper 4 bytes of Vector3 type args.
+ //
+ // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
+ // Vector3 return values are returned in two return registers and the caller assembles them into a
+ // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
+ // type args in the prolog and of the Vector3 type return value of a call.
+
+ if (varDsc->lvType == TYP_SIMD12)
+ {
+ regType = TYP_DOUBLE;
+ }
+ else
+#endif
+ {
+ regType = compiler->GetEightByteType(structDesc, slotCounter);
+ }
+
+ regArgNum = genMapRegNumToRegArgNum(regNum, regType);
+
+ if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
+ (doingFloat && (structDesc.IsSseSlot(slotCounter))))
+ {
+ // Store the reg for the first slot.
+ if (slots == 0)
+ {
+ firstRegSlot = regArgNum;
+ }
+
+ // Bingo - add it to our table
+ noway_assert(regArgNum < argMax);
+ noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better
+ // not be multiple vars representing this argument
+ // register)
+ regArgTab[regArgNum].varNum = varNum;
+ regArgTab[regArgNum].slot = (char)(slotCounter + 1);
+ regArgTab[regArgNum].type = regType;
+ slots++;
+ }
+ }
+
+ if (slots == 0)
+ {
+ continue; // Nothing to do for this regState set.
+ }
+
+ regArgNum = firstRegSlot;
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // Bingo - add it to our table
+ regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
+
+ noway_assert(regArgNum < argMax);
+ // We better not have added it already (there better not be multiple vars representing this argument
+ // register)
+ noway_assert(regArgTab[regArgNum].slot == 0);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Set the register type.
+ regArgTab[regArgNum].type = regType;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ regArgTab[regArgNum].varNum = varNum;
+ regArgTab[regArgNum].slot = 1;
+
+ slots = 1;
+
+#if FEATURE_MULTIREG_ARGS
+ if (compiler->lvaIsMultiregStruct(varDsc))
+ {
+ if (varDsc->lvIsHfaRegArg())
+ {
+ // We have an HFA argument, set slots to the number of registers used
+ slots = varDsc->lvHfaSlots();
+ }
+ else
+ {
+ // Currently all non-HFA multireg structs are two registers in size (i.e. two slots)
+ assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE));
+ // We have a non-HFA multireg argument, set slots to two
+ slots = 2;
+ }
+
+ // Note that regArgNum+1 represents an argument index not an actual argument register.
+ // see genMapRegArgNumToRegNum(unsigned argNum, var_types type)
+
+ // This is the setup for the rest of a multireg struct arg
+
+ for (int i = 1; i < slots; i++)
+ {
+ noway_assert((regArgNum + i) < argMax);
+
+ // We better not have added it already (there better not be multiple vars representing this argument
+ // register)
+ noway_assert(regArgTab[regArgNum + i].slot == 0);
+
+ regArgTab[regArgNum + i].varNum = varNum;
+ regArgTab[regArgNum + i].slot = (char)(i + 1);
+ }
+ }
+#endif // FEATURE_MULTIREG_ARGS
+ }
+
+#ifdef _TARGET_ARM_
+ int lclSize = compiler->lvaLclSize(varNum);
+
+ if (lclSize > REGSIZE_BYTES)
+ {
+ unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG;
+ slots = lclSize / REGSIZE_BYTES;
+ if (regArgNum + slots > maxRegArgNum)
+ {
+ slots = maxRegArgNum - regArgNum;
+ }
+ }
+ C_ASSERT((char)MAX_REG_ARG == MAX_REG_ARG);
+ assert(slots < INT8_MAX);
+ for (char i = 1; i < slots; i++)
+ {
+ regArgTab[regArgNum + i].varNum = varNum;
+ regArgTab[regArgNum + i].slot = i + 1;
+ }
+#endif // _TARGET_ARM_
+
+ for (int i = 0; i < slots; i++)
+ {
+ regType = regArgTab[regArgNum + i].getRegType(compiler);
+ regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
+
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Under FEATURE_UNIX_AMD64_STRUCT_PASSING, lvArgReg could be an INT or a FLOAT reg, so the
+ // following assertion doesn't hold there. The type of the register depends on the classification
+ // of the first eightbyte of the struct. For information on classification refer to the
+ // System V x86_64 ABI at: http://www.x86-64.org/documentation/abi.pdf
+
+ assert((i > 0) || (regNum == varDsc->lvArgReg));
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Is the arg dead on entry to the method ?
+
+ if ((regArgMaskLive & genRegMask(regNum)) == 0)
+ {
+ if (varDsc->lvTrackedNonStruct())
+ {
+ noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
+ }
+ else
+ {
+#ifdef _TARGET_X86_
+ noway_assert(varDsc->lvType == TYP_STRUCT);
+#else // !_TARGET_X86_
+#ifndef LEGACY_BACKEND
+ // For LSRA, it may not be in regArgMaskLive if it has a zero
+ // refcnt. This is in contrast with the non-LSRA case in which all
+ // non-tracked args are assumed live on entry.
+ noway_assert((varDsc->lvRefCnt == 0) || (varDsc->lvType == TYP_STRUCT) ||
+ (varDsc->lvAddrExposed && compiler->info.compIsVarArgs));
+#else // LEGACY_BACKEND
+ noway_assert(
+ varDsc->lvType == TYP_STRUCT ||
+ (varDsc->lvAddrExposed && (compiler->info.compIsVarArgs || compiler->opts.compUseSoftFP)));
+#endif // LEGACY_BACKEND
+#endif // !_TARGET_X86_
+ }
+ // Mark it as processed and be done with it
+ regArgTab[regArgNum + i].processed = true;
+ goto NON_DEP;
+ }
+
+#ifdef _TARGET_ARM_
+ // On ARM, when the varDsc is a struct arg (or pre-spilled due to varargs) the initReg/xtraReg
+ // could be equal to lvArgReg. The pre-spilled registers are not considered live either, since
+ // they've already been spilled.
+ //
+ if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0)
+#endif // _TARGET_ARM_
+ {
+ noway_assert(xtraReg != varDsc->lvArgReg + i);
+ noway_assert(regArgMaskLive & genRegMask(regNum));
+ }
+
+ regArgTab[regArgNum + i].processed = false;
+
+ /* mark stack arguments since we will take care of those first */
+ regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true;
+
+ /* If it goes on the stack or in a register that doesn't hold
+ * an argument anymore -> CANNOT form a circular dependency */
+
+ if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive))
+ {
+ /* will trash another argument -> possible dependency
+ * We may need several passes after the table is constructed
+ * to decide on that */
+
+ /* Maybe the argument stays in the register (IDEAL) */
+
+ if ((i == 0) && (varDsc->lvRegNum == regNum))
+ {
+ goto NON_DEP;
+ }
+
+#if !defined(_TARGET_64BIT_)
+ if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->lvOtherReg == regNum))
+ {
+ goto NON_DEP;
+ }
+ if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->lvOtherReg == regNum))
+ {
+ goto NON_DEP;
+ }
+
+ if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
+ (REG_NEXT(varDsc->lvRegNum) == regNum))
+ {
+ goto NON_DEP;
+ }
+#endif // !defined(_TARGET_64BIT_)
+ regArgTab[regArgNum + i].circular = true;
+ }
+ else
+ {
+ NON_DEP:
+ regArgTab[regArgNum + i].circular = false;
+
+ /* mark the argument register as free */
+ regArgMaskLive &= ~genRegMask(regNum);
+ }
+ }
+ }
+
+ /* Find the circular dependencies for the argument registers, if any.
+ * A circular dependency is a set of registers R1, R2, ..., Rn
+ * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
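+
+ /* Editor's illustrative example (hypothetical registers): if the argument arriving in r0
+ must be homed in r1 while the argument arriving in r1 must be homed in r0, the sieve below
+ leaves regArgTab[1].trashBy == 0 and regArgTab[0].trashBy == 1 with both entries still
+ marked circular; the cycle is then broken up later (e.g. using the xtraReg temporary). */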
+
+ bool change = true;
+ if (regArgMaskLive)
+ {
+ /* Possible circular dependencies still exist; the previous pass was not enough
+ * to filter them out. Use a "sieve" strategy to find all circular dependencies. */
+
+ while (change)
+ {
+ change = false;
+
+ for (argNum = 0; argNum < argMax; argNum++)
+ {
+ // If we already marked the argument as non-circular then continue
+
+ if (!regArgTab[argNum].circular)
+ {
+ continue;
+ }
+
+ if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
+ continue;
+ }
+
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+ noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
+
+ /* cannot possibly have stack arguments */
+ noway_assert(varDsc->lvIsInReg());
+ noway_assert(!regArgTab[argNum].stackArg);
+
+ var_types regType = regArgTab[argNum].getRegType(compiler);
+ regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
+
+ regNumber destRegNum = REG_NA;
+ if (regArgTab[argNum].slot == 1)
+ {
+ destRegNum = varDsc->lvRegNum;
+ }
+#if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_AMD64_)
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ assert(argNum > 0);
+ assert(regArgTab[argNum - 1].slot == 1);
+ assert(regArgTab[argNum - 1].varNum == varNum);
+ assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
+ regArgMaskLive &= ~genRegMask(regNum);
+ regArgTab[argNum].circular = false;
+ change = true;
+ continue;
+ }
+#elif !defined(_TARGET_64BIT_)
+ else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG)
+ {
+ destRegNum = varDsc->lvOtherReg;
+ }
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ assert(varDsc->TypeGet() == TYP_DOUBLE);
+ destRegNum = REG_NEXT(varDsc->lvRegNum);
+ }
+#endif // !defined(_TARGET_64BIT_)
+ noway_assert(destRegNum != REG_NA);
+ if (genRegMask(destRegNum) & regArgMaskLive)
+ {
+ /* we are trashing a live argument register - record it */
+ unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
+ noway_assert(destRegArgNum < argMax);
+ regArgTab[destRegArgNum].trashBy = argNum;
+ }
+ else
+ {
+ /* argument goes to a free register */
+ regArgTab[argNum].circular = false;
+ change = true;
+
+ /* mark the argument register as free */
+ regArgMaskLive &= ~genRegMask(regNum);
+ }
+ }
+ }
+ }
+
+ /* At this point, everything that has the "circular" flag
+ * set to "true" forms a circular dependency */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (regArgMaskLive)
+ {
+ if (verbose)
+ {
+            printf("Circular dependencies found while homing the incoming arguments.\n");
+ }
+ }
+#endif
+
+ // LSRA allocates registers to incoming parameters in order and will not overwrite
+ // a register still holding a live parameter.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) &&
+ "Homing of float argument registers with circular dependencies not implemented.");
+#endif // !LEGACY_BACKEND
+
+ /* Now move the arguments to their locations.
+ * First consider ones that go on the stack since they may
+ * free some registers. */
+
+ regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
+ for (argNum = 0; argNum < argMax; argNum++)
+ {
+ emitAttr size;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // If this is the wrong register file, just continue.
+ if (regArgTab[argNum].type == TYP_UNDEF)
+ {
+ // This could happen if the reg in regArgTab[argNum] is of the other register file -
+ // for System V register passed structs where the first reg is GPR and the second an XMM reg.
+ // The next register file processing will process it.
+ continue;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // If the arg is dead on entry to the method, skip it
+
+ if (regArgTab[argNum].processed)
+ {
+ continue;
+ }
+
+ if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
+ continue;
+ }
+
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+#ifndef _TARGET_64BIT_
+ // If not a stack arg go to the next one
+ if (varDsc->lvType == TYP_LONG)
+ {
+ if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg)
+ {
+ continue;
+ }
+ else if (varDsc->lvOtherReg != REG_STK)
+ {
+ continue;
+ }
+ }
+ else
+#endif // !_TARGET_64BIT_
+ {
+ // If not a stack arg go to the next one
+ if (!regArgTab[argNum].stackArg)
+ {
+ continue;
+ }
+ }
+
+#if defined(_TARGET_ARM_)
+ if (varDsc->lvType == TYP_DOUBLE)
+ {
+ if (regArgTab[argNum].slot == 2)
+ {
+ // We handled the entire double when processing the first half (slot == 1)
+ continue;
+ }
+ }
+#endif
+
+ noway_assert(regArgTab[argNum].circular == false);
+
+ noway_assert(varDsc->lvIsParam);
+ noway_assert(varDsc->lvIsRegArg);
+ noway_assert(varDsc->lvIsInReg() == false ||
+ (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK && regArgTab[argNum].slot == 2));
+
+ var_types storeType = TYP_UNDEF;
+ unsigned slotSize = TARGET_POINTER_SIZE;
+
+ if (varTypeIsStruct(varDsc))
+ {
+ storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer
+#if FEATURE_MULTIREG_ARGS
+ // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers
+ noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES);
+#endif // FEATURE_MULTIREG_ARGS
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ storeType = regArgTab[argNum].type;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (varDsc->lvIsHfaRegArg())
+ {
+#ifdef _TARGET_ARM_
+ // On ARM32 the storeType for HFA args is always TYP_FLOAT
+ storeType = TYP_FLOAT;
+ slotSize = (unsigned)emitActualTypeSize(storeType);
+#else // _TARGET_ARM64_
+ storeType = genActualType(varDsc->GetHfaType());
+ slotSize = (unsigned)emitActualTypeSize(storeType);
+#endif // _TARGET_ARM64_
+ }
+ }
+ else // Not a struct type
+ {
+ storeType = genActualType(varDsc->TypeGet());
+ }
+ size = emitActualTypeSize(storeType);
+#ifdef _TARGET_X86_
+ noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE);
+#endif //_TARGET_X86_
+
+ regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
+
+        // Stack argument - if the ref count is 0, we don't care about it
+
+ if (!varDsc->lvOnFrame)
+ {
+ noway_assert(varDsc->lvRefCnt == 0);
+ }
+ else
+ {
+ // Since slot is typically 1, baseOffset is typically 0
+ int baseOffset = (regArgTab[argNum].slot - 1) * slotSize;
+
+ getEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset);
+
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Check if we are writing past the end of the struct
+ if (varTypeIsStruct(varDsc))
+ {
+ assert(varDsc->lvSize() >= baseOffset + (unsigned)size);
+ }
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (regArgTab[argNum].slot == 1)
+ {
+ psiMoveToStack(varNum);
+ }
+ }
+
+ /* mark the argument as processed */
+
+ regArgTab[argNum].processed = true;
+ regArgMaskLive &= ~genRegMask(srcRegNum);
+
+#if defined(_TARGET_ARM_)
+ if (storeType == TYP_DOUBLE)
+ {
+ regArgTab[argNum + 1].processed = true;
+ regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum));
+ }
+#endif
+ }
+
+ /* Process any circular dependencies */
+ if (regArgMaskLive)
+ {
+ unsigned begReg, destReg, srcReg;
+ unsigned varNumDest, varNumSrc;
+ LclVarDsc* varDscDest;
+ LclVarDsc* varDscSrc;
+ instruction insCopy = INS_mov;
+
+ if (doingFloat)
+ {
+#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ insCopy = ins_Copy(TYP_DOUBLE);
+ // Compute xtraReg here when we have a float argument
+ assert(xtraReg == REG_NA);
+
+ regMaskTP fpAvailMask;
+
+ fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
+#if defined(FEATURE_HFA)
+ fpAvailMask &= RBM_ALLDOUBLE;
+#else
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#error Error. Wrong architecture.
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#endif // defined(FEATURE_HFA)
+
+ if (fpAvailMask == RBM_NONE)
+ {
+ fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
+#if defined(FEATURE_HFA)
+ fpAvailMask &= RBM_ALLDOUBLE;
+#else
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#error Error. Wrong architecture.
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#endif // defined(FEATURE_HFA)
+ }
+
+ assert(fpAvailMask != RBM_NONE);
+
+ // We pick the lowest avail register number
+ regMaskTP tempMask = genFindLowestBit(fpAvailMask);
+ xtraReg = genRegNumFromMask(tempMask);
+#elif defined(_TARGET_X86_)
+ // This case shouldn't occur on x86 since NYI gets converted to an assert
+ NYI("Homing circular FP registers via xtraReg");
+#endif
+ }
+
+ for (argNum = 0; argNum < argMax; argNum++)
+ {
+ // If not a circular dependency then continue
+ if (!regArgTab[argNum].circular)
+ {
+ continue;
+ }
+
+ // If already processed the dependency then continue
+
+ if (regArgTab[argNum].processed)
+ {
+ continue;
+ }
+
+ if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
+ continue;
+ }
+
+ destReg = begReg = argNum;
+ srcReg = regArgTab[argNum].trashBy;
+
+ varNumDest = regArgTab[destReg].varNum;
+ noway_assert(varNumDest < compiler->lvaCount);
+ varDscDest = compiler->lvaTable + varNumDest;
+ noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
+
+ noway_assert(srcReg < argMax);
+ varNumSrc = regArgTab[srcReg].varNum;
+ noway_assert(varNumSrc < compiler->lvaCount);
+ varDscSrc = compiler->lvaTable + varNumSrc;
+ noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
+
+ emitAttr size = EA_PTRSIZE;
+
+#ifdef _TARGET_XARCH_
+ //
+ // The following code relies upon the target architecture having an
+ // 'xchg' instruction which directly swaps the values held in two registers.
+ // On the ARM architecture we do not have such an instruction.
+ //
+ if (destReg == regArgTab[srcReg].trashBy)
+ {
+ /* only 2 registers form the circular dependency - use "xchg" */
+
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+ noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
+
+ noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES);
+
+ /* Set "size" to indicate GC if one and only one of
+ * the operands is a pointer
+ * RATIONALE: If both are pointers, nothing changes in
+ * the GC pointer tracking. If only one is a pointer we
+ * have to "swap" the registers in the GC reg pointer mask
+ */
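+                /* For example (illustrative): swapping an object reference with a plain int must
+                 * be reported with EA_GCREF so the emitter updates the GC register mask; swapping
+                 * two object references leaves the mask unchanged, so the default EA_PTRSIZE
+                 * suffices. */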
+
+ if (varTypeGCtype(varDscSrc->TypeGet()) != varTypeGCtype(varDscDest->TypeGet()))
+ {
+ size = EA_GCREF;
+ }
+
+ noway_assert(varDscDest->lvArgReg == varDscSrc->lvRegNum);
+
+ getEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->lvRegNum, varDscSrc->lvArgReg);
+ regTracker.rsTrackRegTrash(varDscSrc->lvRegNum);
+ regTracker.rsTrackRegTrash(varDscSrc->lvArgReg);
+
+ /* mark both arguments as processed */
+ regArgTab[destReg].processed = true;
+ regArgTab[srcReg].processed = true;
+
+ regArgMaskLive &= ~genRegMask(varDscSrc->lvArgReg);
+ regArgMaskLive &= ~genRegMask(varDscDest->lvArgReg);
+
+ psiMoveToReg(varNumSrc);
+ psiMoveToReg(varNumDest);
+ }
+ else
+#endif // _TARGET_XARCH_
+ {
+ var_types destMemType = varDscDest->TypeGet();
+
+#ifdef _TARGET_ARM_
+ bool cycleAllDouble = true; // assume the best
+
+ unsigned iter = begReg;
+ do
+ {
+ if (compiler->lvaTable[regArgTab[iter].varNum].TypeGet() != TYP_DOUBLE)
+ {
+ cycleAllDouble = false;
+ break;
+ }
+ iter = regArgTab[iter].trashBy;
+ } while (iter != begReg);
+
+ // We may treat doubles as floats for ARM because we could have partial circular
+ // dependencies of a float with a lo/hi part of the double. We mark the
+ // trashBy values for each slot of the double, so let the circular dependency
+ // logic work its way out for floats rather than doubles. If a cycle has all
+ // doubles, then optimize so that instead of two vmov.f32's to move a double,
+ // we can use one vmov.f64.
+ //
+ if (!cycleAllDouble && destMemType == TYP_DOUBLE)
+ {
+ destMemType = TYP_FLOAT;
+ }
+#endif // _TARGET_ARM_
+
+ if (destMemType == TYP_REF)
+ {
+ size = EA_GCREF;
+ }
+ else if (destMemType == TYP_BYREF)
+ {
+ size = EA_BYREF;
+ }
+ else if (destMemType == TYP_DOUBLE)
+ {
+ size = EA_8BYTE;
+ }
+ else if (destMemType == TYP_FLOAT)
+ {
+ size = EA_4BYTE;
+ }
+
+ /* move the dest reg (begReg) in the extra reg */
+
+ assert(xtraReg != REG_NA);
+
+ regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);
+
+ getEmitter()->emitIns_R_R(insCopy, size, xtraReg, begRegNum);
+
+ regTracker.rsTrackRegCopy(xtraReg, begRegNum);
+
+ *pXtraRegClobbered = true;
+
+ psiMoveToReg(varNumDest, xtraReg);
+
+ /* start moving everything to its right place */
+
+ while (srcReg != begReg)
+ {
+ /* mov dest, src */
+
+ regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
+ regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType);
+
+ getEmitter()->emitIns_R_R(insCopy, size, destRegNum, srcRegNum);
+
+ regTracker.rsTrackRegCopy(destRegNum, srcRegNum);
+
+ /* mark 'src' as processed */
+ noway_assert(srcReg < argMax);
+ regArgTab[srcReg].processed = true;
+#ifdef _TARGET_ARM_
+ if (size == EA_8BYTE)
+ regArgTab[srcReg + 1].processed = true;
+#endif
+ regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
+
+ /* move to the next pair */
+ destReg = srcReg;
+ srcReg = regArgTab[srcReg].trashBy;
+
+ varDscDest = varDscSrc;
+ destMemType = varDscDest->TypeGet();
+#ifdef _TARGET_ARM_
+ if (!cycleAllDouble && destMemType == TYP_DOUBLE)
+ {
+ destMemType = TYP_FLOAT;
+ }
+#endif
+ varNumSrc = regArgTab[srcReg].varNum;
+ noway_assert(varNumSrc < compiler->lvaCount);
+ varDscSrc = compiler->lvaTable + varNumSrc;
+ noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
+
+ if (destMemType == TYP_REF)
+ {
+ size = EA_GCREF;
+ }
+ else if (destMemType == TYP_DOUBLE)
+ {
+ size = EA_8BYTE;
+ }
+ else
+ {
+ size = EA_4BYTE;
+ }
+ }
+
+ /* take care of the beginning register */
+
+ noway_assert(srcReg == begReg);
+
+ /* move the dest reg (begReg) in the extra reg */
+
+ regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
+
+ getEmitter()->emitIns_R_R(insCopy, size, destRegNum, xtraReg);
+
+ regTracker.rsTrackRegCopy(destRegNum, xtraReg);
+
+ psiMoveToReg(varNumSrc);
+
+ /* mark the beginning register as processed */
+
+ regArgTab[srcReg].processed = true;
+#ifdef _TARGET_ARM_
+ if (size == EA_8BYTE)
+ regArgTab[srcReg + 1].processed = true;
+#endif
+ regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
+ }
+ }
+ }
+
+ /* Finally take care of the remaining arguments that must be enregistered */
+ while (regArgMaskLive)
+ {
+ regMaskTP regArgMaskLiveSave = regArgMaskLive;
+
+ for (argNum = 0; argNum < argMax; argNum++)
+ {
+ /* If already processed go to the next one */
+ if (regArgTab[argNum].processed)
+ {
+ continue;
+ }
+
+ if (regArgTab[argNum].slot == 0)
+ { // Not a register argument
+ continue;
+ }
+
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+ var_types regType = regArgTab[argNum].getRegType(compiler);
+ regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (regType == TYP_UNDEF)
+ {
+ // This could happen if the reg in regArgTab[argNum] is of the other register file -
+ // for System V register passed structs where the first reg is GPR and the second an XMM reg.
+ // The next register file processing will process it.
+ regArgMaskLive &= ~genRegMask(regNum);
+ continue;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
+#ifndef _TARGET_64BIT_
+#ifndef _TARGET_ARM_
+ // Right now we think that incoming arguments are not pointer sized. When we eventually
+ // understand the calling convention, this still won't be true. But maybe we'll have a better
+ // idea of how to ignore it.
+
+ // On Arm, a long can be passed in register
+ noway_assert(genTypeSize(genActualType(varDsc->TypeGet())) == sizeof(void*));
+#endif
+#endif //_TARGET_64BIT_
+
+ noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular);
+
+ /* Register argument - hopefully it stays in the same register */
+ regNumber destRegNum = REG_NA;
+ var_types destMemType = varDsc->TypeGet();
+
+ if (regArgTab[argNum].slot == 1)
+ {
+ destRegNum = varDsc->lvRegNum;
+
+#ifdef _TARGET_ARM_
+ if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + 1].processed)
+ {
+ // The second half of the double has already been processed! Treat this as a single.
+ destMemType = TYP_FLOAT;
+ }
+#endif // _TARGET_ARM_
+ }
+#ifndef _TARGET_64BIT_
+ else if (regArgTab[argNum].slot == 2 && genActualType(destMemType) == TYP_LONG)
+ {
+#ifndef LEGACY_BACKEND
+ assert(genActualType(varDsc->TypeGet()) == TYP_LONG || genActualType(varDsc->TypeGet()) == TYP_DOUBLE);
+ if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE)
+ {
+ destRegNum = regNum;
+ }
+ else
+#endif // !LEGACY_BACKEND
+ destRegNum = varDsc->lvOtherReg;
+
+ assert(destRegNum != REG_STK);
+ }
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ assert(destMemType == TYP_DOUBLE);
+
+ // For doubles, we move the entire double using the argNum representing
+ // the first half of the double. There are two things we won't do:
+ // (1) move the double when the 1st half of the destination is free but the
+ // 2nd half is occupied, and (2) move the double when the 2nd half of the
+ // destination is free but the 1st half is occupied. Here we consider the
+ // case where the first half can't be moved initially because its target is
+ // still busy, but the second half can be moved. We wait until the entire
+ // double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3,
+ // and F2 single moving to F16. When we process F0, its target F2 is busy,
+ // so we skip it on the first pass. When we process F1, its target F3 is
+ // available. However, we want to move F0/F1 all at once, so we skip it here.
+ // We process F2, which frees up F2. The next pass through, we process F0 and
+ // F2/F3 are empty, so we move it. Note that if half of a double is involved
+ // in a circularity with a single, then we will have already moved that half
+ // above, so we go ahead and move the remaining half as a single.
+ // Because there are no circularities left, we are guaranteed to terminate.
+
+ assert(argNum > 0);
+ assert(regArgTab[argNum - 1].slot == 1);
+
+ if (!regArgTab[argNum - 1].processed)
+ {
+                    // The first half of the double hasn't been processed yet; wait so that both halves can be
+                    // processed at the same time
+ continue;
+ }
+
+ // The first half of the double has been processed but the second half hasn't!
+ // This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2.
+ // In that case, there is a F0/F2 loop that is not a double-only loop. The circular
+ // dependency logic above will move them as singles, leaving just F3 to move. Treat
+ // it as a single to finish the shuffling.
+
+ destMemType = TYP_FLOAT;
+ destRegNum = REG_NEXT(varDsc->lvRegNum);
+ }
+#endif // !_TARGET_64BIT_
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ assert(argNum > 0);
+ assert(regArgTab[argNum - 1].slot == 1);
+ assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
+ destRegNum = varDsc->lvRegNum;
+ noway_assert(regNum != destRegNum);
+ continue;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ noway_assert(destRegNum != REG_NA);
+ if (destRegNum != regNum)
+ {
+ /* Cannot trash a currently live register argument.
+                 * Skip this one until its target is free,
+ * which is guaranteed to happen since we have no circular dependencies. */
+
+ regMaskTP destMask = genRegMask(destRegNum);
+#ifdef _TARGET_ARM_
+ // Don't process the double until both halves of the destination are clear.
+ if (genActualType(destMemType) == TYP_DOUBLE)
+ {
+ assert((destMask & RBM_DBL_REGS) != 0);
+ destMask |= genRegMask(REG_NEXT(destRegNum));
+ }
+#endif
+
+ if (destMask & regArgMaskLive)
+ {
+ continue;
+ }
+
+ /* Move it to the new register */
+
+ emitAttr size = emitActualTypeSize(destMemType);
+
+ getEmitter()->emitIns_R_R(ins_Copy(destMemType), size, destRegNum, regNum);
+
+ psiMoveToReg(varNum);
+ }
+
+ /* mark the argument as processed */
+
+ assert(!regArgTab[argNum].processed);
+ regArgTab[argNum].processed = true;
+ regArgMaskLive &= ~genRegMask(regNum);
+#if FEATURE_MULTIREG_ARGS
+ int argRegCount = 1;
+#ifdef _TARGET_ARM_
+ if (genActualType(destMemType) == TYP_DOUBLE)
+ {
+ argRegCount = 2;
+ }
+#endif
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ if (varTypeIsStruct(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
+ {
+ argRegCount = 2;
+ int nextArgNum = argNum + 1;
+ regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
+ noway_assert(regArgTab[nextArgNum].varNum == varNum);
+ // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
+ // and moves the 0th element of the src reg into the 1st element of the dest reg.
+ getEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varDsc->lvType), destRegNum, nextRegNum, 0);
+ // Set destRegNum to regNum so that we skip the setting of the register below,
+ // but mark argNum as processed and clear regNum from the live mask.
+ destRegNum = regNum;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ // Mark the rest of the argument registers corresponding to this multi-reg type as
+ // being processed and no longer live.
+ for (int regSlot = 1; regSlot < argRegCount; regSlot++)
+ {
+ int nextArgNum = argNum + regSlot;
+ assert(!regArgTab[nextArgNum].processed);
+ regArgTab[nextArgNum].processed = true;
+ regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
+ regArgMaskLive &= ~genRegMask(nextRegNum);
+ }
+#endif // FEATURE_MULTIREG_ARGS
+ }
+
+ noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
+ }
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ * If any incoming stack arguments live in registers, load them.
+ */
+void CodeGen::genEnregisterIncomingStackArgs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genEnregisterIncomingStackArgs()\n");
+ }
+#endif
+
+ assert(compiler->compGeneratingProlog);
+
+ unsigned varNum = 0;
+
+ for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter? */
+
+ if (!varDsc->lvIsParam)
+ {
+ continue;
+ }
+
+ /* If it's a register argument then it's already been taken care of.
+ But, on Arm when under a profiler, we would have prespilled a register argument
+ and hence here we need to load it from its prespilled location.
+ */
+ bool isPrespilledForProfiling = false;
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ isPrespilledForProfiling =
+ compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(varNum, regSet.rsMaskPreSpillRegs(false));
+#endif
+
+ if (varDsc->lvIsRegArg && !isPrespilledForProfiling)
+ {
+ continue;
+ }
+
+ /* Has the parameter been assigned to a register? */
+
+ if (!varDsc->lvIsInReg())
+ {
+ continue;
+ }
+
+ var_types type = genActualType(varDsc->TypeGet());
+
+#if FEATURE_STACK_FP_X87
+ // Floating point locals are loaded onto the x86-FPU in the next section
+ if (varTypeIsFloating(type))
+ continue;
+#endif
+
+ /* Is the variable dead on entry */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ /* Load the incoming parameter into the register */
+
+ /* Figure out the home offset of the incoming argument */
+
+ regNumber regNum;
+ regNumber otherReg;
+
+#ifndef LEGACY_BACKEND
+#ifdef _TARGET_ARM_
+ if (type == TYP_LONG)
+ {
+ regPairNo regPair = varDsc->lvArgInitRegPair;
+ regNum = genRegPairLo(regPair);
+ otherReg = genRegPairHi(regPair);
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+ regNum = varDsc->lvArgInitReg;
+ otherReg = REG_NA;
+ }
+#else // LEGACY_BACKEND
+ regNum = varDsc->lvRegNum;
+ if (type == TYP_LONG)
+ {
+ otherReg = varDsc->lvOtherReg;
+ }
+ else
+ {
+ otherReg = REG_NA;
+ }
+#endif // LEGACY_BACKEND
+
+ assert(regNum != REG_STK);
+
+#ifndef _TARGET_64BIT_
+ if (type == TYP_LONG)
+ {
+ /* long - at least the low half must be enregistered */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, regNum, varNum, 0);
+ regTracker.rsTrackRegTrash(regNum);
+
+ /* Is the upper half also enregistered? */
+
+ if (otherReg != REG_STK)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, otherReg, varNum, sizeof(int));
+ regTracker.rsTrackRegTrash(otherReg);
+ }
+ }
+ else
+#endif // !_TARGET_64BIT_
+ {
+ /* Loading a single register - this is the easy/common case */
+
+ getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), regNum, varNum, 0);
+ regTracker.rsTrackRegTrash(regNum);
+ }
+
+ psiMoveToReg(varNum);
+ }
+}
+
+/*-------------------------------------------------------------------------
+ *
+ * We have to decide whether we're going to use block initialization
+ * in the prolog before we assign final stack offsets. This is because
+ * when using block initialization we may need additional callee-saved
+ * registers which need to be saved on the frame, thus increasing the
+ * frame size.
+ *
+ * We'll count the number of locals we have to initialize,
+ * and if there are lots of them we'll use block initialization.
+ * Thus, the local variable table must have accurate register location
+ * information for enregistered locals for their register state on entry
+ * to the function.
+ *
+ * At the same time we set lvMustInit for locals (enregistered or on stack)
+ * that must be initialized (e.g. when compInitMem is set, for untracked
+ * GC pointers, or when dataflow analysis (DFA) is disabled).
+ */
+void CodeGen::genCheckUseBlockInit()
+{
+#ifndef LEGACY_BACKEND // this is called before codegen in RyuJIT backend
+ assert(!compiler->compGeneratingProlog);
+#else // LEGACY_BACKEND
+ assert(compiler->compGeneratingProlog);
+#endif // LEGACY_BACKEND
+
+ unsigned initStkLclCnt = 0; // The number of int-sized stack local variables that need to be initialized (variables
+ // larger than int count for more than 1).
+ unsigned largeGcStructs = 0; // The number of "large" structs with GC pointers. Used as part of the heuristic to
+ // determine whether to use block init.
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->lvIsParam)
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
+ {
+ noway_assert(varDsc->lvRefCnt == 0);
+ continue;
+ }
+
+ if (varNum == compiler->lvaInlinedPInvokeFrameVar || varNum == compiler->lvaStubArgumentVar)
+ {
+ continue;
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+ if (varNum == compiler->lvaPInvokeFrameRegSaveVar)
+ {
+ continue;
+ }
+ if (varNum == compiler->lvaOutgoingArgSpaceVar)
+ {
+ continue;
+ }
+#endif
+
+#if FEATURE_EH_FUNCLETS
+ // There's no need to force 0-initialization of the PSPSym, it will be
+ // initialized with a real value in the prolog
+ if (varNum == compiler->lvaPSPSym)
+ {
+ continue;
+ }
+#endif
+
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ // For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been
+ // initialized by the parent struct. No need to set the lvMustInit bit in the
+ // field locals.
+ continue;
+ }
+
+ if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0) ||
+ varDsc->lvMustInit)
+ {
+ if (varDsc->lvTracked)
+ {
+ /* For uninitialized use of tracked variables, the liveness
+ * will bubble to the top (compiler->fgFirstBB) in fgInterBlockLocalVarLiveness()
+ */
+ if (varDsc->lvMustInit ||
+ VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ {
+ /* This var must be initialized */
+
+ varDsc->lvMustInit = 1;
+
+                    /* See if the variable that is on the stack will be initialized
+                     * using rep stos - compute the total size to be zeroed */
+
+ if (varDsc->lvOnFrame)
+ {
+ if (!varDsc->lvRegister)
+ {
+#ifndef LEGACY_BACKEND
+ if (!varDsc->lvIsInReg())
+#endif // !LEGACY_BACKEND
+ {
+ // Var is completely on the stack, in the legacy JIT case, or
+ // on the stack at entry, in the RyuJIT case.
+ initStkLclCnt += (unsigned)roundUp(compiler->lvaLclSize(varNum)) / sizeof(int);
+ }
+ }
+ else
+ {
+ // Var is partially enregistered
+ noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) && varDsc->lvOtherReg == REG_STK);
+ initStkLclCnt += genTypeStSz(TYP_INT);
+ }
+ }
+ }
+ }
+
+ /* With compInitMem, all untracked vars will have to be init'ed */
+ /* VSW 102460 - Do not force initialization of compiler generated temps,
+ unless they are untracked GC type or structs that contain GC pointers */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_SIMD
+ // TODO-1stClassStructs
+ // This is here to duplicate previous behavior, where TYP_SIMD8 locals
+ // were not being re-typed correctly.
+ if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) &&
+#else // !FEATURE_SIMD
+ if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) &&
+#endif // !FEATURE_SIMD
+ varDsc->lvOnFrame &&
+ (!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0)))
+ {
+ varDsc->lvMustInit = true;
+
+ initStkLclCnt += (unsigned)roundUp(compiler->lvaLclSize(varNum)) / sizeof(int);
+ }
+
+ continue;
+ }
+
+ /* Ignore if not a pointer variable or value class with a GC field */
+
+ if (!compiler->lvaTypeIsGC(varNum))
+ {
+ continue;
+ }
+
+#if CAN_DISABLE_DFA
+ /* If we don't know lifetimes of variables, must be conservative */
+
+ if (compiler->opts.MinOpts())
+ {
+ varDsc->lvMustInit = true;
+ noway_assert(!varDsc->lvRegister);
+ }
+ else
+#endif // CAN_DISABLE_DFA
+ {
+ if (!varDsc->lvTracked)
+ {
+ varDsc->lvMustInit = true;
+ }
+ }
+
+ /* Is this a 'must-init' stack pointer local? */
+
+ if (varDsc->lvMustInit && varDsc->lvOnFrame)
+ {
+ initStkLclCnt += varDsc->lvStructGcCount;
+ }
+
+ if ((compiler->lvaLclSize(varNum) > (3 * sizeof(void*))) && (largeGcStructs <= 4))
+ {
+ largeGcStructs++;
+ }
+ }
+
+ /* Don't forget about spill temps that hold pointers */
+
+ if (!TRACK_GC_TEMP_LIFETIMES)
+ {
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr; tempThis = compiler->tmpListNxt(tempThis))
+ {
+ if (varTypeIsGC(tempThis->tdTempType()))
+ {
+ initStkLclCnt++;
+ }
+ }
+ }
+
+ // After debugging this further it was found that this logic is incorrect:
+    // it incorrectly assumes the stack slots are always 4 bytes (not necessarily the case),
+    // and it also double counts variables (we saw this in the debugger) around line 4829.
+    // Even though this doesn't pose a correctness problem, it will improperly decide to
+    // zero-init the stack using a block operation instead of on a 'case by case' basis.
+ genInitStkLclCnt = initStkLclCnt;
+
+ /* If we have more than 4 untracked locals, use block initialization */
+ /* TODO-Review: If we have large structs, bias toward not using block initialization since
+       we waste all the other slots. Really need to compute the correct cost
+ and compare that against zeroing the slots individually */
+
+ genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4));
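+    // Worked example (illustrative): with 6 int-sized slots to zero-init and no large GC structs,
+    // 6 > (0 + 4) holds, so we use block init; with two large GC structs counted, 6 > (2 + 4)
+    // fails and the slots are zeroed individually.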
+
+ if (genUseBlockInit)
+ {
+ regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
+
+ // If there is a secret stub param, don't count it, as it will no longer
+ // be live when we do block init.
+ if (compiler->info.compPublishStubParam)
+ {
+ maskCalleeRegArgMask &= ~RBM_SECRET_STUB_PARAM;
+ }
+
+#ifdef _TARGET_XARCH_
+ // If we're going to use "REP STOS", remember that we will trash EDI
+ // For fastcall we will have to save ECX, EAX
+        // so reserve two extra callee-saved registers.
+        // This is better than pushing eax and ecx, because later on
+        // we would mess up the already computed offsets on the stack (for ESP frames)
+ regSet.rsSetRegsModified(RBM_EDI);
+
+#ifdef UNIX_AMD64_ABI
+        // For register arguments we may have to save RCX (and RDI on Amd64 System V OSes).
+        // In that case use the R12 and R13 registers.
+ if (maskCalleeRegArgMask & RBM_RCX)
+ {
+ regSet.rsSetRegsModified(RBM_R12);
+ }
+
+ if (maskCalleeRegArgMask & RBM_RDI)
+ {
+ regSet.rsSetRegsModified(RBM_R13);
+ }
+#else // !UNIX_AMD64_ABI
+ if (maskCalleeRegArgMask & RBM_ECX)
+ {
+ regSet.rsSetRegsModified(RBM_ESI);
+ }
+#endif // !UNIX_AMD64_ABI
+
+ if (maskCalleeRegArgMask & RBM_EAX)
+ {
+ regSet.rsSetRegsModified(RBM_EBX);
+ }
+
+#endif // _TARGET_XARCH_
+#ifdef _TARGET_ARM_
+ //
+        // On ARM, if we are using block init, then we must force-spill
+        // R4/R5/R6 so that we can use them during the zero-initialization
+        // process.
+ //
+ int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
+ if (forceSpillRegCount > 0)
+ regSet.rsSetRegsModified(RBM_R4);
+ if (forceSpillRegCount > 1)
+ regSet.rsSetRegsModified(RBM_R5);
+ if (forceSpillRegCount > 2)
+ regSet.rsSetRegsModified(RBM_R6);
+#endif // _TARGET_ARM_
+ }
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Push any callee-saved registers we have used
+ */
+
+#if defined(_TARGET_ARM64_)
+void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
+#else
+void CodeGen::genPushCalleeSavedRegisters()
+#endif
+{
+ assert(compiler->compGeneratingProlog);
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+    // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto the
+    // stack here. Space for preserving float registers is allocated on the stack as part of the prolog sequence, not
+    // here.
+ regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
+#else // !defined(_TARGET_XARCH_) || FEATURE_STACK_FP_X87
+ regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+#endif
+
+#if ETW_EBP_FRAMED
+ if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
+ {
+ noway_assert(!"Used register RBM_FPBASE as a scratch register!");
+ }
+#endif
+
+#ifdef _TARGET_XARCH_
+ // On X86/X64 we have already pushed the FP (frame-pointer) prior to calling this method
+ if (isFramePointerUsed())
+ {
+ rsPushRegs &= ~RBM_FPBASE;
+ }
+#endif
+
+#ifdef _TARGET_ARMARCH_
+ // On ARM we push the FP (frame-pointer) here along with all other callee saved registers
+ if (isFramePointerUsed())
+ rsPushRegs |= RBM_FPBASE;
+
+ //
+ // It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
+ // changes in GC suspension architecture.
+ //
+ // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we
+ // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf
+ // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
+ // on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
+ // be saved on the stack and the GC suspension would time out.
+ //
+    // So if we wanted to skip pushing/popping lr for leaf frames, we would also need to do one of
+ // the following to make GC suspension work in the above scenario:
+ // - Make return address hijacking work even when lr is not saved on the stack.
+ // - Generate fully interruptible code for loops that contains calls
+ // - Generate fully interruptible code for leaf methods
+ //
+ // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity
+ // is not worth it.
+ //
+ rsPushRegs |= RBM_LR; // We must save the return address (in the LR register)
+
+ regSet.rsMaskCalleeSaved = rsPushRegs;
+#endif // _TARGET_ARMARCH_
+
+#ifdef DEBUG
+ if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs))
+ {
+ printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ",
+ compiler->compCalleeRegsPushed, genCountBits(rsPushRegs));
+ dspRegMask(rsPushRegs);
+ printf("\n");
+ assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs));
+ }
+#endif // DEBUG
+
+#if defined(_TARGET_ARM_)
+ regMaskTP maskPushRegsFloat = rsPushRegs & RBM_ALLFLOAT;
+ regMaskTP maskPushRegsInt = rsPushRegs & ~maskPushRegsFloat;
+
+ maskPushRegsInt |= genStackAllocRegisterMask(compiler->compLclFrameSize, maskPushRegsFloat);
+
+ assert(FitsIn<int>(maskPushRegsInt));
+ inst_IV(INS_push, (int)maskPushRegsInt);
+ compiler->unwindPushMaskInt(maskPushRegsInt);
+
+ if (maskPushRegsFloat != 0)
+ {
+ genPushFltRegs(maskPushRegsFloat);
+ compiler->unwindPushMaskFloat(maskPushRegsFloat);
+ }
+#elif defined(_TARGET_ARM64_)
+ // See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and
+ // options. Case numbers in comments here refer to this document.
+ //
+ // For most frames, generate, e.g.:
+ // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair
+ // // ensures stack stays aligned.
+ // stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area
+ // // at top of frame (highest addresses).
+ // stp r21, r22, [sp, 0x70]
+ //
+ // Notes:
+ // 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers
+ // at the top of the frame.
+ // 2. If we save FP, then the first store is FP, LR.
+ // 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only
+ // preserve their lower 8 bytes, by calling convention.
+ // 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are
+ // consecutive.
+ // 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
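+    //
+    // Rough guide to the frame types chosen below (sizes are illustrative): with no outgoing arg
+    // space and totalFrameSize < 512 we use frameType 1 (a single pre-indexed "stp fp,lr");
+    // with totalFrameSize <= 512 and some outgoing arg space we use frameType 2 ("sub sp"
+    // followed by "stp fp,lr,[sp,#outsz]"); larger frames fall into frameType 3.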
+
+ int totalFrameSize = genTotalFrameSize();
+
+ int offset; // This will be the starting place for saving the callee-saved registers, in increasing order.
+
+ regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT;
+ regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat;
+
+ if (compiler->info.compIsVarArgs)
+ {
+ assert(maskSaveRegsFloat == RBM_NONE);
+ }
+
+ int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we
+ // generate based on various sizes.
+ int calleeSaveSPDelta = 0;
+ int calleeSaveSPDeltaUnaligned = 0;
+
+ if (isFramePointerUsed())
+ {
+ // We need to save both FP and LR.
+
+ assert((maskSaveRegsInt & RBM_FP) != 0);
+ assert((maskSaveRegsInt & RBM_LR) != 0);
+
+ if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
+ {
+ // Case #1.
+ //
+ // Generate:
+ // stp fp,lr,[sp,#-framesz]!
+ //
+ // The (totalFrameSize < 512) condition ensures that both the predecrement
+ // and the postincrement of SP can occur with STP.
+ //
+ // After saving callee-saved registers, we establish the frame pointer with:
+ // mov fp,sp
+ // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
+
+ frameType = 1;
+
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
+ INS_OPTS_PRE_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
+
+ maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
+ offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
+ }
+ else if (totalFrameSize <= 512)
+ {
+ // Case #2.
+ //
+ // Generate:
+ // sub sp,sp,#framesz
+ // stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
+ //
+ // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with
+ // signed offset encoding.
+ //
+ // After saving callee-saved registers, we establish the frame pointer with:
+ // add fp,sp,#outsz
+ // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
+
+ frameType = 2;
+
+ assert(compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize);
+
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
+ compiler->unwindAllocStack(totalFrameSize);
+
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
+ compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
+
+ maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
+ offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
+ }
+ else
+ {
+ // Case 5 or 6.
+ //
+ // First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index
+ // to subtract from SP as the first instruction. It must also leave space for varargs registers to be
+ // stored. For example:
+ // stp r19,r20,[sp,#-96]!
+ // stp d8,d9,[sp,#16]
+ // ... save varargs incoming integer registers ...
+ // Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be
+ // lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate alignment).
+ // So, if there is an odd number of callee-saved registers, we use (for example, with just one saved
+ // register):
+ // sub sp,sp,#16
+ // str r19,[sp,#8]
+ // This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be
+ // possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one
+ // above them. If that is preferable, we could implement it.
+ // Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers.
+ //
+ // Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment
+ // padding from above).
+            // Note that #remainingFrameSz must not be zero, since we still need to save FP,LR.
+ //
+ // Generate:
+ // sub sp,sp,#remainingFrameSz
+ // or, for large frames:
+ // mov rX, #remainingFrameSz // maybe multiple instructions
+ // sub sp,sp,rX
+ //
+ // followed by:
+ // stp fp,lr,[sp,#outsz]
+ // add fp,sp,#outsz
+ //
+ // However, we need to handle the case where #outsz is larger than the constant signed offset encoding can
+ // handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
+ // STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of the
+ // following sequences:
+ //
+ // Define #remainingFrameSz2 = #remainingFrameSz - #outsz.
+ //
+ // sub sp,sp,#remainingFrameSz2 // if #remainingFrameSz2 is 16-byte aligned
+ // stp fp,lr,[sp]
+ // mov fp,sp
+ // sub sp,sp,#outsz // in this case, #outsz must also be 16-byte aligned
+ //
+ // Or:
+ //
+ // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
+ // // always guaranteed to be 8 byte aligned).
+ // stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case
+ // add fp,sp,#8
+ // sub sp,sp,#outsz - #8
+ //
+ // (As usual, for a large constant "#outsz - #8", we might need multiple instructions:
+ // mov rX, #outsz - #8 // maybe multiple instructions
+ // sub sp,sp,rX
+ // )
+
+ frameType = 3;
+
+ calleeSaveSPDeltaUnaligned =
+ totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.
+ assert(calleeSaveSPDeltaUnaligned >= 0);
+ assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
+ calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
+
+ offset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
+ assert((offset == 0) || (offset == REGSIZE_BYTES)); // At most one alignment slot between SP and where we
+ // store the callee-saved registers.
+
+ // We'll take care of these later, but callee-saved regs code shouldn't see them.
+ maskSaveRegsInt &= ~(RBM_FP | RBM_LR);
+ }
+ }
+ else
+ {
+ // No frame pointer (no chaining).
+ assert((maskSaveRegsInt & RBM_FP) == 0);
+ assert((maskSaveRegsInt & RBM_LR) != 0);
+
+ // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using 'stp'
+ // if we only have one callee-saved register plus LR to save.
+
+ NYI("Frame without frame pointer");
+ offset = 0;
+ }
+
+ assert(frameType != 0);
+
+ genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta);
+
+ offset += genCountBits(maskSaveRegsInt | maskSaveRegsFloat) * REGSIZE_BYTES;
+
+ // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here,
+ // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't
+ // need to add codes at all.
+
+ if (compiler->info.compIsVarArgs)
+ {
+ // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here.
+ assert((offset % 16) == 0);
+ for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1)))
+ {
+ regNumber reg2 = REG_NEXT(reg1);
+ // stp REG, REG + 1, [SP, #offset]
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, offset);
+ compiler->unwindNop();
+ offset += 2 * REGSIZE_BYTES;
+ }
+ }
+
+ if (frameType == 1)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
+ compiler->unwindSetFrameReg(REG_FPBASE, 0);
+ }
+ else if (frameType == 2)
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ }
+ else if (frameType == 3)
+ {
+ int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
+ assert(remainingFrameSz > 0);
+ assert((remainingFrameSz % 16) == 0); // this is guaranteed to be 16-byte aligned because each component --
+ // totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned.
+
+ if (compiler->lvaOutgoingArgSpaceSize >= 504)
+ {
+ // We can't do "stp fp,lr,[sp,#outsz]" because #outsz is too big.
+ // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
+ assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
+ int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
+ int spAdjustment2 = (int)roundUp((size_t)spAdjustment2Unaligned, STACK_ALIGN);
+ int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
+ assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == 8));
+
+ genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed);
+ offset += spAdjustment2;
+
+ // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" included
+ // some of it)
+
+ int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
+ assert(spAdjustment3 > 0);
+ assert((spAdjustment3 % 16) == 0);
+
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, alignmentAdjustment2);
+ compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
+
+ genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed);
+ offset += spAdjustment3;
+ }
+ else
+ {
+ genPrologSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg,
+ pInitRegZeroed);
+ offset += remainingFrameSz;
+
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ }
+ }
+
+ assert(offset == totalFrameSize);
+
+#elif defined(_TARGET_XARCH_)
+ // Push backwards so we match the order we will pop them in the epilog
+ // and all the other code that expects it to be in this order.
+ for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
+ {
+ regMaskTP regBit = genRegMask(reg);
+
+ if ((regBit & rsPushRegs) != 0)
+ {
+ inst_RV(INS_push, reg, TYP_REF);
+ compiler->unwindPush(reg);
+
+ if (!doubleAlignOrFramePointerUsed())
+ {
+ psiAdjustStackLevel(REGSIZE_BYTES);
+ }
+
+ rsPushRegs &= ~regBit;
+ }
+ }
+
+#else
+ assert(!"Unknown TARGET");
+#endif // _TARGET_*
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Probe the stack and allocate the local stack frame: subtract from SP.
+ * On ARM64, this only does the probing; allocating the frame is done when callee-saved registers are saved.
+ */
+
+void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (frameSize == 0)
+ {
+ return;
+ }
+
+ const size_t pageSize = compiler->eeGetPageSize();
+
+#ifdef _TARGET_ARM_
+ assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_XARCH_
+ if (frameSize == REGSIZE_BYTES)
+ {
+ // Frame size is the same as register size.
+ inst_RV(INS_push, REG_EAX, TYP_I_IMPL);
+ }
+ else
+#endif // _TARGET_XARCH_
+ if (frameSize < pageSize)
+ {
+#ifndef _TARGET_ARM64_
+ // Frame size is (0x0008..0x1000)
+ inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
+#endif // !_TARGET_ARM64_
+ }
+ else if (frameSize < compiler->getVeryLargeFrameSize())
+ {
+ // Frame size is (0x1000..0x3000)
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)pageSize);
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
+ regTracker.rsTrackRegTrash(initReg);
+ *pInitRegZeroed = false; // The initReg does not contain zero
+#else
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -(int)pageSize);
+#endif
+
+ if (frameSize >= 0x2000)
+ {
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -2 * (ssize_t)pageSize);
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
+ regTracker.rsTrackRegTrash(initReg);
+#else
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -2 * (int)pageSize);
+#endif
+ }
+
+#ifdef _TARGET_ARM64_
+ compiler->unwindPadding();
+#else // !_TARGET_ARM64_
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize);
+ compiler->unwindPadding();
+ getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, REG_SPBASE, REG_SPBASE, initReg);
+#else
+ inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
+#endif
+#endif // !_TARGET_ARM64_
+ }
+ else
+ {
+ // Frame size >= 0x3000
+ assert(frameSize >= compiler->getVeryLargeFrameSize());
+
+ // Emit the following sequence to 'tickle' the pages.
+ // Note it is important that stack pointer not change until this is
+ // complete since the tickles could cause a stack overflow, and we
+ // need to be able to crawl the stack afterward (which means the
+ // stack pointer needs to be known).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_XARCH_
+ bool pushedStubParam = false;
+ if (compiler->info.compPublishStubParam && (REG_SECRET_STUB_PARAM == initReg))
+ {
+ // push register containing the StubParam
+ inst_RV(INS_push, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
+ pushedStubParam = true;
+ }
+#endif // _TARGET_XARCH_
+
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+
+ //
+ // Can't have a label inside the ReJIT padding area
+ //
+ genPrologPadForReJit();
+
+#if CPU_LOAD_STORE_ARCH
+
+ // TODO-ARM64-Bug?: set the availMask properly!
+ regMaskTP availMask =
+ (regSet.rsGetModifiedRegsMask() & RBM_ALLINT) | RBM_R12 | RBM_LR; // Set of available registers
+ availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live
+ availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg
+
+ regNumber rOffset = initReg;
+ regNumber rLimit;
+ regNumber rTemp;
+ regMaskTP tempMask;
+
+ // We pick the next lowest register number for rTemp
+ noway_assert(availMask != RBM_NONE);
+ tempMask = genFindLowestBit(availMask);
+ rTemp = genRegNumFromMask(tempMask);
+ availMask &= ~tempMask;
+
+ // We pick the next lowest register number for rLimit
+ noway_assert(availMask != RBM_NONE);
+ tempMask = genFindLowestBit(availMask);
+ rLimit = genRegNumFromMask(tempMask);
+ availMask &= ~tempMask;
+
+ // TODO-LdStArch-Bug?: review this. The first time we load from [sp+0] which will always succeed. That doesn't
+ // make sense.
+ // TODO-ARM64-CQ: we could probably use ZR on ARM64 instead of rTemp.
+ //
+ // mov rLimit, -frameSize
+ // loop:
+ // ldr rTemp, [sp+rOffset]
+ // sub rOffset, 0x1000 // Note that 0x1000 on ARM32 uses the funky Thumb immediate encoding
+ // cmp rOffset, rLimit
+ // jge loop
+ noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(int)frameSize);
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, rTemp, REG_SPBASE, rOffset);
+ regTracker.rsTrackRegTrash(rTemp);
+#if defined(_TARGET_ARM_)
+ getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rOffset, pageSize);
+#elif defined(_TARGET_ARM64_)
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
+#endif // _TARGET_ARM64_
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rOffset, rLimit);
+ getEmitter()->emitIns_J(INS_bhi, NULL, -4);
+
+#else // !CPU_LOAD_STORE_ARCH
+
+ // Code size for each instruction. We need this because the
+ // backward branch is hard-coded with the number of bytes to branch.
+ // The encoding differs based on the architecture and what register is
+ // used (namely, using RAX has a smaller encoding).
+ //
+ // loop:
+ // For x86
+ // test [esp + eax], eax 3
+ // sub eax, 0x1000 5
+ // cmp EAX, -frameSize 5
+ // jge loop 2
+ //
+ // For AMD64 using RAX
+ // test [rsp + rax], rax 4
+ // sub rax, 0x1000 6
+ // cmp rax, -frameSize 6
+ // jge loop 2
+ //
+ // For AMD64 using RBP
+ // test [rsp + rbp], rbp 4
+ // sub rbp, 0x1000 7
+ // cmp rbp, -frameSize 7
+ // jge loop 2
+
+ getEmitter()->emitIns_R_ARR(INS_TEST, EA_PTRSIZE, initReg, REG_SPBASE, initReg, 0);
+ inst_RV_IV(INS_sub, initReg, pageSize, EA_PTRSIZE);
+ inst_RV_IV(INS_cmp, initReg, -((ssize_t)frameSize), EA_PTRSIZE);
+
+ int bytesForBackwardJump;
+#ifdef _TARGET_AMD64_
+ assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets.
+ bytesForBackwardJump = ((initReg == REG_EAX) ? -18 : -20);
+#else // !_TARGET_AMD64_
+ assert(initReg == REG_EAX);
+ bytesForBackwardJump = -15;
+#endif // !_TARGET_AMD64_
+
+ inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop
+
+#endif // !CPU_LOAD_STORE_ARCH
+
+ *pInitRegZeroed = false; // The initReg does not contain zero
+
+#ifdef _TARGET_XARCH_
+ if (pushedStubParam)
+ {
+ // pop eax
+ inst_RV(INS_pop, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(REG_SECRET_STUB_PARAM);
+ }
+#endif // _TARGET_XARCH_
+
+#if CPU_LOAD_STORE_ARCH
+ compiler->unwindPadding();
+#endif
+
+#if CPU_LOAD_STORE_ARCH
+#ifndef _TARGET_ARM64_
+ inst_RV_RV(INS_add, REG_SPBASE, rLimit, TYP_I_IMPL);
+#endif // !_TARGET_ARM64_
+#else
+ // sub esp, frameSize 6
+ inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
+#endif
+ }
+
+#ifndef _TARGET_ARM64_
+ compiler->unwindAllocStack(frameSize);
+
+ if (!doubleAlignOrFramePointerUsed())
+ {
+ psiAdjustStackLevel(frameSize);
+ }
+#endif // !_TARGET_ARM64_
+}
+
+#if defined(_TARGET_ARM_)
+
+void CodeGen::genPushFltRegs(regMaskTP regMask)
+{
+    assert(regMask != 0);                        // Don't call unless we have some registers to push
+    assert((regMask & RBM_ALLFLOAT) == regMask); // Only floating point registers should be in regMask
+
+ regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
+ int slots = genCountBits(regMask);
+ // regMask should be contiguously set
+ regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
+ assert((tmpMask & (tmpMask - 1)) == 0);
+ assert(lowReg == REG_F16); // Currently we expect to start at F16 in the unwind codes
+
+ // Our calling convention requires that we only use vpush for TYP_DOUBLE registers
+ noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
+ noway_assert((slots % 2) == 0);
+
+ getEmitter()->emitIns_R_I(INS_vpush, EA_8BYTE, lowReg, slots / 2);
+}
+
+void CodeGen::genPopFltRegs(regMaskTP regMask)
+{
+    assert(regMask != 0);                        // Don't call unless we have some registers to pop
+    assert((regMask & RBM_ALLFLOAT) == regMask); // Only floating point registers should be in regMask
+
+ regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
+ int slots = genCountBits(regMask);
+ // regMask should be contiguously set
+ regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
+ assert((tmpMask & (tmpMask - 1)) == 0);
+
+ // Our calling convention requires that we only use vpop for TYP_DOUBLE registers
+ noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
+ noway_assert((slots % 2) == 0);
+
+ getEmitter()->emitIns_R_I(INS_vpop, EA_8BYTE, lowReg, slots / 2);
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * If we have a jmp call, then the argument registers cannot be used in the
+ * epilog. So return the current call's argument registers as the argument
+ * registers for the jmp call.
+ */
+regMaskTP CodeGen::genJmpCallArgMask()
+{
+ assert(compiler->compGeneratingEpilog);
+
+ regMaskTP argMask = RBM_NONE;
+ for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; ++varNum)
+ {
+ const LclVarDsc& desc = compiler->lvaTable[varNum];
+ if (desc.lvIsRegArg)
+ {
+ argMask |= genRegMask(desc.lvArgReg);
+ }
+ }
+ return argMask;
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Free the local stack frame: add to SP.
+ * If epilog unwind hasn't been started, and we generate code, we start unwind
+ * and set *pUnwindStarted = true.
+ */
+
+void CodeGen::genFreeLclFrame(unsigned frameSize, /* IN OUT */ bool* pUnwindStarted, bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ if (frameSize == 0)
+ return;
+
+ // Add 'frameSize' to SP.
+ //
+ // Unfortunately, we can't just use:
+ //
+ // inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE);
+ //
+ // because we need to generate proper unwind codes for each instruction generated,
+ // and large frame sizes might generate a temp register load which might
+ // need an unwind code. We don't want to generate a "NOP" code for this
+ // temp register load; we want the unwind codes to start after that.
+
+ if (arm_Valid_Imm_For_Instr(INS_add, frameSize, INS_FLAGS_DONT_CARE))
+ {
+ if (!*pUnwindStarted)
+ {
+ compiler->unwindBegEpilog();
+ *pUnwindStarted = true;
+ }
+
+ getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, frameSize, INS_FLAGS_DONT_CARE);
+ }
+ else
+ {
+ regMaskTP grabMask = RBM_INT_CALLEE_TRASH;
+ if (jmpEpilog)
+ {
+ // Do not use argument registers as scratch registers in the jmp epilog.
+ grabMask &= ~genJmpCallArgMask();
+ }
+#ifndef LEGACY_BACKEND
+ regNumber tmpReg;
+ tmpReg = REG_TMP_0;
+#else // LEGACY_BACKEND
+ regNumber tmpReg = regSet.rsGrabReg(grabMask);
+#endif // LEGACY_BACKEND
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, frameSize);
+ if (*pUnwindStarted)
+ {
+ compiler->unwindPadding();
+ }
+
+ // We're going to generate an unwindable instruction, so check again if
+ // we need to start the unwind codes.
+
+ if (!*pUnwindStarted)
+ {
+ compiler->unwindBegEpilog();
+ *pUnwindStarted = true;
+ }
+
+ getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, tmpReg, INS_FLAGS_DONT_CARE);
+ }
+
+ compiler->unwindAllocStack(frameSize);
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Returns register mask to push/pop to allocate a small stack frame,
+ * instead of using "sub sp" / "add sp". Returns RBM_NONE if either frame size
+ * is zero, or if we should use "sub sp" / "add sp" instead of push/pop.
+ */
+regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat)
+{
+ assert(compiler->compGeneratingProlog || compiler->compGeneratingEpilog);
+
+ // We can't do this optimization with callee saved floating point registers because
+ // the stack would be allocated in a wrong spot.
+ if (maskCalleeSavedFloat != RBM_NONE)
+ return RBM_NONE;
+
+    // Allocate space for small frames by pushing extra registers. This generates smaller and faster code
+    // than an extra sub sp,XXX/add sp,XXX pair.
+    // R0 and R1 may be used for the return value, so keep things simple and just skip the optimization
+    // for the 3*REGSIZE_BYTES and 4*REGSIZE_BYTES cases. They are less common and have more
+    // significant negative side-effects (more memory bus traffic).
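+    // For example, a one-register-sized frame is allocated with "push {r3}" in the prolog, and the
+    // matching register is folded into the epilog's pop mask, instead of emitting
+    // "sub sp, #4" / "add sp, #4".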
+ switch (frameSize)
+ {
+ case REGSIZE_BYTES:
+ return RBM_R3;
+ case 2 * REGSIZE_BYTES:
+ return RBM_R2 | RBM_R3;
+ default:
+ return RBM_NONE;
+ }
+}
+
+#endif // _TARGET_ARM_
+
+#if !FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * initFltRegs -- The mask of float regs to be zeroed.
+ * initDblRegs -- The mask of double regs to be zeroed.
+ * initReg -- A zero initialized integer reg to copy from.
+ *
+ * Does best effort to move between VFP/xmm regs if one is already
+ * initialized to 0. (Arm Only) Else copies from the integer register which
+ * is slower.
+ */
+void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg)
+{
+ assert(compiler->compGeneratingProlog);
+
+    // The first float/double reg that is initialized to 0, so it can be used to
+    // initialize the remaining registers.
+ regNumber fltInitReg = REG_NA;
+ regNumber dblInitReg = REG_NA;
+
+ // Iterate through float/double registers and initialize them to 0 or
+ // copy from already initialized register of the same type.
+ regMaskTP regMask = genRegMask(REG_FP_FIRST);
+ for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1)
+ {
+ if (regMask & initFltRegs)
+ {
+ // Do we have a float register already set to 0?
+ if (fltInitReg != REG_NA)
+ {
+ // Copy from float.
+ inst_RV_RV(ins_Copy(TYP_FLOAT), reg, fltInitReg, TYP_FLOAT);
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ // Do we have a double register initialized to 0?
+ if (dblInitReg != REG_NA)
+ {
+ // Copy from double.
+ inst_RV_RV(INS_vcvt_d2f, reg, dblInitReg, TYP_FLOAT);
+ }
+ else
+ {
+ // Copy from int.
+ inst_RV_RV(INS_vmov_i2f, reg, initReg, TYP_FLOAT, EA_4BYTE);
+ }
+#elif defined(_TARGET_XARCH_)
+                // xorpd xmmreg, xmmreg is the fastest way to zero a float register, faster than
+                // moving the constant 0.0f. Though we only need to initialize the low 32 bits,
+                // we use xorpd to zero all 64 bits of the xmm register so that it can also be
+                // used to zero-initialize xmm registers that hold double values.
+ inst_RV_RV(INS_xorpd, reg, reg, TYP_DOUBLE);
+ dblInitReg = reg;
+#elif defined(_TARGET_ARM64_)
+ NYI("Initialize floating-point register to zero");
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif
+ fltInitReg = reg;
+ }
+ }
+ else if (regMask & initDblRegs)
+ {
+ // Do we have a double register already set to 0?
+ if (dblInitReg != REG_NA)
+ {
+ // Copy from double.
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), reg, dblInitReg, TYP_DOUBLE);
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ // Do we have a float register initialized to 0?
+ if (fltInitReg != REG_NA)
+ {
+ // Copy from float.
+ inst_RV_RV(INS_vcvt_f2d, reg, fltInitReg, TYP_DOUBLE);
+ }
+ else
+ {
+ // Copy from int.
+ inst_RV_RV_RV(INS_vmov_i2d, reg, initReg, initReg, EA_8BYTE);
+ }
+#elif defined(_TARGET_XARCH_)
+                // xorpd xmmreg, xmmreg is the fastest way to zero a double register, faster than
+                // moving the constant 0.0. We can also use the lower 32 bits of 'reg' to
+                // zero-initialize xmm registers that hold float values later on.
+ inst_RV_RV(INS_xorpd, reg, reg, TYP_DOUBLE);
+ fltInitReg = reg;
+#elif defined(_TARGET_ARM64_)
+ // We will just zero out the entire vector register. This sets it to a double zero value
+ getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif
+ dblInitReg = reg;
+ }
+ }
+ }
+}
+#endif // !FEATURE_STACK_FP_X87
+
+/*-----------------------------------------------------------------------------
+ *
+ * Restore any callee-saved registers we have used
+ */
+
+#if defined(_TARGET_ARM_)
+
+bool CodeGen::genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ if (!jmpEpilog && regSet.rsMaskPreSpillRegs(true) == RBM_NONE)
+ return true;
+ else
+ return false;
+}
+
+void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ regMaskTP maskPopRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+ regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT;
+ regMaskTP maskPopRegsInt = maskPopRegs & ~maskPopRegsFloat;
+
+ // First, pop float registers
+
+ if (maskPopRegsFloat != RBM_NONE)
+ {
+ genPopFltRegs(maskPopRegsFloat);
+ compiler->unwindPopMaskFloat(maskPopRegsFloat);
+ }
+
+ // Next, pop integer registers
+
+ if (!jmpEpilog)
+ {
+ regMaskTP maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat);
+ maskPopRegsInt |= maskStackAlloc;
+ }
+
+ if (isFramePointerUsed())
+ {
+ assert(!regSet.rsRegsModified(RBM_FPBASE));
+ maskPopRegsInt |= RBM_FPBASE;
+ }
+
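+    // For example, a frame that saved r4-r7 and r11 with no pre-spilled registers returns
+    // with a single "pop {r4-r7,r11,pc}", while a jmp epilog pops into lr instead and
+    // leaves the transfer of control to the jmp sequence.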
+ if (genCanUsePopToReturn(maskPopRegsInt, jmpEpilog))
+ {
+ maskPopRegsInt |= RBM_PC;
+ // Record the fact that we use a pop to the PC to perform the return
+ genUsedPopToReturn = true;
+ }
+ else
+ {
+ maskPopRegsInt |= RBM_LR;
+ // Record the fact that we did not use a pop to the PC to perform the return
+ genUsedPopToReturn = false;
+ }
+
+ assert(FitsIn<int>(maskPopRegsInt));
+ inst_IV(INS_pop, (int)maskPopRegsInt);
+ compiler->unwindPopMaskInt(maskPopRegsInt);
+}
+
+#elif defined(_TARGET_ARM64_)
+
+void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+
+ if (isFramePointerUsed())
+ {
+ rsRestoreRegs |= RBM_FPBASE;
+ }
+
+ rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register)
+
+ regMaskTP regsToRestoreMask = rsRestoreRegs;
+
+ int totalFrameSize = genTotalFrameSize();
+
+ int calleeSaveSPOffset; // This will be the starting place for restoring the callee-saved registers, in decreasing
+ // order.
+ int frameType = 0; // An indicator of what type of frame we are popping.
+ int calleeSaveSPDelta = 0;
+ int calleeSaveSPDeltaUnaligned = 0;
+
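+    // For example (mirroring the prolog shapes):
+    //   frameType 1: no outgoing arg space and totalFrameSize < 512
+    //                -> the epilog ends with "ldp fp,lr,[sp],#totalFrameSize"
+    //   frameType 2: totalFrameSize <= 512, e.g. 480 bytes total with 32 bytes of outgoing args
+    //                -> "ldp fp,lr,[sp,#32]" followed by "add sp,sp,#480"
+    //   frameType 3: anything larger; SP is restored in several adjustments below.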
+ if (isFramePointerUsed())
+ {
+ if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
+ {
+ frameType = 1;
+ if (compiler->compLocallocUsed)
+ {
+ // Restore sp from fp
+ // mov sp, fp
+ inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
+ compiler->unwindSetFrameReg(REG_FPBASE, 0);
+ }
+
+ regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
+
+ // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
+ // of stack.
+ calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
+ }
+ else if (totalFrameSize <= 512)
+ {
+ frameType = 2;
+ if (compiler->compLocallocUsed)
+ {
+ // Restore sp from fp
+ // sub sp, fp, #outsz
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
+ compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ }
+
+ regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
+
+ // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
+ // of stack.
+ calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
+ }
+ else
+ {
+ frameType = 3;
+
+ calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize -
+ 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll restore later.
+ assert(calleeSaveSPDeltaUnaligned >= 0);
+            assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It had better be at least 8 byte aligned.
+ calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
+
+ regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP.
+
+ int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
+ assert(remainingFrameSz > 0);
+
+ if (compiler->lvaOutgoingArgSpaceSize >= 504)
+ {
+ // We can't do "ldp fp,lr,[sp,#outsz]" because #outsz is too big.
+ // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
+ assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
+ int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
+ int spAdjustment2 = (int)roundUp((size_t)spAdjustment2Unaligned, STACK_ALIGN);
+ int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
+ assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == REGSIZE_BYTES));
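+
+                // Worked example: with remainingFrameSz = 544 and lvaOutgoingArgSpaceSize = 520,
+                // spAdjustment2Unaligned = 24, spAdjustment2 = 32 and alignmentAdjustment2 = 8.
+                // The non-localloc path below then adds 520 - 8 = 512 to SP (still 16-byte aligned),
+                // and genEpilogRestoreRegPair loads fp/lr from [sp,#8] and adds the remaining 32.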
+
+ if (compiler->compLocallocUsed)
+ {
+ // Restore sp from fp. No need to update sp after this since we've set up fp before adjusting sp in
+ // prolog.
+ // sub sp, fp, #alignmentAdjustment2
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2);
+ compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
+ }
+ else
+ {
+ // Generate:
+ // add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more
+ // ; careful
+ int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
+ assert(spAdjustment3 > 0);
+ assert((spAdjustment3 % 16) == 0);
+ genStackPointerAdjustment(spAdjustment3, REG_IP0, nullptr);
+ }
+
+ // Generate:
+ // ldp fp,lr,[sp]
+ // add sp,sp,#remainingFrameSz
+ genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, REG_IP0, nullptr);
+ }
+ else
+ {
+ if (compiler->compLocallocUsed)
+ {
+ // Restore sp from fp
+ // sub sp, fp, #outsz
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
+ compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ }
+
+ // Generate:
+ // ldp fp,lr,[sp,#outsz]
+ // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if
+ // ; it's large
+
+ genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, REG_IP0,
+ nullptr);
+ }
+
+            // Unlike frameType=1 or frameType=2, which restore SP at the end,
+            // frameType=3 has already adjusted SP above to remove the local frame.
+ // There is at most one alignment slot between SP and where we store the callee-saved registers.
+ calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
+ assert((calleeSaveSPOffset == 0) || (calleeSaveSPOffset == REGSIZE_BYTES));
+ }
+ }
+ else
+ {
+ // No frame pointer (no chaining).
+ NYI("Frame without frame pointer");
+ calleeSaveSPOffset = 0;
+ }
+
+ genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta);
+
+ if (frameType == 1)
+ {
+ // Generate:
+ // ldp fp,lr,[sp],#framesz
+
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize,
+ INS_OPTS_POST_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
+ }
+ else if (frameType == 2)
+ {
+ // Generate:
+        // ldp fp,lr,[sp,#outsz]
+ // add sp,sp,#framesz
+
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
+ compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
+
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
+ compiler->unwindAllocStack(totalFrameSize);
+ }
+ else if (frameType == 3)
+ {
+ // Nothing to do after restoring callee-saved registers.
+ }
+ else
+ {
+ unreached();
+ }
+}
+
+#elif defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ unsigned popCount = 0;
+ if (regSet.rsRegsModified(RBM_EBX))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_FPBASE))
+ {
+ // EBP cannot be directly modified for EBP frame and double-aligned frames
+ assert(!doubleAlignOrFramePointerUsed());
+
+ popCount++;
+ inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
+ }
+
+#ifndef UNIX_AMD64_ABI
+    // Under the System V AMD64 calling convention ESI and EDI are volatile registers,
+    // so they are only saved/popped here for non-System-V targets.
+ if (regSet.rsRegsModified(RBM_ESI))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_EDI))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
+ }
+#endif // !defined(UNIX_AMD64_ABI)
+
+#ifdef _TARGET_AMD64_
+ if (regSet.rsRegsModified(RBM_R12))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_R12, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_R13))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_R13, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_R14))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_R14, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_R15))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_R15, TYP_I_IMPL);
+ }
+#endif // _TARGET_AMD64_
+
+ // Amd64/x86 doesn't support push/pop of xmm registers.
+ // These will get saved to stack separately after allocating
+ // space on stack in prolog sequence. PopCount is essentially
+ // tracking the count of integer registers pushed.
+
+ noway_assert(compiler->compCalleeRegsPushed == popCount);
+}
+
+#elif defined(_TARGET_X86_)
+
+void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ unsigned popCount = 0;
+
+ /* NOTE: The EBP-less frame code below depends on the fact that
+ all of the pops are generated right at the start and
+ each takes one byte of machine code.
+ */
+
+ if (regSet.rsRegsModified(RBM_FPBASE))
+ {
+ // EBP cannot be directly modified for EBP frame and double-aligned frames
+ noway_assert(!doubleAlignOrFramePointerUsed());
+
+ inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
+ popCount++;
+ }
+ if (regSet.rsRegsModified(RBM_EBX))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_ESI))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_EDI))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
+ }
+ noway_assert(compiler->compCalleeRegsPushed == popCount);
+}
+
+#endif // _TARGET_*
+
+// We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so.
+// Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR.
+regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed)
+{
+#ifdef _TARGET_ARM64_
+ return REG_ZR;
+#else // !_TARGET_ARM64_
+ if (*pInitRegZeroed == false)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+ *pInitRegZeroed = true;
+ }
+ return initReg;
+#endif // !_TARGET_ARM64_
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Do we have any untracked pointer locals at all,
+ * or do we need to initialize memory for locspace?
+ *
+ * untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init code will end
+ * initializing memory (not inclusive).
+ * untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will start zero
+ * initializing memory.
+ * initReg - A scratch register (that gets set to zero on some platforms).
+ * pInitRegZeroed - Sets a flag that tells the callee whether or not the initReg register got zeroed.
+ */
+void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (genUseBlockInit)
+ {
+ assert(untrLclHi > untrLclLo);
+#ifdef _TARGET_ARMARCH_
+ /*
+ Generate the following code:
+
+ For cnt less than 10
+
+ mov rZero1, 0
+ mov rZero2, 0
+ mov rCnt, <cnt>
+ stm <rZero1,rZero2>,[rAddr!]
+ <optional> stm <rZero1,rZero2>,[rAddr!]
+ <optional> stm <rZero1,rZero2>,[rAddr!]
+ <optional> stm <rZero1,rZero2>,[rAddr!]
+ <optional> str rZero1,[rAddr]
+
+ For rCnt greater than or equal to 10
+
+ mov rZero1, 0
+ mov rZero2, 0
+ mov rCnt, <cnt/2>
+ sub rAddr, sp, OFFS
+
+ loop:
+ stm <rZero1,rZero2>,[rAddr!]
+ sub rCnt,rCnt,1
+ jnz loop
+
+ <optional> str rZero1,[rAddr] // When cnt is odd
+
+ NOTE: for ARM64, the instruction is stp, not stm. And we can use ZR instead of allocating registers.
+ */
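+        // For example, zeroing 44 bytes on ARM gives uCntSlots = 11, so the loop form is used
+        // with rCnt = 5: five stm iterations clear 40 bytes and the trailing str clears the
+        // final 4-byte slot. On ARM64 the same shape uses stp/str with ZR.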
+
+ regNumber rAddr;
+ regNumber rCnt = REG_NA; // Invalid
+ regMaskTP regMask;
+
+ regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers
+ availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are
+ // currently live
+ availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for
+ // a large constant.
+
+#if defined(_TARGET_ARM_)
+
+ if (compiler->compLocallocUsed)
+ {
+ availMask &= ~RBM_SAVED_LOCALLOC_SP; // Remove the register reserved when we have a localloc frame
+ }
+
+ regNumber rZero1; // We're going to use initReg for rZero1
+ regNumber rZero2;
+
+ // We pick the next lowest register number for rZero2
+ noway_assert(availMask != RBM_NONE);
+ regMask = genFindLowestBit(availMask);
+ rZero2 = genRegNumFromMask(regMask);
+ availMask &= ~regMask;
+ assert((genRegMask(rZero2) & intRegState.rsCalleeRegArgMaskLiveIn) ==
+ 0); // rZero2 is not a live incoming argument reg
+
+ // We pick the next lowest register number for rAddr
+ noway_assert(availMask != RBM_NONE);
+ regMask = genFindLowestBit(availMask);
+ rAddr = genRegNumFromMask(regMask);
+ availMask &= ~regMask;
+
+#else // !defined(_TARGET_ARM_)
+
+ regNumber rZero1 = REG_ZR;
+ rAddr = initReg;
+ *pInitRegZeroed = false;
+
+#endif // !defined(_TARGET_ARM_)
+
+ bool useLoop = false;
+ unsigned uCntBytes = untrLclHi - untrLclLo;
+ assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes.
+ unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use.
+
+ // When uCntSlots is 9 or less, we will emit a sequence of stm/stp instructions inline.
+ // When it is 10 or greater, we will emit a loop containing a stm/stp instruction.
+ // In both of these cases the stm/stp instruction will write two zeros to memory
+ // and we will use a single str instruction at the end whenever we have an odd count.
+ if (uCntSlots >= 10)
+ useLoop = true;
+
+ if (useLoop)
+ {
+ // We pick the next lowest register number for rCnt
+ noway_assert(availMask != RBM_NONE);
+ regMask = genFindLowestBit(availMask);
+ rCnt = genRegNumFromMask(regMask);
+ availMask &= ~regMask;
+ }
+
+ assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) ==
+ 0); // rAddr is not a live incoming argument reg
+#if defined(_TARGET_ARM_)
+ if (arm_Valid_Imm_For_Add(untrLclLo, INS_FLAGS_DONT_CARE))
+#else // !_TARGET_ARM_
+ if (emitter::emitIns_valid_imm_for_add(untrLclLo, EA_PTRSIZE))
+#endif // !_TARGET_ARM_
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo);
+ }
+ else
+ {
+ // Load immediate into the InitReg register
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo);
+ getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg);
+ *pInitRegZeroed = false;
+ }
+
+ if (useLoop)
+ {
+ noway_assert(uCntSlots >= 2);
+ assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) ==
+ 0); // rCnt is not a live incoming argument reg
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2);
+ }
+
+#if defined(_TARGET_ARM_)
+ rZero1 = genGetZeroReg(initReg, pInitRegZeroed);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, rZero2);
+ ssize_t stmImm = (ssize_t)(genRegMask(rZero1) | genRegMask(rZero2));
+#endif // _TARGET_ARM_
+
+ if (!useLoop)
+ {
+ while (uCntBytes >= REGSIZE_BYTES * 2)
+ {
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm);
+#else // !_TARGET_ARM_
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
+ INS_OPTS_POST_INDEX);
+#endif // !_TARGET_ARM_
+ uCntBytes -= REGSIZE_BYTES * 2;
+ }
+ }
+ else // useLoop is true
+ {
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); // zero stack slots
+ getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rCnt, 1, INS_FLAGS_SET);
+#else // !_TARGET_ARM_
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
+ INS_OPTS_POST_INDEX); // zero stack slots
+ getEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, rCnt, rCnt, 1);
+#endif // !_TARGET_ARM_
+ getEmitter()->emitIns_J(INS_bhi, NULL, -3);
+ uCntBytes %= REGSIZE_BYTES * 2;
+ }
+
+ if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number)
+ {
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, rZero1, rAddr, 0);
+#else // _TARGET_ARM_
+ if ((uCntBytes - REGSIZE_BYTES) == 0)
+ {
+ getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, 0);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, REGSIZE_BYTES, INS_OPTS_POST_INDEX);
+ }
+#endif // !_TARGET_ARM_
+ uCntBytes -= REGSIZE_BYTES;
+ }
+#ifdef _TARGET_ARM64_
+ if (uCntBytes > 0)
+ {
+ assert(uCntBytes == sizeof(int));
+ getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, REG_ZR, rAddr, 0);
+ uCntBytes -= sizeof(int);
+ }
+#endif // _TARGET_ARM64_
+ noway_assert(uCntBytes == 0);
+
+#elif defined(_TARGET_XARCH_)
+ /*
+ Generate the following code:
+
+ lea edi, [ebp/esp-OFFS]
+ mov ecx, <size>
+ xor eax, eax
+ rep stosd
+ */
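+        // For example, zeroing 64 bytes emits "mov ecx, 16" (64 bytes / sizeof(int)),
+        // and "rep stosd" then stores 16 zeroed dwords starting at edi.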
+
+ noway_assert(regSet.rsRegsModified(RBM_EDI));
+
+#ifdef UNIX_AMD64_ABI
+    // For register arguments we may have to save RCX and RDI on Amd64 System V OSes
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
+ {
+ noway_assert(regSet.rsRegsModified(RBM_R12));
+ inst_RV_RV(INS_mov, REG_R12, REG_RCX);
+ regTracker.rsTrackRegTrash(REG_R12);
+ }
+
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
+ {
+ noway_assert(regSet.rsRegsModified(RBM_R13));
+ inst_RV_RV(INS_mov, REG_R13, REG_RDI);
+ regTracker.rsTrackRegTrash(REG_R13);
+ }
+#else // !UNIX_AMD64_ABI
+ // For register arguments we may have to save ECX
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
+ {
+ noway_assert(regSet.rsRegsModified(RBM_ESI));
+ inst_RV_RV(INS_mov, REG_ESI, REG_ECX);
+ regTracker.rsTrackRegTrash(REG_ESI);
+ }
+#endif // !UNIX_AMD64_ABI
+
+ noway_assert((intRegState.rsCalleeRegArgMaskLiveIn & RBM_EAX) == 0);
+
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_EDI, genFramePointerReg(), untrLclLo);
+ regTracker.rsTrackRegTrash(REG_EDI);
+
+ inst_RV_IV(INS_mov, REG_ECX, (untrLclHi - untrLclLo) / sizeof(int), EA_4BYTE);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EAX);
+ instGen(INS_r_stosd);
+
+#ifdef UNIX_AMD64_ABI
+ // Move back the argument registers
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
+ {
+ inst_RV_RV(INS_mov, REG_RCX, REG_R12);
+ }
+
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
+ {
+ inst_RV_RV(INS_mov, REG_RDI, REG_R13);
+ }
+#else // !UNIX_AMD64_ABI
+ // Move back the argument registers
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
+ {
+ inst_RV_RV(INS_mov, REG_ECX, REG_ESI);
+ }
+#endif // !UNIX_AMD64_ABI
+
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+ }
+ else if (genInitStkLclCnt > 0)
+ {
+ assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) ==
+ 0); // initReg is not a live incoming argument reg
+
+ /* Initialize any lvMustInit vars on the stack */
+
+ LclVarDsc* varDsc;
+ unsigned varNum;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (!varDsc->lvMustInit)
+ {
+ continue;
+ }
+
+ // TODO-Review: I'm not sure that we're correctly handling the mustInit case for
+ // partially-enregistered vars in the case where we don't use a block init.
+ noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame);
+
+ // lvMustInit can only be set for GC types or TYP_STRUCT types
+ // or when compInitMem is true
+ // or when in debug code
+
+ noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) ||
+ compiler->info.compInitMem || compiler->opts.compDbgCode);
+
+#ifdef _TARGET_64BIT_
+ if (!varDsc->lvOnFrame)
+ {
+ continue;
+ }
+#else // !_TARGET_64BIT_
+ if (varDsc->lvRegister)
+ {
+ if (varDsc->lvOnFrame)
+ {
+ /* This is a partially enregistered TYP_LONG var */
+ noway_assert(varDsc->lvOtherReg == REG_STK);
+ noway_assert(varDsc->lvType == TYP_LONG);
+
+ noway_assert(compiler->info.compInitMem);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, genGetZeroReg(initReg, pInitRegZeroed),
+ varNum, sizeof(int));
+ }
+ continue;
+ }
+#endif // !_TARGET_64BIT_
+
+ if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem &&
+ (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
+ {
+ // We only initialize the GC variables in the TYP_STRUCT
+ const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES;
+ const BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
+
+ for (unsigned i = 0; i < slots; i++)
+ {
+ if (gcPtrs[i] != TYPE_GC_NONE)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE,
+ genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES);
+ }
+ }
+ }
+ else
+ {
+ regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed);
+
+ // zero out the whole thing rounded up to a single stack slot size
+ unsigned lclSize = (unsigned)roundUp(compiler->lvaLclSize(varNum), sizeof(int));
+ unsigned i;
+ for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i);
+ }
+
+#ifdef _TARGET_64BIT_
+ assert(i == lclSize || (i + sizeof(int) == lclSize));
+ if (i != lclSize)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i);
+ i += sizeof(int);
+ }
+#endif // _TARGET_64BIT_
+ assert(i == lclSize);
+ }
+ }
+
+ if (!TRACK_GC_TEMP_LIFETIMES)
+ {
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr;
+ tempThis = compiler->tmpListNxt(tempThis))
+ {
+ if (!varTypeIsGC(tempThis->tdTempType()))
+ {
+ continue;
+ }
+
+ // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
+
+ inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
+ }
+ }
+ }
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Save the generic context argument.
+ *
+ * We need to do this within the "prolog" in case anyone tries to inspect
+ * the param-type-arg/this (which can be done after the prolog) using
+ * ICodeManager::GetParamTypeArg().
+ */
+
+void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ bool reportArg = compiler->lvaReportParamTypeArg();
+
+    // We should report either the generic context arg or "this", whichever is used.
+ if (!reportArg)
+ {
+#ifndef JIT32_GCENCODER
+ if (!compiler->lvaKeepAliveAndReportThis())
+#endif
+ {
+ return;
+ }
+ }
+
+ // For JIT32_GCENCODER, we won't be here if reportArg is false.
+ unsigned contextArg = reportArg ? compiler->info.compTypeCtxtArg : compiler->info.compThisArg;
+
+ noway_assert(contextArg != BAD_VAR_NUM);
+ LclVarDsc* varDsc = &compiler->lvaTable[contextArg];
+
+ // We are still in the prolog and compiler->info.compTypeCtxtArg has not been
+ // moved to its final home location. So we need to use it from the
+ // incoming location.
+
+ regNumber reg;
+
+ bool isPrespilledForProfiling = false;
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ isPrespilledForProfiling =
+ compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(contextArg, regSet.rsMaskPreSpillRegs(false));
+#endif
+
+ // Load from the argument register only if it is not prespilled.
+ if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling)
+ {
+ reg = varDsc->lvArgReg;
+ }
+ else
+ {
+ if (isFramePointerUsed())
+ {
+#if defined(_TARGET_ARM_)
+ // lvStkOffs is always valid for incoming stack-arguments, even if the argument
+ // will become enregistered.
+ // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
+ noway_assert((2 * REGSIZE_BYTES <= varDsc->lvStkOffs) &&
+ (size_t(varDsc->lvStkOffs) < compiler->compArgSize + 2 * REGSIZE_BYTES));
+#else
+ // lvStkOffs is always valid for incoming stack-arguments, even if the argument
+ // will become enregistered.
+ noway_assert((0 < varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize));
+#endif
+ }
+
+ // We will just use the initReg since it is an available register
+ // and we are probably done using it anyway...
+ reg = initReg;
+ *pInitRegZeroed = false;
+
+ // mov reg, [compiler->info.compTypeCtxtArg]
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), varDsc->lvStkOffs);
+ regTracker.rsTrackRegTrash(reg);
+ }
+
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
+ compiler->lvaCachedGenericContextArgOffset());
+#else // CPU_LOAD_STORE_ARCH
+ // mov [ebp-lvaCachedGenericContextArgOffset()], reg
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
+ compiler->lvaCachedGenericContextArgOffset());
+#endif // !CPU_LOAD_STORE_ARCH
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Set the "GS" security cookie in the prolog.
+ */
+
+void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (!compiler->getNeedsGSSecurityCookie())
+ {
+ return;
+ }
+
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ if (compiler->gsGlobalSecurityCookieAddr == nullptr)
+ {
+#ifdef _TARGET_AMD64_
+ // eax = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = eax
+ getEmitter()->emitIns_R_I(INS_mov, EA_PTRSIZE, REG_RAX, compiler->gsGlobalSecurityCookieVal);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_RAX, compiler->lvaGSSecurityCookie, 0);
+#else
+ // mov dword ptr [frame.GSSecurityCookie], #GlobalSecurityCookieVal
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, compiler->gsGlobalSecurityCookieVal,
+ compiler->lvaGSSecurityCookie, 0, initReg);
+#endif
+ }
+ else
+ {
+ regNumber reg;
+#ifdef _TARGET_XARCH_
+ // Always use EAX on x86 and x64
+ // On x64, if we're not moving into RAX, and the address isn't RIP relative, we can't encode it.
+ reg = REG_EAX;
+#else
+ // We will just use the initReg since it is an available register
+ reg = initReg;
+#endif
+
+ *pInitRegZeroed = false;
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
+ regTracker.rsTrackRegTrash(reg);
+#else
+ // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
+ // mov dword ptr [frame.GSSecurityCookie], reg
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ regTracker.rsTrackRegTrash(reg);
+#endif
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaGSSecurityCookie, 0);
+ }
+}
+
+#ifdef PROFILING_SUPPORTED
+
+/*-----------------------------------------------------------------------------
+ *
+ * Generate the profiling function enter callback.
+ */
+
+void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ // Give profiler a chance to back out of hooking this method
+ if (!compiler->compIsProfilerHookNeeded())
+ {
+ return;
+ }
+
+#ifndef LEGACY_BACKEND
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet.
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
+ noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
+
+ // Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
+ // In case of vararg methods, arg regs are already homed.
+ //
+    // Note: Here we don't need to worry about updating GC info, since the enter
+    // callback is generated as part of the prolog, which is non-GC interruptible.
+    // Moreover, the GC cannot kick in while executing inside the profiler callback, which is
+    // a profiler requirement so that it can examine arguments which could be obj refs.
+ if (!compiler->info.compIsVarArgs)
+ {
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+
+ if (!varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+ var_types storeType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg;
+ getEmitter()->emitIns_S_R(ins_Store(storeType), emitTypeSize(storeType), argReg, varNum, 0);
+ }
+ }
+
+ // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
+ // RCX = ProfilerMethHnd
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ // Profiler hooks enabled during Ngen time.
+ // Profiler handle needs to be accessed through an indirection of a pointer.
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ // No need to record relocations, if we are generating ELT hooks under the influence
+ // of complus_JitELtHookEnabled=1
+ if (compiler->opts.compJitELTHookEnabled)
+ {
+ genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ }
+
+ // RDX = caller's SP
+ // Notes
+ // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
+ // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
+ // of that offset to FramePointer to obtain caller's SP value.
+ assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
+
+ // Can't have a call until we have enough padding for rejit
+ genPrologPadForReJit();
+
+ // This will emit either
+ // "call ip-relative 32-bit offset" or
+ // "mov rax, helper addr; call rax"
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
+
+ // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
+ // generation logic that moves args around as required by first BB entry point conditions
+ // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs()
+ // and genEnregisterIncomingStackArgs().
+ //
+ // Now reload arg registers from home locations.
+ // Vararg methods:
+ // - we need to reload only known (i.e. fixed) reg args.
+ // - if floating point type, also reload it into corresponding integer reg
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+
+ if (!varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg;
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+
+#if FEATURE_VARARG
+ if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
+ {
+ regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
+ instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
+ inst_RV_RV(ins, argReg, intArgReg, loadType);
+ }
+#endif // FEATURE_VARARG
+ }
+
+ // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
+ if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
+ {
+ *pInitRegZeroed = false;
+ }
+
+#else //!_TARGET_AMD64_
+ NYI("RyuJIT: Emit Profiler Enter callback");
+#endif
+
+#else // LEGACY_BACKEND
+
+ unsigned saveStackLvl2 = genStackLevel;
+
+#if defined(_TARGET_X86_)
+ // Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
+ // for x86 stack unwinding
+
+ // Push the profilerHandle
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+#elif defined(_TARGET_ARM_)
+ // On Arm arguments are prespilled on stack, which frees r0-r3.
+ // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle.
+ // The call target register could be any free register.
+ regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_ENTER_ARG);
+ noway_assert(argReg == REG_PROFILER_ENTER_ARG);
+ regSet.rsLockReg(RBM_PROFILER_ENTER_ARG);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ regTracker.rsTrackRegTrash(argReg);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ }
+#else // _TARGET_*
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
+#endif // _TARGET_*
+
+ //
+ // Can't have a call until we have enough padding for rejit
+ //
+ genPrologPadForReJit();
+
+ // This will emit either
+ // "call ip-relative 32-bit offset" or
+ // "mov rax, helper addr; call rax"
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
+ 0, // argSize. Again, we have to lie about it
+ EA_UNKNOWN); // retSize
+
+#if defined(_TARGET_X86_)
+ //
+ // Adjust the number of stack slots used by this managed method if necessary.
+ //
+ if (compiler->fgPtrArgCntMax < 1)
+ {
+ compiler->fgPtrArgCntMax = 1;
+ }
+#elif defined(_TARGET_ARM_)
+ // Unlock registers
+ regSet.rsUnlockReg(RBM_PROFILER_ENTER_ARG);
+
+ if (initReg == argReg)
+ {
+ *pInitRegZeroed = false;
+ }
+#else // _TARGET_*
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
+#endif // _TARGET_*
+
+ /* Restore the stack level */
+
+ genStackLevel = saveStackLvl2;
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Generates Leave profiler hook.
+ * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
+ */
+
+void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/)
+{
+ // Only hook if profiler says it's okay.
+ if (!compiler->compIsProfilerHookNeeded())
+ {
+ return;
+ }
+
+ compiler->info.compProfilerCallback = true;
+
+    // Need to save the current stack level, since the callee will pop the argument
+ unsigned saveStackLvl2 = genStackLevel;
+
+#ifndef LEGACY_BACKEND
+
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet.
+ // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
+ noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
+
+ // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
+ // registers that profiler callback kills.
+ if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
+ {
+ regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
+ noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
+ }
+
+ // At this point return value is computed and stored in RAX or XMM0.
+ // On Amd64, Leave callback preserves the return register. We keep
+ // RAX alive by not reporting as trashed by helper call. Also note
+ // that GC cannot kick-in while executing inside profiler callback,
+ // which is a requirement of profiler as well since it needs to examine
+ // return value which could be an obj ref.
+
+ // RCX = ProfilerMethHnd
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ // Profiler hooks enabled during Ngen time.
+ // Profiler handle needs to be accessed through an indirection of an address.
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ // Don't record relocations, if we are generating ELT hooks under the influence
+ // of complus_JitELtHookEnabled=1
+ if (compiler->opts.compJitELTHookEnabled)
+ {
+ genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ }
+
+ // RDX = caller's SP
+    // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
+    // of the statements to execute unconditionally and clean up the rest.
+ if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
+ {
+ // Caller's SP relative offset to FramePointer will be negative. We need to add absolute
+ // value of that offset to FramePointer to obtain caller's SP value.
+ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
+ }
+ else
+ {
+ // If we are here means that it is a tentative frame layout during which we
+ // cannot use caller's SP offset since it is an estimate. For now we require the
+ // method to have at least a single arg so that we can use it to obtain caller's
+ // SP.
+ LclVarDsc* varDsc = compiler->lvaTable;
+ NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
+
+ // lea rdx, [FramePointer + Arg0's offset]
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
+ }
+
+ // We can use any callee trash register (other than RAX, RCX, RDX) for call target.
+ // We use R8 here. This will emit either
+ // "call ip-relative 32-bit offset" or
+ // "mov r8, helper addr; call r8"
+ genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
+
+#else //!_TARGET_AMD64_
+ NYI("RyuJIT: Emit Profiler Leave callback");
+#endif // _TARGET_*
+
+#else // LEGACY_BACKEND
+
+#if defined(_TARGET_X86_)
+ //
+ // Push the profilerHandle
+ //
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+ genSinglePush();
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
+ sizeof(int) * 1, // argSize
+ EA_UNKNOWN); // retSize
+
+ //
+ // Adjust the number of stack slots used by this managed method if necessary.
+ //
+ if (compiler->fgPtrArgCntMax < 1)
+ {
+ compiler->fgPtrArgCntMax = 1;
+ }
+#elif defined(_TARGET_ARM_)
+ //
+ // Push the profilerHandle
+ //
+
+    // We could optimize register usage based on whether the return value is int/long/void, but to keep it
+    // simple we always lock RBM_PROFILER_RET_USED.
+ regNumber scratchReg = regSet.rsGrabReg(RBM_PROFILER_RET_SCRATCH);
+ noway_assert(scratchReg == REG_PROFILER_RET_SCRATCH);
+ regSet.rsLockReg(RBM_PROFILER_RET_USED);
+
+ // Contract between JIT and Profiler Leave callout on arm:
+ // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
+ // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
+ // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
+ // It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15.
+ //
+ // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting Leave
+ // callback.
+ bool r0Trashed;
+ emitAttr attr = EA_UNKNOWN;
+
+    if (compiler->info.compRetType == TYP_VOID ||
+        (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP &&
+         (varTypeIsFloating(compiler->info.compRetType) ||
+          compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
+ {
+ r0Trashed = false;
+ }
+ else
+ {
+ // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing
+        // profiler handle. Therefore, r0 is moved to REG_PROFILER_RET_SCRATCH as per contract.
+ if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
+ {
+ attr = EA_GCREF;
+ gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH);
+ }
+ else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur)
+ {
+ attr = EA_BYREF;
+ gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH);
+ }
+ else
+ {
+ attr = EA_4BYTE;
+ }
+
+ getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0);
+ regTracker.rsTrackRegTrash(REG_PROFILER_RET_SCRATCH);
+ gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
+ r0Trashed = true;
+ }
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
+ 0, // argSize
+ EA_UNKNOWN); // retSize
+
+ // Restore state that existed before profiler callback
+ if (r0Trashed)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+ gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
+ }
+
+ regSet.rsUnlockReg(RBM_PROFILER_RET_USED);
+#else // _TARGET_*
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking them");
+#endif // _TARGET_*
+
+#endif // LEGACY_BACKEND
+
+ /* Restore the stack level */
+ genStackLevel = saveStackLvl2;
+}
+
+#endif // PROFILING_SUPPORTED
+
+/*****************************************************************************
+
+Esp frames :
+----------
+
+These instructions are just a reordering of the instructions used today.
+
+push ebp
+push esi
+push edi
+push ebx
+sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
+...
+add esp, LOCALS_SIZE / pop dummyReg
+pop ebx
+pop edi
+pop esi
+pop ebp
+ret
+
+Ebp frames :
+----------
+
+The epilog does "add esp, LOCALS_SIZE" instead of "mov esp, ebp".
+Everything else is similar, though in a different order.
+
+The security object will no longer be at a fixed offset. However, the
+offset can still be determined by looking up the GC-info and determining
+how many callee-saved registers are pushed.
+
+push ebp
+mov ebp, esp
+push esi
+push edi
+push ebx
+sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
+...
+add esp, LOCALS_SIZE / pop dummyReg
+pop ebx
+pop edi
+pop esi
+(mov esp, ebp if there are no callee-saved registers)
+pop ebp
+ret
+
+Double-aligned frame :
+--------------------
+
+LOCALS_SIZE_ADJUSTED needs to include an unused DWORD if an odd number
+of callee-saved registers are pushed on the stack so that the locals
+themselves are qword-aligned. The instructions are the same as today,
+just in a different order.
+
+push ebp
+mov ebp, esp
+and esp, 0xFFFFFFFC
+push esi
+push edi
+push ebx
+sub esp, LOCALS_SIZE_ADJUSTED / push dummyReg if LOCALS_SIZE=sizeof(void*)
+...
+add esp, LOCALS_SIZE_ADJUSTED / pop dummyReg
+pop ebx
+pop edi
+pop esi
+pop ebp
+mov esp, ebp
+pop ebp
+ret
+
+localloc (with ebp) frames :
+--------------------------
+
+The instructions are the same as today, just in a different order.
+Also, today the epilog does "lea esp, [ebp-LOCALS_SIZE-calleeSavedRegsPushedSize]"
+which will change to "lea esp, [ebp-calleeSavedRegsPushedSize]".
+
+push ebp
+mov ebp, esp
+push esi
+push edi
+push ebx
+sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
+...
+lea esp, [ebp-calleeSavedRegsPushedSize]
+pop ebx
+pop edi
+pop esi
+(mov esp, ebp if there are no callee-saved registers)
+pop ebp
+ret
+
+*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Generates appropriate NOP padding for a function prolog to support ReJIT.
+ */
+
+void CodeGen::genPrologPadForReJit()
+{
+ assert(compiler->compGeneratingProlog);
+
+#ifdef _TARGET_XARCH_
+ if (!(compiler->opts.eeFlags & CORJIT_FLG_PROF_REJIT_NOPS))
+ {
+ return;
+ }
+
+#if FEATURE_EH_FUNCLETS
+
+ // No need to generate pad (nops) for funclets.
+ // When compiling the main function (and not a funclet)
+ // the value of funCurrentFunc->funKind is equal to FUNC_ROOT.
+ if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
+ {
+ return;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ unsigned size = getEmitter()->emitGetPrologOffsetEstimate();
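+    // For example, if the prolog size estimate is 2 bytes, emit 3 bytes of nops so that
+    // the method entry spans at least 5 bytes (the size of a rel32 jmp).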
+ if (size < 5)
+ {
+ instNop(5 - size);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Reserve space for a function prolog.
+ */
+
+void CodeGen::genReserveProlog(BasicBlock* block)
+{
+ assert(block != nullptr);
+
+ JITDUMP("Reserving prolog IG for block BB%02u\n", block->bbNum);
+
+ /* Nothing is live on entry to the prolog */
+
+ getEmitter()->emitCreatePlaceholderIG(IGPT_PROLOG, block, VarSetOps::MakeEmpty(compiler), 0, 0, false);
+}
+
+/*****************************************************************************
+ *
+ * Reserve space for a function epilog.
+ */
+
+void CodeGen::genReserveEpilog(BasicBlock* block)
+{
+ VARSET_TP VARSET_INIT(compiler, gcrefVarsArg, getEmitter()->emitThisGCrefVars);
+ regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
+ regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur;
+
+ /* The return value is special-cased: make sure it goes live for the epilog */
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+
+ if (genFullPtrRegMap && !jmpEpilog)
+ {
+ if (varTypeIsGC(compiler->info.compRetNativeType))
+ {
+ noway_assert(genTypeStSz(compiler->info.compRetNativeType) == genTypeStSz(TYP_I_IMPL));
+
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
+
+ switch (compiler->info.compRetNativeType)
+ {
+ case TYP_REF:
+ gcrefRegsArg |= RBM_INTRET;
+ break;
+ case TYP_BYREF:
+ byrefRegsArg |= RBM_INTRET;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ JITDUMP("Reserving epilog IG for block BB%02u\n", block->bbNum);
+
+ assert(block != nullptr);
+ bool last = (block->bbNext == nullptr);
+ getEmitter()->emitCreatePlaceholderIG(IGPT_EPILOG, block, gcrefVarsArg, gcrefRegsArg, byrefRegsArg, last);
+}
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Reserve space for a funclet prolog.
+ */
+
+void CodeGen::genReserveFuncletProlog(BasicBlock* block)
+{
+ assert(block != nullptr);
+
+ /* Currently, no registers are live on entry to the prolog, except maybe
+ the exception object. There might be some live stack vars, but they
+ cannot be accessed until after the frame pointer is re-established.
+ In order to potentially prevent emitting a death before the prolog
+ and a birth right after it, we just report it as live during the
+ prolog, and rely on the prolog being non-interruptible. Trust
+ genCodeForBBlist to correctly initialize all the sets.
+
+ We might need to relax these asserts if the VM ever starts
+ restoring any registers, then we could have live-in reg vars...
+ */
+
+ noway_assert((gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT) == gcInfo.gcRegGCrefSetCur);
+ noway_assert(gcInfo.gcRegByrefSetCur == 0);
+
+ JITDUMP("Reserving funclet prolog IG for block BB%02u\n", block->bbNum);
+
+ getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_PROLOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, false);
+}
+
+/*****************************************************************************
+ *
+ * Reserve space for a funclet epilog.
+ */
+
+void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
+{
+ assert(block != nullptr);
+
+ JITDUMP("Reserving funclet epilog IG for block BB%02u\n", block->bbNum);
+
+ bool last = (block->bbNext == nullptr);
+ getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_EPILOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, last);
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ * Finalize the frame size and offset assignments.
+ *
+ * No changes can be made to the modified register set after this, since that can affect how many
+ * callee-saved registers get saved.
+ */
+void CodeGen::genFinalizeFrame()
+{
+ JITDUMP("Finalizing stack frame\n");
+
+#ifndef LEGACY_BACKEND
+ // Initializations need to happen based on the var locations at the start
+ // of the first basic block, so load those up. In particular, the determination
+ // of whether or not to use block init in the prolog is dependent on the variable
+ // locations on entry to the function.
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
+#endif // !LEGACY_BACKEND
+
+ genCheckUseBlockInit();
+
+ // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_X86_)
+
+ if (compiler->compTailCallUsed)
+ {
+ // If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
+ // argument to "1", indicating to the tailcall helper that we've saved the callee-saved
+ // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
+ // actually get saved.
+
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
+ }
+#endif // _TARGET_X86_
+
+#if defined(_TARGET_ARMARCH_)
+ // We need to determine whether we will adjust SP by more than a specific amount; if so, we use a loop
+ // to touch stack pages, which requires multiple registers. See genAllocLclFrame() for details.
+ if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
+ {
+ regSet.rsSetRegsModified(VERY_LARGE_FRAME_SIZE_REG_MASK);
+ }
+#endif // defined(_TARGET_ARMARCH_)
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Modified regs: ");
+ dspRegMask(regSet.rsGetModifiedRegsMask());
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
+ if (compiler->opts.compDbgEnC)
+ {
+ // We always save FP.
+ noway_assert(isFramePointerUsed());
+#ifdef _TARGET_AMD64_
+ // On x64 we always save exactly RBP, RSI and RDI for EnC.
+ regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_RSI | RBM_RDI);
+ regSet.rsSetRegsModified(RBM_RSI | RBM_RDI);
+ noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0);
+#else // !_TARGET_AMD64_
+ // On x86 we save all callee saved regs so the saved reg area size is consistent
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+#endif // !_TARGET_AMD64_
+ }
+
+ /* If we have any pinvoke calls, we might potentially trash everything */
+ if (compiler->info.compCallUnmanaged)
+ {
+ noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
+ /* Count how many callee-saved registers will actually be saved (pushed) */
+
+ // EBP cannot be (directly) modified for EBP frame and double-aligned frames
+ noway_assert(!doubleAlignOrFramePointerUsed() || !regSet.rsRegsModified(RBM_FPBASE));
+
+#if ETW_EBP_FRAMED
+ // EBP cannot be (directly) modified
+ noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
+#endif
+
+ regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+
+#ifdef _TARGET_ARMARCH_
+ if (isFramePointerUsed())
+ {
+ // For a FP based frame we have to push/pop the FP register
+ //
+ maskCalleeRegsPushed |= RBM_FPBASE;
+
+ // This assert checks that we are not using REG_FP
+ // as both the frame pointer and as a codegen register
+ //
+ assert(!regSet.rsRegsModified(RBM_FPBASE));
+ }
+
+ // We always push LR. See genPushCalleeSavedRegisters().
+ //
+ maskCalleeRegsPushed |= RBM_LR;
+
+#if defined(_TARGET_ARM_)
+ // TODO-ARM64-Bug?: enable some variant of this for FP on ARM64?
+ regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT;
+ regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat;
+
+ if ((maskPushRegsFloat != RBM_NONE) ||
+ (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD)))
+ {
+ // Here we try to keep stack double-aligned before the vpush
+ if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0)
+ {
+ regNumber extraPushedReg = REG_R4;
+ while (maskPushRegsInt & genRegMask(extraPushedReg))
+ {
+ extraPushedReg = REG_NEXT(extraPushedReg);
+ }
+ if (extraPushedReg < REG_R11)
+ {
+ maskPushRegsInt |= genRegMask(extraPushedReg);
+ regSet.rsSetRegsModified(genRegMask(extraPushedReg));
+ }
+ }
+ maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat;
+ }
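+ // Illustrative example (hypothetical register sets, not taken from a particular method): if the
+ // pre-spill mask plus maskPushRegsInt covers an even number of registers, e.g. {r0-r3, r11, lr},
+ // no extra register is needed; if it covers an odd number, e.g. {r7, r11, lr}, we add the lowest
+ // free register at or above r4 so the "push" leaves SP 8-byte aligned before the following vpush.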
+
+ // We currently only expect to push/pop consecutive FP registers
+ // and these have to be double-sized registers as well.
+ // Here we will ensure that maskPushRegsFloat obeys these requirements.
+ //
+ if (maskPushRegsFloat != RBM_NONE)
+ {
+ regMaskTP contiguousMask = genRegMaskFloat(REG_F16, TYP_DOUBLE);
+ while (maskPushRegsFloat > contiguousMask)
+ {
+ contiguousMask <<= 2;
+ contiguousMask |= genRegMaskFloat(REG_F16, TYP_DOUBLE);
+ }
+ if (maskPushRegsFloat != contiguousMask)
+ {
+ regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat;
+ maskPushRegsFloat |= maskExtraRegs;
+ regSet.rsSetRegsModified(maskExtraRegs);
+ maskCalleeRegsPushed |= maskExtraRegs;
+ }
+ }
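+ // Illustrative example (hypothetical): if only d8 and d10 had been marked as modified, the loop
+ // above grows contiguousMask to cover d8-d10, and the hole (d9) is added to maskPushRegsFloat,
+ // to the modified set, and to maskCalleeRegsPushed, so a single contiguous vpush/vpop of d8-d10
+ // can be emitted.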
+#endif // _TARGET_ARM_
+#endif // _TARGET_ARMARCH_
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // Compute the count of callee saved float regs saved on stack.
+ // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm15)
+ // regs are stack allocated and preserved in their stack locations.
+ compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED;
+ maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+ compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed);
+ dspRegMask(maskCalleeRegsPushed);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* Assign the final offsets to things living on the stack frame */
+
+ compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);
+
+ /* We want to make sure that the prolog size calculated here is accurate
+ (that is, instructions will not shrink because of conservative stack
+ frame approximations). We do this by filling in the correct size
+ here, where we have committed to the final numbers for the frame offsets.
+ This ensures that the prolog size is always correct.
+ */
+ getEmitter()->emitMaxTmpSize = compiler->tmpSize;
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode || compiler->opts.disAsm || compiler->opts.disAsm2 || verbose)
+ {
+ compiler->lvaTableDump();
+ }
+#endif
+}
+
+//------------------------------------------------------------------------
+// genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer.
+//
+// Arguments:
+// delta - the offset to add to the current stack pointer to establish the frame pointer
+// reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data.
+
+void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
+{
+ assert(compiler->compGeneratingProlog);
+
+#if defined(_TARGET_XARCH_)
+
+ if (delta == 0)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
+ psiMoveESPtoEBP();
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
+ // We don't update prolog scope info (there is no function to handle lea), but that is currently dead code
+ // anyway.
+ }
+
+ if (reportUnwindData)
+ {
+ compiler->unwindSetFrameReg(REG_FPBASE, delta);
+ }
+
+#elif defined(_TARGET_ARM_)
+
+ assert(arm_Valid_Imm_For_Add_SP(delta));
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
+
+ if (reportUnwindData)
+ {
+ compiler->unwindPadding();
+ }
+
+#else
+ NYI("establish frame pointer");
+#endif
+}
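+// For reference, the code emitted above is roughly the following (a sketch, not an exhaustive list):
+//   x86/x64: "mov ebp, esp" when delta == 0, otherwise "lea ebp, [esp+delta]"
+//   ARM:     "add r11, sp, #delta"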
+
+/*****************************************************************************
+ *
+ * Generates code for a function prolog.
+ *
+ * NOTE REGARDING CHANGES THAT IMPACT THE DEBUGGER:
+ *
+ * The debugger relies on decoding ARM instructions to be able to successfully step through code. It does not
+ * implement decoding all ARM instructions. It only implements decoding the instructions which the JIT emits, and
+ * only instructions which result in control not going to the next instruction. Basically, any time execution would
+ * not continue at the next instruction (such as B, BL, BX, BLX, POP{pc}, etc.), the debugger has to be able to
+ * decode that instruction. If any of this is changed on ARM, the debugger team needs to be notified so that it
+ * can ensure stepping isn't broken. This is also a requirement for x86 and amd64.
+ *
+ * If any changes are made in the prolog, epilog, calls, returns, and branches, it is a good idea to notify the
+ * debugger team to ensure that stepping still works.
+ *
+ * ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genFnProlog()
+{
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ compiler->funSetCurrentFunc(0);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFnProlog()\n");
+ }
+#endif
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+#endif
+
+#ifdef LEGACY_BACKEND
+ genFinalizeFrame();
+#endif // LEGACY_BACKEND
+
+ assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
+
+ /* Ready to start on the prolog proper */
+
+ getEmitter()->emitBegProlog();
+ compiler->unwindBegProlog();
+
+#ifdef DEBUGGING_SUPPORT
+ // Do this so we can put the prolog instruction group ahead of
+ // other instruction groups
+ genIPmappingAddToFront((IL_OFFSETX)ICorDebugInfo::PROLOG);
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n__prolog:\n");
+ }
+#endif
+
+#ifdef DEBUGGING_SUPPORT
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ // Create new scopes for the method-parameters for the prolog-block.
+ psiBegProlog();
+ }
+#endif
+
+#ifdef DEBUG
+
+ if (compiler->compJitHaltMethod())
+ {
+ /* Put a nop first because the debugger and other tools are likely to
+ put an int3 at the beginning, and we don't want to confuse them */
+
+ instGen(INS_nop);
+ instGen(INS_BREAKPOINT);
+
+#ifdef _TARGET_ARMARCH_
+ // Avoid asserts in the unwind info because these instructions aren't accounted for.
+ compiler->unwindPadding();
+#endif // _TARGET_ARMARCH_
+ }
+#endif // DEBUG
+
+#if FEATURE_EH_FUNCLETS && defined(DEBUG)
+
+ // We cannot force 0-initialization of the PSPSym
+ // as it will overwrite the real value
+ if (compiler->lvaPSPSym != BAD_VAR_NUM)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->lvaPSPSym];
+ assert(!varDsc->lvMustInit);
+ }
+
+#endif // FEATURE_EH_FUNCLETS && DEBUG
+
+ /*-------------------------------------------------------------------------
+ *
+ * Record the stack frame ranges that will cover all of the tracked
+ * and untracked pointer variables.
+ * Also find which registers will need to be zero-initialized.
+ *
+ * 'initRegs': - Generally, enregistered variables should not need to be
+ * zero-inited. They only need to be zero-inited when they
+ * have a possibly uninitialized read on some control
+ * flow path. Apparently some of the IL_STUBs that we
+ * generate have this property.
+ */
+
+ int untrLclLo = +INT_MAX;
+ int untrLclHi = -INT_MAX;
+ // 'hasUntrLcl' is true if there are any stack locals which must be init'ed.
+ // Note that they may be tracked, but simply not allocated to a register.
+ bool hasUntrLcl = false;
+
+ int GCrefLo = +INT_MAX;
+ int GCrefHi = -INT_MAX;
+ bool hasGCRef = false;
+
+ regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed.
+ regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed.
+ regMaskTP initDblRegs = RBM_NONE;
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
+ {
+ noway_assert(varDsc->lvRefCnt == 0);
+ continue;
+ }
+
+ signed int loOffs = varDsc->lvStkOffs;
+ signed int hiOffs = varDsc->lvStkOffs + compiler->lvaLclSize(varNum);
+
+ /* We need to know the offset range of tracked stack GC refs */
+ /* We assume that the GC reference can be anywhere in the TYP_STRUCT */
+
+ if (compiler->lvaTypeIsGC(varNum) && varDsc->lvTrackedNonStruct() && varDsc->lvOnFrame)
+ {
+ // Fields of dependently promoted structs (PROMOTION_TYPE_DEPENDENT) have already been
+ // taken care of by the parent struct.
+ if (!compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ hasGCRef = true;
+
+ if (loOffs < GCrefLo)
+ {
+ GCrefLo = loOffs;
+ }
+ if (hiOffs > GCrefHi)
+ {
+ GCrefHi = hiOffs;
+ }
+ }
+ }
+
+ /* For lvMustInit vars, gather pertinent info */
+
+ if (!varDsc->lvMustInit)
+ {
+ continue;
+ }
+
+ if (varDsc->lvIsInReg())
+ {
+ regMaskTP regMask = genRegMask(varDsc->lvRegNum);
+ if (!varDsc->IsFloatRegType())
+ {
+ initRegs |= regMask;
+
+ if (varTypeIsMultiReg(varDsc))
+ {
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ initRegs |= genRegMask(varDsc->lvOtherReg);
+ }
+ else
+ {
+ /* Upper DWORD is on the stack, and needs to be inited */
+
+ loOffs += sizeof(int);
+ goto INIT_STK;
+ }
+ }
+ }
+#if !FEATURE_STACK_FP_X87
+ else if (varDsc->TypeGet() == TYP_DOUBLE)
+ {
+ initDblRegs |= regMask;
+ }
+ else
+ {
+ initFltRegs |= regMask;
+ }
+#endif // !FEATURE_STACK_FP_X87
+ }
+ else
+ {
+ INIT_STK:
+
+ hasUntrLcl = true;
+
+ if (loOffs < untrLclLo)
+ {
+ untrLclLo = loOffs;
+ }
+ if (hiOffs > untrLclHi)
+ {
+ untrLclHi = hiOffs;
+ }
+ }
+ }
+
+ /* Don't forget about spill temps that hold pointers */
+
+ if (!TRACK_GC_TEMP_LIFETIMES)
+ {
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr; tempThis = compiler->tmpListNxt(tempThis))
+ {
+ if (!varTypeIsGC(tempThis->tdTempType()))
+ {
+ continue;
+ }
+
+ signed int loOffs = tempThis->tdTempOffs();
+ signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
+
+ // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
+ // previous frame pointer. Thus, stkOffs can't be zero.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !defined(_TARGET_AMD64_)
+ // However, on amd64 there is no requirement to chain frame pointers.
+
+ noway_assert(!isFramePointerUsed() || loOffs != 0);
+#endif // !defined(_TARGET_AMD64_)
+ // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
+
+ hasUntrLcl = true;
+
+ if (loOffs < untrLclLo)
+ {
+ untrLclLo = loOffs;
+ }
+ if (hiOffs > untrLclHi)
+ {
+ untrLclHi = hiOffs;
+ }
+ }
+ }
+
+ assert((genInitStkLclCnt > 0) == hasUntrLcl);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (genInitStkLclCnt > 0)
+ {
+ printf("Found %u lvMustInit stk vars, frame offsets %d through %d\n", genInitStkLclCnt, -untrLclLo,
+ -untrLclHi);
+ }
+ }
+#endif
+
+#ifdef _TARGET_ARM_
+ // On the ARM we will spill any incoming struct args in the first instruction in the prolog
+ // Ditto for all enregistered user arguments in a varargs method.
+ // These registers will be available to use for the initReg. We just remove
+ // all of these registers from the rsCalleeRegArgMaskLiveIn.
+ //
+ intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
+#endif
+
+ /* Choose the register to use for zero initialization */
+
+ regNumber initReg = REG_SCRATCH; // Unless we find a better register below
+ bool initRegZeroed = false;
+ regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
+ regMaskTP tempMask;
+
+ // We should not use the special PINVOKE registers as the initReg
+ // since they are trashed by the jithelper call to setup the PINVOKE frame
+ if (compiler->info.compCallUnmanaged)
+ {
+ excludeMask |= RBM_PINVOKE_FRAME;
+
+ assert((!compiler->opts.ShouldUsePInvokeHelpers()) || (compiler->info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!compiler->opts.ShouldUsePInvokeHelpers())
+ {
+ noway_assert(compiler->info.compLvFrameListRoot < compiler->lvaCount);
+
+ excludeMask |= (RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH);
+
+ // We also must exclude the register used by compLvFrameListRoot when it is enregistered
+ //
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+ if (varDsc->lvRegister)
+ {
+ excludeMask |= genRegMask(varDsc->lvRegNum);
+ }
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ // If we have a variable sized frame (compLocallocUsed is true)
+ // then using REG_SAVED_LOCALLOC_SP in the prolog is not allowed
+ if (compiler->compLocallocUsed)
+ {
+ excludeMask |= RBM_SAVED_LOCALLOC_SP;
+ }
+#endif // _TARGET_ARM_
+
+#if defined(_TARGET_XARCH_)
+ if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
+ {
+ // We currently must use REG_EAX on x86 here
+ // because the loop's backwards branch depends upon the size of EAX encodings
+ assert(initReg == REG_EAX);
+ }
+ else
+#endif // _TARGET_XARCH_
+ {
+ tempMask = initRegs & ~excludeMask & ~regSet.rsMaskResvd;
+
+ if (tempMask != RBM_NONE)
+ {
+ // We will use one of the registers that we were planning to zero init anyway.
+ // We pick the lowest register number.
+ tempMask = genFindLowestBit(tempMask);
+ initReg = genRegNumFromMask(tempMask);
+ }
+ // Next we prefer to use one of the unused argument registers.
+ // If they aren't available we use one of the caller-saved integer registers.
+ else
+ {
+ tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
+ if (tempMask != RBM_NONE)
+ {
+ // We pick the lowest register number
+ tempMask = genFindLowestBit(tempMask);
+ initReg = genRegNumFromMask(tempMask);
+ }
+ }
+ }
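+ // Example of the selection above (hypothetical masks): if initRegs = {rsi, rdi} and neither is an
+ // incoming argument register or otherwise excluded, we pick rsi (the lowest set bit); if no
+ // must-init register qualifies, we fall back to the lowest qualifying modified integer register,
+ // and if none qualifies initReg stays REG_SCRATCH.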
+
+ noway_assert(!compiler->info.compCallUnmanaged || (initReg != REG_PINVOKE_FRAME));
+
+#if defined(_TARGET_AMD64_)
+ // If we are a varargs call, in order to set up the arguments correctly this
+ // must be done in a 2 step process. As per the x64 ABI:
+ // a) The caller sets up the argument shadow space (just before the return
+ // address, 4 pointer sized slots).
+ // b) The callee is responsible to home the arguments on the shadow space
+ // provided by the caller.
+ // This way, the varargs iterator will be able to retrieve the
+ // call arguments properly since both the arg regs and the stack allocated
+ // args will be contiguous.
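+ // A rough sketch of the stack at this point (assuming nothing has been pushed in the prolog yet,
+ // so [rsp] holds the return address): spillIntArgRegsToShadowSlots() homes rcx/rdx/r8/r9 into the
+ // caller-allocated shadow slots at [rsp+8] .. [rsp+32], making them contiguous with any
+ // stack-passed arguments at [rsp+40] and above, as the varargs iterator expects.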
+ if (compiler->info.compIsVarArgs)
+ {
+ getEmitter()->spillIntArgRegsToShadowSlots();
+ }
+
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_ARM_
+ /*-------------------------------------------------------------------------
+ *
+ * Now start emitting the part of the prolog which sets up the frame
+ */
+
+ if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
+ {
+ inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
+ compiler->unwindPushMaskInt(regSet.rsMaskPreSpillRegs(true));
+ }
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_XARCH_
+ if (doubleAlignOrFramePointerUsed())
+ {
+ inst_RV(INS_push, REG_FPBASE, TYP_REF);
+ compiler->unwindPush(REG_FPBASE);
+ psiAdjustStackLevel(REGSIZE_BYTES);
+
+#ifndef _TARGET_AMD64_ // On AMD64, establish the frame pointer after the "sub rsp"
+ genEstablishFramePointer(0, /*reportUnwindData*/ true);
+#endif // !_TARGET_AMD64_
+
+#if DOUBLE_ALIGN
+ if (compiler->genDoubleAlign())
+ {
+ noway_assert(isFramePointerUsed() == false);
+ noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); /* Trashing EBP is out. */
+
+ inst_RV_IV(INS_AND, REG_SPBASE, -8, EA_PTRSIZE);
+ }
+#endif // DOUBLE_ALIGN
+ }
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM64_
+ // Probe large frames now, if necessary, since genPushCalleeSavedRegisters() will allocate the frame.
+ genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
+ genPushCalleeSavedRegisters(initReg, &initRegZeroed);
+#else // !_TARGET_ARM64_
+ genPushCalleeSavedRegisters();
+#endif // !_TARGET_ARM64_
+
+#ifdef _TARGET_ARM_
+ bool needToEstablishFP = false;
+ int afterLclFrameSPtoFPdelta = 0;
+ if (doubleAlignOrFramePointerUsed())
+ {
+ needToEstablishFP = true;
+
+ // If the local frame is small enough, we establish the frame pointer after the OS-reported prolog.
+ // This makes the prolog and epilog match, giving us smaller unwind data. If the frame size is
+ // too big, we go ahead and do it here.
+
+ int SPtoFPdelta = (compiler->compCalleeRegsPushed - 2) * REGSIZE_BYTES;
+ afterLclFrameSPtoFPdelta = SPtoFPdelta + compiler->compLclFrameSize;
+ if (!arm_Valid_Imm_For_Add_SP(afterLclFrameSPtoFPdelta))
+ {
+ // Oh well, it looks too big. Go ahead and establish the frame pointer here.
+ genEstablishFramePointer(SPtoFPdelta, /*reportUnwindData*/ true);
+ needToEstablishFP = false;
+ }
+ }
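+ // Worked example (hypothetical sizes): with compCalleeRegsPushed == 5 and compLclFrameSize == 0x200,
+ // SPtoFPdelta == (5 - 2) * 4 == 12 and afterLclFrameSPtoFPdelta == 0x20C, which fits in an
+ // "add r11, sp, #imm", so we defer establishing the frame pointer until after the local frame is
+ // allocated; a frame too large for the immediate is handled in the branch above instead.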
+#endif // _TARGET_ARM_
+
+ //-------------------------------------------------------------------------
+ //
+ // Subtract the local frame size from SP.
+ //
+ //-------------------------------------------------------------------------
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_ARM64_
+ regMaskTP maskStackAlloc = RBM_NONE;
+
+#ifdef _TARGET_ARM_
+ maskStackAlloc =
+ genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED);
+#endif // _TARGET_ARM_
+
+ if (maskStackAlloc == RBM_NONE)
+ {
+ genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
+ }
+#endif // !_TARGET_ARM64_
+
+//-------------------------------------------------------------------------
+
+#ifdef _TARGET_ARM_
+ if (compiler->compLocallocUsed)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE);
+ regTracker.rsTrackRegTrash(REG_SAVED_LOCALLOC_SP);
+ compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
+ }
+#endif // _TARGET_ARM_
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // Preserve callee saved float regs to stack.
+ genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize);
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+#ifdef _TARGET_AMD64_
+ // Establish the AMD64 frame pointer after the OS-reported prolog.
+ if (doubleAlignOrFramePointerUsed())
+ {
+ bool reportUnwindData = compiler->compLocallocUsed || compiler->opts.compDbgEnC;
+ genEstablishFramePointer(compiler->codeGen->genSPtoFPdelta(), reportUnwindData);
+ }
+#endif //_TARGET_AMD64_
+
+//-------------------------------------------------------------------------
+//
+// This is the end of the OS-reported prolog for purposes of unwinding
+//
+//-------------------------------------------------------------------------
+
+#ifdef _TARGET_ARM_
+ if (needToEstablishFP)
+ {
+ genEstablishFramePointer(afterLclFrameSPtoFPdelta, /*reportUnwindData*/ false);
+ needToEstablishFP = false; // nobody uses this later, but set it anyway, just to be explicit
+ }
+#endif // _TARGET_ARM_
+
+ if (compiler->info.compPublishStubParam)
+ {
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
+ compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
+#else
+ // mov [lvaStubArgumentVar], EAX
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
+ compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
+#endif
+ assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM);
+
+ // It's no longer live; clear it out so it can be used after this in the prolog
+ intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM;
+ }
+
+#if STACK_PROBES
+ // We could probably fold this into the loop for the FrameSize >= 0x3000 probing
+ // when creating the stack frame. Don't think it's worth it, though.
+ if (genNeedPrologStackProbe)
+ {
+ //
+ // Can't have a call until we have enough padding for rejit
+ //
+ genPrologPadForReJit();
+ noway_assert(compiler->opts.compNeedStackProbes);
+ genGenerateStackProbe();
+ compiler->compStackProbePrologDone = true;
+ }
+#endif // STACK_PROBES
+
+ //
+ // Zero out the frame as needed
+ //
+
+ genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed);
+
+#if FEATURE_EH_FUNCLETS
+
+ genSetPSPSym(initReg, &initRegZeroed);
+
+#else // !FEATURE_EH_FUNCLETS
+
+ // when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots
+ if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
+ {
+ /*
+ // size/speed option?
+ getEmitter()->emitIns_I_ARR(INS_mov, EA_PTRSIZE, 0,
+ REG_EBP, REG_NA, -compiler->lvaShadowSPfirstOffs);
+ */
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*));
+
+ // Zero out the slot for nesting level 0
+ unsigned firstSlotOffs = filterEndOffsetSlotOffs - (sizeof(void*));
+
+ if (!initRegZeroed)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+ initRegZeroed = true;
+ }
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar,
+ firstSlotOffs);
+ }
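+ // Worked example (hypothetical, x86 with a 12-byte shadow-SP-slots area): filterEndOffsetSlotOffs
+ // == 12 - 4 == 8 and firstSlotOffs == 4, so the store above writes a zero to the nesting-level-0
+ // slot at offset 4 within lvaShadowSPslotsVar.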
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ genReportGenericContextArg(initReg, &initRegZeroed);
+
+#if defined(LEGACY_BACKEND) // in RyuJIT backend this has already been expanded into trees
+ if (compiler->info.compCallUnmanaged)
+ {
+ getEmitter()->emitDisableRandomNops();
+ initRegs = genPInvokeMethodProlog(initRegs);
+ getEmitter()->emitEnableRandomNops();
+ }
+#endif // defined(LEGACY_BACKEND)
+
+ // The local variable representing the security object must be on the stack frame
+ // and must be 0 initialized.
+ noway_assert((compiler->lvaSecurityObject == BAD_VAR_NUM) ||
+ (compiler->lvaTable[compiler->lvaSecurityObject].lvOnFrame &&
+ compiler->lvaTable[compiler->lvaSecurityObject].lvMustInit));
+
+ // Initialize any "hidden" slots/locals
+
+ if (compiler->compLocallocUsed)
+ {
+ noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
+#ifdef _TARGET_ARM64_
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_FPBASE, compiler->lvaLocAllocSPvar, 0);
+#else
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+#endif
+ }
+
+ // Set up the GS security cookie
+
+ genSetGSSecurityCookie(initReg, &initRegZeroed);
+
+#ifdef PROFILING_SUPPORTED
+
+ // Insert a function entry callback for profiling, if requested.
+ genProfilingEnterCallback(initReg, &initRegZeroed);
+
+#endif // PROFILING_SUPPORTED
+
+ if (!genInterruptible)
+ {
+ /*-------------------------------------------------------------------------
+ *
+ * The 'real' prolog ends here for non-interruptible methods.
+ * For fully-interruptible methods, we extend the prolog so that
+ * we do not need to track GC information while shuffling the
+ * arguments.
+ *
+ * Make sure there's enough padding for ReJIT.
+ *
+ */
+ genPrologPadForReJit();
+ getEmitter()->emitMarkPrologEnd();
+ }
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ // The unused bits of Vector3 arguments must be cleared
+ // since the native compiler doesn't initialize the upper bits to zero.
+ //
+ // TODO-Cleanup: This logic can be implemented in
+ // genFnPrologCalleeRegArgs() for argument registers and
+ // genEnregisterIncomingStackArgs() for stack arguments.
+ genClearStackVec3ArgUpperBits();
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING && FEATURE_SIMD
+
+ /*-----------------------------------------------------------------------------
+ * Take care of register arguments first
+ */
+
+ RegState* regState;
+
+#ifndef LEGACY_BACKEND
+ // Update the arg initial register locations.
+ compiler->lvaUpdateArgsWithInitialReg();
+#endif // !LEGACY_BACKEND
+
+ FOREACH_REGISTER_FILE(regState)
+ {
+ if (regState->rsCalleeRegArgMaskLiveIn)
+ {
+ // If we need an extra register to shuffle around the incoming registers
+ // we will use xtraReg (initReg) and set the xtraRegClobbered flag,
+ // if we don't need to use the xtraReg then this flag will stay false
+ //
+ regNumber xtraReg;
+ bool xtraRegClobbered = false;
+
+ if (genRegMask(initReg) & RBM_ARG_REGS)
+ {
+ xtraReg = initReg;
+ }
+ else
+ {
+ xtraReg = REG_SCRATCH;
+ initRegZeroed = false;
+ }
+
+ genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
+
+ if (xtraRegClobbered)
+ {
+ initRegZeroed = false;
+ }
+ }
+ }
+
+ // Home the incoming arguments
+ genEnregisterIncomingStackArgs();
+
+ /* Initialize any must-init registers variables now */
+
+ if (initRegs)
+ {
+ regMaskTP regMask = 0x1;
+
+ for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1)
+ {
+ if (regMask & initRegs)
+ {
+ // Check if we have already zeroed this register
+ if ((reg == initReg) && initRegZeroed)
+ {
+ continue;
+ }
+ else
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg);
+ if (reg == initReg)
+ {
+ initRegZeroed = true;
+ }
+ }
+ }
+ }
+ }
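+ // For example (illustrative): if initRegs == {rbx, rsi} on xarch, the loop above emits
+ // "xor ebx, ebx" and "xor esi, esi" (via instGen_Set_Reg_To_Zero), skipping a register only if it
+ // is initReg and has already been zeroed earlier in the prolog.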
+
+#if !FEATURE_STACK_FP_X87
+ if (initFltRegs | initDblRegs)
+ {
+ // If initReg is not in initRegs then we will use REG_SCRATCH
+ if ((genRegMask(initReg) & initRegs) == 0)
+ {
+ initReg = REG_SCRATCH;
+ initRegZeroed = false;
+ }
+
+#ifdef _TARGET_ARM_
+ // This is needed only for ARM, since it can use a zero-initialized int register
+ // to initialize the VFP registers.
+ if (!initRegZeroed)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+ initRegZeroed = true;
+ }
+#endif // _TARGET_ARM_
+
+ genZeroInitFltRegs(initFltRegs, initDblRegs, initReg);
+ }
+#endif // !FEATURE_STACK_FP_X87
+
+#if FEATURE_STACK_FP_X87
+ //
+ // Here is where we load the enregistered floating point arguments
+ // and locals onto the x86-FPU.
+ //
+ genCodeForPrologStackFP();
+#endif
+
+ //-----------------------------------------------------------------------------
+
+ //
+ // Increase the prolog size here only if fully interruptible.
+ // And again make sure it's big enough for ReJIT
+ //
+
+ if (genInterruptible)
+ {
+ genPrologPadForReJit();
+ getEmitter()->emitMarkPrologEnd();
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ psiEndProlog();
+ }
+#endif
+
+ if (hasGCRef)
+ {
+ getEmitter()->emitSetFrameRangeGCRs(GCrefLo, GCrefHi);
+ }
+ else
+ {
+ noway_assert(GCrefLo == +INT_MAX);
+ noway_assert(GCrefHi == -INT_MAX);
+ }
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n");
+ }
+#endif
+
+#ifdef _TARGET_X86_
+ // On non-x86 the VARARG cookie does not need any special treatment.
+
+ // Load up the VARARG argument pointer register so it doesn't get clobbered.
+ // only do this if we actually access any statically declared args
+ // (our argument pointer register has a refcount > 0).
+ unsigned argsStartVar = compiler->lvaVarargsBaseOfStkArgs;
+
+ if (compiler->info.compIsVarArgs && compiler->lvaTable[argsStartVar].lvRefCnt > 0)
+ {
+ varDsc = &compiler->lvaTable[argsStartVar];
+
+ noway_assert(compiler->info.compArgsCount > 0);
+
+ // MOV EAX, <VARARGS HANDLE>
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->info.compArgsCount - 1, 0);
+ regTracker.rsTrackRegTrash(REG_EAX);
+
+ // MOV EAX, [EAX]
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0);
+
+ // EDX might actually be holding something here. So make sure to only use EAX for this code
+ // sequence.
+
+ LclVarDsc* lastArg = &compiler->lvaTable[compiler->info.compArgsCount - 1];
+ noway_assert(!lastArg->lvRegister);
+ signed offset = lastArg->lvStkOffs;
+ assert(offset != BAD_STK_OFFS);
+ noway_assert(lastArg->lvFramePointerBased);
+
+ // LEA EAX, &<VARARGS HANDLE> + EAX
+ getEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset);
+
+ if (varDsc->lvRegister)
+ {
+ if (varDsc->lvRegNum != REG_EAX)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, varDsc->lvRegNum, REG_EAX);
+ regTracker.rsTrackRegTrash(varDsc->lvRegNum);
+ }
+ }
+ else
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0);
+ }
+ }
+
+#endif // _TARGET_X86_
+
+#ifdef DEBUG
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+ }
+#endif
+
+ getEmitter()->emitEndProlog();
+ compiler->unwindEndProlog();
+
+ noway_assert(getEmitter()->emitMaxTmpSize == compiler->tmpSize);
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Generates code for a function epilog.
+ *
+ * Please consult the "debugger team notification" comment in genFnProlog().
+ */
+
+#if defined(_TARGET_ARM_)
+
+void CodeGen::genFnEpilog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFnEpilog()\n");
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
+ gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
+ gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n__epilog:\n");
+
+ if (verbose)
+ {
+ printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
+ dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
+ printf(", gcRegGCrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
+ printf(", gcRegByrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
+ printf("\n");
+ }
+#endif
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+
+ // We delay starting the unwind codes until we have an instruction which we know
+ // needs an unwind code. In particular, for large stack frames in methods without
+ // localloc, the sequence might look something like this:
+ // movw r3, 0x38e0
+ // add sp, r3
+ // pop {r4,r5,r6,r10,r11,pc}
+ // In this case, the "movw" should not be part of the unwind codes, since it will
+ // be a NOP, and it is a waste to start with a NOP. Note that calling unwindBegEpilog()
+ // also sets the current location as the beginning offset of the epilog, so every
+ // instruction afterwards needs an unwind code. In the case above, if you call
+ // unwindBegEpilog() before the "movw", then you must generate a NOP for the "movw".
+
+ bool unwindStarted = false;
+
+ // Tear down the stack frame
+
+ if (compiler->compLocallocUsed)
+ {
+ if (!unwindStarted)
+ {
+ compiler->unwindBegEpilog();
+ unwindStarted = true;
+ }
+
+ // mov R9 into SP
+ inst_RV_RV(INS_mov, REG_SP, REG_SAVED_LOCALLOC_SP);
+ compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
+ }
+
+ if (jmpEpilog ||
+ genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED) ==
+ RBM_NONE)
+ {
+ genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted, jmpEpilog);
+ }
+
+ if (!unwindStarted)
+ {
+ // If we haven't generated anything yet, we're certainly going to generate a "pop" next.
+ compiler->unwindBegEpilog();
+ unwindStarted = true;
+ }
+
+ genPopCalleeSavedRegisters(jmpEpilog);
+
+ if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
+ {
+ // We'd better not have used a "pop {..., pc}" to return; otherwise this code would be unreachable
+ noway_assert(!genUsedPopToReturn);
+
+ int preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+ inst_RV_IV(INS_add, REG_SPBASE, preSpillRegArgSize, EA_PTRSIZE);
+ compiler->unwindAllocStack(preSpillRegArgSize);
+ }
+
+ if (jmpEpilog)
+ {
+ noway_assert(block->bbJumpKind == BBJ_RETURN);
+ noway_assert(block->bbTreeList);
+
+ // We'd better not have used a "pop {..., pc}" to return; otherwise this code would be unreachable
+ noway_assert(!genUsedPopToReturn);
+
+ /* figure out what jump we have */
+
+ GenTree* jmpNode = block->lastNode();
+ noway_assert(jmpNode->gtOper == GT_JMP);
+
+ CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ void* addr;
+ regNumber indCallReg;
+ emitter::EmitCallType callType;
+
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
+ switch (addrInfo.accessType)
+ {
+ case IAT_VALUE:
+ if (arm_Valid_Imm_For_BL((ssize_t)addrInfo.addr))
+ {
+ // Simple direct call
+ callType = emitter::EC_FUNC_TOKEN;
+ addr = addrInfo.addr;
+ indCallReg = REG_NA;
+ break;
+ }
+
+ // otherwise the target address doesn't fit in an immediate
+ // so we have to burn a register...
+ __fallthrough;
+
+ case IAT_PVALUE:
+ // Load the address into a register, load indirect and call through a register
+ // We have to use R12 since we assume the argument registers are in use
+ callType = emitter::EC_INDIR_R;
+ indCallReg = REG_R12;
+ addr = nullptr;
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
+ if (addrInfo.accessType == IAT_PVALUE)
+ {
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+ }
+ break;
+
+ case IAT_PPVALUE:
+ default:
+ NO_WAY("Unsupported JMP indirection");
+ }
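+ // For IAT_PVALUE, the sequence built above is roughly (a sketch):
+ //   movw/movt r12, <address of the entry-point cell>  ; relocatable immediate
+ //   ldr       r12, [r12]                               ; load the actual target
+ // and the indirect jump through r12 is emitted below.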
+
+ /* Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ * the same descriptor with some minor adjustments.
+ */
+
+ getEmitter()->emitIns_Call(callType, methHnd, INDEBUG_LDISASM_COMMA(nullptr) addr,
+ 0, // argSize
+ EA_UNKNOWN, // retSize
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, // IL offset
+ indCallReg, // ireg
+ REG_NA, // xreg
+ 0, // xmul
+ 0, // disp
+ true); // isJump
+ }
+ else
+ {
+ if (!genUsedPopToReturn)
+ {
+ // If we did not use a pop to return, then we did a "pop {..., lr}" instead of "pop {..., pc}",
+ // so we need a "bx lr" instruction to return from the function.
+ inst_RV(INS_bx, REG_LR, TYP_I_IMPL);
+ compiler->unwindBranch16();
+ }
+ }
+
+ compiler->unwindEndEpilog();
+}
+
+#elif defined(_TARGET_ARM64_)
+
+void CodeGen::genFnEpilog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFnEpilog()\n");
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
+ gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
+ gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n__epilog:\n");
+
+ if (verbose)
+ {
+ printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
+ dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
+ printf(", gcRegGCrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
+ printf(", gcRegByrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
+ printf("\n");
+ }
+#endif
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+
+ compiler->unwindBegEpilog();
+
+ genPopCalleeSavedRegistersAndFreeLclFrame(jmpEpilog);
+
+ if (jmpEpilog)
+ {
+ noway_assert(block->bbJumpKind == BBJ_RETURN);
+ noway_assert(block->bbTreeList != nullptr);
+
+ // figure out what jump we have
+ GenTree* jmpNode = block->lastNode();
+#if !FEATURE_FASTTAILCALL
+ noway_assert(jmpNode->gtOper == GT_JMP);
+#else
+ // arm64
+ // If jmpNode is GT_JMP then gtNext must be null.
+ // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
+ noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
+
+ // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
+ noway_assert((jmpNode->gtOper == GT_JMP) ||
+ ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
+
+ // The next block is associated with this "if" stmt
+ if (jmpNode->gtOper == GT_JMP)
+#endif
+ {
+ // Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ // the same descriptor with some minor adjustments.
+ CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
+ if (addrInfo.accessType != IAT_VALUE)
+ {
+ NYI_ARM64("Unsupported JMP indirection");
+ }
+
+ emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
+
+ // Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ // the same descriptor with some minor adjustments.
+ getEmitter()->emitIns_Call(callType, methHnd, INDEBUG_LDISASM_COMMA(nullptr) addrInfo.addr,
+ 0, // argSize
+ EA_UNKNOWN, // retSize
+ EA_UNKNOWN, // secondRetSize
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */
+ true); /* isJump */
+ }
+#if FEATURE_FASTTAILCALL
+ else
+ {
+ // Fast tail call.
+ // Call target = REG_IP0.
+ // https://github.com/dotnet/coreclr/issues/4827
+ // Do we need a special encoding for stack walker like rex.w prefix for x64?
+ getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_IP0);
+ }
+#endif // FEATURE_FASTTAILCALL
+ }
+ else
+ {
+ inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
+ compiler->unwindReturn(REG_LR);
+ }
+
+ compiler->unwindEndEpilog();
+}
+
+#elif defined(_TARGET_XARCH_)
+
+void CodeGen::genFnEpilog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFnEpilog()\n");
+ }
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
+ gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
+ gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
+
+ noway_assert(!compiler->opts.MinOpts() || isFramePointerUsed()); // FPO not allowed with minOpts
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+#endif
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n__epilog:\n");
+ }
+
+ if (verbose)
+ {
+ printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
+ dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
+ printf(", gcRegGCrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
+ printf(", gcRegByrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
+ printf("\n");
+ }
+#endif
+
+#if !FEATURE_STACK_FP_X87
+ // Restore float registers that were saved to stack before SP is modified.
+ genRestoreCalleeSavedFltRegs(compiler->compLclFrameSize);
+#endif // !FEATURE_STACK_FP_X87
+
+ /* Compute the size in bytes we've pushed/popped */
+
+ if (!doubleAlignOrFramePointerUsed())
+ {
+ // We have an ESP frame
+
+ noway_assert(compiler->compLocallocUsed == false); // Only used with frame-pointer
+
+ /* Get rid of our local variables */
+
+ if (compiler->compLclFrameSize)
+ {
+#ifdef _TARGET_X86_
+ /* Add 'compiler->compLclFrameSize' to ESP */
+ /* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */
+
+ if ((compiler->compLclFrameSize == sizeof(void*)) && !compiler->compJmpOpUsed)
+ {
+ inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(REG_ECX);
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ /* Add 'compiler->compLclFrameSize' to ESP */
+ /* Generate "add esp, <stack-size>" */
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
+ }
+ }
+
+ genPopCalleeSavedRegisters();
+ }
+ else
+ {
+ noway_assert(doubleAlignOrFramePointerUsed());
+
+ /* Tear down the stack frame */
+
+ bool needMovEspEbp = false;
+
+#if DOUBLE_ALIGN
+ if (compiler->genDoubleAlign())
+ {
+ //
+ // add esp, compLclFrameSize
+ //
+ // We need not do anything (except the "mov esp, ebp") if
+ // compiler->compCalleeRegsPushed==0. However, this is unlikely, and it
+ // also complicates the code manager. Hence, we ignore that case.
+
+ noway_assert(compiler->compLclFrameSize != 0);
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
+
+ needMovEspEbp = true;
+ }
+ else
+#endif // DOUBLE_ALIGN
+ {
+ bool needLea = false;
+
+ if (compiler->compLocallocUsed)
+ {
+ // ESP may be variable if a localloc was actually executed. Reset it.
+ // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
+
+ needLea = true;
+ }
+ else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED))
+ {
+ if (compiler->compLclFrameSize != 0)
+ {
+#ifdef _TARGET_AMD64_
+ // AMD64 can't use "mov esp, ebp", according to the ABI specification describing epilogs. So,
+ // do an LEA to "pop off" the frame allocation.
+ needLea = true;
+#else // !_TARGET_AMD64_
+ // We will just generate "mov esp, ebp" and be done with it.
+ needMovEspEbp = true;
+#endif // !_TARGET_AMD64_
+ }
+ }
+ else if (compiler->compLclFrameSize == 0)
+ {
+ // do nothing before popping the callee-saved registers
+ }
+#ifdef _TARGET_X86_
+ else if (compiler->compLclFrameSize == REGSIZE_BYTES)
+ {
+ // "pop ecx" will make ESP point to the callee-saved registers
+ inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(REG_ECX);
+ }
+#endif // _TARGET_X86_
+ else
+ {
+ // We need to make ESP point to the callee-saved registers
+ needLea = true;
+ }
+
+ if (needLea)
+ {
+ int offset;
+
+#ifdef _TARGET_AMD64_
+ // lea esp, [ebp + compiler->compLclFrameSize - genSPtoFPdelta]
+ //
+ // Case 1: localloc not used.
+ // genSPToFPDelta = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize
+ // offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
+ // The amount to be subtracted from RBP to point at callee saved int regs.
+ //
+ // Case 2: localloc used
+ // genSPToFPDelta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize)
+ // offset = Amount to be added to RBP to point at callee saved int regs.
+ offset = genSPtoFPdelta() - compiler->compLclFrameSize;
+
+ // Offset should fit within a byte if localloc is not used.
+ if (!compiler->compLocallocUsed)
+ {
+ noway_assert(offset < UCHAR_MAX);
+ }
+#else
+ // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
+ offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
+ noway_assert(offset < UCHAR_MAX); // the offset fits in a byte
+#endif
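+ // Plugging hypothetical numbers into the formulas above (AMD64, no localloc): with
+ // compCalleeRegsPushed == 3 and compLclFrameSize == 0x40, genSPtoFPdelta() == 3 * 8 + 0x40 == 0x58,
+ // so offset == 0x18 and the instruction below is "lea rsp, [rbp - 0x18]", leaving RSP at the
+ // callee-saved register area.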
+
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset);
+ }
+ }
+
+ //
+ // Pop the callee-saved registers (if any)
+ //
+
+ genPopCalleeSavedRegisters();
+
+#ifdef _TARGET_AMD64_
+ assert(!needMovEspEbp); // "mov esp, ebp" is not allowed in AMD64 epilogs
+#else // !_TARGET_AMD64_
+ if (needMovEspEbp)
+ {
+ // mov esp, ebp
+ inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
+ }
+#endif // !_TARGET_AMD64_
+
+ // pop ebp
+ inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
+ }
+
+ getEmitter()->emitStartExitSeq(); // Mark the start of the "return" sequence
+
+ /* Check if this a special return block i.e.
+ * CEE_JMP instruction */
+
+ if (jmpEpilog)
+ {
+ noway_assert(block->bbJumpKind == BBJ_RETURN);
+ noway_assert(block->bbTreeList);
+
+ // figure out what jump we have
+ GenTree* jmpNode = block->lastNode();
+#if !FEATURE_FASTTAILCALL
+ // x86
+ noway_assert(jmpNode->gtOper == GT_JMP);
+#else
+ // amd64
+ // If jmpNode is GT_JMP then gtNext must be null.
+ // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
+ noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
+
+ // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
+ noway_assert((jmpNode->gtOper == GT_JMP) ||
+ ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
+
+ // The next block is associated with this "if" stmt
+ if (jmpNode->gtOper == GT_JMP)
+#endif
+ {
+ // Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ // the same descriptor with some minor adjustments.
+ CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
+ if (addrInfo.accessType != IAT_VALUE && addrInfo.accessType != IAT_PVALUE)
+ {
+ NO_WAY("Unsupported JMP indirection");
+ }
+
+ const emitter::EmitCallType callType =
+ (addrInfo.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR;
+
+ // Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ // the same descriptor with some minor adjustments.
+ getEmitter()->emitIns_Call(callType, methHnd, INDEBUG_LDISASM_COMMA(nullptr) addrInfo.addr,
+ 0, // argSize
+ EA_UNKNOWN // retSize
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(EA_UNKNOWN), // secondRetSize
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA,
+ 0, 0, /* iloffset, ireg, xreg, xmul, disp */
+ true); /* isJump */
+ }
+#if FEATURE_FASTTAILCALL
+ else
+ {
+#ifdef _TARGET_AMD64_
+ // Fast tail call.
+ // Call target = RAX.
+ // Stack walker requires that a register indirect tail call be rex.w prefixed.
+ getEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX);
+#else
+ assert(!"Fast tail call as epilog+jmp");
+ unreached();
+#endif //_TARGET_AMD64_
+ }
+#endif // FEATURE_FASTTAILCALL
+ }
+ else
+ {
+ unsigned stkArgSize = 0; // Zero on all platforms except x86
+
+#if defined(_TARGET_X86_)
+
+ noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * sizeof(void*));
+ stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
+
+ noway_assert(compiler->compArgSize < 0x10000); // "ret" only has a 2-byte operand
+
+ // varargs has caller pop
+ if (compiler->info.compIsVarArgs)
+ stkArgSize = 0;
+
+#endif // defined(_TARGET_X86_)
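+ // Worked example (hypothetical, x86): with compArgSize == 24 and 2 register-passed args,
+ // stkArgSize == 24 - 2 * 4 == 16, so instGen_Return emits "ret 16" to pop the stack arguments;
+ // for varargs methods the caller pops, so stkArgSize is forced to 0 and we emit a plain "ret".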
+
+ /* Return, popping our arguments (if any) */
+ instGen_Return(stkArgSize);
+ }
+}
+
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+#if FEATURE_EH_FUNCLETS
+
+#ifdef _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ *
+ * Funclets have the following incoming arguments:
+ *
+ * catch: r0 = the exception object that was caught (see GT_CATCH_ARG)
+ * filter: r0 = the exception object to filter (see GT_CATCH_ARG), r1 = CallerSP of the containing function
+ * finally/fault: none
+ *
+ * Funclets set the following registers on exit:
+ *
+ * catch: r0 = the address at which execution should resume (see BBJ_EHCATCHRET)
+ * filter: r0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
+ * finally/fault: none
+ *
+ * The ARM funclet prolog sequence is:
+ *
+ * push {regs,lr} ; We push the callee-saved regs and 'lr'.
+ * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we
+ * ; actually use in the funclet. Currently, we save the same set of callee-saved regs
+ * ; calculated for the entire function.
+ * sub sp, XXX ; Establish the rest of the frame.
+ * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
+ * ; up to preserve stack alignment. If we push an odd number of registers, we also
+ * ; generate this, to keep the stack aligned.
+ *
+ * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
+ * ; filters.
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
+ * ; epilog.
+ *
+ * if (this is a filter funclet)
+ * {
+ * // r1 on entry to a filter funclet is CallerSP of the containing function:
+ * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
+ * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
+ * // a funclet. Consider:
+ * //
+ * // try {
+ * // try {
+ * // throw new Exception();
+ * // } catch(Exception) {
+ * // throw new Exception(); // The exception thrown here ...
+ * // }
+ * // } filter { // ... will be processed here, while the "catch" funclet frame is
+ * // // still on the stack
+ * // } filter-handler {
+ * // }
+ * //
+ * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the
+ * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting
+ * // nested EH. To simplify, we just always create a main function PSP for any function with a filter.
+ *
+ * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of
+ * ; the dynamically containing funclet or function)
+ * str r1, [sp + PSP_slot_SP_offset] ; store the PSP
+ * sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer
+ * }
+ * else
+ * {
+ * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
+ * // TODO-ARM-CQ: if VM set r1 to CallerSP on entry, like for filters, we could save an instruction.
+ *
+ * add r3, r11, Function_CallerSP_to_FP_delta ; compute the CallerSP, given the frame pointer. r3 is scratch.
+ * str r3, [sp + PSP_slot_SP_offset] ; store the PSP
+ * }
+ *
+ * The epilog sequence is then:
+ *
+ * add sp, XXX ; if necessary
+ * pop {regs,pc}
+ *
+ * If it is worth it, we could push r0, r1, r2, r3 instead of using an additional add/sub instruction.
+ * Code size would be smaller, but we would be writing to / reading from the stack, which might be slow.
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers |
+ * |-----------------------|
+ * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset
+ * | | // in function and funclet
+ * |-----------------------|
+ * | PSP slot |
+ * |-----------------------|
+ * ~ possible 4 byte pad ~
+ * ~ for alignment ~
+ * |-----------------------|
+ * | Outgoing arg space |
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ */
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFuncletProlog()\n");
+#endif
+
+ assert(block != nullptr);
+ assert((block->bbFlags & BBF_FUNCLET_BEG) != 0);
+
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ gcInfo.gcResetForBB();
+
+ compiler->unwindBegProlog();
+
+ regMaskTP maskPushRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
+ regMaskTP maskPushRegsInt = genFuncletInfo.fiSaveRegs & ~maskPushRegsFloat;
+
+ regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat);
+ maskPushRegsInt |= maskStackAlloc;
+
+ assert(FitsIn<int>(maskPushRegsInt));
+ inst_IV(INS_push, (int)maskPushRegsInt);
+ compiler->unwindPushMaskInt(maskPushRegsInt);
+
+ if (maskPushRegsFloat != RBM_NONE)
+ {
+ genPushFltRegs(maskPushRegsFloat);
+ compiler->unwindPushMaskFloat(maskPushRegsFloat);
+ }
+
+ bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
+
+ regMaskTP maskArgRegsLiveIn;
+ if (isFilter)
+ {
+ maskArgRegsLiveIn = RBM_R0 | RBM_R1;
+ }
+ else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
+ {
+ maskArgRegsLiveIn = RBM_NONE;
+ }
+ else
+ {
+ maskArgRegsLiveIn = RBM_R0;
+ }
+
+ regNumber initReg = REG_R3; // R3 is never live on entry to a funclet, so it can be trashed
+ bool initRegZeroed = false;
+
+ if (maskStackAlloc == RBM_NONE)
+ {
+ genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
+ }
+
+ // This is the end of the OS-reported prolog for purposes of unwinding
+ compiler->unwindEndProlog();
+
+ if (isFilter)
+ {
+ // This is the first block of a filter
+
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
+ genFuncletInfo.fiPSP_slot_CallerSP_offset);
+ regTracker.rsTrackRegTrash(REG_R1);
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
+ genFuncletInfo.fiPSP_slot_SP_offset);
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1,
+ genFuncletInfo.fiFunctionCallerSPtoFPdelta);
+ }
+ else
+ {
+ // This is a non-filter funclet
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE,
+ genFuncletInfo.fiFunctionCallerSPtoFPdelta);
+ regTracker.rsTrackRegTrash(REG_R3);
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
+ genFuncletInfo.fiPSP_slot_SP_offset);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFuncletEpilog()\n");
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ // Just as for the main function, we delay starting the unwind codes until we have
+ // an instruction which we know needs an unwind code. This is to support code like
+ // this:
+ // movw r3, 0x38e0
+ // add sp, r3
+ // pop {r4,r5,r6,r10,r11,pc}
+ // where the "movw" shouldn't be part of the unwind codes. See genFnEpilog() for more details.
+
+ bool unwindStarted = false;
+
+ /* The saved regs info saves the LR register. We need to pop the PC register to return */
+ assert(genFuncletInfo.fiSaveRegs & RBM_LR);
+
+ regMaskTP maskPopRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
+ regMaskTP maskPopRegsInt = genFuncletInfo.fiSaveRegs & ~maskPopRegsFloat;
+
+ regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat);
+ maskPopRegsInt |= maskStackAlloc;
+
+ if (maskStackAlloc == RBM_NONE)
+ {
+ genFreeLclFrame(genFuncletInfo.fiSpDelta, &unwindStarted, false);
+ }
+
+ if (!unwindStarted)
+ {
+ // We'll definitely generate an unwindable instruction next
+ compiler->unwindBegEpilog();
+ unwindStarted = true;
+ }
+
+ maskPopRegsInt &= ~RBM_LR;
+ maskPopRegsInt |= RBM_PC;
+
+ if (maskPopRegsFloat != RBM_NONE)
+ {
+ genPopFltRegs(maskPopRegsFloat);
+ compiler->unwindPopMaskFloat(maskPopRegsFloat);
+ }
+
+ assert(FitsIn<int>(maskPopRegsInt));
+ inst_IV(INS_pop, (int)maskPopRegsInt);
+ compiler->unwindPopMaskInt(maskPopRegsInt);
+
+ compiler->unwindEndEpilog();
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ * Note that all funclet prologs are identical, and all funclet epilogs are
+ * identical (per type: filters are identical, and non-filters are identical).
+ * Thus, we compute the data used for these just once.
+ *
+ * See genFuncletProlog() for more information about the prolog/epilog sequences.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (compiler->ehAnyFunclets())
+ {
+ assert(isFramePointerUsed());
+ assert(compiler->lvaDoneFrameLayout ==
+ Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
+
+ // Frame pointer doesn't point at the end, it points at the pushed r11. So, instead
+ // of adding the number of callee-saved regs to CallerSP, we add 1 for lr and 1 for r11
+ // (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved
+ // (also assumed in genFnProlog()).
+ assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0);
+ unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+ genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES;
+
+ regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
+ unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
+ unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving
+ assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
+ unsigned funcletFrameSize =
+ preSpillRegArgSize + saveRegsSize + REGSIZE_BYTES /* PSP slot */ + compiler->lvaOutgoingArgSpaceSize;
+
+ unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
+ unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
+ unsigned spDelta = funcletFrameSizeAligned - saveRegsSize;
+
+ unsigned PSP_slot_SP_offset = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad;
+ int PSP_slot_CallerSP_offset =
+ -(int)(funcletFrameSize - compiler->lvaOutgoingArgSpaceSize); // NOTE: it's negative!
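+ // Worked example (illustrative only; hypothetical values, assuming REGSIZE_BYTES = 4 and STACK_ALIGN = 8
+ // for ARM): with no pre-spill regs, 8 saved int regs (saveRegsSize = 32), and lvaOutgoingArgSpaceSize = 8:
+ //   fiFunctionCallerSPtoFPdelta = 0 + 2*4 = 8
+ //   funcletFrameSize            = 0 + 32 + 4 + 8 = 44
+ //   funcletFrameSizeAligned     = 48, so funcletFrameAlignmentPad = 4
+ //   spDelta                     = 48 - 32 = 16
+ //   PSP_slot_SP_offset          = 8 + 4 = 12
+ //   PSP_slot_CallerSP_offset    = -(44 - 8) = -36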
+
+ /* Now save it for future use */
+
+ genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
+ genFuncletInfo.fiSpDelta = spDelta;
+ genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset;
+ genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n");
+ printf("Funclet prolog / epilog info\n");
+ printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta);
+ printf(" Save regs: ");
+ dspRegMask(rsMaskSaveRegs);
+ printf("\n");
+ printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
+ printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset);
+ printf(" PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset);
+
+ if (PSP_slot_CallerSP_offset !=
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
+ printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
+ }
+#endif // DEBUG
+
+ assert(PSP_slot_CallerSP_offset < 0);
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM);
+ assert(PSP_slot_CallerSP_offset == compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset
+ // used in main
+ // function and
+ // funclet!
+ }
+}
+
+#elif defined(_TARGET_AMD64_)
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ *
+ * Funclets have the following incoming arguments:
+ *
+ * catch/filter-handler: rcx = InitialSP, rdx = the exception object that was caught (see GT_CATCH_ARG)
+ * filter: rcx = InitialSP, rdx = the exception object to filter (see GT_CATCH_ARG)
+ * finally/fault: rcx = InitialSP
+ *
+ * Funclets set the following registers on exit:
+ *
+ * catch/filter-handler: rax = the address at which execution should resume (see BBJ_EHCATCHRET)
+ * filter: rax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
+ * finally/fault: none
+ *
+ * The AMD64 funclet prolog sequence is:
+ *
+ * push ebp
+ * push callee-saved regs
+ * ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use
+ * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for
+ * ; the entire function.
+ * sub sp, XXX ; Establish the rest of the frame.
+ * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
+ * ; up to preserve stack alignment. If we push an odd number of registers, we also
+ * ; generate this, to keep the stack aligned.
+ *
+ * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
+ * ; filters.
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
+ * ; epilog.
+ * ; Also, re-establish the frame pointer from the PSP.
+ *
+ * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the
+ * ; PSP of the dynamically containing funclet or function)
+ * mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame
+ * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If
+ * ; Function_InitialSP_to_FP_delta==0, we don't need this
+ * ; instruction.
+ *
+ * The epilog sequence is then:
+ *
+ * add rsp, XXX
+ * pop callee-saved regs ; if necessary
+ * pop rbp
+ * ret
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * | Return address |
+ * |-----------------------|
+ * | Saved EBP |
+ * |-----------------------|
+ * |Callee saved registers |
+ * |-----------------------|
+ * ~ possible 8 byte pad ~
+ * ~ for alignment ~
+ * |-----------------------|
+ * | PSP slot |
+ * |-----------------------|
+ * | Outgoing arg space | // this only exists if the function makes a call
+ * |-----------------------| <---- Initial SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * TODO-AMD64-Bug?: the frame pointer should really point to the PSP slot (the debugger seems to assume this
+ * in DacDbiInterfaceImpl::InitParentFrameInfo()), or someplace above Initial-SP. There is an AMD64
+ * UNWIND_INFO restriction that it must be within 240 bytes of Initial-SP. See jit64\amd64\inc\md.h
+ * "FRAMEPTR OFFSETS" for details.
+ */
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFuncletProlog()\n");
+ }
+#endif
+
+ assert(!regSet.rsRegsModified(RBM_FPBASE));
+ assert(block != nullptr);
+ assert(block->bbFlags & BBF_FUNCLET_BEG);
+ assert(isFramePointerUsed());
+
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ gcInfo.gcResetForBB();
+
+ compiler->unwindBegProlog();
+
+ // We need to push ebp, since it's callee-saved.
+ // We need to push the callee-saved registers. We only need to push the ones that we need, but we don't
+ // keep track of that on a per-funclet basis, so we push the same set as in the main function.
+ // The only fixed-size frame we need to allocate is whatever is big enough for the PSPSym, since nothing else
+ // is stored here (all temps are allocated in the parent frame).
+ // We do need to allocate the outgoing argument space, in case there are calls here. This must be the same
+ // size as the parent frame's outgoing argument space, to keep the PSPSym offset the same.
+
+ inst_RV(INS_push, REG_FPBASE, TYP_REF);
+ compiler->unwindPush(REG_FPBASE);
+
+ // Callee saved int registers are pushed to stack.
+ genPushCalleeSavedRegisters();
+
+ regMaskTP maskArgRegsLiveIn;
+ if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
+ {
+ maskArgRegsLiveIn = RBM_ARG_0;
+ }
+ else
+ {
+ maskArgRegsLiveIn = RBM_ARG_0 | RBM_ARG_2;
+ }
+
+ regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed
+ bool initRegZeroed = false;
+
+ genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
+
+ // Callee saved float registers are copied to stack in their assigned stack slots
+ // after allocating space for them as part of funclet frame.
+ genPreserveCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
+
+ // This is the end of the OS-reported prolog for purposes of unwinding
+ compiler->unwindEndProlog();
+
+ getEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset);
+
+ regTracker.rsTrackRegTrash(REG_FPBASE);
+
+ getEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset);
+
+ if (genFuncletInfo.fiFunction_InitialSP_to_FP_delta != 0)
+ {
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_FPBASE,
+ genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
+ }
+
+ // We've modified EBP, but not really. Say that we haven't...
+ regSet.rsRemoveRegsModified(RBM_FPBASE);
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ *
+ * Note that we don't do anything with unwind codes, because AMD64 only cares about unwind codes for the prolog.
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFuncletEpilog()\n");
+ }
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ // Restore callee saved XMM regs from their stack slots before modifying SP
+ // to position SP at the callee-saved int regs.
+ genRestoreCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
+ inst_RV_IV(INS_add, REG_SPBASE, genFuncletInfo.fiSpDelta, EA_PTRSIZE);
+ genPopCalleeSavedRegisters();
+ inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
+ instGen_Return(0);
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (!compiler->ehAnyFunclets())
+ {
+ return;
+ }
+
+ // Note that compLclFrameSize can't be used (nor can we call functions that depend on it),
+ // because we're not going to allocate the same size frame as the parent.
+
+ assert(isFramePointerUsed());
+ assert(compiler->lvaDoneFrameLayout ==
+ Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
+ assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved is finalized
+
+ // Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize,
+ // that's ok, because we're figuring out an offset in the parent frame.
+ genFuncletInfo.fiFunction_InitialSP_to_FP_delta =
+ compiler->lvaToInitialSPRelativeOffset(0, true); // trick to find the Initial-SP-relative offset of the frame
+ // pointer.
+
+ assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
+#ifndef UNIX_AMD64_ABI
+ // System V targets don't reserve 4 stack slots for outgoing params, so this check applies only to
+ // Windows AMD64, where we always have 4 outgoing argument slots if there are any calls in the function.
+ assert((compiler->lvaOutgoingArgSpaceSize == 0) || (compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)));
+#endif // !UNIX_AMD64_ABI
+ unsigned offset = compiler->lvaOutgoingArgSpaceSize;
+
+ genFuncletInfo.fiPSP_slot_InitialSP_offset = offset;
+
+ // How much stack do we allocate in the funclet?
+ // We need to 16-byte align the stack.
+
+ unsigned totalFrameSize =
+ REGSIZE_BYTES // return address
+ + REGSIZE_BYTES // pushed EBP
+ + (compiler->compCalleeRegsPushed * REGSIZE_BYTES); // pushed callee-saved int regs, not including EBP
+
+ // The entire 128 bits of each XMM register are saved to the stack due to ABI encoding requirements.
+ // Copying an entire XMM register to/from memory is most efficient when SP is aligned on an XMM_REGSIZE_BYTES boundary.
+ unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
+ unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0;
+
+ totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs
+ + calleeFPRegsSavedSize // pushed callee-saved float regs
+ // the 'pad' computed below goes here
+ + REGSIZE_BYTES // PSPSym
+ + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
+ ;
+
+ unsigned pad = AlignmentPad(totalFrameSize, 16);
+
+ genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary
+ + calleeFPRegsSavedSize // Callee saved xmm regs
+ + pad + REGSIZE_BYTES // PSPSym
+ + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
+ ;
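+ // Worked example (illustrative only; hypothetical values): with 2 pushed callee-saved int regs,
+ // 1 callee-saved xmm reg, and lvaOutgoingArgSpaceSize = 0x20:
+ //   totalFrameSize = 8 (return address) + 8 (pushed EBP) + 2*8 = 32, so FPRegsPad = 0
+ //   totalFrameSize += 0 + 16 + 8 (PSPSym) + 0x20 = 88, so pad = AlignmentPad(88, 16) = 8
+ //   fiSpDelta      = 0 + 16 + 8 + 8 + 0x20 = 64
+ // which leaves the funclet SP 16-byte aligned (32 + 64 = 96).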
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n");
+ printf("Funclet prolog / epilog info\n");
+ printf(" Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
+ printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
+ printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset);
+ }
+#endif // DEBUG
+
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM);
+ assert(genFuncletInfo.fiPSP_slot_InitialSP_offset ==
+ compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
+ // funclet!
+}
+
+#elif defined(_TARGET_ARM64_)
+
+// Look in CodeGenArm64.cpp
+
+#else // _TARGET_*
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ */
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+ NYI("Funclet prolog");
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+ NYI("Funclet epilog");
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (compiler->ehAnyFunclets())
+ {
+ NYI("genCaptureFuncletPrologEpilogInfo()");
+ }
+}
+
+#endif // _TARGET_*
+
+/*-----------------------------------------------------------------------------
+ *
+ * Set the main function PSPSym value in the frame.
+ * Funclets use different code to load the PSP sym and save it in their frame.
+ * See the document "X64 and ARM ABIs.docx" for a full description of the PSPSym.
+ * The PSPSym section of that document is copied here.
+ *
+ ***********************************
+ * The name PSPSym stands for Previous Stack Pointer Symbol. It is how a funclet
+ * accesses locals from the main function body.
+ *
+ * First, two definitions.
+ *
+ * Caller-SP is the value of the stack pointer in a function's caller before the call
+ * instruction is executed. That is, when function A calls function B, Caller-SP for B
+ * is the value of the stack pointer immediately before the call instruction in A
+ * (calling B) was executed. Note that this definition holds for both AMD64, which
+ * pushes the return value when a call instruction is executed, and for ARM, which
+ * doesn't. For AMD64, Caller-SP is the address above the call return address.
+ *
+ * Initial-SP is the initial value of the stack pointer after the fixed-size portion of
+ * the frame has been allocated. That is, before any "alloca"-type allocations.
+ *
+ * The PSPSym is a pointer-sized local variable in the frame of the main function and
+ * of each funclet. The value stored in PSPSym is the value of Initial-SP/Caller-SP
+ * for the main function. The stack offset of the PSPSym is reported to the VM in the
+ * GC information header. The value reported in the GC information is the offset of the
+ * PSPSym from Initial-SP/Caller-SP. (Note that both the value stored, and the way the
+ * value is reported to the VM, differs between architectures. In particular, note that
+ * most things in the GC information header are reported as offsets relative to Caller-SP,
+ * but PSPSym on AMD64 is one (maybe the only) exception.)
+ *
+ * The VM uses the PSPSym to find other locals it cares about (such as the generics context
+ * in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that
+ * the frame pointer is the same value in a funclet as it is in the main function body.
+ *
+ * When a funclet is called, it is passed the Establisher Frame Pointer. For AMD64 this is
+ * true for all funclets and it is passed as the first argument in RCX, but for ARM this is
+ * only true for first pass funclets (currently just filters) and it is passed as the second
+ * argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent"
+ * frame in the exception processing system. For the CLR, it points either to the main function
+ * frame or a dynamically enclosing funclet frame from the same function, for the funclet being
+ * invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on ARM.
+ *
+ * Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we
+ * don't know if the Establisher Frame is from the main function or a funclet, we design the
+ * main function and funclet frame layouts to place the PSPSym at an identical, small, constant
+ * offset from the Establisher Frame in each case. (This is also required because we only report
+ * a single offset to the PSPSym in the GC information, and that offset must be valid for the main
+ * function and all of its funclets). Then, the funclet uses this known offset to compute the
+ * PSPSym address and read its value. From this, it can compute the value of the frame pointer
+ * (which is a constant offset from the PSPSym value) and set the frame register to be the same
+ * as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's
+ * PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular,
+ * for every nested funclet invocation.
+ *
+ * On ARM, for all second pass funclets (finally, fault, catch, and filter-handler) the VM
+ * restores all non-volatile registers to their values within the parent frame. This includes
+ * the frame register (R11). Thus, the PSPSym is not used to recompute the frame pointer register
+ * in this case, though the PSPSym is copied to the funclet's frame, as for all funclets.
+ *
+ * Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument
+ * (REG_EXCEPTION_OBJECT). On AMD64 it is the second argument and thus passed in RDX. On
+ * ARM this is the first argument and passed in R0.
+ *
+ * (Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always
+ * pass the correct establisher frame to the funclet. Funclet may receive establisher frame of
+ * funclet when expecting that of original routine." It indicates this is the reason that a PSPSym
+ * is required in all funclets as well as the main function, whereas if the establisher frame was
+ * correctly reported, the PSPSym could be omitted in some cases.)
+ ***********************************
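+ *
+ * Illustrative recap (a sketch of the code above, not an additional code path): on AMD64, given the
+ * Establisher Frame in RCX (the parent's Initial-SP), the funclet prolog effectively does:
+ *     mov rbp, [rcx + PSP_slot_InitialSP_offset]       ; load the parent's PSPSym value
+ *     mov [rsp + PSP_slot_InitialSP_offset], rbp       ; copy it into this funclet's own PSPSym slot
+ *     lea rbp, [rbp + Function_InitialSP_to_FP_delta]  ; recover the parent's frame pointer
+ * On ARM, the filter funclet does the analogous computation starting from Caller-SP in R1 (see the
+ * ARM genFuncletProlog above).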
+ */
+void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (!compiler->ehNeedsPSPSym())
+ {
+ return;
+ }
+
+ noway_assert(isFramePointerUsed()); // We need an explicit frame pointer
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM); // We should have created the PSPSym variable
+
+#if defined(_TARGET_ARM_)
+
+ // We either generate:
+ // add r1, r11, 8
+ // str r1, [reg + PSPSymOffset]
+ // or:
+ // add r1, sp, 76
+ // str r1, [reg + PSPSymOffset]
+ // depending on the smallest encoding
+
+ int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
+
+ int callerSPOffs;
+ regNumber regBase;
+
+ if (arm_Valid_Imm_For_Add_SP(SPtoCallerSPdelta))
+ {
+ // use the "add <reg>, sp, imm" form
+
+ callerSPOffs = SPtoCallerSPdelta;
+ regBase = REG_SPBASE;
+ }
+ else
+ {
+ // use the "add <reg>, r11, imm" form
+
+ int FPtoCallerSPdelta = -genCallerSPtoFPdelta();
+ noway_assert(arm_Valid_Imm_For_Add(FPtoCallerSPdelta, INS_FLAGS_DONT_CARE));
+
+ callerSPOffs = FPtoCallerSPdelta;
+ regBase = REG_FPBASE;
+ }
+
+ // We will just use the initReg since it is an available register
+ // and we are probably done using it anyway...
+ regNumber regTmp = initReg;
+ *pInitRegZeroed = false;
+
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
+
+#elif defined(_TARGET_ARM64_)
+
+ int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
+
+ // We will just use the initReg since it is an available register
+ // and we are probably done using it anyway...
+ regNumber regTmp = initReg;
+ *pInitRegZeroed = false;
+
+ getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
+
+#elif defined(_TARGET_AMD64_)
+
+ // The PSP sym value is Initial-SP, not Caller-SP!
+ // We assume that RSP is Initial-SP when this function is called. That is, the stack frame
+ // has been established.
+ //
+ // We generate:
+ // mov [rbp-20h], rsp // store the Initial-SP (our current rsp) in the PSPsym
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0);
+
+#else // _TARGET_*
+
+ NYI("Set function PSP sym");
+
+#endif // _TARGET_*
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Generates code for all the function and funclet prologs and epilogs.
+ */
+
+void CodeGen::genGeneratePrologsAndEpilogs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** Before prolog / epilog generation\n");
+ getEmitter()->emitDispIGlist(false);
+ }
+#endif
+
+#ifndef LEGACY_BACKEND
+ // Before generating the prolog, we need to reset the variable locations to what they will be on entry.
+ // This affects our code that determines which untracked locals need to be zero initialized.
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
+#endif // !LEGACY_BACKEND
+
+ // Tell the emitter we're done with main code generation, and are going to start prolog and epilog generation.
+
+ getEmitter()->emitStartPrologEpilogGeneration();
+
+ gcInfo.gcResetForBB();
+ genFnProlog();
+
+ // Generate all the prologs and epilogs.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+
+ // Capture the data we're going to use in the funclet prolog and epilog generation. This is
+ // information computed during codegen, or during function prolog generation, like
+ // frame offsets. It must run after main function prolog generation.
+
+ genCaptureFuncletPrologEpilogInfo();
+
+#endif // FEATURE_EH_FUNCLETS
+
+ // Walk the list of prologs and epilogs and generate them.
+ // We maintain a list of prolog and epilog basic blocks in
+ // the insGroup structure in the emitter. This list was created
+ // during code generation by the genReserve*() functions.
+ //
+ // TODO: it seems like better design would be to create a list of prologs/epilogs
+ // in the code generator (not the emitter), and then walk that list. But we already
+ // have the insGroup list, which serves well, so we don't need the extra allocations
+ // for a prolog/epilog list in the code generator.
+
+ getEmitter()->emitGeneratePrologEpilog();
+
+ // Tell the emitter we're done with all prolog and epilog generation.
+
+ getEmitter()->emitFinishPrologEpilogGeneration();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** After prolog / epilog generation\n");
+ getEmitter()->emitDispIGlist(false);
+ }
+#endif
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX End Prolog / Epilog XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#if STACK_PROBES
+void CodeGen::genGenerateStackProbe()
+{
+ noway_assert(compiler->opts.compNeedStackProbes);
+
+ // If this assert fires, it means somebody has changed the value
+ // CORINFO_STACKPROBE_DEPTH.
+ // Why does the EE need such a deep probe? It should just need a couple
+ // of bytes, to set up a frame in the unmanaged code.
+
+ noway_assert(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK < compiler->eeGetPageSize());
+
+ JITDUMP("Emitting stack probe:\n");
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE,
+ -(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK));
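+ // Descriptive note (the exact disassembly form depends on the emitter): the above emits roughly
+ // "test [ESP - (CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK)], EAX", a read that touches the stack
+ // page at that depth, so any stack overflow fault is raised here rather than in unmanaged EE code
+ // that assumes the stack is available.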
+}
+#endif // STACK_PROBES
+
+/*****************************************************************************
+ *
+ * Record the constant and return a tree node that yields its address.
+ */
+
+GenTreePtr CodeGen::genMakeConst(const void* cnsAddr, var_types cnsType, GenTreePtr cnsTree, bool dblAlign)
+{
+ // Assign the constant an offset in the data section
+ UNATIVE_OFFSET cnsSize = genTypeSize(cnsType);
+ UNATIVE_OFFSET cnum = getEmitter()->emitDataConst(cnsAddr, cnsSize, dblAlign);
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf(" @%s%02u ", "CNS", cnum);
+
+ switch (cnsType)
+ {
+ case TYP_INT:
+ printf("DD %d \n", *(int*)cnsAddr);
+ break;
+ case TYP_LONG:
+ printf("DQ %lld\n", *(__int64*)cnsAddr);
+ break;
+ case TYP_FLOAT:
+ printf("DF %f \n", *(float*)cnsAddr);
+ break;
+ case TYP_DOUBLE:
+ printf("DQ %lf\n", *(double*)cnsAddr);
+ break;
+
+ default:
+ noway_assert(!"unexpected constant type");
+ }
+ }
+#endif
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+
+ return new (compiler, GT_CLS_VAR) GenTreeClsVar(cnsType, compiler->eeFindJitDataOffs(cnum), nullptr);
+}
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+// Save the callee-saved floating point registers (compCalleeFPRegsSavedMask), with the smallest register
+// number saved at [RSP+offset], working down the stack to the largest register number stored at
+// [RSP+offset-(genCountBits(regMask)-1)*XMM_REGSIZE_BYTES].
+// Here offset = 16-byte aligned offset after pushing integer registers.
+//
+// Params
+// lclFrameSize - Fixed frame size excluding callee pushed int regs.
+// non-funclet: this will be compLclFrameSize.
+// funclet frames: this will be FuncletInfo.fiSpDelta.
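+//
+// Illustrative example (hypothetical values, Windows AMD64): if xmm6 and xmm7 are the saved registers,
+// the pushed int reg count is even (so firstFPRegPadding = 8), and lclFrameSize = 0x38, then
+// offset = 0x38 - 8 - 0x10 = 0x20, so xmm6 is stored at [RSP+0x20] and xmm7 at [RSP+0x10].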
+void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
+{
+ regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
+
+ // Only callee saved floating point registers should be in regMask
+ assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
+
+ // fast path return
+ if (regMask == RBM_NONE)
+ {
+ return;
+ }
+
+#ifdef _TARGET_AMD64_
+ unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
+ unsigned offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
+
+ // Offset is 16-byte aligned since we use movaps for preserving xmm regs.
+ assert((offset % 16) == 0);
+ instruction copyIns = ins_Copy(TYP_FLOAT);
+#else // !_TARGET_AMD64_
+ unsigned offset = lclFrameSize - XMM_REGSIZE_BYTES;
+ instruction copyIns = INS_movupd;
+#endif // !_TARGET_AMD64_
+
+ for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
+ {
+ regMaskTP regBit = genRegMask(reg);
+ if ((regBit & regMask) != 0)
+ {
+ // ABI requires us to preserve lower 128-bits of YMM register.
+ getEmitter()->emitIns_AR_R(copyIns,
+ EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
+ // EA_16BYTE
+ reg, REG_SPBASE, offset);
+ compiler->unwindSaveReg(reg, offset);
+ regMask &= ~regBit;
+ offset -= XMM_REGSIZE_BYTES;
+ }
+ }
+
+#ifdef FEATURE_AVX_SUPPORT
+ // Issue a vzeroupper to zero out the upper 128 bits of all YMM regs. This avoids AVX/legacy-SSE
+ // transition penalties around the prolog; see the matching vzeroupper in
+ // genRestoreCalleeSavedFltRegs() for the epilog side.
+ if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX)
+ {
+ instGen(INS_vzeroupper);
+ }
+#endif
+}
+
+// Restore the callee-saved floating point registers (compCalleeFPRegsSavedMask), with the smallest register
+// number saved at [RSP+offset], working down the stack to the largest register number stored at
+// [RSP+offset-(genCountBits(regMask)-1)*XMM_REGSIZE_BYTES].
+// Here offset = 16-byte aligned offset after pushing integer registers.
+//
+// Params
+// lclFrameSize - Fixed frame size excluding callee pushed int regs.
+// non-funclet: this will be compLclFrameSize.
+// funclet frames: this will be FuncletInfo.fiSpDelta.
+void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
+{
+ regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
+
+ // Only callee saved floating point registers should be in regMask
+ assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
+
+ // fast path return
+ if (regMask == RBM_NONE)
+ {
+ return;
+ }
+
+#ifdef _TARGET_AMD64_
+ unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
+ instruction copyIns = ins_Copy(TYP_FLOAT);
+#else // !_TARGET_AMD64_
+ unsigned firstFPRegPadding = 0;
+ instruction copyIns = INS_movupd;
+#endif // !_TARGET_AMD64_
+
+ unsigned offset;
+ regNumber regBase;
+ if (compiler->compLocallocUsed)
+ {
+ // localloc frame: use frame pointer relative offset
+ assert(isFramePointerUsed());
+ regBase = REG_FPBASE;
+ offset = lclFrameSize - genSPtoFPdelta() - firstFPRegPadding - XMM_REGSIZE_BYTES;
+ }
+ else
+ {
+ regBase = REG_SPBASE;
+ offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
+ }
+
+#ifdef _TARGET_AMD64_
+ // Offset is 16-byte aligned since we use movaps for restoring xmm regs
+ assert((offset % 16) == 0);
+#endif // _TARGET_AMD64_
+
+#ifdef FEATURE_AVX_SUPPORT
+ // Just before restoring float registers issue a Vzeroupper to zero out upper 128-bits of all YMM regs.
+ // This is to avoid penalty if this routine is using AVX-256 and now returning to a routine that is
+ // using SSE2.
+ if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX)
+ {
+ instGen(INS_vzeroupper);
+ }
+#endif
+
+ for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
+ {
+ regMaskTP regBit = genRegMask(reg);
+ if ((regBit & regMask) != 0)
+ {
+ // ABI requires us to restore lower 128-bits of YMM register.
+ getEmitter()->emitIns_R_AR(copyIns,
+ EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
+ // EA_16BYTE
+ reg, regBase, offset);
+ regMask &= ~regBit;
+ offset -= XMM_REGSIZE_BYTES;
+ }
+ }
+}
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+//-----------------------------------------------------------------------------------
+// IsMultiRegPassedType: Returns true if the type is passed in multiple registers
+//
+// Arguments:
+// hClass - type handle
+//
+// Return Value:
+// true if type is passed in multiple registers, false otherwise.
+//
+bool Compiler::IsMultiRegPassedType(CORINFO_CLASS_HANDLE hClass)
+{
+ if (hClass == NO_CLASS_HANDLE)
+ {
+ return false;
+ }
+
+ structPassingKind howToPassStruct;
+ var_types returnType = getArgTypeForStruct(hClass, &howToPassStruct);
+
+ return (returnType == TYP_STRUCT);
+}
+
+//-----------------------------------------------------------------------------------
+// IsMultiRegReturnedType: Returns true if the type is returned in multiple registers
+//
+// Arguments:
+// hClass - type handle
+//
+// Return Value:
+// true if type is returned in multiple registers, false otherwise.
+//
+bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
+{
+ if (hClass == NO_CLASS_HANDLE)
+ {
+ return false;
+ }
+
+ structPassingKind howToReturnStruct;
+ var_types returnType = getReturnTypeForStruct(hClass, &howToReturnStruct);
+
+ return (returnType == TYP_STRUCT);
+}
+
+//----------------------------------------------
+// Methods that support HFA's for ARM32/ARM64
+//----------------------------------------------
+
+bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
+{
+#ifdef FEATURE_HFA
+ return varTypeIsFloating(GetHfaType(hClass));
+#else
+ return false;
+#endif
+}
+
+bool Compiler::IsHfa(GenTreePtr tree)
+{
+#ifdef FEATURE_HFA
+ return IsHfa(gtGetStructHandleIfPresent(tree));
+#else
+ return false;
+#endif
+}
+
+var_types Compiler::GetHfaType(GenTreePtr tree)
+{
+#ifdef FEATURE_HFA
+ if (tree->TypeGet() == TYP_STRUCT)
+ {
+ return GetHfaType(gtGetStructHandleIfPresent(tree));
+ }
+#endif
+ return TYP_UNDEF;
+}
+
+unsigned Compiler::GetHfaCount(GenTreePtr tree)
+{
+ return GetHfaCount(gtGetStructHandleIfPresent(tree));
+}
+
+var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
+{
+ var_types result = TYP_UNDEF;
+ if (hClass != NO_CLASS_HANDLE)
+ {
+#ifdef FEATURE_HFA
+ CorInfoType corType = info.compCompHnd->getHFAType(hClass);
+ if (corType != CORINFO_TYPE_UNDEF)
+ {
+ result = JITtype2varType(corType);
+ }
+#endif // FEATURE_HFA
+ }
+ return result;
+}
+
+//------------------------------------------------------------------------
+// GetHfaCount: Given a class handle for an HFA struct
+// return the number of registers needed to hold the HFA
+//
+// Note that on ARM32 the single precision registers overlap with
+// the double precision registers; for that reason each
+// double register is considered to be two single registers.
+// Thus, for an ARM32 HFA of 4 doubles this function will return 8.
+// On ARM64, given an HFA of 4 singles or 4 doubles, this function
+// will return 4 in both cases.
+// Arguments:
+// hClass: the class handle of a HFA struct
+//
+unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
+{
+ assert(IsHfa(hClass));
+#ifdef _TARGET_ARM_
+ // An HFA of doubles is twice as large as an HFA of singles on ARM32
+ // (i.e. it uses twice the number of single precision registers)
+ return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES;
+#else // _TARGET_ARM64_
+ var_types hfaType = GetHfaType(hClass);
+ unsigned classSize = info.compCompHnd->getClassSize(hClass);
+ // Note that the retail build issues a warning about a potential division by zero without the Max function
+ unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
+ return classSize / elemSize;
+#endif // _TARGET_ARM64_
+}
+
+#ifdef _TARGET_XARCH_
+
+//------------------------------------------------------------------------
+// genMapShiftInsToShiftByConstantIns: Given a general shift/rotate instruction,
+// map it to the specific x86/x64 shift opcode for a shift/rotate by a constant.
+// X86/x64 has a special encoding for shift/rotate-by-constant-1.
+//
+// Arguments:
+// ins: the base shift/rotate instruction
+// shiftByValue: the constant value by which we are shifting/rotating
+//
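+// For example (illustrative): genMapShiftInsToShiftByConstantIns(INS_shl, 1) yields INS_shl_1,
+// while genMapShiftInsToShiftByConstantIns(INS_shl, 5) yields INS_shl_N.
+//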
+instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue)
+{
+ assert(ins == INS_rcl || ins == INS_rcr || ins == INS_rol || ins == INS_ror || ins == INS_shl || ins == INS_shr ||
+ ins == INS_sar);
+
+ // Which format should we use?
+
+ instruction shiftByConstantIns;
+
+ if (shiftByValue == 1)
+ {
+ // Use the shift-by-one format.
+
+ assert(INS_rcl + 1 == INS_rcl_1);
+ assert(INS_rcr + 1 == INS_rcr_1);
+ assert(INS_rol + 1 == INS_rol_1);
+ assert(INS_ror + 1 == INS_ror_1);
+ assert(INS_shl + 1 == INS_shl_1);
+ assert(INS_shr + 1 == INS_shr_1);
+ assert(INS_sar + 1 == INS_sar_1);
+
+ shiftByConstantIns = (instruction)(ins + 1);
+ }
+ else
+ {
+ // Use the shift-by-NNN format.
+
+ assert(INS_rcl + 2 == INS_rcl_N);
+ assert(INS_rcr + 2 == INS_rcr_N);
+ assert(INS_rol + 2 == INS_rol_N);
+ assert(INS_ror + 2 == INS_ror_N);
+ assert(INS_shl + 2 == INS_shl_N);
+ assert(INS_shr + 2 == INS_shr_N);
+ assert(INS_sar + 2 == INS_sar_N);
+
+ shiftByConstantIns = (instruction)(ins + 2);
+ }
+
+ return shiftByConstantIns;
+}
+
+#endif // _TARGET_XARCH_
+
+#if !defined(LEGACY_BACKEND) && (defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_))
+
+//------------------------------------------------------------------------------------------------
+// getFirstArgWithStackSlot - returns the first argument with stack slot on the caller's frame.
+//
+// Return value:
+// The number of the first argument with stack slot on the caller's frame.
+//
+// Note:
+// On x64 Windows the caller always creates slots (homing space) in its frame for the
+// first 4 arguments of a callee (register passed args). So, the variable number
+// (lclNum) for the first argument with a stack slot is always 0.
+// For System V systems or arm64, there is no such calling convention requirement, and the code needs to find
+// the first stack passed argument from the caller. This is done by iterating over
+// all the lvParam variables and finding the first whose lvArgReg equals REG_STK.
+//
+unsigned CodeGen::getFirstArgWithStackSlot()
+{
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
+ unsigned baseVarNum = 0;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ baseVarNum = compiler->lvaFirstStackIncomingArgNum;
+
+ if (compiler->lvaFirstStackIncomingArgNum != BAD_VAR_NUM)
+ {
+ baseVarNum = compiler->lvaFirstStackIncomingArgNum;
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ // Iterate over all the local variables in the Lcl var table.
+ // They contain all the implicit arguments - thisPtr, retBuf,
+ // generic context, PInvoke cookie, vararg cookie, non-standard args, etc.
+ LclVarDsc* varDsc = nullptr;
+ for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
+ {
+ varDsc = &(compiler->lvaTable[i]);
+
+ // We are iterating over the arguments only.
+ assert(varDsc->lvIsParam);
+
+ if (varDsc->lvArgReg == REG_STK)
+ {
+ baseVarNum = i;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ compiler->lvaFirstStackIncomingArgNum = baseVarNum;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ break;
+ }
+ }
+ assert(varDsc != nullptr);
+ }
+
+ return baseVarNum;
+#elif defined(_TARGET_AMD64_)
+ return 0;
+#else
+ // Not implemented for x86.
+ NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
+ return BAD_VAR_NUM;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
+}
+
+#endif // !LEGACY_BACKEND && (_TARGET_XARCH_ || _TARGET_ARM64_)
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * This function should be called only after the sizes of the emitter blocks
+ * have been finalized.
+ */
+
+void CodeGen::genSetScopeInfo()
+{
+ if (!compiler->opts.compScopeInfo)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genSetScopeInfo()\n");
+ }
+#endif
+
+ if (compiler->info.compVarScopesCount == 0)
+ {
+ compiler->eeSetLVcount(0);
+ compiler->eeSetLVdone();
+ return;
+ }
+
+ noway_assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
+ noway_assert(psiOpenScopeList.scNext == nullptr);
+
+ unsigned i;
+ unsigned scopeCnt = siScopeCnt + psiScopeCnt;
+
+ compiler->eeSetLVcount(scopeCnt);
+
+#ifdef DEBUG
+ genTrnslLocalVarCount = scopeCnt;
+ if (scopeCnt)
+ {
+ genTrnslLocalVarInfo = new (compiler, CMK_DebugOnly) TrnslLocalVarInfo[scopeCnt];
+ }
+#endif
+
+ // Record the scopes found for the parameters over the prolog.
+ // The prolog needs to be treated differently as a variable may not
+ // have the same info in the prolog block as is given by compiler->lvaTable.
+ // e.g., a register parameter is actually on the stack, before it is loaded into its register.
+
+ CodeGen::psiScope* scopeP;
+
+ for (i = 0, scopeP = psiScopeList.scNext; i < psiScopeCnt; i++, scopeP = scopeP->scNext)
+ {
+ noway_assert(scopeP != nullptr);
+ noway_assert(scopeP->scStartLoc.Valid());
+ noway_assert(scopeP->scEndLoc.Valid());
+
+ UNATIVE_OFFSET startOffs = scopeP->scStartLoc.CodeOffset(getEmitter());
+ UNATIVE_OFFSET endOffs = scopeP->scEndLoc.CodeOffset(getEmitter());
+
+ unsigned varNum = scopeP->scSlotNum;
+ noway_assert(startOffs <= endOffs);
+
+ // The range may be 0 if the prolog is empty. For such a case,
+ // report the liveness of arguments to span at least the first
+ // instruction in the method. This will be incorrect (except on
+ // entry to the method) if the very first instruction of the method
+ // is part of a loop. However, this should happen
+ // very rarely, and the incorrectness is worth being able to look
+ // at the argument on entry to the method.
+ if (startOffs == endOffs)
+ {
+ noway_assert(startOffs == 0);
+ endOffs++;
+ }
+
+ Compiler::siVarLoc varLoc;
+
+ if (scopeP->scRegister)
+ {
+ varLoc.vlType = Compiler::VLT_REG;
+ varLoc.vlReg.vlrReg = (regNumber)scopeP->u1.scRegNum;
+ }
+ else
+ {
+ varLoc.vlType = Compiler::VLT_STK;
+ varLoc.vlStk.vlsBaseReg = (regNumber)scopeP->u2.scBaseReg;
+ varLoc.vlStk.vlsOffset = scopeP->u2.scOffset;
+ }
+
+ genSetScopeInfo(i, startOffs, endOffs - startOffs, varNum, scopeP->scLVnum, true, varLoc);
+ }
+
+ // Record the scopes for the rest of the method.
+ // Check that the LocalVarInfo scopes look OK
+ noway_assert(siOpenScopeList.scNext == nullptr);
+
+ CodeGen::siScope* scopeL;
+
+ for (i = 0, scopeL = siScopeList.scNext; i < siScopeCnt; i++, scopeL = scopeL->scNext)
+ {
+ noway_assert(scopeL != nullptr);
+ noway_assert(scopeL->scStartLoc.Valid());
+ noway_assert(scopeL->scEndLoc.Valid());
+
+ // Find the start and end IP
+
+ UNATIVE_OFFSET startOffs = scopeL->scStartLoc.CodeOffset(getEmitter());
+ UNATIVE_OFFSET endOffs = scopeL->scEndLoc.CodeOffset(getEmitter());
+
+ noway_assert(scopeL->scStartLoc != scopeL->scEndLoc);
+
+ // For stack vars, find the base register, and offset
+
+ regNumber baseReg;
+ signed offset = compiler->lvaTable[scopeL->scVarNum].lvStkOffs;
+
+ if (!compiler->lvaTable[scopeL->scVarNum].lvFramePointerBased)
+ {
+ baseReg = REG_SPBASE;
+ offset += scopeL->scStackLevel;
+ }
+ else
+ {
+ baseReg = REG_FPBASE;
+ }
+
+ // Now fill in the varLoc
+
+ Compiler::siVarLoc varLoc;
+
+ // TODO-Review: This only works for always-enregistered variables. With LSRA, a variable might be in a register
+ // for part of its lifetime, or in different registers for different parts of its lifetime.
+ // This should only matter for non-debug code, where we do variable enregistration.
+ // We should store the ranges of variable enregistration in the scope table.
+ if (compiler->lvaTable[scopeL->scVarNum].lvIsInReg())
+ {
+ var_types type = genActualType(compiler->lvaTable[scopeL->scVarNum].TypeGet());
+ switch (type)
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+#ifdef _TARGET_64BIT_
+ case TYP_LONG:
+#endif // _TARGET_64BIT_
+
+ varLoc.vlType = Compiler::VLT_REG;
+ varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ break;
+
+#ifndef _TARGET_64BIT_
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+
+ if (compiler->lvaTable[scopeL->scVarNum].lvOtherReg != REG_STK)
+ {
+ varLoc.vlType = Compiler::VLT_REG_REG;
+ varLoc.vlRegReg.vlrrReg1 = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ varLoc.vlRegReg.vlrrReg2 = compiler->lvaTable[scopeL->scVarNum].lvOtherReg;
+ }
+ else
+ {
+ varLoc.vlType = Compiler::VLT_REG_STK;
+ varLoc.vlRegStk.vlrsReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ varLoc.vlRegStk.vlrsStk.vlrssBaseReg = baseReg;
+ if (!isFramePointerUsed() && varLoc.vlRegStk.vlrsStk.vlrssBaseReg == REG_SPBASE)
+ {
+ varLoc.vlRegStk.vlrsStk.vlrssBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
+ }
+ varLoc.vlRegStk.vlrsStk.vlrssOffset = offset + sizeof(int);
+ }
+ break;
+#endif // !_TARGET_64BIT_
+
+#ifdef _TARGET_64BIT_
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
+ // so no XMM registers can get debug information.
+ varLoc.vlType = Compiler::VLT_REG_FP;
+ varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ break;
+
+#else // !_TARGET_64BIT_
+
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ if (isFloatRegType(type))
+ {
+ varLoc.vlType = Compiler::VLT_FPSTK;
+ varLoc.vlFPstk.vlfReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ }
+ break;
+#endif // CPU_HAS_FP_SUPPORT
+
+#endif // !_TARGET_64BIT_
+
+#ifdef FEATURE_SIMD
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+ varLoc.vlType = Compiler::VLT_REG_FP;
+
+ // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
+ // so no XMM registers can get debug information.
+ //
+ // Note: we need to initialize the vlrReg field, otherwise during a jit dump we hit an assert
+ // in eeDispVar() --> getRegName() checking that the regNumber is valid.
+ varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ break;
+#endif // FEATURE_SIMD
+
+ default:
+ noway_assert(!"Invalid type");
+ }
+ }
+ else
+ {
+ assert(offset != BAD_STK_OFFS);
+ LclVarDsc* varDsc = compiler->lvaTable + scopeL->scVarNum;
+ switch (genActualType(varDsc->TypeGet()))
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+ case TYP_FLOAT:
+ case TYP_STRUCT:
+ case TYP_BLK: // Needed because of the TYP_BLK stress mode
+#ifdef FEATURE_SIMD
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+#endif
+#ifdef _TARGET_64BIT_
+ case TYP_LONG:
+ case TYP_DOUBLE:
+#endif // _TARGET_64BIT_
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // In the AMD64 ABI we are supposed to pass a struct by reference when its
+ // size is not 1, 2, 4 or 8 bytes. During fgMorph, the compiler modifies
+ // the IR to comply with the ABI and therefore changes the type of the lclVar
+ // that holds the struct from TYP_STRUCT to TYP_BYREF but it gives us a hint that
+ // this is still a struct by setting the lvIsTemp flag.
+ // The same is true for ARM64 and structs > 16 bytes.
+ // (See Compiler::fgMarkImplicitByRefArgs in Morph.cpp for further detail)
+ // Now, the VM expects a special enum for this type of local var: VLT_STK_BYREF,
+ // to accommodate this situation.
+ if (varDsc->lvType == TYP_BYREF && varDsc->lvIsTemp)
+ {
+ assert(varDsc->lvIsParam);
+ varLoc.vlType = Compiler::VLT_STK_BYREF;
+ }
+ else
+#endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ {
+ varLoc.vlType = Compiler::VLT_STK;
+ }
+ varLoc.vlStk.vlsBaseReg = baseReg;
+ varLoc.vlStk.vlsOffset = offset;
+ if (!isFramePointerUsed() && varLoc.vlStk.vlsBaseReg == REG_SPBASE)
+ {
+ varLoc.vlStk.vlsBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
+ }
+ break;
+
+#ifndef _TARGET_64BIT_
+ case TYP_LONG:
+ case TYP_DOUBLE:
+ varLoc.vlType = Compiler::VLT_STK2;
+ varLoc.vlStk2.vls2BaseReg = baseReg;
+ varLoc.vlStk2.vls2Offset = offset;
+ if (!isFramePointerUsed() && varLoc.vlStk2.vls2BaseReg == REG_SPBASE)
+ {
+ varLoc.vlStk2.vls2BaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
+ }
+ break;
+#endif // !_TARGET_64BIT_
+
+ default:
+ noway_assert(!"Invalid type");
+ }
+ }
+
+ genSetScopeInfo(psiScopeCnt + i, startOffs, endOffs - startOffs, scopeL->scVarNum, scopeL->scLVnum,
+ scopeL->scAvailable, varLoc);
+ }
+
+ compiler->eeSetLVdone();
+}
+
+/*****************************************************************************/
+#ifdef LATE_DISASM
+#if defined(DEBUG)
+/*****************************************************************************
+ * CompilerRegName
+ *
+ * Can be called only after lviSetLocalVarInfo() has been called
+ */
+
+/* virtual */
+const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
+{
+ if (!compiler->opts.compScopeInfo)
+ return nullptr;
+
+ if (compiler->info.compVarScopesCount == 0)
+ return nullptr;
+
+ noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
+
+ for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
+ {
+ if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsInReg((regNumber)reg)) &&
+ (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
+ (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
+ {
+ return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
+ }
+ }
+
+ return NULL;
+}
+
+/*****************************************************************************
+ * CompilerStkName
+ *
+ * Can be called only after lviSetLocalVarInfo() has been called
+ */
+
+/* virtual */
+const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
+{
+ if (!compiler->opts.compScopeInfo)
+ return nullptr;
+
+ if (compiler->info.compVarScopesCount == 0)
+ return nullptr;
+
+ noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
+
+ for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
+ {
+ if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsOnStk((regNumber)reg, stkOffs)) &&
+ (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
+ (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
+ {
+ return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
+ }
+ }
+
+ return NULL;
+}
+
+/*****************************************************************************/
+#endif // defined(DEBUG)
+#endif // LATE_DISASM
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ * Display an IPmappingDsc. Pass -1 as mappingNum to not display a mapping number.
+ */
+
+void CodeGen::genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping)
+{
+ if (mappingNum != unsigned(-1))
+ {
+ printf("%d: ", mappingNum);
+ }
+
+ IL_OFFSETX offsx = ipMapping->ipmdILoffsx;
+
+ if (offsx == BAD_IL_OFFSET)
+ {
+ printf("???");
+ }
+ else
+ {
+ Compiler::eeDispILOffs(jitGetILoffsAny(offsx));
+
+ if (jitIsStackEmpty(offsx))
+ {
+ printf(" STACK_EMPTY");
+ }
+
+ if (jitIsCallInstruction(offsx))
+ {
+ printf(" CALL_INSTRUCTION");
+ }
+ }
+
+ printf(" ");
+ ipMapping->ipmdNativeLoc.Print();
+ // We can only call this after code generation. Is there any way to tell when it's legal to call?
+ // printf(" [%x]", ipMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
+
+ if (ipMapping->ipmdIsLabel)
+ {
+ printf(" label");
+ }
+
+ printf("\n");
+}
+
+void CodeGen::genIPmappingListDisp()
+{
+ unsigned mappingNum = 0;
+ Compiler::IPmappingDsc* ipMapping;
+
+ for (ipMapping = compiler->genIPmappingList; ipMapping != nullptr; ipMapping = ipMapping->ipmdNext)
+ {
+ genIPmappingDisp(mappingNum, ipMapping);
+ ++mappingNum;
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Append an IPmappingDsc struct to the list that we're maintaining
+ * for the debugger.
+ * Record the instr offset as being at the current code gen position.
+ */
+
+void CodeGen::genIPmappingAdd(IL_OFFSETX offsx, bool isLabel)
+{
+ if (!compiler->opts.compDbgInfo)
+ {
+ return;
+ }
+
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ break;
+
+ default:
+
+ if (offsx != ICorDebugInfo::NO_MAPPING)
+ {
+ noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
+ }
+
+ // Ignore this one if it's the same IL offset as the last one we saw.
+ // Note that we'll let through two identical IL offsets if the flag bits
+ // differ, or two identical "special" mappings (e.g., PROLOG).
+ if ((compiler->genIPmappingLast != nullptr) && (offsx == compiler->genIPmappingLast->ipmdILoffsx))
+ {
+ JITDUMP("genIPmappingAdd: ignoring duplicate IL offset 0x%x\n", offsx);
+ return;
+ }
+ break;
+ }
+
+ /* Create a mapping entry and append it to the list */
+
+ Compiler::IPmappingDsc* addMapping =
+ (Compiler::IPmappingDsc*)compiler->compGetMem(sizeof(*addMapping), CMK_DebugInfo);
+
+ addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
+ addMapping->ipmdILoffsx = offsx;
+ addMapping->ipmdIsLabel = isLabel;
+ addMapping->ipmdNext = nullptr;
+
+ if (compiler->genIPmappingList != nullptr)
+ {
+ assert(compiler->genIPmappingLast != nullptr);
+ assert(compiler->genIPmappingLast->ipmdNext == nullptr);
+ compiler->genIPmappingLast->ipmdNext = addMapping;
+ }
+ else
+ {
+ assert(compiler->genIPmappingLast == nullptr);
+ compiler->genIPmappingList = addMapping;
+ }
+
+ compiler->genIPmappingLast = addMapping;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Added IP mapping: ");
+ genIPmappingDisp(unsigned(-1), addMapping);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Prepend an IPmappingDsc struct to the list that we're maintaining
+ * for the debugger.
+ * Record the instr offset as being at the current code gen position.
+ */
+void CodeGen::genIPmappingAddToFront(IL_OFFSETX offsx)
+{
+ if (!compiler->opts.compDbgInfo)
+ {
+ return;
+ }
+
+ assert(offsx != BAD_IL_OFFSET);
+ assert(compiler->compGeneratingProlog); // We only ever do this during prolog generation.
+
+ switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ break;
+
+ default:
+ noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
+ break;
+ }
+
+ /* Create a mapping entry and prepend it to the list */
+
+ Compiler::IPmappingDsc* addMapping =
+ (Compiler::IPmappingDsc*)compiler->compGetMem(sizeof(*addMapping), CMK_DebugInfo);
+
+ addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
+ addMapping->ipmdILoffsx = offsx;
+ addMapping->ipmdIsLabel = true;
+ addMapping->ipmdNext = nullptr;
+
+ addMapping->ipmdNext = compiler->genIPmappingList;
+ compiler->genIPmappingList = addMapping;
+
+ if (compiler->genIPmappingLast == nullptr)
+ {
+ compiler->genIPmappingLast = addMapping;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Added IP mapping to front: ");
+ genIPmappingDisp(unsigned(-1), addMapping);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************/
+
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) != IL_OFFSETX(BAD_IL_OFFSET));
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) != IL_OFFSETX(BAD_IL_OFFSET));
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) != IL_OFFSETX(BAD_IL_OFFSET));
+
+C_ASSERT(IL_OFFSETX(BAD_IL_OFFSET) > MAX_IL_OFFSET);
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) > MAX_IL_OFFSET);
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) > MAX_IL_OFFSET);
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) > MAX_IL_OFFSET);
+
+//------------------------------------------------------------------------
+// jitGetILoffs: Returns the IL offset portion of the IL_OFFSETX type.
+// Asserts if any ICorDebugInfo distinguished value (like ICorDebugInfo::NO_MAPPING)
+// is seen; these are unexpected here. Also asserts if passed BAD_IL_OFFSET.
+//
+// Arguments:
+// offsx - the IL_OFFSETX value with the IL offset to extract.
+//
+// Return Value:
+// The IL offset.
+
+IL_OFFSET jitGetILoffs(IL_OFFSETX offsx)
+{
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ unreached();
+
+ default:
+ return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
+ }
+}
+
+//------------------------------------------------------------------------
+// jitGetILoffsAny: Similar to jitGetILoffs(), but passes through ICorDebugInfo
+// distinguished values. Asserts if passed BAD_IL_OFFSET.
+//
+// Arguments:
+// offsx - the IL_OFFSETX value with the IL offset to extract.
+//
+// Return Value:
+// The IL offset.
+
+IL_OFFSET jitGetILoffsAny(IL_OFFSETX offsx)
+{
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offsx is unsigned and the case labels are signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ return IL_OFFSET(offsx);
+
+ default:
+ return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
+ }
+}
+
+//------------------------------------------------------------------------
+// jitIsStackEmpty: Does the IL offset have the stack empty bit set?
+// Asserts if passed BAD_IL_OFFSET.
+//
+// Arguments:
+// offsx - the IL_OFFSETX value to check
+//
+// Return Value:
+// 'true' if the stack empty bit is set; 'false' otherwise.
+
+bool jitIsStackEmpty(IL_OFFSETX offsx)
+{
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offsx is unsigned and the case labels are signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ return true;
+
+ default:
+ return (offsx & IL_OFFSETX_STKBIT) == 0;
+ }
+}
+
+//------------------------------------------------------------------------
+// jitIsCallInstruction: Does the IL offset have the call instruction bit set?
+// Asserts if passed BAD_IL_OFFSET.
+//
+// Arguments:
+// offsx - the IL_OFFSETX value to check
+//
+// Return Value:
+// 'true' if the call instruction bit is set; 'false' otherwise.
+
+bool jitIsCallInstruction(IL_OFFSETX offsx)
+{
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offsx is unsigned and the case labels are signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ return false;
+
+ default:
+ return (offsx & IL_OFFSETX_CALLINSTRUCTIONBIT) != 0;
+ }
+}
+
+/*****************************************************************************/
+
+void CodeGen::genEnsureCodeEmitted(IL_OFFSETX offsx)
+{
+ if (!compiler->opts.compDbgCode)
+ {
+ return;
+ }
+
+ if (offsx == BAD_IL_OFFSET)
+ {
+ return;
+ }
+
+ /* If other IL offsets were reported, skip */
+
+ if (compiler->genIPmappingLast == nullptr)
+ {
+ return;
+ }
+
+ if (compiler->genIPmappingLast->ipmdILoffsx != offsx)
+ {
+ return;
+ }
+
+ /* offsx was the last reported offset. Make sure that we generated native code */
+
+ if (compiler->genIPmappingLast->ipmdNativeLoc.IsCurrentLocation(getEmitter()))
+ {
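+ // Emit a nop so that the last reported IL offset maps to at least one native
+ // instruction of its own; otherwise a breakpoint placed on that IL offset
+ // would have no code to land on.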
+ instGen(INS_nop);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Shut down the IP-mapping logic, report the info to the EE.
+ */
+
+void CodeGen::genIPmappingGen()
+{
+ if (!compiler->opts.compDbgInfo)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genIPmappingGen()\n");
+ }
+#endif
+
+ if (compiler->genIPmappingList == nullptr)
+ {
+ compiler->eeSetLIcount(0);
+ compiler->eeSetLIdone();
+ return;
+ }
+
+ Compiler::IPmappingDsc* tmpMapping;
+ Compiler::IPmappingDsc* prevMapping;
+ unsigned mappingCnt;
+ UNATIVE_OFFSET lastNativeOfs;
+
+ /* First count the number of distinct mapping records */
+
+ mappingCnt = 0;
+ lastNativeOfs = UNATIVE_OFFSET(~0);
+
+ for (prevMapping = nullptr, tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr;
+ tmpMapping = tmpMapping->ipmdNext)
+ {
+ IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
+
+ // Managed RetVal - since new sequence points are emitted to identify IL calls,
+ // make sure that those are not filtered and do not interfere with filtering of
+ // other sequence points.
+ if (jitIsCallInstruction(srcIP))
+ {
+ mappingCnt++;
+ continue;
+ }
+
+ UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
+
+ if (nextNativeOfs != lastNativeOfs)
+ {
+ mappingCnt++;
+ lastNativeOfs = nextNativeOfs;
+ prevMapping = tmpMapping;
+ continue;
+ }
+
+ /* If there are mappings with the same native offset, then:
+ o If one of them is NO_MAPPING, ignore it
+ o If one of them is a label, report that and ignore the other one
+ o Else report the higher IL offset
+ */
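+
+ /* For example (illustrative only): if a label mapping and a following
+ non-label mapping resolve to the same native offset, the label entry is
+ reported and the other is discarded; if neither is a label, the later
+ entry is the one that survives. */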
+
+ PREFIX_ASSUME(prevMapping != nullptr); // We would have continued above if prevMapping were null
+ if (prevMapping->ipmdILoffsx == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
+ {
+ // If the previous entry was NO_MAPPING, ignore it
+ prevMapping->ipmdNativeLoc.Init();
+ prevMapping = tmpMapping;
+ }
+ else if (srcIP == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
+ {
+ // If the current entry is NO_MAPPING, ignore it
+ // Leave prevMapping unchanged as tmpMapping is no longer valid
+ tmpMapping->ipmdNativeLoc.Init();
+ }
+ else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
+ {
+ // Count this entry; the second pass below reports these special cases even when the native offset repeats
+ mappingCnt++;
+ prevMapping = tmpMapping;
+ }
+ else
+ {
+ noway_assert(prevMapping != nullptr);
+ noway_assert(!prevMapping->ipmdNativeLoc.Valid() ||
+ lastNativeOfs == prevMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
+
+ /* The previous block had the same native offset. We have to
+ discard one of the mappings. Simply reinitialize ipmdNativeLoc
+ and prevMapping will be ignored later. */
+
+ if (prevMapping->ipmdIsLabel)
+ {
+ // Leave prevMapping unchanged as tmpMapping is no longer valid
+ tmpMapping->ipmdNativeLoc.Init();
+ }
+ else
+ {
+ prevMapping->ipmdNativeLoc.Init();
+ prevMapping = tmpMapping;
+ }
+ }
+ }
+
+ /* Tell them how many mapping records we've got */
+
+ compiler->eeSetLIcount(mappingCnt);
+
+ /* Now tell them about the mappings */
+
+ mappingCnt = 0;
+ lastNativeOfs = UNATIVE_OFFSET(~0);
+
+ for (tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr; tmpMapping = tmpMapping->ipmdNext)
+ {
+ // Do we have to skip this record ?
+ if (!tmpMapping->ipmdNativeLoc.Valid())
+ {
+ continue;
+ }
+
+ UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
+ IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
+
+ if (jitIsCallInstruction(srcIP))
+ {
+ compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffs(srcIP), jitIsStackEmpty(srcIP), true);
+ }
+ else if (nextNativeOfs != lastNativeOfs)
+ {
+ compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
+ lastNativeOfs = nextNativeOfs;
+ }
+ else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
+ {
+ // For the special case of an IL instruction with no body
+ // followed by the epilog (say ret void immediately preceding
+ // the method end), we put two entries in, so that we'll stop
+ // at the (empty) ret statement if the user tries to put a
+ // breakpoint there, and then have the option of seeing the
+ // epilog or not based on SetUnmappedStopMask for the stepper.
+ compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
+ }
+ }
+
+#if 0
+ // TODO-Review:
+ //This check is disabled. Any time this check would assert, the debugger would have a
+ //problem with IL source-level debugging. However, for a C# file, it only matters if things are on
+ //different source lines. As a result, we have all sorts of latent problems with how we emit debug
+ //info, but very few actual ones. Whenever someone wants to tackle that problem in general, turn this
+ //assert back on.
+ if (compiler->opts.compDbgCode)
+ {
+ //Assert that the first instruction of every basic block with more than one incoming edge has a
+ //different sequence point from each incoming block.
+ //
+ //It turns out that the only thing we really have to assert is that the first statement in each basic
+ //block has an IL offset and appears in eeBoundaries.
+ for (BasicBlock * block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if ((block->bbRefs > 1) && (block->bbTreeList != nullptr))
+ {
+ noway_assert(block->bbTreeList->gtOper == GT_STMT);
+ bool found = false;
+ if (block->bbTreeList->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ IL_OFFSET ilOffs = jitGetILoffs(block->bbTreeList->gtStmt.gtStmtILoffsx);
+ for (unsigned i = 0; i < eeBoundariesCount; ++i)
+ {
+ if (eeBoundaries[i].ilOffset == ilOffs)
+ {
+ found = true;
+ break;
+ }
+ }
+ }
+ noway_assert(found && "A basic block that is a jump target did not start a new sequence point.");
+ }
+ }
+ }
+#endif // 0
+
+ compiler->eeSetLIdone();
+}
+
+#endif // DEBUGGING_SUPPORT
+
+/*============================================================================
+ *
+ * These are empty stubs that allow the late disassembler to compile
+ * when DEBUGGING_SUPPORT is not enabled, or when the late disassembler is
+ * built into a non-DEBUG build.
+ *
+ *============================================================================
+ */
+
+#if defined(LATE_DISASM)
+#if !defined(DEBUGGING_SUPPORT) || !defined(DEBUG)
+
+/* virtual */
+const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
+{
+ return NULL;
+}
+
+/* virtual */
+const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
+{
+ return NULL;
+}
+
+/*****************************************************************************/
+#endif // !defined(DEBUGGING_SUPPORT) || !defined(DEBUG)
+#endif // defined(LATE_DISASM)
+/*****************************************************************************/
diff --git a/src/jit/codegeninterface.h b/src/jit/codegeninterface.h
new file mode 100644
index 0000000000..e9abbe6b3c
--- /dev/null
+++ b/src/jit/codegeninterface.h
@@ -0,0 +1,440 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This file declares the types that constitute the interface between the
+// code generator (CodeGen class) and the rest of the JIT.
+//
+// RegState - describes the incoming register arguments (one instance per
+// argument-register kind; see the struct below).
+//
+// CodeGenInterface includes only the public methods that are called by
+// the Compiler.
+//
+// CodeGenContext contains the shared context between the code generator
+// and other phases of the JIT, especially the register allocator and
+// GC encoder. It is distinct from CodeGenInterface so that it can be
+// included in the Compiler object, and avoid an extra indirection when
+// accessed from members of Compiler.
+//
+
+#ifndef _CODEGEN_INTERFACE_H_
+#define _CODEGEN_INTERFACE_H_
+
+#include "regset.h"
+#include "jitgcinfo.h"
+
+// Forward reference types
+
+class CodeGenInterface;
+class emitter;
+
+// Small helper types
+
+//-------------------- Register selection ---------------------------------
+
+struct RegState
+{
+ regMaskTP rsCalleeRegArgMaskLiveIn; // mask of register arguments (live on entry to method)
+#ifdef LEGACY_BACKEND
+ unsigned rsCurRegArgNum; // current argument number (for caller)
+#endif
+ unsigned rsCalleeRegArgCount; // total number of incoming register arguments of this kind (int or float)
+ bool rsIsFloat; // true for float argument registers, false for integer argument registers
+};
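+
+// CodeGenInterface keeps two RegState instances -- intRegState for the integer
+// argument registers and floatRegState for the floating-point argument
+// registers (declared below); rsIsFloat records which kind a given instance
+// describes.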
+
+//-------------------- CodeGenInterface ---------------------------------
+// interface to hide the full CodeGen implementation from rest of Compiler
+
+CodeGenInterface* getCodeGenerator(Compiler* comp);
+
+class CodeGenInterface
+{
+ friend class emitter;
+
+public:
+ CodeGenInterface(Compiler* theCompiler);
+ virtual void genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode) = 0;
+
+#ifndef LEGACY_BACKEND
+ // genSpillVar is called by compUpdateLifeVar in the RyuJIT backend case.
+ // TODO-Cleanup: We should handle the spill directly in CodeGen, rather than
+ // calling it from compUpdateLifeVar. Then this can be non-virtual.
+
+ virtual void genSpillVar(GenTreePtr tree) = 0;
+#endif // !LEGACY_BACKEND
+
+ //-------------------------------------------------------------------------
+ // The following property indicates whether to align loops.
+ // (Used to avoid effects of loop alignment when diagnosing perf issues.)
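+ // (genAlignLoops uses a compiler property extension: reads and writes of
+ // genAlignLoops are forwarded to the doAlignLoops/setAlignLoops accessors
+ // below.)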
+ __declspec(property(get = doAlignLoops, put = setAlignLoops)) bool genAlignLoops;
+ bool doAlignLoops()
+ {
+ return m_genAlignLoops;
+ }
+ void setAlignLoops(bool value)
+ {
+ m_genAlignLoops = value;
+ }
+
+ // TODO-Cleanup: Abstract out the part of this that finds the addressing mode, and
+ // move it to Lower
+ virtual bool genCreateAddrMode(GenTreePtr addr,
+ int mode,
+ bool fold,
+ regMaskTP regMask,
+ bool* revPtr,
+ GenTreePtr* rv1Ptr,
+ GenTreePtr* rv2Ptr,
+#if SCALED_ADDR_MODES
+ unsigned* mulPtr,
+#endif
+ unsigned* cnsPtr,
+ bool nogen = false) = 0;
+
+ void genCalcFrameSize();
+
+ GCInfo gcInfo;
+
+ RegSet regSet;
+ RegState intRegState;
+ RegState floatRegState;
+
+ // TODO-Cleanup: The only reason that regTracker needs to live in CodeGenInterface is that
+ // in RegSet::rsUnspillOneReg, it needs to mark the new register as "trash"
+ RegTracker regTracker;
+
+public:
+ void trashReg(regNumber reg)
+ {
+ regTracker.rsTrackRegTrash(reg);
+ }
+
+protected:
+ Compiler* compiler;
+ bool m_genAlignLoops;
+
+private:
+ static const BYTE instInfo[INS_count];
+
+#define INST_FP 0x01 // is it a FP instruction?
+public:
+ static bool instIsFP(instruction ins);
+
+ //-------------------------------------------------------------------------
+ // Liveness-related fields & methods
+public:
+ void genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTreePtr tree));
+#ifndef LEGACY_BACKEND
+ void genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree);
+#endif // !LEGACY_BACKEND
+
+protected:
+#ifdef DEBUG
+ VARSET_TP genTempOldLife;
+ bool genTempLiveChg;
+#endif
+
+ VARSET_TP genLastLiveSet; // A one element map (genLastLiveSet-> genLastLiveMask)
+ regMaskTP genLastLiveMask; // these two are used in genLiveMask
+
+ regMaskTP genGetRegMask(const LclVarDsc* varDsc);
+ regMaskTP genGetRegMask(GenTreePtr tree);
+
+ void genUpdateLife(GenTreePtr tree);
+ void genUpdateLife(VARSET_VALARG_TP newLife);
+
+#ifdef LEGACY_BACKEND
+ regMaskTP genLiveMask(GenTreePtr tree);
+ regMaskTP genLiveMask(VARSET_VALARG_TP liveSet);
+#endif
+
+ void genGetRegPairFromMask(regMaskTP regPairMask, regNumber* pLoReg, regNumber* pHiReg);
+
+ // The following property indicates whether the current method sets up
+ // an explicit stack frame or not.
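+ // (PhasedVar is a JIT utility type that restricts when the variable may be
+ // written relative to when it is read; resetFramePointerUsedWritePhase()
+ // below re-opens the write phase.)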
+private:
+ PhasedVar<bool> m_cgFramePointerUsed;
+
+public:
+ bool isFramePointerUsed() const
+ {
+ return m_cgFramePointerUsed;
+ }
+ void setFramePointerUsed(bool value)
+ {
+ m_cgFramePointerUsed = value;
+ }
+ void resetFramePointerUsedWritePhase()
+ {
+ m_cgFramePointerUsed.ResetWritePhase();
+ }
+
+ // The following property indicates whether the current method requires
+ // an explicit frame. Does not prohibit double alignment of the stack.
+private:
+ PhasedVar<bool> m_cgFrameRequired;
+
+public:
+ bool isFrameRequired() const
+ {
+ return m_cgFrameRequired;
+ }
+ void setFrameRequired(bool value)
+ {
+ m_cgFrameRequired = value;
+ }
+
+public:
+ int genCallerSPtoFPdelta();
+ int genCallerSPtoInitialSPdelta();
+ int genSPtoFPdelta();
+ int genTotalFrameSize();
+
+ regNumber genGetThisArgReg(GenTreePtr call);
+
+#ifdef _TARGET_XARCH_
+#ifdef _TARGET_AMD64_
+ // There are no reloc hints on x86
+ unsigned short genAddrRelocTypeHint(size_t addr);
+#endif
+ bool genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr);
+ bool genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr);
+ bool genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr);
+ bool genCodeIndirAddrNeedsReloc(size_t addr);
+ bool genCodeAddrNeedsReloc(size_t addr);
+#endif
+
+ // If both isFramePointerRequired() and isFrameRequired() are false, the method is eligible
+ // for Frame-Pointer-Omission (FPO).
+
+ // The following property indicates whether the current method requires
+ // an explicit stack frame, and all arguments and locals to be
+ // accessible relative to the Frame Pointer. Prohibits double alignment
+ // of the stack.
+private:
+ PhasedVar<bool> m_cgFramePointerRequired;
+
+public:
+ bool isFramePointerRequired() const
+ {
+ return m_cgFramePointerRequired;
+ }
+ void setFramePointerRequired(bool value)
+ {
+ m_cgFramePointerRequired = value;
+ }
+ void setFramePointerRequiredEH(bool value);
+
+ void setFramePointerRequiredGCInfo(bool value)
+ {
+#ifdef JIT32_GCENCODER
+ m_cgFramePointerRequired = value;
+#endif
+ }
+
+#if DOUBLE_ALIGN
+ // The following property indicates whether we are going to double-align the frame.
+ // Arguments are accessed relative to the Frame Pointer (EBP), and
+ // locals are accessed relative to the Stack Pointer (ESP).
+public:
+ bool doDoubleAlign() const
+ {
+ return m_cgDoubleAlign;
+ }
+ void setDoubleAlign(bool value)
+ {
+ m_cgDoubleAlign = value;
+ }
+ bool doubleAlignOrFramePointerUsed() const
+ {
+ return isFramePointerUsed() || doDoubleAlign();
+ }
+
+private:
+ bool m_cgDoubleAlign;
+#else // !DOUBLE_ALIGN
+public:
+ bool doubleAlignOrFramePointerUsed() const
+ {
+ return isFramePointerUsed();
+ }
+#endif // !DOUBLE_ALIGN
+
+#ifdef DEBUG
+ // The following is used to make sure the value of 'genInterruptible' isn't
+ // changed after it's been used by any logic that depends on its value.
+public:
+ bool isGCTypeFixed()
+ {
+ return genInterruptibleUsed;
+ }
+
+protected:
+ bool genInterruptibleUsed;
+#endif
+
+public:
+#if FEATURE_STACK_FP_X87
+ FlatFPStateX87 compCurFPState;
+ unsigned genFPregCnt; // count of current FP reg. vars (including dead but unpopped ones)
+
+ void SetRegVarFloat(regNumber reg, var_types type, LclVarDsc* varDsc);
+
+ void inst_FN(instruction ins, unsigned stk);
+
+ // Keeps track of the current level of the FP coprocessor stack
+ // (excluding FP reg. vars).
+ // Do not use directly, instead use the processor agnostic accessor
+ // methods below
+ //
+ unsigned genFPstkLevel;
+
+ void genResetFPstkLevel(unsigned newValue = 0);
+ unsigned genGetFPstkLevel();
+ FlatFPStateX87* FlatFPAllocFPState(FlatFPStateX87* pInitFrom = 0);
+
+ void genIncrementFPstkLevel(unsigned inc = 1);
+ void genDecrementFPstkLevel(unsigned dec = 1);
+
+ static const char* regVarNameStackFP(regNumber reg);
+
+ // The FlatFPX87_ functions are the actual verbs that do the work --
+ // performing a transition, loading a register, etc. They are also
+ // responsible for emitting the x87 code to do so. We keep
+ // them in Compiler because we don't want to store a pointer to the
+ // emitter.
+ void FlatFPX87_MoveToTOS(FlatFPStateX87* pState, unsigned iVirtual, bool bEmitCode = true);
+ void FlatFPX87_SwapStack(FlatFPStateX87* pState, unsigned i, unsigned j, bool bEmitCode = true);
+
+#endif // FEATURE_STACK_FP_X87
+
+#ifndef LEGACY_BACKEND
+ regNumber genGetAssignedReg(GenTreePtr tree);
+#endif // !LEGACY_BACKEND
+
+#ifdef LEGACY_BACKEND
+ // Changes GT_LCL_VAR nodes to GT_REG_VAR nodes if possible.
+ bool genMarkLclVar(GenTreePtr tree);
+
+ void genBashLclVar(GenTreePtr tree, unsigned varNum, LclVarDsc* varDsc);
+#endif // LEGACY_BACKEND
+
+public:
+ unsigned InferStructOpSizeAlign(GenTreePtr op, unsigned* alignmentWB);
+ unsigned InferOpSizeAlign(GenTreePtr op, unsigned* alignmentWB);
+
+ void genMarkTreeInReg(GenTreePtr tree, regNumber reg);
+#if CPU_LONG_USES_REGPAIR
+ void genMarkTreeInRegPair(GenTreePtr tree, regPairNo regPair);
+#endif
+ // Methods to abstract target information
+
+ bool validImmForInstr(instruction ins, ssize_t val, insFlags flags = INS_FLAGS_DONT_CARE);
+ bool validDispForLdSt(ssize_t disp, var_types type);
+ bool validImmForAdd(ssize_t imm, insFlags flags);
+ bool validImmForAlu(ssize_t imm);
+ bool validImmForMov(ssize_t imm);
+ bool validImmForBL(ssize_t addr);
+
+ instruction ins_Load(var_types srcType, bool aligned = false);
+ instruction ins_Store(var_types dstType, bool aligned = false);
+ static instruction ins_FloatLoad(var_types type = TYP_DOUBLE);
+
+ // Methods for spilling - used by RegSet
+ void spillReg(var_types type, TempDsc* tmp, regNumber reg);
+ void reloadReg(var_types type, TempDsc* tmp, regNumber reg);
+ void reloadFloatReg(var_types type, TempDsc* tmp, regNumber reg);
+
+#ifdef LEGACY_BACKEND
+ void SpillFloat(regNumber reg, bool bIsCall = false);
+#endif // LEGACY_BACKEND
+
+ // The following method is used by xarch emitter for handling contained tree temps.
+ TempDsc* getSpillTempDsc(GenTree* tree);
+
+public:
+ emitter* getEmitter()
+ {
+ return m_cgEmitter;
+ }
+
+protected:
+ emitter* m_cgEmitter;
+
+#ifdef LATE_DISASM
+public:
+ DisAssembler& getDisAssembler()
+ {
+ return m_cgDisAsm;
+ }
+
+protected:
+ DisAssembler m_cgDisAsm;
+#endif // LATE_DISASM
+
+public:
+#ifdef DEBUG
+ void setVerbose(bool value)
+ {
+ verbose = value;
+ }
+ bool verbose;
+#ifdef LEGACY_BACKEND
+ // Stress mode
+ int genStressFloat();
+ regMaskTP genStressLockedMaskFloat();
+#endif // LEGACY_BACKEND
+#endif // DEBUG
+
+ // The following is set to true if we've determined that the current method
+ // is to be fully interruptible.
+ //
+public:
+ __declspec(property(get = getInterruptible, put = setInterruptible)) bool genInterruptible;
+ bool getInterruptible()
+ {
+ return m_cgInterruptible;
+ }
+ void setInterruptible(bool value)
+ {
+ m_cgInterruptible = value;
+ }
+
+private:
+ bool m_cgInterruptible;
+
+ // The following will be set to true if we've determined that we need to
+ // generate a full-blown pointer register map for the current method.
+ // Currently it is equal to (genInterruptible || !isFramePointerUsed())
+ // (i.e. We generate the full-blown map for EBP-less methods and
+ // for fully interruptible methods)
+ //
+public:
+ __declspec(property(get = doFullPtrRegMap, put = setFullPtrRegMap)) bool genFullPtrRegMap;
+ bool doFullPtrRegMap()
+ {
+ return m_cgFullPtrRegMap;
+ }
+ void setFullPtrRegMap(bool value)
+ {
+ m_cgFullPtrRegMap = value;
+ }
+
+private:
+ bool m_cgFullPtrRegMap;
+
+#ifdef DEBUGGING_SUPPORT
+public:
+ virtual void siUpdate() = 0;
+#endif // DEBUGGING_SUPPORT
+
+#ifdef LATE_DISASM
+public:
+ virtual const char* siRegVarName(size_t offs, size_t size, unsigned reg) = 0;
+
+ virtual const char* siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs) = 0;
+#endif // LATE_DISASM
+};
+
+#endif // _CODEGEN_INTERFACE_H_
diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp
new file mode 100644
index 0000000000..ea40eb2aff
--- /dev/null
+++ b/src/jit/codegenlegacy.cpp
@@ -0,0 +1,22057 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX CodeGenerator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "codegen.h"
+
+#ifdef LEGACY_BACKEND // This file is NOT used for the '!LEGACY_BACKEND' that uses the linear scan register allocator
+
+#ifdef _TARGET_AMD64_
+#error AMD64 must be !LEGACY_BACKEND
+#endif
+
+#ifdef _TARGET_ARM64_
+#error ARM64 must be !LEGACY_BACKEND
+#endif
+
+#include "gcinfo.h"
+#include "emit.h"
+
+#ifndef JIT32_GCENCODER
+#include "gcinfoencoder.h"
+#endif
+
+/*****************************************************************************
+ *
+ * Determine what variables die between beforeSet and afterSet, and
+ * update the liveness globals accordingly:
+ * compiler->compCurLife, gcInfo.gcVarPtrSetCur, regSet.rsMaskVars, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur
+ */
+
+void CodeGen::genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet)
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ regMaskTP regBit;
+ VARSET_TP VARSET_INIT_NOCOPY(deadSet, VarSetOps::Diff(compiler, beforeSet, afterSet));
+
+ if (VarSetOps::IsEmpty(compiler, deadSet))
+ return;
+
+ /* iterate through the dead variables */
+
+ VARSET_ITER_INIT(compiler, iter, deadSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ varNum = compiler->lvaTrackedToVarNum[varIndex];
+ varDsc = compiler->lvaTable + varNum;
+
+ /* Remove this variable from the 'deadSet' bit set */
+
+ noway_assert(VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex));
+
+ VarSetOps::RemoveElemD(compiler, compiler->compCurLife, varIndex);
+
+ noway_assert(!VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varIndex) ||
+ VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex));
+
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+
+ /* We are done if the variable is not enregistered */
+
+ if (!varDsc->lvRegister)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u,T%02u is a dyingVar\n", varNum, varDsc->lvVarIndex);
+ }
+#endif
+ continue;
+ }
+
+#if !FEATURE_FP_REGALLOC
+ // We don't do FP-enreg of vars whose liveness changes in GTF_COLON_COND
+ if (!varDsc->IsFloatRegType())
+#endif
+ {
+ /* Get hold of the appropriate register bit(s) */
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ regBit = genRegMask(varDsc->lvRegNum);
+ if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
+ regBit |= genRegMask(varDsc->lvOtherReg);
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u,T%02u in reg %s is a dyingVar\n", varNum, varDsc->lvVarIndex,
+ compiler->compRegVarName(varDsc->lvRegNum));
+ }
+#endif
+ noway_assert((regSet.rsMaskVars & regBit) != 0);
+
+ regSet.RemoveMaskVars(regBit);
+
+ // Remove GC tracking if any for this register
+
+ if ((regBit & regSet.rsMaskUsed) == 0) // The register may be multi-used
+ gcInfo.gcMarkRegSetNpt(regBit);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Change the given enregistered local variable node to a register variable node
+ */
+
+void CodeGenInterface::genBashLclVar(GenTreePtr tree, unsigned varNum, LclVarDsc* varDsc)
+{
+ noway_assert(tree->gtOper == GT_LCL_VAR);
+ noway_assert(varDsc->lvRegister);
+
+ if (isRegPairType(varDsc->lvType))
+ {
+ /* Check for the case of a variable that was narrowed to an int */
+
+ if (isRegPairType(tree->gtType))
+ {
+ genMarkTreeInRegPair(tree, gen2regs2pair(varDsc->lvRegNum, varDsc->lvOtherReg));
+ return;
+ }
+
+ noway_assert(tree->gtFlags & GTF_VAR_CAST);
+ noway_assert(tree->gtType == TYP_INT);
+ }
+ else
+ {
+ noway_assert(!isRegPairType(tree->gtType));
+ }
+
+ /* It's a register variable -- modify the node */
+
+ unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
+
+ ValueNumPair vnp = tree->gtVNPair; // Save the ValueNumPair
+ tree->SetOper(GT_REG_VAR);
+ tree->gtVNPair = vnp; // Preserve the ValueNumPair, as SetOper will clear it.
+
+ tree->gtFlags |= livenessFlags;
+ tree->gtFlags |= GTF_REG_VAL;
+ tree->gtRegNum = varDsc->lvRegNum;
+ tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
+ tree->gtRegVar.SetLclNum(varNum);
+}
+
+// inline
+void CodeGen::saveLiveness(genLivenessSet* ls)
+{
+ VarSetOps::Assign(compiler, ls->liveSet, compiler->compCurLife);
+ VarSetOps::Assign(compiler, ls->varPtrSet, gcInfo.gcVarPtrSetCur);
+ ls->maskVars = (regMaskSmall)regSet.rsMaskVars;
+ ls->gcRefRegs = (regMaskSmall)gcInfo.gcRegGCrefSetCur;
+ ls->byRefRegs = (regMaskSmall)gcInfo.gcRegByrefSetCur;
+}
+
+// inline
+void CodeGen::restoreLiveness(genLivenessSet* ls)
+{
+ VarSetOps::Assign(compiler, compiler->compCurLife, ls->liveSet);
+ VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet);
+ regSet.rsMaskVars = ls->maskVars;
+ gcInfo.gcRegGCrefSetCur = ls->gcRefRegs;
+ gcInfo.gcRegByrefSetCur = ls->byRefRegs;
+}
+
+// inline
+void CodeGen::checkLiveness(genLivenessSet* ls)
+{
+ assert(VarSetOps::Equal(compiler, compiler->compCurLife, ls->liveSet));
+ assert(VarSetOps::Equal(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet));
+ assert(regSet.rsMaskVars == ls->maskVars);
+ assert(gcInfo.gcRegGCrefSetCur == ls->gcRefRegs);
+ assert(gcInfo.gcRegByrefSetCur == ls->byRefRegs);
+}
+
+// inline
+bool CodeGenInterface::genMarkLclVar(GenTreePtr tree)
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ assert(tree->gtOper == GT_LCL_VAR);
+
+ /* Does the variable live in a register? */
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+ if (varDsc->lvRegister)
+ {
+ genBashLclVar(tree, varNum, varDsc);
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+// inline
+GenTreePtr CodeGen::genGetAddrModeBase(GenTreePtr tree)
+{
+ bool rev;
+ unsigned mul;
+ unsigned cns;
+ GenTreePtr adr;
+ GenTreePtr idx;
+
+ if (genCreateAddrMode(tree, // address
+ 0, // mode
+ false, // fold
+ RBM_NONE, // reg mask
+ &rev, // reverse ops
+ &adr, // base addr
+ &idx, // index val
+#if SCALED_ADDR_MODES
+ &mul, // scaling
+#endif
+ &cns, // displacement
+ true)) // don't generate code
+ return adr;
+ else
+ return NULL;
+}
+
+// inline
+void CodeGen::genSinglePush()
+{
+ genStackLevel += sizeof(void*);
+}
+
+// inline
+void CodeGen::genSinglePop()
+{
+ genStackLevel -= sizeof(void*);
+}
+
+#if FEATURE_STACK_FP_X87
+// inline
+void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */)
+{
+ genFPstkLevel = newValue;
+}
+
+// inline
+unsigned CodeGenInterface::genGetFPstkLevel()
+{
+ return genFPstkLevel;
+}
+
+// inline
+void CodeGenInterface::genIncrementFPstkLevel(unsigned inc /* = 1 */)
+{
+ noway_assert((inc == 0) || genFPstkLevel + inc > genFPstkLevel);
+ genFPstkLevel += inc;
+}
+
+// inline
+void CodeGenInterface::genDecrementFPstkLevel(unsigned dec /* = 1 */)
+{
+ noway_assert((dec == 0) || genFPstkLevel - dec < genFPstkLevel);
+ genFPstkLevel -= dec;
+}
+
+#endif // FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * Generate code that will set the given register to the integer constant.
+ */
+
+void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
+{
+ noway_assert(type != TYP_REF || val == NULL);
+
+ /* Does the reg already hold this constant? */
+
+ if (!regTracker.rsIconIsInReg(val, reg))
+ {
+ if (val == 0)
+ {
+ instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
+ }
+#ifdef _TARGET_ARM_
+ // If we can set a register to a constant with a small encoding, then do that.
+ else if (arm_Valid_Imm_For_Small_Mov(reg, val, flags))
+ {
+ instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
+ }
+#endif
+ else
+ {
+ /* See if a register holds the value or a close value? */
+ bool constantLoaded = false;
+ ssize_t delta;
+ regNumber srcReg = regTracker.rsIconIsInReg(val, &delta);
+
+ if (srcReg != REG_NA)
+ {
+ if (delta == 0)
+ {
+ inst_RV_RV(INS_mov, reg, srcReg, type, emitActualTypeSize(type), flags);
+ constantLoaded = true;
+ }
+ else
+ {
+#if defined(_TARGET_XARCH_)
+ /* delta should fit inside a byte */
+ if (delta == (signed char)delta)
+ {
+ /* use an lea instruction to set reg */
+ getEmitter()->emitIns_R_AR(INS_lea, emitTypeSize(type), reg, srcReg, (int)delta);
+ constantLoaded = true;
+ }
+#elif defined(_TARGET_ARM_)
+ /* We found a register 'regS' that has the value we need, modulo a small delta.
+ That is, the value we need is 'regS + delta'.
+ We want to generate one of the following instructions, listed in order of preference:
+
+ adds regD, delta ; 2 bytes. if regD == regS, regD is a low register, and
+ 0<=delta<=255
+ subs regD, delta ; 2 bytes. if regD == regS, regD is a low register, and
+ -255<=delta<=0
+ adds regD, regS, delta ; 2 bytes. if regD and regS are low registers and 0<=delta<=7
+ subs regD, regS, delta ; 2 bytes. if regD and regS are low registers and -7<=delta<=0
+ mov regD, icon ; 4 bytes. icon is a wacky Thumb 12-bit immediate.
+ movw regD, icon ; 4 bytes. 0<=icon<=65535
+ add.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
+ sub.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
+ addw regD, regS, delta ; 4 bytes. 0<=delta<=4095
+ subw regD, regS, delta ; 4 bytes. -4095<=delta<=0
+
+ If it wasn't for the desire to generate the "mov reg,icon" forms if possible (and no bigger
+ than necessary), this would be a lot simpler. Note that we might set the overflow flag: we
+ can have regS containing the largest signed int 0x7fffffff and need the smallest signed int
+ 0x80000000. In this case, delta will be 1.
+ */
+
+ bool useAdd = false;
+ regMaskTP regMask = genRegMask(reg);
+ regMaskTP srcRegMask = genRegMask(srcReg);
+
+ if ((flags != INS_FLAGS_NOT_SET) && (reg == srcReg) && (regMask & RBM_LOW_REGS) &&
+ (unsigned_abs(delta) <= 255))
+ {
+ useAdd = true;
+ }
+ else if ((flags != INS_FLAGS_NOT_SET) && (regMask & RBM_LOW_REGS) && (srcRegMask & RBM_LOW_REGS) &&
+ (unsigned_abs(delta) <= 7))
+ {
+ useAdd = true;
+ }
+ else if (arm_Valid_Imm_For_Mov(val))
+ {
+ // fall through to general "!constantLoaded" case below
+ }
+ else if (arm_Valid_Imm_For_Add(delta, flags))
+ {
+ useAdd = true;
+ }
+
+ if (useAdd)
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_4BYTE, reg, srcReg, delta, flags);
+ constantLoaded = true;
+ }
+#else
+ assert(!"Codegen missing");
+#endif
+ }
+ }
+
+ if (!constantLoaded) // Have we loaded it yet?
+ {
+#ifdef _TARGET_X86_
+ if (val == -1)
+ {
+ /* or reg,-1 takes 3 bytes */
+ inst_RV_IV(INS_OR, reg, val, emitActualTypeSize(type));
+ }
+ else
+ /* For SMALL_CODE it is smaller to push a small immediate and
+ then pop it into the dest register */
+ if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) && val == (signed char)val)
+ {
+ /* "mov" has no s(sign)-bit and so always takes 6 bytes,
+ whereas push+pop takes 2+1 bytes */
+
+ inst_IV(INS_push, val);
+ genSinglePush();
+
+ inst_RV(INS_pop, reg, type);
+ genSinglePop();
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
+ }
+ }
+ }
+ }
+ regTracker.rsTrackRegIntCns(reg, val);
+ gcInfo.gcMarkRegPtrVal(reg, type);
+}
+
+/*****************************************************************************
+ *
+ * Find an existing register set to the given integer constant, or
+ * pick a register and generate code that will set it to the integer constant.
+ *
+ * If no existing register is set to the constant, it will use regSet.rsPickReg(regBest)
+ * to pick some register to set. NOTE that this means the returned regNumber
+ * might *not* be in regBest. It also implies that you should lock any registers
+ * you don't want spilled (not just mark as used).
+ *
+ */
+
+regNumber CodeGen::genGetRegSetToIcon(ssize_t val, regMaskTP regBest /* = 0 */, var_types type /* = TYP_INT */)
+{
+ regNumber regCns;
+#if REDUNDANT_LOAD
+
+ // Is there already a register holding this constant that we can use?
+ regCns = regTracker.rsIconIsInReg(val);
+
+ if (regCns == REG_NA)
+#endif
+ {
+ // If not, grab a register to hold the constant, preferring
+ // any register besides RBM_TMP_0 so it can hopefully be re-used
+ regCns = regSet.rsPickReg(regBest, regBest & ~RBM_TMP_0);
+
+ // Now set the constant
+ genSetRegToIcon(regCns, val, type);
+ }
+
+ // NOTE: there is no guarantee that regCns is in regBest's mask
+ return regCns;
+}
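+
+// Illustrative use (not from the original sources): to materialize zero while
+// preferring a callee-trashed register, a caller might write
+// regNumber zreg = genGetRegSetToIcon(0, RBM_CALLEE_TRASH, TYP_INT);
+// and, per the note above, must not assume that zreg actually lies within
+// RBM_CALLEE_TRASH.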
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Add the given constant to the specified register.
+ * 'tree' is the resulting tree
+ */
+
+void CodeGen::genIncRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
+{
+ bool setFlags = (tree != NULL) && tree->gtSetFlags();
+
+#ifdef _TARGET_XARCH_
+ /* First check to see if we can generate inc or dec instruction(s) */
+ /* But avoid inc/dec on P4 in general for fast code or inside loops for blended code */
+ if (!ovfl && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
+ {
+ emitAttr size = emitTypeSize(dstType);
+
+ switch (ival)
+ {
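+ // Note: case 2 falls through to case 1 (and -2 to -1), so "inc"/"dec" is
+ // emitted twice for a constant of +/-2.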
+ case 2:
+ inst_RV(INS_inc, reg, dstType, size);
+ __fallthrough;
+ case 1:
+ inst_RV(INS_inc, reg, dstType, size);
+
+ goto UPDATE_LIVENESS;
+
+ case -2:
+ inst_RV(INS_dec, reg, dstType, size);
+ __fallthrough;
+ case -1:
+ inst_RV(INS_dec, reg, dstType, size);
+
+ goto UPDATE_LIVENESS;
+ }
+ }
+#endif
+
+ insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);
+
+#ifdef _TARGET_XARCH_
+UPDATE_LIVENESS:
+#endif
+
+ if (setFlags)
+ genFlagsEqualToReg(tree, reg);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+
+ if (tree != NULL)
+ {
+ if (!tree->OperIsAssignment())
+ {
+ genMarkTreeInReg(tree, reg);
+ if (varTypeIsGC(tree->TypeGet()))
+ gcInfo.gcMarkRegSetByref(genRegMask(reg));
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Subtract the given constant from the specified register.
+ * Should only be used for unsigned sub with overflow. Else
+ * genIncRegBy() can be used with -ival. We shouldn't use genIncRegBy()
+ * for these cases as the flags are set differently, and the following
+ * check for overflow won't work correctly.
+ * 'tree' is the resulting tree.
+ */
+
+void CodeGen::genDecRegBy(regNumber reg, ssize_t ival, GenTreePtr tree)
+{
+ noway_assert((tree->gtFlags & GTF_OVERFLOW) &&
+ ((tree->gtFlags & GTF_UNSIGNED) || ival == ((tree->gtType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)));
+ noway_assert(tree->gtType == TYP_INT || tree->gtType == TYP_I_IMPL);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ noway_assert(!varTypeIsGC(tree->TypeGet()));
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_sub, reg, ival, emitActualTypeSize(tree->TypeGet()), flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ if (tree)
+ {
+ genMarkTreeInReg(tree, reg);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Multiply the specified register by the given value.
+ * 'tree' is the resulting tree
+ */
+
+void CodeGen::genMulRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
+{
+ noway_assert(genActualType(dstType) == TYP_INT || genActualType(dstType) == TYP_I_IMPL);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ if (tree)
+ {
+ genMarkTreeInReg(tree, reg);
+ }
+
+ bool use_shift = false;
+ unsigned shift_by = 0;
+
+ if ((dstType >= TYP_INT) && !ovfl && (ival > 0) && ((ival & (ival - 1)) == 0))
+ {
+ use_shift = true;
+ BitScanForwardPtr((ULONG*)&shift_by, (ULONG)ival);
+ }
+
+ if (use_shift)
+ {
+ if (shift_by != 0)
+ {
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, emitTypeSize(dstType), reg, shift_by, flags);
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+ }
+ }
+ else
+ {
+ instruction ins;
+#ifdef _TARGET_XARCH_
+ ins = getEmitter()->inst3opImulForReg(reg);
+#else
+ ins = INS_mul;
+#endif
+
+ inst_RV_IV(ins, reg, ival, emitActualTypeSize(dstType));
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Compute the value 'tree' into a register that's in 'needReg'
+ * (or any free register if 'needReg' is RBM_NONE).
+ *
+ * Note that 'needReg' is just a recommendation unless mustReg==RegSet::EXACT_REG.
+ * If keepReg==RegSet::KEEP_REG, we mark the register as being used.
+ *
+ * If you require that the register returned is trashable, pass true for 'freeOnly'.
+ */
+
+void CodeGen::genComputeReg(
+ GenTreePtr tree, regMaskTP needReg, RegSet::ExactReg mustReg, RegSet::KeepReg keepReg, bool freeOnly)
+{
+ noway_assert(tree->gtType != TYP_VOID);
+
+ regNumber reg;
+ regNumber rg2;
+
+#if FEATURE_STACK_FP_X87
+ noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
+ genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF);
+#elif defined(_TARGET_ARM_)
+ noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
+ genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
+ genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE ||
+ genActualType(tree->gtType) == TYP_STRUCT);
+#else
+ noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
+ genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
+ genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE);
+#endif
+
+ /* Generate the value, hopefully into the right register */
+
+ genCodeForTree(tree, needReg);
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+
+ // There is a workaround in genCodeForTreeLng() that changes the type of the
+ // tree of a GT_MUL with 64 bit result to TYP_INT from TYP_LONG, then calls
+ // genComputeReg(). genCodeForTree(), above, will put the result in gtRegPair for ARM,
+ // or leave it in EAX/EDX for x86, but only set EAX as gtRegNum. There's no point
+ // running the rest of this code, because anything looking at gtRegNum on ARM or
+ // attempting to move from EAX/EDX will be wrong.
+ if ((tree->OperGet() == GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT))
+ goto REG_OK;
+
+ reg = tree->gtRegNum;
+
+ /* Did the value end up in an acceptable register? */
+
+ if ((mustReg == RegSet::EXACT_REG) && needReg && !(genRegMask(reg) & needReg))
+ {
+ /* Not good enough to satisfy the caller's orders */
+
+ if (varTypeIsFloating(tree))
+ {
+ RegSet::RegisterPreference pref(needReg, RBM_NONE);
+ rg2 = regSet.PickRegFloat(tree->TypeGet(), &pref);
+ }
+ else
+ {
+ rg2 = regSet.rsGrabReg(needReg);
+ }
+ }
+ else
+ {
+ /* Do we have to end up with a free register? */
+
+ if (!freeOnly)
+ goto REG_OK;
+
+ /* Did we luck out and the value got computed into an unused reg? */
+
+ if (genRegMask(reg) & regSet.rsRegMaskFree())
+ goto REG_OK;
+
+ /* Register already in use, so spill previous value */
+
+ if ((mustReg == RegSet::EXACT_REG) && needReg && (genRegMask(reg) & needReg))
+ {
+ rg2 = regSet.rsGrabReg(needReg);
+ if (rg2 == reg)
+ {
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
+ tree->gtRegNum = reg;
+ goto REG_OK;
+ }
+ }
+ else
+ {
+ /* OK, let's find a trashable home for the value */
+
+ regMaskTP rv1RegUsed;
+
+ regSet.rsLockReg(genRegMask(reg), &rv1RegUsed);
+ rg2 = regSet.rsPickReg(needReg);
+ regSet.rsUnlockReg(genRegMask(reg), rv1RegUsed);
+ }
+ }
+
+ noway_assert(reg != rg2);
+
+ /* Update the value in the target register */
+
+ regTracker.rsTrackRegCopy(rg2, reg);
+
+ inst_RV_RV(ins_Copy(tree->TypeGet()), rg2, reg, tree->TypeGet());
+
+ /* The value has been transferred to 'reg' */
+
+ if ((genRegMask(reg) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+
+ gcInfo.gcMarkRegPtrVal(rg2, tree->TypeGet());
+
+ /* The value is now in an appropriate register */
+
+ tree->gtRegNum = rg2;
+
+REG_OK:
+
+ /* Does the caller want us to mark the register as used? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ /* In case we're computing a value into a register variable */
+
+ genUpdateLife(tree);
+
+ /* Mark the register as 'used' */
+
+ regSet.rsMarkRegUsed(tree);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Same as genComputeReg(), the only difference being that the result is
+ * guaranteed to end up in a trashable register.
+ */
+
+// inline
+void CodeGen::genCompIntoFreeReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
+{
+ genComputeReg(tree, needReg, RegSet::ANY_REG, keepReg, true);
+}
+
+/*****************************************************************************
+ *
+ * The value 'tree' was earlier computed into a register; free up that
+ * register (but also make sure the value is presently in a register).
+ */
+
+void CodeGen::genReleaseReg(GenTreePtr tree)
+{
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* The register has been spilled -- reload it */
+
+ regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
+ return;
+ }
+
+ regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
+}
+
+/*****************************************************************************
+ *
+ * The value 'tree' was earlier computed into a register. Check whether that
+ * register has been spilled (and reload it if so), and if 'keepReg' is RegSet::FREE_REG,
+ * free the register. The caller shouldn't need to be setting GCness of the register
+ * where tree will be recovered to, so we disallow keepReg==RegSet::FREE_REG for GC type trees.
+ */
+
+void CodeGen::genRecoverReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
+{
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* The register has been spilled -- reload it */
+
+ regSet.rsUnspillReg(tree, needReg, keepReg);
+ return;
+ }
+ else if (needReg && (needReg & genRegMask(tree->gtRegNum)) == 0)
+ {
+ /* We need the tree in another register. So move it there */
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+ regNumber oldReg = tree->gtRegNum;
+
+ /* Pick an acceptable register */
+
+ regNumber reg = regSet.rsGrabReg(needReg);
+
+ /* Copy the value */
+
+ inst_RV_RV(INS_mov, reg, oldReg, tree->TypeGet());
+ tree->gtRegNum = reg;
+
+ gcInfo.gcMarkRegPtrVal(tree);
+ regSet.rsMarkRegUsed(tree);
+ regSet.rsMarkRegFree(oldReg, tree);
+
+ regTracker.rsTrackRegCopy(reg, oldReg);
+ }
+
+ /* Free the register if the caller desired so */
+
+ if (keepReg == RegSet::FREE_REG)
+ {
+ regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
+ // Can't use RegSet::FREE_REG on a GC type
+ noway_assert(!varTypeIsGC(tree->gtType));
+ }
+ else
+ {
+ noway_assert(regSet.rsMaskUsed & genRegMask(tree->gtRegNum));
+ }
+}
+
+/*****************************************************************************
+ *
+ * Move one half of a register pair to its new regPair(half).
+ */
+
+// inline
+void CodeGen::genMoveRegPairHalf(GenTreePtr tree, regNumber dst, regNumber src, int off)
+{
+ if (src == REG_STK)
+ {
+ // handle long to unsigned long overflow casts
+ while (tree->gtOper == GT_CAST)
+ {
+ noway_assert(tree->gtType == TYP_LONG);
+ tree = tree->gtCast.CastOp();
+ }
+ noway_assert(tree->gtEffectiveVal()->gtOper == GT_LCL_VAR);
+ noway_assert(tree->gtType == TYP_LONG);
+ inst_RV_TT(ins_Load(TYP_INT), dst, tree, off);
+ regTracker.rsTrackRegTrash(dst);
+ }
+ else
+ {
+ regTracker.rsTrackRegCopy(dst, src);
+ inst_RV_RV(INS_mov, dst, src, TYP_INT);
+ }
+}
+
+/*****************************************************************************
+ *
+ * The given long value is in a register pair, but it's not an acceptable
+ * one. We have to move the value into a register pair in 'needReg' (if
+ * non-zero) or the pair 'newPair' (when 'newPair != REG_PAIR_NONE').
+ *
+ * Important note: if 'needReg' is non-zero, we assume the current pair
+ * has not been marked as free. If, OTOH, 'newPair' is specified, we
+ * assume that the current register pair is marked as used and free it.
+ */
+
+void CodeGen::genMoveRegPair(GenTreePtr tree, regMaskTP needReg, regPairNo newPair)
+{
+ regPairNo oldPair;
+
+ regNumber oldLo;
+ regNumber oldHi;
+ regNumber newLo;
+ regNumber newHi;
+
+ /* Either a target set or a specific pair may be requested */
+
+ noway_assert((needReg != 0) != (newPair != REG_PAIR_NONE));
+
+ /* Get hold of the current pair */
+
+ oldPair = tree->gtRegPair;
+ noway_assert(oldPair != newPair);
+
+ /* Are we supposed to move to a specific pair? */
+
+ if (newPair != REG_PAIR_NONE)
+ {
+ regMaskTP oldMask = genRegPairMask(oldPair);
+ regMaskTP loMask = genRegMask(genRegPairLo(newPair));
+ regMaskTP hiMask = genRegMask(genRegPairHi(newPair));
+ regMaskTP overlap = oldMask & (loMask | hiMask);
+
+ /* First lock any registers that are in both pairs */
+
+ noway_assert((regSet.rsMaskUsed & overlap) == overlap);
+ noway_assert((regSet.rsMaskLock & overlap) == 0);
+ regSet.rsMaskLock |= overlap;
+
+ /* Make sure any additional registers we need are free */
+
+ if ((loMask & regSet.rsMaskUsed) != 0 && (loMask & oldMask) == 0)
+ {
+ regSet.rsGrabReg(loMask);
+ }
+
+ if ((hiMask & regSet.rsMaskUsed) != 0 && (hiMask & oldMask) == 0)
+ {
+ regSet.rsGrabReg(hiMask);
+ }
+
+ /* Unlock those registers we have temporarily locked */
+
+ noway_assert((regSet.rsMaskUsed & overlap) == overlap);
+ noway_assert((regSet.rsMaskLock & overlap) == overlap);
+ regSet.rsMaskLock -= overlap;
+
+ /* We can now free the old pair */
+
+ regSet.rsMarkRegFree(oldMask);
+ }
+ else
+ {
+ /* Pick the new pair based on the caller's stated preference */
+
+ newPair = regSet.rsGrabRegPair(needReg);
+ }
+
+ // If grabbed pair is the same as old one we're done
+ if (newPair == oldPair)
+ {
+ noway_assert((oldLo = genRegPairLo(oldPair), oldHi = genRegPairHi(oldPair), newLo = genRegPairLo(newPair),
+ newHi = genRegPairHi(newPair), newLo != REG_STK && newHi != REG_STK));
+ return;
+ }
+
+ /* Move the values from the old pair into the new one */
+
+ oldLo = genRegPairLo(oldPair);
+ oldHi = genRegPairHi(oldPair);
+ newLo = genRegPairLo(newPair);
+ newHi = genRegPairHi(newPair);
+
+ noway_assert(newLo != REG_STK && newHi != REG_STK);
+
+ /* Careful - the register pairs might overlap */
+
+ if (newLo == oldLo)
+ {
+ /* The low registers are identical, just move the upper half */
+
+ noway_assert(newHi != oldHi);
+ genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
+ }
+ else
+ {
+ /* The low registers are different, are the upper ones the same? */
+
+ if (newHi == oldHi)
+ {
+ /* Just move the lower half, then */
+ genMoveRegPairHalf(tree, newLo, oldLo, 0);
+ }
+ else
+ {
+ /* Both sets are different - is there an overlap? */
+
+ if (newLo == oldHi)
+ {
+ /* Are high and low simply swapped ? */
+
+ if (newHi == oldLo)
+ {
+#ifdef _TARGET_ARM_
+ /* Let's use XOR swap to reduce register pressure. */
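+ // (eor a,b / eor b,a / eor a,b swaps the two registers without a scratch.)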
+ inst_RV_RV(INS_eor, oldLo, oldHi);
+ inst_RV_RV(INS_eor, oldHi, oldLo);
+ inst_RV_RV(INS_eor, oldLo, oldHi);
+#else
+ inst_RV_RV(INS_xchg, oldHi, oldLo);
+#endif
+ regTracker.rsTrackRegSwap(oldHi, oldLo);
+ }
+ else
+ {
+ /* New lower == old higher, so move higher half first */
+
+ noway_assert(newHi != oldLo);
+ genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
+ genMoveRegPairHalf(tree, newLo, oldLo, 0);
+ }
+ }
+ else
+ {
+ /* Move lower half first */
+ genMoveRegPairHalf(tree, newLo, oldLo, 0);
+ genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
+ }
+ }
+ }
+
+ /* Record the fact that we're switching to another pair */
+
+ tree->gtRegPair = newPair;
+}
+
+/*****************************************************************************
+ *
+ * Compute the value 'tree' into the register pair specified by 'needRegPair'
+ * if 'needRegPair' is REG_PAIR_NONE then use any free register pair, avoid
+ * those in avoidReg.
+ * If 'keepReg' is set to RegSet::KEEP_REG then we mark both registers that the
+ * value ends up in as being used.
+ */
+
+void CodeGen::genComputeRegPair(
+ GenTreePtr tree, regPairNo needRegPair, regMaskTP avoidReg, RegSet::KeepReg keepReg, bool freeOnly)
+{
+ regMaskTP regMask;
+ regPairNo regPair;
+ regMaskTP tmpMask;
+ regMaskTP tmpUsedMask;
+ regNumber rLo;
+ regNumber rHi;
+
+ noway_assert(isRegPairType(tree->gtType));
+
+ if (needRegPair == REG_PAIR_NONE)
+ {
+ if (freeOnly)
+ {
+ regMask = regSet.rsRegMaskFree() & ~avoidReg;
+ if (genMaxOneBit(regMask))
+ regMask = regSet.rsRegMaskFree();
+ }
+ else
+ {
+ regMask = RBM_ALLINT & ~avoidReg;
+ }
+
+ if (genMaxOneBit(regMask))
+ regMask = regSet.rsRegMaskCanGrab();
+ }
+ else
+ {
+ regMask = genRegPairMask(needRegPair);
+ }
+
+ /* Generate the value, hopefully into the right register pair */
+
+ genCodeForTreeLng(tree, regMask, avoidReg);
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+
+ regPair = tree->gtRegPair;
+ tmpMask = genRegPairMask(regPair);
+
+ rLo = genRegPairLo(regPair);
+ rHi = genRegPairHi(regPair);
+
+ /* At least one half is in a real register */
+
+ noway_assert(rLo != REG_STK || rHi != REG_STK);
+
+ /* Did the value end up in an acceptable register pair? */
+
+ if (needRegPair != REG_PAIR_NONE)
+ {
+ if (needRegPair != regPair)
+ {
+ /* This is a workaround. If we specify a regPair for genMoveRegPair, */
+ /* it expects the source pair to be marked as used */
+ regSet.rsMarkRegPairUsed(tree);
+ genMoveRegPair(tree, 0, needRegPair);
+ }
+ }
+ else if (freeOnly)
+ {
+ /* Do we have to end up with a free register pair?
+ Something might have gotten freed up above */
+ bool mustMoveReg = false;
+
+ regMask = regSet.rsRegMaskFree() & ~avoidReg;
+
+ if (genMaxOneBit(regMask))
+ regMask = regSet.rsRegMaskFree();
+
+ if ((tmpMask & regMask) != tmpMask || rLo == REG_STK || rHi == REG_STK)
+ {
+ /* Note that we must call genMoveRegPair if one of our registers
+ comes from the used mask, so that it will be properly spilled. */
+
+ mustMoveReg = true;
+ }
+
+ if (genMaxOneBit(regMask))
+ regMask |= regSet.rsRegMaskCanGrab() & ~avoidReg;
+
+ if (genMaxOneBit(regMask))
+ regMask |= regSet.rsRegMaskCanGrab();
+
+ /* Did the value end up in a free register pair? */
+
+ if (mustMoveReg)
+ {
+ /* We'll have to move the value to a free (trashable) pair */
+ genMoveRegPair(tree, regMask, REG_PAIR_NONE);
+ }
+ }
+ else
+ {
+ noway_assert(needRegPair == REG_PAIR_NONE);
+ noway_assert(!freeOnly);
+
+ /* it is possible for tmpMask to overlap regSet.rsMaskUsed */
+ tmpUsedMask = tmpMask & regSet.rsMaskUsed;
+ tmpMask &= ~regSet.rsMaskUsed;
+
+ /* Make sure that the value is in "real" registers */
+ if (rLo == REG_STK)
+ {
+ /* Get one of the desired registers, but exclude rHi */
+
+ regSet.rsLockReg(tmpMask);
+ regSet.rsLockUsedReg(tmpUsedMask);
+
+ regNumber reg = regSet.rsPickReg(regMask);
+
+ regSet.rsUnlockUsedReg(tmpUsedMask);
+ regSet.rsUnlockReg(tmpMask);
+
+ inst_RV_TT(ins_Load(TYP_INT), reg, tree, 0);
+
+ tree->gtRegPair = gen2regs2pair(reg, rHi);
+
+ regTracker.rsTrackRegTrash(reg);
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ }
+ else if (rHi == REG_STK)
+ {
+ /* Get one of the desired registers, but exclude rLo */
+
+ regSet.rsLockReg(tmpMask);
+ regSet.rsLockUsedReg(tmpUsedMask);
+
+ regNumber reg = regSet.rsPickReg(regMask);
+
+ regSet.rsUnlockUsedReg(tmpUsedMask);
+ regSet.rsUnlockReg(tmpMask);
+
+ inst_RV_TT(ins_Load(TYP_INT), reg, tree, 4);
+
+ tree->gtRegPair = gen2regs2pair(rLo, reg);
+
+ regTracker.rsTrackRegTrash(reg);
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ }
+ }
+
+ /* Does the caller want us to mark the register as used? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ /* In case we're computing a value into a register variable */
+
+ genUpdateLife(tree);
+
+ /* Mark the register as 'used' */
+
+ regSet.rsMarkRegPairUsed(tree);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Same as genComputeRegPair(), the only difference being that the result
+ * is guaranteed to end up in a trashable register pair.
+ */
+
+// inline
+void CodeGen::genCompIntoFreeRegPair(GenTreePtr tree, regMaskTP avoidReg, RegSet::KeepReg keepReg)
+{
+ genComputeRegPair(tree, REG_PAIR_NONE, avoidReg, keepReg, true);
+}
+
+/*****************************************************************************
+ *
+ * The value 'tree' was earlier computed into a register pair; free up that
+ * register pair (but also make sure the value is presently in a register
+ * pair).
+ */
+
+void CodeGen::genReleaseRegPair(GenTreePtr tree)
+{
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* The register has been spilled -- reload it */
+
+ regSet.rsUnspillRegPair(tree, 0, RegSet::FREE_REG);
+ return;
+ }
+
+ regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
+}
+
+/*****************************************************************************
+ *
+ * The value 'tree' was earlier computed into a register pair. Check whether
+ * either register of that pair has been spilled (and reload it if so), and
+ * if 'keepReg' is 0, free the register pair.
+ */
+
+void CodeGen::genRecoverRegPair(GenTreePtr tree, regPairNo regPair, RegSet::KeepReg keepReg)
+{
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ regMaskTP regMask;
+
+ if (regPair == REG_PAIR_NONE)
+ regMask = RBM_NONE;
+ else
+ regMask = genRegPairMask(regPair);
+
+ /* The register pair has been spilled -- reload it */
+
+ regSet.rsUnspillRegPair(tree, regMask, RegSet::KEEP_REG);
+ }
+
+ /* Does the caller insist on the value being in a specific place? */
+
+ if (regPair != REG_PAIR_NONE && regPair != tree->gtRegPair)
+ {
+ /* No good -- we'll have to move the value to a new place */
+
+ genMoveRegPair(tree, 0, regPair);
+
+ /* Mark the pair as used if appropriate */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegPairUsed(tree);
+
+ return;
+ }
+
+ /* Free the register pair if the caller desired so */
+
+ if (keepReg == RegSet::FREE_REG)
+ regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
+}
+
+/*****************************************************************************
+ *
+ * Compute the given long value into the specified register pair; don't mark
+ * the register pair as used.
+ */
+
+// inline
+void CodeGen::genEvalIntoFreeRegPair(GenTreePtr tree, regPairNo regPair, regMaskTP avoidReg)
+{
+ genComputeRegPair(tree, regPair, avoidReg, RegSet::KEEP_REG);
+ genRecoverRegPair(tree, regPair, RegSet::FREE_REG);
+}
+
+/*****************************************************************************
+ * This helper makes sure that the regpair target of an assignment is
+ * available for use. This needs to be called in genCodeForTreeLng just before
+ * a long assignment, but must not be called until everything has been
+ * evaluated, or else we might try to spill enregistered variables.
+ *
+ */
+
+// inline
+void CodeGen::genMakeRegPairAvailable(regPairNo regPair)
+{
+ /* Make sure the target of the store is available */
+
+ regNumber regLo = genRegPairLo(regPair);
+ regNumber regHi = genRegPairHi(regPair);
+
+ if ((regHi != REG_STK) && (regSet.rsMaskUsed & genRegMask(regHi)))
+ regSet.rsSpillReg(regHi);
+
+ if ((regLo != REG_STK) && (regSet.rsMaskUsed & genRegMask(regLo)))
+ regSet.rsSpillReg(regLo);
+}
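+
+// Illustrative sketch (editor's note, not part of the original source): the calling order that
+// the header comment above prescribes for a long assignment in genCodeForTreeLng. 'src',
+// 'dstPair' and 'avoidMask' are hypothetical names; the point is that genMakeRegPairAvailable()
+// runs only after the RHS has been fully evaluated, so enregistered variables aren't spilled early.
+//
+//     genComputeRegPair(src, REG_PAIR_NONE, avoidMask, RegSet::KEEP_REG); // evaluate the RHS first
+//     genMakeRegPairAvailable(dstPair);                                   // then free up the target pair
+//     genRecoverRegPair(src, dstPair, RegSet::FREE_REG);                  // move/reload the RHS into it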
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Return true if the given tree 'addr' can be computed via an addressing mode,
+ * such as "[ebx+esi*4+20]". If the expression isn't an address mode already,
+ * try to make it so (but we don't try 'too hard' to accomplish this).
+ *
+ * If we end up needing a register (or two registers) to hold some part(s) of the
+ * address, we return the use register mask via '*useMaskPtr'.
+ *
+ * If keepReg==RegSet::KEEP_REG, the registers (viz. *useMaskPtr) will be marked as
+ * in use. The caller would then be responsible for calling
+ * regSet.rsMarkRegFree(*useMaskPtr).
+ *
+ * If keepReg==RegSet::FREE_REG, then the caller needs to update the GC-tracking by
+ * calling genDoneAddressable(addr, *useMaskPtr, RegSet::FREE_REG);
+ */
+
+bool CodeGen::genMakeIndAddrMode(GenTreePtr addr,
+ GenTreePtr oper,
+ bool forLea,
+ regMaskTP regMask,
+ RegSet::KeepReg keepReg,
+ regMaskTP* useMaskPtr,
+ bool deferOK)
+{
+ if (addr->gtOper == GT_ARR_ELEM)
+ {
+ regMaskTP regs = genMakeAddrArrElem(addr, oper, RBM_ALLINT, keepReg);
+ *useMaskPtr = regs;
+ return true;
+ }
+
+ bool rev;
+ GenTreePtr rv1;
+ GenTreePtr rv2;
+ bool operIsArrIndex; // is oper an array index
+ GenTreePtr scaledIndex; // If scaled addressing mode can't be used
+
+ regMaskTP anyMask = RBM_ALLINT;
+
+ unsigned cns;
+ unsigned mul;
+
+ GenTreePtr tmp;
+ int ixv = INT_MAX; // unset value
+
+ GenTreePtr scaledIndexVal;
+
+ regMaskTP newLiveMask;
+ regMaskTP rv1Mask;
+ regMaskTP rv2Mask;
+
+ /* Deferred address mode forming NYI for x86 */
+
+ noway_assert(deferOK == false);
+
+ noway_assert(oper == NULL ||
+ ((oper->OperIsIndir() || oper->OperIsAtomicOp()) &&
+ ((oper->gtOper == GT_CMPXCHG && oper->gtCmpXchg.gtOpLocation == addr) || oper->gtOp.gtOp1 == addr)));
+ operIsArrIndex = (oper != nullptr && oper->OperGet() == GT_IND && (oper->gtFlags & GTF_IND_ARR_INDEX) != 0);
+
+ if (addr->gtOper == GT_LEA)
+ {
+ rev = (addr->gtFlags & GTF_REVERSE_OPS) != 0;
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ rv1 = lea->Base();
+ rv2 = lea->Index();
+ mul = lea->gtScale;
+ cns = lea->gtOffset;
+
+ if (rv1 != NULL && rv2 == NULL && cns == 0 && (rv1->gtFlags & GTF_REG_VAL) != 0)
+ {
+ scaledIndex = NULL;
+ goto YES;
+ }
+ }
+ else
+ {
+ // NOTE: FOR NOW THIS ISN'T APPROPRIATELY INDENTED - THIS IS TO MAKE IT
+ // EASIER TO MERGE
+
+ /* Is the complete address already sitting in a register? */
+
+ if ((addr->gtFlags & GTF_REG_VAL) || (addr->gtOper == GT_LCL_VAR && genMarkLclVar(addr)))
+ {
+ genUpdateLife(addr);
+
+ rv1 = addr;
+ rv2 = scaledIndex = 0;
+ cns = 0;
+
+ goto YES;
+ }
+
+ /* Is it an absolute address */
+
+ if (addr->IsCnsIntOrI())
+ {
+ rv1 = rv2 = scaledIndex = 0;
+ // along this code path cns is never used, so place a BOGUS value in it as proof
+ // cns = addr->gtIntCon.gtIconVal;
+ cns = UINT_MAX;
+
+ goto YES;
+ }
+
+ /* Is there a chance of forming an address mode? */
+
+ if (!genCreateAddrMode(addr, forLea ? 1 : 0, false, regMask, &rev, &rv1, &rv2, &mul, &cns))
+ {
+ /* This better not be an array index */
+ noway_assert(!operIsArrIndex);
+
+ return false;
+ }
+ // THIS IS THE END OF THE INAPPROPRIATELY INDENTED SECTION
+ }
+
+ /* For scaled array access, RV2 may not be pointing to the index of the
+ array if the CPU does not support the needed scaling factor. We will
+ make it point to the actual index, and scaledIndex will point to
+ the scaled value */
+
+ scaledIndex = NULL;
+ scaledIndexVal = NULL;
+
+ if (operIsArrIndex && rv2 != NULL && (rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) &&
+ rv2->gtOp.gtOp2->IsIntCnsFitsInI32())
+ {
+ scaledIndex = rv2;
+ compiler->optGetArrayRefScaleAndIndex(scaledIndex, &scaledIndexVal DEBUGARG(true));
+
+ noway_assert(scaledIndex->gtOp.gtOp2->IsIntCnsFitsInI32());
+ }
+
+ /* Has the address already been computed? */
+
+ if (addr->gtFlags & GTF_REG_VAL)
+ {
+ if (forLea)
+ return true;
+
+ rv1 = addr;
+ rv2 = NULL;
+ scaledIndex = NULL;
+ genUpdateLife(addr);
+ goto YES;
+ }
+
+ /*
+ Here we have the following operands:
+
+ rv1 ..... base address
+ rv2 ..... offset value (or NULL)
+ mul ..... multiplier for rv2 (or 0)
+ cns ..... additional constant (or 0)
+
+ The first operand must be present (and be an address) unless we're
+ computing an expression via 'LEA'. The scaled operand is optional,
+ but must not be a pointer if present.
+ */
+
+ noway_assert(rv2 == NULL || !varTypeIsGC(rv2->TypeGet()));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Make sure both rv1 and rv2 (if present) are in registers
+ *
+ */
+
+ // Trivial case : Is either rv1 or rv2 a NULL ?
+
+ if (!rv2)
+ {
+ /* A single operand, make sure it's in a register */
+
+ if (cns != 0)
+ {
+ // In the case where "rv1" is already in a register, there's no reason to get it into a
+ // register in "regMask" yet, if there's a non-zero constant that we're going to add;
+ // if there is, we can do an LEA.
+ genCodeForTree(rv1, RBM_NONE);
+ }
+ else
+ {
+ genCodeForTree(rv1, regMask);
+ }
+ goto DONE_REGS;
+ }
+ else if (!rv1)
+ {
+ /* A single (scaled) operand, make sure it's in a register */
+
+ genCodeForTree(rv2, 0);
+ goto DONE_REGS;
+ }
+
+ /* At this point, both rv1 and rv2 are non-NULL and we have to make sure
+ they are in registers */
+
+ noway_assert(rv1 && rv2);
+
+ /* If we have to check a constant array index, compare it against
+ the array dimension (see below) but then fold the index with a
+ scaling factor (if any) and additional offset (if any).
+ */
+
+ if (rv2->gtOper == GT_CNS_INT || (scaledIndex != NULL && scaledIndexVal->gtOper == GT_CNS_INT))
+ {
+ if (scaledIndex != NULL)
+ {
+ assert(rv2 == scaledIndex && scaledIndexVal != NULL);
+ rv2 = scaledIndexVal;
+ }
+ /* We must have a range-checked index operation */
+
+ noway_assert(operIsArrIndex);
+
+ /* Get hold of the index value and see if it's a constant */
+
+ if (rv2->IsIntCnsFitsInI32())
+ {
+ ixv = (int)rv2->gtIntCon.gtIconVal;
+ // Maybe I should just set "fold" true in the call to genMakeAddressable above.
+ if (scaledIndex != NULL)
+ {
+ int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK --
+ // multiple of 2^6.
+ if (mul == 0)
+ {
+ mul = scale;
+ }
+ else
+ {
+ mul *= scale;
+ }
+ }
+ rv2 = scaledIndex = NULL;
+
+ /* Add the scaled index into the added value */
+
+ if (mul)
+ cns += ixv * mul;
+ else
+ cns += ixv;
+
+ /* Make sure 'rv1' is in a register */
+
+ genCodeForTree(rv1, regMask);
+
+ goto DONE_REGS;
+ }
+ }
+
+ if (rv1->gtFlags & GTF_REG_VAL)
+ {
+ /* op1 already in register - how about op2? */
+
+ if (rv2->gtFlags & GTF_REG_VAL)
+ {
+ /* Great - both operands are in registers already. Just update
+ the liveness and we are done. */
+
+ if (rev)
+ {
+ genUpdateLife(rv2);
+ genUpdateLife(rv1);
+ }
+ else
+ {
+ genUpdateLife(rv1);
+ genUpdateLife(rv2);
+ }
+
+ goto DONE_REGS;
+ }
+
+ /* rv1 is in a register, but rv2 isn't */
+
+ if (!rev)
+ {
+ /* rv1 is already materialized in a register. Just update liveness
+ to rv1 and generate code for rv2 */
+
+ genUpdateLife(rv1);
+ regSet.rsMarkRegUsed(rv1, oper);
+ }
+
+ goto GEN_RV2;
+ }
+ else if (rv2->gtFlags & GTF_REG_VAL)
+ {
+ /* rv2 is in a register, but rv1 isn't */
+
+ noway_assert(rv2->gtOper == GT_REG_VAR);
+
+ if (rev)
+ {
+ /* rv2 is already materialized in a register. Update liveness
+ to after rv2 and then hang on to rv2 */
+
+ genUpdateLife(rv2);
+ regSet.rsMarkRegUsed(rv2, oper);
+ }
+
+ /* Generate code for the first operand */
+
+ genCodeForTree(rv1, regMask);
+
+ if (rev)
+ {
+ // Free up rv2 in the right fashion (it might be re-marked if keepReg)
+ regSet.rsMarkRegUsed(rv1, oper);
+ regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
+ genReleaseReg(rv2);
+ regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
+ genReleaseReg(rv1);
+ }
+ else
+ {
+ /* We have evaluated rv1, and now we just need to update liveness
+ to rv2 which was already in a register */
+
+ genUpdateLife(rv2);
+ }
+
+ goto DONE_REGS;
+ }
+
+ if (forLea && !cns)
+ return false;
+
+ /* Make sure we preserve the correct operand order */
+
+ if (rev)
+ {
+ /* Generate the second operand first */
+
+ // Determine what registers go live between rv2 and rv1
+ newLiveMask = genNewLiveRegMask(rv2, rv1);
+
+ rv2Mask = regMask & ~newLiveMask;
+ rv2Mask &= ~rv1->gtRsvdRegs;
+
+ if (rv2Mask == RBM_NONE)
+ {
+ // The regMask hint cannot be honored
+ // We probably have a call that trashes the register(s) in regMask
+ // so ignore the regMask hint, but try to avoid using
+ // the registers in newLiveMask and the rv1->gtRsvdRegs
+ //
+ rv2Mask = RBM_ALLINT & ~newLiveMask;
+ rv2Mask = regSet.rsMustExclude(rv2Mask, rv1->gtRsvdRegs);
+ }
+
+ genCodeForTree(rv2, rv2Mask);
+ regMask &= ~genRegMask(rv2->gtRegNum);
+
+ regSet.rsMarkRegUsed(rv2, oper);
+
+ /* Generate the first operand second */
+
+ genCodeForTree(rv1, regMask);
+ regSet.rsMarkRegUsed(rv1, oper);
+
+ /* Free up both operands in the right order (they might be
+ re-marked as used below)
+ */
+ regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
+ genReleaseReg(rv2);
+ regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
+ genReleaseReg(rv1);
+ }
+ else
+ {
+ /* Get the first operand into a register */
+
+ // Determine what registers go live between rv1 and rv2
+ newLiveMask = genNewLiveRegMask(rv1, rv2);
+
+ rv1Mask = regMask & ~newLiveMask;
+ rv1Mask &= ~rv2->gtRsvdRegs;
+
+ if (rv1Mask == RBM_NONE)
+ {
+ // The regMask hint cannot be honored
+ // We probably have a call that trashes the register(s) in regMask
+ // so ignore the regMask hint, but try to avoid using
+ // the registers in liveMask and the rv2->gtRsvdRegs
+ //
+ rv1Mask = RBM_ALLINT & ~newLiveMask;
+ rv1Mask = regSet.rsMustExclude(rv1Mask, rv2->gtRsvdRegs);
+ }
+
+ genCodeForTree(rv1, rv1Mask);
+ regSet.rsMarkRegUsed(rv1, oper);
+
+ GEN_RV2:
+
+ /* Here, we need to get rv2 in a register. We have either already
+ materialized rv1 into a register, or it was already in one */
+
+ noway_assert(rv1->gtFlags & GTF_REG_VAL);
+ noway_assert(rev || regSet.rsIsTreeInReg(rv1->gtRegNum, rv1));
+
+ /* Generate the second operand as well */
+
+ regMask &= ~genRegMask(rv1->gtRegNum);
+ genCodeForTree(rv2, regMask);
+
+ if (rev)
+ {
+ /* rev==true means the evaluation order is rv2,rv1. We just
+ evaluated rv2, and rv1 was already in a register. Just
+ update liveness to rv1 and we are done. */
+
+ genUpdateLife(rv1);
+ }
+ else
+ {
+ /* We have evaluated rv1 and rv2. Free up both operands in
+ the right order (they might be re-marked as used below) */
+
+ /* Even though we have not explicitly marked rv2 as used,
+ rv2->gtRegNum may be used if rv2 is a multi-use or
+ an enregistered variable. */
+ regMaskTP rv2Used;
+ regSet.rsLockReg(genRegMask(rv2->gtRegNum), &rv2Used);
+
+ /* Check for special case both rv1 and rv2 are the same register */
+ if (rv2Used != genRegMask(rv1->gtRegNum))
+ {
+ genReleaseReg(rv1);
+ regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
+ }
+ else
+ {
+ regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
+ genReleaseReg(rv1);
+ }
+ }
+ }
+
+/*-------------------------------------------------------------------------
+ *
+ * At this point, both rv1 and rv2 (if present) are in registers
+ *
+ */
+
+DONE_REGS:
+
+ /* We must verify that 'rv1' and 'rv2' are both sitting in registers */
+
+ if (rv1 && !(rv1->gtFlags & GTF_REG_VAL))
+ return false;
+ if (rv2 && !(rv2->gtFlags & GTF_REG_VAL))
+ return false;
+
+YES:
+
+ // *(intVar1+intVar1) causes problems as we
+ // call regSet.rsMarkRegUsed(op1) and regSet.rsMarkRegUsed(op2). So the calling function
+ // needs to know that it has to call rsFreeReg(reg1) twice. We can't do
+ // that currently as we return a single mask in useMaskPtr.
+
+ if ((keepReg == RegSet::KEEP_REG) && oper && rv1 && rv2 && (rv1->gtFlags & rv2->gtFlags & GTF_REG_VAL))
+ {
+ if (rv1->gtRegNum == rv2->gtRegNum)
+ {
+ noway_assert(!operIsArrIndex);
+ return false;
+ }
+ }
+
+ /* Check either register operand to see if it needs to be saved */
+
+ if (rv1)
+ {
+ noway_assert(rv1->gtFlags & GTF_REG_VAL);
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ regSet.rsMarkRegUsed(rv1, oper);
+ }
+ else
+ {
+ /* If the register holds an address, mark it */
+
+ gcInfo.gcMarkRegPtrVal(rv1->gtRegNum, rv1->TypeGet());
+ }
+ }
+
+ if (rv2)
+ {
+ noway_assert(rv2->gtFlags & GTF_REG_VAL);
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegUsed(rv2, oper);
+ }
+
+ if (deferOK)
+ {
+ noway_assert(!scaledIndex);
+ return true;
+ }
+
+ /* Compute the set of registers the address depends on */
+
+ regMaskTP useMask = RBM_NONE;
+
+ if (rv1)
+ {
+ if (rv1->gtFlags & GTF_SPILLED)
+ regSet.rsUnspillReg(rv1, 0, RegSet::KEEP_REG);
+
+ noway_assert(rv1->gtFlags & GTF_REG_VAL);
+ useMask |= genRegMask(rv1->gtRegNum);
+ }
+
+ if (rv2)
+ {
+ if (rv2->gtFlags & GTF_SPILLED)
+ {
+ if (rv1)
+ {
+ regMaskTP lregMask = genRegMask(rv1->gtRegNum);
+ regMaskTP used;
+
+ regSet.rsLockReg(lregMask, &used);
+ regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
+ regSet.rsUnlockReg(lregMask, used);
+ }
+ else
+ regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
+ }
+ noway_assert(rv2->gtFlags & GTF_REG_VAL);
+ useMask |= genRegMask(rv2->gtRegNum);
+ }
+
+ /* Tell the caller which registers we need to hang on to */
+
+ *useMaskPtr = useMask;
+
+ return true;
+}
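+
+// Illustrative sketch (editor's note, not part of the original source): the caller-side contract
+// described in the header comment of genMakeIndAddrMode(). 'addr', 'ind', 'someReg' and 'useMask'
+// are hypothetical names, and the compare is an arbitrary example instruction.
+//
+//     regMaskTP useMask;
+//     if (genMakeIndAddrMode(addr, ind, false, RBM_ALLINT, RegSet::KEEP_REG, &useMask, false))
+//     {
+//         inst_TT_RV(INS_cmp, ind, someReg);   // operate through the formed address mode
+//         regSet.rsMarkRegFree(useMask);       // KEEP_REG: the caller frees the registers
+//     }
+//
+// With RegSet::FREE_REG instead, the caller would finish with
+// genDoneAddressable(addr, useMask, RegSet::FREE_REG) to keep the GC tracking up to date.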
+
+/*****************************************************************************
+ *
+ * 'oper' is an array bounds check (a GT_ARR_BOUNDS_CHECK node).
+ */
+
+void CodeGen::genRangeCheck(GenTreePtr oper)
+{
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
+ GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
+
+ GenTreePtr arrLen = bndsChk->gtArrLen;
+ GenTreePtr arrRef = NULL;
+ int lenOffset = 0;
+
+ // If "arrLen" is a ARR_LENGTH operation, get the array whose length that takes in a register.
+ // Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
+ // a register.
+
+ if (arrLen->OperGet() == GT_ARR_LENGTH)
+ {
+ GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
+ lenOffset = arrLenExact->ArrLenOffset();
+
+#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
+ // We always load the length into a register on ARM and x64.
+
+ // 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
+ // lengths, but the index expression *can* be native int (64-bits)
+ arrRef = arrLenExact->ArrRef();
+ genCodeForTree(arrRef, RBM_ALLINT);
+ noway_assert(arrRef->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(arrRef);
+ noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
+#endif
+ }
+#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
+ // This is another form in which we have an array reference and a constant length. Don't use
+ // on LOAD_STORE or 64BIT.
+ else if (arrLen->OperGet() == GT_IND && arrLen->gtOp.gtOp1->IsAddWithI32Const(&arrRef, &lenOffset))
+ {
+ genCodeForTree(arrRef, RBM_ALLINT);
+ noway_assert(arrRef->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(arrRef);
+ noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
+ }
+#endif
+
+ // If we didn't find one of the special forms above, generate code to evaluate the array length to a register.
+ if (arrRef == NULL)
+ {
+ // (Unless it's a constant.)
+ if (!arrLen->IsCnsIntOrI())
+ {
+ genCodeForTree(arrLen, RBM_ALLINT);
+ regSet.rsMarkRegUsed(arrLen);
+
+ noway_assert(arrLen->gtFlags & GTF_REG_VAL);
+ noway_assert(regSet.rsMaskUsed & genRegMask(arrLen->gtRegNum));
+ }
+ }
+
+ /* Is the array index a constant value? */
+ GenTreePtr index = bndsChk->gtIndex;
+ if (!index->IsCnsIntOrI())
+ {
+ // No, it's not a constant.
+ genCodeForTree(index, RBM_ALLINT);
+ regSet.rsMarkRegUsed(index);
+
+ // If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
+ // from its register, get it back in a register.
+ if (arrRef != NULL)
+ genRecoverReg(arrRef, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
+ else if (!arrLen->IsCnsIntOrI())
+ genRecoverReg(arrLen, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
+
+ /* Make sure we have the values we expect */
+ noway_assert(index->gtFlags & GTF_REG_VAL);
+ noway_assert(regSet.rsMaskUsed & genRegMask(index->gtRegNum));
+
+ noway_assert(index->TypeGet() == TYP_I_IMPL ||
+ (varTypeIsIntegral(index->TypeGet()) && !varTypeIsLong(index->TypeGet())));
+ var_types indxType = index->TypeGet();
+ if (indxType != TYP_I_IMPL)
+ indxType = TYP_INT;
+
+ if (arrRef != NULL)
+ { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
+
+ /* Generate "cmp index, [arrRef+LenOffs]" */
+ inst_RV_AT(INS_cmp, emitTypeSize(indxType), indxType, index->gtRegNum, arrRef, lenOffset);
+ }
+ else if (arrLen->IsCnsIntOrI())
+ {
+ ssize_t len = arrLen->AsIntConCommon()->IconValue();
+ inst_RV_IV(INS_cmp, index->gtRegNum, len, EA_4BYTE);
+ }
+ else
+ {
+ inst_RV_RV(INS_cmp, index->gtRegNum, arrLen->gtRegNum, indxType, emitTypeSize(indxType));
+ }
+
+ /* Generate "jae <fail_label>" */
+
+ noway_assert(oper->gtOper == GT_ARR_BOUNDS_CHECK);
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ else
+ {
+ /* Generate "cmp [rv1+LenOffs], cns" */
+
+ bool indIsInt = true;
+#ifdef _TARGET_64BIT_
+ int ixv = 0;
+ ssize_t ixvFull = index->AsIntConCommon()->IconValue();
+ if (ixvFull > INT32_MAX)
+ {
+ indIsInt = false;
+ }
+ else
+ {
+ ixv = (int)ixvFull;
+ }
+#else
+ ssize_t ixvFull = index->AsIntConCommon()->IconValue();
+ int ixv = (int)ixvFull;
+#endif
+ if (arrRef != NULL && indIsInt)
+ { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
+ /* Generate "cmp [arrRef+LenOffs], ixv" */
+ inst_AT_IV(INS_cmp, EA_4BYTE, arrRef, ixv, lenOffset);
+ // Generate "jbe <fail_label>"
+ emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ else if (arrLen->IsCnsIntOrI())
+ {
+ ssize_t lenv = arrLen->AsIntConCommon()->IconValue();
+ // Both are constants; decide at compile time.
+ if (!(0 <= ixvFull && ixvFull < lenv))
+ {
+ genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ }
+ else if (!indIsInt)
+ {
+ genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ else
+ {
+ /* Generate "cmp arrLen, ixv" */
+ inst_RV_IV(INS_cmp, arrLen->gtRegNum, ixv, EA_4BYTE);
+ // Generate "jbe <fail_label>"
+ emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ }
+
+ // Free the registers that were used.
+ if (arrRef != NULL)
+ {
+ regSet.rsMarkRegFree(arrRef->gtRegNum, arrRef);
+ }
+ else if (!arrLen->IsCnsIntOrI())
+ {
+ regSet.rsMarkRegFree(arrLen->gtRegNum, arrLen);
+ }
+
+ if (!index->IsCnsIntOrI())
+ {
+ regSet.rsMarkRegFree(index->gtRegNum, index);
+ }
+}
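+
+// Illustrative sketch (editor's note, not part of the original source): the two main shapes of the
+// check emitted above, in x86-style pseudo-assembly ('idxReg'/'idxConst' are placeholders):
+//
+//     cmp idxReg, [arrRef + lenOffset]     ; variable index: fail if index >= length (unsigned)
+//     jae RNGCHK_FAIL
+//
+//     cmp [arrRef + lenOffset], idxConst   ; constant index: fail if length <= index (unsigned)
+//     jbe RNGCHK_FAIL
+//
+// When the length is already in a register (or is a constant), the same unsigned compare is done
+// register-to-register (or against the immediate) instead of through memory.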
+
+/*****************************************************************************
+ *
+ * If compiling without REDUNDANT_LOAD, same as genMakeAddressable().
+ * Otherwise, check if rvalue is in register. If so, mark it. Then
+ * call genMakeAddressable(). Needed because genMakeAddressable is used
+ * for both lvalue and rvalue, and we only can do this for rvalue.
+ */
+
+// inline
+regMaskTP CodeGen::genMakeRvalueAddressable(
+ GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool forLoadStore, bool smallOK)
+{
+ regNumber reg;
+
+#if REDUNDANT_LOAD
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
+
+ if (reg != REG_NA && (needReg == 0 || (genRegMask(reg) & needReg) != 0))
+ {
+ noway_assert(!isRegPairType(tree->gtType));
+
+ genMarkTreeInReg(tree, reg);
+ }
+ }
+
+#endif
+
+ return genMakeAddressable2(tree, needReg, keepReg, forLoadStore, smallOK);
+}
+
+/*****************************************************************************/
+
+bool CodeGen::genIsLocalLastUse(GenTreePtr tree)
+{
+ const LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
+
+ noway_assert(tree->OperGet() == GT_LCL_VAR);
+ noway_assert(varDsc->lvTracked);
+
+ return ((tree->gtFlags & GTF_VAR_DEATH) != 0);
+}
+
+/*****************************************************************************
+ *
+ * This is genMakeAddressable(GT_ARR_ELEM).
+ * Makes the array element addressable and returns the addressability registers.
+ * It also marks them as used if keepReg==RegSet::KEEP_REG.
+ * tree is the dependent tree.
+ *
+ * Note that an array element needs 2 registers to be addressable: the
+ * array-object and the offset. This function marks gtArrObj and gtArrInds[0]
+ * with the 2 registers so that other functions (like instGetAddrMode()) know
+ * where to look for the offset to use.
+ */
+
+regMaskTP CodeGen::genMakeAddrArrElem(GenTreePtr arrElem, GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
+{
+ noway_assert(arrElem->gtOper == GT_ARR_ELEM);
+ noway_assert(!tree || tree->gtOper == GT_IND || tree == arrElem);
+
+ /* Evaluate all the operands. We don't evaluate them into registers yet
+ as GT_ARR_ELEM does not reorder the evaluation of the operands, and
+ hence may use a sub-optimal ordering. We try to improve this
+ situation somewhat by accessing the operands in stages
+ (genMakeAddressable2 + genComputeAddressable and
+ genCompIntoFreeReg + genRecoverReg).
+
+ Note: we compute operands into free regs to avoid multiple uses of
+ the same register. Multi-use would cause problems when we free
+ registers in FIFO order instead of the assumed LIFO order that
+ applies to all types of tree nodes except for GT_ARR_ELEM.
+ */
+
+ GenTreePtr arrObj = arrElem->gtArrElem.gtArrObj;
+ unsigned rank = arrElem->gtArrElem.gtArrRank;
+ var_types elemType = arrElem->gtArrElem.gtArrElemType;
+ regMaskTP addrReg = RBM_NONE;
+ regMaskTP regNeed = RBM_ALLINT;
+
+#if FEATURE_WRITE_BARRIER && !NOGC_WRITE_BARRIERS
+ // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0
+ // since the arrObj participates in the lea/add instruction
+ // that computes ARG_0 we should avoid putting it in ARG_1
+ //
+ if (varTypeIsGC(elemType))
+ {
+ regNeed &= ~RBM_ARG_1;
+ }
+#endif
+
+ // Strip off any comma expression.
+ arrObj = genCodeForCommaTree(arrObj);
+
+ // Having generated the code for the comma, we don't care about it anymore.
+ arrElem->gtArrElem.gtArrObj = arrObj;
+
+ // If the array ref is a stack var that's dying here we have to move it
+ // into a register (regalloc already counts on this), since if it's a GC pointer
+ // it can be collected from here on. This is not an issue for locals that are
+ // in a register, as they get marked as used and will be tracked.
+ // The bug that caused this is #100776. (untracked vars?)
+ if (arrObj->OperGet() == GT_LCL_VAR && compiler->optIsTrackedLocal(arrObj) && genIsLocalLastUse(arrObj) &&
+ !genMarkLclVar(arrObj))
+ {
+ genCodeForTree(arrObj, regNeed);
+ regSet.rsMarkRegUsed(arrObj, 0);
+ addrReg = genRegMask(arrObj->gtRegNum);
+ }
+ else
+ {
+ addrReg = genMakeAddressable2(arrObj, regNeed, RegSet::KEEP_REG,
+ true, // forLoadStore
+ false, // smallOK
+ false, // deferOK
+ true); // evalSideEffs
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < rank; dim++)
+ genCompIntoFreeReg(arrElem->gtArrElem.gtArrInds[dim], RBM_NONE, RegSet::KEEP_REG);
+
+ /* Ensure that the array-object is in a register */
+
+ addrReg = genKeepAddressable(arrObj, addrReg);
+ genComputeAddressable(arrObj, addrReg, RegSet::KEEP_REG, regNeed, RegSet::KEEP_REG);
+
+ regNumber arrReg = arrObj->gtRegNum;
+ regMaskTP arrRegMask = genRegMask(arrReg);
+ regMaskTP indRegMask = RBM_ALLINT & ~arrRegMask;
+ regSet.rsLockUsedReg(arrRegMask);
+
+ /* Now process all the indices, do the range check, and compute
+ the offset of the element */
+
+ regNumber accReg = DUMMY_INIT(REG_CORRUPT); // accumulates the offset calculation
+
+ for (dim = 0; dim < rank; dim++)
+ {
+ GenTreePtr index = arrElem->gtArrElem.gtArrInds[dim];
+
+ /* Get the index into a free register (other than the register holding the array) */
+
+ genRecoverReg(index, indRegMask, RegSet::KEEP_REG);
+
+#if CPU_LOAD_STORE_ARCH
+ /* Subtract the lower bound, and do the range check */
+
+ regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
+ getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
+ regTracker.rsTrackRegTrash(valueReg);
+ getEmitter()->emitIns_R_R(INS_sub, EA_4BYTE, index->gtRegNum, valueReg);
+ regTracker.rsTrackRegTrash(index->gtRegNum);
+
+ getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, index->gtRegNum, valueReg);
+#else
+ /* Subtract the lower bound, and do the range check */
+ getEmitter()->emitIns_R_AR(INS_sub, EA_4BYTE, index->gtRegNum, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
+ regTracker.rsTrackRegTrash(index->gtRegNum);
+
+ getEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, index->gtRegNum, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+#endif
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
+
+ if (dim == 0)
+ {
+ /* Hang on to the register of the first index */
+
+ noway_assert(accReg == DUMMY_INIT(REG_CORRUPT));
+ accReg = index->gtRegNum;
+ noway_assert(accReg != arrReg);
+ regSet.rsLockUsedReg(genRegMask(accReg));
+ }
+ else
+ {
+ /* Evaluate accReg = accReg*dim_size + index */
+
+ noway_assert(accReg != DUMMY_INIT(REG_CORRUPT));
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+ regTracker.rsTrackRegTrash(valueReg);
+ getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, valueReg);
+#else
+ getEmitter()->emitIns_R_AR(INS_MUL, EA_4BYTE, accReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+#endif
+
+ inst_RV_RV(INS_add, accReg, index->gtRegNum);
+ regSet.rsMarkRegFree(index->gtRegNum, index);
+ regTracker.rsTrackRegTrash(accReg);
+ }
+ }
+
+ if (!jitIsScaleIndexMul(arrElem->gtArrElem.gtArrElemSize))
+ {
+ regNumber sizeReg = genGetRegSetToIcon(arrElem->gtArrElem.gtArrElemSize);
+
+ getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, sizeReg);
+ regTracker.rsTrackRegTrash(accReg);
+ }
+
+ regSet.rsUnlockUsedReg(genRegMask(arrReg));
+ regSet.rsUnlockUsedReg(genRegMask(accReg));
+
+ regSet.rsMarkRegFree(genRegMask(arrReg));
+ regSet.rsMarkRegFree(genRegMask(accReg));
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ /* We mark the addressability registers on arrObj and gtArrInds[0].
+ instGetAddrMode() knows to work with this. */
+
+ regSet.rsMarkRegUsed(arrObj, tree);
+ regSet.rsMarkRegUsed(arrElem->gtArrElem.gtArrInds[0], tree);
+ }
+
+ return genRegMask(arrReg) | genRegMask(accReg);
+}
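+
+// Illustrative sketch (editor's note, not part of the original source): the loop above accumulates
+// the element offset of an MD array roughly as
+//
+//     acc = (i0 - lb0);                      // range-checked against dim0
+//     acc = acc * dimSize1 + (i1 - lb1);     // range-checked against dim1, and so on per rank
+//     if elemSize is not a valid addressing-mode scale: acc *= elemSize
+//
+// and the returned mask covers arrReg (the array object) and accReg (the accumulated offset),
+// which instGetAddrMode() later combines into the final element address.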
+
+/*****************************************************************************
+ *
+ * Make sure the given tree is addressable. 'needReg' is a mask that indicates
+ * the set of registers we would prefer the destination tree to be computed
+ * into (RBM_NONE means no preference).
+ *
+ * 'tree' can subsequently be used with the inst_XX_TT() family of functions.
+ *
+ * If 'keepReg' is RegSet::KEEP_REG, we mark any registers the addressability depends
+ * on as used, and return the mask for that register set (if no registers
+ * are marked as used, RBM_NONE is returned).
+ *
+ * If 'smallOK' is not true and the datatype being addressed is a byte or short,
+ * then the tree is forced into a register. This is useful when the machine
+ * instruction being emitted does not have a byte or short version.
+ *
+ * The "deferOK" parameter indicates the mode of operation - when it's false,
+ * upon returning an actual address mode must have been formed (i.e. it must
+ * be possible to immediately call one of the inst_TT methods to operate on
+ * the value). When "deferOK" is true, we do whatever it takes to be ready
+ * to form the address mode later - for example, if an index address mode on
+ * a particular CPU requires the use of a specific register, we usually don't
+ * want to immediately grab that register for an address mode that will only
+ * be needed later. The convention is to call genMakeAddressable() with
+ * "deferOK" equal to true, do whatever work is needed to prepare the other
+ * operand, call genMakeAddressable() with "deferOK" equal to false, and
+ * finally call one of the inst_TT methods right after that.
+ *
+ * If we do any other codegen after genMakeAddressable(tree) which can
+ * potentially spill the addressability registers, genKeepAddressable()
+ * needs to be called before accessing the tree again.
+ *
+ * genDoneAddressable() needs to be called when we are done with the tree
+ * to free the addressability registers.
+ */
+
+regMaskTP CodeGen::genMakeAddressable(
+ GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool smallOK, bool deferOK)
+{
+ GenTreePtr addr = NULL;
+ regMaskTP regMask;
+
+ /* Is the value simply sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ genUpdateLife(tree);
+
+ goto GOT_VAL;
+ }
+
+ // TODO: If the value is for example a cast of float -> int, compute
+ // TODO: the converted value into a stack temp, and leave it there,
+ // TODO: since stack temps are always addressable. This would require
+ // TODO: recording the fact that a particular tree is in a stack temp.
+
+ /* byte/char/short operand -- is this acceptable to the caller? */
+
+ if (varTypeIsSmall(tree->TypeGet()) && !smallOK)
+ goto EVAL_TREE;
+
+ // Evaluate non-last elements of comma expressions, to get to the last.
+ tree = genCodeForCommaTree(tree);
+
+ switch (tree->gtOper)
+ {
+ case GT_LCL_FLD:
+
+ // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
+
+ genUpdateLife(tree);
+ return 0;
+
+ case GT_LCL_VAR:
+
+ if (!genMarkLclVar(tree))
+ {
+ genUpdateLife(tree);
+ return 0;
+ }
+
+ __fallthrough; // it turns out the variable lives in a register
+
+ case GT_REG_VAR:
+
+ genUpdateLife(tree);
+
+ goto GOT_VAL;
+
+ case GT_CLS_VAR:
+
+ return 0;
+
+ case GT_CNS_INT:
+#ifdef _TARGET_64BIT_
+ // Non-relocs will be sign extended, so we don't have to enregister
+ // constants that are equivalent to a sign-extended int.
+ // Relocs can be left alone if they are RIP-relative.
+ if ((genTypeSize(tree->TypeGet()) > 4) &&
+ (!tree->IsIntCnsFitsInI32() ||
+ (tree->IsIconHandle() &&
+ (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal)))))
+ {
+ break;
+ }
+#endif // _TARGET_64BIT_
+ __fallthrough;
+
+ case GT_CNS_LNG:
+ case GT_CNS_DBL:
+ // For MinOpts, we don't do constant folding, so we have
+ // constants showing up in places we don't like.
+ // Force them into a register now to prevent that.
+ if (compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD))
+ return 0;
+ break;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ /* Try to make the address directly addressable */
+
+ if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
+ needReg, keepReg, &regMask, deferOK))
+ {
+ genUpdateLife(tree);
+ return regMask;
+ }
+
+ /* No good, we'll have to load the address into a register */
+
+ addr = tree;
+ tree = tree->gtOp.gtOp1;
+ break;
+
+ default:
+ break;
+ }
+
+EVAL_TREE:
+
+ /* Here we need to compute the value 'tree' into a register */
+
+ genCodeForTree(tree, needReg);
+
+GOT_VAL:
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+
+ if (isRegPairType(tree->gtType))
+ {
+ /* Are we supposed to hang on to the register? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegPairUsed(tree);
+
+ regMask = genRegPairMask(tree->gtRegPair);
+ }
+ else
+ {
+ /* Are we supposed to hang on to the register? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegUsed(tree, addr);
+
+ regMask = genRegMask(tree->gtRegNum);
+ }
+
+ return regMask;
+}
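+
+// Illustrative sketch (editor's note, not part of the original source): the keep/done protocol
+// described in the header comment above, for a hypothetical store of 'src' through 'dst':
+//
+//     regMaskTP addrReg = genMakeAddressable(dst, RBM_NONE, RegSet::KEEP_REG,
+//                                            false /*smallOK*/, false /*deferOK*/);
+//     genCodeForTree(src, RBM_NONE);                 // may spill the addressability registers
+//     addrReg = genKeepAddressable(dst, addrReg);    // reload them if that happened
+//     inst_TT_RV(INS_mov, dst, src->gtRegNum);       // access 'dst' via the inst_XX_TT family
+//     genDoneAddressable(dst, addrReg, RegSet::KEEP_REG);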
+
+/*****************************************************************************
+ * Compute a tree (which was previously made addressable using
+ * genMakeAddressable()) into a register.
+ * needReg - mask of preferred registers.
+ * keepReg - should the computed register be marked as used by the tree
+ * freeOnly - target register needs to be a scratch register
+ */
+
+void CodeGen::genComputeAddressable(GenTreePtr tree,
+ regMaskTP addrReg,
+ RegSet::KeepReg keptReg,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool freeOnly)
+{
+ noway_assert(genStillAddressable(tree));
+ noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
+
+ genDoneAddressable(tree, addrReg, keptReg);
+
+ regNumber reg;
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ reg = tree->gtRegNum;
+
+ if (freeOnly && !(genRegMask(reg) & regSet.rsRegMaskFree()))
+ goto MOVE_REG;
+ }
+ else
+ {
+ if (tree->OperIsConst())
+ {
+ /* Need to handle consts separately as we don't want to emit
+ "mov reg, 0" (emitter doesn't like that). Also, genSetRegToIcon()
+ handles consts better for SMALL_CODE */
+
+ noway_assert(tree->IsCnsIntOrI());
+ reg = genGetRegSetToIcon(tree->gtIntCon.gtIconVal, needReg, tree->gtType);
+ }
+ else
+ {
+ MOVE_REG:
+ reg = regSet.rsPickReg(needReg);
+
+ inst_RV_TT(INS_mov, reg, tree);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ }
+
+ genMarkTreeInReg(tree, reg);
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegUsed(tree);
+ else
+ gcInfo.gcMarkRegPtrVal(tree);
+}
+
+/*****************************************************************************
+ * Like genMakeAddressable(), but gives the caller more control via the additional
+ * forLoadStore and evalSideEffs parameters.
+ */
+
+regMaskTP CodeGen::genMakeAddressable2(GenTreePtr tree,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool forLoadStore,
+ bool smallOK,
+ bool deferOK,
+ bool evalSideEffs)
+
+{
+ bool evalToReg = false;
+
+ if (evalSideEffs && (tree->gtOper == GT_IND) && (tree->gtFlags & GTF_EXCEPT))
+ evalToReg = true;
+
+#if CPU_LOAD_STORE_ARCH
+ if (!forLoadStore)
+ evalToReg = true;
+#endif
+
+ if (evalToReg)
+ {
+ genCodeForTree(tree, needReg);
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+
+ if (isRegPairType(tree->gtType))
+ {
+ /* Are we supposed to hang on to the register? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegPairUsed(tree);
+
+ return genRegPairMask(tree->gtRegPair);
+ }
+ else
+ {
+ /* Are we supposed to hang on to the register? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegUsed(tree);
+
+ return genRegMask(tree->gtRegNum);
+ }
+ }
+ else
+ {
+ return genMakeAddressable(tree, needReg, keepReg, smallOK, deferOK);
+ }
+}
+
+/*****************************************************************************
+ *
+ * The given tree was previously passed to genMakeAddressable(); return
+ * 'true' if the operand is still addressable.
+ */
+
+// inline
+bool CodeGen::genStillAddressable(GenTreePtr tree)
+{
+ /* Has the value (or one or more of its sub-operands) been spilled? */
+
+ if (tree->gtFlags & (GTF_SPILLED | GTF_SPILLED_OPER))
+ return false;
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Recursive helper to restore complex address modes. The 'lockPhase'
+ * argument indicates whether we're in the 'lock' or 'reload' phase.
+ */
+
+regMaskTP CodeGen::genRestoreAddrMode(GenTreePtr addr, GenTreePtr tree, bool lockPhase)
+{
+ regMaskTP regMask = RBM_NONE;
+
+ /* Have we found a spilled value? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* Do nothing if we're locking, otherwise reload and lock */
+
+ if (!lockPhase)
+ {
+ /* Unspill the register */
+
+ regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
+
+ /* The value should now be sitting in a register */
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+ regMask = genRegMask(tree->gtRegNum);
+
+ /* Mark the register as used for the address */
+
+ regSet.rsMarkRegUsed(tree, addr);
+
+ /* Lock the register until we're done with the entire address */
+
+ regSet.rsMaskLock |= regMask;
+ }
+
+ return regMask;
+ }
+
+ /* Is this sub-tree sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ regMask = genRegMask(tree->gtRegNum);
+
+ /* Lock the register if we're in the locking phase */
+
+ if (lockPhase)
+ regSet.rsMaskLock |= regMask;
+ }
+ else
+ {
+ /* Process any sub-operands of this node */
+
+ unsigned kind = tree->OperKind();
+
+ if (kind & GTK_SMPOP)
+ {
+ /* Unary/binary operator */
+
+ if (tree->gtOp.gtOp1)
+ regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase);
+ if (tree->gtGetOp2())
+ regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase);
+ }
+ else if (tree->gtOper == GT_ARR_ELEM)
+ {
+ /* gtArrObj is the array-object and gtArrInds[0] is marked with the register
+ which holds the offset-calculation */
+
+ regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrObj, lockPhase);
+ regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrInds[0], lockPhase);
+ }
+ else if (tree->gtOper == GT_CMPXCHG)
+ {
+ regMask |= genRestoreAddrMode(addr, tree->gtCmpXchg.gtOpLocation, lockPhase);
+ }
+ else
+ {
+ /* Must be a leaf/constant node */
+
+ noway_assert(kind & (GTK_LEAF | GTK_CONST));
+ }
+ }
+
+ return regMask;
+}
+
+/*****************************************************************************
+ *
+ * The given tree was previously passed to genMakeAddressable, but since then
+ * some of its registers are known to have been spilled; do whatever it takes
+ * to make the operand addressable again (typically by reloading any spilled
+ * registers).
+ */
+
+regMaskTP CodeGen::genRestAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP lockMask)
+{
+ noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
+
+ /* Is this a 'simple' register spill? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* The mask must match the original register/regpair */
+
+ if (isRegPairType(tree->gtType))
+ {
+ noway_assert(addrReg == genRegPairMask(tree->gtRegPair));
+
+ regSet.rsUnspillRegPair(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
+
+ addrReg = genRegPairMask(tree->gtRegPair);
+ }
+ else
+ {
+ noway_assert(addrReg == genRegMask(tree->gtRegNum));
+
+ regSet.rsUnspillReg(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
+
+ addrReg = genRegMask(tree->gtRegNum);
+ }
+
+ noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
+ regSet.rsMaskLock -= lockMask;
+
+ return addrReg;
+ }
+
+ /* We have a complex address mode with some of its sub-operands spilled */
+
+ noway_assert((tree->gtFlags & GTF_REG_VAL) == 0);
+ noway_assert((tree->gtFlags & GTF_SPILLED_OPER) != 0);
+
+ /*
+ We'll proceed in several phases:
+
+ 1. Lock any registers that are part of the address mode and
+ have not been spilled. This prevents these registers from
+ getting spilled in step 2.
+
+ 2. Reload any registers that have been spilled; lock each
+ one right after it is reloaded.
+
+ 3. Unlock all the registers.
+ */
+
+ addrReg = genRestoreAddrMode(tree, tree, true);
+ addrReg |= genRestoreAddrMode(tree, tree, false);
+
+ /* Unlock all registers that the address mode uses */
+
+ lockMask |= addrReg;
+
+ noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
+ regSet.rsMaskLock -= lockMask;
+
+ return addrReg;
+}
+
+/*****************************************************************************
+ *
+ * The given tree was previously passed to genMakeAddressable, but since then
+ * some of its registers might have been spilled ('addrReg' is the set of
+ * registers used by the address). This function makes sure the operand is
+ * still addressable (while avoiding any of the registers in 'avoidMask'),
+ * and returns the (possibly modified) set of registers that are used by
+ * the address (these will be marked as used on exit).
+ */
+
+regMaskTP CodeGen::genKeepAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP avoidMask)
+{
+ /* Is the operand still addressable? */
+
+ tree = tree->gtEffectiveVal(/*commaOnly*/ true); // Strip off commas for this purpose.
+
+ if (!genStillAddressable(tree))
+ {
+ if (avoidMask)
+ {
+ // Temporarily lock 'avoidMask' while we restore addressability
+ // genRestAddressable will unlock the 'avoidMask' for us
+ // avoidMask must already be marked as a used reg in regSet.rsMaskUsed
+ // In regSet.rsRegMaskFree() we require that all locked registers be marked as used
+ //
+ regSet.rsLockUsedReg(avoidMask);
+ }
+
+ addrReg = genRestAddressable(tree, addrReg, avoidMask);
+
+ noway_assert((regSet.rsMaskLock & avoidMask) == 0);
+ }
+
+ return addrReg;
+}
+
+/*****************************************************************************
+ *
+ * After we're finished with the given operand (which was previously marked
+ * by calling genMakeAddressable), this function must be called to free any
+ * registers that may have been used by the address.
+ * keptReg indicates if the addressability registers were marked as used
+ * by genMakeAddressable().
+ */
+
+void CodeGen::genDoneAddressable(GenTreePtr tree, regMaskTP addrReg, RegSet::KeepReg keptReg)
+{
+ if (keptReg == RegSet::FREE_REG)
+ {
+ // We exclude regSet.rsMaskUsed since the registers may be multi-used.
+ // i.e., there may be a pending use in a higher-up tree.
+
+ addrReg &= ~regSet.rsMaskUsed;
+
+ /* addrReg was not marked as used. So just reset its GC info */
+ if (addrReg)
+ {
+ gcInfo.gcMarkRegSetNpt(addrReg);
+ }
+ }
+ else
+ {
+ /* addrReg was marked as used. So we need to free it up (which
+ will also reset its GC info) */
+
+ regSet.rsMarkRegFree(addrReg);
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Make sure the given floating point value is addressable, and return a tree
+ * that will yield the value as an addressing mode (this tree may differ from
+ * the one passed in, BTW). If the only way to make the value addressable is
+ * to evaluate into the FP stack, we do this and return zero.
+ */
+
+GenTreePtr CodeGen::genMakeAddrOrFPstk(GenTreePtr tree, regMaskTP* regMaskPtr, bool roundResult)
+{
+ *regMaskPtr = 0;
+
+ switch (tree->gtOper)
+ {
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_CLS_VAR:
+ return tree;
+
+ case GT_CNS_DBL:
+ if (tree->gtType == TYP_FLOAT)
+ {
+ float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
+ return genMakeConst(&f, TYP_FLOAT, tree, false);
+ }
+ return genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ /* Try to make the address directly addressable */
+
+ if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
+ 0, RegSet::FREE_REG, regMaskPtr, false))
+ {
+ genUpdateLife(tree);
+ return tree;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+#if FEATURE_STACK_FP_X87
+ /* We have no choice but to compute the value 'tree' onto the FP stack */
+
+ genCodeForTreeFlt(tree);
+#endif
+ return 0;
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Display a string literal value (debug only).
+ */
+
+#ifdef DEBUG
+#endif
+
+/*****************************************************************************
+ *
+ * Generate code to check that the GS cookie wasn't thrashed by a buffer
+ * overrun. If pushReg is true, preserve all registers around code sequence.
+ * Otherwise, ECX may be modified.
+ *
+ * TODO-ARM-Bug?: pushReg is not implemented (is it needed for ARM?)
+ */
+void CodeGen::genEmitGSCookieCheck(bool pushReg)
+{
+ // Make sure that EAX didn't die in the return expression
+ if (!pushReg && (compiler->info.compRetType == TYP_REF))
+ gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
+
+ // Add cookie check code for unsafe buffers
+ BasicBlock* gsCheckBlk;
+ regMaskTP byrefPushedRegs = RBM_NONE;
+ regMaskTP norefPushedRegs = RBM_NONE;
+ regMaskTP pushedRegs = RBM_NONE;
+
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ if (compiler->gsGlobalSecurityCookieAddr == NULL)
+ {
+ // JIT case
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+
+ regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaGSSecurityCookie, 0);
+ regTracker.rsTrackRegTrash(reg);
+
+ if (arm_Valid_Imm_For_Alu(compiler->gsGlobalSecurityCookieVal) ||
+ arm_Valid_Imm_For_Alu(~compiler->gsGlobalSecurityCookieVal))
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg, compiler->gsGlobalSecurityCookieVal);
+ }
+ else
+ {
+ // Load CookieVal into a register
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(EA_4BYTE, immReg, compiler->gsGlobalSecurityCookieVal);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, reg, immReg);
+ }
+#else
+ getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
+ (int)compiler->gsGlobalSecurityCookieVal);
+#endif
+ }
+ else
+ {
+ regNumber regGSCheck;
+ regMaskTP regMaskGSCheck;
+#if CPU_LOAD_STORE_ARCH
+ regGSCheck = regSet.rsGrabReg(RBM_ALLINT);
+ regMaskGSCheck = genRegMask(regGSCheck);
+#else
+ // Don't pick the 'this' register
+ if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
+ (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX))
+ {
+ regGSCheck = REG_EDX;
+ regMaskGSCheck = RBM_EDX;
+ }
+ else
+ {
+ regGSCheck = REG_ECX;
+ regMaskGSCheck = RBM_ECX;
+ }
+
+ // NGen case
+ if (pushReg && (regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)))
+ {
+ pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
+ }
+ else
+ {
+ noway_assert((regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)) == 0);
+ }
+#endif
+#if defined(_TARGET_ARM_)
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSCheck, regGSCheck, 0);
+#else
+ getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, regGSCheck, FLD_GLOBAL_DS,
+ (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+#endif // !_TARGET_ARM_
+ regTracker.rsTrashRegSet(regMaskGSCheck);
+#ifdef _TARGET_ARM_
+ regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regGSCheck));
+ getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regTmp, compiler->lvaGSSecurityCookie, 0);
+ regTracker.rsTrackRegTrash(regTmp);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regGSCheck);
+#else
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
+#endif
+ }
+
+ gsCheckBlk = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, gsCheckBlk);
+ genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
+ genDefineTempLabel(gsCheckBlk);
+
+ genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
+}
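+
+// Illustrative sketch (editor's note, not part of the original source): for the x86 JIT case above,
+// the emitted sequence is essentially
+//
+//     cmp dword ptr [gsCookieLocal], gsCookieVal
+//     je  noTamper
+//     call CORINFO_HELP_FAIL_FAST
+//   noTamper:
+//
+// The AOT (NGen) case first loads the cookie through gsGlobalSecurityCookieAddr into a scratch
+// register (EDX if 'this' is live in ECX) and then does the same compare-and-fail-fast pattern.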
+
+/*****************************************************************************
+ *
+ * Generate any side effects within the given expression tree.
+ */
+
+void CodeGen::genEvalSideEffects(GenTreePtr tree)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+AGAIN:
+
+ /* Does this sub-tree contain any side-effects? */
+ if (tree->gtFlags & GTF_SIDE_EFFECT)
+ {
+#if FEATURE_STACK_FP_X87
+ /* Remember the current FP stack level */
+ int iTemps = genNumberTemps();
+#endif
+ if (tree->OperIsIndir())
+ {
+ regMaskTP addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true, false);
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ gcInfo.gcMarkRegPtrVal(tree);
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ }
+ // GTF_IND_RNGCHK trees have already de-referenced the pointer, and so
+ // do not need an additional null-check
+ /* Do this only if the GTF_EXCEPT or GTF_IND_VOLATILE flag is set on the indir */
+ else if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0 && ((tree->gtFlags & (GTF_EXCEPT | GTF_IND_VOLATILE)) != 0))
+ {
+ /* Compare against any register to do null-check */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_XARCH_)
+ inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+#elif CPU_LOAD_STORE_ARCH
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ genComputeAddressableFloat(tree, addrReg, RBM_NONE, RegSet::KEEP_REG, RBM_ALLFLOAT,
+ RegSet::FREE_REG);
+ }
+ else
+ {
+ genComputeAddressable(tree, addrReg, RegSet::KEEP_REG, RBM_NONE, RegSet::FREE_REG);
+ }
+#ifdef _TARGET_ARM_
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+#else
+ NYI("TARGET");
+#endif
+ }
+ else
+ {
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ }
+ }
+ else
+ {
+ /* Generate the expression and throw it away */
+ genCodeForTree(tree, RBM_ALL(tree->TypeGet()));
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ gcInfo.gcMarkRegPtrVal(tree);
+ }
+ }
+#if FEATURE_STACK_FP_X87
+ /* If the tree computed a value on the FP stack, pop the stack */
+ if (genNumberTemps() > iTemps)
+ {
+ noway_assert(genNumberTemps() == iTemps + 1);
+ genDiscardStackFP(tree);
+ }
+#endif
+ return;
+ }
+
+ noway_assert(tree->gtOper != GT_ASG);
+
+ /* Walk the tree, just to mark any dead values appropriately */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+#if FEATURE_STACK_FP_X87
+ if (tree->IsRegVar() && isFloatRegType(tree->gtType) && tree->IsRegVarDeath())
+ {
+ genRegVarDeathStackFP(tree);
+ FlatFPX87_Unload(&compCurFPState, tree->gtRegNum);
+ }
+#endif
+ genUpdateLife(tree);
+ gcInfo.gcMarkRegPtrVal(tree);
+ return;
+ }
+
+ /* Must be a 'simple' unary/binary operator */
+
+ noway_assert(kind & GTK_SMPOP);
+
+ if (tree->gtGetOp2())
+ {
+ genEvalSideEffects(tree->gtOp.gtOp1);
+
+ tree = tree->gtOp.gtOp2;
+ goto AGAIN;
+ }
+ else
+ {
+ tree = tree->gtOp.gtOp1;
+ if (tree)
+ goto AGAIN;
+ }
+}
+
+/*****************************************************************************
+ *
+ * A persistent pointer value is being overwritten, record it for the GC.
+ *
+ * tgt : the destination being written to
+ * assignVal : the value being assigned (the source). It must currently be in a register.
+ * tgtAddrReg : the set of registers being used by "tgt"
+ *
+ * Returns : the mask of the scratch register that was used.
+ * RBM_NONE if a write-barrier is not needed.
+ */
+
+regMaskTP CodeGen::WriteBarrier(GenTreePtr tgt, GenTreePtr assignVal, regMaskTP tgtAddrReg)
+{
+ noway_assert(assignVal->gtFlags & GTF_REG_VAL);
+
+ GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
+ if (wbf == GCInfo::WBF_NoBarrier)
+ return RBM_NONE;
+
+ regMaskTP resultRegMask = RBM_NONE;
+
+#if FEATURE_WRITE_BARRIER
+
+ regNumber reg = assignVal->gtRegNum;
+
+#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+#ifdef DEBUG
+ if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
+ {
+#endif
+ const static int regToHelper[2][8] = {
+ // If the target is known to be in managed memory
+ {
+ CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
+ CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
+ },
+
+ // Don't know if the target is in managed memory
+ {
+ CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
+ CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
+ CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
+ },
+ };
+
+ noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
+ noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
+ noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
+ noway_assert(regToHelper[0][REG_ESP] == -1);
+ noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
+ noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
+ noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
+
+ noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
+ noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
+ noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
+ noway_assert(regToHelper[1][REG_ESP] == -1);
+ noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
+ noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
+ noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
+
+ noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
+
+ /*
+ Generate the following code:
+
+ lea edx, tgt
+ call write_barrier_helper_reg
+
+ First grab the RBM_WRITE_BARRIER register for the target address.
+ */
+
+ regNumber rg1;
+ bool trashOp1;
+
+ if ((tgtAddrReg & RBM_WRITE_BARRIER) == 0)
+ {
+ rg1 = regSet.rsGrabReg(RBM_WRITE_BARRIER);
+
+ regSet.rsMaskUsed |= RBM_WRITE_BARRIER;
+ regSet.rsMaskLock |= RBM_WRITE_BARRIER;
+
+ trashOp1 = false;
+ }
+ else
+ {
+ rg1 = REG_WRITE_BARRIER;
+
+ trashOp1 = true;
+ }
+
+ noway_assert(rg1 == REG_WRITE_BARRIER);
+
+ /* Generate "lea EDX, [addr-mode]" */
+
+ noway_assert(tgt->gtType == TYP_REF);
+ tgt->gtType = TYP_BYREF;
+ inst_RV_TT(INS_lea, rg1, tgt, 0, EA_BYREF);
+
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
+
+ // In case "tgt" was a comma:
+ tgt = tgt->gtEffectiveVal();
+
+ regTracker.rsTrackRegTrash(rg1);
+ gcInfo.gcMarkRegSetNpt(genRegMask(rg1));
+ gcInfo.gcMarkRegPtrVal(rg1, TYP_BYREF);
+
+ /* Call the proper vm helper */
+
+ // enforced by gcIsWriteBarrierCandidate
+ noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR);
+
+ unsigned tgtAnywhere = 0;
+ if ((tgt->gtOper == GT_IND) &&
+ ((tgt->gtFlags & GTF_IND_TGTANYWHERE) || (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)))
+ {
+ tgtAnywhere = 1;
+ }
+
+ int helper = regToHelper[tgtAnywhere][reg];
+ resultRegMask = genRegMask(reg);
+
+ gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER); // byref EDX is killed in the call
+
+ genEmitHelperCall(helper,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+
+ if (!trashOp1)
+ {
+ regSet.rsMaskUsed &= ~RBM_WRITE_BARRIER;
+ regSet.rsMaskLock &= ~RBM_WRITE_BARRIER;
+ }
+
+ return resultRegMask;
+
+#ifdef DEBUG
+ }
+ else
+#endif
+#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+
+#if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
+ {
+ /*
+ Generate the following code (or its equivalent on the given target):
+
+ mov arg1, srcReg
+ lea arg0, tgt
+ call write_barrier_helper
+
+ First, setup REG_ARG_1 with the GC ref that we are storing via the Write Barrier
+ */
+
+ if (reg != REG_ARG_1)
+ {
+ // We may need to spill whatever is in the ARG_1 register
+ //
+ if ((regSet.rsMaskUsed & RBM_ARG_1) != 0)
+ {
+ regSet.rsSpillReg(REG_ARG_1);
+ }
+
+ inst_RV_RV(INS_mov, REG_ARG_1, reg, TYP_REF);
+ }
+ resultRegMask = RBM_ARG_1;
+
+ regTracker.rsTrackRegTrash(REG_ARG_1);
+ gcInfo.gcMarkRegSetNpt(RBM_ARG_1);
+ gcInfo.gcMarkRegSetGCref(RBM_ARG_1); // gcref in ARG_1
+
+ bool free_arg1 = false;
+ if ((regSet.rsMaskUsed & RBM_ARG_1) == 0)
+ {
+ regSet.rsMaskUsed |= RBM_ARG_1;
+ free_arg1 = true;
+ }
+
+ // Then we setup REG_ARG_0 with the target address to store into via the Write Barrier
+
+ /* Generate "lea R0, [addr-mode]" */
+
+ noway_assert(tgt->gtType == TYP_REF);
+ tgt->gtType = TYP_BYREF;
+
+ tgtAddrReg = genKeepAddressable(tgt, tgtAddrReg);
+
+ // We may need to spill whatever is in the ARG_0 register
+ //
+ if (((tgtAddrReg & RBM_ARG_0) == 0) && // tgtAddrReg does not contain REG_ARG_0
+ ((regSet.rsMaskUsed & RBM_ARG_0) != 0) && // and regSet.rsMaskUsed contains REG_ARG_0
+ (reg != REG_ARG_0)) // unless REG_ARG_0 contains the REF value being written, which we're finished with.
+ {
+ regSet.rsSpillReg(REG_ARG_0);
+ }
+
+ inst_RV_TT(INS_lea, REG_ARG_0, tgt, 0, EA_BYREF);
+
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
+
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+ gcInfo.gcMarkRegSetNpt(REG_ARG_0);
+ gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0
+
+#ifdef _TARGET_ARM_
+#if NOGC_WRITE_BARRIERS
+ // Finally, we may need to spill whatever is in the other argument registers that are
+ // trashed by the call. The write barrier trashes some additional registers --
+ // either the standard volatile register set or, if we're using assembly barriers, a more specialized set.
+
+ regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
+#else
+ regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
+#endif
+ // Spill any other registers trashed by the write barrier call and currently in use.
+ regMaskTP mustSpill = (volatileRegsTrashed & regSet.rsMaskUsed & ~(RBM_ARG_0 | RBM_ARG_1));
+ if (mustSpill)
+ regSet.rsSpillRegs(mustSpill);
+#endif // _TARGET_ARM_
+
+ bool free_arg0 = false;
+ if ((regSet.rsMaskUsed & RBM_ARG_0) == 0)
+ {
+ regSet.rsMaskUsed |= RBM_ARG_0;
+ free_arg0 = true;
+ }
+
+ // genEmitHelperCall might need to grab a register
+ // so don't let it spill one of the arguments
+ //
+ regMaskTP reallyUsedRegs = RBM_NONE;
+ regSet.rsLockReg(RBM_ARG_0 | RBM_ARG_1, &reallyUsedRegs);
+
+ genGCWriteBarrier(tgt, wbf);
+
+ regSet.rsUnlockReg(RBM_ARG_0 | RBM_ARG_1, reallyUsedRegs);
+ gcInfo.gcMarkRegSetNpt(RBM_ARG_0 | RBM_ARG_1); // byref ARG_0 and reg ARG_1 are killed by the call
+
+ if (free_arg0)
+ {
+ regSet.rsMaskUsed &= ~RBM_ARG_0;
+ }
+ if (free_arg1)
+ {
+ regSet.rsMaskUsed &= ~RBM_ARG_1;
+ }
+
+ return resultRegMask;
+ }
+#endif // defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
+
+#else // !FEATURE_WRITE_BARRIER
+
+ NYI("FEATURE_WRITE_BARRIER unimplemented");
+ return resultRegMask;
+
+#endif // !FEATURE_WRITE_BARRIER
+}
+
+#ifdef _TARGET_X86_
+/*****************************************************************************
+ *
+ * Generate the appropriate conditional jump(s) right after the high 32 bits
+ * of two long values have been compared.
+ */
+
+void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
+{
+ if (cmp != GT_NE)
+ {
+ jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+
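+ // A strict inequality in the high words decides the result here; when the high
+ // words compare equal we fall through so that genJccLongLo can decide using the
+ // low words.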
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_jne, jumpFalse);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_jne, jumpTrue);
+ break;
+
+ case GT_LT:
+ case GT_LE:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_ja, jumpFalse);
+ inst_JMP(EJ_jb, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_jg, jumpFalse);
+ inst_JMP(EJ_jl, jumpTrue);
+ }
+ break;
+
+ case GT_GE:
+ case GT_GT:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_jb, jumpFalse);
+ inst_JMP(EJ_ja, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_jl, jumpFalse);
+ inst_JMP(EJ_jg, jumpTrue);
+ }
+ break;
+
+ default:
+ noway_assert(!"expected a comparison operator");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate the appropriate conditional jump(s) right after the low 32 bits
+ * of two long values have been compared.
+ */
+
+void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
+{
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_je, jumpTrue);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_jne, jumpTrue);
+ break;
+
+ case GT_LT:
+ inst_JMP(EJ_jb, jumpTrue);
+ break;
+
+ case GT_LE:
+ inst_JMP(EJ_jbe, jumpTrue);
+ break;
+
+ case GT_GE:
+ inst_JMP(EJ_jae, jumpTrue);
+ break;
+
+ case GT_GT:
+ inst_JMP(EJ_ja, jumpTrue);
+ break;
+
+ default:
+ noway_assert(!"expected comparison");
+ }
+}
+#elif defined(_TARGET_ARM_)
+/*****************************************************************************
+*
+* Generate the appropriate conditional jump(s) right after the high 32 bits
+* of two long values have been compared.
+*/
+
+void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
+{
+ if (cmp != GT_NE)
+ {
+ jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_ne, jumpFalse);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_ne, jumpTrue);
+ break;
+
+ case GT_LT:
+ case GT_LE:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_hi, jumpFalse);
+ inst_JMP(EJ_lo, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_gt, jumpFalse);
+ inst_JMP(EJ_lt, jumpTrue);
+ }
+ break;
+
+ case GT_GE:
+ case GT_GT:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_lo, jumpFalse);
+ inst_JMP(EJ_hi, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_lt, jumpFalse);
+ inst_JMP(EJ_gt, jumpTrue);
+ }
+ break;
+
+ default:
+ noway_assert(!"expected a comparison operator");
+ }
+}
+
+/*****************************************************************************
+*
+* Generate the appropriate conditional jump(s) right after the low 32 bits
+* of two long values have been compared.
+*/
+
+void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
+{
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_eq, jumpTrue);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_ne, jumpTrue);
+ break;
+
+ case GT_LT:
+ inst_JMP(EJ_lo, jumpTrue);
+ break;
+
+ case GT_LE:
+ inst_JMP(EJ_ls, jumpTrue);
+ break;
+
+ case GT_GE:
+ inst_JMP(EJ_hs, jumpTrue);
+ break;
+
+ case GT_GT:
+ inst_JMP(EJ_hi, jumpTrue);
+ break;
+
+ default:
+ noway_assert(!"expected comparison");
+ }
+}
+#endif
+/*****************************************************************************
+ *
+ * Called by genCondJump() for TYP_LONG.
+ */
+
+void CodeGen::genCondJumpLng(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bFPTransition)
+{
+ noway_assert(jumpTrue && jumpFalse);
+ noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == false); // Done in genCondJump()
+ noway_assert(cond->gtOp.gtOp1->gtType == TYP_LONG);
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+
+ regMaskTP addrReg;
+
+ /* Are we comparing against a constant? */
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ __int64 lval = op2->gtLngCon.gtLconVal;
+ regNumber rTmp;
+
+ // We're "done" evaluating op2; let's strip any commas off op1 before we
+ // evaluate it.
+ op1 = genCodeForCommaTree(op1);
+
+ /* We can generate better code for some special cases */
+ instruction ins = INS_invalid;
+ bool useIncToSetFlags = false;
+ bool specialCaseCmp = false;
+
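+ // A long compare against 0 can be done by OR-ing the two halves together (ZF is
+ // set only if both halves are zero); a compare against -1 can be done by AND-ing
+ // the halves and then incrementing the result (ZF is set only if both halves
+ // were all ones).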
+ if (cmp == GT_EQ)
+ {
+ if (lval == 0)
+ {
+ /* op1 == 0 */
+ ins = INS_OR;
+ useIncToSetFlags = false;
+ specialCaseCmp = true;
+ }
+ else if (lval == -1)
+ {
+ /* op1 == -1 */
+ ins = INS_AND;
+ useIncToSetFlags = true;
+ specialCaseCmp = true;
+ }
+ }
+ else if (cmp == GT_NE)
+ {
+ if (lval == 0)
+ {
+ /* op1 != 0 */
+ ins = INS_OR;
+ useIncToSetFlags = false;
+ specialCaseCmp = true;
+ }
+ else if (lval == -1)
+ {
+ /* op1 != -1 */
+ ins = INS_AND;
+ useIncToSetFlags = true;
+ specialCaseCmp = true;
+ }
+ }
+
+ if (specialCaseCmp)
+ {
+ /* Make the comparand addressable */
+
+ addrReg = genMakeRvalueAddressable(op1, 0, RegSet::KEEP_REG, false, true);
+
+ regMaskTP tmpMask = regSet.rsRegMaskCanGrab();
+ insFlags flags = useIncToSetFlags ? INS_FLAGS_DONT_CARE : INS_FLAGS_SET;
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regPairNo regPair = op1->gtRegPair;
+ regNumber rLo = genRegPairLo(regPair);
+ regNumber rHi = genRegPairHi(regPair);
+ if (tmpMask & genRegMask(rLo))
+ {
+ rTmp = rLo;
+ }
+ else if (tmpMask & genRegMask(rHi))
+ {
+ rTmp = rHi;
+ rHi = rLo;
+ }
+ else
+ {
+ rTmp = regSet.rsGrabReg(tmpMask);
+ inst_RV_RV(INS_mov, rTmp, rLo, TYP_INT);
+ }
+
+ /* The register is now trashed */
+ regTracker.rsTrackRegTrash(rTmp);
+
+ if (rHi != REG_STK)
+ {
+ /* Set the flags using INS_AND | INS_OR */
+ inst_RV_RV(ins, rTmp, rHi, TYP_INT, EA_4BYTE, flags);
+ }
+ else
+ {
+ /* Set the flags using INS_AND | INS_OR */
+ inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
+ }
+ }
+ else // op1 is not GTF_REG_VAL
+ {
+ rTmp = regSet.rsGrabReg(tmpMask);
+
+ /* Load the low 32-bits of op1 */
+ inst_RV_TT(ins_Load(TYP_INT), rTmp, op1, 0);
+
+ /* The register is now trashed */
+ regTracker.rsTrackRegTrash(rTmp);
+
+ /* Set the flags using INS_AND | INS_OR */
+ inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
+ }
+
+ /* Free up the addrReg(s) if any */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ /* Comparisons against -1 also require an inc instruction */
+ if (useIncToSetFlags)
+ {
+ /* Make sure the inc will set the flags */
+ assert(cond->gtSetFlags());
+ genIncRegBy(rTmp, 1, cond, TYP_INT);
+ }
+
+#if FEATURE_STACK_FP_X87
+ // We may need a transition block
+ if (bFPTransition)
+ {
+ jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
+ }
+#endif
+ emitJumpKind jmpKind = genJumpKindForOper(cmp, CK_SIGNED);
+ inst_JMP(jmpKind, jumpTrue);
+ }
+ else // specialCaseCmp == false
+ {
+ /* Make the comparand addressable */
+ addrReg = genMakeRvalueAddressable(op1, 0, RegSet::FREE_REG, false, true);
+
+ /* Compare the high part first */
+
+ int ival = (int)(lval >> 32);
+
+ /* Comparing a register against 0 is easier */
+
+ if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairHi(op1->gtRegPair)) != REG_STK)
+ {
+ /* Generate 'test rTmp, rTmp' */
+ instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
+ }
+ else
+ {
+ if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
+ {
+ /* Special case: comparison of two constants */
+ // Needed as gtFoldExpr() doesn't fold longs
+
+ noway_assert(addrReg == 0);
+ int op1_hiword = (int)(op1->gtLngCon.gtLconVal >> 32);
+
+ /* Get the constant operand into a register */
+ rTmp = genGetRegSetToIcon(op1_hiword);
+
+ /* Generate 'cmp rTmp, ival' */
+
+ inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
+ }
+ else
+ {
+ /* Generate 'cmp op1, ival' */
+
+ inst_TT_IV(INS_cmp, op1, ival, 4);
+ }
+ }
+
+#if FEATURE_STACK_FP_X87
+ // We may need a transition block
+ if (bFPTransition)
+ {
+ jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
+ }
+#endif
+ /* Generate the appropriate jumps */
+
+ if (cond->gtFlags & GTF_UNSIGNED)
+ genJccLongHi(cmp, jumpTrue, jumpFalse, true);
+ else
+ genJccLongHi(cmp, jumpTrue, jumpFalse);
+
+ /* Compare the low part second */
+
+ ival = (int)lval;
+
+ /* Comparing a register against 0 is easier */
+
+ if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairLo(op1->gtRegPair)) != REG_STK)
+ {
+ /* Generate 'test rTmp, rTmp' */
+ instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
+ }
+ else
+ {
+ if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
+ {
+ /* Special case: comparison of two constants */
+ // Needed as gtFoldExpr() doesn't fold longs
+
+ noway_assert(addrReg == 0);
+ int op1_loword = (int)op1->gtLngCon.gtLconVal;
+
+ /* get the constant operand into a register */
+ rTmp = genGetRegSetToIcon(op1_loword);
+
+ /* Generate 'cmp rTmp, ival' */
+
+ inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
+ }
+ else
+ {
+ /* Generate 'cmp op1, ival' */
+
+ inst_TT_IV(INS_cmp, op1, ival, 0);
+ }
+ }
+
+ /* Generate the appropriate jumps */
+ genJccLongLo(cmp, jumpTrue, jumpFalse);
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+ }
+ }
+ else // (op2->gtOper != GT_CNS_LNG)
+ {
+
+ /* The operands would be reversed by physically swapping them */
+
+ noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Generate the first operand into a register pair */
+
+ genComputeRegPair(op1, REG_PAIR_NONE, op2->gtRsvdRegs, RegSet::KEEP_REG, false);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+#if CPU_LOAD_STORE_ARCH
+ /* Generate the second operand into a register pair */
+ // Fix 388442 ARM JitStress WP7
+ genComputeRegPair(op2, REG_PAIR_NONE, genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regSet.rsLockUsedReg(genRegPairMask(op2->gtRegPair));
+#else
+ /* Make the second operand addressable */
+
+ addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
+#endif
+ /* Make sure the first operand hasn't been spilled */
+
+ genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regPairNo regPair = op1->gtRegPair;
+
+#if !CPU_LOAD_STORE_ARCH
+ /* Make sure 'op2' is still addressable while avoiding 'op1' (regPair) */
+
+ addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
+#endif
+
+#if FEATURE_STACK_FP_X87
+ // We may need a transition block
+ if (bFPTransition)
+ {
+ jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
+ }
+#endif
+
+ /* Perform the comparison - high parts */
+
+ inst_RV_TT(INS_cmp, genRegPairHi(regPair), op2, 4);
+
+ if (cond->gtFlags & GTF_UNSIGNED)
+ genJccLongHi(cmp, jumpTrue, jumpFalse, true);
+ else
+ genJccLongHi(cmp, jumpTrue, jumpFalse);
+
+ /* Compare the low parts */
+
+ inst_RV_TT(INS_cmp, genRegPairLo(regPair), op2, 0);
+ genJccLongLo(cmp, jumpTrue, jumpFalse);
+
+ /* Free up anything that was tied up by either operand */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+
+ // Fix 388442 ARM JitStress WP7
+ regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair));
+ genReleaseRegPair(op2);
+#else
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+#endif
+ genReleaseRegPair(op1);
+ }
+}
+
+/*****************************************************************************
+ * gen_fcomp_FN, gen_fcomp_FS_TT, gen_fcompp_FS
+ * Called by genCondJumpFlt() to generate the fcomp instruction appropriate
+ * to the architecture we're running on.
+ *
+ * P5:
+ * gen_fcomp_FN: fcomp ST(0), stk
+ * gen_fcomp_FS_TT: fcomp ST(0), addr
+ * gen_fcompp_FS: fcompp
+ * These are followed by fnstsw, sahf to get the flags in EFLAGS.
+ *
+ * P6:
+ * gen_fcomp_FN: fcomip ST(0), stk
+ * gen_fcomp_FS_TT: fld addr, fcomip ST(0), ST(1), fstp ST(0)
+ * (and reverse the branch condition since addr comes first)
+ * gen_fcompp_FS: fcomip, fstp
+ * These instructions will correctly set the EFLAGS register.
+ *
+ * Return value: These functions return true if the instruction has
+ * already placed its result in the EFLAGS register.
+ */
+
+bool CodeGen::genUse_fcomip()
+{
+ return compiler->opts.compUseFCOMI;
+}
+
+/*****************************************************************************
+ *
+ * Sets the flags for the TYP_INT/TYP_REF comparison.
+ * We try to use the flags if they have already been set by a prior
+ * instruction.
+ * e.g. i++; if (i < 0) {} Here, the "i++;" will have set the sign flag, so we
+ * don't need to compare against zero again; we can just use an "INS_js".
+ *
+ * Returns the jump kind that the following jump/set instruction should use.
+ */
+
+emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond)
+{
+ noway_assert(cond->OperIsCompare());
+ noway_assert(varTypeIsI(genActualType(cond->gtOp.gtOp1->gtType)));
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+
+ if (cond->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Don't forget to modify the condition as well */
+
+ cond->gtOp.gtOp1 = op2;
+ cond->gtOp.gtOp2 = op1;
+ cond->SetOper(GenTree::SwapRelop(cmp));
+ cond->gtFlags &= ~GTF_REVERSE_OPS;
+
+ /* Get hold of the new values */
+
+ cmp = cond->OperGet();
+ op1 = cond->gtOp.gtOp1;
+ op2 = cond->gtOp.gtOp2;
+ }
+
+ // Note that op1's type may get bashed. So save it early
+
+ var_types op1Type = op1->TypeGet();
+ bool unsignedCmp = (cond->gtFlags & GTF_UNSIGNED) != 0;
+ emitAttr size = EA_UNKNOWN;
+
+ regMaskTP regNeed;
+ regMaskTP addrReg1 = RBM_NONE;
+ regMaskTP addrReg2 = RBM_NONE;
+ emitJumpKind jumpKind = EJ_COUNT; // Initialize with an invalid value
+
+ bool byteCmp;
+ bool shortCmp;
+
+ regMaskTP newLiveMask;
+ regNumber op1Reg;
+
+ /* Are we comparing against a constant? */
+
+ if (op2->IsCnsIntOrI())
+ {
+ ssize_t ival = op2->gtIntConCommon.IconValue();
+
+ /* Unsigned less-than comparisons with 1 ('< 1')
+ should be transformed into '== 0' to potentially
+ suppress a tst instruction.
+ */
+ if ((ival == 1) && (cmp == GT_LT) && unsignedCmp)
+ {
+ op2->gtIntCon.gtIconVal = ival = 0;
+ cond->gtOper = cmp = GT_EQ;
+ }
+
+ /* Comparisons against 0 can be easier */
+
+ if (ival == 0)
+ {
+ // if we can safely change the comparison to unsigned we do so
+ if (!unsignedCmp && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
+ {
+ unsignedCmp = true;
+ }
+
+ /* Unsigned comparisons with 0 should be transformed into
+ '== 0' or '!= 0' to potentially suppress a tst instruction. */
+
+ if (unsignedCmp)
+ {
+ if (cmp == GT_GT)
+ cond->gtOper = cmp = GT_NE;
+ else if (cmp == GT_LE)
+ cond->gtOper = cmp = GT_EQ;
+ }
+
+ /* Is this a simple zero/non-zero test? */
+
+ if (cmp == GT_EQ || cmp == GT_NE)
+ {
+ /* Is the operand an "AND" operation? */
+
+ if (op1->gtOper == GT_AND)
+ {
+ GenTreePtr an1 = op1->gtOp.gtOp1;
+ GenTreePtr an2 = op1->gtOp.gtOp2;
+
+ /* Check for the case "expr & icon" */
+
+ if (an2->IsIntCnsFitsInI32())
+ {
+ int iVal = (int)an2->gtIntCon.gtIconVal;
+
+ /* make sure that constant is not out of an1's range */
+
+ switch (an1->gtType)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ if (iVal & 0xffffff00)
+ goto NO_TEST_FOR_AND;
+ break;
+ case TYP_CHAR:
+ case TYP_SHORT:
+ if (iVal & 0xffff0000)
+ goto NO_TEST_FOR_AND;
+ break;
+ default:
+ break;
+ }
+
+ if (an1->IsCnsIntOrI())
+ {
+ // Special case - Both operands of AND are consts
+ genComputeReg(an1, 0, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ addrReg1 = genRegMask(an1->gtRegNum);
+ }
+ else
+ {
+ addrReg1 = genMakeAddressable(an1, RBM_NONE, RegSet::KEEP_REG, true);
+ }
+#if CPU_LOAD_STORE_ARCH
+ if ((an1->gtFlags & GTF_REG_VAL) == 0)
+ {
+ genComputeAddressable(an1, addrReg1, RegSet::KEEP_REG, RBM_NONE, RegSet::KEEP_REG);
+ if (arm_Valid_Imm_For_Alu(iVal))
+ {
+ inst_RV_IV(INS_TEST, an1->gtRegNum, iVal, emitActualTypeSize(an1->gtType));
+ }
+ else
+ {
+ regNumber regTmp = regSet.rsPickFreeReg();
+ instGen_Set_Reg_To_Imm(EmitSize(an2), regTmp, iVal);
+ inst_RV_RV(INS_TEST, an1->gtRegNum, regTmp);
+ }
+ genReleaseReg(an1);
+ addrReg1 = RBM_NONE;
+ }
+ else
+#endif
+ {
+#ifdef _TARGET_XARCH_
+ // Check to see if we can use a smaller immediate.
+ if ((an1->gtFlags & GTF_REG_VAL) && ((iVal & 0x0000FFFF) == iVal))
+ {
+ var_types testType =
+ (var_types)(((iVal & 0x000000FF) == iVal) ? TYP_UBYTE : TYP_USHORT);
+#if CPU_HAS_BYTE_REGS
+ // if we don't have byte-able register, switch to the 2-byte form
+ if ((testType == TYP_UBYTE) && !(genRegMask(an1->gtRegNum) & RBM_BYTE_REGS))
+ {
+ testType = TYP_USHORT;
+ }
+#endif // CPU_HAS_BYTE_REGS
+
+ inst_TT_IV(INS_TEST, an1, iVal, testType);
+ }
+ else
+#endif // _TARGET_XARCH_
+ {
+ inst_TT_IV(INS_TEST, an1, iVal);
+ }
+ }
+
+ goto DONE;
+
+ NO_TEST_FOR_AND:;
+ }
+
+ // TODO: Check for other cases that can generate 'test',
+ // TODO: also check for a 64-bit integer zero test which
+ // TODO: could generate 'or lo, hi' followed by jz/jnz.
+ }
+ }
+
+ // See what Jcc instruction we would use if we can take advantage of
+ // the knowledge of EFLAGs.
+
+ if (unsignedCmp)
+ {
+ /*
+ Unsigned comparison to 0. Using this table:
+
+ ----------------------------------------------------
+ | Comparison | Flags Checked | Instruction Used |
+ ----------------------------------------------------
+ | == 0 | ZF = 1 | je |
+ ----------------------------------------------------
+ | != 0 | ZF = 0 | jne |
+ ----------------------------------------------------
+ | < 0 | always FALSE | N/A |
+ ----------------------------------------------------
+ | <= 0 | ZF = 1 | je |
+ ----------------------------------------------------
+ | >= 0 | always TRUE | N/A |
+ ----------------------------------------------------
+ | > 0 | ZF = 0 | jne |
+ ----------------------------------------------------
+ */
+ switch (cmp)
+ {
+#ifdef _TARGET_ARM_
+ case GT_EQ:
+ jumpKind = EJ_eq;
+ break;
+ case GT_NE:
+ jumpKind = EJ_ne;
+ break;
+ case GT_LT:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_LE:
+ jumpKind = EJ_eq;
+ break;
+ case GT_GE:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_GT:
+ jumpKind = EJ_ne;
+ break;
+#elif defined(_TARGET_X86_)
+ case GT_EQ:
+ jumpKind = EJ_je;
+ break;
+ case GT_NE:
+ jumpKind = EJ_jne;
+ break;
+ case GT_LT:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_LE:
+ jumpKind = EJ_je;
+ break;
+ case GT_GE:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_GT:
+ jumpKind = EJ_jne;
+ break;
+#endif // TARGET
+ default:
+ noway_assert(!"Unexpected comparison OpCode");
+ break;
+ }
+ }
+ else
+ {
+ /*
+ Signed comparison to 0. Using this table:
+
+ -----------------------------------------------------
+ | Comparison | Flags Checked | Instruction Used |
+ -----------------------------------------------------
+ | == 0 | ZF = 1 | je |
+ -----------------------------------------------------
+ | != 0 | ZF = 0 | jne |
+ -----------------------------------------------------
+ | < 0 | SF = 1 | js |
+ -----------------------------------------------------
+ | <= 0 | N/A | N/A |
+ -----------------------------------------------------
+ | >= 0 | SF = 0 | jns |
+ -----------------------------------------------------
+ | > 0 | N/A | N/A |
+ -----------------------------------------------------
+ */
+
+ switch (cmp)
+ {
+#ifdef _TARGET_ARM_
+ case GT_EQ:
+ jumpKind = EJ_eq;
+ break;
+ case GT_NE:
+ jumpKind = EJ_ne;
+ break;
+ case GT_LT:
+ jumpKind = EJ_mi;
+ break;
+ case GT_LE:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_GE:
+ jumpKind = EJ_pl;
+ break;
+ case GT_GT:
+ jumpKind = EJ_NONE;
+ break;
+#elif defined(_TARGET_X86_)
+ case GT_EQ:
+ jumpKind = EJ_je;
+ break;
+ case GT_NE:
+ jumpKind = EJ_jne;
+ break;
+ case GT_LT:
+ jumpKind = EJ_js;
+ break;
+ case GT_LE:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_GE:
+ jumpKind = EJ_jns;
+ break;
+ case GT_GT:
+ jumpKind = EJ_NONE;
+ break;
+#endif // TARGET
+ default:
+ noway_assert(!"Unexpected comparison OpCode");
+ break;
+ }
+ assert(jumpKind == genJumpKindForOper(cmp, CK_LOGICAL));
+ }
+ assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value above
+
+ /* Is the value a simple local variable? */
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ /* Is the flags register set to the value? */
+
+ if (genFlagsAreVar(op1->gtLclVarCommon.gtLclNum))
+ {
+ if (jumpKind != EJ_NONE)
+ {
+ addrReg1 = RBM_NONE;
+ genUpdateLife(op1);
+ goto DONE_FLAGS;
+ }
+ }
+ }
+
+ /* Make the comparand addressable */
+ addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
+
+ /* Are the condition flags set based on the value? */
+
+ unsigned flags = (op1->gtFlags & GTF_ZSF_SET);
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ if (genFlagsAreReg(op1->gtRegNum))
+ {
+ flags |= GTF_ZSF_SET;
+ }
+ }
+
+ if (flags)
+ {
+ if (jumpKind != EJ_NONE)
+ {
+ goto DONE_FLAGS;
+ }
+ }
+
+ /* Is the value in a register? */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regNumber reg = op1->gtRegNum;
+
+ /* With a 'test' we can do any signed test or any test for equality */
+
+ if (!(cond->gtFlags & GTF_UNSIGNED) || cmp == GT_EQ || cmp == GT_NE)
+ {
+ emitAttr compareSize = emitTypeSize(op1->TypeGet());
+
+ // If we have an GT_REG_VAR then the register will be properly sign/zero extended
+ // But only up to 4 bytes
+ if ((op1->gtOper == GT_REG_VAR) && (compareSize < EA_4BYTE))
+ {
+ compareSize = EA_4BYTE;
+ }
+
+#if CPU_HAS_BYTE_REGS
+ // Make sure if we require a byte compare that we have a byte-able register
+ if ((compareSize != EA_1BYTE) || ((genRegMask(op1->gtRegNum) & RBM_BYTE_REGS) != 0))
+#endif // CPU_HAS_BYTE_REGS
+ {
+ /* Generate 'test reg, reg' */
+ instGen_Compare_Reg_To_Zero(compareSize, reg);
+ goto DONE;
+ }
+ }
+ }
+ }
+
+ else // if (ival != 0)
+ {
+ bool smallOk = true;
+
+ /* Make sure that the constant is not out of op1's range.
+ If it is, we need to perform an int-with-int comparison,
+ so we set smallOk to false and op1 gets loaded into a
+ register.
+ */
+
+ /* If op1 is TYP_SHORT, and is followed by an unsigned
+ * comparison, we can use smallOk. But we don't know which
+ * flags will be needed. This probably doesn't happen often.
+ */
+ var_types gtType = op1->TypeGet();
+
+ switch (gtType)
+ {
+ case TYP_BYTE:
+ if (ival != (signed char)ival)
+ smallOk = false;
+ break;
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ if (ival != (unsigned char)ival)
+ smallOk = false;
+ break;
+
+ case TYP_SHORT:
+ if (ival != (signed short)ival)
+ smallOk = false;
+ break;
+ case TYP_CHAR:
+ if (ival != (unsigned short)ival)
+ smallOk = false;
+ break;
+
+#ifdef _TARGET_64BIT_
+ case TYP_INT:
+ if (!FitsIn<INT32>(ival))
+ smallOk = false;
+ break;
+ case TYP_UINT:
+ if (!FitsIn<UINT32>(ival))
+ smallOk = false;
+ break;
+#endif // _TARGET_64BIT_
+
+ default:
+ break;
+ }
+
+ if (smallOk && // constant is in op1's range
+ !unsignedCmp && // signed comparison
+ varTypeIsSmall(gtType) && // smalltype var
+ varTypeIsUnsigned(gtType)) // unsigned type
+ {
+ unsignedCmp = true;
+ }
+
+ /* Make the comparand addressable */
+ addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk);
+ }
+
+ // #if defined(DEBUGGING_SUPPORT)
+
+ /* Special case: comparison of two constants */
+
+ // Needed if Importer doesn't call gtFoldExpr()
+
+ if (!(op1->gtFlags & GTF_REG_VAL) && (op1->IsCnsIntOrI()))
+ {
+ // noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
+
+ /* Workaround: get the constant operand into a register */
+ genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(addrReg1 == RBM_NONE);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ addrReg1 = genRegMask(op1->gtRegNum);
+ }
+
+ // #endif
+
+ /* Compare the operand against the constant */
+
+ if (op2->IsIconHandle())
+ {
+ inst_TT_IV(INS_cmp, op1, ival, 0, EA_HANDLE_CNS_RELOC);
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, ival);
+ }
+ goto DONE;
+ }
+
+ //---------------------------------------------------------------------
+ //
+ // We reach here if op2 was not a GT_CNS_INT
+ //
+
+ byteCmp = false;
+ shortCmp = false;
+
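+ // Only when both operands have the same small type can we safely emit a
+ // byte- or short-sized compare.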
+ if (op1Type == op2->gtType)
+ {
+ shortCmp = varTypeIsShort(op1Type);
+ byteCmp = varTypeIsByte(op1Type);
+ }
+
+ noway_assert(op1->gtOper != GT_CNS_INT);
+
+ if (op2->gtOper == GT_LCL_VAR)
+ genMarkLclVar(op2);
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+ /* Are we comparing against a register? */
+
+ if (op2->gtFlags & GTF_REG_VAL)
+ {
+ /* Make the comparands addressable and mark as used */
+
+ assert(addrReg1 == RBM_NONE);
+ addrReg1 = genMakeAddressable2(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
+
+ /* Is the size of the comparison byte/char/short ? */
+
+ if (varTypeIsSmall(op1->TypeGet()))
+ {
+ /* Is op2 sitting in an appropriate register? */
+
+ if (varTypeIsByte(op1->TypeGet()) && !isByteReg(op2->gtRegNum))
+ goto NO_SMALL_CMP;
+
+ /* Is op2 of the right type for a small comparison */
+
+ if (op2->gtOper == GT_REG_VAR)
+ {
+ if (op1->gtType != compiler->lvaGetRealType(op2->gtRegVar.gtLclNum))
+ goto NO_SMALL_CMP;
+ }
+ else
+ {
+ if (op1->gtType != op2->gtType)
+ goto NO_SMALL_CMP;
+ }
+
+ if (varTypeIsUnsigned(op1->TypeGet()))
+ unsignedCmp = true;
+ }
+
+ assert(addrReg2 == RBM_NONE);
+
+ genComputeReg(op2, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
+ addrReg2 = genRegMask(op2->gtRegNum);
+ addrReg1 = genKeepAddressable(op1, addrReg1, addrReg2);
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+ /* Compare against the register */
+
+ inst_TT_RV(INS_cmp, op1, op2->gtRegNum);
+
+ goto DONE;
+
+ NO_SMALL_CMP:
+
+ // op1 has been made addressable and is marked as in use
+ // op2 is un-generated
+ assert(addrReg2 == 0);
+
+ if ((op1->gtFlags & GTF_REG_VAL) == 0)
+ {
+ regNumber reg1 = regSet.rsPickReg();
+
+ noway_assert(varTypeIsSmall(op1->TypeGet()));
+ instruction ins = ins_Move_Extend(op1->TypeGet(), (op1->gtFlags & GTF_REG_VAL) != 0);
+
+ // regSet.rsPickReg can cause one of the trees within this address mode to get spilled
+ // so we need to make sure it is still valid. Note that at this point, reg1 is
+ // *not* marked as in use, and it is possible for it to be used in the address
+ // mode expression, but that is OK, because we are done with expression after
+ // this. We only need reg1.
+ addrReg1 = genKeepAddressable(op1, addrReg1);
+ inst_RV_TT(ins, reg1, op1);
+ regTracker.rsTrackRegTrash(reg1);
+
+ genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
+ addrReg1 = 0;
+
+ genMarkTreeInReg(op1, reg1);
+
+ regSet.rsMarkRegUsed(op1);
+ addrReg1 = genRegMask(op1->gtRegNum);
+ }
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+ goto DONE_OP1;
+ }
+
+ // We come here if op2 is not enregistered or not in a "good" register.
+
+ assert(addrReg1 == 0);
+
+ // Determine what registers go live between op1 and op2
+ newLiveMask = genNewLiveRegMask(op1, op2);
+
+ // Set up regNeed with the set of registers that we suggest for op1 to be in
+ //
+ regNeed = RBM_ALLINT;
+
+ // avoid selecting registers that get newly born in op2
+ regNeed = regSet.rsNarrowHint(regNeed, ~newLiveMask);
+
+ // avoid selecting op2 reserved regs
+ regNeed = regSet.rsNarrowHint(regNeed, ~op2->gtRsvdRegs);
+
+#if CPU_HAS_BYTE_REGS
+ // If necessary, set up regNeed to select just the byte-able registers
+ if (byteCmp)
+ regNeed = regSet.rsNarrowHint(RBM_BYTE_REGS, regNeed);
+#endif // CPU_HAS_BYTE_REGS
+
+ // Compute the first comparand into some register; regNeed here is simply a hint because RegSet::ANY_REG is used.
+ //
+ genComputeReg(op1, regNeed, RegSet::ANY_REG, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ op1Reg = op1->gtRegNum;
+
+ // Set up regNeed with the set of registers that we require op1 to be in
+ //
+ regNeed = RBM_ALLINT;
+
+#if CPU_HAS_BYTE_REGS
+ // If necessary, set up regNeed to select just the byte-able registers
+ if (byteCmp)
+ regNeed &= RBM_BYTE_REGS;
+#endif // CPU_HAS_BYTE_REGS
+
+ // avoid selecting registers that get newly born in op2, as using them will force a spill temp to be used.
+ regNeed = regSet.rsMustExclude(regNeed, newLiveMask);
+
+ // avoid selecting op2 reserved regs, as using them will force a spill temp to be used.
+ regNeed = regSet.rsMustExclude(regNeed, op2->gtRsvdRegs);
+
+ // Did we end up in an acceptable register?
+ // and do we have an acceptable free register available to grab?
+ //
+ if (((genRegMask(op1Reg) & regNeed) == 0) && ((regSet.rsRegMaskFree() & regNeed) != 0))
+ {
+ // Grab an acceptable register
+ regNumber newReg = regSet.rsGrabReg(regNeed);
+
+ noway_assert(op1Reg != newReg);
+
+ /* Update the value in the target register */
+
+ regTracker.rsTrackRegCopy(newReg, op1Reg);
+
+ inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
+
+ /* The value has been transferred to 'reg' */
+
+ if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
+
+ gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
+
+ /* The value is now in an appropriate register */
+
+ op1->gtRegNum = newReg;
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ op1Reg = op1->gtRegNum;
+
+ genUpdateLife(op1);
+
+ /* Mark the register as 'used' */
+ regSet.rsMarkRegUsed(op1);
+
+ addrReg1 = genRegMask(op1Reg);
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+DONE_OP1:
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ // Set up regNeed with either RBM_ALLINT or the RBM_BYTE_REGS subset;
+ // when byteCmp is true we will perform a byte-sized cmp instruction,
+ // and that instruction requires that any registers used are byte-able ones.
+ //
+ regNeed = RBM_ALLINT;
+
+#if CPU_HAS_BYTE_REGS
+ // If necessary, set up regNeed to select just the byte-able registers
+ if (byteCmp)
+ regNeed &= RBM_BYTE_REGS;
+#endif // CPU_HAS_BYTE_REGS
+
+ /* Make the comparand addressable */
+ assert(addrReg2 == 0);
+ addrReg2 = genMakeRvalueAddressable(op2, regNeed, RegSet::KEEP_REG, false, (byteCmp | shortCmp));
+
+ /* Make sure the first operand is still in a register; if
+ it's been spilled, we have to make sure it's reloaded
+ into a byte-addressable register if needed.
+ Pass keepReg=RegSet::KEEP_REG; otherwise we get the pointer lifetimes wrong.
+ */
+
+ assert(addrReg1 != 0);
+ genRecoverReg(op1, regNeed, RegSet::KEEP_REG);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(!byteCmp || isByteReg(op1->gtRegNum));
+
+ addrReg1 = genRegMask(op1->gtRegNum);
+ regSet.rsLockUsedReg(addrReg1);
+
+ /* Make sure that op2 is addressable. If we are going to do a
+ byte-comparison, we need it to be in a byte register. */
+
+ if (byteCmp && (op2->gtFlags & GTF_REG_VAL))
+ {
+ genRecoverReg(op2, regNeed, RegSet::KEEP_REG);
+ addrReg2 = genRegMask(op2->gtRegNum);
+ }
+ else
+ {
+ addrReg2 = genKeepAddressable(op2, addrReg2);
+ }
+
+ regSet.rsUnlockUsedReg(addrReg1);
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+ if (byteCmp || shortCmp)
+ {
+ size = emitTypeSize(op2->TypeGet());
+ if (varTypeIsUnsigned(op1Type))
+ unsignedCmp = true;
+ }
+ else
+ {
+ size = emitActualTypeSize(op2->TypeGet());
+ }
+
+ /* Perform the comparison */
+ inst_RV_TT(INS_cmp, op1->gtRegNum, op2, 0, size);
+
+DONE:
+
+ jumpKind = genJumpKindForOper(cmp, unsignedCmp ? CK_UNSIGNED : CK_SIGNED);
+
+DONE_FLAGS: // We have determined what jumpKind to use
+
+ genUpdateLife(cond);
+
+ /* The condition value is dead at the jump that follows */
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+ genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
+ genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
+
+ noway_assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value
+
+ return jumpKind;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Generate code to jump to the jump target of the current basic block if
+ * the given relational operator yields 'true'.
+ */
+
+void CodeGen::genCondJump(GenTreePtr cond, BasicBlock* destTrue, BasicBlock* destFalse, bool bStackFPFixup)
+{
+ BasicBlock* jumpTrue;
+ BasicBlock* jumpFalse;
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+
+ if (destTrue)
+ {
+ jumpTrue = destTrue;
+ jumpFalse = destFalse;
+ }
+ else
+ {
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+ jumpTrue = compiler->compCurBB->bbJumpDest;
+ jumpFalse = compiler->compCurBB->bbNext;
+ }
+
+ noway_assert(cond->OperIsCompare());
+
+ /* Make sure the more expensive operand is 'op1' */
+ noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ if (cond->gtFlags & GTF_REVERSE_OPS) // TODO: note that this is now dead code, since the above is a noway_assert()
+ {
+ /* Don't forget to modify the condition as well */
+
+ cond->gtOp.gtOp1 = op2;
+ cond->gtOp.gtOp2 = op1;
+ cond->SetOper(GenTree::SwapRelop(cmp));
+ cond->gtFlags &= ~GTF_REVERSE_OPS;
+
+ /* Get hold of the new values */
+
+ cmp = cond->OperGet();
+ op1 = cond->gtOp.gtOp1;
+ op2 = cond->gtOp.gtOp2;
+ }
+
+ /* What is the type of the operand? */
+
+ switch (genActualType(op1->gtType))
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+ emitJumpKind jumpKind;
+
+ // Check if we can use the currently set flags. Else set them
+
+ jumpKind = genCondSetFlags(cond);
+
+#if FEATURE_STACK_FP_X87
+ if (bStackFPFixup)
+ {
+ genCondJmpInsStackFP(jumpKind, jumpTrue, jumpFalse);
+ }
+ else
+#endif
+ {
+ /* Generate the conditional jump */
+ inst_JMP(jumpKind, jumpTrue);
+ }
+
+ return;
+
+ case TYP_LONG:
+#if FEATURE_STACK_FP_X87
+ if (bStackFPFixup)
+ {
+ genCondJumpLngStackFP(cond, jumpTrue, jumpFalse);
+ }
+ else
+#endif
+ {
+ genCondJumpLng(cond, jumpTrue, jumpFalse);
+ }
+ return;
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+#if FEATURE_STACK_FP_X87
+ genCondJumpFltStackFP(cond, jumpTrue, jumpFalse, bStackFPFixup);
+#else
+ genCondJumpFloat(cond, jumpTrue, jumpFalse);
+#endif
+ return;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(cond);
+#endif
+ unreached(); // unexpected/unsupported 'jtrue' operands type
+ }
+}
+
+/*****************************************************************************
+ * Spill registers to check that callers can handle it.
+ */
+
+#ifdef DEBUG
+
+void CodeGen::genStressRegs(GenTreePtr tree)
+{
+ if (regSet.rsStressRegs() < 2)
+ return;
+
+ /* Spill as many registers as possible. Callers should be prepared
+ to handle this case.
+ But don't spill trees with no size (TYP_STRUCT comes to mind) */
+
+ {
+ regMaskTP spillRegs = regSet.rsRegMaskCanGrab() & regSet.rsMaskUsed;
+ regNumber regNum;
+ regMaskTP regBit;
+
+ for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if ((spillRegs & regBit) && (regSet.rsUsedTree[regNum] != NULL) &&
+ (genTypeSize(regSet.rsUsedTree[regNum]->TypeGet()) > 0))
+ {
+ regSet.rsSpillReg(regNum);
+
+ spillRegs &= regSet.rsMaskUsed;
+
+ if (!spillRegs)
+ break;
+ }
+ }
+ }
+
+ regMaskTP trashRegs = regSet.rsRegMaskFree();
+
+ if (trashRegs == RBM_NONE)
+ return;
+
+ /* It is sometimes reasonable to expect that calling genCodeForTree()
+ on certain trees won't spill anything */
+
+ if ((compiler->compCurStmt == compiler->compCurBB->bbTreeList) && (compiler->compCurBB->bbCatchTyp) &&
+ handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp))
+ {
+ trashRegs &= ~(RBM_EXCEPTION_OBJECT);
+ }
+
+ // If genCodeForTree() effectively gets called a second time on the same tree
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
+ trashRegs &= ~genRegMask(tree->gtRegNum);
+ }
+
+ if (tree->gtType == TYP_INT && tree->OperIsSimple())
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ if (op1 && (op1->gtFlags & GTF_REG_VAL))
+ trashRegs &= ~genRegMask(op1->gtRegNum);
+ if (op2 && (op2->gtFlags & GTF_REG_VAL))
+ trashRegs &= ~genRegMask(op2->gtRegNum);
+ }
+
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ if (compiler->info.compCallUnmanaged)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+ if (varDsc->lvRegister)
+ trashRegs &= ~genRegMask(varDsc->lvRegNum);
+ }
+ }
+
+ /* Now trash the registers. We restrict ourselves to regSet.rsModifiedRegsMask, since
+ otherwise we would have to save/restore the registers. We try to be as unintrusive
+ as possible */
+
+ noway_assert((REG_INT_LAST - REG_INT_FIRST) == 7);
+ // This is obviously false for ARM, but this function is never called.
+ for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
+ {
+ regMaskTP regMask = genRegMask(reg);
+
+ if (regSet.rsRegsModified(regMask & trashRegs))
+ genSetRegToIcon(reg, 0);
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Generate code for a GTK_CONST tree
+ */
+
+void CodeGen::genCodeForTreeConst(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ noway_assert(tree->IsCnsIntOrI());
+
+ ssize_t ival = tree->gtIntConCommon.IconValue();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+ bool needReloc = compiler->opts.compReloc && tree->IsIconHandle();
+
+#if REDUNDANT_LOAD
+
+ /* If we are targeting destReg and ival is zero */
+ /* we would rather xor needReg than copy another register */
+
+ if (!needReloc)
+ {
+ bool reuseConstantInReg = false;
+
+ if (destReg == RBM_NONE)
+ reuseConstantInReg = true;
+
+#ifdef _TARGET_ARM_
+ // If we can set a register to a constant with a small encoding, then do that.
+ // Assume we'll get a low register if needReg has low registers as options.
+ if (!reuseConstantInReg &&
+ !arm_Valid_Imm_For_Small_Mov((needReg & RBM_LOW_REGS) ? REG_R0 : REG_R8, ival, INS_FLAGS_DONT_CARE))
+ {
+ reuseConstantInReg = true;
+ }
+#else
+ if (!reuseConstantInReg && ival != 0)
+ reuseConstantInReg = true;
+#endif
+
+ if (reuseConstantInReg)
+ {
+ /* Is the constant already in register? If so, use this register */
+
+ reg = regTracker.rsIconIsInReg(ival);
+ if (reg != REG_NA)
+ goto REG_LOADED;
+ }
+ }
+
+#endif // REDUNDANT_LOAD
+
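+ // We could not (or chose not to) reuse an existing copy of the constant,
+ // so pick a register and materialize it below.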
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ /* If the constant is a handle, we need a reloc to be applied to it */
+
+ if (needReloc)
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, ival);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else
+ {
+ genSetRegToIcon(reg, ival, tree->TypeGet());
+ }
+
+REG_LOADED:
+
+#ifdef DEBUG
+ /* Special case: GT_CNS_INT - Restore the current live set if it was changed */
+
+ if (!genTempLiveChg)
+ {
+ VarSetOps::Assign(compiler, compiler->compCurLife, genTempOldLife);
+ genTempLiveChg = true;
+ }
+#endif
+
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); // In case the handle is a GC object (for eg, frozen strings)
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a GTK_LEAF tree
+ */
+
+void CodeGen::genCodeForTreeLeaf(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ genTreeOps oper = tree->OperGet();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP regs = regSet.rsMaskUsed;
+ regMaskTP needReg = destReg;
+ size_t size;
+
+ noway_assert(tree->OperKind() & GTK_LEAF);
+
+ switch (oper)
+ {
+ case GT_REG_VAR:
+ NO_WAY("GT_REG_VAR should have been caught above");
+ break;
+
+ case GT_LCL_VAR:
+
+ /* Does the variable live in a register? */
+
+ if (genMarkLclVar(tree))
+ {
+ genCodeForTree_REG_VAR1(tree);
+ return;
+ }
+
+#if REDUNDANT_LOAD
+
+ /* Is the local variable already in register? */
+
+ reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
+
+ if (reg != REG_NA)
+ {
+ /* Use the register the variable happens to be in */
+ regMaskTP regMask = genRegMask(reg);
+
+ // If the register that it was in isn't one of the needRegs
+ // then try to move it into a needReg register
+
+ if (((regMask & needReg) == 0) && (regSet.rsRegMaskCanGrab() & needReg))
+ {
+ regNumber rg2 = reg;
+ reg = regSet.rsPickReg(needReg, bestReg);
+ if (reg != rg2)
+ {
+ regMask = genRegMask(reg);
+ inst_RV_RV(INS_mov, reg, rg2, tree->TypeGet());
+ }
+ }
+
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
+ regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
+ break;
+ }
+
+#endif
+ goto MEM_LEAF;
+
+ case GT_LCL_FLD:
+
+ // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
+ goto MEM_LEAF;
+
+ case GT_CLS_VAR:
+
+ MEM_LEAF:
+
+ /* Pick a register for the value */
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ /* Load the variable into the register */
+
+ size = genTypeSize(tree->gtType);
+
+ if (size < EA_4BYTE)
+ {
+ instruction ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0);
+ inst_RV_TT(ins, reg, tree, 0);
+
+ /* We've now "promoted" the tree-node to TYP_INT */
+
+ tree->gtType = TYP_INT;
+ }
+ else
+ {
+ inst_RV_TT(INS_mov, reg, tree, 0);
+ }
+
+ regTracker.rsTrackRegTrash(reg);
+
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
+
+ switch (oper)
+ {
+ case GT_CLS_VAR:
+ regTracker.rsTrackRegClsVar(reg, tree);
+ break;
+ case GT_LCL_VAR:
+ regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
+ break;
+ case GT_LCL_FLD:
+ break;
+ default:
+ noway_assert(!"Unexpected oper");
+ }
+
+#ifdef _TARGET_ARM_
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ break;
+
+ case GT_NO_OP:
+ // The VM does certain things with actual NOP instructions
+ // so generate something small that has no effect, but isn't
+ // a typical NOP
+ if (tree->gtFlags & GTF_NO_OP_NO)
+ {
+#ifdef _TARGET_XARCH_
+ // The VM expects 0x66 0x90 for a 2-byte NOP, not 0x90 0x90
+ instGen(INS_nop);
+ instGen(INS_nop);
+#elif defined(_TARGET_ARM_)
+ // The VM isn't checking for this yet; when it does, hopefully it will
+ // be fooled by the wider variant.
+ instGen(INS_nopw);
+#else
+ NYI("Non-nop NO_OP");
+#endif
+ }
+ else
+ {
+ instGen(INS_nop);
+ }
+ reg = REG_STK;
+ break;
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+
+ /* Have to clear the shadowSP of the nesting level which
+ encloses the finally */
+
+ unsigned finallyNesting;
+ finallyNesting = (unsigned)tree->gtVal.gtVal1;
+ noway_assert(tree->gtVal.gtVal1 <
+ compiler->compHndBBtabCount); // assert we didn't truncate with the cast above.
+ noway_assert(finallyNesting < compiler->compHndBBtabCount);
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
+ sizeof(void*)); // below doesn't underflow.
+ filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
+
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*));
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
+ reg = REG_STK;
+ break;
+#endif // !FEATURE_EH_FUNCLETS
+
+ case GT_CATCH_ARG:
+
+ noway_assert(compiler->compCurBB->bbCatchTyp && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
+
+ /* Catch arguments get passed in a register. genCodeForBBlist()
+ would have marked the register as holding a GC object, but not as used. */
+
+ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
+ reg = REG_EXCEPTION_OBJECT;
+ break;
+
+ case GT_JMP:
+ genCodeForTreeLeaf_GT_JMP(tree);
+ return;
+
+ case GT_MEMORYBARRIER:
+ // Emit the memory barrier instruction
+ instGen_MemoryBarrier();
+ reg = REG_STK;
+ break;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected leaf");
+ }
+
+ noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
+ genCodeForTree_DONE(tree, reg);
+}
+
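+/*****************************************************************************
+ *
+ * Walk a GT_COMMA chain, generating code for each left-hand operand purely
+ * for its side effects, and return the final non-comma operand.
+ */
+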
+GenTreePtr CodeGen::genCodeForCommaTree(GenTreePtr tree)
+{
+ while (tree->OperGet() == GT_COMMA)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ genCodeForTree(op1, RBM_NONE);
+ gcInfo.gcMarkRegPtrVal(op1);
+
+ tree = tree->gtOp.gtOp2;
+ }
+ return tree;
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a leaf node of type GT_JMP
+ */
+
+void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree)
+{
+ noway_assert(compiler->compCurBB->bbFlags & BBF_HAS_JMP);
+
+#ifdef PROFILING_SUPPORTED
+ if (compiler->compIsProfilerHookNeeded())
+ {
+ /* fire the event at the call site */
+ unsigned saveStackLvl2 = genStackLevel;
+
+ compiler->info.compProfilerCallback = true;
+
+#ifdef _TARGET_X86_
+ //
+ // Push the profilerHandle
+ //
+ regMaskTP byrefPushedRegs;
+ regMaskTP norefPushedRegs;
+ regMaskTP pushedArgRegs =
+ genPushRegs(RBM_ARG_REGS & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock), &byrefPushedRegs,
+ &norefPushedRegs);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
+ (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+ genSinglePush();
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
+ sizeof(int) * 1, // argSize
+ EA_UNKNOWN); // retSize
+
+ //
+ // Adjust the number of stack slots used by this managed method if necessary.
+ //
+ if (compiler->fgPtrArgCntMax < 1)
+ {
+ compiler->fgPtrArgCntMax = 1;
+ }
+
+ genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
+#elif _TARGET_ARM_
+ // For GT_JMP nodes under the ARM profiler we have added r0 as a used register in order to evaluate the GT_JMP node.
+ // To emit the tailcall callback we need r0 to pass the profiler handle. Any free register could be used as the call target.
+ regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_JMP_USED);
+ noway_assert(argReg == REG_PROFILER_JMP_ARG);
+ regSet.rsLockReg(RBM_PROFILER_JMP_USED);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ regTracker.rsTrackRegTrash(argReg);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ }
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
+ 0, // argSize
+ EA_UNKNOWN); // retSize
+
+ regSet.rsUnlockReg(RBM_PROFILER_JMP_USED);
+#else
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking 'arguments'");
+#endif //_TARGET_X86_
+
+ /* Restore the stack level */
+ genStackLevel = saveStackLvl2;
+ }
+#endif // PROFILING_SUPPORTED
+
+ /* This code is cloned from the regular processing of GT_RETURN values. We have to remember to
+ * call genPInvokeMethodEpilog anywhere that we have a method return. We should really
+ * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
+ */
+
+ if (compiler->info.compCallUnmanaged)
+ {
+ genPInvokeMethodEpilog();
+ }
+
+ // Make sure register arguments are in their initial registers
+ // and stack arguments are put back as well.
+ //
+ // This does not deal with circular dependencies of register
+ // arguments, which is safe because RegAlloc prevents that by
+ // not enregistering any RegArgs when a JMP opcode is used.
+
+ if (compiler->info.compArgsCount == 0)
+ {
+ return;
+ }
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ // First move any enregistered stack arguments back to the stack
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+ if (varDsc->lvIsRegArg || !varDsc->lvRegister)
+ continue;
+
+ /* Argument was passed on the stack, but ended up in a register.
+ * Store it back to the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ if (varDsc->TypeGet() == TYP_LONG)
+ {
+ /* long - at least the low half must be enregistered */
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0);
+
+ /* Is the upper half also enregistered? */
+
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int));
+ }
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+ getEmitter()->emitIns_S_R(ins_Store(varDsc->TypeGet()), emitTypeSize(varDsc->TypeGet()), varDsc->lvRegNum,
+ varNum, 0);
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ regMaskTP fixedArgsMask = RBM_NONE;
+#endif
+
+ // Next move any un-enregistered register arguments back to their register
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ /* Is this variable a register arg? */
+
+ if (!varDsc->lvIsRegArg)
+ continue;
+
+ /* Register argument */
+
+ noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+ noway_assert(!varDsc->lvRegister);
+
+ /* Reload it from the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ if (varDsc->TypeGet() == TYP_LONG)
+ {
+ /* long - at least the low half must be enregistered */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, varDsc->lvArgReg, varNum, 0);
+ regTracker.rsTrackRegTrash(varDsc->lvArgReg);
+
+ /* Assume the upper half is also enregistered */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, genRegArgNext(varDsc->lvArgReg), varNum,
+ sizeof(int));
+ regTracker.rsTrackRegTrash(genRegArgNext(varDsc->lvArgReg));
+
+#ifdef _TARGET_ARM_
+ fixedArgsMask |= genRegMask(varDsc->lvArgReg);
+ fixedArgsMask |= genRegMask(genRegArgNext(varDsc->lvArgReg));
+#endif
+ }
+ else
+#endif // _TARGET_64BIT_
+#ifdef _TARGET_ARM_
+ if (varDsc->lvIsHfaRegArg())
+ {
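+ // Reload the HFA one element at a time into its consecutive floating-point
+ // argument registers.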
+ const var_types elemType = varDsc->GetHfaType();
+ const instruction loadOp = ins_Load(elemType);
+ const emitAttr size = emitTypeSize(elemType);
+ regNumber argReg = varDsc->lvArgReg;
+ const unsigned maxSize = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES);
+
+ for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
+ {
+ getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
+ assert(genIsValidFloatReg(argReg)); // we don't use register tracking for FP
+ argReg = regNextOfType(argReg, elemType);
+ }
+ }
+ else if (varDsc->TypeGet() == TYP_STRUCT)
+ {
+ const var_types elemType = TYP_INT; // we pad everything out to at least 4 bytes
+ const instruction loadOp = ins_Load(elemType);
+ const emitAttr size = emitTypeSize(elemType);
+ regNumber argReg = varDsc->lvArgReg;
+ const unsigned maxSize = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES);
+
+ for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
+ {
+ getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
+ regTracker.rsTrackRegTrash(argReg);
+
+ fixedArgsMask |= genRegMask(argReg);
+
+ argReg = genRegArgNext(argReg);
+ }
+ }
+ else
+#endif //_TARGET_ARM_
+ {
+ var_types loadType = varDsc->TypeGet();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+ bool twoParts = false;
+
+ if (compiler->info.compIsVarArgs && isFloatRegType(loadType))
+ {
+#ifndef _TARGET_64BIT_
+ if (loadType == TYP_DOUBLE)
+ twoParts = true;
+#endif
+ loadType = TYP_I_IMPL;
+ assert(isValidIntArgReg(argReg));
+ }
+
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+ regTracker.rsTrackRegTrash(argReg);
+
+#ifdef _TARGET_ARM_
+ fixedArgsMask |= genRegMask(argReg);
+#endif
+ if (twoParts)
+ {
+ argReg = genRegArgNext(argReg);
+ assert(isValidIntArgReg(argReg));
+
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, REGSIZE_BYTES);
+ regTracker.rsTrackRegTrash(argReg);
+
+#ifdef _TARGET_ARM_
+ fixedArgsMask |= genRegMask(argReg);
+#endif
+ }
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ // Check if we have any non-fixed args possibly in the arg registers.
+ if (compiler->info.compIsVarArgs && (fixedArgsMask & RBM_ARG_REGS) != RBM_ARG_REGS)
+ {
+ noway_assert(compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame);
+
+ regNumber regDeclArgs = REG_ARG_FIRST;
+
+ // Skip the 'this' pointer.
+ if (!compiler->info.compIsStatic)
+ {
+ regDeclArgs = REG_NEXT(regDeclArgs);
+ }
+
+ // Skip the 'generic context.'
+ if (compiler->info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ regDeclArgs = REG_NEXT(regDeclArgs);
+ }
+
+ // Skip any 'return buffer arg.'
+ if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
+ {
+ regDeclArgs = REG_NEXT(regDeclArgs);
+ }
+
+ // Skip the 'vararg cookie.'
+ regDeclArgs = REG_NEXT(regDeclArgs);
+
+ // Also add offset for the vararg cookie.
+ int offset = REGSIZE_BYTES;
+
+ // Reload all the variable arguments that were passed in registers back into their registers.
+ for (regNumber reg = regDeclArgs; reg <= REG_ARG_LAST; reg = REG_NEXT(reg))
+ {
+ if (!(fixedArgsMask & genRegMask(reg)))
+ {
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaVarargsHandleArg, offset);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ offset += REGSIZE_BYTES;
+ }
+ }
+#endif // _TARGET_ARM_
+}
+
+/*****************************************************************************
+ *
+ * Check if a variable is assigned to in a tree. The variable number is
+ * passed in pCallBackData. If the variable is assigned to, return
+ * Compiler::WALK_ABORT. Otherwise return Compiler::WALK_CONTINUE.
+ */
+Compiler::fgWalkResult CodeGen::fgIsVarAssignedTo(GenTreePtr* pTree, Compiler::fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ if ((tree->OperIsAssignment()) && (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) &&
+ (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == (unsigned)(size_t)data->pCallbackData))
+ {
+ return Compiler::WALK_ABORT;
+ }
+
+ return Compiler::WALK_CONTINUE;
+}
+
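+// Return the register that holds 'tree' if it is a GT_LCL_VAR living in a
+// non-floating-point register; otherwise return REG_NA.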
+regNumber CodeGen::genIsEnregisteredIntVariable(GenTreePtr tree)
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ /* Does the variable live in a register? */
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+ if (!varDsc->IsFloatRegType() && varDsc->lvRegister)
+ {
+ return varDsc->lvRegNum;
+ }
+ }
+
+ return REG_NA;
+}
+
+// inline
+void CodeGen::unspillLiveness(genLivenessSet* ls)
+{
+ // Only try to unspill the registers that are missing from the currentLiveRegs
+ //
+ regMaskTP cannotSpillMask = ls->maskVars | ls->gcRefRegs | ls->byRefRegs;
+ regMaskTP currentLiveRegs = regSet.rsMaskVars | gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
+ cannotSpillMask &= ~currentLiveRegs;
+
+ // Typically this mask will be empty and we will return early
+ //
+ if (cannotSpillMask == 0)
+ return;
+
+ for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
+ {
+ // Is this a register that we cannot leave in the spilled state?
+ //
+ if ((cannotSpillMask & genRegMask(reg)) == 0)
+ continue;
+
+ RegSet::SpillDsc* spill = regSet.rsSpillDesc[reg];
+
+ // Was it spilled? If not, skip it.
+ //
+ if (!spill)
+ continue;
+
+ noway_assert(spill->spillTree->gtFlags & GTF_SPILLED);
+
+ regSet.rsUnspillReg(spill->spillTree, genRegMask(reg), RegSet::KEEP_REG);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a qmark colon
+ */
+
+void CodeGen::genCodeForQmark(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ regNumber reg;
+ regMaskTP regs = regSet.rsMaskUsed;
+ regMaskTP needReg = destReg;
+
+ noway_assert(compiler->compQmarkUsed);
+ noway_assert(tree->gtOper == GT_QMARK);
+ noway_assert(op1->OperIsCompare());
+ noway_assert(op2->gtOper == GT_COLON);
+
+ GenTreePtr thenNode = op2->AsColon()->ThenNode();
+ GenTreePtr elseNode = op2->AsColon()->ElseNode();
+
+ /* If elseNode is a Nop node you must reverse the
+ thenNode and elseNode prior to reaching here!
+ (If both 'else' and 'then' are Nops, the whole qmark will have been optimized away.) */
+
+ noway_assert(!elseNode->IsNothingNode());
+
+ /* Try to implement the qmark colon using a CMOV. If we can't for
+ whatever reason, this will return false and we will implement
+ it using regular branching constructs. */
+
+ if (genCodeForQmarkWithCMOV(tree, destReg, bestReg))
+ return;
+
+ /*
+ This is a ?: operator; generate code like this:
+
+ condition_compare
+ jmp_if_true lab_true
+
+ lab_false:
+ op1 (false = 'else' part)
+ jmp lab_done
+
+ lab_true:
+ op2 (true = 'then' part)
+
+ lab_done:
+
+
+ NOTE: If there is no 'then' part, we do not generate the 'jmp lab_done'
+ or the 'lab_done' label
+ */
+
+ BasicBlock* lab_true;
+ BasicBlock* lab_false;
+ BasicBlock* lab_done;
+
+ genLivenessSet entryLiveness;
+ genLivenessSet exitLiveness;
+
+ lab_true = genCreateTempLabel();
+ lab_false = genCreateTempLabel();
+
+#if FEATURE_STACK_FP_X87
+ /* Spill any registers that hold partial values so that the exit liveness
+ from both sides is the same */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat;
+
+ // spillMask should be the whole FP stack
+ noway_assert(compCurFPState.m_uStackSize == genCountBits(spillMask));
+#endif
+
+ SpillTempsStackFP(regSet.rsMaskUsedFloat);
+ noway_assert(regSet.rsMaskUsedFloat == 0);
+#endif
+
+ /* Before we generate code for qmark, we spill all the currently used registers
+ that conflict with the registers used in the qmark tree. This is to avoid
+ introducing spills that only occur on either the 'then' or 'else' side of
+ the tree, but not both identically. We need to be careful with enregistered
+ variables that are used; see below.
+ */
+
+ if (regSet.rsMaskUsed)
+ {
+ /* If regSet.rsMaskUsed overlaps with regSet.rsMaskVars (multi-use of the enregistered
+ variable), then it may not get spilled. However, the variable may
+ then go dead within thenNode/elseNode, at which point regSet.rsMaskUsed
+ may get spilled from one side and not the other. So unmark regSet.rsMaskVars
+ before spilling regSet.rsMaskUsed */
+
+ regMaskTP rsAdditionalCandidates = regSet.rsMaskUsed & regSet.rsMaskVars;
+ regMaskTP rsAdditional = RBM_NONE;
+
+ // For each multi-use of an enregistered variable, we need to determine if
+ // it can get spilled inside the qmark colon. This can only happen if
+ // its life ends somewhere in the qmark colon. We have the following
+ // cases:
+ // 1) Variable is dead at the end of the colon -- needs to be spilled
+ // 2) Variable is alive at the end of the colon -- needs to be spilled
+ // iff it is assigned to in the colon. In order to determine that, we
+ // examine the GTF_ASG flag to see if any assignments were made in the
+ // colon. If there are any, we need to do a tree walk to see if this
+ // variable is the target of an assignment. This treewalk should not
+ // happen frequently.
+ if (rsAdditionalCandidates)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ Compiler::printTreeID(tree);
+ printf(": Qmark-Colon additional spilling candidates are ");
+ dspRegMask(rsAdditionalCandidates);
+ printf("\n");
+ }
+#endif
+
+ // If any candidates are not alive at the GT_QMARK node, then they
+ // need to be spilled
+
+ VARSET_TP VARSET_INIT(compiler, rsLiveNow, compiler->compCurLife);
+ VARSET_TP VARSET_INIT_NOCOPY(rsLiveAfter, compiler->fgUpdateLiveSet(compiler->compCurLife,
+ compiler->compCurLifeTree, tree));
+
+ VARSET_TP VARSET_INIT_NOCOPY(regVarLiveNow,
+ VarSetOps::Intersection(compiler, compiler->raRegVarsMask, rsLiveNow));
+
+ VARSET_ITER_INIT(compiler, iter, regVarLiveNow, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ // Find the variable in compiler->lvaTable
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+#if !FEATURE_FP_REGALLOC
+ if (varDsc->IsFloatRegType())
+ continue;
+#endif
+
+ noway_assert(varDsc->lvRegister);
+
+ regMaskTP regBit;
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ regBit = genRegMask(varDsc->lvRegNum);
+
+ // For longs we may need to spill both regs
+ if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
+ regBit |= genRegMask(varDsc->lvOtherReg);
+ }
+
+ // Is it one of our reg-use vars? If not, we don't need to spill it.
+ regBit &= rsAdditionalCandidates;
+ if (!regBit)
+ continue;
+
+ // Is the variable live at the end of the colon?
+ if (VarSetOps::IsMember(compiler, rsLiveAfter, varIndex))
+ {
+ // Variable is alive at the end of the colon. Was it assigned
+ // to inside the colon?
+
+ if (!(op2->gtFlags & GTF_ASG))
+ continue;
+
+ if (compiler->fgWalkTreePre(&op2, CodeGen::fgIsVarAssignedTo, (void*)(size_t)varNum) ==
+ Compiler::WALK_ABORT)
+ {
+ // Variable was assigned to, so we need to spill it.
+
+ rsAdditional |= regBit;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ Compiler::printTreeID(tree);
+ printf(": Qmark-Colon candidate ");
+ dspRegMask(regBit);
+ printf("\n");
+ printf(" is assigned to inside colon and will be spilled\n");
+ }
+#endif
+ }
+ }
+ else
+ {
+ // Variable is not alive at the end of the colon. We need to spill it.
+
+ rsAdditional |= regBit;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ Compiler::printTreeID(tree);
+ printf(": Qmark-Colon candidate ");
+ dspRegMask(regBit);
+ printf("\n");
+ printf(" is alive at end of colon and will be spilled\n");
+ }
+#endif
+ }
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ Compiler::printTreeID(tree);
+ printf(": Qmark-Colon approved additional spilling candidates are ");
+ dspRegMask(rsAdditional);
+ printf("\n");
+ }
+#endif
+ }
+
+ noway_assert((rsAdditionalCandidates | rsAdditional) == rsAdditionalCandidates);
+
+ // We only need to spill registers that are modified by the qmark tree, as specified in tree->gtUsedRegs.
+ // If we ever need to use and spill a register while generating code that is not in tree->gtUsedRegs,
+ // we will have unbalanced spills and generate bad code.
+ regMaskTP rsSpill =
+ ((regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskResvd)) | rsAdditional) & tree->gtUsedRegs;
+
+#ifdef DEBUG
+ // Under register stress, regSet.rsPickReg() ignores the recommended registers and always picks
+ // 'bad' registers, causing spills. So, just force all used registers to get spilled
+ // in the stress case, to avoid the problem we're trying to resolve here. Thus, any spills
+ // that occur within the qmark condition, 'then' case, or 'else' case, will have to be
+ // unspilled while generating that same tree.
+
+ if (regSet.rsStressRegs() >= 1)
+ {
+ rsSpill |= regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskLock | regSet.rsMaskResvd);
+ }
+#endif // DEBUG
+
+ if (rsSpill)
+ {
+ // Remember which registers hold pointers. We will spill
+ // them, but the code that follows will fetch reg vars from
+ // the registers, so we need that GC info.
+ regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsAdditional;
+ regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsAdditional;
+
+ // regSet.rsSpillRegs() will assert if we try to spill any enregistered variables.
+ // So, pretend there aren't any, and spill them anyway. This will only occur
+ // if rsAdditional is non-empty.
+ regMaskTP rsTemp = regSet.rsMaskVars;
+ regSet.ClearMaskVars();
+
+ regSet.rsSpillRegs(rsSpill);
+
+ // Restore gc tracking masks.
+ gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
+ gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
+
+ // Set regSet.rsMaskVars back to normal
+ regSet.rsMaskVars = rsTemp;
+ }
+ }
+
+ // Generate the conditional jump but without doing any StackFP fixups.
+ genCondJump(op1, lab_true, lab_false, false);
+
+ /* Save the current liveness, register status, and GC pointers */
+ /* This is the liveness information upon entry */
+ /* to both the then and else parts of the qmark */
+
+ saveLiveness(&entryLiveness);
+
+ /* Clear the liveness of any local variables that are dead upon */
+ /* entry to the else part. */
+
+ /* Subtract the liveSet upon entry of the then part (op1->gtNext) */
+ /* from the "colon or op2" liveSet */
+ genDyingVars(compiler->compCurLife, tree->gtQmark.gtElseLiveSet);
+
+ /* genCondJump() closes the current emitter block */
+
+ genDefineTempLabel(lab_false);
+
+#if FEATURE_STACK_FP_X87
+ // Store fpstate
+
+ QmarkStateStackFP tempFPState;
+ bool bHasFPUState = !compCurFPState.IsEmpty();
+ genQMarkBeforeElseStackFP(&tempFPState, tree->gtQmark.gtElseLiveSet, op1->gtNext);
+#endif
+
+ /* Does the operator yield a value? */
+
+ if (tree->gtType == TYP_VOID)
+ {
+ /* Generate the code for the else part of the qmark */
+
+ genCodeForTree(elseNode, needReg, bestReg);
+
+ /* The type is VOID, so we shouldn't have computed a value */
+
+ noway_assert(!(elseNode->gtFlags & GTF_REG_VAL));
+
+ /* Save the current liveness, register status, and GC pointers */
+ /* This is the liveness information upon exit of the else part of the qmark */
+
+ saveLiveness(&exitLiveness);
+
+ /* Is there a 'then' part? */
+
+ if (thenNode->IsNothingNode())
+ {
+#if FEATURE_STACK_FP_X87
+ if (bHasFPUState)
+ {
+ // We had FP state on entry just after the condition, so potentially, the else
+ // node may have to do transition work.
+ lab_done = genCreateTempLabel();
+
+ /* Generate jmp lab_done */
+
+ inst_JMP(EJ_jmp, lab_done);
+
+ /* No 'then' - just generate the 'lab_true' label */
+
+ genDefineTempLabel(lab_true);
+
+ // We need to do this after defining the lab_false label
+ genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
+ genQMarkAfterThenBlockStackFP(&tempFPState);
+ genDefineTempLabel(lab_done);
+ }
+ else
+#endif // FEATURE_STACK_FP_X87
+ {
+ /* No 'then' - just generate the 'lab_true' label */
+ genDefineTempLabel(lab_true);
+ }
+ }
+ else
+ {
+ lab_done = genCreateTempLabel();
+
+ /* Generate jmp lab_done */
+
+ inst_JMP(EJ_jmp, lab_done);
+
+ /* Restore the liveness that we had upon entry of the then part of the qmark */
+
+ restoreLiveness(&entryLiveness);
+
+ /* Clear the liveness of any local variables that are dead upon */
+ /* entry to the then part. */
+ genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
+
+ /* Generate lab_true: */
+
+ genDefineTempLabel(lab_true);
+#if FEATURE_STACK_FP_X87
+ // We need to do this after defining the lab_false label
+ genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
+#endif
+ /* Enter the then part - trash all registers */
+
+ regTracker.rsTrackRegClr();
+
+ /* Generate the code for the then part of the qmark */
+
+ genCodeForTree(thenNode, needReg, bestReg);
+
+ /* The type is VOID, so we shouldn't have computed a value */
+
+ noway_assert(!(thenNode->gtFlags & GTF_REG_VAL));
+
+ unspillLiveness(&exitLiveness);
+
+ /* Verify that the exit liveness information is the same for the two parts of the qmark */
+
+ checkLiveness(&exitLiveness);
+#if FEATURE_STACK_FP_X87
+ genQMarkAfterThenBlockStackFP(&tempFPState);
+#endif
+ /* Define the "result" label */
+
+ genDefineTempLabel(lab_done);
+ }
+
+ /* Join of the two branches - trash all registers */
+
+ regTracker.rsTrackRegClr();
+
+ /* We're just about done */
+
+ genUpdateLife(tree);
+ }
+ else
+ {
+ /* Generate code for a qmark that generates a value */
+
+ /* Generate the code for the else part of the qmark */
+
+ noway_assert(elseNode->IsNothingNode() == false);
+
+ /* Compute the elseNode into any free register */
+ genComputeReg(elseNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
+ noway_assert(elseNode->gtFlags & GTF_REG_VAL);
+ noway_assert(elseNode->gtRegNum != REG_NA);
+
+ /* Record the chosen register */
+ reg = elseNode->gtRegNum;
+ regs = genRegMask(reg);
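+
+ // The 'then' part will later be computed into this exact register (see the EXACT_REG
+ // request below), so both arms of the qmark deliver their result in 'reg'.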
+
+ /* Save the current liveness, register status, and GC pointers */
+ /* This is the liveness information upon exit of the else part of the qmark */
+
+ saveLiveness(&exitLiveness);
+
+ /* Generate jmp lab_done */
+ lab_done = genCreateTempLabel();
+
+#ifdef DEBUG
+ // We will use this to assert we don't emit instructions if we decide not to
+ // do the jmp
+ unsigned emittedInstructions = getEmitter()->emitInsCount;
+ bool bSkippedJump = false;
+#endif
+ // We would like to know here whether the 'then' part is really going to generate
+ // any code, because if it isn't, the jump we emit here would just jump to the next
+ // instruction. What we would really like is to be able to go back and remove the
+ // jump, but we have no way of doing that right now.
+
+ if (
+#if FEATURE_STACK_FP_X87
+ !bHasFPUState && // If there is no FPU state, we won't need an x87 transition
+#endif
+ genIsEnregisteredIntVariable(thenNode) == reg)
+ {
+#ifdef DEBUG
+ // For the moment, handle just the easy case (the 'then' node is a variable
+ // enregistered in 'reg'), which is the one that happens all the time.
+
+ bSkippedJump = true;
+#endif
+ }
+ else
+ {
+ inst_JMP(EJ_jmp, lab_done);
+ }
+
+ /* Restore the liveness that we had upon entry of the else part of the qmark */
+
+ restoreLiveness(&entryLiveness);
+
+ /* Clear the liveness of any local variables that are dead upon */
+ /* entry to the then part. */
+ genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
+
+ /* Generate lab_true: */
+ genDefineTempLabel(lab_true);
+#if FEATURE_STACK_FP_X87
+ // Store FP state
+
+ // We need to do this after defining the lab_true label
+ genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
+#endif
+ /* Enter the then part - trash all registers */
+
+ regTracker.rsTrackRegClr();
+
+ /* Generate the code for the then part of the qmark */
+
+ noway_assert(thenNode->IsNothingNode() == false);
+
+ /* This must place a value into the chosen register */
+ genComputeReg(thenNode, regs, RegSet::EXACT_REG, RegSet::FREE_REG, true);
+
+ noway_assert(thenNode->gtFlags & GTF_REG_VAL);
+ noway_assert(thenNode->gtRegNum == reg);
+
+ unspillLiveness(&exitLiveness);
+
+ /* Verify that the exit liveness information is the same for the two parts of the qmark */
+ checkLiveness(&exitLiveness);
+#if FEATURE_STACK_FP_X87
+ genQMarkAfterThenBlockStackFP(&tempFPState);
+#endif
+
+#ifdef DEBUG
+ noway_assert(bSkippedJump == false || getEmitter()->emitInsCount == emittedInstructions);
+#endif
+
+ /* Define the "result" label */
+ genDefineTempLabel(lab_done);
+
+ /* Join of the two branches - trash all registers */
+
+ regTracker.rsTrackRegClr();
+
+ /* Check whether this subtree has freed up any variables */
+
+ genUpdateLife(tree);
+
+ genMarkTreeInReg(tree, reg);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a qmark colon using the CMOV instruction. It's OK
+ * to return false when we can't easily implement it using a cmov (leading
+ * genCodeForQmark to implement it using branches).
+ */
+
+bool CodeGen::genCodeForQmarkWithCMOV(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+#ifdef _TARGET_XARCH_
+ GenTreePtr cond = tree->gtOp.gtOp1;
+ GenTreePtr colon = tree->gtOp.gtOp2;
+ // Warning: this naming of the local vars is backwards!
+ GenTreePtr thenNode = colon->gtOp.gtOp1;
+ GenTreePtr elseNode = colon->gtOp.gtOp2;
+ GenTreePtr alwaysNode, predicateNode;
+ regNumber reg;
+ regMaskTP needReg = destReg;
+
+ noway_assert(tree->gtOper == GT_QMARK);
+ noway_assert(cond->OperIsCompare());
+ noway_assert(colon->gtOper == GT_COLON);
+
+#ifdef DEBUG
+ if (JitConfig.JitNoCMOV())
+ {
+ return false;
+ }
+#endif
+
+ /* Can only implement CMOV on processors that support it */
+
+ if (!compiler->opts.compUseCMOV)
+ {
+ return false;
+ }
+
+ /* thenNode better be a local or a constant */
+
+ if ((thenNode->OperGet() != GT_CNS_INT) && (thenNode->OperGet() != GT_LCL_VAR))
+ {
+ return false;
+ }
+
+ /* elseNode better be a local or a constant or nothing */
+
+ if ((elseNode->OperGet() != GT_CNS_INT) && (elseNode->OperGet() != GT_LCL_VAR))
+ {
+ return false;
+ }
+
+ /* can't handle two constants here */
+
+ if ((thenNode->OperGet() == GT_CNS_INT) && (elseNode->OperGet() == GT_CNS_INT))
+ {
+ return false;
+ }
+
+ /* let's not handle comparisons of non-integer types */
+
+ if (!varTypeIsI(cond->gtOp.gtOp1->gtType))
+ {
+ return false;
+ }
+
+ /* Choose nodes for predicateNode and alwaysNode. Swap cond if necessary.
+ The biggest constraint is that cmov cannot take an immediate (constant) operand.
+ */
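+
+ // Because cmov only accepts a register or memory source, a constant operand must
+ // become the alwaysNode: it is materialized into the destination with a plain 'mov'
+ // (which leaves the flags untouched), and the remaining local variable is then used
+ // as the cmov source.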
+
+ bool reverseCond = false;
+ if (elseNode->OperGet() == GT_CNS_INT)
+ {
+ // else node is a constant
+
+ alwaysNode = elseNode;
+ predicateNode = thenNode;
+ reverseCond = true;
+ }
+ else
+ {
+ alwaysNode = thenNode;
+ predicateNode = elseNode;
+ }
+
+ // If the live set in alwaysNode is not the same as in tree, then
+ // the variable in predicate node dies here. This is a dangerous
+ // case that we don't handle (genComputeReg could overwrite
+ // the value of the variable in the predicate node).
+
+ // This assert is just paranoid (we've already asserted it above)
+ assert(predicateNode->OperGet() == GT_LCL_VAR);
+ if ((predicateNode->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ return false;
+ }
+
+ // Past this point we are committing to use CMOV.
+
+ if (reverseCond)
+ {
+ compiler->gtReverseCond(cond);
+ }
+
+ emitJumpKind jumpKind = genCondSetFlags(cond);
+
+ // Compute the always node into any free register. If it's a constant,
+ // we need to generate the mov instruction here (otherwise genComputeReg might
+ // modify the flags, as in xor reg,reg).
+
+ if (alwaysNode->OperGet() == GT_CNS_INT)
+ {
+ reg = regSet.rsPickReg(needReg, bestReg);
+ inst_RV_IV(INS_mov, reg, alwaysNode->gtIntCon.gtIconVal, emitActualTypeSize(alwaysNode->TypeGet()));
+ gcInfo.gcMarkRegPtrVal(reg, alwaysNode->TypeGet());
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else
+ {
+ genComputeReg(alwaysNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
+ noway_assert(alwaysNode->gtFlags & GTF_REG_VAL);
+ noway_assert(alwaysNode->gtRegNum != REG_NA);
+
+ // Record the chosen register
+
+ reg = alwaysNode->gtRegNum;
+ }
+
+ regNumber regPredicate = REG_NA;
+
+ // Is predicateNode an enregistered variable?
+
+ if (genMarkLclVar(predicateNode))
+ {
+ // Variable lives in a register
+
+ regPredicate = predicateNode->gtRegNum;
+ }
+#if REDUNDANT_LOAD
+ else
+ {
+ // Checks if the variable happens to be in any of the registers
+
+ regPredicate = findStkLclInReg(predicateNode->gtLclVarCommon.gtLclNum);
+ }
+#endif
+
+ const static instruction EJtoCMOV[] = {INS_nop, INS_nop, INS_cmovo, INS_cmovno, INS_cmovb, INS_cmovae,
+ INS_cmove, INS_cmovne, INS_cmovbe, INS_cmova, INS_cmovs, INS_cmovns,
+ INS_cmovpe, INS_cmovpo, INS_cmovl, INS_cmovge, INS_cmovle, INS_cmovg};
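+
+ // The table above is indexed by emitJumpKind: the two leading INS_nop entries cover the
+ // non-conditional kinds (presumably EJ_NONE and EJ_jmp), and each remaining entry is the
+ // cmov that fires under the same condition code as the corresponding conditional jump.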
+
+ noway_assert((unsigned)jumpKind < (sizeof(EJtoCMOV) / sizeof(EJtoCMOV[0])));
+ instruction cmov_ins = EJtoCMOV[jumpKind];
+
+ noway_assert(insIsCMOV(cmov_ins));
+
+ if (regPredicate != REG_NA)
+ {
+ // regPredicate is in a register
+
+ inst_RV_RV(cmov_ins, reg, regPredicate, predicateNode->TypeGet());
+ }
+ else
+ {
+ // regPredicate is in memory
+
+ inst_RV_TT(cmov_ins, reg, predicateNode, NULL);
+ }
+ gcInfo.gcMarkRegPtrVal(reg, predicateNode->TypeGet());
+ regTracker.rsTrackRegTrash(reg);
+
+ genUpdateLife(alwaysNode);
+ genUpdateLife(predicateNode);
+ genCodeForTree_DONE_LIFE(tree, reg);
+ return true;
+#else
+ return false;
+#endif
+}
+
+#ifdef _TARGET_XARCH_
+void CodeGen::genCodeForMultEAX(GenTreePtr tree)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ bool ovfl = tree->gtOverflow();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP addrReg;
+
+ noway_assert(tree->OperGet() == GT_MUL);
+
+ /* We'll evaluate 'op1' first */
+
+ regMaskTP op1Mask = regSet.rsMustExclude(RBM_EAX, op2->gtRsvdRegs);
+
+ /* Generate the op1 into op1Mask and hold on to it. freeOnly=true */
+
+ genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ // If op2 is a constant we need to load the constant into a register
+ if (op2->OperKind() & GTK_CONST)
+ {
+ genCodeForTree(op2, RBM_EDX); // since EDX is going to be spilled anyway
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(op2);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+ else
+ {
+ /* Make the second operand addressable */
+ // Try to avoid EAX.
+ addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~RBM_EAX, RegSet::KEEP_REG, false);
+ }
+
+ /* Make sure the first operand is still in a register */
+ // op1 *must* go into EAX.
+ genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ // For 8 bit operations, we need to pick byte addressable registers
+
+ if (ovfl && varTypeIsByte(tree->TypeGet()) && !(genRegMask(reg) & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, reg);
+
+ regTracker.rsTrackRegTrash(byteReg);
+ regSet.rsMarkRegFree(genRegMask(reg));
+
+ reg = byteReg;
+ op1->gtRegNum = reg;
+ regSet.rsMarkRegUsed(op1);
+ }
+
+ /* Make sure the operand is still addressable */
+ addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
+
+ /* Free up the operand, if it's a regvar */
+
+ genUpdateLife(op2);
+
+ /* The register is about to be trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ // For overflow instructions, tree->TypeGet() is the accurate type,
+ // and gives us the size for the operands.
+
+ emitAttr opSize = emitTypeSize(tree->TypeGet());
+
+ /* Compute the new value */
+
+ noway_assert(op1->gtRegNum == REG_EAX);
+
+ // Make sure Edx is free (unless used by op2 itself)
+ bool op2Released = false;
+
+ if ((addrReg & RBM_EDX) == 0)
+ {
+ // op2 does not use Edx, so make sure no one else does either
+ regSet.rsGrabReg(RBM_EDX);
+ }
+ else if (regSet.rsMaskMult & RBM_EDX)
+ {
+ /* Edx is used by op2 and some other trees.
+ Spill the other trees besides op2. */
+
+ regSet.rsGrabReg(RBM_EDX);
+ op2Released = true;
+
+ /* keepReg==RegSet::FREE_REG so that the other multi-used trees
+ don't get marked as unspilled as well. */
+ regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG);
+ }
+
+ instruction ins;
+
+ if (tree->gtFlags & GTF_UNSIGNED)
+ ins = INS_mulEAX;
+ else
+ ins = INS_imulEAX;
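+
+ // The single-operand mul/imul forms multiply EAX by the operand and leave the 64-bit
+ // product in EDX:EAX; CF/OF are set when the upper half is significant, which is what
+ // the overflow check below relies on.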
+
+ inst_TT(ins, op2, 0, 0, opSize);
+
+ /* Both EAX and EDX are now trashed */
+
+ regTracker.rsTrackRegTrash(REG_EAX);
+ regTracker.rsTrackRegTrash(REG_EDX);
+
+ /* Free up anything that was tied up by the operand */
+
+ if (!op2Released)
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ /* The result will be where the first operand is sitting */
+
+ /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
+ genRecoverReg(op1, 0, RegSet::KEEP_REG);
+
+ reg = op1->gtRegNum;
+ noway_assert(reg == REG_EAX);
+
+ genReleaseReg(op1);
+
+ /* Do we need an overflow check */
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ genCodeForTree_DONE(tree, reg);
+}
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
+void CodeGen::genCodeForMult64(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ noway_assert(tree->OperGet() == GT_MUL);
+
+ /* Generate the first operand into some register */
+
+ genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Generate the second operand into some register */
+
+ genComputeReg(op2, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make sure the first operand is still in a register */
+ genRecoverReg(op1, 0, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Free up the operands */
+ genUpdateLife(tree);
+
+ genReleaseReg(op1);
+ genReleaseReg(op2);
+
+ regNumber regLo = regSet.rsPickReg(destReg, bestReg);
+ regNumber regHi;
+
+ regSet.rsLockReg(genRegMask(regLo));
+ regHi = regSet.rsPickReg(destReg & ~genRegMask(regLo));
+ regSet.rsUnlockReg(genRegMask(regLo));
+
+ instruction ins;
+ if (tree->gtFlags & GTF_UNSIGNED)
+ ins = INS_umull;
+ else
+ ins = INS_smull;
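+
+ // umull/smull compute the full 32x32 => 64 bit product, leaving the low word in regLo
+ // and the high word in regHi.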
+
+ getEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, regLo, regHi, op1->gtRegNum, op2->gtRegNum);
+ regTracker.rsTrackRegTrash(regHi);
+ regTracker.rsTrackRegTrash(regLo);
+
+ /* Do we need an overflow check */
+
+ if (tree->gtOverflow())
+ {
+ // Keep regLo [and regHi] locked while generating code for the gtOverflow() case
+ //
+ regSet.rsLockReg(genRegMask(regLo));
+
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ regSet.rsLockReg(genRegMask(regHi));
+
+ regNumber regTmpHi = regHi;
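+
+ // For the signed case the product fits in 32 bits iff regHi equals the sign extension
+ // of regLo. The 'cmp regLo, 0x80000000' below sets the carry exactly when regLo is
+ // negative, so 'adc regTmpHi, regHi, 0' is zero iff regHi is the correct sign
+ // extension (0 or -1) of regLo. For the unsigned case regHi itself must be zero.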
+ if ((tree->gtFlags & GTF_UNSIGNED) == 0)
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regLo, 0x80000000);
+ regTmpHi = regSet.rsPickReg(RBM_ALLINT);
+ getEmitter()->emitIns_R_R_I(INS_adc, EA_4BYTE, regTmpHi, regHi, 0);
+ regTracker.rsTrackRegTrash(regTmpHi);
+ }
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regTmpHi, 0);
+
+ // Jump to the block which will throw the exception
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+
+ // Unlock regLo [and regHi] after generating code for the gtOverflow() case
+ //
+ regSet.rsUnlockReg(genRegMask(regLo));
+
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ regSet.rsUnlockReg(genRegMask(regHi));
+ }
+
+ genUpdateLife(tree);
+
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ genMarkTreeInRegPair(tree, gen2regs2pair(regLo, regHi));
+ else
+ genMarkTreeInReg(tree, regLo);
+}
+#endif // _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Generate code for a simple binary arithmetic or logical operator.
+ * Handles GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL.
+ */
+
+void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ instruction ins;
+ genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP needReg = destReg;
+
+ /* Figure out what instruction to generate */
+
+ bool isArith;
+ switch (oper)
+ {
+ case GT_AND:
+ ins = INS_AND;
+ isArith = false;
+ break;
+ case GT_OR:
+ ins = INS_OR;
+ isArith = false;
+ break;
+ case GT_XOR:
+ ins = INS_XOR;
+ isArith = false;
+ break;
+ case GT_ADD:
+ ins = INS_add;
+ isArith = true;
+ break;
+ case GT_SUB:
+ ins = INS_sub;
+ isArith = true;
+ break;
+ case GT_MUL:
+ ins = INS_MUL;
+ isArith = true;
+ break;
+ default:
+ unreached();
+ }
+
+#ifdef _TARGET_XARCH_
+ /* Special case: try to use the 3 operand form "imul reg, op1, icon" */
+
+ if ((oper == GT_MUL) &&
+ op2->IsIntCnsFitsInI32() && // op2 is a constant that fits in a sign-extended 32-bit immediate
+ !op1->IsCnsIntOrI() && // op1 is not a constant
+ (tree->gtFlags & GTF_MUL_64RSLT) == 0 && // tree not marked with MUL_64RSLT
+ !varTypeIsByte(treeType) && // No encoding for say "imul al,al,imm"
+ !tree->gtOverflow()) // 3 operand imul doesn't set flags
+ {
+ /* Make the first operand addressable */
+
+ regMaskTP addrReg = genMakeRvalueAddressable(op1, needReg & ~op2->gtRsvdRegs, RegSet::FREE_REG, false);
+
+ /* Grab a register for the target */
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+#if LEA_AVAILABLE
+ /* Compute the value into the target: reg=op1*op2_icon */
+ if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
+ {
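+ // For a multiplier of 3, 5 or 9, (icon & -2) yields the scale 2, 4 or 8, so the
+ // 'lea reg, [regSrc + regSrc*scale]' below computes regSrc * icon in a single instruction.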
+ regNumber regSrc;
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regSrc = op1->gtRegNum;
+ }
+ else
+ {
+ inst_RV_TT(INS_mov, reg, op1, 0, emitActualTypeSize(op1->TypeGet()));
+ regSrc = reg;
+ }
+ getEmitter()->emitIns_R_ARX(INS_lea, emitActualTypeSize(treeType), reg, regSrc, regSrc,
+ (op2->gtIntCon.gtIconVal & -2), 0);
+ }
+ else
+#endif // LEA_AVAILABLE
+ {
+ /* Compute the value into the target: reg=op1*op2_icon */
+ inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);
+ }
+
+ /* The register has been trashed now */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* The address is no longer live */
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+#endif // _TARGET_XARCH_
+
+ bool ovfl = false;
+
+ if (isArith)
+ {
+ // We only reach here for GT_ADD, GT_SUB and GT_MUL.
+ assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_MUL));
+
+ ovfl = tree->gtOverflow();
+
+ /* We record the accurate (small) types in trees only when we need to
+ * check for overflow. Otherwise we record genActualType()
+ */
+
+ noway_assert(ovfl || (treeType == genActualType(treeType)));
+
+#if LEA_AVAILABLE
+
+ /* Can we use an 'lea' to compute the result?
+ Can't use 'lea' for overflow as it doesn't set flags
+ Can't use 'lea' unless we have at least two free registers */
+ {
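+ // For example, an expression such as 'x + y*4 + 12' can fold into a single address
+ // mode and be computed with one 'lea reg, [x + y*4 + 12]'.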
+ bool bEnoughRegs = genRegCountForLiveIntEnregVars(tree) + // Live intreg variables
+ genCountBits(regSet.rsMaskLock) + // Locked registers
+ 2 // We will need two registers
+ <= genCountBits(RBM_ALLINT & ~(doubleAlignOrFramePointerUsed() ? RBM_FPBASE : 0));
+
+ regMaskTP regs = RBM_NONE; // OUT argument
+ if (!ovfl && bEnoughRegs && genMakeIndAddrMode(tree, NULL, true, needReg, RegSet::FREE_REG, &regs, false))
+ {
+ emitAttr size;
+
+ /* Is the value now computed in some register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ genCodeForTree_REG_VAR1(tree);
+ return;
+ }
+
+ /* If we can reuse op1/op2's register directly, and 'tree' is
+ a simple expression (i.e., not in scaled-index form),
+ we might as well just use "add" instead of "lea" */
+
+ // However, if we're in a context where we want to evaluate "tree" into a specific
+ // register different from the reg we'd use in this optimization, then it doesn't
+ // make sense to do the "add", since we'd also have to do a "mov."
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ reg = op1->gtRegNum;
+
+ if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
+ {
+ if (op2->gtFlags & GTF_REG_VAL)
+ {
+ /* Simply add op2 to the register */
+
+ inst_RV_TT(INS_add, reg, op2, 0, emitTypeSize(treeType), flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ goto DONE_LEA_ADD;
+ }
+ else if (op2->OperGet() == GT_CNS_INT)
+ {
+ /* Simply add op2 to the register */
+
+ genIncRegBy(reg, op2->gtIntCon.gtIconVal, tree, treeType);
+
+ goto DONE_LEA_ADD;
+ }
+ }
+ }
+
+ if (op2->gtFlags & GTF_REG_VAL)
+ {
+ reg = op2->gtRegNum;
+
+ if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
+ {
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ /* Simply add op1 to the register */
+
+ inst_RV_TT(INS_add, reg, op1, 0, emitTypeSize(treeType), flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ goto DONE_LEA_ADD;
+ }
+ }
+ }
+
+ // The expression either requires a scaled-index form, or op1's or op2's
+ // register can't be targeted; the latter can happen when op1 or op2 are
+ // enregistered variables.
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+ size = emitActualTypeSize(treeType);
+
+ /* Generate "lea reg, [addr-mode]" */
+
+ inst_RV_AT(INS_lea, size, treeType, reg, tree, 0, flags);
+
+#ifndef _TARGET_XARCH_
+ // Don't call genFlagsEqualToReg on x86/x64,
+ // as 'lea' does not set the flags there
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+#endif
+
+ DONE_LEA_ADD:
+ /* The register has been trashed now */
+ regTracker.rsTrackRegTrash(reg);
+
+ genDoneAddressable(tree, regs, RegSet::FREE_REG);
+
+ /* The following could be an 'inner' pointer!!! */
+
+ noway_assert(treeType == TYP_BYREF || !varTypeIsGC(treeType));
+
+ if (treeType == TYP_BYREF)
+ {
+ genUpdateLife(tree);
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // in case "reg" was a TYP_GCREF before
+ gcInfo.gcMarkRegPtrVal(reg, TYP_BYREF);
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+ }
+
+#endif // LEA_AVAILABLE
+
+ noway_assert((varTypeIsGC(treeType) == false) || (treeType == TYP_BYREF && (ins == INS_add || ins == INS_sub)));
+ }
+
+ /* The following makes an assumption about gtSetEvalOrder(this) */
+
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Compute a useful register mask */
+ needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
+ needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
+
+ // Determine what registers go live between op1 and op2
+ // Don't bother checking if op1 is already in a register.
+ // This is not just for efficiency; if it's already in a
+ // register then it may already be considered "evaluated"
+ // for the purposes of liveness, in which case genNewLiveRegMask
+ // will assert
+ if (!op1->InReg())
+ {
+ regMaskTP newLiveMask = genNewLiveRegMask(op1, op2);
+ if (newLiveMask)
+ {
+ needReg = regSet.rsNarrowHint(needReg, ~newLiveMask);
+ }
+ }
+
+#if CPU_HAS_BYTE_REGS
+ /* 8-bit operations can only be done in the byte-regs */
+ if (varTypeIsByte(treeType))
+ needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
+#endif // CPU_HAS_BYTE_REGS
+
+ // Try selecting one of the 'bestRegs'
+ needReg = regSet.rsNarrowHint(needReg, bestReg);
+
+ /* Special case: small_val & small_mask */
+
+ if (varTypeIsSmall(op1->TypeGet()) && op2->IsCnsIntOrI() && oper == GT_AND)
+ {
+ size_t and_val = op2->gtIntCon.gtIconVal;
+ size_t andMask;
+ var_types typ = op1->TypeGet();
+
+ switch (typ)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ andMask = 0x000000FF;
+ break;
+ case TYP_SHORT:
+ case TYP_CHAR:
+ andMask = 0x0000FFFF;
+ break;
+ default:
+ noway_assert(!"unexpected type");
+ return;
+ }
+
+ // Is the 'and_val' completely contained within the bits found in 'andMask'?
+ // (e.g. 0x0F is contained within the byte mask 0x00FF, while 0x1FF is not)
+ if ((and_val & ~andMask) == 0)
+ {
+ // We must use unsigned instructions when loading op1
+ if (varTypeIsByte(typ))
+ {
+ op1->gtType = TYP_UBYTE;
+ }
+ else // varTypeIsShort(typ)
+ {
+ assert(varTypeIsShort(typ));
+ op1->gtType = TYP_CHAR;
+ }
+
+ /* Generate the first operand into a scratch register */
+
+ op1 = genCodeForCommaTree(op1);
+ genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regNumber op1Reg = op1->gtRegNum;
+
+ // Did we end up in an acceptable register?
+ // and do we have an acceptable free register available to grab?
+ //
+ if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
+ {
+ // See if we can pick a register from bestReg
+ bestReg &= needReg;
+
+ // Grab an acceptable register
+ regNumber newReg;
+ if ((bestReg & regSet.rsRegMaskFree()) != 0)
+ newReg = regSet.rsGrabReg(bestReg);
+ else
+ newReg = regSet.rsGrabReg(needReg);
+
+ noway_assert(op1Reg != newReg);
+
+ /* Update the value in the target register */
+
+ regTracker.rsTrackRegCopy(newReg, op1Reg);
+
+ inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
+
+ /* The value has been transferred to 'newReg' */
+
+ if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
+
+ gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
+
+ /* The value is now in an appropriate register */
+
+ op1->gtRegNum = newReg;
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ genUpdateLife(op1);
+
+ /* Mark the register as 'used' */
+ regSet.rsMarkRegUsed(op1);
+ reg = op1->gtRegNum;
+
+ if (and_val != andMask) // Does the "and" mask only cover some of the bits?
+ {
+ /* "and" the value */
+
+ inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags);
+ }
+
+#ifdef DEBUG
+ /* Update the live set of register variables */
+ if (compiler->opts.varNames)
+ genUpdateLife(tree);
+#endif
+
+ /* Now we can update the register pointer information */
+
+ genReleaseReg(op1);
+ gcInfo.gcMarkRegPtrVal(reg, treeType);
+
+ genCodeForTree_DONE_LIFE(tree, reg);
+ return;
+ }
+ }
+
+#ifdef _TARGET_XARCH_
+
+ // Do we have to use the special "imul" instruction
+ // which has eax as the implicit operand ?
+ //
+ bool multEAX = false;
+
+ if (oper == GT_MUL)
+ {
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ {
+ /* Only multiplying with EAX will leave the 64-bit
+ * result in EDX:EAX */
+
+ multEAX = true;
+ }
+ else if (ovfl)
+ {
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ /* "mul reg/mem" always has EAX as default operand */
+
+ multEAX = true;
+ }
+ else if (varTypeIsSmall(treeType))
+ {
+ /* Only the "imul with EAX" encoding has the 'w' bit
+ * to specify the size of the operands */
+
+ multEAX = true;
+ }
+ }
+ }
+
+ if (multEAX)
+ {
+ noway_assert(oper == GT_MUL);
+
+ return genCodeForMultEAX(tree);
+ }
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
+
+ // Do we have to use the special 32x32 => 64 bit multiply
+ //
+ bool mult64 = false;
+
+ if (oper == GT_MUL)
+ {
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ {
+ mult64 = true;
+ }
+ else if (ovfl)
+ {
+ // We always must use the 32x32 => 64 bit multiply
+ // to detect overflow
+ mult64 = true;
+ }
+ }
+
+ if (mult64)
+ {
+ noway_assert(oper == GT_MUL);
+
+ return genCodeForMult64(tree, destReg, bestReg);
+ }
+#endif // _TARGET_ARM_
+
+ /* Generate the first operand into a scratch register */
+
+ op1 = genCodeForCommaTree(op1);
+ genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regNumber op1Reg = op1->gtRegNum;
+
+ // Setup needReg with the set of register that we require for op1 to be in
+ //
+ needReg = RBM_ALLINT;
+
+ /* Compute a useful register mask */
+ needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
+ needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
+
+#if CPU_HAS_BYTE_REGS
+ /* 8-bit operations can only be done in the byte-regs */
+ if (varTypeIsByte(treeType))
+ needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
+#endif // CPU_HAS_BYTE_REGS
+
+ // Did we end up in an acceptable register?
+ // and do we have an acceptable free register available to grab?
+ //
+ if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
+ {
+ // See if we can pick a register from bestReg
+ bestReg &= needReg;
+
+ // Grab an acceptable register
+ regNumber newReg;
+ if ((bestReg & regSet.rsRegMaskFree()) != 0)
+ newReg = regSet.rsGrabReg(bestReg);
+ else
+ newReg = regSet.rsGrabReg(needReg);
+
+ noway_assert(op1Reg != newReg);
+
+ /* Update the value in the target register */
+
+ regTracker.rsTrackRegCopy(newReg, op1Reg);
+
+ inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
+
+ /* The value has been transferred to 'newReg' */
+
+ if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
+
+ gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
+
+ /* The value is now in an appropriate register */
+
+ op1->gtRegNum = newReg;
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ op1Reg = op1->gtRegNum;
+
+ genUpdateLife(op1);
+
+ /* Mark the register as 'used' */
+ regSet.rsMarkRegUsed(op1);
+
+ bool isSmallConst = false;
+
+#ifdef _TARGET_ARM_
+ if ((op2->gtOper == GT_CNS_INT) && arm_Valid_Imm_For_Instr(ins, op2->gtIntCon.gtIconVal, INS_FLAGS_DONT_CARE))
+ {
+ isSmallConst = true;
+ }
+#endif
+ /* Make the second operand addressable */
+
+ regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT, RegSet::KEEP_REG, isSmallConst);
+
+#if CPU_LOAD_STORE_ARCH
+ genRecoverReg(op1, RBM_ALLINT, RegSet::KEEP_REG);
+#else // !CPU_LOAD_STORE_ARCH
+ /* Is op1 spilled and op2 in a register? */
+
+ if ((op1->gtFlags & GTF_SPILLED) && (op2->gtFlags & GTF_REG_VAL) && (ins != INS_sub))
+ {
+ noway_assert(ins == INS_add || ins == INS_MUL || ins == INS_AND || ins == INS_OR || ins == INS_XOR);
+
+ // genMakeRvalueAddressable(GT_LCL_VAR) shouldn't spill anything
+ noway_assert(op2->gtOper != GT_LCL_VAR ||
+ varTypeIsSmall(compiler->lvaTable[op2->gtLclVarCommon.gtLclNum].TypeGet()));
+
+ reg = op2->gtRegNum;
+ regMaskTP regMask = genRegMask(reg);
+
+ /* Is the register holding op2 available? */
+
+ if (regMask & regSet.rsMaskVars)
+ {
+ }
+ else
+ {
+ /* Get the temp we spilled into. */
+
+ TempDsc* temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
+
+ /* For 8bit operations, we need to make sure that op2 is
+ in a byte-addressable register */
+
+ if (varTypeIsByte(treeType) && !(regMask & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, reg);
+ regTracker.rsTrackRegTrash(byteReg);
+
+ /* op2 couldn't have spilled as it was not sitting in
+ RBM_BYTE_REGS, and regSet.rsGrabReg() will only spill its args */
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ regSet.rsUnlockReg(regMask);
+ regSet.rsMarkRegFree(regMask);
+
+ reg = byteReg;
+ regMask = genRegMask(reg);
+ op2->gtRegNum = reg;
+ regSet.rsMarkRegUsed(op2);
+ }
+
+ inst_RV_ST(ins, reg, temp, 0, treeType);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Free the temp */
+
+ compiler->tmpRlsTemp(temp);
+
+ /* 'add'/'sub' set all CC flags, others only ZF+SF */
+
+ /* If we need to check overflow, for small types, the
+ * flags can't be used as we perform the arithmetic
+ * operation (on small registers) and then sign extend it
+ *
+ * NOTE : If we ever don't need to sign-extend the result,
+ * we can use the flags
+ */
+
+ if (tree->gtSetFlags())
+ {
+ genFlagsEqualToReg(tree, reg);
+ }
+
+ /* The result is where the second operand is sitting. Mark result reg as free */
+ regSet.rsMarkRegFree(genRegMask(reg));
+
+ gcInfo.gcMarkRegPtrVal(reg, treeType);
+
+ goto CHK_OVF;
+ }
+ }
+#endif // !CPU_LOAD_STORE_ARCH
+
+ /* Make sure the first operand is still in a register */
+ regSet.rsLockUsedReg(addrReg);
+ genRecoverReg(op1, 0, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regSet.rsUnlockUsedReg(addrReg);
+
+ reg = op1->gtRegNum;
+
+ // For 8 bit operations, we need to pick byte addressable registers
+
+ if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, reg);
+
+ regTracker.rsTrackRegTrash(byteReg);
+ regSet.rsMarkRegFree(genRegMask(reg));
+
+ reg = byteReg;
+ op1->gtRegNum = reg;
+ regSet.rsMarkRegUsed(op1);
+ }
+
+ /* Make sure the operand is still addressable */
+ addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
+
+ /* Free up the operand, if it's a regvar */
+
+ genUpdateLife(op2);
+
+ /* The register is about to be trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ bool op2Released = false;
+
+ // For overflow instructions, tree->gtType is the accurate type,
+ // and gives us the size for the operands.
+
+ emitAttr opSize = emitTypeSize(treeType);
+
+ /* Compute the new value */
+
+ if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST)
+#if !CPU_HAS_FP_SUPPORT
+ && (treeType == TYP_INT || treeType == TYP_I_IMPL)
+#endif
+ )
+ {
+ ssize_t ival = op2->gtIntCon.gtIconVal;
+
+ if (oper == GT_ADD)
+ {
+ genIncRegBy(reg, ival, tree, treeType, ovfl);
+ }
+ else if (oper == GT_SUB)
+ {
+ if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
+ (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000.
+ // Therefore we can't use -ival.
+ )
+ {
+ /* For unsigned overflow, we have to use INS_sub to set
+ the flags correctly */
+
+ genDecRegBy(reg, ival, tree);
+ }
+ else
+ {
+ /* Else, we simply add the negative of the value */
+
+ genIncRegBy(reg, -ival, tree, treeType, ovfl);
+ }
+ }
+ else if (oper == GT_MUL)
+ {
+ genMulRegBy(reg, ival, tree, treeType, ovfl);
+ }
+ }
+ else
+ {
+ // op2 could be a GT_COMMA (i.e. an assignment for a CSE def)
+ op2 = op2->gtEffectiveVal();
+ if (varTypeIsByte(treeType) && op2->InReg())
+ {
+ noway_assert(genRegMask(reg) & RBM_BYTE_REGS);
+
+ regNumber op2reg = op2->gtRegNum;
+ regMaskTP op2regMask = genRegMask(op2reg);
+
+ if (!(op2regMask & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, op2reg);
+ regTracker.rsTrackRegTrash(byteReg);
+
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+ op2Released = true;
+
+ op2->gtRegNum = byteReg;
+ }
+ }
+
+ inst_RV_TT(ins, reg, op2, 0, opSize, flags);
+ }
+
+ /* Free up anything that was tied up by the operand */
+
+ if (!op2Released)
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ /* The result will be where the first operand is sitting */
+
+ /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
+ genRecoverReg(op1, 0, RegSet::KEEP_REG);
+
+ reg = op1->gtRegNum;
+
+ /* 'add'/'sub' set all CC flags, others only ZF+SF */
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ genReleaseReg(op1);
+
+#if !CPU_LOAD_STORE_ARCH
+CHK_OVF:
+#endif // !CPU_LOAD_STORE_ARCH
+
+ /* Do we need an overflow check */
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a simple binary arithmetic or logical assignment operator: x <op>= y.
+ * Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB.
+ */
+
+void CodeGen::genCodeForTreeSmpBinArithLogAsgOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ instruction ins;
+ const genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP needReg = destReg;
+ regMaskTP addrReg;
+
+ /* Figure out what instruction to generate */
+
+ bool isArith;
+ switch (oper)
+ {
+ case GT_ASG_AND:
+ ins = INS_AND;
+ isArith = false;
+ break;
+ case GT_ASG_OR:
+ ins = INS_OR;
+ isArith = false;
+ break;
+ case GT_ASG_XOR:
+ ins = INS_XOR;
+ isArith = false;
+ break;
+ case GT_ASG_ADD:
+ ins = INS_add;
+ isArith = true;
+ break;
+ case GT_ASG_SUB:
+ ins = INS_sub;
+ isArith = true;
+ break;
+ default:
+ unreached();
+ }
+
+ bool ovfl = false;
+
+ if (isArith)
+ {
+ // We only reach here for GT_ASG_SUB, GT_ASG_ADD.
+
+ ovfl = tree->gtOverflow();
+
+ // We can't use += with an overflow check if the target's value must remain
+ // unchanged when the '+' raises an overflow exception
+ noway_assert(!ovfl ||
+ ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD) && !compiler->compCurBB->hasTryIndex()));
+
+ /* Do not allow overflow instructions with refs/byrefs */
+
+ noway_assert(!ovfl || !varTypeIsGC(treeType));
+
+ // We disallow overflow and byte-ops here as it is too much trouble
+ noway_assert(!ovfl || !varTypeIsByte(treeType));
+
+ /* Is the second operand a constant? */
+
+ if (op2->IsIntCnsFitsInI32())
+ {
+ int ival = (int)op2->gtIntCon.gtIconVal;
+
+ /* What is the target of the assignment? */
+
+ switch (op1->gtOper)
+ {
+ case GT_REG_VAR:
+
+ REG_VAR4:
+
+ reg = op1->gtRegVar.gtRegNum;
+
+ /* No registers are needed for addressing */
+
+ addrReg = RBM_NONE;
+#if !CPU_LOAD_STORE_ARCH
+ INCDEC_REG:
+#endif
+ /* We're adding a constant to a register */
+
+ if (oper == GT_ASG_ADD)
+ genIncRegBy(reg, ival, tree, treeType, ovfl);
+ else if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
+ ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)) // -0x80000000 ==
+ // 0x80000000.
+ // Therefore we can't
+ // use -ival.
+ )
+ /* For unsigned overflow, we have to use INS_sub to set
+ the flags correctly */
+ genDecRegBy(reg, ival, tree);
+ else
+ genIncRegBy(reg, -ival, tree, treeType, ovfl);
+
+ break;
+
+ case GT_LCL_VAR:
+
+ /* Does the variable live in a register? */
+
+ if (genMarkLclVar(op1))
+ goto REG_VAR4;
+
+ __fallthrough;
+
+ default:
+
+ /* Make the target addressable for load/store */
+ addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true);
+
+#if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
+
+ /* For small types with overflow check, we need to
+ sign/zero extend the result, so we need it in a reg */
+
+ if (ovfl && genTypeSize(treeType) < sizeof(int))
+#endif // !CPU_LOAD_STORE_ARCH
+ {
+ // Load op1 into a reg
+
+ reg = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
+
+ inst_RV_TT(INS_mov, reg, op1);
+
+ // Issue the add/sub and the overflow check
+
+ inst_RV_IV(ins, reg, ival, emitActualTypeSize(treeType), flags);
+ regTracker.rsTrackRegTrash(reg);
+
+ if (ovfl)
+ {
+ genCheckOverflow(tree);
+ }
+
+ /* Store the (sign/zero extended) result back to
+ the stack location of the variable */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
+
+ break;
+ }
+#if !CPU_LOAD_STORE_ARCH
+ else
+ {
+ /* Add/subtract the new value into/from the target */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ reg = op1->gtRegNum;
+ goto INCDEC_REG;
+ }
+
+ /* Special case: inc/dec (up to P3, or for small code, or blended code outside loops) */
+ if (!ovfl && (ival == 1 || ival == -1) &&
+ !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
+ {
+ noway_assert(oper == GT_ASG_SUB || oper == GT_ASG_ADD);
+ if (oper == GT_ASG_SUB)
+ ival = -ival;
+
+ ins = (ival > 0) ? INS_inc : INS_dec;
+ inst_TT(ins, op1);
+ }
+ else
+ {
+ inst_TT_IV(ins, op1, ival);
+ }
+
+ if ((op1->gtOper == GT_LCL_VAR) && (!ovfl || treeType == TYP_INT))
+ {
+ if (tree->gtSetFlags())
+ genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
+ }
+
+ break;
+ }
+#endif // !CPU_LOAD_STORE_ARCH
+ } // end switch (op1->gtOper)
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
+ return;
+ } // end if (op2->IsIntCnsFitsInI32())
+ } // end if (isArith)
+
+ noway_assert(!varTypeIsGC(treeType) || ins == INS_sub || ins == INS_add);
+
+ /* Is the target a register or local variable? */
+
+ switch (op1->gtOper)
+ {
+ case GT_LCL_VAR:
+
+ /* Does the target variable live in a register? */
+
+ if (!genMarkLclVar(op1))
+ break;
+
+ __fallthrough;
+
+ case GT_REG_VAR:
+
+ /* Get hold of the target register */
+
+ reg = op1->gtRegVar.gtRegNum;
+
+ /* Make sure the target of the store is available */
+
+ if (regSet.rsMaskUsed & genRegMask(reg))
+ {
+ regSet.rsSpillReg(reg);
+ }
+
+ /* Make the RHS addressable */
+
+ addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
+
+ /* Compute the new value into the target register */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_HAS_BYTE_REGS
+
+ // Fix 383833 X86 ILGEN
+ regNumber reg2;
+ if ((op2->gtFlags & GTF_REG_VAL) != 0)
+ {
+ reg2 = op2->gtRegNum;
+ }
+ else
+ {
+ reg2 = REG_STK;
+ }
+
+ // We can only generate a byte ADD, SUB, OR, AND operation when reg and reg2 are both BYTE registers.
+ // When op2 is in memory, reg2 == REG_STK and we will need to force op2 into a register.
+ //
+ if (varTypeIsByte(treeType) &&
+ (((genRegMask(reg) & RBM_BYTE_REGS) == 0) || ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)))
+ {
+ // We will force op2 into a register (via sign/zero extending load)
+ // for the cases where op2 is in memory and thus could have
+ // an unmapped page just beyond its location
+ //
+ if ((op2->OperIsIndir() || (op2->gtOper == GT_CLS_VAR)) && varTypeIsSmall(op2->TypeGet()))
+ {
+ genCodeForTree(op2, 0);
+ assert((op2->gtFlags & GTF_REG_VAL) != 0);
+ }
+
+ inst_RV_TT(ins, reg, op2, 0, EA_4BYTE, flags);
+
+ bool canOmit = false;
+
+ if (varTypeIsUnsigned(treeType))
+ {
+ // When op2 is a byte sized constant we can omit the zero extend instruction
+ if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0xFF) == op2->gtIntCon.gtIconVal))
+ {
+ canOmit = true;
+ }
+ }
+ else // treeType is signed
+ {
+ // When op2 is a positive 7-bit or smaller constant
+ // we can omit the sign extension sequence.
+ if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0x7F) == op2->gtIntCon.gtIconVal))
+ {
+ canOmit = true;
+ }
+ }
+
+ if (!canOmit)
+ {
+ // If reg is a byte reg then we can use a movzx/movsx instruction
+ //
+ if ((genRegMask(reg) & RBM_BYTE_REGS) != 0)
+ {
+ instruction extendIns = ins_Move_Extend(treeType, true);
+ inst_RV_RV(extendIns, reg, reg, treeType, emitTypeSize(treeType));
+ }
+ else // we can't encode a movzx/movsx instruction
+ {
+ if (varTypeIsUnsigned(treeType))
+ {
+ // otherwise, we must zero the upper 24 bits of 'reg'
+ inst_RV_IV(INS_AND, reg, 0xFF, EA_4BYTE);
+ }
+ else // treeType is signed
+ {
+ // otherwise, we must sign extend the result in the non-byteable register 'reg':
+ // we shift the register left by 24 bits, putting the sign bit into the high bit,
+ // and then do an arithmetic shift right by 24 bits, which propagates the sign bit correctly.
+ //
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, reg, 24);
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, reg, 24);
+ }
+ }
+ }
+ }
+ else
+#endif // CPU_HAS_BYTE_REGS
+ {
+ inst_RV_TT(ins, reg, op2, 0, emitTypeSize(treeType), flags);
+ }
+
+ /* The zero flag is now equal to the register value */
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ /* Remember that we trashed the target */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Free up anything that was tied up by the RHS */
+
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
+ return;
+
+ default:
+ break;
+ } // end switch (op1->gtOper)
+
+#if !CPU_LOAD_STORE_ARCH
+ /* Special case: "x ^= -1" is actually "not(x)" */
+
+ if (oper == GT_ASG_XOR)
+ {
+ if (op2->gtOper == GT_CNS_INT && op2->gtIntCon.gtIconVal == -1)
+ {
+ addrReg = genMakeAddressable(op1, RBM_ALLINT, RegSet::KEEP_REG, true);
+ inst_TT(INS_NOT, op1);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
+ return;
+ }
+ }
+#endif // !CPU_LOAD_STORE_ARCH
+
+ /* Setup target mask for op2 (byte-regs for small operands) */
+
+ unsigned needMask;
+ needMask = (varTypeIsByte(treeType)) ? RBM_BYTE_REGS : RBM_ALLINT;
+
+ /* Is the second operand a constant? */
+
+ if (op2->IsIntCnsFitsInI32())
+ {
+ int ival = (int)op2->gtIntCon.gtIconVal;
+
+ /* Make the target addressable */
+ addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
+
+ inst_TT_IV(ins, op1, ival, 0, emitTypeSize(treeType), flags);
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
+ return;
+ }
+
+ /* Is the value or the address to be computed first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Compute the new value into a register */
+
+ genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
+
+ /* Make the target addressable for load/store */
+ addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true);
+ regSet.rsLockUsedReg(addrReg);
+
+#if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
+ /* For small types with overflow check, we need to
+ sign/zero extend the result, so we need it in a reg */
+
+ if (ovfl && genTypeSize(treeType) < sizeof(int))
+#endif // !CPU_LOAD_STORE_ARCH
+ {
+ reg = regSet.rsPickReg();
+ regSet.rsLockReg(genRegMask(reg));
+
+ noway_assert(genIsValidReg(reg));
+
+ /* Generate "ldr reg, [var]" */
+
+ inst_RV_TT(ins_Load(op1->TypeGet()), reg, op1);
+
+ if (op1->gtOper == GT_LCL_VAR)
+ regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
+ else
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Make sure the new value is in a register */
+
+ genRecoverReg(op2, 0, RegSet::KEEP_REG);
+
+ /* Compute the new value */
+
+ inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ /* Move the new value back to the variable */
+ /* Generate "str reg, [var]" */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
+ regSet.rsUnlockReg(genRegMask(reg));
+
+ if (op1->gtOper == GT_LCL_VAR)
+ regTracker.rsTrackRegLclVar(reg, op1->gtLclVarCommon.gtLclNum);
+ }
+#if !CPU_LOAD_STORE_ARCH
+ else
+ {
+ /* Make sure the new value is in a register */
+
+ genRecoverReg(op2, 0, RegSet::KEEP_REG);
+
+ /* Add the new value into the target */
+
+ inst_TT_RV(ins, op1, op2->gtRegNum);
+ }
+#endif // !CPU_LOAD_STORE_ARCH
+ /* Free up anything that was tied up either side */
+ regSet.rsUnlockUsedReg(addrReg);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ genReleaseReg(op2);
+ }
+ else
+ {
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable2(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true, true);
+
+ /* Compute the new value into a register */
+
+ genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
+
+ /* Make sure the target is still addressable */
+
+ addrReg = genKeepAddressable(op1, addrReg);
+ regSet.rsLockUsedReg(addrReg);
+
+#if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
+
+ /* For small types with overflow check, we need to
+ sign/zero extend the result, so we need it in a reg */
+
+ if (ovfl && genTypeSize(treeType) < sizeof(int))
+#endif // !CPU_LOAD_STORE_ARCH
+ {
+ reg = regSet.rsPickReg();
+
+ inst_RV_TT(INS_mov, reg, op1);
+
+ inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
+ regTracker.rsTrackRegTrash(reg);
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
+
+ if (op1->gtOper == GT_LCL_VAR)
+ regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
+ }
+#if !CPU_LOAD_STORE_ARCH
+ else
+ {
+ /* Add the new value into the target */
+
+ inst_TT_RV(ins, op1, op2->gtRegNum);
+ }
+#endif
+
+ /* Free up anything that was tied up either side */
+ regSet.rsUnlockUsedReg(addrReg);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
+ genReleaseReg(op2);
+ }
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for GT_UMOD.
+ */
+
+void CodeGen::genCodeForUnsignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_UMOD);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ /* Is this a division by an integer constant? */
+
+ noway_assert(op2);
+ if (compiler->fgIsUnsignedModOptimizable(op2))
+ {
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ /* Generate the appropriate sequence */
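+ // For an unsigned dividend, x % 2^N == x & (2^N - 1), so modulo by a
+ // power-of-two constant reduces to a single AND with (divisor - 1).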
+ size_t ival = op2->gtIntCon.gtIconVal - 1;
+ inst_RV_IV(INS_AND, reg, ival, emitActualTypeSize(treeType));
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for GT_MOD.
+ */
+
+void CodeGen::genCodeForSignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_MOD);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ /* Is this a division by an integer constant? */
+
+ noway_assert(op2);
+ if (compiler->fgIsSignedModOptimizable(op2))
+ {
+ ssize_t ival = op2->gtIntCon.gtIconVal;
+ BasicBlock* skip = genCreateTempLabel();
+
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ /* Generate the appropriate sequence */
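+ //
+ // The sequence below computes a signed remainder by a power-of-two constant
+ // without using a divide:
+ //
+ //     and  reg, (ival - 1) | 0x80000000   ; keep the low bits and the sign bit
+ //     jump-if-not-negative skip           ; non-negative dividend: low bits are the result
+ //     dec  reg
+ //     or   reg, -ival                     ; set the high bits ...
+ //     inc  reg                            ; ... yielding (low bits) - ival, or 0 when exact
+ //   skip:
+ //
+ // For example, -5 % 4: the AND leaves 0x80000003 and the dec/or/inc sequence
+ // produces -1, matching the round-toward-zero semantics of the MSIL rem.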
+
+ inst_RV_IV(INS_AND, reg, (int)(ival - 1) | 0x80000000, EA_4BYTE, INS_FLAGS_SET);
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Check and branch for a positive value */
+ emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
+ inst_JMP(jmpGEL, skip);
+
+ /* Generate the rest of the sequence and we're done */
+
+ genIncRegBy(reg, -1, NULL, treeType);
+ ival = -ival;
+ if ((treeType == TYP_LONG) && ((int)ival != ival))
+ {
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(EA_8BYTE, immReg, ival);
+ inst_RV_RV(INS_OR, reg, immReg, TYP_LONG);
+ }
+ else
+ {
+ inst_RV_IV(INS_OR, reg, (int)ival, emitActualTypeSize(treeType));
+ }
+ genIncRegBy(reg, 1, NULL, treeType);
+
+ /* Define the 'skip' label and we're done */
+
+ genDefineTempLabel(skip);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for GT_UDIV.
+ */
+
+void CodeGen::genCodeForUnsignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_UDIV);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ /* Is this a division by an integer constant? */
+
+ noway_assert(op2);
+ if (compiler->fgIsUnsignedDivOptimizable(op2))
+ {
+ size_t ival = op2->gtIntCon.gtIconVal;
+
+ /* Division by 1 must be handled elsewhere */
+
+ noway_assert(ival != 1 || compiler->opts.MinOpts());
+
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ /* Generate "shr reg, log2(value)" */
+
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, emitTypeSize(treeType), reg, genLog2(ival));
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for GT_DIV.
+ */
+
+void CodeGen::genCodeForSignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_DIV);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ /* Is this a division by an integer constant? */
+
+ noway_assert(op2);
+ if (compiler->fgIsSignedDivOptimizable(op2))
+ {
+ ssize_t ival_s = op2->gtIntConCommon.IconValue();
+ assert(ival_s > 0); // Postcondition of compiler->fgIsSignedDivOptimizable...
+ size_t ival = static_cast<size_t>(ival_s);
+
+ /* Division by 1 must be handled elsewhere */
+
+ noway_assert(ival != 1);
+
+ BasicBlock* onNegDivisee = genCreateTempLabel();
+
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ if (ival == 2)
+ {
+ /* Generate "sar reg, log2(value)" */
+
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival), INS_FLAGS_SET);
+
+ // Check and branch for a positive value, skipping the INS_ADDC instruction
+ emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
+ inst_JMP(jmpGEL, onNegDivisee);
+
+ // Add the carry flag to 'reg'
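+ // For a negative odd dividend, 'sar' rounds toward negative infinity and shifts
+ // the discarded bit into the carry flag; adding the carry back in gives the
+ // round-toward-zero result that MSIL division requires. For even dividends the
+ // carry is zero and the add is a no-op.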
+ inst_RV_IV(INS_ADDC, reg, 0, emitActualTypeSize(treeType));
+
+ /* Define the 'onNegDivisee' label and we're done */
+
+ genDefineTempLabel(onNegDivisee);
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* The result is the same as the operand */
+
+ reg = op1->gtRegNum;
+ }
+ else
+ {
+ /* Generate the following sequence */
+ /*
+ test reg, reg
+ jns onNegDivisee
+ add reg, ival-1
+ onNegDivisee:
+ sar reg, log2(ival)
+ */
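+ // Adding (ival - 1) to a negative dividend before the arithmetic shift converts
+ // the shift's round-toward-negative-infinity behavior into the round-toward-zero
+ // behavior required for signed division.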
+
+ instGen_Compare_Reg_To_Zero(emitTypeSize(treeType), reg);
+
+ // Check and branch for a positive value, skipping the INS_add instruction
+ emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
+ inst_JMP(jmpGEL, onNegDivisee);
+
+ inst_RV_IV(INS_add, reg, (int)ival - 1, emitActualTypeSize(treeType));
+
+ /* Define the 'onNegDivisee' label and we're done */
+
+ genDefineTempLabel(onNegDivisee);
+
+ /* Generate "sar reg, log2(value)" */
+
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival));
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* The result is the same as the operand */
+
+ reg = op1->gtRegNum;
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a general divide. Handles the general case for GT_UMOD, GT_MOD, GT_UDIV, GT_DIV
+ * (if op2 is not a power of 2 constant).
+ */
+
+void CodeGen::genCodeForGeneralDivide(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_UMOD || tree->OperGet() == GT_MOD || tree->OperGet() == GT_UDIV ||
+ tree->OperGet() == GT_DIV);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+ instruction ins;
+ bool gotOp1;
+ regMaskTP addrReg;
+
+#if USE_HELPERS_FOR_INT_DIV
+ noway_assert(!"Unreachable: fgMorph should have transformed this into a JitHelper");
+#endif
+
+#if defined(_TARGET_XARCH_)
+
+ /* Which operand are we supposed to evaluate first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* We'll evaluate 'op2' first */
+
+ gotOp1 = false;
+ destReg &= ~op1->gtRsvdRegs;
+
+ /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ if (varDsc->lvRegister)
+ {
+ destReg &= ~genRegMask(varDsc->lvRegNum);
+ }
+ }
+ }
+ else
+ {
+ /* We'll evaluate 'op1' first */
+
+ gotOp1 = true;
+
+ regMaskTP op1Mask;
+ if (RBM_EAX & op2->gtRsvdRegs)
+ op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
+ else
+ op1Mask = RBM_EAX; // EAX would be ideal
+
+ /* Generate the dividend into EAX and hold on to it. freeOnly=true */
+
+ genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+ }
+
+ /* We want to avoid using EAX or EDX for the second operand */
+
+ destReg = regSet.rsMustExclude(destReg, RBM_EAX | RBM_EDX);
+
+ /* Make the second operand addressable */
+ op2 = genCodeForCommaTree(op2);
+
+ /* Special case: if op2 is a local var we are done */
+
+ if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD)
+ {
+ if ((op2->gtFlags & GTF_REG_VAL) == 0)
+ addrReg = genMakeRvalueAddressable(op2, destReg, RegSet::KEEP_REG, false);
+ else
+ addrReg = 0;
+ }
+ else
+ {
+ genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+
+ /* Make sure we have the dividend in EAX */
+
+ if (gotOp1)
+ {
+ /* We've previously computed op1 into EAX */
+
+ genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Compute op1 into EAX and hold on to it */
+
+ genComputeReg(op1, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ }
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegNum == REG_EAX);
+
+ /* We can now safely (we think) grab EDX */
+
+ regSet.rsGrabReg(RBM_EDX);
+ regSet.rsLockReg(RBM_EDX);
+
+ /* Convert the integer in EAX into an un/signed long in EDX:EAX */
+
+ const genTreeOps oper = tree->OperGet();
+
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
+ else
+ instGen(INS_cdq);
+
+ /* Make sure the divisor is still addressable */
+
+ addrReg = genKeepAddressable(op2, addrReg, RBM_EAX);
+
+ /* Perform the division */
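+ // The divide instruction uses EDX:EAX as the dividend, leaving the quotient
+ // in EAX and the remainder in EDX.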
+
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ inst_TT(INS_UNSIGNED_DIVIDE, op2);
+ else
+ inst_TT(INS_SIGNED_DIVIDE, op2);
+
+ /* Free up anything tied up by the divisor's address */
+
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ /* Unlock and free EDX */
+
+ regSet.rsUnlockReg(RBM_EDX);
+
+ /* Free up op1 (which is in EAX) as well */
+
+ genReleaseReg(op1);
+
+ /* Both EAX and EDX are now trashed */
+
+ regTracker.rsTrackRegTrash(REG_EAX);
+ regTracker.rsTrackRegTrash(REG_EDX);
+
+ /* Figure out which register the result is in */
+
+ reg = (oper == GT_DIV || oper == GT_UDIV) ? REG_EAX : REG_EDX;
+
+ /* Don't forget to mark the first operand as using EAX and EDX */
+
+ op1->gtRegNum = reg;
+
+ genCodeForTree_DONE(tree, reg);
+
+#elif defined(_TARGET_ARM_)
+
+ /* Which operand are we supposed to evaluate first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* We'll evaluate 'op2' first */
+
+ gotOp1 = false;
+ destReg &= ~op1->gtRsvdRegs;
+
+ /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ if (varDsc->lvRegister)
+ {
+ destReg &= ~genRegMask(varDsc->lvRegNum);
+ }
+ }
+ }
+ else
+ {
+ /* We'll evaluate 'op1' first */
+
+ gotOp1 = true;
+ regMaskTP op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
+
+ /* Generate the dividend into a register and hold on to it. */
+
+ genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+ }
+
+ /* Evaluate the second operand into a register and hold onto it. */
+
+ genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+
+ if (gotOp1)
+ {
+ // Recover op1 if spilled
+ genRecoverReg(op1, RBM_NONE, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Compute op1 into any register and hold on to it */
+ genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ // Perform the division
+
+ const genTreeOps oper = tree->OperGet();
+
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ ins = INS_udiv;
+ else
+ ins = INS_sdiv;
+
+ getEmitter()->emitIns_R_R_R(ins, EA_4BYTE, reg, op1->gtRegNum, op2->gtRegNum);
+
+ if (oper == GT_UMOD || oper == GT_MOD)
+ {
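+ // 'reg' currently holds op1 / op2; derive the remainder as
+ // op1 - (op1 / op2) * op2 using a multiply and a subtract.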
+ getEmitter()->emitIns_R_R_R(INS_mul, EA_4BYTE, reg, op2->gtRegNum, reg);
+ getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, reg, op1->gtRegNum, reg);
+ }
+ /* Free up op1 and op2 */
+ genReleaseReg(op1);
+ genReleaseReg(op2);
+
+ genCodeForTree_DONE(tree, reg);
+
+#else
+#error "Unknown _TARGET_"
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate code for an assignment shift (x <op>= y). Handles GT_ASG_LSH, GT_ASG_RSH, GT_ASG_RSZ.
+ */
+
+void CodeGen::genCodeForAsgShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_ASG_LSH || tree->OperGet() == GT_ASG_RSH || tree->OperGet() == GT_ASG_RSZ);
+
+ const genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ regMaskTP needReg = destReg;
+ regNumber reg;
+ instruction ins;
+ regMaskTP addrReg;
+
+ switch (oper)
+ {
+ case GT_ASG_LSH:
+ ins = INS_SHIFT_LEFT_LOGICAL;
+ break;
+ case GT_ASG_RSH:
+ ins = INS_SHIFT_RIGHT_ARITHM;
+ break;
+ case GT_ASG_RSZ:
+ ins = INS_SHIFT_RIGHT_LOGICAL;
+ break;
+ default:
+ unreached();
+ }
+
+ noway_assert(!varTypeIsGC(treeType));
+ noway_assert(op2);
+
+ /* Shifts by a constant amount are easier */
+
+ if (op2->IsCnsIntOrI())
+ {
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
+
+ /* Are we shifting a register left by 1 bit? */
+
+ if ((oper == GT_ASG_LSH) && (op2->gtIntCon.gtIconVal == 1) && (op1->gtFlags & GTF_REG_VAL))
+ {
+ /* The target lives in a register */
+
+ reg = op1->gtRegNum;
+
+ /* "add reg, reg" is cheaper than "shl reg, 1" */
+
+ inst_RV_RV(INS_add, reg, reg, treeType, emitActualTypeSize(treeType), flags);
+ }
+ else
+ {
+#if CPU_LOAD_STORE_ARCH
+ if ((op1->gtFlags & GTF_REG_VAL) == 0)
+ {
+ regSet.rsLockUsedReg(addrReg);
+
+ // Load op1 into a reg
+
+ reg = regSet.rsPickReg(RBM_ALLINT);
+
+ inst_RV_TT(INS_mov, reg, op1);
+
+ // Issue the shift
+
+ inst_RV_IV(ins, reg, (int)op2->gtIntCon.gtIconVal, emitActualTypeSize(treeType), flags);
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Store the (sign/zero extended) result back to the stack location of the variable */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
+
+ regSet.rsUnlockUsedReg(addrReg);
+ }
+ else
+#endif // CPU_LOAD_STORE_ARCH
+ {
+ /* Shift by the constant value */
+
+ inst_TT_SH(ins, op1, (int)op2->gtIntCon.gtIconVal);
+ }
+ }
+
+ /* If the target is a register, it has a new value */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ regTracker.rsTrackRegTrash(op1->gtRegNum);
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ /* The zero flag is now equal to the target value */
+ /* X86: But only if the shift count is != 0 */
+
+ if (op2->gtIntCon.gtIconVal != 0)
+ {
+ if (tree->gtSetFlags())
+ {
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
+ }
+ else if (op1->gtOper == GT_REG_VAR)
+ {
+ genFlagsEqualToReg(tree, op1->gtRegNum);
+ }
+ }
+ }
+ else
+ {
+ // It is possible for the shift count to equal 0 with valid
+ // IL, and not be optimized away, in the case where the node
+ // is of a small type. The sequence of instructions looks like
+ // ldsfld, shr, stsfld, and is executed on a char field. This will
+ // never happen with code produced by our compilers, because the
+ // compilers will insert a conv.u2 before the stsfld (which will
+ // lead us down a different codepath in the JIT and optimize away
+ // the shift by zero). This case is not worth optimizing and we
+ // will just make sure to generate correct code for it.
+
+ genFlagsEqualToNone();
+ }
+ }
+ else
+ {
+ regMaskTP op2Regs = RBM_NONE;
+ if (REG_SHIFT != REG_NA)
+ op2Regs = RBM_SHIFT;
+
+ regMaskTP tempRegs;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tempRegs = regSet.rsMustExclude(op2Regs, op1->gtRsvdRegs);
+ genCodeForTree(op2, tempRegs);
+ regSet.rsMarkRegUsed(op2);
+
+ tempRegs = regSet.rsMustExclude(RBM_ALLINT, genRegMask(op2->gtRegNum));
+ addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
+
+ genRecoverReg(op2, op2Regs, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Make the target addressable avoiding op2->RsvdRegs [and RBM_SHIFT] */
+ regMaskTP excludeMask = op2->gtRsvdRegs;
+ if (REG_SHIFT != REG_NA)
+ excludeMask |= RBM_SHIFT;
+
+ tempRegs = regSet.rsMustExclude(RBM_ALLINT, excludeMask);
+ addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
+
+ /* Load the shift count into the necessary register */
+ genComputeReg(op2, op2Regs, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ }
+
+ /* Make sure the address registers are still here */
+ addrReg = genKeepAddressable(op1, addrReg, op2Regs);
+
+#ifdef _TARGET_XARCH_
+ /* Perform the shift */
+ inst_TT_CL(ins, op1);
+#else
+ /* Perform the shift */
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ op2Regs = genRegMask(op2->gtRegNum);
+
+ regSet.rsLockUsedReg(addrReg | op2Regs);
+ inst_TT_RV(ins, op1, op2->gtRegNum, 0, emitTypeSize(treeType), flags);
+ regSet.rsUnlockUsedReg(addrReg | op2Regs);
+#endif
+ /* Free the address registers */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ /* If the value is in a register, it's now trash */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ regTracker.rsTrackRegTrash(op1->gtRegNum);
+
+ /* Release the op2 [RBM_SHIFT] operand */
+
+ genReleaseReg(op2);
+ }
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, /* unused for ovfl=false */ REG_NA, /* ovfl */ false);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a shift. Handles GT_LSH, GT_RSH, GT_RSZ.
+ */
+
+void CodeGen::genCodeForShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperIsShift());
+
+ const genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ regMaskTP needReg = destReg;
+ regNumber reg;
+ instruction ins;
+
+ switch (oper)
+ {
+ case GT_LSH:
+ ins = INS_SHIFT_LEFT_LOGICAL;
+ break;
+ case GT_RSH:
+ ins = INS_SHIFT_RIGHT_ARITHM;
+ break;
+ case GT_RSZ:
+ ins = INS_SHIFT_RIGHT_LOGICAL;
+ break;
+ default:
+ unreached();
+ }
+
+ /* Is the shift count constant? */
+ noway_assert(op2);
+ if (op2->IsIntCnsFitsInI32())
+ {
+ // TODO: Check to see if we could generate a LEA instead!
+
+ /* Compute the left operand into any free register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::KEEP_REG);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ reg = op1->gtRegNum;
+
+ /* Are we shifting left by 1 bit? (or 2 bits for fast code) */
+
+ // On ARM, until proven otherwise by performance numbers, just do the shift.
+ // It's no bigger than add (16 bits for low registers, 32 bits for high registers).
+ // It's smaller than two "add reg, reg".
+
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_ARM_
+ if (oper == GT_LSH)
+ {
+ emitAttr size = emitActualTypeSize(treeType);
+ if (op2->gtIntConCommon.IconValue() == 1)
+ {
+ /* "add reg, reg" is smaller and faster than "shl reg, 1" */
+ inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
+ }
+ else if ((op2->gtIntConCommon.IconValue() == 2) && (compiler->compCodeOpt() == Compiler::FAST_CODE))
+ {
+ /* two "add reg, reg" instructions are faster than "shl reg, 2" */
+ inst_RV_RV(INS_add, reg, reg, treeType);
+ inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
+ }
+ else
+ goto DO_SHIFT_BY_CNS;
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+#ifndef _TARGET_ARM_
+ DO_SHIFT_BY_CNS:
+#endif // _TARGET_ARM_
+ // If we are shifting 'reg' by zero bits and do not need the flags to be set
+ // then we can just skip emitting the instruction as 'reg' is already correct.
+ //
+ if ((op2->gtIntConCommon.IconValue() != 0) || tree->gtSetFlags())
+ {
+ /* Generate the appropriate shift instruction */
+ inst_RV_SH(ins, emitTypeSize(treeType), reg, (int)op2->gtIntConCommon.IconValue(), flags);
+ }
+ }
+ }
+ else
+ {
+ /* Calculate a useful register mask for computing op1 */
+ needReg = regSet.rsNarrowHint(regSet.rsRegMaskFree(), needReg);
+ regMaskTP op2RegMask;
+#ifdef _TARGET_XARCH_
+ op2RegMask = RBM_ECX;
+#else
+ op2RegMask = RBM_NONE;
+#endif
+ needReg = regSet.rsMustExclude(needReg, op2RegMask);
+
+ regMaskTP tempRegs;
+
+ /* Which operand are we supposed to evaluate first? */
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Load the shift count [into ECX on XARCH] */
+ tempRegs = regSet.rsMustExclude(op2RegMask, op1->gtRsvdRegs);
+ genComputeReg(op2, tempRegs, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
+
+ /* We must not target the register that is holding op2 */
+ needReg = regSet.rsMustExclude(needReg, genRegMask(op2->gtRegNum));
+
+ /* Now evaluate 'op1' into a free register */
+ genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+
+ /* Recover op2 into ECX */
+ genRecoverReg(op2, op2RegMask, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Compute op1 into a register, trying to avoid op2->rsvdRegs and ECX */
+ tempRegs = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
+ genComputeReg(op1, tempRegs, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+
+ /* Load the shift count [into ECX on XARCH] */
+ genComputeReg(op2, op2RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
+ }
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+#ifdef _TARGET_XARCH_
+ noway_assert(genRegMask(op2->gtRegNum) == op2RegMask);
+#endif
+ // Check for the case of op1 being spilled during the evaluation of op2
+ if (op1->gtFlags & GTF_SPILLED)
+ {
+ // The register has been spilled -- reload it to any register except ECX
+ regSet.rsLockUsedReg(op2RegMask);
+ regSet.rsUnspillReg(op1, 0, RegSet::KEEP_REG);
+ regSet.rsUnlockUsedReg(op2RegMask);
+ }
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ reg = op1->gtRegNum;
+
+#ifdef _TARGET_ARM_
+ /* Perform the shift */
+ getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
+#else
+ /* Perform the shift */
+ inst_RV_CL(ins, reg);
+#endif
+ genReleaseReg(op2);
+ }
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(reg == op1->gtRegNum);
+
+ /* The register is now trashed */
+ genReleaseReg(op1);
+ regTracker.rsTrackRegTrash(reg);
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree).
+ * Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
+ */
+
+void CodeGen::genCodeForRelop(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE || tree->OperGet() == GT_LT ||
+ tree->OperGet() == GT_LE || tree->OperGet() == GT_GE || tree->OperGet() == GT_GT);
+
+ const genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ // Long and floating-point comparisons are converted to "?:"
+ noway_assert(!compiler->fgMorphRelopToQmark(op1));
+
+ // Check if we can use the currently set flags. Else set them
+
+ emitJumpKind jumpKind = genCondSetFlags(tree);
+
+ // Grab a register to materialize the bool value into
+
+ bestReg = regSet.rsRegMaskCanGrab() & RBM_BYTE_REGS;
+
+ // Check that the predictor did the right job
+ noway_assert(bestReg);
+
+ // If needReg is in bestReg then use it
+ if (needReg & bestReg)
+ reg = regSet.rsGrabReg(needReg & bestReg);
+ else
+ reg = regSet.rsGrabReg(bestReg);
+
+#if defined(_TARGET_ARM_)
+
+ // Generate:
+ // jump-if-true L_true
+ // mov reg, 0
+ // jmp L_end
+ // L_true:
+ // mov reg, 1
+ // L_end:
+
+ BasicBlock* L_true;
+ BasicBlock* L_end;
+
+ L_true = genCreateTempLabel();
+ L_end = genCreateTempLabel();
+
+ inst_JMP(jumpKind, L_true);
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 0); // Executes when the cond is false
+ inst_JMP(EJ_jmp, L_end);
+ genDefineTempLabel(L_true);
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 1); // Executes when the cond is true
+ genDefineTempLabel(L_end);
+
+ regTracker.rsTrackRegTrash(reg);
+
+#elif defined(_TARGET_XARCH_)
+ regMaskTP regs = genRegMask(reg);
+ noway_assert(regs & RBM_BYTE_REGS);
+
+ // Set (lower byte of) reg according to the flags
+
+ /* Look for the special case where we just want to transfer the carry bit */
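+ // (Subtract-with-borrow of reg from itself leaves -CF in reg; negating that
+ // gives CF for the EJ_jb case, while adding 1 gives 1 - CF for the EJ_jae case.)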
+
+ if (jumpKind == EJ_jb)
+ {
+ inst_RV_RV(INS_SUBC, reg, reg);
+ inst_RV(INS_NEG, reg, TYP_INT);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else if (jumpKind == EJ_jae)
+ {
+ inst_RV_RV(INS_SUBC, reg, reg);
+ genIncRegBy(reg, 1, tree, TYP_INT);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else
+ {
+ inst_SET(jumpKind, reg);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ if (treeType == TYP_INT)
+ {
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), reg, reg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ }
+ else
+ {
+ noway_assert(treeType == TYP_BYTE);
+ }
+ }
+#else
+ NYI("TARGET");
+#endif // _TARGET_XXX
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+//------------------------------------------------------------------------
+// genCodeForCopyObj: Generate code for a CopyObj node
+//
+// Arguments:
+// tree - The CopyObj node we are going to generate code for.
+// destReg - The register mask for register(s), if any, that will be defined.
+//
+// Return Value:
+// None
+
+void CodeGen::genCodeForCopyObj(GenTreePtr tree, regMaskTP destReg)
+{
+ // If the value class doesn't have any fields that are GC refs or
+ // the target isn't on the GC-heap, we can merge it with CPBLK.
+ // GC fields cannot be copied directly; instead we will
+ // need to use a jit-helper for that.
+ assert(tree->gtOper == GT_ASG);
+ assert(tree->gtOp.gtOp1->gtOper == GT_OBJ);
+
+ GenTreeObj* cpObjOp = tree->gtOp.gtOp1->AsObj();
+ assert(cpObjOp->HasGCPtr());
+
+#ifdef _TARGET_ARM_
+ if (cpObjOp->IsVolatile())
+ {
+ // Emit a memory barrier instruction before the CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif
+ assert(tree->gtOp.gtOp2->OperIsIndir());
+ GenTreePtr srcObj = tree->gtOp.gtOp2->AsIndir()->Addr();
+ GenTreePtr dstObj = cpObjOp->Addr();
+
+ noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
+
+#ifdef DEBUG
+ CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)cpObjOp->gtClass;
+ size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
+
+ // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers.
+ // The EE currently does not allow this. Let's assert it just to be safe.
+ noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
+#endif
+
+ size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
+ unsigned slots = cpObjOp->gtSlots;
+ BYTE* gcPtrs = cpObjOp->gtGcPtrs;
+ unsigned gcPtrCount = cpObjOp->gtGcPtrCount;
+ assert(blkSize == cpObjOp->gtBlkSize);
+
+ GenTreePtr treeFirst, treeSecond;
+ regNumber regFirst, regSecond;
+
+ // In which order do the object-ptrs have to be evaluated?
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ treeFirst = srcObj;
+ treeSecond = dstObj;
+#if CPU_USES_BLOCK_MOVE
+ regFirst = REG_ESI;
+ regSecond = REG_EDI;
+#else
+ regFirst = REG_ARG_1;
+ regSecond = REG_ARG_0;
+#endif
+ }
+ else
+ {
+ treeFirst = dstObj;
+ treeSecond = srcObj;
+#if CPU_USES_BLOCK_MOVE
+ regFirst = REG_EDI;
+ regSecond = REG_ESI;
+#else
+ regFirst = REG_ARG_0;
+ regSecond = REG_ARG_1;
+#endif
+ }
+
+ bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
+ bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
+ emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+
+#if CPU_USES_BLOCK_MOVE
+ // Materialize the trees in the order desired
+
+ genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
+
+ // Grab ECX because it will be trashed by the helper
+ //
+ regSet.rsGrabReg(RBM_ECX);
+
+ while (blkSize >= TARGET_POINTER_SIZE)
+ {
+ if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
+ {
+ // Note that we can use movsd even if it is a GC pointer being transferred
+ // because the value is not cached anywhere. If we did this in two moves,
+ // we would have to make certain we passed the appropriate GC info on to
+ // the emitter.
+ instGen(INS_movsp);
+ }
+ else
+ {
+ // This helper will act like a MOVSD
+ // -- inputs EDI and ESI are byrefs
+ // -- including incrementing of ESI and EDI by 4
+ // -- helper will trash ECX
+ //
+ regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
+ regSet.rsLockUsedReg(argRegs);
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+ regSet.rsUnlockUsedReg(argRegs);
+ }
+
+ blkSize -= TARGET_POINTER_SIZE;
+ }
+
+ // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers
+
+ regTracker.rsTrackRegTrash(REG_EDI);
+ regTracker.rsTrackRegTrash(REG_ESI);
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI);
+
+ /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
+ it is an emitNoGChelper. However, we have to let the emitter know that
+ the GC liveness has changed. We do this by creating a new label.
+ */
+
+ noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
+
+ genDefineTempLabel(&dummyBB);
+
+#else // !CPU_USES_BLOCK_MOVE
+
+#ifndef _TARGET_ARM_
+// Currently only the ARM implementation is provided
+#error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
+#endif
+
+ // Materialize the trees in the order desired
+ bool helperUsed;
+ regNumber regDst;
+ regNumber regSrc;
+ regNumber regTemp;
+
+ if ((gcPtrCount > 0) && !dstIsOnStack)
+ {
+ genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
+
+ /* The helper is an asm routine that will trash R2, R3 and LR */
+ {
+ /* Spill any callee-saved registers which are being used */
+ regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed;
+
+ if (spillRegs)
+ {
+ regSet.rsSpillRegs(spillRegs);
+ }
+ }
+
+ // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper
+ // We will also use it as the temp register for our load/store sequences
+ //
+ assert(REG_R2 == REG_TMP_1);
+ regTemp = regSet.rsGrabReg(RBM_R2);
+ helperUsed = true;
+ }
+ else
+ {
+ genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG);
+ genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG);
+ genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG);
+
+ // Grab any temp register to use for our load/store sequences
+ //
+ regTemp = regSet.rsGrabReg(RBM_ALLINT);
+ helperUsed = false;
+ }
+ assert(dstObj->gtFlags & GTF_REG_VAL);
+ assert(srcObj->gtFlags & GTF_REG_VAL);
+
+ regDst = dstObj->gtRegNum;
+ regSrc = srcObj->gtRegNum;
+
+ assert(regDst != regTemp);
+ assert(regSrc != regTemp);
+
+ instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
+ instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
+
+ size_t offset = 0;
+ while (blkSize >= TARGET_POINTER_SIZE)
+ {
+ CorInfoGCType gcType;
+ CorInfoGCType gcTypeNext = TYPE_GC_NONE;
+ var_types type = TYP_I_IMPL;
+
+#if FEATURE_WRITE_BARRIER
+ gcType = (CorInfoGCType)(*gcPtrs++);
+ if (blkSize > TARGET_POINTER_SIZE)
+ gcTypeNext = (CorInfoGCType)(*gcPtrs);
+
+ if (gcType == TYPE_GC_REF)
+ type = TYP_REF;
+ else if (gcType == TYPE_GC_BYREF)
+ type = TYP_BYREF;
+
+ if (helperUsed)
+ {
+ assert(regDst == REG_ARG_0);
+ assert(regSrc == REG_ARG_1);
+ assert(regTemp == REG_R2);
+ }
+#else
+ gcType = TYPE_GC_NONE;
+#endif // FEATURE_WRITE_BARRIER
+
+ blkSize -= TARGET_POINTER_SIZE;
+
+ emitAttr opSize = emitTypeSize(type);
+
+ if (!helperUsed || (gcType == TYPE_GC_NONE))
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset);
+ getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset);
+ offset += TARGET_POINTER_SIZE;
+
+ if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) || ((offset >= 128) && (blkSize > 0)))
+ {
+ getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset);
+ getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset);
+ offset = 0;
+ }
+ }
+ else
+ {
+ assert(offset == 0);
+
+ // The helper will act like this:
+ // -- inputs R0 and R1 are byrefs
+ // -- helper will perform copy from *R1 into *R0
+ // -- helper will perform post increment of R0 and R1 by 4
+ // -- helper will trash R2
+ // -- helper will trash R3
+ // -- calling the helper implicitly trashes LR
+ //
+ assert(helperUsed);
+ regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
+ regSet.rsLockUsedReg(argRegs);
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+
+ regSet.rsUnlockUsedReg(argRegs);
+ regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC);
+ }
+ }
+
+ regTracker.rsTrackRegTrash(regDst);
+ regTracker.rsTrackRegTrash(regSrc);
+ regTracker.rsTrackRegTrash(regTemp);
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc));
+
+ /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
+ it is an emitNoGChelper. However, we have to let the emitter know that
+ the GC liveness has changed. We do this by creating a new label.
+ */
+
+ noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
+
+ genDefineTempLabel(&dummyBB);
+
+#endif // !CPU_USES_BLOCK_MOVE
+
+ assert(blkSize == 0);
+
+ genReleaseReg(dstObj);
+ genReleaseReg(srcObj);
+
+ genCodeForTree_DONE(tree, REG_NA);
+
+#ifdef _TARGET_ARM_
+ if (cpObjOp->IsVolatile())
+ {
+ // Emit a memory barrier instruction after the CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif
+}
+
+//------------------------------------------------------------------------
+// genCodeForBlkOp: Generate code for a block copy or init operation
+//
+// Arguments:
+// tree - The block assignment
+// destReg - The expected destination register
+//
+void CodeGen::genCodeForBlkOp(GenTreePtr tree, regMaskTP destReg)
+{
+ genTreeOps oper = tree->OperGet();
+ GenTreePtr dest = tree->gtOp.gtOp1;
+ GenTreePtr src = tree->gtGetOp2();
+ regMaskTP needReg = destReg;
+ regMaskTP regs = regSet.rsMaskUsed;
+ GenTreePtr opsPtr[3];
+ regMaskTP regsPtr[3];
+ GenTreePtr destPtr;
+ GenTreePtr srcPtrOrVal;
+
+ noway_assert(tree->OperIsBlkOp());
+
+ bool isCopyBlk = false;
+ bool isInitBlk = false;
+ bool hasGCpointer = false;
+ unsigned blockSize = dest->AsBlk()->gtBlkSize;
+ GenTreePtr sizeNode = nullptr;
+ bool sizeIsConst = true;
+ if (dest->gtOper == GT_DYN_BLK)
+ {
+ sizeNode = dest->AsDynBlk()->gtDynamicSize;
+ sizeIsConst = false;
+ }
+
+ if (tree->OperIsCopyBlkOp())
+ {
+ isCopyBlk = true;
+ if (dest->gtOper == GT_OBJ)
+ {
+ if (dest->AsObj()->gtGcPtrCount != 0)
+ {
+ genCodeForCopyObj(tree, destReg);
+ return;
+ }
+ }
+ }
+ else
+ {
+ isInitBlk = true;
+ }
+
+ // Ensure that we have an address in the CopyBlk case.
+ if (isCopyBlk)
+ {
+ // TODO-1stClassStructs: Allow a lclVar here.
+ assert(src->OperIsIndir());
+ srcPtrOrVal = src->AsIndir()->Addr();
+ }
+ else
+ {
+ srcPtrOrVal = src;
+ }
+
+#ifdef _TARGET_ARM_
+ if (dest->AsBlk()->IsVolatile())
+ {
+ // Emit a memory barrier instruction before the InitBlk/CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif
+ {
+ destPtr = dest->AsBlk()->Addr();
+ noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet()));
+ noway_assert(
+ (isCopyBlk && (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet()))) ||
+ (isInitBlk && varTypeIsIntegral(srcPtrOrVal->TypeGet())));
+
+ noway_assert(destPtr && srcPtrOrVal);
+
+#if CPU_USES_BLOCK_MOVE
+ regs = isInitBlk ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src
+
+ /* Some special code for block moves/inits for constant sizes */
+
+ //
+ // Is this a fixed size COPYBLK?
+ // or a fixed size INITBLK with a constant init value?
+ //
+ if ((sizeIsConst) && (isCopyBlk || (srcPtrOrVal->IsCnsIntOrI())))
+ {
+ size_t length = blockSize;
+ size_t initVal = 0;
+ instruction ins_P, ins_PR, ins_B;
+
+ if (isInitBlk)
+ {
+ ins_P = INS_stosp;
+ ins_PR = INS_r_stosp;
+ ins_B = INS_stosb;
+
+ /* Properly extend the init constant from a U1 to a U4 */
+ initVal = 0xFF & ((unsigned)srcPtrOrVal->gtIntCon.gtIconVal);
+
+ /* If it is a non-zero value we have to replicate */
+ /* the byte value four times to form the DWORD */
+ /* Then we store this new value into the tree-node */
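+ // e.g. an init value of 0xAB becomes 0xABABABAB (and, for lengths greater than 4
+ // on 64-bit targets, 0xABABABABABABABAB).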
+
+ if (initVal)
+ {
+ initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
+#ifdef _TARGET_64BIT_
+ if (length > 4)
+ {
+ initVal = initVal | (initVal << 32);
+ srcPtrOrVal->gtType = TYP_LONG;
+ }
+ else
+ {
+ srcPtrOrVal->gtType = TYP_INT;
+ }
+#endif // _TARGET_64BIT_
+ }
+ srcPtrOrVal->gtIntCon.gtIconVal = initVal;
+ }
+ else
+ {
+ ins_P = INS_movsp;
+ ins_PR = INS_r_movsp;
+ ins_B = INS_movsb;
+ }
+
+ // Determine if we will be using SSE2
+ unsigned movqLenMin = 8;
+ unsigned movqLenMax = 24;
+
+ bool bWillUseSSE2 = false;
+ bool bWillUseOnlySSE2 = false;
+ bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there.
+
+#ifdef _TARGET_64BIT_
+
+// Until we get SSE2 instructions that move 16 bytes at a time instead of just 8
+// there is no point in wasting space on the bigger instructions
+
+#else // !_TARGET_64BIT_
+
+ if (compiler->opts.compCanUseSSE2)
+ {
+ unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
+
+ /* Adjust for BB weight */
+ if (curBBweight == BB_ZERO_WEIGHT)
+ {
+ // Don't bother with this optimization in
+ // rarely run blocks
+ movqLenMax = movqLenMin = 0;
+ }
+ else if (curBBweight < BB_UNITY_WEIGHT)
+ {
+ // Be less aggressive when we are inside a conditional
+ movqLenMax = 16;
+ }
+ else if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
+ {
+ // Be more aggressive when we are inside a loop
+ movqLenMax = 48;
+ }
+
+ if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || isInitBlk)
+ {
+ // Be more aggressive when optimizing for speed
+ // InitBlk uses fewer instructions
+ movqLenMax += 16;
+ }
+
+ if (compiler->compCodeOpt() != Compiler::SMALL_CODE && length >= movqLenMin && length <= movqLenMax)
+ {
+ bWillUseSSE2 = true;
+
+ if ((length % 8) == 0)
+ {
+ bWillUseOnlySSE2 = true;
+ if (isInitBlk && (initVal == 0))
+ {
+ bNeedEvaluateCnst = false;
+ noway_assert((srcPtrOrVal->OperGet() == GT_CNS_INT));
+ }
+ }
+ }
+ }
+
+#endif // !_TARGET_64BIT_
+
+ const bool bWillTrashRegSrc = (isCopyBlk && !bWillUseOnlySSE2);
+ /* Evaluate dest and src/val */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ if (bNeedEvaluateCnst)
+ {
+ genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
+ }
+ genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
+ if (bNeedEvaluateCnst)
+ {
+ genRecoverReg(srcPtrOrVal, regs, RegSet::KEEP_REG);
+ }
+ }
+ else
+ {
+ genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
+ if (bNeedEvaluateCnst)
+ {
+ genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
+ }
+ genRecoverReg(destPtr, RBM_EDI, RegSet::KEEP_REG);
+ }
+
+ bool bTrashedESI = false;
+ bool bTrashedEDI = false;
+
+ if (bWillUseSSE2)
+ {
+ int blkDisp = 0;
+ regNumber xmmReg = REG_XMM0;
+
+ if (isInitBlk)
+ {
+ if (initVal)
+ {
+ getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX);
+ getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg);
+ }
+ }
+
+ JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n",
+ length, isInitBlk ? "initblk" : "copyblk", compiler->info.compFullName));
+
+ while (length > 7)
+ {
+ if (isInitBlk)
+ {
+ getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp);
+ getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
+ }
+ blkDisp += 8;
+ length -= 8;
+ }
+
+ if (length > 0)
+ {
+ noway_assert(bNeedEvaluateCnst);
+ noway_assert(!bWillUseOnlySSE2);
+
+ if (isCopyBlk)
+ {
+ inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet()));
+ bTrashedESI = true;
+ }
+
+ inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet()));
+ bTrashedEDI = true;
+
+ if (length >= REGSIZE_BYTES)
+ {
+ instGen(ins_P);
+ length -= REGSIZE_BYTES;
+ }
+ }
+ }
+ else if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
+ {
+ /* For small code, we can only use ins_DR to generate fast
+ and small code. We also can't use "rep movsb" because
+ we may not be atomically reading and writing the DWORD */
+
+ noway_assert(bNeedEvaluateCnst);
+
+ goto USE_DR;
+ }
+ else if (length <= 4 * REGSIZE_BYTES)
+ {
+ noway_assert(bNeedEvaluateCnst);
+
+ while (length >= REGSIZE_BYTES)
+ {
+ instGen(ins_P);
+ length -= REGSIZE_BYTES;
+ }
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+ else
+ {
+ USE_DR:
+ noway_assert(bNeedEvaluateCnst);
+
+ /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
+ genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
+
+ length &= (REGSIZE_BYTES - 1);
+
+ instGen(ins_PR);
+
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+
+ /* Now take care of the remainder */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+ if (length > 4)
+ {
+ noway_assert(bNeedEvaluateCnst);
+ noway_assert(length < 8);
+
+ instGen((isInitBlk) ? INS_stosd : INS_movsd);
+ length -= 4;
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+
+#endif // _TARGET_64BIT_
+
+ if (length)
+ {
+ noway_assert(bNeedEvaluateCnst);
+
+ while (length--)
+ {
+ instGen(ins_B);
+ }
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+
+ noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
+ if (bTrashedEDI)
+ regTracker.rsTrackRegTrash(REG_EDI);
+ if (bTrashedESI)
+ regTracker.rsTrackRegTrash(REG_ESI);
+ // else No need to trash EAX as it wasn't destroyed by the "rep stos"
+
+ genReleaseReg(destPtr);
+ if (bNeedEvaluateCnst)
+ genReleaseReg(srcPtrOrVal);
+ }
+ else
+ {
+ //
+ // This is a variable-sized COPYBLK/INITBLK,
+ // or a fixed-size INITBLK with a variable init value.
+ //
+
+ // In what order should the Dest, Val/Src, and Size be calculated?
+
+ compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX, opsPtr, regsPtr); // OUT arguments
+
+ noway_assert((isInitBlk && (regs == RBM_EAX)) || (isCopyBlk && (regs == RBM_ESI)));
+ genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
+ genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
+ if (opsPtr[2] != nullptr)
+ {
+ genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
+ }
+ genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
+ genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
+
+ noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest
+ (destPtr->gtRegNum == REG_EDI));
+
+ noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && // Val/Src
+ (genRegMask(srcPtrOrVal->gtRegNum) == regs));
+
+ if (sizeIsConst)
+ {
+ inst_RV_IV(INS_mov, REG_ECX, blockSize, EA_PTRSIZE);
+ }
+ else
+ {
+ noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size
+ (sizeNode->gtRegNum == REG_ECX));
+ }
+
+ if (isInitBlk)
+ instGen(INS_r_stosb);
+ else
+ instGen(INS_r_movsb);
+
+ regTracker.rsTrackRegTrash(REG_EDI);
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ if (isCopyBlk)
+ regTracker.rsTrackRegTrash(REG_ESI);
+ // else No need to trash EAX as it wasn't destroyed by the "rep stos"
+
+ genReleaseReg(opsPtr[0]);
+ genReleaseReg(opsPtr[1]);
+ if (opsPtr[2] != nullptr)
+ {
+ genReleaseReg(opsPtr[2]);
+ }
+ }
+
+#else // !CPU_USES_BLOCK_MOVE
+
+#ifndef _TARGET_ARM_
+// Currently only the ARM implementation is provided
+#error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
+#endif
+ //
+ // Is this a fixed size COPYBLK?
+ // or a fixed size INITBLK with a constant init value?
+ //
+ if (sizeIsConst && (isCopyBlk || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
+ {
+ GenTreePtr dstOp = destPtr;
+ GenTreePtr srcOp = srcPtrOrVal;
+ unsigned length = blockSize;
+ unsigned fullStoreCount = length / TARGET_POINTER_SIZE;
+ unsigned initVal = 0;
+ bool useLoop = false;
+
+ if (isInitBlk)
+ {
+ /* Properly extend the init constant from a U1 to a U4 */
+ initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);
+
+ /* If it is a non-zero value we have to replicate */
+ /* the byte value four times to form the DWORD */
+ /* Then we store this new value into the tree-node */
+
+ if (initVal != 0)
+ {
+ initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
+ srcPtrOrVal->gtIntCon.gtIconVal = initVal;
+ }
+ }
+
+ // Will we be using a loop to implement this INITBLK/COPYBLK?
+ if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
+ {
+ useLoop = true;
+ }
+
+ regMaskTP usedRegs;
+ regNumber regDst;
+ regNumber regSrc;
+ regNumber regTemp;
+
+ /* Evaluate dest and src/val */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(srcOp->gtFlags & GTF_REG_VAL);
+
+ genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(dstOp->gtFlags & GTF_REG_VAL);
+ regDst = dstOp->gtRegNum;
+
+ genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
+ regSrc = srcOp->gtRegNum;
+ }
+ else
+ {
+ genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(dstOp->gtFlags & GTF_REG_VAL);
+
+ genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(srcOp->gtFlags & GTF_REG_VAL);
+ regSrc = srcOp->gtRegNum;
+
+ genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
+ regDst = dstOp->gtRegNum;
+ }
+ assert(dstOp->gtFlags & GTF_REG_VAL);
+ assert(srcOp->gtFlags & GTF_REG_VAL);
+
+ regDst = dstOp->gtRegNum;
+ regSrc = srcOp->gtRegNum;
+ usedRegs = (genRegMask(regSrc) | genRegMask(regDst));
+ bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
+ emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ emitAttr srcType;
+
+ if (isCopyBlk)
+ {
+ // Prefer a low register, but avoid one of the ones we've already grabbed
+ regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
+ usedRegs |= genRegMask(regTemp);
+ bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
+ srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ }
+ else
+ {
+ regTemp = REG_STK;
+ srcType = EA_PTRSIZE;
+ }
+
+ instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
+ instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
+
+ int finalOffset;
+
+ // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK?
+ if (!useLoop)
+ {
+ for (unsigned i = 0; i < fullStoreCount; i++)
+ {
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ regTracker.rsTrackRegTrash(regTemp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE);
+ }
+ }
+
+ finalOffset = fullStoreCount * TARGET_POINTER_SIZE;
+ length -= finalOffset;
+ }
+ else // We will use a loop to implement this INITBLK/COPYBLK
+ {
+ unsigned pairStoreLoopCount = fullStoreCount / 2;
+
+ // We need a second temp register for CopyBlk
+ regNumber regTemp2 = REG_STK;
+ if (isCopyBlk)
+ {
+ // Prefer a low register, but avoid one of the ones we've already grabbed
+ regTemp2 =
+ regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
+ usedRegs |= genRegMask(regTemp2);
+ }
+
+ // Pick and initialize the loop counter register
+ regNumber regLoopIndex;
+ regLoopIndex =
+ regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
+ genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT);
+
+ // Create and define the Basic Block for the loop top
+ BasicBlock* loopTopBlock = genCreateTempLabel();
+ genDefineTempLabel(loopTopBlock);
+
+ // The loop body
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2));
+ regTracker.rsTrackRegTrash(regSrc);
+ regTracker.rsTrackRegTrash(regTemp);
+ regTracker.rsTrackRegTrash(regTemp2);
+ }
+ else // isInitBlk
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE);
+ }
+
+ getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(regDst);
+ getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET);
+ emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
+ inst_JMP(jmpGTS, loopTopBlock);
+
+ regTracker.rsTrackRegIntCns(regLoopIndex, 0);
+
+ length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE));
+
+ if (length & TARGET_POINTER_SIZE)
+ {
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
+ }
+ finalOffset = TARGET_POINTER_SIZE;
+ length -= TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ finalOffset = 0;
+ }
+ }
+
+ if (length & sizeof(short))
+ {
+ loadIns = ins_Load(TYP_USHORT); // INS_ldrh
+ storeIns = ins_Store(TYP_USHORT); // INS_strh
+
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ regTracker.rsTrackRegTrash(regTemp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
+ }
+ length -= sizeof(short);
+ finalOffset += sizeof(short);
+ }
+
+ if (length & sizeof(char))
+ {
+ loadIns = ins_Load(TYP_UBYTE); // INS_ldrb
+ storeIns = ins_Store(TYP_UBYTE); // INS_strb
+
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ regTracker.rsTrackRegTrash(regTemp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
+ }
+ length -= sizeof(char);
+ }
+ assert(length == 0);
+
+ genReleaseReg(dstOp);
+ genReleaseReg(srcOp);
+ }
+ else
+ {
+ //
+ // This is a variable-sized COPYBLK/INITBLK,
+ // or a fixed-size INITBLK with a variable init value.
+ //
+
+ // In what order should the Dest, Val/Src, and Size be calculated?
+
+ compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); // OUT arguments
+
+ genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ if (opsPtr[2] != nullptr)
+ {
+ genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ }
+ genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
+ genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
+
+ noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest
+ (destPtr->gtRegNum == REG_ARG_0));
+
+ noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && // Val/Src
+ (srcPtrOrVal->gtRegNum == REG_ARG_1));
+
+ if (sizeIsConst)
+ {
+ inst_RV_IV(INS_mov, REG_ARG_2, blockSize, EA_PTRSIZE);
+ }
+ else
+ {
+ noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size
+ (sizeNode->gtRegNum == REG_ARG_2));
+ }
+
+ regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
+
+ genEmitHelperCall(isCopyBlk ? CORINFO_HELP_MEMCPY
+ /* GT_INITBLK */
+ : CORINFO_HELP_MEMSET,
+ 0, EA_UNKNOWN);
+
+ regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
+
+ regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
+ genReleaseReg(opsPtr[0]);
+ genReleaseReg(opsPtr[1]);
+ if (opsPtr[2] != nullptr)
+ {
+ genReleaseReg(opsPtr[2]);
+ }
+ }
+
+ if (isCopyBlk && dest->AsBlk()->IsVolatile())
+ {
+ // Emit a memory barrier instruction after the CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif // !CPU_USES_BLOCK_MOVE
+ }
+}
+BasicBlock dummyBB;
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ const genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP regs = regSet.rsMaskUsed;
+ regMaskTP needReg = destReg;
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ emitAttr size;
+ instruction ins;
+ regMaskTP addrReg;
+ GenTreePtr opsPtr[3];
+ regMaskTP regsPtr[3];
+
+#ifdef DEBUG
+ addrReg = 0xDEADCAFE;
+#endif
+
+ noway_assert(tree->OperKind() & GTK_SMPOP);
+
+ switch (oper)
+ {
+ case GT_ASG:
+ if (tree->OperIsBlkOp())
+ {
+ genCodeForBlkOp(tree, destReg);
+ }
+ else
+ {
+ genCodeForTreeSmpOpAsg(tree);
+ }
+ return;
+
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ genCodeForAsgShift(tree, destReg, bestReg);
+ return;
+
+ case GT_ASG_AND:
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg);
+ return;
+
+ case GT_CHS:
+ addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
+#ifdef _TARGET_XARCH_
+ // Note that the specialCase here occurs when the treeType specifies a byte sized operation
+ // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI)
+ //
+ bool specialCase;
+ specialCase = false;
+ if (op1->gtOper == GT_REG_VAR)
+ {
+ /* Get hold of the target register */
+
+ reg = op1->gtRegVar.gtRegNum;
+ if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, reg);
+ regTracker.rsTrackRegTrash(byteReg);
+
+ inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType));
+ var_types op1Type = op1->TypeGet();
+ instruction wideningIns = ins_Move_Extend(op1Type, true);
+ inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type));
+ regTracker.rsTrackRegTrash(reg);
+ specialCase = true;
+ }
+ }
+
+ if (!specialCase)
+ {
+ inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType));
+ }
+#else // not _TARGET_XARCH_
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags);
+ }
+ else
+ {
+ // Fix 388382 ARM JitStress WP7
+ var_types op1Type = op1->TypeGet();
+ regNumber reg = regSet.rsPickFreeReg();
+ inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type));
+ regTracker.rsTrackRegTrash(reg);
+ inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags);
+ inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type));
+ }
+#endif
+ if (op1->gtFlags & GTF_REG_VAL)
+ regTracker.rsTrackRegTrash(op1->gtRegNum);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false);
+ return;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg);
+ return;
+
+ case GT_UMOD:
+ genCodeForUnsignedMod(tree, destReg, bestReg);
+ return;
+
+ case GT_MOD:
+ genCodeForSignedMod(tree, destReg, bestReg);
+ return;
+
+ case GT_UDIV:
+ genCodeForUnsignedDiv(tree, destReg, bestReg);
+ return;
+
+ case GT_DIV:
+ genCodeForSignedDiv(tree, destReg, bestReg);
+ return;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ genCodeForShift(tree, destReg, bestReg);
+ return;
+
+ case GT_NEG:
+ case GT_NOT:
+
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ /* Negate/reverse the value in the register */
+
+ inst_RV((oper == GT_NEG) ? INS_NEG : INS_NOT, reg, treeType);
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_IND:
+ case GT_NULLCHECK: // At this point, explicit null checks are just like inds...
+
+ /* Make sure the operand is addressable */
+
+ addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true);
+
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+
+ /* Figure out the size of the value being loaded */
+
+ size = EA_ATTR(genTypeSize(tree->gtType));
+
+ /* Pick a register for the value */
+
+ if (needReg == RBM_ALLINT && bestReg == 0)
+ {
+ /* Absent a better suggestion, pick a useless register */
+
+ bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs());
+ }
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL))
+ {
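+ // A TLS handle constant: on x86 this loads the value from an FS-segment-relative
+ // address (FLD_GLOBAL_FS) rather than through a normal address mode.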
+ noway_assert(size == EA_PTRSIZE);
+ getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, FLD_GLOBAL_FS,
+ (int)op1->gtIntCon.gtIconVal);
+ }
+ else
+ {
+ /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */
+
+ inst_mov_RV_ST(reg, tree);
+ }
+
+#ifdef _TARGET_ARM_
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Note the new contents of the register we used */
+
+ regTracker.rsTrackRegTrash(reg);
+
+#ifdef DEBUG
+ /* Update the live set of register variables */
+ if (compiler->opts.varNames)
+ genUpdateLife(tree);
+#endif
+
+ /* Now we can update the register pointer information */
+
+ // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ gcInfo.gcMarkRegPtrVal(reg, treeType);
+
+ genCodeForTree_DONE_LIFE(tree, reg);
+ return;
+
+ case GT_CAST:
+
+ genCodeForNumericCast(tree, destReg, bestReg);
+ return;
+
+ case GT_JTRUE:
+
+ /* Is this a test of a relational operator? */
+
+ if (op1->OperIsCompare())
+ {
+ /* Generate the conditional jump */
+
+ genCondJump(op1);
+
+ genUpdateLife(tree);
+ return;
+ }
+
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
+ break;
+
+ case GT_SWITCH:
+ genCodeForSwitch(tree);
+ return;
+
+ case GT_RETFILT:
+ noway_assert(tree->gtType == TYP_VOID || op1 != 0);
+ if (op1 == 0) // endfinally
+ {
+ reg = REG_NA;
+
+#ifdef _TARGET_XARCH_
+ /* Return using a pop-jmp sequence. As the "try" block calls
+ the finally with a jmp, this leaves the x86 call-ret stack
+ balanced in the normal flow path. */
+
+ noway_assert(isFramePointerRequired());
+ inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
+ inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
+#elif defined(_TARGET_ARM_)
+// Nothing needed for ARM
+#else
+ NYI("TARGET");
+#endif
+ }
+ else // endfilter
+ {
+ genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegNum == REG_INTRET);
+ /* The return value has now been computed */
+ reg = op1->gtRegNum;
+
+ /* Return */
+ instGen_Return(0);
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_RETURN:
+
+ // TODO: this should be done AFTER we called exit mon so that
+ // we are sure that we don't have to keep 'this' alive
+
+ if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
+ {
+ /* either it's an "empty" statement or the return statement
+ of a synchronized method
+ */
+
+ genPInvokeMethodEpilog();
+ }
+
+ /* Is there a return value and/or an exit statement? */
+
+ if (op1)
+ {
+ if (op1->gtType == TYP_VOID)
+ {
+ // We're returning nothing, just generate the block (shared epilog calls).
+ genCodeForTree(op1, 0);
+ }
+#ifdef _TARGET_ARM_
+ else if (op1->gtType == TYP_STRUCT)
+ {
+ if (op1->gtOper == GT_CALL)
+ {
+ // We have a return call() because we failed to tail call.
+ // In any case, just generate the call and be done.
+ assert(compiler->IsHfa(op1));
+ genCodeForCall(op1, true);
+ genMarkTreeInReg(op1, REG_FLOATRET);
+ }
+ else
+ {
+ assert(op1->gtOper == GT_LCL_VAR);
+ assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum)));
+ genLoadIntoFltRetRegs(op1);
+ }
+ }
+ else if (op1->TypeGet() == TYP_FLOAT)
+ {
+ // This can only occur when we are returning a non-HFA struct
+ // that is composed of a single float field and we performed
+ // struct promotion and enregistered the float field.
+ //
+ genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
+ }
+#endif // _TARGET_ARM_
+ else
+ {
+ // we can now go through this code for compiler->genReturnBB. I've regularized all the code.
+
+ // noway_assert(compiler->compCurBB != compiler->genReturnBB);
+
+ noway_assert(op1->gtType != TYP_VOID);
+
+ /* Generate the return value into the return register */
+
+ genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
+
+ /* The result must now be in the return register */
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegNum == REG_INTRET);
+ }
+
+ /* The return value has now been computed */
+
+ reg = op1->gtRegNum;
+
+ genCodeForTree_DONE(tree, reg);
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // The profiling hook does not trash registers, so it's safe to call after we emit the code for
+ // the GT_RETURN tree.
+
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ genProfilingLeaveCallback();
+ }
+#endif
+#ifdef DEBUG
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+ return;
+
+ case GT_COMMA:
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ if (tree->gtType == TYP_VOID)
+ {
+ genEvalSideEffects(op2);
+ genUpdateLife(op2);
+ genEvalSideEffects(op1);
+ genUpdateLife(tree);
+ return;
+ }
+
+ // Generate op2
+ genCodeForTree(op2, needReg);
+ genUpdateLife(op2);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ regSet.rsMarkRegUsed(op2);
+
+ // Do side effects of op1
+ genEvalSideEffects(op1);
+
+ // Recover op2 if spilled
+ genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);
+
+ regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
+
+ // set gc info if we need so
+ gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
+
+ genUpdateLife(tree);
+ genCodeForTree_DONE(tree, op2->gtRegNum);
+
+ return;
+ }
+ else
+ {
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Generate side effects of the first operand */
+
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+
+ /* Is the value of the second operand used? */
+
+ if (tree->gtType == TYP_VOID)
+ {
+ /* The right operand produces no result. The morpher is
+ responsible for resetting the type of GT_COMMA nodes
+ to TYP_VOID if op2 isn't meant to yield a result. */
+
+ genEvalSideEffects(op2);
+ genUpdateLife(tree);
+ return;
+ }
+
+ /* Generate the second operand, i.e. the 'real' value */
+
+ genCodeForTree(op2, needReg);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* The result of 'op2' is also the final result */
+
+ reg = op2->gtRegNum;
+
+ /* Remember whether we set the flags */
+
+ tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ case GT_BOX:
+ genCodeForTree(op1, needReg);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* The result of 'op1' is also the final result */
+
+ reg = op1->gtRegNum;
+
+ /* Remember whether we set the flags */
+
+ tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_QMARK:
+
+ genCodeForQmark(tree, destReg, bestReg);
+ return;
+
+ case GT_NOP:
+
+#if OPT_BOOL_OPS
+ if (op1 == NULL)
+ return;
+#endif
+
+ /* Generate the operand into some register */
+
+ genCodeForTree(op1, needReg);
+
+ /* The result is the same as the operand */
+
+ reg = op1->gtRegNum;
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_INTRINSIC:
+
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Round:
+ {
+ noway_assert(tree->gtType == TYP_INT);
+
+#if FEATURE_STACK_FP_X87
+ genCodeForTreeFlt(op1);
+
+ /* Store the FP value into the temp */
+ TempDsc* temp = compiler->tmpGetTemp(TYP_INT);
+
+ FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
+ FlatFPX87_Kill(&compCurFPState, op1->gtRegNum);
+ inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0);
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+ regTracker.rsTrackRegTrash(reg);
+
+ inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT);
+
+ compiler->tmpRlsTemp(temp);
+#else
+ genCodeForTreeFloat(tree, needReg, bestReg);
+ return;
+#endif
+ }
+ break;
+
+ default:
+ noway_assert(!"unexpected math intrinsic");
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_LCLHEAP:
+
+ reg = genLclHeap(op1);
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ genCodeForRelop(tree, destReg, bestReg);
+ return;
+
+ case GT_ADDR:
+
+ genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
+ return;
+
+#ifdef _TARGET_XARCH_
+ case GT_LOCKADD:
+
+ // This is for a locked add operation. We know that the resulting value doesn't "go" anywhere.
+ // For reference, op1 is the location. op2 is the addend or the value.
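+ // A constant addend of +1/-1 can be encoded as 'lock inc/dec [mem]'; any other
+ // constant falls back to 'lock add [mem], imm' (see the switch below).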
+ if (op2->OperIsConst())
+ {
+ noway_assert(op2->TypeGet() == TYP_INT);
+ ssize_t cns = op2->gtIntCon.gtIconVal;
+
+ genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
+ switch (cns)
+ {
+ case 1:
+ instGen(INS_lock);
+ instEmit_RM(INS_inc, op1, op1, 0);
+ break;
+ case -1:
+ instGen(INS_lock);
+ instEmit_RM(INS_dec, op1, op1, 0);
+ break;
+ default:
+ assert((int)cns == cns); // By test above for AMD64.
+ instGen(INS_lock);
+ inst_AT_IV(INS_add, EA_4BYTE, op1, (int)cns, 0);
+ break;
+ }
+ genReleaseReg(op1);
+ }
+ else
+ {
+ // non constant addend means it needs to go into a register.
+ ins = INS_add;
+ goto LockBinOpCommon;
+ }
+
+ genFlagsEqualToNone(); // We didn't compute a result into a register.
+ genUpdateLife(tree); // We didn't compute an operand into anything.
+ return;
+
+ case GT_XADD:
+ ins = INS_xadd;
+ goto LockBinOpCommon;
+ case GT_XCHG:
+ ins = INS_xchg;
+ goto LockBinOpCommon;
+ LockBinOpCommon:
+ {
+ // Compute the second operand into a register. xadd and xchg are r/m32, r32. So even if op2
+ // is a constant, it needs to be in a register. This should be the output register if
+ // possible.
+ //
+ // For reference, gtOp1 is the location. gtOp2 is the addend or the value.
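+ //
+ // Roughly, the emitted sequence for GT_XADD ends up as (sketch):
+ //     lock
+ //     xadd dword ptr [location], reg
+ // which leaves the original memory value in 'reg'; GT_XCHG uses 'xchg', which
+ // implies the lock prefix by itself.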
+
+ GenTreePtr location = op1;
+ GenTreePtr value = op2;
+
+ // Again, a friendly reminder. IL calling convention is left to right.
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ // The atomic operations destroy this argument, so force it into a scratch register
+ reg = regSet.rsPickFreeReg();
+ genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
+
+ // Must evaluate location into a register
+ genCodeForTree(location, needReg, RBM_NONE);
+ assert(location->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(location);
+ regSet.rsLockUsedReg(genRegMask(location->gtRegNum));
+ genRecoverReg(value, RBM_NONE, RegSet::KEEP_REG);
+ regSet.rsUnlockUsedReg(genRegMask(location->gtRegNum));
+
+ if (ins != INS_xchg)
+ {
+ // xchg implies the lock prefix, but xadd and add require it.
+ instGen(INS_lock);
+ }
+ instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
+ genReleaseReg(value);
+ regTracker.rsTrackRegTrash(reg);
+ genReleaseReg(location);
+ }
+ else
+ {
+ regMaskTP addrReg;
+ if (genMakeIndAddrMode(location, tree, false, /* not for LEA */
+ needReg, RegSet::KEEP_REG, &addrReg))
+ {
+ genUpdateLife(location);
+
+ reg = regSet.rsPickFreeReg();
+ genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
+ addrReg = genKeepAddressable(location, addrReg, genRegMask(reg));
+
+ if (ins != INS_xchg)
+ {
+ // xchg implies the lock prefix, but xadd and add require it.
+ instGen(INS_lock);
+ }
+
+ // instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
+ // inst_TT_RV(ins, location, reg);
+ sched_AM(ins, EA_4BYTE, reg, false, location, 0);
+
+ genReleaseReg(value);
+ regTracker.rsTrackRegTrash(reg);
+ genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
+ }
+ else
+ {
+ // Must evaluate location into a register.
+ genCodeForTree(location, needReg, RBM_NONE);
+ assert(location->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(location);
+
+ // xadd destroys this argument, so force it into a scratch register
+ reg = regSet.rsPickFreeReg();
+ genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
+ regSet.rsLockUsedReg(genRegMask(value->gtRegNum));
+ genRecoverReg(location, RBM_NONE, RegSet::KEEP_REG);
+ regSet.rsUnlockUsedReg(genRegMask(value->gtRegNum));
+
+ if (ins != INS_xchg)
+ {
+ // xchg implies the lock prefix, but xadd and add require it.
+ instGen(INS_lock);
+ }
+
+ instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
+
+ genReleaseReg(value);
+ regTracker.rsTrackRegTrash(reg);
+ genReleaseReg(location);
+ }
+ }
+
+ // The flags are equal to the target of the tree (i.e. the result of the add), not to the
+ // result in the register. If tree is actually GT_IND->GT_ADDR->GT_LCL_VAR, we could use
+ // that information to set the flags. Doesn't seem like there is a good reason for that.
+ // Therefore, trash the flags.
+ genFlagsEqualToNone();
+
+ if (ins == INS_add)
+ {
+ // If the operator was add, then we were called from the GT_LOCKADD
+ // case. In that case we don't use the result, so we don't need to
+ // update anything.
+ genUpdateLife(tree);
+ }
+ else
+ {
+ genCodeForTree_DONE(tree, reg);
+ }
+ }
+ return;
+
+#else // !_TARGET_XARCH_
+
+ case GT_LOCKADD:
+ case GT_XADD:
+ case GT_XCHG:
+
+ NYI_ARM("LOCK instructions");
+#endif
+
+ case GT_ARR_LENGTH:
+ {
+ // Make the corresponding ind(a + c) node, and do codegen for that.
+ GenTreePtr addr = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, tree->gtArrLen.ArrRef(),
+ compiler->gtNewIconNode(tree->AsArrLen()->ArrLenOffset()));
+ tree->SetOper(GT_IND);
+ tree->gtFlags |= GTF_IND_ARR_LEN; // Record that this node represents an array length expression.
+ assert(tree->TypeGet() == TYP_INT);
+ tree->gtOp.gtOp1 = addr;
+ genCodeForTree(tree, destReg, bestReg);
+ return;
+ }
+
+ case GT_OBJ:
+ // All GT_OBJ nodes must have been morphed prior to this.
+ noway_assert(!"Should not see a GT_OBJ node during CodeGen.");
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected unary/binary operator");
+ } // end switch (oper)
+
+ unreached();
+}
+#ifdef _PREFAST_
+#pragma warning(pop) // End suppress PREFast warning about overly large function
+#endif
+
+regNumber CodeGen::genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg)
+{
+ instruction ins;
+ emitAttr size;
+ bool unsv;
+ bool andv = false;
+ regNumber reg;
+ GenTreePtr op1 = tree->gtOp.gtOp1->gtEffectiveVal();
+ var_types dstType = tree->CastToType();
+ var_types srcType = op1->TypeGet();
+
+ if (genTypeSize(srcType) < genTypeSize(dstType))
+ {
+ // Widening cast
+
+ /* we need the source size */
+
+ size = EA_ATTR(genTypeSize(srcType));
+
+ noway_assert(size < EA_PTRSIZE);
+
+ unsv = varTypeIsUnsigned(srcType);
+ ins = ins_Move_Extend(srcType, op1->InReg());
+
+ /*
+ Special case: for a cast of byte to char we first
+ have to expand the byte (w/ sign extension), then
+ mask off the high bits.
+ Use 'movsx' followed by 'and'
+ */
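+ // e.g. (x86 sketch):  movsx reg, byte ptr [src]
+ //                     and   reg, 0xFFFF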
+ if (!unsv && varTypeIsUnsigned(dstType) && genTypeSize(dstType) < EA_4BYTE)
+ {
+ noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
+ andv = true;
+ }
+ }
+ else
+ {
+ // Narrowing cast, or sign-changing cast
+
+ noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
+
+ size = EA_ATTR(genTypeSize(dstType));
+
+ unsv = varTypeIsUnsigned(dstType);
+ ins = ins_Move_Extend(dstType, op1->InReg());
+ }
+
+ noway_assert(size < EA_PTRSIZE);
+
+ // Set bestReg to the same register as op1 if op1 is a regVar and that register is available
+ if (op1->InReg())
+ {
+ regMaskTP op1RegMask = genRegMask(op1->gtRegNum);
+ if ((((op1RegMask & bestReg) != 0) || (bestReg == 0)) && ((op1RegMask & regSet.rsRegMaskFree()) != 0))
+ {
+ bestReg = op1RegMask;
+ }
+ }
+
+ /* Is the value sitting in a non-byte-addressable register? */
+
+ if (op1->InReg() && (size == EA_1BYTE) && !isByteReg(op1->gtRegNum))
+ {
+ if (unsv)
+ {
+ // for unsigned values we can use AND, so the value need not be in a byte register
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ ins = INS_AND;
+ }
+ else
+ {
+ /* Move the value into a byte register */
+
+ reg = regSet.rsGrabReg(RBM_BYTE_REGS);
+ }
+
+ if (reg != op1->gtRegNum)
+ {
+ /* Move the value into that register */
+
+ regTracker.rsTrackRegCopy(reg, op1->gtRegNum);
+ inst_RV_RV(INS_mov, reg, op1->gtRegNum, srcType);
+
+ /* The value has a new home now */
+
+ op1->gtRegNum = reg;
+ }
+ }
+ else
+ {
+ /* Pick a register for the value (general case) */
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ // if we (might) need to set the flags and the value is in the same register
+ // and we have an unsigned value then use AND instead of MOVZX
+ if (tree->gtSetFlags() && unsv && op1->InReg() && (op1->gtRegNum == reg))
+ {
+#ifdef _TARGET_X86_
+ noway_assert(ins == INS_movzx);
+#endif
+ ins = INS_AND;
+ }
+ }
+
+ if (ins == INS_AND)
+ {
+ noway_assert(andv == false && unsv);
+
+ /* Generate "and reg, MASK */
+
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_AND, reg, (size == EA_1BYTE) ? 0xFF : 0xFFFF, EA_4BYTE, flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+ }
+ else
+ {
+#ifdef _TARGET_XARCH_
+ noway_assert(ins == INS_movsx || ins == INS_movzx);
+#endif
+
+ /* Generate "movsx/movzx reg, [addr]" */
+
+ inst_RV_ST(ins, size, reg, op1);
+
+ /* Mask off high bits for cast from byte to char */
+
+ if (andv)
+ {
+#ifdef _TARGET_XARCH_
+ noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
+#endif
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_AND, reg, 0xFFFF, EA_4BYTE, flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+ }
+ }
+
+ regTracker.rsTrackRegTrash(reg);
+ return reg;
+}
+
+void CodeGen::genCodeForNumericCast(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types dstType = tree->CastToType();
+ var_types baseType = TYP_INT;
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP needReg = destReg;
+ regMaskTP addrReg;
+ emitAttr size;
+ BOOL unsv;
+
+ /*
+ * Constant casts should have been folded earlier
+ * If not finite don't bother
+ * We don't do this optimization for debug code/no optimization
+ */
+
+ noway_assert((op1->gtOper != GT_CNS_INT && op1->gtOper != GT_CNS_LNG && op1->gtOper != GT_CNS_DBL) ||
+ tree->gtOverflow() || (op1->gtOper == GT_CNS_DBL && !_finite(op1->gtDblCon.gtDconVal)) ||
+ !compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD));
+
+ noway_assert(dstType != TYP_VOID);
+
+ /* What type are we casting from? */
+
+ switch (op1->TypeGet())
+ {
+ case TYP_LONG:
+
+ /* Special case: the long is generated via the mod of long
+ with an int. This is really an int and need not be
+ converted to a reg pair. NOTE: the flag only indicates
+ that this is a cast to TYP_INT; it hasn't actually
+ verified the second operand of the MOD! */
+
+ if (((op1->gtOper == GT_MOD) || (op1->gtOper == GT_UMOD)) && (op1->gtFlags & GTF_MOD_INT_RESULT))
+ {
+
+ /* Verify that the op2 of the mod node is
+ 1) An integer tree, or
+ 2) A long constant that is small enough to fit in an integer
+ */
+
+ GenTreePtr modop2 = op1->gtOp.gtOp2;
+ if ((genActualType(modop2->gtType) == TYP_INT) ||
+ ((modop2->gtOper == GT_CNS_LNG) && (modop2->gtLngCon.gtLconVal == (int)modop2->gtLngCon.gtLconVal)))
+ {
+ genCodeForTree(op1, destReg, bestReg);
+
+#ifdef _TARGET_64BIT_
+ reg = op1->gtRegNum;
+#else // _TARGET_64BIT_
+ reg = genRegPairLo(op1->gtRegPair);
+#endif //_TARGET_64BIT_
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+ }
+
+ /* Make the operand addressable. When gtOverflow() is true,
+ hold on to the addrReg as we will need it to access the higher dword */
+
+ op1 = genCodeForCommaTree(op1); // Strip off any commas (necessary, since we seem to generate code for op1
+ // twice!)
+ // See, e.g., the TYP_INT case below...
+
+ addrReg = genMakeAddressable2(op1, 0, tree->gtOverflow() ? RegSet::KEEP_REG : RegSet::FREE_REG, false);
+
+ /* Load the lower half of the value into some register */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ /* Can we simply use the low part of the value? */
+ reg = genRegPairLo(op1->gtRegPair);
+
+ if (tree->gtOverflow())
+ goto REG_OK;
+
+ regMaskTP loMask;
+ loMask = genRegMask(reg);
+ if (loMask & regSet.rsRegMaskFree())
+ bestReg = loMask;
+ }
+
+ // for cast overflow we need to preserve addrReg for testing the hiDword
+ // so we lock it to prevent regSet.rsPickReg from picking it.
+ if (tree->gtOverflow())
+ regSet.rsLockUsedReg(addrReg);
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ if (tree->gtOverflow())
+ regSet.rsUnlockUsedReg(addrReg);
+
+ noway_assert(genStillAddressable(op1));
+
+ REG_OK:
+ if (((op1->gtFlags & GTF_REG_VAL) == 0) || (reg != genRegPairLo(op1->gtRegPair)))
+ {
+ /* Generate "mov reg, [addr-mode]" */
+ inst_RV_TT(ins_Load(TYP_INT), reg, op1);
+ }
+
+ /* conv.ovf.i8i4, or conv.ovf.u8u4 */
+
+ if (tree->gtOverflow())
+ {
+ regNumber hiReg = (op1->gtFlags & GTF_REG_VAL) ? genRegPairHi(op1->gtRegPair) : REG_NA;
+
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+
+ switch (dstType)
+ {
+ case TYP_INT:
+ // conv.ovf.i8.i4
+ /* Generate the following sequence
+
+ test loDWord, loDWord // set flags
+ jl neg
+ pos: test hiDWord, hiDWord // set flags
+ jne ovf
+ jmp done
+ neg: cmp hiDWord, 0xFFFFFFFF
+ jne ovf
+ done:
+
+ */
+
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, reg);
+ if (tree->gtFlags & GTF_UNSIGNED) // conv.ovf.u8.i4 (i4 > 0 and upper bits 0)
+ {
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ goto UPPER_BITS_ZERO;
+ }
+
+#if CPU_LOAD_STORE_ARCH
+ // This is tricky.
+ // We will generate code like
+ // if (...)
+ // {
+ // ...
+ // }
+ // else
+ // {
+ // ...
+ // }
+ // We load the tree op1 into registers when we generate code for the if clause.
+ // When we generate the else clause, we see the tree is already loaded into a register and start
+ // using it directly.
+ // However, at run time we may execute the else clause without going through the if clause.
+ //
+ genCodeForTree(op1, 0);
+#endif
+
+ BasicBlock* neg;
+ BasicBlock* done;
+
+ neg = genCreateTempLabel();
+ done = genCreateTempLabel();
+
+ // Is the loDWord positive or negative
+ inst_JMP(jmpLTS, neg);
+
+ // If loDWord is positive, hiDWord should be 0 (sign extended loDWord)
+
+ if (hiReg < REG_STK)
+ {
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg);
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, 0x00000000, 4);
+ }
+
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+ inst_JMP(EJ_jmp, done);
+
+ // If loDWord is negative, hiDWord should be -1 (sign extended loDWord)
+
+ genDefineTempLabel(neg);
+
+ if (hiReg < REG_STK)
+ {
+ inst_RV_IV(INS_cmp, hiReg, 0xFFFFFFFFL, EA_4BYTE);
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, 0xFFFFFFFFL, 4);
+ }
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+
+ // Done
+
+ genDefineTempLabel(done);
+
+ break;
+
+ case TYP_UINT: // conv.ovf.u8u4
+ UPPER_BITS_ZERO:
+ // Just check that the upper DWord is 0
+
+ if (hiReg < REG_STK)
+ {
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, 0, 4);
+ }
+
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+ break;
+
+ default:
+ noway_assert(!"Unexpected dstType");
+ break;
+ }
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ }
+
+ regTracker.rsTrackRegTrash(reg);
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_SHORT:
+ case TYP_CHAR:
+ case TYP_UBYTE:
+ break;
+
+ case TYP_UINT:
+ case TYP_INT:
+ break;
+
+#if FEATURE_STACK_FP_X87
+ case TYP_FLOAT:
+ NO_WAY("OPCAST from TYP_FLOAT should have been converted into a helper call");
+ break;
+
+ case TYP_DOUBLE:
+ if (compiler->opts.compCanUseSSE2)
+ {
+ // do the SSE2 based cast inline
+ // getting the fp operand
+
+ regMaskTP addrRegInt = 0;
+ regMaskTP addrRegFlt = 0;
+
+ // make the operand addressable
+ // We don't want to collapse constant doubles into floats, as the SSE2 instruction
+ // operates on doubles. Note that these (casts from constant doubles) usually get
+ // folded, but we don't do it for some cases (infinities, etc.). So essentially this
+ // shouldn't affect performance or size at all. We're fixing this for #336067
+ op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt, false);
+ if (!addrRegFlt && !op1->IsRegVar())
+ {
+ // we have the address
+
+ inst_RV_TT(INS_movsdsse2, REG_XMM0, op1, 0, EA_8BYTE);
+ genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
+ genUpdateLife(op1);
+
+ reg = regSet.rsPickReg(needReg);
+ getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
+
+ regTracker.rsTrackRegTrash(reg);
+ genCodeForTree_DONE(tree, reg);
+ }
+ else
+ {
+ // we will need to use a temp to get it into the xmm reg
+ var_types typeTemp = op1->TypeGet();
+ TempDsc* temp = compiler->tmpGetTemp(typeTemp);
+
+ size = EA_ATTR(genTypeSize(typeTemp));
+
+ if (addrRegFlt)
+ {
+ // On the fp stack; Take reg to top of stack
+
+ FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
+ }
+ else
+ {
+ // op1->IsRegVar()
+ // pick a register
+ reg = regSet.PickRegFloat();
+ if (!op1->IsRegVarDeath())
+ {
+ // Load it on the fp stack
+ genLoadStackFP(op1, reg);
+ }
+ else
+ {
+ // if it's dying, genLoadStackFP just renames it and then we move reg to TOS
+ genLoadStackFP(op1, reg);
+ FlatFPX87_MoveToTOS(&compCurFPState, reg);
+ }
+ }
+
+ // pop it off the fp stack
+ compCurFPState.Pop();
+
+ getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
+ // pick a reg
+ reg = regSet.rsPickReg(needReg);
+
+ inst_RV_ST(INS_movsdsse2, REG_XMM0, temp, 0, TYP_DOUBLE, EA_8BYTE);
+ getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
+
+ // done..release the temp
+ compiler->tmpRlsTemp(temp);
+
+ // the reg is now trashed
+ regTracker.rsTrackRegTrash(reg);
+ genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
+ genUpdateLife(op1);
+ genCodeForTree_DONE(tree, reg);
+ }
+ }
+#else
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ genCodeForTreeFloat(tree, needReg, bestReg);
+#endif // FEATURE_STACK_FP_X87
+ return;
+
+ default:
+ noway_assert(!"unexpected cast type");
+ }
+
+ if (tree->gtOverflow())
+ {
+ /* Compute op1 into a register, and free the register */
+
+ genComputeReg(op1, destReg, RegSet::ANY_REG, RegSet::FREE_REG);
+ reg = op1->gtRegNum;
+
+ /* Do we need to compare the value, or just check masks */
+
+ ssize_t typeMin = DUMMY_INIT(~0), typeMax = DUMMY_INIT(0);
+ ssize_t typeMask;
+
+ switch (dstType)
+ {
+ case TYP_BYTE:
+ typeMask = ssize_t((int)0xFFFFFF80);
+ typeMin = SCHAR_MIN;
+ typeMax = SCHAR_MAX;
+ unsv = (tree->gtFlags & GTF_UNSIGNED);
+ break;
+ case TYP_SHORT:
+ typeMask = ssize_t((int)0xFFFF8000);
+ typeMin = SHRT_MIN;
+ typeMax = SHRT_MAX;
+ unsv = (tree->gtFlags & GTF_UNSIGNED);
+ break;
+ case TYP_INT:
+ typeMask = ssize_t((int)0x80000000L);
+#ifdef _TARGET_64BIT_
+ unsv = (tree->gtFlags & GTF_UNSIGNED);
+ typeMin = INT_MIN;
+ typeMax = INT_MAX;
+#else // _TARGET_64BIT_
+ noway_assert((tree->gtFlags & GTF_UNSIGNED) != 0);
+ unsv = true;
+#endif // _TARGET_64BIT_
+ break;
+ case TYP_UBYTE:
+ unsv = true;
+ typeMask = ssize_t((int)0xFFFFFF00L);
+ break;
+ case TYP_CHAR:
+ unsv = true;
+ typeMask = ssize_t((int)0xFFFF0000L);
+ break;
+ case TYP_UINT:
+ unsv = true;
+#ifdef _TARGET_64BIT_
+ typeMask = 0xFFFFFFFF00000000LL;
+#else // _TARGET_64BIT_
+ typeMask = 0x80000000L;
+ noway_assert((tree->gtFlags & GTF_UNSIGNED) == 0);
+#endif // _TARGET_64BIT_
+ break;
+ default:
+ NO_WAY("Unknown type");
+ return;
+ }
+
+ // If we just have to check a mask.
+ // This must be conv.ovf.u4u1, conv.ovf.u4u2, conv.ovf.u4i4,
+ // or conv.i4u4
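+ //
+ // e.g. (sketch) for a cast to TYP_UBYTE this becomes:
+ //     test reg, 0xFFFFFF00
+ //     jne  <overflow throw block>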
+
+ if (unsv)
+ {
+ inst_RV_IV(INS_TEST, reg, typeMask, emitActualTypeSize(baseType));
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+ }
+ else
+ {
+ // Check the value is in range.
+ // This must be conv.ovf.i4i1, etc.
+
+ // Compare with the MAX
+
+ noway_assert(typeMin != DUMMY_INIT(~0) && typeMax != DUMMY_INIT(0));
+
+ inst_RV_IV(INS_cmp, reg, typeMax, emitActualTypeSize(baseType));
+ emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpGTS, SCK_OVERFLOW);
+
+ // Compare with the MIN
+
+ inst_RV_IV(INS_cmp, reg, typeMin, emitActualTypeSize(baseType));
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ /* Make the operand addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
+
+ reg = genIntegerCast(tree, needReg, bestReg);
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a leaf node of type GT_ADDR
+ */
+
+void CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1;
+ regNumber reg;
+ regMaskTP needReg = destReg;
+ regMaskTP addrReg;
+
+#ifdef DEBUG
+ reg = (regNumber)0xFEEFFAAF; // to detect uninitialized use
+ addrReg = 0xDEADCAFE;
+#endif
+
+ // We should get here for ldloca, ldarga, ldsflda, ldelema,
+ // or ldflda.
+ if (oper == GT_ARR_ELEM)
+ {
+ op1 = tree;
+ }
+ else
+ {
+ op1 = tree->gtOp.gtOp1;
+ }
+
+ // (tree=op1, needReg=0, keepReg=RegSet::FREE_REG, smallOK=true)
+ if (oper == GT_ARR_ELEM)
+ {
+ // To get the address of the array element,
+ // we first call genMakeAddrArrElem to make the element addressable.
+ // (That is, for example, we first emit code to calculate EBX, and EAX.)
+ // And then use lea to obtain the address.
+ // (That is, for example, we then emit
+ // lea EBX, bword ptr [EBX+4*EAX+36]
+ // to obtain the address of the array element.)
+ addrReg = genMakeAddrArrElem(op1, tree, RBM_NONE, RegSet::FREE_REG);
+ }
+ else
+ {
+ addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG, true);
+ }
+
+ noway_assert(treeType == TYP_BYREF || treeType == TYP_I_IMPL);
+
+ // We want to reuse one of the scratch registers that were used
+ // in forming the address mode as the target register for the lea.
+ // If bestReg is unset or if it is set to one of the registers used to
+ // form the address (i.e. addrReg), we calculate the scratch register
+ // to use as the target register for the LEA
+
+ bestReg = regSet.rsUseIfZero(bestReg, addrReg);
+ bestReg = regSet.rsNarrowHint(bestReg, addrReg);
+
+ /* Even if addrReg is regSet.rsRegMaskCanGrab(), regSet.rsPickReg() won't spill
+ it since keepReg==false.
+ If addrReg can't be grabbed, regSet.rsPickReg() won't touch it anyway.
+ So this is guaranteed not to spill addrReg */
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ // Slight workaround: force the inst routine to think that the
+ // value being loaded is an int (since that is what
+ // LEA will return); otherwise it would try to allocate
+ // two registers for a long, etc.
+ noway_assert(treeType == TYP_I_IMPL || treeType == TYP_BYREF);
+ op1->gtType = treeType;
+
+ inst_RV_TT(INS_lea, reg, op1, 0, (treeType == TYP_BYREF) ? EA_BYREF : EA_PTRSIZE);
+
+ // The Lea instruction above better not have tried to put the
+ // 'value' pointed to by 'op1' in a register, LEA will not work.
+ noway_assert(!(op1->gtFlags & GTF_REG_VAL));
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+ // gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0);
+
+ regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it
+ gcInfo.gcMarkRegPtrVal(reg, treeType);
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+#ifdef _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Move (load/store) between float ret regs and struct promoted variable.
+ *
+ * varDsc - The struct variable to be loaded from or stored into.
+ * isLoadIntoFlt - Perform a load operation if "true" or store if "false."
+ *
+ */
+void CodeGen::genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFlt)
+{
+ regNumber curReg = REG_FLOATRET;
+
+ unsigned lclLast = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
+ for (unsigned lclNum = varDsc->lvFieldLclStart; lclNum <= lclLast; ++lclNum)
+ {
+ LclVarDsc* varDscFld = &compiler->lvaTable[lclNum];
+
+ // Is the struct field promoted and sitting in a register?
+ if (varDscFld->lvRegister)
+ {
+ // Move from the struct field into curReg if load
+ // else move into struct field from curReg if store
+ regNumber srcReg = (isLoadIntoFlt) ? varDscFld->lvRegNum : curReg;
+ regNumber dstReg = (isLoadIntoFlt) ? curReg : varDscFld->lvRegNum;
+ if (srcReg != dstReg)
+ {
+ inst_RV_RV(ins_Copy(varDscFld->TypeGet()), dstReg, srcReg, varDscFld->TypeGet());
+ regTracker.rsTrackRegCopy(dstReg, srcReg);
+ }
+ }
+ else
+ {
+ // This field is in memory, do a move between the field and float registers.
+ emitAttr size = (varDscFld->TypeGet() == TYP_DOUBLE) ? EA_8BYTE : EA_4BYTE;
+ if (isLoadIntoFlt)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(varDscFld->TypeGet()), size, curReg, lclNum, 0);
+ regTracker.rsTrackRegTrash(curReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_S_R(ins_Store(varDscFld->TypeGet()), size, curReg, lclNum, 0);
+ }
+ }
+
+ // Advance the current reg.
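+ // A TYP_DOUBLE field occupies two consecutive single-precision registers, so skip two.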
+ curReg = (varDscFld->TypeGet() == TYP_DOUBLE) ? REG_NEXT(REG_NEXT(curReg)) : REG_NEXT(curReg);
+ }
+}
+
+void CodeGen::genLoadIntoFltRetRegs(GenTreePtr tree)
+{
+ assert(tree->TypeGet() == TYP_STRUCT);
+ assert(tree->gtOper == GT_LCL_VAR);
+ LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
+ int slots = varDsc->lvSize() / REGSIZE_BYTES;
+ if (varDsc->lvPromoted)
+ {
+ genLdStFltRetRegsPromotedVar(varDsc, true);
+ }
+ else
+ {
+ if (slots <= 2)
+ {
+ // Use the load float/double instruction.
+ inst_RV_TT(ins_Load((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), REG_FLOATRET, tree, 0,
+ (slots == 1) ? EA_4BYTE : EA_8BYTE);
+ }
+ else
+ {
+ // Use the load/store multiple instruction (vldm).
+ regNumber reg = regSet.rsPickReg(RBM_ALLINT);
+ inst_RV_TT(INS_lea, reg, tree, 0, EA_PTRSIZE);
+ regTracker.rsTrackRegTrash(reg);
+ getEmitter()->emitIns_R_R_I(INS_vldm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
+ }
+ }
+ genMarkTreeInReg(tree, REG_FLOATRET);
+}
+
+void CodeGen::genStoreFromFltRetRegs(GenTreePtr tree)
+{
+ assert(tree->TypeGet() == TYP_STRUCT);
+ assert(tree->OperGet() == GT_ASG);
+
+ // LHS should be lcl var or fld.
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // TODO: We had a bug where op1 was a GT_IND, the result of morphing a GT_BOX, and not properly
+ // handling multiple levels of inlined functions that return HFA on the right-hand-side.
+ // So, make the op1 check a noway_assert (that exists in non-debug builds) so we'll fall
+ // back to MinOpts with no inlining, if we don't have what we expect. We don't want to
+ // do the full IsHfa() check in non-debug, since that involves VM calls, so leave that
+ // as a regular assert().
+ noway_assert((op1->gtOper == GT_LCL_VAR) || (op1->gtOper == GT_LCL_FLD));
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ assert(compiler->IsHfa(compiler->lvaGetStruct(varNum)));
+
+ // The RHS should be a call.
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ assert(op2->gtOper == GT_CALL);
+
+ // Generate code for call and copy the return registers into the local.
+ regMaskTP retMask = genCodeForCall(op2, true);
+
+ // Ret mask should be contiguously set from s0 up to s3, or from d0 up to d3.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ regMaskTP mask = ((retMask >> REG_FLOATRET) + 1);
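+ // (retMask >> REG_FLOATRET) + 1 is a power of two when the returned float register
+ // bits form a contiguous run starting at s0; the asserts below check exactly that.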
+ assert((mask & (mask - 1)) == 0);
+ assert(mask <= (1 << MAX_HFA_RET_SLOTS));
+ assert((retMask & (((regMaskTP)RBM_FLOATRET) - 1)) == 0);
+#endif
+
+ int slots = genCountBits(retMask & RBM_ALLFLOAT);
+
+ LclVarDsc* varDsc = &compiler->lvaTable[varNum];
+
+ if (varDsc->lvPromoted)
+ {
+ genLdStFltRetRegsPromotedVar(varDsc, false);
+ }
+ else
+ {
+ if (slots <= 2)
+ {
+ inst_TT_RV(ins_Store((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), op1, REG_FLOATRET, 0,
+ (slots == 1) ? EA_4BYTE : EA_8BYTE);
+ }
+ else
+ {
+ regNumber reg = regSet.rsPickReg(RBM_ALLINT);
+ inst_RV_TT(INS_lea, reg, op1, 0, EA_PTRSIZE);
+ regTracker.rsTrackRegTrash(reg);
+ getEmitter()->emitIns_R_R_I(INS_vstm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
+ }
+ }
+}
+
+#endif // _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Generate code for a GT_ASG tree
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_ASG);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ regMaskTP needReg = RBM_ALLINT;
+ regMaskTP bestReg = RBM_CORRUPT;
+ regMaskTP addrReg = DUMMY_INIT(RBM_CORRUPT);
+ bool ovfl = false; // Do we need an overflow check
+ bool volat = false; // Is this a volatile store
+ regMaskTP regGC;
+ instruction ins;
+#ifdef DEBUGGING_SUPPORT
+ unsigned lclVarNum = compiler->lvaCount;
+ unsigned lclILoffs = DUMMY_INIT(0);
+#endif
+
+#ifdef _TARGET_ARM_
+ if (tree->gtType == TYP_STRUCT)
+ {
+ // We use copy block to assign structs, however to receive HFAs in registers
+ // from a CALL, we use assignment, var = (hfa) call();
+ assert(compiler->IsHfa(tree));
+ genStoreFromFltRetRegs(tree);
+ return;
+ }
+#endif
+
+#ifdef DEBUG
+ if (varTypeIsFloating(op1) != varTypeIsFloating(op2))
+ {
+ if (varTypeIsFloating(op1))
+ assert(!"Bad IL: Illegal assignment of integer into float!");
+ else
+ assert(!"Bad IL: Illegal assignment of float into integer!");
+ }
+#endif
+
+ if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
+ {
+ op1 = genCodeForCommaTree(op1); // Strip away any comma expressions.
+ }
+
+ /* Is the target a register or local variable? */
+ switch (op1->gtOper)
+ {
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ case GT_LCL_VAR:
+ varNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ * Remember the local var info to call siCheckVarScope
+ * AFTER code generation of the assignment.
+ */
+ if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
+ {
+ lclVarNum = varNum;
+ lclILoffs = op1->gtLclVar.gtLclILoffs;
+ }
+#endif
+
+ /* Check against dead store ? (with min opts we may have dead stores) */
+
+ noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
+
+ /* Does this variable live in a register? */
+
+ if (genMarkLclVar(op1))
+ goto REG_VAR2;
+
+ break;
+
+ REG_VAR2:
+
+ /* Get hold of the target register */
+
+ regNumber op1Reg;
+
+ op1Reg = op1->gtRegVar.gtRegNum;
+
+#ifdef DEBUG
+ /* Compute the RHS (hopefully) into the variable's register.
+ For debuggable code, op1Reg may already be part of regSet.rsMaskVars,
+ as variables are kept alive everywhere. So we have to be
+ careful if we want to compute the value directly into
+ the variable's register. */
+
+ bool needToUpdateRegSetCheckLevel;
+ needToUpdateRegSetCheckLevel = false;
+#endif
+
+ // We should only be accessing lvVarIndex if varDsc is tracked.
+ assert(varDsc->lvTracked);
+
+ if (VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex))
+ {
+ noway_assert(compiler->opts.compDbgCode);
+
+ /* The predictor might expect us to generate op2 directly
+ into the var's register. However, since the variable is
+ already alive, first kill it and its register. */
+
+ if (rpCanAsgOperWithoutReg(op2, true))
+ {
+ genUpdateLife(VarSetOps::RemoveElem(compiler, compiler->compCurLife, varDsc->lvVarIndex));
+ needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
+#ifdef DEBUG
+ needToUpdateRegSetCheckLevel = true;
+#endif
+ }
+ }
+ else
+ {
+ needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
+ }
+
+#ifdef DEBUG
+
+ /* Special cases: op2 is a GT_CNS_INT */
+
+ if (op2->gtOper == GT_CNS_INT && !(op1->gtFlags & GTF_VAR_DEATH))
+ {
+ /* Save the old life status */
+
+ VarSetOps::Assign(compiler, genTempOldLife, compiler->compCurLife);
+ VarSetOps::AddElemD(compiler, compiler->compCurLife, varDsc->lvVarIndex);
+
+ /* Set a flag to avoid printing the message
+ and remember that life was changed. */
+
+ genTempLiveChg = false;
+ }
+#endif
+
+#ifdef DEBUG
+ if (needToUpdateRegSetCheckLevel)
+ compiler->compRegSetCheckLevel++;
+#endif
+ genCodeForTree(op2, needReg, genRegMask(op1Reg));
+#ifdef DEBUG
+ if (needToUpdateRegSetCheckLevel)
+ compiler->compRegSetCheckLevel--;
+ noway_assert(compiler->compRegSetCheckLevel >= 0);
+#endif
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make sure the value ends up in the right place ... */
+
+ if (op2->gtRegNum != op1Reg)
+ {
+ /* Make sure the target of the store is available */
+
+ if (regSet.rsMaskUsed & genRegMask(op1Reg))
+ regSet.rsSpillReg(op1Reg);
+
+#ifdef _TARGET_ARM_
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ // This can only occur when we are returning a non-HFA struct
+ // that is composed of a single float field.
+ //
+ inst_RV_RV(INS_vmov_i2f, op1Reg, op2->gtRegNum, op1->TypeGet());
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+ inst_RV_RV(INS_mov, op1Reg, op2->gtRegNum, op1->TypeGet());
+ }
+
+ /* The value has been transferred to 'op1Reg' */
+
+ regTracker.rsTrackRegCopy(op1Reg, op2->gtRegNum);
+
+ if ((genRegMask(op2->gtRegNum) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(op2->gtRegNum));
+
+ gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
+ }
+ else
+ {
+ // First we need to remove it from the original reg set mask (or else trigger an
+ // assert when we add it to the other reg set mask).
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
+ gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
+
+ // The emitter has logic that tracks the GCness of registers and asserts if you
+ // try to do bad things to a GC pointer (like lose its GCness).
+
+ // An explicit cast of a GC pointer to an int (which is legal if the
+ // pointer is pinned) is encoded as an assignment of a GC source
+ // to an integer variable. Unfortunately if the source was the last
+ // use, and the source register gets reused by the destination, no
+ // code gets emitted (That is where we are at right now). The emitter
+ // thinks the register is a GC pointer (it did not see the cast).
+ // This causes asserts, as well as bad GC info since we will continue
+ // to report the register as a GC pointer even if we do arithmetic
+ // with it. So force the emitter to see the change in the type
+ // of variable by placing a label.
+ // We only have to do this check at this point because in the
+ // CAST morphing, we create a temp and assignment whenever we
+ // have a cast that loses its GCness.
+
+ if (varTypeGCtype(op2->TypeGet()) != varTypeGCtype(op1->TypeGet()))
+ {
+ void* label = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur);
+ }
+ }
+
+ addrReg = 0;
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, op1Reg, ovfl);
+ goto LExit;
+
+ case GT_LCL_FLD:
+
+ // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[op1->gtLclFld.gtLclNum].lvRegister == 0);
+ break;
+
+ case GT_CLS_VAR:
+
+ __fallthrough;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));
+
+ if (op1->gtFlags & GTF_IND_VOLATILE)
+ {
+ volat = true;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ /* Is the value being assigned a simple one? */
+
+ noway_assert(op2);
+ switch (op2->gtOper)
+ {
+ case GT_LCL_VAR:
+
+ if (!genMarkLclVar(op2))
+ goto SMALL_ASG;
+
+ __fallthrough;
+
+ case GT_REG_VAR:
+
+ /* Is the target a byte/short/char value? */
+
+ if (varTypeIsSmall(op1->TypeGet()))
+ goto SMALL_ASG;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ goto SMALL_ASG;
+
+ /* Make the target addressable */
+
+ op1 = genCodeForCommaTree(op1); // Strip away comma expressions.
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
+
+ /* Does the write barrier helper do the assignment? */
+
+ regGC = WriteBarrier(op1, op2, addrReg);
+
+ // Was assignment done by the WriteBarrier
+ if (regGC == RBM_NONE)
+ {
+#ifdef _TARGET_ARM_
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegVar.gtRegNum);
+
+ // This is done in WriteBarrier when (regGC != RBM_NONE)
+
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ }
+
+ /* Free up the RHS */
+ genUpdateLife(op2);
+
+ /* Remember that we've also touched the op2 register */
+
+ addrReg |= genRegMask(op2->gtRegVar.gtRegNum);
+ break;
+
+ case GT_CNS_INT:
+
+ ssize_t ival;
+ ival = op2->gtIntCon.gtIconVal;
+ emitAttr size;
+ size = emitTypeSize(tree->TypeGet());
+
+ ins = ins_Store(op1->TypeGet());
+
+ // If we are storing a constant into a local variable
+ // we extend the size of the store here;
+ // this normally takes place in CodeGen::inst_TT_IV on x86.
+ //
+ if ((op1->gtOper == GT_LCL_VAR) && (size < EA_4BYTE))
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+ // Fix the immediate by sign extending if needed
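+ // e.g. (sketch) for a signed byte local, ival == 0xFF widens to 0xFFFFFFFF (-1),
+ // so that the widened store below writes the correctly sign-extended value.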
+ if (!varTypeIsUnsigned(varDsc->TypeGet()))
+ {
+ if (size == EA_1BYTE)
+ {
+ if ((ival & 0x7f) != ival)
+ ival = ival | 0xffffff00;
+ }
+ else
+ {
+ assert(size == EA_2BYTE);
+ if ((ival & 0x7fff) != ival)
+ ival = ival | 0xffff0000;
+ }
+ }
+
+ // A local stack slot is at least 4 bytes in size, regardless of
+ // what the local var is typed as, so auto-promote it here
+ // unless it is a field of a promoted struct
+ if (!varDsc->lvIsStructField)
+ {
+ size = EA_SET_SIZE(size, EA_4BYTE);
+ ins = ins_Store(TYP_INT);
+ }
+ }
+
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
+
+#ifdef _TARGET_ARM_
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Move the value into the target */
+
+ noway_assert(op1->gtOper != GT_REG_VAR);
+ if (compiler->opts.compReloc && op2->IsIconHandle())
+ {
+ /* The constant is actually a handle that may need relocation
+ applied to it. genComputeReg will do the right thing (see
+ code in genCodeForTreeConst), so we'll just call it to load
+ the constant into a register. */
+
+ genComputeReg(op2, needReg & ~addrReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+ addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ inst_TT_RV(ins, op1, op2->gtRegNum);
+ genReleaseReg(op2);
+ }
+ else
+ {
+ regSet.rsLockUsedReg(addrReg);
+
+#if REDUNDANT_LOAD
+ bool copyIconFromReg = true;
+ regNumber iconReg = REG_NA;
+
+#ifdef _TARGET_ARM_
+ // Only if the constant can't be encoded in a small instruction,
+ // look for another register to copy the value from. (Assumes
+ // target is a small register.)
+ if ((op1->gtFlags & GTF_REG_VAL) && !isRegPairType(tree->gtType) &&
+ arm_Valid_Imm_For_Small_Mov(op1->gtRegNum, ival, INS_FLAGS_DONT_CARE))
+ {
+ copyIconFromReg = false;
+ }
+#endif // _TARGET_ARM_
+
+ if (copyIconFromReg)
+ {
+ iconReg = regTracker.rsIconIsInReg(ival);
+ if (iconReg == REG_NA)
+ copyIconFromReg = false;
+ }
+
+ if (copyIconFromReg && (isByteReg(iconReg) || (genTypeSize(tree->TypeGet()) == EA_PTRSIZE) ||
+ (genTypeSize(tree->TypeGet()) == EA_4BYTE)))
+ {
+ /* Move the value into the target */
+
+ inst_TT_RV(ins, op1, iconReg, 0, size);
+ }
+ else
+#endif // REDUNDANT_LOAD
+ {
+ inst_TT_IV(ins, op1, ival, 0, size);
+ }
+
+ regSet.rsUnlockUsedReg(addrReg);
+ }
+
+ /* Free up anything that was tied up by the LHS */
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ break;
+
+ default:
+
+ SMALL_ASG:
+
+ bool isWriteBarrier = false;
+ regMaskTP needRegOp1 = RBM_ALLINT;
+ RegSet::ExactReg mustReg = RegSet::ANY_REG; // set to RegSet::EXACT_REG for op1 and NOGC helpers
+
+ /* Is the LHS more complex than the RHS? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Is the target a byte/short/char value? */
+
+ if (varTypeIsSmall(op1->TypeGet()))
+ {
+ noway_assert(op1->gtOper != GT_LCL_VAR || (op1->gtFlags & GTF_VAR_CAST) ||
+ // TODO: Why does this have to be true?
+ compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvIsStructField ||
+ compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad());
+
+ if (op2->gtOper == GT_CAST && !op2->gtOverflow())
+ {
+ /* Special case: cast to small type */
+
+ if (op2->CastToType() >= op1->gtType)
+ {
+ /* Make sure the cast operand is not > int */
+
+ if (op2->CastFromType() <= TYP_INT)
+ {
+ /* Cast via a non-smaller type */
+
+ op2 = op2->gtCast.CastOp();
+ }
+ }
+ }
+
+ if (op2->gtOper == GT_AND && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ unsigned mask;
+ switch (op1->gtType)
+ {
+ case TYP_BYTE:
+ mask = 0x000000FF;
+ break;
+ case TYP_SHORT:
+ mask = 0x0000FFFF;
+ break;
+ case TYP_CHAR:
+ mask = 0x0000FFFF;
+ break;
+ default:
+ goto SIMPLE_SMALL;
+ }
+
+ if (unsigned(op2->gtOp.gtOp2->gtIntCon.gtIconVal) == mask)
+ {
+ /* Redundant AND */
+
+ op2 = op2->gtOp.gtOp1;
+ }
+ }
+
+ /* Must get the new value into a byte register */
+
+ SIMPLE_SMALL:
+ if (varTypeIsByte(op1->TypeGet()))
+ genComputeReg(op2, RBM_BYTE_REGS, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ else
+ goto NOT_SMALL;
+ }
+ else
+ {
+ NOT_SMALL:
+ /* Generate the RHS into a register */
+
+ isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
+ if (isWriteBarrier)
+ {
+#if NOGC_WRITE_BARRIERS
+ // Exclude the REG_WRITE_BARRIER from op2's needReg mask
+ needReg = Target::exclude_WriteBarrierReg(needReg);
+ mustReg = RegSet::EXACT_REG;
+#else // !NOGC_WRITE_BARRIERS
+ // This code should be generic across architectures.
+
+ // For the standard JIT Helper calls
+ // op1 goes into REG_ARG_0 and
+ // op2 goes into REG_ARG_1
+ //
+ needRegOp1 = RBM_ARG_0;
+ needReg = RBM_ARG_1;
+#endif // !NOGC_WRITE_BARRIERS
+ }
+ genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
+ }
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make the target addressable */
+
+ op1 = genCodeForCommaTree(op1); // Strip off any comma expressions.
+ addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
+
+ /* Make sure the RHS register hasn't been spilled;
+ keep the register marked as "used", otherwise
+ we might get the pointer lifetimes wrong.
+ */
+
+ if (varTypeIsByte(op1->TypeGet()))
+ needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
+
+ genRecoverReg(op2, needReg, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Lock the RHS temporarily (only registers already marked as used can be locked) */
+
+ regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
+
+ /* Make sure the LHS is still addressable */
+
+ addrReg = genKeepAddressable(op1, addrReg);
+
+ /* We can now unlock the (already-used) RHS register */
+
+ regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
+
+ /* Does the write barrier helper do the assignment? */
+
+ regGC = WriteBarrier(op1, op2, addrReg);
+
+ if (regGC != 0)
+ {
+ // Yes, assignment done by the WriteBarrier
+ noway_assert(isWriteBarrier);
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
+ }
+
+#ifdef DEBUG
+ /* Update the current liveness info */
+ if (compiler->opts.varNames)
+ genUpdateLife(tree);
+#endif
+
+ // If op2 register is still in use, free it. (Might not be in use, if
+ // a full-call write barrier was done, and the register was a caller-saved
+ // register.)
+ regMaskTP op2RM = genRegMask(op2->gtRegNum);
+ if (op2RM & regSet.rsMaskUsed)
+ regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
+
+ // This is done in WriteBarrier when (regGC != 0)
+ if (regGC == 0)
+ {
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ }
+ }
+ else
+ {
+ /* Make the target addressable */
+
+ isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
+
+ if (isWriteBarrier)
+ {
+#if NOGC_WRITE_BARRIERS
+ /* Try to avoid RBM_TMP_0 */
+ needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~RBM_TMP_0);
+ mustReg = RegSet::EXACT_REG; // For op2
+#else // !NOGC_WRITE_BARRIERS
+ // This code should be generic across architectures.
+
+ // For the standard JIT Helper calls
+ // op1 goes into REG_ARG_0 and
+ // op2 goes into REG_ARG_1
+ //
+ needRegOp1 = RBM_ARG_0;
+ needReg = RBM_ARG_1;
+ mustReg = RegSet::EXACT_REG; // For op2
+#endif // !NOGC_WRITE_BARRIERS
+ }
+
+ needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);
+
+ op1 = genCodeForCommaTree(op1); // Strip away any comma expression.
+
+ addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
+
+#if CPU_HAS_BYTE_REGS
+ /* Is the target a byte value? */
+ if (varTypeIsByte(op1->TypeGet()))
+ {
+ /* Must get the new value into a byte register */
+ needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
+ mustReg = RegSet::EXACT_REG;
+
+ if (op2->gtType >= op1->gtType)
+ op2->gtFlags |= GTF_SMALL_OK;
+ }
+#endif
+
+#if NOGC_WRITE_BARRIERS
+ /* For WriteBarrier we can't use REG_WRITE_BARRIER */
+ if (isWriteBarrier)
+ needReg = Target::exclude_WriteBarrierReg(needReg);
+
+ /* Also avoid using the previously computed addrReg(s) */
+ bestReg = regSet.rsNarrowHint(needReg, ~addrReg);
+
+ /* If we have a reg available to grab then use bestReg */
+ if (bestReg & regSet.rsRegMaskCanGrab())
+ needReg = bestReg;
+
+ mustReg = RegSet::EXACT_REG;
+#endif
+
+ /* Generate the RHS into a register */
+ genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make sure the target is still addressable */
+ addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Does the write barrier helper do the assignment? */
+
+ regGC = WriteBarrier(op1, op2, addrReg);
+
+ if (regGC != 0)
+ {
+ // Yes, assignment done by the WriteBarrier
+ noway_assert(isWriteBarrier);
+ }
+ else
+ {
+ assert(!isWriteBarrier);
+
+#ifdef _TARGET_ARM_
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
+ }
+
+ /* The new value is no longer needed */
+
+ genReleaseReg(op2);
+
+#ifdef DEBUG
+ /* Update the current liveness info */
+ if (compiler->opts.varNames)
+ genUpdateLife(tree);
+#endif
+
+ // This is done in WriteBarrier when (regGC != 0)
+ if (regGC == 0)
+ {
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ }
+ }
+
+ addrReg = RBM_NONE;
+ break;
+ }
+
+ noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl);
+
+LExit:
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ */
+ if (lclVarNum < compiler->lvaCount)
+ siCheckVarScope(lclVarNum, lclILoffs);
+#endif
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Generate code to complete the assignment operation
+ */
+
+void CodeGen::genCodeForTreeSmpOpAsg_DONE_ASSG(GenTreePtr tree, regMaskTP addrReg, regNumber reg, bool ovfl)
+{
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ noway_assert(op2);
+
+ if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_REG_VAR)
+ genUpdateLife(op1);
+ genUpdateLife(tree);
+
+#if REDUNDANT_LOAD
+
+ if (op1->gtOper == GT_LCL_VAR)
+ regTracker.rsTrashLcl(op1->gtLclVarCommon.gtLclNum);
+
+ /* Have we just assigned a value that is in a register? */
+
+ if ((op2->gtFlags & GTF_REG_VAL) && tree->gtOper == GT_ASG)
+ {
+ regTracker.rsTrackRegAssign(op1, op2);
+ }
+
+#endif
+
+ noway_assert(addrReg != 0xDEADCAFE);
+
+ gcInfo.gcMarkRegSetNpt(addrReg);
+
+ if (ovfl)
+ {
+ noway_assert(tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB);
+
+ /* If GTF_REG_VAL is not set, and it is a small type, then
+ we must have loaded it up from memory, done the increment,
+ checked for overflow, and then stored it back to memory */
+
+ bool ovfCheckDone = (genTypeSize(op1->TypeGet()) < sizeof(int)) && !(op1->gtFlags & GTF_REG_VAL);
+
+ if (!ovfCheckDone)
+ {
+ // For small sizes, reg should be set as we sign/zero extend it.
+
+ noway_assert(genIsValidReg(reg) || genTypeSize(treeType) == sizeof(int));
+
+ /* Currently we don't morph x=x+y into x+=y in try blocks
+ * if we need overflow check, as x+y may throw an exception.
+ * We can do it if x is not live on entry to the catch block.
+ */
+ noway_assert(!compiler->compCurBB->hasTryIndex());
+
+ genCheckOverflow(tree);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a special op tree
+ */
+
+void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ genTreeOps oper = tree->OperGet();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP regs = regSet.rsMaskUsed;
+
+ noway_assert((tree->OperKind() & (GTK_CONST | GTK_LEAF | GTK_SMPOP)) == 0);
+
+ switch (oper)
+ {
+ case GT_CALL:
+ regs = genCodeForCall(tree, true);
+
+ /* If the result is in a register, make sure it ends up in the right place */
+
+ if (regs != RBM_NONE)
+ {
+ genMarkTreeInReg(tree, genRegNumFromMask(regs));
+ }
+
+ genUpdateLife(tree);
+ return;
+
+ case GT_FIELD:
+ NO_WAY("should not see this operator in this phase");
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ {
+#ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
+ // MUST NEVER CHECK-IN WITH THIS ENABLED.
+ // This is just for convenience in doing performance investigations and requires x86ret builds
+ if (!JitConfig.JitNoRngChk())
+#endif
+ genRangeCheck(tree);
+ }
+ return;
+
+ case GT_ARR_ELEM:
+ genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
+ return;
+
+ case GT_CMPXCHG:
+ {
+#if defined(_TARGET_XARCH_)
+ // cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand
+
+ // Since this is a "call", evaluate the operands from right to left. Don't worry about spilling
+ // right now, just get the trees evaluated.
+
+ // As a friendly reminder, IL args are evaluated left to right.
+
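+ // Illustrative sketch of the shape this path ends up emitting (<location>,
+ // <value> and <comparand> are placeholders for the operands picked below,
+ // not identifiers from this code):
+ //
+ //     mov   <reg>, <value>                       ; any integer register except EAX
+ //     mov   eax, <comparand>                     ; cmpxchg pins the comparand to EAX
+ //     lock  cmpxchg dword ptr [<location>], <reg>
+ //                                                ; the old memory value is left in EAX
+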
+ GenTreePtr location = tree->gtCmpXchg.gtOpLocation; // arg1
+ GenTreePtr value = tree->gtCmpXchg.gtOpValue; // arg2
+ GenTreePtr comparand = tree->gtCmpXchg.gtOpComparand; // arg3
+ regMaskTP addrReg;
+
+ bool isAddr = genMakeIndAddrMode(location, tree, false, /* not for LEA */
+ RBM_ALLINT, RegSet::KEEP_REG, &addrReg);
+
+ if (!isAddr)
+ {
+ genCodeForTree(location, RBM_NONE, RBM_NONE);
+ assert(location->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(location->gtRegNum);
+ regSet.rsMarkRegUsed(location);
+ }
+
+ // We must have a reg for the Value, but it doesn't really matter which register.
+
+ // Try to avoid EAX and the address register if possible.
+ genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG);
+
+#ifdef DEBUG
+ // cmpxchg uses EAX as an implicit operand to hold the comparand
+ // We're going to destroy EAX in this operation, so we better not be keeping
+ // anything important in it.
+ if (RBM_EAX & regSet.rsMaskVars)
+ {
+ // We have a variable enregistered in EAX. Make sure it goes dead in this tree.
+ for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum)
+ {
+ const LclVarDsc& varDesc = compiler->lvaTable[varNum];
+ if (!varDesc.lvIsRegCandidate())
+ continue;
+ if (!varDesc.lvRegister)
+ continue;
+ if (isFloatRegType(varDesc.lvType))
+ continue;
+ if (varDesc.lvRegNum != REG_EAX)
+ continue;
+ // We may need to check lvOtherReg.
+
+ // If the variable isn't going dead during this tree, we've just trashed a local with
+ // cmpxchg.
+ noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum));
+
+ break;
+ }
+ }
+#endif
+ genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);
+
+ // By this point we've evaluated everything. However the odds are that we've spilled something by
+ // now. Let's recover all the registers and force them to stay.
+
+ // Well, we just computed comparand, so it's still in EAX.
+ noway_assert(comparand->gtRegNum == REG_EAX);
+ regSet.rsLockUsedReg(RBM_EAX);
+
+ // Stick it anywhere other than EAX.
+ genRecoverReg(value, ~RBM_EAX, RegSet::KEEP_REG);
+ reg = value->gtRegNum;
+ noway_assert(reg != REG_EAX);
+ regSet.rsLockUsedReg(genRegMask(reg));
+
+ if (isAddr)
+ {
+ addrReg = genKeepAddressable(/*location*/ tree, addrReg, 0 /*avoidMask*/);
+ }
+ else
+ {
+ genRecoverReg(location, ~(RBM_EAX | genRegMask(reg)), RegSet::KEEP_REG);
+ }
+
+ regSet.rsUnlockUsedReg(genRegMask(reg));
+ regSet.rsUnlockUsedReg(RBM_EAX);
+
+ instGen(INS_lock);
+ if (isAddr)
+ {
+ sched_AM(INS_cmpxchg, EA_4BYTE, reg, false, location, 0);
+ genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
+ }
+ else
+ {
+ instEmit_RM_RV(INS_cmpxchg, EA_4BYTE, location, reg, 0);
+ genReleaseReg(location);
+ }
+
+ genReleaseReg(value);
+ genReleaseReg(comparand);
+
+ // EAX and the value register are both trashed at this point.
+ regTracker.rsTrackRegTrash(REG_EAX);
+ regTracker.rsTrackRegTrash(reg);
+
+ reg = REG_EAX;
+
+ genFlagsEqualToNone();
+ break;
+#else // not defined(_TARGET_XARCH_)
+ NYI("GT_CMPXCHG codegen");
+ break;
+#endif
+ }
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected operator");
+ NO_WAY("unexpected operator");
+ }
+
+ noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for the given tree. tree->gtRegNum will be set to the
+ * register where the tree lives.
+ *
+ * If 'destReg' is non-zero, we'll do our best to compute the value into a
+ * register that is in that register set.
+ * Use genComputeReg() if you need the tree in a specific register.
+ * Use genCompIntoFreeReg() if the register needs to be written to. Otherwise,
+ * the register can only be used for read, but not for write.
+ * Use genMakeAddressable() if you only need the tree to be accessible
+ * using a complex addressing mode, and do not necessarily need the tree
+ * materialized in a register.
+ *
+ * The GCness of the register will be properly set in gcInfo.gcRegGCrefSetCur/gcInfo.gcRegByrefSetCur.
+ *
+ * The register will not be marked as used. Use regSet.rsMarkRegUsed() if the
+ * register will not be consumed right away and could possibly be spilled.
+ */
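+//
+// Illustrative sketch only (hypothetical call sites, not part of this change) of how
+// the register-request helpers mentioned above are typically invoked:
+//
+//     genCodeForTree(op, RBM_NONE, RBM_NONE);                          // any register is fine
+//     genComputeReg(op, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG); // must end up in EAX
+//     genCompIntoFreeReg(op, RBM_NONE, RegSet::KEEP_REG);              // result register will be written
+//     addrReg = genMakeAddressable(op, RBM_NONE, RegSet::KEEP_REG);    // only needs to be addressable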
+
+void CodeGen::genCodeForTree(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+#if 0
+ if (compiler->verbose)
+ {
+ printf("Generating code for tree ");
+ Compiler::printTreeID(tree);
+ printf(" destReg = 0x%x bestReg = 0x%x\n", destReg, bestReg);
+ }
+ genStressRegs(tree);
+#endif
+
+ noway_assert(tree);
+ noway_assert(tree->gtOper != GT_STMT);
+ assert(tree->IsNodeProperlySized());
+
+ // When assigning to an enregistered local variable we receive
+ // a hint that we should target the register that is used to
+ // hold the enregistered local variable.
+ // When receiving this hint both the destReg and bestReg masks are set
+ // to the register that is used by the enregistered local variable.
+ //
+ // However it is possible for a different local variable
+ // targeting the same register to become alive (and later die)
+ // as we descend the expression tree.
+ //
+ // To handle such cases we will remove any registers that are alive
+ // from both the destReg and bestReg masks.
+ //
+ regMaskTP liveMask = genLiveMask(tree);
+
+ // This removes any registers used to hold enregistered locals
+ // from the destReg and bestReg masks.
+ // After this either mask could become 0
+ //
+ destReg &= ~liveMask;
+ bestReg &= ~liveMask;
+
+ /* 'destReg' of 0 really means 'any' */
+
+ destReg = regSet.rsUseIfZero(destReg, RBM_ALL(tree->TypeGet()));
+
+ if (destReg != RBM_ALL(tree->TypeGet()))
+ bestReg = regSet.rsUseIfZero(bestReg, destReg);
+
+ // Long, float, and double have their own codegen functions
+ switch (tree->TypeGet())
+ {
+
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+ genCodeForTreeLng(tree, destReg, /*avoidReg*/ RBM_NONE);
+ return;
+
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+ // For comma nodes, we'll get back here for the last node in the comma list.
+ if (tree->gtOper != GT_COMMA)
+ {
+ genCodeForTreeFlt(tree, RBM_ALLFLOAT, RBM_ALLFLOAT & (destReg | bestReg));
+ return;
+ }
+ break;
+#endif
+
+#ifdef DEBUG
+ case TYP_UINT:
+ case TYP_ULONG:
+ noway_assert(!"These types are only used as markers in GT_CAST nodes");
+ break;
+#endif
+
+ default:
+ break;
+ }
+
+ /* Is the value already in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ genCodeForTree_REG_VAR1(tree);
+ return;
+ }
+
+ /* We better not have a spilled value here */
+
+ noway_assert((tree->gtFlags & GTF_SPILLED) == 0);
+
+ /* Figure out what kind of a node we have */
+
+ unsigned kind = tree->OperKind();
+
+ if (kind & GTK_CONST)
+ {
+ /* Handle constant nodes */
+
+ genCodeForTreeConst(tree, destReg, bestReg);
+ }
+ else if (kind & GTK_LEAF)
+ {
+ /* Handle leaf nodes */
+
+ genCodeForTreeLeaf(tree, destReg, bestReg);
+ }
+ else if (kind & GTK_SMPOP)
+ {
+ /* Handle 'simple' unary/binary operators */
+
+ genCodeForTreeSmpOp(tree, destReg, bestReg);
+ }
+ else
+ {
+ /* Handle special operators */
+
+ genCodeForTreeSpecialOp(tree, destReg, bestReg);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for all the basic blocks in the function.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForBBlist()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ unsigned savedStkLvl;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+ unsigned stmtNum = 0;
+ unsigned totalCostEx = 0;
+ unsigned totalCostSz = 0;
+
+ // You have to be careful if you create basic blocks from now on
+ compiler->fgSafeBasicBlockCreation = false;
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnCall)
+ {
+ compiler->opts.compStackCheckOnCall = false;
+ }
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnRet)
+ {
+ compiler->opts.compStackCheckOnRet = false;
+ }
+#endif
+
+ // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
+ genPrepForEHCodegen();
+
+ assert(!compiler->fgFirstBBScratch ||
+ compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+
+ /* Initialize the spill tracking logic */
+
+ regSet.rsSpillBeg();
+
+#ifdef DEBUGGING_SUPPORT
+ /* Initialize the line# tracking logic */
+
+ if (compiler->opts.compScopeInfo)
+ {
+ siInit();
+ }
+#endif
+
+#ifdef _TARGET_X86_
+ if (compiler->compTailCallUsed)
+ {
+ noway_assert(isFramePointerUsed());
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+#endif
+
+ if (compiler->opts.compDbgEnC)
+ {
+ noway_assert(isFramePointerUsed());
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
+ /* If we have any pinvoke calls, we might potentially trash everything */
+
+ if (compiler->info.compCallUnmanaged)
+ {
+ noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
+ /* Initialize the pointer tracking code */
+
+ gcInfo.gcRegPtrSetInit();
+ gcInfo.gcVarPtrSetInit();
+
+ /* If any arguments live in registers, mark those regs as such */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter assigned to a register? */
+
+ if (!varDsc->lvIsParam || !varDsc->lvRegister)
+ continue;
+
+ /* Is the argument live on entry to the method? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ continue;
+
+#if CPU_HAS_FP_SUPPORT
+ /* Is this a floating-point argument? */
+
+ if (varDsc->IsFloatRegType())
+ continue;
+
+ noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
+#endif
+
+ /* Mark the register as holding the variable */
+
+ if (isRegPairType(varDsc->lvType))
+ {
+ regTracker.rsTrackRegLclVarLng(varDsc->lvRegNum, varNum, true);
+
+ if (varDsc->lvOtherReg != REG_STK)
+ regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
+ }
+ else
+ {
+ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
+ }
+ }
+
+ unsigned finallyNesting = 0;
+
+ // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
+ // allocation at the start of each basic block.
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks and generate code for each one
+ *
+ */
+
+ BasicBlock* block;
+ BasicBlock* lblk; /* previous block */
+
+ for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n=============== Generating ");
+ block->dspBlockHeader(compiler, true, true);
+ compiler->fgDispBBLiveness(block);
+ }
+#endif // DEBUG
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveSet, VarSetOps::UninitVal());
+
+ regMaskTP gcrefRegs = 0;
+ regMaskTP byrefRegs = 0;
+
+ /* Does any other block jump to this point ? */
+
+ if (block->bbFlags & BBF_JMP_TARGET)
+ {
+ /* Someone may jump here, so trash all regs */
+
+ regTracker.rsTrackRegClr();
+
+ genFlagsEqualToNone();
+ }
+ else
+ {
+ /* No jump, but pointers always need to get trashed for proper GC tracking */
+
+ regTracker.rsTrackRegClrPtr();
+ }
+
+ /* No registers are used or locked on entry to a basic block */
+
+ regSet.rsMaskUsed = RBM_NONE;
+ regSet.rsMaskMult = RBM_NONE;
+ regSet.rsMaskLock = RBM_NONE;
+
+ // If we need to reserve registers such that they are not used
+ // by CodeGen in this BasicBlock we do so here.
+ // On ARM, when we have large frame offsets for locals, we
+ // will have RBM_R10 in the regSet.rsMaskResvd set;
+ // additionally, if a LocAlloc or alloca is used, RBM_R9 is in
+ // the regSet.rsMaskResvd set, and we lock these registers here.
+ //
+ if (regSet.rsMaskResvd != RBM_NONE)
+ {
+ regSet.rsLockReg(regSet.rsMaskResvd);
+ regSet.rsSetRegsModified(regSet.rsMaskResvd);
+ }
+
+ /* Figure out which registers hold variables on entry to this block */
+
+ regMaskTP specialUseMask = regSet.rsMaskResvd;
+
+ specialUseMask |= doubleAlignOrFramePointerUsed() ? RBM_SPBASE | RBM_FPBASE : RBM_SPBASE;
+ regSet.ClearMaskVars();
+ VarSetOps::ClearD(compiler, compiler->compCurLife);
+ VarSetOps::Assign(compiler, liveSet, block->bbLiveIn);
+
+#if FEATURE_STACK_FP_X87
+ VarSetOps::AssignNoCopy(compiler, genFPregVars,
+ VarSetOps::Intersection(compiler, liveSet, compiler->optAllFPregVars));
+ genFPregCnt = VarSetOps::Count(compiler, genFPregVars);
+ genFPdeadRegCnt = 0;
+#endif
+ gcInfo.gcResetForBB();
+
+ genUpdateLife(liveSet); // This updates regSet.rsMaskVars with bits from any enregistered LclVars
+#if FEATURE_STACK_FP_X87
+ VarSetOps::IntersectionD(compiler, liveSet, compiler->optAllNonFPvars);
+#endif
+
+ // We should never enregister variables in any of the specialUseMask registers
+ noway_assert((specialUseMask & regSet.rsMaskVars) == 0);
+
+ VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ varNum = compiler->lvaTrackedToVarNum[varIndex];
+ varDsc = compiler->lvaTable + varNum;
+ assert(varDsc->lvTracked);
+ /* Ignore the variable if it's not in a reg */
+
+ if (!varDsc->lvRegister)
+ continue;
+ if (isFloatRegType(varDsc->lvType))
+ continue;
+
+ /* Get hold of the index and the bitmask for the variable */
+ regNumber regNum = varDsc->lvRegNum;
+ regMaskTP regMask = genRegMask(regNum);
+
+ regSet.AddMaskVars(regMask);
+
+ if (varDsc->lvType == TYP_REF)
+ gcrefRegs |= regMask;
+ else if (varDsc->lvType == TYP_BYREF)
+ byrefRegs |= regMask;
+
+ /* Mark the register holding the variable as such */
+
+ if (varTypeIsMultiReg(varDsc))
+ {
+ regTracker.rsTrackRegLclVarLng(regNum, varNum, true);
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
+ regMask |= genRegMask(varDsc->lvOtherReg);
+ }
+ }
+ else
+ {
+ regTracker.rsTrackRegLclVar(regNum, varNum);
+ }
+ }
+
+ gcInfo.gcPtrArgCnt = 0;
+
+#if FEATURE_STACK_FP_X87
+
+ regSet.rsMaskUsedFloat = regSet.rsMaskRegVarFloat = regSet.rsMaskLockedFloat = RBM_NONE;
+
+ memset(regSet.genUsedRegsFloat, 0, sizeof(regSet.genUsedRegsFloat));
+ memset(regSet.genRegVarsFloat, 0, sizeof(regSet.genRegVarsFloat));
+
+ // Setup fp state on block entry
+ genSetupStateStackFP(block);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ JitDumpFPState();
+ }
+#endif // DEBUG
+#endif // FEATURE_STACK_FP_X87
+
+ /* Make sure we keep track of what pointers are live */
+
+ noway_assert((gcrefRegs & byrefRegs) == 0); // Something can't be both a gcref and a byref
+ gcInfo.gcRegGCrefSetCur = gcrefRegs;
+ gcInfo.gcRegByrefSetCur = byrefRegs;
+
+ /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
+ represent the exception object (TYP_REF).
+ We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
+ to the block; it will be the first thing evaluated
+ (thanks to GTF_ORDER_SIDEEFF).
+ */
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ GenTreePtr firstStmt = block->FirstNonPhiDef();
+ if (firstStmt != NULL)
+ {
+ GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr;
+ if (compiler->gtHasCatchArg(firstTree))
+ {
+ gcInfo.gcRegGCrefSetCur |= RBM_EXCEPTION_OBJECT;
+ }
+ }
+ }
+
+ /* Start a new code output block */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+#if defined(_TARGET_ARM_)
+ // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
+ // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
+ // calls the funclet during non-exceptional control flow.
+ if (block->bbFlags & BBF_FINALLY_TARGET)
+ {
+ assert(block->bbFlags & BBF_JMP_TARGET);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
+ }
+#endif
+ // Create a label that we'll use for computing the start of an EH region, if this block is
+ // at the beginning of such a region. If we used the existing bbEmitCookie as is for
+ // determining the EH regions, then this NOP would end up outside of the region, if this
+ // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
+ // would be executed, which we would prefer not to do.
+
+ block->bbUnwindNopEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+
+ instGen(INS_nop);
+ }
+#endif // defined(_TARGET_ARM_)
+
+ genUpdateCurrentFunclet(block);
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef _TARGET_XARCH_
+ if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
+ {
+ getEmitter()->emitLoopAlign();
+ }
+#endif
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
+#endif
+
+ block->bbEmitCookie = NULL;
+
+ if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
+ {
+ /* Mark a label and update the current set of live GC refs */
+
+ block->bbEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET
+#else
+ FALSE
+#endif
+ );
+ }
+
+ if (block == compiler->fgFirstColdBlock)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nThis is the start of the cold region of the method\n");
+ }
+#endif
+ // We should never have a block that falls through into the Cold section
+ noway_assert(!lblk->bbFallsThrough());
+
+ // We require the block that starts the Cold section to have a label
+ noway_assert(block->bbEmitCookie);
+ getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
+ }
+
+ /* Both stacks are always empty on entry to a basic block */
+
+ genStackLevel = 0;
+#if FEATURE_STACK_FP_X87
+ genResetFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+
+#if !FEATURE_FIXED_OUT_ARGS
+ /* Check for inserted throw blocks and adjust genStackLevel */
+
+ if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
+ {
+ noway_assert(block->bbFlags & BBF_JMP_TARGET);
+
+ genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int);
+
+ if (genStackLevel)
+ {
+#ifdef _TARGET_X86_
+ getEmitter()->emitMarkStackLvl(genStackLevel);
+ inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
+ genStackLevel = 0;
+#else // _TARGET_X86_
+ NYI("Need emitMarkStackLvl()");
+#endif // _TARGET_X86_
+ }
+ }
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+ savedStkLvl = genStackLevel;
+
+ /* Tell everyone which basic block we're working on */
+
+ compiler->compCurBB = block;
+
+#ifdef DEBUGGING_SUPPORT
+ siBeginBlock(block);
+
+ // BBF_INTERNAL blocks don't correspond to any single IL instruction.
+ if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
+
+ bool firstMapping = true;
+#endif // DEBUGGING_SUPPORT
+
+ /*---------------------------------------------------------------------
+ *
+ * Generate code for each statement-tree in the block
+ *
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ genReserveFuncletProlog(block);
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+#if defined(DEBUGGING_SUPPORT)
+
+ /* Do we have a new IL-offset ? */
+
+ if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ /* Create and append a new IP-mapping entry */
+ genIPmappingAdd(stmt->gtStmt.gtStmtILoffsx, firstMapping);
+ firstMapping = false;
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
+ {
+ noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize);
+ if (compiler->opts.dspCode && compiler->opts.dspInstrs)
+ {
+ while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
+ {
+ genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
+ }
+ }
+ }
+#endif // DEBUG
+
+ /* Get hold of the statement tree */
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+#ifdef DEBUG
+ stmtNum++;
+ if (compiler->verbose)
+ {
+ printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
+ printf("Holding variables: ");
+ dspRegMask(regSet.rsMaskVars);
+ printf("\n\n");
+ compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
+ printf("\n");
+#if FEATURE_STACK_FP_X87
+ JitDumpFPState();
+#endif
+
+ printf("Execution Order:\n");
+ for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext)
+ {
+ compiler->gtDispTree(treeNode, 0, NULL, true);
+ }
+ printf("\n");
+ }
+ totalCostEx += (stmt->gtCostEx * block->getBBWeight(compiler));
+ totalCostSz += stmt->gtCostSz;
+#endif // DEBUG
+
+ compiler->compCurStmt = stmt;
+
+ compiler->compCurLifeTree = NULL;
+ switch (tree->gtOper)
+ {
+ case GT_CALL:
+ // Managed Retval under managed debugger - we need to make sure that the returned ref-type is
+ // reported as alive, even though it is not used within the caller, for the managed debugger's
+ // sake. So consider the return value of the method as used if generating debuggable code.
+ genCodeForCall(tree, compiler->opts.MinOpts() || compiler->opts.compDbgCode);
+ genUpdateLife(tree);
+ gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ break;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ // Just do the side effects
+ genEvalSideEffects(tree);
+ break;
+
+ default:
+ /* Generate code for the tree */
+
+ genCodeForTree(tree, 0);
+ break;
+ }
+
+ regSet.rsSpillChk();
+
+ /* The value of the tree isn't used, unless it's a return stmt */
+
+ if (tree->gtOper != GT_RETURN)
+ gcInfo.gcMarkRegPtrVal(tree);
+
+#if FEATURE_STACK_FP_X87
+ genEndOfStatement();
+#endif
+
+#ifdef DEBUG
+ /* Make sure we didn't bungle pointer register tracking */
+
+ regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
+ regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
+
+ // If return is a GC-type, clear it. Note that if a common
+ // epilog is generated (compiler->genReturnBB) it has a void return
+ // even though we might return a ref. We can't use the compRetType
+ // as the determiner because something we are tracking as a byref
+ // might be used as a return value of an int function (which is legal)
+ if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
+ (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
+ {
+ nonVarPtrRegs &= ~RBM_INTRET;
+ }
+
+ // When profiling, the first statement in a catch block will be the
+ // harmless "inc" instruction (does not interfere with the exception
+ // object).
+
+ if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) && (stmt == block->bbTreeList) &&
+ (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp)))
+ {
+ nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
+ }
+
+ if (nonVarPtrRegs)
+ {
+ printf("Regset after tree=");
+ Compiler::printTreeID(tree);
+ printf(" BB%02u gcr=", block->bbNum);
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ printf(", byr=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ printf(", regVars=");
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf("\n");
+ }
+
+ noway_assert(nonVarPtrRegs == 0);
+#endif // DEBUG
+
+ noway_assert(stmt->gtOper == GT_STMT);
+
+#ifdef DEBUGGING_SUPPORT
+ genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
+#endif
+
+ } //-------- END-FOR each statement-tree of the current block ---------
+
+#ifdef DEBUGGING_SUPPORT
+
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ siEndBlock(block);
+
+ /* Is this the last block, and are there any open scopes left ? */
+
+ bool isLastBlockProcessed = (block->bbNext == NULL);
+ if (block->isBBCallAlwaysPair())
+ {
+ isLastBlockProcessed = (block->bbNext->bbNext == NULL);
+ }
+
+ if (isLastBlockProcessed && siOpenScopeList.scNext)
+ {
+ /* This assert no longer holds, because we may insert a throw
+ block to demarcate the end of a try or finally region when they
+ are at the end of the method. It would be nice if we could fix
+ our code so that this throw block will no longer be necessary. */
+
+ // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
+
+ siCloseAllOpenScopes();
+ }
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ genStackLevel -= savedStkLvl;
+
+ gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs);
+
+ if (!VarSetOps::Equal(compiler, compiler->compCurLife, block->bbLiveOut))
+ compiler->genChangeLife(block->bbLiveOut DEBUGARG(NULL));
+
+ /* Both stacks should always be empty on exit from a basic block */
+
+ noway_assert(genStackLevel == 0);
+#if FEATURE_STACK_FP_X87
+ noway_assert(genGetFPstkLevel() == 0);
+
+ // Do the FPState matching that may have to be done
+ genCodeForEndBlockTransitionStackFP(block);
+#endif
+
+ noway_assert(genFullPtrRegMap == false || gcInfo.gcPtrArgCnt == 0);
+
+ /* Do we need to generate a jump or return? */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+ break;
+
+ case BBJ_RETURN:
+ genExitCode(block);
+ break;
+
+ case BBJ_THROW:
+ // If we have a throw at the end of a function or funclet, we need to emit another instruction
+ // afterwards to help the OS unwinder determine the correct context during unwind.
+ // We insert an unexecuted breakpoint instruction in several situations
+ // following a throw instruction:
+ // 1. If the throw is the last instruction of the function or funclet. This helps
+ // the OS unwinder determine the correct context during an unwind from the
+ // thrown exception.
+ // 2. If this is the last block of the hot section.
+ // 3. If the subsequent block is a special throw block.
+ if ((block->bbNext == NULL)
+#if FEATURE_EH_FUNCLETS
+ || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
+#endif // FEATURE_EH_FUNCLETS
+ || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
+ block->bbNext == compiler->fgFirstColdBlock)
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+
+ break;
+
+ case BBJ_CALLFINALLY:
+
+#if defined(_TARGET_X86_)
+
+ /* If we are about to invoke a finally locally from a try block,
+ we have to set the hidden slot corresponding to the finally's
+ nesting level. When invoked in response to an exception, the
+ EE usually does it.
+
+ We must have : BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
+
+ This code depends on this order not being messed up.
+ We will emit :
+ mov [ebp-(n+1)],0
+ mov [ebp- n ],0xFC
+ push &step
+ jmp finallyBlock
+
+ step: mov [ebp- n ],0
+ jmp leaveTarget
+ leaveTarget:
+ */
+
+ noway_assert(isFramePointerUsed());
+
+ // Get the nesting level which contains the finally
+ compiler->fgGetNestingLevel(block, &finallyNesting);
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ filterEndOffsetSlotOffs =
+ (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
+
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*)));
+
+ // Zero out the slot for the next nesting level
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs - sizeof(void*));
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs);
+
+ // Now push the address of where the finally funclet should
+ // return to directly.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+ getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
+ }
+ else
+ {
+ // EE expects a DWORD, so we give it 0
+ inst_IV(INS_push_hide, 0);
+ }
+
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+
+#elif defined(_TARGET_ARM_)
+
+ // Now set REG_LR to the address of where the finally funclet should
+ // return to directly.
+
+ BasicBlock* bbFinallyRet;
+ bbFinallyRet = NULL;
+
+ // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
+ // we would have otherwise created retless calls.
+ assert(block->isBBCallAlwaysPair());
+
+ assert(block->bbNext != NULL);
+ assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
+ assert(block->bbNext->bbJumpDest != NULL);
+ assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
+
+ bbFinallyRet = block->bbNext->bbJumpDest;
+ bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
+
+#if 0
+ // We don't know the address of the finally funclet yet. But adr requires the offset
+ // to the finally funclet from the current IP to be within 4095 bytes. So this code is disabled
+ // for now.
+ getEmitter()->emitIns_J_R (INS_adr,
+ EA_4BYTE,
+ bbFinallyRet,
+ REG_LR);
+#else // 0
+ // Load the address where the finally funclet should return into LR.
+ // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
+ // the return.
+ getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
+ getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
+ regTracker.rsTrackRegTrash(REG_LR);
+#endif // 0
+
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+#else
+ NYI("TARGET");
+#endif
+
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ lblk = block;
+ block = block->bbNext;
+ }
+ break;
+
+#ifdef _TARGET_ARM_
+
+ case BBJ_EHCATCHRET:
+ // set r0 to the address the VM should return to after the catch
+ getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
+ getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
+ regTracker.rsTrackRegTrash(REG_R0);
+
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+#else // _TARGET_ARM_
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_EHCATCHRET:
+ break;
+
+#endif // _TARGET_ARM_
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->compCurBB = 0;
+#endif
+
+ } //------------------ END-FOR each block of the method -------------------
+
+ /* Nothing is live at this point */
+ genUpdateLife(VarSetOps::MakeEmpty(compiler));
+
+ /* Finalize the spill tracking logic */
+
+ regSet.rsSpillEnd();
+
+ /* Finalize the temp tracking logic */
+
+ compiler->tmpEnd();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n# ");
+ printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz);
+ printf("%s\n", compiler->info.compFullName);
+ }
+#endif
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Generate code for a long operation.
+ * needReg is a recommendation of which registers to use for the tree.
+ * For partially enregistered longs, the tree will be marked as GTF_REG_VAL
+ * without loading the stack part into a register. Note that only leaf
+ * nodes (or if gtEffectiveVal() == leaf node) may be marked as partially
+ * enregistered so that we can know the memory location of the other half.
+ */
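+//
+// Hedged illustration (made-up registers, not taken from this change): for a
+// partially enregistered TYP_LONG leaf marked GTF_REG_VAL we might observe
+//
+//     genRegPairLo(tree->gtRegPair) == REG_EAX   // low 32 bits live in a register
+//     genRegPairHi(tree->gtRegPair) == REG_STK   // high 32 bits stay in the stack home
+//
+// which is why only (effective) leaf nodes may be marked this way: the stack half
+// remains addressable through the original tree.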
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP avoidReg)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+ regPairNo regPair = DUMMY_INIT(REG_PAIR_CORRUPT);
+ regMaskTP addrReg;
+ regNumber regLo;
+ regNumber regHi;
+
+ noway_assert(tree);
+ noway_assert(tree->gtOper != GT_STMT);
+ noway_assert(genActualType(tree->gtType) == TYP_LONG);
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ REG_VAR_LONG:
+ regPair = tree->gtRegPair;
+
+ gcInfo.gcMarkRegSetNpt(genRegPairMask(regPair));
+
+ goto DONE;
+ }
+
+ /* Is this a constant node? */
+
+ if (kind & GTK_CONST)
+ {
+ __int64 lval;
+
+ /* Pick a register pair for the value */
+
+ regPair = regSet.rsPickRegPair(needReg);
+
+ /* Load the value into the registers */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !CPU_HAS_FP_SUPPORT
+ if (oper == GT_CNS_DBL)
+ {
+ noway_assert(sizeof(__int64) == sizeof(double));
+
+ noway_assert(sizeof(tree->gtLngCon.gtLconVal) == sizeof(tree->gtDblCon.gtDconVal));
+
+ lval = *(__int64*)(&tree->gtDblCon.gtDconVal);
+ }
+ else
+#endif
+ {
+ noway_assert(oper == GT_CNS_LNG);
+
+ lval = tree->gtLngCon.gtLconVal;
+ }
+
+ genSetRegToIcon(genRegPairLo(regPair), int(lval));
+ genSetRegToIcon(genRegPairHi(regPair), int(lval >> 32));
+ goto DONE;
+ }
+
+ /* Is this a leaf node? */
+
+ if (kind & GTK_LEAF)
+ {
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+
+#if REDUNDANT_LOAD
+
+ /* This case has to consider the possibility that an int64 LCL_VAR
+ * may both be enregistered and also have a cached copy of itself
+ * in a different set of registers.
+ * We want to return the registers that have the most in common
+ * with the needReg mask.
+ */
+
+ /* Does the var have a copy of itself in the cached registers?
+ * And are these cached registers both free?
+ * If so use these registers if they match any needReg.
+ */
+
+ regPair = regTracker.rsLclIsInRegPair(tree->gtLclVarCommon.gtLclNum);
+
+ if ((regPair != REG_PAIR_NONE) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
+ ((genRegPairMask(regPair) & needReg) != RBM_NONE))
+ {
+ goto DONE;
+ }
+
+ /* Does the variable live in a register?
+ * If so use these registers.
+ */
+ if (genMarkLclVar(tree))
+ goto REG_VAR_LONG;
+
+ /* If tree is not an enregistered variable then
+ * be sure to use any cached register that contains
+ * a copy of this local variable.
+ */
+ if (regPair != REG_PAIR_NONE)
+ {
+ goto DONE;
+ }
+#endif
+ goto MEM_LEAF;
+
+ case GT_LCL_FLD:
+
+ // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
+ goto MEM_LEAF;
+
+ case GT_CLS_VAR:
+ MEM_LEAF:
+
+ /* Pick a register pair for the value */
+
+ regPair = regSet.rsPickRegPair(needReg);
+
+ /* Load the value into the registers */
+
+ instruction loadIns;
+
+ loadIns = ins_Load(TYP_INT); // INS_ldr
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+#if CPU_LOAD_STORE_ARCH
+ {
+ regNumber regAddr = regSet.rsGrabReg(RBM_ALLINT);
+ inst_RV_TT(INS_lea, regAddr, tree, 0);
+ regTracker.rsTrackRegTrash(regAddr);
+
+ if (regLo != regAddr)
+ {
+ // assert(regLo != regAddr); // forced by if statement
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
+ }
+ else
+ {
+ // assert(regHi != regAddr); // implied by regpair property and the if statement
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
+ }
+ }
+#else
+ inst_RV_TT(loadIns, regLo, tree, 0);
+ inst_RV_TT(loadIns, regHi, tree, 4);
+#endif
+
+#ifdef _TARGET_ARM_
+ if ((oper == GT_CLS_VAR) && (tree->gtFlags & GTF_IND_VOLATILE))
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ goto DONE;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected leaf");
+ }
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ instruction insLo;
+ instruction insHi;
+ bool doLo;
+ bool doHi;
+ bool setCarry = false;
+ int helper;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ switch (oper)
+ {
+ case GT_ASG:
+ {
+#ifdef DEBUGGING_SUPPORT
+ unsigned lclVarNum = compiler->lvaCount;
+ unsigned lclVarILoffs = DUMMY_INIT(0);
+#endif
+
+ /* Is the target a local ? */
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc;
+
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+ // No dead stores (with min opts we may have dead stores)
+ noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
+
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ * Remember the local var info to call siCheckVarScope
+ * AFTER codegen of the assignment.
+ */
+ if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode &&
+ (compiler->info.compVarScopesCount > 0))
+ {
+ lclVarNum = varNum;
+ lclVarILoffs = op1->gtLclVar.gtLclILoffs;
+ }
+#endif
+
+ /* Has the variable been assigned to a register (pair) ? */
+
+ if (genMarkLclVar(op1))
+ {
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regPair = op1->gtRegPair;
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+ noway_assert(regLo != regHi);
+
+ /* Is the value being assigned a constant? */
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ /* Move the value into the target */
+
+ genMakeRegPairAvailable(regPair);
+
+ instruction ins;
+ if (regLo == REG_STK)
+ {
+ ins = ins_Store(TYP_INT);
+ }
+ else
+ {
+ // Always do the stack half first (in case it grabs a register,
+ // it can't clobber regLo this way)
+ if (regHi == REG_STK)
+ {
+ inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
+ }
+ ins = INS_mov;
+ }
+ inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal), 0);
+
+ // The REG_STK case has already been handled
+ if (regHi != REG_STK)
+ {
+ ins = INS_mov;
+ inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
+ }
+
+ goto DONE_ASSG_REGS;
+ }
+
+ /* Compute the RHS into desired register pair */
+
+ if (regHi != REG_STK)
+ {
+ genComputeRegPair(op2, regPair, avoidReg, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ noway_assert(op2->gtRegPair == regPair);
+ }
+ else
+ {
+ regPairNo curPair;
+ regNumber curLo;
+ regNumber curHi;
+
+ genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ curPair = op2->gtRegPair;
+ curLo = genRegPairLo(curPair);
+ curHi = genRegPairHi(curPair);
+
+ /* move high first, target is on stack */
+ inst_TT_RV(ins_Store(TYP_INT), op1, curHi, 4);
+
+ if (regLo != curLo)
+ {
+ if ((regSet.rsMaskUsed & genRegMask(regLo)) && (regLo != curHi))
+ regSet.rsSpillReg(regLo);
+ inst_RV_RV(INS_mov, regLo, curLo, TYP_LONG);
+ regTracker.rsTrackRegCopy(regLo, curLo);
+ }
+ }
+
+ genReleaseRegPair(op2);
+ goto DONE_ASSG_REGS;
+ }
+ }
+
+ /* Is the value being assigned a constant? */
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG);
+
+ /* Move the value into the target */
+
+ inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal), 0);
+ inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ goto LAsgExit;
+ }
+
+#if 0
+ /* Catch a case where we can avoid generating op reg, mem. Better pairing
+ * from
+ * mov regHi, mem
+ * op regHi, reg
+ *
+ * To avoid problems with order of evaluation, only do this if op2 is
+ * a non-enregistered local variable
+ */
+
+ if (GenTree::OperIsCommutative(oper) &&
+ op1->gtOper == GT_LCL_VAR &&
+ op2->gtOper == GT_LCL_VAR)
+ {
+ regPair = regTracker.rsLclIsInRegPair(op2->gtLclVarCommon.gtLclNum);
+
+ /* Is op2 a non-enregistered local variable? */
+ if (regPair == REG_PAIR_NONE)
+ {
+ regPair = regTracker.rsLclIsInRegPair(op1->gtLclVarCommon.gtLclNum);
+
+ /* Is op1 an enregistered local variable? */
+ if (regPair != REG_PAIR_NONE)
+ {
+ /* Swap the operands */
+ GenTreePtr op = op1;
+ op1 = op2;
+ op2 = op;
+ }
+ }
+ }
+#endif
+
+ /* Eliminate worthless assignment "lcl = lcl" */
+
+ if (op2->gtOper == GT_LCL_VAR && op1->gtOper == GT_LCL_VAR &&
+ op2->gtLclVarCommon.gtLclNum == op1->gtLclVarCommon.gtLclNum)
+ {
+ genUpdateLife(op2);
+ goto LAsgExit;
+ }
+
+ if (op2->gtOper == GT_CAST && TYP_ULONG == op2->CastToType() && op2->CastFromType() <= TYP_INT &&
+ // op1,op2 need to be materialized in the correct order.
+ (tree->gtFlags & GTF_REVERSE_OPS))
+ {
+ /* Generate the small RHS into a register pair */
+
+ GenTreePtr smallOpr = op2->gtOp.gtOp1;
+
+ genComputeReg(smallOpr, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
+
+ /* Make sure everything is still addressable */
+
+ genRecoverReg(smallOpr, 0, RegSet::KEEP_REG);
+ noway_assert(smallOpr->gtFlags & GTF_REG_VAL);
+ regHi = smallOpr->gtRegNum;
+ addrReg = genKeepAddressable(op1, addrReg, genRegMask(regHi));
+
+ // conv.ovf.u8 could overflow if the original number was negative
+ if (op2->gtOverflow())
+ {
+ noway_assert((op2->gtFlags & GTF_UNSIGNED) ==
+ 0); // conv.ovf.u8.un should be bashed to conv.u8.un
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, regHi); // set flags
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(TYP_INT), op1, regHi, 0);
+ inst_TT_IV(ins_Store(TYP_INT), op1, 0, 4); // Store 0 in hi-word
+
+ /* Free up anything that was tied up by either side */
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ genReleaseReg(smallOpr);
+
+#if REDUNDANT_LOAD
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ /* clear this local from reg table */
+ regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
+
+ /* mark RHS registers as containing the local var */
+ regTracker.rsTrackRegLclVarLng(regHi, op1->gtLclVarCommon.gtLclNum, true);
+ }
+#endif
+ goto LAsgExit;
+ }
+
+ /* Is the LHS more complex than the RHS? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Generate the RHS into a register pair */
+
+ genComputeRegPair(op2, REG_PAIR_NONE, avoidReg | op1->gtUsedRegs, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make the target addressable */
+ op1 = genCodeForCommaTree(op1);
+ addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG);
+
+ /* Make sure the RHS register hasn't been spilled */
+
+ genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Make the target addressable */
+
+ op1 = genCodeForCommaTree(op1);
+ addrReg = genMakeAddressable(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true);
+
+ /* Generate the RHS into a register pair */
+
+ genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG, false);
+ }
+
+ /* Lock 'op2' and make sure 'op1' is still addressable */
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regPair = op2->gtRegPair;
+
+ addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairLo(regPair), 0);
+ inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairHi(regPair), 4);
+
+ /* Free up anything that was tied up by either side */
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ genReleaseRegPair(op2);
+
+ DONE_ASSG_REGS:
+
+#if REDUNDANT_LOAD
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ /* Clear this local from reg table */
+
+ regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
+
+ if ((op2->gtFlags & GTF_REG_VAL) &&
+ /* constant has precedence over local */
+ // rsRegValues[op2->gtRegNum].rvdKind != RV_INT_CNS &&
+ tree->gtOper == GT_ASG)
+ {
+ regNumber regNo;
+
+ /* mark RHS registers as containing the local var */
+
+ regNo = genRegPairLo(op2->gtRegPair);
+ if (regNo != REG_STK)
+ regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, true);
+
+ regNo = genRegPairHi(op2->gtRegPair);
+ if (regNo != REG_STK)
+ {
+ /* For partially enregistered longs, we might have
+ stomped on op2's hiReg */
+ if (!(op1->gtFlags & GTF_REG_VAL) || regNo != genRegPairLo(op1->gtRegPair))
+ {
+ regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, false);
+ }
+ }
+ }
+ }
+#endif
+
+ LAsgExit:
+
+ genUpdateLife(op1);
+ genUpdateLife(tree);
+
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ */
+ if (lclVarNum < compiler->lvaCount)
+ siCheckVarScope(lclVarNum, lclVarILoffs);
+#endif
+ }
+ return;
+
+ case GT_SUB:
+ insLo = INS_sub;
+ insHi = INS_SUBC;
+ setCarry = true;
+ goto BINOP_OVF;
+ case GT_ADD:
+ insLo = INS_add;
+ insHi = INS_ADDC;
+ setCarry = true;
+ goto BINOP_OVF;
+
+ bool ovfl;
+
+ BINOP_OVF:
+ ovfl = tree->gtOverflow();
+ goto _BINOP;
+
+ case GT_AND:
+ insLo = insHi = INS_AND;
+ goto BINOP;
+ case GT_OR:
+ insLo = insHi = INS_OR;
+ goto BINOP;
+ case GT_XOR:
+ insLo = insHi = INS_XOR;
+ goto BINOP;
+
+ BINOP:
+ ovfl = false;
+ goto _BINOP;
+
+ _BINOP:
+
+ /* The following makes an assumption about gtSetEvalOrder(this) */
+
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Special case: check for "(long(intval) << 32) | longval" */
+
+ if (oper == GT_OR && op1->gtOper == GT_LSH)
+ {
+ GenTreePtr lshLHS = op1->gtOp.gtOp1;
+ GenTreePtr lshRHS = op1->gtOp.gtOp2;
+
+ if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
+ genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
+ {
+
+ /* Throw away the cast of the shift operand. */
+
+ op1 = lshLHS->gtCast.CastOp();
+
+ /* Special case: check op2 for "ulong(intval)" */
+ if ((op2->gtOper == GT_CAST) && (op2->CastToType() == TYP_ULONG) &&
+ genTypeSize(TYP_INT) == genTypeSize(op2->CastFromType()))
+ {
+ /* Throw away the cast of the second operand. */
+
+ op2 = op2->gtCast.CastOp();
+ goto SIMPLE_OR_LONG;
+ }
+ /* Special case: check op2 for "long(intval) & 0xFFFFFFFF" */
+ else if (op2->gtOper == GT_AND)
+ {
+ GenTreePtr andLHS;
+ andLHS = op2->gtOp.gtOp1;
+ GenTreePtr andRHS;
+ andRHS = op2->gtOp.gtOp2;
+
+ if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
+ andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
+ genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
+ {
+ /* Throw away the cast of the second operand. */
+
+ op2 = andLHS->gtCast.CastOp();
+
+ SIMPLE_OR_LONG:
+ // Load the high DWORD, ie. op1
+
+ genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regHi = op1->gtRegNum;
+ regSet.rsMarkRegUsed(op1);
+
+ // Load the low DWORD, ie. op2
+
+ genCodeForTree(op2, needReg & ~genRegMask(regHi));
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regLo = op2->gtRegNum;
+
+ /* Make sure regHi is still around. Also, force
+ regLo to be excluded in case regLo==regHi */
+
+ genRecoverReg(op1, ~genRegMask(regLo), RegSet::FREE_REG);
+ regHi = op1->gtRegNum;
+
+ regPair = gen2regs2pair(regLo, regHi);
+ goto DONE;
+ }
+ }
+
+ /* Generate the following sequence:
+ Prepare op1 (discarding shift)
+ Compute op2 into some regpair
+ OR regpairhi, op1
+ */
+
+ /* First, make op1 addressable */
+
+ /* tempReg must avoid needReg, op2->gtRsvdRegs and regSet.rsMaskResvd.
+
+ It appears incorrect to exclude needReg as we are not ensuring that the reg pair into
+ which the long value is computed is from needReg. But at this point the safest fix is
+ to exclude regSet.rsMaskResvd.
+
+ Note that needReg could be the set of free registers (excluding reserved ones). If we don't
+ exclude regSet.rsMaskResvd, the expression below will have the effect of trying to choose a
+ reg from the reserved set, which is bound to fail. To prevent that we avoid regSet.rsMaskResvd.
+ */
+ regMaskTP tempReg = RBM_ALLINT & ~needReg & ~op2->gtRsvdRegs & ~avoidReg & ~regSet.rsMaskResvd;
+
+ addrReg = genMakeAddressable(op1, tempReg, RegSet::KEEP_REG);
+
+ genCompIntoFreeRegPair(op2, avoidReg, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regPair = op2->gtRegPair;
+ regHi = genRegPairHi(regPair);
+
+ /* The operand might have interfered with the address */
+
+ addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
+
+ /* Now compute the result */
+
+ inst_RV_TT(insHi, regHi, op1, 0);
+
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Free up anything that was tied up by the LHS */
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ /* The result is where the second operand is sitting */
+
+ genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::FREE_REG);
+
+ regPair = op2->gtRegPair;
+ goto DONE;
+ }
+ }
+
+ /* Special case: check for "longval | (long(intval) << 32)" */
+
+ if (oper == GT_OR && op2->gtOper == GT_LSH)
+ {
+ GenTreePtr lshLHS = op2->gtOp.gtOp1;
+ GenTreePtr lshRHS = op2->gtOp.gtOp2;
+
+ if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
+ genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
+
+ {
+ /* We throw away the cast of the shift operand. */
+
+ op2 = lshLHS->gtCast.CastOp();
+
+ /* Special case: check op1 for "long(intval) & 0xFFFFFFFF" */
+
+ if (op1->gtOper == GT_AND)
+ {
+ GenTreePtr andLHS = op1->gtOp.gtOp1;
+ GenTreePtr andRHS = op1->gtOp.gtOp2;
+
+ if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
+ andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
+ genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
+ {
+ /* Throw away the cast of the first operand. */
+
+ op1 = andLHS->gtCast.CastOp();
+
+ // Load the low DWORD, ie. op1
+
+ genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regLo = op1->gtRegNum;
+ regSet.rsMarkRegUsed(op1);
+
+ // Load the high DWORD, ie. op2
+
+ genCodeForTree(op2, needReg & ~genRegMask(regLo));
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regHi = op2->gtRegNum;
+
+ /* Make sure regLo is still around. Also, force
+ regHi to be excluded in case regLo==regHi */
+
+ genRecoverReg(op1, ~genRegMask(regHi), RegSet::FREE_REG);
+ regLo = op1->gtRegNum;
+
+ regPair = gen2regs2pair(regLo, regHi);
+ goto DONE;
+ }
+ }
+
+ /* Generate the following sequence:
+ Compute op1 into some regpair
+ Make op2 (ignoring shift) addressable
+ OR regPairHi, op2
+ */
+
+ // First, generate the first operand into some register
+
+ genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Make the second operand addressable */
+
+ addrReg = genMakeAddressable(op2, needReg, RegSet::KEEP_REG);
+
+ /* Make sure the result is in a free register pair */
+
+ genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
+ regPair = op1->gtRegPair;
+ regHi = genRegPairHi(regPair);
+
+ /* The operand might have interfered with the address */
+
+ addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
+
+ /* Compute the new value */
+
+ inst_RV_TT(insHi, regHi, op2, 0);
+
+ /* The value in the high register has been trashed */
+
+ regTracker.rsTrackRegTrash(regHi);
+
+ goto DONE_OR;
+ }
+ }
+
+ /* Generate the first operand into registers */
+
+ if ((genCountBits(needReg) == 2) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
+ ((op2->gtRsvdRegs & needReg) == RBM_NONE) && (!(tree->gtFlags & GTF_ASG)))
+ {
+ regPair = regSet.rsPickRegPair(needReg);
+ genComputeRegPair(op1, regPair, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
+ }
+ else
+ {
+ genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regMaskTP op1Mask;
+ regPair = op1->gtRegPair;
+ op1Mask = genRegPairMask(regPair);
+
+ /* Make the second operand addressable */
+ regMaskTP needReg2;
+ needReg2 = regSet.rsNarrowHint(needReg, ~op1Mask);
+ addrReg = genMakeAddressable(op2, needReg2, RegSet::KEEP_REG);
+
+ // TODO: If 'op1' got spilled and 'op2' happens to be
+ // TODO: in a register, and we have add/mul/and/or/xor,
+ // TODO: reverse the operands since we can perform the
+ // TODO: operation directly with the spill temp, e.g.
+ // TODO: 'add regHi, [temp]'.
+
+ /* Make sure the result is in a free register pair */
+
+ genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
+ regPair = op1->gtRegPair;
+ op1Mask = genRegPairMask(regPair);
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* Make sure that we don't spill regLo/regHi below */
+ regSet.rsLockUsedReg(op1Mask);
+
+ /* The operand might have interfered with the address */
+
+ addrReg = genKeepAddressable(op2, addrReg);
+
+ /* The value in the register pair is about to be trashed */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Compute the new value */
+
+ doLo = true;
+ doHi = true;
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ __int64 icon = op2->gtLngCon.gtLconVal;
+
+ /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
+
+ switch (oper)
+ {
+ case GT_AND:
+ if ((int)(icon) == -1)
+ doLo = false;
+ if ((int)(icon >> 32) == -1)
+ doHi = false;
+
+ if (!(icon & I64(0x00000000FFFFFFFF)))
+ {
+ genSetRegToIcon(regLo, 0);
+ doLo = false;
+ }
+
+ if (!(icon & I64(0xFFFFFFFF00000000)))
+ {
+ /* Handle the low half first, so the low register is always set before the high one */
+
+ if (doLo)
+ {
+ inst_RV_TT(insLo, regLo, op2, 0);
+ doLo = false;
+ }
+ genSetRegToIcon(regHi, 0);
+ doHi = false;
+ }
+
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ if (!(icon & I64(0x00000000FFFFFFFF)))
+ doLo = false;
+ if (!(icon & I64(0xFFFFFFFF00000000)))
+ doHi = false;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Fix 383813 X86/ARM ILGEN
+ // Fix 383793 ARM ILGEN
+ // Fix 383911 ARM ILGEN
+ regMaskTP newMask;
+ newMask = addrReg & ~op1Mask;
+ regSet.rsLockUsedReg(newMask);
+
+ if (doLo)
+ {
+ insFlags flagsLo = setCarry ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_TT(insLo, regLo, op2, 0, EA_4BYTE, flagsLo);
+ }
+ if (doHi)
+ {
+ insFlags flagsHi = ovfl ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_TT(insHi, regHi, op2, 4, EA_4BYTE, flagsHi);
+ }
+
+ regSet.rsUnlockUsedReg(newMask);
+ regSet.rsUnlockUsedReg(op1Mask);
+
+ DONE_OR:
+
+ /* Free up anything that was tied up by the address of op2 */
+
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ /* The result is where the first operand is sitting */
+
+ genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::FREE_REG);
+
+ regPair = op1->gtRegPair;
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ goto DONE;
+
+ case GT_UMOD:
+
+ regPair = genCodeForLongModInt(tree, needReg);
+ goto DONE;
+
+ case GT_MUL:
+
+ /* Special case: both operands promoted from int */
+
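+ /* Added summary (not from the original source): this handles the pattern
+ (long)i1 * (long)i2 where both operands are casts from 32-bit ints (GTF_MUL_64RSLT),
+ so a single 32x32->64 multiply (e.g. x86 imul leaving the result in EDX:EAX) yields
+ the full 64-bit product without a helper call. */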
+ assert(tree->gtIsValid64RsltMul());
+
+ /* Change to an integer multiply temporarily */
+
+ tree->gtType = TYP_INT;
+
+ noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
+ tree->gtOp.gtOp1 = op1->gtCast.CastOp();
+ tree->gtOp.gtOp2 = op2->gtCast.CastOp();
+
+ assert(tree->gtFlags & GTF_MUL_64RSLT);
+
+#if defined(_TARGET_X86_)
+ // imul on x86 requires EDX:EAX
+ genComputeReg(tree, (RBM_EAX | RBM_EDX), RegSet::EXACT_REG, RegSet::FREE_REG);
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+ noway_assert(tree->gtRegNum == REG_EAX); // Also REG_EDX is setup with hi 32-bits
+#elif defined(_TARGET_ARM_)
+ genComputeReg(tree, needReg, RegSet::ANY_REG, RegSet::FREE_REG);
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+#else
+ assert(!"Unsupported target for 64-bit multiply codegen");
+#endif
+
+ /* Restore gtType, op1 and op2 from the change above */
+
+ tree->gtType = TYP_LONG;
+ tree->gtOp.gtOp1 = op1;
+ tree->gtOp.gtOp2 = op2;
+
+#if defined(_TARGET_X86_)
+ /* The result is now in EDX:EAX */
+ regPair = REG_PAIR_EAXEDX;
+#elif defined(_TARGET_ARM_)
+ regPair = tree->gtRegPair;
+#endif
+ goto DONE;
+
+ case GT_LSH:
+ helper = CORINFO_HELP_LLSH;
+ goto SHIFT;
+ case GT_RSH:
+ helper = CORINFO_HELP_LRSH;
+ goto SHIFT;
+ case GT_RSZ:
+ helper = CORINFO_HELP_LRSZ;
+ goto SHIFT;
+
+ SHIFT:
+
+ noway_assert(op1->gtType == TYP_LONG);
+ noway_assert(genActualType(op2->gtType) == TYP_INT);
+
+ /* Is the second operand a constant? */
+
+ if (op2->gtOper == GT_CNS_INT)
+ {
+ unsigned int count = op2->gtIntCon.gtIconVal;
+
+ /* Compute the left operand into a free register pair */
+
+ genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regPair = op1->gtRegPair;
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* Assume the value in the register pair is trashed. In some cases, though,
+ a register might be set to zero, and we can use that information to improve
+ some code generation.
+ */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Generate the appropriate shift instructions */
+
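+ /* Sketch (illustration only) of the decomposition for a constant left shift by
+ 0 < count < 32 on x86:
+ shld regHi, regLo, count ; high half receives bits shifted out of the low half
+ shl regLo, count ; low half is shifted in place
+ For count >= 32 the low half becomes zero and the high half is the old low half
+ shifted by (count - 32); for count >= 64 both halves become zero. The right shifts
+ below follow the mirrored pattern using shrd and sar (or shr for GT_RSZ). */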
+ switch (oper)
+ {
+ case GT_LSH:
+ if (count == 0)
+ {
+ // regHi, regLo are correct
+ }
+ else if (count < 32)
+ {
+#if defined(_TARGET_XARCH_)
+ inst_RV_RV_IV(INS_shld, EA_4BYTE, regHi, regLo, count);
+#elif defined(_TARGET_ARM_)
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count);
+ getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regHi, regHi, regLo, 32 - count,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LSR);
+#else // _TARGET_*
+ NYI("INS_shld");
+#endif // _TARGET_*
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regLo, count);
+ }
+ else // count >= 32
+ {
+ assert(count >= 32);
+ if (count < 64)
+ {
+#if defined(_TARGET_ARM_)
+ if (count == 32)
+ {
+ // mov low dword into high dword (i.e. shift left by 32-bits)
+ inst_RV_RV(INS_mov, regHi, regLo);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+ getEmitter()->emitIns_R_R_I(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, regLo,
+ count - 32);
+ }
+#else // _TARGET_*
+ // mov low dword into high dword (i.e. shift left by 32-bits)
+ inst_RV_RV(INS_mov, regHi, regLo);
+ if (count > 32)
+ {
+ // Shift high dword left by count - 32
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count - 32);
+ }
+#endif // _TARGET_*
+ }
+ else // count >= 64
+ {
+ assert(count >= 64);
+ genSetRegToIcon(regHi, 0);
+ }
+ genSetRegToIcon(regLo, 0);
+ }
+ break;
+
+ case GT_RSH:
+ if (count == 0)
+ {
+ // regHi, regLo are correct
+ }
+ else if (count < 32)
+ {
+#if defined(_TARGET_XARCH_)
+ inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
+#elif defined(_TARGET_ARM_)
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
+ getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
+#else // _TARGET_*
+ NYI("INS_shrd");
+#endif // _TARGET_*
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, count);
+ }
+ else // count >= 32
+ {
+ assert(count >= 32);
+ if (count < 64)
+ {
+#if defined(_TARGET_ARM_)
+ if (count == 32)
+ {
+ // mov high dword into low dword (i.e. shift right by 32-bits)
+ inst_RV_RV(INS_mov, regLo, regHi);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+ getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, regHi,
+ count - 32);
+ }
+#else // _TARGET_*
+ // mov high dword into low dword (i.e. shift right by 32-bits)
+ inst_RV_RV(INS_mov, regLo, regHi);
+ if (count > 32)
+ {
+ // Shift low dword right by count - 32
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, count - 32);
+ }
+#endif // _TARGET_*
+ }
+
+ // Propagate sign bit in high dword
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
+
+ if (count >= 64)
+ {
+ // Propagate the sign from the high dword
+ inst_RV_RV(INS_mov, regLo, regHi, TYP_INT);
+ }
+ }
+ break;
+
+ case GT_RSZ:
+ if (count == 0)
+ {
+ // regHi, regLo are correct
+ }
+ else if (count < 32)
+ {
+#if defined(_TARGET_XARCH_)
+ inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
+#elif defined(_TARGET_ARM_)
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
+ getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
+#else // _TARGET_*
+ NYI("INS_shrd");
+#endif // _TARGET_*
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regHi, count);
+ }
+ else // count >= 32
+ {
+ assert(count >= 32);
+ if (count < 64)
+ {
+#if defined(_TARGET_ARM_)
+ if (count == 32)
+ {
+ // mov high dword into low dword (i.e. shift right by 32-bits)
+ inst_RV_RV(INS_mov, regLo, regHi);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+ getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, regHi,
+ count - 32);
+ }
+#else // _TARGET_*
+ // mov high dword into low dword (i.e. shift right by 32-bits)
+ inst_RV_RV(INS_mov, regLo, regHi);
+ if (count > 32)
+ {
+ // Shift low dword right by count - 32
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count - 32);
+ }
+#endif // _TARGET_*
+ }
+ else // count >= 64
+ {
+ assert(count >= 64);
+ genSetRegToIcon(regLo, 0);
+ }
+ genSetRegToIcon(regHi, 0);
+ }
+ break;
+
+ default:
+ noway_assert(!"Illegal oper for long shift");
+ break;
+ }
+
+ goto DONE_SHF;
+ }
+
+ /* Which operand are we supposed to compute first? */
+
+ assert((RBM_SHIFT_LNG & RBM_LNGARG_0) == 0);
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* The second operand can't be a constant */
+
+ noway_assert(op2->gtOper != GT_CNS_INT);
+
+ /* Load the shift count, hopefully into RBM_SHIFT_LNG */
+ RegSet::ExactReg exactReg;
+ if ((RBM_SHIFT_LNG & op1->gtRsvdRegs) == 0)
+ exactReg = RegSet::EXACT_REG;
+ else
+ exactReg = RegSet::ANY_REG;
+ genComputeReg(op2, RBM_SHIFT_LNG, exactReg, RegSet::KEEP_REG);
+
+ /* Compute the left operand into REG_LNGARG_0 */
+
+ genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Lock op1 so that it doesn't get trashed */
+
+ regSet.rsLockUsedReg(RBM_LNGARG_0);
+
+ /* Make sure the shift count wasn't displaced */
+
+ genRecoverReg(op2, RBM_SHIFT_LNG, RegSet::KEEP_REG);
+
+ /* Lock op2 */
+
+ regSet.rsLockUsedReg(RBM_SHIFT_LNG);
+ }
+ else
+ {
+ /* Compute the left operand into REG_LNGARG_0 */
+
+ genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Compute the shift count into RBM_SHIFT_LNG */
+
+ genComputeReg(op2, RBM_SHIFT_LNG, RegSet::EXACT_REG, RegSet::KEEP_REG);
+
+ /* Lock op2 */
+
+ regSet.rsLockUsedReg(RBM_SHIFT_LNG);
+
+ /* Make sure the value hasn't been displaced */
+
+ genRecoverRegPair(op1, REG_LNGARG_0, RegSet::KEEP_REG);
+
+ /* Lock op1 so that it doesn't get trashed */
+
+ regSet.rsLockUsedReg(RBM_LNGARG_0);
+ }
+
+#ifndef _TARGET_X86_
+ /* The generic helper is a C-routine and so it follows the full ABI */
+ {
+ /* Spill any caller-saved (callee-trash) registers which are being used */
+ regMaskTP spillRegs = RBM_CALLEE_TRASH & regSet.rsMaskUsed;
+
+ /* But do not spill our argument registers. */
+ spillRegs &= ~(RBM_LNGARG_0 | RBM_SHIFT_LNG);
+
+ if (spillRegs)
+ {
+ regSet.rsSpillRegs(spillRegs);
+ }
+ }
+#endif // !_TARGET_X86_
+
+ /* Perform the shift by calling a helper function */
+
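+ // Register contract assumed here (summarized from the asserts that follow): the 64-bit
+ // operand is in the REG_LNGARG_0 pair, the shift count is in REG_SHIFT_LNG, and the
+ // helper returns the result in the same register pair (see DONE_SHF below).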
+ noway_assert(op1->gtRegPair == REG_LNGARG_0);
+ noway_assert(op2->gtRegNum == REG_SHIFT_LNG);
+ noway_assert((regSet.rsMaskLock & (RBM_LNGARG_0 | RBM_SHIFT_LNG)) == (RBM_LNGARG_0 | RBM_SHIFT_LNG));
+
+ genEmitHelperCall(helper,
+ 0, // argSize
+ EA_8BYTE); // retSize
+
+#ifdef _TARGET_X86_
+ /* The value in the register pair is trashed */
+
+ regTracker.rsTrackRegTrash(genRegPairLo(REG_LNGARG_0));
+ regTracker.rsTrackRegTrash(genRegPairHi(REG_LNGARG_0));
+#else // _TARGET_X86_
+ /* The generic helper is a C-routine and so it follows the full ABI */
+ regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
+#endif // _TARGET_X86_
+
+ /* Release both operands */
+
+ regSet.rsUnlockUsedReg(RBM_LNGARG_0 | RBM_SHIFT_LNG);
+ genReleaseRegPair(op1);
+ genReleaseReg(op2);
+
+ DONE_SHF:
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regPair = op1->gtRegPair;
+ goto DONE;
+
+ case GT_NEG:
+ case GT_NOT:
+
+ /* Generate the operand into some register pair */
+
+ genCompIntoFreeRegPair(op1, avoidReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regPair = op1->gtRegPair;
+
+ /* Figure out which registers the value is in */
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* The value in the register pair is about to be trashed */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Unary "neg": negate the value in the register pair */
+ if (oper == GT_NEG)
+ {
+#ifdef _TARGET_ARM_
+
+ // ARM doesn't have an opcode that sets the carry bit like
+ // x86, so we can't use neg/addc/neg. Instead we use subtract
+ // with carry. Too bad this uses an extra register.
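+ //
+ // Illustrative sequence (sketch of the code emitted below), with regZero holding 0:
+ // rsbs regLo, regLo, #0 ; regLo = 0 - regLo, sets the carry/borrow flag
+ // sbc regHi, regZero, regHi ; regHi = 0 - regHi - borrow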
+
+ // Lock regLo and regHi so we don't pick them, and then pick
+ // a third register to be our 0.
+ regMaskTP regPairMask = genRegMask(regLo) | genRegMask(regHi);
+ regSet.rsLockReg(regPairMask);
+ regMaskTP regBest = RBM_ALLINT & ~avoidReg;
+ regNumber regZero = genGetRegSetToIcon(0, regBest);
+ regSet.rsUnlockReg(regPairMask);
+
+ inst_RV_IV(INS_rsb, regLo, 0, EA_4BYTE, INS_FLAGS_SET);
+ getEmitter()->emitIns_R_R_R_I(INS_sbc, EA_4BYTE, regHi, regZero, regHi, 0);
+
+#elif defined(_TARGET_XARCH_)
+
+ inst_RV(INS_NEG, regLo, TYP_LONG);
+ inst_RV_IV(INS_ADDC, regHi, 0, emitActualTypeSize(TYP_LONG));
+ inst_RV(INS_NEG, regHi, TYP_LONG);
+#else
+ NYI("GT_NEG on TYP_LONG");
+#endif
+ }
+ else
+ {
+ /* Unary "not": flip all the bits in the register pair */
+
+ inst_RV(INS_NOT, regLo, TYP_LONG);
+ inst_RV(INS_NOT, regHi, TYP_LONG);
+ }
+
+ goto DONE;
+
+#if LONG_ASG_OPS
+
+ case GT_ASG_OR:
+ insLo = insHi = INS_OR;
+ goto ASG_OPR;
+ case GT_ASG_XOR:
+ insLo = insHi = INS_XOR;
+ goto ASG_OPR;
+ case GT_ASG_AND:
+ insLo = insHi = INS_AND;
+ goto ASG_OPR;
+ case GT_ASG_SUB:
+ insLo = INS_sub;
+ insHi = INS_SUBC;
+ goto ASG_OPR;
+ case GT_ASG_ADD:
+ insLo = INS_add;
+ insHi = INS_ADDC;
+ goto ASG_OPR;
+
+ ASG_OPR:
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ __int64 lval = op2->gtLngCon.gtLconVal;
+
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG);
+
+ /* Optimize some special cases */
+
+ doLo = doHi = true;
+
+ /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
+
+ switch (oper)
+ {
+ case GT_ASG_AND:
+ if ((int)(lval) == -1)
+ doLo = false;
+ if ((int)(lval >> 32) == -1)
+ doHi = false;
+ break;
+
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ if (!(lval & 0x00000000FFFFFFFF))
+ doLo = false;
+ if (!(lval & 0xFFFFFFFF00000000))
+ doHi = false;
+ break;
+ }
+
+ if (doLo)
+ inst_TT_IV(insLo, op1, (int)(lval), 0);
+ if (doHi)
+ inst_TT_IV(insHi, op1, (int)(lval >> 32), 4);
+
+ bool isArith = (oper == GT_ASG_ADD || oper == GT_ASG_SUB);
+ if (doLo || doHi)
+ tree->gtFlags |= GTF_ZSF_SET;
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+ goto DONE_ASSG_REGS;
+ }
+
+ /* TODO: allow non-const long assignment operators */
+
+ noway_assert(!"non-const long asgop NYI");
+
+#endif // LONG_ASG_OPS
+
+ case GT_IND:
+ case GT_NULLCHECK:
+ {
+ regMaskTP tmpMask;
+ int hiFirst;
+
+ regMaskTP availMask = RBM_ALLINT & ~needReg;
+
+ /* Make sure the operand is addressable */
+
+ addrReg = genMakeAddressable(tree, availMask, RegSet::FREE_REG);
+
+ GenTreePtr addr = oper == GT_IND ? op1 : tree;
+
+ /* Pick a register for the value */
+
+ regPair = regSet.rsPickRegPair(needReg);
+ tmpMask = genRegPairMask(regPair);
+
+ /* Is there any overlap between the register pair and the address? */
+
+ hiFirst = FALSE;
+
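+ // Added summary of the cases handled below: if exactly one register of the target pair
+ // also forms part of the address, load the half whose register does not overlap first and
+ // the overlapping one last; if both registers overlap, try to pick a different free pair,
+ // or else grab one fresh register and reuse one of the address registers for the other half.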
+ if (tmpMask & addrReg)
+ {
+ /* Does one or both of the target registers overlap? */
+
+ if ((tmpMask & addrReg) != tmpMask)
+ {
+ /* Only one register overlaps */
+
+ noway_assert(genMaxOneBit(tmpMask & addrReg) == TRUE);
+
+ /* If the low register overlaps, load the upper half first */
+
+ if (addrReg & genRegMask(genRegPairLo(regPair)))
+ hiFirst = TRUE;
+ }
+ else
+ {
+ regMaskTP regFree;
+
+ /* The register completely overlaps with the address */
+
+ noway_assert(genMaxOneBit(tmpMask & addrReg) == FALSE);
+
+ /* Can we pick another pair easily? */
+
+ regFree = regSet.rsRegMaskFree() & ~addrReg;
+ if (needReg)
+ regFree &= needReg;
+
+ /* More than one free register available? */
+
+ if (regFree && !genMaxOneBit(regFree))
+ {
+ regPair = regSet.rsPickRegPair(regFree);
+ tmpMask = genRegPairMask(regPair);
+ }
+ else
+ {
+ // printf("Overlap: needReg = %08X\n", needReg);
+
+ // Reg-prediction won't allow this
+ noway_assert((regSet.rsMaskVars & addrReg) == 0);
+
+ // Grab one fresh reg, and use any one of addrReg
+
+ if (regFree) // Try to follow 'needReg'
+ regLo = regSet.rsGrabReg(regFree);
+ else // Pick any reg besides addrReg
+ regLo = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
+
+ unsigned regBit = 0x1;
+ regNumber regNo;
+
+ for (regNo = REG_INT_FIRST; regNo <= REG_INT_LAST; regNo = REG_NEXT(regNo), regBit <<= 1)
+ {
+ // Found one of addrReg. Use it.
+ if (regBit & addrReg)
+ break;
+ }
+ noway_assert(genIsValidReg(regNo)); // Should have found regNo
+
+ regPair = gen2regs2pair(regLo, regNo);
+ tmpMask = genRegPairMask(regPair);
+ }
+ }
+ }
+
+ /* Make sure the value is still addressable */
+
+ noway_assert(genStillAddressable(tree));
+
+ /* Figure out which registers the value is in */
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* The value in the register pair is about to be trashed */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Load the target registers from where the value is */
+
+ if (hiFirst)
+ {
+ inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
+ regSet.rsLockReg(genRegMask(regHi));
+ inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
+ regSet.rsUnlockReg(genRegMask(regHi));
+ }
+ else
+ {
+ inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
+ regSet.rsLockReg(genRegMask(regLo));
+ inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
+ regSet.rsUnlockReg(genRegMask(regLo));
+ }
+
+#ifdef _TARGET_ARM_
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ genUpdateLife(tree);
+ genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
+ }
+ goto DONE;
+
+ case GT_CAST:
+
+ /* What are we casting from? */
+
+ switch (op1->gtType)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ case TYP_INT:
+ case TYP_UBYTE:
+ case TYP_BYREF:
+ {
+ regMaskTP hiRegMask;
+ regMaskTP loRegMask;
+
+ // For an unsigned cast we don't need to sign-extend the 32 bit value
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ // Does needReg have exactly two bits set, thus specifying
+ // the exact register pair that we want to use?
+ if (!genMaxOneBit(needReg))
+ {
+ regPair = regSet.rsFindRegPairNo(needReg);
+ if (needReg != genRegPairMask(regPair))
+ goto ANY_FREE_REG_UNSIGNED;
+ loRegMask = genRegMask(genRegPairLo(regPair));
+ if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
+ goto ANY_FREE_REG_UNSIGNED;
+ hiRegMask = genRegMask(genRegPairHi(regPair));
+ }
+ else
+ {
+ ANY_FREE_REG_UNSIGNED:
+ loRegMask = needReg;
+ hiRegMask = needReg;
+ }
+
+ genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regLo = op1->gtRegNum;
+ loRegMask = genRegMask(regLo);
+ regSet.rsLockUsedReg(loRegMask);
+ regHi = regSet.rsPickReg(hiRegMask);
+ regSet.rsUnlockUsedReg(loRegMask);
+
+ regPair = gen2regs2pair(regLo, regHi);
+
+ // Move 0 to the higher word of the ULong
+ genSetRegToIcon(regHi, 0, TYP_INT);
+
+ /* We can now free up the operand */
+ genReleaseReg(op1);
+
+ goto DONE;
+ }
+#ifdef _TARGET_XARCH_
+ /* Cast of 'int' to 'long' --> Use cdq if EAX,EDX are available
+ and we need the result to be in those registers.
+ cdq is smaller so we use it for SMALL_CODE
+ */
+
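+ // Added note (sketch): with the 32-bit value in EAX, 'cdq' sign-extends it into EDX, so
+ // EDX:EAX holds the 64-bit result in one short instruction; the generic path below uses
+ // "mov regHi, regLo" followed by "sar regHi, 31" (folded into a single shift on ARM).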
+ if ((needReg & (RBM_EAX | RBM_EDX)) == (RBM_EAX | RBM_EDX) &&
+ (regSet.rsRegMaskFree() & RBM_EDX))
+ {
+ genCodeForTree(op1, RBM_EAX);
+ regSet.rsMarkRegUsed(op1);
+
+ /* If we have to spill EDX, might as well use the faster
+ sar as the spill will increase code size anyway */
+
+ if (op1->gtRegNum != REG_EAX || !(regSet.rsRegMaskFree() & RBM_EDX))
+ {
+ hiRegMask = regSet.rsRegMaskFree();
+ goto USE_SAR_FOR_CAST;
+ }
+
+ regSet.rsGrabReg(RBM_EDX);
+ regTracker.rsTrackRegTrash(REG_EDX);
+
+ /* Convert the int in EAX into a long in EDX:EAX */
+
+ instGen(INS_cdq);
+
+ /* The result is in EDX:EAX */
+
+ regPair = REG_PAIR_EAXEDX;
+ }
+ else
+#endif
+ {
+ /* use the sar instruction to sign-extend a 32-bit integer */
+
+ // Does needReg have exactly two bits set, thus specifying
+ // the exact register pair that we want to use?
+ if (!genMaxOneBit(needReg))
+ {
+ regPair = regSet.rsFindRegPairNo(needReg);
+ if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair)))
+ goto ANY_FREE_REG_SIGNED;
+ loRegMask = genRegMask(genRegPairLo(regPair));
+ if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
+ goto ANY_FREE_REG_SIGNED;
+ hiRegMask = genRegMask(genRegPairHi(regPair));
+ }
+ else
+ {
+ ANY_FREE_REG_SIGNED:
+ loRegMask = needReg;
+ hiRegMask = RBM_NONE;
+ }
+
+ genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
+#ifdef _TARGET_XARCH_
+ USE_SAR_FOR_CAST:
+#endif
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regLo = op1->gtRegNum;
+ loRegMask = genRegMask(regLo);
+ regSet.rsLockUsedReg(loRegMask);
+ regHi = regSet.rsPickReg(hiRegMask);
+ regSet.rsUnlockUsedReg(loRegMask);
+
+ regPair = gen2regs2pair(regLo, regHi);
+
+#ifdef _TARGET_ARM_
+ /* Copy the lo32 bits from regLo to regHi and sign-extend it */
+ // Use one instruction instead of two
+ getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31);
+#else
+ /* Copy the lo32 bits from regLo to regHi and sign-extend it */
+ inst_RV_RV(INS_mov, regHi, regLo, TYP_INT);
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
+#endif
+
+ /* The value in the upper register is trashed */
+
+ regTracker.rsTrackRegTrash(regHi);
+ }
+
+ /* We can now free up the operand */
+ genReleaseReg(op1);
+
+ // conv.ovf.u8 could overflow if the original number was negative
+ if (tree->gtOverflow() && TYP_ULONG == tree->CastToType())
+ {
+ regNumber hiReg = genRegPairHi(regPair);
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+ }
+ goto DONE;
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+#if 0
+ /* Load the FP value onto the coprocessor stack */
+
+ genCodeForTreeFlt(op1);
+
+ /* Allocate a temp for the long value */
+
+ temp = compiler->tmpGetTemp(TYP_LONG);
+
+ /* Store the FP value into the temp */
+
+ inst_FS_ST(INS_fistpl, sizeof(int), temp, 0);
+ genFPstkLevel--;
+
+ /* Pick a register pair for the value */
+
+ regPair = regSet.rsPickRegPair(needReg);
+
+ /* Figure out which registers the value is in */
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* The value in the register pair is about to be trashed */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Load the converted value into the registers */
+
+ inst_RV_ST(INS_mov, EA_4BYTE, regLo, temp, 0);
+ inst_RV_ST(INS_mov, EA_4BYTE, regHi, temp, 4);
+
+ /* We no longer need the temp */
+
+ compiler->tmpRlsTemp(temp);
+ goto DONE;
+#else
+ NO_WAY("Cast from TYP_FLOAT or TYP_DOUBLE supposed to be done via a helper call");
+ break;
+#endif
+ case TYP_LONG:
+ case TYP_ULONG:
+ {
+ noway_assert(tree->gtOverflow()); // conv.ovf.u8 or conv.ovf.i8
+
+ genComputeRegPair(op1, REG_PAIR_NONE, RBM_ALLINT & ~needReg, RegSet::FREE_REG);
+ regPair = op1->gtRegPair;
+
+ // Do we need to set the sign flag, or can we check whether it is already set
+ // and skip this test if so?
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regNumber hiReg = genRegPairHi(op1->gtRegPair);
+ noway_assert(hiReg != REG_STK);
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, 0, sizeof(int));
+ }
+
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+ goto DONE;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ NO_WAY("unexpected cast to long");
+ }
+ break;
+
+ case GT_RETURN:
+
+ /* TODO:
+ * This code is cloned from the regular processing of GT_RETURN values. We have to remember to
+ * call genPInvokeMethodEpilog anywhere that we have a GT_RETURN statement. We should really
+ * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
+ */
+
+ // TODO: this should be done AFTER we called exit mon so that
+ // we are sure that we don't have to keep 'this' alive
+
+ if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
+ {
+ /* either it's an "empty" statement or the return statement
+ of a synchronized method
+ */
+
+ genPInvokeMethodEpilog();
+ }
+
+#if CPU_LONG_USES_REGPAIR
+ /* There must be a long return value */
+
+ noway_assert(op1);
+
+ /* Evaluate the return value into EDX:EAX */
+
+ genEvalIntoFreeRegPair(op1, REG_LNGRET, avoidReg);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegPair == REG_LNGRET);
+
+#else
+ NYI("64-bit return");
+#endif
+
+#ifdef PROFILING_SUPPORTED
+ // The profiling hook does not trash registers, so it's safe to call after we emit the code for
+ // the GT_RETURN tree.
+
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ genProfilingLeaveCallback();
+ }
+#endif
+ return;
+
+ case GT_QMARK:
+ noway_assert(!"inliner-generated ?: for longs NYI");
+ NO_WAY("inliner-generated ?: for longs NYI");
+ break;
+
+ case GT_COMMA:
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ // Generate op2
+ genCodeForTreeLng(op2, needReg, avoidReg);
+ genUpdateLife(op2);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ regSet.rsMarkRegPairUsed(op2);
+
+ // Do side effects of op1
+ genEvalSideEffects(op1);
+
+ // Recover op2 if spilled
+ genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
+
+ genReleaseRegPair(op2);
+
+ genUpdateLife(tree);
+
+ regPair = op2->gtRegPair;
+ }
+ else
+ {
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Generate side effects of the first operand */
+
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+
+ /* Is the value of the second operand used? */
+
+ if (tree->gtType == TYP_VOID)
+ {
+ /* The right operand produces no result */
+
+ genEvalSideEffects(op2);
+ genUpdateLife(tree);
+ return;
+ }
+
+ /* Generate the second operand, i.e. the 'real' value */
+
+ genCodeForTreeLng(op2, needReg, avoidReg);
+
+ /* The result of 'op2' is also the final result */
+
+ regPair = op2->gtRegPair;
+ }
+
+ goto DONE;
+
+ case GT_BOX:
+ {
+ /* Generate the operand, i.e. the 'real' value */
+
+ genCodeForTreeLng(op1, needReg, avoidReg);
+
+ /* The result of 'op1' is also the final result */
+
+ regPair = op1->gtRegPair;
+ }
+
+ goto DONE;
+
+ case GT_NOP:
+ if (op1 == NULL)
+ return;
+
+ genCodeForTreeLng(op1, needReg, avoidReg);
+ regPair = op1->gtRegPair;
+ goto DONE;
+
+ default:
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected 64-bit operator");
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ regMaskTP retMask;
+ case GT_CALL:
+ retMask = genCodeForCall(tree, true);
+ if (retMask == RBM_NONE)
+ regPair = REG_PAIR_NONE;
+ else
+ regPair = regSet.rsFindRegPairNo(retMask);
+ break;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ NO_WAY("unexpected long operator");
+ }
+
+DONE:
+
+ genUpdateLife(tree);
+
+ /* Here we've computed the value of 'tree' into 'regPair' */
+
+ noway_assert(regPair != DUMMY_INIT(REG_PAIR_CORRUPT));
+
+ genMarkTreeInRegPair(tree, regPair);
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Generate code for a mod of a long by an int.
+ */
+
+regPairNo CodeGen::genCodeForLongModInt(GenTreePtr tree, regMaskTP needReg)
+{
+#ifdef _TARGET_X86_
+
+ regPairNo regPair;
+ regMaskTP addrReg;
+
+ genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ /* Codegen only for Unsigned MOD */
+ noway_assert(oper == GT_UMOD);
+
+ /* op2 must be a long constant in the range 2 to 0x3fffffff */
+
+ noway_assert((op2->gtOper == GT_CNS_LNG) && (op2->gtLngCon.gtLconVal >= 2) &&
+ (op2->gtLngCon.gtLconVal <= 0x3fffffff));
+ int val = (int)op2->gtLngCon.gtLconVal;
+
+ op2->ChangeOperConst(GT_CNS_INT); // it's effectively an integer constant
+
+ op2->gtType = TYP_INT;
+ op2->gtIntCon.gtIconVal = val;
+
+ /* Which operand are we supposed to compute first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Compute the second operand into a scratch register, other
+ than EAX or EDX */
+
+ needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
+
+ /* Special case: if op2 is a local var we are done */
+
+ if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
+ {
+ addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
+ }
+ else
+ {
+ genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+
+ /* Compute the first operand into EAX:EDX */
+
+ genComputeRegPair(op1, REG_PAIR_TMP, RBM_NONE, RegSet::KEEP_REG, true);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegPair == REG_PAIR_TMP);
+
+ /* And recover the second argument while locking the first one */
+
+ addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
+ }
+ else
+ {
+ /* Compute the first operand into EAX:EDX */
+
+ genComputeRegPair(op1, REG_PAIR_EAXEDX, RBM_NONE, RegSet::KEEP_REG, true);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegPair == REG_PAIR_TMP);
+
+ /* Compute the second operand into a scratch register, other
+ than EAX or EDX */
+
+ needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
+
+ /* Special case: if op2 is a local var we are done */
+
+ if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
+ {
+ addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
+ }
+ else
+ {
+ genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+
+ /* Recover the first argument */
+
+ genRecoverRegPair(op1, REG_PAIR_EAXEDX, RegSet::KEEP_REG);
+
+ /* And recover the second argument while locking the first one */
+
+ addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
+ }
+
+ /* At this point, EAX:EDX contains the 64-bit dividend and op2->gtRegNum
+ contains the 32-bit divisor. We want to generate the following code:
+
+ ==========================
+ Unsigned (GT_UMOD)
+
+ cmp edx, op2->gtRegNum
+ jb lab_no_overflow
+
+ mov temp, eax
+ mov eax, edx
+ xor edx, edx
+ div op2->gtRegNum
+ mov eax, temp
+
+ lab_no_overflow:
+ div op2->gtRegNum
+ ==========================
+ This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c
+ */
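+
+ /* Numeric sanity check of the identity (illustration only), with a = 25, b = 7, c = 10:
+ (25 * 2^32 + 7) % 10 = 107374182407 % 10 = 7
+ ((25 % 10) * 2^32 + 7) % 10 = (5 * 2^32 + 7) % 10 = 21474836487 % 10 = 7
+ Reducing the high dword first keeps the quotient of the final 'div' within 32 bits. */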
+
+ BasicBlock* lab_no_overflow = genCreateTempLabel();
+
+ // grab a temporary register other than eax, edx, and op2->gtRegNum
+
+ regNumber tempReg = regSet.rsGrabReg(RBM_ALLINT & ~(RBM_PAIR_TMP | genRegMask(op2->gtRegNum)));
+
+ // EAX and tempReg will be trashed by the mov instructions. Doing
+ // this early won't hurt, and might prevent confusion in genSetRegToIcon.
+
+ regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
+ regTracker.rsTrackRegTrash(tempReg);
+
+ inst_RV_RV(INS_cmp, REG_PAIR_TMP_HI, op2->gtRegNum);
+ inst_JMP(EJ_jb, lab_no_overflow);
+
+ inst_RV_RV(INS_mov, tempReg, REG_PAIR_TMP_LO, TYP_INT);
+ inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
+ genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
+ inst_TT(INS_UNSIGNED_DIVIDE, op2);
+ inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, tempReg, TYP_INT);
+
+ // Jump point for no overflow divide
+
+ genDefineTempLabel(lab_no_overflow);
+
+ // Issue the divide instruction
+
+ inst_TT(INS_UNSIGNED_DIVIDE, op2);
+
+ /* EAX, EDX, tempReg and op2->gtRegNum are now trashed */
+
+ regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
+ regTracker.rsTrackRegTrash(REG_PAIR_TMP_HI);
+ regTracker.rsTrackRegTrash(tempReg);
+ regTracker.rsTrackRegTrash(op2->gtRegNum);
+
+ if (tree->gtFlags & GTF_MOD_INT_RESULT)
+ {
+ /* We don't need to normalize the result, because the caller wants
+ an int (in edx) */
+
+ regPair = REG_PAIR_TMP_REVERSE;
+ }
+ else
+ {
+ /* The result is now in EDX; we have to normalize it, i.e. we have
+ to issue:
+ mov eax, edx; xor edx, edx (for UMOD)
+ */
+
+ inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
+
+ genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
+
+ regPair = REG_PAIR_TMP;
+ }
+
+ genReleaseRegPair(op1);
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ return regPair;
+
+#else // !_TARGET_X86_
+
+ NYI("codegen for LongModInt");
+
+ return REG_PAIR_NONE;
+
+#endif // !_TARGET_X86_
+}
+
+// Given a tree, return the number of registers that are currently
+// used to hold enregistered integer local variables.
+// Note that an enregistered TYP_LONG can take 1 or 2 registers.
+unsigned CodeGen::genRegCountForLiveIntEnregVars(GenTreePtr tree)
+{
+ unsigned regCount = 0;
+
+ VARSET_ITER_INIT(compiler, iter, compiler->compCurLife, varNum);
+ while (iter.NextElem(compiler, &varNum))
+ {
+ unsigned lclNum = compiler->lvaTrackedToVarNum[varNum];
+ LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
+
+ if (varDsc->lvRegister && !varTypeIsFloating(varDsc->TypeGet()))
+ {
+ ++regCount;
+
+ if (varTypeIsLong(varDsc->TypeGet()))
+ {
+ // For enregistered LONG/ULONG, the lower half should always be in a register.
+ noway_assert(varDsc->lvRegNum != REG_STK);
+
+ // If the LONG/ULONG is NOT partially enregistered, then the upper half should be in a register as
+ // well.
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ ++regCount;
+ }
+ }
+ }
+ }
+
+ return regCount;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+#if CPU_HAS_FP_SUPPORT
+/*****************************************************************************
+ *
+ * Generate code for a floating-point operation.
+ */
+
+void CodeGen::genCodeForTreeFlt(GenTreePtr tree,
+ regMaskTP needReg, /* = RBM_ALLFLOAT */
+ regMaskTP bestReg) /* = RBM_NONE */
+{
+ genCodeForTreeFloat(tree, needReg, bestReg);
+
+ if (tree->OperGet() == GT_RETURN)
+ {
+ // Make sure to get ALL THE EPILOG CODE
+
+ // TODO: this should be done AFTER we called exit mon so that
+ // we are sure that we don't have to keep 'this' alive
+
+ if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
+ {
+ /* either it's an "empty" statement or the return statement
+ of a synchronized method
+ */
+
+ genPInvokeMethodEpilog();
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // The profiling hook does not trash registers, so it's safe to call after we emit the code for
+ // the GT_RETURN tree.
+
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ genProfilingLeaveCallback();
+ }
+#endif
+ }
+}
+
+/*****************************************************************************/
+#endif // CPU_HAS_FP_SUPPORT
+
+/*****************************************************************************
+ *
+ * Generate a table switch - the switch value (0-based) is in register 'reg'.
+ */
+
+void CodeGen::genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab)
+{
+ unsigned jmpTabBase;
+
+ if (jumpCnt == 1)
+ {
+ // In debug code, we don't optimize away the trivial switch statements. So we can get here with a
+ // BBJ_SWITCH with only a default case. Therefore, don't generate the switch table.
+ noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
+ inst_JMP(EJ_jmp, jumpTab[0]);
+ return;
+ }
+
+ noway_assert(jumpCnt >= 2);
+
+ /* Is the number of cases right for a test and jump switch? */
+
+ const bool fFirstCaseFollows = (compiler->compCurBB->bbNext == jumpTab[0]);
+ const bool fDefaultFollows = (compiler->compCurBB->bbNext == jumpTab[jumpCnt - 1]);
+ const bool fHaveScratchReg = ((regSet.rsRegMaskFree() & genRegMask(reg)) != 0);
+
+ unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
+
+ // This means really just a single cmp/jcc (aka a simple if/else)
+ if (fFirstCaseFollows || fDefaultFollows)
+ minSwitchTabJumpCnt++;
+
+#ifdef _TARGET_ARM_
+ // On the ARM for small switch tables we will
+ // generate a sequence of compare and branch instructions
+ // because the code to load the base of the switch
+ // table is huge and hideous due to the relocation... :(
+ //
+ minSwitchTabJumpCnt++;
+ if (fHaveScratchReg)
+ minSwitchTabJumpCnt++;
+
+#endif // _TARGET_ARM_
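+
+ // Net effect of the adjustments above (summary added for clarity): on x86 a jump table is
+ // used for as few as two cases (three when the first case or the default follows), while on
+ // ARM the compare-and-branch sequence is kept for one or two more cases, since materializing
+ // the table base requires an expensive relocation sequence.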
+
+ if (jumpCnt < minSwitchTabJumpCnt)
+ {
+ /* Does the first case label follow? */
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+
+ if (fFirstCaseFollows)
+ {
+ /* Check for the default case */
+ inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
+
+ /* No need to jump to the first case */
+
+ jumpCnt -= 2;
+ jumpTab += 1;
+
+ /* Generate a series of "sub reg, 1; je label" */
+
+ // Make sure that we can trash the register so
+ // that we can generate a series of compares and jumps
+ //
+ if ((jumpCnt > 0) && !fHaveScratchReg)
+ {
+ regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
+ inst_RV_RV(INS_mov, tmpReg, reg);
+ regTracker.rsTrackRegTrash(tmpReg);
+ reg = tmpReg;
+ }
+
+ while (jumpCnt > 0)
+ {
+ inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
+ inst_JMP(jmpEqual, *jumpTab++);
+ jumpCnt--;
+ }
+ }
+ else
+ {
+ /* Check for case0 first */
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); // set flags
+ inst_JMP(jmpEqual, *jumpTab);
+
+ /* No need to jump to the first case or the default */
+
+ jumpCnt -= 2;
+ jumpTab += 1;
+
+ /* Generate a series of "sub reg, 1; je label" */
+
+ // Make sure that we can trash the register so
+ // that we can generate a series of compares and jumps
+ //
+ if ((jumpCnt > 0) && !fHaveScratchReg)
+ {
+ regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
+ inst_RV_RV(INS_mov, tmpReg, reg);
+ regTracker.rsTrackRegTrash(tmpReg);
+ reg = tmpReg;
+ }
+
+ while (jumpCnt > 0)
+ {
+ inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
+ inst_JMP(jmpEqual, *jumpTab++);
+ jumpCnt--;
+ }
+
+ if (!fDefaultFollows)
+ {
+ inst_JMP(EJ_jmp, *jumpTab);
+ }
+ }
+
+ if ((fFirstCaseFollows || fDefaultFollows) &&
+ compiler->fgInDifferentRegions(compiler->compCurBB, compiler->compCurBB->bbNext))
+ {
+ inst_JMP(EJ_jmp, compiler->compCurBB->bbNext);
+ }
+
+ return;
+ }
+
+ /* First take care of the default case */
+
+ inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
+
+ /* Generate the jump table contents */
+
+ jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCnt - 1, false);
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
+#endif
+
+ for (unsigned index = 0; index < jumpCnt - 1; index++)
+ {
+ BasicBlock* target = jumpTab[index];
+
+ noway_assert(target->bbFlags & BBF_JMP_TARGET);
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
+#endif
+
+ getEmitter()->emitDataGenData(index, target);
+ }
+
+ getEmitter()->emitDataGenEnd();
+
+#ifdef _TARGET_ARM_
+ // We need to load the address of the table into a register.
+ // The data section might get placed a long distance away, so we
+ // can't safely do a PC-relative ADR. :(
+ // Pick any register except the index register.
+ //
+ regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
+ getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
+ regTracker.rsTrackRegTrash(regTabBase);
+
+ // LDR PC, [regTableBase + reg * 4] (encoded as LDR PC, [regTableBase, reg, LSL 2])
+ getEmitter()->emitIns_R_ARX(INS_ldr, EA_PTRSIZE, REG_PC, regTabBase, reg, TARGET_POINTER_SIZE, 0);
+
+#else // !_TARGET_ARM_
+
+ getEmitter()->emitIns_IJ(EA_4BYTE_DSP_RELOC, reg, jmpTabBase);
+
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a switch statement.
+ */
+
+void CodeGen::genCodeForSwitch(GenTreePtr tree)
+{
+ unsigned jumpCnt;
+ BasicBlock** jumpTab;
+
+ GenTreePtr oper;
+ regNumber reg;
+
+ noway_assert(tree->gtOper == GT_SWITCH);
+ oper = tree->gtOp.gtOp1;
+ noway_assert(genActualTypeIsIntOrI(oper->gtType));
+
+ /* Get hold of the jump table */
+
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
+
+ jumpCnt = compiler->compCurBB->bbJumpSwt->bbsCount;
+ jumpTab = compiler->compCurBB->bbJumpSwt->bbsDstTab;
+
+ /* Compute the switch value into some register */
+
+ genCodeForTree(oper, 0);
+
+ /* Get hold of the register the value is in */
+
+ noway_assert(oper->gtFlags & GTF_REG_VAL);
+ reg = oper->gtRegNum;
+
+#if FEATURE_STACK_FP_X87
+ if (!compCurFPState.IsEmpty())
+ {
+ return genTableSwitchStackFP(reg, jumpCnt, jumpTab);
+ }
+ else
+#endif // FEATURE_STACK_FP_X87
+ {
+ return genTableSwitch(reg, jumpCnt, jumpTab);
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ * Emit a call to a helper function.
+ */
+
+// inline
+void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize)
+{
+ // Can we call the helper function directly?
+
+ void *addr = NULL, **pAddr = NULL;
+
+#if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED)
+ // Don't ask VM if it hasn't requested ELT hooks
+ if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled &&
+ (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE ||
+ helper == CORINFO_HELP_PROF_FCN_TAILCALL))
+ {
+ addr = compiler->compProfilerMethHnd;
+ }
+ else
+#endif
+ {
+ addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr);
+ }
+
+#ifdef _TARGET_ARM_
+ if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr))
+ {
+ // Load the address into a register and call through a register
+ regNumber indCallReg =
+ regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
+ if (addr)
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
+ regTracker.rsTrackRegTrash(indCallReg);
+ }
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper),
+ INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr
+ argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, // ilOffset
+ indCallReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helper),
+ (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+ else
+ {
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper),
+ INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
+ 0, /* ilOffset, ireg, xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helper),
+ (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+#else
+
+ {
+ emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
+
+ if (!addr)
+ {
+ callType = emitter::EC_FUNC_TOKEN_INDIR;
+ addr = pAddr;
+ }
+
+ getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr,
+ argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
+ 0, /* ilOffset, ireg, xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helper));
+ }
+#endif
+
+ regTracker.rsTrashRegSet(RBM_CALLEE_TRASH);
+ regTracker.rsTrashRegsForGCInterruptability();
+}
+
+/*****************************************************************************
+ *
+ * Push the given registers.
+ * This function does not check if the register is marked as used, etc.
+ */
+
+regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
+{
+ *byrefRegs = RBM_NONE;
+ *noRefRegs = RBM_NONE;
+
+ // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
+
+ if (regs == RBM_NONE)
+ return RBM_NONE;
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ NYI("Don't call genPushRegs with real regs!");
+ return RBM_NONE;
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+ noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
+ noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
+
+ regMaskTP pushedRegs = regs;
+
+ for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
+ {
+ regMaskTP regBit = regMaskTP(1) << reg;
+
+ if ((regBit & regs) == RBM_NONE)
+ continue;
+
+ var_types type;
+ if (regBit & gcInfo.gcRegGCrefSetCur)
+ {
+ type = TYP_REF;
+ }
+ else if (regBit & gcInfo.gcRegByrefSetCur)
+ {
+ *byrefRegs |= regBit;
+ type = TYP_BYREF;
+ }
+ else if (noRefRegs != NULL)
+ {
+ *noRefRegs |= regBit;
+ type = TYP_I_IMPL;
+ }
+ else
+ {
+ continue;
+ }
+
+ inst_RV(INS_push, reg, type);
+
+ genSinglePush();
+ gcInfo.gcMarkRegSetNpt(regBit);
+
+ regs &= ~regBit;
+ }
+
+ return pushedRegs;
+
+#endif // FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Pop the registers pushed by genPushRegs()
+ */
+
+void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
+{
+ if (regs == RBM_NONE)
+ return;
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ NYI("Don't call genPopRegs with real regs!");
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+ noway_assert((regs & byrefRegs) == byrefRegs);
+ noway_assert((regs & noRefRegs) == noRefRegs);
+ // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
+ noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
+
+ noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
+ noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
+
+ // Walk the registers in the reverse order of genPushRegs()
+ for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
+ {
+ regMaskTP regBit = regMaskTP(1) << reg;
+
+ if ((regBit & regs) == RBM_NONE)
+ continue;
+
+ var_types type;
+ if (regBit & byrefRegs)
+ {
+ type = TYP_BYREF;
+ }
+ else if (regBit & noRefRegs)
+ {
+ type = TYP_INT;
+ }
+ else
+ {
+ type = TYP_REF;
+ }
+
+ inst_RV(INS_pop, reg, type);
+ genSinglePop();
+
+ if (type != TYP_INT)
+ gcInfo.gcMarkRegPtrVal(reg, type);
+
+ regs &= ~regBit;
+ }
+
+#endif // FEATURE_FIXED_OUT_ARGS
+}
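+
+// Typical pairing of genPushRegs/genPopRegs (sketch; 'regsToSave' is a placeholder name):
+//
+// regMaskTP byrefRegs, noRefRegs;
+// regMaskTP pushedRegs = genPushRegs(regsToSave, &byrefRegs, &noRefRegs);
+// ... emit code that may trash these registers ...
+// genPopRegs(pushedRegs, byrefRegs, noRefRegs);
+//
+// The byref/noRef masks returned by genPushRegs must be handed back to genPopRegs so the
+// GC information for the popped registers can be restored.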
+
+/*****************************************************************************
+ *
+ * Push the given argument list, right to left; returns the total amount of
+ * stuff pushed.
+ */
+
+#if !FEATURE_FIXED_OUT_ARGS
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+size_t CodeGen::genPushArgList(GenTreePtr call)
+{
+ GenTreeArgList* regArgs = call->gtCall.gtCallLateArgs;
+ size_t size = 0;
+ regMaskTP addrReg;
+
+ GenTreeArgList* args;
+ // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
+ // so we can iterate over this argument list more uniformly.
+ // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
+ GenTreeArgList firstForObjp(/*temp dummy arg*/ call, call->gtCall.gtCallArgs);
+ if (call->gtCall.gtCallObjp == NULL)
+ {
+ args = call->gtCall.gtCallArgs;
+ }
+ else
+ {
+ firstForObjp.Current() = call->gtCall.gtCallObjp;
+ args = &firstForObjp;
+ }
+
+ GenTreePtr curr;
+ var_types type;
+ size_t opsz;
+
+ for (; args; args = args->Rest())
+ {
+ addrReg = DUMMY_INIT(RBM_CORRUPT); // to detect uninitialized use
+
+ /* Get hold of the next argument value */
+ curr = args->Current();
+
+ if (curr->IsArgPlaceHolderNode())
+ {
+ assert(curr->gtFlags & GTF_LATE_ARG);
+
+ addrReg = 0;
+ continue;
+ }
+
+ // If we have a comma expression, eval the non-last, then deal with the last.
+ if (!(curr->gtFlags & GTF_LATE_ARG))
+ curr = genCodeForCommaTree(curr);
+
+ /* See what type of a value we're passing */
+ type = curr->TypeGet();
+
+ opsz = genTypeSize(genActualType(type));
+
+ switch (type)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_SHORT:
+ case TYP_CHAR:
+ case TYP_UBYTE:
+
+ /* Don't want to push a small value, make it a full word */
+
+ genCodeForTree(curr, 0);
+
+ __fallthrough; // now the value should be in a register ...
+
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+#endif
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ assert(curr->gtOper == GT_ASG);
+ /* one more argument will be passed in a register */
+ noway_assert(intRegState.rsCurRegArgNum < MAX_REG_ARG);
+
+ /* arg is passed in the register, nothing on the stack */
+
+ opsz = 0;
+ }
+
+ /* Is this value a handle? */
+
+ if (curr->gtOper == GT_CNS_INT && curr->IsIconHandle())
+ {
+ /* Emit a fixup for the push instruction */
+
+ inst_IV_handle(INS_push, curr->gtIntCon.gtIconVal);
+ genSinglePush();
+
+ addrReg = 0;
+ break;
+ }
+
+ /* Is the value a constant? */
+
+ if (curr->gtOper == GT_CNS_INT)
+ {
+
+#if REDUNDANT_LOAD
+ regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
+
+ if (reg != REG_NA)
+ {
+ inst_RV(INS_push, reg, TYP_INT);
+ }
+ else
+#endif
+ {
+ inst_IV(INS_push, curr->gtIntCon.gtIconVal);
+ }
+
+ /* If the type is TYP_REF, then this must be a "null". So we can
+ treat it as a TYP_INT as we don't need to report it as a GC ptr */
+
+ noway_assert(curr->TypeGet() == TYP_INT ||
+ (varTypeIsGC(curr->TypeGet()) && curr->gtIntCon.gtIconVal == 0));
+
+ genSinglePush();
+
+ addrReg = 0;
+ break;
+ }
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ /* This must be a register arg temp assignment */
+
+ noway_assert(curr->gtOper == GT_ASG);
+
+ /* Evaluate it to the temp */
+
+ genCodeForTree(curr, 0);
+
+ /* Increment the current argument register counter */
+
+ intRegState.rsCurRegArgNum++;
+
+ addrReg = 0;
+ }
+ else
+ {
+ /* This is a 32-bit integer non-register argument */
+
+ addrReg = genMakeRvalueAddressable(curr, 0, RegSet::KEEP_REG, false);
+ inst_TT(INS_push, curr);
+ genSinglePush();
+ genDoneAddressable(curr, addrReg, RegSet::KEEP_REG);
+ }
+ break;
+
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+
+ /* Is the value a constant? */
+
+ if (curr->gtOper == GT_CNS_LNG)
+ {
+ inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal >> 32));
+ genSinglePush();
+ inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal));
+ genSinglePush();
+
+ addrReg = 0;
+ }
+ else
+ {
+ addrReg = genMakeAddressable(curr, 0, RegSet::FREE_REG);
+
+ inst_TT(INS_push, curr, sizeof(int));
+ genSinglePush();
+ inst_TT(INS_push, curr);
+ genSinglePush();
+ }
+ break;
+
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+#endif
+#if FEATURE_STACK_FP_X87
+ addrReg = genPushArgumentStackFP(curr);
+#else
+ NYI("FP codegen");
+ addrReg = 0;
+#endif
+ break;
+
+ case TYP_VOID:
+
+ /* Is this a nothing node, deferred register argument? */
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ GenTree* arg = curr;
+ if (arg->gtOper == GT_COMMA)
+ {
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+ if (!arg->IsNothingNode())
+ {
+ genEvalSideEffects(arg);
+ genUpdateLife(arg);
+ }
+ }
+
+ /* increment the register count and continue with the next argument */
+
+ intRegState.rsCurRegArgNum++;
+
+ noway_assert(opsz == 0);
+
+ addrReg = 0;
+ break;
+ }
+
+ __fallthrough;
+
+ case TYP_STRUCT:
+ {
+ GenTree* arg = curr;
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+
+ noway_assert(arg->gtOper == GT_OBJ || arg->gtOper == GT_MKREFANY || arg->gtOper == GT_IND);
+ noway_assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
+ noway_assert(addrReg == DUMMY_INIT(RBM_CORRUPT));
+
+ if (arg->gtOper == GT_MKREFANY)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ GenTreePtr op2 = arg->gtOp.gtOp2;
+
+ addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG);
+
+ /* Is this value a handle? */
+ if (op2->gtOper == GT_CNS_INT && op2->IsIconHandle())
+ {
+ /* Emit a fixup for the push instruction */
+
+ inst_IV_handle(INS_push, op2->gtIntCon.gtIconVal);
+ genSinglePush();
+ }
+ else
+ {
+ regMaskTP addrReg2 = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
+ inst_TT(INS_push, op2);
+ genSinglePush();
+ genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
+ }
+ addrReg = genKeepAddressable(op1, addrReg);
+ inst_TT(INS_push, op1);
+ genSinglePush();
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ opsz = 2 * TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ noway_assert(arg->gtOper == GT_OBJ);
+
+ if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ GenTreePtr structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+ // As much as we would like this to be a noway_assert, we can't because
+ // there are some weird casts out there, and backwards compatibility
+ // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
+ // lvPromoted in general currently do not require the local to be
+ // TYP_STRUCT, so this assert is really more about how we wish the world
+ // was than some JIT invariant.
+ assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted &&
+ promotionType ==
+ Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
+ {
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+
+ addrReg = 0;
+
+ // Get the number of BYTES to copy to the stack
+ opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
+ size_t bytesToBeCopied = opsz;
+
+ // postponedFields is true if we have any postponed fields
+ // Any field that does not start on a 4-byte boundary is a postponed field
+ // Such a field is required to be a short or a byte
+ //
+ // postponedRegKind records the kind of scratch register we will
+ // need to process the postponed fields
+ // RBM_NONE means that we don't need a register
+ //
+ // expectedAlignedOffset records the aligned offset that
+ // has to exist for a push to cover the postponed fields.
+ // Since all promoted structs have the tightly packed property
+ // we are guaranteed that we will have such a push
+ //
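+ // For illustration (assumed layout, not from the original source): in a promoted struct
+ // laid out as { int a @ 0, short b @ 4, short c @ 6 }, field 'c' does not start on a
+ // 4-byte boundary and is postponed; the 4-byte push that covers offset 4 (field 'b') is
+ // the push expected at expectedAlignedOffset, and the postponed short is stored into
+ // that stack slot by the later loop.
+ //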
+ bool postponedFields = false;
+ regMaskTP postponedRegKind = RBM_NONE;
+ size_t expectedAlignedOffset = UINT_MAX;
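+                        // Worked example (hypothetical 8-byte promoted struct, for illustration only):
+                        // given { int i; short s; byte b; } with field offsets 0, 4 and 6, the reverse
+                        // loop below postpones 'b' (offset 6 is not 4-byte aligned) and sets
+                        // expectedAlignedOffset to 4; the aligned push of 's' at offset 4 then covers
+                        // that slot, and the forward loop afterwards stores 'b' into [ESP+6] using a
+                        // byte-able scratch register.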
+
+ VARSET_TP* deadVarBits = NULL;
+ compiler->GetPromotedStructDeathVars()->Lookup(structLocalTree, &deadVarBits);
+
+ // Reverse loop, starts pushing from the end of the struct (i.e. the highest field offset)
+ //
+ for (int varNum = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
+ varNum >= (int)varDsc->lvFieldLclStart; varNum--)
+ {
+ LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
+#ifdef DEBUG
+ if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
+ {
+ noway_assert(fieldVarDsc->lvFldOffset % (2 * sizeof(unsigned)) == 0);
+ noway_assert(fieldVarDsc->lvFldOffset + (2 * sizeof(unsigned)) == bytesToBeCopied);
+ }
+#endif
+ // Whenever we see a stack-aligned fieldVarDsc then we use 4-byte push instruction(s)
+ // For packed structs we will go back and store the unaligned bytes and shorts
+ // in the next loop
+ //
+ if (fieldVarDsc->lvStackAligned())
+ {
+ if (fieldVarDsc->lvExactSize != 2 * sizeof(unsigned) &&
+ fieldVarDsc->lvFldOffset + sizeof(void*) != bytesToBeCopied)
+ {
+                                    // Might need 4 bytes of padding for fields other than LONG and DOUBLE.
+                                    // Just push some junk (i.e. EAX) on the stack.
+ inst_RV(INS_push, REG_EAX, TYP_INT);
+ genSinglePush();
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+
+ // If we have an expectedAlignedOffset make sure that this push instruction
+ // is what we expect to cover the postponedFields
+ //
+ if (expectedAlignedOffset != UINT_MAX)
+ {
+ // This push must be for a small field
+ noway_assert(fieldVarDsc->lvExactSize < 4);
+ // The fldOffset for this push should be equal to the expectedAlignedOffset
+ noway_assert(fieldVarDsc->lvFldOffset == expectedAlignedOffset);
+ expectedAlignedOffset = UINT_MAX;
+ }
+
+ // Push the "upper half" of LONG var first
+
+ if (isRegPairType(fieldVarDsc->lvType))
+ {
+ if (fieldVarDsc->lvOtherReg != REG_STK)
+ {
+ inst_RV(INS_push, fieldVarDsc->lvOtherReg, TYP_INT);
+ genSinglePush();
+
+ // Prepare the set of vars to be cleared from gcref/gcbyref set
+ // in case they become dead after genUpdateLife.
+ // genDoneAddressable() will remove dead gc vars by calling
+ // gcInfo.gcMarkRegSetNpt.
+ // Although it is not addrReg, we just borrow the name here.
+ addrReg |= genRegMask(fieldVarDsc->lvOtherReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
+ genSinglePush();
+ }
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+
+ // Push the "upper half" of DOUBLE var if it is not enregistered.
+
+ if (fieldVarDsc->lvType == TYP_DOUBLE)
+ {
+ if (!fieldVarDsc->lvRegister)
+ {
+ getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
+ genSinglePush();
+ }
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+
+ //
+ // Push the field local.
+ //
+
+ if (fieldVarDsc->lvRegister)
+ {
+ if (!varTypeIsFloating(genActualType(fieldVarDsc->TypeGet())))
+ {
+ inst_RV(INS_push, fieldVarDsc->lvRegNum,
+ genActualType(fieldVarDsc->TypeGet()));
+ genSinglePush();
+
+ // Prepare the set of vars to be cleared from gcref/gcbyref set
+ // in case they become dead after genUpdateLife.
+ // genDoneAddressable() will remove dead gc vars by calling
+ // gcInfo.gcMarkRegSetNpt.
+ // Although it is not addrReg, we just borrow the name here.
+ addrReg |= genRegMask(fieldVarDsc->lvRegNum);
+ }
+ else
+ {
+ // Must be TYP_FLOAT or TYP_DOUBLE
+ noway_assert(fieldVarDsc->lvRegNum != REG_FPNONE);
+
+ noway_assert(fieldVarDsc->lvExactSize == sizeof(unsigned) ||
+ fieldVarDsc->lvExactSize == 2 * sizeof(unsigned));
+
+ inst_RV_IV(INS_sub, REG_SPBASE, fieldVarDsc->lvExactSize, EA_PTRSIZE);
+
+ genSinglePush();
+ if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
+ {
+ genSinglePush();
+ }
+
+#if FEATURE_STACK_FP_X87
+ GenTree* fieldTree = new (compiler, GT_REG_VAR)
+ GenTreeLclVar(fieldVarDsc->lvType, varNum, BAD_IL_OFFSET);
+ fieldTree->gtOper = GT_REG_VAR;
+ fieldTree->gtRegNum = fieldVarDsc->lvRegNum;
+ fieldTree->gtRegVar.gtRegNum = fieldVarDsc->lvRegNum;
+ if ((arg->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ if (fieldVarDsc->lvTracked &&
+ (deadVarBits == NULL ||
+ VarSetOps::IsMember(compiler, *deadVarBits,
+ fieldVarDsc->lvVarIndex)))
+ {
+ fieldTree->gtFlags |= GTF_VAR_DEATH;
+ }
+ }
+ genCodeForTreeStackFP_Leaf(fieldTree);
+
+ // Take reg to top of stack
+
+ FlatFPX87_MoveToTOS(&compCurFPState, fieldTree->gtRegNum);
+
+ // Pop it off to stack
+ compCurFPState.Pop();
+
+ getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(fieldVarDsc->lvExactSize),
+ REG_NA, REG_SPBASE, 0);
+#else
+ NYI_FLAT_FP_X87("FP codegen");
+#endif
+ }
+ }
+ else
+ {
+ getEmitter()->emitIns_S(INS_push,
+ (fieldVarDsc->TypeGet() == TYP_REF) ? EA_GCREF
+ : EA_4BYTE,
+ varNum, 0);
+ genSinglePush();
+ }
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+ else // not stack aligned
+ {
+ noway_assert(fieldVarDsc->lvExactSize < 4);
+
+ // We will need to use a store byte or store word
+ // to set this unaligned location
+ postponedFields = true;
+
+ if (expectedAlignedOffset != UINT_MAX)
+ {
+ // This should never change until it is set back to UINT_MAX by an aligned
+ // offset
+ noway_assert(expectedAlignedOffset ==
+ roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*));
+ }
+
+ expectedAlignedOffset =
+ roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*);
+
+ noway_assert(expectedAlignedOffset < bytesToBeCopied);
+
+ if (fieldVarDsc->lvRegister)
+ {
+ // Do we need to use a byte-able register?
+ if (fieldVarDsc->lvExactSize == 1)
+ {
+                                        // Did we enregister fieldVarDsc in a non byte-able register?
+ if ((genRegMask(fieldVarDsc->lvRegNum) & RBM_BYTE_REGS) == 0)
+ {
+ // then we will need to grab a byte-able register
+ postponedRegKind = RBM_BYTE_REGS;
+ }
+ }
+ }
+ else // not enregistered
+ {
+ if (fieldVarDsc->lvExactSize == 1)
+ {
+ // We will need to grab a byte-able register
+ postponedRegKind = RBM_BYTE_REGS;
+ }
+ else
+ {
+ // We will need to grab any scratch register
+ if (postponedRegKind != RBM_BYTE_REGS)
+ postponedRegKind = RBM_ALLINT;
+ }
+ }
+ }
+ }
+
+ // Now we've pushed all of the aligned fields.
+ //
+ // We should have pushed bytes equal to the entire struct
+ noway_assert(bytesToBeCopied == 0);
+
+ // We should have seen a push that covers every postponed field
+ noway_assert(expectedAlignedOffset == UINT_MAX);
+
+ // Did we have any postponed fields?
+ if (postponedFields)
+ {
+ regNumber regNum = REG_STK; // means no register
+
+ // If we needed a scratch register then grab it here
+
+ if (postponedRegKind != RBM_NONE)
+ regNum = regSet.rsGrabReg(postponedRegKind);
+
+ // Forward loop, starts from the lowest field offset
+ //
+ for (unsigned varNum = varDsc->lvFieldLclStart;
+ varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
+ {
+ LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
+
+ // All stack aligned fields have already been pushed
+ if (fieldVarDsc->lvStackAligned())
+ continue;
+
+ // We have a postponed field
+
+ // It must be a byte or a short
+ noway_assert(fieldVarDsc->lvExactSize < 4);
+
+ // Is the field enregistered?
+ if (fieldVarDsc->lvRegister)
+ {
+ // Frequently we can just use that register
+ regNumber tmpRegNum = fieldVarDsc->lvRegNum;
+
+ // Do we need to use a byte-able register?
+ if (fieldVarDsc->lvExactSize == 1)
+ {
+ // Did we enregister the field in a non byte-able register?
+ if ((genRegMask(tmpRegNum) & RBM_BYTE_REGS) == 0)
+ {
+ // then we will need to use the byte-able register 'regNum'
+ noway_assert((genRegMask(regNum) & RBM_BYTE_REGS) != 0);
+
+ // Copy the register that contains fieldVarDsc into 'regNum'
+ getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, regNum,
+ fieldVarDsc->lvRegNum);
+ regTracker.rsTrackRegLclVar(regNum, varNum);
+
+ // tmpRegNum is the register that we will extract the byte value from
+ tmpRegNum = regNum;
+ }
+ noway_assert((genRegMask(tmpRegNum) & RBM_BYTE_REGS) != 0);
+ }
+
+ getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
+ (emitAttr)fieldVarDsc->lvExactSize, tmpRegNum,
+ REG_SPBASE, fieldVarDsc->lvFldOffset);
+ }
+ else // not enregistered
+ {
+ // We will copy the non-enregister fieldVar into our scratch register 'regNum'
+
+ noway_assert(regNum != REG_STK);
+ getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
+ (emitAttr)fieldVarDsc->lvExactSize, regNum, varNum,
+ 0);
+
+ regTracker.rsTrackRegLclVar(regNum, varNum);
+
+ // Store the value (byte or short) into the stack
+
+ getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
+ (emitAttr)fieldVarDsc->lvExactSize, regNum,
+ REG_SPBASE, fieldVarDsc->lvFldOffset);
+ }
+ }
+ }
+ genUpdateLife(structLocalTree);
+
+ break;
+ }
+ }
+
+ genCodeForTree(arg->gtObj.gtOp1, 0);
+ noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
+ regNumber reg = arg->gtObj.gtOp1->gtRegNum;
+                // Get the number of BYTES to copy to the stack
+ opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
+ unsigned slots = (unsigned)(opsz / sizeof(void*));
+
+ BYTE* gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+
+ compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
+
+ BOOL bNoneGC = TRUE;
+ for (int i = slots - 1; i >= 0; --i)
+ {
+ if (gcLayout[i] != TYPE_GC_NONE)
+ {
+ bNoneGC = FALSE;
+ break;
+ }
+ }
+
+ /* passing large structures using movq instead of pushes does not increase codesize very much */
+ unsigned movqLenMin = 8;
+ unsigned movqLenMax = 64;
+ unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
+
+ if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) || (curBBweight == BB_ZERO_WEIGHT))
+ {
+ // Don't bother with this optimization in
+ // rarely run blocks or when optimizing for size
+ movqLenMax = movqLenMin = 0;
+ }
+ else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
+ {
+ // Be more aggressive when optimizing for speed
+ movqLenMax *= 2;
+ }
+
+ /* Adjust for BB weight */
+ if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
+ {
+ // Be more aggressive when we are inside a loop
+ movqLenMax *= 2;
+ }
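+                    // For illustration (given the defaults above): when SSE2 is available, a GC-free
+                    // struct of 8 to 64 bytes is copied with movq loads/stores; under FAST_CODE the
+                    // upper bound grows to 128 bytes and it doubles again inside a hot loop, while
+                    // SMALL_CODE and zero-weight blocks always fall back to the push loop below.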
+
+ if (compiler->opts.compCanUseSSE2 && bNoneGC && (opsz >= movqLenMin) && (opsz <= movqLenMax))
+ {
+ JITLOG_THIS(compiler, (LL_INFO10000,
+ "Using XMM instructions to pass %3d byte valuetype while compiling %s\n",
+ opsz, compiler->info.compFullName));
+
+ int stkDisp = (int)(unsigned)opsz;
+ int curDisp = 0;
+ regNumber xmmReg = REG_XMM0;
+
+ if (opsz & 0x4)
+ {
+ stkDisp -= sizeof(void*);
+ getEmitter()->emitIns_AR_R(INS_push, EA_4BYTE, REG_NA, reg, stkDisp);
+ genSinglePush();
+ }
+
+ inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE);
+ genStackLevel += stkDisp;
+
+ while (curDisp < stkDisp)
+ {
+ getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, reg, curDisp);
+ getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_SPBASE, curDisp);
+ curDisp += 2 * sizeof(void*);
+ }
+ noway_assert(curDisp == stkDisp);
+ }
+ else
+ {
+ for (int i = slots - 1; i >= 0; --i)
+ {
+ emitAttr fieldSize;
+ if (gcLayout[i] == TYPE_GC_NONE)
+ fieldSize = EA_4BYTE;
+ else if (gcLayout[i] == TYPE_GC_REF)
+ fieldSize = EA_GCREF;
+ else
+ {
+ noway_assert(gcLayout[i] == TYPE_GC_BYREF);
+ fieldSize = EA_BYREF;
+ }
+ getEmitter()->emitIns_AR_R(INS_push, fieldSize, REG_NA, reg, i * sizeof(void*));
+ genSinglePush();
+ }
+ }
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // Kill the pointer in op1
+ }
+
+ addrReg = 0;
+ break;
+ }
+
+ default:
+ noway_assert(!"unhandled/unexpected arg type");
+ NO_WAY("unhandled/unexpected arg type");
+ }
+
+ /* Update the current set of live variables */
+
+ genUpdateLife(curr);
+
+ /* Update the current set of register pointers */
+
+ noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
+ genDoneAddressable(curr, addrReg, RegSet::FREE_REG);
+
+ /* Remember how much stuff we've pushed on the stack */
+
+ size += opsz;
+
+ /* Update the current argument stack offset */
+
+ /* Continue with the next argument, if any more are present */
+
+ } // while args
+
+ /* Move the deferred arguments to registers */
+
+ for (args = regArgs; args; args = args->Rest())
+ {
+ curr = args->Current();
+
+ assert(!curr->IsArgPlaceHolderNode()); // No place holders nodes are in the late args
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
+ assert(curArgTabEntry);
+ regNumber regNum = curArgTabEntry->regNum;
+
+ noway_assert(isRegParamType(curr->TypeGet()));
+ noway_assert(curr->gtType != TYP_VOID);
+
+ /* Evaluate the argument to a register [pair] */
+
+ if (genTypeSize(genActualType(curr->TypeGet())) == sizeof(int))
+ {
+        /* Check if this is the guess area for the resolve interface call
+         * Pass a size of EA_OFFSET */
+ if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
+ {
+ getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
+ regTracker.rsTrackRegTrash(regNum);
+
+ /* The value is now in the appropriate register */
+
+ genMarkTreeInReg(curr, regNum);
+ }
+ else
+ {
+ genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
+ }
+
+ noway_assert(curr->gtRegNum == regNum);
+
+ /* If the register is already marked as used, it will become
+ multi-used. However, since it is a callee-trashed register,
+ we will have to spill it before the call anyway. So do it now */
+
+ if (regSet.rsMaskUsed & genRegMask(regNum))
+ {
+ noway_assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
+ regSet.rsSpillReg(regNum);
+ }
+
+ /* Mark the register as 'used' */
+
+ regSet.rsMarkRegUsed(curr);
+ }
+ else
+ {
+ noway_assert(!"UNDONE: Passing a TYP_STRUCT in register arguments");
+ }
+ }
+
+ /* If any of the previously loaded arguments were spilled - reload them */
+
+ for (args = regArgs; args; args = args->Rest())
+ {
+ curr = args->Current();
+ assert(curr);
+
+ if (curr->gtFlags & GTF_SPILLED)
+ {
+ if (isRegPairType(curr->gtType))
+ {
+ regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
+ }
+ else
+ {
+ regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
+ }
+ }
+ }
+
+ /* Return the total size pushed */
+
+ return size;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+//
+// ARM and AMD64 use this method to pass the stack-based args
+//
+// Returns the size pushed (always zero)
+size_t CodeGen::genPushArgList(GenTreePtr call)
+{
+
+ GenTreeArgList* lateArgs = call->gtCall.gtCallLateArgs;
+ GenTreePtr curr;
+ var_types type;
+ int argSize;
+
+ GenTreeArgList* args;
+ // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
+ // so we can iterate over this argument list more uniformly.
+ // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
+ GenTreeArgList objpArgList(/*temp dummy arg*/ call, call->gtCall.gtCallArgs);
+ if (call->gtCall.gtCallObjp == NULL)
+ {
+ args = call->gtCall.gtCallArgs;
+ }
+ else
+ {
+ objpArgList.Current() = call->gtCall.gtCallObjp;
+ args = &objpArgList;
+ }
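+    // Sketch of the iteration order this sets up (hypothetical call, for illustration): for an
+    // instance call obj.M(a, b) the loop below visits [obj, a, b]; for a static call it simply
+    // walks gtCallArgs, i.e. [a, b].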
+
+ for (; args; args = args->Rest())
+ {
+ /* Get hold of the next argument value */
+ curr = args->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
+ assert(curArgTabEntry);
+ regNumber regNum = curArgTabEntry->regNum;
+ int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
+
+ /* See what type of a value we're passing */
+ type = curr->TypeGet();
+
+ if ((type == TYP_STRUCT) && (curr->gtOper == GT_ASG))
+ {
+ type = TYP_VOID;
+ }
+
+ // This holds the set of registers corresponding to enregistered promoted struct field variables
+ // that go dead after this use of the variable in the argument list.
+ regMaskTP deadFieldVarRegs = RBM_NONE;
+
+ argSize = TARGET_POINTER_SIZE; // The default size for an arg is one pointer-sized item
+
+ if (curr->IsArgPlaceHolderNode())
+ {
+ assert(curr->gtFlags & GTF_LATE_ARG);
+ goto DEFERRED;
+ }
+
+ if (varTypeIsSmall(type))
+ {
+ // Normalize 'type', it represents the item that we will be storing in the Outgoing Args
+ type = TYP_I_IMPL;
+ }
+
+ switch (type)
+ {
+
+ case TYP_DOUBLE:
+ case TYP_LONG:
+
+#if defined(_TARGET_ARM_)
+
+ argSize = (TARGET_POINTER_SIZE * 2);
+
+ /* Is the value a constant? */
+
+ if (curr->gtOper == GT_CNS_LNG)
+ {
+ assert((curr->gtFlags & GTF_LATE_ARG) == 0);
+
+ int hiVal = (int)(curr->gtLngCon.gtLconVal >> 32);
+ int loVal = (int)(curr->gtLngCon.gtLconVal & 0xffffffff);
+
+ instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, loVal, compiler->lvaOutgoingArgSpaceVar, argOffset);
+
+ instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, hiVal, compiler->lvaOutgoingArgSpaceVar,
+ argOffset + 4);
+
+ break;
+ }
+ else
+ {
+ genCodeForTree(curr, 0);
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ // The arg was assigned into a temp and
+ // will be moved to the correct register or slot later
+
+ argSize = 0; // nothing is passed on the stack
+ }
+ else
+ {
+ // The arg is passed in the outgoing argument area of the stack frame
+ //
+ assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
+ assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
+
+ if (type == TYP_LONG)
+ {
+ regNumber regLo = genRegPairLo(curr->gtRegPair);
+ regNumber regHi = genRegPairHi(curr->gtRegPair);
+
+ assert(regLo != REG_STK);
+ inst_SA_RV(ins_Store(TYP_INT), argOffset, regLo, TYP_INT);
+ if (regHi == REG_STK)
+ {
+ regHi = regSet.rsPickFreeReg();
+ inst_RV_TT(ins_Load(TYP_INT), regHi, curr, 4);
+ regTracker.rsTrackRegTrash(regHi);
+ }
+ inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, regHi, TYP_INT);
+ }
+ else // (type == TYP_DOUBLE)
+ {
+ inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
+ }
+ }
+ }
+ break;
+
+#elif defined(_TARGET_64BIT_)
+ __fallthrough;
+#else
+#error "Unknown target for passing TYP_LONG argument using FIXED_ARGS"
+#endif
+
+ case TYP_REF:
+ case TYP_BYREF:
+
+ case TYP_FLOAT:
+ case TYP_INT:
+ /* Is the value a constant? */
+
+ if (curr->gtOper == GT_CNS_INT)
+ {
+ assert(!(curr->gtFlags & GTF_LATE_ARG));
+
+#if REDUNDANT_LOAD
+ regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
+
+ if (reg != REG_NA)
+ {
+ inst_SA_RV(ins_Store(type), argOffset, reg, type);
+ }
+ else
+#endif
+ {
+ bool needReloc = compiler->opts.compReloc && curr->IsIconHandle();
+ emitAttr attr = needReloc ? EA_HANDLE_CNS_RELOC : emitTypeSize(type);
+ instGen_Store_Imm_Into_Lcl(type, attr, curr->gtIntCon.gtIconVal,
+ compiler->lvaOutgoingArgSpaceVar, argOffset);
+ }
+ break;
+ }
+
+ /* This is passed as a pointer-sized integer argument */
+
+ genCodeForTree(curr, 0);
+
+ // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+#ifdef _TARGET_ARM_
+ argSize = 0; // nothing is passed on the stack
+#endif
+ }
+ else
+ {
+ // The arg is passed in the outgoing argument area of the stack frame
+
+ assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
+ assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
+ inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
+
+ if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
+ }
+ break;
+
+ case TYP_VOID:
+ /* Is this a nothing node, deferred register argument? */
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ /* Handle side-effects */
+ DEFERRED:
+ if (curr->OperIsCopyBlkOp() || curr->OperGet() == GT_COMMA)
+ {
+#ifdef _TARGET_ARM_
+ {
+ GenTreePtr curArgNode = curArgTabEntry->node;
+ var_types curRegArgType = curArgNode->gtType;
+ assert(curRegArgType != TYP_UNDEF);
+
+ if (curRegArgType == TYP_STRUCT)
+ {
+ // If the RHS of the COPYBLK is a promoted struct local, then the use of that
+ // is an implicit use of all its field vars. If these are last uses, remember that,
+                        // so we can later update the GC info.
+ if (curr->OperIsCopyBlkOp())
+ deadFieldVarRegs |= genFindDeadFieldRegs(curr);
+ }
+ }
+#endif // _TARGET_ARM_
+
+ genCodeForTree(curr, 0);
+ }
+ else
+ {
+ assert(curr->IsArgPlaceHolderNode() || curr->IsNothingNode());
+ }
+
+#if defined(_TARGET_ARM_)
+ argSize = curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
+#endif
+ }
+ else
+ {
+ for (GenTree* arg = curr; arg->gtOper == GT_COMMA; arg = arg->gtOp.gtOp2)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ }
+ }
+ break;
+
+#ifdef _TARGET_ARM_
+
+ case TYP_STRUCT:
+ {
+ GenTree* arg = curr;
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+ noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_MKREFANY));
+
+ CORINFO_CLASS_HANDLE clsHnd;
+ unsigned argAlign;
+ unsigned slots;
+ BYTE* gcLayout = NULL;
+
+ // If the struct being passed is a OBJ of a local struct variable that is promoted (in the
+ // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
+ // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
+ // table entry for the promoted struct local. As we fill slots with the contents of a
+ // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
+ // that indicate another filled slot, and "nextPromotedStructFieldVar" will be the local
+ // variable number of the next field variable to be copied.
+ LclVarDsc* promotedStructLocalVarDesc = NULL;
+ GenTreePtr structLocalTree = NULL;
+ unsigned bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE; // Size of slot.
+ unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
+ unsigned promotedStructOffsetOfFirstStackSlot = 0;
+ unsigned argOffsetOfFirstStackSlot = UINT32_MAX; // Indicates uninitialized.
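+                // Example (hypothetical 12-byte promoted struct, for illustration): it occupies 3
+                // four-byte slots; while filling the second slot, "bytesOfNextSlotOfCurPromotedStruct"
+                // is 8, and it advances by TARGET_POINTER_SIZE each time a slot is completed.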
+
+ if (arg->OperGet() == GT_OBJ)
+ {
+ clsHnd = arg->gtObj.gtClass;
+ unsigned originalSize = compiler->info.compCompHnd->getClassSize(clsHnd);
+ argAlign =
+ roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
+ argSize = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE));
+
+ slots = (unsigned)(argSize / TARGET_POINTER_SIZE);
+
+ gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+
+ compiler->info.compCompHnd->getClassGClayout(clsHnd, gcLayout);
+
+ // Are we loading a promoted struct local var?
+ if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+                    // As much as we would like this to be a noway_assert, we can't because
+                    // there are some weird casts out there, and backwards compatibility
+                    // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
+                    // lvPromoted in general currently do not require the local to be
+                    // TYP_STRUCT, so this assert is really more about how we wish the world
+                    // was than some JIT invariant.
+ assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted &&
+ promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live
+ // on stack.
+ {
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+ promotedStructLocalVarDesc = varDsc;
+ nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+ }
+ }
+ }
+ else
+ {
+ noway_assert(arg->OperGet() == GT_MKREFANY);
+
+ clsHnd = NULL;
+ argAlign = TARGET_POINTER_SIZE;
+ argSize = 2 * TARGET_POINTER_SIZE;
+ slots = 2;
+ }
+
+ // Any TYP_STRUCT argument that is passed in registers must be moved over to the LateArg list
+ noway_assert(regNum == REG_STK);
+
+ // This code passes a TYP_STRUCT by value using the outgoing arg space var
+ //
+ if (arg->OperGet() == GT_OBJ)
+ {
+ regNumber regSrc = REG_STK;
+ regNumber regTmp = REG_STK; // This will get set below if the obj is not of a promoted struct local.
+ int cStackSlots = 0;
+
+ if (promotedStructLocalVarDesc == NULL)
+ {
+ genComputeReg(arg->gtObj.gtOp1, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
+ noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
+ regSrc = arg->gtObj.gtOp1->gtRegNum;
+ }
+
+                    // The number of bytes to add to "argOffset" to get the arg offset of the current slot.
+ int extraArgOffset = 0;
+
+ for (unsigned i = 0; i < slots; i++)
+ {
+ emitAttr fieldSize;
+ if (gcLayout[i] == TYPE_GC_NONE)
+ fieldSize = EA_PTRSIZE;
+ else if (gcLayout[i] == TYPE_GC_REF)
+ fieldSize = EA_GCREF;
+ else
+ {
+ noway_assert(gcLayout[i] == TYPE_GC_BYREF);
+ fieldSize = EA_BYREF;
+ }
+
+ // Pass the argument using the lvaOutgoingArgSpaceVar
+
+ if (promotedStructLocalVarDesc != NULL)
+ {
+ if (argOffsetOfFirstStackSlot == UINT32_MAX)
+ argOffsetOfFirstStackSlot = argOffset;
+
+ regNumber maxRegArg = regNumber(MAX_REG_ARG);
+ bool filledExtraSlot = genFillSlotFromPromotedStruct(
+ arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize, &nextPromotedStructFieldVar,
+ &bytesOfNextSlotOfCurPromotedStruct,
+ /*pCurRegNum*/ &maxRegArg,
+ /*argOffset*/ argOffset + extraArgOffset,
+ /*fieldOffsetOfFirstStackSlot*/ promotedStructOffsetOfFirstStackSlot,
+ argOffsetOfFirstStackSlot, &deadFieldVarRegs, &regTmp);
+ extraArgOffset += TARGET_POINTER_SIZE;
+ // If we filled an extra slot with an 8-byte value, skip a slot.
+ if (filledExtraSlot)
+ {
+ i++;
+ cStackSlots++;
+ extraArgOffset += TARGET_POINTER_SIZE;
+ }
+ }
+ else
+ {
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
+ i * TARGET_POINTER_SIZE);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar,
+ argOffset + cStackSlots * TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(regTmp);
+ }
+ cStackSlots++;
+ }
+
+ if (promotedStructLocalVarDesc == NULL)
+ {
+ regSet.rsMarkRegFree(genRegMask(regSrc));
+ }
+ if (structLocalTree != NULL)
+ genUpdateLife(structLocalTree);
+ }
+ else
+ {
+ assert(arg->OperGet() == GT_MKREFANY);
+ PushMkRefAnyArg(arg, curArgTabEntry, RBM_ALLINT);
+ argSize = (curArgTabEntry->numSlots * TARGET_POINTER_SIZE);
+ }
+ }
+ break;
+#endif // _TARGET_ARM_
+
+ default:
+ assert(!"unhandled/unexpected arg type");
+ NO_WAY("unhandled/unexpected arg type");
+ }
+
+ /* Update the current set of live variables */
+
+ genUpdateLife(curr);
+
+ // Now, if some copied field locals were enregistered, and they're now dead, update the set of
+ // register holding gc pointers.
+ if (deadFieldVarRegs != 0)
+ gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
+
+ /* Update the current argument stack offset */
+
+ argOffset += argSize;
+
+ /* Continue with the next argument, if any more are present */
+ } // while (args)
+
+ if (lateArgs)
+ {
+ SetupLateArgs(call);
+ }
+
+ /* Return the total size pushed */
+
+ return 0;
+}
+
+#ifdef _TARGET_ARM_
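+// genFillSlotFromPromotedStruct:
+//   Roughly: fills one pointer-sized slot of an outgoing struct argument from the field variables of
+//   an independently promoted struct local, writing either into the argument register *pCurRegNum or,
+//   when *pCurRegNum is MAX_REG_ARG (no argument register available for this slot), into the outgoing
+//   arg space at 'argOffset'. Returns true when an 8-byte field (or a double in the HFA case) consumed
+//   an extra slot/register, so the caller should skip the next slot.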
+bool CodeGen::genFillSlotFromPromotedStruct(GenTreePtr arg,
+ fgArgTabEntryPtr curArgTabEntry,
+ LclVarDsc* promotedStructLocalVarDesc,
+ emitAttr fieldSize,
+ unsigned* pNextPromotedStructFieldVar,
+ unsigned* pBytesOfNextSlotOfCurPromotedStruct,
+ regNumber* pCurRegNum,
+ int argOffset,
+ int fieldOffsetOfFirstStackSlot,
+ int argOffsetOfFirstStackSlot,
+ regMaskTP* deadFieldVarRegs,
+ regNumber* pRegTmp)
+{
+ unsigned nextPromotedStructFieldVar = *pNextPromotedStructFieldVar;
+ unsigned limitPromotedStructFieldVar =
+ promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
+ unsigned bytesOfNextSlotOfCurPromotedStruct = *pBytesOfNextSlotOfCurPromotedStruct;
+
+ regNumber curRegNum = *pCurRegNum;
+ regNumber regTmp = *pRegTmp;
+ bool filledExtraSlot = false;
+
+ if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
+ {
+ // We've already finished; just return.
+ // We can reach this because the calling loop computes a # of slots based on the size of the struct.
+ // If the struct has padding at the end because of alignment (say, long/int), then we'll get a call for
+ // the fourth slot, even though we've copied all the fields.
+ return false;
+ }
+
+ LclVarDsc* fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
+
+ // Does this field fill an entire slot, and does it go at the start of the slot?
+ // If so, things are easier...
+
+ bool oneFieldFillsSlotFromStart =
+ (fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) // The field should start in the current slot...
+ && ((fieldVarDsc->lvFldOffset % 4) == 0) // at the start of the slot, and...
+ && (nextPromotedStructFieldVar + 1 ==
+ limitPromotedStructFieldVar // next field, if there is one, goes in the next slot.
+ || compiler->lvaTable[nextPromotedStructFieldVar + 1].lvFldOffset >= bytesOfNextSlotOfCurPromotedStruct);
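+    // For illustration (hypothetical layout): in a promoted struct { int a; short b; short c; } the
+    // field 'a' (offset 0, size 4) fills its slot from the start, while 'b' (offset 4) and 'c'
+    // (offset 6) share the second slot; when 'b' is reached this flag is false, and the slot is
+    // assembled piecewise in the else branch below (which also consumes 'c').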
+
+ // Compute the proper size.
+ if (fieldSize == EA_4BYTE) // Not a GC ref or byref.
+ {
+ switch (fieldVarDsc->lvExactSize)
+ {
+ case 1:
+ fieldSize = EA_1BYTE;
+ break;
+ case 2:
+ fieldSize = EA_2BYTE;
+ break;
+ case 8:
+ // An 8-byte field will be at an 8-byte-aligned offset unless explicit layout has been used,
+ // in which case we should not have promoted the struct variable.
+ noway_assert((fieldVarDsc->lvFldOffset % 8) == 0);
+
+ // If the current reg number is not aligned, align it, and return to the calling loop, which will
+ // consider that a filled slot and move on to the next argument register.
+ if (curRegNum != MAX_REG_ARG && ((curRegNum % 2) != 0))
+ {
+ // We must update the slot target, however!
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
+ return false;
+ }
+ // Dest is an aligned pair of arg regs, if the struct type demands it.
+ noway_assert((curRegNum % 2) == 0);
+ // We leave the fieldSize as EA_4BYTE; but we must do 2 reg moves.
+ break;
+ default:
+ assert(fieldVarDsc->lvExactSize == 4);
+ break;
+ }
+ }
+ else
+ {
+ // If the gc layout said it's a GC ref or byref, then the field size must be 4.
+ noway_assert(fieldVarDsc->lvExactSize == 4);
+ }
+
+ // We may need the type of the field to influence instruction selection.
+ // If we have a TYP_LONG we can use TYP_I_IMPL and we do two loads/stores
+ // If the fieldVarDsc is enregistered float we must use the field's exact type
+ // however if it is in memory we can use an integer type TYP_I_IMPL
+ //
+ var_types fieldTypeForInstr = var_types(fieldVarDsc->lvType);
+ if ((fieldVarDsc->lvType == TYP_LONG) || (!fieldVarDsc->lvRegister && varTypeIsFloating(fieldTypeForInstr)))
+ {
+ fieldTypeForInstr = TYP_I_IMPL;
+ }
+
+ // If we have a HFA, then it is a much simpler deal -- HFAs are completely enregistered.
+ if (curArgTabEntry->isHfaRegArg)
+ {
+ assert(oneFieldFillsSlotFromStart);
+
+ // Is the field variable promoted?
+ if (fieldVarDsc->lvRegister)
+ {
+ // Move the field var living in register to dst, if they are different registers.
+ regNumber srcReg = fieldVarDsc->lvRegNum;
+ regNumber dstReg = curRegNum;
+ if (srcReg != dstReg)
+ {
+ inst_RV_RV(ins_Copy(fieldVarDsc->TypeGet()), dstReg, srcReg, fieldVarDsc->TypeGet());
+ assert(genIsValidFloatReg(dstReg)); // we don't use register tracking for FP
+ }
+ }
+ else
+ {
+ // Move the field var living in stack to dst.
+ getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
+ fieldVarDsc->TypeGet() == TYP_DOUBLE ? EA_8BYTE : EA_4BYTE, curRegNum,
+ nextPromotedStructFieldVar, 0);
+ assert(genIsValidFloatReg(curRegNum)); // we don't use register tracking for FP
+ }
+
+ // Mark the arg as used and using reg val.
+ genMarkTreeInReg(arg, curRegNum);
+ regSet.SetUsedRegFloat(arg, true);
+
+ // Advance for double.
+ if (fieldVarDsc->TypeGet() == TYP_DOUBLE)
+ {
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ curRegNum = REG_NEXT(curRegNum);
+ arg->gtRegNum = curRegNum;
+ regSet.SetUsedRegFloat(arg, true);
+ filledExtraSlot = true;
+ }
+ arg->gtRegNum = curArgTabEntry->regNum;
+
+ // Advance.
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ nextPromotedStructFieldVar++;
+ }
+ else
+ {
+ if (oneFieldFillsSlotFromStart)
+ {
+ // If we write to the stack, offset in outgoing args at which we'll write.
+ int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
+ assert(fieldArgOffset >= 0);
+
+ // Is the source a register or memory?
+ if (fieldVarDsc->lvRegister)
+ {
+ if (fieldTypeForInstr == TYP_DOUBLE)
+ {
+ fieldSize = EA_8BYTE;
+ }
+
+ // Are we writing to a register or to the stack?
+ if (curRegNum != MAX_REG_ARG)
+ {
+ // Source is register and Dest is register.
+
+ instruction insCopy = INS_mov;
+
+ if (varTypeIsFloating(fieldTypeForInstr))
+ {
+ if (fieldTypeForInstr == TYP_FLOAT)
+ {
+ insCopy = INS_vmov_f2i;
+ }
+ else
+ {
+ assert(fieldTypeForInstr == TYP_DOUBLE);
+ insCopy = INS_vmov_d2i;
+ }
+ }
+
+                    // If the value being copied is a TYP_LONG (8 bytes), it may be in two registers. Record the second
+                    // register (which may become a tmp register, if it's held in the argument register that the first
+                    // register to be copied will overwrite).
+ regNumber otherRegNum = REG_STK;
+ if (fieldVarDsc->lvType == TYP_LONG)
+ {
+ otherRegNum = fieldVarDsc->lvOtherReg;
+ // Are we about to overwrite?
+ if (otherRegNum == curRegNum)
+ {
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+ // Copy the second register to the temp reg.
+ getEmitter()->emitIns_R_R(INS_mov, fieldSize, regTmp, otherRegNum);
+ regTracker.rsTrackRegCopy(regTmp, otherRegNum);
+ otherRegNum = regTmp;
+ }
+ }
+
+ if (fieldVarDsc->lvType == TYP_DOUBLE)
+ {
+ assert(curRegNum <= REG_R2);
+ getEmitter()->emitIns_R_R_R(insCopy, fieldSize, curRegNum, genRegArgNext(curRegNum),
+ fieldVarDsc->lvRegNum);
+ regTracker.rsTrackRegTrash(curRegNum);
+ regTracker.rsTrackRegTrash(genRegArgNext(curRegNum));
+ }
+ else
+ {
+ // Now do the first register.
+ // It might be the case that it's already in the desired register; if so do nothing.
+ if (curRegNum != fieldVarDsc->lvRegNum)
+ {
+ getEmitter()->emitIns_R_R(insCopy, fieldSize, curRegNum, fieldVarDsc->lvRegNum);
+ regTracker.rsTrackRegCopy(curRegNum, fieldVarDsc->lvRegNum);
+ }
+ }
+
+ // In either case, mark the arg register as used.
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
+
+ // Is there a second half of the value?
+ if (fieldVarDsc->lvExactSize == 8)
+ {
+ curRegNum = genRegArgNext(curRegNum);
+ // The second dest reg must also be an argument register.
+ noway_assert(curRegNum < MAX_REG_ARG);
+
+ // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
+ if (fieldVarDsc->lvType == TYP_LONG)
+ {
+ // Copy the second register into the next argument register
+
+ // If it's a register variable for a TYP_LONG value, then otherReg now should
+ // hold the second register or it might say that it's in the stack.
+ if (otherRegNum == REG_STK)
+ {
+ // Apparently when we partially enregister, we allocate stack space for the full
+ // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
+ // parameter, to get the high half.
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
+ nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(curRegNum);
+ }
+ else
+ {
+ // The other half is in a register.
+ // Again, it might be the case that it's already in the desired register; if so do
+ // nothing.
+ if (curRegNum != otherRegNum)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, fieldSize, curRegNum, otherRegNum);
+ regTracker.rsTrackRegCopy(curRegNum, otherRegNum);
+ }
+ }
+ }
+
+ // Also mark the 2nd arg register as used.
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, false);
+ // Record the fact that we filled in an extra register slot
+ filledExtraSlot = true;
+ }
+ }
+ else
+ {
+ // Source is register and Dest is memory (OutgoingArgSpace).
+
+ // Now write the srcReg into the right location in the outgoing argument list.
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
+ compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
+
+ if (fieldVarDsc->lvExactSize == 8)
+ {
+ // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
+ if (fieldVarDsc->lvType == TYP_LONG)
+ {
+ if (fieldVarDsc->lvOtherReg == REG_STK)
+ {
+ // Source is stack.
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+ // Apparently if we partially enregister, we allocate stack space for the full
+ // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
+ // parameter, to get the high half.
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
+ nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(regTmp);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar,
+ fieldArgOffset + TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, fieldVarDsc->lvOtherReg,
+ compiler->lvaOutgoingArgSpaceVar,
+ fieldArgOffset + TARGET_POINTER_SIZE);
+ }
+ }
+ // Record the fact that we filled in an extra register slot
+ filledExtraSlot = true;
+ }
+ }
+ assert(fieldVarDsc->lvTracked); // Must be tracked, since it's enregistered...
+ // If the fieldVar becomes dead, then declare the register not to contain a pointer value.
+ if (arg->gtFlags & GTF_VAR_DEATH)
+ {
+ *deadFieldVarRegs |= genRegMask(fieldVarDsc->lvRegNum);
+ // We don't bother with the second reg of a register pair, since if it has one,
+ // it obviously doesn't hold a pointer.
+ }
+ }
+ else
+ {
+ // Source is in memory.
+
+ if (curRegNum != MAX_REG_ARG)
+ {
+ // Dest is reg.
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
+ nextPromotedStructFieldVar, 0);
+ regTracker.rsTrackRegTrash(curRegNum);
+
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
+
+ if (fieldVarDsc->lvExactSize == 8)
+ {
+ noway_assert(fieldSize == EA_4BYTE);
+ curRegNum = genRegArgNext(curRegNum);
+ noway_assert(curRegNum < MAX_REG_ARG); // Because of 8-byte alignment.
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), fieldSize, curRegNum,
+ nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(curRegNum);
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
+ // Record the fact that we filled in an extra stack slot
+ filledExtraSlot = true;
+ }
+ }
+ else
+ {
+ // Dest is stack.
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
+ nextPromotedStructFieldVar, 0);
+
+ // Now write regTmp into the right location in the outgoing argument list.
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
+ // We overwrote "regTmp", so erase any previous value we recorded that it contained.
+ regTracker.rsTrackRegTrash(regTmp);
+
+ if (fieldVarDsc->lvExactSize == 8)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
+ nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar,
+ fieldArgOffset + TARGET_POINTER_SIZE);
+ // Record the fact that we filled in an extra stack slot
+ filledExtraSlot = true;
+ }
+ }
+ }
+
+ // Bump up the following if we filled in an extra slot
+ if (filledExtraSlot)
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+
+ // Go to the next field.
+ nextPromotedStructFieldVar++;
+ if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
+ {
+ fieldVarDsc = NULL;
+ }
+ else
+ {
+ // The next field should have the same parent variable, and we should have put the field vars in order
+ // sorted by offset.
+ assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
+ fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
+ fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
+ fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
+ }
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ }
+ else // oneFieldFillsSlotFromStart == false
+ {
+ // The current slot should contain more than one field.
+ // We'll construct a word in memory for the slot, then load it into a register.
+ // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current
+ // slot, in which case we'll just skip this loop altogether.)
+ while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)
+ {
+ // If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs
+ // whose fields have their natural alignment, and alignment == size on ARM).
+ noway_assert(fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize <= bytesOfNextSlotOfCurPromotedStruct);
+
+ // If the argument goes to the stack, the offset in the outgoing arg area for the argument.
+ int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
+ noway_assert(argOffset == INT32_MAX ||
+ (argOffset <= fieldArgOffset && fieldArgOffset < argOffset + TARGET_POINTER_SIZE));
+
+ if (fieldVarDsc->lvRegister)
+ {
+ if (curRegNum != MAX_REG_ARG)
+ {
+ noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
+
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
+ compiler->lvaPromotedStructAssemblyScratchVar,
+ fieldVarDsc->lvFldOffset % 4);
+ }
+ else
+ {
+ // Dest is stack; write directly.
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
+ compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
+ }
+ }
+ else
+ {
+ // Source is in memory.
+
+ // Make sure we have a temporary register to use...
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
+ nextPromotedStructFieldVar, 0);
+ regTracker.rsTrackRegTrash(regTmp);
+
+ if (curRegNum != MAX_REG_ARG)
+ {
+ noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
+
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
+ compiler->lvaPromotedStructAssemblyScratchVar,
+ fieldVarDsc->lvFldOffset % 4);
+ }
+ else
+ {
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
+ }
+ }
+ // Go to the next field.
+ nextPromotedStructFieldVar++;
+ if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
+ {
+ fieldVarDsc = NULL;
+ }
+ else
+ {
+ // The next field should have the same parent variable, and we should have put the field vars in
+ // order sorted by offset.
+ noway_assert(fieldVarDsc->lvIsStructField &&
+ compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
+ fieldVarDsc->lvParentLcl ==
+ compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
+ fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
+ fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
+ }
+ }
+            // Now, if we were accumulating the slot in the promoted-struct assembly scratch var in order to
+            // write it to an argument register, load it into that register.
+ if (curRegNum != MAX_REG_ARG)
+ {
+ noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_4BYTE, curRegNum,
+ compiler->lvaPromotedStructAssemblyScratchVar, 0);
+ regTracker.rsTrackRegTrash(curRegNum);
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
+ }
+ // We've finished a slot; set the goal of the next slot.
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ }
+ }
+
+ // Write back the updates.
+ *pNextPromotedStructFieldVar = nextPromotedStructFieldVar;
+ *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
+ *pCurRegNum = curRegNum;
+ *pRegTmp = regTmp;
+
+ return filledExtraSlot;
+}
+#endif // _TARGET_ARM_
+
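+// genFindDeadFieldRegs:
+//   Given a copy-block node whose source is (the address of) a promoted struct local, returns the mask
+//   of registers holding enregistered field variables of that local when this use is the local's last
+//   use; the caller can then remove those registers from the GC pointer sets.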
+regMaskTP CodeGen::genFindDeadFieldRegs(GenTreePtr cpBlk)
+{
+ noway_assert(cpBlk->OperIsCopyBlkOp()); // Precondition.
+ GenTreePtr rhs = cpBlk->gtOp.gtOp1;
+ regMaskTP res = 0;
+ if (rhs->OperIsIndir())
+ {
+ GenTree* addr = rhs->AsIndir()->Addr();
+ if (addr->gtOper == GT_ADDR)
+ {
+ rhs = addr->gtOp.gtOp1;
+ }
+ }
+ if (rhs->OperGet() == GT_LCL_VAR)
+ {
+ LclVarDsc* rhsDsc = &compiler->lvaTable[rhs->gtLclVarCommon.gtLclNum];
+ if (rhsDsc->lvPromoted)
+ {
+ // It is promoted; iterate over its field vars.
+ unsigned fieldVarNum = rhsDsc->lvFieldLclStart;
+ for (unsigned i = 0; i < rhsDsc->lvFieldCnt; i++, fieldVarNum++)
+ {
+ LclVarDsc* fieldVarDsc = &compiler->lvaTable[fieldVarNum];
+ // Did the variable go dead, and is it enregistered?
+ if (fieldVarDsc->lvRegister && (rhs->gtFlags & GTF_VAR_DEATH))
+ {
+ // Add the register number to the set of registers holding field vars that are going dead.
+ res |= genRegMask(fieldVarDsc->lvRegNum);
+ }
+ }
+ }
+ }
+ return res;
+}
+
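+// SetupLateArgs:
+//   Moves the late arguments of 'call' into their assigned argument registers, spilling any
+//   callee-trashed registers they collide with first; on ARM this includes splitting TYP_STRUCT
+//   arguments between argument registers and the outgoing arg space.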
+void CodeGen::SetupLateArgs(GenTreePtr call)
+{
+ GenTreeArgList* lateArgs;
+ GenTreePtr curr;
+
+ /* Generate the code to move the late arguments into registers */
+
+ for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
+ {
+ curr = lateArgs->Current();
+ assert(curr);
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
+ assert(curArgTabEntry);
+ regNumber regNum = curArgTabEntry->regNum;
+ unsigned argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
+
+ assert(isRegParamType(curr->TypeGet()));
+ assert(curr->gtType != TYP_VOID);
+
+ /* If the register is already marked as used, it will become
+ multi-used. However, since it is a callee-trashed register,
+ we will have to spill it before the call anyway. So do it now */
+
+ {
+ // Remember which registers hold pointers. We will spill
+ // them, but the code that follows will fetch reg vars from
+            // the registers, so we need that GC info.
+ // Also regSet.rsSpillReg doesn't like to spill enregistered
+ // variables, but if this is their last use that is *exactly*
+ // what we need to do, so we have to temporarily pretend
+ // they are no longer live.
+ // You might ask why are they in regSet.rsMaskUsed and regSet.rsMaskVars
+ // when their last use is about to occur?
+ // It is because this is the second operand to be evaluated
+ // of some parent binary op, and the first operand is
+            // live across this tree, and it was thought it could reuse the
+            // variable's register (like a GT_REG_VAR). This probably
+ // is caused by RegAlloc assuming the first operand would
+ // evaluate into another register.
+ regMaskTP rsTemp = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH;
+ regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp;
+ regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp;
+ regSet.RemoveMaskVars(rsTemp);
+
+ regNumber regNum2 = regNum;
+ for (unsigned i = 0; i < curArgTabEntry->numRegs; i++)
+ {
+ if (regSet.rsMaskUsed & genRegMask(regNum2))
+ {
+ assert(genRegMask(regNum2) & RBM_CALLEE_TRASH);
+ regSet.rsSpillReg(regNum2);
+ }
+ regNum2 = genRegArgNext(regNum2);
+ assert(i + 1 == curArgTabEntry->numRegs || regNum2 != MAX_REG_ARG);
+ }
+
+ // Restore gc tracking masks.
+ gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
+ gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
+
+ // Set maskvars back to normal
+ regSet.AddMaskVars(rsTemp);
+ }
+
+ /* Evaluate the argument to a register */
+
+        /* Check if this is the guess area for the resolve interface call
+         * Pass a size of EA_OFFSET */
+ if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
+ {
+ getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
+ regTracker.rsTrackRegTrash(regNum);
+
+ /* The value is now in the appropriate register */
+
+ genMarkTreeInReg(curr, regNum);
+
+ regSet.rsMarkRegUsed(curr);
+ }
+#ifdef _TARGET_ARM_
+ else if (curr->gtType == TYP_STRUCT)
+ {
+ GenTree* arg = curr;
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+ noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_LCL_VAR) ||
+ (arg->OperGet() == GT_MKREFANY));
+
+ // This code passes a TYP_STRUCT by value using
+ // the argument registers first and
+ // then the lvaOutgoingArgSpaceVar area.
+ //
+
+ // We prefer to choose low registers here to reduce code bloat
+ regMaskTP regNeedMask = RBM_LOW_REGS;
+ unsigned firstStackSlot = 0;
+ unsigned argAlign = TARGET_POINTER_SIZE;
+ size_t originalSize = InferStructOpSizeAlign(arg, &argAlign);
+
+ unsigned slots = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
+ assert(slots > 0);
+
+ if (regNum == REG_STK)
+ {
+ firstStackSlot = 0;
+ }
+ else
+ {
+ if (argAlign == (TARGET_POINTER_SIZE * 2))
+ {
+ assert((regNum & 1) == 0);
+ }
+
+ // firstStackSlot is an index of the first slot of the struct
+ // that is on the stack, in the range [0,slots]. If it is 'slots',
+ // then the entire struct is in registers. It is also equal to
+ // the number of slots of the struct that are passed in registers.
+
+ if (curArgTabEntry->isHfaRegArg)
+ {
+                    // HFA arguments that have been chosen to go into registers must fit in the FP argument registers.
+ assert(regNum >= FIRST_FP_ARGREG && "HFA must go in FP register");
+ assert(regNum + slots - 1 <= LAST_FP_ARGREG &&
+ "HFA argument doesn't fit entirely in FP argument registers");
+ firstStackSlot = slots;
+ }
+ else if (regNum + slots > MAX_REG_ARG)
+ {
+ firstStackSlot = MAX_REG_ARG - regNum;
+ assert(firstStackSlot > 0);
+ }
+ else
+ {
+ firstStackSlot = slots;
+ }
+
+ if (curArgTabEntry->isHfaRegArg)
+ {
+ // Mask out the registers used by an HFA arg from the ones used to compute tree into.
+ for (unsigned i = regNum; i < regNum + slots; i++)
+ {
+ regNeedMask &= ~genRegMask(regNumber(i));
+ }
+ }
+ }
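+            // Example (hypothetical split, for illustration): a 16-byte struct (slots == 4) whose first
+            // register is r2 gets firstStackSlot == 2, so slots 0-1 go in r2/r3 below and slots 2-3 go
+            // to the outgoing arg space.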
+
+ // This holds the set of registers corresponding to enregistered promoted struct field variables
+ // that go dead after this use of the variable in the argument list.
+ regMaskTP deadFieldVarRegs = RBM_NONE;
+
+ // If the struct being passed is an OBJ of a local struct variable that is promoted (in the
+                // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
+ // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
+ // table entry for the promoted struct local. As we fill slots with the contents of a
+ // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
+ // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're
+ // working on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're
+ // done), and "nextPromotedStructFieldVar" will be the local variable number of the next field variable
+ // to be copied.
+ LclVarDsc* promotedStructLocalVarDesc = NULL;
+ unsigned bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot.
+ unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
+ GenTreePtr structLocalTree = NULL;
+
+ BYTE* gcLayout = NULL;
+ regNumber regSrc = REG_NA;
+ if (arg->gtOper == GT_OBJ)
+ {
+ // Are we loading a promoted struct local var?
+ if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
+ // guaranteed to
+ // live on stack.
+ {
+ // Fix 388395 ARM JitStress WP7
+ noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
+
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+ promotedStructLocalVarDesc = varDsc;
+ nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+ }
+ }
+
+ if (promotedStructLocalVarDesc == NULL)
+ {
+ // If it's not a promoted struct variable, set "regSrc" to the address
+ // of the struct local.
+ genComputeReg(arg->gtObj.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
+ regSrc = arg->gtObj.gtOp1->gtRegNum;
+ // Remove this register from the set of registers that we pick from, unless slots equals 1
+ if (slots > 1)
+ regNeedMask &= ~genRegMask(regSrc);
+ }
+
+ gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+ compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
+ }
+ else if (arg->gtOper == GT_LCL_VAR)
+ {
+ // Move the address of the LCL_VAR in arg into reg
+
+ unsigned varNum = arg->gtLclVarCommon.gtLclNum;
+
+ // Are we loading a promoted struct local var?
+ structLocalTree = arg;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+ noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
+ // guaranteed to live
+ // on stack.
+ {
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+ promotedStructLocalVarDesc = varDsc;
+ nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+ }
+
+ if (promotedStructLocalVarDesc == NULL)
+ {
+ regSrc = regSet.rsPickFreeReg(regNeedMask);
+ // Remove this register from the set of registers that we pick from, unless slots equals 1
+ if (slots > 1)
+ regNeedMask &= ~genRegMask(regSrc);
+
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, regSrc, varNum, 0);
+ regTracker.rsTrackRegTrash(regSrc);
+ gcLayout = compiler->lvaGetGcLayout(varNum);
+ }
+ }
+ else if (arg->gtOper == GT_MKREFANY)
+ {
+ assert(slots == 2);
+ assert((firstStackSlot == 1) || (firstStackSlot == 2));
+ assert(argOffset == 0); // ???
+ PushMkRefAnyArg(arg, curArgTabEntry, regNeedMask);
+
+ // Adjust argOffset if part of this guy was pushed onto the stack
+ if (firstStackSlot < slots)
+ {
+ argOffset += TARGET_POINTER_SIZE;
+ }
+
+ // Skip the copy loop below because we have already placed the argument in the right place
+ slots = 0;
+ gcLayout = NULL;
+ }
+ else
+ {
+ assert(!"Unsupported TYP_STRUCT arg kind");
+ gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+ }
+
+ if (promotedStructLocalVarDesc != NULL)
+ {
+                    // We must do the stack parts first, since those might need values
+ // from argument registers that will be overwritten in the portion of the
+ // loop that writes into the argument registers.
+ bytesOfNextSlotOfCurPromotedStruct = (firstStackSlot + 1) * TARGET_POINTER_SIZE;
+ // Now find the var number of the first that starts in the first stack slot.
+ unsigned fieldVarLim =
+ promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
+ while (compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset <
+ (firstStackSlot * TARGET_POINTER_SIZE) &&
+ nextPromotedStructFieldVar < fieldVarLim)
+ {
+ nextPromotedStructFieldVar++;
+ }
+                    // We can only reach the limit, meaning there is no field that goes even partly on the
+                    // stack, if the first stack slot is after the last slot.
+ assert(nextPromotedStructFieldVar < fieldVarLim || firstStackSlot >= slots);
+ }
+
+ if (slots > 0) // the mkref case may have set "slots" to zero.
+ {
+ // First pass the stack portion of the struct (if any)
+ //
+ int argOffsetOfFirstStackSlot = argOffset;
+ for (unsigned i = firstStackSlot; i < slots; i++)
+ {
+ emitAttr fieldSize;
+ if (gcLayout[i] == TYPE_GC_NONE)
+ fieldSize = EA_PTRSIZE;
+ else if (gcLayout[i] == TYPE_GC_REF)
+ fieldSize = EA_GCREF;
+ else
+ {
+ noway_assert(gcLayout[i] == TYPE_GC_BYREF);
+ fieldSize = EA_BYREF;
+ }
+
+ regNumber maxRegArg = regNumber(MAX_REG_ARG);
+ if (promotedStructLocalVarDesc != NULL)
+ {
+ regNumber regTmp = REG_STK;
+
+ bool filledExtraSlot =
+ genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize,
+ &nextPromotedStructFieldVar,
+ &bytesOfNextSlotOfCurPromotedStruct,
+ /*pCurRegNum*/ &maxRegArg, argOffset,
+ /*fieldOffsetOfFirstStackSlot*/ firstStackSlot *
+ TARGET_POINTER_SIZE,
+ argOffsetOfFirstStackSlot, &deadFieldVarRegs, &regTmp);
+ if (filledExtraSlot)
+ {
+ i++;
+ argOffset += TARGET_POINTER_SIZE;
+ }
+ }
+ else // (promotedStructLocalVarDesc == NULL)
+ {
+                        // When slots > 1, we perform multiple loads/stores, so regTmp cannot be equal to regSrc;
+                        // and although regSrc has been excluded from regNeedMask, regNeedMask is only a *hint*
+                        // to regSet.rsPickFreeReg, so we need to be a little more forceful.
+                        // Otherwise (slots == 1), just reuse the same register.
+ //
+ regNumber regTmp = regSrc;
+ if (slots != 1)
+ {
+ regMaskTP regSrcUsed;
+ regSet.rsLockReg(genRegMask(regSrc), &regSrcUsed);
+
+ regTmp = regSet.rsPickFreeReg(regNeedMask);
+
+ noway_assert(regTmp != regSrc);
+
+ regSet.rsUnlockReg(genRegMask(regSrc), regSrcUsed);
+ }
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
+ i * TARGET_POINTER_SIZE);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar, argOffset);
+ regTracker.rsTrackRegTrash(regTmp);
+ }
+ argOffset += TARGET_POINTER_SIZE;
+ }
+
+ // Now pass the register portion of the struct
+ //
+
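+                // Reset the promoted-field cursor so we can walk the fields again,
+                // this time for the portion passed in registers.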
+ bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE;
+ if (promotedStructLocalVarDesc != NULL)
+ nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+
+                // Create a nested loop here so that the first time through the loop
+                // we set up all of the regArg registers except for possibly
+                // the one that would overwrite regSrc. Then in the final pass
+                // (if necessary) we just set up the regArg that overwrites regSrc.
+ //
+ bool overwriteRegSrc = false;
+ bool needOverwriteRegSrc = false;
+ do
+ {
+ if (needOverwriteRegSrc)
+ overwriteRegSrc = true;
+
+ for (unsigned i = 0; i < firstStackSlot; i++)
+ {
+ regNumber regArg = (regNumber)(regNum + i);
+
+ if (overwriteRegSrc == false)
+ {
+ if (regArg == regSrc)
+ {
+ needOverwriteRegSrc = true;
+ continue;
+ }
+ }
+ else
+ {
+ if (regArg != regSrc)
+ continue;
+ }
+
+ emitAttr fieldSize;
+ if (gcLayout[i] == TYPE_GC_NONE)
+ fieldSize = EA_PTRSIZE;
+ else if (gcLayout[i] == TYPE_GC_REF)
+ fieldSize = EA_GCREF;
+ else
+ {
+ noway_assert(gcLayout[i] == TYPE_GC_BYREF);
+ fieldSize = EA_BYREF;
+ }
+
+ regNumber regTmp = REG_STK;
+ if (promotedStructLocalVarDesc != NULL)
+ {
+ bool filledExtraSlot =
+ genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc,
+ fieldSize, &nextPromotedStructFieldVar,
+ &bytesOfNextSlotOfCurPromotedStruct,
+ /*pCurRegNum*/ &regArg,
+ /*argOffset*/ INT32_MAX,
+ /*fieldOffsetOfFirstStackSlot*/ INT32_MAX,
+ /*argOffsetOfFirstStackSlot*/ INT32_MAX,
+ &deadFieldVarRegs, &regTmp);
+ if (filledExtraSlot)
+ i++;
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AR(ins_Load(curArgTabEntry->isHfaRegArg ? TYP_FLOAT : TYP_I_IMPL),
+ fieldSize, regArg, regSrc, i * TARGET_POINTER_SIZE);
+ }
+ regTracker.rsTrackRegTrash(regArg);
+ }
+ } while (needOverwriteRegSrc != overwriteRegSrc);
+ }
+
+ if ((arg->gtOper == GT_OBJ) && (promotedStructLocalVarDesc == NULL))
+ {
+ regSet.rsMarkRegFree(genRegMask(regSrc));
+ }
+
+ if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs
+ // used.
+ {
+ arg->gtFlags |= GTF_REG_VAL;
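+            // Mark every register that holds a piece of the struct as used;
+            // HFA pieces live in floating-point registers, the rest in integer argument registers.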
+ for (unsigned i = 1; i < firstStackSlot; i++)
+ {
+ arg->gtRegNum = (regNumber)(regNum + i);
+ curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
+ }
+ arg->gtRegNum = regNum;
+ curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
+ }
+
+ // If we're doing struct promotion, the liveness of the promoted field vars may change after this use,
+ // so update liveness.
+ genUpdateLife(arg);
+
+ // Now, if some copied field locals were enregistered, and they're now dead, update the set of
+ // register holding gc pointers.
+ if (deadFieldVarRegs != RBM_NONE)
+ gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
+ }
+ else if (curr->gtType == TYP_LONG || curr->gtType == TYP_ULONG)
+ {
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ // The arg is passed in the outgoing argument area of the stack frame
+ genCompIntoFreeRegPair(curr, RBM_NONE, RegSet::FREE_REG);
+ assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCompIntoFreeRegPair(curr, 0)
+
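+            // Store the low half at argOffset and the high half at argOffset + 4.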
+ inst_SA_RV(ins_Store(TYP_INT), argOffset + 0, genRegPairLo(curr->gtRegPair), TYP_INT);
+ inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, genRegPairHi(curr->gtRegPair), TYP_INT);
+ }
+ else
+ {
+ assert(regNum < REG_ARG_LAST);
+ regPairNo regPair = gen2regs2pair(regNum, REG_NEXT(regNum));
+ genComputeRegPair(curr, regPair, RBM_NONE, RegSet::FREE_REG, false);
+ assert(curr->gtRegPair == regPair);
+ regSet.rsMarkRegPairUsed(curr);
+ }
+ }
+#endif // _TARGET_ARM_
+ else if (curArgTabEntry->regNum == REG_STK)
+ {
+ // The arg is passed in the outgoing argument area of the stack frame
+ //
+ genCodeForTree(curr, 0);
+ assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
+
+ inst_SA_RV(ins_Store(curr->gtType), argOffset, curr->gtRegNum, curr->gtType);
+
+ if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
+ }
+ else
+ {
+ if (!varTypeIsFloating(curr->gtType))
+ {
+ genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
+ assert(curr->gtRegNum == regNum);
+ regSet.rsMarkRegUsed(curr);
+ }
+ else // varTypeIsFloating(curr->gtType)
+ {
+ if (genIsValidFloatReg(regNum))
+ {
+ genComputeReg(curr, genRegMaskFloat(regNum, curr->gtType), RegSet::EXACT_REG, RegSet::FREE_REG,
+ false);
+ assert(curr->gtRegNum == regNum);
+ regSet.rsMarkRegUsed(curr);
+ }
+ else
+ {
+ genCodeForTree(curr, 0);
+ // If we are loading a floating point type into integer registers
+ // then it must be for varargs.
+                    // genCodeForTree will load it into a floating point register;
+                    // now copy it into the correct integer register(s).
+ if (curr->TypeGet() == TYP_FLOAT)
+ {
+ assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
+ regSet.rsSpillRegIfUsed(regNum);
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, regNum, curr->gtRegNum);
+#else
+#error "Unsupported target"
+#endif
+ regTracker.rsTrackRegTrash(regNum);
+
+ curr->gtType = TYP_INT; // Change this to TYP_INT in case we need to spill this register
+ curr->gtRegNum = regNum;
+ regSet.rsMarkRegUsed(curr);
+ }
+ else
+ {
+ assert(curr->TypeGet() == TYP_DOUBLE);
+ regNumber intRegNumLo = regNum;
+ curr->gtType = TYP_LONG; // Change this to TYP_LONG in case we spill this
+#ifdef _TARGET_ARM_
+ regNumber intRegNumHi = regNumber(intRegNumLo + 1);
+ assert(genRegMask(intRegNumHi) & RBM_CALLEE_TRASH);
+ assert(genRegMask(intRegNumLo) & RBM_CALLEE_TRASH);
+ regSet.rsSpillRegIfUsed(intRegNumHi);
+ regSet.rsSpillRegIfUsed(intRegNumLo);
+
+ getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegNumLo, intRegNumHi, curr->gtRegNum);
+ regTracker.rsTrackRegTrash(intRegNumLo);
+ regTracker.rsTrackRegTrash(intRegNumHi);
+ curr->gtRegPair = gen2regs2pair(intRegNumLo, intRegNumHi);
+ regSet.rsMarkRegPairUsed(curr);
+#else
+#error "Unsupported target"
+#endif
+ }
+ }
+ }
+ }
+ }
+
+ /* If any of the previously loaded arguments were spilled - reload them */
+
+ for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
+ {
+ curr = lateArgs->Current();
+ assert(curr);
+
+ if (curr->gtFlags & GTF_SPILLED)
+ {
+ if (isRegPairType(curr->gtType))
+ {
+ regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
+ }
+ else
+ {
+ regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
+ }
+ }
+ }
+}
+
+#ifdef _TARGET_ARM_
+
+// 'Push' a single GT_MKREFANY argument onto a call's argument list
+// The argument is passed as described by the fgArgTabEntry.
+// If any part of the struct is to be passed in a register, the
+// regNum value will be equal to the register used to pass the
+// first part of the struct.
+// If any part is to go onto the stack, we first generate the
+// value into a register specified by 'regNeedMask' and
+// then store it to the outgoing argument area.
+// When this method returns, both parts of the TypedReference have
+// been pushed onto the stack, but *no* registers have been marked
+// as 'in-use'; that is the responsibility of the caller.
+//
+void CodeGen::PushMkRefAnyArg(GenTreePtr mkRefAnyTree, fgArgTabEntryPtr curArgTabEntry, regMaskTP regNeedMask)
+{
+ regNumber regNum = curArgTabEntry->regNum;
+ regNumber regNum2;
+ assert(mkRefAnyTree->gtOper == GT_MKREFANY);
+ regMaskTP arg1RegMask = 0;
+ int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
+
+ // Construct the TypedReference directly into the argument list of the call by
+ // 'pushing' the first field of the typed reference: the pointer.
+ // Do this by directly generating it into the argument register or outgoing arg area of the stack.
+ // Mark it as used so we don't trash it while generating the second field.
+ //
+ if (regNum == REG_STK)
+ {
+ genComputeReg(mkRefAnyTree->gtOp.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
+ noway_assert(mkRefAnyTree->gtOp.gtOp1->gtFlags & GTF_REG_VAL);
+ regNumber tmpReg1 = mkRefAnyTree->gtOp.gtOp1->gtRegNum;
+ inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg1, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(tmpReg1);
+ argOffset += TARGET_POINTER_SIZE;
+ regNum2 = REG_STK;
+ }
+ else
+ {
+ assert(regNum <= REG_ARG_LAST);
+ arg1RegMask = genRegMask(regNum);
+ genComputeReg(mkRefAnyTree->gtOp.gtOp1, arg1RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
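+        // The second field goes in the next argument register, or on the stack
+        // if the first field used the last argument register.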
+ regNum2 = (regNum == REG_ARG_LAST) ? REG_STK : genRegArgNext(regNum);
+ }
+
+ // Now 'push' the second field of the typed reference: the method table.
+ if (regNum2 == REG_STK)
+ {
+ genComputeReg(mkRefAnyTree->gtOp.gtOp2, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
+ noway_assert(mkRefAnyTree->gtOp.gtOp2->gtFlags & GTF_REG_VAL);
+ regNumber tmpReg2 = mkRefAnyTree->gtOp.gtOp2->gtRegNum;
+ inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg2, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(tmpReg2);
+ }
+ else
+ {
+ assert(regNum2 <= REG_ARG_LAST);
+ // We don't have to mark this register as being in use here because it will
+ // be done by the caller, and we don't want to double-count it.
+ genComputeReg(mkRefAnyTree->gtOp.gtOp2, genRegMask(regNum2), RegSet::EXACT_REG, RegSet::FREE_REG);
+ }
+
+    // Now that we are done generating the second part of the TypedReference, we can mark
+    // the first register as free.
+    // The caller, in the shared path, will re-mark all registers used by this argument
+    // as being used, so we don't want to double-count this one.
+ if (arg1RegMask != 0)
+ {
+ GenTreePtr op1 = mkRefAnyTree->gtOp.gtOp1;
+ if (op1->gtFlags & GTF_SPILLED)
+ {
+ /* The register that we loaded arg1 into has been spilled -- reload it back into the correct arg register */
+
+ regSet.rsUnspillReg(op1, arg1RegMask, RegSet::FREE_REG);
+ }
+ else
+ {
+ regSet.rsMarkRegFree(arg1RegMask);
+ }
+ }
+}
+#endif // _TARGET_ARM_
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreePtr call)
+{
+ assert((gtCallTypes)call->gtCall.gtCallType == CT_INDIRECT);
+
+ regMaskTP fptrRegs;
+
+ /* Loading the indirect call target might cause one or more of the previously
+ loaded argument registers to be spilled. So, we save information about all
+ the argument registers, and unspill any of them that get spilled, after
+ the call target is loaded.
+ */
+ struct
+ {
+ GenTreePtr node;
+ union {
+ regNumber regNum;
+ regPairNo regPair;
+ };
+ } regArgTab[MAX_REG_ARG];
+
+ /* Record the previously loaded arguments, if any */
+
+ unsigned regIndex;
+ regMaskTP prefRegs = regSet.rsRegMaskFree();
+ regMaskTP argRegs = RBM_NONE;
+ for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
+ {
+ regMaskTP mask;
+ regNumber regNum = genMapRegArgNumToRegNum(regIndex, TYP_INT);
+ GenTreePtr argTree = regSet.rsUsedTree[regNum];
+ regArgTab[regIndex].node = argTree;
+ if ((argTree != NULL) && (argTree->gtType != TYP_STRUCT)) // We won't spill the struct
+ {
+ assert(argTree->gtFlags & GTF_REG_VAL);
+ if (isRegPairType(argTree->gtType))
+ {
+ regPairNo regPair = argTree->gtRegPair;
+ assert(regNum == genRegPairHi(regPair) || regNum == genRegPairLo(regPair));
+ regArgTab[regIndex].regPair = regPair;
+ mask = genRegPairMask(regPair);
+ }
+ else
+ {
+ assert(regNum == argTree->gtRegNum);
+ regArgTab[regIndex].regNum = regNum;
+ mask = genRegMask(regNum);
+ }
+ assert(!(prefRegs & mask));
+ argRegs |= mask;
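+            // Accumulate the argument-register mask; genKeepAddressable below uses it to keep
+            // the call target addressable while avoiding these registers.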
+ }
+ }
+
+ /* Record the register(s) used for the indirect call func ptr */
+ fptrRegs = genMakeRvalueAddressable(call->gtCall.gtCallAddr, prefRegs, RegSet::KEEP_REG, false);
+
+ /* If any of the previously loaded arguments were spilled, reload them */
+
+ for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
+ {
+ GenTreePtr argTree = regArgTab[regIndex].node;
+ if ((argTree != NULL) && (argTree->gtFlags & GTF_SPILLED))
+ {
+ assert(argTree->gtType != TYP_STRUCT); // We currently don't support spilling structs in argument registers
+ if (isRegPairType(argTree->gtType))
+ {
+ regSet.rsUnspillRegPair(argTree, genRegPairMask(regArgTab[regIndex].regPair), RegSet::KEEP_REG);
+ }
+ else
+ {
+ regSet.rsUnspillReg(argTree, genRegMask(regArgTab[regIndex].regNum), RegSet::KEEP_REG);
+ }
+ }
+ }
+
+ /* Make sure the target is still addressable while avoiding the argument registers */
+
+ fptrRegs = genKeepAddressable(call->gtCall.gtCallAddr, fptrRegs, argRegs);
+
+ return fptrRegs;
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a call. If the call returns a value in register(s), the
+ * register mask that describes where the result will be found is returned;
+ * otherwise, RBM_NONE is returned.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
+{
+ emitAttr retSize;
+ size_t argSize;
+ size_t args;
+ regMaskTP retVal;
+ emitter::EmitCallType emitCallType;
+
+ unsigned saveStackLvl;
+
+ BasicBlock* returnLabel = DUMMY_INIT(NULL);
+ LclVarDsc* frameListRoot = NULL;
+
+ unsigned savCurIntArgReg;
+ unsigned savCurFloatArgReg;
+
+ unsigned areg;
+
+ regMaskTP fptrRegs = RBM_NONE;
+ regMaskTP vptrMask = RBM_NONE;
+
+#ifdef DEBUG
+ unsigned stackLvl = getEmitter()->emitCurStackLvl;
+
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tBeg call ");
+ Compiler::printTreeID(call);
+ printf(" stack %02u [E=%02u]\n", genStackLevel, stackLvl);
+ }
+#endif
+
+ gtCallTypes callType = (gtCallTypes)call->gtCall.gtCallType;
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET;
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+
+#ifdef DEBUGGING_SUPPORT
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL)
+ {
+ (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
+ }
+#endif
+
+ /* Make some sanity checks on the call node */
+
+ // This is a call
+ noway_assert(call->IsCall());
+ // "this" only makes sense for user functions
+ noway_assert(call->gtCall.gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT);
+    // tailcalls won't be done for helpers or for caller-pop args; also check that
+    // the global flag is set
+ noway_assert(!call->gtCall.IsTailCall() ||
+ (callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && compiler->compTailCallUsed));
+
+#ifdef DEBUG
+ // Pass the call signature information down into the emitter so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ if (callType != CT_HELPER)
+ {
+ sigInfo = call->gtCall.callSig;
+ }
+#endif // DEBUG
+
+ unsigned pseudoStackLvl = 0;
+
+ if (!isFramePointerUsed() && (genStackLevel != 0) && compiler->fgIsThrowHlpBlk(compiler->compCurBB))
+ {
+ noway_assert(compiler->compCurBB->bbTreeList->gtStmt.gtStmtExpr == call);
+
+ pseudoStackLvl = genStackLevel;
+
+ noway_assert(!"Blocks with non-empty stack on entry are NYI in the emitter "
+ "so fgAddCodeRef() should have set isFramePointerRequired()");
+ }
+
+ /* Mark the current stack level and list of pointer arguments */
+
+ saveStackLvl = genStackLevel;
+
+ /*-------------------------------------------------------------------------
+ * Set up the registers and arguments
+ */
+
+ /* We'll keep track of how much we've pushed on the stack */
+
+ argSize = 0;
+
+ /* We need to get a label for the return address with the proper stack depth. */
+ /* For the callee pops case (the default) that is before the args are pushed. */
+
+ if ((call->gtFlags & GTF_CALL_UNMANAGED) && !(call->gtFlags & GTF_CALL_POP_ARGS))
+ {
+ returnLabel = genCreateTempLabel();
+ }
+
+ /*
+ Make sure to save the current argument register status
+ in case we have nested calls.
+ */
+
+ noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
+ savCurIntArgReg = intRegState.rsCurRegArgNum;
+ savCurFloatArgReg = floatRegState.rsCurRegArgNum;
+ intRegState.rsCurRegArgNum = 0;
+ floatRegState.rsCurRegArgNum = 0;
+
+ /* Pass the arguments */
+
+ if ((call->gtCall.gtCallObjp != NULL) || (call->gtCall.gtCallArgs != NULL))
+ {
+ argSize += genPushArgList(call);
+ }
+
+ /* We need to get a label for the return address with the proper stack depth. */
+ /* For the caller pops case (cdecl) that is after the args are pushed. */
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ if (call->gtFlags & GTF_CALL_POP_ARGS)
+ returnLabel = genCreateTempLabel();
+
+ /* Make sure that we now have a label */
+ noway_assert(returnLabel != DUMMY_INIT(NULL));
+ }
+
+ if (callType == CT_INDIRECT)
+ {
+ fptrRegs = genLoadIndirectCallTarget(call);
+ }
+
+ /* Make sure any callee-trashed registers are saved */
+
+ regMaskTP calleeTrashedRegs = RBM_NONE;
+
+#if GTF_CALL_REG_SAVE
+ if (call->gtFlags & GTF_CALL_REG_SAVE)
+ {
+ /* The return value reg(s) will definitely be trashed */
+
+ switch (call->gtType)
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+#endif
+ calleeTrashedRegs = RBM_INTRET;
+ break;
+
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+ calleeTrashedRegs = RBM_LNGRET;
+ break;
+
+ case TYP_VOID:
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+#endif
+ calleeTrashedRegs = 0;
+ break;
+
+ default:
+ noway_assert(!"unhandled/unexpected type");
+ }
+ }
+ else
+#endif
+ {
+ calleeTrashedRegs = RBM_CALLEE_TRASH;
+ }
+
+ /* Spill any callee-saved registers which are being used */
+
+ regMaskTP spillRegs = calleeTrashedRegs & regSet.rsMaskUsed;
+
+    /* Rather than saving all GC registers to the InlinedCallFrame,
+       just spill them to temps. */
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ spillRegs |= (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & regSet.rsMaskUsed;
+
+ // Ignore fptrRegs as it is needed only to perform the indirect call
+
+ spillRegs &= ~fptrRegs;
+
+ /* Do not spill the argument registers.
+ Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */
+
+ noway_assert((regSet.rsMaskMult & call->gtCall.gtCallRegUsedMask) == 0);
+ spillRegs &= ~call->gtCall.gtCallRegUsedMask;
+
+ if (spillRegs)
+ {
+ regSet.rsSpillRegs(spillRegs);
+ }
+
+#if FEATURE_STACK_FP_X87
+ // Spill fp stack
+ SpillForCallStackFP();
+
+ if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
+ {
+ // Pick up a reg
+ regNumber regReturn = regSet.PickRegFloat();
+
+ // Assign reg to tree
+ genMarkTreeInReg(call, regReturn);
+
+ // Mark as used
+ regSet.SetUsedRegFloat(call, true);
+
+ // Update fp state
+ compCurFPState.Push(regReturn);
+ }
+#else
+ SpillForCallRegisterFP(call->gtCall.gtCallRegUsedMask);
+#endif
+
+ /* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */
+
+ retSize = EA_PTRSIZE;
+
+ if (valUsed)
+ {
+ if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
+ {
+ retSize = EA_GCREF;
+ }
+ else if (call->gtType == TYP_BYREF)
+ {
+ retSize = EA_BYREF;
+ }
+ }
+
+ /*-------------------------------------------------------------------------
+     * For caller-pop calls, the GC info will report the arguments as pending
+       arguments, since the caller explicitly pops them. They should also be
+       reported as non-GC arguments, since they effectively go dead at the
+       call site (the callee owns them).
+ */
+
+ args = (call->gtFlags & GTF_CALL_POP_ARGS) ? -int(argSize) : argSize;
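+    // Note: a negative 'args' value marks the pushed arguments as caller-pop, per the comment above.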
+
+#ifdef PROFILING_SUPPORTED
+
+ /*-------------------------------------------------------------------------
+ * Generate the profiling hooks for the call
+ */
+
+ /* Treat special cases first */
+
+ /* fire the event at the call site */
+ /* alas, right now I can only handle calls via a method handle */
+ if (compiler->compIsProfilerHookNeeded() && (callType == CT_USER_FUNC) && call->gtCall.IsTailCall())
+ {
+ unsigned saveStackLvl2 = genStackLevel;
+
+ //
+ // Push the profilerHandle
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ regMaskTP byrefPushedRegs;
+ regMaskTP norefPushedRegs;
+ regMaskTP pushedArgRegs = genPushRegs(call->gtCall.gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
+ (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+ genSinglePush();
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
+ sizeof(int) * 1, // argSize
+ EA_UNKNOWN); // retSize
+
+ //
+ // Adjust the number of stack slots used by this managed method if necessary.
+ //
+ if (compiler->fgPtrArgCntMax < 1)
+ {
+ compiler->fgPtrArgCntMax = 1;
+ }
+
+ genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
+#elif _TARGET_ARM_
+ // We need r0 (to pass profiler handle) and another register (call target) to emit a tailcall callback.
+    // To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail-prefixed calls.
+    // Here we grab a register to temporarily store r0 and restore it after we have emitted the callback.
+ //
+ // By the time we reach this point argument registers are setup (by genPushArgList()), therefore we don't want
+ // to disturb them and hence argument registers are locked here.
+ regMaskTP usedMask = RBM_NONE;
+ regSet.rsLockReg(RBM_ARG_REGS, &usedMask);
+
+ regNumber scratchReg = regSet.rsGrabReg(RBM_CALLEE_SAVED);
+ regSet.rsLockReg(genRegMask(scratchReg));
+
+ emitAttr attr = EA_UNKNOWN;
+ if (RBM_R0 & gcInfo.gcRegGCrefSetCur)
+ {
+ attr = EA_GCREF;
+ gcInfo.gcMarkRegSetGCref(scratchReg);
+ }
+ else if (RBM_R0 & gcInfo.gcRegByrefSetCur)
+ {
+ attr = EA_BYREF;
+ gcInfo.gcMarkRegSetByref(scratchReg);
+ }
+ else
+ {
+ attr = EA_4BYTE;
+ }
+
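+        // Save r0 into the scratch register; 'attr' and the gcMarkRegSet* calls above keep its GC
+        // status correctly reported across the helper call. r0 is restored below.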
+ getEmitter()->emitIns_R_R(INS_mov, attr, scratchReg, REG_R0);
+ regTracker.rsTrackRegTrash(scratchReg);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
+ regTracker.rsTrackRegTrash(REG_R0);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_4BYTE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
+ 0, // argSize
+ EA_UNKNOWN); // retSize
+
+    // Restore the state that existed before the profiler callback
+ gcInfo.gcMarkRegSetNpt(scratchReg);
+ getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, scratchReg);
+ regTracker.rsTrackRegTrash(REG_R0);
+ regSet.rsUnlockReg(genRegMask(scratchReg));
+ regSet.rsUnlockReg(RBM_ARG_REGS, usedMask);
+#else
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking any registers");
+#endif //_TARGET_X86_
+
+ /* Restore the stack level */
+ genStackLevel = saveStackLvl2;
+ }
+
+#endif // PROFILING_SUPPORTED
+
+#ifdef DEBUG
+ /*-------------------------------------------------------------------------
+ * Generate an ESP check for the call
+ */
+
+ if (compiler->opts.compStackCheckOnCall
+#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
+ // check the stacks as frequently as possible
+ && !call->IsHelperCall()
+#else
+ && call->gtCall.gtCallType == CT_USER_FUNC
+#endif
+ )
+ {
+ noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
+ }
+#endif
+
+ /*-------------------------------------------------------------------------
+ * Generate the call
+ */
+
+ bool fPossibleSyncHelperCall = false;
+ CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; /* only initialized to avoid compiler C4701 warning */
+
+ bool fTailCallTargetIsVSD = false;
+
+ bool fTailCall = (call->gtCall.gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
+
+ /* Check for Delegate.Invoke. If so, we inline it. We get the
+ target-object and target-function from the delegate-object, and do
+ an indirect call.
+ */
+
+ if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall)
+ {
+ noway_assert(call->gtCall.gtCallType == CT_USER_FUNC);
+
+ assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCall.gtCallMethHnd) &
+ (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) ==
+ (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
+
+ /* Find the offsets of the 'this' pointer and new target */
+
+ CORINFO_EE_INFO* pInfo;
+ unsigned instOffs; // offset of new 'this' pointer
+ unsigned firstTgtOffs; // offset of first target to invoke
+ const regNumber regThis = genGetThisArgReg(call);
+
+ pInfo = compiler->eeGetEEInfo();
+ instOffs = pInfo->offsetOfDelegateInstance;
+ firstTgtOffs = pInfo->offsetOfDelegateFirstTarget;
+
+#ifdef _TARGET_ARM_
+ if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV))
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_VIRTUAL_STUB_PARAM, regThis,
+ pInfo->offsetOfSecureDelegateIndirectCell);
+ regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
+ }
+#endif // _TARGET_ARM_
+
+ // Grab an available register to use for the CALL indirection
+ regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT);
+
+ // Save the invoke-target-function in indCallReg
+ // 'mov indCallReg, dword ptr [regThis + firstTgtOffs]'
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, indCallReg, regThis, firstTgtOffs);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ /* Set new 'this' in REG_CALL_THIS - 'mov REG_CALL_THIS, dword ptr [regThis + instOffs]' */
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_GCREF, regThis, regThis, instOffs);
+ regTracker.rsTrackRegTrash(regThis);
+ noway_assert(instOffs < 127);
+
+ /* Call through indCallReg */
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
+ NULL, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
+ }
+ else
+
+ /*-------------------------------------------------------------------------
+ * Virtual and interface calls
+ */
+
+ switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
+ {
+ case GTF_CALL_VIRT_STUB:
+ {
+ regSet.rsSetRegsModified(RBM_VIRTUAL_STUB_PARAM);
+
+ // An x86 JIT which uses full stub dispatch must generate only
+ // the following stub dispatch calls:
+ //
+ // (1) isCallRelativeIndirect:
+ // call dword ptr [rel32] ; FF 15 ---rel32----
+ // (2) isCallRelative:
+ // call abc ; E8 ---rel32----
+ // (3) isCallRegisterIndirect:
+ // 3-byte nop ;
+ // call dword ptr [eax] ; FF 10
+ //
+ // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
+ // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
+
+ //
+ // Please do not insert any Random NOPs while constructing this VSD call
+ //
+ getEmitter()->emitDisableRandomNops();
+
+ if (!fTailCall)
+ {
+ // This is code to set up an indirect call to a stub address computed
+ // via dictionary lookup. However the dispatch stub receivers aren't set up
+ // to accept such calls at the moment.
+ if (callType == CT_INDIRECT)
+ {
+ regNumber indReg;
+
+ // -------------------------------------------------------------------------
+ // The importer decided we needed a stub call via a computed
+ // stub dispatch address, i.e. an address which came from a dictionary lookup.
+ // - The dictionary lookup produces an indirected address, suitable for call
+ // via "call [REG_VIRTUAL_STUB_PARAM]"
+ //
+ // This combination will only be generated for shared generic code and when
+ // stub dispatch is active.
+
+ // No need to null check the this pointer - the dispatch code will deal with this.
+
+ noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+ // Now put the address in REG_VIRTUAL_STUB_PARAM.
+ // This is typically a nop when the register used for
+ // the gtCallAddr is REG_VIRTUAL_STUB_PARAM
+ //
+ inst_RV_TT(INS_mov, REG_VIRTUAL_STUB_PARAM, call->gtCall.gtCallAddr);
+ regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
+
+#if defined(_TARGET_X86_)
+ // Emit enough bytes of nops so that this sequence can be distinguished
+ // from other virtual stub dispatch calls.
+ //
+ // NOTE: THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
+ // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
+ //
+ getEmitter()->emitIns_Nop(3);
+
+ // Make the virtual stub call:
+ // call [REG_VIRTUAL_STUB_PARAM]
+ //
+ emitCallType = emitter::EC_INDIR_ARD;
+
+ indReg = REG_VIRTUAL_STUB_PARAM;
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+#elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call
+
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+ // Make the virtual stub call:
+ // ldr indReg, [REG_VIRTUAL_STUB_PARAM]
+ // call indReg
+ //
+ emitCallType = emitter::EC_INDIR_R;
+
+ // Now dereference [REG_VIRTUAL_STUB_PARAM] and put it in a new temp register 'indReg'
+ //
+ indReg = regSet.rsGrabReg(RBM_ALLINT & ~RBM_VIRTUAL_STUB_PARAM);
+ assert(call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg, REG_VIRTUAL_STUB_PARAM, 0);
+ regTracker.rsTrackRegTrash(indReg);
+
+#else
+#error "Unknown target for VSD call"
+#endif
+
+ getEmitter()->emitIns_Call(emitCallType,
+ NULL, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, indReg);
+ }
+ else
+ {
+ // -------------------------------------------------------------------------
+ // Check for a direct stub call.
+ //
+
+ // Get stub addr. This will return NULL if virtual call stubs are not active
+ void* stubAddr = NULL;
+
+ stubAddr = (void*)call->gtCall.gtStubCallStubAddr;
+
+ noway_assert(stubAddr != NULL);
+
+ // -------------------------------------------------------------------------
+ // Direct stub calls, though the stubAddr itself may still need to be
+                        // accessed via an indirection.
+ //
+
+                        // No need to null check - the dispatch code will deal with a null 'this'.
+
+ emitter::EmitCallType callTypeStubAddr = emitter::EC_FUNC_ADDR;
+ void* addr = stubAddr;
+ int disp = 0;
+ regNumber callReg = REG_NA;
+
+ if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
+ {
+#if CPU_LOAD_STORE_ARCH
+ callReg = regSet.rsGrabReg(RBM_VIRTUAL_STUB_PARAM);
+ noway_assert(callReg == REG_VIRTUAL_STUB_PARAM);
+
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_VIRTUAL_STUB_PARAM, (ssize_t)stubAddr);
+ // The stub will write-back to this register, so don't track it
+ regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_JUMP_THUNK_PARAM,
+ REG_VIRTUAL_STUB_PARAM, 0);
+ regTracker.rsTrackRegTrash(REG_JUMP_THUNK_PARAM);
+ callTypeStubAddr = emitter::EC_INDIR_R;
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
+ NULL, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, REG_JUMP_THUNK_PARAM);
+
+#else
+ // emit an indirect call
+ callTypeStubAddr = emitter::EC_INDIR_C;
+ addr = 0;
+ disp = (ssize_t)stubAddr;
+#endif
+ }
+#if CPU_LOAD_STORE_ARCH
+ if (callTypeStubAddr != emitter::EC_INDIR_R)
+#endif
+ {
+ getEmitter()->emitIns_Call(callTypeStubAddr, call->gtCall.gtCallMethHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, callReg, REG_NA, 0, disp);
+ }
+ }
+ }
+ else // tailCall is true
+ {
+
+// Non-X86 tail calls materialize the null-check in fgMorphTailCall, when it
+// moves the this pointer out of its usual place and into the argument list.
+#ifdef _TARGET_X86_
+
+ // Generate "cmp ECX, [ECX]" to trap null pointers
+ const regNumber regThis = genGetThisArgReg(call);
+ getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
+
+#endif // _TARGET_X86_
+
+ if (callType == CT_INDIRECT)
+ {
+ noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+ // Now put the address in EAX.
+ inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+ }
+ else
+ {
+ // importer/EE should guarantee the indirection
+ noway_assert(call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
+
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR,
+ ssize_t(call->gtCall.gtStubCallStubAddr));
+ }
+
+ fTailCallTargetIsVSD = true;
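+                    // Recorded so the tailcall helper can be told that the target is a virtual stub
+                    // dispatch (VSD) target (see the stub dispatch flag pushed for x86 below).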
+ }
+
+ //
+ // OK to start inserting random NOPs again
+ //
+ getEmitter()->emitEnableRandomNops();
+ }
+ break;
+
+ case GTF_CALL_VIRT_VTABLE:
+ // stub dispatching is off or this is not a virtual call (could be a tailcall)
+ {
+ regNumber vptrReg;
+ unsigned vtabOffsOfIndirection;
+ unsigned vtabOffsAfterIndirection;
+
+ noway_assert(callType == CT_USER_FUNC);
+
+ vptrReg =
+ regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
+ vptrMask = genRegMask(vptrReg);
+
+ /* The register no longer holds a live pointer value */
+ gcInfo.gcMarkRegSetNpt(vptrMask);
+
+ // MOV vptrReg, [REG_CALL_THIS + offs]
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, genGetThisArgReg(call),
+ VPTR_OFFS);
+ regTracker.rsTrackRegTrash(vptrReg);
+
+ noway_assert(vptrMask & ~call->gtCall.gtCallRegUsedMask);
+
+ /* Get hold of the vtable offset (note: this might be expensive) */
+
+ compiler->info.compCompHnd->getMethodVTableOffset(call->gtCall.gtCallMethHnd,
+ &vtabOffsOfIndirection,
+ &vtabOffsAfterIndirection);
+
+ /* Get the appropriate vtable chunk */
+
+ /* The register no longer holds a live pointer value */
+ gcInfo.gcMarkRegSetNpt(vptrMask);
+
+ // MOV vptrReg, [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
+ vtabOffsOfIndirection);
+
+ /* Call through the appropriate vtable slot */
+
+ if (fTailCall)
+ {
+ /* Load the function address: "[vptrReg+vtabOffs] -> reg_intret" */
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, vptrReg,
+ vtabOffsAfterIndirection);
+ }
+ else
+ {
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
+ vtabOffsAfterIndirection);
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R, call->gtCall.gtCallMethHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset,
+ vptrReg); // ireg
+#else
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL, call->gtCall.gtCallMethHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset,
+ vptrReg, // ireg
+ REG_NA, // xreg
+ 0, // xmul
+ vtabOffsAfterIndirection); // disp
+#endif // CPU_LOAD_STORE_ARCH
+ }
+ }
+ break;
+
+ case GTF_CALL_NONVIRT:
+ {
+ //------------------------ Non-virtual/Indirect calls -------------------------
+ // Lots of cases follow
+ // - Direct P/Invoke calls
+ // - Indirect calls to P/Invoke functions via the P/Invoke stub
+ // - Direct Helper calls
+ // - Indirect Helper calls
+ // - Direct calls to known addresses
+ // - Direct calls where address is accessed by one or two indirections
+ // - Indirect calls to computed addresses
+ // - Tailcall versions of all of the above
+
+ CORINFO_METHOD_HANDLE methHnd = call->gtCall.gtCallMethHnd;
+
+ //------------------------------------------------------
+ // Non-virtual/Indirect calls: Insert a null check on the "this" pointer if needed
+ //
+ // For (final and private) functions which were called with
+ // invokevirtual, but which we call directly, we need to
+ // dereference the object pointer to make sure it's not NULL.
+ //
+
+ if (call->gtFlags & GTF_CALL_NULLCHECK)
+ {
+ /* Generate "cmp ECX, [ECX]" to trap null pointers */
+ const regNumber regThis = genGetThisArgReg(call);
+#if CPU_LOAD_STORE_ARCH
+ regNumber indReg =
+ regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the indirection
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, regThis, 0);
+ regTracker.rsTrackRegTrash(indReg);
+#else
+ getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
+#endif
+ }
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ //------------------------------------------------------
+ // Non-virtual/Indirect calls: PInvoke calls.
+
+ noway_assert(compiler->info.compCallUnmanaged != 0);
+
+ /* args shouldn't be greater than 64K */
+
+ noway_assert((argSize & 0xffff0000) == 0);
+
+ /* Remember the varDsc for the callsite-epilog */
+
+ frameListRoot = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+
+ // exact codegen is required
+ getEmitter()->emitDisableRandomNops();
+
+ int nArgSize = 0;
+
+ regNumber indCallReg = REG_NA;
+
+ if (callType == CT_INDIRECT)
+ {
+ noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+ if (call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL)
+ indCallReg = call->gtCall.gtCallAddr->gtRegNum;
+
+ nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize;
+ methHnd = 0;
+ }
+ else
+ {
+ noway_assert(callType == CT_USER_FUNC);
+ }
+
+ regNumber tcbReg;
+ tcbReg = genPInvokeCallProlog(frameListRoot, nArgSize, methHnd, returnLabel);
+
+ void* addr = NULL;
+
+ if (callType == CT_INDIRECT)
+ {
+ /* Double check that the callee didn't use/trash the
+ registers holding the call target.
+ */
+ noway_assert(tcbReg != indCallReg);
+
+ if (indCallReg == REG_NA)
+ {
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
+ // indirection
+
+ /* Please note that this even works with tcbReg == REG_EAX.
+ tcbReg contains an interesting value only if frameListRoot is
+ an enregistered local that stays alive across the call
+ (certainly not EAX). If frameListRoot has been moved into
+ EAX, we can trash it since it won't survive across the call
+ anyways.
+ */
+
+ inst_RV_TT(INS_mov, indCallReg, call->gtCall.gtCallAddr);
+ regTracker.rsTrackRegTrash(indCallReg);
+ }
+
+ emitCallType = emitter::EC_INDIR_R;
+ }
+ else
+ {
+ noway_assert(callType == CT_USER_FUNC);
+
+ void* pAddr;
+ addr = compiler->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, (void**)&pAddr);
+ if (addr != NULL)
+ {
+#if CPU_LOAD_STORE_ARCH
+ // Load the address into a register, indirect it and call through a register
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
+ // indirection
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+ // Now make the call "call indCallReg"
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
+ methHnd, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) // sigInfo
+ NULL, // addr
+ args,
+ retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
+
+ emitCallType = emitter::EC_INDIR_R;
+ break;
+#else
+ emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
+ indCallReg = REG_NA;
+#endif
+ }
+ else
+ {
+ // Double-indirection. Load the address into a register
+ // and call indirectly through a register
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
+ // indirection
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)pAddr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ emitCallType = emitter::EC_INDIR_R;
+
+#else
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
+ regTracker.rsTrackRegTrash(indCallReg);
+ emitCallType = emitter::EC_INDIR_ARD;
+
+#endif // CPU_LOAD_STORE_ARCH
+ }
+ }
+
+ getEmitter()->emitIns_Call(emitCallType, compiler->eeMarkNativeTarget(methHnd),
+ INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ ilOffset, indCallReg);
+
+ if (callType == CT_INDIRECT)
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+ getEmitter()->emitEnableRandomNops();
+
+ // Done with PInvoke calls
+ break;
+ }
+
+ if (callType == CT_INDIRECT)
+ {
+ noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+ if (call->gtCall.gtCallCookie)
+ {
+ //------------------------------------------------------
+ // Non-virtual indirect calls via the P/Invoke stub
+
+ GenTreePtr cookie = call->gtCall.gtCallCookie;
+ GenTreePtr target = call->gtCall.gtCallAddr;
+
+ noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0);
+
+ noway_assert(cookie->gtOper == GT_CNS_INT ||
+ cookie->gtOper == GT_IND && cookie->gtOp.gtOp1->gtOper == GT_CNS_INT);
+
+ noway_assert(args == argSize);
+
+#if defined(_TARGET_X86_)
+ /* load eax with the real target */
+
+ inst_RV_TT(INS_mov, REG_EAX, target);
+ regTracker.rsTrackRegTrash(REG_EAX);
+
+ if (cookie->gtOper == GT_CNS_INT)
+ inst_IV_handle(INS_push, cookie->gtIntCon.gtIconVal);
+ else
+ inst_TT(INS_push, cookie);
+
+ /* Keep track of ESP for EBP-less frames */
+ genSinglePush();
+
+ argSize += sizeof(void*);
+
+#elif defined(_TARGET_ARM_)
+
+ // Ensure that we spill these registers (if caller saved) in the prolog
+ regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
+
+ // ARM: load r12 with the real target
+ // X64: load r10 with the real target
+ inst_RV_TT(INS_mov, REG_PINVOKE_TARGET_PARAM, target);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_TARGET_PARAM);
+
+ // ARM: load r4 with the pinvoke VASigCookie
+ // X64: load r11 with the pinvoke VASigCookie
+ if (cookie->gtOper == GT_CNS_INT)
+ inst_RV_IV(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie->gtIntCon.gtIconVal,
+ EA_HANDLE_CNS_RELOC);
+ else
+ inst_RV_TT(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
+
+ noway_assert(args == argSize);
+
+ // Ensure that we don't trash any of these registers if we have to load
+ // the helper call target into a register to invoke it.
+ regMaskTP regsUsed;
+ regSet.rsLockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
+ RBM_PINVOKE_COOKIE_PARAM,
+ &regsUsed);
+#else
+ NYI("Non-virtual indirect calls via the P/Invoke stub");
+#endif
+
+ args = argSize;
+ noway_assert((size_t)(int)args == args);
+
+ genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize);
+
+#if defined(_TARGET_ARM_)
+ regSet.rsUnlockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
+ RBM_PINVOKE_COOKIE_PARAM,
+ regsUsed);
+#endif
+
+#ifdef _TARGET_ARM_
+ // genEmitHelperCall doesn't record all registers a helper call would trash.
+ regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
+#endif
+ }
+ else
+ {
+ //------------------------------------------------------
+ // Non-virtual indirect calls
+
+ if (fTailCall)
+ {
+ inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+ }
+ else
+ instEmit_indCall(call, args, retSize);
+ }
+
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+ // Done with indirect calls
+ break;
+ }
+
+ //------------------------------------------------------
+ // Non-virtual direct/indirect calls: Work out if the address of the
+ // call is known at JIT time (if not it is either an indirect call
+                // or the address must be accessed via a single/double indirection)
+
+ noway_assert(callType == CT_USER_FUNC || callType == CT_HELPER);
+
+ void* addr;
+ InfoAccessType accessType;
+
+ helperNum = compiler->eeGetHelperNum(methHnd);
+
+ if (callType == CT_HELPER)
+ {
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ void* pAddr;
+ addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+
+ accessType = IAT_VALUE;
+
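+                    // If the helper's address is not known directly, call indirectly through the
+                    // cell returned in pAddr.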
+ if (!addr)
+ {
+ accessType = IAT_PVALUE;
+ addr = pAddr;
+ }
+ }
+ else
+ {
+ noway_assert(helperNum == CORINFO_HELP_UNDEF);
+
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
+
+ if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
+
+ if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
+
+ accessType = addrInfo.accessType;
+ addr = addrInfo.addr;
+ }
+
+ if (fTailCall)
+ {
+ noway_assert(callType == CT_USER_FUNC);
+
+ switch (accessType)
+ {
+ case IAT_VALUE:
+ //------------------------------------------------------
+                            // Non-virtual direct calls to known addresses
+ //
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
+ break;
+
+ case IAT_PVALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a single indirection.
+ //
+ // For tailcalls we place the target address in REG_TAILCALL_ADDR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ {
+ regNumber indReg = REG_TAILCALL_ADDR;
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
+ regTracker.rsTrackRegTrash(indReg);
+ }
+#else
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+#endif
+ break;
+
+ case IAT_PPVALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a double indirection.
+ //
+ // For tailcalls we place the target address in REG_TAILCALL_ADDR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ {
+ regNumber indReg = REG_TAILCALL_ADDR;
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
+ regTracker.rsTrackRegTrash(indReg);
+ }
+#else
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
+ REG_TAILCALL_ADDR, 0);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+#endif
+ break;
+
+ default:
+ noway_assert(!"Bad accessType");
+ break;
+ }
+ }
+ else
+ {
+ switch (accessType)
+ {
+ regNumber indCallReg;
+
+ case IAT_VALUE:
+ //------------------------------------------------------
+                            // Non-virtual direct calls to known addresses
+ //
+ // The vast majority of calls end up here.... Wouldn't
+ // it be nice if they all did!
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef _TARGET_ARM_
+ if (!arm_Valid_Imm_For_BL((ssize_t)addr))
+ {
+ // Load the address into a register and call through a register
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the
+ // CALL indirection
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
+ indCallReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helperNum));
+ }
+ else
+#endif
+ {
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, REG_NA, REG_NA, 0,
+ 0, /* ireg, xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helperNum));
+ }
+ break;
+
+ case IAT_PVALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a single indirection.
+ //
+
+ // Load the address into a register, load indirect and call through a register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if CPU_LOAD_STORE_ARCH
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
+ // indirection
+
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ emitCallType = emitter::EC_INDIR_R;
+ addr = NULL;
+
+#else
+ emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
+ indCallReg = REG_NA;
+
+#endif // CPU_LOAD_STORE_ARCH
+
+ getEmitter()->emitIns_Call(emitCallType, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, args,
+ retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset,
+ indCallReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, /* isJump */
+ emitter::emitNoGChelper(helperNum));
+ break;
+
+ case IAT_PPVALUE:
+ {
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a double indirection.
+ //
+ // Double-indirection. Load the address into a register
+ // and call indirectly through the register
+
+ noway_assert(helperNum == CORINFO_HELP_UNDEF);
+
+ // Grab an available register to use for the CALL indirection
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT);
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ emitCallType = emitter::EC_INDIR_R;
+
+#else
+
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)addr);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ emitCallType = emitter::EC_INDIR_ARD;
+
+#endif // CPU_LOAD_STORE_ARCH
+
+ getEmitter()->emitIns_Call(emitCallType, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset,
+ indCallReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helperNum));
+ }
+ break;
+
+ default:
+ noway_assert(!"Bad accessType");
+ break;
+ }
+
+ // tracking of region protected by the monitor in synchronized methods
+ if ((helperNum != CORINFO_HELP_UNDEF) && (compiler->info.compFlags & CORINFO_FLG_SYNCH))
+ {
+ fPossibleSyncHelperCall = true;
+ }
+ }
+ }
+ break;
+
+ default:
+ noway_assert(!"strange call type");
+ break;
+ }
+
+ /*-------------------------------------------------------------------------
+ * For tailcalls, REG_INTRET contains the address of the target function,
+ * enregistered args are in the correct registers, and the stack arguments
+ * have been pushed on the stack. Now call the stub-sliding helper
+ */
+
+ if (fTailCall)
+ {
+
+ if (compiler->info.compCallUnmanaged)
+ genPInvokeMethodEpilog();
+
+#ifdef _TARGET_X86_
+ noway_assert(0 <= (ssize_t)args); // caller-pop args not supported for tailcall
+
+ // Push the count of the incoming stack arguments
+
+ unsigned nOldStkArgs =
+ (unsigned)((compiler->compArgSize - (intRegState.rsCalleeRegArgCount * sizeof(void*))) / sizeof(void*));
+ getEmitter()->emitIns_I(INS_push, EA_4BYTE, nOldStkArgs);
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+ // Push the count of the outgoing stack arguments
+
+ getEmitter()->emitIns_I(INS_push, EA_4BYTE, argSize / sizeof(void*));
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+ // Push info about the callee-saved registers to be restored
+ // For now, we always spill all registers if compiler->compTailCallUsed
+
+ DWORD calleeSavedRegInfo = 1 | // always restore EDI,ESI,EBX
+ (fTailCallTargetIsVSD ? 0x2 : 0x0); // Stub dispatch flag
+ getEmitter()->emitIns_I(INS_push, EA_4BYTE, calleeSavedRegInfo);
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+ // Push the address of the target function
+
+ getEmitter()->emitIns_R(INS_push, EA_4BYTE, REG_TAILCALL_ADDR);
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+#else // _TARGET_X86_
+
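+    // On targets other than x86, no extra arguments are pushed for the tailcall helper here.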
+ args = 0;
+ retSize = EA_UNKNOWN;
+
+#endif // _TARGET_X86_
+
+ if (compiler->getNeedsGSSecurityCookie())
+ {
+ genEmitGSCookieCheck(true);
+ }
+
+        // The TailCall helper does not poll for GC. An explicit GC poll
+        // should have been inserted when we morphed this into a tail call.
+ noway_assert(compiler->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
+
+ // Now call the helper
+
+ genEmitHelperCall(CORINFO_HELP_TAILCALL, (int)args, retSize);
+ }
+
+ /*-------------------------------------------------------------------------
+ * Done with call.
+ * Trash registers, pop arguments if needed, etc
+ */
+
+ /* Mark the argument registers as free */
+
+ noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
+
+ for (areg = 0; areg < MAX_REG_ARG; areg++)
+ {
+ regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT);
+
+ // Is this one of the used argument registers?
+ if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
+ continue;
+
+#ifdef _TARGET_ARM_
+ if (regSet.rsUsedTree[areg] == NULL)
+ {
+ noway_assert(areg % 2 == 1 &&
+ (((areg + 1) >= MAX_REG_ARG) || (regSet.rsUsedTree[areg + 1]->TypeGet() == TYP_STRUCT) ||
+ (genTypeStSz(regSet.rsUsedTree[areg + 1]->TypeGet()) == 2)));
+ continue;
+ }
+#endif
+
+ regSet.rsMarkRegFree(curArgMask);
+
+ // We keep regSet.rsMaskVars current during codegen, so we have to remove any
+ // that have been copied into arg regs.
+
+ regSet.RemoveMaskVars(curArgMask);
+ gcInfo.gcRegGCrefSetCur &= ~(curArgMask);
+ gcInfo.gcRegByrefSetCur &= ~(curArgMask);
+ }
+
+#if !FEATURE_STACK_FP_X87
+ //-------------------------------------------------------------------------
+ // free up the FP args
+
+ for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
+ {
+ regNumber argRegNum = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
+ regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT);
+
+ // Is this one of the used argument registers?
+ if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
+ continue;
+
+ regSet.rsMaskUsed &= ~curArgMask;
+ regSet.rsUsedTree[argRegNum] = NULL;
+ }
+#endif // !FEATURE_STACK_FP_X87
+
+ /* restore the old argument register status */
+
+ intRegState.rsCurRegArgNum = savCurIntArgReg;
+ floatRegState.rsCurRegArgNum = savCurFloatArgReg;
+
+ noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
+
+ /* Mark all trashed registers as such */
+
+ if (calleeTrashedRegs)
+ regTracker.rsTrashRegSet(calleeTrashedRegs);
+
+ regTracker.rsTrashRegsForGCInterruptability();
+
+#ifdef DEBUG
+
+ if (!(call->gtFlags & GTF_CALL_POP_ARGS))
+ {
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tEnd call ");
+ Compiler::printTreeID(call);
+ printf(" stack %02u [E=%02u] argSize=%u\n", saveStackLvl, getEmitter()->emitCurStackLvl, argSize);
+ }
+ noway_assert(stackLvl == getEmitter()->emitCurStackLvl);
+ }
+
+#endif
+
+#if FEATURE_STACK_FP_X87
+ /* All float temps must be spilled around function calls */
+ if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
+ {
+ noway_assert(compCurFPState.m_uStackSize == 1);
+ }
+ else
+ {
+ noway_assert(compCurFPState.m_uStackSize == 0);
+ }
+#else
+ if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
+ {
+#ifdef _TARGET_ARM_
+ if (call->gtCall.IsVarargs() || compiler->opts.compUseSoftFP)
+ {
+            // The result for vararg methods is returned in r0/r1, but our callers
+            // expect the return value in s0/s1 because of the floating-point type. Do the move now.
+ if (call->gtType == TYP_FLOAT)
+ {
+ inst_RV_RV(INS_vmov_i2f, REG_FLOATRET, REG_INTRET, TYP_FLOAT, EA_4BYTE);
+ }
+ else
+ {
+ inst_RV_RV_RV(INS_vmov_i2d, REG_FLOATRET, REG_INTRET, REG_NEXT(REG_INTRET), EA_8BYTE);
+ }
+ }
+#endif
+ genMarkTreeInReg(call, REG_FLOATRET);
+ }
+#endif
+
+ /* The function will pop all arguments before returning */
+
+ genStackLevel = saveStackLvl;
+
+ /* No trashed registers may possibly hold a pointer at this point */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+
+ regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) &
+ ~regSet.rsMaskVars & ~vptrMask;
+ if (ptrRegs)
+ {
+ // A reg may be dead already. The assertion is too strong.
+ LclVarDsc* varDsc;
+ unsigned varNum;
+
+ // use compiler->compCurLife
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && ptrRegs != 0; varNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked, not in a register, or a floating-point type */
+
+ if (!varDsc->lvTracked)
+ continue;
+ if (!varDsc->lvRegister)
+ continue;
+ if (varDsc->IsFloatRegType())
+ continue;
+
+ /* Get hold of the index and the bitmask for the variable */
+
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ /* Is this variable live currently? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex))
+ {
+ regNumber regNum = varDsc->lvRegNum;
+ regMaskTP regMask = genRegMask(regNum);
+
+ if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
+ ptrRegs &= ~regMask;
+ }
+ }
+ if (ptrRegs)
+ {
+ printf("Bad call handling for ");
+ Compiler::printTreeID(call);
+ printf("\n");
+ noway_assert(!"A callee trashed reg is holding a GC pointer");
+ }
+ }
+#endif
+
+#if defined(_TARGET_X86_)
+ //-------------------------------------------------------------------------
+ // Create a label for tracking of region protected by the monitor in synchronized methods.
+ // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
+ // so the GC state vars have been updated before creating the label.
+
+ if (fPossibleSyncHelperCall)
+ {
+ switch (helperNum)
+ {
+ case CORINFO_HELP_MON_ENTER:
+ case CORINFO_HELP_MON_ENTER_STATIC:
+ noway_assert(compiler->syncStartEmitCookie == NULL);
+ compiler->syncStartEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ noway_assert(compiler->syncStartEmitCookie != NULL);
+ break;
+ case CORINFO_HELP_MON_EXIT:
+ case CORINFO_HELP_MON_EXIT_STATIC:
+ noway_assert(compiler->syncEndEmitCookie == NULL);
+ compiler->syncEndEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ noway_assert(compiler->syncEndEmitCookie != NULL);
+ break;
+ default:
+ break;
+ }
+ }
+#endif // _TARGET_X86_
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ genDefineTempLabel(returnLabel);
+
+#ifdef _TARGET_X86_
+ if (getInlinePInvokeCheckEnabled())
+ {
+ noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+ BasicBlock* esp_check;
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+ /* mov ecx, dword ptr [frame.callSiteTracker] */
+
+ getEmitter()->emitIns_R_S(INS_mov, EA_4BYTE, REG_ARG_0, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+
+ /* Generate the conditional jump */
+
+ if (!(call->gtFlags & GTF_CALL_POP_ARGS))
+ {
+ if (argSize)
+ {
+ getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, argSize);
+ }
+ }
+ /* cmp ecx, esp */
+
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, REG_ARG_0, REG_SPBASE);
+
+ esp_check = genCreateTempLabel();
+
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+
+ getEmitter()->emitIns(INS_BREAKPOINT);
+
+ /* genCondJump() closes the current emitter block */
+
+ genDefineTempLabel(esp_check);
+ }
+#endif
+ }
+
+ /* Are we supposed to pop the arguments? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_X86_)
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ if ((compiler->opts.eeFlags & CORJIT_FLG_PINVOKE_RESTORE_ESP) ||
+ compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50))
+ {
+ // P/Invoke signature mismatch resilience - restore ESP to pre-call value. We would ideally
+ // take care of the cdecl argument popping here as well but the stack depth tracking logic
+ // makes this very hard, i.e. it needs to "see" the actual pop.
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+
+ if (argSize == 0 || (call->gtFlags & GTF_CALL_POP_ARGS))
+ {
+ /* mov esp, dword ptr [frame.callSiteTracker] */
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE,
+ compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
+ }
+ else
+ {
+ /* mov ecx, dword ptr [frame.callSiteTracker] */
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0,
+ compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+
+ /* lea esp, [ecx + argSize] */
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_ARG_0, (int)argSize);
+ }
+ }
+ }
+#endif // _TARGET_X86_
+
+ if (call->gtFlags & GTF_CALL_POP_ARGS)
+ {
+ noway_assert(args == (size_t) - (int)argSize);
+
+ if (argSize)
+ {
+ genAdjustSP(argSize);
+ }
+ }
+
+ if (pseudoStackLvl)
+ {
+ noway_assert(call->gtType == TYP_VOID);
+
+ /* Generate NOP */
+
+ instGen(INS_nop);
+ }
+
+ /* What does the function return? */
+
+ retVal = RBM_NONE;
+
+ switch (call->gtType)
+ {
+ case TYP_REF:
+ case TYP_ARRAY:
+ case TYP_BYREF:
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, call->TypeGet());
+
+ __fallthrough;
+
+ case TYP_INT:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+#endif
+ retVal = RBM_INTRET;
+ break;
+
+#ifdef _TARGET_ARM_
+ case TYP_STRUCT:
+ {
+ assert(call->gtCall.gtRetClsHnd != NULL);
+ assert(compiler->IsHfa(call->gtCall.gtRetClsHnd));
+ int retSlots = compiler->GetHfaCount(call->gtCall.gtRetClsHnd);
+ assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS);
+ assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8);
+ retVal = ((1 << retSlots) - 1) << REG_FLOATRET;
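+ // As an illustration (assuming an HFA made up of two floats, so retSlots == 2):
+ // ((1 << 2) - 1) == 0x3, shifted left by REG_FLOATRET, yields a mask covering
+ // the first two floating-point return registers.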
+ }
+ break;
+#endif
+
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+ retVal = RBM_LNGRET;
+ break;
+
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+ break;
+#endif
+
+ case TYP_VOID:
+ break;
+
+ default:
+ noway_assert(!"unexpected/unhandled fn return type");
+ }
+
+ // We now have to generate the "call epilog" (if it was a call to unmanaged code).
+ /* if it is a call to unmanaged code, frameListRoot must be set */
+
+ noway_assert((call->gtFlags & GTF_CALL_UNMANAGED) == 0 || frameListRoot);
+
+ if (frameListRoot)
+ genPInvokeCallEpilog(frameListRoot, retVal);
+
+ if (frameListRoot && (call->gtCall.gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
+ {
+ if (frameListRoot->lvRegister)
+ {
+ bool isBorn = false;
+ bool isDying = true;
+ genUpdateRegLife(frameListRoot, isBorn, isDying DEBUGARG(call));
+ }
+ }
+
+#ifdef DEBUG
+ if (compiler->opts.compStackCheckOnCall
+#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
+ // check the stack as frequently as possible
+ && !call->IsHelperCall()
+#else
+ && call->gtCall.gtCallType == CT_USER_FUNC
+#endif
+ )
+ {
+ noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
+ if (argSize > 0)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE);
+ getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, argSize);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallEspCheck, 0);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+ }
+ else
+ getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif // DEBUG
+
+#if FEATURE_STACK_FP_X87
+ UnspillRegVarsStackFp();
+#endif // FEATURE_STACK_FP_X87
+
+ if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
+ {
+ // Restore return node if necessary
+ if (call->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(call);
+ }
+
+#if FEATURE_STACK_FP_X87
+ // Mark as free
+ regSet.SetUsedRegFloat(call, false);
+#endif
+ }
+
+#if FEATURE_STACK_FP_X87
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ JitDumpFPState();
+ }
+#endif
+#endif
+
+ return retVal;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Create and record GC Info for the function.
+ */
+#ifdef JIT32_GCENCODER
+void*
+#else
+void
+#endif
+CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
+{
+#ifdef JIT32_GCENCODER
+ return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
+#else
+ genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
+#endif
+}
+
+#ifdef JIT32_GCENCODER
+void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
+ unsigned prologSize,
+ unsigned epilogSize DEBUGARG(void* codePtr))
+{
+ BYTE headerBuf[64];
+ InfoHdr header;
+
+ int s_cached;
+#ifdef DEBUG
+ size_t headerSize =
+#endif
+ compiler->compInfoBlkSize =
+ gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
+
+ size_t argTabOffset = 0;
+ size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
+
+#if DISPLAY_SIZES
+
+ if (genInterruptible)
+ {
+ gcHeaderISize += compiler->compInfoBlkSize;
+ gcPtrMapISize += ptrMapSize;
+ }
+ else
+ {
+ gcHeaderNSize += compiler->compInfoBlkSize;
+ gcPtrMapNSize += ptrMapSize;
+ }
+
+#endif // DISPLAY_SIZES
+
+ compiler->compInfoBlkSize += ptrMapSize;
+
+ /* Allocate the info block for the method */
+
+ compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
+
+#if 0 // VERBOSE_SIZES
+ // TODO-Review: 'dataSize', below, is not defined
+
+// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
+ {
+ printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
+ compiler->info.compILCodeSize,
+ compiler->compInfoBlkSize,
+ codeSize + dataSize,
+ codeSize + dataSize - prologSize - epilogSize,
+ 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
+ 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
+ compiler->info.compClassName,
+ compiler->info.compMethodName);
+ }
+
+#endif
+
+ /* Fill in the info block and return it to the caller */
+
+ void* infoPtr = compiler->compInfoBlkAddr;
+
+ /* Create the method info block: header followed by GC tracking tables */
+
+ compiler->compInfoBlkAddr +=
+ gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
+
+ assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
+ compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
+ assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
+
+#ifdef DEBUG
+
+ if (0)
+ {
+ BYTE* temp = (BYTE*)infoPtr;
+ unsigned size = compiler->compInfoBlkAddr - temp;
+ BYTE* ptab = temp + headerSize;
+
+ noway_assert(size == headerSize + ptrMapSize);
+
+ printf("Method info block - header [%u bytes]:", headerSize);
+
+ for (unsigned i = 0; i < size; i++)
+ {
+ if (temp == ptab)
+ {
+ printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
+ printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
+ }
+ else
+ {
+ if (!(i % 16))
+ printf("\n %04X: ", i);
+ }
+
+ printf("%02X ", *temp++);
+ }
+
+ printf("\n");
+ }
+
+#endif // DEBUG
+
+#if DUMP_GC_TABLES
+
+ if (compiler->opts.dspGCtbls)
+ {
+ const BYTE* base = (BYTE*)infoPtr;
+ unsigned size;
+ unsigned methodSize;
+ InfoHdr dumpHeader;
+
+ printf("GC Info for method %s\n", compiler->info.compFullName);
+ printf("GC info size = %3u\n", compiler->compInfoBlkSize);
+
+ size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
+ // printf("size of header encoding is %3u\n", size);
+ printf("\n");
+
+ if (compiler->opts.dspGCtbls)
+ {
+ base += size;
+ size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
+ // printf("size of pointer table is %3u\n", size);
+ printf("\n");
+ noway_assert(compiler->compInfoBlkAddr == (base + size));
+ }
+ }
+
+#ifdef DEBUG
+ if (jitOpts.testMask & 128)
+ {
+ for (unsigned offs = 0; offs < codeSize; offs++)
+ {
+ gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
+ }
+ }
+#endif // DEBUG
+#endif // DUMP_GC_TABLES
+
+ /* Make sure we ended up generating the expected number of bytes */
+
+ noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
+
+ return infoPtr;
+}
+
+#else // JIT32_GCENCODER
+
+void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
+{
+ IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
+ GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
+ GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+ assert(gcInfoEncoder);
+
+ // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+ gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+ // First we figure out the encoder ID's for the stack slots and registers.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+ // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+ gcInfoEncoder->FinalizeSlotIds();
+ // Now we can actually use those slot ID's to declare live ranges.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
+
+ gcInfoEncoder->Build();
+
+ // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
+ // let's save the values anyway for debugging purposes
+ compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+ compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+#endif
+
+/*****************************************************************************
+ * For CEE_LOCALLOC
+ */
+
+regNumber CodeGen::genLclHeap(GenTreePtr size)
+{
+ noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
+
+ // regCnt is a register used to hold both
+ // the amount to stack alloc (either in bytes or pointer sized words)
+ // and the final stack alloc address to return as the result
+ //
+ regNumber regCnt = DUMMY_INIT(REG_CORRUPT);
+ var_types type = genActualType(size->gtType);
+ emitAttr easz = emitTypeSize(type);
+
+#ifdef DEBUG
+ // Verify ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+
+ noway_assert(isFramePointerUsed());
+ noway_assert(genStackLevel == 0); // Can't have anything on the stack
+
+ BasicBlock* endLabel = NULL;
+#if FEATURE_FIXED_OUT_ARGS
+ bool stackAdjusted = false;
+#endif
+
+ if (size->IsCnsIntOrI())
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must adjust the SP
+ // essentially popping off the outgoing arg area,
+ // We will restore it right before we return from this method
+ //
+ if (compiler->lvaOutgoingArgSpaceSize > 0)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
+ 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjusted = true;
+ }
+#endif
+ size_t amount = size->gtIntCon.gtIconVal;
+
+ // Round amount up to a multiple of STACK_ALIGN and convert it to a count of pointer-sized words
+ amount += (STACK_ALIGN - 1);
+ amount &= ~(STACK_ALIGN - 1);
+ amount >>= STACK_ALIGN_SHIFT; // amount is number of pointer-sized words to locAlloc
+ size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
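+ // For illustration (assuming x86-style values STACK_ALIGN == 4 and STACK_ALIGN_SHIFT == 2):
+ // a request of amount == 10 bytes rounds up to 12 and becomes 3 pointer-sized words,
+ // while amount == 0 stays 0 and takes the zero-allocation path just below.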
+
+ /* If amount is zero then return null in RegCnt */
+ if (amount == 0)
+ {
+ regCnt = regSet.rsGrabReg(RBM_ALLINT);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+ goto DONE;
+ }
+
+ /* For small allocations we will generate up to six 'push 0' instructions inline */
+ if (amount <= 6)
+ {
+ regCnt = regSet.rsGrabReg(RBM_ALLINT);
+#if CPU_LOAD_STORE_ARCH
+ regNumber regZero = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
+ // Set 'regZero' to zero
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero);
+#endif
+
+ while (amount != 0)
+ {
+#if CPU_LOAD_STORE_ARCH
+ inst_IV(INS_push, (unsigned)genRegMask(regZero));
+#else
+ inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
+#endif
+ amount--;
+ }
+
+ regTracker.rsTrackRegTrash(regCnt);
+ // --- move regCnt, ESP
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
+ goto DONE;
+ }
+ else
+ {
+ if (!compiler->info.compInitMem)
+ {
+ // Re-bias amount to be number of bytes to adjust the SP
+ amount <<= STACK_ALIGN_SHIFT;
+ size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
+ if (amount < compiler->eeGetPageSize()) // must be < not <=
+ {
+ // Since the size is a page or less, simply adjust ESP
+
+ // ESP might already be in the guard page, so we must touch it BEFORE
+ // the alloc, not after.
+ regCnt = regSet.rsGrabReg(RBM_ALLINT);
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
+#if CPU_LOAD_STORE_ARCH
+ regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regTmp, REG_SPBASE, 0);
+ regTracker.rsTrackRegTrash(regTmp);
+#else
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+#endif
+ inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
+ inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(regCnt);
+ goto DONE;
+ }
+ }
+ }
+ }
+
+ // Compute the size of the block to allocate
+ genCompIntoFreeReg(size, 0, RegSet::KEEP_REG);
+ noway_assert(size->gtFlags & GTF_REG_VAL);
+ regCnt = size->gtRegNum;
+
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must adjust the SP
+ // essentially popping off the outgoing arg area,
+ // We will restore it right before we return from this method
+ //
+ if ((compiler->lvaOutgoingArgSpaceSize > 0) && !stackAdjusted)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
+ 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjusted = true;
+ }
+#endif
+
+ // Perform alignment if we don't have a GT_CNS size
+ //
+ if (!size->IsCnsIntOrI())
+ {
+ endLabel = genCreateTempLabel();
+
+ // If 0 we bail out
+ instGen_Compare_Reg_To_Zero(easz, regCnt); // set flags
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, endLabel);
+
+ // Align to STACK_ALIGN
+ inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
+
+ if (compiler->info.compInitMem)
+ {
+#if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
+ // regCnt will be the number of pointer-sized words to locAlloc
+ // If the shift right won't do the 'and', do it here
+ inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
+#endif
+ // --- shr regCnt, 2 ---
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT);
+ }
+ else
+ {
+ // regCnt will be the total number of bytes to locAlloc
+
+ inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
+ }
+ }
+
+ BasicBlock* loop;
+ loop = genCreateTempLabel();
+
+ if (compiler->info.compInitMem)
+ {
+ // At this point 'regCnt' is set to the number of pointer-sized words to locAlloc
+
+ /* Since we have to zero out the allocated memory AND ensure that
+ ESP is always valid by tickling the pages, we will just push 0's
+ on the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM_)
+ regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
+ regNumber regZero2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt) & ~genRegMask(regZero1));
+ // Set 'regZero1' and 'regZero2' to zero
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero1);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero2);
+#endif
+
+ // Loop:
+ genDefineTempLabel(loop);
+
+#if defined(_TARGET_X86_)
+
+ inst_IV(INS_push_hide, 0); // --- push 0
+ // Are we done?
+ inst_RV(INS_dec, regCnt, type);
+
+#elif defined(_TARGET_ARM_)
+
+ inst_IV(INS_push, (unsigned)(genRegMask(regZero1) | genRegMask(regZero2)));
+ // Are we done?
+ inst_RV_IV(INS_sub, regCnt, 2, emitActualTypeSize(type), INS_FLAGS_SET);
+
+#else
+ assert(!"Codegen missing");
+#endif // TARGETS
+
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ inst_JMP(jmpNotEqual, loop);
+
+ // Move the final value of ESP into regCnt
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE);
+ regTracker.rsTrackRegTrash(regCnt);
+ }
+ else
+ {
+ // At this point 'regCnt' is set to the total number of bytes to locAlloc
+
+ /* We don't need to zero out the allocated memory. However, we do have
+ to tickle the pages to ensure that ESP is always valid and is
+ in sync with the "stack guard page". Note that in the worst
+ case ESP is on the last byte of the guard page. Thus you must
+ touch ESP+0 first, not ESP+0x1000.
+
+ Another subtlety is that you don't want ESP to be exactly on the
+ boundary of the guard page, because PUSH is pre-decrement; thus
+ call setup would not touch the guard page but just beyond it. */
+
+ /* Note that we go through a few hoops so that ESP never points to
+ illegal pages at any time during the ticking process
+
+ neg REG
+ add REG, ESP // reg now holds ultimate ESP
+ jb loop // result is smaller than original ESP (no wrap around)
+ xor REG, REG // Overflow, pick lowest possible number
+ loop:
+ test ESP, [ESP+0] // X86 - tickle the page
+ ldr REGH,[ESP+0] // ARM - tickle the page
+ mov REGH, ESP
+ sub REGH, PAGE_SIZE
+ mov ESP, REGH
+ cmp ESP, REG
+ jae loop
+
+ mov ESP, REG
+ end:
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+
+ inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
+ inst_JMP(EJ_hs, loop);
+#else
+ inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
+ inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
+ inst_JMP(EJ_jb, loop);
+#endif
+ regTracker.rsTrackRegTrash(regCnt);
+
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+
+ genDefineTempLabel(loop);
+
+ // This is a workaround to avoid the emitter trying to track the
+ // decrement of the ESP - we do the subtraction in another reg
+ // instead of adjusting ESP directly.
+
+ regNumber regTemp = regSet.rsPickReg();
+
+ // Tickle the decremented value and move it back into ESP. Note that
+ // this has to be done BEFORE the update of ESP, since ESP might
+ // already be on the guard page. It is OK to leave the final value
+ // of ESP on the guard page.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0);
+#else
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+#endif
+
+ inst_RV_RV(INS_mov, regTemp, REG_SPBASE, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(regTemp);
+
+ inst_RV_IV(INS_sub, regTemp, compiler->eeGetPageSize(), EA_PTRSIZE);
+ inst_RV_RV(INS_mov, REG_SPBASE, regTemp, TYP_I_IMPL);
+
+ genRecoverReg(size, RBM_ALLINT,
+ RegSet::KEEP_REG); // not purely the 'size' tree anymore; though it is derived from 'size'
+ noway_assert(size->gtFlags & GTF_REG_VAL);
+ regCnt = size->gtRegNum;
+ inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ inst_JMP(jmpGEU, loop);
+
+ // Move the final value to ESP
+ inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
+ }
+ regSet.rsMarkRegFree(genRegMask(regCnt));
+
+DONE:
+
+ noway_assert(regCnt != DUMMY_INIT(REG_CORRUPT));
+
+ if (endLabel != NULL)
+ genDefineTempLabel(endLabel);
+
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must readjust the SP
+ //
+ if (stackAdjusted)
+ {
+ assert(compiler->lvaOutgoingArgSpaceSize > 0);
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
+ 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_sub, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ }
+#endif
+
+ /* Write the lvaShadowSPfirst stack frame slot */
+ noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+
+#if STACK_PROBES
+ // We don't think it is worth the codegen complexity to embed this
+ // in each of the customized allocas, even though it would be possible.
+ if (compiler->opts.compNeedStackProbes)
+ {
+ genGenerateStackProbe();
+ }
+#endif
+
+#ifdef DEBUG
+ // Update new ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+ }
+#endif
+
+ return regCnt;
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * Called by the main genSetScopeInfo() for every scope info piece to be recorded
+ */
+
+void CodeGen::genSetScopeInfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& varLoc)
+{
+ /* We need to do some mapping while reporting back these variables */
+
+ unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
+ noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+
+#ifdef _TARGET_X86_
+ // Non-x86 platforms are allowed to access all arguments directly
+ // so we don't need this code.
+
+ // Is this a varargs function?
+
+ if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
+ varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
+ {
+ noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
+
+ // All stack arguments (except the varargs handle) have to be
+ // accessed via the varargs cookie. Discard generated info,
+ // and just find its position relative to the varargs handle
+
+ PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
+ if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
+ {
+ noway_assert(!compiler->opts.compDbgCode);
+ return;
+ }
+
+ // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
+ // arguments of vararg functions to avoid reporting them to GC.
+ noway_assert(!compiler->lvaTable[varNum].lvRegister);
+ unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
+ unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
+
+ noway_assert(cookieOffset < varOffset);
+ unsigned offset = varOffset - cookieOffset;
+ unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
+ noway_assert(offset < stkArgSize);
+ offset = stkArgSize - offset;
+
+ varLoc.vlType = Compiler::VLT_FIXED_VA;
+ varLoc.vlFixedVarArg.vlfvOffset = offset;
+ }
+
+#endif // _TARGET_X86_
+
+ VarName name = NULL;
+
+#ifdef DEBUG
+
+ for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
+ {
+ if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
+ {
+ name = compiler->info.compVarScopes[scopeNum].vsdName;
+ }
+ }
+
+ // Hang on to this information.
+
+ TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
+
+ tlvi.tlviVarNum = ilVarNum;
+ tlvi.tlviLVnum = LVnum;
+ tlvi.tlviName = name;
+ tlvi.tlviStartPC = startOffs;
+ tlvi.tlviLength = length;
+ tlvi.tlviAvailable = avail;
+ tlvi.tlviVarLoc = varLoc;
+
+#endif // DEBUG
+
+ compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
+}
+
+#endif // DEBUGGING_SUPPORT
+
+/*****************************************************************************
+ *
+ * Return non-zero if the given register is free after the given tree is
+ * evaluated (i.e. the register is either not used at all, or it holds a
+ * register variable which is not live after the given node).
+ * This is only called by genCreateAddrMode, when tree is a GT_ADD, with one
+ * constant operand, and one that's in a register. Thus, the only thing we
+ * need to determine is whether the register holding op1 is dead.
+ */
+bool CodeGen::genRegTrashable(regNumber reg, GenTreePtr tree)
+{
+ regMaskTP vars;
+ regMaskTP mask = genRegMask(reg);
+
+ if (regSet.rsMaskUsed & mask)
+ return false;
+
+ assert(tree->gtOper == GT_ADD);
+ GenTreePtr regValTree = tree->gtOp.gtOp1;
+ if (!tree->gtOp.gtOp2->IsCnsIntOrI())
+ {
+ regValTree = tree->gtOp.gtOp2;
+ assert(tree->gtOp.gtOp1->IsCnsIntOrI());
+ }
+ assert(regValTree->gtFlags & GTF_REG_VAL);
+
+ /* At this point, the only way that the register will remain live
+ * is if it is itself a register variable that isn't dying.
+ */
+ assert(regValTree->gtRegNum == reg);
+ if (regValTree->IsRegVar() && !regValTree->IsRegVarDeath())
+ return false;
+ else
+ return true;
+}
+
+/*****************************************************************************/
+//
+// This method calculates the USE and DEF values for a statement.
+// It also calls fgSetRngChkTarget for the statement.
+//
+// We refactor out this code from fgPerBlockLocalVarLiveness
+// and add QMARK logic to it.
+//
+// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
+//
+// The usage of this method is very limited.
+// We should only call it for the first node in the statement or
+// for the node after the GTF_RELOP_QMARK node.
+//
+// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
+
+/*
+ Since a GT_QMARK tree can take two paths (i.e. the thenTree path or the elseTree path),
+ when we calculate its fgCurDefSet and fgCurUseSet, we need to combine the results
+ from both trees.
+
+ Note that the GT_QMARK trees are threaded as shown below with nodes 1 to 11
+ linked by gtNext.
+
+ The algorithm we use is:
+ (1) We walk these nodes according to the evaluation order (i.e. from node 1 to node 11).
+ (2) When we see the GTF_RELOP_QMARK node, we know we are about to split the path.
+ We cache copies of current fgCurDefSet and fgCurUseSet.
+ (The fact that it recursively calls itself handles the nested QMARK case,
+ where we need to remember multiple copies of fgCurDefSet and fgCurUseSet.)
+ (3) We walk the thenTree.
+ (4) When we see GT_COLON node, we know that we just finished the thenTree.
+ We then make a copy of the current fgCurDefSet and fgCurUseSet,
+ restore them to the ones before the thenTree, and then continue walking
+ the elseTree.
+ (5) When we see the GT_QMARK node, we know we just finished the elseTree.
+ So we combine the results from the thenTree and elseTree and then return.
+
+
+ +--------------------+
+ | GT_QMARK 11|
+ +----------+---------+
+ |
+ *
+ / \
+ / \
+ / \
+ +---------------------+ +--------------------+
+ | GT_<cond> 3 | | GT_COLON 7 |
+ | w/ GTF_RELOP_QMARK | | w/ GTF_COLON_COND |
+ +----------+----------+ +---------+----------+
+ | |
+ * *
+ / \ / \
+ / \ / \
+ / \ / \
+ 2 1 thenTree 6 elseTree 10
+ x | |
+ / * *
+ +----------------+ / / \ / \
+ |prevExpr->gtNext+------/ / \ / \
+ +----------------+ / \ / \
+ 5 4 9 8
+
+
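+ A small worked example of the merge (using hypothetical tracked locals v1, v2 and v3):
+ if the thenTree assigns v1 and reads v2 while the elseTree only reads v3, then at step (5)
+ the intersection leaves v1 out of fgCurDefSet (it is not assigned on the else path), while
+ the union keeps both v2 and v3 in fgCurUseSet.
+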
+*/
+
+GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, // The node to start walking with.
+ GenTreePtr relopNode, // The node before the startNode.
+ // (It should either be NULL or
+ // a GTF_RELOP_QMARK node.)
+ GenTreePtr asgdLclVar)
+{
+ GenTreePtr tree;
+
+ VARSET_TP VARSET_INIT(this, defSet_BeforeSplit, fgCurDefSet); // Store the current fgCurDefSet and fgCurUseSet so
+ VARSET_TP VARSET_INIT(this, useSet_BeforeSplit, fgCurUseSet); // we can restore them before entering the elseTree.
+
+ bool heapUse_BeforeSplit = fgCurHeapUse;
+ bool heapDef_BeforeSplit = fgCurHeapDef;
+ bool heapHavoc_BeforeSplit = fgCurHeapHavoc;
+
+ VARSET_TP VARSET_INIT_NOCOPY(defSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // These two variables will store
+ // the USE and DEF sets after
+ VARSET_TP VARSET_INIT_NOCOPY(useSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // evaluating the thenTree.
+
+ bool heapUse_AfterThenTree = fgCurHeapUse;
+ bool heapDef_AfterThenTree = fgCurHeapDef;
+ bool heapHavoc_AfterThenTree = fgCurHeapHavoc;
+
+ // relopNode is either NULL or a GTF_RELOP_QMARK node.
+ assert(!relopNode || (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK));
+
+ // If relopNode is NULL, then the startNode must be the 1st node of the statement.
+ // If relopNode is non-NULL, then the startNode must be the node right after the GTF_RELOP_QMARK node.
+ assert((!relopNode && startNode == compCurStmt->gtStmt.gtStmtList) ||
+ (relopNode && startNode == relopNode->gtNext));
+
+ for (tree = startNode; tree; tree = tree->gtNext)
+ {
+ switch (tree->gtOper)
+ {
+
+ case GT_QMARK:
+
+ // This must be a GT_QMARK node whose GTF_RELOP_QMARK node is recursively calling us.
+ noway_assert(relopNode && tree->gtOp.gtOp1 == relopNode);
+
+ // By the time we see a GT_QMARK, we must have finished processing the elseTree.
+ // So it's time to combine the results
+ // from the thenTree and the elseTree, and then return.
+
+ VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
+ VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
+
+ fgCurHeapDef = fgCurHeapDef && heapDef_AfterThenTree;
+ fgCurHeapHavoc = fgCurHeapHavoc && heapHavoc_AfterThenTree;
+ fgCurHeapUse = fgCurHeapUse || heapUse_AfterThenTree;
+
+ // Return the GT_QMARK node itself so the caller can continue from there.
+ // NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
+ // in the "for" statement.
+ goto _return;
+
+ case GT_COLON:
+ // By the time we see GT_COLON, we must have just walked the thenTree.
+ // So we need to do two things here.
+ // (1) Save the current fgCurDefSet and fgCurUseSet so that later we can combine them
+ // with the result from the elseTree.
+ // (2) Restore fgCurDefSet and fgCurUseSet to the points before the thenTree was walked,
+ // and then continue walking the elseTree.
+ VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
+ VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
+
+ heapDef_AfterThenTree = fgCurHeapDef;
+ heapHavoc_AfterThenTree = fgCurHeapHavoc;
+ heapUse_AfterThenTree = fgCurHeapUse;
+
+ VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
+ VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
+
+ fgCurHeapDef = heapDef_BeforeSplit;
+ fgCurHeapHavoc = heapHavoc_BeforeSplit;
+ fgCurHeapUse = heapUse_BeforeSplit;
+
+ break;
+
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_LCL_VAR_ADDR:
+ case GT_LCL_FLD_ADDR:
+ case GT_STORE_LCL_VAR:
+ case GT_STORE_LCL_FLD:
+ fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
+ break;
+
+ case GT_CLS_VAR:
+ // For Volatile indirection, first mutate the global heap
+ // see comments in ValueNum.cpp (under case GT_CLS_VAR)
+ // This models Volatile reads as def-then-use of the heap
+ // and allows for a CSE of a subsequent non-volatile read.
+ if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
+ {
+ // For any Volatile indirection, we must handle it as a
+ // definition of the global heap
+ fgCurHeapDef = true;
+ }
+ // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to
+ // assignment.
+ // Otherwise, we treat it as a use here.
+ if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
+ {
+ fgCurHeapUse = true;
+ }
+ break;
+
+ case GT_IND:
+ // For Volatile indirection, first mutate the global heap
+ // see comments in ValueNum.cpp (under case GT_CLS_VAR)
+ // This models Volatile reads as def-then-use of the heap
+ // and allows for a CSE of a subsequent non-volatile read.
+ if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+ {
+ // For any Volatile indirection, we must handle it as a
+ // definition of the global heap
+ fgCurHeapDef = true;
+ }
+
+ // If the GT_IND is the lhs of an assignment, we'll handle it
+ // as a heap def, when we get to assignment.
+ // Otherwise, we treat it as a use here.
+ if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
+ {
+ GenTreeLclVarCommon* dummyLclVarTree = NULL;
+ bool dummyIsEntire = false;
+ GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+ if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
+ {
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ }
+ else
+ {
+ // Defines a local addr
+ assert(dummyLclVarTree != nullptr);
+ fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
+ }
+ }
+ break;
+
+ // These should have been morphed away to become GT_INDs:
+ case GT_FIELD:
+ case GT_INDEX:
+ unreached();
+ break;
+
+ // We'll assume these are use-then-defs of the heap.
+ case GT_LOCKADD:
+ case GT_XADD:
+ case GT_XCHG:
+ case GT_CMPXCHG:
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ fgCurHeapDef = true;
+ fgCurHeapHavoc = true;
+ break;
+
+ case GT_MEMORYBARRIER:
+ // Similar to any Volatile indirection, we must handle this as a definition of the global heap
+ fgCurHeapDef = true;
+ break;
+
+ // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later.
+ case GT_CALL:
+ {
+ GenTreeCall* call = tree->AsCall();
+ bool modHeap = true;
+ if (call->gtCallType == CT_HELPER)
+ {
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+
+ if (!s_helperCallProperties.MutatesHeap(helpFunc) && !s_helperCallProperties.MayRunCctor(helpFunc))
+ {
+ modHeap = false;
+ }
+ }
+ if (modHeap)
+ {
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ fgCurHeapDef = true;
+ fgCurHeapHavoc = true;
+ }
+ }
+
+ // If this is a p/invoke unmanaged call or if this is a tail-call
+ // and we have an unmanaged p/invoke call in the method,
+ // then we're going to run the p/invoke epilog.
+ // So we mark the FrameRoot as used by this instruction.
+ // This ensures that the block->bbVarUse will contain
+ // the FrameRoot local var if it is a tracked variable.
+
+ if (tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged))
+ {
+ /* Get the TCB local and mark it as used */
+
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (varDsc->lvTracked)
+ {
+ if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
+ {
+ VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
+ }
+ }
+ }
+
+ break;
+
+ default:
+
+ // Determine whether it defines a heap location.
+ if (tree->OperIsAssignment() || tree->OperIsBlkOp())
+ {
+ GenTreeLclVarCommon* dummyLclVarTree = NULL;
+ if (!tree->DefinesLocal(this, &dummyLclVarTree))
+ {
+ // If it doesn't define a local, then it might update the heap.
+ fgCurHeapDef = true;
+ }
+ }
+
+ // Are we seeing a GT_<cond> for a GT_QMARK node?
+ if ((tree->OperKind() & GTK_RELOP) && (tree->gtFlags & GTF_RELOP_QMARK))
+ {
+ // We are about to enter the parallel paths (i.e. the thenTree and the elseTree).
+ // Recursively call fgLegacyPerStatementLocalVarLiveness.
+ // At the very beginning of fgLegacyPerStatementLocalVarLiveness, we will cache the values of the
+ // current
+ // fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and useSet_BeforeSplit.
+ // The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON
+ // node.
+ tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree, asgdLclVar);
+
+ // We must have been returned here after seeing a GT_QMARK node.
+ noway_assert(tree->gtOper == GT_QMARK);
+ }
+
+ break;
+ }
+ }
+
+_return:
+ return tree;
+}
+
+/*****************************************************************************/
+
+/*****************************************************************************
+ * Initialize the TCB local and the NDirect stub, and afterwards "push"
+ * the hoisted NDirect stub.
+ *
+ * 'initRegs' is the set of registers which will be zeroed out by the prolog;
+ * typically initRegs is zero.
+ *
+ * The layout of the NDirect Inlined Call Frame is as follows:
+ * (see VM/frames.h and VM/JITInterface.cpp for more information)
+ *
+ * offset field name when set
+ * --------------------------------------------------------------
+ * +00h vptr for class InlinedCallFrame method prolog
+ * +04h m_Next method prolog
+ * +08h m_Datum call site
+ * +0ch m_pCallSiteTracker (callsite ESP) call site and zeroed in method prolog
+ * +10h m_pCallerReturnAddress call site
+ * +14h m_pCalleeSavedRegisters not set by JIT
+ * +18h JIT retval spill area (int) before call_gc
+ * +1ch JIT retval spill area (long) before call_gc
+ * +20h Saved value of EBP method prolog
+ */
+
+regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
+{
+ assert(compiler->compGeneratingProlog);
+ noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
+ noway_assert(compiler->info.compCallUnmanaged);
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+ noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ /* let's find out if compLvFrameListRoot is enregistered */
+
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+
+ noway_assert(!varDsc->lvIsParam);
+ noway_assert(varDsc->lvType == TYP_I_IMPL);
+
+ DWORD threadTlsIndex, *pThreadTlsIndex;
+
+ threadTlsIndex = compiler->info.compCompHnd->getThreadTLSIndex((void**)&pThreadTlsIndex);
+#if defined(_TARGET_X86_)
+ if (threadTlsIndex == (DWORD)-1 || pInfo->osType != CORINFO_WINNT)
+#else
+ if (true)
+#endif
+ {
+ // Instead of calling GetThread(), and getting GS cookie and
+ // InlinedCallFrame vptr through indirections, we'll call only one helper.
+ // The helper takes frame address in REG_PINVOKE_FRAME, returns TCB in REG_PINVOKE_TCB
+ // and uses REG_PINVOKE_SCRATCH as scratch register.
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
+
+ // We're about to trash REG_PINVOKE_TCB; it had better not be in use!
+ assert((regSet.rsMaskUsed & RBM_PINVOKE_TCB) == 0);
+
+ // Don't use the argument registers (including the special argument in
+ // REG_PINVOKE_FRAME) for computing the target address.
+ regSet.rsLockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
+
+ genEmitHelperCall(CORINFO_HELP_INIT_PINVOKE_FRAME, 0, EA_UNKNOWN);
+
+ regSet.rsUnlockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
+
+ if (varDsc->lvRegister)
+ {
+ regNumber regTgt = varDsc->lvRegNum;
+
+ // We are about to initialize it, so turn the bit off in initRegs to prevent
+ // the prolog from reinitializing it.
+ initRegs &= ~genRegMask(regTgt);
+
+ if (regTgt != REG_PINVOKE_TCB)
+ {
+ // move TCB to its register if necessary
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, regTgt, REG_PINVOKE_TCB);
+ regTracker.rsTrackRegTrash(regTgt);
+ }
+ }
+ else
+ {
+ // move TCB to its stack location
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
+ compiler->info.compLvFrameListRoot, 0);
+ }
+
+ // We are done, the rest of this function deals with the inlined case.
+ return initRegs;
+ }
+
+ regNumber regTCB;
+
+ if (varDsc->lvRegister)
+ {
+ regTCB = varDsc->lvRegNum;
+
+ // We are about to initialize it, so turn the bit off in initRegs to prevent
+ // the prolog from reinitializing it.
+ initRegs &= ~genRegMask(regTCB);
+ }
+ else // varDsc is allocated on the Stack
+ {
+ regTCB = REG_PINVOKE_TCB;
+ }
+
+#if !defined(_TARGET_ARM_)
+#define WIN_NT_TLS_OFFSET (0xE10)
+#define WIN_NT5_TLS_HIGHOFFSET (0xf94)
+
+ /* get TCB, mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
+
+ // TODO-ARM-CQ: should we inline TlsGetValue here?
+
+ if (threadTlsIndex < 64)
+ {
+ // mov reg, FS:[0xE10+threadTlsIndex*4]
+ getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS,
+ WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(int));
+ regTracker.rsTrackRegTrash(regTCB);
+ }
+ else
+ {
+ noway_assert(pInfo->osMajor >= 5);
+
+ DWORD basePtr = WIN_NT5_TLS_HIGHOFFSET;
+ threadTlsIndex -= 64;
+
+ // mov reg, FS:[0x2c] or mov reg, fs:[0xf94]
+ // mov reg, [reg+threadTlsIndex*4]
+
+ getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS, basePtr);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, regTCB, threadTlsIndex * sizeof(int));
+ regTracker.rsTrackRegTrash(regTCB);
+ }
+#endif
+
+ /* save TCB in local var if not enregistered */
+
+ if (!varDsc->lvRegister)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTCB, compiler->info.compLvFrameListRoot, 0);
+ }
+
+ /* set frame's vptr */
+
+ const void *inlinedCallFrameVptr, **pInlinedCallFrameVptr;
+ inlinedCallFrameVptr = compiler->info.compCompHnd->getInlinedCallFrameVptr((void**)&pInlinedCallFrameVptr);
+ noway_assert(inlinedCallFrameVptr != NULL); // if we have the TLS index, vptr must also be known
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)inlinedCallFrameVptr,
+ compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameVptr,
+ REG_PINVOKE_SCRATCH);
+
+ // Set the GSCookie
+ GSCookie gsCookie, *pGSCookie;
+ compiler->info.compCompHnd->getGSCookie(&gsCookie, &pGSCookie);
+ noway_assert(gsCookie != 0); // if we have the TLS index, GS cookie must also be known
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, (ssize_t)gsCookie, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfGSCookie, REG_PINVOKE_SCRATCH);
+
+ /* Get current frame root (mov reg2, [reg+offsetOfThreadFrame]) and
+ set next field in frame */
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
+ pInfo->offsetOfThreadFrame);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH,
+ compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
+
+ noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
+
+ /* set EBP value in frame */
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, genFramePointerReg(),
+ compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfCalleeSavedFP);
+
+ /* reset track field in frame */
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfReturnAddress, REG_PINVOKE_SCRATCH);
+
+ /* get address of our frame */
+
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_SCRATCH, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
+
+ /* now "push" our N/direct frame */
+
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
+ pInfo->offsetOfThreadFrame);
+
+ return initRegs;
+}
+
+/*****************************************************************************
+ * Unchain the InlinedCallFrame.
+ * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node
+ * or tail call.
+ */
+void CodeGen::genPInvokeMethodEpilog()
+{
+ noway_assert(compiler->info.compCallUnmanaged);
+ noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
+ noway_assert(compiler->compCurBB == compiler->genReturnBB ||
+ (compiler->compTailCallUsed && (compiler->compCurBB->bbJumpKind == BBJ_THROW)) ||
+ (compiler->compJmpOpUsed && (compiler->compCurBB->bbFlags & BBF_HAS_JMP)));
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+ noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ getEmitter()->emitDisableRandomNops();
+ // debug check to make sure that we're not using ESI and/or EDI across this call, except for
+ // compLvFrameListRoot.
+ unsigned regTrashCheck = 0;
+
+ /* XXX Tue 5/29/2007
+ * We explicitly add interference for these in CodeGen::rgPredictRegUse. If you change the code
+ * sequence or registers used, make sure to update the interference for compiler->genReturnLocal.
+ */
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+ regNumber reg;
+ regNumber reg2 = REG_PINVOKE_FRAME;
+
+ //
+ // Two cases for epilog invocation:
+ //
+ // 1. Return
+ // We can trash the ESI/EDI registers.
+ //
+ // 2. Tail call
+ // When tail called, we'd like to preserve the enregistered args
+ // in ESI/EDI so we can pass them to the callee.
+ //
+ // For ARM, don't modify SP for storing and restoring the TCB/frame registers.
+ // Instead use the reserved local variable slot.
+ //
+ if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
+ {
+ if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // Save the register in the reserved local var slot.
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
+ compiler->lvaPInvokeFrameRegSaveVar, 0);
+#else
+ inst_RV(INS_push, REG_PINVOKE_TCB, TYP_I_IMPL);
+#endif
+ }
+ if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // Save the register in the reserved local var slot.
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
+ compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
+#else
+ inst_RV(INS_push, REG_PINVOKE_FRAME, TYP_I_IMPL);
+#endif
+ }
+ }
+
+ if (varDsc->lvRegister)
+ {
+ reg = varDsc->lvRegNum;
+ if (reg == reg2)
+ reg2 = REG_PINVOKE_TCB;
+
+ regTrashCheck |= genRegMask(reg2);
+ }
+ else
+ {
+ /* mov esi, [tcb address] */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->info.compLvFrameListRoot,
+ 0);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
+ reg = REG_PINVOKE_TCB;
+
+ regTrashCheck = RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME;
+ }
+
+ /* mov edi, [ebp-frame.next] */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
+ regTracker.rsTrackRegTrash(reg2);
+
+ /* mov [esi+offsetOfThreadFrame], edi */
+
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg2, reg, pInfo->offsetOfThreadFrame);
+
+ noway_assert(!(regSet.rsMaskUsed & regTrashCheck));
+
+ if (compiler->genReturnLocal != BAD_VAR_NUM && compiler->lvaTable[compiler->genReturnLocal].lvTracked &&
+ compiler->lvaTable[compiler->genReturnLocal].lvRegister)
+ {
+ // really make sure we're not clobbering compiler->genReturnLocal.
+ noway_assert(
+ !(genRegMask(compiler->lvaTable[compiler->genReturnLocal].lvRegNum) &
+ ((varDsc->lvRegister ? genRegMask(varDsc->lvRegNum) : 0) | RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME)));
+ }
+
+ (void)regTrashCheck;
+
+ // Restore the registers ESI and EDI.
+ if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
+ {
+ if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // Restore the register from the reserved local var slot.
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
+ compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
+#else
+ inst_RV(INS_pop, REG_PINVOKE_FRAME, TYP_I_IMPL);
+#endif
+ regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
+ }
+ if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // Restore the register from the reserved local var slot.
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
+ compiler->lvaPInvokeFrameRegSaveVar, 0);
+#else
+ inst_RV(INS_pop, REG_PINVOKE_TCB, TYP_I_IMPL);
+#endif
+ regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
+ }
+ }
+ getEmitter()->emitEnableRandomNops();
+}
+
+/*****************************************************************************
+ This function emits the call-site prolog for direct calls to unmanaged code.
+ It does all the necessary setup of the InlinedCallFrame.
+ frameListRoot specifies the local containing the thread control block.
+ argSize or methodToken is the value to be copied into the m_datum
+ field of the frame (methodToken may be indirected & have a reloc)
+ The function returns the register now containing the thread control block,
+ (it could be either enregistered or loaded into one of the scratch registers)
+*/
+
+regNumber CodeGen::genPInvokeCallProlog(LclVarDsc* frameListRoot,
+ int argSize,
+ CORINFO_METHOD_HANDLE methodToken,
+ BasicBlock* returnLabel)
+{
+ // Some stack locals might be 'cached' in registers; we need to trash them
+ // from the regTracker *and* also ensure the gc tracker does not consider
+ // them live (see the next assert). However, they might be live reg vars
+ // that are non-pointers CSE'd from pointers.
+ // That means the register will be live in rsMaskVars, so we can't just
+ // call gcMarkSetNpt().
+ {
+ regMaskTP deadRegs = regTracker.rsTrashRegsForGCInterruptability() & ~RBM_ARG_REGS;
+ gcInfo.gcRegGCrefSetCur &= ~deadRegs;
+ gcInfo.gcRegByrefSetCur &= ~deadRegs;
+
+#ifdef DEBUG
+ deadRegs &= regSet.rsMaskVars;
+ if (deadRegs)
+ {
+ for (LclVarDsc* varDsc = compiler->lvaTable;
+ ((varDsc < (compiler->lvaTable + compiler->lvaCount)) && deadRegs); varDsc++)
+ {
+ if (!varDsc->lvTracked || !varDsc->lvRegister)
+ continue;
+
+ if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varDsc->lvVarIndex))
+ continue;
+
+ regMaskTP varRegMask = genRegMask(varDsc->lvRegNum);
+ if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
+ varRegMask |= genRegMask(varDsc->lvOtherReg);
+
+ if (varRegMask & deadRegs)
+ {
+ // We found the enregistered var that should not be live if it
+ // was a GC pointer.
+ noway_assert(!varTypeIsGC(varDsc));
+ deadRegs &= ~varRegMask;
+ }
+ }
+ }
+#endif // DEBUG
+ }
+
+ /* Since we are using the InlinedCallFrame, we should have spilled all
+ GC pointers to it - even from callee-saved registers */
+
+ noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0);
+
+ /* must specify only one of these parameters */
+ noway_assert((argSize == 0) || (methodToken == NULL));
+
+ /* We are about to call unmanaged code directly.
+ Before we can do that we have to emit the following sequence:
+
+ mov dword ptr [frame.callTarget], MethodToken
+ mov dword ptr [frame.callSiteTracker], esp
+ mov reg, dword ptr [tcb_address]
+ mov byte ptr [tcb+offsetOfGcState], 0
+
+ */
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+
+ noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ /* mov dword ptr [frame.callSiteTarget], value */
+
+ if (methodToken == NULL)
+ {
+ /* mov dword ptr [frame.callSiteTarget], argSize */
+ instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, argSize, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
+ }
+ else
+ {
+ void *embedMethHnd, *pEmbedMethHnd;
+
+ embedMethHnd = (void*)compiler->info.compCompHnd->embedMethodHandle(methodToken, &pEmbedMethHnd);
+
+ noway_assert((!embedMethHnd) != (!pEmbedMethHnd));
+
+ if (embedMethHnd != NULL)
+ {
+ /* mov dword ptr [frame.callSiteTarget], "MethodDesc" */
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)embedMethHnd,
+ compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
+ }
+ else
+ {
+ /* mov reg, dword ptr [MethodDescIndir]
+ mov dword ptr [frame.callSiteTarget], reg */
+
+ regNumber reg = regSet.rsPickFreeReg();
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, (ssize_t)pEmbedMethHnd);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
+#else // !CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, reg, (ssize_t)pEmbedMethHnd);
+#endif // !CPU_LOAD_STORE_ARCH
+ regTracker.rsTrackRegTrash(reg);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
+ }
+ }
+
+ regNumber tcbReg = REG_NA;
+
+ if (frameListRoot->lvRegister)
+ {
+ tcbReg = frameListRoot->lvRegNum;
+ }
+ else
+ {
+ tcbReg = regSet.rsGrabReg(RBM_ALLINT);
+
+ /* mov reg, dword ptr [tcb address] */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tcbReg,
+ (unsigned)(frameListRoot - compiler->lvaTable), 0);
+ regTracker.rsTrackRegTrash(tcbReg);
+ }
+
+#ifdef _TARGET_X86_
+ /* mov dword ptr [frame.callSiteTracker], esp */
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
+#endif // _TARGET_X86_
+
+#if CPU_LOAD_STORE_ARCH
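+ // Load/store architectures: form the return address with an adr to the return label, then store it into the frame.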
+ regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg));
+ getEmitter()->emitIns_J_R(INS_adr, EA_PTRSIZE, returnLabel, tmpReg);
+ regTracker.rsTrackRegTrash(tmpReg);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, tmpReg, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
+#else // !CPU_LOAD_STORE_ARCH
+ /* mov dword ptr [frame.callSiteReturnAddress], label */
+
+ getEmitter()->emitIns_J_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, returnLabel, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
+#endif // !CPU_LOAD_STORE_ARCH
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Zero(EA_1BYTE, tmpReg);
+
+ noway_assert(tmpReg != tcbReg);
+
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, tmpReg, tcbReg, pInfo->offsetOfGCState);
+#else // !CPU_LOAD_STORE_ARCH
+ /* mov byte ptr [tcbReg+offsetOfGcState], 0 */
+
+ getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 0, tcbReg, pInfo->offsetOfGCState);
+#endif // !CPU_LOAD_STORE_ARCH
+
+ return tcbReg;
+}
+
+/*****************************************************************************
+ *
+ First we have to mark in the hoisted NDirect stub that we are back
+ in managed code. Then we have to check (a global flag) whether GC is
+ pending or not. If so, we just call into a jit-helper.
+ Right now we have this call always inlined, i.e. we always skip around
+ the jit-helper call.
+ Note:
+ The tcb address is a regular local (initialized in the prolog), so it is either
+ enregistered or in the frame:
+
+ tcb_reg = [tcb_address is enregistered] OR [mov ecx, tcb_address]
+ mov byte ptr[tcb_reg+offsetOfGcState], 1
+ cmp 'global GC pending flag', 0
+ je @f
+ [mov ECX, tcb_reg] OR [ecx was setup above] ; we pass the tcb value to callGC
+ [mov [EBP+spill_area+0], eax] ; spill the int return value if any
+ [mov [EBP+spill_area+4], edx] ; spill the long return value if any
+ call @callGC
+ [mov eax, [EBP+spill_area+0] ] ; reload the int return value if any
+ [mov edx, [EBP+spill_area+4] ] ; reload the long return value if any
+ @f:
+ */
+
+void CodeGen::genPInvokeCallEpilog(LclVarDsc* frameListRoot, regMaskTP retVal)
+{
+ BasicBlock* clab_nostop;
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+ regNumber reg2;
+ regNumber reg3;
+
+#ifdef _TARGET_ARM_
+ reg3 = REG_R3;
+#else
+ reg3 = REG_EDX;
+#endif
+
+ getEmitter()->emitDisableRandomNops();
+
+ if (frameListRoot->lvRegister)
+ {
+ /* make sure that register is live across the call */
+
+ reg2 = frameListRoot->lvRegNum;
+ noway_assert(genRegMask(reg2) & RBM_INT_CALLEE_SAVED);
+ }
+ else
+ {
+ /* mov reg2, dword ptr [tcb address] */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ reg2 = REG_R2;
+#else
+ reg2 = REG_ECX;
+#endif
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2,
+ (unsigned)(frameListRoot - compiler->lvaTable), 0);
+ regTracker.rsTrackRegTrash(reg2);
+ }
+
+#ifdef _TARGET_ARM_
+ /* mov r3, 1 */
+ /* strb [r2+offsetOfGcState], r3 */
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, reg3, 1);
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, reg3, reg2, pInfo->offsetOfGCState);
+#else
+ /* mov byte ptr [tcb+offsetOfGcState], 1 */
+ getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 1, reg2, pInfo->offsetOfGCState);
+#endif
+
+ /* test global flag (we return to managed code) */
+
+ LONG *addrOfCaptureThreadGlobal, **pAddrOfCaptureThreadGlobal;
+
+ addrOfCaptureThreadGlobal =
+ compiler->info.compCompHnd->getAddrOfCaptureThreadGlobal((void**)&pAddrOfCaptureThreadGlobal);
+ noway_assert((!addrOfCaptureThreadGlobal) != (!pAddrOfCaptureThreadGlobal));
+
+ // Can we directly use addrOfCaptureThreadGlobal?
+
+ if (addrOfCaptureThreadGlobal)
+ {
+#ifdef _TARGET_ARM_
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
+ regTracker.rsTrackRegTrash(reg3);
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
+#else
+ getEmitter()->emitIns_C_I(INS_cmp, EA_PTR_DSP_RELOC, FLD_GLOBAL_DS, (ssize_t)addrOfCaptureThreadGlobal, 0);
+#endif
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)pAddrOfCaptureThreadGlobal);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
+ regTracker.rsTrackRegTrash(reg3);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
+#else // !_TARGET_ARM_
+
+ getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, REG_ECX,
+ (ssize_t)pAddrOfCaptureThreadGlobal);
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, REG_ECX, 0);
+
+#endif // !_TARGET_ARM_
+ }
+
+ /* Create the label we jump to when no GC stop is required */
+ clab_nostop = genCreateTempLabel();
+
+ /* Generate the conditional jump */
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, clab_nostop);
+
+#ifdef _TARGET_ARM_
+// The helper preserves the return value on ARM
+#else
+ /* save return value (if necessary) */
+ if (retVal != RBM_NONE)
+ {
+ if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
+ {
+ /* push eax */
+
+ inst_RV(INS_push, REG_INTRET, TYP_INT);
+
+ if (retVal == RBM_LNGRET)
+ {
+ /* push edx */
+
+ inst_RV(INS_push, REG_EDX, TYP_INT);
+ }
+ }
+ }
+#endif
+
+ /* emit the call to the EE-helper that stops for GC (or other reasons) */
+
+ genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, /* argSize */
+ EA_UNKNOWN); /* retSize */
+
+#ifdef _TARGET_ARM_
+// The helper preserves the return value on ARM
+#else
+ /* restore return value (if necessary) */
+
+ if (retVal != RBM_NONE)
+ {
+ if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
+ {
+ if (retVal == RBM_LNGRET)
+ {
+ /* pop edx */
+
+ inst_RV(INS_pop, REG_EDX, TYP_INT);
+ regTracker.rsTrackRegTrash(REG_EDX);
+ }
+
+ /* pop eax */
+
+ inst_RV(INS_pop, REG_INTRET, TYP_INT);
+ regTracker.rsTrackRegTrash(REG_INTRET);
+ }
+ }
+#endif
+
+ /* genCondJump() closes the current emitter block */
+
+ genDefineTempLabel(clab_nostop);
+
+ // This marks the InlinedCallFrame as "inactive". In fully interruptible code, this is not atomic with
+ // the above code. So the process is:
+ // 1) Return to cooperative mode
+ // 2) Check to see if we need to stop for GC
+ // 3) Return from the p/invoke (as far as the stack walker is concerned).
+
+ /* mov dword ptr [frame.callSiteTracker], 0 */
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
+
+ getEmitter()->emitEnableRandomNops();
+}
+
+/*****************************************************************************/
+
+/*****************************************************************************
+* TRACKING OF FLAGS
+*****************************************************************************/
+
+void CodeGen::genFlagsEqualToNone()
+{
+ genFlagsEqReg = REG_NA;
+ genFlagsEqVar = (unsigned)-1;
+ genFlagsEqLoc.Init();
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the flags register has a value that reflects the
+ * contents of the given register.
+ */
+
+void CodeGen::genFlagsEqualToReg(GenTreePtr tree, regNumber reg)
+{
+ genFlagsEqLoc.CaptureLocation(getEmitter());
+ genFlagsEqReg = reg;
+
+ /* previous setting of flags by a var becomes invalid */
+
+ genFlagsEqVar = 0xFFFFFFFF;
+
+ /* Set appropriate flags on the tree */
+
+ if (tree)
+ {
+ tree->gtFlags |= GTF_ZSF_SET;
+ assert(tree->gtSetFlags());
+ }
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the flags register has a value that reflects the
+ * contents of the given local variable.
+ */
+
+void CodeGen::genFlagsEqualToVar(GenTreePtr tree, unsigned var)
+{
+ genFlagsEqLoc.CaptureLocation(getEmitter());
+ genFlagsEqVar = var;
+
+ /* previous setting of flags by a register becomes invalid */
+
+ genFlagsEqReg = REG_NA;
+
+ /* Set appropriate flags on the tree */
+
+ if (tree)
+ {
+ tree->gtFlags |= GTF_ZSF_SET;
+ assert(tree->gtSetFlags());
+ }
+}
+
+/*****************************************************************************
+ *
+ * Return an indication of whether the flags register is set to the current
+ * value of the given register/variable. The return value is as follows:
+ *
+ * false .. nothing
+ * true .. the zero flag (ZF) and sign flag (SF) are set
+ */
+
+bool CodeGen::genFlagsAreReg(regNumber reg)
+{
+ if ((genFlagsEqReg == reg) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+bool CodeGen::genFlagsAreVar(unsigned var)
+{
+ if ((genFlagsEqVar == var) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ * This utility function returns true iff the execution path from "from"
+ * (inclusive) to "to" (exclusive) contains a death of the given var
+ */
+bool CodeGen::genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum)
+{
+ GenTreePtr tree;
+ for (tree = from; tree != NULL && tree != to; tree = tree->gtNext)
+ {
+ if (tree->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH))
+ {
+ unsigned dyingVarNum = tree->gtLclVarCommon.gtLclNum;
+ if (dyingVarNum == varNum)
+ return true;
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ if (varDsc->lvPromoted)
+ {
+ assert(varDsc->lvType == TYP_STRUCT);
+ unsigned firstFieldNum = varDsc->lvFieldLclStart;
+ if (varNum >= firstFieldNum && varNum < firstFieldNum + varDsc->lvFieldCnt)
+ {
+ return true;
+ }
+ }
+ }
+ }
+ assert(tree != NULL);
+ return false;
+}
+
+#endif // LEGACY_BACKEND
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
new file mode 100644
index 0000000000..fb0d6ea165
--- /dev/null
+++ b/src/jit/codegenlinear.h
@@ -0,0 +1,224 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This file contains the members of CodeGen that are defined and used
+// only by the RyuJIT backend. It is included by CodeGen.h in the
+// definition of the CodeGen class.
+//
+
+#ifndef LEGACY_BACKEND // Not necessary (it's this way in the #include location), but helpful to IntelliSense
+
+void genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree);
+
+void genCodeForTreeNode(GenTreePtr treeNode);
+
+void genCodeForBinary(GenTreePtr treeNode);
+
+void genCodeForDivMod(GenTreeOp* treeNode);
+
+void genCodeForMulHi(GenTreeOp* treeNode);
+
+void genLeaInstruction(GenTreeAddrMode* lea);
+
+void genSetRegToCond(regNumber dstReg, GenTreePtr tree);
+
+void genIntToIntCast(GenTreePtr treeNode);
+
+void genFloatToFloatCast(GenTreePtr treeNode);
+
+void genFloatToIntCast(GenTreePtr treeNode);
+
+void genIntToFloatCast(GenTreePtr treeNode);
+
+void genCkfinite(GenTreePtr treeNode);
+
+void genIntrinsic(GenTreePtr treeNode);
+
+void genPutArgStk(GenTreePtr treeNode);
+unsigned getBaseVarForPutArgStk(GenTreePtr treeNode);
+
+#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
+unsigned getFirstArgWithStackSlot();
+#endif // _TARGET_XARCH_ || _TARGET_ARM64_
+
+void genCompareFloat(GenTreePtr treeNode);
+
+void genCompareInt(GenTreePtr treeNode);
+
+#if !defined(_TARGET_64BIT_)
+void genCompareLong(GenTreePtr treeNode);
+void genJTrueLong(GenTreePtr treeNode);
+#endif
+
+#ifdef FEATURE_SIMD
+enum SIMDScalarMoveType
+{
+ SMT_ZeroInitUpper, // zero initialize target upper bits
+ SMT_ZeroInitUpper_SrcHasUpperZeros, // zero initialize target upper bits; source upper bits are known to be zero
+ SMT_PreserveUpper // preserve target upper bits
+};
+
+instruction getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival = nullptr);
+void genSIMDScalarMove(var_types type, regNumber target, regNumber src, SIMDScalarMoveType moveType);
+void genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg);
+void genSIMDIntrinsicInit(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicInitN(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicInitArray(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode);
+
+void genSIMDIntrinsic(GenTreeSIMD* simdNode);
+void genSIMDCheck(GenTree* treeNode);
+
+// TYP_SIMD12 (i.e Vector3 of size 12 bytes) is not a hardware supported size and requires
+// two reads/writes on 64-bit targets. These routines abstract reading/writing of Vector3
+// values through an indirection. Note that Vector3 locals allocated on stack would have
+// their size rounded to TARGET_POINTER_SIZE (which is 8 bytes on 64-bit targets) and hence
+// Vector3 locals could be treated as TYP_SIMD16 while reading/writing.
+void genStoreIndTypeSIMD12(GenTree* treeNode);
+void genStoreLclFldTypeSIMD12(GenTree* treeNode);
+void genLoadIndTypeSIMD12(GenTree* treeNode);
+void genLoadLclFldTypeSIMD12(GenTree* treeNode);
+#endif // FEATURE_SIMD
+
+#if !defined(_TARGET_64BIT_)
+
+// CodeGen for Long Ints
+
+void genStoreLongLclVar(GenTree* treeNode);
+
+#endif // !defined(_TARGET_64BIT_)
+
+void genProduceReg(GenTree* tree);
+
+void genUnspillRegIfNeeded(GenTree* tree);
+
+regNumber genConsumeReg(GenTree* tree);
+
+void genConsumeRegAndCopy(GenTree* tree, regNumber needReg);
+
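+// Consume the node's register only if the node is not contained (contained nodes do not produce a register).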
+void genConsumeIfReg(GenTreePtr tree)
+{
+ if (!tree->isContained())
+ {
+ (void)genConsumeReg(tree);
+ }
+}
+
+void genRegCopy(GenTreePtr tree);
+
+void genTransferRegGCState(regNumber dst, regNumber src);
+
+void genConsumeAddress(GenTree* addr);
+
+void genConsumeAddrMode(GenTreeAddrMode* mode);
+
+void genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg);
+void genConsumeBlockDst(GenTreeBlk* blkNode);
+GenTree* genConsumeBlockSrc(GenTreeBlk* blkNode);
+void genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+void genConsumePutStructArgStk(
+ GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+void genConsumeRegs(GenTree* tree);
+
+void genConsumeOperands(GenTreeOp* tree);
+
+void genEmitGSCookieCheck(bool pushReg);
+
+void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFlags flags = INS_FLAGS_DONT_CARE);
+
+void genCodeForShift(GenTreePtr tree);
+
+#ifdef _TARGET_XARCH_
+void genCodeForShiftRMW(GenTreeStoreInd* storeInd);
+#endif // _TARGET_XARCH_
+
+void genCodeForCpObj(GenTreeObj* cpObjNode);
+
+void genCodeForCpBlk(GenTreeBlk* cpBlkNode);
+
+void genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode);
+
+void genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+void genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum);
+
+void genStructPutArgRepMovs(GenTreePutArgStk* putArgStkNode, unsigned baseVarNum);
+void genStructPutArgUnroll(GenTreePutArgStk* putArgStkNode, unsigned baseVarNum);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+void genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
+
+void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
+
+void genCodeForStoreBlk(GenTreeBlk* storeBlkNode);
+
+void genCodeForInitBlk(GenTreeBlk* initBlkNode);
+
+void genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode);
+
+void genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode);
+
+void genJumpTable(GenTree* tree);
+
+void genTableBasedSwitch(GenTree* tree);
+
+void genCodeForArrIndex(GenTreeArrIndex* treeNode);
+
+void genCodeForArrOffset(GenTreeArrOffs* treeNode);
+
+instruction genGetInsForOper(genTreeOps oper, var_types type);
+
+void genStoreInd(GenTreePtr node);
+
+bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data);
+
+void genCallInstruction(GenTreePtr call);
+
+void genJmpMethod(GenTreePtr jmp);
+
+void genMultiRegCallStoreToLocal(GenTreePtr treeNode);
+
+// Deals with codegen for multi-register struct returns.
+bool isStructReturn(GenTreePtr treeNode);
+void genStructReturn(GenTreePtr treeNode);
+
+// Codegen for GT_RETURN.
+void genReturn(GenTreePtr treeNode);
+
+void genLclHeap(GenTreePtr tree);
+
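+// Returns true if the given node is a local variable that is a register candidate.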
+bool genIsRegCandidateLocal(GenTreePtr tree)
+{
+ if (!tree->IsLocal())
+ {
+ return false;
+ }
+ const LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
+ return (varDsc->lvIsRegCandidate());
+}
+
+#ifdef DEBUG
+GenTree* lastConsumedNode;
+void genCheckConsumeNode(GenTree* treeNode);
+#else // !DEBUG
+inline void genCheckConsumeNode(GenTree* treeNode)
+{
+}
+#endif // DEBUG
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
new file mode 100644
index 0000000000..a41c28695b
--- /dev/null
+++ b/src/jit/codegenxarch.cpp
@@ -0,0 +1,9388 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Amd64/x86 Code Generator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.
+
+#ifdef _TARGET_XARCH_
+#include "emit.h"
+#include "codegen.h"
+#include "lower.h"
+#include "gcinfo.h"
+#include "gcinfoencoder.h"
+
+// Get the register assigned to the given node
+
+regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
+{
+ return tree->gtRegNum;
+}
+
+//------------------------------------------------------------------------
+// genSpillVar: Spill a local variable
+//
+// Arguments:
+// tree - the lclVar node for the variable being spilled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The lclVar must be a register candidate (lvRegCandidate)
+
+void CodeGen::genSpillVar(GenTreePtr tree)
+{
+ unsigned varNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ assert(varDsc->lvIsRegCandidate());
+
+ // We don't actually need to spill if it is already living in memory
+ bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
+ if (needsSpill)
+ {
+ var_types lclTyp = varDsc->TypeGet();
+ if (varDsc->lvNormalizeOnStore())
+ {
+ lclTyp = genActualType(lclTyp);
+ }
+ emitAttr size = emitTypeSize(lclTyp);
+
+ bool restoreRegVar = false;
+ if (tree->gtOper == GT_REG_VAR)
+ {
+ tree->SetOper(GT_LCL_VAR);
+ restoreRegVar = true;
+ }
+
+ // mask off the flag to generate the right spill code, then bring it back
+ tree->gtFlags &= ~GTF_REG_VAL;
+
+ instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
+#if CPU_LONG_USES_REGPAIR
+ if (varTypeIsMultiReg(tree))
+ {
+ assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
+ assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
+ regNumber regLo = genRegPairLo(tree->gtRegPair);
+ regNumber regHi = genRegPairHi(tree->gtRegPair);
+ inst_TT_RV(storeIns, tree, regLo);
+ inst_TT_RV(storeIns, tree, regHi, 4);
+ }
+ else
+#endif
+ {
+ assert(varDsc->lvRegNum == tree->gtRegNum);
+ inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
+ }
+ tree->gtFlags |= GTF_REG_VAL;
+
+ if (restoreRegVar)
+ {
+ tree->SetOper(GT_REG_VAR);
+ }
+
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
+ gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
+
+ if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
+ {
+#ifdef DEBUG
+ if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
+ }
+#endif
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+
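+ // Whether or not we actually stored the value, clear the spill flag and mark the variable as now living on the stack.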
+ tree->gtFlags &= ~GTF_SPILL;
+ varDsc->lvRegNum = REG_STK;
+ if (varTypeIsMultiReg(tree))
+ {
+ varDsc->lvOtherReg = REG_STK;
+ }
+}
+
+// inline
+void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
+{
+ assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
+ varDsc->lvRegNum = tree->gtRegNum;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Generate code that will set the given register to the integer constant.
+ */
+
+void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
+{
+ // Reg cannot be a FP reg
+ assert(!genIsValidFloatReg(reg));
+
+ // The only TYP_REF constant that can come down this path is a managed 'null' since it is not
+ // relocatable. Other ref type constants (e.g. string objects) go through a different
+ // code path.
+ noway_assert(type != TYP_REF || val == 0);
+
+ if (val == 0)
+ {
+ instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
+ }
+ else
+ {
+ // TODO-XArch-CQ: needs all the optimized cases
+ getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code to check that the GS cookie wasn't thrashed by a buffer
+ * overrun. If pushReg is true, preserve all registers around code sequence.
+ * Otherwise ECX could be modified.
+ *
+ * Implementation Note: pushReg = true, in case of tail calls.
+ */
+void CodeGen::genEmitGSCookieCheck(bool pushReg)
+{
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while
+ // executing GS cookie check will not collect the object pointed to by EAX.
+ //
+ // For Amd64 System V, a two-register-returned struct could be returned in RAX and RDX
+ // In such case make sure that the correct GC-ness of RDX is reported as well, so
+ // a GC object pointed by RDX will not be collected.
+ if (!pushReg)
+ {
+ // Handle multi-reg return type values
+ if (compiler->compMethodReturnsMultiRegRetType())
+ {
+ ReturnTypeDesc retTypeDesc;
+ if (varTypeIsLong(compiler->info.compRetNativeType))
+ {
+ retTypeDesc.InitializeLongReturnType(compiler);
+ }
+ else // we must have a struct return type
+ {
+ retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
+ }
+
+ unsigned regCount = retTypeDesc.GetReturnRegCount();
+
+ // Only the x86 and x64 Unix ABIs allow multi-reg returns, and the
+ // number of result regs should be equal to MAX_RET_REG_COUNT.
+ assert(regCount == MAX_RET_REG_COUNT);
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
+ }
+ }
+ else if (compiler->compMethodReturnsRetBufAddr())
+ {
+ // This is for returning in an implicit RetBuf.
+ // If the address of the buffer is returned in REG_INTRET, mark the content of INTRET as ByRef.
+
+ // In case the return is in an implicit RetBuf, the native return type should be a struct
+ assert(varTypeIsStruct(compiler->info.compRetNativeType));
+
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF);
+ }
+ // ... all other cases.
+ else
+ {
+#ifdef _TARGET_AMD64_
+ // For x64, structs that are not returned in registers are always
+ // returned in implicit RetBuf. If we reached here, we should not have
+ // a RetBuf and the return type should not be a struct.
+ assert(compiler->info.compRetBuffArg == BAD_VAR_NUM);
+ assert(!varTypeIsStruct(compiler->info.compRetNativeType));
+#endif // _TARGET_AMD64_
+
+ // For x86 Windows we can't make such assertions since we generate code for returning of
+ // the RetBuf in REG_INTRET only when the ProfilerHook is enabled. Otherwise
+ // compRetNativeType could be TYP_STRUCT.
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
+ }
+ }
+
+ regNumber regGSCheck;
+ if (!pushReg)
+ {
+ // Non-tail call: we can use any callee-trash register that is not
+ // a return register and does not contain the 'this' pointer (which must be kept alive), since
+ // we are generating the GS cookie check after a GT_RETURN block.
+ // Note: On Amd64 System V RDX is an arg register - REG_ARG_2 - as well
+ // as return register for two-register-returned structs.
+ if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
+ (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ARG_0))
+ {
+ regGSCheck = REG_ARG_1;
+ }
+ else
+ {
+ regGSCheck = REG_ARG_0;
+ }
+ }
+ else
+ {
+#ifdef _TARGET_X86_
+ NYI_X86("Tail calls from methods that need GS check");
+ regGSCheck = REG_NA;
+#else // !_TARGET_X86_
+ // Tail calls from methods that need GS check: We need to preserve registers while
+ // emitting GS cookie check for a tail prefixed call or a jmp. To emit GS cookie
+ // check, we might need a register. This won't be an issue for jmp calls for the
+ // reason mentioned below (see comment starting with "Jmp Calls:").
+ //
+ // The following are the possible solutions in case of tail prefixed calls:
+ // 1) Use R11 - ignore tail prefix on calls that need to pass a param in R11 when
+ // present in methods that require GS cookie check. Rest of the tail calls that
+ // do not require R11 will be honored.
+ // 2) Internal register - GT_CALL node reserves an internal register and emits GS
+ // cookie check as part of tail call codegen. GenExitCode() needs to special case
+ // fast tail calls implemented as epilog+jmp or such tail calls should always get
+ // dispatched via helper.
+ // 3) Materialize GS cookie check as a separate node hanging off GT_CALL node in
+ // right execution order during rationalization.
+ //
+ // There are two calls that use R11: VSD and calli pinvokes with cookie param. Tail
+ // prefix on pinvokes is ignored. That is, options 2 and 3 will allow tail prefixed
+ // VSD calls from methods that need GS check.
+ //
+ // Tail prefixed calls: Right now, for Jit64 compat, a method requiring a GS cookie check
+ // ignores the tail prefix. In future, if we intend to support tail calls from such a method,
+ // consider one of the options mentioned above. For now adding an assert that we don't
+ // expect to see a tail call in a method that requires GS check.
+ noway_assert(!compiler->compTailCallUsed);
+
+ // Jmp calls: specify method handle using which JIT queries VM for its entry point
+ // address and hence it can neither be a VSD call nor PInvoke calli with cookie
+ // parameter. Therefore, in case of jmp calls it is safe to use R11.
+ regGSCheck = REG_R11;
+#endif // !_TARGET_X86_
+ }
+
+ if (compiler->gsGlobalSecurityCookieAddr == nullptr)
+ {
+ // If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'.
+ // Otherwise, load the value into a reg and use 'cmp mem64, reg64'.
+ if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal)
+ {
+ genSetRegToIcon(regGSCheck, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
+ }
+ else
+ {
+ getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
+ (int)compiler->gsGlobalSecurityCookieVal);
+ }
+ }
+ else
+ {
+ // Ngen case - GS cookie value needs to be accessed through an indirection.
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
+ }
+
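+ // If the cookie matches, jump around the fail-fast helper call; otherwise the process is torn down.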
+ BasicBlock* gsCheckBlk = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, gsCheckBlk);
+ genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
+ genDefineTempLabel(gsCheckBlk);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for all the basic blocks in the function.
+ */
+
+void CodeGen::genCodeForBBlist()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ unsigned savedStkLvl;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+
+ // You have to be careful if you create basic blocks from now on
+ compiler->fgSafeBasicBlockCreation = false;
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnCall)
+ {
+ compiler->opts.compStackCheckOnCall = false;
+ }
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnRet)
+ {
+ compiler->opts.compStackCheckOnRet = false;
+ }
+#endif // DEBUG
+
+ // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
+ genPrepForEHCodegen();
+
+ assert(!compiler->fgFirstBBScratch ||
+ compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+
+ /* Initialize the spill tracking logic */
+
+ regSet.rsSpillBeg();
+
+#ifdef DEBUGGING_SUPPORT
+ /* Initialize the line# tracking logic */
+
+ if (compiler->opts.compScopeInfo)
+ {
+ siInit();
+ }
+#endif
+
+ // The current implementation of switch tables requires the first block to have a label so it
+ // can generate offsets to the switch label targets.
+ // TODO-XArch-CQ: remove this when switches have been re-implemented to not use this.
+ if (compiler->fgHasSwitch)
+ {
+ compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ }
+
+ genPendingCallLabel = nullptr;
+
+ /* Initialize the pointer tracking code */
+
+ gcInfo.gcRegPtrSetInit();
+ gcInfo.gcVarPtrSetInit();
+
+ /* If any arguments live in registers, mark those regs as such */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter assigned to a register? */
+
+ if (!varDsc->lvIsParam || !varDsc->lvRegister)
+ {
+ continue;
+ }
+
+ /* Is the argument live on entry to the method? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ /* Is this a floating-point argument? */
+
+ if (varDsc->IsFloatRegType())
+ {
+ continue;
+ }
+
+ noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
+
+ /* Mark the register as holding the variable */
+
+ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
+ }
+
+ unsigned finallyNesting = 0;
+
+ // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
+ // allocation at the start of each basic block.
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks and generate code for each one
+ *
+ */
+
+ BasicBlock* block;
+ BasicBlock* lblk; /* previous block */
+
+ for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n=============== Generating ");
+ block->dspBlockHeader(compiler, true, true);
+ compiler->fgDispBBLiveness(block);
+ }
+#endif // DEBUG
+
+ // Figure out which registers hold variables on entry to this block
+
+ regSet.ClearMaskVars();
+ gcInfo.gcRegGCrefSetCur = RBM_NONE;
+ gcInfo.gcRegByrefSetCur = RBM_NONE;
+
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
+
+ genUpdateLife(block->bbLiveIn);
+
+ // Even if liveness didn't change, we need to update the registers containing GC references.
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
+
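+ // Rebuild the live register set and the GC ref/byref register sets from the variables live on entry to this block.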
+ regMaskTP newLiveRegSet = RBM_NONE;
+ regMaskTP newRegGCrefSet = RBM_NONE;
+ regMaskTP newRegByrefSet = RBM_NONE;
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
+#endif
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ if (varDsc->lvIsInReg())
+ {
+ newLiveRegSet |= varDsc->lvRegMask();
+ if (varDsc->lvType == TYP_REF)
+ {
+ newRegGCrefSet |= varDsc->lvRegMask();
+ }
+ else if (varDsc->lvType == TYP_BYREF)
+ {
+ newRegByrefSet |= varDsc->lvRegMask();
+ }
+#ifdef DEBUG
+ if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ else if (compiler->lvaIsGCTracked(varDsc))
+ {
+#ifdef DEBUG
+ if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ }
+
+ regSet.rsMaskVars = newLiveRegSet;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ if (!VarSetOps::IsEmpty(compiler, addedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tAdded GCVars: ");
+ dumpConvertedVarSet(compiler, addedGCVars);
+ printf("\n");
+ }
+ if (!VarSetOps::IsEmpty(compiler, removedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tRemoved GCVars: ");
+ dumpConvertedVarSet(compiler, removedGCVars);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
+ gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
+
+ /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
+ represent the exception object (TYP_REF).
+ We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
+ to the block, it will be the first thing evaluated
+ (thanks to GTF_ORDER_SIDEEFF).
+ */
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ for (GenTree* node : LIR::AsRange(block))
+ {
+ if (node->OperGet() == GT_CATCH_ARG)
+ {
+ gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
+ break;
+ }
+ }
+ }
+
+ /* Start a new code output block */
+
+ genUpdateCurrentFunclet(block);
+
+ if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
+ {
+ getEmitter()->emitLoopAlign();
+ }
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
+ }
+#endif
+
+ block->bbEmitCookie = nullptr;
+
+ if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
+ {
+ /* Mark a label and update the current set of live GC refs */
+
+ block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, FALSE);
+ }
+
+ if (block == compiler->fgFirstColdBlock)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nThis is the start of the cold region of the method\n");
+ }
+#endif
+ // We should never have a block that falls through into the Cold section
+ noway_assert(!lblk->bbFallsThrough());
+
+ // We require the block that starts the Cold section to have a label
+ noway_assert(block->bbEmitCookie);
+ getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
+ }
+
+ /* Both stacks are always empty on entry to a basic block */
+
+ genStackLevel = 0;
+
+ savedStkLvl = genStackLevel;
+
+ /* Tell everyone which basic block we're working on */
+
+ compiler->compCurBB = block;
+
+#ifdef DEBUGGING_SUPPORT
+ siBeginBlock(block);
+
+ // BBF_INTERNAL blocks don't correspond to any single IL instruction.
+ if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) &&
+ !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to
+ // emit a NO_MAPPING entry, immediately after the prolog.
+ {
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
+ }
+
+ bool firstMapping = true;
+#endif // DEBUGGING_SUPPORT
+
+ /*---------------------------------------------------------------------
+ *
+ * Generate code for each statement-tree in the block
+ *
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ genReserveFuncletProlog(block);
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // Clear compCurStmt and compCurLifeTree.
+ compiler->compCurStmt = nullptr;
+ compiler->compCurLifeTree = nullptr;
+
+ // Traverse the block in linear order, generating code for each node as we
+ // encounter it.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUGGING_SUPPORT
+ IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
+#endif
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+#ifdef DEBUGGING_SUPPORT
+ // Do we have a new IL offset?
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ genEnsureCodeEmitted(currentILOffset);
+ currentILOffset = node->gtStmt.gtStmtILoffsx;
+ genIPmappingAdd(currentILOffset, firstMapping);
+ firstMapping = false;
+ }
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
+ node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
+
+ if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
+ node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
+ {
+ while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
+ {
+ genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
+ }
+ }
+ }
+#endif // DEBUG
+
+ genCodeForTreeNode(node);
+ if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
+ {
+ genConsumeReg(node);
+ }
+ } // end for each node in block
+
+#ifdef DEBUG
+ // The following set of register spill checks and GC pointer tracking checks used to be
+ // performed at statement boundaries. Now, with LIR, there are no statements, so they are
+ // performed at the end of each block.
+ // TODO: could these checks be performed more frequently? E.g., at each location where
+ // the register allocator says there are no live non-variable registers. Perhaps this could
+ // be done by (a) keeping a running count of live non-variable registers by using
+ // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively,
+ // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA
+ // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node,
+ // there will be no live non-variable registers.
+
+ regSet.rsSpillChk();
+
+ /* Make sure we didn't bungle pointer register tracking */
+
+ regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
+ regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
+
+ // If return is a GC-type, clear it. Note that if a common
+ // epilog is generated (genReturnBB) it has a void return
+ // even though we might return a ref. We can't use the compRetType
+ // as the determiner because something we are tracking as a byref
+ // might be used as a return value of an int function (which is legal)
+ GenTree* blockLastNode = block->lastNode();
+ if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) &&
+ (varTypeIsGC(compiler->info.compRetType) ||
+ (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet()))))
+ {
+ nonVarPtrRegs &= ~RBM_INTRET;
+ }
+
+ if (nonVarPtrRegs)
+ {
+ printf("Regset after BB%02u gcr=", block->bbNum);
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ printf(", byr=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ printf(", regVars=");
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf("\n");
+ }
+
+ noway_assert(nonVarPtrRegs == RBM_NONE);
+#endif // DEBUG
+
+#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+ if (block->bbNext == nullptr)
+ {
+ // Unit testing of the AMD64 emitter: generate a bunch of instructions into the last block
+ // (it's as good as any, but better than the prolog, which can only be a single instruction
+ // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler
+ // thinks the instructions are the same as we do.
+ genAmd64EmitterUnitTests();
+ }
+#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+
+#ifdef DEBUGGING_SUPPORT
+ // It is possible to reach the end of the block without generating code for the current IL offset.
+ // For example, if the following IR ends the current block, no code will have been generated for
+ // offset 21:
+ //
+ // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
+ //
+ // N001 ( 0, 0) [000039] ------------ nop void
+ //
+ // This can lead to problems when debugging the generated code. To prevent these issues, make sure
+ // we've generated code for the last IL offset we saw in the block.
+ genEnsureCodeEmitted(currentILOffset);
+
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ siEndBlock(block);
+
+ /* Is this the last block, and are there any open scopes left ? */
+
+ bool isLastBlockProcessed = (block->bbNext == nullptr);
+ if (block->isBBCallAlwaysPair())
+ {
+ isLastBlockProcessed = (block->bbNext->bbNext == nullptr);
+ }
+
+ if (isLastBlockProcessed && siOpenScopeList.scNext)
+ {
+ /* This assert no longer holds, because we may insert a throw
+ block to demarcate the end of a try or finally region when they
+ are at the end of the method. It would be nice if we could fix
+ our code so that this throw block will no longer be necessary. */
+
+ // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
+
+ siCloseAllOpenScopes();
+ }
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ genStackLevel -= savedStkLvl;
+
+#ifdef DEBUG
+ // compCurLife should be equal to the liveOut set, except that we don't keep
+ // it up to date for vars that are not register candidates
+ // (it would be nice to have a xor set function)
+
+ VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
+ VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
+ VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
+ while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ assert(!varDsc->lvIsRegCandidate());
+ }
+#endif
+
+ /* Both stacks should always be empty on exit from a basic block */
+ noway_assert(genStackLevel == 0);
+
+#ifdef _TARGET_AMD64_
+ // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
+ // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
+ // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
+ // The document "X64 and ARM ABIs.docx" has more details. The situations:
+ // 1. If the call instruction is in a different EH region as the instruction that follows it.
+ // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
+ // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
+ // here.)
+ // We handle case #1 here, and case #2 in the emitter.
+ if (getEmitter()->emitIsLastInsCall())
+ {
+ // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
+ // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
+ // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
+ // generated before the OS epilog starts, such as a GS cookie check.
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ // We only need the NOP if we're not going to generate any more code as part of the block end.
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_THROW:
+ case BBJ_CALLFINALLY:
+ case BBJ_EHCATCHRET:
+ // We're going to generate more code below anyway, so no need for the NOP.
+
+ case BBJ_RETURN:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ // These are the "epilog follows" case, handled in the emitter.
+
+ break;
+
+ case BBJ_NONE:
+ if (block->bbNext == nullptr)
+ {
+ // Call immediately before the end of the code; we should never get here.
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ else
+ {
+ // We need the NOP
+ instGen(INS_nop);
+ }
+ break;
+
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ // These can't have a call as the last instruction!
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+ }
+#endif // _TARGET_AMD64_
+
+ /* Do we need to generate a jump or return? */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+ break;
+
+ case BBJ_RETURN:
+ genExitCode(block);
+ break;
+
+ case BBJ_THROW:
+ // If we have a throw at the end of a function or funclet, we need to emit another instruction
+ // afterwards to help the OS unwinder determine the correct context during unwind.
+ // We insert an unexecuted breakpoint instruction in several situations
+ // following a throw instruction:
+ // 1. If the throw is the last instruction of the function or funclet. This helps
+ // the OS unwinder determine the correct context during an unwind from the
+ // thrown exception.
+ // 2. If this is the last block of the hot section.
+ // 3. If the subsequent block is a special throw block.
+ // 4. On AMD64, if the next block is in a different EH region.
+ if ((block->bbNext == nullptr) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) ||
+ !BasicBlock::sameEHRegion(block, block->bbNext) ||
+ (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
+ block->bbNext == compiler->fgFirstColdBlock)
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+
+ break;
+
+ case BBJ_CALLFINALLY:
+
+#if FEATURE_EH_FUNCLETS
+
+ // Generate a call to the finally, like this:
+ // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym
+ // call finally-funclet
+ // jmp finally-return // Only for non-retless finally calls
+ // The jmp can be a NOP if we're going to the next block.
+ // If we're generating code for the main function (not a funclet), and there is no localloc,
+ // then RSP at this point is the same value as that stored in the PSPsym. So just copy RSP
+ // instead of loading the PSPSym in this case.
+
+ if (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))
+ {
+ inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
+ }
+ getEmitter()->emitIns_J(INS_call, block->bbJumpDest);
+
+ if (block->bbFlags & BBF_RETLESS_CALL)
+ {
+ // We have a retless call, and the last instruction generated was a call.
+ // If the next block is in a different EH region (or is the end of the code
+ // block), then we need to generate a breakpoint here (since it will never
+ // get executed) to get proper unwind behavior.
+
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ }
+ else
+ {
+ // Because of the way the flowgraph is connected, the liveness info for this one instruction
+ // after the call is not (can not be) correct in cases where a variable has a last use in the
+ // handler. So turn off GC reporting for this single instruction.
+ getEmitter()->emitDisableGC();
+
+ // Now go to where the finally funclet needs to return to.
+ if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
+ {
+ // Fall-through.
+ // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
+ // to the next instruction? This would depend on stack walking from within the finally
+ // handler working without this instruction being in this special EH region.
+ instGen(INS_nop);
+ }
+ else
+ {
+ inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
+ }
+
+ getEmitter()->emitEnableGC();
+ }
+
+#else // !FEATURE_EH_FUNCLETS
+
+ // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
+ // corresponding to the finally's nesting level. When invoked in response to an exception, the
+ // EE does this.
+ //
+ // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
+ //
+ // We will emit :
+ // mov [ebp - (n + 1)], 0
+ // mov [ebp - n ], 0xFC
+ // push &step
+ // jmp finallyBlock
+ // ...
+ // step:
+ // mov [ebp - n ], 0
+ // jmp leaveTarget
+ // ...
+ // leaveTarget:
+
+ noway_assert(isFramePointerUsed());
+
+ // Get the nesting level which contains the finally
+ compiler->fgGetNestingLevel(block, &finallyNesting);
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ filterEndOffsetSlotOffs =
+ (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
+
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
+
+ // Zero out the slot for the next nesting level
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs - TARGET_POINTER_SIZE);
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs);
+
+ // Now push the address where the finally funclet should return to directly.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+ getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
+ }
+ else
+ {
+ // The EE expects a DWORD, so we give it 0
+ inst_IV(INS_push_hide, 0);
+ }
+
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ lblk = block;
+ block = block->bbNext;
+ }
+
+ break;
+
+#if FEATURE_EH_FUNCLETS
+
+ case BBJ_EHCATCHRET:
+ // Set RAX to the address the VM should return to after the catch.
+ // Generate a RIP-relative
+ // lea reg, [rip + disp32] ; the RIP is implicit
+ // which will be position-independent.
+ getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET);
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+#else // !FEATURE_EH_FUNCLETS
+
+ case BBJ_EHCATCHRET:
+ noway_assert(!"Unexpected BBJ_EHCATCHRET"); // not used on x86
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ {
+ // The last statement of the block must be a GT_RETFILT, which has already been generated.
+ assert(block->lastNode() != nullptr);
+ assert(block->lastNode()->OperGet() == GT_RETFILT);
+
+ if (block->bbJumpKind == BBJ_EHFINALLYRET)
+ {
+ assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally
+
+ // Return using a pop-jmp sequence. As the "try" block calls
+ // the finally with a jmp, this leaves the x86 call-ret stack
+ // balanced in the normal flow of path.
+
+ noway_assert(isFramePointerRequired());
+ inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
+ inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
+ }
+ else
+ {
+ assert(block->bbJumpKind == BBJ_EHFILTERRET);
+
+ // The return value has already been computed.
+ instGen_Return(0);
+ }
+ }
+ break;
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->compCurBB = nullptr;
+#endif
+
+ } //------------------ END-FOR each block of the method -------------------
+
+ /* Nothing is live at this point */
+ genUpdateLife(VarSetOps::MakeEmpty(compiler));
+
+ /* Finalize the spill tracking logic */
+
+ regSet.rsSpillEnd();
+
+ /* Finalize the temp tracking logic */
+
+ compiler->tmpEnd();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n# ");
+ printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate,
+ compiler->compSizeEstimate);
+ printf("%s\n", compiler->info.compFullName);
+ }
+#endif
+}
+
+// return the child that has the same reg as the dst (if any)
+// other child returned (out param) in 'other'
+GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
+{
+ if (tree->gtRegNum == REG_NA)
+ {
+ other = nullptr;
+ return nullptr;
+ }
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ if (op1->gtRegNum == tree->gtRegNum)
+ {
+ other = op2;
+ return op1;
+ }
+ if (op2->gtRegNum == tree->gtRegNum)
+ {
+ other = op1;
+ return op2;
+ }
+ else
+ {
+ other = nullptr;
+ return nullptr;
+ }
+}
+
+// Move an immediate value into an integer register
+
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
+{
+ // reg cannot be a FP register
+ assert(!genIsValidFloatReg(reg));
+
+ if (!compiler->opts.compReloc)
+ {
+ size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
+ }
+
+ if ((imm == 0) && !EA_IS_RELOC(size))
+ {
+ instGen_Set_Reg_To_Zero(size, reg, flags);
+ }
+ else
+ {
+ if (genDataIndirAddrCanBeEncodedAsPCRelOffset(imm))
+ {
+ getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
+ }
+ }
+ regTracker.rsTrackRegIntCns(reg, imm);
+}
+
+/***********************************************************************************
+ *
+ * Generate code to set a register 'targetReg' of type 'targetType' to the constant
+ * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
+ * genProduceReg() on the target register.
+ */
+void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
+{
+
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+ {
+ // relocatable values tend to come down as a CNS_INT of native int type
+ // so the line between these two opcodes is kind of blurry
+ GenTreeIntConCommon* con = tree->AsIntConCommon();
+ ssize_t cnsVal = con->IconValue();
+
+ if (con->ImmedValNeedsReloc(compiler))
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
+ regTracker.rsTrackRegTrash(targetReg);
+ }
+ else
+ {
+ genSetRegToIcon(targetReg, cnsVal, targetType);
+ }
+ }
+ break;
+
+ case GT_CNS_DBL:
+ {
+ double constValue = tree->gtDblCon.gtDconVal;
+
+ // Make sure we use "xorpd reg, reg" only for +ve zero constant (0.0) and not for -ve zero (-0.0)
+ if (*(__int64*)&constValue == 0)
+ {
+ // A faster/smaller way to generate 0
+ instruction ins = genGetInsForOper(GT_XOR, targetType);
+ inst_RV_RV(ins, targetReg, targetReg, targetType);
+ }
+ else
+ {
+ GenTreePtr cns;
+ if (targetType == TYP_FLOAT)
+ {
+ float f = forceCastToFloat(constValue);
+ cns = genMakeConst(&f, targetType, tree, false);
+ }
+ else
+ {
+ cns = genMakeConst(&constValue, targetType, tree, true);
+ }
+
+ inst_RV_TT(ins_Load(targetType), targetReg, cns);
+ }
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+// Generate code to get the high N bits of a N*N=2N bit multiplication result
+void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
+{
+ assert(!(treeNode->gtFlags & GTF_UNSIGNED));
+ assert(!treeNode->gtOverflowEx());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+ emitAttr size = emitTypeSize(treeNode);
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+ GenTree* op2 = treeNode->gtOp.gtOp2;
+
+ // to get the high bits of the multiply, we are constrained to using the
+ // 1-op form: RDX:RAX = RAX * rm
+ // The 3-op form (Rx=Ry*Rz) does not support it.
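+    // Roughly, the shape of the code emitted below is (register names are illustrative):
+    //     mov  targetReg, regOp   ; only if regOp is not already in targetReg
+    //     imul rmOp               ; RDX:RAX = RAX * rmOp
+    //     mov  targetReg, rdx     ; only if targetReg != RDX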
+
+ genConsumeOperands(treeNode->AsOp());
+
+ GenTree* regOp = op1;
+ GenTree* rmOp = op2;
+
+ // Set rmOp to the contained memory operand (if any)
+ //
+ if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
+ {
+ regOp = op2;
+ rmOp = op1;
+ }
+ assert(!regOp->isContained());
+
+    // Set up targetReg when neither of the source operands was a matching register
+ if (regOp->gtRegNum != targetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
+ }
+
+ emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp);
+
+ // Move the result to the desired register, if necessary
+ if (targetReg != REG_RDX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
+ }
+}
+
+// generate code for a DIV or MOD operation
+//
+void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
+{
+ GenTree* dividend = treeNode->gtOp1;
+ GenTree* divisor = treeNode->gtOp2;
+ genTreeOps oper = treeNode->OperGet();
+ emitAttr size = emitTypeSize(treeNode);
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+ // dividend is not contained.
+ assert(!dividend->isContained());
+
+ genConsumeOperands(treeNode->AsOp());
+ if (varTypeIsFloating(targetType))
+ {
+        // The divisor is either not contained, or if it is contained, it is a memory op.
+        // Note that a reg-optional operand is treated as a memory op
+        // if no register is allocated to it.
+ assert(!divisor->isContained() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl() ||
+ divisor->IsRegOptional());
+
+ // Floating point div/rem operation
+ assert(oper == GT_DIV || oper == GT_MOD);
+
+ if (dividend->gtRegNum == targetReg)
+ {
+ emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
+ }
+ else if (!divisor->isContained() && divisor->gtRegNum == targetReg)
+ {
+ // It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2
+            // because divss/divsd reg1, reg2 will overwrite reg1. Therefore, on AMD64,
+            // LSRA has to make sure that such a register assignment is not generated for floating
+ // point div/rem operations.
+ noway_assert(
+ !"GT_DIV/GT_MOD (float): case of reg2 = reg1 / reg2, LSRA should never generate such a reg assignment");
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, dividend->gtRegNum, targetType);
+ emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
+ }
+ }
+ else
+ {
+ // dividend must be in RAX
+ if (dividend->gtRegNum != REG_RAX)
+ {
+ inst_RV_RV(INS_mov, REG_RAX, dividend->gtRegNum, targetType);
+ }
+
+ // zero or sign extend rax to rdx
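+        // Illustratively: 'xor edx, edx' for the unsigned case, or cdq (or its 64-bit form cqo)
+        // for the signed case, so that RDX:RAX holds the full dividend.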
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
+ }
+ else
+ {
+ emit->emitIns(INS_cdq, size);
+            // The cdq instruction writes RDX, so clear the gcInfo for RDX
+ gcInfo.gcMarkRegSetNpt(RBM_RDX);
+ }
+
+ // Perform the 'targetType' (64-bit or 32-bit) divide instruction
+ instruction ins;
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ {
+ ins = INS_div;
+ }
+ else
+ {
+ ins = INS_idiv;
+ }
+
+ emit->emitInsBinary(ins, size, treeNode, divisor);
+
+ // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX.
+ // Move the result to the desired register, if necessary
+ if (oper == GT_DIV || oper == GT_UDIV)
+ {
+ if (targetReg != REG_RAX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
+ }
+ }
+ else
+ {
+ assert((oper == GT_MOD) || (oper == GT_UMOD));
+ if (targetReg != REG_RDX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
+ }
+ }
+ }
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCodeForBinary: Generate code for many binary arithmetic operators
+// This method is expected to have called genConsumeOperands() before calling it.
+//
+// Arguments:
+// treeNode - The binary operation for which we are generating code.
+//
+// Return Value:
+// None.
+//
+// Notes:
+//    Mul and div variants have special constraints on x64 and so are not handled here.
+//    See the assert below for the operators that are handled.
+
+void CodeGen::genCodeForBinary(GenTree* treeNode)
+{
+ const genTreeOps oper = treeNode->OperGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+#if defined(_TARGET_64BIT_)
+ assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB);
+#else // !defined(_TARGET_64BIT_)
+ assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI ||
+ oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_HI || oper == GT_DIV_HI || oper == GT_MOD_HI ||
+ oper == GT_ADD || oper == GT_SUB);
+#endif // !defined(_TARGET_64BIT_)
+
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ GenTreePtr op2 = treeNode->gtGetOp2();
+
+ // Commutative operations can mark op1 as contained to generate "op reg, memop/immed"
+ if (op1->isContained())
+ {
+ assert(treeNode->OperIsCommutative());
+ assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32() || op1->IsRegOptional());
+
+ op1 = treeNode->gtGetOp2();
+ op2 = treeNode->gtGetOp1();
+ }
+
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ noway_assert(targetReg != REG_NA);
+
+ regNumber op1reg = op1->isContained() ? REG_NA : op1->gtRegNum;
+ regNumber op2reg = op2->isContained() ? REG_NA : op2->gtRegNum;
+
+ GenTreePtr dst;
+ GenTreePtr src;
+
+ // This is the case of reg1 = reg1 op reg2
+ // We're ready to emit the instruction without any moves
+ if (op1reg == targetReg)
+ {
+ dst = op1;
+ src = op2;
+ }
+ // We have reg1 = reg2 op reg1
+    // In order for this operation to be correct,
+    // the operation must be commutative, so that
+    // we can convert it into reg1 = reg1 op reg2 and emit
+    // the same code as above.
+ else if (op2reg == targetReg)
+ {
+ noway_assert(GenTree::OperIsCommutative(oper));
+ dst = op2;
+ src = op1;
+ }
+ // now we know there are 3 different operands so attempt to use LEA
+ else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags
+ && (op2->isContainedIntOrIImmed() || !op2->isContained()))
+ {
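+        // Illustrative forms of the LEA emitted below:
+        //     lea targetReg, [op1reg + imm]       ; op2 is a contained integer immediate
+        //     lea targetReg, [op1reg + op2reg]    ; op2 is in a register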
+ if (op2->isContainedIntOrIImmed())
+ {
+ emit->emitIns_R_AR(INS_lea, emitTypeSize(treeNode), targetReg, op1reg,
+ (int)op2->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ assert(op2reg != REG_NA);
+ emit->emitIns_R_ARX(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, op2reg, 1, 0);
+ }
+ genProduceReg(treeNode);
+ return;
+ }
+ // dest, op1 and op2 registers are different:
+ // reg3 = reg1 op reg2
+ // We can implement this by issuing a mov:
+ // reg3 = reg1
+ // reg3 = reg3 op reg2
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1reg, targetType);
+ regTracker.rsTrackRegCopy(targetReg, op1reg);
+ gcInfo.gcMarkRegPtrVal(targetReg, targetType);
+ dst = treeNode;
+ src = op2;
+ }
+
+ // try to use an inc or dec
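+    // (e.g., an add of the constant 1 becomes 'inc targetReg', and -1 becomes 'dec targetReg')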
+ if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx())
+ {
+ if (src->IsIntegralConst(1))
+ {
+ emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg);
+ genProduceReg(treeNode);
+ return;
+ }
+ else if (src->IsIntegralConst(-1))
+ {
+ emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg);
+ genProduceReg(treeNode);
+ return;
+ }
+ }
+ regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
+ noway_assert(r == targetReg);
+
+ if (treeNode->gtOverflowEx())
+ {
+#if !defined(_TARGET_64BIT_)
+ assert(oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_HI || oper == GT_SUB_HI);
+#else
+ assert(oper == GT_ADD || oper == GT_SUB);
+#endif
+ genCheckOverflow(treeNode);
+ }
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// isStructReturn: Returns whether the 'treeNode' is returning a struct.
+//
+// Arguments:
+// treeNode - The tree node to evaluate whether is a struct return.
+//
+// Return Value:
+//    For AMD64 *nix: returns true if the 'treeNode' is a GT_RETURN node of type struct.
+// Otherwise returns false.
+// For other platforms always returns false.
+//
+bool CodeGen::isStructReturn(GenTreePtr treeNode)
+{
+    // This method could be called for a 'treeNode' of GT_RETFILT or GT_RETURN.
+    // For GT_RETFILT, the return is always
+    // a bool or a void, for the end of a finally block.
+ noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
+ if (treeNode->OperGet() != GT_RETURN)
+ {
+ return false;
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ return varTypeIsStruct(treeNode);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(!varTypeIsStruct(treeNode));
+ return false;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+}
+
+//------------------------------------------------------------------------
+// genStructReturn: Generates code for returning a struct.
+//
+// Arguments:
+// treeNode - The GT_RETURN tree node.
+//
+// Return Value:
+// None
+//
+// Assumption:
+// op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
+void CodeGen::genStructReturn(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_RETURN);
+ GenTreePtr op1 = treeNode->gtGetOp1();
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVar->gtLclNum]);
+ assert(varDsc->lvIsMultiRegRet);
+
+ ReturnTypeDesc retTypeDesc;
+ retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
+ unsigned regCount = retTypeDesc.GetReturnRegCount();
+ assert(regCount == MAX_RET_REG_COUNT);
+
+ if (varTypeIsEnregisterableStruct(op1))
+ {
+ // Right now the only enregistrable structs supported are SIMD vector types.
+ assert(varTypeIsSIMD(op1));
+ assert(!op1->isContained());
+
+            // This is the case where the operand is in a single reg and needs to be
+ // returned in multiple ABI return registers.
+ regNumber opReg = genConsumeReg(op1);
+ regNumber reg0 = retTypeDesc.GetABIReturnReg(0);
+ regNumber reg1 = retTypeDesc.GetABIReturnReg(1);
+
+ if (opReg != reg0 && opReg != reg1)
+ {
+ // Operand reg is different from return regs.
+                // Copy opReg to reg0 and let it be handled by one of the
+ // two cases below.
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
+ opReg = reg0;
+ }
+
+ if (opReg == reg0)
+ {
+ assert(opReg != reg1);
+
+ // reg0 - already has required 8-byte in bit position [63:0].
+ // reg1 = opReg.
+ // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), reg1, opReg, TYP_DOUBLE);
+ }
+ else
+ {
+ assert(opReg == reg1);
+
+ // reg0 = opReg.
+ // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
+ }
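+            // 'shufpd reg1, reg1, 0x01' swaps the low and high 8 bytes of reg1, leaving the
+            // desired 8 bytes of the operand in bit position [63:0] of the second return register.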
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, reg1, reg1, 0x01);
+ }
+ else
+ {
+ assert(op1->isContained());
+
+ // Copy var on stack into ABI return registers
+ int offset = 0;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc.GetReturnRegType(i);
+ regNumber reg = retTypeDesc.GetABIReturnReg(i);
+ getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
+ offset += genTypeSize(type);
+ }
+ }
+ }
+ else
+ {
+ assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());
+
+ genConsumeRegs(op1);
+
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+ assert(regCount == MAX_RET_REG_COUNT);
+
+ // Handle circular dependency between call allocated regs and ABI return regs.
+ //
+ // It is possible under LSRA stress that originally allocated regs of call node,
+ // say rax and rdx, are spilled and reloaded to rdx and rax respectively. But
+        // GT_RETURN needs to move values as follows: rdx->rax, rax->rdx. A similar
+        // kind of circular dependency could arise between the xmm0 and xmm1 return regs.
+ // Codegen is expected to handle such circular dependency.
+ //
+ var_types regType0 = retTypeDesc->GetReturnRegType(0);
+ regNumber returnReg0 = retTypeDesc->GetABIReturnReg(0);
+ regNumber allocatedReg0 = call->GetRegNumByIdx(0);
+
+ var_types regType1 = retTypeDesc->GetReturnRegType(1);
+ regNumber returnReg1 = retTypeDesc->GetABIReturnReg(1);
+ regNumber allocatedReg1 = call->GetRegNumByIdx(1);
+
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
+ if (reloadReg != REG_NA)
+ {
+ allocatedReg0 = reloadReg;
+ }
+
+ reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
+ if (reloadReg != REG_NA)
+ {
+ allocatedReg1 = reloadReg;
+ }
+ }
+
+ if (allocatedReg0 == returnReg1 && allocatedReg1 == returnReg0)
+ {
+ // Circular dependency - swap allocatedReg0 and allocatedReg1
+ if (varTypeIsFloating(regType0))
+ {
+ assert(varTypeIsFloating(regType1));
+
+ // The fastest way to swap two XMM regs is using PXOR
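+                // (a classic xor-swap: a ^= b; b ^= a; a ^= b exchanges the two register values)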
+ inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
+ inst_RV_RV(INS_pxor, allocatedReg1, allocatedReg0, TYP_DOUBLE);
+ inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
+ }
+ else
+ {
+ assert(varTypeIsIntegral(regType0));
+ assert(varTypeIsIntegral(regType1));
+ inst_RV_RV(INS_xchg, allocatedReg1, allocatedReg0, TYP_I_IMPL);
+ }
+ }
+ else if (allocatedReg1 == returnReg0)
+ {
+ // Change the order of moves to correctly handle dependency.
+ if (allocatedReg1 != returnReg1)
+ {
+ inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
+ }
+
+ if (allocatedReg0 != returnReg0)
+ {
+ inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
+ }
+ }
+ else
+ {
+ // No circular dependency case.
+ if (allocatedReg0 != returnReg0)
+ {
+ inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
+ }
+
+ if (allocatedReg1 != returnReg1)
+ {
+ inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
+ }
+ }
+ }
+#else
+ unreached();
+#endif
+}
+
+//------------------------------------------------------------------------
+// genReturn: Generates code for return statement.
+// In case of struct return, delegates to the genStructReturn method.
+//
+// Arguments:
+// treeNode - The GT_RETURN or GT_RETFILT tree node.
+//
+// Return Value:
+// None
+//
+void CodeGen::genReturn(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ var_types targetType = treeNode->TypeGet();
+
+#ifdef DEBUG
+ if (targetType == TYP_VOID)
+ {
+ assert(op1 == nullptr);
+ }
+#endif
+
+#ifdef _TARGET_X86_
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ assert(op1 != nullptr);
+ noway_assert(op1->OperGet() == GT_LONG);
+ GenTree* loRetVal = op1->gtGetOp1();
+ GenTree* hiRetVal = op1->gtGetOp2();
+ noway_assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));
+
+ genConsumeReg(loRetVal);
+ genConsumeReg(hiRetVal);
+ if (loRetVal->gtRegNum != REG_LNGRET_LO)
+ {
+ inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
+ }
+ if (hiRetVal->gtRegNum != REG_LNGRET_HI)
+ {
+ inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
+ }
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ if (isStructReturn(treeNode))
+ {
+ genStructReturn(treeNode);
+ }
+ else if (targetType != TYP_VOID)
+ {
+ assert(op1 != nullptr);
+ noway_assert(op1->gtRegNum != REG_NA);
+
+ // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
+ // consumed a reg for the operand. This is because the variable
+ // is dead after return. But we are issuing more instructions
+ // like "profiler leave callback" after this consumption. So
+ // if you are issuing more instructions after this point,
+ // remember to keep the variable live up until the new method
+ // exit point where it is actually dead.
+ genConsumeReg(op1);
+
+ regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
+#ifdef _TARGET_X86_
+ if (varTypeIsFloating(treeNode))
+ {
+ // Spill the return value register from an XMM register to the stack, then load it on the x87 stack.
+ // If it already has a home location, use that. Otherwise, we need a temp.
+ if (genIsRegCandidateLocal(op1) && compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvOnFrame)
+ {
+ // Store local variable to its home location, if necessary.
+ if ((op1->gtFlags & GTF_REG_VAL) != 0)
+ {
+ op1->gtFlags &= ~GTF_REG_VAL;
+ inst_TT_RV(ins_Store(op1->gtType,
+ compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)),
+ op1, op1->gtRegNum);
+ }
+ // Now, load it to the fp stack.
+ getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
+ }
+ else
+ {
+ // Spill the value, which should be in a register, then load it to the fp stack.
+ // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
+ op1->gtFlags |= GTF_SPILL;
+ regSet.rsSpillTree(op1->gtRegNum, op1);
+ op1->gtFlags |= GTF_SPILLED;
+ op1->gtFlags &= ~GTF_SPILL;
+
+ TempDsc* t = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
+ inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
+ op1->gtFlags &= ~GTF_SPILLED;
+ compiler->tmpRlsTemp(t);
+ }
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ if (op1->gtRegNum != retReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
+ }
+ }
+ }
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // !! Note !!
+    // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure that for structs returned in
+    // two registers, RAX and RDX need to be kept alive. Make the necessary changes in lowerxarch.cpp
+ // in the handling of the GT_RETURN statement.
+ // Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
+ // for the return registers containing GC refs.
+
+ // There will be a single return block while generating profiler ELT callbacks.
+ //
+ // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
+    // The flowgraph and other places assert that the last node of a block marked as
+    // GT_RETURN is either a GT_RETURN, a GT_JMP, or a tail call. It would be nice to
+    // maintain such an invariant irrespective of whether a profiler hook is needed or not.
+ // Also, there is not much to be gained by materializing it as an explicit node.
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ // !! NOTE !!
+ // Since we are invalidating the assumption that we would slip into the epilog
+ // right after the "return", we need to preserve the return reg's GC state
+ // across the call until actual method return.
+ if (varTypeIsGC(compiler->info.compRetType))
+ {
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
+ }
+
+ genProfilingLeaveCallback();
+
+ if (varTypeIsGC(compiler->info.compRetType))
+ {
+ gcInfo.gcMarkRegSetNpt(REG_INTRET);
+ }
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a single node in the tree.
+ * Preconditions: All operands have been evaluated
+ *
+ */
+void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
+{
+ regNumber targetReg;
+#if !defined(_TARGET_64BIT_)
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ // All long enregistered nodes will have been decomposed into their
+ // constituent lo and hi nodes.
+ targetReg = REG_NA;
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+ targetReg = treeNode->gtRegNum;
+ }
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+#ifdef DEBUG
+ // Validate that all the operands for the current node are consumed in order.
+ // This is important because LSRA ensures that any necessary copies will be
+ // handled correctly.
+ lastConsumedNode = nullptr;
+ if (compiler->verbose)
+ {
+ unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
+ printf("Generating: ");
+ compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ }
+#endif // DEBUG
+
+ // Is this a node whose value is already in a register? LSRA denotes this by
+ // setting the GTF_REUSE_REG_VAL flag.
+ if (treeNode->IsReuseRegVal())
+ {
+ // For now, this is only used for constant nodes.
+ assert((treeNode->OperIsConst()));
+ JITDUMP(" TreeNode is marked ReuseReg\n");
+ return;
+ }
+
+ // contained nodes are part of their parents for codegen purposes
+ // ex : immediates, most LEAs
+ if (treeNode->isContained())
+ {
+ return;
+ }
+
+ switch (treeNode->gtOper)
+ {
+ case GT_START_NONGC:
+ getEmitter()->emitDisableGC();
+ break;
+
+ case GT_PROF_HOOK:
+#ifdef PROFILING_SUPPORTED
+ // We should be seeing this only if profiler hook is needed
+ noway_assert(compiler->compIsProfilerHookNeeded());
+
+ // Right now this node is used only for tail calls. In future if
+ // we intend to use it for Enter or Leave hooks, add a data member
+ // to this node indicating the kind of profiler hook. For example,
+ // helper number can be used.
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif // PROFILING_SUPPORTED
+ break;
+
+ case GT_LCLHEAP:
+ genLclHeap(treeNode);
+ break;
+
+ case GT_CNS_INT:
+#ifdef _TARGET_X86_
+ NYI_IF(treeNode->IsIconHandle(GTF_ICON_TLS_HDL), "TLS constants");
+#endif // _TARGET_X86_
+ __fallthrough;
+
+ case GT_CNS_DBL:
+ genSetRegToConst(targetReg, targetType, treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_NEG:
+ case GT_NOT:
+ if (varTypeIsFloating(targetType))
+ {
+ assert(treeNode->gtOper == GT_NEG);
+ genSSE2BitwiseOp(treeNode);
+ }
+ else
+ {
+ GenTreePtr operand = treeNode->gtGetOp1();
+ assert(!operand->isContained());
+ regNumber operandReg = genConsumeReg(operand);
+
+ if (operandReg != targetReg)
+ {
+ inst_RV_RV(INS_mov, targetReg, operandReg, targetType);
+ }
+
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+ inst_RV(ins, targetReg, targetType);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ assert(varTypeIsIntegralOrI(treeNode));
+ __fallthrough;
+
+#if !defined(_TARGET_64BIT_)
+ case GT_ADD_LO:
+ case GT_ADD_HI:
+ case GT_SUB_LO:
+ case GT_SUB_HI:
+#endif // !defined(_TARGET_64BIT_)
+ case GT_ADD:
+ case GT_SUB:
+ genConsumeOperands(treeNode->AsOp());
+ genCodeForBinary(treeNode);
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ genCodeForShift(treeNode);
+ // genCodeForShift() calls genProduceReg()
+ break;
+
+ case GT_CAST:
+#if !defined(_TARGET_64BIT_)
+            // We will NYI in DecomposeNode() if we are casting TO a long type, but we do not
+ // yet support casting FROM a long type either, and that's simpler to catch
+ // here.
+ NYI_IF(varTypeIsLong(treeNode->gtOp.gtOp1), "Casts from TYP_LONG");
+#endif // !defined(_TARGET_64BIT_)
+
+ if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double <--> double/float
+ genFloatToFloatCast(treeNode);
+ }
+ else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double --> int32/int64
+ genFloatToIntCast(treeNode);
+ }
+ else if (varTypeIsFloating(targetType))
+ {
+ // Casts int32/uint32/int64/uint64 --> float/double
+ genIntToFloatCast(treeNode);
+ }
+ else
+ {
+ // Casts int <--> int
+ genIntToIntCast(treeNode);
+ }
+ // The per-case functions call genProduceReg()
+ break;
+
+ case GT_LCL_VAR:
+ {
+ // lcl_vars are not defs
+ assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
+
+ GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
+ bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
+
+ if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
+ {
+ assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
+ }
+
+ // If this is a register candidate that has been spilled, genConsumeReg() will
+ // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
+
+ if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
+ {
+ assert(!isRegCandidate);
+
+ emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
+ emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0);
+ genProduceReg(treeNode);
+ }
+ }
+ break;
+
+ case GT_LCL_FLD_ADDR:
+ case GT_LCL_VAR_ADDR:
+ // Address of a local var. This by itself should never be allocated a register.
+ // If it is worth storing the address in a register then it should be cse'ed into
+ // a temp and that would be allocated a register.
+ noway_assert(targetType == TYP_BYREF);
+ noway_assert(!treeNode->InReg());
+
+ inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_LCL_FLD:
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ noway_assert(treeNode->gtRegNum != REG_NA);
+
+#ifdef FEATURE_SIMD
+ // Loading of TYP_SIMD12 (i.e. Vector3) field
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genLoadLclFldTypeSIMD12(treeNode);
+ break;
+ }
+#endif
+
+ emitAttr size = emitTypeSize(targetType);
+ unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_STORE_LCL_FLD:
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ noway_assert(!treeNode->InReg());
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+#ifdef FEATURE_SIMD
+ // storing of TYP_SIMD12 (i.e. Vector3) field
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genStoreLclFldTypeSIMD12(treeNode);
+ break;
+ }
+#endif
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ genConsumeRegs(op1);
+ emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ {
+ GenTreePtr op1 = treeNode->gtGetOp1();
+
+ // var = call, where call returns a multi-reg return value
+ // case is handled separately.
+ if (op1->gtSkipReloadOrCopy()->IsMultiRegCall())
+ {
+ genMultiRegCallStoreToLocal(treeNode);
+ }
+ else
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));
+
+#if !defined(_TARGET_64BIT_)
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ genStoreLongLclVar(treeNode);
+ break;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(targetType) && (targetReg != REG_NA) && op1->IsCnsIntOrI())
+ {
+ // This is only possible for a zero-init.
+ noway_assert(op1->IsIntegralConst(0));
+ genSIMDZero(targetType, varDsc->lvBaseType, targetReg);
+ genProduceReg(treeNode);
+ break;
+ }
+#endif // FEATURE_SIMD
+
+ genConsumeRegs(op1);
+
+ if (treeNode->gtRegNum == REG_NA)
+ {
+ // stack store
+ emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)),
+ emitTypeSize(targetType), treeNode);
+ varDsc->lvRegNum = REG_STK;
+ }
+ else
+ {
+ bool containedOp1 = op1->isContained();
+ // Look for the case where we have a constant zero which we've marked for reuse,
+ // but which isn't actually in the register we want. In that case, it's better to create
+ // zero in the target register, because an xor is smaller than a copy. Note that we could
+ // potentially handle this in the register allocator, but we can't always catch it there
+ // because the target may not have a register allocated for it yet.
+ if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) &&
+ (op1->IsIntegralConst(0) || op1->IsFPZero()))
+ {
+ op1->gtRegNum = REG_NA;
+ op1->ResetReuseRegVal();
+ containedOp1 = true;
+ }
+
+ if (containedOp1)
+ {
+ // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
+ // must be a constant. However, in the future we might want to support a contained memory op.
+ // This is a bit tricky because we have to decide it's contained before register allocation,
+ // and this would be a case where, once that's done, we need to mark that node as always
+ // requiring a register - which we always assume now anyway, but once we "optimize" that
+ // we'll have to take cases like this into account.
+ assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
+ genSetRegToConst(treeNode->gtRegNum, targetType, op1);
+ }
+ else if (op1->gtRegNum != treeNode->gtRegNum)
+ {
+ assert(op1->gtRegNum != REG_NA);
+ emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
+ }
+ }
+ }
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ genProduceReg(treeNode);
+ }
+ }
+ break;
+
+ case GT_RETFILT:
+ // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
+ // the return register, if it's not already there. The processing is the same as GT_RETURN.
+ if (targetType != TYP_VOID)
+ {
+ // For filters, the IL spec says the result is type int32. Further, the only specified legal values
+ // are 0 or 1, with the use of other values "undefined".
+ assert(targetType == TYP_INT);
+ }
+
+ __fallthrough;
+
+ case GT_RETURN:
+ genReturn(treeNode);
+ break;
+
+ case GT_LEA:
+ {
+ // if we are here, it is the case where there is an LEA that cannot
+ // be folded into a parent instruction
+ GenTreeAddrMode* lea = treeNode->AsAddrMode();
+ genLeaInstruction(lea);
+ }
+ // genLeaInstruction calls genProduceReg()
+ break;
+
+ case GT_IND:
+#ifdef FEATURE_SIMD
+ // Handling of Vector3 type values loaded through indirection.
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genLoadIndTypeSIMD12(treeNode);
+ break;
+ }
+#endif // FEATURE_SIMD
+
+ genConsumeAddress(treeNode->AsIndir()->Addr());
+ emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MULHI:
+ genCodeForMulHi(treeNode->AsOp());
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MUL:
+ {
+ instruction ins;
+ emitAttr size = emitTypeSize(treeNode);
+ bool isUnsignedMultiply = ((treeNode->gtFlags & GTF_UNSIGNED) != 0);
+ bool requiresOverflowCheck = treeNode->gtOverflowEx();
+
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* op2 = treeNode->gtGetOp2();
+
+ // there are 3 forms of x64 multiply:
+ // 1-op form with 128 result: RDX:RAX = RAX * rm
+ // 2-op form: reg *= rm
+ // 3-op form: reg = rm * imm
+
+ genConsumeOperands(treeNode->AsOp());
+
+ // This matches the 'mul' lowering in Lowering::SetMulOpCounts()
+ //
+ // immOp :: Only one operand can be an immediate
+ // rmOp :: Only one operand can be a memory op.
+ // regOp :: A register op (especially the operand that matches 'targetReg')
+ // (can be nullptr when we have both a memory op and an immediate op)
+
+ GenTree* immOp = nullptr;
+ GenTree* rmOp = op1;
+ GenTree* regOp;
+
+ if (op2->isContainedIntOrIImmed())
+ {
+ immOp = op2;
+ }
+ else if (op1->isContainedIntOrIImmed())
+ {
+ immOp = op1;
+ rmOp = op2;
+ }
+
+ if (immOp != nullptr)
+ {
+ // This must be a non-floating point operation.
+ assert(!varTypeIsFloating(treeNode));
+
+ // CQ: When possible use LEA for mul by imm 3, 5 or 9
+ ssize_t imm = immOp->AsIntConCommon()->IconValue();
+
+ if (!requiresOverflowCheck && !rmOp->isContained() && ((imm == 3) || (imm == 5) || (imm == 9)))
+ {
+ // We will use the LEA instruction to perform this multiply
+ // Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3,5 or 9.
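+                    // e.g., for imm == 5 this emits (roughly): lea targetReg, [rmOpReg + rmOpReg*4]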
+ unsigned int scale = (unsigned int)(imm - 1);
+ getEmitter()->emitIns_R_ARX(INS_lea, size, targetReg, rmOp->gtRegNum, rmOp->gtRegNum, scale, 0);
+ }
+ else
+ {
+ // use the 3-op form with immediate
+ ins = getEmitter()->inst3opImulForReg(targetReg);
+ emit->emitInsBinary(ins, size, rmOp, immOp);
+ }
+ }
+ else // we have no contained immediate operand
+ {
+ regOp = op1;
+ rmOp = op2;
+
+ regNumber mulTargetReg = targetReg;
+ if (isUnsignedMultiply && requiresOverflowCheck)
+ {
+ ins = INS_mulEAX;
+ mulTargetReg = REG_RAX;
+ }
+ else
+ {
+ ins = genGetInsForOper(GT_MUL, targetType);
+ }
+
+                // Set rmOp to the contained memory operand (if any),
+                // or set regOp to op2 when it has the matching target register for our multiply op.
+ //
+ if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == mulTargetReg)))
+ {
+ regOp = op2;
+ rmOp = op1;
+ }
+ assert(!regOp->isContained());
+
+                // Set up targetReg when neither of the source operands was a matching register
+ if (regOp->gtRegNum != mulTargetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), mulTargetReg, regOp->gtRegNum, targetType);
+ }
+
+ emit->emitInsBinary(ins, size, treeNode, rmOp);
+
+ // Move the result to the desired register, if necessary
+ if ((ins == INS_mulEAX) && (targetReg != REG_RAX))
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
+ }
+ }
+
+ if (requiresOverflowCheck)
+ {
+ // Overflow checking is only used for non-floating point types
+ noway_assert(!varTypeIsFloating(treeNode));
+
+ genCheckOverflow(treeNode);
+ }
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a
+ // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD
+ // on float/double args.
+ noway_assert(!varTypeIsFloating(treeNode));
+ __fallthrough;
+
+ case GT_DIV:
+ genCodeForDivMod(treeNode->AsOp());
+ break;
+
+ case GT_INTRINSIC:
+ genIntrinsic(treeNode);
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ genSIMDIntrinsic(treeNode->AsSIMD());
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_CKFINITE:
+ genCkfinite(treeNode);
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ {
+ // TODO-XArch-CQ: Check if we can use the currently set flags.
+ // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register
+ // (signed < or >= where targetReg != REG_NA)
+
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ var_types op1Type = op1->TypeGet();
+
+ if (varTypeIsFloating(op1Type))
+ {
+ genCompareFloat(treeNode);
+ }
+#if !defined(_TARGET_64BIT_)
+ // X86 Long comparison
+ else if (varTypeIsLong(op1Type))
+ {
+ // When not materializing the result in a register, the compare logic is generated
+ // when we generate the GT_JTRUE.
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ genCompareLong(treeNode);
+ }
+ else
+ {
+ // We generate the compare when we generate the GT_JTRUE, but we need to consume
+ // the operands now.
+ genConsumeOperands(treeNode->AsOp());
+ }
+ }
+#endif // !defined(_TARGET_64BIT_)
+ else
+ {
+ genCompareInt(treeNode);
+ }
+ }
+ break;
+
+ case GT_JTRUE:
+ {
+ GenTree* cmp = treeNode->gtOp.gtOp1;
+
+ assert(cmp->OperIsCompare());
+ assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+#if !defined(_TARGET_64BIT_)
+ // For long compares, we emit special logic
+ if (varTypeIsLong(cmp->gtGetOp1()))
+ {
+ genJTrueLong(cmp);
+ }
+ else
+#endif
+ {
+ // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
+ // is governed by a flag NOT by the inherent type of the node
+ // TODO-XArch-CQ: Check if we can use the currently set flags.
+ emitJumpKind jumpKind[2];
+ bool branchToTrueLabel[2];
+ genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
+
+ BasicBlock* skipLabel = nullptr;
+ if (jumpKind[0] != EJ_NONE)
+ {
+ BasicBlock* jmpTarget;
+ if (branchToTrueLabel[0])
+ {
+ jmpTarget = compiler->compCurBB->bbJumpDest;
+ }
+ else
+ {
+ // This case arises only for ordered GT_EQ right now
+ assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
+ skipLabel = genCreateTempLabel();
+ jmpTarget = skipLabel;
+ }
+
+ inst_JMP(jumpKind[0], jmpTarget);
+ }
+
+ if (jumpKind[1] != EJ_NONE)
+ {
+ // the second conditional branch always has to be to the true label
+ assert(branchToTrueLabel[1]);
+ inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
+ }
+
+ if (skipLabel != nullptr)
+ {
+ genDefineTempLabel(skipLabel);
+ }
+ }
+ }
+ break;
+
+ case GT_RETURNTRAP:
+ {
+ // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
+ // based on the contents of 'data'
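+            // Illustrative shape of the code emitted below ('data' may be in a register or memory):
+            //     cmp  data, 0
+            //     je   skipLabel
+            //     call CORINFO_HELP_STOP_FOR_GC
+            //   skipLabel: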
+
+ GenTree* data = treeNode->gtOp.gtOp1;
+ genConsumeRegs(data);
+ GenTreeIntCon cns = intForm(TYP_INT, 0);
+ emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns);
+
+ BasicBlock* skipLabel = genCreateTempLabel();
+
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, skipLabel);
+
+ // emit the call to the EE-helper that stops for GC (or other reasons)
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+ assert(genIsValidIntReg(tmpReg));
+
+ genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg);
+ genDefineTempLabel(skipLabel);
+ }
+ break;
+
+ case GT_STOREIND:
+ genStoreInd(treeNode);
+ break;
+
+ case GT_COPY:
+ // This is handled at the time we call genConsumeReg() on the GT_COPY
+ break;
+
+ case GT_SWAP:
+ {
+ // Swap is only supported for lclVar operands that are enregistered
+ // We do not consume or produce any registers. Both operands remain enregistered.
+ // However, the gc-ness may change.
+ assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2));
+
+ GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
+ var_types type1 = varDsc1->TypeGet();
+ GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon();
+ LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
+ var_types type2 = varDsc2->TypeGet();
+
+ // We must have both int or both fp regs
+ assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
+
+ // FP swap is not yet implemented (and should have NYI'd in LSRA)
+ assert(!varTypeIsFloating(type1));
+
+ regNumber oldOp1Reg = lcl1->gtRegNum;
+ regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
+ regNumber oldOp2Reg = lcl2->gtRegNum;
+ regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
+
+ // We don't call genUpdateVarReg because we don't have a tree node with the new register.
+ varDsc1->lvRegNum = oldOp2Reg;
+ varDsc2->lvRegNum = oldOp1Reg;
+
+ // Do the xchg
+ emitAttr size = EA_PTRSIZE;
+ if (varTypeGCtype(type1) != varTypeGCtype(type2))
+ {
+ // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
+ // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
+ size = EA_GCREF;
+ }
+ inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
+
+ // Update the gcInfo.
+ // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
+ gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
+ gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
+
+ // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
+ // It will also dump the updates.
+ gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
+ gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
+ }
+ break;
+
+ case GT_LIST:
+ case GT_ARGPLACE:
+ // Nothing to do
+ break;
+
+ case GT_PUTARG_STK:
+ genPutArgStk(treeNode);
+ break;
+
+ case GT_PUTARG_REG:
+ {
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ noway_assert(targetType != TYP_STRUCT);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // commas show up here commonly, as part of a nullchk operation
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+ // If child node is not already in the register we need, move it
+ genConsumeReg(op1);
+ if (treeNode->gtRegNum != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType);
+ }
+ genProduceReg(treeNode);
+ }
+ break;
+
+ case GT_CALL:
+ genCallInstruction(treeNode);
+ break;
+
+ case GT_JMP:
+ genJmpMethod(treeNode);
+ break;
+
+ case GT_LOCKADD:
+ case GT_XCHG:
+ case GT_XADD:
+ genLockedInstructions(treeNode);
+ break;
+
+ case GT_MEMORYBARRIER:
+ instGen_MemoryBarrier();
+ break;
+
+ case GT_CMPXCHG:
+ {
+ GenTreePtr location = treeNode->gtCmpXchg.gtOpLocation; // arg1
+ GenTreePtr value = treeNode->gtCmpXchg.gtOpValue; // arg2
+ GenTreePtr comparand = treeNode->gtCmpXchg.gtOpComparand; // arg3
+
+ assert(location->gtRegNum != REG_NA && location->gtRegNum != REG_RAX);
+ assert(value->gtRegNum != REG_NA && value->gtRegNum != REG_RAX);
+
+ genConsumeReg(location);
+ genConsumeReg(value);
+ genConsumeReg(comparand);
+ // comparand goes to RAX;
+ // Note that we must issue this move after the genConsumeRegs(), in case any of the above
+ // have a GT_COPY from RAX.
+ if (comparand->gtRegNum != REG_RAX)
+ {
+ inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->gtRegNum, comparand->TypeGet());
+ }
+
+ // location is Rm
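+            // Illustrative shape: 'lock cmpxchg [location], value' with the comparand in RAX;
+            // the value that was in memory ends up in RAX afterwards.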
+ instGen(INS_lock);
+
+ emit->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->gtRegNum, location->gtRegNum, 0);
+
+ // Result is in RAX
+ if (targetReg != REG_RAX)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType);
+ }
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_RELOAD:
+ // do nothing - reload is just a marker.
+ // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
+ // into the register specified in this node.
+ break;
+
+ case GT_NOP:
+ break;
+
+ case GT_NO_OP:
+ if (treeNode->gtFlags & GTF_NO_OP_NO)
+ {
+ noway_assert(!"GTF_NO_OP_NO should not be set");
+ }
+ else
+ {
+ getEmitter()->emitIns_Nop(1);
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ genRangeCheck(treeNode);
+ break;
+
+ case GT_PHYSREG:
+ if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg)
+ {
+ inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType);
+
+ genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_PHYSREGDST:
+ break;
+
+ case GT_NULLCHECK:
+ {
+ assert(!treeNode->gtOp.gtOp1->isContained());
+ regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
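+            // The 4-byte compare below dereferences [reg]; faulting on a null address is
+            // exactly the point of GT_NULLCHECK, and the result of the compare is unused.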
+ emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0);
+ }
+ break;
+
+ case GT_CATCH_ARG:
+
+ noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
+
+ /* Catch arguments get passed in a register. genCodeForBBlist()
+ would have marked it as holding a GC object, but not used. */
+
+ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
+ genConsumeReg(treeNode);
+ break;
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+
+ // Have to clear the ShadowSP of the nesting level which encloses the finally. Generates:
+ // mov dword ptr [ebp-0xC], 0 // for some slot of the ShadowSP local var
+
+ unsigned finallyNesting;
+ finallyNesting = treeNode->gtVal.gtVal1;
+ noway_assert(treeNode->gtVal.gtVal1 < compiler->compHndBBtabCount);
+ noway_assert(finallyNesting < compiler->compHndBBtabCount);
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
+ TARGET_POINTER_SIZE); // below doesn't underflow.
+ filterEndOffsetSlotOffs =
+ (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
+
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE);
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
+ break;
+#endif // !FEATURE_EH_FUNCLETS
+
+ case GT_PINVOKE_PROLOG:
+ noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
+
+ // the runtime side requires the codegen here to be consistent
+ emit->emitDisableRandomNops();
+ break;
+
+ case GT_LABEL:
+ genPendingCallLabel = genCreateTempLabel();
+ treeNode->gtLabel.gtLabBB = genPendingCallLabel;
+ emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->gtRegNum);
+ break;
+
+ case GT_STORE_OBJ:
+ if (treeNode->OperIsCopyBlkOp() && !treeNode->AsBlk()->gtBlkOpGcUnsafe)
+ {
+ assert(treeNode->AsObj()->gtGcPtrCount != 0);
+ genCodeForCpObj(treeNode->AsObj());
+ break;
+ }
+ __fallthrough;
+
+ case GT_STORE_DYN_BLK:
+ case GT_STORE_BLK:
+ genCodeForStoreBlk(treeNode->AsBlk());
+ break;
+
+ case GT_JMPTABLE:
+ genJumpTable(treeNode);
+ break;
+
+ case GT_SWITCH_TABLE:
+ genTableBasedSwitch(treeNode);
+ break;
+
+ case GT_ARR_INDEX:
+ genCodeForArrIndex(treeNode->AsArrIndex());
+ break;
+
+ case GT_ARR_OFFSET:
+ genCodeForArrOffset(treeNode->AsArrOffs());
+ break;
+
+ case GT_CLS_VAR_ADDR:
+ getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
+ genProduceReg(treeNode);
+ break;
+
+#if !defined(_TARGET_64BIT_)
+ case GT_LONG:
+ assert(!treeNode->isContained());
+ genConsumeRegs(treeNode);
+ break;
+#endif
+
+ case GT_IL_OFFSET:
+ // Do nothing; these nodes are simply markers for debug info.
+ break;
+
+ default:
+ {
+#ifdef DEBUG
+ char message[256];
+ sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
+#endif
+ assert(!"Unknown node in codegen");
+ }
+ break;
+ }
+}
+
+//----------------------------------------------------------------------------------
+// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
+//
+// Arguments:
+// treeNode - Gentree of GT_STORE_LCL_VAR
+//
+// Return Value:
+// None
+//
+// Assumption:
+// The child of store is a multi-reg call node.
+// genProduceReg() on treeNode is made by caller of this routine.
+//
+void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Structs of size >=9 and <=16 are returned in two return registers on x64 Unix.
+ assert(varTypeIsStruct(treeNode));
+
+ // Assumption: current x64 Unix implementation requires that a multi-reg struct
+ // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
+ // being struct promoted.
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ noway_assert(varDsc->lvIsMultiRegRet);
+
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+ assert(call->HasMultiRegRetVal());
+
+ genConsumeRegs(op1);
+
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT);
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ // Right now the only enregistrable structs supported are SIMD types.
+ assert(varTypeIsSIMD(treeNode));
+ assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0)));
+ assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1)));
+
+        // This is the case where the two 8-bytes that comprise the operand are in
+        // two different xmm registers and need to be assembled into a single
+        // xmm register.
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber reg0 = call->GetRegNumByIdx(0);
+ regNumber reg1 = call->GetRegNumByIdx(1);
+
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
+ if (reloadReg != REG_NA)
+ {
+ reg0 = reloadReg;
+ }
+
+ reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
+ if (reloadReg != REG_NA)
+ {
+ reg1 = reloadReg;
+ }
+ }
+
+ if (targetReg != reg0 && targetReg != reg1)
+ {
+            // Copy reg0 into targetReg and let it be handled by one
+ // of the cases below.
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), targetReg, reg0, TYP_DOUBLE);
+ targetReg = reg0;
+ }
+
+ if (targetReg == reg0)
+ {
+            // targetReg[63:0] = targetReg[63:0]
+ // targetReg[127:64] = reg1[127:64]
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00);
+ }
+ else
+ {
+ assert(targetReg == reg1);
+
+ // We need two shuffles to achieve this
+ // First:
+            // targetReg[63:0] = targetReg[63:0]
+ // targetReg[127:64] = reg0[63:0]
+ //
+ // Second:
+            // targetReg[63:0] = targetReg[127:64]
+ // targetReg[127:64] = targetReg[63:0]
+ //
+ // Essentially copy low 8-bytes from reg0 to high 8-bytes of targetReg
+ // and next swap low and high 8-bytes of targetReg to have them
+ // rearranged in the right order.
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg0, 0x00);
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01);
+ }
+ }
+ else
+ {
+ // Stack store
+ int offset = 0;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ regNumber reg = call->GetRegNumByIdx(i);
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+ if (reloadReg != REG_NA)
+ {
+ reg = reloadReg;
+ }
+ }
+
+ assert(reg != REG_NA);
+ getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
+ offset += genTypeSize(type);
+ }
+
+ varDsc->lvRegNum = REG_STK;
+ }
+#elif defined(_TARGET_X86_)
+ // Longs are returned in two return registers on x86.
+ assert(varTypeIsLong(treeNode));
+
+ // Assumption: current x86 implementation requires that a multi-reg long
+ // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
+ // being promoted.
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ noway_assert(varDsc->lvIsMultiRegRet);
+
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+ assert(call->HasMultiRegRetVal());
+
+ genConsumeRegs(op1);
+
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+ assert(regCount == MAX_RET_REG_COUNT);
+
+ // Stack store
+ int offset = 0;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ regNumber reg = call->GetRegNumByIdx(i);
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+ if (reloadReg != REG_NA)
+ {
+ reg = reloadReg;
+ }
+ }
+
+ assert(reg != REG_NA);
+ getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
+ offset += genTypeSize(type);
+ }
+
+ varDsc->lvRegNum = REG_STK;
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
+ assert(!"Unreached");
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
+}
+
+//------------------------------------------------------------------------
+// genLclHeap: Generate code for localloc.
+//
+// Arguments:
+// tree - the localloc tree to generate.
+//
+// Notes:
+// Note that for x86, we don't track ESP movements while generating the localloc code.
+// The ESP tracking is used to report stack pointer-relative GC info, which is not
+// interesting while doing the localloc construction. Also, for functions with localloc,
+// we have EBP frames, and EBP-relative locals, and ESP-relative accesses only for function
+// call arguments. We store the ESP after the localloc is complete in the LocAllocSP
+// variable. This variable is implicitly reported to the VM in the GC info (its position
+// is defined by convention relative to other items), and is used by the GC to find the
+// "base" stack pointer in functions with localloc.
+//
+void CodeGen::genLclHeap(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_LCLHEAP);
+ assert(compiler->compLocallocUsed);
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
+
+ regNumber targetReg = tree->gtRegNum;
+ regMaskTP tmpRegsMask = tree->gtRsvdRegs;
+ regNumber regCnt = REG_NA;
+ var_types type = genActualType(size->gtType);
+ emitAttr easz = emitTypeSize(type);
+ BasicBlock* endLabel = nullptr;
+
+#ifdef DEBUG
+ // Verify ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+
+ noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
+ noway_assert(genStackLevel == 0); // Can't have anything on the stack
+
+ unsigned stackAdjustment = 0;
+ BasicBlock* loop = nullptr;
+
+    // Compute the amount of memory to allocate, properly rounded up to STACK_ALIGN.
+ size_t amount = 0;
+ if (size->IsCnsIntOrI())
+ {
+ // If size is a constant, then it must be contained.
+ assert(size->isContained());
+
+ // If amount is zero then return null in targetReg
+ amount = size->gtIntCon.gtIconVal;
+ if (amount == 0)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
+ goto BAILOUT;
+ }
+
+        // 'amount' is the total number of bytes to localloc, rounded up to STACK_ALIGN
+ amount = AlignUp(amount, STACK_ALIGN);
+ }
+ else
+ {
+ // The localloc requested memory size is non-constant.
+
+ // Put the size value in targetReg. If it is zero, bail out by returning null in targetReg.
+ genConsumeRegAndCopy(size, targetReg);
+ endLabel = genCreateTempLabel();
+ getEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg);
+ inst_JMP(EJ_je, endLabel);
+
+ // Compute the size of the block to allocate and perform alignment.
+ // If compInitMem=true, we can reuse targetReg as regcnt,
+ // since we don't need any internal registers.
+ if (compiler->info.compInitMem)
+ {
+ assert(genCountBits(tmpRegsMask) == 0);
+ regCnt = targetReg;
+ }
+ else
+ {
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~regCntMask;
+ regCnt = genRegNumFromMask(regCntMask);
+ if (regCnt != targetReg)
+ {
+ // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary.
+ inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
+ }
+ }
+
+ // Round up the number of bytes to allocate to a STACK_ALIGN boundary. This is done
+ // by code like:
+ // add reg, 15
+ // and reg, -16
+ // However, in the initialized memory case, we need the count of STACK_ALIGN-sized
+ // elements, not a byte count, after the alignment. So instead of the "and", which
+ // becomes unnecessary, generate a shift, e.g.:
+ // add reg, 15
+ // shr reg, 4
+
+ inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type));
+
+ if (compiler->info.compInitMem)
+ {
+ // Convert the count from a count of bytes to a loop count. We will loop once per
+            // stack alignment size, so each iteration will zero 4 bytes on x86 and 16 bytes on x64.
+ // Note that we zero a single reg-size word per iteration on x86, and 2 reg-size
+ // words per iteration on x64. We will shift off all the stack alignment bits
+ // added above, so there is no need for an 'and' instruction.
+
+ // --- shr regCnt, 2 (or 4) ---
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT_ALL);
+ }
+ else
+ {
+ // Otherwise, mask off the low bits to align the byte count.
+ inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
+ }
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must adjust the SP by popping off the
+ // outgoing arg area. We will restore it right before we return from this method.
+ //
+    // Localloc returns stack space that is aligned to STACK_ALIGN bytes. The following
+ // are the cases that need to be handled:
+ // i) Method has out-going arg area.
+ // It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs).
+ // Therefore, we will pop off the out-going arg area from RSP before allocating the localloc space.
+ // ii) Method has no out-going arg area.
+ // Nothing to pop off from the stack.
+ if (compiler->lvaOutgoingArgSpaceSize > 0)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
+ // aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
+ }
+#endif
+
+ if (size->IsCnsIntOrI())
+ {
+ // We should reach here only for non-zero, constant size allocations.
+ assert(amount > 0);
+ assert((amount % STACK_ALIGN) == 0);
+ assert((amount % REGSIZE_BYTES) == 0);
+
+ // For small allocations we will generate up to six inline 'push 0' instructions.
+ size_t cntRegSizedWords = amount / REGSIZE_BYTES;
+ if (cntRegSizedWords <= 6)
+ {
+ for (; cntRegSizedWords != 0; cntRegSizedWords--)
+ {
+ inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
+ }
+ goto ALLOC_DONE;
+ }
+
+ bool doNoInitLessThanOnePageAlloc =
+ !compiler->info.compInitMem && (amount < compiler->eeGetPageSize()); // must be < not <=
+
+#ifdef _TARGET_X86_
+ bool needRegCntRegister = true;
+#else // !_TARGET_X86_
+ bool needRegCntRegister = !doNoInitLessThanOnePageAlloc;
+#endif // !_TARGET_X86_
+
+ if (needRegCntRegister)
+ {
+ // If compInitMem=true, we can reuse targetReg as regcnt.
+ // Since size is a constant, regCnt is not yet initialized.
+ assert(regCnt == REG_NA);
+ if (compiler->info.compInitMem)
+ {
+ assert(genCountBits(tmpRegsMask) == 0);
+ regCnt = targetReg;
+ }
+ else
+ {
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~regCntMask;
+ regCnt = genRegNumFromMask(regCntMask);
+ }
+ }
+
+ if (doNoInitLessThanOnePageAlloc)
+ {
+ // Since the size is less than a page, simply adjust ESP.
+ // ESP might already be in the guard page, so we must touch it BEFORE
+ // the alloc, not after.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ // For x86, we don't want to use "sub ESP" because we don't want the emitter to track the adjustment
+ // to ESP. So do the work in the count register.
+ // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require
+ // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't
+ // track".
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+ inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
+ inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
+#else // !_TARGET_X86_
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+ inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE);
+#endif // !_TARGET_X86_
+
+ goto ALLOC_DONE;
+ }
+
+ // else, "mov regCnt, amount"
+
+ if (compiler->info.compInitMem)
+ {
+ // When initializing memory, we want 'amount' to be the loop count.
+ assert((amount % STACK_ALIGN) == 0);
+ amount /= STACK_ALIGN;
+ }
+
+ genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
+ }
+
+ loop = genCreateTempLabel();
+ if (compiler->info.compInitMem)
+ {
+ // At this point 'regCnt' is set to the number of loop iterations for this loop, if each
+ // iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes.
+ // Since we have to zero out the allocated memory AND ensure that RSP is always valid
+ // by tickling the pages, we will just push 0's on the stack.
+
+ assert(genIsValidIntReg(regCnt));
+
+ // Loop:
+ genDefineTempLabel(loop);
+
+#if defined(_TARGET_AMD64_)
+ // Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value.
+ static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
+ inst_IV(INS_push_hide, 0); // --- push 8-byte 0
+ inst_IV(INS_push_hide, 0); // --- push 8-byte 0
+#elif defined(_TARGET_X86_)
+ // Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value.
+ static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES);
+ inst_IV(INS_push_hide, 0); // --- push 4-byte 0
+#endif // _TARGET_X86_
+
+ // Decrement the loop counter and loop if not done.
+ inst_RV(INS_dec, regCnt, TYP_I_IMPL);
+ inst_JMP(EJ_jne, loop);
+ }
+ else
+ {
+ // At this point 'regCnt' is set to the total number of bytes to localloc.
+ //
+ // We don't need to zero out the allocated memory. However, we do have
+ // to tickle the pages to ensure that ESP is always valid and is
+ // in sync with the "stack guard page". Note that in the worst
+ // case ESP is on the last byte of the guard page. Thus you must
+ // touch ESP+0 first, not ESP+0x1000.
+ //
+ // Another subtlety is that you don't want ESP to be exactly on the
+ // boundary of the guard page, because PUSH is pre-decrement; call
+ // setup would then not touch the guard page but land just beyond it.
+ //
+ // Note that we go through a few hoops so that ESP never points to
+ // illegal pages at any time during the tickling process
+ //
+ // neg REGCNT
+ // add REGCNT, ESP // reg now holds ultimate ESP
+ // jb loop // result is smaller than original ESP (no wrap around)
+ // xor REGCNT, REGCNT // Overflow, pick lowest possible number
+ // loop:
+ // test ESP, [ESP+0] // tickle the page
+ // mov REGTMP, ESP
+ // sub REGTMP, PAGE_SIZE
+ // mov ESP, REGTMP
+ // cmp ESP, REGCNT
+ // jae loop
+ //
+ // mov ESP, REG
+ // end:
+ inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
+ inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
+ inst_JMP(EJ_jb, loop);
+
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+
+ genDefineTempLabel(loop);
+
+ // Tickle the page at the current ESP, then move the decremented value into ESP.
+ // Note that the probe has to be done BEFORE the update of ESP, since
+ // ESP might already be on the guard page. It is OK to leave
+ // the final value of ESP on the guard page.
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+
+ // This is a harmless trick to avoid the emitter trying to track the
+ // decrement of the ESP - we do the subtraction in another reg instead
+ // of adjusting ESP directly.
+ assert(tmpRegsMask != RBM_NONE);
+ assert(genCountBits(tmpRegsMask) == 1);
+ regNumber regTmp = genRegNumFromMask(tmpRegsMask);
+
+ inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL);
+ inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE);
+ inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL);
+
+ inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
+ inst_JMP(EJ_jae, loop);
+
+ // Move the final value to ESP
+ inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
+ }
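+
+ // [Illustrative sketch, not part of the JIT code] In C-like pseudo-code the probe
+ // sequence emitted above behaves roughly as follows ('finalSp' stands for the value
+ // left in regCnt, i.e. the ultimate stack pointer, or 0 on unsigned wrap-around):
+ //
+ // do
+ // {
+ // volatile char probe = *(char*)currentSp; // "test [ESP], ESP" touches the page
+ // currentSp -= pageSize; // done via a temp reg so the emitter
+ // // does not track an ESP adjustment
+ // } while (currentSp >= finalSp); // cmp ESP, regCnt / jae loop
+ // currentSp = finalSp; // final "mov ESP, regCnt"
+ //
+ // The probe always precedes the decrement, so ESP never skips over the guard page.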
+
+ALLOC_DONE:
+ // Re-adjust SP to allocate out-going arg area
+ if (stackAdjustment > 0)
+ {
+ assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_sub, REG_SPBASE, stackAdjustment, EA_PTRSIZE);
+ }
+
+ // Return the stackalloc'ed address in result register.
+ // TargetReg = RSP + stackAdjustment.
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, targetReg, REG_SPBASE, stackAdjustment);
+
+ if (endLabel != nullptr)
+ {
+ genDefineTempLabel(endLabel);
+ }
+
+BAILOUT:
+
+ // Write the lvaLocAllocSPvar stack frame slot
+ noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+
+#if STACK_PROBES
+ if (compiler->opts.compNeedStackProbes)
+ {
+ genGenerateStackProbe();
+ }
+#endif
+
+#ifdef DEBUG
+ // Update new ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+ }
+#endif
+
+ genProduceReg(tree);
+}
+
+void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
+{
+ if (storeBlkNode->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitDisableGC();
+ }
+ bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();
+
+ switch (storeBlkNode->gtBlkOpKind)
+ {
+#ifdef _TARGET_AMD64_
+ case GenTreeBlk::BlkOpKindHelper:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlk(storeBlkNode);
+ }
+ else
+ {
+ genCodeForInitBlk(storeBlkNode);
+ }
+ break;
+#endif // _TARGET_AMD64_
+ case GenTreeBlk::BlkOpKindRepInstr:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlkRepMovs(storeBlkNode);
+ }
+ else
+ {
+ genCodeForInitBlkRepStos(storeBlkNode);
+ }
+ break;
+ case GenTreeBlk::BlkOpKindUnroll:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlkUnroll(storeBlkNode);
+ }
+ else
+ {
+ genCodeForInitBlkUnroll(storeBlkNode);
+ }
+ break;
+ default:
+ unreached();
+ }
+ if (storeBlkNode->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitEnableGC();
+ }
+}
+
+// Generate code for InitBlk using rep stos.
+// Preconditions:
+ // The size of the buffer must be a constant and less than INITBLK_STOS_LIMIT bytes.
+ // For any size larger than that, we use the helper even if both the
+ // fill byte and the size are integer constants.
+void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
+{
+ // Make sure we got the arguments of the initblk/initobj operation in the right registers
+ unsigned size = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+
+#ifdef DEBUG
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+#ifdef _TARGET_AMD64_
+ assert(size != 0);
+#endif
+ if (initVal->IsCnsIntOrI())
+ {
+#ifdef _TARGET_AMD64_
+ assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
+#else
+ assert(size > CPBLK_UNROLL_LIMIT);
+#endif
+ }
+
+#endif // DEBUG
+
+ genConsumeBlockOp(initBlkNode, REG_RDI, REG_RAX, REG_RCX);
+ instGen(INS_r_stosb);
+}
+
+// Generate code for InitBlk by performing a loop unroll
+// Preconditions:
+// a) Both the size and fill byte value are integer constants.
+ // b) The size of the struct to initialize is no larger than INITBLK_UNROLL_LIMIT bytes.
+//
+void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
+{
+ // Make sure we got the arguments of the initblk/initobj operation in the right registers
+ unsigned size = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+ assert(size != 0);
+ assert(size <= INITBLK_UNROLL_LIMIT);
+ assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
+
+ emitter* emit = getEmitter();
+
+ genConsumeOperands(initBlkNode);
+
+ // If the initVal was moved, or spilled and reloaded to a different register,
+ // get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
+ // which needs to be the new register.
+ regNumber valReg = initVal->gtRegNum;
+ initVal = initVal->gtSkipReloadOrCopy();
+
+ unsigned offset = 0;
+
+ // Perform an unroll using SSE2 loads and stores.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ regNumber tmpReg = genRegNumFromMask(initBlkNode->gtRsvdRegs);
+
+#ifdef DEBUG
+ assert(initBlkNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(initBlkNode->gtRsvdRegs) == 1);
+ assert(genIsValidFloatReg(tmpReg));
+#endif // DEBUG
+
+ if (initVal->gtIntCon.gtIconVal != 0)
+ {
+ emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, tmpReg, valReg);
+ emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
+#ifdef _TARGET_X86_
+ // For x86, we need one more to convert it from 8 bytes to 16 bytes.
+ emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
+#endif // _TARGET_X86_
+ }
+ else
+ {
+ emit->emitIns_R_R(INS_xorpd, EA_8BYTE, tmpReg, tmpReg);
+ }
+
+ // Determine how many 16 byte slots we're going to fill using SSE movs.
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ while (slots-- > 0)
+ {
+ emit->emitIns_AR_R(INS_movdqu, EA_8BYTE, tmpReg, dstAddr->gtRegNum, offset);
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (or a struct smaller than 16 bytes).
+ if ((size & 8) != 0)
+ {
+#ifdef _TARGET_X86_
+ // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
+ emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 4;
+ emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 4;
+#else // !_TARGET_X86_
+ emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 8;
+#endif // !_TARGET_X86_
+ }
+ if ((size & 4) != 0)
+ {
+ emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 4;
+ }
+ if ((size & 2) != 0)
+ {
+ emit->emitIns_AR_R(INS_mov, EA_2BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 2;
+ }
+ if ((size & 1) != 0)
+ {
+ emit->emitIns_AR_R(INS_mov, EA_1BYTE, valReg, dstAddr->gtRegNum, offset);
+ }
+}
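+
+// [Illustrative sketch, not part of the JIT code] The tail handling above is the usual
+// power-of-two decomposition of a small remainder. For example, assuming size == 13 and
+// a fill value already widened into valReg, the stores emitted would be:
+//
+// (13 & 8) != 0 -> one 8-byte store (two 4-byte stores on x86) at offset 0
+// (13 & 4) != 0 -> one 4-byte store at offset 8
+// (13 & 2) == 0 -> skipped
+// (13 & 1) != 0 -> one 1-byte store at offset 12
+//
+// so at most one store is emitted per power of two and the offsets never overlap.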
+
+// Generates code for InitBlk by calling the VM memset helper function.
+// Preconditions:
+ // a) The size argument of the InitBlk is not an integer constant, or
+ // b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
+void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
+{
+#ifdef _TARGET_AMD64_
+ // Make sure we got the arguments of the initblk operation in the right registers
+ unsigned blockSize = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+
+ if (blockSize != 0)
+ {
+ assert(blockSize >= CPBLK_MOVS_LIMIT);
+ }
+
+ genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+ genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+#else // !_TARGET_AMD64_
+ NYI_X86("Helper call for InitBlk");
+#endif // !_TARGET_AMD64_
+}
+
+// Generate code for a load from some address + offset.
+// baseNode: tree node which can be either a local address or an arbitrary node
+// offset: distance from the baseNode address at which to load
+void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset)
+{
+ emitter* emit = getEmitter();
+
+ if (baseNode->OperIsLocalAddr())
+ {
+ if (baseNode->gtOper == GT_LCL_FLD_ADDR)
+ {
+ offset += baseNode->gtLclFld.gtLclOffs;
+ }
+ emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset);
+ }
+ else
+ {
+ emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset);
+ }
+}
+
+//------------------------------------------------------------------------
+// genCodeForStoreOffset: Generate code to store a reg to [base + offset].
+//
+// Arguments:
+// ins - the instruction to generate.
+// size - the size that needs to be stored.
+// src - the register which needs to be stored.
+// baseNode - the base, relative to which to store the src register.
+// offset - the offset that is added to the baseNode to calculate the address to store into.
+//
+void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset)
+{
+ emitter* emit = getEmitter();
+
+ if (baseNode->OperIsLocalAddr())
+ {
+ if (baseNode->gtOper == GT_LCL_FLD_ADDR)
+ {
+ offset += baseNode->gtLclFld.gtLclOffs;
+ }
+
+ emit->emitIns_S_R(ins, size, src, baseNode->AsLclVarCommon()->GetLclNum(), offset);
+ }
+ else
+ {
+ emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
+ }
+}
+
+// Generates CpBlk code by performing a loop unroll
+// Preconditions:
+// The size argument of the CpBlk node is a constant and <= 64 bytes.
+// This may seem small but covers >95% of the cases in several framework assemblies.
+//
+void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
+{
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned size = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ GenTreePtr source = cpBlkNode->Data();
+ GenTreePtr srcAddr = nullptr;
+ assert(size <= CPBLK_UNROLL_LIMIT);
+
+ emitter* emit = getEmitter();
+
+ if (source->gtOper == GT_IND)
+ {
+ srcAddr = source->gtGetOp1();
+ if (!srcAddr->isContained())
+ {
+ genConsumeReg(srcAddr);
+ }
+ }
+ else
+ {
+ noway_assert(source->IsLocal());
+ // TODO-Cleanup: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
+ // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
+ if (source->OperGet() == GT_LCL_VAR)
+ {
+ source->SetOper(GT_LCL_VAR_ADDR);
+ }
+ else
+ {
+ assert(source->OperGet() == GT_LCL_FLD);
+ source->SetOper(GT_LCL_FLD_ADDR);
+ }
+ srcAddr = source;
+ }
+
+ if (!dstAddr->isContained())
+ {
+ genConsumeReg(dstAddr);
+ }
+
+ unsigned offset = 0;
+
+ // If the size of this struct is at least 16 bytes,
+ // use SSE2 so we can do 16-byte loads and stores.
+
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ assert(cpBlkNode->gtRsvdRegs != RBM_NONE);
+ regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLFLOAT);
+ assert(genIsValidFloatReg(xmmReg));
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ // TODO: In the below code the load and store instructions are for 16 bytes, but the
+ // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
+ // this probably needs to be changed.
+ while (slots-- > 0)
+ {
+ // Load
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
+ // Store
+ genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (15 bytes or less) if there's one.
+ if ((size & 0xf) != 0)
+ {
+ // Grab the integer temp register to emit the remaining loads and stores.
+ regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT);
+
+ if ((size & 8) != 0)
+ {
+#ifdef _TARGET_X86_
+ // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
+ for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ }
+#else // !_TARGET_X86_
+ genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
+ offset += 8;
+#endif // !_TARGET_X86_
+ }
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ offset += 4;
+ }
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
+ offset += 2;
+ }
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
+ }
+ }
+}
+
+// Generate code for CpBlk by using rep movs
+// Preconditions:
+// The size argument of the CpBlk is a constant and is between
+// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
+void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode)
+{
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned size = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ GenTreePtr source = cpBlkNode->Data();
+ GenTreePtr srcAddr = nullptr;
+
+#ifdef DEBUG
+ assert(!dstAddr->isContained());
+ assert(source->isContained());
+
+#ifdef _TARGET_X86_
+ if (size == 0)
+ {
+ noway_assert(cpBlkNode->OperGet() == GT_STORE_DYN_BLK);
+ }
+ else
+#endif
+ {
+#ifdef _TARGET_AMD64_
+ assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
+#else
+ assert(size > CPBLK_UNROLL_LIMIT);
+#endif
+ }
+#endif // DEBUG
+
+ genConsumeBlockOp(cpBlkNode, REG_RDI, REG_RSI, REG_RCX);
+ instGen(INS_r_movsb);
+}
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+//---------------------------------------------------------------------------------------------------------------//
+// genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling.
+//
+// Arguments:
+// putArgNode - the PutArgStk tree.
+// baseVarNum - the base var number, relative to which the by-val struct will be copied on the stack.
+//
+// TODO-Amd64-Unix: Try to share code with copyblk.
+// Need refactoring of copyblk before it could be used for putarg_stk.
+// The difference for now is that a putarg_stk contains its children, while cpblk does not.
+// This creates differences in code. After some significant refactoring it could be reused.
+//
+void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
+{
+ // We will never call this method for SIMD types, which are stored directly
+ // in genPutStructArgStk().
+ noway_assert(putArgNode->TypeGet() == TYP_STRUCT);
+
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr dstAddr = putArgNode;
+ GenTreePtr src = putArgNode->gtOp.gtOp1;
+
+ size_t size = putArgNode->getArgSize();
+ assert(size <= CPBLK_UNROLL_LIMIT);
+
+ emitter* emit = getEmitter();
+ unsigned putArgOffset = putArgNode->getArgOffset();
+
+ assert(src->isContained());
+
+ assert(src->gtOper == GT_OBJ);
+
+ if (!src->gtOp.gtOp1->isContained())
+ {
+ genConsumeReg(src->gtOp.gtOp1);
+ }
+
+ unsigned offset = 0;
+
+ // If the size of this struct is at least 16 bytes,
+ // use SSE2 so we can do 16-byte loads and stores.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ assert(putArgNode->gtRsvdRegs != RBM_NONE);
+ regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
+ assert(genIsValidFloatReg(xmmReg));
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ assert(putArgNode->gtGetOp1()->isContained());
+ assert(putArgNode->gtGetOp1()->gtOp.gtOper == GT_OBJ);
+
+ // TODO: In the below code the load and store instructions are for 16 bytes, but the
+ // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
+ // this probably needs to be changed.
+ while (slots-- > 0)
+ {
+ // Load
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, src->gtGetOp1(),
+ offset); // Load the address of the child of the Obj node.
+
+ // Store
+ emit->emitIns_S_R(INS_movdqu, EA_8BYTE, xmmReg, baseVarNum, putArgOffset + offset);
+
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (15 bytes or less) if there's one.
+ if ((size & 0xf) != 0)
+ {
+ // Grab the integer temp register to emit the remaining loads and stores.
+ regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
+ assert(genIsValidIntReg(tmpReg));
+
+ if ((size & 8) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, src->gtOp.gtOp1, offset);
+
+ emit->emitIns_S_R(INS_mov, EA_8BYTE, tmpReg, baseVarNum, putArgOffset + offset);
+
+ offset += 8;
+ }
+
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, src->gtOp.gtOp1, offset);
+
+ emit->emitIns_S_R(INS_mov, EA_4BYTE, tmpReg, baseVarNum, putArgOffset + offset);
+
+ offset += 4;
+ }
+
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, src->gtOp.gtOp1, offset);
+
+ emit->emitIns_S_R(INS_mov, EA_2BYTE, tmpReg, baseVarNum, putArgOffset + offset);
+
+ offset += 2;
+ }
+
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, src->gtOp.gtOp1, offset);
+ emit->emitIns_S_R(INS_mov, EA_1BYTE, tmpReg, baseVarNum, putArgOffset + offset);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genStructPutArgRepMovs: Generates code for passing a struct arg by value on stack using Rep Movs.
+//
+// Arguments:
+// putArgNode - the PutArgStk tree.
+// baseVarNum - the base var number, relative to which the by-val struct bits will go.
+//
+// Preconditions:
+// The size argument of the PutArgStk (for structs) is a constant and is between
+// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
+//
+void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
+{
+ assert(putArgNode->TypeGet() == TYP_STRUCT);
+ assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
+ assert(baseVarNum != BAD_VAR_NUM);
+
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr dstAddr = putArgNode;
+ GenTreePtr srcAddr = putArgNode->gtGetOp1();
+
+ // Validate state.
+ assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
+ assert(srcAddr->isContained());
+
+ genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum);
+ instGen(INS_r_movsb);
+}
+
+//------------------------------------------------------------------------
+// If any Vector3 args are on the stack and they are not pass-by-ref, the upper 32 bits
+// must be cleared to zero. The native compiler doesn't clear the upper bits,
+// and there is no way to know whether the caller is native or not. So, the upper
+// 32 bits of a Vector3 argument on the stack are always cleared to zero.
+#ifdef FEATURE_SIMD
+void CodeGen::genClearStackVec3ArgUpperBits()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genClearStackVec3ArgUpperBits()\n");
+#endif
+
+ assert(compiler->compGeneratingProlog);
+
+ for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; varNum++)
+ {
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ assert(varDsc->lvIsParam);
+
+ // Does the var have a TYP_SIMD12 type?
+ if (varDsc->lvType != TYP_SIMD12)
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsRegArg)
+ {
+ // Clear the upper 32 bits by mov dword ptr [V_ARG_BASE+0xC], 0
+ getEmitter()->emitIns_S_I(ins_Store(TYP_INT), EA_4BYTE, varNum, genTypeSize(TYP_FLOAT) * 3, 0);
+ }
+ else
+ {
+ // Assume that for x64 linux, an argument is fully in registers
+ // or fully on the stack.
+ regNumber argReg = varDsc->GetOtherArgReg();
+
+ // Clear the upper 32 bits by two shift instructions.
+ // argReg = argReg << 96
+ getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
+ // argReg = argReg >> 96
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
+ }
+ }
+}
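+
+// [Illustrative sketch, not part of the JIT code] In the register case above, GetOtherArgReg()
+// appears to be the XMM register holding the second eightbyte of the split Vector3, i.e. the
+// 'z' float in its low 4 bytes with undefined data above it. Shifting the 128-bit register
+// left and then right by 12 bytes keeps only those low 4 bytes (low bytes shown on the left):
+//
+// xmm = [ z | 12 bytes of garbage ]
+// pslldq xmm, 12 -> [ 0 0 0 | z ]
+// psrldq xmm, 12 -> [ z | 0 0 0 ]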
+#endif // FEATURE_SIMD
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+// Generate code for CpObj nodes which copy structs that have interleaved
+// GC pointers.
+// This will generate a sequence of movsq instructions for the non-gc members,
+// and calls to the CORINFO_HELP_ASSIGN_BYREF helper otherwise.
+void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
+{
+ // Make sure we got the arguments of the cpobj operation in the right registers
+ GenTreePtr dstAddr = cpObjNode->Addr();
+ GenTreePtr source = cpObjNode->Data();
+ GenTreePtr srcAddr = nullptr;
+ bool sourceIsLocal = false;
+
+ assert(source->isContained());
+ if (source->gtOper == GT_IND)
+ {
+ srcAddr = source->gtGetOp1();
+ assert(!srcAddr->isContained());
+ }
+ else
+ {
+ noway_assert(source->IsLocal());
+ sourceIsLocal = true;
+ // TODO: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
+ // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
+ if (source->OperGet() == GT_LCL_VAR)
+ {
+ source->SetOper(GT_LCL_VAR_ADDR);
+ }
+ else
+ {
+ assert(source->OperGet() == GT_LCL_FLD);
+ source->SetOper(GT_LCL_FLD_ADDR);
+ }
+ srcAddr = source;
+ }
+
+ bool dstOnStack = dstAddr->OperIsLocalAddr();
+
+#ifdef DEBUG
+ bool isRepMovsqUsed = false;
+
+ assert(!dstAddr->isContained());
+
+ // If the GenTree node has data about GC pointers, this means we're dealing
+ // with CpObj, so this requires special logic.
+ assert(cpObjNode->gtGcPtrCount > 0);
+
+ // The movsq instruction is used for copying non-gcref fields, and it needs
+ // src = RSI and dst = RDI.
+ // Either these registers must not contain lclVars, or they must be dying or marked for spill.
+ // This is because these registers are incremented as we go through the struct.
+ GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
+ GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
+ unsigned srcLclVarNum = BAD_VAR_NUM;
+ unsigned dstLclVarNum = BAD_VAR_NUM;
+ bool isSrcAddrLiveOut = false;
+ bool isDstAddrLiveOut = false;
+ if (genIsRegCandidateLocal(actualSrcAddr))
+ {
+ srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
+ isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
+ }
+ if (genIsRegCandidateLocal(actualDstAddr))
+ {
+ dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
+ isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
+ }
+ assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut ||
+ ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
+ assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut ||
+ ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
+#endif // DEBUG
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ if (sourceIsLocal)
+ {
+ inst_RV_TT(INS_lea, REG_RSI, source, 0, EA_BYREF);
+ genConsumeBlockOp(cpObjNode, REG_RDI, REG_NA, REG_NA);
+ }
+ else
+ {
+ genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA);
+ }
+ gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
+ gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
+
+ unsigned slots = cpObjNode->gtSlots;
+
+ // If we can prove it's on the stack we don't need to use the write barrier.
+ if (dstOnStack)
+ {
+ if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+#ifdef DEBUG
+ // If the destination of the CpObj is on the stack
+ // make sure we allocated RCX to emit rep movsq.
+ regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
+ assert(tmpReg == REG_RCX);
+ isRepMovsqUsed = true;
+#endif // DEBUG
+
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots);
+ instGen(INS_r_movsq);
+ }
+ else
+ {
+ // For small structs, it's better to emit a sequence of movsq than to
+ // emit a rep movsq instruction.
+ while (slots > 0)
+ {
+ instGen(INS_movsq);
+ slots--;
+ }
+ }
+ }
+ else
+ {
+ BYTE* gcPtrs = cpObjNode->gtGcPtrs;
+ unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
+
+ unsigned i = 0;
+ while (i < slots)
+ {
+ switch (gcPtrs[i])
+ {
+ case TYPE_GC_NONE:
+ // Let's see if we can use rep movsq instead of a sequence of movsq instructions
+ // to save cycles and code size.
+ {
+ unsigned nonGcSlotCount = 0;
+
+ do
+ {
+ nonGcSlotCount++;
+ i++;
+ } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
+
+ // If we have a very small contiguous non-gc region, it's better just to
+ // emit a sequence of movsq instructions
+ if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ while (nonGcSlotCount > 0)
+ {
+ instGen(INS_movsq);
+ nonGcSlotCount--;
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ // Otherwise, we can save code-size and improve CQ by emitting
+ // rep movsq
+ regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
+ assert(tmpReg == REG_RCX);
+ isRepMovsqUsed = true;
+#endif // DEBUG
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
+ instGen(INS_r_movsq);
+ }
+ }
+ break;
+ default:
+ // We have a GC pointer; call the byref write barrier helper.
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
+ gcPtrCount--;
+ i++;
+ }
+ }
+
+ assert(gcPtrCount == 0);
+ }
+
+ // Clear the gcInfo for RSI and RDI.
+ // While we normally update GC info prior to the last instruction that uses them,
+ // these actually live into the helper call.
+ gcInfo.gcMarkRegSetNpt(RBM_RSI);
+ gcInfo.gcMarkRegSetNpt(RBM_RDI);
+}
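+
+// [Illustrative sketch, not part of the JIT code] The slot walk above amounts to the
+// following (lengthOfNonGcRun is a hypothetical helper that counts consecutive
+// TYPE_GC_NONE entries in gcPtrs):
+//
+// for (unsigned i = 0; i < slots; )
+// {
+// if (gcPtrs[i] == TYPE_GC_NONE)
+// {
+// unsigned run = lengthOfNonGcRun(gcPtrs, i, slots);
+// // short run: 'movsq' x run; long run: 'mov ecx, run; rep movsq'
+// i += run;
+// }
+// else
+// {
+// // one pointer-sized slot copied with the byref write barrier;
+// // CORINFO_HELP_ASSIGN_BYREF advances RSI/RDI itself
+// i++;
+// }
+// }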
+
+// Generate code for a CpBlk node by means of the VM memcpy helper call.
+// Preconditions:
+// a) The size argument of the CpBlk is not an integer constant, or
+// b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
+void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
+{
+#ifdef _TARGET_AMD64_
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned blockSize = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ GenTreePtr source = cpBlkNode->Data();
+ GenTreePtr srcAddr = nullptr;
+
+ // Size goes in arg2
+ if (blockSize != 0)
+ {
+ assert(blockSize >= CPBLK_MOVS_LIMIT);
+ assert((cpBlkNode->gtRsvdRegs & RBM_ARG_2) != 0);
+ }
+ else
+ {
+ noway_assert(cpBlkNode->gtOper == GT_STORE_DYN_BLK);
+ }
+
+ // Source address goes in arg1
+ if (source->gtOper == GT_IND)
+ {
+ srcAddr = source->gtGetOp1();
+ assert(!srcAddr->isContained());
+ }
+ else
+ {
+ noway_assert(source->IsLocal());
+ assert((cpBlkNode->gtRsvdRegs & RBM_ARG_1) != 0);
+ inst_RV_TT(INS_lea, REG_ARG_1, source, 0, EA_BYREF);
+ }
+
+ genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+ genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
+#else // !_TARGET_AMD64_
+ noway_assert(false && "Helper call for CpBlk is not needed.");
+#endif // !_TARGET_AMD64_
+}
+
+// Generate code to do a switch statement based on a table of ip-relative offsets.
+void CodeGen::genTableBasedSwitch(GenTree* treeNode)
+{
+ genConsumeOperands(treeNode->AsOp());
+ regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
+ regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
+
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ // load the ip-relative offset (which is relative to start of fgFirstBB)
+ getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0);
+
+ // add it to the absolute address of fgFirstBB
+ compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, tmpReg);
+ getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg);
+ // jmp baseReg
+ getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg);
+}
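+
+// [Illustrative sketch, not part of the JIT code] The dispatch above is equivalent to:
+//
+// int32_t relOffset = jumpTable[idx]; // 4-byte entries, relative to fgFirstBB
+// void* target = (uint8_t*)addressOfFirstBB + relOffset;
+// // jump to target ("jmp baseReg")
+//
+// Storing 32-bit offsets relative to fgFirstBB keeps the table compact and relocatable;
+// the absolute address of fgFirstBB is materialized once by the 'lea' with EA_PTR_DSP_RELOC.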
+
+// emits the table and an instruction to get the address of the first element
+void CodeGen::genJumpTable(GenTree* treeNode)
+{
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
+ assert(treeNode->OperGet() == GT_JMPTABLE);
+
+ unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
+ unsigned jmpTabOffs;
+ unsigned jmpTabBase;
+
+ jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
+
+ jmpTabOffs = 0;
+
+ JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
+
+ for (unsigned i = 0; i < jumpCount; i++)
+ {
+ BasicBlock* target = *jumpTable++;
+ noway_assert(target->bbFlags & BBF_JMP_TARGET);
+
+ JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
+
+ getEmitter()->emitDataGenData(i, target);
+ }
+
+ getEmitter()->emitDataGenEnd();
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+ getEmitter()->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), treeNode->gtRegNum,
+ compiler->eeFindJitDataOffs(jmpTabBase), 0);
+ genProduceReg(treeNode);
+}
+
+// generate code for the locked operations:
+// GT_LOCKADD, GT_XCHG, GT_XADD
+void CodeGen::genLockedInstructions(GenTree* treeNode)
+{
+ GenTree* data = treeNode->gtOp.gtOp2;
+ GenTree* addr = treeNode->gtOp.gtOp1;
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber dataReg = data->gtRegNum;
+ regNumber addrReg = addr->gtRegNum;
+ instruction ins;
+
+ // all of these nodes implicitly do an indirection on op1
+ // so create a temporary node to feed into the pattern matching
+ GenTreeIndir i = indirForm(data->TypeGet(), addr);
+ genConsumeReg(addr);
+
+ // The register allocator should have extended the lifetime of the address
+ // so that it is not used as the target.
+ noway_assert(addrReg != targetReg);
+
+ // If data is a lclVar that's not a last use, we'd better have allocated a register
+ // for the result (except in the case of GT_LOCKADD which does not produce a register result).
+ assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) ||
+ (data->gtFlags & GTF_VAR_DEATH) != 0);
+
+ genConsumeIfReg(data);
+ if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
+ data->gtRegNum = targetReg;
+
+ // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
+ // original gtRegNum on data, after calling emitInsBinary below.
+ }
+ switch (treeNode->OperGet())
+ {
+ case GT_LOCKADD:
+ instGen(INS_lock);
+ ins = INS_add;
+ break;
+ case GT_XCHG:
+ // lock is implied by xchg
+ ins = INS_xchg;
+ break;
+ case GT_XADD:
+ instGen(INS_lock);
+ ins = INS_xadd;
+ break;
+ default:
+ unreached();
+ }
+ getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data);
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ genProduceReg(treeNode);
+ }
+}
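+
+// [Illustrative sketch, not part of the JIT code] The three node kinds above map to x86
+// interlocked primitives with roughly these semantics:
+//
+// GT_LOCKADD: lock add [addr], data // atomic add; no value produced
+// GT_XADD: lock xadd [addr], reg // atomic add; reg receives the original value
+// GT_XCHG: xchg [addr], reg // atomic exchange; 'lock' is implicit for xchg
+//
+// This is why data is first copied into targetReg above: for xadd/xchg the same register
+// operand is both the source value and the destination of the returned value.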
+
+// generate code for BoundsCheck nodes
+void CodeGen::genRangeCheck(GenTreePtr oper)
+{
+#ifdef FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
+#else // !FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
+#endif // !FEATURE_SIMD
+
+ GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
+
+ GenTreePtr arrLen = bndsChk->gtArrLen;
+ GenTreePtr arrIndex = bndsChk->gtIndex;
+ GenTreePtr arrRef = nullptr;
+ int lenOffset = 0;
+
+ GenTree * src1, *src2;
+ emitJumpKind jmpKind;
+
+ genConsumeRegs(arrLen);
+ genConsumeRegs(arrIndex);
+
+ if (arrIndex->isContainedIntOrIImmed())
+ {
+ // arrIndex is a contained constant. In this case
+ // we will generate one of the following
+ // cmp [mem], immed (if arrLen is a memory op)
+ // cmp reg, immed (if arrLen is in a reg)
+ //
+ // That is, arrLen cannot be a contained immed.
+ assert(!arrLen->isContainedIntOrIImmed());
+
+ src1 = arrLen;
+ src2 = arrIndex;
+ jmpKind = EJ_jbe;
+ }
+ else
+ {
+ // arrIndex could either be a contained memory op or a reg
+ // In this case we will generate one of the following
+ // cmp [mem], immed (if arrLen is a constant)
+ // cmp [mem], reg (if arrLen is in a reg)
+ // cmp reg, immed (if arrIndex is in a reg)
+ // cmp reg1, reg2 (if arrIndex is in reg1)
+ // cmp reg, [mem] (if arrLen is a memory op)
+ //
+ // That is, only one of arrIndex or arrLen can be a memory op.
+ assert(!arrIndex->isContainedMemoryOp() || !arrLen->isContainedMemoryOp());
+
+ src1 = arrIndex;
+ src2 = arrLen;
+ jmpKind = EJ_jae;
+ }
+
+ var_types bndsChkType = src2->TypeGet();
+#if DEBUG
+ // Bounds checks can only be 32 or 64 bit sized comparisons.
+ assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
+
+ // The type of the bounds check should always be wide enough to compare against the index.
+ assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet()));
+#endif // DEBUG
+
+ getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(bndsChkType), src1, src2);
+ genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB);
+}
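+
+// [Illustrative sketch, not part of the JIT code] Both operand orders above implement the
+// same unsigned check 'index >= length => throw':
+//
+// cmp arrLen, idxImmed ; jbe throw // taken when arrLen <= idxImmed, i.e. idxImmed >= arrLen
+// cmp arrIndex, arrLen ; jae throw // taken when arrIndex >= arrLen
+//
+// The unsigned comparison also catches negative indices, which wrap to large unsigned values.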
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
+// lower bound for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+// dimension - the dimension for which the lower bound offset will be returned.
+//
+// Return Value:
+// The offset.
+
+unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
+}
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
+// size for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+// dimension - the dimension for which the size offset will be returned.
+//
+// Return Value:
+// The offset.
+
+unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
+}
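+
+// [Illustrative sketch, not part of the JIT code] The arithmetic in the two helpers above
+// implies the following layout for a rank-R multi-dimensional array (all bounds fields are
+// TYP_INT even on 64-bit targets):
+//
+// eeGetArrayDataOffset(elemType) -> start of the bounds data, laid out as:
+// INT[0] .. INT[R-1] : dimension sizes <- genOffsetOfMDArrayDimensionSize(.., dim)
+// INT[R] .. INT[2R-1] : lower bounds <- genOffsetOfMDArrayLowerBound(.., dim)
+// followed by the array elements,
+//
+// which is why the lower-bound helper adds 'dimension + rank' INT slots while the
+// dimension-size helper adds only 'dimension'.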
+
+//------------------------------------------------------------------------
+// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
+// producing the effective index by subtracting the lower bound.
+//
+// Arguments:
+// arrIndex - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+
+void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
+{
+ GenTreePtr arrObj = arrIndex->ArrObj();
+ GenTreePtr indexNode = arrIndex->IndexExpr();
+
+ regNumber arrReg = genConsumeReg(arrObj);
+ regNumber indexReg = genConsumeReg(indexNode);
+ regNumber tgtReg = arrIndex->gtRegNum;
+
+ unsigned dim = arrIndex->gtCurrDim;
+ unsigned rank = arrIndex->gtArrRank;
+ var_types elemType = arrIndex->gtArrElemType;
+
+ noway_assert(tgtReg != REG_NA);
+
+ // Subtract the lower bound for this dimension.
+ // TODO-XArch-CQ: make this contained if it's an immediate that fits.
+ if (tgtReg != indexReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet());
+ }
+ getEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
+ genOffsetOfMDArrayLowerBound(elemType, rank, dim));
+ getEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
+ genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
+ genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL);
+
+ genProduceReg(arrIndex);
+}
+
+//------------------------------------------------------------------------
+// genCodeForArrOffset: Generates code to compute the flattened array offset for
+// one dimension of an array reference:
+// result = (prevDimOffset * dimSize) + effectiveIndex
+// where dimSize is obtained from the arrObj operand
+//
+// Arguments:
+// arrOffset - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+// Notes:
+// dimSize and effectiveIndex are always non-negative, the former by design,
+// and the latter because it has been normalized to be zero-based.
+
+void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
+{
+ GenTreePtr offsetNode = arrOffset->gtOffset;
+ GenTreePtr indexNode = arrOffset->gtIndex;
+ GenTreePtr arrObj = arrOffset->gtArrObj;
+
+ regNumber tgtReg = arrOffset->gtRegNum;
+
+ noway_assert(tgtReg != REG_NA);
+
+ unsigned dim = arrOffset->gtCurrDim;
+ unsigned rank = arrOffset->gtArrRank;
+ var_types elemType = arrOffset->gtArrElemType;
+
+ // We will use a temp register for the offset*scale+effectiveIndex computation.
+ regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+
+ // First, consume the operands in the correct order.
+ regNumber offsetReg = REG_NA;
+ if (!offsetNode->IsIntegralConst(0))
+ {
+ offsetReg = genConsumeReg(offsetNode);
+ }
+ else
+ {
+ assert(offsetNode->isContained());
+ }
+ regNumber indexReg = genConsumeReg(indexNode);
+ // Although arrReg may not be used in the constant-index case, if we have generated
+ // the value into a register, we must consume it, otherwise we will fail to end the
+ // live range of the gc ptr.
+ // TODO-CQ: Currently arrObj will always have a register allocated to it.
+ // We could avoid allocating a register for it, which would be of value if the arrObj
+ // is an on-stack lclVar.
+ regNumber arrReg = REG_NA;
+ if (arrObj->gtHasReg())
+ {
+ arrReg = genConsumeReg(arrObj);
+ }
+
+ if (!offsetNode->IsIntegralConst(0))
+ {
+ // Evaluate tgtReg = offsetReg*dim_size + indexReg.
+ // tmpReg is used to load dim_size and the result of the multiplication.
+ // Note that dim_size will never be negative.
+
+ getEmitter()->emitIns_R_AR(INS_mov, emitActualTypeSize(TYP_INT), tmpReg, arrReg,
+ genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
+ inst_RV_RV(INS_imul, tmpReg, offsetReg);
+
+ if (tmpReg == tgtReg)
+ {
+ inst_RV_RV(INS_add, tmpReg, indexReg);
+ }
+ else
+ {
+ if (indexReg != tgtReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL);
+ }
+ inst_RV_RV(INS_add, tgtReg, tmpReg);
+ }
+ }
+ else
+ {
+ if (indexReg != tgtReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
+ }
+ }
+ genProduceReg(arrOffset);
+}
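+
+// [Illustrative sketch, not part of the JIT code] For a two-dimensional access a[i, j] the
+// GT_ARR_INDEX / GT_ARR_OFFSET chain computes roughly:
+//
+// effIdx0 = i - lowerBound0 // genCodeForArrIndex, bounds-checked against dimSize0
+// offset0 = effIdx0 // this node, with a constant-zero prevOffset
+// effIdx1 = j - lowerBound1 // genCodeForArrIndex for dimension 1
+// offset1 = offset0 * dimSize1 + effIdx1 // this node for dimension 1
+//
+// The final flattened offset is later scaled by the element size and added to the array's
+// data offset by the nodes that consume it.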
+
+// Make a temporary indir we can feed to pattern matching routines
+// in cases where we don't want to instantiate all the indirs that would otherwise be needed.
+//
+GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
+{
+ GenTreeIndir i(GT_IND, type, base, nullptr);
+ i.gtRegNum = REG_NA;
+ // gtNext has to be non-null (because contained nodes can't be the last node in a block),
+ // but we don't want it to be a valid pointer.
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+// make a temporary int we can feed to pattern matching routines
+// in cases where we don't want to instantiate an actual constant node.
+//
+GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
+{
+ GenTreeIntCon i(type, value);
+ i.gtRegNum = REG_NA;
+ // gtNext has to be non-null (because contained nodes can't be the last node in a block),
+ // but we don't want it to be a valid pointer.
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
+{
+ instruction ins;
+
+ // Operations on SIMD vectors shouldn't come down this path.
+ assert(!varTypeIsSIMD(type));
+ if (varTypeIsFloating(type))
+ {
+ return ins_MathOp(oper, type);
+ }
+
+ switch (oper)
+ {
+ case GT_ADD:
+ ins = INS_add;
+ break;
+ case GT_AND:
+ ins = INS_and;
+ break;
+ case GT_LSH:
+ ins = INS_shl;
+ break;
+ case GT_MUL:
+ ins = INS_imul;
+ break;
+ case GT_NEG:
+ ins = INS_neg;
+ break;
+ case GT_NOT:
+ ins = INS_not;
+ break;
+ case GT_OR:
+ ins = INS_or;
+ break;
+ case GT_ROL:
+ ins = INS_rol;
+ break;
+ case GT_ROR:
+ ins = INS_ror;
+ break;
+ case GT_RSH:
+ ins = INS_sar;
+ break;
+ case GT_RSZ:
+ ins = INS_shr;
+ break;
+ case GT_SUB:
+ ins = INS_sub;
+ break;
+ case GT_XOR:
+ ins = INS_xor;
+ break;
+#if !defined(_TARGET_64BIT_)
+ case GT_ADD_LO:
+ ins = INS_add;
+ break;
+ case GT_ADD_HI:
+ ins = INS_adc;
+ break;
+ case GT_SUB_LO:
+ ins = INS_sub;
+ break;
+ case GT_SUB_HI:
+ ins = INS_sbb;
+ break;
+#endif // !defined(_TARGET_64BIT_)
+ default:
+ unreached();
+ break;
+ }
+ return ins;
+}
+
+//------------------------------------------------------------------------
+// genCodeForShift: Generates the code sequence for a GenTree node that
+// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
+//
+// Arguments:
+// tree - the bit shift node (that specifies the type of bit shift to perform).
+//
+// Assumptions:
+// a) All GenTrees are register allocated.
+// b) The shift-by-amount in tree->gtOp.gtOp2 is either a contained constant or
+// it's a register-allocated expression. If it is in a register that is
+// not RCX, it will be moved to RCX (so RCX better not be in use!).
+//
+void CodeGen::genCodeForShift(GenTreePtr tree)
+{
+ // Only the non-RMW case here.
+ assert(tree->OperIsShiftOrRotate());
+ assert(!tree->gtOp.gtOp1->isContained());
+ assert(tree->gtRegNum != REG_NA);
+
+ genConsumeOperands(tree->AsOp());
+
+ var_types targetType = tree->TypeGet();
+ instruction ins = genGetInsForOper(tree->OperGet(), targetType);
+
+ GenTreePtr operand = tree->gtGetOp1();
+ regNumber operandReg = operand->gtRegNum;
+
+ GenTreePtr shiftBy = tree->gtGetOp2();
+ if (shiftBy->isContainedIntOrIImmed())
+ {
+ // First, move the operand to the destination register and
+ // later on perform the shift in-place.
+ // (LSRA will try to avoid this situation through preferencing.)
+ if (tree->gtRegNum != operandReg)
+ {
+ inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
+ }
+
+ int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
+ inst_RV_SH(ins, emitTypeSize(tree), tree->gtRegNum, shiftByValue);
+ }
+ else
+ {
+ // We must have the number of bits to shift stored in ECX, since we constrained this node to
+ // sit in ECX. If it isn't there already, LSRA expects the code generator to move it,
+ // since ECX is a fixed single-register requirement.
+ regNumber shiftReg = shiftBy->gtRegNum;
+ if (shiftReg != REG_RCX)
+ {
+ // Issue the mov to RCX:
+ inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
+ }
+
+ // The operand to be shifted must not be in ECX
+ noway_assert(operandReg != REG_RCX);
+
+ if (tree->gtRegNum != operandReg)
+ {
+ inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
+ }
+ inst_RV_CL(ins, tree->gtRegNum, targetType);
+ }
+
+ genProduceReg(tree);
+}
+
+//------------------------------------------------------------------------
+// genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that
+// represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example:
+// GT_STOREIND( AddressTree, GT_SHL( Ind ( AddressTree ), Operand ) )
+//
+// Arguments:
+// storeIndNode: the GT_STOREIND node.
+//
+void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
+{
+ GenTree* data = storeInd->Data();
+ GenTree* addr = storeInd->Addr();
+
+ assert(data->OperIsShiftOrRotate());
+
+ // This function only handles the RMW case.
+ assert(data->gtOp.gtOp1->isContained());
+ assert(data->gtOp.gtOp1->isIndir());
+ assert(Lowering::IndirsAreEquivalent(data->gtOp.gtOp1, storeInd));
+ assert(data->gtRegNum == REG_NA);
+
+ var_types targetType = data->TypeGet();
+ genTreeOps oper = data->OperGet();
+ instruction ins = genGetInsForOper(oper, targetType);
+ emitAttr attr = EA_ATTR(genTypeSize(targetType));
+
+ GenTree* shiftBy = data->gtOp.gtOp2;
+ if (shiftBy->isContainedIntOrIImmed())
+ {
+ int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
+ ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue);
+ if (shiftByValue == 1)
+ {
+ // There is no source in this case, as the shift by count is embedded in the instruction opcode itself.
+ getEmitter()->emitInsRMW(ins, attr, storeInd);
+ }
+ else
+ {
+ getEmitter()->emitInsRMW(ins, attr, storeInd, shiftBy);
+ }
+ }
+ else
+ {
+ // We must have the number of bits to shift stored in ECX, since we constrained this node to
+ // sit in ECX. If it isn't there already, LSRA expects the code generator to move it,
+ // since ECX is a fixed single-register requirement.
+ regNumber shiftReg = shiftBy->gtRegNum;
+ if (shiftReg != REG_RCX)
+ {
+ // Issue the mov to RCX:
+ inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
+ }
+
+ // The shiftBy operand is implicit, so call the unary version of emitInsRMW.
+ getEmitter()->emitInsRMW(ins, attr, storeInd);
+ }
+}
+
+void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
+{
+ regNumber dstReg = tree->gtRegNum;
+ GenTree* unspillTree = tree;
+
+ if (tree->gtOper == GT_RELOAD)
+ {
+ unspillTree = tree->gtOp.gtOp1;
+ }
+
+ if ((unspillTree->gtFlags & GTF_SPILLED) != 0)
+ {
+ if (genIsRegCandidateLocal(unspillTree))
+ {
+ // Reset spilled flag, since we are going to load a local variable from its home location.
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+
+ GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ // Load local variable from its home location.
+ // In most cases the tree type will indicate the correct type to use for the load.
+ // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets
+ // widened when loaded into a register), and its size is not the same as genActualType of
+ // the type of the lclVar, then we need to change the type of the tree node when loading.
+ // This situation happens due to "optimizations" that avoid a cast and
+ // simply retype the node when using a long-typed lclVar as an int.
+ // While loading the int in that case would work for this use of the lclVar, if it is
+ // later used as a long, we will have incorrectly truncated the long.
+ // In the normalizeOnLoad case ins_Load will return an appropriate sign- or zero-
+ // extending load.
+
+ var_types treeType = unspillTree->TypeGet();
+ if (treeType != genActualType(varDsc->lvType) && !varTypeIsGC(treeType) && !varDsc->lvNormalizeOnLoad())
+ {
+ assert(!varTypeIsGC(varDsc));
+ var_types spillType = genActualType(varDsc->lvType);
+ unspillTree->gtType = spillType;
+ inst_RV_TT(ins_Load(spillType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
+ unspillTree->gtType = treeType;
+ }
+ else
+ {
+ inst_RV_TT(ins_Load(treeType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
+ }
+
+ unspillTree->SetInReg();
+
+ // TODO-Review: We would like to call:
+ // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
+ // instead of the following code, but this ends up hitting this assert:
+ // assert((regSet.rsMaskVars & regMask) == 0);
+ // due to issues with LSRA resolution moves.
+ // So, just force it for now. This probably indicates a condition that creates a GC hole!
+ //
+ // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
+ // because the variable is not really going live or dead, but that method is somewhat poorly
+ // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
+ // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp.
+
+ // Don't update the variable's location if we are just re-spilling it again.
+
+ if ((unspillTree->gtFlags & GTF_SPILL) == 0)
+ {
+ genUpdateVarReg(varDsc, tree);
+#ifdef DEBUG
+ if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
+ varDsc->PrintVarReg();
+ printf(" is becoming live ");
+ compiler->printTreeID(unspillTree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ regSet.AddMaskVars(genGetRegMask(varDsc));
+ }
+
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+ else if (unspillTree->IsMultiRegCall())
+ {
+ GenTreeCall* call = unspillTree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+ GenTreeCopyOrReload* reloadTree = nullptr;
+ if (tree->OperGet() == GT_RELOAD)
+ {
+ reloadTree = tree->AsCopyOrReload();
+ }
+
+ // In case of multi-reg call node, GTF_SPILLED flag on it indicates that
+ // one or more of its result regs are spilled. Call node needs to be
+ // queried to know which specific result regs to be unspilled.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ unsigned flags = call->GetRegSpillFlagByIdx(i);
+ if ((flags & GTF_SPILLED) != 0)
+ {
+ var_types dstType = retTypeDesc->GetReturnRegType(i);
+ regNumber unspillTreeReg = call->GetRegNumByIdx(i);
+
+ if (reloadTree != nullptr)
+ {
+ dstReg = reloadTree->GetRegNumByIdx(i);
+ if (dstReg == REG_NA)
+ {
+ dstReg = unspillTreeReg;
+ }
+ }
+ else
+ {
+ dstReg = unspillTreeReg;
+ }
+
+ TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i);
+ getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(),
+ 0);
+ compiler->tmpRlsTemp(t);
+ gcInfo.gcMarkRegPtrVal(dstReg, dstType);
+ }
+ }
+
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+ unspillTree->SetInReg();
+ }
+ else
+ {
+ TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
+ getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg,
+ t->tdTempNum(), 0);
+ compiler->tmpRlsTemp(t);
+
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+ unspillTree->SetInReg();
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+ }
+}
+
+// Do the liveness update for a subnode that is being consumed by codegen,
+// including the logic for reload if needed, and also take care
+// of placing the value in the desired register.
+void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg)
+{
+ if (needReg == REG_NA)
+ {
+ return;
+ }
+ regNumber treeReg = genConsumeReg(tree);
+ if (treeReg != needReg)
+ {
+ inst_RV_RV(INS_mov, needReg, treeReg, tree->TypeGet());
+ }
+}
+
+void CodeGen::genRegCopy(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_COPY);
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+
+ if (op1->IsMultiRegCall())
+ {
+ genConsumeReg(op1);
+
+ GenTreeCopyOrReload* copyTree = treeNode->AsCopyOrReload();
+ GenTreeCall* call = op1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ regNumber fromReg = call->GetRegNumByIdx(i);
+ regNumber toReg = copyTree->GetRegNumByIdx(i);
+
+ // A multi-reg GT_COPY node will have a valid reg only for those
+ // positions whose corresponding result reg of the call node needs
+ // to be copied.
+ if (toReg != REG_NA)
+ {
+ assert(toReg != fromReg);
+ inst_RV_RV(ins_Copy(type), toReg, fromReg, type);
+ }
+ }
+ }
+ else
+ {
+ var_types targetType = treeNode->TypeGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(targetReg != REG_NA);
+
+ // Check whether this node and the node from which we're copying the value have
+ // different register types. This can happen if (currently iff) we have a SIMD
+ // vector type that fits in an integer register, in which case it is passed as
+ // an argument, or returned from a call, in an integer register and must be
+ // copied if it's in an xmm register.
+
+ bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1));
+ bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode));
+ if (srcFltReg != tgtFltReg)
+ {
+ instruction ins;
+ regNumber fpReg;
+ regNumber intReg;
+ if (tgtFltReg)
+ {
+ ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
+ fpReg = targetReg;
+ intReg = op1->gtRegNum;
+ }
+ else
+ {
+ ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
+ intReg = targetReg;
+ fpReg = op1->gtRegNum;
+ }
+ inst_RV_RV(ins, fpReg, intReg, targetType);
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
+ }
+
+ if (op1->IsLocal())
+ {
+ // The lclVar will never be a def.
+ // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
+ // appropriately set the gcInfo for the copied value.
+ // If not, there are two cases we need to handle:
+ // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
+ // will remain live in its original register.
+ // genProduceReg() will appropriately set the gcInfo for the copied value,
+ // and genConsumeReg will reset it.
+ // - Otherwise, we need to update register info for the lclVar.
+
+ GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
+ assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
+
+ if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ // If we didn't just spill it (in genConsumeReg, above), then update the register info
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ // The old location is dying
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
+
+ genUpdateVarReg(varDsc, treeNode);
+
+ // The new location is going live
+ genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
+ }
+ }
+ }
+ }
+
+ genProduceReg(treeNode);
+}
+
+// Check that registers are consumed in the right order for the current node being generated.
+#ifdef DEBUG
+void CodeGen::genCheckConsumeNode(GenTree* treeNode)
+{
+ // GT_PUTARG_REG is consumed out of order.
+ if (treeNode->gtSeqNum != 0 && treeNode->OperGet() != GT_PUTARG_REG)
+ {
+ if (lastConsumedNode != nullptr)
+ {
+ if (treeNode == lastConsumedNode)
+ {
+ if (verbose)
+ {
+ printf("Node was consumed twice:\n ");
+ compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ }
+ }
+ else
+ {
+ if (verbose && (lastConsumedNode->gtSeqNum > treeNode->gtSeqNum))
+ {
+ printf("Nodes were consumed out-of-order:\n");
+ compiler->gtDispTree(lastConsumedNode, nullptr, nullptr, true);
+ compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ }
+ // assert(lastConsumedNode->gtSeqNum < treeNode->gtSeqNum);
+ }
+ }
+ lastConsumedNode = treeNode;
+ }
+}
+#endif // DEBUG
+
+//--------------------------------------------------------------------
+// genConsumeReg: Do liveness update for a subnode that is being
+// consumed by codegen.
+//
+// Arguments:
+// tree - GenTree node
+//
+// Return Value:
+// Returns the reg number of tree.
+// In case of multi-reg call node returns the first reg number
+// of the multi-reg return.
+regNumber CodeGen::genConsumeReg(GenTree* tree)
+{
+ if (tree->OperGet() == GT_COPY)
+ {
+ genRegCopy(tree);
+ }
+
+ // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
+ // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
+ // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
+ // always using GT_COPY to make the lclVar location explicit.
+ // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
+ // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
+ // the lclVar (normally when a lclVar is spilled it is then used from its former register
+ // location, which matches the gtRegNum on the node).
+ // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
+ // because if it's on the stack it will always get reloaded into tree->gtRegNum).
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum)
+ {
+ inst_RV_RV(INS_mov, tree->gtRegNum, varDsc->lvRegNum);
+ }
+ }
+
+ genUnspillRegIfNeeded(tree);
+
+ // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
+ genUpdateLife(tree);
+
+ assert(tree->gtHasReg());
+
+ // there are three cases where consuming a reg means clearing the bit in the live mask
+ // 1. it was not produced by a local
+ // 2. it was produced by a local that is going dead
+ // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
+
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ assert(varDsc->lvLRACandidate);
+
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
+ }
+ else if (varDsc->lvRegNum == REG_STK)
+ {
+ // We have loaded this into a register only temporarily
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask());
+ }
+
+ genCheckConsumeNode(tree);
+ return tree->gtRegNum;
+}
+
+// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
+void CodeGen::genConsumeAddress(GenTree* addr)
+{
+ if (!addr->isContained())
+ {
+ genConsumeReg(addr);
+ }
+ else if (addr->OperGet() == GT_LEA)
+ {
+ genConsumeAddrMode(addr->AsAddrMode());
+ }
+}
+
+// do liveness update for the operands of an address mode that is being consumed by codegen
+void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
+{
+ genConsumeOperands(addr);
+}
+
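+// Do liveness update for the register(s) consumed by 'tree', handling contained nodes:
+// a contained indir consumes only its address, the special contained GT_AND consumes its
+// operands, a contained (reg-optional) lclVar only has its life updated, and contained
+// spill temps are untracked so nothing needs to be done for them.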
+void CodeGen::genConsumeRegs(GenTree* tree)
+{
+#if !defined(_TARGET_64BIT_)
+ if (tree->OperGet() == GT_LONG)
+ {
+ genConsumeRegs(tree->gtGetOp1());
+ genConsumeRegs(tree->gtGetOp2());
+ return;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ if (tree->isContained())
+ {
+ if (tree->isContainedSpillTemp())
+ {
+ // spill temps are un-tracked and hence no need to update life
+ }
+ else if (tree->isIndir())
+ {
+ genConsumeAddress(tree->AsIndir()->Addr());
+ }
+ else if (tree->OperGet() == GT_AND)
+ {
+ // This is the special contained GT_AND that we created in Lowering::LowerCmp()
+ // Now we need to consume the operands of the GT_AND node.
+ genConsumeOperands(tree->AsOp());
+ }
+ else if (tree->OperGet() == GT_LCL_VAR)
+ {
+            // A contained lcl var must be living on the stack and marked as reg optional.
+ unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+ noway_assert(varDsc->lvRegNum == REG_STK);
+ noway_assert(tree->IsRegOptional());
+
+ // Update the life of reg optional lcl var.
+ genUpdateLife(tree);
+ }
+ else
+ {
+ assert(tree->OperIsLeaf());
+ }
+ }
+ else
+ {
+ genConsumeReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genConsumeOperands: Do liveness update for the operands of a unary or binary tree
+//
+// Arguments:
+// tree - the GenTreeOp whose operands will have their liveness updated.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Note that this logic is localized here because we must do the liveness update in
+// the correct execution order. This is important because we may have two operands
+// that involve the same lclVar, and if one is marked "lastUse" we must handle it
+// after the first.
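+//    For example, if both operands are uses of the same lclVar and the second use is the
+//    last use, consuming them in execution order ensures the "going dead" update happens
+//    on the second consume rather than the first.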
+
+void CodeGen::genConsumeOperands(GenTreeOp* tree)
+{
+ GenTree* firstOp = tree->gtOp1;
+ GenTree* secondOp = tree->gtOp2;
+ if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
+ {
+ assert(secondOp != nullptr);
+ firstOp = secondOp;
+ secondOp = tree->gtOp1;
+ }
+ if (firstOp != nullptr)
+ {
+ genConsumeRegs(firstOp);
+ }
+ if (secondOp != nullptr)
+ {
+ genConsumeRegs(secondOp);
+ }
+}
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+//------------------------------------------------------------------------
+// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node.
+//                            Also loads the addresses of the src/dst for the
+//                            rep mov operation into the right registers.
+//
+// Arguments:
+// putArgNode - the PUTARG_STK tree.
+// dstReg - the dstReg for the rep move operation.
+// srcReg - the srcReg for the rep move operation.
+// sizeReg - the sizeReg for the rep move operation.
+// baseVarNum - the varnum for the local used for placing the "by-value" args on the stack.
+//
+// Return Value:
+// None.
+//
+// Note: sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg
+//       for copying a struct with references on the stack.
+// The source address/offset is determined from the address on the GT_OBJ node, while
+// the destination address is the address contained in 'baseVarNum' plus the offset
+// provided in the 'putArgNode'.
+
+void CodeGen::genConsumePutStructArgStk(
+ GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum)
+{
+ assert(varTypeIsStruct(putArgNode));
+ assert(baseVarNum != BAD_VAR_NUM);
+
+ // The putArgNode children are always contained. We should not consume any registers.
+ assert(putArgNode->gtGetOp1()->isContained());
+
+ GenTree* dstAddr = putArgNode;
+
+ // Get the source address.
+ GenTree* src = putArgNode->gtGetOp1();
+ assert((src->gtOper == GT_OBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src))));
+ GenTree* srcAddr = src->gtGetOp1();
+
+ size_t size = putArgNode->getArgSize();
+
+ assert(dstReg != REG_NA);
+ assert(srcReg != REG_NA);
+
+ // Consume the registers only if they are not contained or set to REG_NA.
+ if (srcAddr->gtRegNum != REG_NA)
+ {
+ genConsumeReg(srcAddr);
+ }
+
+    // If op1 is already in dstReg, there is nothing to do.
+    // Otherwise, load op1 (the GT_ADDR) into dstReg to copy the struct to the stack by value.
+ if (dstAddr->gtRegNum != dstReg)
+ {
+        // Generate an LEA instruction to load the address of the outgoing arg area var + SlotNum offset
+        // (or the incoming arg area for tail calls) into dstReg (RDI for the rep mov).
+ // Destination is always local (on the stack) - use EA_PTRSIZE.
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset());
+ }
+
+ if (srcAddr->gtRegNum != srcReg)
+ {
+ if (srcAddr->OperIsLocalAddr())
+ {
+ // The OperLocalAddr is always contained.
+ assert(srcAddr->isContained());
+ GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon();
+
+ // Generate LEA instruction to load the LclVar address in RSI.
+ // Source is known to be on the stack. Use EA_PTRSIZE.
+ unsigned int offset = 0;
+ if (srcAddr->OperGet() == GT_LCL_FLD_ADDR)
+ {
+ offset = srcAddr->AsLclFld()->gtLclOffs;
+ }
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->gtLclNum, offset);
+ }
+ else
+ {
+ assert(srcAddr->gtRegNum != REG_NA);
+ // Source is not known to be on the stack. Use EA_BYREF.
+ getEmitter()->emitIns_R_R(INS_mov, EA_BYREF, srcReg, srcAddr->gtRegNum);
+ }
+ }
+
+ if (sizeReg != REG_NA)
+ {
+ inst_RV_IV(INS_mov, sizeReg, size, EA_8BYTE);
+ }
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+//------------------------------------------------------------------------
+// genConsumeBlockSize: Ensure that the block size is in the given register
+//
+// Arguments:
+// blkNode - The block node
+// sizeReg - The register into which the block's size should go
+//
+
+void CodeGen::genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg)
+{
+ if (sizeReg != REG_NA)
+ {
+ unsigned blockSize = blkNode->Size();
+ if (blockSize != 0)
+ {
+ assert(blkNode->gtRsvdRegs == genRegMask(sizeReg));
+ genSetRegToIcon(sizeReg, blockSize);
+ }
+ else
+ {
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ genConsumeReg(blkNode->AsDynBlk()->gtDynamicSize);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genConsumeBlockDst: Ensure that the block destination address is in its
+// allocated register.
+// Arguments:
+// blkNode - The block node
+//
+
+void CodeGen::genConsumeBlockDst(GenTreeBlk* blkNode)
+{
+ GenTree* dstAddr = blkNode->Addr();
+ genConsumeReg(dstAddr);
+}
+
+//------------------------------------------------------------------------
+// genConsumeBlockSrc: Ensure that the block source address is in its
+// allocated register if it is non-local.
+// Arguments:
+// blkNode - The block node
+//
+// Return Value:
+// Returns the source address node, if it is non-local,
+// and nullptr otherwise.
+
+GenTree* CodeGen::genConsumeBlockSrc(GenTreeBlk* blkNode)
+{
+ GenTree* src = blkNode->Data();
+ if (blkNode->OperIsCopyBlkOp())
+ {
+ // For a CopyBlk we need the address of the source.
+ if (src->OperGet() == GT_IND)
+ {
+ src = src->gtOp.gtOp1;
+ }
+ else
+ {
+ // This must be a local.
+ // For this case, there is no source address register, as it is a
+ // stack-based address.
+ assert(src->OperIsLocal());
+ return nullptr;
+ }
+ }
+ genConsumeReg(src);
+ return src;
+}
+
+//------------------------------------------------------------------------
+// genConsumeBlockOp: Ensure that the block's operands are enregistered
+// as needed.
+// Arguments:
+// blkNode - The block node
+//
+// Notes:
+// This ensures that the operands are consumed in the proper order to
+// obey liveness modeling.
+
+void CodeGen::genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
+{
+ // We have to consume the registers, and perform any copies, in the actual execution order.
+ // The nominal order is: dst, src, size. However this may have been changed
+ // with reverse flags on the blkNode and the setting of gtEvalSizeFirst in the case of a dynamic
+ // block size.
+ // Note that the register allocator ensures that the registers ON THE NODES will not interfere
+ // with one another if consumed (i.e. reloaded or moved to their ASSIGNED reg) in execution order.
+ // Further, it ensures that they will not interfere with one another if they are then copied
+ // to the REQUIRED register (if a fixed register requirement) in execution order. This requires,
+ // then, that we first consume all the operands, then do any necessary moves.
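+    // For example, for a GT_STORE_DYN_BLK with gtEvalSizeFirst == false and the reverse
+    // flag set, the consume order below is src, dst, size, and any copies into
+    // srcReg/dstReg/sizeReg are then performed in that same order.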
+
+ GenTree* dstAddr = blkNode->Addr();
+ GenTree* src = nullptr;
+ unsigned blockSize = blkNode->Size();
+ GenTree* size = nullptr;
+ bool evalSizeFirst = true;
+
+ if (blkNode->OperGet() == GT_STORE_DYN_BLK)
+ {
+ evalSizeFirst = blkNode->AsDynBlk()->gtEvalSizeFirst;
+ size = blkNode->AsDynBlk()->gtDynamicSize;
+ }
+
+    // First, consume all the sources in order.
+ if (evalSizeFirst)
+ {
+ genConsumeBlockSize(blkNode, sizeReg);
+ }
+ if (blkNode->IsReverseOp())
+ {
+ src = genConsumeBlockSrc(blkNode);
+ genConsumeBlockDst(blkNode);
+ }
+ else
+ {
+ genConsumeBlockDst(blkNode);
+ src = genConsumeBlockSrc(blkNode);
+ }
+ if (!evalSizeFirst)
+ {
+ genConsumeBlockSize(blkNode, sizeReg);
+ }
+ // Next, perform any necessary moves.
+ if (evalSizeFirst && (size != nullptr) && (size->gtRegNum != sizeReg))
+ {
+ inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
+ }
+ if (blkNode->IsReverseOp())
+ {
+ if ((src != nullptr) && (src->gtRegNum != srcReg))
+ {
+ inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
+ }
+ if (dstAddr->gtRegNum != dstReg)
+ {
+ inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
+ }
+ }
+ else
+ {
+ if (dstAddr->gtRegNum != dstReg)
+ {
+ inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
+ }
+ if ((src != nullptr) && (src->gtRegNum != srcReg))
+ {
+ inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
+ }
+ }
+ if (!evalSizeFirst && size != nullptr && (size->gtRegNum != sizeReg))
+ {
+ inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
+ }
+}
+
+//-------------------------------------------------------------------------
+// genProduceReg: do liveness update for register produced by the current
+// node in codegen.
+//
+// Arguments:
+// tree - Gentree node
+//
+// Return Value:
+// None.
+void CodeGen::genProduceReg(GenTree* tree)
+{
+ if (tree->gtFlags & GTF_SPILL)
+ {
+        // Code for a GT_COPY node gets generated as part of consuming regs by its parent.
+        // A GT_COPY node in turn produces a reg result and should never be marked to
+        // spill.
+        //
+        // Similarly, a GT_RELOAD node gets generated as part of consuming regs by its
+        // parent and should never be marked for spilling.
+ noway_assert(!tree->IsCopyOrReload());
+
+ if (genIsRegCandidateLocal(tree))
+ {
+ // Store local variable to its home location.
+ tree->gtFlags &= ~GTF_REG_VAL;
+ // Ensure that lclVar stores are typed correctly.
+ unsigned varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() ||
+ (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet())));
+ inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), tree, tree->gtRegNum);
+ }
+ else
+ {
+            // In the case of a multi-reg call node, the spill flag on the call node
+            // indicates that one or more of its allocated regs need to
+            // be spilled. The call node needs to be further queried to
+            // know which of its result regs need to be spilled.
+ if (tree->IsMultiRegCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ unsigned flags = call->GetRegSpillFlagByIdx(i);
+ if ((flags & GTF_SPILL) != 0)
+ {
+ regNumber reg = call->GetRegNumByIdx(i);
+ call->SetInReg();
+ regSet.rsSpillTree(reg, call, i);
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ }
+ }
+ }
+ else
+ {
+ tree->SetInReg();
+ regSet.rsSpillTree(tree->gtRegNum, tree);
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+
+ tree->gtFlags |= GTF_SPILLED;
+ tree->gtFlags &= ~GTF_SPILL;
+
+ return;
+ }
+ }
+
+ genUpdateLife(tree);
+
+ // If we've produced a register, mark it as a pointer, as needed.
+ if (tree->gtHasReg())
+ {
+ // We only mark the register in the following cases:
+ // 1. It is not a register candidate local. In this case, we're producing a
+ // register from a local, but the local is not a register candidate. Thus,
+ // we must be loading it as a temp register, and any "last use" flag on
+ // the register wouldn't be relevant.
+ // 2. The register candidate local is going dead. There's no point to mark
+ // the register as live, with a GC pointer, if the variable is dead.
+ if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0))
+ {
+            // A multi-reg call node will produce more than one register result.
+            // Mark all the regs produced by the call node.
+ if (tree->IsMultiRegCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ regNumber reg = call->GetRegNumByIdx(i);
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ gcInfo.gcMarkRegPtrVal(reg, type);
+ }
+ }
+ else if (tree->IsCopyOrReloadOfMultiRegCall())
+ {
+                // We should never see a reload of a multi-reg call here,
+                // because GT_RELOAD gets generated in the reg-consuming path.
+ noway_assert(tree->OperGet() == GT_COPY);
+
+                // A multi-reg GT_COPY node produces those regs to which
+                // the copy has taken place.
+ GenTreeCopyOrReload* copy = tree->AsCopyOrReload();
+ GenTreeCall* call = copy->gtGetOp1()->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ regNumber fromReg = call->GetRegNumByIdx(i);
+ regNumber toReg = copy->GetRegNumByIdx(i);
+
+ if (toReg != REG_NA)
+ {
+ gcInfo.gcMarkRegPtrVal(toReg, type);
+ }
+ }
+ }
+ else
+ {
+ gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
+ }
+ }
+ }
+ tree->SetInReg();
+}
+
+// transfer gc/byref status of src reg to dst reg
+void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
+{
+ regMaskTP srcMask = genRegMask(src);
+ regMaskTP dstMask = genRegMask(dst);
+
+ if (gcInfo.gcRegGCrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetGCref(dstMask);
+ }
+ else if (gcInfo.gcRegByrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetByref(dstMask);
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(dstMask);
+ }
+}
+
+// generates an ip-relative call or indirect call via reg ('call reg')
+// pass in 'addr' for a relative call or 'base' for an indirect register call
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE (not GC)
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize),
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset,
+ regNumber base,
+ bool isJump,
+ bool isNoGC)
+{
+#if !defined(_TARGET_X86_)
+ ssize_t argSize = 0;
+#endif // !defined(_TARGET_X86_)
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump,
+ emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
+}
+
+// generates an indirect call via addressing mode (call []) given an indir node
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE (not GC)
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize),
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset)
+{
+#if !defined(_TARGET_X86_)
+ ssize_t argSize = 0;
+#endif // !defined(_TARGET_X86_)
+ genConsumeAddress(indir->Addr());
+
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr,
+ argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
+ indir->Base() ? indir->Base()->gtRegNum : REG_NA,
+ indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset());
+}
+
+//------------------------------------------------------------------------
+// genStoreInd: Generate code for a GT_STOREIND node.
+//
+// Arguments:
+//    node - The GT_STOREIND node for which to generate code.
+//
+// Return Value:
+// none
+
+void CodeGen::genStoreInd(GenTreePtr node)
+{
+ assert(node->OperGet() == GT_STOREIND);
+
+#ifdef FEATURE_SIMD
+ // Storing Vector3 of size 12 bytes through indirection
+ if (node->TypeGet() == TYP_SIMD12)
+ {
+ genStoreIndTypeSIMD12(node);
+ return;
+ }
+#endif // FEATURE_SIMD
+
+ GenTreeStoreInd* storeInd = node->AsStoreInd();
+ GenTree* data = storeInd->Data();
+ GenTree* addr = storeInd->Addr();
+ var_types targetType = storeInd->TypeGet();
+
+ assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet()));
+
+ GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(storeInd, data);
+ if (writeBarrierForm != GCInfo::WBF_NoBarrier)
+ {
+ // data and addr must be in registers.
+ // Consume both registers so that any copies of interfering registers are taken care of.
+ genConsumeOperands(storeInd->AsOp());
+
+ if (genEmitOptimizedGCWriteBarrier(writeBarrierForm, addr, data))
+ {
+ return;
+ }
+
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_ARG_0);
+
+ // addr goes in REG_ARG_0
+ if (addr->gtRegNum != REG_ARG_0)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // data goes in REG_ARG_1
+ if (data->gtRegNum != REG_ARG_1)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
+ }
+
+ genGCWriteBarrier(storeInd, writeBarrierForm);
+ }
+ else
+ {
+ bool reverseOps = ((storeInd->gtFlags & GTF_REVERSE_OPS) != 0);
+ bool dataIsUnary = false;
+ bool isRMWMemoryOp = storeInd->IsRMWMemoryOp();
+ GenTree* rmwSrc = nullptr;
+
+ // We must consume the operands in the proper execution order, so that liveness is
+ // updated appropriately.
+ if (!reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+        // If storeInd represents an RMW memory op, then its data is a non-leaf node marked as contained,
+        // and the non-indir operand of data is the source of the RMW memory op.
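+        // For example, for [addr] = [addr] + icon, 'data' is a contained
+        // GT_ADD(GT_IND(addr), icon); rmwDst is the GT_IND and rmwSrc is the icon.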
+ if (isRMWMemoryOp)
+ {
+ assert(data->isContained() && !data->OperIsLeaf());
+
+ GenTreePtr rmwDst = nullptr;
+
+ dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0);
+ if (!dataIsUnary)
+ {
+ if (storeInd->IsRMWDstOp1())
+ {
+ rmwDst = data->gtGetOp1();
+ rmwSrc = data->gtGetOp2();
+ }
+ else
+ {
+ assert(storeInd->IsRMWDstOp2());
+ rmwDst = data->gtGetOp2();
+ rmwSrc = data->gtGetOp1();
+ }
+
+ genConsumeRegs(rmwSrc);
+ }
+ else
+ {
+                // *(p) = oper *(p): Here addr = p, and rmwSrc = rmwDst = *(p), i.e. GT_IND(p).
+                // For unary RMW ops, the src and dst of the RMW memory op are the same. Lowering
+                // clears operand counts on rmwSrc, so we don't need to perform a
+                // genConsumeReg() on it.
+ assert(storeInd->IsRMWDstOp1());
+ rmwSrc = data->gtGetOp1();
+ rmwDst = data->gtGetOp1();
+ assert(rmwSrc->isContained());
+ }
+
+ assert(rmwSrc != nullptr);
+ assert(rmwDst != nullptr);
+ assert(Lowering::IndirsAreEquivalent(rmwDst, storeInd));
+ }
+ else
+ {
+ genConsumeRegs(data);
+ }
+
+ if (reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ if (isRMWMemoryOp)
+ {
+ if (dataIsUnary)
+ {
+ // generate code for unary RMW memory ops like neg/not
+ getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
+ storeInd);
+ }
+ else
+ {
+ if (data->OperIsShiftOrRotate())
+ {
+ // Generate code for shift RMW memory ops.
+ // The data address needs to be op1 (it must be [addr] = [addr] <shift> <amount>, not [addr] =
+ // <amount> <shift> [addr]).
+ assert(storeInd->IsRMWDstOp1());
+ assert(rmwSrc == data->gtGetOp2());
+ genCodeForShiftRMW(storeInd);
+ }
+ else
+ {
+ // generate code for remaining binary RMW memory ops like add/sub/and/or/xor
+ getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
+ storeInd, rmwSrc);
+ }
+ }
+ }
+ else
+ {
+ getEmitter()->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(storeInd), storeInd);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genEmitOptimizedGCWriteBarrier: Generate write barrier store using the optimized
+// helper functions.
+//
+// Arguments:
+// writeBarrierForm - the write barrier form to use
+// addr - the address at which to do the store
+// data - the data to store
+//
+// Return Value:
+// true if an optimized write barrier form was used, false if not. If this
+// function returns false, the caller must emit a "standard" write barrier.
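+//
+// Notes:
+//    On x86 with NOGC_WRITE_BARRIERS, the optimized helpers are register-specific:
+//    the target address is passed in EDX (REG_WRITE_BARRIER) and the value to store
+//    stays in the register that names the helper (e.g. CORINFO_HELP_ASSIGN_REF_ESI).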
+
+bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data)
+{
+ assert(writeBarrierForm != GCInfo::WBF_NoBarrier);
+
+#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+ bool useOptimizedWriteBarriers = true;
+
+#ifdef DEBUG
+ useOptimizedWriteBarriers =
+ (writeBarrierForm != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
+#endif
+
+ if (!useOptimizedWriteBarriers)
+ {
+ return false;
+ }
+
+ const static int regToHelper[2][8] = {
+ // If the target is known to be in managed memory
+ {
+ CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
+ CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
+ },
+
+ // Don't know if the target is in managed memory
+ {
+ CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
+ CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
+ CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
+ },
+ };
+
+ noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
+ noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
+ noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
+ noway_assert(regToHelper[0][REG_ESP] == -1);
+ noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
+ noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
+ noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
+
+ noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
+ noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
+ noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
+ noway_assert(regToHelper[1][REG_ESP] == -1);
+ noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
+ noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
+ noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
+
+ regNumber reg = data->gtRegNum;
+ noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
+
+ // Generate the following code:
+ // lea edx, addr
+ // call write_barrier_helper_reg
+
+    // addr goes in REG_WRITE_BARRIER (EDX)
+    if (addr->gtRegNum != REG_WRITE_BARRIER) // REVIEW: can it ever not already be in this register?
+ {
+ inst_RV_RV(INS_mov, REG_WRITE_BARRIER, addr->gtRegNum, addr->TypeGet());
+ }
+
+ unsigned tgtAnywhere = 0;
+ if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked)
+ {
+ tgtAnywhere = 1;
+ }
+
+ // We might want to call a modified version of genGCWriteBarrier() to get the benefit of
+ // the FEATURE_COUNT_GC_WRITE_BARRIERS code there, but that code doesn't look like it works
+ // with rationalized RyuJIT IR. So, for now, just emit the helper call directly here.
+
+ genEmitHelperCall(regToHelper[tgtAnywhere][reg],
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+
+ return true;
+#else // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
+ return false;
+#endif // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
+}
+
+// Produce code for a GT_CALL node
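+// The late (register) args are consumed and moved into their argument registers, the
+// outgoing stack arg size is computed where the target needs it, the call itself is
+// emitted (direct, indirect, or fast tail call), the GC state of the arg registers is
+// killed, and the return value is moved from the ABI return register(s) into the
+// register(s) allocated to the call node.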
+void CodeGen::genCallInstruction(GenTreePtr node)
+{
+ GenTreeCall* call = node->AsCall();
+ assert(call->gtOper == GT_CALL);
+
+ gtCallTypes callType = (gtCallTypes)call->gtCallType;
+
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET;
+
+ // all virtuals should have been expanded into a control expression
+ assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
+
+ // Consume all the arg regs
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ continue;
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Deal with multi register passed struct args.
+ if (argNode->OperGet() == GT_LIST)
+ {
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+ regNumber argReg = REG_NA;
+
+ if (iterationNum == 0)
+ {
+ argReg = curArgTabEntry->regNum;
+ }
+ else
+ {
+ assert(iterationNum == 1);
+ argReg = curArgTabEntry->otherRegNum;
+ }
+
+ genConsumeReg(putArgRegNode);
+
+ // Validate the putArgRegNode has the right type.
+ assert(putArgRegNode->TypeGet() ==
+ compiler->GetTypeFromClassificationAndSizes(curArgTabEntry->structDesc
+ .eightByteClassifications[iterationNum],
+ curArgTabEntry->structDesc
+ .eightByteSizes[iterationNum]));
+ if (putArgRegNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
+ putArgRegNode->gtRegNum);
+ }
+ }
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ genConsumeReg(argNode);
+ if (argNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ }
+ }
+
+#if FEATURE_VARARG
+        // In the case of a varargs call,
+        // the ABI dictates that if we have floating point args,
+        // we must pass the enregistered arguments in both the
+        // integer and floating point registers, so let's do that.
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ regNumber targetReg = compiler->getCallArgIntRegister(argNode->gtRegNum);
+ instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
+ inst_RV_RV(ins, argNode->gtRegNum, targetReg);
+ }
+#endif // FEATURE_VARARG
+ }
+
+#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // The call will pop its arguments.
+ // for each putarg_stk:
+ ssize_t stackArgBytes = 0;
+ GenTreePtr args = call->gtCallArgs;
+ while (args)
+ {
+ GenTreePtr arg = args->gtOp.gtOp1;
+ if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
+ {
+#if defined(_TARGET_X86_)
+ assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG));
+ if (arg->OperGet() == GT_LONG)
+ {
+ assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
+ }
+#endif // defined(_TARGET_X86_)
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (genActualType(arg->TypeGet()) == TYP_STRUCT)
+ {
+ assert(arg->OperGet() == GT_PUTARG_STK);
+
+ GenTreeObj* obj = arg->gtGetOp1()->AsObj();
+ stackArgBytes = compiler->info.compCompHnd->getClassSize(obj->gtClass);
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
+ }
+ args = args->gtOp.gtOp2;
+ }
+#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ // Insert a null check on "this" pointer if asked.
+ if (call->NeedsNullCheck())
+ {
+ const regNumber regThis = genGetThisArgReg(call);
+ getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
+ }
+
+ // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
+ CORINFO_METHOD_HANDLE methHnd;
+ GenTree* target = call->gtControlExpr;
+ if (callType == CT_INDIRECT)
+ {
+ assert(target == nullptr);
+ target = call->gtCall.gtCallAddr;
+ methHnd = nullptr;
+ }
+ else
+ {
+ methHnd = call->gtCallMethHnd;
+ }
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+#ifdef DEBUG
+ // Pass the call signature information down into the emitter so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ if (callType != CT_HELPER)
+ {
+ sigInfo = call->callSig;
+ }
+#endif // DEBUG
+
+    // If this is a fast tail call, then we are done. In this case we set up the args (both reg args
+    // and stack args in the incoming arg area) and the call target in rax. The epilog sequence will
+    // generate "jmp rax".
+ if (call->IsFastTailCall())
+ {
+ // Don't support fast tail calling JIT helpers
+ assert(callType != CT_HELPER);
+
+ // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
+ assert(target != nullptr);
+
+ genConsumeReg(target);
+ if (target->gtRegNum != REG_RAX)
+ {
+ inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum);
+ }
+ return;
+ }
+
+    // For a pinvoke to unmanaged code we emit a label to clear
+    // the GC pointer state before the callsite.
+ // We can't utilize the typical lazy killing of GC pointers
+ // at (or inside) the callsite.
+ if (call->IsUnmanaged())
+ {
+ genDefineTempLabel(genCreateTempLabel());
+ }
+
+ // Determine return value size(s).
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ emitAttr retSize = EA_PTRSIZE;
+ emitAttr secondRetSize = EA_UNKNOWN;
+
+ if (call->HasMultiRegRetVal())
+ {
+ retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0));
+ secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1));
+ }
+ else
+ {
+ assert(!varTypeIsStruct(call));
+
+ if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
+ {
+ retSize = EA_GCREF;
+ }
+ else if (call->gtType == TYP_BYREF)
+ {
+ retSize = EA_BYREF;
+ }
+ }
+
+ bool fPossibleSyncHelperCall = false;
+ CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF;
+
+#ifdef DEBUGGING_SUPPORT
+ // We need to propagate the IL offset information to the call instruction, so we can emit
+ // an IL to native mapping record for the call, to support managed return value debugging.
+ // We don't want tail call helper calls that were converted from normal calls to get a record,
+ // so we skip this hash table lookup logic in that case.
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
+ {
+ (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
+ }
+#endif // DEBUGGING_SUPPORT
+
+#if defined(_TARGET_X86_)
+ // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will
+ // adjust its stack level accordingly.
+ // If the caller needs to explicitly pop its arguments, we must pass a negative value, and then do the
+ // pop when we're done.
+ ssize_t argSizeForEmitter = stackArgBytes;
+ if ((call->gtFlags & GTF_CALL_POP_ARGS) != 0)
+ {
+ argSizeForEmitter = -stackArgBytes;
+ }
+
+#endif // defined(_TARGET_X86_)
+
+ if (target != nullptr)
+ {
+ if (target->isContainedIndir())
+ {
+ if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed())
+ {
+                // Note that if gtControlExpr is an indir of an absolute address, we mark it as
+                // contained only if it can be encoded as a PC-relative offset.
+ assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler));
+
+ genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)(void*) target->AsIndir()
+ ->Base()
+ ->AsIntConCommon()
+ ->IconValue() X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ }
+ else
+ {
+ genEmitCall(emitter::EC_INDIR_ARD, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) target->AsIndir() X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ }
+ }
+ else
+ {
+ // We have already generated code for gtControlExpr evaluating it into a register.
+ // We just need to emit "call reg" in this case.
+ assert(genIsValidIntReg(target->gtRegNum));
+ genEmitCall(emitter::EC_INDIR_R, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) nullptr // addr
+ X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset, genConsumeReg(target));
+ }
+ }
+#ifdef FEATURE_READYTORUN_COMPILER
+ else if (call->gtEntryPoint.addr != nullptr)
+ {
+ genEmitCall((call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN
+ : emitter::EC_FUNC_TOKEN_INDIR,
+ methHnd, INDEBUG_LDISASM_COMMA(sigInfo)(void*) call->gtEntryPoint.addr X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ }
+#endif
+ else
+ {
+ // Generate a direct call to a non-virtual user defined or helper method
+ assert(callType == CT_HELPER || callType == CT_USER_FUNC);
+
+ void* addr = nullptr;
+ if (callType == CT_HELPER)
+ {
+ // Direct call to a helper method.
+ helperNum = compiler->eeGetHelperNum(methHnd);
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ void* pAddr = nullptr;
+ addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+
+ if (addr == nullptr)
+ {
+ addr = pAddr;
+ }
+
+ // tracking of region protected by the monitor in synchronized methods
+ if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ fPossibleSyncHelperCall = true;
+ }
+ }
+ else
+ {
+ // Direct call to a non-virtual user function.
+ addr = call->gtDirectCallAddress;
+ }
+
+ // Non-virtual direct calls to known addresses
+ genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ }
+
+ // if it was a pinvoke we may have needed to get the address of a label
+ if (genPendingCallLabel)
+ {
+ assert(call->IsUnmanaged());
+ genDefineTempLabel(genPendingCallLabel);
+ genPendingCallLabel = nullptr;
+ }
+
+#if defined(_TARGET_X86_)
+ // The call will pop its arguments.
+ genStackLevel -= stackArgBytes;
+#endif // defined(_TARGET_X86_)
+
+ // Update GC info:
+ // All Callee arg registers are trashed and no longer contain any GC pointers.
+    // TODO-XArch-Bug?: As a matter of fact, shouldn't we be killing all of the callee-trashed regs here?
+    // For now we will assert that, other than the arg regs, the gc-ref/byref sets don't contain any other
+    // registers from RBM_CALLEE_TRASH.
+ assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
+ gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
+
+ var_types returnType = call->TypeGet();
+ if (returnType != TYP_VOID)
+ {
+#ifdef _TARGET_X86_
+ if (varTypeIsFloating(returnType))
+ {
+ // Spill the value from the fp stack.
+ // Then, load it into the target register.
+ call->gtFlags |= GTF_SPILL;
+ regSet.rsSpillFPStack(call);
+ call->gtFlags |= GTF_SPILLED;
+ call->gtFlags &= ~GTF_SPILL;
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ regNumber returnReg;
+
+ if (call->HasMultiRegRetVal())
+ {
+ assert(retTypeDesc != nullptr);
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+                // If the regs allocated to the call node are different from the ABI return
+                // regs in which the call has returned its result, move the result
+                // into the regs allocated to the call node.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types regType = retTypeDesc->GetReturnRegType(i);
+ returnReg = retTypeDesc->GetABIReturnReg(i);
+ regNumber allocatedReg = call->GetRegNumByIdx(i);
+ if (returnReg != allocatedReg)
+ {
+ inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ // A Vector3 return value is stored in xmm0 and xmm1.
+ // RyuJIT assumes that the upper unused bits of xmm1 are cleared but
+ // the native compiler doesn't guarantee it.
+ if (returnType == TYP_SIMD12)
+ {
+ returnReg = retTypeDesc->GetABIReturnReg(1);
+ // Clear the upper 32 bits by two shift instructions.
+ // retReg = retReg << 96
+ // retReg = retReg >> 96
+ getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
+ }
+#endif // FEATURE_SIMD
+ }
+ else
+ {
+#ifdef _TARGET_X86_
+ if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
+ {
+ // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
+ // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
+ // correct argument registers.
+ returnReg = REG_PINVOKE_TCB;
+ }
+ else
+#endif // _TARGET_X86_
+ if (varTypeIsFloating(returnType))
+ {
+ returnReg = REG_FLOATRET;
+ }
+ else
+ {
+ returnReg = REG_INTRET;
+ }
+
+ if (call->gtRegNum != returnReg)
+ {
+ inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
+ }
+ }
+
+ genProduceReg(call);
+ }
+ }
+
+ // If there is nothing next, that means the result is thrown away, so this value is not live.
+ // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
+ if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
+ {
+ gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ }
+
+#if defined(_TARGET_X86_)
+ //-------------------------------------------------------------------------
+ // Create a label for tracking of region protected by the monitor in synchronized methods.
+ // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
+ // so the GC state vars have been updated before creating the label.
+
+ if (fPossibleSyncHelperCall)
+ {
+ switch (helperNum)
+ {
+ case CORINFO_HELP_MON_ENTER:
+ case CORINFO_HELP_MON_ENTER_STATIC:
+ noway_assert(compiler->syncStartEmitCookie == NULL);
+ compiler->syncStartEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ noway_assert(compiler->syncStartEmitCookie != NULL);
+ break;
+ case CORINFO_HELP_MON_EXIT:
+ case CORINFO_HELP_MON_EXIT_STATIC:
+ noway_assert(compiler->syncEndEmitCookie == NULL);
+ compiler->syncEndEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ noway_assert(compiler->syncEndEmitCookie != NULL);
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Is the caller supposed to pop the arguments?
+ if (((call->gtFlags & GTF_CALL_POP_ARGS) != 0) && (stackArgBytes != 0))
+ {
+ genAdjustSP(stackArgBytes);
+ }
+#endif // _TARGET_X86_
+}
+
+// Produce code for a GT_JMP node.
+// The arguments of the caller need to be transferred to the callee before exiting the caller.
+// The actual jump to the callee is generated as part of the caller's epilog sequence.
+// Therefore the codegen for GT_JMP only needs to ensure that the callee's arguments are set up correctly.
+void CodeGen::genJmpMethod(GenTreePtr jmp)
+{
+ assert(jmp->OperGet() == GT_JMP);
+ assert(compiler->compJmpOpUsed);
+
+ // If no arguments, nothing to do
+ if (compiler->info.compArgsCount == 0)
+ {
+ return;
+ }
+
+ // Make sure register arguments are in their initial registers
+ // and stack arguments are put back as well.
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+    // First, move any enregistered stack arguments back to the stack.
+    // At the same time, any reg arg not in the correct reg is moved back to its stack location.
+    //
+    // We are not strictly required to spill reg args that are not in the desired reg for a jmp call,
+    // but that would require us to deal with circularity while moving values around. Spilling
+    // to the stack keeps the implementation simple, which is not a bad trade-off given that jmp calls
+    // are not frequent.
+ for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+ {
+ varDsc = compiler->lvaTable + varNum;
+
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = compiler->lvaTable + fieldVarNum;
+ }
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
+ {
+            // Skip reg args that are already in the right register for the jmp call.
+            // If not, we will spill such args to their stack locations.
+ //
+ // If we need to generate a tail call profiler hook, then spill all
+ // arg regs to free them up for the callback.
+ if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
+ {
+ continue;
+ }
+ }
+ else if (varDsc->lvRegNum == REG_STK)
+ {
+            // Skip args that are currently living on the stack.
+ continue;
+ }
+
+ // If we came here it means either a reg argument not in the right register or
+ // a stack argument currently living in a register. In either case the following
+ // assert should hold.
+ assert(varDsc->lvRegNum != REG_STK);
+
+ var_types loadType = varDsc->lvaArgType();
+ getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0);
+
+ // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of varDsc->lvRegNum.
+ regMaskTP tempMask = varDsc->lvRegMask();
+ regSet.RemoveMaskVars(tempMask);
+ gcInfo.gcMarkRegSetNpt(tempMask);
+ if (compiler->lvaIsGCTracked(varDsc))
+ {
+#ifdef DEBUG
+ if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
+ }
+#endif // DEBUG
+
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // At this point all arg regs are free.
+ // Emit tail call profiler callback.
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif
+
+    // Next, move any register arguments that are not currently in their argument registers back into them.
+ regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
+ unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
+ for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+ {
+ varDsc = compiler->lvaTable + varNum;
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = compiler->lvaTable + fieldVarNum;
+ }
+ noway_assert(varDsc->lvIsParam);
+
+ // Skip if arg not passed in a register.
+ if (!varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varTypeIsStruct(varDsc))
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ assert(structDesc.passedInRegisters);
+
+ unsigned __int8 offset0 = 0;
+ unsigned __int8 offset1 = 0;
+ var_types type0 = TYP_UNKNOWN;
+ var_types type1 = TYP_UNKNOWN;
+
+ // Get the eightbyte data
+ compiler->GetStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1);
+
+ // Move the values into the right registers.
+ //
+
+ // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and
+ // argReg is going live. Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another
+ // basic block may not be expecting it. Therefore manually update life of argReg. Note that GT_JMP marks
+ // the end of the basic block and after which reg life and gc info will be recomputed for the new block in
+ // genCodeForBBList().
+ if (type0 != TYP_UNKNOWN)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->lvArgReg, varNum, offset0);
+ regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg);
+ gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0);
+ }
+
+ if (type1 != TYP_UNKNOWN)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->lvOtherArgReg, varNum, offset1);
+ regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg);
+ gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1);
+ }
+
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+ else
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // Register argument
+ noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+
+ // Is register argument already in the right register?
+ // If not load it from its stack location.
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+
+ if (varDsc->lvRegNum != argReg)
+ {
+ assert(genIsValidReg(argReg));
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+
+ // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.AddMaskVars(genRegMask(argReg));
+ gcInfo.gcMarkRegPtrVal(argReg, loadType);
+ if (compiler->lvaIsGCTracked(varDsc))
+ {
+#ifdef DEBUG
+ if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming dead\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing dead\n", varNum);
+ }
+#endif // DEBUG
+
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+ }
+
+#if FEATURE_VARARG && defined(_TARGET_AMD64_)
+        // In the case of a jmp call to a vararg method, also pass the float/double arg in the corresponding int arg
+        // register. This is due to the AMD64 ABI, which requires floating point values passed to varargs functions to
+        // be passed in both integer and floating point registers. It doesn't apply to x86, which passes floating point
+        // values on the stack.
+ if (compiler->info.compIsVarArgs)
+ {
+ regNumber intArgReg;
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+
+ if (varTypeIsFloating(loadType))
+ {
+ intArgReg = compiler->getCallArgIntRegister(argReg);
+ instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
+ inst_RV_RV(ins, argReg, intArgReg, loadType);
+ }
+ else
+ {
+ intArgReg = argReg;
+ }
+
+ fixedIntArgMask |= genRegMask(intArgReg);
+
+ if (intArgReg == REG_ARG_0)
+ {
+ assert(firstArgVarNum == BAD_VAR_NUM);
+ firstArgVarNum = varNum;
+ }
+ }
+#endif // FEATURE_VARARG
+ }
+
+#if FEATURE_VARARG && defined(_TARGET_AMD64_)
+ // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
+ // load the remaining arg registers (both int and float) from the corresponding
+    // shadow stack slots. This is because we don't know the number and types
+    // of the non-fixed params passed by the caller; therefore we have to assume the worst case,
+    // with the caller passing float/double args in both int and float arg regs.
+ //
+ // This doesn't apply to x86, which doesn't pass floating point values in floating
+ // point registers.
+ //
+    // The caller could have passed gc-ref/byref type var args. Since these are var args,
+    // the callee has no way of knowing their gc-ness. Therefore, mark the region that loads
+    // the remaining arg registers from shadow stack slots as non-GC interruptible.
+ if (fixedIntArgMask != RBM_NONE)
+ {
+ assert(compiler->info.compIsVarArgs);
+ assert(firstArgVarNum != BAD_VAR_NUM);
+
+ regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
+ if (remainingIntArgMask != RBM_NONE)
+ {
+ instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
+ getEmitter()->emitDisableGC();
+ for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
+ {
+ regNumber argReg = intArgRegs[argNum];
+ regMaskTP argRegMask = genRegMask(argReg);
+
+ if ((remainingIntArgMask & argRegMask) != 0)
+ {
+ remainingIntArgMask &= ~argRegMask;
+ getEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset);
+
+ // also load it in corresponding float arg reg
+ regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
+ inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
+ }
+
+ argOffset += REGSIZE_BYTES;
+ }
+ getEmitter()->emitEnableGC();
+ }
+ }
+#endif // FEATURE_VARARG
+}
+
+// produce code for a GT_LEA subnode
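+// The address is emitted in one of three forms, depending on which components are
+// present: [base + index*scale + offset], [base + offset], or [index*scale + offset].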
+void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
+{
+ emitAttr size = emitTypeSize(lea);
+ genConsumeOperands(lea);
+
+ if (lea->Base() && lea->Index())
+ {
+ regNumber baseReg = lea->Base()->gtRegNum;
+ regNumber indexReg = lea->Index()->gtRegNum;
+ getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset);
+ }
+ else if (lea->Base())
+ {
+ getEmitter()->emitIns_R_AR(INS_lea, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->gtOffset);
+ }
+ else if (lea->Index())
+ {
+ getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, REG_NA, lea->Index()->gtRegNum, lea->gtScale,
+ lea->gtOffset);
+ }
+
+ genProduceReg(lea);
+}
+
+//-------------------------------------------------------------------------------------------
+// genJumpKindsForTree: Determine the number and kinds of conditional branches
+// necessary to implement the given GT_CMP node
+//
+// Arguments:
+//    cmpTree           - (input)  The GenTree relop node that was used to set the condition codes
+//    jmpKind[2]        - (output) One or two conditional branch instructions
+//    jmpToTrueLabel[2] - (output) When true we branch to the true case
+//                        When false we create a second label and branch to the false case
+//                        Only GT_EQ for floating point compares can have a false value.
+//
+// Return Value:
+// Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[]
+//
+// Assumptions:
+// At least one conditional branch instruction will be returned.
+// Typically only one conditional branch is needed
+// and the second jmpKind[] value is set to EJ_NONE
+//
+// Notes:
+// jmpToTrueLabel[i] = true implies branch when the compare operation is true.
+// jmpToTrueLabel[i] = false implies branch when the compare operation is false.
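+//
+// For example, an ordered floating point GT_EQ produces jmpKind[0] = EJ_jpe with
+// jmpToTrueLabel[0] = false (unordered operands make the compare false) and
+// jmpKind[1] = EJ_je to the true label; integer compares always need only jmpKind[0].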
+//-------------------------------------------------------------------------------------------
+
+// static
+void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
+{
+ // Except for BEQ (= ordered GT_EQ) both jumps are to the true label.
+ jmpToTrueLabel[0] = true;
+ jmpToTrueLabel[1] = true;
+
+ // For integer comparisons just use genJumpKindForOper
+ if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal()))
+ {
+ CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+ jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind);
+ jmpKind[1] = EJ_NONE;
+ }
+ else
+ {
+ assert(cmpTree->OperIsCompare());
+
+ // For details on how we arrived at this mapping, see the comment block in genCodeForTreeNode()
+ // while generating code for compare operators (e.g. GT_EQ etc.).
+ if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
+ {
+ // Must branch if we have a NaN, unordered
+ switch (cmpTree->gtOper)
+ {
+ case GT_LT:
+ case GT_GT:
+ jmpKind[0] = EJ_jb;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_LE:
+ case GT_GE:
+ jmpKind[0] = EJ_jbe;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_NE:
+ jmpKind[0] = EJ_jpe;
+ jmpKind[1] = EJ_jne;
+ break;
+
+ case GT_EQ:
+ jmpKind[0] = EJ_je;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0)
+ {
+ // Do not branch if we have a NaN, unordered
+ switch (cmpTree->gtOper)
+ {
+ case GT_LT:
+ case GT_GT:
+ jmpKind[0] = EJ_ja;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_LE:
+ case GT_GE:
+ jmpKind[0] = EJ_jae;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_NE:
+ jmpKind[0] = EJ_jne;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_EQ:
+ jmpKind[0] = EJ_jpe;
+ jmpKind[1] = EJ_je;
+ jmpToTrueLabel[0] = false;
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ }
+}
+
+#if !defined(_TARGET_64BIT_)
+//------------------------------------------------------------------------
+// genJumpKindsForTreeLongHi: Generate the jump kinds for the compare of the high
+// parts of a long type compare on x86. The caller emits the low-part compare
+// separately; it is only needed when the high parts are equal.
+//
+// Arguments:
+// cmpTree - The GT_CMP node
+// jmpKind - Return array of jump kinds: jmpKind[0] is the jump to the true label,
+// jmpKind[1] is the jump to the false label (EJ_NONE means no jump is needed).
+//
+// Return Value:
+// None.
+//
+void CodeGen::genJumpKindsForTreeLongHi(GenTreePtr cmpTree, emitJumpKind jmpKind[2])
+{
+ assert(cmpTree->OperIsCompare());
+ CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+
+ switch (cmpTree->gtOper)
+ {
+ case GT_LT:
+ case GT_LE:
+ if (compareKind == CK_SIGNED)
+ {
+ jmpKind[0] = EJ_jl;
+ jmpKind[1] = EJ_jg;
+ }
+ else
+ {
+ jmpKind[0] = EJ_jb;
+ jmpKind[1] = EJ_ja;
+ }
+ break;
+
+ case GT_GT:
+ case GT_GE:
+ if (compareKind == CK_SIGNED)
+ {
+ jmpKind[0] = EJ_jg;
+ jmpKind[1] = EJ_jl;
+ }
+ else
+ {
+ jmpKind[0] = EJ_ja;
+ jmpKind[1] = EJ_jb;
+ }
+ break;
+
+ case GT_EQ:
+ // GT_EQ will not jump to the true label if the hi parts are equal
+ jmpKind[0] = EJ_NONE;
+ jmpKind[1] = EJ_jne;
+ break;
+
+ case GT_NE:
+ // GT_NE will always jump to the true label if the high parts are not equal
+ jmpKind[0] = EJ_jne;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// genCompareLong: Generate code for comparing two longs on x86 when the result of the compare
+// is manifested in a register.
+//
+// Arguments:
+// treeNode - the compare tree
+//
+// Return Value:
+// None.
+// Comments:
+// For long compares, we need to compare the high parts of operands first, then the low parts.
+// If the high compare is false, we do not need to compare the low parts. For less than and
+// greater than, if the high compare is true, we can assume the entire compare is true. For
+// compares that are realized in a register, we will generate:
+//
+// Opcode x86 equivalent Comment
+// ------ -------------- -------
+// GT_EQ cmp hiOp1,hiOp2 If any part is not equal, the entire compare
+// jne label is false.
+// cmp loOp1,loOp2
+// label: sete
+//
+// GT_NE cmp hiOp1,hiOp2 If any part is not equal, the entire compare
+// jne label is true.
+// cmp loOp1,loOp2
+// label: setne
+//
+// GT_LT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne label correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// label: setb
+//
+// GT_LE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne label correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// label: setbe
+//
+// GT_GT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne label correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// label: seta
+//
+// GT_GE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne label correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// label: setae
+//
+// For signed long comparisons, we need additional labels, as we need to use signed conditions on the
+// "set" instruction:
+//
+// GT_LT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne labelHi correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// setb Unsigned set for lo compare
+// jmp labelFinal
+// labelHi: setl Signed set for high compare
+// labelFinal:
+//
+// GT_LE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne labelHi correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// setbe Unsigned set for lo compare
+// jmp labelFinal
+// labelHi: setle Signed set for hi compare
+// labelFinal:
+//
+// GT_GT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne labelHi correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// seta Unsigned set for lo compare
+// jmp labelFinal
+// labelHi: setg Signed set for high compare
+// labelFinal:
+//
+// GT_GE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne labelHi correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// setae Unsigned set for lo compare
+// jmp labelFinal
+// labelHi: setge Signed set for hi compare
+// labelFinal:
+//
+// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
+void CodeGen::genCompareLong(GenTreePtr treeNode)
+{
+ assert(treeNode->OperIsCompare());
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+
+ assert(varTypeIsLong(op1->TypeGet()));
+ assert(varTypeIsLong(op2->TypeGet()));
+
+ regNumber targetReg = treeNode->gtRegNum;
+
+ genConsumeOperands(tree);
+
+ assert(targetReg != REG_NA);
+
+ GenTreePtr loOp1 = op1->gtGetOp1();
+ GenTreePtr hiOp1 = op1->gtGetOp2();
+ GenTreePtr loOp2 = op2->gtGetOp1();
+ GenTreePtr hiOp2 = op2->gtGetOp2();
+
+ // Create compare for the high parts
+ instruction ins = INS_cmp;
+ var_types cmpType = TYP_INT;
+ emitAttr cmpAttr = emitTypeSize(cmpType);
+
+ // Emit the compare instruction
+ getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2);
+
+ // Generate the first jump for the high compare
+ CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+
+ BasicBlock* labelHi = genCreateTempLabel();
+ BasicBlock* labelFinal = genCreateTempLabel();
+
+ if (compareKind == CK_SIGNED && (tree->gtOper != GT_NE && tree->gtOper != GT_EQ))
+ {
+ // If we are doing a signed comparison, we need to do a signed set if the high compare is true,
+ // but an unsigned set if we fall through to the low compare. If we have a GT_NE or GT_EQ, we do not
+ // need to worry about the sign of the comparison, so we can use the simplified case.
+
+ // We only have to check for equality for the hi comparison. If they are not equal, then the set will
+ // do the right thing. If they are equal, we have to check the lo halves.
+ inst_JMP(EJ_jne, labelHi);
+
+ // Emit the comparison. Perform the set for the lo. Jump to labelFinal
+ getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
+
+ // The low set must be unsigned
+ emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
+
+ inst_SET(jumpKindLo, targetReg);
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ genProduceReg(tree);
+
+ inst_JMP(EJ_jmp, labelFinal);
+
+ // Define the label for hi jump target here. If we have jumped here, we want to set
+ // the target register based on the jump kind of the actual compare type.
+
+ genDefineTempLabel(labelHi);
+ inst_SET(genJumpKindForOper(tree->gtOper, compareKind), targetReg);
+
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ genProduceReg(tree);
+
+ genDefineTempLabel(labelFinal);
+ }
+ else
+ {
+ // If the compare is unsigned, or if the sign doesn't change the set instruction, we can use
+ // the same set logic for both the hi and lo compare, so we don't need to jump to a high label,
+ // we can just jump to the set that the lo compare will use.
+
+ // We only have to check for equality for the hi comparison. If they are not equal, then the set will
+ // do the right thing. If they are equal, we have to check the lo halves.
+ inst_JMP(EJ_jne, labelFinal);
+
+ // Emit the comparison
+ getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
+
+ // Define the label for hi jump target here. If we have jumped here, we want to set
+ // the target register based on the jump kind of the lower half (the actual compare
+ // type). If we have fallen through, then we are doing a normal int compare for the
+ // lower parts
+
+ genDefineTempLabel(labelFinal);
+
+ // The low set must be unsigned
+ emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
+
+ inst_SET(jumpKindLo, targetReg);
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ genProduceReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genJTrueLong: Generate code for comparing two longs on x86 for the case where the result
+// is not manifested in a register.
+//
+// Arguments:
+// treeNode - the compare tree
+//
+// Return Value:
+// None.
+// Comments:
+// For long compares, we need to compare the high parts of operands first, then the low parts.
+// We only have to do the low compare if the high parts of the operands are equal.
+//
+// In the case where the result of a rel-op is not realized in a register, we generate:
+//
+// Opcode x86 equivalent Comment
+// ------ -------------- -------
+//
+// GT_LT; unsigned cmp hiOp1,hiOp2
+// jb trueLabel
+// ja falseLabel
+// cmp loOp1,loOp2
+// jb trueLabel
+// falseLabel:
+//
+// GT_LE; unsigned cmp hiOp1,hiOp2
+// jb trueLabel
+// ja falseLabel
+// cmp loOp1,loOp2
+// jbe trueLabel
+// falseLabel:
+//
+// GT_GT; unsigned cmp hiOp1,hiOp2
+// ja trueLabel
+// jb falseLabel
+// cmp loOp1,loOp2
+// ja trueLabel
+// falseLabel:
+//
+// GT_GE; unsigned cmp hiOp1,hiOp2
+// ja trueLabel
+// jb falseLabel
+// cmp loOp1,loOp2
+// jae trueLabel
+// falseLabel:
+//
+// GT_LT; signed cmp hiOp1,hiOp2
+// jl trueLabel
+// jg falseLabel
+// cmp loOp1,loOp2
+// jb trueLabel
+// falseLabel:
+//
+// GT_LE; signed cmp hiOp1,hiOp2
+// jl trueLabel
+// jg falseLabel
+// cmp loOp1,loOp2
+// jbe trueLabel
+// falseLabel:
+//
+// GT_GT; signed cmp hiOp1,hiOp2
+// jg trueLabel
+// jl falseLabel
+// cmp loOp1,loOp2
+// ja trueLabel
+// falseLabel:
+//
+// GT_GE; signed cmp hiOp1,hiOp2
+// jg trueLabel
+// jl falseLabel
+// cmp loOp1,loOp2
+// jae trueLabel
+// falseLabel:
+//
+// GT_EQ; cmp hiOp1,hiOp2
+// jne falseLabel
+// cmp loOp1,loOp2
+// je trueLabel
+// falseLabel:
+//
+// GT_NE; cmp hiOp1,hiOp2
+// jne trueLabel
+// cmp loOp1,loOp2
+// jne trueLabel
+// falseLabel:
+//
+// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
+void CodeGen::genJTrueLong(GenTreePtr treeNode)
+{
+ assert(treeNode->OperIsCompare());
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+
+ assert(varTypeIsLong(op1->TypeGet()));
+ assert(varTypeIsLong(op2->TypeGet()));
+
+ regNumber targetReg = treeNode->gtRegNum;
+
+ assert(targetReg == REG_NA);
+
+ GenTreePtr loOp1 = op1->gtGetOp1();
+ GenTreePtr hiOp1 = op1->gtGetOp2();
+ GenTreePtr loOp2 = op2->gtGetOp1();
+ GenTreePtr hiOp2 = op2->gtGetOp2();
+
+ // Emit the compare instruction
+ getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, hiOp1, hiOp2);
+
+ // Generate the first jump for the high compare
+ CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+
+ // TODO-X86-CQ: If the next block is a BBJ_ALWAYS, we can set falseLabel = compiler->compCurBB->bbNext->bbJumpDest.
+ BasicBlock* falseLabel = genCreateTempLabel();
+
+ emitJumpKind jumpKindHi[2];
+
+ // Generate the jumps for the high compare
+ genJumpKindsForTreeLongHi(tree, jumpKindHi);
+
+ BasicBlock* trueLabel = compiler->compCurBB->bbJumpDest;
+
+ if (jumpKindHi[0] != EJ_NONE)
+ {
+ inst_JMP(jumpKindHi[0], trueLabel);
+ }
+
+ if (jumpKindHi[1] != EJ_NONE)
+ {
+ inst_JMP(jumpKindHi[1], falseLabel);
+ }
+
+ // The low jump must be unsigned
+ emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
+
+ // Emit the comparison and the jump to the trueLabel
+ getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, loOp1, loOp2);
+
+ inst_JMP(jumpKindLo, trueLabel);
+
+ // Generate falseLabel, which is the false path. We will jump here if the high compare is false
+ // or fall through if the low compare is false.
+ genDefineTempLabel(falseLabel);
+}
+#endif //! defined(_TARGET_64BIT_)
+
+//------------------------------------------------------------------------
+// genCompareFloat: Generate code for comparing two floating point values
+//
+// Arguments:
+// treeNode - the compare tree
+//
+// Return Value:
+// None.
+// Comments:
+// SSE2 instruction ucomis[s|d] performs an unordered comparison and
+// updates rFLAGS register as follows.
+// Result of compare ZF PF CF
+// ----------------- ------------
+// Unordered 1 1 1 <-- this result implies one of the operands of the compare is a NaN.
+// Greater 0 0 0
+// Less Than 0 0 1
+// Equal 1 0 0
+//
+// From the above table the following equalities follow. As per ECMA spec *.UN opcodes perform
+// unordered comparison of floating point values. That is *.UN comparisons result in true when
+// one of the operands is a NaN, whereas ordered comparisons result in false.
+//
+// Opcode Amd64 equivalent Comment
+// ------ ----------------- --------
+// BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above
+// jb table
+//
+// BLT(a,b) ucomis[s|d] b, a Ja branches if CF=0 and ZF=0, which means b>a, which in turn implies a<b
+// ja
+//
+// BGT.UN(a,b) ucomis[s|d] b, a branch if b<a or unordered ==> branch if a>b or unordered
+// jb
+//
+// BGT(a, b) ucomis[s|d] a, b branch if a>b
+// ja
+//
+// BLE.UN(a,b) ucomis[s|d] a, b jbe branches if CF=1 or ZF=1, which implies a<=b or unordered
+// jbe
+//
+// BLE(a,b) ucomis[s|d] b, a jae branches if CF=0, which means b>=a, i.e. a<=b
+// jae
+//
+// BGE.UN(a,b) ucomis[s|d] b, a branch if b<=a or unordered ==> branch if a>=b or unordered
+// jbe
+//
+// BGE(a,b) ucomis[s|d] a, b branch if a>=b
+// jae
+//
+// BEQ.UN(a,b) ucomis[s|d] a, b branch if a==b or unordered. There is no BEQ.UN opcode in ECMA spec.
+// je This case is given for completeness, in case the JIT generates such
+// a gentree internally.
+//
+// BEQ(a,b) ucomis[s|d] a, b From the above table, PF=0 and ZF=1 corresponds to a==b.
+// jpe L1
+// je <true label>
+// L1:
+//
+// BNE(a,b) ucomis[s|d] a, b branch if a!=b. There is no BNE opcode in ECMA spec. This case is
+// jne given for completeness, in case the JIT generates such a gentree
+// internally.
+//
+// BNE.UN(a,b) ucomis[s|d] a, b From the above table, PF=1 or ZF=0 implies unordered or a!=b
+// jpe <true label>
+// jne <true label>
+//
+// As we can see from the above equalities, the operands of a compare operator need to be
+// reversed in the case of BLT/CLT, BGT.UN/CGT.UN, BLE/CLE, BGE.UN/CGE.UN.
+void CodeGen::genCompareFloat(GenTreePtr treeNode)
+{
+ assert(treeNode->OperIsCompare());
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+ genConsumeOperands(tree);
+
+ assert(varTypeIsFloating(op1Type));
+ assert(op1Type == op2Type);
+
+ regNumber targetReg = treeNode->gtRegNum;
+ instruction ins;
+ emitAttr cmpAttr;
+
+ bool reverseOps;
+ if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
+ {
+ // Unordered comparison case
+ reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
+ }
+ else
+ {
+ reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
+ }
+
+ if (reverseOps)
+ {
+ GenTreePtr tmp = op1;
+ op1 = op2;
+ op2 = tmp;
+ }
+
+ ins = ins_FloatCompare(op1Type);
+ cmpAttr = emitTypeSize(op1Type);
+
+ getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
+
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genCompareInt: Generate code for comparing ints or, on amd64, longs.
+//
+// Arguments:
+// treeNode - the compare tree
+//
+// Return Value:
+// None.
+void CodeGen::genCompareInt(GenTreePtr treeNode)
+{
+ assert(treeNode->OperIsCompare());
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+ genConsumeOperands(tree);
+
+ instruction ins;
+ emitAttr cmpAttr;
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm
+ assert(!varTypeIsFloating(op2Type));
+
+#ifdef _TARGET_X86_
+ assert(!varTypeIsLong(op1Type) && !varTypeIsLong(op2Type));
+#endif // _TARGET_X86_
+
+ // By default we use an int32 sized cmp instruction
+ //
+ ins = INS_cmp;
+ var_types cmpType = TYP_INT;
+
+ // In the if/then/else statement below we may change the
+ // 'cmpType' and/or 'ins' to generate a smaller instruction
+
+ // Are we comparing two values that are the same size?
+ //
+ if (genTypeSize(op1Type) == genTypeSize(op2Type))
+ {
+ if (op1Type == op2Type)
+ {
+ // If both types are exactly the same we can use that type
+ cmpType = op1Type;
+ }
+ else if (genTypeSize(op1Type) == 8)
+ {
+ // If we have two different int64 types we need to use a long compare
+ cmpType = TYP_LONG;
+ }
+
+ cmpAttr = emitTypeSize(cmpType);
+ }
+ else // Here we know that (op1Type != op2Type)
+ {
+ // Do we have a short compare against a constant in op2?
+ //
+ // We checked for this case in LowerCmp(): if we can perform a small compare immediate,
+ // we labeled this compare with GTF_RELOP_SMALL and, for unsigned small non-equality
+ // compares, with the GTF_UNSIGNED flag as well.
+ //
+ if (op2->isContainedIntOrIImmed() && ((tree->gtFlags & GTF_RELOP_SMALL) != 0))
+ {
+ assert(varTypeIsSmall(op1Type));
+ cmpType = op1Type;
+ }
+#ifdef _TARGET_AMD64_
+ else // compare two different sized operands
+ {
+ // For this case we don't want any memory operands, only registers or immediates
+ //
+ assert(!op1->isContainedMemoryOp());
+ assert(!op2->isContainedMemoryOp());
+
+ // Check for the case where one operand is an int64 type.
+ // Lower should have placed the 32-bit operand in a register;
+ // for signed comparisons we will sign-extend the 32-bit value in place.
+ //
+ bool op1Is64Bit = (genTypeSize(op1Type) == 8);
+ bool op2Is64Bit = (genTypeSize(op2Type) == 8);
+ if (op1Is64Bit)
+ {
+ cmpType = TYP_LONG;
+ if (!(tree->gtFlags & GTF_UNSIGNED) && !op2Is64Bit)
+ {
+ assert(op2->gtRegNum != REG_NA);
+ inst_RV_RV(INS_movsxd, op2->gtRegNum, op2->gtRegNum, op2Type);
+ }
+ }
+ else if (op2Is64Bit)
+ {
+ cmpType = TYP_LONG;
+ if (!(tree->gtFlags & GTF_UNSIGNED) && !op1Is64Bit)
+ {
+ assert(op1->gtRegNum != REG_NA);
+ }
+ }
+ }
+#endif // _TARGET_AMD64_
+
+ cmpAttr = emitTypeSize(cmpType);
+ }
+
+ // See if we can generate a "test" instruction instead of a "cmp".
+ // For this to generate the correct conditional branch we must have
+ // a compare against zero.
+ //
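+ // For example, when op2 is zero, "test eax, eax" sets the flags exactly as "cmp eax, 0"
+ // would, but with a shorter encoding since no immediate byte is needed.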
+ if (op2->IsIntegralConst(0))
+ {
+ if (op1->isContained())
+ {
+ // op1 can be a contained memory op
+ // or the special contained GT_AND that we created in Lowering::LowerCmp()
+ //
+ if ((op1->OperGet() == GT_AND))
+ {
+ noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed());
+
+ ins = INS_test; // we will generate "test andOp1, andOp2CnsVal"
+ op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1
+ op1 = op1->gtOp.gtOp1; // overwrite op1
+
+ if (op1->isContainedMemoryOp())
+ {
+ // use the size of andOp1 if it is a contained memory op.
+ cmpAttr = emitTypeSize(op1->TypeGet());
+ }
+ // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
+ }
+ }
+ else // op1 is not contained thus it must be in a register
+ {
+ ins = INS_test;
+ op2 = op1; // we will generate "test reg1,reg1"
+ // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
+ }
+ }
+
+ getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
+
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+}
+
+//-------------------------------------------------------------------------------------------
+// genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
+// corresponding to a binary Relational operator result.
+//
+// Arguments:
+// dstReg - The target register to set to 1 or 0
+// tree - The GenTree Relop node that was used to set the Condition codes
+//
+// Return Value: none
+//
+// Notes:
+// A full 64-bit value of either 1 or 0 is setup in the 'dstReg'
+//-------------------------------------------------------------------------------------------
+
+void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
+{
+ noway_assert((genRegMask(dstReg) & RBM_BYTE_REGS) != 0);
+
+ emitJumpKind jumpKind[2];
+ bool branchToTrueLabel[2];
+ genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);
+
+ if (jumpKind[1] == EJ_NONE)
+ {
+ // Set (lower byte of) reg according to the flags
+ inst_SET(jumpKind[0], dstReg);
+ }
+ else
+ {
+#ifdef DEBUG
+ // jmpKind[1] != EJ_NONE implies BEQ and BNE.UN of floating point values.
+ // These are represented by two conditions.
+ if (tree->gtOper == GT_EQ)
+ {
+ // This must be an ordered comparison.
+ assert((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
+ }
+ else
+ {
+ // This must be BNE.UN
+ assert((tree->gtOper == GT_NE) && ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0));
+ }
+#endif
+
+ // Here is the sample code generated in each case:
+ // BEQ == cmp, jpe <false label>, je <true label>
+ // That is, to materialize the comparison, reg needs to be set if PF=0 and ZF=1
+ // setnp reg // if (PF==0) reg = 1 else reg = 0
+ // jpe L1 // Jmp if PF==1
+ // sete reg
+ // L1:
+ //
+ // BNE.UN == cmp, jpe <true label>, jne <true label>
+ // That is, to materialize the comparison, reg needs to be set if either PF=1 or ZF=0;
+ // setp reg
+ // jpe L1
+ // setne reg
+ // L1:
+
+ // reverse the jmpkind condition before setting dstReg if it is to false label.
+ inst_SET(branchToTrueLabel[0] ? jumpKind[0] : emitter::emitReverseJumpKind(jumpKind[0]), dstReg);
+
+ BasicBlock* label = genCreateTempLabel();
+ inst_JMP(jumpKind[0], label);
+
+ // second branch is always to true label
+ assert(branchToTrueLabel[1]);
+ inst_SET(jumpKind[1], dstReg);
+ genDefineTempLabel(label);
+ }
+
+ var_types treeType = tree->TypeGet();
+ if (treeType == TYP_INT || treeType == TYP_LONG)
+ {
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), dstReg, dstReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ }
+ else
+ {
+ noway_assert(treeType == TYP_BYTE);
+ }
+}
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer cast
+// This method handles integer overflow checking casts
+// as well as ordinary integer casts.
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The treeNode is not a contained node and must have an assigned register.
+// For a signed convert from byte, the source must be in a byte-addressable register.
+// Neither the source nor target type can be a floating point type.
+//
+// TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register.
+// TODO: refactor to use getCastDescription
+//
+void CodeGen::genIntToIntCast(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_CAST);
+
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber sourceReg = castOp->gtRegNum;
+ var_types dstType = treeNode->CastToType();
+ bool isUnsignedDst = varTypeIsUnsigned(dstType);
+ var_types srcType = genActualType(castOp->TypeGet());
+ bool isUnsignedSrc = varTypeIsUnsigned(srcType);
+
+ // if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set
+ if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
+ {
+ srcType = genUnsignedType(srcType);
+ isUnsignedSrc = true;
+ }
+
+ bool requiresOverflowCheck = false;
+ bool needAndAfter = false;
+
+ assert(genIsValidIntReg(targetReg));
+ assert(genIsValidIntReg(sourceReg));
+
+ instruction ins = INS_invalid;
+ emitAttr size = EA_UNKNOWN;
+
+ if (genTypeSize(srcType) < genTypeSize(dstType))
+ {
+ // Widening cast
+
+ // Is this an Overflow checking cast?
+ // We only need to handle one case, as the other casts can never overflow.
+ // cast from TYP_INT to TYP_ULONG
+ //
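+ // For example, casting the TYP_INT value -1 to TYP_ULONG must throw: no unsigned 64-bit
+ // value corresponds to it, and the sign check emitted below (cmp/jl) catches this.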
+ if (treeNode->gtOverflow() && (srcType == TYP_INT) && (dstType == TYP_ULONG))
+ {
+ requiresOverflowCheck = true;
+ size = EA_ATTR(genTypeSize(srcType));
+ ins = INS_mov;
+ }
+ else
+ {
+ // we need the source size
+ size = EA_ATTR(genTypeSize(srcType));
+ noway_assert(size < EA_PTRSIZE);
+
+ ins = ins_Move_Extend(srcType, castOp->InReg());
+
+ /*
+ Special case: ins_Move_Extend assumes the destination type is no bigger
+ than TYP_INT. movsx and movzx can already extend all the way to
+ 64-bit, and a regular 32-bit mov clears the high 32 bits (like the non-existent movzxd),
+ but for a sign extension from TYP_INT to TYP_LONG, we need to use the movsxd opcode.
+ */
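+ // For example, a signed widening cast of a TYP_INT value to TYP_LONG ends up emitting
+ // "movsxd rax, ecx" (registers illustrative), which sign-extends the low 32 bits into all 64.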
+ if (!isUnsignedSrc && !isUnsignedDst && (size == EA_4BYTE) && (genTypeSize(dstType) > EA_4BYTE))
+ {
+#ifdef _TARGET_X86_
+ NYI_X86("Cast to 64 bit for x86/RyuJIT");
+#else // !_TARGET_X86_
+ ins = INS_movsxd;
+#endif // !_TARGET_X86_
+ }
+
+ /*
+ Special case: for a cast of byte to char we first
+ have to expand the byte (w/ sign extension), then
+ mask off the high bits.
+ Use 'movsx' followed by 'and'
+ */
+ if (!isUnsignedSrc && isUnsignedDst && (genTypeSize(dstType) < EA_4BYTE))
+ {
+ noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
+ needAndAfter = true;
+ }
+ }
+ }
+ else
+ {
+ // Narrowing cast, or sign-changing cast
+ noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
+
+ // Is this an Overflow checking cast?
+ if (treeNode->gtOverflow())
+ {
+ requiresOverflowCheck = true;
+ size = EA_ATTR(genTypeSize(srcType));
+ ins = INS_mov;
+ }
+ else
+ {
+ size = EA_ATTR(genTypeSize(dstType));
+ ins = ins_Move_Extend(dstType, castOp->InReg());
+ }
+ }
+
+ noway_assert(ins != INS_invalid);
+
+ genConsumeReg(castOp);
+
+ if (requiresOverflowCheck)
+ {
+ ssize_t typeMin = 0;
+ ssize_t typeMax = 0;
+ ssize_t typeMask = 0;
+ bool needScratchReg = false;
+ bool signCheckOnly = false;
+
+ /* Do we need to compare the value, or just check masks */
+
+ switch (dstType)
+ {
+ case TYP_BYTE:
+ typeMask = ssize_t((int)0xFFFFFF80);
+ typeMin = SCHAR_MIN;
+ typeMax = SCHAR_MAX;
+ break;
+
+ case TYP_UBYTE:
+ typeMask = ssize_t((int)0xFFFFFF00L);
+ break;
+
+ case TYP_SHORT:
+ typeMask = ssize_t((int)0xFFFF8000);
+ typeMin = SHRT_MIN;
+ typeMax = SHRT_MAX;
+ break;
+
+ case TYP_CHAR:
+ typeMask = ssize_t((int)0xFFFF0000L);
+ break;
+
+ case TYP_INT:
+ if (srcType == TYP_UINT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ typeMask = 0xFFFFFFFF80000000LL;
+ typeMin = INT_MIN;
+ typeMax = INT_MAX;
+ }
+ break;
+
+ case TYP_UINT:
+ if (srcType == TYP_INT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ needScratchReg = true;
+ }
+ break;
+
+ case TYP_LONG:
+ noway_assert(srcType == TYP_ULONG);
+ signCheckOnly = true;
+ break;
+
+ case TYP_ULONG:
+ noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT));
+ signCheckOnly = true;
+ break;
+
+ default:
+ NO_WAY("Unknown type");
+ return;
+ }
+
+ if (signCheckOnly)
+ {
+ // We only need to check for a negative value in sourceReg
+ inst_RV_IV(INS_cmp, sourceReg, 0, size);
+ genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
+ }
+ else
+ {
+ regNumber tmpReg = REG_NA;
+
+ if (needScratchReg)
+ {
+ // We need an additional temp register
+ // Make sure we have exactly one allocated.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+ }
+
+ // When we are converting from unsigned or to unsigned, we
+ // will only have to check for any bits set using 'typeMask'
+ if (isUnsignedSrc || isUnsignedDst)
+ {
+ if (needScratchReg)
+ {
+ inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG); // Move the 64-bit value to a writeable temp reg
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, size, tmpReg, 32); // Shift right by 32 bits
+ genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); // Throw if the shifted result is non-zero
+ }
+ else
+ {
+ noway_assert(typeMask != 0);
+ inst_RV_IV(INS_TEST, sourceReg, typeMask, size);
+ genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
+ }
+ }
+ else
+ {
+ // For a narrowing signed cast
+ //
+ // We must check the value is in a signed range.
+
+ // Compare with the MAX
+
+ noway_assert((typeMin != 0) && (typeMax != 0));
+
+ inst_RV_IV(INS_cmp, sourceReg, typeMax, size);
+ genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW);
+
+ // Compare with the MIN
+
+ inst_RV_IV(INS_cmp, sourceReg, typeMin, size);
+ genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
+ }
+ }
+
+ if (targetReg != sourceReg
+#ifdef _TARGET_AMD64_
+ // On amd64, we can hit this path for a same-register
+ // 4-byte to 8-byte widening conversion, and need to
+ // emit the instruction to set the high bits correctly.
+ || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
+#endif // _TARGET_AMD64_
+ )
+ inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
+ }
+ else // non-overflow checking cast
+ {
+ noway_assert(size < EA_PTRSIZE || srcType == dstType);
+
+ // We may have code transformations that result in casts where srcType is the same as dstType.
+ // e.g. Bug 824281, in which a comma is split by the rationalizer, leaving an assignment of a
+ // long constant to a long lclVar.
+ if (srcType == dstType)
+ {
+ ins = INS_mov;
+ }
+ /* Is the value sitting in a non-byte-addressable register? */
+ else if (castOp->InReg() && (size == EA_1BYTE) && !isByteReg(sourceReg))
+ {
+ if (isUnsignedDst)
+ {
+ // for unsigned values we can AND, so it need not be a byte register
+ ins = INS_AND;
+ }
+ else
+ {
+ // Move the value into a byte register
+ noway_assert(!"Signed byte convert from non-byte-addressable register");
+ }
+
+ /* Generate "mov targetReg, castOp->gtReg */
+ if (targetReg != sourceReg)
+ {
+ inst_RV_RV(INS_mov, targetReg, sourceReg, srcType);
+ }
+ }
+
+ if (ins == INS_AND)
+ {
+ noway_assert((needAndAfter == false) && isUnsignedDst);
+
+ /* Generate "and reg, MASK */
+ unsigned fillPattern;
+ if (size == EA_1BYTE)
+ {
+ fillPattern = 0xff;
+ }
+ else if (size == EA_2BYTE)
+ {
+ fillPattern = 0xffff;
+ }
+ else
+ {
+ fillPattern = 0xffffffff;
+ }
+
+ inst_RV_IV(INS_AND, targetReg, fillPattern, EA_4BYTE);
+ }
+#ifdef _TARGET_AMD64_
+ else if (ins == INS_movsxd)
+ {
+ noway_assert(!needAndAfter);
+ inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
+ }
+#endif // _TARGET_AMD64_
+ else if (ins == INS_mov)
+ {
+ noway_assert(!needAndAfter);
+ if (targetReg != sourceReg
+#ifdef _TARGET_AMD64_
+ // On amd64, 'mov' is the opcode used to zero-extend from
+ // 4 bytes to 8 bytes.
+ || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
+#endif // _TARGET_AMD64_
+ )
+ {
+ inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
+ }
+ }
+ else
+ {
+ noway_assert(ins == INS_movsx || ins == INS_movzx);
+
+ /* Generate "mov targetReg, castOp->gtReg */
+ inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
+
+ /* Mask off high bits for cast from byte to char */
+ if (needAndAfter)
+ {
+ noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
+ inst_RV_IV(INS_AND, targetReg, 0xFFFF, EA_4BYTE);
+ }
+ }
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// The cast is between float and double or vice versa.
+//
+void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
+{
+ // float <--> double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+#ifdef DEBUG
+ // If not contained, must be a valid float reg.
+ if (!op1->isContained())
+ {
+ assert(genIsValidFloatReg(op1->gtRegNum));
+ }
+#endif
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+ genConsumeOperands(treeNode->AsOp());
+ if (srcType == dstType && targetReg == op1->gtRegNum)
+ {
+ // The source and destination types are the same and also reside in the same register;
+ // we just need to consume and produce the reg in this case.
+ ;
+ }
+ else
+ {
+ instruction ins = ins_FloatConv(dstType, srcType);
+ getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genIntToFloatCast: Generate code to cast an int/long to float/double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType= int32/uint32/int64/uint64 and DstType=float/double.
+//
+void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
+{
+ // int type --> float/double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+#ifdef DEBUG
+ if (!op1->isContained())
+ {
+ assert(genIsValidIntReg(op1->gtRegNum));
+ }
+#endif
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+#if !defined(_TARGET_64BIT_)
+ NYI_IF(varTypeIsLong(srcType), "Conversion from long to float");
+#endif // !defined(_TARGET_64BIT_)
+
+ // Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we
+ // ensure srcType of a cast is non gc-type. Codegen should never see BYREF as source type except
+ // for GT_LCL_VAR_ADDR and GT_LCL_FLD_ADDR that represent stack addresses and can be considered
+ // as TYP_I_IMPL. In all other cases where src operand is a gc-type and not known to be on stack,
+ // Front-end (see fgMorphCast()) ensures this by assigning gc-type local to a non gc-type
+ // temp and using temp as operand of cast operation.
+ if (srcType == TYP_BYREF)
+ {
+ noway_assert(op1->OperGet() == GT_LCL_VAR_ADDR || op1->OperGet() == GT_LCL_FLD_ADDR);
+ srcType = TYP_I_IMPL;
+ }
+
+ // force the srcType to unsigned if GT_UNSIGNED flag is set
+ if (treeNode->gtFlags & GTF_UNSIGNED)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+ noway_assert(!varTypeIsGC(srcType));
+
+ // We should never be seeing srcType whose size is not sizeof(int) nor sizeof(long).
+ // For conversions from byte/sbyte/int16/uint16 to float/double, we would expect
+ // either the front-end or lowering phase to have generated two levels of cast.
+ // The first one is for widening smaller int type to int32 and the second one is
+ // to the float/double.
+ emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
+ noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) || (srcSize == EA_ATTR(genTypeSize(TYP_LONG))));
+
+ // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
+ // here since they should have been lowered appropriately.
+ noway_assert(srcType != TYP_UINT);
+ noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT));
+
+ // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used
+ // which does a partial write to lower 4/8 bytes of xmm register keeping the other
+ // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop,
+ // the partial write could introduce a false dependency and could cause a stall
+ // if there are further uses of xmmReg. We have such a case occurring with a
+ // customer reported version of SpectralNorm benchmark, resulting in 2x perf
+ // regression. To avoid false dependency, we emit "xorps xmmReg, xmmReg" before
+ // cvtsi2ss/sd instruction.
+
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitIns_R_R(INS_xorps, EA_4BYTE, treeNode->gtRegNum, treeNode->gtRegNum);
+
+ // Note that here we need to specify srcType that will determine
+ // the size of source reg/mem operand and rex.w prefix.
+ instruction ins = ins_FloatConv(dstType, TYP_INT);
+ getEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
+
+ // Handle the case of srcType = TYP_ULONG. SSE2 conversion instruction
+ // will interpret ULONG value as LONG. Hence we need to adjust the
+ // result if sign-bit of srcType is set.
+ if (srcType == TYP_ULONG)
+ {
+ // The instruction sequence below is less accurate than what clang
+ // and gcc generate. However, we keep the current sequence for backward compatibility.
+ // If we change the instructions below, FloatingPointUtils::convertUInt64ToDouble
+ // should also be updated to keep the conversion results consistent.
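+ // Note: 0x43f0000000000000 is the IEEE-754 double encoding of 2^64. cvtsi2sd interprets a
+ // ULONG value v >= 2^63 as the signed value v - 2^64, so adding 2^64 back recovers v.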
+ assert(dstType == TYP_DOUBLE);
+ assert(!op1->isContained());
+
+ // Set the flags without modifying op1.
+ // test op1Reg, op1Reg
+ inst_RV_RV(INS_test, op1->gtRegNum, op1->gtRegNum, srcType);
+
+ // No need to adjust result if op1 >= 0 i.e. positive
+ // Jge label
+ BasicBlock* label = genCreateTempLabel();
+ inst_JMP(EJ_jge, label);
+
+ // Adjust the result
+ // result = result + 0x43f00000 00000000
+ // addsd resultReg, 0x43f00000 00000000
+ GenTreePtr* cns = &u8ToDblBitmask;
+ if (*cns == nullptr)
+ {
+ double d;
+ static_assert_no_msg(sizeof(double) == sizeof(__int64));
+ *((__int64*)&d) = 0x43f0000000000000LL;
+
+ *cns = genMakeConst(&d, dstType, treeNode, true);
+ }
+ inst_RV_TT(INS_addsd, treeNode->gtRegNum, *cns);
+
+ genDefineTempLabel(label);
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genFloatToIntCast: Generate code to cast float/double to int/long
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType=float/double and DstType= int32/uint32/int64/uint64
+//
+// TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64
+//
+void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
+{
+ // we don't expect to see overflow detecting float/double --> int type conversions here
+ // as they should have been converted into helper calls by front-end.
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidIntReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+#ifdef DEBUG
+ if (!op1->isContained())
+ {
+ assert(genIsValidFloatReg(op1->gtRegNum));
+ }
+#endif
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
+
+ // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG).
+ // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the
+ // front-end or lowering phase to have generated two levels of cast. The first one is
+ // for float or double to int32/uint32 and the second one for narrowing int32/uint32 to
+ // the required smaller int type.
+ emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
+ noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
+
+ // We shouldn't be seeing uint64 here as it should have been converted
+ // into a helper call by either front-end or lowering phase.
+ noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
+
+ // If the dstType is TYP_UINT, the value may not fit in a signed 32-bit conversion even
+ // though the result only needs 32 bits. To handle this we pretend we are converting to a
+ // long (a 64-bit signed conversion) and then use only the low 32 bits of the result.
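+ // For example, converting 3000000000.0 to TYP_UINT: a 64-bit cvttsd2si produces 3000000000
+ // (too large for a signed 32-bit result), and its low 32 bits are exactly the desired uint value.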
+ if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
+ {
+ dstType = TYP_LONG;
+ }
+
+ // Note that we need to specify dstType here so that it will determine
+ // the size of destination integer register and also the rex.w prefix.
+ genConsumeOperands(treeNode->AsOp());
+ instruction ins = ins_FloatConv(TYP_INT, srcType);
+ getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCkfinite: Generate code for ckfinite opcode.
+//
+// Arguments:
+// treeNode - The GT_CKFINITE node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// GT_CKFINITE node has reserved an internal register.
+//
+// TODO-XArch-CQ - mark the operand as contained if known to be in
+// memory (e.g. field or an array element).
+//
+void CodeGen::genCkfinite(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_CKFINITE);
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ var_types targetType = treeNode->TypeGet();
+ int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent.
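+ // NaNs and infinities are the only IEEE-754 values whose exponent field is all ones, so
+ // "exponent == expMask" below is equivalent to "the value is not finite".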
+ regNumber targetReg = treeNode->gtRegNum;
+
+ // Extract exponent into a register.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ genConsumeReg(op1);
+
+#ifdef _TARGET_64BIT_
+
+ // Copy the floating-point value to an integer register. If we copied a double into a long, then
+ // right-shift the value so the high 32 bits of the floating-point value sit in the low 32
+ // bits of the integer register.
+ instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
+ inst_RV_RV(ins, op1->gtRegNum, tmpReg, targetType);
+ if (targetType == TYP_DOUBLE)
+ {
+ // right shift by 32 bits to get to exponent.
+ inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32);
+ }
+
+ // Mask exponent with all 1's and check if the exponent is all 1's
+ inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
+ inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
+
+ // If exponent is all 1's, throw ArithmeticException
+ genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
+
+ // if it is a finite value copy it to targetReg
+ if (targetReg != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
+ }
+
+#else // !_TARGET_64BIT_
+
+ // If the target type is TYP_DOUBLE, we want to extract the high 32 bits into the register.
+ // There is no easy way to do this. To not require an extra register, we'll use shuffles
+ // to move the high 32 bits into the low 32 bits, then shuffle it back, since we
+ // need to produce the value into the target register.
+ //
+ // For TYP_DOUBLE, we'll generate (for targetReg != op1->gtRegNum):
+ // movaps targetReg, op1->gtRegNum
+ // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
+ // and tmpReg, <mask>
+ // cmp tmpReg, <mask>
+ // je <throw block>
+ // movaps targetReg, op1->gtRegNum // copy the value again, instead of un-shuffling it
+ //
+ // For TYP_DOUBLE with (targetReg == op1->gtRegNum):
+ // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
+ // and tmpReg, <mask>
+ // cmp tmpReg, <mask>
+ // je <throw block>
+ // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX
+ //
+ // For TYP_FLOAT, it's the same as _TARGET_64BIT_:
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits
+ // and tmpReg, <mask>
+ // cmp tmpReg, <mask>
+ // je <throw block>
+ // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum
+
+ regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp.
+
+ if (targetType == TYP_DOUBLE)
+ {
+ if (targetReg != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
+ }
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
+ copyToTmpSrcReg = targetReg;
+ }
+ else
+ {
+ copyToTmpSrcReg = op1->gtRegNum;
+ }
+
+ // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point
+ // value, no matter the floating-point type.
+ inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT);
+
+ // Mask exponent with all 1's and check if the exponent is all 1's
+ inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
+ inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
+
+ // If exponent is all 1's, throw ArithmeticException
+ genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
+
+ if (targetReg != op1->gtRegNum)
+ {
+ // In both the TYP_FLOAT and TYP_DOUBLE case, the op1 register is untouched,
+ // so copy it to the targetReg. This is faster and smaller for TYP_DOUBLE
+ // than re-shuffling the targetReg.
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
+ }
+ else if (targetType == TYP_DOUBLE)
+ {
+ // We need to re-shuffle the targetReg to get the correct result.
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
+ }
+
+#endif // !_TARGET_64BIT_
+
+ genProduceReg(treeNode);
+}
+
+#ifdef _TARGET_AMD64_
+int CodeGenInterface::genSPtoFPdelta()
+{
+ int delta;
+
+#ifdef PLATFORM_UNIX
+
+ // We require frame chaining on Unix to support native tool unwinding (such as
+ // unwinding by the native debugger). We have a CLR-only extension to the
+ // unwind codes (UWOP_SET_FPREG_LARGE) to support SP->FP offsets larger than 240.
+ // If Unix ever supports EnC, the RSP == RBP assumption will have to be reevaluated.
+ delta = genTotalFrameSize();
+
+#else // !PLATFORM_UNIX
+
+ // As per Amd64 ABI, RBP offset from initial RSP can be between 0 and 240 if
+ // RBP needs to be reported in unwind codes. This case would arise for methods
+ // with localloc.
+ if (compiler->compLocallocUsed)
+ {
+ // We cannot base delta computation on compLclFrameSize since it changes from
+ // tentative to final frame layout and hence there is a possibility of
+ // under-estimating offset of vars from FP, which in turn results in under-
+ // estimating instruction size.
+ //
+ // To be predictable, and so as never to under-estimate the offset of vars from FP,
+ // we will always position FP at min(240, outgoing arg area size).
+ delta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize);
+ }
+ else if (compiler->opts.compDbgEnC)
+ {
+ // vm assumption on EnC methods is that rsp and rbp are equal
+ delta = 0;
+ }
+ else
+ {
+ delta = genTotalFrameSize();
+ }
+
+#endif // !PLATFORM_UNIX
+
+ return delta;
+}
+
+//---------------------------------------------------------------------
+// genTotalFrameSize - return the total size of the stack frame, including local size,
+// callee-saved register size, etc. For AMD64, this does not include the caller-pushed
+// return address.
+//
+// Return value:
+// Total frame size
+//
+
+int CodeGenInterface::genTotalFrameSize()
+{
+ assert(!IsUninitialized(compiler->compCalleeRegsPushed));
+
+ int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
+
+ assert(totalFrameSize >= 0);
+ return totalFrameSize;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
+// This number is going to be negative, since the Caller-SP is at a higher
+// address than the frame pointer.
+//
+// There must be a frame pointer to call this function!
+//
+// We can't compute this directly from the Caller-SP, since the frame pointer
+// is based on a maximum delta from Initial-SP, so first we find SP, then
+// compute the FP offset.
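+//
+// As a worked example (numbers purely illustrative): with 2 callee-saved registers pushed,
+// a 0x40-byte local frame, the pushed return address and the pushed RBP, the Caller-SP to
+// Initial-SP delta is -(0x50 + 8 + 8) = -0x60; adding an SP-to-FP delta of 0x50 yields a
+// Caller-SP-to-FP delta of -0x10.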
+
+int CodeGenInterface::genCallerSPtoFPdelta()
+{
+ assert(isFramePointerUsed());
+ int callerSPtoFPdelta;
+
+ callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
+
+ assert(callerSPtoFPdelta <= 0);
+ return callerSPtoFPdelta;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
+//
+// This number will be negative.
+
+int CodeGenInterface::genCallerSPtoInitialSPdelta()
+{
+ int callerSPtoSPdelta = 0;
+
+ callerSPtoSPdelta -= genTotalFrameSize();
+ callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
+
+ // compCalleeRegsPushed does not account for the frame pointer
+ // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
+ if (isFramePointerUsed())
+ {
+ callerSPtoSPdelta -= REGSIZE_BYTES;
+ }
+
+ assert(callerSPtoSPdelta <= 0);
+ return callerSPtoSPdelta;
+}
+#endif // _TARGET_AMD64_
+
+//-----------------------------------------------------------------------------------------
+// genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask"
+//
+// Arguments:
+// treeNode - tree node
+//
+// Return value:
+// None
+//
+// Assumptions:
+// i) tree oper is one of GT_NEG or GT_INTRINSIC Abs()
+// ii) tree type is floating point type.
+// iii) caller of this routine needs to call genProduceReg()
+void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode)
+{
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ assert(varTypeIsFloating(targetType));
+
+ float f;
+ double d;
+ GenTreePtr* bitMask = nullptr;
+ instruction ins = INS_invalid;
+ void* cnsAddr = nullptr;
+ bool dblAlign = false;
+
+ switch (treeNode->OperGet())
+ {
+ case GT_NEG:
+ // Neg(x) = flip the sign bit.
+ // Neg(f) = f ^ 0x80000000
+ // Neg(d) = d ^ 0x8000000000000000
+ ins = genGetInsForOper(GT_XOR, targetType);
+ if (targetType == TYP_FLOAT)
+ {
+ bitMask = &negBitmaskFlt;
+
+ static_assert_no_msg(sizeof(float) == sizeof(int));
+ *((int*)&f) = 0x80000000;
+ cnsAddr = &f;
+ }
+ else
+ {
+ bitMask = &negBitmaskDbl;
+
+ static_assert_no_msg(sizeof(double) == sizeof(__int64));
+ *((__int64*)&d) = 0x8000000000000000LL;
+ cnsAddr = &d;
+ dblAlign = true;
+ }
+ break;
+
+ case GT_INTRINSIC:
+ assert(treeNode->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs);
+
+ // Abs(x) = set sign-bit to zero
+ // Abs(f) = f & 0x7fffffff
+ // Abs(d) = d & 0x7fffffffffffffff
+ ins = genGetInsForOper(GT_AND, targetType);
+ if (targetType == TYP_FLOAT)
+ {
+ bitMask = &absBitmaskFlt;
+
+ static_assert_no_msg(sizeof(float) == sizeof(int));
+ *((int*)&f) = 0x7fffffff;
+ cnsAddr = &f;
+ }
+ else
+ {
+ bitMask = &absBitmaskDbl;
+
+ static_assert_no_msg(sizeof(double) == sizeof(__int64));
+ *((__int64*)&d) = 0x7fffffffffffffffLL;
+ cnsAddr = &d;
+ dblAlign = true;
+ }
+ break;
+
+ default:
+ assert(!"genSSE2: unsupported oper");
+ unreached();
+ break;
+ }
+
+ if (*bitMask == nullptr)
+ {
+ assert(cnsAddr != nullptr);
+ *bitMask = genMakeConst(cnsAddr, targetType, treeNode, dblAlign);
+ }
+
+ // We need an additional register for bitmask.
+ // Make sure we have one allocated.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ // Move operand into targetReg only if the reg reserved for
+ // internal purpose is not the same as targetReg.
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained());
+ regNumber operandReg = genConsumeReg(op1);
+ if (tmpReg != targetReg)
+ {
+ if (operandReg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, operandReg, targetType);
+ }
+
+ operandReg = tmpReg;
+ }
+
+ inst_RV_TT(ins_Load(targetType, false), tmpReg, *bitMask);
+ assert(ins != INS_invalid);
+ inst_RV_RV(ins, targetReg, operandReg, targetType);
+}
+
+//---------------------------------------------------------------------
+// genIntrinsic - generate code for a given intrinsic
+//
+// Arguments
+// treeNode - the GT_INTRINSIC node
+//
+// Return value:
+// None
+//
+void CodeGen::genIntrinsic(GenTreePtr treeNode)
+{
+ // Right now only Sqrt/Abs are treated as math intrinsics.
+ switch (treeNode->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sqrt:
+ noway_assert(treeNode->TypeGet() == TYP_DOUBLE);
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode,
+ treeNode->gtOp.gtOp1);
+ break;
+
+ case CORINFO_INTRINSIC_Abs:
+ genSSE2BitwiseOp(treeNode);
+ break;
+
+ default:
+ assert(!"genIntrinsic: Unsupported intrinsic");
+ unreached();
+ }
+
+ genProduceReg(treeNode);
+}
+
+//-------------------------------------------------------------------------- //
+// getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg.
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+//
+// Return value:
+// The number of the base variable.
+//
+// Note:
+// If tail call the outgoing args are placed in the caller's incoming arg stack space.
+// Otherwise, they go in the outgoing arg area on the current frame.
+//
+// On Windows the caller always creates slots (homing space) in its frame for the
+// first 4 arguments of a callee (register passed args). So, the baseVarNum is always 0.
+// For System V systems there is no such calling convention requirement, and the code needs to find
+// the first stack passed argument from the caller. This is done by iterating over
+// all the lvParam variables and finding the first one whose lvArgReg equals REG_STK.
+//
+unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_PUTARG_STK);
+
+ unsigned baseVarNum;
+
+#if FEATURE_FASTTAILCALL
+ bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
+#else
+ const bool putInIncomingArgArea = false;
+#endif
+
+ // Should we set up the stack arg in the incoming or the outgoing arg area?
+ // Fast tail calls (implemented as epilog+jmp) set up the stack arg in the incoming arg area.
+ // All other calls set it up in the outgoing arg area.
+ if (putInIncomingArgArea)
+ {
+ // See the note in the function header re: finding the first stack passed argument.
+ baseVarNum = getFirstArgWithStackSlot();
+ assert(baseVarNum != BAD_VAR_NUM);
+
+#ifdef DEBUG
+ // This must be a fast tail call.
+ assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
+
+        // Since this is a fast tail call, the existence of a first incoming arg is guaranteed,
+        // because a fast tail call requires that the caller's incoming arg area be >= the outgoing
+        // arg area required for the tail call.
+ LclVarDsc* varDsc = &(compiler->lvaTable[baseVarNum]);
+ assert(varDsc != nullptr);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(!varDsc->lvIsRegArg && varDsc->lvArgReg == REG_STK);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0.
+ assert(varDsc->lvIsRegArg && (varDsc->lvArgReg == REG_ARG_0 || varDsc->lvArgReg == REG_FLTARG_0));
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // DEBUG
+ }
+ else
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ baseVarNum = compiler->lvaOutgoingArgSpaceVar;
+#else // !FEATURE_FIXED_OUT_ARGS
+ NYI_X86("Stack args for x86/RyuJIT");
+ baseVarNum = BAD_VAR_NUM;
+#endif // !FEATURE_FIXED_OUT_ARGS
+ }
+
+ return baseVarNum;
+}
+
+//---------------------------------------------------------------------
+// genPutArgStk - generate code for passing an arg on the stack.
+//
+// Arguments
+//    treeNode      - the GT_PUTARG_STK node
+//
+// Return value:
+// None
+//
+void CodeGen::genPutArgStk(GenTreePtr treeNode)
+{
+ var_types targetType = treeNode->TypeGet();
+#ifdef _TARGET_X86_
+ noway_assert(targetType != TYP_STRUCT);
+
+ // The following logic is applicable for x86 arch.
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+ GenTreePtr data = treeNode->gtOp.gtOp1;
+
+ // On a 32-bit target, all of the long arguments have been decomposed into
+ // a separate putarg_stk for each of the upper and lower halves.
+ noway_assert(targetType != TYP_LONG);
+
+ int argSize = genTypeSize(genActualType(targetType));
+ genStackLevel += argSize;
+
+ // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
+ if (data->isContainedIntOrIImmed())
+ {
+ if (data->IsIconHandle())
+ {
+ inst_IV_handle(INS_push, data->gtIntCon.gtIconVal);
+ }
+ else
+ {
+ inst_IV(INS_push, data->gtIntCon.gtIconVal);
+ }
+ }
+ else if (data->isContained())
+ {
+ NYI_X86("Contained putarg_stk of non-constant");
+ }
+ else
+ {
+ genConsumeReg(data);
+ if (varTypeIsIntegralOrI(targetType))
+ {
+ inst_RV(INS_push, data->gtRegNum, targetType);
+ }
+ else
+ {
+ // Decrement SP.
+ inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
+ getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
+ }
+ }
+#else // !_TARGET_X86_
+ {
+ unsigned baseVarNum = getBaseVarForPutArgStk(treeNode);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (varTypeIsStruct(targetType))
+ {
+ genPutStructArgStk(treeNode, baseVarNum);
+ return;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ noway_assert(targetType != TYP_STRUCT);
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+ // Get argument offset on stack.
+        // Here we cross-check that the argument offset hasn't changed from lowering to codegen, since
+        // we store the arg slot number in the GT_PUTARG_STK node during the lowering phase.
+ int argOffset = treeNode->AsPutArgStk()->getArgOffset();
+
+#ifdef DEBUG
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
+ assert(curArgTabEntry);
+ assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
+#endif
+
+ GenTreePtr data = treeNode->gtGetOp1();
+
+ if (data->isContained())
+ {
+ getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset,
+ (int)data->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ genConsumeReg(data);
+ getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, baseVarNum,
+ argOffset);
+ }
+ }
+#endif // !_TARGET_X86_
+}
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+//---------------------------------------------------------------------
+// genPutStructArgStk - generate code for copying a struct arg on the stack by value.
+//                If the struct contains references to heap objects, the GC info
+//                for them is generated as well.
+//
+// Arguments
+//    treeNode      - the GT_PUTARG_STK node
+//    baseVarNum    - the variable number relative to which to put the argument on the stack.
+//                    For fast tail calls this is the caller's first stack-passed incoming argument.
+//                    For all other calls this is the outgoing arg space variable.
+//
+// Return value:
+// None
+//
+void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum)
+{
+ assert(treeNode->OperGet() == GT_PUTARG_STK);
+ assert(baseVarNum != BAD_VAR_NUM);
+
+ var_types targetType = treeNode->TypeGet();
+
+ if (varTypeIsSIMD(targetType))
+ {
+ regNumber srcReg = genConsumeReg(treeNode->gtGetOp1());
+ assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
+ getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), srcReg, baseVarNum,
+ treeNode->AsPutArgStk()->getArgOffset());
+ return;
+ }
+
+ assert(targetType == TYP_STRUCT);
+
+ GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk();
+ if (putArgStk->gtNumberReferenceSlots == 0)
+ {
+ switch (putArgStk->gtPutArgStkKind)
+ {
+ case GenTreePutArgStk::PutArgStkKindRepInstr:
+ genStructPutArgRepMovs(putArgStk, baseVarNum);
+ break;
+ case GenTreePutArgStk::PutArgStkKindUnroll:
+ genStructPutArgUnroll(putArgStk, baseVarNum);
+ break;
+ default:
+ unreached();
+ }
+ }
+ else
+ {
+        // There is no need to disable GC the way COPYOBJ does, because here the refs
+        // are always copied with atomic operations.
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA, baseVarNum);
+ GenTreePtr dstAddr = putArgStk;
+ GenTreePtr src = putArgStk->gtOp.gtOp1;
+ assert(src->OperGet() == GT_OBJ);
+ GenTreePtr srcAddr = src->gtGetOp1();
+
+ unsigned slots = putArgStk->gtNumSlots;
+
+        // We are always copying to the stack, so we don't need to use the write barrier.
+ BYTE* gcPtrs = putArgStk->gtGcPtrs;
+ unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots;
+
+ unsigned i = 0;
+ unsigned copiedSlots = 0;
+ while (i < slots)
+ {
+ switch (gcPtrs[i])
+ {
+ case TYPE_GC_NONE:
+ // Let's see if we can use rep movsq instead of a sequence of movsq instructions
+ // to save cycles and code size.
+ {
+ unsigned nonGcSlotCount = 0;
+
+ do
+ {
+ nonGcSlotCount++;
+ i++;
+ } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
+
+ // If we have a very small contiguous non-gc region, it's better just to
+ // emit a sequence of movsq instructions
+ if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ copiedSlots += nonGcSlotCount;
+ while (nonGcSlotCount > 0)
+ {
+ instGen(INS_movsq);
+ nonGcSlotCount--;
+ }
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
+ copiedSlots += nonGcSlotCount;
+ instGen(INS_r_movsq);
+ }
+ }
+ break;
+
+ case TYPE_GC_REF: // Is an object ref
+ case TYPE_GC_BYREF: // Is an interior pointer - promote it but don't scan it
+ {
+ // We have a GC (byref or ref) pointer
+ // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction,
+ // but the logic for emitting a GC info record is not available (it is internal for the emitter
+ // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do
+ // instGen(INS_movsq); and emission of gc info.
+
+ var_types memType;
+ if (gcPtrs[i] == TYPE_GC_REF)
+ {
+ memType = TYP_REF;
+ }
+ else
+ {
+ assert(gcPtrs[i] == TYPE_GC_BYREF);
+ memType = TYP_BYREF;
+ }
+
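+                    // For illustration, the sequence emitted below for one GC slot is roughly:
+                    //   mov  rcx, [rsi]           ; load the pointer-sized slot
+                    //   mov  [stk + offset], rcx  ; store it into the outgoing arg area (GC-reported)
+                    //   add  rsi, 8               ; advance the source pointer
+                    //   add  rdi, 8               ; keep the destination pointer in sync for later movsq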
+ getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0);
+ getEmitter()->emitIns_S_R(ins_Store(memType), emitTypeSize(memType), REG_RCX, baseVarNum,
+ ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE));
+
+ // Source for the copy operation.
+ // If a LocalAddr, use EA_PTRSIZE - copy from stack.
+ // If not a LocalAddr, use EA_BYREF - the source location is not on the stack.
+ getEmitter()->emitIns_R_I(INS_add, ((src->OperIsLocalAddr()) ? EA_PTRSIZE : EA_BYREF), REG_RSI,
+ TARGET_POINTER_SIZE);
+
+ // Always copying to the stack - outgoing arg area
+ // (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE.
+ getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE);
+ copiedSlots++;
+ gcPtrCount--;
+ i++;
+ }
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+ }
+
+ assert(gcPtrCount == 0);
+ }
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+/*****************************************************************************
+ *
+ * Create and record GC Info for the function.
+ */
+#ifdef _TARGET_AMD64_
+void
+#else // !_TARGET_AMD64_
+void*
+#endif // !_TARGET_AMD64_
+CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
+{
+#ifdef JIT32_GCENCODER
+ return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
+#else // !JIT32_GCENCODER
+ genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
+#endif // !JIT32_GCENCODER
+}
+
+#ifdef JIT32_GCENCODER
+void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
+ unsigned prologSize,
+ unsigned epilogSize DEBUGARG(void* codePtr))
+{
+ BYTE headerBuf[64];
+ InfoHdr header;
+
+ int s_cached;
+#ifdef DEBUG
+ size_t headerSize =
+#endif
+ compiler->compInfoBlkSize =
+ gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
+
+ size_t argTabOffset = 0;
+ size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
+
+#if DISPLAY_SIZES
+
+ if (genInterruptible)
+ {
+ gcHeaderISize += compiler->compInfoBlkSize;
+ gcPtrMapISize += ptrMapSize;
+ }
+ else
+ {
+ gcHeaderNSize += compiler->compInfoBlkSize;
+ gcPtrMapNSize += ptrMapSize;
+ }
+
+#endif // DISPLAY_SIZES
+
+ compiler->compInfoBlkSize += ptrMapSize;
+
+ /* Allocate the info block for the method */
+
+ compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
+
+#if 0 // VERBOSE_SIZES
+ // TODO-X86-Cleanup: 'dataSize', below, is not defined
+
+// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
+ {
+ printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
+ compiler->info.compILCodeSize,
+ compiler->compInfoBlkSize,
+ codeSize + dataSize,
+ codeSize + dataSize - prologSize - epilogSize,
+ 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
+ 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
+ compiler->info.compClassName,
+ compiler->info.compMethodName);
+}
+
+#endif
+
+ /* Fill in the info block and return it to the caller */
+
+ void* infoPtr = compiler->compInfoBlkAddr;
+
+ /* Create the method info block: header followed by GC tracking tables */
+
+ compiler->compInfoBlkAddr +=
+ gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
+
+ assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
+ compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
+ assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
+
+#ifdef DEBUG
+
+ if (0)
+ {
+ BYTE* temp = (BYTE*)infoPtr;
+ unsigned size = compiler->compInfoBlkAddr - temp;
+ BYTE* ptab = temp + headerSize;
+
+ noway_assert(size == headerSize + ptrMapSize);
+
+ printf("Method info block - header [%u bytes]:", headerSize);
+
+ for (unsigned i = 0; i < size; i++)
+ {
+ if (temp == ptab)
+ {
+ printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
+ printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
+ }
+ else
+ {
+ if (!(i % 16))
+ printf("\n %04X: ", i);
+ }
+
+ printf("%02X ", *temp++);
+ }
+
+ printf("\n");
+ }
+
+#endif // DEBUG
+
+#if DUMP_GC_TABLES
+
+ if (compiler->opts.dspGCtbls)
+ {
+ const BYTE* base = (BYTE*)infoPtr;
+ unsigned size;
+ unsigned methodSize;
+ InfoHdr dumpHeader;
+
+ printf("GC Info for method %s\n", compiler->info.compFullName);
+ printf("GC info size = %3u\n", compiler->compInfoBlkSize);
+
+ size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
+ // printf("size of header encoding is %3u\n", size);
+ printf("\n");
+
+ if (compiler->opts.dspGCtbls)
+ {
+ base += size;
+ size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
+ // printf("size of pointer table is %3u\n", size);
+ printf("\n");
+ noway_assert(compiler->compInfoBlkAddr == (base + size));
+ }
+ }
+
+#ifdef DEBUG
+ if (jitOpts.testMask & 128)
+ {
+ for (unsigned offs = 0; offs < codeSize; offs++)
+ {
+ gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
+ }
+ }
+#endif // DEBUG
+#endif // DUMP_GC_TABLES
+
+ /* Make sure we ended up generating the expected number of bytes */
+
+ noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
+
+ return infoPtr;
+}
+
+#else // !JIT32_GCENCODER
+void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
+{
+ IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
+ GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
+ GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+ assert(gcInfoEncoder);
+
+ // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+ gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+ // First we figure out the encoder ID's for the stack slots and registers.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+ // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+ gcInfoEncoder->FinalizeSlotIds();
+ // Now we can actually use those slot ID's to declare live ranges.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
+
+#if defined(DEBUGGING_SUPPORT)
+ if (compiler->opts.compDbgEnC)
+ {
+        // What we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp),
+        // which is:
+        //  - return address
+        //  - saved RBP
+        //  - saved 'this' pointer and bool for synchronized methods
+
+ // 4 slots for RBP + return address + RSI + RDI
+ int preservedAreaSize = 4 * REGSIZE_BYTES;
+
+ if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
+ {
+ preservedAreaSize += REGSIZE_BYTES;
+ }
+
+ // bool in synchronized methods that tracks whether the lock has been taken (takes 4 bytes on stack)
+ preservedAreaSize += 4;
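+            // For illustration, a synchronized instance method on AMD64 ends up with
+            // 4 * 8 (frame header) + 8 ('this') + 4 (lock-taken bool) = 44 bytes preserved.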
+ }
+
+ // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
+ // frame
+ gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
+ }
+#endif
+
+ gcInfoEncoder->Build();
+
+    // The GC encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t).
+    // Let's save the values anyway for debugging purposes.
+ compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+ compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+#endif // !JIT32_GCENCODER
+
+/*****************************************************************************
+ * Emit a call to a helper function.
+ *
+ */
+
+void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg)
+{
+ void* addr = nullptr;
+ void* pAddr = nullptr;
+
+ emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
+ addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
+ regNumber callTarget = REG_NA;
+ regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
+
+ if (!addr)
+ {
+ assert(pAddr != nullptr);
+
+        // Absolute indirect call addr
+        // Note: the order of checks is important. Always check the pc-relative encoding first and the
+        // zero-relative encoding next, because the former encoding is 1 byte smaller than the latter.
+ if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)pAddr) ||
+ genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr))
+ {
+ // generate call whose target is specified by 32-bit offset relative to PC or zero.
+ callType = emitter::EC_FUNC_TOKEN_INDIR;
+ addr = pAddr;
+ }
+ else
+ {
+#ifdef _TARGET_AMD64_
+ // If this indirect address cannot be encoded as 32-bit offset relative to PC or Zero,
+ // load it into REG_HELPER_CALL_TARGET and use register indirect addressing mode to
+ // make the call.
+ // mov reg, addr
+ // call [reg]
+
+ if (callTargetReg == REG_NA)
+ {
+ // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
+ // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
+ callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
+ regMaskTP callTargetMask = genRegMask(callTargetReg);
+ noway_assert((callTargetMask & killMask) == callTargetMask);
+ }
+ else
+ {
+                // The call target must not overwrite any live variable, even though it is not
+                // necessarily in the kill set for the call.
+ regMaskTP callTargetMask = genRegMask(callTargetReg);
+ noway_assert((callTargetMask & regSet.rsMaskVars) == RBM_NONE);
+ }
+#endif
+
+ callTarget = callTargetReg;
+ CodeGen::genSetRegToIcon(callTarget, (ssize_t)pAddr, TYP_I_IMPL);
+ callType = emitter::EC_INDIR_ARD;
+ }
+ }
+
+ getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize,
+ retSize FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(EA_UNKNOWN), gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, // IL offset
+ callTarget, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helper));
+
+ regTracker.rsTrashRegSet(killMask);
+ regTracker.rsTrashRegsForGCInterruptability();
+}
+
+#if !defined(_TARGET_64BIT_)
+//-----------------------------------------------------------------------------
+//
+// Code Generation for Long integers
+//
+//-----------------------------------------------------------------------------
+
+//------------------------------------------------------------------------
+// genStoreLongLclVar: Generate code to store a non-enregistered long lclVar
+//
+// Arguments:
+// treeNode - A TYP_LONG lclVar node.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// 'treeNode' must be a TYP_LONG lclVar node for a lclVar that has NOT been promoted.
+// Its operand must be a GT_LONG node.
+//
+void CodeGen::genStoreLongLclVar(GenTree* treeNode)
+{
+ emitter* emit = getEmitter();
+
+ GenTreeLclVarCommon* lclNode = treeNode->AsLclVarCommon();
+ unsigned lclNum = lclNode->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ assert(varDsc->TypeGet() == TYP_LONG);
+ assert(!varDsc->lvPromoted);
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ noway_assert(op1->OperGet() == GT_LONG);
+ genConsumeRegs(op1);
+
+ // Definitions of register candidates will have been lowered to 2 int lclVars.
+ assert(!treeNode->InReg());
+
+ GenTreePtr loVal = op1->gtGetOp1();
+ GenTreePtr hiVal = op1->gtGetOp2();
+ // NYI: Contained immediates.
+ NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate");
+ emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
+ emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
+}
+#endif // !defined(_TARGET_64BIT_)
+
+/*****************************************************************************
+* Unit testing of the XArch emitter: generate a bunch of instructions into the prolog
+* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
+* disassembler decodes the instructions the same way we do.
+*/
+
+// Uncomment "#define ALL_XARCH_EMITTER_UNIT_TESTS" to run all the unit tests here.
+// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
+//#define ALL_XARCH_EMITTER_UNIT_TESTS
+
+#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+void CodeGen::genAmd64EmitterUnitTests()
+{
+ if (!verbose)
+ {
+ return;
+ }
+
+ if (!compiler->opts.altJit)
+ {
+ // No point doing this in a "real" JIT.
+ return;
+ }
+
+ // Mark the "fake" instructions in the output.
+ printf("*************** In genAmd64EmitterUnitTests()\n");
+
+ // We use this:
+ // genDefineTempLabel(genCreateTempLabel());
+ // to create artificial labels to help separate groups of tests.
+
+ //
+ // Loads
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef ALL_XARCH_EMITTER_UNIT_TESTS
+#ifdef FEATURE_AVX_SUPPORT
+ genDefineTempLabel(genCreateTempLabel());
+
+ // vhaddpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_haddpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_addss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_addsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_addps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_addps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_addpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_addpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_subss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_subsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+    // vsubps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_subps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_subps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_subpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_subpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_mulss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_mulsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_mulps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_mulpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_mulps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_mulpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vandps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_andps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vandpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_andpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vandps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_andps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vandpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_andpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vorps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_orps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vorpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_orpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vorps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_orps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vorpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_orpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vdivss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vdivsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vdivss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vdivsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+
+    // vcvtss2sd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_cvtss2sd, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+    // vcvtsd2ss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+#endif // FEATURE_AVX_SUPPORT
+#endif // ALL_XARCH_EMITTER_UNIT_TESTS
+ printf("*************** End of genAmd64EmitterUnitTests()\n");
+}
+
+#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * Called for every scope info piece to record by the main genSetScopeInfo()
+ */
+
+void CodeGen::genSetScopeInfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& varLoc)
+{
+ /* We need to do some mapping while reporting back these variables */
+
+ unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
+ noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+
+ VarName name = nullptr;
+
+#ifdef DEBUG
+
+ for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
+ {
+ if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
+ {
+ name = compiler->info.compVarScopes[scopeNum].vsdName;
+ }
+ }
+
+    // Hang on to this info.
+
+ TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
+
+ tlvi.tlviVarNum = ilVarNum;
+ tlvi.tlviLVnum = LVnum;
+ tlvi.tlviName = name;
+ tlvi.tlviStartPC = startOffs;
+ tlvi.tlviLength = length;
+ tlvi.tlviAvailable = avail;
+ tlvi.tlviVarLoc = varLoc;
+
+#endif // DEBUG
+
+ compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
+}
+#endif // DEBUGGING_SUPPORT
+
+#endif // _TARGET_AMD64_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
new file mode 100644
index 0000000000..afbecdfc60
--- /dev/null
+++ b/src/jit/compiler.cpp
@@ -0,0 +1,10380 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Compiler XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif // _MSC_VER
+#include "hostallocator.h"
+#include "emit.h"
+#include "ssabuilder.h"
+#include "valuenum.h"
+#include "rangecheck.h"
+
+#ifndef LEGACY_BACKEND
+#include "lower.h"
+#endif // !LEGACY_BACKEND
+
+#include "jittelemetry.h"
+
+#if defined(DEBUG)
+// Column settings for COMPlus_JitDumpIR. We could (and probably should) make these programmable.
+#define COLUMN_OPCODE 30
+#define COLUMN_OPERANDS (COLUMN_OPCODE + 25)
+#define COLUMN_KINDS 110
+#define COLUMN_FLAGS (COLUMN_KINDS + 32)
+#endif
+
+#if defined(DEBUG)
+unsigned Compiler::jitTotalMethodCompiled = 0;
+#endif // defined(DEBUG)
+
+#if defined(DEBUG)
+LONG Compiler::jitNestingLevel = 0;
+#endif // defined(DEBUG)
+
+#ifdef ALT_JIT
+// static
+bool Compiler::s_pAltJitExcludeAssembliesListInitialized = false;
+AssemblyNamesList2* Compiler::s_pAltJitExcludeAssembliesList = nullptr;
+#endif // ALT_JIT
+
+/*****************************************************************************/
+inline unsigned getCurTime()
+{
+ SYSTEMTIME tim;
+
+ GetSystemTime(&tim);
+
+ return (((tim.wHour * 60) + tim.wMinute) * 60 + tim.wSecond) * 1000 + tim.wMilliseconds;
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************/
+
+static FILE* jitSrcFilePtr;
+
+static unsigned jitCurSrcLine;
+
+void Compiler::JitLogEE(unsigned level, const char* fmt, ...)
+{
+ va_list args;
+
+ if (verbose)
+ {
+ va_start(args, fmt);
+ vflogf(jitstdout, fmt, args);
+ va_end(args);
+ }
+
+ va_start(args, fmt);
+ vlogf(level, fmt, args);
+ va_end(args);
+}
+
+void Compiler::compDspSrcLinesByLineNum(unsigned line, bool seek)
+{
+ if (!jitSrcFilePtr)
+ {
+ return;
+ }
+
+ if (jitCurSrcLine == line)
+ {
+ return;
+ }
+
+ if (jitCurSrcLine > line)
+ {
+ if (!seek)
+ {
+ return;
+ }
+
+ if (fseek(jitSrcFilePtr, 0, SEEK_SET) != 0)
+ {
+ printf("Compiler::compDspSrcLinesByLineNum: fseek returned an error.\n");
+ }
+ jitCurSrcLine = 0;
+ }
+
+ if (!seek)
+ {
+ printf(";\n");
+ }
+
+ do
+ {
+ char temp[128];
+ size_t llen;
+
+ if (!fgets(temp, sizeof(temp), jitSrcFilePtr))
+ {
+ return;
+ }
+
+ if (seek)
+ {
+ continue;
+ }
+
+ llen = strlen(temp);
+ if (llen && temp[llen - 1] == '\n')
+ {
+ temp[llen - 1] = 0;
+ }
+
+ printf("; %s\n", temp);
+ } while (++jitCurSrcLine < line);
+
+ if (!seek)
+ {
+ printf(";\n");
+ }
+}
+
+/*****************************************************************************/
+
+void Compiler::compDspSrcLinesByNativeIP(UNATIVE_OFFSET curIP)
+{
+#ifdef DEBUGGING_SUPPORT
+
+ static IPmappingDsc* nextMappingDsc;
+ static unsigned lastLine;
+
+ if (!opts.dspLines)
+ {
+ return;
+ }
+
+ if (curIP == 0)
+ {
+ if (genIPmappingList)
+ {
+ nextMappingDsc = genIPmappingList;
+ lastLine = jitGetILoffs(nextMappingDsc->ipmdILoffsx);
+
+ unsigned firstLine = jitGetILoffs(nextMappingDsc->ipmdILoffsx);
+
+ unsigned earlierLine = (firstLine < 5) ? 0 : firstLine - 5;
+
+ compDspSrcLinesByLineNum(earlierLine, true); // display previous 5 lines
+ compDspSrcLinesByLineNum(firstLine, false);
+ }
+ else
+ {
+ nextMappingDsc = nullptr;
+ }
+
+ return;
+ }
+
+ if (nextMappingDsc)
+ {
+ UNATIVE_OFFSET offset = nextMappingDsc->ipmdNativeLoc.CodeOffset(genEmitter);
+
+ if (offset <= curIP)
+ {
+ IL_OFFSET nextOffs = jitGetILoffs(nextMappingDsc->ipmdILoffsx);
+
+ if (lastLine < nextOffs)
+ {
+ compDspSrcLinesByLineNum(nextOffs);
+ }
+ else
+ {
+ // This offset corresponds to a previous line. Rewind to that line
+
+ compDspSrcLinesByLineNum(nextOffs - 2, true);
+ compDspSrcLinesByLineNum(nextOffs);
+ }
+
+ lastLine = nextOffs;
+ nextMappingDsc = nextMappingDsc->ipmdNext;
+ }
+ }
+
+#endif
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+
+/*****************************************************************************/
+#if defined(DEBUG) || MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || DISPLAY_SIZES || CALL_ARG_STATS
+
+static unsigned genMethodCnt; // total number of methods JIT'ted
+unsigned genMethodICnt; // number of interruptible methods
+unsigned genMethodNCnt; // number of non-interruptible methods
+static unsigned genSmallMethodsNeedingExtraMemoryCnt = 0;
+
+#endif
+
+/*****************************************************************************/
+#if MEASURE_NODE_SIZE
+NodeSizeStats genNodeSizeStats;
+NodeSizeStats genNodeSizeStatsPerFunc;
+
+unsigned genTreeNcntHistBuckets[] = {10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 1000, 5000, 10000, 0};
+Histogram genTreeNcntHist(HostAllocator::getHostAllocator(), genTreeNcntHistBuckets);
+
+unsigned genTreeNsizHistBuckets[] = {1000, 5000, 10000, 50000, 100000, 500000, 1000000, 0};
+Histogram genTreeNsizHist(HostAllocator::getHostAllocator(), genTreeNsizHistBuckets);
+#endif // MEASURE_NODE_SIZE
+
+/*****************************************************************************
+ *
+ * Variables to keep track of total code amounts.
+ */
+
+#if DISPLAY_SIZES
+
+size_t grossVMsize; // Total IL code size
+size_t grossNCsize; // Native code + data size
+size_t totalNCsize; // Native code + data + GC info size (TODO-Cleanup: GC info size only accurate for JIT32_GCENCODER)
+size_t gcHeaderISize; // GC header size: interruptible methods
+size_t gcPtrMapISize; // GC pointer map size: interruptible methods
+size_t gcHeaderNSize; // GC header size: non-interruptible methods
+size_t gcPtrMapNSize; // GC pointer map size: non-interruptible methods
+
+#endif // DISPLAY_SIZES
+
+/*****************************************************************************
+ *
+ * Variables to keep track of argument counts.
+ */
+
+#if CALL_ARG_STATS
+
+unsigned argTotalCalls;
+unsigned argHelperCalls;
+unsigned argStaticCalls;
+unsigned argNonVirtualCalls;
+unsigned argVirtualCalls;
+
+unsigned argTotalArgs; // total number of args for all calls (including objectPtr)
+unsigned argTotalDWordArgs;
+unsigned argTotalLongArgs;
+unsigned argTotalFloatArgs;
+unsigned argTotalDoubleArgs;
+
+unsigned argTotalRegArgs;
+unsigned argTotalTemps;
+unsigned argTotalLclVar;
+unsigned argTotalDeferred;
+unsigned argTotalConst;
+
+unsigned argTotalObjPtr;
+unsigned argTotalGTF_ASGinArgs;
+
+unsigned argMaxTempsPerMethod;
+
+unsigned argCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
+Histogram argCntTable(HostAllocator::getHostAllocator(), argCntBuckets);
+
+unsigned argDWordCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
+Histogram argDWordCntTable(HostAllocator::getHostAllocator(), argDWordCntBuckets);
+
+unsigned argDWordLngCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
+Histogram argDWordLngCntTable(HostAllocator::getHostAllocator(), argDWordLngCntBuckets);
+
+unsigned argTempsCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
+Histogram argTempsCntTable(HostAllocator::getHostAllocator(), argTempsCntBuckets);
+
+#endif // CALL_ARG_STATS
+
+/*****************************************************************************
+ *
+ * Variables to keep track of basic block counts.
+ */
+
+#if COUNT_BASIC_BLOCKS
+
+// --------------------------------------------------
+// Basic block count frequency table:
+// --------------------------------------------------
+// <= 1 ===> 26872 count ( 56% of total)
+// 2 .. 2 ===> 669 count ( 58% of total)
+// 3 .. 3 ===> 4687 count ( 68% of total)
+// 4 .. 5 ===> 5101 count ( 78% of total)
+// 6 .. 10 ===> 5575 count ( 90% of total)
+// 11 .. 20 ===> 3028 count ( 97% of total)
+// 21 .. 50 ===> 1108 count ( 99% of total)
+// 51 .. 100 ===> 182 count ( 99% of total)
+// 101 .. 1000 ===> 34 count (100% of total)
+// 1001 .. 10000 ===> 0 count (100% of total)
+// --------------------------------------------------
+
+unsigned bbCntBuckets[] = {1, 2, 3, 5, 10, 20, 50, 100, 1000, 10000, 0};
+Histogram bbCntTable(HostAllocator::getHostAllocator(), bbCntBuckets);
+
+/* Histogram for the IL opcode size of methods with a single basic block */
+
+unsigned bbSizeBuckets[] = {1, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 0};
+Histogram bbOneBBSizeTable(HostAllocator::getHostAllocator(), bbSizeBuckets);
+
+#endif // COUNT_BASIC_BLOCKS
+
+/*****************************************************************************
+ *
+ * Used by optFindNaturalLoops to gather statistical information such as
+ * - total number of natural loops
+ * - number of loops with 1, 2, ... exit conditions
+ * - number of loops that have an iterator (for like)
+ * - number of loops that have a constant iterator
+ */
+
+#if COUNT_LOOPS
+
+unsigned totalLoopMethods; // counts the total number of methods that have natural loops
+unsigned maxLoopsPerMethod; // counts the maximum number of loops a method has
+unsigned totalLoopOverflows; // # of methods that identified more loops than we can represent
+unsigned totalLoopCount; // counts the total number of natural loops
+unsigned totalUnnatLoopCount; // counts the total number of (not-necessarily natural) loops
+unsigned totalUnnatLoopOverflows; // # of methods that identified more unnatural loops than we can represent
+unsigned iterLoopCount; // counts the # of loops with an iterator (for like)
+unsigned simpleTestLoopCount; // counts the # of loops with an iterator and a simple loop condition (iter < const)
+unsigned constIterLoopCount; // counts the # of loops with a constant iterator (for like)
+bool hasMethodLoops; // flag to keep track if we already counted a method as having loops
+unsigned loopsThisMethod; // counts the number of loops in the current method
+bool loopOverflowThisMethod; // True if we exceeded the max # of loops in the method.
+
+/* Histogram for number of loops in a method */
+
+unsigned loopCountBuckets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0};
+Histogram loopCountTable(HostAllocator::getHostAllocator(), loopCountBuckets);
+
+/* Histogram for number of loop exits */
+
+unsigned loopExitCountBuckets[] = {0, 1, 2, 3, 4, 5, 6, 0};
+Histogram loopExitCountTable(HostAllocator::getHostAllocator(), loopExitCountBuckets);
+
+#endif // COUNT_LOOPS
+
+//------------------------------------------------------------------------
+// getJitGCType: Given the VM's CorInfoGCType convert it to the JIT's var_types
+//
+// Arguments:
+// gcType - an enum value that originally came from an element
+// of the BYTE[] returned from getClassGClayout()
+//
+// Return Value:
+//    The corresponding enum value from the JIT's var_types
+//
+// Notes:
+//   The gcLayout of each field of a struct is returned from getClassGClayout()
+//   as a BYTE[], but each BYTE element is actually a CorInfoGCType value.
+//   Note that when we 'know' there is only one element in this array,
+//   the JIT will often pass the address of a single BYTE instead of a BYTE[].
+//
+
+var_types Compiler::getJitGCType(BYTE gcType)
+{
+ var_types result = TYP_UNKNOWN;
+ CorInfoGCType corInfoType = (CorInfoGCType)gcType;
+
+ if (corInfoType == TYPE_GC_NONE)
+ {
+ result = TYP_I_IMPL;
+ }
+ else if (corInfoType == TYPE_GC_REF)
+ {
+ result = TYP_REF;
+ }
+ else if (corInfoType == TYPE_GC_BYREF)
+ {
+ result = TYP_BYREF;
+ }
+ else
+ {
+ noway_assert(!"Bad value of 'gcType'");
+ }
+ return result;
+}
+
+#if FEATURE_MULTIREG_ARGS
+//---------------------------------------------------------------------------
+// getStructGcPtrsFromOp: Given a GenTree node of TYP_STRUCT that represents
+//                        a pass-by-value argument, return the gcPtr layout
+//                        for the pointer-sized fields
+// Arguments:
+//    op         - the operand of TYP_STRUCT that is passed by value
+//    gcPtrsOut  - an array of BYTEs that is written by this method;
+//                 it will contain the VM's CorInfoGCType values
+//                 for each pointer-sized field
+// Return Value:
+//     Two [or more] values are written into the gcPtrs array
+//
+// Note that for ARM64 there will always be exactly two pointer-sized fields
+
+void Compiler::getStructGcPtrsFromOp(GenTreePtr op, BYTE* gcPtrsOut)
+{
+ assert(op->TypeGet() == TYP_STRUCT);
+
+#ifdef _TARGET_ARM64_
+ if (op->OperGet() == GT_OBJ)
+ {
+ CORINFO_CLASS_HANDLE objClass = op->gtObj.gtClass;
+
+ int structSize = info.compCompHnd->getClassSize(objClass);
+ assert(structSize <= 2 * TARGET_POINTER_SIZE);
+
+ BYTE gcPtrsTmp[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
+
+ info.compCompHnd->getClassGClayout(objClass, &gcPtrsTmp[0]);
+
+ gcPtrsOut[0] = gcPtrsTmp[0];
+ gcPtrsOut[1] = gcPtrsTmp[1];
+ }
+ else if (op->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* varNode = op->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ // At this point any TYP_STRUCT LclVar must be a 16-byte pass by value argument
+ assert(varDsc->lvSize() == 2 * TARGET_POINTER_SIZE);
+
+ gcPtrsOut[0] = varDsc->lvGcLayout[0];
+ gcPtrsOut[1] = varDsc->lvGcLayout[1];
+ }
+ else
+#endif
+ {
+ noway_assert(!"Unsupported Oper for getStructGcPtrsFromOp");
+ }
+}
+#endif // FEATURE_MULTIREG_ARGS
+
+#ifdef ARM_SOFTFP
+//---------------------------------------------------------------------------
+// isSingleFloat32Struct:
+// Check if the given struct type contains only one float32 value type
+//
+// Arguments:
+// clsHnd - the handle for the struct type
+//
+// Return Value:
+// true if the given struct type contains only one float32 value type,
+// false otherwise.
+//
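+// Notes:
+//    For illustration: a struct whose only field is another struct whose only field is a 'float'
+//    (a chain of single-field value types ending in float32) is considered a single float32 struct.
+//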
+
+bool Compiler::isSingleFloat32Struct(CORINFO_CLASS_HANDLE clsHnd)
+{
+ for (;;)
+ {
+ // all of class chain must be of value type and must have only one field
+ if (!info.compCompHnd->isValueClass(clsHnd) && info.compCompHnd->getClassNumInstanceFields(clsHnd) != 1)
+ {
+ return false;
+ }
+
+ CORINFO_CLASS_HANDLE* pClsHnd = &clsHnd;
+ CORINFO_FIELD_HANDLE fldHnd = info.compCompHnd->getFieldInClass(clsHnd, 0);
+ CorInfoType fieldType = info.compCompHnd->getFieldType(fldHnd, pClsHnd);
+
+ switch (fieldType)
+ {
+ case CORINFO_TYPE_VALUECLASS:
+ clsHnd = *pClsHnd;
+ break;
+
+ case CORINFO_TYPE_FLOAT:
+ return true;
+
+ default:
+ return false;
+ }
+ }
+}
+#endif // ARM_SOFTFP
+
+//-----------------------------------------------------------------------------
+// getPrimitiveTypeForStruct:
+//    Get the "primitive" type that is used for a struct
+// of size 'structSize'.
+// We examine 'clsHnd' to check the GC layout of the struct and
+// return TYP_REF for structs that simply wrap an object.
+// If the struct is a one element HFA, we will return the
+// proper floating point type.
+//
+// Arguments:
+// structSize - the size of the struct type, cannot be zero
+// clsHnd - the handle for the struct type, used when may have
+// an HFA or if we need the GC layout for an object ref.
+//
+// Return Value:
+// The primitive type (i.e. byte, short, int, long, ref, float, double)
+// used to pass or return structs of this size.
+// If we shouldn't use a "primitive" type then TYP_UNKNOWN is returned.
+// Notes:
+// For 32-bit targets (X86/ARM32) the 64-bit TYP_LONG type is not
+// considered a primitive type by this method.
+// So a struct that wraps a 'long' is passed and returned in the
+//    same way as any other 8-byte struct.
+//    For ARM32, if we have an HFA struct that wraps a 64-bit double,
+//    we will return TYP_DOUBLE.
+//
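+//    For illustration (based on the cases below): a 1-byte struct yields TYP_BYTE, a 2-byte struct
+//    yields TYP_SHORT, and a pointer-sized struct wrapping a single object reference yields TYP_REF,
+//    while an 8-byte HFA of two floats yields TYP_UNKNOWN because it needs two FP registers.
+//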
+var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd)
+{
+ assert(structSize != 0);
+
+ var_types useType;
+
+ switch (structSize)
+ {
+ case 1:
+ useType = TYP_BYTE;
+ break;
+
+ case 2:
+ useType = TYP_SHORT;
+ break;
+
+#ifndef _TARGET_XARCH_
+ case 3:
+ useType = TYP_INT;
+ break;
+
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_64BIT_
+ case 4:
+ if (IsHfa(clsHnd))
+ {
+                // With a structSize of 4 and IsHfa, it must be an HFA of one float
+ useType = TYP_FLOAT;
+ }
+ else
+ {
+ useType = TYP_INT;
+ }
+ break;
+
+#ifndef _TARGET_XARCH_
+ case 5:
+ case 6:
+ case 7:
+ useType = TYP_I_IMPL;
+ break;
+
+#endif // _TARGET_XARCH_
+#endif // _TARGET_64BIT_
+
+ case TARGET_POINTER_SIZE:
+#ifdef ARM_SOFTFP
+        // For ARM_SOFTFP, HFA is unsupported, so we need to check in another way.
+        // This matters only for size-4 structs, because bigger structs would be processed with a RetBuf.
+ if (isSingleFloat32Struct(clsHnd))
+#else // !ARM_SOFTFP
+ if (IsHfa(clsHnd))
+#endif // ARM_SOFTFP
+ {
+#ifdef _TARGET_64BIT_
+ var_types hfaType = GetHfaType(clsHnd);
+
+            // With a structSize of 8 and IsHfa, we have two possibilities:
+ // An HFA of one double or an HFA of two floats
+ //
+ // Check and exclude the case of an HFA of two floats
+ if (hfaType == TYP_DOUBLE)
+ {
+ // We have an HFA of one double
+ useType = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(hfaType == TYP_FLOAT);
+
+ // We have an HFA of two floats
+ // This should be passed or returned in two FP registers
+ useType = TYP_UNKNOWN;
+ }
+#else // a 32BIT target
+            // With a structSize of 4 and IsHfa, it must be an HFA of one float
+ useType = TYP_FLOAT;
+#endif // _TARGET_64BIT_
+ }
+ else
+ {
+ BYTE gcPtr = 0;
+ // Check if this pointer-sized struct is wrapping a GC object
+ info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
+ useType = getJitGCType(gcPtr);
+ }
+ break;
+
+#ifdef _TARGET_ARM_
+ case 8:
+ if (IsHfa(clsHnd))
+ {
+ var_types hfaType = GetHfaType(clsHnd);
+
+                // With a structSize of 8 and IsHfa, we have two possibilities:
+ // An HFA of one double or an HFA of two floats
+ //
+ // Check and exclude the case of an HFA of two floats
+ if (hfaType == TYP_DOUBLE)
+ {
+ // We have an HFA of one double
+ useType = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(hfaType == TYP_FLOAT);
+
+ // We have an HFA of two floats
+ // This should be passed or returned in two FP registers
+ useType = TYP_UNKNOWN;
+ }
+ }
+ else
+ {
+ // We don't have an HFA
+ useType = TYP_UNKNOWN;
+ }
+ break;
+#endif // _TARGET_ARM_
+
+ default:
+ useType = TYP_UNKNOWN;
+ break;
+ }
+
+ return useType;
+}
+
+//-----------------------------------------------------------------------------
+// getArgTypeForStruct:
+// Get the type that is used to pass values of the given struct type.
+// If you have already retrieved the struct size then it should be
+// passed as the optional third argument, as this allows us to avoid
+// an extra call to getClassSize(clsHnd)
+//
+// Arguments:
+// clsHnd - the handle for the struct type
+// wbPassStruct - An "out" argument with information about how
+// the struct is to be passed
+// structSize - the size of the struct type,
+// or zero if we should call getClassSize(clsHnd)
+//
+// Return Value:
+// For wbPassStruct you can pass a 'nullptr' and nothing will be written
+// or returned for that out parameter.
+// When *wbPassStruct is SPK_PrimitiveType this method's return value
+// is the primitive type used to pass the struct.
+// When *wbPassStruct is SPK_ByReference this method's return value
+// is always TYP_UNKNOWN and the struct type is passed by reference to a copy
+// When *wbPassStruct is SPK_ByValue or SPK_ByValueAsHfa this method's return value
+// is always TYP_STRUCT and the struct type is passed by value either
+// using multiple registers or on the stack.
+//
+// Assumptions:
+// The size must be the size of the given type.
+// The given class handle must be for a value type (struct).
+//
+// Notes:
+// About HFA types:
+// When the clsHnd is a one element HFA type we return the appropriate
+// floating point primitive type and *wbPassStruct is SPK_PrimitiveType
+//    If there are two or more elements in the HFA type then this method's
+// return value is TYP_STRUCT and *wbPassStruct is SPK_ByValueAsHfa
+//
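+//    For illustration, on ARM64 a 16-byte non-HFA struct is passed by value in two registers
+//    (SPK_ByValue, TYP_STRUCT), while a 17-32 byte struct is passed by reference to a copy
+//    (SPK_ByReference); see the target-specific cases below.
+//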
+var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
+ structPassingKind* wbPassStruct,
+ unsigned structSize /* = 0 */)
+{
+ var_types useType = TYP_UNKNOWN;
+ structPassingKind howToPassStruct = SPK_Unknown; // We must change this before we return
+
+ if (structSize == 0)
+ {
+ structSize = info.compCompHnd->getClassSize(clsHnd);
+ }
+ assert(structSize > 0);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // An 8-byte struct may need to be passed in a floating point register
+ // So we always consult the struct "Classifier" routine
+ //
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(clsHnd, &structDesc);
+
+ // If we have one eightByteCount then we can set 'useType' based on that
+ if (structDesc.eightByteCount == 1)
+ {
+ // Set 'useType' to the type of the first eightbyte item
+ useType = GetEightByteType(structDesc, 0);
+ }
+
+#elif defined(_TARGET_X86_)
+
+ // On x86 we never pass structs as primitive types (unless the VM unwraps them for us)
+ useType = TYP_UNKNOWN;
+
+#else // all other targets
+
+ // The largest primitive type is 8 bytes (TYP_DOUBLE)
+ // so we can skip calling getPrimitiveTypeForStruct when we
+ // have a struct that is larger than that.
+ //
+ if (structSize <= sizeof(double))
+ {
+ // We set the "primitive" useType based upon the structSize
+ // and also examine the clsHnd to see if it is an HFA of count one
+ useType = getPrimitiveTypeForStruct(structSize, clsHnd);
+ }
+
+#endif // all other targets
+
+ // Did we change this struct type into a simple "primitive" type?
+ //
+ if (useType != TYP_UNKNOWN)
+ {
+ // Yes, we should use the "primitive" type in 'useType'
+ howToPassStruct = SPK_PrimitiveType;
+ }
+ else // We can't replace the struct with a "primitive" type
+ {
+ // See if we can pass this struct by value, possibly in multiple registers
+ // or if we should pass it by reference to a copy
+ //
+ if (structSize <= MAX_PASS_MULTIREG_BYTES)
+ {
+ // Structs that are HFA's are passed by value in multiple registers
+ if (IsHfa(clsHnd))
+ {
+ // HFA's of count one should have been handled by getPrimitiveTypeForStruct
+ assert(GetHfaCount(clsHnd) >= 2);
+
+                // Set up wbPassStruct and useType to indicate that this is passed by value as an HFA
+                // using multiple registers.
+                // (When all of the parameter registers are used, the stack will be used.)
+ howToPassStruct = SPK_ByValueAsHfa;
+ useType = TYP_STRUCT;
+ }
+ else // Not an HFA struct type
+ {
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // The case of (structDesc.eightByteCount == 1) should have already been handled
+ if (structDesc.eightByteCount > 1)
+ {
+                    // Set up wbPassStruct and useType to indicate that this is passed by value in multiple
+                    // registers. (When all of the parameter registers are used, the stack will be used.)
+ howToPassStruct = SPK_ByValue;
+ useType = TYP_STRUCT;
+ }
+ else
+ {
+ assert(structDesc.eightByteCount == 0);
+ // Otherwise we pass this struct by reference to a copy
+                    // Set up wbPassStruct and useType to indicate that this is passed using one register
+                    // (by reference to a copy).
+ howToPassStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ // Structs that are pointer sized or smaller should have been handled by getPrimitiveTypeForStruct
+ assert(structSize > TARGET_POINTER_SIZE);
+
+ // On ARM64 structs that are 9-16 bytes are passed by value in multiple registers
+ //
+ if (structSize <= (TARGET_POINTER_SIZE * 2))
+ {
+                    // Set up wbPassStruct and useType to indicate that this is passed by value in multiple
+                    // registers. (When all of the parameter registers are used, the stack will be used.)
+ howToPassStruct = SPK_ByValue;
+ useType = TYP_STRUCT;
+ }
+ else // a structSize that is 17-32 bytes in size
+ {
+ // Otherwise we pass this struct by reference to a copy
+                    // Set up wbPassStruct and useType to indicate that this is passed using one register
+                    // (by reference to a copy).
+ howToPassStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+ }
+
+#elif defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+
+ // Otherwise we pass this struct by value on the stack
+                // Set up wbPassStruct and useType to indicate that this is passed by value according to the X86/ARM32 ABI.
+ howToPassStruct = SPK_ByValue;
+ useType = TYP_STRUCT;
+
+#else // _TARGET_XXX_
+
+ noway_assert(!"Unhandled TARGET in getArgTypeForStruct (with FEATURE_MULTIREG_ARGS=1)");
+
+#endif // _TARGET_XXX_
+ }
+ }
+ else // (structSize > MAX_PASS_MULTIREG_BYTES)
+ {
+ // We have a (large) struct that can't be replaced with a "primitive" type
+ // and can't be passed in multiple registers
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+
+ // Otherwise we pass this struct by value on the stack
+            // Set up wbPassStruct and useType to indicate that this is passed by value according to the X86/ARM32 ABI.
+ howToPassStruct = SPK_ByValue;
+ useType = TYP_STRUCT;
+
+#elif defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+
+ // Otherwise we pass this struct by reference to a copy
+            // Set up wbPassStruct and useType to indicate that this is passed using one register (by reference to a copy).
+ howToPassStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+
+#else // _TARGET_XXX_
+
+ noway_assert(!"Unhandled TARGET in getArgTypeForStruct");
+
+#endif // _TARGET_XXX_
+ }
+ }
+
+ // 'howToPassStruct' must be set to one of the valid values before we return
+ assert(howToPassStruct != SPK_Unknown);
+ if (wbPassStruct != nullptr)
+ {
+ *wbPassStruct = howToPassStruct;
+ }
+ return useType;
+}
+
+//-----------------------------------------------------------------------------
+// getReturnTypeForStruct:
+// Get the type that is used to return values of the given struct type.
+// If you have already retrieved the struct size then it should be
+// passed as the optional third argument, as this allows us to avoid
+// an extra call to getClassSize(clsHnd)
+//
+// Arguments:
+// clsHnd - the handle for the struct type
+// wbReturnStruct - An "out" argument with information about how
+// the struct is to be returned
+// structSize - the size of the struct type,
+// or zero if we should call getClassSize(clsHnd)
+//
+// Return Value:
+// For wbReturnStruct you can pass a 'nullptr' and nothing will be written
+// or returned for that out parameter.
+// When *wbReturnStruct is SPK_PrimitiveType this method's return value
+// is the primitive type used to return the struct.
+// When *wbReturnStruct is SPK_ByReference this method's return value
+// is always TYP_UNKNOWN and the struct type is returned using a return buffer
+// When *wbReturnStruct is SPK_ByValue or SPK_ByValueAsHfa this method's return value
+// is always TYP_STRUCT and the struct type is returned using multiple registers.
+//
+// Assumptions:
+// The size must be the size of the given type.
+// The given class handle must be for a value type (struct).
+//
+// Notes:
+// About HFA types:
+// When the clsHnd is a one element HFA type then this method's return
+// value is the appropriate floating point primitive type and
+// *wbReturnStruct is SPK_PrimitiveType.
+// If there are two or more elements in the HFA type and the target supports
+// multireg return types then the return value is TYP_STRUCT and
+// *wbReturnStruct is SPK_ByValueAsHfa.
+// Additionally if there are two or more elements in the HFA type and
+//    the target doesn't support multireg return types then it is treated
+// as if it wasn't an HFA type.
+// About returning TYP_STRUCT:
+// Whenever this method's return value is TYP_STRUCT it always means
+// that multiple registers are used to return this struct.
+//
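+//    For illustration, on ARM64 a 16-byte non-HFA struct is returned by value in two registers
+//    (SPK_ByValue, TYP_STRUCT), while on x86 and ARM32 any struct that is not returned as a
+//    primitive is returned via a return buffer (SPK_ByReference); see the cases below.
+//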
+var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
+ structPassingKind* wbReturnStruct /* = nullptr */,
+ unsigned structSize /* = 0 */)
+{
+ var_types useType = TYP_UNKNOWN;
+ structPassingKind howToReturnStruct = SPK_Unknown; // We must change this before we return
+
+ assert(clsHnd != NO_CLASS_HANDLE);
+
+ if (structSize == 0)
+ {
+ structSize = info.compCompHnd->getClassSize(clsHnd);
+ }
+ assert(structSize > 0);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // An 8-byte struct may need to be returned in a floating point register
+ // So we always consult the struct "Classifier" routine
+ //
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(clsHnd, &structDesc);
+
+ // If we have one eightByteCount then we can set 'useType' based on that
+ if (structDesc.eightByteCount == 1)
+ {
+ // Set 'useType' to the type of the first eightbyte item
+ useType = GetEightByteType(structDesc, 0);
+ assert(structDesc.passedInRegisters == true);
+ }
+
+#else // not UNIX_AMD64
+
+ // The largest primitive type is 8 bytes (TYP_DOUBLE)
+ // so we can skip calling getPrimitiveTypeForStruct when we
+ // have a struct that is larger than that.
+ //
+ if (structSize <= sizeof(double))
+ {
+ // We set the "primitive" useType based upon the structSize
+ // and also examine the clsHnd to see if it is an HFA of count one
+ useType = getPrimitiveTypeForStruct(structSize, clsHnd);
+ }
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef _TARGET_64BIT_
+ // Note this handles an odd case when FEATURE_MULTIREG_RET is disabled and HFAs are enabled
+ //
+    // getPrimitiveTypeForStruct will return TYP_UNKNOWN for a struct that is an HFA of two floats,
+    // because when HFAs are enabled we would normally use two FP registers to pass or return it.
+    //
+    // But if we don't have support for multiple register return types, we have to change this.
+    // Since what we have is an 8-byte struct (float + float), we change useType to TYP_I_IMPL
+    // so that the struct is instead returned in an 8-byte integer register.
+ //
+ if ((FEATURE_MULTIREG_RET == 0) && (useType == TYP_UNKNOWN) && (structSize == (2 * sizeof(float))) && IsHfa(clsHnd))
+ {
+ useType = TYP_I_IMPL;
+ }
+#endif
+
+ // Did we change this struct type into a simple "primitive" type?
+ //
+ if (useType != TYP_UNKNOWN)
+ {
+ // Yes, we should use the "primitive" type in 'useType'
+ howToReturnStruct = SPK_PrimitiveType;
+ }
+ else // We can't replace the struct with a "primitive" type
+ {
+ // See if we can return this struct by value, possibly in multiple registers
+ // or if we should return it using a return buffer register
+ //
+ if ((FEATURE_MULTIREG_RET == 1) && (structSize <= MAX_RET_MULTIREG_BYTES))
+ {
+ // Structs that are HFA's are returned in multiple registers
+ if (IsHfa(clsHnd))
+ {
+ // HFA's of count one should have been handled by getPrimitiveTypeForStruct
+ assert(GetHfaCount(clsHnd) >= 2);
+
+ // Set howToReturnStruct and useType to indicate that this is returned by value as an HFA
+ // using multiple registers
+ howToReturnStruct = SPK_ByValueAsHfa;
+ useType = TYP_STRUCT;
+ }
+ else // Not an HFA struct type
+ {
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // The case of (structDesc.eightByteCount == 1) should have already been handled
+ if (structDesc.eightByteCount > 1)
+ {
+ // Set howToReturnStruct and useType to indicate that this is returned by value in multiple registers
+ howToReturnStruct = SPK_ByValue;
+ useType = TYP_STRUCT;
+ assert(structDesc.passedInRegisters == true);
+ }
+ else
+ {
+ assert(structDesc.eightByteCount == 0);
+ // Otherwise we return this struct using a return buffer
+ // Set howToReturnStruct and useType to indicate that this is returned using a return buffer register
+ // (reference to a return buffer)
+ howToReturnStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+ assert(structDesc.passedInRegisters == false);
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ // Structs that are pointer sized or smaller should have been handled by getPrimitiveTypeForStruct
+ assert(structSize > TARGET_POINTER_SIZE);
+
+ // On ARM64 structs that are 9-16 bytes are returned by value in multiple registers
+ //
+ if (structSize <= (TARGET_POINTER_SIZE * 2))
+ {
+ // Set howToReturnStruct and useType to indicate that this is returned by value in multiple registers
+ howToReturnStruct = SPK_ByValue;
+ useType = TYP_STRUCT;
+ }
+ else // a structSize that is 17-32 bytes in size
+ {
+ // Otherwise we return this struct using a return buffer
+ // Set howToReturnStruct and useType to indicate that this is returned using a return buffer register
+ // (reference to a return buffer)
+ howToReturnStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+ }
+
+#elif defined(_TARGET_ARM_) || defined(_TARGET_X86_)
+
+ // Otherwise we return this struct using a return buffer
+ // Set howToReturnStruct and useType to indicate that this is returned using a return buffer register
+ // (reference to a return buffer)
+ howToReturnStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+
+#else // _TARGET_XXX_
+
+ noway_assert(!"Unhandled TARGET in getReturnTypeForStruct (with FEATURE_MULTIREG_RET=1)");
+
+#endif // _TARGET_XXX_
+ }
+ }
+ else // (structSize > MAX_RET_MULTIREG_BYTES) || (FEATURE_MULTIREG_RET == 0)
+ {
+ // We have a (large) struct that can't be replaced with a "primitive" type
+ // and can't be returned in multiple registers
+
+ // We return this struct using a return buffer register
+ // Set howToReturnStruct and useType to indicate that this is returned using a return buffer register
+ // (reference to a return buffer)
+ howToReturnStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+ }
+ }
+
+ // 'howToReturnStruct' must be set to one of the valid values before we return
+ assert(howToReturnStruct != SPK_Unknown);
+ if (wbReturnStruct != nullptr)
+ {
+ *wbReturnStruct = howToReturnStruct;
+ }
+ return useType;
+}
+
+/*****************************************************************************
+ * variables to keep track of how many iterations we go in a dataflow pass
+ */
+
+#if DATAFLOW_ITER
+
+unsigned CSEiterCount; // counts the # of iteration for the CSE dataflow
+unsigned CFiterCount; // counts the # of iteration for the Const Folding dataflow
+
+#endif // DATAFLOW_ITER
+
+#if MEASURE_BLOCK_SIZE
+size_t genFlowNodeSize;
+size_t genFlowNodeCnt;
+#endif // MEASURE_BLOCK_SIZE
+
+/*****************************************************************************/
+// We keep track of methods we've already compiled.
+
+/*****************************************************************************
+ * Declare the statics
+ */
+
+#ifdef DEBUG
+/* static */
+unsigned Compiler::s_compMethodsCount = 0; // to produce unique label names
+
+/* static */
+bool Compiler::s_dspMemStats = false;
+#endif
+
+#ifndef DEBUGGING_SUPPORT
+/* static */
+const bool Compiler::Options::compDbgCode = false;
+#endif
+
+#ifndef PROFILING_SUPPORTED
+const bool Compiler::Options::compNoPInvokeInlineCB = false;
+#endif
+
+/*****************************************************************************
+ *
+ * One time initialization code
+ */
+
+/* static */
+void Compiler::compStartup()
+{
+#if DISPLAY_SIZES
+ grossVMsize = grossNCsize = totalNCsize = 0;
+#endif // DISPLAY_SIZES
+
+ // Initialize the JIT's allocator.
+ ArenaAllocator::startup();
+
+ /* Initialize the table of tree node sizes */
+
+ GenTree::InitNodeSize();
+
+#ifdef JIT32_GCENCODER
+ // Initialize the GC encoder lookup table
+
+ GCInfo::gcInitEncoderLookupTable();
+#endif
+
+ /* Initialize the emitter */
+
+ emitter::emitInit();
+
+ // Static vars of ValueNumStore
+ ValueNumStore::InitValueNumStoreStatics();
+
+ compDisplayStaticSizes(jitstdout);
+}
+
+/*****************************************************************************
+ *
+ * One time finalization code
+ */
+
+/* static */
+void Compiler::compShutdown()
+{
+#ifdef ALT_JIT
+ if (s_pAltJitExcludeAssembliesList != nullptr)
+ {
+ s_pAltJitExcludeAssembliesList->~AssemblyNamesList2(); // call the destructor
+ s_pAltJitExcludeAssembliesList = nullptr;
+ }
+#endif // ALT_JIT
+
+ ArenaAllocator::shutdown();
+
+ /* Shut down the emitter */
+
+ emitter::emitDone();
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ // Finish reading and/or writing inline xml
+ InlineStrategy::FinalizeXml();
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#if defined(DEBUG) || MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || DISPLAY_SIZES || CALL_ARG_STATS
+ if (genMethodCnt == 0)
+ {
+ return;
+ }
+#endif
+
+ // Where should we write our statistics output?
+ FILE* fout = jitstdout;
+
+#ifdef FEATURE_JIT_METHOD_PERF
+ if (compJitTimeLogFilename != NULL)
+ {
+ // _wfopen returns NULL if it fails for some reason; we assume that
+ // Print returns silently when given a NULL argument.
+ FILE* jitTimeLogFile = _wfopen(compJitTimeLogFilename, W("a"));
+ CompTimeSummaryInfo::s_compTimeSummary.Print(jitTimeLogFile);
+ if (jitTimeLogFile != nullptr)
+ {
+ fclose(jitTimeLogFile);
+ }
+ }
+#endif // FEATURE_JIT_METHOD_PERF
+
+#if FUNC_INFO_LOGGING
+ if (compJitFuncInfoFile != nullptr)
+ {
+ fclose(compJitFuncInfoFile);
+ compJitFuncInfoFile = nullptr;
+ }
+#endif // FUNC_INFO_LOGGING
+
+#if COUNT_RANGECHECKS
+ if (optRangeChkAll > 0)
+ {
+ fprintf(fout, "Removed %u of %u range checks\n", optRangeChkRmv, optRangeChkAll);
+ }
+#endif // COUNT_RANGECHECKS
+
+#if DISPLAY_SIZES
+
+ if (grossVMsize && grossNCsize)
+ {
+ fprintf(fout, "\n");
+ fprintf(fout, "--------------------------------------\n");
+ fprintf(fout, "Function and GC info size stats\n");
+ fprintf(fout, "--------------------------------------\n");
+
+ fprintf(fout, "[%7u VM, %8u %6s %4u%%] %s\n", grossVMsize, grossNCsize, Target::g_tgtCPUName,
+ 100 * grossNCsize / grossVMsize, "Total (excluding GC info)");
+
+ fprintf(fout, "[%7u VM, %8u %6s %4u%%] %s\n", grossVMsize, totalNCsize, Target::g_tgtCPUName,
+ 100 * totalNCsize / grossVMsize, "Total (including GC info)");
+
+ if (gcHeaderISize || gcHeaderNSize)
+ {
+ fprintf(fout, "\n");
+
+ fprintf(fout, "GC tables : [%7uI,%7uN] %7u byt (%u%% of IL, %u%% of %s).\n",
+ gcHeaderISize + gcPtrMapISize, gcHeaderNSize + gcPtrMapNSize, totalNCsize - grossNCsize,
+ 100 * (totalNCsize - grossNCsize) / grossVMsize, 100 * (totalNCsize - grossNCsize) / grossNCsize,
+ Target::g_tgtCPUName);
+
+ fprintf(fout, "GC headers : [%7uI,%7uN] %7u byt, [%4.1fI,%4.1fN] %4.1f byt/meth\n", gcHeaderISize,
+ gcHeaderNSize, gcHeaderISize + gcHeaderNSize, (float)gcHeaderISize / (genMethodICnt + 0.001),
+ (float)gcHeaderNSize / (genMethodNCnt + 0.001),
+ (float)(gcHeaderISize + gcHeaderNSize) / genMethodCnt);
+
+ fprintf(fout, "GC ptr maps : [%7uI,%7uN] %7u byt, [%4.1fI,%4.1fN] %4.1f byt/meth\n", gcPtrMapISize,
+ gcPtrMapNSize, gcPtrMapISize + gcPtrMapNSize, (float)gcPtrMapISize / (genMethodICnt + 0.001),
+ (float)gcPtrMapNSize / (genMethodNCnt + 0.001),
+ (float)(gcPtrMapISize + gcPtrMapNSize) / genMethodCnt);
+ }
+ else
+ {
+ fprintf(fout, "\n");
+
+ fprintf(fout, "GC tables take up %u bytes (%u%% of instr, %u%% of %6s code).\n",
+ totalNCsize - grossNCsize, 100 * (totalNCsize - grossNCsize) / grossVMsize,
+ 100 * (totalNCsize - grossNCsize) / grossNCsize, Target::g_tgtCPUName);
+ }
+
+#ifdef DEBUG
+#if DOUBLE_ALIGN
+ fprintf(fout, "%u out of %u methods generated with double-aligned stack\n",
+ Compiler::s_lvaDoubleAlignedProcsCount, genMethodCnt);
+#endif
+#endif
+ }
+
+#endif // DISPLAY_SIZES
+
+#if CALL_ARG_STATS
+ compDispCallArgStats(fout);
+#endif
+
+#if COUNT_BASIC_BLOCKS
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Basic block count frequency table:\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ bbCntTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+
+ fprintf(fout, "\n");
+
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "IL method size frequency table for methods with a single basic block:\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ bbOneBBSizeTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+#endif // COUNT_BASIC_BLOCKS
+
+#if COUNT_LOOPS
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Loop stats\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Total number of methods with loops is %5u\n", totalLoopMethods);
+ fprintf(fout, "Total number of loops is %5u\n", totalLoopCount);
+ fprintf(fout, "Maximum number of loops per method is %5u\n", maxLoopsPerMethod);
+ fprintf(fout, "# of methods overflowing nat loop table is %5u\n", totalLoopOverflows);
+ fprintf(fout, "Total number of 'unnatural' loops is %5u\n", totalUnnatLoopCount);
+ fprintf(fout, "# of methods overflowing unnat loop limit is %5u\n", totalUnnatLoopOverflows);
+ fprintf(fout, "Total number of loops with an iterator is %5u\n", iterLoopCount);
+ fprintf(fout, "Total number of loops with a simple iterator is %5u\n", simpleTestLoopCount);
+ fprintf(fout, "Total number of loops with a constant iterator is %5u\n", constIterLoopCount);
+
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Loop count frequency table:\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ loopCountTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Loop exit count frequency table:\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ loopExitCountTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+
+#endif // COUNT_LOOPS
+
+#if DATAFLOW_ITER
+
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Total number of iterations in the CSE dataflow loop is %5u\n", CSEiterCount);
+ fprintf(fout, "Total number of iterations in the CF dataflow loop is %5u\n", CFiterCount);
+
+#endif // DATAFLOW_ITER
+
+#if MEASURE_NODE_SIZE
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "GenTree node allocation stats\n");
+ fprintf(fout, "---------------------------------------------------\n");
+
+ fprintf(fout, "Allocated %6u tree nodes (%7u bytes total, avg %4u bytes per method)\n",
+ genNodeSizeStats.genTreeNodeCnt, genNodeSizeStats.genTreeNodeSize,
+ genNodeSizeStats.genTreeNodeSize / genMethodCnt);
+
+ fprintf(fout, "Allocated %7u bytes of unused tree node space (%3.2f%%)\n",
+ genNodeSizeStats.genTreeNodeSize - genNodeSizeStats.genTreeNodeActualSize,
+ (float)(100 * (genNodeSizeStats.genTreeNodeSize - genNodeSizeStats.genTreeNodeActualSize)) /
+ genNodeSizeStats.genTreeNodeSize);
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Distribution of per-method GenTree node counts:\n");
+ genTreeNcntHist.dump(fout);
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Distribution of per-method GenTree node allocations (in bytes):\n");
+ genTreeNsizHist.dump(fout);
+
+#endif // MEASURE_NODE_SIZE
+
+#if MEASURE_BLOCK_SIZE
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "BasicBlock and flowList/BasicBlockList allocation stats\n");
+ fprintf(fout, "---------------------------------------------------\n");
+
+ fprintf(fout, "Allocated %6u basic blocks (%7u bytes total, avg %4u bytes per method)\n", BasicBlock::s_Count,
+ BasicBlock::s_Size, BasicBlock::s_Size / genMethodCnt);
+ fprintf(fout, "Allocated %6u flow nodes (%7u bytes total, avg %4u bytes per method)\n", genFlowNodeCnt,
+ genFlowNodeSize, genFlowNodeSize / genMethodCnt);
+
+#endif // MEASURE_BLOCK_SIZE
+
+#if MEASURE_MEM_ALLOC
+
+#ifdef DEBUG
+ // Under debug, we only dump memory stats when the COMPlus_* variable is defined.
+ // Under non-debug, we don't have the COMPlus_* variable, so we always dump the stats.
+ if (s_dspMemStats)
+#endif
+ {
+ fprintf(fout, "\nAll allocations:\n");
+ s_aggMemStats.Print(jitstdout);
+
+ fprintf(fout, "\nLargest method:\n");
+ s_maxCompMemStats.Print(jitstdout);
+ }
+
+#endif // MEASURE_MEM_ALLOC
+
+#if LOOP_HOIST_STATS
+#ifdef DEBUG // In retail builds we always display the loop hoist stats
+ if (JitConfig.DisplayLoopHoistStats() != 0)
+#endif // DEBUG
+ {
+ PrintAggregateLoopHoistStats(jitstdout);
+ }
+#endif // LOOP_HOIST_STATS
+
+#if MEASURE_PTRTAB_SIZE
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "GC pointer table stats\n");
+ fprintf(fout, "---------------------------------------------------\n");
+
+ fprintf(fout, "Reg pointer descriptor size (internal): %8u (avg %4u per method)\n", GCInfo::s_gcRegPtrDscSize,
+ GCInfo::s_gcRegPtrDscSize / genMethodCnt);
+
+ fprintf(fout, "Total pointer table size: %8u (avg %4u per method)\n", GCInfo::s_gcTotalPtrTabSize,
+ GCInfo::s_gcTotalPtrTabSize / genMethodCnt);
+
+#endif // MEASURE_PTRTAB_SIZE
+
+#if MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || MEASURE_PTRTAB_SIZE || DISPLAY_SIZES
+
+ if (genMethodCnt != 0)
+ {
+ fprintf(fout, "\n");
+ fprintf(fout, "A total of %6u methods compiled", genMethodCnt);
+#if DISPLAY_SIZES
+ if (genMethodICnt || genMethodNCnt)
+ {
+ fprintf(fout, " (%u interruptible, %u non-interruptible)", genMethodICnt, genMethodNCnt);
+ }
+#endif // DISPLAY_SIZES
+ fprintf(fout, ".\n");
+ }
+
+#endif // MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || MEASURE_PTRTAB_SIZE || DISPLAY_SIZES
+
+#if EMITTER_STATS
+ emitterStats(fout);
+#endif
+
+#if MEASURE_FATAL
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Fatal errors stats\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, " badCode: %u\n", fatal_badCode);
+ fprintf(fout, " noWay: %u\n", fatal_noWay);
+ fprintf(fout, " NOMEM: %u\n", fatal_NOMEM);
+ fprintf(fout, " noWayAssertBody: %u\n", fatal_noWayAssertBody);
+#ifdef DEBUG
+ fprintf(fout, " noWayAssertBodyArgs: %u\n", fatal_noWayAssertBodyArgs);
+#endif // DEBUG
+ fprintf(fout, " NYI: %u\n", fatal_NYI);
+#endif // MEASURE_FATAL
+}
+
+/*****************************************************************************
+ * Display static data structure sizes.
+ */
+
+/* static */
+void Compiler::compDisplayStaticSizes(FILE* fout)
+{
+
+#if MEASURE_NODE_SIZE
+ /*
+ IMPORTANT: Use the following code to check the alignment of
+ GenTree members (in a retail build, of course).
+ */
+
+ GenTree* gtDummy = nullptr;
+
+ fprintf(fout, "\n");
+ fprintf(fout, "Offset / size of gtOper = %2u / %2u\n", offsetof(GenTree, gtOper), sizeof(gtDummy->gtOper));
+ fprintf(fout, "Offset / size of gtType = %2u / %2u\n", offsetof(GenTree, gtType), sizeof(gtDummy->gtType));
+#if FEATURE_ANYCSE
+ fprintf(fout, "Offset / size of gtCSEnum = %2u / %2u\n", offsetof(GenTree, gtCSEnum),
+ sizeof(gtDummy->gtCSEnum));
+#endif // FEATURE_ANYCSE
+#if ASSERTION_PROP
+ fprintf(fout, "Offset / size of gtAssertionNum = %2u / %2u\n", offsetof(GenTree, gtAssertionNum),
+ sizeof(gtDummy->gtAssertionNum));
+#endif // ASSERTION_PROP
+#if FEATURE_STACK_FP_X87
+ fprintf(fout, "Offset / size of gtFPlvl = %2u / %2u\n", offsetof(GenTree, gtFPlvl),
+ sizeof(gtDummy->gtFPlvl));
+#endif // FEATURE_STACK_FP_X87
+ // TODO: The section that reports GenTree sizes should be made into a public static member function of the GenTree
+ // class (see https://github.com/dotnet/coreclr/pull/493)
+ // fprintf(fout, "Offset / size of gtCostEx = %2u / %2u\n", offsetof(GenTree, _gtCostEx ),
+ // sizeof(gtDummy->_gtCostEx ));
+ // fprintf(fout, "Offset / size of gtCostSz = %2u / %2u\n", offsetof(GenTree, _gtCostSz ),
+ // sizeof(gtDummy->_gtCostSz ));
+ fprintf(fout, "Offset / size of gtFlags = %2u / %2u\n", offsetof(GenTree, gtFlags),
+ sizeof(gtDummy->gtFlags));
+ fprintf(fout, "Offset / size of gtVNPair = %2u / %2u\n", offsetof(GenTree, gtVNPair),
+ sizeof(gtDummy->gtVNPair));
+ fprintf(fout, "Offset / size of gtRsvdRegs = %2u / %2u\n", offsetof(GenTree, gtRsvdRegs),
+ sizeof(gtDummy->gtRsvdRegs));
+#ifdef LEGACY_BACKEND
+ fprintf(fout, "Offset / size of gtUsedRegs = %2u / %2u\n", offsetof(GenTree, gtUsedRegs),
+ sizeof(gtDummy->gtUsedRegs));
+#endif // LEGACY_BACKEND
+#ifndef LEGACY_BACKEND
+ fprintf(fout, "Offset / size of gtLsraInfo = %2u / %2u\n", offsetof(GenTree, gtLsraInfo),
+ sizeof(gtDummy->gtLsraInfo));
+#endif // !LEGACY_BACKEND
+ fprintf(fout, "Offset / size of gtNext = %2u / %2u\n", offsetof(GenTree, gtNext), sizeof(gtDummy->gtNext));
+ fprintf(fout, "Offset / size of gtPrev = %2u / %2u\n", offsetof(GenTree, gtPrev), sizeof(gtDummy->gtPrev));
+ fprintf(fout, "\n");
+
+#if SMALL_TREE_NODES
+ fprintf(fout, "Small tree node size = %3u\n", TREE_NODE_SZ_SMALL);
+#endif // SMALL_TREE_NODES
+ fprintf(fout, "Large tree node size = %3u\n", TREE_NODE_SZ_LARGE);
+ fprintf(fout, "Size of GenTree = %3u\n", sizeof(GenTree));
+ fprintf(fout, "Size of GenTreeUnOp = %3u\n", sizeof(GenTreeUnOp));
+ fprintf(fout, "Size of GenTreeOp = %3u\n", sizeof(GenTreeOp));
+ fprintf(fout, "Size of GenTreeVal = %3u\n", sizeof(GenTreeVal));
+ fprintf(fout, "Size of GenTreeIntConCommon = %3u\n", sizeof(GenTreeIntConCommon));
+ fprintf(fout, "Size of GenTreePhysReg = %3u\n", sizeof(GenTreePhysReg));
+#ifndef LEGACY_BACKEND
+ fprintf(fout, "Size of GenTreeJumpTable = %3u\n", sizeof(GenTreeJumpTable));
+#endif // !LEGACY_BACKEND
+ fprintf(fout, "Size of GenTreeIntCon = %3u\n", sizeof(GenTreeIntCon));
+ fprintf(fout, "Size of GenTreeLngCon = %3u\n", sizeof(GenTreeLngCon));
+ fprintf(fout, "Size of GenTreeDblCon = %3u\n", sizeof(GenTreeDblCon));
+ fprintf(fout, "Size of GenTreeStrCon = %3u\n", sizeof(GenTreeStrCon));
+ fprintf(fout, "Size of GenTreeLclVarCommon = %3u\n", sizeof(GenTreeLclVarCommon));
+ fprintf(fout, "Size of GenTreeLclVar = %3u\n", sizeof(GenTreeLclVar));
+ fprintf(fout, "Size of GenTreeLclFld = %3u\n", sizeof(GenTreeLclFld));
+ fprintf(fout, "Size of GenTreeRegVar = %3u\n", sizeof(GenTreeRegVar));
+ fprintf(fout, "Size of GenTreeCast = %3u\n", sizeof(GenTreeCast));
+ fprintf(fout, "Size of GenTreeBox = %3u\n", sizeof(GenTreeBox));
+ fprintf(fout, "Size of GenTreeField = %3u\n", sizeof(GenTreeField));
+ fprintf(fout, "Size of GenTreeArgList = %3u\n", sizeof(GenTreeArgList));
+ fprintf(fout, "Size of GenTreeColon = %3u\n", sizeof(GenTreeColon));
+ fprintf(fout, "Size of GenTreeCall = %3u\n", sizeof(GenTreeCall));
+ fprintf(fout, "Size of GenTreeCmpXchg = %3u\n", sizeof(GenTreeCmpXchg));
+ fprintf(fout, "Size of GenTreeFptrVal = %3u\n", sizeof(GenTreeFptrVal));
+ fprintf(fout, "Size of GenTreeQmark = %3u\n", sizeof(GenTreeQmark));
+ fprintf(fout, "Size of GenTreeIntrinsic = %3u\n", sizeof(GenTreeIntrinsic));
+ fprintf(fout, "Size of GenTreeIndex = %3u\n", sizeof(GenTreeIndex));
+ fprintf(fout, "Size of GenTreeArrLen = %3u\n", sizeof(GenTreeArrLen));
+ fprintf(fout, "Size of GenTreeBoundsChk = %3u\n", sizeof(GenTreeBoundsChk));
+ fprintf(fout, "Size of GenTreeArrElem = %3u\n", sizeof(GenTreeArrElem));
+ fprintf(fout, "Size of GenTreeAddrMode = %3u\n", sizeof(GenTreeAddrMode));
+ fprintf(fout, "Size of GenTreeIndir = %3u\n", sizeof(GenTreeIndir));
+ fprintf(fout, "Size of GenTreeStoreInd = %3u\n", sizeof(GenTreeStoreInd));
+ fprintf(fout, "Size of GenTreeRetExpr = %3u\n", sizeof(GenTreeRetExpr));
+ fprintf(fout, "Size of GenTreeStmt = %3u\n", sizeof(GenTreeStmt));
+ fprintf(fout, "Size of GenTreeObj = %3u\n", sizeof(GenTreeObj));
+ fprintf(fout, "Size of GenTreeClsVar = %3u\n", sizeof(GenTreeClsVar));
+ fprintf(fout, "Size of GenTreeArgPlace = %3u\n", sizeof(GenTreeArgPlace));
+ fprintf(fout, "Size of GenTreeLabel = %3u\n", sizeof(GenTreeLabel));
+ fprintf(fout, "Size of GenTreePhiArg = %3u\n", sizeof(GenTreePhiArg));
+ fprintf(fout, "Size of GenTreePutArgStk = %3u\n", sizeof(GenTreePutArgStk));
+ fprintf(fout, "\n");
+#endif // MEASURE_NODE_SIZE
+
+#if MEASURE_BLOCK_SIZE
+
+ BasicBlock* bbDummy = nullptr;
+
+ fprintf(fout, "\n");
+ fprintf(fout, "Offset / size of bbNext = %3u / %3u\n", offsetof(BasicBlock, bbNext),
+ sizeof(bbDummy->bbNext));
+ fprintf(fout, "Offset / size of bbNum = %3u / %3u\n", offsetof(BasicBlock, bbNum),
+ sizeof(bbDummy->bbNum));
+ fprintf(fout, "Offset / size of bbPostOrderNum = %3u / %3u\n", offsetof(BasicBlock, bbPostOrderNum),
+ sizeof(bbDummy->bbPostOrderNum));
+ fprintf(fout, "Offset / size of bbRefs = %3u / %3u\n", offsetof(BasicBlock, bbRefs),
+ sizeof(bbDummy->bbRefs));
+ fprintf(fout, "Offset / size of bbFlags = %3u / %3u\n", offsetof(BasicBlock, bbFlags),
+ sizeof(bbDummy->bbFlags));
+ fprintf(fout, "Offset / size of bbWeight = %3u / %3u\n", offsetof(BasicBlock, bbWeight),
+ sizeof(bbDummy->bbWeight));
+ fprintf(fout, "Offset / size of bbJumpKind = %3u / %3u\n", offsetof(BasicBlock, bbJumpKind),
+ sizeof(bbDummy->bbJumpKind));
+ fprintf(fout, "Offset / size of bbJumpOffs = %3u / %3u\n", offsetof(BasicBlock, bbJumpOffs),
+ sizeof(bbDummy->bbJumpOffs));
+ fprintf(fout, "Offset / size of bbJumpDest = %3u / %3u\n", offsetof(BasicBlock, bbJumpDest),
+ sizeof(bbDummy->bbJumpDest));
+ fprintf(fout, "Offset / size of bbJumpSwt = %3u / %3u\n", offsetof(BasicBlock, bbJumpSwt),
+ sizeof(bbDummy->bbJumpSwt));
+ fprintf(fout, "Offset / size of bbTreeList = %3u / %3u\n", offsetof(BasicBlock, bbTreeList),
+ sizeof(bbDummy->bbTreeList));
+ fprintf(fout, "Offset / size of bbEntryState = %3u / %3u\n", offsetof(BasicBlock, bbEntryState),
+ sizeof(bbDummy->bbEntryState));
+ fprintf(fout, "Offset / size of bbStkTempsIn = %3u / %3u\n", offsetof(BasicBlock, bbStkTempsIn),
+ sizeof(bbDummy->bbStkTempsIn));
+ fprintf(fout, "Offset / size of bbStkTempsOut = %3u / %3u\n", offsetof(BasicBlock, bbStkTempsOut),
+ sizeof(bbDummy->bbStkTempsOut));
+ fprintf(fout, "Offset / size of bbTryIndex = %3u / %3u\n", offsetof(BasicBlock, bbTryIndex),
+ sizeof(bbDummy->bbTryIndex));
+ fprintf(fout, "Offset / size of bbHndIndex = %3u / %3u\n", offsetof(BasicBlock, bbHndIndex),
+ sizeof(bbDummy->bbHndIndex));
+ fprintf(fout, "Offset / size of bbCatchTyp = %3u / %3u\n", offsetof(BasicBlock, bbCatchTyp),
+ sizeof(bbDummy->bbCatchTyp));
+ fprintf(fout, "Offset / size of bbStkDepth = %3u / %3u\n", offsetof(BasicBlock, bbStkDepth),
+ sizeof(bbDummy->bbStkDepth));
+ fprintf(fout, "Offset / size of bbFPinVars = %3u / %3u\n", offsetof(BasicBlock, bbFPinVars),
+ sizeof(bbDummy->bbFPinVars));
+ fprintf(fout, "Offset / size of bbPreds = %3u / %3u\n", offsetof(BasicBlock, bbPreds),
+ sizeof(bbDummy->bbPreds));
+ fprintf(fout, "Offset / size of bbReach = %3u / %3u\n", offsetof(BasicBlock, bbReach),
+ sizeof(bbDummy->bbReach));
+ fprintf(fout, "Offset / size of bbIDom = %3u / %3u\n", offsetof(BasicBlock, bbIDom),
+ sizeof(bbDummy->bbIDom));
+ fprintf(fout, "Offset / size of bbDfsNum = %3u / %3u\n", offsetof(BasicBlock, bbDfsNum),
+ sizeof(bbDummy->bbDfsNum));
+ fprintf(fout, "Offset / size of bbCodeOffs = %3u / %3u\n", offsetof(BasicBlock, bbCodeOffs),
+ sizeof(bbDummy->bbCodeOffs));
+ fprintf(fout, "Offset / size of bbCodeOffsEnd = %3u / %3u\n", offsetof(BasicBlock, bbCodeOffsEnd),
+ sizeof(bbDummy->bbCodeOffsEnd));
+ fprintf(fout, "Offset / size of bbVarUse = %3u / %3u\n", offsetof(BasicBlock, bbVarUse),
+ sizeof(bbDummy->bbVarUse));
+ fprintf(fout, "Offset / size of bbVarDef = %3u / %3u\n", offsetof(BasicBlock, bbVarDef),
+ sizeof(bbDummy->bbVarDef));
+ fprintf(fout, "Offset / size of bbVarTmp = %3u / %3u\n", offsetof(BasicBlock, bbVarTmp),
+ sizeof(bbDummy->bbVarTmp));
+ fprintf(fout, "Offset / size of bbLiveIn = %3u / %3u\n", offsetof(BasicBlock, bbLiveIn),
+ sizeof(bbDummy->bbLiveIn));
+ fprintf(fout, "Offset / size of bbLiveOut = %3u / %3u\n", offsetof(BasicBlock, bbLiveOut),
+ sizeof(bbDummy->bbLiveOut));
+ fprintf(fout, "Offset / size of bbHeapSsaPhiFunc = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaPhiFunc),
+ sizeof(bbDummy->bbHeapSsaPhiFunc));
+ fprintf(fout, "Offset / size of bbHeapSsaNumIn = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaNumIn),
+ sizeof(bbDummy->bbHeapSsaNumIn));
+ fprintf(fout, "Offset / size of bbHeapSsaNumOut = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaNumOut),
+ sizeof(bbDummy->bbHeapSsaNumOut));
+
+#ifdef DEBUGGING_SUPPORT
+ fprintf(fout, "Offset / size of bbScope = %3u / %3u\n", offsetof(BasicBlock, bbScope),
+ sizeof(bbDummy->bbScope));
+#endif // DEBUGGING_SUPPORT
+
+ fprintf(fout, "Offset / size of bbCseGen = %3u / %3u\n", offsetof(BasicBlock, bbCseGen),
+ sizeof(bbDummy->bbCseGen));
+ fprintf(fout, "Offset / size of bbCseIn = %3u / %3u\n", offsetof(BasicBlock, bbCseIn),
+ sizeof(bbDummy->bbCseIn));
+ fprintf(fout, "Offset / size of bbCseOut = %3u / %3u\n", offsetof(BasicBlock, bbCseOut),
+ sizeof(bbDummy->bbCseOut));
+
+ fprintf(fout, "Offset / size of bbEmitCookie = %3u / %3u\n", offsetof(BasicBlock, bbEmitCookie),
+ sizeof(bbDummy->bbEmitCookie));
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ fprintf(fout, "Offset / size of bbUnwindNopEmitCookie = %3u / %3u\n", offsetof(BasicBlock, bbUnwindNopEmitCookie),
+ sizeof(bbDummy->bbUnwindNopEmitCookie));
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+#ifdef VERIFIER
+ fprintf(fout, "Offset / size of bbStackIn = %3u / %3u\n", offsetof(BasicBlock, bbStackIn),
+ sizeof(bbDummy->bbStackIn));
+ fprintf(fout, "Offset / size of bbStackOut = %3u / %3u\n", offsetof(BasicBlock, bbStackOut),
+ sizeof(bbDummy->bbStackOut));
+ fprintf(fout, "Offset / size of bbTypesIn = %3u / %3u\n", offsetof(BasicBlock, bbTypesIn),
+ sizeof(bbDummy->bbTypesIn));
+ fprintf(fout, "Offset / size of bbTypesOut = %3u / %3u\n", offsetof(BasicBlock, bbTypesOut),
+ sizeof(bbDummy->bbTypesOut));
+#endif // VERIFIER
+
+#if FEATURE_STACK_FP_X87
+ fprintf(fout, "Offset / size of bbFPStateX87 = %3u / %3u\n", offsetof(BasicBlock, bbFPStateX87),
+ sizeof(bbDummy->bbFPStateX87));
+#endif // FEATURE_STACK_FP_X87
+
+#ifdef DEBUG
+ fprintf(fout, "Offset / size of bbLoopNum = %3u / %3u\n", offsetof(BasicBlock, bbLoopNum),
+ sizeof(bbDummy->bbLoopNum));
+#endif // DEBUG
+
+ fprintf(fout, "\n");
+ fprintf(fout, "Size of BasicBlock = %3u\n", sizeof(BasicBlock));
+
+#endif // MEASURE_BLOCK_SIZE
+
+#if EMITTER_STATS
+ emitterStaticStats(fout);
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Constructor
+ */
+
+void Compiler::compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo)
+{
+ assert(pAlloc);
+ compAllocator = pAlloc;
+
+ // The inlinee Compiler object will only be allocated the first time it is needed.
+ InlineeCompiler = nullptr;
+
+ // Set the inline info.
+ impInlineInfo = inlineInfo;
+
+ eeInfoInitialized = false;
+
+ compDoAggressiveInlining = false;
+
+ if (compIsForInlining())
+ {
+ m_inlineStrategy = nullptr;
+ compInlineResult = inlineInfo->inlineResult;
+ compAsIAllocator = nullptr; // We shouldn't be using compAsIAllocator for anything other than the root compiler.
+#if MEASURE_MEM_ALLOC
+ compAsIAllocatorBitset = nullptr;
+ compAsIAllocatorGC = nullptr;
+ compAsIAllocatorLoopHoist = nullptr;
+#ifdef DEBUG
+ compAsIAllocatorDebugOnly = nullptr;
+#endif // DEBUG
+#endif // MEASURE_MEM_ALLOC
+
+ compQMarks = nullptr;
+ }
+ else
+ {
+ m_inlineStrategy = new (this, CMK_Inlining) InlineStrategy(this);
+ compInlineResult = nullptr;
+ compAsIAllocator = new (this, CMK_Unknown) CompAllocator(this, CMK_AsIAllocator);
+#if MEASURE_MEM_ALLOC
+ compAsIAllocatorBitset = new (this, CMK_Unknown) CompAllocator(this, CMK_bitset);
+ compAsIAllocatorGC = new (this, CMK_Unknown) CompAllocator(this, CMK_GC);
+ compAsIAllocatorLoopHoist = new (this, CMK_Unknown) CompAllocator(this, CMK_LoopHoist);
+#ifdef DEBUG
+ compAsIAllocatorDebugOnly = new (this, CMK_Unknown) CompAllocator(this, CMK_DebugOnly);
+#endif // DEBUG
+#endif // MEASURE_MEM_ALLOC
+
+ compQMarks = new (this, CMK_Unknown) ExpandArrayStack<GenTreePtr>(getAllocator());
+ }
+
+#ifdef FEATURE_TRACELOGGING
+ // Make sure JIT telemetry is initialized as soon as allocations can be made
+ // but no later than a point where noway_asserts can be thrown.
+ // 1. JIT telemetry could allocate some objects internally.
+ // 2. NowayAsserts are tracked through telemetry.
+ // Note: JIT telemetry could gather data when the compiler is not fully initialized,
+ // so you have to initialize any compiler variables you use for telemetry.
+ assert((unsigned)PHASE_PRE_IMPORT == 0);
+ previousCompletedPhase = PHASE_PRE_IMPORT;
+ info.compILCodeSize = 0;
+ info.compMethodHnd = nullptr;
+ compJitTelemetry.Initialize(this);
+#endif
+
+#ifdef DEBUG
+ bRangeAllowStress = false;
+#endif
+
+ fgInit();
+ lvaInit();
+
+ if (!compIsForInlining())
+ {
+ codeGen = getCodeGenerator(this);
+#ifdef LEGACY_BACKEND
+ raInit();
+#endif // LEGACY_BACKEND
+ optInit();
+#ifndef LEGACY_BACKEND
+ hashBv::Init(this);
+#endif // !LEGACY_BACKEND
+
+ compVarScopeMap = nullptr;
+
+ // If this method were a real constructor for Compiler, these would
+ // become method initializations.
+ impPendingBlockMembers = ExpandArray<BYTE>(getAllocator());
+ impSpillCliquePredMembers = ExpandArray<BYTE>(getAllocator());
+ impSpillCliqueSuccMembers = ExpandArray<BYTE>(getAllocator());
+
+ memset(&lvHeapPerSsaData, 0, sizeof(PerSsaArray));
+ lvHeapPerSsaData.Init(getAllocator());
+ lvHeapNumSsaNames = 0;
+
+ //
+ // Initialize all the per-method statistics gathering data structures.
+ //
+
+ optLoopsCloned = 0;
+
+#if MEASURE_MEM_ALLOC
+ genMemStats.Init();
+#endif // MEASURE_MEM_ALLOC
+#if LOOP_HOIST_STATS
+ m_loopsConsidered = 0;
+ m_curLoopHasHoistedExpression = false;
+ m_loopsWithHoistedExpressions = 0;
+ m_totalHoistedExpressions = 0;
+#endif // LOOP_HOIST_STATS
+#if MEASURE_NODE_SIZE
+ genNodeSizeStatsPerFunc.Init();
+#endif // MEASURE_NODE_SIZE
+ }
+ else
+ {
+ codeGen = nullptr;
+ }
+
+ compJmpOpUsed = false;
+ compLongUsed = false;
+ compTailCallUsed = false;
+ compLocallocUsed = false;
+ compQmarkRationalized = false;
+ compQmarkUsed = false;
+ compFloatingPointUsed = false;
+ compUnsafeCastUsed = false;
+#if CPU_USES_BLOCK_MOVE
+ compBlkOpUsed = false;
+#endif
+#if FEATURE_STACK_FP_X87
+ compMayHaveTransitionBlocks = false;
+#endif
+ compNeedsGSSecurityCookie = false;
+ compGSReorderStackLayout = false;
+#if STACK_PROBES
+ compStackProbePrologDone = false;
+#endif
+
+ compGeneratingProlog = false;
+ compGeneratingEpilog = false;
+
+#ifndef LEGACY_BACKEND
+ compLSRADone = false;
+#endif // !LEGACY_BACKEND
+ compRationalIRForm = false;
+
+#ifdef DEBUG
+ compCodeGenDone = false;
+ compRegSetCheckLevel = 0;
+ opts.compMinOptsIsUsed = false;
+#endif
+ opts.compMinOptsIsSet = false;
+
+ // Used by fgFindJumpTargets for inlining heuristics.
+ opts.instrCount = 0;
+
+ // Used to track when we should consider running EarlyProp
+ optMethodFlags = 0;
+
+ for (unsigned i = 0; i < MAX_LOOP_NUM; i++)
+ {
+ AllVarSetOps::AssignNoCopy(this, optLoopTable[i].lpAsgVars, AllVarSetOps::UninitVal());
+ }
+
+#ifdef DEBUG
+ m_nodeTestData = nullptr;
+ m_loopHoistCSEClass = FIRST_LOOP_HOIST_CSE_CLASS;
+#endif
+ m_switchDescMap = nullptr;
+ m_blockToEHPreds = nullptr;
+ m_fieldSeqStore = nullptr;
+ m_zeroOffsetFieldMap = nullptr;
+ m_arrayInfoMap = nullptr;
+ m_heapSsaMap = nullptr;
+ m_refAnyClass = nullptr;
+
+#ifdef DEBUG
+ if (!compIsForInlining())
+ {
+ compDoComponentUnitTestsOnce();
+ }
+#endif // DEBUG
+
+ vnStore = nullptr;
+ m_opAsgnVarDefSsaNums = nullptr;
+ m_indirAssignMap = nullptr;
+ fgSsaPassesCompleted = 0;
+ fgVNPassesCompleted = 0;
+
+ // check that HelperCallProperties are initialized
+
+ assert(s_helperCallProperties.IsPure(CORINFO_HELP_GETSHARED_GCSTATIC_BASE));
+ assert(!s_helperCallProperties.IsPure(CORINFO_HELP_GETFIELDOBJ)); // quick sanity check
+
+ // We start with the flow graph in tree-order
+ fgOrder = FGOrderTree;
+
+#ifdef FEATURE_SIMD
+ // SIMD Types
+ SIMDFloatHandle = nullptr;
+ SIMDDoubleHandle = nullptr;
+ SIMDIntHandle = nullptr;
+ SIMDUShortHandle = nullptr;
+ SIMDUByteHandle = nullptr;
+ SIMDShortHandle = nullptr;
+ SIMDByteHandle = nullptr;
+ SIMDLongHandle = nullptr;
+ SIMDUIntHandle = nullptr;
+ SIMDULongHandle = nullptr;
+ SIMDVector2Handle = nullptr;
+ SIMDVector3Handle = nullptr;
+ SIMDVector4Handle = nullptr;
+ SIMDVectorHandle = nullptr;
+#endif
+
+#ifdef DEBUG
+ inlRNG = nullptr;
+#endif
+
+ compUsesThrowHelper = false;
+}
+
+/*****************************************************************************
+ *
+ * Destructor
+ */
+
+void Compiler::compDone()
+{
+}
+
+void* Compiler::compGetHelperFtn(CorInfoHelpFunc ftnNum, /* IN */
+ void** ppIndirection) /* OUT */
+{
+ void* addr;
+
+ if (info.compMatchedVM)
+ {
+ addr = info.compCompHnd->getHelperFtn(ftnNum, ppIndirection);
+ }
+ else
+ {
+ // If we don't have a matched VM, we won't get valid results when asking for a helper function.
+ addr = (void*)0xCA11CA11; // "callcall"
+ }
+
+ return addr;
+}
+
+unsigned Compiler::compGetTypeSize(CorInfoType cit, CORINFO_CLASS_HANDLE clsHnd)
+{
+ var_types sigType = genActualType(JITtype2varType(cit));
+ unsigned sigSize;
+ sigSize = genTypeSize(sigType);
+ if (cit == CORINFO_TYPE_VALUECLASS)
+ {
+ sigSize = info.compCompHnd->getClassSize(clsHnd);
+ }
+ else if (cit == CORINFO_TYPE_REFANY)
+ {
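+ // A TypedReference (refany) consists of a managed pointer plus a type handle,
+ // i.e. two pointer-sized slots.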
+ sigSize = 2 * sizeof(void*);
+ }
+ return sigSize;
+}
+
+#ifdef DEBUG
+static bool DidComponentUnitTests = false;
+
+void Compiler::compDoComponentUnitTestsOnce()
+{
+ if (!JitConfig.RunComponentUnitTests())
+ {
+ return;
+ }
+
+ if (!DidComponentUnitTests)
+ {
+ DidComponentUnitTests = true;
+ ValueNumStore::RunTests(this);
+ BitSetSupport::TestSuite(getAllocatorDebugOnly());
+ }
+}
+#endif // DEBUG
+
+/******************************************************************************
+ *
+ * The Emitter uses this callback function to allocate its memory
+ */
+
+/* static */
+void* Compiler::compGetMemCallback(void* p, size_t size, CompMemKind cmk)
+{
+ assert(p);
+
+ return ((Compiler*)p)->compGetMem(size, cmk);
+}
+
+/*****************************************************************************
+ *
+ * The central memory allocation routine used by the compiler. Normally this
+ * is a simple inline method defined in compiler.hpp, but for debugging it's
+ * often convenient to keep it non-inline.
+ */
+
+#ifdef DEBUG
+
+void* Compiler::compGetMem(size_t sz, CompMemKind cmk)
+{
+#if 0
+#if SMALL_TREE_NODES
+ if (sz != TREE_NODE_SZ_SMALL &&
+ sz != TREE_NODE_SZ_LARGE && sz > 32)
+ {
+ printf("Alloc %3u bytes\n", sz);
+ }
+#else
+ if (sz != sizeof(GenTree) && sz > 32)
+ {
+ printf("Alloc %3u bytes\n", sz);
+ }
+#endif
+#endif // 0
+
+#if MEASURE_MEM_ALLOC
+ genMemStats.AddAlloc(sz, cmk);
+#endif
+
+ void* ptr = compAllocator->allocateMemory(sz);
+
+ // Verify that the current block is aligned. Only then will the next
+ // block allocated be on an aligned boundary.
+ assert((size_t(ptr) & (sizeof(size_t) - 1)) == 0);
+
+ return ptr;
+}
+
+#endif
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************/
+
+VarName Compiler::compVarName(regNumber reg, bool isFloatReg)
+{
+ if (isFloatReg)
+ {
+#if FEATURE_STACK_FP_X87
+ assert(reg < FP_STK_SIZE); // would like to have same assert as below but sometimes you get -1?
+#else
+ assert(genIsValidFloatReg(reg));
+#endif
+ }
+ else
+ {
+ assert(genIsValidReg(reg));
+ }
+
+ if ((info.compVarScopesCount > 0) && compCurBB && opts.varNames)
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ /* Look for the matching register */
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* If the variable is not in a register, or not in the register we're looking for, quit. */
+ /* Also, if it is a compiler-generated variable (i.e. slot# >= info.compVarScopesCount), don't bother. */
+ if ((varDsc->lvRegister != 0) && (varDsc->lvRegNum == reg) && (varDsc->IsFloatRegType() || !isFloatReg) &&
+ (varDsc->lvSlotNum < info.compVarScopesCount))
+ {
+ /* check if variable in that register is live */
+ if (VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
+ {
+ /* variable is live - find the corresponding slot */
+ VarScopeDsc* varScope =
+ compFindLocalVar(varDsc->lvSlotNum, compCurBB->bbCodeOffs, compCurBB->bbCodeOffsEnd);
+ if (varScope)
+ {
+ return varScope->vsdName;
+ }
+ }
+ }
+ }
+
+#ifdef LEGACY_BACKEND
+ // maybe var is marked dead, but still used (last use)
+ if (!isFloatReg && codeGen->regSet.rsUsedTree[reg] != NULL)
+ {
+ GenTreePtr nodePtr;
+
+ if (GenTree::OperIsUnary(codeGen->regSet.rsUsedTree[reg]->OperGet()))
+ {
+ assert(codeGen->regSet.rsUsedTree[reg]->gtOp.gtOp1 != NULL);
+ nodePtr = codeGen->regSet.rsUsedTree[reg]->gtOp.gtOp1;
+ }
+ else
+ {
+ nodePtr = codeGen->regSet.rsUsedTree[reg];
+ }
+
+ if ((nodePtr->gtOper == GT_REG_VAR) && (nodePtr->gtRegVar.gtRegNum == reg) &&
+ (nodePtr->gtRegVar.gtLclNum < info.compVarScopesCount))
+ {
+ VarScopeDsc* varScope =
+ compFindLocalVar(nodePtr->gtRegVar.gtLclNum, compCurBB->bbCodeOffs, compCurBB->bbCodeOffsEnd);
+ if (varScope)
+ return varScope->vsdName;
+ }
+ }
+#endif // LEGACY_BACKEND
+ }
+ return nullptr;
+}
+
+const char* Compiler::compRegVarName(regNumber reg, bool displayVar, bool isFloatReg)
+{
+
+#ifdef _TARGET_ARM_
+ isFloatReg = genIsValidFloatReg(reg);
+#endif
+
+ if (displayVar && (reg != REG_NA))
+ {
+ VarName varName = compVarName(reg, isFloatReg);
+
+ if (varName)
+ {
+ const int NAME_VAR_REG_BUFFER_LEN = 4 + 256 + 1;
+ static char nameVarReg[2][NAME_VAR_REG_BUFFER_LEN]; // to avoid overwriting the buffer when we have 2
+ // consecutive calls before printing
+ static int index = 0; // for circular index into the name array
+
+ index = (index + 1) % 2; // circular reuse of index
+ sprintf_s(nameVarReg[index], NAME_VAR_REG_BUFFER_LEN, "%s'%s'", getRegName(reg, isFloatReg),
+ VarNameToStr(varName));
+
+ return nameVarReg[index];
+ }
+ }
+
+ /* no debug info required or no variable in that register
+ -> return standard name */
+
+ return getRegName(reg, isFloatReg);
+}
+
+#define MAX_REG_PAIR_NAME_LENGTH 10
+
+const char* Compiler::compRegPairName(regPairNo regPair)
+{
+ static char regNameLong[MAX_REG_PAIR_NAME_LENGTH];
+
+ if (regPair == REG_PAIR_NONE)
+ {
+ return "NA|NA";
+ }
+
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+
+ strcpy_s(regNameLong, sizeof(regNameLong), compRegVarName(genRegPairLo(regPair)));
+ strcat_s(regNameLong, sizeof(regNameLong), "|");
+ strcat_s(regNameLong, sizeof(regNameLong), compRegVarName(genRegPairHi(regPair)));
+ return regNameLong;
+}
+
+const char* Compiler::compRegNameForSize(regNumber reg, size_t size)
+{
+ if (size == 0 || size >= 4)
+ {
+ return compRegVarName(reg, true);
+ }
+
+ // clang-format off
+ static
+ const char * sizeNames[][2] =
+ {
+ { "al", "ax" },
+ { "cl", "cx" },
+ { "dl", "dx" },
+ { "bl", "bx" },
+#ifdef _TARGET_AMD64_
+ { "spl", "sp" }, // ESP
+ { "bpl", "bp" }, // EBP
+ { "sil", "si" }, // ESI
+ { "dil", "di" }, // EDI
+ { "r8b", "r8w" },
+ { "r9b", "r9w" },
+ { "r10b", "r10w" },
+ { "r11b", "r11w" },
+ { "r12b", "r12w" },
+ { "r13b", "r13w" },
+ { "r14b", "r14w" },
+ { "r15b", "r15w" },
+#endif // _TARGET_AMD64_
+ };
+ // clang-format on
+
+ assert(isByteReg(reg));
+ assert(genRegMask(reg) & RBM_BYTE_REGS);
+ assert(size == 1 || size == 2);
+
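+ // Index the table by register number; column 0 holds the byte-sized name and
+ // column 1 the word-sized name (size is 1 or 2 here).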
+ return sizeNames[reg][size - 1];
+}
+
+const char* Compiler::compFPregVarName(unsigned fpReg, bool displayVar)
+{
+ const int NAME_VAR_REG_BUFFER_LEN = 4 + 256 + 1;
+ static char nameVarReg[2][NAME_VAR_REG_BUFFER_LEN]; // to avoid overwriting the buffer when we have 2 consecutive calls
+ // before printing
+ static int index = 0; // for circular index into the name array
+
+ index = (index + 1) % 2; // circular reuse of index
+
+#if FEATURE_STACK_FP_X87
+ /* 'fpReg' is the distance from the bottom of the stack, i.e.
+ * it is independent of the current FP stack level
+ */
+
+ if (displayVar && codeGen->genFPregCnt)
+ {
+ assert(fpReg < FP_STK_SIZE);
+ assert(compCodeGenDone || (fpReg <= codeGen->compCurFPState.m_uStackSize));
+
+ int pos = codeGen->genFPregCnt - (fpReg + 1 - codeGen->genGetFPstkLevel());
+ if (pos >= 0)
+ {
+ VarName varName = compVarName((regNumber)pos, true);
+
+ if (varName)
+ {
+ sprintf_s(nameVarReg[index], NAME_VAR_REG_BUFFER_LEN, "ST(%d)'%s'", fpReg, VarNameToStr(varName));
+ return nameVarReg[index];
+ }
+ }
+ }
+#endif // FEATURE_STACK_FP_X87
+
+ /* no debug info required or no variable in that register
+ -> return standard name */
+
+ sprintf_s(nameVarReg[index], NAME_VAR_REG_BUFFER_LEN, "ST(%d)", fpReg);
+ return nameVarReg[index];
+}
+
+const char* Compiler::compLocalVarName(unsigned varNum, unsigned offs)
+{
+ unsigned i;
+ VarScopeDsc* t;
+
+ for (i = 0, t = info.compVarScopes; i < info.compVarScopesCount; i++, t++)
+ {
+ if (t->vsdVarNum != varNum)
+ {
+ continue;
+ }
+
+ if (offs >= t->vsdLifeBeg && offs < t->vsdLifeEnd)
+ {
+ return VarNameToStr(t->vsdName);
+ }
+ }
+
+ return nullptr;
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+/*****************************************************************************/
+
+void Compiler::compSetProcessor()
+{
+ unsigned compileFlags = opts.eeFlags;
+
+#if defined(_TARGET_ARM_)
+ info.genCPU = CPU_ARM;
+#elif defined(_TARGET_AMD64_)
+ info.genCPU = CPU_X64;
+#elif defined(_TARGET_X86_)
+ if (compileFlags & CORJIT_FLG_TARGET_P4)
+ info.genCPU = CPU_X86_PENTIUM_4;
+ else
+ info.genCPU = CPU_X86;
+#endif
+
+ //
+ // Processor specific optimizations
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_AMD64_
+ opts.compUseFCOMI = false;
+ opts.compUseCMOV = true;
+ opts.compCanUseSSE2 = true;
+
+#ifdef FEATURE_AVX_SUPPORT
+ // COMPlus_EnableAVX can be used to disable the use of AVX even when it is available on the target machine.
+ // Note that FEATURE_AVX_SUPPORT is not enabled for ctpjit
+ opts.compCanUseAVX = false;
+ if (((compileFlags & CORJIT_FLG_PREJIT) == 0) && ((compileFlags & CORJIT_FLG_USE_AVX2) != 0))
+ {
+ if (JitConfig.EnableAVX() != 0)
+ {
+ opts.compCanUseAVX = true;
+ if (!compIsForInlining())
+ {
+ codeGen->getEmitter()->SetUseAVX(true);
+ }
+ }
+ }
+#endif
+#endif //_TARGET_AMD64_
+
+#ifdef _TARGET_X86_
+ opts.compUseFCOMI = ((opts.eeFlags & CORJIT_FLG_USE_FCOMI) != 0);
+ opts.compUseCMOV = ((opts.eeFlags & CORJIT_FLG_USE_CMOV) != 0);
+ opts.compCanUseSSE2 = ((opts.eeFlags & CORJIT_FLG_USE_SSE2) != 0);
+
+#ifdef DEBUG
+ if (opts.compUseFCOMI)
+ opts.compUseFCOMI = !compStressCompile(STRESS_USE_FCOMI, 50);
+ if (opts.compUseCMOV)
+ opts.compUseCMOV = !compStressCompile(STRESS_USE_CMOV, 50);
+
+ // Should we override the SSE2 setting
+ enum
+ {
+ SSE2_FORCE_DISABLE = 0,
+ SSE2_FORCE_USE = 1,
+ SSE2_FORCE_INVALID = -1
+ };
+
+ if (JitConfig.JitCanUseSSE2() == SSE2_FORCE_DISABLE)
+ opts.compCanUseSSE2 = false;
+ else if (JitConfig.JitCanUseSSE2() == SSE2_FORCE_USE)
+ opts.compCanUseSSE2 = true;
+ else if (opts.compCanUseSSE2)
+ opts.compCanUseSSE2 = !compStressCompile(STRESS_GENERIC_VARN, 50);
+#endif // DEBUG
+#endif // _TARGET_X86_
+}
+
+#ifdef PROFILING_SUPPORTED
+// A dummy routine to receive Enter/Leave/Tailcall profiler callbacks.
+// These are used when COMPlus_JitEltHookEnabled=1
+#ifdef _TARGET_AMD64_
+void DummyProfilerELTStub(UINT_PTR ProfilerHandle, UINT_PTR callerSP)
+{
+ return;
+}
+#else //! _TARGET_AMD64_
+void DummyProfilerELTStub(UINT_PTR ProfilerHandle)
+{
+ return;
+}
+#endif //!_TARGET_AMD64_
+
+#endif // PROFILING_SUPPORTED
+
+bool Compiler::compIsFullTrust()
+{
+ return (info.compCompHnd->canSkipMethodVerification(info.compMethodHnd) == CORINFO_VERIFICATION_CAN_SKIP);
+}
+
+bool Compiler::compShouldThrowOnNoway(
+#ifdef FEATURE_TRACELOGGING
+ const char* filename, unsigned line
+#endif
+ )
+{
+#ifdef FEATURE_TRACELOGGING
+ compJitTelemetry.NotifyNowayAssert(filename, line);
+#endif
+ // In min opts, we don't want the noway assert to go through the exception
+ // path. Instead we want it to just silently go through codegen for
+ // compat reasons.
+ // If we are not in full trust, we should always fire for security.
+ return !opts.MinOpts() || !compIsFullTrust();
+}
+
+// ConfigInteger does not offer an option for decimal values; any number it reads is interpreted as hex.
+// Rather than adding a decimal option to ConfigInteger, this helper reinterprets the hex-parsed
+// value as the decimal number the user intended.
+unsigned ReinterpretHexAsDecimal(unsigned in)
+{
+ // ex: in: 0x100 returns: 100
+ unsigned result = 0;
+ unsigned index = 1;
+
+ // default value
+ if (in == INT_MAX)
+ {
+ return in;
+ }
+
+ while (in)
+ {
+ unsigned digit = in % 16;
+ in >>= 4;
+ assert(digit < 10);
+ result += digit * index;
+ index *= 10;
+ }
+ return result;
+}
+
+void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
+{
+#ifdef UNIX_AMD64_ABI
+ opts.compNeedToAlignFrame = false;
+#endif // UNIX_AMD64_ABI
+ memset(&opts, 0, sizeof(opts));
+
+ unsigned compileFlags = jitFlags->corJitFlags;
+
+ if (compIsForInlining())
+ {
+ assert((compileFlags & CORJIT_FLG_LOST_WHEN_INLINING) == 0);
+ assert(compileFlags & CORJIT_FLG_SKIP_VERIFICATION);
+ }
+
+ opts.jitFlags = jitFlags;
+ opts.eeFlags = compileFlags;
+ opts.compFlags = CLFLG_MAXOPT; // Default value is for full optimization
+
+ if (opts.eeFlags & (CORJIT_FLG_DEBUG_CODE | CORJIT_FLG_MIN_OPT))
+ {
+ opts.compFlags = CLFLG_MINOPT;
+ }
+ // Don't optimize .cctors (except prejit) or if we're an inlinee
+ else if (!(opts.eeFlags & CORJIT_FLG_PREJIT) && ((info.compFlags & FLG_CCTOR) == FLG_CCTOR) && !compIsForInlining())
+ {
+ opts.compFlags = CLFLG_MINOPT;
+ }
+
+ // Default value is to generate a blend of size and speed optimizations
+ //
+ opts.compCodeOpt = BLENDED_CODE;
+
+ // If the EE sets SIZE_OPT or if we are compiling a Class constructor
+ // we will optimize for code size at the expense of speed
+ //
+ if ((opts.eeFlags & CORJIT_FLG_SIZE_OPT) || ((info.compFlags & FLG_CCTOR) == FLG_CCTOR))
+ {
+ opts.compCodeOpt = SMALL_CODE;
+ }
+ //
+ // If the EE sets SPEED_OPT we will optimize for speed at the expense of code size
+ //
+ else if (opts.eeFlags & CORJIT_FLG_SPEED_OPT)
+ {
+ opts.compCodeOpt = FAST_CODE;
+ assert((opts.eeFlags & CORJIT_FLG_SIZE_OPT) == 0);
+ }
+
+//-------------------------------------------------------------------------
+
+#ifdef DEBUGGING_SUPPORT
+ opts.compDbgCode = (opts.eeFlags & CORJIT_FLG_DEBUG_CODE) != 0;
+ opts.compDbgInfo = (opts.eeFlags & CORJIT_FLG_DEBUG_INFO) != 0;
+ opts.compDbgEnC = (opts.eeFlags & CORJIT_FLG_DEBUG_EnC) != 0;
+#if REGEN_SHORTCUTS || REGEN_CALLPAT
+ // We never want to have debugging enabled when regenerating GC encoding patterns
+ opts.compDbgCode = false;
+ opts.compDbgInfo = false;
+ opts.compDbgEnC = false;
+#endif
+#endif
+
+ compSetProcessor();
+
+#ifdef DEBUG
+ opts.dspOrder = false;
+ if (compIsForInlining())
+ {
+ verbose = impInlineInfo->InlinerCompiler->verbose;
+ }
+ else
+ {
+ verbose = false;
+ codeGen->setVerbose(false);
+ }
+ verboseTrees = verbose && shouldUseVerboseTrees();
+ verboseSsa = verbose && shouldUseVerboseSsa();
+ asciiTrees = shouldDumpASCIITrees();
+ opts.dspDiffable = compIsForInlining() ? impInlineInfo->InlinerCompiler->opts.dspDiffable : false;
+#endif
+
+ opts.compNeedSecurityCheck = false;
+ opts.altJit = false;
+
+#if defined(LATE_DISASM) && !defined(DEBUG)
+ // For non-debug builds with the late disassembler built in, we currently always do late disassembly
+ // (we have no way to determine when not to, since we don't have class/method names).
+ // In the DEBUG case, this is initialized to false, below.
+ opts.doLateDisasm = true;
+#endif
+
+#ifdef DEBUG
+
+ const JitConfigValues::MethodSet* pfAltJit;
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ pfAltJit = &JitConfig.AltJitNgen();
+ }
+ else
+ {
+ pfAltJit = &JitConfig.AltJit();
+ }
+
+#ifdef ALT_JIT
+ if (pfAltJit->contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.altJit = true;
+ }
+
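+ // AltJitLimit is read as hex by ConfigInteger; ReinterpretHexAsDecimal (above) recovers the
+ // decimal value the user intended.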
+ unsigned altJitLimit = ReinterpretHexAsDecimal(JitConfig.AltJitLimit());
+ if (altJitLimit > 0 && Compiler::jitTotalMethodCompiled >= altJitLimit)
+ {
+ opts.altJit = false;
+ }
+#endif // ALT_JIT
+
+#else // !DEBUG
+
+ const char* altJitVal;
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ altJitVal = JitConfig.AltJitNgen().list();
+ }
+ else
+ {
+ altJitVal = JitConfig.AltJit().list();
+ }
+
+#ifdef ALT_JIT
+ // In release mode, you either get all methods or no methods. You must use "*" as the parameter, or we ignore it.
+ // You don't get to give a regular expression of methods to match.
+ // (Partially, this is because we haven't computed and stored the method and class name except in debug, and it
+ // might be expensive to do so.)
+ if ((altJitVal != nullptr) && (strcmp(altJitVal, "*") == 0))
+ {
+ opts.altJit = true;
+ }
+#endif // ALT_JIT
+
+#endif // !DEBUG
+
+#ifdef ALT_JIT
+ // Take care of COMPlus_AltJitExcludeAssemblies.
+ if (opts.altJit)
+ {
+ // First, initialize the AltJitExcludeAssemblies list, but only do it once.
+ if (!s_pAltJitExcludeAssembliesListInitialized)
+ {
+ const wchar_t* wszAltJitExcludeAssemblyList = JitConfig.AltJitExcludeAssemblies();
+ if (wszAltJitExcludeAssemblyList != nullptr)
+ {
+ // NOTE: The Assembly name list is allocated in the process heap, not in the no-release heap, which is
+ // reclaimed for every compilation. This is ok because we only allocate once, due to the static.
+ s_pAltJitExcludeAssembliesList = new (HostAllocator::getHostAllocator())
+ AssemblyNamesList2(wszAltJitExcludeAssemblyList, HostAllocator::getHostAllocator());
+ }
+ s_pAltJitExcludeAssembliesListInitialized = true;
+ }
+
+ if (s_pAltJitExcludeAssembliesList != nullptr)
+ {
+ // We have an exclusion list. See if this method is in an assembly that is on the list.
+ // Note that we check this for every method, since we might inline across modules, and
+ // if the inlinee module is on the list, we don't want to use the altjit for it.
+ const char* methodAssemblyName = info.compCompHnd->getAssemblyName(
+ info.compCompHnd->getModuleAssembly(info.compCompHnd->getClassModule(info.compClassHnd)));
+ if (s_pAltJitExcludeAssembliesList->IsInList(methodAssemblyName))
+ {
+ opts.altJit = false;
+ }
+ }
+ }
+#endif // ALT_JIT
+
+#ifdef DEBUG
+
+ bool altJitConfig = !pfAltJit->isEmpty();
+
+ // If we have a non-empty AltJit config then we change all of these other
+ // config values to refer only to the AltJit. Otherwise, a lot of COMPlus_* variables
+ // would apply to both the altjit and the normal JIT, but we only care about
+ // debugging the altjit if the COMPlus_AltJit configuration is set.
+ //
+ if (compIsForImportOnly() && (!altJitConfig || opts.altJit))
+ {
+ if (JitConfig.JitImportBreak().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ assert(!"JitImportBreak reached");
+ }
+ }
+
+ bool verboseDump = false;
+ bool dumpIR = false;
+ bool dumpIRTypes = false;
+ bool dumpIRLocals = false;
+ bool dumpIRRegs = false;
+ bool dumpIRSsa = false;
+ bool dumpIRValnums = false;
+ bool dumpIRCosts = false;
+ bool dumpIRFlags = false;
+ bool dumpIRKinds = false;
+ bool dumpIRNodes = false;
+ bool dumpIRNoLists = false;
+ bool dumpIRNoLeafs = false;
+ bool dumpIRNoStmts = false;
+ bool dumpIRTrees = false;
+ bool dumpIRLinear = false;
+ bool dumpIRDataflow = false;
+ bool dumpIRBlockHeaders = false;
+ bool dumpIRExit = false;
+ LPCWSTR dumpIRPhase = nullptr;
+ LPCWSTR dumpIRFormat = nullptr;
+
+ if (!altJitConfig || opts.altJit)
+ {
+ LPCWSTR dumpIRFormat = nullptr;
+
+ // We should only enable 'verboseDump' when we are actually compiling a matching method
+ // and not enable it when we are just considering inlining a matching method.
+ //
+ if (!compIsForInlining())
+ {
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ if (JitConfig.NgenDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ verboseDump = true;
+ }
+ unsigned ngenHashDumpVal = (unsigned)JitConfig.NgenHashDump();
+ if ((ngenHashDumpVal != (DWORD)-1) && (ngenHashDumpVal == info.compMethodHash()))
+ {
+ verboseDump = true;
+ }
+ if (JitConfig.NgenDumpIR().contains(info.compMethodName, info.compClassName,
+ &info.compMethodInfo->args))
+ {
+ dumpIR = true;
+ }
+ unsigned ngenHashDumpIRVal = (unsigned)JitConfig.NgenHashDumpIR();
+ if ((ngenHashDumpIRVal != (DWORD)-1) && (ngenHashDumpIRVal == info.compMethodHash()))
+ {
+ dumpIR = true;
+ }
+ dumpIRFormat = JitConfig.NgenDumpIRFormat();
+ dumpIRPhase = JitConfig.NgenDumpIRPhase();
+ }
+ else
+ {
+ if (JitConfig.JitDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ verboseDump = true;
+ }
+ unsigned jitHashDumpVal = (unsigned)JitConfig.JitHashDump();
+ if ((jitHashDumpVal != (DWORD)-1) && (jitHashDumpVal == info.compMethodHash()))
+ {
+ verboseDump = true;
+ }
+ if (JitConfig.JitDumpIR().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ dumpIR = true;
+ }
+ unsigned jitHashDumpIRVal = (unsigned)JitConfig.JitHashDumpIR();
+ if ((jitHashDumpIRVal != (DWORD)-1) && (jitHashDumpIRVal == info.compMethodHash()))
+ {
+ dumpIR = true;
+ }
+ dumpIRFormat = JitConfig.JitDumpIRFormat();
+ dumpIRPhase = JitConfig.JitDumpIRPhase();
+ }
+ }
+
+ if (dumpIRPhase == nullptr)
+ {
+ dumpIRPhase = W("*");
+ }
+
+ this->dumpIRPhase = dumpIRPhase;
+
+ if (dumpIRFormat != nullptr)
+ {
+ this->dumpIRFormat = dumpIRFormat;
+ }
+
+ dumpIRTrees = false;
+ dumpIRLinear = true;
+ if (dumpIRFormat != nullptr)
+ {
+ for (LPCWSTR p = dumpIRFormat; (*p != 0);)
+ {
+ for (; (*p != 0); p++)
+ {
+ if (*p != L' ')
+ {
+ break;
+ }
+ }
+
+ if (*p == 0)
+ {
+ break;
+ }
+
+ static bool dumpedHelp = false;
+
+ if ((*p == L'?') && (!dumpedHelp))
+ {
+ printf("*******************************************************************************\n");
+ printf("\n");
+ dFormatIR();
+ printf("\n");
+ printf("\n");
+ printf("Available specifiers (comma separated):\n");
+ printf("\n");
+ printf("? dump out value of COMPlus_JitDumpIRFormat and this list of values\n");
+ printf("\n");
+ printf("linear linear IR dump (default)\n");
+ printf("tree tree IR dump (traditional)\n");
+ printf("mixed intermingle tree dump with linear IR dump\n");
+ printf("\n");
+ printf("dataflow use data flow form of linear IR dump\n");
+ printf("structural use structural form of linear IR dump\n");
+ printf("all implies structural, include everything\n");
+ printf("\n");
+ printf("kinds include tree node kinds in dump, example: \"kinds=[LEAF][LOCAL]\"\n");
+ printf("flags include tree node flags in dump, example: \"flags=[CALL][GLOB_REF]\" \n");
+ printf("types include tree node types in dump, example: \".int\"\n");
+ printf("locals include local numbers and tracking numbers in dump, example: \"(V3,T1)\"\n");
+ printf("regs include register assignments in dump, example: \"(rdx)\"\n");
+ printf("ssa include SSA numbers in dump, example: \"<d:3>\" or \"<u:3>\"\n");
+ printf("valnums include Value numbers in dump, example: \"<v:$c4>\" or \"<v:$c4,$c5>\"\n");
+ printf("\n");
+ printf("nolists exclude GT_LIST nodes from dump\n");
+ printf("noleafs exclude LEAF nodes from dump (fold into operations)\n");
+ printf("nostmts exclude GT_STMTS from dump (unless required by dependencies)\n");
+ printf("\n");
+ printf("blkhdrs include block headers\n");
+ printf("exit exit program after last phase dump (used with single method)\n");
+ printf("\n");
+ printf("*******************************************************************************\n");
+ dumpedHelp = true;
+ }
+
+ if (wcsncmp(p, W("types"), 5) == 0)
+ {
+ dumpIRTypes = true;
+ }
+
+ if (wcsncmp(p, W("locals"), 6) == 0)
+ {
+ dumpIRLocals = true;
+ }
+
+ if (wcsncmp(p, W("regs"), 4) == 0)
+ {
+ dumpIRRegs = true;
+ }
+
+ if (wcsncmp(p, W("ssa"), 3) == 0)
+ {
+ dumpIRSsa = true;
+ }
+
+ if (wcsncmp(p, W("valnums"), 7) == 0)
+ {
+ dumpIRValnums = true;
+ }
+
+ if (wcsncmp(p, W("costs"), 5) == 0)
+ {
+ dumpIRCosts = true;
+ }
+
+ if (wcsncmp(p, W("flags"), 5) == 0)
+ {
+ dumpIRFlags = true;
+ }
+
+ if (wcsncmp(p, W("kinds"), 5) == 0)
+ {
+ dumpIRKinds = true;
+ }
+
+ if (wcsncmp(p, W("nodes"), 5) == 0)
+ {
+ dumpIRNodes = true;
+ }
+
+ if (wcsncmp(p, W("exit"), 4) == 0)
+ {
+ dumpIRExit = true;
+ }
+
+ if (wcsncmp(p, W("nolists"), 7) == 0)
+ {
+ dumpIRNoLists = true;
+ }
+
+ if (wcsncmp(p, W("noleafs"), 7) == 0)
+ {
+ dumpIRNoLeafs = true;
+ }
+
+ if (wcsncmp(p, W("nostmts"), 7) == 0)
+ {
+ dumpIRNoStmts = true;
+ }
+
+ if (wcsncmp(p, W("trees"), 5) == 0)
+ {
+ dumpIRTrees = true;
+ dumpIRLinear = false;
+ }
+
+ if (wcsncmp(p, W("structural"), 10) == 0)
+ {
+ dumpIRLinear = true;
+ dumpIRNoStmts = false;
+ dumpIRNoLeafs = false;
+ dumpIRNoLists = false;
+ }
+
+ if (wcsncmp(p, W("all"), 3) == 0)
+ {
+ dumpIRLinear = true;
+ dumpIRKinds = true;
+ dumpIRFlags = true;
+ dumpIRTypes = true;
+ dumpIRLocals = true;
+ dumpIRRegs = true;
+ dumpIRSsa = true;
+ dumpIRValnums = true;
+ dumpIRCosts = true;
+ dumpIRNoStmts = false;
+ dumpIRNoLeafs = false;
+ dumpIRNoLists = false;
+ }
+
+ if (wcsncmp(p, W("linear"), 6) == 0)
+ {
+ dumpIRTrees = false;
+ dumpIRLinear = true;
+ }
+
+ if (wcsncmp(p, W("mixed"), 5) == 0)
+ {
+ dumpIRTrees = true;
+ dumpIRLinear = true;
+ }
+
+ if (wcsncmp(p, W("dataflow"), 8) == 0)
+ {
+ dumpIRDataflow = true;
+ dumpIRNoLeafs = true;
+ dumpIRNoLists = true;
+ dumpIRNoStmts = true;
+ }
+
+ if (wcsncmp(p, W("blkhdrs"), 7) == 0)
+ {
+ dumpIRBlockHeaders = true;
+ }
+
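+ // Advance past the rest of this specifier to the character after the next ',',
+ // so the outer loop resumes at the next comma-separated token (or stops at the
+ // terminating NUL).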
+ for (; (*p != 0); p++)
+ {
+ if (*p == L',')
+ {
+ p++;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ if (verboseDump)
+ {
+ verbose = true;
+ }
+
+ if (dumpIR)
+ {
+ this->dumpIR = true;
+ }
+
+ if (dumpIRTypes)
+ {
+ this->dumpIRTypes = true;
+ }
+
+ if (dumpIRLocals)
+ {
+ this->dumpIRLocals = true;
+ }
+
+ if (dumpIRRegs)
+ {
+ this->dumpIRRegs = true;
+ }
+
+ if (dumpIRSsa)
+ {
+ this->dumpIRSsa = true;
+ }
+
+ if (dumpIRValnums)
+ {
+ this->dumpIRValnums = true;
+ }
+
+ if (dumpIRCosts)
+ {
+ this->dumpIRCosts = true;
+ }
+
+ if (dumpIRFlags)
+ {
+ this->dumpIRFlags = true;
+ }
+
+ if (dumpIRKinds)
+ {
+ this->dumpIRKinds = true;
+ }
+
+ if (dumpIRNodes)
+ {
+ this->dumpIRNodes = true;
+ }
+
+ if (dumpIRNoLists)
+ {
+ this->dumpIRNoLists = true;
+ }
+
+ if (dumpIRNoLeafs)
+ {
+ this->dumpIRNoLeafs = true;
+ }
+
+ if (dumpIRNoLeafs && dumpIRDataflow)
+ {
+ this->dumpIRDataflow = true;
+ }
+
+ if (dumpIRNoStmts)
+ {
+ this->dumpIRNoStmts = true;
+ }
+
+ if (dumpIRTrees)
+ {
+ this->dumpIRTrees = true;
+ }
+
+ if (dumpIRLinear)
+ {
+ this->dumpIRLinear = true;
+ }
+
+ if (dumpIRBlockHeaders)
+ {
+ this->dumpIRBlockHeaders = true;
+ }
+
+ if (dumpIRExit)
+ {
+ this->dumpIRExit = true;
+ }
+
+#endif // DEBUG
+
+#ifdef FEATURE_SIMD
+#ifdef _TARGET_AMD64_
+ // The minimum hardware bar for enabling SIMD on AMD64 is SSE2.
+ featureSIMD = ((opts.eeFlags & CORJIT_FLG_FEATURE_SIMD) != 0);
+#endif // _TARGET_AMD64_
+#endif // FEATURE_SIMD
+
+ if (compIsForInlining() || compIsForImportOnly())
+ {
+ return;
+ }
+ // The rest of the opts fields that we initialize here
+ // should only be used when we generate code for the method.
+ // They should not be used when importing or inlining.
+
+ opts.genFPorder = true;
+ opts.genFPopt = true;
+
+ opts.instrCount = 0;
+ opts.lvRefCount = 0;
+
+#if FEATURE_TAILCALL_OPT
+ // By default opportunistic tail call optimization is enabled
+ opts.compTailCallOpt = true;
+ opts.compTailCallLoopOpt = true;
+#endif
+
+#ifdef DEBUG
+ opts.dspInstrs = false;
+ opts.dspEmit = false;
+ opts.dspLines = false;
+ opts.varNames = false;
+ opts.dmpHex = false;
+ opts.disAsm = false;
+ opts.disAsmSpilled = false;
+ opts.disDiffable = false;
+ opts.dspCode = false;
+ opts.dspEHTable = false;
+ opts.dspGCtbls = false;
+ opts.disAsm2 = false;
+ opts.dspUnwind = false;
+ s_dspMemStats = false;
+ opts.compLongAddress = false;
+ opts.compJitELTHookEnabled = false;
+
+#ifdef LATE_DISASM
+ opts.doLateDisasm = false;
+#endif // LATE_DISASM
+
+ compDebugBreak = false;
+
+ // If we have a non-empty AltJit config then we change all of these other
+ // config values to refer only to the AltJit.
+ //
+ if (!altJitConfig || opts.altJit)
+ {
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ if ((JitConfig.NgenOrder() & 1) == 1)
+ {
+ opts.dspOrder = true;
+ }
+
+ if (JitConfig.NgenGCDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.dspGCtbls = true;
+ }
+
+ if (JitConfig.NgenDisasm().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.disAsm = true;
+ }
+ if (JitConfig.NgenDisasm().contains("SPILLED", nullptr, nullptr))
+ {
+ opts.disAsmSpilled = true;
+ }
+
+ if (JitConfig.NgenUnwindDump().contains(info.compMethodName, info.compClassName,
+ &info.compMethodInfo->args))
+ {
+ opts.dspUnwind = true;
+ }
+
+ if (JitConfig.NgenEHDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.dspEHTable = true;
+ }
+ }
+ else
+ {
+ if ((JitConfig.JitOrder() & 1) == 1)
+ {
+ opts.dspOrder = true;
+ }
+
+ if (JitConfig.JitGCDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.dspGCtbls = true;
+ }
+
+ if (JitConfig.JitDisasm().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.disAsm = true;
+ }
+
+ if (JitConfig.JitDisasm().contains("SPILLED", nullptr, nullptr))
+ {
+ opts.disAsmSpilled = true;
+ }
+
+ if (JitConfig.JitUnwindDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.dspUnwind = true;
+ }
+
+ if (JitConfig.JitEHDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.dspEHTable = true;
+ }
+ }
+
+#ifdef LATE_DISASM
+ if (JitConfig.JitLateDisasm().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ opts.doLateDisasm = true;
+#endif // LATE_DISASM
+
+ // This one applies to both Ngen/Jit Disasm output: COMPlus_JitDiffableDasm=1
+ if (JitConfig.DiffableDasm() != 0)
+ {
+ opts.disDiffable = true;
+ opts.dspDiffable = true;
+ }
+
+ if (JitConfig.DisplayMemStats() != 0)
+ {
+ s_dspMemStats = true;
+ }
+
+ if (JitConfig.JitLongAddress() != 0)
+ {
+ opts.compLongAddress = true;
+ }
+ }
+
+ if (verboseDump)
+ {
+ opts.dspCode = true;
+ opts.dspEHTable = true;
+ opts.dspGCtbls = true;
+ opts.disAsm2 = true;
+ opts.dspUnwind = true;
+ verbose = true;
+ verboseTrees = shouldUseVerboseTrees();
+ verboseSsa = shouldUseVerboseSsa();
+ codeGen->setVerbose(true);
+ }
+
+ treesBeforeAfterMorph = (JitConfig.TreesBeforeAfterMorph() == 1);
+ morphNum = 0; // Initialize the morphed-trees counting.
+
+ expensiveDebugCheckLevel = JitConfig.JitExpensiveDebugCheckLevel();
+ if (expensiveDebugCheckLevel == 0)
+ {
+ // If we're in a stress mode that modifies the flowgraph, make 1 the default.
+ if (fgStressBBProf() || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
+ {
+ expensiveDebugCheckLevel = 1;
+ }
+ }
+
+ if (verbose)
+ {
+ printf("****** START compiling %s (MethodHash=%08x)\n", info.compFullName, info.compMethodHash());
+ printf("Generating code for %s %s\n", Target::g_tgtPlatformName, Target::g_tgtCPUName);
+ printf(""); // in our logic this causes a flush
+ }
+
+ if (JitConfig.JitBreak().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ assert(!"JitBreak reached");
+ }
+
+ unsigned jitHashBreakVal = (unsigned)JitConfig.JitHashBreak();
+ if ((jitHashBreakVal != (DWORD)-1) && (jitHashBreakVal == info.compMethodHash()))
+ {
+ assert(!"JitHashBreak reached");
+ }
+
+ if (verbose ||
+ JitConfig.JitDebugBreak().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args) ||
+ JitConfig.JitBreak().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ compDebugBreak = true;
+ }
+
+ memset(compActiveStressModes, 0, sizeof(compActiveStressModes));
+
+#endif // DEBUG
+
+//-------------------------------------------------------------------------
+
+#ifdef DEBUGGING_SUPPORT
+#ifdef DEBUG
+ assert(!codeGen->isGCTypeFixed());
+ opts.compGcChecks = (JitConfig.JitGCChecks() != 0) || compStressCompile(STRESS_GENERIC_VARN, 5);
+
+ enum
+ {
+ STACK_CHECK_ON_RETURN = 0x1,
+ STACK_CHECK_ON_CALL = 0x2,
+ STACK_CHECK_ALL = 0x3,
+ };
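+ // Bit flags read from JitConfig.JitStackChecks() below, selecting stack checks
+ // on returns, on calls, or both (STACK_CHECK_ALL).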
+
+ DWORD dwJitStackChecks = JitConfig.JitStackChecks();
+ if (compStressCompile(STRESS_GENERIC_VARN, 5))
+ {
+ dwJitStackChecks = STACK_CHECK_ALL;
+ }
+ opts.compStackCheckOnRet = (dwJitStackChecks & DWORD(STACK_CHECK_ON_RETURN)) != 0;
+ opts.compStackCheckOnCall = (dwJitStackChecks & DWORD(STACK_CHECK_ON_CALL)) != 0;
+#endif
+
+#ifdef PROFILING_SUPPORTED
+ opts.compNoPInvokeInlineCB = (opts.eeFlags & CORJIT_FLG_PROF_NO_PINVOKE_INLINE) ? true : false;
+
+ // Cache the profiler handle
+ if (opts.eeFlags & CORJIT_FLG_PROF_ENTERLEAVE)
+ {
+ BOOL hookNeeded;
+ BOOL indirected;
+ info.compCompHnd->GetProfilingHandle(&hookNeeded, &compProfilerMethHnd, &indirected);
+ compProfilerHookNeeded = !!hookNeeded;
+ compProfilerMethHndIndirected = !!indirected;
+ }
+ else
+ {
+ compProfilerHookNeeded = false;
+ compProfilerMethHnd = nullptr;
+ compProfilerMethHndIndirected = false;
+ }
+
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+ // Right now this ELT hook option is enabled only for arm and amd64
+
+ // Honour COMPlus_JitELTHookEnabled only if the VM has not asked us to generate profiler
+ // hooks in the first place. That is, override the VM only if it hasn't asked for a
+ // profiler callback for this method.
+ if (!compProfilerHookNeeded && (JitConfig.JitELTHookEnabled() != 0))
+ {
+ opts.compJitELTHookEnabled = true;
+ }
+
+ // TBD: Exclude PInvoke stubs
+ if (opts.compJitELTHookEnabled)
+ {
+ compProfilerMethHnd = (void*)DummyProfilerELTStub;
+ compProfilerMethHndIndirected = false;
+ }
+#endif // _TARGET_ARM_ || _TARGET_AMD64_
+
+#endif // PROFILING_SUPPORTED
+
+#if FEATURE_TAILCALL_OPT
+ const wchar_t* strTailCallOpt = JitConfig.TailCallOpt();
+ if (strTailCallOpt != nullptr)
+ {
+ opts.compTailCallOpt = (UINT)_wtoi(strTailCallOpt) != 0;
+ }
+
+ if (JitConfig.TailCallLoopOpt() == 0)
+ {
+ opts.compTailCallLoopOpt = false;
+ }
+#endif
+
+ opts.compMustInlinePInvokeCalli = (opts.eeFlags & CORJIT_FLG_IL_STUB) ? true : false;
+
+ opts.compScopeInfo = opts.compDbgInfo;
+#endif // DEBUGGING_SUPPORT
+
+#ifdef LATE_DISASM
+ codeGen->getDisAssembler().disOpenForLateDisAsm(info.compMethodName, info.compClassName,
+ info.compMethodInfo->args.pSig);
+#endif
+
+//-------------------------------------------------------------------------
+
+#if RELOC_SUPPORT
+ opts.compReloc = (opts.eeFlags & CORJIT_FLG_RELOC) ? true : false;
+#endif
+
+#ifdef DEBUG
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ // Whether encoding of absolute addr as PC-rel offset is enabled in RyuJIT
+ opts.compEnablePCRelAddr = (JitConfig.EnablePCRelAddr() != 0);
+#endif
+#endif // DEBUG
+
+ opts.compProcedureSplitting = (opts.eeFlags & CORJIT_FLG_PROCSPLIT) ? true : false;
+
+#ifdef _TARGET_ARM64_
+ // TODO-ARM64-NYI: enable hot/cold splitting
+ opts.compProcedureSplitting = false;
+#endif // _TARGET_ARM64_
+
+#ifdef DEBUG
+ opts.compProcedureSplittingEH = opts.compProcedureSplitting;
+#endif // DEBUG
+
+ if (opts.compProcedureSplitting)
+ {
+ // Note that opts.compDbgCode is true under ngen for checked assemblies!
+ opts.compProcedureSplitting = !opts.compDbgCode;
+
+#ifdef DEBUG
+ // JitForceProcedureSplitting is used to force procedure splitting on checked assemblies.
+ // This is useful for debugging on a checked build. Note that we still only do procedure
+ // splitting in the zapper.
+ if (JitConfig.JitForceProcedureSplitting().contains(info.compMethodName, info.compClassName,
+ &info.compMethodInfo->args))
+ {
+ opts.compProcedureSplitting = true;
+ }
+
+ // JitNoProcedureSplitting will always disable procedure splitting.
+ if (JitConfig.JitNoProcedureSplitting().contains(info.compMethodName, info.compClassName,
+ &info.compMethodInfo->args))
+ {
+ opts.compProcedureSplitting = false;
+ }
+ //
+ // JitNoProcedureSplittingEH will disable procedure splitting in functions with EH.
+ if (JitConfig.JitNoProcedureSplittingEH().contains(info.compMethodName, info.compClassName,
+ &info.compMethodInfo->args))
+ {
+ opts.compProcedureSplittingEH = false;
+ }
+#endif
+ }
+
+ fgProfileBuffer = nullptr;
+ fgProfileData_ILSizeMismatch = false;
+ fgNumProfileRuns = 0;
+ if (opts.eeFlags & CORJIT_FLG_BBOPT)
+ {
+ assert(!compIsForInlining());
+ HRESULT hr;
+ hr = info.compCompHnd->getBBProfileData(info.compMethodHnd, &fgProfileBufferCount, &fgProfileBuffer,
+ &fgNumProfileRuns);
+
+ // a failed result that also has a non-NULL fgProfileBuffer
+ // indicates that the ILSize for the method no longer matches
+ // the ILSize for the method when profile data was collected.
+ //
+ // We will discard the IBC data in this case
+ //
+ if (FAILED(hr) && (fgProfileBuffer != nullptr))
+ {
+ fgProfileData_ILSizeMismatch = true;
+ fgProfileBuffer = nullptr;
+ }
+#ifdef DEBUG
+ // A successful result implies a non-NULL fgProfileBuffer
+ //
+ if (SUCCEEDED(hr))
+ {
+ assert(fgProfileBuffer != nullptr);
+ }
+
+ // A failed result implies a NULL fgProfileBuffer
+ // see implementation of Compiler::fgHaveProfileData()
+ //
+ if (FAILED(hr))
+ {
+ assert(fgProfileBuffer == nullptr);
+ }
+#endif
+ }
+
+ opts.compNeedStackProbes = false;
+
+#ifdef DEBUG
+ if (JitConfig.StackProbesOverride() != 0 || compStressCompile(STRESS_GENERIC_VARN, 5))
+ {
+ opts.compNeedStackProbes = true;
+ }
+#endif
+
+#ifdef DEBUG
+ // Now, set compMaxUncheckedOffsetForNullObject for STRESS_NULL_OBJECT_CHECK
+ if (compStressCompile(STRESS_NULL_OBJECT_CHECK, 30))
+ {
+ compMaxUncheckedOffsetForNullObject = (size_t)JitConfig.JitMaxUncheckedOffset();
+ if (verbose)
+ {
+ printf("STRESS_NULL_OBJECT_CHECK: compMaxUncheckedOffsetForNullObject=0x%X\n",
+ compMaxUncheckedOffsetForNullObject);
+ }
+ }
+
+ if (verbose)
+ {
+ printf("OPTIONS: compCodeOpt = %s\n",
+ (opts.compCodeOpt == BLENDED_CODE)
+ ? "BLENDED_CODE"
+ : (opts.compCodeOpt == SMALL_CODE) ? "SMALL_CODE"
+ : (opts.compCodeOpt == FAST_CODE) ? "FAST_CODE" : "UNKNOWN_CODE");
+
+ printf("OPTIONS: compDbgCode = %s\n", dspBool(opts.compDbgCode));
+ printf("OPTIONS: compDbgInfo = %s\n", dspBool(opts.compDbgInfo));
+ printf("OPTIONS: compDbgEnC = %s\n", dspBool(opts.compDbgEnC));
+ printf("OPTIONS: compProcedureSplitting = %s\n", dspBool(opts.compProcedureSplitting));
+ printf("OPTIONS: compProcedureSplittingEH = %s\n", dspBool(opts.compProcedureSplittingEH));
+
+ if ((opts.eeFlags & CORJIT_FLG_BBOPT) && fgHaveProfileData())
+ {
+ printf("OPTIONS: using real profile data\n");
+ }
+
+ if (fgProfileData_ILSizeMismatch)
+ {
+ printf("OPTIONS: discarded IBC profile data due to mismatch in ILSize\n");
+ }
+
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ printf("OPTIONS: Jit invoked for ngen\n");
+ }
+ printf("OPTIONS: Stack probing is %s\n", opts.compNeedStackProbes ? "ENABLED" : "DISABLED");
+ }
+#endif
+
+ opts.compGCPollType = GCPOLL_NONE;
+ if (opts.eeFlags & CORJIT_FLG_GCPOLL_CALLS)
+ {
+ opts.compGCPollType = GCPOLL_CALL;
+ }
+ else if (opts.eeFlags & CORJIT_FLG_GCPOLL_INLINE)
+ {
+ // make sure that the EE didn't set both flags.
+ assert(opts.compGCPollType == GCPOLL_NONE);
+ opts.compGCPollType = GCPOLL_INLINE;
+ }
+}
+
+#ifdef DEBUG
+
+void JitDump(const char* pcFormat, ...)
+{
+ va_list lst;
+ va_start(lst, pcFormat);
+ vflogf(jitstdout, pcFormat, lst);
+ va_end(lst);
+}
+
+bool Compiler::compJitHaltMethod()
+{
+ /* This method returns true when we use an INS_BREAKPOINT to allow us to step into the generated native code */
+ /* Note that these two "Jit" environment variables also work for ngen images */
+
+ if (JitConfig.JitHalt().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ return true;
+ }
+
+ /* Use this Hash variant when there are a lot of methods with the same name and different signatures */
+
+ unsigned fJitHashHaltVal = (unsigned)JitConfig.JitHashHalt();
+ if ((fJitHashHaltVal != (unsigned)-1) && (fJitHashHaltVal == info.compMethodHash()))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ * Should we use a "stress-mode" for the given stressArea. We have different
+ * areas to allow the areas to be mixed in different combinations in
+ * different methods.
+ * 'weight' indicates how often (as a percentage) the area should be stressed.
+ * It should reflect the usefulness:overhead ratio.
+ */
+
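+ // Build the stress mode name table from the STRESS_MODES x-macro list; each mode
+ // name is prefixed with "STRESS_".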
+const LPCWSTR Compiler::s_compStressModeNames[STRESS_COUNT + 1] = {
+#define STRESS_MODE(mode) W("STRESS_") W(#mode),
+
+ STRESS_MODES
+#undef STRESS_MODE
+};
+
+bool Compiler::compStressCompile(compStressArea stressArea, unsigned weight)
+{
+ unsigned hash;
+ DWORD stressLevel;
+
+ if (!bRangeAllowStress)
+ {
+ return false;
+ }
+
+ if (!JitConfig.JitStressOnly().isEmpty() &&
+ !JitConfig.JitStressOnly().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ return false;
+ }
+
+ bool doStress = false;
+ const wchar_t* strStressModeNames;
+
+ // Does user explicitly prevent using this STRESS_MODE through the command line?
+ const wchar_t* strStressModeNamesNot = JitConfig.JitStressModeNamesNot();
+ if ((strStressModeNamesNot != nullptr) &&
+ (wcsstr(strStressModeNamesNot, s_compStressModeNames[stressArea]) != nullptr))
+ {
+ if (verbose)
+ {
+ printf("JitStressModeNamesNot contains %ws\n", s_compStressModeNames[stressArea]);
+ }
+ doStress = false;
+ goto _done;
+ }
+
+ // Does user explicitly set this STRESS_MODE through the command line?
+ strStressModeNames = JitConfig.JitStressModeNames();
+ if (strStressModeNames != nullptr)
+ {
+ if (wcsstr(strStressModeNames, s_compStressModeNames[stressArea]) != nullptr)
+ {
+ if (verbose)
+ {
+ printf("JitStressModeNames contains %ws\n", s_compStressModeNames[stressArea]);
+ }
+ doStress = true;
+ goto _done;
+ }
+
+ // This stress mode name did not match anything in the stress
+ // mode whitelist. If the user has requested that only the listed
+ // modes be enabled, don't allow this stress mode to turn on.
+ const bool onlyEnableMode = JitConfig.JitStressModeNamesOnly() != 0;
+
+ if (onlyEnableMode)
+ {
+ doStress = false;
+ goto _done;
+ }
+ }
+
+ // 0: No stress (Except when explicitly set in complus_JitStressModeNames)
+ // !=2: Vary stress. Performance will be slightly/moderately degraded
+ // 2: Check-all stress. Performance will be REALLY horrible
+ stressLevel = getJitStressLevel();
+
+ assert(weight <= MAX_STRESS_WEIGHT);
+
+ /* Check for boundary conditions */
+
+ if (stressLevel == 0 || weight == 0)
+ {
+ return false;
+ }
+
+ // Should we allow unlimited stress?
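+ // Areas past STRESS_COUNT_VARN are the non-varying ("check") modes; at stress
+ // level 2 they are always enabled.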
+ if (stressArea > STRESS_COUNT_VARN && stressLevel == 2)
+ {
+ return true;
+ }
+
+ if (weight == MAX_STRESS_WEIGHT)
+ {
+ doStress = true;
+ goto _done;
+ }
+
+ // Get a hash which can be compared with 'weight'
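+ // The hash mixes the method hash with the stress area and level to get a
+ // pseudo-random value in [0, MAX_STRESS_WEIGHT) that is stable for a given
+ // method, area, and stress level.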
+
+ assert(stressArea != 0);
+ hash = (info.compMethodHash() ^ stressArea ^ stressLevel) % MAX_STRESS_WEIGHT;
+
+ assert(hash < MAX_STRESS_WEIGHT && weight <= MAX_STRESS_WEIGHT);
+ doStress = (hash < weight);
+
+_done:
+
+ if (doStress && !compActiveStressModes[stressArea])
+ {
+ if (verbose)
+ {
+ printf("\n\n*** JitStress: %ws ***\n\n", s_compStressModeNames[stressArea]);
+ }
+ compActiveStressModes[stressArea] = 1;
+ }
+
+ return doStress;
+}
+
+#endif // DEBUG
+
+void Compiler::compInitDebuggingInfo()
+{
+ assert(!compIsForInlining());
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In compInitDebuggingInfo() for %s\n", info.compFullName);
+ }
+#endif
+
+ /*-------------------------------------------------------------------------
+ *
+ * Get hold of the local variable records, if there are any
+ */
+
+ info.compVarScopesCount = 0;
+
+#ifdef DEBUGGING_SUPPORT
+ if (opts.compScopeInfo)
+#endif
+ {
+ eeGetVars();
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ compInitVarScopeMap();
+
+ if (opts.compScopeInfo || opts.compDbgCode)
+ {
+ compInitScopeLists();
+ }
+
+ if (opts.compDbgCode && (info.compVarScopesCount > 0))
+ {
+ /* Create a new empty basic block. fgExtendDbgLifetimes() may add
+ initialization of variables which are in scope right from the
+ start of the (real) first BB (and therefore artificially marked
+ as alive) into this block.
+ */
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtEnd(fgFirstBB, gtNewNothingNode());
+
+ JITDUMP("Debuggable code - Add new BB%02u to perform initialization of variables [%08X]\n", fgFirstBB->bbNum,
+ dspPtr(fgFirstBB));
+ }
+#endif // DEBUGGING_SUPPORT
+
+ /*-------------------------------------------------------------------------
+ *
+ * Read the stmt-offsets table and the line-number table
+ */
+
+ info.compStmtOffsetsImplicit = ICorDebugInfo::NO_BOUNDARIES;
+
+ // We can only report debug info for EnC at places where the stack is empty.
+ // Actually, at places where there are no live temps. Else, we won't be able
+ // to map between the old and the new versions correctly as we won't have
+ // any info for the live temps.
+
+ assert(!opts.compDbgEnC || !opts.compDbgInfo ||
+ 0 == (info.compStmtOffsetsImplicit & ~ICorDebugInfo::STACK_EMPTY_BOUNDARIES));
+
+ info.compStmtOffsetsCount = 0;
+
+#ifdef DEBUGGING_SUPPORT
+ if (opts.compDbgInfo)
+#endif
+ {
+ /* Get hold of the line# records, if there are any */
+
+ eeGetStmtOffsets();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("info.compStmtOffsetsCount = %d\n", info.compStmtOffsetsCount);
+ printf("info.compStmtOffsetsImplicit = %04Xh", info.compStmtOffsetsImplicit);
+
+ if (info.compStmtOffsetsImplicit)
+ {
+ printf(" ( ");
+ if (info.compStmtOffsetsImplicit & ICorDebugInfo::STACK_EMPTY_BOUNDARIES)
+ {
+ printf("STACK_EMPTY ");
+ }
+ if (info.compStmtOffsetsImplicit & ICorDebugInfo::NOP_BOUNDARIES)
+ {
+ printf("NOP ");
+ }
+ if (info.compStmtOffsetsImplicit & ICorDebugInfo::CALL_SITE_BOUNDARIES)
+ {
+ printf("CALL_SITE ");
+ }
+ printf(")");
+ }
+ printf("\n");
+ IL_OFFSET* pOffs = info.compStmtOffsets;
+ for (unsigned i = 0; i < info.compStmtOffsetsCount; i++, pOffs++)
+ {
+ printf("%02d) IL_%04Xh\n", i, *pOffs);
+ }
+ }
+#endif
+ }
+}
+
+void Compiler::compSetOptimizationLevel()
+{
+ unsigned compileFlags;
+ bool theMinOptsValue;
+ unsigned jitMinOpts;
+
+ compileFlags = opts.eeFlags;
+
+ if (compIsForInlining())
+ {
+ theMinOptsValue = impInlineInfo->InlinerCompiler->opts.MinOpts();
+ goto _SetMinOpts;
+ }
+
+ theMinOptsValue = false;
+
+ if (opts.compFlags == CLFLG_MINOPT)
+ {
+ JITLOG((LL_INFO100, "CLFLG_MINOPT set for method %s\n", info.compFullName));
+ theMinOptsValue = true;
+ }
+
+#ifdef DEBUG
+ jitMinOpts = JitConfig.JitMinOpts();
+
+ if (!theMinOptsValue && (jitMinOpts > 0))
+ {
+ unsigned methodCount = Compiler::jitTotalMethodCompiled;
+ unsigned methodCountMask = methodCount & 0xFFF;
+ unsigned kind = (jitMinOpts & 0xF000000) >> 24;
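+ // JitMinOpts packs a selector into bits 24-27 ('kind'); for kinds 0xD-0xF the two
+ // 12-bit fields in bits 12-23 and 0-11 are matched against the low 12 bits of the
+ // method count, as decoded in the cases below. Any other kind treats the whole
+ // value as a method-count threshold.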
+ switch (kind)
+ {
+ default:
+ if (jitMinOpts <= methodCount)
+ {
+ if (verbose)
+ {
+ printf(" Optimizations disabled by JitMinOpts and methodCount\n");
+ }
+ theMinOptsValue = true;
+ }
+ break;
+ case 0xD:
+ {
+ unsigned firstMinopts = (jitMinOpts >> 12) & 0xFFF;
+ unsigned secondMinopts = (jitMinOpts >> 0) & 0xFFF;
+
+ if ((firstMinopts == methodCountMask) || (secondMinopts == methodCountMask))
+ {
+ if (verbose)
+ {
+ printf("0xD: Optimizations disabled by JitMinOpts and methodCountMask\n");
+ }
+ theMinOptsValue = true;
+ }
+ }
+ break;
+ case 0xE:
+ {
+ unsigned startMinopts = (jitMinOpts >> 12) & 0xFFF;
+ unsigned endMinopts = (jitMinOpts >> 0) & 0xFFF;
+
+ if ((startMinopts <= methodCountMask) && (endMinopts >= methodCountMask))
+ {
+ if (verbose)
+ {
+ printf("0xE: Optimizations disabled by JitMinOpts and methodCountMask\n");
+ }
+ theMinOptsValue = true;
+ }
+ }
+ break;
+ case 0xF:
+ {
+ unsigned bitsZero = (jitMinOpts >> 12) & 0xFFF;
+ unsigned bitsOne = (jitMinOpts >> 0) & 0xFFF;
+
+ if (((methodCountMask & bitsOne) == bitsOne) && ((~methodCountMask & bitsZero) == bitsZero))
+ {
+ if (verbose)
+ {
+ printf("0xF: Optimizations disabled by JitMinOpts and methodCountMask\n");
+ }
+ theMinOptsValue = true;
+ }
+ }
+ break;
+ }
+ }
+
+ if (!theMinOptsValue)
+ {
+ if (JitConfig.JitMinOptsName().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ theMinOptsValue = true;
+ }
+ }
+
+ if (compStressCompile(STRESS_MIN_OPTS, 5))
+ {
+ theMinOptsValue = true;
+ }
+ // For PREJIT we never drop down to MinOpts
+ // unless CLFLG_MINOPT is set
+ else if (!(compileFlags & CORJIT_FLG_PREJIT))
+ {
+ if ((unsigned)JitConfig.JitMinOptsCodeSize() < info.compILCodeSize)
+ {
+ JITLOG((LL_INFO10, "IL Code Size exceeded, using MinOpts for method %s\n", info.compFullName));
+ theMinOptsValue = true;
+ }
+ else if ((unsigned)JitConfig.JitMinOptsInstrCount() < opts.instrCount)
+ {
+ JITLOG((LL_INFO10, "IL instruction count exceeded, using MinOpts for method %s\n", info.compFullName));
+ theMinOptsValue = true;
+ }
+ else if ((unsigned)JitConfig.JitMinOptsBbCount() < fgBBcount)
+ {
+ JITLOG((LL_INFO10, "Basic Block count exceeded, using MinOpts for method %s\n", info.compFullName));
+ theMinOptsValue = true;
+ }
+ else if ((unsigned)JitConfig.JitMinOptsLvNumCount() < lvaCount)
+ {
+ JITLOG((LL_INFO10, "Local Variable Num count exceeded, using MinOpts for method %s\n", info.compFullName));
+ theMinOptsValue = true;
+ }
+ else if ((unsigned)JitConfig.JitMinOptsLvRefCount() < opts.lvRefCount)
+ {
+ JITLOG((LL_INFO10, "Local Variable Ref count exceeded, using MinOpts for method %s\n", info.compFullName));
+ theMinOptsValue = true;
+ }
+ if (theMinOptsValue == true)
+ {
+ JITLOG((LL_INFO10000, "IL Code Size,Instr %4d,%4d, Basic Block count %3d, Local Variable Num,Ref count "
+ "%3d,%3d for method %s\n",
+ info.compILCodeSize, opts.instrCount, fgBBcount, lvaCount, opts.lvRefCount, info.compFullName));
+ if (JitConfig.JitBreakOnMinOpts() != 0)
+ {
+ assert(!"MinOpts enabled");
+ }
+ }
+ }
+#else // !DEBUG
+ // Retail check if we should force Minopts due to the complexity of the method
+ // For PREJIT we never drop down to MinOpts
+ // unless CLFLG_MINOPT is set
+ if (!theMinOptsValue && !(compileFlags & CORJIT_FLG_PREJIT) &&
+ ((DEFAULT_MIN_OPTS_CODE_SIZE < info.compILCodeSize) || (DEFAULT_MIN_OPTS_INSTR_COUNT < opts.instrCount) ||
+ (DEFAULT_MIN_OPTS_BB_COUNT < fgBBcount) || (DEFAULT_MIN_OPTS_LV_NUM_COUNT < lvaCount) ||
+ (DEFAULT_MIN_OPTS_LV_REF_COUNT < opts.lvRefCount)))
+ {
+ theMinOptsValue = true;
+ }
+#endif // DEBUG
+
+ JITLOG((LL_INFO10000,
+ "IL Code Size,Instr %4d,%4d, Basic Block count %3d, Local Variable Num,Ref count %3d,%3d for method %s\n",
+ info.compILCodeSize, opts.instrCount, fgBBcount, lvaCount, opts.lvRefCount, info.compFullName));
+
+#if 0
+ // The code in this #if has been useful in debugging loop cloning issues, by
+ // enabling selective enablement of the loop cloning optimization according to
+ // method hash.
+#ifdef DEBUG
+ if (!theMinOptsValue)
+ {
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("opthashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
+ }
+ char* histr = getenv("opthashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ {
+ theMinOptsValue = true;
+ }
+ else
+ {
+ printf("Doing optimization in %s (0x%x).\n", info.compFullName, methHash);
+ }
+ }
+#endif
+#endif
+
+_SetMinOpts:
+
+ // Set the MinOpts value
+ opts.SetMinOpts(theMinOptsValue);
+
+#ifdef DEBUG
+ if (verbose && !compIsForInlining())
+ {
+ printf("OPTIONS: opts.MinOpts() == %s\n", opts.MinOpts() ? "true" : "false");
+ }
+#endif
+
+ /* Control the optimizations */
+
+ if (opts.MinOpts() || opts.compDbgCode)
+ {
+ opts.compFlags &= ~CLFLG_MAXOPT;
+ opts.compFlags |= CLFLG_MINOPT;
+ }
+
+ if (!compIsForInlining())
+ {
+ codeGen->setFramePointerRequired(false);
+ codeGen->setFrameRequired(false);
+
+ if (opts.MinOpts() || opts.compDbgCode)
+ {
+ codeGen->setFrameRequired(true);
+ }
+
+#if !defined(_TARGET_AMD64_)
+ // The VM sets CORJIT_FLG_FRAMED for two reasons: (1) the COMPlus_JitFramed variable is set, or
+ // (2) the function is marked "noinline". The reason for #2 is that people mark functions
+ // noinline to ensure they show up in a stack walk. But for AMD64, we don't need a frame
+ // pointer for the frame to show up in a stack walk.
+ if (compileFlags & CORJIT_FLG_FRAMED)
+ codeGen->setFrameRequired(true);
+#endif
+
+ if (compileFlags & CORJIT_FLG_RELOC)
+ {
+ codeGen->genAlignLoops = false; // loop alignment not supported for prejitted code
+
+ // The zapper doesn't set CORJIT_FLG_ALIGN_LOOPS, and there is
+ // no reason for it to set it as the JIT doesn't currently support loop alignment
+ // for prejitted images. (The JIT doesn't know the final address of the code, hence
+ // it can't align code based on unknown addresses.)
+ assert((compileFlags & CORJIT_FLG_ALIGN_LOOPS) == 0);
+ }
+ else
+ {
+ codeGen->genAlignLoops = (compileFlags & CORJIT_FLG_ALIGN_LOOPS) != 0;
+ }
+ }
+
+ info.compUnwrapContextful = !opts.MinOpts() && !opts.compDbgCode;
+
+ fgCanRelocateEHRegions = true;
+}
+
+#ifdef _TARGET_ARMARCH_
+ // Function compRsvdRegCheck:
+ // given a curState to use for calculating the total frame size,
+ // it will return true if REG_OPT_RSVD should be reserved so
+ // that it can be used to form large offsets when accessing stack-based
+ // LclVars, including both the incoming and outgoing argument areas.
+//
+// The method advances the frame layout state to curState by calling
+// lvaFrameSize(curState).
+//
+bool Compiler::compRsvdRegCheck(FrameLayoutState curState)
+{
+ // Always do the layout even if returning early. Callers might
+ // depend on us to do the layout.
+ unsigned frameSize = lvaFrameSize(curState);
+
+ if (opts.MinOpts())
+ {
+ // Have a recovery path in case we fail to reserve REG_OPT_RSVD and go
+ // over the limit of SP and FP offset ranges due to large
+ // temps.
+ return true;
+ }
+
+ unsigned calleeSavedRegMaxSz = CALLEE_SAVED_REG_MAXSZ;
+ if (compFloatingPointUsed)
+ {
+ calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ;
+ }
+
+ noway_assert(frameSize > calleeSavedRegMaxSz);
+
+#if defined(_TARGET_ARM64_)
+
+ // TODO-ARM64-CQ: update this!
+ return true; // just always assume we'll need it, for now
+
+#else // _TARGET_ARM_
+
+ // frame layout:
+ //
+ // low addresses
+ // inArgs compArgSize
+ // origSP --->
+ // LR --->
+ // R11 --->
+ // + callee saved regs CALLEE_SAVED_REG_MAXSZ (32 bytes)
+ // optional saved fp regs 16 * sizeof(float) (64 bytes)
+ // - lclSize
+ // incl. TEMPS MAX_SPILL_TEMP_SIZE
+ // + incl. outArgs
+ // SP --->
+ // -
+ // high addresses
+
+ // With codeGen->isFramePointerRequired we use R11 to access incoming args with positive offsets
+ // and to access LclVars with negative offsets; in the non-funclet (main) region
+ // we use SP with positive offsets. The limiting factor in the
+ // codeGen->isFramePointerRequired case is that negative offsets must be less than or
+ // equal to 0x7C, while positive offsets can use imm12, limited to +/-imm8 (scaled)
+ // when vldr/vstr is used.
+ //
+ // Subtract 4 bytes for alignment of a local var because the number of temps could
+ // trigger a misaligned double or long.
+ //
+ unsigned maxR11ArgLimit = (compFloatingPointUsed ? 0x03FC : 0x0FFC);
+ unsigned maxR11LclLimit = 0x0078;
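+ // These limits reflect the addressing modes described above: roughly imm12 for
+ // integer loads/stores (0x0FFC) and +/-imm8 scaled by 4 for vldr/vstr (0x03FC),
+ // with maxR11LclLimit being the 0x7C negative-offset limit less the 4-byte
+ // alignment slack.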
+
+ if (codeGen->isFramePointerRequired())
+ {
+ unsigned maxR11LclOffs = frameSize;
+ unsigned maxR11ArgOffs = compArgSize + (2 * REGSIZE_BYTES);
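+ // The 2 * REGSIZE_BYTES accounts for the saved R11 and LR that sit between the
+ // frame pointer and the incoming arguments in the layout pictured above.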
+ if (maxR11LclOffs > maxR11LclLimit || maxR11ArgOffs > maxR11ArgLimit)
+ {
+ return true;
+ }
+ }
+
+ // So this case is the SP based frame case, but note that we also will use SP based
+ // offsets for R11 based frames in the non-funclet main code area. However, if we have
+ // passed the R11 limit checks above, these SP checks won't fire.
+
+ // Check local coverage first. If vldr/vstr will be used the limit can be +/-imm8.
+ unsigned maxSPLclLimit = (compFloatingPointUsed ? 0x03F8 : 0x0FF8);
+ if (frameSize > (codeGen->isFramePointerUsed() ? (maxR11LclLimit + maxSPLclLimit) : maxSPLclLimit))
+ {
+ return true;
+ }
+
+ // Check arguments coverage.
+ if ((!codeGen->isFramePointerUsed() || (compArgSize > maxR11ArgLimit)) && (compArgSize + frameSize) > maxSPLclLimit)
+ {
+ return true;
+ }
+
+ // We won't need to reserve REG_OPT_RSVD.
+ //
+ return false;
+#endif // _TARGET_ARM_
+}
+#endif // _TARGET_ARMARCH_
+
+void Compiler::compFunctionTraceStart()
+{
+#ifdef DEBUG
+ if (compIsForInlining())
+ {
+ return;
+ }
+
+ if ((JitConfig.JitFunctionTrace() != 0) && !opts.disDiffable)
+ {
+ LONG newJitNestingLevel = InterlockedIncrement(&Compiler::jitNestingLevel);
+ if (newJitNestingLevel <= 0)
+ {
+ printf("{ Illegal nesting level %d }\n", newJitNestingLevel);
+ }
+
+ for (LONG i = 0; i < newJitNestingLevel - 1; i++)
+ {
+ printf(" ");
+ }
+ printf("{ Start Jitting %s (MethodHash=%08x)\n", info.compFullName,
+ info.compMethodHash()); /* } editor brace matching workaround for this printf */
+ }
+#endif // DEBUG
+}
+
+void Compiler::compFunctionTraceEnd(void* methodCodePtr, ULONG methodCodeSize, bool isNYI)
+{
+#ifdef DEBUG
+ assert(!compIsForInlining());
+
+ if ((JitConfig.JitFunctionTrace() != 0) && !opts.disDiffable)
+ {
+ LONG newJitNestingLevel = InterlockedDecrement(&Compiler::jitNestingLevel);
+ if (newJitNestingLevel < 0)
+ {
+ printf("{ Illegal nesting level %d }\n", newJitNestingLevel);
+ }
+
+ for (LONG i = 0; i < newJitNestingLevel; i++)
+ {
+ printf(" ");
+ }
+ /* { editor brace-matching workaround for following printf */
+ printf("} Jitted Entry %03x at" FMT_ADDR "method %s size %08x%s\n", Compiler::jitTotalMethodCompiled,
+ DBG_ADDR(methodCodePtr), info.compFullName, methodCodeSize,
+ isNYI ? " NYI" : (compIsForImportOnly() ? " import only" : ""));
+ }
+#endif // DEBUG
+}
+
+//*********************************************************************************************
+// #Phases
+//
+// This is the most interesting 'toplevel' function in the JIT. It goes through the operations of
+// importing, morphing, optimizations and code generation. This is called from the EE through the
+// code:CILJit::compileMethod function.
+//
+// For an overview of the structure of the JIT, see:
+// https://github.com/dotnet/coreclr/blob/master/Documentation/botr/ryujit-overview.md
+//
+void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_FLAGS* compileFlags)
+{
+ if (compIsForInlining())
+ {
+ // Notify root instance that an inline attempt is about to import IL
+ impInlineRoot()->m_inlineStrategy->NoteImport();
+ }
+
+ hashBv::Init(this);
+
+ VarSetOps::AssignAllowUninitRhs(this, compCurLife, VarSetOps::UninitVal());
+
+ /* The temp holding the secret stub argument is used by fgImport() when importing the intrinsic. */
+
+ if (info.compPublishStubParam)
+ {
+ assert(lvaStubArgumentVar == BAD_VAR_NUM);
+ lvaStubArgumentVar = lvaGrabTempWithImplicitUse(false DEBUGARG("stub argument"));
+ lvaTable[lvaStubArgumentVar].lvType = TYP_I_IMPL;
+ }
+
+ EndPhase(PHASE_PRE_IMPORT);
+
+ compFunctionTraceStart();
+
+ /* Convert the instrs in each basic block to a tree based intermediate representation */
+
+ fgImport();
+
+ assert(!fgComputePredsDone);
+ if (fgCheapPredsValid)
+ {
+ // Remove cheap predecessors before inlining; allowing the cheap predecessor lists to be inserted
+ // with inlined blocks causes problems.
+ fgRemovePreds();
+ }
+
+ if (compIsForInlining())
+ {
+ /* Quit inlining if fgImport() failed for any reason. */
+
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ /* Filter out unimported BBs */
+
+ fgRemoveEmptyBlocks();
+
+ return;
+ }
+
+ assert(!compDonotInline());
+
+ EndPhase(PHASE_IMPORTATION);
+
+ // Maybe the caller was not interested in generating code
+ if (compIsForImportOnly())
+ {
+ compFunctionTraceEnd(nullptr, 0, false);
+ return;
+ }
+
+#if !FEATURE_EH
+ // If we aren't yet supporting EH in a compiler bring-up, remove as many EH handlers as possible, so
+ // we can pass tests that contain try/catch EH, but don't actually throw any exceptions.
+ fgRemoveEH();
+#endif // !FEATURE_EH
+
+ if (compileFlags->corJitFlags & CORJIT_FLG_BBINSTR)
+ {
+ fgInstrumentMethod();
+ }
+
+ // We could allow ESP frames. Just need to reserve space for
+ // pushing EBP if the method becomes an EBP-frame after an edit.
+ // Note that requiring an EBP frame disallows double alignment. Thus if we change this
+ // we either have to disallow double alignment for E&C some other way or handle it in EETwain.
+
+ if (opts.compDbgEnC)
+ {
+ codeGen->setFramePointerRequired(true);
+
+ // Since we need slots for security near ebp, it's not possible
+ // to do this after an Edit without shifting all the locals.
+ // So we just always reserve space for these slots in case an Edit adds them.
+ opts.compNeedSecurityCheck = true;
+
+ // We don't care about localloc right now. If we do support it,
+ // EECodeManager::FixContextForEnC() needs to handle it smartly
+ // in case the localloc was actually executed.
+ //
+ // compLocallocUsed = true;
+ }
+
+ EndPhase(PHASE_POST_IMPORT);
+
+ /* Initialize the BlockSet epoch */
+
+ NewBasicBlockEpoch();
+
+ /* Massage the trees so that we can generate code out of them */
+
+ fgMorph();
+ EndPhase(PHASE_MORPH);
+
+ /* GS security checks for unsafe buffers */
+ if (getNeedsGSSecurityCookie())
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** -GS checks for unsafe buffers \n");
+ }
+#endif
+
+ gsGSChecksInitCookie();
+
+ if (compGSReorderStackLayout)
+ {
+ gsCopyShadowParams();
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ fgDispBasicBlocks(true);
+ printf("\n");
+ }
+#endif
+ }
+ EndPhase(PHASE_GS_COOKIE);
+
+ /* Compute bbNum, bbRefs and bbPreds */
+
+ JITDUMP("\nRenumbering the basic blocks for fgComputePred\n");
+ fgRenumberBlocks();
+
+ noway_assert(!fgComputePredsDone); // This is the first time full (not cheap) preds will be computed.
+ fgComputePreds();
+ EndPhase(PHASE_COMPUTE_PREDS);
+
+ /* If we need to emit GC Poll calls, mark the blocks that need them now. This is conservative and can
+ * be optimized later. */
+ fgMarkGCPollBlocks();
+ EndPhase(PHASE_MARK_GC_POLL_BLOCKS);
+
+ /* From this point on the flowgraph information such as bbNum,
+ * bbRefs or bbPreds has to be kept updated */
+
+ // Compute the edge weights (if we have profile data)
+ fgComputeEdgeWeights();
+ EndPhase(PHASE_COMPUTE_EDGE_WEIGHTS);
+
+#if FEATURE_EH_FUNCLETS
+
+ /* Create funclets from the EH handlers. */
+
+ fgCreateFunclets();
+ EndPhase(PHASE_CREATE_FUNCLETS);
+
+#endif // FEATURE_EH_FUNCLETS
+
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ optOptimizeLayout();
+ EndPhase(PHASE_OPTIMIZE_LAYOUT);
+
+ // Compute reachability sets and dominators.
+ fgComputeReachability();
+ }
+
+ // Transform each GT_ALLOCOBJ node into either an allocation helper call or
+ // local variable allocation on the stack.
+ ObjectAllocator objectAllocator(this);
+ objectAllocator.Run();
+
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ /* Perform loop inversion (i.e. transform "while" loops into
+ "repeat" loops) and discover and classify natural loops
+ (e.g. mark iterative loops as such). Also marks loop blocks
+ and sets bbWeight to the loop nesting levels
+ */
+
+ optOptimizeLoops();
+ EndPhase(PHASE_OPTIMIZE_LOOPS);
+
+ // Clone loops with optimization opportunities, and
+ // choose the one based on dynamic condition evaluation.
+ optCloneLoops();
+ EndPhase(PHASE_CLONE_LOOPS);
+
+ /* Unroll loops */
+ optUnrollLoops();
+ EndPhase(PHASE_UNROLL_LOOPS);
+ }
+
+#ifdef DEBUG
+ fgDebugCheckLinks();
+#endif
+
+ /* Create the variable table (and compute variable ref counts) */
+
+ lvaMarkLocalVars();
+ EndPhase(PHASE_MARK_LOCAL_VARS);
+
+ // IMPORTANT, after this point, every place where trees are modified or cloned
+ // the local variable reference counts must be updated
+ // You can test the value of the following variable to see if
+ // the local variable ref counts must be updated
+ //
+ assert(lvaLocalVarRefCounted == true);
+
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ /* Optimize boolean conditions */
+
+ optOptimizeBools();
+ EndPhase(PHASE_OPTIMIZE_BOOLS);
+
+ // optOptimizeBools() might have changed the number of blocks; the dominators/reachability might be bad.
+ }
+
+ /* Figure out the order in which operators are to be evaluated */
+ fgFindOperOrder();
+ EndPhase(PHASE_FIND_OPER_ORDER);
+
+ // Weave the tree lists. Anyone who modifies the tree shapes after
+ // this point is responsible for calling fgSetStmtSeq() to keep the
+ // nodes properly linked.
+ // This can create GC poll calls, and create new BasicBlocks (without updating dominators/reachability).
+ fgSetBlockOrder();
+ EndPhase(PHASE_SET_BLOCK_ORDER);
+
+ // IMPORTANT, after this point, every place where tree topology changes must redo evaluation
+ // order (gtSetStmtInfo) and relink nodes (fgSetStmtSeq) if required.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ // Now we have determined the order of evaluation and the gtCosts for every node.
+ // If verbose, dump the full set of trees here before the optimization phases mutate them
+ //
+ if (verbose)
+ {
+ fgDispBasicBlocks(true); // 'true' will call fgDumpTrees() after dumping the BasicBlocks
+ printf("\n");
+ }
+#endif
+
+ // At this point we know if we are fully interruptible or not
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ bool doSsa = true;
+ bool doEarlyProp = true;
+ bool doValueNum = true;
+ bool doLoopHoisting = true;
+ bool doCopyProp = true;
+ bool doAssertionProp = true;
+ bool doRangeAnalysis = true;
+
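+ // Under DEBUG each of these optimizations can be switched off individually; the
+ // settings below also respect phase dependencies (e.g. value numbering requires
+ // SSA, and assertion prop requires value numbering).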
+#ifdef DEBUG
+ doSsa = (JitConfig.JitDoSsa() != 0);
+ doEarlyProp = doSsa && (JitConfig.JitDoEarlyProp() != 0);
+ doValueNum = doSsa && (JitConfig.JitDoValueNumber() != 0);
+ doLoopHoisting = doValueNum && (JitConfig.JitDoLoopHoisting() != 0);
+ doCopyProp = doValueNum && (JitConfig.JitDoCopyProp() != 0);
+ doAssertionProp = doValueNum && (JitConfig.JitDoAssertionProp() != 0);
+ doRangeAnalysis = doAssertionProp && (JitConfig.JitDoRangeAnalysis() != 0);
+#endif
+
+ if (doSsa)
+ {
+ fgSsaBuild();
+ EndPhase(PHASE_BUILD_SSA);
+ }
+
+ if (doEarlyProp)
+ {
+ /* Propagate array length and rewrite getType() method call */
+ optEarlyProp();
+ EndPhase(PHASE_EARLY_PROP);
+ }
+
+ if (doValueNum)
+ {
+ fgValueNumber();
+ EndPhase(PHASE_VALUE_NUMBER);
+ }
+
+ if (doLoopHoisting)
+ {
+ /* Hoist invariant code out of loops */
+ optHoistLoopCode();
+ EndPhase(PHASE_HOIST_LOOP_CODE);
+ }
+
+ if (doCopyProp)
+ {
+ /* Perform VN based copy propagation */
+ optVnCopyProp();
+ EndPhase(PHASE_VN_COPY_PROP);
+ }
+
+#if FEATURE_ANYCSE
+ /* Remove common sub-expressions */
+ optOptimizeCSEs();
+#endif // FEATURE_ANYCSE
+
+#if ASSERTION_PROP
+ if (doAssertionProp)
+ {
+ /* Assertion propagation */
+ optAssertionPropMain();
+ EndPhase(PHASE_ASSERTION_PROP_MAIN);
+ }
+
+ if (doRangeAnalysis)
+ {
+ /* Optimize array index range checks */
+ RangeCheck rc(this);
+ rc.OptimizeRangeChecks();
+ EndPhase(PHASE_OPTIMIZE_INDEX_CHECKS);
+ }
+#endif // ASSERTION_PROP
+
+ /* update the flowgraph if we modified it during the optimization phase*/
+ if (fgModified)
+ {
+ fgUpdateFlowGraph();
+ EndPhase(PHASE_UPDATE_FLOW_GRAPH);
+
+ // Recompute the edge weight if we have modified the flow graph
+ fgComputeEdgeWeights();
+ EndPhase(PHASE_COMPUTE_EDGE_WEIGHTS2);
+ }
+ }
+
+#ifdef _TARGET_AMD64_
+ // Check if we need to add the Quirk for the PPP backward compat issue
+ compQuirkForPPPflag = compQuirkForPPP();
+#endif
+
+ fgDetermineFirstColdBlock();
+ EndPhase(PHASE_DETERMINE_FIRST_COLD_BLOCK);
+
+#ifdef DEBUG
+ fgDebugCheckLinks(compStressCompile(STRESS_REMORPH_TREES, 50));
+
+ // Stash the current estimate of the function's size if necessary.
+ if (verbose)
+ {
+ compSizeEstimate = 0;
+ compCycleEstimate = 0;
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
+ {
+ compSizeEstimate += stmt->GetCostSz();
+ compCycleEstimate += stmt->GetCostEx();
+ }
+ }
+ }
+#endif
+
+#ifndef LEGACY_BACKEND
+ // rationalize trees
+ Rationalizer rat(this); // PHASE_RATIONALIZE
+ rat.Run();
+#endif // !LEGACY_BACKEND
+
+ // Here we do "simple lowering". When the RyuJIT backend works for all
+ // platforms, this will be part of the more general lowering phase. For now, though, we do a separate
+ // pass of "final lowering." We must do this before (final) liveness analysis, because this creates
+ // range check throw blocks, in which the liveness must be correct.
+ fgSimpleLowering();
+ EndPhase(PHASE_SIMPLE_LOWERING);
+
+#ifdef LEGACY_BACKEND
+ /* Local variable liveness */
+ fgLocalVarLiveness();
+ EndPhase(PHASE_LCLVARLIVENESS);
+#endif // !LEGACY_BACKEND
+
+#ifdef DEBUG
+ fgDebugCheckBBlist();
+ fgDebugCheckLinks();
+#endif
+
+ /* Enable this to gather statistical data such as
+ * call and register argument info, flowgraph and loop info, etc. */
+
+ compJitStats();
+
+#ifdef _TARGET_ARM_
+ if (compLocallocUsed)
+ {
+ // We reserve REG_SAVED_LOCALLOC_SP to store SP on entry for stack unwinding
+ codeGen->regSet.rsMaskResvd |= RBM_SAVED_LOCALLOC_SP;
+ }
+#endif // _TARGET_ARM_
+#ifdef _TARGET_ARMARCH_
+ if (compRsvdRegCheck(PRE_REGALLOC_FRAME_LAYOUT))
+ {
+ // We reserve R10/IP1 in this case to hold the offsets in load/store instructions
+ codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
+ assert(REG_OPT_RSVD != REG_FP);
+ }
+
+#ifdef DEBUG
+ //
+ // Display the pre-regalloc frame offsets that we have tentatively decided upon
+ //
+ if (verbose)
+ lvaTableDump();
+#endif
+#endif // _TARGET_ARMARCH_
+
+ /* Assign registers to variables, etc. */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ ///////////////////////////////////////////////////////////////////////////////
+ // Dominator and reachability sets are no longer valid. They haven't been
+ // maintained up to here, and shouldn't be used (unless recomputed).
+ ///////////////////////////////////////////////////////////////////////////////
+ fgDomsComputed = false;
+
+ /* Create LSRA before Lowering, this way Lowering can initialize the TreeNode Map */
+ m_pLinearScan = getLinearScanAllocator(this);
+
+ /* Lower */
+ Lowering lower(this, m_pLinearScan); // PHASE_LOWERING
+ lower.Run();
+
+ assert(lvaSortAgain == false); // We should have re-run fgLocalVarLiveness() in lower.Run()
+ lvaTrackedFixed = true; // We can not add any new tracked variables after this point.
+
+ /* Now that lowering is completed we can proceed to perform register allocation */
+ m_pLinearScan->doLinearScan();
+ EndPhase(PHASE_LINEAR_SCAN);
+
+ // Copied from rpPredictRegUse()
+ genFullPtrRegMap = (codeGen->genInterruptible || !codeGen->isFramePointerUsed());
+#else // LEGACY_BACKEND
+
+ lvaTrackedFixed = true; // We cannot add any new tracked variables after this point.
+ // For the classic JIT32 at this point lvaSortAgain can be set and raAssignVars() will call lvaSortOnly()
+
+ // Now do "classic" register allocation.
+ raAssignVars();
+ EndPhase(PHASE_RA_ASSIGN_VARS);
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUG
+ fgDebugCheckLinks();
+#endif
+
+ /* Generate code */
+
+ codeGen->genGenerateCode(methodCodePtr, methodCodeSize);
+
+#ifdef FEATURE_JIT_METHOD_PERF
+ if (pCompJitTimer)
+ pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary);
+#endif
+
+ RecordStateAtEndOfCompilation();
+
+#ifdef FEATURE_TRACELOGGING
+ compJitTelemetry.NotifyEndOfCompilation();
+#endif
+
+#if defined(DEBUG)
+ ++Compiler::jitTotalMethodCompiled;
+#endif // defined(DEBUG)
+
+ compFunctionTraceEnd(*methodCodePtr, *methodCodeSize, false);
+
+#if FUNC_INFO_LOGGING
+ if (compJitFuncInfoFile != nullptr)
+ {
+ assert(!compIsForInlining());
+#ifdef DEBUG // We only have access to info.compFullName in DEBUG builds.
+ fprintf(compJitFuncInfoFile, "%s\n", info.compFullName);
+#elif FEATURE_SIMD
+ fprintf(compJitFuncInfoFile, " %s\n", eeGetMethodFullName(info.compMethodHnd));
+#endif
+ fprintf(compJitFuncInfoFile, ""); // in our logic this causes a flush
+ }
+#endif // FUNC_INFO_LOGGING
+}
+
+/*****************************************************************************/
+void Compiler::ProcessShutdownWork(ICorStaticInfo* statInfo)
+{
+}
+
+#ifdef _TARGET_AMD64_
+// Check if we need to add the Quirk for the PPP backward compat issue.
+// This Quirk addresses a compatibility issue between the new RyuJit and the previous JIT64.
+ // A backward compatibility issue called 'PPP' exists where a PInvoke call passes a 32-byte struct
+ // into a native API which basically writes 48 bytes of data into the struct.
+ // With the stack frame layout used by RyuJIT the extra 16 bytes written corrupt a
+ // caller-saved register, and this leads to an A/V in the calling method.
+ // The older JIT64 compiler just happened to have a different stack layout and/or
+ // caller-saved register set so that it didn't hit the A/V in the caller.
+ // By increasing the amount of stack allocated for the struct by 32 bytes we can fix this.
+//
+// Return true if we actually perform the Quirk, otherwise return false
+//
+bool Compiler::compQuirkForPPP()
+{
+ if (lvaCount != 2)
+ { // We require that there are exactly two locals
+ return false;
+ }
+
+ if (compTailCallUsed)
+ { // Don't try this quirk if a tail call was used
+ return false;
+ }
+
+ bool hasOutArgs = false;
+ LclVarDsc* varDscExposedStruct = nullptr;
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ /* Look for struct locals that are address taken */
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvIsParam) // It can't be a parameter
+ {
+ continue;
+ }
+
+ // We require that the OutgoingArg space lclVar exists
+ if (lclNum == lvaOutgoingArgSpaceVar)
+ {
+ hasOutArgs = true; // Record that we saw it
+ continue;
+ }
+
+ // Look for a 32-byte address exposed Struct and record its varDsc
+ if ((varDsc->TypeGet() == TYP_STRUCT) && varDsc->lvAddrExposed && (varDsc->lvExactSize == 32))
+ {
+ varDscExposedStruct = varDsc;
+ }
+ }
+
+ // We only perform the Quirk when there are two locals:
+ // one of them is an address-exposed struct of size 32
+ // and the other is the outgoing arg space local.
+ //
+ if (hasOutArgs && (varDscExposedStruct != nullptr))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAdding a backwards compatibility quirk for the 'PPP' issue\n");
+ }
+#endif // DEBUG
+
+ // Increase the exact size of this struct by 32 bytes
+ // This fixes the PPP backward compat issue
+ varDscExposedStruct->lvExactSize += 32;
+
+ return true;
+ }
+ return false;
+}
+#endif // _TARGET_AMD64_
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+ void* forceFrameJIT; // used to force a frame; useful for fastchecked debugging
+
+bool Compiler::skipMethod()
+{
+ static ConfigMethodRange fJitRange;
+ fJitRange.EnsureInit(JitConfig.JitRange());
+ assert(!fJitRange.Error());
+
+ // Normally JitConfig.JitRange() is null, and we don't want to skip
+ // jitting any methods.
+ //
+ // So, the logic below relies on the fact that a null range string
+ // passed to ConfigMethodRange represents the set of all methods.
+
+ if (!fJitRange.Contains(info.compCompHnd, info.compMethodHnd))
+ {
+ return true;
+ }
+
+ if (JitConfig.JitExclude().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ return true;
+ }
+
+ if (!JitConfig.JitInclude().isEmpty() &&
+ !JitConfig.JitInclude().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+#endif
+
+/*****************************************************************************/
+
+int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_MODULE_HANDLE classPtr,
+ COMP_HANDLE compHnd,
+ CORINFO_METHOD_INFO* methodInfo,
+ void** methodCodePtr,
+ ULONG* methodCodeSize,
+ CORJIT_FLAGS* compileFlags)
+{
+#ifdef FEATURE_JIT_METHOD_PERF
+ static bool checkedForJitTimeLog = false;
+
+ if (!checkedForJitTimeLog)
+ {
+ // Call into VM to get the config strings. FEATURE_JIT_METHOD_PERF is enabled for
+ // retail builds. Do not call the regular Config helper here as it would pull
+ // in a copy of the config parser into the clrjit.dll.
+ InterlockedCompareExchangeT(&Compiler::compJitTimeLogFilename, compHnd->getJitTimeLogFilename(), NULL);
+
+ // At a process or module boundary clear the file and start afresh.
+ JitTimer::PrintCsvHeader();
+
+ checkedForJitTimeLog = true;
+ }
+ if ((Compiler::compJitTimeLogFilename != NULL) || (JitTimeLogCsv() != NULL))
+ {
+ pCompJitTimer = JitTimer::Create(this, methodInfo->ILCodeSize);
+ }
+ else
+ {
+ pCompJitTimer = NULL;
+ }
+#endif // FEATURE_JIT_METHOD_PERF
+
+#ifdef DEBUG
+ Compiler* me = this;
+ forceFrameJIT = (void*)&me; // let us see the this pointer in fastchecked build
+ // set this early so we can use it without relying on random memory values
+ verbose = compIsForInlining() ? impInlineInfo->InlinerCompiler->verbose : false;
+
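+ // For inlinees, inherit all of the IR dump settings from the root (inliner)
+ // compiler so that dump output for the inlinee matches its caller's configuration.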
+ this->dumpIR = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIR : false;
+ this->dumpIRPhase = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRPhase : nullptr;
+ this->dumpIRFormat = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRFormat : nullptr;
+ this->dumpIRTypes = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRTypes : false;
+ this->dumpIRLocals = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRLocals : false;
+ this->dumpIRRegs = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRRegs : false;
+ this->dumpIRSsa = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRSsa : false;
+ this->dumpIRValnums = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRValnums : false;
+ this->dumpIRCosts = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRCosts : false;
+ this->dumpIRFlags = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRFlags : false;
+ this->dumpIRKinds = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRKinds : false;
+ this->dumpIRNodes = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRNodes : false;
+ this->dumpIRNoLists = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRNoLists : false;
+ this->dumpIRNoLeafs = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRNoLeafs : false;
+ this->dumpIRNoStmts = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRNoStmts : false;
+ this->dumpIRTrees = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRTrees : false;
+ this->dumpIRLinear = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRLinear : false;
+ this->dumpIRDataflow = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRDataflow : false;
+ this->dumpIRBlockHeaders = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRBlockHeaders : false;
+ this->dumpIRExit = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRExit : false;
+
+#endif
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ info.compMethodHashPrivate = 0;
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#if FUNC_INFO_LOGGING
+ LPCWSTR tmpJitFuncInfoFilename = JitConfig.JitFuncInfoFile();
+
+ if (tmpJitFuncInfoFilename != nullptr)
+ {
+ LPCWSTR oldFuncInfoFileName =
+ InterlockedCompareExchangeT(&compJitFuncInfoFilename, tmpJitFuncInfoFilename, NULL);
+ if (oldFuncInfoFileName == nullptr)
+ {
+ assert(compJitFuncInfoFile == nullptr);
+ compJitFuncInfoFile = _wfopen(compJitFuncInfoFilename, W("a"));
+ if (compJitFuncInfoFile == nullptr)
+ {
+#if defined(DEBUG) && !defined(FEATURE_PAL) // no 'perror' in the PAL
+ perror("Failed to open JitFuncInfoLogFile");
+#endif // defined(DEBUG) && !defined(FEATURE_PAL)
+ }
+ }
+ }
+#endif // FUNC_INFO_LOGGING
+
+ // if (s_compMethodsCount==0) setvbuf(jitstdout, NULL, _IONBF, 0);
+
+ info.compCompHnd = compHnd;
+ info.compMethodHnd = methodHnd;
+ info.compMethodInfo = methodInfo;
+
+ // Do we have a matched VM? Or are we "abusing" the VM to help us do JIT work (such as using an x86 native VM
+ // with an ARM-targeting "altjit").
+ info.compMatchedVM = IMAGE_FILE_MACHINE_TARGET == info.compCompHnd->getExpectedTargetArchitecture();
+
+#if defined(ALT_JIT) && defined(UNIX_AMD64_ABI)
+ // ToDo: This code is to allow us to run UNIX codegen on Windows for now. Remove when appropriate.
+ // Make sure that the generated UNIX altjit code is skipped on Windows. The static jit codegen is used to run.
+ info.compMatchedVM = false;
+#endif // UNIX_AMD64_ABI
+
+#if COR_JIT_EE_VERSION > 460
+ compMaxUncheckedOffsetForNullObject = eeGetEEInfo()->maxUncheckedOffsetForNullObject;
+#else // COR_JIT_EE_VERSION <= 460
+ compMaxUncheckedOffsetForNullObject = MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT;
+#endif // COR_JIT_EE_VERSION > 460
+
+ // Set the context for token lookup.
+ if (compIsForInlining())
+ {
+ impTokenLookupContextHandle = impInlineInfo->tokenLookupContextHandle;
+
+ assert(impInlineInfo->inlineCandidateInfo->clsHandle == compHnd->getMethodClass(methodHnd));
+ info.compClassHnd = impInlineInfo->inlineCandidateInfo->clsHandle;
+
+ assert(impInlineInfo->inlineCandidateInfo->clsAttr == info.compCompHnd->getClassAttribs(info.compClassHnd));
+ // printf("%x != %x\n", impInlineInfo->inlineCandidateInfo->clsAttr,
+ // info.compCompHnd->getClassAttribs(info.compClassHnd));
+ info.compClassAttr = impInlineInfo->inlineCandidateInfo->clsAttr;
+ }
+ else
+ {
+ impTokenLookupContextHandle = MAKE_METHODCONTEXT(info.compMethodHnd);
+
+ info.compClassHnd = compHnd->getMethodClass(methodHnd);
+ info.compClassAttr = info.compCompHnd->getClassAttribs(info.compClassHnd);
+ }
+
+ info.compProfilerCallback = false; // Assume false until we are told to hook this method.
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+ const char* classNamePtr;
+
+ info.compMethodName = eeGetMethodName(methodHnd, &classNamePtr);
+ unsigned len = (unsigned)roundUp(strlen(classNamePtr) + 1);
+ info.compClassName = (char*)compGetMem(len, CMK_DebugOnly);
+ strcpy_s((char*)info.compClassName, len, classNamePtr);
+
+ info.compFullName = eeGetMethodFullName(methodHnd);
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+#ifdef DEBUG
+ if (!compIsForInlining())
+ {
+ JitTls::GetLogEnv()->setCompiler(this);
+ }
+
+ // Have we been told to be more selective in our Jitting?
+ if (skipMethod())
+ {
+ if (compIsForInlining())
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_MARKED_AS_SKIPPED);
+ }
+ return CORJIT_SKIPPED;
+ }
+
+ // Opt-in to jit stress based on method hash ranges.
+ //
+ // Note the default (with JitStressRange not set) is that all
+ // methods will be subject to stress.
+ static ConfigMethodRange fJitStressRange;
+ fJitStressRange.EnsureInit(JitConfig.JitStressRange());
+ assert(!fJitStressRange.Error());
+ bRangeAllowStress = fJitStressRange.Contains(info.compCompHnd, info.compMethodHnd);
+
+#endif // DEBUG
+
+ // Set this before the first 'BADCODE'
+ // Skip verification where possible
+ tiVerificationNeeded = (compileFlags->corJitFlags & CORJIT_FLG_SKIP_VERIFICATION) == 0;
+
+ assert(!compIsForInlining() || !tiVerificationNeeded); // Inlinees must have been verified.
+
+ // assume the code is verifiable unless proven otherwise
+ tiIsVerifiableCode = TRUE;
+
+ tiRuntimeCalloutNeeded = false;
+
+ CorInfoInstantiationVerification instVerInfo = INSTVER_GENERIC_PASSED_VERIFICATION;
+
+ if (!compIsForInlining() && tiVerificationNeeded)
+ {
+ instVerInfo = compHnd->isInstantiationOfVerifiedGeneric(methodHnd);
+
+ if (tiVerificationNeeded && (instVerInfo == INSTVER_GENERIC_FAILED_VERIFICATION))
+ {
+ CorInfoCanSkipVerificationResult canSkipVerificationResult =
+ info.compCompHnd->canSkipMethodVerification(info.compMethodHnd);
+
+ switch (canSkipVerificationResult)
+ {
+ case CORINFO_VERIFICATION_CANNOT_SKIP:
+ // We cannot verify concrete instantiation.
+ // We can only verify the typical/open instantiation
+ // The VM should throw a VerificationException instead of allowing this.
+ NO_WAY("Verification of closed instantiations is not supported");
+ break;
+
+ case CORINFO_VERIFICATION_CAN_SKIP:
+ // The VM should first verify the open instantiation. If unverifiable code
+ // is detected, it should pass in CORJIT_FLG_SKIP_VERIFICATION.
+ assert(!"The VM should have used CORJIT_FLG_SKIP_VERIFICATION");
+ tiVerificationNeeded = false;
+ break;
+
+ case CORINFO_VERIFICATION_RUNTIME_CHECK:
+ // This is a concrete generic instantiation with unverifiable code, that also
+ // needs a runtime callout.
+ tiVerificationNeeded = false;
+ tiRuntimeCalloutNeeded = true;
+ break;
+
+ case CORINFO_VERIFICATION_DONT_JIT:
+ // We cannot verify concrete instantiation.
+ // We can only verify the typical/open instantiation
+ // The VM should throw a VerificationException instead of allowing this.
+ BADCODE("NGEN of unverifiable transparent code is not supported");
+ break;
+ }
+ }
+
+ // load any constraints for verification, noting any cycles to be rejected by the verifying importer
+ if (tiVerificationNeeded)
+ {
+ compHnd->initConstraintsForVerification(methodHnd, &info.hasCircularClassConstraints,
+ &info.hasCircularMethodConstraints);
+ }
+ }
+
+ /* Setup an error trap */
+
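+    // Explanatory note (stated as an assumption about the trap macros): setErrorTrap and
+    // finallyErrorTrap expand into a separate protected scope, so every local the protected
+    // block needs is passed through this single Param object rather than being referenced
+    // directly across the trap boundary.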
+ struct Param
+ {
+ Compiler* pThis;
+
+ CORINFO_MODULE_HANDLE classPtr;
+ COMP_HANDLE compHnd;
+ CORINFO_METHOD_INFO* methodInfo;
+ void** methodCodePtr;
+ ULONG* methodCodeSize;
+ CORJIT_FLAGS* compileFlags;
+
+ CorInfoInstantiationVerification instVerInfo;
+ int result;
+ } param;
+ param.pThis = this;
+ param.classPtr = classPtr;
+ param.compHnd = compHnd;
+ param.methodInfo = methodInfo;
+ param.methodCodePtr = methodCodePtr;
+ param.methodCodeSize = methodCodeSize;
+ param.compileFlags = compileFlags;
+ param.instVerInfo = instVerInfo;
+ param.result = CORJIT_INTERNALERROR;
+
+ setErrorTrap(compHnd, Param*, pParam, &param) // ERROR TRAP: Start normal block
+ {
+ pParam->result = pParam->pThis->compCompileHelper(pParam->classPtr, pParam->compHnd, pParam->methodInfo,
+ pParam->methodCodePtr, pParam->methodCodeSize,
+ pParam->compileFlags, pParam->instVerInfo);
+ }
+ finallyErrorTrap() // ERROR TRAP: The following block handles errors
+ {
+ /* Cleanup */
+
+ if (compIsForInlining())
+ {
+ goto DoneCleanUp;
+ }
+
+ /* Tell the emitter that we're done with this function */
+
+ genEmitter->emitEndCG();
+
+ DoneCleanUp:
+ compDone();
+ }
+ endErrorTrap() // ERROR TRAP: End
+
+ return param.result;
+}
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+unsigned Compiler::Info::compMethodHash() const
+{
+ if (compMethodHashPrivate == 0)
+ {
+ compMethodHashPrivate = compCompHnd->getMethodHash(compMethodHnd);
+ }
+ return compMethodHashPrivate;
+}
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+void Compiler::compCompileFinish()
+{
+#if defined(DEBUG) || MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || DISPLAY_SIZES || CALL_ARG_STATS
+ genMethodCnt++;
+#endif
+
+#if MEASURE_MEM_ALLOC
+ {
+ // Grab the relevant lock.
+ CritSecHolder statsLock(s_memStatsLock);
+
+ // Make the updates.
+ genMemStats.nraTotalSizeAlloc = compGetAllocator()->getTotalBytesAllocated();
+ genMemStats.nraTotalSizeUsed = compGetAllocator()->getTotalBytesUsed();
+ s_aggMemStats.Add(genMemStats);
+ if (genMemStats.allocSz > s_maxCompMemStats.allocSz)
+ {
+ s_maxCompMemStats = genMemStats;
+ }
+ }
+
+#ifdef DEBUG
+ if (s_dspMemStats || verbose)
+ {
+ printf("\nAllocations for %s (MethodHash=%08x)\n", info.compFullName, info.compMethodHash());
+ genMemStats.Print(jitstdout);
+ }
+#endif // DEBUG
+#endif // MEASURE_MEM_ALLOC
+
+#if LOOP_HOIST_STATS
+ AddLoopHoistStats();
+#endif // LOOP_HOIST_STATS
+
+#if MEASURE_NODE_SIZE
+ genTreeNcntHist.record(static_cast<unsigned>(genNodeSizeStatsPerFunc.genTreeNodeCnt));
+ genTreeNsizHist.record(static_cast<unsigned>(genNodeSizeStatsPerFunc.genTreeNodeSize));
+#endif
+
+#if defined(DEBUG)
+ // Small methods should fit in ArenaAllocator::getDefaultPageSize(), or else
+ // we should bump up ArenaAllocator::getDefaultPageSize()
+
+ if ((info.compILCodeSize <= 32) && // Is it a reasonably small method?
+ (info.compNativeCodeSize < 512) && // Some trivial methods generate huge native code. eg. pushing a single huge
+ // struct
+        (impInlinedCodeSize <= 128) &&         // Is the inlining reasonably bounded?
+                                               // Small methods cannot meaningfully have a large number of locals
+                                               // or arguments. We always track arguments at the start of
+                                               // the prolog, which requires memory.
+ (info.compLocalsCount <= 32) && (!opts.MinOpts()) && // We may have too many local variables, etc
+ (getJitStressLevel() == 0) && // We need extra memory for stress
+ !compAllocator->bypassHostAllocator() && // ArenaAllocator::getDefaultPageSize() is artificially low for
+ // DirectAlloc
+ (compAllocator->getTotalBytesAllocated() > (2 * ArenaAllocator::getDefaultPageSize())) &&
+// Factor of 2x is because data-structures are bigger under DEBUG
+#ifndef LEGACY_BACKEND
+ // RyuJIT backend needs memory tuning! TODO-Cleanup: remove this case when memory tuning is complete.
+ (compAllocator->getTotalBytesAllocated() > (10 * ArenaAllocator::getDefaultPageSize())) &&
+#endif
+ !verbose) // We allocate lots of memory to convert sets to strings for JitDump
+ {
+ genSmallMethodsNeedingExtraMemoryCnt++;
+
+ // Less than 1% of all methods should run into this.
+ // We cannot be more strict as there are always degenerate cases where we
+ // would need extra memory (like huge structs as locals - see lvaSetStruct()).
+ assert((genMethodCnt < 500) || (genSmallMethodsNeedingExtraMemoryCnt < (genMethodCnt / 100)));
+ }
+#endif // DEBUG
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ m_inlineStrategy->DumpData();
+ m_inlineStrategy->DumpXml();
+
+#endif
+
+#ifdef DEBUG
+ if (opts.dspOrder)
+ {
+ // mdMethodDef __stdcall CEEInfo::getMethodDefFromMethod(CORINFO_METHOD_HANDLE hMethod)
+ mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
+
+ unsigned profCallCount = 0;
+ if (((opts.eeFlags & CORJIT_FLG_BBOPT) != 0) && fgHaveProfileData())
+ {
+ assert(fgProfileBuffer[0].ILOffset == 0);
+ profCallCount = fgProfileBuffer[0].ExecutionCount;
+ }
+
+ static bool headerPrinted = false;
+ if (!headerPrinted)
+ {
+ // clang-format off
+ headerPrinted = true;
+ printf(" | Profiled | Exec- | Method has | calls | Num |LclV |AProp| CSE | Reg |bytes | %3s code size | \n", Target::g_tgtCPUName);
+ printf(" mdToken | | RGN | Count | EH | FRM | LOOP | NRM | IND | BBs | Cnt | Cnt | Cnt | Alloc | IL | HOT | COLD | method name \n");
+ printf("---------+-----+------+----------+----+-----+------+-----+-----+-----+-----+-----+-----+---------+------+-------+-------+-----------\n");
+ // 06001234 | PRF | HOT | 219 | EH | ebp | LOOP | 15 | 6 | 12 | 17 | 12 | 8 | 28 p2 | 145 | 211 | 123 | System.Example(int)
+ // clang-format on
+ }
+
+ printf("%08X | ", currentMethodToken);
+
+ CorInfoRegionKind regionKind = info.compMethodInfo->regionKind;
+
+ if (opts.altJit)
+ {
+ printf("ALT | ");
+ }
+ else if (fgHaveProfileData())
+ {
+ printf("PRF | ");
+ }
+ else
+ {
+ printf(" | ");
+ }
+
+ if (regionKind == CORINFO_REGION_NONE)
+ {
+ printf(" | ");
+ }
+ else if (regionKind == CORINFO_REGION_HOT)
+ {
+ printf(" HOT | ");
+ }
+ else if (regionKind == CORINFO_REGION_COLD)
+ {
+ printf("COLD | ");
+ }
+ else if (regionKind == CORINFO_REGION_JIT)
+ {
+ printf(" JIT | ");
+ }
+ else
+ {
+ printf("UNKN | ");
+ }
+
+ printf("%8d | ", profCallCount);
+
+ if (compHndBBtabCount > 0)
+ {
+ printf("EH | ");
+ }
+ else
+ {
+ printf(" | ");
+ }
+
+ if (rpFrameType == FT_EBP_FRAME)
+ {
+ printf("%3s | ", STR_FPBASE);
+ }
+ else if (rpFrameType == FT_ESP_FRAME)
+ {
+ printf("%3s | ", STR_SPBASE);
+ }
+#if DOUBLE_ALIGN
+ else if (rpFrameType == FT_DOUBLE_ALIGN_FRAME)
+ {
+ printf("dbl | ");
+ }
+#endif
+ else // (rpFrameType == FT_NOT_SET)
+ {
+ printf("??? | ");
+ }
+
+ if (fgHasLoops)
+ {
+ printf("LOOP |");
+ }
+ else
+ {
+ printf(" |");
+ }
+
+ printf(" %3d |", optCallCount);
+ printf(" %3d |", optIndirectCallCount);
+ printf(" %3d |", fgBBcountAtCodegen);
+ printf(" %3d |", lvaCount);
+
+ if (opts.MinOpts())
+ {
+ printf(" MinOpts |");
+ }
+ else
+ {
+ printf(" %3d |", optAssertionCount);
+#if FEATURE_ANYCSE
+ printf(" %3d |", optCSEcount);
+#else
+ printf(" %3d |", 0);
+#endif // FEATURE_ANYCSE
+ }
+
+#ifndef LEGACY_BACKEND
+ printf(" LSRA |"); // TODO-Cleanup: dump some interesting LSRA stat into the order file?
+#else // LEGACY_BACKEND
+ printf("%s%4d p%1d |", (tmpCount > 0) ? "T" : " ", rpStkPredict / BB_UNITY_WEIGHT, rpPasses);
+#endif // LEGACY_BACKEND
+ printf(" %4d |", info.compMethodInfo->ILCodeSize);
+ printf(" %5d |", info.compTotalHotCodeSize);
+ printf(" %5d |", info.compTotalColdCodeSize);
+
+ printf(" %s\n", eeGetMethodFullName(info.compMethodHnd));
+ printf(""); // in our logic this causes a flush
+ }
+
+ if (verbose)
+ {
+ printf("****** DONE compiling %s\n", info.compFullName);
+ printf(""); // in our logic this causes a flush
+ }
+
+ // Only call _DbgBreakCheck when we are jitting, not when we are ngen-ing
+ // For ngen the int3 or breakpoint instruction will be right at the
+ // start of the ngen method and we will stop when we execute it.
+ //
+ if ((opts.eeFlags & CORJIT_FLG_PREJIT) == 0)
+ {
+ if (compJitHaltMethod())
+ {
+#if !defined(_TARGET_ARM64_) && !defined(PLATFORM_UNIX)
+ // TODO-ARM64-NYI: re-enable this when we have an OS that supports a pop-up dialog
+
+ // Don't do an assert, but just put up the dialog box so we get just-in-time debugger
+ // launching. When you hit 'retry' it will continue and naturally stop at the INT 3
+ // that the JIT put in the code
+ _DbgBreakCheck(__FILE__, __LINE__, "JitHalt");
+#endif
+ }
+ }
+#endif // DEBUG
+}
+
+#ifdef PSEUDORANDOM_NOP_INSERTION
+// This is the zlib adler32 checksum. The source came from the Windows code base.
+
+#define BASE 65521L // largest prime smaller than 65536
+#define NMAX 5552
+// NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
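+// In other words, reducing s1 and s2 modulo BASE at least once every NMAX bytes guarantees
+// that the 32-bit accumulators cannot overflow before the reduction.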
+
+#define DO1(buf, i) \
+ { \
+ s1 += buf[i]; \
+ s2 += s1; \
+ }
+#define DO2(buf, i) \
+ DO1(buf, i); \
+ DO1(buf, i + 1);
+#define DO4(buf, i) \
+ DO2(buf, i); \
+ DO2(buf, i + 2);
+#define DO8(buf, i) \
+ DO4(buf, i); \
+ DO4(buf, i + 4);
+#define DO16(buf) \
+ DO8(buf, 0); \
+ DO8(buf, 8);
+
+unsigned adler32(unsigned adler, char* buf, unsigned int len)
+{
+ unsigned int s1 = adler & 0xffff;
+ unsigned int s2 = (adler >> 16) & 0xffff;
+ int k;
+
+ if (buf == NULL)
+ return 1L;
+
+ while (len > 0)
+ {
+ k = len < NMAX ? len : NMAX;
+ len -= k;
+ while (k >= 16)
+ {
+ DO16(buf);
+ buf += 16;
+ k -= 16;
+ }
+ if (k != 0)
+ do
+ {
+ s1 += *buf++;
+ s2 += s1;
+ } while (--k);
+ s1 %= BASE;
+ s2 %= BASE;
+ }
+ return (s2 << 16) | s1;
+}
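+
+// For illustration (worked from the code above): with a zero seed, as getMethodBodyChecksum
+// uses below, adler32(0, "a", 1) accumulates s1 = 0x61 and s2 = 0x61 and returns 0x00610061.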
+#endif
+
+unsigned getMethodBodyChecksum(__in_z char* code, int size)
+{
+#ifdef PSEUDORANDOM_NOP_INSERTION
+ return adler32(0, code, size);
+#else
+ return 0;
+#endif
+}
+
+int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
+ COMP_HANDLE compHnd,
+ CORINFO_METHOD_INFO* methodInfo,
+ void** methodCodePtr,
+ ULONG* methodCodeSize,
+ CORJIT_FLAGS* compileFlags,
+ CorInfoInstantiationVerification instVerInfo)
+{
+ CORINFO_METHOD_HANDLE methodHnd = info.compMethodHnd;
+
+ info.compCode = methodInfo->ILCode;
+ info.compILCodeSize = methodInfo->ILCodeSize;
+
+ if (info.compILCodeSize == 0)
+ {
+ BADCODE("code size is zero");
+ }
+
+ if (compIsForInlining())
+ {
+#ifdef DEBUG
+ unsigned methAttr_Old = impInlineInfo->inlineCandidateInfo->methAttr;
+ unsigned methAttr_New = info.compCompHnd->getMethodAttribs(info.compMethodHnd);
+ unsigned flagsToIgnore = CORINFO_FLG_DONT_INLINE | CORINFO_FLG_FORCEINLINE;
+ assert((methAttr_Old & (~flagsToIgnore)) == (methAttr_New & (~flagsToIgnore)));
+#endif
+
+ info.compFlags = impInlineInfo->inlineCandidateInfo->methAttr;
+ }
+ else
+ {
+ info.compFlags = info.compCompHnd->getMethodAttribs(info.compMethodHnd);
+#ifdef PSEUDORANDOM_NOP_INSERTION
+ info.compChecksum = getMethodBodyChecksum((char*)methodInfo->ILCode, methodInfo->ILCodeSize);
+#endif
+ }
+
+ // compInitOptions will set the correct verbose flag.
+
+ compInitOptions(compileFlags);
+
+#ifdef ALT_JIT
+ if (!compIsForInlining() && !opts.altJit)
+ {
+ // We're an altjit, but the COMPlus_AltJit configuration did not say to compile this method,
+ // so skip it.
+ return CORJIT_SKIPPED;
+ }
+#endif // ALT_JIT
+
+#ifdef DEBUG
+
+ if (verbose)
+ {
+ printf("IL to import:\n");
+ dumpILRange(info.compCode, info.compILCodeSize);
+ }
+
+#endif
+
+    // Check for COMPlus_AggressiveInlining
+ if (JitConfig.JitAggressiveInlining())
+ {
+ compDoAggressiveInlining = true;
+ }
+
+ if (compDoAggressiveInlining)
+ {
+ info.compFlags |= CORINFO_FLG_FORCEINLINE;
+ }
+
+#ifdef DEBUG
+
+ // Check for ForceInline stress.
+ if (compStressCompile(STRESS_FORCE_INLINE, 0))
+ {
+ info.compFlags |= CORINFO_FLG_FORCEINLINE;
+ }
+
+ if (compIsForInlining())
+ {
+ JITLOG((LL_INFO100000, "\nINLINER impTokenLookupContextHandle for %s is 0x%p.\n",
+ eeGetMethodFullName(info.compMethodHnd), dspPtr(impTokenLookupContextHandle)));
+ }
+
+ // Force verification if asked to do so
+ if (JitConfig.JitForceVer())
+ {
+ tiVerificationNeeded = (instVerInfo == INSTVER_NOT_INSTANTIATION);
+ }
+
+ if (tiVerificationNeeded)
+ {
+ JITLOG((LL_INFO10000, "tiVerificationNeeded initially set to true for %s\n", info.compFullName));
+ }
+#endif // DEBUG
+
+ /* Since tiVerificationNeeded can be turned off in the middle of
+ compiling a method, and it might have caused blocks to be queued up
+ for reimporting, impCanReimport can be used to check for reimporting. */
+
+ impCanReimport = (tiVerificationNeeded || compStressCompile(STRESS_CHK_REIMPORT, 15));
+
+ // Need security prolog/epilog callouts when there is a declarative security in the method.
+ tiSecurityCalloutNeeded = ((info.compFlags & CORINFO_FLG_NOSECURITYWRAP) == 0);
+
+ if (tiSecurityCalloutNeeded || (info.compFlags & CORINFO_FLG_SECURITYCHECK))
+ {
+ // We need to allocate the security object on the stack
+ // when the method being compiled has a declarative security
+ // (i.e. when CORINFO_FLG_NOSECURITYWRAP is reset for the current method).
+ // This is also the case when we inject a prolog and epilog in the method.
+ opts.compNeedSecurityCheck = true;
+ }
+
+    /* Initialize a bunch of global values */
+
+ info.compScopeHnd = classPtr;
+ info.compXcptnsCount = methodInfo->EHcount;
+ info.compMaxStack = methodInfo->maxStack;
+ compHndBBtab = nullptr;
+ compHndBBtabCount = 0;
+ compHndBBtabAllocCount = 0;
+
+ info.compNativeCodeSize = 0;
+ info.compTotalHotCodeSize = 0;
+ info.compTotalColdCodeSize = 0;
+
+#ifdef DEBUG
+ compCurBB = nullptr;
+ lvaTable = nullptr;
+
+ // Reset node ID counter
+ compGenTreeID = 0;
+#endif
+
+ /* Initialize emitter */
+
+ if (!compIsForInlining())
+ {
+ codeGen->getEmitter()->emitBegCG(this, compHnd);
+ }
+
+ info.compIsStatic = (info.compFlags & CORINFO_FLG_STATIC) != 0;
+
+ info.compIsContextful = (info.compClassAttr & CORINFO_FLG_CONTEXTFUL) != 0;
+
+ info.compPublishStubParam = (opts.eeFlags & CORJIT_FLG_PUBLISH_SECRET_PARAM) != 0;
+
+ switch (methodInfo->args.getCallConv())
+ {
+ case CORINFO_CALLCONV_VARARG:
+ case CORINFO_CALLCONV_NATIVEVARARG:
+ info.compIsVarArgs = true;
+ break;
+ case CORINFO_CALLCONV_DEFAULT:
+ info.compIsVarArgs = false;
+ break;
+ default:
+ BADCODE("bad calling convention");
+ }
+ info.compRetNativeType = info.compRetType = JITtype2varType(methodInfo->args.retType);
+
+ info.compCallUnmanaged = 0;
+ info.compLvFrameListRoot = BAD_VAR_NUM;
+
+#if FEATURE_FIXED_OUT_ARGS
+ lvaOutgoingArgSpaceSize = 0;
+#endif
+
+ lvaGenericsContextUsed = false;
+
+ info.compInitMem = ((methodInfo->options & CORINFO_OPT_INIT_LOCALS) != 0);
+
+ /* Allocate the local variable table */
+
+ lvaInitTypeRef();
+
+ if (!compIsForInlining())
+ {
+ compInitDebuggingInfo();
+ }
+
+ const bool forceInline = !!(info.compFlags & CORINFO_FLG_FORCEINLINE);
+
+ if (!compIsForInlining() && (opts.eeFlags & CORJIT_FLG_PREJIT))
+ {
+ // We're prejitting the root method. We also will analyze it as
+ // a potential inline candidate.
+ InlineResult prejitResult(this, methodHnd, "prejit");
+
+ // Do the initial inline screen.
+ impCanInlineIL(methodHnd, methodInfo, forceInline, &prejitResult);
+
+ // Temporarily install the prejitResult as the
+ // compInlineResult so it's available to fgFindJumpTargets
+ // and can accumulate more observations as the IL is
+ // scanned.
+ //
+ // We don't pass prejitResult in as a parameter to avoid
+ // potential aliasing confusion -- the other call to
+ // fgFindBasicBlocks may have set up compInlineResult and
+ // the code in fgFindJumpTargets references that data
+ // member extensively.
+ assert(compInlineResult == nullptr);
+ assert(impInlineInfo == nullptr);
+ compInlineResult = &prejitResult;
+
+ // Find the basic blocks. We must do this regardless of
+ // inlineability, since we are prejitting this method.
+ //
+ // This will also update the status of this method as
+ // an inline candidate.
+ fgFindBasicBlocks();
+
+ // Undo the temporary setup.
+ assert(compInlineResult == &prejitResult);
+ compInlineResult = nullptr;
+
+ // If still a viable, discretionary inline, assess
+ // profitability.
+ if (prejitResult.IsDiscretionaryCandidate())
+ {
+ prejitResult.DetermineProfitability(methodInfo);
+ }
+
+ // Handle the results of the inline analysis.
+ if (prejitResult.IsFailure())
+ {
+ // This method is a bad inlinee according to our
+ // analysis. We will let the InlineResult destructor
+ // mark it as noinline in the prejit image to save the
+ // jit some work.
+ //
+ // This decision better not be context-dependent.
+ assert(prejitResult.IsNever());
+ }
+ else
+ {
+ // This looks like a viable inline candidate. Since
+ // we're not actually inlining, don't report anything.
+ prejitResult.SetReported();
+ }
+ }
+ else
+ {
+ // We are jitting the root method, or inlining.
+ fgFindBasicBlocks();
+ }
+
+ // If we're inlining and the candidate is bad, bail out.
+ if (compDonotInline())
+ {
+ goto _Next;
+ }
+
+ compSetOptimizationLevel();
+
+#if COUNT_BASIC_BLOCKS
+ bbCntTable.record(fgBBcount);
+
+ if (fgBBcount == 1)
+ {
+ bbOneBBSizeTable.record(methodInfo->ILCodeSize);
+ }
+#endif // COUNT_BASIC_BLOCKS
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Basic block list for '%s'\n", info.compFullName);
+ fgDispBasicBlocks();
+ }
+#endif
+
+#ifdef DEBUG
+ /* Give the function a unique number */
+
+ if (opts.disAsm || opts.dspEmit || verbose)
+ {
+ s_compMethodsCount = ~info.compMethodHash() & 0xffff;
+ }
+ else
+ {
+ s_compMethodsCount++;
+ }
+#endif
+
+ if (compIsForInlining())
+ {
+ compInlineResult->NoteInt(InlineObservation::CALLEE_NUMBER_OF_BASIC_BLOCKS, fgBBcount);
+
+ if (compInlineResult->IsFailure())
+ {
+ goto _Next;
+ }
+ }
+
+#ifdef DEBUG
+ if (JitConfig.DumpJittedMethods() == 1 && !compIsForInlining())
+ {
+ printf("Compiling %4d %s::%s, IL size = %u, hsh=0x%x\n", Compiler::jitTotalMethodCompiled, info.compClassName,
+ info.compMethodName, info.compILCodeSize, info.compMethodHash());
+ }
+ if (compIsForInlining())
+ {
+ compGenTreeID = impInlineInfo->InlinerCompiler->compGenTreeID;
+ }
+#endif
+
+ compCompile(methodCodePtr, methodCodeSize, compileFlags);
+
+#ifdef DEBUG
+ if (compIsForInlining())
+ {
+ impInlineInfo->InlinerCompiler->compGenTreeID = compGenTreeID;
+ }
+#endif
+
+_Next:
+
+ if (compDonotInline())
+ {
+ // Verify we have only one inline result in play.
+ assert(impInlineInfo->inlineResult == compInlineResult);
+ }
+
+ if (!compIsForInlining())
+ {
+ compCompileFinish();
+
+ // Did we just compile for a target architecture that the VM isn't expecting? If so, the VM
+        // can't use the generated code (and we had better be an AltJit!).
+
+ if (!info.compMatchedVM)
+ {
+ return CORJIT_SKIPPED;
+ }
+
+#ifdef ALT_JIT
+#ifdef DEBUG
+ if (JitConfig.RunAltJitCode() == 0)
+ {
+ return CORJIT_SKIPPED;
+ }
+#endif // DEBUG
+#endif // ALT_JIT
+ }
+
+ /* Success! */
+ return CORJIT_OK;
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+//------------------------------------------------------------------------
+// compFindLocalVarLinear: Linear search for variable's scope containing offset.
+//
+// Arguments:
+// varNum The variable number to search for in the array of scopes.
+// offs The offset value which should occur within the life of the variable.
+//
+// Return Value:
+// VarScopeDsc* of a matching variable that contains the offset within its life
+// begin and life end or nullptr when there is no match found.
+//
+// Description:
+//     Linear search for a matching variable whose life begin and life end contain
+//     the offset; returns nullptr if one couldn't be found.
+//
+// Note:
+//     Usually called for a scope count of 4. Could be called for values up to 8.
+//
+VarScopeDsc* Compiler::compFindLocalVarLinear(unsigned varNum, unsigned offs)
+{
+ for (unsigned i = 0; i < info.compVarScopesCount; i++)
+ {
+ VarScopeDsc* dsc = &info.compVarScopes[i];
+ if ((dsc->vsdVarNum == varNum) && (dsc->vsdLifeBeg <= offs) && (dsc->vsdLifeEnd > offs))
+ {
+ return dsc;
+ }
+ }
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// compFindLocalVar: Search for variable's scope containing offset.
+//
+// Arguments:
+// varNum The variable number to search for in the array of scopes.
+// offs The offset value which should occur within the life of the variable.
+//
+// Return Value:
+//     VarScopeDsc* of a matching variable that contains the offset within its life
+//     begin and life end, or nullptr if one couldn't be found.
+//
+// Description:
+// Linear search for matching variables with their life begin and end containing
+// the offset only when the scope count is < MAX_LINEAR_FIND_LCL_SCOPELIST,
+// else use the hashtable lookup.
+//
+VarScopeDsc* Compiler::compFindLocalVar(unsigned varNum, unsigned offs)
+{
+ if (info.compVarScopesCount < MAX_LINEAR_FIND_LCL_SCOPELIST)
+ {
+ return compFindLocalVarLinear(varNum, offs);
+ }
+ else
+ {
+ VarScopeDsc* ret = compFindLocalVar(varNum, offs, offs);
+ assert(ret == compFindLocalVarLinear(varNum, offs));
+ return ret;
+ }
+}
+
+//------------------------------------------------------------------------
+// compFindLocalVar: Search for variable's scope containing offset.
+//
+// Arguments:
+// varNum The variable number to search for in the array of scopes.
+// lifeBeg The life begin of the variable's scope
+// lifeEnd The life end of the variable's scope
+//
+// Return Value:
+//     VarScopeDsc* of a matching variable whose life begin and life end contain
+//     the given range, or nullptr if one couldn't be found.
+//
+// Description:
+// Following are the steps used:
+// 1. Index into the hashtable using varNum.
+// 2. Iterate through the linked list at index varNum to find a matching
+// var scope.
+//
+VarScopeDsc* Compiler::compFindLocalVar(unsigned varNum, unsigned lifeBeg, unsigned lifeEnd)
+{
+ assert(compVarScopeMap != nullptr);
+
+ VarScopeMapInfo* info;
+ if (compVarScopeMap->Lookup(varNum, &info))
+ {
+ VarScopeListNode* list = info->head;
+ while (list != nullptr)
+ {
+ if ((list->data->vsdLifeBeg <= lifeBeg) && (list->data->vsdLifeEnd > lifeEnd))
+ {
+ return list->data;
+ }
+ list = list->next;
+ }
+ }
+ return nullptr;
+}
+
+//-------------------------------------------------------------------------
+// compInitVarScopeMap: Create a scope map so it can be looked up by varNum
+//
+// Description:
+// Map.K => Map.V :: varNum => List(ScopeDsc)
+//
+//     Create a scope map that can be indexed by varNum and whose values can be
+//     iterated to look for a matching scope, given either an offs or a
+//     lifeBeg and lifeEnd pair.
+//
+// Notes:
+//     1. Build the map only when linear search would be slow, i.e., when
+//        info.compVarScopesCount is at least MAX_LINEAR_FIND_LCL_SCOPELIST.
+// 2. Linked list preserves original array order.
+//
+void Compiler::compInitVarScopeMap()
+{
+ if (info.compVarScopesCount < MAX_LINEAR_FIND_LCL_SCOPELIST)
+ {
+ return;
+ }
+
+ assert(compVarScopeMap == nullptr);
+
+ compVarScopeMap = new (getAllocator()) VarNumToScopeDscMap(getAllocator());
+
+    // 599 is a prime used to limit huge allocations (e.g., from duplicated scopes on a single var).
+ compVarScopeMap->Reallocate(min(info.compVarScopesCount, 599));
+
+ for (unsigned i = 0; i < info.compVarScopesCount; ++i)
+ {
+ unsigned varNum = info.compVarScopes[i].vsdVarNum;
+
+ VarScopeListNode* node = VarScopeListNode::Create(&info.compVarScopes[i], getAllocator());
+
+ // Index by varNum and if the list exists append "node" to the "list".
+ VarScopeMapInfo* info;
+ if (compVarScopeMap->Lookup(varNum, &info))
+ {
+ info->tail->next = node;
+ info->tail = node;
+ }
+ // Create a new list.
+ else
+ {
+ info = VarScopeMapInfo::Create(node, getAllocator());
+ compVarScopeMap->Set(varNum, info);
+ }
+ }
+}
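+
+// (Usage note: the three-argument compFindLocalVar above consults this map, walking the
+// per-varNum list in the original array order that the construction loop preserves.)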
+
+static int __cdecl genCmpLocalVarLifeBeg(const void* elem1, const void* elem2)
+{
+ return (*((VarScopeDsc**)elem1))->vsdLifeBeg - (*((VarScopeDsc**)elem2))->vsdLifeBeg;
+}
+
+static int __cdecl genCmpLocalVarLifeEnd(const void* elem1, const void* elem2)
+{
+ return (*((VarScopeDsc**)elem1))->vsdLifeEnd - (*((VarScopeDsc**)elem2))->vsdLifeEnd;
+}
+
+inline void Compiler::compInitScopeLists()
+{
+ if (info.compVarScopesCount == 0)
+ {
+ compEnterScopeList = compExitScopeList = nullptr;
+ return;
+ }
+
+ // Populate the 'compEnterScopeList' and 'compExitScopeList' lists
+
+ compEnterScopeList = new (this, CMK_DebugInfo) VarScopeDsc*[info.compVarScopesCount];
+ compExitScopeList = new (this, CMK_DebugInfo) VarScopeDsc*[info.compVarScopesCount];
+
+ for (unsigned i = 0; i < info.compVarScopesCount; i++)
+ {
+ compEnterScopeList[i] = compExitScopeList[i] = &info.compVarScopes[i];
+ }
+
+ qsort(compEnterScopeList, info.compVarScopesCount, sizeof(*compEnterScopeList), genCmpLocalVarLifeBeg);
+ qsort(compExitScopeList, info.compVarScopesCount, sizeof(*compExitScopeList), genCmpLocalVarLifeEnd);
+}
+
+void Compiler::compResetScopeLists()
+{
+ if (info.compVarScopesCount == 0)
+ {
+ return;
+ }
+
+ assert(compEnterScopeList && compExitScopeList);
+
+ compNextEnterScope = compNextExitScope = 0;
+}
+
+VarScopeDsc* Compiler::compGetNextEnterScope(unsigned offs, bool scan)
+{
+ assert(info.compVarScopesCount);
+ assert(compEnterScopeList && compExitScopeList);
+
+ if (compNextEnterScope < info.compVarScopesCount)
+ {
+ assert(compEnterScopeList[compNextEnterScope]);
+ unsigned nextEnterOff = compEnterScopeList[compNextEnterScope]->vsdLifeBeg;
+ assert(scan || (offs <= nextEnterOff));
+
+ if (!scan)
+ {
+ if (offs == nextEnterOff)
+ {
+ return compEnterScopeList[compNextEnterScope++];
+ }
+ }
+ else
+ {
+ if (nextEnterOff <= offs)
+ {
+ return compEnterScopeList[compNextEnterScope++];
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+VarScopeDsc* Compiler::compGetNextExitScope(unsigned offs, bool scan)
+{
+ assert(info.compVarScopesCount);
+ assert(compEnterScopeList && compExitScopeList);
+
+ if (compNextExitScope < info.compVarScopesCount)
+ {
+ assert(compExitScopeList[compNextExitScope]);
+ unsigned nextExitOffs = compExitScopeList[compNextExitScope]->vsdLifeEnd;
+ assert(scan || (offs <= nextExitOffs));
+
+ if (!scan)
+ {
+ if (offs == nextExitOffs)
+ {
+ return compExitScopeList[compNextExitScope++];
+ }
+ }
+ else
+ {
+ if (nextExitOffs <= offs)
+ {
+ return compExitScopeList[compNextExitScope++];
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+// This function calls the enter/exit callbacks for scopes whose boundaries lie
+// between the current position of the scope lists and 'offset', in instruction
+// order.
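+//
+// Informal sketch of the traversal (a reading of the code below): exit events and enter events
+// are pulled from their two sorted lists in offset order; when one list overshoots the other,
+// the overshooting scope is parked in nextExitScope / nextEnterScope and the other list is
+// drained up to that point, so the callbacks fire in instruction order.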
+
+void Compiler::compProcessScopesUntil(unsigned offset,
+ VARSET_TP* inScope,
+ void (Compiler::*enterScopeFn)(VARSET_TP* inScope, VarScopeDsc*),
+ void (Compiler::*exitScopeFn)(VARSET_TP* inScope, VarScopeDsc*))
+{
+ assert(offset != BAD_IL_OFFSET);
+ assert(inScope != nullptr);
+
+ bool foundExit = false, foundEnter = true;
+ VarScopeDsc* scope;
+ VarScopeDsc* nextExitScope = nullptr;
+ VarScopeDsc* nextEnterScope = nullptr;
+ unsigned offs = offset, curEnterOffs = 0;
+
+ goto START_FINDING_SCOPES;
+
+ // We need to determine the scopes which are open for the current block.
+ // This loop walks over the missing blocks between the current and the
+ // previous block, keeping the enter and exit offsets in lockstep.
+
+ do
+ {
+ foundExit = foundEnter = false;
+
+ if (nextExitScope)
+ {
+ (this->*exitScopeFn)(inScope, nextExitScope);
+ nextExitScope = nullptr;
+ foundExit = true;
+ }
+
+ offs = nextEnterScope ? nextEnterScope->vsdLifeBeg : offset;
+
+ while ((scope = compGetNextExitScope(offs, true)) != nullptr)
+ {
+ foundExit = true;
+
+ if (!nextEnterScope || scope->vsdLifeEnd > nextEnterScope->vsdLifeBeg)
+ {
+ // We overshot the last found Enter scope. Save the scope for later
+ // and find an entering scope
+
+ nextExitScope = scope;
+ break;
+ }
+
+ (this->*exitScopeFn)(inScope, scope);
+ }
+
+ if (nextEnterScope)
+ {
+ (this->*enterScopeFn)(inScope, nextEnterScope);
+ curEnterOffs = nextEnterScope->vsdLifeBeg;
+ nextEnterScope = nullptr;
+ foundEnter = true;
+ }
+
+ offs = nextExitScope ? nextExitScope->vsdLifeEnd : offset;
+
+ START_FINDING_SCOPES:
+
+ while ((scope = compGetNextEnterScope(offs, true)) != nullptr)
+ {
+ foundEnter = true;
+
+ if ((nextExitScope && scope->vsdLifeBeg >= nextExitScope->vsdLifeEnd) || (scope->vsdLifeBeg > curEnterOffs))
+ {
+ // We overshot the last found exit scope. Save the scope for later
+ // and find an exiting scope
+
+ nextEnterScope = scope;
+ break;
+ }
+
+ (this->*enterScopeFn)(inScope, scope);
+
+ if (!nextExitScope)
+ {
+ curEnterOffs = scope->vsdLifeBeg;
+ }
+ }
+ } while (foundExit || foundEnter);
+}
+
+/*****************************************************************************/
+#endif // DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+#if defined(DEBUGGING_SUPPORT) && defined(DEBUG)
+
+void Compiler::compDispScopeLists()
+{
+ unsigned i;
+
+ printf("Local variable scopes = %d\n", info.compVarScopesCount);
+
+ if (info.compVarScopesCount)
+ {
+ printf(" \tVarNum \tLVNum \t Name \tBeg \tEnd\n");
+ }
+
+ printf("Sorted by enter scope:\n");
+ for (i = 0; i < info.compVarScopesCount; i++)
+ {
+ VarScopeDsc* varScope = compEnterScopeList[i];
+ assert(varScope);
+ printf("%2d: \t%02Xh \t%02Xh \t%10s \t%03Xh \t%03Xh", i, varScope->vsdVarNum, varScope->vsdLVnum,
+ VarNameToStr(varScope->vsdName) == nullptr ? "UNKNOWN" : VarNameToStr(varScope->vsdName),
+ varScope->vsdLifeBeg, varScope->vsdLifeEnd);
+
+ if (compNextEnterScope == i)
+ {
+ printf(" <-- next enter scope");
+ }
+
+ printf("\n");
+ }
+
+ printf("Sorted by exit scope:\n");
+ for (i = 0; i < info.compVarScopesCount; i++)
+ {
+ VarScopeDsc* varScope = compExitScopeList[i];
+ assert(varScope);
+ printf("%2d: \t%02Xh \t%02Xh \t%10s \t%03Xh \t%03Xh", i, varScope->vsdVarNum, varScope->vsdLVnum,
+ VarNameToStr(varScope->vsdName) == nullptr ? "UNKNOWN" : VarNameToStr(varScope->vsdName),
+ varScope->vsdLifeBeg, varScope->vsdLifeEnd);
+
+ if (compNextExitScope == i)
+ {
+ printf(" <-- next exit scope");
+ }
+
+ printf("\n");
+ }
+}
+
+#endif
+
+#if defined(DEBUG)
+
+void Compiler::compDispLocalVars()
+{
+ printf("info.compVarScopesCount = %d\n", info.compVarScopesCount);
+
+ if (info.compVarScopesCount > 0)
+ {
+ printf(" \tVarNum \tLVNum \t Name \tBeg \tEnd\n");
+ }
+
+ for (unsigned i = 0; i < info.compVarScopesCount; i++)
+ {
+ VarScopeDsc* varScope = &info.compVarScopes[i];
+ printf("%2d: \t%02Xh \t%02Xh \t%10s \t%03Xh \t%03Xh\n", i, varScope->vsdVarNum, varScope->vsdLVnum,
+ VarNameToStr(varScope->vsdName) == nullptr ? "UNKNOWN" : VarNameToStr(varScope->vsdName),
+ varScope->vsdLifeBeg, varScope->vsdLifeEnd);
+ }
+}
+
+#endif
+
+/*****************************************************************************/
+
+// Compile a single method
+
+int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_MODULE_HANDLE classPtr,
+ COMP_HANDLE compHnd,
+ CORINFO_METHOD_INFO* methodInfo,
+ void** methodCodePtr,
+ ULONG* methodCodeSize,
+ CORJIT_FLAGS* compileFlags,
+ void* inlineInfoPtr)
+{
+ //
+ // A non-NULL inlineInfo means we are compiling the inlinee method.
+ //
+ InlineInfo* inlineInfo = (InlineInfo*)inlineInfoPtr;
+
+ bool jitFallbackCompile = false;
+START:
+ int result = CORJIT_INTERNALERROR;
+
+ ArenaAllocator* pAlloc = nullptr;
+ ArenaAllocator alloc;
+
+ if (inlineInfo)
+ {
+ // Use inliner's memory allocator when compiling the inlinee.
+ pAlloc = inlineInfo->InlinerCompiler->compGetAllocator();
+ }
+ else
+ {
+ IEEMemoryManager* pMemoryManager = compHnd->getMemoryManager();
+
+ // Try to reuse the pre-inited allocator
+ pAlloc = ArenaAllocator::getPooledAllocator(pMemoryManager);
+
+ if (pAlloc == nullptr)
+ {
+ alloc = ArenaAllocator(pMemoryManager);
+ pAlloc = &alloc;
+ }
+ }
+
+ Compiler* pComp;
+ pComp = nullptr;
+
+ struct Param
+ {
+ Compiler* pComp;
+ ArenaAllocator* pAlloc;
+ ArenaAllocator* alloc;
+ bool jitFallbackCompile;
+
+ CORINFO_METHOD_HANDLE methodHnd;
+ CORINFO_MODULE_HANDLE classPtr;
+ COMP_HANDLE compHnd;
+ CORINFO_METHOD_INFO* methodInfo;
+ void** methodCodePtr;
+ ULONG* methodCodeSize;
+ CORJIT_FLAGS* compileFlags;
+ InlineInfo* inlineInfo;
+
+ int result;
+ } param;
+ param.pComp = nullptr;
+ param.pAlloc = pAlloc;
+ param.alloc = &alloc;
+ param.jitFallbackCompile = jitFallbackCompile;
+ param.methodHnd = methodHnd;
+ param.classPtr = classPtr;
+ param.compHnd = compHnd;
+ param.methodInfo = methodInfo;
+ param.methodCodePtr = methodCodePtr;
+ param.methodCodeSize = methodCodeSize;
+ param.compileFlags = compileFlags;
+ param.inlineInfo = inlineInfo;
+ param.result = result;
+
+ setErrorTrap(compHnd, Param*, pParamOuter, &param)
+ {
+ setErrorTrap(nullptr, Param*, pParam, pParamOuter)
+ {
+ if (pParam->inlineInfo)
+ {
+ // Lazily create the inlinee compiler object
+ if (pParam->inlineInfo->InlinerCompiler->InlineeCompiler == nullptr)
+ {
+ pParam->inlineInfo->InlinerCompiler->InlineeCompiler =
+ (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
+ }
+
+ // Use the inlinee compiler object
+ pParam->pComp = pParam->inlineInfo->InlinerCompiler->InlineeCompiler;
+#ifdef DEBUG
+// memset(pParam->pComp, 0xEE, sizeof(Compiler));
+#endif
+ }
+ else
+ {
+                // Allocate the inliner compiler object
+ pParam->pComp = (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
+ }
+
+ // push this compiler on the stack (TLS)
+ pParam->pComp->prevCompiler = JitTls::GetCompiler();
+ JitTls::SetCompiler(pParam->pComp);
+
+// PREFIX_ASSUME gets turned into ASSERT_CHECK and we cannot have it here
+#if defined(_PREFAST_) || defined(_PREFIX_)
+ PREFIX_ASSUME(pParam->pComp != NULL);
+#else
+ assert(pParam->pComp != nullptr);
+#endif
+
+ pParam->pComp->compInit(pParam->pAlloc, pParam->inlineInfo);
+
+#ifdef DEBUG
+ pParam->pComp->jitFallbackCompile = pParam->jitFallbackCompile;
+#endif
+
+ // Now generate the code
+ pParam->result =
+ pParam->pComp->compCompile(pParam->methodHnd, pParam->classPtr, pParam->compHnd, pParam->methodInfo,
+ pParam->methodCodePtr, pParam->methodCodeSize, pParam->compileFlags);
+ }
+ finallyErrorTrap()
+ {
+ // Add a dummy touch to pComp so that it is kept alive, and is easy to get to
+ // during debugging since all other data can be obtained through it.
+ //
+ if (pParamOuter->pComp) // If OOM is thrown when allocating memory for pComp, we will end up here.
+ // In that case, pComp is still NULL.
+ {
+ pParamOuter->pComp->info.compCode = nullptr;
+
+ // pop the compiler off the TLS stack only if it was linked above
+ assert(JitTls::GetCompiler() == pParamOuter->pComp);
+ JitTls::SetCompiler(JitTls::GetCompiler()->prevCompiler);
+ }
+
+ if (pParamOuter->inlineInfo == nullptr)
+ {
+ // Free up the allocator we were using
+ pParamOuter->pAlloc->destroy();
+ }
+ }
+ endErrorTrap()
+ }
+ impJitErrorTrap()
+ {
+ // If we were looking at an inlinee....
+ if (inlineInfo != nullptr)
+ {
+ // Note that we failed to compile the inlinee, and that
+ // there's no point trying to inline it again anywhere else.
+ inlineInfo->inlineResult->NoteFatal(InlineObservation::CALLEE_COMPILATION_ERROR);
+ }
+ param.result = __errc;
+ }
+ endErrorTrap()
+
+ result = param.result;
+
+ if (!inlineInfo && (result == CORJIT_INTERNALERROR || result == CORJIT_RECOVERABLEERROR) && !jitFallbackCompile)
+ {
+ // If we failed the JIT, reattempt with debuggable code.
+ jitFallbackCompile = true;
+
+ // Update the flags for 'safer' code generation.
+ compileFlags->corJitFlags |= CORJIT_FLG_MIN_OPT;
+ compileFlags->corJitFlags &= ~(CORJIT_FLG_SIZE_OPT | CORJIT_FLG_SPEED_OPT);
+
+ goto START;
+ }
+
+ return result;
+}
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+// GetTypeFromClassificationAndSizes:
+// Returns the type of the eightbyte accounting for the classification and size of the eightbyte.
+//
+// Arguments:
+//    classType - classification type
+//    size      - size of the eightbyte.
+//
+// static
+var_types Compiler::GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size)
+{
+ var_types type = TYP_UNKNOWN;
+ switch (classType)
+ {
+ case SystemVClassificationTypeInteger:
+ if (size == 1)
+ {
+ type = TYP_BYTE;
+ }
+ else if (size <= 2)
+ {
+ type = TYP_SHORT;
+ }
+ else if (size <= 4)
+ {
+ type = TYP_INT;
+ }
+ else if (size <= 8)
+ {
+ type = TYP_LONG;
+ }
+ else
+ {
+ assert(false && "GetTypeFromClassificationAndSizes Invalid Integer classification type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ type = TYP_REF;
+ break;
+ case SystemVClassificationTypeIntegerByRef:
+ type = TYP_BYREF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (size <= 4)
+ {
+ type = TYP_FLOAT;
+ }
+ else if (size <= 8)
+ {
+ type = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(false && "GetTypeFromClassificationAndSizes Invalid SSE classification type.");
+ }
+ break;
+
+ default:
+ assert(false && "GetTypeFromClassificationAndSizes Invalid classification type.");
+ break;
+ }
+
+ return type;
+}
+
+//-------------------------------------------------------------------
+// GetEightByteType: Returns the type of eightbyte slot of a struct
+//
+// Arguments:
+// structDesc - struct classification description.
+// slotNum - eightbyte slot number for the struct.
+//
+// Return Value:
+// type of the eightbyte slot of the struct
+//
+// static
+var_types Compiler::GetEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ unsigned slotNum)
+{
+ var_types eightByteType = TYP_UNDEF;
+ unsigned len = structDesc.eightByteSizes[slotNum];
+
+ switch (structDesc.eightByteClassifications[slotNum])
+ {
+ case SystemVClassificationTypeInteger:
+            // See typelist.h for the jit type definitions.
+            // All the integer types of size <= 4 bytes map to jit type TYP_INT here.
+ if (structDesc.eightByteSizes[slotNum] <= 4)
+ {
+ eightByteType = TYP_INT;
+ }
+ else if (structDesc.eightByteSizes[slotNum] <= 8)
+ {
+ eightByteType = TYP_LONG;
+ }
+ else
+ {
+ assert(false && "GetEightByteType Invalid Integer classification type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(len == REGSIZE_BYTES);
+ eightByteType = TYP_REF;
+ break;
+ case SystemVClassificationTypeIntegerByRef:
+ assert(len == REGSIZE_BYTES);
+ eightByteType = TYP_BYREF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[slotNum] <= 4)
+ {
+ eightByteType = TYP_FLOAT;
+ }
+ else if (structDesc.eightByteSizes[slotNum] <= 8)
+ {
+ eightByteType = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(false && "GetEightByteType Invalid SSE classification type.");
+ }
+ break;
+ default:
+ assert(false && "GetEightByteType Invalid classification type.");
+ break;
+ }
+
+ return eightByteType;
+}
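+
+// For example (hypothetical struct, for illustration only): for a struct { double d; int i; },
+// the SysV classifier reports eightbyte 0 as SSE with size 8 and eightbyte 1 as Integer with
+// size 4, so GetEightByteType returns TYP_DOUBLE for slot 0 and TYP_INT for slot 1.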
+
+//------------------------------------------------------------------------------------------------------
+// GetStructTypeOffset: Gets the type, size and offset of the eightbytes of a struct for System V systems.
+//
+// Arguments:
+// 'structDesc' - struct description
+// 'type0' - out param; returns the type of the first eightbyte.
+// 'type1' - out param; returns the type of the second eightbyte.
+// 'offset0' - out param; returns the offset of the first eightbyte.
+// 'offset1' - out param; returns the offset of the second eightbyte.
+//
+// static
+void Compiler::GetStructTypeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ var_types* type0,
+ var_types* type1,
+ unsigned __int8* offset0,
+ unsigned __int8* offset1)
+{
+ *offset0 = structDesc.eightByteOffsets[0];
+ *offset1 = structDesc.eightByteOffsets[1];
+
+ *type0 = TYP_UNKNOWN;
+ *type1 = TYP_UNKNOWN;
+
+ // Set the first eightbyte data
+ if (structDesc.eightByteCount >= 1)
+ {
+ *type0 = GetEightByteType(structDesc, 0);
+ }
+
+ // Set the second eight byte data
+ if (structDesc.eightByteCount == 2)
+ {
+ *type1 = GetEightByteType(structDesc, 1);
+ }
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#ifdef DEBUG
+Compiler::NodeToIntMap* Compiler::FindReachableNodesInNodeTestData()
+{
+ NodeToIntMap* reachable = new (getAllocatorDebugOnly()) NodeToIntMap(getAllocatorDebugOnly());
+
+ if (m_nodeTestData == nullptr)
+ {
+ return reachable;
+ }
+
+ // Otherwise, iterate.
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt != nullptr; stmt = stmt->gtNext)
+ {
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ TestLabelAndNum tlAndN;
+
+ // For call nodes, translate late args to what they stand for.
+ if (tree->OperGet() == GT_CALL)
+ {
+ GenTreeCall* call = tree->AsCall();
+ GenTreeArgList* args = call->gtCallArgs;
+ unsigned i = 0;
+ while (args != nullptr)
+ {
+ GenTreePtr arg = args->Current();
+ if (arg->gtFlags & GTF_LATE_ARG)
+ {
+ // Find the corresponding late arg.
+ GenTreePtr lateArg = nullptr;
+ for (unsigned j = 0; j < call->fgArgInfo->ArgCount(); j++)
+ {
+ if (call->fgArgInfo->ArgTable()[j]->argNum == i)
+ {
+ lateArg = call->fgArgInfo->ArgTable()[j]->node;
+ break;
+ }
+ }
+ assert(lateArg != nullptr);
+ if (GetNodeTestData()->Lookup(lateArg, &tlAndN))
+ {
+ reachable->Set(lateArg, 0);
+ }
+ }
+ i++;
+ args = args->Rest();
+ }
+ }
+
+ if (GetNodeTestData()->Lookup(tree, &tlAndN))
+ {
+ reachable->Set(tree, 0);
+ }
+ }
+ }
+ }
+ return reachable;
+}
+
+void Compiler::TransferTestDataToNode(GenTreePtr from, GenTreePtr to)
+{
+ TestLabelAndNum tlAndN;
+ // We can't currently associate multiple annotations with a single node.
+ // If we need to, we can fix this...
+
+ // If the table is null, don't create it just to do the lookup, which would fail...
+ if (m_nodeTestData != nullptr && GetNodeTestData()->Lookup(from, &tlAndN))
+ {
+ assert(!GetNodeTestData()->Lookup(to, &tlAndN));
+ // We can't currently associate multiple annotations with a single node.
+ // If we need to, we can fix this...
+ TestLabelAndNum tlAndNTo;
+ assert(!GetNodeTestData()->Lookup(to, &tlAndNTo));
+
+ GetNodeTestData()->Remove(from);
+ GetNodeTestData()->Set(to, tlAndN);
+ }
+}
+
+void Compiler::CopyTestDataToCloneTree(GenTreePtr from, GenTreePtr to)
+{
+ if (m_nodeTestData == nullptr)
+ {
+ return;
+ }
+ if (from == nullptr)
+ {
+ assert(to == nullptr);
+ return;
+ }
+ // Otherwise...
+ TestLabelAndNum tlAndN;
+ if (GetNodeTestData()->Lookup(from, &tlAndN))
+ {
+ // We can't currently associate multiple annotations with a single node.
+ // If we need to, we can fix this...
+ TestLabelAndNum tlAndNTo;
+ assert(!GetNodeTestData()->Lookup(to, &tlAndNTo));
+ GetNodeTestData()->Set(to, tlAndN);
+ }
+ // Now recurse, in parallel on both trees.
+
+ genTreeOps oper = from->OperGet();
+ unsigned kind = from->OperKind();
+ assert(oper == to->OperGet());
+
+    // Constant or leaf nodes have no children.
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ return;
+ }
+
+ // Otherwise, is it a 'simple' unary/binary operator?
+
+ if (kind & GTK_SMPOP)
+ {
+ if (from->gtOp.gtOp1 != nullptr)
+ {
+ assert(to->gtOp.gtOp1 != nullptr);
+ CopyTestDataToCloneTree(from->gtOp.gtOp1, to->gtOp.gtOp1);
+ }
+ else
+ {
+ assert(to->gtOp.gtOp1 == nullptr);
+ }
+
+ if (from->gtGetOp2() != nullptr)
+ {
+ assert(to->gtGetOp2() != nullptr);
+ CopyTestDataToCloneTree(from->gtGetOp2(), to->gtGetOp2());
+ }
+ else
+ {
+ assert(to->gtGetOp2() == nullptr);
+ }
+
+ return;
+ }
+
+ // Otherwise, see what kind of a special operator we have here.
+
+ switch (oper)
+ {
+ case GT_STMT:
+ CopyTestDataToCloneTree(from->gtStmt.gtStmtExpr, to->gtStmt.gtStmtExpr);
+ return;
+
+ case GT_CALL:
+ CopyTestDataToCloneTree(from->gtCall.gtCallObjp, to->gtCall.gtCallObjp);
+ CopyTestDataToCloneTree(from->gtCall.gtCallArgs, to->gtCall.gtCallArgs);
+ CopyTestDataToCloneTree(from->gtCall.gtCallLateArgs, to->gtCall.gtCallLateArgs);
+
+ if (from->gtCall.gtCallType == CT_INDIRECT)
+ {
+ CopyTestDataToCloneTree(from->gtCall.gtCallCookie, to->gtCall.gtCallCookie);
+ CopyTestDataToCloneTree(from->gtCall.gtCallAddr, to->gtCall.gtCallAddr);
+ }
+ // The other call types do not have additional GenTree arguments.
+
+ return;
+
+ case GT_FIELD:
+ CopyTestDataToCloneTree(from->gtField.gtFldObj, to->gtField.gtFldObj);
+ return;
+
+ case GT_ARR_ELEM:
+ assert(from->gtArrElem.gtArrRank == to->gtArrElem.gtArrRank);
+ for (unsigned dim = 0; dim < from->gtArrElem.gtArrRank; dim++)
+ {
+ CopyTestDataToCloneTree(from->gtArrElem.gtArrInds[dim], to->gtArrElem.gtArrInds[dim]);
+ }
+ CopyTestDataToCloneTree(from->gtArrElem.gtArrObj, to->gtArrElem.gtArrObj);
+ return;
+
+ case GT_CMPXCHG:
+ CopyTestDataToCloneTree(from->gtCmpXchg.gtOpLocation, to->gtCmpXchg.gtOpLocation);
+ CopyTestDataToCloneTree(from->gtCmpXchg.gtOpValue, to->gtCmpXchg.gtOpValue);
+ CopyTestDataToCloneTree(from->gtCmpXchg.gtOpComparand, to->gtCmpXchg.gtOpComparand);
+ return;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ CopyTestDataToCloneTree(from->gtBoundsChk.gtArrLen, to->gtBoundsChk.gtArrLen);
+ CopyTestDataToCloneTree(from->gtBoundsChk.gtIndex, to->gtBoundsChk.gtIndex);
+ return;
+
+ default:
+ unreached();
+ }
+}
+
+#endif // DEBUG
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX jvc XX
+XX XX
+XX Functions for the stand-alone version of the JIT . XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+void codeGeneratorCodeSizeBeg()
+{
+}
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * If any temporary tables are smaller than 'genMinSize2free' we won't bother
+ * freeing them.
+ */
+
+const size_t genMinSize2free = 64;
+
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Used for counting pointer assignments.
+ */
+
+/*****************************************************************************/
+void codeGeneratorCodeSizeEnd()
+{
+}
+/*****************************************************************************
+ *
+ *  Gather statistics - mainly used for the standalone JIT.
+ *  Enable various #ifdef's to get the information you need.
+ */
+
+void Compiler::compJitStats()
+{
+#if CALL_ARG_STATS
+
+ /* Method types and argument statistics */
+ compCallArgStats();
+#endif // CALL_ARG_STATS
+}
+
+#if CALL_ARG_STATS
+
+/*****************************************************************************
+ *
+ * Gather statistics about method calls and arguments
+ */
+
+void Compiler::compCallArgStats()
+{
+ GenTreePtr args;
+ GenTreePtr argx;
+
+ BasicBlock* block;
+ GenTreePtr stmt;
+ GenTreePtr call;
+
+ unsigned argNum;
+
+ unsigned argDWordNum;
+ unsigned argLngNum;
+ unsigned argFltNum;
+ unsigned argDblNum;
+
+ unsigned regArgNum;
+ unsigned regArgDeferred;
+ unsigned regArgTemp;
+
+ unsigned regArgLclVar;
+ unsigned regArgConst;
+
+ unsigned argTempsThisMethod = 0;
+
+ assert(fgStmtListThreaded);
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ assert(stmt->gtOper == GT_STMT);
+
+ for (call = stmt->gtStmt.gtStmtList; call; call = call->gtNext)
+ {
+ if (call->gtOper != GT_CALL)
+ continue;
+
+ argNum =
+
+ regArgNum = regArgDeferred = regArgTemp =
+
+ regArgConst = regArgLclVar =
+
+ argDWordNum = argLngNum = argFltNum = argDblNum = 0;
+
+ argTotalCalls++;
+
+ if (!call->gtCall.gtCallObjp)
+ {
+ if (call->gtCall.gtCallType == CT_HELPER)
+ {
+ argHelperCalls++;
+ }
+ else
+ {
+ argStaticCalls++;
+ }
+ }
+ else
+ {
+ /* We have a 'this' pointer */
+
+ argDWordNum++;
+ argNum++;
+ regArgNum++;
+ regArgDeferred++;
+ argTotalObjPtr++;
+
+ if (call->gtFlags & (GTF_CALL_VIRT_VTABLE | GTF_CALL_VIRT_STUB))
+ {
+ /* virtual function */
+ argVirtualCalls++;
+ }
+ else
+ {
+ argNonVirtualCalls++;
+ }
+ }
+
+#ifdef LEGACY_BACKEND
+                // TODO-Cleanup: We need to add support below for additional node types
+                // that the RyuJIT backend has in the IR.
+                // Gather arguments information.
+
+ for (args = call->gtCall.gtCallArgs; args; args = args->gtOp.gtOp2)
+ {
+ argx = args->gtOp.gtOp1;
+
+ argNum++;
+
+ switch (genActualType(argx->TypeGet()))
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+ argDWordNum++;
+ break;
+
+ case TYP_LONG:
+ argLngNum++;
+ break;
+
+ case TYP_FLOAT:
+ argFltNum++;
+ break;
+
+ case TYP_DOUBLE:
+ argDblNum++;
+ break;
+
+ case TYP_VOID:
+ /* This is a deferred register argument */
+ assert(argx->gtOper == GT_NOP);
+ assert(argx->gtFlags & GTF_LATE_ARG);
+ argDWordNum++;
+ break;
+ }
+
+ /* Is this argument a register argument? */
+
+ if (argx->gtFlags & GTF_LATE_ARG)
+ {
+ regArgNum++;
+
+ /* We either have a deferred argument or a temp */
+
+ if (argx->gtOper == GT_NOP)
+ {
+ regArgDeferred++;
+ }
+ else
+ {
+ assert(argx->gtOper == GT_ASG);
+ regArgTemp++;
+ }
+ }
+ }
+
+ /* Look at the register arguments and count how many constants, local vars */
+
+ for (args = call->gtCall.gtCallLateArgs; args; args = args->gtOp.gtOp2)
+ {
+ argx = args->gtOp.gtOp1;
+
+ switch (argx->gtOper)
+ {
+ case GT_CNS_INT:
+ regArgConst++;
+ break;
+
+ case GT_LCL_VAR:
+ regArgLclVar++;
+ break;
+ }
+ }
+
+ assert(argNum == argDWordNum + argLngNum + argFltNum + argDblNum);
+ assert(regArgNum == regArgDeferred + regArgTemp);
+
+ argTotalArgs += argNum;
+ argTotalRegArgs += regArgNum;
+
+ argTotalDWordArgs += argDWordNum;
+ argTotalLongArgs += argLngNum;
+ argTotalFloatArgs += argFltNum;
+ argTotalDoubleArgs += argDblNum;
+
+ argTotalDeferred += regArgDeferred;
+ argTotalTemps += regArgTemp;
+ argTotalConst += regArgConst;
+ argTotalLclVar += regArgLclVar;
+
+ argTempsThisMethod += regArgTemp;
+
+ argCntTable.record(argNum);
+ argDWordCntTable.record(argDWordNum);
+ argDWordLngCntTable.record(argDWordNum + (2 * argLngNum));
+#endif // LEGACY_BACKEND
+ }
+ }
+ }
+
+ argTempsCntTable.record(argTempsThisMethod);
+
+ if (argMaxTempsPerMethod < argTempsThisMethod)
+ {
+ argMaxTempsPerMethod = argTempsThisMethod;
+ }
+}
+
+/* static */
+void Compiler::compDispCallArgStats(FILE* fout)
+{
+ if (argTotalCalls == 0)
+ return;
+
+ fprintf(fout, "\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Call stats\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Total # of calls = %d, calls / method = %.3f\n\n", argTotalCalls,
+ (float)argTotalCalls / genMethodCnt);
+
+ fprintf(fout, "Percentage of helper calls = %4.2f %%\n", (float)(100 * argHelperCalls) / argTotalCalls);
+ fprintf(fout, "Percentage of static calls = %4.2f %%\n", (float)(100 * argStaticCalls) / argTotalCalls);
+ fprintf(fout, "Percentage of virtual calls = %4.2f %%\n", (float)(100 * argVirtualCalls) / argTotalCalls);
+ fprintf(fout, "Percentage of non-virtual calls = %4.2f %%\n\n", (float)(100 * argNonVirtualCalls) / argTotalCalls);
+
+    fprintf(fout, "Average # of arguments per call = %.2f\n\n", (float)argTotalArgs / argTotalCalls);
+
+ fprintf(fout, "Percentage of DWORD arguments = %.2f %%\n", (float)(100 * argTotalDWordArgs) / argTotalArgs);
+ fprintf(fout, "Percentage of LONG arguments = %.2f %%\n", (float)(100 * argTotalLongArgs) / argTotalArgs);
+ fprintf(fout, "Percentage of FLOAT arguments = %.2f %%\n", (float)(100 * argTotalFloatArgs) / argTotalArgs);
+ fprintf(fout, "Percentage of DOUBLE arguments = %.2f %%\n\n", (float)(100 * argTotalDoubleArgs) / argTotalArgs);
+
+ if (argTotalRegArgs == 0)
+ return;
+
+ /*
+ fprintf(fout, "Total deferred arguments = %d \n", argTotalDeferred);
+
+ fprintf(fout, "Total temp arguments = %d \n\n", argTotalTemps);
+
+ fprintf(fout, "Total 'this' arguments = %d \n", argTotalObjPtr);
+ fprintf(fout, "Total local var arguments = %d \n", argTotalLclVar);
+ fprintf(fout, "Total constant arguments = %d \n\n", argTotalConst);
+ */
+
+ fprintf(fout, "\nRegister Arguments:\n\n");
+
+ fprintf(fout, "Percentage of deferred arguments = %.2f %%\n", (float)(100 * argTotalDeferred) / argTotalRegArgs);
+ fprintf(fout, "Percentage of temp arguments = %.2f %%\n\n", (float)(100 * argTotalTemps) / argTotalRegArgs);
+
+ fprintf(fout, "Maximum # of temps per method = %d\n\n", argMaxTempsPerMethod);
+
+ fprintf(fout, "Percentage of ObjPtr arguments = %.2f %%\n", (float)(100 * argTotalObjPtr) / argTotalRegArgs);
+ // fprintf(fout, "Percentage of global arguments = %.2f %%\n", (float)(100 * argTotalDWordGlobEf) /
+ // argTotalRegArgs);
+ fprintf(fout, "Percentage of constant arguments = %.2f %%\n", (float)(100 * argTotalConst) / argTotalRegArgs);
+ fprintf(fout, "Percentage of lcl var arguments = %.2f %%\n\n", (float)(100 * argTotalLclVar) / argTotalRegArgs);
+
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Argument count frequency table (includes ObjPtr):\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ argCntTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "DWORD argument count frequency table (w/o LONG):\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ argDWordCntTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Temps count frequency table (per method):\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ argTempsCntTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+
+ /*
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "DWORD argument count frequency table (w/ LONG):\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ argDWordLngCntTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+ */
+}
+
+#endif // CALL_ARG_STATS
+
+// JIT time end to end, and by phases.
+
+#ifdef FEATURE_JIT_METHOD_PERF
+// Static variables
+CritSecObject CompTimeSummaryInfo::s_compTimeSummaryLock;
+CompTimeSummaryInfo CompTimeSummaryInfo::s_compTimeSummary;
+#endif // FEATURE_JIT_METHOD_PERF
+
+#if defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS
+const char* PhaseNames[] = {
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) string_nm,
+#include "compphases.h"
+};
+
+const char* PhaseEnums[] = {
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) #enum_nm,
+#include "compphases.h"
+};
+
+const LPCWSTR PhaseShortNames[] = {
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) W(short_nm),
+#include "compphases.h"
+};
+#endif // defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS
+
+#ifdef FEATURE_JIT_METHOD_PERF
+bool PhaseHasChildren[] = {
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) hasChildren,
+#include "compphases.h"
+};
+
+int PhaseParent[] = {
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) parent,
+#include "compphases.h"
+};
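+
+// The tables above are all generated with the same X-macro pattern: compphases.h invokes
+// CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) once per phase, and each
+// table definition supplies a macro body that keeps only the field it needs (string name, enum
+// name, short name, hasChildren flag, or parent index). This keeps every table in sync with the
+// Phases enum by construction.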
+
+CompTimeInfo::CompTimeInfo(unsigned byteCodeBytes)
+ : m_byteCodeBytes(byteCodeBytes), m_totalCycles(0), m_parentPhaseEndSlop(0), m_timerFailure(false)
+{
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ m_invokesByPhase[i] = 0;
+ m_cyclesByPhase[i] = 0;
+ }
+}
+
+bool CompTimeSummaryInfo::IncludedInFilteredData(CompTimeInfo& info)
+{
+ return false; // info.m_byteCodeBytes < 10;
+}
+
+void CompTimeSummaryInfo::AddInfo(CompTimeInfo& info)
+{
+ if (info.m_timerFailure)
+ return; // Don't update if there was a failure.
+
+ CritSecHolder timeLock(s_compTimeSummaryLock);
+ m_numMethods++;
+
+ bool includeInFiltered = IncludedInFilteredData(info);
+
+ // Update the totals and maxima.
+ m_total.m_byteCodeBytes += info.m_byteCodeBytes;
+ m_maximum.m_byteCodeBytes = max(m_maximum.m_byteCodeBytes, info.m_byteCodeBytes);
+ m_total.m_totalCycles += info.m_totalCycles;
+ m_maximum.m_totalCycles = max(m_maximum.m_totalCycles, info.m_totalCycles);
+
+ if (includeInFiltered)
+ {
+ m_numFilteredMethods++;
+ m_filtered.m_byteCodeBytes += info.m_byteCodeBytes;
+ m_filtered.m_totalCycles += info.m_totalCycles;
+ m_filtered.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
+ }
+
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ m_total.m_invokesByPhase[i] += info.m_invokesByPhase[i];
+ m_total.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
+ if (includeInFiltered)
+ {
+ m_filtered.m_invokesByPhase[i] += info.m_invokesByPhase[i];
+ m_filtered.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
+ }
+ m_maximum.m_cyclesByPhase[i] = max(m_maximum.m_cyclesByPhase[i], info.m_cyclesByPhase[i]);
+ }
+ m_total.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
+ m_maximum.m_parentPhaseEndSlop = max(m_maximum.m_parentPhaseEndSlop, info.m_parentPhaseEndSlop);
+}
+
+// Static
+LPCWSTR Compiler::compJitTimeLogFilename = NULL;
+
+void CompTimeSummaryInfo::Print(FILE* f)
+{
+ if (f == NULL)
+ return;
+ // Otherwise...
+ double countsPerSec = CycleTimer::CyclesPerSecond();
+ if (countsPerSec == 0.0)
+ {
+ fprintf(f, "Processor does not have a high-frequency timer.\n");
+ return;
+ }
+
+ fprintf(f, "JIT Compilation time report:\n");
+ fprintf(f, " Compiled %d methods.\n", m_numMethods);
+ if (m_numMethods != 0)
+ {
+ fprintf(f, " Compiled %d bytecodes total (%d max, %8.2f avg).\n", m_total.m_byteCodeBytes,
+ m_maximum.m_byteCodeBytes, (double)m_total.m_byteCodeBytes / (double)m_numMethods);
+ double totTime_ms = ((double)m_total.m_totalCycles / countsPerSec) * 1000.0;
+ fprintf(f, " Time: total: %10.3f Mcycles/%10.3f ms\n", ((double)m_total.m_totalCycles / 1000000.0),
+ totTime_ms);
+ fprintf(f, " max: %10.3f Mcycles/%10.3f ms\n", ((double)m_maximum.m_totalCycles) / 1000000.0,
+ ((double)m_maximum.m_totalCycles / countsPerSec) * 1000.0);
+ fprintf(f, " avg: %10.3f Mcycles/%10.3f ms\n",
+ ((double)m_total.m_totalCycles) / 1000000.0 / (double)m_numMethods, totTime_ms / (double)m_numMethods);
+
+ fprintf(f, " Total time by phases:\n");
+ fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total max (ms)\n");
+ fprintf(f, " --------------------------------------------------------------------------------------\n");
+ // Ensure that at least the names array and the Phases enum have the same number of entries:
+ assert(sizeof(PhaseNames) / sizeof(const char*) == PHASE_NUMBER_OF);
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ double phase_tot_ms = (((double)m_total.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
+ double phase_max_ms = (((double)m_maximum.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
+ // Indent nested phases, according to depth.
+ int ancPhase = PhaseParent[i];
+ while (ancPhase != -1)
+ {
+ fprintf(f, " ");
+ ancPhase = PhaseParent[ancPhase];
+ }
+ fprintf(f, " %-30s %5.2f %10.2f %9.3f %8.2f%% %8.3f\n", PhaseNames[i],
+ ((double)m_total.m_invokesByPhase[i]) / ((double)m_numMethods),
+ ((double)m_total.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms, (phase_tot_ms * 100.0 / totTime_ms),
+ phase_max_ms);
+ }
+ fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles.\n",
+ m_total.m_parentPhaseEndSlop);
+ }
+ if (m_numFilteredMethods > 0)
+ {
+ fprintf(f, " Compiled %d methods that meet the filter requirement.\n", m_numFilteredMethods);
+ fprintf(f, " Compiled %d bytecodes total (%8.2f avg).\n", m_filtered.m_byteCodeBytes,
+ (double)m_filtered.m_byteCodeBytes / (double)m_numFilteredMethods);
+ double totTime_ms = ((double)m_filtered.m_totalCycles / countsPerSec) * 1000.0;
+ fprintf(f, " Time: total: %10.3f Mcycles/%10.3f ms\n", ((double)m_filtered.m_totalCycles / 1000000.0),
+ totTime_ms);
+ fprintf(f, " avg: %10.3f Mcycles/%10.3f ms\n",
+ ((double)m_filtered.m_totalCycles) / 1000000.0 / (double)m_numFilteredMethods,
+ totTime_ms / (double)m_numFilteredMethods);
+
+ fprintf(f, " Total time by phases:\n");
+ fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total\n");
+ fprintf(f, " --------------------------------------------------------------------------------------\n");
+ // Ensure that at least the names array and the Phases enum have the same number of entries:
+ assert(sizeof(PhaseNames) / sizeof(const char*) == PHASE_NUMBER_OF);
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ double phase_tot_ms = (((double)m_filtered.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
+ // Indent nested phases, according to depth.
+ int ancPhase = PhaseParent[i];
+ while (ancPhase != -1)
+ {
+ fprintf(f, " ");
+ ancPhase = PhaseParent[ancPhase];
+ }
+ fprintf(f, " %-30s %5.2f %10.2f %9.3f %8.2f%%\n", PhaseNames[i],
+ ((double)m_filtered.m_invokesByPhase[i]) / ((double)m_numFilteredMethods),
+ ((double)m_filtered.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms,
+ (phase_tot_ms * 100.0 / totTime_ms));
+ }
+ fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles.\n",
+ m_filtered.m_parentPhaseEndSlop);
+ }
+}
+
+JitTimer::JitTimer(unsigned byteCodeSize) : m_info(byteCodeSize)
+{
+#ifdef DEBUG
+ m_lastPhase = (Phases)-1;
+#endif
+
+ unsigned __int64 threadCurCycles;
+ if (GetThreadCycles(&threadCurCycles))
+ {
+ m_start = threadCurCycles;
+ m_curPhaseStart = threadCurCycles;
+ }
+}
+
+void JitTimer::EndPhase(Phases phase)
+{
+    // We currently re-run some phases, so the following assert doesn't hold:
+    // assert((int)phase > (int)m_lastPhase); // We should end phases in increasing order.
+
+ unsigned __int64 threadCurCycles;
+ if (GetThreadCycles(&threadCurCycles))
+ {
+ unsigned __int64 phaseCycles = (threadCurCycles - m_curPhaseStart);
+ // If this is not a leaf phase, the assumption is that the last subphase must have just recently ended.
+ // Credit the duration to "slop", the total of which should be very small.
+ if (PhaseHasChildren[phase])
+ {
+ m_info.m_parentPhaseEndSlop += phaseCycles;
+ }
+ else
+ {
+ // It is a leaf phase. Credit duration to it.
+ m_info.m_invokesByPhase[phase]++;
+ m_info.m_cyclesByPhase[phase] += phaseCycles;
+ // Credit the phase's ancestors, if any.
+ int ancPhase = PhaseParent[phase];
+ while (ancPhase != -1)
+ {
+ m_info.m_cyclesByPhase[ancPhase] += phaseCycles;
+ ancPhase = PhaseParent[ancPhase];
+ }
+ // Did we just end the last phase?
+ if (phase + 1 == PHASE_NUMBER_OF)
+ {
+ m_info.m_totalCycles = (threadCurCycles - m_start);
+ }
+ else
+ {
+ m_curPhaseStart = threadCurCycles;
+ }
+ }
+ }
+#ifdef DEBUG
+ m_lastPhase = phase;
+#endif
+}
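+
+// Usage sketch (illustrative only -- the variable names and surrounding control flow below are
+// hypothetical, and PHASE_PRE_IMPORT is assumed to be one of the entries in compphases.h):
+// a JitTimer is created with the method's IL size, EndPhase() is called as each phase finishes,
+// and Terminate() validates the totals and folds them into the process-wide summary.
+//
+//     JitTimer timer(methodILCodeSize);   // start timing this method
+//     // ... run the pre-import work ...
+//     timer.EndPhase(PHASE_PRE_IMPORT);   // credit the elapsed cycles to that phase
+//     // ... run the remaining phases, ending each one the same way ...
+//     timer.Terminate(compiler, CompTimeSummaryInfo::s_compTimeSummary);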
+
+CritSecObject JitTimer::s_csvLock;
+
+LPCWSTR Compiler::JitTimeLogCsv()
+{
+ LPCWSTR jitTimeLogCsv = JitConfig.JitTimeLogCsv();
+ return jitTimeLogCsv;
+}
+
+void JitTimer::PrintCsvHeader()
+{
+ LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv();
+ if (jitTimeLogCsv == NULL)
+ {
+ return;
+ }
+
+ CritSecHolder csvLock(s_csvLock);
+
+ FILE* fp = _wfopen(jitTimeLogCsv, W("r"));
+ if (fp == nullptr)
+ {
+ // File doesn't exist, so create it and write the header
+
+ // Use write mode, so we rewrite the file, and retain only the last compiled process/dll.
+ // Ex: ngen install mscorlib won't print stats for "ngen" but for "mscorsvw"
+        fp = _wfopen(jitTimeLogCsv, W("w"));
+        if (fp == nullptr)
+        {
+            return; // Couldn't create the file; nothing is open, so just bail out.
+        }
+ fprintf(fp, "\"Method Name\",");
+ fprintf(fp, "\"Method Index\",");
+ fprintf(fp, "\"IL Bytes\",");
+ fprintf(fp, "\"Basic Blocks\",");
+ fprintf(fp, "\"Opt Level\",");
+ fprintf(fp, "\"Loops Cloned\",");
+
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ fprintf(fp, "\"%s\",", PhaseNames[i]);
+ }
+
+ InlineStrategy::DumpCsvHeader(fp);
+
+ fprintf(fp, "\"Total Cycles\",");
+ fprintf(fp, "\"CPS\"\n");
+ }
+ fclose(fp);
+}
+
+extern ICorJitHost* g_jitHost;
+
+void JitTimer::PrintCsvMethodStats(Compiler* comp)
+{
+ LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv();
+ if (jitTimeLogCsv == NULL)
+ {
+ return;
+ }
+
+ // eeGetMethodFullName uses locks, so don't enter crit sec before this call.
+ const char* methName = comp->eeGetMethodFullName(comp->info.compMethodHnd);
+
+ // Try and access the SPMI index to report in the data set.
+ //
+ // If the jit is not hosted under SPMI this will return the
+ // default value of zero.
+ //
+ // Query the jit host directly here instead of going via the
+ // config cache, since value will change for each method.
+ int index = g_jitHost->getIntConfigValue(W("SuperPMIMethodContextNumber"), 0);
+
+ CritSecHolder csvLock(s_csvLock);
+
+    FILE* fp = _wfopen(jitTimeLogCsv, W("a"));
+    if (fp == nullptr)
+    {
+        return; // Couldn't open the log file; skip this method's stats.
+    }
+ fprintf(fp, "\"%s\",", methName);
+ fprintf(fp, "%d,", index);
+ fprintf(fp, "%u,", comp->info.compILCodeSize);
+ fprintf(fp, "%u,", comp->fgBBcount);
+ fprintf(fp, "%u,", comp->opts.MinOpts());
+ fprintf(fp, "%u,", comp->optLoopsCloned);
+ unsigned __int64 totCycles = 0;
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ if (!PhaseHasChildren[i])
+ totCycles += m_info.m_cyclesByPhase[i];
+ fprintf(fp, "%I64u,", m_info.m_cyclesByPhase[i]);
+ }
+
+ comp->m_inlineStrategy->DumpCsvData(fp);
+
+ fprintf(fp, "%I64u,", m_info.m_totalCycles);
+ fprintf(fp, "%f\n", CycleTimer::CyclesPerSecond());
+ fclose(fp);
+}
+
+// Completes the timing of the current method, and adds it to "sum".
+void JitTimer::Terminate(Compiler* comp, CompTimeSummaryInfo& sum)
+{
+#ifdef DEBUG
+ unsigned __int64 totCycles2 = 0;
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ if (!PhaseHasChildren[i])
+ totCycles2 += m_info.m_cyclesByPhase[i];
+ }
+ // We include m_parentPhaseEndSlop in the next phase's time also (we probably shouldn't)
+ // totCycles2 += m_info.m_parentPhaseEndSlop;
+ assert(totCycles2 == m_info.m_totalCycles);
+#endif
+
+ PrintCsvMethodStats(comp);
+
+ sum.AddInfo(m_info);
+}
+#endif // FEATURE_JIT_METHOD_PERF
+
+#if MEASURE_MEM_ALLOC
+// static vars.
+CritSecObject Compiler::s_memStatsLock; // Default constructor.
+Compiler::AggregateMemStats Compiler::s_aggMemStats; // Default constructor.
+Compiler::MemStats Compiler::s_maxCompMemStats; // Default constructor.
+
+const char* Compiler::MemStats::s_CompMemKindNames[] = {
+#define CompMemKindMacro(kind) #kind,
+#include "compmemkind.h"
+};
+
+void Compiler::MemStats::Print(FILE* f)
+{
+ fprintf(f, "count: %10u, size: %10llu, max = %10llu\n", allocCnt, allocSz, allocSzMax);
+ fprintf(f, "allocateMemory: %10llu, nraUsed: %10llu\n", nraTotalSizeAlloc, nraTotalSizeUsed);
+ PrintByKind(f);
+}
+
+void Compiler::MemStats::PrintByKind(FILE* f)
+{
+ fprintf(f, "\nAlloc'd bytes by kind:\n %20s | %10s | %7s\n", "kind", "size", "pct");
+ fprintf(f, " %20s-+-%10s-+-%7s\n", "--------------------", "----------", "-------");
+ float allocSzF = static_cast<float>(allocSz);
+ for (int cmk = 0; cmk < CMK_Count; cmk++)
+ {
+ float pct = 100.0f * static_cast<float>(allocSzByKind[cmk]) / allocSzF;
+ fprintf(f, " %20s | %10llu | %6.2f%%\n", s_CompMemKindNames[cmk], allocSzByKind[cmk], pct);
+ }
+ fprintf(f, "\n");
+}
+
+void Compiler::AggregateMemStats::Print(FILE* f)
+{
+ fprintf(f, "For %9u methods:\n", nMethods);
+ fprintf(f, " count: %12u (avg %7u per method)\n", allocCnt, allocCnt / nMethods);
+ fprintf(f, " alloc size : %12llu (avg %7llu per method)\n", allocSz, allocSz / nMethods);
+ fprintf(f, " max alloc : %12llu\n", allocSzMax);
+ fprintf(f, "\n");
+ fprintf(f, " allocateMemory : %12llu (avg %7llu per method)\n", nraTotalSizeAlloc, nraTotalSizeAlloc / nMethods);
+ fprintf(f, " nraUsed : %12llu (avg %7llu per method)\n", nraTotalSizeUsed, nraTotalSizeUsed / nMethods);
+ PrintByKind(f);
+}
+#endif // MEASURE_MEM_ALLOC
+
+#if LOOP_HOIST_STATS
+// Static fields.
+CritSecObject Compiler::s_loopHoistStatsLock; // Default constructor.
+unsigned Compiler::s_loopsConsidered = 0;
+unsigned Compiler::s_loopsWithHoistedExpressions = 0;
+unsigned Compiler::s_totalHoistedExpressions = 0;
+
+// static
+void Compiler::PrintAggregateLoopHoistStats(FILE* f)
+{
+ fprintf(f, "\n");
+ fprintf(f, "---------------------------------------------------\n");
+ fprintf(f, "Loop hoisting stats\n");
+ fprintf(f, "---------------------------------------------------\n");
+
+ double pctWithHoisted = 0.0;
+ if (s_loopsConsidered > 0)
+ {
+ pctWithHoisted = 100.0 * (double(s_loopsWithHoistedExpressions) / double(s_loopsConsidered));
+ }
+ double exprsPerLoopWithExpr = 0.0;
+ if (s_loopsWithHoistedExpressions > 0)
+ {
+ exprsPerLoopWithExpr = double(s_totalHoistedExpressions) / double(s_loopsWithHoistedExpressions);
+ }
+ fprintf(f, "Considered %d loops. Of these, we hoisted expressions out of %d (%6.2f%%).\n", s_loopsConsidered,
+ s_loopsWithHoistedExpressions, pctWithHoisted);
+ fprintf(f, " A total of %d expressions were hoisted, an average of %5.2f per loop-with-hoisted-expr.\n",
+ s_totalHoistedExpressions, exprsPerLoopWithExpr);
+}
+
+void Compiler::AddLoopHoistStats()
+{
+ CritSecHolder statsLock(s_loopHoistStatsLock);
+
+ s_loopsConsidered += m_loopsConsidered;
+ s_loopsWithHoistedExpressions += m_loopsWithHoistedExpressions;
+ s_totalHoistedExpressions += m_totalHoistedExpressions;
+}
+
+void Compiler::PrintPerMethodLoopHoistStats()
+{
+ double pctWithHoisted = 0.0;
+ if (m_loopsConsidered > 0)
+ {
+ pctWithHoisted = 100.0 * (double(m_loopsWithHoistedExpressions) / double(m_loopsConsidered));
+ }
+ double exprsPerLoopWithExpr = 0.0;
+ if (m_loopsWithHoistedExpressions > 0)
+ {
+ exprsPerLoopWithExpr = double(m_totalHoistedExpressions) / double(m_loopsWithHoistedExpressions);
+ }
+ printf("Considered %d loops. Of these, we hoisted expressions out of %d (%5.2f%%).\n", m_loopsConsidered,
+ m_loopsWithHoistedExpressions, pctWithHoisted);
+ printf(" A total of %d expressions were hoisted, an average of %5.2f per loop-with-hoisted-expr.\n",
+ m_totalHoistedExpressions, exprsPerLoopWithExpr);
+}
+#endif // LOOP_HOIST_STATS
+
+//------------------------------------------------------------------------
+// RecordStateAtEndOfInlining: capture timing data (if enabled) after
+// inlining has completed.
+//
+// Note:
+// Records data needed for SQM and inlining data dumps. Should be
+// called after inlining is complete. (We do this after inlining
+// because this marks the last point at which the JIT is likely to
+// cause type-loading and class initialization).
+
+void Compiler::RecordStateAtEndOfInlining()
+{
+#if defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
+
+ m_compCyclesAtEndOfInlining = 0;
+ m_compTickCountAtEndOfInlining = 0;
+ bool b = CycleTimer::GetThreadCyclesS(&m_compCyclesAtEndOfInlining);
+ if (!b)
+ {
+ return; // We don't have a thread cycle counter.
+ }
+ m_compTickCountAtEndOfInlining = GetTickCount();
+
+#endif // defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
+}
+
+//------------------------------------------------------------------------
+// RecordStateAtEndOfCompilation: capture timing data (if enabled) after
+// compilation has completed.
+
+void Compiler::RecordStateAtEndOfCompilation()
+{
+#if defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
+
+ // Common portion
+ m_compCycles = 0;
+ unsigned __int64 compCyclesAtEnd;
+ bool b = CycleTimer::GetThreadCyclesS(&compCyclesAtEnd);
+ if (!b)
+ {
+ return; // We don't have a thread cycle counter.
+ }
+ assert(compCyclesAtEnd >= m_compCyclesAtEndOfInlining);
+
+ m_compCycles = compCyclesAtEnd - m_compCyclesAtEndOfInlining;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
+
+#ifdef FEATURE_CLRSQM
+
+ // SQM only portion
+ unsigned __int64 mcycles64 = m_compCycles / ((unsigned __int64)1000000);
+ unsigned mcycles;
+ if (mcycles64 > UINT32_MAX)
+ {
+ mcycles = UINT32_MAX;
+ }
+ else
+ {
+ mcycles = (unsigned)mcycles64;
+ }
+
+ DWORD ticksAtEnd = GetTickCount();
+ assert(ticksAtEnd >= m_compTickCountAtEndOfInlining);
+ DWORD compTicks = ticksAtEnd - m_compTickCountAtEndOfInlining;
+
+ if (mcycles >= 1000)
+ {
+ info.compCompHnd->logSQMLongJitEvent(mcycles, compTicks, info.compILCodeSize, fgBBcount, opts.MinOpts(),
+ info.compMethodHnd);
+ }
+
+#endif // FEATURE_CLRSQM
+}
+
+#if FUNC_INFO_LOGGING
+// static
+LPCWSTR Compiler::compJitFuncInfoFilename = nullptr;
+
+// static
+FILE* Compiler::compJitFuncInfoFile = nullptr;
+#endif // FUNC_INFO_LOGGING
+
+#ifdef DEBUG
+
+// dumpConvertedVarSet() is just like dumpVarSet(), except we assume the varset bits are tracked
+// variable indices, and we convert them to variable numbers, sort the variable numbers, and
+// print them as variable numbers. To do this, we use a temporary set indexed by
+// variable number. We can't use the "all varset" type because it is still size-limited, and might
+// not be big enough to handle all possible variable numbers.
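+// For example, if the tracked indices in 'vars' map to locals V01, V03 and V07 (hypothetical
+// variable numbers), the output is: {V01 V03 V07}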
+void dumpConvertedVarSet(Compiler* comp, VARSET_VALARG_TP vars)
+{
+ BYTE* pVarNumSet; // trivial set: one byte per varNum, 0 means not in set, 1 means in set.
+
+ size_t varNumSetBytes = comp->lvaCount * sizeof(BYTE);
+ pVarNumSet = (BYTE*)_alloca(varNumSetBytes);
+ memset(pVarNumSet, 0, varNumSetBytes); // empty the set
+
+ VARSET_ITER_INIT(comp, iter, vars, varIndex);
+ while (iter.NextElem(comp, &varIndex))
+ {
+ unsigned varNum = comp->lvaTrackedToVarNum[varIndex];
+ assert(varNum < comp->lvaCount);
+ pVarNumSet[varNum] = 1; // This varNum is in the set
+ }
+
+ bool first = true;
+ printf("{");
+ for (size_t varNum = 0; varNum < comp->lvaCount; varNum++)
+ {
+ if (pVarNumSet[varNum] == 1)
+ {
+ if (!first)
+ {
+ printf(" ");
+ }
+ printf("V%02u", varNum);
+ first = false;
+ }
+ }
+ printf("}");
+}
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Debugging helpers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+/* The following functions are intended to be called from the debugger, to dump
+ * various data structures.
+ *
+ * The versions that start with 'c' take a Compiler* as the first argument.
+ * The versions that start with 'd' use the tlsCompiler, so don't require a Compiler*.
+ *
+ * Summary:
+ * cBlock, dBlock : Display a basic block (call fgDispBasicBlock()).
+ * cBlocks, dBlocks : Display all the basic blocks of a function (call fgDispBasicBlocks()).
+ * cBlocksV, dBlocksV : Display all the basic blocks of a function (call fgDispBasicBlocks(true)).
+ * "V" means "verbose", and will dump all the trees.
+ * cTree, dTree : Display a tree (call gtDispTree()).
+ * cTrees, dTrees : Display all the trees in a function (call fgDumpTrees()).
+ * cEH, dEH : Display the EH handler table (call fgDispHandlerTab()).
+ * cVar, dVar : Display a local variable given its number (call lvaDumpEntry()).
+ * cVarDsc, dVarDsc : Display a local variable given a LclVarDsc* (call lvaDumpEntry()).
+ * cVars, dVars : Display the local variable table (call lvaTableDump()).
+ * cVarsFinal, dVarsFinal : Display the local variable table (call lvaTableDump(FINAL_FRAME_LAYOUT)).
+ * cBlockCheapPreds, dBlockCheapPreds : Display a block's cheap predecessors (call block->dspCheapPreds()).
+ * cBlockPreds, dBlockPreds : Display a block's predecessors (call block->dspPreds()).
+ * cBlockSuccs, dBlockSuccs : Display a block's successors (call block->dspSuccs(compiler)).
+ * cReach, dReach : Display all block reachability (call fgDispReach()).
+ * cDoms, dDoms : Display all block dominators (call fgDispDoms()).
+ * cLiveness, dLiveness : Display per-block variable liveness (call fgDispBBLiveness()).
+ * cCVarSet, dCVarSet : Display a "converted" VARSET_TP: the varset is assumed to be tracked variable
+ * indices. These are converted to variable numbers and sorted. (Calls
+ * dumpConvertedVarSet()).
+ *
+ * cFuncIR, dFuncIR : Display all the basic blocks of a function in linear IR form.
+ * cLoopIR, dLoopIR : Display a loop in linear IR form.
+ * dLoopNumIR : Display a loop (given number) in linear IR form.
+ * cBlockIR, dBlockIR : Display a basic block in linear IR form.
+ * cTreeIR, dTreeIR : Display a tree in linear IR form.
+ * dTabStopIR : Display spaces to the next tab stop column
+ * cTreeTypeIR dTreeTypeIR : Display tree type
+ * cTreeKindsIR dTreeKindsIR : Display tree kinds
+ * cTreeFlagsIR dTreeFlagsIR : Display tree flags
+ * cOperandIR dOperandIR : Display tree operand
+ * cLeafIR dLeafIR : Display tree leaf
+ * cIndirIR dIndirIR : Display indir tree as [t#] or [leaf]
+ * cListIR dListIR : Display tree list
+ * cSsaNumIR dSsaNumIR : Display SSA number as <u|d:#>
+ * cValNumIR dValNumIR : Display Value number as <v{l|c}:#{,R}>
+ * cDependsIR : Display dependencies of a tree DEP(t# ...) node
+ * based on child comma tree nodes
+ * dFormatIR : Display dump format specified on command line
+ *
+ *
+ * The following don't require a Compiler* to work:
+ * dVarSet : Display a VARSET_TP (call dumpVarSet()).
+ * dRegMask : Display a regMaskTP (call dspRegMask(mask)).
+ */
+
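+/*****************************************************************************
+ * Example debugger usage (illustrative; the block and tree ids below are hypothetical):
+ *
+ *   dBlock(dFindBlock(4)) : dump basic block BB04
+ *   dTree(dFindTree(102)) : find the tree whose gtTreeID is 102 and dump it
+ *   dBlocksV()            : dump all basic blocks, verbose (with trees)
+ *   dEH()                 : dump the EH handler table
+ *
+ * The 'd' versions pick up the current compiler from JitTls::GetCompiler(), so they can be
+ * evaluated directly from a debugger watch/immediate window while stopped inside the JIT.
+ */
+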
+void cBlock(Compiler* comp, BasicBlock* block)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Block %u\n", sequenceNumber++);
+ comp->fgTableDispBasicBlock(block);
+}
+
+void cBlocks(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Blocks %u\n", sequenceNumber++);
+ comp->fgDispBasicBlocks();
+}
+
+void cBlocksV(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *BlocksV %u\n", sequenceNumber++);
+ comp->fgDispBasicBlocks(true);
+}
+
+void cTree(Compiler* comp, GenTree* tree)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Tree %u\n", sequenceNumber++);
+ comp->gtDispTree(tree, nullptr, ">>>");
+}
+
+void cTrees(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Trees %u\n", sequenceNumber++);
+ comp->fgDumpTrees(comp->fgFirstBB, nullptr);
+}
+
+void cEH(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *EH %u\n", sequenceNumber++);
+ comp->fgDispHandlerTab();
+}
+
+void cVar(Compiler* comp, unsigned lclNum)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Var %u\n", sequenceNumber++);
+ comp->lvaDumpEntry(lclNum, Compiler::FINAL_FRAME_LAYOUT);
+}
+
+void cVarDsc(Compiler* comp, LclVarDsc* varDsc)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *VarDsc %u\n", sequenceNumber++);
+ unsigned lclNum = (unsigned)(varDsc - comp->lvaTable);
+ comp->lvaDumpEntry(lclNum, Compiler::FINAL_FRAME_LAYOUT);
+}
+
+void cVars(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Vars %u\n", sequenceNumber++);
+ comp->lvaTableDump();
+}
+
+void cVarsFinal(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Vars %u\n", sequenceNumber++);
+ comp->lvaTableDump(Compiler::FINAL_FRAME_LAYOUT);
+}
+
+void cBlockCheapPreds(Compiler* comp, BasicBlock* block)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *BlockCheapPreds %u\n",
+ sequenceNumber++);
+ block->dspCheapPreds();
+}
+
+void cBlockPreds(Compiler* comp, BasicBlock* block)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *BlockPreds %u\n", sequenceNumber++);
+ block->dspPreds();
+}
+
+void cBlockSuccs(Compiler* comp, BasicBlock* block)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *BlockSuccs %u\n", sequenceNumber++);
+ block->dspSuccs(comp);
+}
+
+void cReach(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Reach %u\n", sequenceNumber++);
+ comp->fgDispReach();
+}
+
+void cDoms(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Doms %u\n", sequenceNumber++);
+ comp->fgDispDoms();
+}
+
+void cLiveness(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Liveness %u\n", sequenceNumber++);
+ comp->fgDispBBLiveness();
+}
+
+void cCVarSet(Compiler* comp, VARSET_VALARG_TP vars)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== dCVarSet %u\n", sequenceNumber++);
+ dumpConvertedVarSet(comp, vars);
+ printf("\n"); // dumpConvertedVarSet() doesn't emit a trailing newline
+}
+
+void dBlock(BasicBlock* block)
+{
+ cBlock(JitTls::GetCompiler(), block);
+}
+
+void dBlocks()
+{
+ cBlocks(JitTls::GetCompiler());
+}
+
+void dBlocksV()
+{
+ cBlocksV(JitTls::GetCompiler());
+}
+
+void dTree(GenTree* tree)
+{
+ cTree(JitTls::GetCompiler(), tree);
+}
+
+void dTrees()
+{
+ cTrees(JitTls::GetCompiler());
+}
+
+void dEH()
+{
+ cEH(JitTls::GetCompiler());
+}
+
+void dVar(unsigned lclNum)
+{
+ cVar(JitTls::GetCompiler(), lclNum);
+}
+
+void dVarDsc(LclVarDsc* varDsc)
+{
+ cVarDsc(JitTls::GetCompiler(), varDsc);
+}
+
+void dVars()
+{
+ cVars(JitTls::GetCompiler());
+}
+
+void dVarsFinal()
+{
+ cVarsFinal(JitTls::GetCompiler());
+}
+
+void dBlockPreds(BasicBlock* block)
+{
+ cBlockPreds(JitTls::GetCompiler(), block);
+}
+
+void dBlockCheapPreds(BasicBlock* block)
+{
+ cBlockCheapPreds(JitTls::GetCompiler(), block);
+}
+
+void dBlockSuccs(BasicBlock* block)
+{
+ cBlockSuccs(JitTls::GetCompiler(), block);
+}
+
+void dReach()
+{
+ cReach(JitTls::GetCompiler());
+}
+
+void dDoms()
+{
+ cDoms(JitTls::GetCompiler());
+}
+
+void dLiveness()
+{
+ cLiveness(JitTls::GetCompiler());
+}
+
+void dCVarSet(VARSET_VALARG_TP vars)
+{
+ cCVarSet(JitTls::GetCompiler(), vars);
+}
+
+void dRegMask(regMaskTP mask)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== dRegMask %u\n", sequenceNumber++);
+ dspRegMask(mask);
+ printf("\n"); // dspRegMask() doesn't emit a trailing newline
+}
+
+void dBlockList(BasicBlockList* list)
+{
+ printf("WorkList: ");
+ while (list != nullptr)
+ {
+ printf("BB%02u ", list->block->bbNum);
+ list = list->next;
+ }
+ printf("\n");
+}
+
+// Global variables available in debug mode, set by the debug APIs below for finding
+// Trees, Stmts, and/or Blocks by id or bbNum.
+// They can be used in the watch window, or as a way to get the address of fields for data breakpoints.
+
+GenTree* dbTree;
+GenTreeStmt* dbStmt;
+BasicBlock* dbTreeBlock;
+BasicBlock* dbBlock;
+
+// Debug APIs for finding Trees, Stmts, and/or Blocks.
+// As a side effect, they set the debug variables above.
+
+GenTree* dFindTree(GenTree* tree, unsigned id)
+{
+ GenTree* child;
+
+ if (tree == nullptr)
+ {
+ return nullptr;
+ }
+
+ if (tree->gtTreeID == id)
+ {
+ dbTree = tree;
+ return tree;
+ }
+
+ unsigned childCount = tree->NumChildren();
+ for (unsigned childIndex = 0; childIndex < childCount; childIndex++)
+ {
+ child = tree->GetChild(childIndex);
+ child = dFindTree(child, id);
+ if (child != nullptr)
+ {
+ return child;
+ }
+ }
+
+ return nullptr;
+}
+
+GenTree* dFindTree(unsigned id)
+{
+ Compiler* comp = JitTls::GetCompiler();
+ BasicBlock* block;
+ GenTree* tree;
+
+ dbTreeBlock = nullptr;
+ dbTree = nullptr;
+
+ for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ tree = dFindTree(stmt, id);
+ if (tree != nullptr)
+ {
+ dbTreeBlock = block;
+ return tree;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+GenTreeStmt* dFindStmt(unsigned id)
+{
+ Compiler* comp = JitTls::GetCompiler();
+ BasicBlock* block;
+
+ dbStmt = nullptr;
+
+ unsigned stmtId = 0;
+ for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ stmtId++;
+ if (stmtId == id)
+ {
+ dbStmt = stmt;
+ return stmt;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+BasicBlock* dFindBlock(unsigned bbNum)
+{
+ Compiler* comp = JitTls::GetCompiler();
+ BasicBlock* block = nullptr;
+
+ dbBlock = nullptr;
+ for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbNum == bbNum)
+ {
+ dbBlock = block;
+ break;
+ }
+ }
+
+ return block;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out function in linear IR form
+ */
+
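+// Note: the c*/d* IR dump routines below can be called from a debugger just like the helpers
+// above. Outside the debugger, these dumps are driven by the COMPlus_JitDumpIR family of
+// settings (COMPlus_JitDumpIRFormat is echoed by dFormatIR() below); the exact values accepted
+// are defined by JitConfig, not here.
+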
+void cFuncIR(Compiler* comp)
+{
+ BasicBlock* block;
+
+ printf("Method %s::%s, hsh=0x%x\n", comp->info.compClassName, comp->info.compMethodName,
+ comp->info.compMethodHash());
+
+ printf("\n");
+
+ for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ cBlockIR(comp, block);
+ }
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out the format specifiers from COMPlus_JitDumpIRFormat
+ */
+
+void dFormatIR()
+{
+ Compiler* comp = JitTls::GetCompiler();
+
+ if (comp->dumpIRFormat != nullptr)
+ {
+        printf("COMPlus_JitDumpIRFormat=%ls\n", comp->dumpIRFormat);
+ }
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out function in linear IR form
+ */
+
+void dFuncIR()
+{
+ cFuncIR(JitTls::GetCompiler());
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out loop in linear IR form
+ */
+
+void cLoopIR(Compiler* comp, Compiler::LoopDsc* loop)
+{
+ BasicBlock* blockHead = loop->lpHead;
+ BasicBlock* blockFirst = loop->lpFirst;
+ BasicBlock* blockTop = loop->lpTop;
+ BasicBlock* blockEntry = loop->lpEntry;
+ BasicBlock* blockBottom = loop->lpBottom;
+ BasicBlock* blockExit = loop->lpExit;
+ BasicBlock* blockLast = blockBottom->bbNext;
+ BasicBlock* block;
+
+ printf("LOOP\n");
+ printf("\n");
+ printf("HEAD BB%02u\n", blockHead->bbNum);
+ printf("FIRST BB%02u\n", blockFirst->bbNum);
+ printf("TOP BB%02u\n", blockTop->bbNum);
+ printf("ENTRY BB%02u\n", blockEntry->bbNum);
+ if (loop->lpExitCnt == 1)
+ {
+ printf("EXIT BB%02u\n", blockExit->bbNum);
+ }
+ else
+ {
+        printf("EXITS %u\n", loop->lpExitCnt);
+ }
+ printf("BOTTOM BB%02u\n", blockBottom->bbNum);
+ printf("\n");
+
+ cBlockIR(comp, blockHead);
+ for (block = blockFirst; ((block != nullptr) && (block != blockLast)); block = block->bbNext)
+ {
+ cBlockIR(comp, block);
+ }
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out loop in linear IR form
+ */
+
+void dLoopIR(Compiler::LoopDsc* loop)
+{
+ cLoopIR(JitTls::GetCompiler(), loop);
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out loop (given loop number) in linear IR form
+ */
+
+void dLoopNumIR(unsigned loopNum)
+{
+ Compiler* comp = JitTls::GetCompiler();
+
+ if (loopNum >= comp->optLoopCount)
+ {
+        printf("loopNum %u out of range\n", loopNum);
+ return;
+ }
+
+ Compiler::LoopDsc* loop = &comp->optLoopTable[loopNum];
+ cLoopIR(JitTls::GetCompiler(), loop);
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump spaces to specified tab stop
+ */
+
+int dTabStopIR(int curr, int tabstop)
+{
+ int chars = 0;
+
+ if (tabstop <= curr)
+ {
+ chars += printf(" ");
+ }
+
+ for (int i = curr; i < tabstop; i++)
+ {
+ chars += printf(" ");
+ }
+
+ return chars;
+}
+
+void cNodeIR(Compiler* comp, GenTree* tree);
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out block in linear IR form
+ */
+
+void cBlockIR(Compiler* comp, BasicBlock* block)
+{
+ bool noStmts = comp->dumpIRNoStmts;
+ bool trees = comp->dumpIRTrees;
+
+ if (comp->dumpIRBlockHeaders)
+ {
+ block->dspBlockHeader(comp);
+ }
+ else
+ {
+ printf("BB%02u:\n", block->bbNum);
+ }
+
+ printf("\n");
+
+ if (!block->IsLIR())
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ // Print current stmt.
+
+ if (trees)
+ {
+ cTree(comp, stmt);
+ printf("\n");
+ printf("=====================================================================\n");
+ }
+
+ if (comp->compRationalIRForm)
+ {
+ GenTree* tree;
+
+ foreach_treenode_execution_order(tree, stmt)
+ {
+ cNodeIR(comp, tree);
+ }
+ }
+ else
+ {
+ cTreeIR(comp, stmt);
+ }
+
+ if (!noStmts && !trees)
+ {
+ printf("\n");
+ }
+ }
+ }
+ else
+ {
+ for (GenTree* node = block->bbTreeList; node != nullptr; node = node->gtNext)
+ {
+ cNodeIR(comp, node);
+ }
+ }
+
+ int chars = 0;
+
+ chars += dTabStopIR(chars, COLUMN_OPCODE);
+
+ chars += printf(" ");
+ switch (block->bbJumpKind)
+ {
+ case BBJ_EHFINALLYRET:
+ chars += printf("BRANCH(EHFINALLYRET)");
+ break;
+
+ case BBJ_EHFILTERRET:
+ chars += printf("BRANCH(EHFILTERRET)");
+ break;
+
+ case BBJ_EHCATCHRET:
+ chars += printf("BRANCH(EHCATCHRETURN)");
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+ chars += printf(" BB%02u", block->bbJumpDest->bbNum);
+ break;
+
+ case BBJ_THROW:
+ chars += printf("BRANCH(THROW)");
+ break;
+
+ case BBJ_RETURN:
+ chars += printf("BRANCH(RETURN)");
+ break;
+
+ case BBJ_NONE:
+ // For fall-through blocks
+ chars += printf("BRANCH(NONE)");
+ break;
+
+ case BBJ_ALWAYS:
+ chars += printf("BRANCH(ALWAYS)");
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+ chars += printf(" BB%02u", block->bbJumpDest->bbNum);
+ if (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ chars += dTabStopIR(chars, COLUMN_KINDS);
+ chars += printf("; [KEEP_BBJ_ALWAYS]");
+ }
+ break;
+
+ case BBJ_LEAVE:
+ chars += printf("BRANCH(LEAVE)");
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+ chars += printf(" BB%02u", block->bbJumpDest->bbNum);
+ break;
+
+ case BBJ_CALLFINALLY:
+ chars += printf("BRANCH(CALLFINALLY)");
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+ chars += printf(" BB%02u", block->bbJumpDest->bbNum);
+ break;
+
+ case BBJ_COND:
+ chars += printf("BRANCH(COND)");
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+ chars += printf(" BB%02u", block->bbJumpDest->bbNum);
+ break;
+
+ case BBJ_SWITCH:
+ chars += printf("BRANCH(SWITCH)");
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+ do
+ {
+ chars += printf("%c BB%02u", (jumpTab == block->bbJumpSwt->bbsDstTab) ? ' ' : ',', (*jumpTab)->bbNum);
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+
+ printf("\n");
+ if (block->bbNext != nullptr)
+ {
+ printf("\n");
+ }
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out block in linear IR form
+ */
+
+void dBlockIR(BasicBlock* block)
+{
+ cBlockIR(JitTls::GetCompiler(), block);
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node type for linear IR form
+ */
+
+int cTreeTypeIR(Compiler* comp, GenTree* tree)
+{
+ int chars = 0;
+
+ var_types type = tree->TypeGet();
+
+ const char* typeName = varTypeName(type);
+ chars += printf(".%s", typeName);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node type for linear IR form
+ */
+
+int dTreeTypeIR(GenTree* tree)
+{
+ int chars = cTreeTypeIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node kind for linear IR form
+ */
+
+int cTreeKindsIR(Compiler* comp, GenTree* tree)
+{
+ int chars = 0;
+
+ unsigned kind = tree->OperKind();
+
+ chars += printf("kinds=");
+ if (kind == GTK_SPECIAL)
+ {
+ chars += printf("[SPECIAL]");
+ }
+ if (kind & GTK_CONST)
+ {
+ chars += printf("[CONST]");
+ }
+ if (kind & GTK_LEAF)
+ {
+ chars += printf("[LEAF]");
+ }
+ if (kind & GTK_UNOP)
+ {
+ chars += printf("[UNOP]");
+ }
+ if (kind & GTK_BINOP)
+ {
+ chars += printf("[BINOP]");
+ }
+ if (kind & GTK_LOGOP)
+ {
+ chars += printf("[LOGOP]");
+ }
+ if (kind & GTK_ASGOP)
+ {
+ chars += printf("[ASGOP]");
+ }
+ if (kind & GTK_COMMUTE)
+ {
+ chars += printf("[COMMUTE]");
+ }
+ if (kind & GTK_EXOP)
+ {
+ chars += printf("[EXOP]");
+ }
+ if (kind & GTK_LOCAL)
+ {
+ chars += printf("[LOCAL]");
+ }
+ if (kind & GTK_SMPOP)
+ {
+ chars += printf("[SMPOP]");
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node kind for linear IR form
+ */
+
+int dTreeKindsIR(GenTree* tree)
+{
+ int chars = cTreeKindsIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node flags for linear IR form
+ */
+
+int cTreeFlagsIR(Compiler* comp, GenTree* tree)
+{
+ int chars = 0;
+
+ if (tree->gtFlags != 0)
+ {
+ chars += printf("flags=");
+
+ // Node flags
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(DEBUG)
+#if SMALL_TREE_NODES
+ if (comp->dumpIRNodes)
+ {
+ if (tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE)
+ {
+ chars += printf("[NODE_LARGE]");
+ }
+ if (tree->gtDebugFlags & GTF_DEBUG_NODE_SMALL)
+ {
+ chars += printf("[NODE_SMALL]");
+ }
+ }
+#endif // SMALL_TREE_NODES
+ if (tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED)
+ {
+ chars += printf("[MORPHED]");
+ }
+#endif // defined(DEBUG)
+
+ if (tree->gtFlags & GTF_COLON_COND)
+ {
+ chars += printf("[COLON_COND]");
+ }
+
+ // Operator flags
+
+ genTreeOps op = tree->OperGet();
+ switch (op)
+ {
+
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ case GT_LCL_FLD:
+ case GT_LCL_FLD_ADDR:
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ case GT_REG_VAR:
+
+ if (tree->gtFlags & GTF_VAR_DEF)
+ {
+ chars += printf("[VAR_DEF]");
+ }
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+ chars += printf("[VAR_USEASG]");
+ }
+ if (tree->gtFlags & GTF_VAR_USEDEF)
+ {
+ chars += printf("[VAR_USEDEF]");
+ }
+ if (tree->gtFlags & GTF_VAR_CAST)
+ {
+ chars += printf("[VAR_CAST]");
+ }
+ if (tree->gtFlags & GTF_VAR_ITERATOR)
+ {
+ chars += printf("[VAR_ITERATOR]");
+ }
+ if (tree->gtFlags & GTF_VAR_CLONED)
+ {
+ chars += printf("[VAR_CLONED]");
+ }
+ if (tree->gtFlags & GTF_VAR_DEATH)
+ {
+ chars += printf("[VAR_DEATH]");
+ }
+ if (tree->gtFlags & GTF_VAR_ARR_INDEX)
+ {
+ chars += printf("[VAR_ARR_INDEX]");
+ }
+#if defined(DEBUG)
+ if (tree->gtDebugFlags & GTF_DEBUG_VAR_CSE_REF)
+ {
+ chars += printf("[VAR_CSE_REF]");
+ }
+#endif
+ if (op == GT_REG_VAR)
+ {
+ if (tree->gtFlags & GTF_REG_BIRTH)
+ {
+ chars += printf("[REG_BIRTH]");
+ }
+ }
+ break;
+
+ case GT_NOP:
+
+ if (tree->gtFlags & GTF_NOP_DEATH)
+ {
+ chars += printf("[NOP_DEATH]");
+ }
+ break;
+
+ case GT_NO_OP:
+
+ if (tree->gtFlags & GTF_NO_OP_NO)
+ {
+ chars += printf("[NO_OP_NO]");
+ }
+ break;
+
+ case GT_FIELD:
+
+ if (tree->gtFlags & GTF_FLD_NULLCHECK)
+ {
+ chars += printf("[FLD_NULLCHECK]");
+ }
+ if (tree->gtFlags & GTF_FLD_VOLATILE)
+ {
+ chars += printf("[FLD_VOLATILE]");
+ }
+ break;
+
+ case GT_INDEX:
+
+ if (tree->gtFlags & GTF_INX_RNGCHK)
+ {
+ chars += printf("[INX_RNGCHK]");
+ }
+ if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
+ {
+ chars += printf("[INX_REFARR_LAYOUT]");
+ }
+ if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
+ {
+ chars += printf("[INX_STRING_LAYOUT]");
+ }
+ break;
+
+ case GT_IND:
+ case GT_STOREIND:
+
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ chars += printf("[IND_VOLATILE]");
+ }
+ if (tree->gtFlags & GTF_IND_REFARR_LAYOUT)
+ {
+ chars += printf("[IND_REFARR_LAYOUT]");
+ }
+ if (tree->gtFlags & GTF_IND_TGTANYWHERE)
+ {
+ chars += printf("[IND_TGTANYWHERE]");
+ }
+ if (tree->gtFlags & GTF_IND_TLS_REF)
+ {
+ chars += printf("[IND_TLS_REF]");
+ }
+ if (tree->gtFlags & GTF_IND_ASG_LHS)
+ {
+ chars += printf("[IND_ASG_LHS]");
+ }
+ if (tree->gtFlags & GTF_IND_UNALIGNED)
+ {
+ chars += printf("[IND_UNALIGNED]");
+ }
+ if (tree->gtFlags & GTF_IND_INVARIANT)
+ {
+ chars += printf("[IND_INVARIANT]");
+ }
+ if (tree->gtFlags & GTF_IND_ARR_LEN)
+ {
+                    chars += printf("[IND_ARR_LEN]");
+ }
+ break;
+
+ case GT_CLS_VAR:
+
+ if (tree->gtFlags & GTF_CLS_VAR_ASG_LHS)
+ {
+ chars += printf("[CLS_VAR_ASG_LHS]");
+ }
+ break;
+
+ case GT_ADDR:
+
+ if (tree->gtFlags & GTF_ADDR_ONSTACK)
+ {
+ chars += printf("[ADDR_ONSTACK]");
+ }
+ break;
+
+ case GT_MUL:
+
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ {
+ chars += printf("[64RSLT]");
+ }
+ if (tree->gtFlags & GTF_ADDRMODE_NO_CSE)
+ {
+ chars += printf("[ADDRMODE_NO_CSE]");
+ }
+ break;
+
+ case GT_ADD:
+
+ if (tree->gtFlags & GTF_ADDRMODE_NO_CSE)
+ {
+ chars += printf("[ADDRMODE_NO_CSE]");
+ }
+ break;
+
+ case GT_LSH:
+
+ if (tree->gtFlags & GTF_ADDRMODE_NO_CSE)
+ {
+ chars += printf("[ADDRMODE_NO_CSE]");
+ }
+ break;
+
+ case GT_MOD:
+ case GT_UMOD:
+
+ if (tree->gtFlags & GTF_MOD_INT_RESULT)
+ {
+ chars += printf("[MOD_INT_RESULT]");
+ }
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GT:
+ case GT_GE:
+
+ if (tree->gtFlags & GTF_RELOP_NAN_UN)
+ {
+ chars += printf("[RELOP_NAN_UN]");
+ }
+ if (tree->gtFlags & GTF_RELOP_JMP_USED)
+ {
+ chars += printf("[RELOP_JMP_USED]");
+ }
+ if (tree->gtFlags & GTF_RELOP_QMARK)
+ {
+ chars += printf("[RELOP_QMARK]");
+ }
+ if (tree->gtFlags & GTF_RELOP_SMALL)
+ {
+ chars += printf("[RELOP_SMALL]");
+ }
+ break;
+
+ case GT_QMARK:
+
+ if (tree->gtFlags & GTF_QMARK_CAST_INSTOF)
+ {
+ chars += printf("[QMARK_CAST_INSTOF]");
+ }
+ break;
+
+ case GT_BOX:
+
+ if (tree->gtFlags & GTF_BOX_VALUE)
+ {
+ chars += printf("[BOX_VALUE]");
+ }
+ break;
+
+ case GT_CNS_INT:
+
+ {
+ unsigned handleKind = (tree->gtFlags & GTF_ICON_HDL_MASK);
+
+ switch (handleKind)
+ {
+
+ case GTF_ICON_SCOPE_HDL:
+
+ chars += printf("[ICON_SCOPE_HDL]");
+ break;
+
+ case GTF_ICON_CLASS_HDL:
+
+ chars += printf("[ICON_CLASS_HDL]");
+ break;
+
+ case GTF_ICON_METHOD_HDL:
+
+ chars += printf("[ICON_METHOD_HDL]");
+ break;
+
+ case GTF_ICON_FIELD_HDL:
+
+ chars += printf("[ICON_FIELD_HDL]");
+ break;
+
+ case GTF_ICON_STATIC_HDL:
+
+ chars += printf("[ICON_STATIC_HDL]");
+ break;
+
+ case GTF_ICON_STR_HDL:
+
+ chars += printf("[ICON_STR_HDL]");
+ break;
+
+ case GTF_ICON_PSTR_HDL:
+
+ chars += printf("[ICON_PSTR_HDL]");
+ break;
+
+ case GTF_ICON_PTR_HDL:
+
+ chars += printf("[ICON_PTR_HDL]");
+ break;
+
+ case GTF_ICON_VARG_HDL:
+
+ chars += printf("[ICON_VARG_HDL]");
+ break;
+
+ case GTF_ICON_PINVKI_HDL:
+
+ chars += printf("[ICON_PINVKI_HDL]");
+ break;
+
+ case GTF_ICON_TOKEN_HDL:
+
+ chars += printf("[ICON_TOKEN_HDL]");
+ break;
+
+ case GTF_ICON_TLS_HDL:
+
+                        chars += printf("[ICON_TLS_HDL]");
+ break;
+
+ case GTF_ICON_FTN_ADDR:
+
+ chars += printf("[ICON_FTN_ADDR]");
+ break;
+
+ case GTF_ICON_CIDMID_HDL:
+
+ chars += printf("[ICON_CIDMID_HDL]");
+ break;
+
+ case GTF_ICON_BBC_PTR:
+
+ chars += printf("[ICON_BBC_PTR]");
+ break;
+
+ case GTF_ICON_FIELD_OFF:
+
+ chars += printf("[ICON_FIELD_OFF]");
+ break;
+ }
+ }
+ break;
+
+ case GT_OBJ:
+ case GT_STORE_OBJ:
+ if (tree->AsObj()->HasGCPtr())
+ {
+ chars += printf("[BLK_HASGCPTR]");
+ }
+ __fallthrough;
+
+ case GT_BLK:
+ case GT_DYN_BLK:
+ case GT_STORE_BLK:
+ case GT_STORE_DYN_BLK:
+
+ if (tree->gtFlags & GTF_BLK_VOLATILE)
+ {
+ chars += printf("[BLK_VOLATILE]");
+ }
+ if (tree->AsBlk()->IsUnaligned())
+ {
+ chars += printf("[BLK_UNALIGNED]");
+ }
+ break;
+
+ case GT_CALL:
+
+ if (tree->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ chars += printf("[CALL_UNMANAGED]");
+ }
+ if (tree->gtFlags & GTF_CALL_INLINE_CANDIDATE)
+ {
+ chars += printf("[CALL_INLINE_CANDIDATE]");
+ }
+ if (tree->gtFlags & GTF_CALL_NONVIRT)
+ {
+ chars += printf("[CALL_NONVIRT]");
+ }
+ if (tree->gtFlags & GTF_CALL_VIRT_VTABLE)
+ {
+ chars += printf("[CALL_VIRT_VTABLE]");
+ }
+ if (tree->gtFlags & GTF_CALL_VIRT_STUB)
+ {
+ chars += printf("[CALL_VIRT_STUB]");
+ }
+ if (tree->gtFlags & GTF_CALL_NULLCHECK)
+ {
+ chars += printf("[CALL_NULLCHECK]");
+ }
+ if (tree->gtFlags & GTF_CALL_POP_ARGS)
+ {
+ chars += printf("[CALL_POP_ARGS]");
+ }
+ if (tree->gtFlags & GTF_CALL_HOISTABLE)
+ {
+ chars += printf("[CALL_HOISTABLE]");
+ }
+ if (tree->gtFlags & GTF_CALL_REG_SAVE)
+ {
+ chars += printf("[CALL_REG_SAVE]");
+ }
+
+ // More flags associated with calls.
+
+ {
+ GenTreeCall* call = tree->AsCall();
+
+ if (call->gtCallMoreFlags & GTF_CALL_M_EXPLICIT_TAILCALL)
+ {
+ chars += printf("[CALL_M_EXPLICIT_TAILCALL]");
+ }
+ if (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL)
+ {
+ chars += printf("[CALL_M_TAILCALL]");
+ }
+ if (call->gtCallMoreFlags & GTF_CALL_M_VARARGS)
+ {
+ chars += printf("[CALL_M_VARARGS]");
+ }
+ if (call->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG)
+ {
+ chars += printf("[CALL_M_RETBUFFARG]");
+ }
+ if (call->gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV)
+ {
+ chars += printf("[CALL_M_DELEGATE_INV]");
+ }
+ if (call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK)
+ {
+ chars += printf("[CALL_M_NOGCCHECK]");
+ }
+ if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
+ {
+ chars += printf("[CALL_M_SPECIAL_INTRINSIC]");
+ }
+
+ if (call->IsUnmanaged())
+ {
+ if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
+ {
+ chars += printf("[CALL_M_UNMGD_THISCALL]");
+ }
+ }
+ else if (call->IsVirtualStub())
+ {
+ if (call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
+ {
+ chars += printf("[CALL_M_VIRTSTUB_REL_INDIRECT]");
+ }
+ }
+ else if (!call->IsVirtual())
+ {
+ if (call->gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
+ {
+ chars += printf("[CALL_M_NONVIRT_SAME_THIS]");
+ }
+ }
+
+ if (call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH)
+ {
+ chars += printf("[CALL_M_FRAME_VAR_DEATH]");
+ }
+#ifndef LEGACY_BACKEND
+ if (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL_VIA_HELPER)
+ {
+ chars += printf("[CALL_M_TAILCALL_VIA_HELPER]");
+ }
+#endif
+#if FEATURE_TAILCALL_OPT
+ if (call->gtCallMoreFlags & GTF_CALL_M_IMPLICIT_TAILCALL)
+ {
+ chars += printf("[CALL_M_IMPLICIT_TAILCALL]");
+ }
+#endif
+ if (call->gtCallMoreFlags & GTF_CALL_M_PINVOKE)
+ {
+ chars += printf("[CALL_M_PINVOKE]");
+ }
+ }
+ break;
+
+ case GT_STMT:
+
+ if (tree->gtFlags & GTF_STMT_CMPADD)
+ {
+ chars += printf("[STMT_CMPADD]");
+ }
+ if (tree->gtFlags & GTF_STMT_HAS_CSE)
+ {
+ chars += printf("[STMT_HAS_CSE]");
+ }
+ break;
+
+ default:
+
+ {
+ unsigned flags = (tree->gtFlags & (~(unsigned)(GTF_COMMON_MASK | GTF_OVERFLOW)));
+ if (flags != 0)
+ {
+ chars += printf("[%08X]", flags);
+ }
+ }
+ break;
+ }
+
+ // Common flags.
+
+ if (tree->gtFlags & GTF_ASG)
+ {
+ chars += printf("[ASG]");
+ }
+ if (tree->gtFlags & GTF_CALL)
+ {
+ chars += printf("[CALL]");
+ }
+ switch (op)
+ {
+ case GT_MUL:
+ case GT_CAST:
+ case GT_ADD:
+ case GT_SUB:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ if (tree->gtFlags & GTF_OVERFLOW)
+ {
+ chars += printf("[OVERFLOW]");
+ }
+ break;
+ default:
+ break;
+ }
+ if (tree->gtFlags & GTF_EXCEPT)
+ {
+ chars += printf("[EXCEPT]");
+ }
+ if (tree->gtFlags & GTF_GLOB_REF)
+ {
+ chars += printf("[GLOB_REF]");
+ }
+ if (tree->gtFlags & GTF_ORDER_SIDEEFF)
+ {
+ chars += printf("[ORDER_SIDEEFF]");
+ }
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ if (op != GT_LCL_VAR)
+ {
+ chars += printf("[REVERSE_OPS]");
+ }
+ }
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ chars += printf("[REG_VAL]");
+ }
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ chars += printf("[SPILLED_OPER]");
+ }
+#if defined(LEGACY_BACKEND)
+ if (tree->gtFlags & GTF_SPILLED_OP2)
+ {
+ chars += printf("[SPILLED_OP2]");
+ }
+#endif
+ if (tree->gtFlags & GTF_ZSF_SET)
+ {
+ chars += printf("[ZSF_SET]");
+ }
+#if FEATURE_SET_FLAGS
+ if (tree->gtFlags & GTF_SET_FLAGS)
+ {
+ if ((op != GT_IND) && (op != GT_STOREIND))
+ {
+ chars += printf("[ZSF_SET_FLAGS]");
+ }
+ }
+#endif
+ if (tree->gtFlags & GTF_IND_NONFAULTING)
+ {
+ if ((op == GT_IND) || (op == GT_STOREIND))
+ {
+ chars += printf("[IND_NONFAULTING]");
+ }
+ }
+ if (tree->gtFlags & GTF_MAKE_CSE)
+ {
+ chars += printf("[MAKE_CSE]");
+ }
+ if (tree->gtFlags & GTF_DONT_CSE)
+ {
+ chars += printf("[DONT_CSE]");
+ }
+ if (tree->gtFlags & GTF_BOOLEAN)
+ {
+ chars += printf("[BOOLEAN]");
+ }
+ if (tree->gtFlags & GTF_SMALL_OK)
+ {
+ chars += printf("[SMALL_OK]");
+ }
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ chars += printf("[SMALL_UNSIGNED]");
+ }
+ if (tree->gtFlags & GTF_LATE_ARG)
+ {
+ chars += printf("[SMALL_LATE_ARG]");
+ }
+ if (tree->gtFlags & GTF_SPILL)
+ {
+ chars += printf("[SPILL]");
+ }
+ if (tree->gtFlags & GTF_SPILL_HIGH)
+ {
+ chars += printf("[SPILL_HIGH]");
+ }
+ if (tree->gtFlags & GTF_REUSE_REG_VAL)
+ {
+ if (op == GT_CNS_INT)
+ {
+ chars += printf("[REUSE_REG_VAL]");
+ }
+ }
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node flags for linear IR form
+ */
+
+int dTreeFlagsIR(GenTree* tree)
+{
+ int chars = cTreeFlagsIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out SSA number on tree node for linear IR form
+ */
+
+int cSsaNumIR(Compiler* comp, GenTree* tree)
+{
+ int chars = 0;
+
+ if (tree->gtLclVarCommon.HasSsaName())
+ {
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+ assert(tree->gtFlags & GTF_VAR_DEF);
+ chars += printf("<u:%d><d:%d>", tree->gtLclVarCommon.gtSsaNum, comp->GetSsaNumForLocalVarDef(tree));
+ }
+ else
+ {
+ chars += printf("<%s:%d>", (tree->gtFlags & GTF_VAR_DEF) ? "d" : "u", tree->gtLclVarCommon.gtSsaNum);
+ }
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out SSA number on tree node for linear IR form
+ */
+
+int dSsaNumIR(GenTree* tree)
+{
+ int chars = cSsaNumIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out Value Number on tree node for linear IR form
+ */
+
+int cValNumIR(Compiler* comp, GenTree* tree)
+{
+ int chars = 0;
+
+ if (tree->gtVNPair.GetLiberal() != ValueNumStore::NoVN)
+ {
+ assert(tree->gtVNPair.GetConservative() != ValueNumStore::NoVN);
+ ValueNumPair vnp = tree->gtVNPair;
+ ValueNum vn;
+ if (vnp.BothEqual())
+ {
+ chars += printf("<v:");
+ vn = vnp.GetLiberal();
+ chars += printf(STR_VN "%x", vn);
+ if (ValueNumStore::isReservedVN(vn))
+ {
+ chars += printf("R");
+ }
+ chars += printf(">");
+ }
+ else
+ {
+ vn = vnp.GetLiberal();
+ chars += printf("<v:");
+ chars += printf(STR_VN "%x", vn);
+ if (ValueNumStore::isReservedVN(vn))
+ {
+ chars += printf("R");
+ }
+ chars += printf(",");
+ vn = vnp.GetConservative();
+ chars += printf(STR_VN "%x", vn);
+ if (ValueNumStore::isReservedVN(vn))
+ {
+ chars += printf("R");
+ }
+ chars += printf(">");
+ }
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out Value Number on tree node for linear IR form
+ */
+
+int dValNumIR(GenTree* tree)
+{
+ int chars = cValNumIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree leaf node for linear IR form
+ */
+
+int cLeafIR(Compiler* comp, GenTree* tree)
+{
+ int chars = 0;
+ genTreeOps op = tree->OperGet();
+ const char* ilKind = nullptr;
+ const char* ilName = nullptr;
+ unsigned ilNum = 0;
+ unsigned lclNum = 0;
+ bool hasSsa = false;
+
+ switch (op)
+ {
+
+ case GT_PHI_ARG:
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ case GT_STORE_LCL_VAR:
+ case GT_REG_VAR:
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+ comp->gtGetLclVarNameInfo(lclNum, &ilKind, &ilName, &ilNum);
+ if (ilName != nullptr)
+ {
+ chars += printf("%s", ilName);
+ }
+ else
+ {
+ LclVarDsc* varDsc = comp->lvaTable + lclNum;
+ chars += printf("%s%d", ilKind, ilNum);
+ if (comp->dumpIRLocals)
+ {
+ chars += printf("(V%02u", lclNum);
+ if (varDsc->lvTracked)
+ {
+ chars += printf(":T%02u", varDsc->lvVarIndex);
+ }
+ if (comp->dumpIRRegs)
+ {
+ if (varDsc->lvRegister)
+ {
+ if (isRegPairType(varDsc->TypeGet()))
+ {
+ chars += printf(":%s:%s",
+ getRegName(varDsc->lvOtherReg), // hi32
+ getRegName(varDsc->lvRegNum)); // lo32
+ }
+ else
+ {
+ chars += printf(":%s", getRegName(varDsc->lvRegNum));
+ }
+ }
+ else
+ {
+ switch (tree->GetRegTag())
+ {
+ case GenTree::GT_REGTAG_REG:
+ chars += printf(":%s", comp->compRegVarName(tree->gtRegNum));
+ break;
+#if CPU_LONG_USES_REGPAIR
+ case GenTree::GT_REGTAG_REGPAIR:
+ chars += printf(":%s", comp->compRegPairName(tree->gtRegPair));
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+ }
+ chars += printf(")");
+ }
+ else if (comp->dumpIRRegs)
+ {
+ if (varDsc->lvRegister)
+ {
+ chars += printf("(");
+ if (isRegPairType(varDsc->TypeGet()))
+ {
+ chars += printf("%s:%s",
+ getRegName(varDsc->lvOtherReg), // hi32
+ getRegName(varDsc->lvRegNum)); // lo32
+ }
+ else
+ {
+ chars += printf("%s", getRegName(varDsc->lvRegNum));
+ }
+ chars += printf(")");
+ }
+ else
+ {
+ switch (tree->GetRegTag())
+ {
+ case GenTree::GT_REGTAG_REG:
+ chars += printf("(%s)", comp->compRegVarName(tree->gtRegNum));
+ break;
+#if CPU_LONG_USES_REGPAIR
+ case GenTree::GT_REGTAG_REGPAIR:
+ chars += printf("(%s)", comp->compRegPairName(tree->gtRegPair));
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+ }
+ }
+
+ if (op == GT_REG_VAR)
+ {
+ if (isFloatRegType(tree->gtType))
+ {
+ assert(tree->gtRegVar.gtRegNum == tree->gtRegNum);
+ chars += printf("(FPV%u)", tree->gtRegNum);
+ }
+ else
+ {
+ chars += printf("(%s)", comp->compRegVarName(tree->gtRegVar.gtRegNum));
+ }
+ }
+
+ hasSsa = true;
+ break;
+
+ case GT_LCL_FLD:
+ case GT_LCL_FLD_ADDR:
+ case GT_STORE_LCL_FLD:
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+ comp->gtGetLclVarNameInfo(lclNum, &ilKind, &ilName, &ilNum);
+ if (ilName != nullptr)
+ {
+ chars += printf("%s+%u", ilName, tree->gtLclFld.gtLclOffs);
+ }
+ else
+ {
+ chars += printf("%s%d+%u", ilKind, ilNum, tree->gtLclFld.gtLclOffs);
+ LclVarDsc* varDsc = comp->lvaTable + lclNum;
+ if (comp->dumpIRLocals)
+ {
+ chars += printf("(V%02u", lclNum);
+ if (varDsc->lvTracked)
+ {
+ chars += printf(":T%02u", varDsc->lvVarIndex);
+ }
+ if (comp->dumpIRRegs)
+ {
+ if (varDsc->lvRegister)
+ {
+ if (isRegPairType(varDsc->TypeGet()))
+ {
+ chars += printf(":%s:%s",
+ getRegName(varDsc->lvOtherReg), // hi32
+ getRegName(varDsc->lvRegNum)); // lo32
+ }
+ else
+ {
+ chars += printf(":%s", getRegName(varDsc->lvRegNum));
+ }
+ }
+ else
+ {
+ switch (tree->GetRegTag())
+ {
+ case GenTree::GT_REGTAG_REG:
+ chars += printf(":%s", comp->compRegVarName(tree->gtRegNum));
+ break;
+#if CPU_LONG_USES_REGPAIR
+ case GenTree::GT_REGTAG_REGPAIR:
+ chars += printf(":%s", comp->compRegPairName(tree->gtRegPair));
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+ }
+ chars += printf(")");
+ }
+ else if (comp->dumpIRRegs)
+ {
+ if (varDsc->lvRegister)
+ {
+ chars += printf("(");
+ if (isRegPairType(varDsc->TypeGet()))
+ {
+ chars += printf("%s:%s",
+ getRegName(varDsc->lvOtherReg), // hi32
+ getRegName(varDsc->lvRegNum)); // lo32
+ }
+ else
+ {
+ chars += printf("%s", getRegName(varDsc->lvRegNum));
+ }
+ chars += printf(")");
+ }
+ else
+ {
+ switch (tree->GetRegTag())
+ {
+ case GenTree::GT_REGTAG_REG:
+ chars += printf("(%s)", comp->compRegVarName(tree->gtRegNum));
+ break;
+#if CPU_LONG_USES_REGPAIR
+ case GenTree::GT_REGTAG_REGPAIR:
+ chars += printf("(%s)", comp->compRegPairName(tree->gtRegPair));
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+ }
+ }
+
+ // TODO: We probably want to expand field sequence.
+ // gtDispFieldSeq(tree->gtLclFld.gtFieldSeq);
+
+ hasSsa = true;
+ break;
+
+ case GT_CNS_INT:
+
+ if (tree->IsIconHandle())
+ {
+#if 0
+ // TODO: Commented out because sometimes the CLR throws
+ // an exception when asked for the names of some handles.
+ // Need to investigate.
+
+ const char* className;
+ const char* fieldName;
+ const char* methodName;
+ const wchar_t* str;
+
+ switch (tree->GetIconHandleFlag())
+ {
+
+ case GTF_ICON_SCOPE_HDL:
+
+ chars += printf("SCOPE(?)");
+ break;
+
+ case GTF_ICON_CLASS_HDL:
+
+ className = comp->eeGetClassName((CORINFO_CLASS_HANDLE)tree->gtIntCon.gtIconVal);
+ chars += printf("CLASS(%s)", className);
+ break;
+
+ case GTF_ICON_METHOD_HDL:
+
+ methodName = comp->eeGetMethodName((CORINFO_METHOD_HANDLE)tree->gtIntCon.gtIconVal,
+ &className);
+ chars += printf("METHOD(%s.%s)", className, methodName);
+ break;
+
+ case GTF_ICON_FIELD_HDL:
+
+ fieldName = comp->eeGetFieldName((CORINFO_FIELD_HANDLE)tree->gtIntCon.gtIconVal,
+ &className);
+ chars += printf("FIELD(%s.%s) ", className, fieldName);
+ break;
+
+ case GTF_ICON_STATIC_HDL:
+
+ fieldName = comp->eeGetFieldName((CORINFO_FIELD_HANDLE)tree->gtIntCon.gtIconVal,
+ &className);
+ chars += printf("STATIC_FIELD(%s.%s)", className, fieldName);
+ break;
+
+ case GTF_ICON_STR_HDL:
+
+ str = comp->eeGetCPString(tree->gtIntCon.gtIconVal);
+ chars += printf("\"%S\"", str);
+ break;
+
+ case GTF_ICON_PSTR_HDL:
+
+ chars += printf("PSTR(?)");
+ break;
+
+ case GTF_ICON_PTR_HDL:
+
+ chars += printf("PTR(?)");
+ break;
+
+ case GTF_ICON_VARG_HDL:
+
+ chars += printf("VARARG(?)");
+ break;
+
+ case GTF_ICON_PINVKI_HDL:
+
+ chars += printf("PINVOKE(?)");
+ break;
+
+ case GTF_ICON_TOKEN_HDL:
+
+ chars += printf("TOKEN(%08X)", tree->gtIntCon.gtIconVal);
+ break;
+
+ case GTF_ICON_TLS_HDL:
+
+ chars += printf("TLS(?)");
+ break;
+
+ case GTF_ICON_FTN_ADDR:
+
+ chars += printf("FTN(?)");
+ break;
+
+ case GTF_ICON_CIDMID_HDL:
+
+ chars += printf("CIDMID(?)");
+ break;
+
+ case GTF_ICON_BBC_PTR:
+
+ chars += printf("BBC(?)");
+ break;
+
+ default:
+
+ chars += printf("HANDLE(?)");
+ break;
+ }
+#else
+#ifdef _TARGET_64BIT_
+ if ((tree->gtIntCon.gtIconVal & 0xFFFFFFFF00000000LL) != 0)
+ {
+ chars += printf("HANDLE(0x%llx)", dspPtr(tree->gtIntCon.gtIconVal));
+ }
+ else
+#endif
+ {
+ chars += printf("HANDLE(0x%0x)", dspPtr(tree->gtIntCon.gtIconVal));
+ }
+#endif
+ }
+ else
+ {
+ if (tree->TypeGet() == TYP_REF)
+ {
+ assert(tree->gtIntCon.gtIconVal == 0);
+ chars += printf("null");
+ }
+#ifdef _TARGET_64BIT_
+ else if ((tree->gtIntCon.gtIconVal & 0xFFFFFFFF00000000LL) != 0)
+ {
+ chars += printf("0x%llx", tree->gtIntCon.gtIconVal);
+ }
+ else
+#endif
+ {
+ chars += printf("%ld(0x%x)", tree->gtIntCon.gtIconVal, tree->gtIntCon.gtIconVal);
+ }
+ }
+ break;
+
+ case GT_CNS_LNG:
+
+ chars += printf("CONST(LONG)");
+ break;
+
+ case GT_CNS_DBL:
+
+ chars += printf("CONST(DOUBLE)");
+ break;
+
+ case GT_CNS_STR:
+
+ chars += printf("CONST(STR)");
+ break;
+
+ case GT_JMP:
+
+ {
+ const char* methodName;
+ const char* className;
+
+ methodName = comp->eeGetMethodName((CORINFO_METHOD_HANDLE)tree->gtVal.gtVal1, &className);
+ chars += printf(" %s.%s", className, methodName);
+ }
+ break;
+
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ case GT_CATCH_ARG:
+ case GT_MEMORYBARRIER:
+ case GT_ARGPLACE:
+ case GT_PINVOKE_PROLOG:
+#ifndef LEGACY_BACKEND
+ case GT_JMPTABLE:
+#endif
+ // Do nothing.
+ break;
+
+ case GT_RET_EXPR:
+
+ chars += printf("t%d", tree->gtRetExpr.gtInlineCandidate->gtTreeID);
+ break;
+
+ case GT_PHYSREG:
+
+ chars += printf("%s", getRegName(tree->gtPhysReg.gtSrcReg, varTypeIsFloating(tree)));
+ break;
+
+ case GT_LABEL:
+
+ if (tree->gtLabel.gtLabBB)
+ {
+ chars += printf("BB%02u", tree->gtLabel.gtLabBB->bbNum);
+ }
+ else
+ {
+ chars += printf("BB?");
+ }
+ break;
+
+ case GT_IL_OFFSET:
+
+ if (tree->gtStmt.gtStmtILoffsx == BAD_IL_OFFSET)
+ {
+ chars += printf("?");
+ }
+ else
+ {
+ chars += printf("0x%x", jitGetILoffs(tree->gtStmt.gtStmtILoffsx));
+ }
+ break;
+
+ case GT_CLS_VAR:
+ case GT_CLS_VAR_ADDR:
+ default:
+
+ if (tree->OperIsLeaf())
+ {
+ chars += printf("<leaf nyi: %s>", tree->OpName(tree->OperGet()));
+ }
+
+ chars += printf("t%d", tree->gtTreeID);
+ break;
+ }
+
+ if (comp->dumpIRTypes)
+ {
+ chars += cTreeTypeIR(comp, tree);
+ }
+ if (comp->dumpIRValnums)
+ {
+ chars += cValNumIR(comp, tree);
+ }
+ if (hasSsa && comp->dumpIRSsa)
+ {
+ chars += cSsaNumIR(comp, tree);
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree leaf node for linear IR form
+ */
+
+int dLeafIR(GenTree* tree)
+{
+ int chars = cLeafIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree indir node for linear IR form
+ */
+
+int cIndirIR(Compiler* comp, GenTree* tree)
+{
+ assert(tree->gtOper == GT_IND);
+
+ int chars = 0;
+ GenTree* child;
+
+ chars += printf("[");
+ child = tree->GetChild(0);
+ chars += cLeafIR(comp, child);
+ chars += printf("]");
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree indir node for linear IR form
+ */
+
+int dIndirIR(GenTree* tree)
+{
+ int chars = cIndirIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree operand node for linear IR form
+ */
+
+int cOperandIR(Compiler* comp, GenTree* operand)
+{
+ int chars = 0;
+
+ if (operand == nullptr)
+ {
+ chars += printf("t?");
+ return chars;
+ }
+
+ bool dumpTypes = comp->dumpIRTypes;
+ bool dumpValnums = comp->dumpIRValnums;
+ bool foldIndirs = comp->dumpIRDataflow;
+ bool foldLeafs = comp->dumpIRNoLeafs;
+ bool foldCommas = comp->dumpIRDataflow;
+ bool dumpDataflow = comp->dumpIRDataflow;
+ bool foldLists = comp->dumpIRNoLists;
+ bool dumpRegs = comp->dumpIRRegs;
+
+ genTreeOps op = operand->OperGet();
+
+ if (foldLeafs && operand->OperIsLeaf())
+ {
+ if ((op == GT_ARGPLACE) && foldLists)
+ {
+ return chars;
+ }
+ chars += cLeafIR(comp, operand);
+ }
+ else if (dumpDataflow && (operand->OperIsAssignment() || (op == GT_STORE_LCL_VAR) || (op == GT_STORE_LCL_FLD)))
+ {
+ operand = operand->GetChild(0);
+ chars += cOperandIR(comp, operand);
+ }
+ else if ((op == GT_INDEX) && foldIndirs)
+ {
+ chars += printf("[t%d]", operand->gtTreeID);
+ if (dumpTypes)
+ {
+ chars += cTreeTypeIR(comp, operand);
+ }
+ if (dumpValnums)
+ {
+ chars += cValNumIR(comp, operand);
+ }
+ }
+ else if ((op == GT_IND) && foldIndirs)
+ {
+ chars += cIndirIR(comp, operand);
+ if (dumpTypes)
+ {
+ chars += cTreeTypeIR(comp, operand);
+ }
+ if (dumpValnums)
+ {
+ chars += cValNumIR(comp, operand);
+ }
+ }
+ else if ((op == GT_COMMA) && foldCommas)
+ {
+ operand = operand->GetChild(1);
+ chars += cOperandIR(comp, operand);
+ }
+ else if ((op == GT_LIST) && foldLists)
+ {
+ GenTree* list = operand;
+ unsigned childCount = list->NumChildren();
+
+ operand = list->GetChild(0);
+ int operandChars = cOperandIR(comp, operand);
+ chars += operandChars;
+ if (childCount > 1)
+ {
+ if (operandChars > 0)
+ {
+ chars += printf(", ");
+ }
+ operand = list->GetChild(1);
+ if (operand->gtOper == GT_LIST)
+ {
+ chars += cListIR(comp, operand);
+ }
+ else
+ {
+ chars += cOperandIR(comp, operand);
+ }
+ }
+ }
+ else
+ {
+ chars += printf("t%d", operand->gtTreeID);
+ if (dumpRegs)
+ {
+ regNumber regNum = operand->GetReg();
+ if (regNum != REG_NA)
+ {
+ chars += printf("(%s)", getRegName(regNum));
+ }
+ }
+ if (dumpTypes)
+ {
+ chars += cTreeTypeIR(comp, operand);
+ }
+ if (dumpValnums)
+ {
+ chars += cValNumIR(comp, operand);
+ }
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree operand node for linear IR form
+ */
+
+int dOperandIR(GenTree* operand)
+{
+ int chars = cOperandIR(JitTls::GetCompiler(), operand);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree list of nodes for linear IR form
+ */
+
+int cListIR(Compiler* comp, GenTree* list)
+{
+ int chars = 0;
+ int operandChars;
+
+ assert(list->gtOper == GT_LIST);
+
+ GenTree* child;
+ unsigned childCount;
+
+ childCount = list->NumChildren();
+ assert(childCount == 1 || childCount == 2);
+
+ operandChars = 0;
+ for (unsigned childIndex = 0; childIndex < childCount; childIndex++)
+ {
+ if ((childIndex > 0) && (operandChars > 0))
+ {
+ chars += printf(", ");
+ }
+
+ child = list->GetChild(childIndex);
+ operandChars = cOperandIR(comp, child);
+ chars += operandChars;
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree list of nodes for linear IR form
+ */
+
+int dListIR(GenTree* list)
+{
+ int chars = cListIR(JitTls::GetCompiler(), list);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree dependencies based on comma nodes for linear IR form
+ */
+
+int cDependsIR(Compiler* comp, GenTree* comma, bool* first)
+{
+ int chars = 0;
+
+ assert(comma->gtOper == GT_COMMA);
+
+ GenTree* child;
+
+ child = comma->GetChild(0);
+ if (child->gtOper == GT_COMMA)
+ {
+ chars += cDependsIR(comp, child, first);
+ }
+ else
+ {
+ if (!(*first))
+ {
+ chars += printf(", ");
+ }
+ chars += printf("t%d", child->gtTreeID);
+ *first = false;
+ }
+
+ child = comma->GetChild(1);
+ if (child->gtOper == GT_COMMA)
+ {
+ chars += cDependsIR(comp, child, first);
+ }
+
+ return chars;
+}
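+
+// Illustrative output (tree IDs are hypothetical): for a node whose operand is a
+// comma chain producing t12 and t15, the ids printed above are consumed by cNodeIR,
+// which wraps them as ", DEPS(t12, t15)"; the 'first' flag suppresses the leading
+// comma for the first dependency only.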
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree dependencies based on comma nodes for linear IR form
+ */
+
+int dDependsIR(GenTree* comma)
+{
+ int chars = 0;
+ bool first = true;
+
+ chars = cDependsIR(JitTls::GetCompiler(), comma, &first);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node in linear IR form
+ */
+
+void cNodeIR(Compiler* comp, GenTree* tree)
+{
+ bool foldLeafs = comp->dumpIRNoLeafs;
+ bool foldIndirs = comp->dumpIRDataflow;
+ bool foldLists = comp->dumpIRNoLists;
+ bool dataflowView = comp->dumpIRDataflow;
+ bool dumpTypes = comp->dumpIRTypes;
+ bool dumpValnums = comp->dumpIRValnums;
+ bool noStmts = comp->dumpIRNoStmts;
+ genTreeOps op = tree->OperGet();
+ unsigned childCount = tree->NumChildren();
+ GenTree* child;
+
+ // What are we skipping?
+
+ if (tree->OperIsLeaf())
+ {
+ if (foldLeafs)
+ {
+ return;
+ }
+ }
+ else if (op == GT_IND)
+ {
+ if (foldIndirs)
+ {
+ return;
+ }
+ }
+ else if (op == GT_LIST)
+ {
+ if (foldLists)
+ {
+ return;
+ }
+ }
+ else if (op == GT_STMT)
+ {
+ if (noStmts)
+ {
+ if (dataflowView)
+ {
+ child = tree->GetChild(0);
+ if (child->gtOper != GT_COMMA)
+ {
+ return;
+ }
+ }
+ else
+ {
+ return;
+ }
+ }
+ }
+ else if (op == GT_COMMA)
+ {
+ if (dataflowView)
+ {
+ return;
+ }
+ }
+
+ bool nodeIsValue = tree->IsValue();
+
+ // Dump tree id or dataflow destination.
+
+ int chars = 0;
+
+ // if (comp->compRationalIRForm)
+ // {
+ // chars += printf("R");
+ // }
+
+ chars += printf(" ");
+ if (dataflowView && tree->OperIsAssignment())
+ {
+ child = tree->GetChild(0);
+ chars += cOperandIR(comp, child);
+ }
+ else if (dataflowView && ((op == GT_STORE_LCL_VAR) || (op == GT_STORE_LCL_FLD)))
+ {
+ chars += cLeafIR(comp, tree);
+ }
+ else if (dataflowView && (op == GT_STOREIND))
+ {
+ child = tree->GetChild(0);
+ chars += printf("[");
+ chars += cOperandIR(comp, child);
+ chars += printf("]");
+ if (dumpTypes)
+ {
+ chars += cTreeTypeIR(comp, tree);
+ }
+ if (dumpValnums)
+ {
+ chars += cValNumIR(comp, tree);
+ }
+ }
+ else if (nodeIsValue)
+ {
+ chars += printf("t%d", tree->gtTreeID);
+ if (comp->dumpIRRegs)
+ {
+ regNumber regNum = tree->GetReg();
+ if (regNum != REG_NA)
+ {
+ chars += printf("(%s)", getRegName(regNum));
+ }
+ }
+ if (dumpTypes)
+ {
+ chars += cTreeTypeIR(comp, tree);
+ }
+ if (dumpValnums)
+ {
+ chars += cValNumIR(comp, tree);
+ }
+ }
+
+ // Dump opcode and tree ID if needed in dataflow view.
+
+ chars += dTabStopIR(chars, COLUMN_OPCODE);
+ const char* opName = tree->OpName(op);
+ chars += printf(" %c %s", nodeIsValue ? '=' : ' ', opName);
+
+ if (dataflowView)
+ {
+ if (tree->OperIsAssignment() || (op == GT_STORE_LCL_VAR) || (op == GT_STORE_LCL_FLD) || (op == GT_STOREIND))
+ {
+ chars += printf("(t%d)", tree->gtTreeID);
+ }
+ }
+
+ // Dump modifiers for opcodes to help with readability
+
+ if (op == GT_CALL)
+ {
+ GenTreeCall* call = tree->AsCall();
+
+ if (call->gtCallType == CT_USER_FUNC)
+ {
+ if (call->IsVirtualStub())
+ {
+ chars += printf(":VS");
+ }
+ else if (call->IsVirtualVtable())
+ {
+ chars += printf(":VT");
+ }
+ else if (call->IsVirtual())
+ {
+ chars += printf(":V");
+ }
+ }
+ else if (call->gtCallType == CT_HELPER)
+ {
+ chars += printf(":H");
+ }
+ else if (call->gtCallType == CT_INDIRECT)
+ {
+ chars += printf(":I");
+ }
+ else if (call->IsUnmanaged())
+ {
+ chars += printf(":U");
+ }
+ else
+ {
+ if (call->IsVirtualStub())
+ {
+ chars += printf(":XVS");
+ }
+ else if (call->IsVirtualVtable())
+ {
+ chars += printf(":XVT");
+ }
+ else
+ {
+ chars += printf(":?");
+ }
+ }
+
+ if (call->IsUnmanaged())
+ {
+ if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
+ {
+ chars += printf(":T");
+ }
+ }
+
+ if (tree->gtFlags & GTF_CALL_NULLCHECK)
+ {
+ chars += printf(":N");
+ }
+ }
+ else if (op == GT_INTRINSIC)
+ {
+ CorInfoIntrinsics intrin = tree->gtIntrinsic.gtIntrinsicId;
+
+ chars += printf(":");
+ switch (intrin)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ chars += printf("Sin");
+ break;
+ case CORINFO_INTRINSIC_Cos:
+ chars += printf("Cos");
+ break;
+ case CORINFO_INTRINSIC_Sqrt:
+ chars += printf("Sqrt");
+ break;
+ case CORINFO_INTRINSIC_Cosh:
+ chars += printf("Cosh");
+ break;
+ case CORINFO_INTRINSIC_Sinh:
+ chars += printf("Sinh");
+ break;
+ case CORINFO_INTRINSIC_Tan:
+ chars += printf("Tan");
+ break;
+ case CORINFO_INTRINSIC_Tanh:
+ chars += printf("Tanh");
+ break;
+ case CORINFO_INTRINSIC_Asin:
+ chars += printf("Asin");
+ break;
+ case CORINFO_INTRINSIC_Acos:
+ chars += printf("Acos");
+ break;
+ case CORINFO_INTRINSIC_Atan:
+ chars += printf("Atan");
+ break;
+ case CORINFO_INTRINSIC_Atan2:
+ chars += printf("Atan2");
+ break;
+ case CORINFO_INTRINSIC_Log10:
+ chars += printf("Log10");
+ break;
+ case CORINFO_INTRINSIC_Pow:
+ chars += printf("Pow");
+ break;
+ case CORINFO_INTRINSIC_Exp:
+ chars += printf("Exp");
+ break;
+ case CORINFO_INTRINSIC_Ceiling:
+ chars += printf("Ceiling");
+ break;
+ case CORINFO_INTRINSIC_Floor:
+ chars += printf("Floor");
+ break;
+ default:
+ chars += printf("unknown(%d)", intrin);
+ break;
+ }
+ }
+
+ // Dump operands.
+
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+
+ // Dump operator specific fields as operands
+
+ switch (op)
+ {
+ default:
+ break;
+ case GT_FIELD:
+
+ {
+ const char* className = nullptr;
+ const char* fieldName = comp->eeGetFieldName(tree->gtField.gtFldHnd, &className);
+
+ chars += printf(" %s.%s", className, fieldName);
+ }
+ break;
+
+ case GT_CALL:
+
+ if (tree->gtCall.gtCallType != CT_INDIRECT)
+ {
+ const char* methodName;
+ const char* className;
+
+ methodName = comp->eeGetMethodName(tree->gtCall.gtCallMethHnd, &className);
+
+ chars += printf(" %s.%s", className, methodName);
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ case GT_STORE_LCL_FLD:
+
+ if (!dataflowView)
+ {
+ chars += printf(" ");
+ chars += cLeafIR(comp, tree);
+ }
+ break;
+
+ case GT_STORE_CLS_VAR:
+
+ chars += printf(" ???");
+ break;
+
+ case GT_LEA:
+
+ GenTreeAddrMode* lea = tree->AsAddrMode();
+ GenTree* base = lea->Base();
+ GenTree* index = lea->Index();
+ unsigned scale = lea->gtScale;
+ unsigned offset = lea->gtOffset;
+
+ chars += printf(" [");
+ if (base != nullptr)
+ {
+ chars += cOperandIR(comp, base);
+ }
+ if (index != nullptr)
+ {
+ if (base != nullptr)
+ {
+ chars += printf("+");
+ }
+ chars += cOperandIR(comp, index);
+ if (scale > 1)
+ {
+ chars += printf("*%u", scale);
+ }
+ }
+ if ((offset != 0) || ((base == nullptr) && (index == nullptr)))
+ {
+ if ((base != nullptr) || (index != nullptr))
+ {
+ chars += printf("+");
+ }
+ chars += printf("%u", offset);
+ }
+ chars += printf("]");
+ break;
+ }
+
+ // Dump operands.
+
+ if (tree->OperIsLeaf())
+ {
+ chars += printf(" ");
+ chars += cLeafIR(comp, tree);
+ }
+ else if (op == GT_LEA)
+ {
+ // Already dumped it above.
+ }
+ else if (op == GT_PHI)
+ {
+ if (tree->gtOp.gtOp1 != nullptr)
+ {
+ bool first = true;
+ for (GenTreeArgList* args = tree->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ child = args->Current();
+ if (!first)
+ {
+ chars += printf(",");
+ }
+ first = false;
+ chars += printf(" ");
+ chars += cOperandIR(comp, child);
+ }
+ }
+ }
+ else
+ {
+ bool hasComma = false;
+ bool first = true;
+ int operandChars = 0;
+ for (unsigned childIndex = 0; childIndex < childCount; childIndex++)
+ {
+ child = tree->GetChild(childIndex);
+ if (child == nullptr)
+ {
+ continue;
+ }
+
+ if (child->gtOper == GT_COMMA)
+ {
+ hasComma = true;
+ }
+
+ if (dataflowView && (childIndex == 0))
+ {
+ if ((op == GT_ASG) || (op == GT_STOREIND))
+ {
+ continue;
+ }
+ }
+
+ if (!first)
+ {
+ chars += printf(",");
+ }
+
+ bool isList = (child->gtOper == GT_LIST);
+ if (!isList || !foldLists)
+ {
+ if (foldLeafs && (child->gtOper == GT_ARGPLACE))
+ {
+ continue;
+ }
+ chars += printf(" ");
+ operandChars = cOperandIR(comp, child);
+ chars += operandChars;
+ if (operandChars > 0)
+ {
+ first = false;
+ }
+ }
+ else
+ {
+ assert(isList);
+ chars += printf(" ");
+ operandChars = cOperandIR(comp, child);
+ chars += operandChars;
+ if (operandChars > 0)
+ {
+ first = false;
+ }
+ }
+ }
+
+ if (dataflowView && hasComma)
+ {
+ chars += printf(", DEPS(");
+ first = true;
+ for (unsigned childIndex = 0; childIndex < childCount; childIndex++)
+ {
+ child = tree->GetChild(childIndex);
+ if (child->gtOper == GT_COMMA)
+ {
+ chars += cDependsIR(comp, child, &first);
+ }
+ }
+ chars += printf(")");
+ }
+ }
+
+ // Dump kinds, flags, costs
+
+ if (comp->dumpIRKinds || comp->dumpIRFlags || comp->dumpIRCosts)
+ {
+ chars += dTabStopIR(chars, COLUMN_KINDS);
+ chars += printf(";");
+ if (comp->dumpIRKinds)
+ {
+ chars += printf(" ");
+ chars += cTreeKindsIR(comp, tree);
+ }
+ if (comp->dumpIRFlags && (tree->gtFlags != 0))
+ {
+ if (comp->dumpIRKinds)
+ {
+ chars += dTabStopIR(chars, COLUMN_FLAGS);
+ }
+ else
+ {
+ chars += printf(" ");
+ }
+ chars += cTreeFlagsIR(comp, tree);
+ }
+ if (comp->dumpIRCosts && (tree->gtCostsInitialized))
+ {
+ chars += printf(" CostEx=%d, CostSz=%d", tree->GetCostEx(), tree->GetCostSz());
+ }
+ }
+
+ printf("\n");
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree in linear IR form
+ */
+
+void cTreeIR(Compiler* comp, GenTree* tree)
+{
+ bool foldLeafs = comp->dumpIRNoLeafs;
+ bool foldIndirs = comp->dumpIRDataflow;
+ bool foldLists = comp->dumpIRNoLists;
+ bool dataflowView = comp->dumpIRDataflow;
+ bool dumpTypes = comp->dumpIRTypes;
+ bool dumpValnums = comp->dumpIRValnums;
+ bool noStmts = comp->dumpIRNoStmts;
+ genTreeOps op = tree->OperGet();
+ unsigned childCount = tree->NumChildren();
+ GenTree* child;
+
+ // Recurse and dump trees that this node depends on.
+
+ if (tree->OperIsLeaf())
+ {
+ }
+ else if (tree->OperIsBinary() && tree->IsReverseOp())
+ {
+ child = tree->GetChild(1);
+ cTreeIR(comp, child);
+ child = tree->GetChild(0);
+ cTreeIR(comp, child);
+ }
+ else if (op == GT_PHI)
+ {
+ // Don't recurse.
+ }
+ else
+ {
+ assert(!tree->IsReverseOp());
+ for (unsigned childIndex = 0; childIndex < childCount; childIndex++)
+ {
+ child = tree->GetChild(childIndex);
+ if (child != nullptr)
+ {
+ cTreeIR(comp, child);
+ }
+ }
+ }
+
+ cNodeIR(comp, tree);
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree in linear IR form
+ */
+
+void dTreeIR(GenTree* tree)
+{
+ cTreeIR(JitTls::GetCompiler(), tree);
+}
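+
+// Usage sketch (assumption: like the other d-prefixed helpers above, this is intended
+// to be called by hand from a native debugger while stopped inside the JIT, where
+// JitTls::GetCompiler() yields the compiler for the method being jitted):
+//
+//   cTreeIR(compiler, tree);  // explicit Compiler* form
+//   dTreeIR(tree);            // debugger convenience form, no Compiler* needed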
+
+#endif // DEBUG
+
+#if VARSET_COUNTOPS
+// static
+BitSetSupport::BitSetOpCounter Compiler::m_varsetOpCounter("VarSetOpCounts.log");
+#endif
+#if ALLVARSET_COUNTOPS
+// static
+BitSetSupport::BitSetOpCounter Compiler::m_allvarsetOpCounter("AllVarSetOpCounts.log");
+#endif
+
+// static
+HelperCallProperties Compiler::s_helperCallProperties;
+
+/*****************************************************************************/
+/*****************************************************************************/
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
new file mode 100644
index 0000000000..05047c5ecb
--- /dev/null
+++ b/src/jit/compiler.h
@@ -0,0 +1,9301 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Compiler XX
+XX XX
+XX Represents the method data we are currently JIT-compiling. XX
+XX An instance of this class is created for every method we JIT. XX
+XX This contains all the info needed for the method. So allocating a new XX
+XX instance per method makes it thread-safe. XX
+XX It should be used to do all the memory management for the compiler run. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef _COMPILER_H_
+#define _COMPILER_H_
+/*****************************************************************************/
+
+#include "jit.h"
+#include "opcode.h"
+#include "varset.h"
+#include "gentree.h"
+#include "lir.h"
+#include "block.h"
+#include "inline.h"
+#include "jiteh.h"
+#include "instr.h"
+#include "regalloc.h"
+#include "sm.h"
+#include "simplerhash.h"
+#include "cycletimer.h"
+#include "blockset.h"
+#include "jitstd.h"
+#include "arraystack.h"
+#include "hashbv.h"
+#include "fp.h"
+#include "expandarray.h"
+#include "tinyarray.h"
+#include "valuenum.h"
+#include "reglist.h"
+#include "jittelemetry.h"
+#ifdef LATE_DISASM
+#include "disasm.h"
+#endif
+
+#include "codegeninterface.h"
+#include "regset.h"
+#include "jitgcinfo.h"
+
+#if DUMP_GC_TABLES && defined(JIT32_GCENCODER)
+#include "gcdump.h"
+#endif
+
+#include "emit.h"
+
+#include "simd.h"
+
+// This is only used locally in the JIT to indicate that
+// a verification block should be inserted
+#define SEH_VERIFICATION_EXCEPTION 0xe0564552 // VER
+
+/*****************************************************************************
+ * Forward declarations
+ */
+
+struct InfoHdr; // defined in GCInfo.h
+struct escapeMapping_t; // defined in flowgraph.cpp
+class emitter; // defined in emit.h
+struct ShadowParamVarInfo; // defined in GSChecks.cpp
+struct InitVarDscInfo; // defined in register_arg_convention.h
+class FgStack; // defined in flowgraph.cpp
+#if FEATURE_STACK_FP_X87
+struct FlatFPStateX87; // defined in fp.h
+#endif
+#if FEATURE_ANYCSE
+class CSE_DataFlow; // defined in OptCSE.cpp
+#endif
+#ifdef DEBUG
+struct IndentStack;
+#endif
+
+// The following are defined in this file, Compiler.h
+
+class Compiler;
+
+/*****************************************************************************
+ * Unwind info
+ */
+
+#include "unwind.h"
+
+/*****************************************************************************/
+
+//
+// Declare global operator new overloads that use the Compiler::compGetMem() function for allocation.
+//
+
+// Or the more-general IAllocator interface.
+void* __cdecl operator new(size_t n, IAllocator* alloc);
+void* __cdecl operator new[](size_t n, IAllocator* alloc);
+
+// I wanted to make the second argument optional, with default = CMK_Unknown, but that
+// caused these to be ambiguous with the global placement new operators.
+void* __cdecl operator new(size_t n, Compiler* context, CompMemKind cmk);
+void* __cdecl operator new[](size_t n, Compiler* context, CompMemKind cmk);
+void* __cdecl operator new(size_t n, void* p, const jitstd::placement_t& syntax_difference);
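+
+// Usage sketch for the overloads above (allocation sizes and the JitTimer form are
+// illustrative; JitTimer::Create later in this header uses the same pattern):
+//
+//   int*      scratch = new (alloc) int[16];                      // IAllocator overload
+//   JitTimer* timer   = ::new (comp, CMK_Unknown) JitTimer(size); // Compiler/CompMemKind overload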
+
+// Requires the definitions of "operator new" so including "LoopCloning.h" after the definitions.
+#include "loopcloning.h"
+
+/*****************************************************************************/
+
+/* This is included here and not earlier as it needs the definition of "CSE"
+ * which is defined in the section above */
+
+/*****************************************************************************/
+
+unsigned genLog2(unsigned value);
+unsigned genLog2(unsigned __int64 value);
+
+var_types genActualType(var_types type);
+var_types genUnsignedType(var_types type);
+var_types genSignedType(var_types type);
+
+unsigned ReinterpretHexAsDecimal(unsigned);
+
+/*****************************************************************************/
+
+#ifdef FEATURE_SIMD
+#ifdef FEATURE_AVX_SUPPORT
+const unsigned TEMP_MAX_SIZE = YMM_REGSIZE_BYTES;
+#else // !FEATURE_AVX_SUPPORT
+const unsigned TEMP_MAX_SIZE = XMM_REGSIZE_BYTES;
+#endif // !FEATURE_AVX_SUPPORT
+#else // !FEATURE_SIMD
+const unsigned TEMP_MAX_SIZE = sizeof(double);
+#endif // !FEATURE_SIMD
+const unsigned TEMP_SLOT_COUNT = (TEMP_MAX_SIZE / sizeof(int));
+
+const unsigned FLG_CCTOR = (CORINFO_FLG_CONSTRUCTOR | CORINFO_FLG_STATIC);
+
+#ifdef DEBUG
+const int BAD_STK_OFFS = 0xBAADF00D; // for LclVarDsc::lvStkOffs
+#endif
+
+// The following holds the Local var info (scope information)
+typedef const char* VarName; // Actual ASCII string
+struct VarScopeDsc
+{
+ IL_OFFSET vsdLifeBeg; // instr offset of beg of life
+ IL_OFFSET vsdLifeEnd; // instr offset of end of life
+ unsigned vsdVarNum; // (remapped) LclVarDsc number
+
+#ifdef DEBUG
+ VarName vsdName; // name of the var
+#endif
+
+ unsigned vsdLVnum; // 'which' in eeGetLVinfo().
+ // Also, it is the index of this entry in the info.compVarScopes array,
+ // which is useful since the array is also accessed via the
+ // compEnterScopeList and compExitScopeList sorted arrays.
+};
+
+/*****************************************************************************
+ *
+ * The following holds the local variable counts and the descriptor table.
+ */
+
+// This is the location of a definition.
+struct DefLoc
+{
+ BasicBlock* m_blk;
+ GenTreePtr m_tree;
+
+ DefLoc() : m_blk(nullptr), m_tree(nullptr)
+ {
+ }
+};
+
+// This class encapsulates all info about a local variable that may vary for different SSA names
+// in the family.
+class LclSsaVarDsc
+{
+public:
+ ValueNumPair m_vnPair;
+ DefLoc m_defLoc;
+
+ LclSsaVarDsc()
+ {
+ }
+};
+
+typedef ExpandArray<LclSsaVarDsc> PerSsaArray;
+
+class LclVarDsc
+{
+public:
+ // The constructor. Most things can just be zero'ed.
+ LclVarDsc(Compiler* comp);
+
+ // note this only packs because var_types is a typedef of unsigned char
+ var_types lvType : 5; // TYP_INT/LONG/FLOAT/DOUBLE/REF
+
+ unsigned char lvIsParam : 1; // is this a parameter?
+ unsigned char lvIsRegArg : 1; // is this a register argument?
+ unsigned char lvFramePointerBased : 1; // 0 = off of REG_SPBASE (e.g., ESP), 1 = off of REG_FPBASE (e.g., EBP)
+
+ unsigned char lvStructGcCount : 3; // if struct, how many GC pointers (stop counting at 7). The only use of values >1
+ // is to help determine whether to use block init in the prolog.
+ unsigned char lvOnFrame : 1; // (part of) the variable lives on the frame
+ unsigned char lvDependReg : 1; // did the predictor depend upon this being enregistered
+ unsigned char lvRegister : 1; // assigned to live in a register? For RyuJIT backend, this is only set if the
+ // variable is in the same register for the entire function.
+ unsigned char lvTracked : 1; // is this a tracked variable?
+ bool lvTrackedNonStruct()
+ {
+ return lvTracked && lvType != TYP_STRUCT;
+ }
+ unsigned char lvPinned : 1; // is this a pinned variable?
+
+ unsigned char lvMustInit : 1; // must be initialized
+ unsigned char lvAddrExposed : 1; // The address of this variable is "exposed" -- passed as an argument, stored in a
+ // global location, etc.
+ // We cannot reason reliably about the value of the variable.
+ unsigned char lvDoNotEnregister : 1; // Do not enregister this variable.
+ unsigned char lvFieldAccessed : 1; // The var is a struct local, and a field of the variable is accessed. Affects
+ // struct promotion.
+
+#ifdef DEBUG
+ // These further document the reasons for setting "lvDoNotEnregister". (Note that "lvAddrExposed" is one of the
+ // reasons; also, lvType == TYP_STRUCT prevents enregistration.) At least one of the reasons should be true.
+ unsigned char lvVMNeedsStackAddr : 1; // The VM may have access to a stack-relative address of the variable, and
+ // read/write its value.
+ unsigned char lvLiveInOutOfHndlr : 1; // The variable was live in or out of an exception handler, and this required
+ // the variable to be
+ // in the stack (at least at those boundaries.)
+ unsigned char lvLclFieldExpr : 1; // The variable is not a struct, but was accessed like one (e.g., reading a
+ // particular byte from an int).
+ unsigned char lvLclBlockOpAddr : 1; // The variable was written to via a block operation that took its address.
+ unsigned char lvLiveAcrossUCall : 1; // The variable is live across an unmanaged call.
+#endif
+ unsigned char lvIsCSE : 1; // Indicates if this LclVar is a CSE variable.
+ unsigned char lvRefAssign : 1; // involved in pointer assignment
+ unsigned char lvHasLdAddrOp : 1; // has ldloca or ldarga opcode on this local.
+ unsigned char lvStackByref : 1; // This is a compiler temporary of TYP_BYREF that is known to point into our local
+ // stack frame.
+
+ unsigned char lvArgWrite : 1; // variable is a parameter and STARG was used on it
+ unsigned char lvIsTemp : 1; // Short-lifetime compiler temp
+#if OPT_BOOL_OPS
+ unsigned char lvIsBoolean : 1; // set if variable is boolean
+#endif
+ unsigned char lvRngOptDone : 1; // considered for range check opt?
+ unsigned char lvLoopInc : 1; // incremented in the loop?
+ unsigned char lvLoopAsg : 1; // reassigned in the loop (other than a monotonic inc/dec for the index var)?
+ unsigned char lvArrIndx : 1; // used as an array index?
+ unsigned char lvArrIndxOff : 1; // used as an array index with an offset?
+ unsigned char lvArrIndxDom : 1; // index dominates loop exit
+#if ASSERTION_PROP
+ unsigned char lvSingleDef : 1; // variable has a single def
+ unsigned char lvDisqualify : 1; // variable is no longer OK for add copy optimization
+ unsigned char lvVolatileHint : 1; // hint for AssertionProp
+#endif
+#if FANCY_ARRAY_OPT
+ unsigned char lvAssignOne : 1; // assigned at least once?
+ unsigned char lvAssignTwo : 1; // assigned at least twice?
+#endif
+
+ unsigned char lvSpilled : 1; // enregistered variable was spilled
+#ifndef _TARGET_64BIT_
+ unsigned char lvStructDoubleAlign : 1; // Must we double align this struct?
+#endif // !_TARGET_64BIT_
+#ifdef _TARGET_64BIT_
+ unsigned char lvQuirkToLong : 1; // Quirk to allocate this LclVar as a 64-bit long
+#endif
+#ifdef DEBUG
+ unsigned char lvKeepType : 1; // Don't change the type of this variable
+ unsigned char lvNoLclFldStress : 1; // Can't apply local field stress on this one
+#endif
+ unsigned char lvIsPtr : 1; // Might this be used in an address computation? (used by buffer overflow security
+ // checks)
+ unsigned char lvIsUnsafeBuffer : 1; // Does this contain an unsafe buffer requiring buffer overflow security checks?
+ unsigned char lvPromoted : 1; // True when this local is a promoted struct, a normed struct, or a "split" long on a
+ // 32-bit target.
+ unsigned char lvIsStructField : 1; // Is this local var a field of a promoted struct local?
+ unsigned char lvContainsFloatingFields : 1; // Does this struct contain floating point fields?
+ unsigned char lvOverlappingFields : 1; // True when we have a struct with possibly overlapping fields
+ unsigned char lvContainsHoles : 1; // True when we have a promoted struct that contains holes
+ unsigned char lvCustomLayout : 1; // True when this struct has "CustomLayout"
+
+ unsigned char lvIsMultiRegArg : 1; // true if this is a multireg LclVar struct used in an argument context
+ unsigned char lvIsMultiRegRet : 1; // true if this is a multireg LclVar struct assigned from a multireg call
+
+#ifdef FEATURE_HFA
+ unsigned char _lvIsHfa : 1; // Is this a struct variable whose class handle is an HFA type
+ unsigned char _lvIsHfaRegArg : 1; // Is this an HFA argument variable? // TODO-CLEANUP: Remove this and replace
+ // with (lvIsRegArg && lvIsHfa())
+ unsigned char _lvHfaTypeIsFloat : 1; // Is the HFA type float or double?
+#endif // FEATURE_HFA
+
+#ifdef DEBUG
+ // TODO-Cleanup: See the note on lvSize() - this flag is only in use by asserts that are checking for struct
+ // types, and is needed because of cases where TYP_STRUCT is bashed to an integral type.
+ // Consider cleaning this up so this workaround is not required.
+ unsigned char lvUnusedStruct : 1; // All references to this promoted struct are through its field locals.
+ // I.e. there is no longer any reference to the struct directly.
+ // In this case we can simply remove this struct local.
+#endif
+#ifndef LEGACY_BACKEND
+ unsigned char lvLRACandidate : 1; // Tracked for linear scan register allocation purposes
+#endif // !LEGACY_BACKEND
+
+#ifdef FEATURE_SIMD
+ // Note that both SIMD vector args and locals are marked as lvSIMDType = true, but the
+ // type of an arg node is TYP_BYREF and a local node is TYP_SIMD*.
+ unsigned char lvSIMDType : 1; // This is a SIMD struct
+ unsigned char lvUsedInSIMDIntrinsic : 1; // Indicates that this lclVar is used by a SIMD intrinsic
+#endif // FEATURE_SIMD
+ unsigned char lvRegStruct : 1; // This is a reg-sized non-field-addressed struct.
+
+ union {
+ unsigned lvFieldLclStart; // The index of the local var representing the first field in the promoted struct
+ // local.
+ unsigned lvParentLcl; // The index of the local var representing the parent (i.e. the promoted struct local).
+ // Valid on promoted struct local fields.
+#ifdef FEATURE_SIMD
+ var_types lvBaseType; // The base type of a SIMD local var. Valid on TYP_SIMD locals.
+#endif // FEATURE_SIMD
+ };
+
+ unsigned char lvFieldCnt; // Number of fields in the promoted VarDsc.
+ unsigned char lvFldOffset;
+ unsigned char lvFldOrdinal;
+
+#if FEATURE_MULTIREG_ARGS
+ regNumber lvRegNumForSlot(unsigned slotNum)
+ {
+ if (slotNum == 0)
+ {
+ return lvArgReg;
+ }
+ else if (slotNum == 1)
+ {
+ return lvOtherArgReg;
+ }
+ else
+ {
+ assert(false && "Invalid slotNum!");
+ }
+
+ unreached();
+ }
+#endif // FEATURE_MULTIREG_ARGS
+
+ bool lvIsHfa() const
+ {
+#ifdef FEATURE_HFA
+ return _lvIsHfa;
+#else
+ return false;
+#endif
+ }
+
+ void lvSetIsHfa()
+ {
+#ifdef FEATURE_HFA
+ _lvIsHfa = true;
+#endif
+ }
+
+ bool lvIsHfaRegArg() const
+ {
+#ifdef FEATURE_HFA
+ return _lvIsHfaRegArg;
+#else
+ return false;
+#endif
+ }
+
+ void lvSetIsHfaRegArg()
+ {
+#ifdef FEATURE_HFA
+ _lvIsHfaRegArg = true;
+#endif
+ }
+
+ bool lvHfaTypeIsFloat() const
+ {
+#ifdef FEATURE_HFA
+ return _lvHfaTypeIsFloat;
+#else
+ return false;
+#endif
+ }
+
+ void lvSetHfaTypeIsFloat(bool value)
+ {
+#ifdef FEATURE_HFA
+ _lvHfaTypeIsFloat = value;
+#endif
+ }
+
+ // on Arm64 - Returns 1-4 indicating the number of register slots used by the HFA
+ // on Arm32 - Returns the total number of single FP register slots used by the HFA, max is 8
+ //
+ unsigned lvHfaSlots() const
+ {
+ assert(lvIsHfa());
+ assert(lvType == TYP_STRUCT);
+#ifdef _TARGET_ARM_
+ return lvExactSize / sizeof(float);
+#else // _TARGET_ARM64_
+ if (lvHfaTypeIsFloat())
+ {
+ return lvExactSize / sizeof(float);
+ }
+ else
+ {
+ return lvExactSize / sizeof(double);
+ }
+#endif // _TARGET_ARM64_
+ }
+
+ // lvIsMultiRegArgOrRet()
+ // returns true if this is a multireg LclVar struct used in an argument context
+ // or if this is a multireg LclVar struct assigned from a multireg call
+ bool lvIsMultiRegArgOrRet()
+ {
+ return lvIsMultiRegArg || lvIsMultiRegRet;
+ }
+
+private:
+ regNumberSmall _lvRegNum; // Used to store the register this variable is in (or, the low register of a
+ // register pair). For LEGACY_BACKEND, this is only set if lvRegister is
+ // non-zero. For non-LEGACY_BACKEND, it is set during codegen any time the
+ // variable is enregistered (in non-LEGACY_BACKEND, lvRegister is only set
+ // to non-zero if the variable gets the same register assignment for its entire
+ // lifetime).
+#if !defined(_TARGET_64BIT_)
+ regNumberSmall _lvOtherReg; // Used for "upper half" of long var.
+#endif // !defined(_TARGET_64BIT_)
+
+ regNumberSmall _lvArgReg; // The register in which this argument is passed.
+
+#if FEATURE_MULTIREG_ARGS
+ regNumberSmall _lvOtherArgReg; // Used for the second part of the struct passed in a register.
+ // Note this is defined but not used by ARM32
+#endif // FEATURE_MULTIREG_ARGS
+
+#ifndef LEGACY_BACKEND
+ union {
+ regNumberSmall _lvArgInitReg; // the register into which the argument is moved at entry
+ regPairNoSmall _lvArgInitRegPair; // the register pair into which the argument is moved at entry
+ };
+#endif // !LEGACY_BACKEND
+
+public:
+ // The register number is stored in a small format (8 bits), but the getters return and the setters take
+ // a full-size (unsigned) format, to localize the casts here.
+
+ /////////////////////
+
+ __declspec(property(get = GetRegNum, put = SetRegNum)) regNumber lvRegNum;
+
+ regNumber GetRegNum() const
+ {
+ return (regNumber)_lvRegNum;
+ }
+
+ void SetRegNum(regNumber reg)
+ {
+ _lvRegNum = (regNumberSmall)reg;
+ assert(_lvRegNum == reg);
+ }
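+
+ // Usage sketch: thanks to the __declspec(property) declaration above, callers can use
+ // the field-style name while the small-format casts stay localized here ('varDsc' is a
+ // hypothetical LclVarDsc*):
+ //
+ //   varDsc->SetRegNum(reg);              // same as: varDsc->lvRegNum = reg;
+ //   regNumber r = varDsc->GetRegNum();   // same as: regNumber r = varDsc->lvRegNum;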
+
+/////////////////////
+
+#if defined(_TARGET_64BIT_)
+ __declspec(property(get = GetOtherReg, put = SetOtherReg)) regNumber lvOtherReg;
+
+ regNumber GetOtherReg() const
+ {
+ assert(!"shouldn't get here"); // can't use "unreached();" because it's NORETURN, which causes C4072
+ // "unreachable code" warnings
+ return REG_NA;
+ }
+
+ void SetOtherReg(regNumber reg)
+ {
+ assert(!"shouldn't get here"); // can't use "unreached();" because it's NORETURN, which causes C4072
+ // "unreachable code" warnings
+ }
+#else // !_TARGET_64BIT_
+ __declspec(property(get = GetOtherReg, put = SetOtherReg)) regNumber lvOtherReg;
+
+ regNumber GetOtherReg() const
+ {
+ return (regNumber)_lvOtherReg;
+ }
+
+ void SetOtherReg(regNumber reg)
+ {
+ _lvOtherReg = (regNumberSmall)reg;
+ assert(_lvOtherReg == reg);
+ }
+#endif // !_TARGET_64BIT_
+
+ /////////////////////
+
+ __declspec(property(get = GetArgReg, put = SetArgReg)) regNumber lvArgReg;
+
+ regNumber GetArgReg() const
+ {
+ return (regNumber)_lvArgReg;
+ }
+
+ void SetArgReg(regNumber reg)
+ {
+ _lvArgReg = (regNumberSmall)reg;
+ assert(_lvArgReg == reg);
+ }
+
+#if FEATURE_MULTIREG_ARGS
+ __declspec(property(get = GetOtherArgReg, put = SetOtherArgReg)) regNumber lvOtherArgReg;
+
+ regNumber GetOtherArgReg() const
+ {
+ return (regNumber)_lvOtherArgReg;
+ }
+
+ void SetOtherArgReg(regNumber reg)
+ {
+ _lvOtherArgReg = (regNumberSmall)reg;
+ assert(_lvOtherArgReg == reg);
+ }
+#endif // FEATURE_MULTIREG_ARGS
+
+#ifdef FEATURE_SIMD
+ // Is this a SIMD struct?
+ bool lvIsSIMDType() const
+ {
+ return lvSIMDType;
+ }
+
+ // Is this a SIMD struct that is used in a SIMD intrinsic?
+ bool lvIsUsedInSIMDIntrinsic() const
+ {
+ return lvUsedInSIMDIntrinsic;
+ }
+#else
+ // If FEATURE_SIMD is not enabled, return false
+ bool lvIsSIMDType() const
+ {
+ return false;
+ }
+ bool lvIsUsedInSIMDIntrinsic() const
+ {
+ return false;
+ }
+#endif
+
+/////////////////////
+
+#ifndef LEGACY_BACKEND
+ __declspec(property(get = GetArgInitReg, put = SetArgInitReg)) regNumber lvArgInitReg;
+
+ regNumber GetArgInitReg() const
+ {
+ return (regNumber)_lvArgInitReg;
+ }
+
+ void SetArgInitReg(regNumber reg)
+ {
+ _lvArgInitReg = (regNumberSmall)reg;
+ assert(_lvArgInitReg == reg);
+ }
+
+ /////////////////////
+
+ __declspec(property(get = GetArgInitRegPair, put = SetArgInitRegPair)) regPairNo lvArgInitRegPair;
+
+ regPairNo GetArgInitRegPair() const
+ {
+ regPairNo regPair = (regPairNo)_lvArgInitRegPair;
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+ return regPair;
+ }
+
+ void SetArgInitRegPair(regPairNo regPair)
+ {
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+ _lvArgInitRegPair = (regPairNoSmall)regPair;
+ assert(_lvArgInitRegPair == regPair);
+ }
+
+ /////////////////////
+
+ bool lvIsRegCandidate() const
+ {
+ return lvLRACandidate != 0;
+ }
+
+ bool lvIsInReg() const
+ {
+ return lvIsRegCandidate() && (lvRegNum != REG_STK);
+ }
+
+#else // LEGACY_BACKEND
+
+ bool lvIsRegCandidate() const
+ {
+ return lvTracked != 0;
+ }
+
+ bool lvIsInReg() const
+ {
+ return lvRegister != 0;
+ }
+
+#endif // LEGACY_BACKEND
+
+ regMaskTP lvRegMask() const
+ {
+ regMaskTP regMask = RBM_NONE;
+ if (varTypeIsFloating(TypeGet()))
+ {
+ if (lvRegNum != REG_STK)
+ {
+ regMask = genRegMaskFloat(lvRegNum, TypeGet());
+ }
+ }
+ else
+ {
+ if (lvRegNum != REG_STK)
+ {
+ regMask = genRegMask(lvRegNum);
+ }
+
+ // For longs we may have two regs
+ if (isRegPairType(lvType) && lvOtherReg != REG_STK)
+ {
+ regMask |= genRegMask(lvOtherReg);
+ }
+ }
+ return regMask;
+ }
+
+ regMaskSmall lvPrefReg; // set of regs it prefers to live in
+
+ unsigned short lvVarIndex; // variable tracking index
+ unsigned short lvRefCnt; // unweighted (real) reference count
+ unsigned lvRefCntWtd; // weighted reference count
+ int lvStkOffs; // stack offset of home
+ unsigned lvExactSize; // (exact) size of the type in bytes
+
+ // Is this a promoted struct?
+ // This method returns true only for structs (including SIMD structs), not for
+ // locals that are split on a 32-bit target.
+ // It is only necessary to use this:
+ // 1) if only structs are wanted, and
+ // 2) if Lowering has already been done.
+ // Otherwise lvPromoted is valid.
+ bool lvPromotedStruct()
+ {
+#if !defined(_TARGET_64BIT_)
+ return (lvPromoted && !varTypeIsLong(lvType));
+#else // defined(_TARGET_64BIT_)
+ return lvPromoted;
+#endif // defined(_TARGET_64BIT_)
+ }
+
+ unsigned lvSize() // Size needed for storage representation. Only used for structs or TYP_BLK.
+ {
+ // TODO-Review: Sometimes we get called on ARM with HFA struct variables that have been promoted,
+ // where the struct itself is no longer used because all access is via its member fields.
+ // When that happens, the struct is marked as unused and its type has been changed to
+ // TYP_INT (to keep the GC tracking code from looking at it).
+ // See Compiler::raAssignVars() for details. For example:
+ // N002 ( 4, 3) [00EA067C] ------------- return struct $346
+ // N001 ( 3, 2) [00EA0628] ------------- lclVar struct(U) V03 loc2
+ // float V03.f1 (offs=0x00) -> V12 tmp7
+ // f8 (last use) (last use) $345
+ // Here, the "struct(U)" shows that the "V03 loc2" variable is unused. Not shown is that V03
+ // is now TYP_INT in the local variable table. It's not really unused, because it's in the tree.
+
+ assert(varTypeIsStruct(lvType) || (lvType == TYP_BLK) || (lvPromoted && lvUnusedStruct));
+ return (unsigned)(roundUp(lvExactSize, TARGET_POINTER_SIZE));
+ }
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+ unsigned lvSlotNum; // original slot # (if remapped)
+#endif
+
+ typeInfo lvVerTypeInfo; // type info needed for verification
+
+ BYTE* lvGcLayout; // GC layout info for structs
+
+#if FANCY_ARRAY_OPT
+ GenTreePtr lvKnownDim; // array size if known
+#endif
+
+#if ASSERTION_PROP
+ BlockSet lvRefBlks; // Set of blocks that contain refs
+ GenTreePtr lvDefStmt; // Pointer to the statement with the single definition
+ void lvaDisqualifyVar(); // Call to disqualify a local variable from use in optAddCopies
+#endif
+ var_types TypeGet() const
+ {
+ return (var_types)lvType;
+ }
+ bool lvStackAligned() const
+ {
+ assert(lvIsStructField);
+ return ((lvFldOffset % sizeof(void*)) == 0);
+ }
+ bool lvNormalizeOnLoad() const
+ {
+ return varTypeIsSmall(TypeGet()) &&
+ // lvIsStructField is treated the same as the aliased local, see fgDoNormalizeOnStore.
+ (lvIsParam || lvAddrExposed || lvIsStructField);
+ }
+
+ bool lvNormalizeOnStore()
+ {
+ return varTypeIsSmall(TypeGet()) &&
+ // lvIsStructField is treated the same as the aliased local, see fgDoNormalizeOnStore.
+ !(lvIsParam || lvAddrExposed || lvIsStructField);
+ }
+
+ void lvaResetSortAgainFlag(Compiler* pComp);
+ void decRefCnts(BasicBlock::weight_t weight, Compiler* pComp, bool propagate = true);
+ void incRefCnts(BasicBlock::weight_t weight, Compiler* pComp, bool propagate = true);
+ void setPrefReg(regNumber regNum, Compiler* pComp);
+ void addPrefReg(regMaskTP regMask, Compiler* pComp);
+ bool IsFloatRegType() const
+ {
+ return isFloatRegType(lvType) || lvIsHfaRegArg();
+ }
+ var_types GetHfaType() const
+ {
+ return lvIsHfa() ? (lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE) : TYP_UNDEF;
+ }
+ void SetHfaType(var_types type)
+ {
+ assert(varTypeIsFloating(type));
+ lvSetHfaTypeIsFloat(type == TYP_FLOAT);
+ }
+
+#ifndef LEGACY_BACKEND
+ var_types lvaArgType();
+#endif
+
+ PerSsaArray lvPerSsaData;
+
+#ifdef DEBUG
+ // Keep track of the # of SsaNames, for a bounds check.
+ unsigned lvNumSsaNames;
+#endif
+
+ // Returns the address of the per-Ssa data for the given ssaNum (which is required
+ // not to be the SsaConfig::RESERVED_SSA_NUM, which indicates that the variable is
+ // not an SSA variable).
+ LclSsaVarDsc* GetPerSsaData(unsigned ssaNum)
+ {
+ assert(ssaNum != SsaConfig::RESERVED_SSA_NUM);
+ assert(SsaConfig::RESERVED_SSA_NUM == 0);
+ unsigned zeroBased = ssaNum - SsaConfig::UNINIT_SSA_NUM;
+ assert(zeroBased < lvNumSsaNames);
+ return &lvPerSsaData.GetRef(zeroBased);
+ }
+
+#ifdef DEBUG
+public:
+ void PrintVarReg() const
+ {
+ if (isRegPairType(TypeGet()))
+ {
+ printf("%s:%s", getRegName(lvOtherReg), // hi32
+ getRegName(lvRegNum)); // lo32
+ }
+ else
+ {
+ printf("%s", getRegName(lvRegNum));
+ }
+ }
+#endif // DEBUG
+
+}; // class LclVarDsc
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX TempsInfo XX
+XX XX
+XX The temporary lclVars allocated by the compiler for code generation XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+ *
+ * The following keeps track of temporaries allocated in the stack frame
+ * during code-generation (after register allocation). These spill-temps are
+ * only used if we run out of registers while evaluating a tree.
+ *
+ * These are different from the more common temps allocated by lvaGrabTemp().
+ */
+
+class TempDsc
+{
+public:
+ TempDsc* tdNext;
+
+private:
+ int tdOffs;
+#ifdef DEBUG
+ static const int BAD_TEMP_OFFSET = 0xDDDDDDDD; // used as a sentinel "bad value" for tdOffs in DEBUG
+#endif // DEBUG
+
+ int tdNum;
+ BYTE tdSize;
+ var_types tdType;
+
+public:
+ TempDsc(int _tdNum, unsigned _tdSize, var_types _tdType) : tdNum(_tdNum), tdSize((BYTE)_tdSize), tdType(_tdType)
+ {
+#ifdef DEBUG
+ assert(tdNum < 0); // temps must have a negative number (so they have a different number from all local variables)
+ tdOffs = BAD_TEMP_OFFSET;
+#endif // DEBUG
+ if (tdNum != _tdNum)
+ {
+ IMPL_LIMITATION("too many spill temps");
+ }
+ }
+
+#ifdef DEBUG
+ bool tdLegalOffset() const
+ {
+ return tdOffs != BAD_TEMP_OFFSET;
+ }
+#endif // DEBUG
+
+ int tdTempOffs() const
+ {
+ assert(tdLegalOffset());
+ return tdOffs;
+ }
+ void tdSetTempOffs(int offs)
+ {
+ tdOffs = offs;
+ assert(tdLegalOffset());
+ }
+ void tdAdjustTempOffs(int offs)
+ {
+ tdOffs += offs;
+ assert(tdLegalOffset());
+ }
+
+ int tdTempNum() const
+ {
+ assert(tdNum < 0);
+ return tdNum;
+ }
+ unsigned tdTempSize() const
+ {
+ return tdSize;
+ }
+ var_types tdTempType() const
+ {
+ return tdType;
+ }
+};
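+
+// Usage sketch (values are hypothetical): spill temps get negative numbers so they can
+// never collide with local variable numbers, and the frame offset is filled in later:
+//
+//   TempDsc* temp = new (comp, CMK_Unknown) TempDsc(-1, sizeof(int), TYP_INT);
+//   temp->tdSetTempOffs(-8);                 // hypothetical frame offset
+//   assert(temp->tdTempType() == TYP_INT);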
+
+// interface to hide linearscan implementation from rest of compiler
+class LinearScanInterface
+{
+public:
+ virtual void doLinearScan() = 0;
+ virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb) = 0;
+};
+
+LinearScanInterface* getLinearScanAllocator(Compiler* comp);
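+
+// Usage sketch: a caller obtains the register allocator through the factory above and
+// drives it only through this interface:
+//
+//   LinearScanInterface* lsra = getLinearScanAllocator(comp);
+//   lsra->doLinearScan();
+//   lsra->recordVarLocationsAtStartOfBB(block);  // later, per basic block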
+
+// Information about arrays: their element type and size, and the offset of the first element.
+// We label GT_IND's that are array indices with GTF_IND_ARR_INDEX, and, for such nodes,
+// associate an array info via the map retrieved by GetArrayInfoMap(). This information is used,
+// for example, in value numbering of array index expressions.
+struct ArrayInfo
+{
+ var_types m_elemType;
+ CORINFO_CLASS_HANDLE m_elemStructType;
+ unsigned m_elemSize;
+ unsigned m_elemOffset;
+
+ ArrayInfo() : m_elemType(TYP_UNDEF), m_elemStructType(nullptr), m_elemSize(0), m_elemOffset(0)
+ {
+ }
+
+ ArrayInfo(var_types elemType, unsigned elemSize, unsigned elemOffset, CORINFO_CLASS_HANDLE elemStructType)
+ : m_elemType(elemType), m_elemStructType(elemStructType), m_elemSize(elemSize), m_elemOffset(elemOffset)
+ {
+ }
+};
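+
+// Usage sketch (numbers are hypothetical): an array of 4-byte ints whose first element
+// lives 8 bytes past the start of the array object could be described as:
+//
+//   ArrayInfo info(TYP_INT, /* elemSize */ 4, /* elemOffset */ 8, /* elemStructType */ nullptr);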
+
+// This enumeration names the phases into which we divide compilation. The phases should completely
+// partition a compilation.
+enum Phases
+{
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) enum_nm,
+#include "compphases.h"
+ PHASE_NUMBER_OF
+};
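+
+// Sketch of the X-macro expansion above (the entry shown is illustrative; the real
+// phase list lives in compphases.h): an entry such as
+//
+//   CompPhaseNameMacro(PHASE_PRE_IMPORT, "Pre-import", "PRE-IMP", false, -1)
+//
+// contributes only its enumerator name here, i.e. "PHASE_PRE_IMPORT," in Phases, while
+// other expansions of compphases.h populate the parallel PhaseNames/PhaseShortNames tables below.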
+
+extern const char* PhaseNames[];
+extern const char* PhaseEnums[];
+extern const LPCWSTR PhaseShortNames[];
+
+//---------------------------------------------------------------
+// Compilation time.
+//
+
+// A "CompTimeInfo" is a structure for tracking the compilation time of one or more methods.
+// We divide a compilation into a sequence of contiguous phases, and track the total (per-thread) cycles
+// of the compilation, as well as the cycles for each phase. We also track the number of bytecodes.
+// If there is a failure in reading a timer at any point, the "CompTimeInfo" becomes invalid, as indicated
+// by "m_timerFailure" being true.
+// If FEATURE_JIT_METHOD_PERF is not set, we define a minimal form of this, enough to let other code compile.
+struct CompTimeInfo
+{
+#ifdef FEATURE_JIT_METHOD_PERF
+ // The string names of the phases.
+ static const char* PhaseNames[];
+
+ static bool PhaseHasChildren[];
+ static int PhaseParent[];
+
+ unsigned m_byteCodeBytes;
+ unsigned __int64 m_totalCycles;
+ unsigned __int64 m_invokesByPhase[PHASE_NUMBER_OF];
+ unsigned __int64 m_cyclesByPhase[PHASE_NUMBER_OF];
+ // For better documentation, we call EndPhase on
+ // non-leaf phases. We should also call EndPhase on the
+ // last leaf subphase; obviously, the elapsed cycles between the EndPhase
+ // for the last leaf subphase and the EndPhase for an ancestor should be very small.
+ // We add all such "redundant end phase" intervals to this variable below; we print
+ // it out in a report, so we can verify that it is, indeed, very small. If it ever
+ // isn't, this means that we're doing something significant between the end of the last
+ // declared subphase and the end of its parent.
+ unsigned __int64 m_parentPhaseEndSlop;
+ bool m_timerFailure;
+
+ CompTimeInfo(unsigned byteCodeBytes);
+#endif
+};
+
+#ifdef FEATURE_JIT_METHOD_PERF
+
+// This class summarizes the JIT time information over the course of a run: the number of methods compiled,
+// and the total and maximum timings. (These are instances of the "CompTimeInfo" type described above).
+// The operation of adding a single method's timing to the summary may be performed concurrently by several
+// threads, so it is protected by a lock.
+// This class is intended to be used as a singleton type, with only a single instance.
+class CompTimeSummaryInfo
+{
+ // This lock protects the fields of all CompTimeSummaryInfo(s) (of which we expect there to be one).
+ static CritSecObject s_compTimeSummaryLock;
+
+ int m_numMethods;
+ CompTimeInfo m_total;
+ CompTimeInfo m_maximum;
+
+ int m_numFilteredMethods;
+ CompTimeInfo m_filtered;
+
+ // This method computes the number of cycles/sec for the current machine. The cycles are those counted
+ // by GetThreadCycleTime; we assume that these are of equal duration, though that is not necessarily true.
+ // If any OS interaction fails, returns 0.0.
+ double CyclesPerSecond();
+
+ // This can use whatever data you want to determine whether the value to be added
+ // belongs in the filtered section (it is always included in the unfiltered section).
+ bool IncludedInFilteredData(CompTimeInfo& info);
+
+public:
+ // This is the unique CompTimeSummaryInfo object for this instance of the runtime.
+ static CompTimeSummaryInfo s_compTimeSummary;
+
+ CompTimeSummaryInfo() : m_numMethods(0), m_total(0), m_maximum(0), m_numFilteredMethods(0), m_filtered(0)
+ {
+ }
+
+ // Assumes that "info" is a completed CompTimeInfo for a compilation; adds it to the summary.
+ // This is thread safe.
+ void AddInfo(CompTimeInfo& info);
+
+ // Print the summary information to "f".
+ // This is not thread-safe; assumed to be called by only one thread.
+ void Print(FILE* f);
+};
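+
+// Illustrative sketch (not part of the original header): a finished method's CompTimeInfo is folded into
+// the process-wide singleton, and the aggregate is printed once, e.g. at shutdown:
+//     CompTimeSummaryInfo::s_compTimeSummary.AddInfo(methodTimeInfo); // thread-safe
+//     ...
+//     CompTimeSummaryInfo::s_compTimeSummary.Print(outFile);          // not thread-safe
+// where 'methodTimeInfo' and 'outFile' are placeholders for a completed CompTimeInfo and a FILE*.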
+
+// A JitTimer encapsulates a CompTimeInfo for a single compilation. It also tracks the start of compilation,
+// and when the current phase started. This is intended to be part of a Compilation object. This is
+// disabled (FEATURE_JIT_METHOD_PERF not defined) when FEATURE_CORECLR is set, or on non-Windows platforms.
+//
+class JitTimer
+{
+ unsigned __int64 m_start; // Start of the compilation.
+ unsigned __int64 m_curPhaseStart; // Start of the current phase.
+#ifdef DEBUG
+ Phases m_lastPhase; // The last phase that was completed (or (Phases)-1 to start).
+#endif
+ CompTimeInfo m_info; // The CompTimeInfo for this compilation.
+
+ static CritSecObject s_csvLock; // Lock to protect the time log file.
+ void PrintCsvMethodStats(Compiler* comp);
+
+private:
+ void* operator new(size_t);
+ void* operator new[](size_t);
+ void operator delete(void*);
+ void operator delete[](void*);
+
+public:
+ // Initializes the timer instance.
+ JitTimer(unsigned byteCodeSize);
+
+ static JitTimer* Create(Compiler* comp, unsigned byteCodeSize)
+ {
+ return ::new (comp, CMK_Unknown) JitTimer(byteCodeSize);
+ }
+
+ static void PrintCsvHeader();
+
+ // Ends the current phase (argument is for a redundant check).
+ void EndPhase(Phases phase);
+
+ // Completes the timing of the current method, which is assumed to have "byteCodeBytes" bytes of bytecode,
+ // and adds it to "sum".
+ void Terminate(Compiler* comp, CompTimeSummaryInfo& sum);
+
+ // Attempts to query the cycle counter of the current thread. If successful, returns "true" and sets
+ // *cycles to the cycle counter value. Otherwise, returns false and sets the "m_timerFailure" flag of
+ // "m_info" to true.
+ bool GetThreadCycles(unsigned __int64* cycles)
+ {
+ bool res = CycleTimer::GetThreadCyclesS(cycles);
+ if (!res)
+ {
+ m_info.m_timerFailure = true;
+ }
+ return res;
+ }
+};
+#endif // FEATURE_JIT_METHOD_PERF
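+
+// Illustrative sketch (not part of the original header; only applies when FEATURE_JIT_METHOD_PERF is
+// defined): the expected lifecycle of a JitTimer is roughly
+//     JitTimer* timer = JitTimer::Create(comp, ilSizeInBytes); // allocate from the compiler and start timing
+//     ... run a phase ...
+//     timer->EndPhase(PHASE_EXAMPLE);                          // PHASE_EXAMPLE is a placeholder phase name
+//     ... remaining phases, each followed by EndPhase ...
+//     timer->Terminate(comp, CompTimeSummaryInfo::s_compTimeSummary);
+// where 'comp' is the owning Compiler and 'ilSizeInBytes' is the method's IL size.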
+
+//------------------- Function/Funclet info -------------------------------
+DECLARE_TYPED_ENUM(FuncKind, BYTE)
+{
+ FUNC_ROOT, // The main/root function (always id==0)
+ FUNC_HANDLER, // a funclet associated with an EH handler (finally, fault, catch, filter handler)
+ FUNC_FILTER, // a funclet associated with an EH filter
+ FUNC_COUNT
+}
+END_DECLARE_TYPED_ENUM(FuncKind, BYTE)
+
+class emitLocation;
+
+struct FuncInfoDsc
+{
+ FuncKind funKind;
+ BYTE funFlags; // Currently unused, just here for padding
+ unsigned short funEHIndex; // index, into the ebd table, of innermost EH clause corresponding to this
+ // funclet. It is only valid if the funKind field indicates this is an
+ // EH-related funclet: FUNC_HANDLER or FUNC_FILTER
+
+#if defined(_TARGET_AMD64_)
+
+ // TODO-AMD64-Throughput: make the AMD64 info more like the ARM info to avoid having this large static array.
+ emitLocation* startLoc;
+ emitLocation* endLoc;
+ emitLocation* coldStartLoc; // locations for the cold section, if there is one.
+ emitLocation* coldEndLoc;
+ UNWIND_INFO unwindHeader;
+ // Maximum of 255 UNWIND_CODE 'nodes' and then the unwind header. If there is an odd
+ // number of codes, the VM or Zapper will 4-byte align the whole thing.
+ BYTE unwindCodes[offsetof(UNWIND_INFO, UnwindCode) + (0xFF * sizeof(UNWIND_CODE))];
+ unsigned unwindCodeSlot;
+
+#ifdef UNIX_AMD64_ABI
+ jitstd::vector<CFI_CODE>* cfiCodes;
+#endif // UNIX_AMD64_ABI
+
+#elif defined(_TARGET_ARMARCH_)
+
+ UnwindInfo uwi; // Unwind information for this function/funclet's hot section
+ UnwindInfo* uwiCold; // Unwind information for this function/funclet's cold section
+ // Note: we only have a pointer here instead of the actual object,
+ // to save memory in the JIT case (compared to the NGEN case),
+ // since in the JIT case we don't have any cold section.
+ // Note 2: we currently don't support hot/cold splitting in functions
+ // with EH, so uwiCold will be NULL for all funclets.
+
+#endif // _TARGET_ARMARCH_
+
+ // Eventually we may want to move rsModifiedRegsMask, lvaOutgoingArgSize, and anything else
+ // that isn't shared between the main function body and funclets into this struct.
+};
+
+struct fgArgTabEntry
+{
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ fgArgTabEntry()
+ {
+ otherRegNum = REG_NA;
+ isStruct = false; // is this a struct arg
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ GenTreePtr node; // Initially points at the Op1 field of 'parent', but if the argument is replaced with a GT_ASG or
+ // placeholder node, it will point at the actual argument in the gtCallLateArgs list.
+ GenTreePtr parent; // Points at the GT_LIST node in the gtCallArgs for this argument
+
+ unsigned argNum; // The original argument number, also specifies the required argument evaluation order from the IL
+
+ regNumber regNum; // The (first) register to use when passing this argument, set to REG_STK for arguments passed on
+ // the stack
+ unsigned numRegs; // Count of number of registers that this argument uses
+
+ // A slot is a pointer sized region in the OutArg area.
+ unsigned slotNum; // When an argument is passed in the OutArg area this is the slot number in the OutArg area
+ unsigned numSlots; // Count of number of slots that this argument uses
+
+ unsigned alignment; // 1 or 2 (slots/registers)
+ unsigned lateArgInx; // index into gtCallLateArgs list
+ unsigned tmpNum; // the LclVar number if we had to force evaluation of this arg
+
+ bool isSplit : 1; // True when this argument is split between the registers and OutArg area
+ bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar
+ bool needPlace : 1; // True when we must replace this argument with a placeholder node
+ bool isTmp : 1; // True when we set up a temp LclVar for this argument due to size issues with the struct
+ bool processed : 1; // True when we have decided the evaluation order for this argument in the gtCallLateArgs
+ bool isHfaRegArg : 1; // True when the argument is passed as a HFA in FP registers.
+ bool isBackFilled : 1; // True when the argument fills a register slot skipped due to alignment requirements of
+ // previous arguments.
+ bool isNonStandard : 1; // True if it is an arg that is passed in a reg other than a standard arg reg, or is forced
+ // to be on the stack despite its arg list position.
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ bool isStruct : 1; // True if this is a struct arg
+
+ regNumber otherRegNum; // The (second) register to use when passing this argument.
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#ifdef _TARGET_ARM_
+ void SetIsHfaRegArg(bool hfaRegArg)
+ {
+ isHfaRegArg = hfaRegArg;
+ }
+
+ void SetIsBackFilled(bool backFilled)
+ {
+ isBackFilled = backFilled;
+ }
+
+ bool IsBackFilled() const
+ {
+ return isBackFilled;
+ }
+#else // !_TARGET_ARM_
+ // To keep the callers simpler, we allow these calls (and the isHfaRegArg and isBackFilled data members) on all
+ // platforms.
+ void SetIsHfaRegArg(bool hfaRegArg)
+ {
+ }
+
+ void SetIsBackFilled(bool backFilled)
+ {
+ }
+
+ bool IsBackFilled() const
+ {
+ return false;
+ }
+#endif // !_TARGET_ARM_
+
+#ifdef DEBUG
+ void Dump();
+#endif
+};
+typedef struct fgArgTabEntry* fgArgTabEntryPtr;
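+
+// Illustrative sketch (not part of the original header): for a call whose third IL argument is an 8-byte
+// struct passed on the stack, the corresponding fgArgTabEntry would roughly have argNum == 2,
+// regNum == REG_STK, numRegs == 0, slotNum set to its slot in the OutArg area, and numSlots == 1
+// (assuming pointer-sized slots on a 64-bit target); an int passed in a register would instead have
+// numRegs == 1 and numSlots == 0. The exact values are target- and ABI-dependent.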
+
+//-------------------------------------------------------------------------
+//
+// The class fgArgInfo is used to handle the arguments
+// when morphing a GT_CALL node.
+//
+
+class fgArgInfo
+{
+ Compiler* compiler; // Back pointer to the compiler instance so that we can allocate memory
+ GenTreePtr callTree; // Back pointer to the GT_CALL node for this fgArgInfo
+ unsigned argCount; // Updatable arg count value
+ unsigned nextSlotNum; // Updatable slot count value
+ unsigned stkLevel; // Stack depth when we make this call (for x86)
+
+ unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs)
+ bool hasRegArgs; // true if we have one or more register arguments
+ bool hasStackArgs; // true if we have one or more stack arguments
+ bool argsComplete; // marker for state
+ bool argsSorted; // marker for state
+ fgArgTabEntryPtr* argTable; // variable-sized array of per-argument descriptions (i.e., argTable[argTableSize])
+
+private:
+ void AddArg(fgArgTabEntryPtr curArgTabEntry);
+
+public:
+ fgArgInfo(Compiler* comp, GenTreePtr call, unsigned argCount);
+ fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall);
+
+ fgArgTabEntryPtr AddRegArg(
+ unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ fgArgTabEntryPtr AddRegArg(
+ unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ regNumber regNum,
+ unsigned numRegs,
+ unsigned alignment,
+ const bool isStruct,
+ const regNumber otherRegNum = REG_NA,
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr = nullptr);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ fgArgTabEntryPtr AddStkArg(unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ unsigned numSlots,
+ unsigned alignment FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct));
+
+ void RemorphReset();
+ fgArgTabEntryPtr RemorphRegArg(
+ unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment);
+
+ void RemorphStkArg(unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, unsigned alignment);
+
+ void SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots);
+
+ void EvalToTmp(unsigned argNum, unsigned tmpNum, GenTreePtr newNode);
+
+ void ArgsComplete();
+
+ void SortArgs();
+
+ void EvalArgsToTemps();
+
+ void RecordStkLevel(unsigned stkLvl);
+ unsigned RetrieveStkLevel();
+
+ unsigned ArgCount()
+ {
+ return argCount;
+ }
+ fgArgTabEntryPtr* ArgTable()
+ {
+ return argTable;
+ }
+ unsigned GetNextSlotNum()
+ {
+ return nextSlotNum;
+ }
+ bool HasRegArgs()
+ {
+ return hasRegArgs;
+ }
+ bool HasStackArgs()
+ {
+ return hasStackArgs;
+ }
+};
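+
+// Illustrative sketch (not part of the original header, and much simplified relative to the real morph
+// logic): during fgMorphArgs the table is typically populated and then finalized along these lines:
+//     fgArgInfo* argInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
+//     argInfo->AddRegArg(0, thisArg, parentList, firstArgReg, 1, 1); // e.g. 'this' in the first arg register
+//     argInfo->AddStkArg(1, structArg, parentList, 2, 1);            // a two-slot stack argument
+//     argInfo->ArgsComplete();
+//     argInfo->SortArgs();
+//     argInfo->EvalArgsToTemps();
+// Names such as numArgs, thisArg, parentList, firstArgReg and structArg are placeholders.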
+
+#ifdef DEBUG
+// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+// We have the ability to mark source expressions with "Test Labels."
+// These drive assertions within the JIT, or internal JIT testing. For example, we could label expressions
+// that should be CSE defs, and other expressions that should be uses of those defs, with a shared label.
+
+enum TestLabel // This must be kept identical to System.Runtime.CompilerServices.JitTestLabel.TestLabel.
+{
+ TL_SsaName,
+ TL_VN, // Defines a "VN equivalence class". (For full VN, including exceptions thrown).
+ TL_VNNorm, // Like above, but uses the non-exceptional value of the expression.
+ TL_CSE_Def, // This must be identified in the JIT as a CSE def
+ TL_CSE_Use, // This must be identified in the JIT as a CSE use
+ TL_LoopHoist, // Expression must (or must not) be hoisted out of the loop.
+};
+
+struct TestLabelAndNum
+{
+ TestLabel m_tl;
+ ssize_t m_num;
+
+ TestLabelAndNum() : m_tl(TestLabel(0)), m_num(0)
+ {
+ }
+};
+
+typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, TestLabelAndNum, JitSimplerHashBehavior> NodeToTestDataMap;
+
+// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+#endif // DEBUG
+
+// This class implements the "IAllocator" interface, so that we can use
+// utilcode collection classes in the JIT, and have them use the JIT's allocator.
+
+class CompAllocator : public IAllocator
+{
+ Compiler* m_comp;
+#if MEASURE_MEM_ALLOC
+ CompMemKind m_cmk;
+#endif
+public:
+ CompAllocator(Compiler* comp, CompMemKind cmk)
+ : m_comp(comp)
+#if MEASURE_MEM_ALLOC
+ , m_cmk(cmk)
+#endif
+ {
+ }
+
+ inline void* Alloc(size_t sz);
+
+ inline void* ArrayAlloc(size_t elems, size_t elemSize);
+
+ // For the compiler's no-release allocator, free operations are no-ops.
+ void Free(void* p)
+ {
+ }
+};
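+
+// Illustrative sketch (not part of the original header): the allocator hands out memory from the
+// compiler's no-release pool, so callers never free individual allocations:
+//     CompAllocator alloc(this, CMK_Unknown);
+//     void* block = alloc.Alloc(count * sizeof(int));
+//     void* array = alloc.ArrayAlloc(count, sizeof(int));
+//     alloc.Free(block); // intentionally a no-op
+// Here 'count' is a placeholder, and CMK_Unknown is one of the CompMemKind accounting categories.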
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX The big guy. The sections are currently organized as : XX
+XX XX
+XX o GenTree and BasicBlock XX
+XX o LclVarsInfo XX
+XX o Importer XX
+XX o FlowGraph XX
+XX o Optimizer XX
+XX o RegAlloc XX
+XX o EEInterface XX
+XX o TempsInfo XX
+XX o RegSet XX
+XX o GCInfo XX
+XX o Instruction XX
+XX o ScopeInfo XX
+XX o PrologScopeInfo XX
+XX o CodeGenerator XX
+XX o UnwindInfo XX
+XX o Compiler XX
+XX o typeInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+class Compiler
+{
+ friend class emitter;
+ friend class UnwindInfo;
+ friend class UnwindFragmentInfo;
+ friend class UnwindEpilogInfo;
+ friend class JitTimer;
+ friend class LinearScan;
+ friend class fgArgInfo;
+ friend class Rationalizer;
+ friend class Phase;
+ friend class Lowering;
+ friend class CSE_DataFlow;
+ friend class CSE_Heuristic;
+ friend class CodeGenInterface;
+ friend class CodeGen;
+ friend class LclVarDsc;
+ friend class TempDsc;
+ friend class LIR;
+ friend class ObjectAllocator;
+
+#ifndef _TARGET_64BIT_
+ friend class DecomposeLongs;
+#endif // !_TARGET_64BIT_
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX Misc structs definitions XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ hashBvGlobalData hbvGlobalData; // Used by the hashBv bitvector package.
+
+#ifdef DEBUG
+ bool verbose;
+ bool dumpIR;
+ bool dumpIRNodes;
+ bool dumpIRTypes;
+ bool dumpIRKinds;
+ bool dumpIRLocals;
+ bool dumpIRRegs;
+ bool dumpIRSsa;
+ bool dumpIRValnums;
+ bool dumpIRCosts;
+ bool dumpIRFlags;
+ bool dumpIRNoLists;
+ bool dumpIRNoLeafs;
+ bool dumpIRNoStmts;
+ bool dumpIRTrees;
+ bool dumpIRLinear;
+ bool dumpIRDataflow;
+ bool dumpIRBlockHeaders;
+ bool dumpIRExit;
+ LPCWSTR dumpIRPhase;
+ LPCWSTR dumpIRFormat;
+ bool verboseTrees;
+ bool shouldUseVerboseTrees();
+ bool asciiTrees; // If true, dump trees using only ASCII characters
+ bool shouldDumpASCIITrees();
+ bool verboseSsa; // If true, produce especially verbose dump output in SSA construction.
+ bool shouldUseVerboseSsa();
+ bool treesBeforeAfterMorph; // If true, print trees before/after morphing (paired by an intra-compilation id: morphNum).
+ int morphNum; // This counts the trees that have been morphed, allowing us to label each uniquely.
+
+ const char* VarNameToStr(VarName name)
+ {
+ return name;
+ }
+
+ DWORD expensiveDebugCheckLevel;
+#endif
+
+#if FEATURE_MULTIREG_RET
+ GenTreePtr impAssignMultiRegTypeToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass);
+#endif // FEATURE_MULTIREG_RET
+
+#ifdef ARM_SOFTFP
+ bool isSingleFloat32Struct(CORINFO_CLASS_HANDLE hClass);
+#endif // ARM_SOFTFP
+
+ //-------------------------------------------------------------------------
+ // Functions to handle homogeneous floating-point aggregates (HFAs) in ARM.
+ // HFAs are structs of one to four elements, where each element is the same
+ // type, either all float or all double. They are treated specially
+ // in the ARM Procedure Call Standard; specifically, they are passed in
+ // floating-point registers instead of the general purpose registers.
+ //
+
+ bool IsHfa(CORINFO_CLASS_HANDLE hClass);
+ bool IsHfa(GenTreePtr tree);
+
+ var_types GetHfaType(GenTreePtr tree);
+ unsigned GetHfaCount(GenTreePtr tree);
+
+ var_types GetHfaType(CORINFO_CLASS_HANDLE hClass);
+ unsigned GetHfaCount(CORINFO_CLASS_HANDLE hClass);
+
+ bool IsMultiRegPassedType(CORINFO_CLASS_HANDLE hClass);
+ bool IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass);
+
+ //-------------------------------------------------------------------------
+ // The following is used for validating format of EH table
+ //
+
+ struct EHNodeDsc;
+ typedef struct EHNodeDsc* pEHNodeDsc;
+
+ EHNodeDsc* ehnTree; // root of the tree comprising the EHnodes.
+ EHNodeDsc* ehnNext; // next free EHNodeDsc in the block of nodes allocated for building the tree.
+
+ struct EHNodeDsc
+ {
+ enum EHBlockType
+ {
+ TryNode,
+ FilterNode,
+ HandlerNode,
+ FinallyNode,
+ FaultNode
+ };
+
+ EHBlockType ehnBlockType; // kind of EH block
+ IL_OFFSET ehnStartOffset; // IL offset of start of the EH block
+ IL_OFFSET ehnEndOffset; // IL offset past end of the EH block. (TODO: looks like verInsertEhNode() sets this to
+ // the last IL offset, not "one past the last one", i.e., the range Start to End is
+ // inclusive).
+ pEHNodeDsc ehnNext; // next (non-nested) block in sequential order
+ pEHNodeDsc ehnChild; // leftmost nested block
+ union {
+ pEHNodeDsc ehnTryNode; // for filters and handlers, the corresponding try node
+ pEHNodeDsc ehnHandlerNode; // for a try node, the corresponding handler node
+ };
+ pEHNodeDsc ehnFilterNode; // the filter node, if this is a try node that has a filter; otherwise 0
+ pEHNodeDsc ehnEquivalent; // if blockType == TryNode: another try node with the same start and end offsets (a 'mutual protect' try)
+
+ inline void ehnSetTryNodeType()
+ {
+ ehnBlockType = TryNode;
+ }
+ inline void ehnSetFilterNodeType()
+ {
+ ehnBlockType = FilterNode;
+ }
+ inline void ehnSetHandlerNodeType()
+ {
+ ehnBlockType = HandlerNode;
+ }
+ inline void ehnSetFinallyNodeType()
+ {
+ ehnBlockType = FinallyNode;
+ }
+ inline void ehnSetFaultNodeType()
+ {
+ ehnBlockType = FaultNode;
+ }
+
+ inline BOOL ehnIsTryBlock()
+ {
+ return ehnBlockType == TryNode;
+ }
+ inline BOOL ehnIsFilterBlock()
+ {
+ return ehnBlockType == FilterNode;
+ }
+ inline BOOL ehnIsHandlerBlock()
+ {
+ return ehnBlockType == HandlerNode;
+ }
+ inline BOOL ehnIsFinallyBlock()
+ {
+ return ehnBlockType == FinallyNode;
+ }
+ inline BOOL ehnIsFaultBlock()
+ {
+ return ehnBlockType == FaultNode;
+ }
+
+ // returns true if there is any overlap between the two nodes
+ static inline BOOL ehnIsOverlap(pEHNodeDsc node1, pEHNodeDsc node2)
+ {
+ if (node1->ehnStartOffset < node2->ehnStartOffset)
+ {
+ return (node1->ehnEndOffset >= node2->ehnStartOffset);
+ }
+ else
+ {
+ return (node1->ehnStartOffset <= node2->ehnEndOffset);
+ }
+ }
+
+ // returns true iff 'inner' is completely nested inside 'outer' (callers fail with BADCODE when it is not)
+ static inline BOOL ehnIsNested(pEHNodeDsc inner, pEHNodeDsc outer)
+ {
+ return ((inner->ehnStartOffset >= outer->ehnStartOffset) && (inner->ehnEndOffset <= outer->ehnEndOffset));
+ }
+ };
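+
+ // Worked example (illustrative, not part of the original header), remembering that the end offsets are
+ // treated as inclusive: nodes covering IL [0, 10] and [5, 20] overlap per ehnIsOverlap (10 >= 5), while
+ // [0, 10] and [20, 30] do not (10 < 20); ehnIsNested holds for an inner [5, 8] and an outer [0, 10]
+ // because 5 >= 0 and 8 <= 10.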
+
+//-------------------------------------------------------------------------
+// Exception handling functions
+//
+
+#if !FEATURE_EH_FUNCLETS
+
+ bool ehNeedsShadowSPslots()
+ {
+ return (info.compXcptnsCount || opts.compDbgEnC);
+ }
+
+ // 0 for methods with no EH
+ // 1 for methods with non-nested EH, or where only the try blocks are nested
+ // 2 for a method with a catch within a catch
+ // etc.
+ unsigned ehMaxHndNestingCount;
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ static bool jitIsBetween(unsigned value, unsigned start, unsigned end);
+ static bool jitIsBetweenInclusive(unsigned value, unsigned start, unsigned end);
+
+ bool bbInCatchHandlerILRange(BasicBlock* blk);
+ bool bbInFilterILRange(BasicBlock* blk);
+ bool bbInTryRegions(unsigned regionIndex, BasicBlock* blk);
+ bool bbInExnFlowRegions(unsigned regionIndex, BasicBlock* blk);
+ bool bbInHandlerRegions(unsigned regionIndex, BasicBlock* blk);
+ bool bbInCatchHandlerRegions(BasicBlock* tryBlk, BasicBlock* hndBlk);
+ unsigned short bbFindInnermostCommonTryRegion(BasicBlock* bbOne, BasicBlock* bbTwo);
+
+ unsigned short bbFindInnermostTryRegionContainingHandlerRegion(unsigned handlerIndex);
+ unsigned short bbFindInnermostHandlerRegionContainingTryRegion(unsigned tryIndex);
+
+ // Returns true if "block" is the start of a try region.
+ bool bbIsTryBeg(BasicBlock* block);
+
+ // Returns true if "block" is the start of a handler or filter region.
+ bool bbIsHandlerBeg(BasicBlock* block);
+
+ // Returns true iff "block" is where control flows if an exception is raised in the
+ // try region, and sets "*regionIndex" to the index of the try for the handler.
+ // Differs from "IsHandlerBeg" in the case of filters, where this is true for the first
+ // block of the filter, but not for the filter's handler.
+ bool bbIsExFlowBlock(BasicBlock* block, unsigned* regionIndex);
+
+ bool ehHasCallableHandlers();
+
+ // Return the EH descriptor for the given region index.
+ EHblkDsc* ehGetDsc(unsigned regionIndex);
+
+ // Return the EH index given a region descriptor.
+ unsigned ehGetIndex(EHblkDsc* ehDsc);
+
+ // Return the EH descriptor index of the enclosing try, for the given region index.
+ unsigned ehGetEnclosingTryIndex(unsigned regionIndex);
+
+ // Return the EH descriptor index of the enclosing handler, for the given region index.
+ unsigned ehGetEnclosingHndIndex(unsigned regionIndex);
+
+ // Return the EH descriptor for the most nested 'try' region this BasicBlock is a member of (or nullptr if this
+ // block is not in a 'try' region).
+ EHblkDsc* ehGetBlockTryDsc(BasicBlock* block);
+
+ // Return the EH descriptor for the most nested filter or handler region this BasicBlock is a member of (or nullptr
+ // if this block is not in a filter or handler region).
+ EHblkDsc* ehGetBlockHndDsc(BasicBlock* block);
+
+ // Return the EH descriptor for the most nested region that may handle exceptions raised in this BasicBlock (or
+ // nullptr if this block's exceptions propagate to caller).
+ EHblkDsc* ehGetBlockExnFlowDsc(BasicBlock* block);
+
+ EHblkDsc* ehIsBlockTryLast(BasicBlock* block);
+ EHblkDsc* ehIsBlockHndLast(BasicBlock* block);
+ bool ehIsBlockEHLast(BasicBlock* block);
+
+ bool ehBlockHasExnFlowDsc(BasicBlock* block);
+
+ // Return the region index of the most nested EH region this block is in.
+ unsigned ehGetMostNestedRegionIndex(BasicBlock* block, bool* inTryRegion);
+
+ // Find the true enclosing try index, ignoring 'mutual protect' try. Uses IL ranges to check.
+ unsigned ehTrueEnclosingTryIndexIL(unsigned regionIndex);
+
+ // Return the index of the most nested enclosing region for a particular EH region. Returns NO_ENCLOSING_INDEX
+ // if there is no enclosing region. If the returned index is not NO_ENCLOSING_INDEX, then '*inTryRegion'
+ // is set to 'true' if the enclosing region is a 'try', or 'false' if the enclosing region is a handler.
+ // (It can never be a filter.)
+ unsigned ehGetEnclosingRegionIndex(unsigned regionIndex, bool* inTryRegion);
+
+ // A block has been deleted. Update the EH table appropriately.
+ void ehUpdateForDeletedBlock(BasicBlock* block);
+
+ // Determine whether a block can be deleted while preserving the EH normalization rules.
+ bool ehCanDeleteEmptyBlock(BasicBlock* block);
+
+ // Update the 'last' pointers in the EH table to reflect new or deleted blocks in an EH region.
+ void ehUpdateLastBlocks(BasicBlock* oldLast, BasicBlock* newLast);
+
+ // For a finally handler, find the region index that the BBJ_CALLFINALLY lives in that calls the handler,
+ // or NO_ENCLOSING_INDEX if the BBJ_CALLFINALLY lives in the main function body. Normally, the index
+ // is the same index as the handler (and the BBJ_CALLFINALLY lives in the 'try' region), but for AMD64 the
+ // BBJ_CALLFINALLY lives in the enclosing try or handler region, whichever is more nested, or the main function
+ // body. If the returned index is not NO_ENCLOSING_INDEX, then '*inTryRegion' is set to 'true' if the
+ // BBJ_CALLFINALLY lives in the returned index's 'try' region, or 'false' if it lives in the handler region. (It never
+ // lives in a filter.)
+ unsigned ehGetCallFinallyRegionIndex(unsigned finallyIndex, bool* inTryRegion);
+
+ // Find the range of basic blocks containing all the BBJ_CALLFINALLY blocks that target the 'finallyIndex' region's
+ // handler. Set begBlk to the first block, and endBlk to the block after the last block of the range
+ // (nullptr if the last block is the last block in the program).
+ // Precondition: 'finallyIndex' is the EH region of a try/finally clause.
+ void ehGetCallFinallyBlockRange(unsigned finallyIndex, BasicBlock** begBlk, BasicBlock** endBlk);
+
+#ifdef DEBUG
+ // Given a BBJ_CALLFINALLY block and the EH region index of the finally it is calling, return
+ // 'true' if the BBJ_CALLFINALLY is in the correct EH region.
+ bool ehCallFinallyInCorrectRegion(BasicBlock* blockCallFinally, unsigned finallyIndex);
+#endif // DEBUG
+
+#if FEATURE_EH_FUNCLETS
+ // Do we need a PSPSym in the main function? For codegen purposes, we only need one
+ // if there is a filter that protects a region with a nested EH clause (such as a
+ // try/catch nested in the 'try' body of a try/filter/filter-handler). See
+ // genFuncletProlog() for more details. However, the VM seems to use it for more
+ // purposes, maybe including debugging. Until we are sure otherwise, always create
+ // a PSPSym for functions with any EH.
+ bool ehNeedsPSPSym() const
+ {
+ return compHndBBtabCount > 0;
+ }
+
+ bool ehAnyFunclets(); // Are there any funclets in this function?
+ unsigned ehFuncletCount(); // Return the count of funclets in the function
+
+ unsigned bbThrowIndex(BasicBlock* blk); // Get the index to use as the cache key for sharing throw blocks
+#else // !FEATURE_EH_FUNCLETS
+ bool ehAnyFunclets()
+ {
+ return false;
+ }
+ unsigned ehFuncletCount()
+ {
+ return 0;
+ }
+
+ unsigned bbThrowIndex(BasicBlock* blk)
+ {
+ return blk->bbTryIndex;
+ } // Get the index to use as the cache key for sharing throw blocks
+#endif // !FEATURE_EH_FUNCLETS
+
+ // Returns a flowList representing the "EH predecessors" of "blk". These are the normal predecessors of
+ // "blk", plus one special case: if "blk" is the first block of a handler, considers the predecessor(s) of the first
+ // first block of the corresponding try region to be "EH predecessors". (If there is a single such predecessor,
+ // for example, we want to consider that the immediate dominator of the catch clause start block, so it's
+ // convenient to also consider it a predecessor.)
+ flowList* BlockPredsWithEH(BasicBlock* blk);
+
+ // This table is useful for memoization of the method above.
+ typedef SimplerHashTable<BasicBlock*, PtrKeyFuncs<BasicBlock>, flowList*, JitSimplerHashBehavior>
+ BlockToFlowListMap;
+ BlockToFlowListMap* m_blockToEHPreds;
+ BlockToFlowListMap* GetBlockToEHPreds()
+ {
+ if (m_blockToEHPreds == nullptr)
+ {
+ m_blockToEHPreds = new (getAllocator()) BlockToFlowListMap(getAllocator());
+ }
+ return m_blockToEHPreds;
+ }
+
+ void* ehEmitCookie(BasicBlock* block);
+ UNATIVE_OFFSET ehCodeOffset(BasicBlock* block);
+
+ EHblkDsc* ehInitHndRange(BasicBlock* src, IL_OFFSET* hndBeg, IL_OFFSET* hndEnd, bool* inFilter);
+
+ EHblkDsc* ehInitTryRange(BasicBlock* src, IL_OFFSET* tryBeg, IL_OFFSET* tryEnd);
+
+ EHblkDsc* ehInitHndBlockRange(BasicBlock* blk, BasicBlock** hndBeg, BasicBlock** hndLast, bool* inFilter);
+
+ EHblkDsc* ehInitTryBlockRange(BasicBlock* blk, BasicBlock** tryBeg, BasicBlock** tryLast);
+
+ void fgSetTryEnd(EHblkDsc* handlerTab, BasicBlock* newTryLast);
+
+ void fgSetHndEnd(EHblkDsc* handlerTab, BasicBlock* newHndLast);
+
+ void fgSkipRmvdBlocks(EHblkDsc* handlerTab);
+
+ void fgAllocEHTable();
+
+ void fgRemoveEHTableEntry(unsigned XTnum);
+
+#if FEATURE_EH_FUNCLETS
+
+ EHblkDsc* fgAddEHTableEntry(unsigned XTnum);
+
+#endif // FEATURE_EH_FUNCLETS
+
+#if !FEATURE_EH
+ void fgRemoveEH();
+#endif // !FEATURE_EH
+
+ void fgSortEHTable();
+
+ // Causes the EH table to obey some well-formedness conditions, by inserting
+ // empty BB's when necessary:
+ // * No block is both the first block of a handler and the first block of a try.
+ // * No block is the first block of multiple 'try' regions.
+ // * No block is the last block of multiple EH regions.
+ void fgNormalizeEH();
+ bool fgNormalizeEHCase1();
+ bool fgNormalizeEHCase2();
+ bool fgNormalizeEHCase3();
+
+#ifdef DEBUG
+ void dispIncomingEHClause(unsigned num, const CORINFO_EH_CLAUSE& clause);
+ void dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& clause);
+ void fgVerifyHandlerTab();
+ void fgDispHandlerTab();
+#endif // DEBUG
+
+ bool fgNeedToSortEHTable;
+
+ void verInitEHTree(unsigned numEHClauses);
+ void verInsertEhNode(CORINFO_EH_CLAUSE* clause, EHblkDsc* handlerTab);
+ void verInsertEhNodeInTree(EHNodeDsc** ppRoot, EHNodeDsc* node);
+ void verInsertEhNodeParent(EHNodeDsc** ppRoot, EHNodeDsc* node);
+ void verCheckNestingLevel(EHNodeDsc* initRoot);
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX GenTree and BasicBlock XX
+ XX XX
+ XX Functions to allocate and display the GenTrees and BasicBlocks XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+ // Functions to create nodes
+ GenTreeStmt* gtNewStmt(GenTreePtr expr = nullptr, IL_OFFSETX offset = BAD_IL_OFFSET);
+
+ // For unary opers.
+ GenTreePtr gtNewOperNode(genTreeOps oper, var_types type, GenTreePtr op1, bool doSimplifications = TRUE);
+
+ // For binary opers.
+ GenTreePtr gtNewOperNode(genTreeOps oper, var_types type, GenTreePtr op1, GenTreePtr op2);
+
+ GenTreePtr gtNewQmarkNode(var_types type, GenTreePtr cond, GenTreePtr colon);
+
+ GenTreePtr gtNewLargeOperNode(genTreeOps oper,
+ var_types type = TYP_I_IMPL,
+ GenTreePtr op1 = nullptr,
+ GenTreePtr op2 = nullptr);
+
+ GenTreeIntCon* gtNewIconNode(ssize_t value, var_types type = TYP_INT);
+
+ GenTree* gtNewPhysRegNode(regNumber reg, var_types type);
+
+ GenTree* gtNewPhysRegNode(regNumber reg, GenTree* src);
+
+ GenTreePtr gtNewJmpTableNode();
+ GenTreePtr gtNewIconHandleNode(
+ size_t value, unsigned flags, FieldSeqNode* fields = nullptr, unsigned handle1 = 0, void* handle2 = nullptr);
+
+ unsigned gtTokenToIconFlags(unsigned token);
+
+ GenTreePtr gtNewIconEmbHndNode(void* value,
+ void* pValue,
+ unsigned flags,
+ unsigned handle1 = 0,
+ void* handle2 = nullptr,
+ void* compileTimeHandle = nullptr);
+
+ GenTreePtr gtNewIconEmbScpHndNode(CORINFO_MODULE_HANDLE scpHnd, unsigned hnd1 = 0, void* hnd2 = nullptr);
+ GenTreePtr gtNewIconEmbClsHndNode(CORINFO_CLASS_HANDLE clsHnd, unsigned hnd1 = 0, void* hnd2 = nullptr);
+ GenTreePtr gtNewIconEmbMethHndNode(CORINFO_METHOD_HANDLE methHnd, unsigned hnd1 = 0, void* hnd2 = nullptr);
+ GenTreePtr gtNewIconEmbFldHndNode(CORINFO_FIELD_HANDLE fldHnd, unsigned hnd1 = 0, void* hnd2 = nullptr);
+
+ GenTreePtr gtNewStringLiteralNode(InfoAccessType iat, void* pValue);
+
+ GenTreePtr gtNewLconNode(__int64 value);
+
+ GenTreePtr gtNewDconNode(double value);
+
+ GenTreePtr gtNewSconNode(int CPX, CORINFO_MODULE_HANDLE scpHandle);
+
+ GenTreePtr gtNewZeroConNode(var_types type);
+
+ GenTreePtr gtNewOneConNode(var_types type);
+
+ GenTreeBlk* gtNewBlkOpNode(
+ genTreeOps oper, GenTreePtr dst, GenTreePtr srcOrFillVal, GenTreePtr sizeOrClsTok, bool isVolatile);
+
+ GenTree* gtNewBlkOpNode(GenTreePtr dst, GenTreePtr srcOrFillVal, unsigned size, bool isVolatile, bool isCopyBlock);
+
+protected:
+ void gtBlockOpInit(GenTreePtr result, GenTreePtr dst, GenTreePtr srcOrFillVal, bool isVolatile);
+
+public:
+ GenTree* gtNewObjNode(CORINFO_CLASS_HANDLE structHnd, GenTreePtr addr);
+ void gtSetObjGcInfo(GenTreeObj* objNode);
+ GenTree* gtNewStructVal(CORINFO_CLASS_HANDLE structHnd, GenTreePtr addr);
+ GenTree* gtNewBlockVal(GenTreePtr addr, unsigned size);
+
+ GenTree* gtNewCpObjNode(GenTreePtr dst, GenTreePtr src, CORINFO_CLASS_HANDLE structHnd, bool isVolatile);
+
+ GenTreeArgList* gtNewListNode(GenTreePtr op1, GenTreeArgList* op2);
+
+ GenTreeCall* gtNewCallNode(gtCallTypes callType,
+ CORINFO_METHOD_HANDLE handle,
+ var_types type,
+ GenTreeArgList* args,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET);
+
+ GenTreeCall* gtNewIndCallNode(GenTreePtr addr,
+ var_types type,
+ GenTreeArgList* args,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET);
+
+ GenTreeCall* gtNewHelperCallNode(unsigned helper,
+ var_types type,
+ unsigned flags = 0,
+ GenTreeArgList* args = nullptr);
+
+ GenTreePtr gtNewLclvNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs = BAD_IL_OFFSET);
+
+#ifdef FEATURE_SIMD
+ GenTreeSIMD* gtNewSIMDNode(
+ var_types type, GenTreePtr op1, SIMDIntrinsicID simdIntrinsicID, var_types baseType, unsigned size);
+ GenTreeSIMD* gtNewSIMDNode(var_types type,
+ GenTreePtr op1,
+ GenTreePtr op2,
+ SIMDIntrinsicID simdIntrinsicID,
+ var_types baseType,
+ unsigned size);
+#endif
+
+ GenTreePtr gtNewLclLNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs = BAD_IL_OFFSET);
+ GenTreeLclFld* gtNewLclFldNode(unsigned lnum, var_types type, unsigned offset);
+ GenTreePtr gtNewInlineCandidateReturnExpr(GenTreePtr inlineCandidate, var_types type);
+
+ GenTreePtr gtNewCodeRef(BasicBlock* block);
+
+ GenTreePtr gtNewFieldRef(
+ var_types typ, CORINFO_FIELD_HANDLE fldHnd, GenTreePtr obj = nullptr, DWORD offset = 0, bool nullcheck = false);
+
+ GenTreePtr gtNewIndexRef(var_types typ, GenTreePtr arrayOp, GenTreePtr indexOp);
+
+ GenTreeArgList* gtNewArgList(GenTreePtr op);
+ GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2);
+ GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2, GenTreePtr op3);
+
+ GenTreeArgList* gtNewAggregate(GenTree* element);
+
+ static fgArgTabEntryPtr gtArgEntryByArgNum(GenTreePtr call, unsigned argNum);
+ static fgArgTabEntryPtr gtArgEntryByNode(GenTreePtr call, GenTreePtr node);
+ fgArgTabEntryPtr gtArgEntryByLateArgIndex(GenTreePtr call, unsigned lateArgInx);
+ bool gtArgIsThisPtr(fgArgTabEntryPtr argEntry);
+
+ GenTreePtr gtNewAssignNode(GenTreePtr dst, GenTreePtr src);
+
+ GenTreePtr gtNewTempAssign(unsigned tmp, GenTreePtr val);
+
+ GenTreePtr gtNewRefCOMfield(GenTreePtr objPtr,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_ACCESS_FLAGS access,
+ CORINFO_FIELD_INFO* pFieldInfo,
+ var_types lclTyp,
+ CORINFO_CLASS_HANDLE structType,
+ GenTreePtr assg);
+
+ GenTreePtr gtNewNothingNode();
+
+ GenTreePtr gtNewArgPlaceHolderNode(var_types type, CORINFO_CLASS_HANDLE clsHnd);
+
+ GenTreePtr gtUnusedValNode(GenTreePtr expr);
+
+ GenTreePtr gtNewCastNode(var_types typ, GenTreePtr op1, var_types castType);
+
+ GenTreePtr gtNewCastNodeL(var_types typ, GenTreePtr op1, var_types castType);
+
+ GenTreePtr gtNewAllocObjNode(unsigned int helper, CORINFO_CLASS_HANDLE clsHnd, var_types type, GenTreePtr op1);
+
+ //------------------------------------------------------------------------
+ // Other GenTree functions
+
+ GenTreePtr gtClone(GenTree* tree, bool complexOK = false);
+
+ GenTreePtr gtCloneExpr(GenTree* tree, unsigned addFlags = 0, unsigned varNum = (unsigned)-1, int varVal = 0);
+
+ GenTreePtr gtReplaceTree(GenTreePtr stmt, GenTreePtr tree, GenTreePtr replacementTree);
+
+ void gtUpdateSideEffects(GenTreePtr tree, unsigned oldGtFlags, unsigned newGtFlags);
+
+ // Returns "true" iff the complexity (not formally defined, but first interpretation
+ // is #of nodes in subtree) of "tree" is greater than "limit".
+ // (This is somewhat redundant with the "gtCostEx/gtCostSz" fields, but can be used
+ // before they have been set.)
+ bool gtComplexityExceeds(GenTreePtr* tree, unsigned limit);
+
+ bool gtCompareTree(GenTree* op1, GenTree* op2);
+
+ GenTreePtr gtReverseCond(GenTree* tree);
+
+ bool gtHasRef(GenTree* tree, ssize_t lclNum, bool defOnly);
+
+ bool gtHasLocalsWithAddrOp(GenTreePtr tree);
+
+ unsigned gtHashValue(GenTree* tree);
+
+ unsigned gtSetListOrder(GenTree* list, bool regs);
+
+ void gtWalkOp(GenTree** op1, GenTree** op2, GenTree* adr, bool constOnly);
+
+#ifdef DEBUG
+ GenTreePtr gtWalkOpEffectiveVal(GenTreePtr op);
+#endif
+
+ void gtPrepareCost(GenTree* tree);
+ bool gtIsLikelyRegVar(GenTree* tree);
+
+ unsigned gtSetEvalOrderAndRestoreFPstkLevel(GenTree* tree);
+
+ // Returns true iff the secondNode can be swapped with firstNode.
+ bool gtCanSwapOrder(GenTree* firstNode, GenTree* secondNode);
+
+ unsigned gtSetEvalOrder(GenTree* tree);
+
+#if FEATURE_STACK_FP_X87
+ bool gtFPstLvlRedo;
+ void gtComputeFPlvls(GenTreePtr tree);
+#endif // FEATURE_STACK_FP_X87
+
+ void gtSetStmtInfo(GenTree* stmt);
+
+ // Returns "true" iff "node" has any of the side effects in "flags".
+ bool gtNodeHasSideEffects(GenTreePtr node, unsigned flags);
+
+ // Returns "true" iff "tree" or its (transitive) children have any of the side effects in "flags".
+ bool gtTreeHasSideEffects(GenTreePtr tree, unsigned flags);
+
+ // Appends 'expr' in front of 'list'.
+ // 'list' will typically start off as 'nullptr';
+ // when 'list' is non-null, a GT_COMMA node is used to insert 'expr'.
+ GenTreePtr gtBuildCommaList(GenTreePtr list, GenTreePtr expr);
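+
+ // Illustrative sketch (not part of the original header): starting from 'list' == nullptr, calling
+ // gtBuildCommaList(list, e1) yields just e1; a further gtBuildCommaList(e1, e2) then yields a GT_COMMA
+ // node that places e2 in front of e1, so the most recently appended expression comes first.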
+
+ void gtExtractSideEffList(GenTreePtr expr,
+ GenTreePtr* pList,
+ unsigned flags = GTF_SIDE_EFFECT,
+ bool ignoreRoot = false);
+
+ GenTreePtr gtGetThisArg(GenTreePtr call);
+
+ // Static fields of struct types (and sometimes the types that those are reduced to) are represented by having the
+ // static field contain an object pointer to the boxed struct. This simplifies the GC implementation...but
+ // complicates the JIT somewhat. This predicate returns "true" iff a node with type "fieldNodeType", representing
+ // the given "fldHnd", is such an object pointer.
+ bool gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_FIELD_HANDLE fldHnd);
+
+ // Return true if call is a recursive call; return false otherwise.
+ bool gtIsRecursiveCall(GenTreeCall* call)
+ {
+ return (call->gtCallMethHnd == info.compMethodHnd);
+ }
+
+ //-------------------------------------------------------------------------
+
+ GenTreePtr gtFoldExpr(GenTreePtr tree);
+ GenTreePtr
+#ifdef __clang__
+ // TODO-Amd64-Unix: Remove this when the clang optimizer is fixed and/or the method implementation is
+ // refactored into simpler code. This is a workaround for a bug in the clang-3.5 optimizer. The issue is that in a
+ // release build the optimizer mistypes the args of (ltemp / lval2) as int (or just wrongly decides to use a 32-bit
+ // operation for the MIN_LONG corner case), i.e., it does a 32-bit div operation instead of a 64-bit one - see
+ // the implementation of the method in gentree.cpp. For the case of lval1 and lval2 equal to MIN_LONG
+ // (0x8000000000000000) this results in raising a SIGFPE. The method implementation is rather complex. Disable
+ // optimizations for now.
+ __attribute__((optnone))
+#endif // __clang__
+ gtFoldExprConst(GenTreePtr tree);
+ GenTreePtr gtFoldExprSpecial(GenTreePtr tree);
+ GenTreePtr gtFoldExprCompare(GenTreePtr tree);
+
+ //-------------------------------------------------------------------------
+ // Get the handle, if any.
+ CORINFO_CLASS_HANDLE gtGetStructHandleIfPresent(GenTreePtr tree);
+ // Get the handle, and assert if not found.
+ CORINFO_CLASS_HANDLE gtGetStructHandle(GenTreePtr tree);
+
+//-------------------------------------------------------------------------
+// Functions to display the trees
+
+#ifdef DEBUG
+ void gtDispNode(GenTreePtr tree, IndentStack* indentStack, __in_z const char* msg, bool isLIR);
+
+ void gtDispVN(GenTreePtr tree);
+ void gtDispConst(GenTreePtr tree);
+ void gtDispLeaf(GenTreePtr tree, IndentStack* indentStack);
+ void gtDispNodeName(GenTreePtr tree);
+ void gtDispRegVal(GenTreePtr tree);
+
+ enum IndentInfo
+ {
+ IINone,
+ IIArc,
+ IIArcTop,
+ IIArcBottom,
+ IIEmbedded,
+ IIError,
+ IndentInfoCount
+ };
+ void gtDispChild(GenTreePtr child,
+ IndentStack* indentStack,
+ IndentInfo arcType,
+ __in_opt const char* msg = nullptr,
+ bool topOnly = false);
+ void gtDispTree(GenTreePtr tree,
+ IndentStack* indentStack = nullptr,
+ __in_opt const char* msg = nullptr,
+ bool topOnly = false,
+ bool isLIR = false);
+ void gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, const char** ilNameOut, unsigned* ilNumOut);
+ int gtGetLclVarName(unsigned lclNum, char* buf, unsigned buf_remaining);
+ char* gtGetLclVarName(unsigned lclNum);
+ void gtDispLclVar(unsigned varNum, bool padForBiggestDisp = true);
+ void gtDispTreeList(GenTreePtr tree, IndentStack* indentStack = nullptr);
+ void gtGetArgMsg(GenTreePtr call, GenTreePtr arg, unsigned argNum, int listCount, char* bufp, unsigned bufLength);
+ void gtGetLateArgMsg(GenTreePtr call, GenTreePtr arg, int argNum, int listCount, char* bufp, unsigned bufLength);
+ void gtDispArgList(GenTreePtr tree, IndentStack* indentStack);
+ void gtDispFieldSeq(FieldSeqNode* pfsn);
+
+ void gtDispRange(LIR::ReadOnlyRange const& range);
+
+ void gtDispTreeRange(LIR::Range& containingRange, GenTree* tree);
+
+ void gtDispLIRNode(GenTree* node);
+#endif
+
+ // For tree walks
+
+ enum fgWalkResult
+ {
+ WALK_CONTINUE,
+ WALK_SKIP_SUBTREES,
+ WALK_ABORT
+ };
+ struct fgWalkData;
+ typedef fgWalkResult(fgWalkPreFn)(GenTreePtr* pTree, fgWalkData* data);
+ typedef fgWalkResult(fgWalkPostFn)(GenTreePtr* pTree, fgWalkData* data);
+
+#ifdef DEBUG
+ static fgWalkPreFn gtAssertColonCond;
+#endif
+ static fgWalkPreFn gtMarkColonCond;
+ static fgWalkPreFn gtClearColonCond;
+
+ GenTreePtr* gtFindLink(GenTreePtr stmt, GenTreePtr node);
+ bool gtHasCatchArg(GenTreePtr tree);
+ bool gtHasUnmanagedCall(GenTreePtr tree);
+
+ typedef ArrayStack<GenTree*> GenTreeStack;
+
+ static bool gtHasCallOnStack(GenTreeStack* parentStack);
+ void gtCheckQuirkAddrExposedLclVar(GenTreePtr argTree, GenTreeStack* parentStack);
+
+//=========================================================================
+// BasicBlock functions
+#ifdef DEBUG
+ // This is a debug flag we will use to assert when creating a block during codegen,
+ // as this interferes with procedure splitting. If you know what you're doing, set
+ // it to true before creating the block. (DEBUG only)
+ bool fgSafeBasicBlockCreation;
+#endif
+
+ BasicBlock* bbNewBasicBlock(BBjumpKinds jumpKind);
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX LclVarsInfo XX
+ XX XX
+ XX The variables to be used by the code generator. XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+ //
+ // For both PROMOTION_TYPE_NONE and PROMOTION_TYPE_DEPENDENT the struct will
+ // be placed in the stack frame and its fields must be laid out sequentially.
+ //
+ // For PROMOTION_TYPE_INDEPENDENT each of the struct's fields is replaced by
+ // a local variable that can be enregistered or placed in the stack frame.
+ // The fields do not need to be laid out sequentially.
+ //
+ enum lvaPromotionType
+ {
+ PROMOTION_TYPE_NONE, // The struct local is not promoted
+ PROMOTION_TYPE_INDEPENDENT, // The struct local is promoted,
+ // and its field locals are independent of its parent struct local.
+ PROMOTION_TYPE_DEPENDENT // The struct local is promoted,
+ // but its field locals depend on its parent struct local.
+ };
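+
+ // Illustrative sketch (not part of the original header): for a local of a hypothetical type
+ //     struct Point { int x; int y; };
+ // PROMOTION_TYPE_INDEPENDENT replaces the struct with two int locals that can each be enregistered,
+ // while PROMOTION_TYPE_DEPENDENT keeps the struct on the stack frame with x and y laid out
+ // sequentially, the field locals being views into that frame location.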
+
+ static int __cdecl RefCntCmp(const void* op1, const void* op2);
+ static int __cdecl WtdRefCntCmp(const void* op1, const void* op2);
+
+ /*****************************************************************************/
+
+ enum FrameLayoutState
+ {
+ NO_FRAME_LAYOUT,
+ INITIAL_FRAME_LAYOUT,
+ PRE_REGALLOC_FRAME_LAYOUT,
+ REGALLOC_FRAME_LAYOUT,
+ TENTATIVE_FRAME_LAYOUT,
+ FINAL_FRAME_LAYOUT
+ };
+
+public:
+ bool lvaRefCountingStarted; // Set to true when we have started counting the local vars
+ bool lvaLocalVarRefCounted; // Set to true after we have called lvaMarkLocalVars()
+ bool lvaSortAgain; // true: We need to sort the lvaTable
+ bool lvaTrackedFixed; // true: We cannot add new 'tracked' variables
+ unsigned lvaCount; // total number of locals
+
+ unsigned lvaRefCount; // total number of references to locals
+ LclVarDsc* lvaTable; // variable descriptor table
+ unsigned lvaTableCnt; // lvaTable size (>= lvaCount)
+
+ LclVarDsc** lvaRefSorted; // table sorted by refcount
+
+ unsigned short lvaTrackedCount; // actual # of locals being tracked
+ unsigned lvaTrackedCountInSizeTUnits; // min # of size_t's sufficient to hold a bit for all the locals being tracked
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Only for AMD64 System V: cache the first stack-homed caller argument.
+ unsigned lvaFirstStackIncomingArgNum; // First argument with a stack slot in the caller.
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef DEBUG
+ VARSET_TP lvaTrackedVars; // set of tracked variables
+#endif
+#ifndef _TARGET_64BIT_
+ VARSET_TP lvaLongVars; // set of long (64-bit) variables
+#endif
+ VARSET_TP lvaFloatVars; // set of floating-point (32-bit and 64-bit) variables
+
+ unsigned lvaCurEpoch; // VarSets are relative to a specific set of tracked var indices.
+ // If that changes, this changes. VarSets from different epochs
+ // cannot be meaningfully combined.
+
+ unsigned GetCurLVEpoch()
+ {
+ return lvaCurEpoch;
+ }
+
+ // reverse map of tracked number to var number
+ unsigned lvaTrackedToVarNum[lclMAX_TRACKED];
+
+#ifdef LEGACY_BACKEND
+ // variable interference graph
+ VARSET_TP lvaVarIntf[lclMAX_TRACKED];
+#endif
+
+ // variable preference graph
+ VARSET_TP lvaVarPref[lclMAX_TRACKED];
+
+#if DOUBLE_ALIGN
+#ifdef DEBUG
+ // # of procs compiled with a double-aligned stack
+ static unsigned s_lvaDoubleAlignedProcsCount;
+#endif
+#endif
+
+ // Getters and setters for address-exposed and do-not-enregister local var properties.
+ bool lvaVarAddrExposed(unsigned varNum);
+ void lvaSetVarAddrExposed(unsigned varNum);
+ bool lvaVarDoNotEnregister(unsigned varNum);
+#ifdef DEBUG
+ // Reasons why we can't enregister. Some of these correspond to debug properties of local vars.
+ enum DoNotEnregisterReason
+ {
+ DNER_AddrExposed,
+ DNER_IsStruct,
+ DNER_LocalField,
+ DNER_VMNeedsStackAddr,
+ DNER_LiveInOutOfHandler,
+ DNER_LiveAcrossUnmanagedCall,
+ DNER_BlockOp, // Is read or written via a block operation that explicitly takes the address.
+#ifdef JIT32_GCENCODER
+ DNER_PinningRef,
+#endif
+ };
+#endif
+ void lvaSetVarDoNotEnregister(unsigned varNum DEBUGARG(DoNotEnregisterReason reason));
+
+ unsigned lvaVarargsHandleArg;
+#ifdef _TARGET_X86_
+ unsigned lvaVarargsBaseOfStkArgs; // Pointer (computed based on incoming varargs handle) to the start of the stack
+ // arguments
+#endif // _TARGET_X86_
+
+ unsigned lvaInlinedPInvokeFrameVar; // variable representing the InlinedCallFrame
+ unsigned lvaReversePInvokeFrameVar; // variable representing the reverse PInvoke frame
+#if FEATURE_FIXED_OUT_ARGS
+ unsigned lvaPInvokeFrameRegSaveVar; // variable representing the RegSave for PInvoke inlining.
+#endif
+ unsigned lvaMonAcquired; // boolean variable introduced in synchronized methods
+ // that tracks whether the lock has been taken
+
+ unsigned lvaArg0Var; // The lclNum of arg0. Normally this will be info.compThisArg.
+ // However, if there is a "ldarga 0" or "starg 0" in the IL,
+ // we will redirect all "ldarg(a) 0" and "starg 0" to this temp.
+
+ unsigned lvaInlineeReturnSpillTemp; // The temp to spill the non-VOID return expression
+ // in case there are multiple BBJ_RETURN blocks in the inlinee.
+
+#if FEATURE_FIXED_OUT_ARGS
+ unsigned lvaOutgoingArgSpaceVar; // dummy TYP_LCLBLK var for fixed outgoing argument space
+ unsigned lvaOutgoingArgSpaceSize; // size of fixed outgoing argument space
+#endif // FEATURE_FIXED_OUT_ARGS
+
+#ifdef _TARGET_ARM_
+ // On architectures whose ABIs allow structs to be passed in registers, struct promotion will sometimes
+ // require us to "rematerialize" a struct from it's separate constituent field variables. Packing several sub-word
+ // field variables into an argument register is a hard problem. It's easier to reserve a word of memory into which
+ // such field can be copied, after which the assembled memory word can be read into the register. We will allocate
+ // this variable to be this scratch word whenever struct promotion occurs.
+ unsigned lvaPromotedStructAssemblyScratchVar;
+#endif // _TARGET_ARM_
+
+#ifdef DEBUG
+ unsigned lvaReturnEspCheck; // confirms ESP not corrupted on return
+ unsigned lvaCallEspCheck; // confirms ESP not corrupted after a call
+#endif
+
+ bool lvaGenericsContextUsed;
+
+ bool lvaKeepAliveAndReportThis(); // Synchronized instance method of a reference type, or
+ // CORINFO_GENERICS_CTXT_FROM_THIS?
+ bool lvaReportParamTypeArg(); // Exceptions and CORINFO_GENERICS_CTXT_FROM_PARAMTYPEARG?
+
+//-------------------------------------------------------------------------
+// All these frame offsets are inter-related and must be kept in sync
+
+#if !FEATURE_EH_FUNCLETS
+ // This is used for the callable handlers
+ unsigned lvaShadowSPslotsVar; // TYP_BLK variable for all the shadow SP slots
+#endif // !FEATURE_EH_FUNCLETS
+
+ unsigned lvaCachedGenericContextArgOffs;
+ unsigned lvaCachedGenericContextArgOffset(); // For CORINFO_CALLCONV_PARAMTYPE and if generic context is passed as
+ // THIS pointer
+
+ unsigned lvaLocAllocSPvar; // variable which has the result of the last alloca/localloc
+
+ unsigned lvaNewObjArrayArgs; // variable with arguments for new MD array helper
+
+ // TODO-Review: Prior to reg predict we reserve 24 bytes for spill temps;
+ // after reg predict we will use a computed maxTmpSize,
+ // which is based upon the number of spill temps predicted by reg predict.
+ // All this is necessary because if we under-estimate the size of the spill
+ // temps, we could fail when encoding instructions that reference stack offsets for ARM.
+ //
+ // Pre codegen max spill temp size.
+ static const unsigned MAX_SPILL_TEMP_SIZE = 24;
+
+ //-------------------------------------------------------------------------
+
+ unsigned lvaGetMaxSpillTempSize();
+#ifdef _TARGET_ARM_
+ bool lvaIsPreSpilled(unsigned lclNum, regMaskTP preSpillMask);
+#endif // _TARGET_ARM_
+ void lvaAssignFrameOffsets(FrameLayoutState curState);
+ void lvaFixVirtualFrameOffsets();
+
+#ifndef LEGACY_BACKEND
+ void lvaUpdateArgsWithInitialReg();
+#endif // !LEGACY_BACKEND
+
+ void lvaAssignVirtualFrameOffsetsToArgs();
+#ifdef UNIX_AMD64_ABI
+ int lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs, int* callerArgOffset);
+#else // !UNIX_AMD64_ABI
+ int lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs);
+#endif // !UNIX_AMD64_ABI
+ void lvaAssignVirtualFrameOffsetsToLocals();
+ int lvaAllocLocalAndSetVirtualOffset(unsigned lclNum, unsigned size, int stkOffs);
+#ifdef _TARGET_AMD64_
+ // Returns true if compCalleeRegsPushed (including RBP if used as frame pointer) is even.
+ bool lvaIsCalleeSavedIntRegCountEven();
+#endif
+ void lvaAlignFrame();
+ void lvaAssignFrameOffsetsToPromotedStructs();
+ int lvaAllocateTemps(int stkOffs, bool mustDoubleAlign);
+
+#ifdef DEBUG
+ void lvaDumpRegLocation(unsigned lclNum);
+ void lvaDumpFrameLocation(unsigned lclNum);
+ void lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t refCntWtdWidth = 6);
+ void lvaTableDump(FrameLayoutState curState = NO_FRAME_LAYOUT); // NO_FRAME_LAYOUT means use the current frame
+ // layout state defined by lvaDoneFrameLayout
+#endif
+
+// Limit frame size to 1GB. The maximum is 2GB in theory - make it intentionally smaller
+// to avoid bugs from borderline cases.
+#define MAX_FrameSize 0x3FFFFFFF
+ void lvaIncrementFrameSize(unsigned size);
+
+ unsigned lvaFrameSize(FrameLayoutState curState);
+
+ // Returns the caller-SP-relative offset for the given SP- or FP-relative offset, as indicated by 'isFpBased'.
+ int lvaToCallerSPRelativeOffset(int offs, bool isFpBased);
+
+ // Returns the caller-SP-relative offset for the local variable "varNum."
+ int lvaGetCallerSPRelativeOffset(unsigned varNum);
+
+ // Returns the SP-relative offset for the local variable "varNum". Illegal to ask this for functions with localloc.
+ int lvaGetSPRelativeOffset(unsigned varNum);
+
+ int lvaToInitialSPRelativeOffset(unsigned offset, bool isFpBased);
+ int lvaGetInitialSPRelativeOffset(unsigned varNum);
+
+ //------------------------ For splitting types ----------------------------
+
+ void lvaInitTypeRef();
+
+ void lvaInitArgs(InitVarDscInfo* varDscInfo);
+ void lvaInitThisPtr(InitVarDscInfo* varDscInfo);
+ void lvaInitRetBuffArg(InitVarDscInfo* varDscInfo);
+ void lvaInitUserArgs(InitVarDscInfo* varDscInfo);
+ void lvaInitGenericsCtxt(InitVarDscInfo* varDscInfo);
+ void lvaInitVarArgsHandle(InitVarDscInfo* varDscInfo);
+
+ void lvaInitVarDsc(LclVarDsc* varDsc,
+ unsigned varNum,
+ CorInfoType corInfoType,
+ CORINFO_CLASS_HANDLE typeHnd,
+ CORINFO_ARG_LIST_HANDLE varList,
+ CORINFO_SIG_INFO* varSig);
+
+ static unsigned lvaTypeRefMask(var_types type);
+
+ var_types lvaGetActualType(unsigned lclNum);
+ var_types lvaGetRealType(unsigned lclNum);
+
+ //-------------------------------------------------------------------------
+
+ void lvaInit();
+
+ unsigned lvaArgSize(const void* argTok);
+ unsigned lvaLclSize(unsigned varNum);
+ unsigned lvaLclExactSize(unsigned varNum);
+
+ bool lvaLclVarRefs(GenTreePtr tree, GenTreePtr* findPtr, varRefKinds* refsPtr, void* result);
+
+ // Call lvaLclVarRefs on "true"; accumulate "*result" into whichever of
+ // "allVars" and "trkdVars" is indiated by the nullness of "findPtr"; return
+ // the return result.
+ bool lvaLclVarRefsAccum(
+ GenTreePtr tree, GenTreePtr* findPtr, varRefKinds* refsPtr, ALLVARSET_TP* allVars, VARSET_TP* trkdVars);
+
+ // If "findPtr" is non-NULL, assumes "result" is an "ALLVARSET_TP*", and
+ // (destructively) unions "allVars" into "*result". Otherwise, assumes "result" is a "VARSET_TP*",
+ // and (destructively) unions "trkedVars" into "*result".
+ void lvaLclVarRefsAccumIntoRes(GenTreePtr* findPtr,
+ void* result,
+ ALLVARSET_VALARG_TP allVars,
+ VARSET_VALARG_TP trkdVars);
+
+ bool lvaHaveManyLocals() const;
+
+ unsigned lvaGrabTemp(bool shortLifetime DEBUGARG(const char* reason));
+ unsigned lvaGrabTemps(unsigned cnt DEBUGARG(const char* reason));
+ unsigned lvaGrabTempWithImplicitUse(bool shortLifetime DEBUGARG(const char* reason));
+
+ void lvaSortOnly();
+ void lvaSortByRefCount();
+ void lvaDumpRefCounts();
+
+ void lvaMarkLocalVars(BasicBlock* block);
+
+ void lvaMarkLocalVars(); // Local variable ref-counting
+
+ void lvaAllocOutgoingArgSpace(); // 'Commit' lvaOutgoingArgSpaceSize and lvaOutgoingArgSpaceVar
+
+ VARSET_VALRET_TP lvaStmtLclMask(GenTreePtr stmt);
+
+ static fgWalkPreFn lvaIncRefCntsCB;
+ void lvaIncRefCnts(GenTreePtr tree);
+
+ static fgWalkPreFn lvaDecRefCntsCB;
+ void lvaDecRefCnts(GenTreePtr tree);
+ void lvaDecRefCnts(BasicBlock* basicBlock, GenTreePtr tree);
+ void lvaRecursiveDecRefCounts(GenTreePtr tree);
+ void lvaRecursiveIncRefCounts(GenTreePtr tree);
+
+#ifdef DEBUG
+ struct lvaStressLclFldArgs
+ {
+ Compiler* m_pCompiler;
+ bool m_bFirstPass;
+ };
+
+ static fgWalkPreFn lvaStressLclFldCB;
+ void lvaStressLclFld();
+
+ void lvaDispVarSet(VARSET_VALARG_TP set, VARSET_VALARG_TP allVars);
+ void lvaDispVarSet(VARSET_VALARG_TP set);
+
+#endif
+
+#ifdef _TARGET_ARM_
+ int lvaFrameAddress(int varNum, bool mustBeFPBased, regNumber* pBaseReg, int addrModeOffset);
+#else
+ int lvaFrameAddress(int varNum, bool* pFPbased);
+#endif
+
+ bool lvaIsParameter(unsigned varNum);
+ bool lvaIsRegArgument(unsigned varNum);
+ BOOL lvaIsOriginalThisArg(unsigned varNum); // Is this varNum the original this argument?
+ BOOL lvaIsOriginalThisReadOnly(); // return TRUE if there is no place in the code
+ // that writes to arg0
+
+ // Struct parameters that are passed by reference are marked as both lvIsParam and lvIsTemp
+ // (this is an overload of lvIsTemp because there are no temp parameters).
+ // For x64 this is 3, 5, 6, 7, >8 byte structs that are passed by reference.
+ // For ARM64, this is structs larger than 16 bytes that are passed by reference.
+ bool lvaIsImplicitByRefLocal(unsigned varNum)
+ {
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ LclVarDsc* varDsc = &(lvaTable[varNum]);
+ if (varDsc->lvIsParam && varDsc->lvIsTemp)
+ {
+ assert((varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_BYREF));
+ return true;
+ }
+#endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ return false;
+ }
+
+ // Returns true if this local var is a multireg struct
+ bool lvaIsMultiregStruct(LclVarDsc* varDsc);
+
+ // If the class is a TYP_STRUCT, get/set a class handle describing it
+
+ CORINFO_CLASS_HANDLE lvaGetStruct(unsigned varNum);
+ void lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool unsafeValueClsCheck, bool setTypeInfo = true);
+
+#define MAX_NumOfFieldsInPromotableStruct 4 // Maximum number of fields in promotable struct
+
+ // Info about struct fields
+ struct lvaStructFieldInfo
+ {
+ CORINFO_FIELD_HANDLE fldHnd;
+ unsigned char fldOffset;
+ unsigned char fldOrdinal;
+ var_types fldType;
+ unsigned fldSize;
+ CORINFO_CLASS_HANDLE fldTypeHnd;
+ };
+
+ // Info about struct to be promoted.
+ struct lvaStructPromotionInfo
+ {
+ CORINFO_CLASS_HANDLE typeHnd;
+ bool canPromote;
+ bool requiresScratchVar;
+ bool containsHoles;
+ bool customLayout;
+ unsigned char fieldCnt;
+ lvaStructFieldInfo fields[MAX_NumOfFieldsInPromotableStruct];
+
+ lvaStructPromotionInfo()
+ : typeHnd(nullptr), canPromote(false), requiresScratchVar(false), containsHoles(false), customLayout(false)
+ {
+ }
+ };
+
+ static int __cdecl lvaFieldOffsetCmp(const void* field1, const void* field2);
+ void lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
+ lvaStructPromotionInfo* StructPromotionInfo,
+ bool sortFields);
+ void lvaCanPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* StructPromotionInfo);
+ void lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* StructPromotionInfo);
+#if !defined(_TARGET_64BIT_)
+ void lvaPromoteLongVars();
+#endif // !defined(_TARGET_64BIT_)
+ unsigned lvaGetFieldLocal(LclVarDsc* varDsc, unsigned int fldOffset);
+ lvaPromotionType lvaGetPromotionType(const LclVarDsc* varDsc);
+ lvaPromotionType lvaGetPromotionType(unsigned varNum);
+ lvaPromotionType lvaGetParentPromotionType(const LclVarDsc* varDsc);
+ lvaPromotionType lvaGetParentPromotionType(unsigned varNum);
+ bool lvaIsFieldOfDependentlyPromotedStruct(const LclVarDsc* varDsc);
+ bool lvaIsGCTracked(const LclVarDsc* varDsc);
+
+ BYTE* lvaGetGcLayout(unsigned varNum);
+ bool lvaTypeIsGC(unsigned varNum);
+ unsigned lvaGSSecurityCookie; // LclVar number
+ bool lvaTempsHaveLargerOffsetThanVars();
+
+ unsigned lvaSecurityObject; // variable representing the security object on the stack
+ unsigned lvaStubArgumentVar; // variable representing the secret stub argument coming in EAX
+
+#if FEATURE_EH_FUNCLETS
+ unsigned lvaPSPSym; // variable representing the PSPSym
+#endif
+
+ InlineInfo* impInlineInfo;
+ InlineStrategy* m_inlineStrategy;
+
+ // The Compiler* that is the root of the inlining tree of which "this" is a member.
+ Compiler* impInlineRoot();
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ unsigned __int64 getInlineCycleCount()
+ {
+ return m_compCycles;
+ }
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ bool fgNoStructPromotion; // Set to TRUE to turn off struct promotion for this method.
+    bool fgNoStructParamPromotion; // Set to TRUE to turn off struct promotion for parameters of this method.
+
+ //=========================================================================
+ // PROTECTED
+ //=========================================================================
+
+protected:
+//---------------- Local variable ref-counting ----------------------------
+
+#if ASSERTION_PROP
+ BasicBlock* lvaMarkRefsCurBlock;
+ GenTreePtr lvaMarkRefsCurStmt;
+#endif
+ BasicBlock::weight_t lvaMarkRefsWeight;
+
+ static fgWalkPreFn lvaMarkLclRefsCallback;
+ void lvaMarkLclRefs(GenTreePtr tree);
+
+ // Keeps the mapping from SSA #'s to VN's for the implicit "Heap" variable.
+ PerSsaArray lvHeapPerSsaData;
+ unsigned lvHeapNumSsaNames;
+
+public:
+ // Returns the address of the per-Ssa data for "Heap" at the given ssaNum (which is required
+ // not to be the SsaConfig::RESERVED_SSA_NUM, which indicates that the variable is
+ // not an SSA variable).
+ LclSsaVarDsc* GetHeapPerSsaData(unsigned ssaNum)
+ {
+ assert(ssaNum != SsaConfig::RESERVED_SSA_NUM);
+ assert(SsaConfig::RESERVED_SSA_NUM == 0);
+ ssaNum--;
+ assert(ssaNum < lvHeapNumSsaNames);
+ return &lvHeapPerSsaData.GetRef(ssaNum);
+ }
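+
+    // Worked example of the numbering above (illustrative; assumes at least one heap SSA name
+    // has been allocated): since SsaConfig::RESERVED_SSA_NUM is 0, the first real heap SSA name
+    // is 1, and it maps to element 0 of lvHeapPerSsaData:
+    //
+    //     LclSsaVarDsc* firstHeapDef = GetHeapPerSsaData(1); // same as &lvHeapPerSsaData.GetRef(0)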
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX Importer XX
+ XX XX
+ XX Imports the given method and converts it to semantic trees XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ void impInit();
+
+ void impImport(BasicBlock* method);
+
+ CORINFO_CLASS_HANDLE impGetRefAnyClass();
+ CORINFO_CLASS_HANDLE impGetRuntimeArgumentHandle();
+ CORINFO_CLASS_HANDLE impGetTypeHandleClass();
+ CORINFO_CLASS_HANDLE impGetStringClass();
+ CORINFO_CLASS_HANDLE impGetObjectClass();
+
+ //=========================================================================
+ // PROTECTED
+ //=========================================================================
+
+protected:
+ //-------------------- Stack manipulation ---------------------------------
+
+ unsigned impStkSize; // Size of the full stack
+
+#define SMALL_STACK_SIZE 16 // number of elements in impSmallStack
+
+ StackEntry impSmallStack[SMALL_STACK_SIZE]; // Use this array if possible
+
+ struct SavedStack // used to save/restore stack contents.
+ {
+ unsigned ssDepth; // number of values on stack
+ StackEntry* ssTrees; // saved tree values
+ };
+
+ bool impIsPrimitive(CorInfoType type);
+ bool impILConsumesAddr(const BYTE* codeAddr, CORINFO_METHOD_HANDLE fncHandle, CORINFO_MODULE_HANDLE scpHandle);
+
+ void impResolveToken(const BYTE* addr, CORINFO_RESOLVED_TOKEN* pResolvedToken, CorInfoTokenKind kind);
+ void impPushOnStackNoType(GenTreePtr tree);
+
+ void impPushOnStack(GenTreePtr tree, typeInfo ti);
+ void impPushNullObjRefOnStack();
+ StackEntry impPopStack();
+ StackEntry impPopStack(CORINFO_CLASS_HANDLE& structTypeRet);
+ GenTreePtr impPopStack(typeInfo& ti);
+ StackEntry& impStackTop(unsigned n = 0);
+
+ void impSaveStackState(SavedStack* savePtr, bool copy);
+ void impRestoreStackState(SavedStack* savePtr);
+
+ GenTreePtr impImportLdvirtftn(GenTreePtr thisPtr,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_CALL_INFO* pCallInfo);
+
+ void impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken);
+
+ void impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo);
+
+ bool impCanPInvokeInline(var_types callRetTyp);
+ bool impCanPInvokeInlineCallSite(var_types callRetTyp);
+ void impCheckForPInvokeCall(GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags);
+ GenTreePtr impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilOffset = BAD_IL_OFFSET);
+ void impPopArgsForUnmanagedCall(GenTreePtr call, CORINFO_SIG_INFO* sig);
+
+ void impInsertHelperCall(CORINFO_HELPER_DESC* helperCall);
+ void impHandleAccessAllowed(CorInfoIsAccessAllowedResult result, CORINFO_HELPER_DESC* helperCall);
+ void impHandleAccessAllowedInternal(CorInfoIsAccessAllowedResult result, CORINFO_HELPER_DESC* helperCall);
+
+ void impInsertCalloutForDelegate(CORINFO_METHOD_HANDLE callerMethodHnd,
+ CORINFO_METHOD_HANDLE calleeMethodHnd,
+ CORINFO_CLASS_HANDLE delegateTypeHnd);
+
+ var_types impImportCall(OPCODE opcode,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken, // Is this a "constrained." call on a
+ // type parameter?
+ GenTreePtr newobjThis,
+ int prefixFlags,
+ CORINFO_CALL_INFO* callInfo,
+ IL_OFFSET rawILOffset);
+
+ bool impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO* methInfo);
+
+ GenTreePtr impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HANDLE retClsHnd);
+
+ GenTreePtr impInitCallLongReturn(GenTreePtr call);
+
+ GenTreePtr impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDLE retClsHnd);
+
+#ifdef DEBUG
+ var_types impImportJitTestLabelMark(int numArgs);
+#endif // DEBUG
+
+ GenTreePtr impInitClass(CORINFO_RESOLVED_TOKEN* pResolvedToken);
+
+ GenTreePtr impImportStaticReadOnlyField(void* fldAddr, var_types lclTyp);
+
+ GenTreePtr impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_ACCESS_FLAGS access,
+ CORINFO_FIELD_INFO* pFieldInfo,
+ var_types lclTyp);
+
+ static void impBashVarAddrsToI(GenTreePtr tree1, GenTreePtr tree2 = nullptr);
+
+ GenTreePtr impImplicitIorI4Cast(GenTreePtr tree, var_types dstTyp);
+
+ GenTreePtr impImplicitR4orR8Cast(GenTreePtr tree, var_types dstTyp);
+
+ void impImportLeave(BasicBlock* block);
+ void impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr);
+ BOOL impLocAllocOnStack();
+ GenTreePtr impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_SIG_INFO* sig,
+ int memberRef,
+ bool readonlyCall,
+ bool tailCall,
+ CorInfoIntrinsics* pIntrinsicID);
+ GenTreePtr impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
+ CORINFO_SIG_INFO* sig,
+ int memberRef,
+ bool readonlyCall,
+ CorInfoIntrinsics intrinsicID);
+ GenTreePtr impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig);
+
+ GenTreePtr impMethodPointer(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo);
+
+ GenTreePtr impTransformThis(GenTreePtr thisPtr,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken,
+ CORINFO_THIS_TRANSFORM transform);
+
+ //----------------- Manipulating the trees and stmts ----------------------
+
+ GenTreePtr impTreeList; // Trees for the BB being imported
+ GenTreePtr impTreeLast; // The last tree for the current BB
+
+ enum
+ {
+ CHECK_SPILL_ALL = -1,
+ CHECK_SPILL_NONE = -2
+ };
+
+public:
+ void impBeginTreeList();
+ void impEndTreeList(BasicBlock* block, GenTreePtr firstStmt, GenTreePtr lastStmt);
+ void impEndTreeList(BasicBlock* block);
+ void impAppendStmtCheck(GenTreePtr stmt, unsigned chkLevel);
+ void impAppendStmt(GenTreePtr stmt, unsigned chkLevel);
+ void impInsertStmtBefore(GenTreePtr stmt, GenTreePtr stmtBefore);
+ GenTreePtr impAppendTree(GenTreePtr tree, unsigned chkLevel, IL_OFFSETX offset);
+ void impInsertTreeBefore(GenTreePtr tree, IL_OFFSETX offset, GenTreePtr stmtBefore);
+ void impAssignTempGen(unsigned tmp,
+ GenTreePtr val,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt = nullptr,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET,
+ BasicBlock* block = nullptr);
+ void impAssignTempGen(unsigned tmpNum,
+ GenTreePtr val,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt = nullptr,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET,
+ BasicBlock* block = nullptr);
+ GenTreePtr impCloneExpr(GenTreePtr tree,
+ GenTreePtr* clone,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt DEBUGARG(const char* reason));
+ GenTreePtr impAssignStruct(GenTreePtr dest,
+ GenTreePtr src,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt = nullptr,
+ BasicBlock* block = nullptr);
+ GenTreePtr impAssignStructPtr(GenTreePtr dest,
+ GenTreePtr src,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt = nullptr,
+ BasicBlock* block = nullptr);
+
+ GenTreePtr impGetStructAddr(GenTreePtr structVal,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ bool willDeref);
+
+ var_types impNormStructType(CORINFO_CLASS_HANDLE structHnd,
+ BYTE* gcLayout = nullptr,
+ unsigned* numGCVars = nullptr,
+ var_types* simdBaseType = nullptr);
+
+ GenTreePtr impNormStructVal(GenTreePtr structVal,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ bool forceNormalization = false);
+
+ GenTreePtr impTokenToHandle(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ BOOL* pRuntimeLookup = nullptr,
+ BOOL mustRestoreHandle = FALSE,
+ BOOL importParent = FALSE);
+
+ GenTreePtr impParentClassTokenToHandle(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ BOOL* pRuntimeLookup = nullptr,
+ BOOL mustRestoreHandle = FALSE)
+ {
+ return impTokenToHandle(pResolvedToken, pRuntimeLookup, mustRestoreHandle, TRUE);
+ }
+
+ GenTreePtr impLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_LOOKUP* pLookup,
+ unsigned flags,
+ void* compileTimeHandle);
+
+ GenTreePtr impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_LOOKUP* pLookup,
+ void* compileTimeHandle);
+
+ GenTreePtr impReadyToRunLookupToTree(CORINFO_CONST_LOOKUP* pLookup, unsigned flags, void* compileTimeHandle);
+
+ GenTreePtr impReadyToRunHelperToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CorInfoHelpFunc helper,
+ var_types type,
+ GenTreeArgList* arg = nullptr,
+ CORINFO_LOOKUP_KIND* pGenericLookupKind = nullptr);
+
+ GenTreePtr impCastClassOrIsInstToTree(GenTreePtr op1,
+ GenTreePtr op2,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ bool isCastClass);
+
+ bool VarTypeIsMultiByteAndCanEnreg(var_types type,
+ CORINFO_CLASS_HANDLE typeClass,
+ unsigned* typeSize,
+ bool forReturn);
+
+ static bool IsIntrinsicImplementedByUserCall(CorInfoIntrinsics intrinsicId);
+ static bool IsTargetIntrinsic(CorInfoIntrinsics intrinsicId);
+ static bool IsMathIntrinsic(CorInfoIntrinsics intrinsicId);
+ static bool IsMathIntrinsic(GenTreePtr tree);
+
+private:
+ //----------------- Importing the method ----------------------------------
+
+ CORINFO_CONTEXT_HANDLE impTokenLookupContextHandle; // The context used for looking up tokens.
+
+#ifdef DEBUG
+ unsigned impCurOpcOffs;
+ const char* impCurOpcName;
+ bool impNestedStackSpill;
+
+ // For displaying instrs with generated native code (-n:B)
+    GenTreePtr impLastILoffsStmt; // oldest stmt added for which we have not yet set gtStmtLastILoffs
+ void impNoteLastILoffs();
+#endif
+
+ /* IL offset of the stmt currently being imported. It gets set to
+ BAD_IL_OFFSET after it has been set in the appended trees. Then it gets
+ updated at IL offsets for which we have to report mapping info.
+ It also includes flag bits, so use jitGetILoffs()
+ to get the actual IL offset value.
+ */
+
+ IL_OFFSETX impCurStmtOffs;
+ void impCurStmtOffsSet(IL_OFFSET offs);
+
+ void impNoteBranchOffs();
+
+ unsigned impInitBlockLineInfo();
+
+ GenTreePtr impCheckForNullPointer(GenTreePtr obj);
+ bool impIsThis(GenTreePtr obj);
+ bool impIsLDFTN_TOKEN(const BYTE* delegateCreateStart, const BYTE* newobjCodeAddr);
+ bool impIsDUP_LDVIRTFTN_TOKEN(const BYTE* delegateCreateStart, const BYTE* newobjCodeAddr);
+ bool impIsAnySTLOC(OPCODE opcode)
+ {
+ return ((opcode == CEE_STLOC) || (opcode == CEE_STLOC_S) ||
+ ((opcode >= CEE_STLOC_0) && (opcode <= CEE_STLOC_3)));
+ }
+
+ GenTreeArgList* impPopList(unsigned count,
+ unsigned* flagsPtr,
+ CORINFO_SIG_INFO* sig,
+ GenTreeArgList* prefixTree = nullptr);
+
+ GenTreeArgList* impPopRevList(unsigned count,
+ unsigned* flagsPtr,
+ CORINFO_SIG_INFO* sig,
+ unsigned skipReverseCount = 0);
+
+ /*
+     * Get the current IL offset with stack-empty info incorporated
+ */
+ IL_OFFSETX impCurILOffset(IL_OFFSET offs, bool callInstruction = false);
+
+ //---------------- Spilling the importer stack ----------------------------
+
+ struct PendingDsc
+ {
+ PendingDsc* pdNext;
+ BasicBlock* pdBB;
+ SavedStack pdSavedStack;
+ ThisInitState pdThisPtrInit;
+ };
+
+ PendingDsc* impPendingList; // list of BBs currently waiting to be imported.
+ PendingDsc* impPendingFree; // Freed up dscs that can be reused
+
+ // We keep a byte-per-block map (dynamically extended) in the top-level Compiler object of a compilation.
+ ExpandArray<BYTE> impPendingBlockMembers;
+
+    // Return the byte for "blk" (allocating/extending impPendingBlockMembers if necessary).
+ // Operates on the map in the top-level ancestor.
+ BYTE impGetPendingBlockMember(BasicBlock* blk)
+ {
+ return impInlineRoot()->impPendingBlockMembers.Get(blk->bbInd());
+ }
+
+    // Set the byte for "blk" to "val" (allocating/extending impPendingBlockMembers if necessary).
+ // Operates on the map in the top-level ancestor.
+ void impSetPendingBlockMember(BasicBlock* blk, BYTE val)
+ {
+ impInlineRoot()->impPendingBlockMembers.Set(blk->bbInd(), val);
+ }
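+
+    // Usage sketch (illustrative only; the surrounding worklist logic is an assumption, not shown
+    // here): the pending-import list records membership through the root compiler so that all
+    // inlinees of a compilation share one map:
+    //
+    //     if (impGetPendingBlockMember(block) == 0)
+    //     {
+    //         impSetPendingBlockMember(block, 1); // mark "block" as being on the pending list
+    //         // ... add "block" to impPendingList ...
+    //     }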
+
+ bool impCanReimport;
+
+ bool impSpillStackEntry(unsigned level,
+ unsigned varNum
+#ifdef DEBUG
+ ,
+ bool bAssertOnRecursion,
+ const char* reason
+#endif
+ );
+
+ void impSpillStackEnsure(bool spillLeaves = false);
+ void impEvalSideEffects();
+ void impSpillSpecialSideEff();
+ void impSpillSideEffects(bool spillGlobEffects, unsigned chkLevel DEBUGARG(const char* reason));
+ void impSpillValueClasses();
+ void impSpillEvalStack();
+ static fgWalkPreFn impFindValueClasses;
+ void impSpillLclRefs(ssize_t lclNum);
+
+ BasicBlock* impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_HANDLE clsHnd);
+
+ void impImportBlockCode(BasicBlock* block);
+
+ void impReimportMarkBlock(BasicBlock* block);
+ void impReimportMarkSuccessors(BasicBlock* block);
+
+ void impVerifyEHBlock(BasicBlock* block, bool isTryStart);
+
+ void impImportBlockPending(BasicBlock* block);
+
+ // Similar to impImportBlockPending, but assumes that block has already been imported once and is being
+ // reimported for some reason. It specifically does *not* look at verCurrentState to set the EntryState
+ // for the block, but instead, just re-uses the block's existing EntryState.
+ void impReimportBlockPending(BasicBlock* block);
+
+ var_types impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTreePtr* pOp1, GenTreePtr* pOp2);
+
+ void impImportBlock(BasicBlock* block);
+
+ // Assumes that "block" is a basic block that completes with a non-empty stack. We will assign the values
+ // on the stack to local variables (the "spill temp" variables). The successor blocks will assume that
+    // their incoming stack contents are in those locals. This requires "block" and its successors to agree on
+ // the variables that will be used -- and for all the predecessors of those successors, and the
+ // successors of those predecessors, etc. Call such a set of blocks closed under alternating
+ // successor/predecessor edges a "spill clique." A block is a "predecessor" or "successor" member of the
+ // clique (or, conceivably, both). Each block has a specified sequence of incoming and outgoing spill
+ // temps. If "block" already has its outgoing spill temps assigned (they are always a contiguous series
+ // of local variable numbers, so we represent them with the base local variable number), returns that.
+ // Otherwise, picks a set of spill temps, and propagates this choice to all blocks in the spill clique of
+ // which "block" is a member (asserting, in debug mode, that no block in this clique had its spill temps
+ // chosen already. More precisely, that the incoming or outgoing spill temps are not chosen, depending
+ // on which kind of member of the clique the block is).
+ unsigned impGetSpillTmpBase(BasicBlock* block);
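+
+    // Minimal caller sketch (illustrative; 'verCurrentState' and the spill loop shape are
+    // assumptions about the surrounding importer code, not implied by this declaration):
+    //
+    //     unsigned baseTmp = impGetSpillTmpBase(block);
+    //     for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+    //     {
+    //         // stack entry 'level' is spilled to local (baseTmp + level), so every block in
+    //         // the spill clique agrees on which locals carry the stack across the edge
+    //         impSpillStackEntry(level, baseTmp + level DEBUGARG(false) DEBUGARG("spill clique"));
+    //     }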
+
+ // Assumes that "block" is a basic block that completes with a non-empty stack. We have previously
+ // assigned the values on the stack to local variables (the "spill temp" variables). The successor blocks
+    // will assume that their incoming stack contents are in those locals. This requires "block" and its
+ // successors to agree on the variables and their types that will be used. The CLI spec allows implicit
+ // conversions between 'int' and 'native int' or 'float' and 'double' stack types. So one predecessor can
+ // push an int and another can push a native int. For 64-bit we have chosen to implement this by typing
+ // the "spill temp" as native int, and then importing (or re-importing as needed) so that all the
+ // predecessors in the "spill clique" push a native int (sign-extending if needed), and all the
+ // successors receive a native int. Similarly float and double are unified to double.
+ // This routine is called after a type-mismatch is detected, and it will walk the spill clique to mark
+ // blocks for re-importation as appropriate (both successors, so they get the right incoming type, and
+ // predecessors, so they insert an upcast if needed).
+ void impReimportSpillClique(BasicBlock* block);
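+
+    // Worked example of the unification described above (hypothetical predecessor shapes):
+    //     pred1 ends by pushing 'ldc.i4 0'            -> stack type is 'int'
+    //     pred2 ends by pushing a 'native int' local  -> stack type is 'native int'
+    // On 64-bit targets the shared spill temp is typed as TYP_I_IMPL; pred1 is re-imported so
+    // its push is sign-extended to native int, and the successor is re-imported so that it
+    // consumes a native int.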
+
+ // When we compute a "spill clique" (see above) these byte-maps are allocated to have a byte per basic
+ // block, and represent the predecessor and successor members of the clique currently being computed.
+ // *** Access to these will need to be locked in a parallel compiler.
+ ExpandArray<BYTE> impSpillCliquePredMembers;
+ ExpandArray<BYTE> impSpillCliqueSuccMembers;
+
+ enum SpillCliqueDir
+ {
+ SpillCliquePred,
+ SpillCliqueSucc
+ };
+
+ // Abstract class for receiving a callback while walking a spill clique
+ class SpillCliqueWalker
+ {
+ public:
+ virtual void Visit(SpillCliqueDir predOrSucc, BasicBlock* blk) = 0;
+ };
+
+ // This class is used for setting the bbStkTempsIn and bbStkTempsOut on the blocks within a spill clique
+ class SetSpillTempsBase : public SpillCliqueWalker
+ {
+ unsigned m_baseTmp;
+
+ public:
+ SetSpillTempsBase(unsigned baseTmp) : m_baseTmp(baseTmp)
+ {
+ }
+ virtual void Visit(SpillCliqueDir predOrSucc, BasicBlock* blk);
+ };
+
+ // This class is used for implementing impReimportSpillClique part on each block within the spill clique
+ class ReimportSpillClique : public SpillCliqueWalker
+ {
+ Compiler* m_pComp;
+
+ public:
+ ReimportSpillClique(Compiler* pComp) : m_pComp(pComp)
+ {
+ }
+ virtual void Visit(SpillCliqueDir predOrSucc, BasicBlock* blk);
+ };
+
+ // This is the heart of the algorithm for walking spill cliques. It invokes callback->Visit for each
+ // predecessor or successor within the spill clique
+ void impWalkSpillCliqueFromPred(BasicBlock* pred, SpillCliqueWalker* callback);
+
+ // For a BasicBlock that has already been imported, the EntryState has an array of GenTrees for the
+    // incoming locals. This walks that list and resets the types of the GenTrees to match the types of
+ // the VarDscs. They get out of sync when we have int/native int issues (see impReimportSpillClique).
+ void impRetypeEntryStateTemps(BasicBlock* blk);
+
+ BYTE impSpillCliqueGetMember(SpillCliqueDir predOrSucc, BasicBlock* blk);
+ void impSpillCliqueSetMember(SpillCliqueDir predOrSucc, BasicBlock* blk, BYTE val);
+
+ void impPushVar(GenTree* op, typeInfo tiRetVal);
+ void impLoadVar(unsigned lclNum, IL_OFFSET offset, typeInfo tiRetVal);
+ void impLoadVar(unsigned lclNum, IL_OFFSET offset)
+ {
+ impLoadVar(lclNum, offset, lvaTable[lclNum].lvVerTypeInfo);
+ }
+ void impLoadArg(unsigned ilArgNum, IL_OFFSET offset);
+ void impLoadLoc(unsigned ilLclNum, IL_OFFSET offset);
+ bool impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE& opcode);
+
+#ifdef _TARGET_ARM_
+ void impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr op, CORINFO_CLASS_HANDLE hClass);
+#endif
+
+ // A free list of linked list nodes used to represent to-do stacks of basic blocks.
+ struct BlockListNode
+ {
+ BasicBlock* m_blk;
+ BlockListNode* m_next;
+ BlockListNode(BasicBlock* blk, BlockListNode* next = nullptr) : m_blk(blk), m_next(next)
+ {
+ }
+ void* operator new(size_t sz, Compiler* comp);
+ };
+ BlockListNode* impBlockListNodeFreeList;
+
+ BlockListNode* AllocBlockListNode();
+ void FreeBlockListNode(BlockListNode* node);
+
+ bool impIsValueType(typeInfo* pTypeInfo);
+ var_types mangleVarArgsType(var_types type);
+
+#if FEATURE_VARARG
+ regNumber getCallArgIntRegister(regNumber floatReg);
+ regNumber getCallArgFloatRegister(regNumber intReg);
+#endif // FEATURE_VARARG
+
+#if defined(DEBUG)
+ static unsigned jitTotalMethodCompiled;
+#endif
+
+#ifdef DEBUG
+ static LONG jitNestingLevel;
+#endif // DEBUG
+
+ bool seenConditionalJump;
+
+ static BOOL impIsAddressInLocal(GenTreePtr tree, GenTreePtr* lclVarTreeOut);
+
+ void impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, InlineResult* inlineResult);
+
+ // STATIC inlining decision based on the IL code.
+ void impCanInlineIL(CORINFO_METHOD_HANDLE fncHandle,
+ CORINFO_METHOD_INFO* methInfo,
+ bool forceInline,
+ InlineResult* inlineResult);
+
+ void impCheckCanInline(GenTreePtr call,
+ CORINFO_METHOD_HANDLE fncHandle,
+ unsigned methAttr,
+ CORINFO_CONTEXT_HANDLE exactContextHnd,
+ InlineCandidateInfo** ppInlineCandidateInfo,
+ InlineResult* inlineResult);
+
+ void impInlineRecordArgInfo(InlineInfo* pInlineInfo,
+ GenTreePtr curArgVal,
+ unsigned argNum,
+ InlineResult* inlineResult);
+
+ void impInlineInitVars(InlineInfo* pInlineInfo);
+
+ unsigned impInlineFetchLocal(unsigned lclNum DEBUGARG(const char* reason));
+
+ GenTreePtr impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, InlLclVarInfo* lclTypeInfo);
+
+ BOOL impInlineIsThis(GenTreePtr tree, InlArgInfo* inlArgInfo);
+
+ BOOL impInlineIsGuaranteedThisDerefBeforeAnySideEffects(GenTreePtr additionalTreesToBeEvaluatedBefore,
+ GenTreePtr variableBeingDereferenced,
+ InlArgInfo* inlArgInfo);
+
+ void impMarkInlineCandidate(GenTreePtr call, CORINFO_CONTEXT_HANDLE exactContextHnd, CORINFO_CALL_INFO* callInfo);
+
+ bool impTailCallRetTypeCompatible(var_types callerRetType,
+ CORINFO_CLASS_HANDLE callerRetTypeClass,
+ var_types calleeRetType,
+ CORINFO_CLASS_HANDLE calleeRetTypeClass);
+
+ bool impIsTailCallILPattern(bool tailPrefixed,
+ OPCODE curOpcode,
+ const BYTE* codeAddrOfNextOpcode,
+ const BYTE* codeEnd,
+ bool isRecursive,
+ bool* IsCallPopRet = nullptr);
+
+ bool impIsImplicitTailCallCandidate(
+ OPCODE curOpcode, const BYTE* codeAddrOfNextOpcode, const BYTE* codeEnd, int prefixFlags, bool isRecursive);
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX FlowGraph XX
+ XX XX
+ XX Info about the basic-blocks, their contents and the flow analysis XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ BasicBlock* fgFirstBB; // Beginning of the basic block list
+ BasicBlock* fgLastBB; // End of the basic block list
+ BasicBlock* fgFirstColdBlock; // First block to be placed in the cold section
+#if FEATURE_EH_FUNCLETS
+ BasicBlock* fgFirstFuncletBB; // First block of outlined funclets (to allow block insertion before the funclets)
+#endif
+ BasicBlock* fgFirstBBScratch; // Block inserted for initialization stuff. Is nullptr if no such block has been
+ // created.
+ BasicBlockList* fgReturnBlocks; // list of BBJ_RETURN blocks
+ unsigned fgEdgeCount; // # of control flow edges between the BBs
+ unsigned fgBBcount; // # of BBs in the method
+#ifdef DEBUG
+ unsigned fgBBcountAtCodegen; // # of BBs in the method at the start of codegen
+#endif
+ unsigned fgBBNumMax; // The max bbNum that has been assigned to basic blocks
+ unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information
+ BasicBlock** fgBBInvPostOrder; // The flow graph stored in an array sorted in topological order, needed to compute
+ // dominance. Indexed by block number. Size: fgBBNumMax + 1.
+
+ // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute
+ // dominance queries in O(1). fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and
+ // postorder number, respectively. The arrays are indexed by basic block number. (Note that blocks are numbered
+ // starting from one. Thus, we always waste element zero. This makes debugging easier and makes the code less likely
+ // to suffer from bugs stemming from forgetting to add or subtract one from the block number to form an array
+ // index). The arrays are of size fgBBNumMax + 1.
+ unsigned* fgDomTreePreOrder;
+ unsigned* fgDomTreePostOrder;
+
+ bool fgBBVarSetsInited;
+
+ // Allocate array like T* a = new T[fgBBNumMax + 1];
+ // Using helper so we don't keep forgetting +1.
+ template <typename T>
+ T* fgAllocateTypeForEachBlk(CompMemKind cmk = CMK_Unknown)
+ {
+ return (T*)compGetMem((fgBBNumMax + 1) * sizeof(T), cmk);
+ }
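+
+    // Usage sketch (illustrative; 'visitCount' is a placeholder name, and the memory kind defaults
+    // to CMK_Unknown when not supplied):
+    //
+    //     unsigned* visitCount = fgAllocateTypeForEachBlk<unsigned>();
+    //     for (BasicBlock* blk = fgFirstBB; blk != nullptr; blk = blk->bbNext)
+    //     {
+    //         visitCount[blk->bbNum] = 0; // element 0 stays unused, since bbNum starts at 1
+    //     }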
+
+ // BlockSets are relative to a specific set of BasicBlock numbers. If that changes
+ // (if the blocks are renumbered), this changes. BlockSets from different epochs
+ // cannot be meaningfully combined. Note that new blocks can be created with higher
+ // block numbers without changing the basic block epoch. These blocks *cannot*
+ // participate in a block set until the blocks are all renumbered, causing the epoch
+ // to change. This is useful if continuing to use previous block sets is valuable.
+ // If the epoch is zero, then it is uninitialized, and block sets can't be used.
+ unsigned fgCurBBEpoch;
+
+ unsigned GetCurBasicBlockEpoch()
+ {
+ return fgCurBBEpoch;
+ }
+
+ // The number of basic blocks in the current epoch. When the blocks are renumbered,
+ // this is fgBBcount. As blocks are added, fgBBcount increases, fgCurBBEpochSize remains
+ // the same, until a new BasicBlock epoch is created, such as when the blocks are all renumbered.
+ unsigned fgCurBBEpochSize;
+
+ // The number of "size_t" elements required to hold a bitset large enough for fgCurBBEpochSize
+ // bits. This is precomputed to avoid doing math every time BasicBlockBitSetTraits::GetArrSize() is called.
+ unsigned fgBBSetCountInSizeTUnits;
+
+ void NewBasicBlockEpoch()
+ {
+ INDEBUG(unsigned oldEpochArrSize = fgBBSetCountInSizeTUnits);
+
+ // We have a new epoch. Compute and cache the size needed for new BlockSets.
+ fgCurBBEpoch++;
+ fgCurBBEpochSize = fgBBNumMax + 1;
+ fgBBSetCountInSizeTUnits =
+ unsigned(roundUp(fgCurBBEpochSize, sizeof(size_t) * 8)) / unsigned(sizeof(size_t) * 8);
+
+#ifdef DEBUG
+ // All BlockSet objects are now invalid!
+ fgReachabilitySetsValid = false; // the bbReach sets are now invalid!
+ fgEnterBlksSetValid = false; // the fgEnterBlks set is now invalid!
+
+ if (verbose)
+ {
+ unsigned epochArrSize = BasicBlockBitSetTraits::GetArrSize(this, sizeof(size_t));
+ printf("\nNew BlockSet epoch %d, # of blocks (including unused BB00): %u, bitset array size: %u (%s)",
+ fgCurBBEpoch, fgCurBBEpochSize, epochArrSize, (epochArrSize <= 1) ? "short" : "long");
+ if ((fgCurBBEpoch != 1) && ((oldEpochArrSize <= 1) != (epochArrSize <= 1)))
+ {
+ // If we're not just establishing the first epoch, and the epoch array size has changed such that we're
+ // going to change our bitset representation from short (just a size_t bitset) to long (a pointer to an
+ // array of size_t bitsets), then print that out.
+ printf("; NOTE: BlockSet size was previously %s!", (oldEpochArrSize <= 1) ? "short" : "long");
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+
+ void EnsureBasicBlockEpoch()
+ {
+ if (fgCurBBEpochSize != fgBBNumMax + 1)
+ {
+ NewBasicBlockEpoch();
+ }
+ }
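+
+    // Typical call-sequence sketch (illustrative; fgRenumberBlocks is assumed here and is not part
+    // of this declaration):
+    //
+    //     fgRenumberBlocks();      // bbNum values (and fgBBNumMax) may change
+    //     EnsureBasicBlockEpoch(); // bump the epoch if fgBBNumMax + 1 != fgCurBBEpochSize
+    //     // ... any BlockSet created from here on belongs to the new epoch ...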
+
+ BasicBlock* fgNewBasicBlock(BBjumpKinds jumpKind);
+ void fgEnsureFirstBBisScratch();
+ bool fgFirstBBisScratch();
+ bool fgBBisScratch(BasicBlock* block);
+
+ void fgExtendEHRegionBefore(BasicBlock* block);
+ void fgExtendEHRegionAfter(BasicBlock* block);
+
+ BasicBlock* fgNewBBbefore(BBjumpKinds jumpKind, BasicBlock* block, bool extendRegion);
+
+ BasicBlock* fgNewBBafter(BBjumpKinds jumpKind, BasicBlock* block, bool extendRegion);
+
+ BasicBlock* fgNewBBinRegion(BBjumpKinds jumpKind,
+ unsigned tryIndex,
+ unsigned hndIndex,
+ BasicBlock* nearBlk,
+ bool putInFilter = false,
+ bool runRarely = false,
+ bool insertAtEnd = false);
+
+ BasicBlock* fgNewBBinRegion(BBjumpKinds jumpKind,
+ BasicBlock* srcBlk,
+ bool runRarely = false,
+ bool insertAtEnd = false);
+
+ BasicBlock* fgNewBBinRegion(BBjumpKinds jumpKind);
+
+ BasicBlock* fgNewBBinRegionWorker(BBjumpKinds jumpKind,
+ BasicBlock* afterBlk,
+ unsigned xcptnIndex,
+ bool putInTryRegion);
+
+ void fgInsertBBbefore(BasicBlock* insertBeforeBlk, BasicBlock* newBlk);
+ void fgInsertBBafter(BasicBlock* insertAfterBlk, BasicBlock* newBlk);
+ void fgUnlinkBlock(BasicBlock* block);
+
+#if OPT_BOOL_OPS // Used to detect multiple logical "not" assignments.
+ bool fgMultipleNots;
+#endif
+
+ bool fgModified; // True if the flow graph has been modified recently
+ bool fgComputePredsDone; // Have we computed the bbPreds list
+ bool fgCheapPredsValid; // Is the bbCheapPreds list valid?
+ bool fgDomsComputed; // Have we computed the dominator sets?
+
+ bool fgHasSwitch; // any BBJ_SWITCH jumps?
+ bool fgHasPostfix; // any postfix ++/-- found?
+ unsigned fgIncrCount; // number of increment nodes found
+
+ BlockSet fgEnterBlks; // Set of blocks which have a special transfer of control; the "entry" blocks plus EH handler
+ // begin blocks.
+
+#ifdef DEBUG
+ bool fgReachabilitySetsValid; // Are the bbReach sets valid?
+ bool fgEnterBlksSetValid; // Is the fgEnterBlks set valid?
+#endif // DEBUG
+
+ bool fgRemoveRestOfBlock; // true if we know that we will throw
+ bool fgStmtRemoved; // true if we remove statements -> need new DFA
+
+ // There are two modes for ordering of the trees.
+ // - In FGOrderTree, the dominant ordering is the tree order, and the nodes contained in
+ // each tree and sub-tree are contiguous, and can be traversed (in gtNext/gtPrev order)
+ // by traversing the tree according to the order of the operands.
+ // - In FGOrderLinear, the dominant ordering is the linear order.
+
+ enum FlowGraphOrder
+ {
+ FGOrderTree,
+ FGOrderLinear
+ };
+ FlowGraphOrder fgOrder;
+
+ // The following are boolean flags that keep track of the state of internal data structures
+
+ bool fgStmtListThreaded;
+ bool fgCanRelocateEHRegions; // true if we are allowed to relocate the EH regions
+ bool fgEdgeWeightsComputed; // true after we have called fgComputeEdgeWeights
+ bool fgHaveValidEdgeWeights; // true if we were successful in computing all of the edge weights
+    bool fgSlopUsedInEdgeWeights;  // true if there was some slop used when computing the edge weights
+ bool fgRangeUsedInEdgeWeights; // true if some of the edgeWeight are expressed in Min..Max form
+ bool fgNeedsUpdateFlowGraph; // true if we need to run fgUpdateFlowGraph
+ BasicBlock::weight_t fgCalledWeight; // count of the number of times this method was called
+ // This is derived from the profile data
+ // or is BB_UNITY_WEIGHT when we don't have profile data
+
+#if FEATURE_EH_FUNCLETS
+ bool fgFuncletsCreated; // true if the funclet creation phase has been run
+#endif // FEATURE_EH_FUNCLETS
+
+    bool fgGlobalMorph;  // indicates whether we are in the global morphing phase,
+                         // since fgMorphTree can be called from several places
+    bool fgExpandInline; // indicates that we are creating trees for the inliner
+
+ bool impBoxTempInUse; // the temp below is valid and available
+ unsigned impBoxTemp; // a temporary that is used for boxing
+
+#ifdef DEBUG
+ bool jitFallbackCompile; // Are we doing a fallback compile? That is, have we executed a NO_WAY assert,
+ // and we are trying to compile again in a "safer", minopts mode?
+#endif
+
+#if defined(DEBUG)
+ unsigned impInlinedCodeSize;
+#endif
+
+ //-------------------------------------------------------------------------
+
+ void fgInit();
+
+ void fgImport();
+
+ void fgInline();
+
+ GenTreePtr fgGetCritSectOfStaticMethod();
+
+#if !defined(_TARGET_X86_)
+
+ void fgAddSyncMethodEnterExit();
+
+ GenTree* fgCreateMonitorTree(unsigned lvaMonitorBool, unsigned lvaThisVar, BasicBlock* block, bool enter);
+
+ void fgConvertSyncReturnToLeave(BasicBlock* block);
+
+#endif // !_TARGET_X86_
+
+ void fgAddReversePInvokeEnterExit();
+
+ bool fgMoreThanOneReturnBlock();
+
+ // The number of separate return points in the method.
+ unsigned fgReturnCount;
+
+ void fgAddInternal();
+
+ bool fgFoldConditional(BasicBlock* block);
+
+ void fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw);
+ void fgMorphBlocks();
+
+ bool fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(const char* msg));
+
+ void fgSetOptions();
+
+#ifdef DEBUG
+ static fgWalkPreFn fgAssertNoQmark;
+ void fgPreExpandQmarkChecks(GenTreePtr expr);
+ void fgPostExpandQmarkChecks();
+ static void fgCheckQmarkAllowedForm(GenTreePtr tree);
+#endif
+
+ IL_OFFSET fgFindBlockILOffset(BasicBlock* block);
+
+ BasicBlock* fgSplitBlockAtBeginning(BasicBlock* curr);
+ BasicBlock* fgSplitBlockAtEnd(BasicBlock* curr);
+ BasicBlock* fgSplitBlockAfterStatement(BasicBlock* curr, GenTree* stmt);
+ BasicBlock* fgSplitBlockAfterNode(BasicBlock* curr, GenTree* node); // for LIR
+ BasicBlock* fgSplitEdge(BasicBlock* curr, BasicBlock* succ);
+
+ GenTreeStmt* fgNewStmtFromTree(GenTreePtr tree, BasicBlock* block, IL_OFFSETX offs);
+ GenTreeStmt* fgNewStmtFromTree(GenTreePtr tree);
+ GenTreeStmt* fgNewStmtFromTree(GenTreePtr tree, BasicBlock* block);
+ GenTreeStmt* fgNewStmtFromTree(GenTreePtr tree, IL_OFFSETX offs);
+
+ GenTreePtr fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst = nullptr);
+ void fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt);
+ void fgExpandQmarkStmt(BasicBlock* block, GenTreePtr expr);
+ void fgExpandQmarkNodes();
+
+ void fgMorph();
+
+ // Do "simple lowering." This functionality is (conceptually) part of "general"
+ // lowering that is distributed between fgMorph and the lowering phase of LSRA.
+ void fgSimpleLowering();
+
+ bool fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse);
+
+ GenTreePtr fgInitThisClass();
+
+ GenTreePtr fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper);
+
+ GenTreePtr fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls);
+
+ void fgLocalVarLiveness();
+
+ void fgLocalVarLivenessInit();
+
+#ifdef LEGACY_BACKEND
+ GenTreePtr fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, GenTreePtr relopNode, GenTreePtr asgdLclVar);
+#else
+ void fgPerNodeLocalVarLiveness(GenTree* node, GenTree* asgdLclVar);
+ void fgPerStatementLocalVarLiveness(GenTree* node, GenTree* asgdLclVar);
+#endif
+ void fgPerBlockLocalVarLiveness();
+
+ VARSET_VALRET_TP fgGetHandlerLiveVars(BasicBlock* block);
+
+ void fgLiveVarAnalysis(bool updateInternalOnly = false);
+
+ // This is used in the liveness computation, as a temporary. When we use the
+ // arbitrary-length VarSet representation, it is better not to allocate a new one
+ // at each call.
+ VARSET_TP fgMarkIntfUnionVS;
+
+ bool fgMarkIntf(VARSET_VALARG_TP varSet);
+
+ bool fgMarkIntf(VARSET_VALARG_TP varSet1, VARSET_VALARG_TP varSet2);
+
+ void fgUpdateRefCntForClone(BasicBlock* addedToBlock, GenTreePtr clonedTree);
+
+ void fgUpdateRefCntForExtract(GenTreePtr wholeTree, GenTreePtr keptTree);
+
+ void fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call);
+
+ bool fgComputeLifeLocal(VARSET_TP& life, VARSET_TP& keepAliveVars, GenTree* lclVarNode, GenTree* node);
+
+ VARSET_VALRET_TP fgComputeLife(VARSET_VALARG_TP life,
+ GenTreePtr startNode,
+ GenTreePtr endNode,
+ VARSET_VALARG_TP volatileVars,
+ bool* pStmtInfoDirty DEBUGARG(bool* treeModf));
+
+ VARSET_VALRET_TP fgComputeLifeLIR(VARSET_VALARG_TP life, BasicBlock* block, VARSET_VALARG_TP volatileVars);
+
+ bool fgRemoveDeadStore(GenTree** pTree,
+ LclVarDsc* varDsc,
+ VARSET_TP life,
+ bool* doAgain,
+ bool* pStmtInfoDirty DEBUGARG(bool* treeModf));
+
+ bool fgTryRemoveDeadLIRStore(LIR::Range& blockRange, GenTree* node, GenTree** next);
+
+ // For updating liveset during traversal AFTER fgComputeLife has completed
+ VARSET_VALRET_TP fgGetVarBits(GenTreePtr tree);
+ VARSET_VALRET_TP fgUpdateLiveSet(VARSET_VALARG_TP liveSet, GenTreePtr tree);
+
+ // Returns the set of live variables after endTree,
+ // assuming that liveSet is the set of live variables BEFORE tree.
+ // Requires that fgComputeLife has completed, and that tree is in the same
+ // statement as endTree, and that it comes before endTree in execution order
+
+ VARSET_VALRET_TP fgUpdateLiveSet(VARSET_VALARG_TP liveSet, GenTreePtr tree, GenTreePtr endTree)
+ {
+ VARSET_TP VARSET_INIT(this, newLiveSet, liveSet);
+ while (tree != nullptr && tree != endTree->gtNext)
+ {
+ VarSetOps::AssignNoCopy(this, newLiveSet, fgUpdateLiveSet(newLiveSet, tree));
+ tree = tree->gtNext;
+ }
+ assert(tree == endTree->gtNext);
+ return newLiveSet;
+ }
+
+ void fgInterBlockLocalVarLiveness();
+
+ // The presence of "x op= y" operations presents some difficulties for SSA: this is both a use of some SSA name of
+ // "x", and a def of a new SSA name for "x". The tree only has one local variable for "x", so it has to choose
+ // whether to treat that as the use or def. It chooses the "use", and thus the old SSA name. This map allows us
+ // to record/recover the "def" SSA number, given the lcl var node for "x" in such a tree.
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, unsigned, JitSimplerHashBehavior> NodeToUnsignedMap;
+ NodeToUnsignedMap* m_opAsgnVarDefSsaNums;
+ NodeToUnsignedMap* GetOpAsgnVarDefSsaNums()
+ {
+ if (m_opAsgnVarDefSsaNums == nullptr)
+ {
+ m_opAsgnVarDefSsaNums = new (getAllocator()) NodeToUnsignedMap(getAllocator());
+ }
+ return m_opAsgnVarDefSsaNums;
+ }
+
+ // Requires value numbering phase to have completed. Returns the value number ("gtVN") of the
+ // "tree," EXCEPT in the case of GTF_VAR_USEASG, because the tree node's gtVN member is the
+    // "use" VN. Performs a lookup into the map of (use asg tree -> def VN) to return the "def's"
+ // VN.
+ inline ValueNum GetUseAsgDefVNOrTreeVN(GenTreePtr tree);
+
+ // Requires that "lcl" has the GTF_VAR_DEF flag set. Returns the SSA number of "lcl".
+ // Except: assumes that lcl is a def, and if it is
+ // a def appearing in "lcl op= rhs" (GTF_VAR_USEASG), looks up and returns the SSA number for the "def",
+ // rather than the "use" SSA number recorded in the tree "lcl".
+ inline unsigned GetSsaNumForLocalVarDef(GenTreePtr lcl);
+
+ // Some assignments assign to a local "indirectly": they are part of a comma expression that takes the address
+ // of the local (or a field thereof), assigns this address to a temp, and uses an indirection of this temp as
+ // the LHS of the assignment. This actually arises in exactly one situation. At the source level we assign one
+ // struct local to another: "s1 = s2". This becomes a copyblk. If "s2" is promoted into field variables "s2f0",
+ // ..."s2fn", then the copyblk will morph to a comma expression that takes the address of "s1" and does field-wise
+ // assignments:
+ // (byref addrS1 = &s1,
+    //     *(addrS1 + offsetof(f0)) = s2f0,
+    //     ...
+    //     *(addrS1 + offsetof(fn)) = s2fn)
+ //
+ // It would be a shame, given the simple form at the source level, to be unable to track the values in the
+ // fields of "s1" after this. But "s1" does not appear in the assignments that modify it. How, then, to
+ // give it SSA names and value numbers?
+ //
+ // The solution is to use the side table described below to annotate each of the field-wise assignments at the
+ // end with an instance of the structure below, whose fields are described in the declaration.
+ struct IndirectAssignmentAnnotation
+ {
+ unsigned m_lclNum; // The local num that is being indirectly assigned.
+ FieldSeqNode* m_fieldSeq; // If the LHS of the struct assignment is itself a struct field dereference,
+ // as in "s0.g = s2", then "m_lclNum" would be "s0", and "m_fieldSeq" would
+ // be the singleton field sequence "g". The individual assignments would
+ // further append the fields of "s.g" to that.
+ bool m_isEntire; // True iff this assignment writes all of m_lclNum. (This can occur if the
+ // structure has a single field).
+ unsigned m_defSsaNum; // The new SSA number of "m_lclNum" after the assignment.
+ unsigned m_useSsaNum; // Only valid if "m_isEntire" is false; if so, the SSA number of "m_lclNum" before the
+ // assignment.
+
+ IndirectAssignmentAnnotation(unsigned lclNum,
+ FieldSeqNode* fldSeq,
+ bool isEntire,
+ unsigned defSsaNum = SsaConfig::RESERVED_SSA_NUM,
+ unsigned useSsaNum = SsaConfig::RESERVED_SSA_NUM)
+ : m_lclNum(lclNum), m_fieldSeq(fldSeq), m_isEntire(isEntire), m_defSsaNum(defSsaNum), m_useSsaNum(useSsaNum)
+ {
+ }
+ };
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, IndirectAssignmentAnnotation*, JitSimplerHashBehavior>
+ NodeToIndirAssignMap;
+ NodeToIndirAssignMap* m_indirAssignMap;
+ NodeToIndirAssignMap* GetIndirAssignMap()
+ {
+ if (m_indirAssignMap == nullptr)
+ {
+ // Create a CompAllocator that labels sub-structure with CMK_IndirAssignMap, and use that for allocation.
+ IAllocator* ialloc = new (this, CMK_IndirAssignMap) CompAllocator(this, CMK_IndirAssignMap);
+ m_indirAssignMap = new (ialloc) NodeToIndirAssignMap(ialloc);
+ }
+ return m_indirAssignMap;
+ }
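+
+    // Sketch of how one of the field-wise assignments above could be annotated (illustrative;
+    // 'asgTree', 's1LclNum', 'f0FieldSeq' and 'newSsaNum' are placeholder names):
+    //
+    //     IndirectAssignmentAnnotation* pAnn = new (getAllocator())
+    //         IndirectAssignmentAnnotation(s1LclNum, f0FieldSeq, /* isEntire */ false, newSsaNum);
+    //     GetIndirAssignMap()->Set(asgTree, pAnn);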
+
+ // Performs SSA conversion.
+ void fgSsaBuild();
+
+ // Reset any data structures to the state expected by "fgSsaBuild", so it can be run again.
+ void fgResetForSsa();
+
+ unsigned fgSsaPassesCompleted; // Number of times fgSsaBuild has been run.
+
+ // Returns "true" iff lcl "lclNum" should be excluded from SSA.
+ inline bool fgExcludeFromSsa(unsigned lclNum);
+
+ // The value numbers for this compilation.
+ ValueNumStore* vnStore;
+
+public:
+ ValueNumStore* GetValueNumStore()
+ {
+ return vnStore;
+ }
+
+ // Do value numbering (assign a value number to each
+ // tree node).
+ void fgValueNumber();
+
+    // Updates "fgCurHeapVN" via the assignment H[elemTypeEq][arrVN][inx][fldSeq] = rhsVN.
+ // Assumes that "elemTypeEq" is the (equivalence class rep) of the array element type.
+ // The 'indType' is the indirection type of the lhs of the assignment and will typically
+ // match the element type of the array or fldSeq. When this type doesn't match
+ // or if the fldSeq is 'NotAField' we invalidate the array contents H[elemTypeEq][arrVN]
+ //
+ void fgValueNumberArrIndexAssign(CORINFO_CLASS_HANDLE elemTypeEq,
+ ValueNum arrVN,
+ ValueNum inxVN,
+ FieldSeqNode* fldSeq,
+ ValueNum rhsVN,
+ var_types indType);
+
+ // Requires that "tree" is a GT_IND marked as an array index, and that its address argument
+ // has been parsed to yield the other input arguments. If evaluation of the address
+ // can raise exceptions, those should be captured in the exception set "excVN."
+ // Assumes that "elemTypeEq" is the (equivalence class rep) of the array element type.
+ // Marks "tree" with the VN for H[elemTypeEq][arrVN][inx][fldSeq] (for the liberal VN; a new unique
+ // VN for the conservative VN.) Also marks the tree's argument as the address of an array element.
+ // The type tree->TypeGet() will typically match the element type of the array or fldSeq.
+ // When this type doesn't match or if the fldSeq is 'NotAField' we return a new unique VN
+ //
+ ValueNum fgValueNumberArrIndexVal(GenTreePtr tree,
+ CORINFO_CLASS_HANDLE elemTypeEq,
+ ValueNum arrVN,
+ ValueNum inxVN,
+ ValueNum excVN,
+ FieldSeqNode* fldSeq);
+
+ // Requires "funcApp" to be a VNF_PtrToArrElem, and "addrXvn" to represent the exception set thrown
+ // by evaluating the array index expression "tree". Returns the value number resulting from
+ // dereferencing the array in the current heap state. If "tree" is non-null, it must be the
+ // "GT_IND" that does the dereference, and it is given the returned value number.
+ ValueNum fgValueNumberArrIndexVal(GenTreePtr tree, struct VNFuncApp* funcApp, ValueNum addrXvn);
+
+ unsigned fgVNPassesCompleted; // Number of times fgValueNumber has been run.
+
+ // Utility functions for fgValueNumber.
+
+ // Perform value-numbering for the trees in "blk". When giving VN's to the SSA
+ // names defined by phi definitions at the start of "blk", "newVNsForPhis" indicates
+    // that these should be given new VN's, irrespective of the values of the RHS.
+ // If "false", then we may assume that all inputs to phi RHS's of such definitions
+ // have already been assigned value numbers; if they are all assigned the *same* value
+ // number, then the LHS SSA name gets the same VN.
+ void fgValueNumberBlock(BasicBlock* blk, bool newVNsForPhis);
+
+ // Requires that "entryBlock" is the entry block of loop "loopNum", and that "loopNum" is the
+ // innermost loop of which "entryBlock" is the entry. Returns the value number that should be
+    // assumed for the heap at the start of "entryBlock".
+ ValueNum fgHeapVNForLoopSideEffects(BasicBlock* entryBlock, unsigned loopNum);
+
+ // Called when an operation (performed by "tree", described by "msg") may cause the global Heap to be mutated.
+ void fgMutateHeap(GenTreePtr tree DEBUGARG(const char* msg));
+
+ // Tree caused an update in the current heap VN. If "tree" has an associated heap SSA #, record that
+ // value in that SSA #.
+ void fgValueNumberRecordHeapSsa(GenTreePtr tree);
+
+ // The input 'tree' is a leaf node that is a constant
+ // Assign the proper value number to the tree
+ void fgValueNumberTreeConst(GenTreePtr tree);
+
+ // Assumes that all inputs to "tree" have had value numbers assigned; assigns a VN to tree.
+ // (With some exceptions: the VN of the lhs of an assignment is assigned as part of the
+ // assignment.)
+ // If "evalAsgLhsInd" is true, evaluate a GT_IND node, even if it's labeled as the LHS of
+ // an assignment.
+ void fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd = false);
+
+ // Does value-numbering for a block assignment.
+ void fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd);
+
+ // Does value-numbering for a cast tree.
+ void fgValueNumberCastTree(GenTreePtr tree);
+
+ // Does value-numbering for an intrinsic tree.
+ void fgValueNumberIntrinsic(GenTreePtr tree);
+
+ // Does value-numbering for a call. We interpret some helper calls.
+ void fgValueNumberCall(GenTreeCall* call);
+
+ // The VN of some nodes in "args" may have changed -- reassign VNs to the arg list nodes.
+ void fgUpdateArgListVNs(GenTreeArgList* args);
+
+ // Does value-numbering for a helper "call" that has a VN function symbol "vnf".
+ void fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueNumPair vnpExc);
+
+ // Requires "helpCall" to be a helper call. Assigns it a value number;
+ // we understand the semantics of some of the calls. Returns "true" if
+ // the call may modify the heap (we assume arbitrary memory side effects if so).
+ bool fgValueNumberHelperCall(GenTreeCall* helpCall);
+
+ // Requires "helpFunc" to be pure. Returns the corresponding VNFunc.
+ VNFunc fgValueNumberHelperMethVNFunc(CorInfoHelpFunc helpFunc);
+
+ // This is the current value number for the "Heap" implicit variable while
+ // doing value numbering. This is the value number under the "liberal" interpretation
+ // of heap values; the "conservative" interpretation needs no VN, since every access of
+ // the heap yields an unknown value.
+ ValueNum fgCurHeapVN;
+
+ // Return a "pseudo"-class handle for an array element type. If "elemType" is TYP_STRUCT,
+    // requires "elemStructType" to be non-null (and to have a zero low-order bit). Otherwise, the low-order bit
+ // is 1, and the rest is an encoding of "elemTyp".
+ static CORINFO_CLASS_HANDLE EncodeElemType(var_types elemTyp, CORINFO_CLASS_HANDLE elemStructType)
+ {
+ if (elemStructType != nullptr)
+ {
+ assert(varTypeIsStruct(elemTyp) || elemTyp == TYP_REF || elemTyp == TYP_BYREF ||
+ varTypeIsIntegral(elemTyp));
+ assert((size_t(elemStructType) & 0x1) == 0x0); // Make sure the encoding below is valid.
+ return elemStructType;
+ }
+ else
+ {
+ elemTyp = varTypeUnsignedToSigned(elemTyp);
+ return CORINFO_CLASS_HANDLE(size_t(elemTyp) << 1 | 0x1);
+ }
+ }
+    // If "clsHnd" encodes a primitive element type (i.e., it came from EncodeElemType with a null
+    // struct handle), returns the var_types it represents. Otherwise, returns TYP_STRUCT (on the
+    // assumption that "clsHnd" is the struct type of the element).
+ static var_types DecodeElemType(CORINFO_CLASS_HANDLE clsHnd)
+ {
+ size_t clsHndVal = size_t(clsHnd);
+ if (clsHndVal & 0x1)
+ {
+ return var_types(clsHndVal >> 1);
+ }
+ else
+ {
+ return TYP_STRUCT;
+ }
+ }
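+
+    // Round-trip sketch of the encoding above (illustrative; 'someStructHnd' is a placeholder
+    // class handle with a zero low-order bit):
+    //
+    //     CORINFO_CLASS_HANDLE intElem = EncodeElemType(TYP_INT, nullptr); // low bit set: primitive
+    //     assert(DecodeElemType(intElem) == TYP_INT);
+    //
+    //     CORINFO_CLASS_HANDLE structElem = EncodeElemType(TYP_STRUCT, someStructHnd); // real handle
+    //     assert(DecodeElemType(structElem) == TYP_STRUCT);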
+
+ // Convert a BYTE which represents the VM's CorInfoGCtype to the JIT's var_types
+ var_types getJitGCType(BYTE gcType);
+
+ enum structPassingKind
+ {
+ SPK_Unknown, // Invalid value, never returned
+ SPK_PrimitiveType, // The struct is passed/returned using a primitive type.
+        SPK_ByValue,       // The struct is passed/returned by value (using the ABI rules):
+                           // for ARM64 and UNIX_X64, in multiple registers (when all of the
+                           // parameter registers are used, the stack is used as well);
+                           // for X86, passed on the stack; for ARM32, passed in registers,
+                           // on the stack, or split between registers and the stack.
+        SPK_ByValueAsHfa,  // The struct is passed/returned as an HFA in multiple registers.
+        SPK_ByReference    // The struct is passed/returned by reference to a copy/buffer.
+    };
+
+    // Get the "primitive" type that is used when we are given a struct of size 'structSize'.
+    // For pointer-sized structs the 'clsHnd' is used to determine if the struct contains a GC ref.
+    // A "primitive" type is one of the scalar types: byte, short, int, long, ref, float, double.
+    // If we can't or shouldn't use a "primitive" type then TYP_UNKNOWN is returned.
+ //
+ var_types getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd);
+
+ // Get the type that is used to pass values of the given struct type.
+ // If you have already retrieved the struct size then pass it as the optional third argument
+ //
+ var_types getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
+ structPassingKind* wbPassStruct,
+ unsigned structSize = 0);
+
+ // Get the type that is used to return values of the given struct type.
+ // If you have already retrieved the struct size then pass it as the optional third argument
+ //
+ var_types getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
+ structPassingKind* wbPassStruct = nullptr,
+ unsigned structSize = 0);
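+
+    // Example query (illustrative; 'clsHnd' is a placeholder struct handle; the struct size is
+    // looked up internally when the optional argument is not supplied):
+    //
+    //     structPassingKind passKind;
+    //     var_types argType = getArgTypeForStruct(clsHnd, &passKind);
+    //     // 'passKind' reports how the struct is passed; 'argType' is a scalar type only when
+    //     // passKind == SPK_PrimitiveType.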
+
+#ifdef DEBUG
+ // Print a representation of "vnp" or "vn" on standard output.
+ // If "level" is non-zero, we also print out a partial expansion of the value.
+ void vnpPrint(ValueNumPair vnp, unsigned level);
+ void vnPrint(ValueNum vn, unsigned level);
+#endif
+
+ // Dominator computation member functions
+ // Not exposed outside Compiler
+protected:
+ bool fgDominate(BasicBlock* b1, BasicBlock* b2); // Return true if b1 dominates b2
+
+ bool fgReachable(BasicBlock* b1, BasicBlock* b2); // Returns true if block b1 can reach block b2
+
+ void fgComputeDoms(); // Computes the immediate dominators for each basic block in the
+ // flow graph. We first assume the fields bbIDom on each
+ // basic block are invalid. This computation is needed later
+ // by fgBuildDomTree to build the dominance tree structure.
+ // Based on: A Simple, Fast Dominance Algorithm
+ // by Keith D. Cooper, Timothy J. Harvey, and Ken Kennedy
+
+ BlockSet_ValRet_T fgGetDominatorSet(BasicBlock* block); // Returns a set of blocks that dominate the given block.
+                                                            // Note: this is relatively slow compared to calling fgDominate(),
+                                                            // especially when all you need is a single block-versus-block check.
+
+ void fgComputeReachabilitySets(); // Compute bbReach sets. (Also sets BBF_GC_SAFE_POINT flag on blocks.)
+
+ void fgComputeEnterBlocksSet(); // Compute the set of entry blocks, 'fgEnterBlks'.
+
+ bool fgRemoveUnreachableBlocks(); // Remove blocks determined to be unreachable by the bbReach sets.
+
+ void fgComputeReachability(); // Perform flow graph node reachability analysis.
+
+ BasicBlock* fgIntersectDom(BasicBlock* a, BasicBlock* b); // Intersect two immediate dominator sets.
+
+ void fgDfsInvPostOrder(); // In order to compute dominance using fgIntersectDom, the flow graph nodes must be
+                              // processed in topological sort order; this function takes care of that.
+
+ void fgDfsInvPostOrderHelper(BasicBlock* block, BlockSet& visited, unsigned* count);
+
+ BlockSet_ValRet_T fgDomFindStartNodes(); // Computes which basic blocks don't have incoming edges in the flow graph.
+ // Returns this as a set.
+
+ BlockSet_ValRet_T fgDomTreeEntryNodes(BasicBlockList** domTree); // Computes which nodes in the dominance forest are
+ // root nodes. Returns this as a set.
+
+#ifdef DEBUG
+ void fgDispDomTree(BasicBlockList** domTree); // Helper that prints out the Dominator Tree in debug builds.
+#endif // DEBUG
+
+ void fgBuildDomTree(); // Once we compute all the immediate dominator sets for each node in the flow graph
+ // (performed by fgComputeDoms), this procedure builds the dominance tree represented
+                           // as adjacency lists.
+
+    // In order to speed up queries of the form 'Does A dominate B', we perform a DFS preorder and postorder
+    // traversal of the dominance tree; the dominance query then becomes: A dominates B iff
+    // preOrder(A) <= preOrder(B) && postOrder(A) >= postOrder(B), making the computation O(1).
+ void fgTraverseDomTree(unsigned bbNum, BasicBlockList** domTree, unsigned* preNum, unsigned* postNum);
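+
+    // The resulting O(1) query, as a sketch (uses the fgDomTreePreOrder/fgDomTreePostOrder arrays
+    // declared above; 'a' and 'b' are placeholder BasicBlock pointers):
+    //
+    //     bool aDominatesB = (fgDomTreePreOrder[a->bbNum] <= fgDomTreePreOrder[b->bbNum]) &&
+    //                        (fgDomTreePostOrder[a->bbNum] >= fgDomTreePostOrder[b->bbNum]);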
+
+ // When the flow graph changes, we need to update the block numbers, predecessor lists, reachability sets, and
+ // dominators.
+ void fgUpdateChangedFlowGraph();
+
+public:
+ // Compute the predecessors of the blocks in the control flow graph.
+ void fgComputePreds();
+
+ // Remove all predecessor information.
+ void fgRemovePreds();
+
+ // Compute the cheap flow graph predecessors lists. This is used in some early phases
+ // before the full predecessors lists are computed.
+ void fgComputeCheapPreds();
+
+private:
+ void fgAddCheapPred(BasicBlock* block, BasicBlock* blockPred);
+
+ void fgRemoveCheapPred(BasicBlock* block, BasicBlock* blockPred);
+
+public:
+ enum GCPollType
+ {
+ GCPOLL_NONE,
+ GCPOLL_CALL,
+ GCPOLL_INLINE
+ };
+
+ // Initialize the per-block variable sets (used for liveness analysis).
+ void fgInitBlockVarSets();
+
+ // true if we've gone through and created GC Poll calls.
+ bool fgGCPollsCreated;
+ void fgMarkGCPollBlocks();
+ void fgCreateGCPolls();
+ bool fgCreateGCPoll(GCPollType pollType, BasicBlock* block);
+
+ // Requires that "block" is a block that returns from
+ // a finally. Returns the number of successors (jump targets of
+ // of blocks in the covered "try" that did a "LEAVE".)
+ unsigned fgNSuccsOfFinallyRet(BasicBlock* block);
+
+ // Requires that "block" is a block that returns (in the sense of BBJ_EHFINALLYRET) from
+ // a finally. Returns its "i"th successor (jump targets of
+ // of blocks in the covered "try" that did a "LEAVE".)
+ // Requires that "i" < fgNSuccsOfFinallyRet(block).
+ BasicBlock* fgSuccOfFinallyRet(BasicBlock* block, unsigned i);
+
+private:
+ // Factors out common portions of the implementations of the two methods above.
+ void fgSuccOfFinallyRetWork(BasicBlock* block, unsigned i, BasicBlock** bres, unsigned* nres);
+
+public:
+ // For many purposes, it is desirable to be able to enumerate the *distinct* targets of a switch statement,
+ // skipping duplicate targets. (E.g., in flow analyses that are only interested in the set of possible targets.)
+ // SwitchUniqueSuccSet contains the non-duplicated switch targets.
+ // (Code that modifies the jump table of a switch has an obligation to call Compiler::UpdateSwitchTableTarget,
+ // which in turn will call the "UpdateTarget" method of this type if a SwitchUniqueSuccSet has already
+ // been computed for the switch block. If a switch block is deleted or is transformed into a non-switch,
+ // we leave the entry associated with the block, but it will no longer be accessed.)
+ struct SwitchUniqueSuccSet
+ {
+ unsigned numDistinctSuccs; // Number of distinct targets of the switch.
+ BasicBlock** nonDuplicates; // Array of "numDistinctSuccs", containing all the distinct switch target
+ // successors.
+
+ // The switch block "switchBlk" just had an entry with value "from" modified to the value "to".
+ // Update "this" as necessary: if "from" is no longer an element of the jump table of "switchBlk",
+ // remove it from "this", and ensure that "to" is a member. Use "alloc" to do any required allocation.
+ void UpdateTarget(IAllocator* alloc, BasicBlock* switchBlk, BasicBlock* from, BasicBlock* to);
+ };
+
+ typedef SimplerHashTable<BasicBlock*, PtrKeyFuncs<BasicBlock>, SwitchUniqueSuccSet, JitSimplerHashBehavior>
+ BlockToSwitchDescMap;
+
+private:
+ // Maps BasicBlock*'s that end in switch statements to SwitchUniqueSuccSets that allow
+ // iteration over only the distinct successors.
+ BlockToSwitchDescMap* m_switchDescMap;
+
+public:
+ BlockToSwitchDescMap* GetSwitchDescMap()
+ {
+ if (m_switchDescMap == nullptr)
+ {
+ m_switchDescMap = new (getAllocator()) BlockToSwitchDescMap(getAllocator());
+ }
+ return m_switchDescMap;
+ }
+
+ // Invalidate the map of unique switch block successors. For example, since the hash key of the map
+ // depends on block numbers, we must invalidate the map when the blocks are renumbered, to ensure that
+ // we don't accidentally look up and return the wrong switch data.
+ void InvalidateUniqueSwitchSuccMap()
+ {
+ m_switchDescMap = nullptr;
+ }
+
+ // Requires "switchBlock" to be a block that ends in a switch. Returns
+ // the corresponding SwitchUniqueSuccSet.
+ SwitchUniqueSuccSet GetDescriptorForSwitch(BasicBlock* switchBlk);
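+ // Illustrative use of the descriptor above (a sketch only, using the fields declared in
+ // SwitchUniqueSuccSet; "VisitBlock" is a hypothetical placeholder for caller logic):
+ //
+ //   SwitchUniqueSuccSet succs = GetDescriptorForSwitch(switchBlk);
+ //   for (unsigned i = 0; i < succs.numDistinctSuccs; i++)
+ //   {
+ //       BasicBlock* uniqueSucc = succs.nonDuplicates[i];
+ //       VisitBlock(uniqueSucc); // each distinct switch target is visited exactly once
+ //   }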
+
+ // The switch block "switchBlk" just had an entry with value "from" modified to the value "to".
+ // Update "this" as necessary: if "from" is no longer an element of the jump table of "switchBlk",
+ // remove it from "this", and ensure that "to" is a member.
+ void UpdateSwitchTableTarget(BasicBlock* switchBlk, BasicBlock* from, BasicBlock* to);
+
+ // Remove the "SwitchUniqueSuccSet" of "switchBlk" in the BlockToSwitchDescMap.
+ void fgInvalidateSwitchDescMapEntry(BasicBlock* switchBlk);
+
+ BasicBlock* fgFirstBlockOfHandler(BasicBlock* block);
+
+ flowList* fgGetPredForBlock(BasicBlock* block, BasicBlock* blockPred);
+
+ flowList* fgGetPredForBlock(BasicBlock* block, BasicBlock* blockPred, flowList*** ptrToPred);
+
+ flowList* fgSpliceOutPred(BasicBlock* block, BasicBlock* blockPred);
+
+ flowList* fgRemoveRefPred(BasicBlock* block, BasicBlock* blockPred);
+
+ flowList* fgRemoveAllRefPreds(BasicBlock* block, BasicBlock* blockPred);
+
+ flowList* fgRemoveAllRefPreds(BasicBlock* block, flowList** ptrToPred);
+
+ void fgRemoveBlockAsPred(BasicBlock* block);
+
+ void fgChangeSwitchBlock(BasicBlock* oldSwitchBlock, BasicBlock* newSwitchBlock);
+
+ void fgReplaceSwitchJumpTarget(BasicBlock* blockSwitch, BasicBlock* newTarget, BasicBlock* oldTarget);
+
+ void fgReplaceJumpTarget(BasicBlock* block, BasicBlock* newTarget, BasicBlock* oldTarget);
+
+ void fgReplacePred(BasicBlock* block, BasicBlock* oldPred, BasicBlock* newPred);
+
+ flowList* fgAddRefPred(BasicBlock* block,
+ BasicBlock* blockPred,
+ flowList* oldEdge = nullptr,
+ bool initializingPreds = false); // Only set to 'true' when we are computing preds in
+ // fgComputePreds()
+
+ void fgFindBasicBlocks();
+
+ bool fgIsBetterFallThrough(BasicBlock* bCur, BasicBlock* bAlt);
+
+ bool fgCheckEHCanInsertAfterBlock(BasicBlock* blk, unsigned regionIndex, bool putInTryRegion);
+
+ BasicBlock* fgFindInsertPoint(unsigned regionIndex,
+ bool putInTryRegion,
+ BasicBlock* startBlk,
+ BasicBlock* endBlk,
+ BasicBlock* nearBlk,
+ BasicBlock* jumpBlk,
+ bool runRarely);
+
+ unsigned fgGetNestingLevel(BasicBlock* block, unsigned* pFinallyNesting = nullptr);
+
+ void fgRemoveEmptyBlocks();
+
+ void fgRemoveStmt(BasicBlock* block, GenTreePtr stmt, bool updateRefCnt = true);
+
+ bool fgCheckRemoveStmt(BasicBlock* block, GenTreePtr stmt);
+
+ void fgCreateLoopPreHeader(unsigned lnum);
+
+ void fgUnreachableBlock(BasicBlock* block);
+
+ void fgRemoveJTrue(BasicBlock* block);
+
+ BasicBlock* fgLastBBInMainFunction();
+
+ BasicBlock* fgEndBBAfterMainFunction();
+
+ void fgUnlinkRange(BasicBlock* bBeg, BasicBlock* bEnd);
+
+ void fgRemoveBlock(BasicBlock* block, bool unreachable);
+
+ bool fgCanCompactBlocks(BasicBlock* block, BasicBlock* bNext);
+
+ void fgCompactBlocks(BasicBlock* block, BasicBlock* bNext);
+
+ void fgUpdateLoopsAfterCompacting(BasicBlock* block, BasicBlock* bNext);
+
+ BasicBlock* fgConnectFallThrough(BasicBlock* bSrc, BasicBlock* bDst);
+
+ bool fgRenumberBlocks();
+
+ bool fgExpandRarelyRunBlocks();
+
+ bool fgEhAllowsMoveBlock(BasicBlock* bBefore, BasicBlock* bAfter);
+
+ void fgMoveBlocksAfter(BasicBlock* bStart, BasicBlock* bEnd, BasicBlock* insertAfterBlk);
+
+ enum FG_RELOCATE_TYPE
+ {
+ FG_RELOCATE_TRY, // relocate the 'try' region
+ FG_RELOCATE_HANDLER // relocate the handler region (including the filter if necessary)
+ };
+ BasicBlock* fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE relocateType);
+
+#if FEATURE_EH_FUNCLETS
+#if defined(_TARGET_ARM_)
+ void fgClearFinallyTargetBit(BasicBlock* block);
+#endif // defined(_TARGET_ARM_)
+ bool fgIsIntraHandlerPred(BasicBlock* predBlock, BasicBlock* block);
+ bool fgAnyIntraHandlerPreds(BasicBlock* block);
+ void fgInsertFuncletPrologBlock(BasicBlock* block);
+ void fgCreateFuncletPrologBlocks();
+ void fgCreateFunclets();
+#else // !FEATURE_EH_FUNCLETS
+ bool fgRelocateEHRegions();
+#endif // !FEATURE_EH_FUNCLETS
+
+ bool fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock* target);
+
+ bool fgBlockEndFavorsTailDuplication(BasicBlock* block);
+
+ bool fgBlockIsGoodTailDuplicationCandidate(BasicBlock* block);
+
+ bool fgOptimizeFallthroughTailDup(BasicBlock* block, BasicBlock* target);
+
+ bool fgOptimizeEmptyBlock(BasicBlock* block);
+
+ bool fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBlock* bDest);
+
+ bool fgOptimizeBranch(BasicBlock* bJump);
+
+ bool fgOptimizeSwitchBranches(BasicBlock* block);
+
+ bool fgOptimizeBranchToNext(BasicBlock* block, BasicBlock* bNext, BasicBlock* bPrev);
+
+ bool fgOptimizeSwitchJumps();
+#ifdef DEBUG
+ void fgPrintEdgeWeights();
+#endif
+ void fgComputeEdgeWeights();
+
+ void fgReorderBlocks();
+
+ void fgDetermineFirstColdBlock();
+
+ bool fgIsForwardBranch(BasicBlock* bJump, BasicBlock* bSrc = nullptr);
+
+ bool fgUpdateFlowGraph(bool doTailDup = false);
+
+ void fgFindOperOrder();
+
+ // Predicate type used to decide whether the tree should be split at a given point.
+ typedef bool(fgSplitPredicate)(GenTree* tree, GenTree* parent, fgWalkData* data);
+
+ void fgSetBlockOrder();
+
+ void fgRemoveReturnBlock(BasicBlock* block);
+
+ /* Helper code that has been factored out */
+ inline void fgConvertBBToThrowBB(BasicBlock* block);
+
+ bool fgCastNeeded(GenTreePtr tree, var_types toType);
+ GenTreePtr fgDoNormalizeOnStore(GenTreePtr tree);
+ GenTreePtr fgMakeTmpArgNode(
+ unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters));
+
+ // The following check for loops that don't execute calls
+ bool fgLoopCallMarked;
+
+ void fgLoopCallTest(BasicBlock* srcBB, BasicBlock* dstBB);
+ void fgLoopCallMark();
+
+ void fgMarkLoopHead(BasicBlock* block);
+
+ unsigned fgGetCodeEstimate(BasicBlock* block);
+
+#if DUMP_FLOWGRAPHS
+ const char* fgProcessEscapes(const char* nameIn, escapeMapping_t* map);
+ FILE* fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, LPCWSTR type);
+ bool fgDumpFlowGraph(Phases phase);
+
+#endif // DUMP_FLOWGRAPHS
+
+#ifdef DEBUG
+ void fgDispDoms();
+ void fgDispReach();
+ void fgDispBBLiveness(BasicBlock* block);
+ void fgDispBBLiveness();
+ void fgTableDispBasicBlock(BasicBlock* block, int ibcColWidth = 0);
+ void fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock, bool dumpTrees);
+ void fgDispBasicBlocks(bool dumpTrees = false);
+ void fgDumpStmtTree(GenTreePtr stmt, unsigned blkNum);
+ void fgDumpBlock(BasicBlock* block);
+ void fgDumpTrees(BasicBlock* firstBlock, BasicBlock* lastBlock);
+
+ static fgWalkPreFn fgStress64RsltMulCB;
+ void fgStress64RsltMul();
+ void fgDebugCheckUpdate();
+ void fgDebugCheckBBlist(bool checkBBNum = false, bool checkBBRefs = true);
+ void fgDebugCheckBlockLinks();
+ void fgDebugCheckLinks(bool morphTrees = false);
+ void fgDebugCheckNodeLinks(BasicBlock* block, GenTreePtr stmt);
+ void fgDebugCheckFlags(GenTreePtr tree);
+#endif
+
+#ifdef LEGACY_BACKEND
+ static void fgOrderBlockOps(GenTreePtr tree,
+ regMaskTP reg0,
+ regMaskTP reg1,
+ regMaskTP reg2,
+ GenTreePtr* opsPtr, // OUT
+ regMaskTP* regsPtr); // OUT
+#endif // LEGACY_BACKEND
+
+ static GenTreePtr fgGetFirstNode(GenTreePtr tree);
+ static bool fgTreeIsInStmt(GenTree* tree, GenTreeStmt* stmt);
+
+ inline bool fgIsInlining()
+ {
+ return fgExpandInline;
+ }
+
+ void fgTraverseRPO();
+
+ //--------------------- Walking the trees in the IR -----------------------
+
+ struct fgWalkData
+ {
+ Compiler* compiler;
+ fgWalkPreFn* wtprVisitorFn;
+ fgWalkPostFn* wtpoVisitorFn;
+ void* pCallbackData; // user-provided data
+ bool wtprLclsOnly; // whether to only visit lclvar nodes
+ GenTreePtr parent; // parent of current node, provided to callback
+ GenTreeStack* parentStack; // stack of parent nodes, if asked for
+#ifdef DEBUG
+ bool printModified; // callback can use this
+#endif
+ };
+
+ template <bool computeStack>
+ static fgWalkResult fgWalkTreePreRec(GenTreePtr* pTree, fgWalkData* fgWalkPre);
+
+ // general purpose tree-walker that is capable of doing pre- and post- order
+ // callbacks at the same time
+ template <bool doPreOrder, bool doPostOrder>
+ static fgWalkResult fgWalkTreeRec(GenTreePtr* pTree, fgWalkData* fgWalkPre);
+
+ fgWalkResult fgWalkTreePre(GenTreePtr* pTree,
+ fgWalkPreFn* visitor,
+ void* pCallBackData = nullptr,
+ bool lclVarsOnly = false,
+ bool computeStack = false);
+
+ fgWalkResult fgWalkTree(GenTreePtr* pTree,
+ fgWalkPreFn* preVisitor,
+ fgWalkPostFn* postVisitor,
+ void* pCallBackData = nullptr);
+
+ void fgWalkAllTreesPre(fgWalkPreFn* visitor, void* pCallBackData);
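+ // Illustrative pre-order walk (a sketch only; the callback shape follows fgWalkPreFn as used by the
+ // fgChk*CB helpers below, and "pCallbackData" is the user pointer carried in fgWalkData):
+ //
+ //   static Compiler::fgWalkResult CountLclVarsCB(GenTreePtr* pTree, Compiler::fgWalkData* data)
+ //   {
+ //       if ((*pTree)->OperGet() == GT_LCL_VAR)
+ //       {
+ //           (*((unsigned*)data->pCallbackData))++;
+ //       }
+ //       return Compiler::WALK_CONTINUE; // keep walking; WALK_ABORT/WALK_SKIP_SUBTREES stop early
+ //   }
+ //
+ //   unsigned lclCount = 0;
+ //   comp->fgWalkTreePre(&tree, CountLclVarsCB, &lclCount);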
+
+ //----- Postorder
+
+ template <bool computeStack>
+ static fgWalkResult fgWalkTreePostRec(GenTreePtr* pTree, fgWalkData* fgWalkPre);
+
+ fgWalkResult fgWalkTreePost(GenTreePtr* pTree,
+ fgWalkPostFn* visitor,
+ void* pCallBackData = nullptr,
+ bool computeStack = false);
+
+ // An fgWalkPreFn that looks for expressions that have inline throws in
+ // minopts mode. Basically it looks for trees with gtOverflowEx() or
+ // GTF_IND_RNGCHK. It returns WALK_ABORT if one is found. It
+ // returns WALK_SKIP_SUBTREES if GTF_EXCEPT is not set (assumes flags
+ // properly propagated to parent trees). It returns WALK_CONTINUE
+ // otherwise.
+ static fgWalkResult fgChkThrowCB(GenTreePtr* pTree, Compiler::fgWalkData* data);
+ static fgWalkResult fgChkLocAllocCB(GenTreePtr* pTree, Compiler::fgWalkData* data);
+ static fgWalkResult fgChkQmarkCB(GenTreePtr* pTree, Compiler::fgWalkData* data);
+
+ /**************************************************************************
+ * PROTECTED
+ *************************************************************************/
+
+protected:
+ friend class SsaBuilder;
+ friend struct ValueNumberState;
+
+ //--------------------- Detect the basic blocks ---------------------------
+
+ BasicBlock** fgBBs; // Table of pointers to the BBs
+
+ void fgInitBBLookup();
+ BasicBlock* fgLookupBB(unsigned addr);
+
+ void fgMarkJumpTarget(BYTE* jumpTarget, IL_OFFSET offs);
+
+ void fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget);
+
+ void fgMarkBackwardJump(BasicBlock* startBlock, BasicBlock* endBlock);
+
+ void fgLinkBasicBlocks();
+
+ void fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget);
+
+ void fgCheckBasicBlockControlFlow();
+
+ void fgControlFlowPermitted(BasicBlock* blkSrc,
+ BasicBlock* blkDest,
+ BOOL IsLeave = false /* is the src a leave block */);
+
+ bool fgFlowToFirstBlockOfInnerTry(BasicBlock* blkSrc, BasicBlock* blkDest, bool sibling);
+
+ void fgObserveInlineConstants(OPCODE opcode, const FgStack& stack, bool isInlining);
+
+ void fgAdjustForAddressExposedOrWrittenThis();
+
+ bool fgProfileData_ILSizeMismatch;
+ ICorJitInfo::ProfileBuffer* fgProfileBuffer;
+ ULONG fgProfileBufferCount;
+ ULONG fgNumProfileRuns;
+
+ unsigned fgStressBBProf()
+ {
+#ifdef DEBUG
+ unsigned result = JitConfig.JitStressBBProf();
+ if (result == 0)
+ {
+ if (compStressCompile(STRESS_BB_PROFILE, 15))
+ {
+ result = 1;
+ }
+ }
+ return result;
+#else
+ return 0;
+#endif
+ }
+
+ bool fgHaveProfileData();
+ bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weight);
+
+ bool fgIsUsingProfileWeights()
+ {
+ return (fgHaveProfileData() || fgStressBBProf());
+ }
+ void fgInstrumentMethod();
+
+//-------- Insert a statement at the start or end of a basic block --------
+
+#ifdef DEBUG
+public:
+ static bool fgBlockContainsStatementBounded(BasicBlock* block, GenTree* stmt, bool answerOnBoundExceeded = true);
+#endif
+
+public:
+ GenTreeStmt* fgInsertStmtAtEnd(BasicBlock* block, GenTreePtr node);
+
+public: // Used by linear scan register allocation
+ GenTreeStmt* fgInsertStmtNearEnd(BasicBlock* block, GenTreePtr node);
+
+private:
+ GenTreePtr fgInsertStmtAtBeg(BasicBlock* block, GenTreePtr stmt);
+ GenTreePtr fgInsertStmtAfter(BasicBlock* block, GenTreePtr insertionPoint, GenTreePtr stmt);
+
+public: // Used by linear scan register allocation
+ GenTreePtr fgInsertStmtBefore(BasicBlock* block, GenTreePtr insertionPoint, GenTreePtr stmt);
+
+private:
+ GenTreePtr fgInsertStmtListAfter(BasicBlock* block, GenTreePtr stmtAfter, GenTreePtr stmtList);
+
+ GenTreePtr fgMorphSplitTree(GenTree** splitPoint, GenTree* stmt, BasicBlock* blk);
+
+ // Create a new temporary variable to hold the result of *ppTree,
+ // and transform the graph accordingly.
+ GenTree* fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType = nullptr);
+ GenTree* fgMakeMultiUse(GenTree** ppTree);
+
+ // After replacing oldChild with newChild, fixup the fgArgTabEntryPtr
+ // if it happens to be an argument to a call.
+ void fgFixupIfCallArg(ArrayStack<GenTree*>* parentStack, GenTree* oldChild, GenTree* newChild);
+
+public:
+ void fgFixupArgTabEntryPtr(GenTreePtr parentCall, GenTreePtr oldArg, GenTreePtr newArg);
+
+private:
+ // Recognize a bitwise rotation pattern and convert into a GT_ROL or a GT_ROR node.
+ GenTreePtr fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree);
+ bool fgOperIsBitwiseRotationRoot(genTreeOps oper);
+
+ //-------- Determine the order in which the trees will be evaluated -------
+
+ unsigned fgTreeSeqNum;
+ GenTree* fgTreeSeqLst;
+ GenTree* fgTreeSeqBeg;
+
+ GenTree* fgSetTreeSeq(GenTree* tree, GenTree* prev = nullptr, bool isLIR = false);
+ void fgSetTreeSeqHelper(GenTree* tree, bool isLIR);
+ void fgSetTreeSeqFinish(GenTreePtr tree, bool isLIR);
+ void fgSetStmtSeq(GenTree* tree);
+ void fgSetBlockOrder(BasicBlock* block);
+
+ //------------------------- Morphing --------------------------------------
+
+ unsigned fgPtrArgCntCur;
+ unsigned fgPtrArgCntMax;
+ hashBv* fgOutgoingArgTemps;
+ hashBv* fgCurrentlyInUseArgTemps;
+
+ bool compCanEncodePtrArgCntMax();
+
+ void fgSetRngChkTarget(GenTreePtr tree, bool delay = true);
+
+#if REARRANGE_ADDS
+ void fgMoveOpsLeft(GenTreePtr tree);
+#endif
+
+ bool fgIsCommaThrow(GenTreePtr tree, bool forFolding = false);
+
+ bool fgIsThrow(GenTreePtr tree);
+
+ bool fgInDifferentRegions(BasicBlock* blk1, BasicBlock* blk2);
+ bool fgIsBlockCold(BasicBlock* block);
+
+ GenTreePtr fgMorphCastIntoHelper(GenTreePtr tree, int helper, GenTreePtr oper);
+
+ GenTreePtr fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeArgList* args);
+
+ GenTreePtr fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs);
+
+ bool fgMorphRelopToQmark(GenTreePtr tree);
+
+ // A "MorphAddrContext" carries information from the surrounding context. If we are evaluating a byref address,
+ // it is useful to know whether the address will be immediately dereferenced, or whether the address value will
+ // be used, perhaps by passing it as an argument to a called method. This affects how null checking is done:
+ // for sufficiently small offsets, we can rely on OS page protection to implicitly null-check addresses that we
+ // know will be dereferenced. To know that reliance on implicit null checking is sound, we must further know that
+ // all offsets between the top-level indirection and the bottom are constant, and that their sum is sufficiently
+ // small; hence the other fields of MorphAddrContext. Finally, the odd structure of GT_COPYBLK, in which the second
+ // argument is a GT_LIST, requires us to "tell" that List node that its parent is a GT_COPYBLK, so it "knows" that
+ // each of its arguments should be evaluated in MACK_Ind contexts. (This would not be true for GT_LIST nodes
+ // representing method call argument lists.)
+ enum MorphAddrContextKind
+ {
+ MACK_Ind,
+ MACK_Addr,
+ MACK_CopyBlock, // This is necessary so we know we have to start a new "Ind" context for each of the
+ // addresses in the arg list.
+ };
+ struct MorphAddrContext
+ {
+ MorphAddrContextKind m_kind;
+ bool m_allConstantOffsets; // Valid only for "m_kind == MACK_Ind". True iff all offsets between
+ // top-level indirection and here have been constants.
+ size_t m_totalOffset; // Valid only for "m_kind == MACK_Ind", and if "m_allConstantOffsets" is true.
+ // In that case, is the sum of those constant offsets.
+
+ MorphAddrContext(MorphAddrContextKind kind) : m_kind(kind), m_allConstantOffsets(true), m_totalOffset(0)
+ {
+ }
+ };
+
+ // A MACK_CopyBlock context is immutable, so we can just make one of these and share it.
+ static MorphAddrContext s_CopyBlockMAC;
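+ // Illustrative construction of a context for an immediately-dereferenced address (a sketch only;
+ // "constOffset" is a hypothetical constant offset gathered while walking the address expression):
+ //
+ //   MorphAddrContext indMac(MACK_Ind);
+ //   indMac.m_totalOffset += constOffset;        // accumulate the constant offsets seen so far
+ //   // if a non-constant offset is encountered: indMac.m_allConstantOffsets = false;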
+
+#ifdef FEATURE_SIMD
+ GenTreePtr fgCopySIMDNode(GenTreeSIMD* simdNode);
+ GenTreePtr getSIMDStructFromField(GenTreePtr tree,
+ var_types* baseTypeOut,
+ unsigned* indexOut,
+ unsigned* simdSizeOut,
+ bool ignoreUsedInSIMDIntrinsic = false);
+ GenTreePtr fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree);
+ GenTreePtr fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree);
+ bool fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt);
+ void impMarkContiguousSIMDFieldAssignments(GenTreePtr stmt);
+
+ // fgPreviousCandidateSIMDFieldAsgStmt is only used for tracking the previous SIMD field assignment
+ // in Compiler::impMarkContiguousSIMDFieldAssignments.
+ GenTreePtr fgPreviousCandidateSIMDFieldAsgStmt;
+
+#endif // FEATURE_SIMD
+ GenTreePtr fgMorphArrayIndex(GenTreePtr tree);
+ GenTreePtr fgMorphCast(GenTreePtr tree);
+ GenTreePtr fgUnwrapProxy(GenTreePtr objRef);
+ GenTreeCall* fgMorphArgs(GenTreeCall* call);
+
+ void fgMakeOutgoingStructArgCopy(GenTreeCall* call,
+ GenTree* args,
+ unsigned argIndex,
+ CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structDescPtr));
+
+ void fgFixupStructReturn(GenTreePtr call);
+ GenTreePtr fgMorphLocalVar(GenTreePtr tree);
+ bool fgAddrCouldBeNull(GenTreePtr addr);
+ GenTreePtr fgMorphField(GenTreePtr tree, MorphAddrContext* mac);
+ bool fgCanFastTailCall(GenTreeCall* call);
+ void fgMorphTailCall(GenTreeCall* call);
+ void fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall);
+ GenTreePtr fgAssignRecursiveCallArgToCallerParam(GenTreePtr arg,
+ fgArgTabEntryPtr argTabEntry,
+ BasicBlock* block,
+ IL_OFFSETX callILOffset,
+ GenTreePtr tmpAssignmentInsertionPoint,
+ GenTreePtr paramAssignmentInsertionPoint);
+ static int fgEstimateCallStackSize(GenTreeCall* call);
+ GenTreePtr fgMorphCall(GenTreeCall* call);
+ void fgMorphCallInline(GenTreeCall* call, InlineResult* result);
+ void fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result);
+#if DEBUG
+ void fgNoteNonInlineCandidate(GenTreePtr tree, GenTreeCall* call);
+ static fgWalkPreFn fgFindNonInlineCandidate;
+#endif
+ GenTreePtr fgOptimizeDelegateConstructor(GenTreePtr call, CORINFO_CONTEXT_HANDLE* ExactContextHnd);
+ GenTreePtr fgMorphLeaf(GenTreePtr tree);
+ void fgAssignSetVarDef(GenTreePtr tree);
+ GenTreePtr fgMorphOneAsgBlockOp(GenTreePtr tree);
+ GenTreePtr fgMorphInitBlock(GenTreePtr tree);
+ GenTreePtr fgMorphBlkToInd(GenTreeBlk* tree, var_types type);
+ GenTreePtr fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue = false);
+ GenTreePtr fgMorphBlkNode(GenTreePtr tree, bool isDest);
+ GenTreePtr fgMorphBlockOperand(GenTreePtr tree, var_types asgType, unsigned blockWidth, bool isDest);
+ GenTreePtr fgMorphCopyBlock(GenTreePtr tree);
+ GenTreePtr fgMorphForRegisterFP(GenTreePtr tree);
+ GenTreePtr fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac = nullptr);
+ GenTreePtr fgMorphSmpOpPre(GenTreePtr tree);
+ GenTreePtr fgMorphDivByConst(GenTreeOp* tree);
+ GenTreePtr fgMorphModByConst(GenTreeOp* tree);
+ GenTreePtr fgMorphModToSubMulDiv(GenTreeOp* tree);
+ GenTreePtr fgMorphSmpOpOptional(GenTreeOp* tree);
+ GenTreePtr fgMorphRecognizeBoxNullable(GenTree* compare);
+ bool fgShouldUseMagicNumberDivide(GenTreeOp* tree);
+
+ GenTreePtr fgMorphToEmulatedFP(GenTreePtr tree);
+ GenTreePtr fgMorphConst(GenTreePtr tree);
+
+public:
+ GenTreePtr fgMorphTree(GenTreePtr tree, MorphAddrContext* mac = nullptr);
+
+private:
+#if LOCAL_ASSERTION_PROP
+ void fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree));
+#endif
+ void fgMorphTreeDone(GenTreePtr tree, GenTreePtr oldTree = nullptr DEBUGARG(int morphNum = 0));
+
+ GenTreePtr fgMorphStmt;
+
+ unsigned fgGetBigOffsetMorphingTemp(var_types type); // We cache one temp per type to be
+ // used when morphing big offsets.
+
+ //----------------------- Liveness analysis -------------------------------
+
+ VARSET_TP fgCurUseSet; // vars used by block (before an assignment)
+ VARSET_TP fgCurDefSet; // vars assigned by block (before a use)
+
+ bool fgCurHeapUse; // True iff the current basic block uses the heap before defining it.
+ bool fgCurHeapDef; // True iff the current basic block defines the heap.
+ bool fgCurHeapHavoc; // True if the current basic block is known to set the heap to a "havoc" value.
+
+ void fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar = nullptr);
+
+#ifdef DEBUGGING_SUPPORT
+ void fgBeginScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
+ void fgEndScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
+
+ void fgMarkInScope(BasicBlock* block, VARSET_VALARG_TP inScope);
+ void fgUnmarkInScope(BasicBlock* block, VARSET_VALARG_TP unmarkScope);
+
+ void fgExtendDbgScopes();
+ void fgExtendDbgLifetimes();
+
+#ifdef DEBUG
+ void fgDispDebugScopes();
+#endif // DEBUG
+
+#endif // DEBUGGING_SUPPORT
+
+ //-------------------------------------------------------------------------
+ //
+ // The following keeps track of any code we've added for things like array
+ // range checking or explicit calls to enable GC, and so on.
+ //
+public:
+ struct AddCodeDsc
+ {
+ AddCodeDsc* acdNext;
+ BasicBlock* acdDstBlk; // block to which we jump
+ unsigned acdData;
+ SpecialCodeKind acdKind; // what kind of a special block is this?
+ unsigned short acdStkLvl;
+ };
+
+private:
+ static unsigned acdHelper(SpecialCodeKind codeKind);
+
+ AddCodeDsc* fgAddCodeList;
+ bool fgAddCodeModf;
+ bool fgRngChkThrowAdded;
+ AddCodeDsc* fgExcptnTargetCache[SCK_COUNT];
+
+ BasicBlock* fgRngChkTarget(BasicBlock* block, unsigned stkDepth, SpecialCodeKind kind);
+
+ BasicBlock* fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, SpecialCodeKind kind, unsigned stkDepth = 0);
+
+public:
+ AddCodeDsc* fgFindExcptnTarget(SpecialCodeKind kind, unsigned refData);
+
+private:
+ bool fgIsCodeAdded();
+
+ bool fgIsThrowHlpBlk(BasicBlock* block);
+ unsigned fgThrowHlpBlkStkLevel(BasicBlock* block);
+
+ unsigned fgBigOffsetMorphingTemps[TYP_COUNT];
+
+ unsigned fgCheckInlineDepthAndRecursion(InlineInfo* inlineInfo);
+ void fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* result);
+ void fgInsertInlineeBlocks(InlineInfo* pInlineInfo);
+ GenTreePtr fgInlinePrependStatements(InlineInfo* inlineInfo);
+
+#if FEATURE_MULTIREG_RET
+ GenTreePtr fgGetStructAsStructPtr(GenTreePtr tree);
+ GenTreePtr fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd);
+ void fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd);
+#endif // FEATURE_MULTIREG_RET
+
+ static fgWalkPreFn fgUpdateInlineReturnExpressionPlaceHolder;
+
+#ifdef DEBUG
+ static fgWalkPreFn fgDebugCheckInlineCandidates;
+#endif
+
+ void fgPromoteStructs();
+ fgWalkResult fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre);
+ fgWalkResult fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre);
+ void fgMarkImplicitByRefArgs();
+ bool fgMorphImplicitByRefArgs(GenTree** pTree, fgWalkData* fgWalkPre);
+ static fgWalkPreFn fgMarkAddrTakenLocalsPreCB;
+ static fgWalkPostFn fgMarkAddrTakenLocalsPostCB;
+ void fgMarkAddressExposedLocals();
+ bool fgNodesMayInterfere(GenTree* store, GenTree* load);
+
+ // Returns true if the type of tree is of size at least "width", or if "tree" is not a
+ // local variable.
+ bool fgFitsInOrNotLoc(GenTreePtr tree, unsigned width);
+
+ // The given local variable, required to be a struct variable, is being assigned via
+ // a "lclField", to make it masquerade as an integral type in the ABI. Make sure that
+ // the variable is not enregistered, and is therefore not promoted independently.
+ void fgLclFldAssign(unsigned lclNum);
+
+ static fgWalkPreFn gtHasLocalsWithAddrOpCB;
+ bool gtCanOptimizeTypeEquality(GenTreePtr tree);
+ bool gtIsTypeHandleToRuntimeTypeHelper(GenTreePtr tree);
+ bool gtIsActiveCSE_Candidate(GenTreePtr tree);
+
+#ifdef DEBUG
+ bool fgPrintInlinedMethods;
+#endif
+
+ bool fgIsBigOffset(size_t offset);
+
+ // The following are used when morphing special cases of integer div/mod operations and also by codegen
+ bool fgIsSignedDivOptimizable(GenTreePtr divisor);
+ bool fgIsUnsignedDivOptimizable(GenTreePtr divisor);
+ bool fgIsSignedModOptimizable(GenTreePtr divisor);
+ bool fgIsUnsignedModOptimizable(GenTreePtr divisor);
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX Optimizer XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ void optInit();
+
+protected:
+ LclVarDsc* optIsTrackedLocal(GenTreePtr tree);
+
+public:
+ void optRemoveRangeCheck(
+ GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags = 0, bool forceRemove = false);
+ bool optIsRangeCheckRemovable(GenTreePtr tree);
+
+protected:
+ static fgWalkPreFn optValidRangeCheckIndex;
+ static fgWalkPreFn optRemoveTreeVisitor; // Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar
+ // usage counts
+
+ void optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList);
+
+ /**************************************************************************
+ *
+ *************************************************************************/
+
+protected:
+ // Do hoisting for all loops.
+ void optHoistLoopCode();
+
+ // To represent sets of VN's that have already been hoisted in outer loops.
+ typedef SimplerHashTable<ValueNum, SmallPrimitiveKeyFuncs<ValueNum>, bool, JitSimplerHashBehavior> VNToBoolMap;
+ typedef VNToBoolMap VNSet;
+
+ struct LoopHoistContext
+ {
+ private:
+ // The set of variables hoisted in the current loop (or nullptr if there are none).
+ VNSet* m_pHoistedInCurLoop;
+
+ public:
+ // Value numbers of expressions that have been hoisted in parent loops in the loop nest.
+ VNSet m_hoistedInParentLoops;
+ // Value numbers of expressions that have been hoisted in the current (or most recent) loop in the nest.
+ // Previous decisions on loop-invariance of value numbers in the current loop.
+ VNToBoolMap m_curLoopVnInvariantCache;
+
+ VNSet* GetHoistedInCurLoop(Compiler* comp)
+ {
+ if (m_pHoistedInCurLoop == nullptr)
+ {
+ m_pHoistedInCurLoop = new (comp->getAllocatorLoopHoist()) VNSet(comp->getAllocatorLoopHoist());
+ }
+ return m_pHoistedInCurLoop;
+ }
+
+ VNSet* ExtractHoistedInCurLoop()
+ {
+ VNSet* res = m_pHoistedInCurLoop;
+ m_pHoistedInCurLoop = nullptr;
+ return res;
+ }
+
+ LoopHoistContext(Compiler* comp)
+ : m_pHoistedInCurLoop(nullptr)
+ , m_hoistedInParentLoops(comp->getAllocatorLoopHoist())
+ , m_curLoopVnInvariantCache(comp->getAllocatorLoopHoist())
+ {
+ }
+ };
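+ // Illustrative use of the context above when deciding whether to hoist a candidate whose value
+ // number is "vn" (a sketch only; it assumes the SimplerHashTable Lookup/Set interface and that
+ // "this" is the Compiler, neither of which is restated here):
+ //
+ //   bool alreadyHoisted = false;
+ //   if (hoistCtxt->m_hoistedInParentLoops.Lookup(vn, &alreadyHoisted) && alreadyHoisted)
+ //   {
+ //       return; // an enclosing loop already hoisted this expression
+ //   }
+ //   hoistCtxt->GetHoistedInCurLoop(this)->Set(vn, true); // record the hoist for the current loop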
+
+ // Do hoisting for loop "lnum" (an index into the optLoopTable), and all loops nested within it.
+ // Tracks the expressions that have been hoisted by containing loops by temporarily recording their
+ // value numbers in "m_hoistedInParentLoops". This set is not modified by the call.
+ void optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt);
+
+ // Do hoisting for a particular loop ("lnum" is an index into the optLoopTable.)
+ // Assumes that expressions have been hoisted in containing loops if their value numbers are in
+ // "m_hoistedInParentLoops".
+ //
+ void optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt);
+
+ // Hoist all expressions in "blk" that are invariant in loop "lnum" (an index into the optLoopTable)
+ // outside of that loop. Exempt expressions whose value number is in "m_hoistedInParentLoops"; add VN's of hoisted
+ // expressions to "hoistInLoop".
+ void optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt);
+
+ // Return true if the tree looks profitable to hoist out of loop 'lnum'.
+ bool optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum);
+
+ // Hoist all proper sub-expressions of "tree" (which occurs in "stmt", which occurs in "blk")
+ // that are invariant in loop "lnum" (an index into the optLoopTable)
+ // outside of that loop. Exempt expressions whose value number is in "hoistedInParents"; add VN's of hoisted
+ // expressions to "hoistInLoop".
+ // Returns "true" iff "tree" is loop-invariant (wrt "lnum").
+ // Assumes that the value of "*firstBlockAndBeforeSideEffect" indicates that we're in the first block, and before
+ // any possible globally visible side effects. Assumes it is called in evaluation order, and updates this value.
+ bool optHoistLoopExprsForTree(GenTreePtr tree,
+ unsigned lnum,
+ LoopHoistContext* hoistCtxt,
+ bool* firstBlockAndBeforeSideEffect,
+ bool* pHoistable);
+
+ // Performs the hoisting of 'tree' into the PreHeader for loop 'lnum'.
+ void optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt);
+
+ // Returns true iff the ValueNum "vn" represents a value that is loop-invariant in "lnum".
+ // Constants and init values are always loop invariant.
+ // VNPhi's connect VN's to the SSA definition, so we can know if the SSA def occurs in the loop.
+ bool optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* recordedVNs);
+
+ // Returns "true" iff "tree" is valid at the head of loop "lnum", in the context of the hoist substitution
+ // "subst". If "tree" is a local SSA var, it is valid if its SSA definition occurs outside of the loop, or
+ // if it is in the domain of "subst" (meaning that its definition has been previously hoisted, with a "stand-in"
+ // local.) If tree is a constant, it is valid. Otherwise, if it is an operator, it is valid iff its children are.
+ bool optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum);
+
+ // If "blk" is the entry block of a natural loop, returns true and sets "*pLnum" to the index of the loop
+ // in the loop table.
+ bool optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum);
+
+ // Records the set of "side effects" of all loops: fields (object instance and static)
+ // written to, and SZ-array element type equivalence classes updated.
+ void optComputeLoopSideEffects();
+
+private:
+ // Requires "lnum" to be the index of an outermost loop in the loop table. Traverses the body of that loop,
+ // including all nested loops, and records the set of "side effects" of the loop: fields (object instance and
+ // static) written to, and SZ-array element type equivalence classes updated.
+ void optComputeLoopNestSideEffects(unsigned lnum);
+
+ // Add the side effects of "blk" (which is required to be within a loop) to all loops of which it is a part.
+ void optComputeLoopSideEffectsOfBlock(BasicBlock* blk);
+
+ // Hoist the expression "expr" out of loop "lnum".
+ void optPerformHoistExpr(GenTreePtr expr, unsigned lnum);
+
+public:
+ void optOptimizeBools();
+
+private:
+ GenTree* optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr);
+#ifdef DEBUG
+ void optOptimizeBoolsGcStress(BasicBlock* condBlock);
+#endif
+public:
+ void optOptimizeLayout(); // Optimize the BasicBlock layout of the method
+
+ void optOptimizeLoops(); // for "while-do" loops duplicates simple loop conditions and transforms
+ // the loop into a "do-while" loop
+ // Also finds all natural loops and records them in the loop table
+
+ // Optionally clone loops in the loop table.
+ void optCloneLoops();
+
+ // Clone loop "loopInd" in the loop table.
+ void optCloneLoop(unsigned loopInd, LoopCloneContext* context);
+
+ // Ensure that loop "loopInd" has a unique head block. (If the existing entry has
+ // non-loop predecessors other than the head entry, create a new, empty block that goes (only) to the entry,
+ // and redirect the preds of the entry to this new block.) Sets the weight of the newly created block to
+ // "ambientWeight".
+ void optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight);
+
+ void optUnrollLoops(); // Unrolls loops (needs to have cost info)
+
+protected:
+ // This enumeration describes what is killed by a call.
+
+ enum callInterf
+ {
+ CALLINT_NONE, // no interference (most helpers)
+ CALLINT_REF_INDIRS, // kills GC ref indirections (SETFIELD OBJ)
+ CALLINT_SCL_INDIRS, // kills non GC ref indirections (SETFIELD non-OBJ)
+ CALLINT_ALL_INDIRS, // kills both GC ref and non GC ref indirections (SETFIELD STRUCT)
+ CALLINT_ALL, // kills everything (normal method call)
+ };
+
+public:
+ // A "LoopDsc" describes a ("natural") loop. We (currently) require the body of a loop to be a contiguous (in
+ // bbNext order) sequence of basic blocks. (At times, we may require the blocks in a loop to be "properly numbered"
+ // in bbNext order; we use comparisons on the bbNum to decide order.)
+ // The blocks that define the body are
+ // first <= top <= entry <= bottom .
+ // The "head" of the loop is a block outside the loop that has "entry" as a successor. We only support loops with a
+ // single 'head' block. The meanings of these blocks are given in the definitions below. Also see the picture at
+ // Compiler::optFindNaturalLoops().
+ struct LoopDsc
+ {
+ BasicBlock* lpHead; // HEAD of the loop (not part of the loop itself) -- has ENTRY as a successor.
+ BasicBlock* lpFirst; // FIRST block (in bbNext order) reachable within this loop. (May be part of a nested
+ // loop, but not the outer loop.)
+ BasicBlock* lpTop; // loop TOP (the back edge from lpBottom reaches here) (in most cases FIRST and TOP are the
+ // same)
+ BasicBlock* lpEntry; // the ENTRY in the loop (in most cases TOP or BOTTOM)
+ BasicBlock* lpBottom; // loop BOTTOM (from here we have a back edge to the TOP)
+ BasicBlock* lpExit; // if a single exit loop this is the EXIT (in most cases BOTTOM)
+
+ callInterf lpAsgCall; // "callInterf" for calls in the loop
+ ALLVARSET_TP lpAsgVars; // set of vars assigned within the loop (all vars, not just tracked)
+ varRefKinds lpAsgInds : 8; // set of inds modified within the loop
+
+ unsigned short lpFlags; // Mask of the LPFLG_* constants
+
+ unsigned char lpExitCnt; // number of exits from the loop
+
+ unsigned char lpParent; // The index of the most-nested loop that completely contains this one,
+ // or else BasicBlock::NOT_IN_LOOP if no such loop exists.
+ unsigned char lpChild; // The index of a nested loop, or else BasicBlock::NOT_IN_LOOP if no child exists.
+ // (Actually, an "immediately" nested loop --
+ // no other child of this loop is a parent of lpChild.)
+ unsigned char lpSibling; // The index of another loop that is an immediate child of lpParent,
+ // or else BasicBlock::NOT_IN_LOOP. One can enumerate all the children of a loop
+ // by following "lpChild" then "lpSibling" links.
+
+#define LPFLG_DO_WHILE 0x0001 // it's a do-while loop (i.e., ENTRY is at the TOP)
+#define LPFLG_ONE_EXIT 0x0002 // the loop has only one exit
+
+#define LPFLG_ITER 0x0004 // for (i = icon or lclVar; test_condition(); i++)
+#define LPFLG_HOISTABLE 0x0008 // the loop is in a form that is suitable for hoisting expressions
+#define LPFLG_CONST 0x0010 // for (i=icon;i<icon;i++){ ... } - constant loop
+
+#define LPFLG_VAR_INIT 0x0020 // iterator is initialized with a local var (var # found in lpVarInit)
+#define LPFLG_CONST_INIT 0x0040 // iterator is initialized with a constant (found in lpConstInit)
+
+#define LPFLG_VAR_LIMIT 0x0100 // iterator is compared with a local var (var # found in lpVarLimit)
+#define LPFLG_CONST_LIMIT 0x0200 // iterator is compared with a constant (found in lpConstLimit)
+#define LPFLG_ARRLEN_LIMIT 0x0400 // iterator is compared with a.len or a[i].len (found in lpArrLenLimit)
+
+#define LPFLG_HAS_PREHEAD 0x0800 // lpHead is known to be a preHead for this loop
+#define LPFLG_REMOVED 0x1000 // has been removed from the loop table (unrolled or optimized away)
+#define LPFLG_DONT_UNROLL 0x2000 // do not unroll this loop
+
+#define LPFLG_ASGVARS_YES 0x4000 // "lpAsgVars" has been computed
+#define LPFLG_ASGVARS_INC 0x8000 // "lpAsgVars" is incomplete -- vars beyond those representable in an AllVarSet
+ // type are assigned to.
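+ // Illustrative flag test (a sketch only): a simple counted loop with constant bounds has the
+ // LPFLG_ITER, LPFLG_CONST_INIT, and LPFLG_CONST_LIMIT bits set in lpFlags:
+ //
+ //   bool IsConstBoundIterLoop(const LoopDsc& loop)
+ //   {
+ //       const unsigned short mask = LPFLG_ITER | LPFLG_CONST_INIT | LPFLG_CONST_LIMIT;
+ //       return (loop.lpFlags & mask) == mask;
+ //   }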
+
+ bool lpLoopHasHeapHavoc; // The loop contains an operation that we assume has arbitrary heap side effects.
+ // If this is set, the fields below may not be accurate (since they become irrelevant.)
+ bool lpContainsCall; // True if executing the loop body *may* execute a call
+
+ VARSET_TP lpVarInOut; // The set of variables that are IN or OUT during the execution of this loop
+ VARSET_TP lpVarUseDef; // The set of variables that are USE or DEF during the execution of this loop
+
+ int lpHoistedExprCount; // The register count for the non-FP expressions from inside this loop that have been
+ // hoisted
+ int lpLoopVarCount; // The register count for the non-FP LclVars that are read/written inside this loop
+ int lpVarInOutCount; // The register count for the non-FP LclVars that are alive inside or across this loop
+
+ int lpHoistedFPExprCount; // The register count for the FP expressions from inside this loop that have been
+ // hoisted
+ int lpLoopVarFPCount; // The register count for the FP LclVars that are read/written inside this loop
+ int lpVarInOutFPCount; // The register count for the FP LclVars that are alive inside or across this loop
+
+ typedef SimplerHashTable<CORINFO_FIELD_HANDLE,
+ PtrKeyFuncs<struct CORINFO_FIELD_STRUCT_>,
+ bool,
+ JitSimplerHashBehavior>
+ FieldHandleSet;
+ FieldHandleSet* lpFieldsModified; // This has entries (mappings to "true") for all static field and object
+ // instance fields modified
+ // in the loop.
+
+ typedef SimplerHashTable<CORINFO_CLASS_HANDLE,
+ PtrKeyFuncs<struct CORINFO_CLASS_STRUCT_>,
+ bool,
+ JitSimplerHashBehavior>
+ ClassHandleSet;
+ ClassHandleSet* lpArrayElemTypesModified; // Bits set indicate the set of sz array element types such that
+ // arrays of that type are modified
+ // in the loop.
+
+ // Adds the variable liveness information for 'blk' to 'this' LoopDsc
+ void AddVariableLiveness(Compiler* comp, BasicBlock* blk);
+
+ inline void AddModifiedField(Compiler* comp, CORINFO_FIELD_HANDLE fldHnd);
+ // This doesn't *always* take a class handle -- it can also take primitive types, encoded as class handles
+ // (shifted left, with a low-order bit set to distinguish.)
+ // Use the {Encode/Decode}ElemType methods to construct/destruct these.
+ inline void AddModifiedElemType(Compiler* comp, CORINFO_CLASS_HANDLE structHnd);
+
+ /* The following values are set only for iterator loops, i.e. has the flag LPFLG_ITER set */
+
+ GenTreePtr lpIterTree; // The "i <op>= const" tree
+ unsigned lpIterVar(); // iterator variable #
+ int lpIterConst(); // the constant with which the iterator is incremented
+ genTreeOps lpIterOper(); // the type of the operation on the iterator (ASG_ADD, ASG_SUB, etc.)
+ void VERIFY_lpIterTree();
+
+ var_types lpIterOperType(); // For overflow instructions
+
+ union {
+ int lpConstInit; // initial constant value of iterator : Valid if LPFLG_CONST_INIT
+ unsigned lpVarInit; // initial local var number to which we initialize the iterator : Valid if
+ // LPFLG_VAR_INIT
+ };
+
+ /* The following is for LPFLG_ITER loops only (i.e. the loop condition is "i RELOP const or var") */
+
+ GenTreePtr lpTestTree; // pointer to the node containing the loop test
+ genTreeOps lpTestOper(); // the type of the comparison between the iterator and the limit (GT_LE, GT_GE, etc.)
+ void VERIFY_lpTestTree();
+
+ bool lpIsReversed(); // true if the iterator node is the second operand in the loop condition
+ GenTreePtr lpIterator(); // the iterator node in the loop test
+ GenTreePtr lpLimit(); // the limit node in the loop test
+
+ int lpConstLimit(); // limit constant value of iterator - loop condition is "i RELOP const" : Valid if
+ // LPFLG_CONST_LIMIT
+ unsigned lpVarLimit(); // the lclVar # in the loop condition ( "i RELOP lclVar" ) : Valid if
+ // LPFLG_VAR_LIMIT
+ bool lpArrLenLimit(Compiler* comp, ArrIndex* index); // The array length in the loop condition ( "i RELOP
+ // arr.len" or "i RELOP arr[i][j].len" ) : Valid if
+ // LPFLG_ARRLEN_LIMIT
+
+ // Returns "true" iff "*this" contains the blk.
+ bool lpContains(BasicBlock* blk)
+ {
+ return lpFirst->bbNum <= blk->bbNum && blk->bbNum <= lpBottom->bbNum;
+ }
+ // Returns "true" iff "*this" (properly) contains the range [first, bottom] (allowing firsts
+ // to be equal, but requiring bottoms to be different.)
+ bool lpContains(BasicBlock* first, BasicBlock* bottom)
+ {
+ return lpFirst->bbNum <= first->bbNum && bottom->bbNum < lpBottom->bbNum;
+ }
+
+ // Returns "true" iff "*this" (properly) contains "lp2" (allowing firsts to be equal, but requiring
+ // bottoms to be different.)
+ bool lpContains(const LoopDsc& lp2)
+ {
+ return lpContains(lp2.lpFirst, lp2.lpBottom);
+ }
+
+ // Returns "true" iff "*this" is (properly) contained by the range [first, bottom]
+ // (allowing firsts to be equal, but requiring bottoms to be different.)
+ bool lpContainedBy(BasicBlock* first, BasicBlock* bottom)
+ {
+ return first->bbNum <= lpFirst->bbNum && lpBottom->bbNum < bottom->bbNum;
+ }
+
+ // Returns "true" iff "*this" is (properly) contained by "lp2"
+ // (allowing firsts to be equal, but requiring bottoms to be different.)
+ bool lpContainedBy(const LoopDsc& lp2)
+ {
+ return lpContainedBy(lp2.lpFirst, lp2.lpBottom);
+ }
+
+ // Returns "true" iff "*this" is disjoint from the range [top, bottom].
+ bool lpDisjoint(BasicBlock* first, BasicBlock* bottom)
+ {
+ return bottom->bbNum < lpFirst->bbNum || lpBottom->bbNum < first->bbNum;
+ }
+ // Returns "true" iff "*this" is disjoint from "lp2".
+ bool lpDisjoint(const LoopDsc& lp2)
+ {
+ return lpDisjoint(lp2.lpFirst, lp2.lpBottom);
+ }
+ // Returns "true" iff the loop is well-formed (see code for defn).
+ bool lpWellFormed()
+ {
+ return lpFirst->bbNum <= lpTop->bbNum && lpTop->bbNum <= lpEntry->bbNum &&
+ lpEntry->bbNum <= lpBottom->bbNum &&
+ (lpHead->bbNum < lpTop->bbNum || lpHead->bbNum > lpBottom->bbNum);
+ }
+ };
+
+protected:
+ bool fgMightHaveLoop(); // returns true if there are any backedges
+ bool fgHasLoops; // True if this method has any loops, set in fgComputeReachability
+
+public:
+ LoopDsc optLoopTable[MAX_LOOP_NUM]; // loop descriptor table
+ unsigned char optLoopCount; // number of tracked loops
+
+protected:
+ unsigned optCallCount; // number of calls made in the method
+ unsigned optIndirectCallCount; // number of virtual, interface and indirect calls made in the method
+ unsigned optNativeCallCount; // number of Pinvoke/Native calls made in the method
+ unsigned optLoopsCloned; // number of loops cloned in the current method.
+
+#ifdef DEBUG
+ unsigned optFindLoopNumberFromBeginBlock(BasicBlock* begBlk);
+ void optPrintLoopInfo(unsigned loopNum,
+ BasicBlock* lpHead,
+ BasicBlock* lpFirst,
+ BasicBlock* lpTop,
+ BasicBlock* lpEntry,
+ BasicBlock* lpBottom,
+ unsigned char lpExitCnt,
+ BasicBlock* lpExit,
+ unsigned parentLoop = BasicBlock::NOT_IN_LOOP);
+ void optPrintLoopInfo(unsigned lnum);
+ void optPrintLoopRecording(unsigned lnum);
+
+ void optCheckPreds();
+#endif
+
+ void optSetBlockWeights();
+
+ void optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk);
+
+ void optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk);
+
+ void optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop = false);
+
+ bool optIsLoopTestEvalIntoTemp(GenTreePtr test, GenTreePtr* newTest);
+ unsigned optIsLoopIncrTree(GenTreePtr incr);
+ bool optCheckIterInLoopTest(unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar);
+ bool optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar);
+ bool optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar);
+ bool optExtractInitTestIncr(BasicBlock* head,
+ BasicBlock* bottom,
+ BasicBlock* exit,
+ GenTreePtr* ppInit,
+ GenTreePtr* ppTest,
+ GenTreePtr* ppIncr);
+
+ void optRecordLoop(BasicBlock* head,
+ BasicBlock* first,
+ BasicBlock* top,
+ BasicBlock* entry,
+ BasicBlock* bottom,
+ BasicBlock* exit,
+ unsigned char exitCnt);
+
+ void optFindNaturalLoops();
+
+ // Ensures that all the loops in the loop nest rooted at "loopInd" (an index into the loop table) are 'canonical' --
+ // each loop has a unique "top." Returns "true" iff the flowgraph has been modified.
+ bool optCanonicalizeLoopNest(unsigned char loopInd);
+
+ // Ensures that the loop "loopInd" (an index into the loop table) is 'canonical' -- it has a unique "top,"
+ // unshared with any other loop. Returns "true" iff the flowgraph has been modified
+ bool optCanonicalizeLoop(unsigned char loopInd);
+
+ // Requires "l1" to be a valid loop table index, and not "BasicBlock::NOT_IN_LOOP". Requires "l2" to be
+ // a valid loop table index, or else "BasicBlock::NOT_IN_LOOP". Returns true
+ // iff "l2" is not NOT_IN_LOOP, and "l1" contains "l2".
+ bool optLoopContains(unsigned l1, unsigned l2);
+
+ // Requires "loopInd" to be a valid index into the loop table.
+ // Updates the loop table by changing the head of loop "loopInd" (which is required
+ // to be "from") to "to". Also performs this transformation for any
+ // loop nested in "loopInd" that shares the same head as "loopInd".
+ void optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to);
+
+ // Updates the successors of "blk": if "blk2" is a successor of "blk", and there is a mapping for "blk2->blk3" in
+ // "redirectMap", change "blk" so that "blk3" is this successor. Note that the predecessor lists are not updated.
+ void optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap);
+
+ // Marks the containsCall information to "lnum" and any parent loops.
+ void AddContainsCallAllContainingLoops(unsigned lnum);
+ // Adds the variable liveness information from 'blk' to "lnum" and any parent loops.
+ void AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk);
+ // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
+ void AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd);
+ // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
+ void AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemType);
+
+ // Requires that "from" and "to" have the same "bbJumpKind" (perhaps because "to" is a clone
+ // of "from".) Copies the jump destination from "from" to "to".
+ void optCopyBlkDest(BasicBlock* from, BasicBlock* to);
+
+ // The depth of the loop described by "lnum" (an index into the loop table.) (0 == top level)
+ unsigned optLoopDepth(unsigned lnum)
+ {
+ unsigned par = optLoopTable[lnum].lpParent;
+ if (par == BasicBlock::NOT_IN_LOOP)
+ {
+ return 0;
+ }
+ else
+ {
+ return 1 + optLoopDepth(par);
+ }
+ }
+
+ void fgOptWhileLoop(BasicBlock* block);
+
+ bool optComputeLoopRep(int constInit,
+ int constLimit,
+ int iterInc,
+ genTreeOps iterOper,
+ var_types iterType,
+ genTreeOps testOper,
+ bool unsignedTest,
+ bool dupCond,
+ unsigned* iterCount);
+#if FEATURE_STACK_FP_X87
+
+public:
+ VARSET_TP optAllFloatVars; // mask of all tracked FP variables
+ VARSET_TP optAllFPregVars; // mask of all enregistered FP variables
+ VARSET_TP optAllNonFPvars; // mask of all tracked non-FP variables
+#endif // FEATURE_STACK_FP_X87
+
+private:
+ static fgWalkPreFn optIsVarAssgCB;
+
+protected:
+ bool optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var);
+
+ bool optIsVarAssgLoop(unsigned lnum, unsigned var);
+
+ int optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds = VR_NONE);
+
+ bool optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit);
+
+ /**************************************************************************
+ * Optimization conditions
+ *************************************************************************/
+
+ bool optFastCodeOrBlendedLoop(BasicBlock::weight_t bbWeight);
+ bool optPentium4(void);
+ bool optAvoidIncDec(BasicBlock::weight_t bbWeight);
+ bool optAvoidIntMult(void);
+
+#if FEATURE_ANYCSE
+
+protected:
+ // The following is the upper limit on how many expressions we'll keep track
+ // of for the CSE analysis.
+ //
+ static const unsigned MAX_CSE_CNT = EXPSET_SZ;
+
+ static const int MIN_CSE_COST = 2;
+
+ /* Generic list of nodes - used by the CSE logic */
+
+ struct treeLst
+ {
+ treeLst* tlNext;
+ GenTreePtr tlTree;
+ };
+
+ typedef struct treeLst* treeLstPtr;
+
+ struct treeStmtLst
+ {
+ treeStmtLst* tslNext;
+ GenTreePtr tslTree; // tree node
+ GenTreePtr tslStmt; // statement containing the tree
+ BasicBlock* tslBlock; // block containing the statement
+ };
+
+ typedef struct treeStmtLst* treeStmtLstPtr;
+
+ // The following logic keeps track of expressions via a simple hash table.
+
+ struct CSEdsc
+ {
+ CSEdsc* csdNextInBucket; // used by the hash table
+
+ unsigned csdHashValue; // the original hash key
+
+ unsigned csdIndex; // 1..optCSECandidateCount
+ char csdLiveAcrossCall; // 0 or 1
+
+ unsigned short csdDefCount; // definition count
+ unsigned short csdUseCount; // use count (excluding the implicit uses at defs)
+
+ unsigned csdDefWtCnt; // weighted def count
+ unsigned csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
+
+ GenTreePtr csdTree; // treenode containing the 1st occurrence
+ GenTreePtr csdStmt; // stmt containing the 1st occurrence
+ BasicBlock* csdBlock; // block containing the 1st occurrence
+
+ treeStmtLstPtr csdTreeList; // list of matching tree nodes: head
+ treeStmtLstPtr csdTreeLast; // list of matching tree nodes: tail
+ };
+
+ static const size_t s_optCSEhashSize;
+ CSEdsc** optCSEhash;
+ CSEdsc** optCSEtab;
+
+ void optCSEstop();
+
+ CSEdsc* optCSEfindDsc(unsigned index);
+ void optUnmarkCSE(GenTreePtr tree);
+
+ // user defined callback data for the tree walk function optCSE_MaskHelper()
+ struct optCSE_MaskData
+ {
+ EXPSET_TP CSE_defMask;
+ EXPSET_TP CSE_useMask;
+ };
+
+ // Treewalk helper for optCSE_DefMask and optCSE_UseMask
+ static fgWalkPreFn optCSE_MaskHelper;
+
+ // This function walks all the nodes of a given tree
+ // and returns the mask of CSE definitions and uses for the tree
+ //
+ void optCSE_GetMaskData(GenTreePtr tree, optCSE_MaskData* pMaskData);
+
+ // Given a binary tree node, returns true if it is safe to swap the order of evaluation of op1 and op2.
+ bool optCSE_canSwap(GenTree* firstNode, GenTree* secondNode);
+ bool optCSE_canSwap(GenTree* tree);
+
+ static fgWalkPostFn optPropagateNonCSE;
+ static fgWalkPreFn optHasNonCSEChild;
+
+ static fgWalkPreFn optUnmarkCSEs;
+
+ static int __cdecl optCSEcostCmpEx(const void* op1, const void* op2);
+ static int __cdecl optCSEcostCmpSz(const void* op1, const void* op2);
+
+ void optCleanupCSEs();
+
+#ifdef DEBUG
+ void optEnsureClearCSEInfo();
+#endif // DEBUG
+
+#endif // FEATURE_ANYCSE
+
+#if FEATURE_VALNUM_CSE
+ /**************************************************************************
+ * Value Number based CSEs
+ *************************************************************************/
+
+public:
+ void optOptimizeValnumCSEs();
+
+protected:
+ void optValnumCSE_Init();
+ unsigned optValnumCSE_Index(GenTreePtr tree, GenTreePtr stmt);
+ unsigned optValnumCSE_Locate();
+ void optValnumCSE_InitDataFlow();
+ void optValnumCSE_DataFlow();
+ void optValnumCSE_Availablity();
+ void optValnumCSE_Heuristic();
+ void optValnumCSE_UnmarkCSEs(GenTreePtr deadTree, GenTreePtr keepList);
+
+#endif // FEATURE_VALNUM_CSE
+
+#if FEATURE_ANYCSE
+ bool optDoCSE; // True when we have found a duplicate CSE tree
+ bool optValnumCSE_phase; // True when we are executing the optValnumCSE_phase
+ unsigned optCSECandidateTotal; // Grand total of CSE candidates for both Lexical and ValNum
+ unsigned optCSECandidateCount; // Count of CSE's candidates, reset for Lexical and ValNum CSE's
+ unsigned optCSEstart; // The first local variable number that is a CSE
+ unsigned optCSEcount; // The total count of CSE's introduced.
+ unsigned optCSEweight; // The weight of the current block when we are
+ // scanning for CSE expressions
+
+ bool optIsCSEcandidate(GenTreePtr tree);
+
+ // lclNumIsTrueCSE returns true if the LclVar was introduced by the CSE phase of the compiler
+ //
+ bool lclNumIsTrueCSE(unsigned lclNum) const
+ {
+ return ((optCSEcount > 0) && (lclNum >= optCSEstart) && (lclNum < optCSEstart + optCSEcount));
+ }
+
+ // lclNumIsCSE returns true if the LclVar should be treated like a CSE with regard to constant prop.
+ //
+ bool lclNumIsCSE(unsigned lclNum) const
+ {
+ return lvaTable[lclNum].lvIsCSE;
+ }
+
+#ifdef DEBUG
+ bool optConfigDisableCSE();
+ bool optConfigDisableCSE2();
+#endif
+ void optOptimizeCSEs();
+
+#endif // FEATURE_ANYCSE
+
+ struct isVarAssgDsc
+ {
+ GenTreePtr ivaSkip;
+#ifdef DEBUG
+ void* ivaSelf;
+#endif
+ unsigned ivaVar; // Variable we are interested in, or -1
+ ALLVARSET_TP ivaMaskVal; // Set of variables assigned to. This is a set of all vars, not tracked vars.
+ bool ivaMaskIncomplete; // Variables not representable in ivaMaskVal were assigned to.
+ varRefKinds ivaMaskInd; // What kind of indirect assignments are there?
+ callInterf ivaMaskCall; // What kind of calls are there?
+ };
+
+ static callInterf optCallInterf(GenTreePtr call);
+
+public:
+ // VN based copy propagation.
+ typedef ArrayStack<GenTreePtr> GenTreePtrStack;
+ typedef SimplerHashTable<unsigned, SmallPrimitiveKeyFuncs<unsigned>, GenTreePtrStack*, JitSimplerHashBehavior>
+ LclNumToGenTreePtrStack;
+
+ // Kill set to track variables with intervening definitions.
+ VARSET_TP optCopyPropKillSet;
+
+ // Copy propagation functions.
+ void optCopyProp(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree, LclNumToGenTreePtrStack* curSsaName);
+ void optBlockCopyPropPopStacks(BasicBlock* block, LclNumToGenTreePtrStack* curSsaName);
+ void optBlockCopyProp(BasicBlock* block, LclNumToGenTreePtrStack* curSsaName);
+ bool optIsSsaLocal(GenTreePtr tree);
+ int optCopyProp_LclVarScore(LclVarDsc* lclVarDsc, LclVarDsc* copyVarDsc, bool preferOp2);
+ void optVnCopyProp();
+
+ /**************************************************************************
+ * Early value propagation
+ *************************************************************************/
+ struct SSAName
+ {
+ unsigned m_lvNum;
+ unsigned m_ssaNum;
+
+ SSAName(unsigned lvNum, unsigned ssaNum) : m_lvNum(lvNum), m_ssaNum(ssaNum)
+ {
+ }
+
+ static unsigned GetHashCode(SSAName ssaNm)
+ {
+ return (ssaNm.m_lvNum << 16) | (ssaNm.m_ssaNum);
+ }
+
+ static bool Equals(SSAName ssaNm1, SSAName ssaNm2)
+ {
+ return (ssaNm1.m_lvNum == ssaNm2.m_lvNum) && (ssaNm1.m_ssaNum == ssaNm2.m_ssaNum);
+ }
+ };
+
+#define OMF_HAS_NEWARRAY 0x00000001 // Method contains 'new' of an array
+#define OMF_HAS_NEWOBJ 0x00000002 // Method contains 'new' of an object type.
+#define OMF_HAS_ARRAYREF 0x00000004 // Method contains array element loads or stores.
+#define OMF_HAS_VTABLEREF 0x00000008 // Method contains method table reference.
+#define OMF_HAS_NULLCHECK 0x00000010 // Method contains null check.
+
+ unsigned optMethodFlags;
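+
+ // Hedged usage sketch (not a prescription of where these flags are set): the importer is expected to
+ // OR in the relevant OMF_* bit when it sees the corresponding construct, and early prop consults the
+ // flags to decide whether a pass is worth running, roughly:
+ //
+ //     optMethodFlags |= OMF_HAS_ARRAYREF;                   // while importing an array access
+ //     ...
+ //     if ((optMethodFlags & OMF_HAS_ARRAYREF) == 0) { ... } // nothing to do for array-length prop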
+
+ // Recursion bound that controls how far backwards we walk when tracking an SSA value.
+ // No throughput difference was found with a backward walk bound between 3 and 8.
+ static const int optEarlyPropRecurBound = 5;
+
+ enum class optPropKind
+ {
+ OPK_INVALID,
+ OPK_ARRAYLEN,
+ OPK_OBJ_GETTYPE,
+ OPK_NULLCHECK
+ };
+
+ bool gtIsVtableRef(GenTreePtr tree);
+ GenTreePtr getArrayLengthFromAllocation(GenTreePtr tree);
+ GenTreePtr getObjectHandleNodeFromAllocation(GenTreePtr tree);
+ GenTreePtr optPropGetValueRec(unsigned lclNum, unsigned ssaNum, optPropKind valueKind, int walkDepth);
+ GenTreePtr optPropGetValue(unsigned lclNum, unsigned ssaNum, optPropKind valueKind);
+ bool optEarlyPropRewriteTree(GenTreePtr tree);
+ bool optDoEarlyPropForBlock(BasicBlock* block);
+ bool optDoEarlyPropForFunc();
+ void optEarlyProp();
+ void optFoldNullCheck(GenTreePtr tree);
+ bool optCanMoveNullCheckPastTree(GenTreePtr tree, bool isInsideTry);
+
+#if ASSERTION_PROP
+ /**************************************************************************
+ * Value/Assertion propagation
+ *************************************************************************/
+public:
+ // Data structures for assertion prop
+ BitVecTraits* apTraits;
+ ASSERT_TP apFull;
+ ASSERT_TP apEmpty;
+
+ enum optAssertionKind
+ {
+ OAK_INVALID,
+ OAK_EQUAL,
+ OAK_NOT_EQUAL,
+ OAK_SUBRANGE,
+ OAK_NO_THROW,
+ OAK_COUNT
+ };
+
+ enum optOp1Kind
+ {
+ O1K_INVALID,
+ O1K_LCLVAR,
+ O1K_ARR_BND,
+ O1K_ARRLEN_OPER_BND,
+ O1K_ARRLEN_LOOP_BND,
+ O1K_CONSTANT_LOOP_BND,
+ O1K_EXACT_TYPE,
+ O1K_SUBTYPE,
+ O1K_VALUE_NUMBER,
+ O1K_COUNT
+ };
+
+ enum optOp2Kind
+ {
+ O2K_INVALID,
+ O2K_LCLVAR_COPY,
+ O2K_IND_CNS_INT,
+ O2K_CONST_INT,
+ O2K_CONST_LONG,
+ O2K_CONST_DOUBLE,
+ O2K_ARR_LEN,
+ O2K_SUBRANGE,
+ O2K_COUNT
+ };
+ struct AssertionDsc
+ {
+ optAssertionKind assertionKind;
+ struct SsaVar
+ {
+ unsigned lclNum; // assigned to or property of this local var number
+ unsigned ssaNum;
+ };
+ struct ArrBnd
+ {
+ ValueNum vnIdx;
+ ValueNum vnLen;
+ };
+ struct AssertionDscOp1
+ {
+ optOp1Kind kind; // a normal LclVar, or Exact-type or Subtype
+ ValueNum vn;
+ union {
+ SsaVar lcl;
+ ArrBnd bnd;
+ };
+ } op1;
+ struct AssertionDscOp2
+ {
+ optOp2Kind kind; // a const or copy assignment
+ ValueNum vn;
+ struct IntVal
+ {
+ ssize_t iconVal; // integer
+ unsigned iconFlags; // gtFlags
+ };
+ struct Range // integer subrange
+ {
+ ssize_t loBound;
+ ssize_t hiBound;
+ };
+ union {
+ SsaVar lcl;
+ IntVal u1;
+ __int64 lconVal;
+ double dconVal;
+ Range u2;
+ };
+ } op2;
+
+ bool IsArrLenArithBound()
+ {
+ return ((assertionKind == OAK_EQUAL || assertionKind == OAK_NOT_EQUAL) && op1.kind == O1K_ARRLEN_OPER_BND);
+ }
+ bool IsArrLenBound()
+ {
+ return ((assertionKind == OAK_EQUAL || assertionKind == OAK_NOT_EQUAL) && op1.kind == O1K_ARRLEN_LOOP_BND);
+ }
+ bool IsConstantBound()
+ {
+ return ((assertionKind == OAK_EQUAL || assertionKind == OAK_NOT_EQUAL) &&
+ op1.kind == O1K_CONSTANT_LOOP_BND);
+ }
+ bool IsBoundsCheckNoThrow()
+ {
+ return ((assertionKind == OAK_NO_THROW) && (op1.kind == O1K_ARR_BND));
+ }
+
+ bool IsCopyAssertion()
+ {
+ return ((assertionKind == OAK_EQUAL) && (op1.kind == O1K_LCLVAR) && (op2.kind == O2K_LCLVAR_COPY));
+ }
+
+ static bool SameKind(AssertionDsc* a1, AssertionDsc* a2)
+ {
+ return a1->assertionKind == a2->assertionKind && a1->op1.kind == a2->op1.kind &&
+ a1->op2.kind == a2->op2.kind;
+ }
+
+ static bool ComplementaryKind(optAssertionKind kind, optAssertionKind kind2)
+ {
+ if (kind == OAK_EQUAL)
+ {
+ return kind2 == OAK_NOT_EQUAL;
+ }
+ else if (kind == OAK_NOT_EQUAL)
+ {
+ return kind2 == OAK_EQUAL;
+ }
+ return false;
+ }
+
+ static ssize_t GetLowerBoundForIntegralType(var_types type)
+ {
+ switch (type)
+ {
+ case TYP_BYTE:
+ return SCHAR_MIN;
+ case TYP_SHORT:
+ return SHRT_MIN;
+ case TYP_INT:
+ return INT_MIN;
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ case TYP_CHAR:
+ case TYP_USHORT:
+ case TYP_UINT:
+ return 0;
+ default:
+ unreached();
+ }
+ }
+ static ssize_t GetUpperBoundForIntegralType(var_types type)
+ {
+ switch (type)
+ {
+ case TYP_BOOL:
+ return 1;
+ case TYP_BYTE:
+ return SCHAR_MAX;
+ case TYP_SHORT:
+ return SHRT_MAX;
+ case TYP_INT:
+ return INT_MAX;
+ case TYP_UBYTE:
+ return UCHAR_MAX;
+ case TYP_CHAR:
+ case TYP_USHORT:
+ return USHRT_MAX;
+ case TYP_UINT:
+ return UINT_MAX;
+ default:
+ unreached();
+ }
+ }
+
+ bool HasSameOp1(AssertionDsc* that, bool vnBased)
+ {
+ return (op1.kind == that->op1.kind) &&
+ ((vnBased && (op1.vn == that->op1.vn)) || (!vnBased && (op1.lcl.lclNum == that->op1.lcl.lclNum)));
+ }
+
+ bool HasSameOp2(AssertionDsc* that, bool vnBased)
+ {
+ if (op2.kind != that->op2.kind)
+ {
+ return false;
+ }
+ switch (op2.kind)
+ {
+ case O2K_IND_CNS_INT:
+ case O2K_CONST_INT:
+ return ((op2.u1.iconVal == that->op2.u1.iconVal) && (op2.u1.iconFlags == that->op2.u1.iconFlags));
+
+ case O2K_CONST_LONG:
+ return (op2.lconVal == that->op2.lconVal);
+
+ case O2K_CONST_DOUBLE:
+ // exact match because of positive and negative zero.
+ return (memcmp(&op2.dconVal, &that->op2.dconVal, sizeof(double)) == 0);
+
+ case O2K_LCLVAR_COPY:
+ case O2K_ARR_LEN:
+ return (op2.lcl.lclNum == that->op2.lcl.lclNum) &&
+ (!vnBased || op2.lcl.ssaNum == that->op2.lcl.ssaNum);
+
+ case O2K_SUBRANGE:
+ return ((op2.u2.loBound == that->op2.u2.loBound) && (op2.u2.hiBound == that->op2.u2.hiBound));
+
+ case O2K_INVALID:
+ // we will return false
+ break;
+
+ default:
+ assert(!"Unexpected value for op2.kind in AssertionDsc.");
+ break;
+ }
+ return false;
+ }
+
+ bool Complementary(AssertionDsc* that, bool vnBased)
+ {
+ return ComplementaryKind(assertionKind, that->assertionKind) && HasSameOp1(that, vnBased) &&
+ HasSameOp2(that, vnBased);
+ }
+
+ bool Equals(AssertionDsc* that, bool vnBased)
+ {
+ return (assertionKind == that->assertionKind) && HasSameOp1(that, vnBased) && HasSameOp2(that, vnBased);
+ }
+ };
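+
+ // Illustrative encodings (the field values are hypothetical): a constant assertion such as "V05 == 3"
+ // would be represented roughly as { assertionKind = OAK_EQUAL, op1.kind = O1K_LCLVAR,
+ // op1.lcl.lclNum = 5, op2.kind = O2K_CONST_INT, op2.u1.iconVal = 3 }, while a no-throw bounds check
+ // assertion uses { assertionKind = OAK_NO_THROW, op1.kind = O1K_ARR_BND } with op1.bnd holding the
+ // index and length value numbers.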
+
+ typedef unsigned short AssertionIndex;
+
+protected:
+ static fgWalkPreFn optAddCopiesCallback;
+ static fgWalkPreFn optVNAssertionPropCurStmtVisitor;
+ unsigned optAddCopyLclNum;
+ GenTreePtr optAddCopyAsgnNode;
+
+ bool optLocalAssertionProp; // indicates that we are performing local assertion prop
+ bool optAssertionPropagated; // set to true if we modified the trees
+ bool optAssertionPropagatedCurrentStmt;
+#ifdef DEBUG
+ GenTreePtr optAssertionPropCurrentTree;
+#endif
+ AssertionIndex* optComplementaryAssertionMap;
+ ExpandArray<ASSERT_TP>* optAssertionDep; // table that holds dependent assertions (assertions
+ // using the value of a local var) for each local var
+ AssertionDsc* optAssertionTabPrivate; // table that holds info about value assignments
+ AssertionIndex optAssertionCount; // total number of assertions in the assertion table
+ AssertionIndex optMaxAssertionCount;
+
+public:
+ void optVnNonNullPropCurStmt(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree);
+ fgWalkResult optVNConstantPropCurStmt(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree);
+ GenTreePtr optVNConstantPropOnRelOp(GenTreePtr tree);
+ GenTreePtr optVNConstantPropOnJTrue(BasicBlock* block, GenTreePtr stmt, GenTreePtr test);
+ GenTreePtr optVNConstantPropOnTree(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree);
+ GenTreePtr optPrepareTreeForReplacement(GenTreePtr extractTree, GenTreePtr replaceTree);
+
+ AssertionIndex GetAssertionCount()
+ {
+ return optAssertionCount;
+ }
+ ASSERT_TP* bbJtrueAssertionOut;
+ typedef SimplerHashTable<ValueNum, SmallPrimitiveKeyFuncs<ValueNum>, ASSERT_TP, JitSimplerHashBehavior>
+ ValueNumToAssertsMap;
+ ValueNumToAssertsMap* optValueNumToAsserts;
+
+ static const AssertionIndex NO_ASSERTION_INDEX = 0;
+
+ // Assertion prop helpers.
+ ASSERT_TP& GetAssertionDep(unsigned lclNum);
+ AssertionDsc* optGetAssertion(AssertionIndex assertIndex);
+ void optAssertionInit(bool isLocalProp);
+ void optAssertionTraitsInit(AssertionIndex assertionCount);
+#if LOCAL_ASSERTION_PROP
+ void optAssertionReset(AssertionIndex limit);
+ void optAssertionRemove(AssertionIndex index);
+#endif
+
+ // Assertion prop data flow functions.
+ void optAssertionPropMain();
+ GenTreePtr optVNAssertionPropCurStmt(BasicBlock* block, GenTreePtr stmt);
+ bool optIsTreeKnownIntValue(bool vnBased, GenTreePtr tree, ssize_t* pConstant, unsigned* pIconFlags);
+ ASSERT_TP* optInitAssertionDataflowFlags();
+ ASSERT_TP* optComputeAssertionGen();
+
+ // Assertion Gen functions.
+ void optAssertionGen(GenTreePtr tree);
+ AssertionIndex optAssertionGenPhiDefn(GenTreePtr tree);
+ AssertionIndex optCreateJTrueBoundsAssertion(GenTreePtr tree);
+ AssertionIndex optAssertionGenJtrue(GenTreePtr tree);
+ AssertionIndex optCreateJtrueAssertions(GenTreePtr op1, GenTreePtr op2, Compiler::optAssertionKind assertionKind);
+ AssertionIndex optFindComplementary(AssertionIndex assertionIndex);
+ void optMapComplementary(AssertionIndex assertionIndex, AssertionIndex index);
+
+ // Assertion creation functions.
+ AssertionIndex optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAssertionKind assertionKind);
+ AssertionIndex optCreateAssertion(GenTreePtr op1,
+ GenTreePtr op2,
+ optAssertionKind assertionKind,
+ AssertionDsc* assertion);
+ void optCreateComplementaryAssertion(AssertionIndex assertionIndex, GenTreePtr op1, GenTreePtr op2);
+
+ bool optAssertionVnInvolvesNan(AssertionDsc* assertion);
+ AssertionIndex optAddAssertion(AssertionDsc* assertion);
+ void optAddVnAssertionMapping(ValueNum vn, AssertionIndex index);
+#ifdef DEBUG
+ void optPrintVnAssertionMapping();
+#endif
+ ASSERT_TP optGetVnMappedAssertions(ValueNum vn);
+
+ // Used for respective assertion propagations.
+ AssertionIndex optAssertionIsSubrange(GenTreePtr tree, var_types toType, ASSERT_VALARG_TP assertions);
+ AssertionIndex optAssertionIsSubtype(GenTreePtr tree, GenTreePtr methodTableArg, ASSERT_VALARG_TP assertions);
+ AssertionIndex optAssertionIsNonNullInternal(GenTreePtr op, ASSERT_VALARG_TP assertions);
+ bool optAssertionIsNonNull(GenTreePtr op,
+ ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased) DEBUGARG(AssertionIndex* pIndex));
+
+ // Used for Relop propagation.
+ AssertionIndex optGlobalAssertionIsEqualOrNotEqual(ASSERT_VALARG_TP assertions, GenTreePtr op1, GenTreePtr op2);
+ AssertionIndex optLocalAssertionIsEqualOrNotEqual(
+ optOp1Kind op1Kind, unsigned lclNum, optOp2Kind op2Kind, ssize_t cnsVal, ASSERT_VALARG_TP assertions);
+
+ // Assertion prop for lcl var functions.
+ bool optAssertionProp_LclVarTypeCheck(GenTreePtr tree, LclVarDsc* lclVarDsc, LclVarDsc* copyVarDsc);
+ GenTreePtr optCopyAssertionProp(AssertionDsc* curAssertion,
+ GenTreePtr tree,
+ GenTreePtr stmt DEBUGARG(AssertionIndex index));
+ GenTreePtr optConstantAssertionProp(AssertionDsc* curAssertion,
+ const GenTreePtr tree,
+ const GenTreePtr stmt DEBUGARG(AssertionIndex index));
+ GenTreePtr optVnConstantAssertionProp(const GenTreePtr tree, const GenTreePtr stmt);
+
+ // Assertion propagation functions.
+ GenTreePtr optAssertionProp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_LclVar(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_Ind(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_Cast(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_Call(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_Comma(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionPropLocal_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_Update(const GenTreePtr newTree, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+
+ // Implied assertion functions.
+ void optImpliedAssertions(AssertionIndex assertionIndex, ASSERT_TP& activeAssertions);
+ void optImpliedByTypeOfAssertions(ASSERT_TP& activeAssertions);
+ void optImpliedByCopyAssertion(AssertionDsc* copyAssertion, AssertionDsc* depAssertion, ASSERT_TP& result);
+ void optImpliedByConstAssertion(AssertionDsc* curAssertion, ASSERT_TP& result);
+
+ ASSERT_VALRET_TP optNewFullAssertSet();
+ ASSERT_VALRET_TP optNewEmptyAssertSet();
+
+#ifdef DEBUG
+ void optPrintAssertion(AssertionDsc* newAssertion, AssertionIndex assertionIndex = 0);
+ void optDebugCheckAssertion(AssertionDsc* assertion);
+ void optDebugCheckAssertions(AssertionIndex AssertionIndex);
+#endif
+ void optAddCopies();
+#endif // ASSERTION_PROP
+
+ /**************************************************************************
+ * Range checks
+ *************************************************************************/
+
+public:
+ struct LoopCloneVisitorInfo
+ {
+ LoopCloneContext* context;
+ unsigned loopNum;
+ GenTreePtr stmt;
+ LoopCloneVisitorInfo(LoopCloneContext* context, unsigned loopNum, GenTreePtr stmt)
+ : context(context), loopNum(loopNum), stmt(nullptr)
+ {
+ }
+ };
+
+ bool optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum);
+ bool optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum);
+ bool optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum);
+ bool optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context);
+ static fgWalkPreFn optCanOptimizeByLoopCloningVisitor;
+ fgWalkResult optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info);
+ void optObtainLoopCloningOpts(LoopCloneContext* context);
+ bool optIsLoopClonable(unsigned loopInd);
+
+ bool optCanCloneLoops();
+
+#ifdef DEBUG
+ void optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore);
+#endif
+ void optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool fastPath));
+ bool optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context);
+ bool optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context);
+ BasicBlock* optInsertLoopChoiceConditions(LoopCloneContext* context,
+ unsigned loopNum,
+ BasicBlock* head,
+ BasicBlock* slow);
+ void optInsertLoopCloningStress(BasicBlock* head);
+
+#if COUNT_RANGECHECKS
+ static unsigned optRangeChkRmv;
+ static unsigned optRangeChkAll;
+#endif
+
+protected:
+ struct arraySizes
+ {
+ unsigned arrayVar;
+ int arrayDim;
+
+#define MAX_ARRAYS 4 // a magic max number of arrays tracked for bounds check elimination
+ };
+
+ struct RngChkDsc
+ {
+ RngChkDsc* rcdNextInBucket; // used by the hash table
+
+ unsigned short rcdHashValue; // to make matching faster
+ unsigned short rcdIndex; // 0..optRngChkCount-1
+
+ GenTreePtr rcdTree; // the array index tree
+ };
+
+ unsigned optRngChkCount;
+ static const size_t optRngChkHashSize;
+
+ ssize_t optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk));
+ GenTreePtr optFindLocalInit(BasicBlock* block, GenTreePtr local, VARSET_TP* pKilledInOut, bool* isKilledAfterInit);
+
+#if FANCY_ARRAY_OPT
+ bool optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1 = 0, int add2 = 0);
+#endif
+
+ bool optReachWithoutCall(BasicBlock* srcBB, BasicBlock* dstBB);
+
+protected:
+ bool optLoopsMarked;
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX RegAlloc XX
+ XX XX
+ XX Does the register allocation and puts the remaining lclVars on the stack XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+#ifndef LEGACY_BACKEND
+ bool doLSRA() const
+ {
+ return true;
+ }
+#else // LEGACY_BACKEND
+ bool doLSRA() const
+ {
+ return false;
+ }
+#endif // LEGACY_BACKEND
+
+#ifdef LEGACY_BACKEND
+ void raInit();
+ void raAssignVars(); // register allocation
+#endif // LEGACY_BACKEND
+
+ VARSET_TP raRegVarsMask; // Set of all enregistered variables (not including FEATURE_STACK_FP_X87 enregistered
+ // variables)
+ regNumber raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc);
+
+ void raMarkStkVars();
+
+protected:
+ // Some things are used by both LSRA and regpredict allocators.
+
+ FrameType rpFrameType;
+ bool rpMustCreateEBPCalled; // Set to true after we have called rpMustCreateEBPFrame once
+
+#ifdef LEGACY_BACKEND
+ regMaskTP rpMaskPInvokeEpilogIntf; // pinvoke epilog trashes esi/edi holding stack args needed to setup tail call's
+ // args
+#endif // LEGACY_BACKEND
+
+ bool rpMustCreateEBPFrame(INDEBUG(const char** wbReason));
+
+#if FEATURE_FP_REGALLOC
+ enum enumConfigRegisterFP
+ {
+ CONFIG_REGISTER_FP_NONE = 0x0,
+ CONFIG_REGISTER_FP_CALLEE_TRASH = 0x1,
+ CONFIG_REGISTER_FP_CALLEE_SAVED = 0x2,
+ CONFIG_REGISTER_FP_FULL = 0x3,
+ };
+ enumConfigRegisterFP raConfigRegisterFP();
+#endif // FEATURE_FP_REGALLOC
+
+public:
+ regMaskTP raConfigRestrictMaskFP();
+
+private:
+#ifndef LEGACY_BACKEND
+ LinearScanInterface* m_pLinearScan; // Linear Scan allocator
+#else // LEGACY_BACKEND
+ unsigned raAvoidArgRegMask; // Mask of incoming argument registers that we may need to avoid
+ VARSET_TP raLclRegIntf[REG_COUNT]; // variable to register interference graph
+ bool raNewBlocks; // True if we added killing blocks for FPU registers
+ unsigned rpPasses; // Number of passes made by the register predictor
+ unsigned rpPassesMax; // Maximum number of passes made by the register predictor
+ unsigned rpPassesPessimize; // Number of non-pessimizing passes made by the register predictor
+ unsigned rpStkPredict; // Weighted count of variables that were predicted STK (lower means register allocation is better)
+ unsigned rpPredictSpillCnt; // Predicted number of integer spill tmps for the current tree
+ regMaskTP rpPredictAssignMask; // Mask of registers to consider in rpPredictAssignRegVars()
+ VARSET_TP rpLastUseVars; // Set of last use variables in rpPredictTreeRegUse
+ VARSET_TP rpUseInPlace; // Set of variables that we used in place
+ int rpAsgVarNum; // VarNum for the target of GT_ASG node
+ bool rpPredictAssignAgain; // Must rerun the rpPredictAssignRegVars()
+ bool rpAddedVarIntf; // Set to true if we need to add a new var intf
+ bool rpLostEnreg; // Set to true if we lost an enregister var that had lvDependReg set
+ bool rpReverseEBPenreg; // Decided to reverse the enregistration of EBP
+public:
+ bool rpRegAllocDone; // Set to true after we have completed register allocation
+private:
+ regMaskTP rpPredictMap[PREDICT_COUNT]; // Holds the regMaskTP for each of the enum values
+
+ void raSetupArgMasks(RegState* r);
+
+ const regNumber* raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize);
+#ifdef DEBUG
+ void raDumpVarIntf(); // Dump the variable to variable interference graph
+ void raDumpRegIntf(); // Dump the variable to register interference graph
+#endif
+ void raAdjustVarIntf();
+
+ regMaskTP rpPredictRegMask(rpPredictReg predictReg, var_types type);
+
+ bool rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg));
+
+ bool rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg));
+ regMaskTP rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs);
+
+ regMaskTP rpPredictGrabReg(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs);
+
+ static fgWalkPreFn rpMarkRegIntf;
+
+ regMaskTP rpPredictAddressMode(
+ GenTreePtr tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTreePtr lenCSE);
+
+ void rpPredictRefAssign(unsigned lclNum);
+
+ regMaskTP rpPredictBlkAsgRegUse(GenTreePtr tree, rpPredictReg predictReg, regMaskTP lockedRegs, regMaskTP rsvdRegs);
+
+ regMaskTP rpPredictTreeRegUse(GenTreePtr tree, rpPredictReg predictReg, regMaskTP lockedRegs, regMaskTP rsvdRegs);
+
+ regMaskTP rpPredictAssignRegVars(regMaskTP regAvail);
+
+ void rpPredictRegUse(); // Entry point
+
+ unsigned raPredictTreeRegUse(GenTreePtr tree);
+ unsigned raPredictListRegUse(GenTreePtr list);
+
+ void raSetRegVarOrder(var_types regType,
+ regNumber* customVarOrder,
+ unsigned* customVarOrderSize,
+ regMaskTP prefReg,
+ regMaskTP avoidReg);
+
+ // We use (unsigned)-1 as an uninitialized sentinel for rpStkPredict and
+ // also as the maximum value of lvRefCntWtd. Don't allow overflow, and
+ // saturate at UINT_MAX - 1, to avoid using the sentinel.
+ void raAddToStkPredict(unsigned val)
+ {
+ unsigned newStkPredict = rpStkPredict + val;
+ if ((newStkPredict < rpStkPredict) || (newStkPredict == UINT_MAX))
+ rpStkPredict = UINT_MAX - 1;
+ else
+ rpStkPredict = newStkPredict;
+ }
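+
+ // Worked example of the saturation above: if rpStkPredict is UINT_MAX - 3 and val is 10, the sum
+ // wraps around (newStkPredict < rpStkPredict), so rpStkPredict is clamped to UINT_MAX - 1 rather
+ // than wrapping to a small value or landing on the UINT_MAX sentinel.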
+
+#ifdef DEBUG
+#if !FEATURE_FP_REGALLOC
+ void raDispFPlifeInfo();
+#endif
+#endif
+
+ regMaskTP genReturnRegForTree(GenTreePtr tree);
+#endif // LEGACY_BACKEND
+
+ /* raIsVarargsStackArg is called by raMarkStkVars and by
+ lvaSortByRefCount. It identifies the special case
+ where a varargs function has a parameter passed on the
+ stack, other than the special varargs handle. Such parameters
+ require special treatment, because they cannot be tracked
+ by the GC (their offsets in the stack are not known
+ at compile time).
+ */
+
+ bool raIsVarargsStackArg(unsigned lclNum)
+ {
+#ifdef _TARGET_X86_
+
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ assert(varDsc->lvIsParam);
+
+ return (info.compIsVarArgs && !varDsc->lvIsRegArg && (lclNum != lvaVarargsHandleArg));
+
+#else // _TARGET_X86_
+
+ return false;
+
+#endif // _TARGET_X86_
+ }
+
+#ifdef LEGACY_BACKEND
+ // Records the current prediction, if it's better than any previous recorded prediction.
+ void rpRecordPrediction();
+ // Applies the best recorded prediction, if one exists and is better than the current prediction.
+ void rpUseRecordedPredictionIfBetter();
+
+ // Data members used in the methods above.
+ unsigned rpBestRecordedStkPredict;
+ struct VarRegPrediction
+ {
+ bool m_isEnregistered;
+ regNumberSmall m_regNum;
+ regNumberSmall m_otherReg;
+ };
+ VarRegPrediction* rpBestRecordedPrediction;
+#endif // LEGACY_BACKEND
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX EEInterface XX
+ XX XX
+ XX Get to the class and method info from the Execution Engine given XX
+ XX tokens for the class and method XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ /* These are the different addressing modes used to access a local var.
+ * The JIT has to report the location of the locals back to the EE
+ * for debugging purposes.
+ */
+
+ enum siVarLocType
+ {
+ VLT_REG,
+ VLT_REG_BYREF, // this type is currently only used for value types on X64
+ VLT_REG_FP,
+ VLT_STK,
+ VLT_STK_BYREF, // this type is currently only used for value types on X64
+ VLT_REG_REG,
+ VLT_REG_STK,
+ VLT_STK_REG,
+ VLT_STK2,
+ VLT_FPSTK,
+ VLT_FIXED_VA,
+
+ VLT_COUNT,
+ VLT_INVALID
+ };
+
+ struct siVarLoc
+ {
+ siVarLocType vlType;
+
+ union {
+ // VLT_REG/VLT_REG_FP -- Any pointer-sized enregistered value (TYP_INT, TYP_REF, etc)
+ // eg. EAX
+ // VLT_REG_BYREF -- the specified register contains the address of the variable
+ // eg. [EAX]
+
+ struct
+ {
+ regNumber vlrReg;
+ } vlReg;
+
+ // VLT_STK -- Any 32 bit value which is on the stack
+ // eg. [ESP+0x20], or [EBP-0x28]
+ // VLT_STK_BYREF -- the specified stack location contains the address of the variable
+ // eg. mov EAX, [ESP+0x20]; [EAX]
+
+ struct
+ {
+ regNumber vlsBaseReg;
+ NATIVE_OFFSET vlsOffset;
+ } vlStk;
+
+ // VLT_REG_REG -- TYP_LONG/TYP_DOUBLE with both DWords enregistered
+ // eg. RBM_EAXEDX
+
+ struct
+ {
+ regNumber vlrrReg1;
+ regNumber vlrrReg2;
+ } vlRegReg;
+
+ // VLT_REG_STK -- Partly enregistered TYP_LONG/TYP_DOUBLE
+ // eg { LowerDWord=EAX UpperDWord=[ESP+0x8] }
+
+ struct
+ {
+ regNumber vlrsReg;
+
+ struct
+ {
+ regNumber vlrssBaseReg;
+ NATIVE_OFFSET vlrssOffset;
+ } vlrsStk;
+ } vlRegStk;
+
+ // VLT_STK_REG -- Partly enregistered TYP_LONG/TYP_DOUBLE
+ // eg { LowerDWord=[ESP+0x8] UpperDWord=EAX }
+
+ struct
+ {
+ struct
+ {
+ regNumber vlsrsBaseReg;
+ NATIVE_OFFSET vlsrsOffset;
+ } vlsrStk;
+
+ regNumber vlsrReg;
+ } vlStkReg;
+
+ // VLT_STK2 -- Any 64 bit value which is on the stack, in 2 successive DWords
+ // eg 2 DWords at [ESP+0x10]
+
+ struct
+ {
+ regNumber vls2BaseReg;
+ NATIVE_OFFSET vls2Offset;
+ } vlStk2;
+
+ // VLT_FPSTK -- enregistered TYP_DOUBLE (on the FP stack)
+ // eg. ST(3). Actually it is ST("FPstkHeight - vpFpStk")
+
+ struct
+ {
+ unsigned vlfReg;
+ } vlFPstk;
+
+ // VLT_FIXED_VA -- fixed argument of a varargs function.
+ // The argument location depends on the size of the variable
+ // arguments (...). Inspecting the VARARGS_HANDLE indicates the
+ // location of the first arg. This argument can then be accessed
+ // relative to the position of the first arg
+
+ struct
+ {
+ unsigned vlfvOffset;
+ } vlFixedVarArg;
+
+ // VLT_MEMORY
+
+ struct
+ {
+ void* rpValue; // pointer to the in-process
+ // location of the value.
+ } vlMemory;
+ };
+
+ // Helper functions
+
+ bool vlIsInReg(regNumber reg);
+ bool vlIsOnStk(regNumber reg, signed offset);
+ };
+
+ /*************************************************************************/
+
+public:
+ // Get handles
+
+ void eeGetCallInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedToken,
+ CORINFO_CALLINFO_FLAGS flags,
+ CORINFO_CALL_INFO* pResult);
+ inline CORINFO_CALLINFO_FLAGS addVerifyFlag(CORINFO_CALLINFO_FLAGS flags);
+
+ void eeGetFieldInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_ACCESS_FLAGS flags,
+ CORINFO_FIELD_INFO* pResult);
+
+ // Get the flags
+
+ BOOL eeIsValueClass(CORINFO_CLASS_HANDLE clsHnd);
+
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+
+ bool IsSuperPMIException(unsigned code)
+ {
+ // Copied from NDP\clr\src\ToolBox\SuperPMI\SuperPMI-Shared\ErrorHandling.h
+
+ const unsigned EXCEPTIONCODE_DebugBreakorAV = 0xe0421000;
+ const unsigned EXCEPTIONCODE_MC = 0xe0422000;
+ const unsigned EXCEPTIONCODE_LWM = 0xe0423000;
+ const unsigned EXCEPTIONCODE_SASM = 0xe0424000;
+ const unsigned EXCEPTIONCODE_SSYM = 0xe0425000;
+ const unsigned EXCEPTIONCODE_CALLUTILS = 0xe0426000;
+ const unsigned EXCEPTIONCODE_TYPEUTILS = 0xe0427000;
+ const unsigned EXCEPTIONCODE_ASSERT = 0xe0440000;
+
+ switch (code)
+ {
+ case EXCEPTIONCODE_DebugBreakorAV:
+ case EXCEPTIONCODE_MC:
+ case EXCEPTIONCODE_LWM:
+ case EXCEPTIONCODE_SASM:
+ case EXCEPTIONCODE_SSYM:
+ case EXCEPTIONCODE_CALLUTILS:
+ case EXCEPTIONCODE_TYPEUTILS:
+ case EXCEPTIONCODE_ASSERT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ const char* eeGetMethodName(CORINFO_METHOD_HANDLE hnd, const char** className);
+ const char* eeGetMethodFullName(CORINFO_METHOD_HANDLE hnd);
+
+ bool eeIsNativeMethod(CORINFO_METHOD_HANDLE method);
+ CORINFO_METHOD_HANDLE eeGetMethodHandleForNative(CORINFO_METHOD_HANDLE method);
+#endif
+
+ var_types eeGetArgType(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig);
+ var_types eeGetArgType(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig, bool* isPinned);
+ unsigned eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig);
+
+ // VOM info, method sigs
+
+ void eeGetSig(unsigned sigTok,
+ CORINFO_MODULE_HANDLE scope,
+ CORINFO_CONTEXT_HANDLE context,
+ CORINFO_SIG_INFO* retSig);
+
+ void eeGetCallSiteSig(unsigned sigTok,
+ CORINFO_MODULE_HANDLE scope,
+ CORINFO_CONTEXT_HANDLE context,
+ CORINFO_SIG_INFO* retSig);
+
+ void eeGetMethodSig(CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* retSig, CORINFO_CLASS_HANDLE owner = nullptr);
+
+ // Method entry-points, instrs
+
+ void* eeGetFieldAddress(CORINFO_FIELD_HANDLE handle, void*** ppIndir);
+
+ CORINFO_METHOD_HANDLE eeMarkNativeTarget(CORINFO_METHOD_HANDLE method);
+
+ CORINFO_EE_INFO eeInfo;
+ bool eeInfoInitialized;
+
+ CORINFO_EE_INFO* eeGetEEInfo();
+
+ // Gets the offset of an SDArray's first element
+ unsigned eeGetArrayDataOffset(var_types type);
+ // Gets the offset of an MDArray's first element
+ unsigned eeGetMDArrayDataOffset(var_types type, unsigned rank);
+
+ GenTreePtr eeGetPInvokeCookie(CORINFO_SIG_INFO* szMetaSig);
+
+ // Returns the page size for the target machine as reported by the EE.
+ inline size_t eeGetPageSize()
+ {
+#if COR_JIT_EE_VERSION > 460
+ return eeGetEEInfo()->osPageSize;
+#else // COR_JIT_EE_VERSION <= 460
+ return CORINFO_PAGE_SIZE;
+#endif // COR_JIT_EE_VERSION > 460
+ }
+
+ // Returns the frame size at which we will generate a loop to probe the stack.
+ inline size_t getVeryLargeFrameSize()
+ {
+#ifdef _TARGET_ARM_
+ // The looping probe code is 40 bytes, whereas the straight-line probing for
+ // the (0x2000..0x3000) case is 44, so use looping for anything 0x2000 bytes
+ // or greater, to generate smaller code.
+ return 2 * eeGetPageSize();
+#else
+ return 3 * eeGetPageSize();
+#endif
+ }
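+
+ // For example, with a 0x1000 byte page as typically reported by the EE, the looping probe kicks in
+ // at 0x2000 bytes of frame on ARM and at 0x3000 bytes on other targets.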
+
+ inline bool generateCFIUnwindCodes()
+ {
+#if COR_JIT_EE_VERSION > 460 && defined(UNIX_AMD64_ABI)
+ return eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI;
+#else
+ return false;
+#endif
+ }
+
+ // Exceptions
+
+ unsigned eeGetEHcount(CORINFO_METHOD_HANDLE handle);
+
+ // Debugging support - Line number info
+
+ void eeGetStmtOffsets();
+
+ unsigned eeBoundariesCount;
+
+ struct boundariesDsc
+ {
+ UNATIVE_OFFSET nativeIP;
+ IL_OFFSET ilOffset;
+ unsigned sourceReason;
+ } * eeBoundaries; // Boundaries to report to EE
+ void eeSetLIcount(unsigned count);
+ void eeSetLIinfo(unsigned which, UNATIVE_OFFSET offs, unsigned srcIP, bool stkEmpty, bool callInstruction);
+ void eeSetLIdone();
+
+#ifdef DEBUG
+ static void eeDispILOffs(IL_OFFSET offs);
+ static void eeDispLineInfo(const boundariesDsc* line);
+ void eeDispLineInfos();
+#endif // DEBUG
+
+ // Debugging support - Local var info
+
+ void eeGetVars();
+
+ unsigned eeVarsCount;
+
+ struct VarResultInfo
+ {
+ UNATIVE_OFFSET startOffset;
+ UNATIVE_OFFSET endOffset;
+ DWORD varNumber;
+ siVarLoc loc;
+ } * eeVars;
+ void eeSetLVcount(unsigned count);
+ void eeSetLVinfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ VarName namex,
+ bool avail,
+ const siVarLoc& loc);
+ void eeSetLVdone();
+
+#ifdef DEBUG
+ void eeDispVar(ICorDebugInfo::NativeVarInfo* var);
+ void eeDispVars(CORINFO_METHOD_HANDLE ftn, ULONG32 cVars, ICorDebugInfo::NativeVarInfo* vars);
+#endif // DEBUG
+
+ // ICorJitInfo wrappers
+
+ void eeReserveUnwindInfo(BOOL isFunclet, BOOL isColdCode, ULONG unwindSize);
+
+ void eeAllocUnwindInfo(BYTE* pHotCode,
+ BYTE* pColdCode,
+ ULONG startOffset,
+ ULONG endOffset,
+ ULONG unwindSize,
+ BYTE* pUnwindBlock,
+ CorJitFuncKind funcKind);
+
+ void eeSetEHcount(unsigned cEH);
+
+ void eeSetEHinfo(unsigned EHnumber, const CORINFO_EH_CLAUSE* clause);
+
+ WORD eeGetRelocTypeHint(void* target);
+
+ // ICorStaticInfo wrapper functions
+
+ bool eeTryResolveToken(CORINFO_RESOLVED_TOKEN* resolvedToken);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#ifdef DEBUG
+ static void dumpSystemVClassificationType(SystemVClassificationType ct);
+#endif // DEBUG
+
+ void eeGetSystemVAmd64PassStructInRegisterDescriptor(
+ /*IN*/ CORINFO_CLASS_HANDLE structHnd,
+ /*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ template <typename ParamType>
+ bool eeRunWithErrorTrap(void (*function)(ParamType*), ParamType* param)
+ {
+ return eeRunWithErrorTrapImp(reinterpret_cast<void (*)(void*)>(function), reinterpret_cast<void*>(param));
+ }
+
+ bool eeRunWithErrorTrapImp(void (*function)(void*), void* param);
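+
+ // Hedged usage sketch for the error-trap wrapper above (the Param struct and body helper are
+ // hypothetical, not part of this interface):
+ //
+ //     struct Param { Compiler* pThis; bool success; };
+ //     static void body(Param* p) { /* work that may raise an EE exception */ p->success = true; }
+ //     ...
+ //     Param param = { this, false };
+ //     if (!eeRunWithErrorTrap<Param>(body, &param)) { /* the EE reported an error */ }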
+
+ // Utility functions
+
+ const char* eeGetFieldName(CORINFO_FIELD_HANDLE fieldHnd, const char** classNamePtr = nullptr);
+
+#if defined(DEBUG)
+ const wchar_t* eeGetCPString(size_t stringHandle);
+#endif
+
+ const char* eeGetClassName(CORINFO_CLASS_HANDLE clsHnd);
+
+ static CORINFO_METHOD_HANDLE eeFindHelper(unsigned helper);
+ static CorInfoHelpFunc eeGetHelperNum(CORINFO_METHOD_HANDLE method);
+
+ static fgWalkPreFn CountSharedStaticHelper;
+ static bool IsSharedStaticHelper(GenTreePtr tree);
+ static bool IsTreeAlwaysHoistable(GenTreePtr tree);
+
+ static CORINFO_FIELD_HANDLE eeFindJitDataOffs(unsigned jitDataOffs);
+ // returns true if 'field' is a Jit Data offset
+ static bool eeIsJitDataOffs(CORINFO_FIELD_HANDLE field);
+ // returns a number < 0 if 'field' is not a Jit Data offset, otherwise the data offset (limited to 2GB)
+ static int eeGetJitDataOffs(CORINFO_FIELD_HANDLE field);
+
+ /*****************************************************************************/
+
+public:
+ void tmpInit();
+
+ enum TEMP_USAGE_TYPE
+ {
+ TEMP_USAGE_FREE,
+ TEMP_USAGE_USED
+ };
+
+ static var_types tmpNormalizeType(var_types type);
+ TempDsc* tmpGetTemp(var_types type); // get temp for the given type
+ void tmpRlsTemp(TempDsc* temp);
+ TempDsc* tmpFindNum(int temp, TEMP_USAGE_TYPE usageType = TEMP_USAGE_FREE) const;
+
+ void tmpEnd();
+ TempDsc* tmpListBeg(TEMP_USAGE_TYPE usageType = TEMP_USAGE_FREE) const;
+ TempDsc* tmpListNxt(TempDsc* curTemp, TEMP_USAGE_TYPE usageType = TEMP_USAGE_FREE) const;
+ void tmpDone();
+
+#ifdef DEBUG
+ bool tmpAllFree() const;
+#endif // DEBUG
+
+#ifndef LEGACY_BACKEND
+ void tmpPreAllocateTemps(var_types type, unsigned count);
+#endif // !LEGACY_BACKEND
+
+protected:
+#ifdef LEGACY_BACKEND
+ unsigned tmpIntSpillMax; // number of int-sized spill temps
+ unsigned tmpDoubleSpillMax; // number of double-sized spill temps
+#endif // LEGACY_BACKEND
+
+ unsigned tmpCount; // Number of temps
+ unsigned tmpSize; // Size of all the temps
+#ifdef DEBUG
+public:
+ // Used by RegSet::rsSpillChk()
+ unsigned tmpGetCount; // Temps which haven't been released yet
+#endif
+private:
+ static unsigned tmpSlot(unsigned size); // which slot in tmpFree[] or tmpUsed[] to use
+
+ TempDsc* tmpFree[TEMP_MAX_SIZE / sizeof(int)];
+ TempDsc* tmpUsed[TEMP_MAX_SIZE / sizeof(int)];
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX CodeGenerator XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ CodeGenInterface* codeGen;
+
+#ifdef DEBUGGING_SUPPORT
+
+ // The following holds information about instr offsets in terms of generated code.
+
+ struct IPmappingDsc
+ {
+ IPmappingDsc* ipmdNext; // next line# record
+ IL_OFFSETX ipmdILoffsx; // the instr offset
+ emitLocation ipmdNativeLoc; // the emitter location of the native code corresponding to the IL offset
+ bool ipmdIsLabel; // Can this code be a branch label?
+ };
+
+ // Record the instr offset mapping to the generated code
+
+ IPmappingDsc* genIPmappingList;
+ IPmappingDsc* genIPmappingLast;
+
+ // Managed RetVal - A side hash table meant to record the mapping from a
+ // GT_CALL node to its IL offset. This info is used to emit sequence points
+ // that can be used by the debugger to determine the native offset at which the
+ // managed RetVal will be available.
+ //
+ // We could store the IL offset in the GT_CALL node itself, but that was ruled out
+ // in favor of a side table for two reasons: 1) we need the IL offset only for those
+ // GT_CALL nodes (created during importation) that correspond to an IL call and
+ // whose return type is other than TYP_VOID, and 2) GT_CALL is a frequently used
+ // node and the IL offset is needed only when generating debuggable code, so it is
+ // desirable to avoid the memory size penalty in retail scenarios.
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, IL_OFFSETX, JitSimplerHashBehavior>
+ CallSiteILOffsetTable;
+ CallSiteILOffsetTable* genCallSite2ILOffsetMap;
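+
+ // Hedged sketch of the intended traffic on this table (the exact call sites are not shown in this
+ // header): when importing a non-void call under debuggable codegen the importer would record
+ //
+ //     genCallSite2ILOffsetMap->Set(call, ilOffsetX);
+ //
+ // and code generation would later Lookup() the call node to emit the managed-RetVal sequence point.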
+#endif // DEBUGGING_SUPPORT
+
+ unsigned genReturnLocal; // Local number for the return value when applicable.
+ BasicBlock* genReturnBB; // jumped to when not optimizing for speed.
+
+ // The following properties are part of CodeGenContext. Getters are provided here for
+ // convenience and backward compatibility, but the properties can only be set by invoking
+ // the setter on CodeGenContext directly.
+
+ __declspec(property(get = getEmitter)) emitter* genEmitter;
+ emitter* getEmitter()
+ {
+ return codeGen->getEmitter();
+ }
+
+ const bool isFramePointerUsed()
+ {
+ return codeGen->isFramePointerUsed();
+ }
+
+ __declspec(property(get = getInterruptible, put = setInterruptible)) bool genInterruptible;
+ bool getInterruptible()
+ {
+ return codeGen->genInterruptible;
+ }
+ void setInterruptible(bool value)
+ {
+ codeGen->setInterruptible(value);
+ }
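+
+ // Reads and writes of the property above go through the accessors, e.g. (illustrative):
+ //
+ //     genInterruptible = true;  // expands to codeGen->setInterruptible(true)
+ //     if (genInterruptible) ... // expands to getInterruptible(), i.e. codeGen->genInterruptible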
+
+#if DOUBLE_ALIGN
+ const bool genDoubleAlign()
+ {
+ return codeGen->doDoubleAlign();
+ }
+ DWORD getCanDoubleAlign(); // Defined & used only by RegAlloc
+#endif // DOUBLE_ALIGN
+ __declspec(property(get = getFullPtrRegMap, put = setFullPtrRegMap)) bool genFullPtrRegMap;
+ bool getFullPtrRegMap()
+ {
+ return codeGen->genFullPtrRegMap;
+ }
+ void setFullPtrRegMap(bool value)
+ {
+ codeGen->setFullPtrRegMap(value);
+ }
+
+// Things that MAY belong either in CodeGen or CodeGenContext
+
+#if FEATURE_EH_FUNCLETS
+ FuncInfoDsc* compFuncInfos;
+ unsigned short compCurrFuncIdx;
+ unsigned short compFuncInfoCount;
+
+ unsigned short compFuncCount()
+ {
+ assert(fgFuncletsCreated);
+ return compFuncInfoCount;
+ }
+
+#else // !FEATURE_EH_FUNCLETS
+
+ // This is a no-op when there are no funclets!
+ void genUpdateCurrentFunclet(BasicBlock* block)
+ {
+ return;
+ }
+
+ FuncInfoDsc compFuncInfoRoot;
+
+ static const unsigned compCurrFuncIdx = 0;
+
+ unsigned short compFuncCount()
+ {
+ return 1;
+ }
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ FuncInfoDsc* funCurrentFunc();
+ void funSetCurrentFunc(unsigned funcIdx);
+ FuncInfoDsc* funGetFunc(unsigned funcIdx);
+ unsigned int funGetFuncIdx(BasicBlock* block);
+
+ // LIVENESS
+
+ VARSET_TP compCurLife; // current live variables
+ GenTreePtr compCurLifeTree; // node after which compCurLife has been computed
+
+ template <bool ForCodeGen>
+ void compChangeLife(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree));
+
+ void genChangeLife(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree))
+ {
+ compChangeLife</*ForCodeGen*/ true>(newLife DEBUGARG(tree));
+ }
+
+ template <bool ForCodeGen>
+ void compUpdateLife(GenTreePtr tree);
+
+ // Updates "compCurLife" to its state after evaluation of "tree". If "pLastUseVars" is
+ // non-null, sets "*pLastUseVars" to the set of tracked variables for which "tree" was a last
+ // use. (Can be more than one var in the case of dependently promoted struct vars.)
+ template <bool ForCodeGen>
+ void compUpdateLifeVar(GenTreePtr tree, VARSET_TP* pLastUseVars = nullptr);
+
+ template <bool ForCodeGen>
+ inline void compUpdateLife(VARSET_VALARG_TP newLife);
+
+ // Gets a register mask that represent the kill set for a helper call since
+ // not all JIT Helper calls follow the standard ABI on the target architecture.
+ regMaskTP compHelperCallKillSet(CorInfoHelpFunc helper);
+
+ // Gets a register mask that represent the kill set for a NoGC helper call.
+ regMaskTP compNoGCHelperCallKillSet(CorInfoHelpFunc helper);
+
+#ifdef _TARGET_ARM_
+ // Requires that "varDsc" be a promoted struct local variable being passed as an argument, beginning at
+ // "firstArgRegNum", which is assumed to have already been aligned to the register alignment restriction of the
+ // struct type. Adds bits to "*pArgSkippedRegMask" for any argument registers *not* used in passing "varDsc" --
+ // i.e., internal "holes" caused by internal alignment constraints. For example, if the struct contained an int and
+ // a double, and we started at R0 (on ARM), then R1 would be skipped, and the bit for R1 would be added to the mask.
+ void fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc, unsigned firstArgRegNum, regMaskTP* pArgSkippedRegMask);
+#endif // _TARGET_ARM_
+
+ // If "tree" is an indirection (GT_IND, or GT_OBJ) whose arg is an ADDR, whose arg is a LCL_VAR, return that LCL_VAR
+ // node, else NULL.
+ static GenTreePtr fgIsIndirOfAddrOfLocal(GenTreePtr tree);
+
+ // This is indexed by GT_OBJ nodes that are addresses of promoted struct variables, which
+ // have been annotated with the GTF_VAR_DEATH flag. If such a node is *not* mapped in this
+ // table, one may assume that all the (tracked) field vars die at this point. Otherwise,
+ // the node maps to a pointer to a VARSET_TP, containing set bits for each of the tracked field
+ // vars of the promoted struct local that go dead at the given node (the set bits are the bits
+ // for the tracked var indices of the field vars, as in a live var set).
+ NodeToVarsetPtrMap* m_promotedStructDeathVars;
+
+ NodeToVarsetPtrMap* GetPromotedStructDeathVars()
+ {
+ if (m_promotedStructDeathVars == nullptr)
+ {
+ m_promotedStructDeathVars = new (getAllocator()) NodeToVarsetPtrMap(getAllocator());
+ }
+ return m_promotedStructDeathVars;
+ }
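+
+ // Hedged lookup sketch for the table above (the names are illustrative): a consumer interested in the
+ // field deaths at a GT_OBJ node would do roughly
+ //
+ //     VARSET_TP* deadFields;
+ //     if (GetPromotedStructDeathVars()->Lookup(objNode, &deadFields)) { /* only these fields die */ }
+ //     else                                                            { /* all tracked fields die */ }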
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX UnwindInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#if !defined(__GNUC__)
+#pragma region Unwind information
+#endif
+
+public:
+ //
+ // Infrastructure functions: start/stop/reserve/emit.
+ //
+
+ void unwindBegProlog();
+ void unwindEndProlog();
+ void unwindBegEpilog();
+ void unwindEndEpilog();
+ void unwindReserve();
+ void unwindEmit(void* pHotCode, void* pColdCode);
+
+ //
+ // Specific unwind information functions: called by code generation to indicate a particular
+ // prolog or epilog unwindable instruction has been generated.
+ //
+
+ void unwindPush(regNumber reg);
+ void unwindAllocStack(unsigned size);
+ void unwindSetFrameReg(regNumber reg, unsigned offset);
+ void unwindSaveReg(regNumber reg, unsigned offset);
+
+#if defined(_TARGET_ARM_)
+ void unwindPushMaskInt(regMaskTP mask);
+ void unwindPushMaskFloat(regMaskTP mask);
+ void unwindPopMaskInt(regMaskTP mask);
+ void unwindPopMaskFloat(regMaskTP mask);
+ void unwindBranch16(); // The epilog terminates with a 16-bit branch (e.g., "bx lr")
+ void unwindNop(unsigned codeSizeInBytes); // Generate unwind NOP code. 'codeSizeInBytes' is 2 or 4 bytes. Only
+ // called via unwindPadding().
+ void unwindPadding(); // Generate a sequence of unwind NOP codes representing instructions between the last
+ // instruction and the current location.
+#endif // _TARGET_ARM_
+
+#if defined(_TARGET_ARM64_)
+ void unwindNop();
+ void unwindPadding(); // Generate a sequence of unwind NOP codes representing instructions between the last
+ // instruction and the current location.
+ void unwindSaveReg(regNumber reg, int offset); // str reg, [sp, #offset]
+ void unwindSaveRegPreindexed(regNumber reg, int offset); // str reg, [sp, #offset]!
+ void unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset); // stp reg1, reg2, [sp, #offset]
+ void unwindSaveRegPairPreindexed(regNumber reg1, regNumber reg2, int offset); // stp reg1, reg2, [sp, #offset]!
+ void unwindSaveNext(); // unwind code: save_next
+ void unwindReturn(regNumber reg); // ret lr
+#endif // defined(_TARGET_ARM64_)
+
+ //
+ // Private "helper" functions for the unwind implementation.
+ //
+
+private:
+#if FEATURE_EH_FUNCLETS
+ void unwindGetFuncLocations(FuncInfoDsc* func,
+ bool getHotSectionData,
+ /* OUT */ emitLocation** ppStartLoc,
+ /* OUT */ emitLocation** ppEndLoc);
+#endif // FEATURE_EH_FUNCLETS
+
+ void unwindReserveFunc(FuncInfoDsc* func);
+ void unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode);
+
+#if defined(_TARGET_AMD64_)
+
+ void unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode);
+ void unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pColdCode, bool isHotCode);
+ UNATIVE_OFFSET unwindGetCurrentOffset(FuncInfoDsc* func);
+
+ void unwindBegPrologWindows();
+ void unwindPushWindows(regNumber reg);
+ void unwindAllocStackWindows(unsigned size);
+ void unwindSetFrameRegWindows(regNumber reg, unsigned offset);
+ void unwindSaveRegWindows(regNumber reg, unsigned offset);
+
+#ifdef UNIX_AMD64_ABI
+ void unwindBegPrologCFI();
+ void unwindPushCFI(regNumber reg);
+ void unwindAllocStackCFI(unsigned size);
+ void unwindSetFrameRegCFI(regNumber reg, unsigned offset);
+ void unwindSaveRegCFI(regNumber reg, unsigned offset);
+ int mapRegNumToDwarfReg(regNumber reg);
+ void createCfiCode(FuncInfoDsc* func, UCHAR codeOffset, UCHAR opcode, USHORT dwarfReg, INT offset = 0);
+#endif // UNIX_AMD64_ABI
+#elif defined(_TARGET_ARM_)
+
+ void unwindPushPopMaskInt(regMaskTP mask, bool useOpsize16);
+ void unwindPushPopMaskFloat(regMaskTP mask);
+ void unwindSplit(FuncInfoDsc* func);
+
+#endif // _TARGET_ARM_
+
+#if !defined(__GNUC__)
+#pragma endregion // Note: the code inside the region is NOT guarded by !defined(__GNUC__); only the pragmas are
+#endif
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX SIMD XX
+ XX XX
+ XX Info about SIMD types, methods and the SIMD assembly (i.e. the assembly XX
+ XX that contains the distinguished, well-known SIMD type definitions). XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+ // Get highest available instruction set for floating point codegen
+ InstructionSet getFloatingPointInstructionSet()
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (canUseAVX())
+ {
+ return InstructionSet_AVX;
+ }
+
+ // min bar is SSE2
+ assert(canUseSSE2());
+ return InstructionSet_SSE2;
+#else
+ assert(!"getFPInstructionSet() is not implemented for target arch");
+ unreached();
+ return InstructionSet_NONE;
+#endif
+ }
+
+ // Get highest available instruction set for SIMD codegen
+ InstructionSet getSIMDInstructionSet()
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ return getFloatingPointInstructionSet();
+#else
+ assert(!"Available instruction set(s) for SIMD codegen is not defined for target arch");
+ unreached();
+ return InstructionSet_NONE;
+#endif
+ }
+
+#ifdef FEATURE_SIMD
+
+ // Should we support SIMD intrinsics?
+ bool featureSIMD;
+
+ // This is a temp lclVar allocated on the stack as TYP_SIMD. It is used to implement intrinsics
+ // that require indexed access to the individual fields of the vector, which is not well supported
+ // by the hardware. It is allocated when/if such situations are encountered during Lowering.
+ unsigned lvaSIMDInitTempVarNum;
+
+ // SIMD Types
+ CORINFO_CLASS_HANDLE SIMDFloatHandle;
+ CORINFO_CLASS_HANDLE SIMDDoubleHandle;
+ CORINFO_CLASS_HANDLE SIMDIntHandle;
+ CORINFO_CLASS_HANDLE SIMDUShortHandle;
+ CORINFO_CLASS_HANDLE SIMDUByteHandle;
+ CORINFO_CLASS_HANDLE SIMDShortHandle;
+ CORINFO_CLASS_HANDLE SIMDByteHandle;
+ CORINFO_CLASS_HANDLE SIMDLongHandle;
+ CORINFO_CLASS_HANDLE SIMDUIntHandle;
+ CORINFO_CLASS_HANDLE SIMDULongHandle;
+ CORINFO_CLASS_HANDLE SIMDVector2Handle;
+ CORINFO_CLASS_HANDLE SIMDVector3Handle;
+ CORINFO_CLASS_HANDLE SIMDVector4Handle;
+ CORINFO_CLASS_HANDLE SIMDVectorHandle;
+
+ // Get the handle for a SIMD type.
+ CORINFO_CLASS_HANDLE gtGetStructHandleForSIMD(var_types simdType, var_types simdBaseType)
+ {
+ if (simdBaseType == TYP_FLOAT)
+ {
+ switch (simdType)
+ {
+ case TYP_SIMD8:
+ return SIMDVector2Handle;
+ case TYP_SIMD12:
+ return SIMDVector3Handle;
+ case TYP_SIMD16:
+ if ((getSIMDVectorType() == TYP_SIMD32) || (SIMDVector4Handle != NO_CLASS_HANDLE))
+ {
+ return SIMDVector4Handle;
+ }
+ break;
+ case TYP_SIMD32:
+ break;
+ default:
+ unreached();
+ }
+ }
+ assert(simdType == getSIMDVectorType());
+ switch (simdBaseType)
+ {
+ case TYP_FLOAT:
+ return SIMDFloatHandle;
+ case TYP_DOUBLE:
+ return SIMDDoubleHandle;
+ case TYP_INT:
+ return SIMDIntHandle;
+ case TYP_CHAR:
+ return SIMDUShortHandle;
+ case TYP_USHORT:
+ return SIMDUShortHandle;
+ case TYP_UBYTE:
+ return SIMDUByteHandle;
+ case TYP_SHORT:
+ return SIMDShortHandle;
+ case TYP_BYTE:
+ return SIMDByteHandle;
+ case TYP_LONG:
+ return SIMDLongHandle;
+ case TYP_UINT:
+ return SIMDUIntHandle;
+ case TYP_ULONG:
+ return SIMDULongHandle;
+ default:
+ assert(!"Didn't find a class handle for simdType");
+ }
+ return NO_CLASS_HANDLE;
+ }
+
+ // SIMD Methods
+ CORINFO_METHOD_HANDLE SIMDVectorFloat_set_Item;
+ CORINFO_METHOD_HANDLE SIMDVectorFloat_get_Length;
+ CORINFO_METHOD_HANDLE SIMDVectorFloat_op_Addition;
+
+ // Returns true if the tree corresponds to a TYP_SIMD lcl var.
+ // Note that both SIMD vector args and locals are marked as lvSIMDType = true, but
+ // the type of an arg node is TYP_BYREF while that of a local node is TYP_SIMD or TYP_STRUCT.
+ bool isSIMDTypeLocal(GenTree* tree)
+ {
+ return tree->OperIsLocal() && lvaTable[tree->AsLclVarCommon()->gtLclNum].lvSIMDType;
+ }
+
+ // Returns true if the type of the tree is a byref of TYP_SIMD
+ bool isAddrOfSIMDType(GenTree* tree)
+ {
+ if (tree->TypeGet() == TYP_BYREF || tree->TypeGet() == TYP_I_IMPL)
+ {
+ switch (tree->OperGet())
+ {
+ case GT_ADDR:
+ return varTypeIsSIMD(tree->gtGetOp1());
+
+ case GT_LCL_VAR_ADDR:
+ return lvaTable[tree->AsLclVarCommon()->gtLclNum].lvSIMDType;
+
+ default:
+ return isSIMDTypeLocal(tree);
+ }
+ }
+
+ return false;
+ }
+
+ static bool isRelOpSIMDIntrinsic(SIMDIntrinsicID intrinsicId)
+ {
+ return (intrinsicId == SIMDIntrinsicEqual || intrinsicId == SIMDIntrinsicLessThan ||
+ intrinsicId == SIMDIntrinsicLessThanOrEqual || intrinsicId == SIMDIntrinsicGreaterThan ||
+ intrinsicId == SIMDIntrinsicGreaterThanOrEqual);
+ }
+
+ // Returns base type of a TYP_SIMD local.
+ // Returns TYP_UNKNOWN if the local is not TYP_SIMD.
+ var_types getBaseTypeOfSIMDLocal(GenTree* tree)
+ {
+ if (isSIMDTypeLocal(tree))
+ {
+ return lvaTable[tree->AsLclVarCommon()->gtLclNum].lvBaseType;
+ }
+
+ return TYP_UNKNOWN;
+ }
+
+ bool isSIMDClass(CORINFO_CLASS_HANDLE clsHnd)
+ {
+ return info.compCompHnd->isInSIMDModule(clsHnd);
+ }
+
+ bool isSIMDClass(typeInfo* pTypeInfo)
+ {
+ return pTypeInfo->IsStruct() && isSIMDClass(pTypeInfo->GetClassHandleForValueClass());
+ }
+
+ // Get the base (element) type and size in bytes for a SIMD type. Returns TYP_UNKNOWN
+ // if it is not a SIMD type or is an unsupported base type.
+ var_types getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, unsigned* sizeBytes = nullptr);
+
+ var_types getBaseTypeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd)
+ {
+ return getBaseTypeAndSizeOfSIMDType(typeHnd, nullptr);
+ }
+
+ // Get SIMD Intrinsic info given the method handle.
+ // Also sets typeHnd, argCount, baseType and sizeBytes out params.
+ const SIMDIntrinsicInfo* getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* typeHnd,
+ CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_SIG_INFO* sig,
+ bool isNewObj,
+ unsigned* argCount,
+ var_types* baseType,
+ unsigned* sizeBytes);
+
+ // Pops and returns a GenTree node from the importer's type stack.
+ // Normalizes TYP_STRUCT value in the case of GT_CALL, GT_RET_EXPR and arg nodes.
+ GenTreePtr impSIMDPopStack(var_types type, bool expectAddr = false);
+
+ // Create a GT_SIMD tree for a Get property of SIMD vector with a fixed index.
+ GenTreeSIMD* impSIMDGetFixed(var_types simdType, var_types baseType, unsigned simdSize, int index);
+
+ // Creates a GT_SIMD tree for Select operation
+ GenTreePtr impSIMDSelect(CORINFO_CLASS_HANDLE typeHnd,
+ var_types baseType,
+ unsigned simdVectorSize,
+ GenTree* op1,
+ GenTree* op2,
+ GenTree* op3);
+
+ // Creates a GT_SIMD tree for Min/Max operation
+ GenTreePtr impSIMDMinMax(SIMDIntrinsicID intrinsicId,
+ CORINFO_CLASS_HANDLE typeHnd,
+ var_types baseType,
+ unsigned simdVectorSize,
+ GenTree* op1,
+ GenTree* op2);
+
+ // Transforms operands and returns the SIMD intrinsic to be applied on
+ // transformed operands to obtain given relop result.
+ SIMDIntrinsicID impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
+ CORINFO_CLASS_HANDLE typeHnd,
+ unsigned simdVectorSize,
+ var_types* baseType,
+ GenTree** op1,
+ GenTree** op2);
+
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ // Transforms operands and returns the SIMD intrinsic to be applied on
+ // transformed operands to obtain == comparison result.
+ SIMDIntrinsicID impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd,
+ unsigned simdVectorSize,
+ GenTree** op1,
+ GenTree** op2);
+
+ // Transforms operands and returns the SIMD intrinsic to be applied on
+ // transformed operands to obtain > comparison result.
+ SIMDIntrinsicID impSIMDLongRelOpGreaterThan(CORINFO_CLASS_HANDLE typeHnd,
+ unsigned simdVectorSize,
+ GenTree** op1,
+ GenTree** op2);
+
+ // Transforms operands and returns the SIMD intrinsic to be applied on
+ // transformed operands to obtain >= comparison result.
+ SIMDIntrinsicID impSIMDLongRelOpGreaterThanOrEqual(CORINFO_CLASS_HANDLE typeHnd,
+ unsigned simdVectorSize,
+ GenTree** op1,
+ GenTree** op2);
+
+ // Transforms operands and returns the SIMD intrinsic to be applied on
+ // transformed operands to obtain >= comparison result in case of int32
+ // and small int base type vectors.
+ SIMDIntrinsicID impSIMDIntegralRelOpGreaterThanOrEqual(
+ CORINFO_CLASS_HANDLE typeHnd, unsigned simdVectorSize, var_types baseType, GenTree** op1, GenTree** op2);
+#endif // defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+
+ void setLclRelatedToSIMDIntrinsic(GenTreePtr tree);
+ bool areFieldsContiguous(GenTreePtr op1, GenTreePtr op2);
+ bool areArrayElementsContiguous(GenTreePtr op1, GenTreePtr op2);
+ bool areArgumentsContiguous(GenTreePtr op1, GenTreePtr op2);
+ GenTreePtr createAddressNodeForSIMDInit(GenTreePtr tree, unsigned simdSize);
+
+ // check methodHnd to see if it is a SIMD method that is expanded as an intrinsic in the JIT.
+ GenTreePtr impSIMDIntrinsic(OPCODE opcode,
+ GenTreePtr newobjThis,
+ CORINFO_CLASS_HANDLE clsHnd,
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_SIG_INFO* sig,
+ int memberRef);
+
+ GenTreePtr getOp1ForConstructor(OPCODE opcode, GenTreePtr newobjThis, CORINFO_CLASS_HANDLE clsHnd);
+
+ // Whether a SIMD vector occupies only part of a SIMD register.
+ // SSE2: vector2f/3f are considered sub register SIMD types.
+ // AVX: vector2f, 3f and 4f are all considered sub register SIMD types.
+ bool isSubRegisterSIMDType(CORINFO_CLASS_HANDLE typeHnd)
+ {
+ unsigned sizeBytes = 0;
+ var_types baseType = getBaseTypeAndSizeOfSIMDType(typeHnd, &sizeBytes);
+ return (baseType == TYP_FLOAT) && (sizeBytes < getSIMDVectorRegisterByteLength());
+ }
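+    // Illustrative sketch (hypothetical handle name): with SSE2's 16-byte vector registers,
+    // a Vector3-of-float handle yields baseType TYP_FLOAT and sizeBytes 12, so
+    //     isSubRegisterSIMDType(vector3FloatHnd);  // 12 < 16 => true
+    // while a full 16-byte Vector4 of float would return false under SSE2.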
+
+ bool isSubRegisterSIMDType(GenTreeSIMD* simdNode)
+ {
+ return (simdNode->gtSIMDSize < getSIMDVectorRegisterByteLength());
+ }
+
+ // Get the type for the hardware SIMD vector.
+ // This is the maximum SIMD type supported for this target.
+ var_types getSIMDVectorType()
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (canUseAVX())
+ {
+ return TYP_SIMD32;
+ }
+ else
+ {
+ assert(canUseSSE2());
+ return TYP_SIMD16;
+ }
+#else
+ assert(!"getSIMDVectorType() unimplemented on target arch");
+ unreached();
+#endif
+ }
+
+ // Get the size of the SIMD type in bytes
+ int getSIMDTypeSizeInBytes(CORINFO_CLASS_HANDLE typeHnd)
+ {
+ unsigned sizeBytes = 0;
+ (void)getBaseTypeAndSizeOfSIMDType(typeHnd, &sizeBytes);
+ return sizeBytes;
+ }
+
+    // Get the number of elements of the base type of the SIMD vector given by its size and baseType
+ static int getSIMDVectorLength(unsigned simdSize, var_types baseType);
+
+    // Get the number of elements of the base type of the SIMD vector given by its type handle
+ int getSIMDVectorLength(CORINFO_CLASS_HANDLE typeHnd);
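+    // For example, assuming the usual element sizes, getSIMDVectorLength(16, TYP_FLOAT) is
+    // 16 / 4 = 4 and getSIMDVectorLength(32, TYP_DOUBLE) is 32 / 8 = 4 (size divided by element size).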
+
+ // Get preferred alignment of SIMD type.
+ int getSIMDTypeAlignment(var_types simdType);
+
+ // Get the number of bytes in a SIMD Vector for the current compilation.
+ unsigned getSIMDVectorRegisterByteLength()
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (canUseAVX())
+ {
+ return YMM_REGSIZE_BYTES;
+ }
+ else
+ {
+ assert(canUseSSE2());
+ return XMM_REGSIZE_BYTES;
+ }
+#else
+ assert(!"getSIMDVectorRegisterByteLength() unimplemented on target arch");
+ unreached();
+#endif
+ }
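+    // Concretely (on xarch): this is XMM_REGSIZE_BYTES (16) when only SSE2 is usable and
+    // YMM_REGSIZE_BYTES (32) when AVX is enabled, mirroring the branches above.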
+
+ // The minimum and maximum possible number of bytes in a SIMD vector.
+ unsigned int maxSIMDStructBytes()
+ {
+ return getSIMDVectorRegisterByteLength();
+ }
+ unsigned int minSIMDStructBytes()
+ {
+ return emitTypeSize(TYP_SIMD8);
+ }
+
+#ifdef FEATURE_AVX_SUPPORT
+ // (maxPossibleSIMDStructBytes is for use in a context that requires a compile-time constant.)
+ static const unsigned maxPossibleSIMDStructBytes = 32;
+#else // !FEATURE_AVX_SUPPORT
+ static const unsigned maxPossibleSIMDStructBytes = 16;
+#endif // !FEATURE_AVX_SUPPORT
+
+ // Returns the codegen type for a given SIMD size.
+ var_types getSIMDTypeForSize(unsigned size)
+ {
+ var_types simdType = TYP_UNDEF;
+ if (size == 8)
+ {
+ simdType = TYP_SIMD8;
+ }
+ else if (size == 12)
+ {
+ simdType = TYP_SIMD12;
+ }
+ else if (size == 16)
+ {
+ simdType = TYP_SIMD16;
+ }
+#ifdef FEATURE_AVX_SUPPORT
+ else if (size == 32)
+ {
+ simdType = TYP_SIMD32;
+ }
+#endif // FEATURE_AVX_SUPPORT
+ else
+ {
+ noway_assert(!"Unexpected size for SIMD type");
+ }
+ return simdType;
+ }
+
+ unsigned getSIMDInitTempVarNum()
+ {
+ if (lvaSIMDInitTempVarNum == BAD_VAR_NUM)
+ {
+ lvaSIMDInitTempVarNum = lvaGrabTempWithImplicitUse(false DEBUGARG("SIMDInitTempVar"));
+ lvaTable[lvaSIMDInitTempVarNum].lvType = getSIMDVectorType();
+ }
+ return lvaSIMDInitTempVarNum;
+ }
+
+#endif // FEATURE_SIMD
+
+public:
+ //------------------------------------------------------------------------
+    // largestEnregisterableStructSize: The size in bytes of the largest struct that can be enregistered.
+    //
+    // Notes: It is not guaranteed that a struct of this size or smaller WILL be a
+    //        candidate for enregistration.
+
+ unsigned largestEnregisterableStructSize()
+ {
+#ifdef FEATURE_SIMD
+ unsigned vectorRegSize = getSIMDVectorRegisterByteLength();
+ if (vectorRegSize > TARGET_POINTER_SIZE)
+ {
+ return vectorRegSize;
+ }
+ else
+#endif // FEATURE_SIMD
+ {
+ return TARGET_POINTER_SIZE;
+ }
+ }
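+    // Illustrative values: on x64 with AVX this is 32 (YMM size), with only SSE2 it is 16,
+    // and without FEATURE_SIMD it falls back to TARGET_POINTER_SIZE (8 on 64-bit targets).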
+
+private:
+    // These routines need not be enclosed under FEATURE_SIMD since lvIsSIMDType()
+    // is defined for both FEATURE_SIMD and !FEATURE_SIMD appropriately. The use
+    // of these routines also avoids the need for #ifdef FEATURE_SIMD specific code.
+
+    // Is this var of SIMD struct type?
+ bool lclVarIsSIMDType(unsigned varNum)
+ {
+ LclVarDsc* varDsc = lvaTable + varNum;
+ return varDsc->lvIsSIMDType();
+ }
+
+ // Is this Local node a SIMD local?
+ bool lclVarIsSIMDType(GenTreeLclVarCommon* lclVarTree)
+ {
+ return lclVarIsSIMDType(lclVarTree->gtLclNum);
+ }
+
+    // Returns true if the TYP_SIMD local on the stack is aligned at its
+    // preferred byte boundary specified by getSIMDTypeAlignment().
+ bool isSIMDTypeLocalAligned(unsigned varNum)
+ {
+#if defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES
+ if (lclVarIsSIMDType(varNum) && lvaTable[varNum].lvType != TYP_BYREF)
+ {
+ bool ebpBased;
+ int off = lvaFrameAddress(varNum, &ebpBased);
+ // TODO-Cleanup: Can't this use the lvExactSize on the varDsc?
+ int alignment = getSIMDTypeAlignment(lvaTable[varNum].lvType);
+ bool isAligned = ((off % alignment) == 0);
+ noway_assert(isAligned || lvaTable[varNum].lvIsParam);
+ return isAligned;
+ }
+#endif // defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES
+
+ return false;
+ }
+
+ // Whether SSE2 is available
+ bool canUseSSE2() const
+ {
+#ifdef _TARGET_XARCH_
+ return opts.compCanUseSSE2;
+#else
+ return false;
+#endif
+ }
+
+ bool canUseAVX() const
+ {
+#ifdef FEATURE_AVX_SUPPORT
+ return opts.compCanUseAVX;
+#else
+ return false;
+#endif
+ }
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX Compiler XX
+ XX XX
+ XX Generic info about the compilation and the method being compiled. XX
+ XX It is responsible for driving the other phases. XX
+ XX It is also responsible for all the memory management. XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ Compiler* InlineeCompiler; // The Compiler instance for the inlinee
+
+ InlineResult* compInlineResult; // The result of importing the inlinee method.
+
+ bool compDoAggressiveInlining; // If true, mark every method as CORINFO_FLG_FORCEINLINE
+ bool compJmpOpUsed; // Does the method do a JMP
+ bool compLongUsed; // Does the method use TYP_LONG
+ bool compFloatingPointUsed; // Does the method use TYP_FLOAT or TYP_DOUBLE
+ bool compTailCallUsed; // Does the method do a tailcall
+ bool compLocallocUsed; // Does the method use localloc.
+ bool compQmarkUsed; // Does the method use GT_QMARK/GT_COLON
+ bool compQmarkRationalized; // Is it allowed to use a GT_QMARK/GT_COLON node.
+    bool compUnsafeCastUsed;   // Does the method use LDIND/STIND to cast between scalar/reference types
+
+ // NOTE: These values are only reliable after
+ // the importing is completely finished.
+
+ ExpandArrayStack<GenTreePtr>* compQMarks; // The set of QMark nodes created in the current compilation, so
+ // we can iterate over these efficiently.
+
+#if CPU_USES_BLOCK_MOVE
+ bool compBlkOpUsed; // Does the method do a COPYBLK or INITBLK
+#endif
+
+#ifdef DEBUG
+ // State information - which phases have completed?
+ // These are kept together for easy discoverability
+
+ bool bRangeAllowStress;
+ bool compCodeGenDone;
+ int64_t compNumStatementLinksTraversed; // # of links traversed while doing debug checks
+ bool fgNormalizeEHDone; // Has the flowgraph EH normalization phase been done?
+ size_t compSizeEstimate; // The estimated size of the method as per `gtSetEvalOrder`.
+ size_t compCycleEstimate; // The estimated cycle count of the method as per `gtSetEvalOrder`
+#endif // DEBUG
+
+ bool fgLocalVarLivenessDone; // Note that this one is used outside of debug.
+ bool fgLocalVarLivenessChanged;
+#if STACK_PROBES
+ bool compStackProbePrologDone;
+#endif
+#ifndef LEGACY_BACKEND
+ bool compLSRADone;
+#endif // !LEGACY_BACKEND
+ bool compRationalIRForm;
+
+    bool compUsesThrowHelper; // There is a call to a THROW_HELPER for the compiled method.
+
+ bool compGeneratingProlog;
+ bool compGeneratingEpilog;
+ bool compNeedsGSSecurityCookie; // There is an unsafe buffer (or localloc) on the stack.
+ // Insert cookie on frame and code to check the cookie, like VC++ -GS.
+ bool compGSReorderStackLayout; // There is an unsafe buffer on the stack, reorder locals and make local
+ // copies of susceptible parameters to avoid buffer overrun attacks through locals/params
+ bool getNeedsGSSecurityCookie() const
+ {
+ return compNeedsGSSecurityCookie;
+ }
+ void setNeedsGSSecurityCookie()
+ {
+ compNeedsGSSecurityCookie = true;
+ }
+
+ FrameLayoutState lvaDoneFrameLayout; // The highest frame layout state that we've completed. During
+ // frame layout calculations, this is the level we are currently
+ // computing.
+
+ //---------------------------- JITing options -----------------------------
+
+ enum codeOptimize
+ {
+ BLENDED_CODE,
+ SMALL_CODE,
+ FAST_CODE,
+
+ COUNT_OPT_CODE
+ };
+
+ struct Options
+ {
+ CORJIT_FLAGS* jitFlags; // all flags passed from the EE
+ unsigned eeFlags; // CorJitFlag flags passed from the EE
+ unsigned compFlags; // method attributes
+
+ codeOptimize compCodeOpt; // what type of code optimizations
+
+ bool compUseFCOMI;
+ bool compUseCMOV;
+#ifdef _TARGET_XARCH_
+ bool compCanUseSSE2; // Allow CodeGen to use "movq XMM" instructions
+
+#ifdef FEATURE_AVX_SUPPORT
+ bool compCanUseAVX; // Allow CodeGen to use AVX 256-bit vectors for SIMD operations
+#endif
+#endif
+
+// optimize maximally and/or favor speed over size?
+
+#define DEFAULT_MIN_OPTS_CODE_SIZE 60000
+#define DEFAULT_MIN_OPTS_INSTR_COUNT 20000
+#define DEFAULT_MIN_OPTS_BB_COUNT 2000
+#define DEFAULT_MIN_OPTS_LV_NUM_COUNT 2000
+#define DEFAULT_MIN_OPTS_LV_REF_COUNT 8000
+
+// Maximum number of locals before turning off inlining
+#define MAX_LV_NUM_COUNT_FOR_INLINING 512
+
+ bool compMinOpts;
+ unsigned instrCount;
+ unsigned lvRefCount;
+ bool compMinOptsIsSet;
+#ifdef DEBUG
+ bool compMinOptsIsUsed;
+
+ inline bool MinOpts()
+ {
+ assert(compMinOptsIsSet);
+ compMinOptsIsUsed = true;
+ return compMinOpts;
+ }
+ inline bool IsMinOptsSet()
+ {
+ return compMinOptsIsSet;
+ }
+#else // !DEBUG
+ inline bool MinOpts()
+ {
+ return compMinOpts;
+ }
+ inline bool IsMinOptsSet()
+ {
+ return compMinOptsIsSet;
+ }
+#endif // !DEBUG
+ inline void SetMinOpts(bool val)
+ {
+ assert(!compMinOptsIsUsed);
+ assert(!compMinOptsIsSet || (compMinOpts == val));
+ compMinOpts = val;
+ compMinOptsIsSet = true;
+ }
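+        // Intended usage sketch: the optimization level is decided exactly once, e.g.
+        //     opts.SetMinOpts(true);        // must precede any MinOpts() query
+        //     if (opts.MinOpts()) { ... }   // in DEBUG, asserts if queried before SetMinOpts
+        // Calling SetMinOpts again after a query (or with a different value) trips the asserts above.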
+
+ // true if the CLFLG_* for an optimization is set.
+ inline bool OptEnabled(unsigned optFlag)
+ {
+ return !!(compFlags & optFlag);
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ inline bool IsReadyToRun()
+ {
+ return (eeFlags & CORJIT_FLG_READYTORUN) != 0;
+ }
+#else
+ inline bool IsReadyToRun()
+ {
+ return false;
+ }
+#endif
+
+ // true if we should use the PINVOKE_{BEGIN,END} helpers instead of generating
+ // PInvoke transitions inline (e.g. when targeting CoreRT).
+ inline bool ShouldUsePInvokeHelpers()
+ {
+#if COR_JIT_EE_VERSION > 460
+ return (jitFlags->corJitFlags2 & CORJIT_FLG2_USE_PINVOKE_HELPERS) != 0;
+#else
+ return false;
+#endif
+ }
+
+        // true if we should insert the REVERSE_PINVOKE_{ENTER,EXIT} helpers in the method
+        // prolog/epilog
+ inline bool IsReversePInvoke()
+ {
+#if COR_JIT_EE_VERSION > 460
+ return (jitFlags->corJitFlags2 & CORJIT_FLG2_REVERSE_PINVOKE) != 0;
+#else
+ return false;
+#endif
+ }
+
+ // true if we must generate code compatible with JIT32 quirks
+ inline bool IsJit32Compat()
+ {
+#if defined(_TARGET_X86_) && COR_JIT_EE_VERSION > 460
+ return (jitFlags->corJitFlags2 & CORJIT_FLG2_DESKTOP_QUIRKS) != 0;
+#else
+ return false;
+#endif
+ }
+
+ // true if we must generate code compatible with Jit64 quirks
+ inline bool IsJit64Compat()
+ {
+#if defined(_TARGET_AMD64_) && COR_JIT_EE_VERSION > 460
+ return (jitFlags->corJitFlags2 & CORJIT_FLG2_DESKTOP_QUIRKS) != 0;
+#elif defined(_TARGET_AMD64_) && !defined(FEATURE_CORECLR)
+ return true;
+#else
+ return false;
+#endif
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ bool compScopeInfo; // Generate the LocalVar info ?
+ bool compDbgCode; // Generate debugger-friendly code?
+ bool compDbgInfo; // Gather debugging info?
+ bool compDbgEnC;
+#else
+ static const bool compDbgCode;
+#endif
+
+#ifdef PROFILING_SUPPORTED
+ bool compNoPInvokeInlineCB;
+#else
+ static const bool compNoPInvokeInlineCB;
+#endif
+
+ bool compMustInlinePInvokeCalli; // Unmanaged CALLI in IL stubs must be inlined
+
+#ifdef DEBUG
+ bool compGcChecks; // Check arguments and return values to ensure they are sane
+ bool compStackCheckOnRet; // Check ESP on return to ensure it is correct
+ bool compStackCheckOnCall; // Check ESP after every call to ensure it is correct
+
+#endif
+
+        bool compNeedSecurityCheck; // This flag really means whether or not a security object needs
+                                    // to be allocated on the stack.
+                                    // It will be set to true in the following cases:
+                                    //   1. When the method being compiled has declarative security
+ // (i.e. when CORINFO_FLG_NOSECURITYWRAP is reset for the current method).
+ // This is also the case when we inject a prolog and epilog in the method.
+ // (or)
+ // 2. When the method being compiled has imperative security (i.e. the method
+ // calls into another method that has CORINFO_FLG_SECURITYCHECK flag set).
+ // (or)
+ // 3. When opts.compDbgEnC is true. (See also Compiler::compCompile).
+ //
+// When this flag is set, jit will allocate a gc-reference local variable (lvaSecurityObject),
+// which gets reported as a GC root to stackwalker.
+// (See also ICodeManager::GetAddrOfSecurityObject.)
+
+#if RELOC_SUPPORT
+ bool compReloc;
+#endif
+
+#ifdef DEBUG
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+        bool compEnablePCRelAddr; // Whether an absolute addr can be encoded as a PC-rel offset by RyuJIT where possible
+#endif
+#endif // DEBUG
+
+#ifdef UNIX_AMD64_ABI
+        // This flag indicates whether there is a need to align the frame.
+        // On AMD64-Windows, if there are calls, 4 slots for the outgoing args are allocated, except for
+        // FastTailCall. These slots make the frame size non-zero, so the alignment logic will be called.
+        // On AMD64-Unix, there are no such slots. There is a possibility to have calls in the method with a frame
+        // size of 0, in which case the frame alignment logic won't kick in. This flag takes care of the AMD64-Unix
+        // case by remembering that there are calls and making sure the frame alignment logic is executed.
+ bool compNeedToAlignFrame;
+#endif // UNIX_AMD64_ABI
+
+ bool compProcedureSplitting; // Separate cold code from hot code
+
+ bool genFPorder; // Preserve FP order (operations are non-commutative)
+ bool genFPopt; // Can we do frame-pointer-omission optimization?
+ bool altJit; // True if we are an altjit and are compiling this method
+
+#ifdef DEBUG
+ bool compProcedureSplittingEH; // Separate cold code from hot code for functions with EH
+ bool dspCode; // Display native code generated
+ bool dspEHTable; // Display the EH table reported to the VM
+ bool dspInstrs; // Display the IL instructions intermixed with the native code output
+ bool dspEmit; // Display emitter output
+ bool dspLines; // Display source-code lines intermixed with native code output
+ bool dmpHex; // Display raw bytes in hex of native code output
+ bool varNames; // Display variables names in native code output
+ bool disAsm; // Display native code as it is generated
+ bool disAsmSpilled; // Display native code when any register spilling occurs
+ bool disDiffable; // Makes the Disassembly code 'diff-able'
+ bool disAsm2; // Display native code after it is generated using external disassembler
+ bool dspOrder; // Display names of each of the methods that we ngen/jit
+ bool dspUnwind; // Display the unwind info output
+ bool dspDiffable; // Makes the Jit Dump 'diff-able' (currently uses same COMPlus_* flag as disDiffable)
+ bool compLongAddress; // Force using large pseudo instructions for long address
+ // (IF_LARGEJMP/IF_LARGEADR/IF_LARGLDC)
+ bool dspGCtbls; // Display the GC tables
+#endif
+
+#ifdef LATE_DISASM
+ bool doLateDisasm; // Run the late disassembler
+#endif // LATE_DISASM
+
+#if DUMP_GC_TABLES && !defined(DEBUG) && defined(JIT32_GCENCODER)
+// Only the JIT32_GCENCODER implements GC dumping in non-DEBUG code.
+#pragma message("NOTE: this non-debug build has GC ptr table dumping always enabled!")
+ static const bool dspGCtbls = true;
+#endif
+
+        // We need stack probes to guarantee that we won't trigger a stack overflow
+        // when calling unmanaged code before it gets a chance to set up a frame, because
+        // the EE will have no idea where it is.
+        //
+        // We will only be doing this currently for hosted environments. Unfortunately
+        // we need to take care of stubs, so potentially, we will have to do the probes
+        // for any call. We have a plan for not needing them for stubs, though.
+ bool compNeedStackProbes;
+
+ // Whether to emit Enter/Leave/TailCall hooks using a dummy stub (DummyProfilerELTStub())
+        // This option helps make the JIT behave as if it is running under a profiler.
+ bool compJitELTHookEnabled;
+
+#if FEATURE_TAILCALL_OPT
+ // Whether opportunistic or implicit tail call optimization is enabled.
+ bool compTailCallOpt;
+ // Whether optimization of transforming a recursive tail call into a loop is enabled.
+ bool compTailCallLoopOpt;
+#endif
+
+#ifdef ARM_SOFTFP
+ static const bool compUseSoftFP = true;
+#else // !ARM_SOFTFP
+ static const bool compUseSoftFP = false;
+#endif
+
+ GCPollType compGCPollType;
+ } opts;
+
+#ifdef ALT_JIT
+ static bool s_pAltJitExcludeAssembliesListInitialized;
+ static AssemblyNamesList2* s_pAltJitExcludeAssembliesList;
+#endif // ALT_JIT
+
+#ifdef DEBUG
+
+ static bool s_dspMemStats; // Display per-phase memory statistics for every function
+
+ template <typename T>
+ T dspPtr(T p)
+ {
+ return (p == ZERO) ? ZERO : (opts.dspDiffable ? T(0xD1FFAB1E) : p);
+ }
+
+ template <typename T>
+ T dspOffset(T o)
+ {
+ return (o == ZERO) ? ZERO : (opts.dspDiffable ? T(0xD1FFAB1E) : o);
+ }
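+    // Example of the effect: when opts.dspDiffable is set, any non-null pointer or non-zero offset
+    // passed through dspPtr()/dspOffset() prints as the fixed placeholder 0xD1FFAB1E, so two JIT
+    // dumps of the same method can be diffed without spurious address/offset differences.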
+
+ static int dspTreeID(GenTree* tree)
+ {
+ return tree->gtTreeID;
+ }
+ static void printTreeID(GenTree* tree)
+ {
+ if (tree == nullptr)
+ {
+ printf("[------]");
+ }
+ else
+ {
+ printf("[%06d]", dspTreeID(tree));
+ }
+ }
+
+#endif // DEBUG
+
+// clang-format off
+#define STRESS_MODES \
+ \
+ STRESS_MODE(NONE) \
+ \
+ /* "Variations" stress areas which we try to mix up with each other. */ \
+ /* These should not be exhaustively used as they might */ \
+ /* hide/trivialize other areas */ \
+ \
+ STRESS_MODE(REGS) STRESS_MODE(DBL_ALN) STRESS_MODE(LCL_FLDS) STRESS_MODE(UNROLL_LOOPS) \
+ STRESS_MODE(MAKE_CSE) STRESS_MODE(LEGACY_INLINE) STRESS_MODE(CLONE_EXPR) \
+ STRESS_MODE(USE_FCOMI) STRESS_MODE(USE_CMOV) STRESS_MODE(FOLD) \
+ STRESS_MODE(BB_PROFILE) STRESS_MODE(OPT_BOOLS_GC) STRESS_MODE(REMORPH_TREES) \
+ STRESS_MODE(64RSLT_MUL) STRESS_MODE(DO_WHILE_LOOPS) STRESS_MODE(MIN_OPTS) \
+ STRESS_MODE(REVERSE_FLAG) /* Will set GTF_REVERSE_OPS whenever we can */ \
+ STRESS_MODE(REVERSE_COMMA) /* Will reverse commas created with gtNewCommaNode */ \
+ STRESS_MODE(TAILCALL) /* Will make the call as a tailcall whenever legal */ \
+ STRESS_MODE(CATCH_ARG) /* Will spill catch arg */ \
+ STRESS_MODE(UNSAFE_BUFFER_CHECKS) \
+ STRESS_MODE(NULL_OBJECT_CHECK) \
+ STRESS_MODE(PINVOKE_RESTORE_ESP) \
+ STRESS_MODE(RANDOM_INLINE) \
+ \
+ STRESS_MODE(GENERIC_VARN) STRESS_MODE(COUNT_VARN) \
+ \
+ /* "Check" stress areas that can be exhaustively used if we */ \
+    /* don't care about performance at all */                                          \
+ \
+ STRESS_MODE(FORCE_INLINE) /* Treat every method as AggressiveInlining */ \
+ STRESS_MODE(CHK_FLOW_UPDATE) \
+ STRESS_MODE(EMITTER) STRESS_MODE(CHK_REIMPORT) STRESS_MODE(FLATFP) \
+ \
+ STRESS_MODE(GENERIC_CHECK) STRESS_MODE(COUNT) \
+
+ enum compStressArea
+ {
+#define STRESS_MODE(mode) STRESS_##mode,
+ STRESS_MODES
+#undef STRESS_MODE
+ };
+// clang-format on
+
+#ifdef DEBUG
+ static const LPCWSTR s_compStressModeNames[STRESS_COUNT + 1];
+ BYTE compActiveStressModes[STRESS_COUNT];
+#endif // DEBUG
+
+#define MAX_STRESS_WEIGHT 100
+
+ bool compStressCompile(compStressArea stressArea, unsigned weightPercentage);
+
+#ifdef DEBUG
+
+ bool compInlineStress()
+ {
+ return compStressCompile(STRESS_LEGACY_INLINE, 50);
+ }
+
+ bool compRandomInlineStress()
+ {
+ return compStressCompile(STRESS_RANDOM_INLINE, 50);
+ }
+
+#endif // DEBUG
+
+ bool compTailCallStress()
+ {
+#ifdef DEBUG
+ return (JitConfig.TailcallStress() != 0 || compStressCompile(STRESS_TAILCALL, 5));
+#else
+ return false;
+#endif
+ }
+
+ codeOptimize compCodeOpt()
+ {
+#if 0
+ // Switching between size & speed has measurable throughput impact
+ // (3.5% on NGen mscorlib when measured). It used to be enabled for
+        // DEBUG, but we should generate identical code between CHK & RET builds,
+ // so that's not acceptable.
+ // TODO-Throughput: Figure out what to do about size vs. speed & throughput.
+ // Investigate the cause of the throughput regression.
+
+ return opts.compCodeOpt;
+#else
+ return BLENDED_CODE;
+#endif
+ }
+
+#ifdef DEBUG
+ CLRRandom* inlRNG;
+#endif
+
+ //--------------------- Info about the procedure --------------------------
+
+ struct Info
+ {
+ COMP_HANDLE compCompHnd;
+ CORINFO_MODULE_HANDLE compScopeHnd;
+ CORINFO_CLASS_HANDLE compClassHnd;
+ CORINFO_METHOD_HANDLE compMethodHnd;
+ CORINFO_METHOD_INFO* compMethodInfo;
+
+ BOOL hasCircularClassConstraints;
+ BOOL hasCircularMethodConstraints;
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+ const char* compMethodName;
+ const char* compClassName;
+ const char* compFullName;
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+        // Method hash is logically const, but computed
+ // on first demand.
+ mutable unsigned compMethodHashPrivate;
+ unsigned compMethodHash() const;
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#ifdef PSEUDORANDOM_NOP_INSERTION
+ // things for pseudorandom nop insertion
+ unsigned compChecksum;
+ CLRRandom compRNG;
+#endif
+
+ // The following holds the FLG_xxxx flags for the method we're compiling.
+ unsigned compFlags;
+
+ // The following holds the class attributes for the method we're compiling.
+ unsigned compClassAttr;
+
+ const BYTE* compCode;
+ IL_OFFSET compILCodeSize; // The IL code size
+ UNATIVE_OFFSET compNativeCodeSize; // The native code size, after instructions are issued. This
+ // is less than (compTotalHotCodeSize + compTotalColdCodeSize) only if:
+ // (1) the code is not hot/cold split, and we issued less code than we expected, or
+ // (2) the code is hot/cold split, and we issued less code than we expected
+ // in the cold section (the hot section will always be padded out to compTotalHotCodeSize).
+
+ bool compIsStatic : 1; // Is the method static (no 'this' pointer)?
+ bool compIsVarArgs : 1; // Does the method have varargs parameters?
+ bool compIsContextful : 1; // contextful method
+ bool compInitMem : 1; // Is the CORINFO_OPT_INIT_LOCALS bit set in the method info options?
+ bool compUnwrapContextful : 1; // JIT should unwrap proxies when possible
+ bool compProfilerCallback : 1; // JIT inserted a profiler Enter callback
+        bool compPublishStubParam : 1; // EAX captured in prolog will be available through an intrinsic
+ bool compRetBuffDefStack : 1; // The ret buff argument definitely points into the stack.
+
+ var_types compRetType; // Return type of the method as declared in IL
+ var_types compRetNativeType; // Normalized return type as per target arch ABI
+ unsigned compILargsCount; // Number of arguments (incl. implicit but not hidden)
+ unsigned compArgsCount; // Number of arguments (incl. implicit and hidden)
+ unsigned compRetBuffArg; // position of hidden return param var (0, 1) (BAD_VAR_NUM means not present);
+ int compTypeCtxtArg; // position of hidden param for type context for generic code (CORINFO_CALLCONV_PARAMTYPE)
+ unsigned compThisArg; // position of implicit this pointer param (not to be confused with lvaArg0Var)
+ unsigned compILlocalsCount; // Number of vars : args + locals (incl. implicit but not hidden)
+ unsigned compLocalsCount; // Number of vars : args + locals (incl. implicit and hidden)
+ unsigned compMaxStack;
+ UNATIVE_OFFSET compTotalHotCodeSize; // Total number of bytes of Hot Code in the method
+ UNATIVE_OFFSET compTotalColdCodeSize; // Total number of bytes of Cold Code in the method
+
+ unsigned compCallUnmanaged; // count of unmanaged calls
+ unsigned compLvFrameListRoot; // lclNum for the Frame root
+ unsigned compXcptnsCount; // Number of exception-handling clauses read in the method's IL.
+ // You should generally use compHndBBtabCount instead: it is the
+ // current number of EH clauses (after additions like synchronized
+ // methods and funclets, and removals like unreachable code deletion).
+
+ bool compMatchedVM; // true if the VM is "matched": either the JIT is a cross-compiler
+ // and the VM expects that, or the JIT is a "self-host" compiler
+ // (e.g., x86 hosted targeting x86) and the VM expects that.
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+ /* The following holds IL scope information about local variables.
+ */
+
+ unsigned compVarScopesCount;
+ VarScopeDsc* compVarScopes;
+
+ /* The following holds information about instr offsets for
+ * which we need to report IP-mappings
+ */
+
+ IL_OFFSET* compStmtOffsets; // sorted
+ unsigned compStmtOffsetsCount;
+ ICorDebugInfo::BoundaryTypes compStmtOffsetsImplicit;
+
+#endif // DEBUGGING_SUPPORT || DEBUG
+
+#define CPU_X86 0x0100 // The generic X86 CPU
+#define CPU_X86_PENTIUM_4 0x0110
+
+#define CPU_X64 0x0200 // The generic x64 CPU
+#define CPU_AMD_X64 0x0210 // AMD x64 CPU
+#define CPU_INTEL_X64 0x0240 // Intel x64 CPU
+
+#define CPU_ARM 0x0300 // The generic ARM CPU
+
+ unsigned genCPU; // What CPU are we running on
+ } info;
+
+ // Returns true if the method being compiled returns a non-void and non-struct value.
+ // Note that lvaInitTypeRef() normalizes compRetNativeType for struct returns in a
+    // single register as per target arch ABI (e.g. on AMD64-Windows structs of size 1, 2,
+    // 4 or 8 get normalized to TYP_BYTE/TYP_SHORT/TYP_INT/TYP_LONG; on ARM, HFA structs).
+    // Methods returning such structs are considered to return a non-struct value, and
+ // this method returns true in that case.
+ bool compMethodReturnsNativeScalarType()
+ {
+ return (info.compRetType != TYP_VOID) && !varTypeIsStruct(info.compRetNativeType);
+ }
+
+ // Returns true if the method being compiled returns RetBuf addr as its return value
+ bool compMethodReturnsRetBufAddr()
+ {
+ // There are cases where implicit RetBuf argument should be explicitly returned in a register.
+ // In such cases the return type is changed to TYP_BYREF and appropriate IR is generated.
+ // These cases are:
+        // 1. The profiler Leave callback expects the address of the retbuf as the return value for
+        //    methods with a hidden RetBuf argument. When profiler callbacks are needed,
+        //    impReturnInstruction() creates GT_RETURN(TYP_BYREF, op1 = Addr of RetBuf) for
+        //    methods with a hidden RetBufArg.
+ //
+ // 2. As per the System V ABI, the address of RetBuf needs to be returned by
+ // methods with hidden RetBufArg in RAX. In such case GT_RETURN is of TYP_BYREF,
+ // returning the address of RetBuf.
+ //
+        // 3. The Windows 64-bit native calling convention also requires the address of RetBuf
+ // to be returned in RAX.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_AMD64_
+ return (info.compRetBuffArg != BAD_VAR_NUM);
+#else // !_TARGET_AMD64_
+ return (compIsProfilerHookNeeded()) && (info.compRetBuffArg != BAD_VAR_NUM);
+#endif // !_TARGET_AMD64_
+ }
+
+ // Returns true if the method returns a value in more than one return register
+ // TODO-ARM-Bug: Deal with multi-register genReturnLocaled structs?
+ // TODO-ARM64: Does this apply for ARM64 too?
+ bool compMethodReturnsMultiRegRetType()
+ {
+#if FEATURE_MULTIREG_RET
+#if defined(_TARGET_X86_)
+ // On x86 only 64-bit longs are returned in multiple registers
+ return varTypeIsLong(info.compRetNativeType);
+#else // targets: X64-UNIX, ARM64 or ARM32
+ // On all other targets that support multireg return values:
+ // Methods returning a struct in multiple registers have a return value of TYP_STRUCT.
+ // Such method's compRetNativeType is TYP_STRUCT without a hidden RetBufArg
+ return varTypeIsStruct(info.compRetNativeType) && (info.compRetBuffArg == BAD_VAR_NUM);
+#endif // TARGET_XXX
+#else // not FEATURE_MULTIREG_RET
+ // For this architecture there are no multireg returns
+ return false;
+#endif // FEATURE_MULTIREG_RET
+ }
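+    // Concrete cases (illustrative): on x86 a method returning a 64-bit long reports true here
+    // (the value comes back in the EDX:EAX pair); on X64-UNIX, ARM64 or ARM32 a struct returned
+    // by value without a hidden RetBufArg also reports true.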
+
+#if FEATURE_MULTIREG_ARGS
+    // Given a GenTree node of TYP_STRUCT that represents a pass-by-value argument,
+    // return the gcPtr layout for the pointer-sized fields.
+ void getStructGcPtrsFromOp(GenTreePtr op, BYTE* gcPtrsOut);
+#endif // FEATURE_MULTIREG_ARGS
+
+ // Returns true if the method being compiled returns a value
+ bool compMethodHasRetVal()
+ {
+ return compMethodReturnsNativeScalarType() || compMethodReturnsRetBufAddr() ||
+ compMethodReturnsMultiRegRetType();
+ }
+
+#if defined(DEBUG)
+
+ void compDispLocalVars();
+
+#endif // DEBUG
+
+//-------------------------- Global Compiler Data ------------------------------------
+
+#ifdef DEBUG
+ static unsigned s_compMethodsCount; // to produce unique label names
+ unsigned compGenTreeID;
+#endif
+
+ BasicBlock* compCurBB; // the current basic block in process
+ GenTreePtr compCurStmt; // the current statement in process
+#ifdef DEBUG
+ unsigned compCurStmtNum; // to give all statements an increasing StmtNum when printing dumps
+#endif
+
+ // The following is used to create the 'method JIT info' block.
+ size_t compInfoBlkSize;
+ BYTE* compInfoBlkAddr;
+
+ EHblkDsc* compHndBBtab; // array of EH data
+ unsigned compHndBBtabCount; // element count of used elements in EH data array
+ unsigned compHndBBtabAllocCount; // element count of allocated elements in EH data array
+
+#if defined(_TARGET_X86_)
+
+ //-------------------------------------------------------------------------
+ // Tracking of region covered by the monitor in synchronized methods
+ void* syncStartEmitCookie; // the emitter cookie for first instruction after the call to MON_ENTER
+ void* syncEndEmitCookie; // the emitter cookie for first instruction after the call to MON_EXIT
+
+#endif // _TARGET_X86_
+
+ Phases previousCompletedPhase; // the most recently completed phase
+
+ //-------------------------------------------------------------------------
+ // The following keeps track of how many bytes of local frame space we've
+ // grabbed so far in the current function, and how many argument bytes we
+ // need to pop when we return.
+ //
+
+ unsigned compLclFrameSize; // secObject+lclBlk+locals+temps
+
+ // Count of callee-saved regs we pushed in the prolog.
+ // Does not include EBP for isFramePointerUsed() and double-aligned frames.
+    // In the case of AMD64 this doesn't include float regs saved on the stack.
+ unsigned compCalleeRegsPushed;
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // Mask of callee saved float regs on stack.
+ regMaskTP compCalleeFPRegsSavedMask;
+#endif
+#ifdef _TARGET_AMD64_
+// Quirk for VS debug-launch scenario to work:
+// Bytes of padding between save-reg area and locals.
+#define VSQUIRK_STACK_PAD (2 * REGSIZE_BYTES)
+ unsigned compVSQuirkStackPaddingNeeded;
+ bool compQuirkForPPPflag;
+#endif
+
+ unsigned compArgSize; // total size of arguments in bytes (including register args (lvIsRegArg))
+
+ unsigned compMapILargNum(unsigned ILargNum); // map accounting for hidden args
+ unsigned compMapILvarNum(unsigned ILvarNum); // map accounting for hidden args
+ unsigned compMap2ILvarNum(unsigned varNum); // map accounting for hidden args
+
+ //-------------------------------------------------------------------------
+
+ static void compStartup(); // One-time initialization
+ static void compShutdown(); // One-time finalization
+
+ void compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo);
+ void compDone();
+
+ static void compDisplayStaticSizes(FILE* fout);
+
+ //------------ Some utility functions --------------
+
+ void* compGetHelperFtn(CorInfoHelpFunc ftnNum, /* IN */
+ void** ppIndirection); /* OUT */
+
+ // Several JIT/EE interface functions return a CorInfoType, and also return a
+ // class handle as an out parameter if the type is a value class. Returns the
+ // size of the type these describe.
+ unsigned compGetTypeSize(CorInfoType cit, CORINFO_CLASS_HANDLE clsHnd);
+
+#ifdef DEBUG
+ // Components used by the compiler may write unit test suites, and
+ // have them run within this method. They will be run only once per process, and only
+ // in debug. (Perhaps should be under the control of a COMPlus_ flag.)
+ // These should fail by asserting.
+ void compDoComponentUnitTestsOnce();
+#endif // DEBUG
+
+ int compCompile(CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_MODULE_HANDLE classPtr,
+ COMP_HANDLE compHnd,
+ CORINFO_METHOD_INFO* methodInfo,
+ void** methodCodePtr,
+ ULONG* methodCodeSize,
+ CORJIT_FLAGS* compileFlags);
+ void compCompileFinish();
+ int compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
+ COMP_HANDLE compHnd,
+ CORINFO_METHOD_INFO* methodInfo,
+ void** methodCodePtr,
+ ULONG* methodCodeSize,
+ CORJIT_FLAGS* compileFlags,
+ CorInfoInstantiationVerification instVerInfo);
+
+ ArenaAllocator* compGetAllocator();
+
+#if MEASURE_MEM_ALLOC
+ struct MemStats
+ {
+ unsigned allocCnt; // # of allocs
+        UINT64 allocSz;                  // total size of those allocs.
+ UINT64 allocSzMax; // Maximum single allocation.
+ UINT64 allocSzByKind[CMK_Count]; // Classified by "kind".
+ UINT64 nraTotalSizeAlloc;
+ UINT64 nraTotalSizeUsed;
+
+ static const char* s_CompMemKindNames[]; // Names of the kinds.
+
+ MemStats() : allocCnt(0), allocSz(0), allocSzMax(0), nraTotalSizeAlloc(0), nraTotalSizeUsed(0)
+ {
+ for (int i = 0; i < CMK_Count; i++)
+ {
+ allocSzByKind[i] = 0;
+ }
+ }
+ MemStats(const MemStats& ms)
+ : allocCnt(ms.allocCnt)
+ , allocSz(ms.allocSz)
+ , allocSzMax(ms.allocSzMax)
+ , nraTotalSizeAlloc(ms.nraTotalSizeAlloc)
+ , nraTotalSizeUsed(ms.nraTotalSizeUsed)
+ {
+ for (int i = 0; i < CMK_Count; i++)
+ {
+ allocSzByKind[i] = ms.allocSzByKind[i];
+ }
+ }
+
+ // Until we have ubiquitous constructors.
+ void Init()
+ {
+ this->MemStats::MemStats();
+ }
+
+ void AddAlloc(size_t sz, CompMemKind cmk)
+ {
+ allocCnt += 1;
+ allocSz += sz;
+ if (sz > allocSzMax)
+ {
+ allocSzMax = sz;
+ }
+ allocSzByKind[cmk] += sz;
+ }
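+        // Illustrative effect: AddAlloc(128, CMK_Unknown) bumps allocCnt by one, adds 128 to both
+        // allocSz and allocSzByKind[CMK_Unknown], and raises allocSzMax if 128 exceeds the current max.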
+
+ void Print(FILE* f); // Print these stats to f.
+ void PrintByKind(FILE* f); // Do just the by-kind histogram part.
+ };
+ MemStats genMemStats;
+
+ struct AggregateMemStats : public MemStats
+ {
+ unsigned nMethods;
+
+ AggregateMemStats() : MemStats(), nMethods(0)
+ {
+ }
+
+ void Add(const MemStats& ms)
+ {
+ nMethods++;
+ allocCnt += ms.allocCnt;
+ allocSz += ms.allocSz;
+ allocSzMax = max(allocSzMax, ms.allocSzMax);
+ for (int i = 0; i < CMK_Count; i++)
+ {
+ allocSzByKind[i] += ms.allocSzByKind[i];
+ }
+ nraTotalSizeAlloc += ms.nraTotalSizeAlloc;
+ nraTotalSizeUsed += ms.nraTotalSizeUsed;
+ }
+
+ void Print(FILE* f); // Print these stats to jitstdout.
+ };
+
+ static CritSecObject s_memStatsLock; // This lock protects the data structures below.
+ static MemStats s_maxCompMemStats; // Stats for the compilation with the largest amount allocated.
+ static AggregateMemStats s_aggMemStats; // Aggregates statistics for all compilations.
+
+#endif // MEASURE_MEM_ALLOC
+
+#if LOOP_HOIST_STATS
+ unsigned m_loopsConsidered;
+ bool m_curLoopHasHoistedExpression;
+ unsigned m_loopsWithHoistedExpressions;
+ unsigned m_totalHoistedExpressions;
+
+ void AddLoopHoistStats();
+ void PrintPerMethodLoopHoistStats();
+
+ static CritSecObject s_loopHoistStatsLock; // This lock protects the data structures below.
+ static unsigned s_loopsConsidered;
+ static unsigned s_loopsWithHoistedExpressions;
+ static unsigned s_totalHoistedExpressions;
+
+ static void PrintAggregateLoopHoistStats(FILE* f);
+#endif // LOOP_HOIST_STATS
+
+ void* compGetMemArray(size_t numElem, size_t elemSize, CompMemKind cmk = CMK_Unknown);
+ void* compGetMemArrayA(size_t numElem, size_t elemSize, CompMemKind cmk = CMK_Unknown);
+ void* compGetMem(size_t sz, CompMemKind cmk = CMK_Unknown);
+ void* compGetMemA(size_t sz, CompMemKind cmk = CMK_Unknown);
+ static void* compGetMemCallback(void*, size_t, CompMemKind cmk = CMK_Unknown);
+ void compFreeMem(void*);
+
+ bool compIsForImportOnly();
+ bool compIsForInlining();
+ bool compDonotInline();
+
+#ifdef DEBUG
+ const char* compLocalVarName(unsigned varNum, unsigned offs);
+ VarName compVarName(regNumber reg, bool isFloatReg = false);
+ const char* compRegVarName(regNumber reg, bool displayVar = false, bool isFloatReg = false);
+ const char* compRegPairName(regPairNo regPair);
+ const char* compRegNameForSize(regNumber reg, size_t size);
+ const char* compFPregVarName(unsigned fpReg, bool displayVar = false);
+ void compDspSrcLinesByNativeIP(UNATIVE_OFFSET curIP);
+ void compDspSrcLinesByLineNum(unsigned line, bool seek = false);
+#endif // DEBUG
+
+//-------------------------------------------------------------------------
+
+#ifdef DEBUGGING_SUPPORT
+ typedef ListNode<VarScopeDsc*> VarScopeListNode;
+
+ struct VarScopeMapInfo
+ {
+ VarScopeListNode* head;
+ VarScopeListNode* tail;
+ static VarScopeMapInfo* Create(VarScopeListNode* node, IAllocator* alloc)
+ {
+ VarScopeMapInfo* info = new (alloc) VarScopeMapInfo;
+ info->head = node;
+ info->tail = node;
+ return info;
+ }
+ };
+
+ // Max value of scope count for which we would use linear search; for larger values we would use hashtable lookup.
+ static const unsigned MAX_LINEAR_FIND_LCL_SCOPELIST = 32;
+
+ typedef SimplerHashTable<unsigned, SmallPrimitiveKeyFuncs<unsigned>, VarScopeMapInfo*, JitSimplerHashBehavior>
+ VarNumToScopeDscMap;
+
+    // Map to keep variables' scopes, indexed by varNum, containing their scope dscs at the index.
+ VarNumToScopeDscMap* compVarScopeMap;
+
+ VarScopeDsc* compFindLocalVar(unsigned varNum, unsigned lifeBeg, unsigned lifeEnd);
+
+ VarScopeDsc* compFindLocalVar(unsigned varNum, unsigned offs);
+
+ VarScopeDsc* compFindLocalVarLinear(unsigned varNum, unsigned offs);
+
+ void compInitVarScopeMap();
+
+ VarScopeDsc** compEnterScopeList; // List has the offsets where variables
+ // enter scope, sorted by instr offset
+ unsigned compNextEnterScope;
+
+ VarScopeDsc** compExitScopeList; // List has the offsets where variables
+ // go out of scope, sorted by instr offset
+ unsigned compNextExitScope;
+
+ void compInitScopeLists();
+
+ void compResetScopeLists();
+
+ VarScopeDsc* compGetNextEnterScope(unsigned offs, bool scan = false);
+
+ VarScopeDsc* compGetNextExitScope(unsigned offs, bool scan = false);
+
+ void compProcessScopesUntil(unsigned offset,
+ VARSET_TP* inScope,
+ void (Compiler::*enterScopeFn)(VARSET_TP* inScope, VarScopeDsc*),
+ void (Compiler::*exitScopeFn)(VARSET_TP* inScope, VarScopeDsc*));
+
+#ifdef DEBUG
+ void compDispScopeLists();
+#endif // DEBUG
+
+#endif // DEBUGGING_SUPPORT
+
+ bool compIsProfilerHookNeeded();
+
+ //-------------------------------------------------------------------------
+ /* Statistical Data Gathering */
+
+ void compJitStats(); // call this function and enable
+ // various ifdef's below for statistical data
+
+#if CALL_ARG_STATS
+ void compCallArgStats();
+ static void compDispCallArgStats(FILE* fout);
+#endif
+
+ //-------------------------------------------------------------------------
+
+protected:
+#ifdef DEBUG
+ bool skipMethod();
+#endif
+
+ ArenaAllocator* compAllocator;
+
+public:
+ // This one presents an implementation of the "IAllocator" abstract class that uses "compAllocator",
+ // suitable for use by utilcode collection types.
+ IAllocator* compAsIAllocator;
+
+#if MEASURE_MEM_ALLOC
+ IAllocator* compAsIAllocatorBitset; // An allocator that uses the CMK_bitset tracker.
+ IAllocator* compAsIAllocatorGC; // An allocator that uses the CMK_GC tracker.
+ IAllocator* compAsIAllocatorLoopHoist; // An allocator that uses the CMK_LoopHoist tracker.
+#ifdef DEBUG
+ IAllocator* compAsIAllocatorDebugOnly; // An allocator that uses the CMK_DebugOnly tracker.
+#endif // DEBUG
+#endif // MEASURE_MEM_ALLOC
+
+ void compFunctionTraceStart();
+ void compFunctionTraceEnd(void* methodCodePtr, ULONG methodCodeSize, bool isNYI);
+
+protected:
+ size_t compMaxUncheckedOffsetForNullObject;
+
+ void compInitOptions(CORJIT_FLAGS* compileFlags);
+
+ void compSetProcessor();
+ void compInitDebuggingInfo();
+ void compSetOptimizationLevel();
+#ifdef _TARGET_ARMARCH_
+ bool compRsvdRegCheck(FrameLayoutState curState);
+#endif
+ void compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_FLAGS* compileFlags);
+
+ // Data required for generating profiler Enter/Leave/TailCall hooks
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef PROFILING_SUPPORTED
+ bool compProfilerHookNeeded; // Whether profiler Enter/Leave/TailCall hook needs to be generated for the method
+ void* compProfilerMethHnd; // Profiler handle of the method being compiled. Passed as param to ELT callbacks
+ bool compProfilerMethHndIndirected; // Whether compProfilerHandle is pointer to the handle or is an actual handle
+#endif
+#ifdef _TARGET_AMD64_
+ bool compQuirkForPPP(); // Check if this method should be Quirked for the PPP issue
+#endif
+public:
+ // Assumes called as part of process shutdown; does any compiler-specific work associated with that.
+ static void ProcessShutdownWork(ICorStaticInfo* statInfo);
+
+ IAllocator* getAllocator()
+ {
+ return compAsIAllocator;
+ }
+
+#if MEASURE_MEM_ALLOC
+ IAllocator* getAllocatorBitset()
+ {
+ return compAsIAllocatorBitset;
+ }
+ IAllocator* getAllocatorGC()
+ {
+ return compAsIAllocatorGC;
+ }
+ IAllocator* getAllocatorLoopHoist()
+ {
+ return compAsIAllocatorLoopHoist;
+ }
+#else // !MEASURE_MEM_ALLOC
+ IAllocator* getAllocatorBitset()
+ {
+ return compAsIAllocator;
+ }
+ IAllocator* getAllocatorGC()
+ {
+ return compAsIAllocator;
+ }
+ IAllocator* getAllocatorLoopHoist()
+ {
+ return compAsIAllocator;
+ }
+#endif // !MEASURE_MEM_ALLOC
+
+#ifdef DEBUG
+ IAllocator* getAllocatorDebugOnly()
+ {
+#if MEASURE_MEM_ALLOC
+ return compAsIAllocatorDebugOnly;
+#else // !MEASURE_MEM_ALLOC
+ return compAsIAllocator;
+#endif // !MEASURE_MEM_ALLOC
+ }
+#endif // DEBUG
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX typeInfo XX
+ XX XX
+ XX Checks for type compatibility and merges types XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+    // Set to TRUE if verification cannot be skipped for this method.
+ // If we detect unverifiable code, we will lazily check
+ // canSkipMethodVerification() to see if verification is REALLY needed.
+ BOOL tiVerificationNeeded;
+
+    // It is initially TRUE, and it gets set to FALSE if we run into unverifiable code.
+ // Note that this is valid only if tiVerificationNeeded was ever TRUE.
+ BOOL tiIsVerifiableCode;
+
+ // Set to TRUE if runtime callout is needed for this method
+ BOOL tiRuntimeCalloutNeeded;
+
+ // Set to TRUE if security prolog/epilog callout is needed for this method
+ // Note: This flag is different than compNeedSecurityCheck.
+ // compNeedSecurityCheck means whether or not a security object needs
+ // to be allocated on the stack, which is currently true for EnC as well.
+ // tiSecurityCalloutNeeded means whether or not security callouts need
+ // to be inserted in the jitted code.
+ BOOL tiSecurityCalloutNeeded;
+
+ // Returns TRUE if child is equal to or a subtype of parent for merge purposes
+    // This support is necessary to support attributes that are not described in,
+    // for example, signatures. For example, the permanent home byref (a byref that
+    // points to the gc heap) isn't a property of method signatures, therefore
+    // it is safe to have mismatches here (that tiCompatibleWith will not flag),
+    // but when deciding if we need to reimport a block, we need to take these
+    // into account.
+ BOOL tiMergeCompatibleWith(const typeInfo& pChild, const typeInfo& pParent, bool normalisedForStack) const;
+
+ // Returns TRUE if child is equal to or a subtype of parent.
+ // normalisedForStack indicates that both types are normalised for the stack
+ BOOL tiCompatibleWith(const typeInfo& pChild, const typeInfo& pParent, bool normalisedForStack) const;
+
+ // Merges pDest and pSrc. Returns FALSE if merge is undefined.
+ // *pDest is modified to represent the merged type. Sets "*changed" to true
+ // if this changes "*pDest".
+ BOOL tiMergeToCommonParent(typeInfo* pDest, const typeInfo* pSrc, bool* changed) const;
+
+ // Set pDest from the primitive value type.
+ // Eg. System.Int32 -> ELEMENT_TYPE_I4
+
+ BOOL tiFromPrimitiveValueClass(typeInfo* pDest, const typeInfo* pVC) const;
+
+#ifdef DEBUG
+ // <BUGNUM> VSW 471305
+ // IJW allows assigning REF to BYREF. The following allows us to temporarily
+ // bypass the assert check in gcMarkRegSetGCref and gcMarkRegSetByref
+ // We use a "short" as we need to push/pop this scope.
+ // </BUGNUM>
+ short compRegSetCheckLevel;
+#endif
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX IL verification stuff XX
+ XX XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ // The following is used to track liveness of local variables, initialization
+ // of valueclass constructors, and type safe use of IL instructions.
+
+ // dynamic state info needed for verification
+ EntryState verCurrentState;
+
+    // The 'this' ptr of object type .ctors is considered inited only after
+ // the base class ctor is called, or an alternate ctor is called.
+ // An uninited this ptr can be used to access fields, but cannot
+ // be used to call a member function.
+ BOOL verTrackObjCtorInitState;
+
+ void verInitBBEntryState(BasicBlock* block, EntryState* currentState);
+
+ // Requires that "tis" is not TIS_Bottom -- it's a definite init/uninit state.
+ void verSetThisInit(BasicBlock* block, ThisInitState tis);
+ void verInitCurrentState();
+ void verResetCurrentState(BasicBlock* block, EntryState* currentState);
+
+ // Merges the current verification state into the entry state of "block", return FALSE if that merge fails,
+ // TRUE if it succeeds. Further sets "*changed" to true if this changes the entry state of "block".
+ BOOL verMergeEntryStates(BasicBlock* block, bool* changed);
+
+ void verConvertBBToThrowVerificationException(BasicBlock* block DEBUGARG(bool logMsg));
+ void verHandleVerificationFailure(BasicBlock* block DEBUGARG(bool logMsg));
+ typeInfo verMakeTypeInfo(CORINFO_CLASS_HANDLE clsHnd,
+ bool bashStructToRef = false); // converts from jit type representation to typeInfo
+ typeInfo verMakeTypeInfo(CorInfoType ciType,
+ CORINFO_CLASS_HANDLE clsHnd); // converts from jit type representation to typeInfo
+ BOOL verIsSDArray(typeInfo ti);
+ typeInfo verGetArrayElemType(typeInfo ti);
+
+ typeInfo verParseArgSigToTypeInfo(CORINFO_SIG_INFO* sig, CORINFO_ARG_LIST_HANDLE args);
+ BOOL verNeedsVerification();
+ BOOL verIsByRefLike(const typeInfo& ti);
+ BOOL verIsSafeToReturnByRef(const typeInfo& ti);
+
+ // generic type variables range over types that satisfy IsBoxable
+ BOOL verIsBoxable(const typeInfo& ti);
+
+ void DECLSPEC_NORETURN verRaiseVerifyException(INDEBUG(const char* reason) DEBUGARG(const char* file)
+ DEBUGARG(unsigned line));
+ void verRaiseVerifyExceptionIfNeeded(INDEBUG(const char* reason) DEBUGARG(const char* file)
+ DEBUGARG(unsigned line));
+ bool verCheckTailCallConstraint(OPCODE opcode,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken, // Is this a "constrained." call
+ // on a type parameter?
+                                   bool speculative // If true, won't throw if verification fails. Instead it will
+ // return false to the caller.
+ // If false, it will throw.
+ );
+ bool verIsBoxedValueType(typeInfo ti);
+
+ void verVerifyCall(OPCODE opcode,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken,
+ bool tailCall,
+ bool readonlyCall, // is this a "readonly." call?
+ const BYTE* delegateCreateStart,
+ const BYTE* codeAddr,
+ CORINFO_CALL_INFO* callInfo DEBUGARG(const char* methodName));
+
+ BOOL verCheckDelegateCreation(const BYTE* delegateCreateStart, const BYTE* codeAddr, mdMemberRef& targetMemberRef);
+
+ typeInfo verVerifySTIND(const typeInfo& ptr, const typeInfo& value, const typeInfo& instrType);
+ typeInfo verVerifyLDIND(const typeInfo& ptr, const typeInfo& instrType);
+ void verVerifyField(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ const CORINFO_FIELD_INFO& fieldInfo,
+ const typeInfo* tiThis,
+ BOOL mutator,
+ BOOL allowPlainStructAsThis = FALSE);
+ void verVerifyCond(const typeInfo& tiOp1, const typeInfo& tiOp2, unsigned opcode);
+ void verVerifyThisPtrInitialised();
+ BOOL verIsCallToInitThisPtr(CORINFO_CLASS_HANDLE context, CORINFO_CLASS_HANDLE target);
+
+ // Register allocator
+ void raInitStackFP();
+ void raEnregisterVarsPrePassStackFP();
+ void raSetRegLclBirthDeath(GenTreePtr tree, VARSET_VALARG_TP lastlife, bool fromLDOBJ);
+ void raEnregisterVarsPostPassStackFP();
+ void raGenerateFPRefCounts();
+ void raEnregisterVarsStackFP();
+ void raUpdateHeightsForVarsStackFP(VARSET_VALARG_TP mask);
+
+ regNumber raRegForVarStackFP(unsigned varTrackedIndex);
+ void raAddPayloadStackFP(VARSET_VALARG_TP mask, unsigned weight);
+
+ // returns true if enregistering v1 would save more mem accesses than v2
+ bool raVarIsGreaterValueStackFP(LclVarDsc* lv1, LclVarDsc* lv2);
+
+#ifdef DEBUG
+ void raDumpHeightsStackFP();
+ void raDumpVariableRegIntfFloat();
+#endif
+
+#if FEATURE_STACK_FP_X87
+
+ // Currently, we use FP transition blocks in only 2 situations:
+ //
+    // -conditional jump on longs where the FP stack differs from the target: it's not strictly
+    //  necessary, but it's low frequency and the code would get complicated if we try to
+    //  inline the FP stack adjustment, as we have a lot of special casing going on to try to
+    //  minimize the way we generate the jump code.
+    // -case statements of switch where the FP stack differs from the one of evaluating the switch () statement
+ // We do this as we want to codegen switch as a jumptable. Again, this is low frequency.
+ //
+ // However, transition blocks have 2 problems
+ //
+ // - Procedure splitting: current implementation of procedure splitting requires all basic blocks to
+ // be known at codegen time, as it generates all hot blocks first and cold blocks later. This ties
+ // us up in codegen and is a solvable problem (we could make procedure splitting generate blocks
+    //   in the right place without preordering them); for now it causes us to have to generate the transition
+ // blocks in the cold area if we want procedure splitting.
+ //
+ //
+ // - Thread abort exceptions and transition blocks. Transition blocks were designed under the assumption
+ // that no exceptions can happen inside them. Unfortunately Thread.Abort can happen in any instruction,
+ // and if we have handlers we will have to try to call them. Fixing this the right way would imply
+    //   having multiple try native code regions for a single try IL region. This is doable and shouldn't be
+    //   a big change in the exception code.
+ //
+ // Given the low frequency of the cases where we have transition blocks, I've decided to dumb down
+ // optimizations. For these 2 cases:
+ //
+ // - When there is a chance that we will have FP transition blocks, we won't do procedure splitting.
+    // - When a method has a handler, it won't enregister any FP variables that go through a conditional long or
+ // a switch statement.
+ //
+ // If at any point we find we need to optimize this, we should throw work at unblocking the restrictions our
+ // current procedure splitting and exception code have.
+ bool compMayHaveTransitionBlocks;
+
+ VARSET_TP raMaskDontEnregFloat; // mask for additional restrictions
+
+ VARSET_TP raLclRegIntfFloat[REG_FPCOUNT];
+
+ unsigned raCntStkStackFP;
+ unsigned raCntWtdStkDblStackFP;
+ unsigned raCntStkParamDblStackFP;
+
+    // Payload in mem accesses for enregistering a variable (we don't want to mix with refcounts)
+ // TODO: Do we want to put this in LclVarDsc?
+ unsigned raPayloadStackFP[lclMAX_TRACKED];
+ unsigned raHeightsStackFP[lclMAX_TRACKED][FP_VIRTUALREGISTERS + 1];
+#ifdef DEBUG
+ // Useful for debugging
+ unsigned raHeightsNonWeightedStackFP[lclMAX_TRACKED][FP_VIRTUALREGISTERS + 1];
+#endif
+#endif // FEATURE_STACK_FP_X87
+
+#ifdef DEBUG
+ // One line log function. Default level is 0. Increasing it gives you
+ // more log information
+
+ // levels are currently unused: #define JITDUMP(level,...) ();
+ void JitLogEE(unsigned level, const char* fmt, ...);
+
+ bool compDebugBreak;
+
+ bool compJitHaltMethod();
+
+#endif
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX GS Security checks for unsafe buffers XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+public:
+ struct ShadowParamVarInfo
+ {
+ FixedBitVect* assignGroup; // the closure set of variables whose values depend on each other
+ unsigned shadowCopy; // Lcl var num, valid only if not set to NO_SHADOW_COPY
+
+ static bool mayNeedShadowCopy(LclVarDsc* varDsc)
+ {
+#if defined(_TARGET_AMD64_) && !defined(LEGACY_BACKEND)
+ // GS cookie logic to create shadow slots, create trees to copy reg args to shadow
+ // slots and update all trees to refer to shadow slots is done immediately after
+            // fgMorph(). LSRA could potentially mark a param as DoNotEnregister after the JIT determines
+            // not to shadow a parameter. Also, LSRA could potentially spill a param which is passed
+            // in a register. Therefore, conservatively, all params may need a shadow copy. Note that
+ // GS cookie logic further checks whether the param is a ptr or an unsafe buffer before
+ // creating a shadow slot even though this routine returns true.
+ //
+ // TODO-AMD64-CQ: Revisit this conservative approach as it could create more shadow slots than
+ // required. There are two cases under which a reg arg could potentially be used from its
+ // home location:
+ // a) LSRA marks it as DoNotEnregister (see LinearScan::identifyCandidates())
+ // b) LSRA spills it
+ //
+ // Possible solution to address case (a)
+ // - The conditions under which LSRA marks a varDsc as DoNotEnregister could be checked
+            //     in this routine. Note that determining liveness out of an exception handler is something
+            //     we may not be able to do here, since GS cookie logic is invoked ahead of liveness
+            //     computation. Therefore, for methods with exception handling that need the GS cookie check
+            //     we might have to take the conservative approach.
+ //
+ // Possible solution to address case (b)
+            //   - Whenever a parameter passed in an argument register needs to be spilled by LSRA, we
+ // create a new spill temp if the method needs GS cookie check.
+ return varDsc->lvIsParam;
+#else // !(defined(_TARGET_AMD64_) && !defined(LEGACY_BACKEND))
+ return varDsc->lvIsParam && !varDsc->lvIsRegArg;
+#endif
+ }
+
+#ifdef DEBUG
+ void Print()
+ {
+ printf("assignGroup [%p]; shadowCopy: [%d];\n", assignGroup, shadowCopy);
+ }
+#endif
+ };
+
+ GSCookie* gsGlobalSecurityCookieAddr; // Address of global cookie for unsafe buffer checks
+ GSCookie gsGlobalSecurityCookieVal; // Value of global cookie if addr is NULL
+ ShadowParamVarInfo* gsShadowVarInfo; // Table used by shadow param analysis code
+
+ void gsGSChecksInitCookie(); // Grabs cookie variable
+    void gsGSChecksInitCookie();     // Grabs cookie variable
+    void gsCopyShadowParams();       // Identify vulnerable params and create shadow copies
+    bool gsFindVulnerableParams();   // Shadow param analysis code
+    void gsParamsToShadows();        // Insert copy code and replace param uses with shadows
+
+ static fgWalkPreFn gsMarkPtrsAndAssignGroups; // Shadow param analysis tree-walk
+ static fgWalkPreFn gsReplaceShadowParams; // Shadow param replacement tree-walk
+
+#define DEFAULT_MAX_INLINE_SIZE 100 // Methods with > DEFAULT_MAX_INLINE_SIZE IL bytes will never be inlined.
+    // This can be overridden by setting the COMPlus_JITInlineSize env variable.
+
+#define DEFAULT_MAX_INLINE_DEPTH 20 // Methods at more than this level deep will not be inlined
+
+private:
+#ifdef FEATURE_JIT_METHOD_PERF
+ JitTimer* pCompJitTimer; // Timer data structure (by phases) for current compilation.
+ static CompTimeSummaryInfo s_compJitTimerSummary; // Summary of the Timer information for the whole run.
+
+ static LPCWSTR JitTimeLogCsv(); // Retrieve the file name for CSV from ConfigDWORD.
+ static LPCWSTR compJitTimeLogFilename; // If a log file for JIT time is desired, filename to write it to.
+#endif
+ inline void EndPhase(Phases phase); // Indicate the end of the given phase.
+
+#if defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
+ // These variables are associated with maintaining SQM data about compile time.
+ unsigned __int64 m_compCyclesAtEndOfInlining; // The thread-virtualized cycle count at the end of the inlining phase
+ // in the current compilation.
+ unsigned __int64 m_compCycles; // Net cycle count for current compilation
+ DWORD m_compTickCountAtEndOfInlining; // The result of GetTickCount() (# ms since some epoch marker) at the end of
+ // the inlining phase in the current compilation.
+#endif // defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
+
+ // Records the SQM-relevant state (cycles and tick count). Should be called after inlining is complete.
+ // (We do this after inlining because this marks the last point at which the JIT is likely to cause
+ // type-loading and class initialization).
+ void RecordStateAtEndOfInlining();
+ // Assumes being called at the end of compilation. Update the SQM state.
+ void RecordStateAtEndOfCompilation();
+
+#ifdef FEATURE_CLRSQM
+ // Does anything SQM related necessary at process shutdown time.
+ static void ProcessShutdownSQMWork(ICorStaticInfo* statInfo);
+#endif // FEATURE_CLRSQM
+
+public:
+#if FUNC_INFO_LOGGING
+ static LPCWSTR compJitFuncInfoFilename; // If a log file for per-function information is required, this is the
+ // filename to write it to.
+ static FILE* compJitFuncInfoFile; // And this is the actual FILE* to write to.
+#endif // FUNC_INFO_LOGGING
+
+ Compiler* prevCompiler; // Previous compiler on stack for TLS Compiler* linked list for reentrant compilers.
+
+ // Is the compilation in a full trust context?
+ bool compIsFullTrust();
+
+#ifndef FEATURE_TRACELOGGING
+ // Should we actually fire the noway assert body and the exception handler?
+ bool compShouldThrowOnNoway();
+#else // FEATURE_TRACELOGGING
+ // Should we actually fire the noway assert body and the exception handler?
+ bool compShouldThrowOnNoway(const char* filename, unsigned line);
+
+ // Telemetry instance to use per method compilation.
+ JitTelemetry compJitTelemetry;
+
+ // Get common parameters that have to be logged with most telemetry data.
+ void compGetTelemetryDefaults(const char** assemblyName,
+ const char** scopeName,
+ const char** methodName,
+ unsigned* methodHash);
+#endif // !FEATURE_TRACELOGGING
+
+#ifdef DEBUG
+private:
+ NodeToTestDataMap* m_nodeTestData;
+
+ static const unsigned FIRST_LOOP_HOIST_CSE_CLASS = 1000;
+ unsigned m_loopHoistCSEClass; // LoopHoist test annotations turn into CSE requirements; we
+ // label them with CSE Class #'s starting at FIRST_LOOP_HOIST_CSE_CLASS.
+ // Currently kept in this field.
+public:
+ NodeToTestDataMap* GetNodeTestData()
+ {
+ Compiler* compRoot = impInlineRoot();
+ if (compRoot->m_nodeTestData == nullptr)
+ {
+ compRoot->m_nodeTestData = new (getAllocatorDebugOnly()) NodeToTestDataMap(getAllocatorDebugOnly());
+ }
+ return compRoot->m_nodeTestData;
+ }
+
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, int, JitSimplerHashBehavior> NodeToIntMap;
+
+ // Returns the set (i.e., the domain of the result map) of nodes that are keys in m_nodeTestData, and
+ // currently occur in the AST graph.
+ NodeToIntMap* FindReachableNodesInNodeTestData();
+
+ // Node "from" is being eliminated, and being replaced by node "to". If "from" had any associated
+ // test data, associate that data with "to".
+ void TransferTestDataToNode(GenTreePtr from, GenTreePtr to);
+
+ // Requires that "to" is a clone of "from". If any nodes in the "from" tree
+ // have annotations, attach similar annotations to the corresponding nodes in "to".
+ void CopyTestDataToCloneTree(GenTreePtr from, GenTreePtr to);
+
+ // These are the methods that test that the various conditions implied by the
+ // test attributes are satisfied.
+ void JitTestCheckSSA(); // SSA builder tests.
+ void JitTestCheckVN(); // Value numbering tests.
+#endif // DEBUG
+
+ // The "FieldSeqStore", for canonicalizing field sequences. See the definition of FieldSeqStore for
+ // operations.
+ FieldSeqStore* m_fieldSeqStore;
+
+ FieldSeqStore* GetFieldSeqStore()
+ {
+ Compiler* compRoot = impInlineRoot();
+ if (compRoot->m_fieldSeqStore == nullptr)
+ {
+ // Create a CompAllocator that labels sub-structure with CMK_FieldSeqStore, and use that for allocation.
+ IAllocator* ialloc = new (this, CMK_FieldSeqStore) CompAllocator(this, CMK_FieldSeqStore);
+ compRoot->m_fieldSeqStore = new (ialloc) FieldSeqStore(ialloc);
+ }
+ return compRoot->m_fieldSeqStore;
+ }
+
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, FieldSeqNode*, JitSimplerHashBehavior> NodeToFieldSeqMap;
+
+ // Some nodes of "TYP_BYREF" or "TYP_I_IMPL" actually represent the address of a field within a struct, but since
+ // the offset of the field is zero, there's no "GT_ADD" node. We normally attach a field sequence to the constant
+ // that is added, but what do we do when that constant is zero, and is thus not present? We use this mechanism to
+ // attach the field sequence directly to the address node.
+ NodeToFieldSeqMap* m_zeroOffsetFieldMap;
+
+ NodeToFieldSeqMap* GetZeroOffsetFieldMap()
+ {
+ // Don't need to worry about inlining here
+ if (m_zeroOffsetFieldMap == nullptr)
+ {
+ // Create a CompAllocator that labels sub-structure with CMK_ZeroOffsetFieldMap, and use that for
+ // allocation.
+ IAllocator* ialloc = new (this, CMK_ZeroOffsetFieldMap) CompAllocator(this, CMK_ZeroOffsetFieldMap);
+ m_zeroOffsetFieldMap = new (ialloc) NodeToFieldSeqMap(ialloc);
+ }
+ return m_zeroOffsetFieldMap;
+ }
+
+ // Requires that "op1" is a node of type "TYP_BYREF" or "TYP_I_IMPL". We are dereferencing this with the fields in
+ // "fieldSeq", whose offsets are required all to be zero. Ensures that any field sequence annotation currently on
+ // "op1" or its components is augmented by appending "fieldSeq". In practice, if "op1" is a GT_LCL_FLD, it has
+ // a field sequence as a member; otherwise, it may be the addition of an a byref and a constant, where the const
+ // has a field sequence -- in this case "fieldSeq" is appended to that of the constant; otherwise, we
+ // record the the field sequence using the ZeroOffsetFieldMap described above.
+ //
+ // One exception to the above is when "op1" is a node of type "TYP_REF" and is a GT_LCL_VAR.
+ // This happens when the System.Object vtable pointer is a regular field at offset 0 in System.Private.CoreLib in
+ // CoreRT. Such a case is handled the same as the default case.
+ void fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq);
+
+ typedef SimplerHashTable<const GenTree*, PtrKeyFuncs<GenTree>, ArrayInfo, JitSimplerHashBehavior>
+ NodeToArrayInfoMap;
+ NodeToArrayInfoMap* m_arrayInfoMap;
+
+ NodeToArrayInfoMap* GetArrayInfoMap()
+ {
+ Compiler* compRoot = impInlineRoot();
+ if (compRoot->m_arrayInfoMap == nullptr)
+ {
+ // Create a CompAllocator that labels sub-structure with CMK_ArrayInfoMap, and use that for allocation.
+ IAllocator* ialloc = new (this, CMK_ArrayInfoMap) CompAllocator(this, CMK_ArrayInfoMap);
+ compRoot->m_arrayInfoMap = new (ialloc) NodeToArrayInfoMap(ialloc);
+ }
+ return compRoot->m_arrayInfoMap;
+ }
+
+ NodeToUnsignedMap* m_heapSsaMap;
+
+ // In some cases, we want to assign intermediate SSA #'s to heap states, and know what nodes create those heap
+ // states. (We do this for try blocks, where, if the try block doesn't do a call that loses track of the heap state,
+ // all the possible heap states are possible initial states of the corresponding catch block(s).)
+ NodeToUnsignedMap* GetHeapSsaMap()
+ {
+ Compiler* compRoot = impInlineRoot();
+ if (compRoot->m_heapSsaMap == nullptr)
+ {
+ // Create a CompAllocator that labels sub-structure with CMK_ArrayInfoMap, and use that for allocation.
+ IAllocator* ialloc = new (this, CMK_ArrayInfoMap) CompAllocator(this, CMK_ArrayInfoMap);
+ compRoot->m_heapSsaMap = new (ialloc) NodeToUnsignedMap(ialloc);
+ }
+ return compRoot->m_heapSsaMap;
+ }
+
+ // The Refany type is the only struct type whose structure is implicitly assumed by IL. We need its fields.
+ CORINFO_CLASS_HANDLE m_refAnyClass;
+ CORINFO_FIELD_HANDLE GetRefanyDataField()
+ {
+ if (m_refAnyClass == nullptr)
+ {
+ m_refAnyClass = info.compCompHnd->getBuiltinClass(CLASSID_TYPED_BYREF);
+ }
+ return info.compCompHnd->getFieldInClass(m_refAnyClass, 0);
+ }
+ CORINFO_FIELD_HANDLE GetRefanyTypeField()
+ {
+ if (m_refAnyClass == nullptr)
+ {
+ m_refAnyClass = info.compCompHnd->getBuiltinClass(CLASSID_TYPED_BYREF);
+ }
+ return info.compCompHnd->getFieldInClass(m_refAnyClass, 1);
+ }
+
+#if VARSET_COUNTOPS
+ static BitSetSupport::BitSetOpCounter m_varsetOpCounter;
+#endif
+#if ALLVARSET_COUNTOPS
+ static BitSetSupport::BitSetOpCounter m_allvarsetOpCounter;
+#endif
+
+ static HelperCallProperties s_helperCallProperties;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ static var_types GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size);
+ static var_types GetEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ unsigned slotNum);
+ static void GetStructTypeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ var_types* type0,
+ var_types* type1,
+ unsigned __int8* offset0,
+ unsigned __int8* offset1);
+ void fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument);
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ void fgMorphMultiregStructArgs(GenTreeCall* call);
+ GenTreePtr fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr);
+
+}; // end of class Compiler
+
+// Inline methods of CompAllocator.
+void* CompAllocator::Alloc(size_t sz)
+{
+#if MEASURE_MEM_ALLOC
+ return m_comp->compGetMem(sz, m_cmk);
+#else
+ return m_comp->compGetMem(sz);
+#endif
+}
+
+void* CompAllocator::ArrayAlloc(size_t elems, size_t elemSize)
+{
+#if MEASURE_MEM_ALLOC
+ return m_comp->compGetMemArray(elems, elemSize, m_cmk);
+#else
+ return m_comp->compGetMemArray(elems, elemSize);
+#endif
+}
+
+// LclVarDsc constructor. Uses Compiler, so must come after Compiler definition.
+inline LclVarDsc::LclVarDsc(Compiler* comp)
+ : // Initialize the ArgRegs to REG_STK.
+ // The morph phase will do the right thing and change it
+ // to the correct register if the arg is passed in a register.
+ _lvArgReg(REG_STK)
+ ,
+#if FEATURE_MULTIREG_ARGS
+ _lvOtherArgReg(REG_STK)
+ ,
+#endif // FEATURE_MULTIREG_ARGS
+#if ASSERTION_PROP
+ lvRefBlks(BlockSetOps::UninitVal())
+ ,
+#endif // ASSERTION_PROP
+ lvPerSsaData(comp->getAllocator())
+{
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Miscellaneous Compiler stuff XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+// Values used to mark the types a stack slot is used for
+
+const unsigned TYPE_REF_INT = 0x01; // slot used as a 32-bit int
+const unsigned TYPE_REF_LNG = 0x02; // slot used as a 64-bit long
+const unsigned TYPE_REF_FLT = 0x04; // slot used as a 32-bit float
+const unsigned TYPE_REF_DBL = 0x08; // slot used as a 64-bit float
+const unsigned TYPE_REF_PTR = 0x10; // slot used as a 32-bit pointer
+const unsigned TYPE_REF_BYR = 0x20; // slot used as a byref pointer
+const unsigned TYPE_REF_STC = 0x40; // slot used as a struct
+const unsigned TYPE_REF_TYPEMASK = 0x7F; // bits that represent the type
+
+// const unsigned TYPE_REF_ADDR_TAKEN = 0x80; // slot's address was taken
+
+/*****************************************************************************
+ *
+ * Variables to keep track of total code amounts.
+ */
+
+#if DISPLAY_SIZES
+
+extern size_t grossVMsize;
+extern size_t grossNCsize;
+extern size_t totalNCsize;
+
+extern unsigned genMethodICnt;
+extern unsigned genMethodNCnt;
+extern size_t gcHeaderISize;
+extern size_t gcPtrMapISize;
+extern size_t gcHeaderNSize;
+extern size_t gcPtrMapNSize;
+
+#endif // DISPLAY_SIZES
+
+/*****************************************************************************
+ *
+ * Variables to keep track of basic block counts (more data on 1 BB methods)
+ */
+
+#if COUNT_BASIC_BLOCKS
+extern Histogram bbCntTable;
+extern Histogram bbOneBBSizeTable;
+#endif
+
+/*****************************************************************************
+ *
+ * Used by optFindNaturalLoops to gather statistical information such as
+ * - total number of natural loops
+ * - number of loops with 1, 2, ... exit conditions
+ * - number of loops that have an iterator (for like)
+ * - number of loops that have a constant iterator
+ */
+
+#if COUNT_LOOPS
+
+extern unsigned totalLoopMethods; // counts the total number of methods that have natural loops
+extern unsigned maxLoopsPerMethod; // counts the maximum number of loops a method has
+extern unsigned totalLoopOverflows; // # of methods that identified more loops than we can represent
+extern unsigned totalLoopCount; // counts the total number of natural loops
+extern unsigned totalUnnatLoopCount; // counts the total number of (not-necessarily natural) loops
+extern unsigned totalUnnatLoopOverflows; // # of methods that identified more unnatural loops than we can represent
+extern unsigned iterLoopCount; // counts the # of loops with an iterator (for like)
+extern unsigned simpleTestLoopCount; // counts the # of loops with an iterator and a simple loop condition (iter <
+ // const)
+extern unsigned constIterLoopCount; // counts the # of loops with a constant iterator (for like)
+extern bool hasMethodLoops; // flag to keep track if we already counted a method as having loops
+extern unsigned loopsThisMethod; // counts the number of loops in the current method
+extern bool loopOverflowThisMethod; // True if we exceeded the max # of loops in the method.
+extern Histogram loopCountTable; // Histogram of loop counts
+extern Histogram loopExitCountTable; // Histogram of loop exit counts
+
+#endif // COUNT_LOOPS
+
+/*****************************************************************************
+ * Variables to keep track of how many iterations we perform in a dataflow pass
+ */
+
+#if DATAFLOW_ITER
+
+extern unsigned CSEiterCount; // counts the # of iterations for the CSE dataflow
+extern unsigned CFiterCount; // counts the # of iterations for the Const Folding dataflow
+
+#endif // DATAFLOW_ITER
+
+#if MEASURE_BLOCK_SIZE
+extern size_t genFlowNodeSize;
+extern size_t genFlowNodeCnt;
+#endif // MEASURE_BLOCK_SIZE
+
+#if MEASURE_NODE_SIZE
+struct NodeSizeStats
+{
+ void Init()
+ {
+ genTreeNodeCnt = 0;
+ genTreeNodeSize = 0;
+ genTreeNodeActualSize = 0;
+ }
+
+ size_t genTreeNodeCnt;
+ size_t genTreeNodeSize; // The size we allocate
+ size_t genTreeNodeActualSize; // The actual size of the node. Note that the actual size will likely be smaller
+ // than the allocated size, but we sometimes use SetOper()/ChangeOper() to change
+ // a smaller node to a larger one. TODO-Cleanup: add stats on
+ // SetOper()/ChangeOper() usage to quantify this.
+};
+extern NodeSizeStats genNodeSizeStats; // Total node size stats
+extern NodeSizeStats genNodeSizeStatsPerFunc; // Per-function node size stats
+extern Histogram genTreeNcntHist;
+extern Histogram genTreeNsizHist;
+#endif // MEASURE_NODE_SIZE
+
+/*****************************************************************************
+ * Count fatal errors (including noway_asserts).
+ */
+
+#if MEASURE_FATAL
+extern unsigned fatal_badCode;
+extern unsigned fatal_noWay;
+extern unsigned fatal_NOMEM;
+extern unsigned fatal_noWayAssertBody;
+#ifdef DEBUG
+extern unsigned fatal_noWayAssertBodyArgs;
+#endif // DEBUG
+extern unsigned fatal_NYI;
+#endif // MEASURE_FATAL
+
+/*****************************************************************************
+ * Codegen
+ */
+
+#ifdef _TARGET_XARCH_
+
+const instruction INS_SHIFT_LEFT_LOGICAL = INS_shl;
+const instruction INS_SHIFT_RIGHT_LOGICAL = INS_shr;
+const instruction INS_SHIFT_RIGHT_ARITHM = INS_sar;
+
+const instruction INS_AND = INS_and;
+const instruction INS_OR = INS_or;
+const instruction INS_XOR = INS_xor;
+const instruction INS_NEG = INS_neg;
+const instruction INS_TEST = INS_test;
+const instruction INS_MUL = INS_imul;
+const instruction INS_SIGNED_DIVIDE = INS_idiv;
+const instruction INS_UNSIGNED_DIVIDE = INS_div;
+const instruction INS_BREAKPOINT = INS_int3;
+const instruction INS_ADDC = INS_adc;
+const instruction INS_SUBC = INS_sbb;
+const instruction INS_NOT = INS_not;
+
+#endif
+
+#ifdef _TARGET_ARM_
+
+const instruction INS_SHIFT_LEFT_LOGICAL = INS_lsl;
+const instruction INS_SHIFT_RIGHT_LOGICAL = INS_lsr;
+const instruction INS_SHIFT_RIGHT_ARITHM = INS_asr;
+
+const instruction INS_AND = INS_and;
+const instruction INS_OR = INS_orr;
+const instruction INS_XOR = INS_eor;
+const instruction INS_NEG = INS_rsb;
+const instruction INS_TEST = INS_tst;
+const instruction INS_MUL = INS_mul;
+const instruction INS_SIGNED_DIVIDE = INS_sdiv;
+const instruction INS_UNSIGNED_DIVIDE = INS_udiv;
+const instruction INS_BREAKPOINT = INS_bkpt;
+const instruction INS_ADDC = INS_adc;
+const instruction INS_SUBC = INS_sbc;
+const instruction INS_NOT = INS_mvn;
+
+#endif
+
+#ifdef _TARGET_ARM64_
+
+const instruction INS_SHIFT_LEFT_LOGICAL = INS_lsl;
+const instruction INS_SHIFT_RIGHT_LOGICAL = INS_lsr;
+const instruction INS_SHIFT_RIGHT_ARITHM = INS_asr;
+
+const instruction INS_AND = INS_and;
+const instruction INS_OR = INS_orr;
+const instruction INS_XOR = INS_eor;
+const instruction INS_NEG = INS_neg;
+const instruction INS_TEST = INS_tst;
+const instruction INS_MUL = INS_mul;
+const instruction INS_SIGNED_DIVIDE = INS_sdiv;
+const instruction INS_UNSIGNED_DIVIDE = INS_udiv;
+const instruction INS_BREAKPOINT = INS_bkpt;
+const instruction INS_ADDC = INS_adc;
+const instruction INS_SUBC = INS_sbc;
+const instruction INS_NOT = INS_mvn;
+
+#endif
+
+/*****************************************************************************/
+
+extern const BYTE genTypeSizes[];
+extern const BYTE genTypeAlignments[];
+extern const BYTE genTypeStSzs[];
+extern const BYTE genActualTypes[];
+
+/*****************************************************************************/
+
+// VERY_LARGE_FRAME_SIZE_REG_MASK is the set of registers we need to use for
+// the probing loop generated for very large stack frames (see `getVeryLargeFrameSize`).
+
+#ifdef _TARGET_ARM_
+#define VERY_LARGE_FRAME_SIZE_REG_MASK (RBM_R4 | RBM_R5 | RBM_R6)
+#elif defined(_TARGET_ARM64_)
+#define VERY_LARGE_FRAME_SIZE_REG_MASK (RBM_R9 | RBM_R10 | RBM_R11)
+#endif
+
+/*****************************************************************************/
+
+#define REG_CORRUPT regNumber(REG_NA + 1)
+#define RBM_CORRUPT (RBM_ILLEGAL | regMaskTP(1))
+#define REG_PAIR_CORRUPT regPairNo(REG_PAIR_NONE + 1)
+
+/*****************************************************************************/
+
+extern BasicBlock dummyBB;
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+// foreach_treenode_execution_order: An iterator that iterates through all the tree
+// nodes of a statement in execution order.
+// __stmt: a GT_STMT type GenTree*
+// __node: a GenTree*, already declared, that gets updated with each node in the statement, in execution order
+
+#define foreach_treenode_execution_order(__node, __stmt) \
+ for ((__node) = (__stmt)->gtStmt.gtStmtList; (__node); (__node) = (__node)->gtNext)
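+
+// Illustrative usage:
+//
+//     GenTree* node;
+//     foreach_treenode_execution_order(node, stmt)
+//     {
+//         // ... visit "node" ...
+//     }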
+
+// foreach_block: An iterator over all blocks in the function.
+// __compiler: the Compiler* object
+// __block : a BasicBlock*, already declared, that gets updated each iteration.
+
+#define foreach_block(__compiler, __block) \
+ for ((__block) = (__compiler)->fgFirstBB; (__block); (__block) = (__block)->bbNext)
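+
+// Illustrative usage:
+//
+//     BasicBlock* block;
+//     foreach_block(compiler, block)
+//     {
+//         // ... visit "block" ...
+//     }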
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+void dumpConvertedVarSet(Compiler* comp, VARSET_VALARG_TP vars);
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Debugging helpers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+/* The following functions are intended to be called from the debugger, to dump
+ * various data structures. They can be used in the debugger Watch or Quick Watch
+ * windows. They are designed to be short to type and take as few arguments as
+ * possible. The 'c' versions take a Compiler*, whereas the 'd' versions use the TlsCompiler.
+ * See the function definition comment for more details.
+ */
+
+void cBlock(Compiler* comp, BasicBlock* block);
+void cBlocks(Compiler* comp);
+void cBlocksV(Compiler* comp);
+void cTree(Compiler* comp, GenTree* tree);
+void cTrees(Compiler* comp);
+void cEH(Compiler* comp);
+void cVar(Compiler* comp, unsigned lclNum);
+void cVarDsc(Compiler* comp, LclVarDsc* varDsc);
+void cVars(Compiler* comp);
+void cVarsFinal(Compiler* comp);
+void cBlockPreds(Compiler* comp, BasicBlock* block);
+void cReach(Compiler* comp);
+void cDoms(Compiler* comp);
+void cLiveness(Compiler* comp);
+void cCVarSet(Compiler* comp, VARSET_VALARG_TP vars);
+
+void cFuncIR(Compiler* comp);
+void cBlockIR(Compiler* comp, BasicBlock* block);
+void cLoopIR(Compiler* comp, Compiler::LoopDsc* loop);
+void cTreeIR(Compiler* comp, GenTree* tree);
+int cTreeTypeIR(Compiler* comp, GenTree* tree);
+int cTreeKindsIR(Compiler* comp, GenTree* tree);
+int cTreeFlagsIR(Compiler* comp, GenTree* tree);
+int cOperandIR(Compiler* comp, GenTree* operand);
+int cLeafIR(Compiler* comp, GenTree* tree);
+int cIndirIR(Compiler* comp, GenTree* tree);
+int cListIR(Compiler* comp, GenTree* list);
+int cSsaNumIR(Compiler* comp, GenTree* tree);
+int cValNumIR(Compiler* comp, GenTree* tree);
+int cDependsIR(Compiler* comp, GenTree* comma, bool* first);
+
+void dBlock(BasicBlock* block);
+void dBlocks();
+void dBlocksV();
+void dTree(GenTree* tree);
+void dTrees();
+void dEH();
+void dVar(unsigned lclNum);
+void dVarDsc(LclVarDsc* varDsc);
+void dVars();
+void dVarsFinal();
+void dBlockPreds(BasicBlock* block);
+void dReach();
+void dDoms();
+void dLiveness();
+void dCVarSet(VARSET_VALARG_TP vars);
+
+void dVarSet(VARSET_VALARG_TP vars);
+void dRegMask(regMaskTP mask);
+
+void dFuncIR();
+void dBlockIR(BasicBlock* block);
+void dTreeIR(GenTree* tree);
+void dLoopIR(Compiler::LoopDsc* loop);
+void dLoopNumIR(unsigned loopNum);
+int dTabStopIR(int curr, int tabstop);
+int dTreeTypeIR(GenTree* tree);
+int dTreeKindsIR(GenTree* tree);
+int dTreeFlagsIR(GenTree* tree);
+int dOperandIR(GenTree* operand);
+int dLeafIR(GenTree* tree);
+int dIndirIR(GenTree* tree);
+int dListIR(GenTree* list);
+int dSsaNumIR(GenTree* tree);
+int dValNumIR(GenTree* tree);
+int dDependsIR(GenTree* comma);
+void dFormatIR();
+
+GenTree* dFindTree(GenTree* tree, unsigned id);
+GenTree* dFindTree(unsigned id);
+GenTreeStmt* dFindStmt(unsigned id);
+BasicBlock* dFindBlock(unsigned bbNum);
+
+#endif // DEBUG
+
+#include "compiler.hpp" // All the shared inline functions
+
+/*****************************************************************************/
+#endif //_COMPILER_H_
+/*****************************************************************************/
diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp
new file mode 100644
index 0000000000..eb8eb19c68
--- /dev/null
+++ b/src/jit/compiler.hpp
@@ -0,0 +1,4742 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Inline functions XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifndef _COMPILER_HPP_
+#define _COMPILER_HPP_
+
+#include "emit.h" // for emitter::emitAddLabel
+
+#include "bitvec.h"
+
+#include "compilerbitsettraits.hpp"
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Miscellaneous utility functions. Some of these are defined in Utils.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+inline bool getInlinePInvokeEnabled()
+{
+#ifdef DEBUG
+ return JitConfig.JitPInvokeEnabled() && !JitConfig.StressCOMCall();
+#else
+ return true;
+#endif
+}
+
+inline bool getInlinePInvokeCheckEnabled()
+{
+#ifdef DEBUG
+ return JitConfig.JitPInvokeCheckEnabled() != 0;
+#else
+ return false;
+#endif
+}
+
+// Enforce float narrowing for buggy compilers (notably preWhidbey VC)
+inline float forceCastToFloat(double d)
+{
+ Volatile<float> f = (float)d;
+ return f;
+}
+
+// Enforce UInt32 narrowing for buggy compilers (notably Whidbey Beta 2 LKG)
+inline UINT32 forceCastToUInt32(double d)
+{
+ Volatile<UINT32> u = (UINT32)d;
+ return u;
+}
+
+enum RoundLevel
+{
+ ROUND_NEVER = 0, // Never round
+ ROUND_CMP_CONST = 1, // Round values compared against constants
+ ROUND_CMP = 2, // Round comparands and return values
+ ROUND_ALWAYS = 3, // Round always
+
+ COUNT_ROUND_LEVEL,
+ DEFAULT_ROUND_LEVEL = ROUND_NEVER
+};
+
+inline RoundLevel getRoundFloatLevel()
+{
+#ifdef DEBUG
+ return (RoundLevel)JitConfig.JitRoundFloat();
+#else
+ return DEFAULT_ROUND_LEVEL;
+#endif
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Return the lowest bit that is set
+ */
+
+template <typename T>
+inline T genFindLowestBit(T value)
+{
+ return (value & (0 - value));
+}
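+
+// For example, genFindLowestBit(0x6) == 0x2: negating the value (two's complement)
+// flips every bit above the lowest set bit, so ANDing isolates just that bit.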
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Return the highest bit that is set (that is, a mask that includes just the highest bit).
+ * TODO-ARM64-Throughput: we should convert these to use the _BitScanReverse() / _BitScanReverse64()
+ * compiler intrinsics, but our CRT header file intrin.h doesn't define these for ARM64 yet.
+ */
+
+inline unsigned int genFindHighestBit(unsigned int mask)
+{
+ assert(mask != 0);
+ unsigned int bit = 1U << ((sizeof(unsigned int) * 8) - 1); // start looking at the top
+ while ((bit & mask) == 0)
+ {
+ bit >>= 1;
+ }
+ return bit;
+}
+
+inline unsigned __int64 genFindHighestBit(unsigned __int64 mask)
+{
+ assert(mask != 0);
+ unsigned __int64 bit = 1ULL << ((sizeof(unsigned __int64) * 8) - 1); // start looking at the top
+ while ((bit & mask) == 0)
+ {
+ bit >>= 1;
+ }
+ return bit;
+}
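+
+// For example, genFindHighestBit(0x12) == 0x10: the probe bit starts at the most
+// significant position and shifts right until it first overlaps the mask.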
+
+#if 0
+// TODO-ARM64-Cleanup: These should probably be the implementation, when intrin.h is updated for ARM64
+inline
+unsigned int genFindHighestBit(unsigned int mask)
+{
+ assert(mask != 0);
+ unsigned int index;
+ _BitScanReverse(&index, mask);
+ return 1L << index;
+}
+
+inline
+unsigned __int64 genFindHighestBit(unsigned __int64 mask)
+{
+ assert(mask != 0);
+ unsigned int index;
+ _BitScanReverse64(&index, mask);
+ return 1LL << index;
+}
+#endif // 0
+
+/*****************************************************************************
+ *
+ * Return true if the given 64-bit value has exactly zero or one bits set.
+ */
+
+template <typename T>
+inline BOOL genMaxOneBit(T value)
+{
+ return (value & (value - 1)) == 0;
+}
+
+/*****************************************************************************
+ *
+ * Return true if the given 32-bit value has exactly zero or one bits set.
+ */
+
+inline BOOL genMaxOneBit(unsigned value)
+{
+ return (value & (value - 1)) == 0;
+}
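+
+// For example, genMaxOneBit(0x8) and genMaxOneBit(0) return true, while genMaxOneBit(0xA)
+// returns false: clearing the lowest set bit via (value & (value - 1)) leaves zero only
+// when at most one bit was set.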
+
+/*****************************************************************************
+ *
+ * Given a value that has exactly one bit set, return the position of that
+ * bit, in other words return the logarithm in base 2 of the given value.
+ */
+
+inline unsigned genLog2(unsigned value)
+{
+ return BitPosition(value);
+}
+
+/*****************************************************************************
+ *
+ * Given a value that has exactly one bit set, return the position of that
+ * bit, in other words return the logarithm in base 2 of the given value.
+ */
+
+inline unsigned genLog2(unsigned __int64 value)
+{
+ unsigned lo32 = (unsigned)value;
+ unsigned hi32 = (unsigned)(value >> 32);
+
+ if (lo32 != 0)
+ {
+ assert(hi32 == 0);
+ return genLog2(lo32);
+ }
+ else
+ {
+ return genLog2(hi32) + 32;
+ }
+}
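+
+// For example, genLog2(1ULL << 40) == 40: the low 32 bits are zero, so the result is
+// genLog2 of the high half (8) plus 32.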
+
+/*****************************************************************************
+ *
+ * Return the lowest bit that is set in the given register mask.
+ */
+
+inline regMaskTP genFindLowestReg(regMaskTP value)
+{
+ return (regMaskTP)genFindLowestBit(value);
+}
+
+/*****************************************************************************
+ *
+ * A rather simple routine that counts the number of bits in a given number.
+ */
+
+template <typename T>
+inline unsigned genCountBits(T bits)
+{
+ unsigned cnt = 0;
+
+ while (bits)
+ {
+ cnt++;
+ bits -= genFindLowestBit(bits);
+ }
+
+ return cnt;
+}
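+
+// For example, genCountBits(0xB) == 3: each iteration strips the lowest set bit
+// (0x1, then 0x2, then 0x8), so the loop runs once per set bit.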
+
+/*****************************************************************************
+ *
+ * Given three masks (value, end, start), returns the bits of 'value' between 'start'
+ * and 'end', exclusive of both endpoint bits:
+ *
+ * value[bitNum(end) - 1, bitNum(start) + 1]
+ */
+
+inline unsigned __int64 BitsBetween(unsigned __int64 value, unsigned __int64 end, unsigned __int64 start)
+{
+ assert(start != 0);
+ assert(start < end);
+ assert((start & (start - 1)) == 0);
+ assert((end & (end - 1)) == 0);
+
+ return value & ~((start - 1) | start) & // Ones to the left of set bit in the start mask.
+ (end - 1); // Ones to the right of set bit in the end mask.
+}
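+
+// For example, BitsBetween(0x2D, 0x10, 0x2) == 0xC: only bits 2 and 3 of the value
+// survive, i.e. the bits strictly between the start bit (bit 1) and the end bit (bit 4).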
+
+/*****************************************************************************/
+
+inline bool jitIsScaleIndexMul(size_t val)
+{
+ switch (val)
+ {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+// Returns "tree" iff "val" is a valid addressing mode scale shift amount on
+// the target architecture.
+inline bool jitIsScaleIndexShift(ssize_t val)
+{
+ // It happens that this is the right test for all our current targets: x86, x64 and ARM.
+ // This test would become target-dependent if we added a new target with a different constraint.
+ return 0 < val && val < 4;
+}
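+
+// For example, jitIsScaleIndexShift(3) is true (a shift of 3 corresponds to a scale of 8),
+// while jitIsScaleIndexShift(0) and jitIsScaleIndexShift(4) are false.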
+
+/*****************************************************************************
+ * Returns true if value is between [start..end).
+ * The comparison is inclusive of start, exclusive of end.
+ */
+
+/* static */
+inline bool Compiler::jitIsBetween(unsigned value, unsigned start, unsigned end)
+{
+ return start <= value && value < end;
+}
+
+/*****************************************************************************
+ * Returns true if value is between [start..end].
+ * The comparison is inclusive of both start and end.
+ */
+
+/* static */
+inline bool Compiler::jitIsBetweenInclusive(unsigned value, unsigned start, unsigned end)
+{
+ return start <= value && value <= end;
+}
+
+/******************************************************************************************
+ * Return the EH descriptor for the given region index.
+ */
+inline EHblkDsc* Compiler::ehGetDsc(unsigned regionIndex)
+{
+ assert(regionIndex < compHndBBtabCount);
+ return &compHndBBtab[regionIndex];
+}
+
+/******************************************************************************************
+ * Return the EH descriptor index of the enclosing try, for the given region index.
+ */
+inline unsigned Compiler::ehGetEnclosingTryIndex(unsigned regionIndex)
+{
+ return ehGetDsc(regionIndex)->ebdEnclosingTryIndex;
+}
+
+/******************************************************************************************
+ * Return the EH descriptor index of the enclosing handler, for the given region index.
+ */
+inline unsigned Compiler::ehGetEnclosingHndIndex(unsigned regionIndex)
+{
+ return ehGetDsc(regionIndex)->ebdEnclosingHndIndex;
+}
+
+/******************************************************************************************
+ * Return the EH index given a region descriptor.
+ */
+inline unsigned Compiler::ehGetIndex(EHblkDsc* ehDsc)
+{
+ assert(compHndBBtab <= ehDsc && ehDsc < compHndBBtab + compHndBBtabCount);
+ return (unsigned)(ehDsc - compHndBBtab);
+}
+
+/******************************************************************************************
+ * Return the EH descriptor for the most nested 'try' region this BasicBlock is a member of
+ * (or nullptr if this block is not in a 'try' region).
+ */
+inline EHblkDsc* Compiler::ehGetBlockTryDsc(BasicBlock* block)
+{
+ if (!block->hasTryIndex())
+ {
+ return nullptr;
+ }
+
+ return ehGetDsc(block->getTryIndex());
+}
+
+/******************************************************************************************
+ * Return the EH descriptor for the most nested filter or handler region this BasicBlock is a member of
+ * (or nullptr if this block is not in a filter or handler region).
+ */
+inline EHblkDsc* Compiler::ehGetBlockHndDsc(BasicBlock* block)
+{
+ if (!block->hasHndIndex())
+ {
+ return nullptr;
+ }
+
+ return ehGetDsc(block->getHndIndex());
+}
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ * Get the FuncInfoDsc for the funclet we are currently generating code for.
+ * This is only valid during codegen.
+ *
+ */
+inline FuncInfoDsc* Compiler::funCurrentFunc()
+{
+ return funGetFunc(compCurrFuncIdx);
+}
+
+/*****************************************************************************
+ * Change which funclet we are currently generating code for.
+ * This is only valid after funclets are created.
+ *
+ */
+inline void Compiler::funSetCurrentFunc(unsigned funcIdx)
+{
+ assert(fgFuncletsCreated);
+ assert(FitsIn<unsigned short>(funcIdx));
+ noway_assert(funcIdx < compFuncInfoCount);
+ compCurrFuncIdx = (unsigned short)funcIdx;
+}
+
+/*****************************************************************************
+ * Get the FuncInfoDsc for the given funclet.
+ * This is only valid after funclets are created.
+ *
+ */
+inline FuncInfoDsc* Compiler::funGetFunc(unsigned funcIdx)
+{
+ assert(fgFuncletsCreated);
+ assert(funcIdx < compFuncInfoCount);
+ return &compFuncInfos[funcIdx];
+}
+
+/*****************************************************************************
+ * Get the funcIdx for the EH funclet that begins with block.
+ * This is only valid after funclets are created.
+ * It is only valid for blocks marked with BBF_FUNCLET_BEG because
+ * otherwise we would have to do a more expensive check to determine
+ * if this should return the filter funclet or the filter handler funclet.
+ *
+ */
+inline unsigned Compiler::funGetFuncIdx(BasicBlock* block)
+{
+ assert(fgFuncletsCreated);
+ assert(block->bbFlags & BBF_FUNCLET_BEG);
+
+ EHblkDsc* eh = ehGetDsc(block->getHndIndex());
+ unsigned int funcIdx = eh->ebdFuncIndex;
+ if (eh->ebdHndBeg != block)
+ {
+ // If this is a filter EH clause, but we want the funclet
+ // for the filter (not the filter handler), it is the previous one
+ noway_assert(eh->HasFilter());
+ noway_assert(eh->ebdFilter == block);
+ assert(funGetFunc(funcIdx)->funKind == FUNC_HANDLER);
+ assert(funGetFunc(funcIdx)->funEHIndex == funGetFunc(funcIdx - 1)->funEHIndex);
+ assert(funGetFunc(funcIdx - 1)->funKind == FUNC_FILTER);
+ funcIdx--;
+ }
+
+ return funcIdx;
+}
+
+#else // !FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ * Get the FuncInfoDsc for the funclet we are currently generating code for.
+ * This is only valid during codegen. For non-funclet platforms, this is
+ * always the root function.
+ *
+ */
+inline FuncInfoDsc* Compiler::funCurrentFunc()
+{
+ return &compFuncInfoRoot;
+}
+
+/*****************************************************************************
+ * Change which funclet we are currently generating code for.
+ * This is only valid after funclets are created.
+ *
+ */
+inline void Compiler::funSetCurrentFunc(unsigned funcIdx)
+{
+ assert(funcIdx == 0);
+}
+
+/*****************************************************************************
+ * Get the FuncInfoDsc for the given funclet.
+ * This is only valid after funclets are created.
+ *
+ */
+inline FuncInfoDsc* Compiler::funGetFunc(unsigned funcIdx)
+{
+ assert(funcIdx == 0);
+ return &compFuncInfoRoot;
+}
+
+/*****************************************************************************
+ * No funclets, so always 0.
+ *
+ */
+inline unsigned Compiler::funGetFuncIdx(BasicBlock* block)
+{
+ return 0;
+}
+
+#endif // !FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Map a register mask to a register number
+ */
+
+inline regNumber genRegNumFromMask(regMaskTP mask)
+{
+ assert(mask != 0); // Must have one bit set, so can't have a mask of zero
+
+ /* Convert the mask to a register number */
+
+ regNumber regNum = (regNumber)genLog2(mask);
+
+ /* Make sure we got it right */
+
+ assert(genRegMask(regNum) == mask);
+
+ return regNum;
+}
+
+/*****************************************************************************
+ *
+ * Return the size in bytes of the given type.
+ */
+
+extern const BYTE genTypeSizes[TYP_COUNT];
+
+template <class T>
+inline unsigned genTypeSize(T type)
+{
+ assert((unsigned)TypeGet(type) < sizeof(genTypeSizes) / sizeof(genTypeSizes[0]));
+
+ return genTypeSizes[TypeGet(type)];
+}
+
+/*****************************************************************************
+ *
+ * Return the "stack slot count" of the given type.
+ * returns 1 for 32-bit types and 2 for 64-bit types.
+ */
+
+extern const BYTE genTypeStSzs[TYP_COUNT];
+
+inline unsigned genTypeStSz(var_types type)
+{
+ assert((unsigned)type < sizeof(genTypeStSzs) / sizeof(genTypeStSzs[0]));
+
+ return genTypeStSzs[type];
+}
+
+/*****************************************************************************
+ *
+ * Return the number of registers required to hold a value of the given type.
+ */
+
+/*****************************************************************************
+ *
+ * The following function maps a 'precise' type to an actual type as seen
+ * by the VM (for example, 'byte' maps to 'int').
+ */
+
+extern const BYTE genActualTypes[TYP_COUNT];
+
+inline var_types genActualType(var_types type)
+{
+ /* Spot check to make certain the table is in sync with the enum */
+
+ assert(genActualTypes[TYP_DOUBLE] == TYP_DOUBLE);
+ assert(genActualTypes[TYP_FNC] == TYP_FNC);
+ assert(genActualTypes[TYP_REF] == TYP_REF);
+
+ assert((unsigned)type < sizeof(genActualTypes));
+ return (var_types)genActualTypes[type];
+}
+
+/*****************************************************************************/
+
+inline var_types genUnsignedType(var_types type)
+{
+ /* Force signed types into corresponding unsigned type */
+
+ switch (type)
+ {
+ case TYP_BYTE:
+ type = TYP_UBYTE;
+ break;
+ case TYP_SHORT:
+ type = TYP_CHAR;
+ break;
+ case TYP_INT:
+ type = TYP_UINT;
+ break;
+ case TYP_LONG:
+ type = TYP_ULONG;
+ break;
+ default:
+ break;
+ }
+
+ return type;
+}
+
+/*****************************************************************************/
+
+inline var_types genSignedType(var_types type)
+{
+ /* Force non-small unsigned type into corresponding signed type */
+ /* Note that we leave the small types alone */
+
+ switch (type)
+ {
+ case TYP_UINT:
+ type = TYP_INT;
+ break;
+ case TYP_ULONG:
+ type = TYP_LONG;
+ break;
+ default:
+ break;
+ }
+
+ return type;
+}
+
+/*****************************************************************************
+ * Can this type be passed as a parameter in a register?
+ */
+
+inline bool isRegParamType(var_types type)
+{
+#if defined(_TARGET_X86_)
+ return (type <= TYP_INT || type == TYP_REF || type == TYP_BYREF);
+#else // !_TARGET_X86_
+ return true;
+#endif // !_TARGET_X86_
+}
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+/*****************************************************************************/
+// Returns true if 'type' is a struct that can be enregistered for call args
+// or can be returned by value in multiple registers.
+// if 'type' is not a struct the return value will be false.
+//
+// Arguments:
+// type - the basic jit var_type for the item being queried
+// typeClass - the handle for the struct when 'type' is TYP_STRUCT
+// typeSize - Out param (if non-null) is updated with the size of 'type'.
+// forReturn - this is true when we are asking about a GT_RETURN context;
+// this is false when we are asking about an argument context
+//
+inline bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types type,
+ CORINFO_CLASS_HANDLE typeClass,
+ unsigned* typeSize,
+ bool forReturn)
+{
+ bool result = false;
+ unsigned size = 0;
+
+ if (varTypeIsStruct(type))
+ {
+ size = info.compCompHnd->getClassSize(typeClass);
+ if (forReturn)
+ {
+ structPassingKind howToReturnStruct;
+ type = getReturnTypeForStruct(typeClass, &howToReturnStruct, size);
+ }
+ else
+ {
+ structPassingKind howToPassStruct;
+ type = getArgTypeForStruct(typeClass, &howToPassStruct, size);
+ }
+ if (type != TYP_UNKNOWN)
+ {
+ result = true;
+ }
+ }
+ else
+ {
+ size = genTypeSize(type);
+ }
+
+ if (typeSize != nullptr)
+ {
+ *typeSize = size;
+ }
+
+ return result;
+}
+#endif //_TARGET_AMD64_ || _TARGET_ARM64_
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+inline const char* varTypeGCstring(var_types type)
+{
+ switch (type)
+ {
+ case TYP_REF:
+ return "gcr";
+ case TYP_BYREF:
+ return "byr";
+ default:
+ return "non";
+ }
+}
+
+#endif
+
+/*****************************************************************************/
+
+const char* varTypeName(var_types);
+
+/*****************************************************************************
+ *
+ * Helpers to pull big-endian values out of a byte stream.
+ */
+
+inline unsigned genGetU1(const BYTE* addr)
+{
+ return addr[0];
+}
+
+inline signed genGetI1(const BYTE* addr)
+{
+ return (signed char)addr[0];
+}
+
+inline unsigned genGetU2(const BYTE* addr)
+{
+ return (addr[0] << 8) | addr[1];
+}
+
+inline signed genGetI2(const BYTE* addr)
+{
+ return (signed short)((addr[0] << 8) | addr[1]);
+}
+
+inline unsigned genGetU4(const BYTE* addr)
+{
+ return (addr[0] << 24) | (addr[1] << 16) | (addr[2] << 8) | addr[3];
+}
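+
+// For example, for the byte stream { 0x12, 0x34, 0x56, 0x78 }, genGetU2 returns 0x1234 and
+// genGetU4 returns 0x12345678 (big-endian interpretation).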
+
+/*****************************************************************************/
+// Helpers to pull little-endian values out of a byte stream.
+
+inline unsigned __int8 getU1LittleEndian(const BYTE* ptr)
+{
+ return *(UNALIGNED unsigned __int8*)ptr;
+}
+
+inline unsigned __int16 getU2LittleEndian(const BYTE* ptr)
+{
+ return GET_UNALIGNED_VAL16(ptr);
+}
+
+inline unsigned __int32 getU4LittleEndian(const BYTE* ptr)
+{
+ return GET_UNALIGNED_VAL32(ptr);
+}
+
+inline signed __int8 getI1LittleEndian(const BYTE* ptr)
+{
+ return *(UNALIGNED signed __int8*)ptr;
+}
+
+inline signed __int16 getI2LittleEndian(const BYTE* ptr)
+{
+ return GET_UNALIGNED_VAL16(ptr);
+}
+
+inline signed __int32 getI4LittleEndian(const BYTE* ptr)
+{
+ return GET_UNALIGNED_VAL32(ptr);
+}
+
+inline signed __int64 getI8LittleEndian(const BYTE* ptr)
+{
+ return GET_UNALIGNED_VAL64(ptr);
+}
+
+inline float getR4LittleEndian(const BYTE* ptr)
+{
+ __int32 val = getI4LittleEndian(ptr);
+ return *(float*)&val;
+}
+
+inline double getR8LittleEndian(const BYTE* ptr)
+{
+ __int64 val = getI8LittleEndian(ptr);
+ return *(double*)&val;
+}
+
+/*****************************************************************************
+ *
+ * Return the bitmask to use in the EXPSET_TP for the CSE with the given CSE index.
+ * Each GenTree has the following field:
+ * signed char gtCSEnum; // 0 or the CSE index (negated if def)
+ * So zero is reserved to mean this node is not a CSE
+ * and positive values indicate CSE uses and negative values indicate CSE defs.
+ * The caller of this method must pass a non-zero positive value.
+ * This precondition is checked by the assert on the first line of this method.
+ */
+
+inline EXPSET_TP genCSEnum2bit(unsigned index)
+{
+ assert((index > 0) && (index <= EXPSET_SZ));
+
+ return ((EXPSET_TP)1 << (index - 1));
+}
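+
+// For example, genCSEnum2bit(1) == 0x1 and genCSEnum2bit(3) == 0x4: CSE index N maps to
+// bit position N - 1 in the EXPSET_TP bitmask.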
+
+#ifdef DEBUG
+const char* genES2str(EXPSET_TP set);
+const char* refCntWtd2str(unsigned refCntWtd);
+#endif
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX GenTree XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+void* GenTree::operator new(size_t sz, Compiler* comp, genTreeOps oper)
+{
+#if SMALL_TREE_NODES
+ size_t size = GenTree::s_gtNodeSizes[oper];
+#else
+ size_t size = TREE_NODE_SZ_LARGE;
+#endif
+
+#if MEASURE_NODE_SIZE
+ genNodeSizeStats.genTreeNodeCnt += 1;
+ genNodeSizeStats.genTreeNodeSize += size;
+ genNodeSizeStats.genTreeNodeActualSize += sz;
+
+ genNodeSizeStatsPerFunc.genTreeNodeCnt += 1;
+ genNodeSizeStatsPerFunc.genTreeNodeSize += size;
+ genNodeSizeStatsPerFunc.genTreeNodeActualSize += sz;
+#endif // MEASURE_NODE_SIZE
+
+ assert(size >= sz);
+ return comp->compGetMem(size, CMK_ASTNode);
+}
+
+// GenTree constructor
+inline GenTree::GenTree(genTreeOps oper, var_types type DEBUGARG(bool largeNode))
+{
+ gtOper = oper;
+ gtType = type;
+ gtFlags = 0;
+ gtLIRFlags = 0;
+#ifdef DEBUG
+ gtDebugFlags = 0;
+#endif // DEBUG
+#ifdef LEGACY_BACKEND
+ gtUsedRegs = 0;
+#endif // LEGACY_BACKEND
+#if FEATURE_ANYCSE
+ gtCSEnum = NO_CSE;
+#endif // FEATURE_ANYCSE
+#if ASSERTION_PROP
+ ClearAssertion();
+#endif
+
+#if FEATURE_STACK_FP_X87
+ gtFPlvl = 0;
+#endif
+
+ gtNext = nullptr;
+ gtPrev = nullptr;
+ gtRegNum = REG_NA;
+ INDEBUG(gtRegTag = GT_REGTAG_NONE;)
+
+ INDEBUG(gtCostsInitialized = false;)
+
+#ifdef DEBUG
+#if SMALL_TREE_NODES
+ size_t size = GenTree::s_gtNodeSizes[oper];
+ if (size == TREE_NODE_SZ_SMALL && !largeNode)
+ {
+ gtDebugFlags |= GTF_DEBUG_NODE_SMALL;
+ }
+ else if (size == TREE_NODE_SZ_LARGE || largeNode)
+ {
+ gtDebugFlags |= GTF_DEBUG_NODE_LARGE;
+ }
+ else
+ {
+ assert(!"bogus node size");
+ }
+#endif
+#endif
+
+#ifdef DEBUG
+ gtSeqNum = 0;
+ gtTreeID = JitTls::GetCompiler()->compGenTreeID++;
+ gtVNPair.SetBoth(ValueNumStore::NoVN);
+ gtRegTag = GT_REGTAG_NONE;
+ gtOperSave = GT_NONE;
+#endif
+}
+
+/*****************************************************************************/
+
+inline GenTreeStmt* Compiler::gtNewStmt(GenTreePtr expr, IL_OFFSETX offset)
+{
+ /* NOTE - GT_STMT is now a small node in retail */
+
+ GenTreeStmt* stmt = new (this, GT_STMT) GenTreeStmt(expr, offset);
+
+ return stmt;
+}
+
+/*****************************************************************************/
+
+inline GenTreePtr Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTreePtr op1, bool doSimplifications)
+{
+ assert((GenTree::OperKind(oper) & (GTK_UNOP | GTK_BINOP)) != 0);
+ assert((GenTree::OperKind(oper) & GTK_EXOP) ==
+ 0); // Can't use this to construct any types that extend unary/binary operator.
+ assert(op1 != nullptr || oper == GT_PHI || oper == GT_RETFILT || oper == GT_NOP ||
+ (oper == GT_RETURN && type == TYP_VOID));
+
+ if (doSimplifications)
+ {
+ // We do some simplifications here.
+ // If this gets to be too many, try a switch...
+ // TODO-Cleanup: With the factoring out of array bounds checks, it should not be the
+ // case that we need to check for the array index case here, but without this check
+ // we get failures (see for example jit\Directed\Languages\Python\test_methods_d.exe)
+ if (oper == GT_IND)
+ {
+ // IND(ADDR(IND(x))) == IND(x)
+ if (op1->gtOper == GT_ADDR)
+ {
+ if (op1->gtOp.gtOp1->gtOper == GT_IND && (op1->gtOp.gtOp1->gtFlags & GTF_IND_ARR_INDEX) == 0)
+ {
+ op1 = op1->gtOp.gtOp1->gtOp.gtOp1;
+ }
+ }
+ }
+ else if (oper == GT_ADDR)
+ {
+ // if "x" is not an array index, ADDR(IND(x)) == x
+ if (op1->gtOper == GT_IND && (op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
+ {
+ return op1->gtOp.gtOp1;
+ }
+ }
+ }
+
+ GenTreePtr node = new (this, oper) GenTreeOp(oper, type, op1, nullptr);
+
+ //
+ // the GT_ADDR of a Local Variable implies GTF_ADDR_ONSTACK
+ //
+ if ((oper == GT_ADDR) && (op1->OperGet() == GT_LCL_VAR))
+ {
+ node->gtFlags |= GTF_ADDR_ONSTACK;
+ }
+
+ return node;
+}
+
+// Returns an opcode that is of the largest node size in use.
+inline genTreeOps LargeOpOpcode()
+{
+#if SMALL_TREE_NODES
+ // Allocate a large node
+ assert(GenTree::s_gtNodeSizes[GT_CALL] == TREE_NODE_SZ_LARGE);
+#endif
+ return GT_CALL;
+}
+
+/******************************************************************************
+ *
+ * Use to create nodes which may later be morphed to another (big) operator
+ */
+
+inline GenTreePtr Compiler::gtNewLargeOperNode(genTreeOps oper, var_types type, GenTreePtr op1, GenTreePtr op2)
+{
+ assert((GenTree::OperKind(oper) & (GTK_UNOP | GTK_BINOP)) != 0);
+ assert((GenTree::OperKind(oper) & GTK_EXOP) ==
+ 0); // Can't use this to construct any types that extend unary/binary operator.
+#if SMALL_TREE_NODES
+ // Allocate a large node
+
+ assert(GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL);
+
+ GenTreePtr node = new (this, LargeOpOpcode()) GenTreeOp(oper, type, op1, op2 DEBUGARG(/*largeNode*/ true));
+#else
+ GenTreePtr node = new (this, oper) GenTreeOp(oper, type, op1, op2);
+#endif
+
+ return node;
+}
+
+/*****************************************************************************
+ *
+ * Allocates an integer constant entry that represents a handle (something
+ * that may need to be fixed up).
+ */
+
+inline GenTreePtr Compiler::gtNewIconHandleNode(
+ size_t value, unsigned flags, FieldSeqNode* fields, unsigned handle1, void* handle2)
+{
+ GenTreePtr node;
+ assert((flags & (GTF_ICON_HDL_MASK | GTF_ICON_FIELD_OFF)) != 0);
+
+ // Interpret "fields == NULL" as "not a field."
+ if (fields == nullptr)
+ {
+ fields = FieldSeqStore::NotAField();
+ }
+
+#if defined(LATE_DISASM)
+ node = new (this, LargeOpOpcode()) GenTreeIntCon(TYP_I_IMPL, value, fields DEBUGARG(/*largeNode*/ true));
+
+ node->gtIntCon.gtIconHdl.gtIconHdl1 = handle1;
+ node->gtIntCon.gtIconHdl.gtIconHdl2 = handle2;
+#else
+ node = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, value, fields);
+#endif
+ node->gtFlags |= flags;
+ return node;
+}
+
+/*****************************************************************************
+ *
+ * It may not be allowed to embed HANDLEs directly into the JITed code (e.g.,
+ * as arguments to JIT helpers). Get a corresponding value that can be embedded.
+ * These are versions for each specific type of HANDLE
+ */
+
+inline GenTreePtr Compiler::gtNewIconEmbScpHndNode(CORINFO_MODULE_HANDLE scpHnd, unsigned hnd1, void* hnd2)
+{
+ void *embedScpHnd, *pEmbedScpHnd;
+
+ embedScpHnd = (void*)info.compCompHnd->embedModuleHandle(scpHnd, &pEmbedScpHnd);
+
+ assert((!embedScpHnd) != (!pEmbedScpHnd));
+
+ return gtNewIconEmbHndNode(embedScpHnd, pEmbedScpHnd, GTF_ICON_SCOPE_HDL, hnd1, hnd2, scpHnd);
+}
+
+//-----------------------------------------------------------------------------
+
+inline GenTreePtr Compiler::gtNewIconEmbClsHndNode(CORINFO_CLASS_HANDLE clsHnd, unsigned hnd1, void* hnd2)
+{
+ void *embedClsHnd, *pEmbedClsHnd;
+
+ embedClsHnd = (void*)info.compCompHnd->embedClassHandle(clsHnd, &pEmbedClsHnd);
+
+ assert((!embedClsHnd) != (!pEmbedClsHnd));
+
+ return gtNewIconEmbHndNode(embedClsHnd, pEmbedClsHnd, GTF_ICON_CLASS_HDL, hnd1, hnd2, clsHnd);
+}
+
+//-----------------------------------------------------------------------------
+
+inline GenTreePtr Compiler::gtNewIconEmbMethHndNode(CORINFO_METHOD_HANDLE methHnd, unsigned hnd1, void* hnd2)
+{
+ void *embedMethHnd, *pEmbedMethHnd;
+
+ embedMethHnd = (void*)info.compCompHnd->embedMethodHandle(methHnd, &pEmbedMethHnd);
+
+ assert((!embedMethHnd) != (!pEmbedMethHnd));
+
+ return gtNewIconEmbHndNode(embedMethHnd, pEmbedMethHnd, GTF_ICON_METHOD_HDL, hnd1, hnd2, methHnd);
+}
+
+//-----------------------------------------------------------------------------
+
+inline GenTreePtr Compiler::gtNewIconEmbFldHndNode(CORINFO_FIELD_HANDLE fldHnd, unsigned hnd1, void* hnd2)
+{
+ void *embedFldHnd, *pEmbedFldHnd;
+
+ embedFldHnd = (void*)info.compCompHnd->embedFieldHandle(fldHnd, &pEmbedFldHnd);
+
+ assert((!embedFldHnd) != (!pEmbedFldHnd));
+
+ return gtNewIconEmbHndNode(embedFldHnd, pEmbedFldHnd, GTF_ICON_FIELD_HDL, hnd1, hnd2, fldHnd);
+}
+
+/*****************************************************************************/
+
+inline GenTreeCall* Compiler::gtNewHelperCallNode(unsigned helper, var_types type, unsigned flags, GenTreeArgList* args)
+{
+ GenTreeCall* result = gtNewCallNode(CT_HELPER, eeFindHelper(helper), type, args);
+ result->gtFlags |= flags;
+
+#if DEBUG
+ // Helper calls are never candidates.
+
+ result->gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
+#endif
+
+ return result;
+}
+
+//------------------------------------------------------------------------
+// gtNewAllocObjNode: A little helper to create an object allocation node.
+//
+// Arguments:
+// helper - Value returned by ICorJitInfo::getNewHelper
+// clsHnd - Corresponding class handle
+// type - Tree return type (e.g. TYP_REF)
+// op1 - Node containing an address of VtablePtr
+//
+// Return Value:
+// Returns GT_ALLOCOBJ node that will be later morphed into an
+// allocation helper call or local variable allocation on the stack.
+inline GenTreePtr Compiler::gtNewAllocObjNode(unsigned int helper,
+ CORINFO_CLASS_HANDLE clsHnd,
+ var_types type,
+ GenTreePtr op1)
+{
+ GenTreePtr node = new (this, GT_ALLOCOBJ) GenTreeAllocObj(type, helper, clsHnd, op1);
+ return node;
+}
+
+/*****************************************************************************/
+
+inline GenTreePtr Compiler::gtNewCodeRef(BasicBlock* block)
+{
+ GenTreePtr node = new (this, GT_LABEL) GenTreeLabel(block);
+ return node;
+}
+
+/*****************************************************************************
+ *
+ * A little helper to create a data member reference node.
+ */
+
+inline GenTreePtr Compiler::gtNewFieldRef(
+ var_types typ, CORINFO_FIELD_HANDLE fldHnd, GenTreePtr obj, DWORD offset, bool nullcheck)
+{
+#if SMALL_TREE_NODES
+ /* 'GT_FIELD' nodes may later get transformed into 'GT_IND' */
+
+ assert(GenTree::s_gtNodeSizes[GT_IND] <= GenTree::s_gtNodeSizes[GT_FIELD]);
+ GenTreePtr tree = new (this, GT_FIELD) GenTreeField(typ);
+#else
+ GenTreePtr tree = new (this, GT_FIELD) GenTreeField(typ);
+#endif
+ tree->gtField.gtFldObj = obj;
+ tree->gtField.gtFldHnd = fldHnd;
+ tree->gtField.gtFldOffset = offset;
+ tree->gtFlags |= GTF_GLOB_REF;
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ tree->gtField.gtFieldLookup.addr = nullptr;
+#endif
+
+ if (nullcheck)
+ {
+ tree->gtFlags |= GTF_FLD_NULLCHECK;
+ }
+
+ // If "obj" is the address of a local, note that a field of that struct local has been accessed.
+ if (obj != nullptr && obj->OperGet() == GT_ADDR && varTypeIsStruct(obj->gtOp.gtOp1) &&
+ obj->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
+ {
+ unsigned lclNum = obj->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ lvaTable[lclNum].lvFieldAccessed = 1;
+ }
+
+ return tree;
+}
+
+/*****************************************************************************
+ *
+ * A little helper to create an array index node.
+ */
+
+inline GenTreePtr Compiler::gtNewIndexRef(var_types typ, GenTreePtr arrayOp, GenTreePtr indexOp)
+{
+ GenTreeIndex* gtIndx = new (this, GT_INDEX) GenTreeIndex(typ, arrayOp, indexOp, genTypeSize(typ));
+
+ return gtIndx;
+}
+
+/*****************************************************************************
+ *
+ * Create (and check for) a "nothing" node, i.e. a node that doesn't produce
+ * any code. We currently use a "nop" node of type void for this purpose.
+ */
+
+inline GenTreePtr Compiler::gtNewNothingNode()
+{
+ return new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_VOID);
+}
+/*****************************************************************************/
+
+inline bool GenTree::IsNothingNode() const
+{
+ return (gtOper == GT_NOP && gtType == TYP_VOID);
+}
+
+/*****************************************************************************
+ *
+ * Change the given node to a NOP - it may later be changed to a GT_COMMA
+ *
+ *****************************************************************************/
+
+inline void GenTree::gtBashToNOP()
+{
+ ChangeOper(GT_NOP);
+
+ gtType = TYP_VOID;
+ gtOp.gtOp1 = gtOp.gtOp2 = nullptr;
+
+ gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
+}
+
+// Return a new arg placeholder node. It does nothing by itself, but it has a type associated
+// with it so we can keep track of register arguments in the lists associated with call nodes.
+
+inline GenTreePtr Compiler::gtNewArgPlaceHolderNode(var_types type, CORINFO_CLASS_HANDLE clsHnd)
+{
+ GenTreePtr node = new (this, GT_ARGPLACE) GenTreeArgPlace(type, clsHnd);
+ return node;
+}
+
+/*****************************************************************************/
+
+inline GenTreePtr Compiler::gtUnusedValNode(GenTreePtr expr)
+{
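+ // The resulting tree has the shape COMMA(expr, NOP): 'expr' is evaluated only for its
+ // side effects and the TYP_VOID value of the comma is not used.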
+ return gtNewOperNode(GT_COMMA, TYP_VOID, expr, gtNewNothingNode());
+}
+
+/*****************************************************************************
+ *
+ * A wrapper for gtSetEvalOrder and gtComputeFPlvls.
+ * Necessary because the FP levels may need to be recomputed if we reverse
+ * the operands.
+ */
+
+inline void Compiler::gtSetStmtInfo(GenTree* stmt)
+{
+ assert(stmt->gtOper == GT_STMT);
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+
+#if FEATURE_STACK_FP_X87
+ /* We will try to compute the FP stack level at each node */
+ codeGen->genResetFPstkLevel();
+
+ /* Sometimes we need to redo the FP level computation */
+ gtFPstLvlRedo = false;
+#endif // FEATURE_STACK_FP_X87
+
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ gtDispTree(stmt);
+ }
+#endif
+
+ /* Recursively process the expression */
+
+ gtSetEvalOrder(expr);
+
+ // Set the statement to have the same costs as the top node of the tree.
+ stmt->CopyCosts(expr);
+
+#if FEATURE_STACK_FP_X87
+ /* Unused float values leave one operand on the stack */
+ assert(codeGen->genGetFPstkLevel() == 0 || codeGen->genGetFPstkLevel() == 1);
+
+ /* Do we need to recompute FP stack levels? */
+
+ if (gtFPstLvlRedo)
+ {
+ codeGen->genResetFPstkLevel();
+ gtComputeFPlvls(expr);
+ assert(codeGen->genGetFPstkLevel() == 0 || codeGen->genGetFPstkLevel() == 1);
+ }
+#endif // FEATURE_STACK_FP_X87
+}
+
+#if FEATURE_STACK_FP_X87
+inline unsigned Compiler::gtSetEvalOrderAndRestoreFPstkLevel(GenTree* tree)
+{
+ unsigned FPlvlSave = codeGen->genFPstkLevel;
+ unsigned result = gtSetEvalOrder(tree);
+ codeGen->genFPstkLevel = FPlvlSave;
+
+ return result;
+}
+#else // !FEATURE_STACK_FP_X87
+inline unsigned Compiler::gtSetEvalOrderAndRestoreFPstkLevel(GenTree* tree)
+{
+ return gtSetEvalOrder(tree);
+}
+#endif // FEATURE_STACK_FP_X87
+
+/*****************************************************************************/
+#if SMALL_TREE_NODES
+/*****************************************************************************/
+
+inline void GenTree::SetOper(genTreeOps oper, ValueNumberUpdate vnUpdate)
+{
+ assert(((gtDebugFlags & GTF_DEBUG_NODE_SMALL) != 0) != ((gtDebugFlags & GTF_DEBUG_NODE_LARGE) != 0));
+
+ /* Make sure the node isn't too small for the new operator */
+
+ assert(GenTree::s_gtNodeSizes[gtOper] == TREE_NODE_SZ_SMALL ||
+ GenTree::s_gtNodeSizes[gtOper] == TREE_NODE_SZ_LARGE);
+ assert(GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL || GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_LARGE);
+
+ assert(GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL || (gtDebugFlags & GTF_DEBUG_NODE_LARGE));
+
+ gtOper = oper;
+
+#ifdef DEBUG
+ // Maintain the invariant that unary operators always have NULL gtOp2.
+ // If we ever start explicitly allocating GenTreeUnOp nodes, we wouldn't be
+ // able to do that (but if we did, we'd have to have a check in gtOp -- perhaps
+ // a gtUnOp...)
+ if (OperKind(oper) == GTK_UNOP)
+ {
+ gtOp.gtOp2 = nullptr;
+ }
+#endif // DEBUG
+
+#if DEBUGGABLE_GENTREE
+ // Until we eliminate SetOper/ChangeOper, we also change the vtable of the node, so that
+ // it shows up correctly in the debugger.
+ SetVtableForOper(oper);
+#endif // DEBUGGABLE_GENTREE
+
+ if (oper == GT_CNS_INT)
+ {
+ gtIntCon.gtFieldSeq = nullptr;
+ }
+
+ if (vnUpdate == CLEAR_VN)
+ {
+ // Clear the ValueNum field as well.
+ gtVNPair.SetBoth(ValueNumStore::NoVN);
+ }
+}
+
+inline void GenTree::CopyFrom(const GenTree* src, Compiler* comp)
+{
+ /* The source may be big only if the target is also a big node */
+
+ assert((gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[src->gtOper] == TREE_NODE_SZ_SMALL);
+ GenTreePtr prev = gtPrev;
+ GenTreePtr next = gtNext;
+ // The VTable pointer is copied intentionally here
+ memcpy((void*)this, (void*)src, src->GetNodeSize());
+ this->gtPrev = prev;
+ this->gtNext = next;
+#ifdef DEBUG
+ gtSeqNum = 0;
+#endif
+ // Transfer any annotations.
+ if (src->OperGet() == GT_IND && src->gtFlags & GTF_IND_ARR_INDEX)
+ {
+ ArrayInfo arrInfo;
+ bool b = comp->GetArrayInfoMap()->Lookup(src, &arrInfo);
+ assert(b);
+ comp->GetArrayInfoMap()->Set(this, arrInfo);
+ }
+}
+
+inline GenTreePtr Compiler::gtNewCastNode(var_types typ, GenTreePtr op1, var_types castType)
+{
+ GenTreePtr res = new (this, GT_CAST) GenTreeCast(typ, op1, castType);
+ return res;
+}
+
+inline GenTreePtr Compiler::gtNewCastNodeL(var_types typ, GenTreePtr op1, var_types castType)
+{
+ /* Some casts get transformed into 'GT_CALL' or 'GT_IND' nodes */
+
+ assert(GenTree::s_gtNodeSizes[GT_CALL] >= GenTree::s_gtNodeSizes[GT_CAST]);
+ assert(GenTree::s_gtNodeSizes[GT_CALL] >= GenTree::s_gtNodeSizes[GT_IND]);
+
+ /* Make a big node first and then change it to be GT_CAST */
+
+ GenTreePtr res = new (this, LargeOpOpcode()) GenTreeCast(typ, op1, castType DEBUGARG(/*largeNode*/ true));
+ return res;
+}
+
+/*****************************************************************************/
+#else // SMALL_TREE_NODES
+/*****************************************************************************/
+
+inline void GenTree::InitNodeSize()
+{
+}
+
+inline void GenTree::SetOper(genTreeOps oper, ValueNumberUpdate vnUpdate)
+{
+ gtOper = oper;
+
+ if (vnUpdate == CLEAR_VN)
+ {
+ // Clear the ValueNum field.
+ gtVNPair.SetBoth(ValueNumStore::NoVN);
+ }
+}
+
+inline void GenTree::CopyFrom(GenTreePtr src)
+{
+ *this = *src;
+#ifdef DEBUG
+ gtSeqNum = 0;
+#endif
+}
+
+inline GenTreePtr Compiler::gtNewCastNode(var_types typ, GenTreePtr op1, var_types castType)
+{
+ GenTreePtr tree = gtNewOperNode(GT_CAST, typ, op1);
+ tree->gtCast.gtCastType = castType;
+ return tree;
+}
+
+inline GenTreePtr Compiler::gtNewCastNodeL(var_types typ, GenTreePtr op1, var_types castType)
+{
+ return gtNewCastNode(typ, op1, castType);
+}
+
+/*****************************************************************************/
+#endif // SMALL_TREE_NODES
+/*****************************************************************************/
+
+inline void GenTree::SetOperResetFlags(genTreeOps oper)
+{
+ SetOper(oper);
+ gtFlags &= GTF_NODE_MASK;
+}
+
+inline void GenTree::ChangeOperConst(genTreeOps oper)
+{
+#ifdef _TARGET_64BIT_
+ assert(oper != GT_CNS_LNG); // We should never see a GT_CNS_LNG for a 64-bit target!
+#endif
+ assert(OperIsConst(oper)); // use ChangeOper() instead
+ SetOperResetFlags(oper);
+ // Some constant subtypes have additional fields that must be initialized.
+ if (oper == GT_CNS_INT)
+ {
+ gtIntCon.gtFieldSeq = FieldSeqStore::NotAField();
+ }
+}
+
+inline void GenTree::ChangeOper(genTreeOps oper, ValueNumberUpdate vnUpdate)
+{
+ assert(!OperIsConst(oper)); // use ChangeOperLeaf() instead
+
+ SetOper(oper, vnUpdate);
+ gtFlags &= GTF_COMMON_MASK;
+
+ // Do "oper"-specific initializations...
+ switch (oper)
+ {
+ case GT_LCL_FLD:
+ gtLclFld.gtLclOffs = 0;
+ gtLclFld.gtFieldSeq = FieldSeqStore::NotAField();
+ break;
+ default:
+ break;
+ }
+}
+
+inline void GenTree::ChangeOperUnchecked(genTreeOps oper)
+{
+ gtOper = oper; // Trust the caller and don't use SetOper()
+ gtFlags &= GTF_COMMON_MASK;
+}
+
+/*****************************************************************************
+ * Returns true if the node is &var (created by ldarga and ldloca)
+ */
+
+inline bool GenTree::IsVarAddr() const
+{
+ if (gtOper == GT_ADDR)
+ {
+ if (gtFlags & GTF_ADDR_ONSTACK)
+ {
+ assert((gtType == TYP_BYREF) || (gtType == TYP_I_IMPL));
+ return true;
+ }
+ }
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the node is of the "ovf" variety, for example, add.ovf.i1.
+ * + gtOverflow() can only be called for valid operators (that is, we know it is one
+ * of the operators which may have GTF_OVERFLOW set).
+ * + gtOverflowEx() is more expensive, and should be called only if gtOper may be
+ * an operator for which GTF_OVERFLOW is invalid.
+ */
+
+inline bool GenTree::gtOverflow() const
+{
+#if !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+ assert(gtOper == GT_MUL || gtOper == GT_CAST || gtOper == GT_ADD || gtOper == GT_SUB || gtOper == GT_ASG_ADD ||
+ gtOper == GT_ASG_SUB || gtOper == GT_ADD_LO || gtOper == GT_SUB_LO || gtOper == GT_ADD_HI ||
+ gtOper == GT_SUB_HI);
+#else
+ assert(gtOper == GT_MUL || gtOper == GT_CAST || gtOper == GT_ADD || gtOper == GT_SUB || gtOper == GT_ASG_ADD ||
+ gtOper == GT_ASG_SUB);
+#endif
+
+ if (gtFlags & GTF_OVERFLOW)
+ {
+ assert(varTypeIsIntegral(TypeGet()));
+
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+inline bool GenTree::gtOverflowEx() const
+{
+ if (gtOper == GT_MUL || gtOper == GT_CAST || gtOper == GT_ADD || gtOper == GT_SUB ||
+#if !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+ gtOper == GT_ADD_HI || gtOper == GT_SUB_HI ||
+#endif
+ gtOper == GT_ASG_ADD || gtOper == GT_ASG_SUB)
+ {
+ return gtOverflow();
+ }
+ return false;
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX LclVarsInfo XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+inline bool Compiler::lvaHaveManyLocals() const
+{
+ return (lvaCount >= lclMAX_TRACKED);
+}
+
+/*****************************************************************************
+ *
+ * Allocate a temporary variable or a set of temp variables.
+ */
+
+inline unsigned Compiler::lvaGrabTemp(bool shortLifetime DEBUGARG(const char* reason))
+{
+ if (compIsForInlining())
+ {
+ // Grab the temp using Inliner's Compiler instance.
+ Compiler* pComp = impInlineInfo->InlinerCompiler; // The Compiler instance for the caller (i.e. the inliner)
+
+ if (pComp->lvaHaveManyLocals())
+ {
+ // Don't create more LclVars when inlining
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
+ }
+
+ unsigned tmpNum = pComp->lvaGrabTemp(shortLifetime DEBUGARG(reason));
+ lvaTable = pComp->lvaTable;
+ lvaCount = pComp->lvaCount;
+ lvaTableCnt = pComp->lvaTableCnt;
+ return tmpNum;
+ }
+
+ // You cannot allocate more space after frame layout!
+ noway_assert(lvaDoneFrameLayout < Compiler::TENTATIVE_FRAME_LAYOUT);
+
+ /* Check if the lvaTable has to be grown */
+ if (lvaCount + 1 > lvaTableCnt)
+ {
+ unsigned newLvaTableCnt = lvaCount + (lvaCount / 2) + 1;
+
+ // Check for overflow
+ if (newLvaTableCnt <= lvaCount)
+ {
+ IMPL_LIMITATION("too many locals");
+ }
+
+ // Note: compGetMemArray might throw.
+ LclVarDsc* newLvaTable = (LclVarDsc*)compGetMemArray(newLvaTableCnt, sizeof(*lvaTable), CMK_LvaTable);
+
+ memcpy(newLvaTable, lvaTable, lvaCount * sizeof(*lvaTable));
+ memset(newLvaTable + lvaCount, 0, (newLvaTableCnt - lvaCount) * sizeof(*lvaTable));
+
+ for (unsigned i = lvaCount; i < newLvaTableCnt; i++)
+ {
+ new (&newLvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
+ }
+
+#if 0
+ // TODO-Cleanup: Enable this and test.
+#ifdef DEBUG
+ // Fill the old table with junk so that we can detect unintended use.
+ memset(lvaTable, fDefaultFill2.val_DontUse_(CLRConfig::INTERNAL_JitDefaultFill, 0xFF), lvaCount * sizeof(*lvaTable));
+#endif
+#endif
+
+ lvaTableCnt = newLvaTableCnt;
+ lvaTable = newLvaTable;
+ }
+
+ lvaTable[lvaCount].lvType = TYP_UNDEF; // Initialize lvType, lvIsTemp and lvOnFrame
+ lvaTable[lvaCount].lvIsTemp = shortLifetime;
+ lvaTable[lvaCount].lvOnFrame = true;
+
+ unsigned tempNum = lvaCount;
+
+ lvaCount++;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nlvaGrabTemp returning %d (", tempNum);
+ gtDispLclVar(tempNum, false);
+ printf(")%s called for %s.\n", shortLifetime ? "" : " (a long lifetime temp)", reason);
+ }
+#endif // DEBUG
+
+ return tempNum;
+}
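+
+// A typical use (illustrative sketch only): grab a short-lived temp and give it a type
+// before referencing it, e.g.
+//
+//   unsigned tmpNum = lvaGrabTemp(true DEBUGARG("example temp"));
+//   lvaTable[tmpNum].lvType = TYP_INT;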
+
+inline unsigned Compiler::lvaGrabTemps(unsigned cnt DEBUGARG(const char* reason))
+{
+ if (compIsForInlining())
+ {
+ // Grab the temps using Inliner's Compiler instance.
+ unsigned tmpNum = impInlineInfo->InlinerCompiler->lvaGrabTemps(cnt DEBUGARG(reason));
+
+ lvaTable = impInlineInfo->InlinerCompiler->lvaTable;
+ lvaCount = impInlineInfo->InlinerCompiler->lvaCount;
+ lvaTableCnt = impInlineInfo->InlinerCompiler->lvaTableCnt;
+ return tmpNum;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nlvaGrabTemps(%d) returning %d..%d (long lifetime temps) called for %s", cnt, lvaCount,
+ lvaCount + cnt - 1, reason);
+ }
+#endif
+
+ // You cannot allocate more space after frame layout!
+ noway_assert(lvaDoneFrameLayout < Compiler::TENTATIVE_FRAME_LAYOUT);
+
+ /* Check if the lvaTable has to be grown */
+ if (lvaCount + cnt > lvaTableCnt)
+ {
+ unsigned newLvaTableCnt = lvaCount + max(lvaCount / 2 + 1, cnt);
+
+ // Check for overflow
+ if (newLvaTableCnt <= lvaCount)
+ {
+ IMPL_LIMITATION("too many locals");
+ }
+
+ // Note: compGetMemArray might throw.
+ LclVarDsc* newLvaTable = (LclVarDsc*)compGetMemArray(newLvaTableCnt, sizeof(*lvaTable), CMK_LvaTable);
+
+ memcpy(newLvaTable, lvaTable, lvaCount * sizeof(*lvaTable));
+ memset(newLvaTable + lvaCount, 0, (newLvaTableCnt - lvaCount) * sizeof(*lvaTable));
+ for (unsigned i = lvaCount; i < newLvaTableCnt; i++)
+ {
+ new (&newLvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
+ }
+
+#if 0
+#ifdef DEBUG
+ // TODO-Cleanup: Enable this and test.
+ // Fill the old table with junk so that we can detect unintended use.
+ memset(lvaTable, fDefaultFill2.val_DontUse_(CLRConfig::INTERNAL_JitDefaultFill, 0xFF), lvaCount * sizeof(*lvaTable));
+#endif
+#endif
+
+ lvaTableCnt = newLvaTableCnt;
+ lvaTable = newLvaTable;
+ }
+
+ unsigned tempNum = lvaCount;
+
+ while (cnt--)
+ {
+ lvaTable[lvaCount].lvType = TYP_UNDEF; // Initialize lvType, lvIsTemp and lvOnFrame
+ lvaTable[lvaCount].lvIsTemp = false;
+ lvaTable[lvaCount].lvOnFrame = true;
+ lvaCount++;
+ }
+
+ return tempNum;
+}
+
+/*****************************************************************************
+ *
+ * Allocate a temporary variable which is implicitly used by code-gen
+ * There will be no explicit references to the temp, and so it needs to
+ * be forced to be kept alive, and not be optimized away.
+ */
+
+inline unsigned Compiler::lvaGrabTempWithImplicitUse(bool shortLifetime DEBUGARG(const char* reason))
+{
+ if (compIsForInlining())
+ {
+ // Grab the temp using Inliner's Compiler instance.
+ unsigned tmpNum = impInlineInfo->InlinerCompiler->lvaGrabTempWithImplicitUse(shortLifetime DEBUGARG(reason));
+
+ lvaTable = impInlineInfo->InlinerCompiler->lvaTable;
+ lvaCount = impInlineInfo->InlinerCompiler->lvaCount;
+ lvaTableCnt = impInlineInfo->InlinerCompiler->lvaTableCnt;
+ return tmpNum;
+ }
+
+ unsigned lclNum = lvaGrabTemp(shortLifetime DEBUGARG(reason));
+
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ // This will prevent it from being optimized away
+ // TODO-CQ: We shouldn't have to go as far as to declare these
+ // address-exposed -- DoNotEnregister should suffice?
+ lvaSetVarAddrExposed(lclNum);
+
+ // We need lvRefCnt to be non-zero to prevent various asserts from firing.
+ varDsc->lvRefCnt = 1;
+ varDsc->lvRefCntWtd = BB_UNITY_WEIGHT;
+
+ return lclNum;
+}
+
+/*****************************************************************************
+ *
+ * If lvaTrackedFixed is false then set the lvaSortAgain flag
+ * (this allows us to grow the number of tracked variables)
+ * and zero lvRefCntWtd when lvRefCnt is zero
+ */
+
+inline void LclVarDsc::lvaResetSortAgainFlag(Compiler* comp)
+{
+ if (!comp->lvaTrackedFixed)
+ {
+ /* Flag this change, set lvaSortAgain to true */
+ comp->lvaSortAgain = true;
+ }
+ /* Set weighted ref count to zero if ref count is zero */
+ if (lvRefCnt == 0)
+ {
+ lvRefCntWtd = 0;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Decrement the ref counts for a local variable
+ */
+
+inline void LclVarDsc::decRefCnts(BasicBlock::weight_t weight, Compiler* comp, bool propagate)
+{
+ /* Decrement lvRefCnt and lvRefCntWtd */
+ Compiler::lvaPromotionType promotionType = DUMMY_INIT(Compiler::PROMOTION_TYPE_NONE);
+ if (varTypeIsStruct(lvType))
+ {
+ promotionType = comp->lvaGetPromotionType(this);
+ }
+
+ //
+ // Decrement counts on the local itself.
+ //
+ if (lvType != TYP_STRUCT || promotionType != Compiler::PROMOTION_TYPE_INDEPENDENT)
+ {
+ assert(lvRefCnt); // Can't decrement below zero
+
+ // TODO: Well, the assert above could be bogus.
+ // If lvRefCnt has overflowed before, then it might already have dropped to 0.
+ // Therefore we do need the following check to keep lvRefCnt from underflowing:
+ if (lvRefCnt > 0)
+ {
+ //
+ // Decrement lvRefCnt
+ //
+ lvRefCnt--;
+
+ //
+ // Decrement lvRefCntWtd
+ //
+ if (weight != 0)
+ {
+ if (lvIsTemp && (weight * 2 > weight))
+ {
+ weight *= 2;
+ }
+
+ if (lvRefCntWtd <= weight)
+ { // Can't go below zero
+ lvRefCntWtd = 0;
+ }
+ else
+ {
+ lvRefCntWtd -= weight;
+ }
+ }
+ }
+ }
+
+ if (varTypeIsStruct(lvType) && propagate)
+ {
+ // For promoted struct locals, decrement lvRefCnt on its field locals as well.
+ if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT ||
+ promotionType == Compiler::PROMOTION_TYPE_DEPENDENT)
+ {
+ for (unsigned i = lvFieldLclStart; i < lvFieldLclStart + lvFieldCnt; ++i)
+ {
+ comp->lvaTable[i].decRefCnts(comp->lvaMarkRefsWeight, comp, false); // Don't propagate
+ }
+ }
+ }
+
+ if (lvIsStructField && propagate)
+ {
+ // Depending on the promotion type, decrement the ref count for the parent struct as well.
+ promotionType = comp->lvaGetParentPromotionType(this);
+ LclVarDsc* parentvarDsc = &comp->lvaTable[lvParentLcl];
+ assert(!parentvarDsc->lvRegStruct);
+ if (promotionType == Compiler::PROMOTION_TYPE_DEPENDENT)
+ {
+ parentvarDsc->decRefCnts(comp->lvaMarkRefsWeight, comp, false); // Don't propagate
+ }
+ }
+
+ lvaResetSortAgainFlag(comp);
+
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ unsigned varNum = (unsigned)(this - comp->lvaTable);
+ assert(&comp->lvaTable[varNum] == this);
+ printf("New refCnts for V%02u: refCnt = %2u, refCntWtd = %s\n", varNum, lvRefCnt, refCntWtd2str(lvRefCntWtd));
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Increment the ref counts for a local variable
+ */
+
+inline void LclVarDsc::incRefCnts(BasicBlock::weight_t weight, Compiler* comp, bool propagate)
+{
+ Compiler::lvaPromotionType promotionType = DUMMY_INIT(Compiler::PROMOTION_TYPE_NONE);
+ if (varTypeIsStruct(lvType))
+ {
+ promotionType = comp->lvaGetPromotionType(this);
+ }
+
+ //
+ // Increment counts on the local itself.
+ //
+ if (lvType != TYP_STRUCT || promotionType != Compiler::PROMOTION_TYPE_INDEPENDENT)
+ {
+ //
+ // Increment lvRefCnt
+ //
+ int newRefCnt = lvRefCnt + 1;
+ if (newRefCnt == (unsigned short)newRefCnt) // lvRefCnt is an "unsigned short". Don't overflow it.
+ {
+ lvRefCnt = (unsigned short)newRefCnt;
+ }
+
+ // This fires when an uninitialized value for 'weight' is used (see lvaMarkRefsWeight)
+ assert(weight != 0xdddddddd);
+ //
+ // Increment lvRefCntWtd
+ //
+ if (weight != 0)
+ {
+ // We double the weight of internal temps
+ //
+ if (lvIsTemp && (weight * 2 > weight))
+ {
+ weight *= 2;
+ }
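+ // For example, an internal temp referenced in a block whose weight is 2 contributes
+ // 4 to lvRefCntWtd here, since internal temps have their weight doubled.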
+
+ unsigned newWeight = lvRefCntWtd + weight;
+ if (newWeight >= lvRefCntWtd)
+ { // lvRefCntWtd is an "unsigned". Don't overflow it
+ lvRefCntWtd = newWeight;
+ }
+ else
+ { // On overflow we assign ULONG_MAX
+ lvRefCntWtd = ULONG_MAX;
+ }
+ }
+ }
+
+ if (varTypeIsStruct(lvType) && propagate)
+ {
+ // For promoted struct locals, increment lvRefCnt on its field locals as well.
+ if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT ||
+ promotionType == Compiler::PROMOTION_TYPE_DEPENDENT)
+ {
+ for (unsigned i = lvFieldLclStart; i < lvFieldLclStart + lvFieldCnt; ++i)
+ {
+ comp->lvaTable[i].incRefCnts(comp->lvaMarkRefsWeight, comp, false); // Don't propagate
+ }
+ }
+ }
+
+ if (lvIsStructField && propagate)
+ {
+ // Depending on the promotion type, increment the ref count for the parent struct as well.
+ promotionType = comp->lvaGetParentPromotionType(this);
+ LclVarDsc* parentvarDsc = &comp->lvaTable[lvParentLcl];
+ assert(!parentvarDsc->lvRegStruct);
+ if (promotionType == Compiler::PROMOTION_TYPE_DEPENDENT)
+ {
+ parentvarDsc->incRefCnts(comp->lvaMarkRefsWeight, comp, false); // Don't propagate
+ }
+ }
+
+ lvaResetSortAgainFlag(comp);
+
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ unsigned varNum = (unsigned)(this - comp->lvaTable);
+ assert(&comp->lvaTable[varNum] == this);
+ printf("New refCnts for V%02u: refCnt = %2u, refCntWtd = %s\n", varNum, lvRefCnt, refCntWtd2str(lvRefCntWtd));
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Set the lvPrefReg field to reg
+ */
+
+inline void LclVarDsc::setPrefReg(regNumber regNum, Compiler* comp)
+{
+ regMaskTP regMask;
+ if (isFloatRegType(TypeGet()))
+ {
+ // Check for FP struct-promoted field being passed in integer register
+ //
+ if (!genIsValidFloatReg(regNum))
+ {
+ return;
+ }
+ regMask = genRegMaskFloat(regNum, TypeGet());
+ }
+ else
+ {
+ regMask = genRegMask(regNum);
+ }
+
+#ifdef _TARGET_ARM_
+ // Don't set a preferred register for a TYP_STRUCT that takes more than one register slot
+ if ((TypeGet() == TYP_STRUCT) && (lvSize() > REGSIZE_BYTES))
+ return;
+#endif
+
+ /* Only interested if we have a new register bit set */
+ if (lvPrefReg & regMask)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ if (lvPrefReg)
+ {
+ printf("Change preferred register for V%02u from ", this - comp->lvaTable);
+ dspRegMask(lvPrefReg);
+ }
+ else
+ {
+ printf("Set preferred register for V%02u", this - comp->lvaTable);
+ }
+ printf(" to ");
+ dspRegMask(regMask);
+ printf("\n");
+ }
+#endif
+
+ /* Overwrite the lvPrefReg field */
+
+ lvPrefReg = (regMaskSmall)regMask;
+
+#ifdef LEGACY_BACKEND
+ // This is specific to the classic register allocator.
+ // While walking the trees during reg predict we set the lvPrefReg mask
+ // and then re-sort the 'tracked' variable when the lvPrefReg mask changes.
+ if (lvTracked)
+ {
+ /* Flag this change, set lvaSortAgain to true */
+ comp->lvaSortAgain = true;
+ }
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Add regMask to the lvPrefReg field
+ */
+
+inline void LclVarDsc::addPrefReg(regMaskTP regMask, Compiler* comp)
+{
+ assert(regMask != RBM_NONE);
+
+#ifdef _TARGET_ARM_
+ // Don't set a preferred register for a TYP_STRUCT that takes more than one register slot
+ if ((lvType == TYP_STRUCT) && (lvSize() > sizeof(void*)))
+ return;
+#endif
+
+ /* Only interested if we have a new register bit set */
+ if (lvPrefReg & regMask)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ if (lvPrefReg)
+ {
+ printf("Additional preferred register for V%02u from ", this - comp->lvaTable);
+ dspRegMask(lvPrefReg);
+ }
+ else
+ {
+ printf("Set preferred register for V%02u", this - comp->lvaTable);
+ }
+ printf(" to ");
+ dspRegMask(lvPrefReg | regMask);
+ printf("\n");
+ }
+#endif
+
+ /* Update the lvPrefReg field */
+
+ lvPrefReg |= regMask;
+
+#ifdef LEGACY_BACKEND
+ // This is specific to the classic register allocator
+ // While walking the trees during reg predict we set the lvPrefReg mask
+ // and then resort the 'tracked' variable when the lvPrefReg mask changes
+ if (lvTracked)
+ {
+ /* Flag this change, set lvaSortAgain to true */
+ comp->lvaSortAgain = true;
+ }
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * The following returns the mask of all tracked locals
+ * referenced in a statement.
+ */
+
+inline VARSET_VALRET_TP Compiler::lvaStmtLclMask(GenTreePtr stmt)
+{
+ GenTreePtr tree;
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ VARSET_TP VARSET_INIT_NOCOPY(lclMask, VarSetOps::MakeEmpty(this));
+
+ assert(stmt->gtOper == GT_STMT);
+ assert(fgStmtListThreaded);
+
+ for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (tree->gtOper != GT_LCL_VAR)
+ {
+ continue;
+ }
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < lvaCount);
+ varDsc = lvaTable + varNum;
+
+ if (!varDsc->lvTracked)
+ {
+ continue;
+ }
+
+ VarSetOps::UnionD(this, lclMask, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
+ }
+
+ return lclMask;
+}
+
+/*****************************************************************************
+ * Returns true if the lvType is a TYP_REF or a TYP_BYREF.
+ * When the lvType is a TYP_STRUCT it searches the GC layout
+ * of the struct and returns true iff it contains a GC ref.
+ */
+
+inline bool Compiler::lvaTypeIsGC(unsigned varNum)
+{
+ if (lvaTable[varNum].TypeGet() == TYP_STRUCT)
+ {
+ assert(lvaTable[varNum].lvGcLayout != nullptr); // bits are initialized
+ return (lvaTable[varNum].lvStructGcCount != 0);
+ }
+ return (varTypeIsGC(lvaTable[varNum].TypeGet()));
+}
+
+/*****************************************************************************
+ Is this a synchronized instance method? If so, we will need to report "this"
+ in the GC information, so that the EE can release the object lock
+ in case of an exception
+
+ We also need to report "this" and keep it alive for all shared generic
+ code that gets the actual generic context from the "this" pointer and
+ has exception handlers.
+
+ For example, if List<T>::m() is shared between T = object and T = string,
+ then inside m() an exception handler "catch E<T>" needs to be able to fetch
+ the 'this' pointer to find out what 'T' is in order to tell if we
+ should catch the exception or not.
+ */
+
+inline bool Compiler::lvaKeepAliveAndReportThis()
+{
+ if (info.compIsStatic || lvaTable[0].TypeGet() != TYP_REF)
+ {
+ return false;
+ }
+
+#ifdef JIT32_GCENCODER
+ if (info.compFlags & CORINFO_FLG_SYNCH)
+ return true;
+
+ if (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS)
+ {
+ // TODO: Check if any of the exception clauses are
+ // typed using a generic type. Else, we do not need to report this.
+ if (info.compXcptnsCount > 0)
+ return true;
+
+ if (opts.compDbgCode)
+ return true;
+
+ if (lvaGenericsContextUsed)
+ return true;
+ }
+#else // !JIT32_GCENCODER
+ // If the generics context is the this pointer we need to report it if either
+ // the VM requires us to keep the generics context alive or it is used in a look-up.
+ // We keep it alive in the lookup scenario, even when the VM didn't ask us to,
+ // because collectible types need the generics context when gc-ing.
+ if ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) &&
+ (lvaGenericsContextUsed || (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_KEEP_ALIVE)))
+ {
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+/*****************************************************************************
+ Similar to lvaKeepAliveAndReportThis
+ */
+
+inline bool Compiler::lvaReportParamTypeArg()
+{
+ if (info.compMethodInfo->options & (CORINFO_GENERICS_CTXT_FROM_METHODDESC | CORINFO_GENERICS_CTXT_FROM_METHODTABLE))
+ {
+ assert(info.compTypeCtxtArg != -1);
+
+ // If the VM requires us to keep the generics context alive and report it (for example, if any catch
+ // clause catches a type that uses a generic parameter of this method) this flag will be set.
+ if (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_KEEP_ALIVE)
+ {
+ return true;
+ }
+
+ // Otherwise, if an exact type parameter is needed in the body, report the generics context.
+ // We do this because collectible types need the generics context when gc-ing.
+ if (lvaGenericsContextUsed)
+ {
+ return true;
+ }
+ }
+
+ // Otherwise, we don't need to report it -- the generics context parameter is unused.
+ return false;
+}
+
+//*****************************************************************************
+
+inline unsigned Compiler::lvaCachedGenericContextArgOffset()
+{
+ assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
+
+ return lvaCachedGenericContextArgOffs;
+}
+
+/*****************************************************************************
+ *
+ * Return the stack framed offset of the given variable; set *FPbased to
+ * true if the variable is addressed off of FP, false if it's addressed
+ * off of SP. Note that 'varNum' can be a negated spill-temporary var index.
+ *
+ * mustBeFPBased - strong about whether the base reg is FP. But it is also
+ * strong about not being FPBased after FINAL_FRAME_LAYOUT. i.e.,
+ * it enforces SP based.
+ *
+ * addrModeOffset - is the addressing mode offset, for example: v02 + 0x10
+ * So, V02 itself is at offset sp + 0x10 and then addrModeOffset is what gets
+ * added beyond that.
+ */
+
+inline
+#ifdef _TARGET_ARM_
+ int
+ Compiler::lvaFrameAddress(int varNum, bool mustBeFPBased, regNumber* pBaseReg, int addrModeOffset)
+#else
+ int
+ Compiler::lvaFrameAddress(int varNum, bool* pFPbased)
+#endif
+{
+ assert(lvaDoneFrameLayout != NO_FRAME_LAYOUT);
+
+ int offset;
+ bool FPbased;
+ bool fConservative = false;
+ var_types type = TYP_UNDEF;
+ if (varNum >= 0)
+ {
+ LclVarDsc* varDsc;
+
+ assert((unsigned)varNum < lvaCount);
+ varDsc = lvaTable + varNum;
+ type = varDsc->TypeGet();
+ bool isPrespilledArg = false;
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ isPrespilledArg = varDsc->lvIsParam && compIsProfilerHookNeeded() &&
+ lvaIsPreSpilled(varNum, codeGen->regSet.rsMaskPreSpillRegs(false));
+#endif
+
+ // If we have finished with register allocation, and this isn't a stack-based local,
+ // check that this has a valid stack location.
+ if (lvaDoneFrameLayout > REGALLOC_FRAME_LAYOUT && !varDsc->lvOnFrame)
+ {
+#ifdef _TARGET_AMD64_
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On amd64, every param has a stack location, except on Unix-like systems.
+ assert(varDsc->lvIsParam);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#elif defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ // For !LEGACY_BACKEND on x86, a stack parameter that is enregistered will have a stack location.
+ assert(varDsc->lvIsParam && !varDsc->lvIsRegArg);
+#else // !(_TARGET_AMD64 || !(defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)))
+ // Otherwise, we only have a valid stack location for:
+ // A parameter that was passed on the stack, being homed into its register home,
+ // or a prespilled argument on arm under profiler.
+ assert((varDsc->lvIsParam && !varDsc->lvIsRegArg && varDsc->lvRegister) || isPrespilledArg);
+#endif // !(_TARGET_AMD64 || !(defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)))
+ }
+
+ FPbased = varDsc->lvFramePointerBased;
+
+#ifdef DEBUG
+#if FEATURE_FIXED_OUT_ARGS
+ if ((unsigned)varNum == lvaOutgoingArgSpaceVar)
+ {
+ assert(FPbased == false);
+ }
+ else
+#endif
+ {
+#if DOUBLE_ALIGN
+ assert(FPbased == (isFramePointerUsed() || (genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)));
+#else
+#ifdef _TARGET_X86_
+ assert(FPbased == isFramePointerUsed());
+#endif
+#endif
+ }
+#endif // DEBUG
+
+ offset = varDsc->lvStkOffs;
+ }
+ else // It's a spill-temp
+ {
+ FPbased = isFramePointerUsed();
+ if (lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
+ {
+ TempDsc* tmpDsc = tmpFindNum(varNum);
+#ifndef LEGACY_BACKEND
+ // The temp might be in use, since this might be during code generation.
+ if (tmpDsc == nullptr)
+ {
+ tmpDsc = tmpFindNum(varNum, Compiler::TEMP_USAGE_USED);
+ }
+#endif // !LEGACY_BACKEND
+ assert(tmpDsc != nullptr);
+ offset = tmpDsc->tdTempOffs();
+ type = tmpDsc->tdTempType();
+ }
+ else
+ {
+ // This value is an estimate until we calculate the
+ // offset after the final frame layout
+ // ---------------------------------------------------
+ // : :
+ // +-------------------------+ base --+
+ // | LR, ++N for ARM | | frameBaseOffset (= N)
+ // +-------------------------+ |
+ // | R11, ++N for ARM | <---FP |
+ // +-------------------------+ --+
+ // | compCalleeRegsPushed - N| | lclFrameOffset
+ // +-------------------------+ --+
+ // | lclVars | |
+ // +-------------------------+ |
+ // | tmp[MAX_SPILL_TEMP] | |
+ // | tmp[1] | |
+ // | tmp[0] | | compLclFrameSize
+ // +-------------------------+ |
+ // | outgoingArgSpaceSize | |
+ // +-------------------------+ --+
+ // | | <---SP
+ // : :
+ // ---------------------------------------------------
+
+ type = compFloatingPointUsed ? TYP_FLOAT : TYP_INT;
+ fConservative = true;
+ if (!FPbased)
+ {
+ // Worst case stack based offset.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if FEATURE_FIXED_OUT_ARGS
+ int outGoingArgSpaceSize = lvaOutgoingArgSpaceSize;
+#else
+ int outGoingArgSpaceSize = 0;
+#endif
+ offset = outGoingArgSpaceSize + max(-varNum * TARGET_POINTER_SIZE, (int)lvaGetMaxSpillTempSize());
+ }
+ else
+ {
+ // Worst case FP based offset.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ offset = codeGen->genCallerSPtoInitialSPdelta() - codeGen->genCallerSPtoFPdelta();
+#else
+ offset = -(codeGen->genTotalFrameSize());
+#endif
+ }
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ if (FPbased)
+ {
+ if (mustBeFPBased)
+ {
+ *pBaseReg = REG_FPBASE;
+ }
+ // Change the FP-based addressing to the SP-based addressing when possible because
+ // it generates smaller code on ARM. See frame picture above for the math.
+ else
+ {
+ // If this is the final frame layout phase, we don't have a choice: we must stick
+ // with whichever of FP-based or SP-based addressing we decided on in the earlier phase,
+ // because we have already selected the instruction. Min-opts will have R10 enabled, so just
+ // use that.
+
+ int spOffset = fConservative ? compLclFrameSize : offset + codeGen->genSPtoFPdelta();
+ int actualOffset = (spOffset + addrModeOffset);
+ int ldrEncodeLimit = (varTypeIsFloating(type) ? 0x3FC : 0xFFC);
+ // Use ldr sp imm encoding.
+ if (lvaDoneFrameLayout == FINAL_FRAME_LAYOUT || opts.MinOpts() || (actualOffset <= ldrEncodeLimit))
+ {
+ offset = spOffset;
+ *pBaseReg = compLocallocUsed ? REG_SAVED_LOCALLOC_SP : REG_SPBASE;
+ }
+ // Use ldr +/-imm8 encoding.
+ else if (offset >= -0x7C && offset <= ldrEncodeLimit)
+ {
+ *pBaseReg = REG_FPBASE;
+ }
+ // Use a single movw. Prefer locals.
+ else if (actualOffset <= 0xFFFC) // Fix 383910 ARM ILGEN
+ {
+ offset = spOffset;
+ *pBaseReg = compLocallocUsed ? REG_SAVED_LOCALLOC_SP : REG_SPBASE;
+ }
+ // Use movw, movt.
+ else
+ {
+ *pBaseReg = REG_FPBASE;
+ }
+ }
+ }
+ else
+ {
+ *pBaseReg = REG_SPBASE;
+ }
+#else
+ *pFPbased = FPbased;
+#endif
+
+ return offset;
+}
+
+inline bool Compiler::lvaIsParameter(unsigned varNum)
+{
+ LclVarDsc* varDsc;
+
+ assert(varNum < lvaCount);
+ varDsc = lvaTable + varNum;
+
+ return varDsc->lvIsParam;
+}
+
+inline bool Compiler::lvaIsRegArgument(unsigned varNum)
+{
+ LclVarDsc* varDsc;
+
+ assert(varNum < lvaCount);
+ varDsc = lvaTable + varNum;
+
+ return varDsc->lvIsRegArg;
+}
+
+inline BOOL Compiler::lvaIsOriginalThisArg(unsigned varNum)
+{
+ assert(varNum < lvaCount);
+
+ BOOL isOriginalThisArg = (varNum == info.compThisArg) && (info.compIsStatic == false);
+
+#ifdef DEBUG
+ if (isOriginalThisArg)
+ {
+ LclVarDsc* varDsc = lvaTable + varNum;
+ // Should never write to or take the address of the original 'this' arg
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef JIT32_GCENCODER
+ // With the general encoder/decoder, when the original 'this' arg is needed as a generics context param, we
+ // copy to a new local, and mark the original as DoNotEnregister, to
+ // ensure that it is stack-allocated. It should not be the case that the original one can be modified -- it
+ // should not be written to, or address-exposed.
+ assert(!varDsc->lvArgWrite &&
+ (!varDsc->lvAddrExposed || ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0)));
+#else
+ assert(!varDsc->lvArgWrite && !varDsc->lvAddrExposed);
+#endif
+ }
+#endif
+
+ return isOriginalThisArg;
+}
+
+inline BOOL Compiler::lvaIsOriginalThisReadOnly()
+{
+ return lvaArg0Var == info.compThisArg;
+}
+
+/*****************************************************************************
+ *
+ * The following is used to detect the cases where the same local variable#
+ * is used both as a long/double value and a 32-bit value and/or both as an
+ * integer/address and a float value.
+ */
+
+/* static */ inline unsigned Compiler::lvaTypeRefMask(var_types type)
+{
+ const static BYTE lvaTypeRefMasks[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) howUsed,
+#include "typelist.h"
+#undef DEF_TP
+ };
+
+ assert((unsigned)type < sizeof(lvaTypeRefMasks));
+ assert(lvaTypeRefMasks[type] != 0);
+
+ return lvaTypeRefMasks[type];
+}
+
+/*****************************************************************************
+ *
+ * The following return the 'actual' (stack-normalized) type and the declared
+ * type, respectively, of the given local variable.
+ */
+
+inline var_types Compiler::lvaGetActualType(unsigned lclNum)
+{
+ return genActualType(lvaGetRealType(lclNum));
+}
+
+inline var_types Compiler::lvaGetRealType(unsigned lclNum)
+{
+ return lvaTable[lclNum].TypeGet();
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX Importer XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+inline unsigned Compiler::compMapILargNum(unsigned ILargNum)
+{
+ assert(ILargNum < info.compILargsCount || tiVerificationNeeded);
+
+ // Note that this works because if compRetBuffArg/compTypeCtxtArg/lvVarargsHandleArg are not present
+ // they will be BAD_VAR_NUM (MAX_UINT), which is larger than any variable number.
+ if (ILargNum >= info.compRetBuffArg)
+ {
+ ILargNum++;
+ assert(ILargNum < info.compLocalsCount || tiVerificationNeeded); // compLocals count already adjusted.
+ }
+
+ if (ILargNum >= (unsigned)info.compTypeCtxtArg)
+ {
+ ILargNum++;
+ assert(ILargNum < info.compLocalsCount || tiVerificationNeeded); // compLocals count already adjusted.
+ }
+
+ if (ILargNum >= (unsigned)lvaVarargsHandleArg)
+ {
+ ILargNum++;
+ assert(ILargNum < info.compLocalsCount || tiVerificationNeeded); // compLocals count already adjusted.
+ }
+
+ assert(ILargNum < info.compArgsCount || tiVerificationNeeded);
+ return (ILargNum);
+}
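+
+// For example, if the method has a hidden return buffer argument at position 1
+// (info.compRetBuffArg == 1) and no type context or varargs cookie, then IL arg 0
+// maps to JIT arg number 0 while IL args 1, 2, ... map to JIT arg numbers 2, 3, ...
+// (skipping over the return buffer).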
+
+// For ARM varargs, all arguments go in integer registers, so swizzle the type
+inline var_types Compiler::mangleVarArgsType(var_types type)
+{
+#ifdef _TARGET_ARMARCH_
+ if (info.compIsVarArgs || opts.compUseSoftFP)
+ {
+ switch (type)
+ {
+ case TYP_FLOAT:
+ return TYP_INT;
+ case TYP_DOUBLE:
+ return TYP_LONG;
+ default:
+ break;
+ }
+ }
+#endif // _TARGET_ARMARCH_
+ return type;
+}
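+
+// For example, under ARM varargs a TYP_DOUBLE argument is reported as TYP_LONG so that
+// it is passed in a pair of integer registers rather than in a floating-point register.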
+
+// For CORECLR there is no vararg on System V systems.
+#if FEATURE_VARARG
+inline regNumber Compiler::getCallArgIntRegister(regNumber floatReg)
+{
+#ifdef _TARGET_AMD64_
+ switch (floatReg)
+ {
+ case REG_XMM0:
+ return REG_RCX;
+ case REG_XMM1:
+ return REG_RDX;
+ case REG_XMM2:
+ return REG_R8;
+ case REG_XMM3:
+ return REG_R9;
+ default:
+ unreached();
+ }
+#else // !_TARGET_AMD64_
+ // How will float args be passed for RyuJIT/x86?
+ NYI("getCallArgIntRegister for RyuJIT/x86");
+ return REG_NA;
+#endif // !_TARGET_AMD64_
+}
+
+inline regNumber Compiler::getCallArgFloatRegister(regNumber intReg)
+{
+#ifdef _TARGET_AMD64_
+ switch (intReg)
+ {
+ case REG_RCX:
+ return REG_XMM0;
+ case REG_RDX:
+ return REG_XMM1;
+ case REG_R8:
+ return REG_XMM2;
+ case REG_R9:
+ return REG_XMM3;
+ default:
+ unreached();
+ }
+#else // !_TARGET_AMD64_
+ // How will float args be passed for RyuJIT/x86?
+ NYI("getCallArgFloatRegister for RyuJIT/x86");
+ return REG_NA;
+#endif // !_TARGET_AMD64_
+}
+#endif // FEATURE_VARARG
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX Register Allocator XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+
+inline bool rpCanAsgOperWithoutReg(GenTreePtr op, bool lclvar)
+{
+ var_types type;
+
+ switch (op->OperGet())
+ {
+ case GT_CNS_LNG:
+ case GT_CNS_INT:
+ return true;
+ case GT_LCL_VAR:
+ type = genActualType(op->TypeGet());
+ if (lclvar && ((type == TYP_INT) || (type == TYP_REF) || (type == TYP_BYREF)))
+ {
+ return true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX FlowGraph XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+inline bool Compiler::compCanEncodePtrArgCntMax()
+{
+#ifdef JIT32_GCENCODER
+ // DDB 204533:
+ // The GC encoding for fully interruptible methods does not
+ // support more than 1023 pushed arguments, so we have to
+ // use a partially interruptible GC info/encoding.
+ //
+ return (fgPtrArgCntMax < MAX_PTRARG_OFS);
+#else // JIT32_GCENCODER
+ return true;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Call the given function pointer for all nodes in the tree. The 'visitor'
+ * fn should return one of the following values:
+ *
+ * WALK_ABORT stop walking and return immediately
+ * WALK_CONTINUE continue walking
+ * WALK_SKIP_SUBTREES don't walk any subtrees of the node just visited
+ *
+ * computeStack - true if we want to make stack visible to callback function
+ */
+
+inline Compiler::fgWalkResult Compiler::fgWalkTreePre(
+ GenTreePtr* pTree, fgWalkPreFn* visitor, void* callBackData, bool lclVarsOnly, bool computeStack)
+
+{
+ fgWalkData walkData;
+
+ walkData.compiler = this;
+ walkData.wtprVisitorFn = visitor;
+ walkData.pCallbackData = callBackData;
+ walkData.parent = nullptr;
+ walkData.wtprLclsOnly = lclVarsOnly;
+#ifdef DEBUG
+ walkData.printModified = false;
+#endif
+
+ fgWalkResult result;
+ if (computeStack)
+ {
+ GenTreeStack parentStack(this);
+ walkData.parentStack = &parentStack;
+ result = fgWalkTreePreRec<true>(pTree, &walkData);
+ }
+ else
+ {
+ walkData.parentStack = nullptr;
+ result = fgWalkTreePreRec<false>(pTree, &walkData);
+ }
+
+#ifdef DEBUG
+ if (verbose && walkData.printModified)
+ {
+ gtDispTree(*pTree);
+ }
+#endif
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Same as above, except the tree walk is performed in a depth-first fashion.
+ * The 'visitor' fn should return one of the following values:
+ *
+ * WALK_ABORT stop walking and return immediately
+ * WALK_CONTINUE continue walking
+ *
+ * computeStack - true if we want to make stack visible to callback function
+ */
+
+inline Compiler::fgWalkResult Compiler::fgWalkTreePost(GenTreePtr* pTree,
+ fgWalkPostFn* visitor,
+ void* callBackData,
+ bool computeStack)
+{
+ fgWalkData walkData;
+
+ walkData.compiler = this;
+ walkData.wtpoVisitorFn = visitor;
+ walkData.pCallbackData = callBackData;
+ walkData.parent = nullptr;
+
+ fgWalkResult result;
+ if (computeStack)
+ {
+ GenTreeStack parentStack(this);
+ walkData.parentStack = &parentStack;
+ result = fgWalkTreePostRec<true>(pTree, &walkData);
+ }
+ else
+ {
+ walkData.parentStack = nullptr;
+ result = fgWalkTreePostRec<false>(pTree, &walkData);
+ }
+
+ assert(result == WALK_CONTINUE || result == WALK_ABORT);
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Has this block been added to throw an inlined exception
+ * Returns true if the block was added to throw one of:
+ * range-check exception
+ * argument exception (used by feature SIMD)
+ * argument range-check exception (used by feature SIMD)
+ * divide by zero exception (Not used on X86/X64)
+ * null reference exception (Not currently used)
+ * overflow exception
+ */
+
+inline bool Compiler::fgIsThrowHlpBlk(BasicBlock* block)
+{
+ if (!fgIsCodeAdded())
+ {
+ return false;
+ }
+
+ if (!(block->bbFlags & BBF_INTERNAL) || block->bbJumpKind != BBJ_THROW)
+ {
+ return false;
+ }
+
+ GenTree* call = block->lastNode();
+
+#ifdef DEBUG
+ if (block->IsLIR())
+ {
+ LIR::Range& blockRange = LIR::AsRange(block);
+ for (LIR::Range::ReverseIterator node = blockRange.rbegin(), end = blockRange.rend(); node != end; ++node)
+ {
+ if (node->OperGet() == GT_CALL)
+ {
+ assert(*node == call);
+ assert(node == blockRange.rbegin());
+ break;
+ }
+ }
+ }
+#endif
+
+ if (!call || (call->gtOper != GT_CALL))
+ {
+ return false;
+ }
+
+ if (!((call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RNGCHKFAIL)) ||
+ (call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWDIVZERO)) ||
+#if COR_JIT_EE_VERSION > 460
+ (call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWNULLREF)) ||
+#endif // COR_JIT_EE_VERSION
+ (call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_OVERFLOW))))
+ {
+ return false;
+ }
+
+ // We can get to this point for blocks that we didn't create as throw helper blocks
+ // under stress, with crazy flow graph optimizations. So, walk the fgAddCodeList
+ // for the final determination.
+
+ for (AddCodeDsc* add = fgAddCodeList; add; add = add->acdNext)
+ {
+ if (block == add->acdDstBlk)
+ {
+ return add->acdKind == SCK_RNGCHK_FAIL || add->acdKind == SCK_DIV_BY_ZERO || add->acdKind == SCK_OVERFLOW
+#if COR_JIT_EE_VERSION > 460
+ || add->acdKind == SCK_ARG_EXCPN || add->acdKind == SCK_ARG_RNG_EXCPN
+#endif // COR_JIT_EE_VERSION
+ ;
+ }
+ }
+
+ // We couldn't find it in the fgAddCodeList
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Return the stackLevel of the inserted block that throws an exception
+ * (by calling the EE helper).
+ */
+
+inline unsigned Compiler::fgThrowHlpBlkStkLevel(BasicBlock* block)
+{
+ for (AddCodeDsc* add = fgAddCodeList; add; add = add->acdNext)
+ {
+ if (block == add->acdDstBlk)
+ {
+ // Compute assert cond separately as assert macro cannot have conditional compilation directives.
+ bool cond =
+ (add->acdKind == SCK_RNGCHK_FAIL || add->acdKind == SCK_DIV_BY_ZERO || add->acdKind == SCK_OVERFLOW
+#if COR_JIT_EE_VERSION > 460
+ || add->acdKind == SCK_ARG_EXCPN || add->acdKind == SCK_ARG_RNG_EXCPN
+#endif // COR_JIT_EE_VERSION
+ );
+ assert(cond);
+
+ // TODO: bbTgtStkDepth is DEBUG-only.
+ // Should we use it regularly and avoid this search?
+ assert(block->bbTgtStkDepth == add->acdStkLvl);
+ return add->acdStkLvl;
+ }
+ }
+
+ noway_assert(!"fgThrowHlpBlkStkLevel should only be called if fgIsThrowHlpBlk() is true, but we can't find the "
+ "block in the fgAddCodeList list");
+
+ /* We couldn't find the basic block: it must not have been a throw helper block */
+
+ return 0;
+}
+
+/*
+ Small inline function to change a given block to a throw block.
+
+*/
+inline void Compiler::fgConvertBBToThrowBB(BasicBlock* block)
+{
+ block->bbJumpKind = BBJ_THROW;
+ block->bbSetRunRarely(); // any block with a throw is rare
+}
+
+/*****************************************************************************
+ *
+ * Return true if we've added any new basic blocks.
+ */
+
+inline bool Compiler::fgIsCodeAdded()
+{
+ return fgAddCodeModf;
+}
+
+/*****************************************************************************
+ Is the offset too big?
+*/
+inline bool Compiler::fgIsBigOffset(size_t offset)
+{
+ return (offset > compMaxUncheckedOffsetForNullObject);
+}
+
+/***********************************************************************************
+*
+* Returns true if the back-end will do something other than an ordinary integer division,
+* which currently occurs only if "divisor" is a positive integer constant and a power of 2
+* other than 1 and INT_MIN.
+*/
+
+inline bool Compiler::fgIsSignedDivOptimizable(GenTreePtr divisor)
+{
+ if (!opts.MinOpts() && divisor->IsCnsIntOrI())
+ {
+ ssize_t ival = divisor->gtIntConCommon.IconValue();
+
+ /* Is the divisor a power of 2 (excluding INT_MIN) ?.
+ The intent of the third condition below is to exclude INT_MIN on a 64-bit platform
+ and during codegen we need to encode ival-1 within 32 bits. If ival were INT_MIN
+ then ival-1 would cause underflow.
+
+ Note that we could put #ifdef around the third check so that it is applied only on
+ 64-bit platforms but the below is a more generic way to express it as it is a no-op
+ on 32-bit platforms.
+ */
+ return (ival > 0 && genMaxOneBit(ival) && ((ssize_t)(int)ival == ival));
+ }
+
+ return false;
+}
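+
+// For example, for 'x / 8' this returns true (8 is a positive power of 2 that fits in
+// 32 bits), while for 'x / 6' or 'x / -8' it returns false and the back-end emits an
+// ordinary integer division.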
+
+/************************************************************************************
+*
+* Returns true if the back-end will do something other than an ordinary integer division,
+* which currently occurs if "divisor" is an unsigned integer constant and a power of 2
+* other than 1 and zero.
+*/
+
+inline bool Compiler::fgIsUnsignedDivOptimizable(GenTreePtr divisor)
+{
+ if (!opts.MinOpts() && divisor->IsCnsIntOrI())
+ {
+ size_t ival = divisor->gtIntCon.gtIconVal;
+
+ /* Is the divisor a power of 2 ? */
+ return ival && genMaxOneBit(ival);
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+*
+* Returns true if the back-end will do something other than an ordinary integer modulo,
+* which currently occurs if "divisor" is a positive integer constant and a power of 2
+* other than zero.
+*/
+
+inline bool Compiler::fgIsSignedModOptimizable(GenTreePtr divisor)
+{
+ if (!opts.MinOpts() && divisor->IsCnsIntOrI())
+ {
+ size_t ival = divisor->gtIntCon.gtIconVal;
+
+ /* Is the divisor a power of 2 ? */
+ return ssize_t(ival) > 0 && genMaxOneBit(ival);
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+*
+* Returns true if the back-end will do something other than an ordinary integer modulo,
+* which currently occurs if "divisor" is a positive integer constant and a power of 2
+* other than zero.
+*/
+
+inline bool Compiler::fgIsUnsignedModOptimizable(GenTreePtr divisor)
+{
+ if (!opts.MinOpts() && divisor->IsCnsIntOrI())
+ {
+ size_t ival = divisor->gtIntCon.gtIconVal;
+
+ /* Is the divisor a power of 2 ? */
+ return ival != 0 && ival == (unsigned)genFindLowestBit(ival);
+ }
+
+ return false;
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX TempsInfo XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+
+/* static */ inline unsigned Compiler::tmpSlot(unsigned size)
+{
+ noway_assert(size >= sizeof(int));
+ noway_assert(size <= TEMP_MAX_SIZE);
+ assert((size % sizeof(int)) == 0);
+
+ assert(size < UINT32_MAX);
+ return size / sizeof(int) - 1;
+}
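+
+// For example, a 4-byte temp maps to slot 0, an 8-byte temp to slot 1, and so on.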
+
+/*****************************************************************************
+ *
+ * Finish allocating temps - should be called each time after a pass is made
+ * over a function body.
+ */
+
+inline void Compiler::tmpEnd()
+{
+#ifdef DEBUG
+ if (verbose && (tmpCount > 0))
+ {
+ printf("%d tmps used\n", tmpCount);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Shuts down the temp-tracking code. Should be called once per function
+ * compiled.
+ */
+
+inline void Compiler::tmpDone()
+{
+#ifdef DEBUG
+ unsigned count;
+ TempDsc* temp;
+
+ assert(tmpAllFree());
+ for (temp = tmpListBeg(), count = temp ? 1 : 0; temp; temp = tmpListNxt(temp), count += temp ? 1 : 0)
+ {
+ assert(temp->tdLegalOffset());
+ }
+
+ // Make sure that all the temps were released
+ assert(count == tmpCount);
+ assert(tmpGetCount == 0);
+#endif // DEBUG
+}
+
+#ifdef DEBUG
+inline bool Compiler::shouldUseVerboseTrees()
+{
+ return (JitConfig.JitDumpVerboseTrees() == 1);
+}
+
+inline bool Compiler::shouldUseVerboseSsa()
+{
+ return (JitConfig.JitDumpVerboseSsa() == 1);
+}
+
+//------------------------------------------------------------------------
+// shouldDumpASCIITrees: Should we use only ASCII characters for tree dumps?
+//
+// Notes:
+// This is set to default to 1 in clrConfigValues.h
+
+inline bool Compiler::shouldDumpASCIITrees()
+{
+ return (JitConfig.JitDumpASCII() == 1);
+}
+
+/*****************************************************************************
+ * Should we enable JitStress mode?
+ * 0: No stress
+ * !=2: Vary stress. Performance will be slightly/moderately degraded
+ * 2: Check-all stress. Performance will be REALLY horrible
+ */
+
+inline DWORD getJitStressLevel()
+{
+ return JitConfig.JitStress();
+}
+
+/*****************************************************************************
+ * Should we do the strict check for a non-virtual call to a virtual method?
+ */
+
+inline DWORD StrictCheckForNonVirtualCallToVirtualMethod()
+{
+ return JitConfig.JitStrictCheckForNonVirtualCallToVirtualMethod() == 1;
+}
+
+#endif // DEBUG
+
+/*****************************************************************************/
+/* Map a register argument number ("RegArgNum") to a register number ("RegNum").
+ * A RegArgNum is in this range:
+ * [0, MAX_REG_ARG) -- for integer registers
+ * [0, MAX_FLOAT_REG_ARG) -- for floating point registers
+ * Note that RegArgNum's are overlapping for integer and floating-point registers,
+ * while RegNum's are not (for ARM anyway, though for x86, it might be different).
+ * If we have a fixed return buffer register and are given its index,
+ * we return the fixed return buffer register
+ */
+
+inline regNumber genMapIntRegArgNumToRegNum(unsigned argNum)
+{
+ if (hasFixedRetBuffReg() && (argNum == theFixedRetBuffArgNum()))
+ {
+ return theFixedRetBuffReg();
+ }
+
+ assert(argNum < ArrLen(intArgRegs));
+
+ return intArgRegs[argNum];
+}
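+
+// For example, on Windows x64 argNums 0..3 map to RCX, RDX, R8 and R9 respectively
+// (the exact mapping comes from the target's intArgRegs table).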
+
+inline regNumber genMapFloatRegArgNumToRegNum(unsigned argNum)
+{
+#ifndef _TARGET_X86_
+ assert(argNum < ArrLen(fltArgRegs));
+
+ return fltArgRegs[argNum];
+#else
+ assert(!"no x86 float arg regs\n");
+ return REG_NA;
+#endif
+}
+
+__forceinline regNumber genMapRegArgNumToRegNum(unsigned argNum, var_types type)
+{
+ if (varTypeIsFloating(type))
+ {
+ return genMapFloatRegArgNumToRegNum(argNum);
+ }
+ else
+ {
+ return genMapIntRegArgNumToRegNum(argNum);
+ }
+}
+
+/*****************************************************************************/
+/* Map a register argument number ("RegArgNum") to a register mask of the associated register.
+ * Note that for floating-pointer registers, only the low register for a register pair
+ * (for a double on ARM) is returned.
+ */
+
+inline regMaskTP genMapIntRegArgNumToRegMask(unsigned argNum)
+{
+ assert(argNum < ArrLen(intArgMasks));
+
+ return intArgMasks[argNum];
+}
+
+inline regMaskTP genMapFloatRegArgNumToRegMask(unsigned argNum)
+{
+#ifndef _TARGET_X86_
+ assert(argNum < ArrLen(fltArgMasks));
+
+ return fltArgMasks[argNum];
+#else
+ assert(!"no x86 float arg regs\n");
+ return RBM_NONE;
+#endif
+}
+
+__forceinline regMaskTP genMapArgNumToRegMask(unsigned argNum, var_types type)
+{
+ regMaskTP result;
+ if (varTypeIsFloating(type))
+ {
+ result = genMapFloatRegArgNumToRegMask(argNum);
+#ifdef _TARGET_ARM_
+ if (type == TYP_DOUBLE)
+ {
+ assert((result & RBM_DBL_REGS) != 0);
+ result |= (result << 1);
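+ // On ARM a TYP_DOUBLE occupies an even/odd float register pair, so include the
+ // mask bit of the odd (upper) half as well.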
+ }
+#endif
+ }
+ else
+ {
+ result = genMapIntRegArgNumToRegMask(argNum);
+ }
+ return result;
+}
+
+/*****************************************************************************/
+/* Map a register number ("RegNum") to a register argument number ("RegArgNum")
+ * If we have a fixed return buffer register we return theFixedRetBuffArgNum
+ */
+
+inline unsigned genMapIntRegNumToRegArgNum(regNumber regNum)
+{
+ assert(genRegMask(regNum) & fullIntArgRegMask());
+
+ switch (regNum)
+ {
+ case REG_ARG_0:
+ return 0;
+#if MAX_REG_ARG >= 2
+ case REG_ARG_1:
+ return 1;
+#if MAX_REG_ARG >= 3
+ case REG_ARG_2:
+ return 2;
+#if MAX_REG_ARG >= 4
+ case REG_ARG_3:
+ return 3;
+#if MAX_REG_ARG >= 5
+ case REG_ARG_4:
+ return 4;
+#if MAX_REG_ARG >= 6
+ case REG_ARG_5:
+ return 5;
+#if MAX_REG_ARG >= 7
+ case REG_ARG_6:
+ return 6;
+#if MAX_REG_ARG >= 8
+ case REG_ARG_7:
+ return 7;
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+ default:
+ // Check for the Arm64 fixed return buffer argument register
+ if (hasFixedRetBuffReg() && (regNum == theFixedRetBuffReg()))
+ {
+ return theFixedRetBuffArgNum();
+ }
+ else
+ {
+ assert(!"invalid register arg register");
+ return BAD_VAR_NUM;
+ }
+ }
+}
+
+inline unsigned genMapFloatRegNumToRegArgNum(regNumber regNum)
+{
+ assert(genRegMask(regNum) & RBM_FLTARG_REGS);
+
+#ifdef _TARGET_ARM_
+ return regNum - REG_F0;
+#elif defined(_TARGET_ARM64_)
+ return regNum - REG_V0;
+#elif defined(UNIX_AMD64_ABI)
+ return regNum - REG_FLTARG_0;
+#else
+
+#if MAX_FLOAT_REG_ARG >= 1
+ switch (regNum)
+ {
+ case REG_FLTARG_0:
+ return 0;
+#if MAX_REG_ARG >= 2
+ case REG_FLTARG_1:
+ return 1;
+#if MAX_REG_ARG >= 3
+ case REG_FLTARG_2:
+ return 2;
+#if MAX_REG_ARG >= 4
+ case REG_FLTARG_3:
+ return 3;
+#if MAX_REG_ARG >= 5
+ case REG_FLTARG_4:
+ return 4;
+#endif
+#endif
+#endif
+#endif
+ default:
+ assert(!"invalid register arg register");
+ return BAD_VAR_NUM;
+ }
+#else
+ assert(!"flt reg args not allowed");
+ return BAD_VAR_NUM;
+#endif
+#endif // !arm
+}
+
+inline unsigned genMapRegNumToRegArgNum(regNumber regNum, var_types type)
+{
+ if (varTypeIsFloating(type))
+ {
+ return genMapFloatRegNumToRegArgNum(regNum);
+ }
+ else
+ {
+ return genMapIntRegNumToRegArgNum(regNum);
+ }
+}
+
+/*****************************************************************************/
+/* Return a register mask with the first 'numRegs' argument registers set.
+ */
+
+inline regMaskTP genIntAllRegArgMask(unsigned numRegs)
+{
+ assert(numRegs <= MAX_REG_ARG);
+
+ regMaskTP result = RBM_NONE;
+ for (unsigned i = 0; i < numRegs; i++)
+ {
+ result |= intArgMasks[i];
+ }
+ return result;
+}
+
+#if !FEATURE_STACK_FP_X87
+
+inline regMaskTP genFltAllRegArgMask(unsigned numRegs)
+{
+ assert(numRegs <= MAX_FLOAT_REG_ARG);
+
+ regMaskTP result = RBM_NONE;
+ for (unsigned i = 0; i < numRegs; i++)
+ {
+ result |= fltArgMasks[i];
+ }
+ return result;
+}
+
+#endif // !FEATURE_STACK_FP_X87
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX Liveness XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+ *
+ * Update the current set of live variables based on the life set recorded
+ * in the given expression tree node.
+ */
+
+template <bool ForCodeGen>
+inline void Compiler::compUpdateLife(GenTreePtr tree)
+{
+ // TODO-Cleanup: We shouldn't really be calling this more than once
+ if (tree == compCurLifeTree)
+ {
+ return;
+ }
+
+ if (!tree->OperIsNonPhiLocal() && fgIsIndirOfAddrOfLocal(tree) == nullptr)
+ {
+ return;
+ }
+
+ compUpdateLifeVar<ForCodeGen>(tree);
+}
+
+template <bool ForCodeGen>
+inline void Compiler::compUpdateLife(VARSET_VALARG_TP newLife)
+{
+ if (!VarSetOps::Equal(this, compCurLife, newLife))
+ {
+ compChangeLife<ForCodeGen>(newLife DEBUGARG(nullptr));
+ }
+#ifdef DEBUG
+ else
+ {
+ if (verbose)
+ {
+ printf("Liveness not changing: %s ", VarSetOps::ToString(this, compCurLife));
+ dumpConvertedVarSet(this, compCurLife);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * We stash cookies in basic blocks for the code emitter; this call retrieves
+ * the cookie associated with the given basic block.
+ */
+
+inline void* emitCodeGetCookie(BasicBlock* block)
+{
+ assert(block);
+ return block->bbEmitCookie;
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX Optimizer XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#if LOCAL_ASSERTION_PROP
+
+/*****************************************************************************
+ *
+ * The following resets the value assignment table,
+ * which is used only during local assertion prop.
+ */
+
+inline void Compiler::optAssertionReset(AssertionIndex limit)
+{
+ PREFAST_ASSUME(optAssertionCount <= optMaxAssertionCount);
+
+ while (optAssertionCount > limit)
+ {
+ AssertionIndex index = optAssertionCount;
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ optAssertionCount--;
+ unsigned lclNum = curAssertion->op1.lcl.lclNum;
+ assert(lclNum < lvaTableCnt);
+ BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1);
+
+ //
+ // Find the Copy assertions
+ //
+ if ((curAssertion->assertionKind == OAK_EQUAL) && (curAssertion->op1.kind == O1K_LCLVAR) &&
+ (curAssertion->op2.kind == O2K_LCLVAR_COPY))
+ {
+ //
+ // op2.lcl.lclNum no longer depends upon this assertion
+ //
+ lclNum = curAssertion->op2.lcl.lclNum;
+ BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1);
+ }
+ }
+ while (optAssertionCount < limit)
+ {
+ AssertionIndex index = ++optAssertionCount;
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ unsigned lclNum = curAssertion->op1.lcl.lclNum;
+ BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), index - 1);
+
+ //
+ // Check for Copy assertions
+ //
+ if ((curAssertion->assertionKind == OAK_EQUAL) && (curAssertion->op1.kind == O1K_LCLVAR) &&
+ (curAssertion->op2.kind == O2K_LCLVAR_COPY))
+ {
+ //
+ // op2.lcl.lclNum now depends upon this assertion
+ //
+ lclNum = curAssertion->op2.lcl.lclNum;
+ BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), index - 1);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * The following removes the i-th entry in the value assignment table,
+ * which is used only during local assertion prop.
+ */
+
+inline void Compiler::optAssertionRemove(AssertionIndex index)
+{
+ assert(index > 0);
+ assert(index <= optAssertionCount);
+ PREFAST_ASSUME(optAssertionCount <= optMaxAssertionCount);
+
+ AssertionDsc* curAssertion = optGetAssertion(index);
+
+ // There are two cases to consider. If (index == optAssertionCount), then the last
+ // entry in the table is being removed; that happens automatically when
+ // optAssertionCount is decremented, and we can simply clear the optAssertionDep bits.
+ // Otherwise (index < optAssertionCount), we overwrite the index-th entry in the
+ // table with the data found at the end of the table. Since we are reordering the
+ // table, the optAssertionDep bits need to be recreated: optAssertionReset(0)
+ // followed by optAssertionReset(newAssertionCount) rebuilds them correctly.
+ //
+ if (index == optAssertionCount)
+ {
+ unsigned lclNum = curAssertion->op1.lcl.lclNum;
+ BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1);
+
+ //
+ // Check for Copy assertions
+ //
+ if ((curAssertion->assertionKind == OAK_EQUAL) && (curAssertion->op1.kind == O1K_LCLVAR) &&
+ (curAssertion->op2.kind == O2K_LCLVAR_COPY))
+ {
+ //
+ // op2.lcl.lclNum no longer depends upon this assertion
+ //
+ lclNum = curAssertion->op2.lcl.lclNum;
+ BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1);
+ }
+
+ optAssertionCount--;
+ }
+ else
+ {
+ AssertionDsc* lastAssertion = optGetAssertion(optAssertionCount);
+ AssertionIndex newAssertionCount = optAssertionCount - 1;
+
+ optAssertionReset(0); // This makes optAssertionCount equal to 0
+
+ memcpy(curAssertion, // the entry to be removed
+ lastAssertion, // last entry in the table
+ sizeof(AssertionDsc));
+
+ optAssertionReset(newAssertionCount);
+ }
+}
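+
+// Illustrative example (a sketch of the behavior above, not additional functionality):
+// with 5 assertions in the table, optAssertionRemove(2) copies assertion #5 over slot #2,
+// leaves 4 assertions, and rebuilds the optAssertionDep bits via the two optAssertionReset
+// calls; optAssertionRemove(5) simply drops the last entry and clears its dep bits.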
+#endif // LOCAL_ASSERTION_PROP
+
+inline void Compiler::LoopDsc::AddModifiedField(Compiler* comp, CORINFO_FIELD_HANDLE fldHnd)
+{
+ if (lpFieldsModified == nullptr)
+ {
+ lpFieldsModified =
+ new (comp->getAllocatorLoopHoist()) Compiler::LoopDsc::FieldHandleSet(comp->getAllocatorLoopHoist());
+ }
+ lpFieldsModified->Set(fldHnd, true);
+}
+
+inline void Compiler::LoopDsc::AddModifiedElemType(Compiler* comp, CORINFO_CLASS_HANDLE structHnd)
+{
+ if (lpArrayElemTypesModified == nullptr)
+ {
+ lpArrayElemTypesModified =
+ new (comp->getAllocatorLoopHoist()) Compiler::LoopDsc::ClassHandleSet(comp->getAllocatorLoopHoist());
+ }
+ lpArrayElemTypesModified->Set(structHnd, true);
+}
+
+inline void Compiler::LoopDsc::VERIFY_lpIterTree()
+{
+#ifdef DEBUG
+ assert(lpFlags & LPFLG_ITER);
+
+ // iterTree should be "lcl <op>= const"
+
+ assert(lpIterTree);
+
+ assert(lpIterTree->OperKind() & GTK_ASGOP); // +=, -=, etc or = +, = -, etc
+
+ if (lpIterTree->OperGet() == GT_ASG)
+ {
+ GenTreePtr lhs = lpIterTree->gtOp.gtOp1;
+ GenTreePtr rhs = lpIterTree->gtOp.gtOp2;
+ assert(lhs->OperGet() == GT_LCL_VAR);
+
+ switch (rhs->gtOper)
+ {
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ case GT_RSH:
+ case GT_LSH:
+ break;
+ default:
+ assert(!"Unknown operator for loop increment");
+ }
+ assert(rhs->gtOp.gtOp1->OperGet() == GT_LCL_VAR);
+ assert(rhs->gtOp.gtOp1->AsLclVarCommon()->GetLclNum() == lhs->AsLclVarCommon()->GetLclNum());
+ assert(rhs->gtOp.gtOp2->OperGet() == GT_CNS_INT);
+ }
+ else
+ {
+ assert(lpIterTree->gtOp.gtOp1->OperGet() == GT_LCL_VAR);
+ assert(lpIterTree->gtOp.gtOp2->OperGet() == GT_CNS_INT);
+ }
+#endif
+}
+
+//-----------------------------------------------------------------------------
+
+inline unsigned Compiler::LoopDsc::lpIterVar()
+{
+ VERIFY_lpIterTree();
+ return lpIterTree->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+}
+
+//-----------------------------------------------------------------------------
+
+inline int Compiler::LoopDsc::lpIterConst()
+{
+ VERIFY_lpIterTree();
+ if (lpIterTree->OperGet() == GT_ASG)
+ {
+ GenTreePtr rhs = lpIterTree->gtOp.gtOp2;
+ return (int)rhs->gtOp.gtOp2->gtIntCon.gtIconVal;
+ }
+ else
+ {
+ return (int)lpIterTree->gtOp.gtOp2->gtIntCon.gtIconVal;
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+inline genTreeOps Compiler::LoopDsc::lpIterOper()
+{
+ VERIFY_lpIterTree();
+ if (lpIterTree->OperGet() == GT_ASG)
+ {
+ GenTreePtr rhs = lpIterTree->gtOp.gtOp2;
+ return rhs->OperGet();
+ }
+ else
+ {
+ return lpIterTree->OperGet();
+ }
+}
+
+inline var_types Compiler::LoopDsc::lpIterOperType()
+{
+ VERIFY_lpIterTree();
+
+ var_types type = lpIterTree->TypeGet();
+ assert(genActualType(type) == TYP_INT);
+
+ if ((lpIterTree->gtFlags & GTF_UNSIGNED) && type == TYP_INT)
+ {
+ type = TYP_UINT;
+ }
+
+ return type;
+}
+
+inline void Compiler::LoopDsc::VERIFY_lpTestTree()
+{
+#ifdef DEBUG
+ assert(lpFlags & LPFLG_ITER);
+ assert(lpTestTree);
+
+ genTreeOps oper = lpTestTree->OperGet();
+ assert(GenTree::OperIsCompare(oper));
+
+ GenTreePtr iterator = nullptr;
+ GenTreePtr limit = nullptr;
+ if ((lpTestTree->gtOp.gtOp2->gtOper == GT_LCL_VAR) && (lpTestTree->gtOp.gtOp2->gtFlags & GTF_VAR_ITERATOR) != 0)
+ {
+ iterator = lpTestTree->gtOp.gtOp2;
+ limit = lpTestTree->gtOp.gtOp1;
+ }
+ else if ((lpTestTree->gtOp.gtOp1->gtOper == GT_LCL_VAR) &&
+ (lpTestTree->gtOp.gtOp1->gtFlags & GTF_VAR_ITERATOR) != 0)
+ {
+ iterator = lpTestTree->gtOp.gtOp1;
+ limit = lpTestTree->gtOp.gtOp2;
+ }
+ else
+ {
+ // one of the nodes has to be the iterator
+ assert(false);
+ }
+
+ if (lpFlags & LPFLG_CONST_LIMIT)
+ {
+ assert(limit->OperIsConst());
+ }
+ if (lpFlags & LPFLG_VAR_LIMIT)
+ {
+ assert(limit->OperGet() == GT_LCL_VAR);
+ }
+ if (lpFlags & LPFLG_ARRLEN_LIMIT)
+ {
+ assert(limit->OperGet() == GT_ARR_LENGTH);
+ }
+#endif
+}
+
+//-----------------------------------------------------------------------------
+
+inline bool Compiler::LoopDsc::lpIsReversed()
+{
+ VERIFY_lpTestTree();
+ return ((lpTestTree->gtOp.gtOp2->gtOper == GT_LCL_VAR) &&
+ (lpTestTree->gtOp.gtOp2->gtFlags & GTF_VAR_ITERATOR) != 0);
+}
+
+//-----------------------------------------------------------------------------
+
+inline genTreeOps Compiler::LoopDsc::lpTestOper()
+{
+ VERIFY_lpTestTree();
+ genTreeOps op = lpTestTree->OperGet();
+ return lpIsReversed() ? GenTree::SwapRelop(op) : op;
+}
+
+//-----------------------------------------------------------------------------
+
+inline GenTreePtr Compiler::LoopDsc::lpIterator()
+{
+ VERIFY_lpTestTree();
+
+ return lpIsReversed() ? lpTestTree->gtOp.gtOp2 : lpTestTree->gtOp.gtOp1;
+}
+
+//-----------------------------------------------------------------------------
+
+inline GenTreePtr Compiler::LoopDsc::lpLimit()
+{
+ VERIFY_lpTestTree();
+
+ return lpIsReversed() ? lpTestTree->gtOp.gtOp1 : lpTestTree->gtOp.gtOp2;
+}
+
+//-----------------------------------------------------------------------------
+
+inline int Compiler::LoopDsc::lpConstLimit()
+{
+ VERIFY_lpTestTree();
+ assert(lpFlags & LPFLG_CONST_LIMIT);
+
+ GenTreePtr limit = lpLimit();
+ assert(limit->OperIsConst());
+ return (int)limit->gtIntCon.gtIconVal;
+}
+
+//-----------------------------------------------------------------------------
+
+inline unsigned Compiler::LoopDsc::lpVarLimit()
+{
+ VERIFY_lpTestTree();
+ assert(lpFlags & LPFLG_VAR_LIMIT);
+
+ GenTreePtr limit = lpLimit();
+ assert(limit->OperGet() == GT_LCL_VAR);
+ return limit->gtLclVarCommon.gtLclNum;
+}
+
+//-----------------------------------------------------------------------------
+
+inline bool Compiler::LoopDsc::lpArrLenLimit(Compiler* comp, ArrIndex* index)
+{
+ VERIFY_lpTestTree();
+ assert(lpFlags & LPFLG_ARRLEN_LIMIT);
+
+ GenTreePtr limit = lpLimit();
+ assert(limit->OperGet() == GT_ARR_LENGTH);
+
+ // Check if we have a.length or a[i][j].length
+ if (limit->gtArrLen.ArrRef()->gtOper == GT_LCL_VAR)
+ {
+ index->arrLcl = limit->gtArrLen.ArrRef()->gtLclVarCommon.gtLclNum;
+ index->rank = 0;
+ return true;
+ }
+ // We have a[i].length, extract a[i] pattern.
+ else if (limit->gtArrLen.ArrRef()->gtOper == GT_COMMA)
+ {
+ return comp->optReconstructArrIndex(limit->gtArrLen.ArrRef(), index, BAD_VAR_NUM);
+ }
+ return false;
+}
+
+/*****************************************************************************
+ * Is "var" assigned in the loop "lnum" ?
+ */
+
+inline bool Compiler::optIsVarAssgLoop(unsigned lnum, unsigned var)
+{
+ assert(lnum < optLoopCount);
+ if (var < lclMAX_ALLSET_TRACKED)
+ {
+ ALLVARSET_TP ALLVARSET_INIT_NOCOPY(vs, AllVarSetOps::MakeSingleton(this, var));
+ return optIsSetAssgLoop(lnum, vs) != 0;
+ }
+ else
+ {
+ return optIsVarAssigned(optLoopTable[lnum].lpHead->bbNext, optLoopTable[lnum].lpBottom, nullptr, var);
+ }
+}
+
+/*****************************************************************************
+ * If the tree is a tracked local variable, return its LclVarDsc ptr.
+ */
+
+inline LclVarDsc* Compiler::optIsTrackedLocal(GenTreePtr tree)
+{
+ LclVarDsc* varDsc;
+ unsigned lclNum;
+
+ if (tree->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+
+ /* if variable not tracked, return NULL */
+ if (!varDsc->lvTracked)
+ {
+ return nullptr;
+ }
+
+ return varDsc;
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Optimization activation rules XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+// are we compiling for fast code, or are we compiling for blended code and
+// inside a loop?
+// For BLENDED_CODE, we return true if the block's weight exceeds (BB_LOOP_WEIGHT / 2) * BB_UNITY_WEIGHT.
+inline bool Compiler::optFastCodeOrBlendedLoop(BasicBlock::weight_t bbWeight)
+{
+ return (compCodeOpt() == FAST_CODE) ||
+ ((compCodeOpt() == BLENDED_CODE) && (bbWeight > (BB_LOOP_WEIGHT / 2 * BB_UNITY_WEIGHT)));
+}
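+
+// For illustration only (the weight constants are assumptions and may differ in this
+// snapshot): with BB_LOOP_WEIGHT == 8 and BB_UNITY_WEIGHT == 100, a blended-code block
+// qualifies once its weight exceeds 400, i.e. it runs noticeably more often than a
+// straight-line block.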
+
+// are we running on an Intel Pentium 4?
+inline bool Compiler::optPentium4(void)
+{
+ return (info.genCPU == CPU_X86_PENTIUM_4);
+}
+
+// should we use add/sub instead of inc/dec? (faster on P4, but increases size)
+inline bool Compiler::optAvoidIncDec(BasicBlock::weight_t bbWeight)
+{
+ return optPentium4() && optFastCodeOrBlendedLoop(bbWeight);
+}
+
+// should we try to replace integer multiplication with lea/add/shift sequences?
+inline bool Compiler::optAvoidIntMult(void)
+{
+ return (compCodeOpt() != SMALL_CODE);
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX EEInterface XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+extern var_types JITtype2varType(CorInfoType type);
+
+#include "ee_il_dll.hpp"
+
+inline CORINFO_METHOD_HANDLE Compiler::eeFindHelper(unsigned helper)
+{
+ assert(helper < CORINFO_HELP_COUNT);
+
+ /* Helpers are marked by the fact that they are odd numbers;
+ * force this to be an odd number (we will shift it back to extract the helper number) */
+
+ return ((CORINFO_METHOD_HANDLE)(size_t)((helper << 2) + 1));
+}
+
+inline CorInfoHelpFunc Compiler::eeGetHelperNum(CORINFO_METHOD_HANDLE method)
+{
+ // Helpers are marked by the fact that they are odd numbers
+ if (!(((size_t)method) & 1))
+ {
+ return (CORINFO_HELP_UNDEF);
+ }
+ return ((CorInfoHelpFunc)(((size_t)method) >> 2));
+}
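+
+// Illustrative example (derived directly from the two functions above): eeFindHelper(h)
+// produces the tagged handle value (h << 2) + 1, which is always odd; eeGetHelperNum
+// recovers h by shifting right by two, and returns CORINFO_HELP_UNDEF for any even value,
+// i.e. for a real (non-helper) method handle.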
+
+inline Compiler::fgWalkResult Compiler::CountSharedStaticHelper(GenTreePtr* pTree, fgWalkData* data)
+{
+ if (Compiler::IsSharedStaticHelper(*pTree))
+ {
+ int* pCount = (int*)data->pCallbackData;
+ (*pCount)++;
+ }
+
+ return WALK_CONTINUE;
+}
+
+// TODO-Cleanup: Replace calls to IsSharedStaticHelper with new HelperCallProperties
+//
+
+inline bool Compiler::IsSharedStaticHelper(GenTreePtr tree)
+{
+ if (tree->gtOper != GT_CALL || tree->gtCall.gtCallType != CT_HELPER)
+ {
+ return false;
+ }
+
+ CorInfoHelpFunc helper = eeGetHelperNum(tree->gtCall.gtCallMethHnd);
+
+ bool result1 =
+ // More helpers are being added to IsSharedStaticHelper (ones that have similar behaviors but are not
+ // true SharedStaticHelpers)
+ helper == CORINFO_HELP_STRCNS || helper == CORINFO_HELP_BOX ||
+
+ // helpers being added to IsSharedStaticHelper
+ helper == CORINFO_HELP_GETSTATICFIELDADDR_CONTEXT || helper == CORINFO_HELP_GETSTATICFIELDADDR_TLS ||
+ helper == CORINFO_HELP_GETGENERICS_GCSTATIC_BASE || helper == CORINFO_HELP_GETGENERICS_NONGCSTATIC_BASE ||
+ helper == CORINFO_HELP_GETGENERICS_GCTHREADSTATIC_BASE ||
+ helper == CORINFO_HELP_GETGENERICS_NONGCTHREADSTATIC_BASE ||
+
+ helper == CORINFO_HELP_GETSHARED_GCSTATIC_BASE || helper == CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE ||
+ helper == CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR ||
+ helper == CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR ||
+ helper == CORINFO_HELP_GETSHARED_GCSTATIC_BASE_DYNAMICCLASS ||
+ helper == CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_DYNAMICCLASS ||
+ helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE ||
+ helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE ||
+ helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR ||
+ helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR ||
+ helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS ||
+ helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS ||
+#ifdef FEATURE_READYTORUN_COMPILER
+ helper == CORINFO_HELP_READYTORUN_STATIC_BASE ||
+#endif
+ helper == CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS;
+#if 0
+ // See above TODO-Cleanup
+ bool result2 = s_helperCallProperties.IsPure(helper) && s_helperCallProperties.NonNullReturn(helper);
+ assert (result1 == result2);
+#endif
+ return result1;
+}
+
+inline bool Compiler::IsTreeAlwaysHoistable(GenTreePtr tree)
+{
+ if (IsSharedStaticHelper(tree))
+ {
+ return (GTF_CALL_HOISTABLE & tree->gtFlags) ? true : false;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+//
+// Note that we want to have two special FIELD_HANDLES that will both
+// be considered non-Data Offset handles
+//
+// The special values that we use are FLD_GLOBAL_DS and FLD_GLOBAL_FS
+//
+
+inline bool jitStaticFldIsGlobAddr(CORINFO_FIELD_HANDLE fldHnd)
+{
+ return (fldHnd == FLD_GLOBAL_DS || fldHnd == FLD_GLOBAL_FS);
+}
+
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+
+inline bool Compiler::eeIsNativeMethod(CORINFO_METHOD_HANDLE method)
+{
+ return ((((size_t)method) & 0x2) == 0x2);
+}
+
+inline CORINFO_METHOD_HANDLE Compiler::eeGetMethodHandleForNative(CORINFO_METHOD_HANDLE method)
+{
+ assert((((size_t)method) & 0x3) == 0x2);
+ return (CORINFO_METHOD_HANDLE)(((size_t)method) & ~0x3);
+}
+#endif
+
+inline CORINFO_METHOD_HANDLE Compiler::eeMarkNativeTarget(CORINFO_METHOD_HANDLE method)
+{
+ assert((((size_t)method) & 0x3) == 0);
+ if (method == nullptr)
+ {
+ return method;
+ }
+ else
+ {
+ return (CORINFO_METHOD_HANDLE)(((size_t)method) | 0x2);
+ }
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX Compiler XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifndef DEBUG
+inline bool Compiler::compStressCompile(compStressArea stressArea, unsigned weightPercentage)
+{
+ return false;
+}
+#endif
+
+inline ArenaAllocator* Compiler::compGetAllocator()
+{
+ return compAllocator;
+}
+
+/*****************************************************************************
+ *
+ * Allocate memory from the no-release allocator. All such memory will be
+ * freed up simultaneously at the end of the procedure.
+ */
+
+#ifndef DEBUG
+
+inline void* Compiler::compGetMem(size_t sz, CompMemKind cmk)
+{
+ assert(sz);
+
+#if MEASURE_MEM_ALLOC
+ genMemStats.AddAlloc(sz, cmk);
+#endif
+
+ return compAllocator->allocateMemory(sz);
+}
+
+#endif
+
+/*****************************************************************************
+ *
+ * A common memory allocation for arrays of structures involves the
+ * multiplication of the number of elements with the size of each element.
+ * If this computation overflows, then the memory allocation might succeed,
+ * but not allocate sufficient memory for all the elements. This can cause
+ * us to overwrite the allocation, and AV or worse, corrupt memory.
+ *
+ * This method checks for overflow, and succeeds only when it detects
+ * that there's no overflow. It should be cheap, because when inlined with
+ * a constant elemSize, the division should be done in compile time, and so
+ * at run time we simply have a check of numElem against some number (this
+ * is why we __forceinline).
+ */
+
+#define MAX_MEMORY_PER_ALLOCATION (512 * 1024 * 1024)
+
+__forceinline void* Compiler::compGetMemArray(size_t numElem, size_t elemSize, CompMemKind cmk)
+{
+ if (numElem > (MAX_MEMORY_PER_ALLOCATION / elemSize))
+ {
+ NOMEM();
+ }
+
+ return compGetMem(numElem * elemSize, cmk);
+}
+
+__forceinline void* Compiler::compGetMemArrayA(size_t numElem, size_t elemSize, CompMemKind cmk)
+{
+ if (numElem > (MAX_MEMORY_PER_ALLOCATION / elemSize))
+ {
+ NOMEM();
+ }
+
+ return compGetMemA(numElem * elemSize, cmk);
+}
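+
+// Illustrative example (numbers only, no new behavior): with elemSize == 8, any numElem
+// larger than MAX_MEMORY_PER_ALLOCATION / 8 (64M elements) trips NOMEM() before the
+// multiplication numElem * elemSize ever has a chance to overflow.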
+
+/******************************************************************************
+ *
+ * Round up the allocated size so that if this memory block is aligned,
+ * then the next block allocated too will be aligned.
+ * The JIT will always try to keep all the blocks aligned.
+ */
+
+inline void* Compiler::compGetMemA(size_t sz, CompMemKind cmk)
+{
+ assert(sz);
+
+ size_t allocSz = roundUp(sz, sizeof(size_t));
+
+#if MEASURE_MEM_ALLOC
+ genMemStats.AddAlloc(allocSz, cmk);
+#endif
+
+ void* ptr = compAllocator->allocateMemory(allocSz);
+
+ // Verify that the current block is aligned. Only then will the next
+ // block allocated be on an aligned boundary.
+ assert((size_t(ptr) & (sizeof(size_t) - 1)) == 0);
+
+ return ptr;
+}
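+
+// Illustrative example (a sketch of the rounding above): on a 64-bit host, a request for
+// 10 bytes is rounded up to 16 (a multiple of sizeof(size_t)), so the block that follows
+// this one in the arena also starts on a size_t-aligned boundary.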
+
+inline void Compiler::compFreeMem(void* ptr)
+{
+}
+
+#define compFreeMem(ptr) compFreeMem((void*)ptr)
+
+inline bool Compiler::compIsProfilerHookNeeded()
+{
+#ifdef PROFILING_SUPPORTED
+ return compProfilerHookNeeded
+
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+ // IL stubs are excluded by the VM, and we need to do the same even when running
+ // under a COMPlus environment hook that enables profiler hooks
+ || (opts.compJitELTHookEnabled && !(opts.eeFlags & CORJIT_FLG_IL_STUB))
+#endif
+ ;
+#else // PROFILING_SUPPORTED
+ return false;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Check for the special case where the object is the constant 0.
+ * As we can't even fold the tree (null+fldOffs), we are left with
+ * op1 and op2 both being a constant. This causes lots of problems.
+ * We simply grab a temp and assign 0 to it and use it in place of the NULL.
+ */
+
+inline GenTreePtr Compiler::impCheckForNullPointer(GenTreePtr obj)
+{
+ /* If it is not a GC type, we will be able to fold it.
+ So don't need to do anything */
+
+ if (!varTypeIsGC(obj->TypeGet()))
+ {
+ return obj;
+ }
+
+ if (obj->gtOper == GT_CNS_INT)
+ {
+ assert(obj->gtType == TYP_REF || obj->gtType == TYP_BYREF);
+ assert(obj->gtIntCon.gtIconVal == 0);
+
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("CheckForNullPointer"));
+
+ // We don't need to spill while appending as we are only assigning
+ // NULL to a freshly-grabbed temp.
+
+ impAssignTempGen(tmp, obj, (unsigned)CHECK_SPILL_NONE);
+
+ obj = gtNewLclvNode(tmp, obj->gtType);
+ }
+
+ return obj;
+}
+
+/*****************************************************************************
+ *
+ * Check for the special case where the object is the method's original 'this' pointer.
+ * Note that the original 'this' pointer is always local var 0 for a non-static method,
+ * even if we may have created a copy of the 'this' pointer in lvaArg0Var.
+ */
+
+inline bool Compiler::impIsThis(GenTreePtr obj)
+{
+ if (compIsForInlining())
+ {
+ return impInlineInfo->InlinerCompiler->impIsThis(obj);
+ }
+ else
+ {
+ return ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR) && lvaIsOriginalThisArg(obj->gtLclVarCommon.gtLclNum));
+ }
+}
+
+/*****************************************************************************
+ *
+ * Check to see if the delegate is created using "LDFTN <TOK>" or not.
+ */
+
+inline bool Compiler::impIsLDFTN_TOKEN(const BYTE* delegateCreateStart, const BYTE* newobjCodeAddr)
+{
+ assert(newobjCodeAddr[0] == CEE_NEWOBJ);
+ return (newobjCodeAddr - delegateCreateStart == 6 && // LDFTN <TOK> takes 6 bytes
+ delegateCreateStart[0] == CEE_PREFIX1 && delegateCreateStart[1] == (CEE_LDFTN & 0xFF));
+}
+
+/*****************************************************************************
+ *
+ * Check to see if the delegate is created using "DUP LDVIRTFTN <TOK>" or not.
+ */
+
+inline bool Compiler::impIsDUP_LDVIRTFTN_TOKEN(const BYTE* delegateCreateStart, const BYTE* newobjCodeAddr)
+{
+ assert(newobjCodeAddr[0] == CEE_NEWOBJ);
+ return (newobjCodeAddr - delegateCreateStart == 7 && // DUP LDVIRTFTN <TOK> takes 7 bytes
+ delegateCreateStart[0] == CEE_DUP && delegateCreateStart[1] == CEE_PREFIX1 &&
+ delegateCreateStart[2] == (CEE_LDVIRTFTN & 0xFF));
+}
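+
+// Byte-count note (the reasoning behind the 6- and 7-byte checks above): LDFTN is encoded
+// as the 2-byte opcode CEE_PREFIX1 followed by the low byte of CEE_LDFTN, plus a 4-byte
+// token (6 bytes total); the DUP LDVIRTFTN form adds the 1-byte DUP in front (7 bytes total).
+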
+/*****************************************************************************
+ *
+ * Returns true if the compiler instance is created for import only (verification).
+ */
+
+inline bool Compiler::compIsForImportOnly()
+{
+ return ((opts.eeFlags & CORJIT_FLG_IMPORT_ONLY) != 0);
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the compiler instance is created for inlining.
+ */
+
+inline bool Compiler::compIsForInlining()
+{
+ return (impInlineInfo != nullptr);
+}
+
+/*****************************************************************************
+ *
+ * Check the inline result field in the compiler to see if inlining failed or not.
+ */
+
+inline bool Compiler::compDonotInline()
+{
+ if (compIsForInlining())
+ {
+ assert(compInlineResult != nullptr);
+ return compInlineResult->IsFailure();
+ }
+ else
+ {
+ return false;
+ }
+}
+
+inline bool Compiler::impIsPrimitive(CorInfoType jitType)
+{
+ return ((CORINFO_TYPE_BOOL <= jitType && jitType <= CORINFO_TYPE_DOUBLE) || jitType == CORINFO_TYPE_PTR);
+}
+
+/*****************************************************************************
+ *
+ * Get the promotion type of a struct local.
+ */
+
+inline Compiler::lvaPromotionType Compiler::lvaGetPromotionType(const LclVarDsc* varDsc)
+{
+ assert(!varDsc->lvPromoted || varTypeIsPromotable(varDsc) || varDsc->lvUnusedStruct);
+
+ if (!varDsc->lvPromoted)
+ {
+ // no struct promotion for this LclVar
+ return PROMOTION_TYPE_NONE;
+ }
+ if (varDsc->lvDoNotEnregister)
+ {
+ // The struct is not enregistered
+ return PROMOTION_TYPE_DEPENDENT;
+ }
+ if (!varDsc->lvIsParam)
+ {
+ // The struct is a register candidate
+ return PROMOTION_TYPE_INDEPENDENT;
+ }
+
+ // Has struct promotion for arguments been disabled using COMPlus_JitNoStructPromotion=2?
+ if (fgNoStructParamPromotion)
+ {
+ // The struct parameter is not enregistered
+ return PROMOTION_TYPE_DEPENDENT;
+ }
+
+ // We have a parameter that could be enregistered
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+
+ // The struct parameter is a register candidate
+ return PROMOTION_TYPE_INDEPENDENT;
+#else
+ // The struct parameter is not enregistered
+ return PROMOTION_TYPE_DEPENDENT;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Get the promotion type of a struct local.
+ */
+
+inline Compiler::lvaPromotionType Compiler::lvaGetPromotionType(unsigned varNum)
+{
+ assert(varNum < lvaCount);
+ return lvaGetPromotionType(&lvaTable[varNum]);
+}
+
+/*****************************************************************************
+ *
+ * Given a field local, get the promotion type of its parent struct local.
+ */
+
+inline Compiler::lvaPromotionType Compiler::lvaGetParentPromotionType(const LclVarDsc* varDsc)
+{
+ assert(varDsc->lvIsStructField);
+ assert(varDsc->lvParentLcl < lvaCount);
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc->lvParentLcl);
+ assert(promotionType != PROMOTION_TYPE_NONE);
+ return promotionType;
+}
+
+/*****************************************************************************
+ *
+ * Given a field local, get the promotion type of its parent struct local.
+ */
+
+inline Compiler::lvaPromotionType Compiler::lvaGetParentPromotionType(unsigned varNum)
+{
+ assert(varNum < lvaCount);
+ return lvaGetParentPromotionType(&lvaTable[varNum]);
+}
+
+/*****************************************************************************
+ *
+ * Return true if the local is a field local of a promoted struct of type PROMOTION_TYPE_DEPENDENT.
+ * Return false otherwise.
+ */
+
+inline bool Compiler::lvaIsFieldOfDependentlyPromotedStruct(const LclVarDsc* varDsc)
+{
+ if (!varDsc->lvIsStructField)
+ {
+ return false;
+ }
+
+ lvaPromotionType promotionType = lvaGetParentPromotionType(varDsc);
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ return true;
+ }
+
+ assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
+ return false;
+}
+
+//------------------------------------------------------------------------
+// lvaIsGCTracked: Determine whether this var should be reported
+// as tracked for GC purposes.
+//
+// Arguments:
+// varDsc - the LclVarDsc for the var in question.
+//
+// Return Value:
+// Returns true if the variable should be reported as tracked in the GC info.
+//
+// Notes:
+// This never returns true for struct variables, even if they are tracked.
+// This is because struct variables are never tracked as a whole for GC purposes.
+// It is up to the caller to ensure that the fields of struct variables are
+// correctly tracked.
+// On Amd64, we never GC-track fields of dependently promoted structs, even
+// though they may be tracked for optimization purposes.
+// It seems that on x86 and arm, we simply don't track these
+// fields, though I have not verified that. I attempted to make these GC-tracked,
+// but there was too much logic that depends on these being untracked, so changing
+// this would require non-trivial effort.
+
+inline bool Compiler::lvaIsGCTracked(const LclVarDsc* varDsc)
+{
+ if (varDsc->lvTracked && (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF))
+ {
+#ifdef _TARGET_AMD64_
+ return !lvaIsFieldOfDependentlyPromotedStruct(varDsc);
+#else // !_TARGET_AMD64_
+ return true;
+#endif // !_TARGET_AMD64_
+ }
+ else
+ {
+ return false;
+ }
+}
+
+inline void Compiler::EndPhase(Phases phase)
+{
+#if defined(FEATURE_JIT_METHOD_PERF)
+ if (pCompJitTimer != NULL)
+ pCompJitTimer->EndPhase(phase);
+#endif
+#if DUMP_FLOWGRAPHS
+ fgDumpFlowGraph(phase);
+#endif // DUMP_FLOWGRAPHS
+ previousCompletedPhase = phase;
+#ifdef DEBUG
+ if (dumpIR)
+ {
+ if ((*dumpIRPhase == L'*') || (wcscmp(dumpIRPhase, PhaseShortNames[phase]) == 0))
+ {
+ printf("\n");
+ printf("IR after %s (switch: %ls)\n", PhaseEnums[phase], PhaseShortNames[phase]);
+ printf("\n");
+
+ if (dumpIRLinear)
+ {
+ dFuncIR();
+ }
+ else if (dumpIRTrees)
+ {
+ dTrees();
+ }
+
+ // If we are just dumping a single method and we have a request to exit
+ // after dumping, do so now.
+
+ if (dumpIRExit && ((*dumpIRPhase != L'*') || (phase == PHASE_EMIT_GCEH)))
+ {
+ exit(0);
+ }
+ }
+ }
+#endif
+}
+
+/*****************************************************************************/
+bool Compiler::fgExcludeFromSsa(unsigned lclNum)
+{
+ if (opts.MinOpts())
+ {
+ return true; // If we're doing MinOpts, no SSA vars.
+ }
+
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvAddrExposed)
+ {
+ return true; // We exclude address-exposed variables.
+ }
+ if (!varDsc->lvTracked)
+ {
+ return true; // SSA is only done for tracked variables
+ }
+ // lvPromoted structs are never tracked...
+ assert(!varDsc->lvPromoted);
+
+ if (varDsc->lvOverlappingFields)
+ {
+ return true; // Don't use SSA on structs that have overlapping fields
+ }
+
+ if (varDsc->lvIsStructField && (lvaGetParentPromotionType(lclNum) != PROMOTION_TYPE_INDEPENDENT))
+ {
+ // SSA must exclude struct fields that are not independently promoted:
+ // - because we don't model the struct assignment properly when multiple fields can be assigned by one
+ // struct assignment,
+ // - because SSA doesn't allow a single node to contain multiple SSA definitions, and
+ // - because PROMOTION_TYPE_DEPENDENT fields are never candidates for a register.
+ //
+ // Example mscorlib method: CompatibilitySwitches:IsCompatibilitySwitchSet
+ //
+ return true;
+ }
+ // otherwise this variable is *not* excluded for SSA
+ return false;
+}
+
+/*****************************************************************************/
+ValueNum Compiler::GetUseAsgDefVNOrTreeVN(GenTreePtr op)
+{
+ if (op->gtFlags & GTF_VAR_USEASG)
+ {
+ unsigned lclNum = op->AsLclVarCommon()->GetLclNum();
+ unsigned ssaNum = GetSsaNumForLocalVarDef(op);
+ return lvaTable[lclNum].GetPerSsaData(ssaNum)->m_vnPair.GetConservative();
+ }
+ else
+ {
+ return op->gtVNPair.GetConservative();
+ }
+}
+
+/*****************************************************************************/
+unsigned Compiler::GetSsaNumForLocalVarDef(GenTreePtr lcl)
+{
+ // Address-taken variables don't have SSA numbers.
+ if (fgExcludeFromSsa(lcl->AsLclVarCommon()->gtLclNum))
+ {
+ return SsaConfig::RESERVED_SSA_NUM;
+ }
+
+ assert(lcl->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEDEF));
+ if (lcl->gtFlags & GTF_VAR_USEASG)
+ {
+ assert((lcl->gtFlags & GTF_VAR_USEDEF) == 0);
+ // It's an "lcl op= rhs" assignment. "lcl" is both used and defined here;
+ // we've chosen in this case to annotate "lcl" with the SSA number (and VN) of the use,
+ // and to store the SSA number of the def in a side table.
+ unsigned ssaNum;
+ // In case of a remorph (fgMorph) in CSE/AssertionProp after SSA phase, there
+ // wouldn't be an entry for the USEASG portion of the indir addr, return
+ // reserved.
+ if (!GetOpAsgnVarDefSsaNums()->Lookup(lcl, &ssaNum))
+ {
+ return SsaConfig::RESERVED_SSA_NUM;
+ }
+ return ssaNum;
+ }
+ else
+ {
+ return lcl->AsLclVarCommon()->gtSsaNum;
+ }
+}
+
+/*****************************************************************************
+ * operator new
+ *
+ * Note that compGetMem is an arena allocator that returns memory that is
+ * not zero-initialized and can contain data from a prior allocation lifetime.
+ * It also requires that 'sz' be aligned to a multiple of sizeof(int).
+ */
+
+inline void* __cdecl operator new(size_t sz, Compiler* context, CompMemKind cmk)
+{
+ sz = AlignUp(sz, sizeof(int));
+ assert(sz != 0 && (sz & (sizeof(int) - 1)) == 0);
+ return context->compGetMem(sz, cmk);
+}
+
+inline void* __cdecl operator new[](size_t sz, Compiler* context, CompMemKind cmk)
+{
+ sz = AlignUp(sz, sizeof(int));
+ assert(sz != 0 && (sz & (sizeof(int) - 1)) == 0);
+ return context->compGetMem(sz, cmk);
+}
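+
+// Illustrative usage (a sketch; the memory kind chosen here is arbitrary):
+// unsigned* table = new (this, CMK_Unknown) unsigned[count];
+// allocates from the compiler's arena, and the memory is released with the rest of the
+// compilation's allocations rather than via delete.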
+
+inline void* __cdecl operator new(size_t sz, void* p, const jitstd::placement_t& /* syntax_difference */)
+{
+ return p;
+}
+
+inline void* __cdecl operator new(size_t sz, IAllocator* alloc)
+{
+ return alloc->Alloc(sz);
+}
+
+inline void* __cdecl operator new[](size_t sz, IAllocator* alloc)
+{
+ return alloc->Alloc(sz);
+}
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+inline void printRegMask(regMaskTP mask)
+{
+ printf(REG_MASK_ALL_FMT, mask);
+}
+
+inline char* regMaskToString(regMaskTP mask, Compiler* context)
+{
+ const size_t cchRegMask = 24;
+ char* regmask = new (context, CMK_Unknown) char[cchRegMask];
+
+ sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask);
+
+ return regmask;
+}
+
+inline void printRegMaskInt(regMaskTP mask)
+{
+ printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT));
+}
+
+inline char* regMaskIntToString(regMaskTP mask, Compiler* context)
+{
+ const size_t cchRegMask = 24;
+ char* regmask = new (context, CMK_Unknown) char[cchRegMask];
+
+ sprintf_s(regmask, cchRegMask, REG_MASK_INT_FMT, (mask & RBM_ALLINT));
+
+ return regmask;
+}
+
+#endif // DEBUG
+
+inline void BasicBlock::InitVarSets(Compiler* comp)
+{
+ VarSetOps::AssignNoCopy(comp, bbVarUse, VarSetOps::MakeEmpty(comp));
+ VarSetOps::AssignNoCopy(comp, bbVarDef, VarSetOps::MakeEmpty(comp));
+ VarSetOps::AssignNoCopy(comp, bbVarTmp, VarSetOps::MakeEmpty(comp));
+ VarSetOps::AssignNoCopy(comp, bbLiveIn, VarSetOps::MakeEmpty(comp));
+ VarSetOps::AssignNoCopy(comp, bbLiveOut, VarSetOps::MakeEmpty(comp));
+ VarSetOps::AssignNoCopy(comp, bbScope, VarSetOps::MakeEmpty(comp));
+
+ bbHeapUse = false;
+ bbHeapDef = false;
+ bbHeapLiveIn = false;
+ bbHeapLiveOut = false;
+}
+
+// Returns true if the basic block ends with GT_JMP
+inline bool BasicBlock::endsWithJmpMethod(Compiler* comp)
+{
+ if (comp->compJmpOpUsed && (bbJumpKind == BBJ_RETURN) && (bbFlags & BBF_HAS_JMP))
+ {
+ GenTree* lastNode = this->lastNode();
+ assert(lastNode != nullptr);
+ return lastNode->OperGet() == GT_JMP;
+ }
+
+ return false;
+}
+
+// Returns true if the basic block ends with either
+// i) GT_JMP or
+// ii) tail call (implicit or explicit)
+//
+// Params:
+// comp - Compiler instance
+// fastTailCallsOnly - Only consider fast tail calls excluding tail calls via helper.
+inline bool BasicBlock::endsWithTailCallOrJmp(Compiler* comp, bool fastTailCallsOnly /*=false*/)
+{
+ GenTreePtr tailCall = nullptr;
+ bool tailCallsConvertibleToLoopOnly = false;
+ return endsWithJmpMethod(comp) ||
+ endsWithTailCall(comp, fastTailCallsOnly, tailCallsConvertibleToLoopOnly, &tailCall);
+}
+
+//------------------------------------------------------------------------------
+// endsWithTailCall : Check if the block ends with a tail call.
+//
+// Arguments:
+// comp - compiler instance
+// fastTailCallsOnly - check for fast tail calls only
+// tailCallsConvertibleToLoopOnly - check for tail calls convertible to loop only
+// tailCall - a pointer to a tree that will be set to the call tree if the block
+// ends with a tail call and will be set to nullptr otherwise.
+//
+// Return Value:
+// true if the block ends with a tail call; false otherwise.
+//
+// Notes:
+// At most one of fastTailCallsOnly and tailCallsConvertibleToLoopOnly flags can be true.
+
+inline bool BasicBlock::endsWithTailCall(Compiler* comp,
+ bool fastTailCallsOnly,
+ bool tailCallsConvertibleToLoopOnly,
+ GenTree** tailCall)
+{
+ assert(!fastTailCallsOnly || !tailCallsConvertibleToLoopOnly);
+ *tailCall = nullptr;
+ bool result = false;
+
+ // Is this a tail call?
+ // The reason for keeping this under RyuJIT is so as not to impact existing Jit32 x86 and arm
+ // targets.
+ if (comp->compTailCallUsed)
+ {
+ if (fastTailCallsOnly || tailCallsConvertibleToLoopOnly)
+ {
+ // Only fast tail calls or only tail calls convertible to loops
+ result = (bbFlags & BBF_HAS_JMP) && (bbJumpKind == BBJ_RETURN);
+ }
+ else
+ {
+ // Fast tail calls, tail calls convertible to loops, and tail calls dispatched via helper
+ result = (bbJumpKind == BBJ_THROW) || ((bbFlags & BBF_HAS_JMP) && (bbJumpKind == BBJ_RETURN));
+ }
+
+ if (result)
+ {
+ GenTree* lastNode = this->lastNode();
+ if (lastNode->OperGet() == GT_CALL)
+ {
+ GenTreeCall* call = lastNode->AsCall();
+ if (tailCallsConvertibleToLoopOnly)
+ {
+ result = call->IsTailCallConvertibleToLoop();
+ }
+ else if (fastTailCallsOnly)
+ {
+ result = call->IsFastTailCall();
+ }
+ else
+ {
+ result = call->IsTailCall();
+ }
+
+ if (result)
+ {
+ *tailCall = call;
+ }
+ }
+ else
+ {
+ result = false;
+ }
+ }
+ }
+
+ return result;
+}
+
+//------------------------------------------------------------------------------
+// endsWithTailCallConvertibleToLoop : Check if the block ends with a tail call convertible to loop.
+//
+// Arguments:
+// comp - compiler instance
+// tailCall - a pointer to a tree that will be set to the call tree if the block
+// ends with a tail call convertible to loop and will be set to nullptr otherwise.
+//
+// Return Value:
+// true if the block ends with a tail call convertible to loop.
+
+inline bool BasicBlock::endsWithTailCallConvertibleToLoop(Compiler* comp, GenTree** tailCall)
+{
+ bool fastTailCallsOnly = false;
+ bool tailCallsConvertibleToLoopOnly = true;
+ return endsWithTailCall(comp, fastTailCallsOnly, tailCallsConvertibleToLoopOnly, tailCall);
+}
+
+inline static bool StructHasOverlappingFields(DWORD attribs)
+{
+ return ((attribs & CORINFO_FLG_OVERLAPPING_FIELDS) != 0);
+}
+
+inline static bool StructHasCustomLayout(DWORD attribs)
+{
+ return ((attribs & CORINFO_FLG_CUSTOMLAYOUT) != 0);
+}
+
+/*****************************************************************************
+ * This node should not be referenced by anyone now. Set its values to garbage
+ * to catch extra references
+ */
+
+inline void DEBUG_DESTROY_NODE(GenTreePtr tree)
+{
+#ifdef DEBUG
+ // printf("DEBUG_DESTROY_NODE for [0x%08x]\n", tree);
+
+ // Save gtOper in case we want to find out what this node was
+ tree->gtOperSave = tree->gtOper;
+
+ tree->gtType = TYP_UNDEF;
+ tree->gtFlags |= 0xFFFFFFFF & ~GTF_NODE_MASK;
+ if (tree->OperIsSimple())
+ {
+ tree->gtOp.gtOp1 = tree->gtOp.gtOp2 = nullptr;
+ }
+ // Must do this last, because the "gtOp" check above will fail otherwise.
+ // Don't call SetOper, because GT_COUNT is not a valid value
+ tree->gtOper = GT_COUNT;
+#endif
+}
+
+/*****************************************************************************/
+#endif //_COMPILER_HPP_
+/*****************************************************************************/
diff --git a/src/jit/compilerbitsettraits.h b/src/jit/compilerbitsettraits.h
new file mode 100644
index 0000000000..4365c518d7
--- /dev/null
+++ b/src/jit/compilerbitsettraits.h
@@ -0,0 +1,130 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef CompilerBitSetTraits_DEFINED
+#define CompilerBitSetTraits_DEFINED 1
+
+#include "bitset.h"
+#include "compiler.h"
+#include "iallocator.h"
+#include "bitsetasshortlong.h"
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// CompAllocBitSetTraits: a base class for other BitSet traits classes.
+//
+// The classes in this file define "BitSetTraits" arguments to the "BitSetOps" type, ones that assume that
+// Compiler* is the "Env" type.
+//
+// This class just captures the compiler's allocator as an IAllocator.
+//
+class CompAllocBitSetTraits
+{
+public:
+ static inline IAllocator* GetAllocator(class Compiler* comp);
+
+#ifdef DEBUG
+ static inline IAllocator* GetDebugOnlyAllocator(class Compiler* comp);
+#endif // DEBUG
+};
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// TrackedVarBitSetTraits
+//
+// This class customizes the bit set to represent sets of tracked local vars.
+// The size of the bitset is determined by the # of tracked locals (up to some internal
+// maximum), and the Compiler* tracks the tracked local epochs.
+//
+class TrackedVarBitSetTraits : public CompAllocBitSetTraits
+{
+public:
+ static inline unsigned GetSize(Compiler* comp);
+
+ static inline unsigned GetArrSize(Compiler* comp, unsigned elemSize);
+
+ static inline unsigned GetEpoch(class Compiler* comp);
+
+ static inline BitSetSupport::BitSetOpCounter* GetOpCounter(Compiler* comp);
+};
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// AllVarBitSetTraits
+//
+// This class customizes the bit set to represent sets of all local vars (tracked or not) --
+// at least up to some maximum index. (This index is private to the Compiler, and it is
+// the responsibility of the compiler not to use indices >= this maximum.)
+// We rely on the fact that variables are never deleted, and therefore use the
+// total # of locals as the epoch number (up to the maximum).
+//
+class AllVarBitSetTraits : public CompAllocBitSetTraits
+{
+public:
+ static inline unsigned GetSize(Compiler* comp);
+
+ static inline unsigned GetArrSize(Compiler* comp, unsigned elemSize);
+
+ static inline unsigned GetEpoch(class Compiler* comp);
+
+ static inline BitSetSupport::BitSetOpCounter* GetOpCounter(Compiler* comp);
+};
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// BasicBlockBitSetTraits
+//
+// This class customizes the bit set to represent sets of BasicBlocks.
+// The size of the bitset is determined by the maximum assigned BasicBlock number
+// (Compiler::fgBBNumMax). (Note that fgBBcount is not equal to this during inlining,
+// when fgBBcount is the number of blocks in the inlined function, but the assigned
+// block numbers are higher than those of the inliner function; fgBBNumMax counts both.
+// Thus, if you only care about the inlinee, during inlining this bit set will waste
+// the lower-numbered block bits.) The Compiler* tracks the BasicBlock epochs.
+//
+class BasicBlockBitSetTraits : public CompAllocBitSetTraits
+{
+public:
+ static inline unsigned GetSize(Compiler* comp);
+
+ static inline unsigned GetArrSize(Compiler* comp, unsigned elemSize);
+
+ static inline unsigned GetEpoch(class Compiler* comp);
+
+ static inline BitSetSupport::BitSetOpCounter* GetOpCounter(Compiler* comp);
+};
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// BitVecTraits
+//
+// This class simplifies creation and usage of "ShortLong" bitsets.
+//
+struct BitVecTraits
+{
+private:
+ unsigned size;
+ Compiler* comp;
+
+public:
+ BitVecTraits(unsigned size, Compiler* comp) : size(size), comp(comp)
+ {
+ }
+
+ static inline IAllocator* GetAllocator(BitVecTraits* b);
+
+#ifdef DEBUG
+ static inline IAllocator* GetDebugOnlyAllocator(BitVecTraits* b);
+#endif // DEBUG
+
+ static inline unsigned GetSize(BitVecTraits* b);
+
+ static inline unsigned GetArrSize(BitVecTraits* b, unsigned elemSize);
+
+ static inline unsigned GetEpoch(BitVecTraits* b);
+
+ static inline BitSetSupport::BitSetOpCounter* GetOpCounter(BitVecTraits* b);
+};
+
+#endif // CompilerBitSetTraits_DEFINED
diff --git a/src/jit/compilerbitsettraits.hpp b/src/jit/compilerbitsettraits.hpp
new file mode 100644
index 0000000000..e2ba2f8a7a
--- /dev/null
+++ b/src/jit/compilerbitsettraits.hpp
@@ -0,0 +1,181 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef CompilerBitSetTraits_HPP_DEFINED
+#define CompilerBitSetTraits_HPP_DEFINED 1
+
+#include "compilerbitsettraits.h"
+#include "compiler.h"
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// CompAllocBitSetTraits
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// static
+IAllocator* CompAllocBitSetTraits::GetAllocator(Compiler* comp)
+{
+ return comp->getAllocatorBitset();
+}
+
+#ifdef DEBUG
+// static
+IAllocator* CompAllocBitSetTraits::GetDebugOnlyAllocator(Compiler* comp)
+{
+ return comp->getAllocatorDebugOnly();
+}
+#endif // DEBUG
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// TrackedVarBitSetTraits
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// static
+unsigned TrackedVarBitSetTraits::GetSize(Compiler* comp)
+{
+ return comp->lvaTrackedCount;
+}
+
+// static
+unsigned TrackedVarBitSetTraits::GetArrSize(Compiler* comp, unsigned elemSize)
+{
+ assert(elemSize == sizeof(size_t));
+ return comp->lvaTrackedCountInSizeTUnits;
+}
+
+// static
+unsigned TrackedVarBitSetTraits::GetEpoch(Compiler* comp)
+{
+ return comp->GetCurLVEpoch();
+}
+
+// static
+BitSetSupport::BitSetOpCounter* TrackedVarBitSetTraits::GetOpCounter(Compiler* comp)
+{
+#if VARSET_COUNTOPS
+ return &Compiler::m_varsetOpCounter;
+#else
+ return nullptr;
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// AllVarBitSetTraits
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// static
+unsigned AllVarBitSetTraits::GetSize(Compiler* comp)
+{
+ return min(comp->lvaCount, lclMAX_ALLSET_TRACKED);
+}
+
+// static
+unsigned AllVarBitSetTraits::GetArrSize(Compiler* comp, unsigned elemSize)
+{
+ return unsigned(roundUp(GetSize(comp), elemSize));
+}
+
+// static
+unsigned AllVarBitSetTraits::GetEpoch(Compiler* comp)
+{
+ return GetSize(comp);
+}
+
+// static
+BitSetSupport::BitSetOpCounter* AllVarBitSetTraits::GetOpCounter(Compiler* comp)
+{
+#if ALLVARSET_COUNTOPS
+ return &Compiler::m_allvarsetOpCounter;
+#else
+ return nullptr;
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// BasicBlockBitSetTraits
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// static
+unsigned BasicBlockBitSetTraits::GetSize(Compiler* comp)
+{
+ return comp->fgCurBBEpochSize;
+}
+
+// static
+unsigned BasicBlockBitSetTraits::GetArrSize(Compiler* comp, unsigned elemSize)
+{
+ // Assert that the epoch has been initialized. This is a convenient place to assert this because
+ // GetArrSize() is called for every function, via IsShort().
+ assert(GetEpoch(comp) != 0);
+
+ assert(elemSize == sizeof(size_t));
+ return comp->fgBBSetCountInSizeTUnits; // This is precomputed to avoid doing math every time this function is called
+}
+
+// static
+unsigned BasicBlockBitSetTraits::GetEpoch(Compiler* comp)
+{
+ return comp->GetCurBasicBlockEpoch();
+}
+
+// static
+BitSetSupport::BitSetOpCounter* BasicBlockBitSetTraits::GetOpCounter(Compiler* comp)
+{
+ return nullptr;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// BitVecTraits
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// static
+IAllocator* BitVecTraits::GetAllocator(BitVecTraits* b)
+{
+ return b->comp->getAllocatorBitset();
+}
+
+#ifdef DEBUG
+// static
+IAllocator* BitVecTraits::GetDebugOnlyAllocator(BitVecTraits* b)
+{
+ return b->comp->getAllocatorDebugOnly();
+}
+#endif // DEBUG
+
+// static
+unsigned BitVecTraits::GetSize(BitVecTraits* b)
+{
+ return b->size;
+}
+
+// static
+unsigned BitVecTraits::GetArrSize(BitVecTraits* b, unsigned elemSize)
+{
+ assert(elemSize == sizeof(size_t));
+ unsigned elemBits = 8 * elemSize;
+ return (unsigned)roundUp(b->size, elemBits) / elemBits;
+}
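+
+// Illustrative example (numbers only): with b->size == 70 and 64-bit size_t words,
+// GetArrSize returns roundUp(70, 64) / 64 == 2, i.e. two size_t words are needed to
+// hold the 70-bit vector.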
+
+// static
+unsigned BitVecTraits::GetEpoch(BitVecTraits* b)
+{
+ return b->size;
+}
+
+// static
+BitSetSupport::BitSetOpCounter* BitVecTraits::GetOpCounter(BitVecTraits* b)
+{
+ return nullptr;
+}
+
+#endif // CompilerBitSetTraits_HPP_DEFINED
diff --git a/src/jit/compmemkind.h b/src/jit/compmemkind.h
new file mode 100644
index 0000000000..e27d2071f7
--- /dev/null
+++ b/src/jit/compmemkind.h
@@ -0,0 +1,56 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef CompMemKindMacro
+#error Define CompMemKindMacro before including this file.
+#endif
+
+// This list of macro invocations should be used to define the CompMemKind enumeration,
+// and the corresponding array of string names for these enum members.
+
+// clang-format off
+CompMemKindMacro(AssertionProp)
+CompMemKindMacro(ASTNode)
+CompMemKindMacro(InstDesc)
+CompMemKindMacro(ImpStack)
+CompMemKindMacro(BasicBlock)
+CompMemKindMacro(fgArgInfo)
+CompMemKindMacro(fgArgInfoPtrArr)
+CompMemKindMacro(FlowList)
+CompMemKindMacro(TreeStatementList)
+CompMemKindMacro(SiScope)
+CompMemKindMacro(FlatFPStateX87)
+CompMemKindMacro(DominatorMemory)
+CompMemKindMacro(LSRA)
+CompMemKindMacro(LSRA_Interval)
+CompMemKindMacro(LSRA_RefPosition)
+CompMemKindMacro(Reachability)
+CompMemKindMacro(SSA)
+CompMemKindMacro(ValueNumber)
+CompMemKindMacro(LvaTable)
+CompMemKindMacro(UnwindInfo)
+CompMemKindMacro(hashBv)
+CompMemKindMacro(bitset)
+CompMemKindMacro(FixedBitVect)
+CompMemKindMacro(AsIAllocator)
+CompMemKindMacro(IndirAssignMap)
+CompMemKindMacro(FieldSeqStore)
+CompMemKindMacro(ZeroOffsetFieldMap)
+CompMemKindMacro(ArrayInfoMap)
+CompMemKindMacro(HeapPhiArg)
+CompMemKindMacro(CSE)
+CompMemKindMacro(GC)
+CompMemKindMacro(CorSig)
+CompMemKindMacro(Inlining)
+CompMemKindMacro(ArrayStack)
+CompMemKindMacro(DebugInfo)
+CompMemKindMacro(DebugOnly)
+CompMemKindMacro(Codegen)
+CompMemKindMacro(LoopOpt)
+CompMemKindMacro(LoopHoist)
+CompMemKindMacro(Unknown)
+// clang-format on
+
+#undef CompMemKindMacro
diff --git a/src/jit/compphases.h b/src/jit/compphases.h
new file mode 100644
index 0000000000..f193d04647
--- /dev/null
+++ b/src/jit/compphases.h
@@ -0,0 +1,91 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: CompPhases.h
+//
+
+//
+// Names of x86 JIT phases, in order. Assumes that the caller defines CompPhaseNameMacro
+// in a useful way before including this file, e.g., to define the phase enumeration and the
+// corresponding array of string names of those phases. This include file undefines CompPhaseNameMacro
+// after the last use.
+// The arguments are:
+// CompPhaseNameMacro(enumName, stringName, hasChildren, parent)
+// "enumName" is an Enumeration-style all-caps name.
+// "stringName" is a self-explanatory.
+// "hasChildren" is true if this phase is broken out into subphases.
+// (We should never do EndPhase on a phase that has children, only on 'leaf phases.')
+// "parent" is -1 for leaf phases, otherwise it is the "enumName" of the parent phase.
+
+// clang-format off
+CompPhaseNameMacro(PHASE_PRE_IMPORT, "Pre-import", "PRE-IMP", false, -1)
+CompPhaseNameMacro(PHASE_IMPORTATION, "Importation", "IMPORT", false, -1)
+CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import", "POST-IMP", false, -1)
+CompPhaseNameMacro(PHASE_MORPH, "Morph", "MORPH", false, -1)
+CompPhaseNameMacro(PHASE_GS_COOKIE, "GS Cookie", "GS-COOK", false, -1)
+CompPhaseNameMacro(PHASE_COMPUTE_PREDS, "Compute preds", "PREDS", false, -1)
+CompPhaseNameMacro(PHASE_MARK_GC_POLL_BLOCKS, "Mark GC poll blocks", "GC-POLL", false, -1)
+CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS, "Compute edge weights (1)", "EDG-WGT", false, -1)
+#if FEATURE_EH_FUNCLETS
+CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets", "EH-FUNC", false, -1)
+#endif // FEATURE_EH_FUNCLETS
+CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT, "Optimize layout", "LAYOUT", false, -1)
+CompPhaseNameMacro(PHASE_ALLOCATE_OBJECTS, "Allocate Objects", "ALLOC-OBJ", false, -1)
+CompPhaseNameMacro(PHASE_OPTIMIZE_LOOPS, "Optimize loops", "LOOP-OPT", false, -1)
+CompPhaseNameMacro(PHASE_CLONE_LOOPS, "Clone loops", "LP-CLONE", false, -1)
+CompPhaseNameMacro(PHASE_UNROLL_LOOPS, "Unroll loops", "UNROLL", false, -1)
+CompPhaseNameMacro(PHASE_HOIST_LOOP_CODE, "Hoist loop code", "LP-HOIST", false, -1)
+CompPhaseNameMacro(PHASE_MARK_LOCAL_VARS, "Mark local vars", "MARK-LCL", false, -1)
+CompPhaseNameMacro(PHASE_OPTIMIZE_BOOLS, "Optimize bools", "OPT-BOOL", false, -1)
+CompPhaseNameMacro(PHASE_FIND_OPER_ORDER, "Find oper order", "OPER-ORD", false, -1)
+CompPhaseNameMacro(PHASE_SET_BLOCK_ORDER, "Set block order", "BLK-ORD", false, -1)
+CompPhaseNameMacro(PHASE_BUILD_SSA, "Build SSA representation", "SSA", true, -1)
+CompPhaseNameMacro(PHASE_BUILD_SSA_TOPOSORT, "SSA: topological sort", "SSA-SORT", false, PHASE_BUILD_SSA)
+CompPhaseNameMacro(PHASE_BUILD_SSA_DOMS, "SSA: Doms1", "SSA-DOMS", false, PHASE_BUILD_SSA)
+CompPhaseNameMacro(PHASE_BUILD_SSA_LIVENESS, "SSA: liveness", "SSA-LIVE", false, PHASE_BUILD_SSA)
+CompPhaseNameMacro(PHASE_BUILD_SSA_IDF, "SSA: IDF", "SSA-IDF", false, PHASE_BUILD_SSA)
+CompPhaseNameMacro(PHASE_BUILD_SSA_INSERT_PHIS, "SSA: insert phis", "SSA-PHI", false, PHASE_BUILD_SSA)
+CompPhaseNameMacro(PHASE_BUILD_SSA_RENAME, "SSA: rename", "SSA-REN", false, PHASE_BUILD_SSA)
+
+CompPhaseNameMacro(PHASE_EARLY_PROP, "Early Value Propagation", "ERL-PROP", false, -1)
+CompPhaseNameMacro(PHASE_VALUE_NUMBER, "Do value numbering", "VAL-NUM", false, -1)
+
+CompPhaseNameMacro(PHASE_OPTIMIZE_INDEX_CHECKS, "Optimize index checks", "OPT-CHK", false, -1)
+
+#if FEATURE_VALNUM_CSE
+CompPhaseNameMacro(PHASE_OPTIMIZE_VALNUM_CSES, "Optimize Valnum CSEs", "OPT-CSE", false, -1)
+#endif
+
+CompPhaseNameMacro(PHASE_VN_COPY_PROP, "VN based copy prop", "CP-PROP", false, -1)
+#if ASSERTION_PROP
+CompPhaseNameMacro(PHASE_ASSERTION_PROP_MAIN, "Assertion prop", "AST-PROP", false, -1)
+#endif
+CompPhaseNameMacro(PHASE_UPDATE_FLOW_GRAPH, "Update flow graph", "UPD-FG", false, -1)
+CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS2, "Compute edge weights (2)", "EDG-WGT2", false, -1)
+CompPhaseNameMacro(PHASE_DETERMINE_FIRST_COLD_BLOCK, "Determine first cold block", "COLD-BLK", false, -1)
+CompPhaseNameMacro(PHASE_RATIONALIZE, "Rationalize IR", "RAT", false, -1)
+CompPhaseNameMacro(PHASE_SIMPLE_LOWERING, "Do 'simple' lowering", "SMP-LWR", false, -1)
+
+CompPhaseNameMacro(PHASE_LCLVARLIVENESS, "Local var liveness", "LIVENESS", true, -1)
+CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INIT, "Local var liveness init", "LIV-INIT", false, PHASE_LCLVARLIVENESS)
+CompPhaseNameMacro(PHASE_LCLVARLIVENESS_PERBLOCK, "Per block local var liveness", "LIV-BLK", false, PHASE_LCLVARLIVENESS)
+CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INTERBLOCK, "Global local var liveness", "LIV-GLBL", false, PHASE_LCLVARLIVENESS)
+
+#ifdef LEGACY_BACKEND
+CompPhaseNameMacro(PHASE_RA_ASSIGN_VARS, "RA assign vars", "REGALLOC", false, -1)
+#endif // LEGACY_BACKEND
+CompPhaseNameMacro(PHASE_LOWERING_DECOMP, "Lowering decomposition", "LWR-DEC", false, -1)
+CompPhaseNameMacro(PHASE_LOWERING, "Lowering nodeinfo", "LWR-INFO", false, -1)
+#ifndef LEGACY_BACKEND
+CompPhaseNameMacro(PHASE_LINEAR_SCAN, "Linear scan register alloc", "LSRA", true, -1)
+CompPhaseNameMacro(PHASE_LINEAR_SCAN_BUILD, "LSRA build intervals", "LSRA-BLD", false, PHASE_LINEAR_SCAN)
+CompPhaseNameMacro(PHASE_LINEAR_SCAN_ALLOC, "LSRA allocate", "LSRA-ALL", false, PHASE_LINEAR_SCAN)
+CompPhaseNameMacro(PHASE_LINEAR_SCAN_RESOLVE, "LSRA resolve", "LSRA-RES", false, PHASE_LINEAR_SCAN)
+#endif // !LEGACY_BACKEND
+CompPhaseNameMacro(PHASE_GENERATE_CODE, "Generate code", "CODEGEN", false, -1)
+CompPhaseNameMacro(PHASE_EMIT_CODE, "Emit code", "EMIT", false, -1)
+CompPhaseNameMacro(PHASE_EMIT_GCEH, "Emit GC+EH tables", "EMT-GCEH", false, -1)
+// clang-format on
+
+#undef CompPhaseNameMacro
diff --git a/src/jit/conventions.txt b/src/jit/conventions.txt
new file mode 100644
index 0000000000..2984ed6043
--- /dev/null
+++ b/src/jit/conventions.txt
@@ -0,0 +1,81 @@
+This file contains an extracted, plain-text version of some of the "CLR JIT
+Coding Conventions" document, which can be used as a template when writing new
+comments in the JIT source code. The definitive coding conventions document is
+located here:
+
+https://github.com/dotnet/coreclr/blob/master/Documentation/coding-guidelines/clr-jit-coding-conventions.md
+
+
+********** Section 7.1.5 TODO comments
+
+This is the format to be used:
+
+// TODO[-Arch][-Platform][-CQ|-Throughput|-Cleanup|-Bug|-Bug?]: description of the issue
+
+-- One type modifier (CQ, Throughput, Cleanup, Bug or Bug?) must be specified.
+-- The -Arch and -Platform modifiers are optional. Use all-caps for actual
+architectures (e.g. AMD64, X86, ARM, ARM64) and Pascal casing for platforms and
+architecture classes (e.g. ARMArch, LdStArch, XArch, Unix, Windows).
+-- This list is not intended to be exhaustive.
+
+Examples:
+
+ // TODO-LdStArch-Bug: Should regTmp be a dst on the node or an internal reg?
+ // Either way, it is not currently being handled by Lowering.
+
+ // TODO-CQ: based on whether src type is aligned use movaps instead.
+
+ // TODO-Cleanup: Add a comment about why this is unreached() for RyuJIT backend.
+
+ // TODO-Arm64-Bug: handle large constants! Probably need something like the ARM
+ // case above: if (arm_Valid_Imm_For_Instr(ins, val)) ...
+
+
+********** Section 9.4 Function header comment
+
+All functions, except trivial accessors and wrappers, should have a function
+header comment which describes the behavior and the implementation details of
+the function. The format of the function header in an implementation file is
+as shown below.
+
+Within the comment, argument names (and other program-related names) should be
+surrounded by double quotes, to emphasize that they are program objects and not
+simple English words. This helps clarify those cases where a function argument
+might be parsed (by a human) either way.
+
+Any of the sections that do not apply to a method may be skipped. For example,
+if a method has no arguments, the "Arguments" section can be omitted. If a
+function is a void return function, the "Return Value" section can be omitted.
+
+If you can formulate any assumptions as asserts in the code itself, you should
+do so. The "Assumptions" section is intended to encapsulate things that are
+harder (or impossible) to formulate as asserts, or to provide a place to write
+a more easily read English description of any assumptions that exist, even if
+they can be written with asserts.
+
+
+//------------------------------------------------------------------------
+// <Function name>: <Short description of the function>
+//
+// <Full description of the function>
+//
+// Arguments:
+// <argument1-name> - Description of argument 1
+// <argument2-name> - Description of argument 2
+// ... one line for each function argument
+//
+// Return Value:
+// Description of the values this function could return
+//    and under what conditions. When the return value is
+//    described as a function of the arguments, those arguments
+// should be mentioned specifically by name.
+//
+// Assumptions:
+// Any entry and exit conditions, such as required preconditions of
+// data structures, memory to be freed by caller, etc.
+//
+// Notes:
+// More detailed notes about the function.
+// What errors can the function return?
+// What other methods are related or alternatives to be considered?
+
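+For example, a header for a hypothetical function (the function, argument, and
+type names below are purely illustrative) might look like this:
+
+//------------------------------------------------------------------------
+// optFindLoopExit: Find the unique exit block of a natural loop, if one exists.
+//
+// Arguments:
+//    loopHead   - The loop's entry block.
+//    loopBlocks - The set of blocks making up the loop body.
+//
+// Return Value:
+//    The single exit block of the loop described by "loopHead" and "loopBlocks",
+//    or nullptr if the loop has zero or multiple exits.
+//
+// Assumptions:
+//    "loopBlocks" describes a natural loop whose entry block is "loopHead".
+//
+// Notes:
+//    Callers that require a unique exit must handle the nullptr return.
+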
diff --git a/src/jit/copyprop.cpp b/src/jit/copyprop.cpp
new file mode 100644
index 0000000000..bf714f0963
--- /dev/null
+++ b/src/jit/copyprop.cpp
@@ -0,0 +1,463 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+//
+// CopyProp
+//
+// This stage performs value-numbering-based copy propagation. Since copy propagation
+// is about data flow, these copies cannot be discovered in the assertion prop phase;
+// assertion prop only identifies copies implied by control flow, e.g., the equality
+// that holds on the taken edge of "if (a == b)".
+//
+// To identify data flow copies, we follow an approach similar to SSA renaming:
+// we walk each path in the graph, keeping track of every live definition. Thus,
+// when we see a variable that shares its VN with a live definition, we replace it
+// with the variable from that definition, if suitable.
+///////////////////////////////////////////////////////////////////////////////////////
+
+#include "jitpch.h"
+#include "ssabuilder.h"
+
+template <typename T>
+inline static T* allocate_any(jitstd::allocator<void>& alloc, size_t count = 1)
+{
+ return jitstd::allocator<T>(alloc).allocate(count);
+}
+
+/**************************************************************************************
+ *
+ * Corresponding to the live definition pushes, pop the stacks as we finish the sub-paths
+ * of the graph originating from the block. Refer to SSA renaming for any additional info.
+ * "curSsaName" tracks the currently live definitions.
+ */
+void Compiler::optBlockCopyPropPopStacks(BasicBlock* block, LclNumToGenTreePtrStack* curSsaName)
+{
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (!tree->IsLocal())
+ {
+ continue;
+ }
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ if (fgExcludeFromSsa(lclNum))
+ {
+ continue;
+ }
+ if (tree->gtFlags & GTF_VAR_DEF)
+ {
+ GenTreePtrStack* stack = nullptr;
+ curSsaName->Lookup(lclNum, &stack);
+ stack->Pop();
+ if (stack->Height() == 0)
+ {
+ curSsaName->Remove(lclNum);
+ }
+ }
+ }
+ }
+}
+
+/*******************************************************************************************************
+ *
+ * Given the "lclVar" and "copyVar" compute if the copy prop will be beneficial.
+ *
+ */
+int Compiler::optCopyProp_LclVarScore(LclVarDsc* lclVarDsc, LclVarDsc* copyVarDsc, bool preferOp2)
+{
+ int score = 0;
+
+ if (lclVarDsc->lvVolatileHint)
+ {
+ score += 4;
+ }
+
+ if (copyVarDsc->lvVolatileHint)
+ {
+ score -= 4;
+ }
+
+ if (lclVarDsc->lvDoNotEnregister)
+ {
+ score += 4;
+ }
+
+ if (copyVarDsc->lvDoNotEnregister)
+ {
+ score -= 4;
+ }
+
+#ifdef _TARGET_X86_
+ // For doubles we also prefer to change parameters into non-parameter local variables
+ if (lclVarDsc->lvType == TYP_DOUBLE)
+ {
+ if (lclVarDsc->lvIsParam)
+ {
+ score += 2;
+ }
+
+ if (copyVarDsc->lvIsParam)
+ {
+ score -= 2;
+ }
+ }
+#endif
+
+ // Otherwise we prefer to use the op2LclNum
+ return score + ((preferOp2) ? 1 : -1);
+}
+
+/**************************************************************************************
+ *
+ * Perform copy propagation on a given tree as we walk the graph. If it is a local
+ * variable, look up all currently live definitions and check whether any of those
+ * definitions shares its value number. If so, we can make the replacement.
+ *
+ */
+void Compiler::optCopyProp(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree, LclNumToGenTreePtrStack* curSsaName)
+{
+ // TODO-Review: EH successor/predecessor iteration seems broken.
+ if (block->bbCatchTyp == BBCT_FINALLY || block->bbCatchTyp == BBCT_FAULT)
+ {
+ return;
+ }
+
+ // If not local nothing to do.
+ if (!tree->IsLocal())
+ {
+ return;
+ }
+ if (tree->OperGet() == GT_PHI_ARG || tree->OperGet() == GT_LCL_FLD)
+ {
+ return;
+ }
+
+ // Propagate only on uses.
+ if (tree->gtFlags & GTF_VAR_DEF || tree->gtFlags & GTF_VAR_USEDEF)
+ {
+ return;
+ }
+ unsigned lclNum = tree->AsLclVarCommon()->GetLclNum();
+
+ // Skip address exposed variables.
+ if (fgExcludeFromSsa(lclNum))
+ {
+ return;
+ }
+
+ assert(tree->gtVNPair.GetConservative() != ValueNumStore::NoVN);
+
+ for (LclNumToGenTreePtrStack::KeyIterator iter = curSsaName->Begin(); !iter.Equal(curSsaName->End()); ++iter)
+ {
+ unsigned newLclNum = iter.Get();
+
+ GenTreePtr op = iter.GetValue()->Index(0);
+
+ // Nothing to do if same.
+ if (lclNum == newLclNum)
+ {
+ continue;
+ }
+
+        // Skip variables with assignments embedded in the statement (i.e., with a comma),
+        // because we are not currently updating their SSA names as live in the copy-prop pass of the stmt.
+ if (VarSetOps::IsMember(this, optCopyPropKillSet, lvaTable[newLclNum].lvVarIndex))
+ {
+ continue;
+ }
+
+ if (op->gtFlags & GTF_VAR_CAST)
+ {
+ continue;
+ }
+ if (gsShadowVarInfo != nullptr && lvaTable[newLclNum].lvIsParam &&
+ gsShadowVarInfo[newLclNum].shadowCopy == lclNum)
+ {
+ continue;
+ }
+ ValueNum opVN = GetUseAsgDefVNOrTreeVN(op);
+ if (opVN == ValueNumStore::NoVN)
+ {
+ continue;
+ }
+ if (op->TypeGet() != tree->TypeGet())
+ {
+ continue;
+ }
+ if (opVN != tree->gtVNPair.GetConservative())
+ {
+ continue;
+ }
+ if (optCopyProp_LclVarScore(&lvaTable[lclNum], &lvaTable[newLclNum], true) <= 0)
+ {
+ continue;
+ }
+        // Check whether the newLclNum is live before being substituted. Otherwise, we could end
+        // up in a situation where there must have been a phi node that got pruned because the
+        // variable is not live anymore. For example,
+        //  if
+        //     x0 = 1
+        //  else
+        //     x1 = 2
+        //  print(c) <-- x is not live here. Let's say 'c' shares the value number with "x0."
+        //
+        // If we simply substituted 'c' with "x0", we would be wrong. Ideally, there would be a phi
+        // node x2 = phi(x0, x1) which could then be used to substitute 'c'. But because of pruning
+        // there is no such phi node. To solve this, we check that the candidate ("x0" here) is
+        // live before replacing 'c' with it.
+ if (!lvaTable[newLclNum].lvVerTypeInfo.IsThisPtr())
+ {
+ if (lvaTable[newLclNum].lvAddrExposed)
+ {
+ continue;
+ }
+
+ // We compute liveness only on tracked variables. So skip untracked locals.
+ if (!lvaTable[newLclNum].lvTracked)
+ {
+ continue;
+ }
+
+            // Because of this dependence on live variable analysis, the CopyProp phase runs
+            // immediately after Liveness, SSA, and VN.
+ if (!VarSetOps::IsMember(this, compCurLife, lvaTable[newLclNum].lvVarIndex))
+ {
+ continue;
+ }
+ }
+ unsigned newSsaNum = SsaConfig::RESERVED_SSA_NUM;
+ if (op->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEDEF))
+ {
+ newSsaNum = GetSsaNumForLocalVarDef(op);
+ }
+ else // parameters, this pointer etc.
+ {
+ newSsaNum = op->AsLclVarCommon()->GetSsaNum();
+ }
+
+ if (newSsaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ continue;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("VN based copy assertion for ");
+ printTreeID(tree);
+ printf(" V%02d @%08X by ", lclNum, tree->GetVN(VNK_Conservative));
+ printTreeID(op);
+ printf(" V%02d @%08X.\n", newLclNum, op->GetVN(VNK_Conservative));
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+
+ lvaTable[lclNum].decRefCnts(block->getBBWeight(this), this);
+ lvaTable[newLclNum].incRefCnts(block->getBBWeight(this), this);
+ tree->gtLclVarCommon.SetLclNum(newLclNum);
+ tree->AsLclVarCommon()->SetSsaNum(newSsaNum);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("copy propagated to:\n");
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ break;
+ }
+ return;
+}
+
+/**************************************************************************************
+ *
+ * Helper to check if tree is a local that participates in SSA numbering.
+ */
+bool Compiler::optIsSsaLocal(GenTreePtr tree)
+{
+ return tree->IsLocal() && !fgExcludeFromSsa(tree->AsLclVarCommon()->GetLclNum());
+}
+
+/**************************************************************************************
+ *
+ * Perform copy propagation using currently live definitions on the current block's
+ * variables. Also, as new definitions are encountered, update "curSsaName", which
+ * tracks the currently live definitions.
+ *
+ */
+void Compiler::optBlockCopyProp(BasicBlock* block, LclNumToGenTreePtrStack* curSsaName)
+{
+ JITDUMP("Copy Assertion for BB%02u\n", block->bbNum);
+
+ // There are no definitions at the start of the block. So clear it.
+ compCurLifeTree = nullptr;
+ VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ VarSetOps::ClearD(this, optCopyPropKillSet);
+
+ // Walk the tree to find if any local variable can be replaced with current live definitions.
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ compUpdateLife</*ForCodeGen*/ false>(tree);
+ optCopyProp(block, stmt, tree, curSsaName);
+
+ // TODO-Review: Merge this loop with the following loop to correctly update the
+ // live SSA num while also propagating copies.
+ //
+ // 1. This loop performs copy prop with currently live (on-top-of-stack) SSA num.
+ // 2. The subsequent loop maintains a stack for each lclNum with
+ // currently active SSA numbers when definitions are encountered.
+ //
+ // If there is an embedded definition using a "comma" in a stmt, then the currently
+ // live SSA number will get updated only in the next loop (2). However, this new
+ // definition is now supposed to be live (on tos). If we did not update the stacks
+            // using (2), copy prop (1) would use an SSA num defined outside the stmt, ignoring the
+            // embedded update. Killing the variable is a simplification to produce 0 ASM diffs
+ // for an update release.
+ //
+ if (optIsSsaLocal(tree) && (tree->gtFlags & GTF_VAR_DEF))
+ {
+ VarSetOps::AddElemD(this, optCopyPropKillSet, lvaTable[tree->gtLclVarCommon.gtLclNum].lvVarIndex);
+ }
+ }
+
+ // This logic must be in sync with SSA renaming process.
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (!optIsSsaLocal(tree))
+ {
+ continue;
+ }
+
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ // As we encounter a definition add it to the stack as a live definition.
+ if (tree->gtFlags & GTF_VAR_DEF)
+ {
+ GenTreePtrStack* stack;
+ if (!curSsaName->Lookup(lclNum, &stack))
+ {
+ stack = new (getAllocator()) GenTreePtrStack(this);
+ }
+ stack->Push(tree);
+ curSsaName->Set(lclNum, stack);
+ }
+ // If we encounter first use of a param or this pointer add it as a live definition.
+ // Since they are always live, do it only once.
+ else if ((tree->gtOper == GT_LCL_VAR) && !(tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) &&
+ (lvaTable[lclNum].lvIsParam || lvaTable[lclNum].lvVerTypeInfo.IsThisPtr()))
+ {
+ GenTreePtrStack* stack;
+ if (!curSsaName->Lookup(lclNum, &stack))
+ {
+ stack = new (getAllocator()) GenTreePtrStack(this);
+ stack->Push(tree);
+ curSsaName->Set(lclNum, stack);
+ }
+ }
+ }
+ }
+}
+
+/**************************************************************************************
+ *
+ * This stage performs value-numbering-based copy propagation. Since copy propagation
+ * is about data flow, these copies cannot be found in the assertion prop phase; assertion
+ * prop only identifies copies implied by control flow, e.g., the equality that holds on
+ * the taken edge of "if (a == b)".
+ *
+ * To identify data flow copies, we follow an approach similar to SSA renaming. We walk
+ * each path in the graph keeping track of every live definition. Thus, when we see a
+ * variable that shares its VN with a live definition, we replace this variable with
+ * the variable in the live definition.
+ *
+ * We do this to be in conventional SSA form. This can very well be changed later.
+ *
+ * For example, on some path in the graph:
+ * a0 = x0
+ * : <- other blocks
+ * :
+ * a1 = y0
+ * :
+ * : <- other blocks
+ * b0 = x0, we cannot substitute x0 with a0, because currently our backend doesn't
+ * treat lclNum and ssaNum together as a variable, but just looks at lclNum. If we
+ * substituted x0 with a0, then we'd be in general SSA form.
+ *
+ */
+void Compiler::optVnCopyProp()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optVnCopyProp()\n");
+ }
+#endif
+
+ if (fgSsaPassesCompleted == 0)
+ {
+ return;
+ }
+ jitstd::allocator<void> allocator(getAllocator());
+
+ // Compute the domTree to use.
+ BlkToBlkSetMap* domTree = new (getAllocator()) BlkToBlkSetMap(getAllocator());
+ domTree->Reallocate(fgBBcount * 3 / 2); // Prime the allocation
+ SsaBuilder::ComputeDominators(this, domTree);
+
+ struct BlockWork
+ {
+ BasicBlock* m_blk;
+ bool m_processed;
+
+ BlockWork(BasicBlock* blk, bool processed = false) : m_blk(blk), m_processed(processed)
+ {
+ }
+ };
+ typedef jitstd::vector<BlockWork> BlockWorkStack;
+
+ VarSetOps::AssignNoCopy(this, compCurLife, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, optCopyPropKillSet, VarSetOps::MakeEmpty(this));
+
+ // The map from lclNum to its recently live definitions as a stack.
+ LclNumToGenTreePtrStack curSsaName(getAllocator());
+
+ BlockWorkStack* worklist =
+ new (allocate_any<BlockWorkStack>(allocator), jitstd::placement_t()) BlockWorkStack(allocator);
+
+ worklist->push_back(BlockWork(fgFirstBB));
+ while (!worklist->empty())
+ {
+ BlockWork work = worklist->back();
+ worklist->pop_back();
+
+ BasicBlock* block = work.m_blk;
+ if (work.m_processed)
+ {
+ // Pop all the live definitions for this block.
+ optBlockCopyPropPopStacks(block, &curSsaName);
+ continue;
+ }
+
+        // Generate copy assertions in this block, keeping the "curSsaName" map up to date.
+ worklist->push_back(BlockWork(block, true));
+
+ optBlockCopyProp(block, &curSsaName);
+
+ // Add dom children to work on.
+ BlkSet* pBlkSet;
+ if (domTree->Lookup(block, &pBlkSet))
+ {
+ for (BlkSet::KeyIterator child = pBlkSet->Begin(); !child.Equal(pBlkSet->End()); ++child)
+ {
+ worklist->push_back(BlockWork(child.Get()));
+ }
+ }
+ }
+
+ // Tracked variable count increases after CopyProp, so don't keep a shorter array around.
+ // Destroy (release) the varset.
+ VarSetOps::AssignNoCopy(this, compCurLife, VarSetOps::UninitVal());
+}
diff --git a/src/jit/cpp.hint b/src/jit/cpp.hint
new file mode 100644
index 0000000000..b2fe33cc98
--- /dev/null
+++ b/src/jit/cpp.hint
@@ -0,0 +1,27 @@
+// cpp.hint for the JIT
+//
+// These hints are designed to improve the IntelliSense experience when browsing the JIT codebase.
+// Note that they don't need to be correct code; they just need to fix the IntelliSense problems that
+// exist without the hint.
+//
+// See the article on hints in MSDN for more information on their necessity and use:
+// http://msdn.microsoft.com/en-us/library/dd997977.aspx
+
+#define foreach_treenode_execution_order(__node, __stmt) for (;;)
+
+#define foreach_block(__compiler, __block) for (;;)
+
+#define FOREACH_REGISTER_FILE(file) for (;;)
+
+// From jit.h
+
+#define DECLARE_TYPED_ENUM(tag,baseType) enum tag : baseType
+
+#define END_DECLARE_TYPED_ENUM(tag,baseType) ;
+
+#define INDEBUG(x) x
+#define INDEBUG_COMMA(x) x,
+#define DEBUGARG(x) , x
+
+#define PROTO_ARG(x) x ,
+#define PROTO_ARGL(x) , x
\ No newline at end of file
diff --git a/src/jit/crossgen/.gitmirror b/src/jit/crossgen/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/crossgen/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror.
\ No newline at end of file
diff --git a/src/jit/crossgen/CMakeLists.txt b/src/jit/crossgen/CMakeLists.txt
new file mode 100644
index 0000000000..f79d9e72ce
--- /dev/null
+++ b/src/jit/crossgen/CMakeLists.txt
@@ -0,0 +1,7 @@
+include(${CLR_DIR}/crossgen.cmake)
+
+if(CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM)
+ add_definitions(-DLEGACY_BACKEND)
+endif()
+
+add_library_clr(${JIT_BASE_NAME}_crossgen ${SOURCES})
diff --git a/src/jit/crossgen/jit_crossgen.nativeproj b/src/jit/crossgen/jit_crossgen.nativeproj
new file mode 100644
index 0000000000..f8552dc2f5
--- /dev/null
+++ b/src/jit/crossgen/jit_crossgen.nativeproj
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="dogfood">
+
+ <!--Import the settings-->
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\xplat\SetCrossGen.props" />
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\clr.props" />
+
+ <PropertyGroup>
+ <BuildSysBinaries>true</BuildSysBinaries>
+ <OutputName>jit_crossgen</OutputName>
+ <FeatureMergeJitAndEngine>true</FeatureMergeJitAndEngine>
+ <TargetType>LIBRARY</TargetType>
+
+ <ClDefines Condition="'$(BuildArchitecture)' == 'i386'">$(ClDefines);LEGACY_BACKEND</ClDefines>
+ <ClDefines Condition="'$(BuildArchitecture)' == 'arm'">$(ClDefines);LEGACY_BACKEND</ClDefines>
+ </PropertyGroup>
+
+ <Import Project="..\jit.settings.targets" />
+
+</Project>
diff --git a/src/jit/dataflow.h b/src/jit/dataflow.h
new file mode 100644
index 0000000000..c9803a0cc1
--- /dev/null
+++ b/src/jit/dataflow.h
@@ -0,0 +1,81 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This class is used to perform data flow optimizations.
+// An example usage would be:
+//
+// DataFlow flow(m_pCompiler);
+// flow.ForwardAnalysis(callback);
+//
+// The "callback" object needs to implement the necessary callback
+// functions that the "flow" object will call as the data flow
+// analysis progresses.
+//
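+//
+// For illustration only, a minimal callback might be sketched as below (hypothetical;
+// it is not an actual JIT class, and real callbacks typically track per-block in/out
+// sets, e.g. as bit vectors):
+//
+//   class ForwardFlowCallback
+//   {
+//   public:
+//       ForwardFlowCallback(Compiler* pCompiler) : m_pCompiler(pCompiler)
+//       {
+//       }
+//
+//       void StartMerge(BasicBlock* block)
+//       {
+//           // Reset the "in" state that is about to be recomputed from the preds.
+//       }
+//
+//       void Merge(BasicBlock* block, BasicBlock* pred, flowList* preds)
+//       {
+//           // Combine the pred's "out" state into block's "in" state.
+//       }
+//
+//       bool EndMerge(BasicBlock* block)
+//       {
+//           // Recompute block's "out" state; return true iff it changed, so that
+//           // ForwardAnalysis re-queues the block's successors.
+//           return false;
+//       }
+//
+//   private:
+//       Compiler* m_pCompiler;
+//   };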
+#pragma once
+
+#include "compiler.h"
+#include "jitstd.h"
+
+class DataFlow
+{
+private:
+ DataFlow();
+
+public:
+    // The callback interface that needs to be implemented by anyone
+    // who wants to receive updates from the dataflow object.
+ class Callback
+ {
+ public:
+ Callback(Compiler* pCompiler);
+
+ void StartMerge(BasicBlock* block);
+ void Merge(BasicBlock* block, BasicBlock* pred, flowList* preds);
+ bool EndMerge(BasicBlock* block);
+
+ private:
+ Compiler* m_pCompiler;
+ };
+
+ DataFlow(Compiler* pCompiler);
+
+ template <typename TCallback>
+ void ForwardAnalysis(TCallback& callback);
+
+private:
+ Compiler* m_pCompiler;
+};
+
+template <typename TCallback>
+void DataFlow::ForwardAnalysis(TCallback& callback)
+{
+ jitstd::list<BasicBlock*> worklist(jitstd::allocator<void>(m_pCompiler->getAllocator()));
+
+ worklist.insert(worklist.begin(), m_pCompiler->fgFirstBB);
+ while (!worklist.empty())
+ {
+ BasicBlock* block = *(worklist.begin());
+ worklist.erase(worklist.begin());
+
+ callback.StartMerge(block);
+ {
+ flowList* preds = m_pCompiler->BlockPredsWithEH(block);
+ for (flowList* pred = preds; pred; pred = pred->flNext)
+ {
+ callback.Merge(block, pred->flBlock, preds);
+ }
+ }
+
+ if (callback.EndMerge(block))
+ {
+ AllSuccessorIter succsBegin = block->GetAllSuccs(m_pCompiler).begin();
+ AllSuccessorIter succsEnd = block->GetAllSuccs(m_pCompiler).end();
+ for (AllSuccessorIter succ = succsBegin; succ != succsEnd; ++succ)
+ {
+ worklist.insert(worklist.end(), *succ);
+ }
+ }
+ }
+}
diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp
new file mode 100644
index 0000000000..cf66487367
--- /dev/null
+++ b/src/jit/decomposelongs.cpp
@@ -0,0 +1,1028 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX DecomposeLongs XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/
+
+//
+// This file contains code to decompose 64-bit LONG operations on 32-bit platforms
+// into multiple single-register operations so that individual register usage and
+// requirements are explicit for LSRA. The rationale is to avoid adding downstream
+// code complexity caused by having to handle longs as special cases, especially
+// in LSRA.
+//
+// Long decomposition happens on a statement immediately prior to more general
+// purpose lowering.
+//
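+//
+// For example (an illustrative sketch, not an actual JIT dump), a 64-bit addition
+//
+//     t3 = GT_ADD long (t1, t2)
+//
+// is decomposed so that each 32-bit half is an explicit node, tied back together
+// by a GT_LONG node:
+//
+//     lo = GT_ADD_LO int (t1.lo, t2.lo)   // low half; produces the carry
+//     hi = GT_ADD_HI int (t1.hi, t2.hi)   // high half; consumes the carry
+//     t3 = GT_LONG       (lo, hi)
+//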
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+#ifndef _TARGET_64BIT_ // DecomposeLongs is only used on 32-bit platforms
+
+#include "decomposelongs.h"
+
+//------------------------------------------------------------------------
+// DecomposeLongs::PrepareForDecomposition:
+// Do one-time preparation required for LONG decomposition. Namely,
+// promote long variables to multi-register structs.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// None.
+//
+void DecomposeLongs::PrepareForDecomposition()
+{
+ m_compiler->lvaPromoteLongVars();
+}
+
+//------------------------------------------------------------------------
+// DecomposeLongs::DecomposeBlock:
+// Do LONG decomposition on all the nodes in the given block. This must
+// be done before lowering the block, as decomposition can insert
+// additional nodes.
+//
+// Arguments:
+// block - the block to process
+//
+// Return Value:
+// None.
+//
+void DecomposeLongs::DecomposeBlock(BasicBlock* block)
+{
+ assert(block == m_compiler->compCurBB); // compCurBB must already be set.
+ assert(block->isEmpty() || block->IsLIR());
+
+ m_blockWeight = block->getBBWeight(m_compiler);
+ m_range = &LIR::AsRange(block);
+ DecomposeRangeHelper();
+}
+
+//------------------------------------------------------------------------
+// DecomposeLongs::DecomposeRange:
+// Do LONG decomposition on all the nodes in the given range. This must
+// be done before inserting a range of un-decomposed IR into a block
+// that has already been decomposed.
+//
+// Arguments:
+// compiler - The compiler context.
+// blockWeight - The weight of the block into which the range will be
+// inserted.
+// range - The range to decompose.
+//
+// Return Value:
+// None.
+//
+void DecomposeLongs::DecomposeRange(Compiler* compiler, unsigned blockWeight, LIR::Range& range)
+{
+ assert(compiler != nullptr);
+
+ DecomposeLongs decomposer(compiler);
+ decomposer.m_blockWeight = blockWeight;
+ decomposer.m_range = &range;
+
+ decomposer.DecomposeRangeHelper();
+}
+
+//------------------------------------------------------------------------
+// DecomposeLongs::DecomposeRangeHelper:
+//    Decompose each node in the current range.
+//
+// Decomposition is done as an execution-order walk. Decomposition of
+// a particular node can create new nodes that need to be further
+// decomposed at higher levels. That is, decomposition "bubbles up"
+// through dataflow.
+//
+void DecomposeLongs::DecomposeRangeHelper()
+{
+ assert(m_range != nullptr);
+
+ GenTree* node = Range().FirstNonPhiNode();
+ while (node != nullptr)
+ {
+ LIR::Use use;
+ if (!Range().TryGetUse(node, &use))
+ {
+ use = LIR::Use::GetDummyUse(Range(), node);
+ }
+
+ node = DecomposeNode(use);
+ }
+
+ assert(Range().CheckLIR(m_compiler));
+}
+
+//------------------------------------------------------------------------
+// DecomposeNode: Decompose long-type trees into lower and upper halves.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use)
+{
+ GenTree* tree = use.Def();
+
+ // Handle the case where we are implicitly using the lower half of a long lclVar.
+ if ((tree->TypeGet() == TYP_INT) && tree->OperIsLocal())
+ {
+ LclVarDsc* varDsc = m_compiler->lvaTable + tree->AsLclVarCommon()->gtLclNum;
+ if (varTypeIsLong(varDsc) && varDsc->lvPromoted)
+ {
+#ifdef DEBUG
+ if (m_compiler->verbose)
+ {
+ printf("Changing implicit reference to lo half of long lclVar to an explicit reference of its promoted "
+ "half:\n");
+ m_compiler->gtDispTreeRange(Range(), tree);
+ }
+#endif // DEBUG
+ m_compiler->lvaDecRefCnts(tree);
+ unsigned loVarNum = varDsc->lvFieldLclStart;
+ tree->AsLclVarCommon()->SetLclNum(loVarNum);
+ m_compiler->lvaIncRefCnts(tree);
+ return tree->gtNext;
+ }
+ }
+
+ if (tree->TypeGet() != TYP_LONG)
+ {
+ return tree->gtNext;
+ }
+
+#ifdef DEBUG
+ if (m_compiler->verbose)
+ {
+ printf("Decomposing TYP_LONG tree. BEFORE:\n");
+ m_compiler->gtDispTreeRange(Range(), tree);
+ }
+#endif // DEBUG
+
+ GenTree* nextNode = nullptr;
+ switch (tree->OperGet())
+ {
+ case GT_PHI:
+ case GT_PHI_ARG:
+ nextNode = tree->gtNext;
+ break;
+
+ case GT_LCL_VAR:
+ nextNode = DecomposeLclVar(use);
+ break;
+
+ case GT_LCL_FLD:
+ nextNode = DecomposeLclFld(use);
+ break;
+
+ case GT_STORE_LCL_VAR:
+ nextNode = DecomposeStoreLclVar(use);
+ break;
+
+ case GT_CAST:
+ nextNode = DecomposeCast(use);
+ break;
+
+ case GT_CNS_LNG:
+ nextNode = DecomposeCnsLng(use);
+ break;
+
+ case GT_CALL:
+ nextNode = DecomposeCall(use);
+ break;
+
+ case GT_RETURN:
+ assert(tree->gtOp.gtOp1->OperGet() == GT_LONG);
+ break;
+
+ case GT_STOREIND:
+ nextNode = DecomposeStoreInd(use);
+ break;
+
+ case GT_STORE_LCL_FLD:
+ assert(tree->gtOp.gtOp1->OperGet() == GT_LONG);
+ NYI("st.lclFld of of TYP_LONG");
+ break;
+
+ case GT_IND:
+ nextNode = DecomposeInd(use);
+ break;
+
+ case GT_NOT:
+ nextNode = DecomposeNot(use);
+ break;
+
+ case GT_NEG:
+ nextNode = DecomposeNeg(use);
+ break;
+
+ // Binary operators. Those that require different computation for upper and lower half are
+ // handled by the use of GetHiOper().
+ case GT_ADD:
+ case GT_SUB:
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ nextNode = DecomposeArith(use);
+ break;
+
+ case GT_MUL:
+ NYI("Arithmetic binary operators on TYP_LONG - GT_MUL");
+ break;
+
+ case GT_DIV:
+ NYI("Arithmetic binary operators on TYP_LONG - GT_DIV");
+ break;
+
+ case GT_MOD:
+ NYI("Arithmetic binary operators on TYP_LONG - GT_MOD");
+ break;
+
+ case GT_UDIV:
+ NYI("Arithmetic binary operators on TYP_LONG - GT_UDIV");
+ break;
+
+ case GT_UMOD:
+ NYI("Arithmetic binary operators on TYP_LONG - GT_UMOD");
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ nextNode = DecomposeShift(use);
+ break;
+
+ case GT_ROL:
+ case GT_ROR:
+ NYI("Arithmetic binary operators on TYP_LONG - ROTATE");
+ break;
+
+ case GT_MULHI:
+ NYI("Arithmetic binary operators on TYP_LONG - MULHI");
+ break;
+
+ case GT_LOCKADD:
+ case GT_XADD:
+ case GT_XCHG:
+ case GT_CMPXCHG:
+ NYI("Interlocked operations on TYP_LONG");
+ break;
+
+ default:
+ {
+ JITDUMP("Illegal TYP_LONG node %s in Decomposition.", GenTree::NodeName(tree->OperGet()));
+ noway_assert(!"Illegal TYP_LONG node in Decomposition.");
+ break;
+ }
+ }
+
+#ifdef DEBUG
+ if (m_compiler->verbose)
+ {
+ // NOTE: st_lcl_var doesn't dump properly afterwards.
+ printf("Decomposing TYP_LONG tree. AFTER:\n");
+ m_compiler->gtDispTreeRange(Range(), use.Def());
+ }
+#endif
+
+ return nextNode;
+}
+
+//------------------------------------------------------------------------
+// FinalizeDecomposition: A helper function to finalize LONG decomposition by
+// taking the resulting two halves of the decomposition, and tie them together
+// with a new GT_LONG node that will replace the original node.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+// loResult - the decomposed low part
+// hiResult - the decomposed high part. This must follow loResult in the linear order,
+// as the new GT_LONG node will be inserted immediately after it.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult)
+{
+ assert(use.IsInitialized());
+ assert(loResult != nullptr);
+ assert(hiResult != nullptr);
+ assert(Range().Contains(loResult));
+ assert(Range().Contains(hiResult));
+ assert(loResult->Precedes(hiResult));
+
+ GenTree* gtLong = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loResult, hiResult);
+ Range().InsertAfter(hiResult, gtLong);
+
+ use.ReplaceWith(m_compiler, gtLong);
+
+ return gtLong->gtNext;
+}
+
+//------------------------------------------------------------------------
+// DecomposeLclVar: Decompose GT_LCL_VAR.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeLclVar(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_LCL_VAR);
+
+ GenTree* tree = use.Def();
+ unsigned varNum = tree->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = m_compiler->lvaTable + varNum;
+ m_compiler->lvaDecRefCnts(tree);
+
+ GenTree* loResult = tree;
+ loResult->gtType = TYP_INT;
+
+ GenTree* hiResult = m_compiler->gtNewLclLNode(varNum, TYP_INT);
+ Range().InsertAfter(loResult, hiResult);
+
+ if (varDsc->lvPromoted)
+ {
+ assert(varDsc->lvFieldCnt == 2);
+ unsigned loVarNum = varDsc->lvFieldLclStart;
+ unsigned hiVarNum = loVarNum + 1;
+ loResult->AsLclVarCommon()->SetLclNum(loVarNum);
+ hiResult->AsLclVarCommon()->SetLclNum(hiVarNum);
+ }
+ else
+ {
+ noway_assert(varDsc->lvLRACandidate == false);
+
+ loResult->SetOper(GT_LCL_FLD);
+ loResult->AsLclFld()->gtLclOffs = 0;
+ loResult->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+
+ hiResult->SetOper(GT_LCL_FLD);
+ hiResult->AsLclFld()->gtLclOffs = 4;
+ hiResult->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+ }
+
+ m_compiler->lvaIncRefCnts(loResult);
+ m_compiler->lvaIncRefCnts(hiResult);
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeLclFld: Decompose GT_LCL_FLD.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeLclFld(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_LCL_FLD);
+
+ GenTree* tree = use.Def();
+ GenTreeLclFld* loResult = tree->AsLclFld();
+ loResult->gtType = TYP_INT;
+
+ GenTree* hiResult = m_compiler->gtNewLclFldNode(loResult->gtLclNum, TYP_INT, loResult->gtLclOffs + 4);
+ Range().InsertAfter(loResult, hiResult);
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeStoreLclVar: Decompose GT_STORE_LCL_VAR.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeStoreLclVar(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_STORE_LCL_VAR);
+
+ GenTree* tree = use.Def();
+ GenTree* rhs = tree->gtGetOp1();
+ if ((rhs->OperGet() == GT_PHI) || (rhs->OperGet() == GT_CALL))
+ {
+ // GT_CALLs are not decomposed, so will not be converted to GT_LONG
+ // GT_STORE_LCL_VAR = GT_CALL are handled in genMultiRegCallStoreToLocal
+ return tree->gtNext;
+ }
+
+ noway_assert(rhs->OperGet() == GT_LONG);
+ unsigned varNum = tree->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = m_compiler->lvaTable + varNum;
+ m_compiler->lvaDecRefCnts(tree);
+
+ GenTree* loRhs = rhs->gtGetOp1();
+ GenTree* hiRhs = rhs->gtGetOp2();
+ GenTree* hiStore = m_compiler->gtNewLclLNode(varNum, TYP_INT);
+
+ if (varDsc->lvPromoted)
+ {
+ assert(varDsc->lvFieldCnt == 2);
+
+ unsigned loVarNum = varDsc->lvFieldLclStart;
+ unsigned hiVarNum = loVarNum + 1;
+ tree->AsLclVarCommon()->SetLclNum(loVarNum);
+ hiStore->SetOper(GT_STORE_LCL_VAR);
+ hiStore->AsLclVarCommon()->SetLclNum(hiVarNum);
+ }
+ else
+ {
+ noway_assert(varDsc->lvLRACandidate == false);
+
+ tree->SetOper(GT_STORE_LCL_FLD);
+ tree->AsLclFld()->gtLclOffs = 0;
+ tree->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+
+ hiStore->SetOper(GT_STORE_LCL_FLD);
+ hiStore->AsLclFld()->gtLclOffs = 4;
+ hiStore->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+ }
+
+ // 'tree' is going to steal the loRhs node for itself, so we need to remove the
+ // GT_LONG node from the threading.
+ Range().Remove(rhs);
+
+ tree->gtOp.gtOp1 = loRhs;
+ tree->gtType = TYP_INT;
+
+ hiStore->gtOp.gtOp1 = hiRhs;
+ hiStore->gtFlags |= GTF_VAR_DEF;
+
+ m_compiler->lvaIncRefCnts(tree);
+ m_compiler->lvaIncRefCnts(hiStore);
+
+ Range().InsertAfter(tree, hiStore);
+
+ return hiStore->gtNext;
+}
+
+//------------------------------------------------------------------------
+// DecomposeCast: Decompose GT_CAST.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_CAST);
+
+ GenTree* tree = use.Def();
+ GenTree* loResult = nullptr;
+ GenTree* hiResult = nullptr;
+
+ assert(tree->gtPrev == tree->gtGetOp1());
+ NYI_IF(tree->gtOverflow(), "TYP_LONG cast with overflow");
+ switch (tree->AsCast()->CastFromType())
+ {
+ case TYP_INT:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ loResult = tree->gtGetOp1();
+ Range().Remove(tree);
+
+ hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, 0);
+ Range().InsertAfter(loResult, hiResult);
+ }
+ else
+ {
+ NYI("Lowering of signed cast TYP_INT->TYP_LONG");
+ }
+ break;
+
+ default:
+ NYI("Unimplemented type for Lowering of cast to TYP_LONG");
+ break;
+ }
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeCnsLng: Decompose GT_CNS_LNG.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeCnsLng(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_CNS_LNG);
+
+ GenTree* tree = use.Def();
+ INT32 hiVal = tree->AsLngCon()->HiVal();
+
+ GenTree* loResult = tree;
+ loResult->ChangeOperConst(GT_CNS_INT);
+ loResult->gtType = TYP_INT;
+
+ GenTree* hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, hiVal);
+ Range().InsertAfter(loResult, hiResult);
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeCall: Decompose GT_CALL.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeCall(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_CALL);
+
+ // We only need to force var = call() if the call's result is used.
+ if (use.IsDummyUse())
+ return use.Def()->gtNext;
+
+ GenTree* user = use.User();
+ if (user->OperGet() == GT_STORE_LCL_VAR)
+ {
+ // If parent is already a STORE_LCL_VAR, we can skip it if
+ // it is already marked as lvIsMultiRegRet.
+ unsigned varNum = user->AsLclVarCommon()->gtLclNum;
+ if (m_compiler->lvaTable[varNum].lvIsMultiRegRet)
+ {
+ return use.Def()->gtNext;
+ }
+ else if (!m_compiler->lvaTable[varNum].lvPromoted)
+ {
+ // If var wasn't promoted, we can just set lvIsMultiRegRet.
+ m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
+ return use.Def()->gtNext;
+ }
+ }
+
+ GenTree* originalNode = use.Def();
+
+ // Otherwise, we need to force var = call()
+ unsigned varNum = use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
+
+ // Decompose the new LclVar use
+ return DecomposeLclVar(use);
+}
+
+//------------------------------------------------------------------------
+// DecomposeStoreInd: Decompose GT_STOREIND.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeStoreInd(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_STOREIND);
+
+ GenTree* tree = use.Def();
+
+ assert(tree->gtOp.gtOp2->OperGet() == GT_LONG);
+
+ // Example input (address expression omitted):
+ //
+ // t51 = const int 0x37C05E7D
+ // t154 = const int 0x2A0A3C80
+ // / --* t51 int
+ // + --* t154 int
+ // t155 = *gt_long long
+ // / --* t52 byref
+ // + --* t155 long
+ // * storeIndir long
+
+ GenTree* gtLong = tree->gtOp.gtOp2;
+
+ // Save address to a temp. It is used in storeIndLow and storeIndHigh trees.
+ LIR::Use address(Range(), &tree->gtOp.gtOp1, tree);
+ address.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ JITDUMP("[DecomposeStoreInd]: Saving address tree to a temp var:\n");
+ DISPTREERANGE(Range(), address.Def());
+
+ if (!gtLong->gtOp.gtOp1->OperIsLeaf())
+ {
+ LIR::Use op1(Range(), &gtLong->gtOp.gtOp1, gtLong);
+ op1.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ JITDUMP("[DecomposeStoreInd]: Saving low data tree to a temp var:\n");
+ DISPTREERANGE(Range(), op1.Def());
+ }
+
+ if (!gtLong->gtOp.gtOp2->OperIsLeaf())
+ {
+ LIR::Use op2(Range(), &gtLong->gtOp.gtOp2, gtLong);
+ op2.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ JITDUMP("[DecomposeStoreInd]: Saving high data tree to a temp var:\n");
+ DISPTREERANGE(Range(), op2.Def());
+ }
+
+ GenTree* addrBase = tree->gtOp.gtOp1;
+ GenTree* dataHigh = gtLong->gtOp.gtOp2;
+ GenTree* dataLow = gtLong->gtOp.gtOp1;
+ GenTree* storeIndLow = tree;
+
+ Range().Remove(gtLong);
+ Range().Remove(dataHigh);
+ storeIndLow->gtOp.gtOp2 = dataLow;
+ storeIndLow->gtType = TYP_INT;
+
+ GenTree* addrBaseHigh = new (m_compiler, GT_LCL_VAR)
+ GenTreeLclVar(GT_LCL_VAR, addrBase->TypeGet(), addrBase->AsLclVarCommon()->GetLclNum(), BAD_IL_OFFSET);
+ GenTree* addrHigh =
+ new (m_compiler, GT_LEA) GenTreeAddrMode(TYP_REF, addrBaseHigh, nullptr, 0, genTypeSize(TYP_INT));
+ GenTree* storeIndHigh = new (m_compiler, GT_STOREIND) GenTreeStoreInd(TYP_INT, addrHigh, dataHigh);
+ storeIndHigh->gtFlags = (storeIndLow->gtFlags & (GTF_ALL_EFFECT | GTF_LIVENESS_MASK));
+ storeIndHigh->gtFlags |= GTF_REVERSE_OPS;
+
+ m_compiler->lvaIncRefCnts(addrBaseHigh);
+
+ Range().InsertAfter(storeIndLow, dataHigh, addrBaseHigh, addrHigh, storeIndHigh);
+
+ return storeIndHigh;
+
+ // Example final output:
+ //
+ // /--* t52 byref
+ // * st.lclVar byref V07 rat0
+ // t158 = lclVar byref V07 rat0
+ // t51 = const int 0x37C05E7D
+ // /--* t158 byref
+ // +--* t51 int
+ // * storeIndir int
+ // t154 = const int 0x2A0A3C80
+ // t159 = lclVar byref V07 rat0
+ // /--* t159 byref
+ // t160 = * lea(b + 4) ref
+ // /--* t154 int
+ // +--* t160 ref
+ // * storeIndir int
+}
+
+//------------------------------------------------------------------------
+// DecomposeInd: Decompose GT_IND.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeInd(LIR::Use& use)
+{
+ GenTree* indLow = use.Def();
+
+ LIR::Use address(Range(), &indLow->gtOp.gtOp1, indLow);
+ address.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ JITDUMP("[DecomposeInd]: Saving addr tree to a temp var:\n");
+ DISPTREERANGE(Range(), address.Def());
+
+ // Change the type of lower ind.
+ indLow->gtType = TYP_INT;
+
+ // Create tree of ind(addr+4)
+ GenTreePtr addrBase = indLow->gtGetOp1();
+ GenTreePtr addrBaseHigh = new (m_compiler, GT_LCL_VAR)
+ GenTreeLclVar(GT_LCL_VAR, addrBase->TypeGet(), addrBase->AsLclVarCommon()->GetLclNum(), BAD_IL_OFFSET);
+ GenTreePtr addrHigh =
+ new (m_compiler, GT_LEA) GenTreeAddrMode(TYP_REF, addrBaseHigh, nullptr, 0, genTypeSize(TYP_INT));
+ GenTreePtr indHigh = new (m_compiler, GT_IND) GenTreeIndir(GT_IND, TYP_INT, addrHigh, nullptr);
+
+ m_compiler->lvaIncRefCnts(addrBaseHigh);
+
+ Range().InsertAfter(indLow, addrBaseHigh, addrHigh, indHigh);
+
+ return FinalizeDecomposition(use, indLow, indHigh);
+}
+
+//------------------------------------------------------------------------
+// DecomposeNot: Decompose GT_NOT.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeNot(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_NOT);
+
+ GenTree* tree = use.Def();
+ GenTree* gtLong = tree->gtGetOp1();
+ noway_assert(gtLong->OperGet() == GT_LONG);
+ GenTree* loOp1 = gtLong->gtGetOp1();
+ GenTree* hiOp1 = gtLong->gtGetOp2();
+
+ Range().Remove(gtLong);
+
+ GenTree* loResult = tree;
+ loResult->gtType = TYP_INT;
+ loResult->gtOp.gtOp1 = loOp1;
+
+ GenTree* hiResult = new (m_compiler, GT_NOT) GenTreeOp(GT_NOT, TYP_INT, hiOp1, nullptr);
+ Range().InsertAfter(loResult, hiResult);
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeNeg: Decompose GT_NEG.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_NEG);
+
+ GenTree* tree = use.Def();
+ GenTree* gtLong = tree->gtGetOp1();
+ noway_assert(gtLong->OperGet() == GT_LONG);
+
+ LIR::Use op1(Range(), &gtLong->gtOp.gtOp1, gtLong);
+ op1.ReplaceWithLclVar(m_compiler, m_blockWeight);
+
+ LIR::Use op2(Range(), &gtLong->gtOp.gtOp2, gtLong);
+ op2.ReplaceWithLclVar(m_compiler, m_blockWeight);
+
+ // Neither GT_NEG nor the introduced temporaries have side effects.
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+ GenTree* loOp1 = gtLong->gtGetOp1();
+ GenTree* hiOp1 = gtLong->gtGetOp2();
+
+ Range().Remove(gtLong);
+
+ GenTree* loResult = tree;
+ loResult->gtType = TYP_INT;
+ loResult->gtOp.gtOp1 = loOp1;
+
+ GenTree* zero = m_compiler->gtNewZeroConNode(TYP_INT);
+ GenTree* hiAdjust = m_compiler->gtNewOperNode(GT_ADD_HI, TYP_INT, hiOp1, zero);
+ GenTree* hiResult = m_compiler->gtNewOperNode(GT_NEG, TYP_INT, hiAdjust);
+ hiResult->gtFlags = tree->gtFlags;
+
+ Range().InsertAfter(loResult, zero, hiAdjust, hiResult);
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeArith: Decompose GT_ADD, GT_SUB, GT_OR, GT_XOR, GT_AND.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeArith(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+
+ GenTree* tree = use.Def();
+ genTreeOps oper = tree->OperGet();
+
+ assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_OR) || (oper == GT_XOR) || (oper == GT_AND));
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ // Both operands must have already been decomposed into GT_LONG operators.
+ noway_assert((op1->OperGet() == GT_LONG) && (op2->OperGet() == GT_LONG));
+
+ // Capture the lo and hi halves of op1 and op2.
+ GenTree* loOp1 = op1->gtGetOp1();
+ GenTree* hiOp1 = op1->gtGetOp2();
+ GenTree* loOp2 = op2->gtGetOp1();
+ GenTree* hiOp2 = op2->gtGetOp2();
+
+ // Now, remove op1 and op2 from the node list.
+ Range().Remove(op1);
+ Range().Remove(op2);
+
+ // We will reuse "tree" for the loResult, which will now be of TYP_INT, and its operands
+ // will be the lo halves of op1 from above.
+ GenTree* loResult = tree;
+ loResult->SetOper(GetLoOper(oper));
+ loResult->gtType = TYP_INT;
+ loResult->gtOp.gtOp1 = loOp1;
+ loResult->gtOp.gtOp2 = loOp2;
+
+ GenTree* hiResult = new (m_compiler, oper) GenTreeOp(GetHiOper(oper), TYP_INT, hiOp1, hiOp2);
+ Range().InsertAfter(loResult, hiResult);
+
+ if ((oper == GT_ADD) || (oper == GT_SUB))
+ {
+ if (loResult->gtOverflow())
+ {
+ hiResult->gtFlags |= GTF_OVERFLOW;
+ loResult->gtFlags &= ~GTF_OVERFLOW;
+ }
+ if (loResult->gtFlags & GTF_UNSIGNED)
+ {
+ hiResult->gtFlags |= GTF_UNSIGNED;
+ }
+ }
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shift nodes, we need to use
+// the shift helper functions, so we here convert the shift into a helper call by
+// pulling its arguments out of linear order and making them the args to a call, then
+// replacing the original node with the new call.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+
+ GenTree* tree = use.Def();
+ GenTree* gtLong = tree->gtGetOp1();
+ genTreeOps oper = tree->OperGet();
+
+ assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ));
+
+ LIR::Use loOp1Use(Range(), &gtLong->gtOp.gtOp1, gtLong);
+ loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+
+ LIR::Use hiOp1Use(Range(), &gtLong->gtOp.gtOp2, gtLong);
+ hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+
+ LIR::Use shiftWidthUse(Range(), &tree->gtOp.gtOp2, tree);
+ shiftWidthUse.ReplaceWithLclVar(m_compiler, m_blockWeight);
+
+ GenTree* loOp1 = gtLong->gtGetOp1();
+ GenTree* hiOp1 = gtLong->gtGetOp2();
+
+ GenTree* shiftWidthOp = tree->gtGetOp2();
+
+ Range().Remove(gtLong);
+ Range().Remove(loOp1);
+ Range().Remove(hiOp1);
+
+ Range().Remove(shiftWidthOp);
+
+ // TODO-X86-CQ: If the shift operand is a GT_CNS_INT, we should pipe the instructions through to codegen
+ // and generate the shift instructions ourselves there, rather than replacing it with a helper call.
+
+ unsigned helper;
+
+ switch (oper)
+ {
+ case GT_LSH:
+ helper = CORINFO_HELP_LLSH;
+ break;
+ case GT_RSH:
+ helper = CORINFO_HELP_LRSH;
+ break;
+ case GT_RSZ:
+ helper = CORINFO_HELP_LRSZ;
+ break;
+ default:
+ unreached();
+ }
+
+ GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftWidthOp);
+
+ GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList);
+
+ GenTreeCall* callNode = call->AsCall();
+ ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
+ retTypeDesc->InitializeLongReturnType(m_compiler);
+
+ call = m_compiler->fgMorphArgs(callNode);
+ Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call));
+
+ Range().Remove(tree);
+ use.ReplaceWith(m_compiler, call);
+ return call;
+}
+
+//------------------------------------------------------------------------
+// GetHiOper: Convert arithmetic operator to "high half" operator of decomposed node.
+//
+// Arguments:
+// oper - operator to map
+//
+// Return Value:
+// mapped operator
+//
+// static
+genTreeOps DecomposeLongs::GetHiOper(genTreeOps oper)
+{
+ switch (oper)
+ {
+ case GT_ADD:
+ return GT_ADD_HI;
+ break;
+ case GT_SUB:
+ return GT_SUB_HI;
+ break;
+ case GT_MUL:
+ return GT_MUL_HI;
+ break;
+ case GT_DIV:
+ return GT_DIV_HI;
+ break;
+ case GT_MOD:
+ return GT_MOD_HI;
+ break;
+ case GT_OR:
+ return GT_OR;
+ break;
+ case GT_AND:
+ return GT_AND;
+ break;
+ case GT_XOR:
+ return GT_XOR;
+ break;
+ default:
+ assert(!"GetHiOper called for invalid oper");
+ return GT_NONE;
+ }
+}
+
+//------------------------------------------------------------------------
+// GetLoOper: Convert arithmetic operator to "low half" operator of decomposed node.
+//
+// Arguments:
+// oper - operator to map
+//
+// Return Value:
+// mapped operator
+//
+// static
+genTreeOps DecomposeLongs::GetLoOper(genTreeOps oper)
+{
+ switch (oper)
+ {
+ case GT_ADD:
+ return GT_ADD_LO;
+ break;
+ case GT_SUB:
+ return GT_SUB_LO;
+ break;
+ case GT_OR:
+ return GT_OR;
+ break;
+ case GT_AND:
+ return GT_AND;
+ break;
+ case GT_XOR:
+ return GT_XOR;
+ break;
+ default:
+ assert(!"GetLoOper called for invalid oper");
+ return GT_NONE;
+ }
+}
+
+#endif // !_TARGET_64BIT_
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/decomposelongs.h b/src/jit/decomposelongs.h
new file mode 100644
index 0000000000..af9b342fb2
--- /dev/null
+++ b/src/jit/decomposelongs.h
@@ -0,0 +1,67 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX DecomposeLongs XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifndef _DECOMPOSELONGS_H_
+#define _DECOMPOSELONGS_H_
+
+#include "compiler.h"
+
+class DecomposeLongs
+{
+public:
+ DecomposeLongs(Compiler* compiler) : m_compiler(compiler)
+ {
+ }
+
+ void PrepareForDecomposition();
+ void DecomposeBlock(BasicBlock* block);
+
+ static void DecomposeRange(Compiler* compiler, unsigned blockWeight, LIR::Range& range);
+
+private:
+ inline LIR::Range& Range() const
+ {
+ return *m_range;
+ }
+
+ // Driver functions
+ void DecomposeRangeHelper();
+ GenTree* DecomposeNode(LIR::Use& use);
+
+ // Per-node type decompose cases
+ GenTree* DecomposeLclVar(LIR::Use& use);
+ GenTree* DecomposeLclFld(LIR::Use& use);
+ GenTree* DecomposeStoreLclVar(LIR::Use& use);
+ GenTree* DecomposeCast(LIR::Use& use);
+ GenTree* DecomposeCnsLng(LIR::Use& use);
+ GenTree* DecomposeCall(LIR::Use& use);
+ GenTree* DecomposeInd(LIR::Use& use);
+ GenTree* DecomposeStoreInd(LIR::Use& use);
+ GenTree* DecomposeNot(LIR::Use& use);
+ GenTree* DecomposeNeg(LIR::Use& use);
+ GenTree* DecomposeArith(LIR::Use& use);
+ GenTree* DecomposeShift(LIR::Use& use);
+
+ // Helper functions
+ GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult);
+
+ static genTreeOps GetHiOper(genTreeOps oper);
+ static genTreeOps GetLoOper(genTreeOps oper);
+
+ // Data
+ Compiler* m_compiler;
+ unsigned m_blockWeight;
+ LIR::Range* m_range;
+};
+
+#endif // _DECOMPOSELONGS_H_
diff --git a/src/jit/delayload.cpp b/src/jit/delayload.cpp
new file mode 100644
index 0000000000..895a13a6bf
--- /dev/null
+++ b/src/jit/delayload.cpp
@@ -0,0 +1,10 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#pragma hdrstop
+
+#include "shimload.h"
+
+ExternC PfnDliHook __pfnDliNotifyHook = ShimDelayLoadHook;
diff --git a/src/jit/disasm.cpp b/src/jit/disasm.cpp
new file mode 100644
index 0000000000..925f2c3343
--- /dev/null
+++ b/src/jit/disasm.cpp
@@ -0,0 +1,1568 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/***********************************************************************
+*
+* File: dis.cpp
+*
+
+*
+* File Comments:
+*
+* This file handles disassembly. It is adapted from the MS linker.
+*
+***********************************************************************/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/*****************************************************************************/
+#ifdef LATE_DISASM
+/*****************************************************************************/
+
+// Define DISASM_DEBUG to get verbose output of late disassembler inner workings.
+//#define DISASM_DEBUG
+#ifdef DISASM_DEBUG
+#ifdef DEBUG
+#define DISASM_DUMP(...) \
+ if (VERBOSE) \
+ printf(__VA_ARGS__)
+#else // !DEBUG
+#define DISASM_DUMP(...) printf(__VA_ARGS__)
+#endif // !DEBUG
+#else // !DISASM_DEBUG
+#define DISASM_DUMP(...)
+#endif // !DISASM_DEBUG
+
+/*****************************************************************************/
+
+#define MAX_CLASSNAME_LENGTH 1024
+
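+// The /ALTERNATENAME directives below redirect imports of the DIS string-formatting callbacks
+// that are decorated with 'unsigned short *' parameters to the native wchar_t* variants that
+// msvcdis exports, so the two wchar_t conventions link together.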
+#if defined(_AMD64_)
+
+#pragma comment(linker, \
+ "/ALTERNATENAME:__imp_?CchFormatAddr@DIS@@QEBA_K_KPEAG0@Z=__imp_?CchFormatAddr@DIS@@QEBA_K_KPEA_W0@Z")
+#pragma comment(linker, \
+ "/ALTERNATENAME:__imp_?CchFormatInstr@DIS@@QEBA_KPEAG_K@Z=__imp_?CchFormatInstr@DIS@@QEBA_KPEA_W_K@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchaddrSet@DIS@@QEAAP6A_KPEBV1@_KPEAG1PEA_K@ZP6A_K01213@Z@Z=__imp_?PfncchaddrSet@DIS@@QEAAP6A_KPEBV1@_KPEA_W1PEA_K@ZP6A_K01213@Z@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchregSet@DIS@@QEAAP6A_KPEBV1@W4REGA@1@PEAG_K@ZP6A_K0123@Z@Z=__imp_?PfncchregSet@DIS@@QEAAP6A_KPEBV1@W4REGA@1@PEA_W_K@ZP6A_K0123@Z@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchregrelSet@DIS@@QEAAP6A_KPEBV1@W4REGA@1@KPEAG_KPEAK@ZP6A_K01K234@Z@Z=__imp_?PfncchregrelSet@DIS@@QEAAP6A_KPEBV1@W4REGA@1@KPEA_W_KPEAK@ZP6A_K01K234@Z@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchfixupSet@DIS@@QEAAP6A_KPEBV1@_K1PEAG1PEA_K@ZP6A_K011213@Z@Z=__imp_?PfncchfixupSet@DIS@@QEAAP6A_KPEBV1@_K1PEA_W1PEA_K@ZP6A_K011213@Z@Z")
+
+#elif defined(_X86_)
+
+#pragma comment(linker, "/ALTERNATENAME:__imp_?CchFormatAddr@DIS@@QBEI_KPAGI@Z=__imp_?CchFormatAddr@DIS@@QBEI_KPA_WI@Z")
+#pragma comment(linker, "/ALTERNATENAME:__imp_?CchFormatInstr@DIS@@QBEIPAGI@Z=__imp_?CchFormatInstr@DIS@@QBEIPA_WI@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchaddrSet@DIS@@QAEP6GIPBV1@_KPAGIPA_K@ZP6GI012I3@Z@Z=__imp_?PfncchaddrSet@DIS@@QAEP6GIPBV1@_KPA_WIPA_K@ZP6GI012I3@Z@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchregSet@DIS@@QAEP6GIPBV1@W4REGA@1@PAGI@ZP6GI012I@Z@Z=__imp_?PfncchregSet@DIS@@QAEP6GIPBV1@W4REGA@1@PA_WI@ZP6GI012I@Z@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchregrelSet@DIS@@QAEP6GIPBV1@W4REGA@1@KPAGIPAK@ZP6GI01K2I3@Z@Z=__imp_?PfncchregrelSet@DIS@@QAEP6GIPBV1@W4REGA@1@KPA_WIPAK@ZP6GI01K2I3@Z@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchfixupSet@DIS@@QAEP6GIPBV1@_KIPAGIPA_K@ZP6GI01I2I3@Z@Z=__imp_?PfncchfixupSet@DIS@@QAEP6GIPBV1@_KIPA_WIPA_K@ZP6GI01I2I3@Z@Z")
+
+#endif
+
+/*****************************************************************************
+ * Given an absolute address from the beginning of the code,
+ * find the corresponding emitter block and the relative offset
+ * of the current address in that block.
+ * This was used to get to the fixup list of each block. The new emitter has
+ * no such fixups. Something needs to be added for this.
+ */
+
+// These structs were defined in emit.h. Fake them here so DisAsm.cpp can compile
+
+typedef struct codeFix
+{
+ codeFix* cfNext;
+ unsigned cfFixup;
+} * codeFixPtr;
+
+typedef struct codeBlk
+{
+ codeFix* cbFixupLst;
+} * codeBlkPtr;
+
+/*****************************************************************************
+ * The following is the callback for jump label and direct function call fixups.
+ * "addr" represents the address of the jump that has to be
+ * replaced with a label or function name.
+ *
+ * Return 1 if a name was written representing the address, 0 otherwise.
+ */
+
+/* static */
+size_t __stdcall DisAssembler::disCchAddr(
+ const DIS* pdis, DIS::ADDR addr, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORDLONG* pdwDisp)
+{
+ DisAssembler* pDisAsm = (DisAssembler*)pdis->PvClient();
+ assert(pDisAsm);
+ return pDisAsm->disCchAddrMember(pdis, addr, wz, cchMax, pdwDisp);
+}
+
+size_t DisAssembler::disCchAddrMember(
+ const DIS* pdis, DIS::ADDR addr, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORDLONG* pdwDisp)
+{
+ /* First check the termination type of the instruction,
+ * because this might be a helper or static function call;
+ * check to see if we have a fixup for the current address */
+
+ size_t retval = 0; // assume we don't know
+
+#if defined(_TARGET_XARCH_)
+
+ DISX86::TRMTA terminationType = DISX86::TRMTA(pdis->Trmta());
+
+ DISASM_DUMP("AddrMember %p (%p), termType %u\n", addr, disGetLinearAddr((size_t)addr), terminationType);
+
+ switch (terminationType)
+ {
+ // int disCallSize;
+
+ case DISX86::trmtaJmpShort:
+ case DISX86::trmtaJmpCcShort:
+
+ /* We have a short jump in the current code block - generate the label to which we jump */
+
+ assert(0 <= disTarget && disTarget < disTotalCodeSize);
+ swprintf_s(wz, cchMax, W("short L_%02u"), disLabels[disTarget]);
+ retval = 1;
+ break;
+
+ case DISX86::trmtaJmpNear:
+ case DISX86::trmtaJmpCcNear:
+
+ /* We have a near jump. Check if it is in the current code block.
+ * Otherwise we have no target for it. */
+
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ swprintf_s(wz, cchMax, W("L_%02u"), disLabels[disTarget]);
+ retval = 1;
+ }
+ break;
+
+ case DISX86::trmtaCallNear16:
+ case DISX86::trmtaCallNear32:
+
+ /* check for local calls (i.e. CALL label) */
+
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ /* not a "call ds:[0000]" - go ahead */
+ /* disTarget within block boundary -> local call */
+
+ swprintf_s(wz, cchMax, W("short L_%02u"), disLabels[disTarget]);
+ retval = 1;
+ break;
+ }
+
+ /* this is a near call - in our case usually VM helper functions */
+
+ /* find the emitter block and the offset of the call fixup */
+ /* for the fixup offset we have to add the opcode size for the call - for a near call it is 1 */
+
+ // disCallSize = 1;
+
+ {
+ size_t absoluteTarget = (size_t)disGetLinearAddr(disTarget);
+ const char* name = disGetMethodFullName(absoluteTarget);
+ if (name != nullptr)
+ {
+ swprintf_s(wz, cchMax, W("%p %S"), dspAddr(absoluteTarget), name);
+ retval = 1;
+ break;
+ }
+ }
+
+ break;
+
+#ifdef _TARGET_AMD64_
+
+ case DISX86::trmtaFallThrough:
+
+ /* memory indirect case. Could be for an LEA for the base address of a switch table, which is an arbitrary
+ * address (currently that of the first block after the prolog). */
+
+ /* find the emitter block and the offset for the fixup
+ * "addr" is the address of the immediate */
+
+ break;
+
+#endif // _TARGET_AMD64_
+
+ default:
+
+ printf("Termination type is %d\n", (int)terminationType);
+ assert(!"treat this case\n");
+ break;
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ DISARM64::TRMTA terminationType = DISARM64::TRMTA(pdis->Trmta());
+
+ DISASM_DUMP("AddrMember %p (%p), termType %u\n", addr, disGetLinearAddr((size_t)addr), terminationType);
+
+ switch (terminationType)
+ {
+ // int disCallSize;
+
+ case DISARM64::TRMTA::trmtaBra:
+ case DISARM64::TRMTA::trmtaBraCase:
+ case DISARM64::TRMTA::trmtaBraCc:
+ case DISARM64::TRMTA::trmtaBraCcCase:
+ case DISARM64::TRMTA::trmtaBraCcInd:
+ case DISARM64::TRMTA::trmtaBraInd:
+
+ /* We have a jump. Check if it is in the current code block.
+ * Otherwise we have no target for it. */
+
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ swprintf_s(wz, cchMax, W("L_%02u"), disLabels[disTarget]);
+ retval = 1;
+ }
+ break;
+
+ case DISARM64::trmtaCall:
+ case DISARM64::trmtaCallCc:
+ case DISARM64::trmtaCallCcInd:
+ case DISARM64::trmtaCallInd:
+
+ /* check for local calls (i.e. CALL label) */
+
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ /* not a "call [0000]" - go ahead */
+ /* disTarget within block boundary -> local call */
+
+ swprintf_s(wz, cchMax, W("L_%02u"), disLabels[disTarget]);
+ retval = 1;
+ break;
+ }
+
+ /* this is a near call - in our case usually VM helper functions */
+
+ /* find the emitter block and the offset of the call fixup */
+ /* for the fixup offset we have to add the opcode size for the call - for a near call it is 1 */
+
+ // disCallSize = 1;
+
+ {
+ size_t absoluteTarget = (size_t)disGetLinearAddr(disTarget);
+ const char* name = disGetMethodFullName(absoluteTarget);
+ if (name != nullptr)
+ {
+ swprintf_s(wz, cchMax, W("%p %S"), dspAddr(absoluteTarget), name);
+ retval = 1;
+ break;
+ }
+ }
+
+ break;
+
+ case DISARM64::trmtaFallThrough:
+
+ /* memory indirect case. Could be for an LEA for the base address of a switch table, which is an arbitrary
+ * address (currently that of the first block after the prolog). */
+
+ /* find the emitter block and the offset for the fixup
+ * "addr" is the address of the immediate */
+
+ {
+ DIS::INSTRUCTION instr;
+ DIS::OPERAND ops[DISARM64::coperandMax];
+ bool ok = pdis->FDecode(&instr, ops, ArrLen(ops));
+ if (ok)
+ {
+ bool isAddress = false;
+ switch ((DISARM64::OPA)instr.opa)
+ {
+ case DISARM64::opaAdr:
+ case DISARM64::opaAdrp:
+ isAddress = true;
+ break;
+ default:
+ break;
+ }
+
+ if (isAddress && 0 <= addr && addr < disTotalCodeSize)
+ {
+ swprintf_s(wz, cchMax, W("L_%02u"), disLabels[addr]);
+ retval = 1;
+ }
+ }
+ }
+ break;
+
+ default:
+
+ printf("Termination type is %d\n", (int)terminationType);
+ assert(!"treat this case\n");
+ break;
+ }
+
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+ if (retval == 0)
+ {
+ if (disDiffable)
+ {
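+ // In diffable mode, print a fixed placeholder address so the output is stable across runs.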
+ swprintf_s(wz, cchMax, W("%p"), dspAddr((void*)1));
+ }
+ }
+ else
+ {
+ /* no displacement */
+
+ *pdwDisp = 0x0;
+ }
+
+ return retval;
+}
+
+/*****************************************************************************
+ * We annotate some instructions to get the info needed to display the symbols
+ * for those instructions.
+ *
+ * Return 1 if a name was written representing the address, 0 otherwise.
+ */
+
+/* static */
+size_t __stdcall DisAssembler::disCchFixup(
+ const DIS* pdis, DIS::ADDR addr, size_t size, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORDLONG* pdwDisp)
+{
+ DisAssembler* pDisAsm = (DisAssembler*)pdis->PvClient();
+ assert(pDisAsm);
+
+ return pDisAsm->disCchFixupMember(pdis, addr, size, wz, cchMax, pdwDisp);
+}
+
+size_t DisAssembler::disCchFixupMember(
+ const DIS* pdis, DIS::ADDR addr, size_t size, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORDLONG* pdwDisp)
+{
+#if defined(_TARGET_XARCH_)
+
+ DISX86::TRMTA terminationType = DISX86::TRMTA(pdis->Trmta());
+ // DIS::ADDR disIndAddr;
+
+ DISASM_DUMP("FixupMember %016I64X (%08IX), size %d, termType %u\n", addr, disGetLinearAddr((size_t)addr), size,
+ terminationType);
+
+ // Is there a relocation registered for the address?
+
+ size_t absoluteAddr = (size_t)disGetLinearAddr((size_t)addr);
+ size_t targetAddr;
+ bool anyReloc = GetRelocationMap()->Lookup(absoluteAddr, &targetAddr);
+
+ switch (terminationType)
+ {
+ DIS::ADDR disCallSize;
+
+ case DISX86::trmtaFallThrough:
+
+ /* memory indirect case */
+
+ assert(addr > pdis->Addr());
+
+ /* find the emitter block and the offset for the fixup
+ * "addr" is the address of the immediate */
+
+ if (anyReloc)
+ {
+ // Make instructions like "mov rcx, 7FE8247A638h" diffable.
+ swprintf_s(wz, cchMax, W("%IXh"), dspAddr(targetAddr));
+ break;
+ }
+
+ return 0;
+
+ case DISX86::trmtaJmpInd:
+
+ /* pretty rare case - something like "jmp [eax*4]"
+ * not a function call or anything worth annotating */
+
+ return 0;
+
+ case DISX86::trmtaTrap:
+ case DISX86::trmtaTrapCc:
+
+ /* some instructions like division have a TRAP termination type - ignore it */
+
+ return 0;
+
+ case DISX86::trmtaJmpShort:
+ case DISX86::trmtaJmpCcShort:
+
+ case DISX86::trmtaJmpNear:
+ case DISX86::trmtaJmpCcNear:
+
+ /* these are treated by the CchAddr callback - skip them */
+
+ return 0;
+
+ case DISX86::trmtaCallNear16:
+ case DISX86::trmtaCallNear32:
+
+ if (anyReloc)
+ {
+ const char* name = disGetMethodFullName(targetAddr);
+ if (name != nullptr)
+ {
+ swprintf_s(wz, cchMax, W("%p %S"), dspAddr(targetAddr), name);
+ break;
+ }
+ }
+
+ /* these are treated by the CchAddr callback - skip them */
+
+ return 0;
+
+ case DISX86::trmtaCallInd:
+
+ /* here we have an indirect call - find the indirect address */
+
+ // BYTE * code = disGetLinearAddr((size_t)addr);
+ // disIndAddr = (DIS::ADDR) (code+0);
+
+ /* find the size of the call opcode - less the immediate */
+ /* for the fixup offset we have to add the opcode size for the call */
+ /* addr is the address of the immediate, pdis->Addr() returns the address of the disassembled instruction */
+
+ assert(addr > pdis->Addr());
+ disCallSize = addr - pdis->Addr();
+
+ /* find the emitter block and the offset of the call fixup */
+
+ return 0;
+
+ default:
+
+ printf("Termination type is %d\n", (int)terminationType);
+ assert(!"treat this case\n");
+ break;
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ DISARM64::TRMTA terminationType = DISARM64::TRMTA(pdis->Trmta());
+ // DIS::ADDR disIndAddr;
+
+ DISASM_DUMP("FixupMember %016I64X (%08IX), size %d, termType %u\n", addr, disGetLinearAddr((size_t)addr), size,
+ terminationType);
+
+ // Is there a relocation registered for the address?
+
+ size_t absoluteAddr = (size_t)disGetLinearAddr((size_t)addr);
+ size_t targetAddr;
+ bool anyReloc = GetRelocationMap()->Lookup(absoluteAddr, &targetAddr);
+
+ switch (terminationType)
+ {
+ DIS::ADDR disCallSize;
+
+ case DISARM64::TRMTA::trmtaUnknown:
+ return 0;
+
+ case DISARM64::TRMTA::trmtaFallThrough:
+
+ if (anyReloc)
+ {
+ /* memory indirect case */
+
+ assert(addr > pdis->Addr());
+
+ /* find the emitter block and the offset for the fixup
+ * "addr" is the address of the immediate */
+
+ // Make instructions like "mov rcx, 7FE8247A638h" diffable.
+ swprintf_s(wz, cchMax, W("%IXh"), dspAddr(targetAddr));
+ break;
+ }
+
+ return 0;
+
+ case DISARM64::TRMTA::trmtaBraInd:
+ case DISARM64::TRMTA::trmtaBraCcInd:
+
+ /* pretty rare case - something like "jmp [eax*4]"
+ * not a function call or anything worth annotating */
+
+ return 0;
+
+ case DISARM64::TRMTA::trmtaTrap:
+ case DISARM64::TRMTA::trmtaTrapCc:
+
+ /* some instructions like division have a TRAP termination type - ignore it */
+
+ return 0;
+
+ case DISARM64::TRMTA::trmtaBra:
+ case DISARM64::TRMTA::trmtaBraCase:
+ case DISARM64::TRMTA::trmtaBraCc:
+ case DISARM64::TRMTA::trmtaBraCcCase:
+
+ /* these are treated by the CchAddr callback - skip them */
+
+ return 0;
+
+ case DISARM64::TRMTA::trmtaCall:
+ case DISARM64::TRMTA::trmtaCallCc:
+
+ if (anyReloc)
+ {
+ const char* name = disGetMethodFullName(targetAddr);
+ if (name != nullptr)
+ {
+ swprintf_s(wz, cchMax, W("%p %S"), dspAddr(targetAddr), name);
+ break;
+ }
+ }
+
+ /* these are treated by the CchAddr callback - skip them */
+
+ return 0;
+
+ case DISARM64::TRMTA::trmtaCallInd:
+ case DISARM64::TRMTA::trmtaCallCcInd:
+
+ /* here we have an indirect call - find the indirect address */
+
+ // BYTE * code = disGetLinearAddr((size_t)addr);
+ // disIndAddr = (DIS::ADDR) (code+0);
+
+ /* find the size of the call opcode - less the immediate */
+ /* for the fixup offset we have to add the opcode size for the call */
+ /* addr is the address of the immediate, pdis->Addr() returns the address of the disassembled instruction */
+
+ assert(addr > pdis->Addr());
+ disCallSize = addr - pdis->Addr();
+
+ /* find the emitter block and the offset of the call fixup */
+
+ return 0;
+
+ default:
+
+ printf("Termination type is %d\n", (int)terminationType);
+ assert(!"treat this case\n");
+ break;
+ }
+
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+ /* no displacement */
+
+ *pdwDisp = 0x0;
+
+ return 1;
+}
+
+/*****************************************************************************
+ * This is the callback for register-relative operands in an instruction.
+ * If the register is ESP or EBP, the operand may be a local variable
+ * or a parameter; otherwise, the operand may be an instance variable.
+ *
+ * Return 1 if a name was written representing the register-relative operand, 0 otherwise.
+ */
+
+/* static */
+size_t __stdcall DisAssembler::disCchRegRel(
+ const DIS* pdis, DIS::REGA reg, DWORD disp, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORD* pdwDisp)
+{
+ DisAssembler* pDisAsm = (DisAssembler*)pdis->PvClient();
+ assert(pDisAsm);
+
+ return pDisAsm->disCchRegRelMember(pdis, reg, disp, wz, cchMax, pdwDisp);
+}
+
+size_t DisAssembler::disCchRegRelMember(
+ const DIS* pdis, DIS::REGA reg, DWORD disp, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORD* pdwDisp)
+{
+#if defined(_TARGET_XARCH_)
+
+ DISX86::TRMTA terminationType = DISX86::TRMTA(pdis->Trmta());
+ // DIS::ADDR disIndAddr;
+
+ DISASM_DUMP("RegRelMember reg %u, disp %u, termType %u\n", reg, disp, terminationType);
+
+ switch (terminationType)
+ {
+ int disOpcodeSize;
+ const char* var;
+
+ case DISX86::trmtaFallThrough:
+
+ /* some instructions like division have a TRAP termination type - ignore it */
+
+ case DISX86::trmtaTrap:
+ case DISX86::trmtaTrapCc:
+
+ var = disComp->codeGen->siStackVarName((size_t)(pdis->Addr() - disStartAddr), pdis->Cb(), reg, disp);
+ if (var)
+ {
+ swprintf_s(wz, cchMax, W("%hs+%Xh '%hs'"), getRegName(reg), disp, var);
+ *pdwDisp = 0;
+
+ return 1;
+ }
+
+ /* This case consists of non-static members */
+
+ /* find the emitter block and the offset for the fixup
+ * the fixup is emitted after the encoding of the instruction - size = word (2 bytes)
+ * GRRRR!!! - for the 16-bit case we have to check for the address size prefix = 0x66
+ */
+
+ if (*disGetLinearAddr(disCurOffset) == 0x66)
+ {
+ disOpcodeSize = 3;
+ }
+ else
+ {
+ disOpcodeSize = 2;
+ }
+
+ return 0;
+
+ case DISX86::trmtaCallNear16:
+ case DISX86::trmtaCallNear32:
+ case DISX86::trmtaJmpInd:
+
+ break;
+
+ case DISX86::trmtaCallInd:
+
+ /* check if this is a one byte displacement */
+
+ if ((signed char)disp == (int)disp)
+ {
+ /* we have a one byte displacement -> there were no previous callbacks */
+
+ /* find the size of the call opcode - less the immediate */
+ /* this is a call R/M indirect -> opcode size is 2 */
+
+ disOpcodeSize = 2;
+
+ /* find the emitter block and the offset of the call fixup */
+
+ return 0;
+ }
+ else
+ {
+ /* check if we already have a symbol name as replacement */
+
+ if (disHasName)
+ {
+ /* CchFixup has been called before - we have a symbol name saved in global var disFuncTempBuf */
+
+ swprintf_s(wz, cchMax, W("%hs+%u '%hs'"), getRegName(reg), disp, disFuncTempBuf);
+ *pdwDisp = 0;
+ disHasName = false;
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ default:
+
+ printf("Termination type is %d\n", (int)terminationType);
+ assert(!"treat this case\n");
+
+ break;
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ DISARM64::TRMTA terminationType = DISARM64::TRMTA(pdis->Trmta());
+
+ DISASM_DUMP("RegRelMember reg %u, disp %u, termType %u\n", reg, disp, terminationType);
+
+ switch (terminationType)
+ {
+ int disOpcodeSize;
+ const char* var;
+
+ case DISARM64::TRMTA::trmtaFallThrough:
+
+ /* some instructions like division have a TRAP termination type - ignore it */
+
+ case DISARM64::TRMTA::trmtaTrap:
+ case DISARM64::TRMTA::trmtaTrapCc:
+
+ var = disComp->codeGen->siStackVarName((size_t)(pdis->Addr() - disStartAddr), pdis->Cb(), reg, disp);
+ if (var)
+ {
+ swprintf_s(wz, cchMax, W("%hs+%Xh '%hs'"), getRegName(reg), disp, var);
+ *pdwDisp = 0;
+
+ return 1;
+ }
+
+ /* This case consists of non-static members */
+
+ // TODO-ARM64-Bug?: Is this correct?
+ disOpcodeSize = 2;
+ return 0;
+
+ case DISARM64::TRMTA::trmtaCall:
+ case DISARM64::TRMTA::trmtaCallCc:
+ case DISARM64::TRMTA::trmtaBraInd:
+ case DISARM64::TRMTA::trmtaBraCcInd:
+ break;
+
+ case DISARM64::TRMTA::trmtaCallInd:
+ case DISARM64::TRMTA::trmtaCallCcInd:
+
+ /* check if this is a one byte displacement */
+
+ if ((signed char)disp == (int)disp)
+ {
+ /* we have a one byte displacement -> there were no previous callbacks */
+
+ /* find the size of the call opcode - less the immediate */
+ /* this is a call R/M indirect -> opcode size is 2 */
+
+ // TODO-ARM64-Bug?: Is this correct?
+ disOpcodeSize = 2;
+
+ /* find the emitter block and the offset of the call fixup */
+
+ return 0;
+ }
+ else
+ {
+ /* check if we already have a symbol name as replacement */
+
+ if (disHasName)
+ {
+ /* CchFixup has been called before - we have a symbol name saved in global var disFuncTempBuf */
+
+ swprintf_s(wz, cchMax, W("%hs+%u '%hs'"), getRegName(reg), disp, disFuncTempBuf);
+ *pdwDisp = 0;
+ disHasName = false;
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ default:
+
+ printf("Termination type is %d\n", (int)terminationType);
+ assert(!"treat this case\n");
+
+ break;
+ }
+
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+ /* save displacement */
+
+ *pdwDisp = disp;
+
+ return 1;
+}
+
+/*****************************************************************************
+ *
+ * Callback for register operands. Most probably, this is a local variable or
+ * a parameter
+ *
+ * Return 1 if a name was written representing the register, 0 otherwise.
+ */
+
+/* static */
+size_t __stdcall DisAssembler::disCchReg(const DIS* pdis, DIS::REGA reg, __in_ecount(cchMax) wchar_t* wz, size_t cchMax)
+{
+ DisAssembler* pDisAsm = (DisAssembler*)pdis->PvClient();
+ assert(pDisAsm);
+
+ return pDisAsm->disCchRegMember(pdis, reg, wz, cchMax);
+}
+
+size_t DisAssembler::disCchRegMember(const DIS* pdis, DIS::REGA reg, __in_ecount(cchMax) wchar_t* wz, size_t cchMax)
+{
+ // TODO-Review: DIS::REGA does not directly map to our regNumber! E.g., look at DISARM64::REGA --
+ // the Wt registers come first (and do map to our regNumber), but the Xt registers follow.
+ // Until this is fixed, don't use this function!
+ disHasName = false;
+ return 0;
+
+#if 0
+ const char * var = disComp->codeGen->siRegVarName(
+ (size_t)(pdis->Addr() - disStartAddr),
+ pdis->Cb(),
+ reg);
+
+ if (var)
+ {
+ if (disHasName)
+ {
+ /* CchRegRel has been called before - we have a symbol name saved in global var disFuncTempBuf */
+
+ swprintf_s(wz, cchMax, W("%hs'%hs.%hs'"), getRegName(reg), var, disFuncTempBuf);
+ disHasName = false;
+ return 1;
+ }
+ else
+ {
+ swprintf_s(wz, cchMax, W("%hs'%hs'"), getRegName(reg), var);
+ return 1;
+ }
+ }
+ else
+ {
+ if (disHasName)
+ {
+ /* this is the ugly case when a variable is incorrectly presumed dead */
+
+ swprintf_s(wz, cchMax, W("%hs'%hs.%hs'"), getRegName(reg), "<InstVar>", disFuncTempBuf);
+ disHasName = false;
+ return 1;
+ }
+
+ /* just to make sure we didn't bungle if var returns NULL */
+ disHasName = false;
+ return 0;
+ }
+#endif // 0
+}
+
+/*****************************************************************************
+ * Helper function to lazily create a map from code address to CORINFO_METHOD_HANDLE.
+ */
+AddrToMethodHandleMap* DisAssembler::GetAddrToMethodHandleMap()
+{
+ if (disAddrToMethodHandleMap == nullptr)
+ {
+ assert(disComp->getAllocator() != nullptr);
+ disAddrToMethodHandleMap = new (disComp->getAllocator()) AddrToMethodHandleMap(disComp->getAllocator());
+ }
+ return disAddrToMethodHandleMap;
+}
+
+/*****************************************************************************
+ * Helper function to lazily create a map from code address to CORINFO_METHOD_HANDLE.
+ */
+AddrToMethodHandleMap* DisAssembler::GetHelperAddrToMethodHandleMap()
+{
+ if (disHelperAddrToMethodHandleMap == nullptr)
+ {
+ assert(disComp->getAllocator() != nullptr);
+ disHelperAddrToMethodHandleMap = new (disComp->getAllocator()) AddrToMethodHandleMap(disComp->getAllocator());
+ }
+ return disHelperAddrToMethodHandleMap;
+}
+
+/*****************************************************************************
+ * Helper function to lazily create a map from relocation address to relocation target address.
+ */
+AddrToAddrMap* DisAssembler::GetRelocationMap()
+{
+ if (disRelocationMap == nullptr)
+ {
+ assert(disComp->getAllocator() != nullptr);
+ disRelocationMap = new (disComp->getAllocator()) AddrToAddrMap(disComp->getAllocator());
+ }
+ return disRelocationMap;
+}
+
+/*****************************************************************************
+ * Return the count of bytes disassembled.
+ */
+
+size_t DisAssembler::CbDisassemble(DIS* pdis,
+ size_t offs,
+ DIS::ADDR addr,
+ const BYTE* pb,
+ size_t cbMax,
+ FILE* pfile,
+ bool findLabels,
+ bool printit /* = false */,
+ bool dispOffs /* = false */,
+ bool dispCodeBytes /* = false */)
+{
+ assert(pdis);
+
+ size_t cb = pdis->CbDisassemble(addr, pb, cbMax);
+
+ if (cb == 0)
+ {
+ DISASM_DUMP("CbDisassemble offs %Iu addr %I64u\n", offs, addr);
+ // assert(!"can't disassemble instruction!!!");
+ fprintf(pfile, "MSVCDIS can't disassemble instruction @ offset %Iu (0x%02x)!!!\n", offs, offs);
+#if defined(_TARGET_ARM64_)
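+ // ARM64 instructions are a fixed 4 bytes; dump the raw instruction word and skip over it.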
+ fprintf(pfile, "%08Xh\n", *(unsigned int*)pb);
+ return 4;
+#else
+ fprintf(pfile, "%02Xh\n", *pb);
+ return 1;
+#endif
+ }
+
+#if defined(_TARGET_ARM64_)
+ assert(cb == 4); // all instructions are 4 bytes!
+#endif // _TARGET_ARM64_
+
+ /* remember current offset and instruction size */
+
+ disCurOffset = (size_t)addr;
+ disInstSize = cb;
+
+ /* Set the disTarget address */
+
+ disTarget = (size_t)pdis->AddrTarget();
+
+ if (findLabels)
+ {
+#if defined(_TARGET_XARCH_)
+ DISX86::TRMTA terminationType = DISX86::TRMTA(pdis->Trmta());
+
+ /* check the termination type of the instruction */
+
+ switch (terminationType)
+ {
+ case DISX86::trmtaCallNear16:
+ case DISX86::trmtaCallNear32:
+ case DISX86::trmtaCallFar:
+
+ {
+ // Don't count addresses in the relocation table
+ size_t targetAddr;
+ size_t absoluteAddr =
+ (size_t)disGetLinearAddr((size_t)pdis->AddrAddress(1)); // Get the address in the instruction of the
+ // call target address (the address the
+ // reloc is applied to).
+ if (GetRelocationMap()->Lookup(absoluteAddr, &targetAddr))
+ {
+ break;
+ }
+ }
+
+ __fallthrough;
+
+ case DISX86::trmtaJmpShort:
+ case DISX86::trmtaJmpNear:
+ case DISX86::trmtaJmpFar:
+ case DISX86::trmtaJmpCcShort:
+ case DISX86::trmtaJmpCcNear:
+
+ /* a CALL is local iff the disTarget is within the block boundary */
+
+ /* mark the jump label in the disTarget vector and return */
+
+ if (disTarget != DIS::addrNil) // There seems to be an assumption that you can't branch to the first
+ // address of the function (prolog).
+ {
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ /* we're OK, disTarget within block boundary */
+
+ disLabels[disTarget] = 1;
+ }
+ }
+ break;
+
+ case DISX86::trmtaFallThrough:
+ // We'd like to be able to get a label for code like "lea rcx, [4]" that we use for jump tables, but I
+ // can't figure out how.
+ break;
+
+ default:
+
+ /* jump is not in the current code block */
+ break;
+
+ } // end switch
+#elif defined(_TARGET_ARM64_)
+ DISARM64::TRMTA terminationType = DISARM64::TRMTA(pdis->Trmta());
+
+ /* check the termination type of the instruction */
+
+ switch (terminationType)
+ {
+ case DISARM64::TRMTA::trmtaCall:
+ case DISARM64::TRMTA::trmtaCallCc:
+
+ {
+ // Don't count addresses in the relocation table
+ size_t targetAddr;
+ size_t absoluteAddr =
+ (size_t)disGetLinearAddr((size_t)pdis->AddrAddress(1)); // Get the address in the instruction of the
+ // call target address (the address the
+ // reloc is applied to).
+ if (GetRelocationMap()->Lookup(absoluteAddr, &targetAddr))
+ {
+ break;
+ }
+ }
+
+ __fallthrough;
+
+ case DISARM64::TRMTA::trmtaBra:
+ case DISARM64::TRMTA::trmtaBraCase:
+ case DISARM64::TRMTA::trmtaBraCc:
+ case DISARM64::TRMTA::trmtaBraCcCase:
+
+ /* a CALL is local iff the disTarget is within the block boundary */
+
+ /* mark the jump label in the disTarget vector and return */
+
+ if (disTarget != DIS::addrNil) // There seems to be an assumption that you can't branch to the first
+ // address of the function (prolog).
+ {
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ /* we're OK, disTarget within block boundary */
+
+ disLabels[disTarget] = 1;
+ }
+ }
+ break;
+
+ case DISARM64::TRMTA::trmtaFallThrough:
+ {
+ DIS::INSTRUCTION instr;
+ DIS::OPERAND ops[DISARM64::coperandMax];
+ bool ok = pdis->FDecode(&instr, ops, ArrLen(ops));
+ if (ok)
+ {
+ switch ((DISARM64::OPA)instr.opa)
+ {
+ case DISARM64::opaAdr:
+ case DISARM64::opaAdrp:
+ // operand 1 is an address
+ assert(instr.coperand >= 2);
+ assert(ops[1].opcls == DIS::opclsImmediate);
+ assert(ops[1].imcls == DIS::imclsAddress);
+ disTarget = ops[1].dwl;
+ break;
+ default:
+ break;
+ }
+
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ /* we're OK, disTarget within block boundary */
+
+ disLabels[disTarget] = 1;
+ }
+ }
+ }
+ break;
+
+ default:
+
+ /* jump is not in the current code block */
+ break;
+
+ } // end switch
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+ return cb;
+ } // end if
+
+ /* check if we have a label here */
+
+ if (printit)
+ {
+ if (disLabels[addr])
+ {
+ /* print the label and the offset */
+
+ fprintf(pfile, "L_%02u:\n", disLabels[addr]);
+ }
+ }
+
+ wchar_t wz[MAX_CLASSNAME_LENGTH];
+ pdis->CchFormatInstr(wz, sizeof(wz) / sizeof(wz[0]));
+
+ if (printit)
+ {
+ if (dispOffs)
+ {
+ fprintf(pfile, "%03X", offs);
+ }
+
+#ifdef _TARGET_ARM64_
+#define CCH_INDENT 8 // fixed sized instructions, always 8 characters
+#elif defined(_TARGET_AMD64_)
+#define CCH_INDENT 30 // large constants sometimes
+#else
+#define CCH_INDENT 24
+#endif
+
+ size_t cchIndent = CCH_INDENT;
+
+ if (dispCodeBytes)
+ {
+ static size_t cchBytesMax = -1;
+
+ if (cchBytesMax == -1)
+ {
+ cchBytesMax = pdis->CchFormatBytesMax();
+ }
+
+ wchar_t wzBytes[MAX_CLASSNAME_LENGTH];
+ assert(cchBytesMax < MAX_CLASSNAME_LENGTH);
+
+ size_t cchBytes = pdis->CchFormatBytes(wzBytes, sizeof(wzBytes) / sizeof(wzBytes[0]));
+
+ if (cchBytes > CCH_INDENT)
+ {
+ // Truncate the bytes if they are too long
+
+ static const wchar_t* elipses = W("...\0");
+ const size_t cchElipses = 4;
+
+ memcpy(&wzBytes[CCH_INDENT - cchElipses], elipses, cchElipses * sizeof(wchar_t));
+
+ cchBytes = CCH_INDENT;
+ }
+
+ fprintf(pfile, " %ls", wzBytes);
+ cchIndent = CCH_INDENT - cchBytes;
+ }
+
+ // print the dis-assembled instruction
+
+ fprintf(pfile, "%*c %ls\n", cchIndent, ' ', wz);
+ }
+
+ return cb;
+}
+
+// TODO-Cleanup: this is currently unused, unreferenced.
+size_t CbDisassembleWithBytes(DIS* pdis, DIS::ADDR addr, const BYTE* pb, size_t cbMax, FILE* pfile)
+{
+ assert(pdis);
+ DisAssembler* pDisAsm = (DisAssembler*)pdis->PvClient();
+ assert(pDisAsm);
+
+ wchar_t wz[MAX_CLASSNAME_LENGTH];
+
+ pdis->CchFormatAddr(addr, wz, sizeof(wz) / sizeof(wz[0]));
+
+ size_t cchIndent = (size_t)fprintf(pfile, " %ls: ", wz);
+
+ size_t cb = pdis->CbDisassemble(addr, pb, cbMax);
+
+ if (cb == 0)
+ {
+ fprintf(pfile, "%02Xh\n", *pb);
+ return (1);
+ }
+
+ size_t cchBytesMax = pdis->CchFormatBytesMax();
+
+ if (cchBytesMax > 18)
+ {
+ // Limit bytes coded to 18 characters
+
+ cchBytesMax = 18;
+ }
+
+ wchar_t wzBytes[64];
+ size_t cchBytes = pdis->CchFormatBytes(wzBytes, sizeof(wzBytes) / sizeof(wzBytes[0]));
+
+ wchar_t* pwzBytes;
+ wchar_t* pwzNext;
+
+ for (pwzBytes = wzBytes; pwzBytes != NULL; pwzBytes = pwzNext)
+ {
+ BOOL fFirst = (pwzBytes == wzBytes);
+
+ cchBytes = wcslen(pwzBytes);
+
+ if (cchBytes <= cchBytesMax)
+ {
+ pwzNext = NULL;
+ }
+
+ else
+ {
+ wchar_t ch = pwzBytes[cchBytesMax];
+ pwzBytes[cchBytesMax] = '\0';
+
+ if (ch == W(' '))
+ {
+ pwzNext = pwzBytes + cchBytesMax + 1;
+ }
+
+ else
+ {
+ pwzNext = wcsrchr(pwzBytes, W(' '));
+ assert(pwzNext);
+
+ pwzBytes[cchBytesMax] = ch;
+ *pwzNext++ = '\0';
+ }
+ }
+
+ if (fFirst)
+ {
+ pdis->CchFormatInstr(wz, sizeof(wz) / sizeof(wz[0]));
+ fprintf(pfile, "%-*ls %ls\n", cchBytesMax, pwzBytes, wz);
+ }
+
+ else
+ {
+ fprintf(pfile, "%*c%ls\n", cchIndent, ' ', pwzBytes);
+ }
+ }
+
+ return (cb);
+}
+
+void DisAssembler::DisasmBuffer(FILE* pfile, bool printit)
+{
+ DIS* pdis = NULL;
+
+#ifdef _TARGET_X86_
+ pdis = DIS::PdisNew(DIS::distX86);
+#elif defined(_TARGET_AMD64_)
+ pdis = DIS::PdisNew(DIS::distX8664);
+#elif defined(_TARGET_ARM64_)
+ pdis = DIS::PdisNew(DIS::distArm64);
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif
+
+ if (pdis == NULL)
+ {
+ assert(!"out of memory in disassembler?");
+ return;
+ }
+
+#ifdef _TARGET_64BIT_
+ pdis->SetAddr64(true);
+#endif
+
+ // Store a pointer to the DisAssembler so that the callback functions
+ // can get to it.
+
+ pdis->PvClientSet((void*)this);
+
+ /* Calculate addresses */
+
+ size_t ibCur = 0;
+ DIS::ADDR addr = 0; // Always emit code with respect to a "0" base address.
+
+ /* First walk the code to find all jump targets */
+
+ while (ibCur < disTotalCodeSize)
+ {
+ size_t cb;
+
+ cb = CbDisassemble(pdis, ibCur, addr + ibCur, disGetLinearAddr(ibCur), disGetBufferSize(ibCur), pfile,
+ true); // find labels
+
+ // CbDisassemble returning > MAX_INT... give me a break.
+ ibCur += cb;
+ }
+
+ /* reset the label counter and start assigning consecutive number labels to the label locations */
+
+ BYTE label = 0;
+ for (unsigned i = 0; i < disTotalCodeSize; i++)
+ {
+ if (disLabels[i] != 0)
+ {
+ disLabels[i] = ++label;
+ }
+ }
+
+ /* Re-initialize addresses for disassemble phase */
+
+ ibCur = 0;
+ addr = 0;
+
+ // Set the callbacks only if we are displaying the disassembly. Otherwise, the scheduler has already called them.
+
+ if (printit)
+ {
+ /* Set the callback functions for symbol lookup */
+
+ pdis->PfncchaddrSet(disCchAddr);
+ pdis->PfncchfixupSet(disCchFixup);
+ pdis->PfncchregrelSet(disCchRegRel);
+ pdis->PfncchregSet(disCchReg);
+ }
+
+ while (ibCur < disTotalCodeSize)
+ {
+ size_t cb;
+
+ cb = CbDisassemble(pdis, ibCur, addr + ibCur, disGetLinearAddr(ibCur), disGetBufferSize(ibCur), pfile,
+ false, // find labels
+ printit,
+ !disDiffable, // display relative offset
+#ifdef DEBUG
+ !disDiffable // Display code bytes?
+#else
+ false // Display code bytes?
+#endif
+ );
+
+ ibCur += (unsigned)cb;
+ }
+
+ delete pdis;
+}
+
+/*****************************************************************************
+ * Given a linear offset into the code, find a pointer to the actual code (either in the hot or cold section)
+ *
+ * Arguments:
+ * offset - The linear offset into the code. It must point within the code.
+ */
+
+const BYTE* DisAssembler::disGetLinearAddr(size_t offset)
+{
+ if (offset < disHotCodeSize)
+ {
+ return (const BYTE*)disHotCodeBlock + offset;
+ }
+ else
+ {
+ return (const BYTE*)disColdCodeBlock + offset - disHotCodeSize;
+ }
+}
+
+/*****************************************************************************
+ * Given a linear offset into the code, determine how many bytes are remaining in the buffer.
+ * This will only return the number of bytes left in either the hot or cold buffer. This is used
+ * to avoid walking off the end of the buffer.
+ *
+ * Arguments:
+ * offset - The linear offset into the code. It must point within the code.
+ */
+
+size_t DisAssembler::disGetBufferSize(size_t offset)
+{
+ if (offset < disHotCodeSize)
+ {
+ return disHotCodeSize - offset;
+ }
+ else
+ {
+ return disHotCodeSize + disColdCodeSize - offset;
+ }
+}
+
+/*****************************************************************************
+ * Get the function name for a given absolute address.
+ */
+
+const char* DisAssembler::disGetMethodFullName(size_t addr)
+{
+ CORINFO_METHOD_HANDLE res;
+
+ // First check the JIT helper table: they're very common.
+ if (GetHelperAddrToMethodHandleMap()->Lookup(addr, &res))
+ {
+ return disComp->eeGetMethodFullName(res);
+ }
+
+ // Next check the "normal" registered call targets
+ if (GetAddrToMethodHandleMap()->Lookup(addr, &res))
+ {
+ return disComp->eeGetMethodFullName(res);
+ }
+
+ return nullptr;
+}
+
+/*****************************************************************************
+ * Register a called function address as associated with a CORINFO_METHOD_HANDLE.
+ *
+ * Arguments:
+ * addr - The absolute address of the target function.
+ * methHnd - The method handle associated with 'addr'.
+ */
+
+void DisAssembler::disSetMethod(size_t addr, CORINFO_METHOD_HANDLE methHnd)
+{
+ if (!disComp->opts.doLateDisasm)
+ {
+ return;
+ }
+
+ if (disComp->eeGetHelperNum(methHnd))
+ {
+ DISASM_DUMP("Helper function: %p => %p\n", addr, methHnd);
+ GetHelperAddrToMethodHandleMap()->Set(addr, methHnd);
+ }
+ else
+ {
+ DISASM_DUMP("Function: %p => %p\n", addr, methHnd);
+ GetAddrToMethodHandleMap()->Set(addr, methHnd);
+ }
+}
+
+/*****************************************************************************
+ * Register a relocation.
+ *
+ * Arguments:
+ * relocAddr - The absolute address the relocation applies to.
+ * targetAddr - The absolute address the relocation points to.
+ */
+
+void DisAssembler::disRecordRelocation(size_t relocAddr, size_t targetAddr)
+{
+ if (!disComp->opts.doLateDisasm)
+ {
+ return;
+ }
+
+ DISASM_DUMP("Relocation %p => %p\n", relocAddr, targetAddr);
+ GetRelocationMap()->Set(relocAddr, targetAddr);
+}
+
+/*****************************************************************************
+ *
+ * Disassemble the code which has been generated
+ */
+
+void DisAssembler::disAsmCode(BYTE* hotCodePtr, size_t hotCodeSize, BYTE* coldCodePtr, size_t coldCodeSize)
+{
+ if (!disComp->opts.doLateDisasm)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ // Should we make it diffable?
+ disDiffable = disComp->opts.dspDiffable;
+#else // !DEBUG
+ // NOTE: non-debug builds are always diffable!
+ disDiffable = true;
+#endif // !DEBUG
+
+#ifdef DEBUG
+ const wchar_t* fileName = JitConfig.JitLateDisasmTo();
+ if (fileName != nullptr)
+ {
+ errno_t ec = _wfopen_s(&disAsmFile, fileName, W("a+"));
+ if (ec != 0)
+ {
+ disAsmFile = nullptr;
+ }
+ }
+#else // !DEBUG
+ // NOTE: non-DEBUG builds always use jitstdout currently!
+ disAsmFile = jitstdout;
+#endif // !DEBUG
+
+ if (disAsmFile == nullptr)
+ {
+ disAsmFile = jitstdout;
+ }
+
+ // As this writes to a common file, this is not reentrant.
+
+ assert(hotCodeSize > 0);
+ if (coldCodeSize == 0)
+ {
+ fprintf(disAsmFile, "************************** %hs:%hs size 0x%04IX **************************\n\n",
+ disCurClassName, disCurMethodName, hotCodeSize);
+
+ fprintf(disAsmFile, "Base address : %ph\n", dspAddr(hotCodePtr));
+ }
+ else
+ {
+ fprintf(disAsmFile,
+ "************************** %hs:%hs hot size 0x%04IX cold size 0x%04IX **************************\n\n",
+ disCurClassName, disCurMethodName, hotCodeSize, coldCodeSize);
+
+ fprintf(disAsmFile, "Hot address : %ph\n", dspAddr(hotCodePtr));
+ fprintf(disAsmFile, "Cold address : %ph\n", dspAddr(coldCodePtr));
+ }
+
+ disStartAddr = 0;
+ disHotCodeBlock = (size_t)hotCodePtr;
+ disHotCodeSize = hotCodeSize;
+ disColdCodeBlock = (size_t)coldCodePtr;
+ disColdCodeSize = coldCodeSize;
+
+ disTotalCodeSize = disHotCodeSize + disColdCodeSize;
+
+ disLabels = new (disComp, CMK_DebugOnly) BYTE[disTotalCodeSize]();
+
+ DisasmBuffer(disAsmFile, /* printIt */ true);
+ fprintf(disAsmFile, "\n");
+
+ if (disAsmFile != jitstdout)
+ {
+ fclose(disAsmFile);
+ }
+ else
+ {
+ fflush(disAsmFile);
+ }
+}
+
+/*****************************************************************************/
+// This function is called for every method. It checks whether we are supposed to disassemble
+// the method, and where to send the disassembly output.
+
+void DisAssembler::disOpenForLateDisAsm(const char* curMethodName, const char* curClassName, PCCOR_SIGNATURE sig)
+{
+ if (!disComp->opts.doLateDisasm)
+ {
+ return;
+ }
+
+ disCurMethodName = curMethodName;
+ disCurClassName = curClassName;
+}
+
+/*****************************************************************************/
+
+void DisAssembler::disInit(Compiler* pComp)
+{
+ assert(pComp);
+ disComp = pComp;
+ disHasName = false;
+ disLabels = nullptr;
+ disAddrToMethodHandleMap = nullptr;
+ disHelperAddrToMethodHandleMap = nullptr;
+ disRelocationMap = nullptr;
+ disDiffable = false;
+ disAsmFile = nullptr;
+}
+
+/*****************************************************************************/
+#endif // LATE_DISASM
+/*****************************************************************************/
diff --git a/src/jit/disasm.h b/src/jit/disasm.h
new file mode 100644
index 0000000000..972243e4dc
--- /dev/null
+++ b/src/jit/disasm.h
@@ -0,0 +1,226 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX DisAsm XX
+XX XX
+XX The dis-assembler to display the native code generated XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef _DIS_H_
+#define _DIS_H_
+/*****************************************************************************/
+#ifdef LATE_DISASM
+
+// free() is deprecated (we should only allocate and free memory through CLR hosting interfaces)
+// and is redefined in clrhost.h to cause a compiler error.
+// We don't call free(), but this function is mentioned in STL headers included by msvcdis.h
+// (and free() is only called by STL functions that we don't use).
+// To avoid the compiler error, but at the same time ensure that we don't accidentally use free(),
+// free() is redefined to cause a runtime error instead of a compile time error.
+#undef free
+#ifdef DEBUG
+#define free(x) assert(false && "Must not call free(). Use a ClrXXX function instead.")
+#endif
+
+#if CHECK_STRUCT_PADDING
+#pragma warning(pop)
+#endif // CHECK_STRUCT_PADDING
+
+#define _OLD_IOSTREAMS
+// This pragma is needed because public\vc\inc\xiosbase contains
+// a static local variable
+#pragma warning(disable : 4640)
+#include "msvcdis.h"
+#pragma warning(default : 4640)
+
+#ifdef _TARGET_XARCH_
+#include "disx86.h"
+#elif defined(_TARGET_ARM64_)
+#include "disarm64.h"
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif
+
+#if CHECK_STRUCT_PADDING
+#pragma warning(push)
+#pragma warning(default : 4820) // 'bytes' bytes padding added after construct 'member_name'
+#endif // CHECK_STRUCT_PADDING
+
+/*****************************************************************************/
+
+#ifdef _HOST_64BIT_
+template <typename T>
+struct SizeTKeyFuncs : LargePrimitiveKeyFuncs<T>
+{
+};
+#else // !_HOST_64BIT_
+template <typename T>
+struct SizeTKeyFuncs : SmallPrimitiveKeyFuncs<T>
+{
+};
+#endif // _HOST_64BIT_
+
+typedef SimplerHashTable<size_t, SizeTKeyFuncs<size_t>, CORINFO_METHOD_HANDLE, JitSimplerHashBehavior>
+ AddrToMethodHandleMap;
+typedef SimplerHashTable<size_t, SizeTKeyFuncs<size_t>, size_t, JitSimplerHashBehavior> AddrToAddrMap;
+
+class Compiler;
+
+class DisAssembler
+{
+public:
+ // Constructor
+ void disInit(Compiler* pComp);
+
+ // Initialize the class for the current method being generated.
+ void disOpenForLateDisAsm(const char* curMethodName, const char* curClassName, PCCOR_SIGNATURE sig);
+
+ // Disassemble a buffer: called after code for a method is generated.
+ void disAsmCode(BYTE* hotCodePtr, size_t hotCodeSize, BYTE* coldCodePtr, size_t coldCodeSize);
+
+ // Register an address to be associated with a method handle.
+ void disSetMethod(size_t addr, CORINFO_METHOD_HANDLE methHnd);
+
+ // Register a relocation address.
+ void disRecordRelocation(size_t relocAddr, size_t targetAddr);
+
+private:
+ /* Address of the hot and cold code blocks to disassemble */
+ size_t disHotCodeBlock;
+ size_t disColdCodeBlock;
+
+ /* Size of the hot and cold code blocks to disassemble */
+ size_t disHotCodeSize;
+ size_t disColdCodeSize;
+
+ /* Total code size (simply cached version of disHotCodeSize + disColdCodeSize) */
+ size_t disTotalCodeSize;
+
+ /* Address where the code block is to be loaded */
+ size_t disStartAddr;
+
+ /* Current offset in the code block */
+ size_t disCurOffset;
+
+ /* Size (in bytes) of the current disassembled instruction */
+ size_t disInstSize;
+
+ /* Target address of a jump */
+ size_t disTarget;
+
+ /* temporary buffer for function names */
+ // TODO-Review: there is some issue here where this is never set!
+ char disFuncTempBuf[1024];
+
+ /* Method and class name to output */
+ const char* disCurMethodName;
+ const char* disCurClassName;
+
+ /* flag that signals when replacing a symbol name has been deferred for following callbacks */
+ // TODO-Review: there is some issue here where this is never set to 'true'!
+ bool disHasName;
+
+ /* An array of labels, for jumps, LEAs, etc. There is one element in the array for each byte in the generated code.
+ * The element is zero if the corresponding byte of generated code is not a label target. Otherwise, the value
+ * is a label number.
+ */
+ BYTE* disLabels;
+
+ void DisasmBuffer(FILE* pfile, bool printit);
+
+ /* For the purposes of disassembly, we pretend that the hot and cold sections are linear, and not split.
+ * These functions create this model for the rest of the disassembly code.
+ */
+
+ /* Given a linear offset into the code, find a pointer to the actual code (either in the hot or cold section) */
+ const BYTE* disGetLinearAddr(size_t offset);
+
+ /* Given a linear offset into the code, determine how many bytes are left in the hot or cold buffer the offset
+ * points to */
+ size_t disGetBufferSize(size_t offset);
+
+ // Map of instruction addresses to call target method handles for normal calls.
+ AddrToMethodHandleMap* disAddrToMethodHandleMap;
+ AddrToMethodHandleMap* GetAddrToMethodHandleMap();
+
+ // Map of instruction addresses to call target method handles for JIT helper calls.
+ AddrToMethodHandleMap* disHelperAddrToMethodHandleMap;
+ AddrToMethodHandleMap* GetHelperAddrToMethodHandleMap();
+
+ // Map of relocation addresses to relocation target.
+ AddrToAddrMap* disRelocationMap;
+ AddrToAddrMap* GetRelocationMap();
+
+ const char* disGetMethodFullName(size_t addr);
+
+ FILE* disAsmFile;
+
+ Compiler* disComp;
+
+ bool disDiffable; // 'true' if the output should be diffable (hide or obscure absolute addresses)
+
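+ // Display helper: in diffable mode, map any non-null address to the constant 0xD1FFAB1E
+ // so absolute addresses do not show up in diffs.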
+ template <typename T>
+ T dspAddr(T addr)
+ {
+ return (addr == 0) ? 0 : (disDiffable ? T(0xD1FFAB1E) : addr);
+ }
+
+ /* Callbacks from msdis */
+
+ static size_t __stdcall disCchAddr(
+ const DIS* pdis, DIS::ADDR addr, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORDLONG* pdwDisp);
+
+ size_t disCchAddrMember(
+ const DIS* pdis, DIS::ADDR addr, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORDLONG* pdwDisp);
+
+ static size_t __stdcall disCchFixup(const DIS* pdis,
+ DIS::ADDR addr,
+ size_t size,
+ __in_ecount(cchMax) wchar_t* wz,
+ size_t cchMax,
+ DWORDLONG* pdwDisp);
+
+ size_t disCchFixupMember(const DIS* pdis,
+ DIS::ADDR addr,
+ size_t size,
+ __in_ecount(cchMax) wchar_t* wz,
+ size_t cchMax,
+ DWORDLONG* pdwDisp);
+
+ static size_t __stdcall disCchRegRel(
+ const DIS* pdis, DIS::REGA reg, DWORD disp, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORD* pdwDisp);
+
+ size_t disCchRegRelMember(
+ const DIS* pdis, DIS::REGA reg, DWORD disp, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORD* pdwDisp);
+
+ static size_t __stdcall disCchReg(const DIS* pdis, DIS::REGA reg, __in_ecount(cchMax) wchar_t* wz, size_t cchMax);
+
+ size_t disCchRegMember(const DIS* pdis, DIS::REGA reg, __in_ecount(cchMax) wchar_t* wz, size_t cchMax);
+
+ /* Disassemble helper */
+
+ size_t CbDisassemble(DIS* pdis,
+ size_t offs,
+ DIS::ADDR addr,
+ const BYTE* pb,
+ size_t cbMax,
+ FILE* pfile,
+ bool findLabels,
+ bool printit = false,
+ bool dispOffs = false,
+ bool dispCodeBytes = false);
+};
+
+/*****************************************************************************/
+#endif // LATE_DISASM
+/*****************************************************************************/
+#endif // _DIS_H_
+/*****************************************************************************/
diff --git a/src/jit/dll/.gitmirror b/src/jit/dll/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/dll/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror. \ No newline at end of file
diff --git a/src/jit/dll/CMakeLists.txt b/src/jit/dll/CMakeLists.txt
new file mode 100644
index 0000000000..01e58dbbb8
--- /dev/null
+++ b/src/jit/dll/CMakeLists.txt
@@ -0,0 +1,35 @@
+project(ClrJit)
+
+if(CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM)
+ add_definitions(-DLEGACY_BACKEND)
+endif(CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM)
+
+# Disable the following for UNIX altjit on Windows
+if(CLR_CMAKE_PLATFORM_UNIX)
+ add_compile_options(-fPIC)
+
+ add_library_clr(${JIT_BASE_NAME}_static
+ STATIC
+ ${SHARED_LIB_SOURCES}
+ )
+ add_dependencies(${JIT_BASE_NAME}_static coreclrpal gcinfo)
+else()
+ add_library_clr(${JIT_BASE_NAME}_static
+ ${SOURCES}
+ )
+# Disable up to here (see above) the following for UNIX altjit on Windows
+# Enable the following for UNIX altjit on Windows
+# add_library_clr(ClrJit
+# SHARED
+# ${SHARED_LIB_SOURCES}
+# )
+
+# Enable the following for UNIX altjit on Windows
+#target_link_libraries(ClrJit
+# utilcode
+# gcinfo
+# runtime_library
+# )
+
+# Disable the following for UNIX altjit on Windows
+endif(CLR_CMAKE_PLATFORM_UNIX)
diff --git a/src/jit/dll/clrjit.def b/src/jit/dll/clrjit.def
new file mode 100644
index 0000000000..1603af74ca
--- /dev/null
+++ b/src/jit/dll/clrjit.def
@@ -0,0 +1,7 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+EXPORTS
+ getJit
+ jitStartup
+ sxsJitStartup
diff --git a/src/jit/dll/jit.nativeproj b/src/jit/dll/jit.nativeproj
new file mode 100644
index 0000000000..97981e7eff
--- /dev/null
+++ b/src/jit/dll/jit.nativeproj
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="dogfood">
+
+ <!-- Import the CLR's settings -->
+
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\clr.props" />
+
+ <PropertyGroup Label="Globals">
+ <SccProjectName>SAK</SccProjectName>
+ <SccAuxPath>SAK</SccAuxPath>
+ <SccLocalPath>SAK</SccLocalPath>
+ <SccProvider>SAK</SccProvider>
+ </PropertyGroup>
+
+ <PropertyGroup>
+
+ <!-- Set the output -->
+
+ <OutputName>clrjit</OutputName>
+ <TargetType Condition="'$(FeatureMergeJitAndEngine)'=='true'">LIBRARY</TargetType>
+ <TargetType Condition="'$(FeatureMergeJitAndEngine)'!='true'">DYNLINK</TargetType>
+ <FileToMarkForSigning>$(BinariesDirectory)\clrjit.dll</FileToMarkForSigning>
+ <StaticLinkJit>false</StaticLinkJit>
+ <BuildCoreBinaries>true</BuildCoreBinaries>
+ <BuildSysBinaries>true</BuildSysBinaries>
+
+ <DllEntryPoint>_DllMainCRTStartup</DllEntryPoint>
+ <LinkSubsystem>windows</LinkSubsystem>
+ <LibCLib Condition="'$(StaticLinkJit)'!='true'">$(ClrCrtLib)</LibCLib>
+
+ <LinkModuleDefinitionFile>$(OutputName).def</LinkModuleDefinitionFile>
+
+ <ClDefines Condition="'$(BuildArchitecture)' == 'amd64'">$(ClDefines);FEATURE_SIMD;FEATURE_AVX_SUPPORT</ClDefines>
+
+ <Win32DllLibs>$(SdkLibPath)\kernel32.lib;$(SdkLibPath)\user32.lib;$(SdkLibPath)\advapi32.lib;$(SdkLibPath)\oleaut32.lib;$(SdkLibPath)\uuid.lib</Win32DllLibs>
+ <Win32DllLibs>$(Win32DllLibs);$(ClrLibPath)\utilcode.lib</Win32DllLibs>
+
+ <!-- Profile-guided optimization -->
+
+ <PogoOptimize Condition="('$(BuildArchitecture)' == 'arm')">false</PogoOptimize>
+ <PogoInstrument Condition="('$(BuildArchitecture)' == 'arm') and ('$(_BuildType)' == 'ret') and ('$(BuildProjectName)' == '')">true</PogoInstrument>
+ <PogoUpdate Condition="('$(BuildArchitecture)' == 'arm') and ('$(_BuildType)' == 'ret') and ('$(BuildProjectName)' == '')">true</PogoUpdate>
+ <Win32DllLibs Condition="'$(PogoInstrument)' == 'true' and '$(BuildArchitecture)' == 'amd64'">$(Win32DllLibs);$(CrtLibPath)\pgort.lib</Win32DllLibs>
+ <Win32DllLibs Condition="'$(PogoInstrument)' == 'true' and '$(BuildArchitecture)' == 'arm'">$(Win32DllLibs);$(CrtLibPath)\pgort.lib;$(SdkLibPath)\ntdll.lib</Win32DllLibs>
+ <OptimizationDataRelativeDir>$(_BuildArch)\CLR\Base</OptimizationDataRelativeDir>
+
+ <!-- Do we want to build with msvcdis disassembly capability? This should be enabled for DEBUG and disabled otherwise.
+ However, for debugging purposes, such as generating assembly diffs between CHK and RET JITs, it can be useful
+ to enable it temporarily in non-DEBUG builds by forcing the EnableLateDisasm property to 'true'.
+ -->
+ <EnableLateDisasm Condition="'$(DebugBuild)' == 'true' and '$(BuildArchitecture)' != 'arm' and '$(BuildProjectName)' != 'CoreSys'">true</EnableLateDisasm>
+ <!--
+ <EnableLateDisasm Condition="'$(BuildArchitecture)' != 'arm' and '$(BuildProjectName)' != 'CoreSys'">true</EnableLateDisasm>
+ -->
+ <ClDefines Condition="'$(EnableLateDisasm)' == 'true'">$(ClDefines);LATE_DISASM=1</ClDefines>
+ <LinkDelayLoad Condition="'$(EnableLateDisasm)' == 'true'">$(LinkDelayLoad);msvcdis$(VC_NONCRT_ProdVerX).dll</LinkDelayLoad>
+ <UseDelayimpLib Condition="'$(EnableLateDisasm)' == 'true' and '$(FeatureMergeJitAndEngine)'!='true'">true</UseDelayimpLib>
+
+ <!-- Disable merge of text and rdata for DevDiv:696146-->
+ <LinkMergeRData Condition="'$(BuildArchitecture)'=='i386'">false</LinkMergeRData>
+ </PropertyGroup>
+
+ <!-- Leaf Project Items -->
+
+ <ItemGroup>
+ <ProjectReference Include="$(ClrSrcDirectory)utilcode\dyncrt\dyncrt.nativeproj" />
+ <TargetLib Include="$(SdkLibPath)\mscoree.lib" />
+ <TargetLib Condition="'$(BuildArchitecture)'!='i386'" Include="$(ClrLibPath)\gcinfo.lib">
+ <ProjectReference>$(ClrSrcDirectory)gcinfo\lib\gcinfo.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(UseDelayimpLib)' == 'true'" Include="$(ClrLibPath)\delayimp.lib">
+ <ProjectReference>$(ClrSrcDirectory)delayimp\delayimp.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(DebugBuild)' == 'true'" Include="$(ClrLibPath)\gcdump.lib">
+ <ProjectReference>$(ClrSrcDirectory)gcdump\lib\gcdump.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(DebugBuild)' == 'true'" Include="$(SdkLibPath)\ole32.lib" />
+ <TargetLib Condition="'$(EnableLateDisasm)' == 'true'" Include="$(VCToolsLibPath)\msvcdis.lib" />
+ <RCResourceFile Include="..\native.rc" />
+ </ItemGroup>
+
+ <Import Project="..\jit.settings.targets" />
+
+</Project>
diff --git a/src/jit/earlyprop.cpp b/src/jit/earlyprop.cpp
new file mode 100644
index 0000000000..70d1012aa0
--- /dev/null
+++ b/src/jit/earlyprop.cpp
@@ -0,0 +1,671 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// Early Value Propagation
+//
+// This phase performs an SSA-based value propagation optimization that currently only applies to array
+// lengths, runtime type handles, and explicit null checks. An SSA-based backwards tracking of local variables
+// is performed at each point of interest, e.g., an array length reference site, a method table reference site, or
+// an indirection.
+// The tracking continues until an interesting value is encountered. The value is then used to rewrite
+// the source site or the value.
+//
+///////////////////////////////////////////////////////////////////////////////////////
+
+#include "jitpch.h"
+#include "ssabuilder.h"
+
+bool Compiler::optDoEarlyPropForFunc()
+{
+ bool propArrayLen = (optMethodFlags & OMF_HAS_NEWARRAY) && (optMethodFlags & OMF_HAS_ARRAYREF);
+ bool propGetType = (optMethodFlags & OMF_HAS_NEWOBJ) && (optMethodFlags & OMF_HAS_VTABLEREF);
+ bool propNullCheck = (optMethodFlags & OMF_HAS_NULLCHECK) != 0;
+ return propArrayLen || propGetType || propNullCheck;
+}
+
+bool Compiler::optDoEarlyPropForBlock(BasicBlock* block)
+{
+ bool bbHasArrayRef = (block->bbFlags & BBF_HAS_IDX_LEN) != 0;
+ bool bbHasVtableRef = (block->bbFlags & BBF_HAS_VTABREF) != 0;
+ bool bbHasNullCheck = (block->bbFlags & BBF_HAS_NULLCHECK) != 0;
+ return bbHasArrayRef || bbHasVtableRef || bbHasNullCheck;
+}
+
+//--------------------------------------------------------------------
+// gtIsVtableRef: Return true if the tree is a method table reference.
+//
+// Arguments:
+// tree - The input tree.
+//
+// Return Value:
+// Return true if the tree is a method table reference.
+
+bool Compiler::gtIsVtableRef(GenTreePtr tree)
+{
+ if (tree->OperGet() == GT_IND)
+ {
+ GenTree* addr = tree->AsIndir()->Addr();
+
+ if (addr->OperIsAddrMode())
+ {
+ GenTreeAddrMode* addrMode = addr->AsAddrMode();
+
+ return (!addrMode->HasIndex() && (addrMode->Base()->TypeGet() == TYP_REF));
+ }
+ }
+
+ return false;
+}
+
+//------------------------------------------------------------------------------
+// getArrayLengthFromAllocation: Return the array length for an array allocation
+// helper call.
+//
+// Arguments:
+// tree - The array allocation helper call.
+//
+// Return Value:
+// Return the array length node.
+
+GenTreePtr Compiler::getArrayLengthFromAllocation(GenTreePtr tree)
+{
+ assert(tree != nullptr);
+
+ if (tree->OperGet() == GT_CALL)
+ {
+ GenTreeCall* call = tree->AsCall();
+
+ if (call->gtCallType == CT_HELPER)
+ {
+ if (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_DIRECT) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_OBJ) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_VC) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_ALIGN8))
+ {
+ // This is an array allocation site. Grab the array length node.
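+ // For example, for a C# 'new int[10]', the element-count argument of this helper call is
+ // typically a constant node with value 10; that argument node is what gets returned below.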
+ return gtArgEntryByArgNum(call, 1)->node;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+//-----------------------------------------------------------------------------
+// getObjectHandleNodeFromAllocation: Return the type handle for an object allocation
+// helper call.
+//
+// Arguments:
+// tree - The object allocation helper call.
+//
+// Return Value:
+// Return the object type handle node.
+
+GenTreePtr Compiler::getObjectHandleNodeFromAllocation(GenTreePtr tree)
+{
+ assert(tree != nullptr);
+
+ if (tree->OperGet() == GT_CALL)
+ {
+ GenTreeCall* call = tree->AsCall();
+
+ if (call->gtCallType == CT_HELPER)
+ {
+ if (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWFAST) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWSFAST) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWSFAST_ALIGN8) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_DIRECT) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_OBJ) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_VC) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_ALIGN8))
+ {
+ // This is an object allocation site. Return the runtime type handle node.
+ fgArgTabEntryPtr argTabEntry = gtArgEntryByArgNum(call, 0);
+ return argTabEntry->node;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------------------------
+// optEarlyProp: The entry point of the early value propagation.
+//
+// Notes:
+// This phase performs an SSA-based value propagation, including
+// 1. Array length propagation.
+// 2. Runtime type handle propagation.
+// 3. Null check folding.
+//
+// For array length propagation, a demand-driven SSA-based backwards tracking of constant
+// array lengths is performed at each array length reference site, which is in the form of a
+// GT_ARR_LENGTH node. When a GT_ARR_LENGTH node is seen, the array ref pointer, which is
+// the only child node of the GT_ARR_LENGTH, is tracked. This is only done for array ref
+// pointers that have valid SSA forms. The tracking is along the SSA use-def chain and stops
+// at the original array allocation site, where we can grab the array length. The
+// GT_ARR_LENGTH node will then be rewritten to a GT_CNS_INT node if the array length is
+// constant.
+//
+// Similarly, the same algorithm also applies to rewriting a method table (also known as
+// vtable) reference site, which is in the form of a GT_INDIR node. The base pointer, which is
+// an object reference pointer, is treated in the same way as an array reference pointer.
+//
+// Null check folding tries to find a GT_INDIR(obj + const) that a GT_NULLCHECK(obj) can be folded into
+// and removed. Currently, the algorithm only matches GT_INDIR and GT_NULLCHECK in the same basic block.
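+//
+// As an illustration (node shapes and names simplified), array length propagation turns a pattern like
+//
+//     V01 = CORINFO_HELP_NEWARR_1_VC(elemTypeHnd, 5)   ; array allocation site
+//     ...
+//     GT_ARR_LENGTH(V01)                               ; array length use
+//
+// into a GT_CNS_INT node with value 5 at the use site, leaving the allocation itself unchanged.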
+
+void Compiler::optEarlyProp()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optEarlyProp()\n");
+ }
+#endif
+
+ assert(fgSsaPassesCompleted == 1);
+
+ if (!optDoEarlyPropForFunc())
+ {
+ return;
+ }
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (!optDoEarlyPropForBlock(block))
+ {
+ continue;
+ }
+
+ compCurBB = block;
+
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr;)
+ {
+ // Preserve the next link before the propagation and morph.
+ GenTreeStmt* next = stmt->gtNextStmt;
+
+ compCurStmt = stmt;
+
+ // Walk the stmt tree in linear order to rewrite any array length reference with a
+ // constant array length.
+ bool isRewritten = false;
+ bool bbHasNullCheck = (block->bbFlags & BBF_HAS_NULLCHECK) != 0;
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree != nullptr; tree = tree->gtNext)
+ {
+ if (optEarlyPropRewriteTree(tree))
+ {
+ isRewritten = true;
+ }
+ }
+
+ // Morph the stmt and update the evaluation order if the stmt has been rewritten.
+ if (isRewritten)
+ {
+ gtSetStmtInfo(stmt);
+ fgSetStmtSeq(stmt);
+ }
+
+ stmt = next;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("\nAfter optEarlyProp:\n");
+ fgDispBasicBlocks(/*dumpTrees*/ true);
+ }
+#endif
+}
+
+//----------------------------------------------------------------
+// optEarlyPropRewriteTree: Rewrite a tree to the actual value.
+//
+// Arguments:
+// tree - The input tree node to be rewritten.
+//
+// Return Value:
+// Return true iff "tree" is successfully rewritten.
+
+bool Compiler::optEarlyPropRewriteTree(GenTreePtr tree)
+{
+ GenTreePtr objectRefPtr = nullptr;
+ optPropKind propKind = optPropKind::OPK_INVALID;
+
+ if (tree->OperGet() == GT_ARR_LENGTH)
+ {
+ objectRefPtr = tree->gtOp.gtOp1;
+ propKind = optPropKind::OPK_ARRAYLEN;
+ }
+ else if ((tree->OperGet() == GT_IND) && !varTypeIsStruct(tree))
+ {
+ // TODO-1stClassStructs: The above condition should apply equally to all indirections,
+ // but previously the implicit indirections due to a struct assignment were not
+ // considered, so we are currently limiting it to non-structs to preserve existing
+ // behavior.
+ // optFoldNullCheck takes care of updating statement info if a null check is removed.
+ optFoldNullCheck(tree);
+
+ if (gtIsVtableRef(tree))
+ {
+ // Don't propagate type handles that are used as null checks, which are usually in
+ // form of
+ // * stmtExpr void (top level)
+ // \--* indir int
+ // \--* lclVar ref V02 loc0
+ if (compCurStmt->gtStmt.gtStmtExpr == tree)
+ {
+ return false;
+ }
+
+ objectRefPtr = tree->gtOp.gtOp1;
+ propKind = optPropKind::OPK_OBJ_GETTYPE;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ else
+ {
+ return false;
+ }
+
+ if (!objectRefPtr->OperIsScalarLocal() || fgExcludeFromSsa(objectRefPtr->AsLclVarCommon()->GetLclNum()))
+
+ {
+ return false;
+ }
+
+ bool isRewritten = false;
+ GenTreePtr root = compCurStmt;
+ unsigned lclNum = objectRefPtr->AsLclVarCommon()->GetLclNum();
+ unsigned ssaNum = objectRefPtr->AsLclVarCommon()->GetSsaNum();
+
+ GenTreePtr actualVal = optPropGetValue(lclNum, ssaNum, propKind);
+
+ if (actualVal != nullptr)
+ {
+ if (propKind == optPropKind::OPK_ARRAYLEN)
+ {
+ assert(actualVal->IsCnsIntOrI());
+
+ if (actualVal->gtIntCon.gtIconVal > INT32_MAX)
+ {
+ // Don't propagate array lengths that are beyond the maximum value of a GT_ARR_LENGTH
+ // node. The CORINFO_HELP_NEWARR_1_OBJ helper call allows a long integer to be passed as the
+ // array length argument, but the type of GT_ARR_LENGTH is always INT32.
+ return false;
+ }
+ }
+ else if (propKind == optPropKind::OPK_OBJ_GETTYPE)
+ {
+ assert(actualVal->IsCnsIntOrI());
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("optEarlyProp Rewriting BB%02u\n", compCurBB->bbNum);
+ gtDispTree(root);
+ printf("\n");
+ }
+#endif
+ // Rewrite the tree using a copy of "actualVal"
+ GenTreePtr actualValCopy;
+ var_types origType = tree->gtType;
+ // Propagating a constant into an array index expression requires calling
+ // LabelIndex to update the FieldSeq annotations. EarlyProp may replace
+ // array length expressions with constants, so check if this is an array
+ // length operator that is part of an array index expression.
+ bool isIndexExpr = (tree->OperGet() == GT_ARR_LENGTH && ((tree->gtFlags & GTF_ARRLEN_ARR_IDX) != 0));
+
+ if (actualVal->GetNodeSize() <= tree->GetNodeSize())
+ {
+ actualValCopy = tree;
+ }
+ else
+ {
+ actualValCopy = gtNewLargeOperNode(GT_ADD, TYP_INT);
+ }
+
+ fgWalkTreePre(&tree, Compiler::lvaDecRefCntsCB, (void*)this, true);
+
+ actualValCopy->CopyFrom(actualVal, this);
+ actualValCopy->gtType = origType;
+ if (isIndexExpr)
+ {
+ actualValCopy->LabelIndex(this);
+ }
+
+ fgWalkTreePre(&actualValCopy, Compiler::lvaIncRefCntsCB, (void*)this, true);
+
+ if (actualValCopy != tree)
+ {
+ gtReplaceTree(root, tree, actualValCopy);
+ }
+
+ isRewritten = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("to\n");
+ gtDispTree(compCurStmt);
+ printf("\n");
+ }
+#endif
+ }
+
+ return isRewritten;
+}
+
+//-------------------------------------------------------------------------------------------
+// optPropGetValue: Given an SSA object ref pointer, get the value needed based on valueKind.
+//
+// Arguments:
+// lclNum - The local var number of the ref pointer.
+// ssaNum - The SSA var number of the ref pointer.
+// valueKind - The kind of value of interest.
+//
+// Return Value:
+// Return the corresponding value based on valueKind.
+
+GenTreePtr Compiler::optPropGetValue(unsigned lclNum, unsigned ssaNum, optPropKind valueKind)
+{
+ return optPropGetValueRec(lclNum, ssaNum, valueKind, 0);
+}
+
+//-----------------------------------------------------------------------------------
+// optPropGetValueRec: Given an SSA object ref pointer, get the value needed based on valueKind
+// within a recursion bound.
+//
+// Arguments:
+// lclNum - The local var number of the array pointer.
+// ssaNum - The SSA var number of the array pointer.
+// valueKind - The kind of value of interest.
+// walkDepth - Current recursive walking depth.
+//
+// Return Value:
+// Return the corresponding value based on valueKind.
+
+GenTreePtr Compiler::optPropGetValueRec(unsigned lclNum, unsigned ssaNum, optPropKind valueKind, int walkDepth)
+{
+ if (ssaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ return nullptr;
+ }
+
+ SSAName ssaName(lclNum, ssaNum);
+ GenTreePtr value = nullptr;
+
+ // Bound the recursion with a hard limit.
+ if (walkDepth > optEarlyPropRecurBound)
+ {
+ return nullptr;
+ }
+
+ // Track along the use-def chain to get the array length
+ GenTreePtr treelhs = lvaTable[lclNum].GetPerSsaData(ssaNum)->m_defLoc.m_tree;
+
+ if (treelhs == nullptr)
+ {
+ // Incoming parameters or live-in variables don't have an actual definition tree node
+ // for their FIRST_SSA_NUM. See SsaBuilder::RenameVariables.
+ assert(ssaNum == SsaConfig::FIRST_SSA_NUM);
+ }
+ else
+ {
+ GenTreePtr* lhsPtr;
+ GenTreePtr treeDefParent = treelhs->gtGetParent(&lhsPtr);
+
+ if (treeDefParent->OperGet() == GT_ASG)
+ {
+ assert(treelhs == treeDefParent->gtGetOp1());
+ GenTreePtr treeRhs = treeDefParent->gtGetOp2();
+
+ if (treeRhs->OperIsScalarLocal() && !fgExcludeFromSsa(treeRhs->AsLclVarCommon()->GetLclNum()))
+ {
+ // Recursively track the Rhs
+ unsigned rhsLclNum = treeRhs->AsLclVarCommon()->GetLclNum();
+ unsigned rhsSsaNum = treeRhs->AsLclVarCommon()->GetSsaNum();
+
+ value = optPropGetValueRec(rhsLclNum, rhsSsaNum, valueKind, walkDepth + 1);
+ }
+ else
+ {
+ if (valueKind == optPropKind::OPK_ARRAYLEN)
+ {
+ value = getArrayLengthFromAllocation(treeRhs);
+ if (value != nullptr)
+ {
+ if (!value->IsCnsIntOrI())
+ {
+ // Leave out non-constant-sized arrays.
+ value = nullptr;
+ }
+ }
+ }
+ else if (valueKind == optPropKind::OPK_OBJ_GETTYPE)
+ {
+ value = getObjectHandleNodeFromAllocation(treeRhs);
+ if (value != nullptr)
+ {
+ if (!value->IsCnsIntOrI())
+ {
+ // Leave out non-constant type handles.
+ value = nullptr;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return value;
+}
+
+//----------------------------------------------------------------
+// optFoldNullCheck: Try to find a GT_NULLCHECK node that can be folded into the GT_INDIR node.
+//
+// Arguments:
+// tree - The input GT_INDIR tree.
+//
+
+void Compiler::optFoldNullCheck(GenTreePtr tree)
+{
+ //
+ // Check for a pattern like this:
+ //
+ // =
+ // / \
+ // x comma
+ // / \
+ // nullcheck +
+ // | / \
+ // y y const
+ //
+ //
+ // some trees in the same
+ // basic block with
+ // no unsafe side effects
+ //
+ // indir
+ // |
+ // x
+ //
+ // where the const is suitably small
+ // and transform it into
+ //
+ // =
+ // / \
+ // x +
+ // / \
+ // y const
+ //
+ //
+ // some trees with no unsafe side effects here
+ //
+ // indir
+ // |
+ // x
+
+ assert(tree->OperGet() == GT_IND);
+ if (tree->gtGetOp1()->OperGet() == GT_LCL_VAR)
+ {
+ // Check if we have the pattern above and find the nullcheck node if we do.
+
+ // Find the definition of the indirected local (x in the picture)
+ GenTreePtr indLocalTree = tree->gtGetOp1();
+ unsigned lclNum = indLocalTree->AsLclVarCommon()->GetLclNum();
+ unsigned ssaNum = indLocalTree->AsLclVarCommon()->GetSsaNum();
+
+ if (ssaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+ DefLoc defLoc = lvaTable[lclNum].GetPerSsaData(ssaNum)->m_defLoc;
+ BasicBlock* defBlock = defLoc.m_blk;
+
+ if (compCurBB == defBlock)
+ {
+ GenTreePtr defTree = defLoc.m_tree;
+ GenTreePtr defParent = defTree->gtGetParent(nullptr);
+
+ if ((defParent->OperGet() == GT_ASG) && (defParent->gtNext == nullptr))
+ {
+ GenTreePtr defRHS = defParent->gtGetOp2();
+ if (defRHS->OperGet() == GT_COMMA)
+ {
+ if (defRHS->gtGetOp1()->OperGet() == GT_NULLCHECK)
+ {
+ GenTreePtr nullCheckTree = defRHS->gtGetOp1();
+ if (nullCheckTree->gtGetOp1()->OperGet() == GT_LCL_VAR)
+ {
+ // We found a candidate for 'y' in the picture
+ unsigned nullCheckLclNum = nullCheckTree->gtGetOp1()->AsLclVarCommon()->GetLclNum();
+
+ if (defRHS->gtGetOp2()->OperGet() == GT_ADD)
+ {
+ GenTreePtr additionNode = defRHS->gtGetOp2();
+ if ((additionNode->gtGetOp1()->OperGet() == GT_LCL_VAR) &&
+ (additionNode->gtGetOp1()->gtLclVarCommon.gtLclNum == nullCheckLclNum))
+ {
+ GenTreePtr offset = additionNode->gtGetOp2();
+ if (offset->IsCnsIntOrI())
+ {
+ if (!fgIsBigOffset(offset->gtIntConCommon.IconValue()))
+ {
+ // Walk from the use to the def in reverse execution order to see
+ // if any nodes have unsafe side effects.
+ GenTreePtr currentTree = indLocalTree->gtPrev;
+ bool isInsideTry = compCurBB->hasTryIndex();
+ bool canRemoveNullCheck = true;
+ const unsigned maxNodesWalked = 25;
+ unsigned nodesWalked = 0;
+
+ // First walk the nodes in the statement containing the indirection
+ // in reverse execution order starting with the indirection's
+ // predecessor.
+ while (canRemoveNullCheck && (currentTree != nullptr))
+ {
+ if ((nodesWalked++ > maxNodesWalked) ||
+ !optCanMoveNullCheckPastTree(currentTree, isInsideTry))
+ {
+ canRemoveNullCheck = false;
+ }
+ else
+ {
+ currentTree = currentTree->gtPrev;
+ }
+ }
+
+ // Then walk the statement list in reverse execution order
+ // until we get to the statement containing the null check.
+ // We only need to check the side effects at the root of each statement.
+ GenTreePtr curStmt = compCurStmt->gtPrev;
+ currentTree = curStmt->gtStmt.gtStmtExpr;
+ while (canRemoveNullCheck && (currentTree != defParent))
+ {
+ if ((nodesWalked++ > maxNodesWalked) ||
+ !optCanMoveNullCheckPastTree(currentTree, isInsideTry))
+ {
+ canRemoveNullCheck = false;
+ }
+ else
+ {
+ curStmt = curStmt->gtStmt.gtPrevStmt;
+ assert(curStmt != nullptr);
+ currentTree = curStmt->gtStmt.gtStmtExpr;
+ }
+ }
+
+ if (canRemoveNullCheck)
+ {
+ // Remove the null check
+ nullCheckTree->gtFlags &= ~(GTF_EXCEPT | GTF_DONT_CSE);
+
+ // Set this flag to prevent reordering
+ nullCheckTree->gtFlags |= GTF_ORDER_SIDEEFF;
+
+ defRHS->gtFlags &= ~(GTF_EXCEPT | GTF_DONT_CSE);
+ defRHS->gtFlags |=
+ additionNode->gtFlags & (GTF_EXCEPT | GTF_DONT_CSE);
+
+ // Re-morph the statement.
+ fgMorphBlockStmt(compCurBB, curStmt DEBUGARG("optFoldNullCheck"));
+
+ // Recalculate the gtCostSz, etc...
+ gtSetStmtInfo(curStmt);
+
+ // Re-thread the nodes
+ fgSetStmtSeq(curStmt);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+//----------------------------------------------------------------
+// optCanMoveNullCheckPastTree: Check if a GT_NULLCHECK node can be folded into a node that
+// is after tree in execution order.
+//
+// Arguments:
+// tree - The tree to check.
+// isInsideTry - True if tree is inside a try region, false otherwise.
+//
+// Return Value:
+// True if GT_NULLCHECK can be folded into a node that is after tree in execution order,
+// false otherwise.
+
+bool Compiler::optCanMoveNullCheckPastTree(GenTreePtr tree, bool isInsideTry)
+{
+ bool result = true;
+ if (isInsideTry)
+ {
+ // We disallow calls, exception sources, and all assignments.
+ // Assignments to locals are disallowed inside try because
+ // they may be live in the handler.
+ if ((tree->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ result = false;
+ }
+ }
+ else
+ {
+ // We disallow calls, exception sources, and assignments to
+ // global memory.
+ if (GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(tree->gtFlags))
+ {
+ result = false;
+ }
+ }
+ return result;
+} \ No newline at end of file
diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp
new file mode 100755
index 0000000000..527244221e
--- /dev/null
+++ b/src/jit/ee_il_dll.cpp
@@ -0,0 +1,1552 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX ee_jit.cpp XX
+XX XX
+XX The functionality needed for the JIT DLL. Includes the DLL entry point XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "emit.h"
+#include "corexcep.h"
+
+#if !defined(PLATFORM_UNIX)
+#include <io.h> // For _dup, _setmode
+#include <fcntl.h> // For _O_TEXT
+#include <errno.h> // For EINVAL
+#endif
+
+/*****************************************************************************/
+
+FILE* jitstdout = nullptr;
+
+ICorJitHost* g_jitHost = nullptr;
+static CILJit* ILJitter = nullptr; // The one and only JITTER I return
+bool g_jitInitialized = false;
+#ifndef FEATURE_MERGE_JIT_AND_ENGINE
+HINSTANCE g_hInst = nullptr;
+#endif // FEATURE_MERGE_JIT_AND_ENGINE
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+JitOptions jitOpts = {
+ nullptr, // methodName
+ nullptr, // className
+ 0.1, // CGknob
+ 0, // testMask
+
+ (JitOptions*)nullptr // lastDummyField.
+};
+
+#endif // DEBUG
+
+/*****************************************************************************/
+
+extern "C" void __stdcall jitStartup(ICorJitHost* jitHost)
+{
+ if (g_jitInitialized)
+ {
+ return;
+ }
+
+ g_jitHost = jitHost;
+
+ assert(!JitConfig.isInitialized());
+ JitConfig.initialize(jitHost);
+
+#if defined(PLATFORM_UNIX)
+ jitstdout = procstdout();
+#else
+ if (jitstdout == nullptr)
+ {
+ int stdoutFd = _fileno(procstdout());
+ // Check the error output(s) of fileno; -1 may overlap with the errno result,
+ // but it is included for completeness.
+ // We want to detect the case where the initial handle is null
+ // or bogus and avoid making further calls.
+ if ((stdoutFd != -1) && (stdoutFd != -2) && (errno != EINVAL))
+ {
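+ // Duplicate the descriptor so the JIT gets its own FILE* for stdout; closing jitstdout
+ // in jitShutdown() then does not close the process-wide stdout stream.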
+ int jitstdoutFd = _dup(_fileno(procstdout()));
+ // Check the error status returned by dup.
+ if (jitstdoutFd != -1)
+ {
+ _setmode(jitstdoutFd, _O_TEXT);
+ jitstdout = _fdopen(jitstdoutFd, "w");
+ assert(jitstdout != nullptr);
+
+ // Prevent the FILE* from buffering its output in order to avoid calls to
+ // `fflush()` throughout the code.
+ setvbuf(jitstdout, nullptr, _IONBF, 0);
+ }
+ }
+ }
+
+ // If jitstdout is still null, fallback to whatever procstdout() was
+ // initially set to.
+ if (jitstdout == nullptr)
+ {
+ jitstdout = procstdout();
+ }
+#endif // PLATFORM_UNIX
+
+#ifdef FEATURE_TRACELOGGING
+ JitTelemetry::NotifyDllProcessAttach();
+#endif
+ Compiler::compStartup();
+
+ g_jitInitialized = true;
+}
+
+void jitShutdown()
+{
+ if (!g_jitInitialized)
+ {
+ return;
+ }
+
+ Compiler::compShutdown();
+
+ if (jitstdout != procstdout())
+ {
+ fclose(jitstdout);
+ }
+
+#ifdef FEATURE_TRACELOGGING
+ JitTelemetry::NotifyDllProcessDetach();
+#endif
+}
+
+#ifndef FEATURE_MERGE_JIT_AND_ENGINE
+
+extern "C" BOOL WINAPI DllMain(HANDLE hInstance, DWORD dwReason, LPVOID pvReserved)
+{
+ if (dwReason == DLL_PROCESS_ATTACH)
+ {
+ g_hInst = (HINSTANCE)hInstance;
+ DisableThreadLibraryCalls((HINSTANCE)hInstance);
+#if defined(SELF_NO_HOST) && COR_JIT_EE_VERSION <= 460
+ jitStartup(JitHost::getJitHost());
+#endif
+ }
+ else if (dwReason == DLL_PROCESS_DETACH)
+ {
+ jitShutdown();
+ }
+
+ return TRUE;
+}
+
+HINSTANCE GetModuleInst()
+{
+ return (g_hInst);
+}
+
+extern "C" void __stdcall sxsJitStartup(CoreClrCallbacks const& cccallbacks)
+{
+#ifndef SELF_NO_HOST
+ InitUtilcode(cccallbacks);
+#endif
+
+#if COR_JIT_EE_VERSION <= 460
+ jitStartup(JitHost::getJitHost());
+#endif
+}
+
+#endif // !FEATURE_MERGE_JIT_AND_ENGINE
+
+/*****************************************************************************/
+
+struct CILJitSingletonAllocator
+{
+ int x;
+};
+const CILJitSingletonAllocator CILJitSingleton = {0};
+
+void* __cdecl operator new(size_t, const CILJitSingletonAllocator&)
+{
+ static char CILJitBuff[sizeof(CILJit)];
+ return CILJitBuff;
+}
+
+ICorJitCompiler* g_realJitCompiler = nullptr;
+
+ICorJitCompiler* __stdcall getJit()
+{
+ if (ILJitter == nullptr)
+ {
+ ILJitter = new (CILJitSingleton) CILJit();
+ }
+ return (ILJitter);
+}
+
+/*****************************************************************************/
+
+// Information kept in thread-local storage. This is used in the noway_assert exceptional path.
+// If you are using it more broadly in retail code, you would need to understand the
+// performance implications of accessing TLS.
+//
+// If the JIT is being statically linked, these methods must be implemented by the consumer.
+#if !defined(FEATURE_MERGE_JIT_AND_ENGINE) || !defined(FEATURE_IMPLICIT_TLS)
+
+__declspec(thread) void* gJitTls = nullptr;
+
+static void* GetJitTls()
+{
+ return gJitTls;
+}
+
+void SetJitTls(void* value)
+{
+ gJitTls = value;
+}
+
+#else // !defined(FEATURE_MERGE_JIT_AND_ENGINE) || !defined(FEATURE_IMPLICIT_TLS)
+
+extern "C" {
+void* GetJitTls();
+void SetJitTls(void* value);
+}
+
+#endif // defined(FEATURE_MERGE_JIT_AND_ENGINE) && defined(FEATURE_IMPLICIT_TLS)
+
+#if defined(DEBUG)
+
+JitTls::JitTls(ICorJitInfo* jitInfo) : m_compiler(nullptr), m_logEnv(jitInfo)
+{
+ m_next = reinterpret_cast<JitTls*>(GetJitTls());
+ SetJitTls(this);
+}
+
+JitTls::~JitTls()
+{
+ SetJitTls(m_next);
+}
+
+LogEnv* JitTls::GetLogEnv()
+{
+ return &reinterpret_cast<JitTls*>(GetJitTls())->m_logEnv;
+}
+
+Compiler* JitTls::GetCompiler()
+{
+ return reinterpret_cast<JitTls*>(GetJitTls())->m_compiler;
+}
+
+void JitTls::SetCompiler(Compiler* compiler)
+{
+ reinterpret_cast<JitTls*>(GetJitTls())->m_compiler = compiler;
+}
+
+#else // defined(DEBUG)
+
+JitTls::JitTls(ICorJitInfo* jitInfo)
+{
+}
+
+JitTls::~JitTls()
+{
+}
+
+Compiler* JitTls::GetCompiler()
+{
+ return reinterpret_cast<Compiler*>(GetJitTls());
+}
+
+void JitTls::SetCompiler(Compiler* compiler)
+{
+ SetJitTls(compiler);
+}
+
+#endif // !defined(DEBUG)
+
+//****************************************************************************
+// The main JIT function for the 32 bit JIT. See code:ICorJitCompiler#EEToJitInterface for more on the EE-JIT
+// interface. Things really don't get going inside the JIT until the code:Compiler::compCompile#Phases
+// method. Usually that is where you want to go.
+
+CorJitResult CILJit::compileMethod(
+ ICorJitInfo* compHnd, CORINFO_METHOD_INFO* methodInfo, unsigned flags, BYTE** entryAddress, ULONG* nativeSizeOfCode)
+{
+ if (g_realJitCompiler != nullptr)
+ {
+ return g_realJitCompiler->compileMethod(compHnd, methodInfo, flags, entryAddress, nativeSizeOfCode);
+ }
+
+ CORJIT_FLAGS jitFlags = {0};
+
+ DWORD jitFlagsSize = 0;
+#if COR_JIT_EE_VERSION > 460
+ if (flags == CORJIT_FLG_CALL_GETJITFLAGS)
+ {
+ jitFlagsSize = compHnd->getJitFlags(&jitFlags, sizeof(jitFlags));
+ }
+#endif
+
+ assert(jitFlagsSize <= sizeof(jitFlags));
+ if (jitFlagsSize == 0)
+ {
+ jitFlags.corJitFlags = flags;
+ }
+
+ int result;
+ void* methodCodePtr = nullptr;
+ CORINFO_METHOD_HANDLE methodHandle = methodInfo->ftn;
+
+ JitTls jitTls(compHnd); // Initialize any necessary thread-local state
+
+ assert(methodInfo->ILCode);
+
+ result = jitNativeCode(methodHandle, methodInfo->scope, compHnd, methodInfo, &methodCodePtr, nativeSizeOfCode,
+ &jitFlags, nullptr);
+
+ if (result == CORJIT_OK)
+ {
+ *entryAddress = (BYTE*)methodCodePtr;
+ }
+
+ return CorJitResult(result);
+}
+
+/*****************************************************************************
+ * Notification from VM to clear any caches
+ */
+void CILJit::clearCache(void)
+{
+ if (g_realJitCompiler != nullptr)
+ {
+ g_realJitCompiler->clearCache();
+ // Continue...
+ }
+
+ return;
+}
+
+/*****************************************************************************
+ * Notify vm that we have something to clean up
+ */
+BOOL CILJit::isCacheCleanupRequired(void)
+{
+ BOOL doCleanup;
+
+ if (g_realJitCompiler != nullptr)
+ {
+ if (g_realJitCompiler->isCacheCleanupRequired())
+ {
+ return TRUE;
+ }
+ // Continue...
+ }
+
+ return FALSE;
+}
+
+void CILJit::ProcessShutdownWork(ICorStaticInfo* statInfo)
+{
+ if (g_realJitCompiler != nullptr)
+ {
+ g_realJitCompiler->ProcessShutdownWork(statInfo);
+ // Continue, by shutting down this JIT as well.
+ }
+
+#ifdef FEATURE_MERGE_JIT_AND_ENGINE
+ jitShutdown();
+#endif
+
+ Compiler::ProcessShutdownWork(statInfo);
+}
+
+/*****************************************************************************
+ * Verify the JIT/EE interface identifier.
+ */
+void CILJit::getVersionIdentifier(GUID* versionIdentifier)
+{
+ if (g_realJitCompiler != nullptr)
+ {
+ g_realJitCompiler->getVersionIdentifier(versionIdentifier);
+ return;
+ }
+
+ assert(versionIdentifier != nullptr);
+ memcpy(versionIdentifier, &JITEEVersionIdentifier, sizeof(GUID));
+}
+
+/*****************************************************************************
+ * Determine the maximum length of SIMD vector supported by this JIT.
+ */
+unsigned CILJit::getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags)
+{
+ if (g_realJitCompiler != nullptr)
+ {
+ return g_realJitCompiler->getMaxIntrinsicSIMDVectorLength(cpuCompileFlags);
+ }
+
+#ifdef _TARGET_AMD64_
+#ifdef FEATURE_AVX_SUPPORT
+ if (((cpuCompileFlags & CORJIT_FLG_PREJIT) == 0) && ((cpuCompileFlags & CORJIT_FLG_FEATURE_SIMD) != 0) &&
+ ((cpuCompileFlags & CORJIT_FLG_USE_AVX2) != 0))
+ {
+ if (JitConfig.EnableAVX() != 0)
+ {
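+ // 32 bytes is the AVX/AVX2 (YMM) vector width, i.e., 8 floats or 4 doubles per vector;
+ // the 16-byte fallback below is the SSE (XMM) width.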
+ return 32;
+ }
+ }
+#endif // FEATURE_AVX_SUPPORT
+ return 16;
+#else // !_TARGET_AMD64_
+ return 0;
+#endif // !_TARGET_AMD64_
+}
+
+void CILJit::setRealJit(ICorJitCompiler* realJitCompiler)
+{
+ g_realJitCompiler = realJitCompiler;
+}
+
+/*****************************************************************************
+ * Returns the number of bytes required for the given type argument
+ */
+
+unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig)
+{
+#if defined(_TARGET_AMD64_)
+
+ // Everything fits into a single 'slot' size;
+ // to accommodate irregular-sized structs, they are passed byref.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ CORINFO_CLASS_HANDLE argClass;
+ CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(sig, list, &argClass));
+ var_types argType = JITtype2varType(argTypeJit);
+ if (varTypeIsStruct(argType))
+ {
+ unsigned structSize = info.compCompHnd->getClassSize(argClass);
+ return structSize; // TODO: roundUp() needed here?
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ return sizeof(size_t);
+
+#else // !_TARGET_AMD64_
+
+ CORINFO_CLASS_HANDLE argClass;
+ CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(sig, list, &argClass));
+ var_types argType = JITtype2varType(argTypeJit);
+
+ if (varTypeIsStruct(argType))
+ {
+ unsigned structSize = info.compCompHnd->getClassSize(argClass);
+
+ // make certain the EE passes us back the right thing for refanys
+ assert(argTypeJit != CORINFO_TYPE_REFANY || structSize == 2 * sizeof(void*));
+
+ // For each target that supports passing struct args in multiple registers
+ // apply the target specific rules for them here:
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_MULTIREG_ARGS
+#if defined(_TARGET_ARM64_)
+ // Any structs that are larger than MAX_PASS_MULTIREG_BYTES are always passed by reference
+ if (structSize > MAX_PASS_MULTIREG_BYTES)
+ {
+ // This struct is passed by reference using a single 'slot'
+ return TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ // Is the struct larger than 16 bytes
+ if (structSize > (2 * TARGET_POINTER_SIZE))
+ {
+ var_types hfaType = GetHfaType(argClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
+ bool isHfa = (hfaType != TYP_UNDEF);
+ if (!isHfa)
+ {
+ // This struct is passed by reference using a single 'slot'
+ return TARGET_POINTER_SIZE;
+ }
+ }
+ // otherwise we will pass this struct by value in multiple registers
+ }
+#elif defined(_TARGET_ARM_)
+// otherwise we will pass this struct by value in multiple registers
+#else
+ NYI("unknown target");
+#endif // defined(_TARGET_XXX_)
+#endif // FEATURE_MULTIREG_ARGS
+
+ // we pass this struct by value in multiple registers
+ return (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ unsigned argSize = sizeof(int) * genTypeStSz(argType);
+ assert(0 < argSize && argSize <= sizeof(__int64));
+ return (unsigned)roundUp(argSize, TARGET_POINTER_SIZE);
+ }
+#endif
+}
+
+/*****************************************************************************/
+
+GenTreePtr Compiler::eeGetPInvokeCookie(CORINFO_SIG_INFO* szMetaSig)
+{
+ void *cookie, *pCookie;
+ cookie = info.compCompHnd->GetCookieForPInvokeCalliSig(szMetaSig, &pCookie);
+ assert((cookie == nullptr) != (pCookie == nullptr));
+
+ return gtNewIconEmbHndNode(cookie, pCookie, GTF_ICON_PINVKI_HDL);
+}
+
+//------------------------------------------------------------------------
+// eeGetArrayDataOffset: Gets the offset of a SDArray's first element
+//
+// Arguments:
+// type - The array element type
+//
+// Return Value:
+// The offset to the first array element.
+
+unsigned Compiler::eeGetArrayDataOffset(var_types type)
+{
+ return varTypeIsGC(type) ? eeGetEEInfo()->offsetOfObjArrayData : offsetof(CORINFO_Array, u1Elems);
+}
+
+//------------------------------------------------------------------------
+// eeGetMDArrayDataOffset: Gets the offset of a MDArray's first element
+//
+// Arguments:
+// type - The array element type
+// rank - The array rank
+//
+// Return Value:
+// The offset to the first array element.
+//
+// Assumptions:
+// The rank should be greater than 0.
+
+unsigned Compiler::eeGetMDArrayDataOffset(var_types type, unsigned rank)
+{
+ assert(rank > 0);
+ // Note that below we're specifically using genTypeSize(TYP_INT) because array
+ // indices are not native int.
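+ // The '2 *' accounts for the per-dimension length and lower-bound entries that a
+ // multi-dimensional array stores before its data; e.g., for rank == 2 this skips four
+ // INT32-sized slots.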
+ return eeGetArrayDataOffset(type) + 2 * genTypeSize(TYP_INT) * rank;
+}
+
+/*****************************************************************************/
+
+void Compiler::eeGetStmtOffsets()
+{
+ ULONG32 offsetsCount;
+ DWORD* offsets;
+ ICorDebugInfo::BoundaryTypes offsetsImplicit;
+
+ info.compCompHnd->getBoundaries(info.compMethodHnd, &offsetsCount, &offsets, &offsetsImplicit);
+
+ /* Set the implicit boundaries */
+
+ info.compStmtOffsetsImplicit = (ICorDebugInfo::BoundaryTypes)offsetsImplicit;
+
+ /* Process the explicit boundaries */
+
+ info.compStmtOffsetsCount = 0;
+
+ if (offsetsCount == 0)
+ {
+ return;
+ }
+
+ info.compStmtOffsets = new (this, CMK_DebugInfo) IL_OFFSET[offsetsCount];
+
+ for (unsigned i = 0; i < offsetsCount; i++)
+ {
+ if (offsets[i] > info.compILCodeSize)
+ {
+ continue;
+ }
+
+ info.compStmtOffsets[info.compStmtOffsetsCount] = offsets[i];
+ info.compStmtOffsetsCount++;
+ }
+
+ info.compCompHnd->freeArray(offsets);
+}
+
+/*****************************************************************************
+ *
+ * Debugging support - Local var info
+ */
+
+void Compiler::eeSetLVcount(unsigned count)
+{
+ assert(opts.compScopeInfo);
+
+ JITDUMP("VarLocInfo count is %d\n", count);
+
+ eeVarsCount = count;
+ if (eeVarsCount)
+ {
+ eeVars = (VarResultInfo*)info.compCompHnd->allocateArray(eeVarsCount * sizeof(eeVars[0]));
+ }
+ else
+ {
+ eeVars = nullptr;
+ }
+}
+
+void Compiler::eeSetLVinfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ VarName name,
+ bool avail,
+ const Compiler::siVarLoc& varLoc)
+{
+ // ICorDebugInfo::VarLoc and Compiler::siVarLoc have to overlap
+ // This is checked in siInit()
+
+ assert(opts.compScopeInfo);
+ assert(eeVarsCount > 0);
+ assert(which < eeVarsCount);
+
+ if (eeVars != nullptr)
+ {
+ eeVars[which].startOffset = startOffs;
+ eeVars[which].endOffset = startOffs + length;
+ eeVars[which].varNumber = varNum;
+ eeVars[which].loc = varLoc;
+ }
+}
+
+void Compiler::eeSetLVdone()
+{
+ // A necessary (but not sufficient) condition for the two struct definitions to overlap.
+ assert(sizeof(eeVars[0]) == sizeof(ICorDebugInfo::NativeVarInfo));
+ assert(opts.compScopeInfo);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ eeDispVars(info.compMethodHnd, eeVarsCount, (ICorDebugInfo::NativeVarInfo*)eeVars);
+ }
+#endif // DEBUG
+
+ info.compCompHnd->setVars(info.compMethodHnd, eeVarsCount, (ICorDebugInfo::NativeVarInfo*)eeVars);
+
+ eeVars = nullptr; // We give up ownership after setVars()
+}
+
+void Compiler::eeGetVars()
+{
+ ICorDebugInfo::ILVarInfo* varInfoTable;
+ ULONG32 varInfoCount;
+ bool extendOthers;
+
+ info.compCompHnd->getVars(info.compMethodHnd, &varInfoCount, &varInfoTable, &extendOthers);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("getVars() returned cVars = %d, extendOthers = %s\n", varInfoCount, extendOthers ? "true" : "false");
+ }
+#endif
+
+ // Over allocate in case extendOthers is set.
+
+ SIZE_T varInfoCountExtra = varInfoCount;
+ if (extendOthers)
+ {
+ varInfoCountExtra += info.compLocalsCount;
+ }
+
+ if (varInfoCountExtra == 0)
+ {
+ return;
+ }
+
+ info.compVarScopes = new (this, CMK_DebugInfo) VarScopeDsc[varInfoCountExtra];
+
+ VarScopeDsc* localVarPtr = info.compVarScopes;
+ ICorDebugInfo::ILVarInfo* v = varInfoTable;
+
+ for (unsigned i = 0; i < varInfoCount; i++, v++)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("var:%d start:%d end:%d\n", v->varNumber, v->startOffset, v->endOffset);
+ }
+#endif
+
+ if (v->startOffset >= v->endOffset)
+ {
+ continue;
+ }
+
+ assert(v->startOffset <= info.compILCodeSize);
+ assert(v->endOffset <= info.compILCodeSize);
+
+ localVarPtr->vsdLifeBeg = v->startOffset;
+ localVarPtr->vsdLifeEnd = v->endOffset;
+ localVarPtr->vsdLVnum = i;
+ localVarPtr->vsdVarNum = compMapILvarNum(v->varNumber);
+
+#ifdef DEBUG
+ localVarPtr->vsdName = gtGetLclVarName(localVarPtr->vsdVarNum);
+#endif
+
+ localVarPtr++;
+ info.compVarScopesCount++;
+ }
+
+ /* If extendOthers is set, then assume the scope of unreported vars
+ is the entire method. Note that this will cause fgExtendDbgLifetimes()
+ to zero-initialize all of them. This will be expensive if it's used
+ for too many variables.
+ */
+ if (extendOthers)
+ {
+ // Allocate a bit-array for all the variables and initialize to false
+
+ bool* varInfoProvided = (bool*)compGetMemA(info.compLocalsCount * sizeof(varInfoProvided[0]));
+ unsigned i;
+ for (i = 0; i < info.compLocalsCount; i++)
+ {
+ varInfoProvided[i] = false;
+ }
+
+ // Find which vars have absolutely no varInfo provided
+
+ for (i = 0; i < info.compVarScopesCount; i++)
+ {
+ varInfoProvided[info.compVarScopes[i].vsdVarNum] = true;
+ }
+
+ // Create entries for the variables with no varInfo
+
+ for (unsigned varNum = 0; varNum < info.compLocalsCount; varNum++)
+ {
+ if (varInfoProvided[varNum])
+ {
+ continue;
+ }
+
+ // Create a varInfo with scope over the entire method
+
+ localVarPtr->vsdLifeBeg = 0;
+ localVarPtr->vsdLifeEnd = info.compILCodeSize;
+ localVarPtr->vsdVarNum = varNum;
+ localVarPtr->vsdLVnum = info.compVarScopesCount;
+
+#ifdef DEBUG
+ localVarPtr->vsdName = gtGetLclVarName(localVarPtr->vsdVarNum);
+#endif
+
+ localVarPtr++;
+ info.compVarScopesCount++;
+ }
+ }
+
+ assert(localVarPtr <= info.compVarScopes + varInfoCountExtra);
+
+ if (varInfoCount != 0)
+ {
+ info.compCompHnd->freeArray(varInfoTable);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ compDispLocalVars();
+ }
+#endif // DEBUG
+}
+
+#ifdef DEBUG
+void Compiler::eeDispVar(ICorDebugInfo::NativeVarInfo* var)
+{
+ const char* name = nullptr;
+
+ if (var->varNumber == (DWORD)ICorDebugInfo::VARARGS_HND_ILNUM)
+ {
+ name = "varargsHandle";
+ }
+ else if (var->varNumber == (DWORD)ICorDebugInfo::RETBUF_ILNUM)
+ {
+ name = "retBuff";
+ }
+ else if (var->varNumber == (DWORD)ICorDebugInfo::TYPECTXT_ILNUM)
+ {
+ name = "typeCtx";
+ }
+ printf("%3d(%10s) : From %08Xh to %08Xh, in ", var->varNumber,
+ (VarNameToStr(name) == nullptr) ? "UNKNOWN" : VarNameToStr(name), var->startOffset, var->endOffset);
+
+ switch (var->loc.vlType)
+ {
+ case VLT_REG:
+ case VLT_REG_BYREF:
+ case VLT_REG_FP:
+ printf("%s", getRegName(var->loc.vlReg.vlrReg));
+ if (var->loc.vlType == (ICorDebugInfo::VarLocType)VLT_REG_BYREF)
+ {
+ printf(" byref");
+ }
+ break;
+
+ case VLT_STK:
+ case VLT_STK_BYREF:
+ if ((int)var->loc.vlStk.vlsBaseReg != (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ printf("%s[%d] (1 slot)", getRegName(var->loc.vlStk.vlsBaseReg), var->loc.vlStk.vlsOffset);
+ }
+ else
+ {
+ printf(STR_SPBASE "'[%d] (1 slot)", var->loc.vlStk.vlsOffset);
+ }
+ if (var->loc.vlType == (ICorDebugInfo::VarLocType)VLT_REG_BYREF)
+ {
+ printf(" byref");
+ }
+ break;
+
+#ifndef _TARGET_AMD64_
+ case VLT_REG_REG:
+ printf("%s-%s", getRegName(var->loc.vlRegReg.vlrrReg1), getRegName(var->loc.vlRegReg.vlrrReg2));
+ break;
+
+ case VLT_REG_STK:
+ if ((int)var->loc.vlRegStk.vlrsStk.vlrssBaseReg != (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ printf("%s-%s[%d]", getRegName(var->loc.vlRegStk.vlrsReg),
+ getRegName(var->loc.vlRegStk.vlrsStk.vlrssBaseReg), var->loc.vlRegStk.vlrsStk.vlrssOffset);
+ }
+ else
+ {
+ printf("%s-" STR_SPBASE "'[%d]", getRegName(var->loc.vlRegStk.vlrsReg),
+ var->loc.vlRegStk.vlrsStk.vlrssOffset);
+ }
+ break;
+
+ case VLT_STK_REG:
+ unreached(); // unexpected
+
+ case VLT_STK2:
+ if ((int)var->loc.vlStk2.vls2BaseReg != (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ printf("%s[%d] (2 slots)", getRegName(var->loc.vlStk2.vls2BaseReg), var->loc.vlStk2.vls2Offset);
+ }
+ else
+ {
+ printf(STR_SPBASE "'[%d] (2 slots)", var->loc.vlStk2.vls2Offset);
+ }
+ break;
+
+ case VLT_FPSTK:
+ printf("ST(L-%d)", var->loc.vlFPstk.vlfReg);
+ break;
+
+ case VLT_FIXED_VA:
+ printf("fxd_va[%d]", var->loc.vlFixedVarArg.vlfvOffset);
+ break;
+#endif // !_TARGET_AMD64_
+
+ default:
+ unreached(); // unexpected
+ }
+
+ printf("\n");
+}
+
+// Same parameters as ICorStaticInfo::setVars().
+void Compiler::eeDispVars(CORINFO_METHOD_HANDLE ftn, ULONG32 cVars, ICorDebugInfo::NativeVarInfo* vars)
+{
+ printf("*************** Variable debug info\n");
+ printf("%d vars\n", cVars);
+ for (unsigned i = 0; i < cVars; i++)
+ {
+ eeDispVar(&vars[i]);
+ }
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Debugging support - Line number info
+ */
+
+void Compiler::eeSetLIcount(unsigned count)
+{
+ assert(opts.compDbgInfo);
+
+ eeBoundariesCount = count;
+ if (eeBoundariesCount)
+ {
+ eeBoundaries = (boundariesDsc*)info.compCompHnd->allocateArray(eeBoundariesCount * sizeof(eeBoundaries[0]));
+ }
+ else
+ {
+ eeBoundaries = nullptr;
+ }
+}
+
+void Compiler::eeSetLIinfo(
+ unsigned which, UNATIVE_OFFSET nativeOffset, IL_OFFSET ilOffset, bool stkEmpty, bool callInstruction)
+{
+ assert(opts.compDbgInfo);
+ assert(eeBoundariesCount > 0);
+ assert(which < eeBoundariesCount);
+
+ if (eeBoundaries != nullptr)
+ {
+ eeBoundaries[which].nativeIP = nativeOffset;
+ eeBoundaries[which].ilOffset = ilOffset;
+ eeBoundaries[which].sourceReason = stkEmpty ? ICorDebugInfo::STACK_EMPTY : 0;
+ eeBoundaries[which].sourceReason |= callInstruction ? ICorDebugInfo::CALL_INSTRUCTION : 0;
+ }
+}
+
+void Compiler::eeSetLIdone()
+{
+ assert(opts.compDbgInfo);
+
+#if defined(DEBUG)
+ if (verbose)
+ {
+ eeDispLineInfos();
+ }
+#endif // DEBUG
+
+ // A necessary (but not sufficient) condition for the two struct definitions to overlap.
+ assert(sizeof(eeBoundaries[0]) == sizeof(ICorDebugInfo::OffsetMapping));
+
+ info.compCompHnd->setBoundaries(info.compMethodHnd, eeBoundariesCount, (ICorDebugInfo::OffsetMapping*)eeBoundaries);
+
+ eeBoundaries = nullptr; // we give up ownership after setBoundaries();
+}
+
+#if defined(DEBUG)
+
+/* static */
+void Compiler::eeDispILOffs(IL_OFFSET offs)
+{
+ const char* specialOffs[] = {"EPILOG", "PROLOG", "NO_MAP"};
+
+ switch ((int)offs) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::EPILOG:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::NO_MAPPING:
+ assert(DWORD(ICorDebugInfo::EPILOG) + 1 == (unsigned)ICorDebugInfo::PROLOG);
+ assert(DWORD(ICorDebugInfo::EPILOG) + 2 == (unsigned)ICorDebugInfo::NO_MAPPING);
+ int specialOffsNum;
+ specialOffsNum = offs - DWORD(ICorDebugInfo::EPILOG);
+ printf("%s", specialOffs[specialOffsNum]);
+ break;
+ default:
+ printf("0x%04X", offs);
+ }
+}
+
+/* static */
+void Compiler::eeDispLineInfo(const boundariesDsc* line)
+{
+ printf("IL offs ");
+
+ eeDispILOffs(line->ilOffset);
+
+ printf(" : 0x%08X", line->nativeIP);
+ if (line->sourceReason != 0)
+ {
+ // It seems like it should probably never be zero since ICorDebugInfo::SOURCE_TYPE_INVALID is zero.
+ // However, the JIT has always generated this and printed "stack non-empty".
+
+ printf(" ( ");
+ if ((line->sourceReason & ICorDebugInfo::STACK_EMPTY) != 0)
+ {
+ printf("STACK_EMPTY ");
+ }
+ if ((line->sourceReason & ICorDebugInfo::CALL_INSTRUCTION) != 0)
+ {
+ printf("CALL_INSTRUCTION ");
+ }
+ if ((line->sourceReason & ICorDebugInfo::CALL_SITE) != 0)
+ {
+ printf("CALL_SITE ");
+ }
+ printf(")");
+ }
+ printf("\n");
+
+ // We don't expect to see any other bits.
+ assert((line->sourceReason & ~(ICorDebugInfo::STACK_EMPTY | ICorDebugInfo::CALL_INSTRUCTION)) == 0);
+}
+
+void Compiler::eeDispLineInfos()
+{
+ printf("IP mapping count : %d\n", eeBoundariesCount); // this might be zero
+ for (unsigned i = 0; i < eeBoundariesCount; i++)
+ {
+ eeDispLineInfo(&eeBoundaries[i]);
+ }
+ printf("\n");
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * ICorJitInfo wrapper functions
+ *
+ * In many cases here, we don't tell the VM about various unwind or EH information if
+ * we're an altjit for an unexpected architecture. If it's not a same-architecture JIT
+ * (e.g., host AMD64, target ARM64), then the VM will get confused anyway.
+ */
+
+void Compiler::eeReserveUnwindInfo(BOOL isFunclet, BOOL isColdCode, ULONG unwindSize)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("reserveUnwindInfo(isFunclet=%s, isColdCode=%s, unwindSize=0x%x)\n", isFunclet ? "TRUE" : "FALSE",
+ isColdCode ? "TRUE" : "FALSE", unwindSize);
+ }
+#endif // DEBUG
+
+ if (info.compMatchedVM)
+ {
+ info.compCompHnd->reserveUnwindInfo(isFunclet, isColdCode, unwindSize);
+ }
+}
+
+void Compiler::eeAllocUnwindInfo(BYTE* pHotCode,
+ BYTE* pColdCode,
+ ULONG startOffset,
+ ULONG endOffset,
+ ULONG unwindSize,
+ BYTE* pUnwindBlock,
+ CorJitFuncKind funcKind)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("allocUnwindInfo(pHotCode=0x%p, pColdCode=0x%p, startOffset=0x%x, endOffset=0x%x, unwindSize=0x%x, "
+ "pUnwindBlock=0x%p, funKind=%d",
+ dspPtr(pHotCode), dspPtr(pColdCode), startOffset, endOffset, unwindSize, dspPtr(pUnwindBlock), funcKind);
+ switch (funcKind)
+ {
+ case CORJIT_FUNC_ROOT:
+ printf(" (main function)");
+ break;
+ case CORJIT_FUNC_HANDLER:
+ printf(" (handler)");
+ break;
+ case CORJIT_FUNC_FILTER:
+ printf(" (filter)");
+ break;
+ default:
+ printf(" (ILLEGAL)");
+ break;
+ }
+ printf(")\n");
+ }
+#endif // DEBUG
+
+ if (info.compMatchedVM)
+ {
+ info.compCompHnd->allocUnwindInfo(pHotCode, pColdCode, startOffset, endOffset, unwindSize, pUnwindBlock,
+ funcKind);
+ }
+}
+
+void Compiler::eeSetEHcount(unsigned cEH)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("setEHcount(cEH=%u)\n", cEH);
+ }
+#endif // DEBUG
+
+ if (info.compMatchedVM)
+ {
+ info.compCompHnd->setEHcount(cEH);
+ }
+}
+
+void Compiler::eeSetEHinfo(unsigned EHnumber, const CORINFO_EH_CLAUSE* clause)
+{
+#ifdef DEBUG
+ if (opts.dspEHTable)
+ {
+ dispOutgoingEHClause(EHnumber, *clause);
+ }
+#endif // DEBUG
+
+ if (info.compMatchedVM)
+ {
+ info.compCompHnd->setEHinfo(EHnumber, clause);
+ }
+}
+
+WORD Compiler::eeGetRelocTypeHint(void* target)
+{
+ if (info.compMatchedVM)
+ {
+ return info.compCompHnd->getRelocTypeHint(target);
+ }
+ else
+ {
+ // No hints
+ return (WORD)-1;
+ }
+}
+
+CORINFO_FIELD_HANDLE Compiler::eeFindJitDataOffs(unsigned dataOffs)
+{
+ // Data offsets are marked by the fact that the low two bits are 0b01 (0x1).
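+ // For example, assuming iaut_SHIFT == 2 and iaut_DATA_OFFSET == 0x1 (matching the 0b01 tag
+ // described above), a data offset of 0x40 is encoded as (0x40 << 2) | 0x1 == 0x101, and
+ // eeGetJitDataOffs below recovers 0x40 by shifting the tag back out.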
+ assert(dataOffs < 0x40000000);
+ return (CORINFO_FIELD_HANDLE)(size_t)((dataOffs << iaut_SHIFT) | iaut_DATA_OFFSET);
+}
+
+bool Compiler::eeIsJitDataOffs(CORINFO_FIELD_HANDLE field)
+{
+ // if 'field' is a jit data offset it has to fit into a 32-bit unsigned int
+ unsigned value = static_cast<unsigned>(reinterpret_cast<uintptr_t>(field));
+ if (((CORINFO_FIELD_HANDLE)(size_t)value) != field)
+ {
+ return false; // upper bits were set, not a jit data offset
+ }
+
+ // Data offsets are marked by the fact that the low two bits are 0b01 (0x1).
+ return (value & iaut_MASK) == iaut_DATA_OFFSET;
+}
+
+int Compiler::eeGetJitDataOffs(CORINFO_FIELD_HANDLE field)
+{
+ // Data offsets are marked by the fact that the low two bits are 0b01 (0x1).
+ if (eeIsJitDataOffs(field))
+ {
+ unsigned dataOffs = static_cast<unsigned>(reinterpret_cast<uintptr_t>(field));
+ assert(((CORINFO_FIELD_HANDLE)(size_t)dataOffs) == field);
+ assert(dataOffs < 0x40000000);
+ return (static_cast<int>(reinterpret_cast<intptr_t>(field))) >> iaut_SHIFT;
+ }
+ else
+ {
+ return -1;
+ }
+}
+
+/*****************************************************************************
+ *
+ * ICorStaticInfo wrapper functions
+ */
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#ifdef DEBUG
+void Compiler::dumpSystemVClassificationType(SystemVClassificationType ct)
+{
+ switch (ct)
+ {
+ case SystemVClassificationTypeUnknown:
+ printf("UNKNOWN");
+ break;
+ case SystemVClassificationTypeStruct:
+ printf("Struct");
+ break;
+ case SystemVClassificationTypeNoClass:
+ printf("NoClass");
+ break;
+ case SystemVClassificationTypeMemory:
+ printf("Memory");
+ break;
+ case SystemVClassificationTypeInteger:
+ printf("Integer");
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ printf("IntegerReference");
+ break;
+ case SystemVClassificationTypeIntegerByRef:
+ printf("IntegerByReference");
+ break;
+ case SystemVClassificationTypeSSE:
+ printf("SSE");
+ break;
+ default:
+ printf("ILLEGAL");
+ break;
+ }
+}
+#endif // DEBUG
+
+void Compiler::eeGetSystemVAmd64PassStructInRegisterDescriptor(
+ /*IN*/ CORINFO_CLASS_HANDLE structHnd,
+ /*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr)
+{
+ bool ok = info.compCompHnd->getSystemVAmd64PassStructInRegisterDescriptor(structHnd, structPassInRegDescPtr);
+ noway_assert(ok);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("**** getSystemVAmd64PassStructInRegisterDescriptor(0x%x (%s), ...) =>\n", dspPtr(structHnd),
+ eeGetClassName(structHnd));
+ printf(" passedInRegisters = %s\n", dspBool(structPassInRegDescPtr->passedInRegisters));
+ if (structPassInRegDescPtr->passedInRegisters)
+ {
+ printf(" eightByteCount = %d\n", structPassInRegDescPtr->eightByteCount);
+ for (unsigned int i = 0; i < structPassInRegDescPtr->eightByteCount; i++)
+ {
+ printf(" eightByte #%d -- classification: ", i);
+ dumpSystemVClassificationType(structPassInRegDescPtr->eightByteClassifications[i]);
+ printf(", byteSize: %d, byteOffset: %d\n", structPassInRegDescPtr->eightByteSizes[i],
+ structPassInRegDescPtr->eightByteOffsets[i]);
+ }
+ }
+ }
+#endif // DEBUG
+}
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#if COR_JIT_EE_VERSION <= 460
+
+// Validate the token to determine whether to turn the bad image format exception into
+// verification failure (for backward compatibility)
+static bool isValidTokenForTryResolveToken(ICorJitInfo* corInfo, CORINFO_RESOLVED_TOKEN* resolvedToken)
+{
+ if (!corInfo->isValidToken(resolvedToken->tokenScope, resolvedToken->token))
+ return false;
+
+ CorInfoTokenKind tokenType = resolvedToken->tokenType;
+ switch (TypeFromToken(resolvedToken->token))
+ {
+ case mdtModuleRef:
+ case mdtTypeDef:
+ case mdtTypeRef:
+ case mdtTypeSpec:
+ if ((tokenType & CORINFO_TOKENKIND_Class) == 0)
+ return false;
+ break;
+
+ case mdtMethodDef:
+ case mdtMethodSpec:
+ if ((tokenType & CORINFO_TOKENKIND_Method) == 0)
+ return false;
+ break;
+
+ case mdtFieldDef:
+ if ((tokenType & CORINFO_TOKENKIND_Field) == 0)
+ return false;
+ break;
+
+ case mdtMemberRef:
+ if ((tokenType & (CORINFO_TOKENKIND_Method | CORINFO_TOKENKIND_Field)) == 0)
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+// This type encapsulates the information necessary for `TryResolveTokenFilter` and
+// `eeTryResolveToken` below.
+struct TryResolveTokenFilterParam
+{
+ ICorJitInfo* m_corInfo;
+ CORINFO_RESOLVED_TOKEN* m_resolvedToken;
+ EXCEPTION_POINTERS m_exceptionPointers;
+ bool m_success;
+};
+
+LONG TryResolveTokenFilter(struct _EXCEPTION_POINTERS* exceptionPointers, void* theParam)
+{
+ assert(exceptionPointers->ExceptionRecord->ExceptionCode != SEH_VERIFICATION_EXCEPTION);
+
+ // Backward compatibility: Convert bad image format exceptions thrown by the EE while resolving token to
+ // verification exceptions if we are verifying. Verification exceptions will cause the JIT of the basic block to
+ // fail, but the JITing of the whole method is still going to succeed. This is done for backward compatibility only.
+ // Ideally, we would always treat bad tokens in the IL stream as fatal errors.
+ if (exceptionPointers->ExceptionRecord->ExceptionCode == EXCEPTION_COMPLUS)
+ {
+ auto* param = reinterpret_cast<TryResolveTokenFilterParam*>(theParam);
+ if (!isValidTokenForTryResolveToken(param->m_corInfo, param->m_resolvedToken))
+ {
+ param->m_exceptionPointers = *exceptionPointers;
+ return param->m_corInfo->FilterException(exceptionPointers);
+ }
+ }
+
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+bool Compiler::eeTryResolveToken(CORINFO_RESOLVED_TOKEN* resolvedToken)
+{
+ TryResolveTokenFilterParam param;
+ param.m_corInfo = info.compCompHnd;
+ param.m_resolvedToken = resolvedToken;
+ param.m_success = true;
+
+ PAL_TRY(TryResolveTokenFilterParam*, pParam, &param)
+ {
+ pParam->m_corInfo->resolveToken(pParam->m_resolvedToken);
+ }
+ PAL_EXCEPT_FILTER(TryResolveTokenFilter)
+ {
+ if (param.m_exceptionPointers.ExceptionRecord->ExceptionCode == EXCEPTION_COMPLUS)
+ {
+ param.m_corInfo->HandleException(&param.m_exceptionPointers);
+ }
+
+ param.m_success = false;
+ }
+ PAL_ENDTRY
+
+ return param.m_success;
+}
+
+struct TrapParam
+{
+ ICorJitInfo* m_corInfo;
+ EXCEPTION_POINTERS m_exceptionPointers;
+
+ void (*m_function)(void*);
+ void* m_param;
+ bool m_success;
+};
+
+static LONG __EEFilter(PEXCEPTION_POINTERS exceptionPointers, void* param)
+{
+ auto* trapParam = reinterpret_cast<TrapParam*>(param);
+ trapParam->m_exceptionPointers = *exceptionPointers;
+ return trapParam->m_corInfo->FilterException(exceptionPointers);
+}
+
+bool Compiler::eeRunWithErrorTrapImp(void (*function)(void*), void* param)
+{
+ TrapParam trapParam;
+ trapParam.m_corInfo = info.compCompHnd;
+ trapParam.m_function = function;
+ trapParam.m_param = param;
+ trapParam.m_success = true;
+
+ PAL_TRY(TrapParam*, __trapParam, &trapParam)
+ {
+ __trapParam->m_function(__trapParam->m_param);
+ }
+ PAL_EXCEPT_FILTER(__EEFilter)
+ {
+ trapParam.m_corInfo->HandleException(&trapParam.m_exceptionPointers);
+ trapParam.m_success = false;
+ }
+ PAL_ENDTRY
+
+ return trapParam.m_success;
+}
+
+#else // COR_JIT_EE_VERSION <= 460
+
+bool Compiler::eeTryResolveToken(CORINFO_RESOLVED_TOKEN* resolvedToken)
+{
+ return info.compCompHnd->tryResolveToken(resolvedToken);
+}
+
+bool Compiler::eeRunWithErrorTrapImp(void (*function)(void*), void* param)
+{
+ return info.compCompHnd->runWithErrorTrap(function, param);
+}
+
+#endif // COR_JIT_EE_VERSION > 460
+
+/*****************************************************************************
+ *
+ * Utility functions
+ */
+
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+
+/*****************************************************************************/
+
+// static helper names - constant array
+const char* jitHlpFuncTable[CORINFO_HELP_COUNT] = {
+#define JITHELPER(code, pfnHelper, sig) #code,
+#define DYNAMICJITHELPER(code, pfnHelper, sig) #code,
+#include "jithelpers.h"
+};
+
+/*****************************************************************************
+*
+* Filter wrapper to handle exception filtering.
+* On Unix, compilers don't support SEH.
+*/
+
+struct FilterSuperPMIExceptionsParam_ee_il
+{
+ Compiler* pThis;
+ Compiler::Info* pJitInfo;
+ CORINFO_FIELD_HANDLE field;
+ CORINFO_METHOD_HANDLE method;
+ CORINFO_CLASS_HANDLE clazz;
+ const char** classNamePtr;
+ const char* fieldOrMethodOrClassNamePtr;
+ EXCEPTION_POINTERS exceptionPointers;
+};
+
+static LONG FilterSuperPMIExceptions_ee_il(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam)
+{
+ FilterSuperPMIExceptionsParam_ee_il* pSPMIEParam = (FilterSuperPMIExceptionsParam_ee_il*)lpvParam;
+ pSPMIEParam->exceptionPointers = *pExceptionPointers;
+
+ if (pSPMIEParam->pThis->IsSuperPMIException(pExceptionPointers->ExceptionRecord->ExceptionCode))
+ {
+ return EXCEPTION_EXECUTE_HANDLER;
+ }
+
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+const char* Compiler::eeGetMethodName(CORINFO_METHOD_HANDLE method, const char** classNamePtr)
+{
+ if (eeGetHelperNum(method))
+ {
+ if (classNamePtr != nullptr)
+ {
+ *classNamePtr = "HELPER";
+ }
+ CorInfoHelpFunc ftnNum = eeGetHelperNum(method);
+ const char* name = info.compCompHnd->getHelperName(ftnNum);
+
+ // If it's something unknown from a RET VM, or from SuperPMI, then use our own helper name table.
+ if ((strcmp(name, "AnyJITHelper") == 0) || (strcmp(name, "Yickish helper name") == 0))
+ {
+ if (ftnNum < CORINFO_HELP_COUNT)
+ {
+ name = jitHlpFuncTable[ftnNum];
+ }
+ }
+ return name;
+ }
+
+ if (eeIsNativeMethod(method))
+ {
+ if (classNamePtr != nullptr)
+ {
+ *classNamePtr = "NATIVE";
+ }
+ method = eeGetMethodHandleForNative(method);
+ }
+
+ FilterSuperPMIExceptionsParam_ee_il param;
+
+ param.pThis = this;
+ param.pJitInfo = &info;
+ param.method = method;
+ param.classNamePtr = classNamePtr;
+
+ PAL_TRY(FilterSuperPMIExceptionsParam_ee_il*, pParam, &param)
+ {
+ pParam->fieldOrMethodOrClassNamePtr =
+ pParam->pJitInfo->compCompHnd->getMethodName(pParam->method, pParam->classNamePtr);
+ }
+ PAL_EXCEPT_FILTER(FilterSuperPMIExceptions_ee_il)
+ {
+ if (param.classNamePtr != nullptr)
+ {
+ *(param.classNamePtr) = "hackishClassName";
+ }
+
+ param.fieldOrMethodOrClassNamePtr = "hackishMethodName";
+ }
+ PAL_ENDTRY
+
+ return param.fieldOrMethodOrClassNamePtr;
+}
+
+const char* Compiler::eeGetFieldName(CORINFO_FIELD_HANDLE field, const char** classNamePtr)
+{
+ FilterSuperPMIExceptionsParam_ee_il param;
+
+ param.pThis = this;
+ param.pJitInfo = &info;
+ param.field = field;
+ param.classNamePtr = classNamePtr;
+
+ PAL_TRY(FilterSuperPMIExceptionsParam_ee_il*, pParam, &param)
+ {
+ pParam->fieldOrMethodOrClassNamePtr =
+ pParam->pJitInfo->compCompHnd->getFieldName(pParam->field, pParam->classNamePtr);
+ }
+ PAL_EXCEPT_FILTER(FilterSuperPMIExceptions_ee_il)
+ {
+ param.fieldOrMethodOrClassNamePtr = "hackishFieldName";
+ }
+ PAL_ENDTRY
+
+ return param.fieldOrMethodOrClassNamePtr;
+}
+
+const char* Compiler::eeGetClassName(CORINFO_CLASS_HANDLE clsHnd)
+{
+ FilterSuperPMIExceptionsParam_ee_il param;
+
+ param.pThis = this;
+ param.pJitInfo = &info;
+ param.clazz = clsHnd;
+
+ PAL_TRY(FilterSuperPMIExceptionsParam_ee_il*, pParam, &param)
+ {
+ pParam->fieldOrMethodOrClassNamePtr = pParam->pJitInfo->compCompHnd->getClassName(pParam->clazz);
+ }
+ PAL_EXCEPT_FILTER(FilterSuperPMIExceptions_ee_il)
+ {
+ param.fieldOrMethodOrClassNamePtr = "hackishClassName";
+ }
+ PAL_ENDTRY
+ return param.fieldOrMethodOrClassNamePtr;
+}
+
+#endif // DEBUG || FEATURE_JIT_METHOD_PERF || FEATURE_SIMD
+
+#ifdef DEBUG
+
+const wchar_t* Compiler::eeGetCPString(size_t strHandle)
+{
+ char buff[512 + sizeof(CORINFO_String)];
+
+ // make this bulletproof, so it works even if we are wrong.
+ if (ReadProcessMemory(GetCurrentProcess(), (void*)strHandle, buff, 4, nullptr) == 0)
+ {
+ return (nullptr);
+ }
+
+ CORINFO_String* asString = *((CORINFO_String**)strHandle);
+
+ if (ReadProcessMemory(GetCurrentProcess(), asString, buff, sizeof(buff), nullptr) == 0)
+ {
+ return (nullptr);
+ }
+
+ if (asString->stringLen >= 255 || asString->chars[asString->stringLen] != 0)
+ {
+ return nullptr;
+ }
+
+ return (asString->chars);
+}
+
+#endif // DEBUG
diff --git a/src/jit/ee_il_dll.hpp b/src/jit/ee_il_dll.hpp
new file mode 100644
index 0000000000..d9bf95fde8
--- /dev/null
+++ b/src/jit/ee_il_dll.hpp
@@ -0,0 +1,204 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+extern ICorJitHost* g_jitHost;
+
+class CILJit : public ICorJitCompiler
+{
+ CorJitResult __stdcall compileMethod(ICorJitInfo* comp, /* IN */
+ CORINFO_METHOD_INFO* methodInfo, /* IN */
+ unsigned flags, /* IN */
+ BYTE** nativeEntry, /* OUT */
+ ULONG* nativeSizeOfCode /* OUT */
+ );
+
+ void clearCache(void);
+ BOOL isCacheCleanupRequired(void);
+
+ void ProcessShutdownWork(ICorStaticInfo* statInfo);
+
+ void getVersionIdentifier(GUID* versionIdentifier /* OUT */
+ );
+
+ unsigned getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags);
+
+ void setRealJit(ICorJitCompiler* realJitCompiler);
+};
+
+/*****************************************************************************
+ *
+ * Functions to get various handles
+ */
+
+FORCEINLINE
+void Compiler::eeGetCallInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedToken,
+ CORINFO_CALLINFO_FLAGS flags,
+ CORINFO_CALL_INFO* pResult)
+{
+ info.compCompHnd->getCallInfo(pResolvedToken, pConstrainedToken, info.compMethodHnd, flags, pResult);
+}
+
+FORCEINLINE
+void Compiler::eeGetFieldInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_ACCESS_FLAGS accessFlags,
+ CORINFO_FIELD_INFO* pResult)
+{
+ info.compCompHnd->getFieldInfo(pResolvedToken, info.compMethodHnd, accessFlags, pResult);
+}
+
+/*****************************************************************************
+ *
+ * VOS info, method sigs, etc
+ */
+
+FORCEINLINE
+BOOL Compiler::eeIsValueClass(CORINFO_CLASS_HANDLE clsHnd)
+{
+ return info.compCompHnd->isValueClass(clsHnd);
+}
+
+FORCEINLINE
+void Compiler::eeGetSig(unsigned sigTok,
+ CORINFO_MODULE_HANDLE scope,
+ CORINFO_CONTEXT_HANDLE context,
+ CORINFO_SIG_INFO* retSig)
+{
+ info.compCompHnd->findSig(scope, sigTok, context, retSig);
+
+ assert(!varTypeIsComposite(JITtype2varType(retSig->retType)) || retSig->retTypeClass != nullptr);
+}
+
+FORCEINLINE
+void Compiler::eeGetMethodSig(CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sigRet, CORINFO_CLASS_HANDLE owner)
+{
+ info.compCompHnd->getMethodSig(methHnd, sigRet, owner);
+
+ assert(!varTypeIsComposite(JITtype2varType(sigRet->retType)) || sigRet->retTypeClass != nullptr);
+}
+
+/**********************************************************************
+ * For varargs we need the number of arguments at the call site
+ */
+
+FORCEINLINE
+void Compiler::eeGetCallSiteSig(unsigned sigTok,
+ CORINFO_MODULE_HANDLE scope,
+ CORINFO_CONTEXT_HANDLE context,
+ CORINFO_SIG_INFO* sigRet)
+{
+ info.compCompHnd->findCallSiteSig(scope, sigTok, context, sigRet);
+
+ assert(!varTypeIsComposite(JITtype2varType(sigRet->retType)) || sigRet->retTypeClass != nullptr);
+}
+
+/*****************************************************************************/
+inline var_types Compiler::eeGetArgType(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig)
+{
+ CORINFO_CLASS_HANDLE argClass;
+ return (JITtype2varType(strip(info.compCompHnd->getArgType(sig, list, &argClass))));
+}
+
+/*****************************************************************************/
+inline var_types Compiler::eeGetArgType(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig, bool* isPinned)
+{
+ CORINFO_CLASS_HANDLE argClass;
+ CorInfoTypeWithMod type = info.compCompHnd->getArgType(sig, list, &argClass);
+ *isPinned = ((type & ~CORINFO_TYPE_MASK) != 0);
+ return JITtype2varType(strip(type));
+}
+
+/*****************************************************************************
+ *
+ * Native Direct Optimizations
+ */
+
+inline CORINFO_EE_INFO* Compiler::eeGetEEInfo()
+{
+ if (!eeInfoInitialized)
+ {
+ info.compCompHnd->getEEInfo(&eeInfo);
+ eeInfoInitialized = true;
+ }
+
+ return &eeInfo;
+}
+
+/*****************************************************************************
+ *
+ * Convert the type returned from the VM to a var_type.
+ */
+
+inline var_types JITtype2varType(CorInfoType type)
+{
+
+ static const unsigned char varTypeMap[CORINFO_TYPE_COUNT] = {
+ // see the definition of enum CorInfoType in file inc/corinfo.h
+ TYP_UNDEF, // CORINFO_TYPE_UNDEF = 0x0,
+ TYP_VOID, // CORINFO_TYPE_VOID = 0x1,
+ TYP_BOOL, // CORINFO_TYPE_BOOL = 0x2,
+ TYP_CHAR, // CORINFO_TYPE_CHAR = 0x3,
+ TYP_BYTE, // CORINFO_TYPE_BYTE = 0x4,
+ TYP_UBYTE, // CORINFO_TYPE_UBYTE = 0x5,
+ TYP_SHORT, // CORINFO_TYPE_SHORT = 0x6,
+ TYP_CHAR, // CORINFO_TYPE_USHORT = 0x7,
+ TYP_INT, // CORINFO_TYPE_INT = 0x8,
+ TYP_INT, // CORINFO_TYPE_UINT = 0x9,
+ TYP_LONG, // CORINFO_TYPE_LONG = 0xa,
+ TYP_LONG, // CORINFO_TYPE_ULONG = 0xb,
+ TYP_I_IMPL, // CORINFO_TYPE_NATIVEINT = 0xc,
+ TYP_I_IMPL, // CORINFO_TYPE_NATIVEUINT = 0xd,
+ TYP_FLOAT, // CORINFO_TYPE_FLOAT = 0xe,
+ TYP_DOUBLE, // CORINFO_TYPE_DOUBLE = 0xf,
+ TYP_REF, // CORINFO_TYPE_STRING = 0x10, // Not used, should remove
+ TYP_I_IMPL, // CORINFO_TYPE_PTR = 0x11,
+ TYP_BYREF, // CORINFO_TYPE_BYREF = 0x12,
+ TYP_STRUCT, // CORINFO_TYPE_VALUECLASS = 0x13,
+ TYP_REF, // CORINFO_TYPE_CLASS = 0x14,
+ TYP_STRUCT, // CORINFO_TYPE_REFANY = 0x15,
+
+ // Generic type variables only appear when we're doing
+ // verification of generic code, in which case we're running
+ // in "import only" mode. Annoyingly the "import only"
+ // mode of the JIT actually does a fair bit of compilation,
+ // so we have to trick the compiler into thinking it's compiling
+ // a real instantiation. We do that by just pretending we're
+ // compiling the "object" instantiation of the code, i.e. by
+ // turning all generic type variables into object refs, except for a few
+ // choice places to do with verification, where we use
+ // verification types and CLASS_HANDLEs to track the difference.
+
+ TYP_REF, // CORINFO_TYPE_VAR = 0x16,
+ };
+
+ // spot check to make certain enumerations have not changed
+
+ assert(varTypeMap[CORINFO_TYPE_CLASS] == TYP_REF);
+ assert(varTypeMap[CORINFO_TYPE_BYREF] == TYP_BYREF);
+ assert(varTypeMap[CORINFO_TYPE_PTR] == TYP_I_IMPL);
+ assert(varTypeMap[CORINFO_TYPE_INT] == TYP_INT);
+ assert(varTypeMap[CORINFO_TYPE_UINT] == TYP_INT);
+ assert(varTypeMap[CORINFO_TYPE_DOUBLE] == TYP_DOUBLE);
+ assert(varTypeMap[CORINFO_TYPE_VOID] == TYP_VOID);
+ assert(varTypeMap[CORINFO_TYPE_VALUECLASS] == TYP_STRUCT);
+ assert(varTypeMap[CORINFO_TYPE_REFANY] == TYP_STRUCT);
+
+ assert(type < CORINFO_TYPE_COUNT);
+ assert(varTypeMap[type] != TYP_UNDEF);
+
+ return ((var_types)varTypeMap[type]);
+}
+
+inline CORINFO_CALLINFO_FLAGS combine(CORINFO_CALLINFO_FLAGS flag1, CORINFO_CALLINFO_FLAGS flag2)
+{
+ return (CORINFO_CALLINFO_FLAGS)(flag1 | flag2);
+}
+inline CORINFO_CALLINFO_FLAGS Compiler::addVerifyFlag(CORINFO_CALLINFO_FLAGS flags)
+{
+ if (tiVerificationNeeded)
+ {
+ flags = combine(flags, CORINFO_CALLINFO_VERIFICATION);
+ }
+ return flags;
+}
diff --git a/src/jit/eeinterface.cpp b/src/jit/eeinterface.cpp
new file mode 100644
index 0000000000..d8db947f02
--- /dev/null
+++ b/src/jit/eeinterface.cpp
@@ -0,0 +1,212 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX EEInterface XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+// ONLY FUNCTIONS common to all variants of the JIT (EXE, DLL) should go here;
+// otherwise they belong in the corresponding directory.
+
+#include "jitpch.h"
+
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+
+#pragma warning(push)
+#pragma warning(disable : 4701) // difficult to get rid of C4701 with 'sig' below
+
+/*****************************************************************************/
+
+/*****************************************************************************
+*
+* Filter wrapper to handle exception filtering.
+* On Unix, compilers don't support SEH, so a PAL exception filter is used instead.
+*/
+
+struct FilterSuperPMIExceptionsParam_eeinterface
+{
+ Compiler* pThis;
+ Compiler::Info* pJitInfo;
+ bool hasThis;
+ size_t siglength;
+ CORINFO_SIG_INFO sig;
+ CORINFO_ARG_LIST_HANDLE argLst;
+ CORINFO_METHOD_HANDLE hnd;
+ const char* returnType;
+ EXCEPTION_POINTERS exceptionPointers;
+};
+
+static LONG FilterSuperPMIExceptions_eeinterface(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam)
+{
+ FilterSuperPMIExceptionsParam_eeinterface* pSPMIEParam = (FilterSuperPMIExceptionsParam_eeinterface*)lpvParam;
+ pSPMIEParam->exceptionPointers = *pExceptionPointers;
+
+ if (pSPMIEParam->pThis->IsSuperPMIException(pExceptionPointers->ExceptionRecord->ExceptionCode))
+ {
+ return EXCEPTION_EXECUTE_HANDLER;
+ }
+
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+const char* Compiler::eeGetMethodFullName(CORINFO_METHOD_HANDLE hnd)
+{
+ const char* className;
+ const char* methodName = eeGetMethodName(hnd, &className);
+ if ((eeGetHelperNum(hnd) != CORINFO_HELP_UNDEF) || eeIsNativeMethod(hnd))
+ {
+ return methodName;
+ }
+
+ FilterSuperPMIExceptionsParam_eeinterface param;
+ param.returnType = nullptr;
+ param.pThis = this;
+ param.hasThis = false;
+ param.siglength = 0;
+ param.hnd = hnd;
+ param.pJitInfo = &info;
+
+ size_t length = 0;
+ unsigned i;
+
+ /* Generating the full signature is a two-pass process. First we have to walk
+ the components in order to assess the total size, then we allocate the buffer
+ and copy the elements into it.
+ */
+
+ /* Right now there is a race condition in the EE: className can be nullptr */
+
+ /* initialize length with length of className and '.' */
+
+ if (className)
+ {
+ length = strlen(className) + 1;
+ }
+ else
+ {
+ assert(strlen("<NULL>.") == 7);
+ length = 7;
+ }
+
+ /* add length of methodName and opening bracket */
+ length += strlen(methodName) + 1;
+
+ /* figure out the signature */
+
+ EXCEPTION_POINTERS exceptionPointers;
+
+ PAL_TRY(FilterSuperPMIExceptionsParam_eeinterface*, pParam, &param)
+ {
+ unsigned i;
+ pParam->pThis->eeGetMethodSig(pParam->hnd, &pParam->sig);
+ pParam->argLst = pParam->sig.args;
+
+ for (i = 0; i < pParam->sig.numArgs; i++)
+ {
+ var_types type = pParam->pThis->eeGetArgType(pParam->argLst, &pParam->sig);
+
+ pParam->siglength += strlen(varTypeName(type));
+ pParam->argLst = pParam->pJitInfo->compCompHnd->getArgNext(pParam->argLst);
+ }
+
+ /* add ',' if there is more than one argument */
+
+ if (pParam->sig.numArgs > 1)
+ {
+ pParam->siglength += (pParam->sig.numArgs - 1);
+ }
+
+ if (JITtype2varType(pParam->sig.retType) != TYP_VOID)
+ {
+ pParam->returnType = varTypeName(JITtype2varType(pParam->sig.retType));
+ pParam->siglength += strlen(pParam->returnType) + 1; // don't forget the delimiter ':'
+ }
+
+ // Does it have a 'this' pointer? Don't count explicit this, which has the this pointer type as the first
+ // element of the arg type list
+ if (pParam->sig.hasThis() && !pParam->sig.hasExplicitThis())
+ {
+ assert(strlen(":this") == 5);
+ pParam->siglength += 5;
+ pParam->hasThis = true;
+ }
+ }
+ PAL_EXCEPT_FILTER(FilterSuperPMIExceptions_eeinterface)
+ {
+ param.siglength = 0;
+ }
+ PAL_ENDTRY
+
+ /* add closing bracket and null terminator */
+
+ length += param.siglength + 2;
+
+ char* retName = (char*)compGetMemA(length, CMK_DebugOnly);
+
+ /* Now generate the full signature string in the allocated buffer */
+
+ if (className)
+ {
+ strcpy_s(retName, length, className);
+ strcat_s(retName, length, ":");
+ }
+ else
+ {
+ strcpy_s(retName, length, "<NULL>.");
+ }
+
+ strcat_s(retName, length, methodName);
+
+ // append the signature
+ strcat_s(retName, length, "(");
+
+ if (param.siglength > 0)
+ {
+ param.argLst = param.sig.args;
+
+ for (i = 0; i < param.sig.numArgs; i++)
+ {
+ var_types type = eeGetArgType(param.argLst, &param.sig);
+ strcat_s(retName, length, varTypeName(type));
+
+ param.argLst = info.compCompHnd->getArgNext(param.argLst);
+ if (i + 1 < param.sig.numArgs)
+ {
+ strcat_s(retName, length, ",");
+ }
+ }
+ }
+
+ strcat_s(retName, length, ")");
+
+ if (param.returnType != nullptr)
+ {
+ strcat_s(retName, length, ":");
+ strcat_s(retName, length, param.returnType);
+ }
+
+ if (param.hasThis)
+ {
+ strcat_s(retName, length, ":this");
+ }
+
+ assert(strlen(retName) == (length - 1));
+
+ return (retName);
+}
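+
+// A minimal, self-contained sketch of the two-pass (measure, then allocate-and-fill)
+// string building that eeGetMethodFullName uses above. The name and the use of
+// malloc/strcat_s below are illustrative only; the real code sizes the class name,
+// method name, and argument types, then allocates once via compGetMemA.
+static const char* exampleBuildName(const char** parts, unsigned count)
+{
+    // Pass 1: walk the components and measure the total length, including the null.
+    size_t length = 1;
+    for (unsigned i = 0; i < count; i++)
+    {
+        length += strlen(parts[i]);
+    }
+
+    // Pass 2: allocate the buffer once and append each component.
+    char* buffer = (char*)malloc(length);
+    if (buffer == nullptr)
+    {
+        return nullptr;
+    }
+    buffer[0] = '\0';
+    for (unsigned i = 0; i < count; i++)
+    {
+        strcat_s(buffer, length, parts[i]);
+    }
+    return buffer;
+}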
+
+#pragma warning(pop)
+
+#endif // defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+
+/*****************************************************************************/
diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp
new file mode 100644
index 0000000000..5c991ddf1b
--- /dev/null
+++ b/src/jit/emit.cpp
@@ -0,0 +1,7158 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX emit.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "hostallocator.h"
+#include "instr.h"
+#include "emit.h"
+#include "codegen.h"
+
+/*****************************************************************************
+ *
+ * Represent an emitter location.
+ */
+
+void emitLocation::CaptureLocation(emitter* emit)
+{
+ ig = emit->emitCurIG;
+ codePos = emit->emitCurOffset();
+
+ assert(Valid());
+}
+
+bool emitLocation::IsCurrentLocation(emitter* emit) const
+{
+ assert(Valid());
+ return (ig == emit->emitCurIG) && (codePos == emit->emitCurOffset());
+}
+
+UNATIVE_OFFSET emitLocation::CodeOffset(emitter* emit) const
+{
+ assert(Valid());
+ return emit->emitCodeOffset(ig, codePos);
+}
+
+int emitLocation::GetInsNum() const
+{
+ return emitGetInsNumFromCodePos(codePos);
+}
+
+#ifdef _TARGET_AMD64_
+// Get the instruction offset in the current instruction group, which must be a funclet prolog group.
+// This is used to find an instruction offset used in unwind data.
+// TODO-AMD64-Bug?: We only support a single main function prolog group, but allow for multiple funclet prolog
+// groups (not that we actually use that flexibility, since the funclet prolog will be small). How to
+// handle that?
+UNATIVE_OFFSET emitLocation::GetFuncletPrologOffset(emitter* emit) const
+{
+ assert(ig->igFuncIdx != 0);
+ assert((ig->igFlags & IGF_FUNCLET_PROLOG) != 0);
+ assert(ig == emit->emitCurIG);
+
+ return emit->emitCurIGsize;
+}
+#endif // _TARGET_AMD64_
+
+#ifdef DEBUG
+void emitLocation::Print() const
+{
+ unsigned insNum = emitGetInsNumFromCodePos(codePos);
+ unsigned insOfs = emitGetInsOfsFromCodePos(codePos);
+ printf("(G_M%03u_IG%02u,ins#%d,ofs#%d)", Compiler::s_compMethodsCount, ig->igNum, insNum, insOfs);
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Return the name of an instruction format.
+ */
+
+#if defined(DEBUG) || EMITTER_STATS
+
+const char* emitter::emitIfName(unsigned f)
+{
+ static const char* const ifNames[] = {
+#define IF_DEF(en, op1, op2) "IF_" #en,
+#include "emitfmts.h"
+ };
+
+ static char errBuff[32];
+
+ if (f < sizeof(ifNames) / sizeof(*ifNames))
+ {
+ return ifNames[f];
+ }
+
+ sprintf_s(errBuff, sizeof(errBuff), "??%u??", f);
+ return errBuff;
+}
+
+#endif
+
+#ifdef TRANSLATE_PDB
+
+/* these are protected */
+
+AddrMap* emitter::emitPDBOffsetTable = 0;
+LocalMap* emitter::emitPDBLocalTable = 0;
+bool emitter::emitIsPDBEnabled = true;
+BYTE* emitter::emitILBaseOfCode = 0;
+BYTE* emitter::emitILMethodBase = 0;
+BYTE* emitter::emitILMethodStart = 0;
+BYTE* emitter::emitImgBaseOfCode = 0;
+
+void emitter::MapCode(int ilOffset, BYTE* imgDest)
+{
+ if (emitIsPDBEnabled)
+ {
+ emitPDBOffsetTable->MapSrcToDest(ilOffset, (int)(imgDest - emitImgBaseOfCode));
+ }
+}
+
+void emitter::MapFunc(int imgOff,
+ int procLen,
+ int dbgStart,
+ int dbgEnd,
+ short frameReg,
+ int stkAdjust,
+ int lvaCount,
+ OptJit::LclVarDsc* lvaTable,
+ bool framePtr)
+{
+ if (emitIsPDBEnabled)
+ {
+ // this code stores information about local symbols for the PDB translation
+
+ assert(lvaCount >= 0); // don't allow a negative count
+
+ LvaDesc* rgLvaDesc = 0;
+
+ if (lvaCount > 0)
+ {
+ rgLvaDesc = new LvaDesc[lvaCount];
+
+ if (!rgLvaDesc)
+ {
+ NOMEM();
+ }
+
+ LvaDesc* pDst = rgLvaDesc;
+ OptJit::LclVarDsc* pSrc = lvaTable;
+ for (int i = 0; i < lvaCount; ++i, ++pDst, ++pSrc)
+ {
+ pDst->slotNum = pSrc->lvSlotNum;
+ pDst->isReg = pSrc->lvRegister;
+ pDst->reg = (pSrc->lvRegister ? pSrc->lvRegNum : frameReg);
+ pDst->off = pSrc->lvStkOffs + stkAdjust;
+ }
+ }
+
+ emitPDBLocalTable->AddFunc((int)(emitILMethodBase - emitILBaseOfCode), imgOff - (int)emitImgBaseOfCode, procLen,
+ dbgStart - imgOff, dbgEnd - imgOff, lvaCount, rgLvaDesc, framePtr);
+ // do not delete rgLvaDesc here -- responsibility is now on emitPDBLocalTable destructor
+ }
+}
+
+/* these are public */
+
+void emitter::SetILBaseOfCode(BYTE* pTextBase)
+{
+ emitILBaseOfCode = pTextBase;
+}
+
+void emitter::SetILMethodBase(BYTE* pMethodEntry)
+{
+ emitILMethodBase = pMethodEntry;
+}
+
+void emitter::SetILMethodStart(BYTE* pMethodCode)
+{
+ emitILMethodStart = pMethodCode;
+}
+
+void emitter::SetImgBaseOfCode(BYTE* pTextBase)
+{
+ emitImgBaseOfCode = pTextBase;
+}
+
+void emitter::SetIDBaseToProlog()
+{
+ emitInstrDescILBase = (int)(emitILMethodBase - emitILBaseOfCode);
+}
+
+void emitter::SetIDBaseToOffset(int methodOffset)
+{
+ emitInstrDescILBase = methodOffset + (int)(emitILMethodStart - emitILBaseOfCode);
+}
+
+void emitter::DisablePDBTranslation()
+{
+ // this function should disable PDB translation code
+ emitIsPDBEnabled = false;
+}
+
+bool emitter::IsPDBEnabled()
+{
+ return emitIsPDBEnabled;
+}
+
+void emitter::InitTranslationMaps(int ilCodeSize)
+{
+ if (emitIsPDBEnabled)
+ {
+ emitPDBOffsetTable = AddrMap::Create(ilCodeSize);
+ emitPDBLocalTable = LocalMap::Create();
+ }
+}
+
+void emitter::DeleteTranslationMaps()
+{
+ if (emitPDBOffsetTable)
+ {
+ delete emitPDBOffsetTable;
+ emitPDBOffsetTable = 0;
+ }
+ if (emitPDBLocalTable)
+ {
+ delete emitPDBLocalTable;
+ emitPDBLocalTable = 0;
+ }
+}
+
+void emitter::InitTranslator(PDBRewriter* pPDB, int* rgSecMap, IMAGE_SECTION_HEADER** rgpHeader, int numSections)
+{
+ if (emitIsPDBEnabled)
+ {
+ pPDB->InitMaps(rgSecMap, // new PE section header order
+ rgpHeader, // array of section headers
+ numSections, // number of sections
+ emitPDBOffsetTable, // code offset translation table
+ emitPDBLocalTable); // slot variable translation table
+ }
+}
+
+#endif // TRANSLATE_PDB
+
+/*****************************************************************************/
+
+#if EMITTER_STATS
+
+static unsigned totAllocdSize;
+static unsigned totActualSize;
+
+unsigned emitter::emitIFcounts[emitter::IF_COUNT];
+
+static unsigned emitSizeBuckets[] = {100, 1024 * 1, 1024 * 2, 1024 * 3, 1024 * 4, 1024 * 5, 1024 * 10, 0};
+static Histogram emitSizeTable(HostAllocator::getHostAllocator(), emitSizeBuckets);
+
+static unsigned GCrefsBuckets[] = {0, 1, 2, 5, 10, 20, 50, 128, 256, 512, 1024, 0};
+static Histogram GCrefsTable(HostAllocator::getHostAllocator(), GCrefsBuckets);
+
+static unsigned stkDepthBuckets[] = {0, 1, 2, 5, 10, 16, 32, 128, 1024, 0};
+static Histogram stkDepthTable(HostAllocator::getHostAllocator(), stkDepthBuckets);
+
+size_t emitter::emitSizeMethod;
+
+size_t emitter::emitTotMemAlloc;
+unsigned emitter::emitTotalInsCnt;
+unsigned emitter::emitTotalIGcnt;
+unsigned emitter::emitTotalPhIGcnt;
+unsigned emitter::emitTotalIGjmps;
+unsigned emitter::emitTotalIGptrs;
+unsigned emitter::emitTotalIGicnt;
+size_t emitter::emitTotalIGsize;
+unsigned emitter::emitTotalIGmcnt;
+
+unsigned emitter::emitSmallDspCnt;
+unsigned emitter::emitLargeDspCnt;
+
+unsigned emitter::emitSmallCnsCnt;
+unsigned emitter::emitLargeCnsCnt;
+unsigned emitter::emitSmallCns[SMALL_CNS_TSZ];
+
+void emitterStaticStats(FILE* fout)
+{
+ // insGroup members
+
+ fprintf(fout, "\n");
+ fprintf(fout, "insGroup:\n");
+ fprintf(fout, "Offset of igNext = %2u\n", offsetof(insGroup, igNext));
+#ifdef DEBUG
+ fprintf(fout, "Offset of igSelf = %2u\n", offsetof(insGroup, igSelf));
+#endif
+ fprintf(fout, "Offset of igNum = %2u\n", offsetof(insGroup, igNum));
+ fprintf(fout, "Offset of igOffs = %2u\n", offsetof(insGroup, igOffs));
+ fprintf(fout, "Offset of igFuncIdx = %2u\n", offsetof(insGroup, igFuncIdx));
+ fprintf(fout, "Offset of igFlags = %2u\n", offsetof(insGroup, igFlags));
+ fprintf(fout, "Offset of igSize = %2u\n", offsetof(insGroup, igSize));
+ fprintf(fout, "Offset of igData = %2u\n", offsetof(insGroup, igData));
+#if EMIT_TRACK_STACK_DEPTH
+ fprintf(fout, "Offset of igStkLvl = %2u\n", offsetof(insGroup, igStkLvl));
+#endif
+ fprintf(fout, "Offset of igGCregs = %2u\n", offsetof(insGroup, igGCregs));
+ fprintf(fout, "Offset of igInsCnt = %2u\n", offsetof(insGroup, igInsCnt));
+ fprintf(fout, "Size of insGroup = %u\n", sizeof(insGroup));
+
+ // insPlaceholderGroupData members
+
+ fprintf(fout, "\n");
+ fprintf(fout, "insPlaceholderGroupData:\n");
+ fprintf(fout, "Offset of igPhNext = %2u\n", offsetof(insPlaceholderGroupData, igPhNext));
+ fprintf(fout, "Offset of igPhBB = %2u\n", offsetof(insPlaceholderGroupData, igPhBB));
+ fprintf(fout, "Offset of igPhInitGCrefVars = %2u\n", offsetof(insPlaceholderGroupData, igPhInitGCrefVars));
+ fprintf(fout, "Offset of igPhInitGCrefRegs = %2u\n", offsetof(insPlaceholderGroupData, igPhInitGCrefRegs));
+ fprintf(fout, "Offset of igPhInitByrefRegs = %2u\n", offsetof(insPlaceholderGroupData, igPhInitByrefRegs));
+ fprintf(fout, "Offset of igPhPrevGCrefVars = %2u\n", offsetof(insPlaceholderGroupData, igPhPrevGCrefVars));
+ fprintf(fout, "Offset of igPhPrevGCrefRegs = %2u\n", offsetof(insPlaceholderGroupData, igPhPrevGCrefRegs));
+ fprintf(fout, "Offset of igPhPrevByrefRegs = %2u\n", offsetof(insPlaceholderGroupData, igPhPrevByrefRegs));
+ fprintf(fout, "Offset of igPhType = %2u\n", offsetof(insPlaceholderGroupData, igPhType));
+ fprintf(fout, "Size of insPlaceholderGroupData = %u\n", sizeof(insPlaceholderGroupData));
+
+ fprintf(fout, "\n");
+ fprintf(fout, "Size of tinyID = %2u\n", TINY_IDSC_SIZE);
+ fprintf(fout, "Size of instrDesc = %2u\n", sizeof(emitter::instrDesc));
+ // fprintf(fout, "Offset of _idIns = %2u\n", offsetof(emitter::instrDesc, _idIns ));
+ // fprintf(fout, "Offset of _idInsFmt = %2u\n", offsetof(emitter::instrDesc, _idInsFmt ));
+ // fprintf(fout, "Offset of _idOpSize = %2u\n", offsetof(emitter::instrDesc, _idOpSize ));
+ // fprintf(fout, "Offset of idSmallCns = %2u\n", offsetof(emitter::instrDesc, idSmallCns ));
+ // fprintf(fout, "Offset of _idAddrUnion= %2u\n", offsetof(emitter::instrDesc, _idAddrUnion));
+ // fprintf(fout, "\n");
+ // fprintf(fout, "Size of _idAddrUnion= %2u\n", sizeof(((emitter::instrDesc*)0)->_idAddrUnion));
+
+ fprintf(fout, "\n");
+ fprintf(fout, "GCInfo::regPtrDsc:\n");
+ fprintf(fout, "Offset of rpdNext = %2u\n", offsetof(GCInfo::regPtrDsc, rpdNext));
+ fprintf(fout, "Offset of rpdOffs = %2u\n", offsetof(GCInfo::regPtrDsc, rpdOffs));
+ fprintf(fout, "Offset of <union> = %2u\n", offsetof(GCInfo::regPtrDsc, rpdPtrArg));
+ fprintf(fout, "Size of GCInfo::regPtrDsc = %2u\n", sizeof(GCInfo::regPtrDsc));
+
+ fprintf(fout, "\n");
+}
+
+void emitterStats(FILE* fout)
+{
+ if (totAllocdSize > 0)
+ {
+ assert(totActualSize <= totAllocdSize);
+
+ fprintf(fout, "\nTotal allocated code size = %u\n", totAllocdSize);
+
+ if (totActualSize < totAllocdSize)
+ {
+ fprintf(fout, "Total generated code size = %u ", totActualSize);
+
+ fprintf(fout, "(%4.3f%% waste)", 100 * ((totAllocdSize - totActualSize) / (double)totActualSize));
+ fprintf(fout, "\n");
+ }
+
+ assert(emitter::emitTotalInsCnt);
+
+ fprintf(fout, "Average of %4.2f bytes of code generated per instruction\n",
+ (double)totActualSize / emitter::emitTotalInsCnt);
+ }
+
+ fprintf(fout, "\nInstruction format frequency table:\n\n");
+
+ unsigned f, ic = 0, dc = 0;
+
+ for (f = 0; f < emitter::IF_COUNT; f++)
+ {
+ ic += emitter::emitIFcounts[f];
+ }
+
+ for (f = 0; f < emitter::IF_COUNT; f++)
+ {
+ unsigned c = emitter::emitIFcounts[f];
+
+ if ((c > 0) && (1000 * c >= ic))
+ {
+ dc += c;
+ fprintf(fout, " %-13s %8u (%5.2f%%)\n", emitter::emitIfName(f), c, 100.0 * c / ic);
+ }
+ }
+
+ fprintf(fout, " --------------------------------\n");
+ fprintf(fout, " %-13s %8u (%5.2f%%)\n", "Total shown", dc, 100.0 * dc / ic);
+
+ if (emitter::emitTotalIGmcnt)
+ {
+ fprintf(fout, "Total of %8u methods\n", emitter::emitTotalIGmcnt);
+ fprintf(fout, "Total of %8u insGroup\n", emitter::emitTotalIGcnt);
+ fprintf(fout, "Total of %8u insPlaceholderGroupData\n", emitter::emitTotalPhIGcnt);
+ fprintf(fout, "Total of %8u instructions\n", emitter::emitTotalIGicnt);
+ fprintf(fout, "Total of %8u jumps\n", emitter::emitTotalIGjmps);
+ fprintf(fout, "Total of %8u GC livesets\n", emitter::emitTotalIGptrs);
+ fprintf(fout, "\n");
+ fprintf(fout, "Average of %8.1lf insGroup per method\n",
+ (double)emitter::emitTotalIGcnt / emitter::emitTotalIGmcnt);
+ fprintf(fout, "Average of %8.1lf insPhGroup per method\n",
+ (double)emitter::emitTotalPhIGcnt / emitter::emitTotalIGmcnt);
+ fprintf(fout, "Average of %8.1lf instructions per method\n",
+ (double)emitter::emitTotalIGicnt / emitter::emitTotalIGmcnt);
+ fprintf(fout, "Average of %8.1lf desc. bytes per method\n",
+ (double)emitter::emitTotalIGsize / emitter::emitTotalIGmcnt);
+ fprintf(fout, "Average of %8.1lf jumps per method\n",
+ (double)emitter::emitTotalIGjmps / emitter::emitTotalIGmcnt);
+ fprintf(fout, "Average of %8.1lf GC livesets per method\n",
+ (double)emitter::emitTotalIGptrs / emitter::emitTotalIGmcnt);
+ fprintf(fout, "\n");
+ fprintf(fout, "Average of %8.1lf instructions per group \n",
+ (double)emitter::emitTotalIGicnt / emitter::emitTotalIGcnt);
+ fprintf(fout, "Average of %8.1lf desc. bytes per group \n",
+ (double)emitter::emitTotalIGsize / emitter::emitTotalIGcnt);
+ fprintf(fout, "Average of %8.1lf jumps per group \n",
+ (double)emitter::emitTotalIGjmps / emitter::emitTotalIGcnt);
+ fprintf(fout, "\n");
+ fprintf(fout, "Average of %8.1lf bytes per instrDesc\n",
+ (double)emitter::emitTotalIGsize / emitter::emitTotalIGicnt);
+ fprintf(fout, "\n");
+ fprintf(fout, "A total of %8u desc. bytes\n", emitter::emitTotalIGsize);
+ fprintf(fout, "\n");
+ }
+
+ fprintf(fout, "Descriptor size distribution:\n");
+ emitSizeTable.dump(fout);
+ fprintf(fout, "\n");
+
+ fprintf(fout, "GC ref frame variable counts:\n");
+ GCrefsTable.dump(fout);
+ fprintf(fout, "\n");
+
+ fprintf(fout, "Max. stack depth distribution:\n");
+ stkDepthTable.dump(fout);
+ fprintf(fout, "\n");
+
+ int i;
+ unsigned c;
+ unsigned m;
+
+ if (emitter::emitSmallCnsCnt || emitter::emitLargeCnsCnt)
+ {
+ fprintf(fout, "SmallCnsCnt = %6u\n", emitter::emitSmallCnsCnt);
+ fprintf(fout, "LargeCnsCnt = %6u (%3u %% of total)\n", emitter::emitLargeCnsCnt,
+ 100 * emitter::emitLargeCnsCnt / (emitter::emitLargeCnsCnt + emitter::emitSmallCnsCnt));
+ }
+
+#if 0
+ // TODO-Cleanup: Why is this in #if 0? Is EMITTER_STATS ever used? Fix or delete this.
+ if (emitter::emitSmallCnsCnt)
+ {
+ fprintf(fout, "\n");
+
+ m = emitter::emitSmallCnsCnt/1000 + 1;
+
+ for (i = ID_MIN_SMALL_CNS; i < ID_MAX_SMALL_CNS; i++)
+ {
+ c = emitter::emitSmallCns[i-ID_MIN_SMALL_CNS];
+ if (c >= m)
+ fprintf(fout, "cns[%4d] = %u\n", i, c);
+ }
+ }
+#endif // 0
+
+ fprintf(fout, "%8u bytes allocated in the emitter\n", emitter::emitTotMemAlloc);
+}
+
+#endif // EMITTER_STATS
+
+/*****************************************************************************/
+
+const unsigned short emitTypeSizes[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sze,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+const unsigned short emitTypeActSz[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) asze,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Initialize the emitter - called once, at DLL load time.
+ */
+
+void emitter::emitInit()
+{
+}
+
+/*****************************************************************************
+ *
+ * Shut down the emitter - called once, at DLL exit time.
+ */
+
+void emitter::emitDone()
+{
+}
+
+/*****************************************************************************
+ *
+ * Allocate memory.
+ */
+
+void* emitter::emitGetMem(size_t sz)
+{
+ assert(sz % sizeof(int) == 0);
+
+#if EMITTER_STATS
+ emitTotMemAlloc += sz;
+#endif
+
+ return emitComp->compGetMem(sz, CMK_InstDesc);
+}
+
+/*****************************************************************************
+ *
+ * emitLclVarAddr support methods
+ */
+void emitLclVarAddr::initLclVarAddr(int varNum, unsigned offset)
+{
+ if (varNum < 32768)
+ {
+ if (varNum >= 0)
+ {
+ if (offset < 32768)
+ {
+ _lvaTag = LVA_STANDARD_ENCODING;
+ _lvaExtra = offset; // offset known to be in [0..32767]
+ _lvaVarNum = (unsigned)varNum; // varNum known to be in [0..32767]
+ }
+ else // offset >= 32768
+ {
+ // We could support larger local offsets here at the cost of fewer varNums
+ if (offset >= 65536)
+ {
+ IMPL_LIMITATION("JIT doesn't support offsets larger than 65535 into valuetypes\n");
+ }
+
+ _lvaTag = LVA_LARGE_OFFSET;
+ _lvaExtra = (offset - 32768); // (offset-32768) is known to be in [0..32767]
+ _lvaVarNum = (unsigned)varNum; // varNum known to be in [0..32767]
+ }
+ }
+ else // varNum < 0, These are used for Compiler spill temps
+ {
+ if (varNum < -32767)
+ {
+ IMPL_LIMITATION("JIT doesn't support more than 32767 Compiler Spill temps\n");
+ }
+ if (offset > 32767)
+ {
+ IMPL_LIMITATION(
+ "JIT doesn't support offsets larger than 32767 into valuetypes for Compiler Spill temps\n");
+ }
+
+ _lvaTag = LVA_COMPILER_TEMP;
+ _lvaExtra = offset; // offset known to be in [0..32767]
+ _lvaVarNum = (unsigned)(-varNum); // -varNum known to be in [1..32767]
+ }
+ }
+ else // varNum >= 32768
+ {
+ if (offset >= 256)
+ {
+ IMPL_LIMITATION("JIT doesn't support offsets larger than 255 into valuetypes for local vars > 32767\n");
+ }
+ if (varNum >= 0x00400000)
+ { // 0x00400000 == 2^22
+ IMPL_LIMITATION("JIT doesn't support more than 2^22 variables\n");
+ }
+
+ _lvaTag = LVA_LARGE_VARNUM;
+ _lvaVarNum = varNum & 0x00007FFF; // varNum bits 14 to 0
+ _lvaExtra = (varNum & 0x003F8000) >> 15; // varNum bits 21 to 15 in _lvaExtra bits 6 to 0, 7 bits total
+ _lvaExtra |= (offset << 7); // offset bits 7 to 0 in _lvaExtra bits 14 to 7, 8 bits total
+ }
+}
+
+// Returns the variable to access. Note that it returns a negative number for compiler spill temps.
+int emitLclVarAddr::lvaVarNum()
+{
+ switch (_lvaTag)
+ {
+ case LVA_COMPILER_TEMP:
+ return -((int)_lvaVarNum);
+ case LVA_LARGE_VARNUM:
+ return (int)(((_lvaExtra & 0x007F) << 15) + _lvaVarNum);
+ default: // LVA_STANDARD_ENCODING or LVA_LARGE_OFFSET
+ assert((_lvaTag == LVA_STANDARD_ENCODING) || (_lvaTag == LVA_LARGE_OFFSET));
+ return (int)_lvaVarNum;
+ }
+}
+
+unsigned emitLclVarAddr::lvaOffset() // returns the offset into the variable to access
+{
+ switch (_lvaTag)
+ {
+ case LVA_LARGE_OFFSET:
+ return (32768 + _lvaExtra);
+ case LVA_LARGE_VARNUM:
+ return (_lvaExtra & 0x7F80) >> 7;
+ default: // LVA_STANDARD_ENCODING or LVA_COMPILER_TEMP
+ assert((_lvaTag == LVA_STANDARD_ENCODING) || (_lvaTag == LVA_COMPILER_TEMP));
+ return _lvaExtra;
+ }
+}
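+
+// A minimal, self-contained sketch of the LVA_LARGE_VARNUM round trip implemented
+// above: the variable number is split across _lvaVarNum (bits 14..0) and the low
+// 7 bits of _lvaExtra, while the byte offset occupies _lvaExtra bits 14..7. Plain
+// unsigned locals stand in for the bitfields; the function name is illustrative only.
+static void exampleLargeVarNumRoundTrip()
+{
+    unsigned varNum = 40000; // >= 32768, so the large-varnum encoding applies
+    unsigned offset = 12;    // must be < 256 for this encoding
+
+    unsigned lvaVarNum = varNum & 0x00007FFF;         // varNum bits 14..0
+    unsigned lvaExtra  = (varNum & 0x003F8000) >> 15; // varNum bits 21..15 -> extra bits 6..0
+    lvaExtra |= (offset << 7);                        // offset bits 7..0   -> extra bits 14..7
+
+    unsigned decodedVarNum = ((lvaExtra & 0x007F) << 15) + lvaVarNum;
+    unsigned decodedOffset = (lvaExtra & 0x7F80) >> 7;
+
+    assert(decodedVarNum == varNum);
+    assert(decodedOffset == offset);
+}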
+
+/*****************************************************************************
+ *
+ * Record some info about the method about to be emitted.
+ */
+
+void emitter::emitBegCG(Compiler* comp, COMP_HANDLE cmpHandle)
+{
+ emitComp = comp;
+ emitCmpHandle = cmpHandle;
+}
+
+void emitter::emitEndCG()
+{
+}
+
+/*****************************************************************************
+ *
+ * Prepare the given IG for emission of code.
+ */
+
+void emitter::emitGenIG(insGroup* ig)
+{
+ /* Set the "current IG" value */
+
+ emitCurIG = ig;
+
+#if EMIT_TRACK_STACK_DEPTH
+
+ /* Record the stack level on entry to this group */
+
+ ig->igStkLvl = emitCurStackLvl;
+
+ // If we don't have enough bits in igStkLvl, refuse to compile
+
+ if (ig->igStkLvl != emitCurStackLvl)
+ {
+ IMPL_LIMITATION("Too many arguments pushed on stack");
+ }
+
+// printf("Start IG #%02u [stk=%02u]\n", ig->igNum, emitCurStackLvl);
+
+#endif
+
+ if (emitNoGCIG)
+ {
+ ig->igFlags |= IGF_NOGCINTERRUPT;
+ }
+
+ /* Prepare to issue instructions */
+
+ emitCurIGinsCnt = 0;
+ emitCurIGsize = 0;
+
+ assert(emitCurIGjmpList == nullptr);
+
+ /* Allocate the temp instruction buffer if we haven't done so */
+
+ if (emitCurIGfreeBase == nullptr)
+ {
+ emitIGbuffSize = SC_IG_BUFFER_SIZE;
+ emitCurIGfreeBase = (BYTE*)emitGetMem(emitIGbuffSize);
+ }
+
+ emitCurIGfreeNext = emitCurIGfreeBase;
+ emitCurIGfreeEndp = emitCurIGfreeBase + emitIGbuffSize;
+}
+
+/*****************************************************************************
+ *
+ * Finish and save the current IG.
+ */
+
+insGroup* emitter::emitSavIG(bool emitAdd)
+{
+ insGroup* ig;
+ BYTE* id;
+
+ size_t sz;
+ size_t gs;
+
+ assert(emitCurIGfreeNext <= emitCurIGfreeEndp);
+
+ /* Get hold of the IG descriptor */
+
+ ig = emitCurIG;
+ assert(ig);
+
+ /* Compute how much code we've generated */
+
+ sz = emitCurIGfreeNext - emitCurIGfreeBase;
+
+ /* Compute the total size we need to allocate */
+
+ gs = roundUp(sz);
+
+ /* Do we need space for GC? */
+
+ if (!(ig->igFlags & IGF_EMIT_ADD))
+ {
+ /* Is the initial set of live GC vars different from the previous one? */
+
+ if (emitForceStoreGCState || !VarSetOps::Equal(emitComp, emitPrevGCrefVars, emitInitGCrefVars))
+ {
+ /* Remember that we will have a new set of live GC variables */
+
+ ig->igFlags |= IGF_GC_VARS;
+
+#if EMITTER_STATS
+ emitTotalIGptrs++;
+#endif
+
+ /* We'll allocate extra space to record the liveset */
+
+ gs += sizeof(VARSET_TP);
+ }
+
+ /* Is the initial set of live Byref regs different from the previous one? */
+
+ /* Remember that we will have a new set of live byref registers */
+
+ ig->igFlags |= IGF_BYREF_REGS;
+
+ /* We'll allocate extra space (DWORD aligned) to record the GC regs */
+
+ gs += sizeof(int);
+ }
+
+ /* Allocate space for the instructions and optional liveset */
+
+ id = (BYTE*)emitGetMem(gs);
+
+ /* Do we need to store the byref regs */
+
+ if (ig->igFlags & IGF_BYREF_REGS)
+ {
+ /* Record the byref regs in front of the instructions */
+
+ *castto(id, unsigned*)++ = (unsigned)emitInitByrefRegs;
+ }
+
+ /* Do we need to store the liveset? */
+
+ if (ig->igFlags & IGF_GC_VARS)
+ {
+ /* Record the liveset in front of the instructions */
+ VarSetOps::AssignNoCopy(emitComp, (*castto(id, VARSET_TP*)), VarSetOps::MakeEmpty(emitComp));
+ VarSetOps::Assign(emitComp, (*castto(id, VARSET_TP*)++), emitInitGCrefVars);
+ }
+
+ /* Record the collected instructions */
+
+ assert((ig->igFlags & IGF_PLACEHOLDER) == 0);
+ ig->igData = id;
+
+ memcpy(id, emitCurIGfreeBase, sz);
+
+#ifdef DEBUG
+ if (false && emitComp->verbose) // this is not useful in normal dumps (hence it is normally under "if (false)")
+ {
+ // If there's an error during emission, we may want to connect the post-copy address
+ // of an instrDesc with the pre-copy address (the one that was originally created). This
+ // printing enables that.
+ printf("copying instruction group from [0x%x..0x%x) to [0x%x..0x%x).\n", dspPtr(emitCurIGfreeBase),
+ dspPtr(emitCurIGfreeBase + sz), dspPtr(id), dspPtr(id + sz));
+ }
+#endif
+
+ /* Record how many instructions and bytes of code this group contains */
+
+ noway_assert((BYTE)emitCurIGinsCnt == emitCurIGinsCnt);
+ noway_assert((unsigned short)emitCurIGsize == emitCurIGsize);
+
+ ig->igInsCnt = (BYTE)emitCurIGinsCnt;
+ ig->igSize = (unsigned short)emitCurIGsize;
+ emitCurCodeOffset += emitCurIGsize;
+ assert(IsCodeAligned(emitCurCodeOffset));
+
+#if EMITTER_STATS
+ emitTotalIGicnt += emitCurIGinsCnt;
+ emitTotalIGsize += sz;
+ emitSizeMethod += sz;
+#endif
+
+ // printf("Group [%08X]%3u has %2u instructions (%4u bytes at %08X)\n", ig, ig->igNum, emitCurIGinsCnt, sz, id);
+
+ /* Record the live GC register set - if and only if it is not an emitter added block */
+
+ if (!(ig->igFlags & IGF_EMIT_ADD))
+ {
+ ig->igGCregs = (regMaskSmall)emitInitGCrefRegs;
+ }
+
+ if (!emitAdd)
+ {
+ /* Update the previously recorded live GC ref sets, but not
+ if we are starting an "overflow" buffer. Note that this is
+ only used to determine whether we need to store or not store
+ the GC ref sets for the next IG, which is dependent on exactly
+ what the state of the emitter GC ref sets will be when the
+ next IG is processed in the emitter.
+ */
+
+ VarSetOps::Assign(emitComp, emitPrevGCrefVars, emitThisGCrefVars);
+ emitPrevGCrefRegs = emitThisGCrefRegs;
+ emitPrevByrefRegs = emitThisByrefRegs;
+
+ emitForceStoreGCState = false;
+ }
+
+#ifdef DEBUG
+ if (emitComp->opts.dspCode)
+ {
+ printf("\n G_M%03u_IG%02u:", Compiler::s_compMethodsCount, ig->igNum);
+ if (emitComp->verbose)
+ {
+ printf(" ; offs=%06XH, funclet=%02u", ig->igOffs, ig->igFuncIdx);
+ }
+ else
+ {
+ printf(" ; funclet=%02u", ig->igFuncIdx);
+ }
+ printf("\n");
+ }
+#endif
+
+ /* Did we have any jumps in this group? */
+
+ if (emitCurIGjmpList)
+ {
+ instrDescJmp* list = nullptr;
+ instrDescJmp* last = nullptr;
+
+ /* Move jumps to the global list, update their 'next' links */
+
+ do
+ {
+ /* Grab the jump and remove it from the list */
+
+ instrDescJmp* oj = emitCurIGjmpList;
+ emitCurIGjmpList = oj->idjNext;
+
+ /* Figure out the address of where the jump got copied */
+
+ size_t of = (BYTE*)oj - emitCurIGfreeBase;
+ instrDescJmp* nj = (instrDescJmp*)(ig->igData + of);
+
+ // printf("Jump moved from %08X to %08X\n", oj, nj);
+ // printf("jmp [%08X] at %08X + %03u\n", nj, ig, nj->idjOffs);
+
+ assert(nj->idjIG == ig);
+ assert(nj->idIns() == oj->idIns());
+ assert(nj->idjNext == oj->idjNext);
+
+ /* Make sure the jumps are correctly ordered */
+
+ assert(last == nullptr || last->idjOffs > nj->idjOffs);
+
+ if (ig->igFlags & IGF_FUNCLET_PROLOG)
+ {
+ // Our funclet prologs have short jumps; if the prolog ever had
+ // long jumps, then we'd have to insert the list in sorted order
+ // rather than just appending to the emitJumpList.
+ noway_assert(nj->idjShort);
+ if (nj->idjShort)
+ {
+ continue;
+ }
+ }
+
+ /* Append the new jump to the list */
+
+ nj->idjNext = list;
+ list = nj;
+
+ if (last == nullptr)
+ {
+ last = nj;
+ }
+ } while (emitCurIGjmpList);
+
+ if (last != nullptr)
+ {
+ /* Append the jump(s) from this IG to the global list */
+ bool prologJump = (ig == emitPrologIG);
+ if ((emitJumpList == nullptr) || prologJump)
+ {
+ last->idjNext = emitJumpList;
+ emitJumpList = list;
+ }
+ else
+ {
+ last->idjNext = nullptr;
+ emitJumpLast->idjNext = list;
+ }
+
+ if (!prologJump || (emitJumpLast == nullptr))
+ {
+ emitJumpLast = last;
+ }
+ }
+ }
+
+ /* Fix the last instruction field */
+
+ if (sz != 0)
+ {
+ assert(emitLastIns != nullptr);
+ assert(emitCurIGfreeBase <= (BYTE*)emitLastIns);
+ assert((BYTE*)emitLastIns < emitCurIGfreeBase + sz);
+ emitLastIns = (instrDesc*)((BYTE*)id + ((BYTE*)emitLastIns - (BYTE*)emitCurIGfreeBase));
+ }
+
+ /* Reset the buffer free pointers */
+
+ emitCurIGfreeNext = emitCurIGfreeBase;
+
+ return ig;
+}
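+
+// A minimal, self-contained sketch of the pointer-rebasing arithmetic emitSavIG uses
+// above for the copied jump descriptors and for emitLastIns: a pointer into the
+// temporary buffer is translated by carrying its byte offset over to the permanent
+// copy. The buffers and the function name below are illustrative only.
+static void exampleRebaseAfterCopy()
+{
+    alignas(int) char oldBuf[64] = {};
+    alignas(int) char newBuf[64] = {};
+
+    int* oldItem = (int*)(oldBuf + 8); // an item living in the temporary buffer
+    *oldItem     = 7;
+
+    memcpy(newBuf, oldBuf, sizeof(oldBuf));
+
+    size_t offset  = (char*)oldItem - oldBuf; // byte offset within the old buffer
+    int*   newItem = (int*)(newBuf + offset); // the same offset in the new buffer
+    assert(*newItem == 7);
+}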
+
+#ifdef LEGACY_BACKEND
+void emitter::emitTmpSizeChanged(unsigned tmpSize)
+{
+ assert(emitGrowableMaxByteOffs <= SCHAR_MAX);
+
+#ifdef DEBUG
+ // Workaround for FP code
+ bool bAssert = JitConfig.JitMaxTempAssert() ? true : false;
+
+ if (tmpSize > emitMaxTmpSize && bAssert)
+ {
+ // TODO-Review: We have a known issue involving floating point code and this assert.
+ // The generated code will be OK; this is only a warning.
+ // To suppress this assert in the future, set the registry key JITMaxTempAssert=0.
+ //
+ assert(!"Incorrect max tmp size set.");
+ }
+#endif
+
+ if (tmpSize <= emitMaxTmpSize)
+ return;
+
+ unsigned change = tmpSize - emitMaxTmpSize;
+
+ /* If we have used a small offset to access a variable, growing the
+ temp size is a problem if we should have used a large offset instead.
+ Detect if such a situation happens and bail */
+
+ if (emitGrowableMaxByteOffs <= SCHAR_MAX && (emitGrowableMaxByteOffs + change) > SCHAR_MAX)
+ {
+#ifdef DEBUG
+ if (emitComp->verbose)
+ printf("Under-estimated var offset encoding size for ins #%Xh\n", emitMaxByteOffsIdNum);
+#endif
+ IMPL_LIMITATION("Should have used large offset to access var");
+ }
+
+ emitMaxTmpSize = tmpSize;
+ emitGrowableMaxByteOffs += change;
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Start generating code to be scheduled; called once per method.
+ */
+
+void emitter::emitBegFN(bool hasFramePtr
+#if defined(DEBUG)
+ ,
+ bool chkAlign
+#endif
+#ifdef LEGACY_BACKEND
+ ,
+ unsigned lclSize
+#endif // LEGACY_BACKEND
+ ,
+ unsigned maxTmpSize)
+{
+ insGroup* ig;
+
+ /* Assume we won't need the temp instruction buffer */
+
+ emitCurIGfreeBase = nullptr;
+ emitIGbuffSize = 0;
+
+ /* Record stack frame info (the temp size is just an estimate) */
+
+ emitHasFramePtr = hasFramePtr;
+
+ emitMaxTmpSize = maxTmpSize;
+
+#ifdef LEGACY_BACKEND
+ emitLclSize = lclSize;
+ emitGrowableMaxByteOffs = 0;
+#ifdef DEBUG
+ emitMaxByteOffsIdNum = (unsigned)-1;
+#endif // DEBUG
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUG
+ emitChkAlign = chkAlign;
+#endif
+
+ /* We have no epilogs yet */
+
+ emitEpilogSize = 0;
+ emitEpilogCnt = 0;
+
+#ifdef _TARGET_XARCH_
+ emitExitSeqBegLoc.Init();
+ emitExitSeqSize = INT_MAX;
+#endif // _TARGET_XARCH_
+
+ emitPlaceholderList = emitPlaceholderLast = nullptr;
+
+#ifdef JIT32_GCENCODER
+ emitEpilogList = emitEpilogLast = NULL;
+#endif // JIT32_GCENCODER
+
+ /* We don't have any jumps */
+
+ emitJumpList = emitJumpLast = nullptr;
+ emitCurIGjmpList = nullptr;
+
+ emitFwdJumps = false;
+ emitNoGCIG = false;
+ emitForceNewIG = false;
+
+ /* We have not recorded any live sets */
+
+ assert(VarSetOps::IsEmpty(emitComp, emitThisGCrefVars));
+ assert(VarSetOps::IsEmpty(emitComp, emitInitGCrefVars));
+ assert(VarSetOps::IsEmpty(emitComp, emitPrevGCrefVars));
+ emitThisGCrefRegs = RBM_NONE;
+ emitInitGCrefRegs = RBM_NONE;
+ emitPrevGCrefRegs = RBM_NONE;
+ emitThisByrefRegs = RBM_NONE;
+ emitInitByrefRegs = RBM_NONE;
+ emitPrevByrefRegs = RBM_NONE;
+
+ emitForceStoreGCState = false;
+
+#ifdef DEBUG
+
+ emitIssuing = false;
+
+#endif
+
+ /* Assume there will be no GC ref variables */
+
+ emitGCrFrameOffsMin = emitGCrFrameOffsMax = emitGCrFrameOffsCnt = 0;
+#ifdef DEBUG
+ emitGCrFrameLiveTab = nullptr;
+#endif
+
+ /* We have no groups / code at this point */
+
+ emitIGlist = emitIGlast = nullptr;
+
+ emitCurCodeOffset = 0;
+ emitFirstColdIG = nullptr;
+ emitTotalCodeSize = 0;
+
+#if EMITTER_STATS
+ emitTotalIGmcnt++;
+ emitSizeMethod = 0;
+#endif
+
+ emitInsCount = 0;
+
+ /* The stack is empty now */
+
+ emitCurStackLvl = 0;
+
+#if EMIT_TRACK_STACK_DEPTH
+ emitMaxStackDepth = 0;
+ emitCntStackDepth = sizeof(int);
+#endif
+
+ /* No data sections have been created */
+
+ emitDataSecCur = nullptr;
+
+ memset(&emitConsDsc, 0, sizeof(emitConsDsc));
+
+#ifdef PSEUDORANDOM_NOP_INSERTION
+ // for random NOP insertion
+
+ emitEnableRandomNops();
+ emitComp->info.compRNG.Init(emitComp->info.compChecksum);
+ emitNextNop = emitNextRandomNop();
+ emitInInstrumentation = false;
+#endif // PSEUDORANDOM_NOP_INSERTION
+
+ /* Create the first IG, it will be used for the prolog */
+
+ emitNxtIGnum = 1;
+
+ emitPrologIG = emitIGlist = emitIGlast = emitCurIG = ig = emitAllocIG();
+
+ emitLastIns = nullptr;
+
+ ig->igNext = nullptr;
+
+#ifdef DEBUG
+ emitScratchSigInfo = nullptr;
+#endif // DEBUG
+
+ /* Append another group, to start generating the method body */
+
+ emitNewIG();
+}
+
+#ifdef PSEUDORANDOM_NOP_INSERTION
+int emitter::emitNextRandomNop()
+{
+ return emitComp->info.compRNG.Next(1, 9);
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Done generating code to be scheduled; called once per method.
+ */
+
+void emitter::emitEndFN()
+{
+}
+
+// member function iiaIsJitDataOffset for idAddrUnion, defers to Compiler::eeIsJitDataOffs
+bool emitter::instrDesc::idAddrUnion::iiaIsJitDataOffset() const
+{
+ return Compiler::eeIsJitDataOffs(iiaFieldHnd);
+}
+
+// member function iiaGetJitDataOffset for idAddrUnion, defers to Compiler::eeGetJitDataOffs
+int emitter::instrDesc::idAddrUnion::iiaGetJitDataOffset() const
+{
+ assert(iiaIsJitDataOffset());
+ return Compiler::eeGetJitDataOffs(iiaFieldHnd);
+}
+
+void emitter::dispIns(instrDesc* id)
+{
+#ifdef DEBUG
+ emitInsSanityCheck(id);
+
+ if (emitComp->opts.dspCode)
+ {
+ emitDispIns(id, true, false, false);
+ }
+
+#if EMIT_TRACK_STACK_DEPTH
+ assert((int)emitCurStackLvl >= 0);
+#endif
+ size_t sz = emitSizeOfInsDsc(id);
+ assert(id->idDebugOnlyInfo()->idSize == sz);
+#endif // DEBUG
+
+#if EMITTER_STATS
+ emitIFcounts[id->idInsFmt()]++;
+#endif
+}
+
+void emitter::appendToCurIG(instrDesc* id)
+{
+ emitCurIGsize += id->idCodeSize();
+}
+
+/*****************************************************************************
+ *
+ * Display (optionally) an instruction offset.
+ */
+
+#ifdef DEBUG
+
+void emitter::emitDispInsOffs(unsigned offs, bool doffs)
+{
+ if (doffs)
+ {
+ printf("%06X", offs);
+ }
+ else
+ {
+ printf(" ");
+ }
+}
+
+#endif // DEBUG
+
+#ifdef JIT32_GCENCODER
+
+/*****************************************************************************
+ *
+ * Call the specified function pointer for each epilog block in the current
+ * method with the epilog's relative code offset. Returns the sum of the
+ * values returned by the callback.
+ */
+
+size_t emitter::emitGenEpilogLst(size_t (*fp)(void*, unsigned), void* cp)
+{
+ EpilogList* el;
+ size_t sz;
+
+ for (el = emitEpilogList, sz = 0; el; el = el->elNext)
+ {
+ assert(el->elIG->igFlags & IGF_EPILOG);
+
+ UNATIVE_OFFSET ofs =
+ el->elIG->igOffs; // The epilog starts at the beginning of the IG, so the IG offset is correct
+
+ sz += fp(cp, ofs);
+ }
+
+ return sz;
+}
+
+#endif // JIT32_GCENCODER
+
+/*****************************************************************************
+ *
+ * The following series of methods allocates instruction descriptors.
+ */
+
+void* emitter::emitAllocInstr(size_t sz, emitAttr opsz)
+{
+ instrDesc* id;
+
+#ifdef DEBUG
+ // Under STRESS_EMITTER, put every instruction in its own instruction group.
+ // We can't do this for a prolog, epilog, funclet prolog, or funclet epilog,
+ // because those are generated out of order. We currently have a limitation
+ // where the jump shortening pass uses the instruction group number to determine
+ // if something is earlier or later in the code stream. This implies that
+ // these groups cannot be more than a single instruction group. Note that
+ // the prolog/epilog placeholder groups ARE generated in order, and are
+ // re-used. But generating additional groups would not work.
+ if (emitComp->compStressCompile(Compiler::STRESS_EMITTER, 1) && emitCurIGinsCnt && !emitIGisInProlog(emitCurIG) &&
+ !emitIGisInEpilog(emitCurIG)
+#if FEATURE_EH_FUNCLETS
+ && !emitIGisInFuncletProlog(emitCurIG) && !emitIGisInFuncletEpilog(emitCurIG)
+#endif // FEATURE_EH_FUNCLETS
+ )
+ {
+ emitNxtIG(true);
+ }
+#endif
+
+#ifdef PSEUDORANDOM_NOP_INSERTION
+ // TODO-ARM-Bug?: PSEUDORANDOM_NOP_INSERTION is not defined for _TARGET_ARM_
+ // ARM - This is currently broken on _TARGET_ARM_
+ // When nopSize is odd we misalign emitCurIGsize
+ //
+ if (!(emitComp->opts.eeFlags & CORJIT_FLG_PREJIT) && !emitInInstrumentation &&
+ !emitIGisInProlog(emitCurIG) // don't do this in prolog or epilog
+ && !emitIGisInEpilog(emitCurIG) &&
+ emitRandomNops // sometimes we turn off where exact codegen is needed (pinvoke inline)
+ )
+ {
+ if (emitNextNop == 0)
+ {
+ int nopSize = 4;
+ emitInInstrumentation = true;
+ instrDesc* idnop = emitNewInstr();
+ emitInInstrumentation = false;
+ idnop->idInsFmt(IF_NONE);
+ idnop->idIns(INS_nop);
+#if defined(_TARGET_XARCH_)
+ idnop->idCodeSize(nopSize);
+#else
+#error "Undefined target for pseudorandom NOP insertion"
+#endif
+
+ emitCurIGsize += nopSize;
+ emitNextNop = emitNextRandomNop();
+ }
+ else
+ emitNextNop--;
+ }
+#endif // PSEUDORANDOM_NOP_INSERTION
+
+ assert(IsCodeAligned(emitCurIGsize));
+
+ /* Make sure we have enough space for the new instruction */
+
+ if ((emitCurIGfreeNext + sz >= emitCurIGfreeEndp) || emitForceNewIG)
+ {
+ emitNxtIG(true);
+ }
+
+ /* Grab the space for the instruction */
+
+ emitLastIns = id = (instrDesc*)emitCurIGfreeNext;
+ emitCurIGfreeNext += sz;
+
+ assert(sz >= sizeof(void*));
+ memset(id, 0, sz);
+
+ // These fields should have been zero-ed by the above
+ assert(id->idReg1() == regNumber(0));
+ assert(id->idReg2() == regNumber(0));
+#ifdef _TARGET_XARCH_
+ assert(id->idCodeSize() == 0);
+#endif
+
+#if HAS_TINY_DESC
+ /* Is the second area to be cleared actually present? */
+ if (sz >= SMALL_IDSC_SIZE)
+ {
+ /* Clear the second 4 bytes, or the 'SMALL' part */
+ *(int*)((BYTE*)id + (SMALL_IDSC_SIZE - sizeof(int))) = 0;
+
+ // These fields should have been zero-ed by the above
+ assert(id->idIsLargeCns() == false);
+ assert(id->idIsLargeDsp() == false);
+ assert(id->idIsLargeCall() == false);
+ }
+#endif
+
+ // Make sure that idAddrUnion is just a union of various pointer sized things
+ C_ASSERT(sizeof(CORINFO_FIELD_HANDLE) <= sizeof(void*));
+ C_ASSERT(sizeof(CORINFO_METHOD_HANDLE) <= sizeof(void*));
+ C_ASSERT(sizeof(emitter::emitAddrMode) <= sizeof(void*));
+ C_ASSERT(sizeof(emitLclVarAddr) <= sizeof(void*));
+ C_ASSERT(sizeof(emitter::instrDesc) == (SMALL_IDSC_SIZE + sizeof(void*)));
+
+ emitInsCount++;
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+ /* In debug mode we clear/set some additional fields */
+
+ instrDescDebugInfo* info = (instrDescDebugInfo*)emitGetMem(sizeof(*info));
+
+ info->idNum = emitInsCount;
+ info->idSize = sz;
+ info->idVarRefOffs = 0;
+ info->idMemCookie = 0;
+ info->idClsCookie = nullptr;
+#ifdef TRANSLATE_PDB
+ info->idilStart = emitInstrDescILBase;
+#endif
+ info->idFinallyCall = false;
+ info->idCatchRet = false;
+ info->idCallSig = nullptr;
+
+ id->idDebugOnlyInfo(info);
+
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+ /* Store the size and handle the two special values
+ that indicate GCref and ByRef */
+
+ if (EA_IS_GCREF(opsz))
+ {
+ /* A special value indicates a GCref pointer value */
+
+ id->idGCref(GCT_GCREF);
+ id->idOpSize(EA_PTRSIZE);
+ }
+ else if (EA_IS_BYREF(opsz))
+ {
+ /* A special value indicates a Byref pointer value */
+
+ id->idGCref(GCT_BYREF);
+ id->idOpSize(EA_PTRSIZE);
+ }
+ else
+ {
+ id->idGCref(GCT_NONE);
+ id->idOpSize(EA_SIZE(opsz));
+ }
+
+#if RELOC_SUPPORT
+ // Amd64: ip-relative addressing is supported even when not generating relocatable ngen code
+ if (EA_IS_DSP_RELOC(opsz)
+#ifndef _TARGET_AMD64_
+ && emitComp->opts.compReloc
+#endif //_TARGET_AMD64_
+ )
+ {
+ /* Mark idInfo()->idDspReloc to remember that the */
+ /* address mode has a displacement that is relocatable */
+ id->idSetIsDspReloc();
+ }
+
+ if (EA_IS_CNS_RELOC(opsz) && emitComp->opts.compReloc)
+ {
+ /* Mark idInfo()->idCnsReloc to remember that the */
+ /* instruction has an immediate constant that is relocatable */
+ id->idSetIsCnsReloc();
+ }
+#endif
+
+#if EMITTER_STATS
+ emitTotalInsCnt++;
+#endif
+
+ /* Update the instruction count */
+
+ emitCurIGinsCnt++;
+
+ return id;
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Make sure the code offsets of all instruction groups look reasonable.
+ */
+void emitter::emitCheckIGoffsets()
+{
+ insGroup* tempIG;
+ size_t offsIG;
+
+ for (tempIG = emitIGlist, offsIG = 0; tempIG; tempIG = tempIG->igNext)
+ {
+ if (tempIG->igOffs != offsIG)
+ {
+ printf("Block #%u has offset %08X, expected %08X\n", tempIG->igNum, tempIG->igOffs, offsIG);
+ assert(!"bad block offset");
+ }
+
+ offsIG += tempIG->igSize;
+ }
+
+ if (emitTotalCodeSize && emitTotalCodeSize != offsIG)
+ {
+ printf("Total code size is %08X, expected %08X\n", emitTotalCodeSize, offsIG);
+
+ assert(!"bad total code size");
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Begin generating a method prolog.
+ */
+
+void emitter::emitBegProlog()
+{
+ assert(emitComp->compGeneratingProlog);
+
+#if EMIT_TRACK_STACK_DEPTH
+
+ /* Don't measure stack depth inside the prolog, it's misleading */
+
+ emitCntStackDepth = 0;
+
+ assert(emitCurStackLvl == 0);
+
+#endif
+
+ emitNoGCIG = true;
+ emitForceNewIG = false;
+
+ /* Switch to the pre-allocated prolog IG */
+
+ emitGenIG(emitPrologIG);
+
+ /* Nothing is live on entry to the prolog */
+
+ // These were initialized to Empty at the start of compilation.
+ VarSetOps::ClearD(emitComp, emitInitGCrefVars);
+ VarSetOps::ClearD(emitComp, emitPrevGCrefVars);
+ emitInitGCrefRegs = RBM_NONE;
+ emitPrevGCrefRegs = RBM_NONE;
+ emitInitByrefRegs = RBM_NONE;
+ emitPrevByrefRegs = RBM_NONE;
+}
+
+/*****************************************************************************
+ *
+ * Return the code offset of the current location in the prolog.
+ */
+
+unsigned emitter::emitGetPrologOffsetEstimate()
+{
+ /* For now only allow a single prolog ins group */
+
+ assert(emitPrologIG);
+ assert(emitPrologIG == emitCurIG);
+
+ return emitCurIGsize;
+}
+
+/*****************************************************************************
+ *
+ * Mark the code offset of the current location as the end of the prolog,
+ * so it can be used later to compute the actual size of the prolog.
+ */
+
+void emitter::emitMarkPrologEnd()
+{
+ assert(emitComp->compGeneratingProlog);
+
+ /* For now only allow a single prolog ins group */
+
+ assert(emitPrologIG);
+ assert(emitPrologIG == emitCurIG);
+
+ emitPrologEndPos = emitCurOffset();
+}
+
+/*****************************************************************************
+ *
+ * Finish generating a method prolog.
+ */
+
+void emitter::emitEndProlog()
+{
+ assert(emitComp->compGeneratingProlog);
+
+ size_t prolSz;
+
+ insGroup* tempIG;
+
+ emitNoGCIG = false;
+
+ /* Save the prolog IG if non-empty or if only one block */
+
+ if (emitCurIGnonEmpty() || emitCurIG == emitPrologIG)
+ {
+ emitSavIG();
+ }
+
+#if EMIT_TRACK_STACK_DEPTH
+ /* Reset the stack depth values */
+
+ emitCurStackLvl = 0;
+ emitCntStackDepth = sizeof(int);
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Create a placeholder instruction group to be used by a prolog or epilog,
+ * either for the main function, or a funclet.
+ */
+
+void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
+ BasicBlock* igBB,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ bool last)
+{
+ assert(igBB != nullptr);
+
+ bool emitAdd = false;
+
+ if (igType == IGPT_EPILOG
+#if FEATURE_EH_FUNCLETS
+ || igType == IGPT_FUNCLET_EPILOG
+#endif // FEATURE_EH_FUNCLETS
+ )
+ {
+#ifdef _TARGET_AMD64_
+ emitOutputPreEpilogNOP();
+#endif // _TARGET_AMD64_
+
+ emitAdd = true;
+ }
+
+ if (emitCurIGnonEmpty())
+ {
+ emitNxtIG(emitAdd);
+ }
+
+ /* Update GC tracking for the beginning of the placeholder IG */
+
+ if (!emitAdd)
+ {
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, GCvars);
+ VarSetOps::Assign(emitComp, emitInitGCrefVars, GCvars);
+ emitThisGCrefRegs = emitInitGCrefRegs = gcrefRegs;
+ emitThisByrefRegs = emitInitByrefRegs = byrefRegs;
+ }
+
+ /* Convert the group to a placeholder group */
+
+ insGroup* igPh = emitCurIG;
+
+ igPh->igFlags |= IGF_PLACEHOLDER;
+
+ /* Note that we might be re-using a previously created but empty IG. In this
+ * case, we need to make sure any re-used fields, such as igFuncIdx, are correct.
+ */
+
+ igPh->igFuncIdx = emitComp->compCurrFuncIdx;
+
+ /* Create a separate block of memory to store placeholder information.
+ * We could use unions to put some of this into the insGroup itself, but we don't
+ * want to grow the insGroup, and it's difficult to make sure the
+ * insGroup fields are getting set and used elsewhere.
+ */
+
+ igPh->igPhData = new (emitComp, CMK_InstDesc) insPlaceholderGroupData;
+
+ igPh->igPhData->igPhNext = nullptr;
+ igPh->igPhData->igPhType = igType;
+ igPh->igPhData->igPhBB = igBB;
+
+ VarSetOps::AssignNoCopy(emitComp, igPh->igPhData->igPhPrevGCrefVars, VarSetOps::UninitVal());
+ VarSetOps::Assign(emitComp, igPh->igPhData->igPhPrevGCrefVars, emitPrevGCrefVars);
+ igPh->igPhData->igPhPrevGCrefRegs = emitPrevGCrefRegs;
+ igPh->igPhData->igPhPrevByrefRegs = emitPrevByrefRegs;
+
+ VarSetOps::AssignNoCopy(emitComp, igPh->igPhData->igPhInitGCrefVars, VarSetOps::UninitVal());
+ VarSetOps::Assign(emitComp, igPh->igPhData->igPhInitGCrefVars, emitInitGCrefVars);
+ igPh->igPhData->igPhInitGCrefRegs = emitInitGCrefRegs;
+ igPh->igPhData->igPhInitByrefRegs = emitInitByrefRegs;
+
+#if EMITTER_STATS
+ emitTotalPhIGcnt += 1;
+#endif
+
+ // Mark function prologs and epilogs properly in the igFlags bits. These bits
+ // will get used and propagated when the placeholder is converted to a non-placeholder
+ // during prolog/epilog generation.
+
+ if (igType == IGPT_EPILOG)
+ {
+ igPh->igFlags |= IGF_EPILOG;
+ }
+#if FEATURE_EH_FUNCLETS
+ else if (igType == IGPT_FUNCLET_PROLOG)
+ {
+ igPh->igFlags |= IGF_FUNCLET_PROLOG;
+ }
+#ifdef DEBUG
+ else if (igType == IGPT_FUNCLET_EPILOG)
+ {
+ igPh->igFlags |= IGF_FUNCLET_EPILOG;
+ }
+#endif // DEBUG
+#endif // FEATURE_EH_FUNCLETS
+
+ /* Link it into the placeholder list */
+
+ if (emitPlaceholderList)
+ {
+ emitPlaceholderLast->igPhData->igPhNext = igPh;
+ }
+ else
+ {
+ emitPlaceholderList = igPh;
+ }
+
+ emitPlaceholderLast = igPh;
+
+ // Give an estimated size of this placeholder IG and
+ // increment emitCurCodeOffset since we are not calling emitNewIG()
+ //
+ emitCurIGsize += MAX_PLACEHOLDER_IG_SIZE;
+ emitCurCodeOffset += emitCurIGsize;
+
+#ifdef DEBUGGING_SUPPORT
+
+#if FEATURE_EH_FUNCLETS
+ // Add the appropriate IP mapping debugging record for this placeholder
+ // group.
+
+ // genExitCode() adds the mapping for main function epilogs
+ if (emitComp->opts.compDbgInfo)
+ {
+ if (igType == IGPT_FUNCLET_PROLOG)
+ {
+ codeGen->genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::PROLOG, true);
+ }
+ else if (igType == IGPT_FUNCLET_EPILOG)
+ {
+ codeGen->genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+#endif // DEBUGGING_SUPPORT
+
+ /* Start a new IG if more code follows */
+
+ if (last)
+ {
+ emitCurIG = nullptr;
+ }
+ else
+ {
+ if (igType == IGPT_EPILOG
+#if FEATURE_EH_FUNCLETS
+ || igType == IGPT_FUNCLET_EPILOG
+#endif // FEATURE_EH_FUNCLETS
+ )
+ {
+ // If this was an epilog, then assume this is the end of any currently in progress
+ // no-GC region. If a block after the epilog needs to be no-GC, it needs to call
+ // emitter::emitDisableGC() directly. This behavior is depended upon by the fast
+ // tailcall implementation, which disables GC at the beginning of argument setup,
+ // but assumes that after the epilog it will be re-enabled.
+ emitNoGCIG = false;
+ }
+
+ emitNewIG();
+
+ // We don't know what the GC ref state will be at the end of the placeholder
+ // group. So, force the next IG to store all the GC ref state variables;
+        // don't omit them just because emitPrev* happens to equal emitInit*, since
+        // emitPrev* will be inaccurate. (Note that, currently, GCrefRegs and ByrefRegs
+        // are always saved anyway.)
+        //
+        // There is no need to re-initialize the emitPrev* variables, as they won't be used
+        // while emitForceStoreGCState==true, and will be re-initialized just before
+        // emitForceStoreGCState is set to false.
+
+ emitForceStoreGCState = true;
+
+ /* The group after the placeholder group doesn't get the "propagate" flags */
+
+ emitCurIG->igFlags &= ~IGF_PROPAGATE_MASK;
+ }
+
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("*************** After placeholder IG creation\n");
+ emitDispIGlist(false);
+ }
+#endif
+}
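+
+// Note on placeholder lifetime: the placeholder IGs created above are queued on
+// emitPlaceholderList and are later walked by emitGeneratePrologEpilog() (below),
+// which converts each one back into a normal IG via emitBegPrologEpilog() and
+// emits the actual prolog or epilog code into it.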
+
+/*****************************************************************************
+ *
+ * Generate all prologs and epilogs
+ */
+
+void emitter::emitGeneratePrologEpilog()
+{
+#ifdef DEBUG
+ unsigned prologCnt = 0;
+ unsigned epilogCnt = 0;
+#if FEATURE_EH_FUNCLETS
+ unsigned funcletPrologCnt = 0;
+ unsigned funcletEpilogCnt = 0;
+#endif // FEATURE_EH_FUNCLETS
+#endif // DEBUG
+
+ insGroup* igPh;
+ insGroup* igPhNext;
+
+ // Generating the prolog/epilog is going to destroy the placeholder group,
+ // so save the "next" pointer before that happens.
+
+ for (igPh = emitPlaceholderList; igPh != nullptr; igPh = igPhNext)
+ {
+ assert(igPh->igFlags & IGF_PLACEHOLDER);
+
+ igPhNext = igPh->igPhData->igPhNext;
+
+ BasicBlock* igPhBB = igPh->igPhData->igPhBB;
+
+ switch (igPh->igPhData->igPhType)
+ {
+ case IGPT_PROLOG: // currently unused
+ INDEBUG(++prologCnt);
+ break;
+
+ case IGPT_EPILOG:
+ INDEBUG(++epilogCnt);
+ emitBegFnEpilog(igPh);
+ codeGen->genFnEpilog(igPhBB);
+ emitEndFnEpilog();
+ break;
+
+#if FEATURE_EH_FUNCLETS
+
+ case IGPT_FUNCLET_PROLOG:
+ INDEBUG(++funcletPrologCnt);
+ emitBegFuncletProlog(igPh);
+ codeGen->genFuncletProlog(igPhBB);
+ emitEndFuncletProlog();
+ break;
+
+ case IGPT_FUNCLET_EPILOG:
+ INDEBUG(++funcletEpilogCnt);
+ emitBegFuncletEpilog(igPh);
+ codeGen->genFuncletEpilog();
+ emitEndFuncletEpilog();
+ break;
+
+#endif // FEATURE_EH_FUNCLETS
+
+ default:
+ unreached();
+ }
+ }
+
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("%d prologs, %d epilogs", prologCnt, epilogCnt);
+#if FEATURE_EH_FUNCLETS
+ printf(", %d funclet prologs, %d funclet epilogs", funcletPrologCnt, funcletEpilogCnt);
+#endif // FEATURE_EH_FUNCLETS
+ printf("\n");
+
+// prolog/epilog code doesn't use this yet
+// noway_assert(prologCnt == 1);
+// noway_assert(epilogCnt == emitEpilogCnt); // Is this correct?
+#if FEATURE_EH_FUNCLETS
+ assert(funcletPrologCnt == emitComp->ehFuncletCount());
+#endif // FEATURE_EH_FUNCLETS
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Begin all prolog and epilog generation
+ */
+
+void emitter::emitStartPrologEpilogGeneration()
+{
+ /* Save the current IG if it's non-empty */
+
+ if (emitCurIGnonEmpty())
+ {
+ emitSavIG();
+ }
+ else
+ {
+ assert(emitCurIG == nullptr);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Finish all prolog and epilog generation
+ */
+
+void emitter::emitFinishPrologEpilogGeneration()
+{
+ /* Update the offsets of all the blocks */
+
+ emitRecomputeIGoffsets();
+
+ /* We should not generate any more code after this */
+
+ emitCurIG = nullptr;
+}
+
+/*****************************************************************************
+ *
+ *  Common code for prolog / epilog beginning. Convert the placeholder group to an actual code IG,
+ * and set it as the current group.
+ */
+
+void emitter::emitBegPrologEpilog(insGroup* igPh)
+{
+ assert(igPh->igFlags & IGF_PLACEHOLDER);
+
+ /* Save the current IG if it's non-empty */
+
+ if (emitCurIGnonEmpty())
+ {
+ emitSavIG();
+ }
+
+ /* Convert the placeholder group to a normal group.
+ * We need to be very careful to re-initialize the IG properly.
+ * It turns out, this means we only need to clear the placeholder bit
+ * and clear the igPhData field, and emitGenIG() will do the rest,
+ * since in the placeholder IG we didn't touch anything that is set by emitAllocIG().
+ */
+
+ igPh->igFlags &= ~IGF_PLACEHOLDER;
+ emitNoGCIG = true;
+ emitForceNewIG = false;
+
+ /* Set up the GC info that we stored in the placeholder */
+
+ VarSetOps::Assign(emitComp, emitPrevGCrefVars, igPh->igPhData->igPhPrevGCrefVars);
+ emitPrevGCrefRegs = igPh->igPhData->igPhPrevGCrefRegs;
+ emitPrevByrefRegs = igPh->igPhData->igPhPrevByrefRegs;
+
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, igPh->igPhData->igPhInitGCrefVars);
+ VarSetOps::Assign(emitComp, emitInitGCrefVars, igPh->igPhData->igPhInitGCrefVars);
+ emitThisGCrefRegs = emitInitGCrefRegs = igPh->igPhData->igPhInitGCrefRegs;
+ emitThisByrefRegs = emitInitByrefRegs = igPh->igPhData->igPhInitByrefRegs;
+
+ igPh->igPhData = nullptr;
+
+ /* Create a non-placeholder group pointer that we'll now use */
+
+ insGroup* ig = igPh;
+
+ /* Set the current function using the function index we stored */
+
+ emitComp->funSetCurrentFunc(ig->igFuncIdx);
+
+ /* Set the new IG as the place to generate code */
+
+ emitGenIG(ig);
+
+#if EMIT_TRACK_STACK_DEPTH
+
+ /* Don't measure stack depth inside the prolog / epilog, it's misleading */
+
+ emitCntStackDepth = 0;
+
+ assert(emitCurStackLvl == 0);
+
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Common code for end of prolog / epilog
+ */
+
+void emitter::emitEndPrologEpilog()
+{
+ emitNoGCIG = false;
+
+ /* Save the IG if non-empty */
+
+ if (emitCurIGnonEmpty())
+ {
+ emitSavIG();
+ }
+
+ assert(emitCurIGsize <= MAX_PLACEHOLDER_IG_SIZE);
+
+#if EMIT_TRACK_STACK_DEPTH
+ /* Reset the stack depth values */
+
+ emitCurStackLvl = 0;
+ emitCntStackDepth = sizeof(int);
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Begin generating a main function epilog.
+ */
+
+void emitter::emitBegFnEpilog(insGroup* igPh)
+{
+ emitEpilogCnt++;
+
+ emitBegPrologEpilog(igPh);
+
+#ifdef JIT32_GCENCODER
+
+ EpilogList* el = new (emitComp, CMK_GC) EpilogList;
+ el->elNext = NULL;
+ el->elIG = emitCurIG;
+
+ if (emitEpilogLast)
+ emitEpilogLast->elNext = el;
+ else
+ emitEpilogList = el;
+
+ emitEpilogLast = el;
+
+#endif // JIT32_GCENCODER
+
+ /* Remember current position so that we can compute total epilog size */
+
+ emitEpilogBegLoc.CaptureLocation(this);
+}
+
+/*****************************************************************************
+ *
+ *  Finish generating a main function epilog.
+ */
+
+void emitter::emitEndFnEpilog()
+{
+ emitEndPrologEpilog();
+
+ UNATIVE_OFFSET newSize;
+ UNATIVE_OFFSET epilogBegCodeOffset = emitEpilogBegLoc.CodeOffset(this);
+#ifdef _TARGET_XARCH_
+ UNATIVE_OFFSET epilogExitSeqStartCodeOffset = emitExitSeqBegLoc.CodeOffset(this);
+#else
+ UNATIVE_OFFSET epilogExitSeqStartCodeOffset = emitCodeOffset(emitCurIG, emitCurOffset());
+#endif
+
+ newSize = epilogExitSeqStartCodeOffset - epilogBegCodeOffset;
+
+#ifdef _TARGET_X86_
+
+ /* Compute total epilog size */
+
+ assert(emitEpilogSize == 0 || emitEpilogSize == newSize); // All epilogs must be identical
+ emitEpilogSize = newSize;
+ UNATIVE_OFFSET epilogEndCodeOffset = emitCodeOffset(emitCurIG, emitCurOffset());
+ assert(epilogExitSeqStartCodeOffset != epilogEndCodeOffset);
+
+ newSize = epilogEndCodeOffset - epilogExitSeqStartCodeOffset;
+ if (newSize < emitExitSeqSize)
+ {
+ // We expect either the epilog to be the same every time, or that
+ // one will be a ret or a ret <n> and others will be a jmp addr or jmp [addr];
+ // we make the epilogs the minimum of these. Note that this ONLY works
+ // because the only instruction is the last one and thus a slight
+ // underestimation of the epilog size is harmless (since the EIP
+ // can not be between instructions).
+ assert(emitEpilogCnt == 1 ||
+ (emitExitSeqSize - newSize) <= 5 // delta between size of various forms of jmp (size is either 6 or 5)
+ // and various forms of ret (size is either 1 or 3). The combination can
+                   // be anything between 1 and 5.
+ );
+ emitExitSeqSize = newSize;
+ }
+
+#endif // _TARGET_X86_
+}
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Begin generating a funclet prolog.
+ */
+
+void emitter::emitBegFuncletProlog(insGroup* igPh)
+{
+ emitBegPrologEpilog(igPh);
+}
+
+/*****************************************************************************
+ *
+ * Finish generating a funclet prolog.
+ */
+
+void emitter::emitEndFuncletProlog()
+{
+ emitEndPrologEpilog();
+}
+
+/*****************************************************************************
+ *
+ * Begin generating a funclet epilog.
+ */
+
+void emitter::emitBegFuncletEpilog(insGroup* igPh)
+{
+ emitBegPrologEpilog(igPh);
+}
+
+/*****************************************************************************
+ *
+ * Finish generating a funclet epilog.
+ */
+
+void emitter::emitEndFuncletEpilog()
+{
+ emitEndPrologEpilog();
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef JIT32_GCENCODER
+
+/*****************************************************************************
+ *
+ *  Return 'true' if the current method has only one epilog, which is
+ * at the very end of the method body.
+ */
+
+bool emitter::emitHasEpilogEnd()
+{
+ if (emitEpilogCnt == 1 && (emitIGlast->igFlags & IGF_EPILOG)) // This wouldn't work for funclets
+ return true;
+ else
+ return false;
+}
+
+#endif // JIT32_GCENCODER
+
+#ifdef _TARGET_XARCH_
+
+/*****************************************************************************
+ *
+ * Mark the beginning of the epilog exit sequence by remembering our position.
+ */
+
+void emitter::emitStartExitSeq()
+{
+ assert(emitComp->compGeneratingEpilog);
+
+ emitExitSeqBegLoc.CaptureLocation(this);
+}
+
+#endif // _TARGET_XARCH_
+
+/*****************************************************************************
+ *
+ * The code generator tells us the range of GC ref locals through this
+ * method. Needless to say, locals and temps should be allocated so that
+ * the size of the range is as small as possible.
+ *
+ * offsLo - The FP offset from which the GC pointer range starts.
+ * offsHi - The FP offset at which the GC pointer region ends (exclusive).
+ */
+
+void emitter::emitSetFrameRangeGCRs(int offsLo, int offsHi)
+{
+ assert(emitComp->compGeneratingProlog);
+ assert(offsHi > offsLo);
+
+#ifdef DEBUG
+
+ // A total of 47254 methods compiled.
+ //
+ // GC ref frame variable counts:
+ //
+ // <= 0 ===> 43175 count ( 91% of total)
+ // 1 .. 1 ===> 2367 count ( 96% of total)
+ // 2 .. 2 ===> 887 count ( 98% of total)
+ // 3 .. 5 ===> 579 count ( 99% of total)
+ // 6 .. 10 ===> 141 count ( 99% of total)
+ // 11 .. 20 ===> 40 count ( 99% of total)
+ // 21 .. 50 ===> 42 count ( 99% of total)
+ // 51 .. 128 ===> 15 count ( 99% of total)
+ // 129 .. 256 ===> 4 count ( 99% of total)
+ // 257 .. 512 ===> 4 count (100% of total)
+ // 513 .. 1024 ===> 0 count (100% of total)
+
+ if (emitComp->verbose)
+ {
+ unsigned count = (offsHi - offsLo) / sizeof(void*);
+ printf("%u tracked GC refs are at stack offsets ", count);
+
+ if (offsLo >= 0)
+ {
+ printf(" %04X ... %04X\n", offsLo, offsHi);
+ assert(offsHi >= 0);
+ }
+ else
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ if (!emitComp->compIsProfilerHookNeeded())
+#endif
+ {
+#ifdef _TARGET_AMD64_
+            // The range doesn't have to be all negative on AMD64
+ printf("-%04X ... %04X\n", -offsLo, offsHi);
+#else
+ printf("-%04X ... -%04X\n", -offsLo, -offsHi);
+ assert(offsHi <= 0);
+#endif
+ }
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ else
+ {
+            // Under the profiler, due to prespilling of arguments, offsHi need not be < 0
+ if (offsHi < 0)
+ printf("-%04X ... -%04X\n", -offsLo, -offsHi);
+ else
+ printf("-%04X ... %04X\n", -offsLo, offsHi);
+ }
+#endif
+ }
+
+#endif // DEBUG
+
+ assert(((offsHi - offsLo) % sizeof(void*)) == 0);
+ assert((offsLo % sizeof(void*)) == 0);
+ assert((offsHi % sizeof(void*)) == 0);
+
+ emitGCrFrameOffsMin = offsLo;
+ emitGCrFrameOffsMax = offsHi;
+ emitGCrFrameOffsCnt = (offsHi - offsLo) / sizeof(void*);
+}
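+
+// Worked example (hypothetical values): on a 64-bit target, offsLo = -0x20 and
+// offsHi = 0x08 describe a 0x28-byte frame range, so emitGCrFrameOffsCnt is
+// 0x28 / sizeof(void*) = 5 tracked GC ref slots.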
+
+/*****************************************************************************
+ *
+ * The code generator tells us the range of local variables through this
+ * method.
+ */
+
+void emitter::emitSetFrameRangeLcls(int offsLo, int offsHi)
+{
+}
+
+/*****************************************************************************
+ *
+ * The code generator tells us the range of used arguments through this
+ * method.
+ */
+
+void emitter::emitSetFrameRangeArgs(int offsLo, int offsHi)
+{
+}
+
+/*****************************************************************************
+ *
+ * A conversion table used to map an operand size value (in bytes) into its
+ *  small 'opSize' encoding (OPSZ1 through OPSZ32), and vice versa.
+ */
+
+const emitter::opSize emitter::emitSizeEncode[] = {
+ emitter::OPSZ1, emitter::OPSZ2, OPSIZE_INVALID, emitter::OPSZ4, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID,
+ emitter::OPSZ8, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID,
+ OPSIZE_INVALID, emitter::OPSZ16, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID,
+ OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID,
+ OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, emitter::OPSZ32,
+};
+
+const emitAttr emitter::emitSizeDecode[emitter::OPSZ_COUNT] = {EA_1BYTE, EA_2BYTE, EA_4BYTE,
+ EA_8BYTE, EA_16BYTE, EA_32BYTE};
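+
+// For reference: emitSizeEncode is indexed by (operand size in bytes - 1), so slot 0
+// yields OPSZ1, slot 1 OPSZ2, slot 3 OPSZ4, slot 7 OPSZ8, slot 15 OPSZ16, and slot 31
+// OPSZ32; every non-power-of-two slot holds OPSIZE_INVALID. emitSizeDecode is the
+// inverse map, indexed by the opSize encoding, yielding the corresponding emitAttr.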
+
+/*****************************************************************************
+ *
+ * Allocate an instruction descriptor for an instruction that uses both
+ * a displacement and a constant.
+ */
+
+emitter::instrDesc* emitter::emitNewInstrCnsDsp(emitAttr size, ssize_t cns, int dsp)
+{
+ if (dsp == 0)
+ {
+ if (instrDesc::fitsInSmallCns(cns))
+ {
+ instrDesc* id = emitAllocInstr(size);
+
+ id->idSmallCns(cns);
+
+#if EMITTER_STATS
+ emitSmallCnsCnt++;
+ emitSmallCns[cns - ID_MIN_SMALL_CNS]++;
+ emitSmallDspCnt++;
+#endif
+
+ return id;
+ }
+ else
+ {
+ instrDescCns* id = emitAllocInstrCns(size);
+
+ id->idSetIsLargeCns();
+ id->idcCnsVal = cns;
+
+#if EMITTER_STATS
+ emitLargeCnsCnt++;
+ emitSmallDspCnt++;
+#endif
+
+ return id;
+ }
+ }
+ else
+ {
+ if (instrDesc::fitsInSmallCns(cns))
+ {
+ instrDescDsp* id = emitAllocInstrDsp(size);
+
+ id->idSetIsLargeDsp();
+ id->iddDspVal = dsp;
+
+ id->idSmallCns(cns);
+
+#if EMITTER_STATS
+ emitLargeDspCnt++;
+ emitSmallCnsCnt++;
+ emitSmallCns[cns - ID_MIN_SMALL_CNS]++;
+#endif
+
+ return id;
+ }
+ else
+ {
+ instrDescCnsDsp* id = emitAllocInstrCnsDsp(size);
+
+ id->idSetIsLargeCns();
+ id->iddcCnsVal = cns;
+
+ id->idSetIsLargeDsp();
+ id->iddcDspVal = dsp;
+
+#if EMITTER_STATS
+ emitLargeDspCnt++;
+ emitLargeCnsCnt++;
+#endif
+
+ return id;
+ }
+ }
+}
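+
+// In other words, the four paths above pick the smallest instrDesc variant that fits:
+// small cns / zero dsp, large cns / zero dsp, small cns / non-zero dsp, and
+// large cns / non-zero dsp.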
+
+/*****************************************************************************
+ *
+ *  Returns true if a garbage collection cannot happen within the given helper call.
+ *  We don't need to record live pointers at such call sites.
+ */
+
+bool emitter::emitNoGChelper(unsigned IHX)
+{
+ // TODO-Throughput: Make this faster (maybe via a simple table of bools?)
+
+ switch (IHX)
+ {
+ case CORINFO_HELP_UNDEF:
+ return false;
+
+ case CORINFO_HELP_PROF_FCN_LEAVE:
+ case CORINFO_HELP_PROF_FCN_ENTER:
+#ifdef _TARGET_AMD64_
+ case CORINFO_HELP_PROF_FCN_TAILCALL:
+#endif
+ case CORINFO_HELP_LLSH:
+ case CORINFO_HELP_LRSH:
+ case CORINFO_HELP_LRSZ:
+
+// case CORINFO_HELP_LMUL:
+// case CORINFO_HELP_LDIV:
+// case CORINFO_HELP_LMOD:
+// case CORINFO_HELP_ULDIV:
+// case CORINFO_HELP_ULMOD:
+
+#ifdef _TARGET_X86_
+ case CORINFO_HELP_ASSIGN_REF_EAX:
+ case CORINFO_HELP_ASSIGN_REF_ECX:
+ case CORINFO_HELP_ASSIGN_REF_EBX:
+ case CORINFO_HELP_ASSIGN_REF_EBP:
+ case CORINFO_HELP_ASSIGN_REF_ESI:
+ case CORINFO_HELP_ASSIGN_REF_EDI:
+
+ case CORINFO_HELP_CHECKED_ASSIGN_REF_EAX:
+ case CORINFO_HELP_CHECKED_ASSIGN_REF_ECX:
+ case CORINFO_HELP_CHECKED_ASSIGN_REF_EBX:
+ case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP:
+ case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI:
+ case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI:
+#endif
+
+ case CORINFO_HELP_ASSIGN_REF:
+
+ case CORINFO_HELP_CHECKED_ASSIGN_REF:
+
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR:
+
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR:
+
+ case CORINFO_HELP_ASSIGN_BYREF:
+
+ case CORINFO_HELP_INIT_PINVOKE_FRAME:
+
+ return true;
+ }
+
+ return false;
+}
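+
+// A minimal sketch of the table-based alternative mentioned in the TODO above
+// (hypothetical, not wired in): build a bool table once, indexed by helper number,
+// assuming the CORINFO_HELP_COUNT sentinel of the CorInfoHelpFunc enum.
+//
+//     static bool s_isNoGChelper[CORINFO_HELP_COUNT]; // filled in from the cases above
+//     ...
+//     return (IHX < CORINFO_HELP_COUNT) && s_isNoGChelper[IHX];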
+
+/*****************************************************************************
+ *
+ * Mark the current spot as having a label.
+ */
+
+void* emitter::emitAddLabel(VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, BOOL isFinallyTarget)
+{
+ /* Create a new IG if the current one is non-empty */
+
+ if (emitCurIGnonEmpty())
+ {
+ emitNxtIG();
+ }
+
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, GCvars);
+ VarSetOps::Assign(emitComp, emitInitGCrefVars, GCvars);
+ emitThisGCrefRegs = emitInitGCrefRegs = gcrefRegs;
+ emitThisByrefRegs = emitInitByrefRegs = byrefRegs;
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (isFinallyTarget)
+ {
+ emitCurIG->igFlags |= IGF_FINALLY_TARGET;
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("Label: IG%02u, GCvars=%s ", emitCurIG->igNum, VarSetOps::ToString(emitComp, GCvars));
+ dumpConvertedVarSet(emitComp, GCvars);
+ printf(", gcrefRegs=");
+ printRegMaskInt(gcrefRegs);
+ emitDispRegSet(gcrefRegs);
+ printf(", byrefRegs=");
+ printRegMaskInt(byrefRegs);
+ emitDispRegSet(byrefRegs);
+ printf("\n");
+ }
+#endif
+ return emitCurIG;
+}
+
+#ifdef _TARGET_ARMARCH_
+
+// Does the argument location point to an IG at the end of a function or funclet?
+// We can ignore the codePos part of the location, since it doesn't affect the
+// determination. If 'emitLocNextFragment' is non-NULL, it indicates the first
+// IG of the next fragment, so it represents a function end.
+bool emitter::emitIsFuncEnd(emitLocation* emitLoc, emitLocation* emitLocNextFragment /* = NULL */)
+{
+ assert(emitLoc);
+
+ insGroup* ig = emitLoc->GetIG();
+ assert(ig);
+
+ // Are we at the end of the IG list?
+ if ((emitLocNextFragment != NULL) && (ig->igNext == emitLocNextFragment->GetIG()))
+ return true;
+
+ // Safety check
+ if (ig->igNext == NULL)
+ return true;
+
+ // Is the next IG the start of a funclet prolog?
+ if (ig->igNext->igFlags & IGF_FUNCLET_PROLOG)
+ return true;
+
+#if FEATURE_EH_FUNCLETS
+
+ // Is the next IG a placeholder group for a funclet prolog?
+ if ((ig->igNext->igFlags & IGF_PLACEHOLDER) && (ig->igNext->igPhData->igPhType == IGPT_FUNCLET_PROLOG))
+ {
+ return true;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Split the region from 'startLoc' to 'endLoc' into fragments by calling
+ * a callback function to indicate the beginning of a fragment. The initial code,
+ * starting at 'startLoc', doesn't get a callback, but the first code fragment,
+ * about 'maxSplitSize' bytes out does, as does the beginning of each fragment
+ * after that. There is no callback for the end (only the beginning of the last
+ * fragment gets a callback). A fragment must contain at least one instruction
+ * group. It should be smaller than 'maxSplitSize', although it may be larger to
+ * satisfy the "at least one instruction group" rule. Do not split prologs or
+ * epilogs. (Currently, prologs exist in a single instruction group at the main
+ * function beginning, so they aren't split. Funclets, however, might span IGs,
+ * so we can't split in between them.)
+ *
+ * Note that the locations must be the start of instruction groups; the part of
+ * the location indicating offset within a group must be zero.
+ *
+ * If 'startLoc' is NULL, it means the start of the code.
+ * If 'endLoc' is NULL, it means the end of the code.
+ */
+
+void emitter::emitSplit(emitLocation* startLoc,
+ emitLocation* endLoc,
+ UNATIVE_OFFSET maxSplitSize,
+ void* context,
+ emitSplitCallbackType callbackFunc)
+{
+ insGroup* igStart = (startLoc == NULL) ? emitIGlist : startLoc->GetIG();
+ insGroup* igEnd = (endLoc == NULL) ? NULL : endLoc->GetIG();
+ insGroup* igPrev;
+ insGroup* ig;
+ insGroup* igLastReported;
+ insGroup* igLastCandidate;
+ UNATIVE_OFFSET curSize;
+ UNATIVE_OFFSET candidateSize;
+
+ for (igPrev = NULL, ig = igLastReported = igStart, igLastCandidate = NULL, candidateSize = 0, curSize = 0;
+ ig != igEnd && ig != NULL; igPrev = ig, ig = ig->igNext)
+ {
+ // Keep looking until we've gone past the maximum split size
+ if (curSize >= maxSplitSize)
+ {
+ bool reportCandidate = true;
+
+ // Is there a candidate?
+ if (igLastCandidate == NULL)
+ {
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ printf("emitSplit: can't split at IG%02u; we don't have a candidate to report\n", ig->igNum);
+#endif
+ reportCandidate = false;
+ }
+
+ // Don't report the same thing twice (this also happens for the first block, since igLastReported is
+ // initialized to igStart).
+ if (igLastCandidate == igLastReported)
+ {
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ printf("emitSplit: can't split at IG%02u; we already reported it\n", igLastCandidate->igNum);
+#endif
+ reportCandidate = false;
+ }
+
+ // Report it!
+ if (reportCandidate)
+ {
+#ifdef DEBUG
+ if (EMITVERBOSE && (candidateSize >= maxSplitSize))
+ printf("emitSplit: split at IG%02u is size %d, larger than requested maximum size of %d\n",
+ igLastCandidate->igNum, candidateSize, maxSplitSize);
+#endif
+
+ // hand memory ownership to the callback function
+ emitLocation* pEmitLoc = new (emitComp, CMK_Unknown) emitLocation(igLastCandidate);
+ callbackFunc(context, pEmitLoc);
+ igLastReported = igLastCandidate;
+ igLastCandidate = NULL;
+ curSize -= candidateSize;
+ }
+ }
+
+ // Update the current candidate to be this block, if it isn't in the middle of a
+ // prolog or epilog, which we can't split. All we know is that certain
+ // IGs are marked as prolog or epilog. We don't actually know if two adjacent
+ // IGs are part of the *same* prolog or epilog, so we have to assume they are.
+
+ if (igPrev && (((igPrev->igFlags & IGF_FUNCLET_PROLOG) && (ig->igFlags & IGF_FUNCLET_PROLOG)) ||
+ ((igPrev->igFlags & IGF_EPILOG) && (ig->igFlags & IGF_EPILOG))))
+ {
+ // We can't update the candidate
+ }
+ else
+ {
+ igLastCandidate = ig;
+ candidateSize = curSize;
+ }
+
+ curSize += ig->igSize;
+
+ } // end for loop
+}
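+
+// A minimal usage sketch (hypothetical caller and names), showing the shape of the
+// emitSplitCallbackType callback as it is invoked above:
+//
+//     static void fragmentStartCallback(void* context, emitLocation* emitLoc)
+//     {
+//         ((FragmentList*)context)->Append(emitLoc); // hypothetical container; takes ownership
+//     }
+//     ...
+//     emitSplit(nullptr, nullptr, maxFragmentSize, &fragments, fragmentStartCallback);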
+
+/*****************************************************************************
+ *
+ * Given an instruction group, find the array of instructions (instrDesc) and
+ * number of instructions in the array. If the IG is the current IG, we assume
+ * that igData does NOT hold the instructions; they are unsaved and pointed
+ * to by emitCurIGfreeBase.
+ *
+ * This function can't be called for placeholder groups, which have no instrDescs.
+ */
+
+void emitter::emitGetInstrDescs(insGroup* ig, instrDesc** id, int* insCnt)
+{
+ assert(!(ig->igFlags & IGF_PLACEHOLDER));
+ if (ig == emitCurIG)
+ {
+ *id = (instrDesc*)emitCurIGfreeBase;
+ *insCnt = emitCurIGinsCnt;
+ }
+ else
+ {
+ *id = (instrDesc*)ig->igData;
+ *insCnt = ig->igInsCnt;
+ }
+
+ assert(*id);
+}
+
+/*****************************************************************************
+ *
+ * Given a location (an 'emitLocation'), find the instruction group (IG) and
+ * instruction descriptor (instrDesc) corresponding to that location. Returns
+ * 'true' if there is an instruction, 'false' if there is no instruction
+ * (i.e., we're at the end of the instruction list). Also, optionally return
+ * the number of instructions that follow that instruction in the IG (in *pinsRemaining,
+ * if pinsRemaining is non-NULL), which can be used for iterating over the
+ * remaining instrDescs in the IG.
+ *
+ * We assume that emitCurIG points to the end of the instructions we care about.
+ * For the prologs or epilogs, it points to the last IG of the prolog or epilog
+ * that is being generated. For body code gen, it points to the place we are currently
+ * adding code, namely, the end of currently generated code.
+ */
+
+bool emitter::emitGetLocationInfo(emitLocation* emitLoc,
+ insGroup** pig,
+ instrDesc** pid,
+ int* pinsRemaining /* = NULL */)
+{
+ assert(emitLoc != nullptr);
+ assert(emitLoc->Valid());
+ assert(emitLoc->GetIG() != nullptr);
+ assert(pig != nullptr);
+ assert(pid != nullptr);
+
+ insGroup* ig = emitLoc->GetIG();
+ instrDesc* id;
+ int insNum = emitLoc->GetInsNum();
+ int insCnt;
+
+ emitGetInstrDescs(ig, &id, &insCnt);
+ assert(insNum <= insCnt);
+
+    // There is a special case: if insNum points to the end, then we "wrap" and
+ // consider that the instruction it is pointing at is actually the first instruction
+ // of the next non-empty IG (which has its own valid emitLocation). This handles the
+ // case where you capture a location, then the next instruction creates a new IG.
+
+ if (insNum == insCnt)
+ {
+ if (ig == emitCurIG)
+ {
+ // No instructions beyond the current location.
+ return false;
+ }
+
+ for (ig = ig->igNext; ig; ig = ig->igNext)
+ {
+ emitGetInstrDescs(ig, &id, &insCnt);
+
+ if (insCnt > 0)
+ {
+ insNum = 0; // Pretend the index is 0 -- the first instruction
+ break;
+ }
+
+ if (ig == emitCurIG)
+ {
+ // There aren't any instructions in the current IG, and this is
+ // the current location, so we're at the end.
+ return false;
+ }
+ }
+
+ if (ig == NULL)
+ {
+ // 'ig' can't be NULL, or we went past the current IG represented by 'emitCurIG'.
+ // Perhaps 'loc' was corrupt coming in?
+ noway_assert(!"corrupt emitter location");
+ return false;
+ }
+ }
+
+ // Now find the instrDesc within this group that corresponds to the location
+
+ assert(insNum < insCnt);
+
+ int i;
+ for (i = 0; i != insNum; ++i)
+ {
+ castto(id, BYTE*) += emitSizeOfInsDsc(id);
+ }
+
+ // Return the info we found
+
+ *pig = ig;
+ *pid = id;
+
+ if (pinsRemaining)
+ {
+ *pinsRemaining = insCnt - insNum - 1;
+ }
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Compute the next instrDesc, either in this IG, or in a subsequent IG. 'id'
+ * will point to this instrDesc. 'ig' and 'insRemaining' will also be updated.
+ * Returns true if there is an instruction, or false if we've iterated over all
+ * the instructions up to the current instruction (based on 'emitCurIG').
+ */
+
+bool emitter::emitNextID(insGroup*& ig, instrDesc*& id, int& insRemaining)
+{
+ if (insRemaining > 0)
+ {
+ castto(id, BYTE*) += emitSizeOfInsDsc(id);
+ --insRemaining;
+ return true;
+ }
+
+ // We're out of instrDesc in 'ig'. Is this the current IG? If so, we're done.
+
+ if (ig == emitCurIG)
+ {
+ return false;
+ }
+
+ for (ig = ig->igNext; ig; ig = ig->igNext)
+ {
+ int insCnt;
+ emitGetInstrDescs(ig, &id, &insCnt);
+
+ if (insCnt > 0)
+ {
+ insRemaining = insCnt - 1;
+ return true;
+ }
+
+ if (ig == emitCurIG)
+ {
+ return false;
+ }
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Walk instrDesc's from the location given by 'locFrom', up to the current location.
+ * For each instruction, call the callback function 'processFunc'. 'context' is simply
+ * passed through to the callback function.
+ */
+
+void emitter::emitWalkIDs(emitLocation* locFrom, emitProcessInstrFunc_t processFunc, void* context)
+{
+ insGroup* ig;
+ instrDesc* id;
+ int insRemaining;
+
+ if (!emitGetLocationInfo(locFrom, &ig, &id, &insRemaining))
+ return; // no instructions at the 'from' location
+
+ do
+ {
+ // process <<id>>
+ (*processFunc)(id, context);
+
+ } while (emitNextID(ig, id, insRemaining));
+}
+
+/*****************************************************************************
+ *
+ * A callback function for emitWalkIDs() that calls Compiler::unwindNop().
+ */
+
+void emitter::emitGenerateUnwindNop(instrDesc* id, void* context)
+{
+ Compiler* comp = (Compiler*)context;
+#if defined(_TARGET_ARM_)
+ comp->unwindNop(id->idCodeSize());
+#elif defined(_TARGET_ARM64_)
+ comp->unwindNop();
+#endif // defined(_TARGET_ARM64_)
+}
+
+/*****************************************************************************
+ *
+ * emitUnwindNopPadding: call unwindNop() for every instruction from a given
+ * location 'emitLoc' up to the current location.
+ */
+
+void emitter::emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp)
+{
+ emitWalkIDs(locFrom, emitGenerateUnwindNop, comp);
+}
+
+#endif // _TARGET_ARMARCH_
+
+#if defined(_TARGET_ARM_)
+
+/*****************************************************************************
+ *
+ * Return the instruction size in bytes for the instruction at the specified location.
+ * This is used to assert that the unwind code being generated on ARM has the
+ * same size as the instruction for which it is being generated (since on ARM
+ * the unwind codes have a one-to-one relationship with instructions, and the
+ * unwind codes have an implicit instruction size that must match the instruction size.)
+ * An instruction must exist at the specified location.
+ */
+
+unsigned emitter::emitGetInstructionSize(emitLocation* emitLoc)
+{
+ insGroup* ig;
+ instrDesc* id;
+
+ bool anyInstrs = emitGetLocationInfo(emitLoc, &ig, &id);
+ assert(anyInstrs); // There better be an instruction at this location (otherwise, we're at the end of the
+ // instruction list)
+ return id->idCodeSize();
+}
+
+#endif // defined(_TARGET_ARM_)
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Returns the name for the register to use to access frame based variables
+ */
+
+const char* emitter::emitGetFrameReg()
+{
+ if (emitHasFramePtr)
+ {
+ return STR_FPBASE;
+ }
+ else
+ {
+ return STR_SPBASE;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display a register set in a readable form.
+ */
+
+void emitter::emitDispRegSet(regMaskTP regs)
+{
+ regNumber reg;
+ bool sp = false;
+
+ printf(" {");
+
+ for (reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if ((regs & genRegMask(reg)) == 0)
+ {
+ continue;
+ }
+
+ if (sp)
+ {
+ printf(" ");
+ }
+ else
+ {
+ sp = true;
+ }
+
+ printf("%s", emitRegName(reg));
+ }
+
+ printf("}");
+}
+
+/*****************************************************************************
+ *
+ * Display the current GC ref variable set in a readable form.
+ */
+
+void emitter::emitDispVarSet()
+{
+ unsigned vn;
+ int of;
+ bool sp = false;
+
+ for (vn = 0, of = emitGCrFrameOffsMin; vn < emitGCrFrameOffsCnt; vn += 1, of += sizeof(void*))
+ {
+ if (emitGCrFrameLiveTab[vn])
+ {
+ if (sp)
+ {
+ printf(" ");
+ }
+ else
+ {
+ sp = true;
+ }
+
+ printf("[%s", emitGetFrameReg());
+
+ if (of < 0)
+ {
+ printf("-%02XH", -of);
+ }
+ else if (of > 0)
+ {
+ printf("+%02XH", +of);
+ }
+
+ printf("]");
+ }
+ }
+
+ if (!sp)
+ {
+ printf("none");
+ }
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+
+#if MULTIREG_HAS_SECOND_GC_RET
+//------------------------------------------------------------------------
+// emitSetSecondRetRegGCType: Sets the GC type of the second return register for instrDescCGCA struct.
+//
+// Arguments:
+// id - The large call instr descriptor to set the second GC return register type on.
+// secondRetSize - The EA_SIZE for second return register type.
+//
+// Return Value:
+// None
+//
+
+void emitter::emitSetSecondRetRegGCType(instrDescCGCA* id, emitAttr secondRetSize)
+{
+ if (EA_IS_GCREF(secondRetSize))
+ {
+ id->idSecondGCref(GCT_GCREF);
+ }
+ else if (EA_IS_BYREF(secondRetSize))
+ {
+ id->idSecondGCref(GCT_BYREF);
+ }
+ else
+ {
+ id->idSecondGCref(GCT_NONE);
+ }
+}
+#endif // MULTIREG_HAS_SECOND_GC_RET
+
+/*****************************************************************************
+ *
+ * Allocate an instruction descriptor for an indirect call.
+ *
+ * We use two different descriptors to save space - the common case records
+ *  no GC variables and has both a very small argument count and a small address
+ *  mode displacement; the other case records the current GC var set,
+ *  the call scope, an arbitrarily large argument count, and an arbitrary
+ *  address mode displacement.
+ */
+
+emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt,
+ ssize_t disp,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ emitAttr retSizeIn
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize))
+{
+ emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE;
+
+ bool gcRefRegsInScratch = ((gcrefRegs & RBM_CALLEE_TRASH) != 0);
+
+ // Allocate a larger descriptor if any GC values need to be saved
+ // or if we have an absurd number of arguments or a large address
+ // mode displacement, or we have some byref registers
+ //
+ // On Amd64 System V OSs a larger descriptor is also needed if the
+ // call returns a two-register-returned struct and the second
+ // register (RDX) is a GCRef or ByRef pointer.
+
+ if (!VarSetOps::IsEmpty(emitComp, GCvars) || // any frame GCvars live
+ (gcRefRegsInScratch) || // any register gc refs live in scratch regs
+ (byrefRegs != 0) || // any register byrefs live
+ (disp < AM_DISP_MIN) || // displacement too negative
+ (disp > AM_DISP_MAX) || // displacement too positive
+ (argCnt > ID_MAX_SMALL_CNS) || // too many args
+ (argCnt < 0) // caller pops arguments
+ // There is a second ref/byref return register.
+ MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize)))
+ {
+ instrDescCGCA* id;
+
+ id = emitAllocInstrCGCA(retSize);
+
+ id->idSetIsLargeCall();
+
+ VarSetOps::Assign(emitComp, id->idcGCvars, GCvars);
+ id->idcGcrefRegs = gcrefRegs;
+ id->idcByrefRegs = byrefRegs;
+ id->idcArgCnt = argCnt;
+ id->idcDisp = disp;
+
+#if MULTIREG_HAS_SECOND_GC_RET
+ emitSetSecondRetRegGCType(id, secondRetSize);
+#endif // MULTIREG_HAS_SECOND_GC_RET
+
+ return id;
+ }
+ else
+ {
+ instrDesc* id;
+
+ id = emitNewInstrCns(retSize, argCnt);
+
+ /* Make sure we didn't waste space unexpectedly */
+ assert(!id->idIsLargeCns());
+
+ /* Store the displacement and make sure the value fit */
+ id->idAddr()->iiaAddrMode.amDisp = disp;
+ assert(id->idAddr()->iiaAddrMode.amDisp == disp);
+
+        /* Save the live GC registers in the unused register fields */
+ emitEncodeCallGCregs(gcrefRegs, id);
+
+ return id;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Allocate an instruction descriptor for a direct call.
+ *
+ * We use two different descriptors to save space - the common case records
+ *  no GC variables or byrefs, has a very small argument count, and no
+ * explicit scope;
+ * the other case records the current GC var set, the call scope,
+ * and an arbitrarily large argument count.
+ */
+
+emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ emitAttr retSizeIn
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize))
+{
+ emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE;
+
+ // Allocate a larger descriptor if new GC values need to be saved
+ // or if we have an absurd number of arguments or if we need to
+ // save the scope.
+ //
+ // On Amd64 System V OSs a larger descriptor is also needed if the
+ // call returns a two-register-returned struct and the second
+ // register (RDX) is a GCRef or ByRef pointer.
+
+ bool gcRefRegsInScratch = ((gcrefRegs & RBM_CALLEE_TRASH) != 0);
+
+ if (!VarSetOps::IsEmpty(emitComp, GCvars) || // any frame GCvars live
+ gcRefRegsInScratch || // any register gc refs live in scratch regs
+ (byrefRegs != 0) || // any register byrefs live
+ (argCnt > ID_MAX_SMALL_CNS) || // too many args
+ (argCnt < 0) // caller pops arguments
+ // There is a second ref/byref return register.
+ MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize)))
+ {
+ instrDescCGCA* id = emitAllocInstrCGCA(retSize);
+
+ // printf("Direct call with GC vars / big arg cnt / explicit scope\n");
+
+ id->idSetIsLargeCall();
+
+ VarSetOps::Assign(emitComp, id->idcGCvars, GCvars);
+ id->idcGcrefRegs = gcrefRegs;
+ id->idcByrefRegs = byrefRegs;
+ id->idcDisp = 0;
+ id->idcArgCnt = argCnt;
+
+#if MULTIREG_HAS_SECOND_GC_RET
+ emitSetSecondRetRegGCType(id, secondRetSize);
+#endif // MULTIREG_HAS_SECOND_GC_RET
+
+ return id;
+ }
+ else
+ {
+ instrDesc* id = emitNewInstrCns(retSize, argCnt);
+
+ // printf("Direct call w/o GC vars / big arg cnt / explicit scope\n");
+
+ /* Make sure we didn't waste space unexpectedly */
+ assert(!id->idIsLargeCns());
+
+        /* Save the live GC registers in the unused register fields */
+ emitEncodeCallGCregs(gcrefRegs, id);
+
+ return id;
+ }
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ *  Return a string with the name of the given class field (an empty string,
+ *  not NULL, is returned when the name isn't available).
+ */
+
+const char* emitter::emitFldName(CORINFO_FIELD_HANDLE fieldVal)
+{
+ if (emitComp->opts.varNames)
+ {
+ const char* memberName;
+ const char* className;
+
+ const int TEMP_BUFFER_LEN = 1024;
+ static char buff[TEMP_BUFFER_LEN];
+
+ memberName = emitComp->eeGetFieldName(fieldVal, &className);
+
+ sprintf_s(buff, TEMP_BUFFER_LEN, "'<%s>.%s'", className, memberName);
+ return buff;
+ }
+ else
+ {
+ return "";
+ }
+}
+
+/*****************************************************************************
+ *
+ *  Return a string with the name of the given function (an empty string,
+ *  not NULL, is returned when the name isn't available).
+ */
+
+const char* emitter::emitFncName(CORINFO_METHOD_HANDLE methHnd)
+{
+ return emitComp->eeGetMethodFullName(methHnd);
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ *  Be very careful: some instruction descriptors are allocated as "tiny" and
+ * don't have some of the tail fields of instrDesc (in particular, "idInfo").
+ */
+
+const BYTE emitter::emitFmtToOps[] = {
+#define IF_DEF(en, op1, op2) ID_OP_##op2,
+#include "emitfmts.h"
+};
+
+#ifdef DEBUG
+const unsigned emitter::emitFmtCount = sizeof(emitFmtToOps) / sizeof(emitFmtToOps[0]);
+#endif
+
+/*****************************************************************************
+ *
+ * Display the current instruction group list.
+ */
+
+#ifdef DEBUG
+
+void emitter::emitDispIGflags(unsigned flags)
+{
+ if (flags & IGF_GC_VARS)
+ {
+ printf(", gcvars");
+ }
+ if (flags & IGF_BYREF_REGS)
+ {
+ printf(", byref");
+ }
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (flags & IGF_FINALLY_TARGET)
+ {
+ printf(", ftarget");
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (flags & IGF_FUNCLET_PROLOG)
+ {
+ printf(", funclet prolog");
+ }
+ if (flags & IGF_FUNCLET_EPILOG)
+ {
+ printf(", funclet epilog");
+ }
+ if (flags & IGF_EPILOG)
+ {
+ printf(", epilog");
+ }
+ if (flags & IGF_NOGCINTERRUPT)
+ {
+ printf(", nogc");
+ }
+ if (flags & IGF_UPD_ISZ)
+ {
+ printf(", isz");
+ }
+ if (flags & IGF_EMIT_ADD)
+ {
+ printf(", emitadd");
+ }
+}
+
+void emitter::emitDispIG(insGroup* ig, insGroup* igPrev, bool verbose)
+{
+ const int TEMP_BUFFER_LEN = 40;
+ char buff[TEMP_BUFFER_LEN];
+
+ sprintf_s(buff, TEMP_BUFFER_LEN, "G_M%03u_IG%02u: ", Compiler::s_compMethodsCount, ig->igNum);
+ printf("%s; ", buff);
+ if ((igPrev == nullptr) || (igPrev->igFuncIdx != ig->igFuncIdx))
+ {
+ printf("func=%02u, ", ig->igFuncIdx);
+ }
+
+ if (ig->igFlags & IGF_PLACEHOLDER)
+ {
+ insGroup* igPh = ig;
+
+ const char* pszType;
+ switch (igPh->igPhData->igPhType)
+ {
+ case IGPT_PROLOG:
+ pszType = "prolog";
+ break;
+ case IGPT_EPILOG:
+ pszType = "epilog";
+ break;
+#if FEATURE_EH_FUNCLETS
+ case IGPT_FUNCLET_PROLOG:
+ pszType = "funclet prolog";
+ break;
+ case IGPT_FUNCLET_EPILOG:
+ pszType = "funclet epilog";
+ break;
+#endif // FEATURE_EH_FUNCLETS
+ default:
+ pszType = "UNKNOWN";
+ break;
+ }
+ printf("%s placeholder, next placeholder=", pszType);
+ if (igPh->igPhData->igPhNext)
+ {
+ printf("IG%02u ", igPh->igPhData->igPhNext->igNum);
+ }
+ else
+ {
+ printf("<END>");
+ }
+ printf(", BB=%08XH (BB%02u)", dspPtr(igPh->igPhData->igPhBB),
+ (igPh->igPhData->igPhBB != nullptr) ? igPh->igPhData->igPhBB->bbNum : 0);
+
+ emitDispIGflags(igPh->igFlags);
+
+ if (ig == emitCurIG)
+ {
+ printf(" <-- Current IG");
+ }
+ if (igPh == emitPlaceholderList)
+ {
+ printf(" <-- First placeholder");
+ }
+ if (igPh == emitPlaceholderLast)
+ {
+ printf(" <-- Last placeholder");
+ }
+ printf("\n");
+
+ printf("%*s; PrevGCVars=%s ", strlen(buff), "",
+ VarSetOps::ToString(emitComp, igPh->igPhData->igPhPrevGCrefVars));
+ dumpConvertedVarSet(emitComp, igPh->igPhData->igPhPrevGCrefVars);
+ printf(", PrevGCrefRegs=");
+ printRegMaskInt(igPh->igPhData->igPhPrevGCrefRegs);
+ emitDispRegSet(igPh->igPhData->igPhPrevGCrefRegs);
+ printf(", PrevByrefRegs=");
+ printRegMaskInt(igPh->igPhData->igPhPrevByrefRegs);
+ emitDispRegSet(igPh->igPhData->igPhPrevByrefRegs);
+ printf("\n");
+
+ printf("%*s; InitGCVars=%s ", strlen(buff), "",
+ VarSetOps::ToString(emitComp, igPh->igPhData->igPhInitGCrefVars));
+ dumpConvertedVarSet(emitComp, igPh->igPhData->igPhInitGCrefVars);
+ printf(", InitGCrefRegs=");
+ printRegMaskInt(igPh->igPhData->igPhInitGCrefRegs);
+ emitDispRegSet(igPh->igPhData->igPhInitGCrefRegs);
+ printf(", InitByrefRegs=");
+ printRegMaskInt(igPh->igPhData->igPhInitByrefRegs);
+ emitDispRegSet(igPh->igPhData->igPhInitByrefRegs);
+ printf("\n");
+
+ assert(!(ig->igFlags & IGF_GC_VARS));
+ assert(!(ig->igFlags & IGF_BYREF_REGS));
+ }
+ else
+ {
+ printf("offs=%06XH, size=%04XH", ig->igOffs, ig->igSize);
+
+ if (ig->igFlags & IGF_GC_VARS)
+ {
+ printf(", gcVars=%s ", VarSetOps::ToString(emitComp, ig->igGCvars()));
+ dumpConvertedVarSet(emitComp, ig->igGCvars());
+ }
+
+ if (!(ig->igFlags & IGF_EMIT_ADD))
+ {
+ printf(", gcrefRegs=");
+ printRegMaskInt(ig->igGCregs);
+ emitDispRegSet(ig->igGCregs);
+ }
+
+ if (ig->igFlags & IGF_BYREF_REGS)
+ {
+ printf(", byrefRegs=");
+ printRegMaskInt(ig->igByrefRegs());
+ emitDispRegSet(ig->igByrefRegs());
+ }
+
+ emitDispIGflags(ig->igFlags);
+
+ if (ig == emitCurIG)
+ {
+ printf(" <-- Current IG");
+ }
+ if (ig == emitPrologIG)
+ {
+ printf(" <-- Prolog IG");
+ }
+ printf("\n");
+
+ if (verbose)
+ {
+ BYTE* ins = ig->igData;
+ UNATIVE_OFFSET ofs = ig->igOffs;
+ unsigned cnt = ig->igInsCnt;
+
+ if (cnt)
+ {
+ printf("\n");
+
+ do
+ {
+ instrDesc* id = (instrDesc*)ins;
+
+ emitDispIns(id, false, true, false, ofs, nullptr, 0, ig);
+
+ ins += emitSizeOfInsDsc(id);
+ ofs += emitInstCodeSz(id);
+ } while (--cnt);
+
+ printf("\n");
+ }
+ }
+ }
+}
+
+void emitter::emitDispIGlist(bool verbose)
+{
+ insGroup* ig;
+ insGroup* igPrev;
+
+ for (igPrev = nullptr, ig = emitIGlist; ig; igPrev = ig, ig = ig->igNext)
+ {
+ emitDispIG(ig, igPrev, verbose);
+ }
+}
+
+void emitter::emitDispGCinfo()
+{
+ printf("Emitter GC tracking info:");
+ printf("\n emitPrevGCrefVars(0x%p)=%016llX ", dspPtr(&emitPrevGCrefVars), emitPrevGCrefVars);
+ dumpConvertedVarSet(emitComp, emitPrevGCrefVars);
+ printf("\n emitPrevGCrefRegs(0x%p)=", dspPtr(&emitPrevGCrefRegs));
+ printRegMaskInt(emitPrevGCrefRegs);
+ emitDispRegSet(emitPrevGCrefRegs);
+ printf("\n emitPrevByrefRegs(0x%p)=", dspPtr(&emitPrevByrefRegs));
+ printRegMaskInt(emitPrevByrefRegs);
+ emitDispRegSet(emitPrevByrefRegs);
+ printf("\n emitInitGCrefVars(0x%p)=%016llX ", dspPtr(&emitInitGCrefVars), emitInitGCrefVars);
+ dumpConvertedVarSet(emitComp, emitInitGCrefVars);
+ printf("\n emitInitGCrefRegs(0x%p)=", dspPtr(&emitInitGCrefRegs));
+ printRegMaskInt(emitInitGCrefRegs);
+ emitDispRegSet(emitInitGCrefRegs);
+ printf("\n emitInitByrefRegs(0x%p)=", dspPtr(&emitInitByrefRegs));
+ printRegMaskInt(emitInitByrefRegs);
+ emitDispRegSet(emitInitByrefRegs);
+ printf("\n emitThisGCrefVars(0x%p)=%016llX ", dspPtr(&emitThisGCrefVars), emitThisGCrefVars);
+ dumpConvertedVarSet(emitComp, emitThisGCrefVars);
+ printf("\n emitThisGCrefRegs(0x%p)=", dspPtr(&emitThisGCrefRegs));
+ printRegMaskInt(emitThisGCrefRegs);
+ emitDispRegSet(emitThisGCrefRegs);
+ printf("\n emitThisByrefRegs(0x%p)=", dspPtr(&emitThisByrefRegs));
+ printRegMaskInt(emitThisByrefRegs);
+ emitDispRegSet(emitThisByrefRegs);
+ printf("\n\n");
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Issue the given instruction. Basically, this is just a thin wrapper around
+ * emitOutputInstr() that does a few debug checks.
+ */
+
+size_t emitter::emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp)
+{
+ size_t is;
+
+ /* Record the beginning offset of the instruction */
+
+ BYTE* curInsAdr = *dp;
+
+ /* Issue the next instruction */
+
+ // printf("[S=%02u] " , emitCurStackLvl);
+
+ is = emitOutputInstr(ig, id, dp);
+
+// printf("[S=%02u]\n", emitCurStackLvl);
+
+#if EMIT_TRACK_STACK_DEPTH
+
+ /*
+ If we're generating a full pointer map and the stack
+ is empty, there better not be any "pending" argument
+ push entries.
+ */
+
+ assert(emitFullGCinfo == false || emitCurStackLvl != 0 || u2.emitGcArgTrackCnt == 0);
+
+#endif
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+ /* Did the size of the instruction match our expectations? */
+
+ UNATIVE_OFFSET csz = (UNATIVE_OFFSET)(*dp - curInsAdr);
+
+ if (csz != id->idCodeSize())
+ {
+ /* It is fatal to under-estimate the instruction size */
+ noway_assert(emitInstCodeSz(id) >= csz);
+
+#if DEBUG_EMIT
+ if (EMITVERBOSE)
+ {
+ printf("Instruction predicted size = %u, actual = %u\n", emitInstCodeSz(id), csz);
+ }
+#endif // DEBUG_EMIT
+
+ /* The instruction size estimate wasn't accurate; remember this */
+
+ ig->igFlags |= IGF_UPD_ISZ;
+#if defined(_TARGET_XARCH_)
+ id->idCodeSize(csz);
+#elif defined(_TARGET_ARM_)
+// This is done as part of emitSetShortJump();
+// insSize isz = emitInsSize(id->idInsFmt());
+// id->idInsSize(isz);
+#else
+ /* It is fatal to over-estimate the instruction size */
+ IMPL_LIMITATION("Over-estimated instruction size");
+#endif
+ }
+
+#endif
+
+#ifdef DEBUG
+ /* Make sure the instruction descriptor size also matches our expectations */
+ if (is != emitSizeOfInsDsc(id))
+ {
+ printf("%s at %u: Expected size = %u , actual size = %u\n", emitIfName(id->idInsFmt()),
+ id->idDebugOnlyInfo()->idNum, is, emitSizeOfInsDsc(id));
+ assert(is == emitSizeOfInsDsc(id));
+ }
+#endif
+
+ return is;
+}
+
+/*****************************************************************************
+ *
+ * Update the offsets of all the instruction groups (note: please don't be
+ *  lazy and call this routine frequently; it walks the list of instruction
+ * groups and thus it isn't cheap).
+ */
+
+void emitter::emitRecomputeIGoffsets()
+{
+ UNATIVE_OFFSET offs;
+ insGroup* ig;
+
+ for (ig = emitIGlist, offs = 0; ig; ig = ig->igNext)
+ {
+ ig->igOffs = offs;
+ assert(IsCodeAligned(ig->igOffs));
+ offs += ig->igSize;
+ }
+
+ /* Set the total code size */
+
+ emitTotalCodeSize = offs;
+
+#ifdef DEBUG
+ emitCheckIGoffsets();
+#endif
+}
+
+/*****************************************************************************
+ * Bind targets of relative jumps to choose the smallest possible encoding.
+ * X86 and AMD64 have a small and large encoding.
+ * ARM has a small, medium, and large encoding. The large encoding is a pseudo-op
+ * to handle greater range than the conditional branch instructions can handle.
+ * ARM64 has a small and large encoding for both conditional branch and loading label addresses.
+ * The large encodings are pseudo-ops that represent a multiple instruction sequence, similar to ARM. (Currently
+ * NYI).
+ */
+
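+// For reference on x86/AMD64: an unconditional jmp is 2 bytes with a rel8 displacement
+// versus 5 bytes with rel32, and a conditional Jcc is 2 bytes (rel8) versus 6 bytes
+// (rel32). Shrinking any one jump shifts all downstream code, which is why the loop
+// below may iterate (see 'jmp_iteration' and the AGAIN label).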
+void emitter::emitJumpDistBind()
+{
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("*************** In emitJumpDistBind()\n");
+ }
+ if (EMIT_INSTLIST_VERBOSE)
+ {
+ printf("\nInstruction list before jump distance binding:\n\n");
+ emitDispIGlist(true);
+ }
+#endif
+
+ instrDescJmp* jmp;
+
+    UNATIVE_OFFSET minShortExtra; // The smallest distance by which any jump exceeds the short-jump range.
+                                  // If it is small enough, we will iterate in hopes of
+                                  // converting those jumps we missed converting the first (or second...) time.
+
+#if defined(_TARGET_ARM_)
+ UNATIVE_OFFSET minMediumExtra; // Same as 'minShortExtra', but for medium-sized jumps.
+#endif // _TARGET_ARM_
+
+ UNATIVE_OFFSET adjIG;
+ UNATIVE_OFFSET adjLJ;
+ insGroup* lstIG;
+#ifdef DEBUG
+ insGroup* prologIG = emitPrologIG;
+#endif // DEBUG
+
+ int jmp_iteration = 1;
+
+/*****************************************************************************/
+/* If we iterate to look for more jumps to shorten, we start again here. */
+/*****************************************************************************/
+
+AGAIN:
+
+#ifdef DEBUG
+ emitCheckIGoffsets();
+#endif
+
+/*
+ In the following loop we convert all jump targets from "BasicBlock *"
+ to "insGroup *" values. We also estimate which jumps will be short.
+ */
+
+#ifdef DEBUG
+ insGroup* lastIG = nullptr;
+ instrDescJmp* lastLJ = nullptr;
+#endif
+
+ lstIG = nullptr;
+ adjLJ = 0;
+ adjIG = 0;
+ minShortExtra = (UNATIVE_OFFSET)-1;
+
+#if defined(_TARGET_ARM_)
+ minMediumExtra = (UNATIVE_OFFSET)-1;
+#endif // _TARGET_ARM_
+
+ for (jmp = emitJumpList; jmp; jmp = jmp->idjNext)
+ {
+ insGroup* jmpIG;
+ insGroup* tgtIG;
+
+ UNATIVE_OFFSET jsz; // size of the jump instruction in bytes
+
+ UNATIVE_OFFSET ssz = 0; // small jump size
+ NATIVE_OFFSET nsd = 0; // small jump max. neg distance
+ NATIVE_OFFSET psd = 0; // small jump max. pos distance
+
+#if defined(_TARGET_ARM_)
+ UNATIVE_OFFSET msz = 0; // medium jump size
+ NATIVE_OFFSET nmd = 0; // medium jump max. neg distance
+ NATIVE_OFFSET pmd = 0; // medium jump max. pos distance
+ NATIVE_OFFSET mextra; // How far beyond the medium jump range is this jump offset?
+#endif // _TARGET_ARM_
+
+ NATIVE_OFFSET extra; // How far beyond the short jump range is this jump offset?
+ UNATIVE_OFFSET srcInstrOffs; // offset of the source instruction of the jump
+ UNATIVE_OFFSET srcEncodingOffs; // offset of the source used by the instruction set to calculate the relative
+ // offset of the jump
+ UNATIVE_OFFSET dstOffs;
+ NATIVE_OFFSET jmpDist; // the relative jump distance, as it will be encoded
+ UNATIVE_OFFSET oldSize;
+ UNATIVE_OFFSET sizeDif;
+
+#ifdef _TARGET_XARCH_
+ assert(jmp->idInsFmt() == IF_LABEL || jmp->idInsFmt() == IF_RWR_LABEL || jmp->idInsFmt() == IF_SWR_LABEL);
+
+ /* Figure out the smallest size we can end up with */
+
+ if (jmp->idInsFmt() == IF_LABEL)
+ {
+ if (emitIsCondJump(jmp))
+ {
+ ssz = JCC_SIZE_SMALL;
+ nsd = JCC_DIST_SMALL_MAX_NEG;
+ psd = JCC_DIST_SMALL_MAX_POS;
+ }
+ else
+ {
+ ssz = JMP_SIZE_SMALL;
+ nsd = JMP_DIST_SMALL_MAX_NEG;
+ psd = JMP_DIST_SMALL_MAX_POS;
+ }
+ }
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
+ assert((jmp->idInsFmt() == IF_T2_J1) || (jmp->idInsFmt() == IF_T2_J2) || (jmp->idInsFmt() == IF_T1_I) ||
+ (jmp->idInsFmt() == IF_T1_K) || (jmp->idInsFmt() == IF_T1_M) || (jmp->idInsFmt() == IF_T2_M1) ||
+ (jmp->idInsFmt() == IF_T2_N1) || (jmp->idInsFmt() == IF_T1_J3) || (jmp->idInsFmt() == IF_LARGEJMP));
+
+ /* Figure out the smallest size we can end up with */
+
+ if (emitIsCondJump(jmp))
+ {
+ ssz = JCC_SIZE_SMALL;
+ nsd = JCC_DIST_SMALL_MAX_NEG;
+ psd = JCC_DIST_SMALL_MAX_POS;
+
+ msz = JCC_SIZE_MEDIUM;
+ nmd = JCC_DIST_MEDIUM_MAX_NEG;
+ pmd = JCC_DIST_MEDIUM_MAX_POS;
+ }
+ else if (emitIsCmpJump(jmp))
+ {
+ ssz = JMP_SIZE_SMALL;
+ nsd = 0;
+ psd = 126;
+ }
+ else if (emitIsUncondJump(jmp))
+ {
+ ssz = JMP_SIZE_SMALL;
+ nsd = JMP_DIST_SMALL_MAX_NEG;
+ psd = JMP_DIST_SMALL_MAX_POS;
+ }
+ else if (emitIsLoadLabel(jmp))
+ {
+ ssz = LBL_SIZE_SMALL;
+ nsd = LBL_DIST_SMALL_MAX_NEG;
+ psd = LBL_DIST_SMALL_MAX_POS;
+ }
+ else
+ {
+ assert(!"Unknown jump instruction");
+ }
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_ARM64_
+ /* Figure out the smallest size we can end up with */
+
+ if (emitIsCondJump(jmp))
+ {
+ ssz = JCC_SIZE_SMALL;
+ nsd = JCC_DIST_SMALL_MAX_NEG;
+ psd = JCC_DIST_SMALL_MAX_POS;
+ }
+ else if (emitIsUncondJump(jmp))
+ {
+ // Nothing to do; we don't shrink these.
+ assert(jmp->idjShort);
+ ssz = JMP_SIZE_SMALL;
+ }
+ else if (emitIsCmpJump(jmp))
+ {
+ NYI("branch shortening compare-and-branch instructions");
+ }
+ else if (emitIsLoadLabel(jmp))
+ {
+ ssz = LBL_SIZE_SMALL;
+ nsd = LBL_DIST_SMALL_MAX_NEG;
+ psd = LBL_DIST_SMALL_MAX_POS;
+ }
+ else if (emitIsLoadConstant(jmp))
+ {
+ ssz = LDC_SIZE_SMALL;
+ nsd = LDC_DIST_SMALL_MAX_NEG;
+ psd = LDC_DIST_SMALL_MAX_POS;
+ }
+ else
+ {
+ assert(!"Unknown jump instruction");
+ }
+#endif // _TARGET_ARM64_
+
+/* Make sure the jumps are properly ordered */
+
+#ifdef DEBUG
+ assert(lastLJ == nullptr || lastIG != jmp->idjIG || lastLJ->idjOffs < jmp->idjOffs);
+ lastLJ = (lastIG == jmp->idjIG) ? jmp : nullptr;
+
+ assert(lastIG == nullptr || lastIG->igNum <= jmp->idjIG->igNum || jmp->idjIG == prologIG ||
+ emitNxtIGnum > unsigned(0xFFFF)); // igNum might overflow
+ lastIG = jmp->idjIG;
+#endif // DEBUG
+
+ /* Get hold of the current jump size */
+
+ jsz = emitSizeOfJump(jmp);
+
+ /* Get the group the jump is in */
+
+ jmpIG = jmp->idjIG;
+
+ /* Are we in a group different from the previous jump? */
+
+ if (lstIG != jmpIG)
+ {
+ /* Were there any jumps before this one? */
+
+ if (lstIG)
+ {
+ /* Adjust the offsets of the intervening blocks */
+
+ do
+ {
+ lstIG = lstIG->igNext;
+ assert(lstIG);
+ // printf("Adjusted offset of block %02u from %04X to %04X\n", lstIG->igNum, lstIG->igOffs,
+ // lstIG->igOffs - adjIG);
+ lstIG->igOffs -= adjIG;
+ assert(IsCodeAligned(lstIG->igOffs));
+ } while (lstIG != jmpIG);
+ }
+
+ /* We've got the first jump in a new group */
+
+ adjLJ = 0;
+ lstIG = jmpIG;
+ }
+
+ /* Apply any local size adjustment to the jump's relative offset */
+
+ jmp->idjOffs -= adjLJ;
+
+ // If this is a jump via register, the instruction size does not change, so we are done.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM64_)
+ // JIT code and data will be allocated together for arm64 so the relative offset to JIT data is known.
+        // If the offset is encodable by `ldr` (+-1MB), use the short form.
+ if (jmp->idAddr()->iiaIsJitDataOffset())
+ {
+ // Reference to JIT data
+ assert(jmp->idIsBound());
+ UNATIVE_OFFSET srcOffs = jmpIG->igOffs + jmp->idjOffs;
+
+ int doff = jmp->idAddr()->iiaGetJitDataOffset();
+ assert(doff >= 0);
+ ssize_t imm = emitGetInsSC(jmp);
+ assert((imm >= 0) && (imm < 0x1000)); // 0x1000 is arbitrary, currently 'imm' is always 0
+
+ unsigned dataOffs = (unsigned)(doff + imm);
+ assert(dataOffs < emitDataSize());
+
+            // Conservatively assume JIT data starts after the entire code size.
+            // TODO-ARM64: we might consider using only the hot code size, which will be computed later in
+            // emitComputeCodeSizes().
+ assert(emitTotalCodeSize > 0);
+ UNATIVE_OFFSET maxDstOffs = emitTotalCodeSize + dataOffs;
+
+ // Check if the distance is within the encoding length.
+ jmpDist = maxDstOffs - srcOffs;
+ extra = jmpDist - psd;
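+            // For example (illustrative numbers): with srcOffs = 0x100, emitTotalCodeSize = 0x4000 and
+            // dataOffs = 0x20, maxDstOffs = 0x4020 and jmpDist = 0x3F20, which is well within the +-1MB
+            // 'ldr' range, so 'extra' is <= 0 and we take the short form below.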
+ if (extra <= 0)
+ {
+ goto SHORT_JMP;
+ }
+
+ // Keep the large form.
+ continue;
+ }
+#endif
+
+ /* Have we bound this jump's target already? */
+
+ if (jmp->idIsBound())
+ {
+ /* Does the jump already have the smallest size? */
+
+ if (jmp->idjShort)
+ {
+ assert(emitSizeOfJump(jmp) == ssz);
+
+ // We should not be jumping/branching across funclets/functions
+ emitCheckFuncletBranch(jmp, jmpIG);
+
+ continue;
+ }
+
+ tgtIG = jmp->idAddr()->iiaIGlabel;
+ }
+ else
+ {
+ /* First time we've seen this label, convert its target */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ printf("Binding: ");
+ emitDispIns(jmp, false, false, false);
+ printf("Binding L_M%03u_BB%02u ", Compiler::s_compMethodsCount, jmp->idAddr()->iiaBBlabel->bbNum);
+ }
+#endif // DEBUG
+
+ tgtIG = (insGroup*)emitCodeGetCookie(jmp->idAddr()->iiaBBlabel);
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ if (tgtIG)
+ {
+ printf("to G_M%03u_IG%02u\n", Compiler::s_compMethodsCount, tgtIG->igNum);
+ }
+ else
+ {
+ printf("-- ERROR, no emitter cookie for BB%02u; it is probably missing BBF_JMP_TARGET or "
+ "BBF_HAS_LABEL.\n",
+ jmp->idAddr()->iiaBBlabel->bbNum);
+ }
+ }
+ assert(tgtIG);
+#endif // DEBUG
+
+ /* Record the bound target */
+
+ jmp->idAddr()->iiaIGlabel = tgtIG;
+ jmp->idSetIsBound();
+ }
+
+ // We should not be jumping/branching across funclets/functions
+ emitCheckFuncletBranch(jmp, jmpIG);
+
+#ifdef _TARGET_XARCH_
+ /* Done if this is not a variable-sized jump */
+
+ if ((jmp->idIns() == INS_push) || (jmp->idIns() == INS_mov) || (jmp->idIns() == INS_call) ||
+ (jmp->idIns() == INS_push_hide))
+ {
+ continue;
+ }
+#endif
+#ifdef _TARGET_ARM_
+ if ((jmp->idIns() == INS_push) || (jmp->idIns() == INS_mov) || (jmp->idIns() == INS_movt) ||
+ (jmp->idIns() == INS_movw))
+ {
+ continue;
+ }
+#endif
+#ifdef _TARGET_ARM64_
+ // There is only one size of unconditional branch; we don't support functions larger than 2^28 bytes (our branch
+ // range).
+ if (emitIsUncondJump(jmp))
+ {
+ continue;
+ }
+#endif
+
+ /*
+ In the following distance calculations, if we're not actually
+ scheduling the code (i.e. reordering instructions), we can
+ use the actual offset of the jump (rather than the beg/end of
+ the instruction group) since the jump will not be moved around
+ and thus its offset is accurate.
+
+ First we need to figure out whether this jump is a forward or
+ backward one; to do this we simply look at the ordinals of the
+ group that contains the jump and the target.
+ */
+
+ srcInstrOffs = jmpIG->igOffs + jmp->idjOffs;
+
+ /* Note that the destination is always the beginning of an IG, so no need for an offset inside it */
+ dstOffs = tgtIG->igOffs;
+
+#if defined(_TARGET_ARM_)
+ srcEncodingOffs =
+ srcInstrOffs + 4; // For relative branches, ARM PC is always considered to be the instruction address + 4
+#elif defined(_TARGET_ARM64_)
+ srcEncodingOffs =
+ srcInstrOffs; // For relative branches, ARM64 PC is always considered to be the instruction address
+#else
+ srcEncodingOffs = srcInstrOffs + ssz; // Encoding offset of relative offset for small branch
+#endif
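+        // For example, on xarch a 2-byte short jump whose instruction starts at offset 0x10 and targets
+        // offset 0x40 has srcEncodingOffs = 0x12, so the encoded rel8 displacement would be 0x40 - 0x12 = 0x2E.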
+
+ if (jmpIG->igNum < tgtIG->igNum)
+ {
+ /* Forward jump */
+
+ /* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between
+ here and the target could be shortened, causing the actual distance to shrink.
+ */
+
+ dstOffs -= adjIG;
+
+ /* Compute the distance estimate */
+
+ jmpDist = dstOffs - srcEncodingOffs;
+
+ /* How much beyond the max. short distance does the jump go? */
+
+ extra = jmpDist - psd;
+
+#if DEBUG_EMIT
+ assert(jmp->idDebugOnlyInfo() != nullptr);
+ if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[1] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum);
+ }
+ printf("[1] Jump block is at %08X\n", jmpIG->igOffs);
+ printf("[1] Jump reloffset is %04X\n", jmp->idjOffs);
+ printf("[1] Jump source is at %08X\n", srcEncodingOffs);
+ printf("[1] Label block is at %08X\n", dstOffs);
+ printf("[1] Jump dist. is %04X\n", jmpDist);
+ if (extra > 0)
+ {
+ printf("[1] Dist excess [S] = %d \n", extra);
+ }
+ }
+ if (EMITVERBOSE)
+ {
+ printf("Estimate of fwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp),
+ jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist);
+ }
+#endif // DEBUG_EMIT
+
+ if (extra <= 0)
+ {
+ /* This jump will be a short one */
+ goto SHORT_JMP;
+ }
+ }
+ else
+ {
+ /* Backward jump */
+
+ /* Compute the distance estimate */
+
+ jmpDist = srcEncodingOffs - dstOffs;
+
+ /* How much beyond the max. short distance does the jump go? */
+
+ extra = jmpDist + nsd;
+
+#if DEBUG_EMIT
+ assert(jmp->idDebugOnlyInfo() != nullptr);
+ if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[2] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum);
+ }
+ printf("[2] Jump block is at %08X\n", jmpIG->igOffs);
+ printf("[2] Jump reloffset is %04X\n", jmp->idjOffs);
+ printf("[2] Jump source is at %08X\n", srcEncodingOffs);
+ printf("[2] Label block is at %08X\n", dstOffs);
+ printf("[2] Jump dist. is %04X\n", jmpDist);
+ if (extra > 0)
+ {
+ printf("[2] Dist excess [S] = %d \n", extra);
+ }
+ }
+ if (EMITVERBOSE)
+ {
+ printf("Estimate of bwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp),
+ jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist);
+ }
+#endif // DEBUG_EMIT
+
+ if (extra <= 0)
+ {
+ /* This jump will be a short one */
+ goto SHORT_JMP;
+ }
+ }
+
+ /* We arrive here if the jump couldn't be made short, at least for now */
+
+ /* We had better not have eagerly marked the jump as short
+ * in emitIns_J(). If we did, then it has to be able to stay short
+ * as emitIns_J() uses the worst case scenario, and blocks can
+ * only move closer together after that.
+ */
+ assert(jmp->idjShort == 0);
+
+ /* Keep track of the closest distance we got */
+
+ if (minShortExtra > (unsigned)extra)
+ {
+ minShortExtra = (unsigned)extra;
+ }
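+        // 'minShortExtra' is compared against the total shrinkage ('adjIG') once the loop completes; if some
+        // jump missed the short range by less than the amount we shrank, another iteration may convert it.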
+
+#if defined(_TARGET_ARM_)
+
+ // If we're here, we couldn't convert to a small jump.
+ // Handle conversion to medium-sized conditional jumps.
+ // 'srcInstrOffs', 'srcEncodingOffs', 'dstOffs', 'jmpDist' have already been computed
+ // and don't need to be recomputed.
+
+ if (emitIsCondJump(jmp))
+ {
+ if (jmpIG->igNum < tgtIG->igNum)
+ {
+ /* Forward jump */
+
+ /* How much beyond the max. medium distance does the jump go? */
+
+ mextra = jmpDist - pmd;
+
+#if DEBUG_EMIT
+ assert(jmp->idDebugOnlyInfo() != NULL);
+ if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ if (mextra > 0)
+ {
+ if (INTERESTING_JUMP_NUM == 0)
+ printf("[6] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum);
+ printf("[6] Dist excess [S] = %d \n", mextra);
+ }
+ }
+#endif // DEBUG_EMIT
+
+ if (mextra <= 0)
+ {
+ /* This jump will be a medium one */
+ goto MEDIUM_JMP;
+ }
+ }
+ else
+ {
+ /* Backward jump */
+
+ /* How much beyond the max. medium distance does the jump go? */
+
+ mextra = jmpDist + nmd;
+
+#if DEBUG_EMIT
+ assert(jmp->idDebugOnlyInfo() != NULL);
+ if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ if (mextra > 0)
+ {
+ if (INTERESTING_JUMP_NUM == 0)
+ printf("[7] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum);
+ printf("[7] Dist excess [S] = %d \n", mextra);
+ }
+ }
+#endif // DEBUG_EMIT
+
+ if (mextra <= 0)
+ {
+ /* This jump will be a medium one */
+ goto MEDIUM_JMP;
+ }
+ }
+
+ /* We arrive here if the jump couldn't be made medium, at least for now */
+
+ /* Keep track of the closest distance we got */
+
+ if (minMediumExtra > (unsigned)mextra)
+ minMediumExtra = (unsigned)mextra;
+ }
+
+#endif // _TARGET_ARM_
+
+ /*****************************************************************************
+ * We arrive here if the jump must stay long, at least for now.
+ * Go try the next one.
+ */
+
+ continue;
+
+ /*****************************************************************************/
+ /* Handle conversion to short jump */
+ /*****************************************************************************/
+
+ SHORT_JMP:
+
+ /* Try to make this jump a short one */
+
+ emitSetShortJump(jmp);
+
+ if (!jmp->idjShort)
+ {
+ continue; // This jump must be kept long
+ }
+
+ /* This jump is becoming either short or medium */
+
+ oldSize = jsz;
+ jsz = ssz;
+ assert(oldSize >= jsz);
+ sizeDif = oldSize - jsz;
+
+#if defined(_TARGET_XARCH_)
+ jmp->idCodeSize(jsz);
+#elif defined(_TARGET_ARM_)
+#if 0
+ // This is done as part of emitSetShortJump():
+ insSize isz = emitInsSize(jmp->idInsFmt());
+ jmp->idInsSize(isz);
+#endif
+#elif defined(_TARGET_ARM64_)
+        // The sizes of IF_LARGEJMP/IF_LARGEADR/IF_LARGELDC are 8 or 12 bytes; all other instructions are
+        // 4 bytes, so the size difference must be 4 or 8.
+ assert((sizeDif == 4) || (sizeDif == 8));
+#else
+#error Unsupported or unset target architecture
+#endif
+
+ goto NEXT_JMP;
+
+#if defined(_TARGET_ARM_)
+
+ /*****************************************************************************/
+ /* Handle conversion to medium jump */
+ /*****************************************************************************/
+
+ MEDIUM_JMP:
+
+ /* Try to make this jump a medium one */
+
+ emitSetMediumJump(jmp);
+
+ if (jmp->idCodeSize() > msz)
+ {
+ continue; // This jump wasn't shortened
+ }
+ assert(jmp->idCodeSize() == msz);
+
+ /* This jump is becoming medium */
+
+ oldSize = jsz;
+ jsz = msz;
+ assert(oldSize >= jsz);
+ sizeDif = oldSize - jsz;
+
+ goto NEXT_JMP;
+
+#endif // _TARGET_ARM_
+
+ /*****************************************************************************/
+
+ NEXT_JMP:
+
+ /* Make sure the size of the jump is marked correctly */
+
+ assert((0 == (jsz | jmpDist)) || (jsz == emitSizeOfJump(jmp)));
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ printf("Shrinking jump [%08X/%03u]\n", dspPtr(jmp), jmp->idDebugOnlyInfo()->idNum);
+ }
+#endif
+ noway_assert((unsigned short)sizeDif == sizeDif);
+
+ adjIG += sizeDif;
+ adjLJ += sizeDif;
+ jmpIG->igSize -= (unsigned short)sizeDif;
+ emitTotalCodeSize -= sizeDif;
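+        // 'adjIG' accumulates the total shrinkage for the whole method, while 'adjLJ' accumulates it only
+        // within the current group (it is reset when we move to a new group) and is applied to the
+        // group-relative offsets of the jumps that follow.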
+
+ /* The jump size estimate wasn't accurate; flag its group */
+
+ jmpIG->igFlags |= IGF_UPD_ISZ;
+
+ } // end for each jump
+
+ /* Did we shorten any jumps? */
+
+ if (adjIG)
+ {
+ /* Adjust offsets of any remaining blocks */
+
+ assert(lstIG);
+
+ for (;;)
+ {
+ lstIG = lstIG->igNext;
+ if (!lstIG)
+ {
+ break;
+ }
+ // printf("Adjusted offset of block %02u from %04X to %04X\n", lstIG->igNum, lstIG->igOffs,
+ // lstIG->igOffs - adjIG);
+ lstIG->igOffs -= adjIG;
+ assert(IsCodeAligned(lstIG->igOffs));
+ }
+
+#ifdef DEBUG
+ emitCheckIGoffsets();
+#endif
+
+ /* Is there a chance of other jumps becoming short? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+#if defined(_TARGET_ARM_)
+ if (EMITVERBOSE)
+ printf("Total shrinkage = %3u, min extra short jump size = %3u, min extra medium jump size = %u\n", adjIG,
+ minShortExtra, minMediumExtra);
+#else
+ if (EMITVERBOSE)
+ {
+ printf("Total shrinkage = %3u, min extra jump size = %3u\n", adjIG, minShortExtra);
+ }
+#endif
+#endif
+
+ if ((minShortExtra <= adjIG)
+#if defined(_TARGET_ARM_)
+ || (minMediumExtra <= adjIG)
+#endif // _TARGET_ARM_
+ )
+ {
+ jmp_iteration++;
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ printf("Iterating branch shortening. Iteration = %d\n", jmp_iteration);
+ }
+#endif
+
+ goto AGAIN;
+ }
+ }
+}
+
+void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG)
+{
+#ifdef DEBUG
+ // We should not be jumping/branching across funclets/functions
+ // Except possibly a 'call' to a finally funclet for a local unwind
+ // or a 'return' from a catch handler (that can go just about anywhere)
+ // This routine attempts to validate that any branches across funclets
+    // meet one of those criteria...
+ assert(jmp->idIsBound());
+
+#ifdef _TARGET_AMD64_
+ // An lea of a code address (for constant data stored with the code)
+ // is treated like a jump for emission purposes but is not really a jump so
+ // we don't have to check anything here.
+ if (jmp->idIns() == INS_lea)
+ {
+ return;
+ }
+#endif
+
+#ifdef _TARGET_ARMARCH_
+ if (jmp->idAddr()->iiaHasInstrCount())
+ {
+ // Too hard to figure out funclets from just an instruction count
+ // You're on your own!
+ return;
+ }
+#endif // _TARGET_ARMARCH_
+
+#ifdef _TARGET_ARM64_
+ // No interest if it's not jmp.
+ if (emitIsLoadLabel(jmp) || emitIsLoadConstant(jmp))
+ {
+ return;
+ }
+#endif // _TARGET_ARM64_
+
+ insGroup* tgtIG = jmp->idAddr()->iiaIGlabel;
+ assert(tgtIG);
+ if (tgtIG->igFuncIdx != jmpIG->igFuncIdx)
+ {
+ if (jmp->idDebugOnlyInfo()->idFinallyCall)
+ {
+ // We don't record enough information to determine this accurately, so instead
+ // we assume that any branch to the very start of a finally is OK.
+
+ // No branches back to the root method
+ assert(tgtIG->igFuncIdx > 0);
+ FuncInfoDsc* tgtFunc = emitComp->funGetFunc(tgtIG->igFuncIdx);
+ assert(tgtFunc->funKind == FUNC_HANDLER);
+ EHblkDsc* tgtEH = emitComp->ehGetDsc(tgtFunc->funEHIndex);
+
+ // Only branches to finallys (not faults, catches, filters, etc.)
+ assert(tgtEH->HasFinallyHandler());
+
+ // Only to the first block of the finally (which is properly marked)
+ BasicBlock* tgtBlk = tgtEH->ebdHndBeg;
+ assert(tgtBlk->bbFlags & BBF_FUNCLET_BEG);
+
+ // And now we made it back to where we started
+ assert(tgtIG == emitCodeGetCookie(tgtBlk));
+ assert(tgtIG->igFuncIdx == emitComp->funGetFuncIdx(tgtBlk));
+ }
+ else if (jmp->idDebugOnlyInfo()->idCatchRet)
+ {
+ // Again there isn't enough information to prove this correct
+ // so just allow a 'branch' to any other 'parent' funclet
+
+ FuncInfoDsc* jmpFunc = emitComp->funGetFunc(jmpIG->igFuncIdx);
+ assert(jmpFunc->funKind == FUNC_HANDLER);
+ EHblkDsc* jmpEH = emitComp->ehGetDsc(jmpFunc->funEHIndex);
+
+ // Only branches out of catches
+ assert(jmpEH->HasCatchHandler());
+
+ FuncInfoDsc* tgtFunc = emitComp->funGetFunc(tgtIG->igFuncIdx);
+ assert(tgtFunc);
+ if (tgtFunc->funKind == FUNC_HANDLER)
+ {
+ // An outward chain to the containing funclet/EH handler
+ // Note that it might be anywhere within nested try bodies
+ assert(jmpEH->ebdEnclosingHndIndex == tgtFunc->funEHIndex);
+ }
+ else
+ {
+ // This funclet is 'top level' and so it is branching back to the
+ // root function, and should have no containing EH handlers
+ // but it could be nested within try bodies...
+ assert(tgtFunc->funKind == FUNC_ROOT);
+ assert(jmpEH->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX);
+ }
+ }
+ else
+ {
+ printf("Hit an illegal branch between funclets!");
+ assert(tgtIG->igFuncIdx == jmpIG->igFuncIdx);
+ }
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Compute the code sizes that we're going to use to allocate the code buffers.
+ *
+ * This sets:
+ *
+ * emitTotalHotCodeSize
+ * emitTotalColdCodeSize
+ * Compiler::info.compTotalHotCodeSize
+ * Compiler::info.compTotalColdCodeSize
+ */
+
+void emitter::emitComputeCodeSizes()
+{
+ assert((emitComp->fgFirstColdBlock == nullptr) == (emitFirstColdIG == nullptr));
+
+ if (emitFirstColdIG)
+ {
+ emitTotalHotCodeSize = emitFirstColdIG->igOffs;
+ emitTotalColdCodeSize = emitTotalCodeSize - emitTotalHotCodeSize;
+ }
+ else
+ {
+ emitTotalHotCodeSize = emitTotalCodeSize;
+ emitTotalColdCodeSize = 0;
+ }
+
+ emitComp->info.compTotalHotCodeSize = emitTotalHotCodeSize;
+ emitComp->info.compTotalColdCodeSize = emitTotalColdCodeSize;
+
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("\nHot code size = 0x%X bytes\n", emitTotalHotCodeSize);
+ printf("Cold code size = 0x%X bytes\n", emitTotalColdCodeSize);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Called at the end of code generation, this method creates the code, data
+ * and GC info blocks for the method. Returns the size of the method (which must fit in an unsigned).
+ */
+
+unsigned emitter::emitEndCodeGen(Compiler* comp,
+ bool contTrkPtrLcls,
+ bool fullyInt,
+ bool fullPtrMap,
+ bool returnsGCr,
+ unsigned xcptnsCount,
+ unsigned* prologSize,
+ unsigned* epilogSize,
+ void** codeAddr,
+ void** coldCodeAddr,
+ void** consAddr)
+{
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("*************** In emitEndCodeGen()\n");
+ }
+#endif
+
+ insGroup* ig;
+
+ BYTE* consBlock;
+ BYTE* codeBlock;
+ BYTE* coldCodeBlock;
+ BYTE* cp;
+
+ assert(emitCurIG == nullptr);
+
+ emitCodeBlock = nullptr;
+ emitConsBlock = nullptr;
+
+ /* Tell everyone whether we have fully interruptible code or not */
+
+ emitFullyInt = fullyInt;
+ emitFullGCinfo = fullPtrMap;
+
+#if EMITTER_STATS
+ GCrefsTable.record(emitGCrFrameOffsCnt);
+ emitSizeTable.record(static_cast<unsigned>(emitSizeMethod));
+ stkDepthTable.record(emitMaxStackDepth);
+#endif // EMITTER_STATS
+
+ // Default values, correct even if EMIT_TRACK_STACK_DEPTH is 0.
+ emitSimpleStkUsed = true;
+ u1.emitSimpleStkMask = 0;
+ u1.emitSimpleByrefStkMask = 0;
+
+#if EMIT_TRACK_STACK_DEPTH
+ /* Convert max. stack depth from # of bytes to # of entries */
+
+ emitMaxStackDepth /= sizeof(int);
+
+ /* Should we use the simple stack */
+
+ if (emitMaxStackDepth > MAX_SIMPLE_STK_DEPTH || emitFullGCinfo)
+ {
+ /* We won't use the "simple" argument table */
+
+ emitSimpleStkUsed = false;
+
+ /* Allocate the argument tracking table */
+
+ if (emitMaxStackDepth <= sizeof(u2.emitArgTrackLcl))
+ {
+ u2.emitArgTrackTab = (BYTE*)u2.emitArgTrackLcl;
+ }
+ else
+ {
+ u2.emitArgTrackTab = (BYTE*)emitGetMem(roundUp(emitMaxStackDepth));
+ }
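+        // Each entry in the argument tracking table is one byte holding the GCtype of the corresponding
+        // stack slot.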
+
+ u2.emitArgTrackTop = u2.emitArgTrackTab;
+ u2.emitGcArgTrackCnt = 0;
+ }
+#endif
+
+ if (emitEpilogCnt == 0)
+ {
+ /* No epilogs, make sure the epilog size is set to 0 */
+
+ emitEpilogSize = 0;
+
+#ifdef _TARGET_XARCH_
+ emitExitSeqSize = 0;
+#endif // _TARGET_XARCH_
+ }
+
+ /* Return the size of the epilog to the caller */
+
+ *epilogSize = emitEpilogSize;
+
+#ifdef _TARGET_XARCH_
+ *epilogSize += emitExitSeqSize;
+#endif // _TARGET_XARCH_
+
+#ifdef DEBUG
+ if (EMIT_INSTLIST_VERBOSE)
+ {
+ printf("\nInstruction list before instruction issue:\n\n");
+ emitDispIGlist(true);
+ }
+
+ emitCheckIGoffsets();
+#endif
+
+ /* Allocate the code block (and optionally the data blocks) */
+
+ // If we're doing procedure splitting and we found cold blocks, then
+ // allocate hot and cold buffers. Otherwise only allocate a hot
+ // buffer.
+
+ coldCodeBlock = nullptr;
+
+ CorJitAllocMemFlag allocMemFlag = CORJIT_ALLOCMEM_DEFAULT_CODE_ALIGN;
+
+#ifdef _TARGET_X86_
+ //
+ // These are the heuristics we use to decide whether or not to force the
+ // code to be 16-byte aligned.
+ //
+ // 1. For ngen code with IBC data, use 16-byte alignment if the method
+ // has been called more than BB_VERY_HOT_WEIGHT times.
+ // 2. For JITed code and ngen code without IBC data, use 16-byte alignment
+ // when the code is 16 bytes or smaller. We align small getters/setters
+    //    because they are penalized heavily on certain hardware when not 16-byte
+    //    aligned (VSWhidbey #373938). To minimize the size impact of this optimization,
+    //    we do not align large methods, since the penalty is amortized for them.
+ //
+ if (emitComp->fgHaveProfileData())
+ {
+ if (emitComp->fgCalledWeight > (BB_VERY_HOT_WEIGHT * emitComp->fgNumProfileRuns))
+ {
+ allocMemFlag = CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN;
+ }
+ }
+ else
+ {
+ if (emitTotalHotCodeSize <= 16)
+ {
+ allocMemFlag = CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN;
+ }
+ }
+#endif
+
+#ifdef _TARGET_ARM64_
+    // For arm64, we always allocate JIT data adjacent to the code, similar to what the native compiler does.
+    // This allows us to use a single `ldr` to access data such as float constants and jump tables.
+ if (emitTotalColdCodeSize > 0)
+ {
+ // JIT data might be far away from the cold code.
+ NYI_ARM64("Need to handle fix-up to data from cold code.");
+ }
+
+ UNATIVE_OFFSET roDataAlignmentDelta = 0;
+ if (emitConsDsc.dsdOffs)
+ {
+        UNATIVE_OFFSET roDataAlignment = sizeof(void*); // 8-byte alignment by default.
+ roDataAlignmentDelta = (UNATIVE_OFFSET)ALIGN_UP(emitTotalHotCodeSize, roDataAlignment) - emitTotalHotCodeSize;
+ assert((roDataAlignmentDelta == 0) || (roDataAlignmentDelta == 4));
+ }
+ emitCmpHandle->allocMem(emitTotalHotCodeSize + roDataAlignmentDelta + emitConsDsc.dsdOffs, emitTotalColdCodeSize, 0,
+ xcptnsCount, allocMemFlag, (void**)&codeBlock, (void**)&coldCodeBlock, (void**)&consBlock);
+
+ consBlock = codeBlock + emitTotalHotCodeSize + roDataAlignmentDelta;
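+    // Resulting layout: [hot code][optional 4-byte pad][read-only data], so the data block is addressable
+    // from the code with a single PC-relative `ldr`/`adr`.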
+
+#else
+ emitCmpHandle->allocMem(emitTotalHotCodeSize, emitTotalColdCodeSize, emitConsDsc.dsdOffs, xcptnsCount, allocMemFlag,
+ (void**)&codeBlock, (void**)&coldCodeBlock, (void**)&consBlock);
+#endif
+
+ // if (emitConsDsc.dsdOffs)
+ // printf("Cons=%08X\n", consBlock);
+
+ /* Give the block addresses to the caller and other functions here */
+
+ *codeAddr = emitCodeBlock = codeBlock;
+ *coldCodeAddr = emitColdCodeBlock = coldCodeBlock;
+ *consAddr = emitConsBlock = consBlock;
+
+ /* Nothing has been pushed on the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if EMIT_TRACK_STACK_DEPTH
+ emitCurStackLvl = 0;
+#endif
+
+ /* Assume no live GC ref variables on entry */
+
+ VarSetOps::ClearD(emitComp, emitThisGCrefVars); // This is initialized to Empty at the start of codegen.
+ emitThisGCrefRegs = emitThisByrefRegs = RBM_NONE;
+ emitThisGCrefVset = true;
+
+#ifdef DEBUG
+
+ emitIssuing = true;
+
+ // We don't use these after this point
+
+ VarSetOps::AssignNoCopy(emitComp, emitPrevGCrefVars, VarSetOps::UninitVal());
+ emitPrevGCrefRegs = emitPrevByrefRegs = 0xBAADFEED;
+
+ VarSetOps::AssignNoCopy(emitComp, emitInitGCrefVars, VarSetOps::UninitVal());
+ emitInitGCrefRegs = emitInitByrefRegs = 0xBAADFEED;
+
+#endif
+
+ /* Initialize the GC ref variable lifetime tracking logic */
+
+ codeGen->gcInfo.gcVarPtrSetInit();
+
+ emitSyncThisObjOffs = -1; /* -1 means no offset set */
+ emitSyncThisObjReg = REG_NA; /* REG_NA means not set */
+
+#ifdef JIT32_GCENCODER
+ if (emitComp->lvaKeepAliveAndReportThis())
+ {
+ assert(emitComp->lvaIsOriginalThisArg(0));
+ LclVarDsc* thisDsc = &emitComp->lvaTable[0];
+
+ /* If "this" (which is passed in as a register argument in REG_ARG_0)
+ is enregistered, we normally spot the "mov REG_ARG_0 -> thisReg"
+ in the prolog and note the location of "this" at that point.
+ However, if 'this' is enregistered into REG_ARG_0 itself, no code
+ will be generated in the prolog, so we explicitly need to note
+ the location of "this" here.
+ NOTE that we can do this even if "this" is not enregistered in
+ REG_ARG_0, and it will result in more accurate "this" info over the
+ prolog. However, as methods are not interruptible over the prolog,
+ we try to save space by avoiding that.
+ */
+
+ if (thisDsc->lvRegister)
+ {
+ emitSyncThisObjReg = thisDsc->lvRegNum;
+
+ if (emitSyncThisObjReg == (int)REG_ARG_0 &&
+ (codeGen->intRegState.rsCalleeRegArgMaskLiveIn & genRegMask(REG_ARG_0)))
+ {
+ if (emitFullGCinfo)
+ {
+ emitGCregLiveSet(GCT_GCREF, genRegMask(REG_ARG_0),
+ emitCodeBlock, // from offset 0
+ true);
+ }
+ else
+ {
+                    /* If emitFullGCinfo==false, then we don't use any
+                       regPtrDsc's and so we explicitly note the location
+                       of "this" in GCEncode.cpp
+ */
+ }
+ }
+ }
+ }
+#endif // JIT32_GCENCODER
+
+ emitContTrkPtrLcls = contTrkPtrLcls;
+
+ /* Are there any GC ref variables on the stack? */
+
+ if (emitGCrFrameOffsCnt)
+ {
+ size_t siz;
+ unsigned cnt;
+ unsigned num;
+ LclVarDsc* dsc;
+ int* tab;
+
+ /* Allocate and clear emitGCrFrameLiveTab[]. This is the table
+ mapping "stkOffs -> varPtrDsc". It holds a pointer to
+ the liveness descriptor that was created when the
+ variable became alive. When the variable becomes dead, the
+ descriptor will be appended to the liveness descriptor list, and
+ the entry in emitGCrFrameLiveTab[] will be made NULL.
+
+ Note that if all GC refs are assigned consecutively,
+           emitGCrFrameLiveTab[] needs to be only as big as the number of GC refs
+           present, instead of lvaTrackedCount.
+ */
+
+ siz = emitGCrFrameOffsCnt * sizeof(*emitGCrFrameLiveTab);
+ emitGCrFrameLiveTab = (varPtrDsc**)emitGetMem(roundUp(siz));
+ memset(emitGCrFrameLiveTab, 0, siz);
+
+ /* Allocate and fill in emitGCrFrameOffsTab[]. This is the table
+ mapping "varIndex -> stkOffs".
+ Non-ptrs or reg vars have entries of -1.
+           Entries for tracked stack byrefs have the lower bit set to 1.
+ */
+
+ emitTrkVarCnt = cnt = emitComp->lvaTrackedCount;
+ assert(cnt);
+ emitGCrFrameOffsTab = tab = (int*)emitGetMem(cnt * sizeof(int));
+
+ memset(emitGCrFrameOffsTab, -1, cnt * sizeof(int));
+
+ /* Now fill in all the actual used entries */
+
+ for (num = 0, dsc = emitComp->lvaTable, cnt = emitComp->lvaCount; num < cnt; num++, dsc++)
+ {
+ if (!dsc->lvOnFrame || (dsc->lvIsParam && !dsc->lvIsRegArg))
+ {
+ continue;
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+ if (num == emitComp->lvaOutgoingArgSpaceVar)
+ {
+ continue;
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ int offs = dsc->lvStkOffs;
+
+ /* Is it within the interesting range of offsets */
+
+ if (offs >= emitGCrFrameOffsMin && offs < emitGCrFrameOffsMax)
+ {
+ /* Are tracked stack ptr locals laid out contiguously?
+ If not, skip non-ptrs. The emitter is optimized to work
+ with contiguous ptrs, but for EditNContinue, the variables
+ are laid out in the order they occur in the local-sig.
+ */
+
+ if (!emitContTrkPtrLcls)
+ {
+ if (!emitComp->lvaIsGCTracked(dsc))
+ {
+ continue;
+ }
+ }
+
+ unsigned indx = dsc->lvVarIndex;
+
+ assert(!dsc->lvRegister);
+ assert(dsc->lvTracked);
+ assert(dsc->lvRefCnt != 0);
+
+ assert(dsc->TypeGet() == TYP_REF || dsc->TypeGet() == TYP_BYREF);
+
+ assert(indx < emitComp->lvaTrackedCount);
+
+// printf("Variable #%2u/%2u is at stack offset %d\n", num, indx, offs);
+
+#ifdef JIT32_GCENCODER
+ /* Remember the frame offset of the "this" argument for synchronized methods */
+ if (emitComp->lvaIsOriginalThisArg(num) && emitComp->lvaKeepAliveAndReportThis())
+ {
+ emitSyncThisObjOffs = offs;
+ offs |= this_OFFSET_FLAG;
+ }
+#endif // JIT32_GCENCODER
+
+ if (dsc->TypeGet() == TYP_BYREF)
+ {
+ offs |= byref_OFFSET_FLAG;
+ }
+ tab[indx] = offs;
+ }
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ emitTrkVarCnt = 0;
+ emitGCrFrameOffsTab = nullptr;
+#endif
+ }
+
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("\n***************************************************************************\n");
+ printf("Instructions as they come out of the scheduler\n\n");
+ }
+#endif
+
+ /* Issue all instruction groups in order */
+ cp = codeBlock;
+
+#define DEFAULT_CODE_BUFFER_INIT 0xcc
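+// On xarch, 0xcc is the 'int 3' breakpoint opcode, so execution that falls into unused buffer space traps
+// immediately; on other targets it simply serves as a recognizable filler byte.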
+
+ for (ig = emitIGlist; ig; ig = ig->igNext)
+ {
+ assert(!(ig->igFlags & IGF_PLACEHOLDER)); // There better not be any placeholder groups left
+
+ /* Is this the first cold block? */
+ if (ig == emitFirstColdIG)
+ {
+ unsigned actualHotCodeSize = emitCurCodeOffs(cp);
+
+            /* Fill in any unused space */
+ while (emitCurCodeOffs(cp) < emitTotalHotCodeSize)
+ {
+ *cp++ = DEFAULT_CODE_BUFFER_INIT;
+ }
+
+ assert(coldCodeBlock);
+ cp = coldCodeBlock;
+#ifdef DEBUG
+ if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
+ {
+ printf("\n************** Beginning of cold code **************\n");
+ }
+#endif
+ }
+
+ /* Are we overflowing? */
+ if (ig->igNext && ig->igNum + 1 != ig->igNext->igNum)
+ {
+ NO_WAY("Too many instruction groups");
+ }
+
+        // If this instruction group is the target of a return from a funclet implementing a finally, then on
+        // architectures where it is necessary, generate GC info for the current instruction as if it were the
+        // instruction following a call.
+ emitGenGCInfoIfFuncletRetTarget(ig, cp);
+
+ instrDesc* id = (instrDesc*)ig->igData;
+
+#ifdef DEBUG
+
+ /* Print the IG label, but only if it is a branch label */
+
+ if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
+ {
+ if (emitComp->verbose)
+ {
+ printf("\n");
+ emitDispIG(ig); // Display the flags, IG data, etc.
+ }
+ else
+ {
+ printf("\nG_M%03u_IG%02u:\n", Compiler::s_compMethodsCount, ig->igNum);
+ }
+ }
+
+#endif // DEBUG
+
+ BYTE* bp = cp;
+
+ /* Record the actual offset of the block, noting the difference */
+
+ emitOffsAdj = ig->igOffs - emitCurCodeOffs(cp);
+ assert(emitOffsAdj >= 0);
+
+#if DEBUG_EMIT
+ if ((emitOffsAdj != 0) && emitComp->verbose)
+ {
+ printf("Block predicted offs = %08X, actual = %08X -> size adj = %d\n", ig->igOffs, emitCurCodeOffs(cp),
+ emitOffsAdj);
+ }
+#endif // DEBUG_EMIT
+
+ ig->igOffs = emitCurCodeOffs(cp);
+ assert(IsCodeAligned(ig->igOffs));
+
+#if EMIT_TRACK_STACK_DEPTH
+
+ /* Set the proper stack level if appropriate */
+
+ if (ig->igStkLvl != emitCurStackLvl)
+ {
+ /* We are pushing stuff implicitly at this label */
+
+ assert((unsigned)ig->igStkLvl > (unsigned)emitCurStackLvl);
+ emitStackPushN(cp, (ig->igStkLvl - (unsigned)emitCurStackLvl) / sizeof(int));
+ }
+
+#endif
+
+ /* Update current GC information for non-overflow IG (not added implicitly by the emitter) */
+
+ if (!(ig->igFlags & IGF_EMIT_ADD))
+ {
+ /* Is there a new set of live GC ref variables? */
+
+ if (ig->igFlags & IGF_GC_VARS)
+ {
+ emitUpdateLiveGCvars(ig->igGCvars(), cp);
+ }
+ else if (!emitThisGCrefVset)
+ {
+ emitUpdateLiveGCvars(emitThisGCrefVars, cp);
+ }
+
+ /* Update the set of live GC ref registers */
+
+ {
+ regMaskTP GCregs = ig->igGCregs;
+
+ if (GCregs != emitThisGCrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_GCREF, GCregs, cp);
+ }
+ }
+
+ /* Is there a new set of live byref registers? */
+
+ if (ig->igFlags & IGF_BYREF_REGS)
+ {
+ unsigned byrefRegs = ig->igByrefRegs();
+
+ if (byrefRegs != emitThisByrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, cp);
+ }
+ }
+ }
+ else
+ {
+ // These are not set for "overflow" groups
+ assert(!(ig->igFlags & IGF_GC_VARS));
+ assert(!(ig->igFlags & IGF_BYREF_REGS));
+ }
+
+ /* Issue each instruction in order */
+
+ emitCurIG = ig;
+
+ for (unsigned cnt = ig->igInsCnt; cnt; cnt--)
+ {
+ castto(id, BYTE*) += emitIssue1Instr(ig, id, &cp);
+ }
+
+ emitCurIG = nullptr;
+
+ assert(ig->igSize >= cp - bp);
+ ig->igSize = (unsigned short)(cp - bp);
+ }
+
+#if EMIT_TRACK_STACK_DEPTH
+ assert(emitCurStackLvl == 0);
+#endif
+
+ /* Output any initialized data we may have */
+
+ if (emitConsDsc.dsdOffs)
+ {
+ emitOutputDataSec(&emitConsDsc, consBlock);
+ }
+
+ /* Make sure all GC ref variables are marked as dead */
+
+ if (emitGCrFrameOffsCnt)
+ {
+ unsigned vn;
+ int of;
+ varPtrDsc** dp;
+
+ for (vn = 0, of = emitGCrFrameOffsMin, dp = emitGCrFrameLiveTab; vn < emitGCrFrameOffsCnt;
+ vn++, of += sizeof(void*), dp++)
+ {
+ if (*dp)
+ {
+ emitGCvarDeadSet(of, cp, vn);
+ }
+ }
+ }
+
+ /* No GC registers are live any more */
+
+ if (emitThisByrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_BYREF, RBM_NONE, cp);
+ }
+ if (emitThisGCrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_GCREF, RBM_NONE, cp);
+ }
+
+ /* Patch any forward jumps */
+
+ if (emitFwdJumps)
+ {
+ instrDescJmp* jmp;
+
+ for (jmp = emitJumpList; jmp; jmp = jmp->idjNext)
+ {
+ insGroup* tgt;
+#ifdef _TARGET_XARCH_
+ assert(jmp->idInsFmt() == IF_LABEL || jmp->idInsFmt() == IF_RWR_LABEL || jmp->idInsFmt() == IF_SWR_LABEL);
+#endif
+ tgt = jmp->idAddr()->iiaIGlabel;
+
+ if (jmp->idjTemp.idjAddr == nullptr)
+ {
+ continue;
+ }
+
+ if (jmp->idjOffs != tgt->igOffs)
+ {
+ BYTE* adr = jmp->idjTemp.idjAddr;
+ int adj = jmp->idjOffs - tgt->igOffs;
+#ifdef _TARGET_ARM_
+                // On ARM, the offset is encoded in units of 2 bytes.
+ adj >>= 1;
+#endif
+
+#if DEBUG_EMIT
+ if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+#ifdef _TARGET_ARM_
+ printf("[5] This output is broken for ARM, since it doesn't properly decode the jump offsets of "
+ "the instruction at adr\n");
+#endif
+
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[5] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum);
+ }
+
+ if (jmp->idjShort)
+ {
+ printf("[5] Jump is at %08X\n", (adr + 1 - emitCodeBlock));
+ printf("[5] Jump distance is %02X - %02X = %02X\n", *(BYTE*)adr, adj, *(BYTE*)adr - adj);
+ }
+ else
+ {
+ printf("[5] Jump is at %08X\n", (adr + 4 - emitCodeBlock));
+ printf("[5] Jump distance is %08X - %02X = %08X\n", *(int*)adr, adj, *(int*)adr - adj);
+ }
+ }
+#endif // DEBUG_EMIT
+
+ if (jmp->idjShort)
+ {
+ // Patch Forward Short Jump
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if defined(_TARGET_XARCH_)
+ *(BYTE*)adr -= (BYTE)adj;
+#elif defined(_TARGET_ARM_)
+ // The following works because the jump offset is in the low order bits of the instruction.
+ // Presumably we could also just call "emitOutputLJ(NULL, adr, jmp)", like for long jumps?
+ *(short int*)adr -= (short)adj;
+#elif defined(_TARGET_ARM64_)
+ assert(!jmp->idAddr()->iiaHasInstrCount());
+ emitOutputLJ(NULL, adr, jmp);
+#else
+#error Unsupported or unset target architecture
+#endif
+ }
+ else
+ {
+ // Patch Forward non-Short Jump
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if defined(_TARGET_XARCH_)
+ *(int*)adr -= adj;
+#elif defined(_TARGET_ARMARCH_)
+ assert(!jmp->idAddr()->iiaHasInstrCount());
+ emitOutputLJ(NULL, adr, jmp);
+#else
+#error Unsupported or unset target architecture
+#endif
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (emitComp->opts.disAsm)
+ {
+ printf("\n");
+ }
+
+ if (emitComp->verbose)
+ {
+ printf("Allocated method code size = %4u , actual size = %4u\n", emitTotalCodeSize, cp - codeBlock);
+ }
+#endif
+
+ unsigned actualCodeSize = emitCurCodeOffs(cp);
+
+    /* Fill in any unused space */
+ while (emitCurCodeOffs(cp) < emitTotalCodeSize)
+ {
+ *cp++ = DEFAULT_CODE_BUFFER_INIT;
+ }
+
+#if EMITTER_STATS
+ totAllocdSize += emitTotalCodeSize;
+ totActualSize += actualCodeSize;
+#endif
+
+#ifdef DEBUG
+
+ // Make sure these didn't change during the "issuing" phase
+
+ assert(VarSetOps::MayBeUninit(emitPrevGCrefVars));
+ assert(emitPrevGCrefRegs == 0xBAADFEED);
+ assert(emitPrevByrefRegs == 0xBAADFEED);
+
+ assert(VarSetOps::MayBeUninit(emitInitGCrefVars));
+ assert(emitInitGCrefRegs == 0xBAADFEED);
+ assert(emitInitByrefRegs == 0xBAADFEED);
+
+#endif
+
+ // Assign the real prolog size
+ *prologSize = emitCodeOffset(emitPrologIG, emitPrologEndPos);
+
+ /* Return the amount of code we've generated */
+
+ return actualCodeSize;
+}
+
+// See specification comment at the declaration.
+void emitter::emitGenGCInfoIfFuncletRetTarget(insGroup* ig, BYTE* cp)
+{
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // We only emit this GC information on targets where finally's are implemented via funclets,
+ // and the finally is invoked, during non-exceptional execution, via a branch with a predefined
+ // link register, rather than a "true call" for which we would already generate GC info. Currently,
+ // this means precisely ARM.
+ if (ig->igFlags & IGF_FINALLY_TARGET)
+ {
+ // We don't actually have a call instruction in this case, so we don't have
+ // a real size for that instruction. We'll use 1.
+ emitStackPop(cp, /*isCall*/ true, /*callInstrSize*/ 1, /*args*/ 0);
+
+ /* Do we need to record a call location for GC purposes? */
+ if (!emitFullGCinfo)
+ {
+ emitRecordGCcall(cp, /*callInstrSize*/ 1);
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+}
+
+/*****************************************************************************
+ *
+ * We have an instruction in an insGroup and we need to know the
+ * instruction number for this instruction
+ */
+
+unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch)
+{
+ instrDesc* id = (instrDesc*)ig->igData;
+
+ // Check if we are the first instruction in the group
+ if (id == idMatch)
+ {
+ return 0;
+ }
+
+ /* Walk the list of instructions until we find a match */
+ unsigned insNum = 0;
+ unsigned insRemaining = ig->igInsCnt;
+
+ while (insRemaining > 0)
+ {
+ castto(id, BYTE*) += emitSizeOfInsDsc(id);
+ insNum++;
+ insRemaining--;
+
+ if (id == idMatch)
+ {
+ return insNum;
+ }
+ }
+ assert(!"emitFindInsNum failed");
+ return -1;
+}
+
+/*****************************************************************************
+ *
+ * We've been asked for the code offset of an instruction but alas one or
+ * more instruction sizes in the block have been mis-predicted, so we have
+ * to find the true offset by looking for the instruction within the group.
+ */
+
+UNATIVE_OFFSET emitter::emitFindOffset(insGroup* ig, unsigned insNum)
+{
+ instrDesc* id = (instrDesc*)ig->igData;
+ UNATIVE_OFFSET of = 0;
+
+#ifdef DEBUG
+ /* Make sure we were passed reasonable arguments */
+ assert(ig && ig->igSelf == ig);
+ assert(ig->igInsCnt >= insNum);
+#endif
+
+ /* Walk the instruction list until all are counted */
+
+ while (insNum > 0)
+ {
+ of += emitInstCodeSz(id);
+
+ castto(id, BYTE*) += emitSizeOfInsDsc(id);
+
+ insNum--;
+ }
+
+ return of;
+}
+
+/*****************************************************************************
+ *
+ * Start generating a constant data section for the current
+ * function. Returns the offset of the section in the appropriate data
+ * block.
+ */
+
+UNATIVE_OFFSET emitter::emitDataGenBeg(UNATIVE_OFFSET size, bool dblAlign, bool codeLtab)
+{
+ unsigned secOffs;
+ dataSection* secDesc;
+
+ assert(emitDataSecCur == nullptr);
+
+ /* The size better not be some kind of an odd thing */
+
+ assert(size && size % sizeof(int) == 0);
+
+ /* Get hold of the current offset */
+
+ secOffs = emitConsDsc.dsdOffs;
+
+    /* Are we required to align this request on an eight-byte boundary? */
+ if (dblAlign && (secOffs % sizeof(double) != 0))
+ {
+        /* Need to skip 4 bytes to honor dblAlign */
+        /* Must allocate a dummy 4-byte integer */
+ int zero = 0;
+ emitDataGenBeg(4, false, false);
+ emitDataGenData(0, &zero, 4);
+ emitDataGenEnd();
+
+ /* Get the new secOffs */
+ secOffs = emitConsDsc.dsdOffs;
+ /* Now it should be a multiple of 8 */
+ assert(secOffs % sizeof(double) == 0);
+ }
+
+ /* Advance the current offset */
+
+ emitConsDsc.dsdOffs += size;
+
+ /* Allocate a data section descriptor and add it to the list */
+
+ secDesc = emitDataSecCur = (dataSection*)emitGetMem(roundUp(sizeof(*secDesc) + size));
+
+ secDesc->dsSize = size;
+
+ secDesc->dsType = dataSection::data;
+
+ secDesc->dsNext = nullptr;
+
+ if (emitConsDsc.dsdLast)
+ {
+ emitConsDsc.dsdLast->dsNext = secDesc;
+ }
+ else
+ {
+ emitConsDsc.dsdList = secDesc;
+ }
+ emitConsDsc.dsdLast = secDesc;
+
+ return secOffs;
+}
+
+// Start generating a constant data section for the current function
+// populated with BasicBlock references.
+// You can choose the references to be either absolute pointers, or
+// 4-byte relative addresses.
+// Currently the relative references are relative to the start of the
+// first block (this is somewhat arbitrary)
+
+UNATIVE_OFFSET emitter::emitBBTableDataGenBeg(unsigned numEntries, bool relativeAddr)
+{
+ unsigned secOffs;
+ dataSection* secDesc;
+
+ assert(emitDataSecCur == nullptr);
+
+ UNATIVE_OFFSET emittedSize;
+
+ if (relativeAddr)
+ {
+ emittedSize = numEntries * 4;
+ }
+ else
+ {
+ emittedSize = numEntries * TARGET_POINTER_SIZE;
+ }
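+    // Note: regardless of the emitted element size, the descriptor buffer below holds BasicBlock* entries;
+    // they are translated to code addresses or relative offsets later, in emitOutputDataSec().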
+
+ /* Get hold of the current offset */
+
+ secOffs = emitConsDsc.dsdOffs;
+
+ /* Advance the current offset */
+
+ emitConsDsc.dsdOffs += emittedSize;
+
+ /* Allocate a data section descriptor and add it to the list */
+
+ secDesc = emitDataSecCur = (dataSection*)emitGetMem(roundUp(sizeof(*secDesc) + numEntries * sizeof(BasicBlock*)));
+
+ secDesc->dsSize = emittedSize;
+
+ secDesc->dsType = relativeAddr ? dataSection::blockRelative32 : dataSection::blockAbsoluteAddr;
+
+ secDesc->dsNext = nullptr;
+
+ if (emitConsDsc.dsdLast)
+ {
+ emitConsDsc.dsdLast->dsNext = secDesc;
+ }
+ else
+ {
+ emitConsDsc.dsdList = secDesc;
+ }
+
+ emitConsDsc.dsdLast = secDesc;
+
+ return secOffs;
+}
+
+/*****************************************************************************
+ *
+ * Emit the given block of bits into the current data section.
+ */
+
+void emitter::emitDataGenData(unsigned offs, const void* data, size_t size)
+{
+ assert(emitDataSecCur && (emitDataSecCur->dsSize >= offs + size));
+
+ assert(emitDataSecCur->dsType == dataSection::data);
+
+ memcpy(emitDataSecCur->dsCont + offs, data, size);
+}
+
+/*****************************************************************************
+ *
+ * Emit the address of the given basic block into the current data section.
+ */
+
+void emitter::emitDataGenData(unsigned index, BasicBlock* label)
+{
+ assert(emitDataSecCur != nullptr);
+ assert(emitDataSecCur->dsType == dataSection::blockAbsoluteAddr ||
+ emitDataSecCur->dsType == dataSection::blockRelative32);
+
+ unsigned emittedElemSize = emitDataSecCur->dsType == dataSection::blockAbsoluteAddr ? TARGET_POINTER_SIZE : 4;
+
+ assert(emitDataSecCur->dsSize >= emittedElemSize * (index + 1));
+
+ ((BasicBlock**)(emitDataSecCur->dsCont))[index] = label;
+}
+
+/*****************************************************************************
+ *
+ * We're done generating a data section.
+ */
+
+void emitter::emitDataGenEnd()
+{
+
+#ifdef DEBUG
+ assert(emitDataSecCur);
+ emitDataSecCur = nullptr;
+#endif
+}
+
+/********************************************************************************
+ * Generates a data section constant
+ *
+ * Parameters:
+ * cnsAddr - memory location containing constant value
+ * cnsSize - size of constant in bytes
+ * dblAlign - whether to double align the data section constant
+ *
+ * Returns constant number as offset into data section.
+ */
+UNATIVE_OFFSET emitter::emitDataConst(const void* cnsAddr, unsigned cnsSize, bool dblAlign)
+{
+ // When generating SMALL_CODE, we don't bother with dblAlign
+ if (dblAlign && (emitComp->compCodeOpt() == Compiler::SMALL_CODE))
+ {
+ dblAlign = false;
+ }
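+    // For example (hypothetical caller), storing an 8-byte double constant would pass cnsSize = 8 and
+    // dblAlign = true, and the returned offset is then used to form a data-section-relative address in code.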
+
+ UNATIVE_OFFSET cnum = emitDataGenBeg(cnsSize, dblAlign, false);
+ emitDataGenData(0, cnsAddr, cnsSize);
+ emitDataGenEnd();
+
+ return cnum;
+}
+
+/*****************************************************************************
+ *
+ * Output the given data section at the specified address.
+ */
+
+void emitter::emitOutputDataSec(dataSecDsc* sec, BYTE* dst)
+{
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ printf("\nEmitting data sections: %u total bytes\n", sec->dsdOffs);
+ }
+
+ unsigned secNum = 0;
+#endif
+
+ assert(dst);
+ assert(sec->dsdOffs);
+ assert(sec->dsdList);
+
+ /* Walk and emit the contents of all the data blocks */
+
+ dataSection* dsc;
+
+ for (dsc = sec->dsdList; dsc; dsc = dsc->dsNext)
+ {
+ size_t dscSize = dsc->dsSize;
+
+ // absolute label table
+ if (dsc->dsType == dataSection::blockAbsoluteAddr)
+ {
+ JITDUMP(" section %u, size %u, block absolute addr\n", secNum++, dscSize);
+
+ assert(dscSize && dscSize % sizeof(BasicBlock*) == 0);
+ size_t numElems = dscSize / TARGET_POINTER_SIZE;
+ BYTE** bDst = (BYTE**)dst;
+ for (unsigned i = 0; i < numElems; i++)
+ {
+ BasicBlock* block = ((BasicBlock**)dsc->dsCont)[i];
+
+ // Convert the BasicBlock* value to an IG address
+ insGroup* lab = (insGroup*)emitCodeGetCookie(block);
+
+ // Append the appropriate address to the destination
+ BYTE* target = emitOffsetToPtr(lab->igOffs);
+
+#ifdef _TARGET_ARM_
+ target = (BYTE*)((size_t)target | 1); // Or in thumb bit
+#endif
+ bDst[i] = target;
+ if (emitComp->opts.compReloc)
+ {
+ emitRecordRelocation(&(bDst[i]), target, IMAGE_REL_BASED_HIGHLOW);
+ }
+
+ JITDUMP(" BB%02u: 0x%p\n", block->bbNum, bDst[i]);
+ }
+ }
+ // relative label table
+ else if (dsc->dsType == dataSection::blockRelative32)
+ {
+ JITDUMP(" section %u, size %u, block relative addr\n", secNum++, dscSize);
+
+ unsigned elemSize = 4;
+ size_t numElems = dscSize / 4;
+ unsigned* uDst = (unsigned*)dst;
+ insGroup* labFirst = (insGroup*)emitCodeGetCookie(emitComp->fgFirstBB);
+
+ for (unsigned i = 0; i < numElems; i++)
+ {
+ BasicBlock* block = ((BasicBlock**)dsc->dsCont)[i];
+
+ // Convert the BasicBlock* value to an IG address
+ insGroup* lab = (insGroup*)emitCodeGetCookie(block);
+
+ assert(FitsIn<uint32_t>(lab->igOffs - labFirst->igOffs));
+ uDst[i] = lab->igOffs - labFirst->igOffs;
+
+ JITDUMP(" BB%02u: 0x%x\n", block->bbNum, uDst[i]);
+ }
+ }
+ else
+ {
+ JITDUMP(" section %u, size %u, raw data\n", secNum++, dscSize);
+
+ // Simple binary data: copy the bytes to the target
+ assert(dsc->dsType == dataSection::data);
+
+ memcpy(dst, dsc->dsCont, dscSize);
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ printf(" ");
+ for (size_t i = 0; i < dscSize; i++)
+ {
+ printf("%02x ", dsc->dsCont[i]);
+ if ((((i + 1) % 16) == 0) && (i + 1 != dscSize))
+ {
+ printf("\n ");
+ }
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+ dst += dscSize;
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Record the fact that the given variable now contains a live GC ref.
+ */
+
+void emitter::emitGCvarLiveSet(int offs, GCtype gcType, BYTE* addr, ssize_t disp)
+{
+ assert(emitIssuing);
+
+ varPtrDsc* desc;
+
+ assert((abs(offs) % sizeof(ssize_t)) == 0);
+ assert(needsGC(gcType));
+
+ /* Compute the index into the GC frame table if the caller didn't do it */
+
+ if (disp == -1)
+ {
+ disp = (offs - emitGCrFrameOffsMin) / sizeof(void*);
+ }
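+    // 'disp' indexes emitGCrFrameLiveTab[]: one entry per pointer-sized slot in the tracked frame range,
+    // starting at emitGCrFrameOffsMin.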
+
+ assert((size_t)disp < emitGCrFrameOffsCnt);
+
+ /* Allocate a lifetime record */
+
+ desc = new (emitComp, CMK_GC) varPtrDsc;
+
+ desc->vpdBegOfs = emitCurCodeOffs(addr);
+#ifdef DEBUG
+ desc->vpdEndOfs = 0xFACEDEAD;
+#endif
+
+ desc->vpdVarNum = offs;
+
+ desc->vpdNext = nullptr;
+
+ /* the lower 2 bits encode props about the stk ptr */
+
+ if (offs == emitSyncThisObjOffs)
+ {
+ desc->vpdVarNum |= this_OFFSET_FLAG;
+ }
+
+ if (gcType == GCT_BYREF)
+ {
+ desc->vpdVarNum |= byref_OFFSET_FLAG;
+ }
+
+ /* Append the new entry to the end of the list */
+ if (codeGen->gcInfo.gcVarPtrLast == nullptr)
+ {
+ assert(codeGen->gcInfo.gcVarPtrList == nullptr);
+ codeGen->gcInfo.gcVarPtrList = codeGen->gcInfo.gcVarPtrLast = desc;
+ }
+ else
+ {
+ assert(codeGen->gcInfo.gcVarPtrList != nullptr);
+ codeGen->gcInfo.gcVarPtrLast->vpdNext = desc;
+ codeGen->gcInfo.gcVarPtrLast = desc;
+ }
+
+ /* Record the variable descriptor in the table */
+
+ assert(emitGCrFrameLiveTab[disp] == nullptr);
+ emitGCrFrameLiveTab[disp] = desc;
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ printf("[%08X] %s var born at [%s", dspPtr(desc), GCtypeStr(gcType), emitGetFrameReg());
+
+ if (offs < 0)
+ {
+ printf("-%02XH", -offs);
+ }
+ else if (offs > 0)
+ {
+ printf("+%02XH", +offs);
+ }
+
+ printf("]\n");
+ }
+#endif
+
+ /* The "global" live GC variable mask is no longer up-to-date */
+
+ emitThisGCrefVset = false;
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the given variable no longer contains a live GC ref.
+ */
+
+void emitter::emitGCvarDeadSet(int offs, BYTE* addr, ssize_t disp)
+{
+ assert(emitIssuing);
+
+ varPtrDsc* desc;
+
+ assert(abs(offs) % sizeof(int) == 0);
+
+ /* Compute the index into the GC frame table if the caller didn't do it */
+
+ if (disp == -1)
+ {
+ disp = (offs - emitGCrFrameOffsMin) / sizeof(void*);
+ }
+
+ assert((unsigned)disp < emitGCrFrameOffsCnt);
+
+ /* Get hold of the lifetime descriptor and clear the entry */
+
+ desc = emitGCrFrameLiveTab[disp];
+ emitGCrFrameLiveTab[disp] = nullptr;
+
+ assert(desc);
+ assert((desc->vpdVarNum & ~OFFSET_MASK) == (unsigned)offs);
+
+ /* Record the death code offset */
+
+ assert(desc->vpdEndOfs == 0xFACEDEAD);
+ desc->vpdEndOfs = emitCurCodeOffs(addr);
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ GCtype gcType = (desc->vpdVarNum & byref_OFFSET_FLAG) ? GCT_BYREF : GCT_GCREF;
+ bool isThis = (desc->vpdVarNum & this_OFFSET_FLAG) != 0;
+
+ printf("[%08X] %s%s var died at [%s", dspPtr(desc), GCtypeStr(gcType), isThis ? "this-ptr" : "",
+ emitGetFrameReg());
+
+ if (offs < 0)
+ {
+ printf("-%02XH", -offs);
+ }
+ else if (offs > 0)
+ {
+ printf("+%02XH", +offs);
+ }
+
+ printf("]\n");
+ }
+#endif
+
+ /* The "global" live GC variable mask is no longer up-to-date */
+
+ emitThisGCrefVset = false;
+}
+
+/*****************************************************************************
+ *
+ * Record a new set of live GC ref variables.
+ */
+
+void emitter::emitUpdateLiveGCvars(VARSET_VALARG_TP vars, BYTE* addr)
+{
+ assert(emitIssuing);
+
+ // Don't track GC changes in epilogs
+ if (emitIGisInEpilog(emitCurIG))
+ {
+ return;
+ }
+
+ /* Is the current set accurate and unchanged? */
+
+ if (emitThisGCrefVset && VarSetOps::Equal(emitComp, emitThisGCrefVars, vars))
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("New GC ref live vars=%s ", VarSetOps::ToString(emitComp, vars));
+ dumpConvertedVarSet(emitComp, vars);
+ printf("\n");
+ }
+#endif
+
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, vars);
+
+ /* Are there any GC ref variables on the stack? */
+
+ if (emitGCrFrameOffsCnt)
+ {
+ int* tab;
+ unsigned cnt = emitTrkVarCnt;
+ unsigned num;
+
+ /* Test all the tracked variable bits in the mask */
+
+ for (num = 0, tab = emitGCrFrameOffsTab; num < cnt; num++, tab++)
+ {
+ int val = *tab;
+
+ if (val != -1)
+ {
+ // byref_OFFSET_FLAG and this_OFFSET_FLAG are set
+ // in the table-offsets for byrefs and this-ptr
+
+ int offs = val & ~OFFSET_MASK;
+
+ // printf("var #%2u at %3d is now %s\n", num, offs, (vars & 1) ? "live" : "dead");
+
+ if (VarSetOps::IsMember(emitComp, vars, num))
+ {
+ GCtype gcType = (val & byref_OFFSET_FLAG) ? GCT_BYREF : GCT_GCREF;
+ emitGCvarLiveUpd(offs, INT_MAX, gcType, addr);
+ }
+ else
+ {
+ emitGCvarDeadUpd(offs, addr);
+ }
+ }
+ }
+ }
+
+ emitThisGCrefVset = true;
+}
+
+/*****************************************************************************
+ *
+ * Record a call location for GC purposes (we know that this is a method that
+ * will not be fully interruptible).
+ */
+
+void emitter::emitRecordGCcall(BYTE* codePos, unsigned char callInstrSize)
+{
+ assert(emitIssuing);
+ assert(!emitFullGCinfo);
+
+ unsigned offs = emitCurCodeOffs(codePos);
+ unsigned regs = (emitThisGCrefRegs | emitThisByrefRegs) & ~RBM_INTRET;
+ callDsc* call;
+
+#ifdef JIT32_GCENCODER
+ // The JIT32 GCInfo encoder allows us to (as the comment previously here said):
+ // "Bail if this is a totally boring call", but the GCInfoEncoder/Decoder interface
+ // requires a definition for every call site, so we skip these "early outs" when we're
+ // using the general encoder.
+ if (regs == 0)
+ {
+#if EMIT_TRACK_STACK_DEPTH
+ if (emitCurStackLvl == 0)
+ return;
+#endif
+ /* Nope, only interesting calls get recorded */
+
+ if (emitSimpleStkUsed)
+ {
+ if (!u1.emitSimpleStkMask)
+ return;
+ }
+ else
+ {
+ if (u2.emitGcArgTrackCnt == 0)
+ return;
+ }
+ }
+#endif // JIT32_GCENCODER
+
+#ifdef DEBUG
+
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("; Call at %04X [stk=%u], GCvars=", offs - callInstrSize, emitCurStackLvl);
+ emitDispVarSet();
+ printf(", gcrefRegs=");
+ printRegMaskInt(emitThisGCrefRegs);
+ emitDispRegSet(emitThisGCrefRegs);
+ // printRegMaskInt(emitThisGCrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved
+ // emitDispRegSet (emitThisGCrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved
+ printf(", byrefRegs=");
+ printRegMaskInt(emitThisByrefRegs);
+ emitDispRegSet(emitThisByrefRegs);
+ // printRegMaskInt(emitThisByrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved
+ // emitDispRegSet (emitThisByrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved
+ printf("\n");
+ }
+
+#endif
+
+ /* Allocate a 'call site' descriptor and start filling it in */
+
+ call = new (emitComp, CMK_GC) callDsc;
+
+ call->cdBlock = nullptr;
+ call->cdOffs = offs;
+#ifndef JIT32_GCENCODER
+ call->cdCallInstrSize = callInstrSize;
+#endif
+ call->cdNext = nullptr;
+
+ call->cdGCrefRegs = (regMaskSmall)emitThisGCrefRegs;
+ call->cdByrefRegs = (regMaskSmall)emitThisByrefRegs;
+
+#if EMIT_TRACK_STACK_DEPTH
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ noway_assert(FitsIn<USHORT>(emitCurStackLvl / ((unsigned)sizeof(unsigned))));
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif
+
+    /* Append the call descriptor to the list */
+ if (codeGen->gcInfo.gcCallDescLast == nullptr)
+ {
+ assert(codeGen->gcInfo.gcCallDescList == nullptr);
+ codeGen->gcInfo.gcCallDescList = codeGen->gcInfo.gcCallDescLast = call;
+ }
+ else
+ {
+ assert(codeGen->gcInfo.gcCallDescList != nullptr);
+ codeGen->gcInfo.gcCallDescLast->cdNext = call;
+ codeGen->gcInfo.gcCallDescLast = call;
+ }
+
+ /* Record the current "pending" argument list */
+
+ if (emitSimpleStkUsed)
+ {
+        /* The maximum stack depth is no more than MAX_SIMPLE_STK_DEPTH,
+           so use the small (bit mask) format */
+
+ call->u1.cdArgMask = u1.emitSimpleStkMask;
+ call->u1.cdByrefArgMask = u1.emitSimpleByrefStkMask;
+ call->cdArgCnt = 0;
+ }
+ else
+ {
+ /* The current call has too many arguments, so we need to report the
+ offsets of each individual GC arg. */
+
+ call->cdArgCnt = u2.emitGcArgTrackCnt;
+ if (call->cdArgCnt == 0)
+ {
+ call->u1.cdArgMask = call->u1.cdByrefArgMask = 0;
+ return;
+ }
+
+ call->cdArgTable = new (emitComp, CMK_GC) unsigned[u2.emitGcArgTrackCnt];
+
+ unsigned gcArgs = 0;
+ unsigned stkLvl = emitCurStackLvl / sizeof(int);
+
+ for (unsigned i = 0; i < stkLvl; i++)
+ {
+ GCtype gcType = (GCtype)u2.emitArgTrackTab[stkLvl - i - 1];
+
+ if (needsGC(gcType))
+ {
+ call->cdArgTable[gcArgs] = i * sizeof(void*);
+
+ if (gcType == GCT_BYREF)
+ {
+ call->cdArgTable[gcArgs] |= byref_OFFSET_FLAG;
+ }
+
+ gcArgs++;
+ }
+ }
+
+ assert(gcArgs == u2.emitGcArgTrackCnt);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Record a new set of live GC ref registers.
+ */
+
+void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr)
+{
+ assert(emitIssuing);
+
+ // Don't track GC changes in epilogs
+ if (emitIGisInEpilog(emitCurIG))
+ {
+ return;
+ }
+
+ regMaskTP life;
+ regMaskTP dead;
+ regMaskTP chg;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("New %sReg live regs=", GCtypeStr(gcType));
+ printRegMaskInt(regs);
+ emitDispRegSet(regs);
+ printf("\n");
+ }
+#endif
+
+ assert(needsGC(gcType));
+
+ regMaskTP& emitThisXXrefRegs = (gcType == GCT_GCREF) ? emitThisGCrefRegs : emitThisByrefRegs;
+ regMaskTP& emitThisYYrefRegs = (gcType == GCT_GCREF) ? emitThisByrefRegs : emitThisGCrefRegs;
+ assert(emitThisXXrefRegs != regs);
+
+ if (emitFullGCinfo)
+ {
+ /* Figure out which GC registers are becoming live/dead at this point */
+
+ dead = (emitThisXXrefRegs & ~regs);
+ life = (~emitThisXXrefRegs & regs);
+
+        /* Registers can't become live and dead at the same time */
+
+ assert((dead | life) != 0);
+ assert((dead & life) == 0);
+
+ /* Compute the 'changing state' mask */
+
+ chg = (dead | life);
+
+ do
+ {
+ regMaskTP bit = genFindLowestBit(chg);
+ regNumber reg = genRegNumFromMask(bit);
+
+ if (life & bit)
+ {
+ emitGCregLiveUpd(gcType, reg, addr);
+ }
+ else
+ {
+ emitGCregDeadUpd(reg, addr);
+ }
+
+ chg -= bit;
+ } while (chg);
+
+ assert(emitThisXXrefRegs == regs);
+ }
+ else
+ {
+ emitThisYYrefRegs &= ~regs; // Kill the regs from the other GC type (if live)
+ emitThisXXrefRegs = regs; // Mark them as live in the requested GC type
+ }
+
+ // The 2 GC reg masks can't be overlapping
+
+ assert((emitThisGCrefRegs & emitThisByrefRegs) == 0);
+}
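+
+// Illustrative note (not part of the original emitter code): the do/while loop above uses the
+// standard "peel off the lowest set bit" idiom so that each changed register is visited exactly
+// once. The same idiom over a plain integer mask:
+//
+//     while (chg != 0)
+//     {
+//         unsigned long long bit = chg & (~chg + 1); // isolate the lowest set bit
+//         // ... process the single register corresponding to 'bit' ...
+//         chg &= ~bit;                               // clear it and continue with the rest
+//     }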
+
+/*****************************************************************************
+ *
+ * Record the fact that the given register now contains a live GC ref.
+ */
+
+void emitter::emitGCregLiveSet(GCtype gcType, regMaskTP regMask, BYTE* addr, bool isThis)
+{
+ assert(emitIssuing);
+ assert(needsGC(gcType));
+
+ regPtrDsc* regPtrNext;
+
+ assert(!isThis || emitComp->lvaKeepAliveAndReportThis());
+ // assert(emitFullyInt || isThis);
+ assert(emitFullGCinfo);
+
+ assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) == 0);
+
+ /* Allocate a new regptr entry and fill it in */
+
+ regPtrNext = codeGen->gcInfo.gcRegPtrAllocDsc();
+ regPtrNext->rpdGCtype = gcType;
+
+ regPtrNext->rpdOffs = emitCurCodeOffs(addr);
+ regPtrNext->rpdArg = FALSE;
+ regPtrNext->rpdCall = FALSE;
+ regPtrNext->rpdIsThis = isThis;
+ regPtrNext->rpdCompiler.rpdAdd = (regMaskSmall)regMask;
+ regPtrNext->rpdCompiler.rpdDel = 0;
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the given register no longer contains a live GC ref.
+ */
+
+void emitter::emitGCregDeadSet(GCtype gcType, regMaskTP regMask, BYTE* addr)
+{
+ assert(emitIssuing);
+ assert(needsGC(gcType));
+
+ regPtrDsc* regPtrNext;
+
+ // assert(emitFullyInt);
+ assert(emitFullGCinfo);
+
+ assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) != 0);
+
+ /* Allocate a new regptr entry and fill it in */
+
+ regPtrNext = codeGen->gcInfo.gcRegPtrAllocDsc();
+ regPtrNext->rpdGCtype = gcType;
+
+ regPtrNext->rpdOffs = emitCurCodeOffs(addr);
+ regPtrNext->rpdCall = FALSE;
+ regPtrNext->rpdIsThis = FALSE;
+ regPtrNext->rpdArg = FALSE;
+ regPtrNext->rpdCompiler.rpdAdd = 0;
+ regPtrNext->rpdCompiler.rpdDel = (regMaskSmall)regMask;
+}
+
+/*****************************************************************************
+ *
+ * Emit an 8-bit integer as code.
+ */
+
+unsigned char emitter::emitOutputByte(BYTE* dst, ssize_t val)
+{
+ *castto(dst, unsigned char*) = (unsigned char)val;
+
+#ifdef DEBUG
+ if (emitComp->opts.dspEmit)
+ {
+ printf("; emit_byte 0%02XH\n", val & 0xFF);
+ }
+#ifdef _TARGET_AMD64_
+ // if we're emitting code bytes, ensure that we've already emitted the rex prefix!
+ assert(((val & 0xFF00000000LL) == 0) || ((val & 0xFFFFFFFF00000000LL) == 0xFFFFFFFF00000000LL));
+#endif // _TARGET_AMD64_
+#endif
+
+ return sizeof(unsigned char);
+}
+
+/*****************************************************************************
+ *
+ * Emit a 16-bit integer as code.
+ */
+
+unsigned char emitter::emitOutputWord(BYTE* dst, ssize_t val)
+{
+ MISALIGNED_WR_I2(dst, (short)val);
+
+#ifdef DEBUG
+ if (emitComp->opts.dspEmit)
+ {
+ printf("; emit_word 0%02XH,0%02XH\n", (val & 0xFF), (val >> 8) & 0xFF);
+ }
+#ifdef _TARGET_AMD64_
+ // if we're emitting code bytes, ensure that we've already emitted the rex prefix!
+ assert(((val & 0xFF00000000LL) == 0) || ((val & 0xFFFFFFFF00000000LL) == 0xFFFFFFFF00000000LL));
+#endif // _TARGET_AMD64_
+#endif
+
+ return sizeof(short);
+}
+
+/*****************************************************************************
+ *
+ * Emit a 32-bit integer as code.
+ */
+
+unsigned char emitter::emitOutputLong(BYTE* dst, ssize_t val)
+{
+ MISALIGNED_WR_I4(dst, (int)val);
+
+#ifdef DEBUG
+ if (emitComp->opts.dspEmit)
+ {
+ printf("; emit_long 0%08XH\n", val);
+ }
+#ifdef _TARGET_AMD64_
+ // if we're emitting code bytes, ensure that we've already emitted the rex prefix!
+ assert(((val & 0xFF00000000LL) == 0) || ((val & 0xFFFFFFFF00000000LL) == 0xFFFFFFFF00000000LL));
+#endif // _TARGET_AMD64_
+#endif
+
+ return sizeof(int);
+}
+
+/*****************************************************************************
+ *
+ * Emit a pointer-sized integer as code.
+ */
+
+unsigned char emitter::emitOutputSizeT(BYTE* dst, ssize_t val)
+{
+ MISALIGNED_WR_ST(dst, val);
+
+#ifdef DEBUG
+ if (emitComp->opts.dspEmit)
+ {
+#ifdef _TARGET_AMD64_
+ printf("; emit_size_t 0%016llXH\n", (size_t)val);
+#else // _TARGET_AMD64_
+ printf("; emit_size_t 0%08XH\n", (size_t)val);
+#endif // _TARGET_AMD64_
+ }
+#endif // DEBUG
+
+ return sizeof(size_t);
+}
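+
+// Illustrative note (not part of the original emitter code): the four emitOutput* helpers above
+// all follow the same contract -- store a value of the given width at 'dst' (via the
+// MISALIGNED_WR_* macros, so unaligned destinations are fine, and in the native byte order,
+// which is little-endian on the JIT's targets) and return the number of bytes written.
+// A byte-by-byte sketch of that contract with no alignment assumptions:
+//
+//     unsigned EmitLittleEndian(unsigned char* dst, unsigned long long val, unsigned size)
+//     {
+//         for (unsigned i = 0; i < size; i++)
+//         {
+//             dst[i] = (unsigned char)(val >> (8 * i)); // least significant byte first
+//         }
+//         return size;
+//     }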
+
+/*****************************************************************************
+ *
+ * Given a block cookie and a code position, return the actual code offset;
+ * this can only be called at the end of code generation.
+ */
+
+UNATIVE_OFFSET emitter::emitCodeOffset(void* blockPtr, unsigned codePos)
+{
+ insGroup* ig;
+
+ UNATIVE_OFFSET of;
+ unsigned no = emitGetInsNumFromCodePos(codePos);
+
+ /* Make sure we weren't passed some kind of a garbage thing */
+
+ ig = (insGroup*)blockPtr;
+#ifdef DEBUG
+ assert(ig && ig->igSelf == ig);
+#endif
+
+ /* The first and last offsets are always easy */
+
+ if (no == 0)
+ {
+ of = 0;
+ }
+ else if (no == ig->igInsCnt)
+ {
+ of = ig->igSize;
+ }
+ else if (ig->igFlags & IGF_UPD_ISZ)
+ {
+ /*
+ Some instruction sizes have changed, so we'll have to figure
+ out the instruction offset "the hard way".
+ */
+
+ of = emitFindOffset(ig, no);
+ }
+ else
+ {
+ /* All instructions correctly predicted, the offset stays the same */
+
+ of = emitGetInsOfsFromCodePos(codePos);
+
+ // printf("[IG=%02u;ID=%03u;OF=%04X] <= %08X\n", ig->igNum, emitGetInsNumFromCodePos(codePos), of, codePos);
+
+ /* Make sure the offset estimate is accurate */
+
+ assert(of == emitFindOffset(ig, emitGetInsNumFromCodePos(codePos)));
+ }
+
+ return ig->igOffs + of;
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the given register now contains a live GC ref.
+ */
+
+void emitter::emitGCregLiveUpd(GCtype gcType, regNumber reg, BYTE* addr)
+{
+ assert(emitIssuing);
+
+ // Don't track GC changes in epilogs
+ if (emitIGisInEpilog(emitCurIG))
+ {
+ return;
+ }
+
+ assert(needsGC(gcType));
+
+ regMaskTP regMask = genRegMask(reg);
+
+ regMaskTP& emitThisXXrefRegs = (gcType == GCT_GCREF) ? emitThisGCrefRegs : emitThisByrefRegs;
+ regMaskTP& emitThisYYrefRegs = (gcType == GCT_GCREF) ? emitThisByrefRegs : emitThisGCrefRegs;
+
+ if ((emitThisXXrefRegs & regMask) == 0)
+ {
+ // If the register was holding the other GC type, that type should
+ // go dead now
+
+ if (emitThisYYrefRegs & regMask)
+ {
+ emitGCregDeadUpd(reg, addr);
+ }
+
+ // For synchronized methods, "this" is always alive and in the same register.
+ // However, if we generate any code after the epilog block (where "this"
+ // goes dead), "this" will come alive again. We need to notice that.
+ // Note that we only expect isThis to be true at an insGroup boundary.
+
+        bool isThis = (reg == emitSyncThisObjReg);
+
+ if (emitFullGCinfo)
+ {
+ emitGCregLiveSet(gcType, regMask, addr, isThis);
+ }
+
+ emitThisXXrefRegs |= regMask;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("%sReg +[%s]\n", GCtypeStr(gcType), emitRegName(reg));
+ }
+#endif
+ }
+
+ // The 2 GC reg masks can't be overlapping
+
+ assert((emitThisGCrefRegs & emitThisByrefRegs) == 0);
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the given set of registers no longer contain live GC refs.
+ */
+
+void emitter::emitGCregDeadUpdMask(regMaskTP regs, BYTE* addr)
+{
+ assert(emitIssuing);
+
+ // Don't track GC changes in epilogs
+ if (emitIGisInEpilog(emitCurIG))
+ {
+ return;
+ }
+
+ // First, handle the gcref regs going dead
+
+ regMaskTP gcrefRegs = emitThisGCrefRegs & regs;
+
+ // "this" can never go dead in synchronized methods, except in the epilog
+ // after the call to CORINFO_HELP_MON_EXIT.
+ assert(emitSyncThisObjReg == REG_NA || (genRegMask(emitSyncThisObjReg) & regs) == 0);
+
+ if (gcrefRegs)
+ {
+ assert((emitThisByrefRegs & gcrefRegs) == 0);
+
+ if (emitFullGCinfo)
+ {
+ emitGCregDeadSet(GCT_GCREF, gcrefRegs, addr);
+ }
+
+ emitThisGCrefRegs &= ~gcrefRegs;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("gcrReg ");
+ printRegMaskInt(gcrefRegs);
+ printf(" -");
+ emitDispRegSet(gcrefRegs);
+ printf("\n");
+ }
+#endif
+ }
+
+ // Second, handle the byref regs going dead
+
+ regMaskTP byrefRegs = emitThisByrefRegs & regs;
+
+ if (byrefRegs)
+ {
+ assert((emitThisGCrefRegs & byrefRegs) == 0);
+
+ if (emitFullGCinfo)
+ {
+ emitGCregDeadSet(GCT_BYREF, byrefRegs, addr);
+ }
+
+ emitThisByrefRegs &= ~byrefRegs;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("byrReg ");
+ printRegMaskInt(byrefRegs);
+ printf(" -");
+ emitDispRegSet(byrefRegs);
+ printf("\n");
+ }
+#endif
+ }
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the given register no longer contains a live GC ref.
+ */
+
+void emitter::emitGCregDeadUpd(regNumber reg, BYTE* addr)
+{
+ assert(emitIssuing);
+
+ // Don't track GC changes in epilogs
+ if (emitIGisInEpilog(emitCurIG))
+ {
+ return;
+ }
+
+ regMaskTP regMask = genRegMask(reg);
+
+ if ((emitThisGCrefRegs & regMask) != 0)
+ {
+ assert((emitThisByrefRegs & regMask) == 0);
+
+ if (emitFullGCinfo)
+ {
+ emitGCregDeadSet(GCT_GCREF, regMask, addr);
+ }
+
+ emitThisGCrefRegs &= ~regMask;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("%s -[%s]\n", "gcrReg", emitRegName(reg));
+ }
+#endif
+ }
+ else if ((emitThisByrefRegs & regMask) != 0)
+ {
+ if (emitFullGCinfo)
+ {
+ emitGCregDeadSet(GCT_BYREF, regMask, addr);
+ }
+
+ emitThisByrefRegs &= ~regMask;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("%s -[%s]\n", "byrReg", emitRegName(reg));
+ }
+#endif
+ }
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the given variable now contains a live GC ref.
+ * varNum may be INT_MAX or negative (indicating a spill temp) only if
+ * offs is guaranteed to be the offset of a tracked GC ref. Otherwise we
+ * need a valid varNum to check whether the variable is tracked.
+ */
+
+void emitter::emitGCvarLiveUpd(int offs, int varNum, GCtype gcType, BYTE* addr)
+{
+ assert(abs(offs) % sizeof(int) == 0);
+ assert(needsGC(gcType));
+
+#if FEATURE_FIXED_OUT_ARGS
+ if ((unsigned)varNum == emitComp->lvaOutgoingArgSpaceVar)
+ {
+ if (emitFullGCinfo)
+ {
+            /* Append an "arg push" entry to track a GC ref written to the
+ outgoing argument space.
+ Allocate a new ptr arg entry and fill it in */
+
+ regPtrDsc* regPtrNext = gcInfo->gcRegPtrAllocDsc();
+ regPtrNext->rpdGCtype = gcType;
+ regPtrNext->rpdOffs = emitCurCodeOffs(addr);
+ regPtrNext->rpdArg = TRUE;
+ regPtrNext->rpdCall = FALSE;
+ noway_assert(FitsIn<unsigned short>(offs));
+ regPtrNext->rpdPtrArg = (unsigned short)offs;
+ regPtrNext->rpdArgType = (unsigned short)GCInfo::rpdARG_PUSH;
+ regPtrNext->rpdIsThis = FALSE;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("[%04X] %s arg write\n", offs, GCtypeStr(gcType));
+ }
+#endif
+ }
+ }
+ else
+#endif // FEATURE_FIXED_OUT_ARGS
+ {
+ /* Is the frame offset within the "interesting" range? */
+
+ if (offs >= emitGCrFrameOffsMin && offs < emitGCrFrameOffsMax)
+ {
+            /* Normally all variables in this range must be tracked stack
+               pointers. However, for EnC we relax this condition, so we
+               must check whether this variable is actually tracked.
+ Note that varNum might be negative, indicating a spill temp.
+ */
+
+ if (varNum != INT_MAX)
+ {
+ bool isTracked = false;
+ if (varNum >= 0)
+ {
+ // This is NOT a spill temp
+ LclVarDsc* varDsc = &emitComp->lvaTable[varNum];
+ isTracked = emitComp->lvaIsGCTracked(varDsc);
+ }
+ else
+ {
+ // Is it an untracked spill temp?
+ isTracked = TRACK_GC_TEMP_LIFETIMES;
+ }
+ if (!isTracked)
+ {
+#if DOUBLE_ALIGN
+ assert(!emitContTrkPtrLcls ||
+                           // EBP-based variables in double-aligned frames are input arguments,
+                           // and we don't require them to fall into the "interesting" range.
+ ((emitComp->rpFrameType == FT_DOUBLE_ALIGN_FRAME) && (varNum >= 0) &&
+ (emitComp->lvaTable[varNum].lvFramePointerBased == 1)));
+#else
+ assert(!emitContTrkPtrLcls);
+#endif
+ return;
+ }
+ }
+
+ size_t disp;
+
+ /* Compute the index into the GC frame table */
+
+ disp = (offs - emitGCrFrameOffsMin) / sizeof(void*);
+ assert(disp < emitGCrFrameOffsCnt);
+
+ /* If the variable is currently dead, mark it as live */
+
+ if (emitGCrFrameLiveTab[disp] == nullptr)
+ {
+ emitGCvarLiveSet(offs, gcType, addr, disp);
+ }
+ }
+ }
+}
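+
+// Illustrative note (not part of the original emitter code): the table index computed above is
+// simply the pointer-slot distance from the start of the "interesting" frame range. For example,
+// with hypothetical values emitGCrFrameOffsMin = -0x40 and 8-byte pointers, a variable at frame
+// offset -0x20 maps to slot (-0x20 - (-0x40)) / 8 = 0x20 / 8 = 4 of emitGCrFrameLiveTab.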
+
+/*****************************************************************************
+ *
+ * Record the fact that the given variable no longer contains a live GC ref.
+ */
+
+void emitter::emitGCvarDeadUpd(int offs, BYTE* addr)
+{
+ assert(emitIssuing);
+ assert(abs(offs) % sizeof(int) == 0);
+
+ /* Is the frame offset within the "interesting" range? */
+
+ if (offs >= emitGCrFrameOffsMin && offs < emitGCrFrameOffsMax)
+ {
+ size_t disp;
+
+ /* Compute the index into the GC frame table */
+
+ disp = (offs - emitGCrFrameOffsMin) / sizeof(void*);
+ assert(disp < emitGCrFrameOffsCnt);
+
+ /* If the variable is currently live, mark it as dead */
+
+ if (emitGCrFrameLiveTab[disp] != nullptr)
+ {
+ emitGCvarDeadSet(offs, addr, disp);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Allocate a new IG and link it in to the global list after the current IG
+ */
+
+insGroup* emitter::emitAllocAndLinkIG()
+{
+ insGroup* ig = emitAllocIG();
+
+ assert(emitCurIG);
+
+ emitInsertIGAfter(emitCurIG, ig);
+
+ /* Propagate some IG flags from the current group to the new group */
+
+ ig->igFlags |= (emitCurIG->igFlags & IGF_PROPAGATE_MASK);
+
+ /* Set the new IG as the current IG */
+
+ emitCurIG = ig;
+
+ return ig;
+}
+
+/*****************************************************************************
+ *
+ * Allocate an instruction group descriptor and assign it the next index.
+ */
+
+insGroup* emitter::emitAllocIG()
+{
+ insGroup* ig;
+
+ /* Allocate a group descriptor */
+
+ size_t sz = sizeof(insGroup);
+ ig = (insGroup*)emitGetMem(sz);
+
+#ifdef DEBUG
+ ig->igSelf = ig;
+#endif
+
+#if EMITTER_STATS
+ emitTotalIGcnt += 1;
+ emitTotalIGsize += sz;
+ emitSizeMethod += sz;
+#endif
+
+ /* Do basic initialization */
+
+ emitInitIG(ig);
+
+ return ig;
+}
+
+/*****************************************************************************
+ *
+ * Initialize an instruction group
+ */
+
+void emitter::emitInitIG(insGroup* ig)
+{
+ /* Assign the next available index to the instruction group */
+
+ ig->igNum = emitNxtIGnum;
+
+ emitNxtIGnum++;
+
+ /* Record the (estimated) code offset of the group */
+
+ ig->igOffs = emitCurCodeOffset;
+ assert(IsCodeAligned(ig->igOffs));
+
+ /* Set the current function index */
+
+ ig->igFuncIdx = emitComp->compCurrFuncIdx;
+
+ ig->igFlags = 0;
+
+ /* Zero out some fields to avoid printing garbage in JitDumps. These
+ really only need to be set in DEBUG, but do it in all cases to make
+ sure we act the same in non-DEBUG builds.
+ */
+
+ ig->igSize = 0;
+ ig->igGCregs = RBM_NONE;
+ ig->igInsCnt = 0;
+}
+
+/*****************************************************************************
+ *
+ * Insert instruction group 'ig' after 'igInsertAfterIG'
+ */
+
+void emitter::emitInsertIGAfter(insGroup* insertAfterIG, insGroup* ig)
+{
+ assert(emitIGlist);
+ assert(emitIGlast);
+
+ ig->igNext = insertAfterIG->igNext;
+ insertAfterIG->igNext = ig;
+
+ if (emitIGlast == insertAfterIG)
+ {
+ // If we are inserting at the end, then update the 'last' pointer
+ emitIGlast = ig;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Save the current IG and start a new one.
+ */
+
+void emitter::emitNxtIG(bool emitAdd)
+{
+ /* Right now we don't allow multi-IG prologs */
+
+ assert(emitCurIG != emitPrologIG);
+
+ /* First save the current group */
+
+ emitSavIG(emitAdd);
+
+    /* Update the GC live sets for the group's start.
+     * Do this only if it is not an emitter-added block */
+
+ if (!emitAdd)
+ {
+ VarSetOps::Assign(emitComp, emitInitGCrefVars, emitThisGCrefVars);
+ emitInitGCrefRegs = emitThisGCrefRegs;
+ emitInitByrefRegs = emitThisByrefRegs;
+ }
+
+ /* Start generating the new group */
+
+ emitNewIG();
+
+ /* If this is an emitter added block, flag it */
+
+ if (emitAdd)
+ {
+ emitCurIG->igFlags |= IGF_EMIT_ADD;
+ }
+
+ // We've created a new IG; no need to force another one.
+ emitForceNewIG = false;
+}
+
+/*****************************************************************************
+ *
+ * emitGetInsSC: Get the instruction's constant value.
+ */
+
+ssize_t emitter::emitGetInsSC(instrDesc* id)
+{
+#ifdef _TARGET_ARM_ // should it be _TARGET_ARMARCH_? Why do we need this? Note that on ARM64 we store scaled immediates
+ // for some formats
+ if (id->idIsLclVar())
+ {
+ int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
+
+ regNumber baseReg;
+ int offs = id->idAddr()->iiaLclVar.lvaOffset();
+#if defined(_TARGET_ARM_)
+ int adr = emitComp->lvaFrameAddress(varNum, id->idIsLclFPBase(), &baseReg, offs);
+ int dsp = adr + offs;
+ if ((id->idIns() == INS_sub) || (id->idIns() == INS_subw))
+ dsp = -dsp;
+#elif defined(_TARGET_ARM64_)
+ // TODO-ARM64-Cleanup: this is currently unreachable. Do we need it?
+ bool FPbased;
+ int adr = emitComp->lvaFrameAddress(varNum, &FPbased);
+ int dsp = adr + offs;
+ if (id->idIns() == INS_sub)
+ dsp = -dsp;
+#endif
+ return dsp;
+ }
+ else
+#endif // _TARGET_ARM_
+ if (id->idIsLargeCns())
+ {
+ return ((instrDescCns*)id)->idcCnsVal;
+ }
+ else
+ {
+ return id->idSmallCns();
+ }
+}
+
+/*****************************************************************************/
+#if EMIT_TRACK_STACK_DEPTH
+/*****************************************************************************
+ *
+ * Record a push of a single dword on the stack.
+ */
+
+void emitter::emitStackPush(BYTE* addr, GCtype gcType)
+{
+#ifdef DEBUG
+ assert(IsValidGCtype(gcType));
+#endif
+
+ if (emitSimpleStkUsed)
+ {
+ assert(!emitFullGCinfo); // Simple stk not used for emitFullGCinfo
+ assert(emitCurStackLvl / sizeof(int) < MAX_SIMPLE_STK_DEPTH);
+
+ u1.emitSimpleStkMask <<= 1;
+ u1.emitSimpleStkMask |= (unsigned)needsGC(gcType);
+
+ u1.emitSimpleByrefStkMask <<= 1;
+ u1.emitSimpleByrefStkMask |= (gcType == GCT_BYREF);
+
+ assert((u1.emitSimpleStkMask & u1.emitSimpleByrefStkMask) == u1.emitSimpleByrefStkMask);
+ }
+ else
+ {
+ emitStackPushLargeStk(addr, gcType);
+ }
+
+ emitCurStackLvl += sizeof(int);
+}
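+
+// Illustrative note (not part of the original emitter code): in the "simple" tracking mode the two
+// masks behave as shift registers, with bit 0 always describing the most recent push. For example,
+// pushing a gcref, then a non-GC value, then a byref evolves the masks as follows (low bit on the
+// right):
+//
+//     u1.emitSimpleStkMask      : 001 -> 010 -> 101
+//     u1.emitSimpleByrefStkMask : 000 -> 000 -> 001
+//
+// Bits 0 and 2 mark GC slots, and of those only bit 0 (the byref) is also set in the byref mask,
+// which is exactly the containment invariant asserted above.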
+
+/*****************************************************************************
+ *
+ * Record a push of a bunch of non-GC dwords on the stack.
+ */
+
+void emitter::emitStackPushN(BYTE* addr, unsigned count)
+{
+ assert(count);
+
+ if (emitSimpleStkUsed)
+ {
+ assert(!emitFullGCinfo); // Simple stk not used for emitFullGCinfo
+
+ u1.emitSimpleStkMask <<= count;
+ u1.emitSimpleByrefStkMask <<= count;
+ }
+ else
+ {
+ emitStackPushLargeStk(addr, GCT_NONE, count);
+ }
+
+ emitCurStackLvl += count * sizeof(int);
+}
+
+/*****************************************************************************
+ *
+ * Record a pop of the given number of dwords from the stack.
+ */
+
+void emitter::emitStackPop(BYTE* addr, bool isCall, unsigned char callInstrSize, unsigned count)
+{
+ assert(emitCurStackLvl / sizeof(int) >= count);
+ assert(!isCall || callInstrSize > 0);
+
+ if (count)
+ {
+ if (emitSimpleStkUsed)
+ {
+ assert(!emitFullGCinfo); // Simple stk not used for emitFullGCinfo
+
+ unsigned cnt = count;
+
+ do
+ {
+ u1.emitSimpleStkMask >>= 1;
+ u1.emitSimpleByrefStkMask >>= 1;
+ } while (--cnt);
+ }
+ else
+ {
+ emitStackPopLargeStk(addr, isCall, callInstrSize, count);
+ }
+
+ emitCurStackLvl -= count * sizeof(int);
+ }
+ else
+ {
+ assert(isCall);
+
+        // For the general encoder, when this is a call we always make the call below, to ensure that the
+        // call site is recorded (when we're building the ptr reg map for a non-fully-interruptible method).
+ if (emitFullGCinfo
+#ifndef JIT32_GCENCODER
+ || (emitComp->genFullPtrRegMap && (!emitComp->genInterruptible) && isCall)
+#endif // JIT32_GCENCODER
+ )
+ {
+ emitStackPopLargeStk(addr, isCall, callInstrSize, 0);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Record a push of a single word on the stack for a full pointer map.
+ */
+
+void emitter::emitStackPushLargeStk(BYTE* addr, GCtype gcType, unsigned count)
+{
+ S_UINT32 level(emitCurStackLvl / sizeof(int));
+
+ assert(IsValidGCtype(gcType));
+ assert(count);
+ assert(!emitSimpleStkUsed);
+
+ do
+ {
+ /* Push an entry for this argument on the tracking stack */
+
+ // printf("Pushed [%d] at lvl %2u [max=%u]\n", isGCref, emitArgTrackTop - emitArgTrackTab, emitMaxStackDepth);
+
+ assert(level.IsOverflow() || u2.emitArgTrackTop == u2.emitArgTrackTab + level.Value());
+ *u2.emitArgTrackTop++ = (BYTE)gcType;
+ assert(u2.emitArgTrackTop <= u2.emitArgTrackTab + emitMaxStackDepth);
+
+ if (!emitHasFramePtr || needsGC(gcType))
+ {
+ if (emitFullGCinfo)
+ {
+                /* Append an "arg push" entry if this is a GC ref or an
+                   FPO method. Allocate a new ptr arg entry and fill it in */
+
+ regPtrDsc* regPtrNext = codeGen->gcInfo.gcRegPtrAllocDsc();
+ regPtrNext->rpdGCtype = gcType;
+
+ regPtrNext->rpdOffs = emitCurCodeOffs(addr);
+ regPtrNext->rpdArg = TRUE;
+ regPtrNext->rpdCall = FALSE;
+ if (level.IsOverflow() || !FitsIn<unsigned short>(level.Value()))
+ {
+ IMPL_LIMITATION("Too many/too big arguments to encode GC information");
+ }
+ regPtrNext->rpdPtrArg = (unsigned short)level.Value();
+ regPtrNext->rpdArgType = (unsigned short)GCInfo::rpdARG_PUSH;
+ regPtrNext->rpdIsThis = FALSE;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("[%08X] %s arg push %u\n", dspPtr(regPtrNext), GCtypeStr(gcType), level.Value());
+ }
+#endif
+ }
+
+ /* This is an "interesting" argument push */
+
+ u2.emitGcArgTrackCnt++;
+ }
+ level += 1;
+ assert(!level.IsOverflow());
+ } while (--count);
+}
+
+/*****************************************************************************
+ *
+ * Record a pop of the given number of words from the stack for a full ptr
+ * map.
+ */
+
+void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callInstrSize, unsigned count)
+{
+ assert(emitIssuing);
+
+ unsigned argStkCnt;
+ S_UINT16 argRecCnt(0); // arg count for ESP, ptr-arg count for EBP
+ unsigned gcrefRegs, byrefRegs;
+
+#ifdef JIT32_GCENCODER
+    // The general (non-JIT32) encoder always needs to record calls, so it may call this function
+    // even when emitSimpleStkUsed is true; with the JIT32 encoder that should never happen.
+ assert(!emitSimpleStkUsed);
+#endif
+
+ /* Count how many pointer records correspond to this "pop" */
+
+ for (argStkCnt = count; argStkCnt; argStkCnt--)
+ {
+ assert(u2.emitArgTrackTop > u2.emitArgTrackTab);
+
+ GCtype gcType = (GCtype)(*--u2.emitArgTrackTop);
+
+ assert(IsValidGCtype(gcType));
+
+ // printf("Popped [%d] at lvl %u\n", GCtypeStr(gcType), emitArgTrackTop - emitArgTrackTab);
+
+ // This is an "interesting" argument
+
+ if (!emitHasFramePtr || needsGC(gcType))
+ {
+ argRecCnt += 1;
+ }
+ }
+
+ assert(u2.emitArgTrackTop >= u2.emitArgTrackTab);
+ assert(u2.emitArgTrackTop == u2.emitArgTrackTab + emitCurStackLvl / sizeof(int) - count);
+ noway_assert(!argRecCnt.IsOverflow());
+
+ /* We're about to pop the corresponding arg records */
+
+ u2.emitGcArgTrackCnt -= argRecCnt.Value();
+
+#ifdef JIT32_GCENCODER
+ // For the general encoder, we always have to record calls, so we don't take this early return.
+ if (!emitFullGCinfo)
+ return;
+#endif
+
+ // Do we have any interesting (i.e., callee-saved) registers live here?
+
+ gcrefRegs = byrefRegs = 0;
+
+ // We make a bitmask whose bits correspond to callee-saved register indices (in the sequence
+ // of callee-saved registers only).
+ for (unsigned calleeSavedRegIdx = 0; calleeSavedRegIdx < CNT_CALLEE_SAVED; calleeSavedRegIdx++)
+ {
+ regMaskTP calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx];
+ if (emitThisGCrefRegs & calleeSavedRbm)
+ {
+ gcrefRegs |= (1 << calleeSavedRegIdx);
+ }
+ if (emitThisByrefRegs & calleeSavedRbm)
+ {
+ byrefRegs |= (1 << calleeSavedRegIdx);
+ }
+ }
+
+#ifdef JIT32_GCENCODER
+    // For the general encoder, we always have to record calls, so we don't take this early return.
+
+    /* Are there any args to pop at this call site? */
+
+ if (argRecCnt.Value() == 0)
+ {
+ /*
+ Or do we have a partially interruptible EBP-less frame, and any
+ of EDI,ESI,EBX,EBP are live, or is there an outer/pending call?
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !FPO_INTERRUPTIBLE
+ if (emitFullyInt || (gcrefRegs == 0 && byrefRegs == 0 && u2.emitGcArgTrackCnt == 0))
+#endif
+ return;
+ }
+#endif // JIT32_GCENCODER
+
+ /* Only calls may pop more than one value */
+ // More detail:
+ // _cdecl calls accomplish this popping via a post-call-instruction SP adjustment.
+ // The "rpdCall" field below should be interpreted as "the instruction accomplishes
+ // call-related popping, even if it's not itself a call". Therefore, we don't just
+ // use the "isCall" input argument, which means that the instruction actually is a call --
+ // we use the OR of "isCall" or the "pops more than one value."
+
+ bool isCallRelatedPop = (argRecCnt.Value() > 1);
+
+ /* Allocate a new ptr arg entry and fill it in */
+
+ regPtrDsc* regPtrNext = codeGen->gcInfo.gcRegPtrAllocDsc();
+ regPtrNext->rpdGCtype = GCT_GCREF; // Pops need a non-0 value (??)
+
+ regPtrNext->rpdOffs = emitCurCodeOffs(addr);
+ regPtrNext->rpdCall = (isCall || isCallRelatedPop);
+#ifndef JIT32_GCENCODER
+ if (regPtrNext->rpdCall)
+ {
+ assert(isCall || callInstrSize == 0);
+ regPtrNext->rpdCallInstrSize = callInstrSize;
+ }
+#endif
+ regPtrNext->rpdCallGCrefRegs = gcrefRegs;
+ regPtrNext->rpdCallByrefRegs = byrefRegs;
+ regPtrNext->rpdArg = TRUE;
+ regPtrNext->rpdArgType = (unsigned short)GCInfo::rpdARG_POP;
+ regPtrNext->rpdPtrArg = argRecCnt.Value();
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("[%08X] ptr arg pop %u\n", dspPtr(regPtrNext), count);
+ }
+#endif
+}
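+
+// Illustrative note (not part of the original emitter code): the CNT_CALLEE_SAVED loop above
+// compresses the live gcref/byref register masks into small bitmasks indexed by position in the
+// callee-saved register order rather than by architectural register number. A standalone sketch
+// of that compression, with 'order' standing in for raRbmCalleeSaveOrder:
+//
+//     unsigned CompressToCalleeSavedIndices(unsigned long long liveMask,
+//                                           const unsigned long long* order, // one reg bit per entry
+//                                           unsigned count)
+//     {
+//         unsigned compressed = 0;
+//         for (unsigned i = 0; i < count; i++)
+//         {
+//             if ((liveMask & order[i]) != 0)
+//             {
+//                 compressed |= (1u << i); // bit i <=> the i-th callee-saved register is live
+//             }
+//         }
+//         return compressed;
+//     }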
+
+/*****************************************************************************
+ * For caller-pop arguments, we report the arguments as pending arguments.
+ * However, any GC arguments are now dead, so we need to report them
+ * as non-GC.
+ */
+
+void emitter::emitStackKillArgs(BYTE* addr, unsigned count, unsigned char callInstrSize)
+{
+ assert(count > 0);
+
+ if (emitSimpleStkUsed)
+ {
+        assert(!emitFullGCinfo); // Simple stk not used for emitFullGCinfo
+
+        /* We don't need to report this to the GC info, but we do need
+           to mark the ptrs on the stack as non-GC */
+
+ assert(emitCurStackLvl / sizeof(int) >= count);
+
+ for (unsigned lvl = 0; lvl < count; lvl++)
+ {
+ u1.emitSimpleStkMask &= ~(1 << lvl);
+ u1.emitSimpleByrefStkMask &= ~(1 << lvl);
+ }
+ }
+ else
+ {
+ BYTE* argTrackTop = u2.emitArgTrackTop;
+ S_UINT16 gcCnt(0);
+
+ for (unsigned i = 0; i < count; i++)
+ {
+ assert(argTrackTop > u2.emitArgTrackTab);
+
+ --argTrackTop;
+
+ GCtype gcType = (GCtype)(*argTrackTop);
+ assert(IsValidGCtype(gcType));
+
+ if (needsGC(gcType))
+ {
+ // printf("Killed %s at lvl %u\n", GCtypeStr(gcType), argTrackTop - emitArgTrackTab);
+
+ *argTrackTop = GCT_NONE;
+ gcCnt += 1;
+ }
+ }
+
+ noway_assert(!gcCnt.IsOverflow());
+
+ /* We're about to kill the corresponding (pointer) arg records */
+
+ if (emitHasFramePtr)
+ {
+ u2.emitGcArgTrackCnt -= gcCnt.Value();
+ }
+
+ if (!emitFullGCinfo)
+ {
+ return;
+ }
+
+ /* Right after the call, the arguments are still sitting on the
+ stack, but they are effectively dead. For fully-interruptible
+ methods, we need to report that */
+
+ if (emitFullGCinfo && gcCnt.Value())
+ {
+ /* Allocate a new ptr arg entry and fill it in */
+
+ regPtrDsc* regPtrNext = codeGen->gcInfo.gcRegPtrAllocDsc();
+ regPtrNext->rpdGCtype = GCT_GCREF; // Kills need a non-0 value (??)
+
+ regPtrNext->rpdOffs = emitCurCodeOffs(addr);
+
+ regPtrNext->rpdArg = TRUE;
+ regPtrNext->rpdArgType = (unsigned short)GCInfo::rpdARG_KILL;
+ regPtrNext->rpdPtrArg = gcCnt.Value();
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("[%08X] ptr arg kill %u\n", dspPtr(regPtrNext), count);
+ }
+#endif
+ }
+
+ /* Now that ptr args have been marked as non-ptrs, we need to record
+ the call itself as one that has no arguments. */
+
+ emitStackPopLargeStk(addr, true, callInstrSize, 0);
+ }
+}
+
+/*****************************************************************************
+ * A helper for recording a relocation with the EE.
+ */
+void emitter::emitRecordRelocation(void* location, /* IN */
+ void* target, /* IN */
+ WORD fRelocType, /* IN */
+ WORD slotNum /* = 0 */, /* IN */
+ INT32 addlDelta /* = 0 */) /* IN */
+{
+ // If we're an unmatched altjit, don't tell the VM anything. We still record the relocation for
+ // late disassembly; maybe we'll need it?
+ if (emitComp->info.compMatchedVM)
+ {
+ emitCmpHandle->recordRelocation(location, target, fRelocType, slotNum, addlDelta);
+ }
+#if defined(LATE_DISASM)
+ codeGen->getDisAssembler().disRecordRelocation((size_t)location, (size_t)target);
+#endif // defined(LATE_DISASM)
+}
+
+/*****************************************************************************
+ * A helper for recording a call site with the EE.
+ */
+void emitter::emitRecordCallSite(ULONG instrOffset, /* IN */
+ CORINFO_SIG_INFO* callSig, /* IN */
+ CORINFO_METHOD_HANDLE methodHandle) /* IN */
+{
+#if defined(DEBUG)
+ // Since CORINFO_SIG_INFO is a heavyweight structure, in most cases we can
+ // lazily obtain it here using the given method handle (we only save the sig
+ // info when we explicitly need it, i.e. for CALLI calls, vararg calls, and
+ // tail calls).
+ if (callSig == nullptr)
+ {
+ assert(methodHandle != nullptr);
+
+ if (Compiler::eeGetHelperNum(methodHandle) == CORINFO_HELP_UNDEF)
+ {
+ if (emitScratchSigInfo == nullptr)
+ {
+ emitScratchSigInfo = new (emitComp, CMK_CorSig) CORINFO_SIG_INFO;
+ }
+
+ emitComp->eeGetMethodSig(methodHandle, emitScratchSigInfo);
+ callSig = emitScratchSigInfo;
+ }
+ }
+
+ emitCmpHandle->recordCallSite(instrOffset, callSig, methodHandle);
+#endif // defined(DEBUG)
+}
+
+/*****************************************************************************/
+#endif // EMIT_TRACK_STACK_DEPTH
+/*****************************************************************************/
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ * Given a code offset, return a string representing a label for that offset.
+ * If the code offset is just after the end of the code of the function, the
+ * label will be "END". If the code offset doesn't correspond to any known
+ * offset, the label will be "UNKNOWN". The strings are returned from static
+ * buffers. This function rotates amongst four such static buffers (there are
+ * cases where this function is called four times to provide data for a single
+ * printf()).
+ */
+
+const char* emitter::emitOffsetToLabel(unsigned offs)
+{
+ const size_t TEMP_BUFFER_LEN = 40;
+ static unsigned curBuf = 0;
+ static char buf[4][TEMP_BUFFER_LEN];
+ char* retbuf;
+
+ insGroup* ig;
+ UNATIVE_OFFSET of;
+ UNATIVE_OFFSET nextof = 0;
+
+ for (ig = emitIGlist; ig != nullptr; ig = ig->igNext)
+ {
+ assert(nextof == ig->igOffs);
+
+ if (ig->igOffs == offs)
+ {
+ // Found it!
+ sprintf_s(buf[curBuf], TEMP_BUFFER_LEN, "G_M%03u_IG%02u", Compiler::s_compMethodsCount, ig->igNum);
+ retbuf = buf[curBuf];
+ curBuf = (curBuf + 1) % 4;
+ return retbuf;
+ }
+ else if (ig->igOffs > offs)
+ {
+ // We went past the requested offset but didn't find it.
+ sprintf_s(buf[curBuf], TEMP_BUFFER_LEN, "UNKNOWN");
+ retbuf = buf[curBuf];
+ curBuf = (curBuf + 1) % 4;
+ return retbuf;
+ }
+
+ nextof = ig->igOffs + ig->igSize;
+ }
+
+ if (nextof == offs)
+ {
+ // It's a pseudo-label to the end.
+ sprintf_s(buf[curBuf], TEMP_BUFFER_LEN, "END");
+ retbuf = buf[curBuf];
+ curBuf = (curBuf + 1) % 4;
+ return retbuf;
+ }
+ else
+ {
+ sprintf_s(buf[curBuf], TEMP_BUFFER_LEN, "UNKNOWN");
+ retbuf = buf[curBuf];
+ curBuf = (curBuf + 1) % 4;
+ return retbuf;
+ }
+}
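+
+// Illustrative note (not part of the original emitter code): because the returned strings live in
+// four rotating static buffers, up to four results can safely appear in a single statement, e.g.
+//
+//     printf("range [%s..%s)\n", emitOffsetToLabel(startOffs), emitOffsetToLabel(endOffs));
+//
+// (a hypothetical call; a fifth concurrent use would start overwriting the oldest buffer).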
+
+#endif // DEBUG
diff --git a/src/jit/emit.h b/src/jit/emit.h
new file mode 100644
index 0000000000..8fb24bcd60
--- /dev/null
+++ b/src/jit/emit.h
@@ -0,0 +1,2742 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#ifndef _EMIT_H_
+#define _EMIT_H_
+
+#include "instr.h"
+
+#ifndef _GCINFO_H_
+#include "gcinfo.h"
+#endif
+
+#include "jitgcinfo.h"
+
+/*****************************************************************************/
+#ifdef TRANSLATE_PDB
+#ifndef _ADDRMAP_INCLUDED_
+#include "addrmap.h"
+#endif
+#ifndef _LOCALMAP_INCLUDED_
+#include "localmap.h"
+#endif
+#ifndef _PDBREWRITE_H_
+#include "pdbrewrite.h"
+#endif
+#endif // TRANSLATE_PDB
+
+/*****************************************************************************/
+#ifdef _MSC_VER
+#pragma warning(disable : 4200) // allow arrays of 0 size inside structs
+#endif
+#define TRACK_GC_TEMP_LIFETIMES 0
+
+/*****************************************************************************/
+
+#if 0
+#define EMITVERBOSE 1
+#else
+#define EMITVERBOSE (emitComp->verbose)
+#endif
+
+#if 0
+#define EMIT_GC_VERBOSE 0
+#else
+#define EMIT_GC_VERBOSE (emitComp->verbose)
+#endif
+
+#if 1
+#define EMIT_INSTLIST_VERBOSE 0
+#else
+#define EMIT_INSTLIST_VERBOSE (emitComp->verbose)
+#endif
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+#define DEBUG_EMIT 1
+#else
+#define DEBUG_EMIT 0
+#endif
+
+#if EMITTER_STATS
+void emitterStats(FILE* fout);
+void emitterStaticStats(FILE* fout); // Static stats about the emitter (data structure offsets, sizes, etc.)
+#endif
+
+void printRegMaskInt(regMaskTP mask);
+
+/*****************************************************************************/
+/* Forward declarations */
+
+class emitLocation;
+class emitter;
+struct insGroup;
+
+typedef void (*emitSplitCallbackType)(void* context, emitLocation* emitLoc);
+
+/*****************************************************************************/
+
+//-----------------------------------------------------------------------------
+
+inline bool needsGC(GCtype gcType)
+{
+ if (gcType == GCT_NONE)
+ {
+ return false;
+ }
+ else
+ {
+ assert(gcType == GCT_GCREF || gcType == GCT_BYREF);
+ return true;
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+#ifdef DEBUG
+
+inline bool IsValidGCtype(GCtype gcType)
+{
+ return (gcType == GCT_NONE || gcType == GCT_GCREF || gcType == GCT_BYREF);
+}
+
+// Get a string name to represent the GC type
+
+inline const char* GCtypeStr(GCtype gcType)
+{
+ switch (gcType)
+ {
+ case GCT_NONE:
+ return "npt";
+ case GCT_GCREF:
+ return "gcr";
+ case GCT_BYREF:
+ return "byr";
+ default:
+ assert(!"Invalid GCtype");
+ return "err";
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************/
+
+#if DEBUG_EMIT
+#define INTERESTING_JUMP_NUM -1 // set to 0 to see all jump info
+//#define INTERESTING_JUMP_NUM 0
+#endif
+
+/*****************************************************************************
+ *
+ * Represent an emitter location.
+ */
+
+class emitLocation
+{
+public:
+ emitLocation() : ig(nullptr), codePos(0)
+ {
+ }
+
+ emitLocation(insGroup* _ig) : ig(_ig), codePos(0)
+ {
+ }
+
+ emitLocation(void* emitCookie) : ig((insGroup*)emitCookie), codePos(0)
+ {
+ }
+
+    // A pseudo-constructor for code that needs to (re)initialize an existing instance explicitly.
+ void Init()
+ {
+ this->emitLocation::emitLocation();
+ }
+
+ void CaptureLocation(emitter* emit);
+
+ bool IsCurrentLocation(emitter* emit) const;
+
+ // This function is highly suspect, since it presumes knowledge of the codePos "cookie",
+ // and doesn't look at the 'ig' pointer.
+ bool IsOffsetZero() const
+ {
+ return (codePos == 0);
+ }
+
+ UNATIVE_OFFSET CodeOffset(emitter* emit) const;
+
+ insGroup* GetIG() const
+ {
+ return ig;
+ }
+
+ int GetInsNum() const;
+
+ bool operator!=(const emitLocation& other) const
+ {
+ return (ig != other.ig) || (codePos != other.codePos);
+ }
+
+ bool operator==(const emitLocation& other) const
+ {
+ return !(*this != other);
+ }
+
+ bool Valid() const
+ {
+ // Things we could validate:
+ // 1. the instruction group pointer is non-nullptr.
+ // 2. 'ig' is a legal pointer to an instruction group.
+ // 3. 'codePos' is a legal offset into 'ig'.
+ // Currently, we just do #1.
+ // #2 and #3 should only be done in DEBUG, if they are implemented.
+
+ if (ig == nullptr)
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+#ifdef _TARGET_AMD64_
+ UNATIVE_OFFSET GetFuncletPrologOffset(emitter* emit) const;
+#endif // _TARGET_AMD64_
+
+#ifdef DEBUG
+ void Print() const;
+#endif // DEBUG
+
+private:
+ insGroup* ig; // the instruction group
+ unsigned codePos; // the code position within the IG (see emitCurOffset())
+};
+
+/************************************************************************/
+/* The following describes an instruction group */
+/************************************************************************/
+
+DECLARE_TYPED_ENUM(insGroupPlaceholderType, unsigned char)
+{
+ IGPT_PROLOG, // currently unused
+ IGPT_EPILOG,
+#if FEATURE_EH_FUNCLETS
+ IGPT_FUNCLET_PROLOG, IGPT_FUNCLET_EPILOG,
+#endif // FEATURE_EH_FUNCLETS
+}
+END_DECLARE_TYPED_ENUM(insGroupPlaceholderType, unsigned char)
+
+#if defined(_MSC_VER) && defined(_TARGET_ARM_)
+// ARM aligns structures that contain 64-bit ints or doubles on 64-bit boundaries. This causes unwanted
+// padding to be added to the end, so sizeof() is unnecessarily big.
+#pragma pack(push)
+#pragma pack(4)
+#endif // defined(_MSC_VER) && defined(_TARGET_ARM_)
+
+struct insPlaceholderGroupData
+{
+ insGroup* igPhNext;
+ BasicBlock* igPhBB;
+ VARSET_TP igPhInitGCrefVars;
+ regMaskTP igPhInitGCrefRegs;
+ regMaskTP igPhInitByrefRegs;
+ VARSET_TP igPhPrevGCrefVars;
+ regMaskTP igPhPrevGCrefRegs;
+ regMaskTP igPhPrevByrefRegs;
+ insGroupPlaceholderType igPhType;
+}; // end of struct insPlaceholderGroupData
+
+struct insGroup
+{
+ insGroup* igNext;
+
+#ifdef DEBUG
+ insGroup* igSelf; // for consistency checking
+#endif
+
+ UNATIVE_OFFSET igNum; // for ordering (and display) purposes
+ UNATIVE_OFFSET igOffs; // offset of this group within method
+ unsigned int igFuncIdx; // Which function/funclet does this belong to? (Index into Compiler::compFuncInfos array.)
+ unsigned short igFlags; // see IGF_xxx below
+ unsigned short igSize; // # of bytes of code in this group
+
+#define IGF_GC_VARS 0x0001 // new set of live GC ref variables
+#define IGF_BYREF_REGS 0x0002 // new set of live by-ref registers
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+#define IGF_FINALLY_TARGET 0x0004 // this group is the start of a basic block that is returned to after a finally.
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+#define IGF_FUNCLET_PROLOG 0x0008 // this group belongs to a funclet prolog
+#ifdef DEBUG
+#define IGF_FUNCLET_EPILOG 0x0010 // this group belongs to a funclet epilog. Currently, this is only needed for DEBUG.
+#endif
+#define IGF_EPILOG 0x0020 // this group belongs to a main function epilog
+#define IGF_NOGCINTERRUPT 0x0040 // this IG is a no-interrupt region (prolog, epilog, etc.)
+#define IGF_UPD_ISZ 0x0080 // some instruction sizes updated
+#define IGF_PLACEHOLDER 0x0100 // this is a placeholder group, to be filled in later
+#define IGF_EMIT_ADD 0x0200 // this is a block added by the emitter
+ // because the codegen block was too big. Also used for
+ // placeholder IGs that aren't also labels.
+
+// Mask of IGF_* flags that should be propagated to new blocks when they are created.
+// This allows prologs and epilogs to be any number of IGs, but still be
+// automatically marked properly.
+#if FEATURE_EH_FUNCLETS
+#ifdef DEBUG
+#define IGF_PROPAGATE_MASK (IGF_EPILOG | IGF_FUNCLET_PROLOG | IGF_FUNCLET_EPILOG)
+#else // DEBUG
+#define IGF_PROPAGATE_MASK (IGF_EPILOG | IGF_FUNCLET_PROLOG)
+#endif // DEBUG
+#else // FEATURE_EH_FUNCLETS
+#define IGF_PROPAGATE_MASK (IGF_EPILOG)
+#endif // FEATURE_EH_FUNCLETS
+
+ // Try to do better packing based on how large regMaskSmall is (8, 16, or 64 bits).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if REGMASK_BITS <= 32
+
+ union {
+ BYTE* igData; // addr of instruction descriptors
+ insPlaceholderGroupData* igPhData; // when igFlags & IGF_PLACEHOLDER
+ };
+
+#if EMIT_TRACK_STACK_DEPTH
+ unsigned igStkLvl; // stack level on entry
+#endif
+ regMaskSmall igGCregs; // set of registers with live GC refs
+ unsigned char igInsCnt; // # of instructions in this group
+
+#else // REGMASK_BITS
+
+ regMaskSmall igGCregs; // set of registers with live GC refs
+
+ union {
+ BYTE* igData; // addr of instruction descriptors
+ insPlaceholderGroupData* igPhData; // when igFlags & IGF_PLACEHOLDER
+ };
+
+#if EMIT_TRACK_STACK_DEPTH
+ unsigned igStkLvl; // stack level on entry
+#endif
+
+ unsigned char igInsCnt; // # of instructions in this group
+
+#endif // REGMASK_BITS
+
+ VARSET_VALRET_TP igGCvars() const
+ {
+ assert(igFlags & IGF_GC_VARS);
+
+ BYTE* ptr = (BYTE*)igData;
+ ptr -= sizeof(VARSET_TP);
+
+ return *(VARSET_TP*)ptr;
+ }
+
+ unsigned igByrefRegs() const
+ {
+ assert(igFlags & IGF_BYREF_REGS);
+
+ BYTE* ptr = (BYTE*)igData;
+
+ if (igFlags & IGF_GC_VARS)
+ {
+ ptr -= sizeof(VARSET_TP);
+ }
+
+ ptr -= sizeof(unsigned);
+
+ return *(unsigned*)ptr;
+ }
+
+}; // end of struct insGroup
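+
+// Illustrative note (not part of the original emitter code): as the igGCvars()/igByrefRegs()
+// accessors above imply, the optional per-group GC data is stored immediately *before* the
+// instruction descriptors that igData points at, in this order (ascending addresses):
+//
+//     [ unsigned byrefRegs ] [ VARSET_TP gcrefVars ] [ instruction descriptors ... ]
+//      (if IGF_BYREF_REGS)     (if IGF_GC_VARS)       ^-- igData points here
+//
+// so each accessor just backs up over whichever optional blocks are present.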
+
+// For AMD64 the maximum prolog/epilog size supported by the OS is 256 bytes.
+// Since it is incorrect for us to jump across funclet prologs/epilogs,
+// we use the following estimate as the maximum placeholder size.
+//
+#define MAX_PLACEHOLDER_IG_SIZE 256
+
+#if defined(_MSC_VER) && defined(_TARGET_ARM_)
+#pragma pack(pop)
+#endif // defined(_MSC_VER) && defined(_TARGET_ARM_)
+
+/*****************************************************************************/
+
+#define DEFINE_ID_OPS
+#include "emitfmts.h"
+#undef DEFINE_ID_OPS
+
+enum LclVarAddrTag
+{
+ LVA_STANDARD_ENCODING = 0,
+ LVA_LARGE_OFFSET = 1,
+ LVA_COMPILER_TEMP = 2,
+ LVA_LARGE_VARNUM = 3
+};
+
+struct emitLclVarAddr
+{
+ // Constructor
+ void initLclVarAddr(int varNum, unsigned offset);
+
+ int lvaVarNum(); // Returns the variable to access. Note that it returns a negative number for compiler spill temps.
+ unsigned lvaOffset(); // returns the offset into the variable to access
+
+ // This struct should be 32 bits in size for the release build.
+ // We have this constraint because this type is used in a union
+ // with several other pointer sized types in the instrDesc struct.
+ //
+protected:
+ unsigned _lvaVarNum : 15; // Usually the lvaVarNum
+ unsigned _lvaExtra : 15; // Usually the lvaOffset
+ unsigned _lvaTag : 2; // tag field to support larger varnums
+};
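+
+// Illustrative note (not part of the original emitter code): the 2-bit _lvaTag selects how the
+// other 30 bits are interpreted (see LclVarAddrTag above); the real packing is done by
+// initLclVarAddr (declared above). A hedged sketch of just the common LVA_STANDARD_ENCODING
+// case, written as a hypothetical member for illustration:
+//
+//     void packStandard(int varNum, unsigned offset)
+//     {
+//         assert((unsigned)varNum < (1u << 15) && offset < (1u << 15)); // must fit the 15-bit fields
+//         _lvaVarNum = (unsigned)varNum;
+//         _lvaExtra  = offset;
+//         _lvaTag    = LVA_STANDARD_ENCODING;
+//     }
+//
+// The large-offset, large-varnum, and compiler-temp cases use the other tag values instead.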
+
+enum idAddrUnionTag
+{
+ iaut_ALIGNED_POINTER = 0x0,
+ iaut_DATA_OFFSET = 0x1,
+ iaut_INST_COUNT = 0x2,
+ iaut_UNUSED_TAG = 0x3,
+
+ iaut_MASK = 0x3,
+ iaut_SHIFT = 2
+};
+
+class emitter
+{
+ friend class emitLocation;
+ friend class Compiler;
+ friend class CodeGen;
+ friend class CodeGenInterface;
+
+public:
+ /*************************************************************************
+ *
+ * Define the public entry points.
+ */
+
+ // Constructor.
+ emitter()
+ {
+#ifdef DEBUG
+ // There seem to be some cases where this is used without being initialized via CodeGen::inst_set_SV_var().
+ emitVarRefOffs = 0;
+#endif // DEBUG
+#ifdef FEATURE_AVX_SUPPORT
+ SetUseAVX(false);
+#endif // FEATURE_AVX_SUPPORT
+ }
+
+#include "emitpub.h"
+
+protected:
+ /************************************************************************/
+ /* Miscellaneous stuff */
+ /************************************************************************/
+
+ Compiler* emitComp;
+ GCInfo* gcInfo;
+ CodeGen* codeGen;
+
+ typedef GCInfo::varPtrDsc varPtrDsc;
+ typedef GCInfo::regPtrDsc regPtrDsc;
+ typedef GCInfo::CallDsc callDsc;
+
+ void* emitGetMem(size_t sz);
+
+ DECLARE_TYPED_ENUM(opSize, unsigned)
+ {
+ OPSZ1 = 0, OPSZ2 = 1, OPSZ4 = 2, OPSZ8 = 3, OPSZ16 = 4, OPSZ32 = 5, OPSZ_COUNT = 6,
+#ifdef _TARGET_AMD64_
+ OPSZP = OPSZ8,
+#else
+ OPSZP = OPSZ4,
+#endif
+ }
+ END_DECLARE_TYPED_ENUM(opSize, unsigned)
+
+#define OPSIZE_INVALID ((opSize)0xffff)
+
+ static const emitter::opSize emitSizeEncode[];
+ static const emitAttr emitSizeDecode[];
+
+ static emitter::opSize emitEncodeSize(emitAttr size);
+ static emitAttr emitDecodeSize(emitter::opSize ensz);
+
+ // Currently, we only allow one IG for the prolog
+ bool emitIGisInProlog(const insGroup* ig)
+ {
+ return ig == emitPrologIG;
+ }
+
+ bool emitIGisInEpilog(const insGroup* ig)
+ {
+ return (ig != nullptr) && ((ig->igFlags & IGF_EPILOG) != 0);
+ }
+
+#if FEATURE_EH_FUNCLETS
+
+ bool emitIGisInFuncletProlog(const insGroup* ig)
+ {
+ return (ig != nullptr) && ((ig->igFlags & IGF_FUNCLET_PROLOG) != 0);
+ }
+
+#ifdef DEBUG
+ bool emitIGisInFuncletEpilog(const insGroup* ig)
+ {
+ return (ig != nullptr) && ((ig->igFlags & IGF_FUNCLET_EPILOG) != 0);
+ }
+#endif // DEBUG
+#endif // FEATURE_EH_FUNCLETS
+
+ // If "ig" corresponds to the start of a basic block that is the
+    // target of a funclet return, generate GC information for its start
+ // address "cp", as if it were the return address of a call.
+ void emitGenGCInfoIfFuncletRetTarget(insGroup* ig, BYTE* cp);
+
+ void emitRecomputeIGoffsets();
+
+ /************************************************************************/
+ /* The following describes a single instruction */
+ /************************************************************************/
+
+ DECLARE_TYPED_ENUM(insFormat, unsigned)
+ {
+#define IF_DEF(en, op1, op2) IF_##en,
+#include "emitfmts.h"
+
+ IF_COUNT
+ }
+ END_DECLARE_TYPED_ENUM(insFormat, unsigned)
+
+#define AM_DISP_BITS ((sizeof(unsigned) * 8) - 2 * (REGNUM_BITS + 1) - 2)
+#define AM_DISP_BIG_VAL (-(1 << (AM_DISP_BITS - 1)))
+#define AM_DISP_MIN (-((1 << (AM_DISP_BITS - 1)) - 1))
+#define AM_DISP_MAX (+((1 << (AM_DISP_BITS - 1)) - 1))
+
+ struct emitAddrMode
+ {
+ regNumber amBaseReg : REGNUM_BITS + 1;
+ regNumber amIndxReg : REGNUM_BITS + 1;
+ emitter::opSize amScale : 2;
+ int amDisp : AM_DISP_BITS;
+ };
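+
+    // Illustrative note (not part of the original emitter code): the displacement gets whatever
+    // bits of the 32-bit emitAddrMode remain after the two register fields and the 2-bit scale.
+    // For example, with a hypothetical REGNUM_BITS of 6:
+    //
+    //     AM_DISP_BITS    = 32 - 2*(6 + 1) - 2 = 16
+    //     AM_DISP_BIG_VAL = -(1 << 15)         = -32768  (the one reserved pattern; its name
+    //                                                     suggests "displacement too big to store in-line")
+    //     AM_DISP_MIN/MAX = -32767 .. +32767             (the range encodable directly in amDisp)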
+
+#if defined(DEBUG) || defined(LATE_DISASM) // LATE_DISASM needs the idMemCookie on calls to display the call target name
+
+ struct instrDesc;
+
+ struct instrDescDebugInfo
+ {
+ unsigned idNum;
+ size_t idSize; // size of the instruction descriptor
+ unsigned idVarRefOffs; // IL offset for LclVar reference
+ size_t idMemCookie; // for display of member names in addr modes
+ void* idClsCookie; // for display of member names in addr modes
+#ifdef TRANSLATE_PDB
+ unsigned int idilStart; // instruction descriptor source information for PDB translation
+#endif
+ bool idFinallyCall; // Branch instruction is a call to finally
+ bool idCatchRet; // Instruction is for a catch 'return'
+ CORINFO_SIG_INFO* idCallSig; // Used to report native call site signatures to the EE
+ };
+
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+#ifdef _TARGET_ARM_
+ unsigned insEncodeSetFlags(insFlags sf);
+
+ DECLARE_TYPED_ENUM(insSize, unsigned)
+ {
+ ISZ_16BIT, ISZ_32BIT, ISZ_48BIT // pseudo-instruction for conditional branch with imm24 range,
+ // encoded as IT of condition followed by an unconditional branch
+ }
+ END_DECLARE_TYPED_ENUM(insSize, unsigned)
+
+ unsigned insEncodeShiftOpts(insOpts opt);
+ unsigned insEncodePUW_G0(insOpts opt, int imm);
+ unsigned insEncodePUW_H0(insOpts opt, int imm);
+
+#endif // _TARGET_ARM_
+
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+#define HAS_TINY_DESC 1
+#else
+#define HAS_TINY_DESC 0
+#endif
+
+ struct instrDescCns;
+
+ struct instrDesc
+ {
+ private:
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ // The assembly instruction
+ instruction _idIns : 9;
+#else // !defined(_TARGET_XARCH_) || defined(LEGACY_BACKEND)
+ // The assembly instruction
+ instruction _idIns : 8;
+#endif // !defined(_TARGET_XARCH_) || defined(LEGACY_BACKEND)
+ // The format for the instruction
+ insFormat _idInsFmt : 8;
+
+ public:
+ instruction idIns() const
+ {
+ return _idIns;
+ }
+ void idIns(instruction ins)
+ {
+ _idIns = ins;
+ assert(_idIns == ins);
+ }
+
+ insFormat idInsFmt() const
+ {
+ return _idInsFmt;
+ }
+ void idInsFmt(insFormat insFmt)
+ {
+#if defined(_TARGET_ARM64_)
+ noway_assert(insFmt != IF_NONE); // Only the x86 emitter uses IF_NONE, it is invalid for ARM64 (and ARM32)
+#endif
+ _idInsFmt = insFmt;
+ assert(_idInsFmt == insFmt);
+ }
+
+ /*
+ The idReg1 and idReg2 fields hold the first and second register
+ operand(s), whenever these are present. Note that the size of
+ these fields ranges from 3 to 6 bits, and care needs to be taken
+ to make sure all of these fields stay reasonably packed.
+ */
+
+ void idSetRelocFlags(emitAttr attr)
+ {
+ _idCnsReloc = (EA_IS_CNS_RELOC(attr) ? 1 : 0);
+ _idDspReloc = (EA_IS_DSP_RELOC(attr) ? 1 : 0);
+ }
+
+ ////////////////////////////////////////////////////////////////////////
+ // Space taken up to here:
+ // x86: 16 bits
+ // amd64: 17 bits
+ // arm: 16 bits
+ // arm64: 16 bits
+
+ private:
+#ifdef _TARGET_XARCH_
+ unsigned _idCodeSize : 4; // size of instruction in bytes
+#endif
+
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16, 5=32
+ // At this point we have fully consumed first DWORD so that next field
+ // doesn't cross a byte boundary.
+#elif defined(_TARGET_ARM64_)
+// Moved the definition of '_idOpSize' later so that we don't cross a 32-bit boundary when laying out bitfields
+#else // ARM or x86-LEGACY_BACKEND
+ opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8
+#endif // ARM or x86-LEGACY_BACKEND
+
+ // On Amd64, this is where the second DWORD begins
+ // On System V a call could return a struct in 2 registers. The instrDescCGCA struct below has member that
+ // stores the GC-ness of the second register.
+ // It is added to the instrDescCGCA and not here (the base struct) since it is not needed by all the
+ // instructions. This struct (instrDesc) is very carefully kept to be no more than 128 bytes. There is no more
+ // space to add members for keeping GC-ness of the second return registers. It will also bloat the base struct
+ // unnecessarily since the GC-ness of the second register is only needed for call instructions.
+ // The instrDescCGCA struct's member keeping the GC-ness of the first return register is _idcSecondRetRegGCType.
+ GCtype _idGCref : 2; // GCref operand? (value is a "GCtype")
+
+ // Note that we use the _idReg1 and _idReg2 fields to hold
+ // the live gcrefReg mask for the call instructions on x86/x64
+ //
+ regNumber _idReg1 : REGNUM_BITS; // register num
+
+ regNumber _idReg2 : REGNUM_BITS;
+
+ ////////////////////////////////////////////////////////////////////////
+ // Space taken up to here:
+ // x86: 30 bits
+ // amd64: 38 bits
+ // arm: 32 bits
+ // arm64: 30 bits
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if HAS_TINY_DESC
+ //
+ // For x86 use last two bits to differentiate if we are tiny or small
+ //
+ unsigned _idTinyDsc : 1; // is this a "tiny" descriptor?
+ unsigned _idSmallDsc : 1; // is this a "small" descriptor?
+
+#else // !HAS_TINY_DESC
+
+ //
+ // On x86/arm platforms we have used 32 bits so far (4 bytes)
+ // On amd64 we have used 38 bits so far (4 bytes + 6 bits)
+ //
+
+ //
+ // For amd64 we just can't fit anything useful into a single DWORD
+ // So we eliminate the notion of 'tiny', and have small (2 DWORDS)
+ // or not small (which is bigger, just like x86)
+ //
+
+ unsigned _idSmallDsc : 1; // is this a "small" descriptor?
+ unsigned _idLargeCns : 1; // does a large constant follow?
+ unsigned _idLargeDsp : 1; // does a large displacement follow?
+ unsigned _idLargeCall : 1; // large call descriptor used
+
+ unsigned _idBound : 1; // jump target / frame offset bound
+ unsigned _idCallRegPtr : 1; // IL indirect calls: addr in reg
+ unsigned _idCallAddr : 1; // IL indirect calls: can make a direct call to iiaAddr
+ unsigned _idNoGC : 1; // Some helpers don't get recorded in GC tables
+
+#ifdef _TARGET_ARM64_
+ opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16
+ insOpts _idInsOpt : 6; // options for instructions
+ unsigned _idLclVar : 1; // access a local on stack
+#endif
+
+#ifdef _TARGET_ARM_
+ insSize _idInsSize : 2; // size of instruction: 16, 32 or 48 bits
+ insFlags _idInsFlags : 1; // will this instruction set the flags
+ unsigned _idLclVar : 1; // access a local on stack
+ unsigned _idLclFPBase : 1; // access a local on stack - SP based offset
+ insOpts _idInsOpt : 3; // options for Load/Store instructions
+
+// For arm we have used 16 bits
+#define ID_EXTRA_BITFIELD_BITS (16)
+
+#elif defined(_TARGET_ARM64_)
+// For Arm64, we have used 16 bits from the second DWORD.
+#define ID_EXTRA_BITFIELD_BITS (16)
+#elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+// For xarch !LEGACY_BACKEND, we have used 14 bits from the second DWORD.
+#define ID_EXTRA_BITFIELD_BITS (14)
+#elif defined(_TARGET_X86_)
+// For x86, we have used 6 bits from the second DWORD.
+#define ID_EXTRA_BITFIELD_BITS (6)
+#else
+#error Unsupported or unset target architecture
+#endif
+
+ ////////////////////////////////////////////////////////////////////////
+ // Space taken up to here:
+ // x86: 38 bits // if HAS_TINY_DESC is not defined (which it is)
+ // amd64: 46 bits
+ // arm: 48 bits
+ // arm64: 48 bits
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef RELOC_SUPPORT
+
+ unsigned _idCnsReloc : 1; // LargeCns is an RVA and needs reloc tag
+ unsigned _idDspReloc : 1; // LargeDsp is an RVA and needs reloc tag
+
+#define ID_EXTRA_RELOC_BITS (2)
+
+#else // RELOC_SUPPORT
+
+#define ID_EXTRA_RELOC_BITS (0)
+
+#endif // RELOC_SUPPORT
+
+ ////////////////////////////////////////////////////////////////////////
+ // Space taken up to here (assuming RELOC_SUPPORT):
+ // x86: 40 bits
+ // amd64: 48 bits
+ // arm: 50 bits
+ // arm64: 50 bits
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#define ID_EXTRA_BITS (ID_EXTRA_RELOC_BITS + ID_EXTRA_BITFIELD_BITS)
+
+/* Use whatever bits are left over for small constants */
+
+#define ID_BIT_SMALL_CNS (32 - ID_EXTRA_BITS)
+#define ID_MIN_SMALL_CNS 0
+#define ID_MAX_SMALL_CNS (int)((1 << ID_BIT_SMALL_CNS) - 1U)
+
+ ////////////////////////////////////////////////////////////////////////
+ // Small constant size (assuming RELOC_SUPPORT):
+ // x86: 24 bits
+ // amd64: 16 bits
+ // arm: 14 bits
+ // arm64: 14 bits
+
+ unsigned _idSmallCns : ID_BIT_SMALL_CNS;
+
+ ////////////////////////////////////////////////////////////////////////
+ // Space taken up to here (with RELOC_SUPPORT): 64 bits, all architectures, by design.
+ ////////////////////////////////////////////////////////////////////////
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#endif // !HAS_TINY_DESC
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+
+ instrDescDebugInfo* _idDebugOnlyInfo;
+
+ public:
+ instrDescDebugInfo* idDebugOnlyInfo() const
+ {
+ return _idDebugOnlyInfo;
+ }
+ void idDebugOnlyInfo(instrDescDebugInfo* info)
+ {
+ _idDebugOnlyInfo = info;
+ }
+
+ private:
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+ //
+ // This is the end of the smallest instrDesc we can allocate for all
+ // platforms.
+ // Non-DEBUG sizes:
+ // x86: 32 bits, and it is called the 'tiny' descriptor.
+ // amd64/arm/arm64: 64 bits, and it is called the 'small' descriptor.
+ // DEBUG sizes (includes one pointer):
+ // x86: 2 DWORDs, 64 bits
+ // amd64: 4 DWORDs, 128 bits
+ // arm: 3 DWORDs, 96 bits
+ // arm64: 4 DWORDs, 128 bits
+    // There should be no padding or alignment issues on any platform or
+ // configuration (including DEBUG which has 1 extra pointer).
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if HAS_TINY_DESC
+
+ unsigned _idLargeCns : 1; // does a large constant follow?
+ unsigned _idLargeDsp : 1; // does a large displacement follow?
+ unsigned _idLargeCall : 1; // large call descriptor used
+ unsigned _idBound : 1; // jump target / frame offset bound
+
+ unsigned _idCallRegPtr : 1; // IL indirect calls: addr in reg
+ unsigned _idCallAddr : 1; // IL indirect calls: can make a direct call to iiaAddr
+ unsigned _idNoGC : 1; // Some helpers don't get recorded in GC tables
+
+#define ID_EXTRA_BITFIELD_BITS (7)
+
+//
+// For x86, we are using 7 bits from the second DWORD for bitfields.
+//
+
+#ifdef RELOC_SUPPORT
+
+ unsigned _idCnsReloc : 1; // LargeCns is an RVA and needs reloc tag
+ unsigned _idDspReloc : 1; // LargeDsp is an RVA and needs reloc tag
+
+#define ID_EXTRA_RELOC_BITS (2)
+
+#else // RELOC_SUPPORT
+
+#define ID_EXTRA_RELOC_BITS (0)
+
+#endif // RELOC_SUPPORT
+
+#define ID_EXTRA_REG_BITS (0)
+
+#define ID_EXTRA_BITS (ID_EXTRA_BITFIELD_BITS + ID_EXTRA_RELOC_BITS + ID_EXTRA_REG_BITS)
+
+/* Use whatever bits are left over for small constants */
+
+#define ID_BIT_SMALL_CNS (32 - ID_EXTRA_BITS)
+#define ID_MIN_SMALL_CNS 0
+#define ID_MAX_SMALL_CNS (int)((1 << ID_BIT_SMALL_CNS) - 1U)
+
+ // For x86 (assuming RELOC_SUPPORT) we have 23 bits remaining for the
+ // small constant in this extra DWORD.
+
+ unsigned _idSmallCns : ID_BIT_SMALL_CNS;
+
+#endif // HAS_TINY_DESC
+
+//
+// This is the end of the 'small' instrDesc which is the same on all
+// platforms (except 64-bit DEBUG which is a little bigger).
+// Non-DEBUG sizes:
+// x86/amd64/arm/arm64: 64 bits
+// DEBUG sizes (includes one pointer):
+// x86: 2 DWORDs, 64 bits
+// amd64: 4 DWORDs, 128 bits
+// arm: 3 DWORDs, 96 bits
+// arm64: 4 DWORDs, 128 bits
+// There should no padding or alignment issues on any platform or
+// configuration (including DEBUG which has 1 extra pointer).
+//
+
+/*
+ If you add lots more fields that need to be cleared (such
+ as various flags), you might need to update the body of
+ emitter::emitAllocInstr() to clear them.
+ */
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+#define TINY_IDSC_DEBUG_EXTRA (sizeof(void*))
+#else
+#define TINY_IDSC_DEBUG_EXTRA (0)
+#endif
+
+#if HAS_TINY_DESC
+#define TINY_IDSC_SIZE (4 + TINY_IDSC_DEBUG_EXTRA)
+#define SMALL_IDSC_SIZE (8 + TINY_IDSC_DEBUG_EXTRA)
+#else
+#define TINY_IDSC_SIZE (8 + TINY_IDSC_DEBUG_EXTRA)
+#define SMALL_IDSC_SIZE TINY_IDSC_SIZE
+#endif
+
+ void checkSizes();
+
+ union idAddrUnion {
+ // TODO-Cleanup: We should really add a DEBUG-only tag to this union so we can add asserts
+ // about reading what we think is here, to avoid unexpected corruption issues.
+
+ emitLclVarAddr iiaLclVar;
+ BasicBlock* iiaBBlabel;
+ insGroup* iiaIGlabel;
+ BYTE* iiaAddr;
+ emitAddrMode iiaAddrMode;
+
+        CORINFO_FIELD_HANDLE iiaFieldHnd; // iiaFieldHnd is also used to encode
+                                          // an offset into the JIT data constant area
+ bool iiaIsJitDataOffset() const;
+ int iiaGetJitDataOffset() const;
+
+#ifdef _TARGET_ARMARCH_
+
+ // iiaEncodedInstrCount and its accessor functions are used to specify an instruction
+ // count for jumps, instead of using a label and multiple blocks. This is used in the
+ // prolog as well as for IF_LARGEJMP pseudo-branch instructions.
+ int iiaEncodedInstrCount;
+
+ bool iiaHasInstrCount() const
+ {
+ return (iiaEncodedInstrCount & iaut_MASK) == iaut_INST_COUNT;
+ }
+ int iiaGetInstrCount() const
+ {
+ assert(iiaHasInstrCount());
+ return (iiaEncodedInstrCount >> iaut_SHIFT);
+ }
+ void iiaSetInstrCount(int count)
+ {
+ assert(abs(count) < 10);
+ iiaEncodedInstrCount = (count << iaut_SHIFT) | iaut_INST_COUNT;
+ }
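+
+        // Illustrative sketch (not taken from the emitter): assuming the iaut_* tag bits occupy
+        // the low iaut_SHIFT bits, the instruction count round-trips through this encoding:
+        //
+        //   iiaSetInstrCount(3);                // stores (3 << iaut_SHIFT) | iaut_INST_COUNT
+        //   assert(iiaHasInstrCount());         // the iaut_MASK bits now identify an instruction count
+        //   assert(iiaGetInstrCount() == 3);    // shifting right by iaut_SHIFT recovers the count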
+
+ struct
+ {
+ regNumber _idReg3 : REGNUM_BITS;
+ regNumber _idReg4 : REGNUM_BITS;
+#ifdef _TARGET_ARM64_
+ unsigned _idReg3Scaled : 1; // Reg3 is scaled by idOpSize bits
+#endif
+ };
+#elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ struct
+ {
+ regNumber _idReg3 : REGNUM_BITS;
+ };
+#endif // defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+
+ } _idAddrUnion;
+
+ /* Trivial wrappers to return properly typed enums */
+ public:
+#if HAS_TINY_DESC
+
+ bool idIsTiny() const
+ {
+ return (_idTinyDsc != 0);
+ }
+ void idSetIsTiny()
+ {
+ _idTinyDsc = 1;
+ }
+
+#else
+
+ bool idIsTiny() const
+ {
+ return false;
+ }
+ void idSetIsTiny()
+ {
+ _idSmallDsc = 1;
+ }
+
+#endif // HAS_TINY_DESC
+
+ bool idIsSmallDsc() const
+ {
+ return (_idSmallDsc != 0);
+ }
+ void idSetIsSmallDsc()
+ {
+ _idSmallDsc = 1;
+ }
+
+#if defined(_TARGET_XARCH_)
+
+ unsigned idCodeSize() const
+ {
+ return _idCodeSize;
+ }
+ void idCodeSize(unsigned sz)
+ {
+ _idCodeSize = sz;
+ assert(sz == _idCodeSize);
+ }
+
+#elif defined(_TARGET_ARM64_)
+ unsigned idCodeSize() const
+ {
+ int size = 4;
+ switch (idInsFmt())
+ {
+ case IF_LARGEADR:
+ // adrp + add
+ case IF_LARGEJMP:
+ // b<cond> + b<uncond>
+ size = 8;
+ break;
+ case IF_LARGELDC:
+ if (isVectorRegister(idReg1()))
+ {
+ // adrp + ldr + fmov
+ size = 12;
+ }
+ else
+ {
+ // adrp + ldr
+ size = 8;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return size;
+ }
+
+#elif defined(_TARGET_ARM_)
+
+ bool idInstrIsT1() const
+ {
+ return (_idInsSize == ISZ_16BIT);
+ }
+ unsigned idCodeSize() const
+ {
+ unsigned result = (_idInsSize == ISZ_16BIT) ? 2 : (_idInsSize == ISZ_32BIT) ? 4 : 6;
+ return result;
+ }
+ insSize idInsSize() const
+ {
+ return _idInsSize;
+ }
+ void idInsSize(insSize isz)
+ {
+ _idInsSize = isz;
+ assert(isz == _idInsSize);
+ }
+ insFlags idInsFlags() const
+ {
+ return _idInsFlags;
+ }
+ void idInsFlags(insFlags sf)
+ {
+ _idInsFlags = sf;
+ assert(sf == _idInsFlags);
+ }
+#endif // _TARGET_ARM_
+
+ emitAttr idOpSize()
+ {
+ return emitDecodeSize(_idOpSize);
+ }
+ void idOpSize(emitAttr opsz)
+ {
+ _idOpSize = emitEncodeSize(opsz);
+ }
+
+ GCtype idGCref() const
+ {
+ return (GCtype)_idGCref;
+ }
+ void idGCref(GCtype gctype)
+ {
+ _idGCref = gctype;
+ }
+
+ regNumber idReg1() const
+ {
+ return _idReg1;
+ }
+ void idReg1(regNumber reg)
+ {
+ _idReg1 = reg;
+ assert(reg == _idReg1);
+ }
+
+ regNumber idReg2() const
+ {
+ return _idReg2;
+ }
+ void idReg2(regNumber reg)
+ {
+ _idReg2 = reg;
+ assert(reg == _idReg2);
+ }
+
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ regNumber idReg3() const
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ return idAddr()->_idReg3;
+ }
+ void idReg3(regNumber reg)
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ idAddr()->_idReg3 = reg;
+ assert(reg == idAddr()->_idReg3);
+ }
+#endif // defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+#ifdef _TARGET_ARMARCH_
+ insOpts idInsOpt() const
+ {
+ return (insOpts)_idInsOpt;
+ }
+ void idInsOpt(insOpts opt)
+ {
+ _idInsOpt = opt;
+ assert(opt == _idInsOpt);
+ }
+
+ regNumber idReg3() const
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ return idAddr()->_idReg3;
+ }
+ void idReg3(regNumber reg)
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ idAddr()->_idReg3 = reg;
+ assert(reg == idAddr()->_idReg3);
+ }
+ regNumber idReg4() const
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ return idAddr()->_idReg4;
+ }
+ void idReg4(regNumber reg)
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ idAddr()->_idReg4 = reg;
+ assert(reg == idAddr()->_idReg4);
+ }
+#ifdef _TARGET_ARM64_
+ bool idReg3Scaled() const
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ return (idAddr()->_idReg3Scaled == 1);
+ }
+ void idReg3Scaled(bool val)
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ idAddr()->_idReg3Scaled = val ? 1 : 0;
+ }
+#endif // _TARGET_ARM64_
+
+#endif // _TARGET_ARMARCH_
+
+ inline static bool fitsInSmallCns(ssize_t val)
+ {
+ return ((val >= ID_MIN_SMALL_CNS) && (val <= ID_MAX_SMALL_CNS));
+ }
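+
+    // Worked example (illustrative): in an amd64 build with RELOC_SUPPORT, ID_EXTRA_BITS is
+    // 14 + 2 = 16, so ID_BIT_SMALL_CNS is 16 and ID_MAX_SMALL_CNS is 0xFFFF. Consequently:
+    //
+    //   fitsInSmallCns(0xFFFF);   // true  - the value is stored directly in _idSmallCns
+    //   fitsInSmallCns(0x10000);  // false - the emitter must allocate an instrDescCns instead
+    //   fitsInSmallCns(-1);       // false - small constants are unsigned (ID_MIN_SMALL_CNS is 0)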
+
+ bool idIsLargeCns() const
+ {
+ assert(!idIsTiny());
+ return _idLargeCns != 0;
+ }
+ void idSetIsLargeCns()
+ {
+ assert(!idIsTiny());
+ _idLargeCns = 1;
+ }
+
+ bool idIsLargeDsp() const
+ {
+ assert(!idIsTiny());
+ return _idLargeDsp != 0;
+ }
+ void idSetIsLargeDsp()
+ {
+ assert(!idIsTiny());
+ _idLargeDsp = 1;
+ }
+ void idSetIsSmallDsp()
+ {
+ assert(!idIsTiny());
+ _idLargeDsp = 0;
+ }
+
+ bool idIsLargeCall() const
+ {
+ assert(!idIsTiny());
+ return _idLargeCall != 0;
+ }
+ void idSetIsLargeCall()
+ {
+ assert(!idIsTiny());
+ _idLargeCall = 1;
+ }
+
+ bool idIsBound() const
+ {
+ assert(!idIsTiny());
+ return _idBound != 0;
+ }
+ void idSetIsBound()
+ {
+ assert(!idIsTiny());
+ _idBound = 1;
+ }
+
+ bool idIsCallRegPtr() const
+ {
+ assert(!idIsTiny());
+ return _idCallRegPtr != 0;
+ }
+ void idSetIsCallRegPtr()
+ {
+ assert(!idIsTiny());
+ _idCallRegPtr = 1;
+ }
+
+ bool idIsCallAddr() const
+ {
+ assert(!idIsTiny());
+ return _idCallAddr != 0;
+ }
+ void idSetIsCallAddr()
+ {
+ assert(!idIsTiny());
+ _idCallAddr = 1;
+ }
+
+ // Only call instructions that call helper functions may be marked as "IsNoGC", indicating
+ // that a thread executing such a call cannot be stopped for GC. Thus, in partially-interruptible
+ // code, it is not necessary to generate GC info for a call so labeled.
+ bool idIsNoGC() const
+ {
+ assert(!idIsTiny());
+ return _idNoGC != 0;
+ }
+ void idSetIsNoGC(bool val)
+ {
+ assert(!idIsTiny());
+ _idNoGC = val;
+ }
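+
+    // Illustrative usage (a sketch, not a real call site): when emitting a call to a runtime
+    // helper, the descriptor can be flagged based on whether the helper is a "no GC" one:
+    //
+    //   id->idSetIsNoGC(emitNoGChelper(helperNum));   // 'helperNum' is a hypothetical helper index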
+
+#ifdef _TARGET_ARMARCH_
+ bool idIsLclVar() const
+ {
+ return !idIsTiny() && _idLclVar != 0;
+ }
+ void idSetIsLclVar()
+ {
+ assert(!idIsTiny());
+ _idLclVar = 1;
+ }
+#endif // _TARGET_ARMARCH_
+
+#if defined(_TARGET_ARM_)
+ bool idIsLclFPBase() const
+ {
+ return !idIsTiny() && _idLclFPBase != 0;
+ }
+ void idSetIsLclFPBase()
+ {
+ assert(!idIsTiny());
+ _idLclFPBase = 1;
+ }
+#endif // defined(_TARGET_ARM_)
+
+#ifdef RELOC_SUPPORT
+
+ bool idIsCnsReloc() const
+ {
+ assert(!idIsTiny());
+ return _idCnsReloc != 0;
+ }
+ void idSetIsCnsReloc()
+ {
+ assert(!idIsTiny());
+ _idCnsReloc = 1;
+ }
+
+ bool idIsDspReloc() const
+ {
+ assert(!idIsTiny());
+ return _idDspReloc != 0;
+ }
+ void idSetIsDspReloc(bool val = true)
+ {
+ assert(!idIsTiny());
+ _idDspReloc = val;
+ }
+ bool idIsReloc()
+ {
+ return idIsDspReloc() || idIsCnsReloc();
+ }
+
+#endif
+
+ unsigned idSmallCns() const
+ {
+ assert(!idIsTiny());
+ return _idSmallCns;
+ }
+ void idSmallCns(size_t value)
+ {
+ assert(!idIsTiny());
+ assert(fitsInSmallCns(value));
+ _idSmallCns = value;
+ }
+
+ inline const idAddrUnion* idAddr() const
+ {
+ assert(!idIsSmallDsc() && !idIsTiny());
+ return &this->_idAddrUnion;
+ }
+
+ inline idAddrUnion* idAddr()
+ {
+ assert(!idIsSmallDsc() && !idIsTiny());
+ return &this->_idAddrUnion;
+ }
+ }; // End of struct instrDesc
+
+ void dispIns(instrDesc* id);
+
+ void appendToCurIG(instrDesc* id);
+
+ /********************************************************************************************/
+
+ struct instrDescJmp : instrDesc
+ {
+ instrDescJmp* idjNext; // next jump in the group/method
+ insGroup* idjIG; // containing group
+
+ union {
+ BYTE* idjAddr; // address of jump ins (for patching)
+ } idjTemp;
+
+ unsigned idjOffs : 30; // Before jump emission, this is the byte offset within IG of the jump instruction.
+ // After emission, for forward jumps, this is the target offset -- in bytes from the
+ // beginning of the function -- of the target instruction of the jump, used to
+ // determine if this jump needs to be patched.
+ unsigned idjShort : 1; // is the jump known to be a short one?
+ unsigned idjKeepLong : 1; // should the jump be kept long? (used for
+ // hot to cold and cold to hot jumps)
+ };
+
+#if !defined(_TARGET_ARM64_) // This shouldn't be needed for ARM32, either, but I don't want to touch the ARM32 JIT.
+ struct instrDescLbl : instrDescJmp
+ {
+ emitLclVarAddr dstLclVar;
+ };
+#endif // !_TARGET_ARM64_
+
+ struct instrDescCns : instrDesc // large const
+ {
+ ssize_t idcCnsVal;
+ };
+
+ struct instrDescDsp : instrDesc // large displacement
+ {
+ ssize_t iddDspVal;
+ };
+
+ struct instrDescCnsDsp : instrDesc // large cons + disp
+ {
+ ssize_t iddcCnsVal;
+ int iddcDspVal;
+ };
+
+ struct instrDescAmd : instrDesc // large addrmode disp
+ {
+ ssize_t idaAmdVal;
+ };
+
+ struct instrDescCnsAmd : instrDesc // large cons + addrmode disp
+ {
+ ssize_t idacCnsVal;
+ ssize_t idacAmdVal;
+ };
+
+ struct instrDescCGCA : instrDesc // call with ...
+ {
+ VARSET_TP idcGCvars; // ... updated GC vars or
+ ssize_t idcDisp; // ... big addrmode disp
+ regMaskTP idcGcrefRegs; // ... gcref registers
+ regMaskTP idcByrefRegs; // ... byref registers
+ unsigned idcArgCnt; // ... lots of args or (<0 ==> caller pops args)
+
+#if MULTIREG_HAS_SECOND_GC_RET
+        // This method handles the GC-ness of the second register in a two-register struct return on System V.
+ GCtype idSecondGCref() const
+ {
+ return (GCtype)_idcSecondRetRegGCType;
+ }
+ void idSecondGCref(GCtype gctype)
+ {
+ _idcSecondRetRegGCType = gctype;
+ }
+
+ private:
+        // This member stores the GC-ness of the second register in a two-register struct return on System V.
+        // It is added to the call struct since it is not needed by the base instrDesc struct, which keeps the
+        // GC-ness of the first register for the instCall nodes.
+        // The base instrDesc is very carefully kept to no more than 128 bits. There is no more space to add a
+        // member for keeping the GC-ness of the second return register, and doing so would bloat the base struct
+        // unnecessarily, since the GC-ness of the second register is only needed for call instructions.
+ // The base struct's member keeping the GC-ness of the first return register is _idGCref.
+ GCtype _idcSecondRetRegGCType : 2; // ... GC type for the second return register.
+#endif // MULTIREG_HAS_SECOND_GC_RET
+ };
+
+ struct instrDescArmFP : instrDesc
+ {
+ regNumber r1;
+ regNumber r2;
+ regNumber r3;
+ };
+
+ insUpdateModes emitInsUpdateMode(instruction ins);
+ insFormat emitInsModeFormat(instruction ins, insFormat base);
+
+ static const BYTE emitInsModeFmtTab[];
+#ifdef DEBUG
+ static const unsigned emitInsModeFmtCnt;
+#endif
+
+ size_t emitGetInstrDescSize(const instrDesc* id);
+ size_t emitGetInstrDescSizeSC(const instrDesc* id);
+
+ ssize_t emitGetInsCns(instrDesc* id);
+ ssize_t emitGetInsDsp(instrDesc* id);
+ ssize_t emitGetInsAmd(instrDesc* id);
+ ssize_t emitGetInsCnsDsp(instrDesc* id, ssize_t* dspPtr);
+ ssize_t emitGetInsSC(instrDesc* id);
+ ssize_t emitGetInsCIdisp(instrDesc* id);
+ unsigned emitGetInsCIargs(instrDesc* id);
+
+ // Return the argument count for a direct call "id".
+ int emitGetInsCDinfo(instrDesc* id);
+
+ unsigned emitInsCount;
+
+/************************************************************************/
+/* A few routines used for debug display purposes */
+/************************************************************************/
+
+#if defined(DEBUG) || EMITTER_STATS
+
+ static const char* emitIfName(unsigned f);
+
+#endif // defined(DEBUG) || EMITTER_STATS
+
+#ifdef DEBUG
+
+ unsigned emitVarRefOffs;
+
+ const char* emitRegName(regNumber reg, emitAttr size = EA_PTRSIZE, bool varName = true);
+ const char* emitFloatRegName(regNumber reg, emitAttr size = EA_PTRSIZE, bool varName = true);
+
+ const char* emitFldName(CORINFO_FIELD_HANDLE fieldVal);
+ const char* emitFncName(CORINFO_METHOD_HANDLE callVal);
+
+ void emitDispIGflags(unsigned flags);
+ void emitDispIG(insGroup* ig, insGroup* igPrev = nullptr, bool verbose = false);
+ void emitDispIGlist(bool verbose = false);
+ void emitDispGCinfo();
+ void emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc = false);
+ void emitDispFrameRef(int varx, int disp, int offs, bool asmfm);
+ void emitDispInsOffs(unsigned offs, bool doffs);
+ void emitDispInsHex(BYTE* code, size_t sz);
+
+#else // !DEBUG
+#define emitVarRefOffs 0
+#endif // !DEBUG
+
+ /************************************************************************/
+ /* Method prolog and epilog */
+ /************************************************************************/
+
+ unsigned emitPrologEndPos;
+
+ unsigned emitEpilogCnt;
+ UNATIVE_OFFSET emitEpilogSize;
+
+#ifdef _TARGET_XARCH_
+
+ void emitStartExitSeq(); // Mark the start of the "return" sequence
+ emitLocation emitExitSeqBegLoc;
+ UNATIVE_OFFSET emitExitSeqSize; // minimum size of any return sequence - the 'ret' after the epilog
+
+#endif // _TARGET_XARCH_
+
+ insGroup* emitPlaceholderList; // per method placeholder list - head
+ insGroup* emitPlaceholderLast; // per method placeholder list - tail
+
+#ifdef JIT32_GCENCODER
+
+ // The x86 GC encoder needs to iterate over a list of epilogs to generate a table of
+ // epilog offsets. Epilogs always start at the beginning of an IG, so save the first
+ // IG of the epilog, and use it to find the epilog offset at the end of code generation.
+ struct EpilogList
+ {
+ EpilogList* elNext;
+ insGroup* elIG;
+ };
+
+ EpilogList* emitEpilogList; // per method epilog list - head
+ EpilogList* emitEpilogLast; // per method epilog list - tail
+
+public:
+ bool emitHasEpilogEnd();
+
+ size_t emitGenEpilogLst(size_t (*fp)(void*, unsigned), void* cp);
+
+#endif // JIT32_GCENCODER
+
+ void emitBegPrologEpilog(insGroup* igPh);
+ void emitEndPrologEpilog();
+
+ emitLocation emitEpilogBegLoc;
+
+ void emitBegFnEpilog(insGroup* igPh);
+ void emitEndFnEpilog();
+
+#if FEATURE_EH_FUNCLETS
+
+ void emitBegFuncletProlog(insGroup* igPh);
+ void emitEndFuncletProlog();
+
+ void emitBegFuncletEpilog(insGroup* igPh);
+ void emitEndFuncletEpilog();
+
+#endif // FEATURE_EH_FUNCLETS
+
+/************************************************************************/
+/* Members and methods used in PDB translation */
+/************************************************************************/
+
+#ifdef TRANSLATE_PDB
+
+ inline void SetIDSource(instrDesc* pID);
+ void MapCode(int ilOffset, BYTE* imgDest);
+ void MapFunc(int imgOff,
+ int procLen,
+ int dbgStart,
+ int dbgEnd,
+ short frameReg,
+ int stkAdjust,
+ int lvaCount,
+ OptJit::LclVarDsc* lvaTable,
+ bool framePtr);
+
+private:
+    int emitInstrDescILBase; // code offset of IL that produced this instruction descriptor
+ static AddrMap* emitPDBOffsetTable; // translation table for mapping IL addresses to native addresses
+ static LocalMap* emitPDBLocalTable; // local symbol translation table
+ static bool emitIsPDBEnabled; // flag to disable PDB translation code when a PDB is not found
+ static BYTE* emitILBaseOfCode; // start of IL .text section
+ static BYTE* emitILMethodBase; // beginning of IL method (start of header)
+ static BYTE* emitILMethodStart; // beginning of IL method code (right after the header)
+ static BYTE* emitImgBaseOfCode; // start of the image .text section
+
+#endif
+
+ /************************************************************************/
+ /* Methods to record a code position and later convert to offset */
+ /************************************************************************/
+
+ unsigned emitFindInsNum(insGroup* ig, instrDesc* id);
+ UNATIVE_OFFSET emitFindOffset(insGroup* ig, unsigned insNum);
+
+/************************************************************************/
+/* Members and methods used to issue (encode) instructions. */
+/************************************************************************/
+
+#ifdef DEBUG
+ // If we have started issuing instructions from the list of instrDesc, this is set
+ bool emitIssuing;
+#endif
+
+ BYTE* emitCodeBlock; // Hot code block
+ BYTE* emitColdCodeBlock; // Cold code block
+ BYTE* emitConsBlock; // Read-only (constant) data block
+
+ UNATIVE_OFFSET emitTotalHotCodeSize;
+ UNATIVE_OFFSET emitTotalColdCodeSize;
+
+ UNATIVE_OFFSET emitCurCodeOffs(BYTE* dst)
+ {
+ size_t distance;
+ if ((dst >= emitCodeBlock) && (dst <= (emitCodeBlock + emitTotalHotCodeSize)))
+ {
+ distance = (dst - emitCodeBlock);
+ }
+ else
+ {
+ assert(emitFirstColdIG);
+ assert(emitColdCodeBlock);
+ assert((dst >= emitColdCodeBlock) && (dst <= (emitColdCodeBlock + emitTotalColdCodeSize)));
+
+ distance = (dst - emitColdCodeBlock + emitTotalHotCodeSize);
+ }
+ noway_assert((UNATIVE_OFFSET)distance == distance);
+ return (UNATIVE_OFFSET)distance;
+ }
+
+ BYTE* emitOffsetToPtr(UNATIVE_OFFSET offset)
+ {
+ if (offset < emitTotalHotCodeSize)
+ {
+ return emitCodeBlock + offset;
+ }
+ else
+ {
+ assert(offset < (emitTotalHotCodeSize + emitTotalColdCodeSize));
+
+ return emitColdCodeBlock + (offset - emitTotalHotCodeSize);
+ }
+ }
+
+ BYTE* emitDataOffsetToPtr(UNATIVE_OFFSET offset)
+ {
+ assert(offset < emitDataSize());
+ return emitConsBlock + offset;
+ }
+
+ bool emitJumpCrossHotColdBoundary(size_t srcOffset, size_t dstOffset)
+ {
+ if (emitTotalColdCodeSize == 0)
+ {
+ return false;
+ }
+
+ assert(srcOffset < (emitTotalHotCodeSize + emitTotalColdCodeSize));
+ assert(dstOffset < (emitTotalHotCodeSize + emitTotalColdCodeSize));
+
+ return ((srcOffset < emitTotalHotCodeSize) != (dstOffset < emitTotalHotCodeSize));
+ }
+
+ unsigned char emitOutputByte(BYTE* dst, ssize_t val);
+ unsigned char emitOutputWord(BYTE* dst, ssize_t val);
+ unsigned char emitOutputLong(BYTE* dst, ssize_t val);
+ unsigned char emitOutputSizeT(BYTE* dst, ssize_t val);
+
+ size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp);
+ size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp);
+
+ bool emitHasFramePtr;
+
+#ifdef PSEUDORANDOM_NOP_INSERTION
+ bool emitInInstrumentation;
+#endif // PSEUDORANDOM_NOP_INSERTION
+
+ unsigned emitMaxTmpSize;
+
+#ifdef LEGACY_BACKEND
+ unsigned emitLclSize;
+ unsigned emitGrowableMaxByteOffs;
+ void emitTmpSizeChanged(unsigned tmpSize);
+#ifdef DEBUG
+ unsigned emitMaxByteOffsIdNum;
+#endif // DEBUG
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUG
+ bool emitChkAlign; // perform some alignment checks
+#endif
+
+ insGroup* emitCurIG;
+
+ void emitSetShortJump(instrDescJmp* id);
+ void emitSetMediumJump(instrDescJmp* id);
+ UNATIVE_OFFSET emitSizeOfJump(instrDescJmp* jmp);
+ UNATIVE_OFFSET emitInstCodeSz(instrDesc* id);
+
+#ifndef LEGACY_BACKEND
+ CORINFO_FIELD_HANDLE emitLiteralConst(ssize_t cnsValIn, emitAttr attr = EA_8BYTE);
+ CORINFO_FIELD_HANDLE emitFltOrDblConst(GenTreeDblCon* tree, emitAttr attr = EA_UNKNOWN);
+ regNumber emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src);
+ regNumber emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2);
+ void emitInsMov(instruction ins, emitAttr attr, GenTree* node);
+ insFormat emitMapFmtForIns(insFormat fmt, instruction ins);
+ insFormat emitMapFmtAtoM(insFormat fmt);
+ void emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins);
+ void spillIntArgRegsToShadowSlots();
+#endif // !LEGACY_BACKEND
+
+/************************************************************************/
+/* The logic that creates and keeps track of instruction groups */
+/************************************************************************/
+
+#ifdef _TARGET_ARM_
+// The only place where this limited instruction group size is a problem is
+// in the prolog, where we only support a single instruction group. We should really fix that.
+// ARM can require a bigger prolog instruction group. One scenario is where a
+// function uses all the incoming integer and single-precision floating-point arguments,
+// and must store them all to the frame on entry. If the frame is very large, we generate
+// ugly code like "movw r10, 0x488; add r10, sp; vstr s0, [r10]" for each store, which
+// eats up our insGroup buffer.
+#define SC_IG_BUFFER_SIZE (100 * sizeof(instrDesc) + 14 * TINY_IDSC_SIZE)
+#else // !_TARGET_ARM_
+#define SC_IG_BUFFER_SIZE (50 * sizeof(instrDesc) + 14 * TINY_IDSC_SIZE)
+#endif // !_TARGET_ARM_
+
+ size_t emitIGbuffSize;
+
+ insGroup* emitIGlist; // first instruction group
+ insGroup* emitIGlast; // last instruction group
+ insGroup* emitIGthis; // issued instruction group
+
+ insGroup* emitPrologIG; // prolog instruction group
+
+ instrDescJmp* emitJumpList; // list of local jumps in method
+ instrDescJmp* emitJumpLast; // last of local jumps in method
+ void emitJumpDistBind(); // Bind all the local jumps in method
+
+ void emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG); // Check for illegal branches between funclets
+
+ bool emitFwdJumps; // forward jumps present?
+ bool emitNoGCIG; // Are we generating IGF_NOGCINTERRUPT insGroups (for prologs, epilogs, etc.)
+    bool emitForceNewIG; // If set, the next instruction we generate (rather than another instruction group) forces
+                         // the creation of a new emitAdd instruction group.
+
+ BYTE* emitCurIGfreeNext; // next available byte in buffer
+ BYTE* emitCurIGfreeEndp; // one byte past the last available byte in buffer
+ BYTE* emitCurIGfreeBase; // first byte address
+
+ unsigned emitCurIGinsCnt; // # of collected instr's in buffer
+ unsigned emitCurIGsize; // estimated code size of current group in bytes
+ UNATIVE_OFFSET emitCurCodeOffset; // current code offset within group
+ UNATIVE_OFFSET emitTotalCodeSize; // bytes of code in entire method
+
+ insGroup* emitFirstColdIG; // first cold instruction group
+
+ void emitSetFirstColdIGCookie(void* bbEmitCookie)
+ {
+ emitFirstColdIG = (insGroup*)bbEmitCookie;
+ }
+
+ int emitOffsAdj; // current code offset adjustment
+
+ instrDescJmp* emitCurIGjmpList; // list of jumps in current IG
+
+ // emitPrev* and emitInit* are only used during code generation, not during
+ // emission (issuing), to determine what GC values to store into an IG.
+ // Note that only the Vars ones are actually used, apparently due to bugs
+ // in that tracking. See emitSavIG(): the important use of ByrefRegs is commented
+ // out, and GCrefRegs is always saved.
+
+ VARSET_TP emitPrevGCrefVars;
+ regMaskTP emitPrevGCrefRegs;
+ regMaskTP emitPrevByrefRegs;
+
+ VARSET_TP emitInitGCrefVars;
+ regMaskTP emitInitGCrefRegs;
+ regMaskTP emitInitByrefRegs;
+
+ // If this is set, we ignore comparing emitPrev* and emitInit* to determine
+ // whether to save GC state (to save space in the IG), and always save it.
+
+ bool emitForceStoreGCState;
+
+ // emitThis* variables are used during emission, to track GC updates
+ // on a per-instruction basis. During code generation, per-instruction
+ // tracking is done with variables gcVarPtrSetCur, gcRegGCrefSetCur,
+ // and gcRegByrefSetCur. However, these are also used for a slightly
+ // different purpose during code generation: to try to minimize the
+ // amount of GC data stored to an IG, by only storing deltas from what
+ // we expect to see at an IG boundary. Also, only emitThisGCrefVars is
+    // we expect to see at an IG boundary. Also, emitThisGCrefVars is really
+    // the only one used; the others seem to be calculated, but not used due
+    // to bugs.
+ VARSET_TP emitThisGCrefVars;
+ regMaskTP emitThisGCrefRegs; // Current set of registers holding GC references
+ regMaskTP emitThisByrefRegs; // Current set of registers holding BYREF references
+
+ bool emitThisGCrefVset; // Is "emitThisGCrefVars" up to date?
+
+ regNumber emitSyncThisObjReg; // where is "this" enregistered for synchronized methods?
+
+#if MULTIREG_HAS_SECOND_GC_RET
+ void emitSetSecondRetRegGCType(instrDescCGCA* id, emitAttr secondRetSize);
+#endif // MULTIREG_HAS_SECOND_GC_RET
+
+ static void emitEncodeCallGCregs(regMaskTP regs, instrDesc* id);
+ static unsigned emitDecodeCallGCregs(instrDesc* id);
+
+ unsigned emitNxtIGnum;
+
+ // random nop insertion to break up nop sleds
+ unsigned emitNextNop;
+ bool emitRandomNops;
+ void emitEnableRandomNops()
+ {
+ emitRandomNops = true;
+ }
+ void emitDisableRandomNops()
+ {
+ emitRandomNops = false;
+ }
+
+ insGroup* emitAllocAndLinkIG();
+ insGroup* emitAllocIG();
+ void emitInitIG(insGroup* ig);
+ void emitInsertIGAfter(insGroup* insertAfterIG, insGroup* ig);
+
+ void emitNewIG();
+ void emitDisableGC();
+ void emitEnableGC();
+ void emitGenIG(insGroup* ig);
+ insGroup* emitSavIG(bool emitAdd = false);
+ void emitNxtIG(bool emitAdd = false);
+
+ bool emitCurIGnonEmpty()
+ {
+ return (emitCurIG && emitCurIGfreeNext > emitCurIGfreeBase);
+ }
+
+ instrDesc* emitLastIns;
+
+#ifdef DEBUG
+ void emitCheckIGoffsets();
+#endif
+
+ // Terminates any in-progress instruction group, making the current IG a new empty one.
+    // Marks this instruction group as having a label; returns the new instruction group.
+ // Sets the emitter's record of the currently live GC variables
+ // and registers. The "isFinallyTarget" parameter indicates that the current location is
+ // the start of a basic block that is returned to after a finally clause in non-exceptional execution.
+ void* emitAddLabel(VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, BOOL isFinallyTarget = FALSE);
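+
+    // Illustrative usage (a sketch; the real call sites are in codegen): when code generation
+    // reaches a block that needs a label, it passes its current notion of the live GC state:
+    //
+    //   void* igCookie = emitAddLabel(liveGCVars, liveGCrefRegs, liveByrefRegs);
+    //
+    // where the three arguments are hypothetical names for the caller's live GC variable set and
+    // its gcref/byref register masks; the returned cookie identifies the new insGroup.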
+
+#ifdef _TARGET_ARMARCH_
+
+ void emitGetInstrDescs(insGroup* ig, instrDesc** id, int* insCnt);
+
+ bool emitGetLocationInfo(emitLocation* emitLoc, insGroup** pig, instrDesc** pid, int* pinsRemaining = NULL);
+
+ bool emitNextID(insGroup*& ig, instrDesc*& id, int& insRemaining);
+
+ typedef void (*emitProcessInstrFunc_t)(instrDesc* id, void* context);
+
+ void emitWalkIDs(emitLocation* locFrom, emitProcessInstrFunc_t processFunc, void* context);
+
+ static void emitGenerateUnwindNop(instrDesc* id, void* context);
+
+#endif // _TARGET_ARMARCH_
+
+#if defined(_TARGET_ARM_)
+ emitter::insFormat emitInsFormat(instruction ins);
+ size_t emitInsCode(instruction ins, insFormat fmt);
+#endif
+
+#ifdef _TARGET_X86_
+ void emitMarkStackLvl(unsigned stackLevel);
+#endif
+
+ int emitNextRandomNop();
+
+ void* emitAllocInstr(size_t sz, emitAttr attr);
+
+ instrDesc* emitAllocInstr(emitAttr attr)
+ {
+ return (instrDesc*)emitAllocInstr(sizeof(instrDesc), attr);
+ }
+
+ instrDescJmp* emitAllocInstrJmp()
+ {
+ return (instrDescJmp*)emitAllocInstr(sizeof(instrDescJmp), EA_1BYTE);
+ }
+
+#if !defined(_TARGET_ARM64_)
+ instrDescLbl* emitAllocInstrLbl()
+ {
+ return (instrDescLbl*)emitAllocInstr(sizeof(instrDescLbl), EA_4BYTE);
+ }
+#endif // !_TARGET_ARM64_
+
+ instrDescCns* emitAllocInstrCns(emitAttr attr)
+ {
+ return (instrDescCns*)emitAllocInstr(sizeof(instrDescCns), attr);
+ }
+ instrDescCns* emitAllocInstrCns(emitAttr attr, int cns)
+ {
+ instrDescCns* result = (instrDescCns*)emitAllocInstr(sizeof(instrDescCns), attr);
+ result->idSetIsLargeCns();
+ result->idcCnsVal = cns;
+ return result;
+ }
+
+ instrDescDsp* emitAllocInstrDsp(emitAttr attr)
+ {
+ return (instrDescDsp*)emitAllocInstr(sizeof(instrDescDsp), attr);
+ }
+
+ instrDescCnsDsp* emitAllocInstrCnsDsp(emitAttr attr)
+ {
+ return (instrDescCnsDsp*)emitAllocInstr(sizeof(instrDescCnsDsp), attr);
+ }
+
+ instrDescAmd* emitAllocInstrAmd(emitAttr attr)
+ {
+ return (instrDescAmd*)emitAllocInstr(sizeof(instrDescAmd), attr);
+ }
+
+ instrDescCnsAmd* emitAllocInstrCnsAmd(emitAttr attr)
+ {
+ return (instrDescCnsAmd*)emitAllocInstr(sizeof(instrDescCnsAmd), attr);
+ }
+
+ instrDescCGCA* emitAllocInstrCGCA(emitAttr attr)
+ {
+ return (instrDescCGCA*)emitAllocInstr(sizeof(instrDescCGCA), attr);
+ }
+
+ instrDesc* emitNewInstrTiny(emitAttr attr);
+ instrDesc* emitNewInstrSmall(emitAttr attr);
+ instrDesc* emitNewInstr(emitAttr attr = EA_4BYTE);
+ instrDesc* emitNewInstrSC(emitAttr attr, ssize_t cns);
+ instrDesc* emitNewInstrCns(emitAttr attr, ssize_t cns);
+ instrDesc* emitNewInstrDsp(emitAttr attr, ssize_t dsp);
+ instrDesc* emitNewInstrCnsDsp(emitAttr attr, ssize_t cns, int dsp);
+ instrDescJmp* emitNewInstrJmp();
+
+#if !defined(_TARGET_ARM64_)
+ instrDescLbl* emitNewInstrLbl();
+#endif // !_TARGET_ARM64_
+
+ static const BYTE emitFmtToOps[];
+
+#ifdef DEBUG
+ static const unsigned emitFmtCount;
+#endif
+
+ bool emitIsTinyInsDsc(instrDesc* id);
+ bool emitIsScnsInsDsc(instrDesc* id);
+
+ size_t emitSizeOfInsDsc(instrDesc* id);
+
+ /************************************************************************/
+ /* The following keeps track of stack-based GC values */
+ /************************************************************************/
+
+ unsigned emitTrkVarCnt;
+ int* emitGCrFrameOffsTab; // Offsets of tracked stack ptr vars (varTrkIndex -> stkOffs)
+
+ unsigned emitGCrFrameOffsCnt; // Number of tracked stack ptr vars
+ int emitGCrFrameOffsMin; // Min offset of a tracked stack ptr var
+ int emitGCrFrameOffsMax; // Max offset of a tracked stack ptr var
+ bool emitContTrkPtrLcls; // All lcl between emitGCrFrameOffsMin/Max are only tracked stack ptr vars
+ varPtrDsc** emitGCrFrameLiveTab; // Cache of currently live varPtrs (stkOffs -> varPtrDsc)
+
+ int emitArgFrameOffsMin;
+ int emitArgFrameOffsMax;
+
+ int emitLclFrameOffsMin;
+ int emitLclFrameOffsMax;
+
+ int emitSyncThisObjOffs; // what is the offset of "this" for synchronized methods?
+
+public:
+ void emitSetFrameRangeGCRs(int offsLo, int offsHi);
+ void emitSetFrameRangeLcls(int offsLo, int offsHi);
+ void emitSetFrameRangeArgs(int offsLo, int offsHi);
+
+ static instruction emitJumpKindToIns(emitJumpKind jumpKind);
+ static emitJumpKind emitInsToJumpKind(instruction ins);
+ static emitJumpKind emitReverseJumpKind(emitJumpKind jumpKind);
+
+#ifdef _TARGET_ARM_
+ static unsigned emitJumpKindCondCode(emitJumpKind jumpKind);
+#endif
+
+#ifdef DEBUG
+ void emitInsSanityCheck(instrDesc* id);
+#endif
+
+#ifdef _TARGET_ARMARCH_
+ // Returns true if instruction "id->idIns()" writes to a register that might be used to contain a GC
+ // pointer. This exempts the SP and PC registers, and floating point registers. Memory access
+ // instructions that pre- or post-increment their memory address registers are *not* considered to write
+ // to GC registers, even if that memory address is a by-ref: such an instruction cannot change the GC
+ // status of that register, since it must be a byref before and remains one after.
+ //
+ // This may return false positives.
+ bool emitInsMayWriteToGCReg(instrDesc* id);
+
+ // Returns "true" if instruction "id->idIns()" writes to a LclVar stack location.
+ bool emitInsWritesToLclVarStackLoc(instrDesc* id);
+
+ // Returns true if the instruction may write to more than one register.
+ bool emitInsMayWriteMultipleRegs(instrDesc* id);
+#endif // _TARGET_ARMARCH_
+
+ /************************************************************************/
+ /* The following is used to distinguish helper vs non-helper calls */
+ /************************************************************************/
+
+ static bool emitNoGChelper(unsigned IHX);
+
+ /************************************************************************/
+ /* The following logic keeps track of live GC ref values */
+ /************************************************************************/
+
+ bool emitFullGCinfo; // full GC pointer maps?
+ bool emitFullyInt; // fully interruptible code?
+
+#if EMIT_TRACK_STACK_DEPTH
+ unsigned emitCntStackDepth; // 0 in prolog/epilog, One DWORD elsewhere
+ unsigned emitMaxStackDepth; // actual computed max. stack depth
+#endif
+
+ /* Stack modelling wrt GC */
+
+ bool emitSimpleStkUsed; // using the "simple" stack table?
+
+ union {
+ struct // if emitSimpleStkUsed==true
+ {
+#define BITS_IN_BYTE (8)
+#define MAX_SIMPLE_STK_DEPTH (BITS_IN_BYTE * sizeof(unsigned))
+
+ unsigned emitSimpleStkMask; // bit per pushed dword (if it fits. Lowest bit <==> last pushed arg)
+ unsigned emitSimpleByrefStkMask; // byref qualifier for emitSimpleStkMask
+ } u1;
+
+ struct // if emitSimpleStkUsed==false
+ {
+ BYTE emitArgTrackLcl[16]; // small local table to avoid malloc
+ BYTE* emitArgTrackTab; // base of the argument tracking stack
+ BYTE* emitArgTrackTop; // top of the argument tracking stack
+ USHORT emitGcArgTrackCnt; // count of pending arg records (stk-depth for frameless methods, gc ptrs on stk
+ // for framed methods)
+ } u2;
+ };
+
+ unsigned emitCurStackLvl; // amount of bytes pushed on stack
+
+#if EMIT_TRACK_STACK_DEPTH
+ /* Functions for stack tracking */
+
+ void emitStackPush(BYTE* addr, GCtype gcType);
+
+ void emitStackPushN(BYTE* addr, unsigned count);
+
+ void emitStackPop(BYTE* addr, bool isCall, unsigned char callInstrSize, unsigned count = 1);
+
+ void emitStackKillArgs(BYTE* addr, unsigned count, unsigned char callInstrSize);
+
+ void emitRecordGCcall(BYTE* codePos, unsigned char callInstrSize);
+
+ // Helpers for the above
+
+ void emitStackPushLargeStk(BYTE* addr, GCtype gcType, unsigned count = 1);
+ void emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callInstrSize, unsigned count = 1);
+#endif // EMIT_TRACK_STACK_DEPTH
+
+ /* Liveness of stack variables, and registers */
+
+ void emitUpdateLiveGCvars(int offs, BYTE* addr, bool birth);
+ void emitUpdateLiveGCvars(VARSET_VALARG_TP vars, BYTE* addr);
+ void emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr);
+
+#ifdef DEBUG
+ const char* emitGetFrameReg();
+ void emitDispRegSet(regMaskTP regs);
+ void emitDispVarSet();
+#endif
+
+ void emitGCregLiveUpd(GCtype gcType, regNumber reg, BYTE* addr);
+ void emitGCregLiveSet(GCtype gcType, regMaskTP mask, BYTE* addr, bool isThis);
+ void emitGCregDeadUpdMask(regMaskTP, BYTE* addr);
+ void emitGCregDeadUpd(regNumber reg, BYTE* addr);
+ void emitGCregDeadSet(GCtype gcType, regMaskTP mask, BYTE* addr);
+
+ void emitGCvarLiveUpd(int offs, int varNum, GCtype gcType, BYTE* addr);
+ void emitGCvarLiveSet(int offs, GCtype gcType, BYTE* addr, ssize_t disp = -1);
+ void emitGCvarDeadUpd(int offs, BYTE* addr);
+ void emitGCvarDeadSet(int offs, BYTE* addr, ssize_t disp = -1);
+
+ GCtype emitRegGCtype(regNumber reg);
+
+ // We have a mixture of code emission methods, some of which return the size of the emitted instruction,
+ // requiring the caller to add this to the current code pointer (dst += <call to emit code>), others of which
+ // return the updated code pointer (dst = <call to emit code>). Sometimes we'd like to get the size of
+ // the generated instruction for the latter style. This method accomplishes that --
+ // "emitCodeWithInstructionSize(dst, <call to emitCode>, &instrSize)" will do the call, and set
+ // "*instrSize" to the after-before code pointer difference. Returns the result of the call. (And
+ // asserts that the instruction size fits in an unsigned char.)
+ static BYTE* emitCodeWithInstructionSize(BYTE* codePtrBefore, BYTE* newCodePointer, unsigned char* instrSize);
+
+ /************************************************************************/
+ /* The following logic keeps track of initialized data sections */
+ /************************************************************************/
+
+ /* One of these is allocated for every blob of initialized data */
+
+ struct dataSection
+ {
+ enum sectionType
+ {
+ data,
+ blockAbsoluteAddr,
+ blockRelative32
+ };
+
+ dataSection* dsNext;
+ UNATIVE_OFFSET dsSize;
+ sectionType dsType;
+ // variable-sized array used to store the constant data
+ // or BasicBlock* array in the block cases.
+ BYTE dsCont[0];
+ };
+
+ /* These describe the entire initialized/uninitialized data sections */
+
+ struct dataSecDsc
+ {
+ dataSection* dsdList;
+ dataSection* dsdLast;
+ UNATIVE_OFFSET dsdOffs;
+ };
+
+ dataSecDsc emitConsDsc;
+
+ dataSection* emitDataSecCur;
+
+ void emitOutputDataSec(dataSecDsc* sec, BYTE* dst);
+
+ /************************************************************************/
+ /* Handles to the current class and method. */
+ /************************************************************************/
+
+ COMP_HANDLE emitCmpHandle;
+
+ /************************************************************************/
+ /* Helpers for interface to EE */
+ /************************************************************************/
+
+ void emitRecordRelocation(void* location, /* IN */
+ void* target, /* IN */
+ WORD fRelocType, /* IN */
+ WORD slotNum = 0, /* IN */
+ INT32 addlDelta = 0); /* IN */
+
+ void emitRecordCallSite(ULONG instrOffset, /* IN */
+ CORINFO_SIG_INFO* callSig, /* IN */
+ CORINFO_METHOD_HANDLE methodHandle); /* IN */
+
+#ifdef DEBUG
+ // This is a scratch buffer used to minimize the number of sig info structs
+ // we have to allocate for recordCallSite.
+ CORINFO_SIG_INFO* emitScratchSigInfo;
+#endif // DEBUG
+
+/************************************************************************/
+/* Logic to collect and display statistics */
+/************************************************************************/
+
+#if EMITTER_STATS
+
+ friend void emitterStats(FILE* fout);
+ friend void emitterStaticStats(FILE* fout);
+
+ static size_t emitSizeMethod;
+
+ static unsigned emitTotalInsCnt;
+
+ static unsigned emitTotalIGcnt; // total number of insGroup allocated
+ static unsigned emitTotalPhIGcnt; // total number of insPlaceholderGroupData allocated
+ static unsigned emitTotalIGicnt;
+ static size_t emitTotalIGsize;
+ static unsigned emitTotalIGmcnt; // total method count
+ static unsigned emitTotalIGjmps;
+ static unsigned emitTotalIGptrs;
+
+ static size_t emitTotMemAlloc;
+
+ static unsigned emitSmallDspCnt;
+ static unsigned emitLargeDspCnt;
+
+ static unsigned emitSmallCnsCnt;
+#define SMALL_CNS_TSZ 256
+ static unsigned emitSmallCns[SMALL_CNS_TSZ];
+ static unsigned emitLargeCnsCnt;
+
+ static unsigned emitIFcounts[IF_COUNT];
+
+#endif // EMITTER_STATS
+
+/*************************************************************************
+ *
+ * Define any target-dependent emitter members.
+ */
+
+#include "emitdef.h"
+
+ // It would be better if this were a constructor, but that would entail revamping the allocation
+ // infrastructure of the entire JIT...
+ void Init()
+ {
+ VarSetOps::AssignNoCopy(emitComp, emitPrevGCrefVars, VarSetOps::MakeEmpty(emitComp));
+ VarSetOps::AssignNoCopy(emitComp, emitInitGCrefVars, VarSetOps::MakeEmpty(emitComp));
+ VarSetOps::AssignNoCopy(emitComp, emitThisGCrefVars, VarSetOps::MakeEmpty(emitComp));
+ }
+};
+
+/*****************************************************************************
+ *
+ * Define any target-dependent inlines.
+ */
+
+#include "emitinl.h"
+
+inline void emitter::instrDesc::checkSizes()
+{
+#ifdef DEBUG
+#if HAS_TINY_DESC
+ C_ASSERT(TINY_IDSC_SIZE == (offsetof(instrDesc, _idDebugOnlyInfo) + sizeof(instrDescDebugInfo*)));
+#else // !tiny
+ C_ASSERT(SMALL_IDSC_SIZE == (offsetof(instrDesc, _idDebugOnlyInfo) + sizeof(instrDescDebugInfo*)));
+#endif
+#endif
+ C_ASSERT(SMALL_IDSC_SIZE == offsetof(instrDesc, _idAddrUnion));
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the given instruction descriptor is a "tiny" or a "small
+ * constant" one (i.e. one of the descriptors that don't have all instrDesc
+ * fields allocated).
+ */
+
+inline bool emitter::emitIsTinyInsDsc(instrDesc* id)
+{
+ return id->idIsTiny();
+}
+
+inline bool emitter::emitIsScnsInsDsc(instrDesc* id)
+{
+ return id->idIsSmallDsc();
+}
+
+/*****************************************************************************
+ *
+ * Given an instruction, return its "update mode" (RD/WR/RW).
+ */
+
+inline insUpdateModes emitter::emitInsUpdateMode(instruction ins)
+{
+#ifdef DEBUG
+ assert((unsigned)ins < emitInsModeFmtCnt);
+#endif
+ return (insUpdateModes)emitInsModeFmtTab[ins];
+}
+
+/*****************************************************************************
+ *
+ * Return the number of epilog blocks generated so far.
+ */
+
+inline unsigned emitter::emitGetEpilogCnt()
+{
+ return emitEpilogCnt;
+}
+
+/*****************************************************************************
+ *
+ * Return the current size of the specified data section.
+ */
+
+inline UNATIVE_OFFSET emitter::emitDataSize()
+{
+ return emitConsDsc.dsdOffs;
+}
+
+/*****************************************************************************
+ *
+ * Return a handle to the current position in the output stream. This can
+ * be later converted to an actual code offset in bytes.
+ */
+
+inline void* emitter::emitCurBlock()
+{
+ return emitCurIG;
+}
+
+/*****************************************************************************
+ *
+ * The emitCurOffset() method returns a cookie that identifies the current
+ * position in the instruction stream. Due to things like scheduling (and
+ * the fact that the final size of some instructions cannot be known until
+ * the end of code generation), we return a value with the instruction number
+ * and its estimated offset to the caller.
+ */
+
+inline unsigned emitGetInsNumFromCodePos(unsigned codePos)
+{
+ return (codePos & 0xFFFF);
+}
+
+inline unsigned emitGetInsOfsFromCodePos(unsigned codePos)
+{
+ return (codePos >> 16);
+}
+
+inline unsigned emitter::emitCurOffset()
+{
+ unsigned codePos = emitCurIGinsCnt + (emitCurIGsize << 16);
+
+ assert(emitGetInsOfsFromCodePos(codePos) == emitCurIGsize);
+ assert(emitGetInsNumFromCodePos(codePos) == emitCurIGinsCnt);
+
+ // printf("[IG=%02u;ID=%03u;OF=%04X] => %08X\n", emitCurIG->igNum, emitCurIGinsCnt, emitCurIGsize, codePos);
+
+ return codePos;
+}
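+
+// Worked example (illustrative): if the current IG holds 5 instructions with an estimated size
+// of 18 (0x12) bytes so far, then:
+//
+//   unsigned codePos = emitCurOffset();    // 5 + (0x12 << 16) == 0x00120005
+//   emitGetInsNumFromCodePos(codePos);     // == 5    (instruction number, low 16 bits)
+//   emitGetInsOfsFromCodePos(codePos);     // == 0x12 (estimated offset, high 16 bits)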
+
+extern const unsigned short emitTypeSizes[TYP_COUNT];
+
+template <class T>
+inline emitAttr emitTypeSize(T type)
+{
+ assert(TypeGet(type) < TYP_COUNT);
+ assert(emitTypeSizes[TypeGet(type)] > 0);
+ return (emitAttr)emitTypeSizes[TypeGet(type)];
+}
+
+extern const unsigned short emitTypeActSz[TYP_COUNT];
+
+inline emitAttr emitActualTypeSize(var_types type)
+{
+ assert(type < TYP_COUNT);
+ assert(emitTypeActSz[type] > 0);
+ return (emitAttr)emitTypeActSz[type];
+}
+
+/*****************************************************************************
+ *
+ * Convert between an operand size in bytes and a smaller encoding used for
+ * storage in instruction descriptors.
+ */
+
+/* static */ inline emitter::opSize emitter::emitEncodeSize(emitAttr size)
+{
+ assert(size == EA_1BYTE || size == EA_2BYTE || size == EA_4BYTE || size == EA_8BYTE || size == EA_16BYTE ||
+ size == EA_32BYTE);
+
+ return emitSizeEncode[((int)size) - 1];
+}
+
+/* static */ inline emitAttr emitter::emitDecodeSize(emitter::opSize ensz)
+{
+ assert(((unsigned)ensz) < OPSZ_COUNT);
+
+ return emitSizeDecode[ensz];
+}
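+
+// Illustrative round trip (assuming the emitSizeEncode/emitSizeDecode tables defined in emit.cpp
+// are inverses for the supported operand sizes):
+//
+//   emitter::opSize enc = emitter::emitEncodeSize(EA_4BYTE);   // compact encoding of a 4-byte operand
+//   assert(emitter::emitDecodeSize(enc) == EA_4BYTE);          // decoding recovers the original emitAttr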
+
+/*****************************************************************************
+ *
+ * Little helpers to allocate various flavors of instructions.
+ */
+
+inline emitter::instrDesc* emitter::emitNewInstrTiny(emitAttr attr)
+{
+ instrDesc* id;
+
+ id = (instrDesc*)emitAllocInstr(TINY_IDSC_SIZE, attr);
+ id->idSetIsTiny();
+
+ return id;
+}
+
+inline emitter::instrDesc* emitter::emitNewInstrSmall(emitAttr attr)
+{
+ instrDesc* id;
+
+ // This is larger than the Tiny Descr
+ id = (instrDesc*)emitAllocInstr(SMALL_IDSC_SIZE, attr);
+ id->idSetIsSmallDsc();
+
+ return id;
+}
+
+inline emitter::instrDesc* emitter::emitNewInstr(emitAttr attr)
+{
+ // This is larger than the Small Descr
+ return emitAllocInstr(attr);
+}
+
+inline emitter::instrDescJmp* emitter::emitNewInstrJmp()
+{
+ return emitAllocInstrJmp();
+}
+
+#if !defined(_TARGET_ARM64_)
+inline emitter::instrDescLbl* emitter::emitNewInstrLbl()
+{
+ return emitAllocInstrLbl();
+}
+#endif // !_TARGET_ARM64_
+
+inline emitter::instrDesc* emitter::emitNewInstrDsp(emitAttr attr, ssize_t dsp)
+{
+ if (dsp == 0)
+ {
+ instrDesc* id = emitAllocInstr(attr);
+
+#if EMITTER_STATS
+ emitSmallDspCnt++;
+#endif
+
+ return id;
+ }
+ else
+ {
+ instrDescDsp* id = emitAllocInstrDsp(attr);
+
+ id->idSetIsLargeDsp();
+ id->iddDspVal = dsp;
+
+#if EMITTER_STATS
+ emitLargeDspCnt++;
+#endif
+
+ return id;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Allocate an instruction descriptor for an instruction with a constant operand.
+ * The instruction descriptor uses the idAddrUnion to save additional info
+ * so the smallest size that this can be is sizeof(instrDesc).
+ * Note that this is very similar to emitter::emitNewInstrSC(), except it never
+ * allocates a small descriptor.
+ */
+inline emitter::instrDesc* emitter::emitNewInstrCns(emitAttr attr, ssize_t cns)
+{
+ if (instrDesc::fitsInSmallCns(cns))
+ {
+ instrDesc* id = emitAllocInstr(attr);
+
+ id->idSmallCns(cns);
+
+#if EMITTER_STATS
+ emitSmallCnsCnt++;
+ if (cns - ID_MIN_SMALL_CNS >= SMALL_CNS_TSZ)
+ emitSmallCns[SMALL_CNS_TSZ - 1]++;
+ else
+ emitSmallCns[cns - ID_MIN_SMALL_CNS]++;
+#endif
+
+ return id;
+ }
+ else
+ {
+ instrDescCns* id = emitAllocInstrCns(attr);
+
+ id->idSetIsLargeCns();
+ id->idcCnsVal = cns;
+
+#if EMITTER_STATS
+ emitLargeCnsCnt++;
+#endif
+
+ return id;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Get the instrDesc size, general purpose version
+ *
+ */
+
+inline size_t emitter::emitGetInstrDescSize(const instrDesc* id)
+{
+ if (id->idIsTiny())
+ {
+ return TINY_IDSC_SIZE;
+ }
+
+ if (id->idIsSmallDsc())
+ {
+ return SMALL_IDSC_SIZE;
+ }
+
+ if (id->idIsLargeCns())
+ {
+ return sizeof(instrDescCns);
+ }
+
+ return sizeof(instrDesc);
+}
+
+/*****************************************************************************
+ *
+ * Allocate an instruction descriptor for an instruction with a small integer
+ * constant operand. This is the same as emitNewInstrCns() except that here
+ * any constant that is small enough for instrDesc::fitsInSmallCns() only gets
+ * allocated SMALL_IDSC_SIZE bytes (and is thus a small descriptor, whereas
+ * emitNewInstrCns() always allocates at least sizeof(instrDesc)).
+ */
+
+inline emitter::instrDesc* emitter::emitNewInstrSC(emitAttr attr, ssize_t cns)
+{
+ instrDesc* id;
+
+ if (instrDesc::fitsInSmallCns(cns))
+ {
+ id = (instrDesc*)emitAllocInstr(SMALL_IDSC_SIZE, attr);
+
+ id->idSmallCns(cns);
+ id->idSetIsSmallDsc();
+ }
+ else
+ {
+ id = (instrDesc*)emitAllocInstr(sizeof(instrDescCns), attr);
+
+ id->idSetIsLargeCns();
+ ((instrDescCns*)id)->idcCnsVal = cns;
+ }
+
+ return id;
+}
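+
+// Worked example (illustrative, for a target where ID_BIT_SMALL_CNS is 16):
+//
+//   instrDesc* a = emitNewInstrSC(EA_4BYTE, 5);        // fits  -> SMALL_IDSC_SIZE, a->idIsSmallDsc()
+//   instrDesc* b = emitNewInstrSC(EA_4BYTE, 0x12345);  // large -> sizeof(instrDescCns), b->idIsLargeCns()
+//
+// emitGetInsSC() later reads the constant back from whichever representation was chosen.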
+
+/*****************************************************************************
+ *
+ * Get the instrDesc size for something that contains a constant
+ */
+
+inline size_t emitter::emitGetInstrDescSizeSC(const instrDesc* id)
+{
+ if (id->idIsSmallDsc())
+ {
+ return SMALL_IDSC_SIZE;
+ }
+ else if (id->idIsLargeCns())
+ {
+ return sizeof(instrDescCns);
+ }
+ else
+ {
+ return sizeof(instrDesc);
+ }
+}
+
+/*****************************************************************************
+ *
+ * The following helpers should be used to access the various values that
+ * get stored in different places within the instruction descriptor.
+ */
+
+inline ssize_t emitter::emitGetInsCns(instrDesc* id)
+{
+ return id->idIsLargeCns() ? ((instrDescCns*)id)->idcCnsVal : id->idSmallCns();
+}
+
+inline ssize_t emitter::emitGetInsDsp(instrDesc* id)
+{
+ if (id->idIsLargeDsp())
+ {
+ if (id->idIsLargeCns())
+ {
+ return ((instrDescCnsDsp*)id)->iddcDspVal;
+ }
+ return ((instrDescDsp*)id)->iddDspVal;
+ }
+ return 0;
+}
+
+inline ssize_t emitter::emitGetInsCnsDsp(instrDesc* id, ssize_t* dspPtr)
+{
+ if (id->idIsLargeCns())
+ {
+ if (id->idIsLargeDsp())
+ {
+ *dspPtr = ((instrDescCnsDsp*)id)->iddcDspVal;
+ return ((instrDescCnsDsp*)id)->iddcCnsVal;
+ }
+ else
+ {
+ *dspPtr = 0;
+ return ((instrDescCns*)id)->idcCnsVal;
+ }
+ }
+ else
+ {
+ if (id->idIsLargeDsp())
+ {
+ *dspPtr = ((instrDescDsp*)id)->iddDspVal;
+ return id->idSmallCns();
+ }
+ else
+ {
+ *dspPtr = 0;
+ return id->idSmallCns();
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Get hold of the argument count for an indirect call.
+ */
+
+inline unsigned emitter::emitGetInsCIargs(instrDesc* id)
+{
+ if (id->idIsLargeCall())
+ {
+ return ((instrDescCGCA*)id)->idcArgCnt;
+ }
+ else
+ {
+ assert(id->idIsLargeDsp() == false);
+ assert(id->idIsLargeCns() == false);
+
+ ssize_t cns = emitGetInsCns(id);
+ assert((unsigned)cns == (size_t)cns);
+ return (unsigned)cns;
+ }
+}
+
+/*****************************************************************************
+ *
+ *  Return the GC type (GCT_GCREF, GCT_BYREF, or GCT_NONE) of the value currently held in the given register.
+ */
+
+inline GCtype emitter::emitRegGCtype(regNumber reg)
+{
+ assert(emitIssuing);
+
+ if ((emitThisGCrefRegs & genRegMask(reg)) != 0)
+ {
+ return GCT_GCREF;
+ }
+ else if ((emitThisByrefRegs & genRegMask(reg)) != 0)
+ {
+ return GCT_BYREF;
+ }
+ else
+ {
+ return GCT_NONE;
+ }
+}
+
+#ifdef DEBUG
+
+#if EMIT_TRACK_STACK_DEPTH
+#define CHECK_STACK_DEPTH() assert((int)emitCurStackLvl >= 0)
+#else
+#define CHECK_STACK_DEPTH()
+#endif
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Return true when a given code offset is properly aligned for the target
+ */
+
+inline bool IsCodeAligned(UNATIVE_OFFSET offset)
+{
+ return ((offset & (CODE_ALIGN - 1)) == 0);
+}
+
+// Static:
+inline BYTE* emitter::emitCodeWithInstructionSize(BYTE* codePtrBefore, BYTE* newCodePointer, unsigned char* instrSize)
+{
+ // DLD: Perhaps this method should return the instruction size, and we should do dst += <that size>
+ // as is done in other cases?
+ assert(newCodePointer >= codePtrBefore);
+ ClrSafeInt<unsigned char> callInstrSizeSafe = ClrSafeInt<unsigned char>(newCodePointer - codePtrBefore);
+ assert(!callInstrSizeSafe.IsOverflow());
+ *instrSize = callInstrSizeSafe.Value();
+ return newCodePointer;
+}
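+
+// Illustrative usage (a sketch of the pattern described above, not copied from a real call site):
+//
+//   unsigned char callInstrSize;
+//   dst = emitter::emitCodeWithInstructionSize(dst, <emit the call, returning the new code pointer>, &callInstrSize);
+//   // 'dst' advances as usual, and 'callInstrSize' now holds the size of the just-emitted call.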
+
+/*****************************************************************************
+ *
+ * Add a new IG to the current list, and get it ready to receive code.
+ */
+
+inline void emitter::emitNewIG()
+{
+ insGroup* ig = emitAllocAndLinkIG();
+
+ /* It's linked in. Now, set it up to accept code */
+
+ emitGenIG(ig);
+}
+
+// Start a new instruction group that is not interruptable
+inline void emitter::emitDisableGC()
+{
+ emitNoGCIG = true;
+
+ if (emitCurIGnonEmpty())
+ {
+ emitNxtIG(true);
+ }
+ else
+ {
+ emitCurIG->igFlags |= IGF_NOGCINTERRUPT;
+ }
+}
+
+// Start a new instruction group that is interruptable
+inline void emitter::emitEnableGC()
+{
+ emitNoGCIG = false;
+
+ // The next time an instruction needs to be generated, force a new instruction group.
+ // It will be an emitAdd group in that case. Note that the next thing we see might be
+ // a label, which will force a non-emitAdd group.
+ //
+ // Note that we can't just create a new instruction group here, because we don't know
+ // if there are going to be any instructions added to it, and we don't support empty
+ // instruction groups.
+ emitForceNewIG = true;
+}
+
+/*****************************************************************************/
+#endif // _EMIT_H_
+/*****************************************************************************/
diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp
new file mode 100644
index 0000000000..1f57048a80
--- /dev/null
+++ b/src/jit/emitarm.cpp
@@ -0,0 +1,7623 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX emitArm.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_ARM_)
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#include "instr.h"
+#include "emit.h"
+#include "codegen.h"
+
+/*****************************************************************************/
+
+const instruction emitJumpKindInstructions[] = {
+ INS_nop,
+
+#define JMP_SMALL(en, rev, ins) INS_##ins,
+#include "emitjmps.h"
+};
+
+const emitJumpKind emitReverseJumpKinds[] = {
+ EJ_NONE,
+
+#define JMP_SMALL(en, rev, ins) EJ_##rev,
+#include "emitjmps.h"
+};
+
+/*****************************************************************************
+ * Look up the instruction for a jump kind
+ */
+
+/*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind)
+{
+ assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions));
+ return emitJumpKindInstructions[jumpKind];
+}
+
+/*****************************************************************************
+ * Look up the jump kind for an instruction. It better be a conditional
+ * branch instruction with a jump kind!
+ */
+
+/*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins)
+{
+ for (unsigned i = 0; i < ArrLen(emitJumpKindInstructions); i++)
+ {
+ if (ins == emitJumpKindInstructions[i])
+ {
+ emitJumpKind ret = (emitJumpKind)i;
+ assert(EJ_NONE < ret && ret < EJ_COUNT);
+ return ret;
+ }
+ }
+ unreached();
+}
+
+/*****************************************************************************
+ * Reverse the conditional jump
+ */
+
+/*static*/ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind)
+{
+ assert(jumpKind < EJ_COUNT);
+ return emitReverseJumpKinds[jumpKind];
+}
+
+/*****************************************************************************
+ *
+ * Return the allocated size (in bytes) of the given instruction descriptor.
+ */
+
+size_t emitter::emitSizeOfInsDsc(instrDesc* id)
+{
+ assert(!emitIsTinyInsDsc(id));
+
+ if (emitIsScnsInsDsc(id))
+ return SMALL_IDSC_SIZE;
+
+ assert((unsigned)id->idInsFmt() < emitFmtCount);
+
+ ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
+ bool isCallIns = (id->idIns() == INS_bl) || (id->idIns() == INS_blx);
+ bool maybeCallIns = (id->idIns() == INS_b) || (id->idIns() == INS_bx);
+
+    // A call instruction (INS_bl or INS_blx) may use a "fat" direct/indirect call descriptor,
+    // except for a local call to a label (i.e. a call to a finally clause).
+    // Only ID_OP_CALL and ID_OP_SPEC check for this, so we enforce below that a call
+    // instruction always uses one of these idOps (or ID_OP_JMP for the local call case).
+
+ assert(!isCallIns || // either not a call or
+ idOp == ID_OP_CALL || // is a direct call
+ idOp == ID_OP_SPEC || // is an indirect call
+ idOp == ID_OP_JMP); // is a local call to finally clause
+
+ switch (idOp)
+ {
+ case ID_OP_NONE:
+ break;
+
+ case ID_OP_JMP:
+ return sizeof(instrDescJmp);
+
+ case ID_OP_LBL:
+ return sizeof(instrDescLbl);
+
+ case ID_OP_CALL:
+ case ID_OP_SPEC:
+ assert(isCallIns || maybeCallIns);
+ if (id->idIsLargeCall())
+ {
+ /* Must be a "fat" indirect call descriptor */
+ return sizeof(instrDescCGCA);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+ return sizeof(instrDesc);
+ }
+ break;
+
+ default:
+ NO_WAY("unexpected instruction descriptor format");
+ break;
+ }
+
+ if (id->idIsLargeCns())
+ {
+ if (id->idIsLargeDsp())
+ return sizeof(instrDescCnsDsp);
+ else
+ return sizeof(instrDescCns);
+ }
+ else
+ {
+ if (id->idIsLargeDsp())
+ return sizeof(instrDescDsp);
+ else
+ return sizeof(instrDesc);
+ }
+}
+
+bool offsetFitsInVectorMem(int disp)
+{
+ unsigned imm = unsigned_abs(disp);
+ return ((imm & 0x03fc) == imm);
+}
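+
+// Illustrative examples (added commentary, not from the original source): the mask check above
+// accepts displacements whose magnitude is a multiple of 4 that is no larger than 1020 (0x3FC), e.g.
+//   offsetFitsInVectorMem(1020)  -> true    (0x3FC & 0x3FC == 0x3FC)
+//   offsetFitsInVectorMem(-8)    -> true    (|-8| == 8 is 4-aligned and small enough)
+//   offsetFitsInVectorMem(1024)  -> false   (0x400 has a bit outside the 0x3FC mask)
+//   offsetFitsInVectorMem(6)     -> false   (not a multiple of 4)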
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * The following is called for each recorded instruction -- use for debugging.
+ */
+void emitter::emitInsSanityCheck(instrDesc* id)
+{
+ /* What instruction format have we got? */
+
+ switch (id->idInsFmt())
+ {
+ case IF_T1_A: // T1_A ................
+ case IF_T2_A: // T2_A ................ ................
+ break;
+
+ case IF_T1_B: // T1_B ........cccc.... cond
+ case IF_T2_B: // T2_B ................ ............iiii imm4
+ assert(emitGetInsSC(id) < 0x10);
+ break;
+
+ case IF_T1_C: // T1_C .....iiiiinnnddd R1 R2 imm5
+ assert(isLowRegister(id->idReg1()));
+ assert(isLowRegister(id->idReg2()));
+ if (emitInsIsLoadOrStore(id->idIns()))
+ {
+ emitAttr size = id->idOpSize();
+ int imm = emitGetInsSC(id);
+
+ imm = insUnscaleImm(imm, size);
+ assert(imm < 0x20);
+ }
+ else
+ {
+ assert(id->idSmallCns() < 0x20);
+ }
+ break;
+
+ case IF_T1_D0: // T1_D0 ........Dmmmmddd R1* R2*
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ break;
+
+ case IF_T1_D1: // T1_D1 .........mmmm... R1*
+ assert(isGeneralRegister(id->idReg1()));
+ break;
+
+ case IF_T1_D2: // T1_D2 .........mmmm... R3*
+ assert(isGeneralRegister(id->idReg3()));
+ break;
+
+ case IF_T1_E: // T1_E ..........nnnddd R1 R2
+ assert(isLowRegister(id->idReg1()));
+ assert(isLowRegister(id->idReg2()));
+ assert(id->idSmallCns() < 0x20);
+ break;
+
+ case IF_T1_F: // T1_F .........iiiiiii SP imm7
+ assert(id->idReg1() == REG_SP);
+ assert(id->idOpSize() == EA_4BYTE);
+ assert((emitGetInsSC(id) & ~0x1FC) == 0);
+ break;
+
+ case IF_T1_G: // T1_G .......iiinnnddd R1 R2 imm3
+ assert(isLowRegister(id->idReg1()));
+ assert(isLowRegister(id->idReg2()));
+ assert(id->idSmallCns() < 0x8);
+ break;
+
+ case IF_T1_H: // T1_H .......mmmnnnddd R1 R2 R3
+ assert(isLowRegister(id->idReg1()));
+ assert(isLowRegister(id->idReg2()));
+ assert(isLowRegister(id->idReg3()));
+ break;
+
+ case IF_T1_I: // T1_I ......i.iiiiiddd R1 imm6
+ assert(isLowRegister(id->idReg1()));
+ break;
+
+ case IF_T1_J0: // T1_J0 .....dddiiiiiiii R1 imm8
+ assert(isLowRegister(id->idReg1()));
+ assert(emitGetInsSC(id) < 0x100);
+ break;
+
+ case IF_T1_J1: // T1_J1 .....dddiiiiiiii R1 <regmask8>
+ assert(isLowRegister(id->idReg1()));
+ assert(emitGetInsSC(id) < 0x100);
+ break;
+
+ case IF_T1_J2: // T1_J2 .....dddiiiiiiii R1 SP imm8
+ assert(isLowRegister(id->idReg1()));
+ assert(id->idReg2() == REG_SP);
+ assert(id->idOpSize() == EA_4BYTE);
+ assert((emitGetInsSC(id) & ~0x3FC) == 0);
+ break;
+
+ case IF_T1_L0: // T1_L0 ........iiiiiiii imm8
+ assert(emitGetInsSC(id) < 0x100);
+ break;
+
+ case IF_T1_L1: // T1_L1 .......Rrrrrrrrr <regmask8+2>
+ assert(emitGetInsSC(id) < 0x400);
+ break;
+
+ case IF_T2_C0: // T2_C0 ...........Snnnn .iiiddddiishmmmm R1 R2 R3 S, imm5, sh
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(emitGetInsSC(id) < 0x20);
+ break;
+
+ case IF_T2_C4: // T2_C4 ...........Snnnn ....dddd....mmmm R1 R2 R3 S
+ case IF_T2_C5: // T2_C5 ............nnnn ....dddd....mmmm R1 R2 R3
+ case IF_T2_G1: // T2_G1 ............nnnn ttttTTTT........ R1 R2 R3
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ break;
+
+ case IF_T2_C1: // T2_C1 ...........S.... .iiiddddiishmmmm R1 R2 S, imm5, sh
+ case IF_T2_C2: // T2_C2 ...........S.... .iiiddddii..mmmm R1 R2 S, imm5
+ case IF_T2_C8: // T2_C8 ............nnnn .iii....iishmmmm R1 R2 imm5, sh
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(emitGetInsSC(id) < 0x20);
+ break;
+
+ case IF_T2_C6: // T2_C6 ................ ....dddd..iimmmm R1 R2 imm2
+ case IF_T2_C7: // T2_C7 ............nnnn ..........shmmmm R1 R2 imm2
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(emitGetInsSC(id) < 0x4);
+ break;
+
+ case IF_T2_C3: // T2_C3 ...........S.... ....dddd....mmmm R1 R2 S
+ case IF_T2_C9: // T2_C9 ............nnnn ............mmmm R1 R2
+ case IF_T2_C10: // T2_C10 ............mmmm ....dddd....mmmm R1 R2
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ break;
+
+ case IF_T2_D0: // T2_D0 ............nnnn .iiiddddii.wwwww R1 R2 imm5, imm5
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(emitGetInsSC(id) < 0x400);
+ break;
+
+ case IF_T2_D1: // T2_D1 ................ .iiiddddii.wwwww R1 imm5, imm5
+ assert(isGeneralRegister(id->idReg1()));
+ assert(emitGetInsSC(id) < 0x400);
+ break;
+
+ case IF_T2_E0: // T2_E0 ............nnnn tttt......shmmmm R1 R2 R3 imm2
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ if (id->idIsLclVar())
+ {
+ assert(isGeneralRegister(codeGen->rsGetRsvdReg()));
+ }
+ else
+ {
+ assert(isGeneralRegister(id->idReg3()));
+ assert(emitGetInsSC(id) < 0x4);
+ }
+ break;
+
+ case IF_T2_E1: // T2_E1 ............nnnn tttt............ R1 R2
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ break;
+
+ case IF_T2_E2: // T2_E2 ................ tttt............ R1
+ assert(isGeneralRegister(id->idReg1()));
+ break;
+
+ case IF_T2_F1: // T2_F1 ............nnnn ttttdddd....mmmm R1 R2 R3 R4
+ case IF_T2_F2: // T2_F2 ............nnnn aaaadddd....mmmm R1 R2 R3 R4
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(isGeneralRegister(id->idReg4()));
+ break;
+
+ case IF_T2_G0: // T2_G0 .......PU.W.nnnn ttttTTTTiiiiiiii R1 R2 R3 imm8, PUW
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(unsigned_abs(emitGetInsSC(id)) < 0x100);
+ break;
+
+ case IF_T2_H0: // T2_H0 ............nnnn tttt.PUWiiiiiiii R1 R2 imm8, PUW
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(unsigned_abs(emitGetInsSC(id)) < 0x100);
+ break;
+
+ case IF_T2_H1: // T2_H1 ............nnnn tttt....iiiiiiii R1 R2 imm8
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(emitGetInsSC(id) < 0x100);
+ break;
+
+ case IF_T2_H2: // T2_H2 ............nnnn ........iiiiiiii R1 imm8
+ assert(isGeneralRegister(id->idReg1()));
+ assert(emitGetInsSC(id) < 0x100);
+ break;
+
+ case IF_T2_I0: // T2_I0 ..........W.nnnn rrrrrrrrrrrrrrrr R1 W, imm16
+ assert(isGeneralRegister(id->idReg1()));
+ assert(emitGetInsSC(id) < 0x10000);
+ break;
+
+ case IF_T2_N: // T2_N .....i......iiii .iiiddddiiiiiiii R1 imm16
+ assert(isGeneralRegister(id->idReg1()));
+ break;
+
+ case IF_T2_N2: // T2_N2 .....i......iiii .iiiddddiiiiiiii R1 imm16
+ assert(isGeneralRegister(id->idReg1()));
+ assert((size_t)emitGetInsSC(id) < emitDataSize());
+ break;
+
+ case IF_T2_I1: // T2_I1 ................ rrrrrrrrrrrrrrrr imm16
+ assert(emitGetInsSC(id) < 0x10000);
+ break;
+
+ case IF_T2_K1: // T2_K1 ............nnnn ttttiiiiiiiiiiii R1 R2 imm12
+ case IF_T2_M0: // T2_M0 .....i......nnnn .iiiddddiiiiiiii R1 R2 imm12
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(emitGetInsSC(id) < 0x1000);
+ break;
+
+ case IF_T2_L0: // T2_L0 .....i.....Snnnn .iiiddddiiiiiiii R1 R2 S, imm8<<imm4
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isModImmConst(emitGetInsSC(id)));
+ break;
+
+ case IF_T2_K4: // T2_K4 ........U....... ttttiiiiiiiiiiii R1 PC U, imm12
+ case IF_T2_M1: // T2_M1 .....i.......... .iiiddddiiiiiiii R1 PC imm12
+ assert(isGeneralRegister(id->idReg1()));
+ assert(id->idReg2() == REG_PC);
+ assert(emitGetInsSC(id) < 0x1000);
+ break;
+
+ case IF_T2_K3: // T2_K3 ........U....... ....iiiiiiiiiiii PC U, imm12
+ assert(id->idReg1() == REG_PC);
+ assert(emitGetInsSC(id) < 0x1000);
+ break;
+
+ case IF_T2_K2: // T2_K2 ............nnnn ....iiiiiiiiiiii R1 imm12
+ assert(isGeneralRegister(id->idReg1()));
+ assert(emitGetInsSC(id) < 0x1000);
+ break;
+
+ case IF_T2_L1: // T2_L1 .....i.....S.... .iiiddddiiiiiiii R1 S, imm8<<imm4
+ case IF_T2_L2: // T2_L2 .....i......nnnn .iii....iiiiiiii R1 imm8<<imm4
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isModImmConst(emitGetInsSC(id)));
+ break;
+
+ case IF_T1_J3: // T1_J3 .....dddiiiiiiii R1 PC imm8
+ assert(isGeneralRegister(id->idReg1()));
+ assert(id->idReg2() == REG_PC);
+ assert(emitGetInsSC(id) < 0x100);
+ break;
+
+ case IF_T1_K: // T1_K ....cccciiiiiiii Branch imm8, cond4
+ case IF_T1_M: // T1_M .....iiiiiiiiiii Branch imm11
+ case IF_T2_J1: // T2_J1 .....Scccciiiiii ..j.jiiiiiiiiiii Branch imm20, cond4
+ case IF_T2_J2: // T2_J2 .....Siiiiiiiiii ..j.jiiiiiiiiii. Branch imm24
+ case IF_T2_N1: // T2_N .....i......iiii .iiiddddiiiiiiii R1 imm16
+ case IF_T2_J3: // T2_J3 .....Siiiiiiiiii ..j.jiiiiiiiiii. Call imm24
+ case IF_LARGEJMP:
+ break;
+
+ case IF_T2_VFP3:
+ if (id->idOpSize() == EA_8BYTE)
+ {
+ assert(isDoubleReg(id->idReg1()));
+ assert(isDoubleReg(id->idReg2()));
+ assert(isDoubleReg(id->idReg3()));
+ }
+ else
+ {
+ assert(id->idOpSize() == EA_4BYTE);
+ assert(isFloatReg(id->idReg1()));
+ assert(isFloatReg(id->idReg2()));
+ assert(isFloatReg(id->idReg3()));
+ }
+ break;
+
+ case IF_T2_VFP2:
+ assert(isFloatReg(id->idReg1()));
+ assert(isFloatReg(id->idReg2()));
+ break;
+
+ case IF_T2_VLDST:
+ if (id->idOpSize() == EA_8BYTE)
+ assert(isDoubleReg(id->idReg1()));
+ else
+ assert(isFloatReg(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(offsetFitsInVectorMem(emitGetInsSC(id)));
+ break;
+
+ case IF_T2_VMOVD:
+ assert(id->idOpSize() == EA_8BYTE);
+ if (id->idIns() == INS_vmov_d2i)
+ {
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isDoubleReg(id->idReg3()));
+ }
+ else
+ {
+ assert(id->idIns() == INS_vmov_i2d);
+ assert(isDoubleReg(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ }
+ break;
+
+ case IF_T2_VMOVS:
+ assert(id->idOpSize() == EA_4BYTE);
+ if (id->idIns() == INS_vmov_i2f)
+ {
+ assert(isFloatReg(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ }
+ else
+ {
+ assert(id->idIns() == INS_vmov_f2i);
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isFloatReg(id->idReg2()));
+ }
+ break;
+
+ default:
+ printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
+ assert(!"Unexpected format");
+ break;
+ }
+}
+#endif // DEBUG
+
+bool emitter::emitInsMayWriteToGCReg(instrDesc* id)
+{
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+
+ switch (fmt)
+ {
+
+ // These are the formats with "destination" or "target" registers:
+ case IF_T1_C:
+ case IF_T1_D0:
+ case IF_T1_E:
+ case IF_T1_G:
+ case IF_T1_H:
+ case IF_T1_J0:
+ case IF_T1_J1:
+ case IF_T1_J2:
+ case IF_T1_J3:
+ case IF_T2_C0:
+ case IF_T2_C1:
+ case IF_T2_C2:
+ case IF_T2_C3:
+ case IF_T2_C4:
+ case IF_T2_C5:
+ case IF_T2_C6:
+ case IF_T2_C10:
+ case IF_T2_D0:
+ case IF_T2_D1:
+ case IF_T2_F1:
+ case IF_T2_F2:
+ case IF_T2_L0:
+ case IF_T2_L1:
+ case IF_T2_M0:
+ case IF_T2_M1:
+ case IF_T2_N:
+ case IF_T2_N1:
+ case IF_T2_N2:
+ case IF_T2_VFP3:
+ case IF_T2_VFP2:
+ case IF_T2_VLDST:
+ case IF_T2_E0:
+ case IF_T2_E1:
+ case IF_T2_E2:
+ case IF_T2_G0:
+ case IF_T2_G1:
+ case IF_T2_H0:
+ case IF_T2_H1:
+ case IF_T2_K1:
+ case IF_T2_K4:
+ // Some formats with "destination" or "target" registers are actually used for store instructions, for the
+ // "source" value written to memory.
+ // Similarly, PUSH has a target register, indicating the start of the set of registers to push. POP
+ // *does* write to at least one register, so we do not make that a special case.
+        // Various compare/test instructions do not write (except to the flags). Technically "teq" does not need to
+        // be in this list because it has no forms matched above, but I'm putting it here for completeness.
+ switch (ins)
+ {
+ case INS_str:
+ case INS_strb:
+ case INS_strh:
+ case INS_strd:
+ case INS_strex:
+ case INS_strexb:
+ case INS_strexd:
+ case INS_strexh:
+ case INS_push:
+ case INS_cmp:
+ case INS_cmn:
+ case INS_tst:
+ case INS_teq:
+ return false;
+ default:
+ return true;
+ }
+ case IF_T2_VMOVS:
+        // VMOV.i2f reads from the integer register. Conversely, VMOV.f2i writes to a GC pointer-sized
+        // integer register that might have previously held GC pointers, so it needs to be included.
+ assert(id->idGCref() == GCT_NONE);
+ return (ins == INS_vmov_f2i);
+
+ case IF_T2_VMOVD:
+ // VMOV.i2d reads from the integer registers. Conversely VMOV.d2i writes to GC pointer-sized
+ // integer registers that might have previously held GC pointers, so they need to be included.
+ assert(id->idGCref() == GCT_NONE);
+ return (ins == INS_vmov_d2i);
+
+ default:
+ return false;
+ }
+}
+
+bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id)
+{
+ if (!id->idIsLclVar())
+ return false;
+
+ instruction ins = id->idIns();
+
+ // This list is related to the list of instructions used to store local vars in emitIns_S_R().
+ // We don't accept writing to float local vars.
+
+ switch (ins)
+ {
+ case INS_strb:
+ case INS_strh:
+ case INS_str:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool emitter::emitInsMayWriteMultipleRegs(instrDesc* id)
+{
+ instruction ins = id->idIns();
+
+ switch (ins)
+ {
+ case INS_ldm:
+ case INS_ldmdb:
+ case INS_pop:
+ case INS_smlal:
+ case INS_smull:
+ case INS_umlal:
+ case INS_umull:
+ case INS_vmov_d2i:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Return a string that represents the given register.
+ */
+
+const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName)
+{
+ assert(reg < REG_COUNT);
+
+ const char* rn = emitComp->compRegVarName(reg, varName, false);
+
+ assert(strlen(rn) >= 1);
+
+ return rn;
+}
+
+const char* emitter::emitFloatRegName(regNumber reg, emitAttr attr, bool varName)
+{
+ assert(reg < REG_COUNT);
+
+ const char* rn = emitComp->compRegVarName(reg, varName, true);
+
+ assert(strlen(rn) >= 1);
+
+ return rn;
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Returns the base encoding of the given CPU instruction.
+ */
+
+emitter::insFormat emitter::emitInsFormat(instruction ins)
+{
+ // clang-format off
+ const static insFormat insFormats[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) fmt,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) fmt,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) fmt,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) fmt,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) fmt,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) fmt,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) fmt,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt,
+ #include "instrs.h"
+ };
+ // clang-format on
+
+ assert(ins < ArrLen(insFormats));
+ assert((insFormats[ins] != IF_NONE));
+
+ return insFormats[ins];
+}
+
+// INST_FP is 1
+#define LD 2
+#define ST 4
+#define CMP 8
+
+// clang-format off
+/*static*/ const BYTE CodeGenInterface::instInfo[] =
+{
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) ldst | INST_FP*fp,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) ldst | INST_FP*fp,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) ldst | INST_FP*fp,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) ldst | INST_FP*fp,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) ldst | INST_FP*fp,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) ldst | INST_FP*fp,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) ldst | INST_FP*fp,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) ldst | INST_FP*fp,
+ #include "instrs.h"
+};
+// clang-format on
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of load instruction
+ */
+
+bool emitter::emitInsIsLoad(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & LD) ? true : false;
+ else
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of compare or test instruction
+ */
+
+bool emitter::emitInsIsCompare(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & CMP) ? true : false;
+ else
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of store instruction
+ */
+
+bool emitter::emitInsIsStore(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & ST) ? true : false;
+ else
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of load/store instruction
+ */
+
+bool emitter::emitInsIsLoadOrStore(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & (LD | ST)) ? true : false;
+ else
+ return false;
+}
+
+#undef LD
+#undef ST
+#undef CMP
+
+/*****************************************************************************
+ *
+ * Returns the specific encoding of the given CPU instruction and format
+ */
+
+size_t emitter::emitInsCode(instruction ins, insFormat fmt)
+{
+ // clang-format off
+ const static size_t insCodes1[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) e1,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) e1,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) e1,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e1,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e1,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e1,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e1,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e1,
+ #include "instrs.h"
+ };
+ const static size_t insCodes2[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) e2,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) e2,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e2,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e2,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e2,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e2,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e2,
+ #include "instrs.h"
+ };
+ const static size_t insCodes3[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) e3,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e3,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e3,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e3,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e3,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e3,
+ #include "instrs.h"
+ };
+ const static size_t insCodes4[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e4,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e4,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e4,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e4,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e4,
+ #include "instrs.h"
+ };
+ const static size_t insCodes5[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e5,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e5,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e5,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e5,
+ #include "instrs.h"
+ };
+ const static size_t insCodes6[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e6,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e6,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e6,
+ #include "instrs.h"
+ };
+ const static size_t insCodes7[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 )
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e7,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e7,
+ #include "instrs.h"
+ };
+ const static size_t insCodes8[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 )
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e8,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e8,
+ #include "instrs.h"
+ };
+ const static size_t insCodes9[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 )
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 )
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e9,
+ #include "instrs.h"
+ };
+ const static insFormat formatEncode9[9] = { IF_T1_D0, IF_T1_H, IF_T1_J0, IF_T1_G, IF_T2_L0, IF_T2_C0, IF_T1_F, IF_T1_J2, IF_T1_J3 };
+ const static insFormat formatEncode8[8] = { IF_T1_H, IF_T1_C, IF_T2_E0, IF_T2_H0, IF_T2_K1, IF_T2_K4, IF_T1_J2, IF_T1_J3 };
+ const static insFormat formatEncode6A[6] = { IF_T1_H, IF_T1_C, IF_T2_E0, IF_T2_H0, IF_T2_K1, IF_T2_K4};
+ const static insFormat formatEncode6B[6] = { IF_T1_H, IF_T1_C, IF_T2_E0, IF_T2_H0, IF_T2_K1, IF_T1_J2 };
+ const static insFormat formatEncode5A[5] = { IF_T1_E, IF_T1_D0, IF_T1_J0, IF_T2_L1, IF_T2_C3 };
+ const static insFormat formatEncode5B[5] = { IF_T1_E, IF_T1_D0, IF_T1_J0, IF_T2_L2, IF_T2_C8 };
+ const static insFormat formatEncode4A[4] = { IF_T1_E, IF_T1_C, IF_T2_C4, IF_T2_C2 };
+ const static insFormat formatEncode4B[4] = { IF_T2_K2, IF_T2_H2, IF_T2_C7, IF_T2_K3 };
+ const static insFormat formatEncode3A[3] = { IF_T1_E, IF_T2_C0, IF_T2_L0 };
+ const static insFormat formatEncode3B[3] = { IF_T1_E, IF_T2_C8, IF_T2_L2 };
+ const static insFormat formatEncode3C[3] = { IF_T1_E, IF_T2_C1, IF_T2_L1 };
+ const static insFormat formatEncode3D[3] = { IF_T1_L1, IF_T2_E2, IF_T2_I1 };
+ const static insFormat formatEncode3E[3] = { IF_T2_N, IF_T2_N1, IF_T2_N2 };
+ const static insFormat formatEncode3F[3] = { IF_T1_M, IF_T2_J2, IF_T2_J3 };
+ const static insFormat formatEncode2A[2] = { IF_T1_K, IF_T2_J1 };
+ const static insFormat formatEncode2B[2] = { IF_T1_D1, IF_T1_D2 };
+ const static insFormat formatEncode2C[2] = { IF_T1_D2, IF_T2_J3 };
+ const static insFormat formatEncode2D[2] = { IF_T1_J1, IF_T2_I0 };
+ const static insFormat formatEncode2E[2] = { IF_T1_E, IF_T2_C6 };
+ const static insFormat formatEncode2F[2] = { IF_T1_E, IF_T2_C5 };
+ const static insFormat formatEncode2G[2] = { IF_T1_J3, IF_T2_M1 };
+ // clang-format on
+
+ size_t code = BAD_CODE;
+ insFormat insFmt = emitInsFormat(ins);
+ bool found = false;
+ int index = 0;
+
+ switch (insFmt)
+ {
+ case IF_EN9:
+ for (index = 0; index < 9; index++)
+ {
+ if (fmt == formatEncode9[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN8:
+ for (index = 0; index < 8; index++)
+ {
+ if (fmt == formatEncode8[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN6A:
+ for (index = 0; index < 6; index++)
+ {
+ if (fmt == formatEncode6A[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN6B:
+ for (index = 0; index < 6; index++)
+ {
+ if (fmt == formatEncode6B[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN5A:
+ for (index = 0; index < 5; index++)
+ {
+ if (fmt == formatEncode5A[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN5B:
+ for (index = 0; index < 5; index++)
+ {
+ if (fmt == formatEncode5B[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4A:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4A[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4B:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4B[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3A:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3A[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3B:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3B[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN3C:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3C[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN3D:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3D[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN3E:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3E[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN3F:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3F[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2A:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2A[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN2B:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2B[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN2C:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2C[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN2D:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2D[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN2E:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2E[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN2F:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2F[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2G:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2G[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ default:
+ index = 0;
+ found = true;
+ break;
+ }
+
+ assert(found);
+
+ switch (index)
+ {
+ case 0:
+ assert(ins < ArrLen(insCodes1));
+ code = insCodes1[ins];
+ break;
+ case 1:
+ assert(ins < ArrLen(insCodes2));
+ code = insCodes2[ins];
+ break;
+ case 2:
+ assert(ins < ArrLen(insCodes3));
+ code = insCodes3[ins];
+ break;
+ case 3:
+ assert(ins < ArrLen(insCodes4));
+ code = insCodes4[ins];
+ break;
+ case 4:
+ assert(ins < ArrLen(insCodes5));
+ code = insCodes5[ins];
+ break;
+ case 5:
+ assert(ins < ArrLen(insCodes6));
+ code = insCodes6[ins];
+ break;
+ case 6:
+ assert(ins < ArrLen(insCodes7));
+ code = insCodes7[ins];
+ break;
+ case 7:
+ assert(ins < ArrLen(insCodes8));
+ code = insCodes8[ins];
+ break;
+ case 8:
+ assert(ins < ArrLen(insCodes9));
+ code = insCodes9[ins];
+ break;
+ }
+
+ assert((code != BAD_CODE));
+
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Return the code size of the given instruction format. The 'insSize' return type enum
+ * indicates a 16 bit, 32 bit, or 48 bit instruction.
+ */
+
+emitter::insSize emitter::emitInsSize(insFormat insFmt)
+{
+ if ((insFmt >= IF_T1_A) && (insFmt < IF_T2_A))
+ return ISZ_16BIT;
+
+ if ((insFmt >= IF_T2_A) && (insFmt < IF_INVALID))
+ return ISZ_32BIT;
+
+ if (insFmt == IF_LARGEJMP)
+ return ISZ_48BIT;
+
+ assert(!"Invalid insFormat");
+ return ISZ_48BIT;
+}
+
+/*****************************************************************************
+ *
+ *  isModImmConst() returns true when the immediate 'val32' can be encoded
+ *  using the special modified immediate constant form available in Thumb-2.
+ */
+
+/*static*/ bool emitter::isModImmConst(int val32)
+{
+ unsigned uval32 = (unsigned)val32;
+ unsigned imm8 = uval32 & 0xff;
+
+ /* encode = 0000x */
+ if (imm8 == uval32)
+ return true;
+
+ unsigned imm32a = (imm8 << 16) | imm8;
+ /* encode = 0001x */
+ if (imm32a == uval32)
+ return true;
+
+ unsigned imm32b = (imm32a << 8);
+ /* encode = 0010x */
+ if (imm32b == uval32)
+ return true;
+
+ unsigned imm32c = (imm32a | imm32b);
+ /* encode = 0011x */
+ if (imm32c == uval32)
+ return true;
+
+ unsigned mask32 = 0x00000ff;
+
+ unsigned encode = 31; /* 11111 */
+ unsigned temp;
+
+ do
+ {
+ mask32 <<= 1;
+ temp = uval32 & ~mask32;
+ if (temp == 0)
+ return true;
+ encode--;
+ } while (encode >= 8);
+
+ return false;
+}
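+
+// Worked examples (added commentary, not from the original source), following the checks above:
+//   isModImmConst(0x000000FF) -> true   // imm8 by itself                 (encode 0000x)
+//   isModImmConst(0x00FF00FF) -> true   // imm8 repeated in both halves   (encode 0001x)
+//   isModImmConst(0xFF00FF00) -> true   // the 0001x pattern shifted left (encode 0010x)
+//   isModImmConst(0xFFFFFFFF) -> true   // imm8 repeated in every byte    (encode 0011x)
+//   isModImmConst(0x000003FC) -> true   // 0xFF rotated into bits [9:2]
+//   isModImmConst(0x00000101) -> false  // set bits span more than 8 contiguous bit positions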
+
+/*****************************************************************************
+ *
+ *  encodeModImmConst() returns the special ARM 12-bit immediate encoding
+ *  (4 bits + 8 bits) that is used to encode the immediate.
+ *  If the immediate cannot be encoded then 0x0BADC0DE is returned.
+ */
+
+/*static*/ int emitter::encodeModImmConst(int val32)
+{
+ unsigned uval32 = (unsigned)val32;
+ unsigned imm8 = uval32 & 0xff;
+ unsigned encode = imm8 >> 7;
+ unsigned imm32a;
+ unsigned imm32b;
+ unsigned imm32c;
+ unsigned mask32;
+ unsigned temp;
+
+ /* encode = 0000x */
+ if (imm8 == uval32)
+ {
+ goto DONE;
+ }
+
+ imm32a = (imm8 << 16) | imm8;
+ /* encode = 0001x */
+ if (imm32a == uval32)
+ {
+ encode += 2;
+ goto DONE;
+ }
+
+ imm32b = (imm32a << 8);
+ /* encode = 0010x */
+ if (imm32b == uval32)
+ {
+ encode += 4;
+ goto DONE;
+ }
+
+ imm32c = (imm32a | imm32b);
+ /* encode = 0011x */
+ if (imm32c == uval32)
+ {
+ encode += 6;
+ goto DONE;
+ }
+
+ mask32 = 0x00000ff;
+
+ encode = 31; /* 11111 */
+ do
+ {
+ mask32 <<= 1;
+ temp = uval32 & ~mask32;
+ if (temp == 0)
+ {
+ imm8 = (uval32 & mask32) >> (32 - encode);
+ assert((imm8 & 0x80) != 0);
+ goto DONE;
+ }
+ encode--;
+ } while (encode >= 8);
+
+ assert(!"encodeModImmConst failed!");
+ return BAD_CODE;
+
+DONE:
+ unsigned result = (encode << 7) | (imm8 & 0x7f);
+ assert(result <= 0x0fff);
+ assert(result >= 0);
+ return (int)result;
+}
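+
+// Worked example (added commentary, not from the original source): for val32 == 0x000003FC the
+// rotation loop above stops with encode == 30 and imm8 == 0xFF, so the function returns
+// (30 << 7) | (0xFF & 0x7F) == 0xF7F, i.e. the 12-bit field is the 5-bit selector followed by
+// the low 7 bits of the 8-bit value.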
+
+/*****************************************************************************
+ *
+ * emitIns_valid_imm_for_alu() returns true when the immediate 'imm'
+ * can be encoded using the 12-bit funky Arm immediate encoding
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_alu(int imm)
+{
+ if (isModImmConst(imm))
+ return true;
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * emitIns_valid_imm_for_mov() returns true when the immediate 'imm'
+ * can be encoded using a single mov or mvn instruction.
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_mov(int imm)
+{
+ if ((imm & 0x0000ffff) == imm) // 16-bit immediate
+ return true;
+ if (isModImmConst(imm)) // funky arm immediate
+ return true;
+ if (isModImmConst(~imm)) // funky arm immediate via mvn
+ return true;
+ return false;
+}
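+
+// Illustrative examples (added commentary, not from the original source):
+//   emitIns_valid_imm_for_mov(0x0000ABCD) -> true   // fits in 16 bits, so movw can be used
+//   emitIns_valid_imm_for_mov(0xFFFFFF00) -> true   // ~imm == 0xFF is a modified immediate, so mvn can be used
+//   emitIns_valid_imm_for_mov(0x00012345) -> false  // neither imm nor ~imm can be encoded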
+
+/*****************************************************************************
+ *
+ * emitIns_valid_imm_for_small_mov() returns true when the immediate 'imm'
+ * can be encoded using a single 2-byte mov instruction.
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_small_mov(regNumber reg, int imm, insFlags flags)
+{
+ return isLowRegister(reg) && insSetsFlags(flags) && ((imm & 0x00ff) == imm);
+}
+
+/*****************************************************************************
+ *
+ * emitIns_valid_imm_for_add() returns true when the immediate 'imm'
+ * can be encoded using a single add or sub instruction.
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_add(int imm, insFlags flags)
+{
+ if ((unsigned_abs(imm) <= 0x00000fff) && (flags != INS_FLAGS_SET)) // 12-bit immediate via add/sub
+ return true;
+ if (isModImmConst(imm)) // funky arm immediate
+ return true;
+ if (isModImmConst(-imm)) // funky arm immediate via sub
+ return true;
+ return false;
+}
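+
+// Illustrative examples (added commentary, not from the original source):
+//   emitIns_valid_imm_for_add(0x00000FFF, INS_FLAGS_DONT_CARE) -> true   // fits the 12-bit addw/subw immediate
+//   emitIns_valid_imm_for_add(0x00000FFF, INS_FLAGS_SET)       -> false  // addw/subw cannot set flags and
+//                                                                        // 0xFFF is not a modified immediate
+//   emitIns_valid_imm_for_add(-0x00FF00FF, INS_FLAGS_SET)      -> true   // -imm is a modified immediate (sub form)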
+
+/*****************************************************************************
+ *
+ * emitIns_valid_imm_for_add_sp() returns true when the immediate 'imm'
+ * can be encoded in "add Rd,SP,i10".
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_add_sp(int imm)
+{
+ if ((imm & 0x03fc) == imm)
+ return true;
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with no operands.
+ */
+
+void emitter::emitIns(instruction ins)
+{
+ instrDesc* id = emitNewInstrSmall(EA_4BYTE);
+ insFormat fmt = emitInsFormat(ins);
+ insSize isz = emitInsSize(fmt);
+
+ assert((fmt == IF_T1_A) || (fmt == IF_T2_A));
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a single immediate value.
+ */
+
+void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm)
+{
+ insFormat fmt = IF_NONE;
+ bool hasLR = false;
+ bool hasPC = false;
+ bool useT2 = false;
+ bool onlyT1 = false;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+#ifdef FEATURE_ITINSTRUCTION
+ case INS_it:
+ case INS_itt:
+ case INS_ite:
+ case INS_ittt:
+ case INS_itte:
+ case INS_itet:
+ case INS_itee:
+ case INS_itttt:
+ case INS_ittte:
+ case INS_ittet:
+ case INS_ittee:
+ case INS_itett:
+ case INS_itete:
+ case INS_iteet:
+ case INS_iteee:
+ assert((imm & 0x0F) == imm);
+ fmt = IF_T1_B;
+ attr = EA_4BYTE;
+ break;
+#endif // FEATURE_ITINSTRUCTION
+
+ case INS_push:
+ assert((imm & 0xA000) == 0); // Cannot push PC or SP
+
+ if (imm & 0x4000) // Is the LR being pushed?
+ hasLR = true;
+
+ goto COMMON_PUSH_POP;
+
+ case INS_pop:
+ assert((imm & 0x2000) == 0); // Cannot pop SP
+ assert((imm & 0xC000) != 0xC000); // Cannot pop both PC and LR
+
+ if (imm & 0x8000) // Is the PC being popped?
+ hasPC = true;
+ if (imm & 0x4000) // Is the LR being popped?
+ {
+ hasLR = true;
+ useT2 = true;
+ }
+
+ COMMON_PUSH_POP:
+
+ if (((imm - 1) & imm) == 0) // Is only one or zero bits set in imm?
+ {
+ if (((imm == 0) && !hasLR) || // imm has no bits set, but hasLR is set
+ (!hasPC && !hasLR)) // imm has one bit set, and neither of hasPC/hasLR are set
+ {
+ onlyT1 = true; // if only one bit is set we must use the T1 encoding
+ }
+ }
+
+ imm &= ~0xE000; // ensure that PC, LR and SP bits are removed from imm
+
+ if (((imm & 0x00ff) == imm) && !useT2)
+ {
+ fmt = IF_T1_L1;
+ }
+ else if (!onlyT1)
+ {
+ fmt = IF_T2_I1;
+ }
+ else
+ {
+ // We have to use the Thumb-2 push single register encoding
+ regNumber reg = genRegNumFromMask(imm);
+ emitIns_R(ins, attr, reg);
+ return;
+ }
+
+ //
+ // Encode the PC and LR bits as the lowest two bits
+ //
+ imm <<= 2;
+ if (hasPC)
+ imm |= 2;
+ if (hasLR)
+ imm |= 1;
+
+ assert(imm != 0);
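+
+            // For example (added commentary, not from the original source): "push {r0-r3, lr}"
+            // arrives here with imm == 0x400F and hasLR recorded above; the LR bit is stripped
+            // (imm becomes 0x000F) and then re-encoded, giving (0x000F << 2) | 1 == 0x3D as the
+            // stored immediate.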
+
+ break;
+
+#if 0
+ // TODO-ARM-Cleanup: Enable or delete.
+ case INS_bkpt: // Windows uses a different encoding
+ if ((imm & 0x0000ffff) == imm)
+ {
+ fmt = IF_T1_L0;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+#endif
+
+ case INS_dmb:
+ case INS_ism:
+ if ((imm & 0x000f) == imm)
+ {
+ fmt = IF_T2_B;
+ attr = EA_4BYTE;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T1_B) || (fmt == IF_T1_L0) || (fmt == IF_T1_L1) || (fmt == IF_T2_I1) || (fmt == IF_T2_B));
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a single register.
+ */
+
+void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_pop:
+ case INS_push:
+ if (isLowRegister(reg))
+ {
+ int regmask = 1 << ((int)reg);
+ emitIns_I(ins, attr, regmask);
+ return;
+ }
+ assert(size == EA_PTRSIZE);
+ fmt = IF_T2_E2;
+ break;
+
+ case INS_vmrs:
+ assert(size == EA_PTRSIZE);
+ fmt = IF_T2_E2;
+ break;
+
+ case INS_bx:
+ assert(size == EA_PTRSIZE);
+ fmt = IF_T1_D1;
+ break;
+ case INS_rsb:
+ case INS_mvn:
+ emitIns_R_R_I(ins, attr, reg, reg, 0);
+ return;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T1_D1) || (fmt == IF_T2_E2));
+
+ instrDesc* id = emitNewInstrSmall(attr);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a constant.
+ */
+
+void emitter::emitIns_R_I(
+ instruction ins, emitAttr attr, regNumber reg, int imm, insFlags flags /* = INS_FLAGS_DONT_CARE */)
+
+{
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_add:
+ case INS_sub:
+ if ((reg == REG_SP) && insDoesNotSetFlags(flags) && ((imm & 0x01fc) == imm))
+ {
+ fmt = IF_T1_F;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ else if (isLowRegister(reg) && insSetsFlags(flags) && (unsigned_abs(imm) <= 0x00ff))
+ {
+ if (imm < 0)
+ {
+ assert((ins == INS_add) || (ins == INS_sub));
+ if (ins == INS_add)
+ ins = INS_sub;
+ else // ins == INS_sub
+ ins = INS_add;
+ imm = -imm;
+ }
+ fmt = IF_T1_J0;
+ sf = INS_FLAGS_SET;
+ }
+ else
+ {
+ // otherwise we have to use a Thumb-2 encoding
+ emitIns_R_R_I(ins, attr, reg, reg, imm, flags);
+ return;
+ }
+ break;
+
+ case INS_adc:
+ emitIns_R_R_I(ins, attr, reg, reg, imm, flags);
+ return;
+
+ case INS_vpush:
+ case INS_vpop:
+ assert(imm > 0);
+ if (attr == EA_8BYTE)
+ {
+ assert(isDoubleReg(reg));
+ assert(imm <= 16);
+ imm *= 2;
+ }
+ else
+ {
+ assert(attr == EA_4BYTE);
+ assert(isFloatReg(reg));
+ assert(imm <= 16);
+ }
+ assert(((reg - REG_F0) + imm) <= 32);
+ imm *= 4;
+
+ if (ins == INS_vpush)
+ imm = -imm;
+
+ sf = INS_FLAGS_NOT_SET;
+ fmt = IF_T2_VLDST;
+ break;
+
+ case INS_stm:
+ {
+ sf = INS_FLAGS_NOT_SET;
+
+ bool hasLR = false;
+ bool hasPC = false;
+ bool useT2 = false;
+ bool onlyT1 = false;
+
+            assert((imm & 0x2000) == 0);      // Cannot store SP
+            assert((imm & 0xC000) != 0xC000); // Cannot store both PC and LR
+            assert((imm & 0xFFFF0000) == 0);  // Can only contain the lower 16 bits
+
+            if (imm & 0x8000) // Is the PC being stored?
+                hasPC = true;
+
+            if (imm & 0x4000) // Is the LR being stored?
+ {
+ hasLR = true;
+ useT2 = true;
+ }
+
+ if (!isLowRegister(reg))
+ useT2 = true;
+
+ if (((imm - 1) & imm) == 0) // Is only one or zero bits set in imm?
+ {
+ if (((imm == 0) && !hasLR) || // imm has no bits set, but hasLR is set
+ (!hasPC && !hasLR)) // imm has one bit set, and neither of hasPC/hasLR are set
+ {
+ onlyT1 = true; // if only one bit is set we must use the T1 encoding
+ }
+ }
+
+ imm &= ~0xE000; // ensure that PC, LR and SP bits are removed from imm
+
+ if (((imm & 0x00ff) == imm) && !useT2)
+ {
+ fmt = IF_T1_J1;
+ }
+ else if (!onlyT1)
+ {
+ fmt = IF_T2_I0;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ // We have to use the Thumb-2 str single register encoding
+ // reg = genRegNumFromMask(imm);
+ // emitIns_R(ins, attr, reg);
+ return;
+ }
+
+ //
+ // Encode the PC and LR bits as the lowest two bits
+ //
+ if (fmt == IF_T2_I0)
+ {
+ imm <<= 2;
+ if (hasPC)
+ imm |= 2;
+ if (hasLR)
+ imm |= 1;
+ }
+ assert(imm != 0);
+ }
+ break;
+
+ case INS_and:
+ case INS_bic:
+ case INS_eor:
+ case INS_orr:
+ case INS_orn:
+ case INS_rsb:
+ case INS_sbc:
+
+ case INS_ror:
+ case INS_asr:
+ case INS_lsl:
+ case INS_lsr:
+ // use the Reg, Reg, Imm encoding
+ emitIns_R_R_I(ins, attr, reg, reg, imm, flags);
+ return;
+
+ case INS_mov:
+ assert(!EA_IS_CNS_RELOC(attr));
+
+ if (isLowRegister(reg) && insSetsFlags(flags) && ((imm & 0x00ff) == imm))
+ {
+ fmt = IF_T1_J0;
+ sf = INS_FLAGS_SET;
+ }
+ else if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L1;
+ sf = insMustSetFlags(flags);
+ }
+ else if (isModImmConst(~imm)) // See if we can use move negated instruction instead
+ {
+ ins = INS_mvn;
+ imm = ~imm;
+ fmt = IF_T2_L1;
+ sf = insMustSetFlags(flags);
+ }
+ else if (insDoesNotSetFlags(flags) && ((imm & 0x0000ffff) == imm))
+ {
+ // mov => movw instruction
+ ins = INS_movw;
+ fmt = IF_T2_N;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_movw:
+ case INS_movt:
+ assert(insDoesNotSetFlags(flags));
+ sf = INS_FLAGS_NOT_SET;
+ if ((imm & 0x0000ffff) == imm || EA_IS_RELOC(attr))
+ {
+ fmt = IF_T2_N;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_mvn:
+ if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L1;
+ sf = insMustSetFlags(flags);
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_cmp:
+ assert(!EA_IS_CNS_RELOC(attr));
+ assert(insSetsFlags(flags));
+ sf = INS_FLAGS_SET;
+ if (isLowRegister(reg) && ((imm & 0x0ff) == imm))
+ {
+ fmt = IF_T1_J0;
+ }
+ else if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L2;
+ }
+ else if (isModImmConst(-imm))
+ {
+ ins = INS_cmn;
+ fmt = IF_T2_L2;
+ imm = -imm;
+ }
+ else
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"emitIns_R_I: immediate doesn't fit into the instruction");
+#else // LEGACY_BACKEND
+ // Load val into a register
+ regNumber valReg = codeGen->regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, valReg, (ssize_t)imm);
+ emitIns_R_R(ins, attr, reg, valReg, flags);
+#endif // LEGACY_BACKEND
+ return;
+ }
+ break;
+
+ case INS_cmn:
+ case INS_tst:
+ case INS_teq:
+ assert(insSetsFlags(flags));
+ sf = INS_FLAGS_SET;
+ if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L2;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+#ifdef FEATURE_PLI_INSTRUCTION
+ case INS_pli:
+ assert(insDoesNotSetFlags(flags));
+ if ((reg == REG_SP) && (unsigned_abs(imm) <= 0x0fff))
+ {
+ fmt = IF_T2_K3;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ __fallthrough;
+#endif // FEATURE_PLI_INSTRUCTION
+
+ case INS_pld:
+ case INS_pldw:
+ assert(insDoesNotSetFlags(flags));
+ sf = INS_FLAGS_NOT_SET;
+ if ((imm >= 0) && (imm <= 0x0fff))
+ {
+ fmt = IF_T2_K2;
+ }
+ else if ((imm < 0) && (-imm <= 0x00ff))
+ {
+ imm = -imm;
+ fmt = IF_T2_H2;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T1_F) || (fmt == IF_T1_J0) || (fmt == IF_T1_J1) || (fmt == IF_T2_H2) || (fmt == IF_T2_I0) ||
+ (fmt == IF_T2_K2) || (fmt == IF_T2_K3) || (fmt == IF_T2_L1) || (fmt == IF_T2_L2) || (fmt == IF_T2_M1) ||
+ (fmt == IF_T2_N) || (fmt == IF_T2_VLDST));
+
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers
+ */
+
+void emitter::emitIns_R_R(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags /* = INS_FLAGS_DONT_CARE */)
+
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_add:
+ if (insDoesNotSetFlags(flags))
+ {
+ fmt = IF_T1_D0;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ __fallthrough;
+
+ case INS_sub:
+ // Use the Thumb-1 reg,reg,reg encoding
+ emitIns_R_R_R(ins, attr, reg1, reg1, reg2, flags);
+ return;
+
+ case INS_mov:
+ if (insDoesNotSetFlags(flags))
+ {
+ assert(reg1 != reg2);
+ fmt = IF_T1_D0;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ else // insSetsFlags(flags)
+ {
+ sf = INS_FLAGS_SET;
+ if (isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E;
+ }
+ else
+ {
+ fmt = IF_T2_C3;
+ }
+ }
+ break;
+
+ case INS_cmp:
+ assert(insSetsFlags(flags));
+ sf = INS_FLAGS_SET;
+ if (isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E; // both are low registers
+ }
+ else
+ {
+ fmt = IF_T1_D0; // one or both are high registers
+ }
+ break;
+
+ case INS_vmov_f2i:
+ assert(isGeneralRegister(reg1));
+ assert(isFloatReg(reg2));
+ fmt = IF_T2_VMOVS;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_vmov_i2f:
+ assert(isFloatReg(reg1));
+ assert(isGeneralRegister(reg2));
+ fmt = IF_T2_VMOVS;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_vcvt_d2i:
+ case INS_vcvt_d2u:
+ case INS_vcvt_d2f:
+ assert(isFloatReg(reg1));
+ assert(isDoubleReg(reg2));
+ goto VCVT_COMMON;
+
+ case INS_vcvt_f2d:
+ case INS_vcvt_u2d:
+ case INS_vcvt_i2d:
+ assert(isDoubleReg(reg1));
+ assert(isFloatReg(reg2));
+ goto VCVT_COMMON;
+
+ case INS_vcvt_u2f:
+ case INS_vcvt_i2f:
+ case INS_vcvt_f2i:
+ case INS_vcvt_f2u:
+ assert(size == EA_4BYTE);
+ assert(isFloatReg(reg1));
+ assert(isFloatReg(reg2));
+ goto VCVT_COMMON;
+
+ case INS_vmov:
+ assert(reg1 != reg2);
+ __fallthrough;
+
+ case INS_vabs:
+ case INS_vsqrt:
+ case INS_vcmp:
+ case INS_vneg:
+ if (size == EA_8BYTE)
+ {
+ assert(isDoubleReg(reg1));
+ assert(isDoubleReg(reg2));
+ }
+ else
+ {
+ assert(isFloatReg(reg1));
+ assert(isFloatReg(reg2));
+ }
+ __fallthrough;
+
+ VCVT_COMMON:
+ fmt = IF_T2_VFP2;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_vadd:
+ case INS_vmul:
+ case INS_vsub:
+ case INS_vdiv:
+ emitIns_R_R_R(ins, attr, reg1, reg1, reg2);
+ return;
+
+ case INS_vldr:
+ case INS_vstr:
+ case INS_ldr:
+ case INS_ldrb:
+ case INS_ldrsb:
+ case INS_ldrh:
+ case INS_ldrsh:
+
+ case INS_str:
+ case INS_strb:
+ case INS_strh:
+ emitIns_R_R_I(ins, attr, reg1, reg2, 0);
+ return;
+
+ case INS_adc:
+ case INS_and:
+ case INS_bic:
+ case INS_eor:
+ case INS_orr:
+ case INS_sbc:
+ if (insSetsFlags(flags) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E;
+ sf = INS_FLAGS_SET;
+ break;
+ }
+ __fallthrough;
+
+ case INS_orn:
+            // The assert below fired for bug 281892, where the two operands of an OR were
+            // the same static field load which got CSE'd.
+            // There's no reason why this assert would be true in general, so it is disabled.
+ // assert(reg1 != reg2);
+ // Use the Thumb-2 three register encoding
+ emitIns_R_R_R_I(ins, attr, reg1, reg1, reg2, 0, flags);
+ return;
+
+ case INS_asr:
+ case INS_lsl:
+ case INS_lsr:
+ case INS_ror:
+            // The assert below fired for bug 296394, where the two operands of an
+            // arithmetic right shift were the same local variable.
+            // There's no reason why this assert would be true in general, so it is disabled.
+ // assert(reg1 != reg2);
+ if (insSetsFlags(flags) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E;
+ sf = INS_FLAGS_SET;
+ }
+ else
+ {
+ // Use the Thumb-2 three register encoding
+ emitIns_R_R_R(ins, attr, reg1, reg1, reg2, flags);
+ return;
+ }
+ break;
+
+ case INS_mul:
+ // We will prefer the T2 encoding, unless (flags == INS_FLAGS_SET)
+ // The thumb-1 instruction executes much slower as it must always set the flags
+ //
+ if (insMustSetFlags(flags) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E;
+ sf = INS_FLAGS_SET;
+ }
+ else
+ {
+ // Use the Thumb-2 three register encoding
+ emitIns_R_R_R(ins, attr, reg1, reg2, reg1, flags);
+ return;
+ }
+ break;
+
+ case INS_mvn:
+ case INS_cmn:
+ case INS_tst:
+ if (insSetsFlags(flags) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E;
+ sf = INS_FLAGS_SET;
+ }
+ else
+ {
+ // Use the Thumb-2 register with shift encoding
+ emitIns_R_R_I(ins, attr, reg1, reg2, 0, flags);
+ return;
+ }
+ break;
+
+ case INS_sxtb:
+ case INS_uxtb:
+ assert(size == EA_1BYTE);
+ goto EXTEND_COMMON;
+
+ case INS_sxth:
+ case INS_uxth:
+ assert(size == EA_2BYTE);
+ EXTEND_COMMON:
+ assert(insDoesNotSetFlags(flags));
+ if (isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ else
+ {
+ // Use the Thumb-2 reg,reg with rotation encoding
+ emitIns_R_R_I(ins, attr, reg1, reg2, 0, INS_FLAGS_NOT_SET);
+ return;
+ }
+ break;
+
+ case INS_tbb:
+ assert(size == EA_1BYTE);
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_C9;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_tbh:
+ assert(size == EA_2BYTE);
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_C9;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_clz:
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_C10;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_ldrexb:
+ case INS_strexb:
+ assert(size == EA_1BYTE);
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_E1;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_ldrexh:
+ case INS_strexh:
+ assert(size == EA_2BYTE);
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_E1;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ default:
+#ifdef DEBUG
+ printf("did not expect instruction %s\n", codeGen->genInsName(ins));
+#endif
+ unreached();
+ }
+
+ assert((fmt == IF_T1_D0) || (fmt == IF_T1_E) || (fmt == IF_T2_C3) || (fmt == IF_T2_C9) || (fmt == IF_T2_C10) ||
+ (fmt == IF_T2_VFP2) || (fmt == IF_T2_VMOVD) || (fmt == IF_T2_VMOVS) || (fmt == IF_T2_E1));
+
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrSmall(attr);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and two constants.
+ */
+
+void emitter::emitIns_R_I_I(
+ instruction ins, emitAttr attr, regNumber reg, int imm1, int imm2, insFlags flags /* = INS_FLAGS_DONT_CARE */)
+
+{
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+ int imm = 0; // combined immediates
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_bfc:
+ {
+ int lsb = imm1;
+ int msb = lsb + imm2 - 1;
+
+ assert((lsb >= 0) && (lsb <= 31)); // required for encoding of INS_bfc
+ assert((msb >= 0) && (msb <= 31)); // required for encoding of INS_bfc
+ assert(msb >= lsb); // required for encoding of INS_bfc
+
+ imm = (lsb << 5) | msb;
+
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_D1;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ break;
+
+ default:
+ unreached();
+ }
+ assert(fmt == IF_T2_D1);
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers and a constant.
+ */
+
+void emitter::emitIns_R_R_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ int imm,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */,
+ insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+
+ if (ins == INS_lea)
+ {
+ ins = INS_add;
+ }
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_add:
+ assert(insOptsNone(opt));
+
+ // Can we possibly encode the immediate 'imm' using a Thumb-1 encoding?
+ if ((reg2 == REG_SP) && insDoesNotSetFlags(flags) && ((imm & 0x03fc) == imm))
+ {
+ if ((reg1 == REG_SP) && ((imm & 0x01fc) == imm))
+ {
+ // Use Thumb-1 encoding
+ emitIns_R_I(ins, attr, reg1, imm, flags);
+ return;
+ }
+ else if (isLowRegister(reg1))
+ {
+ fmt = IF_T1_J2;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ }
+ __fallthrough;
+
+ case INS_sub:
+ assert(insOptsNone(opt));
+
+ // Is it just a mov?
+ if (imm == 0)
+ {
+ // Is the mov even necessary?
+ // Fix 383915 ARM ILGEN
+ if (reg1 != reg2)
+ {
+ emitIns_R_R(INS_mov, attr, reg1, reg2, flags);
+ }
+ return;
+ }
+ // Can we encode the immediate 'imm' using a Thumb-1 encoding?
+ else if (isLowRegister(reg1) && isLowRegister(reg2) && insSetsFlags(flags) && (unsigned_abs(imm) <= 0x0007))
+ {
+ if (imm < 0)
+ {
+ assert((ins == INS_add) || (ins == INS_sub));
+ if (ins == INS_add)
+ ins = INS_sub;
+ else
+ ins = INS_add;
+ imm = -imm;
+ }
+ fmt = IF_T1_G;
+ sf = INS_FLAGS_SET;
+ }
+ else if ((reg1 == reg2) && isLowRegister(reg1) && insSetsFlags(flags) && (unsigned_abs(imm) <= 0x00ff))
+ {
+ if (imm < 0)
+ {
+ assert((ins == INS_add) || (ins == INS_sub));
+ if (ins == INS_add)
+ ins = INS_sub;
+ else
+ ins = INS_add;
+ imm = -imm;
+ }
+ // Use Thumb-1 encoding
+ emitIns_R_I(ins, attr, reg1, imm, flags);
+ return;
+ }
+ else if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L0;
+ sf = insMustSetFlags(flags);
+ }
+ else if (isModImmConst(-imm))
+ {
+ assert((ins == INS_add) || (ins == INS_sub));
+ ins = (ins == INS_add) ? INS_sub : INS_add;
+ imm = -imm;
+ fmt = IF_T2_L0;
+ sf = insMustSetFlags(flags);
+ }
+ else if (insDoesNotSetFlags(flags) && (unsigned_abs(imm) <= 0x0fff))
+ {
+ if (imm < 0)
+ {
+ assert((ins == INS_add) || (ins == INS_sub));
+ ins = (ins == INS_add) ? INS_sub : INS_add;
+ imm = -imm;
+ }
+ // add/sub => addw/subw instruction
+                // Note that even when using the w prefix (addw/subw) the immediate is still only 12 bits.
+ ins = (ins == INS_add) ? INS_addw : INS_subw;
+ fmt = IF_T2_M0;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_and:
+ case INS_bic:
+ case INS_orr:
+ case INS_orn:
+ assert(insOptsNone(opt));
+ if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L0;
+ sf = insMustSetFlags(flags);
+ }
+ else if (isModImmConst(~imm))
+ {
+ fmt = IF_T2_L0;
+ sf = insMustSetFlags(flags);
+ imm = ~imm;
+
+ if (ins == INS_and)
+ ins = INS_bic;
+ else if (ins == INS_bic)
+ ins = INS_and;
+ else if (ins == INS_orr)
+ ins = INS_orn;
+ else if (ins == INS_orn)
+ ins = INS_orr;
+ else
+ assert(!"Instruction cannot be encoded");
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_rsb:
+ assert(insOptsNone(opt));
+ if (imm == 0 && isLowRegister(reg1) && isLowRegister(reg2) && insSetsFlags(flags))
+ {
+ fmt = IF_T1_E;
+ sf = INS_FLAGS_SET;
+ break;
+ }
+ __fallthrough;
+
+ case INS_adc:
+ case INS_eor:
+ case INS_sbc:
+ assert(insOptsNone(opt));
+ if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L0;
+ sf = insMustSetFlags(flags);
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_adr:
+ assert(insOptsNone(opt));
+ assert(insDoesNotSetFlags(flags));
+ assert(reg2 == REG_PC);
+ sf = INS_FLAGS_NOT_SET;
+
+ if (isLowRegister(reg1) && ((imm & 0x00ff) == imm))
+ {
+ fmt = IF_T1_J3;
+ }
+ else if ((imm & 0x0fff) == imm)
+ {
+ fmt = IF_T2_M1;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_mvn:
+ assert((imm >= 0) && (imm <= 31)); // required for encoding
+ assert(!insOptAnyInc(opt));
+ if (imm == 0)
+ {
+ assert(insOptsNone(opt));
+ if (isLowRegister(reg1) && isLowRegister(reg2) && insSetsFlags(flags))
+ {
+ // Use the Thumb-1 reg,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ }
+ else // imm > 0 && imm <= 31
+ {
+ assert(insOptAnyShift(opt));
+ }
+ fmt = IF_T2_C1;
+ sf = insMustSetFlags(flags);
+ break;
+
+ case INS_cmp:
+ case INS_cmn:
+ case INS_teq:
+ case INS_tst:
+ assert(insSetsFlags(flags));
+ assert((imm >= 0) && (imm <= 31)); // required for encoding
+ assert(!insOptAnyInc(opt));
+ if (imm == 0)
+ {
+ assert(insOptsNone(opt));
+ if (ins == INS_cmp)
+ {
+ // Use the Thumb-1 reg,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ if (((ins == INS_cmn) || (ins == INS_tst)) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ // Use the Thumb-1 reg,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ }
+            else // imm > 0 && imm <= 31
+ {
+ assert(insOptAnyShift(opt));
+ if (insOptsRRX(opt))
+ assert(imm == 1);
+ }
+
+ fmt = IF_T2_C8;
+ sf = INS_FLAGS_SET;
+ break;
+
+ case INS_ror:
+ case INS_asr:
+ case INS_lsl:
+ case INS_lsr:
+ assert(insOptsNone(opt));
+
+ // On ARM, the immediate shift count of LSL and ROR must be between 1 and 31. For LSR and ASR, it is between
+ // 1 and 32, though we don't ever use 32. Although x86 allows an immediate shift count of 8-bits in
+ // instruction encoding, the CPU looks at only the lower 5 bits. As per ECMA, specifying a shift count to
+ // the IL SHR, SHL, or SHL.UN instruction that is greater than or equal to the width of the type will yield
+ // an undefined value. We choose that undefined value in this case to match x86 behavior, by only using the
+ // lower 5 bits of the constant shift count.
+ imm &= 0x1f;
+
+ if (imm == 0)
+ {
+ // Additional Fix 383915 ARM ILGEN
+ if ((reg1 != reg2) || insMustSetFlags(flags))
+ {
+                // Use MOV/MOVS instruction
+ emitIns_R_R(INS_mov, attr, reg1, reg2, flags);
+ }
+ return;
+ }
+
+ if (insSetsFlags(flags) && (ins != INS_ror) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_C;
+ sf = INS_FLAGS_SET;
+ }
+ else
+ {
+ fmt = IF_T2_C2;
+ sf = insMustSetFlags(flags);
+ }
+ break;
+
+ case INS_sxtb:
+ case INS_uxtb:
+ assert(size == EA_1BYTE);
+ goto EXTEND_COMMON;
+
+ case INS_sxth:
+ case INS_uxth:
+ assert(size == EA_2BYTE);
+ EXTEND_COMMON:
+ assert(insOptsNone(opt));
+ assert(insDoesNotSetFlags(flags));
+ assert((imm & 0x018) == imm); // required for encoding
+
+ if ((imm == 0) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ // Use Thumb-1 encoding
+ emitIns_R_R(ins, attr, reg1, reg2, INS_FLAGS_NOT_SET);
+ return;
+ }
+
+ fmt = IF_T2_C6;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_pld:
+ case INS_pldw:
+#ifdef FEATURE_PLI_INSTRUCTION
+ case INS_pli:
+#endif // FEATURE_PLI_INSTRUCTION
+ assert(insOptsNone(opt));
+ assert(insDoesNotSetFlags(flags));
+ assert((imm & 0x003) == imm); // required for encoding
+
+ fmt = IF_T2_C7;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_ldrb:
+ case INS_strb:
+ assert(size == EA_1BYTE);
+ assert(insDoesNotSetFlags(flags));
+
+ if (isLowRegister(reg1) && isLowRegister(reg2) && insOptsNone(opt) && ((imm & 0x001f) == imm))
+ {
+ fmt = IF_T1_C;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ goto COMMON_THUMB2_LDST;
+
+ case INS_ldrsb:
+ assert(size == EA_1BYTE);
+ goto COMMON_THUMB2_LDST;
+
+ case INS_ldrh:
+ case INS_strh:
+ assert(size == EA_2BYTE);
+ assert(insDoesNotSetFlags(flags));
+
+ if (isLowRegister(reg1) && isLowRegister(reg2) && insOptsNone(opt) && ((imm & 0x003e) == imm))
+ {
+ fmt = IF_T1_C;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ goto COMMON_THUMB2_LDST;
+
+ case INS_ldrsh:
+ assert(size == EA_2BYTE);
+ goto COMMON_THUMB2_LDST;
+
+ case INS_vldr:
+ case INS_vstr:
+ case INS_vldm:
+ case INS_vstm:
+ assert(fmt == IF_NONE);
+ assert(insDoesNotSetFlags(flags));
+ assert(offsetFitsInVectorMem(imm)); // required for encoding
+ if (insOptAnyInc(opt))
+ {
+ if (insOptsPostInc(opt))
+ {
+ assert(imm > 0);
+ }
+ else // insOptsPreDec(opt)
+ {
+ assert(imm < 0);
+ }
+ }
+ else
+ {
+ assert(insOptsNone(opt));
+ }
+
+ sf = INS_FLAGS_NOT_SET;
+ fmt = IF_T2_VLDST;
+ break;
+
+ case INS_ldr:
+ case INS_str:
+ assert(size == EA_4BYTE);
+ assert(insDoesNotSetFlags(flags));
+
+ // Can we possibly encode the immediate 'imm' using a Thumb-1 encoding?
+ if (isLowRegister(reg1) && insOptsNone(opt) && ((imm & 0x03fc) == imm))
+ {
+ if (reg2 == REG_SP)
+ {
+ fmt = IF_T1_J2;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ else if (reg2 == REG_PC)
+ {
+ if (ins == INS_ldr)
+ {
+ fmt = IF_T1_J3;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ }
+ else if (isLowRegister(reg2))
+ {
+ // Only the smaller range 'imm' can be encoded
+ if ((imm & 0x07c) == imm)
+ {
+ fmt = IF_T1_C;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ }
+ }
+ //
+ // If we did not find a thumb-1 encoding above
+ //
+ __fallthrough;
+
+ COMMON_THUMB2_LDST:
+ assert(fmt == IF_NONE);
+ assert(insDoesNotSetFlags(flags));
+ sf = INS_FLAGS_NOT_SET;
+
+ if (insOptAnyInc(opt))
+ {
+ if (insOptsPostInc(opt))
+ assert(imm > 0);
+ else // insOptsPreDec(opt)
+ assert(imm < 0);
+
+ if (unsigned_abs(imm) <= 0x00ff)
+ {
+ fmt = IF_T2_H0;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ }
+ else
+ {
+ assert(insOptsNone(opt));
+ if ((reg2 == REG_PC) && (unsigned_abs(imm) <= 0x0fff))
+ {
+ fmt = IF_T2_K4;
+ }
+ else if ((imm & 0x0fff) == imm)
+ {
+ fmt = IF_T2_K1;
+ }
+ else if (unsigned_abs(imm) <= 0x0ff)
+ {
+ fmt = IF_T2_H0;
+ }
+ else
+ {
+ // Load imm into a register
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ codeGen->instGen_Set_Reg_To_Imm(EA_4BYTE, rsvdReg, (ssize_t)imm);
+ emitIns_R_R_R(ins, attr, reg1, reg2, rsvdReg);
+ return;
+ }
+ }
+ break;
+
+ case INS_ldrex:
+ case INS_strex:
+ assert(insOptsNone(opt));
+ assert(insDoesNotSetFlags(flags));
+ sf = INS_FLAGS_NOT_SET;
+
+ if ((imm & 0x03fc) == imm)
+ {
+ fmt = IF_T2_H0;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ default:
+ assert(!"Unexpected instruction");
+ }
+ assert((fmt == IF_T1_C) || (fmt == IF_T1_E) || (fmt == IF_T1_G) || (fmt == IF_T1_J2) || (fmt == IF_T1_J3) ||
+ (fmt == IF_T2_C1) || (fmt == IF_T2_C2) || (fmt == IF_T2_C6) || (fmt == IF_T2_C7) || (fmt == IF_T2_C8) ||
+ (fmt == IF_T2_H0) || (fmt == IF_T2_H1) || (fmt == IF_T2_K1) || (fmt == IF_T2_K4) || (fmt == IF_T2_L0) ||
+ (fmt == IF_T2_M0) || (fmt == IF_T2_VLDST) || (fmt == IF_T2_M1));
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idInsOpt(opt);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
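+
+// Illustrative sketch (not part of the emitter): given the selection logic above, a caller
+// wanting "adds r0, r1, #3" or "add r4, sp, #16" could issue the calls below. The
+// REG_R0/REG_R1/REG_R4 names are the usual register enum values and are assumed here;
+// the first call picks the Thumb-1 IF_T1_G form and the second typically picks the
+// Thumb-1 SP-relative IF_T1_J2 form.
+//
+//     emitIns_R_R_I(INS_add, EA_4BYTE, REG_R0, REG_R1, 3, INS_FLAGS_SET); // adds r0, r1, #3
+//     emitIns_R_R_I(INS_add, EA_4BYTE, REG_R4, REG_SP, 16);               // add  r4, sp, #16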
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing three registers.
+ */
+
+void emitter::emitIns_R_R_R(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_add:
+ // Encodings do not support SP in the reg3 slot
+ if (reg3 == REG_SP)
+ {
+ // Swap reg2 and reg3
+ reg3 = reg2;
+ reg2 = REG_SP;
+ }
+ __fallthrough;
+
+ case INS_sub:
+ assert(reg3 != REG_SP);
+
+ if (isLowRegister(reg1) && isLowRegister(reg2) && isLowRegister(reg3) && insSetsFlags(flags))
+ {
+ fmt = IF_T1_H;
+ sf = INS_FLAGS_SET;
+ break;
+ }
+
+ if ((ins == INS_add) && insDoesNotSetFlags(flags))
+ {
+ if (reg1 == reg2)
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg3, flags);
+ return;
+ }
+ if (reg1 == reg3)
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ }
+
+ // Use the Thumb-2 reg,reg,reg with shift encoding
+ emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, flags);
+ return;
+
+ case INS_adc:
+ case INS_and:
+ case INS_bic:
+ case INS_eor:
+ case INS_orr:
+ case INS_sbc:
+ if (reg1 == reg2)
+ {
+ // Try to encode as a Thumb-1 instruction
+ emitIns_R_R(ins, attr, reg1, reg3, flags);
+ return;
+ }
+ __fallthrough;
+
+ case INS_orn:
+ // Use the Thumb-2 three register encoding, with imm=0
+ emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, flags);
+ return;
+
+ case INS_asr:
+ case INS_lsl:
+ case INS_lsr:
+ if (reg1 == reg2 && insSetsFlags(flags) && isLowRegister(reg1) && isLowRegister(reg3))
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg3, flags);
+ return;
+ }
+ __fallthrough;
+
+ case INS_ror:
+ fmt = IF_T2_C4;
+ sf = insMustSetFlags(flags);
+ break;
+
+ case INS_mul:
+ if (insMustSetFlags(flags))
+ {
+ if ((reg1 == reg2) && isLowRegister(reg1))
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg3, flags);
+ return;
+ }
+ if ((reg1 == reg3) && isLowRegister(reg1))
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ }
+ __fallthrough;
+
+ case INS_sdiv:
+ case INS_udiv:
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_C5;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_ldrb:
+ case INS_strb:
+ case INS_ldrsb:
+ assert(size == EA_1BYTE);
+ goto COMMON_THUMB1_LDST;
+
+ case INS_ldrsh:
+ case INS_ldrh:
+ case INS_strh:
+ assert(size == EA_2BYTE);
+ goto COMMON_THUMB1_LDST;
+
+ case INS_ldr:
+ case INS_str:
+ assert(size == EA_4BYTE);
+
+ COMMON_THUMB1_LDST:
+ assert(insDoesNotSetFlags(flags));
+
+ if (isLowRegister(reg1) && isLowRegister(reg2) && isLowRegister(reg3))
+ {
+ fmt = IF_T1_H;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ else
+ {
+ // Use the Thumb-2 reg,reg,reg with shift encoding
+ emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, flags);
+ return;
+ }
+ break;
+
+ case INS_vadd:
+ case INS_vmul:
+ case INS_vsub:
+ case INS_vdiv:
+ if (size == EA_8BYTE)
+ {
+ assert(isDoubleReg(reg1));
+ assert(isDoubleReg(reg2));
+ assert(isDoubleReg(reg3));
+ }
+ else
+ {
+ assert(isFloatReg(reg1));
+ assert(isFloatReg(reg2));
+ assert(isFloatReg(reg3));
+ }
+ fmt = IF_T2_VFP3;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_vmov_i2d:
+ assert(isDoubleReg(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ fmt = IF_T2_VMOVD;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_vmov_d2i:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isDoubleReg(reg3));
+ fmt = IF_T2_VMOVD;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_ldrexd:
+ case INS_strexd:
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_G1;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T1_H) || (fmt == IF_T2_C4) || (fmt == IF_T2_C5) || (fmt == IF_T2_VFP3) || (fmt == IF_T2_VMOVD) ||
+ (fmt == IF_T2_G1));
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstr(attr);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
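+
+// Illustrative sketch (not part of the emitter): for three low registers with flags requested,
+// the routine above picks the Thumb-1 IF_T1_H form; sdiv/udiv always use the Thumb-2 IF_T2_C5
+// form. The register names below are assumed enum values.
+//
+//     emitIns_R_R_R(INS_add,  EA_4BYTE, REG_R0, REG_R1, REG_R2, INS_FLAGS_SET); // adds r0, r1, r2
+//     emitIns_R_R_R(INS_sdiv, EA_4BYTE, REG_R0, REG_R1, REG_R2);                // sdiv r0, r1, r2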
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers and two constants.
+ */
+
+void emitter::emitIns_R_R_I_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ int imm1,
+ int imm2,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+
+ int lsb = imm1;
+ int width = imm2;
+ int msb = lsb + width - 1;
+ int imm = 0; /* combined immediate */
+
+ assert((lsb >= 0) && (lsb <= 31)); // required for encodings
+ assert((width > 0) && (width <= 32)); // required for encodings
+ assert((msb >= 0) && (msb <= 31)); // required for encodings
+ assert(msb >= lsb); // required for encodings
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_bfi:
+ assert(insDoesNotSetFlags(flags));
+ imm = (lsb << 5) | msb;
+
+ fmt = IF_T2_D0;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_sbfx:
+ case INS_ubfx:
+ assert(insDoesNotSetFlags(flags));
+ imm = (lsb << 5) | (width - 1);
+
+ fmt = IF_T2_D0;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T2_D0));
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
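+
+// Illustrative sketch (not part of the emitter): extracting a 4-bit field starting at bit 8
+// packs lsb=8 and width=4 into imm = (8 << 5) | (4 - 1) = 0x103, which is later unpacked by
+// insEncodeBitFieldImm. The register names below are assumed enum values.
+//
+//     emitIns_R_R_I_I(INS_ubfx, EA_4BYTE, REG_R0, REG_R1, 8, 4); // ubfx r0, r1, #8, #4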
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing three registers and a constant.
+ */
+
+void emitter::emitIns_R_R_R_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ int imm,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */,
+ insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+
+ case INS_add:
+ case INS_sub:
+ if (imm == 0)
+ {
+ if (isLowRegister(reg1) && isLowRegister(reg2) && isLowRegister(reg3) && insSetsFlags(flags))
+ {
+ // Use the Thumb-1 reg,reg,reg encoding
+ emitIns_R_R_R(ins, attr, reg1, reg2, reg3, flags);
+ return;
+ }
+ if ((ins == INS_add) && insDoesNotSetFlags(flags))
+ {
+ if (reg1 == reg2)
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg3, flags);
+ return;
+ }
+ if (reg1 == reg3)
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ }
+ }
+ __fallthrough;
+
+ case INS_adc:
+ case INS_and:
+ case INS_bic:
+ case INS_eor:
+ case INS_orn:
+ case INS_orr:
+ case INS_sbc:
+ assert((imm >= 0) && (imm <= 31)); // required for encoding
+ assert(!insOptAnyInc(opt));
+ if (imm == 0)
+ {
+ if (opt == INS_OPTS_LSL) // left shift of zero
+ opt = INS_OPTS_NONE; // is a nop
+
+ assert(insOptsNone(opt));
+ if (isLowRegister(reg1) && isLowRegister(reg2) && isLowRegister(reg3) && insSetsFlags(flags))
+ {
+ if (reg1 == reg2)
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg3, flags);
+ return;
+ }
+ if ((reg1 == reg3) && (ins != INS_bic) && (ins != INS_orn) && (ins != INS_sbc))
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ }
+ }
+            else // imm > 0 && imm <= 31
+ {
+ assert(insOptAnyShift(opt));
+ if (insOptsRRX(opt))
+ assert(imm == 1);
+ }
+ fmt = IF_T2_C0;
+ sf = insMustSetFlags(flags);
+ break;
+
+ case INS_ldrb:
+ case INS_ldrsb:
+ case INS_strb:
+ assert(size == EA_1BYTE);
+ goto COMMON_THUMB2_LDST;
+
+ case INS_ldrh:
+ case INS_ldrsh:
+ case INS_strh:
+ assert(size == EA_2BYTE);
+ goto COMMON_THUMB2_LDST;
+
+ case INS_ldr:
+ case INS_str:
+ assert(size == EA_4BYTE);
+
+ COMMON_THUMB2_LDST:
+ assert(insDoesNotSetFlags(flags));
+ assert((imm & 0x0003) == imm); // required for encoding
+
+ if ((imm == 0) && insOptsNone(opt) && isLowRegister(reg1) && isLowRegister(reg2) && isLowRegister(reg3))
+ {
+ // Use the Thumb-1 reg,reg,reg encoding
+ emitIns_R_R_R(ins, attr, reg1, reg2, reg3, flags);
+ return;
+ }
+ assert(insOptsNone(opt) || insOptsLSL(opt));
+ fmt = IF_T2_E0;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_ldrd:
+ case INS_strd:
+ assert(insDoesNotSetFlags(flags));
+ assert((imm & 0x03) == 0);
+ sf = INS_FLAGS_NOT_SET;
+
+ if (insOptAnyInc(opt))
+ {
+ if (insOptsPostInc(opt))
+ assert(imm > 0);
+ else // insOptsPreDec(opt)
+ assert(imm < 0);
+ }
+ else
+ {
+ assert(insOptsNone(opt));
+ }
+
+ if (unsigned_abs(imm) <= 0x03fc)
+ {
+ imm >>= 2;
+ fmt = IF_T2_G0;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T2_C0) || (fmt == IF_T2_E0) || (fmt == IF_T2_G0));
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ // 3-reg ops can't use the small instrdesc
+ instrDescCns* id = emitAllocInstrCns(attr);
+ id->idSetIsLargeCns();
+ id->idcCnsVal = imm;
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(emitInsSize(fmt));
+
+ id->idInsFlags(sf);
+ id->idInsOpt(opt);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
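+
+// Illustrative sketch (not part of the emitter): a scaled indexed load such as
+// "ldr r0, [r1, r2, lsl #2]" maps onto the Thumb-2 IF_T2_E0 form selected above.
+// The register names below are assumed enum values.
+//
+//     emitIns_R_R_R_I(INS_ldr, EA_4BYTE, REG_R0, REG_R1, REG_R2, 2, INS_FLAGS_NOT_SET, INS_OPTS_LSL);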
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing four registers.
+ */
+
+void emitter::emitIns_R_R_R_R(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4)
+{
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_NOT_SET;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+
+ case INS_smull:
+ case INS_umull:
+ case INS_smlal:
+ case INS_umlal:
+ assert(reg1 != reg2); // Illegal encoding
+ fmt = IF_T2_F1;
+ break;
+ case INS_mla:
+ case INS_mls:
+ fmt = IF_T2_F2;
+ break;
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T2_F1) || (fmt == IF_T2_F2));
+
+ instrDesc* id = emitNewInstr(attr);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+ id->idReg4(reg4);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
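+
+// Illustrative sketch (not part of the emitter): a 64-bit multiply writing its low and high
+// halves to two different registers (as required by the assert above) could be requested as
+// below; the register names are assumed enum values.
+//
+//     emitIns_R_R_R_R(INS_smull, EA_4BYTE, REG_R0, REG_R1, REG_R2, REG_R3); // smull r0, r1, r2, r3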
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static data member operand. If 'size' is 0, the
+ * instruction operates on the address of the static member instead of its
+ * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
+ */
+
+void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
+{
+ NYI("emitIns_C");
+}
+
+/*****************************************************************************
+ *
+ *  Add an instruction referencing a stack-based local variable.
+ */
+
+void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
+{
+ NYI("emitIns_S");
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a stack-based local variable.
+ */
+void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Load() to select the correct instruction");
+ }
+
+ switch (ins)
+ {
+ case INS_add:
+ case INS_ldr:
+ case INS_ldrh:
+ case INS_ldrb:
+ case INS_ldrsh:
+ case INS_ldrsb:
+ case INS_vldr:
+ case INS_vmov:
+ case INS_movw:
+ case INS_movt:
+ break;
+
+ case INS_lea:
+ ins = INS_add;
+ break;
+
+ default:
+ NYI("emitIns_R_S");
+ return;
+ }
+
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_NOT_SET;
+ regNumber reg2;
+
+ /* Figure out the variable's frame position */
+ int base;
+ int disp;
+ unsigned undisp;
+
+ base = emitComp->lvaFrameAddress(varx, emitComp->funCurrentFunc()->funKind != FUNC_ROOT, &reg2, offs);
+
+ disp = base + offs;
+ undisp = unsigned_abs(disp);
+
+ if (CodeGen::instIsFP(ins))
+ {
+ // all fp mem ops take 8 bit immediate, multiplied by 4, plus sign
+ //
+ // Note if undisp is not a multiple of four we will fail later on
+ // when we try to encode this instruction
+        // It's better to fail later with a better error message than
+ // to fail here when the RBM_OPT_RSVD is not available
+ //
+ if (undisp <= 0x03fb)
+ {
+ fmt = IF_T2_VLDST;
+ }
+ else
+ {
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ emitIns_genStackOffset(rsvdReg, varx, offs);
+ emitIns_R_R(INS_add, EA_4BYTE, rsvdReg, reg2);
+ emitIns_R_R_I(ins, attr, reg1, rsvdReg, 0);
+ return;
+ }
+ }
+ else if (emitInsIsLoadOrStore(ins))
+ {
+ if (isLowRegister(reg1) && (reg2 == REG_SP) && (ins == INS_ldr) && ((disp & 0x03fc) == disp && disp <= 0x03f8))
+ {
+ fmt = IF_T1_J2;
+ }
+ else if (disp >= 0 && disp <= 0x0ffb)
+ {
+ fmt = IF_T2_K1;
+ }
+ else if (undisp <= 0x0fb)
+ {
+ fmt = IF_T2_H0;
+ }
+ else
+ {
+ // Load disp into a register
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ emitIns_genStackOffset(rsvdReg, varx, offs);
+ fmt = IF_T2_E0;
+ }
+ }
+ else if (ins == INS_add)
+ {
+ if (isLowRegister(reg1) && (reg2 == REG_SP) && ((disp & 0x03fc) == disp && disp <= 0x03f8))
+ {
+ fmt = IF_T1_J2;
+ }
+ else if (undisp <= 0x0ffb)
+ {
+ if (disp < 0)
+ {
+ ins = INS_sub;
+ disp = -disp;
+ }
+ // add/sub => addw/subw instruction
+            // Note that even with the wide (addw/subw) encoding the immediate is still limited to 12 bits
+ ins = (ins == INS_add) ? INS_addw : INS_subw;
+ fmt = IF_T2_M0;
+ }
+ else
+ {
+ // Load disp into a register
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ emitIns_genStackOffset(rsvdReg, varx, offs);
+ emitIns_R_R_R(ins, attr, reg1, reg2, rsvdReg);
+ return;
+ }
+ }
+ else if (ins == INS_movw || ins == INS_movt)
+ {
+ fmt = IF_T2_N;
+ }
+
+ assert((fmt == IF_T1_J2) || (fmt == IF_T2_E0) || (fmt == IF_T2_H0) || (fmt == IF_T2_K1) || (fmt == IF_T2_L0) ||
+ (fmt == IF_T2_N) || (fmt == IF_T2_VLDST) || (fmt == IF_T2_M0));
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrCns(attr, disp);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idInsOpt(INS_OPTS_NONE);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idSetIsLclVar();
+ if (reg2 == REG_FP)
+ id->idSetIsLclFPBase();
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
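+
+// Illustrative sketch (not part of the emitter): loading a stack-based local into a register
+// goes through the frame-offset logic above; 'lclNum' is a hypothetical local variable number
+// and REG_R0 an assumed register enum value.
+//
+//     emitIns_R_S(INS_ldr, EA_4BYTE, REG_R0, lclNum, 0); // ldr r0, [sp/fp + <frame offset>]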
+
+// generate the offset of &varx + offs into a register
+void emitter::emitIns_genStackOffset(regNumber r, int varx, int offs)
+{
+ regNumber regBase;
+ int base;
+ int disp;
+
+ base = emitComp->lvaFrameAddress(varx, emitComp->funCurrentFunc()->funKind != FUNC_ROOT, &regBase, offs);
+ disp = base + offs;
+
+ emitIns_R_S(INS_movw, EA_4BYTE, r, varx, offs);
+
+ if ((disp & 0xffff) != disp)
+ {
+ emitIns_R_S(INS_movt, EA_4BYTE, r, varx, offs);
+ }
+}
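+
+// Illustrative sketch (not part of the emitter): for a frame displacement of, say, 0x12345,
+// the routine above materializes the offset in the scratch register roughly as:
+//
+//     movw rT, #0x2345    // low 16 bits of the displacement
+//     movt rT, #0x0001    // high 16 bits, emitted only when (disp & 0xffff) != disp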
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a stack-based local variable and a register
+ */
+void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Store() to select the correct instruction");
+ }
+
+ switch (ins)
+ {
+ case INS_str:
+ case INS_strh:
+ case INS_strb:
+ case INS_vstr:
+ break;
+
+ default:
+            NYI("emitIns_S_R");
+ return;
+ }
+
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_NOT_SET;
+ regNumber reg2;
+
+ /* Figure out the variable's frame position */
+ int base;
+ int disp;
+ unsigned undisp;
+
+ base = emitComp->lvaFrameAddress(varx, emitComp->funCurrentFunc()->funKind != FUNC_ROOT, &reg2, offs);
+
+ disp = base + offs;
+ undisp = unsigned_abs(disp);
+
+ if (CodeGen::instIsFP(ins))
+ {
+ // all fp mem ops take 8 bit immediate, multiplied by 4, plus sign
+ //
+ // Note if undisp is not a multiple of four we will fail later on
+ // when we try to encode this instruction
+        // It's better to fail later with a better error message than
+ // to fail here when the RBM_OPT_RSVD is not available
+ //
+ if (undisp <= 0x03fb)
+ {
+ fmt = IF_T2_VLDST;
+ }
+ else
+ {
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ emitIns_genStackOffset(rsvdReg, varx, offs);
+ emitIns_R_R(INS_add, EA_4BYTE, rsvdReg, reg2);
+ emitIns_R_R_I(ins, attr, reg1, rsvdReg, 0);
+ return;
+ }
+ }
+ else if (isLowRegister(reg1) && (reg2 == REG_SP) && (ins == INS_str) && ((disp & 0x03fc) == disp && disp <= 0x03f8))
+ {
+ fmt = IF_T1_J2;
+ }
+ else if (disp >= 0 && disp <= 0x0ffb)
+ {
+ fmt = IF_T2_K1;
+ }
+ else if (undisp <= 0x0fb)
+ {
+ fmt = IF_T2_H0;
+ }
+ else
+ {
+ // Load disp into a register
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ emitIns_genStackOffset(rsvdReg, varx, offs);
+ fmt = IF_T2_E0;
+ }
+ assert((fmt == IF_T1_J2) || (fmt == IF_T2_E0) || (fmt == IF_T2_H0) || (fmt == IF_T2_VLDST) || (fmt == IF_T2_K1));
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrCns(attr, disp);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idInsOpt(INS_OPTS_NONE);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idSetIsLclVar();
+ if (reg2 == REG_FP)
+ id->idSetIsLclFPBase();
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add an instruction referencing a stack-based local variable and an immediate
+ */
+void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
+{
+ NYI("emitIns_S_I");
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a register + static member operands.
+ */
+void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Load() to select the correct instruction");
+ }
+ assert(emitInsIsLoad(ins) || (ins == INS_lea));
+ if (ins == INS_lea)
+ {
+ ins = INS_add;
+ }
+
+ int doff = Compiler::eeGetJitDataOffs(fldHnd);
+ ssize_t addr = NULL;
+
+ if (doff >= 0)
+ {
+ NYI_ARM("JitDataOffset static fields");
+ }
+ else if (fldHnd == FLD_GLOBAL_FS)
+ {
+ NYI_ARM("Thread-Local-Storage static fields");
+ }
+ else if (fldHnd == FLD_GLOBAL_DS)
+ {
+ addr = (ssize_t)offs;
+ offs = 0;
+ }
+ else
+ {
+ assert(!jitStaticFldIsGlobAddr(fldHnd));
+ addr = (ssize_t)emitComp->info.compCompHnd->getFieldAddress(fldHnd, NULL);
+ if (addr == NULL)
+ NO_WAY("could not obtain address of static field");
+ }
+
+ // We can use reg to load the constant address,
+ // as long as it is not a floating point register
+ regNumber regTmp = reg;
+
+ if (isFloatReg(regTmp))
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"emitIns_R_C() cannot be called with floating point target");
+#else // LEGACY_BACKEND
+ regTmp = codeGen->regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg));
+#endif // LEGACY_BACKEND
+ }
+
+ // Load address of CLS_VAR into a register
+ codeGen->instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regTmp, addr);
+
+ if ((ins != INS_add) || (offs != 0) || (reg != regTmp))
+ {
+ emitIns_R_R_I(ins, attr, reg, regTmp, offs);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static member + register operands.
+ */
+
+void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
+{
+#ifndef LEGACY_BACKEND
+ assert(!"emitIns_C_R not supported for RyuJIT backend");
+#else // LEGACY_BACKEND
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Store() to select the correct instruction");
+ }
+ assert(emitInsIsStore(ins));
+
+ int doff = Compiler::eeGetJitDataOffs(fldHnd);
+ ssize_t addr = NULL;
+
+ if (doff >= 0)
+ {
+ NYI_ARM("JitDataOffset static fields");
+ }
+ else if (fldHnd == FLD_GLOBAL_FS)
+ {
+ NYI_ARM("Thread-Local-Storage static fields");
+ }
+ else if (fldHnd == FLD_GLOBAL_DS)
+ {
+ addr = (ssize_t)offs;
+ offs = 0;
+ }
+ else
+ {
+ assert(!jitStaticFldIsGlobAddr(fldHnd));
+ addr = (ssize_t)emitComp->info.compCompHnd->getFieldAddress(fldHnd, NULL);
+ if (addr == NULL)
+ NO_WAY("could not obtain address of static field");
+ }
+
+ regNumber regTmp = codeGen->regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg));
+
+ // Load address of CLS_VAR into a register
+ codeGen->instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regTmp, addr);
+
+ emitIns_R_R_I(ins, attr, reg, regTmp, offs);
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static member + constant.
+ */
+
+void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, ssize_t val)
+{
+ NYI("emitIns_C_I");
+}
+
+/*****************************************************************************
+ *
+ *  The following functions add instructions referencing address modes.
+ */
+
+void emitter::emitIns_I_AR(
+ instruction ins, emitAttr attr, int val, regNumber reg, int offs, int memCookie, void* clsCookie)
+{
+ NYI("emitIns_I_AR");
+}
+
+void emitter::emitIns_R_AR(instruction ins,
+ emitAttr attr,
+ regNumber ireg,
+ regNumber reg,
+ int offs,
+ int memCookie /* = 0 */,
+ void* clsCookie /* = NULL */)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Load() to select the correct instruction");
+ }
+
+ if (ins == INS_lea)
+ {
+ if (emitter::emitIns_valid_imm_for_add(offs, INS_FLAGS_DONT_CARE))
+ {
+ emitIns_R_R_I(INS_add, attr, ireg, reg, offs);
+ }
+ else
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"emitIns_R_AR: immediate doesn't fit in the instruction");
+#else // LEGACY_BACKEND
+ // Load val into a register
+ regNumber immReg = codeGen->regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(ireg) & ~genRegMask(reg));
+ codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, immReg, (ssize_t)offs);
+ emitIns_R_R_R(INS_add, attr, ireg, reg, immReg);
+#endif // LEGACY_BACKEND
+ }
+ return;
+ }
+ else if (emitInsIsLoad(ins))
+ {
+ emitIns_R_R_I(ins, attr, ireg, reg, offs);
+ return;
+ }
+ else if ((ins == INS_mov) || (ins == INS_ldr))
+ {
+ if (EA_SIZE(attr) == EA_4BYTE)
+ {
+ emitIns_R_R_I(INS_ldr, attr, ireg, reg, offs);
+ return;
+ }
+ }
+    else if (ins == INS_vldr)
+    {
+        emitIns_R_R_I(ins, attr, ireg, reg, offs);
+        return;
+    }
+    NYI("emitIns_R_AR");
+}
+
+void emitter::emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
+{
+ if (emitInsIsLoad(ins))
+ {
+ // We can use ireg to load the constant address,
+ // as long as it is not a floating point register
+ regNumber regTmp = ireg;
+
+ if (isFloatReg(regTmp))
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"emitIns_R_AI with floating point reg");
+#else // LEGACY_BACKEND
+ regTmp = codeGen->regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(ireg));
+#endif // LEGACY_BACKEND
+ }
+
+ codeGen->instGen_Set_Reg_To_Imm(EA_IS_RELOC(attr) ? EA_HANDLE_CNS_RELOC : EA_PTRSIZE, regTmp, disp);
+ emitIns_R_R_I(ins, EA_TYPE(attr), ireg, regTmp, 0);
+ return;
+ }
+ NYI("emitIns_R_AI");
+}
+
+void emitter::emitIns_AR_R(instruction ins,
+ emitAttr attr,
+ regNumber ireg,
+ regNumber reg,
+ int offs,
+ int memCookie /* = 0 */,
+ void* clsCookie /* = NULL */)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Store() to select the correct instruction");
+ }
+ emitIns_R_R_I(ins, attr, ireg, reg, offs);
+}
+
+void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Load() to select the correct instruction");
+ }
+
+ if (ins == INS_lea)
+ {
+ emitIns_R_R_R(INS_add, attr, ireg, reg, rg2);
+ if (disp != 0)
+ {
+ emitIns_R_R_I(INS_add, attr, ireg, ireg, disp);
+ }
+ return;
+ }
+ else if (emitInsIsLoad(ins))
+ {
+ if (disp == 0)
+ {
+ emitIns_R_R_R_I(ins, attr, ireg, reg, rg2, 0, INS_FLAGS_DONT_CARE, INS_OPTS_NONE);
+ return;
+ }
+ }
+ assert(!"emitIns_R_ARR: Unexpected instruction");
+}
+
+void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Store() to select the correct instruction");
+ }
+ if (emitInsIsStore(ins))
+ {
+ if (disp == 0)
+ {
+ emitIns_R_R_R(ins, attr, ireg, reg, rg2);
+ }
+ else
+ {
+ emitIns_R_R_R(INS_add, attr, ireg, reg, rg2);
+ emitIns_R_R_I(ins, attr, ireg, ireg, disp);
+ }
+ return;
+ }
+ assert(!"emitIns_ARR_R: Unexpected instruction");
+}
+
+void emitter::emitIns_R_ARX(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Load() to select the correct instruction");
+ }
+
+ unsigned shift = genLog2((unsigned)mul);
+
+ if ((ins == INS_lea) || emitInsIsLoad(ins))
+ {
+ if (ins == INS_lea)
+ {
+ ins = INS_add;
+ }
+ if (disp == 0)
+ {
+ emitIns_R_R_R_I(ins, attr, ireg, reg, rg2, (int)shift, INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
+ return;
+ }
+ else
+ {
+ bool useForm2 = false;
+ bool mustUseForm1 = ((disp % mul) != 0) || (reg == ireg);
+ if (!mustUseForm1)
+ {
+                // If all of the conditions below are true we can use Form2: a Thumb-1 add
+                //  instruction followed by a Thumb-2 load instruction.
+                // We also use Form1 when reg is a low register, since the second instruction
+                //  can then always be generated using a Thumb-1 encoding.
+                //
+ if ((reg >= REG_R8) && (ireg < REG_R8) && (rg2 < REG_R8) && ((disp >> shift) <= 7))
+ {
+ useForm2 = true;
+ }
+ }
+
+ if (useForm2)
+ {
+ // Form2:
+ // Thumb-1 instruction add Rd, Rx, disp>>shift
+ // Thumb-2 instructions ldr Rd, Rb, Rd LSL shift
+ //
+ emitIns_R_R_I(INS_add, EA_4BYTE, ireg, rg2, disp >> shift);
+ emitIns_R_R_R_I(ins, attr, ireg, reg, ireg, shift, INS_FLAGS_NOT_SET, INS_OPTS_LSL);
+ }
+ else
+ {
+ // Form1:
+ // Thumb-2 instruction add Rd, Rb, Rx LSL shift
+ // Thumb-1/2 instructions ldr Rd, Rd, disp
+ //
+ emitIns_R_R_R_I(INS_add, attr, ireg, reg, rg2, shift, INS_FLAGS_NOT_SET, INS_OPTS_LSL);
+ emitIns_R_R_I(ins, attr, ireg, ireg, disp);
+ }
+ return;
+ }
+ }
+
+ assert(!"emitIns_R_ARX: Unexpected instruction");
+}
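+
+// Illustrative sketch (not part of the emitter): with ins = INS_ldr, ireg = r0, reg = r5,
+// rg2 = r1, mul = 4 (shift = 2) and disp = 8, Form1 above is chosen and roughly expands to
+// the two instructions below; the register choices are assumed for illustration.
+//
+//     add r0, r5, r1, LSL #2   // emitIns_R_R_R_I(INS_add, ...)
+//     ldr r0, [r0, #8]         // emitIns_R_R_I(ins, ...)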
+
+/*****************************************************************************
+ *
+ * Record that a jump instruction uses the short encoding
+ *
+ */
+void emitter::emitSetShortJump(instrDescJmp* id)
+{
+ if (id->idjKeepLong)
+ return;
+
+ if (emitIsCondJump(id))
+ {
+ id->idInsFmt(IF_T1_K);
+ }
+ else if (emitIsCmpJump(id))
+ {
+ // These are always only ever short!
+ assert(id->idjShort);
+ return;
+ }
+ else if (emitIsUncondJump(id))
+ {
+ id->idInsFmt(IF_T1_M);
+ }
+ else if (emitIsLoadLabel(id))
+ {
+ return; // Keep long - we don't know the alignment of the target
+ }
+ else
+ {
+ assert(!"Unknown instruction in emitSetShortJump()");
+ }
+
+ id->idjShort = true;
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[8] Converting jump %u to short\n", id->idDebugOnlyInfo()->idNum);
+ }
+#endif // DEBUG_EMIT
+
+ insSize isz = emitInsSize(id->idInsFmt());
+ id->idInsSize(isz);
+}
+
+/*****************************************************************************
+ *
+ * Record that a jump instruction uses the medium encoding
+ *
+ */
+void emitter::emitSetMediumJump(instrDescJmp* id)
+{
+ if (id->idjKeepLong)
+ return;
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[9] Converting jump %u to medium\n", id->idDebugOnlyInfo()->idNum);
+ }
+#endif // DEBUG_EMIT
+
+ assert(emitIsCondJump(id));
+ id->idInsFmt(IF_T2_J1);
+ id->idjShort = false;
+
+ insSize isz = emitInsSize(id->idInsFmt());
+ id->idInsSize(isz);
+}
+
+/*****************************************************************************
+ *
+ * Add a jmp instruction.
+ *  When dst is NULL, instrCount specifies the number of instructions
+ * to jump: positive is forward, negative is backward.
+ * Unconditional branches have two sizes: short and long.
+ * Conditional branches have three sizes: short, medium, and long. A long
+ * branch is a pseudo-instruction that represents two instructions:
+ * a short conditional branch to branch around a large unconditional
+ * branch. Thus, we can handle branch offsets of imm24 instead of just imm20.
+ */
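+// For example, an out-of-range conditional branch such as "beq Ltarget" is conceptually
+// emitted as the pair below, where only the unconditional branch needs the full imm24
+// range (the label names are illustrative):
+//
+//         bne  Lskip      // inverted condition, short branch
+//         b    Ltarget    // unconditional branch, imm24 range
+//     Lskip:
+//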
+
+void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 */)
+{
+ insFormat fmt = IF_NONE;
+
+ if (dst != NULL)
+ {
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+ }
+ else
+ {
+ assert(instrCount != 0);
+ }
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_b:
+ fmt = IF_T2_J2; /* Assume the jump will be long */
+ break;
+
+ case INS_beq:
+ case INS_bne:
+ case INS_bhs:
+ case INS_blo:
+ case INS_bmi:
+ case INS_bpl:
+ case INS_bvs:
+ case INS_bvc:
+ case INS_bhi:
+ case INS_bls:
+ case INS_bge:
+ case INS_blt:
+ case INS_bgt:
+ case INS_ble:
+ fmt = IF_LARGEJMP; /* Assume the jump will be long */
+ break;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_LARGEJMP) || (fmt == IF_T2_J2));
+
+ instrDescJmp* id = emitNewInstrJmp();
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+
+#ifdef DEBUG
+ // Mark the finally call
+ if (ins == INS_b && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ id->idDebugOnlyInfo()->idFinallyCall = true;
+ }
+#endif // DEBUG
+
+ /* Assume the jump will be long */
+
+ id->idjShort = 0;
+ if (dst != NULL)
+ {
+ id->idAddr()->iiaBBlabel = dst;
+ id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+
+#ifdef DEBUG
+ if (emitComp->opts.compLongAddress) // Force long branches
+ id->idjKeepLong = 1;
+#endif // DEBUG
+ }
+ else
+ {
+ id->idAddr()->iiaSetInstrCount(instrCount);
+ id->idjKeepLong = false;
+ /* This jump must be short */
+ emitSetShortJump(id);
+ id->idSetIsBound();
+ }
+
+ /* Record the jump's IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ /* Figure out the max. size of the jump/call instruction */
+
+ if (!id->idjKeepLong)
+ {
+ insGroup* tgt = NULL;
+
+ /* Can we guess at the jump distance? */
+
+ if (dst != NULL)
+ {
+ tgt = (insGroup*)emitCodeGetCookie(dst);
+ }
+
+ if (tgt)
+ {
+ UNATIVE_OFFSET srcOffs;
+ int jmpDist;
+
+ assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
+
+ /* This is a backward jump - figure out the distance */
+
+ srcOffs = emitCurCodeOffset + emitCurIGsize;
+
+ /* Compute the distance estimate */
+
+ jmpDist = srcOffs - tgt->igOffs;
+ assert(jmpDist >= 0);
+ jmpDist += 4; // Adjustment for ARM PC
+
+ switch (fmt)
+ {
+ case IF_T2_J2:
+ if (JMP_DIST_SMALL_MAX_NEG <= -jmpDist)
+ {
+ /* This jump surely will be short */
+ emitSetShortJump(id);
+ }
+ break;
+
+ case IF_LARGEJMP:
+ if (JCC_DIST_SMALL_MAX_NEG <= -jmpDist)
+ {
+ /* This jump surely will be short */
+ emitSetShortJump(id);
+ }
+ else if (JCC_DIST_MEDIUM_MAX_NEG <= -jmpDist)
+ {
+ /* This jump surely will be medium */
+ emitSetMediumJump(id);
+ }
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+ }
+ }
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add a label instruction.
+ */
+
+void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+ insFormat fmt = IF_NONE;
+
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_movt:
+ case INS_movw:
+ fmt = IF_T2_N1;
+ break;
+ default:
+ unreached();
+ }
+ assert(fmt == IF_T2_N1);
+
+ instrDescJmp* id = emitNewInstrJmp();
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idReg1(reg);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+
+#ifdef DEBUG
+ // Mark the catch return
+ if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
+ {
+ id->idDebugOnlyInfo()->idCatchRet = true;
+ }
+#endif // DEBUG
+
+ id->idAddr()->iiaBBlabel = dst;
+ id->idjShort = false;
+ id->idjKeepLong = true;
+
+ /* Record the jump's IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+ // Set the relocation flags - these give hint to zap to perform
+ // relocation of the specified 32bit address.
+ id->idSetRelocFlags(attr);
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add a data label instruction.
+ */
+
+void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg)
+{
+ noway_assert((ins == INS_movw) || (ins == INS_movt));
+
+ insFormat fmt = IF_T2_N2;
+ instrDesc* id = emitNewInstrSC(attr, offs);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idReg1(reg);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+
+#if RELOC_SUPPORT
+ if (emitComp->opts.compReloc)
+ {
+ // Set the relocation flags - these give hint to zap to perform
+ // relocation of the specified 32bit address.
+ id->idSetRelocFlags(attr);
+ }
+#endif // RELOC_SUPPORT
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+
+ instrDescJmp* id;
+ if (ins == INS_adr)
+ {
+ id = emitNewInstrLbl();
+
+ id->idIns(INS_adr);
+ id->idInsFmt(IF_T2_M1);
+ id->idInsSize(emitInsSize(IF_T2_M1));
+ id->idAddr()->iiaBBlabel = dst;
+ id->idReg1(reg);
+ id->idReg2(REG_PC);
+
+ /* Assume the label reference will be long */
+
+ id->idjShort = 0;
+ id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+ }
+ else
+ {
+        assert((ins == INS_cbz) || (ins == INS_cbnz));
+ assert(isLowRegister(reg));
+ id = emitNewInstrJmp();
+
+ id->idIns(ins);
+ id->idInsFmt(IF_T1_I);
+ id->idInsSize(emitInsSize(IF_T1_I));
+ id->idReg1(reg);
+
+ /* This jump better be short or-else! */
+ id->idjShort = true;
+ id->idAddr()->iiaBBlabel = dst;
+ id->idjKeepLong = false;
+ }
+
+ /* Record the jump's IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
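+
+// Illustrative sketch (not part of the emitter): a compare-and-branch-if-zero on a low
+// register against a (hypothetical) target block could be requested as below; cbz/cbnz
+// are only ever emitted in their short Thumb-1 form.
+//
+//     emitIns_J_R(INS_cbz, EA_4BYTE, skipBlock, REG_R0); // cbz r0, <skipBlock>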
+
+/*****************************************************************************
+ *
+ * Add a call instruction (direct or indirect).
+ * argSize<0 means that the caller will pop the arguments
+ *
+ * The other arguments are interpreted depending on callType as shown:
+ * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
+ *
+ * EC_FUNC_TOKEN : addr is the method address
+ * EC_FUNC_ADDR : addr is the absolute address of the function
+ *
+ * If callType is one of these emitCallTypes, addr has to be NULL.
+ * EC_INDIR_R : "call ireg".
+ *
+ * For ARM xreg, xmul and disp are never used and should always be 0/REG_NA.
+ *
+ * Please consult the "debugger team notification" comment in genFnProlog().
+ */
+
+void emitter::emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd, // used for pretty printing
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize,
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset /* = BAD_IL_OFFSET */,
+ regNumber ireg /* = REG_NA */,
+ regNumber xreg /* = REG_NA */,
+ unsigned xmul /* = 0 */,
+ int disp /* = 0 */,
+ bool isJump /* = false */,
+ bool isNoGC /* = false */,
+ bool isProfLeaveCB /* = false */)
+{
+ /* Sanity check the arguments depending on callType */
+
+ assert(callType < EC_COUNT);
+ assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_ADDR) ||
+ (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
+ assert(callType < EC_INDIR_R || addr == NULL);
+ assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
+
+ // ARM never uses these
+ assert(xreg == REG_NA && xmul == 0 && disp == 0);
+
+    // Our stack level should always be at least as large as the number of bytes of
+    // arguments we push. Just a sanity check.
+ assert((unsigned)abs(argSize) <= codeGen->genStackLevel);
+
+ int argCnt;
+ instrDesc* id;
+
+ /* This is the saved set of registers after a normal call */
+ regMaskTP savedSet = RBM_CALLEE_SAVED;
+
+    /* some special helper calls have a different saved set of registers */
+
+ if (isNoGC)
+ {
+ assert(emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
+
+ // This call will preserve the liveness of most registers
+ //
+ // - On the ARM the NOGC helpers will preserve all registers,
+ // except for those listed in the RBM_CALLEE_TRASH_NOGC mask
+
+ savedSet = RBM_ALLINT & ~RBM_CALLEE_TRASH_NOGC;
+
+ // In case of Leave profiler callback, we need to preserve liveness of REG_PROFILER_RET_SCRATCH
+ if (isProfLeaveCB)
+ {
+ savedSet |= RBM_PROFILER_RET_SCRATCH;
+ }
+ }
+ else
+ {
+ assert(!emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
+ }
+
+ /* Trim out any callee-trashed registers from the live set */
+
+ gcrefRegs &= savedSet;
+ byrefRegs &= savedSet;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
+ dumpConvertedVarSet(emitComp, ptrVars);
+ printf(", gcrefRegs=");
+ printRegMaskInt(gcrefRegs);
+ emitDispRegSet(gcrefRegs);
+ printf(", byrefRegs=");
+ printRegMaskInt(byrefRegs);
+ emitDispRegSet(byrefRegs);
+ printf("\n");
+ }
+#endif
+
+ assert(argSize % (int)sizeof(void*) == 0);
+ argCnt = argSize / (int)sizeof(void*);
+
+#ifdef DEBUGGING_SUPPORT
+ /* Managed RetVal: emit sequence point for the call */
+ if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
+ {
+ codeGen->genIPmappingAdd(ilOffset, false);
+ }
+#endif
+
+ /*
+ We need to allocate the appropriate instruction descriptor based
+ on whether this is a direct/indirect call, and whether we need to
+ record an updated set of live GC variables.
+
+       The stats for a ton of classes are as follows:
+
+ Direct call w/o GC vars 220,216
+ Indir. call w/o GC vars 144,781
+
+ Direct call with GC vars 9,440
+ Indir. call with GC vars 5,768
+ */
+
+ if (callType >= EC_INDIR_R)
+ {
+ /* Indirect call, virtual calls */
+
+ assert(callType == EC_INDIR_R);
+
+ id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, retSize);
+ }
+ else
+ {
+ /* Helper/static/nonvirtual/function calls (direct or through handle),
+ and calls to an absolute addr. */
+
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
+
+ id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize);
+ }
+
+ /* Update the emitter's live GC ref sets */
+
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
+ emitThisGCrefRegs = gcrefRegs;
+ emitThisByrefRegs = byrefRegs;
+
+ /* Set the instruction - special case jumping a function */
+ instruction ins;
+ insFormat fmt = IF_NONE;
+
+ id->idSetIsNoGC(isNoGC);
+
+ /* Record the address: method, indirection, or funcptr */
+
+ if (callType > EC_FUNC_ADDR)
+ {
+ /* This is an indirect call (either a virtual call or func ptr call) */
+
+ switch (callType)
+ {
+ case EC_INDIR_R: // the address is in a register
+
+ id->idSetIsCallRegPtr();
+
+ if (isJump)
+ {
+ ins = INS_bx; // INS_bx Reg
+ }
+ else
+ {
+ ins = INS_blx; // INS_blx Reg
+ }
+ fmt = IF_T1_D2;
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(emitInsSize(fmt));
+ id->idReg3(ireg);
+ assert(xreg == REG_NA);
+ break;
+
+ default:
+ NO_WAY("unexpected instruction");
+ break;
+ }
+ }
+ else
+ {
+ /* This is a simple direct call: "call helper/method/addr" */
+
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
+
+ assert(addr != NULL);
+ assert(codeGen->validImmForBL((ssize_t)addr));
+
+ if (isJump)
+ {
+ ins = INS_b; // INS_b imm24
+ }
+ else
+ {
+ ins = INS_bl; // INS_bl imm24
+ }
+
+ fmt = IF_T2_J3;
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(emitInsSize(fmt));
+
+ id->idAddr()->iiaAddr = (BYTE*)addr;
+
+ if (callType == EC_FUNC_ADDR)
+ {
+ id->idSetIsCallAddr();
+ }
+
+#if RELOC_SUPPORT
+ if (emitComp->opts.compReloc)
+ {
+ // Since this is an indirect call through a pointer and we don't
+            // currently pass an emitAttr into this function, we have decided
+ // to always mark the displacement as being relocatable.
+
+ id->idSetIsDspReloc();
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ if (id->idIsLargeCall())
+ {
+ printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
+ VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
+ }
+ }
+#endif
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+ id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
+ id->idDebugOnlyInfo()->idClsCookie = 0;
+ id->idDebugOnlyInfo()->idCallSig = sigInfo;
+#endif
+
+#if defined(LATE_DISASM)
+ if (addr != nullptr)
+ {
+ codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
+ }
+#endif // defined(LATE_DISASM)
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register (any-reg) to be used in
+ * a Thumb-1 encoding in the M4 position
+ */
+
+inline unsigned insEncodeRegT1_M4(regNumber reg)
+{
+ assert(reg < REG_STK);
+
+ return reg << 3;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register (any-reg) to be used in
+ * a Thumb-1 encoding in the D4 position
+ */
+
+inline unsigned insEncodeRegT1_D4(regNumber reg)
+{
+ assert(reg < REG_STK);
+
+ return (reg & 0x7) | ((reg & 0x8) << 4);
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register (low-only) to be used in
+ * a Thumb-1 encoding in the M3 position
+ */
+
+inline unsigned insEncodeRegT1_M3(regNumber reg)
+{
+ assert(reg < REG_R8);
+
+ return reg << 6;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register (low-only) to be used in
+ * a Thumb-1 encoding in the N3 position
+ */
+
+inline unsigned insEncodeRegT1_N3(regNumber reg)
+{
+ assert(reg < REG_R8);
+
+ return reg << 3;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register (low-only) to be used in
+ * a Thumb-1 encoding in the D3 position
+ */
+
+inline unsigned insEncodeRegT1_D3(regNumber reg)
+{
+ assert(reg < REG_R8);
+
+ return reg;
+}
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register (low-only) to be used in
+ * a Thumb-1 encoding in the DI position
+ */
+
+inline unsigned insEncodeRegT1_DI(regNumber reg)
+{
+ assert(reg < REG_R8);
+
+ return reg << 8;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in
+ * a Thumb-2 encoding in the N position
+ */
+
+inline unsigned insEncodeRegT2_N(regNumber reg)
+{
+ assert(reg < REG_STK);
+
+ return reg << 16;
+}
+
+inline unsigned floatRegIndex(regNumber reg, int size)
+{
+ // theoretically this could support quad floats as well but for now...
+ assert(size == EA_8BYTE || size == EA_4BYTE);
+
+ if (size == EA_8BYTE)
+ assert(emitter::isDoubleReg(reg));
+ else
+ assert(emitter::isFloatReg(reg));
+
+ unsigned result = reg - REG_F0;
+
+ // the assumption here is that the register F8 also refers to D4
+ if (size == EA_8BYTE)
+ {
+ result >>= 1;
+ }
+
+ return result;
+}
+
+// variant: some ARM VFP instructions use the convention that,
+// for doubles, the split bit holds the MSB of the register index,
+// while for singles it holds the LSB.
+// Excerpt from the architecture pseudo-code:
+//   d = if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+
+inline unsigned floatRegEncoding(unsigned index, int size, bool variant = false)
+{
+ if (!variant || size == EA_8BYTE)
+ return index;
+ else
+ {
+ return ((index & 1) << 4) | (index >> 1);
+ }
+}
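+
+// Illustrative sketch (not part of the emitter): for the single-precision register s3
+// (index 3) with variant == true, the index is rearranged to
+// ((3 & 1) << 4) | (3 >> 1) = 0x11, i.e. the LSB of the index becomes the split bit;
+// for doubles the index is returned unchanged.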
+
+// thumb2 VFP M register encoding
+inline unsigned insEncodeRegT2_VectorM(regNumber reg, int size, bool variant)
+{
+ unsigned enc = floatRegIndex(reg, size);
+ enc = floatRegEncoding(enc, size, variant);
+ return ((enc & 0xf) << 0) | ((enc & 0x10) << 1);
+}
+
+// thumb2 VFP N register encoding
+inline unsigned insEncodeRegT2_VectorN(regNumber reg, int size, bool variant)
+{
+ unsigned enc = floatRegIndex(reg, size);
+ enc = floatRegEncoding(enc, size, variant);
+ return ((enc & 0xf) << 16) | ((enc & 0x10) << 3);
+}
+
+// thumb2 VFP D register encoding
+inline unsigned insEncodeRegT2_VectorD(regNumber reg, int size, bool variant)
+{
+ unsigned enc = floatRegIndex(reg, size);
+ enc = floatRegEncoding(enc, size, variant);
+ return ((enc & 0xf) << 12) | ((enc & 0x10) << 18);
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in
+ * a Thumb-2 encoding in the T position
+ */
+
+inline unsigned insEncodeRegT2_T(regNumber reg)
+{
+ assert(reg < REG_STK);
+
+ return reg << 12;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in
+ * a Thumb-2 encoding in the D position
+ */
+
+inline unsigned insEncodeRegT2_D(regNumber reg)
+{
+ assert(reg < REG_STK);
+
+ return reg << 8;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in
+ * a Thumb-2 encoding in the M position
+ */
+
+inline unsigned insEncodeRegT2_M(regNumber reg)
+{
+ assert(reg < REG_STK);
+
+ return reg;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the Set Flags bit to be used in a Thumb-2 encoding
+ */
+
+unsigned emitter::insEncodeSetFlags(insFlags sf)
+{
+ if (sf == INS_FLAGS_SET)
+ return (1 << 20);
+ else
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the Shift Type bits to be used in a Thumb-2 encoding
+ */
+
+unsigned emitter::insEncodeShiftOpts(insOpts opt)
+{
+ if (opt == INS_OPTS_NONE)
+ return 0;
+ else if (opt == INS_OPTS_LSL)
+ return 0x00;
+ else if (opt == INS_OPTS_LSR)
+ return 0x10;
+ else if (opt == INS_OPTS_ASR)
+ return 0x20;
+ else if (opt == INS_OPTS_ROR)
+ return 0x30;
+ else if (opt == INS_OPTS_RRX)
+ return 0x30;
+
+ assert(!"Invalid insOpts");
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the PUW bits to be used in a T2_G0 Thumb-2 encoding
+ */
+
+unsigned emitter::insEncodePUW_G0(insOpts opt, int imm)
+{
+ unsigned result = 0;
+
+ if (opt != INS_OPTS_LDST_POST_INC)
+ result |= (1 << 24); // The P bit
+
+ if (imm >= 0)
+ result |= (1 << 23); // The U bit
+
+ if (opt != INS_OPTS_NONE)
+ result |= (1 << 21); // The W bits
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the PUW bits to be used in a T2_H0 Thumb-2 encoding
+ */
+
+unsigned emitter::insEncodePUW_H0(insOpts opt, int imm)
+{
+ unsigned result = 0;
+
+ if (opt != INS_OPTS_LDST_POST_INC)
+ result |= (1 << 10); // The P bit
+
+ if (imm >= 0)
+ result |= (1 << 9); // The U bit
+
+ if (opt != INS_OPTS_NONE)
+ result |= (1 << 8); // The W bits
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the Shift Count bits to be used in a Thumb-2 encoding
+ */
+
+inline unsigned insEncodeShiftCount(int imm)
+{
+ unsigned result;
+
+ assert((imm & 0x001F) == imm);
+ result = (imm & 0x03) << 6;
+ result |= (imm & 0x1C) << 10;
+
+ return result;
+}
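+
+// Illustrative sketch (not part of the emitter): a shift count of 5 (0b00101) is split
+// into imm2 = 0b01 at bits [7:6] and imm3 = 0b001 at bits [14:12], giving
+// (1 << 6) | (1 << 12) = 0x1040.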
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the immediate use by BFI/BFC Thumb-2 encodings
+ */
+
+inline unsigned insEncodeBitFieldImm(int imm)
+{
+ unsigned result;
+
+ assert((imm & 0x03FF) == imm);
+ result = (imm & 0x001f);
+ result |= (imm & 0x0060) << 1;
+ result |= (imm & 0x0380) << 5;
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Unscales the immediate based on the operand size in 'size'
+ */
+/*static*/ int emitter::insUnscaleImm(int imm, emitAttr size)
+{
+ switch (size)
+ {
+ case EA_8BYTE:
+ case EA_4BYTE:
+ assert((imm & 0x0003) == 0);
+ imm >>= 2;
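+ // e.g. a word-sized access with a byte offset of 8 is encoded with imm = 2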
+ break;
+
+ case EA_2BYTE:
+ assert((imm & 0x0001) == 0);
+ imm >>= 1;
+ break;
+
+ case EA_1BYTE:
+ // Do nothing
+ break;
+
+ default:
+ assert(!"Invalid value in size");
+ break;
+ }
+ return imm;
+}
+
+/*****************************************************************************
+ *
+ * Emit a Thumb-1 instruction (a 16-bit integer as code)
+ */
+
+/*static*/ unsigned emitter::emitOutput_Thumb1Instr(BYTE* dst, ssize_t code)
+{
+ unsigned short word1 = code & 0xffff;
+ assert(word1 == code);
+
+#ifdef DEBUG
+ unsigned short top5bits = (word1 & 0xf800) >> 11;
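+ // Opcodes whose top five bits are 29 (0b11101), 30 (0b11110) or 31 (0b11111) are the first
+ // halfword of a 32-bit Thumb-2 encoding, so a valid 16-bit Thumb-1 opcode must start below 29.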
+ assert(top5bits < 29);
+#endif
+
+ MISALIGNED_WR_I2(dst, word1);
+
+ return sizeof(short);
+}
+/*****************************************************************************
+ *
+ * Emit a Thumb-2 instruction (two 16-bit integers as code)
+ */
+
+/*static*/ unsigned emitter::emitOutput_Thumb2Instr(BYTE* dst, ssize_t code)
+{
+ unsigned short word1 = (code >> 16) & 0xffff;
+ unsigned short word2 = (code)&0xffff;
+ assert(((word1 << 16) | word2) == code);
+
+#ifdef DEBUG
+ unsigned short top5bits = (word1 & 0xf800) >> 11;
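+ // Every 32-bit Thumb-2 encoding starts with 0b11101, 0b11110 or 0b11111 (29-31) in the top
+ // five bits of its first halfword.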
+ assert(top5bits >= 29);
+#endif
+
+ MISALIGNED_WR_I2(dst, word1);
+ dst += 2;
+ MISALIGNED_WR_I2(dst, word2);
+
+ return sizeof(short) * 2;
+}
+
+/*****************************************************************************
+ *
+ * Output a local jump instruction.
+ * Note that this may be invoked to overwrite an existing jump instruction at 'dst'
+ * to handle forward branch patching.
+ */
+
+BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
+{
+ unsigned srcOffs;
+ unsigned dstOffs;
+ ssize_t distVal;
+
+ instrDescJmp* id = (instrDescJmp*)i;
+ instruction ins = id->idIns();
+ ssize_t code;
+
+ bool loadLabel = false;
+ bool isJump = false;
+ bool relAddr = true; // does the instruction use relative-addressing?
+
+ size_t sdistneg;
+
+ switch (ins)
+ {
+ default:
+ sdistneg = JCC_DIST_SMALL_MAX_NEG;
+ isJump = true;
+ break;
+
+ case INS_cbz:
+ case INS_cbnz:
+ // One size fits all!
+ sdistneg = 0;
+ isJump = true;
+ break;
+
+ case INS_adr:
+ sdistneg = LBL_DIST_SMALL_MAX_NEG;
+ loadLabel = true;
+ break;
+
+ case INS_movw:
+ case INS_movt:
+ sdistneg = LBL_DIST_SMALL_MAX_NEG;
+ relAddr = false;
+ loadLabel = true;
+ break;
+ }
+
+ /* Figure out the distance to the target */
+
+ srcOffs = emitCurCodeOffs(dst);
+ if (id->idAddr()->iiaHasInstrCount())
+ {
+ assert(ig != NULL);
+ int instrCount = id->idAddr()->iiaGetInstrCount();
+ unsigned insNum = emitFindInsNum(ig, id);
+ if (instrCount < 0)
+ {
+ // Backward branches using instruction count must be within the same instruction group.
+ assert(insNum + 1 >= (unsigned)(-instrCount));
+ }
+ dstOffs = ig->igOffs + emitFindOffset(ig, (insNum + 1 + instrCount));
+ }
+ else
+ {
+ dstOffs = id->idAddr()->iiaIGlabel->igOffs;
+ }
+
+ if (relAddr)
+ {
+ if (ins == INS_adr)
+ {
+ // for adr, the distance is calculated from 4-byte aligned srcOffs.
+ distVal = (ssize_t)((emitOffsetToPtr(dstOffs) - (BYTE*)(((size_t)emitOffsetToPtr(srcOffs)) & ~3)) + 1);
+ }
+ else
+ {
+ distVal = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
+ }
+ }
+ else
+ {
+ assert(ins == INS_movw || ins == INS_movt);
+ distVal = (ssize_t)emitOffsetToPtr(dstOffs) + 1; // OR in the Thumb bit
+ }
+
+ if (dstOffs <= srcOffs)
+ {
+/* This is a backward jump - distance is known at this point */
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ size_t blkOffs = id->idjIG->igOffs;
+
+ if (INTERESTING_JUMP_NUM == 0)
+ printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ printf("[3] Jump block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
+ printf("[3] Jump is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
+ printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
+ }
+#endif
+
+ // The IF_T1_I format (cbz/cbnz) only supports forward branches, so it cannot appear on a backward jump
+ noway_assert(id->idInsFmt() != IF_T1_I);
+
+ /* Can we use a short jump? */
+
+ if (isJump && ((unsigned)(distVal - 4) >= (unsigned)sdistneg))
+ {
+ emitSetShortJump(id);
+ }
+ }
+ else
+ {
+ /* This is a forward jump - distance will be an upper limit */
+
+ emitFwdJumps = true;
+
+ /* The target offset will be closer by at least 'emitOffsAdj', but only if this
+ jump doesn't cross the hot-cold boundary. */
+
+ if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ dstOffs -= emitOffsAdj;
+ distVal -= emitOffsAdj;
+ }
+
+ /* Record the location of the jump for later patching */
+
+ id->idjOffs = dstOffs;
+
+ /* Are we overflowing the id->idjOffs bitfield? */
+ if (id->idjOffs != dstOffs)
+ IMPL_LIMITATION("Method is too large");
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ size_t blkOffs = id->idjIG->igOffs;
+
+ if (INTERESTING_JUMP_NUM == 0)
+ printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ printf("[4] Jump block is at %08X\n", blkOffs);
+ printf("[4] Jump is at %08X\n", srcOffs);
+ printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
+ }
+#endif
+ }
+
+ /* Adjust the offset to emit relative to the end of the instruction */
+
+ if (relAddr)
+ distVal -= 4;
+
+#ifdef DEBUG
+ if (0 && emitComp->verbose)
+ {
+ size_t sz = 4; // Thumb-2 pretends all instructions are 4 bytes long for computing jump offsets?
+ int distValSize = id->idjShort ? 4 : 8;
+ printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd",
+ dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs, distVal);
+ }
+#endif
+
+ insFormat fmt = id->idInsFmt();
+
+ if (isJump)
+ {
+ /* What size jump should we use? */
+
+ if (id->idjShort)
+ {
+ /* Short jump */
+
+ assert(!id->idjKeepLong);
+ assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
+
+ assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
+ assert(JMP_SIZE_SMALL == 2);
+
+ /* For forward jumps, record the address of the distance value */
+ id->idjTemp.idjAddr = (distVal > 0) ? dst : NULL;
+
+ dst = emitOutputShortBranch(dst, ins, fmt, distVal, id);
+ }
+ else
+ {
+ /* Long jump */
+
+ /* For forward jumps, record the address of the distance value */
+ id->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : NULL;
+
+ if (fmt == IF_LARGEJMP)
+ {
+ // This is a pseudo-instruction format representing a large conditional branch, to allow
+ // us to get a greater branch target range than we can get by using a straightforward conditional
+ // branch. It is encoded as a short conditional branch that branches around a long unconditional
+ // branch.
+ //
+ // Conceptually, we have:
+ //
+ // b<cond> L_target
+ //
+ // The code we emit is:
+ //
+ // b<!cond> L_not // 2 bytes. Note that we reverse the condition.
+ // b L_target // 4 bytes
+ // L_not:
+ //
+ // Note that we don't actually insert any blocks: we simply encode "b <!cond> L_not" as a branch with
+ // the correct offset. Note also that this works for both integer and floating-point conditions, because
+ // the condition inversion takes ordered/unordered into account, preserving NaN behavior. For example,
+ // "GT" (greater than) is inverted to "LE" (less than, equal, or unordered).
+ //
+ // History: previously, we generated:
+ // it<cond>
+ // b L_target
+ // but the "it" instruction was deprecated, so we can't use it.
+
+ dst = emitOutputShortBranch(dst,
+ emitJumpKindToIns(emitReverseJumpKind(
+ emitInsToJumpKind(ins))), // reverse the conditional instruction
+ IF_T1_K,
+ 6 - 4, /* 6 bytes from start of this large conditional pseudo-instruction to
+ L_not. Jumps are encoded as offset from instr address + 4. */
+ NULL /* only used for cbz/cbnz */);
+
+ // Now, pretend we've got a normal unconditional branch, and fall through to the code to emit that.
+ ins = INS_b;
+ fmt = IF_T2_J2;
+
+ // The distVal was computed based on the beginning of the pseudo-instruction, which starts
+ // with the 2-byte reversed conditional branch emitted above. Subtract its size so that the
+ // distance is relative to the unconditional branch.
+ distVal -= 2;
+ }
+
+ code = emitInsCode(ins, fmt);
+
+ if (fmt == IF_T2_J1)
+ {
+ // Can't use this form for jumps between the hot and cold regions
+ assert(!id->idjKeepLong);
+ assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
+
+ assert((distVal & 1) == 0);
+ assert(distVal >= -1048576);
+ assert(distVal <= 1048574);
+
+ if (distVal < 0)
+ code |= 1 << 26;
+ code |= ((distVal >> 1) & 0x0007ff);
+ code |= (((distVal >> 1) & 0x01f800) << 5);
+ code |= (((distVal >> 1) & 0x020000) >> 4);
+ code |= (((distVal >> 1) & 0x040000) >> 7);
+ }
+ else if (fmt == IF_T2_J2)
+ {
+ assert((distVal & 1) == 0);
+#ifdef RELOC_SUPPORT
+ if (emitComp->opts.compReloc && emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ // dst isn't an actual final target location, just some intermediate
+ // location. Thus we cannot make any guarantees about distVal (not
+ // even the direction/sign). Instead we don't encode any offset and
+ // rely on the relocation to do all the work
+ }
+ else
+#endif
+ {
+ assert(distVal >= -16777216);
+ assert(distVal <= 16777214);
+
+ if (distVal < 0)
+ code |= 1 << 26;
+ code |= ((distVal >> 1) & 0x0007ff);
+ code |= (((distVal >> 1) & 0x1ff800) << 5);
+
+ bool S = (distVal < 0);
+ bool I1 = ((distVal & 0x00800000) == 0);
+ bool I2 = ((distVal & 0x00400000) == 0);
+
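+ // Per the ARM B.W (T4) encoding, J1 = NOT(I1) EOR S and J2 = NOT(I2) EOR S; the locals
+ // I1/I2 above already hold the negated offset bits 23/22, so S ^ I1 and S ^ I2 yield
+ // the J1/J2 opcode bits directly.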
+ if (S ^ I1)
+ code |= (1 << 13); // J1 bit
+ if (S ^ I2)
+ code |= (1 << 11); // J2 bit
+ }
+ }
+ else
+ {
+ assert(!"Unknown fmt");
+ }
+
+ unsigned instrSize = emitOutput_Thumb2Instr(dst, code);
+
+#ifdef RELOC_SUPPORT
+ if (emitComp->opts.compReloc)
+ {
+ if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ assert(id->idjKeepLong);
+ if (emitComp->info.compMatchedVM)
+ {
+ void* target = emitOffsetToPtr(dstOffs);
+ emitRecordRelocation((void*)dst, target, IMAGE_REL_BASED_THUMB_BRANCH24);
+ }
+ }
+ }
+#endif // RELOC_SUPPORT
+
+ dst += instrSize;
+ }
+ }
+ else if (loadLabel)
+ {
+ /* For forward jumps, record the address of the distance value */
+ id->idjTemp.idjAddr = (distVal > 0) ? dst : NULL;
+
+ code = emitInsCode(ins, fmt);
+
+ if (fmt == IF_T1_J3)
+ {
+ assert((dstOffs & 3) == 0); // The target label must be 4-byte aligned
+ assert(distVal >= 0);
+ assert(distVal <= 1022);
+ code |= ((distVal >> 2) & 0xff);
+
+ dst += emitOutput_Thumb1Instr(dst, code);
+ }
+ else if (fmt == IF_T2_M1)
+ {
+ assert(distVal >= -4095);
+ assert(distVal <= +4095);
+ if (distVal < 0)
+ {
+ code |= 0x00A0 << 16;
+ distVal = -distVal;
+ }
+ assert((distVal & 0x0fff) == distVal);
+ code |= (distVal & 0x00ff);
+ code |= ((distVal & 0x0700) << 4);
+
+ code |= ((distVal & 0x0800) << 15);
+ code |= id->idReg1() << 8;
+
+ dst += emitOutput_Thumb2Instr(dst, code);
+ }
+ else if (fmt == IF_T2_N1)
+ {
+ code |= insEncodeRegT2_D(id->idReg1());
+ unsigned imm = distVal;
+ if (ins == INS_movw)
+ {
+ imm &= 0xffff;
+ }
+ else
+ {
+ imm = (imm >> 16) & 0xffff;
+ }
+ ((instrDescJmp*)id)->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : NULL;
+
+ assert((imm & 0x0000ffff) == imm);
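+ // Scatter the 16-bit immediate into the movw/movt encoding: value[7:0] -> imm8 (bits 7:0),
+ // value[10:8] -> imm3 (bits 14:12), value[11] -> i (bit 26), value[15:12] -> imm4 (bits 19:16).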
+ code |= (imm & 0x00ff);
+ code |= ((imm & 0x0700) << 4);
+ code |= ((imm & 0x0800) << 15);
+ code |= ((imm & 0xf000) << 4);
+ dst += emitOutput_Thumb2Instr(dst, code);
+
+ if (id->idIsCnsReloc() || id->idIsDspReloc())
+ {
+ assert(ins == INS_movt || ins == INS_movw);
+ if ((ins == INS_movt) && emitComp->info.compMatchedVM)
+ emitRecordRelocation((void*)(dst - 8), (void*)distVal, IMAGE_REL_BASED_THUMB_MOV32);
+ }
+ }
+ else
+ {
+ assert(!"Unknown fmt");
+ }
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output a short branch instruction.
+ */
+
+BYTE* emitter::emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id)
+{
+ size_t code;
+
+ code = emitInsCode(ins, fmt);
+
+ if (fmt == IF_T1_K)
+ {
+ assert((distVal & 1) == 0);
+ assert(distVal >= -256);
+ assert(distVal <= 254);
+
+ if (distVal < 0)
+ code |= 1 << 7;
+ code |= ((distVal >> 1) & 0x7f);
+ }
+ else if (fmt == IF_T1_M)
+ {
+ assert((distVal & 1) == 0);
+ assert(distVal >= -2048);
+ assert(distVal <= 2046);
+
+ if (distVal < 0)
+ code |= 1 << 10;
+ code |= ((distVal >> 1) & 0x3ff);
+ }
+ else if (fmt == IF_T1_I)
+ {
+ assert(id != NULL);
+ assert((ins == INS_cbz) || (ins == INS_cbnz));
+ assert((distVal & 1) == 0);
+ assert(distVal >= 0);
+ assert(distVal <= 126);
+
+ code |= ((distVal << 3) & 0x0200);
+ code |= ((distVal << 2) & 0x00F8);
+ code |= (id->idReg1() & 0x0007);
+ }
+ else
+ {
+ assert(!"Unknown fmt");
+ }
+
+ dst += emitOutput_Thumb1Instr(dst, code);
+
+ return dst;
+}
+
+#ifdef FEATURE_ITINSTRUCTION
+
+/*****************************************************************************
+ * The "IT" instruction is deprecated (with a very few exceptions). Don't generate it!
+ * Don't delete this code, though, in case we ever want to bring it back.
+ *****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Output an IT instruction.
+ */
+
+BYTE* emitter::emitOutputIT(BYTE* dst, instruction ins, insFormat fmt, ssize_t condcode)
+{
+ ssize_t imm0;
+ size_t code, mask, bit;
+
+ code = emitInsCode(ins, fmt);
+ code |= (condcode << 4); // encode firstcond
+ imm0 = condcode & 1; // this is firstcond[0]
+ mask = code & 0x0f; // initialize mask encoded in opcode
+ bit = 0x08; // where in mask we are encoding
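+ // When firstcond[0] is 1, the loop below inverts every mask bit above the terminating '1';
+ // when firstcond[0] is 0, the mask bits are left as they came from the opcode table.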
+ while ((mask & (bit - 1)) != 0) // are the remaining bits all zeros?
+ { // then we are done
+ // otherwise determine the setting of bit
+ if ((imm0 == 1) ^ ((bit & mask) != 0))
+ {
+ code |= bit; // set the current bit
+ }
+ else
+ {
+ code &= ~bit; // clear the current bit
+ }
+ bit >>= 1;
+ }
+ dst += emitOutput_Thumb1Instr(dst, code);
+
+ return dst;
+}
+
+#endif // FEATURE_ITINSTRUCTION
+
+/*****************************************************************************
+ *
+ * Output a 32-bit nop instruction.
+ */
+
+BYTE* emitter::emitOutputNOP(BYTE* dst, instruction ins, insFormat fmt)
+{
+ size_t code = emitInsCode(ins, fmt);
+
+ dst += emitOutput_Thumb2Instr(dst, code);
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Append the machine code corresponding to the given instruction descriptor
+ * to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
+ * is the instruction group that contains the instruction. Updates '*dp' to
+ * point past the generated code, and returns the size of the instruction
+ * descriptor in bytes.
+ */
+
+size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
+{
+ BYTE* dst = *dp;
+ BYTE* odst = dst;
+ size_t code = 0;
+ size_t sz = 0;
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+ emitAttr size = id->idOpSize();
+ unsigned char callInstrSize = 0;
+ ssize_t condcode;
+
+#ifdef DEBUG
+ bool dspOffs = emitComp->opts.dspGCtbls || !emitComp->opts.disDiffable;
+#endif // DEBUG
+
+ assert(REG_NA == (int)REG_NA);
+
+ VARSET_TP VARSET_INIT_NOCOPY(GCvars, VarSetOps::UninitVal());
+
+ /* What instruction format have we got? */
+
+ switch (fmt)
+ {
+ int imm;
+ int imm0;
+ int mask;
+ int bit;
+ BYTE* addr;
+ regMaskTP gcrefRegs;
+ regMaskTP byrefRegs;
+
+ case IF_T1_A: // T1_A ................
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+#ifdef FEATURE_ITINSTRUCTION
+ case IF_T1_B: // T1_B ........cccc.... cond
+ assert(id->idGCref() == GCT_NONE);
+ condcode = emitGetInsSC(id);
+ dst = emitOutputIT(dst, ins, fmt, condcode);
+ sz = SMALL_IDSC_SIZE;
+ break;
+#endif // FEATURE_ITINSTRUCTION
+
+ case IF_T1_C: // T1_C .....iiiiinnnddd R1 R2 imm5
+ sz = SMALL_IDSC_SIZE;
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_D3(id->idReg1());
+ code |= insEncodeRegT1_N3(id->idReg2());
+ if (emitInsIsLoadOrStore(ins))
+ {
+ imm = insUnscaleImm(imm, size);
+ }
+ assert((imm & 0x001f) == imm);
+ code |= (imm << 6);
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_D0: // T1_D0 ........Dmmmmddd R1* R2*
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_D4(id->idReg1());
+ code |= insEncodeRegT1_M4(id->idReg2());
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_E: // T1_E ..........nnnddd R1 R2
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_D3(id->idReg1());
+ code |= insEncodeRegT1_N3(id->idReg2());
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_F: // T1_F .........iiiiiii SP imm7
+ sz = emitGetInstrDescSize(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ imm = insUnscaleImm(imm, size);
+ assert((imm & 0x007F) == imm);
+ code |= imm;
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_G: // T1_G .......iiinnnddd R1 R2 imm3
+ sz = SMALL_IDSC_SIZE;
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_D3(id->idReg1());
+ code |= insEncodeRegT1_N3(id->idReg2());
+ assert((imm & 0x0007) == imm);
+ code |= (imm << 6);
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_H: // T1_H .......mmmnnnddd R1 R2 R3
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_D3(id->idReg1());
+ code |= insEncodeRegT1_N3(id->idReg2());
+ code |= insEncodeRegT1_M3(id->idReg3());
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_I: // T1_I ......i.iiiiiddd R1 imm6
+ assert(id->idIsBound());
+
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_T1_J0: // T1_J0 .....dddiiiiiiii R1 imm8
+ case IF_T1_J1: // T1_J1 .....dddiiiiiiii R1 <regmask8>
+ case IF_T1_J2: // T1_J2 .....dddiiiiiiii R1 SP imm8
+ sz = emitGetInstrDescSize(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_DI(id->idReg1());
+ if (fmt == IF_T1_J2)
+ {
+ imm = insUnscaleImm(imm, size);
+ }
+ assert((imm & 0x00ff) == imm);
+ code |= imm;
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_L0: // T1_L0 ........iiiiiiii imm8
+ case IF_T1_L1: // T1_L1 .......Rrrrrrrrr <regmask8>
+ sz = emitGetInstrDescSize(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ if (fmt == IF_T1_L1)
+ {
+ assert((imm & 0x3) != 0x3);
+ if (imm & 0x3)
+ code |= 0x0100; // R bit
+ imm >>= 2;
+ }
+ assert((imm & 0x00ff) == imm);
+ code |= imm;
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T2_A: // T2_A ................ ................
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_B: // T2_B ................ ............iiii imm4
+ sz = SMALL_IDSC_SIZE;
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ assert((imm & 0x000F) == imm);
+ code |= imm;
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_C0: // T2_C0 ...........Snnnn .iiiddddiishmmmm R1 R2 R3 S, imm5, sh
+ case IF_T2_C4: // T2_C4 ...........Snnnn ....dddd....mmmm R1 R2 R3 S
+ case IF_T2_C5: // T2_C5 ............nnnn ....dddd....mmmm R1 R2 R3
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ code |= insEncodeRegT2_N(id->idReg2());
+ code |= insEncodeRegT2_M(id->idReg3());
+ if (fmt != IF_T2_C5)
+ code |= insEncodeSetFlags(id->idInsFlags());
+ if (fmt == IF_T2_C0)
+ {
+ imm = emitGetInsSC(id);
+ code |= insEncodeShiftCount(imm);
+ code |= insEncodeShiftOpts(id->idInsOpt());
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_C1: // T2_C1 ...........S.... .iiiddddiishmmmm R1 R2 S, imm5, sh
+ case IF_T2_C2: // T2_C2 ...........S.... .iiiddddii..mmmm R1 R2 S, imm5
+ case IF_T2_C6: // T2_C6 ................ ....dddd..iimmmm R1 R2 imm2
+ sz = SMALL_IDSC_SIZE;
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ code |= insEncodeRegT2_M(id->idReg2());
+ if (fmt == IF_T2_C6)
+ {
+ assert((imm & 0x0018) == imm);
+ code |= (imm << 1);
+ }
+ else
+ {
+ code |= insEncodeSetFlags(id->idInsFlags());
+ code |= insEncodeShiftCount(imm);
+ if (fmt == IF_T2_C1)
+ code |= insEncodeShiftOpts(id->idInsOpt());
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_C3: // T2_C3 ...........S.... ....dddd....mmmm R1 R2 S
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ code |= insEncodeRegT2_M(id->idReg2());
+ code |= insEncodeSetFlags(id->idInsFlags());
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_C7: // T2_C7 ............nnnn ..........shmmmm R1 R2 imm2
+ case IF_T2_C8: // T2_C8 ............nnnn .iii....iishmmmm R1 R2 imm5, sh
+ sz = SMALL_IDSC_SIZE;
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_N(id->idReg1());
+ code |= insEncodeRegT2_M(id->idReg2());
+ if (fmt == IF_T2_C7)
+ {
+ assert((imm & 0x0003) == imm);
+ code |= (imm << 4);
+ }
+ else if (fmt == IF_T2_C8)
+ {
+ code |= insEncodeShiftCount(imm);
+ code |= insEncodeShiftOpts(id->idInsOpt());
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_C9: // T2_C9 ............nnnn ............mmmm R1 R2
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_N(id->idReg1());
+ code |= insEncodeRegT2_M(id->idReg2());
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_C10: // T2_C10 ............mmmm ....dddd....mmmm R1 R2
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ code |= insEncodeRegT2_M(id->idReg2());
+ code |= insEncodeRegT2_N(id->idReg2());
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_D0: // T2_D0 ............nnnn .iiiddddii.wwwww R1 R2 imm5, imm5
+ case IF_T2_D1: // T2_D1 ................ .iiiddddii.wwwww R1 imm5, imm5
+ sz = SMALL_IDSC_SIZE;
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ if (fmt == IF_T2_D0)
+ code |= insEncodeRegT2_N(id->idReg2());
+ code |= insEncodeBitFieldImm(imm);
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_E0: // T2_E0 ............nnnn tttt......shmmmm R1 R2 R3 imm2
+ case IF_T2_E1: // T2_E1 ............nnnn tttt............ R1 R2
+ case IF_T2_E2: // T2_E2 ................ tttt............ R1
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_T(id->idReg1());
+ if (fmt == IF_T2_E0)
+ {
+ sz = emitGetInstrDescSize(id);
+ code |= insEncodeRegT2_N(id->idReg2());
+ if (id->idIsLclVar())
+ {
+ code |= insEncodeRegT2_M(codeGen->rsGetRsvdReg());
+ imm = 0;
+ }
+ else
+ {
+ code |= insEncodeRegT2_M(id->idReg3());
+ imm = emitGetInsSC(id);
+ assert((imm & 0x0003) == imm);
+ code |= (imm << 4);
+ }
+ }
+ else
+ {
+ sz = SMALL_IDSC_SIZE;
+ if (fmt != IF_T2_E2)
+ {
+ code |= insEncodeRegT2_N(id->idReg2());
+ }
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_F1: // T2_F1 ............nnnn ttttdddd....mmmm R1 R2 R3 R4
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_T(id->idReg1());
+ code |= insEncodeRegT2_D(id->idReg2());
+ code |= insEncodeRegT2_N(id->idReg3());
+ code |= insEncodeRegT2_M(id->idReg4());
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_F2: // T2_F2 ............nnnn aaaadddd....mmmm R1 R2 R3 R4
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ code |= insEncodeRegT2_N(id->idReg2());
+ code |= insEncodeRegT2_M(id->idReg3());
+ code |= insEncodeRegT2_T(id->idReg4());
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_G0: // T2_G0 .......PU.W.nnnn ttttTTTTiiiiiiii R1 R2 R3 imm8, PUW
+ case IF_T2_G1: // T2_G1 ............nnnn ttttTTTT........ R1 R2 R3
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_T(id->idReg1());
+ code |= insEncodeRegT2_D(id->idReg2());
+ code |= insEncodeRegT2_N(id->idReg3());
+ if (fmt == IF_T2_G0)
+ {
+ sz = emitGetInstrDescSizeSC(id);
+ imm = emitGetInsSC(id);
+ assert(unsigned_abs(imm) <= 0x00ff);
+ code |= abs(imm);
+ code |= insEncodePUW_G0(id->idInsOpt(), imm);
+ }
+ else
+ {
+ sz = emitGetInstrDescSize(id);
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_H0: // T2_H0 ............nnnn tttt.PUWiiiiiiii R1 R2 imm8, PUW
+ case IF_T2_H1: // T2_H1 ............nnnn tttt....iiiiiiii R1 R2 imm8
+ case IF_T2_H2: // T2_H2 ............nnnn ........iiiiiiii R1 imm8
+ sz = emitGetInstrDescSizeSC(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_T(id->idReg1());
+
+ if (fmt != IF_T2_H2)
+ code |= insEncodeRegT2_N(id->idReg2());
+
+ if (fmt == IF_T2_H0)
+ {
+ assert(unsigned_abs(imm) <= 0x00ff);
+ code |= insEncodePUW_H0(id->idInsOpt(), imm);
+ code |= unsigned_abs(imm);
+ }
+ else
+ {
+ assert((imm & 0x00ff) == imm);
+ code |= imm;
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_I0: // T2_I0 ..........W.nnnn rrrrrrrrrrrrrrrr R1 W, imm16
+ case IF_T2_I1: // T2_I1 ................ rrrrrrrrrrrrrrrr imm16
+ sz = emitGetInstrDescSizeSC(id);
+ code = emitInsCode(ins, fmt);
+ if (fmt == IF_T2_I0)
+ {
+ code |= insEncodeRegT2_N(id->idReg1());
+ code |= (1 << 21); // W bit
+ }
+ imm = emitGetInsSC(id);
+ assert((imm & 0x3) != 0x3);
+ if (imm & 0x2)
+ code |= 0x8000; // PC bit
+ if (imm & 0x1)
+ code |= 0x4000; // LR bit
+ imm >>= 2;
+ assert(imm <= 0x1fff); // 13 bits
+ code |= imm;
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_K1: // T2_K1 ............nnnn ttttiiiiiiiiiiii R1 R2 imm12
+ case IF_T2_K4: // T2_K4 ........U....... ttttiiiiiiiiiiii R1 PC U, imm12
+ case IF_T2_K3: // T2_K3 ........U....... ....iiiiiiiiiiii PC U, imm12
+ sz = emitGetInstrDescSize(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ if (fmt != IF_T2_K3)
+ {
+ code |= insEncodeRegT2_T(id->idReg1());
+ }
+ if (fmt == IF_T2_K1)
+ {
+ code |= insEncodeRegT2_N(id->idReg2());
+ assert(imm <= 0xfff); // 12 bits
+ code |= imm;
+ }
+ else
+ {
+ assert(unsigned_abs(imm) <= 0xfff); // 12 bits (signed)
+ code |= abs(imm);
+ if (imm >= 0)
+ code |= (1 << 23); // U bit
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_K2: // T2_K2 ............nnnn ....iiiiiiiiiiii R1 imm12
+ sz = emitGetInstrDescSizeSC(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_N(id->idReg1());
+ assert(imm <= 0xfff); // 12 bits
+ code |= imm;
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_L0: // T2_L0 .....i.....Snnnn .iiiddddiiiiiiii R1 R2 S, imm8<<imm4
+ case IF_T2_L1: // T2_L1 .....i.....S.... .iiiddddiiiiiiii R1 S, imm8<<imm4
+ case IF_T2_L2: // T2_L2 .....i......nnnn .iii....iiiiiiii R1 imm8<<imm4
+ sz = emitGetInstrDescSize(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+
+ if (fmt == IF_T2_L2)
+ code |= insEncodeRegT2_N(id->idReg1());
+ else
+ {
+ code |= insEncodeSetFlags(id->idInsFlags());
+ code |= insEncodeRegT2_D(id->idReg1());
+ if (fmt == IF_T2_L0)
+ code |= insEncodeRegT2_N(id->idReg2());
+ }
+ assert(isModImmConst(imm)); // Funky ARM imm encoding
+ imm = encodeModImmConst(imm);
+ assert(imm <= 0xfff); // 12 bits
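+ // Scatter the 12-bit modified-immediate encoding i:imm3:imm8 into the opcode:
+ // imm8 -> bits 7:0, imm3 -> bits 14:12, i -> bit 26.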
+ code |= (imm & 0x00ff);
+ code |= (imm & 0x0700) << 4;
+ code |= (imm & 0x0800) << 15;
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_M0: // T2_M0 .....i......nnnn .iiiddddiiiiiiii R1 R2 imm12
+ sz = emitGetInstrDescSizeSC(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ if (fmt == IF_T2_M0)
+ code |= insEncodeRegT2_N(id->idReg2());
+ assert(imm <= 0xfff); // 12 bits
+ code |= (imm & 0x00ff);
+ code |= (imm & 0x0700) << 4;
+ code |= (imm & 0x0800) << 15;
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_N: // T2_N .....i......iiii .iiiddddiiiiiiii R1 imm16
+ case IF_T2_N2: // T2_N2 .....i......iiii .iiiddddiiiiiiii R1 imm16
+ sz = emitGetInstrDescSizeSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ imm = emitGetInsSC(id);
+ if (fmt == IF_T2_N2)
+ {
+ assert(!id->idIsLclVar());
+ assert((ins == INS_movw) || (ins == INS_movt));
+ imm += (size_t)emitConsBlock;
+#ifdef RELOC_SUPPORT
+ if (!id->idIsCnsReloc() && !id->idIsDspReloc())
+#endif
+ {
+ goto SPLIT_IMM;
+ }
+ }
+ else if (id->idIsLclVar())
+ {
+ SPLIT_IMM:
+ if (ins == INS_movw)
+ {
+ imm &= 0xffff;
+ }
+ else
+ {
+ imm = (imm >> 16) & 0xffff;
+ }
+ }
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc() || id->idIsDspReloc())
+ {
+ assert((ins == INS_movt) || (ins == INS_movw));
+ dst += emitOutput_Thumb2Instr(dst, code);
+ if ((ins == INS_movt) && emitComp->info.compMatchedVM)
+ emitRecordRelocation((void*)(dst - 8), (void*)imm, IMAGE_REL_BASED_THUMB_MOV32);
+ }
+ else
+#endif // RELOC_SUPPORT
+ {
+ assert((imm & 0x0000ffff) == imm);
+ code |= (imm & 0x00ff);
+ code |= ((imm & 0x0700) << 4);
+ code |= ((imm & 0x0800) << 15);
+ code |= ((imm & 0xf000) << 4);
+ dst += emitOutput_Thumb2Instr(dst, code);
+ }
+ break;
+
+ case IF_T2_VFP3:
+ // these are the binary operators
+ // d = n <op> m
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_VectorN(id->idReg2(), size, true);
+ code |= insEncodeRegT2_VectorM(id->idReg3(), size, true);
+ code |= insEncodeRegT2_VectorD(id->idReg1(), size, true);
+ if (size == EA_8BYTE)
+ code |= 1 << 8;
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_VFP2:
+ {
+ emitAttr srcSize;
+ emitAttr dstSize;
+ size_t szCode = 0;
+
+ switch (ins)
+ {
+ case INS_vcvt_i2d:
+ case INS_vcvt_u2d:
+ case INS_vcvt_f2d:
+ srcSize = EA_4BYTE;
+ dstSize = EA_8BYTE;
+ break;
+
+ case INS_vcvt_d2i:
+ case INS_vcvt_d2u:
+ case INS_vcvt_d2f:
+ srcSize = EA_8BYTE;
+ dstSize = EA_4BYTE;
+ break;
+
+ case INS_vmov:
+ case INS_vabs:
+ case INS_vsqrt:
+ case INS_vcmp:
+ case INS_vneg:
+ if (id->idOpSize() == EA_8BYTE)
+ szCode |= (1 << 8);
+ __fallthrough;
+
+ default:
+ srcSize = dstSize = id->idOpSize();
+ break;
+ }
+
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ code |= szCode;
+ code |= insEncodeRegT2_VectorD(id->idReg1(), dstSize, true);
+ code |= insEncodeRegT2_VectorM(id->idReg2(), srcSize, true);
+
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+ }
+
+ case IF_T2_VLDST:
+ sz = emitGetInstrDescSizeSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_N(id->idReg2());
+ code |= insEncodeRegT2_VectorD(id->idReg1(), size, true);
+
+ imm = emitGetInsSC(id);
+ if (imm < 0)
+ imm = -imm; // leaving bit 23 clear means the offset is subtracted
+ else
+ code |= 1 << 23; // set the positive bit
+
+ // offset is +/- 1020
+ assert(!(imm % 4));
+ assert(imm >> 10 == 0);
+ code |= imm >> 2;
+ // bit 8 is set for doubles
+ if (id->idOpSize() == EA_8BYTE)
+ code |= (1 << 8);
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_VMOVD:
+ // 3op assemble a double from two int regs (or back)
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ if (ins == INS_vmov_i2d)
+ {
+ code |= insEncodeRegT2_VectorM(id->idReg1(), size, true);
+ code |= id->idReg2() << 12;
+ code |= id->idReg3() << 16;
+ }
+ else
+ {
+ assert(ins == INS_vmov_d2i);
+ code |= id->idReg1() << 12;
+ code |= id->idReg2() << 16;
+ code |= insEncodeRegT2_VectorM(id->idReg3(), size, true);
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_VMOVS:
+ // 2op assemble a float from one int reg (or back)
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ if (ins == INS_vmov_f2i)
+ {
+ code |= insEncodeRegT2_VectorN(id->idReg2(), EA_4BYTE, true);
+ code |= id->idReg1() << 12;
+ }
+ else
+ {
+ assert(ins == INS_vmov_i2f);
+ code |= insEncodeRegT2_VectorN(id->idReg1(), EA_4BYTE, true);
+ code |= id->idReg2() << 12;
+ }
+
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T1_J3: // T1_J3 .....dddiiiiiiii R1 PC imm8
+ case IF_T2_M1: // T2_M1 .....i.......... .iiiddddiiiiiiii R1 PC imm12
+ assert(id->idGCref() == GCT_NONE);
+ assert(id->idIsBound());
+
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescLbl);
+ break;
+
+ case IF_T1_K: // T1_K ....cccciiiiiiii Branch imm8, cond4
+ case IF_T1_M: // T1_M .....iiiiiiiiiii Branch imm11
+ case IF_T2_J1: // T2_J1 .....Scccciiiiii ..j.jiiiiiiiiiii Branch imm20, cond4
+ case IF_T2_J2: // T2_J2 .....Siiiiiiiiii ..j.jiiiiiiiiii. Branch imm24
+ case IF_T2_N1: // T2_N .....i......iiii .iiiddddiiiiiiii R1 imm16
+ case IF_LARGEJMP:
+ assert(id->idGCref() == GCT_NONE);
+ assert(id->idIsBound());
+
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_T1_D1: // T1_D1 .........mmmm... R1*
+
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_M4(id->idReg1());
+ dst += emitOutput_Thumb1Instr(dst, code);
+ sz = SMALL_IDSC_SIZE;
+ break;
+
+ case IF_T1_D2: // T1_D2 .........mmmm... R3*
+
+ /* Is this a "fat" call descriptor? */
+
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+ gcrefRegs = idCall->idcGcrefRegs;
+ byrefRegs = idCall->idcByrefRegs;
+ VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
+ sz = sizeof(instrDescCGCA);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+
+ gcrefRegs = emitDecodeCallGCregs(id);
+ byrefRegs = 0;
+ VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+ sz = sizeof(instrDesc);
+ }
+
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_M4(id->idReg3());
+ callInstrSize = SafeCvtAssert<unsigned char>(emitOutput_Thumb1Instr(dst, code));
+ dst += callInstrSize;
+ goto DONE_CALL;
+
+ case IF_T2_J3: // T2_J3 .....Siiiiiiiiii ..j.jiiiiiiiiii. Call imm24
+
+ /* Is this a "fat" call descriptor? */
+
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+ gcrefRegs = idCall->idcGcrefRegs;
+ byrefRegs = idCall->idcByrefRegs;
+ VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
+ sz = sizeof(instrDescCGCA);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+
+ gcrefRegs = emitDecodeCallGCregs(id);
+ byrefRegs = 0;
+ VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+ sz = sizeof(instrDesc);
+ }
+
+ addr = id->idAddr()->iiaAddr;
+ code = emitInsCode(ins, fmt);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ callInstrSize = SafeCvtAssert<unsigned char>(emitOutput_Thumb2Instr(dst, code));
+ dst += callInstrSize;
+ if (emitComp->info.compMatchedVM)
+ emitRecordRelocation((void*)(dst - 4), addr, IMAGE_REL_BASED_THUMB_BRANCH24);
+ }
+ else
+#endif // RELOC_SUPPORT
+ {
+ addr = (BYTE*)((size_t)addr & ~1); // Clear the lowest bit from target address
+
+ /* Calculate PC relative displacement */
+ int disp = addr - (dst + 4);
+ bool S = (disp < 0);
+ bool I1 = ((disp & 0x00800000) == 0);
+ bool I2 = ((disp & 0x00400000) == 0);
+
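+ // Same J1/J2 derivation as for B.W in emitOutputLJ: J1 = NOT(I1) EOR S and
+ // J2 = NOT(I2) EOR S, with I1/I2 already holding the negated offset bits.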
+ if (S)
+ code |= (1 << 26); // S bit
+ if (S ^ I1)
+ code |= (1 << 13); // J1 bit
+ if (S ^ I2)
+ code |= (1 << 11); // J2 bit
+
+ int immLo = (disp & 0x00000ffe) >> 1;
+ int immHi = (disp & 0x003ff000) >> 12;
+
+ code |= (immHi << 16);
+ code |= immLo;
+
+ disp = abs(disp);
+ assert((disp & 0x00fffffe) == disp);
+
+ callInstrSize = SafeCvtAssert<unsigned char>(emitOutput_Thumb2Instr(dst, code));
+ dst += callInstrSize;
+ }
+
+ DONE_CALL:
+
+ /* We update the GC info before the call as the variables cannot be
+ used by the call. Killing variables before the call helps with
+ boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
+ If we ever track aliased variables (which could be used by the
+ call), we would have to keep them alive past the call. */
+
+ emitUpdateLiveGCvars(GCvars, *dp);
+
+ // If the method returns a GC ref, mark R0 appropriately.
+ if (id->idGCref() == GCT_GCREF)
+ gcrefRegs |= RBM_R0;
+ else if (id->idGCref() == GCT_BYREF)
+ byrefRegs |= RBM_R0;
+
+ // If the GC register set has changed, report the new set.
+ if (gcrefRegs != emitThisGCrefRegs)
+ emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
+
+ if (byrefRegs != emitThisByrefRegs)
+ emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
+
+ // Some helper calls may be marked as not requiring GC info to be recorded.
+ if ((!id->idIsNoGC()))
+ {
+ // On ARM, as on AMD64, we don't change the stack pointer to push/pop args.
+ // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism
+ // to record the call for GC info purposes. (It might be best to use an alternate call,
+ // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.)
+ emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0);
+
+ /* Do we need to record a call location for GC purposes? */
+
+ if (!emitFullGCinfo)
+ {
+ emitRecordGCcall(dst, callInstrSize);
+ }
+ }
+
+ break;
+
+ /********************************************************************/
+ /* oops */
+ /********************************************************************/
+
+ default:
+
+#ifdef DEBUG
+ printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
+ assert(!"don't know how to encode this instruction");
+#endif
+ break;
+ }
+
+ // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref.
+ // We assume here that "id->idGCref()" is not GC_NONE only if the instruction described by "id" writes a
+ // GC ref to register "id->idReg1()". (It may, apparently, also not be GC_NONE in other cases, such as
+ // for stores, but we ignore those cases here.)
+ if (emitInsMayWriteToGCReg(id)) // True if "id->idIns()" writes to a register that can hold a GC ref.
+ {
+ // If we ever generate instructions that write to multiple registers (LDM, or POP),
+ // then we'd need to do more work here to ensure that changes in the status of GC refs are
+ // tracked properly.
+ if (emitInsMayWriteMultipleRegs(id))
+ {
+ // We explicitly list the multiple-destination-target instruction that we expect to
+ // be emitted outside of the prolog and epilog here.
+ switch (ins)
+ {
+ case INS_smull:
+ case INS_umull:
+ case INS_smlal:
+ case INS_umlal:
+ case INS_vmov_d2i:
+ // For each of these, idReg1() and idReg2() are the destination registers.
+ emitGCregDeadUpd(id->idReg1(), dst);
+ emitGCregDeadUpd(id->idReg2(), dst);
+ break;
+ default:
+ assert(false); // We need to recognize this multi-target instruction...
+ }
+ }
+ else
+ {
+ if (id->idGCref() != GCT_NONE)
+ {
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ }
+ else
+ {
+ // I also assume that "idReg1" is the destination register of all instructions that write to registers.
+ emitGCregDeadUpd(id->idReg1(), dst);
+ }
+ }
+ }
+
+ // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
+ // ref or overwritten one.
+ if (emitInsWritesToLclVarStackLoc(id))
+ {
+ int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
+ unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), sizeof(size_t));
+ regNumber regBase;
+ int adr = emitComp->lvaFrameAddress(varNum, true, &regBase, ofs);
+ if (id->idGCref() != GCT_NONE)
+ {
+ emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst);
+ }
+ else
+ {
+ // If the type of the local is a gc ref type, update the liveness.
+ var_types vt;
+ if (varNum >= 0)
+ {
+ // "Regular" (non-spill-temp) local.
+ vt = var_types(emitComp->lvaTable[varNum].lvType);
+ }
+ else
+ {
+ TempDsc* tmpDsc = emitComp->tmpFindNum(varNum);
+ vt = tmpDsc->tdTempType();
+ }
+ if (vt == TYP_REF || vt == TYP_BYREF)
+ emitGCvarDeadUpd(adr + ofs, dst);
+ }
+ }
+
+#ifdef DEBUG
+ /* Make sure we set the instruction descriptor size correctly */
+
+ size_t expected = emitSizeOfInsDsc(id);
+ assert(sz == expected);
+
+ if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
+ {
+ emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig);
+ }
+
+ if (emitComp->compDebugBreak)
+ {
+ // set JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
+ // at the beginning of this method.
+ if (JitConfig.JitEmitPrintRefRegs() != 0)
+ {
+ printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
+ printf(" emitThisGCrefRegs(0x%p)=", dspPtr(&emitThisGCrefRegs));
+ printRegMaskInt(emitThisGCrefRegs);
+ emitDispRegSet(emitThisGCrefRegs);
+ printf("\n");
+ printf(" emitThisByrefRegs(0x%p)=", dspPtr(&emitThisByrefRegs));
+ printRegMaskInt(emitThisByrefRegs);
+ emitDispRegSet(emitThisByrefRegs);
+ printf("\n");
+ }
+
+ // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for
+ // emitting instruction a6, (i.e. IN00a6 in jitdump).
+ if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
+ {
+ assert(!"JitBreakEmitOutputInstr reached");
+ }
+ }
+#endif
+
+ /* All instructions are expected to generate code */
+
+ assert(*dp != dst);
+
+ *dp = dst;
+
+ return sz;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+static bool insAlwaysSetFlags(instruction ins)
+{
+ bool result = false;
+ switch (ins)
+ {
+ case INS_cmp:
+ case INS_cmn:
+ case INS_teq:
+ case INS_tst:
+ result = true;
+ break;
+
+ default:
+ break;
+ }
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Display the instruction name, optionally the instruction
+ * can add the "s" suffix if it must set the flags.
+ */
+void emitter::emitDispInst(instruction ins, insFlags flags)
+{
+ const char* insstr = codeGen->genInsName(ins);
+ int len = strlen(insstr);
+
+ /* Display the instruction name */
+
+ printf("%s", insstr);
+ if (insSetsFlags(flags) && !insAlwaysSetFlags(ins))
+ {
+ printf("s");
+ len++;
+ }
+
+ //
+ // Add at least one space after the instruction name
+ // and pad with spaces until we reach the normal width of 8
+ do
+ {
+ printf(" ");
+ len++;
+ } while (len < 8);
+}
+
+/*****************************************************************************
+ *
+ * Display a reloc value
+ * If we are formatting for an assembly listing, don't print the hex value,
+ * since it would prevent us from doing assembly diffs
+ */
+void emitter::emitDispReloc(int value, bool addComma)
+{
+ if (emitComp->opts.disAsm)
+ {
+ printf("(reloc)");
+ }
+ else
+ {
+ printf("(reloc 0x%x)", dspPtr(value));
+ }
+
+ if (addComma)
+ printf(", ");
+}
+
+#define STRICT_ARM_ASM 0
+
+/*****************************************************************************
+ *
+ * Display an immediate value
+ */
+void emitter::emitDispImm(int imm, bool addComma, bool alwaysHex /* =false */)
+{
+ if (!alwaysHex && (imm > -1000) && (imm < 1000))
+ printf("%d", imm);
+ else if ((imm > 0) ||
+ (imm == -imm) || // -0x80000000 == 0x80000000. So we don't want to add an extra "-" at the beginning.
+ (emitComp->opts.disDiffable && (imm == 0xD1FFAB1E))) // Don't display this as negative
+ printf("0x%02x", imm);
+ else // val <= -1000
+ printf("-0x%02x", -imm);
+
+ if (addComma)
+ printf(", ");
+}
+
+/*****************************************************************************
+ *
+ * Display an ARM condition code for the IT instructions
+ */
+void emitter::emitDispCond(int cond)
+{
+ const static char* armCond[16] = {"eq", "ne", "hs", "lo", "mi", "pl", "vs", "vc",
+ "hi", "ls", "ge", "lt", "gt", "le", "AL", "NV"}; // The last two are invalid
+ assert(0 <= cond && (unsigned)cond < ArrLen(armCond));
+ printf(armCond[cond]);
+}
+
+/*****************************************************************************
+ *
+ * Display a register range in {Rfirst-Rlast} format
+ */
+void emitter::emitDispRegRange(regNumber reg, int len, emitAttr attr)
+{
+ printf("{");
+ emitDispReg(reg, attr, false);
+ if (len > 1)
+ {
+ printf("-");
+ emitDispReg((regNumber)(reg + len - 1), attr, false);
+ }
+ printf("}");
+}
+
+/*****************************************************************************
+ *
+ * Display a register mask in a list format
+ */
+void emitter::emitDispRegmask(int imm, bool encodedPC_LR)
+{
+ bool printedOne = false;
+ bool hasPC;
+ bool hasLR;
+
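+ // When encodedPC_LR is true, bit 1 of imm encodes PC, bit 0 encodes LR, and the remaining
+ // bits (imm >> 2) form the low-register mask starting at R0.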
+ if (encodedPC_LR)
+ {
+ hasPC = (imm & 2) != 0;
+ hasLR = (imm & 1) != 0;
+ imm >>= 2;
+ }
+ else
+ {
+ hasPC = (imm & RBM_PC) != 0;
+ hasLR = (imm & RBM_LR) != 0;
+ imm &= ~(RBM_PC | RBM_LR);
+ }
+
+ regNumber reg = REG_R0;
+ unsigned bit = 1;
+
+ printf("{");
+ while (imm != 0)
+ {
+ if (bit & imm)
+ {
+ if (printedOne)
+ printf(",");
+ printf("%s", emitRegName(reg));
+ printedOne = true;
+ imm -= bit;
+ }
+
+ reg = regNumber(reg + 1);
+ bit <<= 1;
+ }
+
+ if (hasLR)
+ {
+ if (printedOne)
+ printf(",");
+ printf("%s", emitRegName(REG_LR));
+ printedOne = true;
+ }
+
+ if (hasPC)
+ {
+ if (printedOne)
+ printf(",");
+ printf("%s", emitRegName(REG_PC));
+ printedOne = true;
+ }
+ printf("}");
+}
+
+/*****************************************************************************
+ *
+ * Display the shift option (LSL, LSR, ASR, ROR, RRX) used in a Thumb-2 encoding
+ */
+
+void emitter::emitDispShiftOpts(insOpts opt)
+{
+ if (opt == INS_OPTS_LSL)
+ printf(" LSL ");
+ else if (opt == INS_OPTS_LSR)
+ printf(" LSR ");
+ else if (opt == INS_OPTS_ASR)
+ printf(" ASR ");
+ else if (opt == INS_OPTS_ROR)
+ printf(" ROR ");
+ else if (opt == INS_OPTS_RRX)
+ printf(" RRX ");
+}
+
+/*****************************************************************************
+ *
+ * Display a register
+ */
+void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma)
+{
+ if (isFloatReg(reg))
+ {
+ const char* size = attr == EA_8BYTE ? "d" : "s";
+ printf("%s%s", size, emitFloatRegName(reg, attr) + 1);
+ }
+ else
+ {
+ printf("%s", emitRegName(reg, attr));
+ }
+
+ if (addComma)
+ printf(", ");
+}
+
+void emitter::emitDispFloatReg(regNumber reg, emitAttr attr, bool addComma)
+{
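+ // Currently a no-op on ARM; emitDispReg above already handles floating-point registers.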
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg]
+ */
+void emitter::emitDispAddrR(regNumber reg, emitAttr attr)
+{
+ printf("[");
+ emitDispReg(reg, attr, false);
+ printf("]");
+ emitDispGC(attr);
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg + imm]
+ */
+void emitter::emitDispAddrRI(regNumber reg, int imm, emitAttr attr)
+{
+ bool regIsSPorFP = (reg == REG_SP) || (reg == REG_FP);
+
+ printf("[");
+ emitDispReg(reg, attr, false);
+ if (imm != 0)
+ {
+ if (imm >= 0)
+ {
+#if STRICT_ARM_ASM
+ printf(", ");
+#else
+ printf("+");
+#endif
+ }
+ emitDispImm(imm, false, regIsSPorFP);
+ }
+ printf("]");
+ emitDispGC(attr);
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg + reg]
+ */
+void emitter::emitDispAddrRR(regNumber reg1, regNumber reg2, emitAttr attr)
+{
+ printf("[");
+ emitDispReg(reg1, attr, false);
+#if STRICT_ARM_ASM
+ printf(", ");
+#else
+ printf("+");
+#endif
+ emitDispReg(reg2, attr, false);
+ printf("]");
+ emitDispGC(attr);
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg + reg << imm]
+ */
+void emitter::emitDispAddrRRI(regNumber reg1, regNumber reg2, int imm, emitAttr attr)
+{
+ printf("[");
+ emitDispReg(reg1, attr, false);
+#if STRICT_ARM_ASM
+ printf(", ");
+ emitDispReg(reg2, attr, false);
+ if (imm > 0)
+ {
+ printf(" LSL ");
+ emitDispImm(1 << imm, false);
+ }
+#else
+ printf("+");
+ if (imm > 0)
+ {
+ emitDispImm(1 << imm, false);
+ printf("*");
+ }
+ emitDispReg(reg2, attr, false);
+#endif
+ printf("]");
+ emitDispGC(attr);
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg + imm] with optional pre/post-increment and writeback (PUW)
+ */
+void emitter::emitDispAddrPUW(regNumber reg, int imm, insOpts opt, emitAttr attr)
+{
+ bool regIsSPorFP = (reg == REG_SP) || (reg == REG_FP);
+
+ printf("[");
+ emitDispReg(reg, attr, false);
+ if (insOptAnyInc(opt))
+ printf("!");
+
+ if (imm != 0)
+ {
+ if (imm >= 0)
+ {
+#if STRICT_ARM_ASM
+ printf(", ");
+#else
+ printf("+");
+#endif
+ }
+ emitDispImm(imm, false, regIsSPorFP);
+ }
+ printf("]");
+
+ emitDispGC(attr);
+}
+
+/*****************************************************************************
+ *
+ * Display the gc-ness of the operand
+ */
+void emitter::emitDispGC(emitAttr attr)
+{
+#if 0
+ // TODO-ARM-Cleanup: Fix or delete.
+ if (attr == EA_GCREF)
+ printf(" @gc");
+ else if (attr == EA_BYREF)
+ printf(" @byref");
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Display (optionally) the instruction encoding in hex
+ */
+
+void emitter::emitDispInsHex(BYTE* code, size_t sz)
+{
+ // We do not display the instruction hex if we want diff-able disassembly
+ if (!emitComp->opts.disDiffable)
+ {
+ if (sz == 2)
+ {
+ printf(" %04X ", (*((unsigned short*)code)));
+ }
+ else if (sz == 4)
+ {
+ printf(" %04X %04X", (*((unsigned short*)(code + 0))), (*((unsigned short*)(code + 2))));
+ }
+ }
+}
+
+/****************************************************************************
+ *
+ * Display the given instruction.
+ */
+
+void emitter::emitDispInsHelp(
+ instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig)
+{
+ if (EMITVERBOSE)
+ {
+ unsigned idNum = id->idDebugOnlyInfo()->idNum; // Do not remove this! It is needed for VisualStudio
+ // conditional breakpoints
+
+ printf("IN%04x: ", idNum);
+ }
+
+ if (code == NULL)
+ sz = 0;
+
+ if (!emitComp->opts.dspEmit && !isNew && !asmfm && sz)
+ doffs = true;
+
+ /* Display the instruction offset */
+
+ emitDispInsOffs(offset, doffs);
+
+ /* Display the instruction hex code */
+
+ emitDispInsHex(code, sz);
+
+ printf(" ");
+
+ /* Get the instruction and format */
+
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+
+ emitDispInst(ins, id->idInsFlags());
+
+ /* If this instruction has just been added, check its size */
+
+ assert(isNew == false || (int)emitSizeOfInsDsc(id) == emitCurIGfreeNext - (BYTE*)id);
+
+ /* Figure out the operand size */
+ emitAttr attr;
+ if (id->idGCref() == GCT_GCREF)
+ attr = EA_GCREF;
+ else if (id->idGCref() == GCT_BYREF)
+ attr = EA_BYREF;
+ else
+ attr = id->idOpSize();
+
+ switch (fmt)
+ {
+ int imm;
+ int offs;
+ const char* methodName;
+
+ case IF_T1_A: // None
+ case IF_T2_A:
+ break;
+
+ case IF_T1_L0: // Imm
+ case IF_T2_B:
+ emitDispImm(emitGetInsSC(id), false);
+ break;
+
+ case IF_T1_B: // <cond>
+ emitDispCond(emitGetInsSC(id));
+ break;
+
+ case IF_T1_L1: // <regmask8>
+ case IF_T2_I1: // <regmask16>
+ emitDispRegmask(emitGetInsSC(id), true);
+ break;
+
+ case IF_T2_E2: // Reg
+ if (id->idIns() == INS_vmrs)
+ {
+ if (id->idReg1() != REG_R15)
+ {
+ emitDispReg(id->idReg1(), attr, true);
+ printf("FPSCR");
+ }
+ else
+ {
+ printf("APSR, FPSCR");
+ }
+ }
+ else
+ {
+ emitDispReg(id->idReg1(), attr, false);
+ }
+ break;
+
+ case IF_T1_D1:
+ emitDispReg(id->idReg1(), attr, false);
+ break;
+
+ case IF_T1_D2:
+ emitDispReg(id->idReg3(), attr, false);
+ {
+ CORINFO_METHOD_HANDLE handle = (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie;
+ if (handle != 0)
+ {
+ methodName = emitComp->eeGetMethodFullName(handle);
+ printf("\t\t// %s", methodName);
+ }
+ }
+ break;
+
+ case IF_T1_F: // SP, Imm
+ emitDispReg(REG_SP, attr, true);
+ emitDispImm(emitGetInsSC(id), false);
+ break;
+
+ case IF_T1_J0: // Reg, Imm
+ case IF_T2_L1:
+ case IF_T2_L2:
+ case IF_T2_N:
+ emitDispReg(id->idReg1(), attr, true);
+ imm = emitGetInsSC(id);
+ if (fmt == IF_T2_N)
+ {
+ if (emitComp->opts.disDiffable)
+ imm = 0xD1FF;
+#if RELOC_SUPPORT
+ if (id->idIsCnsReloc() || id->idIsDspReloc())
+ {
+ if (emitComp->opts.disDiffable)
+ imm = 0xD1FFAB1E;
+ printf("%s RELOC ", (id->idIns() == INS_movw) ? "LOW" : "HIGH");
+ }
+#endif // RELOC_SUPPORT
+ }
+ emitDispImm(imm, false, (fmt == IF_T2_N));
+ break;
+
+ case IF_T2_N2:
+ emitDispReg(id->idReg1(), attr, true);
+ imm = emitGetInsSC(id);
+ {
+ dataSection* jdsc = 0;
+ NATIVE_OFFSET offs = 0;
+
+ /* Find the appropriate entry in the data section list */
+
+ for (jdsc = emitConsDsc.dsdList; jdsc; jdsc = jdsc->dsNext)
+ {
+ UNATIVE_OFFSET size = jdsc->dsSize;
+
+ /* Is this a label table? */
+
+ if (jdsc->dsType == dataSection::blockAbsoluteAddr)
+ {
+ if (offs == imm)
+ break;
+ }
+
+ offs += size;
+ }
+
+ assert(jdsc != NULL);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ printf("reloc ");
+ }
+#endif
+ printf("%s ADDRESS J_M%03u_DS%02u", (id->idIns() == INS_movw) ? "LOW" : "HIGH",
+ Compiler::s_compMethodsCount, imm);
+
+ // After the MOVT, dump the table
+ if (id->idIns() == INS_movt)
+ {
+ unsigned cnt = jdsc->dsSize / TARGET_POINTER_SIZE;
+ BasicBlock** bbp = (BasicBlock**)jdsc->dsCont;
+
+ bool isBound = (emitCodeGetCookie(*bbp) != NULL);
+
+ if (isBound)
+ {
+ printf("\n\n J_M%03u_DS%02u LABEL DWORD", Compiler::s_compMethodsCount, imm);
+
+ /* Display the label table (it's stored as "BasicBlock*" values) */
+
+ do
+ {
+ insGroup* lab;
+
+ /* Convert the BasicBlock* value to an IG address */
+
+ lab = (insGroup*)emitCodeGetCookie(*bbp++);
+ assert(lab);
+
+ printf("\n DD G_M%03u_IG%02u", Compiler::s_compMethodsCount, lab->igNum);
+ } while (--cnt);
+ }
+ }
+ }
+ break;
+
+ case IF_T2_H2: // [Reg+imm]
+ case IF_T2_K2:
+ emitDispAddrRI(id->idReg1(), emitGetInsSC(id), attr);
+ break;
+
+ case IF_T2_K3: // [PC+imm]
+ emitDispAddrRI(REG_PC, emitGetInsSC(id), attr);
+ break;
+
+ case IF_T1_J1: // reg, <regmask8>
+ case IF_T2_I0: // reg, <regmask16>
+ emitDispReg(id->idReg1(), attr, false);
+ printf("!, ");
+ emitDispRegmask(emitGetInsSC(id), false);
+ break;
+
+ case IF_T1_D0: // Reg, Reg
+ case IF_T1_E:
+ case IF_T2_C3:
+ case IF_T2_C9:
+ case IF_T2_C10:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, false);
+ if (fmt == IF_T1_E && id->idIns() == INS_rsb)
+ {
+ printf(", 0");
+ }
+ break;
+
+ case IF_T2_E1: // Reg, [Reg]
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispAddrR(id->idReg2(), attr);
+ break;
+
+ case IF_T2_D1: // Reg, Imm, Imm
+ emitDispReg(id->idReg1(), attr, true);
+ imm = emitGetInsSC(id);
+ {
+ int lsb = (imm >> 5) & 0x1f;
+ int msb = imm & 0x1f;
+ int imm1 = lsb;
+ int imm2 = msb + 1 - lsb;
+ emitDispImm(imm1, true);
+ emitDispImm(imm2, false);
+ }
+ break;
+
+ case IF_T1_C: // Reg, Reg, Imm
+ case IF_T1_G:
+ case IF_T2_C2:
+ case IF_T2_H1:
+ case IF_T2_K1:
+ case IF_T2_L0:
+ case IF_T2_M0:
+ emitDispReg(id->idReg1(), attr, true);
+ imm = emitGetInsSC(id);
+ if (emitInsIsLoadOrStore(ins))
+ {
+ emitDispAddrRI(id->idReg2(), imm, attr);
+ }
+ else
+ {
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispImm(imm, false);
+ }
+ break;
+
+ case IF_T1_J2:
+ emitDispReg(id->idReg1(), attr, true);
+ imm = emitGetInsSC(id);
+ if (emitInsIsLoadOrStore(ins))
+ {
+ emitDispAddrRI(REG_SP, imm, attr);
+ }
+ else
+ {
+ emitDispReg(REG_SP, attr, true);
+ emitDispImm(imm, false);
+ }
+ break;
+
+ case IF_T2_K4:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispAddrRI(REG_PC, emitGetInsSC(id), attr);
+ break;
+
+ case IF_T2_C1:
+ case IF_T2_C8:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, false);
+ imm = emitGetInsSC(id);
+ if (id->idInsOpt() == INS_OPTS_RRX)
+ {
+ emitDispShiftOpts(id->idInsOpt());
+ assert(imm == 1);
+ }
+ else if (imm > 0)
+ {
+ emitDispShiftOpts(id->idInsOpt());
+ emitDispImm(imm, false);
+ }
+ break;
+
+ case IF_T2_C6:
+ imm = emitGetInsSC(id);
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, (imm != 0));
+ if (imm != 0)
+ {
+ emitDispImm(imm, false);
+ }
+ break;
+
+ case IF_T2_C7:
+ emitDispAddrRRI(id->idReg1(), id->idReg2(), emitGetInsSC(id), attr);
+ break;
+
+ case IF_T2_H0:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispAddrPUW(id->idReg2(), emitGetInsSC(id), id->idInsOpt(), attr);
+ break;
+
+ case IF_T1_H: // Reg, Reg, Reg
+ emitDispReg(id->idReg1(), attr, true);
+ if (emitInsIsLoadOrStore(ins))
+ {
+ emitDispAddrRR(id->idReg2(), id->idReg3(), attr);
+ }
+ else
+ {
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispReg(id->idReg3(), attr, false);
+ }
+ break;
+
+ case IF_T2_C4:
+ case IF_T2_C5:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispReg(id->idReg3(), attr, false);
+ break;
+
+ case IF_T2_VFP3:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispReg(id->idReg3(), attr, false);
+ break;
+
+ case IF_T2_VFP2:
+ switch (id->idIns())
+ {
+ case INS_vcvt_d2i:
+ case INS_vcvt_d2u:
+ case INS_vcvt_d2f:
+ emitDispReg(id->idReg1(), EA_4BYTE, true);
+ emitDispReg(id->idReg2(), EA_8BYTE, false);
+ break;
+
+ case INS_vcvt_i2d:
+ case INS_vcvt_u2d:
+ case INS_vcvt_f2d:
+ emitDispReg(id->idReg1(), EA_8BYTE, true);
+ emitDispReg(id->idReg2(), EA_4BYTE, false);
+ break;
+
+ // For the remaining VFP2 instructions we just use the type on the instruction,
+ // unless it is an asymmetric one like the converts handled above.
+ default:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, false);
+ break;
+ }
+ break;
+
+ case IF_T2_VLDST:
+ imm = emitGetInsSC(id);
+ switch (id->idIns())
+ {
+ case INS_vldr:
+ case INS_vstr:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispAddrPUW(id->idReg2(), imm, id->idInsOpt(), attr);
+ break;
+
+ case INS_vldm:
+ case INS_vstm:
+ emitDispReg(id->idReg2(), attr, false);
+ if (insOptAnyInc(id->idInsOpt()))
+ printf("!");
+ printf(", ");
+ emitDispRegRange(id->idReg1(), abs(imm) >> 2, attr);
+ break;
+
+ case INS_vpush:
+ case INS_vpop:
+ emitDispRegRange(id->idReg1(), abs(imm) >> 2, attr);
+ break;
+
+ default:
+ unreached();
+ }
+ break;
+
+ case IF_T2_VMOVD:
+ switch (id->idIns())
+ {
+ case INS_vmov_i2d:
+ emitDispReg(id->idReg1(), attr, true); // EA_8BYTE
+ emitDispReg(id->idReg2(), EA_4BYTE, true);
+ emitDispReg(id->idReg3(), EA_4BYTE, false);
+ break;
+ case INS_vmov_d2i:
+ emitDispReg(id->idReg1(), EA_4BYTE, true);
+ emitDispReg(id->idReg2(), EA_4BYTE, true);
+ emitDispReg(id->idReg3(), attr, false); // EA_8BYTE
+ break;
+ default:
+ unreached();
+ }
+ break;
+
+ case IF_T2_VMOVS:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, false);
+ break;
+
+ case IF_T2_G1:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispAddrRR(id->idReg2(), id->idReg3(), attr);
+ break;
+
+ case IF_T2_D0: // Reg, Reg, Imm, Imm
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, true);
+ imm = emitGetInsSC(id);
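+ // The immediate packs two fields: bits [9:5] hold the lsb, and bits [4:0] hold the msb
+ // (for bfi) or width-1 (for the other instructions in this format). As a worked example,
+ // a packed value of 0x10B decodes to lsb=8, msb=11 and is displayed as "#8, #4" for bfi.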
+ if (ins == INS_bfi)
+ {
+ int lsb = (imm >> 5) & 0x1f;
+ int msb = imm & 0x1f;
+ int imm1 = lsb;
+ int imm2 = msb + 1 - lsb;
+ emitDispImm(imm1, true);
+ emitDispImm(imm2, false);
+ }
+ else
+ {
+ int lsb = (imm >> 5) & 0x1f;
+ int widthm1 = imm & 0x1f;
+ int imm1 = lsb;
+ int imm2 = widthm1 + 1;
+ emitDispImm(imm1, true);
+ emitDispImm(imm2, false);
+ }
+ break;
+
+ case IF_T2_C0: // Reg, Reg, Reg, Imm
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispReg(id->idReg3(), attr, false);
+ imm = emitGetInsSC(id);
+ if (id->idInsOpt() == INS_OPTS_RRX)
+ {
+ emitDispShiftOpts(id->idInsOpt());
+ assert(imm == 1);
+ }
+ else if (imm > 0)
+ {
+ emitDispShiftOpts(id->idInsOpt());
+ emitDispImm(imm, false);
+ }
+ break;
+
+ case IF_T2_E0:
+ emitDispReg(id->idReg1(), attr, true);
+ if (id->idIsLclVar())
+ {
+ emitDispAddrRRI(id->idReg2(), codeGen->rsGetRsvdReg(), 0, attr);
+ }
+ else
+ {
+ emitDispAddrRRI(id->idReg2(), id->idReg3(), emitGetInsSC(id), attr);
+ }
+ break;
+
+ case IF_T2_G0:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispAddrPUW(id->idReg3(), emitGetInsSC(id), id->idInsOpt(), attr);
+ break;
+
+ case IF_T2_F1: // Reg, Reg, Reg, Reg
+ case IF_T2_F2:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispReg(id->idReg3(), attr, true);
+ emitDispReg(id->idReg4(), attr, false);
+ break;
+
+ case IF_T1_J3:
+ case IF_T2_M1: // Load Label
+ emitDispReg(id->idReg1(), attr, true);
+ if (id->idIsBound())
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ else
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ break;
+
+ case IF_T1_I: // Special Compare-and-branch
+ emitDispReg(id->idReg1(), attr, true);
+ __fallthrough;
+
+ case IF_T1_K: // Special Branch, conditional
+ case IF_T1_M:
+ assert(((instrDescJmp*)id)->idjShort);
+ printf("SHORT ");
+ __fallthrough;
+
+ case IF_T2_N1:
+ if (fmt == IF_T2_N1)
+ {
+ emitDispReg(id->idReg1(), attr, true);
+ printf("%s ADDRESS ", (id->idIns() == INS_movw) ? "LOW" : "HIGH");
+ }
+ __fallthrough;
+
+ case IF_T2_J1:
+ case IF_T2_J2:
+ case IF_LARGEJMP:
+ {
+ if (id->idAddr()->iiaHasInstrCount())
+ {
+ int instrCount = id->idAddr()->iiaGetInstrCount();
+
+ if (ig == NULL)
+ {
+ printf("pc%s%d instructions", (instrCount >= 0) ? "+" : "", instrCount);
+ }
+ else
+ {
+ unsigned insNum = emitFindInsNum(ig, id);
+ UNATIVE_OFFSET srcOffs = ig->igOffs + emitFindOffset(ig, insNum + 1);
+ UNATIVE_OFFSET dstOffs = ig->igOffs + emitFindOffset(ig, insNum + 1 + instrCount);
+ ssize_t relOffs = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
+ printf("pc%s%d (%d instructions)", (relOffs >= 0) ? "+" : "", relOffs, instrCount);
+ }
+ }
+ else if (id->idIsBound())
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ else
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ }
+ break;
+
+ case IF_T2_J3:
+ if (id->idIsCallAddr())
+ {
+ offs = (ssize_t)id->idAddr()->iiaAddr;
+ methodName = "";
+ }
+ else
+ {
+ offs = 0;
+ methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
+ }
+
+ if (offs)
+ {
+ if (id->idIsDspReloc())
+ printf("reloc ");
+ printf("%08X", offs);
+ }
+ else
+ {
+ printf("%s", methodName);
+ }
+
+ break;
+
+ default:
+ printf("unexpected format %s", emitIfName(id->idInsFmt()));
+ assert(!"unexpectedFormat");
+ break;
+ }
+
+ if (id->idDebugOnlyInfo()->idVarRefOffs)
+ {
+ printf("\t// ");
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+ }
+
+ printf("\n");
+}
+
+void emitter::emitDispIns(
+ instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig)
+{
+ insFormat fmt = id->idInsFmt();
+
+ /* Special-case IF_LARGEJMP */
+
+ if ((fmt == IF_LARGEJMP) && id->idIsBound())
+ {
+ // This is a pseudo-instruction format representing a large conditional branch. See the comment
+ // in emitter::emitOutputLJ() for the full description.
+ //
+ // For this pseudo-instruction, we will actually generate:
+ //
+ // b<!cond> L_not // 2 bytes. Note that we reverse the condition.
+ // b L_target // 4 bytes
+ // L_not:
+ //
+ // These instructions don't exist in the actual instruction stream, so we need to fake them
+ // up to display them.
+ //
+ // Note: don't touch the actual instrDesc. If we accidentally messed it up, it would create a very
+ // difficult-to-find bug.
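+ //
+ // For example, if the original instruction is a bound 'beq' to a distant label, the code
+ // below displays a short 'bne' that skips the next instruction, followed by an
+ // unconditional 'b' to the real target.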
+
+ instrDescJmp idJmp;
+ instrDescJmp* pidJmp = &idJmp;
+
+ memset(&idJmp, 0, sizeof(idJmp));
+
+ pidJmp->idIns(emitJumpKindToIns(emitReverseJumpKind(emitInsToJumpKind(id->idIns())))); // reverse the
+ // conditional
+ // instruction
+ pidJmp->idInsFmt(IF_T1_K);
+ pidJmp->idInsSize(emitInsSize(IF_T1_K));
+ pidJmp->idjShort = 1;
+ pidJmp->idAddr()->iiaSetInstrCount(1);
+ pidJmp->idDebugOnlyInfo(id->idDebugOnlyInfo()); // share the idDebugOnlyInfo() field
+
+ size_t bcondSizeOrZero = (code == NULL) ? 0 : 2; // branch is 2 bytes
+ emitDispInsHelp(pidJmp, false, doffs, asmfm, offset, code, bcondSizeOrZero,
+ NULL /* force display of pc-relative branch */);
+
+ code += bcondSizeOrZero;
+ offset += 2;
+
+ // Next, display the unconditional branch
+
+ // Reset the local instrDesc
+ memset(&idJmp, 0, sizeof(idJmp));
+
+ pidJmp->idIns(INS_b);
+ pidJmp->idInsFmt(IF_T2_J2);
+ pidJmp->idInsSize(emitInsSize(IF_T2_J2));
+ pidJmp->idjShort = 0;
+ if (id->idIsBound())
+ {
+ pidJmp->idSetIsBound();
+ pidJmp->idAddr()->iiaIGlabel = id->idAddr()->iiaIGlabel;
+ }
+ else
+ {
+ pidJmp->idAddr()->iiaBBlabel = id->idAddr()->iiaBBlabel;
+ }
+ pidJmp->idDebugOnlyInfo(id->idDebugOnlyInfo()); // share the idDebugOnlyInfo() field
+
+ size_t brSizeOrZero = (code == NULL) ? 0 : 4; // unconditional branch is 4 bytes
+ emitDispInsHelp(pidJmp, isNew, doffs, asmfm, offset, code, brSizeOrZero, ig);
+ }
+ else
+ {
+ emitDispInsHelp(id, isNew, doffs, asmfm, offset, code, sz, ig);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display a stack frame reference.
+ */
+
+void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
+{
+ printf("[");
+
+ if (varx < 0)
+ printf("TEMP_%02u", -varx);
+ else
+ emitComp->gtDispLclVar(+varx, false);
+
+ if (disp < 0)
+ printf("-0x%02x", -disp);
+ else if (disp > 0)
+ printf("+0x%02x", +disp);
+
+ printf("]");
+
+ if (varx >= 0 && emitComp->opts.varNames)
+ {
+ LclVarDsc* varDsc;
+ const char* varName;
+
+ assert((unsigned)varx < emitComp->lvaCount);
+ varDsc = emitComp->lvaTable + varx;
+ varName = emitComp->compLocalVarName(varx, offs);
+
+ if (varName)
+ {
+ printf("'%s", varName);
+
+ if (disp < 0)
+ printf("-%d", -disp);
+ else if (disp > 0)
+ printf("+%d", +disp);
+
+ printf("'");
+ }
+ }
+}
+
+#endif // DEBUG
+
+#ifndef LEGACY_BACKEND
+
+// This is very similar to emitInsBinary and probably could be folded into the same routine,
+// except that the requirements on the incoming parameters are different;
+// e.g., the memory operand in the storeind case must NOT be contained.
+void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node)
+{
+ switch (node->OperGet())
+ {
+ case GT_IND:
+ {
+ GenTree* addr = node->gtGetOp1();
+ assert(!addr->isContained());
+ codeGen->genConsumeReg(addr);
+ emitIns_R_R(ins, attr, node->gtRegNum, addr->gtRegNum);
+ }
+ break;
+
+ case GT_STOREIND:
+ {
+ GenTree* addr = node->gtGetOp1();
+ GenTree* data = node->gtOp.gtOp2;
+
+ assert(!addr->isContained());
+ assert(!data->isContained());
+ codeGen->genConsumeReg(addr);
+ codeGen->genConsumeReg(data);
+
+ if (addr->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
+ }
+ else
+ {
+ emitIns_R_R(ins, attr, addr->gtRegNum, data->gtRegNum);
+ }
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ {
+ GenTreeLclVarCommon* varNode = node->AsLclVarCommon();
+
+ GenTree* data = node->gtOp.gtOp1->gtEffectiveVal();
+ codeGen->inst_set_SV_var(varNode);
+ assert(varNode->gtRegNum == REG_NA); // stack store
+
+ if (data->isContainedIntOrIImmed())
+ {
+ emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
+ codeGen->genUpdateLife(varNode);
+ }
+ else
+ {
+ assert(!data->isContained());
+ codeGen->genConsumeReg(data);
+ emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
+ codeGen->genUpdateLife(varNode);
+ }
+ }
+ return;
+
+ default:
+ unreached();
+ }
+}
+
+// The callee must call genConsumeReg() for any non-contained srcs
+// and genProduceReg() for any non-contained dsts.
+
+regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
+{
+ regNumber result = REG_NA;
+
+ // dst can only be a reg
+ assert(!dst->isContained());
+
+ // src can be immed or reg
+ assert(!src->isContained() || src->isContainedIntOrIImmed());
+
+ // find immed (if any) - it cannot be a dst
+ GenTreeIntConCommon* intConst = nullptr;
+ if (src->isContainedIntOrIImmed())
+ {
+ intConst = src->AsIntConCommon();
+ }
+
+ if (intConst)
+ {
+ emitIns_R_I(ins, attr, dst->gtRegNum, intConst->IconValue());
+ return dst->gtRegNum;
+ }
+ else
+ {
+ emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum);
+ return dst->gtRegNum;
+ }
+}
+
+#endif // !LEGACY_BACKEND
+#endif // defined(_TARGET_ARM_)
diff --git a/src/jit/emitarm.h b/src/jit/emitarm.h
new file mode 100644
index 0000000000..1440148f42
--- /dev/null
+++ b/src/jit/emitarm.h
@@ -0,0 +1,414 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if defined(_TARGET_ARM_)
+
+/************************************************************************/
+/* Routines that compute the size of / encode instructions */
+/************************************************************************/
+
+struct CnsVal
+{
+ int cnsVal;
+#ifdef RELOC_SUPPORT
+ bool cnsReloc;
+#endif
+};
+
+insSize emitInsSize(insFormat insFmt);
+
+BYTE* emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = NULL);
+BYTE* emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = NULL);
+BYTE* emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = NULL);
+
+BYTE* emitOutputR(BYTE* dst, instrDesc* id);
+BYTE* emitOutputRI(BYTE* dst, instrDesc* id);
+BYTE* emitOutputRR(BYTE* dst, instrDesc* id);
+BYTE* emitOutputIV(BYTE* dst, instrDesc* id);
+#ifdef FEATURE_ITINSTRUCTION
+BYTE* emitOutputIT(BYTE* dst, instruction ins, insFormat fmt, ssize_t condcode);
+#endif // FEATURE_ITINSTRUCTION
+BYTE* emitOutputNOP(BYTE* dst, instruction ins, insFormat fmt);
+
+BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* id);
+BYTE* emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id);
+
+static unsigned emitOutput_Thumb1Instr(BYTE* dst, ssize_t code);
+static unsigned emitOutput_Thumb2Instr(BYTE* dst, ssize_t code);
+
+/************************************************************************/
+/* Debug-only routines to display instructions */
+/************************************************************************/
+
+#ifdef DEBUG
+
+const char* emitFPregName(unsigned reg, bool varName = true);
+
+void emitDispInst(instruction ins, insFlags flags);
+void emitDispReloc(int value, bool addComma);
+void emitDispImm(int imm, bool addComma, bool alwaysHex = false);
+void emitDispCond(int cond);
+void emitDispShiftOpts(insOpts opt);
+void emitDispRegmask(int imm, bool encodedPC_LR);
+void emitDispRegRange(regNumber reg, int len, emitAttr attr);
+void emitDispReg(regNumber reg, emitAttr attr, bool addComma);
+void emitDispFloatReg(regNumber reg, emitAttr attr, bool addComma);
+void emitDispAddrR(regNumber reg, emitAttr attr);
+void emitDispAddrRI(regNumber reg, int imm, emitAttr attr);
+void emitDispAddrRR(regNumber reg1, regNumber reg2, emitAttr attr);
+void emitDispAddrRRI(regNumber reg1, regNumber reg2, int imm, emitAttr attr);
+void emitDispAddrPUW(regNumber reg, int imm, insOpts opt, emitAttr attr);
+void emitDispGC(emitAttr attr);
+
+void emitDispInsHelp(instrDesc* id,
+ bool isNew,
+ bool doffs,
+ bool asmfm,
+ unsigned offs = 0,
+ BYTE* code = 0,
+ size_t sz = 0,
+ insGroup* ig = NULL);
+void emitDispIns(instrDesc* id,
+ bool isNew,
+ bool doffs,
+ bool asmfm,
+ unsigned offs = 0,
+ BYTE* code = 0,
+ size_t sz = 0,
+ insGroup* ig = NULL);
+
+#endif // DEBUG
+
+/************************************************************************/
+/* Private members that deal with target-dependent instr. descriptors */
+/************************************************************************/
+
+private:
+instrDesc* emitNewInstrAmd(emitAttr attr, int dsp);
+instrDesc* emitNewInstrAmdCns(emitAttr attr, int dsp, int cns);
+
+instrDesc* emitNewInstrCallDir(
+ int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize);
+
+instrDesc* emitNewInstrCallInd(
+ int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize);
+
+void emitGetInsCns(instrDesc* id, CnsVal* cv);
+int emitGetInsAmdCns(instrDesc* id, CnsVal* cv);
+void emitGetInsDcmCns(instrDesc* id, CnsVal* cv);
+int emitGetInsAmdAny(instrDesc* id);
+
+/************************************************************************/
+/* Private helpers for instruction output */
+/************************************************************************/
+
+private:
+bool emitInsIsCompare(instruction ins);
+bool emitInsIsLoad(instruction ins);
+bool emitInsIsStore(instruction ins);
+bool emitInsIsLoadOrStore(instruction ins);
+
+/*****************************************************************************
+ *
+ *  Convert an index scale in bytes to/from the smaller encoding used for
+ *  storage in instruction descriptors.
+ */
+
+inline emitter::opSize emitEncodeScale(size_t scale)
+{
+ assert(scale == 1 || scale == 2 || scale == 4 || scale == 8);
+
+ return emitSizeEncode[scale - 1];
+}
+
+inline emitAttr emitDecodeScale(unsigned ensz)
+{
+ assert(ensz < 4);
+
+ return emitter::emitSizeDecode[ensz];
+}
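+
+// Usage note: emitDecodeScale(emitEncodeScale(scale)) is expected to round-trip back to the
+// emitAttr corresponding to the original scale (e.g. 8 -> EA_8BYTE); the exact 2-bit codes
+// are defined by the emitSizeEncode/emitSizeDecode tables elsewhere in the emitter.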
+
+static bool isModImmConst(int imm);
+
+static int encodeModImmConst(int imm);
+
+static int insUnscaleImm(int imm, emitAttr size);
+
+/************************************************************************/
+/* Public inline informational methods */
+/************************************************************************/
+
+public:
+inline static bool isLowRegister(regNumber reg)
+{
+ return (reg <= REG_R7);
+}
+
+inline static bool isGeneralRegister(regNumber reg)
+{
+ return (reg <= REG_R15);
+}
+
+inline static bool isFloatReg(regNumber reg)
+{
+ return (reg >= REG_F0 && reg <= REG_F31);
+}
+
+inline static bool isDoubleReg(regNumber reg)
+{
+ return isFloatReg(reg) && ((reg % 2) == 0);
+}
+
+inline static bool insSetsFlags(insFlags flags)
+{
+ return (flags != INS_FLAGS_NOT_SET);
+}
+
+inline static bool insDoesNotSetFlags(insFlags flags)
+{
+ return (flags != INS_FLAGS_SET);
+}
+
+inline static insFlags insMustSetFlags(insFlags flags)
+{
+ return (flags == INS_FLAGS_SET) ? INS_FLAGS_SET : INS_FLAGS_NOT_SET;
+}
+
+inline static insFlags insMustNotSetFlags(insFlags flags)
+{
+ return (flags == INS_FLAGS_NOT_SET) ? INS_FLAGS_NOT_SET : INS_FLAGS_SET;
+}
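+
+// Note that for INS_FLAGS_DONT_CARE both insSetsFlags() and insDoesNotSetFlags() return true,
+// i.e. either encoding is acceptable; insMustSetFlags() and insMustNotSetFlags() resolve the
+// "don't care" case to a concrete choice.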
+
+inline static bool insOptsNone(insOpts opt)
+{
+ return (opt == INS_OPTS_NONE);
+}
+
+inline static bool insOptAnyInc(insOpts opt)
+{
+ return (opt == INS_OPTS_LDST_PRE_DEC) || (opt == INS_OPTS_LDST_POST_INC);
+}
+
+inline static bool insOptsPreDec(insOpts opt)
+{
+ return (opt == INS_OPTS_LDST_PRE_DEC);
+}
+
+inline static bool insOptsPostInc(insOpts opt)
+{
+ return (opt == INS_OPTS_LDST_POST_INC);
+}
+
+inline static bool insOptAnyShift(insOpts opt)
+{
+ return ((opt >= INS_OPTS_RRX) && (opt <= INS_OPTS_ROR));
+}
+
+inline static bool insOptsRRX(insOpts opt)
+{
+ return (opt == INS_OPTS_RRX);
+}
+
+inline static bool insOptsLSL(insOpts opt)
+{
+ return (opt == INS_OPTS_LSL);
+}
+
+inline static bool insOptsLSR(insOpts opt)
+{
+ return (opt == INS_OPTS_LSR);
+}
+
+inline static bool insOptsASR(insOpts opt)
+{
+ return (opt == INS_OPTS_ASR);
+}
+
+inline static bool insOptsROR(insOpts opt)
+{
+ return (opt == INS_OPTS_ROR);
+}
+
+/************************************************************************/
+/* The public entry points to output instructions */
+/************************************************************************/
+
+public:
+static bool emitIns_valid_imm_for_alu(int imm);
+static bool emitIns_valid_imm_for_mov(int imm);
+static bool emitIns_valid_imm_for_small_mov(regNumber reg, int imm, insFlags flags);
+static bool emitIns_valid_imm_for_add(int imm, insFlags flags);
+static bool emitIns_valid_imm_for_add_sp(int imm);
+
+void emitIns(instruction ins);
+
+void emitIns_I(instruction ins, emitAttr attr, ssize_t imm);
+
+void emitIns_R(instruction ins, emitAttr attr, regNumber reg);
+
+void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insFlags flags = INS_FLAGS_DONT_CARE);
+
+void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags = INS_FLAGS_DONT_CARE);
+
+void emitIns_R_I_I(
+ instruction ins, emitAttr attr, regNumber reg1, int imm1, int imm2, insFlags flags = INS_FLAGS_DONT_CARE);
+
+void emitIns_R_R_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ int imm,
+ insFlags flags = INS_FLAGS_DONT_CARE,
+ insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R_R(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+void emitIns_R_R_I_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ int imm1,
+ int imm2,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+void emitIns_R_R_R_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ int imm,
+ insFlags flags = INS_FLAGS_DONT_CARE,
+ insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4);
+
+void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs);
+
+void emitIns_S(instruction ins, emitAttr attr, int varx, int offs);
+
+void emitIns_genStackOffset(regNumber r, int varx, int offs);
+
+void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+
+void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+
+void emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val);
+
+void emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs);
+
+void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs);
+
+void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, ssize_t offs, ssize_t val);
+
+void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg);
+
+void emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg);
+
+void emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg);
+
+void emitIns_I_AR(
+ instruction ins, emitAttr attr, int val, regNumber reg, int offs, int memCookie = 0, void* clsCookie = NULL);
+
+void emitIns_R_AR(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs, int memCookie = 0, void* clsCookie = NULL);
+
+void emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp);
+
+void emitIns_AR_R(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs, int memCookie = 0, void* clsCookie = NULL);
+
+void emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_R_ARX(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp);
+
+enum EmitCallType
+{
+
+ // I have included here, but commented out, all the values used by the x86 emitter.
+ // However, ARM has a much reduced instruction set, so the ARM emitter only
+ // supports a subset of the x86 variants. By leaving them commented out, it becomes
+ // a compile-time error if code tries to use them (and hopefully the author will see
+ // this comment and understand why they are unavailable on ARM), while making it easier
+ // to stay in sync with x86 and possibly add them back in if needed.
+
+ EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method
+ // EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method
+ EC_FUNC_ADDR, // Direct call to an absolute address
+
+ // EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable)
+ EC_INDIR_R, // Indirect call via register
+ // EC_INDIR_SR, // Indirect call via stack-reference (local var)
+ // EC_INDIR_C, // Indirect call via static class var
+ // EC_INDIR_ARD, // Indirect call via an addressing mode
+
+ EC_COUNT
+};
+
+void emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd, // used for pretty printing
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize,
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET,
+ regNumber ireg = REG_NA,
+ regNumber xreg = REG_NA,
+ unsigned xmul = 0,
+ int disp = 0,
+ bool isJump = false,
+ bool isNoGC = false,
+ bool isProfLeaveCB = false);
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a conditional jump.
+ */
+
+inline bool emitIsCondJump(instrDesc* jmp)
+{
+ return (jmp->idInsFmt() == IF_T2_J1) || (jmp->idInsFmt() == IF_T1_K) || (jmp->idInsFmt() == IF_LARGEJMP);
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a compare-and-jump.
+ */
+
+inline bool emitIsCmpJump(instrDesc* jmp)
+{
+ return (jmp->idInsFmt() == IF_T1_I);
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's an unconditional jump.
+ */
+
+inline bool emitIsUncondJump(instrDesc* jmp)
+{
+ return (jmp->idInsFmt() == IF_T2_J2) || (jmp->idInsFmt() == IF_T1_M);
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a load label instruction.
+ */
+
+inline bool emitIsLoadLabel(instrDesc* jmp)
+{
+ return (jmp->idInsFmt() == IF_T2_M1) || (jmp->idInsFmt() == IF_T1_J3) || (jmp->idInsFmt() == IF_T2_N1);
+}
+
+#endif // _TARGET_ARM_
diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp
new file mode 100644
index 0000000000..a632ec12c8
--- /dev/null
+++ b/src/jit/emitarm64.cpp
@@ -0,0 +1,11167 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX emitArm64.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_ARM64_)
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#include "instr.h"
+#include "emit.h"
+#include "codegen.h"
+
+/* static */ bool emitter::strictArmAsm = true;
+
+/*****************************************************************************/
+
+const instruction emitJumpKindInstructions[] = {
+ INS_nop,
+
+#define JMP_SMALL(en, rev, ins) INS_##ins,
+#include "emitjmps.h"
+};
+
+const emitJumpKind emitReverseJumpKinds[] = {
+ EJ_NONE,
+
+#define JMP_SMALL(en, rev, ins) EJ_##rev,
+#include "emitjmps.h"
+};
+
+/*****************************************************************************
+ * Look up the instruction for a jump kind
+ */
+
+/*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind)
+{
+ assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions));
+ return emitJumpKindInstructions[jumpKind];
+}
+
+/*****************************************************************************
+* Look up the jump kind for an instruction. It better be a conditional
+* branch instruction with a jump kind!
+*/
+
+/*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins)
+{
+ for (unsigned i = 0; i < ArrLen(emitJumpKindInstructions); i++)
+ {
+ if (ins == emitJumpKindInstructions[i])
+ {
+ emitJumpKind ret = (emitJumpKind)i;
+ assert(EJ_NONE < ret && ret < EJ_COUNT);
+ return ret;
+ }
+ }
+ unreached();
+}
+
+/*****************************************************************************
+ * Reverse the conditional jump
+ */
+
+/*static*/ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind)
+{
+ assert(jumpKind < EJ_COUNT);
+ return emitReverseJumpKinds[jumpKind];
+}
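+
+// For example, reversing EJ_eq yields EJ_ne (and vice versa), as specified by the
+// JMP_SMALL entries included from "emitjmps.h".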
+
+/*****************************************************************************
+ *
+ * Return the allocated size (in bytes) of the given instruction descriptor.
+ */
+
+size_t emitter::emitSizeOfInsDsc(instrDesc* id)
+{
+ assert(!emitIsTinyInsDsc(id));
+
+ if (emitIsScnsInsDsc(id))
+ return SMALL_IDSC_SIZE;
+
+ assert((unsigned)id->idInsFmt() < emitFmtCount);
+
+ ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
+ bool isCallIns = (id->idIns() == INS_bl) || (id->idIns() == INS_blr) || (id->idIns() == INS_b_tail) ||
+ (id->idIns() == INS_br_tail);
+ bool maybeCallIns = (id->idIns() == INS_b) || (id->idIns() == INS_br);
+
+ switch (idOp)
+ {
+ case ID_OP_NONE:
+ break;
+
+ case ID_OP_JMP:
+ return sizeof(instrDescJmp);
+
+ case ID_OP_CALL:
+ assert(isCallIns || maybeCallIns);
+ if (id->idIsLargeCall())
+ {
+ /* Must be a "fat" call descriptor */
+ return sizeof(instrDescCGCA);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+ return sizeof(instrDesc);
+ }
+ break;
+
+ default:
+ NO_WAY("unexpected instruction descriptor format");
+ break;
+ }
+
+ if (id->idIsLargeCns())
+ {
+ if (id->idIsLargeDsp())
+ return sizeof(instrDescCnsDsp);
+ else
+ return sizeof(instrDescCns);
+ }
+ else
+ {
+ if (id->idIsLargeDsp())
+ return sizeof(instrDescDsp);
+ else
+ return sizeof(instrDesc);
+ }
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * The following is called for each recorded instruction -- used for debugging.
+ */
+void emitter::emitInsSanityCheck(instrDesc* id)
+{
+ /* What instruction format have we got? */
+
+ switch (id->idInsFmt())
+ {
+ instruction ins;
+ emitAttr elemsize;
+ emitAttr datasize;
+ emitAttr dstsize;
+ emitAttr srcsize;
+ ssize_t imm;
+ unsigned immShift;
+ ssize_t index;
+ ssize_t index2;
+
+ case IF_BI_0A: // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
+ break;
+
+ case IF_BI_0B: // BI_0B ......iiiiiiiiii iiiiiiiiiiii.... simm19:00
+ break;
+
+ case IF_LARGEJMP:
+ case IF_LARGEADR:
+ case IF_LARGELDC:
+ break;
+
+ case IF_BI_0C: // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
+ break;
+
+ case IF_BI_1A: // BI_1A ......iiiiiiiiii iiiiiiiiiiittttt Rt simm19:00
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ break;
+
+ case IF_BI_1B: // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isValidImmShift(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_BR_1A: // BR_1A ................ ......nnnnn..... Rn
+ assert(isGeneralRegister(id->idReg1()));
+ break;
+
+ case IF_BR_1B: // BR_1B ................ ......nnnnn..... Rn
+ assert(isGeneralRegister(id->idReg3()));
+ break;
+
+ case IF_LS_1A: // LS_1A .X......iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB)
+ assert(isGeneralRegister(id->idReg1()) || isVectorRegister(id->idReg1()));
+ assert(insOptsNone(id->idInsOpt()));
+ break;
+
+ case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn
+ assert(isIntegerRegister(id->idReg1()) || // ZR
+ isVectorRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2())); // SP
+ assert(emitGetInsSC(id) == 0);
+ assert(insOptsNone(id->idInsOpt()));
+ break;
+
+ case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095)
+ assert(isIntegerRegister(id->idReg1()) || // ZR
+ isVectorRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2())); // SP
+ assert(isValidUimm12(emitGetInsSC(id)));
+ assert(insOptsNone(id->idInsOpt()));
+ break;
+
+ case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiPPnnnnnttttt Rt Rn imm(-256..+255) no/pre/post inc
+ assert(isIntegerRegister(id->idReg1()) || // ZR
+ isVectorRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2())); // SP
+ assert(emitGetInsSC(id) >= -0x100);
+ assert(emitGetInsSC(id) < 0x100);
+ assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+ break;
+
+ case IF_LS_3A: // LS_3A .X.......X.mmmmm oooS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {}
+ assert(isIntegerRegister(id->idReg1()) || // ZR
+ isVectorRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2())); // SP
+ if (id->idIsLclVar())
+ {
+ assert(isGeneralRegister(codeGen->rsGetRsvdReg()));
+ }
+ else
+ {
+ assert(isGeneralRegister(id->idReg3()));
+ }
+ assert(insOptsLSExtend(id->idInsOpt()));
+ break;
+
+ case IF_LS_3B: // LS_3B X............... .aaaaannnnnttttt Rt Ra Rn
+ assert((isValidGeneralDatasize(id->idOpSize()) && isIntegerRegister(id->idReg1())) ||
+ (isValidVectorLSPDatasize(id->idOpSize()) && isVectorRegister(id->idReg1())));
+ assert(isIntegerRegister(id->idReg1()) || // ZR
+ isVectorRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2()) || // ZR
+ isVectorRegister(id->idReg2()));
+ assert(isIntegerRegister(id->idReg3())); // SP
+ assert(emitGetInsSC(id) == 0);
+ assert(insOptsNone(id->idInsOpt()));
+ break;
+
+ case IF_LS_3C: // LS_3C X.........iiiiii iaaaaannnnnttttt Rt Ra Rn imm(im7,sh)
+ assert((isValidGeneralDatasize(id->idOpSize()) && isIntegerRegister(id->idReg1())) ||
+ (isValidVectorLSPDatasize(id->idOpSize()) && isVectorRegister(id->idReg1())));
+ assert(isIntegerRegister(id->idReg1()) || // ZR
+ isVectorRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2()) || // ZR
+ isVectorRegister(id->idReg2()));
+ assert(isIntegerRegister(id->idReg3())); // SP
+ assert(emitGetInsSC(id) >= -0x40);
+ assert(emitGetInsSC(id) < 0x40);
+ assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+ break;
+
+ case IF_DI_1A: // DI_1A X.......shiiiiii iiiiiinnnnn..... Rn imm(i12,sh)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isValidUimm12(emitGetInsSC(id)));
+ assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
+ break;
+
+ case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isValidImmHWVal(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_DI_1C: // DI_1C X........Nrrrrrr ssssssnnnnn..... Rn imm(N,r,s)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isValidImmNRS(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isValidImmNRS(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21
+ assert(isGeneralRegister(id->idReg1()));
+ break;
+
+ case IF_DI_1F: // DI_1F X..........iiiii cccc..nnnnn.nzcv Rn imm5 nzcv cond
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isValidImmCondFlagsImm5(emitGetInsSC(id)));
+ break;
+
+ case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isIntegerRegister(id->idReg2())); // SP
+ assert(isValidUimm12(emitGetInsSC(id)));
+ assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
+ break;
+
+ case IF_DI_2B: // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmShift(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmNRS(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, imms (N,r,s)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmNRS(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_DR_1D: // DR_1D X............... cccc.......ddddd Rd cond
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isValidImmCond(emitGetInsSC(id)));
+ break;
+
+ case IF_DR_2A: // DR_2A X..........mmmmm ......nnnnn..... Rn Rm
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ break;
+
+ case IF_DR_2B: // DR_2B X.......sh.mmmmm ssssssnnnnn..... Rn Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // ZR
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmShift(emitGetInsSC(id), id->idOpSize()));
+ if (!insOptsNone(id->idInsOpt()))
+ {
+ if (id->idIns() == INS_tst) // tst allows ROR, cmp/cmn don't
+ {
+ assert(insOptsAnyShift(id->idInsOpt()));
+ }
+ else
+ {
+ assert(insOptsAluShift(id->idInsOpt()));
+ }
+ }
+ assert(insOptsNone(id->idInsOpt()) || (emitGetInsSC(id) > 0));
+ break;
+
+ case IF_DR_2C: // DR_2C X..........mmmmm ooosssnnnnn..... Rn Rm ext(Rm) LSL imm(0-4)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isGeneralRegister(id->idReg2()));
+ assert(insOptsNone(id->idInsOpt()) || insOptsLSL(id->idInsOpt()) || insOptsAnyExtend(id->idInsOpt()));
+ assert(emitGetInsSC(id) >= 0);
+ assert(emitGetInsSC(id) <= 4);
+ if (insOptsLSL(id->idInsOpt()))
+ {
+ assert(emitGetInsSC(id) > 0);
+ }
+ break;
+
+ case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnmmmmm Rd Rn cond
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmCond(emitGetInsSC(id)));
+ break;
+
+ case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2())); // ZR
+ break;
+
+ case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmShift(emitGetInsSC(id), id->idOpSize()));
+ assert(insOptsNone(id->idInsOpt()) || insOptsAluShift(id->idInsOpt()));
+ assert(insOptsNone(id->idInsOpt()) || (emitGetInsSC(id) > 0));
+ break;
+
+ case IF_DR_2G: // DR_2G X............... ......nnnnnddddd Rd Rm
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isIntegerRegister(id->idReg2())); // SP
+ break;
+
+ case IF_DR_2H: // DR_2H X........X...... ......nnnnnddddd Rd Rn
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ break;
+
+ case IF_DR_2I: // DR_2I X..........mmmmm cccc..nnnnn.nzcv Rn Rm nzcv cond
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmCondFlags(emitGetInsSC(id)));
+ break;
+
+ case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnmmmmm Rd Rn Rm
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isIntegerRegister(id->idReg2())); // SP
+ if (id->idIsLclVar())
+ {
+ assert(isGeneralRegister(codeGen->rsGetRsvdReg()));
+ }
+ else
+ {
+ assert(isGeneralRegister(id->idReg3()));
+ }
+ assert(insOptsNone(id->idInsOpt()));
+ break;
+
+ case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(isValidImmShift(emitGetInsSC(id), id->idOpSize()));
+ assert(insOptsNone(id->idInsOpt()) || insOptsAnyShift(id->idInsOpt()));
+ assert(insOptsNone(id->idInsOpt()) || (emitGetInsSC(id) > 0));
+ break;
+
+ case IF_DR_3C: // DR_3C X..........mmmmm ooosssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isIntegerRegister(id->idReg2())); // SP
+ assert(isGeneralRegister(id->idReg3()));
+ assert(insOptsNone(id->idInsOpt()) || insOptsLSL(id->idInsOpt()) || insOptsAnyExtend(id->idInsOpt()));
+ assert(emitGetInsSC(id) >= 0);
+ assert(emitGetInsSC(id) <= 4);
+ if (insOptsLSL(id->idInsOpt()))
+ {
+ assert((emitGetInsSC(id) > 0) ||
+ (id->idReg2() == REG_ZR)); // REG_ZR encodes SP and we allow a shift of zero
+ }
+ break;
+
+ case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnmmmmm Rd Rn Rm cond
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(isValidImmCond(emitGetInsSC(id)));
+ break;
+
+ case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(isValidImmShift(emitGetInsSC(id), id->idOpSize()));
+ assert(insOptsNone(id->idInsOpt()));
+ break;
+
+ case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnddddd Rd Rn Rm Ra
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(isGeneralRegister(id->idReg4()));
+ break;
+
+ case IF_DV_1A: // DV_1A .........X.iiiii iii........ddddd Vd imm8 (fmov - immediate scalar)
+ assert(insOptsNone(id->idInsOpt()));
+ elemsize = id->idOpSize();
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isValidUimm8(emitGetInsSC(id)));
+ break;
+
+ case IF_DV_1B: // DV_1B .QX..........iii cmod..iiiiiddddd Vd imm8 (immediate vector)
+ ins = id->idIns();
+ imm = emitGetInsSC(id) & 0x0ff;
+ immShift = (emitGetInsSC(id) & 0x700) >> 8;
+ assert(immShift >= 0);
+ datasize = id->idOpSize();
+ assert(isValidVectorDatasize(datasize));
+ assert(isValidArrangement(datasize, id->idInsOpt()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ if (ins == INS_fmov)
+ {
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(id->idInsOpt() != INS_OPTS_1D); // Reserved encoding
+ assert(immShift == 0);
+ }
+ else
+ {
+ assert(isValidVectorElemsize(elemsize));
+ assert((immShift != 4) && (immShift != 7)); // always invalid values
+ if (ins != INS_movi) // INS_mvni, INS_orr, INS_bic
+ {
+ assert((elemsize != EA_1BYTE) && (elemsize != EA_8BYTE)); // only H or S
+ if (elemsize == EA_2BYTE)
+ {
+ assert(immShift < 2);
+ }
+ else // (elemsize == EA_4BYTE)
+ {
+ if (ins != INS_mvni)
+ {
+ assert(immShift < 4);
+ }
+ }
+ }
+ }
+ assert(isVectorRegister(id->idReg1()));
+ assert(isValidUimm8(imm));
+ break;
+
+ case IF_DV_1C: // DV_1C .........X...... ......nnnnn..... Vn #0.0 (fcmp - with zero)
+ assert(insOptsNone(id->idInsOpt()));
+ elemsize = id->idOpSize();
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(isVectorRegister(id->idReg1()));
+ break;
+
+ case IF_DV_2A: // DV_2A .Q.......X...... ......nnnnnddddd Vd Vn (fabs, fcvt - vector)
+ case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2N: // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar)
+ assert(id->idOpSize() == EA_8BYTE);
+ assert(insOptsNone(id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isValidImmShift(emitGetInsSC(id), EA_8BYTE));
+ break;
+
+ case IF_DV_2O: // DV_2O .Q.......iiiiiii ......nnnnnddddd Vd Vn imm (shift - vector)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ assert(isValidImmShift(emitGetInsSC(id), elemsize));
+ break;
+
+ case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov/smov - to general)
+ elemsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ assert(insOptsNone(id->idInsOpt()));
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, index));
+ assert(isValidVectorElemsize(elemsize));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general)
+ if (id->idIns() == INS_dup)
+ {
+ datasize = id->idOpSize();
+ assert(isValidVectorDatasize(datasize));
+ assert(isValidArrangement(datasize, id->idInsOpt()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ }
+ else // INS_ins
+ {
+ datasize = EA_16BYTE;
+ elemsize = id->idOpSize();
+ assert(isValidVectorElemsize(elemsize));
+ }
+ assert(isVectorRegister(id->idReg1()));
+ assert(isGeneralRegisterOrZR(id->idReg2()));
+ break;
+
+ case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector)
+ datasize = id->idOpSize();
+ assert(isValidVectorDatasize(datasize));
+ assert(isValidArrangement(datasize, id->idInsOpt()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ index = emitGetInsSC(id);
+ assert(isValidVectorIndex(datasize, elemsize, index));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar)
+ elemsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, index));
+ assert(isValidVectorElemsize(elemsize));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element)
+ imm = emitGetInsSC(id);
+ index = (imm >> 4) & 0xf;
+ index2 = imm & 0xf;
+ elemsize = id->idOpSize();
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, index));
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, index2));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar)
+ assert(id->idOpSize() == EA_8BYTE); // only type D is supported
+ __fallthrough;
+
+ case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov, fcvtXX - register)
+ case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp)
+ assert(insOptsNone(id->idInsOpt()));
+ assert(isValidVectorElemsizeFloat(id->idOpSize()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov/fcvtXX - to general)
+ assert(insOptsConvertFloatToInt(id->idInsOpt()));
+ dstsize = optGetDstsize(id->idInsOpt());
+ srcsize = optGetSrcsize(id->idInsOpt());
+ assert(isValidGeneralDatasize(dstsize));
+ assert(isValidVectorElemsizeFloat(srcsize));
+ assert(dstsize == id->idOpSize());
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2I: // DV_2I X........X...... ......nnnnnddddd Vd Rn (fmov/Xcvtf - from general)
+ assert(insOptsConvertIntToFloat(id->idInsOpt()));
+ dstsize = optGetDstsize(id->idInsOpt());
+ srcsize = optGetSrcsize(id->idInsOpt());
+ assert(isValidGeneralDatasize(srcsize));
+ assert(isValidVectorElemsizeFloat(dstsize));
+ assert(dstsize == id->idOpSize());
+ assert(isVectorRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt)
+ assert(insOptsConvertFloatToFloat(id->idInsOpt()));
+ dstsize = optGetDstsize(id->idInsOpt());
+ srcsize = optGetSrcsize(id->idInsOpt());
+ assert(isValidVectorFcvtsize(srcsize));
+ assert(isValidVectorFcvtsize(dstsize));
+ assert(dstsize == id->idOpSize());
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ ins = id->idIns();
+ if (ins == INS_mul)
+ {
+ assert(elemsize != EA_8BYTE); // can't use 2D or 1D
+ }
+ else if (ins == INS_pmul)
+ {
+ assert(elemsize == EA_1BYTE); // only supports 8B or 16B
+ }
+ break;
+
+ case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, emitGetInsSC(id)));
+ // Only has encodings for H or S elemsize
+ assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE));
+ break;
+
+ case IF_DV_3B: // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ break;
+
+ case IF_DV_3BI: // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ assert(isValidVectorIndex(id->idOpSize(), elemsize, emitGetInsSC(id)));
+ break;
+
+ case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ break;
+
+ case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ assert(isValidScalarDatasize(id->idOpSize()));
+ assert(insOptsNone(id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ break;
+
+ case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
+ assert(isValidScalarDatasize(id->idOpSize()));
+ assert(insOptsNone(id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ elemsize = id->idOpSize();
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, emitGetInsSC(id)));
+ break;
+
+ case IF_DV_3E: // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ assert(insOptsNone(id->idInsOpt()));
+ assert(id->idOpSize() == EA_8BYTE);
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ break;
+
+ case IF_DV_4A: // DR_4A .........X.mmmmm .aaaaannnnnddddd Rd Rn Rm Ra (scalar)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ assert(isVectorRegister(id->idReg4()));
+ break;
+
+ case IF_SN_0A: // SN_0A ................ ................
+ case IF_SI_0A: // SI_0A ...........iiiii iiiiiiiiiii..... imm16
+ case IF_SI_0B: // SI_0B ................ ....bbbb........ imm4 - barrier
+ break;
+
+ default:
+ printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
+ assert(!"Unexpected format");
+ break;
+ }
+}
+#endif // DEBUG
+
+bool emitter::emitInsMayWriteToGCReg(instrDesc* id)
+{
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+
+ switch (fmt)
+ {
+
+ // These are the formats with "destination" registers:
+
+ case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw)
+ case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s)
+ case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21
+
+ case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh)
+ case IF_DI_2B: // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63)
+ case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s)
+ case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, imms (N,r,s)
+
+ case IF_DR_1D: // DR_1D X............... cccc.......ddddd Rd cond
+
+ case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond
+ case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm
+ case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63)
+ case IF_DR_2G: // DR_2G X............... ......nnnnnddddd Rd Rn
+ case IF_DR_2H: // DR_2H X........X...... ......nnnnnddddd Rd Rn
+
+ case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnddddd Rd Rn Rm
+ case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
+ case IF_DR_3C: // DR_3C X..........mmmmm xxxsssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4)
+ case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnddddd Rd Rn Rm cond
+ case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63)
+
+ case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnddddd Rd Rn Rm Ra
+
+ case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov - to general)
+ case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov - to general)
+
+ return true;
+
+ case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general)
+ case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector)
+ case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar)
+ case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element)
+ case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov, fcvtXX - register)
+ case IF_DV_2I: // DV_2I X........X...... ......nnnnnddddd Vd Rn (fmov - from general)
+ case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt)
+ case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp)
+ case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar)
+ case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector)
+ case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector)
+ case IF_DV_3B: // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ case IF_DV_3BI: // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+ case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
+ case IF_DV_3E: // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar)
+ // Tracked GC pointers cannot be placed into the SIMD registers.
+ return false;
+
+ // These are the load/store formats with "target" registers:
+
+ case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB)
+ case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn
+ case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095)
+ case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiP.nnnnnttttt Rt Rn imm(-256..+255) pre/post inc
+ case IF_LS_3A: // LS_3A .X.......X.mmmmm xxxS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {}
+ case IF_LS_3B: // LS_3B X............... .aaaaannnnnttttt Rt Ra Rn
+ case IF_LS_3C: // LS_3C X.........iiiiii iaaaaannnnnttttt Rt Ra Rn imm(im7,sh)
+
+ // For the Store instructions the "target" register is actually a "source" value
+
+ if (emitInsIsStore(ins))
+ {
+ return false;
+ }
+ else
+ {
+ assert(emitInsIsLoad(ins));
+ return true;
+ }
+
+ default:
+ return false;
+ }
+}
+
+bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id)
+{
+ if (!id->idIsLclVar())
+ return false;
+
+ instruction ins = id->idIns();
+
+ // This list is related to the list of instructions used to store local vars in emitIns_S_R().
+ // We don't accept writing to float local vars.
+
+ switch (ins)
+ {
+ case INS_strb:
+ case INS_strh:
+ case INS_str:
+ case INS_stur:
+ case INS_sturb:
+ case INS_sturh:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool emitter::emitInsMayWriteMultipleRegs(instrDesc* id)
+{
+ instruction ins = id->idIns();
+
+ switch (ins)
+ {
+ case INS_ldp:
+ case INS_ldpsw:
+ case INS_ldnp:
+ return true;
+ default:
+ return false;
+ }
+}
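+
+// For example, 'ldp' loads a pair of registers (e.g. "ldp x0, x1, [sp]"), so a single
+// instruction may update two GC-tracked registers.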
+
+// For the small load/store instructions we adjust the size 'attr'
+// depending upon whether we have a load or a store.
+//
+emitAttr emitter::emitInsAdjustLoadStoreAttr(instruction ins, emitAttr attr)
+{
+ if (EA_SIZE(attr) <= EA_4BYTE)
+ {
+ if (emitInsIsLoad(ins))
+ {
+ // The value of 'ins' encodes the size to load
+ // we use EA_8BYTE here because it is the size we will write (into dataReg)
+ // it is also required when ins is INS_ldrsw
+ //
+ attr = EA_8BYTE;
+ }
+ else
+ {
+ assert(emitInsIsStore(ins));
+
+ // The value of 'ins' encodes the size to store
+ // we use EA_4BYTE here because it is the size of the register
+ // that we want to display when storing small values
+ //
+ attr = EA_4BYTE;
+ }
+ }
+ return attr;
+}
+
+// Takes an instrDesc 'id' and uses its instruction 'ins' to determine the
+// size of the target register that is written or read by the instruction.
+// Note that even when EA_4BYTE is returned, a load instruction will still
+// zero the upper 4 bytes of the target register.
+// This method is needed so that we can distinguish the sign-extending loads,
+// which can have two different sizes for their target register.
+// Additionally, instructions like 'ldr' and 'str' can load/store either
+// 4 or 8 bytes to/from the target register.
+// By convention the small unsigned load instructions are considered to write
+// a 4-byte target register, though since they also zero the upper 4 bytes they
+// could equally be considered to write the unsigned value to the full 8-byte register.
+//
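+// For example, 'ldrb w1, [x0]' and 'ldrsb w1, [x0]' both have a 4-byte target
+// register, while an 8-byte 'ldrsb x1, [x0]' has an 8-byte target register.
+//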
+emitAttr emitter::emitInsTargetRegSize(instrDesc* id)
+{
+ instruction ins = id->idIns();
+ emitAttr result = EA_UNKNOWN;
+
+ // This is used to determine the size of the target registers for a load/store instruction
+
+ switch (ins)
+ {
+ case INS_ldrb:
+ case INS_strb:
+ case INS_ldurb:
+ case INS_sturb:
+ result = EA_4BYTE;
+ break;
+
+ case INS_ldrh:
+ case INS_strh:
+ case INS_ldurh:
+ case INS_sturh:
+ result = EA_4BYTE;
+ break;
+
+ case INS_ldrsb:
+ case INS_ldursb:
+ case INS_ldrsh:
+ case INS_ldursh:
+ if (id->idOpSize() == EA_8BYTE)
+ result = EA_8BYTE;
+ else
+ result = EA_4BYTE;
+ break;
+
+ case INS_ldrsw:
+ case INS_ldursw:
+ case INS_ldpsw:
+ result = EA_8BYTE;
+ break;
+
+ case INS_ldp:
+ case INS_stp:
+ case INS_ldnp:
+ case INS_stnp:
+ result = id->idOpSize();
+ break;
+
+ case INS_ldr:
+ case INS_str:
+ case INS_ldur:
+ case INS_stur:
+ result = id->idOpSize();
+ break;
+
+ default:
+ NO_WAY("unexpected instruction");
+ break;
+ }
+ return result;
+}
+
+// Takes an instrDesc and uses the instruction to determine the 'size' of the
+// data that is loaded from memory.
+//
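+// For example, 'ldrsb x1, [x0]' loads EA_1BYTE of data from memory even though
+// its target register size (see emitInsTargetRegSize) is EA_8BYTE.
+//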
+emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
+{
+ instruction ins = id->idIns();
+ emitAttr result = EA_UNKNOWN;
+
+ // The 'result' returned is the 'size' of the data that is loaded from memory.
+
+ switch (ins)
+ {
+ case INS_ldrb:
+ case INS_strb:
+ case INS_ldurb:
+ case INS_sturb:
+ case INS_ldrsb:
+ case INS_ldursb:
+ result = EA_1BYTE;
+ break;
+
+ case INS_ldrh:
+ case INS_strh:
+ case INS_ldurh:
+ case INS_sturh:
+ case INS_ldrsh:
+ case INS_ldursh:
+ result = EA_2BYTE;
+ break;
+
+ case INS_ldrsw:
+ case INS_ldursw:
+ case INS_ldpsw:
+ result = EA_4BYTE;
+ break;
+
+ case INS_ldp:
+ case INS_stp:
+ case INS_ldnp:
+ case INS_stnp:
+ result = id->idOpSize();
+ break;
+
+ case INS_ldr:
+ case INS_str:
+ case INS_ldur:
+ case INS_stur:
+ result = id->idOpSize();
+ break;
+
+ default:
+ NO_WAY("unexpected instruction");
+ break;
+ }
+ return result;
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+
+// clang-format off
+static const char * const xRegNames[] =
+{
+ #define REGDEF(name, rnum, mask, xname, wname) xname,
+ #include "register.h"
+};
+
+static const char * const wRegNames[] =
+{
+ #define REGDEF(name, rnum, mask, xname, wname) wname,
+ #include "register.h"
+};
+
+static const char * const vRegNames[] =
+{
+ "v0", "v1", "v2", "v3", "v4",
+ "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14",
+ "v15", "v16", "v17", "v18", "v19",
+ "v20", "v21", "v22", "v23", "v24",
+ "v25", "v26", "v27", "v28", "v29",
+ "v30", "v31"
+};
+
+static const char * const qRegNames[] =
+{
+ "q0", "q1", "q2", "q3", "q4",
+ "q5", "q6", "q7", "q8", "q9",
+ "q10", "q11", "q12", "q13", "q14",
+ "q15", "q16", "q17", "q18", "q19",
+ "q20", "q21", "q22", "q23", "q24",
+ "q25", "q26", "q27", "q28", "q29",
+ "q30", "q31"
+};
+
+static const char * const hRegNames[] =
+{
+ "h0", "h1", "h2", "h3", "h4",
+ "h5", "h6", "h7", "h8", "h9",
+ "h10", "h11", "h12", "h13", "h14",
+ "h15", "h16", "h17", "h18", "h19",
+ "h20", "h21", "h22", "h23", "h24",
+ "h25", "h26", "h27", "h28", "h29",
+ "h30", "h31"
+};
+static const char * const bRegNames[] =
+{
+ "b0", "b1", "b2", "b3", "b4",
+ "b5", "b6", "b7", "b8", "b9",
+ "b10", "b11", "b12", "b13", "b14",
+ "b15", "b16", "b17", "b18", "b19",
+ "b20", "b21", "b22", "b23", "b24",
+ "b25", "b26", "b27", "b28", "b29",
+ "b30", "b31"
+};
+// clang-format on
+
+/*****************************************************************************
+ *
+ * Return a string that represents the given register.
+ */
+
+const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName)
+{
+ assert(reg < REG_COUNT);
+
+ const char* rn = nullptr;
+
+ if (size == EA_8BYTE)
+ {
+ rn = xRegNames[reg];
+ }
+ else if (size == EA_4BYTE)
+ {
+ rn = wRegNames[reg];
+ }
+ else if (isVectorRegister(reg))
+ {
+ if (size == EA_16BYTE)
+ {
+ rn = qRegNames[reg - REG_V0];
+ }
+ else if (size == EA_2BYTE)
+ {
+ rn = hRegNames[reg - REG_V0];
+ }
+ else if (size == EA_1BYTE)
+ {
+ rn = bRegNames[reg - REG_V0];
+ }
+ }
+
+ assert(rn != nullptr);
+
+ return rn;
+}
+
+/*****************************************************************************
+ *
+ * Return a string that represents the given register.
+ */
+
+const char* emitter::emitVectorRegName(regNumber reg)
+{
+ assert((reg >= REG_V0) && (reg <= REG_V31));
+
+ int index = (int)reg - (int)REG_V0;
+
+ return vRegNames[index];
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Returns the base encoding of the given CPU instruction.
+ */
+
+emitter::insFormat emitter::emitInsFormat(instruction ins)
+{
+ // clang-format off
+ const static insFormat insFormats[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) fmt,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) fmt,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) fmt,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) fmt,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) fmt,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) fmt,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt,
+ #include "instrs.h"
+ };
+ // clang-format on
+
+ assert(ins < ArrLen(insFormats));
+ assert((insFormats[ins] != IF_NONE));
+
+ return insFormats[ins];
+}
+
+// INST_FP is 1
+#define LD 2
+#define ST 4
+#define CMP 8
+
+// clang-format off
+/*static*/ const BYTE CodeGenInterface::instInfo[] =
+{
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) ldst | INST_FP*fp,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) ldst | INST_FP*fp,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) ldst | INST_FP*fp,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) ldst | INST_FP*fp,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) ldst | INST_FP*fp,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) ldst | INST_FP*fp,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) ldst | INST_FP*fp,
+ #include "instrs.h"
+};
+// clang-format on
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of compare or test instruction
+ */
+
+bool emitter::emitInsIsCompare(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & CMP) ? true : false;
+ else
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of load instruction
+ */
+
+bool emitter::emitInsIsLoad(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & LD) ? true : false;
+ else
+ return false;
+}
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of store instruction
+ */
+
+bool emitter::emitInsIsStore(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & ST) ? true : false;
+ else
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of load/store instruction
+ */
+
+bool emitter::emitInsIsLoadOrStore(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & (LD | ST)) ? true : false;
+ else
+ return false;
+}
+
+#undef LD
+#undef ST
+#undef CMP
+
+/*****************************************************************************
+ *
+ * Returns the specific encoding of the given CPU instruction and format
+ */
+
+emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
+{
+ // clang-format off
+ const static code_t insCodes1[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) e1,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) e1,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) e1,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e1,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e1,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e1,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e1,
+ #include "instrs.h"
+ };
+ const static code_t insCodes2[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) e2,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) e2,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e2,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e2,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e2,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e2,
+ #include "instrs.h"
+ };
+ const static code_t insCodes3[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) e3,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e3,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e3,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e3,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e3,
+ #include "instrs.h"
+ };
+ const static code_t insCodes4[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e4,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e4,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e4,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e4,
+ #include "instrs.h"
+ };
+ const static code_t insCodes5[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e5,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e5,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e5,
+ #include "instrs.h"
+ };
+ const static code_t insCodes6[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e6,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e6,
+ #include "instrs.h"
+ };
+ const static code_t insCodes7[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 )
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e7,
+ #include "instrs.h"
+ };
+ const static code_t insCodes8[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 )
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e8,
+ #include "instrs.h"
+ };
+ const static code_t insCodes9[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 )
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e9,
+ #include "instrs.h"
+ };
+ // clang-format on
+
+ const static insFormat formatEncode9[9] = {IF_DR_2E, IF_DR_2G, IF_DI_1B, IF_DI_1D, IF_DV_3C,
+ IF_DV_2B, IF_DV_2C, IF_DV_2E, IF_DV_2F};
+ const static insFormat formatEncode6A[6] = {IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A, IF_DV_3A, IF_DV_3E};
+ const static insFormat formatEncode5A[5] = {IF_LS_2A, IF_LS_2B, IF_LS_2C, IF_LS_3A, IF_LS_1A};
+ const static insFormat formatEncode5B[5] = {IF_DV_2G, IF_DV_2H, IF_DV_2I, IF_DV_1A, IF_DV_1B};
+ const static insFormat formatEncode5C[5] = {IF_DR_3A, IF_DR_3B, IF_DI_2C, IF_DV_3C, IF_DV_1B};
+ const static insFormat formatEncode4A[4] = {IF_LS_2A, IF_LS_2B, IF_LS_2C, IF_LS_3A};
+ const static insFormat formatEncode4B[4] = {IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A};
+ const static insFormat formatEncode4C[4] = {IF_DR_2A, IF_DR_2B, IF_DR_2C, IF_DI_1A};
+ const static insFormat formatEncode4D[4] = {IF_DV_3B, IF_DV_3D, IF_DV_3BI, IF_DV_3DI};
+ const static insFormat formatEncode4E[4] = {IF_DR_3A, IF_DR_3B, IF_DI_2C, IF_DV_3C};
+ const static insFormat formatEncode4F[4] = {IF_DR_3A, IF_DR_3B, IF_DV_3C, IF_DV_1B};
+ const static insFormat formatEncode4G[4] = {IF_DR_2E, IF_DR_2F, IF_DV_2M, IF_DV_2L};
+ const static insFormat formatEncode3A[3] = {IF_DR_3A, IF_DR_3B, IF_DI_2C};
+ const static insFormat formatEncode3B[3] = {IF_DR_2A, IF_DR_2B, IF_DI_1C};
+ const static insFormat formatEncode3C[3] = {IF_DR_3A, IF_DR_3B, IF_DV_3C};
+ const static insFormat formatEncode3D[3] = {IF_DV_2C, IF_DV_2D, IF_DV_2E};
+ const static insFormat formatEncode3E[3] = {IF_DV_3B, IF_DV_3BI, IF_DV_3DI};
+ const static insFormat formatEncode3F[3] = {IF_DV_2A, IF_DV_2G, IF_DV_2H};
+ const static insFormat formatEncode3G[3] = {IF_DV_2A, IF_DV_2G, IF_DV_2I};
+ const static insFormat formatEncode3H[3] = {IF_DR_3A, IF_DV_3A, IF_DV_3AI};
+ const static insFormat formatEncode3I[3] = {IF_DR_2E, IF_DR_2F, IF_DV_2M};
+ const static insFormat formatEncode2A[2] = {IF_DR_2E, IF_DR_2F};
+ const static insFormat formatEncode2B[2] = {IF_DR_3A, IF_DR_3B};
+ const static insFormat formatEncode2C[2] = {IF_DR_3A, IF_DI_2D};
+ const static insFormat formatEncode2D[2] = {IF_DR_3A, IF_DI_2B};
+ const static insFormat formatEncode2E[2] = {IF_LS_3B, IF_LS_3C};
+ const static insFormat formatEncode2F[2] = {IF_DR_2I, IF_DI_1F};
+ const static insFormat formatEncode2G[2] = {IF_DV_3B, IF_DV_3D};
+ const static insFormat formatEncode2H[2] = {IF_DV_2C, IF_DV_2F};
+ const static insFormat formatEncode2I[2] = {IF_DV_2K, IF_DV_1C};
+ const static insFormat formatEncode2J[2] = {IF_DV_2A, IF_DV_2G};
+ const static insFormat formatEncode2K[2] = {IF_DV_2M, IF_DV_2L};
+ const static insFormat formatEncode2L[2] = {IF_DV_2G, IF_DV_2M};
+ const static insFormat formatEncode2M[2] = {IF_DV_3A, IF_DV_3AI};
+ const static insFormat formatEncode2N[2] = {IF_DV_2N, IF_DV_2O};
+
+ code_t code = BAD_CODE;
+ insFormat insFmt = emitInsFormat(ins);
+ bool encoding_found = false;
+ int index = -1;
+
+ switch (insFmt)
+ {
+ case IF_EN9:
+ for (index = 0; index < 9; index++)
+ {
+ if (fmt == formatEncode9[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN6A:
+ for (index = 0; index < 6; index++)
+ {
+ if (fmt == formatEncode6A[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN5A:
+ for (index = 0; index < 5; index++)
+ {
+ if (fmt == formatEncode5A[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN5B:
+ for (index = 0; index < 5; index++)
+ {
+ if (fmt == formatEncode5B[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN5C:
+ for (index = 0; index < 5; index++)
+ {
+ if (fmt == formatEncode5C[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4A:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4A[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4B:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4B[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4C:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4C[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4D:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4D[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4E:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4E[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4F:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4F[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4G:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4G[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3A:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3A[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3B:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3B[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3C:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3C[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3D:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3D[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3E:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3E[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3F:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3F[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3G:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3G[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3H:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3H[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3I:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3I[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2A:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2A[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2B:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2B[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2C:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2C[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2D:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2D[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2E:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2E[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2F:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2F[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2G:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2G[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2H:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2H[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2I:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2I[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2J:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2J[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2K:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2K[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2L:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2L[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2M:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2M[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2N:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2N[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_BI_0A:
+ case IF_BI_0B:
+ case IF_BI_0C:
+ case IF_BI_1A:
+ case IF_BI_1B:
+ case IF_BR_1A:
+ case IF_BR_1B:
+ case IF_LS_1A:
+ case IF_LS_2A:
+ case IF_LS_2B:
+ case IF_LS_2C:
+ case IF_LS_3A:
+ case IF_LS_3B:
+ case IF_LS_3C:
+ case IF_DI_1A:
+ case IF_DI_1B:
+ case IF_DI_1C:
+ case IF_DI_1D:
+ case IF_DI_1E:
+ case IF_DI_1F:
+ case IF_DI_2A:
+ case IF_DI_2B:
+ case IF_DI_2C:
+ case IF_DI_2D:
+ case IF_DR_1D:
+ case IF_DR_2A:
+ case IF_DR_2B:
+ case IF_DR_2C:
+ case IF_DR_2D:
+ case IF_DR_2E:
+ case IF_DR_2F:
+ case IF_DR_2G:
+ case IF_DR_2H:
+ case IF_DR_2I:
+ case IF_DR_3A:
+ case IF_DR_3B:
+ case IF_DR_3C:
+ case IF_DR_3D:
+ case IF_DR_3E:
+ case IF_DR_4A:
+ case IF_DV_1A:
+ case IF_DV_1B:
+ case IF_DV_1C:
+ case IF_DV_2A:
+ case IF_DV_2B:
+ case IF_DV_2C:
+ case IF_DV_2D:
+ case IF_DV_2E:
+ case IF_DV_2F:
+ case IF_DV_2G:
+ case IF_DV_2H:
+ case IF_DV_2I:
+ case IF_DV_2J:
+ case IF_DV_2K:
+ case IF_DV_2L:
+ case IF_DV_2M:
+ case IF_DV_2N:
+ case IF_DV_2O:
+ case IF_DV_3A:
+ case IF_DV_3AI:
+ case IF_DV_3B:
+ case IF_DV_3BI:
+ case IF_DV_3C:
+ case IF_DV_3D:
+ case IF_DV_3DI:
+ case IF_DV_3E:
+ case IF_DV_4A:
+ case IF_SN_0A:
+ case IF_SI_0A:
+ case IF_SI_0B:
+
+ index = 0;
+ encoding_found = true;
+ break;
+
+ default:
+
+ encoding_found = false;
+ break;
+ }
+
+ assert(encoding_found);
+
+ switch (index)
+ {
+ case 0:
+ assert(ins < ArrLen(insCodes1));
+ code = insCodes1[ins];
+ break;
+ case 1:
+ assert(ins < ArrLen(insCodes2));
+ code = insCodes2[ins];
+ break;
+ case 2:
+ assert(ins < ArrLen(insCodes3));
+ code = insCodes3[ins];
+ break;
+ case 3:
+ assert(ins < ArrLen(insCodes4));
+ code = insCodes4[ins];
+ break;
+ case 4:
+ assert(ins < ArrLen(insCodes5));
+ code = insCodes5[ins];
+ break;
+ case 5:
+ assert(ins < ArrLen(insCodes6));
+ code = insCodes6[ins];
+ break;
+ case 6:
+ assert(ins < ArrLen(insCodes7));
+ code = insCodes7[ins];
+ break;
+ case 7:
+ assert(ins < ArrLen(insCodes8));
+ code = insCodes8[ins];
+ break;
+ case 8:
+ assert(ins < ArrLen(insCodes9));
+ code = insCodes9[ins];
+ break;
+ }
+
+ assert((code != BAD_CODE));
+
+ return code;
+}
+
+// true if this 'imm' can be encoded as an input operand to a mov instruction
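+// For example, for EA_8BYTE: 0x0000FFFF00000000 is encodable as a wide immediate (movz),
+// 0xFFFFFFFFFFFF1234 as an inverted wide immediate (movn), and 0x00FF00FF00FF00FF as a
+// bitmask immediate, while 0x12345678 is not encodable by any single mov form
+// (it would need a movz+movk sequence instead).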
+/*static*/ bool emitter::emitIns_valid_imm_for_mov(INT64 imm, emitAttr size)
+{
+ // Check for "MOV (wide immediate)".
+ if (canEncodeHalfwordImm(imm, size))
+ return true;
+
+ // Next try the ones-complement form of 'halfword immediate' imm(i16,hw),
+ // namely "MOV (inverted wide immediate)".
+ ssize_t notOfImm = NOT_helper(imm, getBitWidth(size));
+ if (canEncodeHalfwordImm(notOfImm, size))
+ return true;
+
+ // Finally try "MOV (bitmask immediate)" imm(N,r,s)
+ if (canEncodeBitMaskImm(imm, size))
+ return true;
+
+ return false;
+}
+
+// true if this 'imm' can be encoded as an input operand to a vector movi instruction
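+// For example, with an EA_8BYTE element every byte of 'imm' must be either 0x00 or 0xFF:
+// 0x00FF00FF00FF00FF is encodable, while 0x0102030405060708 is not.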
+/*static*/ bool emitter::emitIns_valid_imm_for_movi(INT64 imm, emitAttr elemsize)
+{
+ if (elemsize == EA_8BYTE)
+ {
+ UINT64 uimm = imm;
+ while (uimm != 0)
+ {
+ INT64 loByte = uimm & 0xFF;
+ if ((loByte == 0) || (loByte == 0xFF))
+ {
+ uimm >>= 8;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ assert(uimm == 0);
+ return true;
+ }
+ else
+ {
+ // First try the standard 'byteShifted immediate' imm(i8,bySh)
+ if (canEncodeByteShiftedImm(imm, elemsize, true))
+ return true;
+
+ // Next try the ones-complement form of the 'immediate' imm(i8,bySh)
+ ssize_t notOfImm = NOT_helper(imm, getBitWidth(elemsize));
+ if (canEncodeByteShiftedImm(notOfImm, elemsize, true))
+ return true;
+ }
+ return false;
+}
+
+// true if this 'imm' can be encoded as an input operand to a fmov instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_fmov(double immDbl)
+{
+ if (canEncodeFloatImm8(immDbl))
+ return true;
+
+ return false;
+}
+
+// true if this 'imm' can be encoded as an input operand to an add instruction
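+// For example, 4095 (0xfff) is directly encodable, 4096 (0x1000) is encodable as
+// 1 shifted left by 12, while 4097 (0x1001) is not encodable.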
+/*static*/ bool emitter::emitIns_valid_imm_for_add(INT64 imm, emitAttr size)
+{
+ if (unsigned_abs(imm) <= 0x0fff)
+ return true;
+ else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding
+ return true;
+
+ return false;
+}
+
+// true if this 'imm' can be encoded as an input operand to a cmp instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_cmp(INT64 imm, emitAttr size)
+{
+ return emitIns_valid_imm_for_add(imm, size);
+}
+
+// true if this 'imm' can be encoded as an input operand to a non-add/sub ALU instruction
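+// For example, for EA_8BYTE: 0x00FF00FF00FF00FF and 0x7FFFFFFF are valid bitmask
+// immediates, while 0, all-ones and 0x12345678 are not.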
+/*static*/ bool emitter::emitIns_valid_imm_for_alu(INT64 imm, emitAttr size)
+{
+ if (canEncodeBitMaskImm(imm, size))
+ return true;
+
+ return false;
+}
+
+// true if this 'imm' can be encoded as the offset in a ldr/str instruction
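+// For example, with an EA_8BYTE access: 0 and -8 are encodable, 32760 (4095 * 8) is
+// encodable via the scaled unsigned form (IF_LS_2B), while 32761 (not 8-byte aligned)
+// and 32768 (too large once scaled) are not.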
+/*static*/ bool emitter::emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr attr)
+{
+ if (imm == 0)
+ return true; // Encodable using IF_LS_2A
+
+ if ((imm >= -256) && (imm <= 255))
+ return true; // Encodable using IF_LS_2C (or possibly IF_LS_2B)
+
+ if (imm < 0)
+ return false; // not encodable
+
+ emitAttr size = EA_SIZE(attr);
+ unsigned scale = NaturalScale_helper(size);
+ ssize_t mask = size - 1; // the mask of low bits that must be zero to encode the immediate
+
+ if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
+ return true; // Encodable using IF_LS_2B
+
+ return false; // not encodable
+}
+
+/************************************************************************
+ *
+ * A helper method to return the natural scale for an EA 'size'
+ */
+
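+// For example: EA_1BYTE -> 0, EA_2BYTE -> 1, EA_4BYTE -> 2, EA_8BYTE -> 3, EA_16BYTE -> 4.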
+/*static*/ unsigned emitter::NaturalScale_helper(emitAttr size)
+{
+ assert(size == EA_1BYTE || size == EA_2BYTE || size == EA_4BYTE || size == EA_8BYTE || size == EA_16BYTE);
+
+ unsigned result = 0;
+ unsigned utemp = (unsigned)size;
+
+ // Compute log base 2 of utemp (aka 'size')
+ while (utemp > 1)
+ {
+ result++;
+ utemp >>= 1;
+ }
+
+ return result;
+}
+
+/************************************************************************
+ *
+ * A helper method to perform a Rotate-Right shift operation.
+ * The source is 'value' and it is rotated right by 'sh' bits;
+ * 'value' is considered to be a fixed-size 'width' set of bits.
+ *
+ * Example
+ * value is '00001111', sh is 2 and width is 8
+ * result is '11000011'
+ */
+
+/*static*/ UINT64 emitter::ROR_helper(UINT64 value, unsigned sh, unsigned width)
+{
+ assert(width <= 64);
+ // Check that 'value' fits in 'width' bits
+ assert((width == 64) || (value < (1ULL << width)));
+ // We don't support shifts >= width
+ assert(sh < width);
+
+ UINT64 result;
+
+ unsigned rsh = sh;
+ unsigned lsh = width - rsh;
+
+ result = (value >> rsh);
+ result |= (value << lsh);
+
+ if (width < 64)
+ {
+ // mask off any extra bits that we got from the left shift
+ result &= ((1ULL << width) - 1);
+ }
+ return result;
+}
+/************************************************************************
+ *
+ * A helper method to perform a 'NOT' bitwise complement operation.
+ * 'value' is considered to be a fixed size 'width' set of bits.
+ *
+ * Example
+ * value is '01001011', and width is 8
+ * result is '10110100'
+ */
+
+/*static*/ UINT64 emitter::NOT_helper(UINT64 value, unsigned width)
+{
+ assert(width <= 64);
+
+ UINT64 result = ~value;
+
+ if (width < 64)
+ {
+ // Check that 'value' fits in 'width' bits. Don't consider "sign" bits above width.
+ UINT64 maxVal = 1ULL << width;
+ UINT64 lowBitsMask = maxVal - 1;
+ UINT64 signBitsMask = ~lowBitsMask | (1ULL << (width - 1)); // The high bits must be set, and the top bit
+ // (sign bit) must be set.
+ assert((value < maxVal) || ((value & signBitsMask) == signBitsMask));
+
+ // mask off any extra bits that we got from the complement operation
+ result &= lowBitsMask;
+ }
+
+ return result;
+}
+
+/************************************************************************
+ *
+ * A helper method to perform a bit Replicate operation.
+ * The source is 'value' with a fixed-size 'width' set of bits;
+ * 'value' is replicated to fill out 32 or 64 bits as determined by 'size'.
+ *
+ * Example
+ * value is '11000011' (0xC3), width is 8 and size is EA_8BYTE
+ * result is '11000011 11000011 11000011 11000011 11000011 11000011 11000011 11000011'
+ * 0xC3C3C3C3C3C3C3C3
+ */
+
+/*static*/ UINT64 emitter::Replicate_helper(UINT64 value, unsigned width, emitAttr size)
+{
+ assert(emitter::isValidGeneralDatasize(size));
+
+ unsigned immWidth = (size == EA_8BYTE) ? 64 : 32;
+ assert(width <= immWidth);
+
+ UINT64 result = value;
+ unsigned filledBits = width;
+
+ while (filledBits < immWidth)
+ {
+ value <<= width;
+ result |= value;
+ filledBits += width;
+ }
+ return result;
+}
+
+/************************************************************************
+ *
+ * Convert an imm(N,r,s) into a 64-bit immediate
+ * inputs 'bmImm' a bitMaskImm struct
+ * 'size' specifies the size of the result (64 or 32 bits)
+ */
+
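+// For example, imm(N=1, r=0, s=3) decodes to 0x000000000000000F (a run of four ones,
+// not rotated), and imm(N=0, r=0, s=0b111100) decodes to 0x5555555555555555 for EA_8BYTE
+// (a single one bit replicated in every 2-bit element).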
+/*static*/ INT64 emitter::emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size)
+{
+ assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
+
+ unsigned N = bmImm.immN; // read the N,R and S values from the 'bitMaskImm' encoding
+ unsigned R = bmImm.immR;
+ unsigned S = bmImm.immS;
+
+ unsigned elemWidth = 64; // used when immN == 1
+
+ if (bmImm.immN == 0) // find the smaller elemWidth when immN == 0
+ {
+ // Scan S for the highest bit not set
+ elemWidth = 32;
+ for (unsigned bitNum = 5; bitNum > 0; bitNum--)
+ {
+ unsigned oneBit = elemWidth;
+ if ((S & oneBit) == 0)
+ break;
+ elemWidth /= 2;
+ }
+ }
+ else
+ {
+ assert(size == EA_8BYTE);
+ }
+
+ unsigned maskSR = elemWidth - 1;
+
+ S &= maskSR;
+ R &= maskSR;
+
+ // encoding for S is one less than the number of consecutive one bits
+ S++; // Number of consecutive ones to generate in 'welem'
+
+ // At this point:
+ //
+ // 'elemWidth' is the number of bits that we will use for the ROR and Replicate operations
+ // 'S' is the number of consecutive 1 bits for the immediate
+ // 'R' is the number of bits that we will Rotate Right the immediate
+    // 'size'      selects the final size of the immediate that we return (64 or 32 bits)
+
+    assert(S < elemWidth); // 'elemWidth' consecutive ones is a reserved encoding
+
+ UINT64 welem;
+ UINT64 wmask;
+
+ welem = (1ULL << S) - 1;
+
+ wmask = ROR_helper(welem, R, elemWidth);
+ wmask = Replicate_helper(wmask, elemWidth, size);
+
+ return wmask;
+}
+
+/*****************************************************************************
+ *
+ * Check if an immediate can use the left shifted by 12 bits encoding
+ */
+
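+// For example, 0x00FFF000 (0xFFF << 12) can use this encoding, while 0x01000000
+// (too large once shifted right by 12) and 0x00001001 (low 12 bits not zero) cannot.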
+/*static*/ bool emitter::canEncodeWithShiftImmBy12(INT64 imm)
+{
+ if (imm < 0)
+ {
+        imm = -imm; // convert to a positive value
+ }
+
+ if (imm < 0)
+ {
+ return false; // Must be MIN_INT64
+ }
+
+ if ((imm & 0xfff) != 0) // Now the low 12 bits all have to be zero
+ {
+ return false;
+ }
+
+ imm >>= 12; // shift right by 12 bits
+
+ return (imm <= 0x0fff); // Does it fit in 12 bits
+}
+
+/*****************************************************************************
+ *
+ * Normalize the 'imm' so that the upper bits, as defined by 'size', are zero
+ */
+
+/*static*/ INT64 emitter::normalizeImm64(INT64 imm, emitAttr size)
+{
+ unsigned immWidth = getBitWidth(size);
+ INT64 result = imm;
+
+ if (immWidth < 64)
+ {
+ // Check that 'imm' fits in 'immWidth' bits. Don't consider "sign" bits above width.
+ INT64 maxVal = 1LL << immWidth;
+ INT64 lowBitsMask = maxVal - 1;
+ INT64 hiBitsMask = ~lowBitsMask;
+ INT64 signBitsMask =
+ hiBitsMask | (1LL << (immWidth - 1)); // The high bits must be set, and the top bit (sign bit) must be set.
+ assert((imm < maxVal) || ((imm & signBitsMask) == signBitsMask));
+
+ // mask off the hiBits
+ result &= lowBitsMask;
+ }
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Normalize the 'imm' so that the upper bits, as defined by 'size', are zero
+ */
+
+/*static*/ INT32 emitter::normalizeImm32(INT32 imm, emitAttr size)
+{
+ unsigned immWidth = getBitWidth(size);
+ INT32 result = imm;
+
+ if (immWidth < 32)
+ {
+ // Check that 'imm' fits in 'immWidth' bits. Don't consider "sign" bits above width.
+ INT32 maxVal = 1 << immWidth;
+ INT32 lowBitsMask = maxVal - 1;
+ INT32 hiBitsMask = ~lowBitsMask;
+ INT32 signBitsMask = hiBitsMask | (1 << (immWidth - 1)); // The high bits must be set, and the top bit
+ // (sign bit) must be set.
+ assert((imm < maxVal) || ((imm & signBitsMask) == signBitsMask));
+
+ // mask off the hiBits
+ result &= lowBitsMask;
+ }
+ return result;
+}
+
+/************************************************************************
+ *
+ * returns true if 'imm' of 'size' bits (32/64) can be encoded
+ * using the ARM64 'bitmask immediate' form.
+ * When a non-null value is passed for 'wbBMI' then this method
+ * writes back the 'N', 'S' and 'R' values used to encode this immediate
+ *
+ */
+
+/*static*/ bool emitter::canEncodeBitMaskImm(INT64 imm, emitAttr size, emitter::bitMaskImm* wbBMI)
+{
+ assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
+
+ unsigned immWidth = (size == EA_8BYTE) ? 64 : 32;
+ unsigned maxLen = (size == EA_8BYTE) ? 6 : 5;
+
+ imm = normalizeImm64(imm, size);
+
+ // Starting with len=1, elemWidth is 2 bits
+ // len=2, elemWidth is 4 bits
+ // len=3, elemWidth is 8 bits
+ // len=4, elemWidth is 16 bits
+ // len=5, elemWidth is 32 bits
+ // (optionally) len=6, elemWidth is 64 bits
+ //
+ for (unsigned len = 1; (len <= maxLen); len++)
+ {
+ unsigned elemWidth = 1 << len;
+ UINT64 elemMask = ((UINT64)-1) >> (64 - elemWidth);
+ UINT64 tempImm = (UINT64)imm; // A working copy of 'imm' that we can mutate
+ UINT64 elemVal = tempImm & elemMask; // The low 'elemWidth' bits of 'imm'
+
+ // Check for all 1's or 0's as these can't be encoded
+ if ((elemVal == 0) || (elemVal == elemMask))
+ continue;
+
+ // 'checkedBits' is the count of bits that are known to match 'elemVal' when replicated
+ unsigned checkedBits = elemWidth; // by definition the first 'elemWidth' bits match
+
+ // Now check to see if each of the next bits match...
+ //
+ while (checkedBits < immWidth)
+ {
+ tempImm >>= elemWidth;
+
+ UINT64 nextElem = tempImm & elemMask;
+ if (nextElem != elemVal)
+ {
+ // Not matching, exit this loop and checkedBits will not be equal to immWidth
+ break;
+ }
+
+ // The 'nextElem' is matching, so increment 'checkedBits'
+ checkedBits += elemWidth;
+ }
+
+ // Did the full immediate contain bits that can be formed by repeating 'elemVal'?
+ if (checkedBits == immWidth)
+ {
+ // We are not quite done, since the only values that we can encode as a
+ // 'bitmask immediate' are those that can be formed by starting with a
+ // bit string of 0*1* that is rotated by some number of bits.
+ //
+ // We check to see if 'elemVal' can be formed using these restrictions.
+ //
+ // Observation:
+            //    Take any value that passes these restrictions, rotate it by one bit
+            //    and xor it with the original value: the result is a string of bits
+            //    that has exactly two 1 bits: 'elemRorXor'.
+            //    Further, the distance between the two one bits tells us the value
+            //    of S and the location of the 1 bits tells us the value of R.
+ //
+ // Some examples: (immWidth is 8)
+ //
+ // S=4,R=0 S=5,R=3 S=3,R=6
+ // elemVal: 00001111 11100011 00011100
+ // elemRor: 10000111 11110001 00001110
+ // elemRorXor: 10001000 00010010 00010010
+ // compute S 45678--- ---5678- ---3210-
+ // compute R 01234567 ---34567 ------67
+
+ UINT64 elemRor = ROR_helper(elemVal, 1, elemWidth); // Rotate 'elemVal' Right by one bit
+ UINT64 elemRorXor = elemVal ^ elemRor; // Xor elemVal and elemRor
+
+ // If we only have a two-bit change in elemROR then we can form a mask for this value
+ unsigned bitCount = 0;
+ UINT64 oneBit = 0x1;
+ unsigned R = elemWidth; // R is shift count for ROR (rotate right shift)
+ unsigned S = 0; // S is number of consecutive one bits
+ int incr = -1;
+
+ // Loop over the 'elemWidth' bits in 'elemRorXor'
+ //
+ for (unsigned bitNum = 0; bitNum < elemWidth; bitNum++)
+ {
+ if (incr == -1)
+ {
+ R--; // We decrement R by one whenever incr is -1
+ }
+ if (bitCount == 1)
+ {
+ S += incr; // We incr/decr S, after we find the first one bit in 'elemRorXor'
+ }
+
+ // Is this bit position a 1 bit in 'elemRorXor'?
+ //
+ if (oneBit & elemRorXor)
+ {
+ bitCount++;
+ // Is this the first 1 bit that we found in 'elemRorXor'?
+ if (bitCount == 1)
+ {
+ // Does this 1 bit represent a transition to zero bits?
+ bool toZeros = ((oneBit & elemVal) != 0);
+ if (toZeros)
+ {
+ // S :: Count down from elemWidth
+ S = elemWidth;
+ incr = -1;
+ }
+                    else // this 1 bit represents a transition to one bits.
+ {
+ // S :: Count up from zero
+ S = 0;
+ incr = +1;
+ }
+ }
+ else // bitCount > 1
+ {
+ // We found the second (or third...) 1 bit in 'elemRorXor'
+ incr = 0; // stop decrementing 'R'
+
+ if (bitCount > 2)
+ {
+ // More than 2 transitions from 0/1 in 'elemVal'
+ // This means that 'elemVal' can't be encoded
+ // using a 'bitmask immediate'.
+ //
+ // Furthermore, it will continue to fail
+ // with any larger 'len' that we try.
+ // so just return false.
+ //
+ return false;
+ }
+ }
+ }
+
+ // shift oneBit left by one bit to test the next position
+ oneBit <<= 1;
+ }
+
+ // We expect that bitCount will always be two at this point
+ // but just in case return false for any bad cases.
+ //
+ assert(bitCount == 2);
+ if (bitCount != 2)
+ return false;
+
+ // Perform some sanity checks on the values of 'S' and 'R'
+ assert(S > 0);
+ assert(S < elemWidth);
+ assert(R < elemWidth);
+
+ // Does the caller want us to return the N,R,S encoding values?
+ //
+ if (wbBMI != nullptr)
+ {
+
+ // The encoding used for S is one less than the
+ // number of consecutive one bits
+ S--;
+
+ if (len == 6)
+ {
+ wbBMI->immN = 1;
+ }
+ else
+ {
+ wbBMI->immN = 0;
+ // The encoding used for 'S' here is a bit peculiar.
+ //
+ // The upper bits need to be complemented, followed by a zero bit
+ // then the value of 'S-1'
+ //
+ unsigned upperBitsOfS = 64 - (1 << (len + 1));
+ S |= upperBitsOfS;
+ }
+ wbBMI->immR = R;
+ wbBMI->immS = S;
+
+ // Verify that what we are returning is correct.
+ assert(imm == emitDecodeBitMaskImm(*wbBMI, size));
+ }
+ // Tell the caller that we can successfully encode this immediate
+ // using a 'bitmask immediate'.
+ //
+ return true;
+ }
+ }
+ return false;
+}
+
+/************************************************************************
+ *
+ * Convert a 64-bit immediate into its 'bitmask immediate' representation imm(N,r,s)
+ */
+
+/*static*/ emitter::bitMaskImm emitter::emitEncodeBitMaskImm(INT64 imm, emitAttr size)
+{
+ emitter::bitMaskImm result;
+ result.immNRS = 0;
+
+ bool canEncode = canEncodeBitMaskImm(imm, size, &result);
+ assert(canEncode);
+
+ return result;
+}
+
+/************************************************************************
+ *
+ * Convert an imm(i16,hw) into a 32/64-bit immediate
+ * inputs 'hwImm' a halfwordImm struct
+ * 'size' specifies the size of the result (64 or 32 bits)
+ */
+
+/*static*/ INT64 emitter::emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size)
+{
+ assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
+
+ unsigned hw = hwImm.immHW;
+ INT64 val = (INT64)hwImm.immVal;
+
+ assert((hw <= 1) || (size == EA_8BYTE));
+
+ INT64 result = val << (16 * hw);
+ return result;
+}
+
+/************************************************************************
+ *
+ * returns true if 'imm' of 'size' bits (32/64) can be encoded
+ * using the ARM64 'halfword immediate' form.
+ * When a non-null value is passed for 'wbHWI' then this method
+ * writes back the 'immHW' and 'immVal' values used to encode this immediate
+ *
+ */
+
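+// For example, 0x0000123400000000 is encodable for EA_8BYTE (immHW=2, immVal=0x1234),
+// while 0x12345678 is not, because more than one halfword is non-zero.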
+/*static*/ bool emitter::canEncodeHalfwordImm(INT64 imm, emitAttr size, emitter::halfwordImm* wbHWI)
+{
+ assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
+
+ unsigned immWidth = (size == EA_8BYTE) ? 64 : 32;
+ unsigned maxHW = (size == EA_8BYTE) ? 4 : 2;
+
+ // setup immMask to a (EA_4BYTE) 0x00000000_FFFFFFFF or (EA_8BYTE) 0xFFFFFFFF_FFFFFFFF
+ const UINT64 immMask = ((UINT64)-1) >> (64 - immWidth);
+ const INT64 mask16 = (INT64)0xFFFF;
+
+ imm = normalizeImm64(imm, size);
+
+ // Try each of the valid hw shift sizes
+ for (unsigned hw = 0; (hw < maxHW); hw++)
+ {
+ INT64 curMask = mask16 << (hw * 16); // Represents the mask of the bits in the current halfword
+ INT64 checkBits = immMask & ~curMask;
+
+ // Excluding the current halfword (using ~curMask)
+ // does the immediate have zero bits in every other bit that we care about?
+ // note we care about all 64-bits for EA_8BYTE
+ // and we care about the lowest 32 bits for EA_4BYTE
+ //
+ if ((imm & checkBits) == 0)
+ {
+ // Does the caller want us to return the imm(i16,hw) encoding values?
+ //
+ if (wbHWI != nullptr)
+ {
+ INT64 val = ((imm & curMask) >> (hw * 16)) & mask16;
+ wbHWI->immHW = hw;
+ wbHWI->immVal = val;
+
+ // Verify that what we are returning is correct.
+ assert(imm == emitDecodeHalfwordImm(*wbHWI, size));
+ }
+ // Tell the caller that we can successfully encode this immediate
+ // using a 'halfword immediate'.
+ //
+ return true;
+ }
+ }
+ return false;
+}
+
+/************************************************************************
+ *
+ * Convert a 64-bit immediate into its 'halfword immediate' representation imm(i16,hw)
+ */
+
+/*static*/ emitter::halfwordImm emitter::emitEncodeHalfwordImm(INT64 imm, emitAttr size)
+{
+ emitter::halfwordImm result;
+ result.immHWVal = 0;
+
+ bool canEncode = canEncodeHalfwordImm(imm, size, &result);
+ assert(canEncode);
+
+ return result;
+}
+
+/************************************************************************
+ *
+ * Convert an imm(i8,sh) into a 16/32-bit immediate
+ * inputs 'bsImm' a byteShiftedImm struct
+ * 'size' specifies the size of the result (16 or 32 bits)
+ */
+
+/*static*/ INT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size)
+{
+ bool onesShift = (bsImm.immOnes == 1);
+ unsigned bySh = bsImm.immBY; // Num Bytes to shift 0,1,2,3
+ INT32 val = (INT32)bsImm.immVal; // 8-bit immediate
+ INT32 result = val;
+
+ if (bySh > 0)
+ {
+ assert((size == EA_2BYTE) || (size == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms
+ if (size == EA_2BYTE)
+ {
+ assert(bySh < 2);
+ }
+ else
+ {
+ assert(bySh < 4);
+ }
+
+ result <<= (8 * bySh);
+
+ if (onesShift)
+ {
+ result |= ((1 << (8 * bySh)) - 1);
+ }
+ }
+ return result;
+}
+
+/************************************************************************
+ *
+ * returns true if 'imm' of 'size' bits (16/32) can be encoded
+ * using the ARM64 'byteShifted immediate' form.
+ * When a non-null value is passed for 'wbBSI' then this method
+ * writes back the 'immBY' and 'immVal' values used to encode this immediate
+ *
+ */
+
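+// For example, with EA_4BYTE: 0x00BB0000 is encodable as imm8=0xBB shifted left by 2 bytes,
+// and (when allow_MSL is true) 0x0000BBFF is encodable using the shifting-ones (MSL) variant.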
+/*static*/ bool emitter::canEncodeByteShiftedImm(INT64 imm,
+ emitAttr size,
+ bool allow_MSL,
+ emitter::byteShiftedImm* wbBSI)
+{
+ bool canEncode = false;
+ bool onesShift = false; // true if we use the shifting ones variant
+ unsigned bySh = 0; // number of bytes to shift: 0, 1, 2, 3
+ unsigned imm8 = 0; // immediate to use in the encoding
+
+ imm = normalizeImm64(imm, size);
+
+ if (size == EA_1BYTE)
+ {
+ imm8 = (unsigned)imm;
+ assert(imm8 < 0x100);
+ canEncode = true;
+ }
+ else if (size == EA_8BYTE)
+ {
+ imm8 = (unsigned)imm;
+ assert(imm8 < 0x100);
+ canEncode = true;
+ }
+ else
+ {
+ assert((size == EA_2BYTE) || (size == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms
+
+ unsigned immWidth = (size == EA_4BYTE) ? 32 : 16;
+ unsigned maxBY = (size == EA_4BYTE) ? 4 : 2;
+
+ // setup immMask to a (EA_2BYTE) 0x0000FFFF or (EA_4BYTE) 0xFFFFFFFF
+ const UINT32 immMask = ((UINT32)-1) >> (32 - immWidth);
+ const INT32 mask8 = (INT32)0xFF;
+
+ // Try each of the valid by shift sizes
+ for (bySh = 0; (bySh < maxBY); bySh++)
+ {
+ INT32 curMask = mask8 << (bySh * 8); // Represents the mask of the bits in the current byteShifted
+ INT32 checkBits = immMask & ~curMask;
+ INT32 immCheck = (imm & checkBits);
+
+ // Excluding the current byte (using ~curMask)
+ // does the immediate have zero bits in every other bit that we care about?
+            // or can we use the shifting ones variant?
+ // note we care about all 32-bits for EA_4BYTE
+ // and we care about the lowest 16 bits for EA_2BYTE
+ //
+ if (immCheck == 0)
+ {
+ canEncode = true;
+ }
+ if (allow_MSL)
+ {
+ if ((bySh == 1) && (immCheck == 0xFF))
+ {
+ canEncode = true;
+ onesShift = true;
+ }
+ else if ((bySh == 2) && (immCheck == 0xFFFF))
+ {
+ canEncode = true;
+ onesShift = true;
+ }
+ }
+ if (canEncode)
+ {
+ imm8 = (unsigned)(((imm & curMask) >> (bySh * 8)) & mask8);
+ break;
+ }
+ }
+ }
+
+ if (canEncode)
+ {
+ // Does the caller want us to return the imm(i8,bySh) encoding values?
+ //
+ if (wbBSI != nullptr)
+ {
+ wbBSI->immOnes = onesShift;
+ wbBSI->immBY = bySh;
+ wbBSI->immVal = imm8;
+
+ // Verify that what we are returning is correct.
+ assert(imm == emitDecodeByteShiftedImm(*wbBSI, size));
+ }
+ // Tell the caller that we can successfully encode this immediate
+ // using a 'byteShifted immediate'.
+ //
+ return true;
+ }
+ return false;
+}
+
+/************************************************************************
+ *
+ * Convert a 32-bit immediate into its 'byteShifted immediate' representation imm(i8,by)
+ */
+
+/*static*/ emitter::byteShiftedImm emitter::emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL)
+{
+ emitter::byteShiftedImm result;
+ result.immBSVal = 0;
+
+ bool canEncode = canEncodeByteShiftedImm(imm, size, allow_MSL, &result);
+ assert(canEncode);
+
+ return result;
+}
+
+/************************************************************************
+ *
+ * Convert a 'float 8-bit immediate' into a double.
+ * inputs 'fpImm' a floatImm8 struct
+ */
+
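+// The representable values have the form +/- (16..31)/16 * 2^n with n in [-3, 4];
+// for example, immSign=0, immExp=0, immMant=0 decodes to 2.0.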
+/*static*/ double emitter::emitDecodeFloatImm8(const emitter::floatImm8 fpImm)
+{
+ unsigned sign = fpImm.immSign;
+ unsigned exp = fpImm.immExp ^ 0x4;
+ unsigned mant = fpImm.immMant + 16;
+ unsigned scale = 16 * 8;
+
+ while (exp > 0)
+ {
+ scale /= 2;
+ exp--;
+ }
+
+ double result = ((double)mant) / ((double)scale);
+ if (sign == 1)
+ {
+ result = -result;
+ }
+
+ return result;
+}
+
+/************************************************************************
+ *
+ * returns true if the 'immDbl' can be encoded using the 'float 8-bit immediate' form.
+ * Also returns the encoding if 'wbFPI' is non-null.
+ *
+ */
+
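+// For example, 2.0, 0.5 and -1.9375 (-31/16) are encodable, while 3.1 is not.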
+/*static*/ bool emitter::canEncodeFloatImm8(double immDbl, emitter::floatImm8* wbFPI)
+{
+ bool canEncode = false;
+ double val = immDbl;
+
+ int sign = 0;
+ if (val < 0.0)
+ {
+ val = -val;
+ sign = 1;
+ }
+
+ int exp = 0;
+ while ((val < 1.0) && (exp >= -4))
+ {
+ val *= 2.0;
+ exp--;
+ }
+ while ((val >= 2.0) && (exp <= 5))
+ {
+ val *= 0.5;
+ exp++;
+ }
+ exp += 3;
+ val *= 16.0;
+ int ival = (int)val;
+
+ if ((exp >= 0) && (exp <= 7))
+ {
+ if (val == (double)ival)
+ {
+ canEncode = true;
+
+ if (wbFPI != nullptr)
+ {
+ ival -= 16;
+ assert((ival >= 0) && (ival <= 15));
+
+ wbFPI->immSign = sign;
+ wbFPI->immExp = exp ^ 0x4;
+ wbFPI->immMant = ival;
+ unsigned imm8 = wbFPI->immFPIVal;
+ assert((imm8 >= 0) && (imm8 <= 0xff));
+ }
+ }
+ }
+
+ return canEncode;
+}
+
+/************************************************************************
+ *
+ * Convert a double into its 'float 8-bit immediate' representation
+ */
+
+/*static*/ emitter::floatImm8 emitter::emitEncodeFloatImm8(double immDbl)
+{
+ emitter::floatImm8 result;
+ result.immFPIVal = 0;
+
+ bool canEncode = canEncodeFloatImm8(immDbl, &result);
+ assert(canEncode);
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * For the given 'ins' returns the reverse instruction
+ * if one exists, otherwise returns INS_INVALID
+ */
+
+/*static*/ instruction emitter::insReverse(instruction ins)
+{
+ switch (ins)
+ {
+ case INS_add:
+ return INS_sub;
+ case INS_adds:
+ return INS_subs;
+
+ case INS_sub:
+ return INS_add;
+ case INS_subs:
+ return INS_adds;
+
+ case INS_cmp:
+ return INS_cmn;
+ case INS_cmn:
+ return INS_cmp;
+
+ case INS_ccmp:
+ return INS_ccmn;
+ case INS_ccmn:
+ return INS_ccmp;
+
+ default:
+ return INS_invalid;
+ }
+}
+
+/*****************************************************************************
+ *
+ * For the given 'datasize' and 'elemsize', makes the proper arrangement option:
+ * returns the insOpts that specifies the vector register arrangement;
+ * if one does not exist, returns INS_OPTS_NONE
+ */
+
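+// For example, (EA_16BYTE, EA_4BYTE) yields INS_OPTS_4S and (EA_8BYTE, EA_2BYTE) yields INS_OPTS_4H.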
+/*static*/ insOpts emitter::optMakeArrangement(emitAttr datasize, emitAttr elemsize)
+{
+ insOpts result = INS_OPTS_NONE;
+
+ if (datasize == EA_8BYTE)
+ {
+ switch (elemsize)
+ {
+ case EA_1BYTE:
+ result = INS_OPTS_8B;
+ break;
+ case EA_2BYTE:
+ result = INS_OPTS_4H;
+ break;
+ case EA_4BYTE:
+ result = INS_OPTS_2S;
+ break;
+ case EA_8BYTE:
+ result = INS_OPTS_1D;
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ }
+ else if (datasize == EA_16BYTE)
+ {
+ switch (elemsize)
+ {
+ case EA_1BYTE:
+ result = INS_OPTS_16B;
+ break;
+ case EA_2BYTE:
+ result = INS_OPTS_8H;
+ break;
+ case EA_4BYTE:
+ result = INS_OPTS_4S;
+ break;
+ case EA_8BYTE:
+ result = INS_OPTS_2D;
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ }
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * For the given 'datasize' and arrangement 'opt'
+ * returns true if the pair specifies a valid arrangement
+ */
+/*static*/ bool emitter::isValidArrangement(emitAttr datasize, insOpts opt)
+{
+ if (datasize == EA_8BYTE)
+ {
+ if ((opt == INS_OPTS_8B) || (opt == INS_OPTS_4H) || (opt == INS_OPTS_2S) || (opt == INS_OPTS_1D))
+ {
+ return true;
+ }
+ }
+ else if (datasize == EA_16BYTE)
+ {
+ if ((opt == INS_OPTS_16B) || (opt == INS_OPTS_8H) || (opt == INS_OPTS_4S) || (opt == INS_OPTS_2D))
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+// For the given 'arrangement' returns the 'datasize' specified by the vector register arrangement
+// asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed
+//
+/*static*/ emitAttr emitter::optGetDatasize(insOpts arrangement)
+{
+ if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_2S) ||
+ (arrangement == INS_OPTS_1D))
+ {
+ return EA_8BYTE;
+ }
+ else if ((arrangement == INS_OPTS_16B) || (arrangement == INS_OPTS_8H) || (arrangement == INS_OPTS_4S) ||
+ (arrangement == INS_OPTS_2D))
+ {
+ return EA_16BYTE;
+ }
+ else
+ {
+ assert(!" invalid 'arrangement' value");
+ return EA_UNKNOWN;
+ }
+}
+
+// For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement
+// asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed
+//
+/*static*/ emitAttr emitter::optGetElemsize(insOpts arrangement)
+{
+ if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_16B))
+ {
+ return EA_1BYTE;
+ }
+ else if ((arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_8H))
+ {
+ return EA_2BYTE;
+ }
+ else if ((arrangement == INS_OPTS_2S) || (arrangement == INS_OPTS_4S))
+ {
+ return EA_4BYTE;
+ }
+ else if ((arrangement == INS_OPTS_1D) || (arrangement == INS_OPTS_2D))
+ {
+ return EA_8BYTE;
+ }
+ else
+ {
+ assert(!" invalid 'arrangement' value");
+ return EA_UNKNOWN;
+ }
+}
+
+// For the given 'arrangement' returns the 'widen-arrangement' specified by the vector register arrangement
+// asserts and returns INS_OPTS_NONE if an invalid 'arrangement' value is passed
+//
+/*static*/ insOpts emitter::optWidenElemsize(insOpts arrangement)
+{
+ if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_16B))
+ {
+ return INS_OPTS_8H;
+ }
+ else if ((arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_8H))
+ {
+ return INS_OPTS_4S;
+ }
+ else if ((arrangement == INS_OPTS_2S) || (arrangement == INS_OPTS_4S))
+ {
+ return INS_OPTS_2D;
+ }
+ else
+ {
+ assert(!" invalid 'arrangement' value");
+ return INS_OPTS_NONE;
+ }
+}
+
+// For the given 'conversion' returns the 'dstsize' specified by the conversion option
+/*static*/ emitAttr emitter::optGetDstsize(insOpts conversion)
+{
+ switch (conversion)
+ {
+ case INS_OPTS_S_TO_8BYTE:
+ case INS_OPTS_D_TO_8BYTE:
+ case INS_OPTS_4BYTE_TO_D:
+ case INS_OPTS_8BYTE_TO_D:
+ case INS_OPTS_S_TO_D:
+ case INS_OPTS_H_TO_D:
+
+ return EA_8BYTE;
+
+ case INS_OPTS_S_TO_4BYTE:
+ case INS_OPTS_D_TO_4BYTE:
+ case INS_OPTS_4BYTE_TO_S:
+ case INS_OPTS_8BYTE_TO_S:
+ case INS_OPTS_D_TO_S:
+ case INS_OPTS_H_TO_S:
+
+ return EA_4BYTE;
+
+ case INS_OPTS_S_TO_H:
+ case INS_OPTS_D_TO_H:
+
+ return EA_2BYTE;
+
+ default:
+ assert(!" invalid 'conversion' value");
+ return EA_UNKNOWN;
+ }
+}
+
+// For the given 'conversion' returns the 'srcsize' specified by the conversion option
+/*static*/ emitAttr emitter::optGetSrcsize(insOpts conversion)
+{
+ switch (conversion)
+ {
+ case INS_OPTS_D_TO_8BYTE:
+ case INS_OPTS_D_TO_4BYTE:
+ case INS_OPTS_8BYTE_TO_D:
+ case INS_OPTS_8BYTE_TO_S:
+ case INS_OPTS_D_TO_S:
+ case INS_OPTS_D_TO_H:
+
+ return EA_8BYTE;
+
+ case INS_OPTS_S_TO_8BYTE:
+ case INS_OPTS_S_TO_4BYTE:
+ case INS_OPTS_4BYTE_TO_S:
+ case INS_OPTS_4BYTE_TO_D:
+ case INS_OPTS_S_TO_D:
+ case INS_OPTS_S_TO_H:
+
+ return EA_4BYTE;
+
+ case INS_OPTS_H_TO_S:
+ case INS_OPTS_H_TO_D:
+
+ return EA_2BYTE;
+
+ default:
+ assert(!" invalid 'conversion' value");
+ return EA_UNKNOWN;
+ }
+}
+
+// For the given 'datasize', 'elemsize' and 'index', returns true if 'index' is a valid
+// element index for a vector register of 'datasize' with elements of 'elemsize'
+/*static*/ bool emitter::isValidVectorIndex(emitAttr datasize, emitAttr elemsize, ssize_t index)
+{
+ assert(isValidVectorDatasize(datasize));
+ assert(isValidVectorElemsize(elemsize));
+
+ bool result = false;
+ if (index >= 0)
+ {
+ if (datasize == EA_8BYTE)
+ {
+ switch (elemsize)
+ {
+ case EA_1BYTE:
+ result = (index < 8);
+ break;
+ case EA_2BYTE:
+ result = (index < 4);
+ break;
+ case EA_4BYTE:
+ result = (index < 2);
+ break;
+ case EA_8BYTE:
+ result = (index < 1);
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ }
+ else if (datasize == EA_16BYTE)
+ {
+ switch (elemsize)
+ {
+ case EA_1BYTE:
+ result = (index < 16);
+ break;
+ case EA_2BYTE:
+ result = (index < 8);
+ break;
+ case EA_4BYTE:
+ result = (index < 4);
+ break;
+ case EA_8BYTE:
+ result = (index < 2);
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ }
+ }
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with no operands.
+ */
+
+void emitter::emitIns(instruction ins)
+{
+ instrDesc* id = emitNewInstrSmall(EA_8BYTE);
+ insFormat fmt = emitInsFormat(ins);
+
+ assert(fmt == IF_SN_0A);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a single immediate value.
+ */
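+// Usage sketch (illustrative): the only opcode handled here is INS_brk, whose immediate
+// is a comment field that must fit in 16 bits for the IF_SI_0A encoding.
+//
+//     emitIns_I(INS_brk, EA_8BYTE, 0);    // emit 'brk #0'
+//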
+
+void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm)
+{
+ insFormat fmt = IF_NONE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_brk:
+ if ((imm & 0x0000ffff) == imm)
+ {
+ fmt = IF_SI_0A;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: IF_SI_0A");
+ }
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a single register.
+ */
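+// Usage sketch (illustrative; 'reg' is a placeholder general register):
+//
+//     emitIns_R(INS_ret, EA_8BYTE, reg);    // return via the address held in 'reg'
+//     emitIns_R(INS_br,  EA_8BYTE, reg);    // indirect branch through 'reg'
+//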
+
+void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ instrDesc* id = nullptr;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_br:
+ case INS_ret:
+ assert(isGeneralRegister(reg));
+ id = emitNewInstrSmall(attr);
+ id->idReg1(reg);
+ fmt = IF_BR_1A;
+ break;
+
+ default:
+ unreached();
+ }
+
+ assert(fmt != IF_NONE);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a constant.
+ */
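+// Usage sketch (illustrative; 'reg' is a placeholder general register). For INS_mov the
+// routine automatically picks a movz, movn or bitmask-immediate encoding:
+//
+//     emitIns_R_I(INS_mov, EA_8BYTE, reg, 0x1234);    // encoded as 'movz reg, #0x1234'
+//     emitIns_R_I(INS_cmp, EA_4BYTE, reg, 10);        // compare against a uimm12, IF_DI_1A
+//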
+
+void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+ bool canEncode = false;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ bitMaskImm bmi;
+ halfwordImm hwi;
+ byteShiftedImm bsi;
+ ssize_t notOfImm;
+
+ case INS_tst:
+ assert(insOptsNone(opt));
+ assert(isGeneralRegister(reg));
+ bmi.immNRS = 0;
+ canEncode = canEncodeBitMaskImm(imm, size, &bmi);
+ if (canEncode)
+ {
+ imm = bmi.immNRS;
+ assert(isValidImmNRS(imm, size));
+ fmt = IF_DI_1C;
+ }
+ break;
+
+ case INS_movk:
+ case INS_movn:
+ case INS_movz:
+ assert(isValidGeneralDatasize(size));
+ assert(insOptsNone(opt)); // No LSL here (you must use emitIns_R_I_I if a shift is needed)
+ assert(isGeneralRegister(reg));
+ assert(isValidUimm16(imm));
+
+ hwi.immHW = 0;
+ hwi.immVal = imm;
+ assert(imm == emitDecodeHalfwordImm(hwi, size));
+
+ imm = hwi.immHWVal;
+ canEncode = true;
+ fmt = IF_DI_1B;
+ break;
+
+ case INS_mov:
+ assert(isValidGeneralDatasize(size));
+ assert(insOptsNone(opt)); // No explicit LSL here
+ // We will automatically determine the shift based upon the imm
+
+ // First try the standard 'halfword immediate' imm(i16,hw)
+ hwi.immHWVal = 0;
+ canEncode = canEncodeHalfwordImm(imm, size, &hwi);
+ if (canEncode)
+ {
+ // uses a movz encoding
+ assert(isGeneralRegister(reg));
+ imm = hwi.immHWVal;
+ assert(isValidImmHWVal(imm, size));
+ fmt = IF_DI_1B;
+ break;
+ }
+
+ // Next try the ones-complement form of 'halfword immediate' imm(i16,hw)
+ notOfImm = NOT_helper(imm, getBitWidth(size));
+ canEncode = canEncodeHalfwordImm(notOfImm, size, &hwi);
+ if (canEncode)
+ {
+ assert(isGeneralRegister(reg));
+ imm = hwi.immHWVal;
+ ins = INS_movn; // uses a movn encoding
+ assert(isValidImmHWVal(imm, size));
+ fmt = IF_DI_1B;
+ break;
+ }
+
+ // Finally try the 'bitmask immediate' imm(N,r,s)
+ bmi.immNRS = 0;
+ canEncode = canEncodeBitMaskImm(imm, size, &bmi);
+ if (canEncode)
+ {
+ assert(isGeneralRegisterOrSP(reg));
+ reg = encodingSPtoZR(reg);
+ imm = bmi.immNRS;
+ assert(isValidImmNRS(imm, size));
+ fmt = IF_DI_1D;
+ break;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: mov imm");
+ }
+
+ break;
+
+ case INS_movi:
+ assert(isValidVectorDatasize(size));
+ assert(isVectorRegister(reg));
+ if (insOptsNone(opt) && (size == EA_8BYTE))
+ {
+ opt = INS_OPTS_1D;
+ }
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+
+ if (elemsize == EA_8BYTE)
+ {
+ size_t uimm = imm;
+ ssize_t imm8 = 0;
+ unsigned pos = 0;
+ canEncode = true;
+ while (uimm != 0)
+ {
+ INT64 loByte = uimm & 0xFF;
+ if (((loByte == 0) || (loByte == 0xFF)) && (pos < 8))
+ {
+ if (loByte == 0xFF)
+ {
+ imm8 |= (1 << pos);
+ }
+ uimm >>= 8;
+ pos++;
+ }
+ else
+ {
+ canEncode = false;
+ break;
+ }
+ }
+ imm = imm8;
+ assert(isValidUimm8(imm));
+ fmt = IF_DV_1B;
+ break;
+ }
+ else
+ {
+ // Vector operation
+
+ // No explicit LSL/MSL is used for the immediate
+ // We will automatically determine the shift based upon the value of imm
+
+ // First try the standard 'byteShifted immediate' imm(i8,bySh)
+ bsi.immBSVal = 0;
+ canEncode = canEncodeByteShiftedImm(imm, elemsize, true, &bsi);
+ if (canEncode)
+ {
+ imm = bsi.immBSVal;
+ assert(isValidImmBSVal(imm, size));
+ fmt = IF_DV_1B;
+ break;
+ }
+
+ // Next try the ones-complement form of the 'immediate' imm(i8,bySh)
+ if ((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)) // Only EA_2BYTE or EA_4BYTE forms
+ {
+ notOfImm = NOT_helper(imm, getBitWidth(elemsize));
+ canEncode = canEncodeByteShiftedImm(notOfImm, elemsize, true, &bsi);
+ if (canEncode)
+ {
+ imm = bsi.immBSVal;
+ ins = INS_mvni; // uses a mvni encoding
+ assert(isValidImmBSVal(imm, size));
+ fmt = IF_DV_1B;
+ break;
+ }
+ }
+ }
+ break;
+
+ case INS_orr:
+ case INS_bic:
+ case INS_mvni:
+ assert(isValidVectorDatasize(size));
+ assert(isVectorRegister(reg));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms
+
+ // Vector operation
+
+ // No explicit LSL/MSL is used for the immediate
+ // We will automatically determine the shift based upon the value of imm
+
+ // First try the standard 'byteShifted immediate' imm(i8,bySh)
+ bsi.immBSVal = 0;
+ canEncode = canEncodeByteShiftedImm(imm, elemsize,
+ (ins == INS_mvni), // mvni supports the ones shifting variant (aka MSL)
+ &bsi);
+ if (canEncode)
+ {
+ imm = bsi.immBSVal;
+ assert(isValidImmBSVal(imm, size));
+ fmt = IF_DV_1B;
+ break;
+ }
+ break;
+
+ case INS_cmp:
+ case INS_cmn:
+ assert(insOptsNone(opt));
+ assert(isGeneralRegister(reg));
+
+ if (unsigned_abs(imm) <= 0x0fff)
+ {
+ if (imm < 0)
+ {
+ ins = insReverse(ins);
+ imm = -imm;
+ }
+ assert(isValidUimm12(imm));
+ canEncode = true;
+ fmt = IF_DI_1A;
+ }
+ else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding
+ {
+ // Encoding will use a 12-bit left shift of the immediate
+ opt = INS_OPTS_LSL12;
+ if (imm < 0)
+ {
+ ins = insReverse(ins);
+ imm = -imm;
+ }
+ assert((imm & 0xfff) == 0);
+ imm >>= 12;
+ assert(isValidUimm12(imm));
+ canEncode = true;
+ fmt = IF_DI_1A;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: IF_DI_1A");
+ }
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(canEncode);
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a floating point constant.
+ */
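+// Usage sketch (illustrative; 'vReg' is a placeholder vector register). Only immediates
+// representable as an 8-bit float immediate (or 0.0 for fcmp/fcmpe) can be encoded:
+//
+//     emitIns_R_F(INS_fmov, EA_8BYTE, vReg, 1.0);    // scalar double immediate, IF_DV_1A
+//     emitIns_R_F(INS_fcmp, EA_8BYTE, vReg, 0.0);    // compare against +0.0, IF_DV_1C
+//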
+
+void emitter::emitIns_R_F(
+ instruction ins, emitAttr attr, regNumber reg, double immDbl, insOpts opt /* = INS_OPTS_NONE */)
+
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+ ssize_t imm = 0;
+ bool canEncode = false;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ floatImm8 fpi;
+
+ case INS_fcmp:
+ case INS_fcmpe:
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ assert(isVectorRegister(reg));
+ if (immDbl == 0.0)
+ {
+ canEncode = true;
+ fmt = IF_DV_1C;
+ }
+ break;
+
+ case INS_fmov:
+ assert(isVectorRegister(reg));
+ fpi.immFPIVal = 0;
+ canEncode = canEncodeFloatImm8(immDbl, &fpi);
+
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+
+ if (canEncode)
+ {
+ imm = fpi.immFPIVal;
+ assert((imm >= 0) && (imm <= 0xff));
+ fmt = IF_DV_1B;
+ }
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsizeFloat(size));
+
+ if (canEncode)
+ {
+ imm = fpi.immFPIVal;
+ assert((imm >= 0) && (imm <= 0xff));
+ fmt = IF_DV_1A;
+ }
+ }
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(canEncode);
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers
+ */
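+// Usage sketch (illustrative; 'dstReg'/'srcReg' are placeholder registers). Note that a
+// same-register 'mov' at full width is elided, since it has no effect:
+//
+//     emitIns_R_R(INS_mov,  EA_8BYTE, dstReg, srcReg);    // integer register copy
+//     emitIns_R_R(INS_sxtw, EA_8BYTE, dstReg, srcReg);    // sign-extend the low 32 bits
+//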
+
+void emitter::emitIns_R_R(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_mov:
+ assert(insOptsNone(opt));
+ // Is the mov even necessary?
+ if (reg1 == reg2)
+ {
+ // A mov with an EA_4BYTE size has the side-effect of clearing the upper 32 bits,
+ // so only eliminate mov instructions that are not clearing the upper bits.
+ //
+ if (isGeneralRegisterOrSP(reg1) && (size == EA_8BYTE))
+ {
+ return;
+ }
+ else if (isVectorRegister(reg1) && (size == EA_16BYTE))
+ {
+ return;
+ }
+ }
+
+ // Check for the 'mov' aliases for the vector registers
+ if (isVectorRegister(reg1))
+ {
+ if (isVectorRegister(reg2) && isValidVectorDatasize(size))
+ {
+ return emitIns_R_R_R(INS_mov, size, reg1, reg2, reg2);
+ }
+ else
+ {
+ return emitIns_R_R_I(INS_mov, size, reg1, reg2, 0);
+ }
+ }
+ else
+ {
+ if (isVectorRegister(reg2))
+ {
+ assert(isGeneralRegister(reg1));
+ return emitIns_R_R_I(INS_mov, size, reg1, reg2, 0);
+ }
+ }
+
+ // Is this a MOV to/from SP instruction?
+ if ((reg1 == REG_SP) || (reg2 == REG_SP))
+ {
+ assert(isGeneralRegisterOrSP(reg1));
+ assert(isGeneralRegisterOrSP(reg2));
+ reg1 = encodingSPtoZR(reg1);
+ reg2 = encodingSPtoZR(reg2);
+ fmt = IF_DR_2G;
+ }
+ else
+ {
+ assert(insOptsNone(opt));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegisterOrZR(reg2));
+ fmt = IF_DR_2E;
+ }
+ break;
+
+ case INS_abs:
+ case INS_not:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ // for 'NOT' we can construct the arrangement: 8B or 16B
+ if ((ins == INS_not) && insOptsNone(opt))
+ {
+ assert(isValidVectorDatasize(size));
+ elemsize = EA_1BYTE;
+ opt = optMakeArrangement(size, elemsize);
+ }
+ if (insOptsNone(opt))
+ {
+ // Scalar operation
+ assert(size == EA_8BYTE); // Only type D is supported
+ fmt = IF_DV_2L;
+ }
+ else
+ {
+ // Vector operation
+ assert(insOptsAnyArrangement(opt));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ if (ins == INS_not)
+ {
+ assert(elemsize == EA_1BYTE);
+ }
+ fmt = IF_DV_2M;
+ }
+ break;
+
+ case INS_mvn:
+ case INS_neg:
+ if (isVectorRegister(reg1))
+ {
+ assert(isVectorRegister(reg2));
+ // for 'mvn' we can construct the arrangement: 8B or 16B
+ if ((ins == INS_mvn) && insOptsNone(opt))
+ {
+ assert(isValidVectorDatasize(size));
+ elemsize = EA_1BYTE;
+ opt = optMakeArrangement(size, elemsize);
+ }
+ if (insOptsNone(opt))
+ {
+ // Scalar operation
+ assert(size == EA_8BYTE); // Only type D is supported
+ fmt = IF_DV_2L;
+ }
+ else
+ {
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ if (ins == INS_mvn)
+ {
+ assert(elemsize == EA_1BYTE); // Only supports 8B or 16B
+ }
+ fmt = IF_DV_2M;
+ }
+ break;
+ }
+ __fallthrough;
+
+ case INS_negs:
+ assert(insOptsNone(opt));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegisterOrZR(reg2));
+ fmt = IF_DR_2E;
+ break;
+
+ case INS_sxtw:
+ assert(size == EA_8BYTE);
+ __fallthrough;
+
+ case INS_sxtb:
+ case INS_sxth:
+ case INS_uxtb:
+ case INS_uxth:
+ assert(insOptsNone(opt));
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ fmt = IF_DR_2H;
+ break;
+
+ case INS_sxtl:
+ case INS_sxtl2:
+ case INS_uxtl:
+ case INS_uxtl2:
+ return emitIns_R_R_I(ins, size, reg1, reg2, 0, opt);
+
+ case INS_cls:
+ case INS_clz:
+ case INS_rbit:
+ case INS_rev16:
+ case INS_rev32:
+ case INS_cnt:
+ if (isVectorRegister(reg1))
+ {
+ assert(isVectorRegister(reg2));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ if ((ins == INS_cls) || (ins == INS_clz))
+ {
+ assert(elemsize != EA_8BYTE); // No encoding for type D
+ }
+ else if (ins == INS_rev32)
+ {
+ assert((elemsize == EA_2BYTE) || (elemsize == EA_1BYTE));
+ }
+ else
+ {
+ assert(elemsize == EA_1BYTE); // Only supports 8B or 16B
+ }
+ fmt = IF_DV_2M;
+ break;
+ }
+ if (ins == INS_cnt)
+ {
+ // Doesn't have general register version(s)
+ break;
+ }
+
+ __fallthrough;
+
+ case INS_rev:
+ assert(insOptsNone(opt));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ if (ins == INS_rev32)
+ {
+ assert(size == EA_8BYTE);
+ }
+ else
+ {
+ assert(isValidGeneralDatasize(size));
+ }
+ fmt = IF_DR_2G;
+ break;
+
+ case INS_rev64:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(elemsize != EA_8BYTE); // No encoding for type D
+ fmt = IF_DV_2M;
+ break;
+
+ case INS_ldr:
+ case INS_ldrb:
+ case INS_ldrh:
+ case INS_ldrsb:
+ case INS_ldrsh:
+ case INS_ldrsw:
+ case INS_str:
+ case INS_strb:
+ case INS_strh:
+
+ case INS_cmp:
+ case INS_cmn:
+ case INS_tst:
+ assert(insOptsNone(opt));
+ emitIns_R_R_I(ins, attr, reg1, reg2, 0, INS_OPTS_NONE);
+ return;
+
+ case INS_fmov:
+ assert(isValidVectorElemsizeFloat(size));
+
+ // Is the mov even necessary?
+ if (reg1 == reg2)
+ {
+ return;
+ }
+
+ if (isVectorRegister(reg1))
+ {
+ if (isVectorRegister(reg2))
+ {
+ assert(insOptsNone(opt));
+ fmt = IF_DV_2G;
+ }
+ else
+ {
+ assert(isGeneralRegister(reg2));
+
+ // If the optional conversion specifier is not present, we derive it from 'size'
+ if (opt == INS_OPTS_NONE)
+ {
+ opt = (size == EA_4BYTE) ? INS_OPTS_4BYTE_TO_S : INS_OPTS_8BYTE_TO_D;
+ }
+ assert(insOptsConvertIntToFloat(opt));
+
+ fmt = IF_DV_2I;
+ }
+ }
+ else
+ {
+ assert(isGeneralRegister(reg1));
+ assert(isVectorRegister(reg2));
+
+ // If the optional conversion specifier is not present, we derive it from 'size'
+ if (opt == INS_OPTS_NONE)
+ {
+ opt = (size == EA_4BYTE) ? INS_OPTS_S_TO_4BYTE : INS_OPTS_D_TO_8BYTE;
+ }
+ assert(insOptsConvertFloatToInt(opt));
+
+ fmt = IF_DV_2H;
+ }
+ break;
+
+ case INS_fcmp:
+ case INS_fcmpe:
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ fmt = IF_DV_2K;
+ break;
+
+ case INS_fcvtns:
+ case INS_fcvtnu:
+ case INS_fcvtas:
+ case INS_fcvtau:
+ case INS_fcvtps:
+ case INS_fcvtpu:
+ case INS_fcvtms:
+ case INS_fcvtmu:
+ case INS_fcvtzs:
+ case INS_fcvtzu:
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_2A;
+ }
+ else
+ {
+ // Scalar operation
+ assert(isVectorRegister(reg2));
+ if (isVectorRegister(reg1))
+ {
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ fmt = IF_DV_2G;
+ }
+ else
+ {
+ assert(isGeneralRegister(reg1));
+ assert(insOptsConvertFloatToInt(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ fmt = IF_DV_2H;
+ }
+ }
+ break;
+
+ case INS_scvtf:
+ case INS_ucvtf:
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_2A;
+ }
+ else
+ {
+ // Scalar operation
+ assert(isVectorRegister(reg1));
+ if (isVectorRegister(reg2))
+ {
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ fmt = IF_DV_2G;
+ }
+ else
+ {
+ assert(isGeneralRegister(reg2));
+ assert(insOptsConvertIntToFloat(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ fmt = IF_DV_2I;
+ }
+ }
+ break;
+
+ case INS_fabs:
+ case INS_fneg:
+ case INS_fsqrt:
+ case INS_frinta:
+ case INS_frinti:
+ case INS_frintm:
+ case INS_frintn:
+ case INS_frintp:
+ case INS_frintx:
+ case INS_frintz:
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_2A;
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ fmt = IF_DV_2G;
+ }
+ break;
+
+ case INS_fcvt:
+ assert(insOptsConvertFloatToFloat(opt));
+ assert(isValidVectorFcvtsize(size));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ fmt = IF_DV_2J;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSmall(attr);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and two constants.
+ */
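+// Usage sketch (illustrative; 'reg' is a placeholder general register): insert the 16-bit
+// value 0xABCD into bits [31:16] of 'reg' without disturbing the other bits:
+//
+//     emitIns_R_I_I(INS_movk, EA_8BYTE, reg, 0xABCD, 16, INS_OPTS_LSL);
+//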
+
+void emitter::emitIns_R_I_I(
+ instruction ins, emitAttr attr, regNumber reg, ssize_t imm1, ssize_t imm2, insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ size_t immOut = 0; // composed from imm1 and imm2 and stored in the instrDesc
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ bool canEncode;
+ halfwordImm hwi;
+
+ case INS_mov:
+ ins = INS_movz; // INS_mov with LSL is an alias for INS_movz LSL
+ __fallthrough;
+
+ case INS_movk:
+ case INS_movn:
+ case INS_movz:
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg));
+ assert(isValidUimm16(imm1));
+ assert(insOptsLSL(opt)); // Must be INS_OPTS_LSL
+
+ if (size == EA_8BYTE)
+ {
+ assert((imm2 == 0) || (imm2 == 16) || // shift amount: 0, 16, 32 or 48
+ (imm2 == 32) || (imm2 == 48));
+ }
+ else // EA_4BYTE
+ {
+ assert((imm2 == 0) || (imm2 == 16)); // shift amount: 0 or 16
+ }
+
+ hwi.immHWVal = 0;
+
+ switch (imm2)
+ {
+ case 0:
+ hwi.immHW = 0;
+ canEncode = true;
+ break;
+
+ case 16:
+ hwi.immHW = 1;
+ canEncode = true;
+ break;
+
+ case 32:
+ hwi.immHW = 2;
+ canEncode = true;
+ break;
+
+ case 48:
+ hwi.immHW = 3;
+ canEncode = true;
+ break;
+
+ default:
+ canEncode = false;
+ }
+
+ if (canEncode)
+ {
+ hwi.immVal = imm1;
+
+ immOut = hwi.immHWVal;
+ assert(isValidImmHWVal(immOut, size));
+ fmt = IF_DI_1B;
+ }
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(attr, immOut);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers and a constant.
+ */
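+// Usage sketch (illustrative; register names are placeholders):
+//
+//     emitIns_R_R_I(INS_ldr, EA_8BYTE, dstReg, baseReg, 16);   // scaled offset load, IF_LS_2B
+//     emitIns_R_R_I(INS_add, EA_8BYTE, dstReg, srcReg, 1);     // uimm12 add, IF_DI_2A
+//     emitIns_R_R_I(INS_lsl, EA_8BYTE, dstReg, srcReg, 3);     // shift by immediate, IF_DI_2D
+//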
+
+void emitter::emitIns_R_R_I(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+ bool isLdSt = false;
+ bool isSIMD = false;
+ bool isAddSub = false;
+ bool setFlags = false;
+ unsigned scale = 0;
+ bool unscaledOp = false;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ bool canEncode;
+ bitMaskImm bmi;
+
+ case INS_mov:
+ // Check for the 'mov' aliases for the vector registers
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsize(size));
+ elemsize = size;
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+
+ if (isVectorRegister(reg1))
+ {
+ if (isGeneralRegisterOrZR(reg2))
+ {
+ fmt = IF_DV_2C; // Alias for 'ins'
+ break;
+ }
+ else if (isVectorRegister(reg2))
+ {
+ fmt = IF_DV_2E; // Alias for 'dup'
+ break;
+ }
+ }
+ else // isGeneralRegister(reg1)
+ {
+ assert(isGeneralRegister(reg1));
+ if (isVectorRegister(reg2))
+ {
+ fmt = IF_DV_2B; // Alias for 'umov'
+ break;
+ }
+ }
+ assert(!" invalid INS_mov operands");
+ break;
+
+ case INS_lsl:
+ case INS_lsr:
+ case INS_asr:
+ assert(insOptsNone(opt));
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isValidImmShift(imm, size));
+ fmt = IF_DI_2D;
+ break;
+
+ case INS_ror:
+ assert(insOptsNone(opt));
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isValidImmShift(imm, size));
+ fmt = IF_DI_2B;
+ break;
+
+ case INS_sshr:
+ case INS_ssra:
+ case INS_srshr:
+ case INS_srsra:
+ case INS_shl:
+ case INS_ushr:
+ case INS_usra:
+ case INS_urshr:
+ case INS_ursra:
+ case INS_sri:
+ case INS_sli:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidImmShift(imm, elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_2O;
+ break;
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(size == EA_8BYTE); // only supported size
+ assert(isValidImmShift(imm, size));
+ fmt = IF_DV_2N;
+ }
+ break;
+
+ case INS_sxtl:
+ case INS_uxtl:
+ assert(imm == 0);
+ __fallthrough;
+
+ case INS_shrn:
+ case INS_rshrn:
+ case INS_sshll:
+ case INS_ushll:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ // Vector operation
+ assert(size == EA_8BYTE);
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(elemsize != EA_8BYTE); // Reserved encodings
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidImmShift(imm, elemsize));
+ fmt = IF_DV_2O;
+ break;
+
+ case INS_sxtl2:
+ case INS_uxtl2:
+ assert(imm == 0);
+ __fallthrough;
+
+ case INS_shrn2:
+ case INS_rshrn2:
+ case INS_sshll2:
+ case INS_ushll2:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ // Vector operation
+ assert(size == EA_16BYTE);
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(elemsize != EA_8BYTE); // Reserved encodings
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidImmShift(imm, elemsize));
+ fmt = IF_DV_2O;
+ break;
+
+ case INS_mvn:
+ case INS_neg:
+ case INS_negs:
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegisterOrZR(reg2));
+
+ if (imm == 0)
+ {
+ assert(insOptsNone(opt)); // a zero imm, means no alu shift kind
+
+ fmt = IF_DR_2E;
+ }
+ else
+ {
+ if (ins == INS_mvn)
+ {
+ assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind
+ }
+ else // neg or negs
+ {
+ assert(insOptsAluShift(opt)); // a non-zero imm, must select shift kind, can't use ROR
+ }
+ assert(isValidImmShift(imm, size));
+ fmt = IF_DR_2F;
+ }
+ break;
+
+ case INS_tst:
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegisterOrZR(reg1));
+ assert(isGeneralRegister(reg2));
+
+ if (insOptsAnyShift(opt))
+ {
+ assert(isValidImmShift(imm, size) && (imm != 0));
+ fmt = IF_DR_2B;
+ }
+ else
+ {
+ assert(insOptsNone(opt)); // a zero imm, means no alu shift kind
+ assert(imm == 0);
+ fmt = IF_DR_2A;
+ }
+ break;
+
+ case INS_cmp:
+ case INS_cmn:
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegisterOrSP(reg1));
+ assert(isGeneralRegister(reg2));
+
+ reg1 = encodingSPtoZR(reg1);
+ if (insOptsAnyExtend(opt))
+ {
+ assert((imm >= 0) && (imm <= 4));
+
+ fmt = IF_DR_2C;
+ }
+ else if (imm == 0)
+ {
+ assert(insOptsNone(opt)); // a zero imm, means no alu shift kind
+
+ fmt = IF_DR_2A;
+ }
+ else
+ {
+ assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind
+ assert(isValidImmShift(imm, size));
+ fmt = IF_DR_2B;
+ }
+ break;
+
+ case INS_ands:
+ case INS_and:
+ case INS_eor:
+ case INS_orr:
+ assert(insOptsNone(opt));
+ assert(isGeneralRegister(reg2));
+ if (ins == INS_ands)
+ {
+ assert(isGeneralRegister(reg1));
+ }
+ else
+ {
+ assert(isGeneralRegisterOrSP(reg1));
+ reg1 = encodingSPtoZR(reg1);
+ }
+
+ bmi.immNRS = 0;
+ canEncode = canEncodeBitMaskImm(imm, size, &bmi);
+ if (canEncode)
+ {
+ imm = bmi.immNRS;
+ assert(isValidImmNRS(imm, size));
+ fmt = IF_DI_2C;
+ }
+ break;
+
+ case INS_dup: // by element, imm selects the element of reg2
+ assert(isVectorRegister(reg1));
+ if (isVectorRegister(reg2))
+ {
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidVectorIndex(size, elemsize, imm));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_2D;
+ break;
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ elemsize = size;
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ fmt = IF_DV_2E;
+ break;
+ }
+ }
+ __fallthrough;
+
+ case INS_ins: // (MOV from general)
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsize(size));
+ assert(isVectorRegister(reg1));
+ assert(isGeneralRegisterOrZR(reg2));
+ elemsize = size;
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ fmt = IF_DV_2C;
+ break;
+
+ case INS_umov: // (MOV to general)
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isVectorRegister(reg2));
+ elemsize = size;
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ fmt = IF_DV_2B;
+ break;
+
+ case INS_smov:
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsize(size));
+ assert(size != EA_8BYTE); // no encoding, use INS_umov
+ assert(isGeneralRegister(reg1));
+ assert(isVectorRegister(reg2));
+ elemsize = size;
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ fmt = IF_DV_2B;
+ break;
+
+ case INS_add:
+ case INS_sub:
+ setFlags = false;
+ isAddSub = true;
+ break;
+
+ case INS_adds:
+ case INS_subs:
+ setFlags = true;
+ isAddSub = true;
+ break;
+
+ case INS_ldrsb:
+ case INS_ldursb:
+ // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register
+ assert(isValidGeneralDatasize(size));
+ unscaledOp = (ins == INS_ldursb);
+ scale = 0;
+ isLdSt = true;
+ break;
+
+ case INS_ldrsh:
+ case INS_ldursh:
+ // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register
+ assert(isValidGeneralDatasize(size));
+ unscaledOp = (ins == INS_ldursh);
+ scale = 1;
+ isLdSt = true;
+ break;
+
+ case INS_ldrsw:
+ case INS_ldursw:
+ // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register
+ assert(size == EA_8BYTE);
+ unscaledOp = (ins == INS_ldursw);
+ scale = 2;
+ isLdSt = true;
+ break;
+
+ case INS_ldrb:
+ case INS_strb:
+ // size is ignored
+ unscaledOp = false;
+ scale = 0;
+ isLdSt = true;
+ break;
+
+ case INS_ldurb:
+ case INS_sturb:
+ // size is ignored
+ unscaledOp = true;
+ scale = 0;
+ isLdSt = true;
+ break;
+
+ case INS_ldrh:
+ case INS_strh:
+ // size is ignored
+ unscaledOp = false;
+ scale = 1;
+ isLdSt = true;
+ break;
+
+ case INS_ldurh:
+ case INS_sturh:
+ // size is ignored
+ unscaledOp = true;
+ scale = 0;
+ isLdSt = true;
+ break;
+
+ case INS_ldr:
+ case INS_str:
+ // Is the target a vector register?
+ if (isVectorRegister(reg1))
+ {
+ assert(isValidVectorLSDatasize(size));
+ assert(isGeneralRegisterOrSP(reg2));
+ isSIMD = true;
+ }
+ else
+ {
+ assert(isValidGeneralDatasize(size));
+ }
+ unscaledOp = false;
+ scale = NaturalScale_helper(size);
+ isLdSt = true;
+ break;
+
+ case INS_ldur:
+ case INS_stur:
+ // Is the target a vector register?
+ if (isVectorRegister(reg1))
+ {
+ assert(isValidVectorLSDatasize(size));
+ assert(isGeneralRegisterOrSP(reg2));
+ isSIMD = true;
+ }
+ else
+ {
+ assert(isValidGeneralDatasize(size));
+ }
+ unscaledOp = true;
+ scale = 0;
+ isLdSt = true;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ if (isLdSt)
+ {
+ assert(!isAddSub);
+
+ if (isSIMD)
+ {
+ assert(isValidVectorLSDatasize(size));
+ assert(isVectorRegister(reg1));
+ assert((scale >= 0) && (scale <= 4));
+ }
+ else
+ {
+ assert(isValidGeneralLSDatasize(size));
+ assert(isGeneralRegisterOrZR(reg1));
+ assert((scale >= 0) && (scale <= 3));
+ }
+
+ assert(isGeneralRegisterOrSP(reg2));
+
+ // Load/Store reserved encodings:
+ if (insOptsIndexed(opt))
+ {
+ assert(reg1 != reg2);
+ }
+
+ reg2 = encodingSPtoZR(reg2);
+
+ ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+ if (imm == 0)
+ {
+ assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero
+
+ fmt = IF_LS_2A;
+ }
+ else if (insOptsIndexed(opt) || unscaledOp || (imm < 0) || ((imm & mask) != 0))
+ {
+ if ((imm >= -256) && (imm <= 255))
+ {
+ fmt = IF_LS_2C;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: IF_LS_2C");
+ }
+ }
+ else if (imm > 0)
+ {
+ assert(insOptsNone(opt));
+ assert(!unscaledOp);
+
+ if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
+ {
+ imm >>= scale; // The immediate is scaled by the size of the ld/st
+
+ fmt = IF_LS_2B;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: IF_LS_2B");
+ }
+ }
+ }
+ else if (isAddSub)
+ {
+ assert(!isLdSt);
+ assert(insOptsNone(opt));
+
+ if (setFlags) // Can't encode SP with setFlags
+ {
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ }
+ else
+ {
+ assert(isGeneralRegisterOrSP(reg1));
+ assert(isGeneralRegisterOrSP(reg2));
+
+ // Is it just a mov?
+ if (imm == 0)
+ {
+ // Is the mov even necessary?
+ if (reg1 != reg2)
+ {
+ emitIns_R_R(INS_mov, attr, reg1, reg2);
+ }
+ return;
+ }
+
+ reg1 = encodingSPtoZR(reg1);
+ reg2 = encodingSPtoZR(reg2);
+ }
+
+ if (unsigned_abs(imm) <= 0x0fff)
+ {
+ if (imm < 0)
+ {
+ ins = insReverse(ins);
+ imm = -imm;
+ }
+ assert(isValidUimm12(imm));
+ fmt = IF_DI_2A;
+ }
+ else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding
+ {
+ // Encoding will use a 12-bit left shift of the immediate
+ opt = INS_OPTS_LSL12;
+ if (imm < 0)
+ {
+ ins = insReverse(ins);
+ imm = -imm;
+ }
+ assert((imm & 0xfff) == 0);
+ imm >>= 12;
+ assert(isValidUimm12(imm));
+ fmt = IF_DI_2A;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: IF_DI_2A");
+ }
+ }
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers and a constant.
+ * Also checks for a large immediate that needs a second instruction,
+ * in which case the immediate is first loaded into reg1.
+ *
+ * - Supports instructions: add, adds, sub, subs, and, ands, eor and orr
+ * - Requires that reg1 is a general register and not SP or ZR
+ * - Requires that reg1 != reg2
+ */
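+// Usage sketch (illustrative; register names are placeholders). 0x12345 does not fit the
+// add immediate forms, so it is first materialized into dstReg and a three-register add
+// is emitted instead:
+//
+//     emitIns_R_R_Imm(INS_add, EA_8BYTE, dstReg, srcReg, 0x12345);
+//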
+void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm)
+{
+ assert(isGeneralRegister(reg1));
+ assert(reg1 != reg2);
+
+ bool immFits = true;
+
+ switch (ins)
+ {
+ case INS_add:
+ case INS_adds:
+ case INS_sub:
+ case INS_subs:
+ immFits = emitter::emitIns_valid_imm_for_add(imm, attr);
+ break;
+
+ case INS_ands:
+ case INS_and:
+ case INS_eor:
+ case INS_orr:
+ immFits = emitter::emitIns_valid_imm_for_alu(imm, attr);
+ break;
+
+ default:
+ assert(!"Unsupported instruction in emitIns_R_R_Imm");
+ }
+
+ if (immFits)
+ {
+ emitIns_R_R_I(ins, attr, reg1, reg2, imm);
+ }
+ else
+ {
+ // Load 'imm' into the reg1 register
+ // then issue: 'ins' reg1, reg2, reg1
+ //
+ codeGen->instGen_Set_Reg_To_Imm(attr, reg1, imm);
+ emitIns_R_R_R(ins, attr, reg1, reg2, reg1);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing three registers.
+ */
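+// Usage sketch (illustrative; register names are placeholders):
+//
+//     emitIns_R_R_R(INS_udiv, EA_8BYTE, dstReg, dividendReg, divisorReg);            // IF_DR_3A
+//     emitIns_R_R_R(INS_fadd, EA_16BYTE, dstVReg, srcVReg1, srcVReg2, INS_OPTS_4S);  // 4S vector add, IF_DV_3B
+//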
+
+void emitter::emitIns_R_R_R(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt) /* = INS_OPTS_NONE */
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_lsl:
+ case INS_lsr:
+ case INS_asr:
+ case INS_ror:
+ case INS_adc:
+ case INS_adcs:
+ case INS_sbc:
+ case INS_sbcs:
+ case INS_udiv:
+ case INS_sdiv:
+ case INS_mneg:
+ case INS_smull:
+ case INS_smnegl:
+ case INS_smulh:
+ case INS_umull:
+ case INS_umnegl:
+ case INS_umulh:
+ case INS_lslv:
+ case INS_lsrv:
+ case INS_asrv:
+ case INS_rorv:
+ assert(insOptsNone(opt));
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ fmt = IF_DR_3A;
+ break;
+
+ case INS_mul:
+ if (insOptsNone(opt))
+ {
+ // general register
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ fmt = IF_DR_3A;
+ break;
+ }
+ __fallthrough;
+
+ case INS_mla:
+ case INS_mls:
+ case INS_pmul:
+ assert(insOptsAnyArrangement(opt));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ if (ins == INS_pmul)
+ {
+ assert(elemsize == EA_1BYTE); // only supports 8B or 16B
+ }
+ else // INS_mul, INS_mla, INS_mls
+ {
+ assert(elemsize != EA_8BYTE); // can't use 2D or 1D
+ }
+ fmt = IF_DV_3A;
+ break;
+
+ case INS_add:
+ case INS_sub:
+ if (isVectorRegister(reg1))
+ {
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ fmt = IF_DV_3A;
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(size == EA_8BYTE);
+ fmt = IF_DV_3E;
+ }
+ break;
+ }
+ __fallthrough;
+
+ case INS_adds:
+ case INS_subs:
+ emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, INS_OPTS_NONE);
+ return;
+
+ case INS_saba:
+ case INS_sabd:
+ case INS_uaba:
+ case INS_uabd:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(insOptsAnyArrangement(opt));
+
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(elemsize != EA_8BYTE); // can't use 2D or 1D
+
+ fmt = IF_DV_3A;
+ break;
+
+ case INS_mov:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(reg2 == reg3);
+ assert(isValidVectorDatasize(size));
+ // INS_mov is an alias for INS_orr (vector register)
+ if (opt == INS_OPTS_NONE)
+ {
+ elemsize = EA_1BYTE;
+ opt = optMakeArrangement(size, elemsize);
+ }
+ assert(isValidArrangement(size, opt));
+ fmt = IF_DV_3C;
+ break;
+
+ case INS_and:
+ case INS_bic:
+ case INS_eor:
+ case INS_orr:
+ case INS_orn:
+ if (isVectorRegister(reg1))
+ {
+ assert(isValidVectorDatasize(size));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ if (opt == INS_OPTS_NONE)
+ {
+ elemsize = EA_1BYTE;
+ opt = optMakeArrangement(size, elemsize);
+ }
+ assert(isValidArrangement(size, opt));
+ fmt = IF_DV_3C;
+ break;
+ }
+ __fallthrough;
+
+ case INS_ands:
+ case INS_bics:
+ case INS_eon:
+ emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, INS_OPTS_NONE);
+ return;
+
+ case INS_bsl:
+ case INS_bit:
+ case INS_bif:
+ assert(isValidVectorDatasize(size));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ if (opt == INS_OPTS_NONE)
+ {
+ elemsize = EA_1BYTE;
+ opt = optMakeArrangement(size, elemsize);
+ }
+ assert(isValidArrangement(size, opt));
+ fmt = IF_DV_3C;
+ break;
+
+ case INS_fadd:
+ case INS_fsub:
+ case INS_fdiv:
+ case INS_fmax:
+ case INS_fmin:
+ case INS_fabd:
+ case INS_fmul:
+ case INS_fmulx:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_3B;
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(isValidScalarDatasize(size));
+ fmt = IF_DV_3D;
+ }
+ break;
+
+ case INS_fnmul:
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(isValidScalarDatasize(size));
+ fmt = IF_DV_3D;
+ break;
+
+ case INS_fmla:
+ case INS_fmls:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(insOptsAnyArrangement(opt)); // no scalar encoding, use 4-operand 'fmadd' or 'fmsub'
+
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_3B;
+ break;
+
+ case INS_ldr:
+ case INS_ldrb:
+ case INS_ldrh:
+ case INS_ldrsb:
+ case INS_ldrsh:
+ case INS_ldrsw:
+ case INS_str:
+ case INS_strb:
+ case INS_strh:
+ emitIns_R_R_R_Ext(ins, attr, reg1, reg2, reg3, opt);
+ return;
+
+ case INS_ldp:
+ case INS_ldpsw:
+ case INS_ldnp:
+ case INS_stp:
+ case INS_stnp:
+ emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0);
+ return;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstr(attr);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing three registers and a constant.
+ */
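+// Usage sketch (illustrative; register names other than REG_SP are placeholders):
+//
+//     emitIns_R_R_R_I(INS_stp, EA_8BYTE, dataReg1, dataReg2, REG_SP, 16);             // store pair at [sp, #16], IF_LS_3C
+//     emitIns_R_R_R_I(INS_add, EA_8BYTE, dstReg, srcReg1, srcReg2, 2, INS_OPTS_LSL);  // add with 'srcReg2, LSL #2', IF_DR_3B
+//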
+
+void emitter::emitIns_R_R_R_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ ssize_t imm,
+ insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+ bool isLdSt = false;
+ bool isSIMD = false;
+ bool isAddSub = false;
+ bool setFlags = false;
+ unsigned scale = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_extr:
+ assert(insOptsNone(opt));
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ assert(isValidImmShift(imm, size));
+ fmt = IF_DR_3E;
+ break;
+
+ case INS_and:
+ case INS_ands:
+ case INS_eor:
+ case INS_orr:
+ case INS_bic:
+ case INS_bics:
+ case INS_eon:
+ case INS_orn:
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ assert(isValidImmShift(imm, size));
+ if (imm == 0)
+ {
+ assert(insOptsNone(opt)); // a zero imm, means no shift kind
+ fmt = IF_DR_3A;
+ }
+ else
+ {
+ assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind
+ fmt = IF_DR_3B;
+ }
+ break;
+
+ case INS_fmul: // by element, imm[0..3] selects the element of reg3
+ case INS_fmla:
+ case INS_fmls:
+ case INS_fmulx:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(isValidVectorIndex(size, elemsize, imm));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_3BI;
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(isValidScalarDatasize(size));
+ elemsize = size;
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ fmt = IF_DV_3DI;
+ }
+ break;
+
+ case INS_mul: // by element, imm[0..7] selects the element of reg3
+ case INS_mla:
+ case INS_mls:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ // Vector operation
+ assert(insOptsAnyArrangement(opt));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ // Only has encodings for H or S elemsize
+ assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE));
+ // The 16-bit (H) element form only has encodings for V0..V15
+ if ((elemsize == EA_2BYTE) && (reg3 >= REG_V16))
+ {
+ noway_assert(!"Invalid reg3");
+ }
+ fmt = IF_DV_3AI;
+ break;
+
+ case INS_add:
+ case INS_sub:
+ setFlags = false;
+ isAddSub = true;
+ break;
+
+ case INS_adds:
+ case INS_subs:
+ setFlags = true;
+ isAddSub = true;
+ break;
+
+ case INS_ldpsw:
+ scale = 2;
+ isLdSt = true;
+ break;
+
+ case INS_ldnp:
+ case INS_stnp:
+ assert(insOptsNone(opt)); // Can't use Pre/Post index on these two instructions
+ __fallthrough;
+
+ case INS_ldp:
+ case INS_stp:
+ // Is the target a vector register?
+ if (isVectorRegister(reg1))
+ {
+ scale = NaturalScale_helper(size);
+ isSIMD = true;
+ }
+ else
+ {
+ scale = (size == EA_8BYTE) ? 3 : 2;
+ }
+ isLdSt = true;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ if (isLdSt)
+ {
+ assert(!isAddSub);
+ assert(isGeneralRegisterOrSP(reg3));
+ assert(insOptsNone(opt) || insOptsIndexed(opt));
+
+ if (isSIMD)
+ {
+ assert(isValidVectorLSPDatasize(size));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert((scale >= 2) && (scale <= 4));
+ }
+ else
+ {
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegisterOrZR(reg1));
+ assert(isGeneralRegisterOrZR(reg2));
+ assert((scale == 2) || (scale == 3));
+ }
+
+ // Load/Store Pair reserved encodings:
+ if (emitInsIsLoad(ins))
+ {
+ assert(reg1 != reg2);
+ }
+ if (insOptsIndexed(opt))
+ {
+ assert(reg1 != reg3);
+ assert(reg2 != reg3);
+ }
+
+ reg3 = encodingSPtoZR(reg3);
+
+ ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+ if (imm == 0)
+ {
+ assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero
+
+ fmt = IF_LS_3B;
+ }
+ else
+ {
+ if ((imm & mask) == 0)
+ {
+ imm >>= scale; // The immediate is scaled by the size of the ld/st
+
+ if ((imm >= -64) && (imm <= 63))
+ {
+ fmt = IF_LS_3C;
+ }
+ }
+#ifdef DEBUG
+ if (fmt != IF_LS_3C)
+ {
+ assert(!"Instruction cannot be encoded: IF_LS_3C");
+ }
+#endif
+ }
+ }
+ else if (isAddSub)
+ {
+ bool reg2IsSP = (reg2 == REG_SP);
+ assert(!isLdSt);
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg3));
+
+ if (setFlags || insOptsAluShift(opt)) // Can't encode SP in reg1 with setFlags or AluShift option
+ {
+ assert(isGeneralRegisterOrZR(reg1));
+ }
+ else
+ {
+ assert(isGeneralRegisterOrSP(reg1));
+ reg1 = encodingSPtoZR(reg1);
+ }
+
+ if (insOptsAluShift(opt)) // Can't encode SP in reg2 with AluShift option
+ {
+ assert(isGeneralRegister(reg2));
+ }
+ else
+ {
+ assert(isGeneralRegisterOrSP(reg2));
+ reg2 = encodingSPtoZR(reg2);
+ }
+
+ if (insOptsAnyExtend(opt))
+ {
+ assert((imm >= 0) && (imm <= 4));
+
+ fmt = IF_DR_3C;
+ }
+ else if (insOptsAluShift(opt))
+ {
+ // imm should be non-zero and in [1..63]
+ assert(isValidImmShift(imm, size) && (imm != 0));
+ fmt = IF_DR_3B;
+ }
+ else if (imm == 0)
+ {
+ assert(insOptsNone(opt));
+
+ if (reg2IsSP)
+ {
+ // To encode the SP register as reg2 we must use the IF_DR_3C encoding
+ // and also specify an LSL of zero (imm == 0)
+ opt = INS_OPTS_LSL;
+ fmt = IF_DR_3C;
+ }
+ else
+ {
+ fmt = IF_DR_3A;
+ }
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A");
+ }
+ }
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrCns(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing three registers, with an extend option
+ */
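+// Usage sketch (illustrative; register names are placeholders): load from the address
+// 'baseReg + (indexReg << 3)', i.e. a register offset scaled by the 8-byte access size:
+//
+//     emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, dstReg, baseReg, indexReg, INS_OPTS_LSL);
+//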
+
+void emitter::emitIns_R_R_R_Ext(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ insOpts opt, /* = INS_OPTS_NONE */
+ int shiftAmount) /* = -1 -- unset */
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ bool isSIMD = false;
+ int scale = -1;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_ldrb:
+ case INS_ldrsb:
+ case INS_strb:
+ scale = 0;
+ break;
+
+ case INS_ldrh:
+ case INS_ldrsh:
+ case INS_strh:
+ scale = 1;
+ break;
+
+ case INS_ldrsw:
+ scale = 2;
+ break;
+
+ case INS_ldr:
+ case INS_str:
+ // Is the target a vector register?
+ if (isVectorRegister(reg1))
+ {
+ assert(isValidVectorLSDatasize(size));
+ scale = NaturalScale_helper(size);
+ isSIMD = true;
+ }
+ else
+ {
+ assert(isValidGeneralDatasize(size));
+ scale = (size == EA_8BYTE) ? 3 : 2;
+ }
+
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(scale != -1);
+ assert(insOptsLSExtend(opt));
+
+ if (isSIMD)
+ {
+ assert(isValidVectorLSDatasize(size));
+ assert(isVectorRegister(reg1));
+ }
+ else
+ {
+ assert(isValidGeneralLSDatasize(size));
+ assert(isGeneralRegisterOrZR(reg1));
+ }
+
+ assert(isGeneralRegisterOrSP(reg2));
+ assert(isGeneralRegister(reg3));
+
+ // Load/Store reserved encodings:
+ if (insOptsIndexed(opt))
+ {
+ assert(reg1 != reg2);
+ }
+
+ if (shiftAmount == -1)
+ {
+ shiftAmount = insOptsLSL(opt) ? scale : 0;
+ }
+ assert((shiftAmount == scale) || (shiftAmount == 0));
+
+ reg2 = encodingSPtoZR(reg2);
+ fmt = IF_LS_3A;
+
+ instrDesc* id = emitNewInstr(attr);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+ id->idReg3Scaled(shiftAmount == scale);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers and two constants.
+ */
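+// Usage sketch (illustrative; register names are placeholders): extract the 16-bit field
+// starting at bit 8 of srcReg into the low bits of dstReg:
+//
+//     emitIns_R_R_I_I(INS_ubfx, EA_8BYTE, dstReg, srcReg, 8, 16);   // lsb = 8, width = 16
+//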
+
+void emitter::emitIns_R_R_I_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2)
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+ size_t immOut = 0; // composed from imm1 and imm2 and stored in the instrDesc
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ int lsb;
+ int width;
+ bitMaskImm bmi;
+
+ case INS_bfm:
+ case INS_sbfm:
+ case INS_ubfm:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isValidImmShift(imm1, size));
+ assert(isValidImmShift(imm2, size));
+ bmi.immNRS = 0;
+ bmi.immN = (size == EA_8BYTE);
+ bmi.immR = imm1;
+ bmi.immS = imm2;
+ immOut = bmi.immNRS;
+ fmt = IF_DI_2D;
+ break;
+
+ case INS_bfi:
+ case INS_sbfiz:
+ case INS_ubfiz:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ lsb = getBitWidth(size) - imm1;
+ width = imm2 - 1;
+ assert(isValidImmShift(lsb, size));
+ assert(isValidImmShift(width, size));
+ bmi.immNRS = 0;
+ bmi.immN = (size == EA_8BYTE);
+ bmi.immR = lsb;
+ bmi.immS = width;
+ immOut = bmi.immNRS;
+ fmt = IF_DI_2D;
+ break;
+
+ case INS_bfxil:
+ case INS_sbfx:
+ case INS_ubfx:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ lsb = imm1;
+ width = imm2 + imm1 - 1;
+ assert(isValidImmShift(lsb, size));
+ assert(isValidImmShift(width, size));
+ bmi.immNRS = 0;
+ bmi.immN = (size == EA_8BYTE);
+ bmi.immR = lsb;
+ bmi.immS = width;
+ immOut = bmi.immNRS;
+ fmt = IF_DI_2D;
+ break;
+
+ case INS_mov:
+ case INS_ins:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ elemsize = size;
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm1));
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm2));
+ immOut = (imm1 << 4) + imm2;
+ fmt = IF_DV_2F;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(attr, immOut);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing four registers.
+ */
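+// Usage sketch (illustrative; register names are placeholders): multiply-add, computing
+// 'dstReg = mulReg1 * mulReg2 + addReg':
+//
+//     emitIns_R_R_R_R(INS_madd, EA_8BYTE, dstReg, mulReg1, mulReg2, addReg);   // IF_DR_4A
+//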
+
+void emitter::emitIns_R_R_R_R(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_madd:
+ case INS_msub:
+ case INS_smaddl:
+ case INS_smsubl:
+ case INS_umaddl:
+ case INS_umsubl:
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ assert(isGeneralRegister(reg4));
+ fmt = IF_DR_4A;
+ break;
+
+ case INS_fmadd:
+ case INS_fmsub:
+ case INS_fnmadd:
+ case INS_fnmsub:
+ // Scalar operation
+ assert(isValidScalarDatasize(size));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(isVectorRegister(reg4));
+ fmt = IF_DV_4A;
+ break;
+
+ case INS_invalid:
+ fmt = IF_NONE;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstr(attr);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+ id->idReg4(reg4);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a condition code
+ */
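+// Usage sketch (illustrative; 'dstReg' is a placeholder and INS_COND_EQ is assumed to be
+// one of the insCond values): set dstReg to 1 when the EQ condition holds, else to 0:
+//
+//     emitIns_R_COND(INS_cset, EA_8BYTE, dstReg, INS_COND_EQ);
+//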
+
+void emitter::emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ condFlagsImm cfi;
+ cfi.immCFVal = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_cset:
+ case INS_csetm:
+ assert(isGeneralRegister(reg));
+ cfi.cond = cond;
+ fmt = IF_DR_1D;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+ assert(isValidImmCond(cfi.immCFVal));
+
+ instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers and a condition code
+ */
+
+void emitter::emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCond cond)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ condFlagsImm cfi;
+ cfi.immCFVal = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_cinc:
+ case INS_cinv:
+ case INS_cneg:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ cfi.cond = cond;
+ fmt = IF_DR_2D;
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+ assert(isValidImmCond(cfi.immCFVal));
+
+ instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing three registers and a condition code
+ */
+
+void emitter::emitIns_R_R_R_COND(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insCond cond)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ condFlagsImm cfi;
+ cfi.immCFVal = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_csel:
+ case INS_csinc:
+ case INS_csinv:
+ case INS_csneg:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ cfi.cond = cond;
+ fmt = IF_DR_3D;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+ assert(isValidImmCond(cfi.immCFVal));
+
+ instrDesc* id = emitNewInstr(attr);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+ id->idSmallCns(cfi.immCFVal);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers, the flags and a condition code
+ */
+
+void emitter::emitIns_R_R_FLAGS_COND(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCflags flags, insCond cond)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ condFlagsImm cfi;
+ cfi.immCFVal = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_ccmp:
+ case INS_ccmn:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ cfi.flags = flags;
+ cfi.cond = cond;
+ fmt = IF_DR_2I;
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+ assert(isValidImmCondFlags(cfi.immCFVal));
+
+ instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register, an immediate, the flags and a condition code
+ */
+
+void emitter::emitIns_R_I_FLAGS_COND(
+ instruction ins, emitAttr attr, regNumber reg, int imm, insCflags flags, insCond cond)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ condFlagsImm cfi;
+ cfi.immCFVal = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_ccmp:
+ case INS_ccmn:
+ assert(isGeneralRegister(reg));
+ if (imm < 0)
+ {
+ ins = insReverse(ins);
+ imm = -imm;
+ }
+ if ((imm >= 0) && (imm <= 31))
+ {
+ cfi.imm5 = imm;
+ cfi.flags = flags;
+ cfi.cond = cond;
+ fmt = IF_DI_1F;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: ccmp/ccmn imm5");
+ }
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+ assert(isValidImmCondFlagsImm5(cfi.immCFVal));
+
+ instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add a memory barrier instruction with a 'barrier' immediate
+ */
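+// Usage sketch (illustrative; INS_BARRIER_ISH is assumed to be one of the insBarrier
+// values): emit a data memory barrier for the inner shareable domain:
+//
+//     emitIns_BARR(INS_dmb, INS_BARRIER_ISH);
+//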
+
+void emitter::emitIns_BARR(instruction ins, insBarrier barrier)
+{
+ insFormat fmt = IF_NONE;
+ ssize_t imm = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_dsb:
+ case INS_dmb:
+ case INS_isb:
+
+ fmt = IF_SI_0B;
+ imm = (ssize_t)barrier;
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(EA_8BYTE, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
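+
+ // Illustrative example (barrier enum spelling assumed): emitIns_BARR(INS_dmb, INS_BARRIER_ISH)
+ // emits "dmb ish", an inner-shareable data memory barrier.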
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static data member operand. If 'size' is 0, the
+ * instruction operates on the address of the static member instead of its
+ * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
+ */
+
+void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
+{
+ NYI("emitIns_C");
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a stack-based local variable.
+ */
+
+void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
+{
+ NYI("emitIns_S");
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a stack-based local variable.
+ */
+void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ int disp = 0;
+ unsigned scale = 0;
+
+ assert(offs >= 0);
+
+ // TODO-ARM64-CQ: use unscaled loads?
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_strb:
+ case INS_ldrb:
+ case INS_ldrsb:
+ scale = 0;
+ break;
+
+ case INS_strh:
+ case INS_ldrh:
+ case INS_ldrsh:
+ scale = 1;
+ break;
+
+ case INS_ldrsw:
+ scale = 2;
+ break;
+
+ case INS_str:
+ case INS_ldr:
+ assert(isValidGeneralDatasize(size));
+ scale = (size == EA_8BYTE) ? 3 : 2;
+ break;
+
+ case INS_lea:
+ assert(size == EA_8BYTE);
+ scale = 0;
+ break;
+
+ default:
+ NYI("emitIns_R_S"); // FP locals?
+ return;
+
+ } // end switch (ins)
+
+ /* Figure out the variable's frame position */
+ ssize_t imm;
+ int base;
+ bool FPbased;
+
+ base = emitComp->lvaFrameAddress(varx, &FPbased);
+ disp = base + offs;
+ assert((scale >= 0) && (scale <= 3));
+
+ regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE;
+ reg2 = encodingSPtoZR(reg2);
+
+ if (ins == INS_lea)
+ {
+ if (disp >= 0)
+ {
+ ins = INS_add;
+ imm = disp;
+ }
+ else
+ {
+ ins = INS_sub;
+ imm = -disp;
+ }
+
+ if (imm <= 0x0fff)
+ {
+ fmt = IF_DI_2A; // add reg1,reg2,#disp
+ }
+ else
+ {
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ codeGen->instGen_Set_Reg_To_Imm(size, rsvdReg, imm);
+ fmt = IF_DR_3A; // add reg1,reg2,rsvdReg
+ }
+ }
+ else
+ {
+ bool useRegForImm = false;
+ ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+
+ imm = disp;
+ if (imm == 0)
+ {
+ fmt = IF_LS_2A;
+ }
+ else if ((imm < 0) || ((imm & mask) != 0))
+ {
+ if ((imm >= -256) && (imm <= 255))
+ {
+ fmt = IF_LS_2C;
+ }
+ else
+ {
+ useRegForImm = true;
+ }
+ }
+ else if (imm > 0)
+ {
+ if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
+ {
+ imm >>= scale; // The immediate is scaled by the size of the ld/st
+
+ fmt = IF_LS_2B;
+ }
+ else
+ {
+ useRegForImm = true;
+ }
+ }
+
+ if (useRegForImm)
+ {
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ codeGen->instGen_Set_Reg_To_Imm(size, rsvdReg, imm);
+ fmt = IF_LS_3A;
+ }
+ }
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrCns(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idSetIsLclVar();
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
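+
+ // Illustrative offset selection for an 8-byte "ldr reg1, [fp|sp, #disp]" (values assumed):
+ //   disp == 0x10  : aligned and (0x10 >> 3) < 0x1000, so the scaled form IF_LS_2B is used.
+ //   disp == 0x11  : misaligned but within [-256, 255], so the unscaled form IF_LS_2C is used.
+ //   disp == 0x9000: too large for either immediate form, so the offset is materialized in the
+ //                   reserved register and the register-offset form IF_LS_3A is used.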
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a stack-based local variable and a register
+ */
+void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
+{
+ assert(offs >= 0);
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ int disp = 0;
+ unsigned scale = 0;
+ bool isVectorStore = false;
+
+ // TODO-ARM64-CQ: use unscaled loads?
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_strb:
+ scale = 0;
+ assert(isGeneralRegisterOrZR(reg1));
+ break;
+
+ case INS_strh:
+ scale = 1;
+ assert(isGeneralRegisterOrZR(reg1));
+ break;
+
+ case INS_str:
+ if (isGeneralRegisterOrZR(reg1))
+ {
+ assert(isValidGeneralDatasize(size));
+ scale = (size == EA_8BYTE) ? 3 : 2;
+ }
+ else
+ {
+ assert(isVectorRegister(reg1));
+ assert(isValidVectorLSDatasize(size));
+ scale = NaturalScale_helper(size);
+ isVectorStore = true;
+ }
+ break;
+
+ default:
+ NYI("emitIns_S_R"); // FP locals?
+ return;
+
+ } // end switch (ins)
+
+ /* Figure out the variable's frame position */
+ int base;
+ bool FPbased;
+
+ base = emitComp->lvaFrameAddress(varx, &FPbased);
+ disp = base + offs;
+ assert(scale >= 0);
+ if (isVectorStore)
+ {
+ assert(scale <= 4);
+ }
+ else
+ {
+ assert(scale <= 3);
+ }
+
+ // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead?
+ regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE;
+ reg2 = encodingSPtoZR(reg2);
+
+ bool useRegForImm = false;
+ ssize_t imm = disp;
+ ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+ if (imm == 0)
+ {
+ fmt = IF_LS_2A;
+ }
+ else if ((imm < 0) || ((imm & mask) != 0))
+ {
+ if ((imm >= -256) && (imm <= 255))
+ {
+ fmt = IF_LS_2C;
+ }
+ else
+ {
+ useRegForImm = true;
+ }
+ }
+ else if (imm > 0)
+ {
+ if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
+ {
+ imm >>= scale; // The immediate is scaled by the size of the ld/st
+
+ fmt = IF_LS_2B;
+ }
+ else
+ {
+ useRegForImm = true;
+ }
+ }
+
+ if (useRegForImm)
+ {
+ // The reserved register is not stored in idReg3() since that field overlaps with iiaLclVar.
+ // It is instead implicit when idSetIsLclVar() is set, with this encoding format.
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ codeGen->instGen_Set_Reg_To_Imm(size, rsvdReg, imm);
+ fmt = IF_LS_3A;
+ }
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrCns(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idSetIsLclVar();
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
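+
+ // Illustrative vector-store case (values assumed): for "str q0, [fp, #0x40]" the scale is
+ // NaturalScale_helper(EA_16BYTE) == 4, the offset is aligned, and (0x40 >> 4) < 0x1000, so
+ // the scaled form IF_LS_2B is used with an encoded immediate of 4.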
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a stack-based local variable and an immediate
+ */
+void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
+{
+ NYI("emitIns_S_I");
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a register + static member operands.
+ * The constant is stored in JIT data adjacent to the code.
+ * No relocation is needed; the PC-relative offset is encoded directly into the instruction.
+ *
+ */
+void emitter::emitIns_R_C(
+ instruction ins, emitAttr attr, regNumber reg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd, int offs)
+{
+ assert(offs >= 0);
+ assert(instrDesc::fitsInSmallCns(offs));
+
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ int disp = 0;
+ instrDescJmp* id = emitNewInstrJmp();
+
+ switch (ins)
+ {
+ case INS_adr:
+ // This case computes the address of the constant data.
+ fmt = IF_LARGEADR;
+ assert(isGeneralRegister(reg));
+ assert(isValidGeneralDatasize(size));
+ break;
+
+ case INS_ldr:
+ fmt = IF_LARGELDC;
+ if (isVectorRegister(reg))
+ {
+ assert(isValidScalarDatasize(size));
+ // For vector (float/double) register, we should have an integer address reg to
+ // compute long address which consists of page address and page offset.
+ // For integer constant, this is not needed since the dest reg can be used to
+ // compute address as well as contain the final contents.
+ assert(isGeneralRegister(reg) || (addrReg != REG_NA));
+ }
+ else
+ {
+ assert(isGeneralRegister(reg));
+ assert(isValidGeneralDatasize(size));
+ }
+ break;
+ default:
+ unreached();
+ }
+
+ assert(fmt != IF_NONE);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+ id->idSmallCns(offs);
+ id->idOpSize(size);
+ id->idAddr()->iiaFieldHnd = fldHnd;
+ id->idSetIsBound(); // We won't patch address since we will know the exact distance once JIT code and data are
+ // allocated together.
+
+ id->idReg1(reg); // destination register that will get the constant value.
+ if (addrReg != REG_NA)
+ {
+ id->idReg2(addrReg); // integer register to compute long address (used for vector dest when we end up with long
+ // address)
+ }
+ id->idjShort = false; // Assume loading constant from long address
+
+ // Keep it long if it's in cold code.
+ id->idjKeepLong = emitComp->fgIsBlockCold(emitComp->compCurBB);
+
+#ifdef DEBUG
+ if (emitComp->opts.compLongAddress)
+ id->idjKeepLong = 1;
+#endif // DEBUG
+
+ // If it's possible to be shortened, then put it in jump list
+ // to be revisited by emitJumpDistBind.
+ if (!id->idjKeepLong)
+ {
+ /* Record the jump's IG and offset within it */
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+ }
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static member + constant.
+ */
+
+void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val)
+{
+ NYI("emitIns_C_I");
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static member + register operands.
+ */
+
+void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
+{
+ assert(!"emitIns_C_R not supported for RyuJIT backend");
+}
+
+void emitter::emitIns_R_AR(instruction ins,
+ emitAttr attr,
+ regNumber ireg,
+ regNumber reg,
+ int offs,
+ int memCookie /* = 0 */,
+ void* clsCookie /* = NULL */)
+{
+ NYI("emitIns_R_AR");
+}
+
+ // This computes an address from the immediate, which is relocatable.
+void emitter::emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t addr)
+{
+ assert(EA_IS_RELOC(attr));
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_DI_1E;
+ bool needAdd = false;
+ instrDescJmp* id = emitNewInstrJmp();
+
+ switch (ins)
+ {
+ case INS_adrp:
+ // This computes the page address; the low 12-bit page offset
+ // must be added with a subsequent 'add'.
+ needAdd = true;
+ break;
+ case INS_adr:
+ break;
+ default:
+ unreached();
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+ id->idOpSize(size);
+ id->idAddr()->iiaAddr = (BYTE*)addr;
+ id->idReg1(ireg);
+ id->idSetIsDspReloc();
+
+ dispIns(id);
+ appendToCurIG(id);
+
+ if (needAdd)
+ {
+ // add reg, reg, imm
+ ins = INS_add;
+ fmt = IF_DI_2A;
+ instrDesc* id = emitAllocInstr(attr);
+ assert(id->idIsReloc());
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+ id->idOpSize(size);
+ id->idAddr()->iiaAddr = (BYTE*)addr;
+ id->idReg1(ireg);
+ id->idReg2(ireg);
+
+ dispIns(id);
+ appendToCurIG(id);
+ }
+}
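+
+ // Illustrative sequence (reloc attr and register assumed): emitIns_R_AI(INS_adrp,
+ // EA_PTR_DSP_RELOC, REG_R0, addr) emits "adrp x0, [rel page]" followed by
+ // "add x0, x0, [page offset]", both flagged as displacement relocations.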
+
+void emitter::emitIns_AR_R(instruction ins,
+ emitAttr attr,
+ regNumber ireg,
+ regNumber reg,
+ int offs,
+ int memCookie /* = 0 */,
+ void* clsCookie /* = NULL */)
+{
+ NYI("emitIns_AR_R");
+}
+
+void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp)
+{
+ NYI("emitIns_R_ARR");
+}
+
+void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp)
+{
+ NYI("emitIns_R_ARR");
+}
+
+void emitter::emitIns_R_ARX(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp)
+{
+ NYI("emitIns_R_ARR");
+}
+
+/*****************************************************************************
+ *
+ * Record that a jump instruction uses the short encoding
+ *
+ */
+void emitter::emitSetShortJump(instrDescJmp* id)
+{
+ if (id->idjKeepLong)
+ return;
+
+ insFormat fmt = IF_NONE;
+ if (emitIsCondJump(id))
+ {
+ fmt = IF_BI_0B;
+ }
+ else if (emitIsLoadLabel(id))
+ {
+ fmt = IF_DI_1E;
+ }
+ else if (emitIsLoadConstant(id))
+ {
+ fmt = IF_LS_1A;
+ }
+ else
+ {
+ unreached();
+ }
+
+ id->idInsFmt(fmt);
+ id->idjShort = true;
+}
+
+/*****************************************************************************
+ *
+ * Add a label instruction.
+ */
+
+void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+
+ insFormat fmt = IF_NONE;
+
+ switch (ins)
+ {
+ case INS_adr:
+ fmt = IF_LARGEADR;
+ break;
+ default:
+ unreached();
+ }
+
+ instrDescJmp* id = emitNewInstrJmp();
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idjShort = false;
+ id->idAddr()->iiaBBlabel = dst;
+ id->idReg1(reg);
+ id->idOpSize(EA_PTRSIZE);
+
+#ifdef DEBUG
+ // Mark the catch return
+ if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
+ {
+ id->idDebugOnlyInfo()->idCatchRet = true;
+ }
+#endif // DEBUG
+
+ id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+
+#ifdef DEBUG
+ if (emitComp->opts.compLongAddress)
+ id->idjKeepLong = 1;
+#endif // DEBUG
+
+ /* Record the jump's IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add a data label instruction.
+ */
+
+void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg)
+{
+ NYI("emitIns_R_D");
+}
+
+void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+ NYI("emitIns_J_R");
+}
+
+void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount)
+{
+ insFormat fmt = IF_NONE;
+
+ if (dst != nullptr)
+ {
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+ }
+ else
+ {
+ assert(instrCount != 0);
+ }
+
+ /* Figure out the encoding format of the instruction */
+
+ bool idjShort = false;
+ switch (ins)
+ {
+ case INS_bl_local:
+ case INS_b:
+ // Unconditional jump is a single form.
+ idjShort = true;
+ fmt = IF_BI_0A;
+ break;
+
+ case INS_beq:
+ case INS_bne:
+ case INS_bhs:
+ case INS_blo:
+ case INS_bmi:
+ case INS_bpl:
+ case INS_bvs:
+ case INS_bvc:
+ case INS_bhi:
+ case INS_bls:
+ case INS_bge:
+ case INS_blt:
+ case INS_bgt:
+ case INS_ble:
+ // Assume conditional jump is long.
+ fmt = IF_LARGEJMP;
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+
+ instrDescJmp* id = emitNewInstrJmp();
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idjShort = idjShort;
+
+#ifdef DEBUG
+ // Mark the finally call
+ if (ins == INS_bl_local && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ id->idDebugOnlyInfo()->idFinallyCall = true;
+ }
+#endif // DEBUG
+
+ if (dst != nullptr)
+ {
+ id->idAddr()->iiaBBlabel = dst;
+
+ // Skip unconditional jump that has a single form.
+ // TODO-ARM64-NYI: enable hot/cold splitting.
+ // The target needs to be relocated.
+ if (!idjShort)
+ {
+ id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+
+#ifdef DEBUG
+ if (emitComp->opts.compLongAddress) // Force long branches
+ id->idjKeepLong = 1;
+#endif // DEBUG
+ }
+ }
+ else
+ {
+ id->idAddr()->iiaSetInstrCount(instrCount);
+ id->idjKeepLong = false;
+ /* This jump must be short */
+ emitSetShortJump(id);
+ id->idSetIsBound();
+ }
+
+ /* Record the jump's IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
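+
+ // Illustrative lifetime of a conditional branch (names from the code above): emitIns_J(INS_beq,
+ // dst) starts out as IF_LARGEJMP (a reverse-condition branch around an unconditional branch,
+ // 8 bytes); emitJumpDistBind may later shrink it to a single short "b.eq" (IF_BI_0B) via
+ // emitSetShortJump when the target turns out to be in range.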
+
+/*****************************************************************************
+ *
+ * Add a call instruction (direct or indirect).
+ * argSize<0 means that the caller will pop the arguments
+ *
+ * The other arguments are interpreted depending on callType as shown:
+ * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
+ *
+ * EC_FUNC_TOKEN : addr is the method address
+ * EC_FUNC_ADDR : addr is the absolute address of the function
+ *
+ * If callType is one of these emitCallTypes, addr has to be NULL.
+ * EC_INDIR_R : "call ireg".
+ *
+ * For ARM64, xreg, xmul and disp are never used and should always be 0/REG_NA.
+ *
+ * Please consult the "debugger team notification" comment in genFnProlog().
+ */
+
+void emitter::emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize,
+ emitAttr secondRetSize,
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset /* = BAD_IL_OFFSET */,
+ regNumber ireg /* = REG_NA */,
+ regNumber xreg /* = REG_NA */,
+ unsigned xmul /* = 0 */,
+ ssize_t disp /* = 0 */,
+ bool isJump /* = false */,
+ bool isNoGC /* = false */,
+ bool isProfLeaveCB /* = false */)
+{
+ /* Sanity check the arguments depending on callType */
+
+ assert(callType < EC_COUNT);
+ assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_ADDR) ||
+ (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
+ assert(callType < EC_INDIR_R || addr == NULL);
+ assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
+
+ // ARM never uses these
+ assert(xreg == REG_NA && xmul == 0 && disp == 0);
+
+ // Our stack level should always be at least as large as the number of argument bytes we push.
+ // Just a sanity test.
+ assert((unsigned)abs(argSize) <= codeGen->genStackLevel);
+
+ int argCnt;
+ instrDesc* id;
+
+ /* This is the saved set of registers after a normal call */
+ regMaskTP savedSet = RBM_CALLEE_SAVED;
+
+ /* some special helper calls have a different saved set registers */
+
+ if (isNoGC)
+ {
+ assert(emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
+
+ // This call will preserve the liveness of most registers
+ //
+ // - On the ARM64 the NOGC helpers will preserve all registers,
+ // except for those listed in the RBM_CALLEE_TRASH_NOGC mask
+
+ savedSet = RBM_ALLINT & ~RBM_CALLEE_TRASH_NOGC;
+
+ // In case of Leave profiler callback, we need to preserve liveness of REG_PROFILER_RET_SCRATCH
+ if (isProfLeaveCB)
+ {
+ savedSet |= RBM_PROFILER_RET_SCRATCH;
+ }
+ }
+ else
+ {
+ assert(!emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
+ }
+
+ /* Trim out any callee-trashed registers from the live set */
+
+ gcrefRegs &= savedSet;
+ byrefRegs &= savedSet;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
+ dumpConvertedVarSet(emitComp, ptrVars);
+ printf(", gcrefRegs=");
+ printRegMaskInt(gcrefRegs);
+ emitDispRegSet(gcrefRegs);
+ printf(", byrefRegs=");
+ printRegMaskInt(byrefRegs);
+ emitDispRegSet(byrefRegs);
+ printf("\n");
+ }
+#endif
+
+ assert(argSize % REGSIZE_BYTES == 0);
+ argCnt = (int)(argSize / (int)sizeof(void*));
+
+#ifdef DEBUGGING_SUPPORT
+ /* Managed RetVal: emit sequence point for the call */
+ if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
+ {
+ codeGen->genIPmappingAdd(ilOffset, false);
+ }
+#endif
+
+ /*
+ We need to allocate the appropriate instruction descriptor based
+ on whether this is a direct/indirect call, and whether we need to
+ record an updated set of live GC variables.
+ */
+
+ if (callType >= EC_INDIR_R)
+ {
+ /* Indirect call, virtual calls */
+
+ assert(callType == EC_INDIR_R);
+
+ id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize);
+ }
+ else
+ {
+ /* Helper/static/nonvirtual/function calls (direct or through handle),
+ and calls to an absolute addr. */
+
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
+
+ id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize);
+ }
+
+ /* Update the emitter's live GC ref sets */
+
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
+ emitThisGCrefRegs = gcrefRegs;
+ emitThisByrefRegs = byrefRegs;
+
+ /* Set the instruction - special case jumping a function */
+ instruction ins;
+ insFormat fmt = IF_NONE;
+
+ id->idSetIsNoGC(isNoGC);
+
+ /* Record the address: method, indirection, or funcptr */
+
+ if (callType > EC_FUNC_ADDR)
+ {
+ /* This is an indirect call (either a virtual call or func ptr call) */
+
+ switch (callType)
+ {
+ case EC_INDIR_R: // the address is in a register
+
+ id->idSetIsCallRegPtr();
+
+ if (isJump)
+ {
+ ins = INS_br_tail; // INS_br_tail Reg
+ }
+ else
+ {
+ ins = INS_blr; // INS_blr Reg
+ }
+ fmt = IF_BR_1B;
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idReg3(ireg);
+ assert(xreg == REG_NA);
+ break;
+
+ default:
+ NO_WAY("unexpected instruction");
+ break;
+ }
+ }
+ else
+ {
+ /* This is a simple direct call: "call helper/method/addr" */
+
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
+
+ assert(addr != NULL);
+
+ if (isJump)
+ {
+ ins = INS_b_tail; // INS_b_tail imm28
+ }
+ else
+ {
+ ins = INS_bl; // INS_bl imm28
+ }
+ fmt = IF_BI_0C;
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddr = (BYTE*)addr;
+
+ if (callType == EC_FUNC_ADDR)
+ {
+ id->idSetIsCallAddr();
+ }
+
+#if RELOC_SUPPORT
+ if (emitComp->opts.compReloc)
+ {
+ id->idSetIsDspReloc();
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ if (id->idIsLargeCall())
+ {
+ printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
+ VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
+ }
+ }
+#endif
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+ id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
+ id->idDebugOnlyInfo()->idClsCookie = 0;
+ id->idDebugOnlyInfo()->idCallSig = sigInfo;
+#endif
+
+#if defined(LATE_DISASM)
+ if (addr != nullptr)
+ {
+ codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
+ }
+#endif // defined(LATE_DISASM)
+
+ dispIns(id);
+ appendToCurIG(id);
+}
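+
+ // Usage summary drawn from the cases above: EC_INDIR_R with 'ireg' produces "blr ireg"
+ // (or "br ireg" for a tail call, IF_BR_1B); EC_FUNC_TOKEN/EC_FUNC_ADDR with 'addr' produces
+ // "bl imm28" (or "b imm28" for a tail call, IF_BI_0C), with a relocation when compReloc is set.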
+
+/*****************************************************************************
+ *
+ * Returns true if 'imm' is valid Cond encoding
+ */
+
+/*static*/ bool emitter::isValidImmCond(ssize_t imm)
+{
+ // range check the ssize_t value, to make sure it is a small unsigned value
+ // and that only the bits in the cfi.cond are set
+ if ((imm < 0) || (imm > 0xF))
+ return false;
+
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+
+ return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV).
+}
+
+/*****************************************************************************
+ *
+ * Returns true if 'imm' is valid Cond/Flags encoding
+ */
+
+/*static*/ bool emitter::isValidImmCondFlags(ssize_t imm)
+{
+ // range check the ssize_t value, to make sure it is a small unsigned value
+ // and that only the bits in the cfi.cond or cfi.flags are set
+ if ((imm < 0) || (imm > 0xFF))
+ return false;
+
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+
+ return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV).
+}
+
+/*****************************************************************************
+ *
+ * Returns true if 'imm' is valid Cond/Flags/Imm5 encoding
+ */
+
+/*static*/ bool emitter::isValidImmCondFlagsImm5(ssize_t imm)
+{
+ // range check the ssize_t value, to make sure it is a small unsigned value
+ // and that only the bits in the cfi.cond, cfi.flags or cfi.imm5 are set
+ if ((imm < 0) || (imm > 0x1FFF))
+ return false;
+
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+
+ return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV).
+}
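+
+ // A sketch of the condFlagsImm layout implied by the three range checks above (exact bitfield
+ // widths assumed): bits [3:0] hold 'cond' (<= 0xF), bits [7:4] hold 'flags' (together <= 0xFF),
+ // and bits [12:8] hold 'imm5' (together <= 0x1FFF).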
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Rd' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Rd(regNumber reg)
+{
+ assert(isIntegerRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Rt' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Rt(regNumber reg)
+{
+ assert(isIntegerRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Rn' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Rn(regNumber reg)
+{
+ assert(isIntegerRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg << 5;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Rm' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Rm(regNumber reg)
+{
+ assert(isIntegerRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg << 16;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Ra' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Ra(regNumber reg)
+{
+ assert(isIntegerRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg << 10;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Vd' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Vd(regNumber reg)
+{
+ assert(emitter::isVectorRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Vt' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Vt(regNumber reg)
+{
+ assert(emitter::isVectorRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Vn' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Vn(regNumber reg)
+{
+ assert(emitter::isVectorRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg << 5;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Vm' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Vm(regNumber reg)
+{
+ assert(emitter::isVectorRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg << 16;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Va' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Va(regNumber reg)
+{
+ assert(emitter::isVectorRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg << 10;
+}
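+
+ // These field encoders are simply OR-ed into the base opcode. For example (illustrative), a
+ // three-register integer instruction in the DR_3A form composes its registers as:
+ //   code |= insEncodeReg_Rd(reg1); // bits [4:0]
+ //   code |= insEncodeReg_Rn(reg2); // bits [9:5]
+ //   code |= insEncodeReg_Rm(reg3); // bits [20:16]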
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified condition code.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeCond(insCond cond)
+{
+ emitter::code_t uimm = (emitter::code_t)cond;
+ return uimm << 12;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the condition code with the lowest bit inverted (marked by invert(<cond>) in the
+ * architecture manual).
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeInvertedCond(insCond cond)
+{
+ emitter::code_t uimm = (emitter::code_t)cond;
+ uimm ^= 1; // invert the lowest bit
+ return uimm << 12;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified flags.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeFlags(insCflags flags)
+{
+ emitter::code_t uimm = (emitter::code_t)flags;
+ return uimm;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the Shift Count bits to be used for Arm64 encodings
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeShiftCount(ssize_t imm, emitAttr size)
+{
+ assert((imm & 0x003F) == imm);
+ assert(((imm & 0x0020) == 0) || (size == EA_8BYTE));
+
+ return (emitter::code_t)imm << 10;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select a 64-bit datasize for an Arm64 instruction
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeDatasize(emitAttr size)
+{
+ if (size == EA_8BYTE)
+ {
+ return 0x80000000; // set the bit at location 31
+ }
+ else
+ {
+ assert(size == EA_4BYTE);
+ return 0;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the datasize for the general load/store Arm64 instructions
+ *
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeDatasizeLS(emitter::code_t code, emitAttr size)
+{
+ if (code & 0x00800000) // Is this a sign-extending opcode? (i.e. ldrsw, ldrsh, ldrsb)
+ {
+ assert((size == EA_4BYTE) || (size == EA_8BYTE));
+ if ((code & 0x80000000) == 0) // Is it a ldrsh or ldrsb and not ldrsw ?
+ {
+ if (size == EA_4BYTE) // Do we need to encode the 32-bit Rt size bit?
+ {
+ return 0x00400000; // set the bit at location 22
+ }
+ }
+ }
+ else if (code & 0x80000000) // Is this a ldr/str/ldur/stur opcode?
+ {
+ assert((size == EA_4BYTE) || (size == EA_8BYTE));
+ if (size == EA_8BYTE) // Do we need to encode the 64-bit size bit?
+ {
+ return 0x40000000; // set the bit at location 30
+ }
+ }
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the datasize for the vector load/store Arm64 instructions
+ *
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeDatasizeVLS(emitter::code_t code, emitAttr size)
+{
+ code_t result = 0;
+
+ // Check bit 29
+ if ((code & 0x20000000) == 0)
+ {
+ // LDR literal
+
+ if (size == EA_16BYTE)
+ {
+ // set the operation size in bit 31
+ result = 0x80000000;
+ }
+ else if (size == EA_8BYTE)
+ {
+ // set the operation size in bit 30
+ result = 0x40000000;
+ }
+ else
+ {
+ assert(size == EA_4BYTE);
+ // no bits are set
+ result = 0x00000000;
+ }
+ }
+ else
+ {
+ // LDR non-literal
+
+ if (size == EA_16BYTE)
+ {
+ // The operation size in bits 31 and 30 are zero
+ // Bit 23 specifies a 128-bit Load/Store
+ result = 0x00800000;
+ }
+ else if (size == EA_8BYTE)
+ {
+ // set the operation size in bits 31 and 30
+ result = 0xC0000000;
+ }
+ else if (size == EA_4BYTE)
+ {
+ // set the operation size in bit 31
+ result = 0x80000000;
+ }
+ else if (size == EA_2BYTE)
+ {
+ // set the operation size in bit 30
+ result = 0x40000000;
+ }
+ else
+ {
+ assert(size == EA_1BYTE);
+ // The operation size in bits 31 and 30 are zero
+ result = 0x00000000;
+ }
+ }
+
+ // Or in bit 26 to indicate a Vector register is used as 'target'
+ result |= 0x04000000;
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the datasize for the vector load/store Arm64 instructions
+ *
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeDatasizeVPLS(emitter::code_t code, emitAttr size)
+{
+ code_t result = 0;
+
+ if (size == EA_16BYTE)
+ {
+ // set the operation size in bit 31 (a 128-bit SIMD&FP load/store pair)
+ result = 0x80000000;
+ }
+ else if (size == EA_8BYTE)
+ {
+ // set the operation size in bit 30
+ result = 0x40000000;
+ }
+ else if (size == EA_4BYTE)
+ {
+ // the operation size bits 31 and 30 are zero
+ result = 0x00000000;
+ }
+
+ // Or in bit 26 to indicate a Vector register is used as 'target'
+ result |= 0x04000000;
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to set the size bit and the N bits for a 'bitfield' instruction
+ *
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeDatasizeBF(emitter::code_t code, emitAttr size)
+{
+ // is bit 30 equal to 0?
+ if ((code & 0x40000000) == 0) // is the opcode one of extr, sxtb, sxth or sxtw
+ {
+ if (size == EA_8BYTE) // Do we need to set the sf and N bits?
+ {
+ return 0x80400000; // set the sf-bit at location 31 and the N-bit at location 22
+ }
+ }
+ return 0; // don't set any bits
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the 64/128-bit datasize for an Arm64 vector instruction
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeVectorsize(emitAttr size)
+{
+ if (size == EA_16BYTE)
+ {
+ return 0x40000000; // set the bit at location 30
+ }
+ else
+ {
+ assert(size == EA_8BYTE);
+ return 0;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select 'index' for an Arm64 vector elem instruction
+ */
+/*static*/ emitter::code_t emitter::insEncodeVectorIndex(emitAttr elemsize, ssize_t index)
+{
+ code_t bits = (code_t)index;
+ if (elemsize == EA_1BYTE)
+ {
+ bits <<= 1;
+ bits |= 1;
+ }
+ else if (elemsize == EA_2BYTE)
+ {
+ bits <<= 2;
+ bits |= 2;
+ }
+ else if (elemsize == EA_4BYTE)
+ {
+ bits <<= 3;
+ bits |= 4;
+ }
+ else
+ {
+ assert(elemsize == EA_8BYTE);
+ bits <<= 4;
+ bits |= 8;
+ }
+ assert((bits >= 1) && (bits <= 0x1f));
+
+ return (bits << 16); // bits at locations [20,19,18,17,16]
+}
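+
+ // Worked example: for a 4-byte element at index 2, bits = (2 << 3) | 4 = 0b10100; shifted to
+ // [20:16] this yields 0x00140000 (the lowest set bit encodes the element size, the bits above
+ // it encode the index).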
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select 'index2' for an Arm64 'ins' elem instruction
+ */
+/*static*/ emitter::code_t emitter::insEncodeVectorIndex2(emitAttr elemsize, ssize_t index2)
+{
+ code_t bits = (code_t)index2;
+ if (elemsize == EA_1BYTE)
+ {
+ // bits are correct
+ }
+ else if (elemsize == EA_2BYTE)
+ {
+ bits <<= 1;
+ }
+ else if (elemsize == EA_4BYTE)
+ {
+ bits <<= 2;
+ }
+ else
+ {
+ assert(elemsize == EA_8BYTE);
+ bits <<= 3;
+ }
+ assert((bits >= 0) && (bits <= 0xf));
+
+ return (bits << 11); // bits at locations [14,13,12,11]
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the 'index' for an Arm64 'mul' by elem instruction
+ */
+/*static*/ emitter::code_t emitter::insEncodeVectorIndexLMH(emitAttr elemsize, ssize_t index)
+{
+ code_t bits = 0;
+
+ if (elemsize == EA_2BYTE)
+ {
+ assert((index >= 0) && (index <= 7));
+ if (index & 0x4)
+ {
+ bits |= (1 << 11); // set bit 11 'H'
+ }
+ if (index & 0x2)
+ {
+ bits |= (1 << 21); // set bit 21 'L'
+ }
+ if (index & 0x1)
+ {
+ bits |= (1 << 20); // set bit 20 'M'
+ }
+ }
+ else if (elemsize == EA_4BYTE)
+ {
+ assert((index >= 0) && (index <= 3));
+ if (index & 0x2)
+ {
+ bits |= (1 << 11); // set bit 11 'H'
+ }
+ if (index & 0x1)
+ {
+ bits |= (1 << 21); // set bit 21 'L'
+ }
+ }
+ else
+ {
+ assert(!"Invalid 'elemsize' value");
+ }
+
+ return bits;
+}
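+
+ // Worked example: for 2-byte elements and index 5 (0b101), bit 'H' (11) and bit 'M' (20) are
+ // set, giving 0x00100800.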
+
+/*****************************************************************************
+ *
+ * Returns the encoding to shift by 'shift' for an Arm64 vector or scalar instruction
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeVectorShift(emitAttr size, ssize_t shift)
+{
+ assert(shift < getBitWidth(size));
+
+ code_t imm = (code_t)(getBitWidth(size) + shift);
+
+ return imm << 16;
+}
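+
+ // Worked example (assuming getBitWidth(EA_8BYTE) == 64, i.e. the shift-left immediate form):
+ // for 8-byte elements and shift == 3, imm = 64 + 3 = 67, and 67 << 16 = 0x00430000 is OR-ed
+ // into the immh:immb field.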
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 vector instruction
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeElemsize(emitAttr size)
+{
+ if (size == EA_8BYTE)
+ {
+ return 0x00C00000; // set the bit at location 23 and 22
+ }
+ else if (size == EA_4BYTE)
+ {
+ return 0x00800000; // set the bit at location 23
+ }
+ else if (size == EA_2BYTE)
+ {
+ return 0x00400000; // set the bit at location 22
+ }
+ assert(size == EA_1BYTE);
+ return 0x00000000;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the 4/8 byte elemsize for an Arm64 float vector instruction
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeFloatElemsize(emitAttr size)
+{
+ if (size == EA_8BYTE)
+ {
+ return 0x00400000; // set the bit at location 22
+ }
+ assert(size == EA_4BYTE);
+ return 0x00000000;
+}
+
+// Returns the encoding to select the index for an Arm64 float vector by elem instruction
+/*static*/ emitter::code_t emitter::insEncodeFloatIndex(emitAttr elemsize, ssize_t index)
+{
+ code_t result = 0x00000000;
+ if (elemsize == EA_8BYTE)
+ {
+ assert((index >= 0) && (index <= 1));
+ if (index == 1)
+ {
+ result |= 0x00000800; // 'H' - set the bit at location 11
+ }
+ }
+ else
+ {
+ assert(elemsize == EA_4BYTE);
+ assert((index >= 0) && (index <= 3));
+ if (index & 2)
+ {
+ result |= 0x00000800; // 'H' - set the bit at location 11
+ }
+ if (index & 1)
+ {
+ result |= 0x00200000; // 'L' - set the bit at location 21
+ }
+ }
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the fcvt operation for Arm64 instructions
+ */
+/*static*/ emitter::code_t emitter::insEncodeConvertOpt(insFormat fmt, insOpts conversion)
+{
+ code_t result = 0;
+ switch (conversion)
+ {
+ case INS_OPTS_S_TO_D: // Single to Double
+ assert(fmt == IF_DV_2J);
+ result = 0x00008000; // type=00, opc=01
+ break;
+
+ case INS_OPTS_D_TO_S: // Double to Single
+ assert(fmt == IF_DV_2J);
+ result = 0x00400000; // type=01, opc=00
+ break;
+
+ case INS_OPTS_H_TO_S: // Half to Single
+ assert(fmt == IF_DV_2J);
+ result = 0x00C00000; // type=11, opc=00
+ break;
+
+ case INS_OPTS_H_TO_D: // Half to Double
+ assert(fmt == IF_DV_2J);
+ result = 0x00C08000; // type=11, opc=01
+ break;
+
+ case INS_OPTS_S_TO_H: // Single to Half
+ assert(fmt == IF_DV_2J);
+ result = 0x00018000; // type=00, opc=11
+ break;
+
+ case INS_OPTS_D_TO_H: // Double to Half
+ assert(fmt == IF_DV_2J);
+ result = 0x00418000; // type=01, opc=11
+ break;
+
+ case INS_OPTS_S_TO_4BYTE: // Single to INT32
+ assert(fmt == IF_DV_2H);
+ result = 0x00000000; // sf=0, type=00
+ break;
+
+ case INS_OPTS_D_TO_4BYTE: // Double to INT32
+ assert(fmt == IF_DV_2H);
+ result = 0x00400000; // sf=0, type=01
+ break;
+
+ case INS_OPTS_S_TO_8BYTE: // Single to INT64
+ assert(fmt == IF_DV_2H);
+ result = 0x80000000; // sf=1, type=00
+ break;
+
+ case INS_OPTS_D_TO_8BYTE: // Double to INT64
+ assert(fmt == IF_DV_2H);
+ result = 0x80400000; // sf=1, type=01
+ break;
+
+ case INS_OPTS_4BYTE_TO_S: // INT32 to Single
+ assert(fmt == IF_DV_2I);
+ result = 0x00000000; // sf=0, type=00
+ break;
+
+ case INS_OPTS_4BYTE_TO_D: // INT32 to Double
+ assert(fmt == IF_DV_2I);
+ result = 0x00400000; // sf=0, type=01
+ break;
+
+ case INS_OPTS_8BYTE_TO_S: // INT64 to Single
+ assert(fmt == IF_DV_2I);
+ result = 0x80000000; // sf=1, type=00
+ break;
+
+ case INS_OPTS_8BYTE_TO_D: // INT64 to Double
+ assert(fmt == IF_DV_2I);
+ result = 0x80400000; // sf=1, type=01
+ break;
+
+ default:
+ assert(!"Invalid 'conversion' value");
+ break;
+ }
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to have the Rn register be updated Pre/Post indexed
+ * or not updated
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeIndexedOpt(insOpts opt)
+{
+ assert(emitter::insOptsNone(opt) || emitter::insOptsIndexed(opt));
+
+ if (emitter::insOptsIndexed(opt))
+ {
+ if (emitter::insOptsPostIndex(opt))
+ {
+ return 0x00000400; // set the bit at location 10
+ }
+ else
+ {
+ assert(emitter::insOptsPreIndex(opt));
+ return 0x00000C00; // set the bit at location 10 and 11
+ }
+ }
+ else
+ {
+ assert(emitter::insOptsNone(opt));
+ return 0; // bits 10 and 11 are zero
+ }
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for a ldp/stp instruction to have the Rn register
+ * be updated Pre/Post indexed or not updated
+ */
+
+/*static*/ emitter::code_t emitter::insEncodePairIndexedOpt(instruction ins, insOpts opt)
+{
+ assert(emitter::insOptsNone(opt) || emitter::insOptsIndexed(opt));
+
+ if ((ins == INS_ldnp) || (ins == INS_stnp))
+ {
+ assert(emitter::insOptsNone(opt));
+ return 0; // bits 23 and 24 are zero
+ }
+ else
+ {
+ if (emitter::insOptsIndexed(opt))
+ {
+ if (emitter::insOptsPostIndex(opt))
+ {
+ return 0x00800000; // set the bit at location 23
+ }
+ else
+ {
+ assert(emitter::insOptsPreIndex(opt));
+ return 0x01800000; // set the bit at location 24 and 23
+ }
+ }
+ else
+ {
+ assert(emitter::insOptsNone(opt));
+ return 0x01000000; // set the bit at location 24
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to apply a Shift Type on the Rm register
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeShiftType(insOpts opt)
+{
+ if (emitter::insOptsNone(opt))
+ {
+ // None implies that we encode LSL (with a zero immediate)
+ opt = INS_OPTS_LSL;
+ }
+ assert(emitter::insOptsAnyShift(opt));
+
+ emitter::code_t option = (emitter::code_t)opt - (emitter::code_t)INS_OPTS_LSL;
+ assert(option <= 3);
+
+ return option << 22; // bits 23, 22
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to apply a 12 bit left shift to the immediate
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeShiftImm12(insOpts opt)
+{
+ if (emitter::insOptsLSL12(opt))
+ {
+ return 0x00400000; // set the bit at location 22
+ }
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to have the Rm register use an extend operation
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeExtend(insOpts opt)
+{
+ if (emitter::insOptsNone(opt) || (opt == INS_OPTS_LSL))
+ {
+ // None or LSL implies that we encode UXTX
+ opt = INS_OPTS_UXTX;
+ }
+ assert(emitter::insOptsAnyExtend(opt));
+
+ emitter::code_t option = (emitter::code_t)opt - (emitter::code_t)INS_OPTS_UXTB;
+ assert(option <= 7);
+
+ return option << 13; // bits 15,14,13
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to scale the Rm register by {0,1,2,3,4}
+ * when using an extend operation
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeExtendScale(ssize_t imm)
+{
+ assert((imm >= 0) && (imm <= 4));
+
+ return (emitter::code_t)imm << 10; // bits 12,11,10
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to have the Rm register be auto scaled by the ld/st size
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg3Scale(bool isScaled)
+{
+ if (isScaled)
+ {
+ return 0x00001000; // set the bit at location 12
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+BYTE* emitter::emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id)
+{
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+ regNumber dstReg = id->idReg1();
+ if (id->idjShort)
+ {
+ // adr x, [rel addr] -- compute address: current addr(ip) + rel addr.
+ assert(ins == INS_adr);
+ assert(fmt == IF_DI_1E);
+ ssize_t distVal = (ssize_t)(dstAddr - srcAddr);
+ dst = emitOutputShortAddress(dst, ins, fmt, distVal, dstReg);
+ }
+ else
+ {
+ // adrp x, [rel page addr] -- compute page address: current page addr + rel page addr
+ assert(fmt == IF_LARGEADR);
+ ssize_t relPageAddr =
+ (((ssize_t)dstAddr & 0xFFFFFFFFFFFFF000LL) - ((ssize_t)srcAddr & 0xFFFFFFFFFFFFF000LL)) >> 12;
+ dst = emitOutputShortAddress(dst, INS_adrp, IF_DI_1E, relPageAddr, dstReg);
+
+ // add x, x, page offs -- compute address = page addr + page offs
+ ssize_t imm12 = (ssize_t)dstAddr & 0xFFF; // 12 bits
+ assert(isValidUimm12(imm12));
+ code_t code =
+ emitInsCode(INS_add, IF_DI_2A); // DI_2A X0010001shiiiiii iiiiiinnnnnddddd 1100 0000 imm(i12, sh)
+ code |= insEncodeDatasize(EA_8BYTE); // X
+ code |= ((code_t)imm12 << 10); // iiiiiiiiiiii
+ code |= insEncodeReg_Rd(dstReg); // ddddd
+ code |= insEncodeReg_Rn(dstReg); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ }
+ return dst;
+}
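+
+ // Worked example of the long (adrp + add) path, addresses assumed: with srcAddr == 0x20007F20
+ // and dstAddr == 0x20009A30, relPageAddr = (0x20009000 - 0x20007000) >> 12 = 2 and
+ // imm12 = 0xA30, so the sequence is "adrp Rd, #2" followed by "add Rd, Rd, #0xA30".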
+
+/*****************************************************************************
+ *
+ * Output a local jump or other instruction with a pc-relative immediate.
+ * Note that this may be invoked to overwrite an existing jump instruction at 'dst'
+ * to handle forward branch patching.
+ */
+
+BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
+{
+ instrDescJmp* id = (instrDescJmp*)i;
+
+ unsigned srcOffs;
+ unsigned dstOffs;
+ BYTE* srcAddr;
+ BYTE* dstAddr;
+ ssize_t distVal;
+ ssize_t loBits;
+
+ // Set default ins/fmt from id.
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+
+ bool loadLabel = false;
+ bool isJump = false;
+ bool loadConstant = false;
+
+ switch (ins)
+ {
+ default:
+ isJump = true;
+ break;
+
+ case INS_tbz:
+ case INS_tbnz:
+ case INS_cbz:
+ case INS_cbnz:
+ isJump = true;
+ break;
+
+ case INS_ldr:
+ case INS_ldrsw:
+ loadConstant = true;
+ break;
+
+ case INS_adr:
+ case INS_adrp:
+ loadLabel = true;
+ break;
+ }
+
+ /* Figure out the distance to the target */
+
+ srcOffs = emitCurCodeOffs(dst);
+ srcAddr = emitOffsetToPtr(srcOffs);
+
+ if (id->idAddr()->iiaIsJitDataOffset())
+ {
+ assert(loadConstant || loadLabel);
+ int doff = id->idAddr()->iiaGetJitDataOffset();
+ assert(doff >= 0);
+ ssize_t imm = emitGetInsSC(id);
+ assert((imm >= 0) && (imm < 0x1000)); // 0x1000 is arbitrary, currently 'imm' is always 0
+
+ unsigned dataOffs = (unsigned)(doff + imm);
+ assert(dataOffs < emitDataSize());
+ dstAddr = emitDataOffsetToPtr(dataOffs);
+
+ regNumber dstReg = id->idReg1();
+ regNumber addrReg = dstReg; // an integer register to compute long address.
+ emitAttr opSize = id->idOpSize();
+
+ if (loadConstant)
+ {
+ if (id->idjShort)
+ {
+ // ldr x/v, [rel addr] -- load constant from current addr(ip) + rel addr.
+ assert(ins == INS_ldr);
+ assert(fmt == IF_LS_1A);
+ distVal = (ssize_t)(dstAddr - srcAddr);
+ dst = emitOutputShortConstant(dst, ins, fmt, distVal, dstReg, opSize);
+ }
+ else
+ {
+ // adrp x, [rel page addr] -- compute page address: current page addr + rel page addr
+ assert(fmt == IF_LARGELDC);
+ ssize_t relPageAddr =
+ (((ssize_t)dstAddr & 0xFFFFFFFFFFFFF000LL) - ((ssize_t)srcAddr & 0xFFFFFFFFFFFFF000LL)) >> 12;
+ if (isVectorRegister(dstReg))
+ {
+ // Update addrReg with the reserved integer register
+ // since we cannot use dstReg (vector) to load constant directly from memory.
+ addrReg = id->idReg2();
+ assert(isGeneralRegister(addrReg));
+ }
+ ins = INS_adrp;
+ fmt = IF_DI_1E;
+ dst = emitOutputShortAddress(dst, ins, fmt, relPageAddr, addrReg);
+
+ // ldr x, [x, page offs] -- load constant from page address + page offset into integer register.
+ ssize_t imm12 = (ssize_t)dstAddr & 0xFFF; // 12 bits
+ assert(isValidUimm12(imm12));
+ ins = INS_ldr;
+ fmt = IF_LS_2B;
+ dst = emitOutputShortConstant(dst, ins, fmt, imm12, addrReg, opSize);
+
+ // fmov v, d -- copy constant in integer register to vector register.
+ // This is needed only for vector constant.
+ if (addrReg != dstReg)
+ {
+ // fmov Vd,Rn DV_2I X00111100X100111 000000nnnnnddddd 1E27 0000 Vd,Rn
+ // (scalar, from general)
+ assert(isVectorRegister(dstReg) && isGeneralRegister(addrReg));
+ ins = INS_fmov;
+ fmt = IF_DV_2I;
+ code_t code = emitInsCode(ins, fmt);
+
+ code |= insEncodeReg_Vd(dstReg); // ddddd
+ code |= insEncodeReg_Rn(addrReg); // nnnnn
+ if (id->idOpSize() == EA_8BYTE)
+ {
+ code |= 0x80400000; // X ... X
+ }
+ dst += emitOutput_Instr(dst, code);
+ }
+ }
+ }
+ else
+ {
+ assert(loadLabel);
+ dst = emitOutputLoadLabel(dst, srcAddr, dstAddr, id);
+ }
+
+ return dst;
+ }
+
+ assert(loadLabel || isJump);
+
+ if (id->idAddr()->iiaHasInstrCount())
+ {
+ assert(ig != NULL);
+ int instrCount = id->idAddr()->iiaGetInstrCount();
+ unsigned insNum = emitFindInsNum(ig, id);
+ if (instrCount < 0)
+ {
+ // Backward branches using instruction count must be within the same instruction group.
+ assert(insNum + 1 >= (unsigned)(-instrCount));
+ }
+ dstOffs = ig->igOffs + emitFindOffset(ig, (insNum + 1 + instrCount));
+ dstAddr = emitOffsetToPtr(dstOffs);
+ }
+ else
+ {
+ dstOffs = id->idAddr()->iiaIGlabel->igOffs;
+ dstAddr = emitOffsetToPtr(dstOffs);
+ }
+
+ distVal = (ssize_t)(dstAddr - srcAddr);
+
+ if (dstOffs <= srcOffs)
+ {
+#if DEBUG_EMIT
+ /* This is a backward jump - distance is known at this point */
+
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ size_t blkOffs = id->idjIG->igOffs;
+
+ if (INTERESTING_JUMP_NUM == 0)
+ printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ printf("[3] Jump block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
+ printf("[3] Jump is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
+ printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
+ }
+#endif
+ }
+ else
+ {
+ /* This is a forward jump - distance will be an upper limit */
+
+ emitFwdJumps = true;
+
+ /* The target offset will be closer by at least 'emitOffsAdj', but only if this
+ jump doesn't cross the hot-cold boundary. */
+
+ if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ dstOffs -= emitOffsAdj;
+ distVal -= emitOffsAdj;
+ }
+
+ /* Record the location of the jump for later patching */
+
+ id->idjOffs = dstOffs;
+
+ /* Are we overflowing the id->idjOffs bitfield? */
+ if (id->idjOffs != dstOffs)
+ IMPL_LIMITATION("Method is too large");
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ size_t blkOffs = id->idjIG->igOffs;
+
+ if (INTERESTING_JUMP_NUM == 0)
+ printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ printf("[4] Jump block is at %08X\n", blkOffs);
+ printf("[4] Jump is at %08X\n", srcOffs);
+ printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ if (0 && emitComp->verbose)
+ {
+ size_t sz = 4;
+ int distValSize = id->idjShort ? 4 : 8;
+ printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd",
+ dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs, distVal);
+ }
+#endif
+
+ /* For forward jumps, record the address of the distance value */
+ id->idjTemp.idjAddr = (distVal > 0) ? dst : NULL;
+
+ if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ assert(!id->idjShort);
+ NYI_ARM64("Relocation Support for long address");
+ }
+
+ assert(insOptsNone(id->idInsOpt()));
+
+ if (isJump)
+ {
+ if (id->idjShort)
+ {
+ // Short conditional/unconditional jump
+ assert(!id->idjKeepLong);
+ assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
+ assert((fmt == IF_BI_0A) || (fmt == IF_BI_0B));
+ }
+ else
+ {
+ // Long conditional jump
+ assert(fmt == IF_LARGEJMP);
+ // This is a pseudo-instruction format representing a large conditional branch, to allow
+ // us to get a greater branch target range than we can get by using a straightforward conditional
+ // branch. It is encoded as a short conditional branch that branches around a long unconditional
+ // branch.
+ //
+ // Conceptually, we have:
+ //
+ // b<cond> L_target
+ //
+ // The code we emit is:
+ //
+ // b<!cond> L_not // 4 bytes. Note that we reverse the condition.
+ // b L_target // 4 bytes
+ // L_not:
+ //
+ // Note that we don't actually insert any blocks: we simply encode "b <!cond> L_not" as a branch with
+ // the correct offset. Note also that this works for both integer and floating-point conditions, because
+ // the condition inversion takes ordered/unordered into account, preserving NaN behavior. For example,
+ // "GT" (greater than) is inverted to "LE" (less than, equal, or unordered).
+ dst =
+ emitOutputShortBranch(dst,
+ emitJumpKindToIns(emitReverseJumpKind(
+ emitInsToJumpKind(ins))), // reverse the conditional instruction
+ IF_BI_0B,
+ 8, /* 8 bytes from start of this large conditional pseudo-instruction to L_not. */
+ nullptr /* only used for tbz/tbnz/cbz/cbnz */);
+
+ // Now, pretend we've got a normal unconditional branch, and fall through to the code to emit that.
+ ins = INS_b;
+ fmt = IF_BI_0A;
+
+ // The distVal was computed based on the beginning of the pseudo-instruction,
+ // So subtract the size of the conditional branch so that it is relative to the
+ // unconditional branch.
+ distVal -= 4;
+ }
+
+ dst = emitOutputShortBranch(dst, ins, fmt, distVal, id);
+ }
+ else if (loadLabel)
+ {
+ dst = emitOutputLoadLabel(dst, srcAddr, dstAddr, id);
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output a short branch instruction.
+ */
+BYTE* emitter::emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id)
+{
+ code_t code = emitInsCode(ins, fmt);
+
+ ssize_t loBits = (distVal & 3);
+ noway_assert(loBits == 0);
+ distVal >>= 2; // branch offset encodings are scaled by 4.
+
+ if (fmt == IF_BI_0A)
+ {
+ // INS_b or INS_bl_local
+ noway_assert(isValidSimm26(distVal));
+ distVal &= 0x3FFFFFFLL;
+ code |= distVal;
+ }
+ else if (fmt == IF_BI_0B) // BI_0B 01010100iiiiiiii iiiiiiiiiiiXXXXX simm19:00
+ {
+ // INS_beq, INS_bne, etc...
+ noway_assert(isValidSimm19(distVal));
+ distVal &= 0x7FFFFLL;
+ code |= distVal << 5;
+ }
+ else if (fmt == IF_BI_1A) // BI_1A X.......iiiiiiii iiiiiiiiiiittttt Rt simm19:00
+ {
+ // INS_cbz or INS_cbnz
+ assert(id != nullptr);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+
+ noway_assert(isValidSimm19(distVal));
+ distVal &= 0x7FFFFLL; // 19 bits
+ code |= distVal << 5;
+ }
+ else if (fmt == IF_BI_1B) // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00
+ {
+ // INS_tbz or INS_tbnz
+ assert(id != nullptr);
+ ssize_t imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+
+ if (imm & 0x20) // test bit 32-63 ?
+ {
+ code |= 0x80000000; // B
+ }
+ code |= ((imm & 0x1F) << 19); // bbbbb
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+
+ noway_assert(isValidSimm14(distVal));
+ distVal &= 0x3FFFLL; // 14 bits
+ code |= distVal << 5;
+ }
+ else
+ {
+ assert(!"Unknown fmt for emitOutputShortBranch");
+ }
+
+ dst += emitOutput_Instr(dst, code);
+
+ return dst;
+}
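+
+ // The ">>= 2" scaling above gives the usual ARM64 branch ranges: simm26 (b/bl) reaches
+ // +/-128MB, simm19 (b.cond, cbz/cbnz) +/-1MB, and simm14 (tbz/tbnz) +/-32KB from the branch.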
+
+/*****************************************************************************
+ *
+ * Output a short address instruction.
+ */
+BYTE* emitter::emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg)
+{
+ ssize_t loBits = (distVal & 3);
+ distVal >>= 2;
+
+ code_t code = emitInsCode(ins, fmt);
+ if (fmt == IF_DI_1E) // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21
+ {
+ // INS_adr or INS_adrp
+ code |= insEncodeReg_Rd(reg); // ddddd
+
+ noway_assert(isValidSimm19(distVal));
+ distVal &= 0x7FFFFLL; // 19 bits
+ code |= distVal << 5;
+ code |= loBits << 29; // 2 bits
+ }
+ else
+ {
+ assert(!"Unknown fmt for emitOutputShortAddress");
+ }
+
+ dst += emitOutput_Instr(dst, code);
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output a short constant instruction.
+ */
+BYTE* emitter::emitOutputShortConstant(
+ BYTE* dst, instruction ins, insFormat fmt, ssize_t imm, regNumber reg, emitAttr opSize)
+{
+ code_t code = emitInsCode(ins, fmt);
+
+ if (fmt == IF_LS_1A)
+ {
+ // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt simm21
+ // INS_ldr or INS_ldrsw (PC-Relative)
+
+ ssize_t loBits = (imm & 3);
+ noway_assert(loBits == 0);
+ ssize_t distVal = imm >>= 2; // load offset encodings are scaled by 4.
+
+ noway_assert(isValidSimm19(distVal));
+
+ // Is the target a vector register?
+ if (isVectorRegister(reg))
+ {
+ code |= insEncodeDatasizeVLS(code, opSize); // XX V
+ code |= insEncodeReg_Vt(reg); // ttttt
+ }
+ else
+ {
+ assert(isGeneralRegister(reg));
+ // insEncodeDatasizeLS is not quite right for this case.
+ // So just specialize it.
+ if ((ins == INS_ldr) && (opSize == EA_8BYTE))
+ {
+ // set the operation size in bit 30
+ code |= 0x40000000;
+ }
+
+ code |= insEncodeReg_Rt(reg); // ttttt
+ }
+
+ distVal &= 0x7FFFFLL; // 19 bits
+ code |= distVal << 5;
+ }
+ else if (fmt == IF_LS_2B)
+ {
+ // ldr Rt,[Xn+pimm12] LS_2B 1X11100101iiiiii iiiiiinnnnnttttt B940 0000 imm(0-4095<<{2,3})
+ // INS_ldr or INS_ldrsw (PC-Relative)
+ noway_assert(isValidUimm12(imm));
+ assert(isGeneralRegister(reg));
+
+ if (opSize == EA_8BYTE)
+ {
+ // insEncodeDatasizeLS is not quite right for this case.
+ // So just specialize it.
+ if (ins == INS_ldr)
+ {
+ // set the operation size in bit 30
+ code |= 0x40000000;
+ }
+ // Low 3 bits should be 0 -- 8-byte JIT data should be 8-byte aligned.
+ assert((imm & 7) == 0);
+ imm >>= 3;
+ }
+ else
+ {
+ assert(opSize == EA_4BYTE);
+ // Low 2 bits should be 0 -- 4 byte aligned data.
+ assert((imm & 3) == 0);
+ imm >>= 2;
+ }
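+ // For example (illustrative): an 8-byte 'ldr' of JIT data at byte offset 16 arrives here with
+ // imm == 16 and is scaled down to 2; the hardware scales the 12-bit field back up by 8.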
+
+ code |= insEncodeReg_Rt(reg); // ttttt
+ code |= insEncodeReg_Rn(reg); // nnnnn
+ code |= imm << 10;
+ }
+ else
+ {
+ assert(!"Unknown fmt for emitOutputShortConstant");
+ }
+
+ dst += emitOutput_Instr(dst, code);
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output a call instruction.
+ */
+
+unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code)
+{
+ const unsigned char callInstrSize = sizeof(code_t); // 4 bytes
+ regMaskTP gcrefRegs;
+ regMaskTP byrefRegs;
+
+ VARSET_TP VARSET_INIT_NOCOPY(GCvars, VarSetOps::UninitVal());
+
+ // Is this a "fat" call descriptor?
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+ gcrefRegs = idCall->idcGcrefRegs;
+ byrefRegs = idCall->idcByrefRegs;
+ VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+
+ gcrefRegs = emitDecodeCallGCregs(id);
+ byrefRegs = 0;
+ VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+ }
+
+ /* We update the GC info before the call as the variables cannot be
+ used by the call. Killing variables before the call helps with
+ boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
+ If we ever track aliased variables (which could be used by the
+ call), we would have to keep them alive past the call. */
+
+ emitUpdateLiveGCvars(GCvars, dst);
+
+ // Now output the call instruction and update the 'dst' pointer
+ //
+ unsigned outputInstrSize = emitOutput_Instr(dst, code);
+ dst += outputInstrSize;
+
+ // All call instructions are 4-byte in size on ARM64
+ //
+ assert(outputInstrSize == callInstrSize);
+
+ // If the method returns a GC ref, mark INTRET (R0) appropriately.
+ if (id->idGCref() == GCT_GCREF)
+ {
+ gcrefRegs |= RBM_INTRET;
+ }
+ else if (id->idGCref() == GCT_BYREF)
+ {
+ byrefRegs |= RBM_INTRET;
+ }
+
+ // If a multi-register-return method is called, mark INTRET_1 (X1) appropriately
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+ if (idCall->idSecondGCref() == GCT_GCREF)
+ {
+ gcrefRegs |= RBM_INTRET_1;
+ }
+ else if (idCall->idSecondGCref() == GCT_BYREF)
+ {
+ byrefRegs |= RBM_INTRET_1;
+ }
+ }
+
+ // If the GC register set has changed, report the new set.
+ if (gcrefRegs != emitThisGCrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
+ }
+ // If the Byref register set has changed, report the new set.
+ if (byrefRegs != emitThisByrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
+ }
+
+ // Some helper calls may be marked as not requiring GC info to be recorded.
+ if ((!id->idIsNoGC()))
+ {
+ // On ARM64, as on AMD64, we don't change the stack pointer to push/pop args.
+ // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism
+ // to record the call for GC info purposes. (It might be best to use an alternate call,
+ // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.)
+ emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0);
+
+ // Do we need to record a call location for GC purposes?
+ //
+ if (!emitFullGCinfo)
+ {
+ emitRecordGCcall(dst, callInstrSize);
+ }
+ }
+ return callInstrSize;
+}
+
+/*****************************************************************************
+ *
+ * Emit a 32-bit Arm64 instruction
+ */
+
+/*static*/ unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code)
+{
+ assert(sizeof(code_t) == 4);
+ *((code_t*)dst) = code;
+
+ return sizeof(code_t);
+}
+
+/*****************************************************************************
+ *
+ * Append the machine code corresponding to the given instruction descriptor
+ * to the code block at '*dp'; 'ig' is the instruction group that contains
+ * the instruction. Updates '*dp' to point past the generated code, and
+ * returns the size of the instruction descriptor in bytes.
+ */
+
+size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
+{
+ BYTE* dst = *dp;
+ BYTE* odst = dst;
+ code_t code = 0;
+ size_t sz = emitGetInstrDescSize(id); // TODO-ARM64-Cleanup: on ARM, this is set in each case. why?
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+ emitAttr size = id->idOpSize();
+ unsigned char callInstrSize = 0;
+ unsigned condcode;
+
+#ifdef DEBUG
+#if DUMP_GC_TABLES
+ bool dspOffs = emitComp->opts.dspGCtbls;
+#else
+ bool dspOffs = !emitComp->opts.disDiffable;
+#endif
+#endif // DEBUG
+
+ assert(REG_NA == (int)REG_NA);
+
+ VARSET_TP VARSET_INIT_NOCOPY(GCvars, VarSetOps::UninitVal());
+
+ /* What instruction format have we got? */
+
+ switch (fmt)
+ {
+ ssize_t imm;
+ ssize_t index;
+ ssize_t index2;
+ unsigned scale;
+ unsigned cmode;
+ unsigned immShift;
+ bool hasShift;
+ emitAttr extSize;
+ emitAttr elemsize;
+ emitAttr datasize;
+
+ case IF_BI_0A: // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
+ case IF_BI_0B: // BI_0B ......iiiiiiiiii iiiiiiiiiii..... simm19:00
+ case IF_LARGEJMP:
+ assert(id->idGCref() == GCT_NONE);
+ assert(id->idIsBound());
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_BI_0C: // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
+ code = emitInsCode(ins, fmt);
+ sz = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc);
+ dst += emitOutputCall(ig, dst, id, code);
+ // Always call RecordRelocation so that we wire in a JumpStub when the target is not directly reachable
+ emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_BRANCH26);
+ break;
+
+ case IF_BI_1A: // BI_1A ......iiiiiiiiii iiiiiiiiiiittttt Rt simm19:00
+ assert(insOptsNone(id->idInsOpt()));
+ assert(id->idIsBound());
+
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_BI_1B: // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00
+ assert(insOptsNone(id->idInsOpt()));
+ assert(id->idIsBound());
+
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_BR_1A: // BR_1A ................ ......nnnnn..... Rn
+ assert(insOptsNone(id->idInsOpt()));
+ assert((ins == INS_ret) || (ins == INS_br));
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_BR_1B: // BR_1B ................ ......nnnnn..... Rn
+ assert(insOptsNone(id->idInsOpt()));
+ assert((ins == INS_br_tail) || (ins == INS_blr));
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeReg_Rn(id->idReg3()); // nnnnn
+
+ sz = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc);
+ dst += emitOutputCall(ig, dst, id, code);
+ break;
+
+ case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB)
+ case IF_LARGELDC:
+ assert(insOptsNone(id->idInsOpt()));
+ assert(id->idIsBound());
+
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn
+ assert(insOptsNone(id->idInsOpt()));
+ code = emitInsCode(ins, fmt);
+ // Is the target a vector register?
+ if (isVectorRegister(id->idReg1()))
+ {
+ code &= 0x3FFFFFFF; // clear the size bits
+ code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
+ code |= insEncodeReg_Vt(id->idReg1()); // ttttt
+ }
+ else
+ {
+ code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+ }
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095)
+ assert(insOptsNone(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ assert(isValidUimm12(imm));
+ code = emitInsCode(ins, fmt);
+ // Is the target a vector register?
+ if (isVectorRegister(id->idReg1()))
+ {
+ code &= 0x3FFFFFFF; // clear the size bits
+ code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
+ code |= insEncodeReg_Vt(id->idReg1()); // ttttt
+ }
+ else
+ {
+ code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+ }
+ code |= ((code_t)imm << 10); // iiiiiiiiiiii
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiPPnnnnnttttt Rt Rn imm(-256..+255) no/pre/post inc
+ assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ assert((imm >= -256) && (imm <= 255)); // signed 9 bits
+ imm &= 0x1ff; // force into unsigned 9 bit representation
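+ // For example (illustrative): imm == -8 is encoded as 0x1F8 in the 9-bit field.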
+ code = emitInsCode(ins, fmt);
+ // Is the target a vector register?
+ if (isVectorRegister(id->idReg1()))
+ {
+ code &= 0x3FFFFFFF; // clear the size bits
+ code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
+ code |= insEncodeReg_Vt(id->idReg1()); // ttttt
+ }
+ else
+ {
+ code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+ }
+ code |= insEncodeIndexedOpt(id->idInsOpt()); // PP
+ code |= ((code_t)imm << 12); // iiiiiiiii
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_LS_3A: // LS_3A .X.......X.mmmmm oooS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {}
+ assert(insOptsLSExtend(id->idInsOpt()));
+ code = emitInsCode(ins, fmt);
+ // Is the target a vector register?
+ if (isVectorRegister(id->idReg1()))
+ {
+ code &= 0x3FFFFFFF; // clear the size bits
+ code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
+ code |= insEncodeReg_Vt(id->idReg1()); // ttttt
+ }
+ else
+ {
+ code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+ }
+ code |= insEncodeExtend(id->idInsOpt()); // ooo
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ if (id->idIsLclVar())
+ {
+ code |= insEncodeReg_Rm(codeGen->rsGetRsvdReg()); // mmmmm
+ }
+ else
+ {
+ code |= insEncodeReg3Scale(id->idReg3Scaled()); // S
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ }
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_LS_3B: // LS_3B X............... .aaaaannnnnddddd Rd Ra Rn
+ assert(insOptsNone(id->idInsOpt()));
+ code = emitInsCode(ins, fmt);
+ // Is the target a vector register?
+ if (isVectorRegister(id->idReg1()))
+ {
+ code &= 0x3FFFFFFF; // clear the size bits
+ code |= insEncodeDatasizeVPLS(code, id->idOpSize()); // XX
+ code |= insEncodeReg_Vt(id->idReg1()); // ttttt
+ code |= insEncodeReg_Va(id->idReg2()); // aaaaa
+ }
+ else
+ {
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+ code |= insEncodeReg_Ra(id->idReg2()); // aaaaa
+ }
+ code |= insEncodeReg_Rn(id->idReg3()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_LS_3C: // LS_3C X......PP.iiiiii iaaaaannnnnddddd Rd Ra Rn imm(im7,sh)
+ assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ assert((imm >= -64) && (imm <= 63)); // signed 7 bits
+ imm &= 0x7f; // force into unsigned 7 bit representation
+ code = emitInsCode(ins, fmt);
+ // Is the target a vector register?
+ if (isVectorRegister(id->idReg1()))
+ {
+ code &= 0x3FFFFFFF; // clear the size bits
+ code |= insEncodeDatasizeVPLS(code, id->idOpSize()); // XX
+ code |= insEncodeReg_Vt(id->idReg1()); // ttttt
+ code |= insEncodeReg_Va(id->idReg2()); // aaaaa
+ }
+ else
+ {
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+ code |= insEncodeReg_Ra(id->idReg2()); // aaaaa
+ }
+ code |= insEncodePairIndexedOpt(ins, id->idInsOpt()); // PP
+ code |= ((code_t)imm << 15); // iiiiiiiii
+ code |= insEncodeReg_Rn(id->idReg3()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_1A: // DI_1A X.......shiiiiii iiiiiinnnnn..... Rn imm(i12,sh)
+ assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ assert(isValidUimm12(imm));
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeShiftImm12(id->idInsOpt()); // sh
+ code |= ((code_t)imm << 10); // iiiiiiiiiiii
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw)
+ imm = emitGetInsSC(id);
+ assert(isValidImmHWVal(imm, id->idOpSize()));
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= ((code_t)imm << 5); // hwiiiii iiiiiiiiiii
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_1C: // DI_1C X........Nrrrrrr ssssssnnnnn..... Rn imm(N,r,s)
+ imm = emitGetInsSC(id);
+ assert(isValidImmNRS(imm, id->idOpSize()));
+ code = emitInsCode(ins, fmt);
+ code |= ((code_t)imm << 10); // Nrrrrrrssssss
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s)
+ imm = emitGetInsSC(id);
+ assert(isValidImmNRS(imm, id->idOpSize()));
+ code = emitInsCode(ins, fmt);
+ code |= ((code_t)imm << 10); // Nrrrrrrssssss
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21
+ case IF_LARGEADR:
+ assert(insOptsNone(id->idInsOpt()));
+ if (id->idIsReloc())
+ {
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ dst += emitOutput_Instr(dst, code);
+ emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_PAGEBASE_REL21);
+ }
+ else
+ {
+ // Local jmp/load case which does not need a relocation.
+ assert(id->idIsBound());
+ dst = emitOutputLJ(ig, dst, id);
+ }
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_DI_1F: // DI_1F X..........iiiii cccc..nnnnn.nzcv Rn imm5 nzcv cond
+ imm = emitGetInsSC(id);
+ assert(isValidImmCondFlagsImm5(imm));
+ {
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ code |= ((code_t)cfi.imm5 << 16); // iiiii
+ code |= insEncodeFlags(cfi.flags); // nzcv
+ code |= insEncodeCond(cfi.cond); // cccc
+ dst += emitOutput_Instr(dst, code);
+ }
+ break;
+
+ case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh)
+ assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ assert(isValidUimm12(imm));
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeShiftImm12(id->idInsOpt()); // sh
+ code |= ((code_t)imm << 10); // iiiiiiiiiiii
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+
+ if (id->idIsReloc())
+ {
+ assert(sz == sizeof(instrDesc));
+ assert(id->idAddr()->iiaAddr != nullptr);
+ emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_PAGEOFFSET_12A);
+ }
+ break;
+
+ case IF_DI_2B: // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+ code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg2()); // Reg2 also in mmmmm
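+ // For example (illustrative): 'ror x0, x1, #4' is the EXTR alias with Rn == Rm == x1,
+ // which is why idReg2 is encoded into both the nnnnn and mmmmm fields here.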
+ code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s)
+ imm = emitGetInsSC(id);
+ assert(isValidImmNRS(imm, id->idOpSize()));
+ code = emitInsCode(ins, fmt);
+ code |= ((code_t)imm << 10); // Nrrrrrrssssss
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, imms (N,r,s)
+ if (ins == INS_asr || ins == INS_lsl || ins == INS_lsr)
+ {
+ imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+
+ // Shift immediates are aliases of the SBFM/UBFM instructions,
+ // which actually take 2 registers and 2 constants.
+ // Since we stored the shift immediate value,
+ // we need to calculate the N, R and S values here.
+
+ bitMaskImm bmi;
+ bmi.immNRS = 0;
+
+ bmi.immN = (size == EA_8BYTE) ? 1 : 0;
+ bmi.immR = imm;
+ bmi.immS = (size == EA_8BYTE) ? 0x3f : 0x1f;
+
+ // immR and immS are now set correctly for INS_asr and INS_lsr
+ // but for INS_lsl we have to adjust the values for immR and immS
+ //
+ if (ins == INS_lsl)
+ {
+ bmi.immR = -imm & bmi.immS;
+ bmi.immS = bmi.immS - imm;
+ }
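+ // For example (illustrative): 'lsl x0, x1, #3' (EA_8BYTE) becomes the alias
+ // 'ubfm x0, x1, #61, #60', i.e. immR == (-3 & 63) == 61 and immS == 63 - 3 == 60.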
+
+ // setup imm with the proper 13 bit value N:R:S
+ //
+ imm = bmi.immNRS;
+ }
+ else
+ {
+ // The other instructions already have encoded N, R and S values
+ imm = emitGetInsSC(id);
+ }
+ assert(isValidImmNRS(imm, id->idOpSize()));
+
+ code = emitInsCode(ins, fmt);
+ code |= ((code_t)imm << 10); // Nrrrrrrssssss
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_1D: // DR_1D X............... cccc.......ddddd Rd cond
+ imm = emitGetInsSC(id);
+ assert(isValidImmCond(imm));
+ {
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeInvertedCond(cfi.cond); // cccc
+ dst += emitOutput_Instr(dst, code);
+ }
+ break;
+
+ case IF_DR_2A: // DR_2A X..........mmmmm ......nnnnn..... Rn Rm
+ assert(insOptsNone(id->idInsOpt()));
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2B: // DR_2B X.......sh.mmmmm ssssssnnnnn..... Rn Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeShiftType(id->idInsOpt()); // sh
+ code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2C: // DR_2C X..........mmmmm ooosssnnnnn..... Rn Rm ext(Rm) LSL imm(0-4)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert((imm >= 0) && (imm <= 4)); // imm [0..4]
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeExtend(id->idInsOpt()); // ooo
+ code |= insEncodeExtendScale(imm); // sss
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond
+ imm = emitGetInsSC(id);
+ assert(isValidImmCond(imm));
+ {
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ code |= insEncodeInvertedCond(cfi.cond); // cccc
+ dst += emitOutput_Instr(dst, code);
+ }
+ break;
+
+ case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeShiftType(id->idInsOpt()); // sh
+ code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2G: // DR_2G X............... .....xnnnnnddddd Rd Rn
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ if (ins == INS_rev)
+ {
+ if (size == EA_8BYTE)
+ {
+ code |= 0x00000400; // x - bit at location 10
+ }
+ }
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2H: // DR_2H X........X...... ......nnnnnddddd Rd Rn
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2I: // DR_2I X..........mmmmm cccc..nnnnn.nzcv Rn Rm nzcv cond
+ imm = emitGetInsSC(id);
+ assert(isValidImmCondFlags(imm));
+ {
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ code |= insEncodeFlags(cfi.flags); // nzcv
+ code |= insEncodeCond(cfi.cond); // cccc
+ dst += emitOutput_Instr(dst, code);
+ }
+ break;
+
+ case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnmmmmm Rd Rn Rm
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ if (id->idIsLclVar())
+ {
+ code |= insEncodeReg_Rm(codeGen->rsGetRsvdReg()); // mmmmm
+ }
+ else
+ {
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ }
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ code |= insEncodeShiftType(id->idInsOpt()); // sh
+ code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_3C: // DR_3C X..........mmmmm ooosssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert((imm >= 0) && (imm <= 4)); // imm [0..4]
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeExtend(id->idInsOpt()); // ooo
+ code |= insEncodeExtendScale(imm); // sss
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnddddd Rd Rn Rm cond
+ imm = emitGetInsSC(id);
+ assert(isValidImmCond(imm));
+ {
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ code |= insEncodeCond(cfi.cond); // cccc
+ dst += emitOutput_Instr(dst, code);
+ }
+ break;
+
+ case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+ code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnmmmmm Rd Rn Rm Ra
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ code |= insEncodeReg_Ra(id->idReg4()); // aaaaa
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_1A: // DV_1A .........X.iiiii iii........ddddd Vd imm8 (fmov - immediate scalar)
+ imm = emitGetInsSC(id);
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= ((code_t)imm << 13); // iiiii iii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_1B: // DV_1B .QX..........iii cmod..iiiiiddddd Vd imm8 (immediate vector)
+ imm = emitGetInsSC(id) & 0x0ff;
+ immShift = (emitGetInsSC(id) & 0x700) >> 8;
+ elemsize = optGetElemsize(id->idInsOpt());
+ cmode = 0;
+ switch (elemsize)
+ { // cmode
+ case EA_1BYTE:
+ cmode = 0xE; // 1110
+ break;
+ case EA_2BYTE:
+ cmode = 0x8;
+ cmode |= (immShift << 1); // 10x0
+ break;
+ case EA_4BYTE:
+ if (immShift < 4)
+ {
+ cmode = 0x0;
+ cmode |= (immShift << 1); // 0xx0
+ }
+ else // MSL
+ {
+ cmode = 0xC;
+ if (immShift & 2)
+ cmode |= 1; // 110x
+ }
+ break;
+ case EA_8BYTE:
+ cmode = 0xE; // 1110
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
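+ // For example (illustrative): a 4S 'movi' with an 8-bit immediate shifted left by 8
+ // (immShift == 1) selects cmode == 0b0010 above.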
+
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ if ((ins == INS_fmov) || (ins == INS_movi))
+ {
+ if (elemsize == EA_8BYTE)
+ {
+ code |= 0x20000000; // X
+ }
+ }
+ if (ins != INS_fmov)
+ {
+ assert((cmode >= 0) && (cmode <= 0xF));
+ code |= (cmode << 12); // cmod
+ }
+ code |= (((code_t)imm >> 5) << 16); // iii
+ code |= (((code_t)imm & 0x1f) << 5); // iiiii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_1C: // DV_1C .........X...... ......nnnnn..... Vn #0.0 (fcmp - with zero)
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeReg_Vn(id->idReg1()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2A: // DV_2A .Q.......X...... ......nnnnnddddd Vd Vn (fabs, fcvt - vector)
+ elemsize = optGetElemsize(id->idInsOpt());
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov/smov - to general)
+ elemsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ datasize = (elemsize == EA_8BYTE) ? EA_16BYTE : EA_8BYTE;
+ if (ins == INS_smov)
+ {
+ datasize = EA_16BYTE;
+ }
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(datasize); // Q
+ code |= insEncodeVectorIndex(elemsize, index); // iiiii
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general)
+ if (ins == INS_dup)
+ {
+ datasize = id->idOpSize();
+ elemsize = optGetElemsize(id->idInsOpt());
+ index = 0;
+ }
+ else // INS_ins
+ {
+ datasize = EA_16BYTE;
+ elemsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ }
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(datasize); // Q
+ code |= insEncodeVectorIndex(elemsize, index); // iiiii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector)
+ index = emitGetInsSC(id);
+ elemsize = optGetElemsize(id->idInsOpt());
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeVectorIndex(elemsize, index); // iiiii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar)
+ index = emitGetInsSC(id);
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorIndex(elemsize, index); // iiiii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element)
+ elemsize = id->idOpSize();
+ imm = emitGetInsSC(id);
+ index = (imm >> 4) & 0xf;
+ index2 = imm & 0xf;
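+ // For example (illustrative): 'ins v0.s[1], v1.s[3]' packs its two indices as
+ // imm == 0x13, so index == 1 (destination) and index2 == 3 (source).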
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorIndex(elemsize, index); // iiiii
+ code |= insEncodeVectorIndex2(elemsize, index2); // jjjj
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov,fcvtXX - register)
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov - to general)
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // X X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2I: // DV_2I X........X...... ......nnnnnddddd Vd Rn (fmov - from general)
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // X X
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt)
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // SS DD
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp)
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeReg_Vn(id->idReg1()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg2()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar)
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeElemsize(elemsize); // XX
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector)
+ elemsize = optGetElemsize(id->idInsOpt());
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeElemsize(elemsize); // XX
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2N: // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar)
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorShift(EA_8BYTE, imm); // iiiiiii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2O: // DV_2O .Q.......iiiiiii ......nnnnnddddd Vd Vn imm (shift - vector)
+ imm = emitGetInsSC(id);
+ elemsize = optGetElemsize(id->idInsOpt());
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeVectorShift(elemsize, imm); // iiiiiii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ code = emitInsCode(ins, fmt);
+ elemsize = optGetElemsize(id->idInsOpt());
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeElemsize(elemsize); // XX
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ elemsize = optGetElemsize(id->idInsOpt());
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeElemsize(elemsize); // XX
+ code |= insEncodeVectorIndexLMH(elemsize, imm); // LM H
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3B: // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ code = emitInsCode(ins, fmt);
+ elemsize = optGetElemsize(id->idInsOpt());
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3BI: // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ elemsize = optGetElemsize(id->idInsOpt());
+ assert(isValidVectorIndex(id->idOpSize(), elemsize, imm));
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeFloatIndex(elemsize, imm); // L H
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeFloatElemsize(id->idOpSize()); // X
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ elemsize = id->idOpSize();
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeFloatIndex(elemsize, imm); // L H
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3E: // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar)
+ code = emitInsCode(ins, fmt);
+ elemsize = id->idOpSize();
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ code |= insEncodeReg_Va(id->idReg4()); // aaaaa
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_SN_0A: // SN_0A ................ ................
+ code = emitInsCode(ins, fmt);
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_SI_0A: // SI_0A ...........iiiii iiiiiiiiiii..... imm16
+ imm = emitGetInsSC(id);
+ assert(isValidUimm16(imm));
+ code = emitInsCode(ins, fmt);
+ code |= ((code_t)imm << 5); // iiiii iiiiiiiiiii
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_SI_0B: // SI_0B ................ ....bbbb........ imm4 - barrier
+ imm = emitGetInsSC(id);
+ assert((imm >= 0) && (imm <= 15));
+ code = emitInsCode(ins, fmt);
+ code |= ((code_t)imm << 8); // bbbb
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ default:
+ assert(!"Unexpected format");
+ break;
+ }
+
+ // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref.
+ // We assume here that "id->idGCref()" is not GCT_NONE only if the instruction described by "id" writes a
+ // GC ref to register "id->idReg1()". (It may, apparently, also not be GCT_NONE in other cases, such as
+ // for stores, but we ignore those cases here.)
+ if (emitInsMayWriteToGCReg(id)) // True if "id->idIns()" writes to a register that can hold a GC ref.
+ {
+ // If we ever generate instructions that write to multiple registers,
+ // then we'd need to do more work here to ensure that changes in the status of GC refs are
+ // tracked properly.
+ if (emitInsMayWriteMultipleRegs(id))
+ {
+ // INS_ldp etc...
+ // We assume that "idReg1" and "idReg2" are the destination register for all instructions
+ emitGCregDeadUpd(id->idReg1(), dst);
+ emitGCregDeadUpd(id->idReg2(), dst);
+ }
+ else
+ {
+ // We assume that "idReg1" is the destination register for all instructions
+ if (id->idGCref() != GCT_NONE)
+ {
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ }
+ else
+ {
+ emitGCregDeadUpd(id->idReg1(), dst);
+ }
+ }
+ }
+
+ // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
+ // ref or overwritten one.
+ if (emitInsWritesToLclVarStackLoc(id))
+ {
+ int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
+ unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), sizeof(size_t));
+ bool FPbased;
+ int adr = emitComp->lvaFrameAddress(varNum, &FPbased);
+ if (id->idGCref() != GCT_NONE)
+ {
+ emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst);
+ }
+ else
+ {
+ // If the type of the local is a gc ref type, update the liveness.
+ var_types vt;
+ if (varNum >= 0)
+ {
+ // "Regular" (non-spill-temp) local.
+ vt = var_types(emitComp->lvaTable[varNum].lvType);
+ }
+ else
+ {
+ TempDsc* tmpDsc = emitComp->tmpFindNum(varNum);
+ vt = tmpDsc->tdTempType();
+ }
+ if (vt == TYP_REF || vt == TYP_BYREF)
+ emitGCvarDeadUpd(adr + ofs, dst);
+ }
+ }
+
+#ifdef DEBUG
+ /* Make sure we set the instruction descriptor size correctly */
+
+ size_t expected = emitSizeOfInsDsc(id);
+ assert(sz == expected);
+
+ if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
+ {
+ emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig);
+ }
+
+ if (emitComp->compDebugBreak)
+ {
+ // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for
+ // emitting instruction a6, (i.e. IN00a6 in jitdump).
+ if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
+ {
+ assert(!"JitBreakEmitOutputInstr reached");
+ }
+ }
+#endif
+
+ /* All instructions are expected to generate code */
+
+ assert(*dp != dst);
+
+ *dp = dst;
+
+ return sz;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Display the instruction name
+ */
+void emitter::emitDispInst(instruction ins)
+{
+ const char* insstr = codeGen->genInsName(ins);
+ size_t len = strlen(insstr);
+
+ /* Display the instruction name */
+
+ printf("%s", insstr);
+
+ //
+ // Add at least one space after the instruction name
+ // and then pad with spaces until we reach the normal width of 8
+ do
+ {
+ printf(" ");
+ len++;
+ } while (len < 8);
+}
+
+/*****************************************************************************
+ *
+ * Display a reloc value
+ * If we are formatting for an assembly listing, don't print the hex value
+ * since it would prevent us from doing assembly diffs
+ */
+void emitter::emitDispReloc(int value, bool addComma)
+{
+ if (emitComp->opts.disAsm)
+ {
+ printf("(reloc)");
+ }
+ else
+ {
+ printf("(reloc 0x%x)", dspPtr(value));
+ }
+
+ if (addComma)
+ printf(", ");
+}
+
+/*****************************************************************************
+ *
+ * Display an immediate value
+ */
+void emitter::emitDispImm(ssize_t imm, bool addComma, bool alwaysHex /* =false */)
+{
+ if (strictArmAsm)
+ {
+ printf("#");
+ }
+
+ // Munge any pointers if we want diff-able disassembly
+ if (emitComp->opts.disDiffable)
+ {
+ ssize_t top44bits = (imm >> 20);
+ if ((top44bits != 0) && (top44bits != -1))
+ imm = 0xD1FFAB1E;
+ }
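+ // For example (illustrative): with diff-able disassembly an embedded pointer such as
+ // 0x00007FFE12345678 is replaced with the sentinel 0xD1FFAB1E, while small constants
+ // are displayed unchanged.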
+
+ if (!alwaysHex && (imm > -1000) && (imm < 1000))
+ {
+ printf("%d", imm);
+ }
+ else
+ {
+ if ((imm < 0) && ((imm & 0xFFFFFFFF00000000LL) == 0xFFFFFFFF00000000LL))
+ {
+ printf("-");
+ imm = -imm;
+ }
+
+ if ((imm & 0xFFFFFFFF00000000LL) != 0)
+ {
+ printf("0x%llx", imm);
+ }
+ else
+ {
+ printf("0x%02x", imm);
+ }
+ }
+
+ if (addComma)
+ printf(", ");
+}
+
+/*****************************************************************************
+ *
+ * Display a float zero constant
+ */
+void emitter::emitDispFloatZero()
+{
+ if (strictArmAsm)
+ {
+ printf("#");
+ }
+ printf("0.0");
+}
+
+/*****************************************************************************
+ *
+ * Display an encoded float constant value
+ */
+void emitter::emitDispFloatImm(ssize_t imm8)
+{
+ assert((0 <= imm8) && (imm8 <= 0x0ff));
+ if (strictArmAsm)
+ {
+ printf("#");
+ }
+
+ floatImm8 fpImm;
+ fpImm.immFPIVal = (unsigned)imm8;
+ double result = emitDecodeFloatImm8(fpImm);
+
+ printf("%.4f", result);
+}
+
+/*****************************************************************************
+ *
+ * Display an immediate that is optionally LSL12.
+ */
+void emitter::emitDispImmOptsLSL12(ssize_t imm, insOpts opt)
+{
+ if (!strictArmAsm && insOptsLSL12(opt))
+ {
+ imm <<= 12;
+ }
+ emitDispImm(imm, false);
+ if (strictArmAsm && insOptsLSL12(opt))
+ {
+ printf(", LSL #12");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display an ARM64 condition code for the conditional instructions
+ */
+void emitter::emitDispCond(insCond cond)
+{
+ const static char* armCond[16] = {"eq", "ne", "hs", "lo", "mi", "pl", "vs", "vc",
+ "hi", "ls", "ge", "lt", "gt", "le", "AL", "NV"}; // The last two are invalid
+ unsigned imm = (unsigned)cond;
+ assert((0 <= imm) && (imm < ArrLen(armCond)));
+ printf(armCond[imm]);
+}
+
+/*****************************************************************************
+ *
+ * Display the ARM64 flags for the conditional instructions
+ */
+void emitter::emitDispFlags(insCflags flags)
+{
+ const static char* armFlags[16] = {"0", "v", "c", "cv", "z", "zv", "zc", "zcv",
+ "n", "nv", "nc", "ncv", "nz", "nzv", "nzc", "nzcv"};
+ unsigned imm = (unsigned)flags;
+ assert((0 <= imm) && (imm < ArrLen(armFlags)));
+ printf(armFlags[imm]);
+}
+
+/*****************************************************************************
+ *
+ * Display an ARM64 'barrier' for the memory barrier instructions
+ */
+void emitter::emitDispBarrier(insBarrier barrier)
+{
+ const static char* armBarriers[16] = {"#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
+ "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy"};
+ unsigned imm = (unsigned)barrier;
+ assert((0 <= imm) && (imm < ArrLen(armBarriers)));
+ printf(armBarriers[imm]);
+}
+
+/*****************************************************************************
+ *
+ * Prints the Shift Type option
+ */
+
+void emitter::emitDispShiftOpts(insOpts opt)
+{
+ if (opt == INS_OPTS_LSL)
+ printf(" LSL ");
+ else if (opt == INS_OPTS_LSR)
+ printf(" LSR ");
+ else if (opt == INS_OPTS_ASR)
+ printf(" ASR ");
+ else if (opt == INS_OPTS_ROR)
+ printf(" ROR ");
+ else if (opt == INS_OPTS_MSL)
+ printf(" MSL ");
+ else
+ assert(!"Bad value");
+}
+
+/*****************************************************************************
+ *
+ * Prints the Extend Type option
+ */
+
+void emitter::emitDispExtendOpts(insOpts opt)
+{
+ if (opt == INS_OPTS_UXTB)
+ printf("UXTB");
+ else if (opt == INS_OPTS_UXTH)
+ printf("UXTH");
+ else if (opt == INS_OPTS_UXTW)
+ printf("UXTW");
+ else if (opt == INS_OPTS_UXTX)
+ printf("UXTX");
+ else if (opt == INS_OPTS_SXTB)
+ printf("SXTB");
+ else if (opt == INS_OPTS_SXTH)
+ printf("SXTH");
+ else if (opt == INS_OPTS_SXTW)
+ printf("SXTW");
+ else if (opt == INS_OPTS_SXTX)
+ printf("SXTX");
+ else
+ assert(!"Bad value");
+}
+
+/*****************************************************************************
+ *
+ * Prints the Extend Type option used in loads/stores
+ */
+
+void emitter::emitDispLSExtendOpts(insOpts opt)
+{
+ if (opt == INS_OPTS_LSL)
+ printf("LSL");
+ else if (opt == INS_OPTS_UXTW)
+ printf("UXTW");
+ else if (opt == INS_OPTS_UXTX)
+ printf("UXTX");
+ else if (opt == INS_OPTS_SXTW)
+ printf("SXTW");
+ else if (opt == INS_OPTS_SXTX)
+ printf("SXTX");
+ else
+ assert(!"Bad value");
+}
+
+/*****************************************************************************
+ *
+ * Display a register
+ */
+void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma)
+{
+ emitAttr size = EA_SIZE(attr);
+ printf(emitRegName(reg, size));
+
+ if (addComma)
+ printf(", ");
+}
+
+/*****************************************************************************
+ *
+ * Display a vector register with an arrangement suffix
+ */
+void emitter::emitDispVectorReg(regNumber reg, insOpts opt, bool addComma)
+{
+ assert(isVectorRegister(reg));
+ printf(emitVectorRegName(reg));
+ emitDispArrangement(opt);
+
+ if (addComma)
+ printf(", ");
+}
+
+/*****************************************************************************
+ *
+ * Display a vector register index suffix
+ */
+void emitter::emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma)
+{
+ assert(isVectorRegister(reg));
+ printf(emitVectorRegName(reg));
+
+ switch (elemsize)
+ {
+ case EA_1BYTE:
+ printf(".b");
+ break;
+ case EA_2BYTE:
+ printf(".h");
+ break;
+ case EA_4BYTE:
+ printf(".s");
+ break;
+ case EA_8BYTE:
+ printf(".d");
+ break;
+ default:
+ assert(!"invalid elemsize");
+ break;
+ }
+
+ printf("[%d]", index);
+
+ if (addComma)
+ printf(", ");
+}
+
+/*****************************************************************************
+ *
+ * Display an arrangement suffix
+ */
+void emitter::emitDispArrangement(insOpts opt)
+{
+ const char* str = "???";
+
+ switch (opt)
+ {
+ case INS_OPTS_8B:
+ str = "8b";
+ break;
+ case INS_OPTS_16B:
+ str = "16b";
+ break;
+ case INS_OPTS_4H:
+ str = "4h";
+ break;
+ case INS_OPTS_8H:
+ str = "8h";
+ break;
+ case INS_OPTS_2S:
+ str = "2s";
+ break;
+ case INS_OPTS_4S:
+ str = "4s";
+ break;
+ case INS_OPTS_1D:
+ str = "1d";
+ break;
+ case INS_OPTS_2D:
+ str = "2d";
+ break;
+
+ default:
+ assert(!"Invalid insOpt for vector register");
+ }
+ printf(".");
+ printf(str);
+}
+
+/*****************************************************************************
+ *
+ * Display a register with an optional shift operation
+ */
+void emitter::emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr)
+{
+ emitAttr size = EA_SIZE(attr);
+ assert((imm & 0x003F) == imm);
+ assert(((imm & 0x0020) == 0) || (size == EA_8BYTE));
+
+ printf(emitRegName(reg, size));
+
+ if (imm > 0)
+ {
+ if (strictArmAsm)
+ {
+ printf(",");
+ }
+ emitDispShiftOpts(opt);
+ emitDispImm(imm, false);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display a register with optional extend and scale operations
+ */
+void emitter::emitDispExtendReg(regNumber reg, insOpts opt, ssize_t imm)
+{
+ assert((imm >= 0) && (imm <= 4));
+ assert(insOptsNone(opt) || insOptsAnyExtend(opt) || (opt == INS_OPTS_LSL));
+
+ // size is based on the extend option, not the instr size.
+ emitAttr size = insOpts32BitExtend(opt) ? EA_4BYTE : EA_8BYTE;
+
+ if (strictArmAsm)
+ {
+ if (insOptsNone(opt))
+ {
+ emitDispReg(reg, size, false);
+ }
+ else
+ {
+ emitDispReg(reg, size, true);
+ if (opt == INS_OPTS_LSL)
+ printf("LSL");
+ else
+ emitDispExtendOpts(opt);
+ if ((imm > 0) || (opt == INS_OPTS_LSL))
+ {
+ printf(" ");
+ emitDispImm(imm, false);
+ }
+ }
+ }
+ else // !strictArmAsm
+ {
+ if (insOptsNone(opt))
+ {
+ emitDispReg(reg, size, false);
+ }
+ else
+ {
+ if (opt != INS_OPTS_LSL)
+ {
+ emitDispExtendOpts(opt);
+ printf("(");
+ emitDispReg(reg, size, false);
+ printf(")");
+ }
+ }
+ if (imm > 0)
+ {
+ printf("*");
+ emitDispImm(1 << imm, false);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg + imm]
+ */
+void emitter::emitDispAddrRI(regNumber reg, insOpts opt, ssize_t imm)
+{
+ reg = encodingZRtoSP(reg); // ZR (R31) encodes the SP register
+
+ if (strictArmAsm)
+ {
+ printf("[");
+
+ emitDispReg(reg, EA_8BYTE, false);
+
+ if (!insOptsPostIndex(opt) && (imm != 0))
+ {
+ printf(",");
+ emitDispImm(imm, false);
+ }
+ printf("]");
+
+ if (insOptsPreIndex(opt))
+ {
+ printf("!");
+ }
+ else if (insOptsPostIndex(opt))
+ {
+ printf(",");
+ emitDispImm(imm, false);
+ }
+ }
+ else // !strictArmAsm
+ {
+ printf("[");
+
+ const char* operStr = "++";
+ if (imm < 0)
+ {
+ operStr = "--";
+ imm = -imm;
+ }
+
+ if (insOptsPreIndex(opt))
+ {
+ printf(operStr);
+ }
+
+ emitDispReg(reg, EA_8BYTE, false);
+
+ if (insOptsPostIndex(opt))
+ {
+ printf(operStr);
+ }
+
+ if (insOptsIndexed(opt))
+ {
+ printf(", ");
+ }
+ else
+ {
+ printf("%c", operStr[1]);
+ }
+ emitDispImm(imm, false);
+ printf("]");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg + extended reg]
+ */
+void emitter::emitDispAddrRRExt(regNumber reg1, regNumber reg2, insOpts opt, bool isScaled, emitAttr size)
+{
+ reg1 = encodingZRtoSP(reg1); // ZR (R31) encodes the SP register
+
+ unsigned scale = 0;
+ if (isScaled)
+ {
+ scale = NaturalScale_helper(size);
+ }
+
+ printf("[");
+
+ if (strictArmAsm)
+ {
+ emitDispReg(reg1, EA_8BYTE, true);
+ emitDispExtendReg(reg2, opt, scale);
+ }
+ else // !strictArmAsm
+ {
+ emitDispReg(reg1, EA_8BYTE, false);
+ printf("+");
+ emitDispExtendReg(reg2, opt, scale);
+ }
+
+ printf("]");
+}
+
+/*****************************************************************************
+ *
+ * Display (optionally) the instruction encoding in hex
+ */
+
+void emitter::emitDispInsHex(BYTE* code, size_t sz)
+{
+ // We do not display the instruction hex if we want diff-able disassembly
+ if (!emitComp->opts.disDiffable)
+ {
+ if (sz == 4)
+ {
+ printf(" %08X ", (*((code_t*)code)));
+ }
+ else
+ {
+ printf(" ");
+ }
+ }
+}
+
+/****************************************************************************
+ *
+ * Display the given instruction.
+ */
+
+void emitter::emitDispIns(
+ instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig)
+{
+ if (EMITVERBOSE)
+ {
+ unsigned idNum =
+ id->idDebugOnlyInfo()->idNum; // Do not remove this! It is needed for VisualStudio conditional breakpoints
+
+ printf("IN%04x: ", idNum);
+ }
+
+ if (pCode == NULL)
+ sz = 0;
+
+ if (!emitComp->opts.dspEmit && !isNew && !asmfm && sz)
+ doffs = true;
+
+ /* Display the instruction offset */
+
+ emitDispInsOffs(offset, doffs);
+
+ /* Display the instruction hex code */
+
+ emitDispInsHex(pCode, sz);
+
+ printf(" ");
+
+ /* Get the instruction and format */
+
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+
+ emitDispInst(ins);
+
+ /* If this instruction has just been added, check its size */
+
+ assert(isNew == false || (int)emitSizeOfInsDsc(id) == emitCurIGfreeNext - (BYTE*)id);
+
+ /* Figure out the operand size */
+ emitAttr size = id->idOpSize();
+ emitAttr attr = size;
+ if (id->idGCref() == GCT_GCREF)
+ attr = EA_GCREF;
+ else if (id->idGCref() == GCT_BYREF)
+ attr = EA_BYREF;
+
+ switch (fmt)
+ {
+ code_t code;
+ ssize_t imm;
+ int doffs;
+ bool isExtendAlias;
+ bool canEncode;
+ bitMaskImm bmi;
+ halfwordImm hwi;
+ condFlagsImm cfi;
+ unsigned scale;
+ unsigned immShift;
+ bool hasShift;
+ ssize_t offs;
+ const char* methodName;
+ emitAttr elemsize;
+ emitAttr datasize;
+ emitAttr srcsize;
+ emitAttr dstsize;
+ ssize_t index;
+ ssize_t index2;
+
+ case IF_BI_0A: // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
+ case IF_BI_0B: // BI_0B ......iiiiiiiiii iiiiiiiiiii..... simm19:00
+ case IF_LARGEJMP:
+ {
+ if (fmt == IF_LARGEJMP)
+ {
+ printf("(LARGEJMP)");
+ }
+ if (id->idAddr()->iiaHasInstrCount())
+ {
+ int instrCount = id->idAddr()->iiaGetInstrCount();
+
+ if (ig == nullptr)
+ {
+ printf("pc%s%d instructions", (instrCount >= 0) ? "+" : "", instrCount);
+ }
+ else
+ {
+ unsigned insNum = emitFindInsNum(ig, id);
+ UNATIVE_OFFSET srcOffs = ig->igOffs + emitFindOffset(ig, insNum + 1);
+ UNATIVE_OFFSET dstOffs = ig->igOffs + emitFindOffset(ig, insNum + 1 + instrCount);
+ ssize_t relOffs = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
+ printf("pc%s%d (%d instructions)", (relOffs >= 0) ? "+" : "", relOffs, instrCount);
+ }
+ }
+ else if (id->idIsBound())
+ {
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ }
+ else
+ {
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ }
+ }
+ break;
+
+ case IF_BI_0C: // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
+ if (id->idIsCallAddr())
+ {
+ offs = (ssize_t)id->idAddr()->iiaAddr;
+ methodName = "";
+ }
+ else
+ {
+ offs = 0;
+ methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
+ }
+
+ if (offs)
+ {
+ if (id->idIsDspReloc())
+ printf("reloc ");
+ printf("%08X", offs);
+ }
+ else
+ {
+ printf("%s", methodName);
+ }
+ break;
+
+ case IF_BI_1A: // BI_1A ......iiiiiiiiii iiiiiiiiiiittttt Rt simm19:00
+ assert(insOptsNone(id->idInsOpt()));
+ emitDispReg(id->idReg1(), size, true);
+ if (id->idIsBound())
+ {
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ }
+ else
+ {
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ }
+ break;
+
+ case IF_BI_1B: // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00
+ assert(insOptsNone(id->idInsOpt()));
+ emitDispReg(id->idReg1(), size, true);
+ emitDispImm(emitGetInsSC(id), true);
+ if (id->idIsBound())
+ {
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ }
+ else
+ {
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ }
+ break;
+
+ case IF_BR_1A: // BR_1A ................ ......nnnnn..... Rn
+ assert(insOptsNone(id->idInsOpt()));
+ emitDispReg(id->idReg1(), size, false);
+ break;
+
+ case IF_BR_1B: // BR_1B ................ ......nnnnn..... Rn
+ assert(insOptsNone(id->idInsOpt()));
+ emitDispReg(id->idReg3(), size, false);
+ break;
+
+ case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB)
+ case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21
+ case IF_LARGELDC:
+ case IF_LARGEADR:
+ assert(insOptsNone(id->idInsOpt()));
+ emitDispReg(id->idReg1(), size, true);
+ imm = emitGetInsSC(id);
+
+ /* Is this actually a reference to a data section? */
+ if (fmt == IF_LARGEADR)
+ {
+ printf("(LARGEADR)");
+ }
+ else if (fmt == IF_LARGELDC)
+ {
+ printf("(LARGELDC)");
+ }
+
+ printf("[");
+ if (id->idAddr()->iiaIsJitDataOffset())
+ {
+ doffs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd);
+ /* Display a data section reference */
+
+ if (doffs & 1)
+ printf("@CNS%02u", doffs - 1);
+ else
+ printf("@RWD%02u", doffs);
+
+ if (imm != 0)
+ printf("%+Id", imm);
+ }
+ else
+ {
+ assert(imm == 0);
+ if (id->idIsReloc())
+ {
+ printf("RELOC ");
+ emitDispImm((ssize_t)id->idAddr()->iiaAddr, false);
+ }
+ else if (id->idIsBound())
+ {
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ }
+ else
+ {
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ }
+ }
+ printf("]");
+ break;
+
+ case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn
+ assert(insOptsNone(id->idInsOpt()));
+ assert(emitGetInsSC(id) == 0);
+ emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+ emitDispAddrRI(id->idReg2(), id->idInsOpt(), 0);
+ break;
+
+ case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095)
+ assert(insOptsNone(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ scale = NaturalScale_helper(emitInsLoadStoreSize(id));
+ imm <<= scale; // The immediate is scaled by the size of the ld/st
+ emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+ emitDispAddrRI(id->idReg2(), id->idInsOpt(), imm);
+ break;
+
+ case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiPPnnnnnttttt Rt Rn imm(-256..+255) no/pre/post inc
+ assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+ emitDispAddrRI(id->idReg2(), id->idInsOpt(), imm);
+ break;
+
+ case IF_LS_3A: // LS_3A .X.......X.mmmmm oooS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {}
+ assert(insOptsLSExtend(id->idInsOpt()));
+ emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+ if (id->idIsLclVar())
+ {
+ emitDispAddrRRExt(id->idReg2(), codeGen->rsGetRsvdReg(), id->idInsOpt(), false, size);
+ }
+ else
+ {
+ emitDispAddrRRExt(id->idReg2(), id->idReg3(), id->idInsOpt(), id->idReg3Scaled(), size);
+ }
+ break;
+
+ case IF_LS_3B: // LS_3B X............... .aaaaannnnnddddd Rt Ra Rn
+ assert(insOptsNone(id->idInsOpt()));
+ assert(emitGetInsSC(id) == 0);
+ emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+ emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true);
+ emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0);
+ break;
+
+ case IF_LS_3C: // LS_3C X.........iiiiii iaaaaannnnnddddd Rt Ra Rn imm(im7,sh)
+ assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ scale = NaturalScale_helper(emitInsLoadStoreSize(id));
+ imm <<= scale;
+ emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+ emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true);
+ emitDispAddrRI(id->idReg3(), id->idInsOpt(), imm);
+ break;
+
+ case IF_DI_1A: // DI_1A X.......shiiiiii iiiiiinnnnn..... Rn imm(i12,sh)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispImmOptsLSL12(emitGetInsSC(id), id->idInsOpt());
+ break;
+
+ case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw)
+ emitDispReg(id->idReg1(), size, true);
+ hwi.immHWVal = (unsigned)emitGetInsSC(id);
+ if (ins == INS_mov)
+ {
+ emitDispImm(emitDecodeHalfwordImm(hwi, size), false);
+ }
+ else // movz, movn, movk
+ {
+ emitDispImm(hwi.immVal, false);
+ if (hwi.immHW != 0)
+ {
+ emitDispShiftOpts(INS_OPTS_LSL);
+ emitDispImm(hwi.immHW * 16, false);
+ }
+ }
+ break;
+
+ case IF_DI_1C: // DI_1C X........Nrrrrrr ssssssnnnnn..... Rn imm(N,r,s)
+ emitDispReg(id->idReg1(), size, true);
+ bmi.immNRS = (unsigned)emitGetInsSC(id);
+ emitDispImm(emitDecodeBitMaskImm(bmi, size), false);
+ break;
+
+ case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s)
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ bmi.immNRS = (unsigned)emitGetInsSC(id);
+ emitDispImm(emitDecodeBitMaskImm(bmi, size), false);
+ break;
+
+ case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh)
+ if ((ins == INS_add) || (ins == INS_sub))
+ {
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ emitDispReg(encodingZRtoSP(id->idReg2()), size, true);
+ }
+ else
+ {
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ }
+ emitDispImmOptsLSL12(emitGetInsSC(id), id->idInsOpt());
+ break;
+
+ case IF_DI_2B: // DI_2B X........X.nnnnn ssssssnnnnnddddd Rd Rn imm(0-63)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispImm(emitGetInsSC(id), false);
+ break;
+
+ case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s)
+ if (ins == INS_ands)
+ {
+ emitDispReg(id->idReg1(), size, true);
+ }
+ else
+ {
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ }
+ emitDispReg(id->idReg2(), size, true);
+ bmi.immNRS = (unsigned)emitGetInsSC(id);
+ emitDispImm(emitDecodeBitMaskImm(bmi, size), false);
+ break;
+
+ case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, ims (N,r,s)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+
+ imm = emitGetInsSC(id);
+ bmi.immNRS = (unsigned)imm;
+
+ switch (ins)
+ {
+ case INS_bfm:
+ case INS_sbfm:
+ case INS_ubfm:
+ emitDispImm(bmi.immR, true);
+ emitDispImm(bmi.immS, false);
+ break;
+
+ case INS_bfi:
+ case INS_sbfiz:
+ case INS_ubfiz:
+ emitDispImm(getBitWidth(size) - bmi.immR, true);
+ emitDispImm(bmi.immS + 1, false);
+ break;
+
+ case INS_bfxil:
+ case INS_sbfx:
+ case INS_ubfx:
+ emitDispImm(bmi.immR, true);
+ emitDispImm(bmi.immS - bmi.immR + 1, false);
+ break;
+
+ case INS_asr:
+ case INS_lsr:
+ case INS_lsl:
+ emitDispImm(imm, false);
+ break;
+
+ default:
+ assert(!"Unexpected instruction in IF_DI_2D");
+ }
+
+ break;
+
+ case IF_DI_1F: // DI_1F X..........iiiii cccc..nnnnn.nzcv Rn imm5 nzcv cond
+ emitDispReg(id->idReg1(), size, true);
+ cfi.immCFVal = (unsigned)emitGetInsSC(id);
+ emitDispImm(cfi.imm5, true);
+ emitDispFlags(cfi.flags);
+ printf(",");
+ emitDispCond(cfi.cond);
+ break;
+
+ case IF_DR_1D: // DR_1D X............... cccc.......mmmmm Rd cond
+ emitDispReg(id->idReg1(), size, true);
+ cfi.immCFVal = (unsigned)emitGetInsSC(id);
+ emitDispCond(cfi.cond);
+ break;
+
+ case IF_DR_2A: // DR_2A X..........mmmmm ......nnnnn..... Rn Rm
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, false);
+ break;
+
+ case IF_DR_2B: // DR_2B X.......sh.mmmmm ssssssnnnnn..... Rn Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispShiftedReg(id->idReg2(), id->idInsOpt(), emitGetInsSC(id), size);
+ break;
+
+ case IF_DR_2C: // DR_2C X..........mmmmm ooosssnnnnn..... Rn Rm ext(Rm) LSL imm(0-4)
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ imm = emitGetInsSC(id);
+ emitDispExtendReg(id->idReg2(), id->idInsOpt(), imm);
+ break;
+
+ case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ cfi.immCFVal = (unsigned)emitGetInsSC(id);
+ emitDispCond(cfi.cond);
+ break;
+
+ case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, false);
+ break;
+
+ case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispShiftedReg(id->idReg2(), id->idInsOpt(), emitGetInsSC(id), size);
+ break;
+
+ case IF_DR_2G: // DR_2G X............... ......nnnnnddddd Rd Rn
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ emitDispReg(encodingZRtoSP(id->idReg2()), size, false);
+ break;
+
+ case IF_DR_2H: // DR_2H X........X...... ......nnnnnddddd Rd Rn
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, false);
+ break;
+
+ case IF_DR_2I: // DR_2I X..........mmmmm cccc..nnnnn.nzcv Rn Rm nzcv cond
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ cfi.immCFVal = (unsigned)emitGetInsSC(id);
+ emitDispFlags(cfi.flags);
+ printf(",");
+ emitDispCond(cfi.cond);
+ break;
+
+ case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnmmmmm Rd Rn Rm
+ if ((ins == INS_add) || (ins == INS_sub))
+ {
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ emitDispReg(encodingZRtoSP(id->idReg2()), size, true);
+ }
+ else
+ {
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ }
+ if (id->idIsLclVar())
+ {
+ emitDispReg(codeGen->rsGetRsvdReg(), size, false);
+ }
+ else
+ {
+ emitDispReg(id->idReg3(), size, false);
+ }
+
+ break;
+
+ case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispShiftedReg(id->idReg3(), id->idInsOpt(), emitGetInsSC(id), size);
+ break;
+
+ case IF_DR_3C: // DR_3C X..........mmmmm ooosssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4)
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ emitDispReg(encodingZRtoSP(id->idReg2()), size, true);
+ imm = emitGetInsSC(id);
+ emitDispExtendReg(id->idReg3(), id->idInsOpt(), imm);
+ break;
+
+ case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnmmmmm Rd Rn Rm cond
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispReg(id->idReg3(), size, true);
+ cfi.immCFVal = (unsigned)emitGetInsSC(id);
+ emitDispCond(cfi.cond);
+ break;
+
+ case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispReg(id->idReg3(), size, true);
+ emitDispImm(emitGetInsSC(id), false);
+ break;
+
+ case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnmmmmm Rd Rn Rm Ra
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispReg(id->idReg3(), size, true);
+ emitDispReg(id->idReg4(), size, false);
+ break;
+
+ case IF_DV_1A: // DV_1A .........X.iiiii iii........ddddd Vd imm8 (fmov - immediate scalar)
+ elemsize = id->idOpSize();
+ emitDispReg(id->idReg1(), elemsize, true);
+ emitDispFloatImm(emitGetInsSC(id));
+ break;
+
+ case IF_DV_1B: // DV_1B .QX..........iii cmod..iiiiiddddd Vd imm8 (immediate vector)
+ imm = emitGetInsSC(id) & 0x0ff;
+ immShift = (emitGetInsSC(id) & 0x700) >> 8;
+ hasShift = (immShift != 0);
+ elemsize = optGetElemsize(id->idInsOpt());
+ if (id->idInsOpt() == INS_OPTS_1D)
+ {
+ assert(elemsize == size);
+ emitDispReg(id->idReg1(), size, true);
+ }
+ else
+ {
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ }
+ if (ins == INS_fmov)
+ {
+ emitDispFloatImm(imm);
+ assert(hasShift == false);
+ }
+ else
+ {
+ if (elemsize == EA_8BYTE)
+ {
+ assert(ins == INS_movi);
+ ssize_t imm64 = 0;
+ const ssize_t mask8 = 0xFF;
+ for (unsigned b = 0; b < 8; b++)
+ {
+ if (imm & (1 << b))
+ {
+ imm64 |= (mask8 << (b * 8));
+ }
+ }
+ emitDispImm(imm64, hasShift, true);
+ }
+ else
+ {
+ emitDispImm(imm, hasShift, true);
+ }
+ if (hasShift)
+ {
+ insOpts opt = (immShift & 0x4) ? INS_OPTS_MSL : INS_OPTS_LSL;
+ unsigned shift = (immShift & 0x3) * 8;
+ emitDispShiftOpts(opt);
+ emitDispImm(shift, false);
+ }
+ }
+ break;
+
+ case IF_DV_1C: // DV_1C .........X...... ......nnnnn..... Vn #0.0 (fcmp - with zero)
+ elemsize = id->idOpSize();
+ emitDispReg(id->idReg1(), elemsize, true);
+ emitDispFloatZero();
+ break;
+
+ case IF_DV_2A: // DV_2A .Q.......X...... ......nnnnnddddd Vd Vn (fabs, fcvt - vector)
+ case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector)
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+ break;
+
+ case IF_DV_2N: // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar)
+ elemsize = id->idOpSize();
+ emitDispReg(id->idReg1(), elemsize, true);
+ emitDispReg(id->idReg2(), elemsize, true);
+ emitDispImm(emitGetInsSC(id), false);
+ break;
+
+ case IF_DV_2O: // DV_2O .Q.......iiiiiii ......nnnnnddddd Vd Vn imm (shift - vector)
+ imm = emitGetInsSC(id);
+ // Do we have a sxtl or uxtl instruction?
+ isExtendAlias = ((ins == INS_sxtl) || (ins == INS_sxtl2) || (ins == INS_uxtl) || (ins == INS_uxtl2));
+ code = emitInsCode(ins, fmt);
+ if (code & 0x00008000) // widen/narrow opcodes
+ {
+ if (code & 0x00002000) // SHL opcodes
+ {
+ emitDispVectorReg(id->idReg1(), optWidenElemsize(id->idInsOpt()), true);
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), !isExtendAlias);
+ }
+ else // SHR opcodes
+ {
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ emitDispVectorReg(id->idReg2(), optWidenElemsize(id->idInsOpt()), !isExtendAlias);
+ }
+ }
+ else
+ {
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), !isExtendAlias);
+ }
+ // Print the immediate unless we have a sxtl or uxtl instruction
+ if (!isExtendAlias)
+ {
+ emitDispImm(imm, false);
+ }
+ break;
+
+ case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov/smov - to general)
+ srcsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ if (ins == INS_smov)
+ {
+ dstsize = EA_8BYTE;
+ }
+ else // INS_umov or INS_mov
+ {
+ dstsize = (srcsize == EA_8BYTE) ? EA_8BYTE : EA_4BYTE;
+ }
+ emitDispReg(id->idReg1(), dstsize, true);
+ emitDispVectorRegIndex(id->idReg2(), srcsize, index, false);
+ break;
+
+ case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general)
+ if (ins == INS_dup)
+ {
+ datasize = id->idOpSize();
+ assert(isValidVectorDatasize(datasize));
+ assert(isValidArrangement(datasize, id->idInsOpt()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ }
+ else // INS_ins
+ {
+ elemsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ assert(isValidVectorElemsize(elemsize));
+ emitDispVectorRegIndex(id->idReg1(), elemsize, index, true);
+ }
+ emitDispReg(id->idReg2(), (elemsize == EA_8BYTE) ? EA_8BYTE : EA_4BYTE, false);
+ break;
+
+ case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector)
+ datasize = id->idOpSize();
+ assert(isValidVectorDatasize(datasize));
+ assert(isValidArrangement(datasize, id->idInsOpt()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ index = emitGetInsSC(id);
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ emitDispVectorRegIndex(id->idReg2(), elemsize, index, false);
+ break;
+
+ case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar)
+ elemsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ emitDispReg(id->idReg1(), elemsize, true);
+ emitDispVectorRegIndex(id->idReg2(), elemsize, index, false);
+ break;
+
+ case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element)
+ imm = emitGetInsSC(id);
+ index = (imm >> 4) & 0xf;
+ index2 = imm & 0xf;
+ elemsize = id->idOpSize();
+ emitDispVectorRegIndex(id->idReg1(), elemsize, index, true);
+ emitDispVectorRegIndex(id->idReg2(), elemsize, index2, false);
+ break;
+
+ case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov, fcvtXX - register)
+ case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp)
+ case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar)
+ elemsize = id->idOpSize();
+ emitDispReg(id->idReg1(), elemsize, true);
+ emitDispReg(id->idReg2(), elemsize, false);
+ break;
+
+ case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov, fcvtXX - to general)
+ case IF_DV_2I: // DV_2I X........X...... ......nnnnnddddd Vd Rn (fmov, Xcvtf - from general)
+ case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt)
+ dstsize = optGetDstsize(id->idInsOpt());
+ srcsize = optGetSrcsize(id->idInsOpt());
+
+ emitDispReg(id->idReg1(), dstsize, true);
+ emitDispReg(id->idReg2(), srcsize, false);
+ break;
+
+ case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ case IF_DV_3B: // DV_3B .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+ emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
+ break;
+
+ case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ if (ins != INS_mov)
+ {
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+ }
+ emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
+ break;
+
+ case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+ case IF_DV_3BI: // DV_3BI .Q........Lmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+ elemsize = optGetElemsize(id->idInsOpt());
+ emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false);
+ break;
+
+ case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ case IF_DV_3E: // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispReg(id->idReg3(), size, false);
+ break;
+
+ case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ elemsize = size;
+ emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false);
+ break;
+
+ case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispReg(id->idReg3(), size, true);
+ emitDispReg(id->idReg4(), size, false);
+ break;
+
+ case IF_SN_0A: // SN_0A ................ ................
+ break;
+
+ case IF_SI_0A: // SI_0A ...........iiiii iiiiiiiiiii..... imm16
+ emitDispImm(emitGetInsSC(id), false);
+ break;
+
+ case IF_SI_0B: // SI_0B ................ ....bbbb........ imm4 - barrier
+ emitDispBarrier((insBarrier)emitGetInsSC(id));
+ break;
+
+ default:
+ printf("unexpected format %s", emitIfName(id->idInsFmt()));
+ assert(!"unexpectedFormat");
+ break;
+ }
+
+ if (id->idDebugOnlyInfo()->idVarRefOffs)
+ {
+ printf("\t// ");
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+ }
+
+ printf("\n");
+}
+
+/*****************************************************************************
+ *
+ * Display a stack frame reference.
+ */
+
+void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
+{
+ printf("[");
+
+ if (varx < 0)
+ printf("TEMP_%02u", -varx);
+ else
+ emitComp->gtDispLclVar(+varx, false);
+
+ if (disp < 0)
+ printf("-0x%02x", -disp);
+ else if (disp > 0)
+ printf("+0x%02x", +disp);
+
+ printf("]");
+
+ if (varx >= 0 && emitComp->opts.varNames)
+ {
+ LclVarDsc* varDsc;
+ const char* varName;
+
+ assert((unsigned)varx < emitComp->lvaCount);
+ varDsc = emitComp->lvaTable + varx;
+ varName = emitComp->compLocalVarName(varx, offs);
+
+ if (varName)
+ {
+ printf("'%s", varName);
+
+ if (disp < 0)
+ printf("-%d", -disp);
+ else if (disp > 0)
+ printf("+%d", +disp);
+
+ printf("'");
+ }
+ }
+}
+
+#endif // DEBUG
+
+// Generate code for a load or store operation with a potentially complex addressing mode
+// This method handles the case of a GT_IND with a contained GT_LEA op1 of the x86 form [base + index*scale + offset].
+// Since Arm64 does not directly support such a complex addressing mode,
+// we may generate up to three instructions for it on Arm64.
+//
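+// As an illustrative sketch only (the register numbers below are hypothetical, not taken from this code),
+// a load of the form [base + index*8 + largeOffset] may be lowered to:
+//
+//     mov   x16, #largeOffset         // instGen_Set_Reg_To_Imm into the reserved tmpReg
+//     add   x16, x16, x1              // tmpReg += base
+//     ldr   x0, [x16, x2, LSL #3]     // load from [tmpReg + index*scale]
+//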
+void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir)
+{
+ emitAttr ldstAttr = isVectorRegister(dataReg) ? attr : emitInsAdjustLoadStoreAttr(ins, attr);
+
+ GenTree* addr = indir->Addr();
+
+ if (addr->isContained())
+ {
+ assert(addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA);
+
+ int offset = 0;
+ DWORD lsl = 0;
+
+ if (addr->OperGet() == GT_LEA)
+ {
+ offset = (int)addr->AsAddrMode()->gtOffset;
+ if (addr->AsAddrMode()->gtScale > 0)
+ {
+ assert(isPow2(addr->AsAddrMode()->gtScale));
+ BitScanForward(&lsl, addr->AsAddrMode()->gtScale);
+ }
+ }
+
+ GenTree* memBase = indir->Base();
+
+ if (indir->HasIndex())
+ {
+ GenTree* index = indir->Index();
+
+ if (offset != 0)
+ {
+ regMaskTP tmpRegMask = indir->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ noway_assert(tmpReg != REG_NA);
+
+ if (emitIns_valid_imm_for_add(offset, EA_8BYTE))
+ {
+ if (lsl > 0)
+ {
+ // Generate code to set tmpReg = base + index*scale
+ emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl,
+ INS_OPTS_LSL);
+ }
+ else // no scale
+ {
+ // Generate code to set tmpReg = base + index
+ emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum);
+ }
+
+ noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
+
+ // Then load/store dataReg from/to [tmpReg + offset]
+ emitIns_R_R_I(ins, ldstAttr, dataReg, tmpReg, offset);
+ }
+ else // large offset
+ {
+ // First load/store tmpReg with the large offset constant
+ codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+ // Then add the base register
+ // rd = rd + base
+ emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, tmpReg, memBase->gtRegNum);
+
+ noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
+ noway_assert(tmpReg != index->gtRegNum);
+
+ // Then load/store dataReg from/to [tmpReg + index*scale]
+ emitIns_R_R_R_I(ins, ldstAttr, dataReg, tmpReg, index->gtRegNum, lsl, INS_OPTS_LSL);
+ }
+ }
+ else // (offset == 0)
+ {
+ if (lsl > 0)
+ {
+ // Then load/store dataReg from/to [memBase + index*scale]
+ emitIns_R_R_R_I(ins, ldstAttr, dataReg, memBase->gtRegNum, index->gtRegNum, lsl, INS_OPTS_LSL);
+ }
+ else // no scale
+ {
+ // Then load/store dataReg from/to [memBase + index]
+ emitIns_R_R_R(ins, ldstAttr, dataReg, memBase->gtRegNum, index->gtRegNum);
+ }
+ }
+ }
+ else // no Index register
+ {
+ if (emitIns_valid_imm_for_ldst_offset(offset, EA_SIZE(attr)))
+ {
+ // Then load/store dataReg from/to [memBase + offset]
+ emitIns_R_R_I(ins, ldstAttr, dataReg, memBase->gtRegNum, offset);
+ }
+ else
+ {
+ // We require a tmpReg to hold the offset
+ regMaskTP tmpRegMask = indir->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ noway_assert(tmpReg != REG_NA);
+
+ // First load/store tmpReg with the large offset constant
+ codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+
+ // Then load/store dataReg from/to [memBase + tmpReg]
+ emitIns_R_R_R(ins, ldstAttr, dataReg, memBase->gtRegNum, tmpReg);
+ }
+ }
+ }
+ else // addr is not contained, so we evaluate it into a register
+ {
+ codeGen->genConsumeReg(addr);
+ // Then load/store dataReg from/to [addrReg]
+ emitIns_R_R(ins, ldstAttr, dataReg, addr->gtRegNum);
+ }
+}
+
+// Generates an integer data section constant and returns a field handle representing
+// the data offset to access the constant via a load instruction.
+// This is called during ngen for any relocatable constants
+//
+CORINFO_FIELD_HANDLE emitter::emitLiteralConst(ssize_t cnsValIn, emitAttr attr /*=EA_8BYTE*/)
+{
+ ssize_t constValue = cnsValIn;
+ void* cnsAddr = &constValue;
+ bool dblAlign;
+
+ if (attr == EA_4BYTE)
+ {
+ dblAlign = false;
+ }
+ else
+ {
+ assert(attr == EA_8BYTE);
+ dblAlign = true;
+ }
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+
+ UNATIVE_OFFSET cnsSize = (attr == EA_4BYTE) ? 4 : 8;
+ UNATIVE_OFFSET cnum = emitDataConst(cnsAddr, cnsSize, dblAlign);
+ return emitComp->eeFindJitDataOffs(cnum);
+}
+
+// Generates a float or double data section constant and returns a field handle representing
+// the data offset to access the constant. This is called by emitInsBinary() in the case
+// of contained float or double constants.
+CORINFO_FIELD_HANDLE emitter::emitFltOrDblConst(GenTreeDblCon* tree, emitAttr attr /*=EA_UNKNOWN*/)
+{
+ if (attr == EA_UNKNOWN)
+ {
+ attr = emitTypeSize(tree->TypeGet());
+ }
+ else
+ {
+ assert(emitTypeSize(tree->TypeGet()) == attr);
+ }
+
+ double constValue = tree->gtDblCon.gtDconVal;
+ void* cnsAddr;
+ float f;
+ bool dblAlign;
+
+ if (attr == EA_4BYTE)
+ {
+ f = forceCastToFloat(constValue);
+ cnsAddr = &f;
+ dblAlign = false;
+ }
+ else
+ {
+ cnsAddr = &constValue;
+ dblAlign = true;
+ }
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+
+ UNATIVE_OFFSET cnsSize = (attr == EA_4BYTE) ? 4 : 8;
+ UNATIVE_OFFSET cnum = emitDataConst(cnsAddr, cnsSize, dblAlign);
+ return emitComp->eeFindJitDataOffs(cnum);
+}
+
+// The callee must call genConsumeReg() for any non-contained srcs
+// and genProduceReg() for any non-contained dsts.
+
+regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
+{
+ regNumber result = REG_NA;
+
+ // dst can only be a reg
+ assert(!dst->isContained());
+
+ // src can be immed or reg
+ assert(!src->isContained() || src->isContainedIntOrIImmed());
+
+ // find immed (if any) - it cannot be a dst
+ GenTreeIntConCommon* intConst = nullptr;
+ if (src->isContainedIntOrIImmed())
+ {
+ intConst = src->AsIntConCommon();
+ }
+
+ if (intConst)
+ {
+ emitIns_R_I(ins, attr, dst->gtRegNum, intConst->IconValue());
+ return dst->gtRegNum;
+ }
+ else
+ {
+ emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum);
+ return dst->gtRegNum;
+ }
+}
+
+// The callee must call genConsumeReg() for any non-contained srcs
+// and genProduceReg() for any non-contained dsts.
+
+regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2)
+{
+ regNumber result = REG_NA;
+
+ // dst can only be a reg
+ assert(!dst->isContained());
+
+ // find immed (if any) - it cannot be a dst
+ // Only one src can be an int.
+ GenTreeIntConCommon* intConst = nullptr;
+ GenTree* nonIntReg = nullptr;
+
+ if (varTypeIsFloating(dst))
+ {
+ // src1 can only be a reg
+ assert(!src1->isContained());
+ // src2 can only be a reg
+ assert(!src2->isContained());
+ }
+ else // not floating point
+ {
+ // src2 can be immed or reg
+ assert(!src2->isContained() || src2->isContainedIntOrIImmed());
+
+ // Check src2 first as we can always allow it to be a contained immediate
+ if (src2->isContainedIntOrIImmed())
+ {
+ intConst = src2->AsIntConCommon();
+ nonIntReg = src1;
+ }
+ // Only for commutative operations do we check src1 and allow it to be a contained immediate
+ else if (dst->OperIsCommutative())
+ {
+ // src1 can be immed or reg
+ assert(!src1->isContained() || src1->isContainedIntOrIImmed());
+
+ // Check src1 and allow it to be a contained immediate
+ if (src1->isContainedIntOrIImmed())
+ {
+ assert(!src2->isContainedIntOrIImmed());
+ intConst = src1->AsIntConCommon();
+ nonIntReg = src2;
+ }
+ }
+ else
+ {
+ // src1 can only be a reg
+ assert(!src1->isContained());
+ }
+ }
+ bool isMulOverflow = false;
+ bool isUnsignedMul = false;
+ regNumber extraReg = REG_NA;
+ if (dst->gtOverflowEx())
+ {
+ if (ins == INS_add)
+ {
+ ins = INS_adds;
+ }
+ else if (ins == INS_sub)
+ {
+ ins = INS_subs;
+ }
+ else if (ins == INS_mul)
+ {
+ isMulOverflow = true;
+ isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0);
+ assert(intConst == nullptr); // overflow format doesn't support an int constant operand
+ }
+ else
+ {
+ assert(!"Invalid ins for overflow check");
+ }
+ }
+ if (intConst != nullptr)
+ {
+ emitIns_R_R_I(ins, attr, dst->gtRegNum, nonIntReg->gtRegNum, intConst->IconValue());
+ }
+ else
+ {
+ if (isMulOverflow)
+ {
+ // Make sure that we have an internal register
+ assert(genCountBits(dst->gtRsvdRegs) == 2);
+
+ // There will be two bits set in tmpRegsMask.
+ // Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask'
+ regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum);
+ assert(tmpRegsMask != RBM_NONE);
+ regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMask to a one-bit mask
+ extraReg = genRegNumFromMask(tmpRegMask); // set extraReg from that mask
+
+ if (isUnsignedMul)
+ {
+ if (attr == EA_4BYTE)
+ {
+ // Compute 8 byte results from 4 byte by 4 byte multiplication.
+ emitIns_R_R_R(INS_umull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+ // Get the high result by shifting dst.
+ emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
+ }
+ else
+ {
+ assert(attr == EA_8BYTE);
+ // Compute the high result.
+ emitIns_R_R_R(INS_umulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
+
+ // Now multiply without skewing the high result.
+ emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+ }
+
+ // Compare the high result against zero to detect overflow.
+ emitIns_R_I(INS_cmp, attr, extraReg, 0);
+ }
+ else
+ {
+ int bitShift = 0;
+ if (attr == EA_4BYTE)
+ {
+ // Compute 8 byte results from 4 byte by 4 byte multiplication.
+ emitIns_R_R_R(INS_smull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+ // Get the high result by shifting dst.
+ emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
+
+ bitShift = 31;
+ }
+ else
+ {
+ assert(attr == EA_8BYTE);
+ // Save the high result in a temporary register.
+ emitIns_R_R_R(INS_smulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
+
+ // Now multiply without skewing the high result.
+ emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+ bitShift = 63;
+ }
+
+ // Sign bit comparison to detect overflow.
+ emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR);
+ }
+ }
+ else
+ {
+ // We can just multiply.
+ emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+ }
+ }
+
+ if (dst->gtOverflowEx())
+ {
+ assert(!varTypeIsFloating(dst));
+ codeGen->genCheckOverflow(dst);
+ }
+
+ return dst->gtRegNum;
+}
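+
+// Illustrative sketch (hypothetical register names) of what the signed 32-bit overflow-checked
+// multiply above emits; genCheckOverflow then branches to the throw block when the comparison fails:
+//
+//     smull  x0, w1, w2          // 64-bit product of the 32-bit sources
+//     lsr    x3, x0, #32         // extraReg = high 32 bits of the product
+//     cmp    w3, w0, ASR #31     // compare the high half with the sign bits of the low half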
+
+#endif // defined(_TARGET_ARM64_)
diff --git a/src/jit/emitarm64.h b/src/jit/emitarm64.h
new file mode 100644
index 0000000000..5459a0d6c8
--- /dev/null
+++ b/src/jit/emitarm64.h
@@ -0,0 +1,909 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if defined(_TARGET_ARM64_)
+
+// The ARM64 instructions are all 32 bits in size.
+// We use an unsigned int to hold the encoded instructions.
+// This typedef defines the type that we use to hold encoded instructions.
+//
+typedef unsigned int code_t;
+
+static bool strictArmAsm;
+
+/************************************************************************/
+/* Routines that compute the size of / encode instructions */
+/************************************************************************/
+
+struct CnsVal
+{
+ ssize_t cnsVal;
+#ifdef RELOC_SUPPORT
+ bool cnsReloc;
+#endif
+};
+
+#ifdef DEBUG
+
+/************************************************************************/
+/* Debug-only routines to display instructions */
+/************************************************************************/
+
+const char* emitFPregName(unsigned reg, bool varName = true);
+const char* emitVectorRegName(regNumber reg);
+
+void emitDispInst(instruction ins);
+void emitDispReloc(int value, bool addComma);
+void emitDispImm(ssize_t imm, bool addComma, bool alwaysHex = false);
+void emitDispFloatZero();
+void emitDispFloatImm(ssize_t imm8);
+void emitDispImmOptsLSL12(ssize_t imm, insOpts opt);
+void emitDispCond(insCond cond);
+void emitDispFlags(insCflags flags);
+void emitDispBarrier(insBarrier barrier);
+void emitDispShiftOpts(insOpts opt);
+void emitDispExtendOpts(insOpts opt);
+void emitDispLSExtendOpts(insOpts opt);
+void emitDispReg(regNumber reg, emitAttr attr, bool addComma);
+void emitDispVectorReg(regNumber reg, insOpts opt, bool addComma);
+void emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma);
+void emitDispArrangement(insOpts opt);
+void emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr);
+void emitDispExtendReg(regNumber reg, insOpts opt, ssize_t imm);
+void emitDispAddrRI(regNumber reg, insOpts opt, ssize_t imm);
+void emitDispAddrRRExt(regNumber reg1, regNumber reg2, insOpts opt, bool isScaled, emitAttr size);
+
+void emitDispIns(instrDesc* id,
+ bool isNew,
+ bool doffs,
+ bool asmfm,
+ unsigned offs = 0,
+ BYTE* pCode = 0,
+ size_t sz = 0,
+ insGroup* ig = NULL);
+#endif // DEBUG
+
+/************************************************************************/
+/* Private members that deal with target-dependent instr. descriptors */
+/************************************************************************/
+
+private:
+instrDesc* emitNewInstrAmd(emitAttr attr, int dsp);
+instrDesc* emitNewInstrAmdCns(emitAttr attr, int dsp, int cns);
+
+instrDesc* emitNewInstrCallDir(int argCnt,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ emitAttr retSize,
+ emitAttr secondRetSize);
+
+instrDesc* emitNewInstrCallInd(int argCnt,
+ ssize_t disp,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ emitAttr retSize,
+ emitAttr secondRetSize);
+
+void emitGetInsCns(instrDesc* id, CnsVal* cv);
+ssize_t emitGetInsAmdCns(instrDesc* id, CnsVal* cv);
+void emitGetInsDcmCns(instrDesc* id, CnsVal* cv);
+ssize_t emitGetInsAmdAny(instrDesc* id);
+
+/************************************************************************/
+/* Private helpers for instruction output */
+/************************************************************************/
+
+private:
+bool emitInsIsCompare(instruction ins);
+bool emitInsIsLoad(instruction ins);
+bool emitInsIsStore(instruction ins);
+bool emitInsIsLoadOrStore(instruction ins);
+emitAttr emitInsAdjustLoadStoreAttr(instruction ins, emitAttr attr);
+emitAttr emitInsTargetRegSize(instrDesc* id);
+emitAttr emitInsLoadStoreSize(instrDesc* id);
+
+emitter::insFormat emitInsFormat(instruction ins);
+emitter::code_t emitInsCode(instruction ins, insFormat fmt);
+
+// Generate code for a load or store operation and handle the case of contained GT_LEA op1 with [base + index<<scale +
+// offset]
+void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir);
+
+// Emit the 32-bit Arm64 instruction 'code' into the 'dst' buffer
+static unsigned emitOutput_Instr(BYTE* dst, code_t code);
+
+// A helper method to return the natural scale for an EA 'size'
+static unsigned NaturalScale_helper(emitAttr size);
+
+// A helper method to perform a Rotate-Right shift operation
+static UINT64 ROR_helper(UINT64 value, unsigned sh, unsigned width);
+
+// A helper method to perform a 'NOT' bitwise complement operation
+static UINT64 NOT_helper(UINT64 value, unsigned width);
+
+// A helper method to perform a bit Replicate operation
+static UINT64 Replicate_helper(UINT64 value, unsigned width, emitAttr size);
+
+/************************************************************************
+*
+* This union is used to encode/decode the special ARM64 immediate values
+* that are listed as imm(N,r,s) and referred to as 'bitmask immediate'
+*/
+
+union bitMaskImm {
+ struct
+ {
+ unsigned immS : 6; // bits 0..5
+ unsigned immR : 6; // bits 6..11
+ unsigned immN : 1; // bits 12
+ };
+ unsigned immNRS; // concat N:R:S forming a 13-bit unsigned immediate
+};
+
+/************************************************************************
+*
+* Convert between a 64-bit immediate and its 'bitmask immediate'
+* representation imm(N,r,s)
+*/
+
+static emitter::bitMaskImm emitEncodeBitMaskImm(INT64 imm, emitAttr size);
+
+static INT64 emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size);
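+
+// Illustrative note (example values, not exhaustive): a 'bitmask immediate' can only describe a
+// rotated run of ones replicated across the register, so values such as 0x00FF00FF00FF00FF or
+// 0x5555555555555555 are encodable, while an arbitrary value like 0x1234 is not and must be
+// materialized another way (e.g. via movz/movk).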
+
+/************************************************************************
+*
+* This union is used to encode/decode the special ARM64 immediate values
+* that are listed as imm(i16,hw) and referred to as 'halfword immediate'
+*/
+
+union halfwordImm {
+ struct
+ {
+ unsigned immVal : 16; // bits 0..15
+ unsigned immHW : 2; // bits 16..17
+ };
+ unsigned immHWVal; // concat HW:Val forming an 18-bit unsigned immediate
+};
+
+/************************************************************************
+*
+* Convert between a 64-bit immediate and its 'halfword immediate'
+* representation imm(i16,hw)
+*/
+
+static emitter::halfwordImm emitEncodeHalfwordImm(INT64 imm, emitAttr size);
+
+static INT64 emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size);
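+
+// Illustrative note: a 'halfword immediate' is a single 16-bit chunk placed at one of the
+// halfword positions (hw = 0..3), so a value like 0xABCD0000 fits (movz with hw=1) while
+// 0x12345678 does not and is typically built with a movz/movk sequence instead.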
+
+/************************************************************************
+*
+* This union is used to encode/decode the special ARM64 immediate values
+* that are listed as imm(i8,by) and referred to as 'byteShifted immediate'
+*/
+
+union byteShiftedImm {
+ struct
+ {
+ unsigned immVal : 8; // bits 0..7
+ unsigned immBY : 2; // bits 8..9
+ unsigned immOnes : 1; // bit 10
+ };
+ unsigned immBSVal; // concat Ones:BY:Val forming a 10-bit unsigned immediate
+};
+
+/************************************************************************
+*
+* Convert between a 16/32-bit immediate and its 'byteShifted immediate'
+* representation imm(i8,by)
+*/
+
+static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL);
+
+static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size);
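+
+// Illustrative note: a 'byteShifted immediate' is an 8-bit value shifted left by a whole number
+// of bytes (LSL) or shifted with the low bits filled with ones (MSL); for example, a per-lane
+// 32-bit constant of 0x00005500 can be encoded as #0x55, LSL #8.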
+
+/************************************************************************
+*
+* This union is used to encode/decode the special ARM64 immediate values
+* that are used for FMOV immediate and referred to as 'float 8-bit immediate'
+*/
+
+union floatImm8 {
+ struct
+ {
+ unsigned immMant : 4; // bits 0..3
+ unsigned immExp : 3; // bits 4..6
+ unsigned immSign : 1; // bits 7
+ };
+ unsigned immFPIVal; // concat Sign:Exp:Mant forming an 8-bit unsigned immediate
+};
+
+/************************************************************************
+*
+* Convert between a double and its 'float 8-bit immediate' representation
+*/
+
+static emitter::floatImm8 emitEncodeFloatImm8(double immDbl);
+
+static double emitDecodeFloatImm8(const emitter::floatImm8 fpImm);
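+
+// Illustrative note: the 8-bit float immediate covers values of the form (+/-)(16..31)/16 * 2^n
+// for a small exponent range, so constants such as 1.0, 0.5, 2.0 or -3.0 can be encoded directly
+// by fmov, while a value like 0.1 cannot and must be loaded from the data section instead.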
+
+/************************************************************************
+*
+* This union is used to encode/decode the cond, nzcv and imm5 values for
+* instructions that use them in the small constant immediate field
+*/
+
+union condFlagsImm {
+ struct
+ {
+ insCond cond : 4; // bits 0..3
+ insCflags flags : 4; // bits 4..7
+ unsigned imm5 : 5; // bits 8..12
+ };
+ unsigned immCFVal; // concat imm5:flags:cond forming a 13-bit unsigned immediate
+};
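+
+// Illustrative note (the operands below are hypothetical examples): for a conditional compare
+// such as 'ccmp x1, #10, #0, eq' the emitter packs imm5 = 10, flags = 0 and cond = EQ into a
+// single immCFVal constant using the field layout above.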
+
+// Returns an encoding for the specified register used in the 'Rd' position
+static code_t insEncodeReg_Rd(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Rt' position
+static code_t insEncodeReg_Rt(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Rn' position
+static code_t insEncodeReg_Rn(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Rm' position
+static code_t insEncodeReg_Rm(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Ra' position
+static code_t insEncodeReg_Ra(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Vd' position
+static code_t insEncodeReg_Vd(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Vt' position
+static code_t insEncodeReg_Vt(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Vn' position
+static code_t insEncodeReg_Vn(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Vm' position
+static code_t insEncodeReg_Vm(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Va' position
+static code_t insEncodeReg_Va(regNumber reg);
+
+// Returns an encoding for the imm which represents the condition code.
+static code_t insEncodeCond(insCond cond);
+
+// Returns an encoding for the imm which represents the 'condition code'
+// with the lowest bit inverted (marked by invert(<cond>) in the architecture manual).
+static code_t insEncodeInvertedCond(insCond cond);
+
+// Returns an encoding for the imm which represents the flags.
+static code_t insEncodeFlags(insCflags flags);
+
+// Returns the encoding for the Shift Count bits to be used for Arm64 encodings
+static code_t insEncodeShiftCount(ssize_t imm, emitAttr size);
+
+// Returns the encoding to select the datasize for most Arm64 instructions
+static code_t insEncodeDatasize(emitAttr size);
+
+// Returns the encoding to select the datasize for the general load/store Arm64 instructions
+static code_t insEncodeDatasizeLS(code_t code, emitAttr size);
+
+// Returns the encoding to select the datasize for the vector load/store Arm64 instructions
+static code_t insEncodeDatasizeVLS(code_t code, emitAttr size);
+
+// Returns the encoding to select the datasize for the vector load/store pair Arm64 instructions
+static code_t insEncodeDatasizeVPLS(code_t code, emitAttr size);
+
+// Returns the encoding to select the datasize for bitfield Arm64 instructions
+static code_t insEncodeDatasizeBF(code_t code, emitAttr size);
+
+// Returns the encoding to select the vectorsize for SIMD Arm64 instructions
+static code_t insEncodeVectorsize(emitAttr size);
+
+// Returns the encoding to select 'index' for an Arm64 vector elem instruction
+static code_t insEncodeVectorIndex(emitAttr elemsize, ssize_t index);
+
+// Returns the encoding to select 'index2' for an Arm64 'ins' elem instruction
+static code_t insEncodeVectorIndex2(emitAttr elemsize, ssize_t index2);
+
+// Returns the encoding to select 'index' for an Arm64 'mul' elem instruction
+static code_t insEncodeVectorIndexLMH(emitAttr elemsize, ssize_t index);
+
+// Returns the encoding to shift by 'shift' bits for an Arm64 vector or scalar instruction
+static code_t insEncodeVectorShift(emitAttr size, ssize_t shift);
+
+// Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 vector instruction
+static code_t insEncodeElemsize(emitAttr size);
+
+// Returns the encoding to select the 4/8 byte elemsize for an Arm64 float vector instruction
+static code_t insEncodeFloatElemsize(emitAttr size);
+
+// Returns the encoding to select the index for an Arm64 float vector by elem instruction
+static code_t insEncodeFloatIndex(emitAttr elemsize, ssize_t index);
+
+// Returns the encoding to select the 'conversion' operation for a type 'fmt' Arm64 instruction
+static code_t insEncodeConvertOpt(insFormat fmt, insOpts conversion);
+
+// Returns the encoding to have the Rn register of a ld/st reg be Pre/Post/Not indexed updated
+static code_t insEncodeIndexedOpt(insOpts opt);
+
+// Returns the encoding to have the Rn register of a ld/st pair be Pre/Post/Not indexed updated
+static code_t insEncodePairIndexedOpt(instruction ins, insOpts opt);
+
+// Returns the encoding to apply a Shift Type on the Rm register
+static code_t insEncodeShiftType(insOpts opt);
+
+// Returns the encoding to apply a 12 bit left shift to the immediate
+static code_t insEncodeShiftImm12(insOpts opt);
+
+// Returns the encoding to have the Rm register use an extend operation
+static code_t insEncodeExtend(insOpts opt);
+
+// Returns the encoding to scale the Rm register by {0,1,2,3,4} in an extend operation
+static code_t insEncodeExtendScale(ssize_t imm);
+
+// Returns the encoding to have the Rm register be auto scaled by the ld/st size
+static code_t insEncodeReg3Scale(bool isScaled);
+
+// Returns true if 'reg' represents an integer register.
+static bool isIntegerRegister(regNumber reg)
+{
+ return (reg >= REG_INT_FIRST) && (reg <= REG_INT_LAST);
+}
+
+// Returns true if 'value' is a legal unsigned immediate 8 bit encoding (such as for FMOV).
+static bool isValidUimm8(ssize_t value)
+{
+ return (0 <= value) && (value <= 0xFFLL);
+};
+
+// Returns true if 'value' is a legal unsigned immediate 12 bit encoding (such as for CMP, CMN).
+static bool isValidUimm12(ssize_t value)
+{
+ return (0 <= value) && (value <= 0xFFFLL);
+};
+
+// Returns true if 'value' is a legal unsigned immediate 16 bit encoding (such as for MOVZ, MOVN, MOVK).
+static bool isValidUimm16(ssize_t value)
+{
+ return (0 <= value) && (value <= 0xFFFFLL);
+};
+
+// Returns true if 'value' is a legal signed immediate 26 bit encoding (such as for B or BL).
+static bool isValidSimm26(ssize_t value)
+{
+ return (-0x2000000LL <= value) && (value <= 0x1FFFFFFLL);
+};
+
+// Returns true if 'value' is a legal signed immediate 19 bit encoding (such as for B.cond, CBNZ, CBZ).
+static bool isValidSimm19(ssize_t value)
+{
+ return (-0x40000LL <= value) && (value <= 0x3FFFFLL);
+};
+
+// Returns true if 'value' is a legal signed immediate 14 bit encoding (such as for TBNZ, TBZ).
+static bool isValidSimm14(ssize_t value)
+{
+ return (-0x2000LL <= value) && (value <= 0x1FFFLL);
+};
+
+// Returns true if 'value' represents a valid 'bitmask immediate' encoding.
+static bool isValidImmNRS(size_t value, emitAttr size)
+{
+ return (value >= 0) && (value < 0x2000);
+} // any unsigned 13-bit immediate
+
+// Returns true if 'value' represents a valid 'halfword immediate' encoding.
+static bool isValidImmHWVal(size_t value, emitAttr size)
+{
+ return (value >= 0) && (value < 0x40000);
+} // any unsigned 18-bit immediate
+
+// Returns true if 'value' represents a valid 'byteShifted immediate' encoding.
+static bool isValidImmBSVal(size_t value, emitAttr size)
+{
+ return (value >= 0) && (value < 0x800);
+} // any unsigned 11-bit immediate
+
+// The return value replaces REG_ZR with REG_SP
+static regNumber encodingZRtoSP(regNumber reg)
+{
+ return (reg == REG_ZR) ? REG_SP : reg;
+} // ZR (R31) encodes the SP register
+
+// The return value replaces REG_SP with REG_ZR
+static regNumber encodingSPtoZR(regNumber reg)
+{
+ return (reg == REG_SP) ? REG_ZR : reg;
+} // SP is encoded using ZR (R31)
+
+// For the given 'ins' returns the reverse instruction, if one exists, otherwise returns INS_INVALID
+static instruction insReverse(instruction ins);
+
+// For the given 'datasize' and 'elemsize' returns the insOpts that specifies the vector register arrangement
+static insOpts optMakeArrangement(emitAttr datasize, emitAttr elemsize);
+
+// For the given 'datasize' and 'opt' returns true if it specifies a valid vector register arrangement
+static bool isValidArrangement(emitAttr datasize, insOpts opt);
+
+// For the given 'arrangement' returns the 'datasize' specified by the vector register arrangement
+static emitAttr optGetDatasize(insOpts arrangement);
+
+// For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement
+static emitAttr optGetElemsize(insOpts arrangement);
+
+// For the given 'arrangement' returns the 'widen-arrangement' specified by the vector register arrangement
+static insOpts optWidenElemsize(insOpts arrangement);
+
+// For the given 'conversion' returns the 'dstsize' specified by the conversion option
+static emitAttr optGetDstsize(insOpts conversion);
+
+// For the given 'conversion' returns the 'srcsize' specified by the conversion option
+static emitAttr optGetSrcsize(insOpts conversion);
+
+// For the given 'datasize', 'elemsize' and 'index' returns true, if it specifies a valid 'index'
+// for an element of size 'elemsize' in a vector register of size 'datasize'
+static bool isValidVectorIndex(emitAttr datasize, emitAttr elemsize, ssize_t index);
+
+/************************************************************************/
+/* Public inline informational methods */
+/************************************************************************/
+
+public:
+// true if this 'imm' can be encoded as an input operand to a mov instruction
+static bool emitIns_valid_imm_for_mov(INT64 imm, emitAttr size);
+
+// true if this 'imm' can be encoded as an input operand to a vector movi instruction
+static bool emitIns_valid_imm_for_movi(INT64 imm, emitAttr size);
+
+// true if this 'immDbl' can be encoded as an input operand to a fmov instruction
+static bool emitIns_valid_imm_for_fmov(double immDbl);
+
+// true if this 'imm' can be encoded as an input operand to an add instruction
+static bool emitIns_valid_imm_for_add(INT64 imm, emitAttr size);
+
+// true if this 'imm' can be encoded as an input operand to a cmp instruction
+static bool emitIns_valid_imm_for_cmp(INT64 imm, emitAttr size);
+
+// true if this 'imm' can be encoded as an input operand to an alu instruction
+static bool emitIns_valid_imm_for_alu(INT64 imm, emitAttr size);
+
+// true if this 'imm' can be encoded as the offset in a ldr/str instruction
+static bool emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr size);
+
+// true if 'imm' can be encoded using the 'shifted left by 12 bits' form
+static bool canEncodeWithShiftImmBy12(INT64 imm);
+
+// Normalize the 'imm' so that the upper bits, as defined by 'size', are zero
+static INT64 normalizeImm64(INT64 imm, emitAttr size);
+
+// Normalize the 'imm' so that the upper bits, as defined by 'size', are zero
+static INT32 normalizeImm32(INT32 imm, emitAttr size);
+
+// true if 'imm' can be encoded using a 'bitmask immediate', also returns the encoding if wbBMI is non-null
+static bool canEncodeBitMaskImm(INT64 imm, emitAttr size, emitter::bitMaskImm* wbBMI = nullptr);
+
+// true if 'imm' can be encoded using a 'halfword immediate', also returns the encoding if wbHWI is non-null
+static bool canEncodeHalfwordImm(INT64 imm, emitAttr size, emitter::halfwordImm* wbHWI = nullptr);
+
+// true if 'imm' can be encoded using a 'byteShifted immediate', also returns the encoding if wbBSI is non-null
+static bool canEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL, emitter::byteShiftedImm* wbBSI = nullptr);
+
+// true if 'immDbl' can be encoded using a 'float immediate', also returns the encoding if wbFPI is non-null
+static bool canEncodeFloatImm8(double immDbl, emitter::floatImm8* wbFPI = nullptr);
+
+// Returns the number of bits used by the given 'size'.
+inline static unsigned getBitWidth(emitAttr size)
+{
+ assert(size <= EA_8BYTE);
+ return (unsigned)size * BITS_PER_BYTE;
+}
+
+// Returns true if the imm represents a valid bit shift or bit position for the given 'size' [0..31] or [0..63]
+inline static unsigned isValidImmShift(ssize_t imm, emitAttr size)
+{
+ return (imm >= 0) && (imm < getBitWidth(size));
+}
+
+inline static bool isValidGeneralDatasize(emitAttr size)
+{
+ return (size == EA_8BYTE) || (size == EA_4BYTE);
+}
+
+inline static bool isValidScalarDatasize(emitAttr size)
+{
+ return (size == EA_8BYTE) || (size == EA_4BYTE);
+}
+
+inline static bool isValidVectorDatasize(emitAttr size)
+{
+ return (size == EA_16BYTE) || (size == EA_8BYTE);
+}
+
+inline static bool isValidGeneralLSDatasize(emitAttr size)
+{
+ return (size == EA_8BYTE) || (size == EA_4BYTE) || (size == EA_2BYTE) || (size == EA_1BYTE);
+}
+
+inline static bool isValidVectorLSDatasize(emitAttr size)
+{
+ return (size == EA_16BYTE) || (size == EA_8BYTE) || (size == EA_4BYTE) || (size == EA_2BYTE) || (size == EA_1BYTE);
+}
+
+inline static bool isValidVectorLSPDatasize(emitAttr size)
+{
+ return (size == EA_16BYTE) || (size == EA_8BYTE) || (size == EA_4BYTE);
+}
+
+inline static bool isValidVectorElemsize(emitAttr size)
+{
+ return (size == EA_8BYTE) || (size == EA_4BYTE) || (size == EA_2BYTE) || (size == EA_1BYTE);
+}
+
+inline static bool isValidVectorFcvtsize(emitAttr size)
+{
+ return (size == EA_8BYTE) || (size == EA_4BYTE) || (size == EA_2BYTE);
+}
+
+inline static bool isValidVectorElemsizeFloat(emitAttr size)
+{
+ return (size == EA_8BYTE) || (size == EA_4BYTE);
+}
+
+inline static bool isGeneralRegister(regNumber reg)
+{
+ return (reg >= REG_INT_FIRST) && (reg <= REG_LR);
+} // Excludes REG_ZR
+
+inline static bool isGeneralRegisterOrZR(regNumber reg)
+{
+ return (reg >= REG_INT_FIRST) && (reg <= REG_ZR);
+} // Includes REG_ZR
+
+inline static bool isGeneralRegisterOrSP(regNumber reg)
+{
+ return isGeneralRegister(reg) || (reg == REG_SP);
+} // Includes REG_SP, Excludes REG_ZR
+
+inline static bool isVectorRegister(regNumber reg)
+{
+ return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST);
+}
+
+inline static bool isFloatReg(regNumber reg)
+{
+ return isVectorRegister(reg);
+}
+
+inline static bool insOptsNone(insOpts opt)
+{
+ return (opt == INS_OPTS_NONE);
+}
+
+inline static bool insOptsIndexed(insOpts opt)
+{
+ return (opt == INS_OPTS_PRE_INDEX) || (opt == INS_OPTS_POST_INDEX);
+}
+
+inline static bool insOptsPreIndex(insOpts opt)
+{
+ return (opt == INS_OPTS_PRE_INDEX);
+}
+
+inline static bool insOptsPostIndex(insOpts opt)
+{
+ return (opt == INS_OPTS_POST_INDEX);
+}
+
+inline static bool insOptsLSL12(insOpts opt) // special 12-bit shift only used for imm12
+{
+ return (opt == INS_OPTS_LSL12);
+}
+
+inline static bool insOptsAnyShift(insOpts opt)
+{
+ return ((opt >= INS_OPTS_LSL) && (opt <= INS_OPTS_ROR));
+}
+
+inline static bool insOptsAluShift(insOpts opt) // excludes ROR
+{
+ return ((opt >= INS_OPTS_LSL) && (opt <= INS_OPTS_ASR));
+}
+
+inline static bool insOptsVectorImmShift(insOpts opt)
+{
+ return ((opt == INS_OPTS_LSL) || (opt == INS_OPTS_MSL));
+}
+
+inline static bool insOptsLSL(insOpts opt)
+{
+ return (opt == INS_OPTS_LSL);
+}
+
+inline static bool insOptsLSR(insOpts opt)
+{
+ return (opt == INS_OPTS_LSR);
+}
+
+inline static bool insOptsASR(insOpts opt)
+{
+ return (opt == INS_OPTS_ASR);
+}
+
+inline static bool insOptsROR(insOpts opt)
+{
+ return (opt == INS_OPTS_ROR);
+}
+
+inline static bool insOptsAnyExtend(insOpts opt)
+{
+ return ((opt >= INS_OPTS_UXTB) && (opt <= INS_OPTS_SXTX));
+}
+
+inline static bool insOptsLSExtend(insOpts opt)
+{
+ return ((opt == INS_OPTS_NONE) || (opt == INS_OPTS_LSL) || (opt == INS_OPTS_UXTW) || (opt == INS_OPTS_SXTW) ||
+ (opt == INS_OPTS_UXTX) || (opt == INS_OPTS_SXTX));
+}
+
+inline static bool insOpts32BitExtend(insOpts opt)
+{
+ return ((opt == INS_OPTS_UXTW) || (opt == INS_OPTS_SXTW));
+}
+
+inline static bool insOpts64BitExtend(insOpts opt)
+{
+ return ((opt == INS_OPTS_UXTX) || (opt == INS_OPTS_SXTX));
+}
+
+inline static bool insOptsAnyArrangement(insOpts opt)
+{
+ return ((opt >= INS_OPTS_8B) && (opt <= INS_OPTS_2D));
+}
+
+inline static bool insOptsConvertFloatToFloat(insOpts opt)
+{
+ return ((opt >= INS_OPTS_S_TO_D) && (opt <= INS_OPTS_D_TO_H));
+}
+
+inline static bool insOptsConvertFloatToInt(insOpts opt)
+{
+ return ((opt >= INS_OPTS_S_TO_4BYTE) && (opt <= INS_OPTS_D_TO_8BYTE));
+}
+
+inline static bool insOptsConvertIntToFloat(insOpts opt)
+{
+ return ((opt >= INS_OPTS_4BYTE_TO_S) && (opt <= INS_OPTS_8BYTE_TO_D));
+}
+
+static bool isValidImmCond(ssize_t imm);
+static bool isValidImmCondFlags(ssize_t imm);
+static bool isValidImmCondFlagsImm5(ssize_t imm);
+
+/************************************************************************/
+/* The public entry points to output instructions */
+/************************************************************************/
+
+public:
+void emitIns(instruction ins);
+
+void emitIns_I(instruction ins, emitAttr attr, ssize_t imm);
+
+void emitIns_R(instruction ins, emitAttr attr, regNumber reg);
+
+void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_F(instruction ins, emitAttr attr, regNumber reg, double immDbl, insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags)
+{
+ emitIns_R_R(ins, attr, reg1, reg2);
+}
+
+void emitIns_R_I_I(
+ instruction ins, emitAttr attr, regNumber reg1, ssize_t imm1, ssize_t imm2, insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R_I(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt = INS_OPTS_NONE);
+
+// Checks for a large immediate that needs a second instruction
+void emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm);
+
+void emitIns_R_R_R(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R_R_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ ssize_t imm,
+ insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R_R_Ext(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ insOpts opt = INS_OPTS_NONE,
+ int shiftAmount = -1);
+
+void emitIns_R_R_I_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2);
+
+void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4);
+
+void emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond);
+
+void emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCond cond);
+
+void emitIns_R_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insCond cond);
+
+void emitIns_R_R_FLAGS_COND(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCflags flags, insCond cond);
+
+void emitIns_R_I_FLAGS_COND(instruction ins, emitAttr attr, regNumber reg1, int imm, insCflags flags, insCond cond);
+
+void emitIns_BARR(instruction ins, insBarrier barrier);
+
+void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs);
+
+void emitIns_S(instruction ins, emitAttr attr, int varx, int offs);
+
+void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+
+void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+
+void emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val);
+
+void emitIns_R_C(
+ instruction ins, emitAttr attr, regNumber reg, regNumber tmpReg, CORINFO_FIELD_HANDLE fldHnd, int offs);
+
+void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs);
+
+void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val);
+
+void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg);
+
+void emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg);
+
+void emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg);
+
+void emitIns_I_AR(
+ instruction ins, emitAttr attr, int val, regNumber reg, int offs, int memCookie = 0, void* clsCookie = NULL);
+
+void emitIns_R_AR(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs, int memCookie = 0, void* clsCookie = NULL);
+
+void emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp);
+
+void emitIns_AR_R(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs, int memCookie = 0, void* clsCookie = NULL);
+
+void emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_R_ARX(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp);
+
+enum EmitCallType
+{
+
+ // I have included here, but commented out, all the values used by the x86 emitter.
+ // However, ARM has a much reduced instruction set, so the ARM emitter only
+ // supports a subset of the x86 variants. Leaving the rest commented out makes it
+ // a compile-time error if code tries to use them (and hopefully whoever hits that
+ // error will see this comment and understand why they are unavailable on ARM),
+ // while making it easier to stay in sync with x86 and possibly add them back in
+ // if needed.
+
+ EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method
+ // EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method
+ EC_FUNC_ADDR, // Direct call to an absolute address
+
+ // EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable)
+ EC_INDIR_R, // Indirect call via register
+ // EC_INDIR_SR, // Indirect call via stack-reference (local var)
+ // EC_INDIR_C, // Indirect call via static class var
+ // EC_INDIR_ARD, // Indirect call via an addressing mode
+
+ EC_COUNT
+};
+
+void emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize,
+ emitAttr secondRetSize,
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET,
+ regNumber ireg = REG_NA,
+ regNumber xreg = REG_NA,
+ unsigned xmul = 0,
+ ssize_t disp = 0,
+ bool isJump = false,
+ bool isNoGC = false,
+ bool isProfLeaveCB = false);
+
+BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i);
+unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* i, code_t code);
+BYTE* emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id);
+BYTE* emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id);
+BYTE* emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg);
+BYTE* emitOutputShortConstant(
+ BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg, emitAttr opSize);
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a conditional jump.
+ */
+
+inline bool emitIsCondJump(instrDesc* jmp)
+{
+ return ((jmp->idInsFmt() == IF_BI_0B) || (jmp->idInsFmt() == IF_LARGEJMP));
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a compare and jump.
+ */
+
+inline bool emitIsCmpJump(instrDesc* jmp)
+{
+ return ((jmp->idInsFmt() == IF_BI_1A) || (jmp->idInsFmt() == IF_BI_1B));
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's an unconditional jump.
+ */
+
+inline bool emitIsUncondJump(instrDesc* jmp)
+{
+ return (jmp->idInsFmt() == IF_BI_0A);
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a direct call.
+ */
+
+inline bool emitIsDirectCall(instrDesc* call)
+{
+ return (call->idInsFmt() == IF_BI_0C);
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a load label instruction.
+ */
+
+inline bool emitIsLoadLabel(instrDesc* jmp)
+{
+ return ((jmp->idInsFmt() == IF_DI_1E) || // adr or adrp
+ (jmp->idInsFmt() == IF_LARGEADR));
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a load constant instruction.
+ */
+
+inline bool emitIsLoadConstant(instrDesc* jmp)
+{
+ return ((jmp->idInsFmt() == IF_LS_1A) || // ldr
+ (jmp->idInsFmt() == IF_LARGELDC));
+}
+
+#endif // _TARGET_ARM64_
diff --git a/src/jit/emitdef.h b/src/jit/emitdef.h
new file mode 100644
index 0000000000..f7f9325b79
--- /dev/null
+++ b/src/jit/emitdef.h
@@ -0,0 +1,22 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#ifndef _EMITDEF_H_
+#define _EMITDEF_H_
+/*****************************************************************************/
+
+#if defined(_TARGET_XARCH_)
+#include "emitxarch.h"
+#elif defined(_TARGET_ARM_)
+#include "emitarm.h"
+#elif defined(_TARGET_ARM64_)
+#include "emitarm64.h"
+#else
+#error Unsupported or unset target architecture
+#endif
+
+/*****************************************************************************/
+#endif //_EMITDEF_H_
+/*****************************************************************************/
diff --git a/src/jit/emitfmts.h b/src/jit/emitfmts.h
new file mode 100644
index 0000000000..587033f2e9
--- /dev/null
+++ b/src/jit/emitfmts.h
@@ -0,0 +1,14 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//////////////////////////////////////////////////////////////////////////////
+
+#if defined(_TARGET_XARCH_)
+#include "emitfmtsxarch.h"
+#elif defined(_TARGET_ARM_)
+#include "emitfmtsarm.h"
+#elif defined(_TARGET_ARM64_)
+#include "emitfmtsarm64.h"
+#else
+#error Unsupported or unset target architecture
+#endif // target type
diff --git a/src/jit/emitfmtsarm.h b/src/jit/emitfmtsarm.h
new file mode 100644
index 0000000000..bc7492003a
--- /dev/null
+++ b/src/jit/emitfmtsarm.h
@@ -0,0 +1,153 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//////////////////////////////////////////////////////////////////////////////
+
+// clang-format off
+#if !defined(_TARGET_ARM_)
+ #error Unexpected target type
+#endif
+
+#ifdef DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+
+#undef DEFINE_ID_OPS
+
+enum ID_OPS
+{
+ ID_OP_NONE, // no additional arguments
+ ID_OP_SCNS, // small const operand (21-bits or less, no reloc)
+ ID_OP_JMP, // local jump
+ ID_OP_LBL, // label operand
+ ID_OP_CALL, // direct method call
+ ID_OP_SPEC, // special handling required
+};
+
+//////////////////////////////////////////////////////////////////////////////
+#else // !DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+
+#ifndef IF_DEF
+#error Must define IF_DEF macro before including this file
+#endif
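+
+// A minimal illustration of how this X-macro table is typically consumed (a
+// sketch only; the enum shape and the IF_ prefix are assumptions made for the
+// example, the real consumer lives elsewhere in the emitter): each IF_DEF row
+// expands to one insFormat value, e.g.
+//
+//   enum insFormat : unsigned
+//   {
+//   #define IF_DEF(en, op1, op2) IF_##en,
+//   #include "emitfmts.h"
+//       IF_COUNT
+//   };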
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// enum insFormat instruction enum ID_OPS
+// scheduling
+// (unused)
+//////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(NONE, IS_NONE, NONE) //
+
+IF_DEF(LABEL, IS_NONE, JMP ) // label
+//IF_DEF(SWR_LABEL, IS_NONE, LBL ) // write label to stack
+//IF_DEF(METHOD, IS_NONE, CALL) // method
+//IF_DEF(CNS, IS_NONE, SCNS) // const
+
+IF_DEF(LARGEJMP, IS_NONE, JMP) // large conditional branch pseudo-op
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(EN9, IS_NONE, NONE) // Instruction has 9 possible encoding types
+IF_DEF(EN8, IS_NONE, NONE) // Instruction has 8 possible encoding types
+IF_DEF(EN6A, IS_NONE, NONE) // Instruction has 6 possible encoding types, type A
+IF_DEF(EN6B, IS_NONE, NONE) // Instruction has 6 possible encoding types, type B
+IF_DEF(EN5A, IS_NONE, NONE) // Instruction has 5 possible encoding types, type A
+IF_DEF(EN5B, IS_NONE, NONE) // Instruction has 5 possible encoding types, type B
+IF_DEF(EN4A, IS_NONE, NONE) // Instruction has 4 possible encoding types, type A
+IF_DEF(EN4B, IS_NONE, NONE) // Instruction has 4 possible encoding types, type B
+IF_DEF(EN3A, IS_NONE, NONE) // Instruction has 3 possible encoding types, type A
+IF_DEF(EN3B, IS_NONE, NONE) // Instruction has 3 possible encoding types, type B
+IF_DEF(EN3C, IS_NONE, NONE) // Instruction has 3 possible encoding types, type C
+IF_DEF(EN3D, IS_NONE, NONE) // Instruction has 3 possible encoding types, type D
+IF_DEF(EN3E, IS_NONE, NONE) // Instruction has 3 possible encoding types, type E
+IF_DEF(EN3F, IS_NONE, NONE) // Instruction has 3 possible encoding types, type F
+IF_DEF(EN2A, IS_NONE, NONE) // Instruction has 2 possible encoding types, type A
+IF_DEF(EN2B, IS_NONE, NONE) // Instruction has 2 possible encoding types, type B
+IF_DEF(EN2C, IS_NONE, NONE) // Instruction has 2 possible encoding types, type C
+IF_DEF(EN2D, IS_NONE, NONE) // Instruction has 2 possible encoding types, type D
+IF_DEF(EN2E, IS_NONE, NONE) // Instruction has 2 possible encoding types, type E
+IF_DEF(EN2F, IS_NONE, NONE) // Instruction has 2 possible encoding types, type F
+IF_DEF(EN2G, IS_NONE, NONE) // Instruction has 2 possible encoding types, type G
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(T1_A, IS_NONE, NONE) // T1_A ................
+IF_DEF(T1_B, IS_NONE, NONE) // T1_B ........cccc.... cond
+IF_DEF(T1_C, IS_NONE, NONE) // T1_C .....iiiiimmmddd R1 R2 imm5
+IF_DEF(T1_D0, IS_NONE, NONE) // T1_D0 ........Dmmmmddd R1* R2*
+IF_DEF(T1_D1, IS_NONE, SPEC) // T1_D1 .........mmmm... R1*
+IF_DEF(T1_D2, IS_NONE, SPEC) // T1_D2 .........mmmm... R3*
+IF_DEF(T1_E, IS_NONE, NONE) // T1_E ..........mmmddd R1 R2
+IF_DEF(T1_F, IS_NONE, NONE) // T1_F .........iiiiiii SP imm7
+IF_DEF(T1_G, IS_NONE, NONE) // T1_G .......iiinnnddd R1 R2 imm3
+IF_DEF(T1_H, IS_NONE, NONE) // T1_H .......mmmnnnddd R1 R2 R3
+IF_DEF(T1_I, IS_NONE, JMP ) // T1_I ......i.iiiiinnn R1 imm6
+IF_DEF(T1_J0, IS_NONE, NONE) // T1_J .....dddiiiiiiii R1 imm8
+IF_DEF(T1_J1, IS_NONE, NONE) // T1_J .....dddiiiiiiii R1 <regmask8>
+IF_DEF(T1_J2, IS_NONE, NONE) // T1_J .....dddiiiiiiii R1 SP imm8
+IF_DEF(T1_J3, IS_NONE, LBL ) // T1_J .....dddiiiiiiii R1 PC imm8
+IF_DEF(T1_K, IS_NONE, JMP ) // T1_K ....cccciiiiiiii Branch imm8, cond4
+IF_DEF(T1_L0, IS_NONE, NONE) // T1_L0 ........iiiiiiii imm8
+IF_DEF(T1_L1, IS_NONE, NONE) // T1_L1 ........rrrrrrrr <regmask8>
+IF_DEF(T1_M, IS_NONE, JMP ) // T1_M .....iiiiiiiiiii Branch imm11
+
+
+IF_DEF(T2_A, IS_NONE, NONE) // T2_A ................ ................
+IF_DEF(T2_B, IS_NONE, NONE) // T2_B ................ ............iiii imm4
+IF_DEF(T2_C0, IS_NONE, NONE) // T2_C0 ...........Snnnn .iiiddddiishmmmm R1 R2 R3 S, imm5, sh
+IF_DEF(T2_C1, IS_NONE, NONE) // T2_C1 ...........S.... .iiiddddiishmmmm R1 R2 S, imm5, sh
+IF_DEF(T2_C2, IS_NONE, NONE) // T2_C2 ...........S.... .iiiddddii..mmmm R1 R2 S, imm5
+IF_DEF(T2_C3, IS_NONE, NONE) // T2_C3 ...........S.... ....dddd....mmmm R1 R2 S
+IF_DEF(T2_C4, IS_NONE, NONE) // T2_C4 ...........Snnnn ....dddd....mmmm R1 R2 R3 S
+IF_DEF(T2_C5, IS_NONE, NONE) // T2_C5 ............nnnn ....dddd....mmmm R1 R2 R3
+IF_DEF(T2_C6, IS_NONE, NONE) // T2_C6 ................ ....dddd..iimmmm R1 R2 imm2
+IF_DEF(T2_C7, IS_NONE, NONE) // T2_C7 ............nnnn ..........shmmmm R1 R2 imm2
+IF_DEF(T2_C8, IS_NONE, NONE) // T2_C8 ............nnnn .iii....iishmmmm R1 R2 imm5, sh
+IF_DEF(T2_C9, IS_NONE, NONE) // T2_C9 ............nnnn ............mmmm R1 R2
+IF_DEF(T2_C10, IS_NONE, NONE) // T2_C10 ............mmmm ....dddd....mmmm R1 R2
+IF_DEF(T2_D0, IS_NONE, NONE) // T2_D0 ............nnnn .iiiddddii.wwwww R1 R2 imm5, imm5
+IF_DEF(T2_D1, IS_NONE, NONE) // T2_D1 ................ .iiiddddii.wwwww R1 imm5, imm5
+IF_DEF(T2_E0, IS_NONE, NONE) // T2_E0 ............nnnn tttt......shmmmm R1 R2 R3 imm2
+IF_DEF(T2_E1, IS_NONE, NONE) // T2_E1 ............nnnn tttt............ R1 R2
+IF_DEF(T2_E2, IS_NONE, NONE) // T2_E2 ................ tttt............ R1
+IF_DEF(T2_F1, IS_NONE, NONE) // T2_F1 ............nnnn ttttdddd....mmmm R1 R2 R3 R4
+IF_DEF(T2_F2, IS_NONE, NONE) // T2_F2 ............nnnn aaaadddd....mmmm R1 R2 R3 R4
+IF_DEF(T2_G0, IS_NONE, NONE) // T2_G0 .......PU.W.nnnn ttttTTTTiiiiiiii R1 R2 R3 imm8, PUW
+IF_DEF(T2_G1, IS_NONE, NONE) // T2_G1 ............nnnn ttttTTTT........ R1 R2 R3
+IF_DEF(T2_H0, IS_NONE, NONE) // T2_H0 ............nnnn tttt.PUWiiiiiiii R1 R2 imm8, PUW
+IF_DEF(T2_H1, IS_NONE, NONE) // T2_H1 ............nnnn tttt....iiiiiiii R1 R2 imm8
+IF_DEF(T2_H2, IS_NONE, NONE) // T2_H2 ............nnnn ........iiiiiiii R1 imm8
+IF_DEF(T2_I0, IS_NONE, NONE) // T2_I0 ..........W.nnnn rrrrrrrrrrrrrrrr R1 W, imm16
+IF_DEF(T2_I1, IS_NONE, NONE) // T2_I1 ................ rrrrrrrrrrrrrrrr imm16
+IF_DEF(T2_J1, IS_NONE, JMP ) // T2_J1 .....Scccciiiiii ..j.jiiiiiiiiiii Branch imm20, cond4
+IF_DEF(T2_J2, IS_NONE, JMP ) // T2_J2 .....Siiiiiiiiii ..j.jiiiiiiiiii. Branch imm24
+IF_DEF(T2_J3, IS_NONE, CALL) // T2_J3 .....Siiiiiiiiii ..j.jiiiiiiiiii. Call imm24
+IF_DEF(T2_K1, IS_NONE, NONE) // T2_K1 ............nnnn ttttiiiiiiiiiiii R1 R2 imm12
+IF_DEF(T2_K2, IS_NONE, NONE) // T2_K2 ............nnnn ....iiiiiiiiiiii R1 imm12
+IF_DEF(T2_K3, IS_NONE, NONE) // T2_K3 ........U....... ....iiiiiiiiiiii PC U, imm12
+IF_DEF(T2_K4, IS_NONE, NONE) // T2_K4 ........U....... ttttiiiiiiiiiiii R1 PC U, imm12
+IF_DEF(T2_L0, IS_NONE, NONE) // T2_L0 .....i.....Snnnn .iiiddddiiiiiiii R1 R2 S, imm8<<imm4
+IF_DEF(T2_L1, IS_NONE, NONE) // T2_L1 .....i.....S.... .iiiddddiiiiiiii R1 S, imm8<<imm4
+IF_DEF(T2_L2, IS_NONE, NONE) // T2_L2 .....i......nnnn .iii....iiiiiiii R1 imm8<<imm4
+IF_DEF(T2_M0, IS_NONE, NONE) // T2_M0 .....i......nnnn .iiiddddiiiiiiii R1 R2 imm12
+IF_DEF(T2_M1, IS_NONE, LBL ) // T2_M1 .....i.......... .iiiddddiiiiiiii R1 PC imm12
+IF_DEF(T2_N, IS_NONE, NONE) // T2_N .....i......iiii .iiiddddiiiiiiii R1 imm16 ; movw/movt
+IF_DEF(T2_N1, IS_NONE, JMP) // T2_N1 .....i......iiii .iiiddddiiiiiiii R1 imm16 ; movw/movt of a code address
+IF_DEF(T2_N2, IS_NONE, NONE) // T2_N2 .....i......iiii .iiiddddiiiiiiii R1 imm16 ; movw/movt of a data address
+IF_DEF(T2_VLDST, IS_NONE, NONE) // T2_VLDST 11101101UD0Lnnnn dddd101Ziiiiiiii D1 R2 imm(+-1020)
+IF_DEF(T2_VFP2, IS_NONE, NONE) // T2_VFP2 111011101D110--- dddd101Z--M0mmmm D1 D2
+IF_DEF(T2_VFP3, IS_NONE, NONE) // T2_VFP3 11101110-D--nnnn dddd101ZN-M0mmmm D1 D2 D3
+IF_DEF(T2_VMOVS, IS_NONE, NONE)
+IF_DEF(T2_VMOVD, IS_NONE, NONE)
+
+IF_DEF(INVALID, IS_NONE, NONE) //
+
+//////////////////////////////////////////////////////////////////////////////
+#undef IF_DEF
+//////////////////////////////////////////////////////////////////////////////
+
+#endif // !DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+// clang-format on
diff --git a/src/jit/emitfmtsarm64.h b/src/jit/emitfmtsarm64.h
new file mode 100644
index 0000000000..c4be8ae45a
--- /dev/null
+++ b/src/jit/emitfmtsarm64.h
@@ -0,0 +1,210 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//////////////////////////////////////////////////////////////////////////////
+
+// clang-format off
+#if !defined(_TARGET_ARM64_)
+#error Unexpected target type
+#endif
+
+#ifdef DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+
+#undef DEFINE_ID_OPS
+
+enum ID_OPS
+{
+ ID_OP_NONE, // no additional arguments
+ ID_OP_SCNS, // small const operand (21-bits or less, no reloc)
+ ID_OP_JMP, // local jump
+ ID_OP_CALL, // method call
+ ID_OP_SPEC, // special handling required
+};
+
+//////////////////////////////////////////////////////////////////////////////
+#else // !DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+
+#ifndef IF_DEF
+#error Must define IF_DEF macro before including this file
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// enum insFormat instruction enum ID_OPS
+// scheduling
+// (unused)
+//////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(NONE, IS_NONE, NONE) //
+
+IF_DEF(LABEL, IS_NONE, JMP) // label
+IF_DEF(LARGEJMP, IS_NONE, JMP) // large conditional branch pseudo-op (cond branch + uncond branch)
+IF_DEF(LARGEADR, IS_NONE, JMP) // large address pseudo-op (adrp + add)
+IF_DEF(LARGELDC, IS_NONE, JMP) // large constant pseudo-op (adrp + ldr)
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(EN9, IS_NONE, NONE) // Instruction has 9 possible encoding types
+IF_DEF(EN6A, IS_NONE, NONE) // Instruction has 6 possible encoding types, type A
+IF_DEF(EN5A, IS_NONE, NONE) // Instruction has 5 possible encoding types, type A
+IF_DEF(EN5B, IS_NONE, NONE) // Instruction has 5 possible encoding types, type B
+IF_DEF(EN5C, IS_NONE, NONE) // Instruction has 5 possible encoding types, type C
+IF_DEF(EN4A, IS_NONE, NONE) // Instruction has 4 possible encoding types, type A
+IF_DEF(EN4B, IS_NONE, NONE) // Instruction has 4 possible encoding types, type B
+IF_DEF(EN4C, IS_NONE, NONE) // Instruction has 4 possible encoding types, type C
+IF_DEF(EN4D, IS_NONE, NONE) // Instruction has 4 possible encoding types, type D
+IF_DEF(EN4E, IS_NONE, NONE) // Instruction has 4 possible encoding types, type E
+IF_DEF(EN4F, IS_NONE, NONE) // Instruction has 4 possible encoding types, type F
+IF_DEF(EN4G, IS_NONE, NONE) // Instruction has 4 possible encoding types, type G
+IF_DEF(EN3A, IS_NONE, NONE) // Instruction has 3 possible encoding types, type A
+IF_DEF(EN3B, IS_NONE, NONE) // Instruction has 3 possible encoding types, type B
+IF_DEF(EN3C, IS_NONE, NONE) // Instruction has 3 possible encoding types, type C
+IF_DEF(EN3D, IS_NONE, NONE) // Instruction has 3 possible encoding types, type D
+IF_DEF(EN3E, IS_NONE, NONE) // Instruction has 3 possible encoding types, type E
+IF_DEF(EN3F, IS_NONE, NONE) // Instruction has 3 possible encoding types, type F
+IF_DEF(EN3G, IS_NONE, NONE) // Instruction has 3 possible encoding types, type G
+IF_DEF(EN3H, IS_NONE, NONE) // Instruction has 3 possible encoding types, type H
+IF_DEF(EN3I, IS_NONE, NONE) // Instruction has 3 possible encoding types, type I
+IF_DEF(EN2A, IS_NONE, NONE) // Instruction has 2 possible encoding types, type A
+IF_DEF(EN2B, IS_NONE, NONE) // Instruction has 2 possible encoding types, type B
+IF_DEF(EN2C, IS_NONE, NONE) // Instruction has 2 possible encoding types, type C
+IF_DEF(EN2D, IS_NONE, NONE) // Instruction has 2 possible encoding types, type D
+IF_DEF(EN2E, IS_NONE, NONE) // Instruction has 2 possible encoding types, type E
+IF_DEF(EN2F, IS_NONE, NONE) // Instruction has 2 possible encoding types, type F
+IF_DEF(EN2G, IS_NONE, NONE) // Instruction has 2 possible encoding types, type G
+IF_DEF(EN2H, IS_NONE, NONE) // Instruction has 2 possible encoding types, type H
+IF_DEF(EN2I, IS_NONE, NONE) // Instruction has 2 possible encoding types, type I
+IF_DEF(EN2J, IS_NONE, NONE) // Instruction has 2 possible encoding types, type J
+IF_DEF(EN2K, IS_NONE, NONE) // Instruction has 2 possible encoding types, type K
+IF_DEF(EN2L, IS_NONE, NONE) // Instruction has 2 possible encoding types, type L
+IF_DEF(EN2M, IS_NONE, NONE) // Instruction has 2 possible encoding types, type M
+IF_DEF(EN2N, IS_NONE, NONE) // Instruction has 2 possible encoding types, type N
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Key for insFormat names:
+//
+// Above (Specifies multiple encodings)
+//
+// EN#? :: (count of the number of encodings)
+// (? is a unique letter A,B,C...)
+//
+// Below (Specifies an exact instruction encoding)
+//
+// -- the first two characters are
+//
+// DI :: Data Processing - Immediate
+// DR :: Data Processing - Register
+// DV :: Data Processing - Vector Register
+// LS :: Loads and Stores
+// BI :: Branches - Immediate
+// BR :: Branches - Register
+// SN :: System - No Registers or Immediates
+// SI :: System - Immediate
+//
+// _ :: a separator char '_'
+//
+// -- the next two characters are
+//
+// # :: number of registers in the encoding
+// ? :: A unique letter A,B,C,...
+// -- optional third character
+// I :: by elem immediate
+//
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(BI_0A, IS_NONE, JMP) // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 b
+IF_DEF(BI_0B, IS_NONE, JMP) // BI_0B ......iiiiiiiiii iiiiiiiiiii..... simm19:00 b<cond>
+IF_DEF(BI_0C, IS_NONE, CALL) // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 bl
+IF_DEF(BI_1A, IS_NONE, JMP) // BI_1A X.......iiiiiiii iiiiiiiiiiittttt Rt simm19:00 cbz cbnz
+IF_DEF(BI_1B, IS_NONE, JMP) // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6 simm14:00 tbz tbnz
+IF_DEF(BR_1A, IS_NONE, CALL) // BR_1A ................ ......nnnnn..... Rn ret
+IF_DEF(BR_1B, IS_NONE, CALL) // BR_1B ................ ......nnnnn..... Rn br blr
+
+IF_DEF(LS_1A, IS_NONE, JMP) // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB)
+IF_DEF(LS_2A, IS_NONE, NONE) // LS_2A .X.......X...... ......nnnnnttttt Rt Rn
+IF_DEF(LS_2B, IS_NONE, NONE) // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095)
+IF_DEF(LS_2C, IS_NONE, NONE) // LS_2C .X.......X.iiiii iiiiP.nnnnnttttt Rt Rn imm(-256..+255) pre/post inc
+IF_DEF(LS_3A, IS_NONE, NONE) // LS_3A .X.......X.mmmmm xxxS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {}
+IF_DEF(LS_3B, IS_NONE, NONE) // LS_3B X............... .aaaaannnnnddddd Rd Ra Rn
+IF_DEF(LS_3C, IS_NONE, NONE) // LS_3C X.........iiiiii iaaaaannnnnddddd Rd Ra Rn imm(im7,sh)
+
+IF_DEF(DI_1A, IS_NONE, NONE) // DI_1A X.......shiiiiii iiiiiinnnnn..... Rn imm(i12,sh)
+IF_DEF(DI_1B, IS_NONE, NONE) // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw)
+IF_DEF(DI_1C, IS_NONE, NONE) // DI_1C X........Nrrrrrr ssssssnnnnn..... Rn imm(N,r,s)
+IF_DEF(DI_1D, IS_NONE, NONE) // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s)
+IF_DEF(DI_1E, IS_NONE, JMP) // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21
+IF_DEF(DI_1F, IS_NONE, NONE) // DI_1F X..........iiiii cccc..nnnnn.nzcv Rn imm5 nzcv cond
+
+IF_DEF(DI_2A, IS_NONE, NONE) // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh)
+IF_DEF(DI_2B, IS_NONE, NONE) // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63)
+IF_DEF(DI_2C, IS_NONE, NONE) // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s)
+IF_DEF(DI_2D, IS_NONE, NONE) // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, imms (N,r,s)
+
+IF_DEF(DR_1D, IS_NONE, NONE) // DR_1D X............... cccc.......ddddd Rd cond
+
+IF_DEF(DR_2A, IS_NONE, NONE) // DR_2A X..........mmmmm ......nnnnn..... Rn Rm
+IF_DEF(DR_2B, IS_NONE, NONE) // DR_2B X.......sh.mmmmm ssssssnnnnn..... Rn Rm {LSL,LSR,ASR} imm(0-63)
+IF_DEF(DR_2C, IS_NONE, NONE) // DR_2C X..........mmmmm xxxsssnnnnn..... Rn Rm ext(Rm) LSL imm(0-4)
+IF_DEF(DR_2D, IS_NONE, NONE) // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond
+IF_DEF(DR_2E, IS_NONE, NONE) // DR_2E X..........mmmmm ...........ddddd Rd Rm
+IF_DEF(DR_2F, IS_NONE, NONE) // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63)
+IF_DEF(DR_2G, IS_NONE, NONE) // DR_2G X............... ......nnnnnddddd Rd Rn
+IF_DEF(DR_2H, IS_NONE, NONE) // DR_2H X........X...... ......nnnnnddddd Rd Rn
+IF_DEF(DR_2I, IS_NONE, NONE) // DR_2I X..........mmmmm cccc..nnnnn.nzcv Rn Rm nzcv cond
+
+IF_DEF(DR_3A, IS_NONE, NONE) // DR_3A X..........mmmmm ......nnnnnddddd Rd Rn Rm
+IF_DEF(DR_3B, IS_NONE, NONE) // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
+IF_DEF(DR_3C, IS_NONE, NONE) // DR_3C X..........mmmmm xxxsssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4)
+IF_DEF(DR_3D, IS_NONE, NONE) // DR_3D X..........mmmmm cccc..nnnnnddddd Rd Rn Rm cond
+IF_DEF(DR_3E, IS_NONE, NONE) // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63)
+
+IF_DEF(DR_4A, IS_NONE, NONE) // DR_4A X..........mmmmm .aaaaannnnnddddd Rd Rn Rm Ra
+
+IF_DEF(DV_1A, IS_NONE, NONE) // DV_1A .........X.iiiii iii........ddddd Vd imm8 (fmov - immediate scalar)
+IF_DEF(DV_1B, IS_NONE, NONE) // DV_1B .QX..........iii jjjj..iiiiiddddd Vd imm8 (fmov/movi - immediate vector)
+IF_DEF(DV_1C, IS_NONE, NONE) // DV_1C .........X...... ......nnnnn..... Vn #0.0 (fcmp - with zero)
+
+IF_DEF(DV_2A, IS_NONE, NONE) // DV_2A .Q.......X...... ......nnnnnddddd Vd Vn (fabs, fcvtXX - vector)
+IF_DEF(DV_2B, IS_NONE, NONE) // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov/smov - to general)
+IF_DEF(DV_2C, IS_NONE, NONE) // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from
+ // general)
+IF_DEF(DV_2D, IS_NONE, NONE) // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector)
+IF_DEF(DV_2E, IS_NONE, NONE) // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar)
+IF_DEF(DV_2F, IS_NONE, NONE) // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element)
+IF_DEF(DV_2G, IS_NONE, NONE) // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov, fcvtXX - register)
+IF_DEF(DV_2H, IS_NONE, NONE) // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov, fcvtXX - to general)
+IF_DEF(DV_2I, IS_NONE, NONE) // DV_2I X........X...... ......nnnnnddddd Vd Rn (fmov, fcvtXX - from general)
+IF_DEF(DV_2J, IS_NONE, NONE) // DV_2J .........d...... D.....nnnnnddddd Vd Vn (fcvt)
+IF_DEF(DV_2K, IS_NONE, NONE) // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp)
+IF_DEF(DV_2L, IS_NONE, NONE) // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar)
+IF_DEF(DV_2M, IS_NONE, NONE) // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector)
+IF_DEF(DV_2N, IS_NONE, NONE) // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar)
+IF_DEF(DV_2O, IS_NONE, NONE) // DV_2O .Q.......iiiiiii ......nnnnnddddd Vd Vn imm (shift - vector)
+
+IF_DEF(DV_3A, IS_NONE, NONE) // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+IF_DEF(DV_3AI, IS_NONE, NONE) // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+IF_DEF(DV_3B, IS_NONE, NONE) // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+IF_DEF(DV_3BI, IS_NONE, NONE) // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+IF_DEF(DV_3C, IS_NONE, NONE) // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+IF_DEF(DV_3D, IS_NONE, NONE) // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+IF_DEF(DV_3DI, IS_NONE, NONE) // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
+IF_DEF(DV_3E, IS_NONE, NONE) // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+
+IF_DEF(DV_4A, IS_NONE, NONE) // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Vn Vm Va (scalar)
+
+IF_DEF(SN_0A, IS_NONE, NONE) // SN_0A ................ ................
+IF_DEF(SI_0A, IS_NONE, NONE) // SI_0A ...........iiiii iiiiiiiiiii..... imm16
+IF_DEF(SI_0B, IS_NONE, NONE) // SI_0B ................ ....bbbb........ imm4 - barrier
+
+IF_DEF(INVALID, IS_NONE, NONE) //
+
+//////////////////////////////////////////////////////////////////////////////
+#undef IF_DEF
+//////////////////////////////////////////////////////////////////////////////
+
+#endif // !DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+// clang-format on
diff --git a/src/jit/emitfmtsxarch.h b/src/jit/emitfmtsxarch.h
new file mode 100644
index 0000000000..49afcb5c8b
--- /dev/null
+++ b/src/jit/emitfmtsxarch.h
@@ -0,0 +1,240 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//////////////////////////////////////////////////////////////////////////////
+
+//
+// This file was previously known as emitfmts.h
+//
+
+// clang-format off
+#if !defined(_TARGET_XARCH_)
+ #error Unexpected target type
+#endif
+
+#ifdef DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+
+#undef DEFINE_ID_OPS
+
+enum ID_OPS
+{
+ ID_OP_NONE, // no additional arguments
+ ID_OP_SCNS, // small const operand (21-bits or less, no reloc)
+ ID_OP_CNS, // constant operand
+ ID_OP_DSP, // displacement operand
+ ID_OP_DSP_CNS, // displacement + constant
+ ID_OP_AMD, // addrmode with dsp
+ ID_OP_AMD_CNS, // addrmode with dsp + constant
+ ID_OP_JMP, // local jump
+ ID_OP_LBL, // label operand
+ ID_OP_CALL, // direct method call
+ ID_OP_SPEC, // special handling required
+};
+
+//////////////////////////////////////////////////////////////////////////////
+#else // !DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+
+#ifdef DEFINE_IS_OPS
+#undef DEFINE_IS_OPS
+
+#else // DEFINE_IS_OPS
+
+//////////////////////////////////////////////////////////////////////////////
+
+#ifndef IF_DEF
+#error Must define IF_DEF macro before including this file
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// A note on the naming convention for instruction forms (IF_xxxxx).
+// For 3-character code XYY, generally we have:
+// X =
+// R - register
+// M - memory
+// S - stack
+// A - address mode
+// YY =
+// RD - read
+// WR - write
+// RW - read / write
+//
+// The following sequences don't follow this pattern:
+// XYY =
+// CNS - constant
+// SHF - shift-constant
+//
+// For IF_XXX_YYY, the first operand is XXX, the second operand is YYY.
+//
+//////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// enum insFormat instruction enum ID_OPS
+// scheduling
+// (unused)
+//////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(NONE, IS_NONE, NONE) // no operands
+
+IF_DEF(LABEL, IS_NONE, JMP ) // label
+IF_DEF(RWR_LABEL, IS_R1_WR, JMP ) // write label to register
+IF_DEF(SWR_LABEL, IS_SF_WR, LBL ) // write label to stack
+
+IF_DEF(METHOD, IS_NONE, CALL) // method
+IF_DEF(METHPTR, IS_NONE, CALL) // method ptr (glbl)
+
+IF_DEF(CNS, IS_NONE, SCNS) // const
+
+//----------------------------------------------------------------------------
+// NOTE: The order of the "RD/WR/RW" varieties must match that of
+// the "insUpdateModes" enum in "instr.h".
+//----------------------------------------------------------------------------
+
+IF_DEF(RRD, IS_R1_RD, NONE) // read reg
+IF_DEF(RWR, IS_R1_WR, NONE) // write reg
+IF_DEF(RRW, IS_R1_RW, NONE) // r/w reg
+
+IF_DEF(RRD_CNS, IS_R1_RD, SCNS) // read reg , const
+IF_DEF(RWR_CNS, IS_R1_WR, SCNS) // write reg , const
+IF_DEF(RRW_CNS, IS_R1_RW, SCNS) // r/w reg , const
+IF_DEF(RRW_SHF, IS_R1_RW, SCNS) // r/w reg , shift-const
+
+IF_DEF(RRD_RRD, IS_R1_RD|IS_R2_RD, NONE) // read reg , read reg2
+IF_DEF(RWR_RRD, IS_R1_WR|IS_R2_RD, NONE) // write reg , read reg2
+IF_DEF(RRW_RRD, IS_R1_RW|IS_R2_RD, NONE) // r/w reg , read reg2
+IF_DEF(RRW_RRW, IS_R1_RW|IS_R2_RW, NONE) // r/w reg , r/w reg2 - for XCHG reg, reg2
+IF_DEF(RRW_RRW_CNS, IS_R1_RW|IS_R2_RW, SCNS) // r/w reg , r/w reg2 , const
+
+IF_DEF(RWR_RRD_RRD, IS_R1_WR|IS_R2_RD|IS_R3_RD, NONE) // write reg , read reg2 , read reg3
+
+//----------------------------------------------------------------------------
+// The following formats are used for direct addresses (e.g. static data members)
+//----------------------------------------------------------------------------
+
+IF_DEF(MRD, IS_GM_RD, SPEC) // read [mem] (indirect call req. SPEC)
+IF_DEF(MWR, IS_GM_WR, DSP) // write [mem]
+IF_DEF(MRW, IS_GM_RW, DSP) // r/w [mem]
+IF_DEF(MRD_OFF, IS_GM_RD, DSP) // offset mem
+
+IF_DEF(RRD_MRD, IS_GM_RD|IS_R1_RD, DSP) // read reg , read [mem]
+IF_DEF(RWR_MRD, IS_GM_RD|IS_R1_WR, DSP) // write reg , read [mem]
+IF_DEF(RRW_MRD, IS_GM_RD|IS_R1_RW, DSP) // r/w reg , read [mem]
+
+IF_DEF(RWR_MRD_OFF, IS_GM_RD|IS_R1_WR, DSP) // write reg , offset mem
+
+IF_DEF(MRD_RRD, IS_GM_RD|IS_R1_RD, DSP) // read [mem], read reg
+IF_DEF(MWR_RRD, IS_GM_WR|IS_R1_RD, DSP) // write [mem], read reg
+IF_DEF(MRW_RRD, IS_GM_RW|IS_R1_RD, DSP) // r/w [mem], read reg
+
+IF_DEF(MRD_CNS, IS_GM_RD, DSP_CNS) // read [mem], const
+IF_DEF(MWR_CNS, IS_GM_WR, DSP_CNS) // write [mem], const
+IF_DEF(MRW_CNS, IS_GM_RW, DSP_CNS) // r/w [mem], const
+
+IF_DEF(MRW_SHF, IS_GM_RW, DSP_CNS) // shift [mem], const
+
+//----------------------------------------------------------------------------
+// The following formats are used for stack frame refs
+//----------------------------------------------------------------------------
+
+IF_DEF(SRD, IS_SF_RD, SPEC) // read [stk] (indirect call req. SPEC)
+IF_DEF(SWR, IS_SF_WR, NONE) // write [stk]
+IF_DEF(SRW, IS_SF_RW, NONE) // r/w [stk]
+
+IF_DEF(RRD_SRD, IS_SF_RD|IS_R1_RD, NONE) // read reg , read [stk]
+IF_DEF(RWR_SRD, IS_SF_RD|IS_R1_WR, NONE) // write reg , read [stk]
+IF_DEF(RRW_SRD, IS_SF_RD|IS_R1_RW, NONE) // r/w reg , read [stk]
+
+IF_DEF(SRD_RRD, IS_SF_RD|IS_R1_RD, NONE) // read [stk], read reg
+IF_DEF(SWR_RRD, IS_SF_WR|IS_R1_RD, NONE) // write [stk], read reg
+IF_DEF(SRW_RRD, IS_SF_RW|IS_R1_RD, NONE) // r/w [stk], read reg
+
+IF_DEF(SRD_CNS, IS_SF_RD, CNS ) // read [stk], const
+IF_DEF(SWR_CNS, IS_SF_WR, CNS ) // write [stk], const
+IF_DEF(SRW_CNS, IS_SF_RW, CNS ) // r/w [stk], const
+
+IF_DEF(SRW_SHF, IS_SF_RW, CNS ) // shift [stk], const
+
+//----------------------------------------------------------------------------
+// The following formats are used for indirect address modes
+//----------------------------------------------------------------------------
+
+
+IF_DEF(ARD, IS_AM_RD, SPEC) // read [adr] (indirect call req. SPEC)
+IF_DEF(AWR, IS_AM_WR, AMD ) // write [adr]
+IF_DEF(ARW, IS_AM_RW, AMD ) // r/w [adr]
+
+IF_DEF(RRD_ARD, IS_AM_RD|IS_R1_RD, AMD ) // read reg , read [adr]
+IF_DEF(RWR_ARD, IS_AM_RD|IS_R1_WR, AMD ) // write reg , read [adr]
+IF_DEF(RRW_ARD, IS_AM_RD|IS_R1_RW, AMD ) // r/w reg , read [adr]
+
+IF_DEF(ARD_RRD, IS_AM_RD|IS_R1_RD, AMD ) // read [adr], read reg
+IF_DEF(AWR_RRD, IS_AM_WR|IS_R1_RD, AMD ) // write [adr], read reg
+IF_DEF(ARW_RRD, IS_AM_RW|IS_R1_RD, AMD ) // r/w [adr], read reg
+
+IF_DEF(ARD_CNS, IS_AM_RD, AMD_CNS) // read [adr], const
+IF_DEF(AWR_CNS, IS_AM_WR, AMD_CNS) // write [adr], const
+IF_DEF(ARW_CNS, IS_AM_RW, AMD_CNS) // r/w [adr], const
+
+IF_DEF(ARW_SHF, IS_AM_RW, AMD_CNS) // shift [adr], const
+
+
+
+//----------------------------------------------------------------------------
+// The following formats are used for FP coprocessor instructions
+//----------------------------------------------------------------------------
+#if FEATURE_STACK_FP_X87
+
+IF_DEF(FRD, IS_FP_STK, NONE) // read ST(n)
+IF_DEF(FWR, IS_FP_STK, NONE) // write ST(n)
+IF_DEF(FRW, IS_FP_STK, NONE) // r/w ST(n)
+
+IF_DEF(TRD, IS_FP_STK, NONE) // read ST(0)
+IF_DEF(TWR, IS_FP_STK, NONE) // write ST(0)
+IF_DEF(TRW, IS_FP_STK, NONE) // r/w ST(0)
+
+IF_DEF(FRD_TRD, IS_FP_STK, NONE) // read ST(n), read ST(0)
+IF_DEF(FWR_TRD, IS_FP_STK, NONE) // write ST(n), read ST(0)
+IF_DEF(FRW_TRD, IS_FP_STK, NONE) // r/w ST(n), read ST(0)
+
+IF_DEF(TRD_FRD, IS_FP_STK, NONE) // read ST(0), read ST(n)
+IF_DEF(TWR_FRD, IS_FP_STK, NONE) // write ST(0), read ST(n)
+IF_DEF(TRW_FRD, IS_FP_STK, NONE) // r/w ST(0), read ST(n)
+
+IF_DEF(TRD_SRD, IS_FP_STK|IS_SF_RD, NONE) // read ST(0), read [stk]
+IF_DEF(TWR_SRD, IS_FP_STK|IS_SF_RD, NONE) // write ST(0), read [stk]
+IF_DEF(TRW_SRD, IS_FP_STK|IS_SF_RD, NONE) // r/w ST(0), read [stk]
+
+//////(SRD_TRD, IS_FP_STK|IS_SF_RD, NONE) // read [stk], read ST(n)
+IF_DEF(SWR_TRD, IS_FP_STK|IS_SF_WR, NONE) // write [stk], read ST(n)
+//////(SRW_TRD, IS_FP_STK|IS_SF_RW, NONE) // r/w [stk], read ST(n)
+
+IF_DEF(TRD_MRD, IS_FP_STK|IS_GM_RD, NONE) // read ST(0), read [mem]
+IF_DEF(TWR_MRD, IS_FP_STK|IS_GM_RD, NONE) // write ST(0), read [mem]
+IF_DEF(TRW_MRD, IS_FP_STK|IS_GM_RD, NONE) // r/w ST(0), read [mem]
+
+//////(MRD_TRD, IS_FP_STK|IS_GM_RD, NONE) // read [mem], read ST(n)
+IF_DEF(MWR_TRD, IS_FP_STK|IS_GM_WR, NONE) // write [mem], read ST(n)
+//////(MRW_TRD, IS_FP_STK|IS_GM_RW, NONE) // r/w [mem], read ST(n)
+
+IF_DEF(TRD_ARD, IS_FP_STK|IS_AM_RD, AMD ) // read ST(0), read [adr]
+IF_DEF(TWR_ARD, IS_FP_STK|IS_AM_RD, AMD ) // write ST(0), read [adr]
+IF_DEF(TRW_ARD, IS_FP_STK|IS_AM_RD, AMD ) // r/w ST(0), read [adr]
+
+//////(ARD_TRD, IS_FP_STK|IS_AM_RD, AMD ) // read [adr], read ST(n)
+IF_DEF(AWR_TRD, IS_FP_STK|IS_AM_WR, AMD ) // write [adr], read ST(n)
+//////(ARW_TRD, IS_FP_STK|IS_AM_RW, AMD ) // r/w [adr], read ST(n)
+
+#endif // FEATURE_STACK_FP_X87
+
+//////////////////////////////////////////////////////////////////////////////
+
+#undef IF_DEF
+
+//////////////////////////////////////////////////////////////////////////////
+#endif // DEFINE_IS_OPS
+#endif // DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+// clang-format on
diff --git a/src/jit/emitinl.h b/src/jit/emitinl.h
new file mode 100644
index 0000000000..302b8ea448
--- /dev/null
+++ b/src/jit/emitinl.h
@@ -0,0 +1,508 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#ifndef _EMITINL_H_
+#define _EMITINL_H_
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Return the number of bytes of machine code the given instruction will
+ * produce.
+ */
+
+inline UNATIVE_OFFSET emitter::emitInstCodeSz(instrDesc* id)
+{
+ return id->idCodeSize();
+}
+
+inline UNATIVE_OFFSET emitter::emitSizeOfJump(instrDescJmp* jmp)
+{
+ return jmp->idCodeSize();
+}
+
+#ifdef _TARGET_XARCH_
+
+/* static */
+inline bool emitter::instrIs3opImul(instruction ins)
+{
+#ifdef _TARGET_X86_
+ return ((ins >= INS_imul_AX) && (ins <= INS_imul_DI));
+#else // _TARGET_AMD64
+ return ((ins >= INS_imul_AX) && (ins <= INS_imul_15));
+#endif
+}
+
+/* static */
+inline bool emitter::instrIsExtendedReg3opImul(instruction ins)
+{
+#ifdef _TARGET_X86_
+ return false;
+#else // _TARGET_AMD64
+ return ((ins >= INS_imul_08) && (ins <= INS_imul_15));
+#endif
+}
+
+/* static */
+inline bool emitter::instrHasImplicitRegPairDest(instruction ins)
+{
+ return (ins == INS_mulEAX) || (ins == INS_imulEAX) || (ins == INS_div) || (ins == INS_idiv);
+}
+
+// Because we don't actually have support for encoding these 3-op
+// multiplies, we fake them with special pseudo-opcodes. Make sure they are
+// contiguous.
+/* static */
+inline void emitter::check3opImulValues()
+{
+ assert(INS_imul_AX - INS_imul_AX == REG_EAX);
+ assert(INS_imul_BX - INS_imul_AX == REG_EBX);
+ assert(INS_imul_CX - INS_imul_AX == REG_ECX);
+ assert(INS_imul_DX - INS_imul_AX == REG_EDX);
+ assert(INS_imul_BP - INS_imul_AX == REG_EBP);
+ assert(INS_imul_SI - INS_imul_AX == REG_ESI);
+ assert(INS_imul_DI - INS_imul_AX == REG_EDI);
+#ifdef _TARGET_AMD64_
+ assert(INS_imul_08 - INS_imul_AX == REG_R8);
+ assert(INS_imul_09 - INS_imul_AX == REG_R9);
+ assert(INS_imul_10 - INS_imul_AX == REG_R10);
+ assert(INS_imul_11 - INS_imul_AX == REG_R11);
+ assert(INS_imul_12 - INS_imul_AX == REG_R12);
+ assert(INS_imul_13 - INS_imul_AX == REG_R13);
+ assert(INS_imul_14 - INS_imul_AX == REG_R14);
+ assert(INS_imul_15 - INS_imul_AX == REG_R15);
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Return the instruction that uses the given register in the imul instruction
+ */
+
+/* static */
+inline instruction emitter::inst3opImulForReg(regNumber reg)
+{
+ assert(genIsValidIntReg(reg));
+
+ instruction ins = instruction(reg + INS_imul_AX);
+ check3opImulValues();
+ assert(instrIs3opImul(ins));
+
+ return ins;
+}
+
+/*****************************************************************************
+ *
+ * Return the register which is used implicitly by the IMUL_REG instruction
+ */
+
+/* static */
+inline regNumber emitter::inst3opImulReg(instruction ins)
+{
+ regNumber reg = ((regNumber)(ins - INS_imul_AX));
+
+ assert(genIsValidIntReg(reg));
+
+ /* Make sure we return the appropriate register */
+
+ check3opImulValues();
+
+ return reg;
+}
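+
+// Illustration of the mapping above (a sketch shown only to make the
+// pseudo-opcode scheme concrete): because the INS_imul_XX opcodes are laid
+// out contiguously starting at INS_imul_AX, the two helpers are exact
+// inverses of each other, e.g.
+//
+//   instruction ins = inst3opImulForReg(REG_ECX); // INS_imul_CX
+//   regNumber   reg = inst3opImulReg(ins);        // REG_ECX again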
+#endif
+
+/*****************************************************************************
+ *
+ * The following helpers should be used to access the various values that
+ * get stored in different places within the instruction descriptor.
+ */
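+
+// For example (an illustrative sketch only; "id" stands for any valid
+// instrDesc* describing an address-mode instruction with an immediate):
+//
+//   CnsVal  cnsVal;
+//   ssize_t disp = emitGetInsAmdCns(id, &cnsVal); // address-mode displacement
+//   ssize_t imm  = cnsVal.cnsVal;                 // immediate constant
+//
+// Each helper picks the right storage location depending on whether the
+// descriptor carries a "large" displacement and/or a "large" constant.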
+
+inline ssize_t emitter::emitGetInsAmd(instrDesc* id)
+{
+ return id->idIsLargeDsp() ? ((instrDescAmd*)id)->idaAmdVal : id->idAddr()->iiaAddrMode.amDisp;
+}
+
+inline int emitter::emitGetInsCDinfo(instrDesc* id)
+{
+ if (id->idIsLargeCall())
+ {
+ return ((instrDescCGCA*)id)->idcArgCnt;
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+ ssize_t cns = emitGetInsCns(id);
+
+ // We only encode 32-bit ints, so this is safe
+ noway_assert((int)cns == cns);
+
+ return (int)cns;
+ }
+}
+
+inline void emitter::emitGetInsCns(instrDesc* id, CnsVal* cv)
+{
+#ifdef RELOC_SUPPORT
+ cv->cnsReloc = id->idIsCnsReloc();
+#endif
+ if (id->idIsLargeCns())
+ {
+ cv->cnsVal = ((instrDescCns*)id)->idcCnsVal;
+ }
+ else
+ {
+ cv->cnsVal = id->idSmallCns();
+ }
+}
+
+inline ssize_t emitter::emitGetInsAmdCns(instrDesc* id, CnsVal* cv)
+{
+#ifdef RELOC_SUPPORT
+ cv->cnsReloc = id->idIsCnsReloc();
+#endif
+ if (id->idIsLargeDsp())
+ {
+ if (id->idIsLargeCns())
+ {
+ cv->cnsVal = ((instrDescCnsAmd*)id)->idacCnsVal;
+ return ((instrDescCnsAmd*)id)->idacAmdVal;
+ }
+ else
+ {
+ cv->cnsVal = id->idSmallCns();
+ return ((instrDescAmd*)id)->idaAmdVal;
+ }
+ }
+ else
+ {
+ if (id->idIsLargeCns())
+ {
+ cv->cnsVal = ((instrDescCns*)id)->idcCnsVal;
+ }
+ else
+ {
+ cv->cnsVal = id->idSmallCns();
+ }
+
+ return id->idAddr()->iiaAddrMode.amDisp;
+ }
+}
+
+inline void emitter::emitGetInsDcmCns(instrDesc* id, CnsVal* cv)
+{
+#ifdef RELOC_SUPPORT
+ cv->cnsReloc = id->idIsCnsReloc();
+#endif
+ if (id->idIsLargeCns())
+ {
+ if (id->idIsLargeDsp())
+ {
+ cv->cnsVal = ((instrDescCnsDsp*)id)->iddcCnsVal;
+ }
+ else
+ {
+ cv->cnsVal = ((instrDescCns*)id)->idcCnsVal;
+ }
+ }
+ else
+ {
+ cv->cnsVal = id->idSmallCns();
+ }
+}
+
+inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id)
+{
+ if (id->idIsLargeDsp())
+ {
+ if (id->idIsLargeCns())
+ {
+ return ((instrDescCnsAmd*)id)->idacAmdVal;
+ }
+ return ((instrDescAmd*)id)->idaAmdVal;
+ }
+
+ return id->idAddr()->iiaAddrMode.amDisp;
+}
+
+/*****************************************************************************
+ *
+ * Convert between a register mask and a smaller version for storage.
+ */
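+
+// Only callee-saved registers appear in the mask (see the assert below), so
+// the full register mask is packed into a few bits stashed in the idReg1 and
+// idReg2 fields of the instrDesc. For example (a sketch of the x86 case): a
+// mask of RBM_ESI | RBM_EBX is encoded as 0x01 | 0x04 = 0x05 in idReg1, and
+// emitDecodeCallGCregs() recovers the original mask from those bits.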
+
+/*static*/ inline void emitter::emitEncodeCallGCregs(regMaskTP regmask, instrDesc* id)
+{
+ assert((regmask & RBM_CALLEE_TRASH) == 0);
+
+ unsigned encodeMask;
+
+#ifdef _TARGET_X86_
+ assert(REGNUM_BITS >= 3);
+ encodeMask = 0;
+
+ if ((regmask & RBM_ESI) != RBM_NONE)
+ encodeMask |= 0x01;
+ if ((regmask & RBM_EDI) != RBM_NONE)
+ encodeMask |= 0x02;
+ if ((regmask & RBM_EBX) != RBM_NONE)
+ encodeMask |= 0x04;
+
+ id->idReg1((regNumber)encodeMask); // Save in idReg1
+
+#elif defined(_TARGET_AMD64_)
+ assert(REGNUM_BITS >= 4);
+ encodeMask = 0;
+
+ if ((regmask & RBM_RSI) != RBM_NONE)
+ {
+ encodeMask |= 0x01;
+ }
+ if ((regmask & RBM_RDI) != RBM_NONE)
+ {
+ encodeMask |= 0x02;
+ }
+ if ((regmask & RBM_RBX) != RBM_NONE)
+ {
+ encodeMask |= 0x04;
+ }
+ if ((regmask & RBM_RBP) != RBM_NONE)
+ {
+ encodeMask |= 0x08;
+ }
+
+ id->idReg1((regNumber)encodeMask); // Save in idReg1
+
+ encodeMask = 0;
+
+ if ((regmask & RBM_R12) != RBM_NONE)
+ {
+ encodeMask |= 0x01;
+ }
+ if ((regmask & RBM_R13) != RBM_NONE)
+ {
+ encodeMask |= 0x02;
+ }
+ if ((regmask & RBM_R14) != RBM_NONE)
+ {
+ encodeMask |= 0x04;
+ }
+ if ((regmask & RBM_R15) != RBM_NONE)
+ {
+ encodeMask |= 0x08;
+ }
+
+ id->idReg2((regNumber)encodeMask); // Save in idReg2
+
+#elif defined(_TARGET_ARM_)
+ assert(REGNUM_BITS >= 4);
+ encodeMask = 0;
+
+ if ((regmask & RBM_R4) != RBM_NONE)
+ encodeMask |= 0x01;
+ if ((regmask & RBM_R5) != RBM_NONE)
+ encodeMask |= 0x02;
+ if ((regmask & RBM_R6) != RBM_NONE)
+ encodeMask |= 0x04;
+ if ((regmask & RBM_R7) != RBM_NONE)
+ encodeMask |= 0x08;
+
+ id->idReg1((regNumber)encodeMask); // Save in idReg1
+
+ encodeMask = 0;
+
+ if ((regmask & RBM_R8) != RBM_NONE)
+ encodeMask |= 0x01;
+ if ((regmask & RBM_R9) != RBM_NONE)
+ encodeMask |= 0x02;
+ if ((regmask & RBM_R10) != RBM_NONE)
+ encodeMask |= 0x04;
+ if ((regmask & RBM_R11) != RBM_NONE)
+ encodeMask |= 0x08;
+
+ id->idReg2((regNumber)encodeMask); // Save in idReg2
+
+#elif defined(_TARGET_ARM64_)
+ assert(REGNUM_BITS >= 5);
+ encodeMask = 0;
+
+ if ((regmask & RBM_R19) != RBM_NONE)
+ encodeMask |= 0x01;
+ if ((regmask & RBM_R20) != RBM_NONE)
+ encodeMask |= 0x02;
+ if ((regmask & RBM_R21) != RBM_NONE)
+ encodeMask |= 0x04;
+ if ((regmask & RBM_R22) != RBM_NONE)
+ encodeMask |= 0x08;
+ if ((regmask & RBM_R23) != RBM_NONE)
+ encodeMask |= 0x10;
+
+ id->idReg1((regNumber)encodeMask); // Save in idReg1
+
+ encodeMask = 0;
+
+ if ((regmask & RBM_R24) != RBM_NONE)
+ encodeMask |= 0x01;
+ if ((regmask & RBM_R25) != RBM_NONE)
+ encodeMask |= 0x02;
+ if ((regmask & RBM_R26) != RBM_NONE)
+ encodeMask |= 0x04;
+ if ((regmask & RBM_R27) != RBM_NONE)
+ encodeMask |= 0x08;
+ if ((regmask & RBM_R28) != RBM_NONE)
+ encodeMask |= 0x10;
+
+ id->idReg2((regNumber)encodeMask); // Save in idReg2
+
+#else
+ NYI("unknown target");
+#endif
+}
+
+/*static*/ inline unsigned emitter::emitDecodeCallGCregs(instrDesc* id)
+{
+ unsigned regmask = 0;
+ unsigned encodeMask;
+
+#ifdef _TARGET_X86_
+ assert(REGNUM_BITS >= 3);
+ encodeMask = id->idReg1();
+
+ if ((encodeMask & 0x01) != 0)
+ regmask |= RBM_ESI;
+ if ((encodeMask & 0x02) != 0)
+ regmask |= RBM_EDI;
+ if ((encodeMask & 0x04) != 0)
+ regmask |= RBM_EBX;
+#elif defined(_TARGET_AMD64_)
+ assert(REGNUM_BITS >= 4);
+ encodeMask = id->idReg1();
+
+ if ((encodeMask & 0x01) != 0)
+ {
+ regmask |= RBM_RSI;
+ }
+ if ((encodeMask & 0x02) != 0)
+ {
+ regmask |= RBM_RDI;
+ }
+ if ((encodeMask & 0x04) != 0)
+ {
+ regmask |= RBM_RBX;
+ }
+ if ((encodeMask & 0x08) != 0)
+ {
+ regmask |= RBM_RBP;
+ }
+
+ encodeMask = id->idReg2();
+
+ if ((encodeMask & 0x01) != 0)
+ {
+ regmask |= RBM_R12;
+ }
+ if ((encodeMask & 0x02) != 0)
+ {
+ regmask |= RBM_R13;
+ }
+ if ((encodeMask & 0x04) != 0)
+ {
+ regmask |= RBM_R14;
+ }
+ if ((encodeMask & 0x08) != 0)
+ {
+ regmask |= RBM_R15;
+ }
+
+#elif defined(_TARGET_ARM_)
+ assert(REGNUM_BITS >= 4);
+ encodeMask = id->idReg1();
+
+ if ((encodeMask & 0x01) != 0)
+ regmask |= RBM_R4;
+ if ((encodeMask & 0x02) != 0)
+ regmask |= RBM_R5;
+ if ((encodeMask & 0x04) != 0)
+ regmask |= RBM_R6;
+ if ((encodeMask & 0x08) != 0)
+ regmask |= RBM_R7;
+
+ encodeMask = id->idReg2();
+
+ if ((encodeMask & 0x01) != 0)
+ regmask |= RBM_R8;
+ if ((encodeMask & 0x02) != 0)
+ regmask |= RBM_R9;
+ if ((encodeMask & 0x04) != 0)
+ regmask |= RBM_R10;
+ if ((encodeMask & 0x08) != 0)
+ regmask |= RBM_R11;
+
+#elif defined(_TARGET_ARM64_)
+ assert(REGNUM_BITS >= 5);
+ encodeMask = id->idReg1();
+
+ if ((encodeMask & 0x01) != 0)
+ regmask |= RBM_R19;
+ if ((encodeMask & 0x02) != 0)
+ regmask |= RBM_R20;
+ if ((encodeMask & 0x04) != 0)
+ regmask |= RBM_R21;
+ if ((encodeMask & 0x08) != 0)
+ regmask |= RBM_R22;
+ if ((encodeMask & 0x10) != 0)
+ regmask |= RBM_R23;
+
+ encodeMask = id->idReg2();
+
+ if ((encodeMask & 0x01) != 0)
+ regmask |= RBM_R24;
+ if ((encodeMask & 0x02) != 0)
+ regmask |= RBM_R25;
+ if ((encodeMask & 0x04) != 0)
+ regmask |= RBM_R26;
+ if ((encodeMask & 0x08) != 0)
+ regmask |= RBM_R27;
+ if ((encodeMask & 0x10) != 0)
+ regmask |= RBM_R28;
+
+#else
+ NYI("unknown target");
+#endif
+
+ return regmask;
+}
+
+#ifdef _TARGET_XARCH_
+inline bool insIsCMOV(instruction ins)
+{
+ return ((ins >= INS_cmovo) && (ins <= INS_cmovg));
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Call the specified callback for each insGroup in the current
+ * method that is marked IGF_NOGCINTERRUPT. Stops if the callback returns
+ * false. Returns the final result of the callback.
+ */
+template <typename Callback>
+bool emitter::emitGenNoGCLst(Callback& cb)
+{
+ for (insGroup* ig = emitIGlist; ig; ig = ig->igNext)
+ {
+ if (ig->igFlags & IGF_NOGCINTERRUPT)
+ {
+ if (!cb(ig->igFuncIdx, ig->igOffs, ig->igSize))
+ {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
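+
+// Illustrative use (a sketch; the callback parameter types follow the call
+// above, and the lambda name is made up for the example):
+//
+//   auto dumpNoGC = [](unsigned funcIdx, unsigned offs, unsigned size) {
+//       printf("no-GC region: func %u, offs %u, size %u\n", funcIdx, offs, size);
+//       return true; // keep walking the remaining groups
+//   };
+//   bool walkedAll = emitGenNoGCLst(dumpNoGC);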
+
+/*****************************************************************************/
+#endif //_EMITINL_H_
+/*****************************************************************************/
diff --git a/src/jit/emitjmps.h b/src/jit/emitjmps.h
new file mode 100644
index 0000000000..60815d13ea
--- /dev/null
+++ b/src/jit/emitjmps.h
@@ -0,0 +1,58 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+#ifndef JMP_SMALL
+#error Must define JMP_SMALL macro before including this file
+#endif
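+
+// How a consumer typically uses this X-macro table (an illustrative sketch;
+// the enum and the EJ_ prefix are assumptions made for the example, the real
+// consumers live in the emitter headers): each row names a jump, the jump
+// with the reversed condition, and the corresponding instruction, e.g.
+//
+//   enum emitJumpKind
+//   {
+//       EJ_NONE,
+//   #define JMP_SMALL(jump, reverse, ins) EJ_##jump,
+//   #include "emitjmps.h"
+//       EJ_COUNT
+//   };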
+
+#if defined(_TARGET_XARCH_)
+
+// jump reverse instruction
+JMP_SMALL(jmp , jmp , jmp )
+JMP_SMALL(jo , jno , jo )
+JMP_SMALL(jno , jo , jno )
+JMP_SMALL(jb , jae , jb )
+JMP_SMALL(jae , jb , jae )
+JMP_SMALL(je , jne , je )
+JMP_SMALL(jne , je , jne )
+JMP_SMALL(jbe , ja , jbe )
+JMP_SMALL(ja , jbe , ja )
+JMP_SMALL(js , jns , js )
+JMP_SMALL(jns , js , jns )
+JMP_SMALL(jpe , jpo , jpe )
+JMP_SMALL(jpo , jpe , jpo )
+JMP_SMALL(jl , jge , jl )
+JMP_SMALL(jge , jl , jge )
+JMP_SMALL(jle , jg , jle )
+JMP_SMALL(jg , jle , jg )
+
+#elif defined(_TARGET_ARMARCH_)
+
+// jump reverse instruction condcode
+JMP_SMALL(jmp , jmp , b ) // AL always
+JMP_SMALL(eq , ne , beq ) // EQ
+JMP_SMALL(ne , eq , bne ) // NE
+JMP_SMALL(hs , lo , bhs ) // HS also CS
+JMP_SMALL(lo , hs , blo ) // LO also CC
+JMP_SMALL(mi , pl , bmi ) // MI
+JMP_SMALL(pl , mi , bpl ) // PL
+JMP_SMALL(vs , vc , bvs ) // VS
+JMP_SMALL(vc , vs , bvc ) // VC
+JMP_SMALL(hi , ls , bhi ) // HI
+JMP_SMALL(ls , hi , bls ) // LS
+JMP_SMALL(ge , lt , bge ) // GE
+JMP_SMALL(lt , ge , blt ) // LT
+JMP_SMALL(gt , le , bgt ) // GT
+JMP_SMALL(le , gt , ble ) // LE
+
+#else
+ #error Unsupported or unset target architecture
+#endif // target type
+
+/*****************************************************************************/
+#undef JMP_SMALL
+/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/emitpub.h b/src/jit/emitpub.h
new file mode 100644
index 0000000000..a2f041a5f3
--- /dev/null
+++ b/src/jit/emitpub.h
@@ -0,0 +1,162 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/************************************************************************/
+/* Overall emitter control (including startup and shutdown) */
+/************************************************************************/
+
+static void emitInit();
+static void emitDone();
+
+void emitBegCG(Compiler* comp, COMP_HANDLE cmpHandle);
+void emitEndCG();
+
+void emitBegFN(bool hasFramePtr
+#if defined(DEBUG)
+ ,
+ bool checkAlign
+#endif
+#ifdef LEGACY_BACKEND
+ ,
+ unsigned lclSize
+#endif // LEGACY_BACKEND
+ ,
+ unsigned maxTmpSize);
+
+void emitEndFN();
+
+void emitComputeCodeSizes();
+
+unsigned emitEndCodeGen(Compiler* comp,
+ bool contTrkPtrLcls,
+ bool fullyInt,
+ bool fullPtrMap,
+ bool returnsGCr,
+ unsigned xcptnsCount,
+ unsigned* prologSize,
+ unsigned* epilogSize,
+ void** codeAddr,
+ void** coldCodeAddr,
+ void** consAddr);
+
+/************************************************************************/
+/* Method prolog and epilog */
+/************************************************************************/
+
+unsigned emitGetEpilogCnt();
+
+template <typename Callback>
+bool emitGenNoGCLst(Callback& cb);
+
+void emitBegProlog();
+unsigned emitGetPrologOffsetEstimate();
+void emitMarkPrologEnd();
+void emitEndProlog();
+
+void emitCreatePlaceholderIG(insGroupPlaceholderType igType,
+ BasicBlock* igBB,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ bool last);
+
+void emitGeneratePrologEpilog();
+void emitStartPrologEpilogGeneration();
+void emitFinishPrologEpilogGeneration();
+
+/************************************************************************/
+/* Record a code position and later convert it to offset */
+/************************************************************************/
+
+void* emitCurBlock();
+unsigned emitCurOffset();
+
+UNATIVE_OFFSET emitCodeOffset(void* blockPtr, unsigned codeOffs);
+
+#ifdef DEBUG
+const char* emitOffsetToLabel(unsigned offs);
+#endif // DEBUG
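+
+// Typical usage pattern (an illustrative sketch; the local names are made up):
+// record a position while emitting, then translate it to a code offset once
+// the final code layout is known:
+//
+//   void*    block = emitCurBlock();
+//   unsigned offs  = emitCurOffset();
+//   ...                                   // emit more code
+//   UNATIVE_OFFSET codeOffs = emitCodeOffset(block, offs);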
+
+/************************************************************************/
+/* Output target-independent instructions */
+/************************************************************************/
+
+void emitIns_J(instruction ins, BasicBlock* dst, int instrCount = 0);
+
+/************************************************************************/
+/* Emit initialized data sections */
+/************************************************************************/
+
+UNATIVE_OFFSET emitDataGenBeg(UNATIVE_OFFSET size, bool dblAlign, bool codeLtab);
+
+UNATIVE_OFFSET emitBBTableDataGenBeg(unsigned numEntries, bool relativeAddr);
+
+void emitDataGenData(unsigned offs, const void* data, size_t size);
+
+void emitDataGenData(unsigned offs, BasicBlock* label);
+
+void emitDataGenEnd();
+
+UNATIVE_OFFSET emitDataConst(const void* cnsAddr, unsigned cnsSize, bool dblAlign);
+
+UNATIVE_OFFSET emitDataSize();
+
+/************************************************************************/
+/* Instruction information */
+/************************************************************************/
+
+#ifdef _TARGET_XARCH_
+static bool instrIs3opImul(instruction ins);
+static bool instrIsExtendedReg3opImul(instruction ins);
+static bool instrHasImplicitRegPairDest(instruction ins);
+static void check3opImulValues();
+static regNumber inst3opImulReg(instruction ins);
+static instruction inst3opImulForReg(regNumber reg);
+#endif
+
+/************************************************************************/
+/* Emit PDB offset translation information */
+/************************************************************************/
+
+#ifdef TRANSLATE_PDB
+
+static void SetILBaseOfCode(BYTE* pTextBase);
+static void SetILMethodBase(BYTE* pMethodEntry);
+static void SetILMethodStart(BYTE* pMethodCode);
+static void SetImgBaseOfCode(BYTE* pTextBase);
+
+void SetIDBaseToProlog();
+void SetIDBaseToOffset(int methodOffset);
+
+static void DisablePDBTranslation();
+static bool IsPDBEnabled();
+
+static void InitTranslationMaps(int ilCodeSize);
+static void DeleteTranslationMaps();
+static void InitTranslator(PDBRewriter* pPDB, int* rgSecMap, IMAGE_SECTION_HEADER** rgpHeader, int numSections);
+#endif
+
+/************************************************************************/
+/* Interface for generating unwind information */
+/************************************************************************/
+
+#ifdef _TARGET_ARMARCH_
+
+bool emitIsFuncEnd(emitLocation* emitLoc, emitLocation* emitLocNextFragment = NULL);
+
+void emitSplit(emitLocation* startLoc,
+ emitLocation* endLoc,
+ UNATIVE_OFFSET maxSplitSize,
+ void* context,
+ emitSplitCallbackType callbackFunc);
+
+void emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp);
+
+#endif // _TARGET_ARMARCH_
+
+#if defined(_TARGET_ARM_)
+
+unsigned emitGetInstructionSize(emitLocation* emitLoc);
+
+#endif // defined(_TARGET_ARM_)
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
new file mode 100644
index 0000000000..d43f766ee8
--- /dev/null
+++ b/src/jit/emitxarch.cpp
@@ -0,0 +1,11398 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX emitX86.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_XARCH_)
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#include "instr.h"
+#include "emit.h"
+#include "codegen.h"
+
+bool IsSSE2Instruction(instruction ins)
+{
+ return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_SSE2_INSTRUCTION);
+}
+
+bool IsSSEOrAVXInstruction(instruction ins)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_AVX_INSTRUCTION);
+#else // !FEATURE_AVX_SUPPORT
+ return IsSSE2Instruction(ins);
+#endif // !FEATURE_AVX_SUPPORT
+}
+
+bool emitter::IsAVXInstruction(instruction ins)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ return (UseAVX() && IsSSEOrAVXInstruction(ins));
+#else
+ return false;
+#endif
+}
+
+#define REX_PREFIX_MASK 0xFF00000000LL
+
+#ifdef FEATURE_AVX_SUPPORT
+// Returns true if the AVX instruction is a binary operator that requires 3 operands.
+// When we emit an instruction with only two operands, we will duplicate the destination
+// as a source.
+// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
+// be formalized by adding an additional field to the instruction table to
+// indicate whether it is a 3-operand instruction.
+bool emitter::IsThreeOperandBinaryAVXInstruction(instruction ins)
+{
+ return IsAVXInstruction(ins) &&
+ (ins == INS_cvtsi2ss || ins == INS_cvtsi2sd || ins == INS_cvtss2sd || ins == INS_cvtsd2ss ||
+ ins == INS_addss || ins == INS_addsd || ins == INS_subss || ins == INS_subsd || ins == INS_mulss ||
+ ins == INS_mulsd || ins == INS_divss || ins == INS_divsd || ins == INS_addps || ins == INS_addpd ||
+ ins == INS_subps || ins == INS_subpd || ins == INS_mulps || ins == INS_mulpd || ins == INS_cmpps ||
+ ins == INS_cmppd || ins == INS_andps || ins == INS_andpd || ins == INS_orps || ins == INS_orpd ||
+ ins == INS_xorps || ins == INS_xorpd || ins == INS_dpps || ins == INS_dppd || ins == INS_haddpd ||
+ ins == INS_por || ins == INS_pand || ins == INS_pandn || ins == INS_pcmpeqd || ins == INS_pcmpgtd ||
+ ins == INS_pcmpeqw || ins == INS_pcmpgtw || ins == INS_pcmpeqb || ins == INS_pcmpgtb ||
+ ins == INS_pcmpeqq || ins == INS_pcmpgtq || ins == INS_pmulld || ins == INS_pmullw ||
+
+ ins == INS_shufps || ins == INS_shufpd || ins == INS_minps || ins == INS_minss || ins == INS_minpd ||
+ ins == INS_minsd || ins == INS_divps || ins == INS_divpd || ins == INS_maxps || ins == INS_maxpd ||
+ ins == INS_maxss || ins == INS_maxsd || ins == INS_andnps || ins == INS_andnpd || ins == INS_paddb ||
+ ins == INS_paddw || ins == INS_paddd || ins == INS_paddq || ins == INS_psubb || ins == INS_psubw ||
+ ins == INS_psubd || ins == INS_psubq || ins == INS_pmuludq || ins == INS_pxor || ins == INS_pmaxub ||
+ ins == INS_pminub || ins == INS_pmaxsw || ins == INS_pminsw || ins == INS_insertps || ins == INS_vinsertf128 ||
+ ins == INS_punpckldq
+
+ );
+}
+
+// Returns true if the AVX instruction is a move operator that requires 3 operands.
+// When we emit an instruction with only two operands, we will duplicate the source
+// register in the vvvv field. This is because these merge sources into the dest.
+// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
+// be formalized by adding an additional field to the instruction table to
+// indicate whether it is a 3-operand instruction.
+bool emitter::IsThreeOperandMoveAVXInstruction(instruction ins)
+{
+ return IsAVXInstruction(ins) &&
+ (ins == INS_movlpd || ins == INS_movlps || ins == INS_movhpd || ins == INS_movhps || ins == INS_movss);
+}
+#endif // FEATURE_AVX_SUPPORT
+
+// Returns true if the AVX instruction is a 4-byte opcode.
+// Note that this should be true for any of the instructions in instrsXArch.h
+// that use the SSE38 or SSE3A macro.
+// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this
+// needs to be addressed by expanding instruction encodings.
+bool Is4ByteAVXInstruction(instruction ins)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ return (ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq || ins == INS_pcmpgtq ||
+ ins == INS_vbroadcastss || ins == INS_vbroadcastsd || ins == INS_vpbroadcastb || ins == INS_vpbroadcastw ||
+ ins == INS_vpbroadcastd || ins == INS_vpbroadcastq || ins == INS_vextractf128 || ins == INS_vinsertf128 ||
+ ins == INS_pmulld);
+#else
+ return false;
+#endif
+}
+
+#ifdef FEATURE_AVX_SUPPORT
+// Returns true if this instruction requires a VEX prefix
+// All AVX instructions require a VEX prefix
+bool emitter::TakesVexPrefix(instruction ins)
+{
+ // special case vzeroupper as it requires 2-byte VEX prefix
+ if (ins == INS_vzeroupper)
+ {
+ return false;
+ }
+
+ return IsAVXInstruction(ins);
+}
+
+// Add base VEX prefix without setting W, R, X, or B bits
+// L bit will be set based on emitter attr.
+//
+// 3-byte VEX prefix = C4 <R,X,B,m-mmmm> <W,vvvv,L,pp>
+// - R, X, B, W - bits to express corresponding REX prefixes
+//  - m-mmmm (5 bits)
+// 0-00001 - implied leading 0F opcode byte
+// 0-00010 - implied leading 0F 38 opcode bytes
+// 0-00011 - implied leading 0F 3A opcode bytes
+//    Rest    - reserved for future use; using them will result in an undefined instruction exception
+//
+// - vvvv (4-bits) - register specifier in 1's complement form; must be 1111 if unused
+// - L - scalar or AVX-128 bit operations (L=0), 256-bit operations (L=1)
+// - pp (2-bits) - opcode extension providing equivalent functionality of a SIMD size prefix
+// these prefixes are treated mandatory when used with escape opcode 0Fh for
+// some SIMD instructions
+// 00 - None (0F - packed float)
+// 01 - 66 (66 0F - packed double)
+//                 10  - F3     (F3 0F - scalar float)
+// 11 - F2 (F2 0F - scalar double)
+//
+// TODO-AMD64-CQ: for simplicity of implementation this routine always adds 3-byte VEX
+// prefix. Based on 'attr' param we could add 2-byte VEX prefix in case of scalar
+// and AVX-128 bit operations.
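+//
+// For reference, the DEFAULT_3BYTE_VEX_PREFIX constant below decomposes (taking the bytes above bit 32)
+// as C4 E0 78: C4 is the 3-byte VEX escape byte; E0 = 11100000 sets the inverted R, X and B bits to 1
+// (no extended registers yet) and leaves m-mmmm = 00000, to be filled in later from the implied leading
+// opcode bytes in emitOutputRexOrVexPrefixIfNeeded; 78 = 01111000 gives W = 0, vvvv = 1111 (unused),
+// L = 0 and pp = 00. LBIT_IN_3BYTE_VEX_PREFIX is simply bit 2 (the L bit) of that last byte.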
+#define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL
+#define LBIT_IN_3BYTE_VEX_PREFIX 0X00000400000000ULL
+size_t emitter::AddVexPrefix(instruction ins, size_t code, emitAttr attr)
+{
+ // Only AVX instructions require VEX prefix
+ assert(IsAVXInstruction(ins));
+
+ // Shouldn't have already added Vex prefix
+ assert(!hasVexPrefix(code));
+
+ // Set L bit to 1 in case of instructions that operate on 256-bits.
+ code |= DEFAULT_3BYTE_VEX_PREFIX;
+ if (attr == EA_32BYTE)
+ {
+ code |= LBIT_IN_3BYTE_VEX_PREFIX;
+ }
+
+ return code;
+}
+#endif // FEATURE_AVX_SUPPORT
+
+// Returns true if this instruction, for the given EA_SIZE(attr), will require a REX.W prefix
+bool TakesRexWPrefix(instruction ins, emitAttr attr)
+{
+#ifdef _TARGET_AMD64_
+ // movsx should always sign extend out to 8 bytes just because we don't track
+ // whether the dest should be 4 bytes or 8 bytes (attr indicates the size
+ // of the source, not the dest).
+ // A 4-byte movzx is equivalent to an 8 byte movzx, so it is not special
+ // cased here.
+ //
+ // Rex_jmp = jmp with rex prefix always requires rex.w prefix.
+ if (ins == INS_movsx || ins == INS_rex_jmp)
+ {
+ return true;
+ }
+
+ if (EA_SIZE(attr) != EA_8BYTE)
+ {
+ return false;
+ }
+
+ if (IsSSEOrAVXInstruction(ins))
+ {
+ if (ins == INS_cvttsd2si || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si ||
+ ins == INS_cvtsi2sd || ins == INS_cvtsi2ss || ins == INS_mov_xmm2i || ins == INS_mov_i2xmm)
+ {
+ return true;
+ }
+
+ return false;
+ }
+
+ // TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these
+ // opcodes...
+ // These are all the instructions that default to 8-byte operand without the REX.W bit
+ // With 1 special case: movzx because the 4 byte version still zeros-out the hi 4 bytes
+ // so we never need it
+ if ((ins != INS_push) && (ins != INS_pop) && (ins != INS_movq) && (ins != INS_movzx) && (ins != INS_push_hide) &&
+ (ins != INS_pop_hide) && (ins != INS_ret) && (ins != INS_call) && !((ins >= INS_i_jmp) && (ins <= INS_l_jg)))
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+#else //!_TARGET_AMD64 = _TARGET_X86_
+ return false;
+#endif //!_TARGET_AMD64_
+}
+
+// Returns true if using this register will require a REX.* prefix.
+// Since XMM registers overlap with YMM registers, this routine
+// can also be used to tell whether a YMM register will require one, when the
+// instruction in question is an AVX instruction.
+bool IsExtendedReg(regNumber reg)
+{
+#ifdef _TARGET_AMD64_
+ return ((reg >= REG_R8) && (reg <= REG_R15)) || ((reg >= REG_XMM8) && (reg <= REG_XMM15));
+#else
+ // X86 JIT operates in 32-bit mode and hence extended reg are not available.
+ return false;
+#endif
+}
+
+// Returns true if using this register, for the given EA_SIZE(attr), will require a REX.* prefix
+bool IsExtendedReg(regNumber reg, emitAttr attr)
+{
+#ifdef _TARGET_AMD64_
+ // Not a register, so doesn't need a prefix
+ if (reg > REG_XMM15)
+ {
+ return false;
+ }
+
+ // Opcode field only has 3 bits for the register, these high registers
+    // need a 4th bit, that comes from the REX prefix (either REX.X, REX.R, or REX.B)
+ if (IsExtendedReg(reg))
+ {
+ return true;
+ }
+
+ if (EA_SIZE(attr) != EA_1BYTE)
+ {
+ return false;
+ }
+
+    // There are 12 one-byte registers addressable 'below' r8b:
+ // al, cl, dl, bl, ah, ch, dh, bh, spl, bpl, sil, dil.
+    // The first 4 are always addressable; the last 8 are divided into 2 sets:
+ // ah, ch, dh, bh
+ // -- or --
+ // spl, bpl, sil, dil
+ // Both sets are encoded exactly the same, the difference is the presence
+ // of a REX prefix, even a REX prefix with no other bits set (0x40).
+ // So in order to get to the second set we need a REX prefix (but no bits).
+ //
+ // TODO-AMD64-CQ: if we ever want to start using the first set, we'll need a different way of
+ // encoding/tracking/encoding registers.
+ return (reg >= REG_RSP);
+#else
+ // X86 JIT operates in 32-bit mode and hence extended reg are not available.
+ return false;
+#endif
+}
+
+// Returns true if the given register is an XMM register. Since XMM registers
+// overlap with YMM registers, this routine can also be used to identify a YMM
+// register in the case of AVX instructions.
+//
+// Legacy X86: we have XMM0-XMM7 available but this routine cannot be used to
+// determine whether a reg is XMM because they share the same reg numbers
+// with integer registers. Hence always return false.
+bool IsXMMReg(regNumber reg)
+{
+#ifndef LEGACY_BACKEND
+#ifdef _TARGET_AMD64_
+ return (reg >= REG_XMM0) && (reg <= REG_XMM15);
+#else // !_TARGET_AMD64_
+ return (reg >= REG_XMM0) && (reg <= REG_XMM7);
+#endif // !_TARGET_AMD64_
+#else // LEGACY_BACKEND
+ return false;
+#endif // LEGACY_BACKEND
+}
+
+// Returns bits to be encoded in instruction for the given register.
+regNumber RegEncoding(regNumber reg)
+{
+#ifndef LEGACY_BACKEND
+ // XMM registers do not share the same reg numbers as integer registers.
+ // But register encoding of integer and XMM registers is the same.
+ // Therefore, subtract XMMBASE from regNumber to get the register encoding
+ // in case of XMM registers.
+ return (regNumber)((IsXMMReg(reg) ? reg - XMMBASE : reg) & 0x7);
+#else // LEGACY_BACKEND
+ // Legacy X86: XMM registers share the same reg numbers as integer registers and
+ // hence nothing to do to get reg encoding.
+ return (regNumber)(reg & 0x7);
+#endif // LEGACY_BACKEND
+}
+
+// Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes
+// SSE2: separate 1-byte prefix gets added before opcode.
+// AVX: specific bits within VEX prefix need to be set in bit-inverted form.
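+//
+// In the non-AVX case the REX byte is carried in bits 32-39 of the opcode value (for example, OR-ing in
+// 0x4800000000ULL below places a 0x48 REX.W byte there); emitOutputRexOrVexPrefixIfNeeded later extracts
+// that byte and makes sure it is emitted after any other prefixes and before the opcode.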
+size_t emitter::AddRexWPrefix(instruction ins, size_t code)
+{
+#ifdef _TARGET_AMD64_
+ if (UseAVX() && IsAVXInstruction(ins))
+ {
+ // W-bit is available only in 3-byte VEX prefix that starts with byte C4.
+ assert(hasVexPrefix(code));
+
+ // W-bit is the only bit that is added in non bit-inverted form.
+ return code | 0x00008000000000ULL;
+ }
+
+ return code | 0x4800000000ULL;
+#else
+ assert(!"UNREACHED");
+ return code;
+#endif
+}
+
+#ifdef _TARGET_AMD64_
+
+size_t emitter::AddRexRPrefix(instruction ins, size_t code)
+{
+ if (UseAVX() && IsAVXInstruction(ins))
+ {
+ // Right now support 3-byte VEX prefix
+ assert(hasVexPrefix(code));
+
+ // R-bit is added in bit-inverted form.
+ return code & 0xFF7FFFFFFFFFFFULL;
+ }
+
+ return code | 0x4400000000ULL;
+}
+
+size_t emitter::AddRexXPrefix(instruction ins, size_t code)
+{
+ if (UseAVX() && IsAVXInstruction(ins))
+ {
+ // Right now support 3-byte VEX prefix
+ assert(hasVexPrefix(code));
+
+ // X-bit is added in bit-inverted form.
+ return code & 0xFFBFFFFFFFFFFFULL;
+ }
+
+ return code | 0x4200000000ULL;
+}
+
+size_t emitter::AddRexBPrefix(instruction ins, size_t code)
+{
+ if (UseAVX() && IsAVXInstruction(ins))
+ {
+ // Right now support 3-byte VEX prefix
+ assert(hasVexPrefix(code));
+
+ // B-bit is added in bit-inverted form.
+ return code & 0xFFDFFFFFFFFFFFULL;
+ }
+
+ return code | 0x4100000000ULL;
+}
+
+// Adds REX prefix (0x40) without W, R, X or B bits set
+size_t emitter::AddRexPrefix(instruction ins, size_t code)
+{
+ assert(!UseAVX() || !IsAVXInstruction(ins));
+ return code | 0x4000000000ULL;
+}
+
+bool isPrefix(BYTE b)
+{
+ assert(b != 0); // Caller should check this
+ assert(b != 0x67); // We don't use the address size prefix
+ assert(b != 0x65); // The GS segment override prefix is emitted separately
+ assert(b != 0x64); // The FS segment override prefix is emitted separately
+ assert(b != 0xF0); // The lock prefix is emitted separately
+ assert(b != 0x2E); // We don't use the CS segment override prefix
+ assert(b != 0x3E); // Or the DS segment override prefix
+ assert(b != 0x26); // Or the ES segment override prefix
+ assert(b != 0x36); // Or the SS segment override prefix
+
+ // That just leaves the size prefixes used in SSE opcodes:
+ // Scalar Double Scalar Single Packed Double
+ return ((b == 0xF2) || (b == 0xF3) || (b == 0x66));
+}
+
+#endif //_TARGET_AMD64_
+
+// Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise.
+unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, size_t& code)
+{
+#ifdef _TARGET_AMD64_ // TODO-x86: This needs to be enabled for AVX support on x86.
+ if (hasVexPrefix(code))
+ {
+ // Only AVX instructions should have a VEX prefix
+ assert(UseAVX() && IsAVXInstruction(ins));
+ size_t vexPrefix = (code >> 32) & 0x00FFFFFF;
+ code &= 0x00000000FFFFFFFFLL;
+
+ WORD leadingBytes = 0;
+ BYTE check = (code >> 24) & 0xFF;
+ if (check != 0)
+ {
+ // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
+ // 4-byte opcode: with the bytes ordered as 0x22114433
+ // check for a prefix in the 11 position
+ BYTE sizePrefix = (code >> 16) & 0xFF;
+ if (sizePrefix != 0 && isPrefix(sizePrefix))
+ {
+ // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits
+ //
+ // 00 - None (0F - packed float)
+ // 01 - 66 (66 0F - packed double)
+                //   10  - F3     (F3 0F - scalar float)
+ // 11 - F2 (F2 0F - scalar double)
+ switch (sizePrefix)
+ {
+ case 0x66:
+ vexPrefix |= 0x01;
+ break;
+ case 0xF3:
+ vexPrefix |= 0x02;
+ break;
+ case 0xF2:
+ vexPrefix |= 0x03;
+ break;
+ default:
+ assert(!"unrecognized SIMD size prefix");
+ unreached();
+ }
+
+ // Now the byte in the 22 position must be an escape byte 0F
+ leadingBytes = check;
+ assert(leadingBytes == 0x0F);
+
+ // Get rid of both sizePrefix and escape byte
+ code &= 0x0000FFFFLL;
+
+ // Check the byte in the 33 position to see if it is 3A or 38.
+ // In such a case escape bytes must be 0x0F3A or 0x0F38
+ check = code & 0xFF;
+ if (check == 0x3A || check == 0x38)
+ {
+ leadingBytes = (leadingBytes << 8) | check;
+ code &= 0x0000FF00LL;
+ }
+ }
+ }
+ else
+ {
+ // 2-byte opcode with the bytes ordered as 0x0011RM22
+ // the byte in position 11 must be an escape byte.
+ leadingBytes = (code >> 16) & 0xFF;
+ assert(leadingBytes == 0x0F || leadingBytes == 0x00);
+ code &= 0xFFFF;
+ }
+
+ // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38
+ // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these
+ // implied leading bytes
+ switch (leadingBytes)
+ {
+ case 0x00:
+ // there is no leading byte
+ break;
+ case 0x0F:
+ vexPrefix |= 0x0100;
+ break;
+ case 0x0F38:
+ vexPrefix |= 0x0200;
+ break;
+ case 0x0F3A:
+ vexPrefix |= 0x0300;
+ break;
+ default:
+ assert(!"encountered unknown leading bytes");
+ unreached();
+ }
+
+ // At this point
+ // VEX.2211RM33 got transformed as VEX.0000RM33
+ // VEX.0011RM22 got transformed as VEX.0000RM22
+ //
+ // Now output VEX prefix leaving the 4-byte opcode
+ emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF));
+ emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0xFF));
+ emitOutputByte(dst + 2, vexPrefix & 0xFF);
+ return 3;
+ }
+ else if (code > 0x00FFFFFFFFLL)
+ {
+ BYTE prefix = (code >> 32) & 0xFF;
+ noway_assert(prefix >= 0x40 && prefix <= 0x4F);
+ code &= 0x00000000FFFFFFFFLL;
+
+ // TODO-AMD64-Cleanup: when we remove the prefixes (just the SSE opcodes right now)
+ // we can remove this code as well
+
+ // The REX prefix is required to come after all other prefixes.
+ // Some of our 'opcodes' actually include some prefixes, if that
+ // is the case, shift them over and place the REX prefix after
+ // the other prefixes, and emit any prefix that got moved out.
+ BYTE check = (code >> 24) & 0xFF;
+ if (check == 0)
+ {
+ // 3-byte opcode: with the bytes ordered as 0x00113322
+ // check for a prefix in the 11 position
+ check = (code >> 16) & 0xFF;
+ if (check != 0 && isPrefix(check))
+ {
+ // Swap the rex prefix and whatever this prefix is
+ code = (((DWORD)prefix << 16) | (code & 0x0000FFFFLL));
+ // and then emit the other prefix
+ return emitOutputByte(dst, check);
+ }
+ }
+ else
+ {
+ // 4-byte opcode with the bytes ordered as 0x22114433
+ // first check for a prefix in the 11 position
+ BYTE check2 = (code >> 16) & 0xFF;
+ if (isPrefix(check2))
+ {
+ assert(!isPrefix(check)); // We currently don't use this, so it is untested
+ if (isPrefix(check))
+ {
+ // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX
+ // Change to c2rrc1XXXX, and emit check2 now
+ code = (((size_t)prefix << 24) | ((size_t)check << 16) | (code & 0x0000FFFFLL));
+ }
+ else
+ {
+ // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode)
+ // Change to c2XXrrXXXX, and emit check2 now
+ code = (((size_t)check << 24) | ((size_t)prefix << 16) | (code & 0x0000FFFFLL));
+ }
+ return emitOutputByte(dst, check2);
+ }
+ }
+
+ return emitOutputByte(dst, prefix);
+ }
+#endif // _TARGET_AMD64_
+
+ return 0;
+}
+
+#ifdef _TARGET_AMD64_
+/*****************************************************************************
+ * Is the last instruction emitted a call instruction?
+ */
+bool emitter::emitIsLastInsCall()
+{
+ if ((emitLastIns != nullptr) && (emitLastIns->idIns() == INS_call))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ * We're about to create an epilog. If the last instruction we output was a 'call',
+ * then we need to insert a NOP, to allow for proper exception-handling behavior.
+ */
+void emitter::emitOutputPreEpilogNOP()
+{
+ if (emitIsLastInsCall())
+ {
+ emitIns(INS_nop);
+ }
+}
+
+#endif //_TARGET_AMD64_
+
+// Size of rex prefix in bytes
+unsigned emitter::emitGetRexPrefixSize(instruction ins)
+{
+
+ // In case of AVX instructions, REX prefixes are part of VEX prefix.
+ // And hence requires no additional byte to encode REX prefixes.
+ if (IsAVXInstruction(ins))
+ {
+ return 0;
+ }
+
+ // If not AVX, then we would need 1-byte to encode REX prefix.
+ return 1;
+}
+
+// Size of vex prefix in bytes
+unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr)
+{
+ // TODO-XArch-CQ: right now we default to 3-byte VEX prefix. There is a
+ // scope for size win by using 2-byte vex prefix for some of the
+ // scalar, avx-128 and most common avx-256 instructions.
+ if (IsAVXInstruction(ins))
+ {
+ return 3;
+ }
+
+ // If not AVX, then we don't need to encode vex prefix.
+ return 0;
+}
+
+// The VEX prefix encodes some bytes of the opcode, so the overall size of the instruction shrinks.
+// Therefore, estimating the size by adding the VEX prefix size to the size of the instruction opcode bytes
+// will always overestimate it. Instead, this routine adjusts the size of the VEX prefix based on the number
+// of opcode bytes it encodes, so that the instruction size estimate is accurate.
+// Basically, this function decreases vexPrefixSize so that opcodeSize + vexPrefixAdjustedSize is the right size:
+//   rightOpcodeSize + vexPrefixSize
+// = (opcodeSize - extraBytesSize) + vexPrefixSize
+// = opcodeSize + (vexPrefixSize - extraBytesSize)
+// = opcodeSize + vexPrefixAdjustedSize
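+//
+// For example, when the opcode bytes include both an escape byte (0F) and a SIMD size prefix (66/F2/F3),
+// both get folded into the VEX prefix's m-mmmm and pp fields, so the adjusted size returned here is
+// 3 - 1 - 1 = 1 byte.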
+unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, size_t code)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ if (IsAVXInstruction(ins))
+ {
+ unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr);
+ // Currently vex prefix size is hard coded as 3 bytes,
+ // In future we should support 2 bytes vex prefix.
+ assert(vexPrefixAdjustedSize == 3);
+
+        // In this case, the opcode will contain at least one escape prefix byte,
+        // so vexPrefixAdjustedSize should be decreased by one.
+ vexPrefixAdjustedSize -= 1;
+
+ // Get the fourth byte in Opcode.
+ // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not.
+ BYTE check = (code >> 24) & 0xFF;
+ if (check != 0)
+ {
+ // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
+ // 4-byte opcode: with the bytes ordered as 0x22114433
+ // Simd prefix is at the first byte.
+ BYTE sizePrefix = (code >> 16) & 0xFF;
+ if (sizePrefix != 0 && isPrefix(sizePrefix))
+ {
+ vexPrefixAdjustedSize -= 1;
+ }
+
+            // If the opcode is 4 bytes, then the second escape prefix is in the fourth byte of the opcode.
+            // But in this case the opcode size does not count the ModR/M byte:
+            //   opcodeSize + vexPrefixAdjustedSize - extraEscapePrefixSize + modRMSize
+            // = opcodeSize + vexPrefixAdjustedSize - 1 + 1
+            // = opcodeSize + vexPrefixAdjustedSize
+            // So although we may have a second escape prefix byte, we won't decrease vexPrefixAdjustedSize.
+ }
+
+ return vexPrefixAdjustedSize;
+ }
+#endif // FEATURE_AVX_SUPPORT
+
+ return 0;
+}
+
+// Get size of rex or vex prefix emitted in code
+unsigned emitter::emitGetPrefixSize(size_t code)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ if (code & VEX_PREFIX_MASK_3BYTE)
+ {
+ return 3;
+ }
+ else
+#endif
+ if (code & REX_PREFIX_MASK)
+ {
+ return 1;
+ }
+
+ return 0;
+}
+
+#ifdef _TARGET_X86_
+/*****************************************************************************
+ *
+ * Record a non-empty stack
+ */
+
+void emitter::emitMarkStackLvl(unsigned stackLevel)
+{
+ assert(int(stackLevel) >= 0);
+ assert(emitCurStackLvl == 0);
+ assert(emitCurIG->igStkLvl == 0);
+ assert(emitCurIGfreeNext == emitCurIGfreeBase);
+
+ assert(stackLevel && stackLevel % sizeof(int) == 0);
+
+ emitCurStackLvl = emitCurIG->igStkLvl = stackLevel;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Get hold of the address mode displacement value for an indirect call.
+ */
+
+inline ssize_t emitter::emitGetInsCIdisp(instrDesc* id)
+{
+ if (id->idIsLargeCall())
+ {
+ return ((instrDescCGCA*)id)->idcDisp;
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+
+ return id->idAddr()->iiaAddrMode.amDisp;
+ }
+}
+
+/** ***************************************************************************
+ *
+ * The following table is used by the instIsFP()/instUse/DefFlags() helpers.
+ */
+
+#define INST_DEF_FL 0x20 // does the instruction set flags?
+#define INST_USE_FL 0x40 // does the instruction use flags?
+
+// clang-format off
+const BYTE CodeGenInterface::instInfo[] =
+{
+ #define INST0(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
+ #define INST1(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+};
+// clang-format on
+
+/*****************************************************************************
+ *
+ * Initialize the table used by emitInsModeFormat().
+ */
+
+// clang-format off
+const BYTE emitter::emitInsModeFmtTab[] =
+{
+ #define INST0(id, nm, fp, um, rf, wf, mr ) um,
+ #define INST1(id, nm, fp, um, rf, wf, mr ) um,
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) um,
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) um,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) um,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) um,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+};
+// clang-format on
+
+#ifdef DEBUG
+unsigned const emitter::emitInsModeFmtCnt = sizeof(emitInsModeFmtTab) / sizeof(emitInsModeFmtTab[0]);
+#endif
+
+/*****************************************************************************
+ *
+ *  Combine the given base format with the update mode of the instruction.
+ */
+
+inline emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base)
+{
+ assert(IF_RRD + IUM_RD == IF_RRD);
+ assert(IF_RRD + IUM_WR == IF_RWR);
+ assert(IF_RRD + IUM_RW == IF_RRW);
+
+ return (insFormat)(base + emitInsUpdateMode(ins));
+}
+
+/*****************************************************************************
+ *
+ *  A version of emitInsModeFormat() that handles X87 floating-point instructions.
+ */
+
+#if FEATURE_STACK_FP_X87
+emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base, insFormat FPld, insFormat FPst)
+{
+ if (CodeGen::instIsFP(ins))
+ {
+ assert(IF_TRD_SRD + 1 == IF_TWR_SRD);
+ assert(IF_TRD_SRD + 2 == IF_TRW_SRD);
+
+ assert(IF_TRD_MRD + 1 == IF_TWR_MRD);
+ assert(IF_TRD_MRD + 2 == IF_TRW_MRD);
+
+ assert(IF_TRD_ARD + 1 == IF_TWR_ARD);
+ assert(IF_TRD_ARD + 2 == IF_TRW_ARD);
+
+ switch (ins)
+ {
+ case INS_fst:
+ case INS_fstp:
+ case INS_fistp:
+ case INS_fistpl:
+ return (insFormat)(FPst);
+
+ case INS_fld:
+ case INS_fild:
+ return (insFormat)(FPld + 1);
+
+ case INS_fcomp:
+ case INS_fcompp:
+ case INS_fcomip:
+ return (insFormat)(FPld);
+
+ default:
+ return (insFormat)(FPld + 2);
+ }
+ }
+ else
+ {
+ return emitInsModeFormat(ins, base);
+ }
+}
+#endif // FEATURE_STACK_FP_X87
+
+// This is a helper we need due to VS Whidbey #254016 in order to determine
+// whether we cannot possibly be updating an integer register. This is not the best
+// solution, but the other ones (see the bug) are going to be much more complicated.
+// The issue here is that on legacy x86, the XMM registers use the same register numbers
+// as the general purpose registers, so we need to distinguish them.
+// We really only need this for x86 where this issue exists.
+bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
+{
+ instruction ins = id->idIns();
+
+ // The following SSE2 instructions write to a general purpose integer register.
+ if (!IsSSEOrAVXInstruction(ins) || ins == INS_mov_xmm2i || ins == INS_cvttsd2si
+#ifndef LEGACY_BACKEND
+ || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si
+#endif // !LEGACY_BACKEND
+ )
+ {
+ return false;
+ }
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Returns the base encoding of the given CPU instruction.
+ */
+
+inline size_t insCode(instruction ins)
+{
+ // clang-format off
+ const static
+ size_t insCodes[] =
+ {
+ #define INST0(id, nm, fp, um, rf, wf, mr ) mr,
+ #define INST1(id, nm, fp, um, rf, wf, mr ) mr,
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mr,
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mr,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mr,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+ };
+ // clang-format on
+
+ assert((unsigned)ins < sizeof(insCodes) / sizeof(insCodes[0]));
+ assert((insCodes[ins] != BAD_CODE));
+
+ return insCodes[ins];
+}
+
+/*****************************************************************************
+ *
+ * Returns the "[r/m], 32-bit icon" encoding of the given CPU instruction.
+ */
+
+inline size_t insCodeMI(instruction ins)
+{
+ // clang-format off
+ const static
+ size_t insCodesMI[] =
+ {
+ #define INST0(id, nm, fp, um, rf, wf, mr )
+ #define INST1(id, nm, fp, um, rf, wf, mr )
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mi,
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mi,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mi,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mi,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+ };
+ // clang-format on
+
+ assert((unsigned)ins < sizeof(insCodesMI) / sizeof(insCodesMI[0]));
+ assert((insCodesMI[ins] != BAD_CODE));
+
+ return insCodesMI[ins];
+}
+
+/*****************************************************************************
+ *
+ * Returns the "reg, [r/m]" encoding of the given CPU instruction.
+ */
+
+inline size_t insCodeRM(instruction ins)
+{
+ // clang-format off
+ const static
+ size_t insCodesRM[] =
+ {
+ #define INST0(id, nm, fp, um, rf, wf, mr )
+ #define INST1(id, nm, fp, um, rf, wf, mr )
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi )
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) rm,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) rm,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rm,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+ };
+ // clang-format on
+
+ assert((unsigned)ins < sizeof(insCodesRM) / sizeof(insCodesRM[0]));
+ assert((insCodesRM[ins] != BAD_CODE));
+
+ return insCodesRM[ins];
+}
+
+/*****************************************************************************
+ *
+ * Returns the "AL/AX/EAX, imm" accumulator encoding of the given instruction.
+ */
+
+inline size_t insCodeACC(instruction ins)
+{
+ // clang-format off
+ const static
+ size_t insCodesACC[] =
+ {
+ #define INST0(id, nm, fp, um, rf, wf, mr )
+ #define INST1(id, nm, fp, um, rf, wf, mr )
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi )
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm )
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) a4,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) a4,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+ };
+ // clang-format on
+
+ assert((unsigned)ins < sizeof(insCodesACC) / sizeof(insCodesACC[0]));
+ assert((insCodesACC[ins] != BAD_CODE));
+
+ return insCodesACC[ins];
+}
+
+/*****************************************************************************
+ *
+ * Returns the "register" encoding of the given CPU instruction.
+ */
+
+inline size_t insCodeRR(instruction ins)
+{
+ // clang-format off
+ const static
+ size_t insCodesRR[] =
+ {
+ #define INST0(id, nm, fp, um, rf, wf, mr )
+ #define INST1(id, nm, fp, um, rf, wf, mr )
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi )
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm )
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 )
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rr,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+ };
+ // clang-format on
+
+ assert((unsigned)ins < sizeof(insCodesRR) / sizeof(insCodesRR[0]));
+ assert((insCodesRR[ins] != BAD_CODE));
+
+ return insCodesRR[ins];
+}
+
+// clang-format off
+const static
+size_t insCodesMR[] =
+{
+ #define INST0(id, nm, fp, um, rf, wf, mr )
+ #define INST1(id, nm, fp, um, rf, wf, mr ) mr,
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mr,
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mr,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mr,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+};
+// clang-format on
+
+// Returns true iff the given CPU instruction has an MR encoding.
+inline size_t hasCodeMR(instruction ins)
+{
+ assert((unsigned)ins < sizeof(insCodesMR) / sizeof(insCodesMR[0]));
+ return ((insCodesMR[ins] != BAD_CODE));
+}
+
+/*****************************************************************************
+ *
+ * Returns the "[r/m], reg" or "[r/m]" encoding of the given CPU instruction.
+ */
+
+inline size_t insCodeMR(instruction ins)
+{
+ assert((unsigned)ins < sizeof(insCodesMR) / sizeof(insCodesMR[0]));
+ assert((insCodesMR[ins] != BAD_CODE));
+
+ return insCodesMR[ins];
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in the bit0-2
+ * part of an opcode.
+ */
+
+inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, size_t* code)
+{
+ assert(reg < REG_STK);
+
+#ifndef LEGACY_BACKEND
+#ifdef _TARGET_AMD64_
+ // Either code is not NULL or reg is not an extended reg.
+ // If reg is an extended reg, instruction needs to be prefixed with 'REX'
+ // which would require code != NULL.
+ assert(code != nullptr || !IsExtendedReg(reg));
+
+ if (IsExtendedReg(reg))
+ {
+ *code = AddRexBPrefix(ins, *code); // REX.B
+ }
+ else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
+ {
+ // We are assuming that we only use/encode SPL, BPL, SIL and DIL
+ // not the corresponding AH, CH, DH, or BH
+ *code = AddRexPrefix(ins, *code); // REX
+ }
+#endif // _TARGET_AMD64_
+
+ reg = RegEncoding(reg);
+ assert(reg < 8);
+ return reg;
+
+#else // LEGACY_BACKEND
+
+ assert(reg < 8);
+ return reg;
+
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in the bit3-5
+ * part of an opcode.
+ */
+
+inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, size_t* code)
+{
+ assert(reg < REG_STK);
+
+#ifndef LEGACY_BACKEND
+#ifdef _TARGET_AMD64_
+ // Either code is not NULL or reg is not an extended reg.
+ // If reg is an extended reg, instruction needs to be prefixed with 'REX'
+ // which would require code != NULL.
+ assert(code != nullptr || !IsExtendedReg(reg));
+
+ if (IsExtendedReg(reg))
+ {
+ *code = AddRexRPrefix(ins, *code); // REX.R
+ }
+ else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
+ {
+ // We are assuming that we only use/encode SPL, BPL, SIL and DIL
+ // not the corresponding AH, CH, DH, or BH
+ *code = AddRexPrefix(ins, *code); // REX
+ }
+#endif // _TARGET_AMD64_
+
+ reg = RegEncoding(reg);
+ assert(reg < 8);
+ return (reg << 3);
+
+#else // LEGACY_BACKEND
+ assert(reg < 8);
+ return (reg << 3);
+#endif // LEGACY_BACKEND
+}
+
+/***********************************************************************************
+ *
+ * Returns modified AVX opcode with the specified register encoded in bits 3-6 of
+ * byte 2 of VEX prefix.
+ */
+inline size_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, size_t code)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ assert(reg < REG_STK);
+ assert(IsAVXInstruction(ins));
+ assert(hasVexPrefix(code));
+
+ // Get 4-bit register encoding
+ // RegEncoding() gives lower 3 bits
+ // IsExtendedReg() gives MSB.
+ size_t regBits = RegEncoding(reg);
+ if (IsExtendedReg(reg))
+ {
+ regBits |= 0x08;
+ }
+
+ // VEX prefix encodes register operand in 1's complement form
+    // Shift count = 32 bits of opcode + 3 bits to skip over the pp and L fields (bits 0-2 of VEX byte 2),
+    // which places the 4 register bits into the vvvv field (bits 3-6).
+ assert(regBits <= 0xF);
+ regBits <<= 35;
+ return code ^ regBits;
+
+#else
+ return code;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in the bit3-5
+ * part of an SIB byte (unshifted).
+ * Used exclusively to generate the REX.X bit and truncate the register.
+ */
+
+inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, size_t* code)
+{
+ assert(reg < REG_STK);
+
+#ifdef _TARGET_AMD64_
+ // Either code is not NULL or reg is not an extended reg.
+ // If reg is an extended reg, instruction needs to be prefixed with 'REX'
+ // which would require code != NULL.
+ assert(code != nullptr || reg < REG_R8 || (reg >= REG_XMM0 && reg < REG_XMM8));
+
+ if (IsExtendedReg(reg))
+ {
+ *code = AddRexXPrefix(ins, *code); // REX.X
+ }
+ reg = RegEncoding(reg);
+#endif
+
+ assert(reg < 8);
+ return reg;
+}
+
+/*****************************************************************************
+ *
+ * Returns the "[r/m]" opcode with the mod/RM field set to register.
+ */
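+//
+// (When the byte at mask 0xFF00 is free, the ModR/M byte is placed there; OR-ing in 0xC000 sets its top
+// two bits, mod = 11, which selects register-direct addressing.)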
+
+inline size_t emitter::insEncodeMRreg(instruction ins, size_t code)
+{
+ // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
+ // Otherwise, it will be placed after the 4 byte encoding.
+ if ((code & 0xFF00) == 0)
+ {
+ assert((code & 0xC000) == 0);
+ code |= 0xC000;
+ }
+
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Returns the "[r/m], icon" opcode with the mod/RM field set to register.
+ */
+
+inline size_t insEncodeMIreg(instruction ins, size_t code)
+{
+ assert((code & 0xC000) == 0);
+ code |= 0xC000;
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Returns the given "[r/m]" opcode with the mod/RM field set to register.
+ */
+
+inline size_t insEncodeRMreg(instruction ins, size_t code)
+{
+ // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
+ // Otherwise, it will be placed after the 4 byte encoding.
+ if ((code & 0xFF00) == 0)
+ {
+ assert((code & 0xC000) == 0);
+ code |= 0xC000;
+ }
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Returns the "byte ptr [r/m]" opcode with the mod/RM field set to
+ * the given register.
+ */
+
+inline size_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, size_t code)
+{
+ assert((code & 0xC000) == 0);
+ code |= 0xC000;
+ unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
+ code |= regcode;
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Returns the "byte ptr [r/m], icon" opcode with the mod/RM field set to
+ * the given register.
+ */
+
+inline size_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, size_t code)
+{
+ assert((code & 0xC000) == 0);
+ code |= 0xC000;
+ unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
+ code |= regcode;
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Returns true iff the given instruction does not have a "[r/m], icon" form, but *does* have a
+ * "reg,reg,imm8" form.
+ */
+inline bool insNeedsRRIb(instruction ins)
+{
+ // If this list gets longer, use a switch or a table.
+ return ins == INS_imul;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the "reg,reg,imm8" opcode with both regs set to the
+ *  given register.
+ */
+inline size_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size)
+{
+ assert(size == EA_4BYTE); // All we handle for now.
+ assert(insNeedsRRIb(ins));
+ // If this list gets longer, use a switch, or a table lookup.
+ size_t code = 0x69c0;
+ unsigned regcode = insEncodeReg012(ins, reg, size, &code);
+ // We use the same register as source and destination. (Could have another version that does both regs...)
+ code |= regcode;
+ code |= (regcode << 3);
+ return code;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the "+reg" opcode with the given register encoded in the low
+ *  nibble of the opcode.
+ */
+
+inline size_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size)
+{
+ size_t code = insCodeRR(ins);
+ unsigned regcode = insEncodeReg012(ins, reg, size, &code);
+ code |= regcode;
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Return the 'SS' field value for the given index scale factor.
+ */
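+//
+// The SS field occupies the top two bits (bits 6-7) of the SIB byte, which is why the table below uses
+// 0x00, 0x40, 0x80 and 0xC0 for scale factors 1, 2, 4 and 8.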
+
+inline unsigned insSSval(unsigned scale)
+{
+ assert(scale == 1 || scale == 2 || scale == 4 || scale == 8);
+
+ const static BYTE scales[] = {
+ 0x00, // 1
+ 0x40, // 2
+ 0xFF, // 3
+ 0x80, // 4
+ 0xFF, // 5
+ 0xFF, // 6
+ 0xFF, // 7
+ 0xC0, // 8
+ };
+
+ return scales[scale - 1];
+}
+
+const instruction emitJumpKindInstructions[] = {INS_nop,
+
+#define JMP_SMALL(en, rev, ins) INS_##ins,
+#include "emitjmps.h"
+
+ INS_call};
+
+const emitJumpKind emitReverseJumpKinds[] = {
+ EJ_NONE,
+
+#define JMP_SMALL(en, rev, ins) EJ_##rev,
+#include "emitjmps.h"
+};
+
+/*****************************************************************************
+ * Look up the instruction for a jump kind
+ */
+
+/*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind)
+{
+ assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions));
+ return emitJumpKindInstructions[jumpKind];
+}
+
+/*****************************************************************************
+ * Reverse the conditional jump
+ */
+
+/* static */ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind)
+{
+ assert(jumpKind < EJ_COUNT);
+ return emitReverseJumpKinds[jumpKind];
+}
+
+/*****************************************************************************
+ * The size for these instructions is less than EA_4BYTE,
+ *  Returns true for instructions that have no encoding of their own
+ *  (currently only the INS_align pseudo-instruction).
+
+inline bool emitInstHasNoCode(instruction ins)
+{
+ if (ins == INS_align)
+ {
+ return true;
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ * When encoding instructions that operate on byte registers
+ * we have to ensure that we use a low register (EAX, EBX, ECX or EDX)
+ * otherwise we will incorrectly encode the instruction
+ */
+
+bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumber reg2 /* = REG_NA */)
+{
+#if CPU_HAS_BYTE_REGS
+ if (size != EA_1BYTE) // Not operating on a byte register is fine
+ {
+ return true;
+ }
+
+ if ((ins != INS_movsx) && // These two instructions support high register
+ (ins != INS_movzx)) // encodings for reg1
+ {
+ // reg1 must be a byte-able register
+ if ((genRegMask(reg1) & RBM_BYTE_REGS) == 0)
+ {
+ return false;
+ }
+ }
+ // if reg2 is not REG_NA then reg2 must be a byte-able register
+ if ((reg2 != REG_NA) && ((genRegMask(reg2) & RBM_BYTE_REGS) == 0))
+ {
+ return false;
+ }
+#endif
+ // The instruction can be encoded
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Estimate the size (in bytes of generated code) of the given instruction.
+ */
+
+inline UNATIVE_OFFSET emitter::emitInsSize(size_t code)
+{
+ UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2;
+#ifdef _TARGET_AMD64_
+ size += emitGetPrefixSize(code);
+#endif
+ return size;
+}
+
+inline UNATIVE_OFFSET emitter::emitInsSizeRM(instruction ins)
+{
+ return emitInsSize(insCodeRM(ins));
+}
+
+inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr)
+{
+ emitAttr size = EA_SIZE(attr);
+
+ UNATIVE_OFFSET sz;
+#ifdef _TARGET_AMD64_
+ // If Byte 4 (which is 0xFF00) is non-zero, that's where the RM encoding goes.
+ // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes.
+ // This would probably be better expressed as a different format or something?
+ if (insCodeRM(ins) & 0xFF00)
+ {
+ sz = 5;
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ size_t code = insCodeRM(ins);
+ sz = emitInsSize(insEncodeRMreg(ins, code));
+ }
+
+ // Most 16-bit operand instructions will need a prefix
+ if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
+ {
+ sz += 1;
+ }
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, size, insCodeRM(ins));
+
+ // REX prefix
+ if ((TakesRexWPrefix(ins, size) && ((ins != INS_xor) || (reg1 != reg2))) || IsExtendedReg(reg1, attr) ||
+ IsExtendedReg(reg2, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ return sz;
+}
+
+/*****************************************************************************/
+
+inline UNATIVE_OFFSET emitter::emitInsSizeSV(size_t code, int var, int dsp)
+{
+ UNATIVE_OFFSET size = emitInsSize(code);
+ UNATIVE_OFFSET offs;
+ bool offsIsUpperBound = true;
+ bool EBPbased = true;
+
+ /* Is this a temporary? */
+
+ if (var < 0)
+ {
+ /* An address off of ESP takes an extra byte */
+
+ if (!emitHasFramePtr)
+ {
+ size++;
+ }
+
+#ifndef LEGACY_BACKEND
+ // The offset is already assigned. Find the temp.
+ TempDsc* tmp = emitComp->tmpFindNum(var, Compiler::TEMP_USAGE_USED);
+ if (tmp == nullptr)
+ {
+ // It might be in the free lists, if we're working on zero initializing the temps.
+ tmp = emitComp->tmpFindNum(var, Compiler::TEMP_USAGE_FREE);
+ }
+ assert(tmp != nullptr);
+ offs = tmp->tdTempOffs();
+
+ // We only care about the magnitude of the offset here, to determine instruction size.
+ if (emitComp->isFramePointerUsed())
+ {
+ if ((int)offs < 0)
+ {
+ offs = -(int)offs;
+ }
+ }
+ else
+ {
+ // SP-based offsets must already be positive.
+ assert((int)offs >= 0);
+ }
+#else // LEGACY_BACKEND
+ /* We'll have to estimate the max. possible offset of this temp */
+
+ // TODO: Get an estimate of the temp offset instead of assuming
+ // TODO: that any temp may be at the max. temp offset!!!!!!!!!!
+
+ if (emitComp->lvaTempsHaveLargerOffsetThanVars())
+ {
+ offs = emitLclSize + emitMaxTmpSize;
+ }
+ else
+ {
+ offs = emitMaxTmpSize;
+ }
+
+ offsIsUpperBound = false;
+#endif // LEGACY_BACKEND
+ }
+ else
+ {
+
+ /* Get the frame offset of the (non-temp) variable */
+
+ offs = dsp + emitComp->lvaFrameAddress(var, &EBPbased);
+
+ /* An address off of ESP takes an extra byte */
+
+ if (!EBPbased)
+ {
+ ++size;
+ }
+
+ /* Is this a stack parameter reference? */
+
+ if (emitComp->lvaIsParameter(var)
+#if !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)
+ && !emitComp->lvaIsRegArgument(var)
+#endif // !_TARGET_AMD64_ || UNIX_AMD64_ABI
+ )
+ {
+ /* If no EBP frame, arguments are off of ESP, above temps */
+
+ if (!EBPbased)
+ {
+ assert((int)offs >= 0);
+
+ offsIsUpperBound = false; // since #temps can increase
+ offs += emitMaxTmpSize;
+ }
+ }
+ else
+ {
+ /* Locals off of EBP are at negative offsets */
+
+ if (EBPbased)
+ {
+#if defined(_TARGET_AMD64_) && !defined(PLATFORM_UNIX)
+                // If localloc is not used, then ebp chaining is done and hence
+                // offsets of locals will be negative; otherwise, offsets
+                // will be positive. In the future, when RBP gets positioned in the
+                // middle of the frame so as to optimize instruction encoding size,
+                // the asserts below will need to be modified appropriately.
+ // However, for Unix platforms, we always do frame pointer chaining,
+ // so offsets from the frame pointer will always be negative.
+ if (emitComp->compLocallocUsed || emitComp->opts.compDbgEnC)
+ {
+ noway_assert((int)offs >= 0);
+ }
+ else
+#endif
+ {
+ // Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef UNIX_AMD64_ABI
+ LclVarDsc* varDsc = emitComp->lvaTable + var;
+ bool isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg;
+ // Register passed args could have a stack offset of 0.
+ noway_assert((int)offs < 0 || isRegPassedArg);
+#else // !UNIX_AMD64_ABI
+ noway_assert((int)offs < 0);
+#endif // !UNIX_AMD64_ABI
+ }
+
+ assert(emitComp->lvaTempsHaveLargerOffsetThanVars());
+
+ // lvaInlinedPInvokeFrameVar and lvaStubArgumentVar are placed below the temps
+ if (unsigned(var) == emitComp->lvaInlinedPInvokeFrameVar ||
+ unsigned(var) == emitComp->lvaStubArgumentVar)
+ {
+ offs -= emitMaxTmpSize;
+ }
+
+ if ((int)offs < 0)
+ {
+ // offset is negative
+ return size + ((int(offs) >= SCHAR_MIN) ? sizeof(char) : sizeof(int));
+ }
+#ifdef _TARGET_AMD64_
+ // This case arises for localloc frames
+ else
+ {
+ return size + ((offs <= SCHAR_MAX) ? sizeof(char) : sizeof(int));
+ }
+#endif
+ }
+
+ if (emitComp->lvaTempsHaveLargerOffsetThanVars() == false)
+ {
+ offs += emitMaxTmpSize;
+ }
+ }
+ }
+
+ assert((int)offs >= 0);
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ /* Are we addressing off of ESP? */
+
+ if (!emitHasFramePtr)
+ {
+ /* Adjust the effective offset if necessary */
+
+ if (emitCntStackDepth)
+ offs += emitCurStackLvl;
+
+ // we could (and used to) check for the special case [sp] here but the stack offset
+ // estimator was off, and there is very little harm in overestimating for such a
+ // rare case.
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+// printf("lcl = %04X, tmp = %04X, stk = %04X, offs = %04X\n",
+// emitLclSize, emitMaxTmpSize, emitCurStackLvl, offs);
+
+#ifdef _TARGET_AMD64_
+ bool useSmallEncoding = (SCHAR_MIN <= (int)offs) && ((int)offs <= SCHAR_MAX);
+#else
+ bool useSmallEncoding = (offs <= size_t(SCHAR_MAX));
+#endif
+
+#ifdef LEGACY_BACKEND
+ /* If we are using a small encoding, there is a danger that we might
+ end up having to use a larger encoding. Record 'offs' so that
+ we can detect if such a situation occurs */
+
+ if (useSmallEncoding && !offsIsUpperBound)
+ {
+ if (emitGrowableMaxByteOffs < offs)
+ {
+ emitGrowableMaxByteOffs = offs;
+#ifdef DEBUG
+ // Remember which instruction this is
+ emitMaxByteOffsIdNum = emitInsCount;
+#endif
+ }
+ }
+#endif // LEGACY_BACKEND
+
+ // If it is ESP based, and the offset is zero, we will not encode the disp part.
+ if (!EBPbased && offs == 0)
+ {
+ return size;
+ }
+ else
+ {
+ return size + (useSmallEncoding ? sizeof(char) : sizeof(int));
+ }
+}
+
+inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, int var, int dsp, int val)
+{
+ instruction ins = id->idIns();
+ UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
+ UNATIVE_OFFSET prefix = 0;
+ bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(valSize <= sizeof(int) || !id->idIsCnsReloc());
+#endif // _TARGET_AMD64_
+
+ if (valSize > sizeof(int))
+ {
+ valSize = sizeof(int);
+ }
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ valInByte = false; // relocs can't be placed in a byte
+ assert(valSize == sizeof(int));
+ }
+#endif
+
+ if (valInByte)
+ {
+ valSize = sizeof(char);
+ }
+
+ // 16-bit operand instructions need a prefix.
+    // This refers to the 66h size prefix override.
+ if (id->idOpSize() == EA_2BYTE)
+ {
+ prefix = 1;
+ }
+
+ return prefix + valSize + emitInsSizeSV(insCodeMI(ins), var, dsp);
+}
+
+/*****************************************************************************/
+
+static bool baseRegisterRequiresSibByte(regNumber base)
+{
+#ifdef _TARGET_AMD64_
+ return base == REG_ESP || base == REG_R12;
+#else
+ return base == REG_ESP;
+#endif
+}
+
+static bool baseRegisterRequiresDisplacement(regNumber base)
+{
+#ifdef _TARGET_AMD64_
+ return base == REG_EBP || base == REG_R13;
+#else
+ return base == REG_EBP;
+#endif
+}
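+
+// Encoding background: in the ModR/M byte, r/m = 100b (ESP/R12) is repurposed to signal that a SIB byte
+// follows, and mod = 00 with r/m = 101b (EBP/R13) means "disp32 with no base register", which is why those
+// base registers force a SIB byte or an explicit displacement, respectively.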
+
+UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code)
+{
+ emitAttr attrSize = id->idOpSize();
+ instruction ins = id->idIns();
+ /* The displacement field is in an unusual place for calls */
+ ssize_t dsp = (ins == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
+ bool dspInByte = ((signed char)dsp == (ssize_t)dsp);
+ bool dspIsZero = (dsp == 0);
+ UNATIVE_OFFSET size;
+
+ // Note that the values in reg and rgx are used in this method to decide
+ // how many bytes will be needed by the address [reg+rgx+cns]
+ // this includes the prefix bytes when reg or rgx are registers R8-R15
+ regNumber reg;
+ regNumber rgx;
+
+ // The idAddr field is a union and only some of the instruction formats use the iiaAddrMode variant
+ // these are IF_AWR_*, IF_ARD_*, IF_ARW_* and IF_*_ARD
+ // ideally these should really be the only idInsFmts that we see here
+ // but we have some outliers to deal with:
+ // emitIns_R_L adds IF_RWR_LABEL and calls emitInsSizeAM
+ // emitInsRMW adds IF_MRW_CNS, IF_MRW_RRD, IF_MRW_SHF, and calls emitInsSizeAM
+
+ switch (id->idInsFmt())
+ {
+ case IF_RWR_LABEL:
+ case IF_MRW_CNS:
+ case IF_MRW_RRD:
+ case IF_MRW_SHF:
+ reg = REG_NA;
+ rgx = REG_NA;
+ break;
+
+ default:
+ reg = id->idAddr()->iiaAddrMode.amBaseReg;
+ rgx = id->idAddr()->iiaAddrMode.amIndxReg;
+ break;
+ }
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ dspInByte = false; // relocs can't be placed in a byte
+ dspIsZero = false; // relocs won't always be zero
+ }
+#endif
+
+ if (code & 0xFF000000)
+ {
+ size = 4;
+ }
+ else if (code & 0x00FF0000)
+ {
+ assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64
+ || (attrSize == EA_16BYTE) // only for x64
+ || (ins == INS_movzx) || (ins == INS_movsx));
+
+ size = 3;
+ }
+ else
+ {
+ size = 2;
+
+ // Most 16-bit operands will require a size prefix.
+ // This refers to 66h size prefix override.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_STACK_FP_X87
+ if ((attrSize == EA_2BYTE) && (ins != INS_fldcw) && (ins != INS_fnstcw))
+#else // FEATURE_STACK_FP_X87
+ if (attrSize == EA_2BYTE)
+#endif // FEATURE_STACK_FP_X87
+ {
+ size++;
+ }
+ }
+
+#ifdef _TARGET_AMD64_
+ size += emitGetVexPrefixAdjustedSize(ins, attrSize, code);
+
+ if (code & REX_PREFIX_MASK)
+ {
+ // REX prefix
+ size += emitGetRexPrefixSize(ins);
+ }
+ else if (TakesRexWPrefix(ins, attrSize))
+ {
+ // REX.W prefix
+ size += emitGetRexPrefixSize(ins);
+ }
+ else if (IsExtendedReg(reg, EA_PTRSIZE) || IsExtendedReg(rgx, EA_PTRSIZE) || IsExtendedReg(id->idReg1(), attrSize))
+ {
+ // Should have a REX byte
+ size += emitGetRexPrefixSize(ins);
+ }
+#endif // _TARGET_AMD64_
+
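+ // The remainder of this method adds the bytes needed by the address mode itself.
+ // Illustrative extra bytes beyond the opcode+ModRM accounted for above:
+ //     [rax]                 -> none
+ //     [rsp+0x40]            -> SIB + disp8
+ //     [rax+rcx*4+0x1000]    -> SIB + disp32
+ //     [0x12345678]          -> disp32 (plus a SIB byte on x64 when not RIP-relative)
+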
+ if (rgx == REG_NA)
+ {
+ /* The address is of the form "[reg+disp]" */
+
+ if (reg == REG_NA)
+ {
+ /* The address is of the form "[disp]" */
+
+ size += sizeof(INT32);
+
+#ifdef _TARGET_AMD64_
+ // If id is not marked for reloc, add 1 additional byte for SIB that follows disp32
+ if (!id->idIsDspReloc())
+ {
+ size++;
+ }
+#endif
+ return size;
+ }
+
+ // If the base register is ESP (or R12 on 64-bit systems), a SIB byte must be used.
+ if (baseRegisterRequiresSibByte(reg))
+ {
+ size++;
+ }
+
+ // If the base register is EBP (or R13 on 64-bit systems), a displacement is required.
+ // Otherwise, the displacement can be elided if it is zero.
+ if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
+ {
+ return size;
+ }
+
+ /* Does the offset fit in a byte? */
+
+ if (dspInByte)
+ {
+ size += sizeof(char);
+ }
+ else
+ {
+ size += sizeof(INT32);
+ }
+ }
+ else
+ {
+ /* An index register is present */
+
+ size++;
+
+ /* Is the index value scaled? */
+
+ if (emitDecodeScale(id->idAddr()->iiaAddrMode.amScale) > 1)
+ {
+ /* Is there a base register? */
+
+ if (reg != REG_NA)
+ {
+ /* The address is "[reg + {2/4/8} * rgx + icon]" */
+
+ if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
+ {
+ /* The address is "[reg + {2/4/8} * rgx]" */
+ }
+ else
+ {
+ /* The address is "[reg + {2/4/8} * rgx + disp]" */
+
+ if (dspInByte)
+ {
+ size += sizeof(char);
+ }
+ else
+ {
+ size += sizeof(int);
+ }
+ }
+ }
+ else
+ {
+ /* The address is "[{2/4/8} * rgx + icon]" */
+
+ size += sizeof(INT32);
+ }
+ }
+ else
+ {
+ if (dspIsZero && baseRegisterRequiresDisplacement(reg) && !baseRegisterRequiresDisplacement(rgx))
+ {
+ /* Swap reg and rgx, such that reg is not EBP/R13 */
+ regNumber tmp = reg;
+ id->idAddr()->iiaAddrMode.amBaseReg = reg = rgx;
+ id->idAddr()->iiaAddrMode.amIndxReg = rgx = tmp;
+ }
+
+ /* The address is "[reg+rgx+dsp]" */
+
+ if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
+ {
+ /* This is "[reg+rgx]" */
+ }
+ else
+ {
+ /* This is "[reg+rgx+dsp]" */
+
+ if (dspInByte)
+ {
+ size += sizeof(char);
+ }
+ else
+ {
+ size += sizeof(int);
+ }
+ }
+ }
+ }
+
+ return size;
+}
+
+inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code, int val)
+{
+ instruction ins = id->idIns();
+ UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
+ bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(valSize <= sizeof(INT32) || !id->idIsCnsReloc());
+#endif // _TARGET_AMD64_
+
+ if (valSize > sizeof(INT32))
+ {
+ valSize = sizeof(INT32);
+ }
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ valInByte = false; // relocs can't be placed in a byte
+ assert(valSize == sizeof(INT32));
+ }
+#endif
+
+ if (valInByte)
+ {
+ valSize = sizeof(char);
+ }
+
+ return valSize + emitInsSizeAM(id, code);
+}
+
+inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code)
+{
+ instruction ins = id->idIns();
+
+ // fgMorph changes any statics that won't fit into 32-bit addresses
+ // into constants with an indir, rather than GT_CLS_VAR,
+ // so we should only hit this path for statics that are RIP-relative.
+ UNATIVE_OFFSET size = sizeof(INT32);
+
+ // Most 16-bit operand instructions will need a prefix.
+ // This refers to 66h size prefix override.
+
+ if (id->idOpSize() == EA_2BYTE && ins != INS_movzx && ins != INS_movsx)
+ {
+ size++;
+ }
+
+ return size + emitInsSize(code);
+}
+
+inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code, int val)
+{
+ instruction ins = id->idIns();
+ UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
+ bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
+
+#ifndef _TARGET_AMD64_
+ // occasionally longs get here on x86
+ if (valSize > sizeof(INT32))
+ valSize = sizeof(INT32);
+#endif // !_TARGET_AMD64_
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ valInByte = false; // relocs can't be placed in a byte
+ assert(valSize == sizeof(INT32));
+ }
+#endif
+
+ if (valInByte)
+ {
+ valSize = sizeof(char);
+ }
+
+ return valSize + emitInsSizeCV(id, code);
+}
+
+/*****************************************************************************
+ *
+ * Allocate instruction descriptors for instructions with address modes.
+ */
+
+inline emitter::instrDesc* emitter::emitNewInstrAmd(emitAttr size, ssize_t dsp)
+{
+ if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
+ {
+ instrDescAmd* id = emitAllocInstrAmd(size);
+
+ id->idSetIsLargeDsp();
+#ifdef DEBUG
+ id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
+#endif
+ id->idaAmdVal = dsp;
+
+ return id;
+ }
+ else
+ {
+ instrDesc* id = emitAllocInstr(size);
+
+ id->idAddr()->iiaAddrMode.amDisp = dsp;
+ assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits
+
+ return id;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Set the displacement field in an instruction. Only handles instrDescAmd type.
+ */
+
+inline void emitter::emitSetAmdDisp(instrDescAmd* id, ssize_t dsp)
+{
+ if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
+ {
+ id->idSetIsLargeDsp();
+#ifdef DEBUG
+ id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
+#endif
+ id->idaAmdVal = dsp;
+ }
+ else
+ {
+ id->idSetIsSmallDsp();
+ id->idAddr()->iiaAddrMode.amDisp = dsp;
+ assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits
+ }
+}
+
+/*****************************************************************************
+ *
+ * Allocate an instruction descriptor for an instruction that uses both
+ * an address mode displacement and a constant.
+ */
+
+emitter::instrDesc* emitter::emitNewInstrAmdCns(emitAttr size, ssize_t dsp, int cns)
+{
+ if (dsp >= AM_DISP_MIN && dsp <= AM_DISP_MAX)
+ {
+ if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS)
+ {
+ instrDesc* id = emitAllocInstr(size);
+
+ id->idSmallCns(cns);
+
+ id->idAddr()->iiaAddrMode.amDisp = dsp;
+ assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits
+
+ return id;
+ }
+ else
+ {
+ instrDescCns* id = emitAllocInstrCns(size);
+
+ id->idSetIsLargeCns();
+ id->idcCnsVal = cns;
+
+ id->idAddr()->iiaAddrMode.amDisp = dsp;
+ assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits
+
+ return id;
+ }
+ }
+ else
+ {
+ if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS)
+ {
+ instrDescAmd* id = emitAllocInstrAmd(size);
+
+ id->idSetIsLargeDsp();
+#ifdef DEBUG
+ id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
+#endif
+ id->idaAmdVal = dsp;
+
+ id->idSmallCns(cns);
+
+ return id;
+ }
+ else
+ {
+ instrDescCnsAmd* id = emitAllocInstrCnsAmd(size);
+
+ id->idSetIsLargeCns();
+ id->idacCnsVal = cns;
+
+ id->idSetIsLargeDsp();
+#ifdef DEBUG
+ id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
+#endif
+ id->idacAmdVal = dsp;
+
+ return id;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * The next instruction will be a loop head entry point
+ * So insert a dummy instruction here to ensure that
+ * the x86 I-cache alignment rule is followed.
+ */
+
+void emitter::emitLoopAlign()
+{
+ /* Insert a pseudo-instruction to ensure that we align
+ the next instruction properly */
+
+ instrDesc* id = emitNewInstrTiny(EA_1BYTE);
+ id->idIns(INS_align);
+ id->idCodeSize(15); // We may need to skip up to 15 bytes of code
+ emitCurIGsize += 15;
+}
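+
+// Note: reserving 15 bytes assumes alignment to a 16-byte boundary, which is the worst-case
+// padding; the nops actually emitted (if any) are presumably determined later, once the final
+// instruction offsets are known.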
+
+/*****************************************************************************
+ *
+ * Add a NOP instruction of the given size.
+ */
+
+void emitter::emitIns_Nop(unsigned size)
+{
+ assert(size <= 15);
+
+ instrDesc* id = emitNewInstr();
+ id->idIns(INS_nop);
+ id->idInsFmt(IF_NONE);
+ id->idCodeSize(size);
+
+ dispIns(id);
+ emitCurIGsize += size;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with no operands.
+ */
+#ifdef DEBUG
+static bool isX87InsWithNoOperands(instruction ins)
+{
+#if FEATURE_STACK_FP_X87
+ return (ins == INS_f2xm1 || ins == INS_fchs || ins == INS_fld1 || ins == INS_fldl2e ||
+ ins == INS_fldz || ins == INS_fprem || ins == INS_frndint || ins == INS_fscale);
+#else // !FEATURE_STACK_FP_X87
+ return false;
+#endif // !FEATURE_STACK_FP_X87
+}
+#endif // DEBUG
+
+void emitter::emitIns(instruction ins)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstr();
+ size_t code = insCodeMR(ins);
+
+#ifdef DEBUG
+#if FEATURE_STACK_FP_X87
+ if (ins != INS_fabs && ins != INS_fsqrt && ins != INS_fsin && ins != INS_fcos)
+#endif // FEATURE_STACK_FP_X87
+
+ {
+ // We cannot have #ifdef inside macro expansion.
+ bool assertCond = (ins == INS_cdq || isX87InsWithNoOperands(ins) || ins == INS_int3 || ins == INS_lock ||
+ ins == INS_leave || ins == INS_movsb || ins == INS_movsd || ins == INS_movsp ||
+ ins == INS_nop || ins == INS_r_movsb || ins == INS_r_movsd || ins == INS_r_movsp ||
+ ins == INS_r_stosb || ins == INS_r_stosd || ins == INS_r_stosp || ins == INS_ret ||
+ ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp
+#ifndef LEGACY_BACKEND
+ || ins == INS_vzeroupper
+#endif
+ );
+
+ assert(assertCond);
+ }
+#endif // DEBUG
+
+#ifdef _TARGET_AMD64_
+ assert((code & REX_PREFIX_MASK) == 0); // Can't have a REX bit with no operands, right?
+#endif // _TARGET_AMD64_
+
+ if (code & 0xFF000000)
+ {
+ sz = 2; // TODO-XArch-Bug?: Shouldn't this be 4? Or maybe we should assert that we don't see this case.
+ }
+ else if (code & 0x00FF0000)
+ {
+ sz = 3;
+ }
+ else if (code & 0x0000FF00)
+ {
+ sz = 2;
+ }
+ else
+ {
+ sz = 1;
+ }
+
+#ifndef LEGACY_BACKEND
+ // Account for 2-byte VEX prefix in case of vzeroupper
+ if (ins == INS_vzeroupper)
+ {
+ sz += 2;
+ }
+#endif
+
+ insFormat fmt = IF_NONE;
+
+#if FEATURE_STACK_FP_X87
+ if (CodeGen::instIsFP(ins))
+ {
+ fmt = emitInsModeFormat(ins, IF_TRD);
+ }
+#endif // FEATURE_STACK_FP_X87
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+#if !defined(LEGACY_BACKEND)
+// Add an instruction with no operands, but whose encoding depends on the size
+// (Only CDQ/CQO currently)
+void emitter::emitIns(instruction ins, emitAttr attr)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstr(attr);
+ size_t code = insCodeMR(ins);
+ assert(ins == INS_cdq);
+ assert((code & 0xFFFFFF00) == 0);
+ sz = 1;
+
+ insFormat fmt = IF_NONE;
+
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, code);
+ if (TakesRexWPrefix(ins, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+//------------------------------------------------------------------------
+// emitMapFmtForIns: map the instruction format based on the instruction.
+// Shift-by-a-constant instructions have a special format.
+//
+// Arguments:
+// fmt - the instruction format to map
+// ins - the instruction
+//
+// Returns:
+// The mapped instruction format.
+//
+emitter::insFormat emitter::emitMapFmtForIns(insFormat fmt, instruction ins)
+{
+ switch (ins)
+ {
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ {
+ switch (fmt)
+ {
+ case IF_RRW_CNS:
+ return IF_RRW_SHF;
+ case IF_MRW_CNS:
+ return IF_MRW_SHF;
+ case IF_SRW_CNS:
+ return IF_SRW_SHF;
+ case IF_ARW_CNS:
+ return IF_ARW_SHF;
+ default:
+ unreached();
+ }
+ }
+
+ default:
+ return fmt;
+ }
+}
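+
+// For example, a shift by an immediate such as "shl dword ptr [rax], 5" arriving here with
+// IF_ARW_CNS is remapped to IF_ARW_SHF.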
+
+//------------------------------------------------------------------------
+// emitMapFmtAtoM: map the address mode formats ARD, ARW, and AWR to their direct address equivalents.
+//
+// Arguments:
+// fmt - the instruction format to map
+//
+// Returns:
+// The mapped instruction format.
+//
+emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt)
+{
+ switch (fmt)
+ {
+ case IF_ARD:
+ return IF_MRD;
+ case IF_AWR:
+ return IF_MWR;
+ case IF_ARW:
+ return IF_MRW;
+
+ case IF_RRD_ARD:
+ return IF_RRD_MRD;
+ case IF_RWR_ARD:
+ return IF_RWR_MRD;
+ case IF_RRW_ARD:
+ return IF_RRW_MRD;
+
+ case IF_ARD_RRD:
+ return IF_MRD_RRD;
+ case IF_AWR_RRD:
+ return IF_MWR_RRD;
+ case IF_ARW_RRD:
+ return IF_MRW_RRD;
+
+ case IF_ARD_CNS:
+ return IF_MRD_CNS;
+ case IF_AWR_CNS:
+ return IF_MWR_CNS;
+ case IF_ARW_CNS:
+ return IF_MRW_CNS;
+
+ case IF_ARW_SHF:
+ return IF_MRW_SHF;
+
+ default:
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// emitHandleMemOp: For a memory operand, fill in the relevant fields of the instrDesc.
+//
+// Arguments:
+// indir - the memory operand.
+// id - the instrDesc to fill in.
+// fmt - the instruction format to use. This must be one of the ARD, AWR, or ARW formats. If necessary (such as for
+// GT_CLS_VAR_ADDR), this function will map it to the correct format.
+// ins - the instruction we are generating. This might affect the instruction format we choose.
+//
+// Assumptions:
+// The correctly sized instrDesc must already be created, e.g., via emitNewInstrAmd() or emitNewInstrAmdCns();
+//
+// Post-conditions:
+// For base address of int constant:
+// -- the caller must have added the int constant base to the instrDesc when creating it via
+// emitNewInstrAmdCns().
+// For simple address modes (base + scale * index + offset):
+// -- the base register, index register, and scale factor are set.
+// -- the caller must have added the addressing mode offset int constant to the instrDesc when creating it via
+// emitNewInstrAmdCns().
+//
+// The instruction format is set.
+//
+// idSetIsDspReloc() is called if necessary.
+//
+void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins)
+{
+ assert(fmt != IF_NONE);
+
+ GenTree* memBase = indir->Base();
+
+ if ((memBase != nullptr) && memBase->isContained() && (memBase->OperGet() == GT_CLS_VAR_ADDR))
+ {
+ CORINFO_FIELD_HANDLE fldHnd = memBase->gtClsVar.gtClsVarHnd;
+
+ // Statics always need relocs
+ if (!jitStaticFldIsGlobAddr(fldHnd))
+ {
+ // Contract:
+ // fgMorphField() changes any statics that won't fit into 32-bit addresses into
+ // constants with an indir, rather than GT_CLS_VAR, based on the reloc type hint given
+ // by the VM. Hence the emitter should always mark GT_CLS_VAR_ADDR as relocatable.
+ //
+ // Data section constants: these get allocated close to the code block of the method and
+ // are always addressable IP-relative. These too should be marked as relocatable.
+
+ id->idSetIsDspReloc();
+ }
+
+ id->idAddr()->iiaFieldHnd = fldHnd;
+ id->idInsFmt(emitMapFmtForIns(emitMapFmtAtoM(fmt), ins));
+ }
+ else if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained())
+ {
+ // Absolute addresses marked as contained should fit within the base of addr mode.
+ assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp));
+
+ // Either not generating relocatable code or addr must be an icon handle
+ assert(!emitComp->opts.compReloc || memBase->IsIconHandle());
+
+ if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp))
+ {
+ id->idSetIsDspReloc();
+ }
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1; // for completeness
+
+ id->idInsFmt(emitMapFmtForIns(fmt, ins));
+
+ // Absolute address must have already been set in the instrDesc constructor.
+ assert(emitGetInsAmdAny(id) == memBase->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ if (memBase != nullptr)
+ {
+ id->idAddr()->iiaAddrMode.amBaseReg = memBase->gtRegNum;
+ }
+ else
+ {
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ }
+
+ if (indir->HasIndex())
+ {
+ id->idAddr()->iiaAddrMode.amIndxReg = indir->Index()->gtRegNum;
+ }
+ else
+ {
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+ }
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(indir->Scale());
+
+ id->idInsFmt(emitMapFmtForIns(fmt, ins));
+
+ // disp must have already been set in the instrDesc constructor.
+ assert(emitGetInsAmdAny(id) == ssize_t(indir->Offset())); // make sure "disp" is stored properly
+ }
+}
+
+// Takes care of storing all incoming register parameters
+// into their corresponding shadow slots (as defined by the x64 ABI).
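+// For illustration, with the Windows x64 argument registers (RCX, RDX, R8, R9) this emits
+// the equivalent of:
+//     mov qword ptr [rsp+08h], rcx
+//     mov qword ptr [rsp+10h], rdx
+//     mov qword ptr [rsp+18h], r8
+//     mov qword ptr [rsp+20h], r9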
+void emitter::spillIntArgRegsToShadowSlots()
+{
+ unsigned argNum;
+ instrDesc* id;
+ UNATIVE_OFFSET sz;
+
+ assert(emitComp->compGeneratingProlog);
+
+ for (argNum = 0; argNum < MAX_REG_ARG; ++argNum)
+ {
+ regNumber argReg = intArgRegs[argNum];
+
+ // The offsets for the shadow space start at RSP + 8,
+ // just above the return address pushed by the call.
+ int offset = (argNum + 1) * EA_PTRSIZE;
+
+ id = emitNewInstrAmd(EA_PTRSIZE, offset);
+ id->idIns(INS_mov);
+ id->idInsFmt(IF_AWR_RRD);
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_SPBASE;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1);
+
+ // The offset has already been set in the instrDesc ctor,
+ // make sure we got it right.
+ assert(emitGetInsAmdAny(id) == ssize_t(offset));
+
+ id->idReg1(argReg);
+ sz = emitInsSizeAM(id, insCodeMR(INS_mov));
+ id->idCodeSize(sz);
+ emitCurIGsize += sz;
+ }
+}
+
+// This is very similar to emitInsBinary and probably could be folded into it,
+// except that the requirements on the incoming parameter are different,
+// e.g. the memory op in the storeind case must NOT be contained.
+void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id;
+
+ switch (node->OperGet())
+ {
+ case GT_IND:
+ {
+ GenTreeIndir* mem = node->AsIndir();
+ GenTreePtr addr = mem->Addr();
+
+ if (addr->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ emitIns_R_C(ins, attr, mem->gtRegNum, addr->gtClsVar.gtClsVarHnd, 0);
+ return;
+ }
+ else if (addr->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
+ emitIns_R_S(ins, attr, mem->gtRegNum, varNode->GetLclNum(), 0);
+ codeGen->genUpdateLife(varNode);
+ return;
+ }
+ else
+ {
+ assert(addr->OperIsAddrMode() || (addr->IsCnsIntOrI() && addr->isContained()) || !addr->isContained());
+ size_t offset = mem->Offset();
+ id = emitNewInstrAmd(attr, offset);
+ id->idIns(ins);
+ id->idReg1(mem->gtRegNum);
+ emitHandleMemOp(mem, id, IF_RWR_ARD, ins);
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+ }
+ }
+ break;
+
+ case GT_STOREIND:
+ {
+ GenTreeStoreInd* mem = node->AsStoreInd();
+ GenTreePtr addr = mem->Addr();
+ size_t offset = mem->Offset();
+ GenTree* data = mem->Data();
+
+ if (addr->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ if (data->isContainedIntOrIImmed())
+ {
+ emitIns_C_I(ins, attr, addr->gtClsVar.gtClsVarHnd, 0, (int)data->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ assert(!data->isContained());
+ emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
+ }
+ return;
+ }
+ else if (addr->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
+ if (data->isContainedIntOrIImmed())
+ {
+ emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ assert(!data->isContained());
+ emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
+ }
+ codeGen->genUpdateLife(varNode);
+ return;
+ }
+ else if (data->isContainedIntOrIImmed())
+ {
+ int icon = (int)data->AsIntConCommon()->IconValue();
+ id = emitNewInstrAmdCns(attr, offset, icon);
+ id->idIns(ins);
+ emitHandleMemOp(mem, id, IF_AWR_CNS, ins);
+ sz = emitInsSizeAM(id, insCodeMI(ins), icon);
+ id->idCodeSize(sz);
+ }
+ else
+ {
+ assert(!data->isContained());
+ id = emitNewInstrAmd(attr, offset);
+ id->idIns(ins);
+ emitHandleMemOp(mem, id, IF_AWR_RRD, ins);
+ id->idReg1(data->gtRegNum);
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+ }
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ {
+ GenTreeLclVarCommon* varNode = node->AsLclVarCommon();
+ GenTree* data = varNode->gtOp.gtOp1->gtEffectiveVal();
+ codeGen->inst_set_SV_var(varNode);
+ assert(varNode->gtRegNum == REG_NA); // stack store
+
+ if (data->isContainedIntOrIImmed())
+ {
+ emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ assert(!data->isContained());
+ emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
+ }
+ codeGen->genUpdateLife(varNode);
+ }
+ return;
+
+ default:
+ unreached();
+ }
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+CORINFO_FIELD_HANDLE emitter::emitLiteralConst(ssize_t cnsValIn, emitAttr attr /*= EA_8BYTE*/)
+{
+ NYI("emitLiteralConst");
+ return nullptr;
+}
+
+// Generates a float or double data section constant and returns a field handle representing
+// the data offset used to access the constant. This is called by emitInsBinary() in the case
+// of contained float or double constants.
+CORINFO_FIELD_HANDLE emitter::emitFltOrDblConst(GenTreeDblCon* tree, emitAttr attr /*=EA_UNKNOWN*/)
+{
+ if (attr == EA_UNKNOWN)
+ {
+ attr = emitTypeSize(tree->TypeGet());
+ }
+ else
+ {
+ assert(emitTypeSize(tree->TypeGet()) == attr);
+ }
+
+ double constValue = tree->gtDblCon.gtDconVal;
+ void* cnsAddr;
+ float f;
+ bool dblAlign;
+
+ if (attr == EA_4BYTE)
+ {
+ f = forceCastToFloat(constValue);
+ cnsAddr = &f;
+ dblAlign = false;
+ }
+ else
+ {
+ cnsAddr = &constValue;
+ dblAlign = true;
+ }
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+
+ UNATIVE_OFFSET cnsSize = (attr == EA_4BYTE) ? 4 : 8;
+ UNATIVE_OFFSET cnum = emitDataConst(cnsAddr, cnsSize, dblAlign);
+ return emitComp->eeFindJitDataOffs(cnum);
+}
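+
+// The handle returned above is passed to emitIns_R_C (see emitInsBinary below), which addresses
+// the constant through the data section, e.g. as a load of the form "movsd xmm0, qword ptr [reloc]".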
+
+// The caller must call genConsumeReg() for all sources, including the address registers
+// of both source and destination, and genProduceReg() for the destination register, if any.
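+//
+// Illustrative shapes handled below (not an exhaustive list):
+//     add ecx, edx          ; reg, reg
+//     add ecx, 8            ; reg, immediate
+//     add ecx, [rbp-8]      ; reg, stack-based local (contained lclVar/lclFld/spill temp)
+//     add ecx, [rax+8]      ; reg, address mode
+//     add [rax+8], ecx      ; address mode, reg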
+
+regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
+{
+ // dst can only be a reg or modrm
+ assert(!dst->isContained() || dst->isContainedMemoryOp() ||
+ instrIs3opImul(ins)); // dst on these isn't really the dst
+
+#ifdef DEBUG
+ // src can be anything but both src and dst cannot be addr modes
+ // or at least cannot be contained addr modes
+ if (dst->isContainedMemoryOp())
+ {
+ assert(!src->isContainedMemoryOp());
+ }
+
+ if (src->isContainedMemoryOp())
+ {
+ assert(!dst->isContainedMemoryOp());
+ }
+#endif
+
+ // find which operand is a memory op (if any)
+ // and what its base is
+ GenTreeIndir* mem = nullptr;
+ GenTree* memBase = nullptr;
+
+ if (dst->isContainedIndir())
+ {
+ mem = dst->AsIndir();
+ }
+ else if (src->isContainedIndir())
+ {
+ mem = src->AsIndir();
+ }
+
+ if (mem)
+ {
+ memBase = mem->gtOp1;
+ }
+
+ // Find immed (if any) - it cannot be the dst
+ // SSE2 instructions allow only the second operand to be a memory operand.
+ GenTreeIntConCommon* intConst = nullptr;
+ GenTreeDblCon* dblConst = nullptr;
+ if (src->isContainedIntOrIImmed())
+ {
+ intConst = src->AsIntConCommon();
+ }
+ else if (src->isContainedFltOrDblImmed())
+ {
+ dblConst = src->AsDblCon();
+ }
+
+ // find local field if any
+ GenTreeLclFld* lclField = nullptr;
+ if (src->isContainedLclField())
+ {
+ lclField = src->AsLclFld();
+ }
+ else if (dst->isLclField() && dst->gtRegNum == REG_NA)
+ {
+ lclField = dst->AsLclFld();
+ }
+
+ // find contained lcl var if any
+ GenTreeLclVar* lclVar = nullptr;
+ if (src->isContainedLclVar())
+ {
+ assert(src->IsRegOptional());
+ lclVar = src->AsLclVar();
+ }
+ else if (dst->isContainedLclVar())
+ {
+ assert(dst->IsRegOptional());
+ lclVar = dst->AsLclVar();
+ }
+
+ // find contained spill tmp if any
+ TempDsc* tmpDsc = nullptr;
+ if (src->isContainedSpillTemp())
+ {
+ assert(src->IsRegOptional());
+ tmpDsc = codeGen->getSpillTempDsc(src);
+ }
+ else if (dst->isContainedSpillTemp())
+ {
+ assert(dst->IsRegOptional());
+ tmpDsc = codeGen->getSpillTempDsc(dst);
+ }
+
+ // First handle the simple non-memory cases
+ //
+ if ((mem == nullptr) && (lclField == nullptr) && (lclVar == nullptr) && (tmpDsc == nullptr))
+ {
+ if (intConst != nullptr)
+ {
+ // reg, immed
+ assert(!dst->isContained());
+
+ emitIns_R_I(ins, attr, dst->gtRegNum, intConst->IconValue());
+ // TODO-XArch-Bug?: does the caller call regTracker.rsTrackRegTrash(dst->gtRegNum) or
+ // rsTrackRegIntCns(dst->gtRegNum, intConst->IconValue()) (as appropriate)?
+ }
+ else if (dblConst != nullptr)
+ {
+ // Emit a data section constant for float or double constant.
+ CORINFO_FIELD_HANDLE hnd = emitFltOrDblConst(dblConst);
+
+ emitIns_R_C(ins, attr, dst->gtRegNum, hnd, 0);
+ }
+ else
+ {
+ // reg, reg
+ assert(!src->isContained() && !dst->isContained());
+
+ if (instrHasImplicitRegPairDest(ins))
+ {
+ emitIns_R(ins, attr, src->gtRegNum);
+ }
+ else
+ {
+ emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum);
+ }
+ // TODO-XArch-Bug?: does the caller call regTracker.rsTrackRegTrash(dst->gtRegNum) or, for ins=MOV:
+ // regTracker.rsTrackRegCopy(dst->gtRegNum, src->gtRegNum); ?
+ }
+
+ return dst->gtRegNum;
+ }
+
+ // Next handle the cases where we have a stack based local memory operand.
+ //
+ unsigned varNum = BAD_VAR_NUM;
+ unsigned offset = (unsigned)-1;
+
+ if (lclField != nullptr)
+ {
+ varNum = lclField->AsLclVarCommon()->GetLclNum();
+ offset = lclField->gtLclFld.gtLclOffs;
+ }
+ else if (lclVar != nullptr)
+ {
+ varNum = lclVar->AsLclVarCommon()->GetLclNum();
+ offset = 0;
+ }
+ else if (tmpDsc != nullptr)
+ {
+ varNum = tmpDsc->tdTempNum();
+ offset = 0;
+ }
+
+ // Spill temp numbers are negative and start with -1
+ // which also happens to be BAD_VAR_NUM. For this reason
+ // we also need to check 'tmpDsc != nullptr' here.
+ if (varNum != BAD_VAR_NUM || tmpDsc != nullptr)
+ {
+ // Is the memory op in the source position?
+ if (src->isContainedLclField() || src->isContainedLclVar() || src->isContainedSpillTemp())
+ {
+ if (instrHasImplicitRegPairDest(ins))
+ {
+ // src is a stack based local variable
+ // dst is implicit - RDX:RAX
+ emitIns_S(ins, attr, varNum, offset);
+ }
+ else
+ {
+ // src is a stack based local variable
+ // dst is a register
+ emitIns_R_S(ins, attr, dst->gtRegNum, varNum, offset);
+ }
+ }
+ else // The memory op is in the dest position.
+ {
+ assert(dst->gtRegNum == REG_NA || dst->IsRegOptional());
+
+ // src could be int or reg
+ if (src->isContainedIntOrIImmed())
+ {
+ // src is a contained immediate
+ // dst is a stack based local variable
+ emitIns_S_I(ins, attr, varNum, offset, (int)src->gtIntConCommon.IconValue());
+ }
+ else
+ {
+ // src is a register
+ // dst is a stack based local variable
+ assert(!src->isContained());
+ emitIns_S_R(ins, attr, src->gtRegNum, varNum, offset);
+ }
+ }
+
+ if (tmpDsc != nullptr)
+ {
+ emitComp->tmpRlsTemp(tmpDsc);
+ }
+
+ return dst->gtRegNum;
+ }
+
+ // Now we are left with only the cases where the instruction has some kind of a memory operand
+ //
+ assert(mem != nullptr);
+
+ // Next handle the class static variable cases
+ //
+ if (memBase->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ // Is the memory op in the source position?
+ if (mem == src)
+ {
+ if (instrHasImplicitRegPairDest(ins))
+ {
+ // src is a class static variable
+ // dst is implicit - RDX:RAX
+ emitIns_C(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0);
+ }
+ else
+ {
+ // src is a class static variable
+ // dst is a register
+ emitIns_R_C(ins, attr, dst->gtRegNum, memBase->gtClsVar.gtClsVarHnd, 0);
+ }
+ }
+ else // The memory op is in the dest position.
+ {
+ if (src->isContained())
+ {
+ // src is a contained immediate
+ // dst is a class static variable
+ emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0, (int)src->gtIntConCommon.IconValue());
+ }
+ else
+ {
+ // src is a register
+ // dst is a class static variable
+ emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, src->gtRegNum, 0);
+ }
+ }
+
+ return dst->gtRegNum;
+ }
+
+ // Finally we handle addressing modes case [regBase + regIndex*scale + const]
+ //
+ // We will have to construct and fill in the instruction descriptor for this case
+ //
+ instrDesc* id = nullptr;
+
+ // Is the src an immediate constant?
+ if (intConst)
+ {
+ // [mem], imm
+ id = emitNewInstrAmdCns(attr, mem->Offset(), (int)intConst->IconValue());
+ }
+ else // [mem], reg OR reg, [mem]
+ {
+ size_t offset = mem->Offset();
+ id = emitNewInstrAmd(attr, offset);
+ id->idIns(ins);
+
+ GenTree* regTree = (src == mem) ? dst : src;
+
+ // there must be one non-contained src
+ assert(!regTree->isContained());
+ id->idReg1(regTree->gtRegNum);
+ }
+ assert(id != nullptr);
+
+ id->idIns(ins); // Set the instruction.
+
+ // Determine the instruction format
+ //
+ insFormat fmt = IF_NONE;
+ if (mem == dst)
+ {
+ if (!src->isContained())
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_RRD);
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_CNS);
+ }
+ }
+ else
+ {
+ assert(!dst->isContained());
+ if (instrHasImplicitRegPairDest(ins))
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD);
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_RRD_ARD);
+ }
+ }
+ assert(fmt != IF_NONE);
+ emitHandleMemOp(mem, id, fmt, ins);
+
+ // Determine the instruction size
+ //
+ UNATIVE_OFFSET sz = 0;
+ if (intConst)
+ {
+ sz = emitInsSizeAM(id, insCodeMI(ins), (int)intConst->IconValue());
+ }
+ else
+ {
+ if (mem == dst)
+ {
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ }
+ else // mem == src
+ {
+ if (instrHasImplicitRegPairDest(ins))
+ {
+ sz = emitInsSizeAM(id, insCode(ins));
+ }
+ else
+ {
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ }
+ }
+ }
+ assert(sz != 0);
+
+ regNumber result = REG_NA;
+ if (src == mem)
+ {
+ result = dst->gtRegNum;
+ }
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+ return result;
+}
+
+//------------------------------------------------------------------------
+// emitInsRMW: Emit logic for Read-Modify-Write binary instructions.
+//
+// Responsible for emitting a single instruction that will perform an operation of the form:
+// *addr = *addr <BinOp> src
+// For example:
+// ADD [RAX], RCX
+//
+// Arguments:
+// ins - instruction to generate
+// attr - emitter attribute for instruction
+// storeInd - indir for RMW addressing mode
+// src - source operand of instruction
+//
+// Assumptions:
+// Lowering has taken care of recognizing the StoreInd pattern of:
+// StoreInd( AddressTree, BinOp( Ind ( AddressTree ), Operand ) )
+// The address to store is already sitting in a register.
+//
+// Notes:
+// This is a no-produce operation, meaning that no register output will
+// be produced for future use in the code stream.
+//
+void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd, GenTree* src)
+{
+ GenTreePtr addr = storeInd->Addr();
+ addr = addr->gtSkipReloadOrCopy();
+ assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA ||
+ addr->OperGet() == GT_CLS_VAR_ADDR || addr->OperGet() == GT_CNS_INT);
+
+ instrDesc* id = nullptr;
+ UNATIVE_OFFSET sz;
+
+ size_t offset = 0;
+ if (addr->OperGet() != GT_CLS_VAR_ADDR)
+ {
+ offset = storeInd->Offset();
+ }
+
+ if (src->isContainedIntOrIImmed())
+ {
+ GenTreeIntConCommon* intConst = src->AsIntConCommon();
+ id = emitNewInstrAmdCns(attr, offset, (int)intConst->IconValue());
+ emitHandleMemOp(storeInd, id, IF_ARW_CNS, ins);
+ id->idIns(ins);
+ sz = emitInsSizeAM(id, insCodeMI(ins), (int)intConst->IconValue());
+ }
+ else
+ {
+ assert(!src->isContained()); // there must be one non-contained src
+
+ // ind, reg
+ id = emitNewInstrAmd(attr, offset);
+ emitHandleMemOp(storeInd, id, IF_ARW_RRD, ins);
+ id->idReg1(src->gtRegNum);
+ id->idIns(ins);
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ }
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+//------------------------------------------------------------------------
+// emitInsRMW: Emit logic for Read-Modify-Write unary instructions.
+//
+// Responsible for emitting a single instruction that will perform an operation of the form:
+// *addr = UnaryOp *addr
+// For example:
+// NOT [RAX]
+//
+// Arguments:
+// ins - instruction to generate
+// attr - emitter attribute for instruction
+// storeInd - indir for RMW addressing mode
+//
+// Assumptions:
+// Lowering has taken care of recognizing the StoreInd pattern of:
+// StoreInd( AddressTree, UnaryOp( Ind ( AddressTree ) ) )
+// The address to store is already sitting in a register.
+//
+// Notes:
+// This is a no-produce operation, meaning that no register output will
+// be produced for future use in the code stream.
+//
+void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd)
+{
+ GenTreePtr addr = storeInd->Addr();
+ addr = addr->gtSkipReloadOrCopy();
+ assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_CLS_VAR_ADDR ||
+ addr->OperGet() == GT_LEA || addr->OperGet() == GT_CNS_INT);
+
+ size_t offset = 0;
+ if (addr->OperGet() != GT_CLS_VAR_ADDR)
+ {
+ offset = storeInd->Offset();
+ }
+
+ instrDesc* id = emitNewInstrAmd(attr, offset);
+ emitHandleMemOp(storeInd, id, IF_ARW, ins);
+ id->idIns(ins);
+ UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+#endif // !LEGACY_BACKEND
+
+#if FEATURE_STACK_FP_X87
+/*****************************************************************************
+ *
+ * Add an instruction of the form "op ST(0),ST(n)".
+ */
+
+void emitter::emitIns_F0_F(instruction ins, unsigned fpreg)
+{
+ UNATIVE_OFFSET sz = 2;
+ instrDesc* id = emitNewInstr();
+ insFormat fmt = emitInsModeFormat(ins, IF_TRD_FRD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1((regNumber)fpreg);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction of the form "op ST(n),ST(0)".
+ */
+
+void emitter::emitIns_F_F0(instruction ins, unsigned fpreg)
+{
+ UNATIVE_OFFSET sz = 2;
+ instrDesc* id = emitNewInstr();
+ insFormat fmt = emitInsModeFormat(ins, IF_FRD_TRD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1((regNumber)fpreg);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+#endif // FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a single register.
+ */
+
+void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
+{
+ emitAttr size = EA_SIZE(attr);
+
+ assert(size <= EA_PTRSIZE);
+ noway_assert(emitVerifyEncodable(ins, size, reg));
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrTiny(attr);
+
+ switch (ins)
+ {
+ case INS_inc:
+ case INS_dec:
+#ifdef _TARGET_AMD64_
+
+ sz = 2; // x64 has no 1-byte opcode (it is the same encoding as the REX prefix)
+
+#else // !_TARGET_AMD64_
+
+ if (size == EA_1BYTE)
+ sz = 2; // Use the long form as the small one has no 'w' bit
+ else
+ sz = 1; // Use short form
+
+#endif // !_TARGET_AMD64_
+
+ break;
+
+ case INS_pop:
+ case INS_pop_hide:
+ case INS_push:
+ case INS_push_hide:
+
+ /* We don't currently push/pop small values */
+
+ assert(size == EA_PTRSIZE);
+
+ sz = 1;
+ break;
+
+ default:
+
+ /* All the sixteen INS_setCCs are contiguous. */
+
+ if (INS_seto <= ins && ins <= INS_setg)
+ {
+ // Rough check that we used the endpoints for the range check
+
+ assert(INS_seto + 0xF == INS_setg);
+
+ // The caller must specify EA_1BYTE for 'attr'
+
+ assert(attr == EA_1BYTE);
+
+ /* We expect this to always be a 'big' opcode */
+
+ assert(insEncodeMRreg(ins, reg, attr, insCodeMR(ins)) & 0x00FF0000);
+
+ size = attr;
+
+ sz = 3;
+ break;
+ }
+ else
+ {
+ sz = 2;
+ break;
+ }
+ }
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(reg);
+
+ // 16-bit operand instructions will need a prefix.
+ // This refers to 66h size prefix override.
+ if (size == EA_2BYTE)
+ {
+ sz += 1;
+ }
+
+ // Vex bytes
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins)));
+
+ // REX byte
+ if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a constant.
+ */
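+//
+// Illustrative x86 sizes computed below (excluding any operand-size/REX/VEX prefix bytes added later):
+//     add ecx, 5       -> 83 C1 05     (3 bytes; the immediate fits in a byte)
+//     add ecx, 0x1000  -> 81 C1 imm32  (6 bytes)
+//     add eax, 0x1000  -> 05 imm32     (5 bytes; short EAX form)
+//     mov ecx, 0x1000  -> B9 imm32     (5 bytes)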
+
+void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val)
+{
+ emitAttr size = EA_SIZE(attr);
+
+ // Allow emitting SSE2/AVX SIMD instructions of R_I form that can specify EA_16BYTE or EA_32BYTE
+ assert(size <= EA_PTRSIZE || IsSSEOrAVXInstruction(ins));
+
+ noway_assert(emitVerifyEncodable(ins, size, reg));
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(size < EA_8BYTE || ins == INS_mov || ((int)val == val && !EA_IS_CNS_RELOC(attr)));
+#endif
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id;
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS);
+ bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
+
+ // Figure out the size of the instruction
+ switch (ins)
+ {
+ case INS_mov:
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is equivalent to mov reg, imm32 if the high order bits are all 0
+ // and this isn't a reloc constant.
+ if (((size > EA_4BYTE) && (0 == (val & 0xFFFFFFFF00000000LL))) && !EA_IS_CNS_RELOC(attr))
+ {
+ attr = size = EA_4BYTE;
+ }
+
+ if (size > EA_4BYTE)
+ {
+ sz = 9; // Really it is 10, but we'll add one more later
+ break;
+ }
+#endif // _TARGET_AMD64_
+ sz = 5;
+ break;
+
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_RRW_SHF;
+ sz = 3;
+ val &= 0x7F;
+ valInByte = true; // shift amount always placed in a byte
+ break;
+
+ default:
+
+ if (EA_IS_CNS_RELOC(attr))
+ {
+ valInByte = false; // relocs can't be placed in a byte
+ }
+
+ if (valInByte)
+ {
+ if (IsSSEOrAVXInstruction(ins))
+ {
+ sz = 5;
+ }
+ else
+ {
+ sz = 3;
+ }
+ }
+ else
+ {
+ if (reg == REG_EAX && !instrIs3opImul(ins))
+ {
+ sz = 1;
+ }
+ else
+ {
+ sz = 2;
+ }
+
+#ifdef _TARGET_AMD64_
+ if (size > EA_4BYTE)
+ {
+ // We special-case anything that takes a full 8-byte constant.
+ sz += 4;
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ sz += EA_SIZE_IN_BYTES(attr);
+ }
+ }
+ break;
+ }
+
+ // Vex prefix size
+ sz += emitGetVexPrefixSize(ins, attr);
+
+ // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a
+ // 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target
+ // register. So we also need to check if that built-in register is an extended register.
+ if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+#ifdef _TARGET_X86_
+ assert(reg < 8);
+#endif
+
+ id = emitNewInstrSC(attr, val);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(reg);
+
+ // 16-bit operand instructions will need a prefix
+ if (size == EA_2BYTE)
+ {
+ sz += 1;
+ }
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (reg == REG_ESP)
+ {
+ if (emitCntStackDepth)
+ {
+ if (ins == INS_sub)
+ {
+ S_UINT32 newStackLvl(emitCurStackLvl);
+ newStackLvl += S_UINT32(val);
+ noway_assert(!newStackLvl.IsOverflow());
+
+ emitCurStackLvl = newStackLvl.Value();
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_add)
+ {
+ S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val);
+ noway_assert(!newStackLvl.IsOverflow());
+
+ emitCurStackLvl = newStackLvl.Value();
+ }
+ }
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing an integer constant.
+ */
+
+void emitter::emitIns_I(instruction ins, emitAttr attr, int val)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id;
+ bool valInByte = ((signed char)val == val);
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ if (EA_IS_CNS_RELOC(attr))
+ {
+ valInByte = false; // relocs can't be placed in a byte
+ }
+
+ switch (ins)
+ {
+ case INS_loop:
+ case INS_jge:
+ sz = 2;
+ break;
+
+ case INS_ret:
+ sz = 3;
+ break;
+
+ case INS_push_hide:
+ case INS_push:
+ sz = valInByte ? 2 : 5;
+ break;
+
+ default:
+ NO_WAY("unexpected instruction");
+ }
+
+ id = emitNewInstrSC(attr, val);
+ id->idIns(ins);
+ id->idInsFmt(IF_CNS);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Add a "jump through a table" instruction.
+ */
+
+void emitter::emitIns_IJ(emitAttr attr, regNumber reg, unsigned base)
+{
+ assert(EA_SIZE(attr) == EA_4BYTE);
+
+ UNATIVE_OFFSET sz = 3 + 4;
+ const instruction ins = INS_i_jmp;
+
+ if (IsExtendedReg(reg, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ instrDesc* id = emitNewInstrAmd(attr, base);
+
+ id->idIns(ins);
+ id->idInsFmt(IF_ARD);
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = reg;
+ id->idAddr()->iiaAddrMode.amScale = emitter::OPSZP;
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idMemCookie = base;
+#endif
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static data member operand. If 'size' is 0, the
+ * instruction operates on the address of the static member instead of its
+ * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
+ */
+
+void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
+{
+#if RELOC_SUPPORT
+ // Statics always need relocs
+ if (!jitStaticFldIsGlobAddr(fldHnd))
+ {
+ attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
+ }
+#endif
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id;
+
+ /* Are we pushing the offset of the class variable? */
+
+ if (EA_IS_OFFSET(attr))
+ {
+ assert(ins == INS_push);
+ sz = 1 + sizeof(void*);
+
+ id = emitNewInstrDsp(EA_1BYTE, offs);
+ id->idIns(ins);
+ id->idInsFmt(IF_MRD_OFF);
+ }
+ else
+ {
+#if FEATURE_STACK_FP_X87
+ insFormat fmt = emitInsModeFormat(ins, IF_MRD, IF_TRD_MRD, IF_MWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ insFormat fmt = emitInsModeFormat(ins, IF_MRD);
+#endif // !FEATURE_STACK_FP_X87
+
+ id = emitNewInstrDsp(attr, offs);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ sz = emitInsSizeCV(id, insCodeMR(ins));
+ }
+
+ // Vex prefix size
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
+
+ if (TakesRexWPrefix(ins, attr))
+ {
+ // REX.W prefix
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idAddr()->iiaFieldHnd = fldHnd;
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with two register operands.
+ */
+
+void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2)
+{
+ emitAttr size = EA_SIZE(attr);
+
+ /* We don't want to generate any useless mov instructions! */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_AMD64_
+ // Same-reg 4-byte mov can be useful because it performs a
+ // zero-extension to 8 bytes.
+ assert(ins != INS_mov || reg1 != reg2 || size == EA_4BYTE);
+#else
+ assert(ins != INS_mov || reg1 != reg2);
+#endif // _TARGET_AMD64_
+
+ assert(size <= EA_32BYTE);
+ noway_assert(emitVerifyEncodable(ins, size, reg1, reg2));
+
+ UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr);
+
+ /* Special case: "XCHG" uses a different format */
+ insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD);
+
+ instrDesc* id = emitNewInstrTiny(attr);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with two register operands and an integer constant.
+ */
+
+void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival)
+{
+ // SSE2 version requires 5 bytes and AVX version 6 bytes
+ UNATIVE_OFFSET sz = 4;
+ if (IsSSEOrAVXInstruction(ins))
+ {
+ sz = UseAVX() ? 6 : 5;
+ }
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ instrDesc* id = emitNewInstrSC(attr, ival);
+
+ // REX prefix
+ if (IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(IF_RRW_RRW_CNS);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+#ifdef FEATURE_AVX_SUPPORT
+/*****************************************************************************
+*
+* Add an instruction with three register operands.
+*/
+
+void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2)
+{
+ assert(IsSSEOrAVXInstruction(ins));
+ assert(IsThreeOperandAVXInstruction(ins));
+ // Currently the VEX prefix only uses the three-byte form.
+ // size = vex + opcode + ModR/M = 3 + 1 + 1 = 5
+ // TODO-XArch-CQ: We should create a function that can calculate the size of all kinds of AVX instructions in the future
+ UNATIVE_OFFSET sz = 5;
+
+ instrDesc* id = emitNewInstr(attr);
+ id->idIns(ins);
+ id->idInsFmt(IF_RWR_RRD_RRD);
+ id->idReg1(targetReg);
+ id->idReg2(reg1);
+ id->idReg3(reg2);
+
+ id->idCodeSize(sz);
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+#endif
+/*****************************************************************************
+ *
+ * Add an instruction with a register + static member operands.
+ */
+void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
+{
+#if RELOC_SUPPORT
+ // Statics always need relocs
+ if (!jitStaticFldIsGlobAddr(fldHnd))
+ {
+ attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
+ }
+#endif
+
+ emitAttr size = EA_SIZE(attr);
+
+ assert(size <= EA_32BYTE);
+ noway_assert(emitVerifyEncodable(ins, size, reg));
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id;
+
+ // Are we MOV'ing the offset of the class variable into EAX?
+ if (EA_IS_OFFSET(attr))
+ {
+ id = emitNewInstrDsp(EA_1BYTE, offs);
+ id->idIns(ins);
+ id->idInsFmt(IF_RWR_MRD_OFF);
+
+ assert(ins == INS_mov && reg == REG_EAX);
+
+ // Special case: "mov eax, [addr]" is smaller
+ sz = 1 + sizeof(void*);
+ }
+ else
+ {
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_MRD);
+
+ id = emitNewInstrDsp(attr, offs);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+#ifdef _TARGET_X86_
+ // Special case: "mov eax, [addr]" is smaller.
+ // This case is not enabled for amd64 as it always uses RIP relative addressing
+ // and it results in smaller instruction size than encoding 64-bit addr in the
+ // instruction.
+ if (ins == INS_mov && reg == REG_EAX)
+ {
+ sz = 1 + sizeof(void*);
+ if (size == EA_2BYTE)
+ sz += 1;
+ }
+ else
+#endif //_TARGET_X86_
+ {
+ sz = emitInsSizeCV(id, insCodeRM(ins));
+ }
+
+ // Special case: mov reg, fs:[ddd]
+ if (fldHnd == FLD_GLOBAL_FS)
+ {
+ sz += 1;
+ }
+ }
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
+
+ // REX prefix
+ if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idReg1(reg);
+ id->idCodeSize(sz);
+
+ id->idAddr()->iiaFieldHnd = fldHnd;
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static member + register operands.
+ */
+
+void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
+{
+#if RELOC_SUPPORT
+ // Statics always need relocs
+ if (!jitStaticFldIsGlobAddr(fldHnd))
+ {
+ attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
+ }
+#endif
+
+ emitAttr size = EA_SIZE(attr);
+
+#if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
+ // For x86 RyuJIT it is valid to storeind a double sized operand in an xmm reg to memory
+ assert(size <= EA_8BYTE);
+#else
+ assert(size <= EA_PTRSIZE);
+#endif
+
+ noway_assert(emitVerifyEncodable(ins, size, reg));
+
+ instrDesc* id = emitNewInstrDsp(attr, offs);
+ insFormat fmt = emitInsModeFormat(ins, IF_MRD_RRD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ UNATIVE_OFFSET sz;
+
+#ifdef _TARGET_X86_
+ // Special case: "mov [addr], EAX" is smaller.
+ // This case is not enabled for amd64 as it always uses RIP relative addressing,
+ // which results in a smaller instruction size than encoding the 64-bit addr in
+ // the instruction.
+ if (ins == INS_mov && reg == REG_EAX)
+ {
+ sz = 1 + sizeof(void*);
+ if (size == EA_2BYTE)
+ sz += 1;
+ }
+ else
+#endif //_TARGET_X86_
+ {
+ sz = emitInsSizeCV(id, insCodeMR(ins));
+ }
+
+ // Special case: mov fs:[ddd], reg
+ if (fldHnd == FLD_GLOBAL_FS)
+ {
+ sz += 1;
+ }
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
+
+ // REX prefix
+ if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idReg1(reg);
+ id->idCodeSize(sz);
+
+ id->idAddr()->iiaFieldHnd = fldHnd;
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static member + constant.
+ */
+
+void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val)
+{
+#if RELOC_SUPPORT
+ // Statics always need relocs
+ if (!jitStaticFldIsGlobAddr(fldHnd))
+ {
+ attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
+ }
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_MRW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_MRD_CNS);
+ break;
+ }
+
+ instrDesc* id = emitNewInstrCnsDsp(attr, val, offs);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ size_t code = insCodeMI(ins);
+ UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val);
+
+#ifdef _TARGET_AMD64_
+ // Vex prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
+
+ // REX prefix, if not already included in "code"
+ if (TakesRexWPrefix(ins, attr) && (code & REX_PREFIX_MASK) == 0)
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+#endif // _TARGET_AMD64_
+
+ id->idAddr()->iiaFieldHnd = fldHnd;
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int varx, int offs)
+{
+ assert(ins == INS_mov);
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+
+ instrDescLbl* id = emitNewInstrLbl();
+
+ id->idIns(ins);
+ id->idInsFmt(IF_SWR_LABEL);
+ id->idAddr()->iiaBBlabel = dst;
+
+ /* The label reference is always long */
+
+ id->idjShort = 0;
+ id->idjKeepLong = 1;
+
+ /* Record the current IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this instruction to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+ UNATIVE_OFFSET sz = sizeof(INT32) + emitInsSizeSV(insCodeMI(ins), varx, offs);
+ id->dstLclVar.initLclVarAddr(varx, offs);
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+#if RELOC_SUPPORT
+#ifndef _TARGET_AMD64_
+ // Storing the address of a basicBlock will need a reloc
+ // as the instruction uses the absolute address,
+ // not a relative address.
+ //
+ // On Amd64, absolute code addresses should always go through a reloc
+ // to be encoded as a RIP rel32 offset.
+ if (emitComp->opts.compReloc)
+#endif
+ {
+ id->idSetIsDspReloc();
+ }
+#endif // RELOC_SUPPORT
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Add a label instruction.
+ */
+void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+ assert(ins == INS_lea);
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+
+ instrDescJmp* id = emitNewInstrJmp();
+
+ id->idIns(ins);
+ id->idReg1(reg);
+ id->idInsFmt(IF_RWR_LABEL);
+ id->idOpSize(EA_SIZE(attr)); // emitNewInstrJmp() sets the size (incorrectly) to EA_1BYTE
+ id->idAddr()->iiaBBlabel = dst;
+
+ /* The label reference is always long */
+
+ id->idjShort = 0;
+ id->idjKeepLong = 1;
+
+ /* Record the current IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this instruction to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#ifdef DEBUG
+ // Mark the catch return
+ if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
+ {
+ id->idDebugOnlyInfo()->idCatchRet = true;
+ }
+#endif // DEBUG
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+
+ // Set the relocation flags - these give a hint to the zapper to perform
+ // relocation of the specified 32-bit address.
+ id->idSetRelocFlags(attr);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * The following adds instructions referencing address modes.
+ */
+
+void emitter::emitIns_I_AR(
+ instruction ins, emitAttr attr, int val, regNumber reg, int disp, int memCookie, void* clsCookie)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_ARW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_ARD_CNS);
+ break;
+ }
+
+ /*
+ Useful if you want to trap moves with 0 constant
+ if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
+ {
+ printf("MOV 0\n");
+ }
+ */
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ assert((memCookie == 0) == (clsCookie == nullptr));
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idMemCookie = memCookie;
+ id->idDebugOnlyInfo()->idClsCookie = clsCookie;
+#endif
+
+ id->idAddr()->iiaAddrMode.amBaseReg = reg;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMI(ins), val);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_I_AI(instruction ins, emitAttr attr, int val, ssize_t disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_ARW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_ARD_CNS);
+ break;
+ }
+
+ /*
+ Useful if you want to trap moves with 0 constant
+ if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
+ {
+ printf("MOV 0\n");
+ }
+ */
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMI(ins), val);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_R_AR(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp, int memCookie, void* clsCookie)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE) && (ireg != REG_NA));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ if (ins == INS_lea)
+ {
+ if (ireg == base && disp == 0)
+ {
+ // The emitter may not be the obvious place for this optimization, but it's a convenient
+ // choke point for all the emitIns(ins, tree) callers; otherwise we would have to perform
+ // this analysis at each call site.
+ //
+ return;
+ }
+ }
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+
+ assert((memCookie == NULL) == (clsCookie == nullptr));
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idMemCookie = memCookie;
+ id->idDebugOnlyInfo()->idClsCookie = clsCookie;
+#endif
+
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_AR_R(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp, int memCookie, void* clsCookie)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt;
+
+ if (ireg == REG_NA)
+ {
+#if FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD);
+#endif // !FEATURE_STACK_FP_X87
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_RRD);
+
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ id->idReg1(ireg);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ assert((memCookie == NULL) == (clsCookie == nullptr));
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idMemCookie = memCookie;
+ id->idDebugOnlyInfo()->idClsCookie = clsCookie;
+#endif
+
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt;
+
+ if (ireg == REG_NA)
+ {
+#if FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD);
+#endif // !FEATURE_STACK_FP_X87
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_RRD);
+
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ id->idReg1(ireg);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+void emitter::emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_ARW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_ARD_CNS);
+ break;
+ }
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = reg;
+ id->idAddr()->iiaAddrMode.amIndxReg = rg2;
+ id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMI(ins), val);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, int disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = index;
+ id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber index, int disp)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt;
+
+ if (ireg == REG_NA)
+ {
+#if FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD);
+#endif // !FEATURE_STACK_FP_X87
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_RRD);
+
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ id->idReg1(ireg);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = reg;
+ id->idAddr()->iiaAddrMode.amIndxReg = index;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+void emitter::emitIns_I_ARX(
+ instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, unsigned mul, int disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_ARW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_ARD_CNS);
+ break;
+ }
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = reg;
+ id->idAddr()->iiaAddrMode.amIndxReg = rg2;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMI(ins), val);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_R_ARX(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = index;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_ARX_R(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt;
+
+ if (ireg == REG_NA)
+ {
+#if FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD);
+#endif // !FEATURE_STACK_FP_X87
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_RRD);
+
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+ id->idReg1(ireg);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = index;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+void emitter::emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_ARW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_ARD_CNS);
+ break;
+ }
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = reg;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMI(ins), val);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = reg;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt;
+
+ if (ireg == REG_NA)
+ {
+#if FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD);
+#endif // !FEATURE_STACK_FP_X87
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_RRD);
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+ id->idReg1(ireg);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = reg;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * The following functions add instructions referencing stack-based local variables.
+ */
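+//
+// Illustrative sketch (not part of this change): "S" denotes a stack-based local,
+// identified by a variable number plus an offset within it. For example, storing
+// ecx into local #2 and reloading it later might look like
+//     emitIns_S_R(INS_mov, EA_4BYTE, REG_ECX, 2, 0);
+//     emitIns_R_S(INS_mov, EA_4BYTE, REG_ECX, 2, 0);
+// (the variable number and register are made up for the example).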
+
+void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
+{
+ instrDesc* id = emitNewInstr(attr);
+ UNATIVE_OFFSET sz = emitInsSizeSV(insCodeMR(ins), varx, offs);
+#if FEATURE_STACK_FP_X87
+ insFormat fmt = emitInsModeFormat(ins, IF_SRD, IF_TRD_SRD, IF_SWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ insFormat fmt = emitInsModeFormat(ins, IF_SRD);
+#endif // !FEATURE_STACK_FP_X87
+
+ // 16-bit operand instructions will need a prefix
+ if (EA_SIZE(attr) == EA_2BYTE)
+ {
+ sz += 1;
+ }
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
+
+ // 64-bit operand instructions will need a REX.W prefix
+ if (TakesRexWPrefix(ins, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idCodeSize(sz);
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
+{
+ instrDesc* id = emitNewInstr(attr);
+ UNATIVE_OFFSET sz = emitInsSizeSV(insCodeMR(ins), varx, offs);
+ insFormat fmt = emitInsModeFormat(ins, IF_SRD_RRD);
+
+ // 16-bit operand instructions will need a prefix
+ if (EA_SIZE(attr) == EA_2BYTE)
+ {
+ sz++;
+ }
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
+
+ // 64-bit operand instructions will need a REX.W prefix
+ if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idCodeSize(sz);
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
+{
+ emitAttr size = EA_SIZE(attr);
+ noway_assert(emitVerifyEncodable(ins, size, ireg));
+
+ instrDesc* id = emitNewInstr(attr);
+ UNATIVE_OFFSET sz = emitInsSizeSV(insCodeRM(ins), varx, offs);
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_SRD);
+
+ // Most 16-bit operand instructions need a prefix
+ if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
+ {
+ sz++;
+ }
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
+
+ // 64-bit operand instructions will need a REX.W prefix
+ if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idCodeSize(sz);
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
+{
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_SRW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_SRD_CNS);
+ break;
+ }
+
+ instrDesc* id = emitNewInstrCns(attr, val);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ UNATIVE_OFFSET sz = emitInsSizeSV(id, varx, offs, val);
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
+
+ // 64-bit operand instructions will need a REX.W prefix
+ if (TakesRexWPrefix(ins, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idCodeSize(sz);
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Record that a jump instruction uses the short encoding
+ *
+ */
+void emitter::emitSetShortJump(instrDescJmp* id)
+{
+ if (id->idjKeepLong)
+ {
+ return;
+ }
+
+ id->idjShort = true;
+}
+
+/*****************************************************************************
+ *
+ * Add a jmp instruction.
+ */
+
+void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 */)
+{
+ UNATIVE_OFFSET sz;
+ instrDescJmp* id = emitNewInstrJmp();
+
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+
+ id->idIns(ins);
+ id->idInsFmt(IF_LABEL);
+ id->idAddr()->iiaBBlabel = dst;
+
+#ifdef DEBUG
+ // Mark the finally call
+ if (ins == INS_call && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ id->idDebugOnlyInfo()->idFinallyCall = true;
+ }
+#endif // DEBUG
+
+ /* Assume the jump will be long */
+
+ id->idjShort = 0;
+ id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+
+ /* Record the jump's IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ /* Figure out the max. size of the jump/call instruction */
+
+ if (ins == INS_call)
+ {
+ sz = CALL_INST_SIZE;
+ }
+ else if (ins == INS_push || ins == INS_push_hide)
+ {
+#if RELOC_SUPPORT
+ // Pushing the address of a basicBlock will need a reloc
+ // as the instruction uses the absolute address,
+ // not a relative address
+ if (emitComp->opts.compReloc)
+ {
+ id->idSetIsDspReloc();
+ }
+#endif
+ sz = PUSH_INST_SIZE;
+ }
+ else
+ {
+ insGroup* tgt;
+
+ /* This is a jump - assume the worst */
+
+ sz = (ins == INS_jmp) ? JMP_SIZE_LARGE : JCC_SIZE_LARGE;
+
+ /* Can we guess at the jump distance? */
+
+ tgt = (insGroup*)emitCodeGetCookie(dst);
+
+ if (tgt)
+ {
+ int extra;
+ UNATIVE_OFFSET srcOffs;
+ int jmpDist;
+
+ assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
+
+ /* This is a backward jump - figure out the distance */
+
+ srcOffs = emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL;
+
+ /* Compute the distance estimate */
+
+ jmpDist = srcOffs - tgt->igOffs;
+ assert((int)jmpDist > 0);
+
+ /* How much beyond the max. short distance does the jump go? */
+
+ extra = jmpDist + JMP_DIST_SMALL_MAX_NEG;
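+ // Worked example (illustrative): assuming JMP_DIST_SMALL_MAX_NEG is -128,
+ // a backward jump whose estimated distance is 100 bytes gives extra == -28,
+ // so the short (rel8) encoding is guaranteed to reach, while a distance of
+ // 200 gives extra == 72 and the conservative large size is kept.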
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ }
+ printf("[0] Jump source is at %08X\n", srcOffs);
+ printf("[0] Label block is at %08X\n", tgt->igOffs);
+ printf("[0] Jump distance - %04X\n", jmpDist);
+ if (extra > 0)
+ {
+ printf("[0] Distance excess = %d \n", extra);
+ }
+ }
+#endif
+
+ if (extra <= 0 && !id->idjKeepLong)
+ {
+ /* Wonderful - this jump surely will be short */
+
+ emitSetShortJump(id);
+ sz = JMP_SIZE_SMALL;
+ }
+ }
+#if DEBUG_EMIT
+ else
+ {
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ }
+ printf("[0] Jump source is at %04X/%08X\n", emitCurIGsize,
+ emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL);
+ printf("[0] Label block is unknown\n");
+ }
+ }
+#endif
+ }
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Add a call instruction (direct or indirect).
+ * argSize<0 means that the caller will pop the arguments
+ *
+ * The other arguments are interpreted depending on callType as shown:
+ * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
+ *
+ * EC_FUNC_TOKEN : addr is the method address
+ * EC_FUNC_TOKEN_INDIR : addr is the indirect method address
+ * EC_FUNC_ADDR : addr is the absolute address of the function
+ * EC_FUNC_VIRTUAL : "call [ireg+disp]"
+ *
+ * If callType is one of these emitCallTypes, addr has to be NULL.
+ * EC_INDIR_R : "call ireg".
+ * EC_INDIR_SR : "call lcl<disp>" (eg. call [ebp-8]).
+ * EC_INDIR_C : "call clsVar<disp>" (eg. call [clsVarAddr])
+ * EC_INDIR_ARD : "call [ireg+xreg*xmul+disp]"
+ *
+ */
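+//
+// Illustrative usage (not part of this change): a direct call to a known method
+// would pass EC_FUNC_TOKEN with "addr" set to the method's entry point and the
+// default REG_NA/0 values for ireg/xreg/xmul/disp, while an indirect call through
+// a register would pass EC_INDIR_R with "ireg" holding the target address and
+// "addr" == nullptr; the GC arguments (ptrVars/gcrefRegs/byrefRegs) describe what
+// is live across the call.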
+
+void emitter::emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset, // = BAD_IL_OFFSET
+ regNumber ireg, // = REG_NA
+ regNumber xreg, // = REG_NA
+ unsigned xmul, // = 0
+ ssize_t disp, // = 0
+ bool isJump, // = false
+ bool isNoGC) // = false
+{
+ /* Sanity check the arguments depending on callType */
+
+ assert(callType < EC_COUNT);
+ assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_TOKEN_INDIR && callType != EC_FUNC_ADDR) ||
+ (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
+ assert(callType != EC_FUNC_VIRTUAL || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0));
+ assert(callType < EC_INDIR_R || callType == EC_INDIR_ARD || callType == EC_INDIR_C || addr == nullptr);
+ assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
+ assert(callType != EC_INDIR_SR ||
+ (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp < (int)emitComp->lvaCount));
+ assert(callType != EC_INDIR_C || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp != 0));
+
+ // Our stack level should always be at least as large as the number of argument bytes
+ // we push. Just a sanity check.
+ assert((unsigned)abs((signed)argSize) <= codeGen->genStackLevel);
+
+#if STACK_PROBES
+ if (emitComp->opts.compNeedStackProbes)
+ {
+ // If we've pushed more than JIT_RESERVED_STACK allows, do an additional stack probe.
+ // Else, just make sure the prolog does a probe for us. The invariant we're trying
+ // to maintain is that at any point we go out to unmanaged code, there is at least
+ // CORINFO_STACKPROBE_DEPTH bytes of stack available.
+ //
+ // The reason we are not doing one probe for the max size in the prolog
+ // is that we don't have the max depth precomputed (it can depend on codegen),
+ // and we need it at the time we generate locallocs.
+ //
+ // Compiler::lvaAssignFrameOffsets sets up compLclFrameSize, which takes into
+ // account everything except for the arguments of a callee.
+ //
+ //
+ //
+ if ((sizeof(void*) + // return address for call
+ emitComp->genStackLevel +
+ // Current stack level. This gets reset on every
+ // localloc and in the prolog (the invariant is that
+ // genStackLevel is 0 on basic block entry and exit and
+ // after any alloca). genStackLevel will include any arguments
+ // to the call, so we will insert an additional probe if
+ // we've consumed more than JIT_RESERVED_STACK bytes
+ // of stack, which is what the prolog probe covers (in
+ // addition to the EE requested size)
+ (emitComp->compHndBBtabCount * sizeof(void*))
+ // Hidden slots for calling finallys
+ ) >= JIT_RESERVED_STACK)
+ {
+ // This happens when you have a call with a lot of arguments or a call made when
+ // there's a lot of stuff pushed on the stack (for example a call whose return
+ // value is an argument of another call that has pushed stuff on the stack).
+ // This shouldn't be very frequent.
+ // For different values of JIT_RESERVED_STACK
+ //
+ // For mscorlib (109605 calls)
+ //
+ // 14190 probes in prologs (56760 bytes of code)
+ //
+ // JIT_RESERVED_STACK = 16 : 5452 extra probes
+ // JIT_RESERVED_STACK = 32 : 1084 extra probes
+ // JIT_RESERVED_STACK = 64 : 1 extra probe
+ // JIT_RESERVED_STACK = 96 : 0 extra probes
+ emitComp->genGenerateStackProbe();
+ }
+ else
+ {
+ if (emitComp->compGeneratingProlog || emitComp->compGeneratingEpilog)
+ {
+ if (emitComp->compStackProbePrologDone)
+ {
+ // We already generated a probe and this call is not happening
+ // at a depth >= JIT_RESERVED_STACK, so nothing to do here
+ }
+ else
+ {
+ // 3 possible ways to get here:
+ // - We are in an epilog and haven't generated a probe in the prolog.
+ // This shouldn't happen as we don't generate any calls in epilog.
+ // - We are in the prolog, but doing a call before generating the probe.
+ // This shouldn't happen at all.
+ // - We are in the prolog, did not generate a probe but now we need
+ // to generate a probe because we need a call (eg: profiler). We'll
+ // need a probe.
+ //
+ // In any case, we need a probe
+
+ // Ignore the profiler callback for now.
+ if (!emitComp->compIsProfilerHookNeeded())
+ {
+ assert(!"We do not expect to get here");
+ emitComp->genGenerateStackProbe();
+ }
+ }
+ }
+ else
+ {
+ // We will need a probe and will generate it in the prolog
+ emitComp->genNeedPrologStackProbe = true;
+ }
+ }
+ }
+#endif // STACK_PROBES
+
+ int argCnt;
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id;
+
+ /* This is the saved set of registers after a normal call */
+ unsigned savedSet = RBM_CALLEE_SAVED;
+
+ /* Some special helper calls have a different set of saved registers */
+
+ if (isNoGC)
+ {
+ // Get the set of registers that this call kills and remove it from the saved set.
+ savedSet = RBM_ALLINT & ~emitComp->compNoGCHelperCallKillSet(Compiler::eeGetHelperNum(methHnd));
+ }
+ else
+ {
+ assert(!emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
+ }
+
+ /* Trim out any callee-trashed registers from the live set */
+
+ gcrefRegs &= savedSet;
+ byrefRegs &= savedSet;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("\t\t\t\t\t\t\tCall: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
+ dumpConvertedVarSet(emitComp, ptrVars);
+ printf(", gcrefRegs=");
+ printRegMaskInt(gcrefRegs);
+ emitDispRegSet(gcrefRegs);
+ printf(", byrefRegs=");
+ printRegMaskInt(byrefRegs);
+ emitDispRegSet(byrefRegs);
+ printf("\n");
+ }
+#endif
+
+ assert(argSize % sizeof(void*) == 0);
+ argCnt = (int)(argSize / (ssize_t)sizeof(void*)); // we need a signed-divide
+
+#ifdef DEBUGGING_SUPPORT
+ /* Managed RetVal: emit sequence point for the call */
+ if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
+ {
+ codeGen->genIPmappingAdd(ilOffset, false);
+ }
+#endif
+
+ /*
+ We need to allocate the appropriate instruction descriptor based
+ on whether this is a direct/indirect call, and whether we need to
+ record an updated set of live GC variables.
+
+ The stats for a ton of classes are as follows:
+
+ Direct call w/o GC vars 220,216
+ Indir. call w/o GC vars 144,781
+
+ Direct call with GC vars 9,440
+ Indir. call with GC vars 5,768
+ */
+
+ if (callType >= EC_FUNC_VIRTUAL)
+ {
+ /* Indirect call, virtual calls */
+
+ assert(callType == EC_FUNC_VIRTUAL || callType == EC_INDIR_R || callType == EC_INDIR_SR ||
+ callType == EC_INDIR_C || callType == EC_INDIR_ARD);
+
+ id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
+ }
+ else
+ {
+ // Helper/static/nonvirtual/function calls (direct or through handle),
+ // and calls to an absolute addr.
+
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR || callType == EC_FUNC_ADDR);
+
+ id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
+ }
+
+ /* Update the emitter's live GC ref sets */
+
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
+ emitThisGCrefRegs = gcrefRegs;
+ emitThisByrefRegs = byrefRegs;
+
+ /* Set the instruction - special case jumping a function */
+ instruction ins = INS_call;
+
+ if (isJump)
+ {
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR);
+ if (callType == EC_FUNC_TOKEN)
+ {
+ ins = INS_l_jmp;
+ }
+ else
+ {
+ ins = INS_i_jmp;
+ }
+ }
+ id->idIns(ins);
+
+ id->idSetIsNoGC(isNoGC);
+
+ // Record the address: method, indirection, or funcptr
+ if (callType >= EC_FUNC_VIRTUAL)
+ {
+ // This is an indirect call (either a virtual call or func ptr call)
+
+ switch (callType)
+ {
+ case EC_INDIR_C:
+ // Indirect call using an absolute code address.
+ // Must be marked as relocatable and is done at the
+ // branch target location.
+ goto CALL_ADDR_MODE;
+
+ case EC_INDIR_R: // the address is in a register
+
+ id->idSetIsCallRegPtr();
+
+ __fallthrough;
+
+ case EC_INDIR_ARD: // the address is an indirection
+
+ goto CALL_ADDR_MODE;
+
+ case EC_INDIR_SR: // the address is in a lcl var
+
+ id->idInsFmt(IF_SRD);
+ // disp is really a lclVarNum
+ noway_assert((unsigned)disp == (size_t)disp);
+ id->idAddr()->iiaLclVar.initLclVarAddr((unsigned)disp, 0);
+ sz = emitInsSizeSV(insCodeMR(INS_call), (unsigned)disp, 0);
+
+ break;
+
+ case EC_FUNC_VIRTUAL:
+
+ CALL_ADDR_MODE:
+
+ // fall-through
+
+ // The function is "ireg" if id->idIsCallRegPtr(),
+ // else [ireg+xmul*xreg+disp]
+
+ id->idInsFmt(IF_ARD);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = ireg;
+ id->idAddr()->iiaAddrMode.amIndxReg = xreg;
+ id->idAddr()->iiaAddrMode.amScale = xmul ? emitEncodeScale(xmul) : emitter::OPSZ1;
+
+ sz = emitInsSizeAM(id, insCodeMR(INS_call));
+
+ if (ireg == REG_NA && xreg == REG_NA)
+ {
+ if (codeGen->genCodeIndirAddrNeedsReloc(disp))
+ {
+ id->idSetIsDspReloc();
+ }
+#ifdef _TARGET_AMD64_
+ else
+ {
+ // An absolute indir address that doesn't need reloc should fit within 32-bits
+ // to be encoded as offset relative to zero. This addr mode requires an extra
+ // SIB byte
+ noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
+ sz++;
+ }
+#endif //_TARGET_AMD64_
+ }
+
+ break;
+
+ default:
+ NO_WAY("unexpected instruction");
+ break;
+ }
+ }
+ else if (callType == EC_FUNC_TOKEN_INDIR)
+ {
+ /* "call [method_addr]" */
+
+ assert(addr != nullptr);
+
+ id->idInsFmt(IF_METHPTR);
+ id->idAddr()->iiaAddr = (BYTE*)addr;
+ sz = 6;
+
+#if RELOC_SUPPORT
+ // Since this is an indirect call through a pointer and we don't
+ // currently pass in emitAttr into this function, we query codegen
+ // whether addr needs a reloc.
+ if (codeGen->genCodeIndirAddrNeedsReloc((size_t)addr))
+ {
+ id->idSetIsDspReloc();
+ }
+#ifdef _TARGET_AMD64_
+ else
+ {
+ // An absolute indir address that doesn't need reloc should fit within 32-bits
+ // to be encoded as offset relative to zero. This addr mode requires an extra
+ // SIB byte
+ noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
+ sz++;
+ }
+#endif //_TARGET_AMD64_
+#endif // RELOC_SUPPORT
+ }
+ else
+ {
+ /* This is a simple direct call: "call helper/method/addr" */
+
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
+
+ assert(addr != nullptr);
+
+ id->idInsFmt(IF_METHOD);
+ sz = 5;
+
+ id->idAddr()->iiaAddr = (BYTE*)addr;
+
+ if (callType == EC_FUNC_ADDR)
+ {
+ id->idSetIsCallAddr();
+ }
+
+#if RELOC_SUPPORT
+ // Direct call to a method and no addr indirection is needed.
+ if (codeGen->genCodeAddrNeedsReloc((size_t)addr))
+ {
+ id->idSetIsDspReloc();
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ if (emitComp->verbose && 0)
+ {
+ if (id->idIsLargeCall())
+ {
+ if (callType >= EC_FUNC_VIRTUAL)
+ {
+ printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
+ VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
+ }
+ else
+ {
+ printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
+ VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
+ }
+ }
+ }
+#endif
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+ id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
+ id->idDebugOnlyInfo()->idClsCookie = nullptr;
+ id->idDebugOnlyInfo()->idCallSig = sigInfo;
+#endif
+
+#if defined(LATE_DISASM)
+ if (addr != nullptr)
+ {
+ codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
+ }
+#endif // defined(LATE_DISASM)
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ /* The call will pop the arguments */
+
+ if (emitCntStackDepth && argSize > 0)
+ {
+ noway_assert((ssize_t)emitCurStackLvl >= argSize);
+ emitCurStackLvl -= (int)argSize;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * The following called for each recorded instruction -- use for debugging.
+ */
+void emitter::emitInsSanityCheck(instrDesc* id)
+{
+ // make certain you only try to put relocs on things that can have them.
+ ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
+ if ((idOp == ID_OP_SCNS) && id->idIsLargeCns())
+ {
+ idOp = ID_OP_CNS;
+ }
+
+ if (!id->idIsTiny())
+ {
+ if (id->idIsDspReloc())
+ {
+ assert(idOp == ID_OP_NONE || idOp == ID_OP_AMD || idOp == ID_OP_DSP || idOp == ID_OP_DSP_CNS ||
+ idOp == ID_OP_AMD_CNS || idOp == ID_OP_SPEC || idOp == ID_OP_CALL || idOp == ID_OP_JMP ||
+ idOp == ID_OP_LBL);
+ }
+
+ if (id->idIsCnsReloc())
+ {
+ assert(idOp == ID_OP_CNS || idOp == ID_OP_AMD_CNS || idOp == ID_OP_DSP_CNS || idOp == ID_OP_SPEC ||
+ idOp == ID_OP_CALL || idOp == ID_OP_JMP);
+ }
+ }
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Return the allocated size (in bytes) of the given instruction descriptor.
+ */
+
+size_t emitter::emitSizeOfInsDsc(instrDesc* id)
+{
+ if (emitIsTinyInsDsc(id))
+ {
+ return TINY_IDSC_SIZE;
+ }
+
+ if (emitIsScnsInsDsc(id))
+ {
+ return SMALL_IDSC_SIZE;
+ }
+
+ assert((unsigned)id->idInsFmt() < emitFmtCount);
+
+ ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
+
+ // An INS_call instruction may use a "fat" direct/indirect call descriptor
+ // except for a local call to a label (i.e. call to a finally)
+ // Only ID_OP_CALL and ID_OP_SPEC check for this, so we enforce that the
+ // INS_call instruction always uses one of these idOps
+
+ if (id->idIns() == INS_call)
+ {
+ assert(idOp == ID_OP_CALL || // is a direct call
+ idOp == ID_OP_SPEC || // is an indirect call
+ idOp == ID_OP_JMP); // is a local call to finally clause
+ }
+
+ switch (idOp)
+ {
+ case ID_OP_NONE:
+ break;
+
+ case ID_OP_LBL:
+ return sizeof(instrDescLbl);
+
+ case ID_OP_JMP:
+ return sizeof(instrDescJmp);
+
+ case ID_OP_CALL:
+ case ID_OP_SPEC:
+ if (id->idIsLargeCall())
+ {
+ /* Must be a "fat" indirect call descriptor */
+ return sizeof(instrDescCGCA);
+ }
+
+ __fallthrough;
+
+ case ID_OP_SCNS:
+ case ID_OP_CNS:
+ case ID_OP_DSP:
+ case ID_OP_DSP_CNS:
+ case ID_OP_AMD:
+ case ID_OP_AMD_CNS:
+ if (id->idIsLargeCns())
+ {
+ if (id->idIsLargeDsp())
+ {
+ return sizeof(instrDescCnsDsp);
+ }
+ else
+ {
+ return sizeof(instrDescCns);
+ }
+ }
+ else
+ {
+ if (id->idIsLargeDsp())
+ {
+ return sizeof(instrDescDsp);
+ }
+ else
+ {
+ return sizeof(instrDesc);
+ }
+ }
+
+ default:
+ NO_WAY("unexpected instruction descriptor format");
+ break;
+ }
+
+ return sizeof(instrDesc);
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Return a string that represents the given register.
+ */
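+//
+// For example (illustrative): on AMD64, REG_RAX prints as "rax" for EA_8BYTE,
+// "eax" for EA_4BYTE, "ax" for EA_2BYTE and "al" for EA_1BYTE, while the extended
+// registers keep their name and gain a size suffix (e.g. "r10d" for REG_R10 with
+// EA_4BYTE); EA_16BYTE/EA_32BYTE map to the XMM/YMM register names.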
+
+const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName)
+{
+ static char rb[2][128];
+ static unsigned char rbc = 0;
+
+ const char* rn = emitComp->compRegVarName(reg, varName);
+
+#ifdef _TARGET_AMD64_
+ char suffix = '\0';
+
+ switch (EA_SIZE(attr))
+ {
+ case EA_32BYTE:
+ return emitYMMregName(reg);
+
+ case EA_16BYTE:
+ return emitXMMregName(reg);
+
+ case EA_8BYTE:
+ break;
+
+ case EA_4BYTE:
+ if (reg > REG_R15)
+ {
+ break;
+ }
+
+ if (reg > REG_RDI)
+ {
+ suffix = 'd';
+ goto APPEND_SUFFIX;
+ }
+ rbc = (rbc + 1) % 2;
+ rb[rbc][0] = 'e';
+ rb[rbc][1] = rn[1];
+ rb[rbc][2] = rn[2];
+ rb[rbc][3] = 0;
+ rn = rb[rbc];
+ break;
+
+ case EA_2BYTE:
+ if (reg > REG_RDI)
+ {
+ suffix = 'w';
+ goto APPEND_SUFFIX;
+ }
+ rn++;
+ break;
+
+ case EA_1BYTE:
+ if (reg > REG_RDI)
+ {
+ suffix = 'b';
+ APPEND_SUFFIX:
+ rbc = (rbc + 1) % 2;
+ rb[rbc][0] = rn[0];
+ rb[rbc][1] = rn[1];
+ if (rn[2])
+ {
+ assert(rn[3] == 0);
+ rb[rbc][2] = rn[2];
+ rb[rbc][3] = suffix;
+ rb[rbc][4] = 0;
+ }
+ else
+ {
+ rb[rbc][2] = suffix;
+ rb[rbc][3] = 0;
+ }
+ }
+ else
+ {
+ rbc = (rbc + 1) % 2;
+ rb[rbc][0] = rn[1];
+ if (reg < 4)
+ {
+ rb[rbc][1] = 'l';
+ rb[rbc][2] = 0;
+ }
+ else
+ {
+ rb[rbc][1] = rn[2];
+ rb[rbc][2] = 'l';
+ rb[rbc][3] = 0;
+ }
+ }
+
+ rn = rb[rbc];
+ break;
+
+ default:
+ break;
+ }
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_X86_
+ assert(strlen(rn) >= 3);
+
+ switch (EA_SIZE(attr))
+ {
+#ifndef LEGACY_BACKEND
+ case EA_32BYTE:
+ return emitYMMregName(reg);
+
+ case EA_16BYTE:
+ return emitXMMregName(reg);
+#endif // LEGACY_BACKEND
+
+ case EA_4BYTE:
+ break;
+
+ case EA_2BYTE:
+ rn++;
+ break;
+
+ case EA_1BYTE:
+ rbc = (rbc + 1) % 2;
+ rb[rbc][0] = rn[1];
+ rb[rbc][1] = 'l';
+ strcpy_s(&rb[rbc][2], sizeof(rb[0]) - 2, rn + 3);
+
+ rn = rb[rbc];
+ break;
+
+ default:
+ break;
+ }
+#endif // _TARGET_X86_
+
+#if 0
+ // The following is useful if you want register names to be tagged with * or ^ representing gcref or byref, respectively,
+ // however it's possibly not interesting most of the time.
+ if (EA_IS_GCREF(attr) || EA_IS_BYREF(attr))
+ {
+ if (rn != rb[rbc])
+ {
+ rbc = (rbc+1)%2;
+ strcpy_s(rb[rbc], sizeof(rb[rbc]), rn);
+ rn = rb[rbc];
+ }
+
+ if (EA_IS_GCREF(attr))
+ {
+ strcat_s(rb[rbc], sizeof(rb[rbc]), "*");
+ }
+ else if (EA_IS_BYREF(attr))
+ {
+ strcat_s(rb[rbc], sizeof(rb[rbc]), "^");
+ }
+ }
+#endif // 0
+
+ return rn;
+}
+
+/*****************************************************************************
+ *
+ * Return a string that represents the given FP register.
+ */
+
+const char* emitter::emitFPregName(unsigned reg, bool varName)
+{
+ assert(reg < REG_COUNT);
+
+ return emitComp->compFPregVarName((regNumber)(reg), varName);
+}
+
+/*****************************************************************************
+ *
+ * Return a string that represents the given XMM register.
+ */
+
+const char* emitter::emitXMMregName(unsigned reg)
+{
+ static const char* const regNames[] = {
+#define REGDEF(name, rnum, mask, sname) "x" sname,
+#ifndef LEGACY_BACKEND
+#include "register.h"
+#else // LEGACY_BACKEND
+#include "registerxmm.h"
+#endif // LEGACY_BACKEND
+ };
+
+ assert(reg < REG_COUNT);
+ assert(reg < sizeof(regNames) / sizeof(regNames[0]));
+
+ return regNames[reg];
+}
+
+/*****************************************************************************
+ *
+ * Return a string that represents the given YMM register.
+ */
+
+const char* emitter::emitYMMregName(unsigned reg)
+{
+ static const char* const regNames[] = {
+#define REGDEF(name, rnum, mask, sname) "y" sname,
+#ifndef LEGACY_BACKEND
+#include "register.h"
+#else // LEGACY_BACKEND
+#include "registerxmm.h"
+#endif // LEGACY_BACKEND
+ };
+
+ assert(reg < REG_COUNT);
+ assert(reg < sizeof(regNames) / sizeof(regNames[0]));
+
+ return regNames[reg];
+}
+
+/*****************************************************************************
+ *
+ * Display a static data member reference.
+ */
+
+void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc /* = false */)
+{
+ int doffs;
+
+ /* Filter out the special case of fs:[offs] */
+
+ // Munge any pointers if we want diff-able disassembly
+ if (emitComp->opts.disDiffable)
+ {
+ ssize_t top12bits = (offs >> 20);
+ if ((top12bits != 0) && (top12bits != -1))
+ {
+ offs = 0xD1FFAB1E;
+ }
+ }
+
+ if (fldHnd == FLD_GLOBAL_FS)
+ {
+ printf("FS:[0x%04X]", offs);
+ return;
+ }
+
+ if (fldHnd == FLD_GLOBAL_DS)
+ {
+ printf("[0x%04X]", offs);
+ return;
+ }
+
+ printf("[");
+
+ doffs = Compiler::eeGetJitDataOffs(fldHnd);
+
+#ifdef RELOC_SUPPORT
+ if (reloc)
+ {
+ printf("reloc ");
+ }
+#endif
+
+ if (doffs >= 0)
+ {
+ if (doffs & 1)
+ {
+ printf("@CNS%02u", doffs - 1);
+ }
+ else
+ {
+ printf("@RWD%02u", doffs);
+ }
+
+ if (offs)
+ {
+ printf("%+Id", offs);
+ }
+ }
+ else
+ {
+ printf("classVar[%#x]", emitComp->dspPtr(fldHnd));
+
+ if (offs)
+ {
+ printf("%+Id", offs);
+ }
+ }
+
+ printf("]");
+
+ if (emitComp->opts.varNames && offs < 0)
+ {
+ printf("'%s", emitComp->eeGetFieldName(fldHnd));
+ if (offs)
+ {
+ printf("%+Id", offs);
+ }
+ printf("'");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display a stack frame reference.
+ */
+
+void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
+{
+ int addr;
+ bool bEBP;
+
+ printf("[");
+
+ if (!asmfm || emitComp->lvaDoneFrameLayout == Compiler::NO_FRAME_LAYOUT)
+ {
+ if (varx < 0)
+ {
+ printf("TEMP_%02u", -varx);
+ }
+ else
+ {
+ printf("V%02u", +varx);
+ }
+
+ if (disp < 0)
+ {
+ printf("-0x%X", -disp);
+ }
+ else if (disp > 0)
+ {
+ printf("+0x%X", +disp);
+ }
+ }
+
+ if (emitComp->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
+ {
+ if (!asmfm)
+ {
+ printf(" ");
+ }
+
+ addr = emitComp->lvaFrameAddress(varx, &bEBP) + disp;
+
+ if (bEBP)
+ {
+ printf(STR_FPBASE);
+
+ if (addr < 0)
+ {
+ printf("-%02XH", -addr);
+ }
+ else if (addr > 0)
+ {
+ printf("+%02XH", addr);
+ }
+ }
+ else
+ {
+ /* Adjust the offset by amount currently pushed on the stack */
+
+ printf(STR_SPBASE);
+
+ if (addr < 0)
+ {
+ printf("-%02XH", -addr);
+ }
+ else if (addr > 0)
+ {
+ printf("+%02XH", addr);
+ }
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (emitCurStackLvl)
+ printf("+%02XH", emitCurStackLvl);
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+ }
+ }
+
+ printf("]");
+
+ if (varx >= 0 && emitComp->opts.varNames)
+ {
+ LclVarDsc* varDsc;
+ const char* varName;
+
+ assert((unsigned)varx < emitComp->lvaCount);
+ varDsc = emitComp->lvaTable + varx;
+ varName = emitComp->compLocalVarName(varx, offs);
+
+ if (varName)
+ {
+ printf("'%s", varName);
+
+ if (disp < 0)
+ {
+ printf("-%d", -disp);
+ }
+ else if (disp > 0)
+ {
+ printf("+%d", +disp);
+ }
+
+ printf("'");
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display a reloc value.
+ * If we are formatting for an assembly listing, don't print the hex value,
+ * since it will prevent us from doing assembly diffs.
+ */
+void emitter::emitDispReloc(ssize_t value)
+{
+ if (emitComp->opts.disAsm)
+ {
+ printf("(reloc)");
+ }
+ else
+ {
+ printf("(reloc 0x%Ix)", emitComp->dspPtr(value));
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display an address mode.
+ */
+
+void emitter::emitDispAddrMode(instrDesc* id, bool noDetail)
+{
+ bool nsep = false;
+ ssize_t disp;
+
+ unsigned jtno = 0;
+ dataSection* jdsc = nullptr;
+
+ /* The displacement field is in an unusual place for calls */
+
+ disp = (id->idIns() == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
+
+ /* Display a jump table label if this is a switch table jump */
+
+ if (id->idIns() == INS_i_jmp)
+ {
+ UNATIVE_OFFSET offs = 0;
+
+ /* Find the appropriate entry in the data section list */
+
+ for (jdsc = emitConsDsc.dsdList, jtno = 0; jdsc; jdsc = jdsc->dsNext)
+ {
+ UNATIVE_OFFSET size = jdsc->dsSize;
+
+ /* Is this a label table? */
+
+ if (size & 1)
+ {
+ size--;
+ jtno++;
+
+ if (offs == id->idDebugOnlyInfo()->idMemCookie)
+ {
+ break;
+ }
+ }
+
+ offs += size;
+ }
+
+ /* If we've found a matching entry then this is a table jump */
+
+ if (jdsc)
+ {
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ printf("reloc ");
+ }
+#endif
+ printf("J_M%03u_DS%02u", Compiler::s_compMethodsCount, id->idDebugOnlyInfo()->idMemCookie);
+ }
+
+ disp -= id->idDebugOnlyInfo()->idMemCookie;
+ }
+
+ bool frameRef = false;
+
+ printf("[");
+
+ if (id->idAddr()->iiaAddrMode.amBaseReg != REG_NA)
+ {
+ printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
+ nsep = true;
+ if (id->idAddr()->iiaAddrMode.amBaseReg == REG_ESP)
+ {
+ frameRef = true;
+ }
+ else if (emitComp->isFramePointerUsed() && id->idAddr()->iiaAddrMode.amBaseReg == REG_EBP)
+ {
+ frameRef = true;
+ }
+ }
+
+ if (id->idAddr()->iiaAddrMode.amIndxReg != REG_NA)
+ {
+ size_t scale = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
+
+ if (nsep)
+ {
+ printf("+");
+ }
+ if (scale > 1)
+ {
+ printf("%u*", scale);
+ }
+ printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amIndxReg));
+ nsep = true;
+ }
+
+#ifdef RELOC_SUPPORT
+ if ((id->idIsDspReloc()) && (id->idIns() != INS_i_jmp))
+ {
+ if (nsep)
+ {
+ printf("+");
+ }
+ emitDispReloc(disp);
+ }
+ else
+#endif
+ {
+ // Munge any pointers if we want diff-able disassembly
+ if (emitComp->opts.disDiffable)
+ {
+ ssize_t top12bits = (disp >> 20);
+ if ((top12bits != 0) && (top12bits != -1))
+ {
+ disp = 0xD1FFAB1E;
+ }
+ }
+
+ if (disp > 0)
+ {
+ if (nsep)
+ {
+ printf("+");
+ }
+ if (frameRef)
+ {
+ printf("%02XH", disp);
+ }
+ else if (disp < 1000)
+ {
+ printf("%d", disp);
+ }
+ else if (disp <= 0xFFFF)
+ {
+ printf("%04XH", disp);
+ }
+ else
+ {
+ printf("%08XH", disp);
+ }
+ }
+ else if (disp < 0)
+ {
+ if (frameRef)
+ {
+ printf("-%02XH", -disp);
+ }
+ else if (disp > -1000)
+ {
+ printf("-%d", -disp);
+ }
+ else if (disp >= -0xFFFF)
+ {
+ printf("-%04XH", -disp);
+ }
+ else if ((disp & 0x7F000000) != 0x7F000000)
+ {
+ printf("%08XH", disp);
+ }
+ else
+ {
+ printf("-%08XH", -disp);
+ }
+ }
+ else if (!nsep)
+ {
+ printf("%04XH", disp);
+ }
+ }
+
+ printf("]");
+
+ if (id->idDebugOnlyInfo()->idClsCookie)
+ {
+ if (id->idIns() == INS_call)
+ {
+ printf("%s", emitFncName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie));
+ }
+ else
+ {
+ printf("%s", emitFldName((CORINFO_FIELD_HANDLE)id->idDebugOnlyInfo()->idMemCookie));
+ }
+ }
+ // pretty print string if it looks like one
+ else if (id->idGCref() == GCT_GCREF && id->idIns() == INS_mov && id->idAddr()->iiaAddrMode.amBaseReg == REG_NA)
+ {
+ const wchar_t* str = emitComp->eeGetCPString(disp);
+ if (str != nullptr)
+ {
+ printf(" '%S'", str);
+ }
+ }
+
+ if (jdsc && !noDetail)
+ {
+ unsigned cnt = (jdsc->dsSize - 1) / sizeof(void*);
+ BasicBlock** bbp = (BasicBlock**)jdsc->dsCont;
+
+#ifdef _TARGET_AMD64_
+#define SIZE_LETTER "Q"
+#else
+#define SIZE_LETTER "D"
+#endif
+ printf("\n\n J_M%03u_DS%02u LABEL " SIZE_LETTER "WORD", Compiler::s_compMethodsCount, jtno);
+
+ /* Display the label table (it's stored as "BasicBlock*" values) */
+
+ do
+ {
+ insGroup* lab;
+
+ /* Convert the BasicBlock* value to an IG address */
+
+ lab = (insGroup*)emitCodeGetCookie(*bbp++);
+ assert(lab);
+
+ printf("\n D" SIZE_LETTER " G_M%03u_IG%02u", Compiler::s_compMethodsCount, lab->igNum);
+ } while (--cnt);
+ }
+}
+
+/*****************************************************************************
+ *
+ * If the given instruction is a shift, display the 2nd operand.
+ */
+
+void emitter::emitDispShift(instruction ins, int cnt)
+{
+ switch (ins)
+ {
+ case INS_rcl_1:
+ case INS_rcr_1:
+ case INS_rol_1:
+ case INS_ror_1:
+ case INS_shl_1:
+ case INS_shr_1:
+ case INS_sar_1:
+ printf(", 1");
+ break;
+
+ case INS_rcl:
+ case INS_rcr:
+ case INS_rol:
+ case INS_ror:
+ case INS_shl:
+ case INS_shr:
+ case INS_sar:
+ printf(", cl");
+ break;
+
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ printf(", %d", cnt);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display (optionally) the bytes for the instruction encoding in hex
+ */
+
+void emitter::emitDispInsHex(BYTE* code, size_t sz)
+{
+ // We do not display the instruction hex if we want diff-able disassembly
+ if (!emitComp->opts.disDiffable)
+ {
+#ifdef _TARGET_AMD64_
+ // how many bytes per instruction we format for
+ const size_t digits = 10;
+#else // _TARGET_X86_
+ const size_t digits = 6;
+#endif
+ printf(" ");
+ for (unsigned i = 0; i < sz; i++)
+ {
+ printf("%02X", (*((BYTE*)(code + i))));
+ }
+
+ if (sz < digits)
+ {
+ printf("%.*s", 2 * (digits - sz), " ");
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display the given instruction.
+ */
+
+void emitter::emitDispIns(
+ instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig)
+{
+ emitAttr attr;
+ const char* sstr;
+
+ instruction ins = id->idIns();
+
+ if (emitComp->verbose)
+ {
+ unsigned idNum = id->idDebugOnlyInfo()->idNum;
+ printf("IN%04x: ", idNum);
+ }
+
+#ifdef RELOC_SUPPORT
+#define ID_INFO_DSP_RELOC ((bool)(id->idIsDspReloc()))
+#else
+#define ID_INFO_DSP_RELOC false
+#endif
+ /* Display a constant value if the instruction references one */
+
+ if (!isNew)
+ {
+ switch (id->idInsFmt())
+ {
+ int offs;
+
+ case IF_MRD_RRD:
+ case IF_MWR_RRD:
+ case IF_MRW_RRD:
+
+ case IF_RRD_MRD:
+ case IF_RWR_MRD:
+ case IF_RRW_MRD:
+
+ case IF_MRD_CNS:
+ case IF_MWR_CNS:
+ case IF_MRW_CNS:
+ case IF_MRW_SHF:
+
+ case IF_MRD:
+ case IF_MWR:
+ case IF_MRW:
+
+#if FEATURE_STACK_FP_X87
+
+ case IF_TRD_MRD:
+ case IF_TWR_MRD:
+ case IF_TRW_MRD:
+
+ // case IF_MRD_TRD:
+ // case IF_MRW_TRD:
+ case IF_MWR_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+ case IF_MRD_OFF:
+
+ /* Is this actually a reference to a data section? */
+
+ offs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd);
+
+ if (offs >= 0)
+ {
+ void* addr;
+
+ /* Display a data section reference */
+
+ assert((unsigned)offs < emitConsDsc.dsdOffs);
+ addr = emitConsBlock ? emitConsBlock + offs : nullptr;
+
+#if 0
+ // TODO-XArch-Cleanup: Fix or remove this code.
+ /* Is the operand an integer or floating-point value? */
+
+ bool isFP = false;
+
+ if (CodeGen::instIsFP(id->idIns()))
+ {
+ switch (id->idIns())
+ {
+ case INS_fild:
+ case INS_fildl:
+ break;
+
+ default:
+ isFP = true;
+ break;
+ }
+ }
+
+ if (offs & 1)
+ printf("@CNS%02u", offs);
+ else
+ printf("@RWD%02u", offs);
+
+ printf(" ");
+
+ if (addr)
+ {
+ addr = 0;
+ // TODO-XArch-Bug?:
+ // This was busted by switching the order
+ // in which we output the code block vs.
+ // the data blocks -- when we get here,
+ // the data block has not been filled in
+ // yet, so we'll display garbage.
+
+ if (isFP)
+ {
+ if (id->idOpSize() == EA_4BYTE)
+ printf("DF %f \n", addr ? *(float *)addr : 0);
+ else
+ printf("DQ %lf\n", addr ? *(double *)addr : 0);
+ }
+ else
+ {
+ if (id->idOpSize() <= EA_4BYTE)
+ printf("DD %d \n", addr ? *(int *)addr : 0);
+ else
+ printf("DQ %D \n", addr ? *(__int64 *)addr : 0);
+ }
+ }
+#endif
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ // printf("[F=%s] " , emitIfName(id->idInsFmt()));
+ // printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum);
+ // printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth);
+ // printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32));
+ // printf("[A=%08X] " , emitSimpleStkMask);
+ // printf("[A=%08X] " , emitSimpleByrefStkMask);
+ // printf("[L=%02u] " , id->idCodeSize());
+
+ if (!emitComp->opts.dspEmit && !isNew && !asmfm)
+ {
+ doffs = true;
+ }
+
+ /* Display the instruction offset */
+
+ emitDispInsOffs(offset, doffs);
+
+ if (code != nullptr)
+ {
+ /* Display the instruction hex code */
+
+ emitDispInsHex(code, sz);
+ }
+
+ /* Display the instruction name */
+
+ sstr = codeGen->genInsName(ins);
+#ifdef FEATURE_AVX_SUPPORT
+ if (IsAVXInstruction(ins))
+ {
+ printf(" v%-8s", sstr);
+ }
+ else
+#endif // FEATURE_AVX_SUPPORT
+ {
+ printf(" %-9s", sstr);
+ }
+#ifndef FEATURE_PAL
+ if (strnlen_s(sstr, 10) > 8)
+#else // FEATURE_PAL
+ if (strnlen(sstr, 10) > 8)
+#endif // FEATURE_PAL
+ {
+ printf(" ");
+ }
+
+ /* By now the size better be set to something */
+
+ assert(emitInstCodeSz(id) || emitInstHasNoCode(ins));
+
+ /* Figure out the operand size */
+
+ if (id->idGCref() == GCT_GCREF)
+ {
+ attr = EA_GCREF;
+ sstr = "gword ptr ";
+ }
+ else if (id->idGCref() == GCT_BYREF)
+ {
+ attr = EA_BYREF;
+ sstr = "bword ptr ";
+ }
+ else
+ {
+ attr = id->idOpSize();
+ sstr = codeGen->genSizeStr(attr);
+
+ if (ins == INS_lea)
+ {
+#ifdef _TARGET_AMD64_
+ assert((attr == EA_4BYTE) || (attr == EA_8BYTE));
+#else
+ assert(attr == EA_4BYTE);
+#endif
+ sstr = "";
+ }
+ }
+
+ /* Now see what instruction format we've got */
+
+ // First print the implicit register usage
+ if (instrHasImplicitRegPairDest(ins))
+ {
+ printf("%s:%s, ", emitRegName(REG_EDX, id->idOpSize()), emitRegName(REG_EAX, id->idOpSize()));
+ }
+ else if (instrIs3opImul(ins))
+ {
+ regNumber tgtReg = inst3opImulReg(ins);
+ printf("%s, ", emitRegName(tgtReg, id->idOpSize()));
+ }
+
+ switch (id->idInsFmt())
+ {
+ ssize_t val;
+ ssize_t offs;
+ CnsVal cnsVal;
+ const char* methodName;
+
+ case IF_CNS:
+ val = emitGetInsSC(id);
+#ifdef _TARGET_AMD64_
+ // no 8-byte immediates allowed here!
+ assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
+#endif
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ emitDispReloc(val);
+ }
+ else
+#endif
+ {
+ PRINT_CONSTANT:
+ // Munge any pointers if we want diff-able disassembly
+ if (emitComp->opts.disDiffable)
+ {
+ ssize_t top12bits = (val >> 20);
+ if ((top12bits != 0) && (top12bits != -1))
+ {
+ val = 0xD1FFAB1E;
+ }
+ }
+ if ((val > -1000) && (val < 1000))
+ {
+ printf("%d", val);
+ }
+ else if ((val > 0) || ((val & 0x7F000000) != 0x7F000000))
+ {
+ printf("0x%IX", val);
+ }
+ else
+ { // (val < 0)
+ printf("-0x%IX", -val);
+ }
+ }
+ break;
+
+ case IF_ARD:
+ case IF_AWR:
+ case IF_ARW:
+
+#if FEATURE_STACK_FP_X87
+
+ case IF_TRD_ARD:
+ case IF_TWR_ARD:
+ case IF_TRW_ARD:
+
+ // case IF_ARD_TRD:
+ case IF_AWR_TRD:
+// case IF_ARW_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+ if (ins == INS_call && id->idIsCallRegPtr())
+ {
+ printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
+ break;
+ }
+
+ printf("%s", sstr);
+ emitDispAddrMode(id, isNew);
+ emitDispShift(ins);
+
+ if (ins == INS_call)
+ {
+ assert(id->idInsFmt() == IF_ARD);
+
+ /* Ignore indirect calls */
+
+ if (id->idDebugOnlyInfo()->idMemCookie == 0)
+ {
+ break;
+ }
+
+ assert(id->idDebugOnlyInfo()->idMemCookie);
+
+ /* This is a virtual call */
+
+ methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
+ printf("%s", methodName);
+ }
+ break;
+
+ case IF_RRD_ARD:
+ case IF_RWR_ARD:
+ case IF_RRW_ARD:
+ if (IsAVXInstruction(ins))
+ {
+ printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr);
+ }
+ else if (IsSSE2Instruction(ins))
+ {
+ printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr);
+ }
+ else
+#ifdef _TARGET_AMD64_
+ if (ins == INS_movsxd)
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
+ }
+ else
+#endif
+ if (ins == INS_movsx || ins == INS_movzx)
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
+ }
+ else
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
+ }
+ emitDispAddrMode(id);
+ break;
+
+ case IF_ARD_RRD:
+ case IF_AWR_RRD:
+ case IF_ARW_RRD:
+
+ printf("%s", sstr);
+ emitDispAddrMode(id);
+ if (IsAVXInstruction(ins))
+ {
+ printf(", %s", emitYMMregName((unsigned)id->idReg1()));
+ }
+ else if (IsSSE2Instruction(ins))
+ {
+ printf(", %s", emitXMMregName((unsigned)id->idReg1()));
+ }
+ else
+ {
+ printf(", %s", emitRegName(id->idReg1(), attr));
+ }
+ break;
+
+ case IF_ARD_CNS:
+ case IF_AWR_CNS:
+ case IF_ARW_CNS:
+ case IF_ARW_SHF:
+
+ printf("%s", sstr);
+ emitDispAddrMode(id);
+ emitGetInsAmdCns(id, &cnsVal);
+ val = cnsVal.cnsVal;
+#ifdef _TARGET_AMD64_
+ // no 8-byte immediates allowed here!
+ assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
+#endif
+ if (id->idInsFmt() == IF_ARW_SHF)
+ {
+ emitDispShift(ins, (BYTE)val);
+ }
+ else
+ {
+ printf(", ");
+#ifdef RELOC_SUPPORT
+ if (cnsVal.cnsReloc)
+ {
+ emitDispReloc(val);
+ }
+ else
+#endif
+ {
+ goto PRINT_CONSTANT;
+ }
+ }
+ break;
+
+ case IF_SRD:
+ case IF_SWR:
+ case IF_SRW:
+
+#if FEATURE_STACK_FP_X87
+ case IF_TRD_SRD:
+ case IF_TWR_SRD:
+ case IF_TRW_SRD:
+
+ // case IF_SRD_TRD:
+ // case IF_SRW_TRD:
+ case IF_SWR_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+
+ printf("%s", sstr);
+
+#if !FEATURE_FIXED_OUT_ARGS
+ if (ins == INS_pop)
+ emitCurStackLvl -= sizeof(int);
+#endif
+
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+
+#if !FEATURE_FIXED_OUT_ARGS
+ if (ins == INS_pop)
+ emitCurStackLvl += sizeof(int);
+#endif
+
+ emitDispShift(ins);
+ break;
+
+ case IF_SRD_RRD:
+ case IF_SWR_RRD:
+ case IF_SRW_RRD:
+
+ printf("%s", sstr);
+
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+
+ if (IsAVXInstruction(ins))
+ {
+ printf(", %s", emitYMMregName((unsigned)id->idReg1()));
+ }
+ else if (IsSSE2Instruction(ins))
+ {
+ printf(", %s", emitXMMregName((unsigned)id->idReg1()));
+ }
+ else
+ {
+ printf(", %s", emitRegName(id->idReg1(), attr));
+ }
+ break;
+
+ case IF_SRD_CNS:
+ case IF_SWR_CNS:
+ case IF_SRW_CNS:
+ case IF_SRW_SHF:
+
+ printf("%s", sstr);
+
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+
+ emitGetInsCns(id, &cnsVal);
+ val = cnsVal.cnsVal;
+#ifdef _TARGET_AMD64_
+ // no 8-byte immediates allowed here!
+ assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
+#endif
+ if (id->idInsFmt() == IF_SRW_SHF)
+ {
+ emitDispShift(ins, (BYTE)val);
+ }
+ else
+ {
+ printf(", ");
+#ifdef RELOC_SUPPORT
+ if (cnsVal.cnsReloc)
+ {
+ emitDispReloc(val);
+ }
+ else
+#endif
+ {
+ goto PRINT_CONSTANT;
+ }
+ }
+ break;
+
+ case IF_RRD_SRD:
+ case IF_RWR_SRD:
+ case IF_RRW_SRD:
+ if (IsAVXInstruction(ins))
+ {
+ printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr);
+ }
+ else if (IsSSE2Instruction(ins))
+ {
+ printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr);
+ }
+ else
+#ifdef _TARGET_AMD64_
+ if (ins == INS_movsxd)
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
+ }
+ else
+#endif
+ if (ins == INS_movsx || ins == INS_movzx)
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
+ }
+ else
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
+ }
+
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+
+ break;
+
+ case IF_RRD_RRD:
+ case IF_RWR_RRD:
+ case IF_RRW_RRD:
+
+ if (ins == INS_mov_i2xmm)
+ {
+ printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitRegName(id->idReg2(), attr));
+ }
+ else if (ins == INS_mov_xmm2i)
+ {
+ printf("%s, %s", emitRegName(id->idReg2(), attr), emitXMMregName((unsigned)id->idReg1()));
+ }
+#ifndef LEGACY_BACKEND
+ else if ((ins == INS_cvtsi2ss) || (ins == INS_cvtsi2sd))
+ {
+ printf(" %s, %s", emitXMMregName((unsigned)id->idReg1()), emitRegName(id->idReg2(), attr));
+ }
+#endif
+ else if ((ins == INS_cvttsd2si)
+#ifndef LEGACY_BACKEND
+ || (ins == INS_cvtss2si) || (ins == INS_cvtsd2si) || (ins == INS_cvttss2si)
+#endif
+ )
+ {
+ printf(" %s, %s", emitRegName(id->idReg1(), attr), emitXMMregName((unsigned)id->idReg2()));
+ }
+ else if (IsAVXInstruction(ins))
+ {
+ printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), emitYMMregName((unsigned)id->idReg2()));
+ }
+ else if (IsSSE2Instruction(ins))
+ {
+ printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitXMMregName((unsigned)id->idReg2()));
+ }
+#ifdef _TARGET_AMD64_
+ else if (ins == INS_movsxd)
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), emitRegName(id->idReg2(), EA_4BYTE));
+ }
+#endif // _TARGET_AMD64_
+ else if (ins == INS_movsx || ins == INS_movzx)
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), emitRegName(id->idReg2(), attr));
+ }
+ else
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr));
+ }
+ break;
+
+ case IF_RRW_RRW:
+ assert(ins == INS_xchg);
+ printf("%s,", emitRegName(id->idReg1(), attr));
+ printf(" %s", emitRegName(id->idReg2(), attr));
+ break;
+
+#ifdef FEATURE_AVX_SUPPORT
+ case IF_RWR_RRD_RRD:
+ assert(IsAVXInstruction(ins));
+ assert(IsThreeOperandAVXInstruction(ins));
+ printf("%s, ", emitRegName(id->idReg1(), attr));
+ printf("%s, ", emitRegName(id->idReg2(), attr));
+ printf("%s", emitRegName(id->idReg3(), attr));
+ break;
+#endif
+ case IF_RRW_RRW_CNS:
+ if (IsAVXInstruction(ins))
+ {
+ printf("%s,", emitYMMregName((unsigned)id->idReg1()));
+ printf(" %s", emitYMMregName((unsigned)id->idReg2()));
+ }
+ else
+ {
+ printf("%s,", emitRegName(id->idReg1(), attr));
+ printf(" %s", emitRegName(id->idReg2(), attr));
+ }
+ val = emitGetInsSC(id);
+#ifdef _TARGET_AMD64_
+ // no 8-byte immediates allowed here!
+ assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
+#endif
+ printf(", ");
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ emitDispReloc(val);
+ }
+ else
+#endif
+ {
+ goto PRINT_CONSTANT;
+ }
+ break;
+
+ case IF_RRD:
+ case IF_RWR:
+ case IF_RRW:
+ printf("%s", emitRegName(id->idReg1(), attr));
+ emitDispShift(ins);
+ break;
+
+ case IF_RRW_SHF:
+ printf("%s", emitRegName(id->idReg1(), attr));
+ emitDispShift(ins, (BYTE)emitGetInsSC(id));
+ break;
+
+ case IF_RRD_MRD:
+ case IF_RWR_MRD:
+ case IF_RRW_MRD:
+
+ if (ins == INS_movsx || ins == INS_movzx)
+ {
+ attr = EA_PTRSIZE;
+ }
+#ifdef _TARGET_AMD64_
+ else if (ins == INS_movsxd)
+ {
+ attr = EA_PTRSIZE;
+ }
+#endif
+ if (IsAVXInstruction(ins))
+ {
+ printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr);
+ }
+ else if (IsSSE2Instruction(ins))
+ {
+ printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr);
+ }
+ else
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
+ }
+ offs = emitGetInsDsp(id);
+ emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+ break;
+
+ case IF_RWR_MRD_OFF:
+
+ printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
+ offs = emitGetInsDsp(id);
+ emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+ break;
+
+ case IF_MRD_RRD:
+ case IF_MWR_RRD:
+ case IF_MRW_RRD:
+
+ printf("%s", sstr);
+ offs = emitGetInsDsp(id);
+ emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+ printf(", %s", emitRegName(id->idReg1(), attr));
+ break;
+
+ case IF_MRD_CNS:
+ case IF_MWR_CNS:
+ case IF_MRW_CNS:
+ case IF_MRW_SHF:
+
+ printf("%s", sstr);
+ offs = emitGetInsDsp(id);
+ emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+ emitGetInsDcmCns(id, &cnsVal);
+ val = cnsVal.cnsVal;
+#ifdef _TARGET_AMD64_
+ // no 8-byte immediates allowed here!
+ assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
+#endif
+#ifdef RELOC_SUPPORT
+ if (cnsVal.cnsReloc)
+ {
+ emitDispReloc(val);
+ }
+ else
+#endif
+ if (id->idInsFmt() == IF_MRW_SHF)
+ {
+ emitDispShift(ins, (BYTE)val);
+ }
+ else
+ {
+ printf(", ");
+ goto PRINT_CONSTANT;
+ }
+ break;
+
+ case IF_MRD:
+ case IF_MWR:
+ case IF_MRW:
+
+#if FEATURE_STACK_FP_X87
+
+ case IF_TRD_MRD:
+ case IF_TWR_MRD:
+ case IF_TRW_MRD:
+
+ // case IF_MRD_TRD:
+ // case IF_MRW_TRD:
+ case IF_MWR_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+
+ printf("%s", sstr);
+ offs = emitGetInsDsp(id);
+ emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+ emitDispShift(ins);
+ break;
+
+ case IF_MRD_OFF:
+
+ printf("offset ");
+ offs = emitGetInsDsp(id);
+ emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+ break;
+
+ case IF_RRD_CNS:
+ case IF_RWR_CNS:
+ case IF_RRW_CNS:
+ printf("%s, ", emitRegName(id->idReg1(), attr));
+ val = emitGetInsSC(id);
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ emitDispReloc(val);
+ }
+ else
+#endif
+ {
+ goto PRINT_CONSTANT;
+ }
+ break;
+
+#if FEATURE_STACK_FP_X87
+ case IF_TRD_FRD:
+ case IF_TWR_FRD:
+ case IF_TRW_FRD:
+ switch (ins)
+ {
+ case INS_fld:
+ case INS_fxch:
+ break;
+
+ default:
+ printf("%s, ", emitFPregName(0));
+ break;
+ }
+ printf("%s", emitFPregName((unsigned)id->idReg1()));
+ break;
+
+ case IF_FRD_TRD:
+ case IF_FWR_TRD:
+ case IF_FRW_TRD:
+ printf("%s", emitFPregName((unsigned)id->idReg1()));
+ if (ins != INS_fst && ins != INS_fstp)
+ printf(", %s", emitFPregName(0));
+ break;
+#endif // FEATURE_STACK_FP_X87
+
+ case IF_LABEL:
+ case IF_RWR_LABEL:
+ case IF_SWR_LABEL:
+
+ if (ins == INS_lea)
+ {
+ printf("%s, ", emitRegName(id->idReg1(), attr));
+ }
+ else if (ins == INS_mov)
+ {
+ /* mov dword ptr [frame.callSiteReturnAddress], label */
+ assert(id->idInsFmt() == IF_SWR_LABEL);
+ instrDescLbl* idlbl = (instrDescLbl*)id;
+
+ emitDispFrameRef(idlbl->dstLclVar.lvaVarNum(), idlbl->dstLclVar.lvaOffset(), 0, asmfm);
+
+ printf(", ");
+ }
+
+ if (((instrDescJmp*)id)->idjShort)
+ {
+ printf("SHORT ");
+ }
+
+ if (id->idIsBound())
+ {
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ }
+ else
+ {
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ }
+ break;
+
+ case IF_METHOD:
+ case IF_METHPTR:
+ if (id->idIsCallAddr())
+ {
+ offs = (ssize_t)id->idAddr()->iiaAddr;
+ methodName = "";
+ }
+ else
+ {
+ offs = 0;
+ methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
+ }
+
+ if (id->idInsFmt() == IF_METHPTR)
+ {
+ printf("[");
+ }
+
+ if (offs)
+ {
+ if (id->idIsDspReloc())
+ {
+ printf("reloc ");
+ }
+ printf("%08X", offs);
+ }
+ else
+ {
+ printf("%s", methodName);
+ }
+
+ if (id->idInsFmt() == IF_METHPTR)
+ {
+ printf("]");
+ }
+
+ break;
+
+#if FEATURE_STACK_FP_X87
+ case IF_TRD:
+ case IF_TWR:
+ case IF_TRW:
+#endif // FEATURE_STACK_FP_X87
+ case IF_NONE:
+ break;
+
+ default:
+ printf("unexpected format %s", emitIfName(id->idInsFmt()));
+ assert(!"unexpectedFormat");
+ break;
+ }
+
+ if (sz != 0 && sz != id->idCodeSize() && (!asmfm || emitComp->verbose))
+ {
+ // Code size in the instrDesc is different from the actual code size we've been given!
+ printf(" (ECS:%d, ACS:%d)", id->idCodeSize(), sz);
+ }
+
+ printf("\n");
+}
+
+/*****************************************************************************/
+#endif
+
+/*****************************************************************************
+ *
+ * Output nBytes bytes of NOP instructions
+ */
+
+static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes)
+{
+ assert(nBytes <= 15);
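+ // Note: 15 bytes is the maximum length of a single x86/x64 instruction.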
+
+#ifndef _TARGET_AMD64_
+ // TODO-X86-CQ: when VIA C3 CPUs are out of circulation, switch to the
+ // more efficient real NOP: 0x0F 0x1F +modR/M.
+ // We also can't use the AMD-recommended multiple size prefixes (e.g. 0x66 0x66 0x90 for a 3-byte NOP)
+ // because the debugger and msdis don't like them, so maybe VIA doesn't either.
+ // So instead just stick to repeating single-byte NOPs.
+
+ switch (nBytes)
+ {
+ case 15:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 14:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 13:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 12:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 11:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 10:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 9:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 8:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 7:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 6:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 5:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 4:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 3:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 2:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 1:
+ *dst++ = 0x90;
+ break;
+ case 0:
+ break;
+ }
+#else // _TARGET_AMD64_
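+ // On AMD64, use the recommended multi-byte NOP encodings (0F 1F /0 with various ModRM/SIB/displacement
+ // sizes, optionally preceded by 0x66 prefixes); requests longer than 11 bytes are split into two NOPs.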
+ switch (nBytes)
+ {
+ case 2:
+ *dst++ = 0x66;
+ __fallthrough;
+ case 1:
+ *dst++ = 0x90;
+ break;
+ case 0:
+ break;
+ case 3:
+ *dst++ = 0x0F;
+ *dst++ = 0x1F;
+ *dst++ = 0x00;
+ break;
+ case 4:
+ *dst++ = 0x0F;
+ *dst++ = 0x1F;
+ *dst++ = 0x40;
+ *dst++ = 0x00;
+ break;
+ case 6:
+ *dst++ = 0x66;
+ __fallthrough;
+ case 5:
+ *dst++ = 0x0F;
+ *dst++ = 0x1F;
+ *dst++ = 0x44;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ break;
+ case 7:
+ *dst++ = 0x0F;
+ *dst++ = 0x1F;
+ *dst++ = 0x80;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ break;
+ case 15:
+ // More than 3 prefixes is slower than just 2 NOPs
+ dst = emitOutputNOP(emitOutputNOP(dst, 7), 8);
+ break;
+ case 14:
+ // More than 3 prefixes is slower than just 2 NOPs
+ dst = emitOutputNOP(emitOutputNOP(dst, 7), 7);
+ break;
+ case 13:
+ // More than 3 prefixes is slower than just 2 NOPs
+ dst = emitOutputNOP(emitOutputNOP(dst, 5), 8);
+ break;
+ case 12:
+ // More than 3 prefixes is slower than just 2 NOPs
+ dst = emitOutputNOP(emitOutputNOP(dst, 4), 8);
+ break;
+ case 11:
+ *dst++ = 0x66;
+ __fallthrough;
+ case 10:
+ *dst++ = 0x66;
+ __fallthrough;
+ case 9:
+ *dst++ = 0x66;
+ __fallthrough;
+ case 8:
+ *dst++ = 0x0F;
+ *dst++ = 0x1F;
+ *dst++ = 0x84;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ break;
+ }
+#endif // _TARGET_AMD64_
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output an instruction involving an address mode.
+ */
+
+BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
+{
+ regNumber reg;
+ regNumber rgx;
+ ssize_t dsp;
+ bool dspInByte;
+ bool dspIsZero;
+
+ instruction ins = id->idIns();
+ emitAttr size = id->idOpSize();
+ size_t opsz = EA_SIZE_IN_BYTES(size);
+
+ // Get the base/index registers
+ reg = id->idAddr()->iiaAddrMode.amBaseReg;
+ rgx = id->idAddr()->iiaAddrMode.amIndxReg;
+
+ // For INS_call the instruction size is actually the return value size
+ if (ins == INS_call)
+ {
+ // Special case: call via a register
+ if (id->idIsCallRegPtr())
+ {
+ size_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call));
+
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode);
+ dst += emitOutputWord(dst, opcode);
+ goto DONE;
+ }
+
+ // The displacement field is in an unusual place for calls
+ dsp = emitGetInsCIdisp(id);
+
+#ifdef _TARGET_AMD64_
+
+ // Compute the REX prefix if it exists
+ if (IsExtendedReg(reg, EA_PTRSIZE))
+ {
+ insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
+ reg = RegEncoding(reg);
+ }
+
+ if (IsExtendedReg(rgx, EA_PTRSIZE))
+ {
+ insEncodeRegSIB(ins, rgx, &code);
+ rgx = RegEncoding(rgx);
+ }
+
+ // And emit the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+#endif // _TARGET_AMD64_
+
+ goto GOT_DSP;
+ }
+
+ // Is there a large constant operand?
+ if (addc && (size > EA_1BYTE))
+ {
+ ssize_t cval = addc->cnsVal;
+
+ // Does the constant fit in a byte?
+ if ((signed char)cval == cval &&
+#ifdef RELOC_SUPPORT
+ addc->cnsReloc == false &&
+#endif
+ ins != INS_mov && ins != INS_test)
+ {
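+ // Bit 1 of the opcode is the sign-extend ('s') bit; setting it selects the one-byte, sign-extended immediate form.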
+ if (id->idInsFmt() != IF_ARW_SHF)
+ {
+ code |= 2;
+ }
+
+ opsz = 1;
+ }
+ }
+
+ // Emit VEX prefix if required
+ // Some callers add the VEX prefix themselves before calling this routine.
+ // Therefore, add the VEX prefix only if one is not already present.
+ code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
+
+ // For this format, moves do not support a third operand, so we only need to handle the binary ops.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // Encode the source operand reg in the 'vvvv' bits in 1's complement form.
+ // The order of operands is reversed, therefore use reg2 as the source.
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+
+ // Emit the REX prefix if required
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ if (IsExtendedReg(reg, EA_PTRSIZE))
+ {
+ insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
+ reg = RegEncoding(reg);
+ }
+
+ if (IsExtendedReg(rgx, EA_PTRSIZE))
+ {
+ insEncodeRegSIB(ins, rgx, &code);
+ rgx = RegEncoding(rgx);
+ }
+
+ // Is this a 'big' opcode?
+ if (code & 0xFF000000)
+ {
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Output the highest word of the opcode
+ // We need to check again because, for AVX instructions, the leading opcode bytes are stripped off
+ // and encoded as part of the VEX prefix.
+ if (code & 0xFF000000)
+ {
+ dst += emitOutputWord(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+ }
+ else if (code & 0x00FF0000)
+ {
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Output the highest byte of the opcode
+ if (code & 0x00FF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+
+ // Use the large version if this is not a byte. This trick will not
+ // work in case of SSE2 and AVX instructions.
+ if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
+ {
+ code++;
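+ // Instruction names longer than 8 characters use up all of the padding above, so add an explicit
+ // space to keep the name separated from the operands.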
+ }
+ }
+ else if (CodeGen::instIsFP(ins))
+ {
+#if FEATURE_STACK_FP_X87
+ assert(size == EA_4BYTE || size == EA_8BYTE || ins == INS_fldcw || ins == INS_fnstcw);
+#else // !FEATURE_STACK_FP_X87
+ assert(size == EA_4BYTE || size == EA_8BYTE);
+#endif // ! FEATURE_STACK_FP_X87
+
+ if (size == EA_8BYTE)
+ {
+ code += 4;
+ }
+ }
+ else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
+ {
+ /* Is the operand size larger than a byte? */
+
+ switch (size)
+ {
+ case EA_1BYTE:
+ break;
+
+ case EA_2BYTE:
+
+ /* Output a size prefix for a 16-bit operand */
+
+ dst += emitOutputByte(dst, 0x66);
+
+ __fallthrough;
+
+ case EA_4BYTE:
+#ifdef _TARGET_AMD64_
+ case EA_8BYTE:
+#endif
+
+ /* Set the 'w' bit to get the large version */
+
+ code |= 0x1;
+ break;
+
+#ifdef _TARGET_X86_
+ case EA_8BYTE:
+
+ /* Double operand - set the appropriate bit */
+
+ code |= 0x04;
+ break;
+
+#endif // _TARGET_X86_
+
+ default:
+ NO_WAY("unexpected size");
+ break;
+ }
+ }
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Get the displacement value
+ dsp = emitGetInsAmdAny(id);
+
+GOT_DSP:
+
+ dspInByte = ((signed char)dsp == (ssize_t)dsp);
+ dspIsZero = (dsp == 0);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ dspInByte = false; // relocs can't be placed in a byte
+ }
+#endif
+
+ // Is there a [scaled] index component?
+ if (rgx == REG_NA)
+ {
+ // The address is of the form "[reg+disp]"
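+ // The 0xXX00 values OR'ed into 'code' below form the ModRM byte: the mod bits select no
+ // displacement/disp8/disp32, and the r/m bits select the base register or the presence of a SIB byte.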
+ switch (reg)
+ {
+ case REG_NA:
+ if (id->idIsDspReloc())
+ {
+ INT32 addlDelta = 0;
+
+ // The address is of the form "[disp]"
+ // On x86 - disp is relative to zero
+ // On Amd64 - disp is relative to RIP
+ dst += emitOutputWord(dst, code | 0x0500);
+
+ if (addc)
+ {
+ // It is of the form "ins [disp], immed"
+ // For emitting relocation, we also need to take into account of the
+ // additional bytes of code emitted for immed val.
+
+ ssize_t cval = addc->cnsVal;
+
+#ifdef _TARGET_AMD64_
+ // all these opcodes only take a sign-extended 4-byte immediate
+ noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
+#else
+ noway_assert(opsz <= 4);
+#endif
+
+ switch (opsz)
+ {
+ case 0:
+ case 4:
+ case 8:
+ addlDelta = -4;
+ break;
+ case 2:
+ addlDelta = -2;
+ break;
+ case 1:
+ addlDelta = -1;
+ break;
+
+ default:
+ assert(!"unexpected operand size");
+ unreached();
+ }
+ }
+
+#ifdef _TARGET_AMD64_
+ // We emit zero on Amd64, to avoid the assert in emitOutputLong()
+ dst += emitOutputLong(dst, 0);
+#else
+ dst += emitOutputLong(dst, dsp);
+#endif
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_DISP32, 0,
+ addlDelta);
+ }
+ else
+ {
+#ifdef _TARGET_X86_
+ dst += emitOutputWord(dst, code | 0x0500);
+#else //_TARGET_AMD64_
+ // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
+ // This addr mode should never be used while generating relocatable ngen code nor if
+ // the addr can be encoded as pc-relative address.
+ noway_assert(!emitComp->opts.compReloc);
+ noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32);
+ noway_assert((int)dsp == dsp);
+
+ // This requires, specifying a SIB byte after ModRM byte.
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, 0x25);
+#endif //_TARGET_AMD64_
+ dst += emitOutputLong(dst, dsp);
+ }
+ break;
+
+ case REG_EBP:
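+ // An EBP/RBP base cannot be encoded with mod=00 (that pattern means disp32/RIP-relative),
+ // so a disp8 or disp32 is always emitted.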
+ // Does the offset fit in a byte?
+ if (dspInByte)
+ {
+ dst += emitOutputWord(dst, code | 0x4500);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8500);
+ dst += emitOutputLong(dst, dsp);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+ break;
+
+ case REG_ESP:
+#ifdef LEGACY_BACKEND
+ // REG_ESP could be REG_R12, which applies to any instruction
+ //
+ // This assert isn't too helpful from the OptJit point of view
+ //
+ // a better question is why is it here at all
+ //
+ assert((ins == INS_lea) || (ins == INS_mov) || (ins == INS_test) || (ins == INS_cmp) ||
+ (ins == INS_fld && dspIsZero) || (ins == INS_fstp && dspIsZero) ||
+ (ins == INS_fistp && dspIsZero) || IsSSE2Instruction(ins) || IsAVXInstruction(ins) ||
+ (ins == INS_or));
+#endif // LEGACY_BACKEND
+
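+ // An ESP/RSP base always requires a SIB byte; r/m=100 selects it, and SIB 0x24 encodes base=ESP with no index.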
+ // Is the offset 0 or does it at least fit in a byte?
+ if (dspIsZero)
+ {
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, 0x24);
+ }
+ else if (dspInByte)
+ {
+ dst += emitOutputWord(dst, code | 0x4400);
+ dst += emitOutputByte(dst, 0x24);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8400);
+ dst += emitOutputByte(dst, 0x24);
+ dst += emitOutputLong(dst, dsp);
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+ break;
+
+ default:
+ // Put the register in the opcode
+ code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) << 8;
+
+ // Is there a displacement?
+ if (dspIsZero)
+ {
+ // This is simply "[reg]"
+ dst += emitOutputWord(dst, code);
+ }
+ else
+ {
+ // This is "[reg + dsp]" -- does the offset fit in a byte?
+ if (dspInByte)
+ {
+ dst += emitOutputWord(dst, code | 0x4000);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8000);
+ dst += emitOutputLong(dst, dsp);
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+ }
+
+ break;
+ }
+ }
+ else
+ {
+ unsigned regByte;
+
+ // We have a scaled index operand
+ unsigned mul = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
+
+ // Is the index operand scaled?
+ if (mul > 1)
+ {
+ // Is there a base register?
+ if (reg != REG_NA)
+ {
+ // The address is "[reg + {2/4/8} * rgx + icon]"
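+ // regByte is the SIB byte: base register in bits 0-2, index register in bits 3-5, and the scale in the top two bits.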
+ regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) |
+ insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
+
+ // Emit [ebp + {2/4/8} * rgx] as [ebp + {2/4/8} * rgx + 0]
+ if (dspIsZero && reg != REG_EBP)
+ {
+ // The address is "[reg + {2/4/8} * rgx]"
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, regByte);
+ }
+ else
+ {
+ // The address is "[reg + {2/4/8} * rgx + disp]"
+ if (dspInByte)
+ {
+ dst += emitOutputWord(dst, code | 0x4400);
+ dst += emitOutputByte(dst, regByte);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8400);
+ dst += emitOutputByte(dst, regByte);
+ dst += emitOutputLong(dst, dsp);
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+ }
+ }
+ else
+ {
+ // The address is "[{2/4/8} * rgx + icon]"
+ regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) |
+ insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
+
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, regByte);
+
+ // Special case: jump through a jump table
+ if (ins == INS_i_jmp)
+ {
+ dsp += (size_t)emitConsBlock;
+ }
+
+ dst += emitOutputLong(dst, dsp);
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+ }
+ else
+ {
+ // The address is "[reg+rgx+dsp]"
+ regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr);
+
+ if (dspIsZero && reg != REG_EBP)
+ {
+ // This is "[reg+rgx]"
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, regByte);
+ }
+ else
+ {
+ // This is "[reg+rgx+dsp]" -- does the offset fit in a byte?
+ if (dspInByte)
+ {
+ dst += emitOutputWord(dst, code | 0x4400);
+ dst += emitOutputByte(dst, regByte);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8400);
+ dst += emitOutputByte(dst, regByte);
+ dst += emitOutputLong(dst, dsp);
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+ }
+ }
+ }
+
+ // Now generate the constant value, if present
+ if (addc)
+ {
+ ssize_t cval = addc->cnsVal;
+
+#ifdef _TARGET_AMD64_
+ // all these opcodes only take a sign-extended 4-byte immediate
+ noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
+#endif
+
+ switch (opsz)
+ {
+ case 0:
+ case 4:
+ case 8:
+ dst += emitOutputLong(dst, cval);
+ break;
+ case 2:
+ dst += emitOutputWord(dst, cval);
+ break;
+ case 1:
+ dst += emitOutputByte(dst, cval);
+ break;
+
+ default:
+ assert(!"unexpected operand size");
+ }
+
+#ifdef RELOC_SUPPORT
+ if (addc->cnsReloc)
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
+ assert(opsz == 4);
+ }
+#endif
+ }
+
+DONE:
+
+ // Does this instruction operate on a GC ref value?
+ if (id->idGCref())
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_ARD:
+ case IF_AWR:
+ case IF_ARW:
+ break;
+
+ case IF_RRD_ARD:
+ break;
+
+ case IF_RWR_ARD:
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case IF_RRW_ARD:
+ assert(id->idGCref() == GCT_BYREF);
+
+#ifdef DEBUG
+ regMaskTP regMask;
+ regMask = genRegMask(id->idReg1());
+
+ // <BUGNUM> VSW 335101 </BUGNUM>
+ // Either id->idReg1(), id->idAddr()->iiaAddrMode.amBaseReg, or id->idAddr()->iiaAddrMode.amIndxReg
+ // could be a BYREF.
+ // For example in the following case:
+ // mov EDX, bword ptr [EBP-78H] ; EDX becomes BYREF after this instr.
+ // add EAX, bword ptr [EDX+8] ; It is the EDX that's causing id->idGCref to be GCT_BYREF.
+ // ; EAX becomes BYREF after this instr.
+ // <BUGNUM> DD 273707 </BUGNUM>
+ // add EDX, bword ptr [036464E0H] ; int + static field (technically a GCREF)=BYREF
+ regMaskTP baseRegMask;
+ if (reg == REG_NA)
+ {
+ baseRegMask = RBM_NONE;
+ }
+ else
+ {
+ baseRegMask = genRegMask(reg);
+ }
+ regMaskTP indexRegMask;
+ if (rgx == REG_NA)
+ {
+ indexRegMask = RBM_NONE;
+ }
+ else
+ {
+ indexRegMask = genRegMask(rgx);
+ }
+
+ // r1 could have been a GCREF as GCREF + int=BYREF
+ // or BYREF+/-int=BYREF
+ assert(((reg == REG_NA) && (rgx == REG_NA) && (ins == INS_add || ins == INS_sub)) ||
+ (((regMask | baseRegMask | indexRegMask) & emitThisGCrefRegs) && (ins == INS_add)) ||
+ (((regMask | baseRegMask | indexRegMask) & emitThisByrefRegs) &&
+ (ins == INS_add || ins == INS_sub)));
+#endif
+ // Mark it as holding a GCT_BYREF
+ emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
+ break;
+
+ case IF_ARD_RRD:
+ case IF_AWR_RRD:
+ break;
+
+ case IF_ARD_CNS:
+ case IF_AWR_CNS:
+ break;
+
+ case IF_ARW_RRD:
+ case IF_ARW_CNS:
+ assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+
+ // mul can never produce a GC ref
+ assert(!instrIs3opImul(ins));
+ assert(ins != INS_mulEAX && ins != INS_imulEAX);
+ }
+ else
+ {
+ if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
+ {
+ }
+ else
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RWR_ARD:
+ emitGCregDeadUpd(id->idReg1(), dst);
+ break;
+ default:
+ break;
+ }
+
+ if (ins == INS_mulEAX || ins == INS_imulEAX)
+ {
+ emitGCregDeadUpd(REG_EAX, dst);
+ emitGCregDeadUpd(REG_EDX, dst);
+ }
+
+ // For the three operand imul instruction the target register
+ // is encoded in the opcode
+
+ if (instrIs3opImul(ins))
+ {
+ regNumber tgtReg = inst3opImulReg(ins);
+ emitGCregDeadUpd(tgtReg, dst);
+ }
+ }
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output an instruction involving a stack frame value.
+ */
+
+BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
+{
+ int adr;
+ int dsp;
+ bool EBPbased;
+ bool dspInByte;
+ bool dspIsZero;
+
+ instruction ins = id->idIns();
+ emitAttr size = id->idOpSize();
+ size_t opsz = EA_SIZE_IN_BYTES(size);
+
+ assert(ins != INS_imul || id->idReg1() == REG_EAX || size == EA_4BYTE || size == EA_8BYTE);
+
+ // Is there a large constant operand?
+ if (addc && (size > EA_1BYTE))
+ {
+ ssize_t cval = addc->cnsVal;
+
+ // Does the constant fit in a byte?
+ if ((signed char)cval == cval &&
+#ifdef RELOC_SUPPORT
+ addc->cnsReloc == false &&
+#endif
+ ins != INS_mov && ins != INS_test)
+ {
+ if (id->idInsFmt() != IF_SRW_SHF)
+ {
+ code |= 2;
+ }
+
+ opsz = 1;
+ }
+ }
+
+ // Add VEX prefix if required.
+ // Some callers add the VEX prefix themselves before calling this routine.
+ // Therefore, add the VEX prefix only if one is not already present.
+ code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
+
+ // Compute the REX prefix
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ // Special case emitting AVX instructions
+ if (Is4ByteAVXInstruction(ins))
+ {
+ size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Emit last opcode byte
+ assert((code & 0xFF) == 0);
+ dst += emitOutputByte(dst, (code >> 8) & 0xFF);
+ code = regcode;
+ }
+ // Is this a 'big' opcode?
+ else if (code & 0xFF000000)
+ {
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Output the highest word of the opcode
+ // We need to check again because in case of AVX instructions the leading
+ // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
+ if (code & 0xFF000000)
+ {
+ dst += emitOutputWord(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+ }
+ else if (code & 0x00FF0000)
+ {
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Output the highest byte of the opcode.
+ // We need to check again because in case of AVX instructions the leading
+ // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
+ if (code & 0x00FF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+
+ // Use the large version if this is not a byte
+ if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) && !IsSSE2Instruction(ins) &&
+ !IsAVXInstruction(ins))
+ {
+ code |= 0x1;
+ }
+ }
+ else if (CodeGen::instIsFP(ins))
+ {
+ assert(size == EA_4BYTE || size == EA_8BYTE);
+
+ if (size == EA_8BYTE)
+ {
+ code += 4;
+ }
+ }
+ else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
+ {
+ // Is the operand size larger than a byte?
+ switch (size)
+ {
+ case EA_1BYTE:
+ break;
+
+ case EA_2BYTE:
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ __fallthrough;
+
+ case EA_4BYTE:
+#ifdef _TARGET_AMD64_
+ case EA_8BYTE:
+#endif // _TARGET_AMD64_
+
+ /* Set the 'w' size bit to indicate 32-bit operation
+ * Note that incrementing "code" for INS_call (0xFF) would
+ * overflow, whereas setting the lower bit to 1 just works out
+ */
+
+ code |= 0x01;
+ break;
+
+#ifdef _TARGET_X86_
+ case EA_8BYTE:
+
+ // Double operand - set the appropriate bit.
+ // It's unclear what could legitimately reach this case, since FP instructions are
+ // handled by instIsFP above; of the instructions in instrsxarch, only INS_fprem
+ // takes an 8-byte operand without being covered there.
+ code |= 0x04;
+ NO_WAY("bad 8 byte op");
+ break;
+#endif // _TARGET_X86_
+
+ default:
+ NO_WAY("unexpected size");
+ break;
+ }
+ }
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Figure out the variable's frame position
+ int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
+
+ adr = emitComp->lvaFrameAddress(varNum, &EBPbased);
+ dsp = adr + id->idAddr()->iiaLclVar.lvaOffset();
+
+ dspInByte = ((signed char)dsp == (int)dsp);
+ dspIsZero = (dsp == 0);
+
+#ifdef RELOC_SUPPORT
+ // For stack variables the dsp should never be a reloc.
+ assert(id->idIsDspReloc() == 0);
+#endif
+
+ if (EBPbased)
+ {
+ // EBP-based variable: does the offset fit in a byte?
+ if (Is4ByteAVXInstruction(ins))
+ {
+ if (dspInByte)
+ {
+ dst += emitOutputByte(dst, code | 0x45);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputByte(dst, code | 0x85);
+ dst += emitOutputLong(dst, dsp);
+ }
+ }
+ else
+ {
+ if (dspInByte)
+ {
+ dst += emitOutputWord(dst, code | 0x4500);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8500);
+ dst += emitOutputLong(dst, dsp);
+ }
+ }
+ }
+ else
+ {
+
+#if !FEATURE_FIXED_OUT_ARGS
+ // Adjust the offset by the amount currently pushed on the CPU stack
+ dsp += emitCurStackLvl;
+#endif
+
+ dspInByte = ((signed char)dsp == (int)dsp);
+ dspIsZero = (dsp == 0);
+
+ // Does the offset fit in a byte?
+ if (Is4ByteAVXInstruction(ins))
+ {
+ if (dspInByte)
+ {
+ if (dspIsZero)
+ {
+ dst += emitOutputByte(dst, code | 0x04);
+ dst += emitOutputByte(dst, 0x24);
+ }
+ else
+ {
+ dst += emitOutputByte(dst, code | 0x44);
+ dst += emitOutputByte(dst, 0x24);
+ dst += emitOutputByte(dst, dsp);
+ }
+ }
+ else
+ {
+ dst += emitOutputByte(dst, code | 0x84);
+ dst += emitOutputByte(dst, 0x24);
+ dst += emitOutputLong(dst, dsp);
+ }
+ }
+ else
+ {
+ if (dspInByte)
+ {
+ if (dspIsZero)
+ {
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, 0x24);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x4400);
+ dst += emitOutputByte(dst, 0x24);
+ dst += emitOutputByte(dst, dsp);
+ }
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8400);
+ dst += emitOutputByte(dst, 0x24);
+ dst += emitOutputLong(dst, dsp);
+ }
+ }
+ }
+
+ // Now generate the constant value, if present
+ if (addc)
+ {
+ ssize_t cval = addc->cnsVal;
+
+#ifdef _TARGET_AMD64_
+ // all these opcodes only take a sign-extended 4-byte immediate
+ noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
+#endif
+
+ switch (opsz)
+ {
+ case 0:
+ case 4:
+ case 8:
+ dst += emitOutputLong(dst, cval);
+ break;
+ case 2:
+ dst += emitOutputWord(dst, cval);
+ break;
+ case 1:
+ dst += emitOutputByte(dst, cval);
+ break;
+
+ default:
+ assert(!"unexpected operand size");
+ }
+
+#ifdef RELOC_SUPPORT
+ if (addc->cnsReloc)
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
+ assert(opsz == 4);
+ }
+#endif
+ }
+
+ // Does this instruction operate on a GC ref value?
+ if (id->idGCref())
+ {
+ // Factor in the sub-variable offset
+ adr += AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
+
+ switch (id->idInsFmt())
+ {
+ case IF_SRD:
+ // Read stack -- no change
+ break;
+
+ case IF_SWR: // Stack Write (So we need to update GC live for stack var)
+ // Write stack -- GC var may be born
+ emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
+ break;
+
+ case IF_SRD_CNS:
+ // Read stack -- no change
+ break;
+
+ case IF_SWR_CNS:
+ // Write stack -- no change
+ break;
+
+ case IF_SRD_RRD:
+ case IF_RRD_SRD:
+ // Read stack , read register -- no change
+ break;
+
+ case IF_RWR_SRD: // Register Write, Stack Read (So we need to update GC live for register)
+
+ // Read stack , write register -- GC reg may be born
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case IF_SWR_RRD: // Stack Write, Register Read (So we need to update GC live for stack var)
+ // Read register, write stack -- GC var may be born
+ emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
+ break;
+
+ case IF_RRW_SRD: // Register Read/Write, Stack Read (So we need to update GC live for register)
+
+ // reg could have been a GCREF as GCREF + int=BYREF
+ // or BYREF+/-int=BYREF
+ assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case IF_SRW_CNS:
+ case IF_SRW_RRD:
+ // += -= of a byref, no change
+
+ case IF_SRW:
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+ }
+ else
+ {
+ if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
+ {
+ }
+ else
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RWR_SRD: // Register Write, Stack Read
+ case IF_RRW_SRD: // Register Read/Write, Stack Read
+ emitGCregDeadUpd(id->idReg1(), dst);
+ break;
+ default:
+ break;
+ }
+
+ if (ins == INS_mulEAX || ins == INS_imulEAX)
+ {
+ emitGCregDeadUpd(REG_EAX, dst);
+ emitGCregDeadUpd(REG_EDX, dst);
+ }
+
+ // For the three operand imul instruction the target register
+ // is encoded in the opcode
+
+ if (instrIs3opImul(ins))
+ {
+ regNumber tgtReg = inst3opImulReg(ins);
+ emitGCregDeadUpd(tgtReg, dst);
+ }
+ }
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output an instruction with a static data member (class variable).
+ */
+
+BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
+{
+ BYTE* addr;
+ CORINFO_FIELD_HANDLE fldh;
+ ssize_t offs;
+ int doff;
+
+ emitAttr size = id->idOpSize();
+ size_t opsz = EA_SIZE_IN_BYTES(size);
+ instruction ins = id->idIns();
+ bool isMoffset = false;
+
+ // Get hold of the field handle and offset
+ fldh = id->idAddr()->iiaFieldHnd;
+ offs = emitGetInsDsp(id);
+
+ // Special case: mov reg, fs:[ddd]
+ if (fldh == FLD_GLOBAL_FS)
+ {
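+ // 0x64 is the FS segment-override prefix.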
+ dst += emitOutputByte(dst, 0x64);
+ }
+
+ // Compute VEX prefix
+ // Some of its callers already add the VEX prefix and then call this routine.
+ // Therefore, add the VEX prefix only if one is not already present.
+ code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
+
+ // Compute the REX prefix
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ // Is there a large constant operand?
+ if (addc && (size > EA_1BYTE))
+ {
+ ssize_t cval = addc->cnsVal;
+ // Does the constant fit in a byte?
+ if ((signed char)cval == cval &&
+#ifdef RELOC_SUPPORT
+ addc->cnsReloc == false &&
+#endif
+ ins != INS_mov && ins != INS_test)
+ {
+ if (id->idInsFmt() != IF_MRW_SHF)
+ {
+ code |= 2;
+ }
+
+ opsz = 1;
+ }
+ }
+#ifdef _TARGET_X86_
+ else
+ {
+ // Special case: "mov eax, [addr]" and "mov [addr], eax"
+ // Amd64: this is one case where addr can be 64-bit in size. This is
+ // currently unused or not enabled on amd64 as it always uses RIP
+ // relative addressing which results in smaller instruction size.
+ if (ins == INS_mov && id->idReg1() == REG_EAX)
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RWR_MRD:
+
+ assert((unsigned)code ==
+ (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
+
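+ // Switch to the one-byte moffset form of mov: 0xA0 loads AL/EAX from an absolute address
+ // (0xA2 in the case below is the store form); the 'w' bit is added later for the full-width variant.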
+ code &= ~((size_t)0xFFFFFFFF);
+ code |= 0xA0;
+ isMoffset = true;
+ break;
+
+ case IF_MWR_RRD:
+
+ assert((unsigned)code ==
+ (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
+
+ code &= ~((size_t)0xFFFFFFFF);
+ code |= 0xA2;
+ isMoffset = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+#endif //_TARGET_X86_
+
+ // Special case emitting AVX instructions
+ if (Is4ByteAVXInstruction(ins))
+ {
+ size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Emit last opcode byte
+ // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
+ assert((code & 0xFF) == 0);
+ dst += emitOutputByte(dst, (code >> 8) & 0xFF);
+ code = 0;
+
+ // Emit Mod,R/M byte
+ dst += emitOutputByte(dst, regcode | 0x05);
+ }
+ // Is this a 'big' opcode?
+ else if (code & 0xFF000000)
+ {
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Output the highest word of the opcode.
+ // Check again since AVX instructions encode leading opcode bytes as part of VEX prefix.
+ if (code & 0xFF000000)
+ {
+ dst += emitOutputWord(dst, code >> 16);
+ }
+ code &= 0x0000FFFF;
+ }
+ else if (code & 0x00FF0000)
+ {
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Check again as VEX prefix would have encoded leading opcode byte
+ if (code & 0x00FF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+
+ if ((ins == INS_movsx || ins == INS_movzx || ins == INS_cmpxchg || ins == INS_xchg || ins == INS_xadd ||
+ insIsCMOV(ins)) &&
+ size != EA_1BYTE)
+ {
+ // movsx and movzx are 'big' opcodes but also have the 'w' bit
+ code++;
+ }
+ }
+ else if (CodeGen::instIsFP(ins))
+ {
+ assert(size == EA_4BYTE || size == EA_8BYTE);
+
+ if (size == EA_8BYTE)
+ {
+ code += 4;
+ }
+ }
+ else
+ {
+ // Is the operand size larger than a byte?
+ switch (size)
+ {
+ case EA_1BYTE:
+ break;
+
+ case EA_2BYTE:
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ __fallthrough;
+
+ case EA_4BYTE:
+#ifdef _TARGET_AMD64_
+ case EA_8BYTE:
+#endif
+ // Set the 'w' bit to get the large version
+ code |= 0x1;
+ break;
+
+#ifdef _TARGET_X86_
+ case EA_8BYTE:
+ // Double operand - set the appropriate bit
+ code |= 0x04;
+ break;
+#endif // _TARGET_X86_
+
+ default:
+ assert(!"unexpected size");
+ }
+ }
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ if (code)
+ {
+ if (id->idInsFmt() == IF_MRD_OFF || id->idInsFmt() == IF_RWR_MRD_OFF || isMoffset)
+ {
+ dst += emitOutputByte(dst, code);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code);
+ }
+ }
+
+ // Do we have a constant or a static data member?
+ doff = Compiler::eeGetJitDataOffs(fldh);
+ if (doff >= 0)
+ {
+ addr = emitConsBlock + doff;
+
+ int byteSize = EA_SIZE_IN_BYTES(size);
+
+#ifndef LEGACY_BACKEND
+ // this instruction has a fixed size (4) src.
+ if (ins == INS_cvttss2si || ins == INS_cvtss2sd || ins == INS_vbroadcastss)
+ {
+ byteSize = 4;
+ }
+ // This has a fixed size (8) source.
+ if (ins == INS_vbroadcastsd)
+ {
+ byteSize = 8;
+ }
+#endif // !LEGACY_BACKEND
+
+ // Check that the offset is properly aligned (i.e. the ddd in [ddd])
+ assert((emitChkAlign == false) || (ins == INS_lea) || (((size_t)addr & (byteSize - 1)) == 0));
+ }
+ else
+ {
+ // Special case: mov reg, fs:[ddd] or mov reg, [ddd]
+ if (jitStaticFldIsGlobAddr(fldh))
+ {
+ addr = nullptr;
+ }
+ else
+ {
+ addr = (BYTE*)emitComp->info.compCompHnd->getFieldAddress(fldh, nullptr);
+ if (addr == nullptr)
+ {
+ NO_WAY("could not obtain address of static field");
+ }
+ }
+ }
+
+ BYTE* target = (addr + offs);
+
+ if (!isMoffset)
+ {
+ INT32 addlDelta = 0;
+
+ if (addc)
+ {
+ // It is of the form "ins [disp], immed"
+ // For emitting relocation, we also need to take into account of the
+ // additional bytes of code emitted for immed val.
+
+ ssize_t cval = addc->cnsVal;
+
+#ifdef _TARGET_AMD64_
+ // all these opcodes only take a sign-extended 4-byte immediate
+ noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
+#else
+ noway_assert(opsz <= 4);
+#endif
+
+ switch (opsz)
+ {
+ case 0:
+ case 4:
+ case 8:
+ addlDelta = -4;
+ break;
+ case 2:
+ addlDelta = -2;
+ break;
+ case 1:
+ addlDelta = -1;
+ break;
+
+ default:
+ assert(!"unexpected operand size");
+ unreached();
+ }
+ }
+
+#ifdef _TARGET_AMD64_
+ // All static field and data section constant accesses should be marked as relocatable
+ noway_assert(id->idIsDspReloc());
+ dst += emitOutputLong(dst, 0);
+#else //_TARGET_X86_
+ dst += emitOutputLong(dst, (int)target);
+#endif //_TARGET_X86_
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(int)), target, IMAGE_REL_BASED_DISP32, 0, addlDelta);
+ }
+#endif
+ }
+ else
+ {
+#ifdef _TARGET_AMD64_
+ // This code path should never be hit on amd64 since it always uses RIP relative addressing.
+ // In future if ever there is a need to enable this special case, also enable the logic
+ // that sets isMoffset to true on amd64.
+ unreached();
+#else //_TARGET_X86_
+
+ dst += emitOutputSizeT(dst, (ssize_t)target);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(void*)), target, IMAGE_REL_BASED_MOFFSET);
+ }
+#endif
+
+#endif //_TARGET_X86_
+ }
+
+ // Now generate the constant value, if present
+ if (addc)
+ {
+ ssize_t cval = addc->cnsVal;
+
+#ifdef _TARGET_AMD64_
+ // all these opcodes only take a sign-extended 4-byte immediate
+ noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
+#endif
+
+ switch (opsz)
+ {
+ case 0:
+ case 4:
+ case 8:
+ dst += emitOutputLong(dst, cval);
+ break;
+ case 2:
+ dst += emitOutputWord(dst, cval);
+ break;
+ case 1:
+ dst += emitOutputByte(dst, cval);
+ break;
+
+ default:
+ assert(!"unexpected operand size");
+ }
+#ifdef RELOC_SUPPORT
+ if (addc->cnsReloc)
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
+ assert(opsz == 4);
+ }
+#endif
+ }
+
+ // Does this instruction operate on a GC ref value?
+ if (id->idGCref())
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_MRD:
+ case IF_MRW:
+ case IF_MWR:
+ break;
+
+ case IF_RRD_MRD:
+ break;
+
+ case IF_RWR_MRD:
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case IF_MRD_RRD:
+ case IF_MWR_RRD:
+ case IF_MRW_RRD:
+ break;
+
+ case IF_MRD_CNS:
+ case IF_MWR_CNS:
+ case IF_MRW_CNS:
+ break;
+
+ case IF_RRW_MRD:
+
+ assert(id->idGCref() == GCT_BYREF);
+ assert(ins == INS_add || ins == INS_sub);
+
+ // Mark it as holding a GCT_BYREF
+ emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+ }
+ else
+ {
+ if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
+ {
+ }
+ else
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RWR_MRD:
+ emitGCregDeadUpd(id->idReg1(), dst);
+ break;
+ default:
+ break;
+ }
+
+ if (ins == INS_mulEAX || ins == INS_imulEAX)
+ {
+ emitGCregDeadUpd(REG_EAX, dst);
+ emitGCregDeadUpd(REG_EDX, dst);
+ }
+
+ // For the three operand imul instruction the target register
+ // is encoded in the opcode
+
+ if (instrIs3opImul(ins))
+ {
+ regNumber tgtReg = inst3opImulReg(ins);
+ emitGCregDeadUpd(tgtReg, dst);
+ }
+ }
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output an instruction with one register operand.
+ */
+
+BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
+{
+ size_t code;
+
+ instruction ins = id->idIns();
+ regNumber reg = id->idReg1();
+ emitAttr size = id->idOpSize();
+
+ // We would like to update the GC info correctly, so SSE2/AVX instructions are not expected here.
+ assert(!IsSSE2Instruction(ins));
+ assert(!IsAVXInstruction(ins));
+
+ // Get the 'base' opcode
+ switch (ins)
+ {
+ case INS_inc:
+ case INS_dec:
+
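+ // On AMD64 the one-byte inc/dec encodings (0x40+reg / 0x48+reg) are repurposed as REX prefixes,
+ // so the long-form FF /0 and FF /1 encodings must always be used there.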
+#ifdef _TARGET_AMD64_
+ if (true)
+#else
+ if (size == EA_1BYTE)
+#endif
+ {
+ assert(INS_inc_l == INS_inc + 1);
+ assert(INS_dec_l == INS_dec + 1);
+
+ // Can't use the compact form, use the long form
+ ins = (instruction)(ins + 1);
+ if (size == EA_2BYTE)
+ {
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ }
+
+ code = insCodeRR(ins);
+ if (size != EA_1BYTE)
+ {
+ // Set the 'w' bit to get the large version
+ code |= 0x1;
+ }
+
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ // Register...
+ unsigned regcode = insEncodeReg012(ins, reg, size, &code);
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ dst += emitOutputWord(dst, code | (regcode << 8));
+ }
+ else
+ {
+ if (size == EA_2BYTE)
+ {
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ }
+ dst += emitOutputByte(dst, insCodeRR(ins) | insEncodeReg012(ins, reg, size, nullptr));
+ }
+ break;
+
+ case INS_pop:
+ case INS_pop_hide:
+ case INS_push:
+ case INS_push_hide:
+
+ assert(size == EA_PTRSIZE);
+ code = insEncodeOpreg(ins, reg, size);
+
+ assert(!TakesVexPrefix(ins));
+ assert(!TakesRexWPrefix(ins, size));
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ dst += emitOutputByte(dst, code);
+ break;
+
+ case INS_seto:
+ case INS_setno:
+ case INS_setb:
+ case INS_setae:
+ case INS_sete:
+ case INS_setne:
+ case INS_setbe:
+ case INS_seta:
+ case INS_sets:
+ case INS_setns:
+ case INS_setpe:
+ case INS_setpo:
+ case INS_setl:
+ case INS_setge:
+ case INS_setle:
+ case INS_setg:
+
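+ // setcc writes only the low byte of its operand, so the operand size is always EA_1BYTE.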
+ assert(id->idGCref() == GCT_NONE);
+ assert(size == EA_1BYTE);
+
+ code = insEncodeMRreg(ins, reg, EA_1BYTE, insCodeMR(ins));
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // We expect this to always be a 'big' opcode
+ assert(code & 0x00FF0000);
+
+ dst += emitOutputByte(dst, code >> 16);
+ dst += emitOutputWord(dst, code & 0x0000FFFF);
+
+ break;
+
+ case INS_mulEAX:
+ case INS_imulEAX:
+
+ // Kill off any GC refs in EAX or EDX
+ emitGCregDeadUpd(REG_EAX, dst);
+ emitGCregDeadUpd(REG_EDX, dst);
+
+ __fallthrough;
+
+ default:
+
+ assert(id->idGCref() == GCT_NONE);
+
+ code = insEncodeMRreg(ins, reg, size, insCodeMR(ins));
+
+ if (size != EA_1BYTE)
+ {
+ // Set the 'w' bit to get the large version
+ code |= 0x1;
+
+ if (size == EA_2BYTE)
+ {
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ }
+ }
+
+ code = AddVexPrefixIfNeeded(ins, code, size);
+
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ dst += emitOutputWord(dst, code);
+ break;
+ }
+
+ // Are we writing the register? if so then update the GC information
+ switch (id->idInsFmt())
+ {
+ case IF_RRD:
+ break;
+ case IF_RWR:
+ if (id->idGCref())
+ {
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ }
+ else
+ {
+ emitGCregDeadUpd(id->idReg1(), dst);
+ }
+ break;
+ case IF_RRW:
+ {
+#ifdef DEBUG
+ regMaskTP regMask = genRegMask(reg);
+#endif
+ if (id->idGCref())
+ {
+ // The reg must currently be holding either a gcref or a byref
+ // and the instruction must be inc or dec
+ assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) &&
+ (ins == INS_inc || ins == INS_dec || ins == INS_inc_l || ins == INS_dec_l));
+ assert(id->idGCref() == GCT_BYREF);
+ // Mark it as holding a GCT_BYREF
+ emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
+ }
+ else
+ {
+ // Can't use RRW to trash a GC ref. It's OK for unverifiable code
+ // to trash Byrefs.
+ assert((emitThisGCrefRegs & regMask) == 0);
+ }
+ }
+ break;
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected instruction format");
+ break;
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output an instruction with two register operands.
+ */
+
+BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
+{
+ size_t code;
+
+ instruction ins = id->idIns();
+ regNumber reg1 = id->idReg1();
+ regNumber reg2 = id->idReg2();
+ emitAttr size = id->idOpSize();
+
+ // Get the 'base' opcode
+ code = insCodeRM(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ if (IsSSE2Instruction(ins) || IsAVXInstruction(ins))
+ {
+ code = insEncodeRMreg(ins, code);
+
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+ }
+ else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins)))
+ {
+ code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE);
+#ifdef _TARGET_AMD64_
+
+ assert((size < EA_4BYTE) || (insIsCMOV(ins)));
+ if ((size == EA_8BYTE) || (ins == INS_movsx))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+ }
+ else if (ins == INS_movsxd)
+ {
+ code = insEncodeRMreg(ins, code);
+
+#endif // _TARGET_AMD64_
+ }
+ else
+ {
+ code = insEncodeMRreg(ins, insCodeMR(ins));
+
+ if (ins != INS_test)
+ {
+ code |= 2;
+ }
+
+ switch (size)
+ {
+ case EA_1BYTE:
+ noway_assert(RBM_BYTE_REGS & genRegMask(reg1));
+ noway_assert(RBM_BYTE_REGS & genRegMask(reg2));
+ break;
+
+ case EA_2BYTE:
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ __fallthrough;
+
+ case EA_4BYTE:
+ // Set the 'w' bit to get the large version
+ code |= 0x1;
+ break;
+
+#ifdef _TARGET_AMD64_
+ case EA_8BYTE:
+ // TODO-AMD64-CQ: Better way to not emit REX.W when we don't need it
+ // Don't need to zero out the high bits explicitly
+ if ((ins != INS_xor) || (reg1 != reg2))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ // Set the 'w' bit to get the large version
+ code |= 0x1;
+ break;
+
+#endif // _TARGET_AMD64_
+
+ default:
+ assert(!"unexpected size");
+ }
+ }
+
+ unsigned regCode = insEncodeReg345(ins, reg1, size, &code);
+ regCode |= insEncodeReg012(ins, reg2, size, &code);
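+ // regCode holds the ModRM reg (bits 3-5) and r/m (bits 0-2) fields for the two register operands;
+ // the register-direct mod bits (0xC0) are combined in below.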
+
+ // In case of AVX instructions that take 3 operands, we generally want to encode reg1
+ // as first source. In this case, reg1 is both a source and a destination.
+ // The exception is the "merge" 3-operand case, where we have a move instruction, such
+ // as movss, and we want to merge the source with itself.
+ //
+ // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
+ // now we use the single source as source1 and source2.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // Encode the source/dest operand reg in the 'vvvv' bits in 1's complement form.
+ code = insEncodeReg3456(ins, reg1, size, code);
+ }
+ else if (IsThreeOperandMoveAVXInstruction(ins))
+ {
+ // Encode the source operand reg in the 'vvvv' bits in 1's complement form.
+ code = insEncodeReg3456(ins, reg2, size, code);
+ }
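+ // For reference, the 4-bit VEX.vvvv field holds a register number in one's complement
+ // (inverted) form: xmm2 (register number 2) is encoded as ~2 & 0xF = 0b1101, and a
+ // value of 0b1111 means no extra register operand.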
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Is this a 'big' opcode?
+ if (code & 0xFF000000)
+ {
+ // Output the highest word of the opcode
+ dst += emitOutputWord(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+ else if (code & 0x00FF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+
+ // If byte 4 is 0xC0, then it contains the Mod/RM encoding for a 3-byte
+ // encoding. Otherwise, this is an instruction with a 4-byte encoding,
+ // and the MOd/RM encoding needs to go in the 5th byte.
+ // TODO-XArch-CQ: Currently, this will only support registers in the 5th byte.
+ // We probably need a different mechanism to identify the 4-byte encodings.
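+ // (Reminder: in the register-direct form the ModRM byte is mod=11 (0xC0), with the
+ // 'reg' field in bits 5..3 and 'r/m' in bits 2..0; regCode below already holds those
+ // two register fields.)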
+ if ((code & 0xFF) == 0x00)
+ {
+ // This case happens for AVX instructions only
+ assert(IsAVXInstruction(ins));
+ if ((code & 0xFF00) == 0xC000)
+ {
+ dst += emitOutputByte(dst, (0xC0 | regCode));
+ }
+ else
+ {
+ dst += emitOutputByte(dst, (code >> 8) & 0xFF);
+ dst += emitOutputByte(dst, (0xC0 | regCode));
+ }
+ }
+ else if ((code & 0xFF00) == 0xC000)
+ {
+ dst += emitOutputWord(dst, code | (regCode << 8));
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code);
+ dst += emitOutputByte(dst, (0xC0 | regCode));
+ }
+
+ // Does this instruction operate on a GC ref value?
+ if (id->idGCref())
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RRD_RRD:
+ break;
+
+ case IF_RWR_RRD:
+
+ if (emitSyncThisObjReg != REG_NA && emitIGisInProlog(emitCurIG) && reg2 == (int)REG_ARG_0)
+ {
+ // We're relocating "this" in the prolog
+ assert(emitComp->lvaIsOriginalThisArg(0));
+ assert(emitComp->lvaTable[0].lvRegister);
+ assert(emitComp->lvaTable[0].lvRegNum == reg1);
+
+ if (emitFullGCinfo)
+ {
+ emitGCregLiveSet(id->idGCref(), genRegMask(reg1), dst, true);
+ break;
+ }
+ else
+ {
+ /* If emitFullGCinfo==false, then we don't use any
+ regPtrDsc's and so explicitly note the location
+ of "this" in GCEncode.cpp
+ */
+ }
+ }
+
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case IF_RRW_RRD:
+
+ switch (id->idIns())
+ {
+ /*
+ This must be one of the following cases:
+
+ xor reg, reg to assign NULL
+
+ and r1 , r2 if (ptr1 && ptr2) ...
+ or r1 , r2 if (ptr1 || ptr2) ...
+
+ add r1 , r2 to compute a normal byref
+ sub r1 , r2 to compute a strange byref (VC only)
+
+ */
+ case INS_xor:
+ assert(id->idReg1() == id->idReg2());
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case INS_or:
+ case INS_and:
+ emitGCregDeadUpd(id->idReg1(), dst);
+ break;
+
+ case INS_add:
+ case INS_sub:
+ assert(id->idGCref() == GCT_BYREF);
+
+#ifdef DEBUG
+ regMaskTP regMask;
+ regMask = genRegMask(reg1) | genRegMask(reg2);
+
+ // r1/r2 could have been a GCREF as GCREF + int=BYREF
+ // or BYREF+/-int=BYREF
+ assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
+ ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
+#endif
+ // Mark r1 as holding a byref
+ emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC reg update instruction");
+ }
+
+ break;
+
+ case IF_RRW_RRW:
+ // This must be "xchg reg1, reg2"
+ assert(id->idIns() == INS_xchg);
+
+ // If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
+ // register pointer mask.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ GCtype gc1, gc2;
+
+ gc1 = emitRegGCtype(reg1);
+ gc2 = emitRegGCtype(reg2);
+
+ if (gc1 != gc2)
+ {
+ // Kill the GC-info about the GC registers
+
+ if (needsGC(gc1))
+ {
+ emitGCregDeadUpd(reg1, dst);
+ }
+
+ if (needsGC(gc2))
+ {
+ emitGCregDeadUpd(reg2, dst);
+ }
+
+ // Now, swap the info
+
+ if (needsGC(gc1))
+ {
+ emitGCregLiveUpd(gc1, reg2, dst);
+ }
+
+ if (needsGC(gc2))
+ {
+ emitGCregLiveUpd(gc2, reg1, dst);
+ }
+ }
+#endif // !LEGACY_BACKEND
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+ }
+ else
+ {
+ if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
+ {
+ }
+ else
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RRD_CNS:
+ // INS_mulEAX can not be used with any of these formats
+ assert(ins != INS_mulEAX && ins != INS_imulEAX);
+
+ // For the three operand imul instruction the target
+ // register is encoded in the opcode
+
+ if (instrIs3opImul(ins))
+ {
+ regNumber tgtReg = inst3opImulReg(ins);
+ emitGCregDeadUpd(tgtReg, dst);
+ }
+ break;
+
+ case IF_RWR_RRD:
+ case IF_RRW_RRD:
+ // INS_movxmm2i writes to reg2.
+ if (ins == INS_mov_xmm2i)
+ {
+ emitGCregDeadUpd(id->idReg2(), dst);
+ }
+ else
+ {
+ emitGCregDeadUpd(id->idReg1(), dst);
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+
+ return dst;
+}
+
+#ifdef FEATURE_AVX_SUPPORT
+BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
+{
+ size_t code;
+
+ instruction ins = id->idIns();
+ assert(IsAVXInstruction(ins));
+ assert(IsThreeOperandAVXInstruction(ins));
+ regNumber targetReg = id->idReg1();
+ regNumber src1 = id->idReg2();
+ regNumber src2 = id->idReg3();
+ emitAttr size = id->idOpSize();
+
+ code = insCodeRM(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ code = insEncodeRMreg(ins, code);
+
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ unsigned regCode = insEncodeReg345(ins, targetReg, size, &code);
+ regCode |= insEncodeReg012(ins, src2, size, &code);
+ // encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, src1, size, code);
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Is this a 'big' opcode?
+ if (code & 0xFF000000)
+ {
+ // Output the highest word of the opcode
+ dst += emitOutputWord(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+ else if (code & 0x00FF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+
+ // If byte 4 is 0xC0, then it contains the Mod/RM encoding for a 3-byte
+ // encoding. Otherwise, this is an instruction with a 4-byte encoding,
+ // and the Mod/RM encoding needs to go in the 5th byte.
+ // TODO-XArch-CQ: Currently, this will only support registers in the 5th byte.
+ // We probably need a different mechanism to identify the 4-byte encodings.
+ if ((code & 0xFF) == 0x00)
+ {
+ // This case happens for AVX instructions only
+ assert(IsAVXInstruction(ins));
+ if ((code & 0xFF00) == 0xC000)
+ {
+ dst += emitOutputByte(dst, (0xC0 | regCode));
+ }
+ else
+ {
+ dst += emitOutputByte(dst, (code >> 8) & 0xFF);
+ dst += emitOutputByte(dst, (0xC0 | regCode));
+ }
+ }
+ else if ((code & 0xFF00) == 0xC000)
+ {
+ dst += emitOutputWord(dst, code | (regCode << 8));
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code);
+ dst += emitOutputByte(dst, (0xC0 | regCode));
+ }
+
+ noway_assert(!id->idGCref());
+
+ return dst;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Output an instruction with a register and constant operands.
+ */
+
+BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
+{
+ size_t code;
+ emitAttr size = id->idOpSize();
+ instruction ins = id->idIns();
+ regNumber reg = id->idReg1();
+ ssize_t val = emitGetInsSC(id);
+ bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ valInByte = false; // relocs can't be placed in a byte
+ }
+#endif
+
+ noway_assert(emitVerifyEncodable(ins, size, reg));
+
+#ifndef LEGACY_BACKEND
+ if (IsSSEOrAVXInstruction(ins))
+ {
+ // Handle SSE2 instructions of the form "opcode reg, immed8"
+
+ assert(id->idGCref() == GCT_NONE);
+ assert(valInByte);
+ assert(ins == INS_psrldq || ins == INS_pslldq);
+
+ // Get the 'base' opcode.
+ code = insCodeMI(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ code = insEncodeMIreg(ins, reg, size, code);
+ assert(code & 0x00FF0000);
+ if (TakesVexPrefix(ins))
+ {
+ // The 'vvvv' bits encode the destination register, which for this case (RI)
+ // is the same as the source.
+ code = insEncodeReg3456(ins, reg, size, code);
+ }
+
+ // In case of psrldq
+ // Reg/Opcode = 3
+ // R/M = reg1
+ //
+ // In case of pslldq
+ // Reg/Opcode = 7
+ // R/M = reg1
+ regNumber regOpcode = (regNumber)((ins == INS_psrldq) ? 3 : 7);
+ unsigned regcode = (insEncodeReg345(ins, regOpcode, size, &code) | insEncodeReg012(ins, reg, size, &code)) << 8;
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ if (code & 0xFF000000)
+ {
+ dst += emitOutputWord(dst, code >> 16);
+ }
+ else if (code & 0xFF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ }
+
+ dst += emitOutputWord(dst, code | regcode);
+
+ dst += emitOutputByte(dst, val);
+
+ return dst;
+ }
+#endif // !LEGACY_BACKEND
+
+ // The 'mov' opcode is special
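+ // It uses the B8+rd ("mov reg, imm") form: the destination register is encoded in
+ // the low 3 bits of the opcode byte itself, and the immediate is a full operand-width
+ // value. With a REX.W prefix this is the only x86 instruction that takes a 64-bit
+ // immediate.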
+ if (ins == INS_mov)
+ {
+ code = insCodeACC(ins);
+ assert(code < 0x100);
+
+ code |= 0x08; // Set the 'w' bit
+ unsigned regcode = insEncodeReg012(ins, reg, size, &code);
+ code |= regcode;
+
+ // This is INS_mov and will not take VEX prefix
+ assert(!TakesVexPrefix(ins));
+
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ dst += emitOutputByte(dst, code);
+ if (size == EA_4BYTE)
+ {
+ dst += emitOutputLong(dst, val);
+ }
+#ifdef _TARGET_AMD64_
+ else
+ {
+ assert(size == EA_PTRSIZE);
+ dst += emitOutputSizeT(dst, val);
+ }
+#endif
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ emitRecordRelocation((void*)(dst - (unsigned)EA_SIZE(size)), (void*)(size_t)val, IMAGE_REL_BASED_MOFFSET);
+ }
+#endif
+
+ goto DONE;
+ }
+
+ // Decide which encoding is the shortest
+ bool useSigned, useACC;
+
+ if (reg == REG_EAX && !instrIs3opImul(ins))
+ {
+ if (size == EA_1BYTE || (ins == INS_test))
+ {
+ // For al, ACC encoding is always the smallest
+ useSigned = false;
+ useACC = true;
+ }
+ else
+ {
+ /* For ax/eax, we avoid the ACC encoding for small constants, as we
+ * can emit the small constant and have it sign-extended.
+ * For big constants, the ACC encoding is better, as we can use
+ * the 1-byte opcode.
+ */
+
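+ // For example, "add eax, 8" is smaller as 83 C0 08 (sign-extended imm8 form, 3 bytes)
+ // than as the accumulator form 05 08 00 00 00 (5 bytes), while "add eax, 0x12345678"
+ // is smaller as 05 78 56 34 12 (5 bytes) than as 81 C0 78 56 34 12 (6 bytes).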
+ if (valInByte)
+ {
+ // avoid using ACC encoding
+ useSigned = true;
+ useACC = false;
+ }
+ else
+ {
+ useSigned = false;
+ useACC = true;
+ }
+ }
+ }
+ else
+ {
+ useACC = false;
+
+ if (valInByte)
+ {
+ useSigned = true;
+ }
+ else
+ {
+ useSigned = false;
+ }
+ }
+
+ // "test" has no 's' bit
+ if (ins == INS_test)
+ {
+ useSigned = false;
+ }
+
+ // Get the 'base' opcode
+ if (useACC)
+ {
+ assert(!useSigned);
+ code = insCodeACC(ins);
+ }
+ else
+ {
+ assert(!useSigned || valInByte);
+
+ // Some instructions (at least 'imul') do not have a
+ // r/m, immed form, but do have a dstReg,srcReg,imm8 form.
+ if (valInByte && useSigned && insNeedsRRIb(ins))
+ {
+ code = insEncodeRRIb(ins, reg, size);
+ }
+ else
+ {
+ code = insCodeMI(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ code = insEncodeMIreg(ins, reg, size, code);
+ }
+ }
+
+ switch (size)
+ {
+ case EA_1BYTE:
+ break;
+
+ case EA_2BYTE:
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ __fallthrough;
+
+ case EA_4BYTE:
+ // Set the 'w' bit to get the large version
+ code |= 0x1;
+ break;
+
+#ifdef _TARGET_AMD64_
+ case EA_8BYTE:
+ /* Set the 'w' bit to get the large version */
+ /* and the REX.W bit to get the really large version */
+
+ code = AddRexWPrefix(ins, code);
+ code |= 0x1;
+ break;
+#endif
+
+ default:
+ assert(!"unexpected size");
+ }
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Does the value fit in a sign-extended byte?
+ // Important! Only set the 's' bit when we have a size larger than EA_1BYTE.
+ // Note: A sign-extending immediate when (size == EA_1BYTE) is invalid in 64-bit mode.
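+ // (In the 0x80/0x81/0x83 ALU-immediate group, bit 1 of the opcode is that 's' bit:
+ // 0x81 takes a full-size immediate, while 0x83 = 0x81 | 2 takes an imm8 that the CPU
+ // sign-extends to the operand size.)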
+
+ if (useSigned && (size > EA_1BYTE))
+ {
+ // We can just set the 's' bit, and issue an immediate byte
+
+ code |= 0x2; // Set the 's' bit to use a sign-extended immediate byte.
+ dst += emitOutputWord(dst, code);
+ dst += emitOutputByte(dst, val);
+ }
+ else
+ {
+ // Can we use an accumulator (EAX) encoding?
+ if (useACC)
+ {
+ dst += emitOutputByte(dst, code);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code);
+ }
+
+ switch (size)
+ {
+ case EA_1BYTE:
+ dst += emitOutputByte(dst, val);
+ break;
+ case EA_2BYTE:
+ dst += emitOutputWord(dst, val);
+ break;
+ case EA_4BYTE:
+ dst += emitOutputLong(dst, val);
+ break;
+#ifdef _TARGET_AMD64_
+ case EA_8BYTE:
+ dst += emitOutputLong(dst, val);
+ break;
+#endif // _TARGET_AMD64_
+ default:
+ break;
+ }
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
+ assert(size == EA_4BYTE);
+ }
+#endif
+ }
+
+DONE:
+
+ // Does this instruction operate on a GC ref value?
+ if (id->idGCref())
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RRD_CNS:
+ break;
+
+ case IF_RWR_CNS:
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case IF_RRW_CNS:
+ assert(id->idGCref() == GCT_BYREF);
+
+#ifdef DEBUG
+ regMaskTP regMask;
+ regMask = genRegMask(reg);
+ // FIXNOW review the other places and relax the assert there too
+
+ // The reg must currently be holding either a gcref or a byref
+ // GCT_GCREF+int = GCT_BYREF, and GCT_BYREF+/-int = GCT_BYREF
+ if (emitThisGCrefRegs & regMask)
+ {
+ assert(ins == INS_add);
+ }
+ if (emitThisByrefRegs & regMask)
+ {
+ assert(ins == INS_add || ins == INS_sub);
+ }
+#endif
+ // Mark it as holding a GCT_BYREF
+ emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+
+ // mul can never produce a GC ref
+ assert(!instrIs3opImul(ins));
+ assert(ins != INS_mulEAX && ins != INS_imulEAX);
+ }
+ else
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RRD_CNS:
+ // INS_mulEAX can not be used with any of these formats
+ assert(ins != INS_mulEAX && ins != INS_imulEAX);
+
+ // For the three operand imul instruction the target
+ // register is encoded in the opcode
+
+ if (instrIs3opImul(ins))
+ {
+ regNumber tgtReg = inst3opImulReg(ins);
+ emitGCregDeadUpd(tgtReg, dst);
+ }
+ break;
+
+ case IF_RRW_CNS:
+ case IF_RWR_CNS:
+ assert(!instrIs3opImul(ins));
+
+ emitGCregDeadUpd(id->idReg1(), dst);
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output an instruction with a constant operand.
+ */
+
+BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
+{
+ size_t code;
+ instruction ins = id->idIns();
+ emitAttr size = id->idOpSize();
+ ssize_t val = emitGetInsSC(id);
+ bool valInByte = ((signed char)val == val);
+
+ // We would need to update GC info correctly
+ assert(!IsSSE2Instruction(ins));
+ assert(!IsAVXInstruction(ins));
+
+#ifdef _TARGET_AMD64_
+ // all these opcodes take a sign-extended 4-byte immediate, max
+ noway_assert(size < EA_8BYTE || ((int)val == val && !id->idIsCnsReloc()));
+#endif
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ valInByte = false; // relocs can't be placed in a byte
+
+ // Of these instructions only the push instruction can have reloc
+ assert(ins == INS_push || ins == INS_push_hide);
+ }
+#endif
+
+ switch (ins)
+ {
+ case INS_jge:
+ assert((val >= -128) && (val <= 127));
+ dst += emitOutputByte(dst, insCode(ins));
+ dst += emitOutputByte(dst, val);
+ break;
+
+ case INS_loop:
+ assert((val >= -128) && (val <= 127));
+ dst += emitOutputByte(dst, insCodeMI(ins));
+ dst += emitOutputByte(dst, val);
+ break;
+
+ case INS_ret:
+ assert(val);
+ dst += emitOutputByte(dst, insCodeMI(ins));
+ dst += emitOutputWord(dst, val);
+ break;
+
+ case INS_push_hide:
+ case INS_push:
+ code = insCodeMI(ins);
+
+ // Does the operand fit in a byte?
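+ // If so, use the sign-extended imm8 form: "push imm32" is opcode 0x68, and setting
+ // bit 1 (0x68 | 2 = 0x6A) selects "push imm8".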
+ if (valInByte)
+ {
+ dst += emitOutputByte(dst, code | 2);
+ dst += emitOutputByte(dst, val);
+ }
+ else
+ {
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+ }
+
+ dst += emitOutputByte(dst, code);
+ dst += emitOutputLong(dst, val);
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+
+ // Did we push a GC ref value?
+ if (id->idGCref())
+ {
+#ifdef DEBUG
+ printf("UNDONE: record GCref push [cns]\n");
+#endif
+ }
+
+ break;
+
+ default:
+ assert(!"unexpected instruction");
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output a local jump instruction.
+ * This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that
+ * needs to get bound to an actual address and processed by branch shortening.
+ */
+
+BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
+{
+ unsigned srcOffs;
+ unsigned dstOffs;
+ ssize_t distVal;
+
+ instrDescJmp* id = (instrDescJmp*)i;
+ instruction ins = id->idIns();
+ bool jmp;
+ bool relAddr = true; // does the instruction use relative-addressing?
+
+ // SSE2 doesn't make any sense here
+ assert(!IsSSE2Instruction(ins));
+ assert(!IsAVXInstruction(ins));
+
+ size_t ssz;
+ size_t lsz;
+
+ switch (ins)
+ {
+ default:
+ ssz = JCC_SIZE_SMALL;
+ lsz = JCC_SIZE_LARGE;
+ jmp = true;
+ break;
+
+ case INS_jmp:
+ ssz = JMP_SIZE_SMALL;
+ lsz = JMP_SIZE_LARGE;
+ jmp = true;
+ break;
+
+ case INS_call:
+ ssz = lsz = CALL_INST_SIZE;
+ jmp = false;
+ break;
+
+ case INS_push_hide:
+ case INS_push:
+ ssz = lsz = 5;
+ jmp = false;
+ relAddr = false;
+ break;
+
+ case INS_mov:
+ case INS_lea:
+ ssz = lsz = id->idCodeSize();
+ jmp = false;
+ relAddr = false;
+ break;
+ }
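+ // Encoding sizes for reference: a "short" jump is opcode + rel8 (2 bytes) and can only
+ // reach -128..+127 bytes from the end of the instruction; the large forms are
+ // E9 + rel32 for jmp (5 bytes) and 0F 8x + rel32 for conditional jumps (6 bytes).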
+
+ // Figure out the distance to the target
+ srcOffs = emitCurCodeOffs(dst);
+ dstOffs = id->idAddr()->iiaIGlabel->igOffs;
+
+ if (relAddr)
+ {
+ distVal = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
+ }
+ else
+ {
+ distVal = (ssize_t)emitOffsetToPtr(dstOffs);
+ }
+
+ if (dstOffs <= srcOffs)
+ {
+ // This is a backward jump - distance is known at this point
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ size_t blkOffs = id->idjIG->igOffs;
+
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ }
+ printf("[3] Jump block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
+ printf("[3] Jump is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
+ printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
+ }
+#endif
+
+ // Can we use a short jump?
+ if (jmp && distVal - ssz >= (size_t)JMP_DIST_SMALL_MAX_NEG)
+ {
+ emitSetShortJump(id);
+ }
+ }
+ else
+ {
+ // This is a forward jump - distance will be an upper limit
+ emitFwdJumps = true;
+
+ // The target offset will be closer by at least 'emitOffsAdj', but only if this
+ // jump doesn't cross the hot-cold boundary.
+ if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ dstOffs -= emitOffsAdj;
+ distVal -= emitOffsAdj;
+ }
+
+ // Record the location of the jump for later patching
+ id->idjOffs = dstOffs;
+
+ // Are we overflowing the id->idjOffs bitfield?
+ if (id->idjOffs != dstOffs)
+ {
+ IMPL_LIMITATION("Method is too large");
+ }
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ size_t blkOffs = id->idjIG->igOffs;
+
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ }
+ printf("[4] Jump block is at %08X\n", blkOffs);
+ printf("[4] Jump is at %08X\n", srcOffs);
+ printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
+ }
+#endif
+
+ // Can we use a short jump?
+ if (jmp && distVal - ssz <= (size_t)JMP_DIST_SMALL_MAX_POS)
+ {
+ emitSetShortJump(id);
+ }
+ }
+
+ // Adjust the offset to emit relative to the end of the instruction
+ if (relAddr)
+ {
+ distVal -= id->idjShort ? ssz : lsz;
+ }
+
+#ifdef DEBUG
+ if (0 && emitComp->verbose)
+ {
+ size_t sz = id->idjShort ? ssz : lsz;
+ int distValSize = id->idjShort ? 4 : 8;
+ printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd",
+ emitComp->dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs,
+ distVal);
+ }
+#endif
+
+ // What size jump should we use?
+ if (id->idjShort)
+ {
+ // Short jump
+ assert(!id->idjKeepLong);
+ assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
+
+ assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
+ assert(JMP_SIZE_SMALL == 2);
+
+ assert(jmp);
+
+ if (emitInstCodeSz(id) != JMP_SIZE_SMALL)
+ {
+ emitOffsAdj += emitInstCodeSz(id) - JMP_SIZE_SMALL;
+
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("; NOTE: size of jump [%08X] mis-predicted\n", emitComp->dspPtr(id));
+ }
+#endif
+ }
+
+ dst += emitOutputByte(dst, insCode(ins));
+
+ // For forward jumps, record the address of the distance value
+ id->idjTemp.idjAddr = (distVal > 0) ? dst : nullptr;
+
+ dst += emitOutputByte(dst, distVal);
+ }
+ else
+ {
+ size_t code;
+
+ // Long jump
+ if (jmp)
+ {
+ assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp);
+ assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo);
+ assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb);
+ assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae);
+ assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je);
+ assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne);
+ assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe);
+ assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja);
+ assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js);
+ assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns);
+ assert(INS_jpe + (INS_l_jmp - INS_jmp) == INS_l_jpe);
+ assert(INS_jpo + (INS_l_jmp - INS_jmp) == INS_l_jpo);
+ assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl);
+ assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge);
+ assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle);
+ assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg);
+
+ code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp)));
+ }
+ else if (ins == INS_push || ins == INS_push_hide)
+ {
+ assert(insCodeMI(INS_push) == 0x68);
+ code = 0x68;
+ }
+ else if (ins == INS_mov)
+ {
+ // Make it look like IF_SWR_CNS so that emitOutputSV emits the r/m32 for us
+ insFormat tmpInsFmt = id->idInsFmt();
+ insGroup* tmpIGlabel = id->idAddr()->iiaIGlabel;
+ bool tmpDspReloc = id->idIsDspReloc();
+
+ id->idInsFmt(IF_SWR_CNS);
+ id->idAddr()->iiaLclVar = ((instrDescLbl*)id)->dstLclVar;
+ id->idSetIsDspReloc(false);
+
+ dst = emitOutputSV(dst, id, insCodeMI(ins));
+
+ // Restore id fields with original values
+ id->idInsFmt(tmpInsFmt);
+ id->idAddr()->iiaIGlabel = tmpIGlabel;
+ id->idSetIsDspReloc(tmpDspReloc);
+ code = 0xCC;
+ }
+ else if (ins == INS_lea)
+ {
+ // Make an instrDesc that looks like IF_RWR_ARD so that emitOutputAM emits the r/m32 for us.
+ // We basically are doing what emitIns_R_AI does.
+ // TODO-XArch-Cleanup: revisit this.
+ instrDescAmd idAmdStackLocal;
+ instrDescAmd* idAmd = &idAmdStackLocal;
+ *(instrDesc*)idAmd = *(instrDesc*)id; // copy all the "core" fields
+ memset((BYTE*)idAmd + sizeof(instrDesc), 0,
+ sizeof(instrDescAmd) - sizeof(instrDesc)); // zero out the tail that wasn't copied
+
+ idAmd->idInsFmt(IF_RWR_ARD);
+ idAmd->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ idAmd->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+ emitSetAmdDisp(idAmd, distVal); // set the displacement
+ idAmd->idSetIsDspReloc(id->idIsDspReloc());
+ assert(emitGetInsAmdAny(idAmd) == distVal); // make sure "disp" is stored properly
+
+ UNATIVE_OFFSET sz = emitInsSizeAM(idAmd, insCodeRM(ins));
+ idAmd->idCodeSize(sz);
+
+ code = insCodeRM(ins);
+ code |= (insEncodeReg345(ins, id->idReg1(), EA_PTRSIZE, &code) << 8);
+
+ dst = emitOutputAM(dst, idAmd, code, nullptr);
+
+ code = 0xCC;
+
+ // For forward jumps, record the address of the distance value
+ // Hard-coded 4 here because we already output the displacement, as the last thing.
+ id->idjTemp.idjAddr = (dstOffs > srcOffs) ? (dst - 4) : nullptr;
+
+ // We're done
+ return dst;
+ }
+ else
+ {
+ code = 0xE8;
+ }
+
+ if (ins != INS_mov)
+ {
+ dst += emitOutputByte(dst, code);
+
+ if (code & 0xFF00)
+ {
+ dst += emitOutputByte(dst, code >> 8);
+ }
+ }
+
+ // For forward jumps, record the address of the distance value
+ id->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : nullptr;
+
+ dst += emitOutputLong(dst, distVal);
+
+#ifndef _TARGET_AMD64_ // all REL32 on AMD have to go through recordRelocation
+ if (emitComp->opts.compReloc)
+#endif
+ {
+ if (!relAddr)
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)distVal, IMAGE_REL_BASED_HIGHLOW);
+ }
+ else if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ assert(id->idjKeepLong);
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), dst + distVal, IMAGE_REL_BASED_REL32);
+ }
+ }
+ }
+
+ // Local calls kill all registers
+ if (ins == INS_call && (emitThisGCrefRegs | emitThisByrefRegs))
+ {
+ emitGCregDeadUpdMask(emitThisGCrefRegs | emitThisByrefRegs, dst);
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Append the machine code corresponding to the given instruction descriptor
+ * to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
+ * is the instruction group that contains the instruction. Updates '*dp' to
+ * point past the generated code, and returns the size of the instruction
+ * descriptor in bytes.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
+{
+ assert(emitIssuing);
+
+ BYTE* dst = *dp;
+ size_t sz = sizeof(instrDesc);
+ instruction ins = id->idIns();
+ unsigned char callInstrSize = 0;
+
+#ifdef DEBUG
+ bool dspOffs = emitComp->opts.dspGCtbls;
+#endif // DEBUG
+
+ emitAttr size = id->idOpSize();
+
+ assert(REG_NA == (int)REG_NA);
+
+ assert(ins != INS_imul || size >= EA_4BYTE); // Has no 'w' bit
+ assert(instrIs3opImul(id->idIns()) == 0 || size >= EA_4BYTE); // Has no 'w' bit
+
+ VARSET_TP VARSET_INIT_NOCOPY(GCvars, VarSetOps::UninitVal());
+
+ // What instruction format have we got?
+ switch (id->idInsFmt())
+ {
+ size_t code;
+ size_t regcode;
+ int args;
+ CnsVal cnsVal;
+
+ BYTE* addr;
+ bool recCall;
+
+ regMaskTP gcrefRegs;
+ regMaskTP byrefRegs;
+
+ /********************************************************************/
+ /* No operands */
+ /********************************************************************/
+ case IF_NONE:
+ // the loop alignment pseudo instruction
+ if (ins == INS_align)
+ {
+ sz = TINY_IDSC_SIZE;
+ dst = emitOutputNOP(dst, (-(int)(size_t)dst) & 0x0f);
+ assert(((size_t)dst & 0x0f) == 0);
+ break;
+ }
+
+ if (ins == INS_nop)
+ {
+ dst = emitOutputNOP(dst, id->idCodeSize());
+ break;
+ }
+
+ // the cdq instruction kills the EDX register implicitly
+ if (ins == INS_cdq)
+ {
+ emitGCregDeadUpd(REG_EDX, dst);
+ }
+
+ __fallthrough;
+
+#if FEATURE_STACK_FP_X87
+ case IF_TRD:
+ case IF_TWR:
+ case IF_TRW:
+#endif // FEATURE_STACK_FP_X87
+
+ assert(id->idGCref() == GCT_NONE);
+
+ code = insCodeMR(ins);
+
+#ifdef _TARGET_AMD64_
+ // Only scalar AVX instructions are supported here, hence the size is hard-coded to 4-byte.
+ code = AddVexPrefixIfNeeded(ins, code, EA_4BYTE);
+
+ if (ins == INS_cdq && TakesRexWPrefix(ins, id->idOpSize()))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+#endif
+ // Is this a 'big' opcode?
+ if (code & 0xFF000000)
+ {
+ // The high word and then the low word
+ dst += emitOutputWord(dst, code >> 16);
+ code &= 0x0000FFFF;
+ dst += emitOutputWord(dst, code);
+ }
+ else if (code & 0x00FF0000)
+ {
+ // The high byte and then the low word
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ dst += emitOutputWord(dst, code);
+ }
+ else if (code & 0xFF00)
+ {
+ // The 2 byte opcode
+ dst += emitOutputWord(dst, code);
+ }
+ else
+ {
+ // The 1 byte opcode
+ dst += emitOutputByte(dst, code);
+ }
+
+ break;
+
+ /********************************************************************/
+ /* Simple constant, local label, method */
+ /********************************************************************/
+
+ case IF_CNS:
+ dst = emitOutputIV(dst, id);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_LABEL:
+ case IF_RWR_LABEL:
+ case IF_SWR_LABEL:
+ assert(id->idGCref() == GCT_NONE);
+ assert(id->idIsBound());
+
+ // TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()?
+ dst = emitOutputLJ(dst, id);
+ sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp));
+ break;
+
+ case IF_METHOD:
+ case IF_METHPTR:
+ // Assume we'll be recording this call
+ recCall = true;
+
+ // Get hold of the argument count and field Handle
+ args = emitGetInsCDinfo(id);
+
+ // Is this a "fat" call descriptor?
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+ gcrefRegs = idCall->idcGcrefRegs;
+ byrefRegs = idCall->idcByrefRegs;
+ VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
+ sz = sizeof(instrDescCGCA);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+
+ gcrefRegs = emitDecodeCallGCregs(id);
+ byrefRegs = 0;
+ VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+ sz = sizeof(instrDesc);
+ }
+
+ addr = (BYTE*)id->idAddr()->iiaAddr;
+ assert(addr != nullptr);
+
+ // Some helpers don't get recorded in GC tables
+ if (id->idIsNoGC())
+ {
+ recCall = false;
+ }
+
+ // What kind of a call do we have here?
+ if (id->idInsFmt() == IF_METHPTR)
+ {
+ // This is call indirect via a method pointer
+
+ code = insCodeMR(ins);
+ if (ins == INS_i_jmp)
+ {
+ code |= 1;
+ }
+
+ if (id->idIsDspReloc())
+ {
+ dst += emitOutputWord(dst, code | 0x0500);
+#ifdef _TARGET_AMD64_
+ dst += emitOutputLong(dst, 0);
+#else
+ dst += emitOutputLong(dst, (int)addr);
+#endif
+ emitRecordRelocation((void*)(dst - sizeof(int)), addr, IMAGE_REL_BASED_DISP32);
+ }
+ else
+ {
+#ifdef _TARGET_X86_
+ dst += emitOutputWord(dst, code | 0x0500);
+#else //_TARGET_AMD64_
+ // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
+ // This addr mode should never be used while generating relocatable ngen code, nor if
+ // the addr can be encoded as a pc-relative address.
+ noway_assert(!emitComp->opts.compReloc);
+ noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32);
+ noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (ssize_t)addr);
+
+ // This requires specifying a SIB byte after the ModRM byte.
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, 0x25);
+#endif //_TARGET_AMD64_
+ dst += emitOutputLong(dst, static_cast<int>(reinterpret_cast<intptr_t>(addr)));
+ }
+ goto DONE_CALL;
+ }
+
+ // Else
+ // This is call direct where we know the target, thus we can
+ // use a direct call; the target to jump to is in iiaAddr.
+ assert(id->idInsFmt() == IF_METHOD);
+
+ // Output the call opcode followed by the target distance
+ dst += (ins == INS_l_jmp) ? emitOutputByte(dst, insCode(ins)) : emitOutputByte(dst, insCodeMI(ins));
+
+ ssize_t offset;
+#ifdef _TARGET_AMD64_
+ // All REL32 on Amd64 go through recordRelocation. Here we will output zero to advance dst.
+ offset = 0;
+ assert(id->idIsDspReloc());
+#else
+ // Calculate PC relative displacement.
+ // Although you might think we should be using sizeof(void*), the x86 and x64 instruction sets
+ // only allow a 32-bit offset, so we correctly use sizeof(INT32).
+ offset = addr - (dst + sizeof(INT32));
+#endif
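+ // (The rel32 displacement is measured from the end of the instruction, so
+ // E8 00 00 00 00 calls the instruction that immediately follows the call.)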
+
+ dst += emitOutputLong(dst, offset);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), addr, IMAGE_REL_BASED_REL32);
+ }
+#endif
+
+ DONE_CALL:
+
+ /* We update the GC info before the call as the variables cannot be
+ used by the call. Killing variables before the call helps with
+ boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
+ If we ever track aliased variables (which could be used by the
+ call), we would have to keep them alive past the call.
+ */
+ assert(FitsIn<unsigned char>(dst - *dp));
+ callInstrSize = static_cast<unsigned char>(dst - *dp);
+ emitUpdateLiveGCvars(GCvars, *dp);
+
+ // If the method returns a GC ref, mark EAX appropriately
+ if (id->idGCref() == GCT_GCREF)
+ {
+ gcrefRegs |= RBM_EAX;
+ }
+ else if (id->idGCref() == GCT_BYREF)
+ {
+ byrefRegs |= RBM_EAX;
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If a multi-register return method is called, mark RDX appropriately (for System V AMD64).
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+ if (idCall->idSecondGCref() == GCT_GCREF)
+ {
+ gcrefRegs |= RBM_RDX;
+ }
+ else if (idCall->idSecondGCref() == GCT_BYREF)
+ {
+ byrefRegs |= RBM_RDX;
+ }
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // If the GC register set has changed, report the new set
+ if (gcrefRegs != emitThisGCrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
+ }
+
+ if (byrefRegs != emitThisByrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
+ }
+
+ if (recCall || args)
+ {
+ // For callee-pop, all arguments will be popped after the call.
+ // For caller-pop, any GC arguments will go dead after the call.
+
+ assert(callInstrSize != 0);
+
+ if (args >= 0)
+ {
+ emitStackPop(dst, /*isCall*/ true, callInstrSize, args);
+ }
+ else
+ {
+ emitStackKillArgs(dst, -args, callInstrSize);
+ }
+ }
+
+ // Do we need to record a call location for GC purposes?
+ if (!emitFullGCinfo && recCall)
+ {
+ assert(callInstrSize != 0);
+ emitRecordGCcall(dst, callInstrSize);
+ }
+
+#ifdef DEBUG
+ if (ins == INS_call)
+ {
+ emitRecordCallSite(emitCurCodeOffs(*dp), id->idDebugOnlyInfo()->idCallSig,
+ (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
+ }
+#endif // DEBUG
+
+ break;
+
+ /********************************************************************/
+ /* One register operand */
+ /********************************************************************/
+
+ case IF_RRD:
+ case IF_RWR:
+ case IF_RRW:
+ dst = emitOutputR(dst, id);
+ sz = TINY_IDSC_SIZE;
+ break;
+
+ /********************************************************************/
+ /* Register and register/constant */
+ /********************************************************************/
+
+ case IF_RRW_SHF:
+ code = insCodeMR(ins);
+ // Emit the VEX prefix if it exists
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ code = insEncodeMRreg(ins, id->idReg1(), size, code);
+
+ // set the W bit
+ if (size != EA_1BYTE)
+ {
+ code |= 1;
+ }
+
+ // Emit the REX prefix if it exists
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ // Output a size prefix for a 16-bit operand
+ if (size == EA_2BYTE)
+ {
+ dst += emitOutputByte(dst, 0x66);
+ }
+
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+ dst += emitOutputWord(dst, code);
+ dst += emitOutputByte(dst, emitGetInsSC(id));
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_RRD_RRD:
+ case IF_RWR_RRD:
+ case IF_RRW_RRD:
+ case IF_RRW_RRW:
+ dst = emitOutputRR(dst, id);
+ sz = TINY_IDSC_SIZE;
+ break;
+
+ case IF_RRD_CNS:
+ case IF_RWR_CNS:
+ case IF_RRW_CNS:
+ dst = emitOutputRI(dst, id);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+#ifdef FEATURE_AVX_SUPPORT
+ case IF_RWR_RRD_RRD:
+ dst = emitOutputRRR(dst, id);
+ sz = emitSizeOfInsDsc(id);
+ break;
+#endif
+
+ case IF_RRW_RRW_CNS:
+ assert(id->idGCref() == GCT_NONE);
+
+ // Get the 'base' opcode (it's a big one)
+ // Also, determine which operand goes where in the ModRM byte.
+ regNumber mReg;
+ regNumber rReg;
+ // if (ins == INS_shld || ins == INS_shrd || ins == INS_vextractf128 || ins == INS_vinsertf128)
+ if (hasCodeMR(ins))
+ {
+ code = insCodeMR(ins);
+ // Emit the VEX prefix if it exists
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ code = insEncodeMRreg(ins, code);
+ mReg = id->idReg1();
+ rReg = id->idReg2();
+ }
+ else
+ {
+ code = insCodeRM(ins);
+ // Emit the VEX prefix if it exists
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ code = insEncodeRMreg(ins, code);
+ mReg = id->idReg2();
+ rReg = id->idReg1();
+ }
+ assert(code & 0x00FF0000);
+
+#ifdef FEATURE_AVX_SUPPORT
+ if (TakesVexPrefix(ins))
+ {
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // Encode source/dest operand reg in 'vvvv' bits in 1's complement form
+ // This code will have to change when we support 3 operands.
+ // For now, we always overload this source with the destination (always reg1).
+ // (Though we will need to handle the few ops that can have the 'vvvv' bits as destination,
+ // e.g. pslldq, when/if we support those instructions with 2 registers.)
+ // (see x64 manual Table 2-9. Instructions with a VEX.vvvv destination)
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+ else if (IsThreeOperandMoveAVXInstruction(ins))
+ {
+ // This is a "merge" move instruction.
+ // Encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, id->idReg2(), size, code);
+ }
+ }
+#endif // FEATURE_AVX_SUPPORT
+
+ regcode = (insEncodeReg345(ins, rReg, size, &code) | insEncodeReg012(ins, mReg, size, &code)) << 8;
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ if (UseAVX() && Is4ByteAVXInstruction(ins))
+ {
+ // We just need to output the last byte of the opcode.
+ assert((code & 0xFF) == 0);
+ assert((code & 0xFF00) != 0xC000);
+ dst += emitOutputByte(dst, (code >> 8) & 0xFF);
+ code = 0;
+ }
+ else if (code & 0xFF000000)
+ {
+ dst += emitOutputWord(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+ else if (code & 0x00FF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+
+ // Note that regcode is shifted by 8-bits above to align with RM byte.
+ if (code != 0)
+ {
+ assert((code & 0xFF00) == 0xC000);
+ dst += emitOutputWord(dst, code | regcode);
+ }
+ else
+ {
+ // This case occurs for AVX instructions.
+ // Note that regcode is left shifted by 8-bits.
+ assert(Is4ByteAVXInstruction(ins));
+ dst += emitOutputByte(dst, 0xC0 | (regcode >> 8));
+ }
+
+ dst += emitOutputByte(dst, emitGetInsSC(id));
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ /********************************************************************/
+ /* Address mode operand */
+ /********************************************************************/
+
+ case IF_ARD:
+ case IF_AWR:
+ case IF_ARW:
+
+#if FEATURE_STACK_FP_X87
+
+ case IF_TRD_ARD:
+ case IF_TWR_ARD:
+ case IF_TRW_ARD:
+
+ // case IF_ARD_TRD:
+ // case IF_ARW_TRD:
+ case IF_AWR_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+
+ dst = emitCodeWithInstructionSize(dst, emitOutputAM(dst, id, insCodeMR(ins)), &callInstrSize);
+
+ switch (ins)
+ {
+ case INS_call:
+
+ IND_CALL:
+ // Get hold of the argument count and method handle
+ args = emitGetInsCIargs(id);
+
+ // Is this a "fat" call descriptor?
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+
+ gcrefRegs = idCall->idcGcrefRegs;
+ byrefRegs = idCall->idcByrefRegs;
+ VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
+ sz = sizeof(instrDescCGCA);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+
+ gcrefRegs = emitDecodeCallGCregs(id);
+ byrefRegs = 0;
+ VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+ sz = sizeof(instrDesc);
+ }
+
+ recCall = true;
+
+ goto DONE_CALL;
+
+ default:
+ sz = emitSizeOfInsDsc(id);
+ break;
+ }
+ break;
+
+ case IF_RRD_ARD:
+ case IF_RWR_ARD:
+ case IF_RRW_ARD:
+ code = insCodeRM(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+ dst = emitOutputAM(dst, id, code | regcode);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_ARD_RRD:
+ case IF_AWR_RRD:
+ case IF_ARW_RRD:
+ code = insCodeMR(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+ dst = emitOutputAM(dst, id, code | regcode);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_ARD_CNS:
+ case IF_AWR_CNS:
+ case IF_ARW_CNS:
+ emitGetInsAmdCns(id, &cnsVal);
+ dst = emitOutputAM(dst, id, insCodeMI(ins), &cnsVal);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_ARW_SHF:
+ emitGetInsAmdCns(id, &cnsVal);
+ dst = emitOutputAM(dst, id, insCodeMR(ins), &cnsVal);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ /********************************************************************/
+ /* Stack-based operand */
+ /********************************************************************/
+
+ case IF_SRD:
+ case IF_SWR:
+ case IF_SRW:
+
+#if FEATURE_STACK_FP_X87
+
+ case IF_TRD_SRD:
+ case IF_TWR_SRD:
+ case IF_TRW_SRD:
+
+ // case IF_SRD_TRD:
+ // case IF_SRW_TRD:
+ case IF_SWR_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+
+ assert(ins != INS_pop_hide);
+ if (ins == INS_pop)
+ {
+ // The offset in "pop [ESP+xxx]" is relative to the new ESP value
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ emitCurStackLvl -= sizeof(int);
+#endif
+ dst = emitOutputSV(dst, id, insCodeMR(ins));
+
+#if !FEATURE_FIXED_OUT_ARGS
+ emitCurStackLvl += sizeof(int);
+#endif
+ break;
+ }
+
+ dst = emitCodeWithInstructionSize(dst, emitOutputSV(dst, id, insCodeMR(ins)), &callInstrSize);
+
+ if (ins == INS_call)
+ {
+ goto IND_CALL;
+ }
+
+ break;
+
+ case IF_SRD_CNS:
+ case IF_SWR_CNS:
+ case IF_SRW_CNS:
+ emitGetInsCns(id, &cnsVal);
+ dst = emitOutputSV(dst, id, insCodeMI(ins), &cnsVal);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_SRW_SHF:
+ emitGetInsCns(id, &cnsVal);
+ dst = emitOutputSV(dst, id, insCodeMR(ins), &cnsVal);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_RRD_SRD:
+ case IF_RWR_SRD:
+ case IF_RRW_SRD:
+ code = insCodeRM(ins);
+
+ // 4-byte AVX instructions are special-cased inside emitOutputSV
+ // since they do not have space to encode the ModRM byte.
+ if (Is4ByteAVXInstruction(ins))
+ {
+ dst = emitOutputSV(dst, id, code);
+ }
+ else
+ {
+ code = AddVexPrefixIfNeeded(ins, code, size);
+
+ // In case of AVX instructions that take 3 operands, encode reg1 as first source.
+ // Note that reg1 is both a source and a destination.
+ //
+ // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
+ // now we use the single source as source1 and source2.
+ // For this format, moves do not support a third operand, so we only need to handle the binary ops.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+
+ regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+ dst = emitOutputSV(dst, id, code | regcode);
+ }
+ break;
+
+ case IF_SRD_RRD:
+ case IF_SWR_RRD:
+ case IF_SRW_RRD:
+ code = insCodeMR(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+
+ // In case of AVX instructions that take 3 operands, encode reg1 as first source.
+ // Note that reg1 is both a source and a destination.
+ //
+ // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
+ // now we use the single source as source1 and source2.
+ // For this format, moves do not support a third operand, so we only need to handle the binary ops.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+
+ regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+ dst = emitOutputSV(dst, id, code | regcode);
+ break;
+
+ /********************************************************************/
+ /* Direct memory address */
+ /********************************************************************/
+
+ case IF_MRD:
+ case IF_MRW:
+ case IF_MWR:
+
+#if FEATURE_STACK_FP_X87
+
+ case IF_TRD_MRD:
+ case IF_TWR_MRD:
+ case IF_TRW_MRD:
+
+ // case IF_MRD_TRD:
+ // case IF_MRW_TRD:
+ case IF_MWR_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+
+ noway_assert(ins != INS_call);
+ dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_MRD_OFF:
+ dst = emitOutputCV(dst, id, insCodeMI(ins));
+ break;
+
+ case IF_RRD_MRD:
+ case IF_RWR_MRD:
+ case IF_RRW_MRD:
+ code = insCodeRM(ins);
+ // Special case 4-byte AVX instructions
+ if (Is4ByteAVXInstruction(ins))
+ {
+ dst = emitOutputCV(dst, id, code);
+ }
+ else
+ {
+ code = AddVexPrefixIfNeeded(ins, code, size);
+
+ // In case of AVX instructions that take 3 operands, encode reg1 as first source.
+ // Note that reg1 is both a source and a destination.
+ //
+ // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
+ // now we use the single source as source1 and source2.
+ // For this format, moves do not support a third operand, so we only need to handle the binary ops.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+
+ regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+ dst = emitOutputCV(dst, id, code | regcode | 0x0500);
+ }
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_RWR_MRD_OFF:
+ code = insCode(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+
+ // In case of AVX instructions that take 3 operands, encode reg1 as first source.
+ // Note that reg1 is both a source and a destination.
+ //
+ // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
+ // now we use the single source as source1 and source2.
+ // For this format, moves do not support a third operand, so we only need to handle the binary ops.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+
+ regcode = insEncodeReg012(id->idIns(), id->idReg1(), size, &code);
+ dst = emitOutputCV(dst, id, code | 0x30 | regcode);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_MRD_RRD:
+ case IF_MWR_RRD:
+ case IF_MRW_RRD:
+ code = insCodeMR(ins);
+#ifdef FEATURE_AVX_SUPPORT
+ code = AddVexPrefixIfNeeded(ins, code, size);
+
+ // In case of AVX instructions that take 3 operands, encode reg1 as first source.
+ // Note that reg1 is both a source and a destination.
+ //
+ // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
+ // now we use the single source as source1 and source2.
+ // For this format, moves do not support a third operand, so we only need to handle the binary ops.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+#endif // FEATURE_AVX_SUPPORT
+
+ regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+ dst = emitOutputCV(dst, id, code | regcode | 0x0500);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_MRD_CNS:
+ case IF_MWR_CNS:
+ case IF_MRW_CNS:
+ emitGetInsDcmCns(id, &cnsVal);
+ dst = emitOutputCV(dst, id, insCodeMI(ins) | 0x0500, &cnsVal);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_MRW_SHF:
+ emitGetInsDcmCns(id, &cnsVal);
+ dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500, &cnsVal);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+#if FEATURE_STACK_FP_X87
+
+ /********************************************************************/
+ /* FP coprocessor stack operands */
+ /********************************************************************/
+
+ case IF_TRD_FRD:
+ case IF_TWR_FRD:
+ case IF_TRW_FRD:
+ assert(id->idGCref() == GCT_NONE);
+ dst += emitOutputWord(dst, insCodeMR(ins) | 0xC000 | (id->idReg1() << 8));
+ break;
+
+ case IF_FRD_TRD:
+ case IF_FWR_TRD:
+ case IF_FRW_TRD:
+ assert(id->idGCref() == GCT_NONE);
+ dst += emitOutputWord(dst, insCodeMR(ins) | 0xC004 | (id->idReg1() << 8));
+ break;
+
+#endif // FEATURE_STACK_FP_X87
+
+ /********************************************************************/
+ /* oops */
+ /********************************************************************/
+
+ default:
+
+#ifdef DEBUG
+ printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
+ assert(!"don't know how to encode this instruction");
+#endif
+ break;
+ }
+
+ // Make sure we set the instruction descriptor size correctly
+ assert(sz == emitSizeOfInsDsc(id));
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ // Make sure we keep the current stack level up to date
+ if (!emitIGisInProlog(ig) && !emitIGisInEpilog(ig))
+ {
+ switch (ins)
+ {
+ case INS_push:
+ // Please note: {INS_push_hide,IF_LABEL} is used to push the address of the
+ // finally block for calling it locally for an op_leave.
+ emitStackPush(dst, id->idGCref());
+ break;
+
+ case INS_pop:
+ emitStackPop(dst, false, /*callInstrSize*/ 0, 1);
+ break;
+
+ case INS_sub:
+ // Check for "sub ESP, icon"
+ if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
+ {
+ assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
+ emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / sizeof(void*)));
+ }
+ break;
+
+ case INS_add:
+ // Check for "add ESP, icon"
+ if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
+ {
+ assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
+ emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0,
+ (unsigned)(emitGetInsSC(id) / sizeof(void*)));
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+ assert((int)emitCurStackLvl >= 0);
+
+ // Only epilog "instructions" and some pseudo-instrs
+ // are allowed not to generate any code
+
+ assert(*dp != dst || emitInstHasNoCode(ins));
+
+#ifdef DEBUG
+ if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
+ {
+ emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));
+ }
+
+ if (emitComp->compDebugBreak)
+ {
+ // Setting JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
+ // at the beginning of this method.
+ if (JitConfig.JitEmitPrintRefRegs() != 0)
+ {
+ printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
+ printf(" emitThisGCrefRegs(0x%p)=", emitComp->dspPtr(&emitThisGCrefRegs));
+ printRegMaskInt(emitThisGCrefRegs);
+ emitDispRegSet(emitThisGCrefRegs);
+ printf("\n");
+ printf(" emitThisByrefRegs(0x%p)=", emitComp->dspPtr(&emitThisByrefRegs));
+ printRegMaskInt(emitThisByrefRegs);
+ emitDispRegSet(emitThisByrefRegs);
+ printf("\n");
+ }
+
+ // For example, setting JitBreakEmitOutputInstr=a6 will break when this method is called for
+ // emitting instruction a6, (i.e. IN00a6 in jitdump).
+ if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
+ {
+ assert(!"JitBreakEmitOutputInstr reached");
+ }
+ }
+#endif
+
+#ifdef TRANSLATE_PDB
+ if (*dp != dst)
+ {
+ // only map instruction groups to instruction groups
+ MapCode(id->idDebugOnlyInfo()->idilStart, *dp);
+ }
+#endif
+
+ *dp = dst;
+
+#ifdef DEBUG
+ if (ins == INS_mulEAX || ins == INS_imulEAX)
+ {
+ // INS_mulEAX has implicit target of Edx:Eax. Make sure
+ // that we detected this and cleared its GC-status.
+
+ assert(((RBM_EAX | RBM_EDX) & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
+ }
+
+ if (instrIs3opImul(ins))
+ {
+ // The target of the 3-operand imul is implicitly encoded. Make sure
+ // that we detected the implicit register and cleared its GC-status.
+
+ regMaskTP regMask = genRegMask(inst3opImulReg(ins));
+ assert((regMask & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
+ }
+#endif
+
+ return sz;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#endif // defined(_TARGET_XARCH_)
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
new file mode 100644
index 0000000000..dfd7e6ec50
--- /dev/null
+++ b/src/jit/emitxarch.h
@@ -0,0 +1,437 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if defined(_TARGET_XARCH_)
+
+/************************************************************************/
+/* Public inline informational methods */
+/************************************************************************/
+
+public:
+inline static bool isGeneralRegister(regNumber reg)
+{
+ return (reg <= REG_INT_LAST);
+}
+
+inline static bool isFloatReg(regNumber reg)
+{
+ return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST);
+}
+
+inline static bool isDoubleReg(regNumber reg)
+{
+ return isFloatReg(reg);
+}
+
+/************************************************************************/
+/* Routines that compute the size of / encode instructions */
+/************************************************************************/
+
+struct CnsVal
+{
+ ssize_t cnsVal;
+#ifdef RELOC_SUPPORT
+ bool cnsReloc;
+#endif
+};
+
+UNATIVE_OFFSET emitInsSize(size_t code);
+UNATIVE_OFFSET emitInsSizeRM(instruction ins);
+UNATIVE_OFFSET emitInsSizeSV(size_t code, int var, int dsp);
+UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, int var, int dsp, int val);
+UNATIVE_OFFSET emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr);
+UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, size_t code);
+UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, size_t code, int val);
+UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, size_t code);
+UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, size_t code, int val);
+
+BYTE* emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = nullptr);
+BYTE* emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = nullptr);
+BYTE* emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = nullptr);
+
+BYTE* emitOutputR(BYTE* dst, instrDesc* id);
+BYTE* emitOutputRI(BYTE* dst, instrDesc* id);
+BYTE* emitOutputRR(BYTE* dst, instrDesc* id);
+BYTE* emitOutputIV(BYTE* dst, instrDesc* id);
+
+#ifdef FEATURE_AVX_SUPPORT
+BYTE* emitOutputRRR(BYTE* dst, instrDesc* id);
+#endif
+
+BYTE* emitOutputLJ(BYTE* dst, instrDesc* id);
+
+unsigned emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, size_t& code);
+unsigned emitGetRexPrefixSize(instruction ins);
+unsigned emitGetVexPrefixSize(instruction ins, emitAttr attr);
+unsigned emitGetPrefixSize(size_t code);
+unsigned emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, size_t code);
+
+unsigned insEncodeReg345(instruction ins, regNumber reg, emitAttr size, size_t* code);
+unsigned insEncodeReg012(instruction ins, regNumber reg, emitAttr size, size_t* code);
+size_t insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, size_t code);
+unsigned insEncodeRegSIB(instruction ins, regNumber reg, size_t* code);
+
+size_t insEncodeMRreg(instruction ins, size_t code);
+size_t insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, size_t code);
+size_t insEncodeRRIb(instruction ins, regNumber reg, emitAttr size);
+size_t insEncodeOpreg(instruction ins, regNumber reg, emitAttr size);
+
+bool IsAVXInstruction(instruction ins);
+size_t insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, size_t code);
+
+size_t AddRexWPrefix(instruction ins, size_t code);
+size_t AddRexRPrefix(instruction ins, size_t code);
+size_t AddRexXPrefix(instruction ins, size_t code);
+size_t AddRexBPrefix(instruction ins, size_t code);
+size_t AddRexPrefix(instruction ins, size_t code);
+
+#ifdef FEATURE_AVX_SUPPORT
+// 3-byte VEX prefix starts with byte 0xC4
+#define VEX_PREFIX_MASK_3BYTE 0xC4000000000000LL
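+// (x86 also defines a shorter 2-byte VEX form that starts with byte 0xC5.)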
+bool TakesVexPrefix(instruction ins);
+// Returns true if the instruction encoding already contains VEX prefix
+bool hasVexPrefix(size_t code)
+{
+ return (code & VEX_PREFIX_MASK_3BYTE) != 0;
+}
+size_t AddVexPrefix(instruction ins, size_t code, emitAttr attr);
+size_t AddVexPrefixIfNeeded(instruction ins, size_t code, emitAttr size)
+{
+ if (TakesVexPrefix(ins))
+ {
+ code = AddVexPrefix(ins, code, size);
+ }
+ return code;
+}
+size_t AddVexPrefixIfNeededAndNotPresent(instruction ins, size_t code, emitAttr size)
+{
+ if (TakesVexPrefix(ins) && !hasVexPrefix(code))
+ {
+ code = AddVexPrefix(ins, code, size);
+ }
+ return code;
+}
+bool useAVXEncodings;
+bool UseAVX()
+{
+ return useAVXEncodings;
+}
+void SetUseAVX(bool value)
+{
+ useAVXEncodings = value;
+}
+bool IsThreeOperandBinaryAVXInstruction(instruction ins);
+bool IsThreeOperandMoveAVXInstruction(instruction ins);
+bool IsThreeOperandAVXInstruction(instruction ins)
+{
+ return (IsThreeOperandBinaryAVXInstruction(ins) || IsThreeOperandMoveAVXInstruction(ins));
+}
+#else // !FEATURE_AVX_SUPPORT
+bool UseAVX()
+{
+ return false;
+}
+bool hasVexPrefix(size_t code)
+{
+ return false;
+}
+bool IsThreeOperandBinaryAVXInstruction(instruction ins)
+{
+ return false;
+}
+bool IsThreeOperandMoveAVXInstruction(instruction ins)
+{
+ return false;
+}
+bool IsThreeOperandAVXInstruction(instruction ins)
+{
+ return false;
+}
+bool TakesVexPrefix(instruction ins)
+{
+ return false;
+}
+size_t AddVexPrefixIfNeeded(instruction ins, size_t code, emitAttr attr)
+{
+ return code;
+}
+size_t AddVexPrefixIfNeededAndNotPresent(instruction ins, size_t code, emitAttr size)
+{
+ return code;
+}
+#endif // !FEATURE_AVX_SUPPORT
+
+/************************************************************************/
+/* Debug-only routines to display instructions */
+/************************************************************************/
+
+#ifdef DEBUG
+
+const char* emitFPregName(unsigned reg, bool varName = true);
+
+void emitDispReloc(ssize_t value);
+void emitDispAddrMode(instrDesc* id, bool noDetail = false);
+void emitDispShift(instruction ins, int cnt = 0);
+
+void emitDispIns(instrDesc* id,
+ bool isNew,
+ bool doffs,
+ bool asmfm,
+ unsigned offs = 0,
+ BYTE* code = nullptr,
+ size_t sz = 0,
+ insGroup* ig = nullptr);
+
+const char* emitXMMregName(unsigned reg);
+const char* emitYMMregName(unsigned reg);
+
+#endif
+
+/************************************************************************/
+/* Private members that deal with target-dependent instr. descriptors */
+/************************************************************************/
+
+private:
+void emitSetAmdDisp(instrDescAmd* id, ssize_t dsp);
+instrDesc* emitNewInstrAmd(emitAttr attr, ssize_t dsp);
+instrDesc* emitNewInstrAmdCns(emitAttr attr, ssize_t dsp, int cns);
+
+instrDesc* emitNewInstrCallDir(int argCnt,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize));
+
+instrDesc* emitNewInstrCallInd(int argCnt,
+ ssize_t disp,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize));
+
+void emitGetInsCns(instrDesc* id, CnsVal* cv);
+ssize_t emitGetInsAmdCns(instrDesc* id, CnsVal* cv);
+void emitGetInsDcmCns(instrDesc* id, CnsVal* cv);
+ssize_t emitGetInsAmdAny(instrDesc* id);
+
+/************************************************************************/
+/* Private helpers for instruction output */
+/************************************************************************/
+
+private:
+insFormat emitInsModeFormat(instruction ins, insFormat base, insFormat FPld, insFormat FPst);
+
+bool emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumber reg2 = REG_NA);
+
+bool emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id);
+
+/*****************************************************************************
+*
+* Convert between an index scale in bytes and the smaller encoding used for
+* storage in instruction descriptors.
+*/
+
+inline emitter::opSize emitEncodeScale(size_t scale)
+{
+ assert(scale == 1 || scale == 2 || scale == 4 || scale == 8);
+
+ return emitSizeEncode[scale - 1];
+}
+
+inline emitAttr emitDecodeScale(unsigned ensz)
+{
+ assert(ensz < 4);
+
+ return emitter::emitSizeDecode[ensz];
+}
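+
+// Illustrative round-trip (editorial sketch; the concrete contents of the emitSizeEncode and
+// emitSizeDecode tables are assumed, not shown here): an index scale in bytes is stored in the
+// instruction descriptor in its compact form and expanded back to an emitAttr when needed.
+//
+//     emitter::opSize enc = emitEncodeScale(8);   // compact encoding, small enough for the instrDesc
+//     emitAttr        sz  = emitDecodeScale(enc); // assumed to map back to the 8-byte emitAttr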
+
+/************************************************************************/
+/* The public entry points to output instructions */
+/************************************************************************/
+
+public:
+void emitLoopAlign();
+
+void emitIns(instruction ins);
+
+void emitIns(instruction ins, emitAttr attr);
+
+void emitInsRMW(instruction inst, emitAttr attr, GenTreeStoreInd* storeInd, GenTreePtr src);
+
+void emitInsRMW(instruction inst, emitAttr attr, GenTreeStoreInd* storeInd);
+
+void emitIns_Nop(unsigned size);
+
+void emitIns_I(instruction ins, emitAttr attr, int val);
+
+void emitIns_R(instruction ins, emitAttr attr, regNumber reg);
+
+void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs);
+
+void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val);
+
+void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2);
+
+void emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival);
+
+#ifdef FEATURE_AVX_SUPPORT
+void emitIns_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3);
+#endif
+
+void emitIns_S(instruction ins, emitAttr attr, int varx, int offs);
+
+void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+
+void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+
+void emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val);
+
+void emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs);
+
+void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs);
+
+void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val);
+
+void emitIns_IJ(emitAttr attr, regNumber reg, unsigned base);
+
+void emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int varx, int offs);
+
+void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg);
+
+void emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg);
+
+void emitIns_I_AR(
+ instruction ins, emitAttr attr, int val, regNumber reg, int offs, int memCookie = 0, void* clsCookie = nullptr);
+
+void emitIns_I_AI(instruction ins, emitAttr attr, int val, ssize_t disp);
+
+void emitIns_R_AR(instruction ins,
+ emitAttr attr,
+ regNumber ireg,
+ regNumber reg,
+ int offs,
+ int memCookie = 0,
+ void* clsCookie = nullptr);
+
+void emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp);
+
+void emitIns_AR_R(instruction ins,
+ emitAttr attr,
+ regNumber ireg,
+ regNumber reg,
+ int offs,
+ int memCookie = 0,
+ void* clsCookie = nullptr);
+
+void emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp);
+
+void emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_I_ARX(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, unsigned mul, int disp);
+
+void emitIns_R_ARX(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp);
+
+void emitIns_ARX_R(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp);
+
+void emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp);
+
+void emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp);
+
+void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp);
+
+#if FEATURE_STACK_FP_X87
+void emitIns_F_F0(instruction ins, unsigned fpreg);
+
+void emitIns_F0_F(instruction ins, unsigned fpreg);
+#endif // FEATURE_STACK_FP_X87
+
+enum EmitCallType
+{
+ EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method
+ EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method
+ EC_FUNC_ADDR, // Direct call to an absolute address
+
+ EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable)
+ EC_INDIR_R, // Indirect call via register
+ EC_INDIR_SR, // Indirect call via stack-reference (local var)
+ EC_INDIR_C, // Indirect call via static class var
+ EC_INDIR_ARD, // Indirect call via an addressing mode
+
+ EC_COUNT
+};
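+
+// Illustrative mapping (editorial sketch; argument lists are abbreviated and 'targetReg' is an
+// assumed name): a direct call to a helper or non-virtual method is emitted as EC_FUNC_TOKEN with
+// 'addr' holding the target, while a call through a register uses EC_INDIR_R with 'ireg' naming
+// that register.
+//
+//     emitIns_Call(EC_FUNC_TOKEN, methHnd, ..., addr, argSize, retSize, ...);
+//     emitIns_Call(EC_INDIR_R, methHnd, ..., nullptr, argSize, retSize, ..., /* ireg */ targetReg);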
+
+void emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ CORINFO_SIG_INFO* sigInfo, // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ GenTreeIndir* indir,
+ bool isJump = false,
+ bool isNoGC = false);
+
+void emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET,
+ regNumber ireg = REG_NA,
+ regNumber xreg = REG_NA,
+ unsigned xmul = 0,
+ ssize_t disp = 0,
+ bool isJump = false,
+ bool isNoGC = false);
+
+#ifdef _TARGET_AMD64_
+// Is the last instruction emitted a call instruction?
+bool emitIsLastInsCall();
+
+// Insert a NOP at the end of the current instruction group if the last emitted instruction was a 'call',
+// because the next instruction group will be an epilog.
+void emitOutputPreEpilogNOP();
+#endif // _TARGET_AMD64_
+
+/*****************************************************************************
+ *
+ * Given a jump, return true if it's a conditional jump.
+ */
+
+inline bool emitIsCondJump(instrDesc* jmp)
+{
+ instruction ins = jmp->idIns();
+
+ assert(jmp->idInsFmt() == IF_LABEL);
+
+ return (ins != INS_call && ins != INS_jmp);
+}
+
+/*****************************************************************************
+ *
+ * Given a jump, return true if it's an unconditional jump.
+ */
+
+inline bool emitIsUncondJump(instrDesc* jmp)
+{
+ instruction ins = jmp->idIns();
+
+ assert(jmp->idInsFmt() == IF_LABEL);
+
+ return (ins == INS_jmp);
+}
+
+#endif // _TARGET_XARCH_
diff --git a/src/jit/error.cpp b/src/jit/error.cpp
new file mode 100644
index 0000000000..71c3301045
--- /dev/null
+++ b/src/jit/error.cpp
@@ -0,0 +1,536 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX error.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "compiler.h"
+
+#if MEASURE_FATAL
+unsigned fatal_badCode;
+unsigned fatal_noWay;
+unsigned fatal_NOMEM;
+unsigned fatal_noWayAssertBody;
+#ifdef DEBUG
+unsigned fatal_noWayAssertBodyArgs;
+#endif // DEBUG
+unsigned fatal_NYI;
+#endif // MEASURE_FATAL
+
+/*****************************************************************************/
+void DECLSPEC_NORETURN fatal(int errCode)
+{
+#ifdef DEBUG
+ if (errCode != CORJIT_SKIPPED) // Don't stop on NYI: use COMPlus_AltJitAssertOnNYI for that.
+ {
+ if (JitConfig.DebugBreakOnVerificationFailure())
+ {
+ DebugBreak();
+ }
+ }
+#endif // DEBUG
+
+ ULONG_PTR exceptArg = errCode;
+ RaiseException(FATAL_JIT_EXCEPTION, EXCEPTION_NONCONTINUABLE, 1, &exceptArg);
+ UNREACHABLE();
+}
+
+/*****************************************************************************/
+void DECLSPEC_NORETURN badCode()
+{
+#if MEASURE_FATAL
+ fatal_badCode += 1;
+#endif // MEASURE_FATAL
+
+ fatal(CORJIT_BADCODE);
+}
+
+/*****************************************************************************/
+void DECLSPEC_NORETURN noWay()
+{
+#if MEASURE_FATAL
+ fatal_noWay += 1;
+#endif // MEASURE_FATAL
+
+ fatal(CORJIT_INTERNALERROR);
+}
+
+/*****************************************************************************/
+void DECLSPEC_NORETURN NOMEM()
+{
+#if MEASURE_FATAL
+ fatal_NOMEM += 1;
+#endif // MEASURE_FATAL
+
+ fatal(CORJIT_OUTOFMEM);
+}
+
+/*****************************************************************************/
+void DECLSPEC_NORETURN noWayAssertBody()
+{
+#if MEASURE_FATAL
+ fatal_noWayAssertBody += 1;
+#endif // MEASURE_FATAL
+
+#ifndef DEBUG
+ // Even in retail, if we hit a noway, and we have this variable set, we don't want to fall back
+ // to MinOpts, which might hide a regression. Instead, hit a breakpoint (and crash). We don't
+ // have the assert code to fall back on here.
+ // The debug path also goes through this function, to make the call to 'fatal'.
+ // This kind of noway is hit for unreached().
+ if (JitConfig.JitEnableNoWayAssert())
+ {
+ DebugBreak();
+ }
+#endif // !DEBUG
+
+ fatal(CORJIT_RECOVERABLEERROR);
+}
+
+inline static bool ShouldThrowOnNoway(
+#ifdef FEATURE_TRACELOGGING
+ const char* filename, unsigned line
+#endif
+ )
+{
+ return JitTls::GetCompiler() == nullptr ||
+ JitTls::GetCompiler()->compShouldThrowOnNoway(
+#ifdef FEATURE_TRACELOGGING
+ filename, line
+#endif
+ );
+}
+
+/*****************************************************************************/
+void noWayAssertBodyConditional(
+#ifdef FEATURE_TRACELOGGING
+ const char* filename, unsigned line
+#endif
+ )
+{
+#ifdef FEATURE_TRACELOGGING
+ if (ShouldThrowOnNoway(filename, line))
+#else
+ if (ShouldThrowOnNoway())
+#endif // FEATURE_TRACELOGGING
+ {
+ noWayAssertBody();
+ }
+}
+
+#if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND)
+
+/*****************************************************************************/
+void notYetImplemented(const char* msg, const char* filename, unsigned line)
+{
+#if FUNC_INFO_LOGGING
+#ifdef DEBUG
+ LogEnv* env = JitTls::GetLogEnv();
+ if (env != nullptr)
+ {
+ const Compiler* const pCompiler = env->compiler;
+ if (pCompiler->verbose)
+ {
+ printf("\n\n%s - NYI (%s:%d - %s)\n", pCompiler->info.compFullName, filename, line, msg);
+ }
+ }
+ if (Compiler::compJitFuncInfoFile != nullptr)
+ {
+ fprintf(Compiler::compJitFuncInfoFile, "%s - NYI (%s:%d - %s)\n",
+ (env == nullptr) ? "UNKNOWN" : env->compiler->info.compFullName, filename, line, msg);
+ fflush(Compiler::compJitFuncInfoFile);
+ }
+#else // !DEBUG
+ if (Compiler::compJitFuncInfoFile != nullptr)
+ {
+ fprintf(Compiler::compJitFuncInfoFile, "NYI (%s:%d - %s)\n", filename, line, msg);
+ fflush(Compiler::compJitFuncInfoFile);
+ }
+#endif // !DEBUG
+#endif // FUNC_INFO_LOGGING
+
+#ifdef DEBUG
+ Compiler* pCompiler = JitTls::GetCompiler();
+ if (pCompiler != nullptr)
+ {
+ // Assume we're within a compFunctionTrace boundary, which might not be true.
+ pCompiler->compFunctionTraceEnd(nullptr, 0, true);
+ }
+#endif // DEBUG
+
+ DWORD value = JitConfig.AltJitAssertOnNYI();
+
+ // 0 means just silently skip
+ // If we are in retail builds, assume ignore
+ // 1 means pop up the assert (abort=abort, retry=debugger, ignore=skip)
+ // 2 means silently don't skip (same as 3 for retail)
+ // 3 means pop up the assert (abort=abort, retry=debugger, ignore=don't skip)
+ if (value & 1)
+ {
+#ifdef DEBUG
+ assertAbort(msg, filename, line);
+#endif
+ }
+
+ if ((value & 2) == 0)
+ {
+#if MEASURE_FATAL
+ fatal_NYI += 1;
+#endif // MEASURE_FATAL
+
+ fatal(CORJIT_SKIPPED);
+ }
+}
+
+#endif // #if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND)
+
+/*****************************************************************************/
+LONG __JITfilter(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam)
+{
+ DWORD exceptCode = pExceptionPointers->ExceptionRecord->ExceptionCode;
+
+ if (exceptCode == FATAL_JIT_EXCEPTION)
+ {
+ ErrorTrapParam* pParam = (ErrorTrapParam*)lpvParam;
+
+ assert(pExceptionPointers->ExceptionRecord->NumberParameters == 1);
+ pParam->errc = (int)pExceptionPointers->ExceptionRecord->ExceptionInformation[0];
+
+ ICorJitInfo* jitInfo = pParam->jitInfo;
+
+ if (jitInfo != nullptr)
+ {
+ jitInfo->reportFatalError((CorJitResult)pParam->errc);
+ }
+
+ return EXCEPTION_EXECUTE_HANDLER;
+ }
+
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+
+DWORD getBreakOnBadCode()
+{
+ return JitConfig.JitBreakOnBadCode();
+}
+
+/*****************************************************************************/
+void debugError(const char* msg, const char* file, unsigned line)
+{
+ const char* tail = strrchr(file, '\\');
+ if (tail)
+ {
+ file = tail + 1;
+ }
+
+ LogEnv* env = JitTls::GetLogEnv();
+
+ logf(LL_ERROR, "COMPILATION FAILED: file: %s:%d compiling method %s reason %s\n", file, line,
+ env->compiler->info.compFullName, msg);
+
+ // We now only assert when the user explicitly sets COMPlus_JitRequired=1.
+ // If COMPlus_JitRequired is 0 or is not set, we will not assert.
+ if (JitConfig.JitRequired() == 1 || getBreakOnBadCode())
+ {
+ // Don't assert if verification is done.
+ if (!env->compiler->tiVerificationNeeded || getBreakOnBadCode())
+ {
+ assertAbort(msg, "NO-FILE", 0);
+ }
+ }
+
+ BreakIfDebuggerPresent();
+}
+
+/*****************************************************************************/
+LogEnv::LogEnv(ICorJitInfo* aCompHnd) : compHnd(aCompHnd), compiler(nullptr)
+{
+}
+
+/*****************************************************************************/
+extern "C" void __cdecl assertAbort(const char* why, const char* file, unsigned line)
+{
+ const char* msg = why;
+ LogEnv* env = JitTls::GetLogEnv();
+ const int BUFF_SIZE = 8192;
+ char* buff = (char*)alloca(BUFF_SIZE);
+ if (env->compiler)
+ {
+ _snprintf_s(buff, BUFF_SIZE, _TRUNCATE, "Assertion failed '%s' in '%s' (IL size %d)\n", why,
+ env->compiler->info.compFullName, env->compiler->info.compILCodeSize);
+ msg = buff;
+ }
+ printf(""); // null string means flush
+
+#if FUNC_INFO_LOGGING
+ if (Compiler::compJitFuncInfoFile != nullptr)
+ {
+ fprintf(Compiler::compJitFuncInfoFile, "%s - Assertion failed (%s:%d - %s)\n",
+ (env == nullptr) ? "UNKNOWN" : env->compiler->info.compFullName, file, line, why);
+ }
+#endif // FUNC_INFO_LOGGING
+
+ if (env->compHnd->doAssert(file, line, msg))
+ {
+ DebugBreak();
+ }
+
+#ifdef ALT_JIT
+ // If we hit an assert, and we got here, it's either because the user hit "ignore" on the
+ // dialog pop-up, or they set COMPlus_ContinueOnAssert=1 to not emit a pop-up, but just continue.
+ // If we're an altjit, we have two options: (1) silently continue, as a normal JIT would, probably
+ // leading to additional asserts, or (2) tell the VM that the AltJit wants to skip this function,
+ // thus falling back to the fallback JIT. Setting COMPlus_AltJitSkipOnAssert=1 chooses this "skip to
+ // the fallback JIT" behavior. This is useful when doing ASM diffs, where we only want to see
+ // the first assert for any function, but we don't want to kill the whole ngen process on the
+ // first assert (which would happen if you used COMPlus_NoGuiOnAssert=1 for example).
+ if (JitConfig.AltJitSkipOnAssert() != 0)
+ {
+ fatal(CORJIT_SKIPPED);
+ }
+#elif defined(_TARGET_ARM64_)
+ // TODO-ARM64-NYI: remove this after the JIT no longer asserts during startup
+ //
+ // When we are bringing up the new Arm64 JIT we set COMPlus_ContinueOnAssert=1
+ // We only want to hit one assert then we will fall back to the interpreter.
+ //
+ bool interpreterFallback = (JitConfig.InterpreterFallback() != 0);
+
+ if (interpreterFallback)
+ {
+ fatal(CORJIT_SKIPPED);
+ }
+#endif
+}
+
+/*********************************************************************/
+BOOL vlogf(unsigned level, const char* fmt, va_list args)
+{
+ return JitTls::GetLogEnv()->compHnd->logMsg(level, fmt, args);
+}
+
+int vflogf(FILE* file, const char* fmt, va_list args)
+{
+ // 0-length string means flush
+ if (fmt[0] == '\0')
+ {
+ fflush(file);
+ return 0;
+ }
+
+ const int BUFF_SIZE = 8192;
+ char buffer[BUFF_SIZE];
+ int written = _vsnprintf_s(&buffer[0], BUFF_SIZE, _TRUNCATE, fmt, args);
+
+ if (JitConfig.JitDumpToDebugger())
+ {
+ OutputDebugStringA(buffer);
+ }
+
+ // We use fputs here so that this executes as fast as possible
+ fputs(&buffer[0], file);
+ return written;
+}
+
+int flogf(FILE* file, const char* fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ int written = vflogf(file, fmt, args);
+ va_end(args);
+ return written;
+}
+
+/*********************************************************************/
+int logf(const char* fmt, ...)
+{
+ va_list args;
+ static bool logToEEfailed = false;
+ int written = 0;
+ //
+ // We remember when the EE failed to log, because vlogf()
+ // is very slow in a checked build.
+ //
+ // If it fails to log an LL_INFO1000 message once
+ // it will always fail when logging an LL_INFO1000 message.
+ //
+ if (!logToEEfailed)
+ {
+ va_start(args, fmt);
+ if (!vlogf(LL_INFO1000, fmt, args))
+ {
+ logToEEfailed = true;
+ }
+ va_end(args);
+ }
+
+ if (logToEEfailed)
+ {
+ // if the EE refuses to log it, we try to send it to stdout
+ va_start(args, fmt);
+ written = vflogf(jitstdout, fmt, args);
+ va_end(args);
+ }
+#if 0 // Enable this only when you need it
+ else
+ {
+ //
+ // The EE just successfully logged our message
+ //
+ static ConfigDWORD fJitBreakOnDumpToken;
+ DWORD breakOnDumpToken = fJitBreakOnDumpToken.val(CLRConfig::INTERNAL_BreakOnDumpToken);
+ static DWORD forbidEntry = 0;
+
+ if ((breakOnDumpToken != 0xffffffff) && (forbidEntry == 0))
+ {
+ forbidEntry = 1;
+
+ // Use value of 0 to get the dump
+ static DWORD currentLine = 1;
+
+ if (currentLine == breakOnDumpToken)
+ {
+ assert(!"Dump token reached");
+ }
+
+ printf("(Token=0x%x) ", currentLine++);
+ forbidEntry = 0;
+ }
+ }
+#endif // 0
+ va_end(args);
+
+ return written;
+}
+
+/*********************************************************************/
+void gcDump_logf(const char* fmt, ...)
+{
+ va_list args;
+ static bool logToEEfailed = false;
+ //
+ // We remember when the EE failed to log, because vlogf()
+ // is very slow in a checked build.
+ //
+ // If it fails to log an LL_INFO1000 message once
+ // it will always fail when logging an LL_INFO1000 message.
+ //
+ if (!logToEEfailed)
+ {
+ va_start(args, fmt);
+ if (!vlogf(LL_INFO1000, fmt, args))
+ {
+ logToEEfailed = true;
+ }
+ va_end(args);
+ }
+
+ if (logToEEfailed)
+ {
+ // if the EE refuses to log it, we try to send it to stdout
+ va_start(args, fmt);
+ vflogf(jitstdout, fmt, args);
+ va_end(args);
+ }
+#if 0 // Enable this only when you need it
+ else
+ {
+ //
+ // The EE just successfully logged our message
+ //
+ static ConfigDWORD fJitBreakOnDumpToken;
+ DWORD breakOnDumpToken = fJitBreakOnDumpToken.val(CLRConfig::INTERNAL_BreakOnDumpToken);
+ static DWORD forbidEntry = 0;
+
+ if ((breakOnDumpToken != 0xffffffff) && (forbidEntry == 0))
+ {
+ forbidEntry = 1;
+
+ // Use value of 0 to get the dump
+ static DWORD currentLine = 1;
+
+ if (currentLine == breakOnDumpToken)
+ {
+ assert(!"Dump token reached");
+ }
+
+ printf("(Token=0x%x) ", currentLine++);
+ forbidEntry = 0;
+ }
+ }
+#endif // 0
+ va_end(args);
+}
+
+/*********************************************************************/
+void logf(unsigned level, const char* fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ vlogf(level, fmt, args);
+ va_end(args);
+}
+
+void DECLSPEC_NORETURN badCode3(const char* msg, const char* msg2, int arg, __in_z const char* file, unsigned line)
+{
+ const int BUFF_SIZE = 512;
+ char buf1[BUFF_SIZE];
+ char buf2[BUFF_SIZE];
+ sprintf_s(buf1, BUFF_SIZE, "%s%s", msg, msg2);
+ sprintf_s(buf2, BUFF_SIZE, buf1, arg);
+
+ debugError(buf2, file, line);
+ badCode();
+}
+
+void noWayAssertAbortHelper(const char* cond, const char* file, unsigned line)
+{
+ // Show the assert UI.
+ if (JitConfig.JitEnableNoWayAssert())
+ {
+ assertAbort(cond, file, line);
+ }
+}
+
+void noWayAssertBodyConditional(const char* cond, const char* file, unsigned line)
+{
+#ifdef FEATURE_TRACELOGGING
+ if (ShouldThrowOnNoway(file, line))
+#else
+ if (ShouldThrowOnNoway())
+#endif
+ {
+ noWayAssertBody(cond, file, line);
+ }
+ // In CHK we want the assert UI to show up in min-opts.
+ else
+ {
+ noWayAssertAbortHelper(cond, file, line);
+ }
+}
+
+void DECLSPEC_NORETURN noWayAssertBody(const char* cond, const char* file, unsigned line)
+{
+#if MEASURE_FATAL
+ fatal_noWayAssertBodyArgs += 1;
+#endif // MEASURE_FATAL
+
+ noWayAssertAbortHelper(cond, file, line);
+ noWayAssertBody();
+}
+
+#endif // DEBUG
diff --git a/src/jit/error.h b/src/jit/error.h
new file mode 100644
index 0000000000..c56971aaf7
--- /dev/null
+++ b/src/jit/error.h
@@ -0,0 +1,295 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#ifndef _ERROR_H_
+#define _ERROR_H_
+/*****************************************************************************/
+
+#include <corjit.h> // for CORJIT_INTERNALERROR
+#include <safemath.h> // For FitsIn, used by SafeCvt methods.
+
+#define FATAL_JIT_EXCEPTION 0x02345678
+class Compiler;
+
+struct ErrorTrapParam
+{
+ int errc;
+ ICorJitInfo* jitInfo;
+ EXCEPTION_POINTERS exceptionPointers;
+ ErrorTrapParam()
+ {
+ jitInfo = nullptr;
+ }
+};
+
+// Only catch JIT internal errors (will not catch EE generated Errors)
+extern LONG __JITfilter(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam);
+
+#define setErrorTrap(compHnd, ParamType, paramDef, paramRef) \
+ struct __JITParam : ErrorTrapParam \
+ { \
+ ParamType param; \
+ } __JITparam; \
+ __JITparam.errc = CORJIT_INTERNALERROR; \
+ __JITparam.jitInfo = compHnd; \
+ __JITparam.param = paramRef; \
+ PAL_TRY(__JITParam*, __JITpParam, &__JITparam) \
+ { \
+ ParamType paramDef = __JITpParam->param;
+
+// Only catch JIT internal errors (will not catch EE generated Errors)
+#define impJitErrorTrap() \
+ } \
+ PAL_EXCEPT_FILTER(__JITfilter) \
+ { \
+ int __errc = __JITparam.errc; \
+ (void)__errc;
+
+#define endErrorTrap() \
+ } \
+ PAL_ENDTRY
+
+#define finallyErrorTrap() \
+ } \
+ PAL_FINALLY \
+ {
+
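+// Illustrative expansion of the macro trio above (editorial sketch; the 'Param' struct and the
+// work done inside the protected region are assumed, not taken from the JIT sources):
+//
+//     struct Param
+//     {
+//         Compiler* pThis;
+//     } param;
+//     param.pThis = this;
+//
+//     setErrorTrap(compHnd, Param*, pParamOuter, &param)  // opens PAL_TRY
+//     {
+//         pParamOuter->pThis->compCompileHelper(...);     // protected work
+//     }
+//     impJitErrorTrap()                                    // PAL_EXCEPT_FILTER(__JITfilter)
+//     {
+//         result = __errc;                                 // CORJIT_* code raised via fatal()
+//     }
+//     endErrorTrap()                                       // PAL_ENDTRY
+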
+/*****************************************************************************/
+
+extern void debugError(const char* msg, const char* file, unsigned line);
+extern void DECLSPEC_NORETURN badCode();
+extern void DECLSPEC_NORETURN
+badCode3(const char* msg, const char* msg2, int arg, __in_z const char* file, unsigned line);
+extern void DECLSPEC_NORETURN noWay();
+extern void DECLSPEC_NORETURN NOMEM();
+extern void DECLSPEC_NORETURN fatal(int errCode);
+
+extern void DECLSPEC_NORETURN noWayAssertBody();
+extern void DECLSPEC_NORETURN noWayAssertBody(const char* cond, const char* file, unsigned line);
+
+// Conditionally invoke the noway assert body. The conditional predicate is evaluated using a method on the tlsCompiler.
+// If a noway_assert is hit, we ask the Compiler whether to raise an exception (i.e., conditionally raise an exception).
+// For backward compatibility between v4.5 and v4.0, in min-opts we take a shot at codegen rather than rethrowing.
+extern void noWayAssertBodyConditional(
+#ifdef FEATURE_TRACELOGGING
+ const char* file, unsigned line
+#endif
+ );
+extern void noWayAssertBodyConditional(const char* cond, const char* file, unsigned line);
+
+#if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND)
+
+// This function can return, depending on the config flag or the debugger
+extern void notYetImplemented(const char* msg, const char* file, unsigned line);
+#define NYI(msg) notYetImplemented("NYI: " #msg, __FILE__, __LINE__)
+#define NYI_IF(cond, msg) \
+ if (cond) \
+ notYetImplemented("NYI: " #msg, __FILE__, __LINE__)
+
+#ifdef _TARGET_AMD64_
+
+#define NYI_AMD64(msg) notYetImplemented("NYI_AMD64: " #msg, __FILE__, __LINE__)
+#define NYI_X86(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM64(msg) \
+ do \
+ { \
+ } while (0)
+
+#elif defined(_TARGET_X86_)
+
+#define NYI_AMD64(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_X86(msg) notYetImplemented("NYI_X86: " #msg, __FILE__, __LINE__)
+#define NYI_ARM(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM64(msg) \
+ do \
+ { \
+ } while (0)
+
+#elif defined(_TARGET_ARM_)
+
+#define NYI_AMD64(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_X86(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM(msg) notYetImplemented("NYI_ARM: " #msg, __FILE__, __LINE__)
+#define NYI_ARM64(msg) \
+ do \
+ { \
+ } while (0)
+
+#elif defined(_TARGET_ARM64_)
+
+#define NYI_AMD64(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_X86(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM64(msg) notYetImplemented("NYI_ARM64: " #msg, __FILE__, __LINE__)
+
+#else
+
+#error "Unknown platform, not x86, ARM, or AMD64?"
+
+#endif
+
+#else // defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+
+#define NYI(msg) assert(!msg)
+#define NYI_AMD64(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM64(msg) \
+ do \
+ { \
+ } while (0)
+
+#endif // _TARGET_X86_
+
+#if !defined(_TARGET_X86_) && !defined(FEATURE_STACK_FP_X87)
+#define NYI_FLAT_FP_X87(msg) notYetImplemented("NYI: " #msg, __FILE__, __LINE__)
+#define NYI_FLAT_FP_X87_NC(msg) notYetImplemented("NYI: " #msg, __FILE__, __LINE__)
+
+#else
+
+#define NYI_FLAT_FP_X87(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_FLAT_FP_X87_NC(msg) \
+ do \
+ { \
+ } while (0)
+
+#endif // !_TARGET_X86_ && !FEATURE_STACK_FP_X87
+
+#ifdef DEBUG
+#define NO_WAY(msg) (debugError(msg, __FILE__, __LINE__), noWay())
+// Used for fallback stress mode
+#define NO_WAY_NOASSERT(msg) noWay()
+#define BADCODE(msg) (debugError(msg, __FILE__, __LINE__), badCode())
+#define BADCODE3(msg, msg2, arg) badCode3(msg, msg2, arg, __FILE__, __LINE__)
+// Used for an assert that we want to convert into BADCODE to force minopts, or in minopts to force codegen.
+#define noway_assert(cond) \
+ do \
+ { \
+ if (!(cond)) \
+ { \
+ noWayAssertBodyConditional(#cond, __FILE__, __LINE__); \
+ } \
+ } while (0)
+#define unreached() noWayAssertBody("unreached", __FILE__, __LINE__)
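+
+// Typical uses (editorial sketch; the conditions shown are examples only): noway_assert guards an
+// invariant whose violation should force the BADCODE / minopts path rather than silently producing
+// bad code, and unreached() marks paths that must never execute.
+//
+//     noway_assert(tree->gtOper == GT_CALL);
+//     default:
+//         unreached(); // impossible case in a switch over the oper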
+
+#else
+
+#define NO_WAY(msg) noWay()
+#define BADCODE(msg) badCode()
+#define BADCODE3(msg, msg2, arg) badCode()
+
+#ifdef FEATURE_TRACELOGGING
+#define NOWAY_ASSERT_BODY_ARGUMENTS __FILE__, __LINE__
+#else
+#define NOWAY_ASSERT_BODY_ARGUMENTS
+#endif
+
+#define noway_assert(cond) \
+ do \
+ { \
+ if (!(cond)) \
+ { \
+ noWayAssertBodyConditional(NOWAY_ASSERT_BODY_ARGUMENTS); \
+ } \
+ } while (0)
+#define unreached() noWayAssertBody()
+
+#endif
+
+// IMPL_LIMITATION is called when we encounter valid IL that is not
+// supported by our current implementation because of various
+// limitations (that could be removed in the future)
+#define IMPL_LIMITATION(msg) NO_WAY(msg)
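+// Hypothetical example of use (message text is illustrative only):
+//     IMPL_LIMITATION("Unsupported IL construct");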
+
+#if defined(_HOST_X86_)
+
+// While debugging in a debugger, the "int 3" will cause the program to break.
+// Outside a debugger, the exception handler will just filter out the "int 3".
+
+#define BreakIfDebuggerPresent() \
+ do \
+ { \
+ __try \
+ { \
+ __asm {int 3} \
+ } \
+ __except (EXCEPTION_EXECUTE_HANDLER) \
+ { \
+ } \
+ } while (0)
+
+#else
+#define BreakIfDebuggerPresent() \
+ do \
+ { \
+ if (IsDebuggerPresent()) \
+ DebugBreak(); \
+ } while (0)
+#endif
+
+#ifdef DEBUG
+DWORD getBreakOnBadCode();
+#endif
+
+// For narrowing numeric conversions, the following two methods ensure that the
+// source value fits in the destination type, using either "assert" or
+// "noway_assert" to validate the conversion. Obviously, each returns the source value as
+// the destination type.
+
+// (There is an argument that these should be macros, to let the preprocessor capture
+// a more useful file/line for the error message. But then we have to use comma expressions
+// so that these can be used in expressions, etc., which is ugly. So I propose we rely on
+// getting stack traces in other ways.)
+template <typename Dst, typename Src>
+inline Dst SafeCvtAssert(Src val)
+{
+ assert(FitsIn<Dst>(val));
+ return static_cast<Dst>(val);
+}
+
+template <typename Dst, typename Src>
+inline Dst SafeCvtNowayAssert(Src val)
+{
+ noway_assert(FitsIn<Dst>(val));
+ return static_cast<Dst>(val);
+}
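+
+// Usage sketch (editorial; the variable names are assumed): narrowing a size_t byte count into an
+// unsigned field, with the fit check expressed as either an assert or a noway_assert.
+//
+//     size_t   rawSize = ...;
+//     unsigned size    = SafeCvtAssert<unsigned>(rawSize);       // debug-only fit check
+//     unsigned size2   = SafeCvtNowayAssert<unsigned>(rawSize);  // checked via noway_assert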
+
+#endif
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
new file mode 100644
index 0000000000..1c68bfd96a
--- /dev/null
+++ b/src/jit/flowgraph.cpp
@@ -0,0 +1,22276 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX FlowGraph XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "allocacheck.h" // for alloca
+
+/*****************************************************************************/
+
+void Compiler::fgInit()
+{
+ impInit();
+
+ /* Initialization for fgWalkTreePre() and fgWalkTreePost() */
+
+ fgFirstBBScratch = nullptr;
+
+#ifdef DEBUG
+ fgPrintInlinedMethods = JitConfig.JitPrintInlinedMethods() == 1;
+#endif // DEBUG
+
+ /* We haven't yet computed the bbPreds lists */
+ fgComputePredsDone = false;
+
+ /* We haven't yet computed the bbCheapPreds lists */
+ fgCheapPredsValid = false;
+
+ /* We haven't yet computed the edge weight */
+ fgEdgeWeightsComputed = false;
+ fgHaveValidEdgeWeights = false;
+ fgSlopUsedInEdgeWeights = false;
+ fgRangeUsedInEdgeWeights = true;
+ fgNeedsUpdateFlowGraph = false;
+ fgCalledWeight = BB_ZERO_WEIGHT;
+
+ /* We haven't yet computed the dominator sets */
+ fgDomsComputed = false;
+
+#ifdef DEBUG
+ fgReachabilitySetsValid = false;
+#endif // DEBUG
+
+ /* We don't know yet which loops will always execute calls */
+ fgLoopCallMarked = false;
+
+ /* We haven't created GC Poll blocks yet. */
+ fgGCPollsCreated = false;
+
+ /* Initialize the basic block list */
+
+ fgFirstBB = nullptr;
+ fgLastBB = nullptr;
+ fgFirstColdBlock = nullptr;
+
+#if FEATURE_EH_FUNCLETS
+ fgFirstFuncletBB = nullptr;
+ fgFuncletsCreated = false;
+#endif // FEATURE_EH_FUNCLETS
+
+ fgBBcount = 0;
+
+#ifdef DEBUG
+ fgBBcountAtCodegen = 0;
+#endif // DEBUG
+
+ fgBBNumMax = 0;
+ fgEdgeCount = 0;
+ fgDomBBcount = 0;
+ fgBBVarSetsInited = false;
+ fgReturnCount = 0;
+
+ // Initialize BlockSet data.
+ fgCurBBEpoch = 0;
+ fgCurBBEpochSize = 0;
+ fgBBSetCountInSizeTUnits = 0;
+
+ genReturnBB = nullptr;
+
+ /* We haven't reached the global morphing phase */
+ fgGlobalMorph = false;
+ fgExpandInline = false;
+ fgModified = false;
+
+#ifdef DEBUG
+ fgSafeBasicBlockCreation = true;
+#endif // DEBUG
+
+ fgLocalVarLivenessDone = false;
+
+ /* Statement list is not threaded yet */
+
+ fgStmtListThreaded = false;
+
+ // Initialize the logic for adding code. This is used to insert code such
+ // as the code that raises an exception when an array range check fails.
+
+ fgAddCodeList = nullptr;
+ fgAddCodeModf = false;
+
+ for (int i = 0; i < SCK_COUNT; i++)
+ {
+ fgExcptnTargetCache[i] = nullptr;
+ }
+
+ /* Keep track of the max count of pointer arguments */
+
+ fgPtrArgCntCur = 0;
+ fgPtrArgCntMax = 0;
+
+ /* This global flag is set whenever we remove a statement */
+ fgStmtRemoved = false;
+
+ /* This global flag is set whenever we add a throw block for a RngChk */
+ fgRngChkThrowAdded = false; /* reset flag for fgIsCodeAdded() */
+
+ fgIncrCount = 0;
+
+ /* We will record a list of all BBJ_RETURN blocks here */
+ fgReturnBlocks = nullptr;
+
+ /* This is set by fgComputeReachability */
+ fgEnterBlks = BlockSetOps::UninitVal();
+
+#ifdef DEBUG
+ fgEnterBlksSetValid = false;
+#endif // DEBUG
+
+#if !FEATURE_EH_FUNCLETS
+ ehMaxHndNestingCount = 0;
+#endif // !FEATURE_EH_FUNCLETS
+
+ /* Init the fgBigOffsetMorphingTemps to be BAD_VAR_NUM. */
+ for (int i = 0; i < TYP_COUNT; i++)
+ {
+ fgBigOffsetMorphingTemps[i] = BAD_VAR_NUM;
+ }
+
+ fgNoStructPromotion = false;
+ fgNoStructParamPromotion = false;
+
+ optValnumCSE_phase = false; // referenced in fgMorphSmpOp()
+
+#ifdef DEBUG
+ fgNormalizeEHDone = false;
+#endif // DEBUG
+
+#ifdef DEBUG
+ if (!compIsForInlining())
+ {
+ if ((JitConfig.JitNoStructPromotion() & 1) == 1)
+ {
+ fgNoStructPromotion = true;
+ }
+ if ((JitConfig.JitNoStructPromotion() & 2) == 2)
+ {
+ fgNoStructParamPromotion = true;
+ }
+ }
+#endif // DEBUG
+
+ if (!compIsForInlining())
+ {
+ m_promotedStructDeathVars = nullptr;
+ }
+#ifdef FEATURE_SIMD
+ fgPreviousCandidateSIMDFieldAsgStmt = nullptr;
+#endif
+}
+
+bool Compiler::fgHaveProfileData()
+{
+ if (compIsForInlining() || compIsForImportOnly())
+ {
+ return false;
+ }
+
+ return (fgProfileBuffer != nullptr);
+}
+
+bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weightWB)
+{
+ noway_assert(weightWB != nullptr);
+ unsigned weight = 0;
+
+#ifdef DEBUG
+ unsigned hashSeed = fgStressBBProf();
+ if (hashSeed != 0)
+ {
+ unsigned hash = (info.compMethodHash() * hashSeed) ^ (offset * 1027);
+
+ // We need to especially stress the procedure splitting codepath. Therefore
+ // one third of the time we should return a weight of zero.
+ // Otherwise we should return some random weight (usually between 0 and 288).
+ // The code below gives a weight of zero about 44% of the time.
+
+ if (hash % 3 == 0)
+ {
+ weight = 0;
+ }
+ else if (hash % 11 == 0)
+ {
+ weight = (hash % 23) * (hash % 29) * (hash % 31);
+ }
+ else
+ {
+ weight = (hash % 17) * (hash % 19);
+ }
+
+ // The first block is never given a weight of zero
+ if ((offset == 0) && (weight == 0))
+ {
+ weight = 1 + (hash % 5);
+ }
+
+ *weightWB = weight;
+ return true;
+ }
+#endif // DEBUG
+
+ if (fgHaveProfileData() == false)
+ {
+ return false;
+ }
+
+ noway_assert(!compIsForInlining());
+ for (unsigned i = 0; i < fgProfileBufferCount; i++)
+ {
+ if (fgProfileBuffer[i].ILOffset == offset)
+ {
+ weight = fgProfileBuffer[i].ExecutionCount;
+
+ *weightWB = weight;
+ return true;
+ }
+ }
+
+ *weightWB = 0;
+ return true;
+}
+
+void Compiler::fgInstrumentMethod()
+{
+ noway_assert(!compIsForInlining());
+
+ // Count the number of basic blocks in the method
+
+ int countOfBlocks = 0;
+ BasicBlock* block;
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (!(block->bbFlags & BBF_IMPORTED) || (block->bbFlags & BBF_INTERNAL))
+ {
+ continue;
+ }
+ countOfBlocks++;
+ }
+
+ // Allocate the profile buffer
+
+ ICorJitInfo::ProfileBuffer* bbProfileBuffer;
+
+ HRESULT res = info.compCompHnd->allocBBProfileBuffer(countOfBlocks, &bbProfileBuffer);
+
+ ICorJitInfo::ProfileBuffer* bbProfileBufferStart = bbProfileBuffer;
+
+ GenTreePtr stmt;
+
+ if (!SUCCEEDED(res))
+ {
+ // The E_NOTIMPL status is returned when we are profiling a generic method from a different assembly
+ if (res == E_NOTIMPL)
+ {
+ // In such cases we still want to add the method entry callback node
+
+ GenTreeArgList* args = gtNewArgList(gtNewIconEmbMethHndNode(info.compMethodHnd));
+ GenTreePtr call = gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, 0, args);
+
+ stmt = gtNewStmt(call);
+ }
+ else
+ {
+ noway_assert(!"Error: failed to allocate bbProfileBuffer");
+ return;
+ }
+ }
+ else
+ {
+ // Assign a buffer entry for each basic block
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (!(block->bbFlags & BBF_IMPORTED) || (block->bbFlags & BBF_INTERNAL))
+ {
+ continue;
+ }
+
+ bbProfileBuffer->ILOffset = block->bbCodeOffs;
+
+ GenTreePtr addr;
+ GenTreePtr value;
+
+ value = gtNewOperNode(GT_IND, TYP_INT, gtNewIconEmbHndNode((void*)&bbProfileBuffer->ExecutionCount, nullptr,
+ GTF_ICON_BBC_PTR));
+ value = gtNewOperNode(GT_ADD, TYP_INT, value, gtNewIconNode(1));
+
+ addr = gtNewOperNode(GT_IND, TYP_INT, gtNewIconEmbHndNode((void*)&bbProfileBuffer->ExecutionCount, nullptr,
+ GTF_ICON_BBC_PTR));
+
+ addr = gtNewAssignNode(addr, value);
+
+ fgInsertStmtAtBeg(block, addr);
+
+ countOfBlocks--;
+ bbProfileBuffer++;
+ }
+ noway_assert(countOfBlocks == 0);
+
+ // Add the method entry callback node
+
+ GenTreeArgList* args = gtNewArgList(gtNewIconEmbMethHndNode(info.compMethodHnd));
+ GenTreePtr call = gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, 0, args);
+
+ GenTreePtr handle =
+ gtNewIconEmbHndNode((void*)&bbProfileBufferStart->ExecutionCount, nullptr, GTF_ICON_BBC_PTR);
+ GenTreePtr value = gtNewOperNode(GT_IND, TYP_INT, handle);
+ GenTreePtr relop = gtNewOperNode(GT_NE, TYP_INT, value, gtNewIconNode(0, TYP_INT));
+ relop->gtFlags |= GTF_RELOP_QMARK;
+ GenTreePtr colon = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), call);
+ GenTreePtr cond = gtNewQmarkNode(TYP_VOID, relop, colon);
+ stmt = gtNewStmt(cond);
+ }
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtEnd(fgFirstBB, stmt);
+}
+
+/*****************************************************************************
+ *
+ * Create a basic block and append it to the current BB list.
+ */
+
+BasicBlock* Compiler::fgNewBasicBlock(BBjumpKinds jumpKind)
+{
+ // This method must not be called after the exception table has been
+ // constructed, because it does not provide support for patching
+ // the exception table.
+
+ noway_assert(compHndBBtabCount == 0);
+
+ BasicBlock* block;
+
+ /* Allocate the block descriptor */
+
+ block = bbNewBasicBlock(jumpKind);
+ noway_assert(block->bbJumpKind == jumpKind);
+
+ /* Append the block to the end of the global basic block list */
+
+ if (fgFirstBB)
+ {
+ fgLastBB->setNext(block);
+ }
+ else
+ {
+ fgFirstBB = block;
+ block->bbPrev = nullptr;
+ }
+
+ fgLastBB = block;
+
+ return block;
+}
+
+/*****************************************************************************
+ *
+ * Ensures that fgFirstBB is a scratch BasicBlock that we have added.
+ * This can be used to add initialization code (without worrying
+ * about other blocks jumping to it).
+ *
+ * Callers have to be careful that they do not mess up the order of things
+ * added via fgEnsureFirstBBisScratch in a way that changes semantics.
+ */
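+
+// For example, fgInstrumentMethod above follows exactly this pattern when it appends the
+// method-entry callback statement (caller side only):
+//
+//     fgEnsureFirstBBisScratch();          // guarantee an internal first block that we created
+//     fgInsertStmtAtEnd(fgFirstBB, stmt);  // then place the initialization code into it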
+
+void Compiler::fgEnsureFirstBBisScratch()
+{
+ // Have we already allocated a scratch block?
+
+ if (fgFirstBBisScratch())
+ {
+ return;
+ }
+
+ assert(fgFirstBBScratch == nullptr);
+
+ BasicBlock* block = bbNewBasicBlock(BBJ_NONE);
+
+ if (fgFirstBB != nullptr)
+ {
+ // If we have profile data, the new block will inherit fgFirstBB's weight
+ if (fgFirstBB->bbFlags & BBF_PROF_WEIGHT)
+ {
+ block->inheritWeight(fgFirstBB);
+ }
+ fgInsertBBbefore(fgFirstBB, block);
+ }
+ else
+ {
+ noway_assert(fgLastBB == nullptr);
+ fgFirstBB = block;
+ fgLastBB = block;
+ }
+
+ noway_assert(fgLastBB != nullptr);
+
+ block->bbFlags |= (BBF_INTERNAL | BBF_IMPORTED);
+
+ fgFirstBBScratch = fgFirstBB;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("New scratch BB%02u\n", block->bbNum);
+ }
+#endif
+}
+
+bool Compiler::fgFirstBBisScratch()
+{
+ if (fgFirstBBScratch != nullptr)
+ {
+ assert(fgFirstBBScratch == fgFirstBB);
+ assert(fgFirstBBScratch->bbFlags & BBF_INTERNAL);
+ assert(fgFirstBBScratch->countOfInEdges() == 1);
+
+ // Normally, the first scratch block is a fall-through block. However, if the block after it was an empty
+ // BBJ_ALWAYS block, it might get removed, and the code that removes it will make the first scratch block
+ // a BBJ_ALWAYS block.
+ assert((fgFirstBBScratch->bbJumpKind == BBJ_NONE) || (fgFirstBBScratch->bbJumpKind == BBJ_ALWAYS));
+
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+bool Compiler::fgBBisScratch(BasicBlock* block)
+{
+ return fgFirstBBisScratch() && (block == fgFirstBB);
+}
+
+#ifdef DEBUG
+// Check to see if block contains a statement but don't spend more than a certain
+// budget doing this per method compiled.
+// If the budget is exceeded, return 'answerOnBoundExceeded' as the answer.
+/* static */
+bool Compiler::fgBlockContainsStatementBounded(BasicBlock* block, GenTree* stmt, bool answerOnBoundExceeded /*= true*/)
+{
+ const __int64 maxLinks = 1000000000;
+
+ assert(stmt->gtOper == GT_STMT);
+
+ __int64* numTraversed = &JitTls::GetCompiler()->compNumStatementLinksTraversed;
+
+ if (*numTraversed > maxLinks)
+ {
+ return answerOnBoundExceeded;
+ }
+
+ GenTree* curr = block->firstStmt();
+ do
+ {
+ (*numTraversed)++;
+ if (curr == stmt)
+ {
+ break;
+ }
+ curr = curr->gtNext;
+ } while (curr);
+ return curr != nullptr;
+}
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// fgInsertStmtAtBeg: Insert the given tree or statement at the start of the given basic block.
+//
+// Arguments:
+// block - The block into which 'stmt' will be inserted.
+// stmt - The statement to be inserted.
+//
+// Return Value:
+// Returns the (potentially) new GT_STMT node.
+//
+// Notes:
+// If 'stmt' is not already a statement, a new statement is created from it.
+// We always insert phi statements at the beginning.
+// In other cases, if there are any phi assignments and/or an assignment of
+// the GT_CATCH_ARG, we insert after those.
+
+GenTreePtr Compiler::fgInsertStmtAtBeg(BasicBlock* block, GenTreePtr stmt)
+{
+ if (stmt->gtOper != GT_STMT)
+ {
+ stmt = gtNewStmt(stmt);
+ }
+
+ GenTreePtr list = block->firstStmt();
+
+ if (!stmt->IsPhiDefnStmt())
+ {
+ GenTreePtr insertBeforeStmt = block->FirstNonPhiDefOrCatchArgAsg();
+ if (insertBeforeStmt != nullptr)
+ {
+ return fgInsertStmtBefore(block, insertBeforeStmt, stmt);
+ }
+ else if (list != nullptr)
+ {
+ return fgInsertStmtAtEnd(block, stmt);
+ }
+ // Otherwise, we will simply insert at the beginning, below.
+ }
+
+ /* The new tree will now be the first one of the block */
+
+ block->bbTreeList = stmt;
+ stmt->gtNext = list;
+
+ /* Are there any statements in the block? */
+
+ if (list)
+ {
+ GenTreePtr last;
+
+ /* There is at least one statement already */
+
+ last = list->gtPrev;
+ noway_assert(last && last->gtNext == nullptr);
+
+ /* Insert the statement in front of the first one */
+
+ list->gtPrev = stmt;
+ stmt->gtPrev = last;
+ }
+ else
+ {
+ /* The block was completely empty */
+
+ stmt->gtPrev = stmt;
+ }
+
+ return stmt;
+}
+
+/*****************************************************************************
+ *
+ * Insert the given tree or statement at the end of the given basic block.
+ * Returns the (potentially) new GT_STMT node.
+ * If the block can be a conditional block, use fgInsertStmtNearEnd.
+ */
+
+GenTreeStmt* Compiler::fgInsertStmtAtEnd(BasicBlock* block, GenTreePtr node)
+{
+ GenTreePtr list = block->firstStmt();
+ GenTreeStmt* stmt;
+
+ if (node->gtOper != GT_STMT)
+ {
+ stmt = gtNewStmt(node);
+ }
+ else
+ {
+ stmt = node->AsStmt();
+ }
+
+ assert(stmt->gtNext == nullptr); // We don't set it, and it needs to be this after the insert
+
+ if (list)
+ {
+ GenTreePtr last;
+
+ /* There is at least one statement already */
+
+ last = list->gtPrev;
+ noway_assert(last && last->gtNext == nullptr);
+
+ /* Append the statement after the last one */
+
+ last->gtNext = stmt;
+ stmt->gtPrev = last;
+ list->gtPrev = stmt;
+ }
+ else
+ {
+ /* The block is completely empty */
+
+ block->bbTreeList = stmt;
+ stmt->gtPrev = stmt;
+ }
+
+ return stmt;
+}
+
+/*****************************************************************************
+ *
+ * Insert the given tree or statement at the end of the given basic block, but before
+ * the GT_JTRUE, if present.
+ * Returns the (potentially) new GT_STMT node.
+ */
+
+GenTreeStmt* Compiler::fgInsertStmtNearEnd(BasicBlock* block, GenTreePtr node)
+{
+ GenTreeStmt* stmt;
+
+ // This routine can only be used when in tree order.
+ assert(fgOrder == FGOrderTree);
+
+ if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH) || (block->bbJumpKind == BBJ_RETURN))
+ {
+ if (node->gtOper != GT_STMT)
+ {
+ stmt = gtNewStmt(node);
+ }
+ else
+ {
+ stmt = node->AsStmt();
+ }
+
+ GenTreeStmt* first = block->firstStmt();
+ noway_assert(first);
+ GenTreeStmt* last = block->lastStmt();
+ noway_assert(last && last->gtNext == nullptr);
+ GenTreePtr after = last->gtPrev;
+
+#if DEBUG
+ if (block->bbJumpKind == BBJ_COND)
+ {
+ noway_assert(last->gtStmtExpr->gtOper == GT_JTRUE);
+ }
+ else if (block->bbJumpKind == BBJ_RETURN)
+ {
+ noway_assert((last->gtStmtExpr->gtOper == GT_RETURN) || (last->gtStmtExpr->gtOper == GT_JMP) ||
+ // BBJ_RETURN blocks in functions returning void do not get a GT_RETURN node if they
+ // have a .tail prefix (even if canTailCall returns false for these calls)
+ // code:Compiler::impImportBlockCode (search for the RET: label)
+ // Ditto for real tail calls (all code after them has been removed)
+ ((last->gtStmtExpr->gtOper == GT_CALL) &&
+ ((info.compRetType == TYP_VOID) || last->gtStmtExpr->AsCall()->IsTailCall())));
+ }
+ else
+ {
+ noway_assert(block->bbJumpKind == BBJ_SWITCH);
+ noway_assert(last->gtStmtExpr->gtOper == GT_SWITCH);
+ }
+#endif // DEBUG
+
+ /* Append 'stmt' before 'last' */
+
+ stmt->gtNext = last;
+ last->gtPrev = stmt;
+
+ if (first == last)
+ {
+ /* There is only one stmt in the block */
+
+ block->bbTreeList = stmt;
+ stmt->gtPrev = last;
+ }
+ else
+ {
+ noway_assert(after && (after->gtNext == last));
+
+ /* Append 'stmt' after 'after' */
+
+ after->gtNext = stmt;
+ stmt->gtPrev = after;
+ }
+
+ return stmt;
+ }
+ else
+ {
+ return fgInsertStmtAtEnd(block, node);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Insert the given statement "stmt" after GT_STMT node "insertionPoint".
+ * Returns the newly inserted GT_STMT node.
+ * Note that the gtPrev list of statement nodes is circular, but the gtNext list is not.
+ */
+
+GenTreePtr Compiler::fgInsertStmtAfter(BasicBlock* block, GenTreePtr insertionPoint, GenTreePtr stmt)
+{
+ assert(block->bbTreeList != nullptr);
+ noway_assert(insertionPoint->gtOper == GT_STMT);
+ noway_assert(stmt->gtOper == GT_STMT);
+ assert(fgBlockContainsStatementBounded(block, insertionPoint));
+ assert(!fgBlockContainsStatementBounded(block, stmt, false));
+
+ if (insertionPoint->gtNext == nullptr)
+ {
+ // Ok, we want to insert after the last statement of the block.
+ stmt->gtNext = nullptr;
+ stmt->gtPrev = insertionPoint;
+
+ insertionPoint->gtNext = stmt;
+
+ // Update the backward link of the first statement of the block
+ // to point to the new last statement.
+ assert(block->bbTreeList->gtPrev == insertionPoint);
+ block->bbTreeList->gtPrev = stmt;
+ }
+ else
+ {
+ stmt->gtNext = insertionPoint->gtNext;
+ stmt->gtPrev = insertionPoint;
+
+ insertionPoint->gtNext->gtPrev = stmt;
+ insertionPoint->gtNext = stmt;
+ }
+
+ return stmt;
+}
+
+// Insert the given tree or statement before GT_STMT node "insertionPoint".
+// Returns the newly inserted GT_STMT node.
+
+GenTreePtr Compiler::fgInsertStmtBefore(BasicBlock* block, GenTreePtr insertionPoint, GenTreePtr stmt)
+{
+ assert(block->bbTreeList != nullptr);
+ noway_assert(insertionPoint->gtOper == GT_STMT);
+ noway_assert(stmt->gtOper == GT_STMT);
+ assert(fgBlockContainsStatementBounded(block, insertionPoint));
+ assert(!fgBlockContainsStatementBounded(block, stmt, false));
+
+ if (insertionPoint == block->bbTreeList)
+ {
+ // We're inserting before the first statement in the block.
+ GenTreePtr list = block->bbTreeList;
+ GenTreePtr last = list->gtPrev;
+
+ stmt->gtNext = list;
+ stmt->gtPrev = last;
+
+ block->bbTreeList = stmt;
+ list->gtPrev = stmt;
+ }
+ else
+ {
+ stmt->gtNext = insertionPoint;
+ stmt->gtPrev = insertionPoint->gtPrev;
+
+ insertionPoint->gtPrev->gtNext = stmt;
+ insertionPoint->gtPrev = stmt;
+ }
+
+ return stmt;
+}
+
+/*****************************************************************************
+ *
+ * Insert the list of statements stmtList after the stmtAfter in block.
+ * Return the last statement stmtList.
+ */
+
+GenTreePtr Compiler::fgInsertStmtListAfter(BasicBlock* block, // the block where stmtAfter is in.
+ GenTreePtr stmtAfter, // the statement where stmtList should be inserted
+ // after.
+ GenTreePtr stmtList)
+{
+ // Currently we only handle the case where both stmtAfter and stmtList are non-NULL. This makes everything easy.
+ noway_assert(stmtAfter && stmtAfter->gtOper == GT_STMT);
+ noway_assert(stmtList && stmtList->gtOper == GT_STMT);
+
+ GenTreePtr stmtLast = stmtList->gtPrev; // Last statement in a non-empty list, circular in the gtPrev list.
+ noway_assert(stmtLast);
+ noway_assert(stmtLast->gtNext == nullptr);
+
+ GenTreePtr stmtNext = stmtAfter->gtNext;
+
+ if (!stmtNext)
+ {
+ stmtAfter->gtNext = stmtList;
+ stmtList->gtPrev = stmtAfter;
+ block->bbTreeList->gtPrev = stmtLast;
+ goto _Done;
+ }
+
+ stmtAfter->gtNext = stmtList;
+ stmtList->gtPrev = stmtAfter;
+
+ stmtLast->gtNext = stmtNext;
+ stmtNext->gtPrev = stmtLast;
+
+_Done:
+
+ noway_assert(block->bbTreeList == nullptr || block->bbTreeList->gtPrev->gtNext == nullptr);
+
+ return stmtLast;
+}
+
+/*
+ Removes a block from the return block list
+*/
+void Compiler::fgRemoveReturnBlock(BasicBlock* block)
+{
+ if (fgReturnBlocks == nullptr)
+ {
+ return;
+ }
+
+ if (fgReturnBlocks->block == block)
+ {
+ // It's the 1st entry, assign new head of list.
+ fgReturnBlocks = fgReturnBlocks->next;
+ return;
+ }
+
+ for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks->next != nullptr; retBlocks = retBlocks->next)
+ {
+ if (retBlocks->next->block == block)
+ {
+ // Found it; splice it out.
+ retBlocks->next = retBlocks->next->next;
+ return;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// fgGetPredForBlock: Find and return the predecessor edge corresponding to a given predecessor block.
+//
+// Arguments:
+// block -- The block with the predecessor list to operate on.
+// blockPred -- The predecessor block to find in the predecessor list.
+//
+// Return Value:
+// The flowList edge corresponding to "blockPred". If "blockPred" is not in the predecessor list of "block",
+// then returns nullptr.
+//
+// Assumptions:
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+
+flowList* Compiler::fgGetPredForBlock(BasicBlock* block, BasicBlock* blockPred)
+{
+ noway_assert(block);
+ noway_assert(blockPred);
+ assert(!fgCheapPredsValid);
+
+ flowList* pred;
+
+ for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ if (blockPred == pred->flBlock)
+ {
+ return pred;
+ }
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// fgGetPredForBlock: Find and return the predecessor edge corresponding to a given predecessor block.
+// Also returns the address of the pointer that points to this edge, to make it possible to remove this edge from the
+// predecessor list without doing another linear search over the edge list.
+//
+// Arguments:
+// block -- The block with the predecessor list to operate on.
+// blockPred -- The predecessor block to find in the predecessor list.
+// ptrToPred -- Out parameter: set to the address of the pointer that points to the returned predecessor edge.
+//
+// Return Value:
+// The flowList edge corresponding to "blockPred". If "blockPred" is not in the predecessor list of "block",
+// then returns nullptr.
+//
+// Assumptions:
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+
+flowList* Compiler::fgGetPredForBlock(BasicBlock* block, BasicBlock* blockPred, flowList*** ptrToPred)
+{
+ assert(block);
+ assert(blockPred);
+ assert(ptrToPred);
+ assert(!fgCheapPredsValid);
+
+ flowList** predPrevAddr;
+ flowList* pred;
+
+ for (predPrevAddr = &block->bbPreds, pred = *predPrevAddr; pred != nullptr;
+ predPrevAddr = &pred->flNext, pred = *predPrevAddr)
+ {
+ if (blockPred == pred->flBlock)
+ {
+ *ptrToPred = predPrevAddr;
+ return pred;
+ }
+ }
+
+ *ptrToPred = nullptr;
+ return nullptr;
+}
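+
+// Illustrative caller pattern (editorial sketch): the out-parameter lets the caller unlink the
+// edge without a second walk of the predecessor list.
+//
+//     flowList** ptrToPred;
+//     flowList*  pred = fgGetPredForBlock(block, blockPred, &ptrToPred);
+//     if (pred != nullptr)
+//     {
+//         *ptrToPred = pred->flNext; // splice the edge out without re-searching
+//     }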
+
+//------------------------------------------------------------------------
+// fgSpliceOutPred: Removes a predecessor edge for a block from the predecessor list.
+//
+// Arguments:
+// block -- The block with the predecessor list to operate on.
+// blockPred -- The predecessor block to remove from the predecessor list. It must be a predecessor of "block".
+//
+// Return Value:
+// The flowList edge that was removed.
+//
+// Assumptions:
+// -- "blockPred" must be a predecessor block of "block".
+// -- This simply splices out the flowList object. It doesn't update block ref counts, handle duplicate counts, etc.
+// For that, use fgRemoveRefPred() or fgRemoveAllRefPred().
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+//
+// Notes:
+// -- This must walk the predecessor list to find the block in question. If the predecessor edge
+// is found using fgGetPredForBlock(), consider using the version that hands back the predecessor pointer
+// address instead, to avoid this search.
+// -- Marks fgModified = true, since the flow graph has changed.
+
+flowList* Compiler::fgSpliceOutPred(BasicBlock* block, BasicBlock* blockPred)
+{
+ assert(!fgCheapPredsValid);
+ noway_assert(block->bbPreds);
+
+ flowList* oldEdge = nullptr;
+
+ // Is this the first block in the pred list?
+ if (blockPred == block->bbPreds->flBlock)
+ {
+ oldEdge = block->bbPreds;
+ block->bbPreds = block->bbPreds->flNext;
+ }
+ else
+ {
+ flowList* pred;
+ for (pred = block->bbPreds; (pred->flNext != nullptr) && (blockPred != pred->flNext->flBlock);
+ pred = pred->flNext)
+ {
+ // empty
+ }
+ oldEdge = pred->flNext;
+ if (oldEdge == nullptr)
+ {
+ noway_assert(!"Should always find the blockPred");
+ }
+ pred->flNext = pred->flNext->flNext;
+ }
+
+ // Any changes to the flow graph invalidate the dominator sets.
+ fgModified = true;
+
+ return oldEdge;
+}
+
+//------------------------------------------------------------------------
+// fgAddRefPred: Increment block->bbRefs by one and add "blockPred" to the predecessor list of "block".
+//
+// Arguments:
+// block -- A block to operate on.
+// blockPred -- The predecessor block to add to the predecessor list.
+// oldEdge -- Optional (default: nullptr). If non-nullptr, and a new edge is created (and the dup count
+// of an existing edge is not just incremented), the edge weights are copied from this edge.
+// initializingPreds -- Optional (default: false). Only set to "true" when the initial preds computation is
+// happening.
+//
+// Return Value:
+// The flow edge representing the predecessor.
+//
+// Assumptions:
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+//
+// Notes:
+// -- block->bbRefs is incremented by one to account for the increase in incoming edges.
+// -- block->bbRefs is adjusted even if preds haven't been computed. If preds haven't been computed,
+// the preds themselves aren't touched.
+// -- fgModified is set if a new flow edge is created (but not if an existing flow edge dup count is incremented),
+// indicating that the flow graph shape has changed.
+
+flowList* Compiler::fgAddRefPred(BasicBlock* block,
+ BasicBlock* blockPred,
+ flowList* oldEdge /* = nullptr */,
+ bool initializingPreds /* = false */)
+{
+ assert(block != nullptr);
+ assert(blockPred != nullptr);
+
+ block->bbRefs++;
+
+ if (!fgComputePredsDone && !initializingPreds)
+ {
+ // Why is someone trying to update the preds list when the preds haven't been created?
+ // Ignore them! This can happen when fgMorph is called before the preds list is created.
+ return nullptr;
+ }
+
+ assert(!fgCheapPredsValid);
+
+ flowList* flow = fgGetPredForBlock(block, blockPred);
+
+ if (flow)
+ {
+ noway_assert(flow->flDupCount > 0);
+ flow->flDupCount++;
+ }
+ else
+ {
+ flow = new (this, CMK_FlowList) flowList();
+
+#if MEASURE_BLOCK_SIZE
+ genFlowNodeCnt += 1;
+ genFlowNodeSize += sizeof(flowList);
+#endif // MEASURE_BLOCK_SIZE
+
+ // Any changes to the flow graph invalidate the dominator sets.
+ fgModified = true;
+
+ // Keep the predecessor list in lowest to highest bbNum order
+ // This allows us to discover the loops in optFindNaturalLoops
+ // from innermost to outermost.
+
+ // TODO-Throughput: This search is quadratic if you have many jumps
+ // to the same target. We need to either not bother sorting for
+ // debuggable code, or sort in optFindNaturalLoops, or better, make
+ // the code in optFindNaturalLoops not depend on order.
+
+ flowList** listp = &block->bbPreds;
+ while (*listp && ((*listp)->flBlock->bbNum < blockPred->bbNum))
+ {
+ listp = &(*listp)->flNext;
+ }
+
+ flow->flNext = *listp;
+ *listp = flow;
+
+ flow->flBlock = blockPred;
+ flow->flDupCount = 1;
+
+ if (fgHaveValidEdgeWeights)
+ {
+ // We are creating an edge from blockPred to block
+ // and we have already computed the edge weights, so
+ // we will try to setup this new edge with valid edge weights.
+ //
+ if (oldEdge != nullptr)
+ {
+ // If our caller has given us the old edge weights
+ // then we will use them.
+ //
+ flow->flEdgeWeightMin = oldEdge->flEdgeWeightMin;
+ flow->flEdgeWeightMax = oldEdge->flEdgeWeightMax;
+ }
+ else
+ {
+ // Set the max edge weight to be the minimum of block's or blockPred's weight
+ //
+ flow->flEdgeWeightMax = min(block->bbWeight, blockPred->bbWeight);
+
+ // If we are inserting a conditional block the minimum weight is zero,
+ // otherwise it is the same as the edge's max weight.
+ if (blockPred->NumSucc() > 1)
+ {
+ flow->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ }
+ else
+ {
+ flow->flEdgeWeightMin = flow->flEdgeWeightMax;
+ }
+ }
+ }
+ else
+ {
+ flow->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ flow->flEdgeWeightMax = BB_MAX_WEIGHT;
+ }
+ }
+ return flow;
+}
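+
+// Illustrative example (a sketch with hypothetical block names, not from the original sources): suppose
+// BB10 is a BBJ_COND block whose jump target and fall-through both lead to BB20. Then:
+//
+//     flowList* e1 = fgAddRefPred(BB20, BB10); // creates the edge; e1->flDupCount == 1, BB20->bbRefs += 1
+//     flowList* e2 = fgAddRefPred(BB20, BB10); // finds the same edge; e2 == e1, flDupCount == 2, bbRefs += 1
+//
+// So bbRefs counts incoming edges, while the pred list stores one flowList node per predecessor block.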
+
+//------------------------------------------------------------------------
+// fgRemoveRefPred: Decrements the reference count of a predecessor edge from "blockPred" to "block",
+// removing the edge if it is no longer necessary.
+//
+// Arguments:
+// block -- A block to operate on.
+// blockPred -- The predecessor block to remove from the predecessor list. It must be a predecessor of "block".
+//
+// Return Value:
+// If the flow edge was removed (the predecessor has a "dup count" of 1),
+// returns the flow graph edge that was removed. This means "blockPred" is no longer a predecessor of "block".
+// Otherwise, returns nullptr. This means that "blockPred" is still a predecessor of "block" (because "blockPred"
+// is a switch with multiple cases jumping to "block", or a BBJ_COND with both conditional and fall-through
+// paths leading to "block").
+//
+// Assumptions:
+// -- "blockPred" must be a predecessor block of "block".
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+//
+// Notes:
+// -- block->bbRefs is decremented by one to account for the reduction in incoming edges.
+// -- block->bbRefs is adjusted even if preds haven't been computed. If preds haven't been computed,
+// the preds themselves aren't touched.
+// -- fgModified is set if a flow edge is removed (but not if an existing flow edge dup count is decremented),
+// indicating that the flow graph shape has changed.
+
+flowList* Compiler::fgRemoveRefPred(BasicBlock* block, BasicBlock* blockPred)
+{
+ noway_assert(block != nullptr);
+ noway_assert(blockPred != nullptr);
+
+ noway_assert(block->countOfInEdges() > 0);
+ block->bbRefs--;
+
+ // Do nothing if we haven't calculated the predecessor list yet.
+ // Yes, this does happen.
+ // For example the predecessor lists haven't been created yet when we do fgMorph.
+ // But fgMorph calls fgFoldConditional, which in turn calls fgRemoveRefPred.
+ if (!fgComputePredsDone)
+ {
+ return nullptr;
+ }
+
+ assert(!fgCheapPredsValid);
+
+ flowList** ptrToPred;
+ flowList* pred = fgGetPredForBlock(block, blockPred, &ptrToPred);
+ noway_assert(pred);
+ noway_assert(pred->flDupCount > 0);
+
+ pred->flDupCount--;
+
+ if (pred->flDupCount == 0)
+ {
+ // Splice out the predecessor edge since it's no longer necessary.
+ *ptrToPred = pred->flNext;
+
+ // Any changes to the flow graph invalidate the dominator sets.
+ fgModified = true;
+
+ return pred;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
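+
+// Illustrative example (hypothetical block numbers): if a switch in BB10 has two cases jumping to BB20,
+// the edge BB10 -> BB20 has flDupCount == 2. The first fgRemoveRefPred(BB20, BB10) call only decrements
+// the dup count and returns nullptr (BB10 is still a predecessor); the second call drops the count to
+// zero, splices the edge out of BB20->bbPreds, and returns the removed edge.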
+
+//------------------------------------------------------------------------
+// fgRemoveAllRefPreds: Removes a predecessor edge from one block to another, no matter what the "dup count" is.
+//
+// Arguments:
+// block -- A block to operate on.
+// blockPred -- The predecessor block to remove from the predecessor list. It must be a predecessor of "block".
+//
+// Return Value:
+// Returns the flow graph edge that was removed. The dup count on the edge is no longer valid.
+//
+// Assumptions:
+// -- "blockPred" must be a predecessor block of "block".
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+//
+// Notes:
+// block->bbRefs is decremented to account for the reduction in incoming edges.
+
+flowList* Compiler::fgRemoveAllRefPreds(BasicBlock* block, BasicBlock* blockPred)
+{
+ assert(block != nullptr);
+ assert(blockPred != nullptr);
+ assert(fgComputePredsDone);
+ assert(!fgCheapPredsValid);
+ assert(block->countOfInEdges() > 0);
+
+ flowList** ptrToPred;
+ flowList* pred = fgGetPredForBlock(block, blockPred, &ptrToPred);
+ assert(pred != nullptr);
+ assert(pred->flDupCount > 0);
+
+ assert(block->bbRefs >= pred->flDupCount);
+ block->bbRefs -= pred->flDupCount;
+
+ // Now splice out the predecessor edge.
+ *ptrToPred = pred->flNext;
+
+ // Any changes to the flow graph invalidate the dominator sets.
+ fgModified = true;
+
+ return pred;
+}
+
+//------------------------------------------------------------------------
+// fgRemoveAllRefPreds: Remove a predecessor edge, given the address of a pointer to it in the
+// predecessor list, no matter what the "dup count" is.
+//
+// Arguments:
+// block -- A block with the predecessor list to operate on.
+// ptrToPred -- The address of a pointer to the predecessor to remove.
+//
+// Return Value:
+// The removed predecessor edge. The dup count on the edge is no longer valid.
+//
+// Assumptions:
+// -- The predecessor edge must be in the predecessor list for "block".
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+//
+// Notes:
+// block->bbRefs is decremented by the dup count of the predecessor edge, to account for the reduction in incoming
+// edges.
+
+flowList* Compiler::fgRemoveAllRefPreds(BasicBlock* block, flowList** ptrToPred)
+{
+ assert(block != nullptr);
+ assert(ptrToPred != nullptr);
+ assert(fgComputePredsDone);
+ assert(!fgCheapPredsValid);
+ assert(block->countOfInEdges() > 0);
+
+ flowList* pred = *ptrToPred;
+ assert(pred != nullptr);
+ assert(pred->flDupCount > 0);
+
+ assert(block->bbRefs >= pred->flDupCount);
+ block->bbRefs -= pred->flDupCount;
+
+ // Now splice out the predecessor edge.
+ *ptrToPred = pred->flNext;
+
+ // Any changes to the flow graph invalidate the dominator sets.
+ fgModified = true;
+
+ return pred;
+}
+
+/*
+ Removes all the appearances of 'block' as a predecessor of other blocks.
+*/
+
+void Compiler::fgRemoveBlockAsPred(BasicBlock* block)
+{
+ assert(!fgCheapPredsValid);
+
+ PREFIX_ASSUME(block != nullptr);
+
+ BasicBlock* bNext;
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ /* The block after the BBJ_CALLFINALLY block is not reachable */
+ bNext = block->bbNext;
+
+ /* bNext is an unreachable BBJ_ALWAYS block */
+ noway_assert(bNext->bbJumpKind == BBJ_ALWAYS);
+
+ while (bNext->countOfInEdges() > 0)
+ {
+ fgRemoveRefPred(bNext, bNext->bbPreds->flBlock);
+ }
+ }
+
+ __fallthrough;
+
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+
+ /* Update the predecessor list for 'block->bbJumpDest' and 'block->bbNext' */
+ fgRemoveRefPred(block->bbJumpDest, block);
+
+ if (block->bbJumpKind != BBJ_COND)
+ {
+ break;
+ }
+
+ /* If BBJ_COND fall through */
+ __fallthrough;
+
+ case BBJ_NONE:
+
+ /* Update the predecessor list for 'block->bbNext' */
+ fgRemoveRefPred(block->bbNext, block);
+ break;
+
+ case BBJ_EHFILTERRET:
+
+ block->bbJumpDest->bbRefs++; // To compensate for the bbRefs-- inside fgRemoveRefPred
+ fgRemoveRefPred(block->bbJumpDest, block);
+ break;
+
+ case BBJ_EHFINALLYRET:
+ {
+ /* Remove block as the predecessor of the bbNext of all
+ BBJ_CALLFINALLY blocks calling this finally. No need
+ to look for BBJ_CALLFINALLY for fault handlers. */
+
+ unsigned hndIndex = block->getHndIndex();
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex);
+
+ if (ehDsc->HasFinallyHandler())
+ {
+ BasicBlock* begBlk;
+ BasicBlock* endBlk;
+ ehGetCallFinallyBlockRange(hndIndex, &begBlk, &endBlk);
+
+ BasicBlock* finBeg = ehDsc->ebdHndBeg;
+
+ for (BasicBlock* bcall = begBlk; bcall != endBlk; bcall = bcall->bbNext)
+ {
+ if ((bcall->bbFlags & BBF_REMOVED) || bcall->bbJumpKind != BBJ_CALLFINALLY ||
+ bcall->bbJumpDest != finBeg)
+ {
+ continue;
+ }
+
+ assert(bcall->isBBCallAlwaysPair());
+ fgRemoveRefPred(bcall->bbNext, block);
+ }
+ }
+ }
+ break;
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ break;
+
+ case BBJ_SWITCH:
+ {
+ unsigned jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ fgRemoveRefPred(*jumpTab, block);
+ } while (++jumpTab, --jumpCnt);
+
+ break;
+ }
+
+ default:
+ noway_assert(!"Block doesn't have a valid bbJumpKind!!!!");
+ break;
+ }
+}
+
+/*****************************************************************************
+ * fgChangeSwitchBlock:
+ *
+ * We have a BBJ_SWITCH jump at 'oldSwitchBlock' and we want to move this
+ * switch jump over to 'newSwitchBlock'. All of the blocks that are jumped
+ * to from jumpTab[] need to have their predecessor lists updated by removing
+ * the 'oldSwitchBlock' and adding 'newSwitchBlock'.
+ */
+
+void Compiler::fgChangeSwitchBlock(BasicBlock* oldSwitchBlock, BasicBlock* newSwitchBlock)
+{
+ noway_assert(oldSwitchBlock != nullptr);
+ noway_assert(newSwitchBlock != nullptr);
+ noway_assert(oldSwitchBlock->bbJumpKind == BBJ_SWITCH);
+
+ unsigned jumpCnt = oldSwitchBlock->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab = oldSwitchBlock->bbJumpSwt->bbsDstTab;
+
+ unsigned i;
+
+ // Walk the switch's jump table, updating the predecessor for each branch.
+ for (i = 0; i < jumpCnt; i++)
+ {
+ BasicBlock* bJump = jumpTab[i];
+ noway_assert(bJump != nullptr);
+
+ // Note that if there are duplicate branch targets in the switch jump table,
+ // fgRemoveRefPred()/fgAddRefPred() will do the right thing: the second and
+ // subsequent duplicates will simply subtract from and add to the duplicate
+ // count (respectively).
+
+ //
+ // Remove the old edge [oldSwitchBlock => bJump]
+ //
+ fgRemoveRefPred(bJump, oldSwitchBlock);
+
+ //
+ // Create the new edge [newSwitchBlock => bJump]
+ //
+ fgAddRefPred(bJump, newSwitchBlock);
+ }
+
+ if (m_switchDescMap != nullptr)
+ {
+ SwitchUniqueSuccSet uniqueSuccSet;
+
+ // If we have already computed and cached the unique successor set for the old block,
+ // transfer that cache entry to the new block.
+ if (m_switchDescMap->Lookup(oldSwitchBlock, &uniqueSuccSet))
+ {
+ m_switchDescMap->Set(newSwitchBlock, uniqueSuccSet);
+ }
+ else
+ {
+ fgInvalidateSwitchDescMapEntry(newSwitchBlock);
+ }
+ fgInvalidateSwitchDescMapEntry(oldSwitchBlock);
+ }
+}
+
+/*****************************************************************************
+ * fgReplaceSwitchJumpTarget:
+ *
+ * We have a BBJ_SWITCH at 'blockSwitch' and we want to replace all entries
+ * in the jumpTab[] so that jumps that previously went to
+ * 'oldTarget' now go to 'newTarget'.
+ * We also must update the predecessor lists for 'oldTarget' and 'newTarget'.
+ */
+
+void Compiler::fgReplaceSwitchJumpTarget(BasicBlock* blockSwitch, BasicBlock* newTarget, BasicBlock* oldTarget)
+{
+ noway_assert(blockSwitch != nullptr);
+ noway_assert(newTarget != nullptr);
+ noway_assert(oldTarget != nullptr);
+ noway_assert(blockSwitch->bbJumpKind == BBJ_SWITCH);
+
+ // For each jump table entry that matches 'oldTarget', retarget it to 'newTarget'
+ // and update the predecessor lists of 'oldTarget' and 'newTarget' accordingly.
+ //
+
+ unsigned jumpCnt = blockSwitch->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab = blockSwitch->bbJumpSwt->bbsDstTab;
+
+ unsigned i = 0;
+
+ // Walk the switch's jump table looking for blocks to update the preds for
+ while (i < jumpCnt)
+ {
+ if (jumpTab[i] == oldTarget) // We will update when jumpTab[i] matches
+ {
+ // Remove the old edge [oldTarget from blockSwitch]
+ //
+ fgRemoveAllRefPreds(oldTarget, blockSwitch);
+
+ //
+ // Change the jumpTab entry to branch to the new location
+ //
+ jumpTab[i] = newTarget;
+
+ //
+ // Create the new edge [newTarget from blockSwitch]
+ //
+ flowList* newEdge = fgAddRefPred(newTarget, blockSwitch);
+
+ // Now set the correct value of newEdge->flDupCount
+ // and replace any other jumps in jumpTab[] that go to oldTarget.
+ //
+ i++;
+ while (i < jumpCnt)
+ {
+ if (jumpTab[i] == oldTarget)
+ {
+ //
+ // We also must update this entry in the jumpTab
+ //
+ jumpTab[i] = newTarget;
+ newTarget->bbRefs++;
+
+ //
+ // Increment the flDupCount
+ //
+ newEdge->flDupCount++;
+ }
+ i++; // Check the next entry in jumpTab[]
+ }
+
+ // Maintain, if necessary, the set of unique targets of 'blockSwitch'.
+ UpdateSwitchTableTarget(blockSwitch, oldTarget, newTarget);
+
+ // Make sure the new target has the proper bits set for being a branch target.
+ newTarget->bbFlags |= BBF_HAS_LABEL | BBF_JMP_TARGET;
+
+ return; // We have replaced the jumps to oldTarget with newTarget
+ }
+ i++; // Check the next entry in jumpTab[] for a match
+ }
+ noway_assert(!"Did not find oldTarget in jumpTab[]");
+}
+
+//------------------------------------------------------------------------
+// Compiler::fgReplaceJumpTarget: For a given block, replace the target 'oldTarget' with 'newTarget'.
+//
+// Arguments:
+// block - the block in which a jump target will be replaced.
+// newTarget - the new branch target of the block.
+// oldTarget - the old branch target of the block.
+//
+// Notes:
+// 1. Only branches are changed: BBJ_ALWAYS, the non-fallthrough path of BBJ_COND, BBJ_SWITCH, etc.
+// We ignore other block types.
+// 2. Only the first target found is updated. If there are multiple ways for a block
+// to reach 'oldTarget' (e.g., multiple arms of a switch), only the first one found is changed.
+// 3. The predecessor lists are not changed.
+// 4. The switch table "unique successor" cache is invalidated.
+//
+// This function is most useful early, before the full predecessor lists have been computed.
+//
+void Compiler::fgReplaceJumpTarget(BasicBlock* block, BasicBlock* newTarget, BasicBlock* oldTarget)
+{
+ assert(block != nullptr);
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_LEAVE: // This function will be called before import, so we still have BBJ_LEAVE
+
+ if (block->bbJumpDest == oldTarget)
+ {
+ block->bbJumpDest = newTarget;
+ }
+ break;
+
+ case BBJ_NONE:
+ case BBJ_EHFINALLYRET:
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ break;
+
+ case BBJ_SWITCH:
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ for (unsigned i = 0; i < jumpCnt; i++)
+ {
+ if (jumpTab[i] == oldTarget)
+ {
+ jumpTab[i] = newTarget;
+ break;
+ }
+ }
+ break;
+
+ default:
+ assert(!"Block doesn't have a valid bbJumpKind!!!!");
+ unreached();
+ break;
+ }
+}
+
+/*****************************************************************************
+ * Updates the predecessor list for 'block' by replacing 'oldPred' with 'newPred'.
+ * Note that a block can only appear once in the preds list (for normal preds, not
+ * cheap preds): if a predecessor has multiple ways to get to this block, then
+ * flDupCount will be >1, but the block will still appear exactly once. Thus, this
+ * function assumes that all branches from the predecessor (practically, that all
+ * switch cases that target this block) are changed to branch from the new predecessor,
+ * with the same dup count.
+ *
+ * Note that the block bbRefs is not changed, since 'block' has the same number of
+ * references as before, just from a different predecessor block.
+ */
+
+void Compiler::fgReplacePred(BasicBlock* block, BasicBlock* oldPred, BasicBlock* newPred)
+{
+ noway_assert(block != nullptr);
+ noway_assert(oldPred != nullptr);
+ noway_assert(newPred != nullptr);
+ assert(!fgCheapPredsValid);
+
+ flowList* pred;
+
+ for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ if (oldPred == pred->flBlock)
+ {
+ pred->flBlock = newPred;
+ break;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Returns true if block b1 dominates block b2.
+ */
+
+bool Compiler::fgDominate(BasicBlock* b1, BasicBlock* b2)
+{
+ noway_assert(fgDomsComputed);
+ assert(!fgCheapPredsValid);
+
+ //
+ // If the fgModified flag is true then we have made some modifications to
+ // the flow graph, like adding a new block or changing a conditional branch
+ // into an unconditional branch.
+ //
+ // We can continue to use the dominator and reachable information to
+ // unmark loops as long as we haven't renumbered the blocks and we aren't
+ // asking for information about a new block.
+ //
+
+ if (b2->bbNum > fgDomBBcount)
+ {
+ if (b1 == b2)
+ {
+ return true;
+ }
+
+ for (flowList* pred = b2->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ if (!fgDominate(b1, pred->flBlock))
+ {
+ return false;
+ }
+ }
+
+ return b2->bbPreds != nullptr;
+ }
+
+ if (b1->bbNum > fgDomBBcount)
+ {
+ // if b1 is a loop preheader and Succ is its only successor, then all predecessors of
+ // Succ either are b1 itself or are dominated by Succ. Under these conditions, b1
+ // dominates b2 if and only if Succ dominates b2 (or if b2 == b1, but we already tested
+ // for this case)
+ if (b1->bbFlags & BBF_LOOP_PREHEADER)
+ {
+ noway_assert(b1->bbFlags & BBF_INTERNAL);
+ noway_assert(b1->bbJumpKind == BBJ_NONE);
+ return fgDominate(b1->bbNext, b2);
+ }
+
+ // unknown dominators; err on the safe side and return false
+ return false;
+ }
+
+ /* Check if b1 dominates b2 */
+ unsigned numA = b1->bbNum;
+ noway_assert(numA <= fgDomBBcount);
+ unsigned numB = b2->bbNum;
+ noway_assert(numB <= fgDomBBcount);
+
+ // What we want to ask here is basically if A is in the middle of the path from B to the root (the entry node)
+ // in the dominator tree. Turns out that can be translated as:
+ //
+ // A dom B <-> preorder(A) <= preorder(B) && postorder(A) >= postorder(B)
+ //
+ // where the equality holds when you ask if A dominates itself.
+ bool treeDom =
+ fgDomTreePreOrder[numA] <= fgDomTreePreOrder[numB] && fgDomTreePostOrder[numA] >= fgDomTreePostOrder[numB];
+
+ return treeDom;
+}
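+
+// Worked example for the preorder/postorder test above (hypothetical numbers, assuming a dominator tree
+// rooted at BB01 with children BB02 and BB03, where BB04 is a child of BB02). A DFS of that tree could
+// assign:
+//     preorder:  BB01=1, BB02=2, BB04=3, BB03=4
+//     postorder: BB04=1, BB02=2, BB03=3, BB01=4
+// Then BB02 dominates BB04 since pre(BB02)=2 <= pre(BB04)=3 and post(BB02)=2 >= post(BB04)=1, while BB03
+// does not dominate BB04 since pre(BB03)=4 > pre(BB04)=3. BB01, the root, dominates every block.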
+
+/*****************************************************************************
+ *
+ * Returns true if block b1 can reach block b2.
+ */
+
+bool Compiler::fgReachable(BasicBlock* b1, BasicBlock* b2)
+{
+ noway_assert(fgDomsComputed);
+ assert(!fgCheapPredsValid);
+
+ //
+ // If the fgModified flag is true then we have made some modifications to
+ // the flow graph, like adding a new block or changing a conditional branch
+ // into an unconditional branch.
+ //
+ // We can continue to use the dominator and reachable information to
+ // unmark loops as long as we haven't renumbered the blocks and we aren't
+ // asking for information about a new block.
+ //
+
+ if (b2->bbNum > fgDomBBcount)
+ {
+ if (b1 == b2)
+ {
+ return true;
+ }
+
+ for (flowList* pred = b2->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ if (fgReachable(b1, pred->flBlock))
+ {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ if (b1->bbNum > fgDomBBcount)
+ {
+ noway_assert(b1->bbJumpKind == BBJ_NONE || b1->bbJumpKind == BBJ_ALWAYS || b1->bbJumpKind == BBJ_COND);
+
+ if (b1->bbFallsThrough() && fgReachable(b1->bbNext, b2))
+ {
+ return true;
+ }
+
+ if (b1->bbJumpKind == BBJ_ALWAYS || b1->bbJumpKind == BBJ_COND)
+ {
+ return fgReachable(b1->bbJumpDest, b2);
+ }
+
+ return false;
+ }
+
+ /* Check if b1 can reach b2 */
+ assert(fgReachabilitySetsValid);
+ assert(BasicBlockBitSetTraits::GetSize(this) == fgDomBBcount + 1);
+ return BlockSetOps::IsMember(this, b2->bbReach, b1->bbNum);
+}
+
+/*****************************************************************************
+ * Update changed flow graph information.
+ *
+ * If the flow graph has changed, we need to recompute various information if we want to use
+ * it again.
+ */
+
+void Compiler::fgUpdateChangedFlowGraph()
+{
+ // We need to clear this so we don't hit an assert calling fgRenumberBlocks().
+ fgDomsComputed = false;
+
+ JITDUMP("\nRenumbering the basic blocks for fgUpdateChangeFlowGraph\n");
+ fgRenumberBlocks();
+
+ fgComputePreds();
+ fgComputeEnterBlocksSet();
+ fgComputeReachabilitySets();
+ fgComputeDoms();
+}
+
+/*****************************************************************************
+ * Compute the bbReach sets.
+ *
+ * This can be called to recompute the bbReach sets after the flow graph changes, such as when the
+ * number of BasicBlocks change (and thus, the BlockSet epoch changes).
+ *
+ * Finally, this also sets the BBF_GC_SAFE_POINT flag on blocks.
+ *
+ * Assumes the predecessor lists are correct.
+ *
+ * TODO-Throughput: This algorithm consumes O(n^2) because we're using dense bitsets to
+ * represent reachability. While this yields O(1) time queries, it bloats the memory usage
+ * for large code. We can do better if we try to approach reachability by
+ * computing the strongly connected components of the flow graph. That way we only need
+ * linear memory to label every block with its SCC.
+ */
+
+void Compiler::fgComputeReachabilitySets()
+{
+ assert(fgComputePredsDone);
+ assert(!fgCheapPredsValid);
+
+#ifdef DEBUG
+ fgReachabilitySetsValid = false;
+#endif // DEBUG
+
+ BasicBlock* block;
+
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ // Initialize the per-block bbReach sets. (Note that we can't just call BlockSetOps::ClearD()
+ // when re-running this computation, because if the epoch changes, the size and representation of the
+ // sets might change).
+ block->bbReach = BlockSetOps::MakeEmpty(this);
+
+ /* Mark block as reaching itself */
+ BlockSetOps::AddElemD(this, block->bbReach, block->bbNum);
+ }
+
+ /* Find the reachable blocks */
+ // Also, set BBF_GC_SAFE_POINT.
+
+ bool change;
+ BlockSet BLOCKSET_INIT_NOCOPY(newReach, BlockSetOps::MakeEmpty(this));
+ do
+ {
+ change = false;
+
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ BlockSetOps::Assign(this, newReach, block->bbReach);
+
+ bool predGcSafe = (block->bbPreds != nullptr); // Do all of our predecessor blocks have a GC safe bit?
+
+ for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+
+ /* Union the predecessor's reachability set into newReach */
+ BlockSetOps::UnionD(this, newReach, predBlock->bbReach);
+
+ if (!(predBlock->bbFlags & BBF_GC_SAFE_POINT))
+ {
+ predGcSafe = false;
+ }
+ }
+
+ if (predGcSafe)
+ {
+ block->bbFlags |= BBF_GC_SAFE_POINT;
+ }
+
+ if (!BlockSetOps::Equal(this, newReach, block->bbReach))
+ {
+ BlockSetOps::Assign(this, block->bbReach, newReach);
+ change = true;
+ }
+ }
+ } while (change);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter computing reachability sets:\n");
+ fgDispReach();
+ }
+
+ fgReachabilitySetsValid = true;
+#endif // DEBUG
+}
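+
+// The loop above is a standard forward dataflow fixed point: for every block B it computes
+//     bbReach(B) = { B } union ( union of bbReach(P) over all predecessors P of B )
+// and iterates until no set changes. As a minimal illustration (hypothetical straight-line flow
+// BB01 -> BB02 -> BB03), the first pass grows bbReach(BB02) to {BB01, BB02} and bbReach(BB03) to
+// {BB01, BB02, BB03}; the second pass changes nothing, so the do/while exits.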
+
+/*****************************************************************************
+ * Compute the entry blocks set.
+ *
+ * Initialize fgEnterBlks to the set of blocks for which we don't have explicit control
+ * flow edges. These are the entry basic block and each of the EH handler blocks.
+ * For ARM, also include the BBJ_ALWAYS block of a BBJ_CALLFINALLY/BBJ_ALWAYS pair,
+ * to avoid creating "retless" calls, since we need the BBJ_ALWAYS for the purpose
+ * of unwinding, even if the call doesn't return (due to an explicit throw, for example).
+ */
+
+void Compiler::fgComputeEnterBlocksSet()
+{
+#ifdef DEBUG
+ fgEnterBlksSetValid = false;
+#endif // DEBUG
+
+ fgEnterBlks = BlockSetOps::MakeEmpty(this);
+
+ /* Now set the entry basic block */
+ BlockSetOps::AddElemD(this, fgEnterBlks, fgFirstBB->bbNum);
+ assert(fgFirstBB->bbNum == 1);
+
+ if (compHndBBtabCount > 0)
+ {
+ /* Also 'or' in the handler basic blocks */
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+ if (HBtab->HasFilter())
+ {
+ BlockSetOps::AddElemD(this, fgEnterBlks, HBtab->ebdFilter->bbNum);
+ }
+ BlockSetOps::AddElemD(this, fgEnterBlks, HBtab->ebdHndBeg->bbNum);
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // TODO-ARM-Cleanup: The ARM code here to prevent creating retless calls by adding the BBJ_ALWAYS
+ // to the enter blocks is a bit of a compromise, because sometimes the blocks are already reachable,
+// and it messes up DFS ordering to have them marked as enter blocks. We should prevent the
+ // creation of retless calls some other way.
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ // Don't remove the BBJ_ALWAYS block that is only here for the unwinder. It might be dead
+ // if the finally is no-return, so mark it as an entry point.
+ BlockSetOps::AddElemD(this, fgEnterBlks, block->bbNext->bbNum);
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Enter blocks: ");
+ BLOCKSET_ITER_INIT(this, iter, fgEnterBlks, bbNum);
+ while (iter.NextElem(this, &bbNum))
+ {
+ printf("BB%02u ", bbNum);
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+
+#ifdef DEBUG
+ fgEnterBlksSetValid = true;
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ * Remove unreachable blocks.
+ *
+ * Return true if any unreachable blocks were removed.
+ */
+
+bool Compiler::fgRemoveUnreachableBlocks()
+{
+ assert(!fgCheapPredsValid);
+ assert(fgReachabilitySetsValid);
+
+ bool hasLoops = false;
+ bool hasUnreachableBlocks = false;
+ BasicBlock* block;
+
+ /* Record unreachable blocks */
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ /* Internal throw blocks are also reachable */
+ if (fgIsThrowHlpBlk(block))
+ {
+ goto SKIP_BLOCK;
+ }
+ else if (block == genReturnBB)
+ {
+ // Don't remove statements for the genReturnBB block, as we might have special hookups there.
+ // For example, <BUGNUM> in VSW 364383, </BUGNUM>
+ // the profiler hookup needs to have the "void GT_RETURN" statement
+ // to properly set the info.compProfilerCallback flag.
+ goto SKIP_BLOCK;
+ }
+ else
+ {
+ // If any of the entry blocks can reach this block, then we skip it.
+ if (!BlockSetOps::IsEmptyIntersection(this, fgEnterBlks, block->bbReach))
+ {
+ goto SKIP_BLOCK;
+ }
+ }
+
+ // Remove all the code for the block
+ fgUnreachableBlock(block);
+
+ // Make sure that the block was marked as removed
+ noway_assert(block->bbFlags & BBF_REMOVED);
+
+ // Some blocks mark the end of trys and catches
+ // and can't be removed. We convert these into
+ // empty blocks of type BBJ_THROW
+
+ if (block->bbFlags & BBF_DONT_REMOVE)
+ {
+ bool bIsBBCallAlwaysPair = block->isBBCallAlwaysPair();
+
+ /* Unmark the block as removed, */
+ /* clear BBF_INTERNAL as well and set BBJ_IMPORTED */
+
+ block->bbFlags &= ~(BBF_REMOVED | BBF_INTERNAL | BBF_NEEDS_GCPOLL);
+ block->bbFlags |= BBF_IMPORTED;
+ block->bbJumpKind = BBJ_THROW;
+ block->bbSetRunRarely();
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // If this is a <BBJ_CALLFINALLY, BBJ_ALWAYS> pair, we have to clear BBF_FINALLY_TARGET flag on
+ // the target node (of BBJ_ALWAYS) since BBJ_CALLFINALLY node is getting converted to a BBJ_THROW.
+ if (bIsBBCallAlwaysPair)
+ {
+ noway_assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
+ fgClearFinallyTargetBit(block->bbNext->bbJumpDest);
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ }
+ else
+ {
+ /* We have to call fgRemoveBlock next */
+ hasUnreachableBlocks = true;
+ }
+ continue;
+
+ SKIP_BLOCK:;
+
+ // if (block->isRunRarely())
+ // continue;
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ continue;
+ }
+
+ /* Set BBF_LOOP_HEAD if we have backwards branches to this block */
+
+ unsigned blockNum = block->bbNum;
+ for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ if (blockNum <= predBlock->bbNum)
+ {
+ if (predBlock->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ continue;
+ }
+
+ /* If block can reach predBlock then we have a loop head */
+ if (BlockSetOps::IsMember(this, predBlock->bbReach, blockNum))
+ {
+ hasLoops = true;
+
+ /* Set the BBF_LOOP_HEAD flag */
+ block->bbFlags |= BBF_LOOP_HEAD;
+ break;
+ }
+ }
+ }
+ }
+
+ fgHasLoops = hasLoops;
+
+ if (hasUnreachableBlocks)
+ {
+ // Now remove the unreachable blocks
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ // If the block was marked with BBF_REMOVED then
+ // we need to call fgRemoveBlock() on it
+
+ if (block->bbFlags & BBF_REMOVED)
+ {
+ fgRemoveBlock(block, true);
+
+ // When we have a BBJ_CALLFINALLY, BBJ_ALWAYS pair; fgRemoveBlock will remove
+ // both blocks, so we must advance 1 extra place in the block list
+ //
+ if (block->isBBCallAlwaysPair())
+ {
+ block = block->bbNext;
+ }
+ }
+ }
+ }
+
+ return hasUnreachableBlocks;
+}
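+
+// Illustrative example of the loop-head marking above (hypothetical block numbers): if BB05 ends with a
+// backward branch to BB03, then when we process BB03 we find predecessor BB05 with bbNum 5 >= 3, and
+// since BB03 appears in BB05->bbReach (i.e., BB03 can reach BB05), BB03 is flagged BBF_LOOP_HEAD and
+// hasLoops (and so fgHasLoops) is set to true.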
+
+/*****************************************************************************
+ *
+ * Function called to compute the dominator and reachable sets.
+ *
+ * Assumes the predecessor lists are computed and correct.
+ */
+
+void Compiler::fgComputeReachability()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgComputeReachability\n");
+ }
+
+ fgVerifyHandlerTab();
+
+ // Make sure that the predecessor lists are accurate
+ assert(fgComputePredsDone);
+ fgDebugCheckBBlist();
+#endif // DEBUG
+
+ /* Create a list of all BBJ_RETURN blocks. The head of the list is 'fgReturnBlocks'. */
+ fgReturnBlocks = nullptr;
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ // If this is a BBJ_RETURN block, add it to our list of all BBJ_RETURN blocks. This list is only
+ // used to find return blocks.
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ fgReturnBlocks = new (this, CMK_Reachability) BasicBlockList(block, fgReturnBlocks);
+ }
+ }
+
+ // Compute reachability and then delete blocks determined to be unreachable. If we delete blocks, we
+ // need to loop, as that might have caused more blocks to become unreachable. This can happen in the
+ // case where a call to a finally is unreachable and deleted (maybe the call to the finally is
+ // preceded by a throw or an infinite loop), making the blocks following the finally unreachable.
+ // However, all EH entry blocks are considered global entry blocks, causing the blocks following the
+ // call to the finally to stay rooted, until a second round of reachability is done.
+ // The dominator algorithm expects that all blocks can be reached from the fgEnterBlks set.
+ unsigned passNum = 1;
+ bool changed;
+ do
+ {
+ // Just to be paranoid, avoid infinite loops; fall back to minopts.
+ if (passNum > 10)
+ {
+ noway_assert(!"Too many unreachable block removal loops");
+ }
+
+ /* Walk the flow graph, reassign block numbers to keep them in ascending order */
+ JITDUMP("\nRenumbering the basic blocks for fgComputeReachability pass #%u\n", passNum);
+ passNum++;
+ fgRenumberBlocks();
+
+ //
+ // Compute fgEnterBlks
+ //
+
+ fgComputeEnterBlocksSet();
+
+ //
+ // Compute bbReach
+ //
+
+ fgComputeReachabilitySets();
+
+ //
+ // Use reachability information to delete unreachable blocks.
+ // Also, determine if the flow graph has loops and set 'fgHasLoops' accordingly.
+ // Set the BBF_LOOP_HEAD flag on the block target of backwards branches.
+ //
+
+ changed = fgRemoveUnreachableBlocks();
+
+ } while (changed);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter computing reachability:\n");
+ fgDispBasicBlocks(verboseTrees);
+ printf("\n");
+ }
+
+ fgVerifyHandlerTab();
+ fgDebugCheckBBlist(true);
+#endif // DEBUG
+
+ //
+ // Now, compute the dominators
+ //
+
+ fgComputeDoms();
+}
+
+/** In order to compute dominance, we first need a DFS reverse post order sort of the basic flow graph
+ * for the dominance algorithm to operate correctly. We need the DFS sort because
+ * we build the dominance sets using the partial order induced by the DFS sorting. If this
+ * precondition does not hold, the algorithm doesn't work properly.
+ */
+void Compiler::fgDfsInvPostOrder()
+{
+ // NOTE: This algorithm only pays attention to the actual blocks. It ignores the imaginary entry block.
+
+ // visited : As we run the DFS post order sort, we mark the nodes we have visited to avoid
+ // revisiting them.
+ BlockSet BLOCKSET_INIT_NOCOPY(visited, BlockSetOps::MakeEmpty(this));
+
+ // We begin by figuring out which basic blocks don't have incoming edges and mark them as
+ // start nodes. Later on we run the DFS traversal helper for each node that we
+ // mark in this step.
+ BlockSet_ValRet_T startNodes = fgDomFindStartNodes();
+
+ // Make sure fgEnterBlks are still there in startNodes, even if they participate in a loop (i.e., there is
+ // an incoming edge into the block).
+ assert(fgEnterBlksSetValid);
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ //
+ // BlockSetOps::UnionD(this, startNodes, fgEnterBlks);
+ //
+// This causes problems on ARM, because for BBJ_CALLFINALLY/BBJ_ALWAYS pairs we add the BBJ_ALWAYS
+ // to the enter blocks set to prevent flow graph optimizations from removing it and creating retless call finallies
+ // (BBF_RETLESS_CALL). This leads to an incorrect DFS ordering in some cases, because we start the recursive walk
+ // from the BBJ_ALWAYS, which is reachable from other blocks. A better solution would be to change ARM to avoid
+ // creating retless calls in a different way, not by adding BBJ_ALWAYS to fgEnterBlks.
+ //
+ // So, let us make sure at least fgFirstBB is still there, even if it participates in a loop.
+ BlockSetOps::AddElemD(this, startNodes, 1);
+ assert(fgFirstBB->bbNum == 1);
+#else
+ BlockSetOps::UnionD(this, startNodes, fgEnterBlks);
+#endif
+
+ assert(BlockSetOps::IsMember(this, startNodes, fgFirstBB->bbNum));
+
+ // Call the flowgraph DFS traversal helper.
+ unsigned postIndex = 1;
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ // If the block has no predecessors, and we haven't already visited it (because it's in fgEnterBlks but also
+ // reachable from the first block), go ahead and traverse starting from this block.
+ if (BlockSetOps::IsMember(this, startNodes, block->bbNum) &&
+ !BlockSetOps::IsMember(this, visited, block->bbNum))
+ {
+ fgDfsInvPostOrderHelper(block, visited, &postIndex);
+ }
+ }
+
+ // After the DFS reverse postorder is completed, we must have visited all the basic blocks.
+ noway_assert(postIndex == fgBBcount + 1);
+ noway_assert(fgBBNumMax == fgBBcount);
+
+#ifdef DEBUG
+ if (0 && verbose)
+ {
+ printf("\nAfter doing a post order traversal of the BB graph, this is the ordering:\n");
+ for (unsigned i = 1; i <= fgBBNumMax; ++i)
+ {
+ printf("%02u -> BB%02u\n", i, fgBBInvPostOrder[i]->bbNum);
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+}
+
+BlockSet_ValRet_T Compiler::fgDomFindStartNodes()
+{
+ unsigned j;
+ BasicBlock* block;
+
+ // startNodes :: A set that represents which basic blocks in the flow graph don't have incoming edges.
+ // We begin assuming everything is a start block and remove any block that is being referenced by another in its
+ // successor list.
+
+ BlockSet BLOCKSET_INIT_NOCOPY(startNodes, BlockSetOps::MakeFull(this));
+
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ unsigned cSucc = block->NumSucc(this);
+ for (j = 0; j < cSucc; ++j)
+ {
+ BasicBlock* succ = block->GetSucc(j, this);
+ BlockSetOps::RemoveElemD(this, startNodes, succ->bbNum);
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nDominator computation start blocks (those blocks with no incoming edges):\n");
+ BLOCKSET_ITER_INIT(this, iter, startNodes, bbNum);
+ while (iter.NextElem(this, &bbNum))
+ {
+ printf("BB%02u ", bbNum);
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+
+ return startNodes;
+}
+
+//------------------------------------------------------------------------
+// fgDfsInvPostOrderHelper: Helper to assign post-order numbers to blocks.
+//
+// Arguments:
+// block - The starting entry block
+// visited - The set of visited blocks
+// count - Pointer to the Dfs counter
+//
+// Notes:
+// Compute a non-recursive DFS traversal of the flow graph using an
+// evaluation stack to assign post-order numbers.
+
+void Compiler::fgDfsInvPostOrderHelper(BasicBlock* block, BlockSet& visited, unsigned* count)
+{
+ // Assume we haven't visited this node yet (callers ensure this).
+ assert(!BlockSetOps::IsMember(this, visited, block->bbNum));
+
+ // Allocate a local stack to hold the DFS traversal actions necessary
+ // to compute pre/post-ordering of the control flowgraph.
+ ArrayStack<DfsBlockEntry> stack(this);
+
+ // Push the first block on the stack to seed the traversal.
+ stack.Push(DfsBlockEntry(DSS_Pre, block));
+ // Flag the node we just visited to avoid backtracking.
+ BlockSetOps::AddElemD(this, visited, block->bbNum);
+
+ // The search is terminated once all the actions have been processed.
+ while (stack.Height() != 0)
+ {
+ DfsBlockEntry current = stack.Pop();
+ BasicBlock* currentBlock = current.dfsBlock;
+
+ if (current.dfsStackState == DSS_Pre)
+ {
+ // This is a pre-visit that corresponds to the first time the
+ // node is encountered in the spanning tree and receives pre-order
+ // numberings. By pushing the post-action on the stack here we
+ // are guaranteed to only process it after all of its successors
+ // pre and post actions are processed.
+ stack.Push(DfsBlockEntry(DSS_Post, currentBlock));
+
+ unsigned cSucc = currentBlock->NumSucc(this);
+ for (unsigned j = 0; j < cSucc; ++j)
+ {
+ BasicBlock* succ = currentBlock->GetSucc(j, this);
+
+ // If this is a node we haven't seen before, go ahead and process
+ if (!BlockSetOps::IsMember(this, visited, succ->bbNum))
+ {
+ // Push a pre-visit action for this successor onto the stack and
+ // mark it as visited in case this block has multiple successors
+ // to the same node (multi-graph).
+ stack.Push(DfsBlockEntry(DSS_Pre, succ));
+ BlockSetOps::AddElemD(this, visited, succ->bbNum);
+ }
+ }
+ }
+ else
+ {
+ // This is a post-visit that corresponds to the last time the
+ // node is visited in the spanning tree and only happens after
+ // all descendants in the spanning tree have had pre and post
+ // actions applied.
+
+ assert(current.dfsStackState == DSS_Post);
+
+ unsigned invCount = fgBBcount - *count + 1;
+ assert(1 <= invCount && invCount <= fgBBNumMax);
+ fgBBInvPostOrder[invCount] = currentBlock;
+ currentBlock->bbDfsNum = invCount;
+ ++(*count);
+ }
+ }
+}
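+
+// Illustrative trace (hypothetical three-block graph BB01 -> {BB02, BB03}, BB02 -> {BB03}, BB03 with no
+// successors, fgBBcount == 3): the stack processes Pre(BB01), pushes Post(BB01) plus Pre entries for BB02
+// and BB03, and then pops post-actions in the order BB03, BB02, BB01. With *count starting at 1, invCount
+// comes out as 3, 2, 1 respectively, so fgBBInvPostOrder[] holds BB01, BB02, BB03 at indices 1, 2, 3 --
+// a reverse post order in which BB01, the DFS root, gets the smallest bbDfsNum.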
+
+void Compiler::fgComputeDoms()
+{
+ assert(!fgCheapPredsValid);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgComputeDoms\n");
+ }
+
+ fgVerifyHandlerTab();
+
+ // Make sure that the predecessor lists are accurate.
+ // Also check that the blocks are properly, densely numbered (so calling fgRenumberBlocks is not necessary).
+ fgDebugCheckBBlist(true);
+
+ // Assert things related to the BlockSet epoch.
+ assert(fgBBcount == fgBBNumMax);
+ assert(BasicBlockBitSetTraits::GetSize(this) == fgBBNumMax + 1);
+#endif // DEBUG
+
+ BlockSet BLOCKSET_INIT_NOCOPY(processedBlks, BlockSetOps::MakeEmpty(this));
+
+ fgBBInvPostOrder = new (this, CMK_DominatorMemory) BasicBlock*[fgBBNumMax + 1];
+ memset(fgBBInvPostOrder, 0, sizeof(BasicBlock*) * (fgBBNumMax + 1));
+
+ fgDfsInvPostOrder();
+ noway_assert(fgBBInvPostOrder[0] == nullptr);
+
+ // flRoot and bbRoot represent an imaginary unique entry point in the flow graph.
+ // All the orphaned EH blocks and fgFirstBB will temporarily have their predecessor lists
+ // (with bbRoot as the only basic block in them) set to flRoot.
+ // Later on, we clear those predecessor lists and let them be nullptr again.
+ // Since we number basic blocks starting at one, the imaginary entry block is conveniently numbered as zero.
+ flowList flRoot;
+ BasicBlock bbRoot;
+
+ bbRoot.bbPreds = nullptr;
+ bbRoot.bbNum = 0;
+ bbRoot.bbIDom = &bbRoot;
+ bbRoot.bbDfsNum = 0;
+ flRoot.flNext = nullptr;
+ flRoot.flBlock = &bbRoot;
+
+ fgBBInvPostOrder[0] = &bbRoot;
+
+ // Mark both bbRoot and fgFirstBB processed
+ BlockSetOps::AddElemD(this, processedBlks, 0); // bbRoot == block #0
+ BlockSetOps::AddElemD(this, processedBlks, 1); // fgFirstBB == block #1
+ assert(fgFirstBB->bbNum == 1);
+
+ // Special case fgFirstBB to say its IDom is bbRoot.
+ fgFirstBB->bbIDom = &bbRoot;
+
+ BasicBlock* block = nullptr;
+
+ for (block = fgFirstBB->bbNext; block != nullptr; block = block->bbNext)
+ {
+ // If any basic block has no predecessors then we flag it as processed and temporarily
+ // mark its predecessor list to be flRoot. This makes the flowgraph connected,
+ // a precondition that is needed by the dominance algorithm to operate properly.
+ if (block->bbPreds == nullptr)
+ {
+ block->bbPreds = &flRoot;
+ block->bbIDom = &bbRoot;
+ BlockSetOps::AddElemD(this, processedBlks, block->bbNum);
+ }
+ else
+ {
+ block->bbIDom = nullptr;
+ }
+ }
+
+ // Mark the EH blocks as entry blocks and also flag them as processed.
+ if (compHndBBtabCount > 0)
+ {
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+ if (HBtab->HasFilter())
+ {
+ HBtab->ebdFilter->bbIDom = &bbRoot;
+ BlockSetOps::AddElemD(this, processedBlks, HBtab->ebdFilter->bbNum);
+ }
+ HBtab->ebdHndBeg->bbIDom = &bbRoot;
+ BlockSetOps::AddElemD(this, processedBlks, HBtab->ebdHndBeg->bbNum);
+ }
+ }
+
+ // Now proceed to compute the immediate dominators for each basic block.
+ bool changed = true;
+ while (changed)
+ {
+ changed = false;
+ for (unsigned i = 1; i <= fgBBNumMax;
+ ++i) // Process each actual block; don't process the imaginary predecessor block.
+ {
+ flowList* first = nullptr;
+ BasicBlock* newidom = nullptr;
+ block = fgBBInvPostOrder[i];
+
+ // If a block has bbRoot as its bbIDom, it has already been flagged
+ // as processed and as an entry block, so we're all set.
+ if (block->bbIDom == &bbRoot)
+ {
+ continue;
+ }
+
+ // Pick up the first processed predecessor of the current block.
+ for (first = block->bbPreds; first != nullptr; first = first->flNext)
+ {
+ if (BlockSetOps::IsMember(this, processedBlks, first->flBlock->bbNum))
+ {
+ break;
+ }
+ }
+ noway_assert(first != nullptr);
+
+ // We assume the first processed predecessor will be the
+ // immediate dominator and then compute the forward flow analysis.
+ newidom = first->flBlock;
+ for (flowList* p = block->bbPreds; p != nullptr; p = p->flNext)
+ {
+ if (p->flBlock == first->flBlock)
+ {
+ continue;
+ }
+ if (p->flBlock->bbIDom != nullptr)
+ {
+ // fgIntersectDom is basically the set intersection between
+ // the dominance sets of the new IDom and the current predecessor
+ // Since the nodes are ordered in DFS inverse post order and
+ // IDom induces a tree, fgIntersectDom actually computes
+ // the lowest common ancestor in the dominator tree.
+ newidom = fgIntersectDom(p->flBlock, newidom);
+ }
+ }
+
+ // If the Immediate dominator changed, assign the new one
+ // to the current working basic block.
+ if (block->bbIDom != newidom)
+ {
+ noway_assert(newidom != nullptr);
+ block->bbIDom = newidom;
+ changed = true;
+ }
+ BlockSetOps::AddElemD(this, processedBlks, block->bbNum);
+ }
+ }
+
+ // As stated before, once we have computed immediate dominance we need to clear
+ // all the basic blocks whose predecessor list was set to flRoot. This
+ // reverts that and leaves the blocks the same as before.
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbPreds == &flRoot)
+ {
+ block->bbPreds = nullptr;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ fgDispDoms();
+ }
+#endif
+
+ fgBuildDomTree();
+
+ fgModified = false;
+ fgDomBBcount = fgBBcount;
+ assert(fgBBcount == fgBBNumMax);
+ assert(BasicBlockBitSetTraits::GetSize(this) == fgDomBBcount + 1);
+
+ fgDomsComputed = true;
+}
+
+void Compiler::fgBuildDomTree()
+{
+ unsigned i;
+ BasicBlock* block;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nInside fgBuildDomTree\n");
+ }
+#endif // DEBUG
+
+ // domTree :: The dominance tree represented using adjacency lists. We use BasicBlockList to represent edges.
+ // Indexed by basic block number.
+ unsigned bbArraySize = fgBBNumMax + 1;
+ BasicBlockList** domTree = new (this, CMK_DominatorMemory) BasicBlockList*[bbArraySize];
+
+ fgDomTreePreOrder = new (this, CMK_DominatorMemory) unsigned[bbArraySize];
+ fgDomTreePostOrder = new (this, CMK_DominatorMemory) unsigned[bbArraySize];
+
+ // Initialize all the data structures.
+ for (i = 0; i < bbArraySize; ++i)
+ {
+ domTree[i] = nullptr;
+ fgDomTreePreOrder[i] = fgDomTreePostOrder[i] = 0;
+ }
+
+ // Build the dominance tree.
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ // If the immediate dominator is not the imaginary root (bbRoot)
+ // we proceed to append this block to the children of the dominator node.
+ if (block->bbIDom->bbNum != 0)
+ {
+ int bbNum = block->bbIDom->bbNum;
+ domTree[bbNum] = new (this, CMK_DominatorMemory) BasicBlockList(block, domTree[bbNum]);
+ }
+ else
+ {
+ // This means this block had bbRoot set as its IDom. We clear it out
+ // and convert the tree back to a forest.
+ block->bbIDom = nullptr;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter computing the Dominance Tree:\n");
+ fgDispDomTree(domTree);
+ }
+#endif // DEBUG
+
+ // Get the bitset that represents the roots of the dominance tree.
+ // Something to note here is that the dominance tree has been converted from a forest to a tree
+ // by using the bbRoot trick on fgComputeDoms. The reason we have a forest instead of a real tree
+ // is because we treat the EH blocks as entry nodes so the real dominance tree is not necessarily connected.
+ BlockSet_ValRet_T domTreeEntryNodes = fgDomTreeEntryNodes(domTree);
+
+ // The preorder and postorder numbers.
+ // We start from 1 to match the bbNum ordering.
+ unsigned preNum = 1;
+ unsigned postNum = 1;
+
+ // There will be nodes in the dominance tree that will not be reachable:
+ // the catch blocks that return since they don't have any predecessor.
+ // To account for this, we keep track of how many nodes we can
+ // reach and assert at the end that we visited all of them.
+ unsigned domTreeReachable = fgBBcount;
+
+ // Once we have the dominance tree computed, we need to traverse it
+ // to get the preorder and postorder numbers for each node. The purpose of
+ // this is to achieve O(1) queries of the form "A dominates B".
+ for (i = 1; i <= fgBBNumMax; ++i)
+ {
+ if (BlockSetOps::IsMember(this, domTreeEntryNodes, i))
+ {
+ if (domTree[i] == nullptr)
+ {
+ // If this is an entry node but there's no children on this
+ // node, it means it's unreachable so we decrement the reachable
+ // counter.
+ --domTreeReachable;
+ }
+ else
+ {
+ // Otherwise, we do a DFS traversal of the dominator tree.
+ fgTraverseDomTree(i, domTree, &preNum, &postNum);
+ }
+ }
+ }
+
+ noway_assert(preNum == domTreeReachable + 1);
+ noway_assert(postNum == domTreeReachable + 1);
+
+ // Once we have all the reachable nodes numbered, we proceed to
+ // assign numbers to the non-reachable ones, just assign incrementing
+ // values. We must reach fgBBcount at the end.
+
+ for (i = 1; i <= fgBBNumMax; ++i)
+ {
+ if (BlockSetOps::IsMember(this, domTreeEntryNodes, i))
+ {
+ if (domTree[i] == nullptr)
+ {
+ fgDomTreePreOrder[i] = preNum++;
+ fgDomTreePostOrder[i] = postNum++;
+ }
+ }
+ }
+
+ noway_assert(preNum == fgBBNumMax + 1);
+ noway_assert(postNum == fgBBNumMax + 1);
+ noway_assert(fgDomTreePreOrder[0] == 0); // Unused first element
+ noway_assert(fgDomTreePostOrder[0] == 0); // Unused first element
+
+#ifdef DEBUG
+ if (0 && verbose)
+ {
+ printf("\nAfter traversing the dominance tree:\n");
+ printf("PreOrder:\n");
+ for (i = 1; i <= fgBBNumMax; ++i)
+ {
+ printf("BB%02u : %02u\n", i, fgDomTreePreOrder[i]);
+ }
+ printf("PostOrder:\n");
+ for (i = 1; i <= fgBBNumMax; ++i)
+ {
+ printf("BB%02u : %02u\n", i, fgDomTreePostOrder[i]);
+ }
+ }
+#endif // DEBUG
+}
+
+BlockSet_ValRet_T Compiler::fgDomTreeEntryNodes(BasicBlockList** domTree)
+{
+ // domTreeEntryNodes :: Set that represents which basic blocks are roots of the dominator forest.
+
+ BlockSet BLOCKSET_INIT_NOCOPY(domTreeEntryNodes, BlockSetOps::MakeFull(this));
+
+ // First of all we need to find all the roots of the dominance forest.
+
+ for (unsigned i = 1; i <= fgBBNumMax; ++i)
+ {
+ for (BasicBlockList* current = domTree[i]; current != nullptr; current = current->next)
+ {
+ BlockSetOps::RemoveElemD(this, domTreeEntryNodes, current->block->bbNum);
+ }
+ }
+
+ return domTreeEntryNodes;
+}
+
+#ifdef DEBUG
+void Compiler::fgDispDomTree(BasicBlockList** domTree)
+{
+ for (unsigned i = 1; i <= fgBBNumMax; ++i)
+ {
+ if (domTree[i] != nullptr)
+ {
+ printf("BB%02u : ", i);
+ for (BasicBlockList* current = domTree[i]; current != nullptr; current = current->next)
+ {
+ assert(current->block);
+ printf("BB%02u ", current->block->bbNum);
+ }
+ printf("\n");
+ }
+ }
+ printf("\n");
+}
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// fgTraverseDomTree: Assign pre/post-order numbers to the dominator tree.
+//
+// Arguments:
+// bbNum - The basic block number of the starting block
+// domTree - The dominator tree (as child block lists)
+// preNum - Pointer to the pre-number counter
+// postNum - Pointer to the post-number counter
+//
+// Notes:
+// Runs a non-recursive DFS traversal of the dominator tree using an
+// evaluation stack to assign pre-order and post-order numbers.
+// These numberings are used to provide constant time lookup for
+// ancestor/descendent tests between pairs of nodes in the tree.
+
+void Compiler::fgTraverseDomTree(unsigned bbNum, BasicBlockList** domTree, unsigned* preNum, unsigned* postNum)
+{
+ noway_assert(bbNum <= fgBBNumMax);
+
+ // If the block preorder number is not zero it means we already visited
+ // that node, so we skip it.
+ if (fgDomTreePreOrder[bbNum] == 0)
+ {
+ // If this is the first time we visit this node, both preorder and postnumber
+ // values must be zero.
+ noway_assert(fgDomTreePostOrder[bbNum] == 0);
+
+ // Allocate a local stack to hold the Dfs traversal actions necessary
+ // to compute pre/post-ordering of the dominator tree.
+ ArrayStack<DfsNumEntry> stack(this);
+
+ // Push the first entry number on the stack to seed the traversal.
+ stack.Push(DfsNumEntry(DSS_Pre, bbNum));
+
+ // The search is terminated once all the actions have been processed.
+ while (stack.Height() != 0)
+ {
+ DfsNumEntry current = stack.Pop();
+ unsigned currentNum = current.dfsNum;
+
+ if (current.dfsStackState == DSS_Pre)
+ {
+ // This pre-visit action corresponds to the first time the
+ // node is encountered during the spanning traversal.
+ noway_assert(fgDomTreePreOrder[currentNum] == 0);
+ noway_assert(fgDomTreePostOrder[currentNum] == 0);
+
+ // Assign the preorder number on the first visit.
+ fgDomTreePreOrder[currentNum] = (*preNum)++;
+
+ // Push this node's post-action on the stack so that all successors'
+ // pre-order visits occur before this node's post-action. We will assign
+ // its post-order number when we pop it off the stack.
+ stack.Push(DfsNumEntry(DSS_Post, currentNum));
+
+ // For each child in the dominator tree process its pre-actions.
+ for (BasicBlockList* child = domTree[currentNum]; child != nullptr; child = child->next)
+ {
+ unsigned childNum = child->block->bbNum;
+
+ // This is a tree so never could have been visited
+ assert(fgDomTreePreOrder[childNum] == 0);
+
+ // Push the successor in the dominator tree for pre-actions.
+ stack.Push(DfsNumEntry(DSS_Pre, childNum));
+ }
+ }
+ else
+ {
+ // This post-visit action corresponds to the last time the node
+ // is encountered and only after all descendants in the spanning
+ // tree have had pre and post-order numbers assigned.
+
+ assert(current.dfsStackState == DSS_Post);
+ assert(fgDomTreePreOrder[currentNum] != 0);
+ assert(fgDomTreePostOrder[currentNum] == 0);
+
+ // Now assign this node's post-order number.
+ fgDomTreePostOrder[currentNum] = (*postNum)++;
+ }
+ }
+ }
+}
+
+// This code finds the lowest common ancestor in the
+// dominator tree between two basic blocks. The LCA in the dominance tree
+// represents the closest common dominator of the two basic blocks. Used to
+// adjust the IDom value in fgComputeDoms.
+BasicBlock* Compiler::fgIntersectDom(BasicBlock* a, BasicBlock* b)
+{
+ BasicBlock* finger1 = a;
+ BasicBlock* finger2 = b;
+ while (finger1 != finger2)
+ {
+ while (finger1->bbDfsNum > finger2->bbDfsNum)
+ {
+ finger1 = finger1->bbIDom;
+ }
+ while (finger2->bbDfsNum > finger1->bbDfsNum)
+ {
+ finger2 = finger2->bbIDom;
+ }
+ }
+ return finger1;
+}
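+
+// Illustrative walk (hypothetical blocks, with bbDfsNum values in parentheses): take a = BB06(6), whose
+// IDom chain is BB06(6) -> BB03(3) -> BB01(1), and b = BB04(4), whose IDom chain is
+// BB04(4) -> BB02(2) -> BB01(1). finger1 climbs BB06 -> BB03, finger2 climbs BB04 -> BB02, finger1 climbs
+// BB03 -> BB01, finger2 climbs BB02 -> BB01, and the fingers meet at BB01: the closest common dominator
+// of the two blocks.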
+
+// Return a BlockSet containing all the blocks that dominate 'block'.
+BlockSet_ValRet_T Compiler::fgGetDominatorSet(BasicBlock* block)
+{
+ assert(block != nullptr);
+
+ BlockSet BLOCKSET_INIT_NOCOPY(domSet, BlockSetOps::MakeEmpty(this));
+
+ do
+ {
+ BlockSetOps::AddElemD(this, domSet, block->bbNum);
+ if (block == block->bbIDom)
+ {
+ break; // We found a cycle in the IDom list, so we're done.
+ }
+ block = block->bbIDom;
+ } while (block != nullptr);
+
+ return domSet;
+}
+
+/*****************************************************************************
+ *
+ * fgComputeCheapPreds: Function called to compute the BasicBlock::bbCheapPreds lists.
+ *
+ * No other block data is changed (e.g., bbRefs, bbFlags).
+ *
+ * The cheap preds lists are similar to the normal (bbPreds) predecessor lists, but are cheaper to
+ * compute and store, as follows:
+ * 1. A flow edge is typed BasicBlockList, which only has a block pointer and 'next' pointer. It doesn't
+ * have weights or a dup count.
+ * 2. The preds list for a block is not sorted by block number.
+ * 3. The predecessors of the block following a BBJ_CALLFINALLY (the corresponding BBJ_ALWAYS,
+ * for normal, non-retless calls to the finally) are not computed.
+ * 4. The cheap preds lists will contain duplicates if a single switch table has multiple branches
+ * to the same block. Thus, we don't spend the time looking for duplicates for every edge we insert.
+ */
+void Compiler::fgComputeCheapPreds()
+{
+ noway_assert(!fgComputePredsDone); // We can't do this if we've got the full preds.
+ noway_assert(fgFirstBB != nullptr);
+
+ BasicBlock* block;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgComputeCheapPreds()\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Clear out the cheap preds lists.
+ fgRemovePreds();
+
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ fgAddCheapPred(block->bbJumpDest, block);
+ fgAddCheapPred(block->bbNext, block);
+ break;
+
+ case BBJ_CALLFINALLY:
+ case BBJ_LEAVE: // If fgComputeCheapPreds is called before all blocks are imported, BBJ_LEAVE blocks are
+ // still in the BB list.
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ fgAddCheapPred(block->bbJumpDest, block);
+ break;
+
+ case BBJ_NONE:
+ fgAddCheapPred(block->bbNext, block);
+ break;
+
+ case BBJ_EHFILTERRET:
+ // Connect end of filter to catch handler.
+ // In a well-formed program, this cannot be null. Tolerate here, so that we can call
+ // fgComputeCheapPreds before fgImport on an ill-formed program; the problem will be detected in
+ // fgImport.
+ if (block->bbJumpDest != nullptr)
+ {
+ fgAddCheapPred(block->bbJumpDest, block);
+ }
+ break;
+
+ case BBJ_SWITCH:
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ fgAddCheapPred(*jumpTab, block);
+ } while (++jumpTab, --jumpCnt);
+
+ break;
+
+ case BBJ_EHFINALLYRET: // It's expensive to compute the preds for this case, so we don't for the cheap
+ // preds.
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+
+ fgCheapPredsValid = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgComputeCheapPreds()\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif
+}
+
+/*****************************************************************************
+ * Add 'blockPred' to the cheap predecessor list of 'block'.
+ */
+
+void Compiler::fgAddCheapPred(BasicBlock* block, BasicBlock* blockPred)
+{
+ assert(!fgComputePredsDone);
+ assert(block != nullptr);
+ assert(blockPred != nullptr);
+
+ block->bbCheapPreds = new (this, CMK_FlowList) BasicBlockList(blockPred, block->bbCheapPreds);
+
+#if MEASURE_BLOCK_SIZE
+ genFlowNodeCnt += 1;
+ genFlowNodeSize += sizeof(BasicBlockList);
+#endif // MEASURE_BLOCK_SIZE
+}
+
+/*****************************************************************************
+ * Remove 'blockPred' from the cheap predecessor list of 'block'.
+ * If there are duplicate edges, only remove one of them.
+ */
+void Compiler::fgRemoveCheapPred(BasicBlock* block, BasicBlock* blockPred)
+{
+ assert(!fgComputePredsDone);
+ assert(fgCheapPredsValid);
+
+ flowList* oldEdge = nullptr;
+
+ assert(block != nullptr);
+ assert(blockPred != nullptr);
+ assert(block->bbCheapPreds != nullptr);
+
+ /* Is this the first block in the pred list? */
+ if (blockPred == block->bbCheapPreds->block)
+ {
+ block->bbCheapPreds = block->bbCheapPreds->next;
+ }
+ else
+ {
+ BasicBlockList* pred;
+ for (pred = block->bbCheapPreds; pred->next != nullptr; pred = pred->next)
+ {
+ if (blockPred == pred->next->block)
+ {
+ break;
+ }
+ }
+ noway_assert(pred->next != nullptr); // we better have found it!
+ pred->next = pred->next->next; // splice it out
+ }
+}
+
+void Compiler::fgRemovePreds()
+{
+ C_ASSERT(offsetof(BasicBlock, bbPreds) ==
+ offsetof(BasicBlock, bbCheapPreds)); // bbPreds and bbCheapPreds are at the same place in a union,
+ C_ASSERT(sizeof(((BasicBlock*)0)->bbPreds) ==
+ sizeof(((BasicBlock*)0)->bbCheapPreds)); // and are the same size. So, this function removes both.
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ block->bbPreds = nullptr;
+ }
+ fgComputePredsDone = false;
+ fgCheapPredsValid = false;
+}
+
+/*****************************************************************************
+ *
+ * Function called to compute the bbPreds lists.
+ */
+void Compiler::fgComputePreds()
+{
+ noway_assert(fgFirstBB);
+
+ BasicBlock* block;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgComputePreds()\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // reset the refs count for each basic block
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ block->bbRefs = 0;
+ }
+
+ /* the first block is always reachable! */
+ fgFirstBB->bbRefs = 1;
+
+ /* Treat the initial block as a jump target */
+ fgFirstBB->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ fgRemovePreds();
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ switch (block->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ /* Mark the next block as being a jump target,
+ since the call target will return there */
+ PREFIX_ASSUME(block->bbNext != nullptr);
+ block->bbNext->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+ }
+
+ __fallthrough;
+
+ case BBJ_LEAVE: // Sometimes fgComputePreds is called before all blocks are imported, so BBJ_LEAVE
+ // blocks are still in the BB list.
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+
+ /* Mark the jump dest block as being a jump target */
+ block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ fgAddRefPred(block->bbJumpDest, block, nullptr, true);
+
+ /* Is the next block reachable? */
+
+ if (block->bbJumpKind != BBJ_COND)
+ {
+ break;
+ }
+
+ noway_assert(block->bbNext);
+
+ /* Fall through, the next block is also reachable */
+ __fallthrough;
+
+ case BBJ_NONE:
+
+ fgAddRefPred(block->bbNext, block, nullptr, true);
+ break;
+
+ case BBJ_EHFILTERRET:
+
+ // Connect end of filter to catch handler.
+ // In a well-formed program, this cannot be null. Tolerate here, so that we can call
+ // fgComputePreds before fgImport on an ill-formed program; the problem will be detected in fgImport.
+ if (block->bbJumpDest != nullptr)
+ {
+ fgAddRefPred(block->bbJumpDest, block, nullptr, true);
+ }
+ break;
+
+ case BBJ_EHFINALLYRET:
+ {
+ /* Connect the end of the finally to the successor of
+ the call to this finally */
+
+ if (!block->hasHndIndex())
+ {
+ NO_WAY("endfinally outside a finally/fault block.");
+ }
+
+ unsigned hndIndex = block->getHndIndex();
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex);
+
+ if (!ehDsc->HasFinallyOrFaultHandler())
+ {
+ NO_WAY("endfinally outside a finally/fault block.");
+ }
+
+ if (ehDsc->HasFinallyHandler())
+ {
+ // Find all BBJ_CALLFINALLY that branched to this finally handler.
+ BasicBlock* begBlk;
+ BasicBlock* endBlk;
+ ehGetCallFinallyBlockRange(hndIndex, &begBlk, &endBlk);
+
+ BasicBlock* finBeg = ehDsc->ebdHndBeg;
+ for (BasicBlock* bcall = begBlk; bcall != endBlk; bcall = bcall->bbNext)
+ {
+ if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg)
+ {
+ continue;
+ }
+
+ noway_assert(bcall->isBBCallAlwaysPair());
+ fgAddRefPred(bcall->bbNext, block, nullptr, true);
+ }
+ }
+ }
+ break;
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ break;
+
+ case BBJ_SWITCH:
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ /* Mark the target block as being a jump target */
+ (*jumpTab)->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ fgAddRefPred(*jumpTab, block, nullptr, true);
+ } while (++jumpTab, --jumpCnt);
+
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+
+ for (unsigned EHnum = 0; EHnum < compHndBBtabCount; EHnum++)
+ {
+ EHblkDsc* ehDsc = ehGetDsc(EHnum);
+
+ if (ehDsc->HasFilter())
+ {
+ ehDsc->ebdFilter->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+
+ ehDsc->ebdHndBeg->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+
+ fgModified = false;
+ fgComputePredsDone = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgComputePreds()\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif
+}
+
+unsigned Compiler::fgNSuccsOfFinallyRet(BasicBlock* block)
+{
+ BasicBlock* bb;
+ unsigned res;
+ fgSuccOfFinallyRetWork(block, ~0, &bb, &res);
+ return res;
+}
+
+BasicBlock* Compiler::fgSuccOfFinallyRet(BasicBlock* block, unsigned i)
+{
+ BasicBlock* bb;
+ unsigned res;
+ fgSuccOfFinallyRetWork(block, i, &bb, &res);
+ return bb;
+}
+
+void Compiler::fgSuccOfFinallyRetWork(BasicBlock* block, unsigned i, BasicBlock** bres, unsigned* nres)
+{
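+ // Shared worker for fgNSuccsOfFinallyRet (called with i == ~0, which only counts
+ // the successors and returns the count through 'nres') and fgSuccOfFinallyRet
+ // (which returns the i'th successor through 'bres').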
+ assert(block->hasHndIndex()); // Otherwise, endfinally outside a finally/fault block?
+
+ unsigned hndIndex = block->getHndIndex();
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex);
+
+ assert(ehDsc->HasFinallyOrFaultHandler()); // Otherwise, endfinally outside a finally/fault block.
+
+ *bres = nullptr;
+ unsigned succNum = 0;
+
+ if (ehDsc->HasFinallyHandler())
+ {
+ BasicBlock* begBlk;
+ BasicBlock* endBlk;
+ ehGetCallFinallyBlockRange(hndIndex, &begBlk, &endBlk);
+
+ BasicBlock* finBeg = ehDsc->ebdHndBeg;
+
+ for (BasicBlock* bcall = begBlk; bcall != endBlk; bcall = bcall->bbNext)
+ {
+ if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg)
+ {
+ continue;
+ }
+
+ assert(bcall->isBBCallAlwaysPair());
+
+ if (succNum == i)
+ {
+ *bres = bcall->bbNext;
+ return;
+ }
+ succNum++;
+ }
+ }
+ assert(i == ~0u || ehDsc->HasFaultHandler()); // Should reach here only for fault blocks.
+ if (i == ~0u)
+ {
+ *nres = succNum;
+ }
+}
+
+Compiler::SwitchUniqueSuccSet Compiler::GetDescriptorForSwitch(BasicBlock* switchBlk)
+{
+ assert(switchBlk->bbJumpKind == BBJ_SWITCH);
+ BlockToSwitchDescMap* switchMap = GetSwitchDescMap();
+ SwitchUniqueSuccSet res;
+ if (switchMap->Lookup(switchBlk, &res))
+ {
+ return res;
+ }
+ else
+ {
+ // We must compute the descriptor. Find which targets are duplicates by creating a bit set of the unique
+ // successors.
+ // We create a temporary bitset of blocks to compute the unique set of successor blocks,
+ // since adding a block's number twice leaves just one "copy" in the bitset. Note that
+ // we specifically don't use the BlockSet type, because doing so would require making a
+ // call to EnsureBasicBlockEpoch() to make sure the epoch is up-to-date. However, that
+ // can create a new epoch, thus invalidating all existing BlockSet objects, such as
+ // reachability information stored in the blocks. To avoid that, we just use a local BitVec.
+
+ BitVecTraits blockVecTraits(fgBBNumMax + 1, this);
+ BitVec BITVEC_INIT_NOCOPY(uniqueSuccBlocks, BitVecOps::MakeEmpty(&blockVecTraits));
+ BasicBlock** jumpTable = switchBlk->bbJumpSwt->bbsDstTab;
+ unsigned jumpCount = switchBlk->bbJumpSwt->bbsCount;
+ for (unsigned i = 0; i < jumpCount; i++)
+ {
+ BasicBlock* targ = jumpTable[i];
+ BitVecOps::AddElemD(&blockVecTraits, uniqueSuccBlocks, targ->bbNum);
+ }
+ // Now we have a set of unique successors.
+ unsigned numNonDups = BitVecOps::Count(&blockVecTraits, uniqueSuccBlocks);
+
+ typedef BasicBlock* BasicBlockPtr;
+ BasicBlockPtr* nonDups = new (getAllocator()) BasicBlockPtr[numNonDups];
+
+ unsigned nonDupInd = 0;
+ // At this point, all unique targets are in "uniqueSuccBlocks". As we encounter each,
+ // add to nonDups, remove from "uniqueSuccBlocks".
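+ // Removing each target from the set as it is recorded keeps only the first
+ // occurrence of each successor, in jump-table order, and skips later duplicates.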
+ for (unsigned i = 0; i < jumpCount; i++)
+ {
+ BasicBlock* targ = jumpTable[i];
+ if (BitVecOps::IsMember(&blockVecTraits, uniqueSuccBlocks, targ->bbNum))
+ {
+ nonDups[nonDupInd] = targ;
+ nonDupInd++;
+ BitVecOps::RemoveElemD(&blockVecTraits, uniqueSuccBlocks, targ->bbNum);
+ }
+ }
+
+ assert(nonDupInd == numNonDups);
+ assert(BitVecOps::Count(&blockVecTraits, uniqueSuccBlocks) == 0);
+ res.numDistinctSuccs = numNonDups;
+ res.nonDuplicates = nonDups;
+ switchMap->Set(switchBlk, res);
+ return res;
+ }
+}
+
+void Compiler::SwitchUniqueSuccSet::UpdateTarget(IAllocator* alloc,
+ BasicBlock* switchBlk,
+ BasicBlock* from,
+ BasicBlock* to)
+{
+ assert(switchBlk->bbJumpKind == BBJ_SWITCH); // Precondition.
+ unsigned jmpTabCnt = switchBlk->bbJumpSwt->bbsCount;
+ BasicBlock** jmpTab = switchBlk->bbJumpSwt->bbsDstTab;
+
+ // Is "from" still in the switch table (because it had more than one entry before?)
+ bool fromStillPresent = false;
+ for (unsigned i = 0; i < jmpTabCnt; i++)
+ {
+ if (jmpTab[i] == from)
+ {
+ fromStillPresent = true;
+ break;
+ }
+ }
+
+ // Is "to" already in "this"?
+ bool toAlreadyPresent = false;
+ for (unsigned i = 0; i < numDistinctSuccs; i++)
+ {
+ if (nonDuplicates[i] == to)
+ {
+ toAlreadyPresent = true;
+ break;
+ }
+ }
+
+ // Four cases:
+ // If "from" is still present, and "to" is already present, do nothing
+ // If "from" is still present, and "to" is not, must reallocate to add an entry.
+ // If "from" is not still present, and "to" is not present, write "to" where "from" was.
+ // If "from" is not still present, but "to" is present, remove "from".
+ if (fromStillPresent && toAlreadyPresent)
+ {
+ return;
+ }
+ else if (fromStillPresent && !toAlreadyPresent)
+ {
+ // reallocate to add an entry
+ typedef BasicBlock* BasicBlockPtr;
+ BasicBlockPtr* newNonDups = new (alloc) BasicBlockPtr[numDistinctSuccs + 1];
+ memcpy(newNonDups, nonDuplicates, numDistinctSuccs * sizeof(BasicBlock*));
+ newNonDups[numDistinctSuccs] = to;
+ numDistinctSuccs++;
+ nonDuplicates = newNonDups;
+ }
+ else if (!fromStillPresent && !toAlreadyPresent)
+ {
+#ifdef DEBUG
+ // write "to" where "from" was
+ bool foundFrom = false;
+#endif // DEBUG
+ for (unsigned i = 0; i < numDistinctSuccs; i++)
+ {
+ if (nonDuplicates[i] == from)
+ {
+ nonDuplicates[i] = to;
+#ifdef DEBUG
+ foundFrom = true;
+#endif // DEBUG
+ break;
+ }
+ }
+ assert(foundFrom);
+ }
+ else
+ {
+ assert(!fromStillPresent && toAlreadyPresent);
+#ifdef DEBUG
+ // remove "from".
+ bool foundFrom = false;
+#endif // DEBUG
+ for (unsigned i = 0; i < numDistinctSuccs; i++)
+ {
+ if (nonDuplicates[i] == from)
+ {
+ nonDuplicates[i] = nonDuplicates[numDistinctSuccs - 1];
+ numDistinctSuccs--;
+#ifdef DEBUG
+ foundFrom = true;
+#endif // DEBUG
+ break;
+ }
+ }
+ assert(foundFrom);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Simple utility function to remove the entry for a block from the switch
+ * descriptor map, so that it can be called from other phases.
+ *
+ */
+void Compiler::fgInvalidateSwitchDescMapEntry(BasicBlock* block)
+{
+ // Check if map has no entries yet.
+ if (m_switchDescMap != nullptr)
+ {
+ m_switchDescMap->Remove(block);
+ }
+}
+
+void Compiler::UpdateSwitchTableTarget(BasicBlock* switchBlk, BasicBlock* from, BasicBlock* to)
+{
+ if (m_switchDescMap == nullptr)
+ {
+ return; // No mappings, nothing to do.
+ }
+
+ // Otherwise...
+ BlockToSwitchDescMap* switchMap = GetSwitchDescMap();
+ SwitchUniqueSuccSet* res = switchMap->LookupPointer(switchBlk);
+ if (res != nullptr)
+ {
+ // If no result, nothing to do. Otherwise, update it.
+ res->UpdateTarget(getAllocator(), switchBlk, from, to);
+ }
+}
+
+/*****************************************************************************
+ * For a block that is in a handler region, find the first block of the most-nested
+ * handler containing the block.
+ */
+BasicBlock* Compiler::fgFirstBlockOfHandler(BasicBlock* block)
+{
+ assert(block->hasHndIndex());
+ return ehGetDsc(block->getHndIndex())->ebdHndBeg;
+}
+
+/*****************************************************************************
+ *
+ * Function called to find back edges and return blocks and mark them as needing GC Polls. All blocks
+ * are examined; only those with a backward branch (or a return) are marked.
+ */
+void Compiler::fgMarkGCPollBlocks()
+{
+ if (GCPOLL_NONE == opts.compGCPollType)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
+ fgDebugCheckBBlist();
+#endif
+
+ BasicBlock* block;
+
+ // Return blocks always need GC polls. In addition, all back edges (including those from switch
+ // statements) need GC polls. The poll is on the block with the outgoing back edge (or ret), rather than
+ // on the destination or on the edge itself.
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ bool blockNeedsPoll = false;
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ blockNeedsPoll = (block->bbJumpDest->bbNum <= block->bbNum);
+ break;
+
+ case BBJ_RETURN:
+ blockNeedsPoll = true;
+ break;
+
+ case BBJ_SWITCH:
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ if ((*jumpTab)->bbNum <= block->bbNum)
+ {
+ blockNeedsPoll = true;
+ break;
+ }
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ break;
+ }
+
+ if (blockNeedsPoll)
+ {
+ block->bbFlags |= BBF_NEEDS_GCPOLL;
+ }
+ }
+}
+
+void Compiler::fgInitBlockVarSets()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ block->InitVarSets(this);
+ }
+
+ // QMarks are much like blocks, and need their VarSets initialized.
+ assert(!compIsForInlining());
+ for (unsigned i = 0; i < compQMarks->Size(); i++)
+ {
+ GenTreePtr qmark = compQMarks->Get(i);
+ // Perhaps the gtOper of a QMark node was changed to something else since it was created and put on this list.
+ // So can't hurt to check.
+ if (qmark->OperGet() == GT_QMARK)
+ {
+ VarSetOps::AssignAllowUninitRhs(this, qmark->gtQmark.gtThenLiveSet, VarSetOps::UninitVal());
+ VarSetOps::AssignAllowUninitRhs(this, qmark->gtQmark.gtElseLiveSet, VarSetOps::UninitVal());
+ }
+ }
+ fgBBVarSetsInited = true;
+}
+
+/*****************************************************************************
+ *
+ * The following does the final pass on BBF_NEEDS_GCPOLL and then actually creates the GC Polls.
+ */
+void Compiler::fgCreateGCPolls()
+{
+ if (GCPOLL_NONE == opts.compGCPollType)
+ {
+ return;
+ }
+
+ bool createdPollBlocks = false;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgCreateGCPolls() for %s\n", info.compFullName);
+ }
+#endif // DEBUG
+
+ if (!(opts.MinOpts() || opts.compDbgCode))
+ {
+ // Remove polls from well formed loops with a constant upper bound.
+ for (unsigned lnum = 0; lnum < optLoopCount; ++lnum)
+ {
+ // Look for constant counted loops that run for a short duration. This logic is very similar to
+ // what's in code:Compiler::optUnrollLoops, since they have similar constraints. However, this
+ // logic is much more permissive since we're not doing a complex transformation.
+
+ /* TODO-Cleanup:
+ * I feel bad cloning so much logic from optUnrollLoops
+ */
+
+ // Filter out loops not meeting the obvious preconditions.
+ //
+ if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ if (!(optLoopTable[lnum].lpFlags & LPFLG_CONST))
+ {
+ continue;
+ }
+
+ BasicBlock* head = optLoopTable[lnum].lpHead;
+ BasicBlock* bottom = optLoopTable[lnum].lpBottom;
+
+ // Loops dominated by GC_SAFE_POINT won't have this set.
+ if (!(bottom->bbFlags & BBF_NEEDS_GCPOLL))
+ {
+ continue;
+ }
+
+ /* Get the loop data:
+ - initial constant
+ - limit constant
+ - iterator
+ - iterator increment
+ - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
+ - loop test type (i.e. GT_GE, GT_LT, etc...)
+ */
+
+ int lbeg = optLoopTable[lnum].lpConstInit;
+ int llim = optLoopTable[lnum].lpConstLimit();
+ genTreeOps testOper = optLoopTable[lnum].lpTestOper();
+
+ int lvar = optLoopTable[lnum].lpIterVar();
+ int iterInc = optLoopTable[lnum].lpIterConst();
+ genTreeOps iterOper = optLoopTable[lnum].lpIterOper();
+
+ var_types iterOperType = optLoopTable[lnum].lpIterOperType();
+ bool unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
+ if (lvaTable[lvar].lvAddrExposed)
+ { // Can't reason about the value of the iteration variable.
+ continue;
+ }
+
+ unsigned totalIter;
+
+ /* Find the number of iterations - the function returns false if not a constant number */
+
+ if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest,
+ // The value here doesn't matter for this variation of the optimization
+ true, &totalIter))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Could not compute loop iterations for loop from BB%02u to BB%02u", head->bbNum,
+ bottom->bbNum);
+ }
+#endif // DEBUG
+ (void)head; // suppress gcc error.
+
+ continue;
+ }
+
+ /* Forget it if there are too many repetitions or not a constant loop */
+
+ static const unsigned ITER_LIMIT = 256;
+ if (totalIter > ITER_LIMIT)
+ {
+ continue;
+ }
+
+ // It is safe to eliminate the poll from this loop.
+ bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing poll in block BB%02u because it forms a bounded counted loop\n", bottom->bbNum);
+ }
+#endif // DEBUG
+ }
+ }
+
+ // Final chance to optimize the polls. Move all polls in loops from the bottom of the loop up to the
+ // loop head. Also eliminate all epilog polls in non-leaf methods. This only works if we have dominator
+ // information.
+ if (fgDomsComputed)
+ {
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (!(block->bbFlags & BBF_NEEDS_GCPOLL))
+ {
+ continue;
+ }
+
+ if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_ALWAYS)
+ {
+ // make sure that this is loop-like
+ if (!fgReachable(block->bbJumpDest, block))
+ {
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing poll in block BB%02u because it is not loop\n", block->bbNum);
+ }
+#endif // DEBUG
+ continue;
+ }
+ }
+ else if (!(block->bbJumpKind == BBJ_RETURN || block->bbJumpKind == BBJ_SWITCH))
+ {
+ noway_assert(!"GC Poll on a block that has no control transfer.");
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing poll in block BB%02u because it is not a jump\n", block->bbNum);
+ }
+#endif // DEBUG
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ continue;
+ }
+
+ // Because of block compaction, it's possible to end up with a block that is both poll and safe.
+ // Clean those up now.
+
+ if (block->bbFlags & BBF_GC_SAFE_POINT)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing poll in return block BB%02u because it is GC Safe\n", block->bbNum);
+ }
+#endif // DEBUG
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ continue;
+ }
+
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ if (!optReachWithoutCall(fgFirstBB, block))
+ {
+ // check to see if there is a call along the path between the first block and the return
+ // block.
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing poll in return block BB%02u because it dominated by a call\n", block->bbNum);
+ }
+#endif // DEBUG
+ continue;
+ }
+ }
+ }
+ }
+
+ noway_assert(!fgGCPollsCreated);
+ BasicBlock* block;
+ fgGCPollsCreated = true;
+
+ // Walk through the blocks and hunt for a block that has BBF_NEEDS_GCPOLL
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ // Because of block compaction, it's possible to end up with a block that is both poll and safe.
+ // And if !fgDomsComputed, we won't have cleared them, so skip them now
+ if (!(block->bbFlags & BBF_NEEDS_GCPOLL) || (block->bbFlags & BBF_GC_SAFE_POINT))
+ {
+ continue;
+ }
+
+ // This block needs a poll. We either just insert a callout or we split the block and inline part of
+ // the test. This depends on the value of opts.compGCPollType.
+
+ // If we're doing GCPOLL_CALL, just insert a GT_CALL node before the last node in the block.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ switch (block->bbJumpKind)
+ {
+ case BBJ_RETURN:
+ case BBJ_ALWAYS:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+ default:
+ noway_assert(!"Unknown block type for BBF_NEEDS_GCPOLL");
+ }
+#endif // DEBUG
+
+ noway_assert(opts.compGCPollType);
+
+ GCPollType pollType = opts.compGCPollType;
+ // pollType is set to either CALL or INLINE at this point. Below is the list of places where we
+ // can't or don't want to emit an inline check. Check all of those. If after all of that we still
+ // have INLINE, then emit an inline check.
+
+ if (opts.MinOpts() || opts.compDbgCode)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Selecting CALL poll in block BB%02u because of debug/minopts\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ // Don't split blocks and create inlined polls unless we're optimizing.
+ pollType = GCPOLL_CALL;
+ }
+ else if (genReturnBB == block)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Selecting CALL poll in block BB%02u because it is the single return block\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ // we don't want to split the single return block
+ pollType = GCPOLL_CALL;
+ }
+ else if (BBJ_SWITCH == block->bbJumpKind)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Selecting CALL poll in block BB%02u because it is a loop formed by a SWITCH\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ // I don't want to deal with all the outgoing edges of a switch block.
+ pollType = GCPOLL_CALL;
+ }
+
+ // TODO-Cleanup: potentially don't split if we're in an EH region.
+
+ createdPollBlocks |= fgCreateGCPoll(pollType, block);
+ }
+
+ // If we split a block to create a GC Poll, then rerun fgReorderBlocks to push the rarely run blocks out
+ // past the epilog. We should never split blocks unless we're optimizing.
+ if (createdPollBlocks)
+ {
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+ fgReorderBlocks();
+ }
+}
+
+/*****************************************************************************
+ *
+ * Actually create a GCPoll in the given block. Returns true if it created
+ * a basic block.
+ */
+
+bool Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
+{
+ assert(!(block->bbFlags & BBF_GC_SAFE_POINT));
+ bool createdPollBlocks;
+
+ void* addrTrap;
+ void* pAddrOfCaptureThreadGlobal;
+
+ addrTrap = info.compCompHnd->getAddrOfCaptureThreadGlobal(&pAddrOfCaptureThreadGlobal);
+
+#ifdef ENABLE_FAST_GCPOLL_HELPER
+ // I never want to split blocks if we've got two indirections here.
+ // This is a size trade-off assuming the VM has ENABLE_FAST_GCPOLL_HELPER,
+ // so don't do it when that is off.
+ if (pAddrOfCaptureThreadGlobal != NULL)
+ {
+ pollType = GCPOLL_CALL;
+ }
+#endif // ENABLE_FAST_GCPOLL_HELPER
+
+ if (GCPOLL_CALL == pollType)
+ {
+ createdPollBlocks = false;
+ GenTreePtr tree = gtNewHelperCallNode(CORINFO_HELP_POLL_GC, TYP_VOID);
+#if GTF_CALL_REG_SAVE
+ tree->gtCall.gtCallMoreFlags |= GTF_CALL_REG_SAVE;
+#endif // GTF_CALL_REG_SAVE
+
+ // for BBJ_ALWAYS I don't need to insert it before the condition. Just append it.
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ fgInsertStmtAtEnd(block, tree);
+ }
+ else
+ {
+ GenTreeStmt* newStmt = fgInsertStmtNearEnd(block, tree);
+ // For DDB156656, we need to associate the GC Poll with the IL offset (and therefore sequence
+ // point) of the tree before which we inserted the poll. One example of when this is a
+ // problem:
+ // if (...) { //1
+ // ...
+ // } //2
+ // else { //3
+ // ...
+ // }
+ // (gcpoll) //4
+ // return. //5
+ //
+ // If we take the if statement at 1, we encounter a jump at 2. This jumps over the else
+ // and lands at 4. 4 is where we inserted the gcpoll. However, that is associated with
+ // the sequence point at 3. Therefore, the debugger displays the wrong source line at the
+ // gc poll location.
+ //
+ // More formally, if control flow targets an instruction, that instruction must be the
+ // start of a new sequence point.
+ if (newStmt->gtNext)
+ {
+ // Is it possible for gtNext to be NULL?
+ noway_assert(newStmt->gtNext->gtOper == GT_STMT);
+ newStmt->gtStmtILoffsx = newStmt->gtNextStmt->gtStmtILoffsx;
+ }
+ }
+
+ block->bbFlags |= BBF_GC_SAFE_POINT;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*** creating GC Poll in block BB%02u\n", block->bbNum);
+ gtDispTreeList(block->bbTreeList);
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ createdPollBlocks = true;
+ // if we're doing GCPOLL_INLINE, then:
+ // 1) Create two new blocks: Poll and Bottom. The original block is called Top.
+
+ // I want to create:
+ // top -> poll -> bottom (lexically)
+ // so that we jump over poll to get to bottom.
+ BasicBlock* top = block;
+ BasicBlock* poll = fgNewBBafter(BBJ_NONE, top, true);
+ BasicBlock* bottom = fgNewBBafter(top->bbJumpKind, poll, true);
+ BBjumpKinds oldJumpKind = top->bbJumpKind;
+
+ // Update block flags
+ unsigned originalFlags;
+ originalFlags = top->bbFlags | BBF_GC_SAFE_POINT;
+
+ // Unlike Fei's inliner from puclr, I'm allowed to split loops.
+ // And we keep a few other flags...
+ noway_assert((originalFlags & (BBF_SPLIT_NONEXIST & ~(BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1))) == 0);
+ top->bbFlags = originalFlags & (~BBF_SPLIT_LOST | BBF_GC_SAFE_POINT);
+ bottom->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT);
+ bottom->inheritWeight(top);
+ poll->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT);
+
+ // 9) Mark Poll as rarely run.
+ poll->bbSetRunRarely();
+
+ // 5) Bottom gets all the outgoing edges and inherited flags of Original.
+ bottom->bbJumpDest = top->bbJumpDest;
+
+ // 2) Add a GC_CALL node to Poll.
+ GenTreePtr tree = gtNewHelperCallNode(CORINFO_HELP_POLL_GC, TYP_VOID);
+#if GTF_CALL_REG_SAVE
+ tree->gtCall.gtCallMoreFlags |= GTF_CALL_REG_SAVE;
+#endif // GTF_CALL_REG_SAVE
+ fgInsertStmtAtEnd(poll, tree);
+
+ // 3) Remove the last statement from Top and add it to Bottom.
+ if (oldJumpKind != BBJ_ALWAYS)
+ {
+ // if I'm always jumping to the target, then this is not a condition that needs moving.
+ GenTreeStmt* stmt = top->firstStmt();
+ while (stmt->gtNext)
+ {
+ stmt = stmt->gtNextStmt;
+ }
+ fgRemoveStmt(top, stmt);
+ fgInsertStmtAtEnd(bottom, stmt);
+ }
+
+ // for BBJ_ALWAYS blocks, bottom is an empty block.
+
+ // 4) Create a GT_EQ node that checks against g_TrapReturningThreads. True jumps to Bottom,
+ // false falls through to poll. Add this to the end of Top. Top is now BBJ_COND. Bottom is
+ // now a jump target
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef ENABLE_FAST_GCPOLL_HELPER
+ // Prefer the fast GC poll helper over the double indirection
+ noway_assert(pAddrOfCaptureThreadGlobal == nullptr);
+#endif
+
+ GenTreePtr trap;
+ if (pAddrOfCaptureThreadGlobal != nullptr)
+ {
+ trap = gtNewOperNode(GT_IND, TYP_I_IMPL,
+ gtNewIconHandleNode((size_t)pAddrOfCaptureThreadGlobal, GTF_ICON_PTR_HDL));
+ }
+ else
+ {
+ trap = gtNewIconHandleNode((size_t)addrTrap, GTF_ICON_PTR_HDL);
+ }
+
+ GenTreePtr trapRelop = gtNewOperNode(GT_EQ, TYP_INT,
+ // lhs [g_TrapReturningThreads]
+ gtNewOperNode(GT_IND, TYP_INT, trap),
+ // rhs 0
+ gtNewIconNode(0, TYP_INT));
+ trapRelop->gtFlags |= GTF_RELOP_JMP_USED | GTF_DONT_CSE; // Treat reading g_TrapReturningThreads as volatile.
+ GenTreePtr trapCheck = gtNewOperNode(GT_JTRUE, TYP_VOID, trapRelop);
+ fgInsertStmtAtEnd(top, trapCheck);
+ top->bbJumpDest = bottom;
+ top->bbJumpKind = BBJ_COND;
+ bottom->bbFlags |= BBF_JMP_TARGET;
+
+ // 7) Bottom has Top and Poll as its predecessors. Poll has just Top as a predecessor.
+ fgAddRefPred(bottom, poll);
+ fgAddRefPred(bottom, top);
+ fgAddRefPred(poll, top);
+
+ // 8) Replace Top with Bottom in the predecessor list of all outgoing edges from Bottom (1 for
+ // jumps, 2 for conditional branches, N for switches).
+ switch (oldJumpKind)
+ {
+ case BBJ_RETURN:
+ // no successors
+ break;
+ case BBJ_COND:
+ // replace predecessor in the fall through block.
+ noway_assert(bottom->bbNext);
+ fgReplacePred(bottom->bbNext, top, bottom);
+
+ // fall through for the jump target
+ __fallthrough;
+
+ case BBJ_ALWAYS:
+ fgReplacePred(bottom->bbJumpDest, top, bottom);
+ break;
+ case BBJ_SWITCH:
+ NO_WAY("SWITCH should be a call rather than an inlined poll.");
+ break;
+ default:
+ NO_WAY("Unknown block type for updating predecessor lists.");
+ }
+
+ top->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ noway_assert(!(poll->bbFlags & BBF_NEEDS_GCPOLL));
+ noway_assert(!(bottom->bbFlags & BBF_NEEDS_GCPOLL));
+
+ if (compCurBB == top)
+ {
+ compCurBB = bottom;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*** creating inlined GC Poll in top block BB%02u\n", top->bbNum);
+ gtDispTreeList(top->bbTreeList);
+ printf(" poll block is BB%02u\n", poll->bbNum);
+ gtDispTreeList(poll->bbTreeList);
+ printf(" bottom block is BB%02u\n", bottom->bbNum);
+ gtDispTreeList(bottom->bbTreeList);
+ }
+#endif // DEBUG
+ }
+
+ return createdPollBlocks;
+}
+
+/*****************************************************************************
+ *
+ * The following helps find a basic block given its PC offset.
+ */
+
+void Compiler::fgInitBBLookup()
+{
+ BasicBlock** dscBBptr;
+ BasicBlock* tmpBBdesc;
+
+ /* Allocate the basic block table */
+
+ dscBBptr = fgBBs = new (this, CMK_BasicBlock) BasicBlock*[fgBBcount];
+
+ /* Walk all the basic blocks, filling in the table */
+
+ for (tmpBBdesc = fgFirstBB; tmpBBdesc; tmpBBdesc = tmpBBdesc->bbNext)
+ {
+ *dscBBptr++ = tmpBBdesc;
+ }
+
+ noway_assert(dscBBptr == fgBBs + fgBBcount);
+}
+
+BasicBlock* Compiler::fgLookupBB(unsigned addr)
+{
+ unsigned lo;
+ unsigned hi;
+
+ /* Do a binary search */
+
+ for (lo = 0, hi = fgBBcount - 1;;)
+ {
+
+ AGAIN:;
+
+ if (lo > hi)
+ {
+ break;
+ }
+
+ unsigned mid = (lo + hi) / 2;
+ BasicBlock* dsc = fgBBs[mid];
+
+ // We introduce internal blocks for BBJ_CALLFINALLY. Skip over these.
+
+ while (dsc->bbFlags & BBF_INTERNAL)
+ {
+ dsc = dsc->bbNext;
+ mid++;
+
+ // We skipped over too many; set hi back to the original mid - 1
+
+ if (mid > hi)
+ {
+ mid = (lo + hi) / 2;
+ hi = mid - 1;
+ goto AGAIN;
+ }
+ }
+
+ unsigned pos = dsc->bbCodeOffs;
+
+ if (pos < addr)
+ {
+ if ((lo == hi) && (lo == (fgBBcount - 1)))
+ {
+ noway_assert(addr == dsc->bbCodeOffsEnd);
+ return nullptr; // NULL means the end of method
+ }
+ lo = mid + 1;
+ continue;
+ }
+
+ if (pos > addr)
+ {
+ hi = mid - 1;
+ continue;
+ }
+
+ return dsc;
+ }
+#ifdef DEBUG
+ printf("ERROR: Couldn't find basic block at offset %04X\n", addr);
+#endif // DEBUG
+ NO_WAY("fgLookupBB failed.");
+}
+
+/*****************************************************************************
+ *
+ * The 'jump target' array uses the following flags to indicate what kind
+ * of label is present.
+ */
+
+#define JT_NONE 0x00 // This IL offset is never used
+#define JT_ADDR 0x01 // merely make sure this is an OK address
+#define JT_JUMP 0x02 // 'normal' jump target
+#define JT_MULTI 0x04 // target of multiple jumps
+
+inline void Compiler::fgMarkJumpTarget(BYTE* jumpTarget, unsigned offs)
+{
+ /* Make sure we set JT_MULTI if target of multiple jumps */
+
+ noway_assert(JT_MULTI == JT_JUMP << 1);
+
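+ // If JT_JUMP was already set for this offset, the shifted bit below also sets
+ // JT_MULTI; otherwise only JT_JUMP is set.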
+ jumpTarget[offs] |= (jumpTarget[offs] & JT_JUMP) << 1 | JT_JUMP;
+}
+
+//------------------------------------------------------------------------
+// FgStack: simple stack model for the inlinee's evaluation stack.
+//
+// Model the inputs available to various operations in the inline body.
+// Tracks constants, arguments, array lengths.
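+//
+// For example, after the IL sequence "ldarg.0; ldc.i4 5", slot1 models the
+// argument and slot0 the constant; at a following compare or branch this feeds
+// the CALLEE_ARG_FEEDS_CONSTANT_TEST observation in fgObserveInlineConstants.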
+
+class FgStack
+{
+public:
+ FgStack() : slot0(SLOT_INVALID), slot1(SLOT_INVALID), depth(0)
+ {
+ // Empty
+ }
+
+ void Clear()
+ {
+ depth = 0;
+ }
+ void PushUnknown()
+ {
+ Push(SLOT_UNKNOWN);
+ }
+ void PushConstant()
+ {
+ Push(SLOT_CONSTANT);
+ }
+ void PushArrayLen()
+ {
+ Push(SLOT_ARRAYLEN);
+ }
+ void PushArgument(unsigned arg)
+ {
+ Push(SLOT_ARGUMENT + arg);
+ }
+ unsigned GetSlot0() const
+ {
+ assert(depth >= 1);
+ return slot0;
+ }
+ unsigned GetSlot1() const
+ {
+ assert(depth >= 2);
+ return slot1;
+ }
+ static bool IsConstant(unsigned value)
+ {
+ return value == SLOT_CONSTANT;
+ }
+ static bool IsArrayLen(unsigned value)
+ {
+ return value == SLOT_ARRAYLEN;
+ }
+ static bool IsArgument(unsigned value)
+ {
+ return value >= SLOT_ARGUMENT;
+ }
+ static unsigned SlotTypeToArgNum(unsigned value)
+ {
+ assert(IsArgument(value));
+ return value - SLOT_ARGUMENT;
+ }
+ bool IsStackTwoDeep() const
+ {
+ return depth == 2;
+ }
+ bool IsStackOneDeep() const
+ {
+ return depth == 1;
+ }
+ bool IsStackAtLeastOneDeep() const
+ {
+ return depth >= 1;
+ }
+
+private:
+ enum
+ {
+ SLOT_INVALID = UINT_MAX,
+ SLOT_UNKNOWN = 0,
+ SLOT_CONSTANT = 1,
+ SLOT_ARRAYLEN = 2,
+ SLOT_ARGUMENT = 3
+ };
+
+ void Push(int type)
+ {
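+ // Only the top two stack slots are modeled: on a push at depth 2 the old slot1
+ // value is discarded, and the tracked depth saturates at 2.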
+ switch (depth)
+ {
+ case 0:
+ ++depth;
+ slot0 = type;
+ break;
+ case 1:
+ ++depth;
+ __fallthrough;
+ case 2:
+ slot1 = slot0;
+ slot0 = type;
+ }
+ }
+
+ unsigned slot0;
+ unsigned slot1;
+ unsigned depth;
+};
+
+//------------------------------------------------------------------------
+// fgFindJumpTargets: walk the IL stream, determining jump target offsets
+//
+// Arguments:
+// codeAddr - base address of the IL code buffer
+// codeSize - number of bytes in the IL code buffer
+// jumpTarget - [OUT] byte array for flagging jump targets
+//
+// Notes:
+// If inlining or prejitting the root, this method also makes
+// various observations about the method that factor into inline
+// decisions.
+//
+// May throw an exception if the IL is malformed.
+//
+// jumpTarget[N] is set to a JT_* value if IL offset N is a
+// jump target in the method.
+//
+// Also sets lvAddrExposed and lvArgWrite in lvaTable[].
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+
+void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget)
+{
+ const BYTE* codeBegp = codeAddr;
+ const BYTE* codeEndp = codeAddr + codeSize;
+ unsigned varNum;
+ bool seenJump = false;
+ var_types varType = DUMMY_INIT(TYP_UNDEF); // TYP_ type
+ typeInfo ti; // Verifier type.
+ bool typeIsNormed = false;
+ FgStack pushedStack;
+ const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
+ const bool makeInlineObservations = (compInlineResult != nullptr);
+ const bool isInlining = compIsForInlining();
+
+ if (makeInlineObservations)
+ {
+ // Observe force inline state and code size.
+ compInlineResult->NoteBool(InlineObservation::CALLEE_IS_FORCE_INLINE, isForceInline);
+ compInlineResult->NoteInt(InlineObservation::CALLEE_IL_CODE_SIZE, codeSize);
+
+#ifdef DEBUG
+
+ // If inlining, this method should still be a candidate.
+ if (isInlining)
+ {
+ assert(compInlineResult->IsCandidate());
+ }
+
+#endif // DEBUG
+
+ // note that we're starting to look at the opcodes.
+ compInlineResult->Note(InlineObservation::CALLEE_BEGIN_OPCODE_SCAN);
+ }
+
+ while (codeAddr < codeEndp)
+ {
+ OPCODE opcode = (OPCODE)getU1LittleEndian(codeAddr);
+ codeAddr += sizeof(__int8);
+ opts.instrCount++;
+ typeIsNormed = false;
+
+ DECODE_OPCODE:
+
+ if (opcode >= CEE_COUNT)
+ {
+ BADCODE3("Illegal opcode", ": %02X", (int)opcode);
+ }
+
+ if ((opcode >= CEE_LDARG_0 && opcode <= CEE_STLOC_S) || (opcode >= CEE_LDARG && opcode <= CEE_STLOC))
+ {
+ opts.lvRefCount++;
+ }
+
+ if (makeInlineObservations && (opcode >= CEE_LDNULL) && (opcode <= CEE_LDC_R8))
+ {
+ pushedStack.PushConstant();
+ }
+
+ unsigned sz = opcodeSizes[opcode];
+
+ switch (opcode)
+ {
+ case CEE_PREFIX1:
+ {
+ if (codeAddr >= codeEndp)
+ {
+ goto TOO_FAR;
+ }
+ opcode = (OPCODE)(256 + getU1LittleEndian(codeAddr));
+ codeAddr += sizeof(__int8);
+ goto DECODE_OPCODE;
+ }
+
+ case CEE_PREFIX2:
+ case CEE_PREFIX3:
+ case CEE_PREFIX4:
+ case CEE_PREFIX5:
+ case CEE_PREFIX6:
+ case CEE_PREFIX7:
+ case CEE_PREFIXREF:
+ {
+ BADCODE3("Illegal opcode", ": %02X", (int)opcode);
+ }
+
+ case CEE_CALL:
+ case CEE_CALLVIRT:
+ {
+ // There has to be code after the call, otherwise the inlinee is unverifiable.
+ if (isInlining)
+ {
+
+ noway_assert(codeAddr < codeEndp - sz);
+ }
+
+ // If the method has a call followed by a ret, assume that
+ // it is a wrapper method.
+ if (makeInlineObservations)
+ {
+ if ((OPCODE)getU1LittleEndian(codeAddr + sz) == CEE_RET)
+ {
+ compInlineResult->Note(InlineObservation::CALLEE_LOOKS_LIKE_WRAPPER);
+ }
+ }
+ }
+ break;
+
+ case CEE_LEAVE:
+ case CEE_LEAVE_S:
+ case CEE_BR:
+ case CEE_BR_S:
+ case CEE_BRFALSE:
+ case CEE_BRFALSE_S:
+ case CEE_BRTRUE:
+ case CEE_BRTRUE_S:
+ case CEE_BEQ:
+ case CEE_BEQ_S:
+ case CEE_BGE:
+ case CEE_BGE_S:
+ case CEE_BGE_UN:
+ case CEE_BGE_UN_S:
+ case CEE_BGT:
+ case CEE_BGT_S:
+ case CEE_BGT_UN:
+ case CEE_BGT_UN_S:
+ case CEE_BLE:
+ case CEE_BLE_S:
+ case CEE_BLE_UN:
+ case CEE_BLE_UN_S:
+ case CEE_BLT:
+ case CEE_BLT_S:
+ case CEE_BLT_UN:
+ case CEE_BLT_UN_S:
+ case CEE_BNE_UN:
+ case CEE_BNE_UN_S:
+ {
+ seenJump = true;
+
+ if (codeAddr > codeEndp - sz)
+ {
+ goto TOO_FAR;
+ }
+
+ // Compute jump target address
+ signed jmpDist = (sz == 1) ? getI1LittleEndian(codeAddr) : getI4LittleEndian(codeAddr);
+
+ if (compIsForInlining() && jmpDist == 0 &&
+ (opcode == CEE_LEAVE || opcode == CEE_LEAVE_S || opcode == CEE_BR || opcode == CEE_BR_S))
+ {
+ break; /* NOP */
+ }
+
+ unsigned jmpAddr = (IL_OFFSET)(codeAddr - codeBegp) + sz + jmpDist;
+
+ // Make sure target is reasonable
+ if (jmpAddr >= codeSize)
+ {
+ BADCODE3("code jumps to outer space", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
+ }
+
+ // Mark the jump target
+ fgMarkJumpTarget(jumpTarget, jmpAddr);
+
+ // See if jump might be sensitive to inlining
+ if (makeInlineObservations && (opcode != CEE_BR_S) && (opcode != CEE_BR))
+ {
+ fgObserveInlineConstants(opcode, pushedStack, isInlining);
+ }
+ }
+ break;
+
+ case CEE_SWITCH:
+ {
+ seenJump = true;
+
+ if (makeInlineObservations)
+ {
+ compInlineResult->Note(InlineObservation::CALLEE_HAS_SWITCH);
+
+ // Fail fast, if we're inlining and can't handle this.
+ if (isInlining && compInlineResult->IsFailure())
+ {
+ return;
+ }
+ }
+
+ // Make sure we don't go past the end reading the number of cases
+ if (codeAddr > codeEndp - sizeof(DWORD))
+ {
+ goto TOO_FAR;
+ }
+
+ // Read the number of cases
+ unsigned jmpCnt = getU4LittleEndian(codeAddr);
+ codeAddr += sizeof(DWORD);
+
+ if (jmpCnt > codeSize / sizeof(DWORD))
+ {
+ goto TOO_FAR;
+ }
+
+ // Find the end of the switch table
+ unsigned jmpBase = (unsigned)((codeAddr - codeBegp) + jmpCnt * sizeof(DWORD));
+
+ // Make sure there is more code after the switch
+ if (jmpBase >= codeSize)
+ {
+ goto TOO_FAR;
+ }
+
+ // jmpBase is also the target of the default case, so mark it
+ fgMarkJumpTarget(jumpTarget, jmpBase);
+
+ // Process table entries
+ while (jmpCnt > 0)
+ {
+ unsigned jmpAddr = jmpBase + getI4LittleEndian(codeAddr);
+ codeAddr += 4;
+
+ if (jmpAddr >= codeSize)
+ {
+ BADCODE3("jump target out of range", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
+ }
+
+ fgMarkJumpTarget(jumpTarget, jmpAddr);
+ jmpCnt--;
+ }
+
+ // We've advanced past all the bytes in this instruction
+ sz = 0;
+ }
+ break;
+
+ case CEE_UNALIGNED:
+ case CEE_CONSTRAINED:
+ case CEE_READONLY:
+ case CEE_VOLATILE:
+ case CEE_TAILCALL:
+ {
+ if (codeAddr >= codeEndp)
+ {
+ goto TOO_FAR;
+ }
+ }
+ break;
+
+ case CEE_STARG:
+ case CEE_STARG_S:
+ {
+ noway_assert(sz == sizeof(BYTE) || sz == sizeof(WORD));
+
+ if (codeAddr > codeEndp - sz)
+ {
+ goto TOO_FAR;
+ }
+
+ varNum = (sz == sizeof(BYTE)) ? getU1LittleEndian(codeAddr) : getU2LittleEndian(codeAddr);
+ varNum = compMapILargNum(varNum); // account for possible hidden param
+
+ // This check is only intended to prevent an AV. Bad varNum values will later
+ // be handled properly by the verifier.
+ if (varNum < lvaTableCnt)
+ {
+ if (isInlining)
+ {
+ impInlineInfo->inlArgInfo[varNum].argHasStargOp = true;
+ }
+ else
+ {
+ // In non-inline cases, note written-to locals.
+ lvaTable[varNum].lvArgWrite = 1;
+ }
+ }
+ }
+ break;
+
+ case CEE_LDARGA:
+ case CEE_LDARGA_S:
+ case CEE_LDLOCA:
+ case CEE_LDLOCA_S:
+ {
+ // Handle address-taken args or locals
+ noway_assert(sz == sizeof(BYTE) || sz == sizeof(WORD));
+
+ if (codeAddr > codeEndp - sz)
+ {
+ goto TOO_FAR;
+ }
+
+ varNum = (sz == sizeof(BYTE)) ? getU1LittleEndian(codeAddr) : getU2LittleEndian(codeAddr);
+
+ if (isInlining)
+ {
+ if (opcode == CEE_LDLOCA || opcode == CEE_LDLOCA_S)
+ {
+ varType = impInlineInfo->lclVarInfo[varNum + impInlineInfo->argCnt].lclTypeInfo;
+ ti = impInlineInfo->lclVarInfo[varNum + impInlineInfo->argCnt].lclVerTypeInfo;
+
+ impInlineInfo->lclVarInfo[varNum + impInlineInfo->argCnt].lclHasLdlocaOp = true;
+ }
+ else
+ {
+ noway_assert(opcode == CEE_LDARGA || opcode == CEE_LDARGA_S);
+
+ varType = impInlineInfo->lclVarInfo[varNum].lclTypeInfo;
+ ti = impInlineInfo->lclVarInfo[varNum].lclVerTypeInfo;
+
+ impInlineInfo->inlArgInfo[varNum].argHasLdargaOp = true;
+
+ pushedStack.PushArgument(varNum);
+ }
+ }
+ else
+ {
+ if (opcode == CEE_LDLOCA || opcode == CEE_LDLOCA_S)
+ {
+ if (varNum >= info.compMethodInfo->locals.numArgs)
+ {
+ BADCODE("bad local number");
+ }
+
+ varNum += info.compArgsCount;
+ }
+ else
+ {
+ noway_assert(opcode == CEE_LDARGA || opcode == CEE_LDARGA_S);
+
+ if (varNum >= info.compILargsCount)
+ {
+ BADCODE("bad argument number");
+ }
+
+ varNum = compMapILargNum(varNum); // account for possible hidden param
+ }
+
+ varType = (var_types)lvaTable[varNum].lvType;
+ ti = lvaTable[varNum].lvVerTypeInfo;
+
+ // Determine if the next instruction will consume
+ // the address. If so we won't mark this var as
+ // address taken.
+ //
+ // We will put structs on the stack and changing
+ // the addrTaken of a local requires an extra pass
+ // in the morpher so we won't apply this
+ // optimization to structs.
+ //
+ // Debug code spills for every IL instruction, and
+ // therefore it will split statements, so we will
+ // need the address. Note that this optimization
+ // is based on the fact that we know what trees we
+ // will generate for this ldfld, and we require that
+ // we won't need the address of this local at all.
+ noway_assert(varNum < lvaTableCnt);
+
+ const bool notStruct = !varTypeIsStruct(&lvaTable[varNum]);
+ const bool notLastInstr = (codeAddr < codeEndp - sz);
+ const bool notDebugCode = !opts.compDbgCode;
+
+ if (notStruct && notLastInstr && notDebugCode &&
+ impILConsumesAddr(codeAddr + sz, impTokenLookupContextHandle, info.compScopeHnd))
+ {
+ // We can skip the addrtaken, as next IL instruction consumes
+ // the address.
+ }
+ else
+ {
+ lvaTable[varNum].lvHasLdAddrOp = 1;
+ if (!info.compIsStatic && (varNum == 0))
+ {
+ // Addr taken on "this" pointer is significant,
+ // go ahead to mark it as permanently addr-exposed here.
+ lvaSetVarAddrExposed(0);
+ // This may be conservative, but probably not very.
+ }
+ }
+ } // isInlining
+
+ typeIsNormed = ti.IsValueClass() && !varTypeIsStruct(varType);
+ }
+ break;
+
+ case CEE_JMP:
+
+#if !defined(_TARGET_X86_) && !defined(_TARGET_ARM_)
+ if (!isInlining)
+ {
+ // We transform this into a set of ldarg's + tail call and
+ // thus may push more onto the stack than originally thought.
+ // This doesn't interfere with verification because CEE_JMP
+ // is never verifiable, and there's nothing unsafe you can
+ // do with an IL stack overflow if the JIT is expecting it.
+ info.compMaxStack = max(info.compMaxStack, info.compILargsCount);
+ break;
+ }
+#endif // !_TARGET_X86_ && !_TARGET_ARM_
+
+ // If we are inlining, we need to fail for a CEE_JMP opcode, just like
+ // the list of other opcodes (for all platforms).
+
+ __fallthrough;
+
+ case CEE_CALLI:
+ case CEE_LOCALLOC:
+ case CEE_MKREFANY:
+ case CEE_RETHROW:
+ // CEE_CALLI should not be inlined because the JIT cannot generate an inlined call frame. If the
+ // call target is a no-marshal CALLI P/Invoke we end up calling the IL stub. We don't NGEN these
+ // stubs, so we'll have to JIT an IL stub for a trivial func. It's almost certainly a better choice
+ // to leave out the inline candidate so we can generate an inlined call frame. It might be nice to
+ // call getCallInfo to figure out what kind of call we have here.
+
+ // Consider making this only for not force inline.
+ if (makeInlineObservations)
+ {
+ // Arguably this should be NoteFatal, but the legacy behavior is
+ // to ignore this for the prejit root.
+ compInlineResult->Note(InlineObservation::CALLEE_UNSUPPORTED_OPCODE);
+
+ // Fail fast if we're inlining...
+ if (isInlining)
+ {
+ assert(compInlineResult->IsFailure());
+ return;
+ }
+ }
+ break;
+
+ case CEE_LDARG_0:
+ case CEE_LDARG_1:
+ case CEE_LDARG_2:
+ case CEE_LDARG_3:
+ if (makeInlineObservations)
+ {
+ pushedStack.PushArgument(opcode - CEE_LDARG_0);
+ }
+ break;
+
+ case CEE_LDARG_S:
+ case CEE_LDARG:
+ {
+ if (codeAddr > codeEndp - sz)
+ {
+ goto TOO_FAR;
+ }
+
+ varNum = (sz == sizeof(BYTE)) ? getU1LittleEndian(codeAddr) : getU2LittleEndian(codeAddr);
+
+ if (makeInlineObservations)
+ {
+ pushedStack.PushArgument(varNum);
+ }
+ }
+ break;
+
+ case CEE_LDLEN:
+ if (makeInlineObservations)
+ {
+ pushedStack.PushArrayLen();
+ }
+ break;
+
+ case CEE_CEQ:
+ case CEE_CGT:
+ case CEE_CGT_UN:
+ case CEE_CLT:
+ case CEE_CLT_UN:
+ if (makeInlineObservations)
+ {
+ fgObserveInlineConstants(opcode, pushedStack, isInlining);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ // Skip any remaining operands this opcode may have
+ codeAddr += sz;
+
+ // Note the opcode we just saw
+ if (makeInlineObservations)
+ {
+ InlineObservation obs =
+ typeIsNormed ? InlineObservation::CALLEE_OPCODE_NORMED : InlineObservation::CALLEE_OPCODE;
+ compInlineResult->NoteInt(obs, opcode);
+ }
+ }
+
+ if (codeAddr != codeEndp)
+ {
+ TOO_FAR:
+ BADCODE3("Code ends in the middle of an opcode, or there is a branch past the end of the method",
+ " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
+ }
+
+ if (makeInlineObservations)
+ {
+ compInlineResult->Note(InlineObservation::CALLEE_END_OPCODE_SCAN);
+
+ // If the inline is viable and discretionary, do the
+ // profitability screening.
+ if (compInlineResult->IsDiscretionaryCandidate())
+ {
+ // Make some callsite specific observations that will feed
+ // into the profitability model.
+ impMakeDiscretionaryInlineObservations(impInlineInfo, compInlineResult);
+
+ // None of those observations should have changed the
+ // inline's viability.
+ assert(compInlineResult->IsCandidate());
+
+ if (isInlining)
+ {
+ // Assess profitability...
+ CORINFO_METHOD_INFO* methodInfo = &impInlineInfo->inlineCandidateInfo->methInfo;
+ compInlineResult->DetermineProfitability(methodInfo);
+
+ if (compInlineResult->IsFailure())
+ {
+ impInlineRoot()->m_inlineStrategy->NoteUnprofitable();
+ JITDUMP("\n\nInline expansion aborted, inline not profitable\n");
+ return;
+ }
+ else
+ {
+ // The inline is still viable.
+ assert(compInlineResult->IsCandidate());
+ }
+ }
+ else
+ {
+ // Prejit root case. Profitability assessment for this
+ // is done over in compCompileHelper.
+ }
+ }
+ }
+
+ // None of the local vars in the inlinee should have their address taken or been written to.
+ // Therefore we should NOT need to enter this "if" statement.
+ if (!isInlining && !info.compIsStatic)
+ {
+ fgAdjustForAddressExposedOrWrittenThis();
+ }
+}
+
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+//------------------------------------------------------------------------
+// fgAdjustForAddressExposedOrWrittenThis: update var table for cases
+// where the this pointer value can change.
+//
+// Notes:
+// Modifies lvaArg0Var to refer to a temp if the value of 'this' can
+// change. The original this (info.compThisArg) then remains
+// unmodified in the method. fgAddInternal is responsible for
+// adding the code to copy the initial this into the temp.
+
+void Compiler::fgAdjustForAddressExposedOrWrittenThis()
+{
+ // Optionally enable adjustment during stress.
+ if (!tiVerificationNeeded && compStressCompile(STRESS_GENERIC_VARN, 15))
+ {
+ lvaTable[info.compThisArg].lvArgWrite = true;
+ }
+
+ // If this is exposed or written to, create a temp for the modifiable this
+ if (lvaTable[info.compThisArg].lvAddrExposed || lvaTable[info.compThisArg].lvArgWrite)
+ {
+ // If there is a "ldarga 0" or "starg 0", grab and use the temp.
+ lvaArg0Var = lvaGrabTemp(false DEBUGARG("Address-exposed, or written this pointer"));
+ noway_assert(lvaArg0Var > (unsigned)info.compThisArg);
+ lvaTable[lvaArg0Var].lvType = lvaTable[info.compThisArg].TypeGet();
+ lvaTable[lvaArg0Var].lvAddrExposed = lvaTable[info.compThisArg].lvAddrExposed;
+ lvaTable[lvaArg0Var].lvDoNotEnregister = lvaTable[info.compThisArg].lvDoNotEnregister;
+#ifdef DEBUG
+ lvaTable[lvaArg0Var].lvVMNeedsStackAddr = lvaTable[info.compThisArg].lvVMNeedsStackAddr;
+ lvaTable[lvaArg0Var].lvLiveInOutOfHndlr = lvaTable[info.compThisArg].lvLiveInOutOfHndlr;
+ lvaTable[lvaArg0Var].lvLclFieldExpr = lvaTable[info.compThisArg].lvLclFieldExpr;
+ lvaTable[lvaArg0Var].lvLiveAcrossUCall = lvaTable[info.compThisArg].lvLiveAcrossUCall;
+#endif
+ lvaTable[lvaArg0Var].lvArgWrite = lvaTable[info.compThisArg].lvArgWrite;
+ lvaTable[lvaArg0Var].lvVerTypeInfo = lvaTable[info.compThisArg].lvVerTypeInfo;
+
+ // Clear the TI_FLAG_THIS_PTR in the original 'this' pointer.
+ noway_assert(lvaTable[lvaArg0Var].lvVerTypeInfo.IsThisPtr());
+ lvaTable[info.compThisArg].lvVerTypeInfo.ClearThisPtr();
+ lvaTable[info.compThisArg].lvAddrExposed = false;
+ lvaTable[info.compThisArg].lvArgWrite = false;
+ }
+}
+
+//------------------------------------------------------------------------
+// fgObserveInlineConstants: look for operations that might get optimized
+// if this method were to be inlined, and report these to the inliner.
+//
+// Arguments:
+// opcode -- MSIL opcode under consideration
+// stack -- abstract stack model at this point in the IL
+// isInlining -- true if we're inlining (vs compiling a prejit root)
+//
+// Notes:
+// Currently only invoked on compare and branch opcodes.
+//
+// If we're inlining we also look at the argument values supplied by
+// the caller at this call site.
+//
+// The crude stack model may overestimate stack depth.
+
+void Compiler::fgObserveInlineConstants(OPCODE opcode, const FgStack& stack, bool isInlining)
+{
+ // We should be able to record inline observations.
+ assert(compInlineResult != nullptr);
+
+ // The stack only has to be 1 deep for BRTRUE/FALSE
+ bool lookForBranchCases = stack.IsStackAtLeastOneDeep();
+
+ if (compInlineResult->UsesLegacyPolicy())
+ {
+ // LegacyPolicy misses cases where the stack is really one
+ // deep but the model says it's two deep. We need to do
+ // likewise to preserve old behavior.
+ lookForBranchCases &= !stack.IsStackTwoDeep();
+ }
+
+ if (lookForBranchCases)
+ {
+ if (opcode == CEE_BRFALSE || opcode == CEE_BRFALSE_S || opcode == CEE_BRTRUE || opcode == CEE_BRTRUE_S)
+ {
+ unsigned slot0 = stack.GetSlot0();
+ if (FgStack::IsArgument(slot0))
+ {
+ compInlineResult->Note(InlineObservation::CALLEE_ARG_FEEDS_CONSTANT_TEST);
+
+ if (isInlining)
+ {
+ // Check for the double whammy of an incoming constant argument
+ // feeding a constant test.
+ unsigned varNum = FgStack::SlotTypeToArgNum(slot0);
+ if (impInlineInfo->inlArgInfo[varNum].argNode->OperIsConst())
+ {
+ compInlineResult->Note(InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST);
+ }
+ }
+ }
+
+ return;
+ }
+ }
+
+ // Remaining cases require at least two things on the stack.
+ if (!stack.IsStackTwoDeep())
+ {
+ return;
+ }
+
+ unsigned slot0 = stack.GetSlot0();
+ unsigned slot1 = stack.GetSlot1();
+
+ // Arg feeds constant test
+ if ((FgStack::IsConstant(slot0) && FgStack::IsArgument(slot1)) ||
+ (FgStack::IsConstant(slot1) && FgStack::IsArgument(slot0)))
+ {
+ compInlineResult->Note(InlineObservation::CALLEE_ARG_FEEDS_CONSTANT_TEST);
+ }
+
+ // Arg feeds range check
+ if ((FgStack::IsArrayLen(slot0) && FgStack::IsArgument(slot1)) ||
+ (FgStack::IsArrayLen(slot1) && FgStack::IsArgument(slot0)))
+ {
+ compInlineResult->Note(InlineObservation::CALLEE_ARG_FEEDS_RANGE_CHECK);
+ }
+
+ // Check for an incoming arg that's a constant
+ if (isInlining)
+ {
+ if (FgStack::IsArgument(slot0))
+ {
+ unsigned varNum = FgStack::SlotTypeToArgNum(slot0);
+ if (impInlineInfo->inlArgInfo[varNum].argNode->OperIsConst())
+ {
+ compInlineResult->Note(InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST);
+ }
+ }
+
+ if (FgStack::IsArgument(slot1))
+ {
+ unsigned varNum = FgStack::SlotTypeToArgNum(slot1);
+ if (impInlineInfo->inlArgInfo[varNum].argNode->OperIsConst())
+ {
+ compInlineResult->Note(InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST);
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Mark all blocks in the range [startBlock .. endBlock] as lying within the span of a backward jump.
+ */
+
+void Compiler::fgMarkBackwardJump(BasicBlock* startBlock, BasicBlock* endBlock)
+{
+ noway_assert(startBlock->bbNum <= endBlock->bbNum);
+
+ for (BasicBlock* block = startBlock; block != endBlock->bbNext; block = block->bbNext)
+ {
+ if ((block->bbFlags & BBF_BACKWARD_JUMP) == 0)
+ {
+ block->bbFlags |= BBF_BACKWARD_JUMP;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Finally link up the bbJumpDest of the blocks together
+ */
+
+void Compiler::fgLinkBasicBlocks()
+{
+ /* Create the basic block lookup tables */
+
+ fgInitBBLookup();
+
+ /* First block is always reachable */
+
+ fgFirstBB->bbRefs = 1;
+
+ /* Walk all the basic blocks, filling in the target addresses */
+
+ for (BasicBlock* curBBdesc = fgFirstBB; curBBdesc; curBBdesc = curBBdesc->bbNext)
+ {
+ switch (curBBdesc->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ case BBJ_LEAVE:
+ curBBdesc->bbJumpDest = fgLookupBB(curBBdesc->bbJumpOffs);
+ curBBdesc->bbJumpDest->bbRefs++;
+ if (curBBdesc->bbJumpDest->bbNum <= curBBdesc->bbNum)
+ {
+ fgMarkBackwardJump(curBBdesc->bbJumpDest, curBBdesc);
+ }
+
+ /* Is the next block reachable? */
+
+ if (curBBdesc->bbJumpKind == BBJ_ALWAYS || curBBdesc->bbJumpKind == BBJ_LEAVE)
+ {
+ break;
+ }
+
+ if (!curBBdesc->bbNext)
+ {
+ BADCODE("Fall thru the end of a method");
+ }
+
+ // Fall through, the next block is also reachable
+
+ case BBJ_NONE:
+ curBBdesc->bbNext->bbRefs++;
+ break;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ break;
+
+ case BBJ_SWITCH:
+
+ unsigned jumpCnt;
+ jumpCnt = curBBdesc->bbJumpSwt->bbsCount;
+ BasicBlock** jumpPtr;
+ jumpPtr = curBBdesc->bbJumpSwt->bbsDstTab;
+
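+ // Each table entry currently holds an IL offset that fgMakeBasicBlocks()
+ // stashed as a pointer-sized value; convert it to the target BasicBlock*
+ // and bump that block's reference count.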
+ do
+ {
+ *jumpPtr = fgLookupBB((unsigned)*(size_t*)jumpPtr);
+ (*jumpPtr)->bbRefs++;
+ if ((*jumpPtr)->bbNum <= curBBdesc->bbNum)
+ {
+ fgMarkBackwardJump(*jumpPtr, curBBdesc);
+ }
+ } while (++jumpPtr, --jumpCnt);
+
+ /* The default case of CEE_SWITCH (the next block) is at the end of jumpTab[] */
+
+ noway_assert(*(jumpPtr - 1) == curBBdesc->bbNext);
+ break;
+
+ case BBJ_CALLFINALLY: // BBJ_CALLFINALLY and BBJ_EHCATCHRET don't appear until later
+ case BBJ_EHCATCHRET:
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Walk the instrs to create the basic blocks.
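+ *
+ * A new block begins at every IL offset recorded in jumpTarget[] and after
+ * every opcode that ends a block (branch, leave, return, throw, switch, etc.);
+ * once all blocks are created, fgLinkBasicBlocks() resolves the branch targets.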
+ */
+
+void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget)
+{
+ const BYTE* codeBegp = codeAddr;
+ const BYTE* codeEndp = codeAddr + codeSize;
+ bool tailCall = false;
+ unsigned curBBoffs;
+ BasicBlock* curBBdesc;
+
+ /* Clear the beginning offset for the first BB */
+
+ curBBoffs = 0;
+
+#ifdef DEBUGGING_SUPPORT
+ if (opts.compDbgCode && (info.compVarScopesCount > 0))
+ {
+ compResetScopeLists();
+
+ // Ignore scopes beginning at offset 0
+ while (compGetNextEnterScope(0))
+ { /* do nothing */
+ }
+ while (compGetNextExitScope(0))
+ { /* do nothing */
+ }
+ }
+#endif
+
+ BBjumpKinds jmpKind;
+
+ do
+ {
+ OPCODE opcode;
+ unsigned sz;
+ unsigned jmpAddr = DUMMY_INIT(BAD_IL_OFFSET);
+ unsigned bbFlags = 0;
+ BBswtDesc* swtDsc = nullptr;
+ unsigned nxtBBoffs;
+
+ opcode = (OPCODE)getU1LittleEndian(codeAddr);
+ codeAddr += sizeof(__int8);
+ jmpKind = BBJ_NONE;
+
+ DECODE_OPCODE:
+
+ /* Get the size of additional parameters */
+
+ noway_assert(opcode < CEE_COUNT);
+
+ sz = opcodeSizes[opcode];
+
+ switch (opcode)
+ {
+ signed jmpDist;
+
+ case CEE_PREFIX1:
+ if (jumpTarget[codeAddr - codeBegp] != JT_NONE)
+ {
+ BADCODE3("jump target between prefix 0xFE and opcode", " at offset %04X",
+ (IL_OFFSET)(codeAddr - codeBegp));
+ }
+
+ opcode = (OPCODE)(256 + getU1LittleEndian(codeAddr));
+ codeAddr += sizeof(__int8);
+ goto DECODE_OPCODE;
+
+ /* Check to see if we have a jump/return opcode */
+
+ case CEE_BRFALSE:
+ case CEE_BRFALSE_S:
+ case CEE_BRTRUE:
+ case CEE_BRTRUE_S:
+
+ case CEE_BEQ:
+ case CEE_BEQ_S:
+ case CEE_BGE:
+ case CEE_BGE_S:
+ case CEE_BGE_UN:
+ case CEE_BGE_UN_S:
+ case CEE_BGT:
+ case CEE_BGT_S:
+ case CEE_BGT_UN:
+ case CEE_BGT_UN_S:
+ case CEE_BLE:
+ case CEE_BLE_S:
+ case CEE_BLE_UN:
+ case CEE_BLE_UN_S:
+ case CEE_BLT:
+ case CEE_BLT_S:
+ case CEE_BLT_UN:
+ case CEE_BLT_UN_S:
+ case CEE_BNE_UN:
+ case CEE_BNE_UN_S:
+
+ jmpKind = BBJ_COND;
+ goto JMP;
+
+ case CEE_LEAVE:
+ case CEE_LEAVE_S:
+
+ // We need to check if we are jumping out of a finally-protected try.
+ jmpKind = BBJ_LEAVE;
+ goto JMP;
+
+ case CEE_BR:
+ case CEE_BR_S:
+ jmpKind = BBJ_ALWAYS;
+ goto JMP;
+
+ JMP:
+
+ /* Compute the target address of the jump */
+
+ jmpDist = (sz == 1) ? getI1LittleEndian(codeAddr) : getI4LittleEndian(codeAddr);
+
+ if (compIsForInlining() && jmpDist == 0 && (opcode == CEE_BR || opcode == CEE_BR_S))
+ {
+ continue; /* NOP */
+ }
+
+ jmpAddr = (IL_OFFSET)(codeAddr - codeBegp) + sz + jmpDist;
+ break;
+
+ case CEE_SWITCH:
+ {
+ unsigned jmpBase;
+ unsigned jmpCnt; // # of switch cases (excluding default)
+
+ BasicBlock** jmpTab;
+ BasicBlock** jmpPtr;
+
+ /* Allocate the switch descriptor */
+
+ swtDsc = new (this, CMK_BasicBlock) BBswtDesc;
+
+ /* Read the number of entries in the table */
+
+ jmpCnt = getU4LittleEndian(codeAddr);
+ codeAddr += 4;
+
+ /* Compute the base offset for the opcode */
+
+ jmpBase = (IL_OFFSET)((codeAddr - codeBegp) + jmpCnt * sizeof(DWORD));
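+
+ // jmpBase is the IL offset of the instruction that follows the entire
+ // switch operand (the count plus jmpCnt DWORD-sized deltas); each case
+ // target below is encoded as a signed delta relative to this base.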
+
+ /* Allocate the jump table */
+
+ jmpPtr = jmpTab = new (this, CMK_BasicBlock) BasicBlock*[jmpCnt + 1];
+
+ /* Fill in the jump table */
+
+ for (unsigned count = jmpCnt; count; count--)
+ {
+ jmpDist = getI4LittleEndian(codeAddr);
+ codeAddr += 4;
+
+ // store the offset in the pointer. We change these in fgLinkBasicBlocks().
+ *jmpPtr++ = (BasicBlock*)(size_t)(jmpBase + jmpDist);
+ }
+
+ /* Append the default label to the target table */
+
+ *jmpPtr++ = (BasicBlock*)(size_t)jmpBase;
+
+ /* Make sure we found the right number of labels */
+
+ noway_assert(jmpPtr == jmpTab + jmpCnt + 1);
+
+ /* Compute the size of the switch opcode operands */
+
+ sz = sizeof(DWORD) + jmpCnt * sizeof(DWORD);
+
+ /* Fill in the remaining fields of the switch descriptor */
+
+ swtDsc->bbsCount = jmpCnt + 1;
+ swtDsc->bbsDstTab = jmpTab;
+
+ /* This is definitely a jump */
+
+ jmpKind = BBJ_SWITCH;
+ fgHasSwitch = true;
+
+#ifndef LEGACY_BACKEND
+ if (opts.compProcedureSplitting)
+ {
+ // TODO-CQ: We might need to create a switch table; we won't know for sure until much later.
+ // However, switch tables don't work with hot/cold splitting, currently. The switch table data needs
+ // a relocation such that if the base (the first block after the prolog) and target of the switch
+ // branch are put in different sections, the difference stored in the table is updated. However, our
+ // relocation implementation doesn't support three different pointers (relocation address, base, and
+ // target). So, we need to change our switch table implementation to be more like
+ // JIT64: put the table in the code section, in the same hot/cold section as the switch jump itself
+ // (maybe immediately after the switch jump), and make the "base" address be also in that section,
+ // probably the address after the switch jump.
+ opts.compProcedureSplitting = false;
+ JITDUMP("Turning off procedure splitting for this method, as it might need switch tables; "
+ "implementation limitation.\n");
+ }
+#endif // !LEGACY_BACKEND
+ }
+ goto GOT_ENDP;
+
+ case CEE_ENDFILTER:
+ bbFlags |= BBF_DONT_REMOVE;
+ jmpKind = BBJ_EHFILTERRET;
+ break;
+
+ case CEE_ENDFINALLY:
+ jmpKind = BBJ_EHFINALLYRET;
+ break;
+
+ case CEE_TAILCALL:
+ if (compIsForInlining())
+ {
+ // TODO-CQ: We can inline some callees with explicit tail calls if we can guarantee that the calls
+ // can be dispatched as tail calls from the caller.
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_EXPLICIT_TAIL_PREFIX);
+ return;
+ }
+
+ __fallthrough;
+
+ case CEE_READONLY:
+ case CEE_CONSTRAINED:
+ case CEE_VOLATILE:
+ case CEE_UNALIGNED:
+ // fgFindJumpTargets should have ruled out this possibility
+ // (i.e. a prefix opcode as the last instruction in a block)
+ noway_assert(codeAddr < codeEndp);
+
+ if (jumpTarget[codeAddr - codeBegp] != JT_NONE)
+ {
+ BADCODE3("jump target between prefix and an opcode", " at offset %04X",
+ (IL_OFFSET)(codeAddr - codeBegp));
+ }
+ break;
+
+ case CEE_CALL:
+ case CEE_CALLVIRT:
+ case CEE_CALLI:
+ {
+ // Ignore tail calls in an inlinee. Period.
+ // Also bail if there is no .tailcall prefix and we aren't stressing tail calls:
+ // a new BB with BBJ_RETURN would have been created after a tailcall statement.
+ // We need to keep this invariant if we want to stress the tailcall.
+ // That way, the potential (tail)call statement is always the last
+ // statement in the block.
+ // Otherwise, we will assert at the following line in fgMorphCall():
+ // noway_assert(fgMorphStmt->gtNext == NULL);
+ if (compIsForInlining() || (!tailCall && !compTailCallStress()))
+ {
+ // Neither a .tailcall prefix nor tailcall stress, so move on.
+ break;
+ }
+
+ // Make sure the code sequence is legal for the tail call.
+ // If so, mark this BB as having a BBJ_RETURN.
+
+ if (codeAddr >= codeEndp - sz)
+ {
+ BADCODE3("No code found after the call instruction", " at offset %04X",
+ (IL_OFFSET)(codeAddr - codeBegp));
+ }
+
+ if (tailCall)
+ {
+ bool isCallPopAndRet = false;
+
+ // impIsTailCallILPattern uses isRecursive flag to determine whether ret in a fallthrough block is
+ // allowed. We don't know at this point whether the call is recursive so we conservatively pass
+ // false. This will only affect explicit tail calls when IL verification is not needed for the
+ // method.
+ bool isRecursive = false;
+ if (!impIsTailCallILPattern(tailCall, opcode, codeAddr + sz, codeEndp, isRecursive,
+ &isCallPopAndRet))
+ {
+#ifdef _TARGET_AMD64_
+ BADCODE3("tail call not followed by ret or pop+ret", " at offset %04X",
+ (IL_OFFSET)(codeAddr - codeBegp));
+#else
+ BADCODE3("tail call not followed by ret", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
+#endif //_TARGET_AMD64_
+ }
+
+#ifdef _TARGET_AMD64_
+ if (isCallPopAndRet)
+ {
+ // By breaking here, we let the pop and ret opcodes be
+ // imported after the tail call. If the tail prefix is honored,
+ // the stmts corresponding to pop and ret will be removed
+ // in fgMorphCall().
+ break;
+ }
+#endif //_TARGET_AMD64_
+ }
+ else
+ {
+ OPCODE nextOpcode = (OPCODE)getU1LittleEndian(codeAddr + sz);
+
+ if (nextOpcode != CEE_RET)
+ {
+ noway_assert(compTailCallStress());
+ // The next opcode is not a CEE_RET; bail on the attempt to stress the tailcall.
+ // (I.e. we will not make a new BB after the "call" statement.)
+ break;
+ }
+ }
+ }
+
+ /* For a tail call we just call CORINFO_HELP_TAILCALL, and it jumps to the
+ target. So we don't need an epilog - just like CORINFO_HELP_THROW.
+ Make the block BBJ_RETURN, but we will change it to BBJ_THROW
+ if the tailness of the call is satisfied.
+ NOTE: The next instruction is guaranteed to be a CEE_RET
+ and it will create another BasicBlock. But there may be a
+ jump directly to that CEE_RET. If we want to avoid creating
+ an unnecessary block, we need to check if the CEE_RET is
+ the target of a jump.
+ */
+
+ // fall-through
+
+ case CEE_JMP:
+ /* These are equivalent to a return from the current method
+ But instead of directly returning to the caller we jump and
+ execute something else in between */
+ case CEE_RET:
+ jmpKind = BBJ_RETURN;
+ break;
+
+ case CEE_THROW:
+ case CEE_RETHROW:
+ jmpKind = BBJ_THROW;
+ break;
+
+#ifdef DEBUG
+// make certain we did not forget any flow of control instructions
+// by checking the 'ctrl' field in opcode.def. First filter out all
+// non-ctrl instructions
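+// Opcodes whose 'ctrl' kind is BREAK or NEXT expand to harmless 'case ...: break;'
+// labels here; the control-flow kinds (CALL, RETURN, BRANCH, COND_BRANCH, THROW,
+// META, PHI) expand to nothing, so any such opcode not handled explicitly above
+// falls into the 'default' case and is rejected as unrecognized.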
+#define BREAK(name) \
+ case name: \
+ break;
+#define NEXT(name) \
+ case name: \
+ break;
+#define CALL(name)
+#define THROW(name)
+#undef RETURN // undef contract RETURN macro
+#define RETURN(name)
+#define META(name)
+#define BRANCH(name)
+#define COND_BRANCH(name)
+#define PHI(name)
+
+#define OPDEF(name, string, pop, push, oprType, opcType, l, s1, s2, ctrl) ctrl(name)
+#include "opcode.def"
+#undef OPDEF
+
+#undef PHI
+#undef BREAK
+#undef CALL
+#undef NEXT
+#undef THROW
+#undef RETURN
+#undef META
+#undef BRANCH
+#undef COND_BRANCH
+
+ // These ctrl-flow opcodes don't need any special handling
+ case CEE_NEWOBJ: // CTRL_CALL
+ break;
+
+ // what's left are forgotten instructions
+ default:
+ BADCODE("Unrecognized control Opcode");
+ break;
+#else // !DEBUG
+ default:
+ break;
+#endif // !DEBUG
+ }
+
+ /* Jump over the operand */
+
+ codeAddr += sz;
+
+ GOT_ENDP:
+
+ tailCall = (opcode == CEE_TAILCALL);
+
+ /* Make sure a jump target isn't in the middle of our opcode */
+
+ if (sz)
+ {
+ IL_OFFSET offs = (IL_OFFSET)(codeAddr - codeBegp) - sz; // offset of the operand
+
+ for (unsigned i = 0; i < sz; i++, offs++)
+ {
+ if (jumpTarget[offs] != JT_NONE)
+ {
+ BADCODE3("jump into the middle of an opcode", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
+ }
+ }
+ }
+
+ /* Compute the offset of the next opcode */
+
+ nxtBBoffs = (IL_OFFSET)(codeAddr - codeBegp);
+
+#ifdef DEBUGGING_SUPPORT
+
+ bool foundScope = false;
+
+ if (opts.compDbgCode && (info.compVarScopesCount > 0))
+ {
+ while (compGetNextEnterScope(nxtBBoffs))
+ {
+ foundScope = true;
+ }
+ while (compGetNextExitScope(nxtBBoffs))
+ {
+ foundScope = true;
+ }
+ }
+#endif
+
+ /* Do we have a jump? */
+
+ if (jmpKind == BBJ_NONE)
+ {
+ /* No jump; make sure we don't fall off the end of the function */
+
+ if (codeAddr == codeEndp)
+ {
+ BADCODE3("missing return opcode", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
+ }
+
+ /* If a label follows this opcode, we'll have to make a new BB */
+
+ bool makeBlock = (jumpTarget[nxtBBoffs] != JT_NONE);
+
+#ifdef DEBUGGING_SUPPORT
+ if (!makeBlock && foundScope)
+ {
+ makeBlock = true;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Splitting at BBoffs = %04u\n", nxtBBoffs);
+ }
+#endif // DEBUG
+ }
+#endif // DEBUGGING_SUPPORT
+
+ if (!makeBlock)
+ {
+ continue;
+ }
+ }
+
+ /* We need to create a new basic block */
+
+ curBBdesc = fgNewBasicBlock(jmpKind);
+
+ curBBdesc->bbFlags |= bbFlags;
+ curBBdesc->bbRefs = 0;
+
+ curBBdesc->bbCodeOffs = curBBoffs;
+ curBBdesc->bbCodeOffsEnd = nxtBBoffs;
+
+ unsigned profileWeight;
+ if (fgGetProfileWeightForBasicBlock(curBBoffs, &profileWeight))
+ {
+ curBBdesc->setBBProfileWeight(profileWeight);
+ if (profileWeight == 0)
+ {
+ curBBdesc->bbSetRunRarely();
+ }
+ else
+ {
+ // Note that bbNewBasicBlock (called from fgNewBasicBlock) may have
+ // already marked the block as rarely run. In that case (and when we know
+ // that the block profile weight is non-zero) we want to unmark that.
+
+ curBBdesc->bbFlags &= ~BBF_RUN_RARELY;
+ }
+ }
+
+ switch (jmpKind)
+ {
+ case BBJ_SWITCH:
+ curBBdesc->bbJumpSwt = swtDsc;
+ break;
+
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ case BBJ_LEAVE:
+ noway_assert(jmpAddr != DUMMY_INIT(BAD_IL_OFFSET));
+ curBBdesc->bbJumpOffs = jmpAddr;
+ break;
+
+ default:
+ break;
+ }
+
+ DBEXEC(verbose, curBBdesc->dspBlockHeader(this, false, false, false));
+
+ /* Remember where the next BB will start */
+
+ curBBoffs = nxtBBoffs;
+ } while (codeAddr < codeEndp);
+
+ noway_assert(codeAddr == codeEndp);
+
+ /* Finally link up the bbJumpDest of the blocks together */
+
+ fgLinkBasicBlocks();
+}
+
+/*****************************************************************************
+ *
+ * Main entry point to discover the basic blocks for the current function.
+ */
+
+void Compiler::fgFindBasicBlocks()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgFindBasicBlocks() for %s\n", info.compFullName);
+ }
+#endif
+
+ /* Allocate the 'jump target' vector
+ *
+ * We need one extra byte as we mark
+ * jumpTarget[info.compILCodeSize] with JT_ADDR
+ * when we need to add a dummy block
+ * to record the end of a try or handler region.
+ */
+ BYTE* jumpTarget = new (this, CMK_Unknown) BYTE[info.compILCodeSize + 1];
+ memset(jumpTarget, JT_NONE, info.compILCodeSize + 1);
+ noway_assert(JT_NONE == 0);
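+
+ // A non-JT_NONE entry at jumpTarget[offs] means IL offset 'offs' must start
+ // a new basic block: either it is the target of a branch (marked by
+ // fgFindJumpTargets) or it delimits a try/handler/filter region (marked
+ // JT_ADDR below).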
+
+ /* Walk the instrs to find all jump targets */
+
+ fgFindJumpTargets(info.compCode, info.compILCodeSize, jumpTarget);
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ unsigned XTnum;
+
+ /* Are there any exception handlers? */
+
+ if (info.compXcptnsCount > 0)
+ {
+ noway_assert(!compIsForInlining());
+
+ /* Check and mark all the exception handlers */
+
+ for (XTnum = 0; XTnum < info.compXcptnsCount; XTnum++)
+ {
+ DWORD tmpOffset;
+ CORINFO_EH_CLAUSE clause;
+ info.compCompHnd->getEHinfo(info.compMethodHnd, XTnum, &clause);
+ noway_assert(clause.HandlerLength != (unsigned)-1);
+
+ if (clause.TryLength <= 0)
+ {
+ BADCODE("try block length <=0");
+ }
+
+ /* Mark the 'try' block extent and the handler itself */
+
+ if (clause.TryOffset > info.compILCodeSize)
+ {
+ BADCODE("try offset is > codesize");
+ }
+ if (jumpTarget[clause.TryOffset] == JT_NONE)
+ {
+ jumpTarget[clause.TryOffset] = JT_ADDR;
+ }
+
+ tmpOffset = clause.TryOffset + clause.TryLength;
+ if (tmpOffset > info.compILCodeSize)
+ {
+ BADCODE("try end is > codesize");
+ }
+ if (jumpTarget[tmpOffset] == JT_NONE)
+ {
+ jumpTarget[tmpOffset] = JT_ADDR;
+ }
+
+ if (clause.HandlerOffset > info.compILCodeSize)
+ {
+ BADCODE("handler offset > codesize");
+ }
+ if (jumpTarget[clause.HandlerOffset] == JT_NONE)
+ {
+ jumpTarget[clause.HandlerOffset] = JT_ADDR;
+ }
+
+ tmpOffset = clause.HandlerOffset + clause.HandlerLength;
+ if (tmpOffset > info.compILCodeSize)
+ {
+ BADCODE("handler end > codesize");
+ }
+ if (jumpTarget[tmpOffset] == JT_NONE)
+ {
+ jumpTarget[tmpOffset] = JT_ADDR;
+ }
+
+ if (clause.Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ if (clause.FilterOffset > info.compILCodeSize)
+ {
+ BADCODE("filter offset > codesize");
+ }
+ if (jumpTarget[clause.FilterOffset] == JT_NONE)
+ {
+ jumpTarget[clause.FilterOffset] = JT_ADDR;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ bool anyJumpTargets = false;
+ printf("Jump targets:\n");
+ for (unsigned i = 0; i < info.compILCodeSize + 1; i++)
+ {
+ if (jumpTarget[i] == JT_NONE)
+ {
+ continue;
+ }
+
+ anyJumpTargets = true;
+ printf(" IL_%04x", i);
+
+ if (jumpTarget[i] & JT_ADDR)
+ {
+ printf(" addr");
+ }
+ if (jumpTarget[i] & JT_MULTI)
+ {
+ printf(" multi");
+ }
+ printf("\n");
+ }
+ if (!anyJumpTargets)
+ {
+ printf(" none\n");
+ }
+ }
+#endif // DEBUG
+
+ /* Now create the basic blocks */
+
+ fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget);
+
+ if (compIsForInlining())
+ {
+ if (compInlineResult->IsFailure())
+ {
+ return;
+ }
+
+ bool hasReturnBlocks = false;
+ bool hasMoreThanOneReturnBlock = false;
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ if (hasReturnBlocks)
+ {
+ hasMoreThanOneReturnBlock = true;
+ break;
+ }
+
+ hasReturnBlocks = true;
+ }
+ }
+
+ if (!hasReturnBlocks && !compInlineResult->UsesLegacyPolicy())
+ {
+ //
+ // Mark the call node as "no return". The inliner might ignore CALLEE_DOES_NOT_RETURN and
+ // fail the inline for a different reason. In that case we still want to make the "no return"
+ // information available to the caller, as it can impact the caller's code quality.
+ //
+
+ impInlineInfo->iciCall->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN;
+ }
+
+ compInlineResult->NoteBool(InlineObservation::CALLEE_DOES_NOT_RETURN, !hasReturnBlocks);
+
+ if (compInlineResult->IsFailure())
+ {
+ return;
+ }
+
+ noway_assert(info.compXcptnsCount == 0);
+ compHndBBtab = impInlineInfo->InlinerCompiler->compHndBBtab;
+ compHndBBtabAllocCount =
+ impInlineInfo->InlinerCompiler->compHndBBtabAllocCount; // we probably only use the table, and don't add to it.
+ compHndBBtabCount = impInlineInfo->InlinerCompiler->compHndBBtabCount;
+ info.compXcptnsCount = impInlineInfo->InlinerCompiler->info.compXcptnsCount;
+
+ if (info.compRetNativeType != TYP_VOID && hasMoreThanOneReturnBlock)
+ {
+ // The lifetime of this var might span multiple BBs, so it is a long-lifetime compiler temp.
+ lvaInlineeReturnSpillTemp = lvaGrabTemp(false DEBUGARG("Inline candidate multiple BBJ_RETURN spill temp"));
+ lvaTable[lvaInlineeReturnSpillTemp].lvType = info.compRetNativeType;
+ }
+ return;
+ }
+
+ /* Mark all blocks within 'try' blocks as such */
+
+ if (info.compXcptnsCount == 0)
+ {
+ return;
+ }
+
+ if (info.compXcptnsCount > MAX_XCPTN_INDEX)
+ {
+ IMPL_LIMITATION("too many exception clauses");
+ }
+
+ /* Allocate the exception handler table */
+
+ fgAllocEHTable();
+
+ /* Assume we don't need to sort the EH table (such that nested try/catch
+ * appear before their try or handler parent). The EH verifier will notice
+ * when we do need to sort it.
+ */
+
+ fgNeedToSortEHTable = false;
+
+ verInitEHTree(info.compXcptnsCount);
+ EHNodeDsc* initRoot = ehnNext; // remember the original root since
+ // it may get modified during insertion
+
+ // Annotate BBs with exception handling information required for generating correct eh code
+ // as well as checking for correct IL
+
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ CORINFO_EH_CLAUSE clause;
+ info.compCompHnd->getEHinfo(info.compMethodHnd, XTnum, &clause);
+ noway_assert(clause.HandlerLength != (unsigned)-1); // @DEPRECATED
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ dispIncomingEHClause(XTnum, clause);
+ }
+#endif // DEBUG
+
+ IL_OFFSET tryBegOff = clause.TryOffset;
+ IL_OFFSET tryEndOff = tryBegOff + clause.TryLength;
+ IL_OFFSET filterBegOff = 0;
+ IL_OFFSET hndBegOff = clause.HandlerOffset;
+ IL_OFFSET hndEndOff = hndBegOff + clause.HandlerLength;
+
+ if (clause.Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ filterBegOff = clause.FilterOffset;
+ }
+
+ if (tryEndOff > info.compILCodeSize)
+ {
+ BADCODE3("end of try block beyond end of method for try", " at offset %04X", tryBegOff);
+ }
+ if (hndEndOff > info.compILCodeSize)
+ {
+ BADCODE3("end of hnd block beyond end of method for try", " at offset %04X", tryBegOff);
+ }
+
+ HBtab->ebdTryBegOffset = tryBegOff;
+ HBtab->ebdTryEndOffset = tryEndOff;
+ HBtab->ebdFilterBegOffset = filterBegOff;
+ HBtab->ebdHndBegOffset = hndBegOff;
+ HBtab->ebdHndEndOffset = hndEndOff;
+
+ /* Convert the various addresses to basic blocks */
+
+ BasicBlock* tryBegBB = fgLookupBB(tryBegOff);
+ BasicBlock* tryEndBB =
+ fgLookupBB(tryEndOff); // note: this can be NULL if the try region is at the end of the function
+ BasicBlock* hndBegBB = fgLookupBB(hndBegOff);
+ BasicBlock* hndEndBB = nullptr;
+ BasicBlock* filtBB = nullptr;
+ BasicBlock* block;
+
+ //
+ // Assert that the try/hnd beginning blocks are set up correctly
+ //
+ if (tryBegBB == nullptr)
+ {
+ BADCODE("Try Clause is invalid");
+ }
+
+ if (hndBegBB == nullptr)
+ {
+ BADCODE("Handler Clause is invalid");
+ }
+
+ tryBegBB->bbFlags |= BBF_HAS_LABEL;
+ hndBegBB->bbFlags |= BBF_HAS_LABEL | BBF_JMP_TARGET;
+
+#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
+ // This will change the block weight from 0 to 1
+ // and clear the rarely run flag
+ hndBegBB->makeBlockHot();
+#else
+ hndBegBB->bbSetRunRarely(); // handler entry points are rarely executed
+#endif
+
+ if (hndEndOff < info.compILCodeSize)
+ {
+ hndEndBB = fgLookupBB(hndEndOff);
+ }
+
+ if (clause.Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ filtBB = HBtab->ebdFilter = fgLookupBB(clause.FilterOffset);
+
+ filtBB->bbCatchTyp = BBCT_FILTER;
+ filtBB->bbFlags |= BBF_HAS_LABEL | BBF_JMP_TARGET;
+
+ hndBegBB->bbCatchTyp = BBCT_FILTER_HANDLER;
+
+#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
+ // This will change the block weight from 0 to 1
+ // and clear the rarely run flag
+ filtBB->makeBlockHot();
+#else
+ filtBB->bbSetRunRarely(); // filter entry points are rarely executed
+#endif
+
+ // Mark all BBs that belong to the filter with the XTnum of the corresponding handler
+ for (block = filtBB; /**/; block = block->bbNext)
+ {
+ if (block == nullptr)
+ {
+ BADCODE3("Missing endfilter for filter", " at offset %04X", filtBB->bbCodeOffs);
+ return;
+ }
+
+ // Still inside the filter
+ block->setHndIndex(XTnum);
+
+ if (block->bbJumpKind == BBJ_EHFILTERRET)
+ {
+ // Mark catch handler as successor.
+ block->bbJumpDest = hndBegBB;
+ assert(block->bbJumpDest->bbCatchTyp == BBCT_FILTER_HANDLER);
+ break;
+ }
+ }
+
+ if (!block->bbNext || block->bbNext != hndBegBB)
+ {
+ BADCODE3("Filter does not immediately precede handler for filter", " at offset %04X",
+ filtBB->bbCodeOffs);
+ }
+ }
+ else
+ {
+ HBtab->ebdTyp = clause.ClassToken;
+
+ /* Set bbCatchTyp as appropriate */
+
+ if (clause.Flags & CORINFO_EH_CLAUSE_FINALLY)
+ {
+ hndBegBB->bbCatchTyp = BBCT_FINALLY;
+ }
+ else
+ {
+ if (clause.Flags & CORINFO_EH_CLAUSE_FAULT)
+ {
+ hndBegBB->bbCatchTyp = BBCT_FAULT;
+ }
+ else
+ {
+ hndBegBB->bbCatchTyp = clause.ClassToken;
+
+ // These values should be non-zero values that will
+ // not collide with real tokens for bbCatchTyp
+ if (clause.ClassToken == 0)
+ {
+ BADCODE("Exception catch type is Null");
+ }
+
+ noway_assert(clause.ClassToken != BBCT_FAULT);
+ noway_assert(clause.ClassToken != BBCT_FINALLY);
+ noway_assert(clause.ClassToken != BBCT_FILTER);
+ noway_assert(clause.ClassToken != BBCT_FILTER_HANDLER);
+ }
+ }
+ }
+
+ /* Mark the initial block and last blocks in the 'try' region */
+
+ tryBegBB->bbFlags |= BBF_TRY_BEG | BBF_HAS_LABEL;
+
+ /* Prevent future optimizations of removing the first block */
+ /* of a TRY block and the first block of an exception handler */
+
+ tryBegBB->bbFlags |= BBF_DONT_REMOVE;
+ hndBegBB->bbFlags |= BBF_DONT_REMOVE;
+ hndBegBB->bbRefs++; // The first block of a handler gets an extra, "artificial" reference count.
+
+ if (clause.Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ filtBB->bbFlags |= BBF_DONT_REMOVE;
+ filtBB->bbRefs++; // The first block of a filter gets an extra, "artificial" reference count.
+ }
+
+ tryBegBB->bbFlags |= BBF_DONT_REMOVE;
+ hndBegBB->bbFlags |= BBF_DONT_REMOVE;
+
+ //
+ // Store the info to the table of EH block handlers
+ //
+
+ HBtab->ebdHandlerType = ToEHHandlerType(clause.Flags);
+
+ HBtab->ebdTryBeg = tryBegBB;
+ HBtab->ebdTryLast = (tryEndBB == nullptr) ? fgLastBB : tryEndBB->bbPrev;
+
+ HBtab->ebdHndBeg = hndBegBB;
+ HBtab->ebdHndLast = (hndEndBB == nullptr) ? fgLastBB : hndEndBB->bbPrev;
+
+ //
+ // Assert that all of our try/hnd blocks are setup correctly.
+ //
+ if (HBtab->ebdTryLast == nullptr)
+ {
+ BADCODE("Try Clause is invalid");
+ }
+
+ if (HBtab->ebdHndLast == nullptr)
+ {
+ BADCODE("Handler Clause is invalid");
+ }
+
+ //
+ // Verify that it's legal
+ //
+
+ verInsertEhNode(&clause, HBtab);
+
+ } // end foreach handler table entry
+
+ fgSortEHTable();
+
+ // Next, set things related to nesting that depend on the sorting being complete.
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ /* Mark all blocks in the finally/fault or catch clause */
+
+ BasicBlock* tryBegBB = HBtab->ebdTryBeg;
+ BasicBlock* hndBegBB = HBtab->ebdHndBeg;
+
+ IL_OFFSET tryBegOff = HBtab->ebdTryBegOffset;
+ IL_OFFSET tryEndOff = HBtab->ebdTryEndOffset;
+
+ IL_OFFSET hndBegOff = HBtab->ebdHndBegOffset;
+ IL_OFFSET hndEndOff = HBtab->ebdHndEndOffset;
+
+ BasicBlock* block;
+
+ for (block = hndBegBB; block && (block->bbCodeOffs < hndEndOff); block = block->bbNext)
+ {
+ if (!block->hasHndIndex())
+ {
+ block->setHndIndex(XTnum);
+ }
+
+ // All blocks in a catch handler or filter are rarely run, except the entry
+ if ((block != hndBegBB) && (hndBegBB->bbCatchTyp != BBCT_FINALLY))
+ {
+ block->bbSetRunRarely();
+ }
+ }
+
+ /* Mark all blocks within the covered range of the try */
+
+ for (block = tryBegBB; block && (block->bbCodeOffs < tryEndOff); block = block->bbNext)
+ {
+ /* Mark this BB as belonging to a 'try' block */
+
+ if (!block->hasTryIndex())
+ {
+ block->setTryIndex(XTnum);
+ }
+
+#ifdef DEBUG
+ /* Note: the BB can't span the 'try' block */
+
+ if (!(block->bbFlags & BBF_INTERNAL))
+ {
+ noway_assert(tryBegOff <= block->bbCodeOffs);
+ noway_assert(tryEndOff >= block->bbCodeOffsEnd || tryEndOff == tryBegOff);
+ }
+#endif
+ }
+
+/* Init ebdHandlerNestingLevel of current clause, and bump up value for all
+ * enclosed clauses (which have to be before it in the table).
+ * Innermost try-finally blocks must precede outermost
+ * try-finally blocks.
+ */
+
+#if !FEATURE_EH_FUNCLETS
+ HBtab->ebdHandlerNestingLevel = 0;
+#endif // !FEATURE_EH_FUNCLETS
+
+ HBtab->ebdEnclosingTryIndex = EHblkDsc::NO_ENCLOSING_INDEX;
+ HBtab->ebdEnclosingHndIndex = EHblkDsc::NO_ENCLOSING_INDEX;
+
+ noway_assert(XTnum < compHndBBtabCount);
+ noway_assert(XTnum == ehGetIndex(HBtab));
+
+ for (EHblkDsc* xtab = compHndBBtab; xtab < HBtab; xtab++)
+ {
+#if !FEATURE_EH_FUNCLETS
+ if (jitIsBetween(xtab->ebdHndBegOffs(), hndBegOff, hndEndOff))
+ {
+ xtab->ebdHandlerNestingLevel++;
+ }
+#endif // !FEATURE_EH_FUNCLETS
+
+ /* If we haven't recorded an enclosing try index for xtab then see
+ * if this EH region should be recorded. We check if the
+ * first offset in the xtab lies within our region. If so,
+ * the last offset also must lie within the region, due to
+ * nesting rules. verInsertEhNode(), called above, checks for proper nesting.
+ */
+ if (xtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ bool begBetween = jitIsBetween(xtab->ebdTryBegOffs(), tryBegOff, tryEndOff);
+ if (begBetween)
+ {
+ // Record the enclosing scope link
+ xtab->ebdEnclosingTryIndex = (unsigned short)XTnum;
+ }
+ }
+
+ /* Do the same for the enclosing handler index.
+ */
+ if (xtab->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ bool begBetween = jitIsBetween(xtab->ebdTryBegOffs(), hndBegOff, hndEndOff);
+ if (begBetween)
+ {
+ // Record the enclosing scope link
+ xtab->ebdEnclosingHndIndex = (unsigned short)XTnum;
+ }
+ }
+ }
+
+ } // end foreach handler table entry
+
+#if !FEATURE_EH_FUNCLETS
+
+ EHblkDsc* HBtabEnd;
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+ if (ehMaxHndNestingCount <= HBtab->ebdHandlerNestingLevel)
+ ehMaxHndNestingCount = HBtab->ebdHandlerNestingLevel + 1;
+ }
+
+#endif // !FEATURE_EH_FUNCLETS
+
+#ifndef DEBUG
+ if (tiVerificationNeeded)
+#endif
+ {
+ // always run these checks for a debug build
+ verCheckNestingLevel(initRoot);
+ }
+
+#ifndef DEBUG
+ // fgNormalizeEH assumes that this test has been passed. And SSA assumes that fgNormalizeEHTable
+ // has been run. So do this unless we're in minOpts mode (and always in debug).
+ if (tiVerificationNeeded || !opts.MinOpts())
+#endif
+ {
+ fgCheckBasicBlockControlFlow();
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("*************** After fgFindBasicBlocks() has created the EH table\n");
+ fgDispHandlerTab();
+ }
+
+ // We can't verify the handler table until all the IL legality checks have been done (above), since bad IL
+ // (such as illegal nesting of regions) will trigger asserts here.
+ fgVerifyHandlerTab();
+#endif
+
+ fgNormalizeEH();
+}
+
+/*****************************************************************************
+ * Check control flow constraints for well formed IL. Bail if any of the constraints
+ * are violated.
+ */
+
+void Compiler::fgCheckBasicBlockControlFlow()
+{
+ assert(!fgNormalizeEHDone); // These rules aren't quite correct after EH normalization has introduced new blocks
+
+ EHblkDsc* HBtab;
+
+ for (BasicBlock* blk = fgFirstBB; blk; blk = blk->bbNext)
+ {
+ if (blk->bbFlags & BBF_INTERNAL)
+ {
+ continue;
+ }
+
+ switch (blk->bbJumpKind)
+ {
+ case BBJ_NONE: // block flows into the next one (no jump)
+
+ fgControlFlowPermitted(blk, blk->bbNext);
+
+ break;
+
+ case BBJ_ALWAYS: // block does unconditional jump to target
+
+ fgControlFlowPermitted(blk, blk->bbJumpDest);
+
+ break;
+
+ case BBJ_COND: // block conditionally jumps to the target
+
+ fgControlFlowPermitted(blk, blk->bbNext);
+
+ fgControlFlowPermitted(blk, blk->bbJumpDest);
+
+ break;
+
+ case BBJ_RETURN: // block ends with 'ret'
+
+ if (blk->hasTryIndex() || blk->hasHndIndex())
+ {
+ BADCODE3("Return from a protected block", ". Before offset %04X", blk->bbCodeOffsEnd);
+ }
+ break;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+
+ if (!blk->hasHndIndex()) // must be part of a handler
+ {
+ BADCODE3("Missing handler", ". Before offset %04X", blk->bbCodeOffsEnd);
+ }
+
+ HBtab = ehGetDsc(blk->getHndIndex());
+
+ // Endfilter allowed only in a filter block
+ if (blk->bbJumpKind == BBJ_EHFILTERRET)
+ {
+ if (!HBtab->HasFilter())
+ {
+ BADCODE("Unexpected endfilter");
+ }
+ }
+ // endfinally allowed only in a finally/fault block
+ else if (!HBtab->HasFinallyOrFaultHandler())
+ {
+ BADCODE("Unexpected endfinally");
+ }
+
+ // The handler block should be the innermost block
+ // Exception blocks are listed, innermost first.
+ if (blk->hasTryIndex() && (blk->getTryIndex() < blk->getHndIndex()))
+ {
+ BADCODE("endfinally / endfilter in nested try block");
+ }
+
+ break;
+
+ case BBJ_THROW: // block ends with 'throw'
+ /* throw is permitted from every BB, so nothing to check */
+ /* importer makes sure that rethrow is done from a catch */
+ break;
+
+ case BBJ_LEAVE: // block always jumps to the target, maybe out of guarded
+ // region. Used temporarily until importing
+ fgControlFlowPermitted(blk, blk->bbJumpDest, TRUE);
+
+ break;
+
+ case BBJ_SWITCH: // block ends with a switch statement
+
+ BBswtDesc* swtDesc;
+ swtDesc = blk->bbJumpSwt;
+
+ assert(swtDesc);
+
+ unsigned i;
+ for (i = 0; i < swtDesc->bbsCount; i++)
+ {
+ fgControlFlowPermitted(blk, swtDesc->bbsDstTab[i]);
+ }
+
+ break;
+
+ case BBJ_EHCATCHRET: // block ends with a leave out of a catch (only #if FEATURE_EH_FUNCLETS)
+ case BBJ_CALLFINALLY: // block always calls the target finally
+ default:
+ noway_assert(!"Unexpected bbJumpKind"); // these blocks don't get created until importing
+ break;
+ }
+ }
+}
+
+/****************************************************************************
+ * Check that the leave from the block is legal.
+ * Consider removing this check here if we can do it cheaply during importing
+ */
+
+void Compiler::fgControlFlowPermitted(BasicBlock* blkSrc, BasicBlock* blkDest, BOOL isLeave)
+{
+ assert(!fgNormalizeEHDone); // These rules aren't quite correct after EH normalization has introduced new blocks
+
+ unsigned srcHndBeg, destHndBeg;
+ unsigned srcHndEnd, destHndEnd;
+ bool srcInFilter, destInFilter;
+ bool srcInCatch = false;
+
+ EHblkDsc* srcHndTab;
+
+ srcHndTab = ehInitHndRange(blkSrc, &srcHndBeg, &srcHndEnd, &srcInFilter);
+ ehInitHndRange(blkDest, &destHndBeg, &destHndEnd, &destInFilter);
+
+ /* Impose the rules for leaving or jumping from handler blocks */
+
+ if (blkSrc->hasHndIndex())
+ {
+ srcInCatch = srcHndTab->HasCatchHandler() && srcHndTab->InHndRegionILRange(blkSrc);
+
+ /* Are we jumping within the same handler index? */
+ if (BasicBlock::sameHndRegion(blkSrc, blkDest))
+ {
+ /* Do we have a filter clause? */
+ if (srcHndTab->HasFilter())
+ {
+ /* filters and catch handlers share the same EH index */
+ /* we need to check for control flow between them. */
+ if (srcInFilter != destInFilter)
+ {
+ if (!jitIsBetween(blkDest->bbCodeOffs, srcHndBeg, srcHndEnd))
+ {
+ BADCODE3("Illegal control flow between filter and handler", ". Before offset %04X",
+ blkSrc->bbCodeOffsEnd);
+ }
+ }
+ }
+ }
+ else
+ {
+ /* The handler indexes of blkSrc and blkDest are different */
+ if (isLeave)
+ {
+ /* A leave instruction must not enter the dest handler from outside */
+ if (!jitIsBetween(srcHndBeg, destHndBeg, destHndEnd))
+ {
+ BADCODE3("Illegal use of leave to enter handler", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+ else
+ {
+ /* We must use a leave to exit a handler */
+ BADCODE3("Illegal control flow out of a handler", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+
+ /* Do we have a filter clause? */
+ if (srcHndTab->HasFilter())
+ {
+ /* It is ok to leave from the handler block of a filter, */
+ /* but not from the filter block of a filter */
+ if (srcInFilter != destInFilter)
+ {
+ BADCODE3("Illegal to leave a filter handler", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+
+ /* We should never leave a finally handler */
+ if (srcHndTab->HasFinallyHandler())
+ {
+ BADCODE3("Illegal to leave a finally handler", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+
+ /* We should never leave a fault handler */
+ if (srcHndTab->HasFaultHandler())
+ {
+ BADCODE3("Illegal to leave a fault handler", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+ }
+ else if (blkDest->hasHndIndex())
+ {
+ /* blkSrc was not inside a handler, but blkDst is inside a handler */
+ BADCODE3("Illegal control flow into a handler", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+
+ /* Are we jumping from a catch handler into the corresponding try? */
+ /* VB uses this for "On Error GoTo" */
+
+ if (isLeave && srcInCatch)
+ {
+ // inspect all handlers containing the jump source
+
+ bool bValidJumpToTry = false; // are we jumping in a valid way from a catch to the corresponding try?
+ bool bCatchHandlerOnly = true; // false if we are jumping out of a non-catch handler
+ EHblkDsc* ehTableEnd;
+ EHblkDsc* ehDsc;
+
+ for (ehDsc = compHndBBtab, ehTableEnd = compHndBBtab + compHndBBtabCount;
+ bCatchHandlerOnly && ehDsc < ehTableEnd; ehDsc++)
+ {
+ if (ehDsc->InHndRegionILRange(blkSrc))
+ {
+ if (ehDsc->HasCatchHandler())
+ {
+ if (ehDsc->InTryRegionILRange(blkDest))
+ {
+ // If we already considered the jump for a different try/catch,
+ // we would have two overlapping try regions with two overlapping catch
+ // regions, which is illegal.
+ noway_assert(!bValidJumpToTry);
+
+ // Allowed if it is the first instruction of an inner try
+ // (and all trys in between)
+ //
+ // try {
+ // ..
+ // _tryAgain:
+ // ..
+ // try {
+ // _tryNestedInner:
+ // ..
+ // try {
+ // _tryNestedIllegal:
+ // ..
+ // } catch {
+ // ..
+ // }
+ // ..
+ // } catch {
+ // ..
+ // }
+ // ..
+ // } catch {
+ // ..
+ // leave _tryAgain // Allowed
+ // ..
+ // leave _tryNestedInner // Allowed
+ // ..
+ // leave _tryNestedIllegal // Not Allowed
+ // ..
+ // }
+ //
+ // Note: The leave is allowed also from catches nested inside the catch shown above.
+
+ /* The common case where leave is to the corresponding try */
+ if (ehDsc->ebdIsSameTry(this, blkDest->getTryIndex()) ||
+ /* Also allowed is a leave to the start of a try which starts in the handler's try */
+ fgFlowToFirstBlockOfInnerTry(ehDsc->ebdTryBeg, blkDest, false))
+ {
+ bValidJumpToTry = true;
+ }
+ }
+ }
+ else
+ {
+ // We are jumping from a handler which is not a catch handler.
+
+ // If it's a handler, but not a catch handler, it must be either a finally or fault
+ if (!ehDsc->HasFinallyOrFaultHandler())
+ {
+ BADCODE3("Handlers must be catch, finally, or fault", ". Before offset %04X",
+ blkSrc->bbCodeOffsEnd);
+ }
+
+ // Are we jumping out of this handler?
+ if (!ehDsc->InHndRegionILRange(blkDest))
+ {
+ bCatchHandlerOnly = false;
+ }
+ }
+ }
+ else if (ehDsc->InFilterRegionILRange(blkSrc))
+ {
+ // Are we jumping out of a filter?
+ if (!ehDsc->InFilterRegionILRange(blkDest))
+ {
+ bCatchHandlerOnly = false;
+ }
+ }
+ }
+
+ if (bCatchHandlerOnly)
+ {
+ if (bValidJumpToTry)
+ {
+ return;
+ }
+ else
+ {
+ // FALL THROUGH
+ // This is either the case of a leave to outside the try/catch,
+ // or a leave to a try not nested in this try/catch.
+ // The first case is allowed, the second one will be checked
+ // later when we check the try block rules (it is illegal if we
+ // jump to the middle of the destination try).
+ }
+ }
+ else
+ {
+ BADCODE3("illegal leave to exit a finally, fault or filter", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+
+ /* Check all the try block rules */
+
+ IL_OFFSET srcTryBeg;
+ IL_OFFSET srcTryEnd;
+ IL_OFFSET destTryBeg;
+ IL_OFFSET destTryEnd;
+
+ ehInitTryRange(blkSrc, &srcTryBeg, &srcTryEnd);
+ ehInitTryRange(blkDest, &destTryBeg, &destTryEnd);
+
+ /* Are we jumping between try indexes? */
+ if (!BasicBlock::sameTryRegion(blkSrc, blkDest))
+ {
+ // Are we exiting from an inner to outer try?
+ if (jitIsBetween(srcTryBeg, destTryBeg, destTryEnd) && jitIsBetween(srcTryEnd - 1, destTryBeg, destTryEnd))
+ {
+ if (!isLeave)
+ {
+ BADCODE3("exit from try block without a leave", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+ else if (jitIsBetween(destTryBeg, srcTryBeg, srcTryEnd))
+ {
+ // check that the dest Try is first instruction of an inner try
+ if (!fgFlowToFirstBlockOfInnerTry(blkSrc, blkDest, false))
+ {
+ BADCODE3("control flow into middle of try", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+ else // there is no nesting relationship between src and dest
+ {
+ if (isLeave)
+ {
+ // check that the dest Try is first instruction of an inner try sibling
+ if (!fgFlowToFirstBlockOfInnerTry(blkSrc, blkDest, true))
+ {
+ BADCODE3("illegal leave into middle of try", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+ else
+ {
+ BADCODE3("illegal control flow in to/out of try block", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ * Check that blkDest is the first block of an inner try or a sibling
+ * with no intervening trys in between
+ */
+
+bool Compiler::fgFlowToFirstBlockOfInnerTry(BasicBlock* blkSrc, BasicBlock* blkDest, bool sibling)
+{
+ assert(!fgNormalizeEHDone); // These rules aren't quite correct after EH normalization has introduced new blocks
+
+ noway_assert(blkDest->hasTryIndex());
+
+ unsigned XTnum = blkDest->getTryIndex();
+ unsigned lastXTnum = blkSrc->hasTryIndex() ? blkSrc->getTryIndex() : compHndBBtabCount;
+ noway_assert(XTnum < compHndBBtabCount);
+ noway_assert(lastXTnum <= compHndBBtabCount);
+
+ EHblkDsc* HBtab = ehGetDsc(XTnum);
+
+ // check that we are not jumping into middle of try
+ if (HBtab->ebdTryBeg != blkDest)
+ {
+ return false;
+ }
+
+ if (sibling)
+ {
+ noway_assert(!BasicBlock::sameTryRegion(blkSrc, blkDest));
+
+ // Find the least upper bound (l.u.b.) of the two try ranges
+ // and set lastXTnum to it.
+
+ HBtab = ehGetDsc(lastXTnum);
+
+ for (lastXTnum++, HBtab++; lastXTnum < compHndBBtabCount; lastXTnum++, HBtab++)
+ {
+ if (jitIsBetweenInclusive(blkDest->bbNum, HBtab->ebdTryBeg->bbNum, HBtab->ebdTryLast->bbNum))
+ {
+ break;
+ }
+ }
+ }
+
+ // now check there are no intervening trys between dest and l.u.b
+ // (it is ok to have intervening trys as long as they all start at
+ // the same code offset)
+
+ HBtab = ehGetDsc(XTnum);
+
+ for (XTnum++, HBtab++; XTnum < lastXTnum; XTnum++, HBtab++)
+ {
+ if (HBtab->ebdTryBeg->bbNum < blkDest->bbNum && blkDest->bbNum <= HBtab->ebdTryLast->bbNum)
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/*****************************************************************************
+ * Returns the handler nesting level of the block.
+ * *pFinallyNesting is set to the nesting level of the inner-most
+ * finally-protected try the block is in.
+ */
+
+unsigned Compiler::fgGetNestingLevel(BasicBlock* block, unsigned* pFinallyNesting)
+{
+ unsigned curNesting = 0; // How many handlers is the block in
+ unsigned tryFin = (unsigned)-1; // curNesting when we see innermost finally-protected try
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ /* We find the block's handler nesting level by walking over the
+ complete exception table and finding the enclosing clauses. */
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ noway_assert(HBtab->ebdTryBeg && HBtab->ebdHndBeg);
+
+ if (HBtab->HasFinallyHandler() && (tryFin == (unsigned)-1) && bbInTryRegions(XTnum, block))
+ {
+ tryFin = curNesting;
+ }
+ else if (bbInHandlerRegions(XTnum, block))
+ {
+ curNesting++;
+ }
+ }
+
+ if (tryFin == (unsigned)-1)
+ {
+ tryFin = curNesting;
+ }
+
+ if (pFinallyNesting)
+ {
+ *pFinallyNesting = curNesting - tryFin;
+ }
+
+ return curNesting;
+}
+
+/*****************************************************************************
+ *
+ * Import the basic blocks of the procedure.
+ */
+
+void Compiler::fgImport()
+{
+ fgHasPostfix = false;
+
+ impImport(fgFirstBB);
+
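+ // Unless verification was skipped entirely, report back to the EE whether
+ // the importer found the method's IL to be verifiable.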
+ if (!(opts.eeFlags & CORJIT_FLG_SKIP_VERIFICATION))
+ {
+ CorInfoMethodRuntimeFlags verFlag;
+ verFlag = tiIsVerifiableCode ? CORINFO_FLG_VERIFIABLE : CORINFO_FLG_UNVERIFIABLE;
+ info.compCompHnd->setMethodAttribs(info.compMethodHnd, verFlag);
+ }
+}
+
+/*****************************************************************************
+ * This function returns true if tree is a call to a helper
+ * that unconditionally throws an exception
+ */
+
+bool Compiler::fgIsThrow(GenTreePtr tree)
+{
+ if ((tree->gtOper != GT_CALL) || (tree->gtCall.gtCallType != CT_HELPER))
+ {
+ return false;
+ }
+
+ // TODO-Throughput: Replace all these calls to eeFindHelper() with a table based lookup
+
+ if ((tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_OVERFLOW)) ||
+ (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_VERIFICATION)) ||
+ (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RNGCHKFAIL)) ||
+ (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWDIVZERO)) ||
+#if COR_JIT_EE_VERSION > 460
+ (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWNULLREF)) ||
+#endif // COR_JIT_EE_VERSION
+ (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROW)) ||
+ (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RETHROW)))
+ {
+ noway_assert(tree->gtFlags & GTF_CALL);
+ noway_assert(tree->gtFlags & GTF_EXCEPT);
+ return true;
+ }
+
+ // TODO-CQ: there are a bunch of managed methods in [mscorlib]System.ThrowHelper
+ // that would be nice to recognize.
+
+ return false;
+}
+
+/*****************************************************************************
+ * This function returns true for blocks that are in different hot-cold regions.
+ * It returns false when the blocks are both in the same region.
+ */
+
+bool Compiler::fgInDifferentRegions(BasicBlock* blk1, BasicBlock* blk2)
+{
+ noway_assert(blk1 != nullptr);
+ noway_assert(blk2 != nullptr);
+
+ if (fgFirstColdBlock == nullptr)
+ {
+ return false;
+ }
+
+ // If one block is Hot and the other is Cold then we are in different regions
+ return ((blk1->bbFlags & BBF_COLD) != (blk2->bbFlags & BBF_COLD));
+}
+
+bool Compiler::fgIsBlockCold(BasicBlock* blk)
+{
+ noway_assert(blk != nullptr);
+
+ if (fgFirstColdBlock == nullptr)
+ {
+ return false;
+ }
+
+ return ((blk->bbFlags & BBF_COLD) != 0);
+}
+
+/*****************************************************************************
+ * This function returns true if tree is a GT_COMMA node with a call
+ * that unconditionally throws an exception
+ */
+
+bool Compiler::fgIsCommaThrow(GenTreePtr tree, bool forFolding /* = false */)
+{
+ // Instead of always folding comma throws,
+ // with stress enabled we only fold half the time
+
+ if (forFolding && compStressCompile(STRESS_FOLD, 50))
+ {
+ return false; /* Don't fold */
+ }
+
+ /* Check for a GT_COMMA whose first operand is a call that unconditionally throws */
+ if ((tree->gtOper == GT_COMMA) && (tree->gtFlags & GTF_CALL) && (tree->gtFlags & GTF_EXCEPT))
+ {
+ return (fgIsThrow(tree->gtOp.gtOp1));
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------
+// fgIsIndirOfAddrOfLocal: Determine whether "tree" is an indirection of a local.
+//
+// Arguments:
+// tree - The tree node under consideration
+//
+// Return Value:
+// If "tree" is a indirection (GT_IND, GT_BLK, or GT_OBJ) whose arg is an ADDR,
+// whose arg in turn is a LCL_VAR, return that LCL_VAR node, else nullptr.
+//
+// static
+GenTreePtr Compiler::fgIsIndirOfAddrOfLocal(GenTreePtr tree)
+{
+ GenTreePtr res = nullptr;
+ if (tree->OperIsIndir())
+ {
+ GenTreePtr addr = tree->AsIndir()->Addr();
+
+ // Post rationalization, we can have Indir(Lea(..)) trees. Therefore to recognize
+ // Indir of addr of a local, skip over Lea in Indir(Lea(base, index, scale, offset))
+ // to get to base variable.
+ if (addr->OperGet() == GT_LEA)
+ {
+ // We use this method in backward dataflow after liveness computation - fgInterBlockLocalVarLiveness().
+ // Therefore it is critical that we don't miss 'uses' of any local. It may seem that this method
+ // overlooks a use when the index part of the LEA contains indir( someAddrOperator ( lclVar ) ),
+ // but that case is covered because we traverse the expression in execution order and also visit the index.
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ GenTreePtr base = lea->Base();
+
+ if (base != nullptr)
+ {
+ if (base->OperGet() == GT_IND)
+ {
+ return fgIsIndirOfAddrOfLocal(base);
+ }
+ // else use base as addr
+ addr = base;
+ }
+ }
+
+ if (addr->OperGet() == GT_ADDR)
+ {
+ GenTreePtr lclvar = addr->gtOp.gtOp1;
+ if (lclvar->OperGet() == GT_LCL_VAR)
+ {
+ res = lclvar;
+ }
+ }
+ else if (addr->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ res = addr;
+ }
+ }
+ return res;
+}
+
+GenTreePtr Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper)
+{
+ bool bNeedClassID = true;
+ unsigned callFlags = 0;
+
+ var_types type = TYP_BYREF;
+
+ // This is sort of ugly, as we have knowledge of what the helper is returning.
+ // We need the return type.
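+ // The helpers that return a GC-tracked statics base produce TYP_BYREF;
+ // the rest produce TYP_I_IMPL. The *_NOCTOR variants are additionally
+ // marked GTF_CALL_HOISTABLE.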
+ switch (helper)
+ {
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR:
+ bNeedClassID = false;
+ __fallthrough;
+
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR:
+ callFlags |= GTF_CALL_HOISTABLE;
+ __fallthrough;
+
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS:
+ // type = TYP_BYREF;
+ break;
+
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR:
+ bNeedClassID = false;
+ __fallthrough;
+
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR:
+ callFlags |= GTF_CALL_HOISTABLE;
+ __fallthrough;
+
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS:
+ type = TYP_I_IMPL;
+ break;
+
+ default:
+ assert(!"unknown shared statics helper");
+ break;
+ }
+
+ GenTreeArgList* argList = nullptr;
+
+ GenTreePtr opModuleIDArg;
+ GenTreePtr opClassIDArg;
+
+ // Get the class ID
+ unsigned clsID;
+ size_t moduleID;
+ void* pclsID;
+ void* pmoduleID;
+
+ clsID = info.compCompHnd->getClassDomainID(cls, &pclsID);
+
+ moduleID = info.compCompHnd->getClassModuleIdForStatics(cls, nullptr, &pmoduleID);
+
+ if (!(callFlags & GTF_CALL_HOISTABLE))
+ {
+ if (info.compCompHnd->getClassAttribs(cls) & CORINFO_FLG_BEFOREFIELDINIT)
+ {
+ callFlags |= GTF_CALL_HOISTABLE;
+ }
+ }
+
+ if (pmoduleID)
+ {
+ opModuleIDArg = gtNewIconHandleNode((size_t)pmoduleID, GTF_ICON_CIDMID_HDL);
+ opModuleIDArg = gtNewOperNode(GT_IND, TYP_I_IMPL, opModuleIDArg);
+ opModuleIDArg->gtFlags |= GTF_IND_INVARIANT;
+ }
+ else
+ {
+ opModuleIDArg = gtNewIconNode((size_t)moduleID, TYP_I_IMPL);
+ }
+
+ if (bNeedClassID)
+ {
+ if (pclsID)
+ {
+ opClassIDArg = gtNewIconHandleNode((size_t)pclsID, GTF_ICON_CIDMID_HDL);
+ opClassIDArg = gtNewOperNode(GT_IND, TYP_INT, opClassIDArg);
+ opClassIDArg->gtFlags |= GTF_IND_INVARIANT;
+ }
+ else
+ {
+ opClassIDArg = gtNewIconNode(clsID, TYP_INT);
+ }
+
+ // call the helper to get the base
+ argList = gtNewArgList(opModuleIDArg, opClassIDArg);
+ }
+ else
+ {
+ argList = gtNewArgList(opModuleIDArg);
+ }
+
+ if (!s_helperCallProperties.NoThrow(helper))
+ {
+ callFlags |= GTF_EXCEPT;
+ }
+
+ return gtNewHelperCallNode(helper, type, callFlags, argList);
+}
+
+GenTreePtr Compiler::fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls)
+{
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ CORINFO_RESOLVED_TOKEN resolvedToken;
+ ZeroMemory(&resolvedToken, sizeof(resolvedToken));
+ resolvedToken.hClass = cls;
+
+ return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
+ }
+#endif
+
+ // Call the shared non-GC static helper, as it's the fastest
+ return fgGetStaticsCCtorHelper(cls, info.compCompHnd->getSharedCCtorHelper(cls));
+}
+
+//
+// Returns true if the address expression could evaluate to NULL;
+// returns false only when it provably can never be NULL.
+//
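+// For example, &lclVar (and &lclVar plus a small constant offset) can never
+// be null, whereas an address formed from arbitrary non-constant operands is
+// conservatively treated as possibly null.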
+bool Compiler::fgAddrCouldBeNull(GenTreePtr addr)
+{
+ if (addr->gtOper == GT_ADDR)
+ {
+ if (addr->gtOp.gtOp1->gtOper == GT_CNS_INT)
+ {
+ GenTreePtr cns1Tree = addr->gtOp.gtOp1;
+ if (!cns1Tree->IsIconHandle())
+ {
+ // Indirection of some random constant...
+ // It is safest just to return true
+ return true;
+ }
+ }
+ else if (addr->gtOp.gtOp1->OperIsLocalAddr())
+ {
+ return false;
+ }
+ return false; // we can't have a null address
+ }
+ else if (addr->gtOper == GT_ADD)
+ {
+ if (addr->gtOp.gtOp1->gtOper == GT_CNS_INT)
+ {
+ GenTreePtr cns1Tree = addr->gtOp.gtOp1;
+ if (!cns1Tree->IsIconHandle())
+ {
+ if (!fgIsBigOffset(cns1Tree->gtIntCon.gtIconVal))
+ {
+ // Op1 was an ordinary small constant
+ return fgAddrCouldBeNull(addr->gtOp.gtOp2);
+ }
+ }
+ else // Op1 was a handle represented as a constant
+ {
+ // Is Op2 also a constant?
+ if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ GenTreePtr cns2Tree = addr->gtOp.gtOp2;
+ // Is this an addition of a handle and constant
+ if (!cns2Tree->IsIconHandle())
+ {
+ if (!fgIsBigOffset(cns2Tree->gtIntCon.gtIconVal))
+ {
+ // Op2 was an ordinary small constant
+ return false; // we can't have a null address
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ // Op1 is not a constant
+ // What about Op2?
+ if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ GenTreePtr cns2Tree = addr->gtOp.gtOp2;
+ // Is this an addition of a small constant
+ if (!cns2Tree->IsIconHandle())
+ {
+ if (!fgIsBigOffset(cns2Tree->gtIntCon.gtIconVal))
+ {
+ // Op2 was an ordinary small constant
+ return fgAddrCouldBeNull(addr->gtOp.gtOp1);
+ }
+ }
+ }
+ }
+ }
+ return true; // default result: addr could be null
+}
+
+/*****************************************************************************
+ * Optimize the call to the delegate constructor.
+ */
+
+GenTreePtr Compiler::fgOptimizeDelegateConstructor(GenTreePtr call, CORINFO_CONTEXT_HANDLE* ExactContextHnd)
+{
+ noway_assert(call->gtOper == GT_CALL);
+
+ noway_assert(call->gtCall.gtCallType == CT_USER_FUNC);
+ CORINFO_METHOD_HANDLE methHnd = call->gtCall.gtCallMethHnd;
+ CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getMethodClass(methHnd);
+
+ GenTreePtr targetMethod = call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
+ noway_assert(targetMethod->TypeGet() == TYP_I_IMPL);
+ genTreeOps oper = targetMethod->OperGet();
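+
+ // The target-method argument can take three shapes: a direct ldftn
+ // (GT_FTN_ADDR), an ldvirtftn lowered to a helper call (GT_CALL), or a
+ // generic dictionary lookup (GT_QMARK); each is unwrapped below to recover
+ // the compile-time method handle when possible.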
+ if (oper == GT_FTN_ADDR || oper == GT_CALL || oper == GT_QMARK)
+ {
+ CORINFO_METHOD_HANDLE targetMethodHnd = nullptr;
+ GenTreePtr qmarkNode = nullptr;
+ if (oper == GT_FTN_ADDR)
+ {
+ targetMethodHnd = targetMethod->gtFptrVal.gtFptrMethod;
+ }
+ else if (oper == GT_CALL && targetMethod->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR))
+ {
+ GenTreePtr handleNode = targetMethod->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp1;
+
+ if (handleNode->OperGet() == GT_CNS_INT)
+ {
+ // It's an ldvirtftn case; fetch the method handle off the helper for ldvirtftn. It's the 3rd arg.
+ targetMethodHnd = CORINFO_METHOD_HANDLE(handleNode->gtIntCon.gtCompileTimeHandle);
+ }
+ // Sometimes the argument to this is the result of a generic dictionary lookup, which shows
+ // up as a GT_QMARK.
+ else if (handleNode->OperGet() == GT_QMARK)
+ {
+ qmarkNode = handleNode;
+ }
+ }
+ // Sometimes we don't call CORINFO_HELP_VIRTUAL_FUNC_PTR but instead just call
+ // CORINFO_HELP_RUNTIMEHANDLE_METHOD directly.
+ else if (oper == GT_QMARK)
+ {
+ qmarkNode = targetMethod;
+ }
+ if (qmarkNode)
+ {
+ noway_assert(qmarkNode->OperGet() == GT_QMARK);
+ // The argument is actually a generic dictionary lookup. For delegate creation it looks
+ // like:
+ // GT_QMARK
+ // GT_COLON
+ // op1 -> call
+ // Arg 1 -> token (has compile time handle)
+ // op2 -> lclvar
+ //
+ //
+ // In this case I can find the token (which is a method handle) and that is the compile time
+ // handle.
+ noway_assert(qmarkNode->gtOp.gtOp2->OperGet() == GT_COLON);
+ noway_assert(qmarkNode->gtOp.gtOp2->gtOp.gtOp1->OperGet() == GT_CALL);
+ GenTreePtr runtimeLookupCall = qmarkNode->gtOp.gtOp2->gtOp.gtOp1;
+
+ // This could be any of CORINFO_HELP_RUNTIMEHANDLE_(METHOD|CLASS)(_LOG?)
+ GenTreePtr tokenNode = runtimeLookupCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
+ noway_assert(tokenNode->OperGet() == GT_CNS_INT);
+ targetMethodHnd = CORINFO_METHOD_HANDLE(tokenNode->gtIntCon.gtCompileTimeHandle);
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ // ReadyToRun has this optimization for non-virtual function pointers only for now.
+ if (oper == GT_FTN_ADDR)
+ {
+ // The first argument of the helper is the delegate's 'this' pointer
+ GenTreeArgList* helperArgs = gtNewArgList(call->gtCall.gtCallObjp);
+ CORINFO_CONST_LOOKUP entryPoint;
+
+ // The second argument of the helper is the target object pointer
+ helperArgs->gtOp.gtOp2 = gtNewArgList(call->gtCall.gtCallArgs->gtOp.gtOp1);
+
+ call = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_DELEGATE_CTOR, TYP_VOID, GTF_EXCEPT, helperArgs);
+#if COR_JIT_EE_VERSION > 460
+ info.compCompHnd->getReadyToRunDelegateCtorHelper(targetMethod->gtFptrVal.gtLdftnResolvedToken, clsHnd,
+ &entryPoint);
+#else
+ info.compCompHnd->getReadyToRunHelper(targetMethod->gtFptrVal.gtLdftnResolvedToken,
+ CORINFO_HELP_READYTORUN_DELEGATE_CTOR, &entryPoint);
+#endif
+ call->gtCall.setEntryPoint(entryPoint);
+ }
+ }
+ else
+#endif
+ if (targetMethodHnd != nullptr)
+ {
+ CORINFO_METHOD_HANDLE alternateCtor = nullptr;
+ DelegateCtorArgs ctorData;
+ ctorData.pMethod = info.compMethodHnd;
+ ctorData.pArg3 = nullptr;
+ ctorData.pArg4 = nullptr;
+ ctorData.pArg5 = nullptr;
+
+ alternateCtor = info.compCompHnd->GetDelegateCtor(methHnd, clsHnd, targetMethodHnd, &ctorData);
+ if (alternateCtor != methHnd)
+ {
+ // We erase any inline info that may have been set for generics, as it is not needed here,
+ // and in fact it would pass the wrong info to the inliner code.
+ *ExactContextHnd = nullptr;
+
+ call->gtCall.gtCallMethHnd = alternateCtor;
+
+ noway_assert(call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2 == nullptr);
+ if (ctorData.pArg3)
+ {
+ call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2 =
+ gtNewArgList(gtNewIconHandleNode(size_t(ctorData.pArg3), GTF_ICON_FTN_ADDR));
+
+ if (ctorData.pArg4)
+ {
+ call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp2 =
+ gtNewArgList(gtNewIconHandleNode(size_t(ctorData.pArg4), GTF_ICON_FTN_ADDR));
+
+ if (ctorData.pArg5)
+ {
+ call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp2 =
+ gtNewArgList(gtNewIconHandleNode(size_t(ctorData.pArg5), GTF_ICON_FTN_ADDR));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return call;
+}
+
+bool Compiler::fgCastNeeded(GenTreePtr tree, var_types toType)
+{
+ //
+ // If tree is a relop and we need a 4-byte integer
+ // then we never need to insert a cast
+ //
+ if ((tree->OperKind() & GTK_RELOP) && (genActualType(toType) == TYP_INT))
+ {
+ return false;
+ }
+
+ var_types fromType;
+
+ //
+ // Is the tree a GT_CAST or a GT_CALL?
+ //
+ if (tree->OperGet() == GT_CAST)
+ {
+ fromType = tree->CastToType();
+ }
+ else if (tree->OperGet() == GT_CALL)
+ {
+ fromType = (var_types)tree->gtCall.gtReturnType;
+ }
+ else
+ {
+ fromType = tree->TypeGet();
+ }
+
+ //
+ // If both types are the same then an additional cast is not necessary
+ //
+ if (toType == fromType)
+ {
+ return false;
+ }
+ //
+ // If the signedness of the two types differs, then a cast is necessary
+ //
+ if (varTypeIsUnsigned(toType) != varTypeIsUnsigned(fromType))
+ {
+ return true;
+ }
+ //
+ // If the from type is the same size or smaller then an additional cast is not necessary
+ //
+ if (genTypeSize(toType) >= genTypeSize(fromType))
+ {
+ return false;
+ }
+
+ //
+ // Looks like we will need the cast
+ //
+ return true;
+}
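+
+// Illustrative sketch (not part of the JIT sources): once 'fromType' has been determined,
+// the size/sign portion of the decision above can be viewed as a small stand-alone predicate.
+// The name 'castNeededForSizes' is hypothetical and only mirrors the logic of fgCastNeeded.
+//
+//     bool castNeededForSizes(bool toUnsigned, bool fromUnsigned, unsigned toSize, unsigned fromSize)
+//     {
+//         if (toUnsigned != fromUnsigned)
+//             return true;  // signedness differs: cast needed
+//         if (toSize >= fromSize)
+//             return false; // same size or widening: no extra cast
+//         return true;      // narrowing: cast needed
+//     }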
+
+// If assigning to a local var, add a cast on the RHS if the target is
+// marked as normalize-on-store. Returns the (possibly updated) tree.
+GenTreePtr Compiler::fgDoNormalizeOnStore(GenTreePtr tree)
+{
+ //
+ // Only normalize the stores in the global morph phase
+ //
+ if (fgGlobalMorph)
+ {
+ noway_assert(tree->OperGet() == GT_ASG);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ if (op1->gtOper == GT_LCL_VAR && genActualType(op1->TypeGet()) == TYP_INT)
+ {
+ // Small-typed arguments and aliased locals are normalized on load.
+ // Other small-typed locals are normalized on store.
+ // If it is an assignment to one of the latter, insert the cast on RHS
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ if (varDsc->lvNormalizeOnStore())
+ {
+ noway_assert(op1->gtType <= TYP_INT);
+ op1->gtType = TYP_INT;
+
+ if (fgCastNeeded(op2, varDsc->TypeGet()))
+ {
+ op2 = gtNewCastNode(TYP_INT, op2, varDsc->TypeGet());
+ tree->gtOp.gtOp2 = op2;
+
+ // Propagate GTF_COLON_COND
+ op2->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
+ }
+ }
+ }
+ }
+
+ return tree;
+}
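+
+// Illustrative sketch (not part of the JIT sources): for a normalize-on-store local of a
+// small type (e.g. short), the rewrite above turns, approximately,
+//
+//     ASG(lclVar<short> x, expr)
+// into
+//     ASG(lclVar<int> x, CAST<short>(expr))
+//
+// i.e. the destination is widened to TYP_INT and the narrowing cast is pushed onto the RHS,
+// and the cast is only inserted when fgCastNeeded(expr, short) says it is required.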
+
+/*****************************************************************************
+ *
+ * Mark whether the edge "srcBB -> dstBB" forms a loop that will always
+ * execute a call or not.
+ */
+
+inline void Compiler::fgLoopCallTest(BasicBlock* srcBB, BasicBlock* dstBB)
+{
+ /* Bail if this is not a backward edge */
+
+ if (srcBB->bbNum < dstBB->bbNum)
+ {
+ return;
+ }
+
+ /* Unless we already know that there is a loop without a call here ... */
+
+ if (!(dstBB->bbFlags & BBF_LOOP_CALL0))
+ {
+ /* Check whether there is a loop path that doesn't call */
+
+ if (optReachWithoutCall(dstBB, srcBB))
+ {
+ dstBB->bbFlags |= BBF_LOOP_CALL0;
+ dstBB->bbFlags &= ~BBF_LOOP_CALL1;
+ }
+ else
+ {
+ dstBB->bbFlags |= BBF_LOOP_CALL1;
+ }
+ }
+ // if this loop will always call, then we can omit the GC Poll
+ if ((GCPOLL_NONE != opts.compGCPollType) && (dstBB->bbFlags & BBF_LOOP_CALL1))
+ {
+ srcBB->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ }
+}
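+
+// Illustrative sketch (not part of the JIT sources): the flag update above is roughly the
+// following hypothetical helper, where 'hasCallFreePath' stands for optReachWithoutCall():
+//
+//     void markLoopCallFlags(BasicBlock* dstBB, bool hasCallFreePath)
+//     {
+//         if (hasCallFreePath)
+//         {
+//             dstBB->bbFlags |= BBF_LOOP_CALL0;  // some path around the loop makes no call
+//             dstBB->bbFlags &= ~BBF_LOOP_CALL1;
+//         }
+//         else
+//         {
+//             dstBB->bbFlags |= BBF_LOOP_CALL1;  // every path around the loop makes a call
+//         }
+//     }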
+
+/*****************************************************************************
+ *
+ * Mark which loops are guaranteed to execute a call.
+ */
+
+void Compiler::fgLoopCallMark()
+{
+ BasicBlock* block;
+
+ /* If we've already marked all the blocks, bail */
+
+ if (fgLoopCallMarked)
+ {
+ return;
+ }
+
+ fgLoopCallMarked = true;
+
+ /* Walk the blocks, looking for backward edges */
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ fgLoopCallTest(block, block->bbJumpDest);
+ break;
+
+ case BBJ_SWITCH:
+
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpPtr;
+ jumpPtr = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ fgLoopCallTest(block, *jumpPtr);
+ } while (++jumpPtr, --jumpCnt);
+
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Note the fact that the given block is a loop header.
+ */
+
+inline void Compiler::fgMarkLoopHead(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgMarkLoopHead: Checking loop head block BB%02u: ", block->bbNum);
+ }
+#endif
+
+ /* Have we decided to generate fully interruptible code already? */
+
+ if (genInterruptible)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("method is already fully interruptible\n");
+ }
+#endif
+ return;
+ }
+
+ /* Is the loop head block known to execute a method call? */
+
+ if (block->bbFlags & BBF_GC_SAFE_POINT)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("this block will execute a call\n");
+ }
+#endif
+ // single block loops that contain GC safe points don't need polls.
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ return;
+ }
+
+ /* Are dominator sets available? */
+
+ if (fgDomsComputed)
+ {
+ /* Make sure that we know which loops will always execute calls */
+
+ if (!fgLoopCallMarked)
+ {
+ fgLoopCallMark();
+ }
+
+ /* Will every trip through our loop execute a call? */
+
+ if (block->bbFlags & BBF_LOOP_CALL1)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("this block dominates a block that will execute a call\n");
+ }
+#endif
+ return;
+ }
+ }
+
+ /*
+ * We have to make this method fully interruptible since we can not
+ * ensure that this loop will execute a call every time it loops.
+ *
+ * We'll also need to generate a full register map for this method.
+ */
+
+ assert(!codeGen->isGCTypeFixed());
+
+ if (!compCanEncodePtrArgCntMax())
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("a callsite with more than 1023 pushed args exists\n");
+ }
+#endif
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("no guaranteed callsite exists, marking method as fully interruptible\n");
+ }
+#endif
+
+ // Only enable fully interruptible code if we're hijacking.
+ if (GCPOLL_NONE == opts.compGCPollType)
+ {
+ genInterruptible = true;
+ }
+}
+
+GenTreePtr Compiler::fgGetCritSectOfStaticMethod()
+{
+ noway_assert(!compIsForInlining());
+
+ noway_assert(info.compIsStatic); // This method should only be called for static methods.
+
+ GenTreePtr tree = nullptr;
+
+ CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
+
+ if (!kind.needsRuntimeLookup)
+ {
+ void *critSect = nullptr, **pCrit = nullptr;
+ critSect = info.compCompHnd->getMethodSync(info.compMethodHnd, (void**)&pCrit);
+ noway_assert((!critSect) != (!pCrit));
+
+ tree = gtNewIconEmbHndNode(critSect, pCrit, GTF_ICON_METHOD_HDL);
+ }
+ else
+ {
+ // Collectible types require that, for shared generic code, we report the generic context parameter
+ // if we use it. (This is a conservative approach; we could detect some cases, particularly when the
+ // context parameter is 'this', where the eager reporting logic is not needed.)
+ lvaGenericsContextUsed = true;
+
+ switch (kind.runtimeLookupKind)
+ {
+ case CORINFO_LOOKUP_THISOBJ:
+ {
+ noway_assert(!"Should never get this for static method.");
+ break;
+ }
+
+ case CORINFO_LOOKUP_CLASSPARAM:
+ {
+ // In this case, the hidden param is the class handle.
+ tree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
+ break;
+ }
+
+ case CORINFO_LOOKUP_METHODPARAM:
+ {
+ // In this case, the hidden param is the method handle.
+ tree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
+ // Call helper CORINFO_HELP_GETCLASSFROMMETHODPARAM to get the class handle
+ // from the method handle.
+ tree = gtNewHelperCallNode(CORINFO_HELP_GETCLASSFROMMETHODPARAM, TYP_I_IMPL, 0, gtNewArgList(tree));
+ break;
+ }
+
+ default:
+ {
+ noway_assert(!"Unknown LOOKUP_KIND");
+ break;
+ }
+ }
+
+ noway_assert(tree); // tree should now contain the CORINFO_CLASS_HANDLE for the exact class.
+
+ // Given the class handle, get the pointer to the Monitor.
+ tree = gtNewHelperCallNode(CORINFO_HELP_GETSYNCFROMCLASSHANDLE, TYP_I_IMPL, 0, gtNewArgList(tree));
+ }
+
+ noway_assert(tree);
+ return tree;
+}
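+
+// Illustrative sketch (not part of the JIT sources) of the tree shapes produced above for
+// the runtime-lookup cases; the helper names are real, the shapes are approximate:
+//
+//     CORINFO_LOOKUP_CLASSPARAM:
+//         call CORINFO_HELP_GETSYNCFROMCLASSHANDLE(lclVar compTypeCtxtArg)
+//
+//     CORINFO_LOOKUP_METHODPARAM:
+//         call CORINFO_HELP_GETSYNCFROMCLASSHANDLE(
+//             call CORINFO_HELP_GETCLASSFROMMETHODPARAM(lclVar compTypeCtxtArg))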
+
+#if !defined(_TARGET_X86_)
+
+/*****************************************************************************
+ *
+ * Add monitor enter/exit calls for synchronized methods, and a try/fault
+ * to ensure the 'exit' is called if the 'enter' was successful. On x86, we
+ * generate monitor enter/exit calls and tell the VM the code location of
+ * these calls. When an exception occurs between those locations, the VM
+ * automatically releases the lock. For non-x86 platforms, the JIT is
+ * responsible for creating a try/finally to protect the monitor enter/exit,
+ * and the VM doesn't need to know anything special about the method during
+ * exception processing -- it's just a normal try/finally.
+ *
+ * We generate the following code:
+ *
+ * void Foo()
+ * {
+ * unsigned byte acquired = 0;
+ * try {
+ * JIT_MonEnterWorker(<lock object>, &acquired);
+ *
+ * *** all the preexisting user code goes here ***
+ *
+ * JIT_MonExitWorker(<lock object>, &acquired);
+ * } fault {
+ * JIT_MonExitWorker(<lock object>, &acquired);
+ * }
+ * L_return:
+ * ret
+ * }
+ *
+ * If the lock is actually acquired, then the 'acquired' variable is set to 1
+ * by the helper call. During normal exit, the monitor exit call at the end of the
+ * 'try' runs, 'acquired' is 1, and the lock is released. If an exception occurs before the lock is
+ * acquired, but within the 'try' (extremely unlikely, but possible), 'acquired'
+ * will be 0, and the monitor exit call will quickly return without attempting
+ * to release the lock. Otherwise, 'acquired' will be 1, and the lock will be
+ * released during exception processing.
+ *
+ * For synchronized methods, we generate a single return block.
+ * We can do this without creating additional "step" blocks because "ret" blocks
+ * must occur at the top-level (of the original code), not nested within any EH
+ * constructs. From the CLI spec, 12.4.2.8.2.3 "ret": "Shall not be enclosed in any
+ * protected block, filter, or handler." Also, 3.57: "The ret instruction cannot be
+ * used to transfer control out of a try, filter, catch, or finally block. From within
+ * a try or catch, use the leave instruction with a destination of a ret instruction
+ * that is outside all enclosing exception blocks."
+ *
+ * In addition, we can add a "fault" at the end of a method and be guaranteed that no
+ * control falls through. From the CLI spec, section 12.4 "Control flow": "Control is not
+ * permitted to simply fall through the end of a method. All paths shall terminate with one
+ * of these instructions: ret, throw, jmp, or (tail. followed by call, calli, or callvirt)."
+ *
+ * We only need to worry about "ret" and "throw", as the CLI spec prevents any other
+ * alternatives. Section 15.4.3.3 "Implementation information" states about exiting
+ * synchronized methods: "Exiting a synchronized method using a tail. call shall be
+ * implemented as though the tail. had not been specified." Section 3.37 "jmp" states:
+ * "The jmp instruction cannot be used to transfer control out of a try, filter,
+ * catch, fault or finally block; or out of a synchronized region." And, "throw" will
+ * be handled naturally; no additional work is required.
+ */
+
+void Compiler::fgAddSyncMethodEnterExit()
+{
+ assert((info.compFlags & CORINFO_FLG_SYNCH) != 0);
+
+ // We need to do this transformation before funclets are created.
+ assert(!fgFuncletsCreated);
+
+ // Assume we don't need to update the bbPreds lists.
+ assert(!fgComputePredsDone);
+
+#if !FEATURE_EH
+ // If we don't support EH, we can't add the EH needed by synchronized methods.
+ // Of course, we could simply ignore adding the EH constructs, since we don't
+ // support exceptions being thrown in this mode, but we would still need to add
+ // the monitor enter/exit, and that doesn't seem worth it for this minor case.
+ // By the time EH is working, we can just enable the whole thing.
+ NYI("No support for synchronized methods");
+#endif // !FEATURE_EH
+
+ // Create a scratch first BB where we can put the new variable initialization.
+ // Don't put the scratch BB in the protected region.
+
+ fgEnsureFirstBBisScratch();
+
+ // Create a block for the start of the try region, where the monitor enter call
+ // will go.
+
+ assert(fgFirstBB->bbFallsThrough());
+
+ BasicBlock* tryBegBB = fgNewBBafter(BBJ_NONE, fgFirstBB, false);
+ BasicBlock* tryLastBB = fgLastBB;
+
+ // Create a block for the fault.
+
+ assert(!tryLastBB->bbFallsThrough());
+ BasicBlock* faultBB = fgNewBBafter(BBJ_EHFINALLYRET, tryLastBB, false);
+
+ assert(tryLastBB->bbNext == faultBB);
+ assert(faultBB->bbNext == nullptr);
+ assert(faultBB == fgLastBB);
+
+ { // Scope the EH region creation
+
+ // Add the new EH region at the end, since it is the least nested,
+ // and thus should be last.
+
+ EHblkDsc* newEntry;
+ unsigned XTnew = compHndBBtabCount;
+
+ newEntry = fgAddEHTableEntry(XTnew);
+
+ // Initialize the new entry
+
+ newEntry->ebdHandlerType = EH_HANDLER_FAULT;
+
+ newEntry->ebdTryBeg = tryBegBB;
+ newEntry->ebdTryLast = tryLastBB;
+
+ newEntry->ebdHndBeg = faultBB;
+ newEntry->ebdHndLast = faultBB;
+
+ newEntry->ebdTyp = 0; // unused for fault
+
+ newEntry->ebdEnclosingTryIndex = EHblkDsc::NO_ENCLOSING_INDEX;
+ newEntry->ebdEnclosingHndIndex = EHblkDsc::NO_ENCLOSING_INDEX;
+
+ newEntry->ebdTryBegOffset = tryBegBB->bbCodeOffs;
+ newEntry->ebdTryEndOffset = tryLastBB->bbCodeOffsEnd;
+ newEntry->ebdFilterBegOffset = 0;
+ newEntry->ebdHndBegOffset = 0; // handler doesn't correspond to any IL
+ newEntry->ebdHndEndOffset = 0; // handler doesn't correspond to any IL
+
+ // Set some flags on the new region. This is the same as when we set up
+ // EH regions in fgFindBasicBlocks(). Note that the try has no enclosing
+ // handler, and the fault has no enclosing try.
+
+ tryBegBB->bbFlags |= BBF_HAS_LABEL | BBF_DONT_REMOVE | BBF_TRY_BEG | BBF_IMPORTED;
+
+ faultBB->bbFlags |= BBF_HAS_LABEL | BBF_DONT_REMOVE | BBF_IMPORTED;
+ faultBB->bbCatchTyp = BBCT_FAULT;
+
+ tryBegBB->setTryIndex(XTnew);
+ tryBegBB->clearHndIndex();
+
+ faultBB->clearTryIndex();
+ faultBB->setHndIndex(XTnew);
+
+ // Walk the user code blocks and set all blocks that don't already have a try handler
+ // to point to the new try handler.
+
+ BasicBlock* tmpBB;
+ for (tmpBB = tryBegBB->bbNext; tmpBB != faultBB; tmpBB = tmpBB->bbNext)
+ {
+ if (!tmpBB->hasTryIndex())
+ {
+ tmpBB->setTryIndex(XTnew);
+ }
+ }
+
+ // Walk the EH table. Make every EH entry that doesn't already have an enclosing
+ // try index mark this new entry as their enclosing try index.
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < XTnew; XTnum++, HBtab++)
+ {
+ if (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ HBtab->ebdEnclosingTryIndex =
+ (unsigned short)XTnew; // This EH region wasn't previously nested, but now it is.
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("Synchronized method - created additional EH descriptor EH#%u for try/fault wrapping monitor "
+ "enter/exit\n",
+ XTnew);
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+
+ fgVerifyHandlerTab();
+#endif // DEBUG
+ }
+
+ // Create a 'monitor acquired' boolean (actually, an unsigned byte: 1 = acquired, 0 = not acquired).
+
+ var_types typeMonAcquired = TYP_UBYTE;
+ this->lvaMonAcquired = lvaGrabTemp(true DEBUGARG("Synchronized method monitor acquired boolean"));
+
+ lvaTable[lvaMonAcquired].lvType = typeMonAcquired;
+
+ { // Scope the variables of the variable initialization
+
+ // Initialize the 'acquired' boolean.
+
+ GenTreePtr zero = gtNewZeroConNode(genActualType(typeMonAcquired));
+ GenTreePtr varNode = gtNewLclvNode(lvaMonAcquired, typeMonAcquired);
+ GenTreePtr initNode = gtNewAssignNode(varNode, zero);
+
+ fgInsertStmtAtEnd(fgFirstBB, initNode);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSynchronized method - Add 'acquired' initialization in first block BB%02u [%08p]\n",
+ fgFirstBB->bbNum, dspPtr(fgFirstBB));
+ gtDispTree(initNode);
+ printf("\n");
+ }
+#endif
+ }
+
+ // Make a copy of the 'this' pointer to be used in the handler so it does not inhibit enregistration
+ // of all uses of the variable.
+ unsigned lvaCopyThis = 0;
+ if (!info.compIsStatic)
+ {
+ lvaCopyThis = lvaGrabTemp(true DEBUGARG("Synchronized method copy of this for handler"));
+ lvaTable[lvaCopyThis].lvType = TYP_REF;
+
+ GenTreePtr thisNode = gtNewLclvNode(info.compThisArg, TYP_REF);
+ GenTreePtr copyNode = gtNewLclvNode(lvaCopyThis, TYP_REF);
+ GenTreePtr initNode = gtNewAssignNode(copyNode, thisNode);
+
+ fgInsertStmtAtEnd(tryBegBB, initNode);
+ }
+
+ fgCreateMonitorTree(lvaMonAcquired, info.compThisArg, tryBegBB, true /*enter*/);
+
+ // exceptional case
+ fgCreateMonitorTree(lvaMonAcquired, lvaCopyThis, faultBB, false /*exit*/);
+
+ // non-exceptional cases
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ fgCreateMonitorTree(lvaMonAcquired, info.compThisArg, block, false /*exit*/);
+ }
+ }
+}
+
+// fgCreateMonitorTree: Create tree to execute a monitor enter or exit operation for synchronized methods
+// lvaMonAcquired: lvaNum of boolean variable that tracks if monitor has been acquired.
+// lvaThisVar: lvaNum of variable being used as 'this' pointer, may not be the original one. Is only used for
+// nonstatic methods
+// block: block to insert the tree in. It is inserted at the end or in the case of a return, immediately before the
+// GT_RETURN
+// enter: whether to create a monitor enter or exit
+
+GenTree* Compiler::fgCreateMonitorTree(unsigned lvaMonAcquired, unsigned lvaThisVar, BasicBlock* block, bool enter)
+{
+ // Insert the expression "enter/exitCrit(this, &acquired)" or "enter/exitCrit(handle, &acquired)"
+
+ var_types typeMonAcquired = TYP_UBYTE;
+ GenTreePtr varNode = gtNewLclvNode(lvaMonAcquired, typeMonAcquired);
+ GenTreePtr varAddrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, varNode);
+ GenTreePtr tree;
+
+ if (info.compIsStatic)
+ {
+ tree = fgGetCritSectOfStaticMethod();
+ tree = gtNewHelperCallNode(enter ? CORINFO_HELP_MON_ENTER_STATIC : CORINFO_HELP_MON_EXIT_STATIC, TYP_VOID, 0,
+ gtNewArgList(tree, varAddrNode));
+ }
+ else
+ {
+ tree = gtNewLclvNode(lvaThisVar, TYP_REF);
+ tree = gtNewHelperCallNode(enter ? CORINFO_HELP_MON_ENTER : CORINFO_HELP_MON_EXIT, TYP_VOID, 0,
+ gtNewArgList(tree, varAddrNode));
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSynchronized method - Add monitor %s call to block BB%02u [%08p]\n", enter ? "enter" : "exit",
+ block->bbNum, dspPtr(block));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+
+ if (block->bbJumpKind == BBJ_RETURN && block->lastStmt()->gtStmtExpr->gtOper == GT_RETURN)
+ {
+ GenTree* retNode = block->lastStmt()->gtStmtExpr;
+ GenTree* retExpr = retNode->gtOp.gtOp1;
+
+ if (retExpr != nullptr)
+ {
+ // have to insert this immediately before the GT_RETURN so we transform:
+ // ret(...) ->
+ // ret(comma(comma(tmp=..., call mon_exit), tmp))
+ //
+ //
+ // Before the morph stage, it is possible to have a case of GT_RETURN(TYP_LONG, op1) where op1's type is
+ // TYP_STRUCT (of 8 bytes) and op1 is a call node. See the big comment block in impReturnInstruction()
+ // for details of the case where info.compRetType is not the same as info.compRetNativeType. For
+ // this reason we pass compMethodInfo->args.retTypeClass, which is guaranteed to be a valid class handle
+ // if the return type is a value class. Note that fgInsertCommaFormTemp() in turn uses this class handle,
+ // if the type of op1 is TYP_STRUCT, to perform lvaSetStruct() on the new temp that is created, which
+ // in turn passes it to the VM so it knows the size of the value type.
+ GenTree* temp = fgInsertCommaFormTemp(&retNode->gtOp.gtOp1, info.compMethodInfo->args.retTypeClass);
+
+ GenTree* lclVar = retNode->gtOp.gtOp1->gtOp.gtOp2;
+ retNode->gtOp.gtOp1->gtOp.gtOp2 = gtNewOperNode(GT_COMMA, retExpr->TypeGet(), tree, lclVar);
+ }
+ else
+ {
+ // Insert this immediately before the GT_RETURN
+ fgInsertStmtNearEnd(block, tree);
+ }
+ }
+ else
+ {
+ fgInsertStmtAtEnd(block, tree);
+ }
+
+ return tree;
+}
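+
+// Illustrative sketch (not part of the JIT sources): for a BBJ_RETURN block that returns a
+// value, the rewrite above produces, approximately,
+//
+//     GT_RETURN(expr)
+// becomes
+//     GT_RETURN(COMMA(COMMA(tmp = expr, call MonitorExit(obj, &acquired)), tmp))
+//
+// so the return value is evaluated into a temp before the monitor is released, and the temp
+// is what the method actually returns.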
+
+// Convert a BBJ_RETURN block in a synchronized method to a BBJ_ALWAYS.
+// We've previously added a 'try' block around the original program code using fgAddSyncMethodEnterExit().
+// Thus, we put BBJ_RETURN blocks inside a 'try'. In IL this is illegal. Instead, we would
+// see a 'leave' inside a 'try' that would get transformed into BBJ_CALLFINALLY/BBJ_ALWAYS blocks
+// during importing, and the BBJ_ALWAYS would point at an outer block with the BBJ_RETURN.
+// Here, we mimic some of the logic of importing a LEAVE to get the same effect for synchronized methods.
+void Compiler::fgConvertSyncReturnToLeave(BasicBlock* block)
+{
+ assert(!fgFuncletsCreated);
+ assert(info.compFlags & CORINFO_FLG_SYNCH);
+ assert(genReturnBB != nullptr);
+ assert(genReturnBB != block);
+ assert(fgReturnCount <= 1); // We have a single return for synchronized methods
+ assert(block->bbJumpKind == BBJ_RETURN);
+ assert((block->bbFlags & BBF_HAS_JMP) == 0);
+ assert(block->hasTryIndex());
+ assert(!block->hasHndIndex());
+ assert(compHndBBtabCount >= 1);
+
+ unsigned tryIndex = block->getTryIndex();
+ assert(tryIndex == compHndBBtabCount - 1); // The BBJ_RETURN must be at the top-level before we inserted the
+ // try/finally, which must be the last EH region.
+
+ EHblkDsc* ehDsc = ehGetDsc(tryIndex);
+ assert(ehDsc->ebdEnclosingTryIndex ==
+ EHblkDsc::NO_ENCLOSING_INDEX); // There are no enclosing regions of the BBJ_RETURN block
+ assert(ehDsc->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX);
+
+ // Convert the BBJ_RETURN to BBJ_ALWAYS, jumping to genReturnBB.
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = genReturnBB;
+ block->bbJumpDest->bbRefs++;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Synchronized method - convert block BB%02u to BBJ_ALWAYS [targets BB%02u]\n", block->bbNum,
+ block->bbJumpDest->bbNum);
+ }
+#endif
+}
+
+#endif // !_TARGET_X86_
+
+//------------------------------------------------------------------------
+// fgAddReversePInvokeEnterExit: Add enter/exit calls for reverse PInvoke methods
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+
+void Compiler::fgAddReversePInvokeEnterExit()
+{
+ assert(opts.IsReversePInvoke());
+
+#if COR_JIT_EE_VERSION > 460
+ lvaReversePInvokeFrameVar = lvaGrabTempWithImplicitUse(false DEBUGARG("Reverse Pinvoke FrameVar"));
+
+ LclVarDsc* varDsc = &lvaTable[lvaReversePInvokeFrameVar];
+ varDsc->lvType = TYP_BLK;
+ varDsc->lvExactSize = eeGetEEInfo()->sizeOfReversePInvokeFrame;
+
+ GenTreePtr tree;
+
+ // Add enter pinvoke exit callout at the start of prolog
+
+ tree = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(lvaReversePInvokeFrameVar, TYP_BLK));
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER, TYP_VOID, 0, gtNewArgList(tree));
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtBeg(fgFirstBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nReverse PInvoke method - Add reverse pinvoke enter in first basic block [%08p]\n", dspPtr(fgFirstBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+
+ // Add reverse pinvoke exit callout at the end of epilog
+
+ tree = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(lvaReversePInvokeFrameVar, TYP_BLK));
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT, TYP_VOID, 0, gtNewArgList(tree));
+
+ assert(genReturnBB != nullptr);
+
+ fgInsertStmtAtEnd(genReturnBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nReverse PInvoke method - Add reverse pinvoke exit in return basic block [%08p]\n",
+ dspPtr(genReturnBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+
+#endif // COR_JIT_EE_VERSION > 460
+}
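+
+// Illustrative sketch (not part of the JIT sources) of the code shape added above, written
+// as pseudo-C in the style of the synchronized-method comment earlier in this file:
+//
+//     void ReversePInvokeMethod()
+//     {
+//         byte frame[sizeOfReversePInvokeFrame];           // lvaReversePInvokeFrameVar
+//         CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER(&frame);  // in the scratch first block
+//
+//         ... original method body ...
+//
+//         CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT(&frame);   // in genReturnBB
+//         return;
+//     }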
+
+/*****************************************************************************
+ *
+ * Return 'true' if there is more than one BBJ_RETURN block.
+ */
+
+bool Compiler::fgMoreThanOneReturnBlock()
+{
+ unsigned retCnt = 0;
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ retCnt++;
+ if (retCnt > 1)
+ {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Add any internal blocks/trees we may need
+ */
+
+void Compiler::fgAddInternal()
+{
+ noway_assert(!compIsForInlining());
+
+ /*
+ <BUGNUM> VSW441487 </BUGNUM>
+
+ The "this" pointer is implicitly used in the following cases:
+ 1. Locking of synchronized methods
+ 2. Dictionary access of shared generics code
+ 3. If a method has "catch(FooException<T>)", the EH code accesses "this" to determine T.
+ 4. Initializing the type from generic methods which require precise cctor semantics
+ 5. Verifier does special handling of "this" in the .ctor
+
+ However, we might overwrite it with a "starg 0".
+ In this case, we will redirect all "ldarg(a)/starg(a) 0" to a temp lvaTable[lvaArg0Var]
+ */
+
+ if (!info.compIsStatic)
+ {
+ if (lvaArg0Var != info.compThisArg)
+ {
+ // When we're using the general encoder, we mark compThisArg address-taken to ensure that it is not
+ // enregistered (since the decoder always reports a stack location for "this" for generics
+ // context vars).
+ bool lva0CopiedForGenericsCtxt;
+#ifndef JIT32_GCENCODER
+ lva0CopiedForGenericsCtxt = ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0);
+#else // JIT32_GCENCODER
+ lva0CopiedForGenericsCtxt = false;
+#endif // JIT32_GCENCODER
+ noway_assert(lva0CopiedForGenericsCtxt || !lvaTable[info.compThisArg].lvAddrExposed);
+ noway_assert(!lvaTable[info.compThisArg].lvArgWrite);
+ noway_assert(lvaTable[lvaArg0Var].lvAddrExposed || lvaTable[lvaArg0Var].lvArgWrite ||
+ lva0CopiedForGenericsCtxt);
+
+ var_types thisType = lvaTable[info.compThisArg].TypeGet();
+
+ // Now assign the original input "this" to the temp
+
+ GenTreePtr tree;
+
+ tree = gtNewLclvNode(lvaArg0Var, thisType);
+
+ tree = gtNewAssignNode(tree, // dst
+ gtNewLclvNode(info.compThisArg, thisType) // src
+ );
+
+ /* Create a new basic block and stick the assignment in it */
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtEnd(fgFirstBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCopy \"this\" to lvaArg0Var in first basic block [%08p]\n", dspPtr(fgFirstBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+ }
+ }
+
+ // Grab a temp for the security object.
+ // (Note: opts.compDbgEnC currently also causes the security object to be generated. See Compiler::compCompile)
+ if (opts.compNeedSecurityCheck)
+ {
+ noway_assert(lvaSecurityObject == BAD_VAR_NUM);
+ lvaSecurityObject = lvaGrabTempWithImplicitUse(false DEBUGARG("security check"));
+ lvaTable[lvaSecurityObject].lvType = TYP_REF;
+ }
+
+ /* Assume we will generate a single shared return sequence */
+
+ ULONG returnWeight = 0;
+ bool oneReturn;
+ bool allProfWeight;
+
+ //
+ // We will generate just one epilog (return block)
+ // when we are asked to generate enter/leave callbacks
+ // or for methods with PInvoke
+ // or for methods calling into unmanaged code
+ // or for synchronized methods.
+ //
+ if (compIsProfilerHookNeeded() || (info.compCallUnmanaged != 0) || opts.IsReversePInvoke() ||
+ ((info.compFlags & CORINFO_FLG_SYNCH) != 0))
+ {
+ // We will generate only one return block
+ // We will transform the BBJ_RETURN blocks
+ // into jumps to the one return block
+ //
+ oneReturn = true;
+ allProfWeight = false;
+ }
+ else
+ {
+ //
+ // We are allowed to have multiple individual exits
+ // However we can still decide to have a single return
+ //
+ oneReturn = false;
+ allProfWeight = true;
+
+ // Count the BBJ_RETURN blocks and set the returnWeight to the
+ // sum of all these blocks.
+ //
+ fgReturnCount = 0;
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ //
+ // returnCount is the count of BBJ_RETURN blocks in this method
+ //
+ fgReturnCount++;
+ //
+ // If all BBJ_RETURN blocks have valid profile weights
+ // then allProfWeight will be true, else it is false
+ //
+ if ((block->bbFlags & BBF_PROF_WEIGHT) == 0)
+ {
+ allProfWeight = false;
+ }
+ //
+ // returnWeight is the sum of the weights of all BBJ_RETURN blocks
+ returnWeight += block->bbWeight;
+ }
+ }
+
+ //
+ // If we only have one (or zero) return blocks then
+ // we do not need a special one return block
+ //
+ if (fgReturnCount > 1)
+ {
+ //
+ // should we generate a single return block?
+ //
+ if (fgReturnCount > 4)
+ {
+ // Our epilog encoding only supports up to 4 epilogs
+ // TODO-CQ: support >4 return points for ARM/AMD64, which presumably support any number of epilogs?
+ //
+ oneReturn = true;
+ }
+ else if (compCodeOpt() == SMALL_CODE)
+ {
+ // For the Small_Code case we always generate a
+ // single return block when we have multiple
+ // return points
+ //
+ oneReturn = true;
+ }
+ }
+ }
+
+#if !defined(_TARGET_X86_)
+ // Add the synchronized method enter/exit calls and try/finally protection. Note
+ // that this must happen before the one BBJ_RETURN block is created below, so the
+ // BBJ_RETURN block gets placed at the top-level, not within an EH region. (Otherwise,
+ // we'd have to be really careful when creating the synchronized method try/finally
+ // not to include the BBJ_RETURN block.)
+ if ((info.compFlags & CORINFO_FLG_SYNCH) != 0)
+ {
+ fgAddSyncMethodEnterExit();
+ }
+#endif // !_TARGET_X86_
+
+ if (oneReturn)
+ {
+ genReturnBB = fgNewBBinRegion(BBJ_RETURN);
+ genReturnBB->bbRefs = 1; // bbRefs gets updated later; for now it should be 1
+ fgReturnCount++;
+
+ if (allProfWeight)
+ {
+ //
+ // if we have profile data for all BBJ_RETURN blocks
+ // then we can set BBF_PROF_WEIGHT for genReturnBB
+ //
+ genReturnBB->bbFlags |= BBF_PROF_WEIGHT;
+ }
+ else
+ {
+ //
+ // We can't rely upon the calculated returnWeight unless
+ // all of the BBJ_RETURN blocks had valid profile weights,
+ // so we will use the weight of the first block instead.
+ //
+ returnWeight = fgFirstBB->bbWeight;
+ }
+
+ //
+ // Set the weight of the oneReturn block
+ //
+ genReturnBB->bbWeight = min(returnWeight, BB_MAX_WEIGHT);
+
+ if (returnWeight == 0)
+ {
+ //
+ // If necessary set the Run Rarely flag
+ //
+ genReturnBB->bbFlags |= BBF_RUN_RARELY;
+ }
+ else
+ {
+ // Make sure that the RunRarely flag is clear
+ // because fgNewBBinRegion will set it to true
+ //
+ genReturnBB->bbFlags &= ~BBF_RUN_RARELY;
+ }
+
+ genReturnBB->bbFlags |= (BBF_INTERNAL | BBF_DONT_REMOVE);
+
+ noway_assert(genReturnBB->bbNext == nullptr);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n genReturnBB [BB%02u] created\n", genReturnBB->bbNum);
+ }
+#endif
+ }
+ else
+ {
+ //
+ // We don't have a oneReturn block for this method
+ //
+ genReturnBB = nullptr;
+ }
+
+ // If there is a return value, then create a temp for it. Real returns will store the value in there and
+ // it'll be reloaded by the single return.
+ if (genReturnBB && compMethodHasRetVal())
+ {
+ genReturnLocal = lvaGrabTemp(true DEBUGARG("Single return block return value"));
+
+ if (compMethodReturnsNativeScalarType())
+ {
+ lvaTable[genReturnLocal].lvType = genActualType(info.compRetNativeType);
+ }
+ else if (compMethodReturnsRetBufAddr())
+ {
+ lvaTable[genReturnLocal].lvType = TYP_BYREF;
+ }
+ else if (compMethodReturnsMultiRegRetType())
+ {
+ lvaTable[genReturnLocal].lvType = TYP_STRUCT;
+ lvaSetStruct(genReturnLocal, info.compMethodInfo->args.retTypeClass, true);
+ lvaTable[genReturnLocal].lvIsMultiRegRet = true;
+ }
+ else
+ {
+ assert(!"unreached");
+ }
+
+ if (varTypeIsFloating(lvaTable[genReturnLocal].lvType))
+ {
+ this->compFloatingPointUsed = true;
+ }
+
+ if (!varTypeIsFloating(info.compRetType))
+ {
+ lvaTable[genReturnLocal].setPrefReg(REG_INTRET, this);
+#ifdef REG_FLOATRET
+ }
+ else
+ {
+ lvaTable[genReturnLocal].setPrefReg(REG_FLOATRET, this);
+ }
+#endif
+
+#ifdef DEBUG
+ // This temporary should not be converted to a double in stress mode,
+ // because we introduce assigns to it after the stress conversion
+ lvaTable[genReturnLocal].lvKeepType = 1;
+#endif
+ }
+ else
+ {
+ genReturnLocal = BAD_VAR_NUM;
+ }
+
+ if (info.compCallUnmanaged != 0)
+ {
+ // The P/Invoke helpers only require a frame variable, so only allocate the
+ // TCB variable if we're not using them.
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ info.compLvFrameListRoot = lvaGrabTemp(false DEBUGARG("Pinvoke FrameListRoot"));
+ }
+
+ lvaInlinedPInvokeFrameVar = lvaGrabTempWithImplicitUse(false DEBUGARG("Pinvoke FrameVar"));
+
+ LclVarDsc* varDsc = &lvaTable[lvaInlinedPInvokeFrameVar];
+ varDsc->addPrefReg(RBM_PINVOKE_TCB, this);
+ varDsc->lvType = TYP_BLK;
+ // Make room for the inlined frame.
+ varDsc->lvExactSize = eeGetEEInfo()->inlinedCallFrameInfo.size;
+#if FEATURE_FIXED_OUT_ARGS
+ // Grab and reserve space for TCB, Frame regs used in PInvoke epilog to pop the inlined frame.
+ // See genPInvokeMethodEpilog() for use of the grabbed var. This is only necessary if we are
+ // not using the P/Invoke helpers.
+ if (!opts.ShouldUsePInvokeHelpers() && compJmpOpUsed)
+ {
+ lvaPInvokeFrameRegSaveVar = lvaGrabTempWithImplicitUse(false DEBUGARG("PInvokeFrameRegSave Var"));
+ varDsc = &lvaTable[lvaPInvokeFrameRegSaveVar];
+ varDsc->lvType = TYP_BLK;
+ varDsc->lvExactSize = 2 * REGSIZE_BYTES;
+ }
+#endif
+ }
+
+ // Do we need to insert a "JustMyCode" callback?
+
+ CORINFO_JUST_MY_CODE_HANDLE* pDbgHandle = nullptr;
+ CORINFO_JUST_MY_CODE_HANDLE dbgHandle = nullptr;
+ if (opts.compDbgCode && !(opts.eeFlags & CORJIT_FLG_IL_STUB))
+ {
+ dbgHandle = info.compCompHnd->getJustMyCodeHandle(info.compMethodHnd, &pDbgHandle);
+ }
+
+#ifdef _TARGET_ARM64_
+ // TODO-ARM64-NYI: don't do just-my-code
+ dbgHandle = nullptr;
+ pDbgHandle = nullptr;
+#endif // _TARGET_ARM64_
+
+ noway_assert(!dbgHandle || !pDbgHandle);
+
+ if (dbgHandle || pDbgHandle)
+ {
+ GenTreePtr guardCheckVal =
+ gtNewOperNode(GT_IND, TYP_INT, gtNewIconEmbHndNode(dbgHandle, pDbgHandle, GTF_ICON_TOKEN_HDL));
+ GenTreePtr guardCheckCond = gtNewOperNode(GT_EQ, TYP_INT, guardCheckVal, gtNewZeroConNode(TYP_INT));
+ guardCheckCond->gtFlags |= GTF_RELOP_QMARK;
+
+ // Create the callback which will yield the final answer
+
+ GenTreePtr callback = gtNewHelperCallNode(CORINFO_HELP_DBG_IS_JUST_MY_CODE, TYP_VOID);
+ callback = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), callback);
+
+ // Stick the conditional call at the start of the method
+
+ fgEnsureFirstBBisScratch();
+ fgInsertStmtAtEnd(fgFirstBB, gtNewQmarkNode(TYP_VOID, guardCheckCond, callback));
+ }
+
+ /* Do we need to call out for security ? */
+
+ if (tiSecurityCalloutNeeded)
+ {
+ // We must have grabbed this local.
+ noway_assert(opts.compNeedSecurityCheck);
+ noway_assert(lvaSecurityObject != BAD_VAR_NUM);
+
+ GenTreePtr tree;
+
+ /* Insert the expression "call JIT_Security_Prolog(MethodHnd, &SecurityObject)" */
+
+ tree = gtNewIconEmbMethHndNode(info.compMethodHnd);
+
+ tree = gtNewHelperCallNode(info.compCompHnd->getSecurityPrologHelper(info.compMethodHnd), TYP_VOID, 0,
+ gtNewArgList(tree, gtNewOperNode(GT_ADDR, TYP_BYREF,
+ gtNewLclvNode(lvaSecurityObject, TYP_REF))));
+
+ /* Create a new basic block and stick the call in it */
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtEnd(fgFirstBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\ntiSecurityCalloutNeeded - Add call JIT_Security_Prolog(%08p) statement ",
+ dspPtr(info.compMethodHnd));
+ printTreeID(tree);
+ printf(" in first basic block [%08p]\n", dspPtr(fgFirstBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+ }
+
+#if defined(_TARGET_X86_)
+
+ /* Is this a 'synchronized' method? */
+
+ if (info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ GenTreePtr tree = nullptr;
+
+ /* Insert the expression "enterCrit(this)" or "enterCrit(handle)" */
+
+ if (info.compIsStatic)
+ {
+ tree = fgGetCritSectOfStaticMethod();
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_MON_ENTER_STATIC, TYP_VOID, 0, gtNewArgList(tree));
+ }
+ else
+ {
+ noway_assert(lvaTable[info.compThisArg].lvType == TYP_REF);
+
+ tree = gtNewLclvNode(info.compThisArg, TYP_REF);
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_MON_ENTER, TYP_VOID, 0, gtNewArgList(tree));
+ }
+
+ /* Create a new basic block and stick the call in it */
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtEnd(fgFirstBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSynchronized method - Add enterCrit statement in first basic block [%08p]\n", dspPtr(fgFirstBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+
+ /* We must be generating a single exit point for this to work */
+
+ noway_assert(oneReturn);
+ noway_assert(genReturnBB);
+
+ /* Create the expression "exitCrit(this)" or "exitCrit(handle)" */
+
+ if (info.compIsStatic)
+ {
+ tree = fgGetCritSectOfStaticMethod();
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_MON_EXIT_STATIC, TYP_VOID, 0, gtNewArgList(tree));
+ }
+ else
+ {
+ tree = gtNewLclvNode(info.compThisArg, TYP_REF);
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_MON_EXIT, TYP_VOID, 0, gtNewArgList(tree));
+ }
+
+ fgInsertStmtAtEnd(genReturnBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSynchronized method - Add exit expression ");
+ printTreeID(tree);
+ printf("\n");
+ }
+#endif
+
+ // Reset cookies used to track start and end of the protected region in synchronized methods
+ syncStartEmitCookie = nullptr;
+ syncEndEmitCookie = nullptr;
+ }
+
+#endif // _TARGET_X86_
+
+ /* Do we need to do runtime call out to check the security? */
+
+ if (tiRuntimeCalloutNeeded)
+ {
+ GenTreePtr tree;
+
+ /* Insert the expression "call verificationRuntimeCheck(MethodHnd)" */
+
+ tree = gtNewIconEmbMethHndNode(info.compMethodHnd);
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_VERIFICATION_RUNTIME_CHECK, TYP_VOID, 0, gtNewArgList(tree));
+
+ /* Create a new basic block and stick the call in it */
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtEnd(fgFirstBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\ntiRuntimeCalloutNeeded - Call verificationRuntimeCheck(%08p) statement in first basic block "
+ "[%08p]\n",
+ dspPtr(info.compMethodHnd), dspPtr(fgFirstBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+ }
+
+ if (opts.IsReversePInvoke())
+ {
+ fgAddReversePInvokeEnterExit();
+ }
+
+ //
+ // Add 'return' expression to the return block if we made it as "oneReturn" before.
+ //
+ if (oneReturn)
+ {
+ GenTreePtr tree;
+
+ //
+ // Make the 'return' expression.
+ //
+
+ // make sure to reload the return value as part of the return (it is saved by the "real return").
+ if (genReturnLocal != BAD_VAR_NUM)
+ {
+ noway_assert(compMethodHasRetVal());
+
+ GenTreePtr retTemp = gtNewLclvNode(genReturnLocal, lvaTable[genReturnLocal].TypeGet());
+
+ // make sure copy prop ignores this node (make sure it always does a reload from the temp).
+ retTemp->gtFlags |= GTF_DONT_CSE;
+ tree = gtNewOperNode(GT_RETURN, retTemp->gtType, retTemp);
+ }
+ else
+ {
+ noway_assert(info.compRetType == TYP_VOID || varTypeIsStruct(info.compRetType));
+ // return void
+ tree = new (this, GT_RETURN) GenTreeOp(GT_RETURN, TYP_VOID);
+ }
+
+ /* Add 'return' expression to the return block */
+
+ noway_assert(genReturnBB);
+
+ fgInsertStmtAtEnd(genReturnBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\noneReturn statement tree ");
+ printTreeID(tree);
+ printf(" added to genReturnBB [%08p]\n", dspPtr(genReturnBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgAddInternal()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif
+}
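+
+// Illustrative sketch (not part of the JIT sources): when 'oneReturn' is chosen above, the
+// overall effect on the method is approximately
+//
+//     ...
+//     retTemp = <return expression>;   // original BBJ_RETURN blocks store into genReturnLocal
+//     goto genReturnBB;                // and are converted to jumps to the shared block
+//     ...
+//     genReturnBB:
+//         return retTemp;              // the single GT_RETURN added above reloads the temp
+//
+// where 'retTemp' stands for genReturnLocal (BAD_VAR_NUM when the method returns void).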
+
+/*****************************************************************************
+ *
+ * Create a new statement from tree and wire the links up.
+ */
+GenTreeStmt* Compiler::fgNewStmtFromTree(GenTreePtr tree, BasicBlock* block, IL_OFFSETX offs)
+{
+ GenTreeStmt* stmt = gtNewStmt(tree, offs);
+ gtSetStmtInfo(stmt);
+ fgSetStmtSeq(stmt);
+
+#if DEBUG
+ if (block != nullptr)
+ {
+ fgDebugCheckNodeLinks(block, stmt);
+ }
+#endif
+
+ return stmt;
+}
+
+GenTreeStmt* Compiler::fgNewStmtFromTree(GenTreePtr tree)
+{
+ return fgNewStmtFromTree(tree, nullptr, BAD_IL_OFFSET);
+}
+
+GenTreeStmt* Compiler::fgNewStmtFromTree(GenTreePtr tree, BasicBlock* block)
+{
+ return fgNewStmtFromTree(tree, block, BAD_IL_OFFSET);
+}
+
+GenTreeStmt* Compiler::fgNewStmtFromTree(GenTreePtr tree, IL_OFFSETX offs)
+{
+ return fgNewStmtFromTree(tree, nullptr, offs);
+}
+
+//------------------------------------------------------------------------
+// fgFindBlockILOffset: Given a block, find the IL offset corresponding to the first statement
+// in the block with a legal IL offset. Skip any leading statements that have BAD_IL_OFFSET.
+// If no statement has an initialized statement offset (including the case where there are
+// no statements in the block), then return BAD_IL_OFFSET. This function is used when
+// blocks are split or modified, and we want to maintain the IL offset as much as possible
+// to preserve good debugging behavior.
+//
+// Arguments:
+// block - The block to check.
+//
+// Return Value:
+// The first good IL offset of a statement in the block, or BAD_IL_OFFSET if such an IL offset
+// cannot be found.
+//
+// If we are not built with DEBUGGING_SUPPORT or DEBUG, then always report BAD_IL_OFFSET,
+// since in that case statements don't contain an IL offset. The effect will be that split
+// blocks will lose their IL offset information.
+
+IL_OFFSET Compiler::fgFindBlockILOffset(BasicBlock* block)
+{
+ // This function searches for IL offsets in statement nodes, so it can't be used in LIR. We
+ // could have a similar function for LIR that searches for GT_IL_OFFSET nodes.
+ assert(!block->IsLIR());
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+ for (GenTree* stmt = block->bbTreeList; stmt != nullptr; stmt = stmt->gtNext)
+ {
+ assert(stmt->IsStatement());
+ if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ return jitGetILoffs(stmt->gtStmt.gtStmtILoffsx);
+ }
+ }
+#endif // defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+ return BAD_IL_OFFSET;
+}
+
+//------------------------------------------------------------------------------
+// fgSplitBlockAtEnd - split the given block into two blocks.
+// All code in the block stays in the original block.
+// Control falls through from original to new block, and
+// the new block is returned.
+//------------------------------------------------------------------------------
+BasicBlock* Compiler::fgSplitBlockAtEnd(BasicBlock* curr)
+{
+ // We'd like to use fgNewBBafter(), but we need to update the preds list before linking in the new block.
+ // (We need the successors of 'curr' to be correct when we do this.)
+ BasicBlock* newBlock = bbNewBasicBlock(curr->bbJumpKind);
+
+ // Start the new block with no refs. When we set the preds below, this will get updated correctly.
+ newBlock->bbRefs = 0;
+
+ // For each successor of the original block, set the new block as their predecessor.
+ // Note we are using the "rational" version of the successor iterator that does not hide the finallyret arcs.
+ // Without these arcs, a block 'b' may not be a member of succs(preds(b))
+ if (curr->bbJumpKind != BBJ_SWITCH)
+ {
+ unsigned numSuccs = curr->NumSucc(this);
+ for (unsigned i = 0; i < numSuccs; i++)
+ {
+ BasicBlock* succ = curr->GetSucc(i, this);
+ if (succ != newBlock)
+ {
+ JITDUMP("BB%02u previous predecessor was BB%02u, now is BB%02u\n", succ->bbNum, curr->bbNum,
+ newBlock->bbNum);
+ fgReplacePred(succ, curr, newBlock);
+ }
+ }
+
+ newBlock->bbJumpDest = curr->bbJumpDest;
+ curr->bbJumpDest = nullptr;
+ }
+ else
+ {
+ // In the case of a switch statement there's more complicated logic in order to wire up the predecessor lists
+ // but fortunately there's an existing method that implements this functionality.
+ newBlock->bbJumpSwt = curr->bbJumpSwt;
+
+ fgChangeSwitchBlock(curr, newBlock);
+
+ curr->bbJumpSwt = nullptr;
+ }
+
+ newBlock->inheritWeight(curr);
+
+ // Set the new block's flags. Note that the new block isn't BBF_INTERNAL unless the old block is.
+ newBlock->bbFlags = curr->bbFlags;
+
+ // Remove flags that the new block can't have.
+ newBlock->bbFlags &= ~(BBF_TRY_BEG | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1 | BBF_HAS_LABEL |
+ BBF_JMP_TARGET | BBF_FUNCLET_BEG | BBF_LOOP_PREHEADER | BBF_KEEP_BBJ_ALWAYS);
+
+ // Remove the GC safe bit on the new block. It seems clear that if we split 'curr' at the end,
+ // such that all the code is left in 'curr', and 'newBlock' just gets the control flow, then
+ // both 'curr' and 'newBlock' could accurately retain an existing GC safe bit. However, callers
+ // use this function to split blocks in the middle, or at the beginning, and they don't seem to
+ // be careful about updating this flag appropriately. So, removing the GC safe bit is simply
+ // conservative: some functions might end up being fully interruptible that could be partially
+ // interruptible if we exercised more care here.
+ newBlock->bbFlags &= ~BBF_GC_SAFE_POINT;
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ newBlock->bbFlags &= ~(BBF_FINALLY_TARGET);
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ // The new block has no code, so we leave bbCodeOffs/bbCodeOffsEnd set to BAD_IL_OFFSET. If a caller
+ // puts code in the block, then it needs to update these.
+
+ // Insert the new block in the block list after the 'curr' block.
+ fgInsertBBafter(curr, newBlock);
+ fgExtendEHRegionAfter(curr); // The new block is in the same EH region as the old block.
+
+ // Remove flags from the old block that are no longer possible.
+ curr->bbFlags &= ~(BBF_HAS_JMP | BBF_RETLESS_CALL);
+
+ // Default to fallthru, and add the arc for that.
+ curr->bbJumpKind = BBJ_NONE;
+ fgAddRefPred(newBlock, curr);
+
+ return newBlock;
+}
+
+//------------------------------------------------------------------------------
+// fgSplitBlockAfterStatement - Split the given block, with all code after
+// the given statement going into the second block.
+//------------------------------------------------------------------------------
+BasicBlock* Compiler::fgSplitBlockAfterStatement(BasicBlock* curr, GenTree* stmt)
+{
+ assert(!curr->IsLIR()); // No statements in LIR, so you can't use this function.
+
+ BasicBlock* newBlock = fgSplitBlockAtEnd(curr);
+
+ if (stmt)
+ {
+ newBlock->bbTreeList = stmt->gtNext;
+ if (newBlock->bbTreeList)
+ {
+ newBlock->bbTreeList->gtPrev = curr->bbTreeList->gtPrev;
+ }
+ curr->bbTreeList->gtPrev = stmt;
+ stmt->gtNext = nullptr;
+
+ // Update the IL offsets of the blocks to match the split.
+
+ assert(newBlock->bbCodeOffs == BAD_IL_OFFSET);
+ assert(newBlock->bbCodeOffsEnd == BAD_IL_OFFSET);
+
+ // curr->bbCodeOffs remains the same
+ newBlock->bbCodeOffsEnd = curr->bbCodeOffsEnd;
+
+ IL_OFFSET splitPointILOffset = fgFindBlockILOffset(newBlock);
+
+ curr->bbCodeOffsEnd = splitPointILOffset;
+ newBlock->bbCodeOffs = splitPointILOffset;
+ }
+ else
+ {
+ assert(curr->bbTreeList == nullptr); // if no tree was given then it better be an empty block
+ }
+
+ return newBlock;
+}
+
+//------------------------------------------------------------------------------
+// fgSplitBlockAfterNode - Split the given block, with all code after
+// the given node going into the second block.
+// This function is only used in LIR.
+//------------------------------------------------------------------------------
+BasicBlock* Compiler::fgSplitBlockAfterNode(BasicBlock* curr, GenTree* node)
+{
+ assert(curr->IsLIR());
+
+ BasicBlock* newBlock = fgSplitBlockAtEnd(curr);
+
+ if (node != nullptr)
+ {
+ LIR::Range& currBBRange = LIR::AsRange(curr);
+
+ if (node != currBBRange.LastNode())
+ {
+ LIR::Range nodesToMove = currBBRange.Remove(node->gtNext, currBBRange.LastNode());
+ LIR::AsRange(newBlock).InsertAtBeginning(std::move(nodesToMove));
+ }
+
+ // Update the IL offsets of the blocks to match the split.
+
+ assert(newBlock->bbCodeOffs == BAD_IL_OFFSET);
+ assert(newBlock->bbCodeOffsEnd == BAD_IL_OFFSET);
+
+ // curr->bbCodeOffs remains the same
+ newBlock->bbCodeOffsEnd = curr->bbCodeOffsEnd;
+
+ // Search backwards from the end of the current block looking for the IL offset to use
+ // for the end IL offset for the original block.
+ IL_OFFSET splitPointILOffset = BAD_IL_OFFSET;
+ LIR::Range::ReverseIterator riter;
+ LIR::Range::ReverseIterator riterEnd;
+ for (riter = currBBRange.rbegin(), riterEnd = currBBRange.rend(); riter != riterEnd; ++riter)
+ {
+ if ((*riter)->gtOper == GT_IL_OFFSET)
+ {
+ GenTreeStmt* stmt = (*riter)->AsStmt();
+ if (stmt->gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ splitPointILOffset = jitGetILoffs(stmt->gtStmtILoffsx);
+ break;
+ }
+ }
+ }
+
+ curr->bbCodeOffsEnd = splitPointILOffset;
+
+ // Also use this as the beginning offset of the next block. Presumably we could/should
+ // look to see if the first node is a GT_IL_OFFSET node, and use that instead.
+ newBlock->bbCodeOffs = splitPointILOffset;
+ }
+ else
+ {
+ assert(curr->bbTreeList == nullptr); // if no node was given then it better be an empty block
+ }
+
+ return newBlock;
+}
+
+//------------------------------------------------------------------------------
+// fgSplitBlockAtBeginning - Split the given block into two blocks.
+// Control falls through from original to new block,
+// and the new block is returned.
+// All code in the original block goes into the new block
+//------------------------------------------------------------------------------
+BasicBlock* Compiler::fgSplitBlockAtBeginning(BasicBlock* curr)
+{
+ BasicBlock* newBlock = fgSplitBlockAtEnd(curr);
+
+ newBlock->bbTreeList = curr->bbTreeList;
+ curr->bbTreeList = nullptr;
+
+ // The new block now has all the code, and the old block has none. Update the
+ // IL offsets for the block to reflect this.
+
+ newBlock->bbCodeOffs = curr->bbCodeOffs;
+ newBlock->bbCodeOffsEnd = curr->bbCodeOffsEnd;
+
+ curr->bbCodeOffs = BAD_IL_OFFSET;
+ curr->bbCodeOffsEnd = BAD_IL_OFFSET;
+
+ return newBlock;
+}
+
+//------------------------------------------------------------------------
+// fgSplitEdge: Splits the edge between a block 'curr' and its successor 'succ' by creating a new block
+// that replaces 'succ' as a successor of 'curr', and which branches unconditionally
+// to (or falls through to) 'succ'. Note that for a BBJ_COND block 'curr',
+// 'succ' might be the fall-through path or the branch path from 'curr'.
+//
+// Arguments:
+// curr - A block which branches conditionally to 'succ'
+// succ - The target block
+//
+// Return Value:
+// Returns a new block, that is a successor of 'curr' and which branches unconditionally to 'succ'
+//
+// Assumptions:
+// 'curr' must have a bbJumpKind of BBJ_COND or BBJ_SWITCH
+//
+// Notes:
+// The returned block is empty.
+
+BasicBlock* Compiler::fgSplitEdge(BasicBlock* curr, BasicBlock* succ)
+{
+ assert(curr->bbJumpKind == BBJ_COND || curr->bbJumpKind == BBJ_SWITCH);
+ assert(fgGetPredForBlock(succ, curr) != nullptr);
+
+ BasicBlock* newBlock;
+ if (succ == curr->bbNext)
+ {
+ // The successor is the fall-through path of a BBJ_COND, or
+ // an immediately following block of a BBJ_SWITCH (which has
+ // no fall-through path). For this case, simply insert a new
+ // fall-through block after 'curr'.
+ newBlock = fgNewBBafter(BBJ_NONE, curr, true /*extendRegion*/);
+ }
+ else
+ {
+ newBlock = fgNewBBinRegion(BBJ_ALWAYS, curr, curr->isRunRarely());
+ // The new block always jumps to 'succ'
+ newBlock->bbJumpDest = succ;
+ }
+ newBlock->bbFlags |= (curr->bbFlags & succ->bbFlags & (BBF_BACKWARD_JUMP));
+
+ JITDUMP("Splitting edge from BB%02u to BB%02u; adding BB%02u\n", curr->bbNum, succ->bbNum, newBlock->bbNum);
+
+ if (curr->bbJumpKind == BBJ_COND)
+ {
+ fgReplacePred(succ, curr, newBlock);
+ if (curr->bbJumpDest == succ)
+ {
+ // Now 'curr' jumps to newBlock
+ curr->bbJumpDest = newBlock;
+ newBlock->bbFlags |= BBF_JMP_TARGET;
+ }
+ fgAddRefPred(newBlock, curr);
+ }
+ else
+ {
+ assert(curr->bbJumpKind == BBJ_SWITCH);
+
+ // newBlock replaces 'succ' in the switch.
+ fgReplaceSwitchJumpTarget(curr, newBlock, succ);
+
+ // And 'succ' has 'newBlock' as a new predecessor.
+ fgAddRefPred(succ, newBlock);
+ }
+
+ // This isn't accurate, but it is complex to compute a reasonable number so just assume that we take the
+ // branch 50% of the time.
+ newBlock->inheritWeightPercentage(curr, 50);
+
+ // The bbLiveIn and bbLiveOut are both equal to the bbLiveIn of 'succ'
+ if (fgLocalVarLivenessDone)
+ {
+ VarSetOps::Assign(this, newBlock->bbLiveIn, succ->bbLiveIn);
+ VarSetOps::Assign(this, newBlock->bbLiveOut, succ->bbLiveIn);
+ }
+
+ return newBlock;
+}
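+
+// Illustrative sketch (not part of the JIT sources): splitting the taken edge of a BBJ_COND
+// block B1 whose branch target is B3 gives, approximately,
+//
+//     before:                               after:
+//         B1 (BBJ_COND) --taken--> B3           B1 --taken--> Bnew (BBJ_ALWAYS) --> B3
+//         B1 --fall-through--> B2               B1 --fall-through--> B2
+//
+// The new block is empty, is assumed to run about 50% as often as B1, and gives callers a
+// place to insert code that must execute only along the B1 -> B3 edge.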
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+void Compiler::fgFindOperOrder()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgFindOperOrder()\n");
+ }
+#endif
+
+ BasicBlock* block;
+ GenTreeStmt* stmt;
+
+ /* Walk the basic blocks and for each statement determine
+ * the evaluation order, cost, FP levels, etc... */
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ compCurBB = block;
+ for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ /* Recursively process the statement */
+
+ compCurStmt = stmt;
+ gtSetStmtInfo(stmt);
+ }
+ }
+}
+
+/*****************************************************************************/
+void Compiler::fgSimpleLowering()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ // Walk the statement trees in this basic block, converting ArrLength nodes.
+ compCurBB = block; // Used in fgRngChkTarget.
+
+#ifdef LEGACY_BACKEND
+ for (GenTreeStmt* stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
+ {
+ for (GenTreePtr tree = stmt->gtStmtList; tree; tree = tree->gtNext)
+ {
+#else
+ LIR::Range& range = LIR::AsRange(block);
+ for (GenTree* tree : range)
+ {
+ {
+#endif
+ if (tree->gtOper == GT_ARR_LENGTH)
+ {
+ GenTreeArrLen* arrLen = tree->AsArrLen();
+ GenTreePtr arr = arrLen->gtArrLen.ArrRef();
+ GenTreePtr add;
+ GenTreePtr con;
+
+ /* Create the expression "*(array_addr + ArrLenOffs)" */
+
+ noway_assert(arr->gtNext == tree);
+
+ noway_assert(arrLen->ArrLenOffset() == offsetof(CORINFO_Array, length) ||
+ arrLen->ArrLenOffset() == offsetof(CORINFO_String, stringLen));
+
+ if ((arr->gtOper == GT_CNS_INT) && (arr->gtIntCon.gtIconVal == 0))
+ {
+ // If the array is NULL, then we should get a NULL reference
+ // exception when computing its length. We need to maintain
+ // an invariant where there is no sum of two constants node, so
+ // let's simply return an indirection of NULL.
+
+ add = arr;
+ }
+ else
+ {
+ con = gtNewIconNode(arrLen->ArrLenOffset(), TYP_I_IMPL);
+ con->gtRsvdRegs = 0;
+
+ add = gtNewOperNode(GT_ADD, TYP_REF, arr, con);
+ add->gtRsvdRegs = arr->gtRsvdRegs;
+
+#ifdef LEGACY_BACKEND
+ con->gtCopyFPlvl(arr);
+
+ add->gtCopyFPlvl(arr);
+ add->CopyCosts(arr);
+
+ arr->gtNext = con;
+ con->gtPrev = arr;
+
+ con->gtNext = add;
+ add->gtPrev = con;
+
+ add->gtNext = tree;
+ tree->gtPrev = add;
+#else
+ range.InsertAfter(arr, con, add);
+#endif
+ }
+
+ // Change to a GT_IND.
+ tree->ChangeOperUnchecked(GT_IND);
+
+ tree->gtOp.gtOp1 = add;
+ }
+ else if (tree->OperGet() == GT_ARR_BOUNDS_CHECK
+#ifdef FEATURE_SIMD
+ || tree->OperGet() == GT_SIMD_CHK
+#endif // FEATURE_SIMD
+ )
+ {
+ // Add in a call to an error routine.
+ fgSetRngChkTarget(tree, false);
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose && fgRngChkThrowAdded)
+ {
+ printf("\nAfter fgSimpleLowering() added some RngChk throw blocks");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+#endif
+}
+
+/*****************************************************************************
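+ *
+ *  'clonedTree' has been added to 'addedToBlock'; if local variable ref counts are
+ *  being kept, increment the counts for every local referenced by the cloned tree.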
+ */
+
+void Compiler::fgUpdateRefCntForClone(BasicBlock* addedToBlock, GenTreePtr clonedTree)
+{
+ assert(clonedTree->gtOper != GT_STMT);
+
+ if (lvaLocalVarRefCounted)
+ {
+ compCurBB = addedToBlock;
+ fgWalkTreePre(&clonedTree, Compiler::lvaIncRefCntsCB, (void*)this, true);
+ }
+}
+
+/*****************************************************************************
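+ *
+ *  Update local variable ref counts after extracting 'keptTree' (the side effects we
+ *  keep, possibly null) from 'wholeTree' (the tree being removed): first increment the
+ *  counts for 'keptTree', then decrement the counts for everything in 'wholeTree'.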
+ */
+
+void Compiler::fgUpdateRefCntForExtract(GenTreePtr wholeTree, GenTreePtr keptTree)
+{
+ if (lvaLocalVarRefCounted)
+ {
+ /* Update the refCnts of removed lcl vars - The problem is that
+ * we have to consider back the side effects trees so we first
+ * increment all refCnts for side effects then decrement everything
+ * in the statement
+ */
+ if (keptTree)
+ {
+ fgWalkTreePre(&keptTree, Compiler::lvaIncRefCntsCB, (void*)this, true);
+ }
+
+ fgWalkTreePre(&wholeTree, Compiler::lvaDecRefCntsCB, (void*)this, true);
+ }
+}
+
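+/*****************************************************************************
+ *
+ *  Return the set of tracked variable indices referenced by 'tree', which must be
+ *  a local variable node. For a promoted struct local this includes all of the
+ *  tracked field locals.
+ */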
+VARSET_VALRET_TP Compiler::fgGetVarBits(GenTreePtr tree)
+{
+ VARSET_TP VARSET_INIT_NOCOPY(varBits, VarSetOps::MakeEmpty(this));
+
+ assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_FLD || tree->gtOper == GT_REG_VAR);
+
+ unsigned int lclNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = lvaTable + lclNum;
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(this, varBits, varDsc->lvVarIndex);
+ }
+ else if (varDsc->lvType == TYP_STRUCT && varDsc->lvPromoted)
+ {
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ noway_assert(lvaTable[i].lvIsStructField);
+ if (lvaTable[i].lvTracked)
+ {
+ unsigned varIndex = lvaTable[i].lvVarIndex;
+ noway_assert(varIndex < lvaTrackedCount);
+ VarSetOps::AddElemD(this, varBits, varIndex);
+ }
+ }
+ }
+ return varBits;
+}
+
+/*****************************************************************************
+ *
+ * Find and remove any basic blocks that are useless (e.g. they have not been
+ * imported because they are not reachable, or they have been optimized away).
+ */
+
+void Compiler::fgRemoveEmptyBlocks()
+{
+ BasicBlock* cur;
+ BasicBlock* nxt;
+
+ /* If we remove any blocks, we'll have to do additional work */
+
+ unsigned removedBlks = 0;
+
+ for (cur = fgFirstBB; cur != nullptr; cur = nxt)
+ {
+ /* Get hold of the next block (in case we delete 'cur') */
+
+ nxt = cur->bbNext;
+
+ /* Should this block be removed? */
+
+ if (!(cur->bbFlags & BBF_IMPORTED))
+ {
+ noway_assert(cur->isEmpty());
+
+ if (ehCanDeleteEmptyBlock(cur))
+ {
+ /* Mark the block as removed */
+
+ cur->bbFlags |= BBF_REMOVED;
+
+ /* Remember that we've removed a block from the list */
+
+ removedBlks++;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("BB%02u was not imported, marked as removed (%d)\n", cur->bbNum, removedBlks);
+ }
+#endif // DEBUG
+
+ /* Drop the block from the list */
+
+ fgUnlinkBlock(cur);
+ }
+ else
+ {
+ // We were prevented from deleting this block by EH normalization. Mark the block as imported.
+ cur->bbFlags |= BBF_IMPORTED;
+ }
+ }
+ }
+
+ /* If no blocks were removed, we're done */
+
+ if (removedBlks == 0)
+ {
+ return;
+ }
+
+ /* Update all references in the exception handler table.
+ * Mark the new blocks as non-removable.
+ *
+ * We may have made the entire try block unreachable.
+ * Check for this case and remove the entry from the EH table.
+ */
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+ INDEBUG(unsigned delCnt = 0;)
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ AGAIN:
+ /* If the beginning of the try block was not imported, we
+ * need to remove the entry from the EH table. */
+
+ if (HBtab->ebdTryBeg->bbFlags & BBF_REMOVED)
+ {
+ noway_assert(!(HBtab->ebdTryBeg->bbFlags & BBF_IMPORTED));
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Beginning of try block (BB%02u) not imported "
+ "- remove index #%u from the EH table\n",
+ HBtab->ebdTryBeg->bbNum, XTnum + delCnt);
+ }
+ delCnt++;
+#endif // DEBUG
+
+ fgRemoveEHTableEntry(XTnum);
+
+ if (XTnum < compHndBBtabCount)
+ {
+ // There are more entries left to process, so do more. Note that
+ // HBtab now points to the next entry, that we copied down to the
+ // current slot. XTnum also stays the same.
+ goto AGAIN;
+ }
+
+ break; // no more entries (we deleted the last one), so exit the loop
+ }
+
+/* At this point we know we have a valid try block */
+
+#ifdef DEBUG
+ assert(HBtab->ebdTryBeg->bbFlags & BBF_IMPORTED);
+ assert(HBtab->ebdTryBeg->bbFlags & BBF_DONT_REMOVE);
+
+ assert(HBtab->ebdHndBeg->bbFlags & BBF_IMPORTED);
+ assert(HBtab->ebdHndBeg->bbFlags & BBF_DONT_REMOVE);
+
+ if (HBtab->HasFilter())
+ {
+ assert(HBtab->ebdFilter->bbFlags & BBF_IMPORTED);
+ assert(HBtab->ebdFilter->bbFlags & BBF_DONT_REMOVE);
+ }
+#endif // DEBUG
+
+ fgSkipRmvdBlocks(HBtab);
+ } /* end of the for loop over XTnum */
+
+ // Renumber the basic blocks
+ JITDUMP("\nRenumbering the basic blocks for fgRemoveEmptyBlocks\n");
+ fgRenumberBlocks();
+
+#ifdef DEBUG
+ fgVerifyHandlerTab();
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Remove a useless statement from a basic block.
+ *  By default, the ref counts of the local vars it references are decremented.
+ *
+ */
+
+void Compiler::fgRemoveStmt(BasicBlock* block,
+ GenTreePtr node,
+ // whether to decrement ref counts for tracked vars in statement
+ bool updateRefCount)
+{
+ noway_assert(node);
+ assert(fgOrder == FGOrderTree);
+
+ GenTreeStmt* tree = block->firstStmt();
+ GenTreeStmt* stmt = node->AsStmt();
+
+#ifdef DEBUG
+ if (verbose &&
+ stmt->gtStmtExpr->gtOper != GT_NOP) // Don't print if it is a GT_NOP. Too much noise from the inliner.
+ {
+ printf("\nRemoving statement ");
+ printTreeID(stmt);
+ printf(" in BB%02u as useless:\n", block->bbNum);
+ gtDispTree(stmt);
+ }
+#endif // DEBUG
+
+ if (opts.compDbgCode && stmt->gtPrev != stmt && stmt->gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+        /* TODO: For debuggable code, should we remove significant
+           statement boundaries? Or should we leave a GT_NO_OP in their place? */
+ }
+
+ /* Is it the first statement in the list? */
+
+ GenTreeStmt* firstStmt = block->firstStmt();
+ if (firstStmt == stmt)
+ {
+ if (firstStmt->gtNext == nullptr)
+ {
+ assert(firstStmt == block->lastStmt());
+
+ /* this is the only statement - basic block becomes empty */
+ block->bbTreeList = nullptr;
+ }
+ else
+ {
+ block->bbTreeList = tree->gtNext;
+ block->bbTreeList->gtPrev = tree->gtPrev;
+ }
+ goto DONE;
+ }
+
+ /* Is it the last statement in the list? */
+
+ if (stmt == block->lastStmt())
+ {
+ stmt->gtPrev->gtNext = nullptr;
+ block->bbTreeList->gtPrev = stmt->gtPrev;
+ goto DONE;
+ }
+
+ tree = stmt->gtPrevStmt;
+ noway_assert(tree);
+
+ tree->gtNext = stmt->gtNext;
+ stmt->gtNext->gtPrev = tree;
+
+DONE:
+ fgStmtRemoved = true;
+
+ if (optValnumCSE_phase)
+ {
+ optValnumCSE_UnmarkCSEs(stmt->gtStmtExpr, nullptr);
+ }
+ else
+ {
+ if (updateRefCount)
+ {
+ if (fgStmtListThreaded)
+ {
+ fgWalkTreePre(&stmt->gtStmtExpr, Compiler::lvaDecRefCntsCB, (void*)this, true);
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (block->bbTreeList == nullptr)
+ {
+ printf("\nBB%02u becomes empty", block->bbNum);
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+}
+
+/******************************************************************************/
+// Returns true if the operator is involved in control-flow
+// TODO-Cleanup: Move this into genTreeKinds in genTree.h
+
+inline bool OperIsControlFlow(genTreeOps oper)
+{
+ switch (oper)
+ {
+ case GT_JTRUE:
+ case GT_SWITCH:
+ case GT_LABEL:
+
+ case GT_CALL:
+ case GT_JMP:
+
+ case GT_RETURN:
+ case GT_RETFILT:
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+#endif // !FEATURE_EH_FUNCLETS
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/******************************************************************************
+ * Tries to throw away a stmt. The statement can be anywhere in block->bbTreeList.
+ * Returns true if it did remove the statement.
+ */
+
+bool Compiler::fgCheckRemoveStmt(BasicBlock* block, GenTreePtr node)
+{
+ if (opts.compDbgCode)
+ {
+ return false;
+ }
+
+ GenTreeStmt* stmt = node->AsStmt();
+
+ GenTreePtr tree = stmt->gtStmtExpr;
+ genTreeOps oper = tree->OperGet();
+
+ if (OperIsControlFlow(oper) || oper == GT_NO_OP)
+ {
+ return false;
+ }
+
+ // TODO: Use a recursive version of gtNodeHasSideEffects()
+ if (tree->gtFlags & GTF_SIDE_EFFECT)
+ {
+ return false;
+ }
+
+ fgRemoveStmt(block, stmt);
+ return true;
+}
+
+/****************************************************************************************************
+ *
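+ *  Returns true if 'block' and the lexically following block 'bNext' can be merged
+ *  by fgCompactBlocks: 'block' must end in BBJ_NONE, and none of the EH, hot/cold
+ *  region, or predecessor constraints checked below may prevent the merge.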
+ *
+ */
+bool Compiler::fgCanCompactBlocks(BasicBlock* block, BasicBlock* bNext)
+{
+ if ((block == nullptr) || (bNext == nullptr))
+ {
+ return false;
+ }
+
+ noway_assert(block->bbNext == bNext);
+
+ if (block->bbJumpKind != BBJ_NONE)
+ {
+ return false;
+ }
+
+ // If the next block has multiple incoming edges, we can still compact if the first block is empty.
+ // However, not if it is the beginning of a handler.
+ if (bNext->countOfInEdges() != 1 &&
+ (!block->isEmpty() || (block->bbFlags & BBF_FUNCLET_BEG) || (block->bbCatchTyp != BBCT_NONE)))
+ {
+ return false;
+ }
+
+ if (bNext->bbFlags & BBF_DONT_REMOVE)
+ {
+ return false;
+ }
+
+ // Don't compact the first block if it was specially created as a scratch block.
+ if (fgBBisScratch(block))
+ {
+ return false;
+ }
+
+#if defined(_TARGET_ARM_)
+ // We can't compact a finally target block, as we need to generate special code for such blocks during code
+ // generation
+ if ((bNext->bbFlags & BBF_FINALLY_TARGET) != 0)
+ return false;
+#endif
+
+ // We don't want to compact blocks that are in different Hot/Cold regions
+ //
+ if (fgInDifferentRegions(block, bNext))
+ {
+ return false;
+ }
+
+ // We cannot compact two blocks in different EH regions.
+ //
+ if (fgCanRelocateEHRegions)
+ {
+ if (!BasicBlock::sameEHRegion(block, bNext))
+ {
+ return false;
+ }
+ }
+ // if there is a switch predecessor don't bother because we'd have to update the uniquesuccs as well
+ // (if they are valid)
+ for (flowList* pred = bNext->bbPreds; pred; pred = pred->flNext)
+ {
+ if (pred->flBlock->bbJumpKind == BBJ_SWITCH)
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/*****************************************************************************************************
+ *
+ * Function called to compact two given blocks in the flowgraph
+ * Assumes that all necessary checks have been performed,
+ * i.e. fgCanCompactBlocks returns true.
+ *
+ *  Use this function whenever we change links, insert blocks, etc.;
+ *  it keeps the flowgraph data (bbNum, bbRefs, bbPreds) in sync.
+ */
+
+void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext)
+{
+ noway_assert(block != nullptr);
+ noway_assert((block->bbFlags & BBF_REMOVED) == 0);
+ noway_assert(block->bbJumpKind == BBJ_NONE);
+
+ noway_assert(bNext == block->bbNext);
+ noway_assert(bNext != nullptr);
+ noway_assert((bNext->bbFlags & BBF_REMOVED) == 0);
+ noway_assert(bNext->countOfInEdges() == 1 || block->isEmpty());
+ noway_assert(bNext->bbPreds);
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ noway_assert((bNext->bbFlags & BBF_FINALLY_TARGET) == 0);
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ // Make sure the second block is not the start of a TRY block or an exception handler
+
+ noway_assert(bNext->bbCatchTyp == BBCT_NONE);
+ noway_assert((bNext->bbFlags & BBF_TRY_BEG) == 0);
+ noway_assert((bNext->bbFlags & BBF_DONT_REMOVE) == 0);
+
+ /* both or none must have an exception handler */
+ noway_assert(block->hasTryIndex() == bNext->hasTryIndex());
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCompacting blocks BB%02u and BB%02u:\n", block->bbNum, bNext->bbNum);
+ }
+#endif
+
+ if (bNext->countOfInEdges() > 1)
+ {
+ JITDUMP("Second block has multiple incoming edges\n");
+
+ assert(block->isEmpty());
+ block->bbFlags |= BBF_JMP_TARGET;
+ for (flowList* pred = bNext->bbPreds; pred; pred = pred->flNext)
+ {
+ fgReplaceJumpTarget(pred->flBlock, block, bNext);
+
+ if (pred->flBlock != block)
+ {
+ fgAddRefPred(block, pred->flBlock);
+ }
+ }
+ bNext->bbPreds = nullptr;
+ }
+ else
+ {
+ noway_assert(bNext->bbPreds->flNext == nullptr);
+ noway_assert(bNext->bbPreds->flBlock == block);
+ }
+
+ /* Start compacting - move all the statements in the second block to the first block */
+
+ // First move any phi definitions of the second block after the phi defs of the first.
+ // TODO-CQ: This may be the wrong thing to do. If we're compacting blocks, it's because a
+ // control-flow choice was constant-folded away. So probably phi's need to go away,
+ // as well, in favor of one of the incoming branches. Or at least be modified.
+
+ assert(block->IsLIR() == bNext->IsLIR());
+ if (block->IsLIR())
+ {
+ LIR::Range& blockRange = LIR::AsRange(block);
+ LIR::Range& nextRange = LIR::AsRange(bNext);
+
+ // Does the next block have any phis?
+ GenTree* nextFirstNonPhi = nullptr;
+ LIR::ReadOnlyRange nextPhis = nextRange.PhiNodes();
+ if (!nextPhis.IsEmpty())
+ {
+ GenTree* blockLastPhi = blockRange.LastPhiNode();
+ nextFirstNonPhi = nextPhis.LastNode()->gtNext;
+
+ LIR::Range phisToMove = nextRange.Remove(std::move(nextPhis));
+ blockRange.InsertAfter(blockLastPhi, std::move(phisToMove));
+ }
+ else
+ {
+ nextFirstNonPhi = nextRange.FirstNode();
+ }
+
+ // Does the block have any other code?
+ if (nextFirstNonPhi != nullptr)
+ {
+ LIR::Range nextNodes = nextRange.Remove(nextFirstNonPhi, nextRange.LastNode());
+ blockRange.InsertAtEnd(std::move(nextNodes));
+ }
+ }
+ else
+ {
+ GenTreePtr blkNonPhi1 = block->FirstNonPhiDef();
+ GenTreePtr bNextNonPhi1 = bNext->FirstNonPhiDef();
+ GenTreePtr blkFirst = block->firstStmt();
+ GenTreePtr bNextFirst = bNext->firstStmt();
+
+ // Does the second have any phis?
+ if (bNextFirst != nullptr && bNextFirst != bNextNonPhi1)
+ {
+ GenTreePtr bNextLast = bNextFirst->gtPrev;
+ assert(bNextLast->gtNext == nullptr);
+
+ // Does "blk" have phis?
+ if (blkNonPhi1 != blkFirst)
+ {
+ // Yes, has phis.
+ // Insert after the last phi of "block."
+ // First, bNextPhis after last phi of block.
+ GenTreePtr blkLastPhi;
+ if (blkNonPhi1 != nullptr)
+ {
+ blkLastPhi = blkNonPhi1->gtPrev;
+ }
+ else
+ {
+ blkLastPhi = blkFirst->gtPrev;
+ }
+
+ blkLastPhi->gtNext = bNextFirst;
+ bNextFirst->gtPrev = blkLastPhi;
+
+ // Now, rest of "block" after last phi of "bNext".
+ GenTreePtr bNextLastPhi = nullptr;
+ if (bNextNonPhi1 != nullptr)
+ {
+ bNextLastPhi = bNextNonPhi1->gtPrev;
+ }
+ else
+ {
+ bNextLastPhi = bNextFirst->gtPrev;
+ }
+
+ bNextLastPhi->gtNext = blkNonPhi1;
+ if (blkNonPhi1 != nullptr)
+ {
+ blkNonPhi1->gtPrev = bNextLastPhi;
+ }
+ else
+ {
+ // block has no non phis, so make the last statement be the last added phi.
+ blkFirst->gtPrev = bNextLastPhi;
+ }
+
+ // Now update the bbTreeList of "bNext".
+ bNext->bbTreeList = bNextNonPhi1;
+ if (bNextNonPhi1 != nullptr)
+ {
+ bNextNonPhi1->gtPrev = bNextLast;
+ }
+ }
+ else
+ {
+ if (blkFirst != nullptr) // If "block" has no statements, fusion will work fine...
+ {
+ // First, bNextPhis at start of block.
+ GenTreePtr blkLast = blkFirst->gtPrev;
+ block->bbTreeList = bNextFirst;
+ // Now, rest of "block" (if it exists) after last phi of "bNext".
+ GenTreePtr bNextLastPhi = nullptr;
+ if (bNextNonPhi1 != nullptr)
+ {
+ // There is a first non phi, so the last phi is before it.
+ bNextLastPhi = bNextNonPhi1->gtPrev;
+ }
+ else
+ {
+ // All the statements are phi defns, so the last one is the prev of the first.
+ bNextLastPhi = bNextFirst->gtPrev;
+ }
+ bNextFirst->gtPrev = blkLast;
+ bNextLastPhi->gtNext = blkFirst;
+ blkFirst->gtPrev = bNextLastPhi;
+ // Now update the bbTreeList of "bNext"
+ bNext->bbTreeList = bNextNonPhi1;
+ if (bNextNonPhi1 != nullptr)
+ {
+ bNextNonPhi1->gtPrev = bNextLast;
+ }
+ }
+ }
+ }
+
+ // Now proceed with the updated bbTreeLists.
+ GenTreePtr stmtList1 = block->firstStmt();
+ GenTreePtr stmtList2 = bNext->firstStmt();
+
+ /* the block may have an empty list */
+
+ if (stmtList1)
+ {
+ GenTreePtr stmtLast1 = block->lastStmt();
+
+ /* The second block may be a GOTO statement or something with an empty bbTreeList */
+ if (stmtList2)
+ {
+ GenTreePtr stmtLast2 = bNext->lastStmt();
+
+ /* append list2 to list 1 */
+
+ stmtLast1->gtNext = stmtList2;
+ stmtList2->gtPrev = stmtLast1;
+ stmtList1->gtPrev = stmtLast2;
+ }
+ }
+ else
+ {
+ /* block was formerly empty and now has bNext's statements */
+ block->bbTreeList = stmtList2;
+ }
+ }
+
+ // Note we could update the local variable weights here by
+ // calling lvaMarkLocalVars, with the block and weight adjustment.
+
+ // If either block or bNext has a profile weight
+ // or if both block and bNext have non-zero weights
+ // then we select the highest weight block.
+
+ if ((block->bbFlags & BBF_PROF_WEIGHT) || (bNext->bbFlags & BBF_PROF_WEIGHT) ||
+ (block->bbWeight && bNext->bbWeight))
+ {
+ // We are keeping block so update its fields
+ // when bNext has a greater weight
+
+ if (block->bbWeight < bNext->bbWeight)
+ {
+ block->bbWeight = bNext->bbWeight;
+
+ block->bbFlags |= (bNext->bbFlags & BBF_PROF_WEIGHT); // Set the profile weight flag (if necessary)
+ if (block->bbWeight != 0)
+ {
+ block->bbFlags &= ~BBF_RUN_RARELY; // Clear any RarelyRun flag
+ }
+ }
+ }
+ // otherwise if either block has a zero weight we select the zero weight
+ else
+ {
+ noway_assert((block->bbWeight == BB_ZERO_WEIGHT) || (bNext->bbWeight == BB_ZERO_WEIGHT));
+ block->bbWeight = BB_ZERO_WEIGHT;
+ block->bbFlags |= BBF_RUN_RARELY; // Set the RarelyRun flag
+ }
+
+ /* set the right links */
+
+ block->bbJumpKind = bNext->bbJumpKind;
+ VarSetOps::AssignAllowUninitRhs(this, block->bbLiveOut, bNext->bbLiveOut);
+
+ // Update the beginning and ending IL offsets (bbCodeOffs and bbCodeOffsEnd).
+ // Set the beginning IL offset to the minimum, and the ending offset to the maximum, of the respective blocks.
+ // If one block has an unknown offset, we take the other block.
+ // We are merging into 'block', so if its values are correct, just leave them alone.
+ // TODO: we should probably base this on the statements within.
+
+ if (block->bbCodeOffs == BAD_IL_OFFSET)
+ {
+ block->bbCodeOffs = bNext->bbCodeOffs; // If they are both BAD_IL_OFFSET, this doesn't change anything.
+ }
+ else if (bNext->bbCodeOffs != BAD_IL_OFFSET)
+ {
+        // They are both valid offsets; compare them.
+ if (block->bbCodeOffs > bNext->bbCodeOffs)
+ {
+ block->bbCodeOffs = bNext->bbCodeOffs;
+ }
+ }
+
+ if (block->bbCodeOffsEnd == BAD_IL_OFFSET)
+ {
+ block->bbCodeOffsEnd = bNext->bbCodeOffsEnd; // If they are both BAD_IL_OFFSET, this doesn't change anything.
+ }
+ else if (bNext->bbCodeOffsEnd != BAD_IL_OFFSET)
+ {
+        // They are both valid offsets; compare them.
+ if (block->bbCodeOffsEnd < bNext->bbCodeOffsEnd)
+ {
+ block->bbCodeOffsEnd = bNext->bbCodeOffsEnd;
+ }
+ }
+
+ if (((block->bbFlags & BBF_INTERNAL) != 0) && ((bNext->bbFlags & BBF_INTERNAL) == 0))
+ {
+ // If 'block' is an internal block and 'bNext' isn't, then adjust the flags set on 'block'.
+ block->bbFlags &= ~BBF_INTERNAL; // Clear the BBF_INTERNAL flag
+ block->bbFlags |= BBF_IMPORTED; // Set the BBF_IMPORTED flag
+ }
+
+ /* Update the flags for block with those found in bNext */
+
+ block->bbFlags |= (bNext->bbFlags & BBF_COMPACT_UPD);
+
+ /* mark bNext as removed */
+
+ bNext->bbFlags |= BBF_REMOVED;
+
+ /* Unlink bNext and update all the marker pointers if necessary */
+
+ fgUnlinkRange(block->bbNext, bNext);
+
+ // If bNext was the last block of a try or handler, update the EH table.
+
+ ehUpdateForDeletedBlock(bNext);
+
+ /* If we're collapsing a block created after the dominators are
+ computed, rename the block and reuse dominator information from
+ the other block */
+ if (fgDomsComputed && block->bbNum > fgDomBBcount)
+ {
+ BlockSetOps::Assign(this, block->bbReach, bNext->bbReach);
+ BlockSetOps::ClearD(this, bNext->bbReach);
+
+ block->bbIDom = bNext->bbIDom;
+ bNext->bbIDom = nullptr;
+
+        // In this case, there's no need to update the preorder and postorder numbering,
+        // since we're taking over bNext's bbNum; with that the basic block is all set.
+ block->bbNum = bNext->bbNum;
+ }
+
+ /* Set the jump targets */
+
+ switch (bNext->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ // Propagate RETLESS property
+ block->bbFlags |= (bNext->bbFlags & BBF_RETLESS_CALL);
+
+ __fallthrough;
+
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ block->bbJumpDest = bNext->bbJumpDest;
+
+ /* Update the predecessor list for 'bNext->bbJumpDest' */
+ fgReplacePred(bNext->bbJumpDest, bNext, block);
+
+ /* Update the predecessor list for 'bNext->bbNext' if it is different than 'bNext->bbJumpDest' */
+ if (bNext->bbJumpKind == BBJ_COND && bNext->bbJumpDest != bNext->bbNext)
+ {
+ fgReplacePred(bNext->bbNext, bNext, block);
+ }
+ break;
+
+ case BBJ_NONE:
+ /* Update the predecessor list for 'bNext->bbNext' */
+ fgReplacePred(bNext->bbNext, bNext, block);
+ break;
+
+ case BBJ_EHFILTERRET:
+ fgReplacePred(bNext->bbJumpDest, bNext, block);
+ break;
+
+ case BBJ_EHFINALLYRET:
+ {
+ unsigned hndIndex = block->getHndIndex();
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex);
+
+ if (ehDsc->HasFinallyHandler()) // No need to do this for fault handlers
+ {
+ BasicBlock* begBlk;
+ BasicBlock* endBlk;
+ ehGetCallFinallyBlockRange(hndIndex, &begBlk, &endBlk);
+
+ BasicBlock* finBeg = ehDsc->ebdHndBeg;
+
+ for (BasicBlock* bcall = begBlk; bcall != endBlk; bcall = bcall->bbNext)
+ {
+ if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg)
+ {
+ continue;
+ }
+
+ noway_assert(bcall->isBBCallAlwaysPair());
+ fgReplacePred(bcall->bbNext, bNext, block);
+ }
+ }
+ }
+ break;
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ /* no jumps or fall through blocks to set here */
+ break;
+
+ case BBJ_SWITCH:
+ block->bbJumpSwt = bNext->bbJumpSwt;
+ // We are moving the switch jump from bNext to block. Examine the jump targets
+ // of the BBJ_SWITCH at bNext and replace the predecessor to 'bNext' with ones to 'block'
+ fgChangeSwitchBlock(bNext, block);
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+ fgUpdateLoopsAfterCompacting(block, bNext);
+
+#if DEBUG
+ if (verbose && 0)
+ {
+ printf("\nAfter compacting:\n");
+ fgDispBasicBlocks(false);
+ }
+#endif
+
+#if DEBUG
+ if (JitConfig.JitSlowDebugChecksEnabled() != 0)
+ {
+ // Make sure that the predecessor lists are accurate
+ fgDebugCheckBBlist();
+ }
+#endif // DEBUG
+}
+
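+/*****************************************************************************************************
+ *
+ *  Called by fgCompactBlocks once 'bNext' has been merged into 'block': any loop table
+ *  entry whose head, bottom, exit or entry pointed at 'bNext' is updated to point at 'block'.
+ */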
+void Compiler::fgUpdateLoopsAfterCompacting(BasicBlock* block, BasicBlock* bNext)
+{
+    /* Check whether the removed block appears in the loop table; if so, update it */
+ noway_assert(bNext);
+
+ for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
+ {
+ /* Some loops may have been already removed by
+ * loop unrolling or conditional folding */
+
+ if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ /* Check the loop head (i.e. the block preceding the loop) */
+
+ if (optLoopTable[loopNum].lpHead == bNext)
+ {
+ optLoopTable[loopNum].lpHead = block;
+ }
+
+ /* Check the loop bottom */
+
+ if (optLoopTable[loopNum].lpBottom == bNext)
+ {
+ optLoopTable[loopNum].lpBottom = block;
+ }
+
+ /* Check the loop exit */
+
+ if (optLoopTable[loopNum].lpExit == bNext)
+ {
+ noway_assert(optLoopTable[loopNum].lpExitCnt == 1);
+ optLoopTable[loopNum].lpExit = block;
+ }
+
+ /* Check the loop entry */
+
+ if (optLoopTable[loopNum].lpEntry == bNext)
+ {
+ optLoopTable[loopNum].lpEntry = block;
+ }
+ }
+}
+
+/*****************************************************************************************************
+ *
+ * Function called to remove a block when it is unreachable.
+ *
+ * This function cannot remove the first block.
+ */
+
+void Compiler::fgUnreachableBlock(BasicBlock* block)
+{
+ // genReturnBB should never be removed, as we might have special hookups there.
+ // Therefore, we should never come here to remove the statements in the genReturnBB block.
+ // For example, <BUGNUM> in VSW 364383, </BUGNUM>
+ // the profiler hookup needs to have the "void GT_RETURN" statement
+ // to properly set the info.compProfilerCallback flag.
+ noway_assert(block != genReturnBB);
+
+ if (block->bbFlags & BBF_REMOVED)
+ {
+ return;
+ }
+
+/* Removing an unreachable block */
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRemoving unreachable BB%02u\n", block->bbNum);
+ }
+#endif // DEBUG
+
+    noway_assert(block->bbPrev != nullptr); // Can't use this function to remove the first block
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ assert(!block->bbPrev->isBBCallAlwaysPair()); // can't remove the BBJ_ALWAYS of a BBJ_CALLFINALLY / BBJ_ALWAYS pair
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ /* First walk the statement trees in this basic block and delete each stmt */
+
+ /* Make the block publicly available */
+ compCurBB = block;
+
+ if (block->IsLIR())
+ {
+ LIR::Range& blockRange = LIR::AsRange(block);
+ if (!blockRange.IsEmpty())
+ {
+ blockRange.Delete(this, block, blockRange.FirstNode(), blockRange.LastNode());
+ }
+ }
+ else
+ {
+ // TODO-Cleanup: I'm not sure why this happens -- if the block is unreachable, why does it have phis?
+ // Anyway, remove any phis.
+
+ GenTreePtr firstNonPhi = block->FirstNonPhiDef();
+ if (block->bbTreeList != firstNonPhi)
+ {
+ if (firstNonPhi != nullptr)
+ {
+ firstNonPhi->gtPrev = block->lastStmt();
+ }
+ block->bbTreeList = firstNonPhi;
+ }
+
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ fgRemoveStmt(block, stmt);
+ }
+ noway_assert(block->bbTreeList == nullptr);
+ }
+
+ /* Next update the loop table and bbWeights */
+ optUpdateLoopsBeforeRemoveBlock(block);
+
+ /* Mark the block as removed */
+ block->bbFlags |= BBF_REMOVED;
+
+ /* update bbRefs and bbPreds for the blocks reached by this block */
+ fgRemoveBlockAsPred(block);
+}
+
+/*****************************************************************************************************
+ *
+ * Function called to remove or morph a GT_JTRUE statement when we jump to the same
+ * block when both the condition is true or false.
+ */
+void Compiler::fgRemoveJTrue(BasicBlock* block)
+{
+ noway_assert(block->bbJumpKind == BBJ_COND && block->bbJumpDest == block->bbNext);
+ assert(compRationalIRForm == block->IsLIR());
+
+ flowList* flow = fgGetPredForBlock(block->bbNext, block);
+ noway_assert(flow->flDupCount == 2);
+
+ // Change the BBJ_COND to BBJ_NONE, and adjust the refCount and dupCount.
+ block->bbJumpKind = BBJ_NONE;
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ --block->bbNext->bbRefs;
+ --flow->flDupCount;
+
+#ifdef DEBUG
+ block->bbJumpDest = nullptr;
+ if (verbose)
+ {
+ printf("Block BB%02u becoming a BBJ_NONE to BB%02u (jump target is the same whether the condition is true or "
+ "false)\n",
+ block->bbNum, block->bbNext->bbNum);
+ }
+#endif
+
+ /* Remove the block jump condition */
+
+ if (block->IsLIR())
+ {
+ LIR::Range& blockRange = LIR::AsRange(block);
+
+ GenTree* test = blockRange.LastNode();
+ assert(test->OperGet() == GT_JTRUE);
+
+ bool isClosed;
+ unsigned sideEffects;
+ LIR::ReadOnlyRange testRange = blockRange.GetTreeRange(test, &isClosed, &sideEffects);
+
+ // TODO-LIR: this should really be checking GTF_ALL_EFFECT, but that produces unacceptable
+ // diffs compared to the existing backend.
+ if (isClosed && ((sideEffects & GTF_SIDE_EFFECT) == 0))
+ {
+ // If the jump and its operands form a contiguous, side-effect-free range,
+ // remove them.
+ blockRange.Delete(this, block, std::move(testRange));
+ }
+ else
+ {
+ // Otherwise, just remove the jump node itself.
+ blockRange.Remove(test);
+ }
+ }
+ else
+ {
+ GenTreeStmt* test = block->lastStmt();
+ GenTree* tree = test->gtStmtExpr;
+
+ noway_assert(tree->gtOper == GT_JTRUE);
+
+ GenTree* sideEffList = nullptr;
+
+ if (tree->gtFlags & GTF_SIDE_EFFECT)
+ {
+ gtExtractSideEffList(tree, &sideEffList);
+
+ if (sideEffList)
+ {
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Extracted side effects list from condition...\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif
+ }
+ }
+
+ // Delete the cond test or replace it with the side effect tree
+ if (sideEffList == nullptr)
+ {
+ fgRemoveStmt(block, test);
+ }
+ else
+ {
+ test->gtStmtExpr = sideEffList;
+
+ fgMorphBlockStmt(block, test DEBUGARG("fgRemoveJTrue"));
+ }
+ }
+}
+
+/*****************************************************************************************************
+ *
+ * Function to return the last basic block in the main part of the function. With funclets, it is
+ * the block immediately before the first funclet.
+ * An inclusive end of the main method.
+ */
+
+BasicBlock* Compiler::fgLastBBInMainFunction()
+{
+#if FEATURE_EH_FUNCLETS
+
+ if (fgFirstFuncletBB != nullptr)
+ {
+ return fgFirstFuncletBB->bbPrev;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ assert(fgLastBB->bbNext == nullptr);
+
+ return fgLastBB;
+}
+
+/*****************************************************************************************************
+ *
+ * Function to return the first basic block after the main part of the function. With funclets, it is
+ *  the first funclet block. If there are no funclets, it is NULL (i.e. fgLastBB->bbNext).
+ * This is equivalent to fgLastBBInMainFunction()->bbNext
+ * An exclusive end of the main method.
+ */
+
+BasicBlock* Compiler::fgEndBBAfterMainFunction()
+{
+#if FEATURE_EH_FUNCLETS
+
+ if (fgFirstFuncletBB != nullptr)
+ {
+ return fgFirstFuncletBB;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ assert(fgLastBB->bbNext == nullptr);
+
+ return nullptr;
+}
+
+// Removes the block from the bbPrev/bbNext chain
+// Updates fgFirstBB and fgLastBB if necessary
+// Does not update fgFirstFuncletBB or fgFirstColdBlock (fgUnlinkRange does)
+
+void Compiler::fgUnlinkBlock(BasicBlock* block)
+{
+ if (block->bbPrev)
+ {
+ block->bbPrev->bbNext = block->bbNext;
+ if (block->bbNext)
+ {
+ block->bbNext->bbPrev = block->bbPrev;
+ }
+ else
+ {
+ fgLastBB = block->bbPrev;
+ }
+ }
+ else
+ {
+ assert(block == fgFirstBB);
+ assert(block != fgLastBB);
+ assert((fgFirstBBScratch == nullptr) || (fgFirstBBScratch == fgFirstBB));
+
+ fgFirstBB = block->bbNext;
+ fgFirstBB->bbPrev = nullptr;
+
+ if (fgFirstBBScratch != nullptr)
+ {
+#ifdef DEBUG
+ // We had created an initial scratch BB, but now we're deleting it.
+ if (verbose)
+ {
+ printf("Unlinking scratch BB%02u\n", block->bbNum);
+ }
+#endif // DEBUG
+ fgFirstBBScratch = nullptr;
+ }
+ }
+}
+
+/*****************************************************************************************************
+ *
+ * Function called to unlink basic block range [bBeg .. bEnd] from the basic block list.
+ *
+ * 'bBeg' can't be the first block.
+ */
+
+void Compiler::fgUnlinkRange(BasicBlock* bBeg, BasicBlock* bEnd)
+{
+ assert(bBeg != nullptr);
+ assert(bEnd != nullptr);
+
+ BasicBlock* bPrev = bBeg->bbPrev;
+ assert(bPrev != nullptr); // Can't unlink a range starting with the first block
+
+ bPrev->setNext(bEnd->bbNext);
+
+ /* If we removed the last block in the method then update fgLastBB */
+ if (fgLastBB == bEnd)
+ {
+ fgLastBB = bPrev;
+ noway_assert(fgLastBB->bbNext == nullptr);
+ }
+
+ // If bEnd was the first Cold basic block update fgFirstColdBlock
+ if (fgFirstColdBlock == bEnd)
+ {
+ fgFirstColdBlock = bPrev->bbNext;
+ }
+
+#if FEATURE_EH_FUNCLETS
+#ifdef DEBUG
+ // You can't unlink a range that includes the first funclet block. A range certainly
+ // can't cross the non-funclet/funclet region. And you can't unlink the first block
+ // of the first funclet with this, either. (If that's necessary, it could be allowed
+ // by updating fgFirstFuncletBB to bEnd->bbNext.)
+ for (BasicBlock* tempBB = bBeg; tempBB != bEnd->bbNext; tempBB = tempBB->bbNext)
+ {
+ assert(tempBB != fgFirstFuncletBB);
+ }
+#endif // DEBUG
+#endif // FEATURE_EH_FUNCLETS
+}
+
+/*****************************************************************************************************
+ *
+ * Function called to remove a basic block
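+ *
+ * If 'unreachable' is true, the block's contents are deleted and its flow edges removed;
+ * otherwise the block must be empty (BBJ_NONE or BBJ_ALWAYS) and all of its predecessors
+ * are redirected to the block's successor.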
+ */
+
+void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable)
+{
+ BasicBlock* bPrev = block->bbPrev;
+
+ /* The block has to be either unreachable or empty */
+
+ PREFIX_ASSUME(block != nullptr);
+
+ JITDUMP("fgRemoveBlock BB%02u\n", block->bbNum);
+
+ // If we've cached any mappings from switch blocks to SwitchDesc's (which contain only the
+ // *unique* successors of the switch block), invalidate that cache, since an entry in one of
+ // the SwitchDescs might be removed.
+ InvalidateUniqueSwitchSuccMap();
+
+ noway_assert((block == fgFirstBB) || (bPrev && (bPrev->bbNext == block)));
+ noway_assert(!(block->bbFlags & BBF_DONT_REMOVE));
+
+ // Should never remove a genReturnBB, as we might have special hookups there.
+ noway_assert(block != genReturnBB);
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // Don't remove a finally target
+ assert(!(block->bbFlags & BBF_FINALLY_TARGET));
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ if (unreachable)
+ {
+ PREFIX_ASSUME(bPrev != nullptr);
+
+ fgUnreachableBlock(block);
+
+ /* If this is the last basic block update fgLastBB */
+ if (block == fgLastBB)
+ {
+ fgLastBB = bPrev;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ // If block was the fgFirstFuncletBB then set fgFirstFuncletBB to block->bbNext
+ if (block == fgFirstFuncletBB)
+ {
+ fgFirstFuncletBB = block->bbNext;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ if (bPrev->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ // bPrev CALL becomes RETLESS as the BBJ_ALWAYS block is unreachable
+ bPrev->bbFlags |= BBF_RETLESS_CALL;
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ NO_WAY("No retless call finally blocks; need unwind target instead");
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ }
+ else if (bPrev->bbJumpKind == BBJ_ALWAYS && bPrev->bbJumpDest == block->bbNext &&
+ !(bPrev->bbFlags & BBF_KEEP_BBJ_ALWAYS) && (block != fgFirstColdBlock) &&
+ (block->bbNext != fgFirstColdBlock))
+ {
+ // previous block is a BBJ_ALWAYS to the next block: change to BBJ_NONE.
+ // Note that we don't do it if bPrev follows a BBJ_CALLFINALLY block (BBF_KEEP_BBJ_ALWAYS),
+ // because that would violate our invariant that BBJ_CALLFINALLY blocks are followed by
+ // BBJ_ALWAYS blocks.
+ bPrev->bbJumpKind = BBJ_NONE;
+ bPrev->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ }
+
+ // If this is the first Cold basic block update fgFirstColdBlock
+ if (block == fgFirstColdBlock)
+ {
+ fgFirstColdBlock = block->bbNext;
+ }
+
+ /* Unlink this block from the bbNext chain */
+ fgUnlinkBlock(block);
+
+ /* At this point the bbPreds and bbRefs had better be zero */
+ noway_assert((block->bbRefs == 0) && (block->bbPreds == nullptr));
+
+ /* A BBJ_CALLFINALLY is usually paired with a BBJ_ALWAYS.
+ * If we delete such a BBJ_CALLFINALLY we also delete the BBJ_ALWAYS
+ */
+ if (block->isBBCallAlwaysPair())
+ {
+ BasicBlock* leaveBlk = block->bbNext;
+ noway_assert(leaveBlk->bbJumpKind == BBJ_ALWAYS);
+
+ leaveBlk->bbFlags &= ~BBF_DONT_REMOVE;
+ leaveBlk->bbRefs = 0;
+ leaveBlk->bbPreds = nullptr;
+
+ fgRemoveBlock(leaveBlk, true);
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ fgClearFinallyTargetBit(leaveBlk->bbJumpDest);
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ }
+ else if (block->bbJumpKind == BBJ_RETURN)
+ {
+ fgRemoveReturnBlock(block);
+ }
+ }
+ else // block is empty
+ {
+ noway_assert(block->isEmpty());
+
+ /* The block cannot follow a non-retless BBJ_CALLFINALLY (because we don't know who may jump to it) */
+ noway_assert((bPrev == nullptr) || !bPrev->isBBCallAlwaysPair());
+
+ /* This cannot be the last basic block */
+ noway_assert(block != fgLastBB);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing empty BB%02u\n", block->bbNum);
+ }
+#endif // DEBUG
+
+#ifdef DEBUG
+ /* Some extra checks for the empty case */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_NONE:
+ break;
+
+ case BBJ_ALWAYS:
+ /* Do not remove a block that jumps to itself - used for while (true){} */
+ noway_assert(block->bbJumpDest != block);
+
+ /* Empty GOTO can be removed iff bPrev is BBJ_NONE */
+ noway_assert(bPrev && bPrev->bbJumpKind == BBJ_NONE);
+ break;
+
+ default:
+ noway_assert(!"Empty block of this type cannot be removed!");
+ break;
+ }
+#endif // DEBUG
+
+ noway_assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
+
+ /* Who is the "real" successor of this block? */
+
+ BasicBlock* succBlock;
+
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ succBlock = block->bbJumpDest;
+ }
+ else
+ {
+ succBlock = block->bbNext;
+ }
+
+ bool skipUnmarkLoop = false;
+
+ // If block is the backedge for a loop and succBlock precedes block
+ // then the succBlock becomes the new LOOP HEAD
+ // NOTE: there's an assumption here that the blocks are numbered in increasing bbNext order.
+ // NOTE 2: if fgDomsComputed is false, then we can't check reachability. However, if this is
+ // the case, then the loop structures probably are also invalid, and shouldn't be used. This
+ // can be the case late in compilation (such as Lower), where remnants of earlier created
+ // structures exist, but haven't been maintained.
+ if (block->isLoopHead() && (succBlock->bbNum <= block->bbNum))
+ {
+ succBlock->bbFlags |= BBF_LOOP_HEAD;
+ if (fgDomsComputed && fgReachable(succBlock, block))
+ {
+ /* Mark all the reachable blocks between 'succBlock' and 'block', excluding 'block' */
+ optMarkLoopBlocks(succBlock, block, true);
+ }
+ }
+ else if (succBlock->isLoopHead() && bPrev && (succBlock->bbNum <= bPrev->bbNum))
+ {
+ skipUnmarkLoop = true;
+ }
+
+ noway_assert(succBlock);
+
+ // If this is the first Cold basic block update fgFirstColdBlock
+ if (block == fgFirstColdBlock)
+ {
+ fgFirstColdBlock = block->bbNext;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ // Update fgFirstFuncletBB if necessary
+ if (block == fgFirstFuncletBB)
+ {
+ fgFirstFuncletBB = block->bbNext;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ /* First update the loop table and bbWeights */
+ optUpdateLoopsBeforeRemoveBlock(block, skipUnmarkLoop);
+
+ /* Remove the block */
+
+ if (bPrev == nullptr)
+ {
+ /* special case if this is the first BB */
+
+ noway_assert(block == fgFirstBB);
+
+ /* Must be a fall through to next block */
+
+ noway_assert(block->bbJumpKind == BBJ_NONE);
+
+ /* old block no longer gets the extra ref count for being the first block */
+ block->bbRefs--;
+ succBlock->bbRefs++;
+
+ /* Set the new firstBB */
+ fgUnlinkBlock(block);
+
+ /* Always treat the initial block as a jump target */
+ fgFirstBB->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+ else
+ {
+ fgUnlinkBlock(block);
+ }
+
+ /* mark the block as removed and set the change flag */
+
+ block->bbFlags |= BBF_REMOVED;
+
+ /* Update bbRefs and bbPreds.
+ * All blocks jumping to 'block' now jump to 'succBlock'.
+ * First, remove 'block' from the predecessor list of succBlock.
+ */
+
+ fgRemoveRefPred(succBlock, block);
+
+ for (flowList* pred = block->bbPreds; pred; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+
+ /* Are we changing a loop backedge into a forward jump? */
+
+ if (block->isLoopHead() && (predBlock->bbNum >= block->bbNum) && (predBlock->bbNum <= succBlock->bbNum))
+ {
+ /* First update the loop table and bbWeights */
+ optUpdateLoopsBeforeRemoveBlock(predBlock);
+ }
+
+ /* If predBlock is a new predecessor, then add it to succBlock's
+ predecessor's list. */
+ if (predBlock->bbJumpKind != BBJ_SWITCH)
+ {
+ // Even if the pred is not a switch, we could have a conditional branch
+                // to the fallthrough, so there could be duplicate preds
+ for (unsigned i = 0; i < pred->flDupCount; i++)
+ {
+ fgAddRefPred(succBlock, predBlock);
+ }
+ }
+
+ /* change all jumps to the removed block */
+ switch (predBlock->bbJumpKind)
+ {
+ default:
+ noway_assert(!"Unexpected bbJumpKind in fgRemoveBlock()");
+ break;
+
+ case BBJ_NONE:
+ noway_assert(predBlock == bPrev);
+ PREFIX_ASSUME(bPrev != nullptr);
+
+ /* In the case of BBJ_ALWAYS we have to change the type of its predecessor */
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ /* bPrev now becomes a BBJ_ALWAYS */
+ bPrev->bbJumpKind = BBJ_ALWAYS;
+ bPrev->bbJumpDest = succBlock;
+ }
+ break;
+
+ case BBJ_COND:
+ /* The links for the direct predecessor case have already been updated above */
+ if (predBlock->bbJumpDest != block)
+ {
+ succBlock->bbFlags |= BBF_HAS_LABEL | BBF_JMP_TARGET;
+ break;
+ }
+
+ /* Check if both side of the BBJ_COND now jump to the same block */
+ if (predBlock->bbNext == succBlock)
+ {
+ // Make sure we are replacing "block" with "succBlock" in predBlock->bbJumpDest.
+ noway_assert(predBlock->bbJumpDest == block);
+ predBlock->bbJumpDest = succBlock;
+ fgRemoveJTrue(predBlock);
+ break;
+ }
+
+ /* Fall through for the jump case */
+ __fallthrough;
+
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ noway_assert(predBlock->bbJumpDest == block);
+ predBlock->bbJumpDest = succBlock;
+ succBlock->bbFlags |= BBF_HAS_LABEL | BBF_JMP_TARGET;
+ break;
+
+ case BBJ_SWITCH:
+ // Change any jumps from 'predBlock' (a BBJ_SWITCH) to 'block' to jump to 'succBlock'
+ //
+ // For the jump targets of 'predBlock' (a BBJ_SWITCH) that jump to 'block'
+ // remove the old predecessor at 'block' from 'predBlock' and
+ // add the new predecessor at 'succBlock' from 'predBlock'
+ //
+ fgReplaceSwitchJumpTarget(predBlock, succBlock, block);
+ break;
+ }
+ }
+ }
+
+ if (bPrev != nullptr)
+ {
+ switch (bPrev->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ // If prev is a BBJ_CALLFINALLY it better be marked as RETLESS
+ noway_assert(bPrev->bbFlags & BBF_RETLESS_CALL);
+ break;
+
+ case BBJ_ALWAYS:
+ // Check for branch to next block. Just make sure the BBJ_ALWAYS block is not
+ // part of a BBJ_CALLFINALLY/BBJ_ALWAYS pair. We do this here and don't rely on fgUpdateFlowGraph
+ // because we can be called by ComputeDominators and it expects it to remove this jump to
+ // the next block. This is the safest fix. We should remove all this BBJ_CALLFINALLY/BBJ_ALWAYS
+ // pairing.
+
+ if ((bPrev->bbJumpDest == bPrev->bbNext) &&
+ !fgInDifferentRegions(bPrev, bPrev->bbJumpDest)) // We don't remove a branch from Hot -> Cold
+ {
+ if ((bPrev == fgFirstBB) || !bPrev->bbPrev->isBBCallAlwaysPair())
+ {
+ // It's safe to change the jump type
+ bPrev->bbJumpKind = BBJ_NONE;
+ bPrev->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ }
+ }
+ break;
+
+ case BBJ_COND:
+ /* Check for branch to next block */
+ if (bPrev->bbJumpDest == bPrev->bbNext)
+ {
+ fgRemoveJTrue(bPrev);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ ehUpdateForDeletedBlock(block);
+ }
+}
+
+/*****************************************************************************
+ *
+ *  Function called to connect 'bSrc' to 'bDst' when 'bSrc' previously fell through to it
+ */
+
+BasicBlock* Compiler::fgConnectFallThrough(BasicBlock* bSrc, BasicBlock* bDst)
+{
+ BasicBlock* jmpBlk = nullptr;
+
+ /* If bSrc is non-NULL */
+
+ if (bSrc != nullptr)
+ {
+ /* If bSrc falls through to a block that is not bDst, we will insert a jump to bDst */
+
+ if (bSrc->bbFallsThrough() && (bSrc->bbNext != bDst))
+ {
+ switch (bSrc->bbJumpKind)
+ {
+
+ case BBJ_NONE:
+ bSrc->bbJumpKind = BBJ_ALWAYS;
+ bSrc->bbJumpDest = bDst;
+ bSrc->bbJumpDest->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Block BB%02u ended with a BBJ_NONE, Changed to an unconditional jump to BB%02u\n",
+ bSrc->bbNum, bSrc->bbJumpDest->bbNum);
+ }
+#endif
+ break;
+
+ case BBJ_CALLFINALLY:
+ case BBJ_COND:
+
+ // Add a new block after bSrc which jumps to 'bDst'
+ jmpBlk = fgNewBBafter(BBJ_ALWAYS, bSrc, true);
+
+ if (fgComputePredsDone)
+ {
+ fgAddRefPred(jmpBlk, bSrc, fgGetPredForBlock(bDst, bSrc));
+ }
+
+ // When adding a new jmpBlk we will set the bbWeight and bbFlags
+ //
+ if (fgHaveValidEdgeWeights)
+ {
+ noway_assert(fgComputePredsDone);
+
+ flowList* newEdge = fgGetPredForBlock(jmpBlk, bSrc);
+
+ jmpBlk->bbWeight = (newEdge->flEdgeWeightMin + newEdge->flEdgeWeightMax) / 2;
+ if (bSrc->bbWeight == 0)
+ {
+ jmpBlk->bbWeight = 0;
+ }
+
+ if (jmpBlk->bbWeight == 0)
+ {
+ jmpBlk->bbFlags |= BBF_RUN_RARELY;
+ }
+
+ BasicBlock::weight_t weightDiff = (newEdge->flEdgeWeightMax - newEdge->flEdgeWeightMin);
+ BasicBlock::weight_t slop = BasicBlock::GetSlopFraction(bSrc, bDst);
+
+ //
+ // If the [min/max] values for our edge weight is within the slop factor
+ // then we will set the BBF_PROF_WEIGHT flag for the block
+ //
+ if (weightDiff <= slop)
+ {
+ jmpBlk->bbFlags |= BBF_PROF_WEIGHT;
+ }
+ }
+ else
+ {
+ // We set the bbWeight to the smaller of bSrc->bbWeight or bDst->bbWeight
+ if (bSrc->bbWeight < bDst->bbWeight)
+ {
+ jmpBlk->bbWeight = bSrc->bbWeight;
+ jmpBlk->bbFlags |= (bSrc->bbFlags & BBF_RUN_RARELY);
+ }
+ else
+ {
+ jmpBlk->bbWeight = bDst->bbWeight;
+ jmpBlk->bbFlags |= (bDst->bbFlags & BBF_RUN_RARELY);
+ }
+ }
+
+ jmpBlk->bbJumpDest = bDst;
+ jmpBlk->bbJumpDest->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+
+ if (fgComputePredsDone)
+ {
+ fgReplacePred(bDst, bSrc, jmpBlk);
+ }
+ else
+ {
+ jmpBlk->bbFlags |= BBF_IMPORTED;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Added an unconditional jump to BB%02u after block BB%02u\n", jmpBlk->bbJumpDest->bbNum,
+ bSrc->bbNum);
+ }
+#endif // DEBUG
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+ else
+ {
+ // If bSrc is an unconditional branch to the next block
+ // then change it to a BBJ_NONE block
+ //
+ if ((bSrc->bbJumpKind == BBJ_ALWAYS) && !(bSrc->bbFlags & BBF_KEEP_BBJ_ALWAYS) &&
+ (bSrc->bbJumpDest == bSrc->bbNext))
+ {
+ bSrc->bbJumpKind = BBJ_NONE;
+ bSrc->bbFlags &= ~BBF_NEEDS_GCPOLL;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Changed an unconditional jump from BB%02u to the next block BB%02u into a BBJ_NONE block\n",
+ bSrc->bbNum, bSrc->bbNext->bbNum);
+ }
+#endif // DEBUG
+ }
+ }
+ }
+
+ return jmpBlk;
+}
+
+/*****************************************************************************
+ Walk the flow graph, reassign block numbers to keep them in ascending order.
+ Returns 'true' if any renumbering was actually done, OR if we change the
+ maximum number of assigned basic blocks (this can happen if we do inlining,
+ create a new, high-numbered block, then that block goes away. We go to
+ renumber the blocks, none of them actually change number, but we shrink the
+ maximum assigned block number. This affects the block set epoch).
+*/
+
+bool Compiler::fgRenumberBlocks()
+{
+ // If we renumber the blocks the dominator information will be out-of-date
+ if (fgDomsComputed)
+ {
+ noway_assert(!"Can't call Compiler::fgRenumberBlocks() when fgDomsComputed==true");
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** Before renumbering the basic blocks\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+
+ bool renumbered = false;
+ bool newMaxBBNum = false;
+ BasicBlock* block;
+
+ unsigned numStart = 1 + (compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : 0);
+ unsigned num;
+
+ for (block = fgFirstBB, num = numStart; block != nullptr; block = block->bbNext, num++)
+ {
+ noway_assert((block->bbFlags & BBF_REMOVED) == 0);
+
+ if (block->bbNum != num)
+ {
+ renumbered = true;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Renumber BB%02u to BB%02u\n", block->bbNum, num);
+ }
+#endif // DEBUG
+ block->bbNum = num;
+ }
+
+ if (block->bbNext == nullptr)
+ {
+ fgLastBB = block;
+ fgBBcount = num - numStart + 1;
+ if (compIsForInlining())
+ {
+ if (impInlineInfo->InlinerCompiler->fgBBNumMax != num)
+ {
+ impInlineInfo->InlinerCompiler->fgBBNumMax = num;
+ newMaxBBNum = true;
+ }
+ }
+ else
+ {
+ if (fgBBNumMax != num)
+ {
+ fgBBNumMax = num;
+ newMaxBBNum = true;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After renumbering the basic blocks\n");
+ if (renumbered)
+ {
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+ else
+ {
+ printf("=============== No blocks renumbered!\n");
+ }
+ }
+#endif // DEBUG
+
+ // Now update the BlockSet epoch, which depends on the block numbers.
+ // If any blocks have been renumbered then create a new BlockSet epoch.
+ // Even if we have not renumbered any blocks, we might still need to force
+ // a new BlockSet epoch, for one of several reasons. If there are any new
+ // blocks with higher numbers than the former maximum numbered block, then we
+ // need a new epoch with a new size matching the new largest numbered block.
+ // Also, if the number of blocks is different from the last time we set the
+ // BlockSet epoch, then we need a new epoch. This wouldn't happen if we
+ // renumbered blocks after every block addition/deletion, but it might be
+ // the case that we can change the number of blocks, then set the BlockSet
+ // epoch without renumbering, then change the number of blocks again, then
+ // renumber.
+ if (renumbered || newMaxBBNum)
+ {
+ NewBasicBlockEpoch();
+
+ // The key in the unique switch successor map is dependent on the block number, so invalidate that cache.
+ InvalidateUniqueSwitchSuccMap();
+ }
+ else
+ {
+ EnsureBasicBlockEpoch();
+ }
+
+ // Tell our caller if any blocks actually were renumbered.
+ return renumbered || newMaxBBNum;
+}
+
+/*****************************************************************************
+ *
+ * Is the BasicBlock bJump a forward branch?
+ * Optionally bSrc can be supplied to indicate that
+ * bJump must be forward with respect to bSrc
+ */
+bool Compiler::fgIsForwardBranch(BasicBlock* bJump, BasicBlock* bSrc /* = NULL */)
+{
+ bool result = false;
+
+ if ((bJump->bbJumpKind == BBJ_COND) || (bJump->bbJumpKind == BBJ_ALWAYS))
+ {
+ BasicBlock* bDest = bJump->bbJumpDest;
+ BasicBlock* bTemp = (bSrc == nullptr) ? bJump : bSrc;
+
+ while (true)
+ {
+ bTemp = bTemp->bbNext;
+
+ if (bTemp == nullptr)
+ {
+ break;
+ }
+
+ if (bTemp == bDest)
+ {
+ result = true;
+ break;
+ }
+ }
+ }
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Function called to expand the set of rarely run blocks
+ */
+
+bool Compiler::fgExpandRarelyRunBlocks()
+{
+ bool result = false;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgExpandRarelyRunBlocks()\n");
+ }
+
+ const char* reason = nullptr;
+#endif
+
+    // We expand the set of rarely run blocks by observing that a block which
+    // falls into or jumps to a rarely run block must itself be rarely run,
+    // and that when both targets of a conditional jump are rarely run, the
+    // branching block must itself be rarely run.
+
+ BasicBlock* block;
+ BasicBlock* bPrev;
+
+ for (bPrev = fgFirstBB, block = bPrev->bbNext; block != nullptr; bPrev = block, block = block->bbNext)
+ {
+ if (bPrev->isRunRarely())
+ {
+ continue;
+ }
+
+ /* bPrev is known to be a normal block here */
+ switch (bPrev->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+
+ /* Is the jump target rarely run? */
+ if (bPrev->bbJumpDest->isRunRarely())
+ {
+ INDEBUG(reason = "Unconditional jump to a rarely run block";)
+ goto NEW_RARELY_RUN;
+ }
+ break;
+
+ case BBJ_CALLFINALLY:
+
+ // Check for a BBJ_CALLFINALLY followed by a rarely run paired BBJ_ALWAYS
+ //
+ if (bPrev->isBBCallAlwaysPair())
+ {
+ /* Is the next block rarely run? */
+ if (block->isRunRarely())
+ {
+ INDEBUG(reason = "Call of finally followed by a rarely run block";)
+ goto NEW_RARELY_RUN;
+ }
+ }
+ break;
+
+ case BBJ_NONE:
+
+ /* is fall through target rarely run? */
+ if (block->isRunRarely())
+ {
+ INDEBUG(reason = "Falling into a rarely run block";)
+ goto NEW_RARELY_RUN;
+ }
+ break;
+
+ case BBJ_COND:
+
+ if (!block->isRunRarely())
+ {
+ continue;
+ }
+
+ /* If both targets of the BBJ_COND are run rarely then don't reorder */
+ if (bPrev->bbJumpDest->isRunRarely())
+ {
+ /* bPrev should also be marked as run rarely */
+ if (!bPrev->isRunRarely())
+ {
+ INDEBUG(reason = "Both sides of a conditional jump are rarely run";)
+
+ NEW_RARELY_RUN:
+ /* If the weight of the block was obtained from a profile run,
+                           then it's more accurate than our static analysis */
+ if (bPrev->bbFlags & BBF_PROF_WEIGHT)
+ {
+ continue;
+ }
+ result = true;
+
+#ifdef DEBUG
+ assert(reason != nullptr);
+ if (verbose)
+ {
+ printf("%s, marking BB%02u as rarely run\n", reason, bPrev->bbNum);
+ }
+#endif // DEBUG
+
+ /* Must not have previously been marked */
+ noway_assert(!bPrev->isRunRarely());
+
+ /* Mark bPrev as a new rarely run block */
+ bPrev->bbSetRunRarely();
+
+ BasicBlock* bPrevPrev = nullptr;
+ BasicBlock* tmpbb;
+
+ if ((bPrev->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0)
+ {
+ // If we've got a BBJ_CALLFINALLY/BBJ_ALWAYS pair, treat the BBJ_CALLFINALLY as an
+ // additional predecessor for the BBJ_ALWAYS block
+ tmpbb = bPrev->bbPrev;
+ noway_assert(tmpbb != nullptr);
+#if FEATURE_EH_FUNCLETS
+ noway_assert(tmpbb->isBBCallAlwaysPair());
+ bPrevPrev = tmpbb;
+#else
+ if (tmpbb->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ bPrevPrev = tmpbb;
+ }
+#endif
+ }
+
+                        /* Now go back to its earliest predecessor to see  */
+ /* if it too should now be marked as rarely run */
+ flowList* pred = bPrev->bbPreds;
+
+ if ((pred != nullptr) || (bPrevPrev != nullptr))
+ {
+ // bPrevPrev will be set to the lexically
+ // earliest predecessor of bPrev.
+
+ while (pred != nullptr)
+ {
+ if (bPrevPrev == nullptr)
+ {
+ // Initially we select the first block in the bbPreds list
+ bPrevPrev = pred->flBlock;
+ continue;
+ }
+
+ // Walk the flow graph lexically forward from pred->flBlock
+ // if we find (block == bPrevPrev) then
+ // pred->flBlock is an earlier predecessor.
+ for (tmpbb = pred->flBlock; tmpbb != nullptr; tmpbb = tmpbb->bbNext)
+ {
+ if (tmpbb == bPrevPrev)
+ {
+                                        /* We found an earlier predecessor */
+ bPrevPrev = pred->flBlock;
+ break;
+ }
+ else if (tmpbb == bPrev)
+ {
+ // We have reached bPrev so stop walking
+ // as this cannot be an earlier predecessor
+ break;
+ }
+ }
+
+ // Onto the next predecessor
+ pred = pred->flNext;
+ }
+
+ // Walk the flow graph forward from bPrevPrev
+ // if we don't find (tmpbb == bPrev) then our candidate
+ // bPrevPrev is lexically after bPrev and we do not
+ // want to select it as our new block
+
+ for (tmpbb = bPrevPrev; tmpbb != nullptr; tmpbb = tmpbb->bbNext)
+ {
+ if (tmpbb == bPrev)
+ {
+ // Set up block back to the lexically
+                                    // earliest predecessor of bPrev
+
+ block = bPrevPrev;
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+
+ // Now iterate over every block to see if we can prove that a block is rarely run
+ // (i.e. when all predecessors to the block are rarely run)
+ //
+ for (bPrev = fgFirstBB, block = bPrev->bbNext; block != nullptr; bPrev = block, block = block->bbNext)
+ {
+ // If block is not run rarely, then check to make sure that it has
+        // at least one non-rarely run predecessor.
+
+ if (!block->isRunRarely())
+ {
+ bool rare = true;
+
+ /* Make sure that block has at least one normal predecessor */
+ for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ /* Find the fall through predecessor, if any */
+ if (!pred->flBlock->isRunRarely())
+ {
+ rare = false;
+ break;
+ }
+ }
+
+ if (rare)
+ {
+ // If 'block' is the start of a handler or filter then we cannot make it
+ // rarely run because we may have an exceptional edge that
+ // branches here.
+ //
+ if (bbIsHandlerBeg(block))
+ {
+ rare = false;
+ }
+ }
+
+ if (rare)
+ {
+ block->bbSetRunRarely();
+ result = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("All branches to BB%02u are from rarely run blocks, marking as rarely run\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ // When marking a BBJ_CALLFINALLY as rarely run we also mark
+ // the BBJ_ALWAYS that comes after it as rarely run
+ //
+ if (block->isBBCallAlwaysPair())
+ {
+ BasicBlock* bNext = block->bbNext;
+ PREFIX_ASSUME(bNext != nullptr);
+ bNext->bbSetRunRarely();
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Also marking the BBJ_ALWAYS at BB%02u as rarely run\n", bNext->bbNum);
+ }
+#endif // DEBUG
+ }
+ }
+ }
+
+ /* COMPACT blocks if possible */
+ if (bPrev->bbJumpKind == BBJ_NONE)
+ {
+ if (fgCanCompactBlocks(bPrev, block))
+ {
+ fgCompactBlocks(bPrev, block);
+
+ block = bPrev;
+ continue;
+ }
+ }
+ //
+ // if bPrev->bbWeight is not based upon profile data we can adjust
+ // the weights of bPrev and block
+ //
+        else if (bPrev->isBBCallAlwaysPair() && // we must have a BBJ_CALLFINALLY and BBJ_ALWAYS pair
+ (bPrev->bbWeight != block->bbWeight) && // the weights are currently different
+ ((bPrev->bbFlags & BBF_PROF_WEIGHT) == 0)) // and the BBJ_CALLFINALLY block is not using profiled
+ // weights
+ {
+ if (block->isRunRarely())
+ {
+ bPrev->bbWeight =
+ block->bbWeight; // the BBJ_CALLFINALLY block now has the same weight as the BBJ_ALWAYS block
+ bPrev->bbFlags |= BBF_RUN_RARELY; // and is now rarely run
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Marking the BBJ_CALLFINALLY block at BB%02u as rarely run because BB%02u is rarely run\n",
+ bPrev->bbNum, block->bbNum);
+ }
+#endif // DEBUG
+ }
+ else if (bPrev->isRunRarely())
+ {
+ block->bbWeight =
+ bPrev->bbWeight; // the BBJ_ALWAYS block now has the same weight as the BBJ_CALLFINALLY block
+ block->bbFlags |= BBF_RUN_RARELY; // and is now rarely run
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Marking the BBJ_ALWAYS block at BB%02u as rarely run because BB%02u is rarely run\n",
+ block->bbNum, bPrev->bbNum);
+ }
+#endif // DEBUG
+ }
+ else // Both blocks are hot, bPrev is known not to be using profiled weight
+ {
+ bPrev->bbWeight =
+ block->bbWeight; // the BBJ_CALLFINALLY block now has the same weight as the BBJ_ALWAYS block
+ }
+ noway_assert(block->bbWeight == bPrev->bbWeight);
+ }
+ }
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if it is allowable (based upon the EH regions)
+ * to place block bAfter immediately after bBefore. It is allowable
+ * if the 'bBefore' and 'bAfter' blocks are in the exact same EH region.
+ */
+
+bool Compiler::fgEhAllowsMoveBlock(BasicBlock* bBefore, BasicBlock* bAfter)
+{
+ return BasicBlock::sameEHRegion(bBefore, bAfter);
+}
+
+/*****************************************************************************
+ *
+ * Function called to move the range of blocks [bStart .. bEnd].
+ * The blocks are placed immediately after the insertAfterBlk.
+ * fgFirstFuncletBB is not updated; that is the responsibility of the caller, if necessary.
+ */
+
+void Compiler::fgMoveBlocksAfter(BasicBlock* bStart, BasicBlock* bEnd, BasicBlock* insertAfterBlk)
+{
+ /* We have decided to insert the block(s) after 'insertAfterBlk' */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Relocated block%s [BB%02u..BB%02u] inserted after BB%02u%s\n", (bStart == bEnd) ? "" : "s",
+ bStart->bbNum, bEnd->bbNum, insertAfterBlk->bbNum,
+ (insertAfterBlk->bbNext == nullptr) ? " at the end of method" : "");
+ }
+#endif // DEBUG
+
+ /* relink [bStart .. bEnd] into the flow graph */
+
+ bEnd->bbNext = insertAfterBlk->bbNext;
+ if (insertAfterBlk->bbNext)
+ {
+ insertAfterBlk->bbNext->bbPrev = bEnd;
+ }
+ insertAfterBlk->setNext(bStart);
+
+ /* If insertAfterBlk was fgLastBB then update fgLastBB */
+ if (insertAfterBlk == fgLastBB)
+ {
+ fgLastBB = bEnd;
+ noway_assert(fgLastBB->bbNext == nullptr);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Function called to relocate a single range to the end of the method.
+ * Only an entire consecutive region can be moved and it will be kept together.
+ * Except for the first block, the range cannot have any blocks that jump into or out of the region.
+ * When successful we return the bLast block which is the last block that we relocated.
+ * When unsuccessful we return NULL.
+
+ =============================================================
+ NOTE: This function can invalidate all pointers into the EH table, as well as change the size of the EH table!
+ =============================================================
+ */
+
+BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE relocateType)
+{
+ INDEBUG(const char* reason = "None";)
+
+ // Figure out the range of blocks we're going to move
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+ BasicBlock* bStart = nullptr;
+ BasicBlock* bMiddle = nullptr;
+ BasicBlock* bLast = nullptr;
+ BasicBlock* bPrev = nullptr;
+
+#if FEATURE_EH_FUNCLETS
+ // We don't support moving try regions... yet?
+ noway_assert(relocateType == FG_RELOCATE_HANDLER);
+#endif // FEATURE_EH_FUNCLETS
+
+ HBtab = ehGetDsc(regionIndex);
+
+ if (relocateType == FG_RELOCATE_TRY)
+ {
+ bStart = HBtab->ebdTryBeg;
+ bLast = HBtab->ebdTryLast;
+ }
+ else if (relocateType == FG_RELOCATE_HANDLER)
+ {
+ if (HBtab->HasFilter())
+ {
+ // The filter and handler funclets must be moved together, and remain contiguous.
+ bStart = HBtab->ebdFilter;
+ bMiddle = HBtab->ebdHndBeg;
+ bLast = HBtab->ebdHndLast;
+ }
+ else
+ {
+ bStart = HBtab->ebdHndBeg;
+ bLast = HBtab->ebdHndLast;
+ }
+ }
+
+ // Our range must contain either all rarely run blocks or all non-rarely run blocks
+ bool inTheRange = false;
+ bool validRange = false;
+
+ BasicBlock* block;
+
+ noway_assert(bStart != nullptr && bLast != nullptr);
+ if (bStart == fgFirstBB)
+ {
+ INDEBUG(reason = "can not relocate first block";)
+ goto FAILURE;
+ }
+
+#if !FEATURE_EH_FUNCLETS
+ // With funclets we still need to set some information on the handler blocks,
+ // so we relocate the region even when it is already at the end of the method.
+ if (bLast->bbNext == NULL)
+ {
+ INDEBUG(reason = "region is already at the end of the method";)
+ goto FAILURE;
+ }
+#endif // !FEATURE_EH_FUNCLETS
+
+ // Walk the block list for this purpose:
+ // 1. Verify that all the blocks in the range are either all rarely run or not rarely run.
+ // When creating funclets, we ignore the run rarely flag, as we need to be able to move any blocks
+ // in the range.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !FEATURE_EH_FUNCLETS
+ bool isRare;
+ isRare = bStart->isRunRarely();
+#endif // !FEATURE_EH_FUNCLETS
+ block = fgFirstBB;
+ while (true)
+ {
+ if (block == bStart)
+ {
+ noway_assert(inTheRange == false);
+ inTheRange = true;
+ }
+ else if (block == bLast->bbNext)
+ {
+ noway_assert(inTheRange == true);
+ inTheRange = false;
+ break; // we found the end, so we're done
+ }
+
+ if (inTheRange)
+ {
+#if !FEATURE_EH_FUNCLETS
+ // All blocks in the range must have the same rarely-run status, otherwise we must fail.
+ if (isRare != block->isRunRarely())
+ {
+ INDEBUG(reason = "this region contains both rarely run and non-rarely run blocks";)
+ goto FAILURE;
+ }
+#endif // !FEATURE_EH_FUNCLETS
+
+ validRange = true;
+ }
+
+ if (block == nullptr)
+ {
+ break;
+ }
+
+ block = block->bbNext;
+ }
+ // Ensure that bStart .. bLast defined a valid range
+ noway_assert((validRange == true) && (inTheRange == false));
+
+ bPrev = bStart->bbPrev;
+ noway_assert(bPrev != nullptr); // Can't move a range that includes the first block of the function.
+
+ JITDUMP("Relocating %s range BB%02u..BB%02u (EH#%u) to end of BBlist\n",
+ (relocateType == FG_RELOCATE_TRY) ? "try" : "handler", bStart->bbNum, bLast->bbNum, regionIndex);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+
+ if (!FEATURE_EH_FUNCLETS)
+ {
+ // This is really expensive, and quickly becomes O(n^n) with funclets
+ // so only do it once after we've created them (see fgCreateFunclets)
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
+ }
+ }
+#endif // DEBUG
+
+#if FEATURE_EH_FUNCLETS
+
+ bStart->bbFlags |= BBF_FUNCLET_BEG; // Mark the start block of the funclet
+
+ if (bMiddle != nullptr)
+ {
+ bMiddle->bbFlags |= BBF_FUNCLET_BEG; // Also mark the start block of a filter handler as a funclet
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ BasicBlock* bNext;
+ bNext = bLast->bbNext;
+
+ /* Temporarily unlink [bStart .. bLast] from the flow graph */
+ fgUnlinkRange(bStart, bLast);
+
+ BasicBlock* insertAfterBlk;
+ insertAfterBlk = fgLastBB;
+
+#if FEATURE_EH_FUNCLETS
+
+ // There are several cases we need to consider when moving an EH range.
+ // If moving a range X, we must consider its relationship to every other EH
+ // range A in the table. Note that each entry in the table represents both
+ // a protected region and a handler region (possibly including a filter region
+ // that must live before and adjacent to the handler region), so we must
+ // consider try and handler regions independently. These are the cases:
+ // 1. A is completely contained within X (where "completely contained" means
+ // that the 'begin' and 'last' parts of A are strictly between the 'begin'
+ // and 'end' parts of X, and aren't equal to either, for example, they don't
+ // share 'last' blocks). In this case, when we move X, A moves with it, and
+ // the EH table doesn't need to change.
+ // 2. X is completely contained within A. In this case, X gets extracted from A,
+ // and the range of A shrinks, but because A is strictly within X, the EH
+ // table doesn't need to change.
+ // 3. A and X have exactly the same range. In this case, A is moving with X and
+ // the EH table doesn't need to change.
+ // 4. A and X share the 'last' block. There are two sub-cases:
+ // (a) A is a larger range than X (such that the beginning of A precedes the
+ // beginning of X): in this case, we are moving the tail of A. We set the
+ // 'last' block of A to the block preceding the beginning block of X.
+ // (b) A is a smaller range than X. Thus, we are moving the entirety of A along
+ // with X. In this case, nothing in the EH record for A needs to change.
+ // 5. A and X share the 'beginning' block (but aren't the same range, as in #3).
+ // This can never happen here, because we are only moving handler ranges (we don't
+ // move try ranges), and handler regions cannot start at the beginning of a try
+ // range or handler range and be a subset.
+ //
+ // Note that A and X must properly nest for the table to be well-formed. For example,
+ // the beginning of A can't be strictly within the range of X (that is, the beginning
+ // of A isn't shared with the beginning of X) and the end of A outside the range.
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ if (XTnum != regionIndex) // we don't need to update our 'last' pointer
+ {
+ if (HBtab->ebdTryLast == bLast)
+ {
+ // If we moved a set of blocks that were at the end of
+ // a different try region then we may need to update ebdTryLast
+ for (block = HBtab->ebdTryBeg; block != nullptr; block = block->bbNext)
+ {
+ if (block == bPrev)
+ {
+ // We were contained within it, so shrink its region by
+ // setting its 'last'
+ fgSetTryEnd(HBtab, bPrev);
+ break;
+ }
+ else if (block == HBtab->ebdTryLast->bbNext)
+ {
+ // bPrev does not come after the TryBeg, thus we are larger, and
+ // it is moving with us.
+ break;
+ }
+ }
+ }
+ if (HBtab->ebdHndLast == bLast)
+ {
+ // If we moved a set of blocks that were at the end of
+ // a different handler region then we must update ebdHndLast
+ for (block = HBtab->ebdHndBeg; block != nullptr; block = block->bbNext)
+ {
+ if (block == bPrev)
+ {
+ fgSetHndEnd(HBtab, bPrev);
+ break;
+ }
+ else if (block == HBtab->ebdHndLast->bbNext)
+ {
+ // bPrev does not come after the HndBeg
+ break;
+ }
+ }
+ }
+ }
+ } // end exception table iteration
+
+ // Insert the block(s) we are moving after fgLastBB
+ fgMoveBlocksAfter(bStart, bLast, insertAfterBlk);
+
+ if (fgFirstFuncletBB == nullptr) // The funclet region isn't set yet
+ {
+ fgFirstFuncletBB = bStart;
+ }
+ else
+ {
+ assert(fgFirstFuncletBB !=
+ insertAfterBlk->bbNext); // We insert at the end, not at the beginning, of the funclet region.
+ }
+
+ // These asserts assume we aren't moving try regions (which we might need to do). Only
+ // try regions can have fall through into or out of the region.
+
+ noway_assert(!bPrev->bbFallsThrough()); // There can be no fall through into a filter or handler region
+ noway_assert(!bLast->bbFallsThrough()); // There can be no fall through out of a handler region
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Create funclets: moved region\n");
+ fgDispHandlerTab();
+ }
+
+ // We have to wait to do this until we've created all the additional regions
+ // Because this relies on ebdEnclosingTryIndex and ebdEnclosingHndIndex
+ if (!FEATURE_EH_FUNCLETS)
+ {
+ // This is really expensive, and quickly becomes O(n^n) with funclets
+ // so only do it once after we've created them (see fgCreateFunclets)
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
+ }
+ }
+#endif // DEBUG
+
+#else // FEATURE_EH_FUNCLETS
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ if (XTnum == regionIndex)
+ {
+ // Don't update our handler's Last info
+ continue;
+ }
+
+ if (HBtab->ebdTryLast == bLast)
+ {
+ // If we moved a set of blocks that were at the end of
+ // a different try region then we may need to update ebdTryLast
+ for (block = HBtab->ebdTryBeg; block != NULL; block = block->bbNext)
+ {
+ if (block == bPrev)
+ {
+ fgSetTryEnd(HBtab, bPrev);
+ break;
+ }
+ else if (block == HBtab->ebdTryLast->bbNext)
+ {
+ // bPrev does not come after the TryBeg
+ break;
+ }
+ }
+ }
+ if (HBtab->ebdHndLast == bLast)
+ {
+ // If we moved a set of blocks that were at the end of
+ // a different handler region then we must update ebdHndLast
+ for (block = HBtab->ebdHndBeg; block != NULL; block = block->bbNext)
+ {
+ if (block == bPrev)
+ {
+ fgSetHndEnd(HBtab, bPrev);
+ break;
+ }
+ else if (block == HBtab->ebdHndLast->bbNext)
+ {
+ // bPrev does not come after the HndBeg
+ break;
+ }
+ }
+ }
+ } // end exception table iteration
+
+ // We have decided to insert the block(s) after fgLastBB
+ fgMoveBlocksAfter(bStart, bLast, insertAfterBlk);
+
+ // If bPrev falls through, we will insert a jump to block
+ fgConnectFallThrough(bPrev, bStart);
+
+ // If bLast falls through, we will insert a jump to bNext
+ fgConnectFallThrough(bLast, bNext);
+
+#endif // FEATURE_EH_FUNCLETS
+
+ goto DONE;
+
+FAILURE:
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** Failed fgRelocateEHRange(BB%02u..BB%02u) because %s\n", bStart->bbNum, bLast->bbNum,
+ reason);
+ }
+#endif // DEBUG
+
+ bLast = nullptr;
+
+DONE:
+
+ return bLast;
+}
+
+#if FEATURE_EH_FUNCLETS
+
+#if defined(_TARGET_ARM_)
+
+/*****************************************************************************
+ * We just removed a BBJ_CALLFINALLY/BBJ_ALWAYS pair. If this was the only such pair
+ * targeting the BBJ_ALWAYS target, then we need to clear the BBF_FINALLY_TARGET bit
+ * so that target can also be removed. 'block' is the finally target. Since we just
+ * removed the BBJ_ALWAYS, it better have the BBF_FINALLY_TARGET bit set.
+ */
+
+void Compiler::fgClearFinallyTargetBit(BasicBlock* block)
+{
+ assert((block->bbFlags & BBF_FINALLY_TARGET) != 0);
+
+ for (flowList* pred = block->bbPreds; pred; pred = pred->flNext)
+ {
+ if (pred->flBlock->bbJumpKind == BBJ_ALWAYS && pred->flBlock->bbJumpDest == block)
+ {
+ BasicBlock* pPrev = pred->flBlock->bbPrev;
+ if (pPrev != NULL)
+ {
+ if (pPrev->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ // We found a BBJ_CALLFINALLY / BBJ_ALWAYS that still points to this finally target
+ return;
+ }
+ }
+ }
+ }
+
+ // Didn't find any BBJ_CALLFINALLY / BBJ_ALWAYS that still points here, so clear the bit
+
+ block->bbFlags &= ~BBF_FINALLY_TARGET;
+}
+
+#endif // defined(_TARGET_ARM_)
+
+/*****************************************************************************
+ * Is this an intra-handler control flow edge?
+ *
+ * 'block' is the head block of a funclet/handler region.
+ * 'predBlock' is a predecessor block of 'block' in the predecessor list.
+ *
+ * 'predBlock' can legally only be one of three things:
+ * 1. in the same handler region (e.g., the source of a back-edge of a loop from
+ * 'predBlock' to 'block'), including in nested regions within the handler,
+ * 2. if 'block' begins a handler that is a filter-handler, 'predBlock' must be in the 'filter' region,
+ * 3. for other handlers, 'predBlock' must be in the 'try' region corresponding to the handler (or any
+ * region nested in the 'try' region).
+ *
+ * Note that on AMD64/ARM64, the BBJ_CALLFINALLY block that calls a finally handler is not
+ * within the corresponding 'try' region: it is placed in the corresponding 'try' region's
+ * parent (which might be the main function body). This is how it is represented to the VM
+ * (with a special "cloned finally" EH table entry).
+ *
+ * Return 'true' for case #1, and 'false' otherwise.
+ */
+bool Compiler::fgIsIntraHandlerPred(BasicBlock* predBlock, BasicBlock* block)
+{
+ // Some simple preconditions (as stated above)
+ assert(!fgFuncletsCreated);
+ assert(fgGetPredForBlock(block, predBlock) != nullptr);
+ assert(block->hasHndIndex());
+
+ EHblkDsc* xtab = ehGetDsc(block->getHndIndex());
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ if (xtab->HasFinallyHandler())
+ {
+ assert((xtab->ebdHndBeg == block) || // The normal case
+ ((xtab->ebdHndBeg->bbNext == block) &&
+ (xtab->ebdHndBeg->bbFlags & BBF_INTERNAL))); // After we've already inserted a header block, and we're
+ // trying to decide how to split up the predecessor edges.
+ if (predBlock->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ assert(predBlock->bbJumpDest == block);
+
+ // A BBJ_CALLFINALLY predecessor of the handler can only come from the corresponding try,
+ // not from any EH clauses nested in this handler. However, we represent the BBJ_CALLFINALLY
+ // as being in the 'try' region's parent EH region, which might be the main function body.
+
+ unsigned tryIndex = xtab->ebdEnclosingTryIndex;
+ if (tryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ assert(!predBlock->hasTryIndex());
+ }
+ else
+ {
+ assert(predBlock->hasTryIndex());
+ assert(tryIndex == predBlock->getTryIndex());
+ assert(ehGetDsc(tryIndex)->InTryRegionBBRange(predBlock));
+ }
+ return false;
+ }
+ }
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+ assert(predBlock->hasHndIndex() || predBlock->hasTryIndex());
+
+ // We could search the try region looking for predBlock by using bbInTryRegions
+ // but that does a lexical search for the block, and then assumes funclets
+ // have been created and does a lexical search of all funclets that were pulled
+ // out of the parent try region.
+ // First, funclets haven't been created yet, and even if they had been, we shouldn't
+ // have any funclet directly branching to another funclet (they have to return first).
+ // So we can safely use CheckIsTryRegion instead of bbInTryRegions.
+ // Second, I believe the depth of any EH graph will on average be smaller than the
+ // breadth of the blocks within a try body. Thus it is faster to get our answer by
+ // looping outward over the region graph. However, I have added asserts, as a
+ // precaution, to ensure both algorithms agree. The asserts also check that the only
+ // way to reach the head of a funclet is from the corresponding try body or from
+ // within the funclet (and *not* any nested funclets).
+
+ if (predBlock->hasTryIndex())
+ {
+ // Because the EH clauses are listed inside-out, any nested trys will be at a
+ // lower index than the current try and if there's no enclosing try, tryIndex
+ // will terminate at NO_ENCLOSING_INDEX
+
+ unsigned tryIndex = predBlock->getTryIndex();
+ while (tryIndex < block->getHndIndex())
+ {
+ tryIndex = ehGetEnclosingTryIndex(tryIndex);
+ }
+ // tryIndex should enclose predBlock
+ assert((tryIndex == EHblkDsc::NO_ENCLOSING_INDEX) || ehGetDsc(tryIndex)->InTryRegionBBRange(predBlock));
+
+ // At this point tryIndex is either block's handler's corresponding try body
+ // or some outer try region that contains both predBlock & block or
+ // NO_ENCLOSING_INDEX (because there was no try body that encloses both).
+ if (tryIndex == block->getHndIndex())
+ {
+ assert(xtab->InTryRegionBBRange(predBlock));
+ assert(!xtab->InHndRegionBBRange(predBlock));
+ return false;
+ }
+ // tryIndex should enclose block (and predBlock as previously asserted)
+ assert((tryIndex == EHblkDsc::NO_ENCLOSING_INDEX) || ehGetDsc(tryIndex)->InTryRegionBBRange(block));
+ }
+ if (xtab->HasFilter())
+ {
+ // The block is a handler. Check if the pred block is from its filter. We only need to
+ // check the end filter flag, as there is only a single filter for any handler, and we
+ // already know predBlock is a predecessor of block.
+ if (predBlock->bbJumpKind == BBJ_EHFILTERRET)
+ {
+ assert(!xtab->InHndRegionBBRange(predBlock));
+ return false;
+ }
+ }
+ // It is not in our try region (or filter), so it must be within this handler (or try bodies
+ // within this handler)
+ assert(!xtab->InTryRegionBBRange(predBlock));
+ assert(xtab->InHndRegionBBRange(predBlock));
+ return true;
+}
+
+/*****************************************************************************
+ * Does this block, first block of a handler region, have any predecessor edges
+ * that are not from its corresponding try region?
+ */
+
+bool Compiler::fgAnyIntraHandlerPreds(BasicBlock* block)
+{
+ assert(block->hasHndIndex());
+ assert(fgFirstBlockOfHandler(block) == block); // this block is the first block of a handler
+
+ flowList* pred;
+
+ for (pred = block->bbPreds; pred; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+
+ if (fgIsIntraHandlerPred(predBlock, block))
+ {
+ // We have a predecessor that is not from our try region
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ * Introduce a new head block of the handler for the prolog to be put in, ahead
+ * of the current handler head 'block'.
+ * Note that this code has some similarities to fgCreateLoopPreHeader().
+ */
+
+void Compiler::fgInsertFuncletPrologBlock(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCreating funclet prolog header for BB%02u\n", block->bbNum);
+ }
+#endif
+
+ assert(block->hasHndIndex());
+ assert(fgFirstBlockOfHandler(block) == block); // this block is the first block of a handler
+
+ /* Allocate a new basic block */
+
+ BasicBlock* newHead = bbNewBasicBlock(BBJ_NONE);
+
+ // In fgComputePreds() we set the BBF_JMP_TARGET and BBF_HAS_LABEL for all of the handler entry points
+ //
+ newHead->bbFlags |= (BBF_INTERNAL | BBF_JMP_TARGET | BBF_HAS_LABEL);
+ newHead->inheritWeight(block);
+ newHead->bbRefs = 0;
+
+ fgInsertBBbefore(block, newHead); // insert the new block in the block list
+ fgExtendEHRegionBefore(block); // Update the EH table to make the prolog block the first block in the block's EH
+ // region.
+
+ // fgExtendEHRegionBefore mucks with the bbRefs without updating the pred list, which we will
+ // do below for this block. So, undo that change.
+ assert(newHead->bbRefs > 0);
+ newHead->bbRefs--;
+ block->bbRefs++;
+
+ // Distribute the pred list between newHead and block. Incoming edges coming from outside
+ // the handler go to the prolog. Edges coming from within the handler are back-edges, and
+ // go to the existing 'block'.
+
+ for (flowList* pred = block->bbPreds; pred; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ if (!fgIsIntraHandlerPred(predBlock, block))
+ {
+ // It's a jump from outside the handler; add it to the newHead preds list and remove
+ // it from the block preds list.
+
+ switch (predBlock->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ noway_assert(predBlock->bbJumpDest == block);
+ predBlock->bbJumpDest = newHead;
+ fgRemoveRefPred(block, predBlock);
+ fgAddRefPred(newHead, predBlock);
+ break;
+
+ default:
+ // The only way into the handler is via a BBJ_CALLFINALLY (to a finally handler), or
+ // via exception handling.
+ noway_assert(false);
+ break;
+ }
+ }
+ }
+
+ assert(nullptr == fgGetPredForBlock(block, newHead));
+ fgAddRefPred(block, newHead);
+
+ assert((newHead->bbFlags & (BBF_INTERNAL | BBF_JMP_TARGET | BBF_HAS_LABEL)) ==
+ (BBF_INTERNAL | BBF_JMP_TARGET | BBF_HAS_LABEL));
+}
+
+/*****************************************************************************
+ *
+ * Every funclet will have a prolog. That prolog will be inserted as the first instructions
+ * in the first block of the funclet. If the prolog is also the head block of a loop, we
+ * would end up with the prolog instructions being executed more than once.
+ * Check for this by searching the predecessor list for loops, and create a new prolog header
+ * block when needed. We detect a loop by looking for any predecessor that isn't in the
+ * handler's try region, since the only way to get into a handler is via that try region.
+ */
+
+void Compiler::fgCreateFuncletPrologBlocks()
+{
+ noway_assert(fgComputePredsDone);
+ noway_assert(!fgDomsComputed); // this function doesn't maintain the dom sets
+ assert(!fgFuncletsCreated);
+
+ bool prologBlocksCreated = false;
+ EHblkDsc* HBtabEnd;
+ EHblkDsc* HBtab;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+ BasicBlock* head = HBtab->ebdHndBeg;
+
+ if (fgAnyIntraHandlerPreds(head))
+ {
+ // We need to create a new block in which to place the prolog, and split the existing
+ // head block predecessor edges into those that should point to the prolog, and those
+ // that shouldn't.
+ //
+ // It's arguable that we should just always do this, and not only when we "need to",
+ // so there aren't two different code paths. However, it's unlikely to be necessary
+ // for catch handlers because they have an incoming argument (the exception object)
+ // that needs to get stored or saved, so back-arcs won't normally go to the head. It's
+ // possible when writing in IL to generate a legal loop (e.g., push an Exception object
+ // on the stack before jumping back to the catch head), but C# probably won't. This will
+ // most commonly only be needed for finallys with a do/while loop at the top of the
+ // finally.
+ //
+ // Note that we don't check filters. This might be a bug, but filters always have a filter
+ // object live on entry, so it's at least unlikely (illegal?) that a loop edge targets the
+ // filter head.
+
+ fgInsertFuncletPrologBlock(head);
+ prologBlocksCreated = true;
+ }
+ }
+
+ if (prologBlocksCreated)
+ {
+ // If we've modified the graph, reset the 'modified' flag, since the dominators haven't
+ // been computed.
+ fgModified = false;
+
+#if DEBUG
+ if (verbose)
+ {
+ JITDUMP("\nAfter fgCreateFuncletPrologBlocks()");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+
+ fgVerifyHandlerTab();
+ fgDebugCheckBBlist();
+#endif // DEBUG
+ }
+}
+
+/*****************************************************************************
+ *
+ * Function to create funclets out of all EH catch/finally/fault blocks.
+ * We only move filter and handler blocks, not try blocks.
+ */
+
+void Compiler::fgCreateFunclets()
+{
+ assert(!fgFuncletsCreated);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgCreateFunclets()\n");
+ }
+#endif
+
+ fgCreateFuncletPrologBlocks();
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
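+ // We need one FuncInfoDsc for the root function plus one for each funclet:
+ // every handler becomes a funclet, and every filter contributes an additional funclet.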
+ const unsigned int funcCnt = ehFuncletCount() + 1;
+
+ if (!FitsIn<unsigned short>(funcCnt))
+ {
+ IMPL_LIMITATION("Too many funclets");
+ }
+
+ FuncInfoDsc* funcInfo = new (this, CMK_BasicBlock) FuncInfoDsc[funcCnt];
+
+ unsigned short funcIdx;
+
+ // Setup the root FuncInfoDsc and prepare to start associating
+ // FuncInfoDsc's with their corresponding EH region
+ memset((void*)funcInfo, 0, funcCnt * sizeof(FuncInfoDsc));
+ assert(funcInfo[0].funKind == FUNC_ROOT);
+ funcIdx = 1;
+
+ // Because we iterate from the top to the bottom of the compHndBBtab array, we are iterating
+ // from most nested (innermost) to least nested (outermost) EH region. It would be reasonable
+ // to iterate in the opposite order, but the order of funclets shouldn't matter.
+ //
+ // We move every handler region to the end of the function: each handler will become a funclet.
+ //
+ // Note that fgRelocateEHRange() can add new entries to the EH table. However, they will always
+ // be added *after* the current index, so our iteration here is not invalidated.
+ // It *can* invalidate the compHndBBtab pointer itself, though, if it gets reallocated!
+
+ for (XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ HBtab = ehGetDsc(XTnum); // must re-compute this every loop, since fgRelocateEHRange changes the table
+ if (HBtab->HasFilter())
+ {
+ assert(funcIdx < funcCnt);
+ funcInfo[funcIdx].funKind = FUNC_FILTER;
+ funcInfo[funcIdx].funEHIndex = (unsigned short)XTnum;
+ funcIdx++;
+ }
+ assert(funcIdx < funcCnt);
+ funcInfo[funcIdx].funKind = FUNC_HANDLER;
+ funcInfo[funcIdx].funEHIndex = (unsigned short)XTnum;
+ HBtab->ebdFuncIndex = funcIdx;
+ funcIdx++;
+ fgRelocateEHRange(XTnum, FG_RELOCATE_HANDLER);
+ }
+
+ // We better have populated all of them by now
+ assert(funcIdx == funcCnt);
+
+ // Publish
+ compCurrFuncIdx = 0;
+ compFuncInfos = funcInfo;
+ compFuncInfoCount = (unsigned short)funcCnt;
+
+ fgFuncletsCreated = true;
+
+#if DEBUG
+ if (verbose)
+ {
+ JITDUMP("\nAfter fgCreateFunclets()");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+
+ fgVerifyHandlerTab();
+ fgDebugCheckBBlist();
+#endif // DEBUG
+}
+
+#else // !FEATURE_EH_FUNCLETS
+
+ /*****************************************************************************
+ *
+ * Function called to relocate any and all EH regions.
+ * Only entire consecutive EH regions will be moved and they will be kept together.
+ * Except for the first block, the range can not have any blocks that jump into or out of the region.
+ */
+
+ bool Compiler::fgRelocateEHRegions()
+ {
+ bool result = false; // Our return value
+
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In fgRelocateEHRegions()\n");
+#endif
+
+ if (fgCanRelocateEHRegions)
+ {
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ // Nested EH regions cannot be moved.
+ // Also we don't want to relocate an EH region that has a filter
+ if ((HBtab->ebdHandlerNestingLevel == 0) && !HBtab->HasFilter())
+ {
+ bool movedTry = false;
+#if DEBUG
+ bool movedHnd = false;
+#endif // DEBUG
+
+ // Only try to move the outermost try region
+ if (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ // Move the entire try region if it can be moved
+ if (HBtab->ebdTryBeg->isRunRarely())
+ {
+ BasicBlock* bTryLastBB = fgRelocateEHRange(XTnum, FG_RELOCATE_TRY);
+ if (bTryLastBB != NULL)
+ {
+ result = true;
+ movedTry = true;
+ }
+ }
+#if DEBUG
+ if (verbose && movedTry)
+ {
+ printf("\nAfter relocating an EH try region");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+
+ // Make sure that the predecessor lists are accurate
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
+ }
+ }
+#endif // DEBUG
+ }
+
+ // Currently it is not good to move the rarely run handler regions to the end of the method
+ // because fgDetermineFirstColdBlock() must put the start of any handler region in the hot section.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if 0
+ // Now try to move the entire handler region if it can be moved.
+ // Don't try to move a finally handler unless we already moved the try region.
+ if (HBtab->ebdHndBeg->isRunRarely() &&
+ !HBtab->ebdHndBeg->hasTryIndex() &&
+ (movedTry || !HBtab->HasFinallyHandler()))
+ {
+ BasicBlock* bHndLastBB = fgRelocateEHRange(XTnum, FG_RELOCATE_HANDLER);
+ if (bHndLastBB != NULL)
+ {
+ result = true;
+ movedHnd = true;
+ }
+ }
+#endif // 0
+
+#if DEBUG
+ if (verbose && movedHnd)
+ {
+ printf("\nAfter relocating an EH handler region");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+
+ // Make sure that the predecessor lists are accurate
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
+ }
+ }
+#endif // DEBUG
+ }
+ }
+ }
+
+#if DEBUG
+ fgVerifyHandlerTab();
+
+ if (verbose && result)
+ {
+ printf("\nAfter fgRelocateEHRegions()");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ // Make sure that the predecessor lists are accurate
+ fgDebugCheckBBlist();
+ }
+#endif // DEBUG
+
+ return result;
+ }
+
+#endif // !FEATURE_EH_FUNCLETS
+
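+// setEdgeWeightMinChecked: try to set flEdgeWeightMin to newWeight.
+// Returns true (and updates the range) when newWeight is compatible with the current
+// [flEdgeWeightMin..flEdgeWeightMax] range, allowing up to 'slop' units of inaccuracy.
+// When the slop is needed to make the assignment, *wbUsedSlop is set to true (if non-null).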
+bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop)
+{
+ bool result = false;
+ if ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin))
+ {
+ flEdgeWeightMin = newWeight;
+ result = true;
+ }
+ else if (slop > 0)
+ {
+ // We allow for a small amount of inaccuracy in block weight counts.
+ if (flEdgeWeightMax < newWeight)
+ {
+ // We have already determined that this edge's weight
+ // is less than newWeight, so we just allow for the slop
+ if (newWeight <= (flEdgeWeightMax + slop))
+ {
+ result = true;
+
+ if (flEdgeWeightMax != 0)
+ {
+ // We will raise flEdgeWeightMin and Max towards newWeight
+ flEdgeWeightMin = flEdgeWeightMax;
+ flEdgeWeightMax = newWeight;
+ }
+
+ if (wbUsedSlop != nullptr)
+ {
+ *wbUsedSlop = true;
+ }
+ }
+ }
+ else
+ {
+ assert(flEdgeWeightMin > newWeight);
+
+ // We have already determined that this edge's weight
+ // is more than newWeight, so we just allow for the slop
+ if ((newWeight + slop) >= flEdgeWeightMin)
+ {
+ result = true;
+
+ assert(flEdgeWeightMax != 0);
+
+ // We will lower flEdgeWeightMin towards newWeight
+ flEdgeWeightMin = newWeight;
+
+ if (wbUsedSlop != nullptr)
+ {
+ *wbUsedSlop = true;
+ }
+ }
+ }
+
+ // If we are returning true then we should have adjusted the range so that
+ // the newWeight is in the new range [Min..Max] or flEdgeWeightMax is zero.
+ // Also we should have set *wbUsedSlop to true, unless wbUsedSlop is nullptr.
+ if (result == true)
+ {
+ assert((flEdgeWeightMax == 0) || ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin)));
+
+ if (wbUsedSlop != nullptr)
+ {
+ assert(*wbUsedSlop == true);
+ }
+ }
+ }
+
+#if DEBUG
+ if (result == false)
+ {
+ result = false; // break here
+ }
+#endif // DEBUG
+
+ return result;
+}
+
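+// setEdgeWeightMaxChecked: try to set flEdgeWeightMax to newWeight.
+// Returns true (and updates the range) when newWeight is compatible with the current
+// [flEdgeWeightMin..flEdgeWeightMax] range, allowing up to 'slop' units of inaccuracy.
+// When the slop is needed to make the assignment, *wbUsedSlop is set to true (if non-null).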
+bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop)
+{
+ bool result = false;
+ if ((newWeight >= flEdgeWeightMin) && (newWeight <= flEdgeWeightMax))
+ {
+ flEdgeWeightMax = newWeight;
+ result = true;
+ }
+ else if (slop > 0)
+ {
+ // We allow for a small amount of inaccuracy in block weight counts.
+ if (flEdgeWeightMax < newWeight)
+ {
+ // We have already determined that this edge's weight
+ // is less than newWeight, so we just allow for the slop
+ if (newWeight <= (flEdgeWeightMax + slop))
+ {
+ result = true;
+
+ if (flEdgeWeightMax != 0)
+ {
+ // We will allow this to raise flEdgeWeightMax towards newWeight
+ flEdgeWeightMax = newWeight;
+ }
+
+ if (wbUsedSlop != nullptr)
+ {
+ *wbUsedSlop = true;
+ }
+ }
+ }
+ else
+ {
+ assert(flEdgeWeightMin > newWeight);
+
+ // We have already determined that this edge's weight
+ // is more than newWeight, so we just allow for the slop
+ if ((newWeight + slop) >= flEdgeWeightMin)
+ {
+ result = true;
+
+ assert(flEdgeWeightMax != 0);
+
+ // We will allow this to lower flEdgeWeightMin and Max towards newWeight
+ flEdgeWeightMax = flEdgeWeightMin;
+ flEdgeWeightMin = newWeight;
+
+ if (wbUsedSlop != nullptr)
+ {
+ *wbUsedSlop = true;
+ }
+ }
+ }
+
+ // If we are returning true then we should have adjusted the range so that
+ // the newWeight is in the new range [Min..Max] or flEdgeWeightMax is zero.
+ // Also we should have set *wbUsedSlop to true, unless wbUsedSlop is nullptr.
+ if (result == true)
+ {
+ assert((flEdgeWeightMax == 0) || ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin)));
+
+ assert((wbUsedSlop == nullptr) || (*wbUsedSlop == true));
+ }
+ }
+
+#if DEBUG
+ if (result == false)
+ {
+ result = false; // break here
+ }
+#endif // DEBUG
+
+ return result;
+}
+
+#ifdef DEBUG
+void Compiler::fgPrintEdgeWeights()
+{
+ BasicBlock* bSrc;
+ BasicBlock* bDst;
+ flowList* edge;
+
+ // Print out all of the edge weights
+ for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
+ {
+ if (bDst->bbPreds != nullptr)
+ {
+ printf(" Edge weights into BB%02u :", bDst->bbNum);
+ for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ bSrc = edge->flBlock;
+ // This is the control flow edge (bSrc -> bDst)
+
+ printf("BB%02u ", bSrc->bbNum);
+
+ if (edge->flEdgeWeightMin < BB_MAX_WEIGHT)
+ {
+ printf("(%s", refCntWtd2str(edge->flEdgeWeightMin));
+ }
+ else
+ {
+ printf("(MAX");
+ }
+ if (edge->flEdgeWeightMin != edge->flEdgeWeightMax)
+ {
+ if (edge->flEdgeWeightMax < BB_MAX_WEIGHT)
+ {
+ printf("..%s", refCntWtd2str(edge->flEdgeWeightMax));
+ }
+ else
+ {
+ printf("..MAX");
+ }
+ }
+ printf(")");
+ if (edge->flNext != nullptr)
+ {
+ printf(", ");
+ }
+ }
+ printf("\n");
+ }
+ }
+}
+#endif // DEBUG
+
+// return true if there is a possibility that the method has a loop (a backedge is present)
+bool Compiler::fgMightHaveLoop()
+{
+ // Don't use a BlockSet for this temporary bitset of blocks: we don't want to have to call EnsureBasicBlockEpoch()
+ // and potentially change the block epoch.
+
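+ // Walk the blocks in lexical order, recording each block we visit; any successor edge
+ // that targets an already-visited block is a lexical back edge, so a loop is possible.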
+ BitVecTraits blockVecTraits(fgBBNumMax + 1, this);
+ BitVec BLOCKSET_INIT_NOCOPY(blocksSeen, BitVecOps::MakeEmpty(&blockVecTraits));
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ BitVecOps::AddElemD(&blockVecTraits, blocksSeen, block->bbNum);
+
+ AllSuccessorIter succsEnd = block->GetAllSuccs(this).end();
+ for (AllSuccessorIter succs = block->GetAllSuccs(this).begin(); succs != succsEnd; ++succs)
+ {
+ BasicBlock* succ = (*succs);
+ if (BitVecOps::IsMember(&blockVecTraits, blocksSeen, succ->bbNum))
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+void Compiler::fgComputeEdgeWeights()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgComputeEdgeWeights()\n");
+ }
+#endif // DEBUG
+
+ if (fgIsUsingProfileWeights() == false)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgComputeEdgeWeights() we do not have any profile data so we are not using the edge weights\n");
+ }
+#endif // DEBUG
+ fgHaveValidEdgeWeights = false;
+ fgCalledWeight = BB_UNITY_WEIGHT;
+ }
+
+#if DEBUG
+ if (verbose)
+ {
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ BasicBlock* bSrc;
+ BasicBlock* bDst;
+ flowList* edge;
+ unsigned iterations = 0;
+ unsigned goodEdgeCountCurrent = 0;
+ unsigned goodEdgeCountPrevious = 0;
+ bool inconsistentProfileData = false;
+ bool hasIncompleteEdgeWeights = false;
+ unsigned numEdges = 0;
+ bool usedSlop = false;
+ bool changed;
+ bool modified;
+
+ BasicBlock::weight_t returnWeight;
+ BasicBlock::weight_t slop;
+
+ // If we have any blocks that did not have profile derived weight
+ // we will try to fix their weight up here
+ //
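+ // A block whose only predecessor flows exclusively to it, or whose only successor is
+ // reached exclusively from it, must have the same weight as that neighbor; we repeat
+ // the pass until no more weights change (or the iteration limit below is reached).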
+ modified = false;
+ do // while (changed)
+ {
+ changed = false;
+ returnWeight = 0;
+ iterations++;
+
+ for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
+ {
+ if (((bDst->bbFlags & BBF_PROF_WEIGHT) == 0) && (bDst->bbPreds != nullptr))
+ {
+ BasicBlock* bOnlyNext;
+
+ // This block does not have a profile derived weight
+ //
+ BasicBlock::weight_t newWeight = BB_MAX_WEIGHT;
+
+ if (bDst->countOfInEdges() == 1)
+ {
+ // Only one block flows into bDst
+ bSrc = bDst->bbPreds->flBlock;
+
+ // Does this block flow into only one other block
+ if (bSrc->bbJumpKind == BBJ_NONE)
+ {
+ bOnlyNext = bSrc->bbNext;
+ }
+ else if (bSrc->bbJumpKind == BBJ_ALWAYS)
+ {
+ bOnlyNext = bSrc->bbJumpDest;
+ }
+ else
+ {
+ bOnlyNext = nullptr;
+ }
+
+ if ((bOnlyNext == bDst) && ((bSrc->bbFlags & BBF_PROF_WEIGHT) != 0))
+ {
+ // We know the exact weight of bDst
+ newWeight = bSrc->bbWeight;
+ }
+ }
+
+ // Does this block flow into only one other block
+ if (bDst->bbJumpKind == BBJ_NONE)
+ {
+ bOnlyNext = bDst->bbNext;
+ }
+ else if (bDst->bbJumpKind == BBJ_ALWAYS)
+ {
+ bOnlyNext = bDst->bbJumpDest;
+ }
+ else
+ {
+ bOnlyNext = nullptr;
+ }
+
+ if ((bOnlyNext != nullptr) && (bOnlyNext->bbPreds != nullptr))
+ {
+ // Does only one block flow into bOnlyNext
+ if (bOnlyNext->countOfInEdges() == 1)
+ {
+ noway_assert(bOnlyNext->bbPreds->flBlock == bDst);
+
+ // We know the exact weight of bDst
+ newWeight = bOnlyNext->bbWeight;
+ }
+ }
+
+ if ((newWeight != BB_MAX_WEIGHT) && (bDst->bbWeight != newWeight))
+ {
+ changed = true;
+ modified = true;
+ bDst->bbWeight = newWeight;
+ if (newWeight == 0)
+ {
+ bDst->bbFlags |= BBF_RUN_RARELY;
+ }
+ else
+ {
+ bDst->bbFlags &= ~BBF_RUN_RARELY;
+ }
+ }
+ }
+
+ // Sum up the weights of all of the return blocks and throw blocks
+ // This is used when we have a back-edge into block 1
+ //
+ if (((bDst->bbFlags & BBF_PROF_WEIGHT) != 0) &&
+ ((bDst->bbJumpKind == BBJ_RETURN) || (bDst->bbJumpKind == BBJ_THROW)))
+ {
+ returnWeight += bDst->bbWeight;
+ }
+ }
+ }
+ // Generally when we synthesize profile estimates we do it in a way where this algorithm will converge
+ // but downstream opts that remove conditional branches may create a situation where this is not the case.
+ // For instance a loop that becomes unreachable creates a sort of 'ring oscillator' (See test b539509)
+ while (changed && iterations < 10);
+
+#if DEBUG
+ if (verbose && modified)
+ {
+ printf("fgComputeEdgeWeights() adjusted the weight of some blocks\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif
+
+ // When we are not using profile data we have already setup fgCalledWeight
+ // only set it here if we are using profile data
+ //
+ if (fgIsUsingProfileWeights())
+ {
+ // If the first block has one ref then its weight is the fgCalledWeight;
+ // otherwise we have back edges into the first block, so instead
+ // we use the sum of the return block weights.
+ // If the profile data has a 0 for the returnWeight
+ // then just use the first block weight rather than the 0.
+ //
+ if ((fgFirstBB->countOfInEdges() == 1) || (returnWeight == 0))
+ {
+ fgCalledWeight = fgFirstBB->bbWeight;
+ }
+ else
+ {
+ fgCalledWeight = returnWeight;
+ }
+ }
+
+ // Now we will compute the initial flEdgeWeightMin and flEdgeWeightMax values
+ for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
+ {
+ BasicBlock::weight_t bDstWeight = bDst->bbWeight;
+
+ // We subtract out the called count so that bDstWeight is
+ // the sum of all edges that go into this block from this method.
+ //
+ if (bDst == fgFirstBB)
+ {
+ bDstWeight -= fgCalledWeight;
+ }
+
+ for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ bool assignOK = true;
+
+ bSrc = edge->flBlock;
+ // We are processing the control flow edge (bSrc -> bDst)
+
+ numEdges++;
+
+ //
+ // If the bSrc or bDst blocks do not have exact profile weights
+ // then we must reset any values that they currently have
+ //
+
+ if (((bSrc->bbFlags & BBF_PROF_WEIGHT) == 0) || ((bDst->bbFlags & BBF_PROF_WEIGHT) == 0))
+ {
+ edge->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edge->flEdgeWeightMax = BB_MAX_WEIGHT;
+ }
+
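+ // Allow for a small amount of inaccuracy ('slop') when checking the edge weight
+ // against the block weights; see setEdgeWeightMinChecked/setEdgeWeightMaxChecked above.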
+ slop = BasicBlock::GetSlopFraction(bSrc, bDst) + 1;
+ switch (bSrc->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_NONE:
+ case BBJ_CALLFINALLY:
+ // We know the exact edge weight
+ assignOK &= edge->setEdgeWeightMinChecked(bSrc->bbWeight, slop, &usedSlop);
+ assignOK &= edge->setEdgeWeightMaxChecked(bSrc->bbWeight, slop, &usedSlop);
+ break;
+
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ if (edge->flEdgeWeightMax > bSrc->bbWeight)
+ {
+ // The maximum weight of this edge can't be greater than the weight of bSrc
+ assignOK &= edge->setEdgeWeightMaxChecked(bSrc->bbWeight, slop, &usedSlop);
+ }
+ break;
+
+ default:
+ // We should never have an edge that starts from one of these jump kinds
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+ // The maximum weight of this edge can't be greater than the weight of bDst
+ if (edge->flEdgeWeightMax > bDstWeight)
+ {
+ assignOK &= edge->setEdgeWeightMaxChecked(bDstWeight, slop, &usedSlop);
+ }
+
+ if (!assignOK)
+ {
+ // Here we have inconsistent profile data
+ inconsistentProfileData = true;
+ // No point in continuing
+ goto EARLY_EXIT;
+ }
+ }
+ }
+
+ fgEdgeCount = numEdges;
+
+ iterations = 0;
+
+ do
+ {
+ iterations++;
+ goodEdgeCountPrevious = goodEdgeCountCurrent;
+ goodEdgeCountCurrent = 0;
+ hasIncompleteEdgeWeights = false;
+
+ for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
+ {
+ for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ bool assignOK = true;
+
+ // We are processing the control flow edge (bSrc -> bDst)
+ bSrc = edge->flBlock;
+
+ slop = BasicBlock::GetSlopFraction(bSrc, bDst) + 1;
+ if (bSrc->bbJumpKind == BBJ_COND)
+ {
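+ // A BBJ_COND block has two successor edges (the jump and the fall-through), and
+ // their weights should sum to bSrc->bbWeight (within the slop), so we can use the
+ // bound on one edge to tighten the [min..max] range of the other.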
+ int diff;
+ flowList* otherEdge;
+ if (bSrc->bbNext == bDst)
+ {
+ otherEdge = fgGetPredForBlock(bSrc->bbJumpDest, bSrc);
+ }
+ else
+ {
+ otherEdge = fgGetPredForBlock(bSrc->bbNext, bSrc);
+ }
+ noway_assert(edge->flEdgeWeightMin <= edge->flEdgeWeightMax);
+ noway_assert(otherEdge->flEdgeWeightMin <= otherEdge->flEdgeWeightMax);
+
+ // Adjust edge->flEdgeWeightMin up or adjust otherEdge->flEdgeWeightMax down
+ diff = ((int)bSrc->bbWeight) - ((int)edge->flEdgeWeightMin + (int)otherEdge->flEdgeWeightMax);
+ if (diff > 0)
+ {
+ assignOK &= edge->setEdgeWeightMinChecked(edge->flEdgeWeightMin + diff, slop, &usedSlop);
+ }
+ else if (diff < 0)
+ {
+ assignOK &=
+ otherEdge->setEdgeWeightMaxChecked(otherEdge->flEdgeWeightMax + diff, slop, &usedSlop);
+ }
+
+ // Adjust otherEdge->flEdgeWeightMin up or adjust edge->flEdgeWeightMax down
+ diff = ((int)bSrc->bbWeight) - ((int)otherEdge->flEdgeWeightMin + (int)edge->flEdgeWeightMax);
+ if (diff > 0)
+ {
+ assignOK &=
+ otherEdge->setEdgeWeightMinChecked(otherEdge->flEdgeWeightMin + diff, slop, &usedSlop);
+ }
+ else if (diff < 0)
+ {
+ assignOK &= edge->setEdgeWeightMaxChecked(edge->flEdgeWeightMax + diff, slop, &usedSlop);
+ }
+
+ if (!assignOK)
+ {
+ // Here we have inconsistent profile data
+ inconsistentProfileData = true;
+ // No point in continuing
+ goto EARLY_EXIT;
+ }
+#ifdef DEBUG
+ // Now edge->flEdgeWeightMin and otherEdge->flEdgeWeightMax) should add up to bSrc->bbWeight
+ diff = ((int)bSrc->bbWeight) - ((int)edge->flEdgeWeightMin + (int)otherEdge->flEdgeWeightMax);
+ noway_assert((-((int)slop) <= diff) && (diff <= ((int)slop)));
+
+ // Now otherEdge->flEdgeWeightMin and edge->flEdgeWeightMax) should add up to bSrc->bbWeight
+ diff = ((int)bSrc->bbWeight) - ((int)otherEdge->flEdgeWeightMin + (int)edge->flEdgeWeightMax);
+ noway_assert((-((int)slop) <= diff) && (diff <= ((int)slop)));
+#endif // DEBUG
+ }
+ }
+ }
+
+ for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
+ {
+ BasicBlock::weight_t bDstWeight = bDst->bbWeight;
+
+ if (bDstWeight == BB_MAX_WEIGHT)
+ {
+ inconsistentProfileData = true;
+ // No point in continuing
+ goto EARLY_EXIT;
+ }
+ else
+ {
+ // We subtract out the called count so that bDstWeight is
+ // the sum of all edges that go into this block from this method.
+ //
+ if (bDst == fgFirstBB)
+ {
+ bDstWeight -= fgCalledWeight;
+ }
+
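+ // The incoming edge weights should sum to bDstWeight (within the slop), so each
+ // edge's [min..max] range can be tightened using the other edges' combined bounds.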
+ UINT64 minEdgeWeightSum = 0;
+ UINT64 maxEdgeWeightSum = 0;
+
+ // Calculate the sums of the minimum and maximum edge weights
+ for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ // We are processing the control flow edge (bSrc -> bDst)
+ bSrc = edge->flBlock;
+
+ maxEdgeWeightSum += edge->flEdgeWeightMax;
+ minEdgeWeightSum += edge->flEdgeWeightMin;
+ }
+
+ // maxEdgeWeightSum is the sum of all flEdgeWeightMax values into bDst
+ // minEdgeWeightSum is the sum of all flEdgeWeightMin values into bDst
+
+ for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ bool assignOK = true;
+
+ // We are processing the control flow edge (bSrc -> bDst)
+ bSrc = edge->flBlock;
+ slop = BasicBlock::GetSlopFraction(bSrc, bDst) + 1;
+
+ // otherMaxEdgesWeightSum is the sum of all of the other edges flEdgeWeightMax values
+ // This can be used to compute a lower bound for our minimum edge weight
+ noway_assert(maxEdgeWeightSum >= edge->flEdgeWeightMax);
+ UINT64 otherMaxEdgesWeightSum = maxEdgeWeightSum - edge->flEdgeWeightMax;
+
+ // otherMinEdgesWeightSum is the sum of all of the other edges flEdgeWeightMin values
+ // This can be used to compute an upper bound for our maximum edge weight
+ noway_assert(minEdgeWeightSum >= edge->flEdgeWeightMin);
+ UINT64 otherMinEdgesWeightSum = minEdgeWeightSum - edge->flEdgeWeightMin;
+
+ if (bDstWeight >= otherMaxEdgesWeightSum)
+ {
+ // minWeightCalc is our minWeight when every other path to bDst takes its flEdgeWeightMax value
+ BasicBlock::weight_t minWeightCalc =
+ (BasicBlock::weight_t)(bDstWeight - otherMaxEdgesWeightSum);
+ if (minWeightCalc > edge->flEdgeWeightMin)
+ {
+ assignOK &= edge->setEdgeWeightMinChecked(minWeightCalc, slop, &usedSlop);
+ }
+ }
+
+ if (bDstWeight >= otherMinEdgesWeightSum)
+ {
+ // maxWeightCalc is our maxWeight when every other path to bDst takes its flEdgeWeightMin value
+ BasicBlock::weight_t maxWeightCalc =
+ (BasicBlock::weight_t)(bDstWeight - otherMinEdgesWeightSum);
+ if (maxWeightCalc < edge->flEdgeWeightMax)
+ {
+ assignOK &= edge->setEdgeWeightMaxChecked(maxWeightCalc, slop, &usedSlop);
+ }
+ }
+
+ if (!assignOK)
+ {
+ // Here we have inconsistent profile data
+ inconsistentProfileData = true;
+ // No point in continuing
+ goto EARLY_EXIT;
+ }
+
+ // When flEdgeWeightMin equals flEdgeWeightMax we have a "good" edge weight
+ if (edge->flEdgeWeightMin == edge->flEdgeWeightMax)
+ {
+ // Count how many "good" edge weights we have
+ // Each time through we should have more "good" weights
+ // We exit the while loop when we no longer find any new "good" edges
+ goodEdgeCountCurrent++;
+ }
+ else
+ {
+ // Remember that we have seen at least one "Bad" edge weight
+ // so that we will repeat the while loop again
+ hasIncompleteEdgeWeights = true;
+ }
+ }
+ }
+ }
+
+ if (inconsistentProfileData)
+ {
+ hasIncompleteEdgeWeights = true;
+ break;
+ }
+
+ if (numEdges == goodEdgeCountCurrent)
+ {
+ noway_assert(hasIncompleteEdgeWeights == false);
+ break;
+ }
+
+ } while (hasIncompleteEdgeWeights && (goodEdgeCountCurrent > goodEdgeCountPrevious) && (iterations < 8));
+
+EARLY_EXIT:;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (inconsistentProfileData)
+ {
+ printf("fgComputeEdgeWeights() found inconsistent profile data, not using the edge weights\n");
+ }
+ else
+ {
+ if (hasIncompleteEdgeWeights)
+ {
+ printf("fgComputeEdgeWeights() was able to compute exact edge weights for %3d of the %3d edges, using "
+ "%d passes.\n",
+ goodEdgeCountCurrent, numEdges, iterations);
+ }
+ else
+ {
+ printf("fgComputeEdgeWeights() was able to compute exact edge weights for all of the %3d edges, using "
+ "%d passes.\n",
+ numEdges, iterations);
+ }
+
+ fgPrintEdgeWeights();
+ }
+ }
+#endif // DEBUG
+
+ fgSlopUsedInEdgeWeights = usedSlop;
+ fgRangeUsedInEdgeWeights = false;
+
+ // See if any edge weights are expressed in [min..max] form
+
+ for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
+ {
+ if (bDst->bbPreds != nullptr)
+ {
+ for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ bSrc = edge->flBlock;
+ // This is the control flow edge (bSrc -> bDst)
+
+ if (edge->flEdgeWeightMin != edge->flEdgeWeightMax)
+ {
+ fgRangeUsedInEdgeWeights = true;
+ break;
+ }
+ }
+ if (fgRangeUsedInEdgeWeights)
+ {
+ break;
+ }
+ }
+ }
+
+ fgHaveValidEdgeWeights = !inconsistentProfileData;
+ fgEdgeWeightsComputed = true;
+}
+
+// fgOptimizeBranchToEmptyUnconditional:
+// optimize a jump to an empty block which ends in an unconditional branch.
+// Args:
+// block: source block
+// bDest: destination
+// Returns: true if we changed the code
+//
+bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBlock* bDest)
+{
+ bool optimizeJump = true;
+
+ assert(bDest->isEmpty());
+ assert(bDest->bbJumpKind == BBJ_ALWAYS);
+
+ // We do not optimize jumps between two different try regions.
+ // However jumping to a block that is not in any try region is OK
+ //
+ if (bDest->hasTryIndex() && !BasicBlock::sameTryRegion(block, bDest))
+ {
+ optimizeJump = false;
+ }
+
+ // Don't optimize a jump to a removed block
+ if (bDest->bbJumpDest->bbFlags & BBF_REMOVED)
+ {
+ optimizeJump = false;
+ }
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // Don't optimize a jump to a finally target. For BB1->BB2->BB3, where
+ // BB2 is a finally target, if we changed BB1 to jump directly to BB3,
+ // it would skip the finally target. BB1 might be a BBJ_ALWAYS block part
+ // of a BBJ_CALLFINALLY/BBJ_ALWAYS pair, so changing the finally target
+ // would change the unwind behavior.
+ if (bDest->bbFlags & BBF_FINALLY_TARGET)
+ {
+ optimizeJump = false;
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ // Must optimize jump if bDest has been removed
+ //
+ if (bDest->bbFlags & BBF_REMOVED)
+ {
+ optimizeJump = true;
+ }
+
+ // If we are optimizing using real profile weights
+ // then don't optimize a conditional jump to an unconditional jump
+ // until after we have computed the edge weights
+ //
+ if (fgIsUsingProfileWeights() && !fgEdgeWeightsComputed)
+ {
+ fgNeedsUpdateFlowGraph = true;
+ optimizeJump = false;
+ }
+
+ if (optimizeJump)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nOptimizing a jump to an unconditional jump (BB%02u -> BB%02u -> BB%02u)\n", block->bbNum,
+ bDest->bbNum, bDest->bbJumpDest->bbNum);
+ }
+#endif // DEBUG
+
+ //
+ // When we optimize a branch to branch we need to update the profile weight
+ // of bDest by subtracting out the block/edge weight of the path that is being optimized.
+ //
+ if (fgHaveValidEdgeWeights && ((bDest->bbFlags & BBF_PROF_WEIGHT) != 0))
+ {
+ flowList* edge1 = fgGetPredForBlock(bDest, block);
+ noway_assert(edge1 != nullptr);
+
+ BasicBlock::weight_t edgeWeight;
+
+ if (edge1->flEdgeWeightMin != edge1->flEdgeWeightMax)
+ {
+ //
+ // We only have an estimate for the edge weight
+ //
+ edgeWeight = (edge1->flEdgeWeightMin + edge1->flEdgeWeightMax) / 2;
+ //
+ // Clear the profile weight flag
+ //
+ bDest->bbFlags &= ~BBF_PROF_WEIGHT;
+ }
+ else
+ {
+ //
+ // We only have the exact edge weight
+ //
+ edgeWeight = edge1->flEdgeWeightMin;
+ }
+
+ //
+ // Update the bDest->bbWeight
+ //
+ if (bDest->bbWeight > edgeWeight)
+ {
+ bDest->bbWeight -= edgeWeight;
+ }
+ else
+ {
+ bDest->bbWeight = BB_ZERO_WEIGHT;
+ bDest->bbFlags |= BBF_RUN_RARELY; // Set the RarelyRun flag
+ }
+
+ flowList* edge2 = fgGetPredForBlock(bDest->bbJumpDest, bDest);
+
+ if (edge2 != nullptr)
+ {
+ //
+ // Update the edge2 min/max weights
+ //
+ if (edge2->flEdgeWeightMin > edge1->flEdgeWeightMin)
+ {
+ edge2->flEdgeWeightMin -= edge1->flEdgeWeightMin;
+ }
+ else
+ {
+ edge2->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ }
+
+ if (edge2->flEdgeWeightMax > edge1->flEdgeWeightMin)
+ {
+ edge2->flEdgeWeightMax -= edge1->flEdgeWeightMin;
+ }
+ else
+ {
+ edge2->flEdgeWeightMax = BB_ZERO_WEIGHT;
+ }
+ }
+ }
+
+ // Optimize the JUMP to empty unconditional JUMP to go to the new target
+ block->bbJumpDest = bDest->bbJumpDest;
+
+ fgAddRefPred(bDest->bbJumpDest, block, fgRemoveRefPred(bDest, block));
+
+ return true;
+ }
+ return false;
+}
+
+// fgOptimizeEmptyBlock:
+// Does flow optimization of an empty block (can remove it in some cases)
+//
+// Args:
+// block: an empty block
+// Returns: true if we changed the code
+
+bool Compiler::fgOptimizeEmptyBlock(BasicBlock* block)
+{
+ assert(block->isEmpty());
+
+ BasicBlock* bPrev = block->bbPrev;
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ case BBJ_THROW:
+
+ /* can never happen */
+ noway_assert(!"Conditional, switch, or throw block with empty body!");
+ break;
+
+ case BBJ_CALLFINALLY:
+ case BBJ_RETURN:
+ case BBJ_EHCATCHRET:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+
+ /* leave them as is */
+ /* some compilers generate multiple returns and put all of them at the end -
+ * to solve that we need the predecessor list */
+
+ break;
+
+ case BBJ_ALWAYS:
+
+ // A GOTO cannot be to the next block since that
+ // should have been fixed by the optimization above
+ // An exception is made for a jump from Hot to Cold
+ noway_assert(block->bbJumpDest != block->bbNext || ((bPrev != nullptr) && bPrev->isBBCallAlwaysPair()) ||
+ fgInDifferentRegions(block, block->bbNext));
+
+ /* Cannot remove the first BB */
+ if (!bPrev)
+ {
+ break;
+ }
+
+ /* Do not remove a block that jumps to itself - used for while (true){} */
+ if (block->bbJumpDest == block)
+ {
+ break;
+ }
+
+ /* Empty GOTO can be removed iff bPrev is BBJ_NONE */
+ if (bPrev->bbJumpKind != BBJ_NONE)
+ {
+ break;
+ }
+
+ // can't allow fall through into cold code
+ if (block->bbNext == fgFirstColdBlock)
+ {
+ break;
+ }
+
+ /* Can fall through since this is similar to removing
+ * a BBJ_NONE block; only the successor is different */
+
+ __fallthrough;
+
+ case BBJ_NONE:
+
+ /* special case if this is the first BB */
+ if (!bPrev)
+ {
+ assert(block == fgFirstBB);
+ }
+ else
+ {
+ /* If this block follows a BBJ_CALLFINALLY do not remove it
+ * (because we don't know who may jump to it) */
+ if (bPrev->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ break;
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ /* Don't remove finally targets */
+ if (block->bbFlags & BBF_FINALLY_TARGET)
+ break;
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+#if FEATURE_EH_FUNCLETS
+ /* Don't remove an empty block that is in a different EH region
+ * from its successor block, if the block is the target of a
+ * catch return. It is required that the return address of a
+ * catch be in the correct EH region, for re-raise of thread
+ * abort exceptions to work. Insert a NOP in the empty block
+ * to ensure we generate code for the block, if we keep it.
+ */
+ {
+ BasicBlock* succBlock;
+
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ succBlock = block->bbJumpDest;
+ }
+ else
+ {
+ succBlock = block->bbNext;
+ }
+
+ if ((succBlock != nullptr) && !BasicBlock::sameEHRegion(block, succBlock))
+ {
+ // The empty block and the block that follows it are in different
+ // EH regions. Is this a case where they can't be merged?
+
+ bool okToMerge = true; // assume it's ok
+ for (flowList* pred = block->bbPreds; pred; pred = pred->flNext)
+ {
+ if (pred->flBlock->bbJumpKind == BBJ_EHCATCHRET)
+ {
+ assert(pred->flBlock->bbJumpDest == block);
+ okToMerge = false; // we can't get rid of the empty block
+ break;
+ }
+ }
+
+ if (!okToMerge)
+ {
+ // Insert a NOP in the empty block to ensure we generate code
+ // for the catchret target in the right EH region.
+ GenTree* nop = new (this, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
+
+ if (block->IsLIR())
+ {
+ LIR::AsRange(block).InsertAtEnd(nop);
+ }
+ else
+ {
+ GenTreePtr nopStmt = fgInsertStmtAtEnd(block, nop);
+ fgSetStmtSeq(nopStmt);
+ gtSetStmtInfo(nopStmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nKeeping empty block BB%02u - it is the target of a catch return\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ break; // go to the next block
+ }
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ if (!ehCanDeleteEmptyBlock(block))
+ {
+ // We're not allowed to remove this block due to reasons related to the EH table.
+ break;
+ }
+
+ /* special case if this is the last BB */
+ if (block == fgLastBB)
+ {
+ if (!bPrev)
+ {
+ break;
+ }
+ fgLastBB = bPrev;
+ }
+
+ /* Remove the block */
+ compCurBB = block;
+ fgRemoveBlock(block, false);
+ return true;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ return false;
+}
+
+// fgOptimizeSwitchBranches:
+// Does flow optimization for a switch - bypasses jumps to empty unconditional branches,
+// and transforms degenerate switch cases like those with 1 or 2 targets
+//
+// Args:
+// block: BasicBlock that contains the switch
+// Returns: true if we changed the code
+//
+bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
+{
+ assert(block->bbJumpKind == BBJ_SWITCH);
+
+ unsigned jmpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jmpTab = block->bbJumpSwt->bbsDstTab;
+ BasicBlock* bNewDest; // the new jump target for the current switch case
+ BasicBlock* bDest;
+ bool returnvalue = false;
+
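+ // Walk every entry in the switch's jump table, retargeting any entry that jumps
+ // through an empty unconditional-jump block straight to that block's destination.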
+ do
+ {
+ REPEAT_SWITCH:;
+ bDest = *jmpTab;
+ bNewDest = bDest;
+
+ // Do we have a JUMP to an empty unconditional JUMP block?
+ if (bDest->isEmpty() && (bDest->bbJumpKind == BBJ_ALWAYS) &&
+ (bDest != bDest->bbJumpDest)) // special case for self jumps
+ {
+ bool optimizeJump = true;
+
+ // We do not optimize jumps between two different try regions.
+ // However jumping to a block that is not in any try region is OK
+ //
+ if (bDest->hasTryIndex() && !BasicBlock::sameTryRegion(block, bDest))
+ {
+ optimizeJump = false;
+ }
+
+ // If we are optimizing using real profile weights
+ // then don't optimize a switch jump to an unconditional jump
+ // until after we have computed the edge weights
+ //
+ if (fgIsUsingProfileWeights() && !fgEdgeWeightsComputed)
+ {
+ fgNeedsUpdateFlowGraph = true;
+ optimizeJump = false;
+ }
+
+ if (optimizeJump)
+ {
+ bNewDest = bDest->bbJumpDest;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nOptimizing a switch jump to an empty block with an unconditional jump (BB%02u -> BB%02u "
+ "-> BB%02u)\n",
+ block->bbNum, bDest->bbNum, bNewDest->bbNum);
+ }
+#endif // DEBUG
+ }
+ }
+
+ if (bNewDest != bDest)
+ {
+ //
+ // When we optimize a branch to branch we need to update the profile weight
+ // of bDest by subtracting out the block/edge weight of the path that is being optimized.
+ //
+ if (fgIsUsingProfileWeights() && ((bDest->bbFlags & BBF_PROF_WEIGHT) != 0))
+ {
+ if (fgHaveValidEdgeWeights)
+ {
+ flowList* edge = fgGetPredForBlock(bDest, block);
+ BasicBlock::weight_t branchThroughWeight = edge->flEdgeWeightMin;
+
+ if (bDest->bbWeight > branchThroughWeight)
+ {
+ bDest->bbWeight -= branchThroughWeight;
+ }
+ else
+ {
+ bDest->bbWeight = BB_ZERO_WEIGHT;
+ bDest->bbFlags |= BBF_RUN_RARELY;
+ }
+ }
+ }
+
+ // Update the switch jump table
+ *jmpTab = bNewDest;
+
+ // Maintain, if necessary, the set of unique targets of "block."
+ UpdateSwitchTableTarget(block, bDest, bNewDest);
+
+ fgAddRefPred(bNewDest, block, fgRemoveRefPred(bDest, block));
+
+ // we optimized a Switch label - goto REPEAT_SWITCH to follow this new jump
+ returnvalue = true;
+
+ goto REPEAT_SWITCH;
+ }
+ } while (++jmpTab, --jmpCnt);
+
+ GenTreeStmt* switchStmt = nullptr;
+ LIR::Range* blockRange = nullptr;
+
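+ // Locate the switch node at the end of the block (GT_SWITCH_TABLE in LIR, GT_SWITCH
+ // otherwise) so that the degenerate-switch transformations below can rewrite it.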
+ GenTree* switchTree;
+ if (block->IsLIR())
+ {
+ blockRange = &LIR::AsRange(block);
+ switchTree = blockRange->LastNode();
+
+ assert(switchTree->OperGet() == GT_SWITCH_TABLE);
+ }
+ else
+ {
+ switchStmt = block->lastStmt();
+ switchTree = switchStmt->gtStmtExpr;
+
+ assert(switchTree->OperGet() == GT_SWITCH);
+ }
+
+ noway_assert(switchTree->gtType == TYP_VOID);
+
+ // At this point all of the case jump targets have been updated such
+ // that none of them goes to a block that is an empty unconditional jump block
+ //
+ jmpTab = block->bbJumpSwt->bbsDstTab;
+ jmpCnt = block->bbJumpSwt->bbsCount;
+ // Now check for two trivial switch jumps.
+ //
+ if (block->NumSucc(this) == 1)
+ {
+ // Use BBJ_ALWAYS for a switch with only a default clause, or with only one unique successor.
+ BasicBlock* uniqueSucc = jmpTab[0];
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRemoving a switch jump with a single target (BB%02u)\n", block->bbNum);
+ printf("BEFORE:\n");
+ }
+#endif // DEBUG
+
+ if (block->IsLIR())
+ {
+ bool isClosed;
+ unsigned sideEffects;
+ LIR::ReadOnlyRange switchTreeRange = blockRange->GetTreeRange(switchTree, &isClosed, &sideEffects);
+
+ // The switch tree should form a contiguous, side-effect free range by construction. See
+ // Lowering::LowerSwitch for details.
+ assert(isClosed);
+ assert((sideEffects & GTF_ALL_EFFECT) == 0);
+
+ blockRange->Delete(this, block, std::move(switchTreeRange));
+ }
+ else
+ {
+ /* check for SIDE_EFFECTS */
+ if (switchTree->gtFlags & GTF_SIDE_EFFECT)
+ {
+ /* Extract the side effects from the conditional */
+ GenTreePtr sideEffList = nullptr;
+
+ gtExtractSideEffList(switchTree, &sideEffList);
+
+ if (sideEffList == nullptr)
+ {
+ goto NO_SWITCH_SIDE_EFFECT;
+ }
+
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSwitch expression has side effects! Extracting side effects...\n");
+ gtDispTree(switchTree);
+ printf("\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* Replace the conditional statement with the list of side effects */
+ noway_assert(sideEffList->gtOper != GT_STMT);
+ noway_assert(sideEffList->gtOper != GT_SWITCH);
+
+ switchStmt->gtStmtExpr = sideEffList;
+
+ if (fgStmtListThreaded)
+ {
+ /* Update the lclvar ref counts */
+ compCurBB = block;
+ fgUpdateRefCntForExtract(switchTree, sideEffList);
+
+ /* Update ordering, costs, FP levels, etc. */
+ gtSetStmtInfo(switchStmt);
+
+ /* Re-link the nodes for this statement */
+ fgSetStmtSeq(switchStmt);
+ }
+ }
+ else
+ {
+
+ NO_SWITCH_SIDE_EFFECT:
+
+ /* conditional has NO side effect - remove it */
+ fgRemoveStmt(block, switchStmt);
+ }
+ }
+
+ // Change the switch jump into a BBJ_ALWAYS
+ block->bbJumpDest = block->bbJumpSwt->bbsDstTab[0];
+ block->bbJumpKind = BBJ_ALWAYS;
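+ // All switch entries now target the same successor; entry 0 supplied the BBJ_ALWAYS
+ // destination, so drop the pred references contributed by the remaining duplicate entries.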
+ if (jmpCnt > 1)
+ {
+ for (unsigned i = 1; i < jmpCnt; ++i)
+ {
+ (void)fgRemoveRefPred(jmpTab[i], block);
+ }
+ }
+
+ return true;
+ }
+ else if (block->bbJumpSwt->bbsCount == 2 && block->bbJumpSwt->bbsDstTab[1] == block->bbNext)
+ {
+ /* Use a BBJ_COND(switchVal==0) for a switch with only one
+ significant clause besides the default clause, if the
+ default clause is bbNext */
+ GenTree* switchVal = switchTree->gtOp.gtOp1;
+ noway_assert(genActualTypeIsIntOrI(switchVal->TypeGet()));
+
+ // If we are in LIR, remove the jump table from the block.
+ if (block->IsLIR())
+ {
+ GenTree* jumpTable = switchTree->gtOp.gtOp2;
+ assert(jumpTable->OperGet() == GT_JMPTABLE);
+ blockRange->Remove(jumpTable);
+ }
+
+ // Change the GT_SWITCH(switchVal) into GT_JTRUE(GT_EQ(switchVal==0)).
+ // Also mark the node as GTF_DONT_CSE as further down JIT is not capable of handling it.
+ // For example CSE could determine that the expression rooted at GT_EQ is a candidate cse and
+ // replace it with a COMMA node. In such a case we will end up with GT_JTRUE node pointing to
+ // a COMMA node which results in noway asserts in fgMorphSmpOp(), optAssertionGen() and rpPredictTreeRegUse().
+ // For the same reason fgMorphSmpOp() marks GT_JTRUE nodes with RELOP children as GTF_DONT_CSE.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConverting a switch (BB%02u) with only one significant clause besides a default target to a "
+ "conditional branch\n",
+ block->bbNum);
+ }
+#endif // DEBUG
+
+ switchTree->ChangeOper(GT_JTRUE);
+ GenTree* zeroConstNode = gtNewZeroConNode(genActualType(switchVal->TypeGet()));
+ GenTree* condNode = gtNewOperNode(GT_EQ, TYP_INT, switchVal, zeroConstNode);
+ switchTree->gtOp.gtOp1 = condNode;
+ switchTree->gtOp.gtOp1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
+
+ if (block->IsLIR())
+ {
+ blockRange->InsertAfter(switchVal, zeroConstNode, condNode);
+ }
+ else
+ {
+ // Re-link the nodes for this statement.
+ fgSetStmtSeq(switchStmt);
+ }
+
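+ // The branch is taken when switchVal == 0, i.e. to the single significant case (entry 0);
+ // the default entry is bbNext and is reached by falling through.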
+ block->bbJumpDest = block->bbJumpSwt->bbsDstTab[0];
+ block->bbJumpKind = BBJ_COND;
+
+ return true;
+ }
+ return returnvalue;
+}
+
+// fgBlockEndFavorsTailDuplication:
+// Heuristic function that returns true if this block ends in a statement that looks favorable
+// for tail-duplicating its successor (such as assigning a constant to a local).
+// Args:
+// block: BasicBlock we are considering duplicating the successor of
+// Returns:
+// true if it seems like a good idea
+//
+bool Compiler::fgBlockEndFavorsTailDuplication(BasicBlock* block)
+{
+ if (block->isRunRarely())
+ {
+ return false;
+ }
+
+ if (!block->lastStmt())
+ {
+ return false;
+ }
+ else
+ {
+ // Tail duplication tends to pay off when the last statement
+ // is an assignment of a constant, arraylength, or a relop.
+ // This is because these statements produce information about values
+ // that would otherwise be lost at the upcoming merge point.
+
+ GenTreeStmt* lastStmt = block->lastStmt();
+ GenTree* tree = lastStmt->gtStmtExpr;
+ if (tree->gtOper != GT_ASG)
+ {
+ return false;
+ }
+
+ if (tree->OperIsBlkOp())
+ {
+ return false;
+ }
+
+ GenTree* op2 = tree->gtOp.gtOp2;
+ if (op2->gtOper != GT_ARR_LENGTH && !op2->OperIsConst() && ((op2->OperKind() & GTK_RELOP) == 0))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+// fgBlockIsGoodTailDuplicationCandidate:
+// Heuristic function that examines a block (presumably one that is a merge point) to determine
+// if it should be duplicated.
+// args:
+// target - the tail block (candidate for duplication)
+// returns:
+// true if this block seems like a good candidate for duplication
+//
+bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target)
+{
+ GenTreeStmt* stmt = target->FirstNonPhiDef();
+
+ // Here we are looking for blocks with a single statement feeding a conditional branch.
+ // These blocks are small, and when duplicated onto the tail of blocks that end in
+ // assignments, there is a high probability of the branch completely going away.
+
+ // This is by no means the only kind of tail that it is beneficial to duplicate,
+ // just the only one we recognize for now.
+
+ if (stmt != target->lastStmt())
+ {
+ return false;
+ }
+
+ if (target->bbJumpKind != BBJ_COND)
+ {
+ return false;
+ }
+
+ GenTree* tree = stmt->gtStmtExpr;
+
+ if (tree->gtOper != GT_JTRUE)
+ {
+ return false;
+ }
+
+ // must be some kind of relational operator
+ GenTree* cond = tree->gtOp.gtOp1;
+ if (!(cond->OperKind() & GTK_RELOP))
+ {
+ return false;
+ }
+
+ // op1 must be some combination of casts of a local or a constant
+ GenTree* op1 = cond->gtOp.gtOp1;
+ while (op1->gtOper == GT_CAST)
+ {
+ op1 = op1->gtOp.gtOp1;
+ }
+ if (!op1->IsLocal() && !op1->OperIsConst())
+ {
+ return false;
+ }
+
+ // op2 must be some combination of casts of a local or a constant
+ GenTree* op2 = cond->gtOp.gtOp2;
+ while (op2->gtOper == GT_CAST)
+ {
+ op2 = op2->gtOp.gtOp1;
+ }
+ if (!op2->IsLocal() && !op2->OperIsConst())
+ {
+ return false;
+ }
+
+ return true;
+}
+
+// fgOptimizeUncondBranchToSimpleCond:
+// For a block which has an unconditional branch, look to see if its target block
+// is a good candidate for tail duplication, and if so do that duplication.
+//
+// Args:
+// block - block with uncond branch
+// target - block which is target of first block
+//
+// returns: true if changes were made
+
+bool Compiler::fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock* target)
+{
+ assert(block->bbJumpKind == BBJ_ALWAYS);
+ assert(block->bbJumpDest == target);
+
+ // TODO-Review: OK if they are in the same region?
+ if (compHndBBtabCount > 0)
+ {
+ return false;
+ }
+
+ if (!fgBlockIsGoodTailDuplicationCandidate(target))
+ {
+ return false;
+ }
+
+ if (!fgBlockEndFavorsTailDuplication(block))
+ {
+ return false;
+ }
+
+ // NOTE: we do not currently hit this assert because this function is only called when
+ // `fgUpdateFlowGraph` has been called with `doTailDuplication` set to true, and the
+ // backend always calls `fgUpdateFlowGraph` with `doTailDuplication` set to false.
+ assert(!block->IsLIR());
+
+ GenTreeStmt* stmt = target->FirstNonPhiDef();
+ assert(stmt == target->lastStmt());
+
+ // Duplicate the target block at the end of this block
+
+ GenTree* cloned = gtCloneExpr(stmt->gtStmtExpr);
+ noway_assert(cloned);
+ GenTree* jmpStmt = gtNewStmt(cloned);
+
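+ // block takes over target's conditional jump: it becomes BBJ_COND and branches to
+ // target's jump destination; the duplicated JTRUE statement is appended to it below.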
+ block->bbJumpKind = BBJ_COND;
+ block->bbJumpDest = target->bbJumpDest;
+ fgAddRefPred(block->bbJumpDest, block);
+ fgRemoveRefPred(target, block);
+
+ // add an unconditional block after this block to jump to the target block's fallthrough block
+
+ BasicBlock* next = fgNewBBafter(BBJ_ALWAYS, block, true);
+ next->bbFlags = block->bbFlags | BBF_INTERNAL;
+ next->bbFlags &= ~(BBF_TRY_BEG | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1 | BBF_HAS_LABEL | BBF_JMP_TARGET |
+ BBF_FUNCLET_BEG | BBF_LOOP_PREHEADER | BBF_KEEP_BBJ_ALWAYS);
+
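+ // The new block jumps to target's fall-through successor; block (now BBJ_COND) falls
+ // through into it, so the duplicated condition reaches the same two successors as target.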
+ next->bbJumpDest = target->bbNext;
+ target->bbNext->bbFlags |= BBF_JMP_TARGET;
+ fgAddRefPred(next, block);
+ fgAddRefPred(next->bbJumpDest, next);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgOptimizeUncondBranchToSimpleCond(from BB%02u to cond BB%02u), created new uncond BB%02u\n",
+ block->bbNum, target->bbNum, next->bbNum);
+ }
+#endif // DEBUG
+
+ if (fgStmtListThreaded)
+ {
+ gtSetStmtInfo(jmpStmt);
+ }
+
+ fgInsertStmtAtEnd(block, jmpStmt);
+
+ return true;
+}
+
+// fgOptimizeBranchToNext:
+// Optimize a block which has a branch to the following block
+// Args:
+// block - block with a branch
+// bNext - block which is both next and the target of the first block
+// bPrev - block which is prior to the first block
+//
+// returns: true if changes were made
+//
+bool Compiler::fgOptimizeBranchToNext(BasicBlock* block, BasicBlock* bNext, BasicBlock* bPrev)
+{
+ assert(block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_ALWAYS);
+ assert(block->bbJumpDest == bNext);
+ assert(block->bbNext == bNext);
+ assert(block->bbPrev == bPrev);
+
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ // We can't remove it if it is a branch from hot => cold
+ if (!fgInDifferentRegions(block, bNext))
+ {
+ // We can't remove if it is marked as BBF_KEEP_BBJ_ALWAYS
+ if (!(block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
+ {
+ // We can't remove if the BBJ_ALWAYS is part of a BBJ_CALLFINALLY pair
+ if ((bPrev == nullptr) || !bPrev->isBBCallAlwaysPair())
+ {
+ /* the unconditional jump is to the next BB */
+ block->bbJumpKind = BBJ_NONE;
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRemoving unconditional jump to next block (BB%02u -> BB%02u) (converted BB%02u to "
+ "fall-through)\n",
+ block->bbNum, bNext->bbNum, block->bbNum);
+ }
+#endif // DEBUG
+ return true;
+ }
+ }
+ }
+ }
+ else
+ {
+ /* remove the conditional statement at the end of block */
+ noway_assert(block->bbJumpKind == BBJ_COND);
+ noway_assert(block->bbTreeList);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRemoving conditional jump to next block (BB%02u -> BB%02u)\n", block->bbNum, bNext->bbNum);
+ }
+#endif // DEBUG
+
+ if (block->IsLIR())
+ {
+ LIR::Range& blockRange = LIR::AsRange(block);
+ GenTree* jmp = blockRange.LastNode();
+ assert(jmp->OperGet() == GT_JTRUE);
+
+ bool isClosed;
+ unsigned sideEffects;
+ LIR::ReadOnlyRange jmpRange = blockRange.GetTreeRange(jmp, &isClosed, &sideEffects);
+
+ // TODO-LIR: this should really be checking GTF_ALL_EFFECT, but that produces unacceptable
+ // diffs compared to the existing backend.
+ if (isClosed && ((sideEffects & GTF_SIDE_EFFECT) == 0))
+ {
+ // If the jump and its operands form a contiguous, side-effect-free range,
+ // remove them.
+ blockRange.Delete(this, block, std::move(jmpRange));
+ }
+ else
+ {
+ // Otherwise, just remove the jump node itself.
+ blockRange.Remove(jmp);
+ }
+ }
+ else
+ {
+ GenTreeStmt* cond = block->lastStmt();
+ noway_assert(cond->gtStmtExpr->gtOper == GT_JTRUE);
+
+ /* check for SIDE_EFFECTS */
+ if (cond->gtStmtExpr->gtFlags & GTF_SIDE_EFFECT)
+ {
+ /* Extract the side effects from the conditional */
+ GenTreePtr sideEffList = nullptr;
+
+ gtExtractSideEffList(cond->gtStmtExpr, &sideEffList);
+
+ if (sideEffList == nullptr)
+ {
+ compCurBB = block;
+ fgRemoveStmt(block, cond);
+ }
+ else
+ {
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional has side effects! Extracting side effects...\n");
+ gtDispTree(cond);
+ printf("\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* Replace the conditional statement with the list of side effects */
+ noway_assert(sideEffList->gtOper != GT_STMT);
+ noway_assert(sideEffList->gtOper != GT_JTRUE);
+
+ cond->gtStmtExpr = sideEffList;
+
+ if (fgStmtListThreaded)
+ {
+ /* Update the lclvar ref counts */
+ compCurBB = block;
+ fgUpdateRefCntForExtract(cond->gtStmtExpr, sideEffList);
+
+ /* Update ordering, costs, FP levels, etc. */
+ gtSetStmtInfo(cond);
+
+ /* Re-link the nodes for this statement */
+ fgSetStmtSeq(cond);
+ }
+ }
+ }
+ else
+ {
+ compCurBB = block;
+ /* conditional has NO side effect - remove it */
+ fgRemoveStmt(block, cond);
+ }
+ }
+
+ /* Conditional is gone - simply fall into the next block */
+
+ block->bbJumpKind = BBJ_NONE;
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+
+ /* Update bbRefs and bbNum - Conditional predecessors to the same
+ * block are counted twice so we have to remove one of them */
+
+ noway_assert(bNext->countOfInEdges() > 1);
+ fgRemoveRefPred(bNext, block);
+
+ return true;
+ }
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Function called to optimize an unconditional branch that branches
+ * to a conditional branch.
+ * Currently we require that the conditional branch jump back to the
+ * block that follows the unconditional branch.
+ *
+ * We can improve the code execution and layout by concatenating a copy
+ * of the conditional branch block at the end of the conditional branch
+ * and reversing the sense of the branch.
+ *
+ * This is only done when the amount of code to be copied is smaller than
+ * our calculated threshold in maxDupCostSz.
+ *
+ */
+
+bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
+{
+ if (opts.MinOpts())
+ {
+ return false;
+ }
+
+ if (bJump->bbJumpKind != BBJ_ALWAYS)
+ {
+ return false;
+ }
+
+ if (bJump->bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ return false;
+ }
+
+ // Don't hoist a conditional branch into the scratch block; we'd prefer it stay
+ // either BBJ_NONE or BBJ_ALWAYS.
+ if (fgBBisScratch(bJump))
+ {
+ return false;
+ }
+
+ BasicBlock* bDest = bJump->bbJumpDest;
+
+ if (bDest->bbJumpKind != BBJ_COND)
+ {
+ return false;
+ }
+
+ if (bDest->bbJumpDest != bJump->bbNext)
+ {
+ return false;
+ }
+
+ // 'bJump' must be in the same try region as the condition, since we're going to insert
+ // a duplicated condition in 'bJump', and the condition might include exception throwing code.
+ if (!BasicBlock::sameTryRegion(bJump, bDest))
+ {
+ return false;
+ }
+
+ // do not jump into another try region
+ BasicBlock* bDestNext = bDest->bbNext;
+ if (bDestNext->hasTryIndex() && !BasicBlock::sameTryRegion(bJump, bDestNext))
+ {
+ return false;
+ }
+
+ // This function is only called by fgReorderBlocks, which we do not run in the backend.
+ // If we wanted to run block reordering in the backend, we would need to be able to
+ // calculate cost information for LIR on a per-node basis in order for this function
+ // to work.
+ assert(!bJump->IsLIR());
+ assert(!bDest->IsLIR());
+
+ GenTreeStmt* stmt;
+ unsigned estDupCostSz = 0;
+ for (stmt = bDest->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ GenTreePtr expr = stmt->gtStmtExpr;
+
+ /* We call gtPrepareCost to measure the cost of duplicating this tree */
+ gtPrepareCost(expr);
+
+ estDupCostSz += expr->gtCostSz;
+ }
+
+ bool allProfileWeightsAreValid = false;
+ BasicBlock::weight_t weightJump = bJump->bbWeight;
+ BasicBlock::weight_t weightDest = bDest->bbWeight;
+ BasicBlock::weight_t weightNext = bJump->bbNext->bbWeight;
+ bool rareJump = bJump->isRunRarely();
+ bool rareDest = bDest->isRunRarely();
+ bool rareNext = bJump->bbNext->isRunRarely();
+
+ // If we have profile data then we use it to decide whether bJump, bDest,
+ // and the fall-through block should be treated as rarely run
+ if (fgIsUsingProfileWeights())
+ {
+ // Only rely upon the profile weight when all three of these blocks
+ // have either good profile weights or are rarelyRun
+ //
+ if ((bJump->bbFlags & (BBF_PROF_WEIGHT | BBF_RUN_RARELY)) &&
+ (bDest->bbFlags & (BBF_PROF_WEIGHT | BBF_RUN_RARELY)) &&
+ (bJump->bbNext->bbFlags & (BBF_PROF_WEIGHT | BBF_RUN_RARELY)))
+ {
+ allProfileWeightsAreValid = true;
+
+ if ((weightJump * 100) < weightDest)
+ {
+ rareJump = true;
+ }
+
+ if ((weightNext * 100) < weightDest)
+ {
+ rareNext = true;
+ }
+
+ if (((weightDest * 100) < weightJump) && ((weightDest * 100) < weightNext))
+ {
+ rareDest = true;
+ }
+ }
+ }
+
+ unsigned maxDupCostSz = 6;
+
+ //
+ // Branches between the hot and rarely run regions
+ // should be minimized. So we allow a larger size
+ //
+ if (rareDest != rareJump)
+ {
+ maxDupCostSz += 6;
+ }
+
+ if (rareDest != rareNext)
+ {
+ maxDupCostSz += 6;
+ }
+
+ //
+ // When we are ngen-ing:
+ // If the unconditional branch is a rarely run block then
+ // we are willing to have more code expansion since we
+ // won't be running code from this page
+ //
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ if (rareJump)
+ {
+ maxDupCostSz *= 2;
+ }
+ }
+
+ // If the compare has too high a cost then we don't want to dup
+
+ bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nDuplication of the conditional block BB%02u (always branch from BB%02u) %s, because the cost of "
+ "duplication (%i) is %s than %i,"
+ " validProfileWeights = %s\n",
+ bDest->bbNum, bJump->bbNum, costIsTooHigh ? "not done" : "performed", estDupCostSz,
+ costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, allProfileWeightsAreValid ? "true" : "false");
+ }
+#endif // DEBUG
+
+ if (costIsTooHigh)
+ {
+ return false;
+ }
+
+ /* Looks good - duplicate the conditional block */
+
+ GenTree* newStmtList = nullptr; // new stmt list to be added to bJump
+ GenTree* newStmtLast = nullptr;
+ bool cloneExprFailed = false;
+
+ /* Visit all the statements in bDest */
+
+ for (GenTree* curStmt = bDest->bbTreeList; curStmt; curStmt = curStmt->gtNext)
+ {
+ /* Clone/substitute the expression */
+
+ stmt = gtCloneExpr(curStmt)->AsStmt();
+
+ // cloneExpr doesn't handle everything
+
+ if (stmt == nullptr)
+ {
+ cloneExprFailed = true;
+ break;
+ }
+
+ /* Append the expression to our list */
+
+ if (newStmtList != nullptr)
+ {
+ newStmtLast->gtNext = stmt;
+ }
+ else
+ {
+ newStmtList = stmt;
+ }
+
+ stmt->gtPrev = newStmtLast;
+ newStmtLast = stmt;
+ }
+
+ if (cloneExprFailed)
+ {
+ return false;
+ }
+
+ noway_assert(newStmtLast != nullptr);
+ noway_assert(stmt != nullptr);
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ if ((newStmtLast == nullptr) || (stmt == nullptr) || (stmt->gtOper != GT_STMT))
+ {
+ return false;
+ }
+
+ /* Get to the condition node from the statement tree */
+
+ GenTreePtr condTree = stmt->gtStmtExpr;
+ noway_assert(condTree->gtOper == GT_JTRUE);
+
+ if (condTree->gtOper != GT_JTRUE)
+ {
+ return false;
+ }
+
+ //
+ // Set condTree to the operand to the GT_JTRUE
+ //
+ condTree = condTree->gtOp.gtOp1;
+
+ //
+ // This condTree has to be a RelOp comparison
+ //
+ if (condTree->OperIsCompare() == false)
+ {
+ return false;
+ }
+
+ // Bump up the ref-counts of any variables in 'stmt'
+ fgUpdateRefCntForClone(bJump, stmt->gtStmtExpr);
+
+ //
+ // Find the last statement in the bJump block
+ //
+ GenTreeStmt* lastStmt = nullptr;
+ for (stmt = bJump->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ lastStmt = stmt;
+ }
+ stmt = bJump->firstStmt();
+
+ /* Join the two linked lists */
+ newStmtLast->gtNext = nullptr;
+
+ if (lastStmt != nullptr)
+ {
+ stmt->gtPrev = newStmtLast;
+ lastStmt->gtNext = newStmtList;
+ newStmtList->gtPrev = lastStmt;
+ }
+ else
+ {
+ bJump->bbTreeList = newStmtList;
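+ // By convention the head statement's gtPrev points at the last statement of the list.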
+ newStmtList->gtPrev = newStmtLast;
+ }
+
+ //
+ // Reverse the sense of the compare
+ //
+ gtReverseCond(condTree);
+
+ bJump->bbJumpKind = BBJ_COND;
+ bJump->bbJumpDest = bDest->bbNext;
+
+ /* Mark the jump dest block as being a jump target */
+ bJump->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ // We need to update the following flags of the bJump block if they were set in the bbJumpDest block
+ bJump->bbFlags |= (bJump->bbJumpDest->bbFlags &
+ (BBF_HAS_NEWOBJ | BBF_HAS_NEWARRAY | BBF_HAS_NULLCHECK | BBF_HAS_IDX_LEN | BBF_HAS_VTABREF));
+
+ /* Update bbRefs and bbPreds */
+
+ // bJump now falls through into the next block
+ //
+ fgAddRefPred(bJump->bbNext, bJump);
+
+ // bJump no longer jumps to bDest
+ //
+ fgRemoveRefPred(bDest, bJump);
+
+ // bJump now jumps to bDest->bbNext
+ //
+ fgAddRefPred(bDest->bbNext, bJump);
+
+ if (weightJump > 0)
+ {
+ if (allProfileWeightsAreValid)
+ {
+ if (weightDest > weightJump)
+ {
+ bDest->bbWeight = (weightDest - weightJump);
+ }
+ else if (!bDest->isRunRarely())
+ {
+ bDest->bbWeight = BB_UNITY_WEIGHT;
+ }
+ }
+ else
+ {
+ BasicBlock::weight_t newWeightDest = 0;
+ BasicBlock::weight_t unloopWeightDest = 0;
+
+ if (weightDest > weightJump)
+ {
+ newWeightDest = (weightDest - weightJump);
+ }
+ if (weightDest >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
+ {
+ newWeightDest = (weightDest * 2) / (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT);
+ }
+ if ((newWeightDest > 0) || (unloopWeightDest > 0))
+ {
+ bDest->bbWeight = Max(newWeightDest, unloopWeightDest);
+ }
+ }
+ }
+
+#if DEBUG
+ if (verbose)
+ {
+ printf("\nAfter this change in fgOptimizeBranch");
+ fgDispBasicBlocks(verboseTrees);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Function called to optimize switch statements
+ */
+
+bool Compiler::fgOptimizeSwitchJumps()
+{
+ bool result = false; // Our return value
+
+#if 0
+ // TODO-CQ: Add switch jump optimizations?
+ if (!fgHasSwitch)
+ return false;
+
+ if (!fgHaveValidEdgeWeights)
+ return false;
+
+ for (BasicBlock* bSrc = fgFirstBB; bSrc != NULL; bSrc = bSrc->bbNext)
+ {
+ if (bSrc->bbJumpKind == BBJ_SWITCH)
+ {
+ unsigned jumpCnt; jumpCnt = bSrc->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab; jumpTab = bSrc->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ BasicBlock* bDst = *jumpTab;
+ flowList* edgeToDst = fgGetPredForBlock(bDst, bSrc);
+ double outRatio = (double) edgeToDst->flEdgeWeightMin / (double) bSrc->bbWeight;
+
+ if (outRatio >= 0.60)
+ {
+ // straighten switch here...
+ }
+ }
+ while (++jumpTab, --jumpCnt);
+ }
+ }
+#endif
+
+ return result;
+}
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+/*****************************************************************************
+ *
+ * Function called to reorder the flowgraph of BasicBlocks such that any
+ * rarely run blocks are placed at the end of the block list.
+ * If we have profile information we also use that information to reverse
+ * all conditional jumps that would benefit.
+ */
+
+void Compiler::fgReorderBlocks()
+{
+ noway_assert(opts.compDbgCode == false);
+
+#if FEATURE_EH_FUNCLETS
+ assert(fgFuncletsCreated);
+#endif // FEATURE_EH_FUNCLETS
+
+ // We can't relocate anything if we only have one block
+ if (fgFirstBB->bbNext == nullptr)
+ {
+ return;
+ }
+
+ bool newRarelyRun = false;
+ bool movedBlocks = false;
+ bool optimizedSwitches = false;
+
+ // First let us expand the set of run rarely blocks
+ newRarelyRun |= fgExpandRarelyRunBlocks();
+
+#if !FEATURE_EH_FUNCLETS
+ movedBlocks |= fgRelocateEHRegions();
+#endif // !FEATURE_EH_FUNCLETS
+
+ //
+ // If we are using profile weights we can change some
+ // switch jumps into conditional test and jump
+ //
+ if (fgIsUsingProfileWeights())
+ {
+ //
+ // Note that this is not yet implemented
+ //
+ optimizedSwitches = fgOptimizeSwitchJumps();
+ if (optimizedSwitches)
+ {
+ fgUpdateFlowGraph();
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgReorderBlocks()\n");
+
+ printf("\nInitial BasicBlocks");
+ fgDispBasicBlocks(verboseTrees);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ BasicBlock* bNext;
+ BasicBlock* bPrev;
+ BasicBlock* block;
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ // Iterate over every block, remembering our previous block in bPrev
+ for (bPrev = fgFirstBB, block = bPrev->bbNext; block != nullptr; bPrev = block, block = block->bbNext)
+ {
+ //
+ // Consider relocating the rarely run blocks such that they are at the end of the method.
+ // We also consider reversing conditional branches so that they become a not-taken forward branch.
+ //
+
+ // If block is marked with a BBF_KEEP_BBJ_ALWAYS flag then we don't move the block
+ if ((block->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0)
+ {
+ continue;
+ }
+
+ // Finally and handlers blocks are to be kept contiguous.
+ // TODO-CQ: Allow reordering within the handler region
+ if (block->hasHndIndex() == true)
+ {
+ continue;
+ }
+
+ bool reorderBlock = true; // This is set to false if we decide not to reorder 'block'
+ bool isRare = block->isRunRarely();
+ BasicBlock* bDest = nullptr;
+ bool forwardBranch = false;
+ bool backwardBranch = false;
+
+ // Setup bDest
+ if ((bPrev->bbJumpKind == BBJ_COND) || (bPrev->bbJumpKind == BBJ_ALWAYS))
+ {
+ bDest = bPrev->bbJumpDest;
+ forwardBranch = fgIsForwardBranch(bPrev);
+ backwardBranch = !forwardBranch;
+ }
+
+ // We will look for bPrev as a non rarely run block followed by block as a rarely run block
+ //
+ if (bPrev->isRunRarely())
+ {
+ reorderBlock = false;
+ }
+
+ // If the weights of the bPrev, block and bDest were all obtained from a profile run
+ // then we can use them to decide if it is useful to reverse this conditional branch
+
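+ // profHotWeight is the weight threshold used below: blocks whose weight is less than
+ // profHotWeight are treated as uncommonly run and become candidates for relocation.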
+ BasicBlock::weight_t profHotWeight = -1;
+
+ if ((bPrev->bbFlags & BBF_PROF_WEIGHT) && (block->bbFlags & BBF_PROF_WEIGHT) &&
+ ((bDest == nullptr) || (bDest->bbFlags & BBF_PROF_WEIGHT)))
+ {
+ //
+ // All blocks have profile information
+ //
+ if (forwardBranch)
+ {
+ if (bPrev->bbJumpKind == BBJ_ALWAYS)
+ {
+ // We can pull up the blocks that the unconditional jump branches to
+ // if the weight of bDest is greater or equal to the weight of block
+ // also the weight of bDest can't be zero.
+ //
+ if ((bDest->bbWeight < block->bbWeight) || (bDest->bbWeight == 0))
+ {
+ reorderBlock = false;
+ }
+ else
+ {
+ //
+ // If this remains true then we will try to pull up bDest to succeed bPrev
+ //
+ bool moveDestUp = true;
+
+ if (fgHaveValidEdgeWeights)
+ {
+ //
+ // The edge bPrev -> bDest must have a higher minimum weight
+ // than every other edge into bDest
+ //
+ flowList* edgeFromPrev = fgGetPredForBlock(bDest, bPrev);
+ noway_assert(edgeFromPrev != nullptr);
+
+ // Examine all of the other edges into bDest
+ for (flowList* edge = bDest->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ if (edge != edgeFromPrev)
+ {
+ if (edge->flEdgeWeightMax >= edgeFromPrev->flEdgeWeightMin)
+ {
+ moveDestUp = false;
+ break;
+ }
+ }
+ }
+ }
+ else
+ {
+ //
+ // The block bPrev must have a higher weight
+ // than every other block that goes into bDest
+ //
+
+ // Examine all of the other edges into bDest
+ for (flowList* edge = bDest->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ BasicBlock* bTemp = edge->flBlock;
+
+ if ((bTemp != bPrev) && (bTemp->bbWeight >= bPrev->bbWeight))
+ {
+ moveDestUp = false;
+ break;
+ }
+ }
+ }
+
+ // Are we still good to move bDest up to bPrev?
+ if (moveDestUp)
+ {
+ //
+ // We will consider all blocks that have less weight than profHotWeight to be
+ // uncommonly run blocks as compared with the hot path of bPrev taken-jump to bDest
+ //
+ profHotWeight = bDest->bbWeight - 1;
+ }
+ else
+ {
+ if (block->isRunRarely())
+ {
+ // We will move any rarely run blocks
+ profHotWeight = 0;
+ }
+ else
+ {
+ // We will move all blocks that have a weight less or equal to our fall through block
+ profHotWeight = block->bbWeight + 1;
+ }
+ // But we won't try to connect with bDest
+ bDest = nullptr;
+ }
+ }
+ }
+ else // (bPrev->bbJumpKind == BBJ_COND)
+ {
+ noway_assert(bPrev->bbJumpKind == BBJ_COND);
+ //
+ // We will reverse branch if the taken-jump to bDest ratio (i.e. 'takenRatio')
+ // is more than 51%
+ //
+ // We will setup profHotWeight to be maximum bbWeight that a block
+ // could have for us not to want to reverse the conditional branch
+ //
+ // We will consider all blocks that have less weight than profHotWeight to be
+ // uncommonly run blocks as compared with the hot path of bPrev taken-jump to bDest
+ //
+ if (fgHaveValidEdgeWeights)
+ {
+ // We have valid edge weights, however even with valid edge weights
+ // we may only have a minimum and maximum range for each edge's value
+ //
+ // We compare the average weight of the bPrev to bDest edge against
+ // the average weight of the bPrev to block edge to compute the taken ratio.
+ //
+ // bPrev --> [BB04, weight 31]
+ // | \
+ // edgeToBlock -------------> O \
+ // [min=8,max=10] V \
+ // block --> [BB05, weight 10] \
+ // \
+ // edgeToDest ----------------------------> O
+ // [min=21,max=23] |
+ // V
+ // bDest ---------------> [BB08, weight 21]
+ //
+ flowList* edgeToDest = fgGetPredForBlock(bDest, bPrev);
+ flowList* edgeToBlock = fgGetPredForBlock(block, bPrev);
+ noway_assert(edgeToDest != nullptr);
+ noway_assert(edgeToBlock != nullptr);
+ //
+ // Calculate the taken ratio
+ // A takenRatio of 0.10 means taken 10% of the time, not taken 90% of the time
+ // A takenRatio of 0.50 means taken 50% of the time, not taken 50% of the time
+ // A takenRatio of 0.90 means taken 90% of the time, not taken 10% of the time
+ //
+ double takenCount =
+ ((double)edgeToDest->flEdgeWeightMin + (double)edgeToDest->flEdgeWeightMax) / 2.0;
+ double notTakenCount =
+ ((double)edgeToBlock->flEdgeWeightMin + (double)edgeToBlock->flEdgeWeightMax) / 2.0;
+ double totalCount = takenCount + notTakenCount;
+ double takenRatio = takenCount / totalCount;
+
+ // If the takenRatio is greater or equal to 51% then we will reverse the branch
+ if (takenRatio < 0.51)
+ {
+ reorderBlock = false;
+ }
+ else
+ {
+ // set profHotWeight
+ profHotWeight = (edgeToBlock->flEdgeWeightMin + edgeToBlock->flEdgeWeightMax) / 2 - 1;
+ }
+ }
+ else
+ {
+ // We don't have valid edge weight so we will be more conservative
+ // We could have bPrev, block or bDest as part of a loop and thus have extra weight
+ //
+ // We will do two checks:
+ // 1. Check that the weight of bDest is at least two times more than block
+ // 2. Check that the weight of bPrev is at least three times more than block
+ //
+ // bPrev --> [BB04, weight 31]
+ // | \
+ // V \
+ // block --> [BB05, weight 10] \
+ // \
+ // |
+ // V
+ // bDest ---------------> [BB08, weight 21]
+ //
+ // For this case weightDest is calculated as (21+1)/2 or 11
+ // and weightPrev is calculated as (31+2)/3 also 11
+ //
+ // Generally both weightDest and weightPrev should calculate
+ // the same value unless bPrev or bDest are part of a loop
+ //
+ BasicBlock::weight_t weightDest =
+ bDest->isMaxBBWeight() ? bDest->bbWeight : (bDest->bbWeight + 1) / 2;
+ BasicBlock::weight_t weightPrev =
+ bPrev->isMaxBBWeight() ? bPrev->bbWeight : (bPrev->bbWeight + 2) / 3;
+
+ // select the lower of weightDest and weightPrev
+ profHotWeight = (weightDest < weightPrev) ? weightDest : weightPrev;
+
+ // if the weight of block is greater (or equal) to profHotWeight then we don't reverse the cond
+ if (block->bbWeight >= profHotWeight)
+ {
+ reorderBlock = false;
+ }
+ }
+ }
+ }
+ else // not a forwardBranch
+ {
+ if (bPrev->bbFallsThrough())
+ {
+ goto CHECK_FOR_RARE;
+ }
+
+ // Here we should pull up the highest weight block remaining
+ // and place it here since bPrev does not fall through.
+
+ BasicBlock::weight_t highestWeight = 0;
+ BasicBlock* candidateBlock = nullptr;
+ BasicBlock* lastNonFallThroughBlock = bPrev;
+ BasicBlock* bTmp = bPrev->bbNext;
+
+ while (bTmp != nullptr)
+ {
+ // Don't try to split a Call/Always pair
+ //
+ if (bTmp->isBBCallAlwaysPair())
+ {
+ // Move bTmp forward
+ bTmp = bTmp->bbNext;
+ }
+
+ //
+ // Check for loop exit condition
+ //
+ if (bTmp == nullptr)
+ {
+ break;
+ }
+
+ //
+ // if its weight is the highest one we've seen and
+ // the EH regions allow for us to place bTmp after bPrev
+ //
+ if ((bTmp->bbWeight > highestWeight) && fgEhAllowsMoveBlock(bPrev, bTmp))
+ {
+ // When we have a current candidateBlock that is a conditional (or unconditional) jump
+ // to bTmp (which is a higher weighted block) then it is better to keep our current
+ // candidateBlock and have it fall into bTmp
+ //
+ if ((candidateBlock == nullptr) ||
+ ((candidateBlock->bbJumpKind != BBJ_COND) && (candidateBlock->bbJumpKind != BBJ_ALWAYS)) ||
+ (candidateBlock->bbJumpDest != bTmp))
+ {
+ // otherwise we have a new candidateBlock
+ //
+ highestWeight = bTmp->bbWeight;
+ candidateBlock = lastNonFallThroughBlock->bbNext;
+ }
+ }
+
+ if ((bTmp->bbFallsThrough() == false) || (bTmp->bbWeight == 0))
+ {
+ lastNonFallThroughBlock = bTmp;
+ }
+
+ bTmp = bTmp->bbNext;
+ }
+
+ // If we didn't find a suitable block then skip this
+ if (highestWeight == 0)
+ {
+ reorderBlock = false;
+ }
+ else
+ {
+ noway_assert(candidateBlock != nullptr);
+
+ // If the candidateBlock is the same as block then skip this
+ if (candidateBlock == block)
+ {
+ reorderBlock = false;
+ }
+ else
+ {
+ // Set bDest to the block that we want to come after bPrev
+ bDest = candidateBlock;
+
+ // set profHotWeight
+ profHotWeight = highestWeight - 1;
+ }
+ }
+ }
+ }
+ else // we don't have good profile info (or we are falling through)
+ {
+
+ CHECK_FOR_RARE:;
+
+ /* We only want to reorder when we have a rarely run */
+ /* block right after a normal block, */
+ /* (bPrev is known to be a normal block at this point) */
+ if (!isRare)
+ {
+ reorderBlock = false;
+ }
+ else
+ {
+ /* If the jump target bDest is also a rarely run block then we don't want to do the reversal */
+ if (bDest && bDest->isRunRarely())
+ {
+ reorderBlock = false; /* Both block and bDest are rarely run */
+ }
+ else
+ {
+ // We will move any rarely run blocks
+ profHotWeight = 0;
+ }
+ }
+ }
+
+ if (reorderBlock == false)
+ {
+ //
+ // Check for an unconditional branch to a conditional branch
+ // which also branches back to our next block
+ //
+ if (fgOptimizeBranch(bPrev))
+ {
+ noway_assert(bPrev->bbJumpKind == BBJ_COND);
+ }
+ continue;
+ }
+
+ // Now we need to determine which blocks should be moved
+ //
+ // We consider one of two choices:
+ //
+ // 1. Moving the fall-through blocks (or rarely run blocks) down to
+ // later in the method and hopefully connecting the jump dest block
+ // so that it becomes the fall through block
+ //
+ // And when bDest in not NULL, we also consider:
+ //
+ // 2. Moving the bDest block (or blocks) up to bPrev
+ // so that it could be used as a fall through block
+ //
+ // We will prefer option #1 if we are able to connect the jump dest
+ // block as the fall though block otherwise will we try to use option #2
+ //
+
+ //
+ // Consider option #1: relocating blocks starting at 'block'
+ // to later in flowgraph
+ //
+ // We set bStart to the first block that will be relocated
+ // and bEnd to the last block that will be relocated
+
+ BasicBlock* bStart = block;
+ BasicBlock* bEnd = bStart;
+ bNext = bEnd->bbNext;
+ bool connected_bDest = false;
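+ // connected_bDest will record whether relocating [bStart..bEnd] leaves bDest as the
+ // lexical successor of bPrev, allowing bPrev to fall through instead of jumping.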
+
+ if ((backwardBranch && !isRare) ||
+ ((block->bbFlags & BBF_DONT_REMOVE) != 0)) // Don't choose option #1 when block is the start of a try region
+ {
+ bStart = nullptr;
+ bEnd = nullptr;
+ }
+ else
+ {
+ while (true)
+ {
+ // Don't try to split a Call/Always pair
+ //
+ if (bEnd->isBBCallAlwaysPair())
+ {
+ // Move bEnd and bNext forward
+ bEnd = bNext;
+ bNext = bNext->bbNext;
+ }
+
+ //
+ // Check for loop exit condition
+ //
+ if (bNext == nullptr)
+ {
+ break;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ // Check if we've reached the funclets region, at the end of the function
+ if (fgFirstFuncletBB == bEnd->bbNext)
+ {
+ break;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ if (bNext == bDest)
+ {
+ connected_bDest = true;
+ break;
+ }
+
+ // All the blocks must have the same try index
+ // and must not have the BBF_DONT_REMOVE flag set
+
+ if (!BasicBlock::sameTryRegion(bStart, bNext) || ((bNext->bbFlags & BBF_DONT_REMOVE) != 0))
+ {
+ // exit the loop, bEnd is now set to the
+ // last block that we want to relocate
+ break;
+ }
+
+ // If we are relocating rarely run blocks..
+ if (isRare)
+ {
+ // ... then all blocks must be rarely run
+ if (!bNext->isRunRarely())
+ {
+ // exit the loop, bEnd is now set to the
+ // last block that we want to relocate
+ break;
+ }
+ }
+ else
+ {
+ // If we are moving blocks that are hot then all
+ // of the blocks moved must be less than profHotWeight
+ if (bNext->bbWeight >= profHotWeight)
+ {
+ // exit the loop, bEnd is now set to the
+ // last block that we would relocate
+ break;
+ }
+ }
+
+ // Move bEnd and bNext forward
+ bEnd = bNext;
+ bNext = bNext->bbNext;
+ }
+
+ // Set connected_bDest to true if moving blocks [bStart .. bEnd]
+ // connects with the jump dest of bPrev (i.e. bDest) and
+ // thus allows bPrev to fall through instead of jumping.
+ if (bNext == bDest)
+ {
+ connected_bDest = true;
+ }
+ }
+
+ // Now consider option #2: Moving the jump dest block (or blocks)
+ // up to bPrev
+ //
+ // The variables bStart2, bEnd2 and bPrev2 are used for option #2
+ //
+ // We will setup bStart2 to the first block that will be relocated
+ // and bEnd2 to the last block that will be relocated
+ // and bPrev2 to be the lexical pred of bDest
+ //
+ // If after this calculation bStart2 is NULL we cannot use option #2,
+ // otherwise bStart2, bEnd2 and bPrev2 are all non-NULL and we will use option #2
+
+ BasicBlock* bStart2 = nullptr;
+ BasicBlock* bEnd2 = nullptr;
+ BasicBlock* bPrev2 = nullptr;
+
+ // If option #1 didn't connect bDest and bDest isn't NULL
+ if ((connected_bDest == false) && (bDest != nullptr) &&
+ // The jump target cannot be moved if it has the BBF_DONT_REMOVE flag set
+ ((bDest->bbFlags & BBF_DONT_REMOVE) == 0))
+ {
+ // We will consider option #2: relocating blocks starting at 'bDest' to succeed bPrev
+ //
+ // setup bPrev2 to be the lexical pred of bDest
+
+ bPrev2 = block;
+ while (bPrev2 != nullptr)
+ {
+ if (bPrev2->bbNext == bDest)
+ {
+ break;
+ }
+
+ bPrev2 = bPrev2->bbNext;
+ }
+
+ if ((bPrev2 != nullptr) && fgEhAllowsMoveBlock(bPrev, bDest))
+ {
+ // We have decided that relocating bDest to be after bPrev is best
+ // Set bStart2 to the first block that will be relocated
+ // and bEnd2 to the last block that will be relocated
+ //
+ // Assigning to bStart2 selects option #2
+ //
+ bStart2 = bDest;
+ bEnd2 = bStart2;
+ bNext = bEnd2->bbNext;
+
+ while (true)
+ {
+ // Don't try to split a Call/Always pair
+ //
+ if (bEnd2->isBBCallAlwaysPair())
+ {
+ noway_assert(bNext->bbJumpKind == BBJ_ALWAYS);
+ // Move bEnd2 and bNext forward
+ bEnd2 = bNext;
+ bNext = bNext->bbNext;
+ }
+
+ // Check for the Loop exit conditions
+
+ if (bNext == nullptr)
+ {
+ break;
+ }
+
+ if (bEnd2->bbFallsThrough() == false)
+ {
+ break;
+ }
+
+ // All the blocks must have the same try index,
+ // and must not have the BBF_DONT_REMOVE flag set
+
+ if (!BasicBlock::sameTryRegion(bStart2, bNext) || ((bNext->bbFlags & BBF_DONT_REMOVE) != 0))
+ {
+ // exit the loop, bEnd2 is now set to the
+ // last block that we want to relocate
+ break;
+ }
+
+ if (isRare)
+ {
+ /* ... then all blocks must not be rarely run */
+ if (bNext->isRunRarely())
+ {
+ // exit the loop, bEnd2 is now set to the
+ // last block that we want to relocate
+ break;
+ }
+ }
+ else
+ {
+ // If we are relocating hot blocks
+ // all blocks moved must be greater than profHotWeight
+ if (bNext->bbWeight <= profHotWeight)
+ {
+ // exit the loop, bEnd2 is now set to the
+ // last block that we want to relocate
+ break;
+ }
+ }
+
+ // Move bEnd2 and bNext forward
+ bEnd2 = bNext;
+ bNext = bNext->bbNext;
+ }
+ }
+ }
+
+ // If we are using option #1 then ...
+ if (bStart2 == nullptr)
+ {
+ // Don't use option #1 for a backwards branch
+ if (bStart == nullptr)
+ {
+ continue;
+ }
+
+ // .... Don't move a set of blocks that are already at the end of the main method
+ if (bEnd == fgLastBBInMainFunction())
+ {
+ continue;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (bDest != nullptr)
+ {
+ if (bPrev->bbJumpKind == BBJ_COND)
+ {
+ printf("Decided to reverse conditional branch at block BB%02u branch to BB%02u ", bPrev->bbNum,
+ bDest->bbNum);
+ }
+ else if (bPrev->bbJumpKind == BBJ_ALWAYS)
+ {
+ printf("Decided to straighten unconditional branch at block BB%02u branch to BB%02u ", bPrev->bbNum,
+ bDest->bbNum);
+ }
+ else
+ {
+ printf("Decided to place hot code after BB%02u, placed BB%02u after this block ", bPrev->bbNum,
+ bDest->bbNum);
+ }
+
+ if (profHotWeight > 0)
+ {
+ printf("because of IBC profile data\n");
+ }
+ else
+ {
+ if (bPrev->bbFallsThrough())
+ {
+ printf("since it falls into a rarely run block\n");
+ }
+ else
+ {
+ printf("since it is succeeded by a rarely run block\n");
+ }
+ }
+ }
+ else
+ {
+ printf("Decided to relocate block(s) after block BB%02u since they are %s block(s)\n", bPrev->bbNum,
+ block->isRunRarely() ? "rarely run" : "uncommonly run");
+ }
+ }
+#endif // DEBUG
+
+ // We will set insertAfterBlk to the block that precedes our insertion range
+ // We will set bStartPrev to be the block that precedes the set of blocks that we are moving
+ BasicBlock* insertAfterBlk;
+ BasicBlock* bStartPrev;
+
+ if (bStart2 != nullptr)
+ {
+ // Option #2: relocating blocks starting at 'bDest' to follow bPrev
+
+ // Update bStart and bEnd so that we can use these two for all later operations
+ bStart = bStart2;
+ bEnd = bEnd2;
+
+ // Set bStartPrev to be the block that comes before bStart
+ bStartPrev = bPrev2;
+
+ // We will move [bStart..bEnd] to immediately after bPrev
+ insertAfterBlk = bPrev;
+ }
+ else
+ {
+ // option #1: Moving the fall-through blocks (or rarely run blocks) down to later in the method
+
+ // Set bStartPrev to be the block that comes before bStart
+ bStartPrev = bPrev;
+
+ // We will move [bStart..bEnd] but we will pick the insert location later
+ insertAfterBlk = nullptr;
+ }
+
+ // We are going to move [bStart..bEnd] so they can't be NULL
+ noway_assert(bStart != nullptr);
+ noway_assert(bEnd != nullptr);
+
+ // bEnd can't be a BBJ_CALLFINALLY unless it is a RETLESS call
+ noway_assert((bEnd->bbJumpKind != BBJ_CALLFINALLY) || (bEnd->bbFlags & BBF_RETLESS_CALL));
+
+ // bStartPrev must be set to the block that precedes bStart
+ noway_assert(bStartPrev->bbNext == bStart);
+
+ // Since we will be unlinking [bStart..bEnd],
+ // we need to compute and remember if bStart is in each of
+ // the try and handler regions
+ //
+ bool* fStartIsInTry = nullptr;
+ bool* fStartIsInHnd = nullptr;
+
+ if (compHndBBtabCount > 0)
+ {
+ fStartIsInTry = new (this, CMK_Unknown) bool[compHndBBtabCount];
+ fStartIsInHnd = new (this, CMK_Unknown) bool[compHndBBtabCount];
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ fStartIsInTry[XTnum] = HBtab->InTryRegionBBRange(bStart);
+ fStartIsInHnd[XTnum] = HBtab->InHndRegionBBRange(bStart);
+ }
+ }
+
+ /* Temporarily unlink [bStart..bEnd] from the flow graph */
+ fgUnlinkRange(bStart, bEnd);
+
+ if (insertAfterBlk == nullptr)
+ {
+ // Find new location for the unlinked block(s)
+ // Set insertAfterBlk to the block which will precede the insertion point
+
+ if (!bStart->hasTryIndex() && isRare)
+ {
+ // We'll just insert the blocks at the end of the method. If the method
+ // has funclets, we will insert at the end of the main method but before
+ // any of the funclets. Note that we create funclets before we call
+ // fgReorderBlocks().
+
+ insertAfterBlk = fgLastBBInMainFunction();
+ noway_assert(insertAfterBlk != bPrev);
+ }
+ else
+ {
+ BasicBlock* startBlk;
+ BasicBlock* lastBlk;
+ EHblkDsc* ehDsc = ehInitTryBlockRange(bStart, &startBlk, &lastBlk);
+
+ BasicBlock* endBlk;
+
+ /* Setup startBlk and endBlk as the range to search */
+
+ if (ehDsc != nullptr)
+ {
+ endBlk = lastBlk->bbNext;
+
+ /*
+ Multiple (nested) try regions might start from the same BB.
+ For example,
+
+ try3 try2 try1
+ |--- |--- |--- BB01
+ | | | BB02
+ | | |--- BB03
+ | | BB04
+ | |------------ BB05
+ | BB06
+ |------------------- BB07
+
+ Now if we want to insert in try2 region, we will start with startBlk=BB01.
+ The following loop will allow us to start from startBlk==BB04.
+ */
+ while (!BasicBlock::sameTryRegion(startBlk, bStart) && (startBlk != endBlk))
+ {
+ startBlk = startBlk->bbNext;
+ }
+
+ // startBlk cannot equal endBlk as it must come before endBlk
+ if (startBlk == endBlk)
+ {
+ goto CANNOT_MOVE;
+ }
+
+ // we also can't start searching the try region at bStart
+ if (startBlk == bStart)
+ {
+ // if bEnd is the last block in the method or
+ // or if bEnd->bbNext is in a different try region
+ // then we cannot move the blocks
+ //
+ if ((bEnd->bbNext == nullptr) || !BasicBlock::sameTryRegion(startBlk, bEnd->bbNext))
+ {
+ goto CANNOT_MOVE;
+ }
+
+ startBlk = bEnd->bbNext;
+
+ // Check that the new startBlk still comes before endBlk
+
+ // startBlk cannot equal endBlk as it must come before endBlk
+ if (startBlk == endBlk)
+ {
+ goto CANNOT_MOVE;
+ }
+
+ BasicBlock* tmpBlk = startBlk;
+ while ((tmpBlk != endBlk) && (tmpBlk != nullptr))
+ {
+ tmpBlk = tmpBlk->bbNext;
+ }
+
+ // when tmpBlk is NULL that means startBlk is after endBlk
+ // so there is no way to move bStart..bEnd within the try region
+ if (tmpBlk == nullptr)
+ {
+ goto CANNOT_MOVE;
+ }
+ }
+ }
+ else
+ {
+ noway_assert(isRare == false);
+
+ /* We'll search through the entire main method */
+ startBlk = fgFirstBB;
+ endBlk = fgEndBBAfterMainFunction();
+ }
+
+ // Calculate nearBlk and jumpBlk and then call fgFindInsertPoint()
+ // to find our insertion block
+ //
+ {
+ // If the set of blocks that we are moving ends with a BBJ_ALWAYS to
+ // another [rarely run] block that comes after bPrev (forward branch)
+ // then we can set up nearBlk to eliminate this jump sometimes
+ //
+ BasicBlock* nearBlk = nullptr;
+ BasicBlock* jumpBlk = nullptr;
+
+ if ((bEnd->bbJumpKind == BBJ_ALWAYS) && (!isRare || bEnd->bbJumpDest->isRunRarely()) &&
+ fgIsForwardBranch(bEnd, bPrev))
+ {
+ // Set nearBlk to be the block in [startBlk..endBlk]
+ // such that nearBlk->bbNext == bEnd->JumpDest
+ // if no such block exists then set nearBlk to NULL
+ nearBlk = startBlk;
+ jumpBlk = bEnd;
+ do
+ {
+ // We do not want to set nearBlk to bPrev
+ // since then we will not move [bStart..bEnd]
+ //
+ if (nearBlk != bPrev)
+ {
+ // Check if nearBlk satisfies our requirement
+ if (nearBlk->bbNext == bEnd->bbJumpDest)
+ {
+ break;
+ }
+ }
+
+ // Did we reach the endBlk?
+ if (nearBlk == endBlk)
+ {
+ nearBlk = nullptr;
+ break;
+ }
+
+ // advance nearBlk to the next block
+ nearBlk = nearBlk->bbNext;
+
+ } while (nearBlk != nullptr);
+ }
+
+ // if nearBlk is NULL then we set nearBlk to be the
+ // first block that we want to insert after.
+ if (nearBlk == nullptr)
+ {
+ if (bDest != nullptr)
+ {
+ // we want to insert after bDest
+ nearBlk = bDest;
+ }
+ else
+ {
+ // we want to insert after bPrev
+ nearBlk = bPrev;
+ }
+ }
+
+ /* Set insertAfterBlk to the block which we will insert after. */
+
+ insertAfterBlk =
+ fgFindInsertPoint(bStart->bbTryIndex,
+ true, // Insert in the try region.
+ startBlk, endBlk, nearBlk, jumpBlk, bStart->bbWeight == BB_ZERO_WEIGHT);
+ }
+
+ /* See if insertAfterBlk is the same as where we started, */
+ /* or if we could not find any insertion point */
+
+ if ((insertAfterBlk == bPrev) || (insertAfterBlk == nullptr))
+ {
+ CANNOT_MOVE:;
+ /* We couldn't move the blocks, so put everything back */
+ /* relink [bStart .. bEnd] into the flow graph */
+
+ bPrev->setNext(bStart);
+ if (bEnd->bbNext)
+ {
+ bEnd->bbNext->bbPrev = bEnd;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (bStart != bEnd)
+ {
+ printf("Could not relocate blocks (BB%02u .. BB%02u)\n", bStart->bbNum, bEnd->bbNum);
+ }
+ else
+ {
+ printf("Could not relocate block BB%02u\n", bStart->bbNum);
+ }
+ }
+#endif // DEBUG
+ continue;
+ }
+ }
+ }
+
+ noway_assert(insertAfterBlk != nullptr);
+ noway_assert(bStartPrev != nullptr);
+ noway_assert(bStartPrev != insertAfterBlk);
+
+ movedBlocks = true;
+
+#ifdef DEBUG
+
+ if (verbose)
+ {
+ const char* msg;
+ if (bStart2 != nullptr)
+ {
+ msg = "hot";
+ }
+ else
+ {
+ if (isRare)
+ {
+ msg = "rarely run";
+ }
+ else
+ {
+ msg = "uncommon";
+ }
+ }
+
+ printf("Relocated %s ", msg);
+ if (bStart != bEnd)
+ {
+ printf("blocks (BB%02u .. BB%02u)", bStart->bbNum, bEnd->bbNum);
+ }
+ else
+ {
+ printf("block BB%02u", bStart->bbNum);
+ }
+
+ if (bPrev->bbJumpKind == BBJ_COND)
+ {
+ printf(" by reversing conditional jump at BB%02u\n", bPrev->bbNum);
+ }
+ else
+ {
+ printf("\n", bPrev->bbNum);
+ }
+ }
+#endif // DEBUG
+
+ if (bPrev->bbJumpKind == BBJ_COND)
+ {
+ /* Reverse the bPrev jump condition */
+ GenTree* condTest = bPrev->lastStmt();
+
+ condTest = condTest->gtStmt.gtStmtExpr;
+ noway_assert(condTest->gtOper == GT_JTRUE);
+
+ condTest->gtOp.gtOp1 = gtReverseCond(condTest->gtOp.gtOp1);
+
+ if (bStart2 == nullptr)
+ {
+ /* Set the new jump dest for bPrev to the rarely run or uncommon block(s) */
+ bPrev->bbJumpDest = bStart;
+ bStart->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+ }
+ else
+ {
+ noway_assert(insertAfterBlk == bPrev);
+ noway_assert(insertAfterBlk->bbNext == block);
+
+ /* Set the new jump dest for bPrev to the rarely run or uncommon block(s) */
+ bPrev->bbJumpDest = block;
+ block->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+ }
+ }
+
+ // If we are moving blocks that are at the end of a try or handler
+ // we will need to shorten ebdTryLast or ebdHndLast
+ //
+ ehUpdateLastBlocks(bEnd, bStartPrev);
+
+ // If we are moving blocks into the end of a try region or handler region
+ // we will need to extend ebdTryLast or ebdHndLast so the blocks that we
+ // are moving are part of this try or handler region.
+ //
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ // Are we moving blocks to the end of a try region?
+ if (HBtab->ebdTryLast == insertAfterBlk)
+ {
+ if (fStartIsInTry[XTnum])
+ {
+ // bStart..bEnd is in the try, so extend the try region
+ fgSetTryEnd(HBtab, bEnd);
+ }
+ }
+
+ // Are we moving blocks to the end of a handler region?
+ if (HBtab->ebdHndLast == insertAfterBlk)
+ {
+ if (fStartIsInHnd[XTnum])
+ {
+ // bStart..bEnd is in the handler, so extend the handler region
+ fgSetHndEnd(HBtab, bEnd);
+ }
+ }
+ }
+
+ /* We have decided to insert the block(s) after 'insertAfterBlk' */
+ fgMoveBlocksAfter(bStart, bEnd, insertAfterBlk);
+
+ if (bDest)
+ {
+ /* We may need to insert an unconditional branch after bPrev to bDest */
+ fgConnectFallThrough(bPrev, bDest);
+ }
+ else
+ {
+ /* If bPrev falls through, we must insert a jump to block */
+ fgConnectFallThrough(bPrev, block);
+ }
+
+ BasicBlock* bSkip = bEnd->bbNext;
+
+ /* If bEnd falls through, we must insert a jump to bNext */
+ fgConnectFallThrough(bEnd, bNext);
+
+ if (bStart2 == nullptr)
+ {
+ /* If insertAfterBlk falls through, we are forced to */
+ /* add a jump around the block(s) we just inserted */
+ fgConnectFallThrough(insertAfterBlk, bSkip);
+ }
+ else
+ {
+ /* We may need to insert an unconditional branch after bPrev2 to bStart */
+ fgConnectFallThrough(bPrev2, bStart);
+ }
+
+#if DEBUG
+ if (verbose)
+ {
+ printf("\nAfter this change in fgReorderBlocks");
+ fgDispBasicBlocks(verboseTrees);
+ printf("\n");
+ }
+ fgVerifyHandlerTab();
+
+ // Make sure that the predecessor lists are accurate
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
+ }
+#endif // DEBUG
+
+ // Set our iteration point 'block' to be the new bPrev->bbNext
+ // It will be used as the next bPrev
+ block = bPrev->bbNext;
+
+ } // end of for loop(bPrev,block)
+
+ bool changed = movedBlocks || newRarelyRun || optimizedSwitches;
+
+ if (changed)
+ {
+ fgNeedsUpdateFlowGraph = true;
+#if DEBUG
+ // Make sure that the predecessor lists are accurate
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
+ }
+#endif // DEBUG
+ }
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks list to determine the first block to place in the
+ * cold section. This would be the first of a series of rarely executed blocks
+ * such that no succeeding blocks are in a try region or an exception handler
+ * or are frequently executed.
+ */
+
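+// For illustration (hypothetical block numbers and weights): given the layout BB01 (hot),
+// BB02 (hot), BB03 (rarely run), BB04 (rarely run), BB03 becomes the candidate first cold
+// block; if a later block turns out to be hot, or must stay in the hot section (e.g. a
+// handler entry), the search restarts from scratch.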
+void Compiler::fgDetermineFirstColdBlock()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgDetermineFirstColdBlock()\n");
+ }
+#endif // DEBUG
+
+    // Since we may need to create a new transition block
+ // we assert that it is OK to create new blocks.
+ //
+ assert(fgSafeBasicBlockCreation);
+
+ fgFirstColdBlock = nullptr;
+
+#if FEATURE_STACK_FP_X87
+ if (compMayHaveTransitionBlocks)
+ {
+ opts.compProcedureSplitting = false;
+
+ // See comment above declaration of compMayHaveTransitionBlocks for comments on this
+        // See the comment above the declaration of compMayHaveTransitionBlocks for details on this
+ }
+#endif // FEATURE_STACK_FP_X87
+
+ if (!opts.compProcedureSplitting)
+ {
+ JITDUMP("No procedure splitting will be done for this method\n");
+ return;
+ }
+
+#ifdef DEBUG
+ if ((compHndBBtabCount > 0) && !opts.compProcedureSplittingEH)
+ {
+ JITDUMP("No procedure splitting will be done for this method with EH (by request)\n");
+ return;
+ }
+#endif // DEBUG
+
+#if FEATURE_EH_FUNCLETS
+ // TODO-CQ: handle hot/cold splitting in functions with EH (including synchronized methods
+ // that create EH in methods without explicit EH clauses).
+
+ if (compHndBBtabCount > 0)
+ {
+ JITDUMP("No procedure splitting will be done for this method with EH (implementation limitation)\n");
+ return;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ BasicBlock* firstColdBlock = nullptr;
+ BasicBlock* prevToFirstColdBlock = nullptr;
+ BasicBlock* block;
+ BasicBlock* lblk;
+
+ for (lblk = nullptr, block = fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
+ {
+ bool blockMustBeInHotSection = false;
+
+#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
+ if (bbIsHandlerBeg(block))
+ {
+ blockMustBeInHotSection = true;
+ }
+#endif // HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
+
+ // Do we have a candidate for the first cold block?
+ if (firstColdBlock != nullptr)
+ {
+ // We have a candidate for first cold block
+
+ // Is this a hot block?
+ if (blockMustBeInHotSection || (block->isRunRarely() == false))
+ {
+ // We have to restart the search for the first cold block
+ firstColdBlock = nullptr;
+ prevToFirstColdBlock = nullptr;
+ }
+ }
+ else // (firstColdBlock == NULL)
+ {
+ // We don't have a candidate for first cold block
+
+ // Is this a cold block?
+ if (!blockMustBeInHotSection && (block->isRunRarely() == true))
+ {
+ //
+ // If the last block that was hot was a BBJ_COND
+ // then we will have to add an unconditional jump
+                // so the code size for block needs to be large
+ // enough to make it worth our while
+ //
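+                // (fgGetCodeEstimate() gives a rough code size; the ">= 8" threshold mirrors the
+                // "7 bytes or less stays in the Hot section" rule used later in this function,
+                // since the extra jump we would have to add costs about 5 bytes.)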
+ if ((lblk == nullptr) || (lblk->bbJumpKind != BBJ_COND) || (fgGetCodeEstimate(block) >= 8))
+ {
+ // This block is now a candidate for first cold block
+ // Also remember the predecessor to this block
+ firstColdBlock = block;
+ prevToFirstColdBlock = lblk;
+ }
+ }
+ }
+ }
+
+ if (firstColdBlock == fgFirstBB)
+ {
+ // If the first block is Cold then we can't move any blocks
+ // into the cold section
+
+ firstColdBlock = nullptr;
+ }
+
+ if (firstColdBlock != nullptr)
+ {
+ noway_assert(prevToFirstColdBlock != nullptr);
+
+ if (prevToFirstColdBlock == nullptr)
+ {
+ return; // To keep Prefast happy
+ }
+
+ // If we only have one cold block
+ // then it may not be worth it to move it
+ // into the Cold section as a jump to the
+ // Cold section is 5 bytes in size.
+ //
+ if (firstColdBlock->bbNext == nullptr)
+ {
+ // If the size of the cold block is 7 or less
+ // then we will keep it in the Hot section.
+ //
+ if (fgGetCodeEstimate(firstColdBlock) < 8)
+ {
+ firstColdBlock = nullptr;
+ goto EXIT;
+ }
+ }
+
+        // When the last Hot block falls through into the Cold section
+        // we may need to add a jump
+ // we may need to add a jump
+ //
+ if (prevToFirstColdBlock->bbFallsThrough())
+ {
+ switch (prevToFirstColdBlock->bbJumpKind)
+ {
+ default:
+ noway_assert(!"Unhandled jumpkind in fgDetermineFirstColdBlock()");
+
+ case BBJ_CALLFINALLY:
+ // A BBJ_CALLFINALLY that falls through is always followed
+ // by an empty BBJ_ALWAYS.
+ //
+ assert(prevToFirstColdBlock->isBBCallAlwaysPair());
+ firstColdBlock =
+ firstColdBlock->bbNext; // Note that this assignment could make firstColdBlock == nullptr
+ break;
+
+ case BBJ_COND:
+ //
+ // This is a slightly more complicated case, because we will
+ // probably need to insert a block to jump to the cold section.
+ //
+ if (firstColdBlock->isEmpty() && (firstColdBlock->bbJumpKind == BBJ_ALWAYS))
+ {
+ // We can just use this block as the transitionBlock
+ firstColdBlock = firstColdBlock->bbNext;
+ // Note that this assignment could make firstColdBlock == NULL
+ }
+ else
+ {
+ BasicBlock* transitionBlock = fgNewBBafter(BBJ_ALWAYS, prevToFirstColdBlock, true);
+ transitionBlock->bbJumpDest = firstColdBlock;
+ transitionBlock->inheritWeight(firstColdBlock);
+
+ noway_assert(fgComputePredsDone);
+
+ // Update the predecessor list for firstColdBlock
+ fgReplacePred(firstColdBlock, prevToFirstColdBlock, transitionBlock);
+
+ // Add prevToFirstColdBlock as a predecessor for transitionBlock
+ fgAddRefPred(transitionBlock, prevToFirstColdBlock);
+ }
+ break;
+
+ case BBJ_NONE:
+ // If the block preceding the first cold block is BBJ_NONE,
+ // convert it to BBJ_ALWAYS to force an explicit jump.
+
+ prevToFirstColdBlock->bbJumpDest = firstColdBlock;
+ prevToFirstColdBlock->bbJumpKind = BBJ_ALWAYS;
+ break;
+ }
+ }
+ }
+
+ if (firstColdBlock != nullptr)
+ {
+ firstColdBlock->bbFlags |= BBF_JMP_TARGET;
+
+ for (block = firstColdBlock; block; block = block->bbNext)
+ {
+ block->bbFlags |= BBF_COLD;
+ }
+ }
+
+EXIT:;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (firstColdBlock)
+ {
+ printf("fgFirstColdBlock is BB%02u.\n", firstColdBlock->bbNum);
+ }
+ else
+ {
+ printf("fgFirstColdBlock is NULL.\n");
+ }
+
+ fgDispBasicBlocks();
+ }
+
+ fgVerifyHandlerTab();
+#endif // DEBUG
+
+ fgFirstColdBlock = firstColdBlock;
+}
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+/*****************************************************************************
+ *
+ * Function called to "comb" the basic block list.
+ * Removes any empty blocks, unreachable blocks and redundant jumps.
+ * Most of those appear after dead store removal and folding of conditionals.
+ *
+ * Returns: true if the flowgraph has been modified
+ *
+ * It also compacts basic blocks
+ * (consecutive basic blocks that should in fact be one).
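+ *
+ * For illustration (hypothetical block numbers): a BBJ_COND block BB03 that jumps around an
+ * empty BBJ_ALWAYS block BB04 whose target is BB07 gets its condition reversed and is
+ * retargeted straight to BB07, after which BB04 is removed; a jump whose target is simply
+ * the next block is removed via fgOptimizeBranchToNext().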
+ *
+ * NOTE:
+ * Debuggable code and Min Optimization JIT also introduce basic blocks
+ * but we do not optimize those!
+ */
+
+bool Compiler::fgUpdateFlowGraph(bool doTailDuplication)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgUpdateFlowGraph()");
+ }
+#endif // DEBUG
+
+ /* This should never be called for debuggable code */
+
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nBefore updating the flow graph:\n");
+ fgDispBasicBlocks(verboseTrees);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* Walk all the basic blocks - look for unconditional jumps, empty blocks, blocks to compact, etc...
+ *
+ * OBSERVATION:
+     * Once a block is removed, the predecessor lists are no longer accurate (assuming they were at the beginning).
+     * For now we will only use the information in bbRefs because it is easier to keep updated.
+ */
+
+ bool modified = false;
+ bool change;
+ do
+ {
+ change = false;
+
+ BasicBlock* block; // the current block
+ BasicBlock* bPrev = nullptr; // the previous non-worthless block
+ BasicBlock* bNext; // the successor of the current block
+ BasicBlock* bDest; // the jump target of the current block
+
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+            /* Some blocks may already be marked as removed by other optimizations
+             * (e.g. worthless loop removal), without being explicitly removed
+ * from the list.
+ */
+
+ if (block->bbFlags & BBF_REMOVED)
+ {
+ if (bPrev)
+ {
+ bPrev->setNext(block->bbNext);
+ }
+ else
+ {
+ /* WEIRD first basic block is removed - should have an assert here */
+ noway_assert(!"First basic block marked as BBF_REMOVED???");
+
+ fgFirstBB = block->bbNext;
+ }
+ continue;
+ }
+
+ /* We jump to the REPEAT label if we performed a change involving the current block
+ * This is in case there are other optimizations that can show up
+ * (e.g. - compact 3 blocks in a row)
+ * If nothing happens, we then finish the iteration and move to the next block
+ */
+
+ REPEAT:;
+
+ bNext = block->bbNext;
+ bDest = nullptr;
+
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ bDest = block->bbJumpDest;
+ if (doTailDuplication && fgOptimizeUncondBranchToSimpleCond(block, bDest))
+ {
+ change = true;
+ modified = true;
+ bDest = block->bbJumpDest;
+ bNext = block->bbNext;
+ }
+ }
+
+ // Remove JUMPS to the following block
+ // and optimize any JUMPS to JUMPS
+
+ if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_ALWAYS)
+ {
+ bDest = block->bbJumpDest;
+ if (bDest == bNext)
+ {
+ if (fgOptimizeBranchToNext(block, bNext, bPrev))
+ {
+ change = true;
+ modified = true;
+ bDest = nullptr;
+ }
+ }
+ }
+
+ if (bDest != nullptr)
+ {
+ // Do we have a JUMP to an empty unconditional JUMP block?
+ if (bDest->isEmpty() && (bDest->bbJumpKind == BBJ_ALWAYS) &&
+ (bDest != bDest->bbJumpDest)) // special case for self jumps
+ {
+ if (fgOptimizeBranchToEmptyUnconditional(block, bDest))
+ {
+ change = true;
+ modified = true;
+ goto REPEAT;
+ }
+ }
+
+ // Check for a conditional branch that just skips over an empty BBJ_ALWAYS block
+
+ if ((block->bbJumpKind == BBJ_COND) && // block is a BBJ_COND block
+ (bNext != nullptr) && // block is not the last block
+ (bNext->bbRefs == 1) && // No other block jumps to bNext
+ (bNext->bbNext == bDest) && // The block after bNext is the BBJ_COND jump dest
+ (bNext->bbJumpKind == BBJ_ALWAYS) && // The next block is a BBJ_ALWAYS block
+                    bNext->isEmpty() &&                   // and it is an empty block
+ (bNext != bNext->bbJumpDest) && // special case for self jumps
+ (bDest != fgFirstColdBlock))
+ {
+ bool optimizeJump = true;
+
+ // We do not optimize jumps between two different try regions.
+ // However jumping to a block that is not in any try region is OK
+ //
+ if (bDest->hasTryIndex() && !BasicBlock::sameTryRegion(block, bDest))
+ {
+ optimizeJump = false;
+ }
+
+ // Also consider bNext's try region
+ //
+ if (bNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bNext))
+ {
+ optimizeJump = false;
+ }
+
+ // If we are optimizing using real profile weights
+ // then don't optimize a conditional jump to an unconditional jump
+ // until after we have computed the edge weights
+ //
+ if (fgIsUsingProfileWeights())
+ {
+                        // if block and bDest are in different hot/cold regions we can't do this optimization
+ // because we can't allow fall-through into the cold region.
+ if (!fgEdgeWeightsComputed || fgInDifferentRegions(block, bDest))
+ {
+ fgNeedsUpdateFlowGraph = true;
+ optimizeJump = false;
+ }
+ }
+
+ if (optimizeJump)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nReversing a conditional jump around an unconditional jump (BB%02u -> BB%02u -> "
+ "BB%02u)\n",
+ block->bbNum, bDest->bbNum, bNext->bbJumpDest->bbNum);
+ }
+#endif // DEBUG
+ /* Reverse the jump condition */
+
+ GenTree* test = block->lastNode();
+ noway_assert(test->gtOper == GT_JTRUE);
+
+ GenTree* cond = gtReverseCond(test->gtOp.gtOp1);
+ assert(cond == test->gtOp.gtOp1); // Ensure `gtReverseCond` did not create a new node.
+ test->gtOp.gtOp1 = cond;
+
+ // Optimize the Conditional JUMP to go to the new target
+ block->bbJumpDest = bNext->bbJumpDest;
+
+ fgAddRefPred(bNext->bbJumpDest, block, fgRemoveRefPred(bNext->bbJumpDest, bNext));
+
+ /*
+ Unlink bNext from the BasicBlock list; note that we can
+ do this even though other blocks could jump to it - the
+                                reason is that elsewhere in this function we always
+                                redirect jumps-to-jumps so that they target the final label,
+ so even if another block jumps to bNext it won't matter
+ once we're done since any such jump will be redirected
+ to the final target by the time we're done here.
+ */
+
+ fgRemoveRefPred(bNext, block);
+ fgUnlinkBlock(bNext);
+
+ /* Mark the block as removed */
+ bNext->bbFlags |= BBF_REMOVED;
+
+ // If this is the first Cold basic block update fgFirstColdBlock
+ if (bNext == fgFirstColdBlock)
+ {
+ fgFirstColdBlock = bNext->bbNext;
+ }
+
+ //
+ // If we removed the end of a try region or handler region
+ // we will need to update ebdTryLast or ebdHndLast.
+ //
+
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd;
+ HBtab++)
+ {
+ if ((HBtab->ebdTryLast == bNext) || (HBtab->ebdHndLast == bNext))
+ {
+ fgSkipRmvdBlocks(HBtab);
+ }
+ }
+
+ // we optimized this JUMP - goto REPEAT to catch similar cases
+ change = true;
+ modified = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter reversing the jump:\n");
+ fgDispBasicBlocks(verboseTrees);
+ }
+#endif // DEBUG
+
+ /*
+                                For a rare special case we cannot jump to REPEAT, as
+                                jumping to REPEAT would cause us to delete 'block': it
+                                currently appears to be unreachable because it is a self
+                                loop with only a single bbRef (itself). However, since the
+                                unlinked bNext has additional bbRefs (that we will later
+                                connect to 'block'), it is not really unreachable.
+ */
+ if ((bNext->bbRefs > 0) && (bNext->bbJumpDest == block) && (block->bbRefs == 1))
+ {
+ continue;
+ }
+
+ goto REPEAT;
+ }
+ }
+ }
+
+ //
+ // Update the switch jump table such that it follows jumps to jumps:
+ //
+ if (block->bbJumpKind == BBJ_SWITCH)
+ {
+ if (fgOptimizeSwitchBranches(block))
+ {
+ change = true;
+ modified = true;
+ goto REPEAT;
+ }
+ }
+
+ noway_assert(!(block->bbFlags & BBF_REMOVED));
+
+ /* COMPACT blocks if possible */
+
+ if (fgCanCompactBlocks(block, bNext))
+ {
+ fgCompactBlocks(block, bNext);
+
+ /* we compacted two blocks - goto REPEAT to catch similar cases */
+ change = true;
+ modified = true;
+ goto REPEAT;
+ }
+
+ /* Remove unreachable or empty blocks - do not consider blocks marked BBF_DONT_REMOVE or genReturnBB block
+             * These include the first and last blocks of a TRY, exception handlers, and RANGE_CHECK_FAIL THROW blocks */
+
+ if ((block->bbFlags & BBF_DONT_REMOVE) == BBF_DONT_REMOVE || block == genReturnBB)
+ {
+ bPrev = block;
+ continue;
+ }
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // Don't remove the BBJ_ALWAYS block of a BBJ_CALLFINALLY/BBJ_ALWAYS pair.
+ if (block->countOfInEdges() == 0 && bPrev->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ assert(bPrev->isBBCallAlwaysPair());
+ noway_assert(!(bPrev->bbFlags & BBF_RETLESS_CALL));
+ noway_assert(block->bbJumpKind == BBJ_ALWAYS);
+ bPrev = block;
+ continue;
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ noway_assert(!block->bbCatchTyp);
+ noway_assert(!(block->bbFlags & BBF_TRY_BEG));
+
+ /* Remove unreachable blocks
+ *
+ * We'll look for blocks that have countOfInEdges() = 0 (blocks may become
+ * unreachable due to a BBJ_ALWAYS introduced by conditional folding for example)
+ */
+
+ if (block->countOfInEdges() == 0)
+ {
+ /* no references -> unreachable - remove it */
+ /* For now do not update the bbNum, do it at the end */
+
+ fgRemoveBlock(block, true);
+
+ change = true;
+ modified = true;
+
+ /* we removed the current block - the rest of the optimizations won't have a target
+ * continue with the next one */
+
+ continue;
+ }
+ else if (block->countOfInEdges() == 1)
+ {
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ if (block->bbJumpDest == block)
+ {
+ fgRemoveBlock(block, true);
+
+ change = true;
+ modified = true;
+
+ /* we removed the current block - the rest of the optimizations
+ * won't have a target so continue with the next block */
+
+ continue;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ noway_assert(!(block->bbFlags & BBF_REMOVED));
+
+ /* Remove EMPTY blocks */
+
+ if (block->isEmpty())
+ {
+ assert(bPrev == block->bbPrev);
+ if (fgOptimizeEmptyBlock(block))
+ {
+ change = true;
+ modified = true;
+ }
+
+ /* Have we removed the block? */
+
+ if (block->bbFlags & BBF_REMOVED)
+ {
+ /* block was removed - no change to bPrev */
+ continue;
+ }
+ }
+
+ /* Set the predecessor of the last reachable block
+             * If we removed the current block, the predecessor remains unchanged;
+             * otherwise, since the current block is OK, it becomes the predecessor */
+
+ noway_assert(!(block->bbFlags & BBF_REMOVED));
+
+ bPrev = block;
+ }
+ } while (change);
+
+ fgNeedsUpdateFlowGraph = false;
+
+#ifdef DEBUG
+ if (verbose && modified)
+ {
+ printf("\nAfter updating the flow graph:\n");
+ fgDispBasicBlocks(verboseTrees);
+ fgDispHandlerTab();
+ }
+
+ if (compRationalIRForm)
+ {
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ LIR::AsRange(block).CheckLIR(this);
+ }
+ }
+
+ fgVerifyHandlerTab();
+ // Make sure that the predecessor lists are accurate
+ fgDebugCheckBBlist();
+ fgDebugCheckUpdate();
+#endif // DEBUG
+
+ return modified;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ * Check that the flow graph is really updated
+ */
+
+#ifdef DEBUG
+
+void Compiler::fgDebugCheckUpdate()
+{
+ if (!compStressCompile(STRESS_CHK_FLOW_UPDATE, 30))
+ {
+ return;
+ }
+
+ /* We check for these conditions:
+ * no unreachable blocks -> no blocks have countOfInEdges() = 0
+ * no empty blocks -> no blocks have bbTreeList = 0
+ * no un-imported blocks -> no blocks have BBF_IMPORTED not set (this is
+     * kind of redundant with the above, but to make sure)
+ * no un-compacted blocks -> BBJ_NONE followed by block with no jumps to it (countOfInEdges() = 1)
+ */
+
+ BasicBlock* prev;
+ BasicBlock* block;
+ for (prev = nullptr, block = fgFirstBB; block != nullptr; prev = block, block = block->bbNext)
+ {
+ /* no unreachable blocks */
+
+ if ((block->countOfInEdges() == 0) && !(block->bbFlags & BBF_DONT_REMOVE)
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // With funclets, we never get rid of the BBJ_ALWAYS part of a BBJ_CALLFINALLY/BBJ_ALWAYS pair,
+ // even if we can prove that the finally block never returns.
+ && (prev == NULL || block->bbJumpKind != BBJ_ALWAYS || !prev->isBBCallAlwaysPair())
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ )
+ {
+ noway_assert(!"Unreachable block not removed!");
+ }
+
+ /* no empty blocks */
+
+ if (block->isEmpty() && !(block->bbFlags & BBF_DONT_REMOVE))
+ {
+ switch (block->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_RETURN:
+                /* a BBJ_ALWAYS is probably just a GOTO, but it will still have to be handled */
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ /* These jump kinds are allowed to have empty tree lists */
+ break;
+
+ default:
+ /* it may be the case that the block had more than one reference to it
+ * so we couldn't remove it */
+
+ if (block->countOfInEdges() == 0)
+ {
+ noway_assert(!"Empty block not removed!");
+ }
+ break;
+ }
+ }
+
+ /* no un-imported blocks */
+
+ if (!(block->bbFlags & BBF_IMPORTED))
+ {
+ /* internal blocks do not count */
+
+ if (!(block->bbFlags & BBF_INTERNAL))
+ {
+ noway_assert(!"Non IMPORTED block not removed!");
+ }
+ }
+
+ bool prevIsCallAlwaysPair = ((prev != nullptr) && prev->isBBCallAlwaysPair());
+
+        // Check for an unnecessary jump to the next block
+        bool doAssertOnJumpToNextBlock = false; // unless we have a BBJ_COND or BBJ_ALWAYS we cannot assert
+
+ if (block->bbJumpKind == BBJ_COND)
+ {
+ // A conditional branch should never jump to the next block
+ // as it can be folded into a BBJ_NONE;
+ doAssertOnJumpToNextBlock = true;
+ }
+ else if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ // Generally we will want to assert if a BBJ_ALWAYS branches to the next block
+ doAssertOnJumpToNextBlock = true;
+
+ // If the BBF_KEEP_BBJ_ALWAYS flag is set we allow it to jump to the next block
+ if (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ doAssertOnJumpToNextBlock = false;
+ }
+
+ // A call/always pair is also allowed to jump to the next block
+ if (prevIsCallAlwaysPair)
+ {
+ doAssertOnJumpToNextBlock = false;
+ }
+
+ // We are allowed to have a branch from a hot 'block' to a cold 'bbNext'
+ //
+ if ((block->bbNext != nullptr) && fgInDifferentRegions(block, block->bbNext))
+ {
+ doAssertOnJumpToNextBlock = false;
+ }
+ }
+
+ if (doAssertOnJumpToNextBlock)
+ {
+ if (block->bbJumpDest == block->bbNext)
+ {
+ noway_assert(!"Unnecessary jump to the next block!");
+ }
+ }
+
+ /* Make sure BBF_KEEP_BBJ_ALWAYS is set correctly */
+
+ if ((block->bbJumpKind == BBJ_ALWAYS) && prevIsCallAlwaysPair)
+ {
+ noway_assert(block->bbFlags & BBF_KEEP_BBJ_ALWAYS);
+ }
+
+        /* For a BBJ_CALLFINALLY block we make sure that it is followed by */
+        /* a BBJ_ALWAYS block with BBF_INTERNAL set, */
+        /* or that it is a BBF_RETLESS_CALL */
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ assert((block->bbFlags & BBF_RETLESS_CALL) || block->isBBCallAlwaysPair());
+ }
+
+ /* no un-compacted blocks */
+
+ if (fgCanCompactBlocks(block, block->bbNext))
+ {
+ noway_assert(!"Found un-compacted blocks!");
+ }
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ * We've inserted a new block before 'block' that should be part of the same EH region as 'block'.
+ * Update the EH table to make this so. Also, set the new block to have the right EH region data
+ * (copy the bbTryIndex, bbHndIndex, and bbCatchTyp from 'block' to the new predecessor, and clear
+ * 'bbCatchTyp' from 'block').
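+ *
+ * For example (hypothetical block numbers): if BB09 is inserted just before BB10 and BB10 is the
+ * 'ebdHndBeg' of some handler, then after this call BB09 becomes the handler's first block: it
+ * receives BB10's bbCatchTyp and the artificial extra ref count, and any BBJ_EHFILTERRET that
+ * previously targeted BB10 is retargeted to BB09.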
+ */
+void Compiler::fgExtendEHRegionBefore(BasicBlock* block)
+{
+ assert(block->bbPrev != nullptr);
+
+ BasicBlock* bPrev = block->bbPrev;
+
+ bPrev->copyEHRegion(block);
+
+ // The first block (and only the first block) of a handler has bbCatchTyp set
+ bPrev->bbCatchTyp = block->bbCatchTyp;
+ block->bbCatchTyp = BBCT_NONE;
+
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+        /* Multiple pointers in EHblkDsc can point to the same block. We cannot return early after the first match. */
+ if (HBtab->ebdTryBeg == block)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("EH#%u: New first block of try: BB%02u\n", ehGetIndex(HBtab), bPrev->bbNum);
+ }
+#endif // DEBUG
+ HBtab->ebdTryBeg = bPrev;
+ bPrev->bbFlags |= BBF_TRY_BEG | BBF_DONT_REMOVE | BBF_HAS_LABEL;
+ // clear the TryBeg flag unless it begins another try region
+ if (!bbIsTryBeg(block))
+ {
+ block->bbFlags &= ~BBF_TRY_BEG;
+ }
+ }
+
+ if (HBtab->ebdHndBeg == block)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("EH#%u: New first block of handler: BB%02u\n", ehGetIndex(HBtab), bPrev->bbNum);
+ }
+#endif // DEBUG
+
+ // The first block of a handler has an artificial extra refcount. Transfer that to the new block.
+ assert(block->bbRefs > 0);
+ block->bbRefs--;
+
+ HBtab->ebdHndBeg = bPrev;
+ bPrev->bbFlags |= BBF_DONT_REMOVE | BBF_HAS_LABEL;
+ bPrev->bbRefs++;
+
+ // If this is a handler for a filter, the last block of the filter will end with
+            // a BBJ_EHFILTERRET block whose bbJumpDest jumps to the first block of
+            // its handler. So we need to update it to keep things in sync.
+ //
+ if (HBtab->HasFilter())
+ {
+ BasicBlock* bFilterLast = HBtab->BBFilterLast();
+ assert(bFilterLast != nullptr);
+ assert(bFilterLast->bbJumpKind == BBJ_EHFILTERRET);
+ assert(bFilterLast->bbJumpDest == block);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("EH#%u: Updating bbJumpDest for filter ret block: BB%02u => BB%02u\n", ehGetIndex(HBtab),
+ bFilterLast->bbNum, bPrev->bbNum);
+ }
+#endif // DEBUG
+ // Change the bbJumpDest for bFilterLast from the old first 'block' to the new first 'bPrev'
+ bFilterLast->bbJumpDest = bPrev;
+ }
+ }
+
+ if (HBtab->HasFilter() && (HBtab->ebdFilter == block))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("EH#%u: New first block of filter: BB%02u\n", ehGetIndex(HBtab), bPrev->bbNum);
+ }
+#endif // DEBUG
+
+ // The first block of a filter has an artificial extra refcount. Transfer that to the new block.
+ assert(block->bbRefs > 0);
+ block->bbRefs--;
+
+ HBtab->ebdFilter = bPrev;
+ bPrev->bbFlags |= BBF_DONT_REMOVE | BBF_HAS_LABEL;
+ bPrev->bbRefs++;
+ }
+ }
+}
+
+/*****************************************************************************
+ * We've inserted a new block after 'block' that should be part of the same EH region as 'block'.
+ * Update the EH table to make this so. Also, set the new block to have the right EH region data.
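+ *
+ * For example (hypothetical block numbers): if a new block BB21 was just inserted after BB20, and
+ * BB20 happens to be the 'ebdTryLast' or 'ebdHndLast' of some region, then ehUpdateLastBlocks
+ * below extends that region so that BB21 becomes its new last block.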
+ */
+
+void Compiler::fgExtendEHRegionAfter(BasicBlock* block)
+{
+ BasicBlock* newBlk = block->bbNext;
+ assert(newBlk != nullptr);
+
+ newBlk->copyEHRegion(block);
+ newBlk->bbCatchTyp =
+ BBCT_NONE; // Only the first block of a catch has this set, and 'newBlk' can't be the first block of a catch.
+
+ // TODO-Throughput: if the block is not in an EH region, then we don't need to walk the EH table looking for 'last'
+ // block pointers to update.
+ ehUpdateLastBlocks(block, newBlk);
+}
+
+/*****************************************************************************
+ *
+ * Insert a BasicBlock before the given block.
+ */
+
+BasicBlock* Compiler::fgNewBBbefore(BBjumpKinds jumpKind, BasicBlock* block, bool extendRegion)
+{
+ // Create a new BasicBlock and chain it in
+
+ BasicBlock* newBlk = bbNewBasicBlock(jumpKind);
+ newBlk->bbFlags |= BBF_INTERNAL;
+
+ fgInsertBBbefore(block, newBlk);
+
+ newBlk->bbRefs = 0;
+
+ if (newBlk->bbFallsThrough() && block->isRunRarely())
+ {
+ newBlk->bbSetRunRarely();
+ }
+
+ if (extendRegion)
+ {
+ fgExtendEHRegionBefore(block);
+ }
+ else
+ {
+ // When extendRegion is false the caller is responsible for setting these two values
+ newBlk->setTryIndex(MAX_XCPTN_INDEX); // Note: this is still a legal index, just unlikely
+ newBlk->setHndIndex(MAX_XCPTN_INDEX); // Note: this is still a legal index, just unlikely
+ }
+
+ // We assume that if the block we are inserting before is in the cold region, then this new
+ // block will also be in the cold region.
+ newBlk->bbFlags |= (block->bbFlags & BBF_COLD);
+
+ return newBlk;
+}
+
+/*****************************************************************************
+ *
+ * Insert a BasicBlock after the given block.
+ */
+
+BasicBlock* Compiler::fgNewBBafter(BBjumpKinds jumpKind, BasicBlock* block, bool extendRegion)
+{
+ // Create a new BasicBlock and chain it in
+
+ BasicBlock* newBlk = bbNewBasicBlock(jumpKind);
+ newBlk->bbFlags |= BBF_INTERNAL;
+
+ fgInsertBBafter(block, newBlk);
+
+ newBlk->bbRefs = 0;
+
+ if (block->bbFallsThrough() && block->isRunRarely())
+ {
+ newBlk->bbSetRunRarely();
+ }
+
+ if (extendRegion)
+ {
+ fgExtendEHRegionAfter(block);
+ }
+ else
+ {
+ // When extendRegion is false the caller is responsible for setting these two values
+ newBlk->setTryIndex(MAX_XCPTN_INDEX); // Note: this is still a legal index, just unlikely
+ newBlk->setHndIndex(MAX_XCPTN_INDEX); // Note: this is still a legal index, just unlikely
+ }
+
+ // If the new block is in the cold region (because the block we are inserting after
+ // is in the cold region), mark it as such.
+ newBlk->bbFlags |= (block->bbFlags & BBF_COLD);
+
+ return newBlk;
+}
+
+/*****************************************************************************
+ * Inserts basic block before existing basic block.
+ *
+ * If insertBeforeBlk is in the funclet region, then newBlk will be in the funclet region.
+ * (If insertBeforeBlk is the first block of the funclet region, then 'newBlk' will be the
+ * new first block of the funclet region.)
+ */
+void Compiler::fgInsertBBbefore(BasicBlock* insertBeforeBlk, BasicBlock* newBlk)
+{
+ if (insertBeforeBlk->bbPrev)
+ {
+ fgInsertBBafter(insertBeforeBlk->bbPrev, newBlk);
+ }
+ else
+ {
+ newBlk->setNext(fgFirstBB);
+
+ fgFirstBB = newBlk;
+ newBlk->bbPrev = nullptr;
+ }
+
+#if FEATURE_EH_FUNCLETS
+
+ /* Update fgFirstFuncletBB if insertBeforeBlk is the first block of the funclet region. */
+
+ if (fgFirstFuncletBB == insertBeforeBlk)
+ {
+ fgFirstFuncletBB = newBlk;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+}
+
+/*****************************************************************************
+ * Inserts basic block after existing basic block.
+ *
+ * If insertAfterBlk is in the funclet region, then newBlk will be in the funclet region.
+ * (It can't be used to insert a block as the first block of the funclet region).
+ */
+void Compiler::fgInsertBBafter(BasicBlock* insertAfterBlk, BasicBlock* newBlk)
+{
+ newBlk->bbNext = insertAfterBlk->bbNext;
+
+ if (insertAfterBlk->bbNext)
+ {
+ insertAfterBlk->bbNext->bbPrev = newBlk;
+ }
+
+ insertAfterBlk->bbNext = newBlk;
+ newBlk->bbPrev = insertAfterBlk;
+
+ if (fgLastBB == insertAfterBlk)
+ {
+ fgLastBB = newBlk;
+ assert(fgLastBB->bbNext == nullptr);
+ }
+}
+
+// We have two edges (bAlt => bCur) and (bCur => bNext).
+//
+// Returns true if the weight of (bAlt => bCur)
+// is greater than the weight of (bCur => bNext).
+// We compare the edge weights if we have valid edge weights
+// otherwise we compare block weights.
+//
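+// For illustration (hypothetical weights): with valid edge weights, if edge (bAlt => bCur) has
+// flEdgeWeightMin = 40 and edge (bCur => bNext) has flEdgeWeightMax = 30, we return true and
+// prefer making bAlt fall through into bCur. Without valid edge weights, a BBJ_ALWAYS bAlt with
+// bbWeight = 80 beats a bCur with bbWeight = 60, while a BBJ_COND bAlt would need more than
+// twice bCur's weight.
+//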
+bool Compiler::fgIsBetterFallThrough(BasicBlock* bCur, BasicBlock* bAlt)
+{
+ // bCur can't be NULL and must be a fall through bbJumpKind
+ noway_assert(bCur != nullptr);
+ noway_assert(bCur->bbFallsThrough());
+ noway_assert(bAlt != nullptr);
+
+ // We only handle the cases when bAlt is a BBJ_ALWAYS or a BBJ_COND
+ if ((bAlt->bbJumpKind != BBJ_ALWAYS) && (bAlt->bbJumpKind != BBJ_COND))
+ {
+ return false;
+ }
+
+ // if bAlt doesn't jump to bCur it can't be a better fall through than bCur
+ if (bAlt->bbJumpDest != bCur)
+ {
+ return false;
+ }
+
+ // Currently bNext is the fall through for bCur
+ BasicBlock* bNext = bCur->bbNext;
+ noway_assert(bNext != nullptr);
+
+ // We will set result to true if bAlt is a better fall through than bCur
+ bool result;
+ if (fgHaveValidEdgeWeights)
+ {
+ // We will compare the edge weight for our two choices
+ flowList* edgeFromAlt = fgGetPredForBlock(bCur, bAlt);
+ flowList* edgeFromCur = fgGetPredForBlock(bNext, bCur);
+ noway_assert(edgeFromCur != nullptr);
+ noway_assert(edgeFromAlt != nullptr);
+
+ result = (edgeFromAlt->flEdgeWeightMin > edgeFromCur->flEdgeWeightMax);
+ }
+ else
+ {
+ if (bAlt->bbJumpKind == BBJ_ALWAYS)
+ {
+ // Our result is true if bAlt's weight is more than bCur's weight
+ result = (bAlt->bbWeight > bCur->bbWeight);
+ }
+ else
+ {
+ noway_assert(bAlt->bbJumpKind == BBJ_COND);
+ // Our result is true if bAlt's weight is more than twice bCur's weight
+ result = (bAlt->bbWeight > (2 * bCur->bbWeight));
+ }
+ }
+ return result;
+}
+
+//------------------------------------------------------------------------
+// fgCheckEHCanInsertAfterBlock: Determine if a block can be inserted after
+// 'blk' and legally be put in the EH region specified by 'regionIndex'. This
+// can be true if the most nested region the block is in is already 'regionIndex',
+// as we'll just extend the most nested region (and any region ending at the same block).
+// It can also be true if it is the end of (a set of) EH regions, such that
+// inserting the block and properly extending some EH regions (if necessary)
+// puts the block in the correct region. We only consider the case of extending
+// an EH region after 'blk' (that is, to include 'blk' and the newly insert block);
+// we don't consider inserting a block as the first block of an EH region following 'blk'.
+//
+// Consider this example:
+//
+// try3 try2 try1
+// |--- | | BB01
+// | |--- | BB02
+// | | |--- BB03
+// | | | BB04
+// | |--- |--- BB05
+// | BB06
+// |----------------- BB07
+//
+// Passing BB05 with putInTryRegion==true and any of try1/try2/try3 as the region to insert into
+// will return 'true' in each case. Here are the cases:
+// 1. Insert into try1: the most nested EH region BB05 is in is already try1, so we can insert after
+// it and extend try1 (and try2).
+// 2. Insert into try2: we can extend try2, but leave try1 alone.
+// 3. Insert into try3: we can leave try1 and try2 alone, and put the new block just in try3. Note that
+// in this case, after we "loop outwards" in the EH nesting, we get to a place where we're in the middle
+// of the try3 region, not at the end of it.
+// In all cases, it is possible to put a block after BB05 and put it in any of these three 'try' regions legally.
+//
+// Filters are ignored; if 'blk' is in a filter, the answer will be false.
+//
+// Arguments:
+// blk - the BasicBlock we are checking to see if we can insert after.
+// regionIndex - the EH region we want to insert a block into. regionIndex is
+// in the range [0..compHndBBtabCount]; 0 means "main method".
+// putInTryRegion - 'true' if the new block should be inserted in the 'try' region of 'regionIndex'.
+// For regionIndex 0 (the "main method"), this should be 'true'.
+//
+// Return Value:
+// 'true' if a block can be inserted after 'blk' and put in EH region 'regionIndex', else 'false'.
+//
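+// For the diagram above, a caller could ask (hypothetical pointer and indices, with try1 as region 1):
+//
+//   bool canInsert = fgCheckEHCanInsertAfterBlock(bb05, /* regionIndex */ 1, /* putInTryRegion */ true);
+//
+// and would get 'true', since BB05 is the last block of the nested try1 and try2 regions.
+//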
+bool Compiler::fgCheckEHCanInsertAfterBlock(BasicBlock* blk, unsigned regionIndex, bool putInTryRegion)
+{
+ assert(blk != nullptr);
+ assert(regionIndex <= compHndBBtabCount);
+
+ if (regionIndex == 0)
+ {
+ assert(putInTryRegion);
+ }
+
+ bool inTryRegion;
+ unsigned nestedRegionIndex = ehGetMostNestedRegionIndex(blk, &inTryRegion);
+
+ bool insertOK = true;
+ for (;;)
+ {
+ if (nestedRegionIndex == regionIndex)
+ {
+ // This block is in the region we want to be in. We can insert here if it's the right type of region.
+ // (If we want to be in the 'try' region, but the block is in the handler region, then inserting a
+ // new block after 'blk' can't put it in the 'try' region, and vice-versa, since we only consider
+ // extending regions after, not prepending to regions.)
+ // This check will be 'true' if we are trying to put something in the main function (as putInTryRegion
+ // must be 'true' if regionIndex is zero, and inTryRegion will also be 'true' if nestedRegionIndex is zero).
+ insertOK = (putInTryRegion == inTryRegion);
+ break;
+ }
+ else if (nestedRegionIndex == 0)
+ {
+ // The block is in the main function, but we want to put something in a nested region. We can't do that.
+ insertOK = false;
+ break;
+ }
+
+ assert(nestedRegionIndex > 0);
+ EHblkDsc* ehDsc = ehGetDsc(nestedRegionIndex - 1); // ehGetDsc uses [0..compHndBBtabCount) form.
+
+ if (inTryRegion)
+ {
+ if (blk != ehDsc->ebdTryLast)
+ {
+ // Not the last block? Then it must be somewhere else within the try region, so we can't insert here.
+ insertOK = false;
+ break; // exit the 'for' loop
+ }
+ }
+ else
+ {
+ // We ignore filters.
+ if (blk != ehDsc->ebdHndLast)
+ {
+ // Not the last block? Then it must be somewhere else within the handler region, so we can't insert
+ // here.
+ insertOK = false;
+ break; // exit the 'for' loop
+ }
+ }
+
+ // Things look good for this region; check the enclosing regions, if any.
+
+ nestedRegionIndex =
+ ehGetEnclosingRegionIndex(nestedRegionIndex - 1,
+ &inTryRegion); // ehGetEnclosingRegionIndex uses [0..compHndBBtabCount) form.
+
+ // Convert to [0..compHndBBtabCount] form.
+ nestedRegionIndex = (nestedRegionIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : nestedRegionIndex + 1;
+ } // end of for(;;)
+
+ return insertOK;
+}
+
+//------------------------------------------------------------------------
+// Finds the block closest to endBlk in the range [startBlk..endBlk) after which a block can be
+// inserted easily. Note that endBlk cannot be returned; its predecessor is the last block that can
+// be returned. The new block will be put in an EH region described by the arguments regionIndex,
+// putInTryRegion, startBlk, and endBlk (explained below), so it must be legal to put the
+// new block after the insertion location block, give it the specified EH region index, and not break
+// EH nesting rules. This function is careful to choose a block in the correct EH region. However,
+// it assumes that the new block can ALWAYS be placed at the end (just before endBlk). That means
+// that the caller must ensure that is true.
+//
+// Below are the possible cases for the arguments to this method:
+// 1. putInTryRegion == true and regionIndex > 0:
+// Search in the try region indicated by regionIndex.
+// 2. putInTryRegion == false and regionIndex > 0:
+// a. If startBlk is the first block of a filter and endBlk is the block after the end of the
+//         filter (that is, startBlk and endBlk match the filter's bounds exactly), then choose a
+// location within this filter region. (Note that, due to IL rules, filters do not have any
+// EH nested within them.) Otherwise, filters are skipped.
+// b. Else, search in the handler region indicated by regionIndex.
+// 3. regionIndex = 0:
+// Search in the entire main method, excluding all EH regions. In this case, putInTryRegion must be true.
+//
+// This method makes sure to find an insertion point which would not cause the inserted block to
+// be put inside any inner try/filter/handler regions.
+//
+// The actual insertion occurs after the returned block. Note that the returned insertion point might
+// be the last block of a more nested EH region, because the new block will be inserted after the insertion
+// point, and will not extend the more nested EH region. For example:
+//
+// try3 try2 try1
+// |--- | | BB01
+// | |--- | BB02
+// | | |--- BB03
+// | | | BB04
+// | |--- |--- BB05
+// | BB06
+// |----------------- BB07
+//
+// for regionIndex==try3, putInTryRegion==true, we might return BB05, even though BB05 will have a try index
+// for try1 (the most nested 'try' region the block is in). That's because when we insert after BB05, the new
+// block will be in the correct, desired EH region, since try1 and try2 regions will not be extended to include
+// the inserted block. Furthermore, for regionIndex==try2, putInTryRegion==true, we can also return BB05. In this
+// case, when the new block is inserted, the try1 region remains the same, but we need extend region 'try2' to
+// include the inserted block. (We also need to check all parent regions as well, just in case any parent regions
+// also end on the same block, in which case we would also need to extend the parent regions. This is standard
+// procedure when inserting a block at the end of an EH region.)
+//
+// If nearBlk is non-nullptr then we return the closest block after nearBlk that will work best.
+//
+// We try to find a block in the appropriate region that is not a fallthrough block, so we can insert after it
+// without the need to insert a jump around the inserted block.
+//
+// Note that regionIndex is numbered the same as BasicBlock::bbTryIndex and BasicBlock::bbHndIndex, that is, "0" is
+// "main method" and otherwise is +1 from normal, so we can call, e.g., ehGetDsc(tryIndex - 1).
+//
+// Arguments:
+// regionIndex - the region index where the new block will be inserted. Zero means entire method;
+// non-zero means either a "try" or a "handler" region, depending on what putInTryRegion says.
+// putInTryRegion - 'true' to put the block in the 'try' region corresponding to 'regionIndex', 'false'
+// to put the block in the handler region. Should be 'true' if regionIndex==0.
+// startBlk - start block of range to search.
+// endBlk - end block of range to search (don't include this block in the range). Can be nullptr to indicate
+// the end of the function.
+// nearBlk - If non-nullptr, try to find an insertion location closely after this block. If nullptr, we insert
+// at the best location found towards the end of the acceptable block range.
+// jumpBlk - When nearBlk is set, this can be set to the block which jumps to bNext->bbNext (TODO: need to review
+// this?)
+// runRarely - true if the block being inserted is expected to be rarely run. This helps determine
+// the best place to put the new block, by putting in a place that has the same 'rarely run' characteristic.
+//
+// Return Value:
+// A block with the desired characteristics, so the new block will be inserted after this one.
+// If there is no suitable location, return nullptr. This should basically never happen.
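+//
+// For example, fgNewBBinRegion() below searches a 'try' region with a call of this shape
+// (a sketch only; the actual arguments are computed from the EH table):
+//
+//   afterBlk = fgFindInsertPoint(tryIndex, /* putInTryRegion */ true,
+//                                ehDsc->ebdTryBeg, ehDsc->ebdTryLast->bbNext,
+//                                nearBlk, /* jumpBlk */ nullptr, runRarely);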
+
+BasicBlock* Compiler::fgFindInsertPoint(unsigned regionIndex,
+ bool putInTryRegion,
+ BasicBlock* startBlk,
+ BasicBlock* endBlk,
+ BasicBlock* nearBlk,
+ BasicBlock* jumpBlk,
+ bool runRarely)
+{
+ noway_assert(startBlk != nullptr);
+ noway_assert(startBlk != endBlk);
+ noway_assert((regionIndex == 0 && putInTryRegion) || // Search in the main method
+ (putInTryRegion && regionIndex > 0 &&
+ startBlk->bbTryIndex == regionIndex) || // Search in the specified try region
+ (!putInTryRegion && regionIndex > 0 &&
+ startBlk->bbHndIndex == regionIndex)); // Search in the specified handler region
+
+#ifdef DEBUG
+ // Assert that startBlk precedes endBlk in the block list.
+ // We don't want to use bbNum to assert this condition, as we cannot depend on the block numbers being
+ // sequential at all times.
+ for (BasicBlock* b = startBlk; b != endBlk; b = b->bbNext)
+ {
+ assert(b != nullptr); // We reached the end of the block list, but never found endBlk.
+ }
+#endif // DEBUG
+
+ JITDUMP("fgFindInsertPoint(regionIndex=%u, putInTryRegion=%s, startBlk=BB%02u, endBlk=BB%02u, nearBlk=BB%02u, "
+ "jumpBlk=BB%02u, runRarely=%s)\n",
+ regionIndex, dspBool(putInTryRegion), startBlk->bbNum, (endBlk == nullptr) ? 0 : endBlk->bbNum,
+ (nearBlk == nullptr) ? 0 : nearBlk->bbNum, (jumpBlk == nullptr) ? 0 : jumpBlk->bbNum, dspBool(runRarely));
+
+ bool reachedNear = false; // Have we reached 'nearBlk' in our search? If not, we'll keep searching.
+ bool inFilter = false; // Are we in a filter region that we need to skip?
+ BasicBlock* bestBlk =
+ nullptr; // Set to the best insertion point we've found so far that meets all the EH requirements.
+ BasicBlock* goodBlk =
+ nullptr; // Set to an acceptable insertion point that we'll use if we don't find a 'best' option.
+ BasicBlock* blk;
+
+ if (nearBlk != nullptr)
+ {
+ // Does the nearBlk precede the startBlk?
+ for (blk = nearBlk; blk != nullptr; blk = blk->bbNext)
+ {
+ if (blk == startBlk)
+ {
+ reachedNear = true;
+ break;
+ }
+ else if (blk == endBlk)
+ {
+ break;
+ }
+ }
+ }
+
+ for (blk = startBlk; blk != endBlk; blk = blk->bbNext)
+ {
+ // The only way (blk == nullptr) could be true is if the caller passed an endBlk that preceded startBlk in the
+ // block list, or if endBlk isn't in the block list at all. In DEBUG, we'll instead hit the similar
+ // well-formedness assert earlier in this function.
+ noway_assert(blk != nullptr);
+
+ if (blk == nearBlk)
+ {
+ reachedNear = true;
+ }
+
+ if (blk->bbCatchTyp == BBCT_FILTER)
+ {
+ // Record the fact that we entered a filter region, so we don't insert into filters...
+ // Unless the caller actually wanted the block inserted in this exact filter region.
+ // Detect this by the fact that startBlk and endBlk point to the filter begin and end.
+ if (putInTryRegion || (blk != startBlk) || (startBlk != ehGetDsc(regionIndex - 1)->ebdFilter) ||
+ (endBlk != ehGetDsc(regionIndex - 1)->ebdHndBeg))
+ {
+ inFilter = true;
+ }
+ }
+ else if (blk->bbCatchTyp == BBCT_FILTER_HANDLER)
+ {
+ // Record the fact that we exited a filter region.
+ inFilter = false;
+ }
+
+ // Don't insert a block inside this filter region.
+ if (inFilter)
+ {
+ continue;
+ }
+
+ // Note that the new block will be inserted AFTER "blk". We check to make sure that doing so
+ // would put the block in the correct EH region. We make an assumption here that you can
+ // ALWAYS insert the new block before "endBlk" (that is, at the end of the search range)
+        // and be in the correct EH region. This must be guaranteed by the caller (as it is by
+ // fgNewBBinRegion(), which passes the search range as an exact EH region block range).
+ // Because of this assumption, we only check the EH information for blocks before the last block.
+ if (blk->bbNext != endBlk)
+ {
+ // We are in the middle of the search range. We can't insert the new block in
+ // an inner try or handler region. We can, however, set the insertion
+ // point to the last block of an EH try/handler region, if the enclosing
+ // region is the region we wish to insert in. (Since multiple regions can
+ // end at the same block, we need to search outwards, checking that the
+ // block is the last block of every EH region out to the region we want
+ // to insert in.) This is especially useful for putting a call-to-finally
+ // block on AMD64 immediately after its corresponding 'try' block, so in the
+ // common case, we'll just fall through to it. For example:
+ //
+ // BB01
+ // BB02 -- first block of try
+ // BB03
+ // BB04 -- last block of try
+ // BB05 -- first block of finally
+ // BB06
+ // BB07 -- last block of handler
+ // BB08
+ //
+ // Assume there is only one try/finally, so BB01 and BB08 are in the "main function".
+ // For AMD64 call-to-finally, we'll want to insert the BBJ_CALLFINALLY in
+ // the main function, immediately after BB04. This allows us to do that.
+
+ if (!fgCheckEHCanInsertAfterBlock(blk, regionIndex, putInTryRegion))
+ {
+ // Can't insert here.
+ continue;
+ }
+ }
+
+ // Look for an insert location:
+ // 1. We want blocks that don't end with a fall through,
+ // 2. Also, when blk equals nearBlk we may want to insert here.
+ if (!blk->bbFallsThrough() || (blk == nearBlk))
+ {
+ bool updateBestBlk = true; // We will probably update the bestBlk
+
+ // If blk falls through then we must decide whether to use the nearBlk
+ // hint
+ if (blk->bbFallsThrough())
+ {
+ noway_assert(blk == nearBlk);
+ if (jumpBlk != nullptr)
+ {
+ updateBestBlk = fgIsBetterFallThrough(blk, jumpBlk);
+ }
+ else
+ {
+ updateBestBlk = false;
+ }
+ }
+
+ // If we already have a best block, see if the 'runRarely' flags influences
+ // our choice. If we want a runRarely insertion point, and the existing best
+ // block is run rarely but the current block isn't run rarely, then don't
+ // update the best block.
+ // TODO-CQ: We should also handle the reverse case, where runRarely is false (we
+ // want a non-rarely-run block), but bestBlock->isRunRarely() is true. In that
+ // case, we should update the block, also. Probably what we want is:
+ // (bestBlk->isRunRarely() != runRarely) && (blk->isRunRarely() == runRarely)
+ if (updateBestBlk && (bestBlk != nullptr) && runRarely && bestBlk->isRunRarely() && !blk->isRunRarely())
+ {
+ updateBestBlk = false;
+ }
+
+ if (updateBestBlk)
+ {
+ // We found a 'best' insertion location, so save it away.
+ bestBlk = blk;
+
+ // If we've reached nearBlk, we've satisfied all the criteria,
+ // so we're done.
+ if (reachedNear)
+ {
+ goto DONE;
+ }
+
+ // If we haven't reached nearBlk, keep looking for a 'best' location, just
+ // in case we'll find one at or after nearBlk. If no nearBlk was specified,
+ // we prefer inserting towards the end of the given range, so keep looking
+ // for more acceptable insertion locations.
+ }
+ }
+
+ // No need to update goodBlk after we have set bestBlk, but we could still find a better
+ // bestBlk, so keep looking.
+ if (bestBlk != nullptr)
+ {
+ continue;
+ }
+
+ // Set the current block as a "good enough" insertion point, if it meets certain criteria.
+ // We'll return this block if we don't find a "best" block in the search range. The block
+ // can't be a BBJ_CALLFINALLY of a BBJ_CALLFINALLY/BBJ_ALWAYS pair (since we don't want
+ // to insert anything between these two blocks). Otherwise, we can use it. However,
+ // if we'd previously chosen a BBJ_COND block, then we'd prefer the "good" block to be
+ // something else. We keep updating it until we've reached the 'nearBlk', to push it as
+ // close to endBlk as possible.
+ if (!blk->isBBCallAlwaysPair())
+ {
+ if (goodBlk == nullptr)
+ {
+ goodBlk = blk;
+ }
+ else if ((goodBlk->bbJumpKind == BBJ_COND) || (blk->bbJumpKind != BBJ_COND))
+ {
+ if ((blk == nearBlk) || !reachedNear)
+ {
+ goodBlk = blk;
+ }
+ }
+ }
+ }
+
+    // If we didn't find a non-fall-through block, then insert at the last good block.
+
+ if (bestBlk == nullptr)
+ {
+ bestBlk = goodBlk;
+ }
+
+DONE:;
+
+ return bestBlk;
+}
+
+//------------------------------------------------------------------------
+// Creates a new BasicBlock and inserts it in a specific EH region, given by 'tryIndex', 'hndIndex', and 'putInFilter'.
+//
+// If 'putInFilter' is true, then the block is inserted in the filter region given by 'hndIndex'. In this case, tryIndex
+// must be a less nested EH region (that is, tryIndex > hndIndex).
+//
+// Otherwise, the block is inserted in either the try region or the handler region, depending on which one is the inner
+// region. In other words, if the try region indicated by tryIndex is nested in the handler region indicated by
+// hndIndex, then the new BB will be created in the try region, and vice versa.
+//
+// Note that tryIndex and hndIndex are numbered the same as BasicBlock::bbTryIndex and BasicBlock::bbHndIndex, that is,
+// "0" is "main method" and otherwise is +1 from normal, so we can call, e.g., ehGetDsc(tryIndex - 1).
+//
+// To be more specific, this function will create a new BB in one of the following 5 regions (if putInFilter is false):
+// 1. When tryIndex = 0 and hndIndex = 0:
+// The new BB will be created in the method region.
+// 2. When tryIndex != 0 and hndIndex = 0:
+// The new BB will be created in the try region indicated by tryIndex.
+// 3. When tryIndex == 0 and hndIndex != 0:
+// The new BB will be created in the handler region indicated by hndIndex.
+// 4. When tryIndex != 0 and hndIndex != 0 and tryIndex < hndIndex:
+// In this case, the try region is nested inside the handler region. Therefore, the new BB will be created
+// in the try region indicated by tryIndex.
+// 5. When tryIndex != 0 and hndIndex != 0 and tryIndex > hndIndex:
+// In this case, the handler region is nested inside the try region. Therefore, the new BB will be created
+// in the handler region indicated by hndIndex.
+//
+// Note that if tryIndex != 0 and hndIndex != 0 then tryIndex must not be equal to hndIndex (this makes sense because
+// if they are equal, you are asking to put the new block in both the try and handler, which is impossible).
+//
+// The BasicBlock will not be inserted inside an EH region that is more nested than the requested tryIndex/hndIndex
+// region (so the function is careful to skip more nested EH regions when searching for a place to put the new block).
+//
+// This function cannot be used to insert a block as the first block of any region. It always inserts a block after
+// an existing block in the given region.
+//
+// If nearBlk is nullptr, or the block is run rarely, then the new block is assumed to be run rarely.
+//
+// Arguments:
+// jumpKind - the jump kind of the new block to create.
+// tryIndex - the try region to insert the new block in, described above. This must be a number in the range
+// [0..compHndBBtabCount].
+// hndIndex - the handler region to insert the new block in, described above. This must be a number in the range
+// [0..compHndBBtabCount].
+// nearBlk - insert the new block closely after this block, if possible. If nullptr, put the new block anywhere
+// in the requested region.
+// putInFilter - put the new block in the filter region given by hndIndex, as described above.
+// runRarely - 'true' if the new block is run rarely.
+// insertAtEnd - 'true' if the block should be inserted at the end of the region. Note: this is currently only
+// implemented when inserting into the main function (not into any EH region).
+//
+// Return Value:
+// The new block.
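+//
+// For example (a sketch only, hypothetical caller): to create a rarely run block in the same
+// try/handler region as an existing block 'blk', and close to it:
+//
+//   BasicBlock* newBlk = fgNewBBinRegion(BBJ_THROW, blk->bbTryIndex, blk->bbHndIndex,
+//                                        /* nearBlk */ blk, /* putInFilter */ false,
+//                                        /* runRarely */ true, /* insertAtEnd */ false);
+//
+// (The srcBlk overload below does essentially this, plus the check for filter regions.)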
+
+BasicBlock* Compiler::fgNewBBinRegion(BBjumpKinds jumpKind,
+ unsigned tryIndex,
+ unsigned hndIndex,
+ BasicBlock* nearBlk,
+ bool putInFilter /* = false */,
+ bool runRarely /* = false */,
+ bool insertAtEnd /* = false */)
+{
+ assert(tryIndex <= compHndBBtabCount);
+ assert(hndIndex <= compHndBBtabCount);
+
+ /* afterBlk is the block which will precede the newBB */
+ BasicBlock* afterBlk;
+
+ // start and end limit for inserting the block
+ BasicBlock* startBlk = nullptr;
+ BasicBlock* endBlk = nullptr;
+
+ bool putInTryRegion = true;
+ unsigned regionIndex = 0;
+
+ // First, figure out which region (the "try" region or the "handler" region) to put the newBB in.
+ if ((tryIndex == 0) && (hndIndex == 0))
+ {
+ assert(!putInFilter);
+
+ endBlk = fgEndBBAfterMainFunction(); // don't put new BB in funclet region
+
+ if (insertAtEnd || (nearBlk == nullptr))
+ {
+ /* We'll just insert the block at the end of the method, before the funclets */
+
+ afterBlk = fgLastBBInMainFunction();
+ goto _FoundAfterBlk;
+ }
+ else
+ {
+ // We'll search through the entire method
+ startBlk = fgFirstBB;
+ }
+
+ noway_assert(regionIndex == 0);
+ }
+ else
+ {
+ noway_assert(tryIndex > 0 || hndIndex > 0);
+ PREFIX_ASSUME(tryIndex <= compHndBBtabCount);
+ PREFIX_ASSUME(hndIndex <= compHndBBtabCount);
+
+ // Decide which region to put in, the "try" region or the "handler" region.
+ if (tryIndex == 0)
+ {
+ noway_assert(hndIndex > 0);
+ putInTryRegion = false;
+ }
+ else if (hndIndex == 0)
+ {
+ noway_assert(tryIndex > 0);
+ noway_assert(putInTryRegion);
+ assert(!putInFilter);
+ }
+ else
+ {
+ noway_assert(tryIndex > 0 && hndIndex > 0 && tryIndex != hndIndex);
+ putInTryRegion = (tryIndex < hndIndex);
+ }
+
+ if (putInTryRegion)
+ {
+ // Try region is the inner region.
+ // In other words, try region must be nested inside the handler region.
+ noway_assert(hndIndex == 0 || bbInHandlerRegions(hndIndex - 1, ehGetDsc(tryIndex - 1)->ebdTryBeg));
+ assert(!putInFilter);
+ }
+ else
+ {
+ // Handler region is the inner region.
+ // In other words, handler region must be nested inside the try region.
+ noway_assert(tryIndex == 0 || bbInTryRegions(tryIndex - 1, ehGetDsc(hndIndex - 1)->ebdHndBeg));
+ }
+
+ // Figure out the start and end block range to search for an insertion location. Pick the beginning and
+ // ending blocks of the target EH region (the 'endBlk' is one past the last block of the EH region, to make
+ // loop iteration easier). Note that, after funclets have been created (for FEATURE_EH_FUNCLETS),
+ // this linear block range will not include blocks of handlers for try/handler clauses nested within
+ // this EH region, as those blocks have been extracted as funclets. That is ok, though, because we don't
+ // want to insert a block in any nested EH region.
+
+ if (putInTryRegion)
+ {
+ // We will put the newBB in the try region.
+ EHblkDsc* ehDsc = ehGetDsc(tryIndex - 1);
+ startBlk = ehDsc->ebdTryBeg;
+ endBlk = ehDsc->ebdTryLast->bbNext;
+ regionIndex = tryIndex;
+ }
+ else if (putInFilter)
+ {
+ // We will put the newBB in the filter region.
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex - 1);
+ startBlk = ehDsc->ebdFilter;
+ endBlk = ehDsc->ebdHndBeg;
+ regionIndex = hndIndex;
+ }
+ else
+ {
+ // We will put the newBB in the handler region.
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex - 1);
+ startBlk = ehDsc->ebdHndBeg;
+ endBlk = ehDsc->ebdHndLast->bbNext;
+ regionIndex = hndIndex;
+ }
+
+ noway_assert(regionIndex > 0);
+ }
+
+ // Now find the insertion point.
+ afterBlk = fgFindInsertPoint(regionIndex, putInTryRegion, startBlk, endBlk, nearBlk, nullptr, runRarely);
+
+_FoundAfterBlk:;
+
+ /* We have decided to insert the block after 'afterBlk'. */
+ noway_assert(afterBlk != nullptr);
+
+ JITDUMP("fgNewBBinRegion(jumpKind=%u, tryIndex=%u, hndIndex=%u, putInFilter=%s, runRarely=%s, insertAtEnd=%s): "
+ "inserting after BB%02u\n",
+ jumpKind, tryIndex, hndIndex, dspBool(putInFilter), dspBool(runRarely), dspBool(insertAtEnd),
+ afterBlk->bbNum);
+
+ return fgNewBBinRegionWorker(jumpKind, afterBlk, regionIndex, putInTryRegion);
+}
+
+//------------------------------------------------------------------------
+// Creates a new BasicBlock and inserts it in the same EH region as 'srcBlk'.
+//
+// See the implementation of fgNewBBinRegion() used by this one for more notes.
+//
+// Arguments:
+// jumpKind - the jump kind of the new block to create.
+// srcBlk - insert the new block in the same EH region as this block, and closely after it if possible.
+//
+// Return Value:
+// The new block.
+
+BasicBlock* Compiler::fgNewBBinRegion(BBjumpKinds jumpKind,
+ BasicBlock* srcBlk,
+ bool runRarely /* = false */,
+ bool insertAtEnd /* = false */)
+{
+ assert(srcBlk != nullptr);
+
+ const unsigned tryIndex = srcBlk->bbTryIndex;
+ const unsigned hndIndex = srcBlk->bbHndIndex;
+ bool putInFilter = false;
+
+ // Check to see if we need to put the new block in a filter. We do if srcBlk is in a filter.
+ // This can only be true if there is a handler index, and the handler region is more nested than the
+ // try region (if any). This is because no EH regions can be nested within a filter.
+ if (BasicBlock::ehIndexMaybeMoreNested(hndIndex, tryIndex))
+ {
+ assert(hndIndex != 0); // If hndIndex is more nested, we must be in some handler!
+ putInFilter = ehGetDsc(hndIndex - 1)->InFilterRegionBBRange(srcBlk);
+ }
+
+ return fgNewBBinRegion(jumpKind, tryIndex, hndIndex, srcBlk, putInFilter, runRarely, insertAtEnd);
+}
+
+//------------------------------------------------------------------------
+// fgNewBBinRegion: Creates a new BasicBlock and inserts it at the end of the function.
+//
+// See the implementation of fgNewBBinRegion() used by this one for more notes.
+//
+// Arguments:
+// jumpKind - the jump kind of the new block to create.
+//
+// Return Value:
+// The new block.
+
+BasicBlock* Compiler::fgNewBBinRegion(BBjumpKinds jumpKind)
+{
+ return fgNewBBinRegion(jumpKind, 0, 0, nullptr, /* putInFilter */ false, /* runRarely */ false,
+ /* insertAtEnd */ true);
+}
+
+//------------------------------------------------------------------------
+// fgNewBBinRegionWorker: Creates a new BasicBlock, and inserts it after 'afterBlk'.
+//
+// The block cannot be inserted into a more nested try/handler region than that specified by 'regionIndex'.
+// (It is given exactly 'regionIndex'.) Thus, the parameters must be passed to ensure proper EH nesting
+// rules are followed.
+//
+// Arguments:
+// jumpKind - the jump kind of the new block to create.
+// afterBlk - insert the new block after this one.
+// regionIndex - the block will be put in this EH region.
+// putInTryRegion - If true, put the new block in the 'try' region corresponding to 'regionIndex', and
+// set its handler index to the most nested handler region enclosing that 'try' region.
+// Otherwise, put the block in the handler region specified by 'regionIndex', and set its 'try'
+// index to the most nested 'try' region enclosing that handler region.
+//
+// Return Value:
+// The new block.
+
+BasicBlock* Compiler::fgNewBBinRegionWorker(BBjumpKinds jumpKind,
+ BasicBlock* afterBlk,
+ unsigned regionIndex,
+ bool putInTryRegion)
+{
+ /* Insert the new block */
+ BasicBlock* afterBlkNext = afterBlk->bbNext;
+ (void)afterBlkNext; // prevent "unused variable" error from GCC
+ BasicBlock* newBlk = fgNewBBafter(jumpKind, afterBlk, false);
+
+ if (putInTryRegion)
+ {
+ noway_assert(regionIndex <= MAX_XCPTN_INDEX);
+ newBlk->bbTryIndex = (unsigned short)regionIndex;
+ newBlk->bbHndIndex = bbFindInnermostHandlerRegionContainingTryRegion(regionIndex);
+ }
+ else
+ {
+ newBlk->bbTryIndex = bbFindInnermostTryRegionContainingHandlerRegion(regionIndex);
+ noway_assert(regionIndex <= MAX_XCPTN_INDEX);
+ newBlk->bbHndIndex = (unsigned short)regionIndex;
+ }
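+
+    // At this point newBlk has both of its EH indices set: the region given by 'regionIndex',
+    // plus the most nested enclosing region of the other kind (or 0 if there is none). For
+    // example, if putInTryRegion is true and regionIndex is 2, bbTryIndex becomes 2 and
+    // bbHndIndex becomes the innermost handler region (if any) enclosing that try region.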
+
+ // We're going to compare for equal try regions (to handle the case of 'mutually protect'
+ // regions). We need to save off the current try region, otherwise we might change it
+ // before it gets compared later, thereby making future comparisons fail.
+
+ BasicBlock* newTryBeg;
+ BasicBlock* newTryLast;
+ (void)ehInitTryBlockRange(newBlk, &newTryBeg, &newTryLast);
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ // Is afterBlk at the end of a try region?
+ if (HBtab->ebdTryLast == afterBlk)
+ {
+ noway_assert(afterBlkNext == newBlk->bbNext);
+
+ bool extendTryRegion = false;
+ if (newBlk->hasTryIndex())
+ {
+ // We're adding a block after the last block of some try region. Do
+ // we extend the try region to include the block, or not?
+ // If the try region is exactly the same as the try region
+ // associated with the new block (based on the block's try index,
+ // which represents the innermost try the block is a part of), then
+ // we extend it.
+ // If the try region is a "parent" try region -- an enclosing try region
+ // that has the same last block as the new block's try region -- then
+ // we also extend. For example:
+ // try { // 1
+ // ...
+ // try { // 2
+ // ...
+ // } /* 2 */ } /* 1 */
+ // This example is meant to indicate that both try regions 1 and 2 end at
+ // the same block, and we're extending 2. Thus, we must also extend 1. If we
+ // only extended 2, we would break proper nesting. (Dev11 bug 137967)
+
+ extendTryRegion = HBtab->ebdIsSameTry(newTryBeg, newTryLast) || bbInTryRegions(XTnum, newBlk);
+ }
+
+ // Does newBlk extend this try region?
+ if (extendTryRegion)
+ {
+ // Yes, newBlk extends this try region
+
+                // newBlk is now the last block of the try region.
+ fgSetTryEnd(HBtab, newBlk);
+ }
+ }
+
+ // Is afterBlk at the end of a handler region?
+ if (HBtab->ebdHndLast == afterBlk)
+ {
+ noway_assert(afterBlkNext == newBlk->bbNext);
+
+ // Does newBlk extend this handler region?
+ bool extendHndRegion = false;
+ if (newBlk->hasHndIndex())
+ {
+ // We're adding a block after the last block of some handler region. Do
+ // we extend the handler region to include the block, or not?
+ // If the handler region is exactly the same as the handler region
+ // associated with the new block (based on the block's handler index,
+ // which represents the innermost handler the block is a part of), then
+ // we extend it.
+ // If the handler region is a "parent" handler region -- an enclosing
+ // handler region that has the same last block as the new block's handler
+ // region -- then we also extend. For example:
+ // catch { // 1
+ // ...
+ // catch { // 2
+ // ...
+ // } /* 2 */ } /* 1 */
+ // This example is meant to indicate that both handler regions 1 and 2 end at
+ // the same block, and we're extending 2. Thus, we must also extend 1. If we
+ // only extended 2, we would break proper nesting. (Dev11 bug 372051)
+
+ extendHndRegion = bbInHandlerRegions(XTnum, newBlk);
+ }
+
+ if (extendHndRegion)
+ {
+ // Yes, newBlk extends this handler region
+
+ // newBlk is now the last block of the handler.
+ fgSetHndEnd(HBtab, newBlk);
+ }
+ }
+ }
+
+ /* If afterBlk falls through, we insert a jump around newBlk */
+ fgConnectFallThrough(afterBlk, newBlk->bbNext);
+
+#ifdef DEBUG
+ fgVerifyHandlerTab();
+#endif
+
+ return newBlk;
+}
+
+/*****************************************************************************
+ *
+ *  Map a SpecialCodeKind to the corresponding CORINFO_HELP_XXX throw helper.
+ */
+
+/* static */
+unsigned Compiler::acdHelper(SpecialCodeKind codeKind)
+{
+ switch (codeKind)
+ {
+ case SCK_RNGCHK_FAIL:
+ return CORINFO_HELP_RNGCHKFAIL;
+#if COR_JIT_EE_VERSION > 460
+ case SCK_ARG_EXCPN:
+ return CORINFO_HELP_THROW_ARGUMENTEXCEPTION;
+ case SCK_ARG_RNG_EXCPN:
+ return CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION;
+#endif // COR_JIT_EE_VERSION
+ case SCK_DIV_BY_ZERO:
+ return CORINFO_HELP_THROWDIVZERO;
+ case SCK_ARITH_EXCPN:
+ return CORINFO_HELP_OVERFLOW;
+ default:
+ assert(!"Bad codeKind");
+ return 0;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Find/create an added code entry associated with the given block and with
+ * the given kind.
+ */
+
+BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, SpecialCodeKind kind, unsigned stkDepth)
+{
+ // Record that the code will call a THROW_HELPER
+ // so on Windows Amd64 we can allocate the 4 outgoing
+ // arg slots on the stack frame if there are no other calls.
+ compUsesThrowHelper = true;
+
+ // For debuggable code, genJumpToThrowHlpBlk() will generate the 'throw'
+ // code inline. It has to be kept consistent with fgAddCodeRef()
+ if (opts.compDbgCode)
+ {
+ return nullptr;
+ }
+
+ const static BBjumpKinds jumpKinds[] = {
+ BBJ_NONE, // SCK_NONE
+ BBJ_THROW, // SCK_RNGCHK_FAIL
+ BBJ_ALWAYS, // SCK_PAUSE_EXEC
+ BBJ_THROW, // SCK_DIV_BY_ZERO
+ BBJ_THROW, // SCK_ARITH_EXCP, SCK_OVERFLOW
+ BBJ_THROW, // SCK_ARG_EXCPN
+ BBJ_THROW, // SCK_ARG_RNG_EXCPN
+ };
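+    // This table is indexed by SpecialCodeKind and must be kept in sync with that enum;
+    // the assert below is a guard against the two getting out of step.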
+
+ noway_assert(sizeof(jumpKinds) == SCK_COUNT); // sanity check
+
+ /* First look for an existing entry that matches what we're looking for */
+
+ AddCodeDsc* add = fgFindExcptnTarget(kind, refData);
+
+ if (add) // found it
+ {
+#ifdef _TARGET_X86_
+ // If different range checks happen at different stack levels,
+ // they can't all jump to the same "call @rngChkFailed" AND have
+ // frameless methods, as the rngChkFailed may need to unwind the
+ // stack, and we have to be able to report the stack level.
+ //
+ // The following check forces most methods that reference an
+ // array element in a parameter list to have an EBP frame,
+ // this restriction could be removed with more careful code
+ // generation for BBJ_THROW (i.e. range check failed).
+ //
+ if (add->acdStkLvl != stkDepth)
+ {
+ codeGen->setFrameRequired(true);
+ }
+#endif // _TARGET_X86_
+
+ return add->acdDstBlk;
+ }
+
+ /* We have to allocate a new entry and prepend it to the list */
+
+ add = new (this, CMK_Unknown) AddCodeDsc;
+ add->acdData = refData;
+ add->acdKind = kind;
+ add->acdStkLvl = (unsigned short)stkDepth;
+ noway_assert(add->acdStkLvl == stkDepth);
+ add->acdNext = fgAddCodeList;
+ fgAddCodeList = add;
+
+ /* Create the target basic block */
+
+ BasicBlock* newBlk;
+
+ newBlk = add->acdDstBlk = fgNewBBinRegion(jumpKinds[kind], srcBlk, /* runRarely */ true, /* insertAtEnd */ true);
+
+ add->acdDstBlk->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ const char* msgWhere = "";
+ if (!srcBlk->hasTryIndex() && !srcBlk->hasHndIndex())
+ {
+ msgWhere = "non-EH region";
+ }
+ else if (!srcBlk->hasTryIndex())
+ {
+ msgWhere = "handler";
+ }
+ else if (!srcBlk->hasHndIndex())
+ {
+ msgWhere = "try";
+ }
+ else if (srcBlk->getTryIndex() < srcBlk->getHndIndex())
+ {
+ msgWhere = "try";
+ }
+ else
+ {
+ msgWhere = "handler";
+ }
+
+ const char* msg;
+ switch (kind)
+ {
+ case SCK_RNGCHK_FAIL:
+ msg = " for RNGCHK_FAIL";
+ break;
+ case SCK_PAUSE_EXEC:
+ msg = " for PAUSE_EXEC";
+ break;
+ case SCK_DIV_BY_ZERO:
+ msg = " for DIV_BY_ZERO";
+ break;
+ case SCK_OVERFLOW:
+ msg = " for OVERFLOW";
+ break;
+#if COR_JIT_EE_VERSION > 460
+ case SCK_ARG_EXCPN:
+ msg = " for ARG_EXCPN";
+ break;
+ case SCK_ARG_RNG_EXCPN:
+ msg = " for ARG_RNG_EXCPN";
+ break;
+#endif // COR_JIT_EE_VERSION
+ default:
+ msg = " for ??";
+ break;
+ }
+
+ printf("\nfgAddCodeRef -"
+ " Add BB in %s%s, new block BB%02u [%08p], stkDepth is %d\n",
+ msgWhere, msg, add->acdDstBlk->bbNum, dspPtr(add->acdDstBlk), stkDepth);
+ }
+#endif // DEBUG
+
+#ifdef DEBUG
+ newBlk->bbTgtStkDepth = stkDepth;
+#endif // DEBUG
+
+ /* Mark the block as added by the compiler and not removable by future flow
+ graph optimizations. Note that no bbJumpDest points to these blocks. */
+
+ newBlk->bbFlags |= BBF_IMPORTED;
+ newBlk->bbFlags |= BBF_DONT_REMOVE;
+
+ /* Remember that we're adding a new basic block */
+
+ fgAddCodeModf = true;
+ fgRngChkThrowAdded = true;
+
+ /* Now figure out what code to insert */
+
+ GenTreeCall* tree;
+ int helper = CORINFO_HELP_UNDEF;
+
+ switch (kind)
+ {
+ case SCK_RNGCHK_FAIL:
+ helper = CORINFO_HELP_RNGCHKFAIL;
+ break;
+
+ case SCK_DIV_BY_ZERO:
+ helper = CORINFO_HELP_THROWDIVZERO;
+ break;
+
+ case SCK_ARITH_EXCPN:
+ helper = CORINFO_HELP_OVERFLOW;
+ noway_assert(SCK_OVERFLOW == SCK_ARITH_EXCPN);
+ break;
+
+#if COR_JIT_EE_VERSION > 460
+ case SCK_ARG_EXCPN:
+ helper = CORINFO_HELP_THROW_ARGUMENTEXCEPTION;
+ break;
+
+ case SCK_ARG_RNG_EXCPN:
+ helper = CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION;
+ break;
+#endif // COR_JIT_EE_VERSION
+
+ // case SCK_PAUSE_EXEC:
+ // noway_assert(!"add code to pause exec");
+
+ default:
+ noway_assert(!"unexpected code addition kind");
+ return nullptr;
+ }
+
+ noway_assert(helper != CORINFO_HELP_UNDEF);
+
+ // Add the appropriate helper call.
+ tree = gtNewHelperCallNode(helper, TYP_VOID, GTF_EXCEPT);
+
+ // There are no args here but fgMorphArgs has side effects
+ // such as setting the outgoing arg area (which is necessary
+    // on AMD64 if there are any calls).
+ tree = fgMorphArgs(tree);
+
+ // Store the tree in the new basic block.
+ assert(!srcBlk->isEmpty());
+ if (!srcBlk->IsLIR())
+ {
+ fgInsertStmtAtEnd(newBlk, fgNewStmtFromTree(tree));
+ }
+ else
+ {
+ LIR::AsRange(newBlk).InsertAtEnd(LIR::SeqTree(this, tree));
+ }
+
+ return add->acdDstBlk;
+}
+
+/*****************************************************************************
+ * Finds the block to jump to in order to throw a given kind of exception.
+ * We maintain a cache of one AddCodeDsc for each kind, to make searching fast.
+ * Note: every block that throws a given kind of exception (with the same
+ * refData) jumps to the same (possibly shared) target block.
+ */
+
+Compiler::AddCodeDsc* Compiler::fgFindExcptnTarget(SpecialCodeKind kind, unsigned refData)
+{
+ if (!(fgExcptnTargetCache[kind] && // Try the cached value first
+ fgExcptnTargetCache[kind]->acdData == refData))
+ {
+ // Too bad, have to search for the jump target for the exception
+
+ AddCodeDsc* add = nullptr;
+
+ for (add = fgAddCodeList; add != nullptr; add = add->acdNext)
+ {
+ if (add->acdData == refData && add->acdKind == kind)
+ {
+ break;
+ }
+ }
+
+ fgExcptnTargetCache[kind] = add; // Cache it
+ }
+
+ return fgExcptnTargetCache[kind];
+}
+
+/*****************************************************************************
+ *
+ * The given basic block contains an array range check; return the label this
+ * range check is to jump to upon failure.
+ */
+
+BasicBlock* Compiler::fgRngChkTarget(BasicBlock* block, unsigned stkDepth, SpecialCodeKind kind)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*** Computing fgRngChkTarget for block BB%02u to stkDepth %d\n", block->bbNum, stkDepth);
+ if (!block->IsLIR())
+ {
+ gtDispTree(compCurStmt);
+ }
+ }
+#endif // DEBUG
+
+ /* We attach the target label to the containing try block (if any) */
+ noway_assert(!compIsForInlining());
+ return fgAddCodeRef(block, bbThrowIndex(block), kind, stkDepth);
+}
+
+// Sequences the tree.
+// prevTree is what gtPrev of the first node in execution order gets set to.
+// Returns the first node (execution order) in the sequenced tree.
+GenTree* Compiler::fgSetTreeSeq(GenTree* tree, GenTree* prevTree, bool isLIR)
+{
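+    // 'list' is a scratch head node used when the caller passes no 'prevTree'; after sequencing,
+    // its gtNext is the first node in execution order, and that node's gtPrev is nulled out below
+    // so the scratch node never appears in the returned chain.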
+ GenTree list;
+
+ if (prevTree == nullptr)
+ {
+ prevTree = &list;
+ }
+ fgTreeSeqLst = prevTree;
+ fgTreeSeqNum = 0;
+ fgTreeSeqBeg = nullptr;
+ fgSetTreeSeqHelper(tree, isLIR);
+
+ GenTree* result = prevTree->gtNext;
+ if (prevTree == &list)
+ {
+ list.gtNext->gtPrev = nullptr;
+ }
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Assigns sequence numbers to the given tree and its sub-operands, and
+ * threads all the nodes together via the 'gtNext' and 'gtPrev' fields.
+ * Uses 'global' - fgTreeSeqLst
+ */
+
+void Compiler::fgSetTreeSeqHelper(GenTreePtr tree, bool isLIR)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+ noway_assert(tree);
+ assert(!IsUninitialized(tree));
+ noway_assert(tree->gtOper != GT_STMT);
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ /* Is this a leaf/constant node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ // Special handling for dynamic block ops.
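+    // The dynamic size operand is sequenced either first or last, as controlled by gtEvalSizeFirst;
+    // in between, the destination address and the source/value are sequenced according to the
+    // GTF_REVERSE_OPS flag on the tree.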
+ if (tree->OperIsDynBlkOp())
+ {
+ GenTreeDynBlk* dynBlk;
+ GenTree* src;
+ GenTree* asg = tree;
+ if (tree->OperGet() == GT_ASG)
+ {
+ dynBlk = tree->gtGetOp1()->AsDynBlk();
+ src = tree->gtGetOp2();
+ }
+ else
+ {
+ dynBlk = tree->AsDynBlk();
+ src = dynBlk->Data();
+ asg = nullptr;
+ }
+ GenTree* sizeNode = dynBlk->gtDynamicSize;
+ GenTree* dstAddr = dynBlk->Addr();
+ if (dynBlk->gtEvalSizeFirst)
+ {
+ fgSetTreeSeqHelper(sizeNode, isLIR);
+ }
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ fgSetTreeSeqHelper(src, isLIR);
+ fgSetTreeSeqHelper(dstAddr, isLIR);
+ }
+ else
+ {
+ fgSetTreeSeqHelper(dstAddr, isLIR);
+ fgSetTreeSeqHelper(src, isLIR);
+ }
+ if (!dynBlk->gtEvalSizeFirst)
+ {
+ fgSetTreeSeqHelper(sizeNode, isLIR);
+ }
+ fgSetTreeSeqFinish(dynBlk, isLIR);
+ if (asg != nullptr)
+ {
+ fgSetTreeSeqFinish(asg, isLIR);
+ }
+ return;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ // Special handling for GT_LIST
+ if (tree->OperGet() == GT_LIST)
+ {
+ // First, handle the list items, which will be linked in forward order.
+ // As we go, we will link the GT_LIST nodes in reverse order - we will number
+            // them and update fgTreeSeqLst in a subsequent traversal.
+ GenTreePtr nextList = tree;
+ GenTreePtr list = nullptr;
+ while (nextList != nullptr && nextList->OperGet() == GT_LIST)
+ {
+ list = nextList;
+ GenTreePtr listItem = list->gtOp.gtOp1;
+ fgSetTreeSeqHelper(listItem, isLIR);
+ nextList = list->gtOp.gtOp2;
+ if (nextList != nullptr)
+ {
+ nextList->gtNext = list;
+ }
+ list->gtPrev = nextList;
+ }
+ // Next, handle the GT_LIST nodes.
+ // Note that fgSetTreeSeqFinish() sets the gtNext to null, so we need to capture the nextList
+ // before we call that method.
+ nextList = list;
+ do
+ {
+ assert(list != nullptr);
+ list = nextList;
+ nextList = list->gtNext;
+ fgSetTreeSeqFinish(list, isLIR);
+ } while (list != tree);
+ return;
+ }
+
+ /* Special handling for AddrMode */
+ if (tree->OperIsAddrMode())
+ {
+ bool reverse = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
+ if (reverse)
+ {
+ assert(op1 != nullptr && op2 != nullptr);
+ fgSetTreeSeqHelper(op2, isLIR);
+ }
+ if (op1 != nullptr)
+ {
+ fgSetTreeSeqHelper(op1, isLIR);
+ }
+ if (!reverse && op2 != nullptr)
+ {
+ fgSetTreeSeqHelper(op2, isLIR);
+ }
+
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ /* Check for a nilary operator */
+
+ if (op1 == nullptr)
+ {
+ noway_assert(op2 == nullptr);
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ /* Is this a unary operator?
+ * Although UNARY GT_IND has a special structure */
+
+ if (oper == GT_IND)
+ {
+ /* Visit the indirection first - op2 may point to the
+ * jump Label for array-index-out-of-range */
+
+ fgSetTreeSeqHelper(op1, isLIR);
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ /* Now this is REALLY a unary operator */
+
+ if (!op2)
+ {
+ /* Visit the (only) operand and we're done */
+
+ fgSetTreeSeqHelper(op1, isLIR);
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ /*
+ For "real" ?: operators, we make sure the order is
+ as follows:
+
+ condition
+ 1st operand
+ GT_COLON
+ 2nd operand
+ GT_QMARK
+ */
+
+ if (oper == GT_QMARK)
+ {
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ fgSetTreeSeqHelper(op1, isLIR);
+ // Here, for the colon, the sequence does not actually represent "order of evaluation":
+ // one or the other of the branches is executed, not both. Still, to make debugging checks
+ // work, we want the sequence to match the order in which we'll generate code, which means
+ // "else" clause then "then" clause.
+ fgSetTreeSeqHelper(op2->AsColon()->ElseNode(), isLIR);
+ fgSetTreeSeqHelper(op2, isLIR);
+ fgSetTreeSeqHelper(op2->AsColon()->ThenNode(), isLIR);
+
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ if (oper == GT_COLON)
+ {
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ /* This is a binary operator */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ fgSetTreeSeqHelper(op2, isLIR);
+ fgSetTreeSeqHelper(op1, isLIR);
+ }
+ else
+ {
+ fgSetTreeSeqHelper(op1, isLIR);
+ fgSetTreeSeqHelper(op2, isLIR);
+ }
+
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ noway_assert(tree->gtField.gtFldObj == nullptr);
+ break;
+
+ case GT_CALL:
+
+ /* We'll evaluate the 'this' argument value first */
+ if (tree->gtCall.gtCallObjp)
+ {
+ fgSetTreeSeqHelper(tree->gtCall.gtCallObjp, isLIR);
+ }
+
+ /* We'll evaluate the arguments next, left to right
+ * NOTE: setListOrder needs cleanup - eliminate the #ifdef afterwards */
+
+ if (tree->gtCall.gtCallArgs)
+ {
+ fgSetTreeSeqHelper(tree->gtCall.gtCallArgs, isLIR);
+ }
+
+ /* Evaluate the temp register arguments list
+ * This is a "hidden" list and its only purpose is to
+ * extend the life of temps until we make the call */
+
+ if (tree->gtCall.gtCallLateArgs)
+ {
+ fgSetTreeSeqHelper(tree->gtCall.gtCallLateArgs, isLIR);
+ }
+
+ if ((tree->gtCall.gtCallType == CT_INDIRECT) && (tree->gtCall.gtCallCookie != nullptr))
+ {
+ fgSetTreeSeqHelper(tree->gtCall.gtCallCookie, isLIR);
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ fgSetTreeSeqHelper(tree->gtCall.gtCallAddr, isLIR);
+ }
+
+ if (tree->gtCall.gtControlExpr)
+ {
+ fgSetTreeSeqHelper(tree->gtCall.gtControlExpr, isLIR);
+ }
+
+ break;
+
+ case GT_ARR_ELEM:
+
+ fgSetTreeSeqHelper(tree->gtArrElem.gtArrObj, isLIR);
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ fgSetTreeSeqHelper(tree->gtArrElem.gtArrInds[dim], isLIR);
+ }
+
+ break;
+
+ case GT_ARR_OFFSET:
+ fgSetTreeSeqHelper(tree->gtArrOffs.gtOffset, isLIR);
+ fgSetTreeSeqHelper(tree->gtArrOffs.gtIndex, isLIR);
+ fgSetTreeSeqHelper(tree->gtArrOffs.gtArrObj, isLIR);
+ break;
+
+ case GT_CMPXCHG:
+ // Evaluate the trees left to right
+ fgSetTreeSeqHelper(tree->gtCmpXchg.gtOpLocation, isLIR);
+ fgSetTreeSeqHelper(tree->gtCmpXchg.gtOpValue, isLIR);
+ fgSetTreeSeqHelper(tree->gtCmpXchg.gtOpComparand, isLIR);
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ // Evaluate the trees left to right
+ fgSetTreeSeqHelper(tree->gtBoundsChk.gtArrLen, isLIR);
+ fgSetTreeSeqHelper(tree->gtBoundsChk.gtIndex, isLIR);
+ break;
+
+ case GT_STORE_DYN_BLK:
+ case GT_DYN_BLK:
+ noway_assert(!"DYN_BLK nodes should be sequenced as a special case");
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+ noway_assert(!"unexpected operator");
+#endif // DEBUG
+ break;
+ }
+
+ fgSetTreeSeqFinish(tree, isLIR);
+}
+
+void Compiler::fgSetTreeSeqFinish(GenTreePtr tree, bool isLIR)
+{
+ // If we are sequencing a node that does not appear in LIR,
+ // do not add it to the list.
+ if (isLIR && (((tree->OperGet() == GT_LIST) && !tree->AsArgList()->IsAggregate()) || tree->OperGet() == GT_ARGPLACE))
+ {
+ return;
+ }
+
+ /* Append to the node list */
+ ++fgTreeSeqNum;
+
+#ifdef DEBUG
+ tree->gtSeqNum = fgTreeSeqNum;
+
+ if (verbose & 0)
+ {
+ printf("SetTreeOrder: ");
+ printTreeID(fgTreeSeqLst);
+ printf(" followed by ");
+ printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ fgTreeSeqLst->gtNext = tree;
+ tree->gtNext = nullptr;
+ tree->gtPrev = fgTreeSeqLst;
+ fgTreeSeqLst = tree;
+
+ /* Remember the very first node */
+
+ if (!fgTreeSeqBeg)
+ {
+ fgTreeSeqBeg = tree;
+ assert(tree->gtSeqNum == 1);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Figure out the order in which operators should be evaluated, along with
+ * other information (such as the register sets trashed by each subtree).
+ * Also finds blocks that need GC polls and inserts them as needed.
+ */
+
+void Compiler::fgSetBlockOrder()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgSetBlockOrder()\n");
+ }
+#endif // DEBUG
+
+#ifdef DEBUG
+ BasicBlock::s_nMaxTrees = 0;
+#endif
+
+ /* Walk the basic blocks to assign sequence numbers */
+
+ /* If we don't compute the doms, then we never mark blocks as loops. */
+ if (fgDomsComputed)
+ {
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ /* If this block is a loop header, mark it appropriately */
+
+ if (block->isLoopHead())
+ {
+ fgMarkLoopHead(block);
+ }
+ }
+ }
+    // only enable fully interruptible code if we're hijacking.
+ else if (GCPOLL_NONE == opts.compGCPollType)
+ {
+ /* If we don't have the dominators, use an abbreviated test for fully interruptible. If there are
+ * any back edges, check the source and destination blocks to see if they're GC Safe. If not, then
+ * go fully interruptible. */
+
+ /* XXX Mon 1/21/2008
+ * Wouldn't it be nice to have a block iterator that can do this loop?
+ */
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+// true if the edge is forward, or if it is a back edge and either the source or the dest is GC safe.
+#define EDGE_IS_GC_SAFE(src, dst) \
+ (((src)->bbNum < (dst)->bbNum) || (((src)->bbFlags | (dst)->bbFlags) & BBF_GC_SAFE_POINT))
+
+ bool partiallyInterruptible = true;
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ partiallyInterruptible = EDGE_IS_GC_SAFE(block, block->bbJumpDest);
+ break;
+
+ case BBJ_SWITCH:
+
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpPtr;
+ jumpPtr = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ partiallyInterruptible &= EDGE_IS_GC_SAFE(block, *jumpPtr);
+ } while (++jumpPtr, --jumpCnt);
+
+ break;
+
+ default:
+ break;
+ }
+
+ if (!partiallyInterruptible)
+ {
+ // DDB 204533:
+ // The GC encoding for fully interruptible methods does not
+ // support more than 1023 pushed arguments, so we can't set
+ // genInterruptible here when we have 1024 or more pushed args
+ //
+ if (compCanEncodePtrArgCntMax())
+ {
+ genInterruptible = true;
+ }
+ break;
+ }
+#undef EDGE_IS_GC_SAFE
+ }
+ }
+
+ if (!fgGCPollsCreated)
+ {
+ fgCreateGCPolls();
+ }
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+
+#if FEATURE_FASTTAILCALL
+#ifndef JIT32_GCENCODER
+ if (block->endsWithTailCallOrJmp(this, true) && !(block->bbFlags & BBF_GC_SAFE_POINT) &&
+ optReachWithoutCall(fgFirstBB, block))
+ {
+ // We have a tail call that is reachable without making any other
+ // 'normal' call that would have counted as a GC Poll. If we were
+ // using polls, all return blocks meeting this criteria would have
+ // already added polls and then marked as being GC safe
+ // (BBF_GC_SAFE_POINT). Thus we can only reach here when *NOT*
+ // using GC polls, but instead relying on the JIT to generate
+ // fully-interruptible code.
+ noway_assert(GCPOLL_NONE == opts.compGCPollType);
+
+ // This tail call might combine with other tail calls to form a
+ // loop. Thus we need to either add a poll, or make the method
+ // fully interruptible. I chose the later because that's what
+            // fully interruptible. I chose the latter because that's what
+ genInterruptible = true;
+ }
+#endif // !JIT32_GCENCODER
+#endif // FEATURE_FASTTAILCALL
+
+ fgSetBlockOrder(block);
+ }
+
+ /* Remember that now the tree list is threaded */
+
+ fgStmtListThreaded = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("The biggest BB has %4u tree nodes\n", BasicBlock::s_nMaxTrees);
+ }
+ fgDebugCheckLinks();
+#endif // DEBUG
+}
+
+/*****************************************************************************/
+
+void Compiler::fgSetStmtSeq(GenTreePtr tree)
+{
+ GenTree list; // helper node that we use to start the StmtList
+ // It's located in front of the first node in the list
+
+ noway_assert(tree->gtOper == GT_STMT);
+
+ /* Assign numbers and next/prev links for this tree */
+
+ fgTreeSeqNum = 0;
+ fgTreeSeqLst = &list;
+ fgTreeSeqBeg = nullptr;
+
+ fgSetTreeSeqHelper(tree->gtStmt.gtStmtExpr, false);
+
+ /* Record the address of the first node */
+
+ tree->gtStmt.gtStmtList = fgTreeSeqBeg;
+
+#ifdef DEBUG
+
+ if (list.gtNext->gtPrev != &list)
+ {
+ printf("&list ");
+ printTreeID(&list);
+ printf(" != list.next->prev ");
+ printTreeID(list.gtNext->gtPrev);
+ printf("\n");
+ goto BAD_LIST;
+ }
+
+ GenTreePtr temp;
+ GenTreePtr last;
+ for (temp = list.gtNext, last = &list; temp; last = temp, temp = temp->gtNext)
+ {
+ if (temp->gtPrev != last)
+ {
+ printTreeID(temp);
+ printf("->gtPrev = ");
+ printTreeID(temp->gtPrev);
+ printf(", but last = ");
+ printTreeID(last);
+ printf("\n");
+
+ BAD_LIST:;
+
+ printf("\n");
+ gtDispTree(tree->gtStmt.gtStmtExpr);
+ printf("\n");
+
+ for (GenTreePtr bad = &list; bad; bad = bad->gtNext)
+ {
+ printf(" entry at ");
+ printTreeID(bad);
+ printf(" (prev=");
+ printTreeID(bad->gtPrev);
+                printf(",next=");
+                printTreeID(bad->gtNext);
+                printf(")\n");
+ }
+
+ printf("\n");
+ noway_assert(!"Badly linked tree");
+ break;
+ }
+ }
+#endif // DEBUG
+
+ /* Fix the first node's 'prev' link */
+
+ noway_assert(list.gtNext->gtPrev == &list);
+ list.gtNext->gtPrev = nullptr;
+
+#ifdef DEBUG
+ /* Keep track of the highest # of tree nodes */
+
+ if (BasicBlock::s_nMaxTrees < fgTreeSeqNum)
+ {
+ BasicBlock::s_nMaxTrees = fgTreeSeqNum;
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************/
+
+void Compiler::fgSetBlockOrder(BasicBlock* block)
+{
+ GenTreePtr tree;
+
+ tree = block->bbTreeList;
+ if (!tree)
+ {
+ return;
+ }
+
+ for (;;)
+ {
+ fgSetStmtSeq(tree);
+
+ /* Are there any more trees in this basic block? */
+
+ if (tree->gtNext == nullptr)
+ {
+ /* last statement in the tree list */
+ noway_assert(block->lastStmt() == tree);
+ break;
+ }
+
+#ifdef DEBUG
+ if (block->bbTreeList == tree)
+ {
+ /* first statement in the list */
+ noway_assert(tree->gtPrev->gtNext == nullptr);
+ }
+ else
+ {
+ noway_assert(tree->gtPrev->gtNext == tree);
+ }
+
+ noway_assert(tree->gtNext->gtPrev == tree);
+#endif // DEBUG
+
+ tree = tree->gtNext;
+ }
+}
+
+#ifdef LEGACY_BACKEND
+//------------------------------------------------------------------------
+// fgOrderBlockOps: Get the execution order for a block assignment
+//
+// Arguments:
+// tree - The block assignment
+// reg0 - The register for the destination
+// reg1 - The register for the source
+// reg2 - The register for the size
+// opsPtr - An array of 3 GenTreePtr's, an out argument for the operands, in order
+// regsPtr - An array of three regMaskTP - an out argument for the registers, in order
+//
+// Return Value:
+// The return values go into the arrays that are passed in, and provide the
+// operands and associated registers, in execution order.
+//
+// Notes:
+// This method is somewhat convoluted in order to preserve old behavior from when
+// block assignments had their dst and src in a GT_LIST as op1, and their size as op2.
+// The old tree was like this:
+// tree->gtOp
+// / \
+// GT_LIST [size/clsHnd]
+// / \
+// [dest] [val/src]
+//
+// The new tree looks like this:
+// GT_ASG
+// / \
+// blk/obj [val/src]
+// / \
+// [destAddr] [*size/clsHnd] *only for GT_DYN_BLK
+//
+// For the (usual) case of GT_BLK or GT_OBJ, the size is always "evaluated" (i.e.
+// instantiated into a register) last. In those cases, the GTF_REVERSE_OPS flag
+// on the assignment works as usual.
+// In order to preserve previous possible orderings, the order for evaluating
+// the size of a GT_DYN_BLK node is controlled by its gtEvalSizeFirst flag. If
+// that is set, the size is evaluated first, and then the src and dst are evaluated
+// according to the GTF_REVERSE_OPS flag on the assignment.
+
+void Compiler::fgOrderBlockOps(GenTreePtr tree,
+ regMaskTP reg0,
+ regMaskTP reg1,
+ regMaskTP reg2,
+ GenTreePtr* opsPtr, // OUT
+ regMaskTP* regsPtr) // OUT
+{
+ assert(tree->OperIsBlkOp());
+
+ GenTreeBlk* destBlk = tree->gtOp.gtOp1->AsBlk();
+ GenTreePtr destAddr = destBlk->Addr();
+ GenTreePtr srcPtrOrVal = tree->gtOp.gtOp2;
+ if (tree->OperIsCopyBlkOp())
+ {
+ assert(srcPtrOrVal->OperIsIndir());
+ srcPtrOrVal = srcPtrOrVal->AsIndir()->Addr();
+ }
+ GenTreePtr sizeNode = (destBlk->gtOper == GT_DYN_BLK) ? destBlk->AsDynBlk()->gtDynamicSize : nullptr;
+ noway_assert((sizeNode != nullptr) || ((destBlk->gtFlags & GTF_REVERSE_OPS) == 0));
+ assert(destAddr != nullptr);
+ assert(srcPtrOrVal != nullptr);
+
+ GenTreePtr ops[3] = {
+ destAddr, // Dest address
+ srcPtrOrVal, // Val / Src address
+ sizeNode // Size of block
+ };
+
+ regMaskTP regs[3] = {reg0, reg1, reg2};
+
+ static int blockOpsOrder[4][3] =
+ // destBlk->gtEvalSizeFirst | tree->gtFlags
+ {
+ // -------------------------+----------------------------
+ {0, 1, 2}, // false | -
+ {2, 0, 1}, // true | -
+ {1, 0, 2}, // false | GTF_REVERSE_OPS
+ {2, 1, 0} // true | GTF_REVERSE_OPS
+ };
+
+ int orderNum = ((destBlk->gtFlags & GTF_REVERSE_OPS) != 0) * 1 + ((tree->gtFlags & GTF_REVERSE_OPS) != 0) * 2;
+
+ assert(orderNum < 4);
+
+ int* order = blockOpsOrder[orderNum];
+
+ PREFIX_ASSUME(order != NULL);
+
+ // Fill in the OUT arrays according to the order we have selected
+
+ opsPtr[0] = ops[order[0]];
+ opsPtr[1] = ops[order[1]];
+ opsPtr[2] = ops[order[2]];
+
+ regsPtr[0] = regs[order[0]];
+ regsPtr[1] = regs[order[1]];
+ regsPtr[2] = regs[order[2]];
+}
+#endif // LEGACY_BACKEND
+
+//------------------------------------------------------------------------
+// fgGetFirstNode: Get the first node in the tree, in execution order
+//
+// Arguments:
+// tree - The top node of the tree of interest
+//
+// Return Value:
+// The first node in execution order, that belongs to tree.
+//
+// Assumptions:
+// 'tree' must either be a leaf, or all of its constituent nodes must be contiguous
+// in execution order.
+// TODO-Cleanup: Add a debug-only method that verifies this.
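+//    For example (illustrative only): for ADD(a, MUL(b, c)) with GTF_REVERSE_OPS set nowhere,
+//    the walk follows first children and returns 'a'; with GTF_REVERSE_OPS set on the ADD, it
+//    descends into the MUL and returns 'b'.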
+
+/* static */
+GenTreePtr Compiler::fgGetFirstNode(GenTreePtr tree)
+{
+ GenTreePtr child = tree;
+ while (child->NumChildren() > 0)
+ {
+ if (child->OperIsBinary() && child->IsReverseOp())
+ {
+ child = child->GetChild(1);
+ }
+ else
+ {
+ child = child->GetChild(0);
+ }
+ }
+ return child;
+}
+
+// Examine the bbTreeList and return the estimated code size for this block
+unsigned Compiler::fgGetCodeEstimate(BasicBlock* block)
+{
+ unsigned costSz = 0; // estimate of blocks code size cost
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_NONE:
+ costSz = 0;
+ break;
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_LEAVE:
+ case BBJ_COND:
+ costSz = 2;
+ break;
+ case BBJ_CALLFINALLY:
+ costSz = 5;
+ break;
+ case BBJ_SWITCH:
+ costSz = 10;
+ break;
+ case BBJ_THROW:
+            costSz = 1; // We place an int3 after the code for a throw block
+ break;
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ costSz = 1;
+ break;
+ case BBJ_RETURN: // return from method
+ costSz = 3;
+ break;
+ default:
+ noway_assert(!"Bad bbJumpKind");
+ break;
+ }
+
+ GenTreePtr tree = block->FirstNonPhiDef();
+ if (tree)
+ {
+ do
+ {
+ noway_assert(tree->gtOper == GT_STMT);
+
+ if (tree->gtCostSz < MAX_COST)
+ {
+ costSz += tree->gtCostSz;
+ }
+ else
+ {
+ // We could walk the tree to find out the real gtCostSz,
+                // but just using MAX_COST for this tree's code size works OK
+ costSz += tree->gtCostSz;
+ }
+
+ tree = tree->gtNext;
+ } while (tree);
+ }
+
+ return costSz;
+}
+
+#if DUMP_FLOWGRAPHS
+
+struct escapeMapping_t
+{
+ char ch;
+ const char* sub;
+};
+
+// clang-format off
+static escapeMapping_t s_EscapeFileMapping[] =
+{
+ {':', "="},
+ {'<', "["},
+ {'>', "]"},
+ {';', "~semi~"},
+ {'|', "~bar~"},
+ {'&', "~amp~"},
+ {'"', "~quot~"},
+ {'*', "~star~"},
+ {0, nullptr}
+};
+
+static escapeMapping_t s_EscapeMapping[] =
+{
+ {'<', "&lt;"},
+ {'>', "&gt;"},
+ {'&', "&amp;"},
+ {'"', "&quot;"},
+ {0, nullptr}
+};
+// clang-format on
+
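+//------------------------------------------------------------------------
+// fgProcessEscapes: Replace characters in a name according to an escape mapping table.
+//
+// Arguments:
+//    nameIn - the string to escape.
+//    map    - a zero-terminated array of (character, substitution) pairs.
+//
+// Return Value:
+//    'nameIn' itself if no character required substitution; otherwise a newly allocated
+//    string in which every mapped character is replaced by its substitution (for example,
+//    '<' becomes "[" under s_EscapeFileMapping and "&lt;" under s_EscapeMapping).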
+const char* Compiler::fgProcessEscapes(const char* nameIn, escapeMapping_t* map)
+{
+ const char* nameOut = nameIn;
+ unsigned lengthOut;
+ unsigned index;
+ bool match;
+ bool subsitutionRequired;
+ const char* pChar;
+
+ lengthOut = 1;
+ subsitutionRequired = false;
+ pChar = nameIn;
+ while (*pChar != '\0')
+ {
+ match = false;
+ index = 0;
+ while (map[index].ch != 0)
+ {
+ if (*pChar == map[index].ch)
+ {
+ match = true;
+ break;
+ }
+ index++;
+ }
+ if (match)
+ {
+ subsitutionRequired = true;
+ lengthOut += (unsigned)strlen(map[index].sub);
+ }
+ else
+ {
+ lengthOut += 1;
+ }
+ pChar++;
+ }
+
+ if (subsitutionRequired)
+ {
+ char* newName = (char*) compGetMemA(lengthOut, CMK_DebugOnly);
+ char* pDest;
+ pDest = newName;
+ pChar = nameIn;
+ while (*pChar != '\0')
+ {
+ match = false;
+ index = 0;
+ while (map[index].ch != 0)
+ {
+ if (*pChar == map[index].ch)
+ {
+ match = true;
+ break;
+ }
+ index++;
+ }
+ if (match)
+ {
+ strcpy(pDest, map[index].sub);
+ pDest += strlen(map[index].sub);
+ }
+ else
+ {
+ *pDest++ = *pChar;
+ }
+ pChar++;
+ }
+ *pDest++ = '\0';
+ nameOut = (const char*) newName;
+ }
+
+ return nameOut;
+}
+
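+// Print a non-negative double to the flowgraph file as a quoted attribute value, using
+// fixed-point notation (3 or 5 decimal places) for larger values and scientific notation
+// for very small nonzero values.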
+static void fprintfDouble(FILE* fgxFile, double value)
+{
+ assert(value >= 0.0);
+
+ if ((value >= 0.010) || (value == 0.0))
+ {
+ fprintf(fgxFile, "\"%7.3f\"", value);
+ }
+ else if (value >= 0.00010)
+ {
+ fprintf(fgxFile, "\"%7.5f\"", value);
+ }
+ else
+ {
+ fprintf(fgxFile, "\"%7E\"", value);
+ }
+}
+
+//------------------------------------------------------------------------
+// fgOpenFlowGraphFile: Open a file to dump either the xml or dot format flow graph
+//
+// Arguments:
+// wbDontClose - A boolean out argument that indicates whether the caller should close the file
+// phase - A phase identifier to indicate which phase is associated with the dump
+// type - A (wide) string indicating the type of dump, "dot" or "xml"
+//
+// Return Value:
+// Opens a file to which a flowgraph can be dumped, whose name is based on the current
+// config vales.
+//    config values.
+FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, LPCWSTR type)
+{
+ FILE* fgxFile;
+ LPCWSTR pattern = nullptr;
+ LPCWSTR filename = nullptr;
+ LPCWSTR pathname = nullptr;
+ const char* escapedString;
+ bool createDuplicateFgxFiles = true;
+
+#ifdef DEBUG
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ pattern = JitConfig.NgenDumpFg();
+ filename = JitConfig.NgenDumpFgFile();
+ pathname = JitConfig.NgenDumpFgDir();
+ }
+ else
+ {
+ pattern = JitConfig.JitDumpFg();
+ filename = JitConfig.JitDumpFgFile();
+ pathname = JitConfig.JitDumpFgDir();
+ }
+#endif // DEBUG
+
+    if (fgBBcount <= 1)
+    {
+        return nullptr;
+    }
+
+    if (pattern == nullptr)
+    {
+        return nullptr;
+    }
+
+    if (wcslen(pattern) == 0)
+    {
+        return nullptr;
+    }
+
+ LPCWSTR phasePattern = JitConfig.JitDumpFgPhase();
+ LPCWSTR phaseName = PhaseShortNames[phase];
+ if (phasePattern == nullptr)
+ {
+ if (phase != PHASE_DETERMINE_FIRST_COLD_BLOCK)
+ {
+ return nullptr;
+ }
+ }
+ else if (*phasePattern != W('*'))
+ {
+ if (wcsstr(phasePattern, phaseName) == nullptr)
+ {
+ return nullptr;
+ }
+ }
+
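+    // Match the method against the pattern. A pattern that starts with '*' matches every method
+    // (the block below is skipped entirely). Otherwise the pattern has the form
+    // [ClassName:]MethodName, where either component may end in '*' to match any suffix of the
+    // corresponding name.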
+ if (*pattern != W('*'))
+ {
+ bool hasColon = (wcschr(pattern, W(':')) != nullptr);
+
+ if (hasColon)
+ {
+ const char* className = info.compClassName;
+ if (*pattern == W('*'))
+ {
+ pattern++;
+ }
+ else
+ {
+ while ((*pattern != W(':')) && (*pattern != W('*')))
+ {
+                    if (*pattern != *className)
+                    {
+                        return nullptr;
+                    }
+
+ pattern++;
+ className++;
+ }
+ if (*pattern == W('*'))
+ {
+ pattern++;
+ }
+ else
+ {
+                    if (*className != 0)
+                    {
+                        return nullptr;
+                    }
+ }
+ }
+            if (*pattern != W(':'))
+            {
+                return nullptr;
+            }
+
+ pattern++;
+ }
+
+ const char* methodName = info.compMethodName;
+ if (*pattern == W('*'))
+ {
+ pattern++;
+ }
+ else
+ {
+ while ((*pattern != 0) && (*pattern != W('*')))
+ {
+                if (*pattern != *methodName)
+                {
+                    return nullptr;
+                }
+
+ pattern++;
+ methodName++;
+ }
+ if (*pattern == W('*'))
+ {
+ pattern++;
+ }
+ else
+ {
+                if (*methodName != 0)
+                {
+                    return nullptr;
+                }
+ }
+ }
+        if (*pattern != 0)
+        {
+            return nullptr;
+        }
+ }
+
+ if (filename == nullptr)
+ {
+ filename = W("default");
+ }
+
+ if (wcscmp(filename, W("profiled")) == 0)
+ {
+ if ((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0)
+ {
+ createDuplicateFgxFiles = true;
+ goto ONE_FILE_PER_METHOD;
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+ if (wcscmp(filename, W("hot")) == 0)
+ {
+        if (info.compMethodInfo->regionKind == CORINFO_REGION_HOT)
+        {
+ createDuplicateFgxFiles = true;
+ goto ONE_FILE_PER_METHOD;
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+ else if (wcscmp(filename, W("cold")) == 0)
+ {
+ if (info.compMethodInfo->regionKind == CORINFO_REGION_COLD)
+ {
+ createDuplicateFgxFiles = true;
+ goto ONE_FILE_PER_METHOD;
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+ else if (wcscmp(filename, W("jit")) == 0)
+ {
+ if (info.compMethodInfo->regionKind == CORINFO_REGION_JIT)
+ {
+ createDuplicateFgxFiles = true;
+ goto ONE_FILE_PER_METHOD;
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+ else if (wcscmp(filename, W("all")) == 0)
+ {
+ createDuplicateFgxFiles = true;
+
+ONE_FILE_PER_METHOD:;
+
+ escapedString = fgProcessEscapes(info.compFullName, s_EscapeFileMapping);
+ size_t wCharCount = strlen(escapedString) + wcslen(phaseName) + 1 + strlen("~999") + wcslen(type) + 1;
+ if (pathname != nullptr)
+ {
+ wCharCount += wcslen(pathname) + 1;
+ }
+ filename = (LPCWSTR) alloca(wCharCount * sizeof(WCHAR));
+ if (pathname != nullptr)
+ {
+ swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%S-%s.%s"), pathname, escapedString, phaseName, type);
+ }
+ else
+ {
+ swprintf_s((LPWSTR)filename, wCharCount, W("%S.%s"), escapedString, type);
+ }
+ fgxFile = _wfopen(filename, W("r")); // Check if this file already exists
+ if (fgxFile != nullptr)
+ {
+ // For Generic methods we will have both hot and cold versions
+ if (createDuplicateFgxFiles == false)
+ {
+ fclose(fgxFile);
+ return nullptr;
+ }
+ // Yes, this filename already exists, so create a different one by appending ~2, ~3, etc...
+ for (int i = 2; i < 1000; i++)
+ {
+ fclose(fgxFile);
+ if (pathname != nullptr)
+ {
+ swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%S~%d.%s"), pathname, escapedString, i, type);
+ }
+ else
+ {
+ swprintf_s((LPWSTR)filename, wCharCount, W("%S~%d.%s"), escapedString, i, type);
+ }
+ fgxFile = _wfopen(filename, W("r")); // Check if this file exists
+                if (fgxFile == nullptr)
+                {
+                    break;
+                }
+ }
+ // If we have already created 1000 files with this name then just fail
+ if (fgxFile != nullptr)
+ {
+ fclose(fgxFile);
+ return nullptr;
+ }
+ }
+ fgxFile = _wfopen(filename, W("a+"));
+ *wbDontClose = false;
+ }
+ else if (wcscmp(filename, W("stdout")) == 0)
+ {
+ fgxFile = jitstdout;
+ *wbDontClose = true;
+ }
+ else if (wcscmp(filename, W("stderr")) == 0)
+ {
+ fgxFile = stderr;
+ *wbDontClose = true;
+ }
+ else
+ {
+ LPCWSTR origFilename = filename;
+ size_t wCharCount = wcslen(origFilename) + wcslen(type) + 2;
+ if (pathname != nullptr)
+ {
+ wCharCount += wcslen(pathname) + 1;
+ }
+ filename = (LPCWSTR) alloca(wCharCount * sizeof(WCHAR));
+ if (pathname != nullptr)
+ {
+ swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%s.%s"), pathname, origFilename, type);
+ }
+ else
+ {
+ swprintf_s((LPWSTR)filename, wCharCount, W("%s.%s"), origFilename, type);
+ }
+ fgxFile = _wfopen(filename, W("a+"));
+ *wbDontClose = false;
+ }
+
+ return fgxFile;
+}
+
+//------------------------------------------------------------------------
+// fgDumpFlowGraph: Dump the xml or dot format flow graph, if enabled for this phase.
+//
+// Arguments:
+// phase - A phase identifier to indicate which phase is associated with the dump,
+// i.e. which phase has just completed.
+//
+// Return Value:
+// True iff a flowgraph has been dumped.
+//
+// Notes:
+// The xml dumps are the historical mechanism for dumping the flowgraph.
+// The dot format can be viewed by:
+// - Graphviz (http://www.graphviz.org/)
+// - The command "C:\Program Files (x86)\Graphviz2.38\bin\dot.exe" -Tsvg -oFoo.svg -Kdot Foo.dot
+// will produce a Foo.svg file that can be opened with any svg-capable browser (e.g. IE).
+// - http://rise4fun.com/Agl/
+// - Cut and paste the graph from your .dot file, replacing the digraph on the page, and then click the play
+// button.
+// - It will show a rotating '/' and then render the graph in the browser.
+// MSAGL has also been open-sourced to https://github.com/Microsoft/automatic-graph-layout.git.
+//
+// Here are the config values that control it:
+// COMPlus_JitDumpFg A string (ala the COMPlus_JitDump string) indicating what methods to dump flowgraphs
+// for.
+// COMPlus_JitDumpFgDir A path to a directory into which the flowgraphs will be dumped.
+// COMPlus_JitDumpFgFile The filename to use. The default is "default.[xml|dot]".
+// Note that the new graphs will be appended to this file if it already exists.
+// COMPlus_JitDumpFgPhase Phase(s) after which to dump the flowgraph.
+// Set to the short name of a phase to see the flowgraph after that phase.
+// Leave unset to dump after COLD-BLK (determine first cold block) or set to * for all
+// phases.
+// COMPlus_JitDumpFgDot Set to non-zero to emit Dot instead of Xml Flowgraph dump. (Default is xml format.)
+
+bool Compiler::fgDumpFlowGraph(Phases phase)
+{
+ bool result = false;
+ bool dontClose = false;
+ bool createDotFile = false;
+ if (JitConfig.JitDumpFgDot())
+ {
+ createDotFile = true;
+ }
+
+ FILE* fgxFile = fgOpenFlowGraphFile(&dontClose, phase, createDotFile ? W("dot") : W("fgx"));
+
+ if (fgxFile == nullptr)
+ {
+ return false;
+ }
+ bool validWeights = fgHaveValidEdgeWeights;
+ unsigned calledCount = max(fgCalledWeight, BB_UNITY_WEIGHT) / BB_UNITY_WEIGHT;
+ double weightDivisor = (double) (calledCount * BB_UNITY_WEIGHT);
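+    // Block and edge weights below are scaled by 'weightDivisor', so the printed numbers are
+    // (roughly) execution counts per call of the method.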
+ const char* escapedString;
+ const char* regionString = "NONE";
+
+ if (info.compMethodInfo->regionKind == CORINFO_REGION_HOT)
+ {
+ regionString="HOT";
+ }
+ else if (info.compMethodInfo->regionKind == CORINFO_REGION_COLD)
+ {
+ regionString="COLD";
+ }
+ else if (info.compMethodInfo->regionKind == CORINFO_REGION_JIT)
+ {
+ regionString="JIT";
+ }
+
+ if (createDotFile)
+ {
+ fprintf(fgxFile, "digraph %s\n{\n", info.compMethodName);
+ fprintf(fgxFile, "/* Method %d, after phase %s */", Compiler::jitTotalMethodCompiled, PhaseNames[phase]);
+ }
+ else
+ {
+ fprintf(fgxFile, "<method");
+
+ escapedString = fgProcessEscapes(info.compFullName, s_EscapeMapping);
+ fprintf(fgxFile, "\n name=\"%s\"", escapedString);
+
+ escapedString = fgProcessEscapes(info.compClassName, s_EscapeMapping);
+ fprintf(fgxFile, "\n className=\"%s\"", escapedString);
+
+ escapedString = fgProcessEscapes(info.compMethodName, s_EscapeMapping);
+ fprintf(fgxFile, "\n methodName=\"%s\"", escapedString);
+ fprintf(fgxFile, "\n ngenRegion=\"%s\"", regionString);
+
+ fprintf(fgxFile, "\n bytesOfIL=\"%d\"", info.compILCodeSize);
+ fprintf(fgxFile, "\n localVarCount=\"%d\"", lvaCount);
+
+ if (fgHaveProfileData())
+ {
+ fprintf(fgxFile, "\n calledCount=\"%d\"", calledCount);
+ fprintf(fgxFile, "\n profileData=\"true\"");
+ }
+ if (compHndBBtabCount > 0)
+ {
+ fprintf(fgxFile, "\n hasEHRegions=\"true\"");
+ }
+ if (fgHasLoops)
+ {
+ fprintf(fgxFile, "\n hasLoops=\"true\"");
+ }
+ if (validWeights)
+ {
+ fprintf(fgxFile, "\n validEdgeWeights=\"true\"");
+ if (!fgSlopUsedInEdgeWeights && !fgRangeUsedInEdgeWeights)
+ {
+ fprintf(fgxFile, "\n exactEdgeWeights=\"true\"");
+ }
+ }
+ if (fgFirstColdBlock != nullptr)
+ {
+ fprintf(fgxFile, "\n firstColdBlock=\"%d\"", fgFirstColdBlock->bbNum);
+ }
+
+ fprintf(fgxFile, ">");
+
+ fprintf(fgxFile, "\n <blocks");
+ fprintf(fgxFile, "\n blockCount=\"%d\"", fgBBcount);
+ fprintf(fgxFile, ">");
+ }
+
+ static const char* kindImage[] = { "EHFINALLYRET", "EHFILTERRET", "EHCATCHRET",
+ "THROW", "RETURN", "NONE", "ALWAYS", "LEAVE",
+ "CALLFINALLY", "COND", "SWITCH" };
+
+ BasicBlock* block;
+ unsigned blockOrdinal;
+    for (block = fgFirstBB, blockOrdinal = 1;
+ block != nullptr;
+ block = block->bbNext, blockOrdinal++)
+ {
+ if (createDotFile)
+ {
+ // Add constraint edges to try to keep nodes ordered.
+ // It seems to work best if these edges are all created first.
+ switch(block->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_NONE:
+ assert(block->bbNext != nullptr);
+ fprintf(fgxFile, " BB%02u -> BB%02u\n", block->bbNum, block->bbNext->bbNum);
+ break;
+ default:
+ // These may or may not have an edge to the next block.
+ // Add a transparent edge to keep nodes ordered.
+ if (block->bbNext != nullptr)
+ {
+ fprintf(fgxFile, " BB%02u -> BB%02u [arrowtail=none,color=transparent]\n", block->bbNum, block->bbNext->bbNum);
+ }
+ }
+ }
+ else
+ {
+ fprintf(fgxFile,"\n <block");
+ fprintf(fgxFile,"\n id=\"%d\"", block->bbNum);
+ fprintf(fgxFile,"\n ordinal=\"%d\"", blockOrdinal);
+ fprintf(fgxFile,"\n jumpKind=\"%s\"", kindImage[block->bbJumpKind]);
+ if (block->hasTryIndex())
+ {
+ fprintf(fgxFile,"\n inTry=\"%s\"", "true");
+ }
+ if (block->hasHndIndex())
+ {
+ fprintf(fgxFile,"\n inHandler=\"%s\"", "true");
+ }
+ if (((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0) &&
+ ((block->bbFlags & BBF_COLD) == 0) )
+ {
+ fprintf(fgxFile,"\n hot=\"true\"");
+ }
+ if (block->bbFlags & (BBF_HAS_NEWOBJ | BBF_HAS_NEWARRAY))
+ {
+ fprintf(fgxFile,"\n callsNew=\"true\"");
+ }
+ if (block->bbFlags & BBF_LOOP_HEAD)
+ {
+ fprintf(fgxFile,"\n loopHead=\"true\"");
+ }
+ fprintf(fgxFile,"\n weight=");
+ fprintfDouble(fgxFile, ((double) block->bbWeight) / weightDivisor);
+ fprintf(fgxFile,"\n codeEstimate=\"%d\"", fgGetCodeEstimate(block));
+ fprintf(fgxFile,"\n startOffset=\"%d\"", block->bbCodeOffs);
+ fprintf(fgxFile,"\n endOffset=\"%d\"", block->bbCodeOffsEnd);
+ fprintf(fgxFile, ">");
+ fprintf(fgxFile,"\n </block>");
+ }
+ }
+
+ if (!createDotFile)
+ {
+ fprintf(fgxFile, "\n </blocks>");
+
+ fprintf(fgxFile, "\n <edges");
+ fprintf(fgxFile, "\n edgeCount=\"%d\"", fgEdgeCount);
+ fprintf(fgxFile, ">");
+ }
+
+ unsigned edgeNum = 1;
+ BasicBlock* bTarget;
+ for (bTarget = fgFirstBB; bTarget != nullptr; bTarget = bTarget->bbNext)
+ {
+ double targetWeightDivisor;
+ if (bTarget->bbWeight == BB_ZERO_WEIGHT)
+ {
+ targetWeightDivisor = 1.0;
+ }
+ else
+ {
+ targetWeightDivisor = (double) bTarget->bbWeight;
+ }
+
+ flowList* edge;
+ for (edge = bTarget->bbPreds; edge != nullptr; edge = edge->flNext, edgeNum++)
+ {
+ BasicBlock* bSource = edge->flBlock;
+ double sourceWeightDivisor;
+ if (bSource->bbWeight == BB_ZERO_WEIGHT)
+ {
+ sourceWeightDivisor = 1.0;
+ }
+ else
+ {
+ sourceWeightDivisor = (double) bSource->bbWeight;
+ }
+ if (createDotFile)
+ {
+ // Don't duplicate the edges we added above.
+ if ((bSource->bbNum == (bTarget->bbNum - 1)) &&
+ ((bSource->bbJumpKind == BBJ_NONE) || (bSource->bbJumpKind == BBJ_COND)))
+ {
+ continue;
+ }
+ fprintf(fgxFile, " BB%02u -> BB%02u", bSource->bbNum, bTarget->bbNum);
+ if ((bSource->bbNum > bTarget->bbNum))
+ {
+ fprintf(fgxFile, "[arrowhead=normal,arrowtail=none,color=green]\n");
+ }
+ else
+ {
+ fprintf(fgxFile, "\n");
+ }
+ }
+ else
+ {
+ fprintf(fgxFile,"\n <edge");
+ fprintf(fgxFile,"\n id=\"%d\"", edgeNum);
+ fprintf(fgxFile,"\n source=\"%d\"", bSource->bbNum);
+ fprintf(fgxFile,"\n target=\"%d\"", bTarget->bbNum);
+ if (bSource->bbJumpKind == BBJ_SWITCH)
+ {
+ if (edge->flDupCount >= 2)
+ {
+ fprintf(fgxFile,"\n switchCases=\"%d\"", edge->flDupCount);
+ }
+ if (bSource->bbJumpSwt->getDefault() == bTarget)
+ {
+ fprintf(fgxFile,"\n switchDefault=\"true\"");
+ }
+ }
+ if (validWeights)
+ {
+ unsigned edgeWeight = (edge->flEdgeWeightMin + edge->flEdgeWeightMax) / 2;
+ fprintf(fgxFile,"\n weight=");
+ fprintfDouble(fgxFile, ((double) edgeWeight) / weightDivisor);
+
+ if (edge->flEdgeWeightMin != edge->flEdgeWeightMax)
+ {
+ fprintf(fgxFile,"\n minWeight=");
+ fprintfDouble(fgxFile, ((double) edge->flEdgeWeightMin) / weightDivisor);
+ fprintf(fgxFile,"\n maxWeight=");
+ fprintfDouble(fgxFile, ((double) edge->flEdgeWeightMax) / weightDivisor);
+ }
+
+ if (edgeWeight > 0)
+ {
+ if (edgeWeight < bSource->bbWeight)
+ {
+ fprintf(fgxFile,"\n out=");
+ fprintfDouble(fgxFile, ((double) edgeWeight) / sourceWeightDivisor );
+ }
+ if (edgeWeight < bTarget->bbWeight)
+ {
+ fprintf(fgxFile,"\n in=");
+ fprintfDouble(fgxFile, ((double) edgeWeight) / targetWeightDivisor);
+ }
+ }
+ }
+ }
+ if (!createDotFile)
+ {
+ fprintf(fgxFile, ">");
+ fprintf(fgxFile,"\n </edge>");
+ }
+ }
+ }
+ if (createDotFile)
+ {
+ fprintf(fgxFile, "}\n");
+ }
+ else
+ {
+ fprintf(fgxFile, "\n </edges>");
+ fprintf(fgxFile, "\n</method>\n");
+ }
+
+ if (dontClose)
+ {
+ // fgxFile is jitstdout or stderr
+ fprintf(fgxFile, "\n");
+ }
+ else
+ {
+ fclose(fgxFile);
+ }
+
+ return result;
+}
+
+#endif // DUMP_FLOWGRAPHS
+
+/*****************************************************************************/
+#ifdef DEBUG
+
+void Compiler::fgDispReach()
+{
+ printf("------------------------------------------------\n");
+ printf("BBnum Reachable by \n");
+ printf("------------------------------------------------\n");
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ printf("BB%02u : ", block->bbNum);
+ BLOCKSET_ITER_INIT(this, iter, block->bbReach, bbNum);
+ while (iter.NextElem(this, &bbNum))
+ {
+ printf("BB%02u ", bbNum);
+ }
+ printf("\n");
+ }
+}
+
+void Compiler::fgDispDoms()
+{
+ // Don't bother printing this when we have a large number of BasicBlocks in the method
+ if (fgBBcount > 256)
+ {
+ return;
+ }
+
+ printf("------------------------------------------------\n");
+ printf("BBnum Dominated by\n");
+ printf("------------------------------------------------\n");
+
+ for (unsigned i = 1; i <= fgBBNumMax; ++i)
+ {
+ BasicBlock* current = fgBBInvPostOrder[i];
+ printf("BB%02u: ", current->bbNum);
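+        // Walk up the bbIDom chain; the root of the chain is its own immediate dominator,
+        // which terminates the loop.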
+ while (current != current->bbIDom)
+ {
+ printf("BB%02u ", current->bbNum);
+ current = current->bbIDom;
+ }
+ printf("\n");
+ }
+}
+
+/*****************************************************************************/
+
+void Compiler::fgTableDispBasicBlock(BasicBlock* block,
+ int ibcColWidth /* = 0 */)
+{
+ unsigned flags = block->bbFlags;
+
+ unsigned bbNumMax = compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax;
+ int maxBlockNumWidth = CountDigits(bbNumMax);
+ maxBlockNumWidth = max(maxBlockNumWidth, 2);
+ int blockNumWidth = CountDigits(block->bbNum);
+ blockNumWidth = max(blockNumWidth, 2);
+ int blockNumPadding = maxBlockNumWidth - blockNumWidth;
+
+ printf("BB%02u%*s [%08p] %2u",
+ block->bbNum,
+ blockNumPadding, "",
+ dspPtr(block),
+ block->bbRefs);
+
+ //
+ // Display EH 'try' region index
+ //
+
+ if (block->hasTryIndex())
+ {
+ printf(" %2u", block->getTryIndex());
+ }
+ else
+ {
+ printf(" ");
+ }
+
+ //
+ // Display EH handler region index
+ //
+
+ if (block->hasHndIndex())
+ {
+ printf(" %2u", block->getHndIndex());
+ }
+ else
+ {
+ printf(" ");
+ }
+
+ printf(" ");
+
+ //
+ // Display block predecessor list
+ //
+
+ unsigned charCnt;
+ if (fgCheapPredsValid)
+ {
+ charCnt = block->dspCheapPreds();
+ }
+ else
+ {
+ charCnt = block->dspPreds();
+ }
+
+ if (charCnt < 19)
+ {
+ printf("%*s", 19 - charCnt, "");
+ }
+
+ printf(" ");
+
+ //
+ // Display block weight
+ //
+
+ if (block->isMaxBBWeight())
+ {
+ printf(" MAX ");
+ }
+ else
+ {
+ printf("%6s", refCntWtd2str(block->getBBWeight(this)));
+ }
+
+ //
+ // Display optional IBC weight column.
+ // Note that iColWidth includes one character for a leading space, if there is an IBC column.
+ //
+
+ if (ibcColWidth > 0)
+ {
+ if (block->bbFlags & BBF_PROF_WEIGHT)
+ {
+ printf("%*u", ibcColWidth, block->bbWeight);
+ }
+ else
+ {
+ // No IBC data. Just print spaces to align the column.
+ printf("%*s", ibcColWidth, "");
+ }
+ }
+
+ printf(" ");
+
+ //
+ // Display block IL range
+ //
+
+ block->dspBlockILRange();
+
+ //
+ // Display block branch target
+ //
+
+ if (flags & BBF_REMOVED)
+ {
+ printf( "[removed] ");
+ }
+ else
+ {
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ printf("-> BB%02u%*s ( cond )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ break;
+
+ case BBJ_CALLFINALLY:
+ printf("-> BB%02u%*s (callf )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ break;
+
+ case BBJ_ALWAYS:
+ if (flags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ printf("-> BB%02u%*s (ALWAYS)", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ }
+ else
+ {
+ printf("-> BB%02u%*s (always)", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ }
+ break;
+
+ case BBJ_LEAVE:
+ printf("-> BB%02u%*s (leave )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ break;
+
+ case BBJ_EHFINALLYRET:
+ printf( "%*s (finret)", maxBlockNumWidth - 2, "");
+ break;
+
+ case BBJ_EHFILTERRET:
+ printf( "%*s (fltret)", maxBlockNumWidth - 2, "");
+ break;
+
+ case BBJ_EHCATCHRET:
+ printf("-> BB%02u%*s ( cret )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ break;
+
+ case BBJ_THROW:
+ printf( "%*s (throw )", maxBlockNumWidth - 2, "");
+ break;
+
+ case BBJ_RETURN:
+ printf( "%*s (return)", maxBlockNumWidth - 2, "");
+ break;
+
+ default:
+ printf( "%*s ", maxBlockNumWidth - 2, "");
+ break;
+
+ case BBJ_SWITCH:
+ printf("->");
+
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+ int switchWidth;
+ switchWidth = 0;
+ do
+ {
+ printf("%cBB%02u",
+ (jumpTab == block->bbJumpSwt->bbsDstTab) ? ' ' : ',',
+ (*jumpTab)->bbNum);
+ switchWidth += 1 /* space/comma */ + 2 /* BB */ + max(CountDigits((*jumpTab)->bbNum), 2);
+ }
+ while (++jumpTab, --jumpCnt);
+
+ if (switchWidth < 7)
+ {
+ printf("%*s", 8 - switchWidth, "");
+ }
+
+ printf(" (switch)");
+ break;
+ }
+ }
+
+ printf(" ");
+
+ //
+ // Display block EH region and type, including nesting indicator
+ //
+
+ if (block->hasTryIndex())
+ {
+ printf("T%d ", block->getTryIndex());
+ }
+ else
+ {
+ printf(" ");
+ }
+
+ if (block->hasHndIndex())
+ {
+ printf("H%d ", block->getHndIndex());
+ }
+ else
+ {
+ printf(" ");
+ }
+
+ if (flags & BBF_FUNCLET_BEG)
+ {
+ printf("F ");
+ }
+ else
+ {
+ printf(" ");
+ }
+
+ int cnt = 0;
+
+ switch (block->bbCatchTyp)
+ {
+ case BBCT_NONE: break;
+ case BBCT_FAULT: printf("fault "); cnt += 6; break;
+ case BBCT_FINALLY: printf("finally "); cnt += 8; break;
+ case BBCT_FILTER: printf("filter "); cnt += 7; break;
+ case BBCT_FILTER_HANDLER: printf("filtHnd "); cnt += 8; break;
+ default: printf("catch "); cnt += 6; break;
+ }
+
+ if (block->bbCatchTyp != BBCT_NONE)
+ {
+ cnt += 2;
+ printf("{ ");
+ /* brace matching editor workaround to compensate for the preceding line: } */
+ }
+
+ if (flags & BBF_TRY_BEG)
+ {
+ // Output a brace for every try region that this block opens
+
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount;
+ HBtab < HBtabEnd;
+ HBtab++)
+ {
+ if (HBtab->ebdTryBeg == block)
+ {
+ cnt += 6;
+ printf("try { ");
+ /* brace matching editor workaround to compensate for the preceding line: } */
+ }
+ }
+ }
+
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount;
+ HBtab < HBtabEnd;
+ HBtab++)
+ {
+ if (HBtab->ebdTryLast == block)
+ {
+ cnt += 2;
+ /* brace matching editor workaround to compensate for the following line: { */
+ printf("} ");
+ }
+ if (HBtab->ebdHndLast == block)
+ {
+ cnt += 2;
+ /* brace matching editor workaround to compensate for the following line: { */
+ printf("} ");
+ }
+ if (HBtab->HasFilter() && block->bbNext == HBtab->ebdHndBeg)
+ {
+ cnt += 2;
+ /* brace matching editor workaround to compensate for the following line: { */
+ printf("} ");
+ }
+ }
+
+ while (cnt < 12)
+ {
+ cnt++;
+ printf(" ");
+ }
+
+ //
+ // Display block flags
+ //
+
+ block->dspFlags();
+
+ printf("\n");
+}
+
+/****************************************************************************
+ Dump blocks from firstBlock to lastBlock.
+*/
+
+void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock,
+ BasicBlock* lastBlock,
+ bool dumpTrees)
+{
+ BasicBlock* block;
+
+ int padWidth = 0;
+#ifdef _TARGET_AMD64_
+ padWidth = 8;
+#endif // _TARGET_AMD64_
+
+ // If any block has IBC data, we add an "IBC weight" column just before the 'IL range' column. This column is as
+ // wide as necessary to accommodate all the various IBC weights. It's at least 4 characters wide, to accommodate
+ // the "IBC" title and leading space.
+ int ibcColWidth = 0;
+ for (block = firstBlock; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbFlags & BBF_PROF_WEIGHT)
+ {
+ int thisIbcWidth = CountDigits(block->bbWeight);
+ ibcColWidth = max(ibcColWidth, thisIbcWidth);
+ }
+
+ if (block == lastBlock) {
+ break;
+ }
+ }
+ if (ibcColWidth > 0)
+ {
+ ibcColWidth = max(ibcColWidth, 3) + 1; // + 1 for the leading space
+ }
+
+ unsigned bbNumMax = compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax;
+ int maxBlockNumWidth = CountDigits(bbNumMax);
+ maxBlockNumWidth = max(maxBlockNumWidth, 2);
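+    // Jump targets print as "BB%02u" (at least two digits), so the block-number width is never less than 2.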
+
+ padWidth += maxBlockNumWidth - 2; // Account for functions with a large number of blocks.
+
+ printf("\n");
+ printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n",
+ padWidth, "------------",
+ ibcColWidth, "------------",
+ maxBlockNumWidth, "----");
+ printf("BBnum %*sdescAddr ref try hnd %s weight %*s%s [IL range] [jump]%*s [EH region] [flags]\n",
+ padWidth, "",
+ fgCheapPredsValid ? "cheap preds" :
+ (fgComputePredsDone ? "preds "
+ : " "),
+ ((ibcColWidth > 0) ? ibcColWidth - 3 : 0), "", // Subtract 3 for the width of "IBC", printed next.
+ ((ibcColWidth > 0) ? "IBC"
+ : ""),
+ maxBlockNumWidth, ""
+ );
+ printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n",
+ padWidth, "------------",
+ ibcColWidth, "------------",
+ maxBlockNumWidth, "----");
+
+ for (block = firstBlock;
+ block;
+ block = block->bbNext)
+ {
+ // First, do some checking on the bbPrev links
+ if (block->bbPrev)
+ {
+ if (block->bbPrev->bbNext != block)
+ {
+ printf("bad prev link\n");
+ }
+ }
+ else if (block != fgFirstBB)
+ {
+ printf("bad prev link!\n");
+ }
+
+ if (block == fgFirstColdBlock)
+ {
+ printf("~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
+ padWidth, "~~~~~~~~~~~~",
+ ibcColWidth, "~~~~~~~~~~~~",
+ maxBlockNumWidth, "~~~~");
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (block == fgFirstFuncletBB)
+ {
+ printf("++++++%*s++++++++++++++++++++++++++++++++++++%*s+++++++++++++++++++++++%*s++++++++++++++++++++++++++++++++++++++++ funclets follow\n",
+ padWidth, "++++++++++++",
+ ibcColWidth, "++++++++++++",
+ maxBlockNumWidth, "++++");
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ fgTableDispBasicBlock(block, ibcColWidth);
+
+ if (block == lastBlock) {
+ break;
+ }
+ }
+
+ printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n",
+ padWidth, "------------",
+ ibcColWidth, "------------",
+ maxBlockNumWidth, "----");
+
+ if (dumpTrees)
+ {
+ fgDumpTrees(firstBlock, lastBlock);
+ }
+}
+
+/*****************************************************************************/
+
+void Compiler::fgDispBasicBlocks(bool dumpTrees)
+{
+ fgDispBasicBlocks(fgFirstBB, nullptr, dumpTrees);
+}
+
+/*****************************************************************************/
+// Increment the stmtNum and dump the tree using gtDispTree
+//
+void Compiler::fgDumpStmtTree(GenTreePtr stmt, unsigned blkNum)
+{
+ compCurStmtNum++; // Increment the current stmtNum
+
+ printf("\n***** BB%02u, stmt %d\n", blkNum, compCurStmtNum);
+
+ if (fgOrder == FGOrderLinear || opts.compDbgInfo)
+ {
+ gtDispTree(stmt);
+ }
+ else
+ {
+ gtDispTree(stmt->gtStmt.gtStmtExpr);
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::fgDumpBlock: dumps the contents of the given block to stdout.
+//
+// Arguments:
+// block - The block to dump.
+//
+void Compiler::fgDumpBlock(BasicBlock* block)
+{
+ printf("\n------------ ");
+ block->dspBlockHeader(this);
+
+ if (!block->IsLIR())
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
+ {
+ fgDumpStmtTree(stmt, block->bbNum);
+ if (stmt == block->bbTreeList)
+ {
+ block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
+ }
+ }
+ }
+ else
+ {
+ gtDispRange(LIR::AsRange(block));
+ }
+}
+
+/*****************************************************************************/
+// Walk the BasicBlock list calling fgDumpTree once per Stmt
+//
+void Compiler::fgDumpTrees(BasicBlock* firstBlock,
+ BasicBlock* lastBlock)
+{
+ compCurStmtNum = 0; // Reset the current stmtNum
+
+ /* Walk the basic blocks */
+
+ // Note that typically we have already called fgDispBasicBlocks()
+ // so we don't need to print the preds and succs again here
+ //
+ for (BasicBlock* block = firstBlock; block; block = block->bbNext)
+ {
+ fgDumpBlock(block);
+
+ if (block == lastBlock) {
+ break;
+ }
+ }
+ printf("\n-------------------------------------------------------------------------------------------------------------------\n");
+}
+
+
+/*****************************************************************************
+ * Try to create as many candidates for GTF_MUL_64RSLT as possible.
+ * We convert 'intOp1*intOp2' into 'int(long(nop(intOp1))*long(intOp2))'.
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::fgStress64RsltMulCB(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* pComp = data->compiler;
+
+ if (tree->gtOper != GT_MUL || tree->gtType != TYP_INT || (tree->gtOverflow())) {
+ return WALK_CONTINUE;
+    }
+
+    // Wrap op1 in a GT_NOP to ensure optNarrowTree() doesn't fold the tree back to the original.
+ tree->gtOp.gtOp1 = pComp->gtNewOperNode(GT_NOP, TYP_LONG, tree->gtOp.gtOp1);
+ tree->gtOp.gtOp1 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp1, TYP_LONG);
+ tree->gtOp.gtOp2 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp2, TYP_LONG);
+ tree->gtType = TYP_LONG;
+ *pTree = pComp->gtNewCastNode(TYP_INT, tree, TYP_INT);
+
+ return WALK_SKIP_SUBTREES;
+}
+
+void Compiler::fgStress64RsltMul()
+{
+ if (!compStressCompile(STRESS_64RSLT_MUL, 20)) {
+ return;
+    }
+
+ fgWalkAllTreesPre(fgStress64RsltMulCB, (void*)this);
+}
+
+
+// This variable is used to generate "traversal labels": one-time constants with which
+// we label basic blocks that are members of the basic block list, in order to have a
+// fast, high-probability test for membership in that list. The type is "volatile" because
+// it's incremented with an atomic operation, which wants a volatile type; wrap-around to 0
+// (which likely has the highest probability of accidental collision) is postponed for a
+// very long time.
+static volatile int bbTraverseLabel = 1;
+
+/*****************************************************************************
+ *
+ * A DEBUG routine to check the consistency of the flowgraph,
+ * i.e. bbNum, bbRefs, bbPreds have to be up to date.
+ *
+ *****************************************************************************/
+
+void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */,
+ bool checkBBRefs /* = true */)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgDebugCheckBBlist\n");
+ }
+#endif // DEBUG
+
+ fgDebugCheckBlockLinks();
+
+ if (fgBBcount > 10000 && expensiveDebugCheckLevel < 1)
+ {
+ // The basic block checks are too expensive if there are too many blocks,
+ // so give up unless we've been told to try hard.
+ return;
+ }
+
+ DWORD startTickCount = GetTickCount();
+
+ BasicBlock* block;
+ BasicBlock* prevBlock;
+ BasicBlock* blockPred;
+ flowList* pred;
+ unsigned blockRefs;
+
+#if FEATURE_EH_FUNCLETS
+ bool reachedFirstFunclet = false;
+ if (fgFuncletsCreated)
+ {
+ //
+ // Make sure that fgFirstFuncletBB is accurate.
+ // It should be the first basic block in a handler region.
+ //
+ if (fgFirstFuncletBB != nullptr)
+ {
+ assert(fgFirstFuncletBB->hasHndIndex() == true);
+ assert(fgFirstFuncletBB->bbFlags & BBF_FUNCLET_BEG);
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ /* Check bbNum, bbRefs and bbPreds */
+ // First, pick a traversal stamp, and label all the blocks with it.
+ unsigned curTraversalStamp = unsigned(InterlockedIncrement((LONG*)&bbTraverseLabel));
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ block->bbTraversalStamp = curTraversalStamp;
+ }
+
+ for (prevBlock = nullptr, block = fgFirstBB;
+ block;
+ prevBlock = block, block = block->bbNext)
+ {
+ blockRefs = 0;
+
+ /* First basic block has countOfInEdges() >= 1 */
+
+ if (block == fgFirstBB)
+ {
+ noway_assert(block->countOfInEdges() >= 1);
+ blockRefs = 1;
+ }
+
+ if (checkBBNum)
+ {
+ // Check that bbNum is sequential
+ noway_assert(block->bbNext == nullptr || (block->bbNum + 1 == block->bbNext->bbNum));
+ }
+
+ // If the block is a BBJ_COND, a BBJ_SWITCH or a
+ // lowered GT_SWITCH_TABLE node then make sure it
+ // ends with a GT_JTRUE or a GT_SWITCH
+
+ if (block->bbJumpKind == BBJ_COND)
+ {
+ noway_assert(block->lastNode()->gtNext == nullptr && block->lastNode()->gtOper == GT_JTRUE);
+ }
+ else if (block->bbJumpKind == BBJ_SWITCH)
+ {
+#ifndef LEGACY_BACKEND
+ noway_assert(block->lastNode()->gtNext == nullptr &&
+ (block->lastNode()->gtOper == GT_SWITCH ||
+ block->lastNode()->gtOper == GT_SWITCH_TABLE));
+#else // LEGACY_BACKEND
+ noway_assert(block->lastStmt()->gtNext == NULL &&
+ block->lastStmt()->gtStmtExpr->gtOper == GT_SWITCH);
+#endif // LEGACY_BACKEND
+ }
+ else if (!( block->bbJumpKind == BBJ_ALWAYS
+ || block->bbJumpKind == BBJ_RETURN))
+ {
+ //this block cannot have a poll
+ noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
+ }
+
+ if (block->bbCatchTyp == BBCT_FILTER)
+ {
+ if (!fgCheapPredsValid) // Don't check cheap preds
+ {
+ // A filter has no predecessors
+ noway_assert(block->bbPreds == nullptr);
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (fgFuncletsCreated)
+ {
+ //
+ // There should be no handler blocks until
+ // we get to the fgFirstFuncletBB block,
+ // then every block should be a handler block
+ //
+ if (!reachedFirstFunclet)
+ {
+ if (block == fgFirstFuncletBB)
+ {
+ assert(block->hasHndIndex() == true);
+ reachedFirstFunclet = true;
+ }
+ else
+ {
+ assert(block->hasHndIndex() == false);
+ }
+ }
+ else // reachedFirstFunclet
+ {
+ assert(block->hasHndIndex() == true);
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // Don't check cheap preds.
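+        // Walk the flow pred list, accumulating each pred's flDupCount into blockRefs so it can be compared against bbRefs below.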
+ for (pred = (fgCheapPredsValid ? nullptr : block->bbPreds); pred != nullptr; blockRefs += pred->flDupCount, pred = pred->flNext)
+ {
+ assert(fgComputePredsDone); // If this isn't set, why do we have a preds list?
+
+ /* make sure this pred is part of the BB list */
+
+ blockPred = pred->flBlock;
+ noway_assert(blockPred->bbTraversalStamp == curTraversalStamp);
+
+ EHblkDsc* ehTryDsc = ehGetBlockTryDsc(block);
+ if (ehTryDsc != nullptr)
+ {
+ // You can jump to the start of a try
+ if (ehTryDsc->ebdTryBeg == block) {
+ goto CHECK_HND;
+                }
+
+ // You can jump within the same try region
+ if (bbInTryRegions(block->getTryIndex(), blockPred)) {
+ goto CHECK_HND;
+                }
+
+ // The catch block can jump back into the middle of the try
+ if (bbInCatchHandlerRegions(block, blockPred)) {
+ goto CHECK_HND;
+                }
+
+ // The end of a finally region is a BBJ_EHFINALLYRET block (during importing, BBJ_LEAVE) which
+ // is marked as "returning" to the BBJ_ALWAYS block following the BBJ_CALLFINALLY
+ // block that does a local call to the finally. This BBJ_ALWAYS is within
+ // the try region protected by the finally (for x86, ARM), but that's ok.
+ if (prevBlock->bbJumpKind == BBJ_CALLFINALLY &&
+ block->bbJumpKind == BBJ_ALWAYS &&
+ blockPred->bbJumpKind == BBJ_EHFINALLYRET) {
+ goto CHECK_HND;
+                }
+
+ printf("Jump into the middle of try region: BB%02u branches to BB%02u\n", blockPred->bbNum, block->bbNum);
+ noway_assert(!"Jump into middle of try region");
+ }
+
+CHECK_HND:;
+
+ EHblkDsc* ehHndDsc = ehGetBlockHndDsc(block);
+ if (ehHndDsc != nullptr)
+ {
+ // You can do a BBJ_EHFINALLYRET or BBJ_EHFILTERRET into a handler region
+ if ( (blockPred->bbJumpKind == BBJ_EHFINALLYRET)
+ || (blockPred->bbJumpKind == BBJ_EHFILTERRET)) {
+ goto CHECK_JUMP;
+                }
+
+ // Our try block can call our finally block
+ if ((block->bbCatchTyp == BBCT_FINALLY) &&
+ (blockPred->bbJumpKind == BBJ_CALLFINALLY) &&
+ ehCallFinallyInCorrectRegion(blockPred, block->getHndIndex()))
+ {
+ goto CHECK_JUMP;
+ }
+
+ // You can jump within the same handler region
+ if (bbInHandlerRegions(block->getHndIndex(), blockPred)) {
+ goto CHECK_JUMP;
+                }
+
+ // A filter can jump to the start of the filter handler
+ if (ehHndDsc->HasFilter()) {
+ goto CHECK_JUMP;
+                }
+
+ printf("Jump into the middle of handler region: BB%02u branches to BB%02u\n", blockPred->bbNum, block->bbNum);
+ noway_assert(!"Jump into the middle of handler region");
+ }
+
+CHECK_JUMP:;
+
+ switch (blockPred->bbJumpKind)
+ {
+ case BBJ_COND:
+ noway_assert(blockPred->bbNext == block || blockPred->bbJumpDest == block);
+ break;
+
+ case BBJ_NONE:
+ noway_assert(blockPred->bbNext == block);
+ break;
+
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_EHFILTERRET:
+ noway_assert(blockPred->bbJumpDest == block);
+ break;
+
+ case BBJ_EHFINALLYRET:
+ {
+ // If the current block is a successor to a BBJ_EHFINALLYRET (return from finally),
+ // then the lexically previous block should be a call to the same finally.
+ // Verify all of that.
+
+ unsigned hndIndex = blockPred->getHndIndex();
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex);
+ BasicBlock* finBeg = ehDsc->ebdHndBeg;
+
+ // Because there is no bbPrev, we have to search for the lexically previous
+ // block. We can shorten the search by only looking in places where it is legal
+ // to have a call to the finally.
+
+ BasicBlock* begBlk;
+ BasicBlock* endBlk;
+ ehGetCallFinallyBlockRange(hndIndex, &begBlk, &endBlk);
+
+ for (BasicBlock* bcall = begBlk; bcall != endBlk; bcall = bcall->bbNext)
+ {
+ if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg) {
+ continue;
+                    }
+
+ if (block == bcall->bbNext) {
+ goto PRED_OK;
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS
+
+ if (fgFuncletsCreated)
+ {
+ // There is no easy way to search just the funclets that were pulled out of
+ // the corresponding try body, so instead we search all the funclets, and if
+ // we find a potential 'hit' we check if the funclet we're looking at is
+ // from the correct try region.
+
+ for (BasicBlock* bcall = fgFirstFuncletBB; bcall; bcall = bcall->bbNext)
+ {
+ if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg) {
+ continue;
+                        }
+
+ if (block != bcall->bbNext) {
+ continue;
+                        }
+
+ if (ehCallFinallyInCorrectRegion(bcall, hndIndex)) {
+ goto PRED_OK;
+ }
+ }
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ noway_assert(!"BBJ_EHFINALLYRET predecessor of block that doesn't follow a BBJ_CALLFINALLY!");
+ }
+ break;
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ noway_assert(!"THROW and RETURN block cannot be in the predecessor list!");
+ break;
+
+ case BBJ_SWITCH:
+ unsigned jumpCnt; jumpCnt = blockPred->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab; jumpTab = blockPred->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ if (block == *jumpTab)
+ {
+ goto PRED_OK;
+ }
+ }
+ while (++jumpTab, --jumpCnt);
+
+ noway_assert(!"SWITCH in the predecessor list with no jump label to BLOCK!");
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+PRED_OK:;
+
+ }
+
+ /* Check the bbRefs */
+ noway_assert(!checkBBRefs || block->bbRefs == blockRefs);
+
+ /* Check that BBF_HAS_HANDLER is valid bbTryIndex */
+ if (block->hasTryIndex())
+ {
+ noway_assert(block->getTryIndex() < compHndBBtabCount);
+ }
+
+ /* Check if BBF_RUN_RARELY is set that we have bbWeight of zero */
+ if (block->isRunRarely())
+ {
+ noway_assert(block->bbWeight == BB_ZERO_WEIGHT);
+ }
+ else
+ {
+ noway_assert(block->bbWeight > BB_ZERO_WEIGHT);
+ }
+ }
+
+ // Make sure the one return BB is not changed.
+ if (genReturnBB)
+ {
+ noway_assert(genReturnBB->bbTreeList);
+ noway_assert(genReturnBB->IsLIR() || genReturnBB->bbTreeList->gtOper == GT_STMT);
+ noway_assert(genReturnBB->IsLIR() || genReturnBB->bbTreeList->gtType == TYP_VOID);
+ }
+
+ // The general encoder/decoder (currently) only reports "this" as a generics context as a stack location,
+ // so we mark info.compThisArg as lvAddrTaken to ensure that it is not enregistered. Otherwise, it should
+ // not be address-taken. This variable determines if the address-taken-ness of "thisArg" is "OK".
+ bool copiedForGenericsCtxt;
+#ifndef JIT32_GCENCODER
+ copiedForGenericsCtxt = ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0);
+#else // JIT32_GCENCODER
+ copiedForGenericsCtxt = FALSE;
+#endif // JIT32_GCENCODER
+
+ // This if only in support of the noway_asserts it contains.
+ if (info.compIsStatic)
+ {
+ // For static method, should have never grabbed the temp.
+ noway_assert(lvaArg0Var == BAD_VAR_NUM);
+ }
+ else
+ {
+ // For instance method:
+ assert(info.compThisArg != BAD_VAR_NUM);
+ bool compThisArgAddrExposedOK = !lvaTable[info.compThisArg].lvAddrExposed;
+#ifndef JIT32_GCENCODER
+ compThisArgAddrExposedOK = compThisArgAddrExposedOK || copiedForGenericsCtxt;
+#endif // !JIT32_GCENCODER
+ noway_assert(compThisArgAddrExposedOK && // should never expose the address of arg 0 or
+ !lvaTable[info.compThisArg].lvArgWrite && // write to arg 0.
+ ( // In addition,
+ lvaArg0Var == info.compThisArg || // lvArg0Var should remain 0 if arg0 is not written to or address-exposed.
+ lvaArg0Var != info.compThisArg &&
+ (lvaTable[lvaArg0Var].lvAddrExposed || lvaTable[lvaArg0Var].lvArgWrite || copiedForGenericsCtxt)
+ ));
+ }
+}
+
+/*****************************************************************************
+ *
+ * A DEBUG routine to check that the exception flags are correctly set.
+ *
+ ****************************************************************************/
+
+void Compiler::fgDebugCheckFlags(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper != GT_STMT);
+
+ genTreeOps oper = tree->OperGet();
+ unsigned kind = tree->OperKind();
+ unsigned treeFlags = tree->gtFlags & GTF_ALL_EFFECT;
+ unsigned chkFlags = 0;
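+    // chkFlags accumulates the effect flags implied by this node and its children; at the end we
+    // require treeFlags (the flags actually set on the node) to include every bit in chkFlags.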
+
+ /* Is this a leaf node? */
+
+ if (kind & GTK_LEAF)
+ {
+ switch (oper)
+ {
+ case GT_CLS_VAR:
+ chkFlags |= GTF_GLOB_REF;
+ break;
+
+ case GT_CATCH_ARG:
+ chkFlags |= GTF_ORDER_SIDEEFF;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ else if (kind & GTK_SMPOP)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ // During GS work, we make shadow copies for params.
+ // In gsParamsToShadows(), we create a shadow var of TYP_INT for every small type param.
+ // Then in gsReplaceShadowParams(), we change the gtLclNum to the shadow var.
+ // We also change the types of the local var tree and the assignment tree to TYP_INT if necessary.
+        // However, since we don't morph the tree at this late stage, manually propagating
+ // TYP_INT up to the GT_ASG tree is only correct if we don't need to propagate the TYP_INT back up.
+ // The following checks will ensure this.
+
+        // Is the left child of "tree" a GT_ASG?
+ if (op1 && op1->gtOper == GT_ASG)
+ {
+            assert(tree->gtType == TYP_VOID ||    // If the parent is TYP_VOID, we don't need to propagate TYP_INT up. We are fine.
+ tree->gtOper == GT_COMMA); // (or) If GT_ASG is the left child of a GT_COMMA, the type of the GT_COMMA node will
+ } // be determined by its right child. So we don't need to propagate TYP_INT up either. We are fine.
+
+        // Is the right child of "tree" a GT_ASG?
+ if (op2 && op2->gtOper == GT_ASG)
+ {
+            assert(tree->gtType == TYP_VOID); // If the parent is TYP_VOID, we don't need to propagate TYP_INT up. We are fine.
+ }
+
+ switch (oper)
+ {
+ case GT_QMARK:
+ if (op1->OperIsCompare())
+ {
+ noway_assert(op1->gtFlags & GTF_DONT_CSE);
+ }
+ else
+ {
+ noway_assert( (op1->gtOper == GT_CNS_INT) &&
+ ((op1->gtIntCon.gtIconVal == 0) || (op1->gtIntCon.gtIconVal == 1)) );
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ /* Recursively check the subtrees */
+
+        if (op1) {
+            fgDebugCheckFlags(op1);
+        }
+        if (op2) {
+            fgDebugCheckFlags(op2);
+        }
+
+        if (op1) {
+            chkFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
+        }
+        if (op2) {
+            chkFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
+        }
+
+ // We reuse the value of GTF_REVERSE_OPS for a GT_IND-specific flag,
+ // so exempt that (unary) operator.
+ if (tree->OperGet() != GT_IND && tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Must have two operands if GTF_REVERSE is set */
+ noway_assert(op1 && op2);
+
+ /* Make sure that the order of side effects has not been swapped. */
+
+ /* However CSE may introduce an assignment after the reverse flag
+ was set and thus GTF_ASG cannot be considered here. */
+
+ /* For a GT_ASG(GT_IND(x), y) we are interested in the side effects of x */
+ GenTreePtr op1p;
+ if ((kind & GTK_ASGOP) && (op1->gtOper == GT_IND))
+ {
+ op1p = op1->gtOp.gtOp1;
+ }
+ else
+ {
+ op1p = op1;
+ }
+
+ /* This isn't true any more with the sticky GTF_REVERSE */
+ /*
+ // if op1p has side effects, then op2 cannot have side effects
+ if (op1p->gtFlags & (GTF_SIDE_EFFECT & ~GTF_ASG))
+ {
+ if (op2->gtFlags & (GTF_SIDE_EFFECT & ~GTF_ASG))
+ gtDispTree(tree);
+ noway_assert(!(op2->gtFlags & (GTF_SIDE_EFFECT & ~GTF_ASG)));
+ }
+ */
+ }
+
+ if (kind & GTK_ASGOP)
+ {
+ chkFlags |= GTF_ASG;
+ }
+
+ /* Note that it is OK for treeFlags not to have a GTF_EXCEPT,
+ AssertionProp's non-Null may have cleared it */
+ if (tree->OperMayThrow())
+ {
+ chkFlags |= (treeFlags & GTF_EXCEPT);
+ }
+
+ if (oper == GT_ADDR &&
+ (op1->OperIsLocal() ||
+ op1->gtOper == GT_CLS_VAR ||
+ (op1->gtOper == GT_IND && op1->gtOp.gtOp1->gtOper == GT_CLS_VAR_ADDR)))
+ {
+            /* &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does.
+ Similarly for clsVar */
+ treeFlags |= GTF_GLOB_REF;
+ }
+ }
+
+ /* See what kind of a special operator we have here */
+
+ else { switch (tree->OperGet())
+ {
+ case GT_CALL:
+
+ GenTreePtr args;
+ GenTreePtr argx;
+ GenTreeCall* call;
+
+ call = tree->AsCall();
+
+ chkFlags |= GTF_CALL;
+
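+        // Below we accumulate side effects from the receiver ('this'), the argument lists, the indirect-call cookie, and the call target.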
+ if ((treeFlags & GTF_EXCEPT) && !(chkFlags & GTF_EXCEPT))
+ {
+ switch (eeGetHelperNum(tree->gtCall.gtCallMethHnd))
+ {
+ // Is this a helper call that can throw an exception ?
+ case CORINFO_HELP_LDIV:
+ case CORINFO_HELP_LMOD:
+ case CORINFO_HELP_METHOD_ACCESS_CHECK:
+ case CORINFO_HELP_FIELD_ACCESS_CHECK:
+ case CORINFO_HELP_CLASS_ACCESS_CHECK:
+ case CORINFO_HELP_DELEGATE_SECURITY_CHECK:
+ chkFlags |= GTF_EXCEPT;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (call->gtCallObjp)
+ {
+ fgDebugCheckFlags(call->gtCallObjp);
+ chkFlags |= (call->gtCallObjp->gtFlags & GTF_SIDE_EFFECT);
+
+ if (call->gtCallObjp->gtFlags & GTF_ASG)
+ {
+ treeFlags |= GTF_ASG;
+ }
+ }
+
+ for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2)
+ {
+ argx = args->gtOp.gtOp1;
+ fgDebugCheckFlags(argx);
+
+ chkFlags |= (argx->gtFlags & GTF_SIDE_EFFECT);
+
+ if (argx->gtFlags & GTF_ASG)
+ {
+ treeFlags |= GTF_ASG;
+ }
+ }
+
+ for (args = call->gtCallLateArgs; args; args = args->gtOp.gtOp2)
+ {
+ argx = args->gtOp.gtOp1;
+ fgDebugCheckFlags(argx);
+
+ chkFlags |= (argx->gtFlags & GTF_SIDE_EFFECT);
+
+ if (argx->gtFlags & GTF_ASG)
+ {
+ treeFlags |= GTF_ASG;
+ }
+ }
+
+ if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
+ {
+ fgDebugCheckFlags(call->gtCallCookie);
+ chkFlags |= (call->gtCallCookie->gtFlags & GTF_SIDE_EFFECT);
+ }
+
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ fgDebugCheckFlags(call->gtCallAddr);
+ chkFlags |= (call->gtCallAddr->gtFlags & GTF_SIDE_EFFECT);
+ }
+
+ if (call->IsUnmanaged() &&
+ (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL))
+ {
+ if (call->gtCallArgs->gtOp.gtOp1->OperGet() == GT_NOP)
+ {
+ noway_assert(call->gtCallLateArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
+ call->gtCallLateArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF);
+ }
+ else
+ {
+ noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
+ call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF);
+ }
+ }
+ break;
+
+ case GT_ARR_ELEM:
+
+ GenTreePtr arrObj;
+ unsigned dim;
+
+ arrObj = tree->gtArrElem.gtArrObj;
+ fgDebugCheckFlags(arrObj);
+ chkFlags |= (arrObj->gtFlags & GTF_ALL_EFFECT);
+
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ fgDebugCheckFlags(tree->gtArrElem.gtArrInds[dim]);
+ chkFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ fgDebugCheckFlags(tree->gtArrOffs.gtOffset);
+ chkFlags |= (tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT);
+ fgDebugCheckFlags(tree->gtArrOffs.gtIndex);
+ chkFlags |= (tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT);
+ fgDebugCheckFlags(tree->gtArrOffs.gtArrObj);
+ chkFlags |= (tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT);
+ break;
+
+ default:
+ break;
+ }
+    }
+
+ if (chkFlags & ~treeFlags)
+ {
+ // Print the tree so we can see it in the log.
+ printf("Missing flags on tree [%06d]: ", dspTreeID(tree));
+ GenTree::gtDispFlags(chkFlags & ~treeFlags, GTF_DEBUG_NONE);
+ printf("\n");
+ gtDispTree(tree);
+
+ noway_assert(!"Missing flags on tree");
+
+ // Print the tree again so we can see it right after we hook up the debugger.
+ printf("Missing flags on tree [%06d]: ", dspTreeID(tree));
+ GenTree::gtDispFlags(chkFlags & ~treeFlags, GTF_DEBUG_NONE);
+ printf("\n");
+ gtDispTree(tree);
+ }
+ else if (treeFlags & ~chkFlags)
+ {
+#if 0
+ // TODO-Cleanup:
+ /* The tree has extra flags set. However, this will happen if we
+ replace a subtree with something, but don't clear the flags up
+ the tree. Can't flag this unless we start clearing flags above.
+
+ Note: we need this working for GTF_CALL and CSEs, so I'm enabling
+ it for calls.
+ */
+ if (tree->OperGet() != GT_CALL && (treeFlags & GTF_CALL) && !(chkFlags & GTF_CALL))
+ {
+ // Print the tree so we can see it in the log.
+ printf("Extra GTF_CALL flags on parent tree [%X]: ", tree);
+ GenTree::gtDispFlags(treeFlags & ~chkFlags, GTF_DEBUG_NONE);
+ printf("\n");
+ gtDispTree(tree);
+
+ noway_assert(!"Extra flags on tree");
+
+ // Print the tree again so we can see it right after we hook up the debugger.
+ printf("Extra GTF_CALL flags on parent tree [%X]: ", tree);
+ GenTree::gtDispFlags(treeFlags & ~chkFlags, GTF_DEBUG_NONE);
+ printf("\n");
+ gtDispTree(tree);
+ }
+#endif // 0
+ }
+}
+
+// DEBUG routine to check correctness of the internal gtNext, gtPrev threading of a statement.
+// This threading is only valid when fgStmtListThreaded is true.
+// This calls an alternate method for FGOrderLinear.
+void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, GenTree* node)
+{
+    // LIR blocks are checked using LIR::Range::CheckLIR().
+ if (block->IsLIR())
+ {
+ LIR::AsRange(block).CheckLIR(this);
+ // TODO: return?
+ }
+
+ GenTreeStmt* stmt = node->AsStmt();
+
+ assert(fgStmtListThreaded);
+
+ noway_assert(stmt->gtStmtList);
+
+ // The first node's gtPrev must be nullptr (the gtPrev list is not circular).
+ // The last node's gtNext must be nullptr (the gtNext list is not circular). This is tested if the loop below terminates.
+ assert(stmt->gtStmtList->gtPrev == nullptr);
+
+ for (GenTreePtr tree = stmt->gtStmtList;
+ tree != nullptr;
+ tree = tree->gtNext)
+ {
+ if (tree->gtPrev)
+ {
+ noway_assert(tree->gtPrev->gtNext == tree);
+ }
+ else
+ {
+ noway_assert(tree == stmt->gtStmtList);
+ }
+
+ if (tree->gtNext)
+ {
+ noway_assert(tree->gtNext->gtPrev == tree);
+ }
+ else
+ {
+ noway_assert(tree == stmt->gtStmtExpr);
+ }
+
+ /* Cross-check gtPrev,gtNext with gtOp for simple trees */
+
+ GenTreePtr expectedPrevTree = nullptr;
+
+ if (tree->OperIsLeaf())
+ {
+ if (tree->gtOper == GT_CATCH_ARG)
+ {
+ // The GT_CATCH_ARG should always have GTF_ORDER_SIDEEFF set
+ noway_assert(tree->gtFlags & GTF_ORDER_SIDEEFF);
+ // The GT_CATCH_ARG has to be the first thing evaluated
+ noway_assert(stmt == block->FirstNonPhiDef());
+ noway_assert(stmt->gtStmtList->gtOper == GT_CATCH_ARG);
+ // The root of the tree should have GTF_ORDER_SIDEEFF set
+ noway_assert(stmt->gtStmtExpr->gtFlags & GTF_ORDER_SIDEEFF);
+ }
+ }
+
+ if (tree->OperIsUnary() && tree->gtOp.gtOp1)
+ {
+ GenTreePtr lclVarTree;
+ expectedPrevTree = tree->gtOp.gtOp1;
+ }
+ else if (tree->OperIsBinary() && tree->gtOp.gtOp1)
+ {
+ switch (tree->gtOper)
+ {
+ case GT_QMARK:
+ expectedPrevTree = tree->gtOp.gtOp2->AsColon()->ThenNode(); // "then" operand of the GT_COLON (generated second).
+ break;
+
+ case GT_COLON:
+ expectedPrevTree = tree->AsColon()->ElseNode(); // "else" branch result (generated first).
+ break;
+
+ default:
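+                    // With GTF_REVERSE_OPS, op2 is evaluated before op1, so op1 is the node linked immediately before this one; otherwise it is op2.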
+ if (tree->gtOp.gtOp2)
+ {
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ expectedPrevTree = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ expectedPrevTree = tree->gtOp.gtOp2;
+ }
+ }
+ else
+ {
+ expectedPrevTree = tree->gtOp.gtOp1;
+ }
+ break;
+ }
+ }
+
+ noway_assert(expectedPrevTree == nullptr || // No expectations about the prev node
+ tree->gtPrev == expectedPrevTree); // The "normal" case
+ }
+}
+
+
+/*****************************************************************************
+ *
+ * A DEBUG routine to check the correctness of the links between GT_STMT nodes
+ * and ordinary nodes within a statement.
+ *
+ ****************************************************************************/
+
+void Compiler::fgDebugCheckLinks(bool morphTrees)
+{
+ // This used to be only on for stress, and there was a comment stating that
+ // it was "quite an expensive operation" but I did not find that to be true.
+ // Set DO_SANITY_DEBUG_CHECKS to false to revert to that behavior.
+ const bool DO_SANITY_DEBUG_CHECKS = true;
+
+ if (!DO_SANITY_DEBUG_CHECKS &&
+ !compStressCompile(STRESS_CHK_FLOW_UPDATE, 30))
+ {
+ return;
+ }
+
+ fgDebugCheckBlockLinks();
+
+ /* For each basic block check the bbTreeList links */
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+PROCESS_BLOCK_AGAIN:;
+ if (block->IsLIR())
+ {
+ LIR::AsRange(block).CheckLIR(this);
+ }
+ else
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ /* Verify that bbTreeList is threaded correctly */
+ /* Note that for the GT_STMT list, the gtPrev list is circular. The gtNext list is not: gtNext of the last GT_STMT in a block is nullptr. */
+
+ noway_assert(stmt->gtPrev);
+
+ if (stmt == block->bbTreeList)
+ {
+ noway_assert(stmt->gtPrev->gtNext == nullptr);
+ }
+ else
+ {
+ noway_assert(stmt->gtPrev->gtNext == stmt);
+ }
+
+ if (stmt->gtNext)
+ {
+ noway_assert(stmt->gtNext->gtPrev == stmt);
+ }
+ else
+ {
+ noway_assert(block->lastStmt() == stmt);
+ }
+
+ /* For each statement check that the exception flags are properly set */
+
+ noway_assert(stmt->gtStmtExpr);
+
+ if (verbose && 0)
+ {
+ gtDispTree(stmt->gtStmtExpr);
+ }
+
+ fgDebugCheckFlags(stmt->gtStmtExpr);
+
+ // Not only will this stress fgMorphBlockStmt(), but we also get all the checks
+ // done by fgMorphTree()
+
+ if (morphTrees)
+ {
+ // If 'stmt' is removed from the block, restart
+ if (fgMorphBlockStmt(block, stmt DEBUGARG("test morphing")))
+ {
+ goto PROCESS_BLOCK_AGAIN;
+ }
+ }
+
+                /* For each GT_STMT node check that the nodes are threaded correctly - gtStmtList */
+
+ if (fgStmtListThreaded)
+ {
+ fgDebugCheckNodeLinks(block, stmt);
+ }
+ }
+ }
+ }
+}
+
+// ensure that bbNext and bbPrev are consistent
+void Compiler::fgDebugCheckBlockLinks()
+{
+ assert(fgFirstBB->bbPrev == nullptr);
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbNext)
+ {
+ assert(block->bbNext->bbPrev == block);
+ }
+ else
+ {
+ assert(block == fgLastBB);
+ }
+
+ if (block->bbPrev)
+ {
+ assert(block->bbPrev->bbNext == block);
+ }
+ else
+ {
+ assert(block == fgFirstBB);
+ }
+
+ // If this is a switch, check that the tables are consistent.
+ // Note that we don't call GetSwitchDescMap(), because it has the side-effect
+ // of allocating it if it is not present.
+ if (block->bbJumpKind == BBJ_SWITCH && m_switchDescMap != nullptr)
+ {
+ SwitchUniqueSuccSet uniqueSuccSet;
+ if (m_switchDescMap->Lookup(block, &uniqueSuccSet))
+ {
+ // Create a set with all the successors. Don't use BlockSet, so we don't need to worry
+ // about the BlockSet epoch.
+ BitVecTraits bitVecTraits(fgBBNumMax + 1, this);
+ BitVec BITVEC_INIT_NOCOPY(succBlocks, BitVecOps::MakeEmpty(&bitVecTraits));
+ BasicBlock** jumpTable = block->bbJumpSwt->bbsDstTab;
+ unsigned jumpCount = block->bbJumpSwt->bbsCount;
+ for (unsigned i = 0; i < jumpCount; i++)
+ {
+ BitVecOps::AddElemD(&bitVecTraits, succBlocks, jumpTable[i]->bbNum);
+ }
+ // Now we should have a set of unique successors that matches what's in the switchMap.
+ // First, check the number of entries, then make sure all the blocks in uniqueSuccSet
+ // are in the BlockSet.
+ unsigned count = BitVecOps::Count(&bitVecTraits, succBlocks);
+ assert(uniqueSuccSet.numDistinctSuccs == count);
+ for (unsigned i = 0; i < uniqueSuccSet.numDistinctSuccs; i++)
+ {
+ assert(BitVecOps::IsMember(&bitVecTraits, succBlocks, uniqueSuccSet.nonDuplicates[i]->bbNum));
+ }
+ }
+ }
+ }
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+/*****************************************************************************/
+
+//------------------------------------------------------------------------
+// fgCheckInlineDepthAndRecursion: compute depth of the candidate, and
+// check for recursion.
+//
+// Return Value:
+//    The depth of the inline candidate. The root method is at depth 0, top-level
+//    candidates are at depth 1, etc.
+//
+// Notes:
+// We generally disallow recursive inlines by policy. However, they are
+// supported by the underlying machinery.
+//
+// Likewise the depth limit is a policy consideration, and serves mostly
+// as a safeguard to prevent runaway inlining of small methods.
+
+unsigned Compiler::fgCheckInlineDepthAndRecursion(InlineInfo* inlineInfo)
+{
+ BYTE* candidateCode = inlineInfo->inlineCandidateInfo->methInfo.ILCode;
+ InlineContext* inlineContext = inlineInfo->iciStmt->gtStmt.gtInlineContext;
+ InlineResult* inlineResult = inlineInfo->inlineResult;
+
+ // There should be a context for all candidates.
+ assert(inlineContext != nullptr);
+ int depth = 0;
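+    // Walk up the chain of inline contexts; each ancestor adds one to the depth, and an ancestor
+    // that was inlined from the same IL code buffer indicates a recursive inline.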
+
+ for (; inlineContext != nullptr; inlineContext = inlineContext->GetParent())
+ {
+
+ depth++;
+
+ if (inlineContext->GetCode() == candidateCode)
+ {
+ // This inline candidate has the same IL code buffer as an already
+ // inlined method does.
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_IS_RECURSIVE);
+ break;
+ }
+
+ if (depth > InlineStrategy::IMPLEMENTATION_MAX_INLINE_DEPTH)
+ {
+ break;
+ }
+ }
+
+ inlineResult->NoteInt(InlineObservation::CALLSITE_DEPTH, depth);
+ return depth;
+}
+
+/*****************************************************************************
+ *
+ * Inlining phase
+ */
+
+
+void Compiler::fgInline()
+{
+ if (!opts.OptEnabled(CLFLG_INLINING)) {
+ return;
+    }
+
+#ifdef DEBUG
+ if (verbose) {
+ printf("*************** In fgInline()\n");
+    }
+#endif // DEBUG
+
+ BasicBlock* block = fgFirstBB;
+ noway_assert(block != nullptr);
+
+ // Set the root inline context on all statements
+ InlineContext* rootContext = m_inlineStrategy->GetRootContext();
+
+ for (; block != nullptr; block = block->bbNext)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt();
+ stmt;
+ stmt = stmt->gtNextStmt)
+ {
+ stmt->gtInlineContext = rootContext;
+ }
+ }
+
+ // Reset block back to start for inlining
+ block = fgFirstBB;
+
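+    // Walk every statement in every block: expand inline candidates in place, replace any GT_RET_EXPR
+    // placeholders with the inlined return expressions, and remove statements that become nothing nodes.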
+ do
+ {
+ /* Make the current basic block address available globally */
+
+ compCurBB = block;
+
+ GenTreeStmt* stmt;
+ GenTreePtr expr;
+
+ for (stmt = block->firstStmt();
+ stmt != nullptr;
+ stmt = stmt->gtNextStmt)
+ {
+ expr = stmt->gtStmtExpr;
+
+ // See if we can expand the inline candidate
+ if ((expr->gtOper == GT_CALL) && ((expr->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0))
+ {
+ GenTreeCall* call = expr->AsCall();
+ InlineResult inlineResult(this, call, stmt, "fgInline");
+
+ fgMorphStmt = stmt;
+
+ fgMorphCallInline(call, &inlineResult);
+
+ if (stmt->gtStmtExpr->IsNothingNode())
+ {
+ fgRemoveStmt(block, stmt);
+ continue;
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ // Look for non-candidates.
+ fgWalkTreePre(&stmt->gtStmtExpr, fgFindNonInlineCandidate, stmt);
+#endif
+ }
+
+ // See if we need to replace the return value place holder.
+ fgWalkTreePre(&stmt->gtStmtExpr,
+ fgUpdateInlineReturnExpressionPlaceHolder,
+ (void *) this);
+
+ // See if stmt is of the form GT_COMMA(call, nop)
+ // If yes, we can get rid of GT_COMMA.
+ if (expr->OperGet() == GT_COMMA &&
+ expr->gtOp.gtOp1->OperGet() == GT_CALL &&
+ expr->gtOp.gtOp2->OperGet() == GT_NOP)
+ {
+ stmt->gtStmtExpr = expr->gtOp.gtOp1;
+ }
+ }
+
+ block = block->bbNext;
+
+ } while (block);
+
+#ifdef DEBUG
+
+ // Check that we should not have any inline candidate or return value place holder left.
+
+ block = fgFirstBB;
+ noway_assert(block);
+
+ do
+ {
+ GenTreeStmt* stmt;
+
+ for (stmt = block->firstStmt();
+ stmt;
+ stmt = stmt->gtNextStmt)
+ {
+ // Call Compiler::fgDebugCheckInlineCandidates on each node
+ fgWalkTreePre(&stmt->gtStmtExpr, fgDebugCheckInlineCandidates);
+ }
+
+ block = block->bbNext;
+
+ } while (block);
+
+ fgVerifyHandlerTab();
+
+ if (verbose)
+ {
+ printf("*************** After fgInline()\n");
+ fgDispBasicBlocks(true);
+ fgDispHandlerTab();
+ }
+
+ if (verbose || fgPrintInlinedMethods)
+ {
+ printf("**************** Inline Tree\n");
+ m_inlineStrategy->Dump();
+ }
+
+#endif // DEBUG
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// fgFindNonInlineCandidate: tree walk helper to ensure that a tree node
+// that is not an inline candidate is noted as a failed inline.
+//
+// Arguments:
+// pTree - pointer to pointer tree node being walked
+// data - contextual data for the walk
+//
+// Return Value:
+// walk result
+//
+// Note:
+// Invokes fgNoteNonInlineCandidate on the nodes it finds.
+
+Compiler::fgWalkResult Compiler::fgFindNonInlineCandidate(GenTreePtr* pTree,
+ fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ if (tree->gtOper == GT_CALL)
+ {
+ Compiler* compiler = data->compiler;
+ GenTreePtr stmt = (GenTreePtr) data->pCallbackData;
+ GenTreeCall* call = tree->AsCall();
+
+ compiler->fgNoteNonInlineCandidate(stmt, call);
+ }
+ return WALK_CONTINUE;
+}
+
+//------------------------------------------------------------------------
+// fgNoteNonInlineCandidate: account for inlining failures in calls
+// not marked as inline candidates.
+//
+// Arguments:
+// tree - statement containing the call
+// call - the call itself
+//
+// Notes:
+// Used in debug only to try and place descriptions of inline failures
+// into the proper context in the inline tree.
+
+void Compiler::fgNoteNonInlineCandidate(GenTreePtr tree,
+ GenTreeCall* call)
+{
+ InlineResult inlineResult(this, call, nullptr, "fgNotInlineCandidate");
+ InlineObservation currentObservation = InlineObservation::CALLSITE_NOT_CANDIDATE;
+
+ // Try and recover the reason left behind when the jit decided
+ // this call was not a candidate.
+ InlineObservation priorObservation = call->gtInlineObservation;
+
+ if (InlIsValidObservation(priorObservation))
+ {
+ currentObservation = priorObservation;
+ }
+
+ // Would like to just call noteFatal here, since this
+ // observation blocked candidacy, but policy comes into play
+ // here too. Also note there's no need to re-report these
+ // failures, since we reported them during the initial
+ // candidate scan.
+ InlineImpact impact = InlGetImpact(currentObservation);
+
+ if (impact == InlineImpact::FATAL)
+ {
+ inlineResult.NoteFatal(currentObservation);
+ }
+ else
+ {
+ inlineResult.Note(currentObservation);
+ }
+
+ inlineResult.SetReported();
+
+ if (call->gtCallType == CT_USER_FUNC)
+ {
+ // Create InlineContext for the failure
+ m_inlineStrategy->NewFailure(tree, &inlineResult);
+ }
+}
+
+#endif
+
+#if FEATURE_MULTIREG_RET
+
+/*********************************************************************************
+ *
+ * tree - The node which needs to be converted to a struct pointer.
+ *
+ * Return the pointer by either __replacing__ the tree node with a suitable pointer
+ * type, by returning a subtree __without replacing__ anything, or by __modifying__
+ * a subtree.
+ */
+GenTreePtr Compiler::fgGetStructAsStructPtr(GenTreePtr tree)
+{
+ noway_assert((tree->gtOper == GT_LCL_VAR) ||
+ (tree->gtOper == GT_FIELD) ||
+ (tree->gtOper == GT_IND) ||
+ (tree->gtOper == GT_BLK) ||
+ (tree->gtOper == GT_OBJ) ||
+ tree->OperIsSIMD() ||
+ // tree->gtOper == GT_CALL || cannot get address of call.
+ // tree->gtOper == GT_MKREFANY || inlining should've been aborted due to mkrefany opcode.
+ // tree->gtOper == GT_RET_EXPR || cannot happen after fgUpdateInlineReturnExpressionPlaceHolder
+ (tree->gtOper == GT_COMMA));
+
+ switch (tree->OperGet())
+ {
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_IND:
+ return tree->gtOp.gtOp1;
+
+ case GT_COMMA:
+ tree->gtOp.gtOp2 = fgGetStructAsStructPtr(tree->gtOp.gtOp2);
+ tree->gtType = TYP_BYREF;
+ return tree;
+
+ default:
+ return gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
+ }
+}
+
+/***************************************************************************************************
+ * child - The inlinee of the retExpr node.
+ * retClsHnd - The struct class handle of the type of the inlinee.
+ *
+ * Assign the inlinee to a tmp: if it is a call, just assign it to a lclVar; otherwise
+ * use a copy block to do the assignment.
+ */
+GenTreePtr Compiler::fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd)
+{
+ assert(child->gtOper != GT_RET_EXPR && child->gtOper != GT_MKREFANY);
+
+ unsigned tmpNum = lvaGrabTemp(false DEBUGARG("RetBuf for struct inline return candidates."));
+ lvaSetStruct(tmpNum, retClsHnd, false);
+ var_types structType = lvaTable[tmpNum].lvType;
+
+ GenTreePtr dst = gtNewLclvNode(tmpNum, structType);
+
+ // If we have a call, we'd like it to be: V00 = call(), but first check if
+ // we have a ", , , call()" -- this is very defensive as we may never get
+ // an inlinee that is made of commas. If the inlinee is not a call, then
+ // we use a copy block to do the assignment.
+ GenTreePtr src = child;
+ GenTreePtr lastComma = NULL;
+ while (src->gtOper == GT_COMMA)
+ {
+ lastComma = src;
+ src = src->gtOp.gtOp2;
+ }
+
+ GenTreePtr newInlinee = NULL;
+ if (src->gtOper == GT_CALL)
+ {
+ // If inlinee was just a call, new inlinee is v05 = call()
+ newInlinee = gtNewAssignNode(dst, src);
+
+ // When returning a multi-register value in a local var, make sure the variable is
+ // marked as lvIsMultiRegRet, so it does not get promoted.
+ if (src->AsCall()->HasMultiRegRetVal())
+ {
+ lvaTable[tmpNum].lvIsMultiRegRet = true;
+ }
+
+ // If inlinee was comma, but a deeper call, new inlinee is (, , , v05 = call())
+ if (child->gtOper == GT_COMMA)
+ {
+ lastComma->gtOp.gtOp2 = newInlinee;
+ newInlinee = child;
+ }
+ }
+ else
+ {
+ // Inlinee is not a call, so just create a copy block to the tmp.
+ src = child;
+ GenTreePtr dstAddr = fgGetStructAsStructPtr(dst);
+ GenTreePtr srcAddr = fgGetStructAsStructPtr(src);
+ newInlinee = gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false);
+ }
+
+ GenTreePtr production = gtNewLclvNode(tmpNum, structType);
+ return gtNewOperNode(GT_COMMA, structType, newInlinee, production);
+}
+
+/***************************************************************************************************
+ * tree - The tree pointer that has one of its child nodes as retExpr.
+ * child - The inlinee child.
+ * retClsHnd - The struct class handle of the type of the inlinee.
+ *
+ * V04 = call() assignments are okay as we codegen them directly. Everything else needs to be a copy
+ * block or a temp. For example, a cast(ldobj) then becomes cast(v05 = ldobj, v05). But it is a very
+ * rare (or impossible) scenario that a retExpr transforms into an ldobj other than a lclVar/call,
+ * so it is not worthwhile to do pattern matching optimizations like reducing addr(ldobj(op1))
+ * to just op1.
+ */
+void Compiler::fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd)
+{
+ // We are okay to have:
+ // 1. V02 = call();
+ // 2. copyBlk(dstAddr, srcAddr);
+ assert(tree->gtOper == GT_ASG);
+
+ // We have an assignment, we codegen only V05 = call().
+ if (child->gtOper == GT_CALL && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ return;
+ }
+
+ GenTreePtr dstAddr = fgGetStructAsStructPtr(tree->gtOp.gtOp1);
+ GenTreePtr srcAddr = fgGetStructAsStructPtr((child->gtOper == GT_CALL)
+ ? fgAssignStructInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call.
+ : child); // Just get the address, if not a call.
+
+ tree->CopyFrom(gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false), this);
+}
+
+#endif // FEATURE_MULTIREG_RET
+
+/*****************************************************************************
+ * Callback to replace the inline return expression place holder (GT_RET_EXPR)
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTreePtr* pTree,
+ fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* comp = data->compiler;
+ CORINFO_CLASS_HANDLE retClsHnd = NO_CLASS_HANDLE;
+
+ if (tree->gtOper == GT_RET_EXPR)
+ {
+ // We are going to copy the tree from the inlinee,
+ // so record the handle now.
+ //
+ if (varTypeIsStruct(tree))
+ {
+ retClsHnd = tree->gtRetExpr.gtRetClsHnd;
+ }
+
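+        // The expanded candidate may itself be another GT_RET_EXPR (from a nested inline), so keep
+        // substituting until we reach the actual inlined expression.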
+ do
+ {
+ // Obtained the expanded inline candidate
+ GenTreePtr inlineCandidate = tree->gtRetExpr.gtInlineCandidate;
+
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ printf("\nReplacing the return expression placeholder ");
+ printTreeID(tree);
+ printf(" with ");
+ printTreeID(inlineCandidate);
+ printf("\n");
+                // Dump out the old return expression placeholder; it will be overwritten by the CopyFrom below
+ comp->gtDispTree(tree);
+ }
+#endif // DEBUG
+
+ tree->CopyFrom(inlineCandidate, comp);
+
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ printf("\nInserting the inline return expression\n");
+ comp->gtDispTree(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+ while (tree->gtOper == GT_RET_EXPR);
+ }
+
+#if FEATURE_MULTIREG_RET
+
+ // Did we record a struct return class handle above?
+ //
+ if (retClsHnd != NO_CLASS_HANDLE)
+ {
+ // Is this a type that is returned in multiple registers?
+        // If so, we need to force it into a form we accept,
+ // i.e. LclVar = call()
+ //
+ if (comp->IsMultiRegReturnedType(retClsHnd))
+ {
+ GenTreePtr parent = data->parent;
+ // See assert below, we only look one level above for an asg parent.
+ if (parent->gtOper == GT_ASG)
+ {
+ // Either lhs is a call V05 = call(); or lhs is addr, and asg becomes a copyBlk.
+ comp->fgAttachStructInlineeToAsg(parent, tree, retClsHnd);
+ }
+ else
+ {
+ // Just assign the inlinee to a variable to keep it simple.
+ tree->CopyFrom(comp->fgAssignStructInlineeToVar(tree, retClsHnd), comp);
+ }
+ }
+ }
+
+#if defined(DEBUG)
+
+ // Make sure we don't have a tree like so: V05 = (, , , retExpr);
+    // since we only look one level above for the '=' parent and
+    // do not check for a series of COMMAs (see above).
+ // Importer and FlowGraph will not generate such a tree, so just
+ // leaving an assert in here. This can be fixed by looking ahead
+ // when we visit GT_ASG similar to fgAttachStructInlineeToAsg.
+ //
+ if ((tree->gtOper == GT_ASG) && (tree->gtOp.gtOp2->gtOper == GT_COMMA))
+ {
+ GenTreePtr comma;
+ for (comma = tree->gtOp.gtOp2;
+ comma->gtOper == GT_COMMA;
+ comma = comma->gtOp.gtOp2)
+ {
+ // empty
+ }
+
+ noway_assert(!varTypeIsStruct(comma) ||
+ comma->gtOper != GT_RET_EXPR ||
+ !comp->IsMultiRegReturnedType(comma->gtRetExpr.gtRetClsHnd));
+ }
+
+#endif // defined(DEBUG)
+#endif // FEATURE_MULTIREG_RET
+
+ return WALK_CONTINUE;
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ * Callback to make sure there are no more GT_RET_EXPR or GTF_CALL_INLINE_CANDIDATE nodes.
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::fgDebugCheckInlineCandidates(GenTreePtr* pTree,
+ fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ if (tree->gtOper == GT_CALL)
+ {
+ assert((tree->gtFlags & GTF_CALL_INLINE_CANDIDATE) == 0);
+ }
+ else
+ {
+ assert(tree->gtOper != GT_RET_EXPR);
+ }
+
+ return WALK_CONTINUE;
+}
+
+#endif // DEBUG
+
+
+void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call,
+ InlineResult* inlineResult)
+{
+ noway_assert(call->gtOper == GT_CALL);
+ noway_assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
+ noway_assert(opts.OptEnabled(CLFLG_INLINING));
+
+ // This is the InlineInfo struct representing a method to be inlined.
+ InlineInfo inlineInfo = {nullptr};
+
+ CORINFO_METHOD_HANDLE fncHandle = call->gtCallMethHnd;
+
+ inlineInfo.fncHandle = fncHandle;
+ inlineInfo.iciCall = call;
+ inlineInfo.iciStmt = fgMorphStmt;
+ inlineInfo.iciBlock = compCurBB;
+ inlineInfo.thisDereferencedFirst = false;
+ inlineInfo.retExpr = nullptr;
+ inlineInfo.inlineResult = inlineResult;
+#ifdef FEATURE_SIMD
+ inlineInfo.hasSIMDTypeArgLocalOrReturn = false;
+#endif // FEATURE_SIMD
+
+ InlineCandidateInfo* inlineCandidateInfo = call->gtInlineCandidateInfo;
+ noway_assert(inlineCandidateInfo);
+ // Store the link to inlineCandidateInfo into inlineInfo
+ inlineInfo.inlineCandidateInfo = inlineCandidateInfo;
+
+ unsigned inlineDepth = fgCheckInlineDepthAndRecursion(&inlineInfo);
+
+ if (inlineResult->IsFailure())
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+            printf("Recursion or excessive inline depth detected. Will not expand this INLINECANDIDATE.\n");
+ }
+#endif // DEBUG
+ return;
+ }
+
+ // Set the trap to catch all errors (including recoverable ones from the EE)
+ struct Param
+ {
+ Compiler* pThis;
+ GenTree* call;
+ CORINFO_METHOD_HANDLE fncHandle;
+ InlineCandidateInfo* inlineCandidateInfo;
+ InlineInfo* inlineInfo;
+ } param = {nullptr};
+
+ param.pThis = this;
+ param.call = call;
+ param.fncHandle = fncHandle;
+ param.inlineCandidateInfo = inlineCandidateInfo;
+ param.inlineInfo = &inlineInfo;
+ bool success = eeRunWithErrorTrap<Param>([](Param* pParam)
+ {
+ // Init the local var info of the inlinee
+ pParam->pThis->impInlineInitVars(pParam->inlineInfo);
+
+ if (pParam->inlineInfo->inlineResult->IsCandidate())
+ {
+ /* Clear the temp table */
+ memset(pParam->inlineInfo->lclTmpNum, -1, sizeof(pParam->inlineInfo->lclTmpNum));
+
+ //
+ // Prepare the call to jitNativeCode
+ //
+
+ pParam->inlineInfo->InlinerCompiler = pParam->pThis;
+ if (pParam->pThis->impInlineInfo == nullptr)
+ {
+ pParam->inlineInfo->InlineRoot = pParam->pThis;
+ }
+ else
+ {
+ pParam->inlineInfo->InlineRoot = pParam->pThis->impInlineInfo->InlineRoot;
+ }
+ pParam->inlineInfo->argCnt = pParam->inlineCandidateInfo->methInfo.args.totalILArgs();
+ pParam->inlineInfo->tokenLookupContextHandle = pParam->inlineCandidateInfo->exactContextHnd;
+
+ JITLOG_THIS(pParam->pThis,
+ (LL_INFO100000,
+ "INLINER: inlineInfo.tokenLookupContextHandle for %s set to 0x%p:\n",
+ pParam->pThis->eeGetMethodFullName(pParam->fncHandle),
+ pParam->pThis->dspPtr(pParam->inlineInfo->tokenLookupContextHandle)));
+
+ CORJIT_FLAGS compileFlagsForInlinee;
+ memcpy(&compileFlagsForInlinee, pParam->pThis->opts.jitFlags, sizeof(compileFlagsForInlinee));
+ compileFlagsForInlinee.corJitFlags &= ~CORJIT_FLG_LOST_WHEN_INLINING;
+ compileFlagsForInlinee.corJitFlags |= CORJIT_FLG_SKIP_VERIFICATION;
+
+#ifdef DEBUG
+ if (pParam->pThis->verbose)
+ {
+ printf("\nInvoking compiler for the inlinee method %s :\n",
+ pParam->pThis->eeGetMethodFullName(pParam->fncHandle));
+ }
+#endif // DEBUG
+
+ int result = jitNativeCode(pParam->fncHandle,
+ pParam->inlineCandidateInfo->methInfo.scope,
+ pParam->pThis->info.compCompHnd,
+ &pParam->inlineCandidateInfo->methInfo,
+ (void**)pParam->inlineInfo,
+ nullptr,
+ &compileFlagsForInlinee,
+ pParam->inlineInfo);
+
+ if (result != CORJIT_OK)
+ {
+ // If we haven't yet determined why this inline fails, use
+ // a catch-all something bad happened observation.
+ InlineResult* innerInlineResult = pParam->inlineInfo->inlineResult;
+
+ if (!innerInlineResult->IsFailure())
+ {
+ innerInlineResult->NoteFatal(InlineObservation::CALLSITE_COMPILATION_FAILURE);
+ }
+ }
+ }
+ }, &param);
+ if (!success)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nInlining failed due to an exception during invoking the compiler for the inlinee method %s.\n",
+ eeGetMethodFullName(fncHandle));
+ }
+#endif // DEBUG
+
+ // If we haven't yet determined why this inline fails, use
+ // a catch-all something bad happened observation.
+ if (!inlineResult->IsFailure())
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_COMPILATION_ERROR);
+ }
+ }
+
+ if (inlineResult->IsFailure())
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (0 && verbose)
+ {
+ printf("\nDone invoking compiler for the inlinee method %s\n",
+ eeGetMethodFullName(fncHandle));
+ }
+#endif // DEBUG
+
+    // If there is a non-void return but we haven't set pInlineInfo->retExpr,
+    // that means we haven't imported any BB that contains a CEE_RET opcode.
+    // (This could happen, for example, for a BBJ_THROW block falling through to a BBJ_RETURN block,
+    // which causes the BBJ_RETURN block not to be imported at all.)
+    // Fail the inlining attempt.
+ if (inlineCandidateInfo->fncRetType != TYP_VOID && inlineInfo.retExpr == nullptr)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nInlining failed because pInlineInfo->retExpr is not set in the inlinee method %s.\n",
+ eeGetMethodFullName(fncHandle));
+ }
+#endif // DEBUG
+ inlineResult->NoteFatal(InlineObservation::CALLEE_LACKS_RETURN);
+ return;
+ }
+
+ if (inlineCandidateInfo->initClassResult & CORINFO_INITCLASS_SPECULATIVE)
+ {
+ // we defer the call to initClass() until inlining is completed in case it fails. If inlining succeeds,
+ // we will call initClass().
+ if (!(info.compCompHnd->initClass(nullptr /* field */, fncHandle /* method */,
+ inlineCandidateInfo->exactContextHnd /* context */) & CORINFO_INITCLASS_INITIALIZED))
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_CLASS_INIT_FAILURE);
+ return;
+ }
+ }
+
+ // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ // The inlining attempt cannot be failed starting from this point.
+ // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+    // We've successfully obtained the list of the inlinee's basic blocks.
+    // Let's insert it into the inliner's basic block list.
+ fgInsertInlineeBlocks(&inlineInfo);
+
+#ifdef DEBUG
+
+ if (verbose || fgPrintInlinedMethods)
+ {
+ printf("Successfully inlined %s (%d IL bytes) (depth %d) [%s]\n",
+ eeGetMethodFullName(fncHandle),
+ inlineCandidateInfo->methInfo.ILCodeSize,
+ inlineDepth,
+ inlineResult->ReasonString());
+ }
+
+ if (verbose)
+ {
+ printf("--------------------------------------------------------------------------------------------\n");
+ }
+#endif // DEBUG
+
+#if defined(DEBUG)
+ impInlinedCodeSize += inlineCandidateInfo->methInfo.ILCodeSize;
+#endif
+
+ // We inlined...
+ inlineResult->NoteSuccess();
+}
+
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+// The inlining attempt cannot be failed starting from this point.
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
+{
+ GenTreePtr iciCall = pInlineInfo->iciCall;
+ GenTreePtr iciStmt = pInlineInfo->iciStmt;
+ BasicBlock* iciBlock = pInlineInfo->iciBlock;
+ BasicBlock* block;
+
+    // We could write a better assert here. For example, we could check that
+    // iciBlock contains iciStmt, which in turn contains iciCall.
+ noway_assert(iciBlock->bbTreeList != nullptr);
+ noway_assert(iciStmt->gtStmt.gtStmtExpr != nullptr);
+ noway_assert(iciCall->gtOper == GT_CALL);
+
+#ifdef DEBUG
+
+ GenTreePtr currentDumpStmt = nullptr;
+
+ if (verbose)
+ {
+ printf("\n\n----------- Statements (and blocks) added due to the inlining of call ");
+ printTreeID(iciCall);
+ printf(" -----------\n");
+ // gtDispTree(iciStmt);
+ }
+
+#endif // DEBUG
+
+ //
+ // Create a new inline context and mark the inlined statements with it
+ //
+ InlineContext* calleeContext = m_inlineStrategy->NewSuccess(pInlineInfo);
+
+ for (block = InlineeCompiler->fgFirstBB;
+ block != nullptr;
+ block = block->bbNext)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt();
+ stmt;
+ stmt = stmt->gtNextStmt)
+ {
+ stmt->gtInlineContext = calleeContext;
+ }
+ }
+
+ //
+ // Prepend statements.
+ //
+ GenTreePtr stmtAfter;
+ stmtAfter = fgInlinePrependStatements(pInlineInfo);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ currentDumpStmt = stmtAfter;
+ printf("\nInlinee method body:");
+ }
+#endif // DEBUG
+
+ if (InlineeCompiler->fgBBcount == 1)
+ {
+        // When fgBBcount is 1 we will always have a non-NULL fgFirstBB
+ //
+ PREFAST_ASSUME(InlineeCompiler->fgFirstBB != nullptr);
+
+        // DDB 91389: Don't throw away the (only) inlinee block
+        // when its jump kind is not BBJ_RETURN.
+        // In other words, we need its bbJumpKind to do the right thing.
+ if (InlineeCompiler->fgFirstBB->bbJumpKind == BBJ_RETURN)
+ {
+            // Inlinee contains just one BB. So just insert its statement list into topBlock.
+ if (InlineeCompiler->fgFirstBB->bbTreeList)
+ {
+ stmtAfter = fgInsertStmtListAfter(iciBlock,
+ stmtAfter,
+ InlineeCompiler->fgFirstBB->bbTreeList);
+
+ // Copy inlinee bbFlags to caller bbFlags.
+ const unsigned int inlineeBlockFlags = InlineeCompiler->fgFirstBB->bbFlags;
+ noway_assert((inlineeBlockFlags & BBF_HAS_JMP) == 0);
+ noway_assert((inlineeBlockFlags & BBF_KEEP_BBJ_ALWAYS) == 0);
+ iciBlock->bbFlags |= inlineeBlockFlags;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ noway_assert(currentDumpStmt);
+
+ if (currentDumpStmt != stmtAfter)
+ {
+ do
+ {
+ currentDumpStmt = currentDumpStmt->gtNext;
+
+ printf("\n");
+
+ noway_assert(currentDumpStmt->gtOper == GT_STMT);
+
+ gtDispTree(currentDumpStmt);
+ printf("\n");
+
+ } while (currentDumpStmt != stmtAfter);
+ }
+ }
+#endif // DEBUG
+ goto _Done;
+ }
+ }
+
+ //
+ // ======= Inserting inlinee's basic blocks ===============
+ //
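+    // The caller block (iciBlock) is split at the call site: topBlock keeps the
+    // statements up to and including the inlinee setup statements prepended above,
+    // falls through into the inlinee's blocks, and a new bottomBlock receives the
+    // remaining statements along with topBlock's original jump kind and jump target.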
+
+ BasicBlock* topBlock;
+ BasicBlock* bottomBlock;
+
+ topBlock = iciBlock;
+
+ bottomBlock = fgNewBBafter(topBlock->bbJumpKind, topBlock, true);
+ bottomBlock->bbRefs = 1;
+ bottomBlock->bbJumpDest = topBlock->bbJumpDest;
+ bottomBlock->inheritWeight(topBlock);
+
+ topBlock->bbJumpKind = BBJ_NONE;
+
+ // Update block flags
+ unsigned originalFlags;
+ originalFlags = topBlock->bbFlags;
+ noway_assert((originalFlags & BBF_SPLIT_NONEXIST) == 0);
+ topBlock->bbFlags &= ~(BBF_SPLIT_LOST);
+ bottomBlock->bbFlags |= originalFlags & BBF_SPLIT_GAINED;
+
+ //
+ // Split statements between topBlock and bottomBlock
+ //
+ GenTreePtr topBlock_Begin;
+ GenTreePtr topBlock_End;
+ GenTreePtr bottomBlock_Begin;
+ GenTreePtr bottomBlock_End;
+
+ topBlock_Begin = nullptr;
+ topBlock_End = nullptr;
+ bottomBlock_Begin = nullptr;
+ bottomBlock_End = nullptr;
+
+ //
+ // First figure out bottomBlock_Begin
+ //
+
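+    // stmtAfter is the last statement that stays in topBlock; the statement that
+    // follows it (if any) becomes the first statement of bottomBlock.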
+ bottomBlock_Begin = stmtAfter->gtNext;
+
+ if (topBlock->bbTreeList == nullptr)
+ {
+ // topBlock is empty before the split.
+ // In this case, both topBlock and bottomBlock should be empty
+ noway_assert(bottomBlock_Begin == nullptr);
+ topBlock->bbTreeList = nullptr;
+ bottomBlock->bbTreeList = nullptr;
+ }
+ else if (topBlock->bbTreeList == bottomBlock_Begin)
+ {
+ noway_assert(bottomBlock_Begin);
+
+ // topBlock contains at least one statement before the split.
+ // And the split is before the first statement.
+        // In this case, topBlock should be empty, and everything else should be moved to the bottomBlock.
+ bottomBlock->bbTreeList = topBlock->bbTreeList;
+ topBlock->bbTreeList = nullptr;
+ }
+ else if (bottomBlock_Begin == nullptr)
+ {
+ noway_assert(topBlock->bbTreeList);
+
+ // topBlock contains at least one statement before the split.
+ // And the split is at the end of the topBlock.
+ // In this case, everything should be kept in the topBlock, and the bottomBlock should be empty
+
+ bottomBlock->bbTreeList = nullptr;
+ }
+ else
+ {
+ noway_assert(topBlock->bbTreeList);
+ noway_assert(bottomBlock_Begin);
+
+ // This is the normal case where both blocks should contain at least one statement.
+ topBlock_Begin = topBlock->bbTreeList;
+ noway_assert(topBlock_Begin);
+ topBlock_End = bottomBlock_Begin->gtPrev;
+ noway_assert(topBlock_End);
+ bottomBlock_End = topBlock->lastStmt();
+ noway_assert(bottomBlock_End);
+
+ // Break the linkage between 2 blocks.
+ topBlock_End->gtNext = nullptr;
+
+ // Fix up all the pointers.
+ topBlock->bbTreeList = topBlock_Begin;
+ topBlock->bbTreeList->gtPrev = topBlock_End;
+
+ bottomBlock->bbTreeList = bottomBlock_Begin;
+ bottomBlock->bbTreeList->gtPrev = bottomBlock_End;
+ }
+
+ //
+ // Set the try and handler index and fix the jump types of inlinee's blocks.
+ //
+
+ bool inheritWeight;
+ inheritWeight = true; // The firstBB does inherit the weight from the iciBlock
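+    // Weight heuristic: the first inlinee block and each BBJ_RETURN block inherit the
+    // call-site block's weight; every other inlinee block is given half of that weight.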
+
+ for (block = InlineeCompiler->fgFirstBB;
+ block != nullptr;
+ block = block->bbNext)
+ {
+ noway_assert(!block->hasTryIndex());
+ noway_assert(!block->hasHndIndex());
+ block->copyEHRegion(iciBlock);
+ block->bbFlags |= iciBlock->bbFlags & BBF_BACKWARD_JUMP;
+
+ if (iciStmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ block->bbCodeOffs = jitGetILoffs(iciStmt->gtStmt.gtStmtILoffsx);
+ block->bbCodeOffsEnd = block->bbCodeOffs + 1; // TODO: is code size of 1 some magic number for inlining?
+ }
+ else
+ {
+ block->bbCodeOffs = 0; // TODO: why not BAD_IL_OFFSET?
+ block->bbCodeOffsEnd = 0;
+ block->bbFlags |= BBF_INTERNAL;
+ }
+
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ inheritWeight = true; // A return block does inherit the weight from the iciBlock
+ noway_assert((block->bbFlags & BBF_HAS_JMP) == 0);
+ if (block->bbNext)
+ {
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = bottomBlock;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConvert bbJumpKind of BB%02u to BBJ_ALWAYS to bottomBlock BB%02u\n",
+ block->bbNum, bottomBlock->bbNum);
+ }
+#endif // DEBUG
+ }
+ else
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConvert bbJumpKind of BB%02u to BBJ_NONE\n", block->bbNum);
+ }
+#endif // DEBUG
+ block->bbJumpKind = BBJ_NONE;
+ }
+ }
+ if (inheritWeight)
+ {
+ block->inheritWeight(iciBlock);
+ inheritWeight = false;
+ }
+ else
+ {
+ block->modifyBBWeight(iciBlock->bbWeight / 2);
+ }
+ }
+
+ // Insert inlinee's blocks into inliner's block list.
+ topBlock->setNext(InlineeCompiler->fgFirstBB);
+ InlineeCompiler->fgLastBB->setNext(bottomBlock);
+
+ //
+ // Add inlinee's block count to inliner's.
+ //
+ fgBBcount += InlineeCompiler->fgBBcount;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ fgDispBasicBlocks(InlineeCompiler->fgFirstBB, InlineeCompiler->fgLastBB, true);
+ }
+#endif // DEBUG
+
+_Done:
+
+ //
+    // At this point, we have successfully inserted the inlinee's code.
+ //
+
+ //
+ // Copy out some flags
+ //
+ compLongUsed |= InlineeCompiler->compLongUsed;
+ compFloatingPointUsed |= InlineeCompiler->compFloatingPointUsed;
+ compLocallocUsed |= InlineeCompiler->compLocallocUsed;
+ compQmarkUsed |= InlineeCompiler->compQmarkUsed;
+ compUnsafeCastUsed |= InlineeCompiler->compUnsafeCastUsed;
+ compNeedsGSSecurityCookie |= InlineeCompiler->compNeedsGSSecurityCookie;
+ compGSReorderStackLayout |= InlineeCompiler->compGSReorderStackLayout;
+
+ // Update optMethodFlags
+
+#ifdef DEBUG
+ unsigned optMethodFlagsBefore = optMethodFlags;
+#endif
+
+ optMethodFlags |= InlineeCompiler->optMethodFlags;
+
+#ifdef DEBUG
+ if (optMethodFlags != optMethodFlagsBefore)
+ {
+ JITDUMP("INLINER: Updating optMethodFlags -- root:%0x callee:%0x new:%0x\n",
+ optMethodFlagsBefore, InlineeCompiler->optMethodFlags, optMethodFlags);
+ }
+#endif
+
+    // If there is a non-void return value, replace the GT_CALL with its return value expression,
+    // so that it will later be picked up by the GT_RET_EXPR node.
+ if ((pInlineInfo->inlineCandidateInfo->fncRetType != TYP_VOID) || (iciCall->gtCall.gtReturnType == TYP_STRUCT))
+ {
+ noway_assert(pInlineInfo->retExpr);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nReturn expression for call at ");
+ printTreeID(iciCall);
+ printf(" is\n");
+ gtDispTree(pInlineInfo->retExpr);
+ }
+#endif // DEBUG
+ // Replace the call with the return expression
+ iciCall->CopyFrom(pInlineInfo->retExpr, this);
+ }
+
+ //
+ // Detach the GT_CALL node from the original statement by hanging a "nothing" node under it,
+ // so that fgMorphStmts can remove the statement once we return from here.
+ //
+ iciStmt->gtStmt.gtStmtExpr = gtNewNothingNode();
+}
+
+// Prepend the statements that are needed before the inlined call.
+// Return the last statement that is prepended.
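+//
+// The statements are inserted after the call statement in this order: assignments of
+// arguments to temps (or appends of side-effecting argument trees), the shared class
+// constructor helper call if one is needed, the 'this' null check if one is needed,
+// and finally zero-initialization of any inlinee locals that require it.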
+
+GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
+{
+ BasicBlock* block = inlineInfo->iciBlock;
+
+ GenTreePtr callStmt = inlineInfo->iciStmt;
+ noway_assert(callStmt->gtOper == GT_STMT);
+ IL_OFFSETX callILOffset = callStmt->gtStmt.gtStmtILoffsx;
+
+ GenTreePtr afterStmt = callStmt; // afterStmt is the place where the new statements should be inserted after.
+ GenTreePtr newStmt;
+
+ GenTreePtr call = inlineInfo->iciCall;
+ noway_assert(call->gtOper == GT_CALL);
+
+#ifdef DEBUG
+ if (0 && verbose)
+ {
+ printf("\nfgInlinePrependStatements for iciCall= ");
+ printTreeID(call);
+ printf(":\n");
+ }
+#endif
+
+ // Prepend statements for any initialization / side effects
+
+ InlArgInfo* inlArgInfo = inlineInfo->inlArgInfo;
+ InlLclVarInfo* lclVarInfo = inlineInfo->lclVarInfo;
+
+ GenTreePtr tree;
+
+    // Create the null check statement for the 'this' pointer if necessary (but do not append it to the statement list yet).
+    // The NULL check should be done after the "argument setup statements".
+    // The only reason we create it here is so that the call to "impInlineFetchArg(0,..." reserves a temp
+    // for the "this" pointer.
+    // Note: Here we no longer do the optimization that was done by thisDereferencedFirst in the old inliner.
+    // However, the assertionProp logic will remove any unnecessary null checks that we may have added.
+ //
+ GenTreePtr nullcheck = nullptr;
+
+ if (call->gtFlags & GTF_CALL_NULLCHECK && !inlineInfo->thisDereferencedFirst)
+ {
+ // Call impInlineFetchArg to "reserve" a temp for the "this" pointer.
+ nullcheck = gtNewOperNode(GT_IND, TYP_INT,
+ impInlineFetchArg(0, inlArgInfo, lclVarInfo));
+ nullcheck->gtFlags |= GTF_EXCEPT;
+
+        // The NULL-check statement will be inserted into the statement list after the statements
+        // that assign arguments to temps, and before the actual body of the inlinee method.
+ }
+
+ /* Treat arguments that had to be assigned to temps */
+ if (inlineInfo->argCnt)
+ {
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nArguments setup:\n");
+ }
+#endif // DEBUG
+
+ for (unsigned argNum = 0; argNum < inlineInfo->argCnt; argNum++)
+ {
+ if (inlArgInfo[argNum].argHasTmp)
+ {
+ noway_assert(inlArgInfo[argNum].argIsUsed);
+
+ /* argBashTmpNode is non-NULL iff the argument's value was
+ referenced exactly once by the original IL. This offers an
+                   opportunity to avoid an intermediate temp and just insert
+ the original argument tree.
+
+ However, if the temp node has been cloned somewhere while
+ importing (e.g. when handling isinst or dup), or if the IL
+ took the address of the argument, then argBashTmpNode will
+ be set (because the value was only explicitly retrieved
+ once) but the optimization cannot be applied.
+ */
+
+ GenTreePtr argSingleUseNode = inlArgInfo[argNum].argBashTmpNode;
+
+ if (argSingleUseNode &&
+ !(argSingleUseNode->gtFlags & GTF_VAR_CLONED) &&
+ !inlArgInfo[argNum].argHasLdargaOp &&
+ !inlArgInfo[argNum].argHasStargOp)
+ {
+ // Change the temp in-place to the actual argument.
+ // We currently do not support this for struct arguments, so it must not be a GT_OBJ.
+ GenTree* argNode = inlArgInfo[argNum].argNode;
+ assert(argNode->gtOper != GT_OBJ);
+ argSingleUseNode->CopyFrom(argNode, this);
+ continue;
+ }
+ else
+ {
+ /* Create the temp assignment for this argument */
+
+ CORINFO_CLASS_HANDLE structHnd = DUMMY_INIT(0);
+
+ if (varTypeIsStruct(lclVarInfo[argNum].lclTypeInfo))
+ {
+ structHnd = gtGetStructHandleIfPresent(inlArgInfo[argNum].argNode);
+ noway_assert(structHnd != NO_CLASS_HANDLE);
+ }
+
+                // Unsafe value cls check is not needed for argTmpNum here since the inlinee compiler instance would have
+                // iterated over these and marked them accordingly.
+ impAssignTempGen(inlArgInfo[argNum].argTmpNum,
+ inlArgInfo[argNum].argNode,
+ structHnd,
+ (unsigned)CHECK_SPILL_NONE,
+ & afterStmt,
+ callILOffset,
+ block);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ gtDispTree(afterStmt);
+ }
+#endif // DEBUG
+
+ }
+ }
+ else if (inlArgInfo[argNum].argIsByRefToStructLocal)
+ {
+ // Do nothing.
+ }
+ else
+ {
+ /* The argument is either not used or a const or lcl var */
+
+ noway_assert(!inlArgInfo[argNum].argIsUsed ||
+ inlArgInfo[argNum].argIsInvariant ||
+ inlArgInfo[argNum].argIsLclVar );
+
+                /* Make sure we didn't change argNode along the way, or else
+                   subsequent uses of the arg would have worked with the bashed value */
+ if (inlArgInfo[argNum].argIsInvariant)
+ {
+ assert(inlArgInfo[argNum].argNode->OperIsConst() ||
+ inlArgInfo[argNum].argNode->gtOper == GT_ADDR);
+ }
+ noway_assert((inlArgInfo[argNum].argIsLclVar == 0) ==
+ (inlArgInfo[argNum].argNode->gtOper != GT_LCL_VAR || (inlArgInfo[argNum].argNode->gtFlags & GTF_GLOB_REF)));
+
+ /* If the argument has side effects, append it */
+
+ if (inlArgInfo[argNum].argHasSideEff)
+ {
+ noway_assert(inlArgInfo[argNum].argIsUsed == false);
+
+ if (inlArgInfo[argNum].argNode->gtOper == GT_OBJ ||
+ inlArgInfo[argNum].argNode->gtOper == GT_MKREFANY)
+ {
+                        // Don't put a GT_OBJ node under a GT_COMMA.
+                        // Codegen can't deal with it.
+                        // Just hang the address here in case there are side effects.
+ newStmt = gtNewStmt(gtUnusedValNode(inlArgInfo[argNum].argNode->gtOp.gtOp1), callILOffset);
+ }
+ else
+ {
+ newStmt = gtNewStmt(gtUnusedValNode(inlArgInfo[argNum].argNode), callILOffset);
+ }
+ afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ gtDispTree(afterStmt);
+ }
+#endif // DEBUG
+
+ }
+ }
+ }
+ }
+
+ // Add the CCTOR check if asked for.
+    // Note: We no longer do the optimization that was done by staticAccessedFirstUsingHelper in the old inliner.
+    //       Therefore we might prepend a redundant call to HELPER.CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
+    //       before the inlined method body, even if a static field of this type was accessed in the inlinee
+    //       using a helper before any other observable side effect.
+
+ if (inlineInfo->inlineCandidateInfo->initClassResult & CORINFO_INITCLASS_USE_HELPER)
+ {
+ CORINFO_CONTEXT_HANDLE exactContext = inlineInfo->inlineCandidateInfo->exactContextHnd;
+ CORINFO_CLASS_HANDLE exactClass;
+
+ if (((SIZE_T)exactContext & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_CLASS)
+ {
+ exactClass = CORINFO_CLASS_HANDLE((SIZE_T)exactContext & ~CORINFO_CONTEXTFLAGS_MASK);
+ }
+ else
+ {
+ exactClass = info.compCompHnd->getMethodClass(CORINFO_METHOD_HANDLE((SIZE_T)exactContext & ~CORINFO_CONTEXTFLAGS_MASK));
+ }
+
+ tree = fgGetSharedCCtor(exactClass);
+ newStmt = gtNewStmt(tree, callILOffset);
+ afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt);
+ }
+
+ // Insert the nullcheck statement now.
+ if (nullcheck)
+ {
+ newStmt = gtNewStmt(nullcheck, callILOffset);
+ afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt);
+ }
+
+ //
+ // Now zero-init inlinee locals
+ //
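+    // Locals are zero-initialized only when the inlinee was compiled with
+    // CORINFO_OPT_INIT_LOCALS: scalar locals get an assignment of a zero constant,
+    // while struct locals get a zero init-block of the full struct size.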
+
+ CORINFO_METHOD_INFO* InlineeMethodInfo = InlineeCompiler->info.compMethodInfo;
+
+ unsigned lclCnt = InlineeMethodInfo->locals.numArgs;
+
+ // Does callee contain any zero-init local?
+ if ((lclCnt != 0) &&
+ (InlineeMethodInfo->options & CORINFO_OPT_INIT_LOCALS) != 0)
+ {
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nZero init inlinee locals:\n");
+ }
+#endif // DEBUG
+
+ for (unsigned lclNum = 0; lclNum < lclCnt; lclNum++)
+ {
+ unsigned tmpNum = inlineInfo->lclTmpNum[lclNum];
+
+ // Is the local used at all?
+ if (tmpNum != BAD_VAR_NUM)
+ {
+ var_types lclTyp = (var_types)lvaTable[tmpNum].lvType;
+ noway_assert(lclTyp == lclVarInfo[lclNum + inlineInfo->argCnt].lclTypeInfo);
+
+ if (!varTypeIsStruct(lclTyp))
+ {
+                    // Unsafe value cls check is not needed here since the inlinee compiler instance would have
+                    // iterated over the locals and marked them accordingly.
+ impAssignTempGen(tmpNum,
+ gtNewZeroConNode(genActualType(lclTyp)),
+ NO_CLASS_HANDLE,
+ (unsigned)CHECK_SPILL_NONE,
+ & afterStmt,
+ callILOffset,
+ block);
+ }
+ else
+ {
+ CORINFO_CLASS_HANDLE structType = lclVarInfo[lclNum + inlineInfo->argCnt].lclVerTypeInfo.GetClassHandle();
+
+ tree = gtNewBlkOpNode(gtNewLclvNode(tmpNum, lclTyp), // Dest
+ gtNewIconNode(0), // Value
+ info.compCompHnd->getClassSize(structType), // Size
+ false, // isVolatile
+ false); // not copyBlock
+
+ newStmt = gtNewStmt(tree, callILOffset);
+ afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ gtDispTree(afterStmt);
+ }
+#endif // DEBUG
+ }
+ }
+ }
+
+ return afterStmt;
+}
+
+
+/*****************************************************************************/
+/*static*/
+Compiler::fgWalkResult Compiler::fgChkThrowCB(GenTreePtr* pTree,
+ fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ // If this tree doesn't have the EXCEPT flag set, then there is no
+ // way any of the child nodes could throw, so we can stop recursing.
+ if (!(tree->gtFlags & GTF_EXCEPT))
+ {
+ return Compiler::WALK_SKIP_SUBTREES;
+ }
+
+ switch (tree->gtOper)
+ {
+ case GT_MUL:
+ case GT_ADD:
+ case GT_SUB:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_CAST:
+ if (tree->gtOverflow()) {
+ return Compiler::WALK_ABORT;
+}
+ break;
+
+ case GT_INDEX:
+ if (tree->gtFlags & GTF_INX_RNGCHK) {
+ return Compiler::WALK_ABORT;
+}
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ return Compiler::WALK_ABORT;
+
+ default:
+ break;
+ }
+
+ return Compiler::WALK_CONTINUE;
+}
+
+/*****************************************************************************/
+/*static*/
+Compiler::fgWalkResult Compiler::fgChkLocAllocCB(GenTreePtr* pTree,
+ fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ if (tree->gtOper == GT_LCLHEAP) {
+ return Compiler::WALK_ABORT;
+}
+
+ return Compiler::WALK_CONTINUE;
+}
+
+/*****************************************************************************/
+/*static*/
+Compiler::fgWalkResult Compiler::fgChkQmarkCB(GenTreePtr* pTree,
+ fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ if (tree->gtOper == GT_QMARK) {
+ return Compiler::WALK_ABORT;
+}
+
+ return Compiler::WALK_CONTINUE;
+}
+
+
+void Compiler::fgLclFldAssign(unsigned lclNum)
+{
+ assert(varTypeIsStruct(lvaTable[lclNum].lvType));
+ if (lvaTable[lclNum].lvPromoted && lvaTable[lclNum].lvFieldCnt > 1)
+ {
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
+ }
+}
diff --git a/src/jit/fp.h b/src/jit/fp.h
new file mode 100644
index 0000000000..f1cee9581a
--- /dev/null
+++ b/src/jit/fp.h
@@ -0,0 +1,73 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _JIT_FP
+
+#define _JIT_FP
+
+// Auxiliary structures.
+#if FEATURE_STACK_FP_X87
+
+enum dummyFPenum
+{
+#define REGDEF(name, rnum, mask, sname) dummmy_##name = rnum,
+#include "registerfp.h"
+
+ FP_VIRTUALREGISTERS,
+};
+
+// FlatFPStateX87 holds the state of the virtual register file. For each
+// virtual register we keep track of which physical register we're
+// mapping it to. We also keep track of the physical stack.
+
+#define FP_PHYSICREGISTERS FP_VIRTUALREGISTERS
+#define FP_VRNOTMAPPED -1
+
+struct FlatFPStateX87
+{
+public:
+ void Init(FlatFPStateX87* pFrom = 0);
+ bool Mapped(unsigned uEntry); // Is virtual register mapped
+ void Unmap(unsigned uEntry); // Unmaps a virtual register
+ void Associate(unsigned uEntry, unsigned uStack);
+ unsigned StackToST(unsigned uEntry); // Maps the stack to a ST(x) entry
+ unsigned VirtualToST(unsigned uEntry);
+ unsigned STToVirtual(unsigned uST);
+ unsigned TopIndex();
+ unsigned TopVirtual();
+ void Rename(unsigned uVirtualTo, unsigned uVirtualFrom);
+ unsigned Pop();
+ void Push(unsigned uEntry);
+ bool IsEmpty();
+
+ // Debug/test methods
+ static bool AreEqual(FlatFPStateX87* pSrc, FlatFPStateX87* pDst);
+#ifdef DEBUG
+ bool IsValidEntry(unsigned uEntry);
+ bool IsConsistent();
+ void UpdateMappingFromStack();
+ void Dump();
+
+    // In some optimizations the stack will be inconsistent during some transactions. We want to keep
+    // the checks for everything else, so if you have the stack in an inconsistent state, you must
+    // ignore it on purpose.
+ bool m_bIgnoreConsistencyChecks;
+
+ inline void IgnoreConsistencyChecks(bool bIgnore)
+ {
+ m_bIgnoreConsistencyChecks = bIgnore;
+ }
+#else
+ inline void IgnoreConsistencyChecks(bool bIgnore)
+ {
+ }
+#endif
+
+ unsigned m_uVirtualMap[FP_VIRTUALREGISTERS];
+ unsigned m_uStack[FP_PHYSICREGISTERS];
+ unsigned m_uStackSize;
+};
+
+#endif // FEATURE_STACK_FP_X87
+#endif
diff --git a/src/jit/gcdecode.cpp b/src/jit/gcdecode.cpp
new file mode 100644
index 0000000000..0722917490
--- /dev/null
+++ b/src/jit/gcdecode.cpp
@@ -0,0 +1,15 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/* Precompiled header nonsense requires that we do it this way */
+
+/* GCDecoder.cpp is a common source file between VM and JIT/IL */
+/* GCDecoder.cpp is located in $COM99/inc */
+
+#include "gcdecoder.cpp"
diff --git a/src/jit/gcencode.cpp b/src/jit/gcencode.cpp
new file mode 100644
index 0000000000..f20183b25a
--- /dev/null
+++ b/src/jit/gcencode.cpp
@@ -0,0 +1,4725 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX GCEncode XX
+XX XX
+XX Logic to encode the JIT method header and GC pointer tables XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+
+#pragma warning(disable : 4244) // loss of data int -> char ..
+
+#endif
+
+#include "gcinfotypes.h"
+
+#ifdef JIT32_GCENCODER
+
+#include "emit.h"
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************/
+// (see jit.h) #define REGEN_SHORTCUTS 0
+// To regenerate the compressed info header shortcuts, define REGEN_SHORTCUTS
+// and use the following command line pipe/filter to give you the 128
+// most useful encodings.
+//
+// find . -name regen.txt | xargs cat | grep InfoHdr | sort | uniq -c | sort -r | head -128
+
+// (see jit.h) #define REGEN_CALLPAT 0
+// To regenerate the compressed call pattern table, define REGEN_CALLPAT
+// and use the following command line pipe/filter to give you the 80
+// most useful encodings.
+//
+// find . -name regen.txt | xargs cat | grep CallSite | sort | uniq -c | sort -r | head -80
+
+#if REGEN_SHORTCUTS || REGEN_CALLPAT
+static FILE* logFile = NULL;
+CRITICAL_SECTION logFileLock;
+#endif
+
+#if REGEN_CALLPAT
+static void regenLog(unsigned codeDelta,
+ unsigned argMask,
+ unsigned regMask,
+ unsigned argCnt,
+ unsigned byrefArgMask,
+ unsigned byrefRegMask,
+ BYTE* base,
+ unsigned enSize)
+{
+ CallPattern pat;
+
+ pat.fld.argCnt = (argCnt < 0xff) ? argCnt : 0xff;
+ pat.fld.regMask = (regMask < 0xff) ? regMask : 0xff;
+ pat.fld.argMask = (argMask < 0xff) ? argMask : 0xff;
+ pat.fld.codeDelta = (codeDelta < 0xff) ? codeDelta : 0xff;
+
+ if (logFile == NULL)
+ {
+ logFile = fopen("regen.txt", "a");
+ InitializeCriticalSection(&logFileLock);
+ }
+
+ assert(((enSize > 0) && (enSize < 256)) && ((pat.val & 0xffffff) != 0xffffff));
+
+ EnterCriticalSection(&logFileLock);
+
+ fprintf(logFile, "CallSite( 0x%08x, 0x%02x%02x, 0x", pat.val, byrefArgMask, byrefRegMask);
+
+ while (enSize > 0)
+ {
+ fprintf(logFile, "%02x", *base++);
+ enSize--;
+ }
+ fprintf(logFile, "),\n");
+ fflush(logFile);
+
+ LeaveCriticalSection(&logFileLock);
+}
+#endif
+
+#if REGEN_SHORTCUTS
+static void regenLog(unsigned encoding, InfoHdr* header, InfoHdr* state)
+{
+ if (logFile == NULL)
+ {
+ logFile = fopen("regen.txt", "a");
+ InitializeCriticalSection(&logFileLock);
+ }
+
+ EnterCriticalSection(&logFileLock);
+
+ fprintf(logFile, "InfoHdr( %2d, %2d, %1d, %1d, %1d,"
+ " %1d, %1d, %1d, %1d, %1d,"
+ " %1d, %1d, %1d, %1d, %1d,"
+ " %1d, %2d, %2d, %2d, %2d,"
+ " %2d, %2d), \n",
+ state->prologSize, state->epilogSize, state->epilogCount, state->epilogAtEnd, state->ediSaved,
+ state->esiSaved, state->ebxSaved, state->ebpSaved, state->ebpFrame, state->interruptible,
+ state->doubleAlign, state->security, state->handlers, state->localloc, state->editNcontinue, state->varargs,
+ state->profCallbacks, state->argCount, state->frameSize,
+ (state->untrackedCnt <= SET_UNTRACKED_MAX) ? state->untrackedCnt : HAS_UNTRACKED,
+ (state->varPtrTableSize == 0) ? 0 : HAS_VARPTR,
+ (state->gsCookieOffset == INVALID_GS_COOKIE_OFFSET) ? 0 : HAS_GS_COOKIE_OFFSET,
+ (state->syncStartOffset == INVALID_SYNC_OFFSET) ? 0 : HAS_SYNC_OFFSET,
+ (state->syncStartOffset == INVALID_SYNC_OFFSET) ? 0 : HAS_SYNC_OFFSET);
+
+ fflush(logFile);
+
+ LeaveCriticalSection(&logFileLock);
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Given the four parameters return the index into the callPatternTable[]
+ * that is used to encode these four items. If an exact match cannot be
+ * found, then ignore the codeDelta and search the table again for a near
+ * match.
+ * Returns 0..79 for an exact match or
+ * (delta<<8) | (0..79) for a near match.
+ * A near match will be encoded using two bytes, the first byte will
+ * skip the adjustment delta that prevented an exact match and the
+ * rest of the delta plus the other three items are encoded in the
+ * second byte.
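+ * For example, if the closest table entry matches everything except codeDelta,
+ * and the requested codeDelta is larger by 3, the near match is returned
+ * as (3 << 8) | tableIndex.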
+ */
+int FASTCALL lookupCallPattern(unsigned argCnt, unsigned regMask, unsigned argMask, unsigned codeDelta)
+{
+ if ((argCnt <= CP_MAX_ARG_CNT) && (argMask <= CP_MAX_ARG_MASK))
+ {
+ CallPattern pat;
+
+ pat.fld.argCnt = argCnt;
+ pat.fld.regMask = regMask; // EBP,EBX,ESI,EDI
+ pat.fld.argMask = argMask;
+ pat.fld.codeDelta = codeDelta;
+
+ bool codeDeltaOK = (pat.fld.codeDelta == codeDelta);
+ unsigned bestDelta2 = 0xff;
+ unsigned bestPattern = 0xff;
+ unsigned patval = pat.val;
+ assert(sizeof(CallPattern) == sizeof(unsigned));
+
+ const unsigned* curp = &callPatternTable[0];
+ for (unsigned inx = 0; inx < 80; inx++, curp++)
+ {
+ unsigned curval = *curp;
+ if ((patval == curval) && codeDeltaOK)
+ return inx;
+
+ if (((patval ^ curval) & 0xffffff) == 0)
+ {
+ unsigned delta2 = codeDelta - (curval >> 24);
+ if (delta2 < bestDelta2)
+ {
+ bestDelta2 = delta2;
+ bestPattern = inx;
+ }
+ }
+ }
+
+ if (bestPattern != 0xff)
+ {
+ return (bestDelta2 << 8) | bestPattern;
+ }
+ }
+ return -1;
+}
+
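+// initNeeded3/initNeeded4 decide how to move the encoder state from 'cur' to 'tgt'
+// when 'tgt' is too large for a one-byte SET_xxx encoding. If 'tgt' can be reached
+// from 'cur' by shifting in 3-bit (or 4-bit) nibbles, *hint receives the next nibble
+// to shift in and the function returns false (the caller emits a NEXT_xxx encoding);
+// otherwise *hint receives a small base value to re-initialize with and the function
+// returns true (the caller emits a SET_xxx encoding). bigEncoding3/bigEncoding4 below
+// estimate how many such nibble bytes a value would take.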
+static bool initNeeded3(unsigned cur, unsigned tgt, unsigned max, unsigned* hint)
+{
+ assert(cur != tgt);
+
+ unsigned tmp = tgt;
+ unsigned nib = 0;
+ unsigned cnt = 0;
+
+ while (tmp > max)
+ {
+ nib = tmp & 0x07;
+ tmp >>= 3;
+ if (tmp == cur)
+ {
+ *hint = nib;
+ return false;
+ }
+ cnt++;
+ }
+
+ *hint = tmp;
+ return true;
+}
+
+static bool initNeeded4(unsigned cur, unsigned tgt, unsigned max, unsigned* hint)
+{
+ assert(cur != tgt);
+
+ unsigned tmp = tgt;
+ unsigned nib = 0;
+ unsigned cnt = 0;
+
+ while (tmp > max)
+ {
+ nib = tmp & 0x0f;
+ tmp >>= 4;
+ if (tmp == cur)
+ {
+ *hint = nib;
+ return false;
+ }
+ cnt++;
+ }
+
+ *hint = tmp;
+ return true;
+}
+
+static int bigEncoding3(unsigned cur, unsigned tgt, unsigned max)
+{
+ assert(cur != tgt);
+
+ unsigned tmp = tgt;
+ unsigned nib = 0;
+ unsigned cnt = 0;
+
+ while (tmp > max)
+ {
+ nib = tmp & 0x07;
+ tmp >>= 3;
+ if (tmp == cur)
+ break;
+ cnt++;
+ }
+ return cnt;
+}
+
+static int bigEncoding4(unsigned cur, unsigned tgt, unsigned max)
+{
+ assert(cur != tgt);
+
+ unsigned tmp = tgt;
+ unsigned nib = 0;
+ unsigned cnt = 0;
+
+ while (tmp > max)
+ {
+ nib = tmp & 0x0f;
+ tmp >>= 4;
+ if (tmp == cur)
+ break;
+ cnt++;
+ }
+ return cnt;
+}
+
+BYTE FASTCALL encodeHeaderNext(const InfoHdr& header, InfoHdr* state)
+{
+ BYTE encoding = 0xff;
+
+ if (state->argCount != header.argCount)
+ {
+ // We have one-byte encodings for 0..8
+ if (header.argCount <= SET_ARGCOUNT_MAX)
+ {
+ state->argCount = header.argCount;
+ encoding = SET_ARGCOUNT + header.argCount;
+ goto DO_RETURN;
+ }
+ else
+ {
+ unsigned hint;
+ if (initNeeded4(state->argCount, header.argCount, SET_ARGCOUNT_MAX, &hint))
+ {
+ assert(hint <= SET_ARGCOUNT_MAX);
+ state->argCount = hint;
+ encoding = SET_ARGCOUNT + hint;
+ goto DO_RETURN;
+ }
+ else
+ {
+ assert(hint <= 0xf);
+ state->argCount <<= 4;
+ state->argCount += hint;
+ encoding = NEXT_FOUR_ARGCOUNT + hint;
+ goto DO_RETURN;
+ }
+ }
+ }
+
+ if (state->frameSize != header.frameSize)
+ {
+ // We have one-byte encodings for 0..7
+ if (header.frameSize <= SET_FRAMESIZE_MAX)
+ {
+ state->frameSize = header.frameSize;
+ encoding = SET_FRAMESIZE + header.frameSize;
+ goto DO_RETURN;
+ }
+ else
+ {
+ unsigned hint;
+ if (initNeeded4(state->frameSize, header.frameSize, SET_FRAMESIZE_MAX, &hint))
+ {
+ assert(hint <= SET_FRAMESIZE_MAX);
+ state->frameSize = hint;
+ encoding = SET_FRAMESIZE + hint;
+ goto DO_RETURN;
+ }
+ else
+ {
+ assert(hint <= 0xf);
+ state->frameSize <<= 4;
+ state->frameSize += hint;
+ encoding = NEXT_FOUR_FRAMESIZE + hint;
+ goto DO_RETURN;
+ }
+ }
+ }
+
+ if ((state->epilogCount != header.epilogCount) || (state->epilogAtEnd != header.epilogAtEnd))
+ {
+ if (header.epilogCount > SET_EPILOGCNT_MAX)
+ IMPL_LIMITATION("More than SET_EPILOGCNT_MAX epilogs");
+
+ state->epilogCount = header.epilogCount;
+ state->epilogAtEnd = header.epilogAtEnd;
+ encoding = SET_EPILOGCNT + header.epilogCount * 2;
+ if (header.epilogAtEnd)
+ encoding++;
+ goto DO_RETURN;
+ }
+
+ if (state->varPtrTableSize != header.varPtrTableSize)
+ {
+ assert(state->varPtrTableSize == 0 || state->varPtrTableSize == HAS_VARPTR);
+
+ if (state->varPtrTableSize == 0)
+ {
+ state->varPtrTableSize = HAS_VARPTR;
+ encoding = FLIP_VAR_PTR_TABLE_SZ;
+ goto DO_RETURN;
+ }
+ else if (header.varPtrTableSize == 0)
+ {
+ state->varPtrTableSize = 0;
+ encoding = FLIP_VAR_PTR_TABLE_SZ;
+ goto DO_RETURN;
+ }
+ }
+
+ if (state->untrackedCnt != header.untrackedCnt)
+ {
+ assert(state->untrackedCnt <= SET_UNTRACKED_MAX || state->untrackedCnt == HAS_UNTRACKED);
+
+ // We have one-byte encodings for 0..3
+ if (header.untrackedCnt <= SET_UNTRACKED_MAX)
+ {
+ state->untrackedCnt = header.untrackedCnt;
+ encoding = SET_UNTRACKED + header.untrackedCnt;
+ goto DO_RETURN;
+ }
+ else if (state->untrackedCnt != HAS_UNTRACKED)
+ {
+ state->untrackedCnt = HAS_UNTRACKED;
+ encoding = FFFF_UNTRACKED_CNT;
+ goto DO_RETURN;
+ }
+ }
+
+ if (state->epilogSize != header.epilogSize)
+ {
+ // We have one-byte encodings for 0..10
+ if (header.epilogSize <= SET_EPILOGSIZE_MAX)
+ {
+ state->epilogSize = header.epilogSize;
+ encoding = SET_EPILOGSIZE + header.epilogSize;
+ goto DO_RETURN;
+ }
+ else
+ {
+ unsigned hint;
+ if (initNeeded3(state->epilogSize, header.epilogSize, SET_EPILOGSIZE_MAX, &hint))
+ {
+ assert(hint <= SET_EPILOGSIZE_MAX);
+ state->epilogSize = hint;
+ encoding = SET_EPILOGSIZE + hint;
+ goto DO_RETURN;
+ }
+ else
+ {
+ assert(hint <= 0x7);
+ state->epilogSize <<= 3;
+ state->epilogSize += hint;
+ encoding = NEXT_THREE_EPILOGSIZE + hint;
+ goto DO_RETURN;
+ }
+ }
+ }
+
+ if (state->prologSize != header.prologSize)
+ {
+ // We have one-byte encodings for 0..16
+ if (header.prologSize <= SET_PROLOGSIZE_MAX)
+ {
+ state->prologSize = header.prologSize;
+ encoding = SET_PROLOGSIZE + header.prologSize;
+ goto DO_RETURN;
+ }
+ else
+ {
+ unsigned hint;
+ assert(SET_PROLOGSIZE_MAX > 15);
+ if (initNeeded3(state->prologSize, header.prologSize, 15, &hint))
+ {
+ assert(hint <= 15);
+ state->prologSize = hint;
+ encoding = SET_PROLOGSIZE + hint;
+ goto DO_RETURN;
+ }
+ else
+ {
+ assert(hint <= 0x7);
+ state->prologSize <<= 3;
+ state->prologSize += hint;
+ encoding = NEXT_THREE_PROLOGSIZE + hint;
+ goto DO_RETURN;
+ }
+ }
+ }
+
+ if (state->ediSaved != header.ediSaved)
+ {
+ state->ediSaved = header.ediSaved;
+ encoding = FLIP_EDI_SAVED;
+ goto DO_RETURN;
+ }
+
+ if (state->esiSaved != header.esiSaved)
+ {
+ state->esiSaved = header.esiSaved;
+ encoding = FLIP_ESI_SAVED;
+ goto DO_RETURN;
+ }
+
+ if (state->ebxSaved != header.ebxSaved)
+ {
+ state->ebxSaved = header.ebxSaved;
+ encoding = FLIP_EBX_SAVED;
+ goto DO_RETURN;
+ }
+
+ if (state->ebpSaved != header.ebpSaved)
+ {
+ state->ebpSaved = header.ebpSaved;
+ encoding = FLIP_EBP_SAVED;
+ goto DO_RETURN;
+ }
+
+ if (state->ebpFrame != header.ebpFrame)
+ {
+ state->ebpFrame = header.ebpFrame;
+ encoding = FLIP_EBP_FRAME;
+ goto DO_RETURN;
+ }
+
+ if (state->interruptible != header.interruptible)
+ {
+ state->interruptible = header.interruptible;
+ encoding = FLIP_INTERRUPTIBLE;
+ goto DO_RETURN;
+ }
+
+#if DOUBLE_ALIGN
+ if (state->doubleAlign != header.doubleAlign)
+ {
+ state->doubleAlign = header.doubleAlign;
+ encoding = FLIP_DOUBLE_ALIGN;
+ goto DO_RETURN;
+ }
+#endif
+
+ if (state->security != header.security)
+ {
+ state->security = header.security;
+ encoding = FLIP_SECURITY;
+ goto DO_RETURN;
+ }
+
+ if (state->handlers != header.handlers)
+ {
+ state->handlers = header.handlers;
+ encoding = FLIP_HANDLERS;
+ goto DO_RETURN;
+ }
+
+ if (state->localloc != header.localloc)
+ {
+ state->localloc = header.localloc;
+ encoding = FLIP_LOCALLOC;
+ goto DO_RETURN;
+ }
+
+ if (state->editNcontinue != header.editNcontinue)
+ {
+ state->editNcontinue = header.editNcontinue;
+ encoding = FLIP_EDITnCONTINUE;
+ goto DO_RETURN;
+ }
+
+ if (state->varargs != header.varargs)
+ {
+ state->varargs = header.varargs;
+ encoding = FLIP_VARARGS;
+ goto DO_RETURN;
+ }
+
+ if (state->profCallbacks != header.profCallbacks)
+ {
+ state->profCallbacks = header.profCallbacks;
+ encoding = FLIP_PROF_CALLBACKS;
+ goto DO_RETURN;
+ }
+
+ if (state->genericsContext != header.genericsContext)
+ {
+ state->genericsContext = header.genericsContext;
+ encoding = FLIP_HAS_GENERICS_CONTEXT;
+ goto DO_RETURN;
+ }
+
+ if (state->genericsContextIsMethodDesc != header.genericsContextIsMethodDesc)
+ {
+ state->genericsContextIsMethodDesc = header.genericsContextIsMethodDesc;
+ encoding = FLIP_GENERICS_CONTEXT_IS_METHODDESC;
+ goto DO_RETURN;
+ }
+
+ if (state->gsCookieOffset != header.gsCookieOffset)
+ {
+ assert(state->gsCookieOffset == INVALID_GS_COOKIE_OFFSET || state->gsCookieOffset == HAS_GS_COOKIE_OFFSET);
+
+ if (state->gsCookieOffset == INVALID_GS_COOKIE_OFFSET)
+ {
+ // header.gsCookieOffset is non-zero. We can set it
+ // to zero using FLIP_HAS_GS_COOKIE
+ state->gsCookieOffset = HAS_GS_COOKIE_OFFSET;
+ encoding = FLIP_HAS_GS_COOKIE;
+ goto DO_RETURN;
+ }
+ else if (header.gsCookieOffset == INVALID_GS_COOKIE_OFFSET)
+ {
+ state->gsCookieOffset = INVALID_GS_COOKIE_OFFSET;
+ encoding = FLIP_HAS_GS_COOKIE;
+ goto DO_RETURN;
+ }
+ }
+
+ if (state->syncStartOffset != header.syncStartOffset)
+ {
+ assert(state->syncStartOffset == INVALID_SYNC_OFFSET || state->syncStartOffset == HAS_SYNC_OFFSET);
+
+ if (state->syncStartOffset == INVALID_SYNC_OFFSET)
+ {
+ // header.syncStartOffset is non-zero. We can set it
+ // to zero using FLIP_SYNC
+ state->syncStartOffset = HAS_SYNC_OFFSET;
+ encoding = FLIP_SYNC;
+ goto DO_RETURN;
+ }
+ else if (header.syncStartOffset == INVALID_SYNC_OFFSET)
+ {
+ state->syncStartOffset = INVALID_SYNC_OFFSET;
+ encoding = FLIP_SYNC;
+ goto DO_RETURN;
+ }
+ }
+
+DO_RETURN:
+ assert(encoding < 0x80);
+ if (!state->isHeaderMatch(header))
+ encoding |= 0x80;
+ return encoding;
+}
+
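+// measureDistance estimates how many encoding bytes would be needed to adjust the
+// canned header 'p' (an infoHdrShortcut entry) so that it matches 'header'. The scan
+// stops early once the estimate reaches 'closeness', since a closer entry is already known.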
+static int measureDistance(const InfoHdr& header, const InfoHdrSmall* p, int closeness)
+{
+ int distance = 0;
+
+ if (p->untrackedCnt != header.untrackedCnt)
+ {
+ if (header.untrackedCnt > 3)
+ {
+ if (p->untrackedCnt != HAS_UNTRACKED)
+ distance += 1;
+ }
+ else
+ {
+ distance += 1;
+ }
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->varPtrTableSize != header.varPtrTableSize)
+ {
+ if (header.varPtrTableSize != 0)
+ {
+ if (p->varPtrTableSize != HAS_VARPTR)
+ distance += 1;
+ }
+ else
+ {
+ assert(p->varPtrTableSize == HAS_VARPTR);
+ distance += 1;
+ }
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->frameSize != header.frameSize)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+
+ // We have one-byte encodings for 0..7
+ if (header.frameSize > SET_FRAMESIZE_MAX)
+ {
+ distance += bigEncoding4(p->frameSize, header.frameSize, SET_FRAMESIZE_MAX);
+ if (distance >= closeness)
+ return distance;
+ }
+ }
+
+ if (p->argCount != header.argCount)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+
+ // We have one-byte encodings for 0..8
+ if (header.argCount > SET_ARGCOUNT_MAX)
+ {
+ distance += bigEncoding4(p->argCount, header.argCount, SET_ARGCOUNT_MAX);
+ if (distance >= closeness)
+ return distance;
+ }
+ }
+
+ if (p->prologSize != header.prologSize)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+
+ // We have one-byte encodings for 0..16
+ if (header.prologSize > SET_PROLOGSIZE_MAX)
+ {
+ assert(SET_PROLOGSIZE_MAX > 15);
+ distance += bigEncoding3(p->prologSize, header.prologSize, 15);
+ if (distance >= closeness)
+ return distance;
+ }
+ }
+
+ if (p->epilogSize != header.epilogSize)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ // We have one-byte encodings for 0..10
+ if (header.epilogSize > SET_EPILOGSIZE_MAX)
+ {
+ distance += bigEncoding3(p->epilogSize, header.epilogSize, SET_EPILOGSIZE_MAX);
+ if (distance >= closeness)
+ return distance;
+ }
+ }
+
+ if ((p->epilogCount != header.epilogCount) || (p->epilogAtEnd != header.epilogAtEnd))
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+
+ if (header.epilogCount > SET_EPILOGCNT_MAX)
+ IMPL_LIMITATION("More than SET_EPILOGCNT_MAX epilogs");
+ }
+
+ if (p->ediSaved != header.ediSaved)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->esiSaved != header.esiSaved)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->ebxSaved != header.ebxSaved)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->ebpSaved != header.ebpSaved)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->ebpFrame != header.ebpFrame)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->interruptible != header.interruptible)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+#if DOUBLE_ALIGN
+ if (p->doubleAlign != header.doubleAlign)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+#endif
+
+ if (p->security != header.security)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->handlers != header.handlers)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->localloc != header.localloc)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->editNcontinue != header.editNcontinue)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->varargs != header.varargs)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->profCallbacks != header.profCallbacks)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->genericsContext != header.genericsContext)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->genericsContextIsMethodDesc != header.genericsContextIsMethodDesc)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (header.gsCookieOffset != INVALID_GS_COOKIE_OFFSET)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (header.syncStartOffset != INVALID_SYNC_OFFSET)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ return distance;
+}
+
+// DllMain calls gcInitEncoderLookupTable to fill in this table
+/* extern */ int infoHdrLookup[IH_MAX_PROLOG_SIZE + 2];
+
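+// After initialization, the half-open range [infoHdrLookup[psz], infoHdrLookup[psz + 1])
+// indexes the infoHdrShortcut[] entries whose prologSize equals psz, which lets
+// encodeHeaderFirst() restrict its search to entries with the right prolog size.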
+/* static */ void GCInfo::gcInitEncoderLookupTable()
+{
+ const InfoHdrSmall* p = &infoHdrShortcut[0];
+ int lo = -1;
+ int hi = 0;
+ int n;
+
+ for (n = 0; n < 128; n++, p++)
+ {
+ if (p->prologSize != lo)
+ {
+ if (p->prologSize < lo)
+ {
+ assert(p->prologSize == 0);
+ hi = IH_MAX_PROLOG_SIZE;
+ }
+ else
+ hi = p->prologSize;
+
+ assert(hi <= IH_MAX_PROLOG_SIZE);
+
+ while (lo < hi)
+ infoHdrLookup[++lo] = n;
+
+ if (lo == IH_MAX_PROLOG_SIZE)
+ break;
+ }
+ }
+
+ assert(lo == IH_MAX_PROLOG_SIZE);
+ assert(infoHdrLookup[IH_MAX_PROLOG_SIZE] < 128);
+
+ while (p->prologSize == lo)
+ {
+ n++;
+ if (n >= 128)
+ break;
+ p++;
+ }
+
+ infoHdrLookup[++lo] = n;
+
+#ifdef DEBUG
+ //
+ // We do some other DEBUG only validity checks here
+ //
+ assert(callCommonDelta[0] < callCommonDelta[1]);
+ assert(callCommonDelta[1] < callCommonDelta[2]);
+ assert(callCommonDelta[2] < callCommonDelta[3]);
+ assert(sizeof(CallPattern) == sizeof(unsigned));
+ unsigned maxMarks = 0;
+ for (unsigned inx = 0; inx < 80; inx++)
+ {
+ CallPattern pat;
+ pat.val = callPatternTable[inx];
+
+ assert(pat.fld.codeDelta <= CP_MAX_CODE_DELTA);
+ if (pat.fld.codeDelta == CP_MAX_CODE_DELTA)
+ maxMarks |= 0x01;
+
+ assert(pat.fld.argCnt <= CP_MAX_ARG_CNT);
+ if (pat.fld.argCnt == CP_MAX_ARG_CNT)
+ maxMarks |= 0x02;
+
+ assert(pat.fld.argMask <= CP_MAX_ARG_MASK);
+ if (pat.fld.argMask == CP_MAX_ARG_MASK)
+ maxMarks |= 0x04;
+ }
+ assert(maxMarks == 0x07);
+#endif
+}
+
+const int NO_CACHED_HEADER = -1;
+
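+// encodeHeaderFirst picks the infoHdrShortcut[] entry that most closely matches 'header'.
+// The low 7 bits of the returned byte index that entry; the 0x80 bit is set when the entry
+// is not an exact match, in which case *more estimates the number of additional adjustment
+// bytes and the caller keeps calling encodeHeaderNext() until the full state is encoded.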
+BYTE FASTCALL encodeHeaderFirst(const InfoHdr& header, InfoHdr* state, int* more, int* pCached)
+{
+ // First try the cached value for an exact match, if there is one
+ //
+ int n = *pCached;
+ const InfoHdrSmall* p;
+
+ if (n != NO_CACHED_HEADER)
+ {
+ p = &infoHdrShortcut[n];
+ if (p->isHeaderMatch(header))
+ {
+ // exact match found
+ GetInfoHdr(n, state);
+ *more = 0;
+ return n;
+ }
+ }
+
+ // Next search the table for an exact match
+ // Only search entries that have a matching prolog size
+ // Note: lo and hi are saved here as they specify the
+ // range of entries that have the correct prolog size
+ //
+ unsigned psz = header.prologSize;
+ int lo = 0;
+ int hi = 0;
+
+ if (psz <= IH_MAX_PROLOG_SIZE)
+ {
+ lo = infoHdrLookup[psz];
+ hi = infoHdrLookup[psz + 1];
+ p = &infoHdrShortcut[lo];
+ for (n = lo; n < hi; n++, p++)
+ {
+ assert(psz == p->prologSize);
+ if (p->isHeaderMatch(header))
+ {
+ // exact match found
+ GetInfoHdr(n, state);
+ *pCached = n; // cache the value
+ *more = 0;
+ return n;
+ }
+ }
+ }
+
+ //
+ // no exact match in infoHdrShortcut[]
+ //
+ // find the nearest entry in the table
+ //
+ int nearest = -1;
+ int closeness = 255; // (i.e. not very close)
+
+ //
+ // Calculate the minimum acceptable distance
+ // if we find an entry that is at least this close
+ // we will stop the search and use that value
+ //
+ int min_acceptable_distance = 1;
+
+ if (header.frameSize > SET_FRAMESIZE_MAX)
+ {
+ ++min_acceptable_distance;
+ if (header.frameSize > 32)
+ ++min_acceptable_distance;
+ }
+ if (header.argCount > SET_ARGCOUNT_MAX)
+ {
+ ++min_acceptable_distance;
+ if (header.argCount > 32)
+ ++min_acceptable_distance;
+ }
+
+ // First try the cached value
+ // and see if it meets the minimum acceptable distance
+ //
+ if (*pCached != NO_CACHED_HEADER)
+ {
+ p = &infoHdrShortcut[*pCached];
+ int distance = measureDistance(header, p, closeness);
+ assert(distance > 0);
+ if (distance <= min_acceptable_distance)
+ {
+ GetInfoHdr(*pCached, state);
+ *more = distance;
+ return 0x80 | *pCached;
+ }
+ else
+ {
+ closeness = distance;
+ nearest = *pCached;
+ }
+ }
+
+ // Then try the ones pointed to by [lo..hi),
+ // (i.e. the ones that have the correct prolog size)
+ //
+ p = &infoHdrShortcut[lo];
+ for (n = lo; n < hi; n++, p++)
+ {
+ if (n == *pCached)
+ continue; // already tried this one
+ int distance = measureDistance(header, p, closeness);
+ assert(distance > 0);
+ if (distance <= min_acceptable_distance)
+ {
+ GetInfoHdr(n, state);
+ *pCached = n; // Cache this value
+ *more = distance;
+ return 0x80 | n;
+ }
+ else if (distance < closeness)
+ {
+ closeness = distance;
+ nearest = n;
+ }
+ }
+
+ int last = infoHdrLookup[IH_MAX_PROLOG_SIZE + 1];
+ assert(last <= 128);
+
+ // Then try all the rest [0..last-1]
+ p = &infoHdrShortcut[0];
+ for (n = 0; n < last; n++, p++)
+ {
+ if (n == *pCached)
+ continue; // already tried this one
+ if ((n >= lo) && (n < hi))
+ continue; // already tried these
+ int distance = measureDistance(header, p, closeness);
+ assert(distance > 0);
+ if (distance <= min_acceptable_distance)
+ {
+ GetInfoHdr(n, state);
+ *pCached = n; // Cache this value
+ *more = distance;
+ return 0x80 | n;
+ }
+ else if (distance < closeness)
+ {
+ closeness = distance;
+ nearest = n;
+ }
+ }
+
+ //
+ // If we reach here then there was no adjacent neighbor
+    // in infoHdrShortcut[]; closeness indicates how many extra
+ // bytes we will need to encode this item.
+ //
+ assert((nearest >= 0) && (nearest <= 127));
+ GetInfoHdr(nearest, state);
+ *pCached = nearest; // Cache this value
+ *more = closeness;
+ return 0x80 | nearest;
+}
+
+/*****************************************************************************
+ *
+ * Write the initial part of the method info block. This is called twice;
+ * first to compute the size needed for the info (mask=0), the second time
+ * to actually generate the contents of the table (mask=-1,dest!=NULL).
+ */
+
+size_t GCInfo::gcInfoBlockHdrSave(
+ BYTE* dest, int mask, unsigned methodSize, unsigned prologSize, unsigned epilogSize, InfoHdr* header, int* pCached)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ printf("*************** In gcInfoBlockHdrSave()\n");
+#endif
+ size_t size = 0;
+
+#if VERIFY_GC_TABLES
+ *castto(dest, unsigned short*)++ = 0xFEEF;
+ size += sizeof(short);
+#endif
+
+ /* Write the method size first (using between 1 and 5 bytes) */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ if (mask)
+ printf("GCINFO: methodSize = %04X\n", methodSize);
+ if (mask)
+ printf("GCINFO: prologSize = %04X\n", prologSize);
+ if (mask)
+ printf("GCINFO: epilogSize = %04X\n", epilogSize);
+ }
+#endif
+
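+    // 'mask' is either 0 (size-only pass) or -1 (emit pass): "dest += (sz & mask)" advances
+    // the output pointer only on the emit pass, while 'size' accumulates the total on both passes.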
+ size_t methSz = encodeUnsigned(dest, methodSize);
+ size += methSz;
+ dest += methSz & mask;
+
+ //
+ // New style InfoBlk Header
+ //
+    // Typically only uses one byte to store everything.
+ //
+
+ if (mask == 0)
+ {
+ memset(header, 0, sizeof(InfoHdr));
+ *pCached = NO_CACHED_HEADER;
+ }
+
+ assert(FitsIn<unsigned char>(prologSize));
+ header->prologSize = static_cast<unsigned char>(prologSize);
+ assert(FitsIn<unsigned char>(epilogSize));
+ header->epilogSize = static_cast<unsigned char>(epilogSize);
+ header->epilogCount = compiler->getEmitter()->emitGetEpilogCnt();
+ if (header->epilogCount != compiler->getEmitter()->emitGetEpilogCnt())
+ IMPL_LIMITATION("emitGetEpilogCnt() does not fit in InfoHdr::epilogCount");
+ header->epilogAtEnd = compiler->getEmitter()->emitHasEpilogEnd();
+
+ if (compiler->codeGen->regSet.rsRegsModified(RBM_EDI))
+ header->ediSaved = 1;
+ if (compiler->codeGen->regSet.rsRegsModified(RBM_ESI))
+ header->esiSaved = 1;
+ if (compiler->codeGen->regSet.rsRegsModified(RBM_EBX))
+ header->ebxSaved = 1;
+
+ header->interruptible = compiler->codeGen->genInterruptible;
+
+ if (!compiler->isFramePointerUsed())
+ {
+#if DOUBLE_ALIGN
+ if (compiler->genDoubleAlign())
+ {
+ header->ebpSaved = true;
+ assert(!compiler->codeGen->regSet.rsRegsModified(RBM_EBP));
+ }
+#endif
+ if (compiler->codeGen->regSet.rsRegsModified(RBM_EBP))
+ {
+ header->ebpSaved = true;
+ }
+ }
+ else
+ {
+ header->ebpSaved = true;
+ header->ebpFrame = true;
+ }
+
+#if DOUBLE_ALIGN
+ header->doubleAlign = compiler->genDoubleAlign();
+#endif
+
+ header->security = compiler->opts.compNeedSecurityCheck;
+
+ header->handlers = compiler->ehHasCallableHandlers();
+ header->localloc = compiler->compLocallocUsed;
+
+ header->varargs = compiler->info.compIsVarArgs;
+ header->profCallbacks = compiler->info.compProfilerCallback;
+ header->editNcontinue = compiler->opts.compDbgEnC;
+ header->genericsContext = compiler->lvaReportParamTypeArg();
+ header->genericsContextIsMethodDesc =
+ header->genericsContext && (compiler->info.compMethodInfo->options & (CORINFO_GENERICS_CTXT_FROM_METHODDESC));
+ header->gsCookieOffset = INVALID_GS_COOKIE_OFFSET;
+ if (compiler->getNeedsGSSecurityCookie())
+ {
+ assert(compiler->lvaGSSecurityCookie != BAD_VAR_NUM);
+ int stkOffs = compiler->lvaTable[compiler->lvaGSSecurityCookie].lvStkOffs;
+ header->gsCookieOffset = compiler->isFramePointerUsed() ? -stkOffs : stkOffs;
+ assert(header->gsCookieOffset != INVALID_GS_COOKIE_OFFSET);
+ }
+
+ header->syncStartOffset = INVALID_SYNC_OFFSET;
+ header->syncEndOffset = INVALID_SYNC_OFFSET;
+ if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ assert(compiler->syncStartEmitCookie != NULL);
+ header->syncStartOffset = compiler->getEmitter()->emitCodeOffset(compiler->syncStartEmitCookie, 0);
+ assert(header->syncStartOffset != INVALID_SYNC_OFFSET);
+
+ assert(compiler->syncEndEmitCookie != NULL);
+ header->syncEndOffset = compiler->getEmitter()->emitCodeOffset(compiler->syncEndEmitCookie, 0);
+ assert(header->syncEndOffset != INVALID_SYNC_OFFSET);
+
+ assert(header->syncStartOffset < header->syncEndOffset);
+ // synchronized methods can't have more than 1 epilog
+ assert(header->epilogCount <= 1);
+ }
+
+ assert((compiler->compArgSize & 0x3) == 0);
+
+ size_t argCount =
+ (compiler->compArgSize - (compiler->codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*))) / sizeof(void*);
+ assert(argCount <= MAX_USHORT_SIZE_T);
+ header->argCount = static_cast<unsigned short>(argCount);
+
+ header->frameSize = compiler->compLclFrameSize / sizeof(int);
+ if (header->frameSize != (compiler->compLclFrameSize / sizeof(int)))
+ IMPL_LIMITATION("compLclFrameSize does not fit in InfoHdr::frameSize");
+
+ if (mask == 0)
+ {
+ gcCountForHeader((UNALIGNED unsigned int*)&header->untrackedCnt,
+ (UNALIGNED unsigned int*)&header->varPtrTableSize);
+ }
+
+ //
+ // If the high-order bit of headerEncoding is set
+ // then additional bytes will update the InfoHdr state
+    // until the full state is encoded
+ //
+ InfoHdr state;
+ int more = 0;
+ BYTE headerEncoding = encodeHeaderFirst(*header, &state, &more, pCached);
+ ++size;
+ if (mask)
+ {
+#if REGEN_SHORTCUTS
+ regenLog(headerEncoding, header, &state);
+#endif
+ *dest++ = headerEncoding;
+
+ BYTE encoding = headerEncoding;
+ while (encoding & 0x80)
+ {
+ encoding = encodeHeaderNext(*header, &state);
+#if REGEN_SHORTCUTS
+ regenLog(headerEncoding, header, &state);
+#endif
+ *dest++ = encoding;
+ ++size;
+ }
+ }
+ else
+ {
+ size += more;
+ }
+
+ if (header->untrackedCnt > SET_UNTRACKED_MAX)
+ {
+ unsigned count = header->untrackedCnt;
+ unsigned sz = encodeUnsigned(mask ? dest : NULL, count);
+ size += sz;
+ dest += (sz & mask);
+ }
+
+ if (header->varPtrTableSize != 0)
+ {
+ unsigned count = header->varPtrTableSize;
+ unsigned sz = encodeUnsigned(mask ? dest : NULL, count);
+ size += sz;
+ dest += (sz & mask);
+ }
+
+ if (header->gsCookieOffset != INVALID_GS_COOKIE_OFFSET)
+ {
+ assert(mask == 0 || state.gsCookieOffset == HAS_GS_COOKIE_OFFSET);
+ unsigned offset = header->gsCookieOffset;
+ unsigned sz = encodeUnsigned(mask ? dest : NULL, offset);
+ size += sz;
+ dest += (sz & mask);
+ }
+
+ if (header->syncStartOffset != INVALID_SYNC_OFFSET)
+ {
+ assert(mask == 0 || state.syncStartOffset == HAS_SYNC_OFFSET);
+
+ {
+ unsigned offset = header->syncStartOffset;
+ unsigned sz = encodeUnsigned(mask ? dest : NULL, offset);
+ size += sz;
+ dest += (sz & mask);
+ }
+
+ {
+ unsigned offset = header->syncEndOffset;
+ unsigned sz = encodeUnsigned(mask ? dest : NULL, offset);
+ size += sz;
+ dest += (sz & mask);
+ }
+ }
+
+ if (header->epilogCount)
+ {
+ /* Generate table unless one epilog at the end of the method */
+
+ if (header->epilogAtEnd == 0 || header->epilogCount != 1)
+ {
+#if VERIFY_GC_TABLES
+ *castto(dest, unsigned short*)++ = 0xFACE;
+ size += sizeof(short);
+#endif
+
+ /* Simply write a sorted array of offsets using encodeUDelta */
+
+ gcEpilogTable = mask ? dest : NULL;
+ gcEpilogPrevOffset = 0;
+
+ size_t sz = compiler->getEmitter()->emitGenEpilogLst(gcRecordEpilog, this);
+
+ /* Add the size of the epilog table to the total size */
+
+ size += sz;
+ dest += (sz & mask);
+ }
+ }
+
+#if DISPLAY_SIZES
+
+ if (mask)
+ {
+ if (compiler->codeGen->genInterruptible)
+ {
+ genMethodICnt++;
+ }
+ else
+ {
+ genMethodNCnt++;
+ }
+ }
+
+#endif // DISPLAY_SIZES
+
+ return size;
+}
+
+/*****************************************************************************
+ *
+ * Return the size of the pointer tracking tables.
+ */
+
+size_t GCInfo::gcPtrTableSize(const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset)
+{
+ BYTE temp[16 + 1];
+#ifdef DEBUG
+ temp[16] = 0xAB; // Set some marker
+#endif
+
+ /* Compute the total size of the tables */
+
+ size_t size = gcMakeRegPtrTable(temp, 0, header, codeSize, pArgTabOffset);
+
+    assert(temp[16] == 0xAB); // Check that marker didn't get overwritten
+
+ return size;
+}
+
+/*****************************************************************************
+ * Encode the callee-saved registers into 3 bits.
+ */
+
+unsigned gceEncodeCalleeSavedRegs(unsigned regs)
+{
+ unsigned encodedRegs = 0;
+
+ if (regs & RBM_EBX)
+ encodedRegs |= 0x04;
+ if (regs & RBM_ESI)
+ encodedRegs |= 0x02;
+ if (regs & RBM_EDI)
+ encodedRegs |= 0x01;
+
+ return encodedRegs;
+}
+
+/*****************************************************************************
+ * Is the next entry for a byref pointer. If so, emit the prefix for the
+ * Is the next entry for a byref pointer? If so, emit the prefix for the
+ * interruptible encoding. Check only for pushes and registers.
+
+inline BYTE* gceByrefPrefixI(GCInfo::regPtrDsc* rpd, BYTE* dest)
+{
+ // For registers, we don't need a prefix if it is going dead.
+ assert(rpd->rpdArg || rpd->rpdCompiler.rpdDel == 0);
+
+ if (!rpd->rpdArg || rpd->rpdArgType == GCInfo::rpdARG_PUSH)
+ if (rpd->rpdGCtypeGet() == GCT_BYREF)
+ *dest++ = 0xBF;
+
+ return dest;
+}
+
+/*****************************************************************************/
+
+/* These functions are needed to work around a VC5.0 compiler bug */
+/* DO NOT REMOVE, unless you are sure that the free build works */
+static int zeroFN()
+{
+ return 0;
+}
+static int (*zeroFunc)() = zeroFN;
+
+/*****************************************************************************
+ * Modelling of the GC ptrs pushed on the stack
+ */
+
+typedef unsigned pasMaskType;
+#define BITS_IN_pasMask (BITS_IN_BYTE * sizeof(pasMaskType))
+#define HIGHEST_pasMask_BIT (((pasMaskType)0x1) << (BITS_IN_pasMask - 1))
+
+//-----------------------------------------------------------------------------
+
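+// PendingArgsStack tracks the GC-ness of arguments pushed on the stack at a call site.
+// The bottom BITS_IN_pasMask entries are kept as bit masks (pasBottomMask, with
+// pasByrefBottomMask qualifying which of those are byrefs); any deeper entries spill
+// into pasTopArray, one byte of GCtype per slot.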
+class PendingArgsStack
+{
+public:
+ PendingArgsStack(unsigned maxDepth, Compiler* pComp);
+
+ void pasPush(GCtype gcType);
+ void pasPop(unsigned count);
+ void pasKill(unsigned gcCount);
+
+ unsigned pasCurDepth()
+ {
+ return pasDepth;
+ }
+ pasMaskType pasArgMask()
+ {
+ assert(pasDepth <= BITS_IN_pasMask);
+ return pasBottomMask;
+ }
+ pasMaskType pasByrefArgMask()
+ {
+ assert(pasDepth <= BITS_IN_pasMask);
+ return pasByrefBottomMask;
+ }
+ bool pasHasGCptrs();
+
+    // Use these in the case where there actually are more ptrs than pasArgMask can represent
+ unsigned pasEnumGCoffsCount();
+#define pasENUM_START ((unsigned)-1)
+#define pasENUM_LAST ((unsigned)-2)
+#define pasENUM_END ((unsigned)-3)
+ unsigned pasEnumGCoffs(unsigned iter, unsigned* offs);
+
+protected:
+ unsigned pasMaxDepth;
+
+ unsigned pasDepth;
+
+ pasMaskType pasBottomMask; // The first 32 args
+ pasMaskType pasByrefBottomMask; // byref qualifier for pasBottomMask
+
+ BYTE* pasTopArray; // More than 32 args are represented here
+ unsigned pasPtrsInTopArray; // How many GCptrs here
+};
+
+//-----------------------------------------------------------------------------
+
+PendingArgsStack::PendingArgsStack(unsigned maxDepth, Compiler* pComp)
+ : pasMaxDepth(maxDepth)
+ , pasDepth(0)
+ , pasBottomMask(0)
+ , pasByrefBottomMask(0)
+ , pasTopArray(NULL)
+ , pasPtrsInTopArray(0)
+{
+ /* Do we need an array as well as the mask ? */
+
+ if (pasMaxDepth > BITS_IN_pasMask)
+ pasTopArray = (BYTE*)pComp->compGetMemA(pasMaxDepth - BITS_IN_pasMask);
+}
+
+//-----------------------------------------------------------------------------
+
+void PendingArgsStack::pasPush(GCtype gcType)
+{
+ assert(pasDepth < pasMaxDepth);
+
+ if (pasDepth < BITS_IN_pasMask)
+ {
+ /* Shift the mask */
+
+ pasBottomMask <<= 1;
+ pasByrefBottomMask <<= 1;
+
+ if (needsGC(gcType))
+ {
+ pasBottomMask |= 1;
+
+ if (gcType == GCT_BYREF)
+ pasByrefBottomMask |= 1;
+ }
+ }
+ else
+ {
+ /* Push on array */
+
+ pasTopArray[pasDepth - BITS_IN_pasMask] = (BYTE)gcType;
+
+ if (gcType)
+ pasPtrsInTopArray++;
+ }
+
+ pasDepth++;
+}
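+
+// Worked example (illustrative only): pushing GCT_GCREF, then GCT_NONE, then GCT_BYREF (with
+// the depth staying within BITS_IN_pasMask) leaves pasBottomMask == 0x5 and
+// pasByrefBottomMask == 0x1; bit 0 describes the most recently pushed of the mask-tracked slots.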
+
+//-----------------------------------------------------------------------------
+
+void PendingArgsStack::pasPop(unsigned count)
+{
+ assert(pasDepth >= count);
+
+ /* First pop from array (if applicable) */
+
+ for (/**/; (pasDepth > BITS_IN_pasMask) && count; pasDepth--, count--)
+ {
+ unsigned topIndex = pasDepth - BITS_IN_pasMask - 1;
+
+ GCtype topArg = (GCtype)pasTopArray[topIndex];
+
+ if (needsGC(topArg))
+ pasPtrsInTopArray--;
+ }
+ if (count == 0)
+ return;
+
+ /* Now un-shift the mask */
+
+ assert(pasPtrsInTopArray == 0);
+ assert(count <= BITS_IN_pasMask);
+
+ if (count == BITS_IN_pasMask) // (x>>32) is a nop on x86. So special-case it
+ {
+ pasBottomMask = pasByrefBottomMask = 0;
+ pasDepth = 0;
+ }
+ else
+ {
+ pasBottomMask >>= count;
+ pasByrefBottomMask >>= count;
+ pasDepth -= count;
+ }
+}
+
+//-----------------------------------------------------------------------------
+// Kill (but don't pop) the top 'gcCount' args
+
+void PendingArgsStack::pasKill(unsigned gcCount)
+{
+ assert(gcCount != 0);
+
+ /* First kill args in array (if any) */
+
+ for (unsigned curPos = pasDepth; (curPos > BITS_IN_pasMask) && gcCount; curPos--)
+ {
+ unsigned curIndex = curPos - BITS_IN_pasMask - 1;
+
+ GCtype curArg = (GCtype)pasTopArray[curIndex];
+
+ if (needsGC(curArg))
+ {
+ pasTopArray[curIndex] = GCT_NONE;
+ pasPtrsInTopArray--;
+ gcCount--;
+ }
+ }
+
+ /* Now kill bits from the mask */
+
+ assert(pasPtrsInTopArray == 0);
+ assert(gcCount <= BITS_IN_pasMask);
+
+ for (unsigned bitPos = 1; gcCount; bitPos <<= 1)
+ {
+ assert(pasBottomMask != 0);
+
+ if (pasBottomMask & bitPos)
+ {
+ pasBottomMask &= ~bitPos;
+ pasByrefBottomMask &= ~bitPos;
+ --gcCount;
+ }
+ else
+ {
+ assert(bitPos != HIGHEST_pasMask_BIT);
+ }
+ }
+}
+
+//-----------------------------------------------------------------------------
+// Used for the case where there are more than BITS_IN_pasMask args on stack,
+// but none of them are pointers. This may let us avoid reporting anything to the GC info.
+
+bool PendingArgsStack::pasHasGCptrs()
+{
+ if (pasDepth <= BITS_IN_pasMask)
+ return pasBottomMask != 0;
+ else
+ return pasBottomMask != 0 || pasPtrsInTopArray != 0;
+}
+
+//-----------------------------------------------------------------------------
+// Iterates over mask and array to return total count.
+// Use only when you are going to emit a table of the offsets
+
+unsigned PendingArgsStack::pasEnumGCoffsCount()
+{
+ /* Should only be used in the worst case, when just the mask can't be used */
+
+ assert(pasDepth > BITS_IN_pasMask && pasHasGCptrs());
+
+ /* Count number of set bits in mask */
+
+ unsigned count = 0;
+
+ for (pasMaskType mask = 0x1, i = 0; i < BITS_IN_pasMask; mask <<= 1, i++)
+ {
+ if (mask & pasBottomMask)
+ count++;
+ }
+
+ return count + pasPtrsInTopArray;
+}
+
+//-----------------------------------------------------------------------------
+// Initialize the enumeration by passing in iter=pasENUM_START.
+// Continue by passing in the return value as the new value of iter.
+// The enumeration is over when pasENUM_END is returned.
+// If the return value != pasENUM_END, *offs is set to the offset for GCinfo
+
+unsigned PendingArgsStack::pasEnumGCoffs(unsigned iter, unsigned* offs)
+{
+ if (iter == pasENUM_LAST)
+ return pasENUM_END;
+
+ unsigned i = (iter == pasENUM_START) ? pasDepth : iter;
+
+ for (/**/; i > BITS_IN_pasMask; i--)
+ {
+ GCtype curArg = (GCtype)pasTopArray[i - BITS_IN_pasMask - 1];
+ if (needsGC(curArg))
+ {
+ unsigned offset;
+
+ offset = (pasDepth - i) * sizeof(void*);
+ if (curArg == GCT_BYREF)
+ offset |= byref_OFFSET_FLAG;
+
+ *offs = offset;
+ return i - 1;
+ }
+ }
+
+ if (!pasBottomMask)
+ return pasENUM_END;
+
+ // Have we already processed some of the bits in pasBottomMask ?
+
+ i = (iter == pasENUM_START || iter >= BITS_IN_pasMask) ? 0 // no
+ : iter; // yes
+
+ for (pasMaskType mask = 0x1 << i; mask; i++, mask <<= 1)
+ {
+ if (mask & pasBottomMask)
+ {
+ unsigned lvl = (pasDepth > BITS_IN_pasMask) ? (pasDepth - BITS_IN_pasMask) : 0; // How many in pasTopArray[]
+ lvl += i;
+
+ unsigned offset;
+ offset = lvl * sizeof(void*);
+ if (mask & pasByrefBottomMask)
+ offset |= byref_OFFSET_FLAG;
+
+ *offs = offset;
+
+ unsigned remMask = -int(mask << 1);
+ return ((pasBottomMask & remMask) ? (i + 1) : pasENUM_LAST);
+ }
+ }
+
+    assert(!"Shouldn't reach here");
+ return pasENUM_END;
+}
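+
+// Worked example (illustrative only): with pasDepth == 3, pasBottomMask == 0x5 and
+// pasByrefBottomMask == 0x1, starting the enumeration with iter=pasENUM_START yields offset 0
+// (tagged with byref_OFFSET_FLAG) for bit 0, then offset 2 * sizeof(void*) for bit 2, and then
+// pasENUM_LAST/pasENUM_END; offsets count up from the most recently pushed slot.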
+
+/*****************************************************************************
+ *
+ * Generate the register pointer map, and return its total size in bytes. If
+ * 'mask' is 0, we don't actually store any data in 'dest' (except for one
+ * entry, which is never more than 10 bytes), so this can be used to merely
+ * compute the size of the table.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset)
+{
+ unsigned count;
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ unsigned pass;
+
+ size_t totalSize = 0;
+ unsigned lastOffset;
+
+ bool thisKeptAliveIsInUntracked = false;
+
+ /* The mask should be all 0's or all 1's */
+
+ assert(mask == 0 || mask == -1);
+
+ /* Start computing the total size of the table */
+
+ BOOL emitArgTabOffset = (header.varPtrTableSize != 0 || header.untrackedCnt > SET_UNTRACKED_MAX);
+ if (mask != 0 && emitArgTabOffset)
+ {
+ assert(*pArgTabOffset <= MAX_UNSIGNED_SIZE_T);
+ unsigned sz = encodeUnsigned(dest, static_cast<unsigned>(*pArgTabOffset));
+ dest += sz;
+ totalSize += sz;
+ }
+
+#if VERIFY_GC_TABLES
+ if (mask)
+ {
+ *(short*)dest = (short)0xBEEF;
+ dest += sizeof(short);
+ }
+ totalSize += sizeof(short);
+#endif
+
+ /**************************************************************************
+ *
+ * Untracked ptr variables
+ *
+ **************************************************************************
+ */
+
+ count = 0;
+ for (pass = 0; pass < 2; pass++)
+ {
+ /* If pass==0, generate the count
+ * If pass==1, write the table of untracked pointer variables.
+ */
+
+ int lastoffset = 0;
+ if (pass == 1)
+ {
+ assert(count == header.untrackedCnt);
+ if (header.untrackedCnt == 0)
+ break; // No entries, break exits the loop since pass==1
+ }
+
+ /* Count&Write untracked locals and non-enregistered args */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ // Field local of a PROMOTION_TYPE_DEPENDENT struct must have been
+ // reported through its parent local
+ continue;
+ }
+
+ if (varTypeIsGC(varDsc->TypeGet()))
+ {
+ /* Do we have an argument or local variable? */
+ if (!varDsc->lvIsParam)
+ {
+                    // If it is pinned, it must be an untracked local
+ assert(!varDsc->lvPinned || !varDsc->lvTracked);
+
+ if (varDsc->lvTracked || !varDsc->lvOnFrame)
+ continue;
+ }
+ else
+ {
+ /* Stack-passed arguments which are not enregistered
+ * are always reported in this "untracked stack
+ * pointers" section of the GC info even if lvTracked==true
+ */
+
+ /* Has this argument been enregistered? */
+#ifndef LEGACY_BACKEND
+ if (!varDsc->lvOnFrame)
+#else // LEGACY_BACKEND
+ if (varDsc->lvRegister)
+#endif // LEGACY_BACKEND
+ {
+ /* if a CEE_JMP has been used, then we need to report all the arguments
+ even if they are enregistered, since we will be using this value
+                           in the JMP call. Note that this is subtle as we require that
+ argument offsets are always fixed up properly even if lvRegister
+ is set */
+ if (!compiler->compJmpOpUsed)
+ continue;
+ }
+ else
+ {
+ if (!varDsc->lvOnFrame)
+ {
+ /* If this non-enregistered pointer arg is never
+ * used, we don't need to report it
+ */
+ assert(varDsc->lvRefCnt == 0); // This assert is currently a known issue for X86-RyuJit
+ continue;
+ }
+ else if (varDsc->lvIsRegArg && varDsc->lvTracked)
+ {
+ /* If this register-passed arg is tracked, then
+ * it has been allocated space near the other
+ * pointer variables and we have accurate life-
+ * time info. It will be reported with
+ * gcVarPtrList in the "tracked-pointer" section
+ */
+
+ continue;
+ }
+ }
+ }
+
+ if (compiler->lvaIsOriginalThisArg(varNum) && compiler->lvaKeepAliveAndReportThis())
+ {
+ // Encoding of untracked variables does not support reporting
+ // "this". So report it as a tracked variable with a liveness
+ // extending over the entire method.
+
+ thisKeptAliveIsInUntracked = true;
+ continue;
+ }
+
+ if (pass == 0)
+ count++;
+ else
+ {
+ int offset;
+ assert(pass == 1);
+
+ offset = varDsc->lvStkOffs;
+#if DOUBLE_ALIGN
+ // For genDoubleAlign(), locals are addressed relative to ESP and
+ // arguments are addressed relative to EBP.
+
+ if (compiler->genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ offset += compiler->codeGen->genTotalFrameSize();
+#endif
+
+ // The lower bits of the offset encode properties of the stk ptr
+
+ assert(~OFFSET_MASK % sizeof(offset) == 0);
+
+ if (varDsc->TypeGet() == TYP_BYREF)
+ {
+ // Or in byref_OFFSET_FLAG for 'byref' pointer tracking
+ offset |= byref_OFFSET_FLAG;
+ }
+
+ if (varDsc->lvPinned)
+ {
+ // Or in pinned_OFFSET_FLAG for 'pinned' pointer tracking
+ offset |= pinned_OFFSET_FLAG;
+ }
+
+ int encodedoffset = lastoffset - offset;
+ lastoffset = offset;
+
+ if (mask == 0)
+ totalSize += encodeSigned(NULL, encodedoffset);
+ else
+ {
+ unsigned sz = encodeSigned(dest, encodedoffset);
+ dest += sz;
+ totalSize += sz;
+ }
+ }
+ }
+
+ // A struct will have gcSlots only if it is at least TARGET_POINTER_SIZE.
+ if (varDsc->lvType == TYP_STRUCT && varDsc->lvOnFrame && (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
+ {
+ unsigned slots = compiler->lvaLclSize(varNum) / sizeof(void*);
+ BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
+
+ // walk each member of the array
+ for (unsigned i = 0; i < slots; i++)
+ {
+ if (gcPtrs[i] == TYPE_GC_NONE) // skip non-gc slots
+ continue;
+
+ if (pass == 0)
+ count++;
+ else
+ {
+ assert(pass == 1);
+
+ unsigned offset = varDsc->lvStkOffs + i * sizeof(void*);
+#if DOUBLE_ALIGN
+ // For genDoubleAlign(), locals are addressed relative to ESP and
+ // arguments are addressed relative to EBP.
+
+ if (compiler->genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ offset += compiler->codeGen->genTotalFrameSize();
+#endif
+ if (gcPtrs[i] == TYPE_GC_BYREF)
+ offset |= byref_OFFSET_FLAG; // indicate it is a byref GC pointer
+
+ int encodedoffset = lastoffset - offset;
+ lastoffset = offset;
+
+ if (mask == 0)
+ totalSize += encodeSigned(NULL, encodedoffset);
+ else
+ {
+ unsigned sz = encodeSigned(dest, encodedoffset);
+ dest += sz;
+ totalSize += sz;
+ }
+ }
+ }
+ }
+ }
+
+ /* Count&Write spill temps that hold pointers */
+
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempItem = compiler->tmpListBeg(); tempItem != nullptr; tempItem = compiler->tmpListNxt(tempItem))
+ {
+ if (varTypeIsGC(tempItem->tdTempType()))
+ {
+ if (pass == 0)
+ count++;
+ else
+ {
+ int offset;
+ assert(pass == 1);
+
+ offset = tempItem->tdTempOffs();
+
+ if (tempItem->tdTempType() == TYP_BYREF)
+ {
+ offset |= byref_OFFSET_FLAG;
+ }
+
+ int encodedoffset = lastoffset - offset;
+ lastoffset = offset;
+
+ if (mask == 0)
+ {
+ totalSize += encodeSigned(NULL, encodedoffset);
+ }
+ else
+ {
+ unsigned sz = encodeSigned(dest, encodedoffset);
+ dest += sz;
+ totalSize += sz;
+ }
+ }
+ }
+ }
+ }
+
+#if VERIFY_GC_TABLES
+ if (mask)
+ {
+ *(short*)dest = (short)0xCAFE;
+ dest += sizeof(short);
+ }
+ totalSize += sizeof(short);
+#endif
+
+ /**************************************************************************
+ *
+ * Generate the table of stack pointer variable lifetimes.
+ *
+ * In the first pass we'll count the lifetime entries and note
+ * whether there are any that don't fit in a small encoding. In
+ * the second pass we actually generate the table contents.
+ *
+ **************************************************************************
+ */
+
+ // First we check for the most common case - no lifetimes at all.
+
+ if (header.varPtrTableSize == 0)
+ goto DONE_VLT;
+
+ varPtrDsc* varTmp;
+ count = 0;
+
+ if (thisKeptAliveIsInUntracked)
+ {
+ count = 1;
+
+ // Encoding of untracked variables does not support reporting
+ // "this". So report it as a tracked variable with a liveness
+ // extending over the entire method.
+
+ assert(compiler->lvaTable[compiler->info.compThisArg].TypeGet() == TYP_REF);
+
+ unsigned varOffs = compiler->lvaTable[compiler->info.compThisArg].lvStkOffs;
+
+ /* For negative stack offsets we must reset the low bits,
+ * take abs and then set them back */
+
+ varOffs = abs(static_cast<int>(varOffs));
+ varOffs |= this_OFFSET_FLAG;
+
+ size_t sz = 0;
+ sz = encodeUnsigned(mask ? (dest + sz) : NULL, varOffs);
+ sz += encodeUDelta(mask ? (dest + sz) : NULL, 0, 0);
+ sz += encodeUDelta(mask ? (dest + sz) : NULL, codeSize, 0);
+
+ dest += (sz & mask);
+ totalSize += sz;
+ }
+
+ for (pass = 0; pass < 2; pass++)
+ {
+        /* On the second pass, verify the count that was computed during the first pass */
+
+ if (pass)
+ {
+ assert(header.varPtrTableSize > 0);
+ assert(header.varPtrTableSize == count);
+ }
+
+ /* We'll use a delta encoding for the lifetime offsets */
+
+ lastOffset = 0;
+
+ for (varTmp = gcVarPtrList; varTmp; varTmp = varTmp->vpdNext)
+ {
+ unsigned varOffs;
+ unsigned lowBits;
+
+ unsigned begOffs;
+ unsigned endOffs;
+
+ assert(~OFFSET_MASK % sizeof(void*) == 0);
+
+ /* Get hold of the variable's stack offset */
+
+ lowBits = varTmp->vpdVarNum & OFFSET_MASK;
+
+ /* For negative stack offsets we must reset the low bits,
+ * take abs and then set them back */
+
+ varOffs = abs(static_cast<int>(varTmp->vpdVarNum & ~OFFSET_MASK));
+ varOffs |= lowBits;
+
+ /* Compute the actual lifetime offsets */
+
+ begOffs = varTmp->vpdBegOfs;
+ endOffs = varTmp->vpdEndOfs;
+
+ /* Special case: skip any 0-length lifetimes */
+
+ if (endOffs == begOffs)
+ continue;
+
+ /* Are we counting or generating? */
+
+ if (!pass)
+ {
+ count++;
+ }
+ else
+ {
+ size_t sz = 0;
+ sz = encodeUnsigned(mask ? (dest + sz) : NULL, varOffs);
+ sz += encodeUDelta(mask ? (dest + sz) : NULL, begOffs, lastOffset);
+ sz += encodeUDelta(mask ? (dest + sz) : NULL, endOffs, begOffs);
+
+ dest += (sz & mask);
+ totalSize += sz;
+ }
+
+ /* The next entry will be relative to the one we just processed */
+
+ lastOffset = begOffs;
+ }
+ }
+
+DONE_VLT:
+
+ if (pArgTabOffset != NULL)
+ *pArgTabOffset = totalSize;
+
+#if VERIFY_GC_TABLES
+ if (mask)
+ {
+ *(short*)dest = (short)0xBABE;
+ dest += sizeof(short);
+ }
+ totalSize += sizeof(short);
+#endif
+
+ if (!mask && emitArgTabOffset)
+ {
+ assert(*pArgTabOffset <= MAX_UNSIGNED_SIZE_T);
+ totalSize += encodeUnsigned(NULL, static_cast<unsigned>(*pArgTabOffset));
+ }
+
+ /**************************************************************************
+ *
+ * Prepare to generate the pointer register/argument map
+ *
+ **************************************************************************
+ */
+
+ lastOffset = 0;
+
+ if (compiler->codeGen->genInterruptible)
+ {
+#ifdef _TARGET_X86_
+ assert(compiler->genFullPtrRegMap);
+
+ unsigned ptrRegs = 0;
+
+ regPtrDsc* genRegPtrTemp;
+
+ /* Walk the list of pointer register/argument entries */
+
+ for (genRegPtrTemp = gcRegPtrList; genRegPtrTemp; genRegPtrTemp = genRegPtrTemp->rpdNext)
+ {
+ BYTE* base = dest;
+
+ unsigned nextOffset;
+ DWORD codeDelta;
+
+ nextOffset = genRegPtrTemp->rpdOffs;
+
+ /*
+ Encoding table for methods that are fully interruptible
+
+ The encoding used is as follows:
+
+ ptr reg dead 00RRRDDD [RRR != 100]
+ ptr reg live 01RRRDDD [RRR != 100]
+
+ non-ptr arg push 10110DDD [SSS == 110]
+ ptr arg push 10SSSDDD [SSS != 110] && [SSS != 111]
+ ptr arg pop 11CCCDDD [CCC != 000] && [CCC != 110] && [CCC != 111]
+ little skip 11000DDD [CCC == 000]
+ bigger skip 11110BBB [CCC == 110]
+
+ The values used in the above encodings are as follows:
+
+ DDD code offset delta from previous entry (0-7)
+ BBB bigger delta 000=8,001=16,010=24,...,111=64
+ RRR register number (EAX=000,ECX=001,EDX=010,EBX=011,
+ EBP=101,ESI=110,EDI=111), ESP=100 is reserved
+ SSS argument offset from base of stack. This is
+ redundant for frameless methods as we can
+ infer it from the previous pushes+pops. However,
+ for EBP-methods, we only report GC pushes, and
+ so we need SSS
+ CCC argument count being popped (includes only ptrs for EBP methods)
+
+ The following are the 'large' versions:
+
+ large delta skip 10111000 [0xB8] , encodeUnsigned(delta)
+
+ large ptr arg push 11111000 [0xF8] , encodeUnsigned(pushCount)
+ large non-ptr arg push 11111001 [0xF9] , encodeUnsigned(pushCount)
+ large ptr arg pop 11111100 [0xFC] , encodeUnsigned(popCount)
+ large arg dead 11111101 [0xFD] , encodeUnsigned(popCount) for caller-pop args.
+ Any GC args go dead after the call,
+ but are still sitting on the stack
+
+ this pointer prefix 10111100 [0xBC] the next encoding is a ptr live
+ or a ptr arg push
+ and contains the this pointer
+
+ interior or by-ref 10111111 [0xBF] the next encoding is a ptr live
+ pointer prefix or a ptr arg push
+ and contains an interior
+ or by-ref pointer
+
+
+ The value 11111111 [0xFF] indicates the end of the table.
+ */
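+
+            // Worked example (illustrative only): a GC ref becoming live in EBX at a code
+            // delta of 3 emits 0x40 | (3 << 3) | 3 == 0x5B ("ptr reg live", RRR=011,
+            // DDD=011); a byref value would additionally get the 0xBF prefix from
+            // gceByrefPrefixI.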
+
+ codeDelta = nextOffset - lastOffset;
+ assert((int)codeDelta >= 0);
+
+ // If the code delta is between 8 and (64+7),
+ // generate a 'bigger delta' encoding
+
+ if ((codeDelta >= 8) && (codeDelta <= (64 + 7)))
+ {
+ unsigned biggerDelta = ((codeDelta - 8) & 0x38) + 8;
+ *dest++ = 0xF0 | ((biggerDelta - 8) >> 3);
+ lastOffset += biggerDelta;
+ codeDelta &= 0x07;
+ }
+
+ // If the code delta is still bigger than 7,
+ // generate a 'large code delta' encoding
+
+ if (codeDelta > 7)
+ {
+ *dest++ = 0xB8;
+ dest += encodeUnsigned(dest, codeDelta);
+ codeDelta = 0;
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+ }
+
+ /* Is this a pointer argument or register entry? */
+
+ if (genRegPtrTemp->rpdArg)
+ {
+ if (genRegPtrTemp->rpdArgTypeGet() == rpdARG_KILL)
+ {
+ if (codeDelta)
+ {
+ /*
+ Use the small encoding:
+ little delta skip 11000DDD [0xC0]
+ */
+
+ assert((codeDelta & 0x7) == codeDelta);
+ *dest++ = 0xC0 | (BYTE)codeDelta;
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+ }
+
+ /* Caller-pop arguments are dead after call but are still
+ sitting on the stack */
+
+ *dest++ = 0xFD;
+ assert(genRegPtrTemp->rpdPtrArg != 0);
+ dest += encodeUnsigned(dest, genRegPtrTemp->rpdPtrArg);
+ }
+ else if (genRegPtrTemp->rpdPtrArg < 6 && genRegPtrTemp->rpdGCtypeGet())
+ {
+ /* Is the argument offset/count smaller than 6 ? */
+
+ dest = gceByrefPrefixI(genRegPtrTemp, dest);
+
+ if (genRegPtrTemp->rpdArgTypeGet() == rpdARG_PUSH || (genRegPtrTemp->rpdPtrArg != 0))
+ {
+ /*
+ Use the small encoding:
+
+ ptr arg push 10SSSDDD [SSS != 110] && [SSS != 111]
+ ptr arg pop 11CCCDDD [CCC != 110] && [CCC != 111]
+ */
+
+ bool isPop = genRegPtrTemp->rpdArgTypeGet() == rpdARG_POP;
+
+ *dest++ = 0x80 | (BYTE)codeDelta | genRegPtrTemp->rpdPtrArg << 3 | isPop << 6;
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+ }
+ else
+ {
+ assert(!"Check this");
+ }
+ }
+ else if (genRegPtrTemp->rpdGCtypeGet() == GCT_NONE)
+ {
+ /*
+ Use the small encoding:
+                        non-ptr arg push 10110DDD [0xB0] (push of sizeof(int))
+ */
+
+ assert((codeDelta & 0x7) == codeDelta);
+ *dest++ = 0xB0 | (BYTE)codeDelta;
+ assert(!compiler->isFramePointerUsed());
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+ }
+ else
+ {
+ /* Will have to use large encoding;
+ * first do the code delta
+ */
+
+ if (codeDelta)
+ {
+ /*
+ Use the small encoding:
+ little delta skip 11000DDD [0xC0]
+ */
+
+ assert((codeDelta & 0x7) == codeDelta);
+ *dest++ = 0xC0 | (BYTE)codeDelta;
+ }
+
+ /*
+ Now append a large argument record:
+
+ large ptr arg push 11111000 [0xF8]
+ large ptr arg pop 11111100 [0xFC]
+ */
+
+ bool isPop = genRegPtrTemp->rpdArgTypeGet() == rpdARG_POP;
+
+ dest = gceByrefPrefixI(genRegPtrTemp, dest);
+
+ *dest++ = 0xF8 | (isPop << 2);
+ dest += encodeUnsigned(dest, genRegPtrTemp->rpdPtrArg);
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+ }
+ }
+ else
+ {
+ unsigned regMask;
+
+ /* Record any registers that are becoming dead */
+
+ regMask = genRegPtrTemp->rpdCompiler.rpdDel & ptrRegs;
+
+ while (regMask) // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
+ {
+ unsigned tmpMask;
+ regNumber regNum;
+
+ /* Get hold of the next register bit */
+
+ tmpMask = genFindLowestReg(regMask);
+ assert(tmpMask);
+
+ /* Remember the new state of this register */
+
+ ptrRegs &= ~tmpMask;
+
+ /* Figure out which register the next bit corresponds to */
+
+ regNum = genRegNumFromMask(tmpMask);
+ assert(regNum <= 7);
+
+ /* Reserve ESP, regNum==4 for future use */
+
+ assert(regNum != 4);
+
+ /*
+ Generate a small encoding:
+
+ ptr reg dead 00RRRDDD
+ */
+
+ assert((codeDelta & 0x7) == codeDelta);
+ *dest++ = 0x00 | regNum << 3 | (BYTE)codeDelta;
+
+ /* Turn the bit we've just generated off and continue */
+
+ regMask -= tmpMask; // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+
+ /* Any entries that follow will be at the same offset */
+
+ codeDelta = zeroFunc(); /* DO NOT REMOVE */
+ }
+
+ /* Record any registers that are becoming live */
+
+ regMask = genRegPtrTemp->rpdCompiler.rpdAdd & ~ptrRegs;
+
+ while (regMask) // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
+ {
+ unsigned tmpMask;
+ regNumber regNum;
+
+ /* Get hold of the next register bit */
+
+ tmpMask = genFindLowestReg(regMask);
+ assert(tmpMask);
+
+ /* Remember the new state of this register */
+
+ ptrRegs |= tmpMask;
+
+ /* Figure out which register the next bit corresponds to */
+
+ regNum = genRegNumFromMask(tmpMask);
+ assert(regNum <= 7);
+
+ /*
+ Generate a small encoding:
+
+ ptr reg live 01RRRDDD
+ */
+
+ dest = gceByrefPrefixI(genRegPtrTemp, dest);
+
+ if (!thisKeptAliveIsInUntracked && genRegPtrTemp->rpdIsThis)
+ {
+ // Mark with 'this' pointer prefix
+ *dest++ = 0xBC;
+ // Can only have one bit set in regMask
+ assert(regMask == tmpMask);
+ }
+
+ assert((codeDelta & 0x7) == codeDelta);
+ *dest++ = 0x40 | (regNum << 3) | (BYTE)codeDelta;
+
+ /* Turn the bit we've just generated off and continue */
+
+ regMask -= tmpMask; // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+
+ /* Any entries that follow will be at the same offset */
+
+ codeDelta = zeroFunc(); /* DO NOT REMOVE */
+ }
+ }
+
+ /* Keep track of the total amount of generated stuff */
+
+ totalSize += dest - base;
+
+ /* Go back to the buffer start if we're not generating a table */
+
+ if (!mask)
+ dest = base;
+ }
+#endif // _TARGET_X86_
+
+ /* Terminate the table with 0xFF */
+
+ *dest = 0xFF;
+ dest -= mask;
+ totalSize++;
+ }
+ else if (compiler->isFramePointerUsed()) // genInterruptible is false
+ {
+#ifdef _TARGET_X86_
+ /*
+ Encoding table for methods with an EBP frame and
+ that are not fully interruptible
+
+ The encoding used is as follows:
+
+ this pointer encodings:
+
+ 01000000 this pointer in EBX
+ 00100000 this pointer in ESI
+ 00010000 this pointer in EDI
+
+ tiny encoding:
+
+ 0bsdDDDD
+ requires code delta > 0 & delta < 16 (4-bits)
+ requires pushed argmask == 0
+
+ where DDDD is code delta
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+ d indicates that register EDI is a live pointer
+
+
+ small encoding:
+
+ 1DDDDDDD bsdAAAAA
+
+ requires code delta < 120 (7-bits)
+            requires pushed argmask < 32 (5-bits)
+
+ where DDDDDDD is code delta
+ AAAAA is the pushed args mask
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+ d indicates that register EDI is a live pointer
+
+ medium encoding
+
+             0xFD aaaaaaaa AAAAdddd bsdDDDDD
+
+ requires code delta < 512 (9-bits)
+            requires pushed argmask < 4096 (12-bits)
+
+ where DDDDD is the upper 5-bits of the code delta
+ dddd is the low 4-bits of the code delta
+ AAAA is the upper 4-bits of the pushed arg mask
+ aaaaaaaa is the low 8-bits of the pushed arg mask
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+            d indicates that register EDI is a live pointer
+
+ medium encoding with interior pointers
+
+            0xF9 DDDDDDDD bsdAAAAA iiiIIIII
+
+            requires code delta < 256 (8-bits)
+            requires pushed argmask < 32 (5-bits)
+
+            where DDDDDDDD is the code delta
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+ d indicates that register EDI is a live pointer
+ AAAAA is the pushed arg mask
+ iii indicates that EBX,EDI,ESI are interior pointers
+ IIIII indicates that bits in the arg mask are interior
+ pointers
+
+ large encoding
+
+ 0xFE [0BSD0bsd][32-bit code delta][32-bit argMask]
+
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+ d indicates that register EDI is a live pointer
+ B indicates that register EBX is an interior pointer
+ S indicates that register ESI is an interior pointer
+ D indicates that register EDI is an interior pointer
+ requires pushed argmask < 32-bits
+
+ large encoding with interior pointers
+
+ 0xFA [0BSD0bsd][32-bit code delta][32-bit argMask][32-bit interior pointer mask]
+
+
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+ d indicates that register EDI is a live pointer
+ B indicates that register EBX is an interior pointer
+ S indicates that register ESI is an interior pointer
+ D indicates that register EDI is an interior pointer
+ requires pushed argmask < 32-bits
+ requires pushed iArgmask < 32-bits
+
+
+ huge encoding This is the only encoding that supports
+ a pushed argmask which is greater than
+ 32-bits.
+
+ 0xFB [0BSD0bsd][32-bit code delta]
+ [32-bit table count][32-bit table size]
+ [pushed ptr offsets table...]
+
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+ d indicates that register EDI is a live pointer
+ B indicates that register EBX is an interior pointer
+ S indicates that register ESI is an interior pointer
+ D indicates that register EDI is an interior pointer
+ the list count is the number of entries in the list
+ the list size gives the byte-length of the list
+ the offsets in the list are variable-length
+ */
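+
+        // Worked example (illustrative only): a call site 5 bytes past the previous entry with
+        // only EDI live and nothing pushed fits the tiny encoding: (regMask << 4) | codeDelta
+        // == (0x01 << 4) | 5 == 0x15 (0bsdDDDD with d set). With an argmask of 0x3 it would
+        // instead use the small encoding: 0x80 | 5, then 0x3 | (0x01 << 5) == 0x23.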
+
+ /* If "this" is enregistered, note it. We do this explicitly here as
+ genFullPtrRegMap==false, and so we don't have any regPtrDsc's. */
+
+ if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister)
+ {
+ unsigned thisRegMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
+ unsigned thisPtrRegEnc = gceEncodeCalleeSavedRegs(thisRegMask) << 4;
+
+ if (thisPtrRegEnc)
+ {
+ totalSize += 1;
+ if (mask)
+ *dest++ = thisPtrRegEnc;
+ }
+ }
+
+ CallDsc* call;
+
+ assert(compiler->genFullPtrRegMap == false);
+
+ /* Walk the list of pointer register/argument entries */
+
+ for (call = gcCallDescList; call; call = call->cdNext)
+ {
+ BYTE* base = dest;
+ unsigned nextOffset;
+
+ /* Figure out the code offset of this entry */
+
+ nextOffset = call->cdOffs;
+
+ /* Compute the distance from the previous call */
+
+ DWORD codeDelta = nextOffset - lastOffset;
+
+ assert((int)codeDelta >= 0);
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+
+ /* Compute the register mask */
+
+ unsigned gcrefRegMask = 0;
+ unsigned byrefRegMask = 0;
+
+ gcrefRegMask |= gceEncodeCalleeSavedRegs(call->cdGCrefRegs);
+ byrefRegMask |= gceEncodeCalleeSavedRegs(call->cdByrefRegs);
+
+ assert((gcrefRegMask & byrefRegMask) == 0);
+
+ unsigned regMask = gcrefRegMask | byrefRegMask;
+
+ bool byref = (byrefRegMask | call->u1.cdByrefArgMask) != 0;
+
+ /* Check for the really large argument offset case */
+ /* The very rare Huge encodings */
+
+ if (call->cdArgCnt)
+ {
+ unsigned argNum;
+ DWORD argCnt = call->cdArgCnt;
+ DWORD argBytes = 0;
+ BYTE* pArgBytes = DUMMY_INIT(NULL);
+
+ if (mask != 0)
+ {
+ *dest++ = 0xFB;
+ *dest++ = (byrefRegMask << 4) | regMask;
+ *(DWORD*)dest = codeDelta;
+ dest += sizeof(DWORD);
+ *(DWORD*)dest = argCnt;
+ dest += sizeof(DWORD);
+ // skip the byte-size for now. Just note where it will go
+ pArgBytes = dest;
+ dest += sizeof(DWORD);
+ }
+
+ for (argNum = 0; argNum < argCnt; argNum++)
+ {
+ unsigned eltSize;
+ eltSize = encodeUnsigned(dest, call->cdArgTable[argNum]);
+ argBytes += eltSize;
+ if (mask)
+ dest += eltSize;
+ }
+
+ if (mask == 0)
+ {
+ dest = base + 1 + 1 + 3 * sizeof(DWORD) + argBytes;
+ }
+ else
+ {
+ assert(dest == pArgBytes + sizeof(argBytes) + argBytes);
+ *(DWORD*)pArgBytes = argBytes;
+ }
+ }
+
+ /* Check if we can use a tiny encoding */
+ else if ((codeDelta < 16) && (codeDelta != 0) && (call->u1.cdArgMask == 0) && !byref)
+ {
+ *dest++ = (regMask << 4) | (BYTE)codeDelta;
+ }
+
+ /* Check if we can use the small encoding */
+ else if ((codeDelta < 0x79) && (call->u1.cdArgMask <= 0x1F) && !byref)
+ {
+ *dest++ = 0x80 | (BYTE)codeDelta;
+ *dest++ = call->u1.cdArgMask | (regMask << 5);
+ }
+
+ /* Check if we can use the medium encoding */
+ else if (codeDelta <= 0x01FF && call->u1.cdArgMask <= 0x0FFF && !byref)
+ {
+ *dest++ = 0xFD;
+ *dest++ = call->u1.cdArgMask;
+ *dest++ = ((call->u1.cdArgMask >> 4) & 0xF0) | ((BYTE)codeDelta & 0x0F);
+ *dest++ = (regMask << 5) | (BYTE)((codeDelta >> 4) & 0x1F);
+ }
+
+ /* Check if we can use the medium encoding with byrefs */
+ else if (codeDelta <= 0x0FF && call->u1.cdArgMask <= 0x01F)
+ {
+ *dest++ = 0xF9;
+ *dest++ = (BYTE)codeDelta;
+ *dest++ = (regMask << 5) | call->u1.cdArgMask;
+ *dest++ = (byrefRegMask << 5) | call->u1.cdByrefArgMask;
+ }
+
+ /* We'll use the large encoding */
+ else if (!byref)
+ {
+ *dest++ = 0xFE;
+ *dest++ = (byrefRegMask << 4) | regMask;
+ *(DWORD*)dest = codeDelta;
+ dest += sizeof(DWORD);
+ *(DWORD*)dest = call->u1.cdArgMask;
+ dest += sizeof(DWORD);
+ }
+
+ /* We'll use the large encoding with byrefs */
+ else
+ {
+ *dest++ = 0xFA;
+ *dest++ = (byrefRegMask << 4) | regMask;
+ *(DWORD*)dest = codeDelta;
+ dest += sizeof(DWORD);
+ *(DWORD*)dest = call->u1.cdArgMask;
+ dest += sizeof(DWORD);
+ *(DWORD*)dest = call->u1.cdByrefArgMask;
+ dest += sizeof(DWORD);
+ }
+
+ /* Keep track of the total amount of generated stuff */
+
+ totalSize += dest - base;
+
+ /* Go back to the buffer start if we're not generating a table */
+
+ if (!mask)
+ dest = base;
+ }
+#endif // _TARGET_X86_
+
+ /* Terminate the table with 0xFF */
+
+ *dest = 0xFF;
+ dest -= mask;
+ totalSize++;
+ }
+ else // genInterruptible is false and we have an EBP-less frame
+ {
+ assert(compiler->genFullPtrRegMap);
+
+#ifdef _TARGET_X86_
+
+ regPtrDsc* genRegPtrTemp;
+ regNumber thisRegNum = regNumber(0);
+ PendingArgsStack pasStk(compiler->getEmitter()->emitMaxStackDepth, compiler);
+
+ /* Walk the list of pointer register/argument entries */
+
+ for (genRegPtrTemp = gcRegPtrList; genRegPtrTemp; genRegPtrTemp = genRegPtrTemp->rpdNext)
+ {
+
+ /*
+ * Encoding table for methods without an EBP frame and
+ * that are not fully interruptible
+ *
+ * The encoding used is as follows:
+ *
+ * push 000DDDDD ESP push one item with 5-bit delta
+ * push 00100000 [pushCount] ESP push multiple items
+ * reserved 0010xxxx xxxx != 0000
+ * reserved 0011xxxx
+ * skip 01000000 [Delta] Skip Delta, arbitrary sized delta
+ * skip 0100DDDD Skip small Delta, for call (DDDD != 0)
+ * pop 01CCDDDD ESP pop CC items with 4-bit delta (CC != 00)
+ * call 1PPPPPPP Call Pattern, P=[0..79]
+ * call 1101pbsd DDCCCMMM Call RegMask=pbsd,ArgCnt=CCC,
+ * ArgMask=MMM Delta=commonDelta[DD]
+ * call 1110pbsd [ArgCnt] [ArgMask] Call ArgCnt,RegMask=pbsd,ArgMask
+ * call 11111000 [PBSDpbsd][32-bit delta][32-bit ArgCnt]
+ * [32-bit PndCnt][32-bit PndSize][PndOffs...]
+ * iptr 11110000 [IPtrMask] Arbitrary Interior Pointer Mask
+ * thisptr 111101RR This pointer is in Register RR
+ * 00=EDI,01=ESI,10=EBX,11=EBP
+ * reserved 111100xx xx != 00
+ * reserved 111110xx xx != 00
+ * reserved 11111xxx xxx != 000 && xxx != 111(EOT)
+ *
+ * The value 11111111 [0xFF] indicates the end of the table. (EOT)
+ *
+ * An offset (at which stack-walking is performed) without an explicit encoding
+ * is assumed to be a trivial call-site (no GC registers, stack empty before and
+ * after) to avoid having to encode all trivial calls.
+ *
+ * Note on the encoding used for interior pointers
+ *
+ * The iptr encoding must immediately precede a call encoding. It is used
+             * to transform normal GC pointer addresses into interior pointers for
+ * GC purposes. The mask supplied to the iptr encoding is read from the
+             * least significant bit to the most significant bit. (i.e. the lowest bit is
+             * read first)
+ *
+ * p indicates that register EBP is a live pointer
+ * b indicates that register EBX is a live pointer
+ * s indicates that register ESI is a live pointer
+ * d indicates that register EDI is a live pointer
+ * P indicates that register EBP is an interior pointer
+ * B indicates that register EBX is an interior pointer
+ * S indicates that register ESI is an interior pointer
+ * D indicates that register EDI is an interior pointer
+ *
+             *  As an example the following sequence indicates that EDI, ESI and the
+             *  second pushed pointer in ArgMask are really interior pointers. The
+             *  pointer in EBP is a normal pointer:
+ *
+ * iptr 11110000 00010011 => read Interior Ptr, Interior Ptr,
+ * Normal Ptr, Normal Ptr, Interior Ptr
+ *
+             *      call 11011011 DDCCC011 RRRR=1011 => read EDI is a GC-pointer,
+ * ESI is a GC-pointer.
+ * EBP is a GC-pointer
+ * MMM=0011 => read two GC-pointers arguments
+ * on the stack (nested call)
+ *
+ * Since the call instruction mentions 5 GC-pointers we list them in
+ * the required order: EDI, ESI, EBP, 1st-pushed pointer, 2nd-pushed pointer
+ *
+ * And we apply the Interior Pointer mask mmmm=10011 to the five GC-pointers
+ * we learn that EDI and ESI are interior GC-pointers and that
+ * the second push arg is an interior GC-pointer.
+ */
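+
+                // Worked example (illustrative only): a GC pointer pushed 2 bytes after the
+                // previous entry is written as the single byte 0x02 (push 000DDDDD); a
+                // one-item pop 4 bytes later is written as 0x54 (pop1 0101DDDD). Larger
+                // deltas are first consumed by a "skip 01000000 [Delta]" record.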
+
+ BYTE* base = dest;
+
+ bool usePopEncoding;
+ unsigned regMask;
+ unsigned argMask;
+ unsigned byrefRegMask;
+ unsigned byrefArgMask;
+ DWORD callArgCnt;
+
+ unsigned nextOffset;
+ DWORD codeDelta;
+
+ nextOffset = genRegPtrTemp->rpdOffs;
+
+ /* Compute the distance from the previous call */
+
+ codeDelta = nextOffset - lastOffset;
+ assert((int)codeDelta >= 0);
+
+#if REGEN_CALLPAT
+ // Must initialize this flag to true when REGEN_CALLPAT is on
+ usePopEncoding = true;
+ unsigned origCodeDelta = codeDelta;
+#endif
+
+ if (!thisKeptAliveIsInUntracked && genRegPtrTemp->rpdIsThis)
+ {
+ unsigned tmpMask = genRegPtrTemp->rpdCompiler.rpdAdd;
+
+ /* tmpMask must have exactly one bit set */
+
+ assert(tmpMask && ((tmpMask & (tmpMask - 1)) == 0));
+
+ thisRegNum = genRegNumFromMask(tmpMask);
+ switch (thisRegNum)
+ {
+ case 0: // EAX
+ case 1: // ECX
+ case 2: // EDX
+ case 4: // ESP
+ break;
+ case 7: // EDI
+ *dest++ = 0xF4; /* 11110100 This pointer is in EDI */
+ break;
+ case 6: // ESI
+                        *dest++ = 0xF5; /* 11110101 This pointer is in ESI */
+ break;
+ case 3: // EBX
+                        *dest++ = 0xF6; /* 11110110 This pointer is in EBX */
+ break;
+ case 5: // EBP
+                        *dest++ = 0xF7; /* 11110111 This pointer is in EBP */
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Is this a stack pointer change or call? */
+
+ if (genRegPtrTemp->rpdArg)
+ {
+ if (genRegPtrTemp->rpdArgTypeGet() == rpdARG_KILL)
+ {
+ // kill 'rpdPtrArg' number of pointer variables in pasStk
+ pasStk.pasKill(genRegPtrTemp->rpdPtrArg);
+ }
+ /* Is this a call site? */
+ else if (genRegPtrTemp->rpdCall)
+ {
+ /* This is a true call site */
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+
+ callArgCnt = genRegPtrTemp->rpdPtrArg;
+
+ unsigned gcrefRegMask = genRegPtrTemp->rpdCallGCrefRegs;
+
+ byrefRegMask = genRegPtrTemp->rpdCallByrefRegs;
+
+ assert((gcrefRegMask & byrefRegMask) == 0);
+
+ regMask = gcrefRegMask | byrefRegMask;
+
+ /* adjust argMask for this call-site */
+ pasStk.pasPop(callArgCnt);
+
+ /* Do we have to use the fat encoding */
+
+ if (pasStk.pasCurDepth() > BITS_IN_pasMask && pasStk.pasHasGCptrs())
+ {
+ /* use fat encoding:
+ * 11111000 [PBSDpbsd][32-bit delta][32-bit ArgCnt]
+ * [32-bit PndCnt][32-bit PndSize][PndOffs...]
+ */
+
+ DWORD pndCount = pasStk.pasEnumGCoffsCount();
+ DWORD pndSize = 0;
+ BYTE* pPndSize = DUMMY_INIT(NULL);
+
+ if (mask)
+ {
+ *dest++ = 0xF8;
+ *dest++ = (byrefRegMask << 4) | regMask;
+ *(DWORD*)dest = codeDelta;
+ dest += sizeof(DWORD);
+ *(DWORD*)dest = callArgCnt;
+ dest += sizeof(DWORD);
+ *(DWORD*)dest = pndCount;
+ dest += sizeof(DWORD);
+ pPndSize = dest;
+ dest += sizeof(DWORD); // Leave space for pndSize
+ }
+
+ unsigned offs, iter;
+
+ for (iter = pasStk.pasEnumGCoffs(pasENUM_START, &offs); pndCount;
+ iter = pasStk.pasEnumGCoffs(iter, &offs), pndCount--)
+ {
+ unsigned eltSize = encodeUnsigned(dest, offs);
+
+ pndSize += eltSize;
+ if (mask)
+ dest += eltSize;
+ }
+ assert(iter == pasENUM_END);
+
+ if (mask == 0)
+ {
+ dest = base + 2 + 4 * sizeof(DWORD) + pndSize;
+ }
+ else
+ {
+ assert(pPndSize + sizeof(pndSize) + pndSize == dest);
+ *(DWORD*)pPndSize = pndSize;
+ }
+
+ goto NEXT_RPD;
+ }
+
+ argMask = byrefArgMask = 0;
+
+ if (pasStk.pasHasGCptrs())
+ {
+ assert(pasStk.pasCurDepth() <= BITS_IN_pasMask);
+
+ argMask = pasStk.pasArgMask();
+ byrefArgMask = pasStk.pasByrefArgMask();
+ }
+
+ /* Shouldn't be reporting trivial call-sites */
+
+ assert(regMask || argMask || callArgCnt || pasStk.pasCurDepth());
+
+// Emit IPtrMask if needed
+
+#define CHK_NON_INTRPT_ESP_IPtrMask \
+ \
+ if (byrefRegMask || byrefArgMask) \
+ { \
+ *dest++ = 0xF0; \
+ unsigned imask = (byrefArgMask << 4) | byrefRegMask; \
+ dest += encodeUnsigned(dest, imask); \
+ }
+
+ /* When usePopEncoding is true:
+ * this is not an interesting call site
+ * because nothing is live here.
+ */
+ usePopEncoding = ((callArgCnt < 4) && (regMask == 0) && (argMask == 0));
+
+ if (!usePopEncoding)
+ {
+ int pattern = lookupCallPattern(callArgCnt, regMask, argMask, codeDelta);
+ if (pattern != -1)
+ {
+ if (pattern > 0xff)
+ {
+ codeDelta = pattern >> 8;
+ pattern &= 0xff;
+ if (codeDelta >= 16)
+ {
+ /* use encoding: */
+ /* skip 01000000 [Delta] */
+ *dest++ = 0x40;
+ dest += encodeUnsigned(dest, codeDelta);
+ codeDelta = 0;
+ }
+ else
+ {
+ /* use encoding: */
+ /* skip 0100DDDD small delta=DDDD */
+ *dest++ = 0x40 | (BYTE)codeDelta;
+ }
+ }
+
+ // Emit IPtrMask if needed
+ CHK_NON_INTRPT_ESP_IPtrMask;
+
+ assert((pattern >= 0) && (pattern < 80));
+ *dest++ = 0x80 | pattern;
+ goto NEXT_RPD;
+ }
+
+ /* See if we can use 2nd call encoding
+ * 1101RRRR DDCCCMMM encoding */
+
+ if ((callArgCnt <= 7) && (argMask <= 7))
+ {
+ unsigned inx; // callCommonDelta[] index
+ unsigned maxCommonDelta = callCommonDelta[3];
+
+ if (codeDelta > maxCommonDelta)
+ {
+ if (codeDelta > maxCommonDelta + 15)
+ {
+ /* use encoding: */
+ /* skip 01000000 [Delta] */
+ *dest++ = 0x40;
+ dest += encodeUnsigned(dest, codeDelta - maxCommonDelta);
+ }
+ else
+ {
+ /* use encoding: */
+ /* skip 0100DDDD small delta=DDDD */
+ *dest++ = 0x40 | (BYTE)(codeDelta - maxCommonDelta);
+ }
+
+ codeDelta = maxCommonDelta;
+ inx = 3;
+ goto EMIT_2ND_CALL_ENCODING;
+ }
+
+ for (inx = 0; inx < 4; inx++)
+ {
+ if (codeDelta == callCommonDelta[inx])
+ {
+ EMIT_2ND_CALL_ENCODING:
+ // Emit IPtrMask if needed
+ CHK_NON_INTRPT_ESP_IPtrMask;
+
+ *dest++ = 0xD0 | regMask;
+ *dest++ = (inx << 6) | (callArgCnt << 3) | argMask;
+ goto NEXT_RPD;
+ }
+ }
+
+ unsigned minCommonDelta = callCommonDelta[0];
+
+ if ((codeDelta > minCommonDelta) && (codeDelta < maxCommonDelta))
+ {
+ assert((minCommonDelta + 16) > maxCommonDelta);
+ /* use encoding: */
+ /* skip 0100DDDD small delta=DDDD */
+ *dest++ = 0x40 | (BYTE)(codeDelta - minCommonDelta);
+
+ codeDelta = minCommonDelta;
+ inx = 0;
+ goto EMIT_2ND_CALL_ENCODING;
+ }
+ }
+ }
+
+ if (codeDelta >= 16)
+ {
+ unsigned i = (usePopEncoding ? 15 : 0);
+ /* use encoding: */
+ /* skip 01000000 [Delta] arbitrary sized delta */
+ *dest++ = 0x40;
+ dest += encodeUnsigned(dest, codeDelta - i);
+ codeDelta = i;
+ }
+
+ if ((codeDelta > 0) || usePopEncoding)
+ {
+ if (usePopEncoding)
+ {
+ /* use encoding: */
+ /* pop 01CCDDDD ESP pop CC items, 4-bit delta */
+ if (callArgCnt || codeDelta)
+ *dest++ = (BYTE)(0x40 | (callArgCnt << 4) | codeDelta);
+ goto NEXT_RPD;
+ }
+ else
+ {
+ /* use encoding: */
+ /* skip 0100DDDD small delta=DDDD */
+ *dest++ = 0x40 | (BYTE)codeDelta;
+ }
+ }
+
+ // Emit IPtrMask if needed
+ CHK_NON_INTRPT_ESP_IPtrMask;
+
+ /* use encoding: */
+ /* call 1110RRRR [ArgCnt] [ArgMask] */
+
+ *dest++ = 0xE0 | regMask;
+ dest += encodeUnsigned(dest, callArgCnt);
+
+ dest += encodeUnsigned(dest, argMask);
+ }
+ else
+ {
+ /* This is a push or a pop site */
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+
+ if (genRegPtrTemp->rpdArgTypeGet() == rpdARG_POP)
+ {
+ /* This must be a gcArgPopSingle */
+
+ assert(genRegPtrTemp->rpdPtrArg == 1);
+
+ if (codeDelta >= 16)
+ {
+ /* use encoding: */
+ /* skip 01000000 [Delta] */
+ *dest++ = 0x40;
+ dest += encodeUnsigned(dest, codeDelta - 15);
+ codeDelta = 15;
+ }
+
+ /* use encoding: */
+ /* pop1 0101DDDD ESP pop one item, 4-bit delta */
+
+ *dest++ = 0x50 | (BYTE)codeDelta;
+
+ /* adjust argMask for this pop */
+ pasStk.pasPop(1);
+ }
+ else
+ {
+ /* This is a push */
+
+ if (codeDelta >= 32)
+ {
+ /* use encoding: */
+ /* skip 01000000 [Delta] */
+ *dest++ = 0x40;
+ dest += encodeUnsigned(dest, codeDelta - 31);
+ codeDelta = 31;
+ }
+
+ assert(codeDelta < 32);
+
+ /* use encoding: */
+ /* push 000DDDDD ESP push one item, 5-bit delta */
+
+ *dest++ = (BYTE)codeDelta;
+
+ /* adjust argMask for this push */
+ pasStk.pasPush(genRegPtrTemp->rpdGCtypeGet());
+ }
+ }
+ }
+
+ /* We ignore the register live/dead information, since the
+ * rpdCallRegMask contains all the liveness information
+ * that we need
+ */
+ NEXT_RPD:
+
+ totalSize += dest - base;
+
+ /* Go back to the buffer start if we're not generating a table */
+
+ if (!mask)
+ dest = base;
+
+#if REGEN_CALLPAT
+ if ((mask == -1) && (usePopEncoding == false) && ((dest - base) > 0))
+ regenLog(origCodeDelta, argMask, regMask, callArgCnt, byrefArgMask, byrefRegMask, base, (dest - base));
+#endif
+ }
+
+ /* Verify that we pop every arg that was pushed and that argMask is 0 */
+
+ assert(pasStk.pasCurDepth() == 0);
+
+#endif // _TARGET_X86_
+
+ /* Terminate the table with 0xFF */
+
+ *dest = 0xFF;
+ dest -= mask;
+ totalSize++;
+ }
+
+#if VERIFY_GC_TABLES
+ if (mask)
+ {
+ *(short*)dest = (short)0xBEEB;
+ dest += sizeof(short);
+ }
+ totalSize += sizeof(short);
+#endif
+
+#if MEASURE_PTRTAB_SIZE
+
+ if (mask)
+ s_gcTotalPtrTabSize += totalSize;
+
+#endif
+
+ return totalSize;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************/
+#if DUMP_GC_TABLES
+/*****************************************************************************
+ *
+ * Dump the contents of a GC pointer table.
+ */
+
+#include "gcdump.h"
+
+#if VERIFY_GC_TABLES
+const bool verifyGCTables = true;
+#else
+const bool verifyGCTables = false;
+#endif
+
+/*****************************************************************************
+ *
+ * Dump the info block header.
+ */
+
+unsigned GCInfo::gcInfoBlockHdrDump(const BYTE* table, InfoHdr* header, unsigned* methodSize)
+{
+ GCDump gcDump(GCINFO_VERSION);
+
+ gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM)
+ printf("Method info block:\n");
+
+ return gcDump.DumpInfoHdr(table, header, methodSize, verifyGCTables);
+}
+
+/*****************************************************************************/
+
+unsigned GCInfo::gcDumpPtrTable(const BYTE* table, const InfoHdr& header, unsigned methodSize)
+{
+ printf("Pointer table:\n");
+
+ GCDump gcDump(GCINFO_VERSION);
+ gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM)
+
+ return gcDump.DumpGCTable(table, header, methodSize, verifyGCTables);
+}
+
+/*****************************************************************************
+ *
+ * Find all the live pointers in a stack frame.
+ */
+
+void GCInfo::gcFindPtrsInFrame(const void* infoBlock, const void* codeBlock, unsigned offs)
+{
+ GCDump gcDump(GCINFO_VERSION);
+ gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM)
+
+ gcDump.DumpPtrsInFrame((const BYTE*)infoBlock, (const BYTE*)codeBlock, offs, verifyGCTables);
+}
+
+#endif // DUMP_GC_TABLES
+
+#else // !JIT32_GCENCODER
+
+#include "gcinfoencoder.h"
+#include "simplerhash.h"
+
+// Do explicit instantiation.
+template class SimplerHashTable<RegSlotIdKey, RegSlotIdKey, GcSlotId, JitSimplerHashBehavior>;
+template class SimplerHashTable<StackSlotIdKey, StackSlotIdKey, GcSlotId, JitSimplerHashBehavior>;
+
+#ifdef DEBUG
+
+void GCInfo::gcDumpVarPtrDsc(varPtrDsc* desc)
+{
+ const int offs = (desc->vpdVarNum & ~OFFSET_MASK);
+ const GCtype gcType = (desc->vpdVarNum & byref_OFFSET_FLAG) ? GCT_BYREF : GCT_GCREF;
+ const bool isPin = (desc->vpdVarNum & pinned_OFFSET_FLAG) != 0;
+
+ printf("[%08X] %s%s var at [%s", dspPtr(desc), GCtypeStr(gcType), isPin ? "pinned-ptr" : "",
+ compiler->isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
+
+ if (offs < 0)
+ {
+ printf("-%02XH", -offs);
+ }
+ else if (offs > 0)
+ {
+ printf("+%02XH", +offs);
+ }
+
+ printf("] live from %04X to %04X\n", desc->vpdBegOfs, desc->vpdEndOfs);
+}
+
+static const char* const GcSlotFlagsNames[] = {"",
+ "(byref) ",
+ "(pinned) ",
+ "(byref, pinned) ",
+ "(untracked) ",
+ "(byref, untracked) ",
+ "(pinned, untracked) ",
+ "(byref, pinned, untracked) "};
+
+// I'm making a local wrapper class for GcInfoEncoder so that I can add logging of my own (DLD).
+class GcInfoEncoderWithLogging
+{
+ GcInfoEncoder* m_gcInfoEncoder;
+ bool m_doLogging;
+
+public:
+ GcInfoEncoderWithLogging(GcInfoEncoder* gcInfoEncoder, bool verbose)
+ : m_gcInfoEncoder(gcInfoEncoder), m_doLogging(verbose || JitConfig.JitGCInfoLogging() != 0)
+ {
+ }
+
+ GcSlotId GetStackSlotId(INT32 spOffset, GcSlotFlags flags, GcStackSlotBase spBase = GC_CALLER_SP_REL)
+ {
+ GcSlotId newSlotId = m_gcInfoEncoder->GetStackSlotId(spOffset, flags, spBase);
+ if (m_doLogging)
+ {
+ printf("Stack slot id for offset %d (0x%x) (%s) %s= %d.\n", spOffset, spOffset,
+ GcStackSlotBaseNames[spBase], GcSlotFlagsNames[flags & 7], newSlotId);
+ }
+ return newSlotId;
+ }
+
+ GcSlotId GetRegisterSlotId(UINT32 regNum, GcSlotFlags flags)
+ {
+ GcSlotId newSlotId = m_gcInfoEncoder->GetRegisterSlotId(regNum, flags);
+ if (m_doLogging)
+ {
+ printf("Register slot id for reg %s %s= %d.\n", getRegName(regNum), GcSlotFlagsNames[flags & 7], newSlotId);
+ }
+ return newSlotId;
+ }
+
+ void SetSlotState(UINT32 instructionOffset, GcSlotId slotId, GcSlotState slotState)
+ {
+ m_gcInfoEncoder->SetSlotState(instructionOffset, slotId, slotState);
+ if (m_doLogging)
+ {
+ printf("Set state of slot %d at instr offset 0x%x to %s.\n", slotId, instructionOffset,
+ (slotState == GC_SLOT_LIVE ? "Live" : "Dead"));
+ }
+ }
+
+ void DefineCallSites(UINT32* pCallSites, BYTE* pCallSiteSizes, UINT32 numCallSites)
+ {
+ m_gcInfoEncoder->DefineCallSites(pCallSites, pCallSiteSizes, numCallSites);
+ if (m_doLogging)
+ {
+ printf("Defining %d call sites:\n", numCallSites);
+ for (UINT32 k = 0; k < numCallSites; k++)
+ {
+ printf(" Offset 0x%x, size %d.\n", pCallSites[k], pCallSiteSizes[k]);
+ }
+ }
+ }
+
+ void DefineInterruptibleRange(UINT32 startInstructionOffset, UINT32 length)
+ {
+ m_gcInfoEncoder->DefineInterruptibleRange(startInstructionOffset, length);
+ if (m_doLogging)
+ {
+ printf("Defining interruptible range: [0x%x, 0x%x).\n", startInstructionOffset,
+ startInstructionOffset + length);
+ }
+ }
+
+ void SetCodeLength(UINT32 length)
+ {
+ m_gcInfoEncoder->SetCodeLength(length);
+ if (m_doLogging)
+ {
+ printf("Set code length to %d.\n", length);
+ }
+ }
+
+ void SetReturnKind(ReturnKind returnKind)
+ {
+ m_gcInfoEncoder->SetReturnKind(returnKind);
+ if (m_doLogging)
+ {
+ printf("Set ReturnKind to %s.\n", ReturnKindToString(returnKind));
+ }
+ }
+
+ void SetStackBaseRegister(UINT32 registerNumber)
+ {
+ m_gcInfoEncoder->SetStackBaseRegister(registerNumber);
+ if (m_doLogging)
+ {
+ printf("Set stack base register to %s.\n", getRegName(registerNumber));
+ }
+ }
+
+ void SetPrologSize(UINT32 prologSize)
+ {
+ m_gcInfoEncoder->SetPrologSize(prologSize);
+ if (m_doLogging)
+ {
+ printf("Set prolog size 0x%x.\n", prologSize);
+ }
+ }
+
+ void SetGSCookieStackSlot(INT32 spOffsetGSCookie, UINT32 validRangeStart, UINT32 validRangeEnd)
+ {
+ m_gcInfoEncoder->SetGSCookieStackSlot(spOffsetGSCookie, validRangeStart, validRangeEnd);
+ if (m_doLogging)
+ {
+ printf("Set GS Cookie stack slot to %d, valid from 0x%x to 0x%x.\n", spOffsetGSCookie, validRangeStart,
+ validRangeEnd);
+ }
+ }
+
+ void SetPSPSymStackSlot(INT32 spOffsetPSPSym)
+ {
+ m_gcInfoEncoder->SetPSPSymStackSlot(spOffsetPSPSym);
+ if (m_doLogging)
+ {
+ printf("Set PSPSym stack slot to %d.\n", spOffsetPSPSym);
+ }
+ }
+
+ void SetGenericsInstContextStackSlot(INT32 spOffsetGenericsContext, GENERIC_CONTEXTPARAM_TYPE type)
+ {
+ m_gcInfoEncoder->SetGenericsInstContextStackSlot(spOffsetGenericsContext, type);
+ if (m_doLogging)
+ {
+ printf("Set generic instantiation context stack slot to %d, type is %s.\n", spOffsetGenericsContext,
+ (type == GENERIC_CONTEXTPARAM_THIS
+ ? "THIS"
+ : (type == GENERIC_CONTEXTPARAM_MT ? "MT"
+ : (type == GENERIC_CONTEXTPARAM_MD ? "MD" : "UNKNOWN!"))));
+ }
+ }
+
+ void SetSecurityObjectStackSlot(INT32 spOffset)
+ {
+ m_gcInfoEncoder->SetSecurityObjectStackSlot(spOffset);
+ if (m_doLogging)
+ {
+ printf("Set security object stack slot to %d.\n", spOffset);
+ }
+ }
+
+ void SetIsVarArg()
+ {
+ m_gcInfoEncoder->SetIsVarArg();
+ if (m_doLogging)
+ {
+ printf("SetIsVarArg.\n");
+ }
+ }
+
+ void SetWantsReportOnlyLeaf()
+ {
+ m_gcInfoEncoder->SetWantsReportOnlyLeaf();
+ if (m_doLogging)
+ {
+ printf("Set WantsReportOnlyLeaf.\n");
+ }
+ }
+
+ void SetSizeOfStackOutgoingAndScratchArea(UINT32 size)
+ {
+ m_gcInfoEncoder->SetSizeOfStackOutgoingAndScratchArea(size);
+ if (m_doLogging)
+ {
+ printf("Set Outgoing stack arg area size to %d.\n", size);
+ }
+ }
+};
+
+#define GCENCODER_WITH_LOGGING(withLog, realEncoder) \
+ GcInfoEncoderWithLogging withLog##Var(realEncoder, compiler->verbose || compiler->opts.dspGCtbls); \
+ GcInfoEncoderWithLogging* withLog = &withLog##Var;
+
+#else // DEBUG
+
+#define GCENCODER_WITH_LOGGING(withLog, realEncoder) GcInfoEncoder* withLog = realEncoder;
+
+#endif // DEBUG
+
+ReturnKind GCTypeToReturnKind(CorInfoGCType gcType)
+{
+
+ switch (gcType)
+ {
+ case TYPE_GC_NONE:
+ return RT_Scalar;
+ case TYPE_GC_REF:
+ return RT_Object;
+ case TYPE_GC_BYREF:
+ return RT_ByRef;
+ default:
+ _ASSERTE(!"TYP_GC_OTHER is unexpected");
+ return RT_Illegal;
+ }
+}
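+
+// Worked example (illustrative only): for a two-slot struct whose GC layout is
+// { TYPE_GC_REF, TYPE_GC_NONE }, the mapping above gives RT_Object for the first slot and
+// RT_Scalar for the second; gcInfoBlockHdrSave below combines the pair via GetStructReturnKind.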
+
+void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSize, unsigned prologSize)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("*************** In gcInfoBlockHdrSave()\n");
+ }
+#endif
+
+ GCENCODER_WITH_LOGGING(gcInfoEncoderWithLog, gcInfoEncoder);
+
+ // Can't create tables if we've not saved code.
+
+ gcInfoEncoderWithLog->SetCodeLength(methodSize);
+
+ ReturnKind returnKind = RT_Illegal;
+
+ switch (compiler->info.compRetType)
+ {
+ case TYP_REF:
+ case TYP_ARRAY:
+ returnKind = RT_Object;
+ break;
+ case TYP_BYREF:
+ returnKind = RT_ByRef;
+ break;
+ case TYP_STRUCT:
+ {
+ CORINFO_CLASS_HANDLE structType = compiler->info.compMethodInfo->args.retTypeClass;
+ var_types retType = compiler->getReturnTypeForStruct(structType);
+
+ switch (retType)
+ {
+ case TYP_ARRAY:
+ _ASSERTE(false && "TYP_ARRAY unexpected from getReturnTypeForStruct()");
+
+ case TYP_REF:
+ returnKind = RT_Object;
+ break;
+
+ case TYP_BYREF:
+ returnKind = RT_ByRef;
+ break;
+
+ case TYP_STRUCT:
+ if (compiler->IsHfa(structType))
+ {
+ returnKind = RT_Scalar;
+ }
+ else
+ {
+ // Multi-reg return
+ BYTE gcPtrs[2] = { TYPE_GC_NONE, TYPE_GC_NONE };
+ compiler->info.compCompHnd->getClassGClayout(structType, gcPtrs);
+
+ ReturnKind first = GCTypeToReturnKind((CorInfoGCType)gcPtrs[0]);
+ ReturnKind second = GCTypeToReturnKind((CorInfoGCType)gcPtrs[1]);
+
+ returnKind = GetStructReturnKind(first, second);
+ }
+ break;
+
+ default:
+ returnKind = RT_Scalar;
+ break;
+ }
+ break;
+ }
+ default:
+ returnKind = RT_Scalar;
+ }
+
+ _ASSERTE(returnKind != RT_Illegal);
+ gcInfoEncoderWithLog->SetReturnKind(returnKind);
+
+ if (compiler->isFramePointerUsed())
+ {
+ gcInfoEncoderWithLog->SetStackBaseRegister(REG_FPBASE);
+ }
+
+ if (compiler->info.compIsVarArgs)
+ {
+ gcInfoEncoderWithLog->SetIsVarArg();
+ }
+ // No equivalents.
+ // header->profCallbacks = compiler->info.compProfilerCallback;
+ // header->editNcontinue = compiler->opts.compDbgEnC;
+ //
+ if (compiler->lvaReportParamTypeArg())
+ {
+ // The predicate above is true only if there is an extra generic context parameter, not for
+ // the case where the generic context is provided by "this."
+ assert(compiler->info.compTypeCtxtArg != BAD_VAR_NUM);
+ GENERIC_CONTEXTPARAM_TYPE ctxtParamType = GENERIC_CONTEXTPARAM_NONE;
+ switch (compiler->info.compMethodInfo->options & CORINFO_GENERICS_CTXT_MASK)
+ {
+ case CORINFO_GENERICS_CTXT_FROM_METHODDESC:
+ ctxtParamType = GENERIC_CONTEXTPARAM_MD;
+ break;
+ case CORINFO_GENERICS_CTXT_FROM_METHODTABLE:
+ ctxtParamType = GENERIC_CONTEXTPARAM_MT;
+ break;
+
+ case CORINFO_GENERICS_CTXT_FROM_THIS: // See comment above.
+ default:
+ // If we have a generic context parameter, then we should have
+ // one of the two options flags handled above.
+ assert(false);
+ }
+
+ gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(
+ compiler->lvaToCallerSPRelativeOffset(compiler->lvaCachedGenericContextArgOffset(),
+ compiler->isFramePointerUsed()),
+ ctxtParamType);
+ }
+ // As discussed above, handle the case where the generics context is obtained via
+ // the method table of "this".
+ else if (compiler->lvaKeepAliveAndReportThis())
+ {
+ assert(compiler->info.compThisArg != BAD_VAR_NUM);
+ gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(
+ compiler->lvaToCallerSPRelativeOffset(compiler->lvaCachedGenericContextArgOffset(),
+ compiler->isFramePointerUsed()),
+ GENERIC_CONTEXTPARAM_THIS);
+ }
+
+ if (compiler->getNeedsGSSecurityCookie())
+ {
+ assert(compiler->lvaGSSecurityCookie != BAD_VAR_NUM);
+
+ // The lv offset is FP-relative, and the using code expects caller-sp relative, so translate.
+ // The code offset ranges assume that the GS Cookie slot is initialized in the prolog, and is valid
+ // through the remainder of the method. We will not query for the GS Cookie while we're in an epilog,
+ // so the question of where in the epilog it becomes invalid is moot.
+ gcInfoEncoderWithLog->SetGSCookieStackSlot(compiler->lvaGetCallerSPRelativeOffset(
+ compiler->lvaGSSecurityCookie),
+ prologSize, methodSize);
+ }
+ else if (compiler->opts.compNeedSecurityCheck || compiler->lvaReportParamTypeArg() ||
+ compiler->lvaKeepAliveAndReportThis())
+ {
+ gcInfoEncoderWithLog->SetPrologSize(prologSize);
+ }
+
+ if (compiler->opts.compNeedSecurityCheck)
+ {
+ assert(compiler->lvaSecurityObject != BAD_VAR_NUM);
+
+ // A VM requirement due to how the decoder works (it ignores partially interruptible frames when
+ // an exception has escaped, but the VM requires the security object to live on).
+ assert(compiler->codeGen->genInterruptible);
+
+ // The lv offset is FP-relative, and the using code expects caller-sp relative, so translate.
+ // The normal GC lifetime reporting mechanisms will report a proper lifetime to the GC.
+ // The security subsystem can safely assume that anywhere it might walk the stack, it will be
+ // valid (null or a live GC ref).
+ gcInfoEncoderWithLog->SetSecurityObjectStackSlot(
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaSecurityObject));
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (compiler->ehNeedsPSPSym())
+ {
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM);
+
+#ifdef _TARGET_AMD64_
+ // The PSPSym is relative to InitialSP on X64 and CallerSP on other platforms.
+ gcInfoEncoderWithLog->SetPSPSymStackSlot(compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym));
+#else // !_TARGET_AMD64_
+ gcInfoEncoderWithLog->SetPSPSymStackSlot(compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
+#endif // !_TARGET_AMD64_
+ }
+
+ if (compiler->ehAnyFunclets())
+ {
+ // Set this to avoid double-reporting the parent frame (unlike JIT64)
+ gcInfoEncoderWithLog->SetWantsReportOnlyLeaf();
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // outgoing stack area size
+ gcInfoEncoderWithLog->SetSizeOfStackOutgoingAndScratchArea(compiler->lvaOutgoingArgSpaceSize);
+
+#if DISPLAY_SIZES
+
+ if (compiler->codeGen->genInterruptible)
+ {
+ genMethodICnt++;
+ }
+ else
+ {
+ genMethodNCnt++;
+ }
+
+#endif // DISPLAY_SIZES
+}
+
+#ifdef DEBUG
+#define Encoder GcInfoEncoderWithLogging
+#else
+#define Encoder GcInfoEncoder
+#endif
+
+// Small helper class to handle the No-GC-Interrupt callbacks
+// when reporting interruptible ranges.
+//
+// Encoder should be either GcInfoEncoder or GcInfoEncoderWithLogging
+//
+struct InterruptibleRangeReporter
+{
+ unsigned prevStart;
+ Encoder* gcInfoEncoderWithLog;
+
+ InterruptibleRangeReporter(unsigned _prevStart, Encoder* _gcInfo)
+ : prevStart(_prevStart), gcInfoEncoderWithLog(_gcInfo)
+ {
+ }
+
+ // This callback is called for each insGroup marked with
+ // IGF_NOGCINTERRUPT (currently just prologs and epilogs).
+ // Report everything between the previous region and the current
+ // region as interruptible.
+
+ bool operator()(unsigned igFuncIdx, unsigned igOffs, unsigned igSize)
+ {
+ if (igOffs < prevStart)
+ {
+ // We're still in the main method prolog, which has already
+            // had its interruptible range reported.
+ assert(igFuncIdx == 0);
+ assert(igOffs + igSize <= prevStart);
+ return true;
+ }
+
+ assert(igOffs >= prevStart);
+ if (igOffs > prevStart)
+ {
+ gcInfoEncoderWithLog->DefineInterruptibleRange(prevStart, igOffs - prevStart);
+ }
+ prevStart = igOffs + igSize;
+ return true;
+ }
+};
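+
+// Illustrative sketch (added for exposition; not part of the original change).
+// Suppose the main prolog covers [0, 0x10), a single no-GC epilog group covers
+// [0x40, 0x48), and codeSize is 0x50. Then the reporter behaves as follows
+// (all offsets here are hypothetical):
+//
+//   InterruptibleRangeReporter r(0x10 /* prologSize */, encoder);
+//   r(0 /* igFuncIdx */, 0x40, 0x08); // defines [0x10, 0x40) as interruptible
+//   // r.prevStart is now 0x48; the caller reports the tail [0x48, 0x50).
+//
+// See the use in gcMakeRegPtrTable below, which passes the reporter to
+// emitGenNoGCLst and then defines any remaining range up to codeSize.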
+
+void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
+ unsigned codeSize,
+ unsigned prologSize,
+ MakeRegPtrMode mode)
+{
+ GCENCODER_WITH_LOGGING(gcInfoEncoderWithLog, gcInfoEncoder);
+
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ m_regSlotMap = new (compiler->getAllocator()) RegSlotMap(compiler->getAllocator());
+ m_stackSlotMap = new (compiler->getAllocator()) StackSlotMap(compiler->getAllocator());
+ }
+
+ /**************************************************************************
+ *
+ * Untracked ptr variables
+ *
+ **************************************************************************
+ */
+
+ unsigned count = 0;
+
+ int lastoffset = 0;
+
+ /* Count&Write untracked locals and non-enregistered args */
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ // Field local of a PROMOTION_TYPE_DEPENDENT struct must have been
+ // reported through its parent local.
+ continue;
+ }
+
+ if (varTypeIsGC(varDsc->TypeGet()))
+ {
+ // Do we have an argument or local variable?
+ if (!varDsc->lvIsParam)
+ {
+                // If it is pinned, it must be an untracked local.
+ assert(!varDsc->lvPinned || !varDsc->lvTracked);
+
+ if (varDsc->lvTracked || !varDsc->lvOnFrame)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ // Stack-passed arguments which are not enregistered
+ // are always reported in this "untracked stack
+ // pointers" section of the GC info even if lvTracked==true
+
+ // Has this argument been fully enregistered?
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ if (!varDsc->lvOnFrame)
+#else // LEGACY_BACKEND
+ if (varDsc->lvRegister)
+#endif // LEGACY_BACKEND
+ {
+ // If a CEE_JMP has been used, then we need to report all the arguments
+ // even if they are enregistered, since we will be using this value
+ // in a JMP call. Note that this is subtle as we require that
+ // argument offsets are always fixed up properly even if lvRegister
+ // is set.
+ if (!compiler->compJmpOpUsed)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ if (!varDsc->lvOnFrame)
+ {
+ // If this non-enregistered pointer arg is never
+ // used, we don't need to report it.
+ assert(varDsc->lvRefCnt == 0);
+ continue;
+ }
+ else if (varDsc->lvIsRegArg && varDsc->lvTracked)
+ {
+ // If this register-passed arg is tracked, then
+ // it has been allocated space near the other
+ // pointer variables and we have accurate life-
+ // time info. It will be reported with
+ // gcVarPtrList in the "tracked-pointer" section.
+ continue;
+ }
+ }
+ }
+
+ // If we haven't continued to the next variable, we should report this as an untracked local.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if DOUBLE_ALIGN
+ // For genDoubleAlign(), locals are addressed relative to ESP and
+ // arguments are addressed relative to EBP.
+
+ if (genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ offset += compiler->codeGen->genTotalFrameSize();
+#endif
+ GcSlotFlags flags = GC_SLOT_UNTRACKED;
+
+ if (varDsc->TypeGet() == TYP_BYREF)
+ {
+                // Or in GC_SLOT_INTERIOR for 'byref' pointer tracking
+ flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
+ }
+
+ if (varDsc->lvPinned)
+ {
+                // Or in GC_SLOT_PINNED for 'pinned' pointer tracking
+ flags = (GcSlotFlags)(flags | GC_SLOT_PINNED);
+ }
+ GcStackSlotBase stackSlotBase = GC_SP_REL;
+ if (varDsc->lvFramePointerBased)
+ {
+ stackSlotBase = GC_FRAMEREG_REL;
+ }
+ StackSlotIdKey sskey(varDsc->lvStkOffs, (stackSlotBase == GC_FRAMEREG_REL), flags);
+ GcSlotId varSlotId;
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ if (!m_stackSlotMap->Lookup(sskey, &varSlotId))
+ {
+ varSlotId = gcInfoEncoderWithLog->GetStackSlotId(varDsc->lvStkOffs, flags, stackSlotBase);
+ m_stackSlotMap->Set(sskey, varSlotId);
+ }
+ }
+ }
+
+ // If this is a TYP_STRUCT, handle its GC pointers.
+ // Note that the enregisterable struct types cannot have GC pointers in them.
+ if ((varDsc->lvType == TYP_STRUCT) && varDsc->lvOnFrame && (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
+ {
+ unsigned slots = compiler->lvaLclSize(varNum) / sizeof(void*);
+ BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
+
+ // walk each member of the array
+ for (unsigned i = 0; i < slots; i++)
+ {
+ if (gcPtrs[i] == TYPE_GC_NONE)
+ { // skip non-gc slots
+ continue;
+ }
+
+ int offset = varDsc->lvStkOffs + i * sizeof(void*);
+#if DOUBLE_ALIGN
+ // For genDoubleAlign(), locals are addressed relative to ESP and
+ // arguments are addressed relative to EBP.
+
+ if (genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ offset += compiler->codeGen->genTotalFrameSize();
+#endif
+ GcSlotFlags flags = GC_SLOT_UNTRACKED;
+ if (gcPtrs[i] == TYPE_GC_BYREF)
+ {
+ flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
+ }
+
+ GcStackSlotBase stackSlotBase = GC_SP_REL;
+ if (varDsc->lvFramePointerBased)
+ {
+ stackSlotBase = GC_FRAMEREG_REL;
+ }
+ StackSlotIdKey sskey(offset, (stackSlotBase == GC_FRAMEREG_REL), flags);
+ GcSlotId varSlotId;
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ if (!m_stackSlotMap->Lookup(sskey, &varSlotId))
+ {
+ varSlotId = gcInfoEncoderWithLog->GetStackSlotId(offset, flags, stackSlotBase);
+ m_stackSlotMap->Set(sskey, varSlotId);
+ }
+ }
+ }
+ }
+ }
+
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ // Count&Write spill temps that hold pointers.
+
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempItem = compiler->tmpListBeg(); tempItem != nullptr; tempItem = compiler->tmpListNxt(tempItem))
+ {
+ if (varTypeIsGC(tempItem->tdTempType()))
+ {
+ int offset = tempItem->tdTempOffs();
+
+ GcSlotFlags flags = GC_SLOT_UNTRACKED;
+ if (tempItem->tdTempType() == TYP_BYREF)
+ {
+ flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
+ }
+
+ GcStackSlotBase stackSlotBase = GC_SP_REL;
+ if (compiler->isFramePointerUsed())
+ {
+ stackSlotBase = GC_FRAMEREG_REL;
+ }
+ StackSlotIdKey sskey(offset, (stackSlotBase == GC_FRAMEREG_REL), flags);
+ GcSlotId varSlotId;
+ if (!m_stackSlotMap->Lookup(sskey, &varSlotId))
+ {
+ varSlotId = gcInfoEncoderWithLog->GetStackSlotId(offset, flags, stackSlotBase);
+ m_stackSlotMap->Set(sskey, varSlotId);
+ }
+ }
+ }
+
+ if (compiler->lvaKeepAliveAndReportThis())
+ {
+ // We need to report the cached copy as an untracked pointer
+ assert(compiler->info.compThisArg != BAD_VAR_NUM);
+ assert(!compiler->lvaReportParamTypeArg());
+ GcSlotFlags flags = GC_SLOT_UNTRACKED;
+
+ if (compiler->lvaTable[compiler->info.compThisArg].TypeGet() == TYP_BYREF)
+ {
+ // Or in GC_SLOT_INTERIOR for 'byref' pointer tracking
+ flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
+ }
+
+ GcStackSlotBase stackSlotBase = compiler->isFramePointerUsed() ? GC_FRAMEREG_REL : GC_SP_REL;
+
+ gcInfoEncoderWithLog->GetStackSlotId(compiler->lvaCachedGenericContextArgOffset(), flags, stackSlotBase);
+ }
+ }
+
+ // Generate the table of tracked stack pointer variable lifetimes.
+ gcMakeVarPtrTable(gcInfoEncoder, mode);
+
+ /**************************************************************************
+ *
+ * Prepare to generate the pointer register/argument map
+ *
+ **************************************************************************
+ */
+
+ if (compiler->codeGen->genInterruptible)
+ {
+ assert(compiler->genFullPtrRegMap);
+
+ regMaskSmall ptrRegs = 0;
+ regPtrDsc* regStackArgFirst = nullptr;
+
+ // Walk the list of pointer register/argument entries.
+
+ for (regPtrDsc* genRegPtrTemp = gcRegPtrList; genRegPtrTemp != nullptr; genRegPtrTemp = genRegPtrTemp->rpdNext)
+ {
+ int nextOffset = genRegPtrTemp->rpdOffs;
+
+ if (genRegPtrTemp->rpdArg)
+ {
+ if (genRegPtrTemp->rpdArgTypeGet() == rpdARG_KILL)
+ {
+ // Kill all arguments for a call
+ if ((mode == MAKE_REG_PTR_MODE_DO_WORK) && (regStackArgFirst != nullptr))
+ {
+ // Record any outgoing arguments as becoming dead
+ gcInfoRecordGCStackArgsDead(gcInfoEncoder, genRegPtrTemp->rpdOffs, regStackArgFirst,
+ genRegPtrTemp);
+ }
+ regStackArgFirst = nullptr;
+ }
+ else if (genRegPtrTemp->rpdGCtypeGet() != GCT_NONE)
+ {
+ if (genRegPtrTemp->rpdArgTypeGet() == rpdARG_PUSH || (genRegPtrTemp->rpdPtrArg != 0))
+ {
+ bool isPop = genRegPtrTemp->rpdArgTypeGet() == rpdARG_POP;
+ assert(!isPop);
+ gcInfoRecordGCStackArgLive(gcInfoEncoder, mode, genRegPtrTemp);
+ if (regStackArgFirst == nullptr)
+ {
+ regStackArgFirst = genRegPtrTemp;
+ }
+ }
+ else
+ {
+ // We know it's a POP. Sometimes we'll record a POP for a call, just to make sure
+ // the call site is recorded.
+ // This is just the negation of the condition:
+ assert(genRegPtrTemp->rpdArgTypeGet() == rpdARG_POP && genRegPtrTemp->rpdPtrArg == 0);
+ // This asserts that we only get here when we're recording a call site.
+ assert(genRegPtrTemp->rpdArg && genRegPtrTemp->rpdIsCallInstr());
+
+ // Kill all arguments for a call
+ if ((mode == MAKE_REG_PTR_MODE_DO_WORK) && (regStackArgFirst != nullptr))
+ {
+ // Record any outgoing arguments as becoming dead
+ gcInfoRecordGCStackArgsDead(gcInfoEncoder, genRegPtrTemp->rpdOffs, regStackArgFirst,
+ genRegPtrTemp);
+ }
+ regStackArgFirst = nullptr;
+ }
+ }
+ }
+ else
+ {
+ // Record any registers that are becoming dead.
+
+ regMaskSmall regMask = genRegPtrTemp->rpdCompiler.rpdDel & ptrRegs;
+ regMaskSmall byRefMask = 0;
+ if (genRegPtrTemp->rpdGCtypeGet() == GCT_BYREF)
+ {
+ byRefMask = regMask;
+ }
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, genRegPtrTemp->rpdOffs, regMask, GC_SLOT_DEAD,
+ byRefMask, &ptrRegs);
+
+ // Record any registers that are becoming live.
+ regMask = genRegPtrTemp->rpdCompiler.rpdAdd & ~ptrRegs;
+ byRefMask = 0;
+ // As far as I (DLD, 2010) can tell, there's one GCtype for the entire genRegPtrTemp, so if
+ // it says byref then all the registers in "regMask" contain byrefs.
+ if (genRegPtrTemp->rpdGCtypeGet() == GCT_BYREF)
+ {
+ byRefMask = regMask;
+ }
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, genRegPtrTemp->rpdOffs, regMask, GC_SLOT_LIVE,
+ byRefMask, &ptrRegs);
+ }
+ }
+
+ // Now we can declare the entire method body fully interruptible.
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ assert(prologSize <= codeSize);
+
+ // Now exempt any other region marked as IGF_NOGCINTERRUPT
+ // Currently just prologs and epilogs.
+
+ InterruptibleRangeReporter reporter(prologSize, gcInfoEncoderWithLog);
+ compiler->getEmitter()->emitGenNoGCLst(reporter);
+ prologSize = reporter.prevStart;
+
+ // Report any remainder
+ if (prologSize < codeSize)
+ {
+ gcInfoEncoderWithLog->DefineInterruptibleRange(prologSize, codeSize - prologSize);
+ }
+ }
+ }
+ else if (compiler->isFramePointerUsed()) // genInterruptible is false, and we're using EBP as a frame pointer.
+ {
+ assert(compiler->genFullPtrRegMap == false);
+
+ // Walk the list of pointer register/argument entries.
+
+ // First count them.
+ unsigned numCallSites = 0;
+
+ // Now we can allocate the information.
+ unsigned* pCallSites = nullptr;
+ BYTE* pCallSiteSizes = nullptr;
+ unsigned callSiteNum = 0;
+
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ if (gcCallDescList != nullptr)
+ {
+ for (CallDsc* call = gcCallDescList; call != nullptr; call = call->cdNext)
+ {
+ numCallSites++;
+ }
+ pCallSites = new (compiler, CMK_GC) unsigned[numCallSites];
+ pCallSiteSizes = new (compiler, CMK_GC) BYTE[numCallSites];
+ }
+ }
+
+ // Now consider every call.
+ for (CallDsc* call = gcCallDescList; call != nullptr; call = call->cdNext)
+ {
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ pCallSites[callSiteNum] = call->cdOffs - call->cdCallInstrSize;
+ pCallSiteSizes[callSiteNum] = call->cdCallInstrSize;
+ callSiteNum++;
+ }
+
+ unsigned nextOffset;
+
+ // Figure out the code offset of this entry.
+ nextOffset = call->cdOffs;
+
+ // As far as I (DLD, 2010) can determine by asking around, the "call->u1.cdArgMask"
+ // and "cdArgCnt" cases are to handle x86 situations in which a call expression is nested as an
+ // argument to an outer call. The "natural" (evaluation-order-preserving) thing to do is to
+ // evaluate the outer call's arguments, pushing those that are not enregistered, until you
+ // encounter the nested call. These parts of the call description, then, describe the "pending"
+ // pushed arguments. This situation does not exist outside of x86, where we're going to use a
+ // fixed-size stack frame: in situations like this nested call, we would evaluate the pending
+ // arguments to temporaries, and only "push" them (really, write them to the outgoing argument section
+ // of the stack frame) when it's the outer call's "turn." So we can assert that these
+ // situations never occur.
+ assert(call->u1.cdArgMask == 0 && call->cdArgCnt == 0);
+
+ // Other than that, we just have to deal with the regmasks.
+ regMaskSmall gcrefRegMask = call->cdGCrefRegs & RBM_CALLEE_SAVED;
+ regMaskSmall byrefRegMask = call->cdByrefRegs & RBM_CALLEE_SAVED;
+
+ assert((gcrefRegMask & byrefRegMask) == 0);
+
+ regMaskSmall regMask = gcrefRegMask | byrefRegMask;
+
+ assert(call->cdOffs >= call->cdCallInstrSize);
+ // call->cdOffs is actually the offset of the instruction *following* the call, so subtract
+ // the call instruction size to get the offset of the actual call instruction...
+ unsigned callOffset = call->cdOffs - call->cdCallInstrSize;
+ // Record that these registers are live before the call...
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, callOffset, regMask, GC_SLOT_LIVE, byrefRegMask, nullptr);
+ // ...and dead after.
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, call->cdOffs, regMask, GC_SLOT_DEAD, byrefRegMask,
+ nullptr);
+ }
+ // OK, define the call sites.
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ gcInfoEncoderWithLog->DefineCallSites(pCallSites, pCallSiteSizes, numCallSites);
+ }
+ }
+ else // genInterruptible is false and we have an EBP-less frame
+ {
+ assert(compiler->genFullPtrRegMap);
+
+        // Walk the list of pointer register/argument entries.
+ // First count them.
+ unsigned numCallSites = 0;
+
+ // Now we can allocate the information (if we're in the "DO_WORK" pass...)
+ unsigned* pCallSites = nullptr;
+ BYTE* pCallSiteSizes = nullptr;
+ unsigned callSiteNum = 0;
+
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ for (regPtrDsc* genRegPtrTemp = gcRegPtrList; genRegPtrTemp != nullptr;
+ genRegPtrTemp = genRegPtrTemp->rpdNext)
+ {
+ if (genRegPtrTemp->rpdArg && genRegPtrTemp->rpdIsCallInstr())
+ {
+ numCallSites++;
+ }
+ }
+
+ if (numCallSites > 0)
+ {
+ pCallSites = new (compiler, CMK_GC) unsigned[numCallSites];
+ pCallSiteSizes = new (compiler, CMK_GC) BYTE[numCallSites];
+ }
+ }
+
+ for (regPtrDsc* genRegPtrTemp = gcRegPtrList; genRegPtrTemp != nullptr; genRegPtrTemp = genRegPtrTemp->rpdNext)
+ {
+ if (genRegPtrTemp->rpdArg)
+ {
+ // Is this a call site?
+ if (genRegPtrTemp->rpdIsCallInstr())
+ {
+ // This is a true call site.
+
+ regMaskSmall gcrefRegMask = genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallGCrefRegs);
+
+ regMaskSmall byrefRegMask = genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallByrefRegs);
+
+ assert((gcrefRegMask & byrefRegMask) == 0);
+
+ regMaskSmall regMask = gcrefRegMask | byrefRegMask;
+
+ // The "rpdOffs" is (apparently) the offset of the following instruction already.
+ // GcInfoEncoder wants the call instruction, so subtract the width of the call instruction.
+ assert(genRegPtrTemp->rpdOffs >= genRegPtrTemp->rpdCallInstrSize);
+ unsigned callOffset = genRegPtrTemp->rpdOffs - genRegPtrTemp->rpdCallInstrSize;
+
+ // Tell the GCInfo encoder about these registers. We say that the registers become live
+ // before the call instruction, and dead after.
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, callOffset, regMask, GC_SLOT_LIVE, byrefRegMask,
+ nullptr);
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, genRegPtrTemp->rpdOffs, regMask, GC_SLOT_DEAD,
+ byrefRegMask, nullptr);
+
+ // Also remember the call site.
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ assert(pCallSites != nullptr && pCallSiteSizes != nullptr);
+ pCallSites[callSiteNum] = callOffset;
+ pCallSiteSizes[callSiteNum] = genRegPtrTemp->rpdCallInstrSize;
+ callSiteNum++;
+ }
+ }
+ else
+ {
+ // These are reporting outgoing stack arguments, but we don't need to report anything
+                    // for partially interruptible methods.
+ assert(genRegPtrTemp->rpdGCtypeGet() != GCT_NONE);
+ assert(genRegPtrTemp->rpdArgTypeGet() == rpdARG_PUSH);
+ }
+ }
+ }
+        // OK, define the call sites.
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ gcInfoEncoderWithLog->DefineCallSites(pCallSites, pCallSiteSizes, numCallSites);
+ }
+ }
+}
+
+void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder,
+ MakeRegPtrMode mode,
+ unsigned instrOffset,
+ regMaskSmall regMask,
+ GcSlotState newState,
+ regMaskSmall byRefMask,
+ regMaskSmall* pPtrRegs)
+{
+ // Precondition: byRefMask is a subset of regMask.
+ assert((byRefMask & ~regMask) == 0);
+
+ GCENCODER_WITH_LOGGING(gcInfoEncoderWithLog, gcInfoEncoder);
+
+ while (regMask)
+ {
+ // Get hold of the next register bit.
+ regMaskTP tmpMask = genFindLowestReg(regMask);
+ assert(tmpMask);
+
+ // Remember the new state of this register.
+ if (pPtrRegs != nullptr)
+ {
+ if (newState == GC_SLOT_DEAD)
+ {
+ *pPtrRegs &= ~tmpMask;
+ }
+ else
+ {
+ *pPtrRegs |= tmpMask;
+ }
+ }
+
+ // Figure out which register the next bit corresponds to.
+ regNumber regNum = genRegNumFromMask(tmpMask);
+
+        /* Reserve SP for future use */
+ assert(regNum != REG_SPBASE);
+
+ GcSlotFlags regFlags = GC_SLOT_BASE;
+ if ((tmpMask & byRefMask) != 0)
+ {
+ regFlags = (GcSlotFlags)(regFlags | GC_SLOT_INTERIOR);
+ }
+
+ RegSlotIdKey rskey(regNum, regFlags);
+ GcSlotId regSlotId;
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ if (!m_regSlotMap->Lookup(rskey, &regSlotId))
+ {
+ regSlotId = gcInfoEncoderWithLog->GetRegisterSlotId(regNum, regFlags);
+ m_regSlotMap->Set(rskey, regSlotId);
+ }
+ }
+ else
+ {
+ BOOL b = m_regSlotMap->Lookup(rskey, &regSlotId);
+ assert(b); // Should have been added in the first pass.
+ gcInfoEncoderWithLog->SetSlotState(instrOffset, regSlotId, newState);
+ }
+
+ // Turn the bit we've just generated off and continue.
+ regMask -= tmpMask; // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
+ }
+}
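+
+// Note (illustrative, not part of the original change): the loop above peels one
+// register off "regMask" at a time via the usual lowest-set-bit idiom that
+// genFindLowestReg implements, conceptually:
+//
+//   regMaskTP tmpMask = regMask & (0 - regMask); // isolate the lowest set bit
+//   regMask -= tmpMask;                          // and clear it for the next pass
+//
+// so each register in the mask gets exactly one slot-state report per call.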
+
+/**************************************************************************
+ *
+ * gcMakeVarPtrTable - Generate the table of tracked stack pointer
+ * variable lifetimes.
+ *
+ * In the first pass we'll allocate slot Ids
+ * In the second pass we actually generate the lifetimes.
+ *
+ **************************************************************************
+ */
+
+void GCInfo::gcMakeVarPtrTable(GcInfoEncoder* gcInfoEncoder, MakeRegPtrMode mode)
+{
+ GCENCODER_WITH_LOGGING(gcInfoEncoderWithLog, gcInfoEncoder);
+
+ // Make sure any flags we hide in the offset are in the bits guaranteed
+ // unused by alignment
+ C_ASSERT((OFFSET_MASK + 1) <= sizeof(int));
+
+#ifdef DEBUG
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ // Tracked variables can't be pinned, and the encoding takes
+ // advantage of that by using the same bit for 'pinned' and 'this'
+ // Since we don't track 'this', we should never see either flag here.
+ // Check it now before we potentially add some pinned flags.
+ for (varPtrDsc* varTmp = gcVarPtrList; varTmp != nullptr; varTmp = varTmp->vpdNext)
+ {
+ const unsigned flags = varTmp->vpdVarNum & OFFSET_MASK;
+ assert((flags & pinned_OFFSET_FLAG) == 0);
+ assert((flags & this_OFFSET_FLAG) == 0);
+ }
+ }
+#endif // DEBUG
+
+ // Only need to do this once, and only if we have EH.
+ if ((mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS) && compiler->ehAnyFunclets())
+ {
+ gcMarkFilterVarsPinned();
+ }
+
+ for (varPtrDsc* varTmp = gcVarPtrList; varTmp != nullptr; varTmp = varTmp->vpdNext)
+ {
+ C_ASSERT((OFFSET_MASK + 1) <= sizeof(int));
+
+ // Get hold of the variable's stack offset.
+
+ unsigned lowBits = varTmp->vpdVarNum & OFFSET_MASK;
+
+ // For negative stack offsets we must reset the low bits
+ int varOffs = static_cast<int>(varTmp->vpdVarNum & ~OFFSET_MASK);
+
+ // Compute the actual lifetime offsets.
+ unsigned begOffs = varTmp->vpdBegOfs;
+ unsigned endOffs = varTmp->vpdEndOfs;
+
+ // Special case: skip any 0-length lifetimes.
+ if (endOffs == begOffs)
+ {
+ continue;
+ }
+
+ GcSlotFlags flags = GC_SLOT_BASE;
+ if ((lowBits & byref_OFFSET_FLAG) != 0)
+ {
+ flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
+ }
+ if ((lowBits & pinned_OFFSET_FLAG) != 0)
+ {
+ flags = (GcSlotFlags)(flags | GC_SLOT_PINNED);
+ }
+
+ GcStackSlotBase stackSlotBase = GC_SP_REL;
+ if (compiler->isFramePointerUsed())
+ {
+ stackSlotBase = GC_FRAMEREG_REL;
+ }
+ StackSlotIdKey sskey(varOffs, (stackSlotBase == GC_FRAMEREG_REL), flags);
+ GcSlotId varSlotId;
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ if (!m_stackSlotMap->Lookup(sskey, &varSlotId))
+ {
+ varSlotId = gcInfoEncoderWithLog->GetStackSlotId(varOffs, flags, stackSlotBase);
+ m_stackSlotMap->Set(sskey, varSlotId);
+ }
+ }
+ else
+ {
+ BOOL b = m_stackSlotMap->Lookup(sskey, &varSlotId);
+ assert(b); // Should have been added in the first pass.
+ // Live from the beginning to the end.
+ gcInfoEncoderWithLog->SetSlotState(begOffs, varSlotId, GC_SLOT_LIVE);
+ gcInfoEncoderWithLog->SetSlotState(endOffs, varSlotId, GC_SLOT_DEAD);
+ }
+ }
+}
+
+// gcMarkFilterVarsPinned - Walk all lifetimes and make it so that anything
+// live in a filter is marked as pinned (often by splitting the lifetime
+// so that *only* the filter region is pinned). This should only be
+// called once (after generating all lifetimes, but before slot ids are
+// finalized).
+//
+// DevDiv 376329 - The VM has to double report filters and their parent frame
+// because they occur during the 1st pass and the parent frame doesn't go dead
+// until we start unwinding in the 2nd pass.
+//
+// Untracked locals will only be reported in non-filter funclets and the
+// parent.
+// Registers can't be double reported by 2 frames since they're different.
+// That just leaves stack variables which might be double reported.
+//
+// Technically double reporting is only a problem when the GC has to relocate a
+// reference. So we avoid that problem by marking all live tracked stack
+// variables as pinned inside the filter. Thus if they are double reported, it
+// won't be a problem since they won't be double relocated.
+//
+void GCInfo::gcMarkFilterVarsPinned()
+{
+ assert(compiler->ehAnyFunclets());
+ const EHblkDsc* endHBtab = &(compiler->compHndBBtab[compiler->compHndBBtabCount]);
+
+ for (EHblkDsc* HBtab = compiler->compHndBBtab; HBtab < endHBtab; HBtab++)
+ {
+ if (HBtab->HasFilter())
+ {
+ const UNATIVE_OFFSET filterBeg = compiler->ehCodeOffset(HBtab->ebdFilter);
+ const UNATIVE_OFFSET filterEnd = compiler->ehCodeOffset(HBtab->ebdHndBeg);
+
+ for (varPtrDsc* varTmp = gcVarPtrList; varTmp != nullptr; varTmp = varTmp->vpdNext)
+ {
+ // Get hold of the variable's flags.
+ const unsigned lowBits = varTmp->vpdVarNum & OFFSET_MASK;
+
+ // Compute the actual lifetime offsets.
+ const unsigned begOffs = varTmp->vpdBegOfs;
+ const unsigned endOffs = varTmp->vpdEndOfs;
+
+ // Special case: skip any 0-length lifetimes.
+ if (endOffs == begOffs)
+ {
+ continue;
+ }
+
+ // Skip lifetimes with no overlap with the filter
+ if ((endOffs <= filterBeg) || (begOffs >= filterEnd))
+ {
+ continue;
+ }
+
+ // Because there is no nesting within filters, nothing
+ // should be already pinned.
+ assert((lowBits & pinned_OFFSET_FLAG) == 0);
+
+ if (begOffs < filterBeg)
+ {
+ if (endOffs > filterEnd)
+ {
+                        // The variable lifetime starts before AND ends after
+ // the filter, so we need to create 2 new lifetimes:
+ // (1) a pinned one for the filter
+ // (2) a regular one for after the filter
+ // and then adjust the original lifetime to end before
+ // the filter.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("Splitting lifetime for filter: [%04X, %04X).\nOld: ", filterBeg, filterEnd);
+ gcDumpVarPtrDsc(varTmp);
+ }
+#endif // DEBUG
+
+ varPtrDsc* desc1 = new (compiler, CMK_GC) varPtrDsc;
+ desc1->vpdNext = gcVarPtrList;
+ desc1->vpdVarNum = varTmp->vpdVarNum | pinned_OFFSET_FLAG;
+ desc1->vpdBegOfs = filterBeg;
+ desc1->vpdEndOfs = filterEnd;
+
+ varPtrDsc* desc2 = new (compiler, CMK_GC) varPtrDsc;
+ desc2->vpdNext = desc1;
+ desc2->vpdVarNum = varTmp->vpdVarNum;
+ desc2->vpdBegOfs = filterEnd;
+ desc2->vpdEndOfs = endOffs;
+ gcVarPtrList = desc2;
+
+ varTmp->vpdEndOfs = filterBeg;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("New (1 of 3): ");
+ gcDumpVarPtrDsc(varTmp);
+ printf("New (2 of 3): ");
+ gcDumpVarPtrDsc(desc1);
+ printf("New (3 of 3): ");
+ gcDumpVarPtrDsc(desc2);
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ // The variable lifetime started before the filter and ends
+ // somewhere inside it, so we only create 1 new lifetime,
+ // and then adjust the original lifetime to end before
+ // the filter.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("Splitting lifetime for filter.\nOld: ");
+ gcDumpVarPtrDsc(varTmp);
+ }
+#endif // DEBUG
+
+ varPtrDsc* desc = new (compiler, CMK_GC) varPtrDsc;
+ desc->vpdNext = gcVarPtrList;
+ desc->vpdVarNum = varTmp->vpdVarNum | pinned_OFFSET_FLAG;
+ desc->vpdBegOfs = filterBeg;
+ desc->vpdEndOfs = endOffs;
+ gcVarPtrList = desc;
+
+ varTmp->vpdEndOfs = filterBeg;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("New (1 of 2): ");
+ gcDumpVarPtrDsc(varTmp);
+ printf("New (2 of 2): ");
+ gcDumpVarPtrDsc(desc);
+ }
+#endif // DEBUG
+ }
+ }
+ else
+ {
+ if (endOffs > filterEnd)
+ {
+ // The variable lifetime starts inside the filter and
+ // ends somewhere after it, so we create 1 new
+ // lifetime for the part inside the filter and adjust
+ // the start of the original lifetime to be the end
+ // of the filter
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("Splitting lifetime for filter.\nOld: ");
+ gcDumpVarPtrDsc(varTmp);
+ }
+#endif // DEBUG
+
+ varPtrDsc* desc = new (compiler, CMK_GC) varPtrDsc;
+ desc->vpdNext = gcVarPtrList;
+ desc->vpdVarNum = varTmp->vpdVarNum | pinned_OFFSET_FLAG;
+ desc->vpdBegOfs = begOffs;
+ desc->vpdEndOfs = filterEnd;
+ gcVarPtrList = desc;
+
+ varTmp->vpdBegOfs = filterEnd;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("New (1 of 2): ");
+ gcDumpVarPtrDsc(desc);
+ printf("New (2 of 2): ");
+ gcDumpVarPtrDsc(varTmp);
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ // The variable lifetime is completely within the filter,
+ // so just add the pinned flag.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("Pinning lifetime for filter.\nOld: ");
+ gcDumpVarPtrDsc(varTmp);
+ }
+#endif // DEBUG
+
+ varTmp->vpdVarNum |= pinned_OFFSET_FLAG;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("New : ");
+ gcDumpVarPtrDsc(varTmp);
+ }
+#endif // DEBUG
+ }
+ }
+ }
+ } // HasFilter
+ } // Foreach EH
+}
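+
+// Worked example (illustrative only; the offsets are hypothetical): for a filter
+// covering [0x40, 0x60) and an original lifetime [0x10, 0x90), the first case
+// above produces three descriptors:
+//
+//   [0x10, 0x40)  the original descriptor, unpinned (vpdEndOfs trimmed to filterBeg)
+//   [0x40, 0x60)  new descriptor with pinned_OFFSET_FLAG set (exactly the filter)
+//   [0x60, 0x90)  new unpinned descriptor for the tail after the filter
+//
+// Lifetimes that only partially overlap the filter are split into two descriptors,
+// and lifetimes entirely inside the filter are simply marked pinned in place.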
+
+void GCInfo::gcInfoRecordGCStackArgLive(GcInfoEncoder* gcInfoEncoder, MakeRegPtrMode mode, regPtrDsc* genStackPtr)
+{
+    // On non-x86 platforms, we don't have pointer argument push/pop/kill declarations.
+ // But we use the same mechanism to record writes into the outgoing argument space...
+ assert(genStackPtr->rpdGCtypeGet() != GCT_NONE);
+ assert(genStackPtr->rpdArg);
+ assert(genStackPtr->rpdArgTypeGet() == rpdARG_PUSH);
+
+    // We only need to report these when we're doing fully-interruptible
+ assert(compiler->codeGen->genInterruptible);
+
+ GCENCODER_WITH_LOGGING(gcInfoEncoderWithLog, gcInfoEncoder);
+
+ StackSlotIdKey sskey(genStackPtr->rpdPtrArg, FALSE,
+ GcSlotFlags(genStackPtr->rpdGCtypeGet() == GCT_BYREF ? GC_SLOT_INTERIOR : GC_SLOT_BASE));
+ GcSlotId varSlotId;
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ if (!m_stackSlotMap->Lookup(sskey, &varSlotId))
+ {
+ varSlotId = gcInfoEncoderWithLog->GetStackSlotId(sskey.m_offset, (GcSlotFlags)sskey.m_flags, GC_SP_REL);
+ m_stackSlotMap->Set(sskey, varSlotId);
+ }
+ }
+ else
+ {
+ BOOL b = m_stackSlotMap->Lookup(sskey, &varSlotId);
+ assert(b); // Should have been added in the first pass.
+ // Live until the call.
+ gcInfoEncoderWithLog->SetSlotState(genStackPtr->rpdOffs, varSlotId, GC_SLOT_LIVE);
+ }
+}
+
+void GCInfo::gcInfoRecordGCStackArgsDead(GcInfoEncoder* gcInfoEncoder,
+ unsigned instrOffset,
+ regPtrDsc* genStackPtrFirst,
+ regPtrDsc* genStackPtrLast)
+{
+ // After a call all of the outgoing arguments are marked as dead.
+ // The calling loop keeps track of the first argument pushed for this call
+ // and passes it in as genStackPtrFirst.
+ // genStackPtrLast is the call.
+    // Re-walk that list and mark all outgoing arguments that were marked as live
+ // earlier, as going dead after the call.
+
+    // We only need to report these when we're doing fully-interruptible
+ assert(compiler->codeGen->genInterruptible);
+
+ GCENCODER_WITH_LOGGING(gcInfoEncoderWithLog, gcInfoEncoder);
+
+ for (regPtrDsc* genRegPtrTemp = genStackPtrFirst; genRegPtrTemp != genStackPtrLast;
+ genRegPtrTemp = genRegPtrTemp->rpdNext)
+ {
+ if (!genRegPtrTemp->rpdArg)
+ {
+ continue;
+ }
+
+ assert(genRegPtrTemp->rpdGCtypeGet() != GCT_NONE);
+ assert(genRegPtrTemp->rpdArgTypeGet() == rpdARG_PUSH);
+
+ StackSlotIdKey sskey(genRegPtrTemp->rpdPtrArg, FALSE,
+ genRegPtrTemp->rpdGCtypeGet() == GCT_BYREF ? GC_SLOT_INTERIOR : GC_SLOT_BASE);
+ GcSlotId varSlotId;
+ BOOL b = m_stackSlotMap->Lookup(sskey, &varSlotId);
+ assert(b); // Should have been added in the first pass.
+        // The outgoing argument slot goes dead at the call.
+ gcInfoEncoderWithLog->SetSlotState(instrOffset, varSlotId, GC_SLOT_DEAD);
+ }
+}
+
+#undef GCENCODER_WITH_LOGGING
+
+#endif // !JIT32_GCENCODER
+
+/*****************************************************************************/
+/*****************************************************************************/
diff --git a/src/jit/gcinfo.cpp b/src/jit/gcinfo.cpp
new file mode 100644
index 0000000000..b64fd0a174
--- /dev/null
+++ b/src/jit/gcinfo.cpp
@@ -0,0 +1,867 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX GCInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "gcinfo.h"
+#include "emit.h"
+#include "jitgcinfo.h"
+
+#ifdef _TARGET_AMD64_
+#include "gcinfoencoder.h" //this includes a LOT of other files too
+#endif
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************/
+
+extern int JITGcBarrierCall;
+
+/*****************************************************************************/
+
+#if MEASURE_PTRTAB_SIZE
+/* static */ size_t GCInfo::s_gcRegPtrDscSize = 0;
+/* static */ size_t GCInfo::s_gcTotalPtrTabSize = 0;
+#endif // MEASURE_PTRTAB_SIZE
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX GCInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+GCInfo::GCInfo(Compiler* theCompiler) : compiler(theCompiler)
+{
+ regSet = nullptr;
+ gcVarPtrList = nullptr;
+ gcVarPtrLast = nullptr;
+ gcRegPtrList = nullptr;
+ gcRegPtrLast = nullptr;
+ gcPtrArgCnt = 0;
+ gcCallDescList = nullptr;
+ gcCallDescLast = nullptr;
+#ifdef JIT32_GCENCODER
+ gcEpilogTable = nullptr;
+#else // !JIT32_GCENCODER
+ m_regSlotMap = nullptr;
+ m_stackSlotMap = nullptr;
+#endif // JIT32_GCENCODER
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ * Reset tracking info at the start of a basic block.
+ */
+
+void GCInfo::gcResetForBB()
+{
+ gcRegGCrefSetCur = RBM_NONE;
+ gcRegByrefSetCur = RBM_NONE;
+ VarSetOps::AssignNoCopy(compiler, gcVarPtrSetCur, VarSetOps::MakeEmpty(compiler));
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Print the changes in the gcRegGCrefSetCur sets.
+ */
+
+void GCInfo::gcDspGCrefSetChanges(regMaskTP gcRegGCrefSetNew DEBUGARG(bool forceOutput))
+{
+ if (compiler->verbose)
+ {
+ if (forceOutput || (gcRegGCrefSetCur != gcRegGCrefSetNew))
+ {
+ printf("\t\t\t\t\t\t\tGC regs: ");
+ if (gcRegGCrefSetCur == gcRegGCrefSetNew)
+ {
+ printf("(unchanged) ");
+ }
+ else
+ {
+ printRegMaskInt(gcRegGCrefSetCur);
+ compiler->getEmitter()->emitDispRegSet(gcRegGCrefSetCur);
+ printf(" => ");
+ }
+ printRegMaskInt(gcRegGCrefSetNew);
+ compiler->getEmitter()->emitDispRegSet(gcRegGCrefSetNew);
+ printf("\n");
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Print the changes in the gcRegByrefSetCur sets.
+ */
+
+void GCInfo::gcDspByrefSetChanges(regMaskTP gcRegByrefSetNew DEBUGARG(bool forceOutput))
+{
+ if (compiler->verbose)
+ {
+ if (forceOutput || (gcRegByrefSetCur != gcRegByrefSetNew))
+ {
+ printf("\t\t\t\t\t\t\tByref regs: ");
+ if (gcRegByrefSetCur == gcRegByrefSetNew)
+ {
+ printf("(unchanged) ");
+ }
+ else
+ {
+ printRegMaskInt(gcRegByrefSetCur);
+ compiler->getEmitter()->emitDispRegSet(gcRegByrefSetCur);
+ printf(" => ");
+ }
+ printRegMaskInt(gcRegByrefSetNew);
+ compiler->getEmitter()->emitDispRegSet(gcRegByrefSetNew);
+ printf("\n");
+ }
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Mark the set of registers given by the specified mask as holding
+ * GCref pointer values.
+ */
+
+void GCInfo::gcMarkRegSetGCref(regMaskTP regMask DEBUGARG(bool forceOutput))
+{
+#ifdef DEBUG
+ if (compiler->compRegSetCheckLevel == 0)
+ {
+ // This set of registers are going to hold REFs.
+ // Make sure they were not holding BYREFs.
+ assert((gcRegByrefSetCur & regMask) == 0);
+ }
+#endif
+
+ regMaskTP gcRegByrefSetNew = gcRegByrefSetCur & ~regMask; // Clear it if set in Byref mask
+ regMaskTP gcRegGCrefSetNew = gcRegGCrefSetCur | regMask; // Set it in GCref mask
+
+ INDEBUG(gcDspGCrefSetChanges(gcRegGCrefSetNew, forceOutput));
+ INDEBUG(gcDspByrefSetChanges(gcRegByrefSetNew));
+
+ gcRegByrefSetCur = gcRegByrefSetNew;
+ gcRegGCrefSetCur = gcRegGCrefSetNew;
+}
+
+/*****************************************************************************
+ *
+ * Mark the set of registers given by the specified mask as holding
+ * Byref pointer values.
+ */
+
+void GCInfo::gcMarkRegSetByref(regMaskTP regMask DEBUGARG(bool forceOutput))
+{
+ regMaskTP gcRegByrefSetNew = gcRegByrefSetCur | regMask; // Set it in Byref mask
+ regMaskTP gcRegGCrefSetNew = gcRegGCrefSetCur & ~regMask; // Clear it if set in GCref mask
+
+ INDEBUG(gcDspGCrefSetChanges(gcRegGCrefSetNew));
+ INDEBUG(gcDspByrefSetChanges(gcRegByrefSetNew, forceOutput));
+
+ gcRegByrefSetCur = gcRegByrefSetNew;
+ gcRegGCrefSetCur = gcRegGCrefSetNew;
+}
+
+/*****************************************************************************
+ *
+ * Mark the set of registers given by the specified mask as holding
+ * non-pointer values.
+ */
+
+void GCInfo::gcMarkRegSetNpt(regMaskTP regMask DEBUGARG(bool forceOutput))
+{
+ /* NOTE: don't unmark any live register variables */
+
+ regMaskTP gcRegByrefSetNew = gcRegByrefSetCur & ~(regMask & ~regSet->rsMaskVars);
+ regMaskTP gcRegGCrefSetNew = gcRegGCrefSetCur & ~(regMask & ~regSet->rsMaskVars);
+
+ INDEBUG(gcDspGCrefSetChanges(gcRegGCrefSetNew, forceOutput));
+ INDEBUG(gcDspByrefSetChanges(gcRegByrefSetNew, forceOutput));
+
+ gcRegByrefSetCur = gcRegByrefSetNew;
+ gcRegGCrefSetCur = gcRegGCrefSetNew;
+}
+
+/*****************************************************************************
+ *
+ * Mark the specified register as now holding a value of the given type.
+ */
+
+void GCInfo::gcMarkRegPtrVal(regNumber reg, var_types type)
+{
+ regMaskTP regMask = genRegMask(reg);
+
+ switch (type)
+ {
+ case TYP_REF:
+ gcMarkRegSetGCref(regMask);
+ break;
+ case TYP_BYREF:
+ gcMarkRegSetByref(regMask);
+ break;
+ default:
+ gcMarkRegSetNpt(regMask);
+ break;
+ }
+}
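+
+// For instance (illustrative): gcMarkRegPtrVal(REG_ESI, TYP_REF) moves RBM_ESI into
+// gcRegGCrefSetCur (clearing it from gcRegByrefSetCur), whereas a non-pointer type
+// drops RBM_ESI from both sets via gcMarkRegSetNpt, unless ESI currently holds a
+// live register variable.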
+
+/*****************************************************************************/
+
+GCInfo::WriteBarrierForm GCInfo::gcIsWriteBarrierCandidate(GenTreePtr tgt, GenTreePtr assignVal)
+{
+#if FEATURE_WRITE_BARRIER
+
+ /* Are we storing a GC ptr? */
+
+ if (!varTypeIsGC(tgt->TypeGet()))
+ {
+ return WBF_NoBarrier;
+ }
+
+ /* Ignore any assignments of NULL */
+
+ // 'assignVal' can be the constant Null or something else (LclVar, etc..)
+ // that is known to be null via Value Numbering.
+ if (assignVal->GetVN(VNK_Liberal) == ValueNumStore::VNForNull())
+ {
+ return WBF_NoBarrier;
+ }
+
+ if (assignVal->gtOper == GT_CNS_INT && assignVal->gtIntCon.gtIconVal == 0)
+ {
+ return WBF_NoBarrier;
+ }
+
+ /* Where are we storing into? */
+
+ tgt = tgt->gtEffectiveVal();
+
+ switch (tgt->gtOper)
+ {
+
+#ifndef LEGACY_BACKEND
+ case GT_STOREIND:
+#endif // !LEGACY_BACKEND
+ case GT_IND: /* Could be the managed heap */
+ return gcWriteBarrierFormFromTargetAddress(tgt->gtOp.gtOp1);
+
+ case GT_LEA:
+ return gcWriteBarrierFormFromTargetAddress(tgt->AsAddrMode()->Base());
+
+ case GT_ARR_ELEM: /* Definitely in the managed heap */
+ case GT_CLS_VAR:
+ return WBF_BarrierUnchecked;
+
+ case GT_REG_VAR: /* Definitely not in the managed heap */
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ case GT_STORE_LCL_FLD:
+ return WBF_NoBarrier;
+
+ default:
+ break;
+ }
+
+ assert(!"Missing case in gcIsWriteBarrierCandidate");
+#endif
+
+ return WBF_NoBarrier;
+}
+
+bool GCInfo::gcIsWriteBarrierAsgNode(GenTreePtr op)
+{
+ if (op->gtOper == GT_ASG)
+ {
+ return gcIsWriteBarrierCandidate(op->gtOp.gtOp1, op->gtOp.gtOp2) != WBF_NoBarrier;
+ }
+#ifndef LEGACY_BACKEND
+ else if (op->gtOper == GT_STOREIND)
+ {
+ return gcIsWriteBarrierCandidate(op, op->gtOp.gtOp2) != WBF_NoBarrier;
+ }
+#endif // !LEGACY_BACKEND
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * If the given tree value is sitting in a register, free it now.
+ */
+
+void GCInfo::gcMarkRegPtrVal(GenTreePtr tree)
+{
+ if (varTypeIsGC(tree->TypeGet()))
+ {
+#ifdef LEGACY_BACKEND
+ if (tree->gtOper == GT_LCL_VAR)
+ compiler->codeGen->genMarkLclVar(tree);
+#endif // LEGACY_BACKEND
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Initialize the non-register pointer variable tracking logic.
+ */
+
+void GCInfo::gcVarPtrSetInit()
+{
+ VarSetOps::AssignNoCopy(compiler, gcVarPtrSetCur, VarSetOps::MakeEmpty(compiler));
+
+ /* Initialize the list of lifetime entries */
+ gcVarPtrList = gcVarPtrLast = nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Allocate a new pointer register set / pointer argument entry and append
+ * it to the list.
+ */
+
+GCInfo::regPtrDsc* GCInfo::gcRegPtrAllocDsc()
+{
+ regPtrDsc* regPtrNext;
+
+ assert(compiler->genFullPtrRegMap);
+
+ /* Allocate a new entry and initialize it */
+
+ regPtrNext = new (compiler, CMK_GC) regPtrDsc;
+
+ regPtrNext->rpdIsThis = FALSE;
+
+ regPtrNext->rpdOffs = 0;
+ regPtrNext->rpdNext = nullptr;
+
+ // Append the entry to the end of the list.
+ if (gcRegPtrLast == nullptr)
+ {
+ assert(gcRegPtrList == nullptr);
+ gcRegPtrList = gcRegPtrLast = regPtrNext;
+ }
+ else
+ {
+ assert(gcRegPtrList != nullptr);
+ gcRegPtrLast->rpdNext = regPtrNext;
+ gcRegPtrLast = regPtrNext;
+ }
+
+#if MEASURE_PTRTAB_SIZE
+ s_gcRegPtrDscSize += sizeof(*regPtrNext);
+#endif
+
+ return regPtrNext;
+}
+
+/*****************************************************************************
+ *
+ * Compute the various counts that get stored in the info block header.
+ */
+
+void GCInfo::gcCountForHeader(UNALIGNED unsigned int* untrackedCount, UNALIGNED unsigned int* varPtrTableSize)
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ varPtrDsc* varTmp;
+
+ bool thisKeptAliveIsInUntracked = false; // did we track "this" in a synchronized method?
+ unsigned int count = 0;
+
+ /* Count the untracked locals and non-enregistered args */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varTypeIsGC(varDsc->TypeGet()))
+ {
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ // Field local of a PROMOTION_TYPE_DEPENDENT struct must have been
+ // reported through its parent local
+ continue;
+ }
+
+ /* Do we have an argument or local variable? */
+ if (!varDsc->lvIsParam)
+ {
+ if (varDsc->lvTracked || !varDsc->lvOnFrame)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ /* Stack-passed arguments which are not enregistered
+ * are always reported in this "untracked stack
+ * pointers" section of the GC info even if lvTracked==true
+ */
+
+ /* Has this argument been fully enregistered? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ if (!varDsc->lvOnFrame)
+#else // LEGACY_BACKEND
+ if (varDsc->lvRegister)
+#endif // LEGACY_BACKEND
+ {
+ /* if a CEE_JMP has been used, then we need to report all the arguments
+ even if they are enregistered, since we will be using this value
+                       in a JMP call. Note that this is subtle as we require that
+ argument offsets are always fixed up properly even if lvRegister
+ is set */
+ if (!compiler->compJmpOpUsed)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ if (!varDsc->lvOnFrame)
+ {
+ /* If this non-enregistered pointer arg is never
+ * used, we don't need to report it
+ */
+ assert(varDsc->lvRefCnt == 0);
+ continue;
+ }
+ else if (varDsc->lvIsRegArg && varDsc->lvTracked)
+ {
+ /* If this register-passed arg is tracked, then
+ * it has been allocated space near the other
+ * pointer variables and we have accurate life-
+ * time info. It will be reported with
+ * gcVarPtrList in the "tracked-pointer" section
+ */
+
+ continue;
+ }
+ }
+ }
+
+ if (compiler->lvaIsOriginalThisArg(varNum) && compiler->lvaKeepAliveAndReportThis())
+ {
+ // Encoding of untracked variables does not support reporting
+ // "this". So report it as a tracked variable with a liveness
+ // extending over the entire method.
+
+ thisKeptAliveIsInUntracked = true;
+ continue;
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ int offs = varDsc->lvStkOffs;
+
+ printf("GCINFO: untrckd %s lcl at [%s", varTypeGCstring(varDsc->TypeGet()),
+ compiler->genEmitter->emitGetFrameReg());
+
+ if (offs < 0)
+ {
+ printf("-%02XH", -offs);
+ }
+ else if (offs > 0)
+ {
+ printf("+%02XH", +offs);
+ }
+
+ printf("]\n");
+ }
+#endif
+
+ count++;
+ }
+ else if (varDsc->lvType == TYP_STRUCT && varDsc->lvOnFrame && (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
+ {
+ unsigned slots = compiler->lvaLclSize(varNum) / sizeof(void*);
+ BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
+
+ // walk each member of the array
+ for (unsigned i = 0; i < slots; i++)
+ {
+ if (gcPtrs[i] != TYPE_GC_NONE)
+ { // count only gc slots
+ count++;
+ }
+ }
+ }
+ }
+
+ /* Also count spill temps that hold pointers */
+
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr; tempThis = compiler->tmpListNxt(tempThis))
+ {
+ if (varTypeIsGC(tempThis->tdTempType()) == false)
+ {
+ continue;
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ int offs = tempThis->tdTempOffs();
+
+            printf("GCINFO: untrck %s Temp at [%s", varTypeGCstring(tempThis->tdTempType()),
+ compiler->genEmitter->emitGetFrameReg());
+
+ if (offs < 0)
+ {
+ printf("-%02XH", -offs);
+ }
+ else if (offs > 0)
+ {
+ printf("+%02XH", +offs);
+ }
+
+ printf("]\n");
+ }
+#endif
+
+ count++;
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("GCINFO: untrckVars = %u\n", count);
+ }
+#endif
+
+ *untrackedCount = count;
+
+ /* Count the number of entries in the table of non-register pointer
+ variable lifetimes. */
+
+ count = 0;
+
+ if (thisKeptAliveIsInUntracked)
+ {
+ count++;
+ }
+
+ if (gcVarPtrList)
+ {
+ /* We'll use a delta encoding for the lifetime offsets */
+
+ for (varTmp = gcVarPtrList; varTmp; varTmp = varTmp->vpdNext)
+ {
+ /* Special case: skip any 0-length lifetimes */
+
+ if (varTmp->vpdBegOfs == varTmp->vpdEndOfs)
+ {
+ continue;
+ }
+
+ count++;
+ }
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("GCINFO: trackdLcls = %u\n", count);
+ }
+#endif
+
+ *varPtrTableSize = count;
+}
+
+#ifdef JIT32_GCENCODER
+/*****************************************************************************
+ *
+ * Shutdown the 'pointer value' register tracking logic and save the necessary
+ * info (which will be used at runtime to locate all pointers) at the specified
+ * address. The number of bytes written to 'destPtr' must be identical to that
+ * returned from gcPtrTableSize().
+ */
+
+BYTE* GCInfo::gcPtrTableSave(BYTE* destPtr, const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset)
+{
+ /* Write the tables to the info block */
+
+ return destPtr + gcMakeRegPtrTable(destPtr, -1, header, codeSize, pArgTabOffset);
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Initialize the 'pointer value' register/argument tracking logic.
+ */
+
+void GCInfo::gcRegPtrSetInit()
+{
+ gcRegGCrefSetCur = gcRegByrefSetCur = 0;
+
+ if (compiler->genFullPtrRegMap)
+ {
+ gcRegPtrList = gcRegPtrLast = nullptr;
+ }
+ else
+ {
+ /* Initialize the 'call descriptor' list */
+ gcCallDescList = gcCallDescLast = nullptr;
+ }
+}
+
+#ifdef JIT32_GCENCODER
+
+/*****************************************************************************
+ *
+ * Helper passed to genEmitter.emitGenEpilogLst() to generate
+ * the table of epilogs.
+ */
+
+/* static */ size_t GCInfo::gcRecordEpilog(void* pCallBackData, unsigned offset)
+{
+ GCInfo* gcInfo = (GCInfo*)pCallBackData;
+
+ assert(gcInfo);
+
+ size_t result = encodeUDelta(gcInfo->gcEpilogTable, offset, gcInfo->gcEpilogPrevOffset);
+
+ if (gcInfo->gcEpilogTable)
+ gcInfo->gcEpilogTable += result;
+
+ gcInfo->gcEpilogPrevOffset = offset;
+
+ return result;
+}
+
+#endif // JIT32_GCENCODER
+
+GCInfo::WriteBarrierForm GCInfo::gcWriteBarrierFormFromTargetAddress(GenTreePtr tgtAddr)
+{
+ GCInfo::WriteBarrierForm result = GCInfo::WBF_BarrierUnknown; // Default case, we have no information.
+
+    // If we store through an int to a GC_REF field, we'll assume that it needs to use a checked barrier.
+ if (tgtAddr->TypeGet() == TYP_I_IMPL)
+ {
+ return GCInfo::WBF_BarrierChecked; // Why isn't this GCInfo::WBF_BarrierUnknown?
+ }
+
+ // Otherwise...
+ assert(tgtAddr->TypeGet() == TYP_BYREF);
+ bool simplifiedExpr = true;
+ while (simplifiedExpr)
+ {
+ simplifiedExpr = false;
+
+ tgtAddr = tgtAddr->gtSkipReloadOrCopy();
+
+ while (tgtAddr->OperGet() == GT_ADDR && tgtAddr->gtOp.gtOp1->OperGet() == GT_IND)
+ {
+ tgtAddr = tgtAddr->gtOp.gtOp1->gtOp.gtOp1;
+ simplifiedExpr = true;
+ assert(tgtAddr->TypeGet() == TYP_BYREF);
+ }
+ // For additions, one of the operands is a byref or a ref (and the other is not). Follow this down to its
+ // source.
+ while (tgtAddr->OperGet() == GT_ADD || tgtAddr->OperGet() == GT_LEA)
+ {
+ if (tgtAddr->OperGet() == GT_ADD)
+ {
+ if (tgtAddr->gtOp.gtOp1->TypeGet() == TYP_BYREF || tgtAddr->gtOp.gtOp1->TypeGet() == TYP_REF)
+ {
+ assert(!(tgtAddr->gtOp.gtOp2->TypeGet() == TYP_BYREF || tgtAddr->gtOp.gtOp2->TypeGet() == TYP_REF));
+ tgtAddr = tgtAddr->gtOp.gtOp1;
+ simplifiedExpr = true;
+ }
+ else if (tgtAddr->gtOp.gtOp2->TypeGet() == TYP_BYREF || tgtAddr->gtOp.gtOp2->TypeGet() == TYP_REF)
+ {
+ tgtAddr = tgtAddr->gtOp.gtOp2;
+ simplifiedExpr = true;
+ }
+ else
+ {
+ // We might have a native int. For example:
+ // const int 0
+ // + byref
+ // lclVar int V06 loc5 // this is a local declared "valuetype VType*"
+ return GCInfo::WBF_BarrierUnknown;
+ }
+ }
+ else
+ {
+ // Must be an LEA (i.e., an AddrMode)
+ assert(tgtAddr->OperGet() == GT_LEA);
+ tgtAddr = tgtAddr->AsAddrMode()->Base();
+ if (tgtAddr->TypeGet() == TYP_BYREF || tgtAddr->TypeGet() == TYP_REF)
+ {
+ simplifiedExpr = true;
+ }
+ else
+ {
+ // We might have a native int.
+ return GCInfo::WBF_BarrierUnknown;
+ }
+ }
+ }
+ }
+ if (tgtAddr->IsLocalAddrExpr() != nullptr)
+ {
+ // No need for a GC barrier when writing to a local variable.
+ return GCInfo::WBF_NoBarrier;
+ }
+ if (tgtAddr->OperGet() == GT_LCL_VAR || tgtAddr->OperGet() == GT_REG_VAR)
+ {
+ unsigned lclNum = 0;
+ if (tgtAddr->gtOper == GT_LCL_VAR)
+ {
+ lclNum = tgtAddr->gtLclVar.gtLclNum;
+ }
+ else
+ {
+ assert(tgtAddr->gtOper == GT_REG_VAR);
+ lclNum = tgtAddr->gtRegVar.gtLclNum;
+ }
+
+ LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
+
+        // Instead of marking the LclVar with 'lvStackByref', consider decomposing the
+        // Value Number given to this LclVar to see if it was created using a
+        // GT_ADDR(GT_LCLVAR) or a GT_ADD(GT_ADDR(GT_LCLVAR), Constant).
+
+ // We may have an internal compiler temp created in fgMorphCopyBlock() that we know
+        // points at one of our stack local variables; it will have lvStackByref set to true.
+ //
+ if (varDsc->lvStackByref)
+ {
+ assert(varDsc->TypeGet() == TYP_BYREF);
+ return GCInfo::WBF_NoBarrier;
+ }
+
+ // We don't eliminate for inlined methods, where we (can) know where the "retBuff" points.
+ if (!compiler->compIsForInlining() && lclNum == compiler->info.compRetBuffArg)
+ {
+ assert(compiler->info.compRetType == TYP_STRUCT); // Else shouldn't have a ret buff.
+
+ // Are we assured that the ret buff pointer points into the stack of a caller?
+ if (compiler->info.compRetBuffDefStack)
+ {
+#if 0
+ // This is an optional debugging mode. If the #if 0 above is changed to #if 1,
+            // every barrier we remove for stores to GC ref fields of a retbuff uses a special
+ // helper that asserts that the target is not in the heap.
+#ifdef DEBUG
+ return WBF_NoBarrier_CheckNotHeapInDebug;
+#else
+ return WBF_NoBarrier;
+#endif
+#else // 0
+ return GCInfo::WBF_NoBarrier;
+#endif // 0
+ }
+ }
+ }
+ if (tgtAddr->TypeGet() == TYP_REF)
+ {
+ return GCInfo::WBF_BarrierUnchecked;
+ }
+ // Otherwise, we have no information.
+ return GCInfo::WBF_BarrierUnknown;
+}
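+
+// Illustrative examples (not part of the original change) of how the shape of the
+// target address maps to a barrier form under the logic above:
+//
+//   GT_ADD(GT_ADDR(GT_LCL_VAR), GT_CNS_INT)   -> WBF_NoBarrier        (local address)
+//   GT_LEA with a TYP_REF base                -> WBF_BarrierUnchecked (known heap object)
+//   address typed TYP_I_IMPL                  -> WBF_BarrierChecked
+//   TYP_BYREF of unknown provenance           -> WBF_BarrierUnknown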
+
+#ifndef LEGACY_BACKEND
+//------------------------------------------------------------------------
+// gcUpdateForRegVarMove: Update the masks when a variable is moved
+//
+// Arguments:
+// srcMask - The register mask for the register(s) from which it is being moved
+// dstMask - The register mask for the register(s) to which it is being moved
+// type - The type of the variable
+//
+// Return Value:
+// None
+//
+// Notes:
+// This is called during codegen when a var is moved due to an LSRA_ASG.
+// It is also called by LinearScan::recordVarLocationAtStartOfBB() which is in turn called by
+// CodeGen::genCodeForBBList() at the block boundary.
+
+void GCInfo::gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarDsc* varDsc)
+{
+ var_types type = varDsc->TypeGet();
+ bool isGCRef = (type == TYP_REF);
+ bool isByRef = (type == TYP_BYREF);
+
+ if (srcMask != RBM_NONE)
+ {
+ regSet->RemoveMaskVars(srcMask);
+ if (isGCRef)
+ {
+ assert((gcRegByrefSetCur & srcMask) == 0);
+ gcRegGCrefSetCur &= ~srcMask;
+ gcRegGCrefSetCur |= dstMask; // safe if no dst, i.e. RBM_NONE
+ }
+ else if (isByRef)
+ {
+ assert((gcRegGCrefSetCur & srcMask) == 0);
+ gcRegByrefSetCur &= ~srcMask;
+ gcRegByrefSetCur |= dstMask; // safe if no dst, i.e. RBM_NONE
+ }
+ }
+ else if (isGCRef || isByRef)
+ {
+ // In this case, we are moving it from the stack to a register,
+ // so remove it from the set of live stack gc refs
+ VarSetOps::RemoveElemD(compiler, gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ if (dstMask != RBM_NONE)
+ {
+ regSet->AddMaskVars(dstMask);
+ // If the source is a reg, then the gc sets have been set appropriately
+ // Otherwise, we have to determine whether to set them
+ if (srcMask == RBM_NONE)
+ {
+ if (isGCRef)
+ {
+ gcRegGCrefSetCur |= dstMask;
+ }
+ else if (isByRef)
+ {
+ gcRegByrefSetCur |= dstMask;
+ }
+ }
+ }
+ else if (isGCRef || isByRef)
+ {
+ VarSetOps::AddElemD(compiler, gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+}
+#endif // !LEGACY_BACKEND
+
+/*****************************************************************************/
+/*****************************************************************************/
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
new file mode 100644
index 0000000000..67474e11ec
--- /dev/null
+++ b/src/jit/gentree.cpp
@@ -0,0 +1,16748 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX GenTree XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#include "simd.h"
+
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/*****************************************************************************/
+
+const unsigned short GenTree::gtOperKindTable[] = {
+#define GTNODE(en, sn, cm, ok) ok + GTK_COMMUTE *cm,
+#include "gtlist.h"
+};
+
+/*****************************************************************************/
+// static
+genTreeOps GenTree::OpAsgToOper(genTreeOps op)
+{
+ // Precondition.
+ assert(OperIsAssignment(op) && op != GT_ASG);
+ switch (op)
+ {
+ case GT_ASG_ADD:
+ return GT_ADD;
+ case GT_ASG_SUB:
+ return GT_SUB;
+ case GT_ASG_MUL:
+ return GT_MUL;
+ case GT_ASG_DIV:
+ return GT_DIV;
+ case GT_ASG_MOD:
+ return GT_MOD;
+
+ case GT_ASG_UDIV:
+ return GT_UDIV;
+ case GT_ASG_UMOD:
+ return GT_UMOD;
+
+ case GT_ASG_OR:
+ return GT_OR;
+ case GT_ASG_XOR:
+ return GT_XOR;
+ case GT_ASG_AND:
+ return GT_AND;
+ case GT_ASG_LSH:
+ return GT_LSH;
+ case GT_ASG_RSH:
+ return GT_RSH;
+ case GT_ASG_RSZ:
+ return GT_RSZ;
+
+ case GT_CHS:
+ return GT_NEG;
+
+ default:
+ unreached(); // Precondition implies we don't get here.
+ }
+}
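+
+// For example (illustrative): the mapping gives
+//
+//   assert(GenTree::OpAsgToOper(GT_ASG_ADD) == GT_ADD);
+//   assert(GenTree::OpAsgToOper(GT_CHS) == GT_NEG);
+//
+// while passing GT_ASG itself violates the precondition and asserts.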
+
+/*****************************************************************************
+ *
+ * The types of different GenTree nodes
+ */
+
+#ifdef DEBUG
+
+#define INDENT_SIZE 3
+
+//--------------------------------------------
+//
+// IndentStack: This struct is used, along with its related enums and strings,
+// to control both the indendtation and the printing of arcs.
+//
+// Notes:
+// The mode of printing is set in the Constructor, using its 'compiler' argument.
+// Currently it only prints arcs when fgOrder == fgOrderLinear.
+// The type of arc to print is specified by the IndentInfo enum, and is controlled
+// by the caller of the Push() method.
+
+enum IndentChars
+{
+ ICVertical,
+ ICBottom,
+ ICTop,
+ ICMiddle,
+ ICDash,
+ ICEmbedded,
+ ICTerminal,
+ ICError,
+ IndentCharCount
+};
+
+// clang-format off
+// Sets of strings for different dumping options vert bot top mid dash embedded terminal error
+static const char* emptyIndents[IndentCharCount] = { " ", " ", " ", " ", " ", "{", "", "?" };
+static const char* asciiIndents[IndentCharCount] = { "|", "\\", "/", "+", "-", "{", "*", "?" };
+static const char* unicodeIndents[IndentCharCount] = { "\xe2\x94\x82", "\xe2\x94\x94", "\xe2\x94\x8c", "\xe2\x94\x9c", "\xe2\x94\x80", "{", "\xe2\x96\x8c", "?" };
+// clang-format on
+
+typedef ArrayStack<Compiler::IndentInfo> IndentInfoStack;
+struct IndentStack
+{
+ IndentInfoStack stack;
+ const char** indents;
+
+ // Constructor for IndentStack. Uses 'compiler' to determine the mode of printing.
+ IndentStack(Compiler* compiler) : stack(compiler)
+ {
+ if (compiler->asciiTrees)
+ {
+ indents = asciiIndents;
+ }
+ else
+ {
+ indents = unicodeIndents;
+ }
+ }
+
+ // Return the depth of the current indentation.
+ unsigned Depth()
+ {
+ return stack.Height();
+ }
+
+ // Push a new indentation onto the stack, of the given type.
+ void Push(Compiler::IndentInfo info)
+ {
+ stack.Push(info);
+ }
+
+ // Pop the most recent indentation type off the stack.
+ Compiler::IndentInfo Pop()
+ {
+ return stack.Pop();
+ }
+
+ // Print the current indentation and arcs.
+ void print()
+ {
+ unsigned indentCount = Depth();
+ for (unsigned i = 0; i < indentCount; i++)
+ {
+ unsigned index = indentCount - 1 - i;
+ switch (stack.Index(index))
+ {
+ case Compiler::IndentInfo::IINone:
+ printf(" ");
+ break;
+ case Compiler::IndentInfo::IIEmbedded:
+ printf("%s ", indents[ICEmbedded]);
+ break;
+ case Compiler::IndentInfo::IIArc:
+ if (index == 0)
+ {
+ printf("%s%s%s", indents[ICMiddle], indents[ICDash], indents[ICDash]);
+ }
+ else
+ {
+ printf("%s ", indents[ICVertical]);
+ }
+ break;
+ case Compiler::IndentInfo::IIArcBottom:
+ printf("%s%s%s", indents[ICBottom], indents[ICDash], indents[ICDash]);
+ break;
+ case Compiler::IndentInfo::IIArcTop:
+ printf("%s%s%s", indents[ICTop], indents[ICDash], indents[ICDash]);
+ break;
+ case Compiler::IndentInfo::IIError:
+ printf("%s%s%s", indents[ICError], indents[ICDash], indents[ICDash]);
+ break;
+ default:
+ unreached();
+ }
+ }
+ printf("%s", indents[ICTerminal]);
+ }
+};
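+
+// Illustration (editorial note): with the asciiIndents strings selected above,
+// print() renders an innermost IIArc as "+--", an IIArcBottom as "\--", an
+// IIArcTop as "/--", vertical "|" padding for enclosing arcs, and a trailing
+// "*" terminal, so a dumped node line typically begins with arcs such as
+// "\--*". The exact arcs depend on what the caller of Push() recorded, so this
+// is only illustrative.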
+
+//------------------------------------------------------------------------
+// printIndent: This is a static method which simply invokes the 'print'
+// method on its 'indentStack' argument.
+//
+// Arguments:
+// indentStack - specifies the information for the indentation & arcs to be printed
+//
+// Notes:
+// This method exists to localize the checking for the case where indentStack is null.
+
+static void printIndent(IndentStack* indentStack)
+{
+ if (indentStack == nullptr)
+ {
+ return;
+ }
+ indentStack->print();
+}
+
+static const char* nodeNames[] = {
+#define GTNODE(en, sn, cm, ok) sn,
+#include "gtlist.h"
+};
+
+const char* GenTree::NodeName(genTreeOps op)
+{
+ assert((unsigned)op < sizeof(nodeNames) / sizeof(nodeNames[0]));
+
+ return nodeNames[op];
+}
+
+static const char* opNames[] = {
+#define GTNODE(en, sn, cm, ok) #en,
+#include "gtlist.h"
+};
+
+const char* GenTree::OpName(genTreeOps op)
+{
+ assert((unsigned)op < sizeof(opNames) / sizeof(opNames[0]));
+
+ return opNames[op];
+}
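+
+// Note (editorial): nodeNames and opNames are parallel tables built from the
+// same GTNODE list in gtlist.h, with each GTNODE(en, sn, cm, ok) entry
+// contributing its printable name 'sn' to the former and the stringized enum
+// spelling 'en' to the latter, so NodeName(op) and OpName(op) index the same
+// slot for a given oper.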
+
+#endif
+
+/*****************************************************************************
+ *
+ * When 'SMALL_TREE_NODES' is enabled, we allocate tree nodes in 2 different
+ * sizes: 'GTF_DEBUG_NODE_SMALL' for most nodes and 'GTF_DEBUG_NODE_LARGE' for
+ * the few nodes (such as calls and statement list nodes) that have more fields
+ * and take up a lot more space.
+ */
+
+#if SMALL_TREE_NODES
+
+/* GT_COUNT'th oper is overloaded as 'undefined oper', so allocate storage for GT_COUNT'th oper also */
+/* static */
+unsigned char GenTree::s_gtNodeSizes[GT_COUNT + 1];
+
+/* static */
+void GenTree::InitNodeSize()
+{
+ /* 'GT_LCL_VAR' often gets changed to 'GT_REG_VAR' */
+
+ assert(GenTree::s_gtNodeSizes[GT_LCL_VAR] >= GenTree::s_gtNodeSizes[GT_REG_VAR]);
+
+ /* Set all sizes to 'small' first */
+
+ for (unsigned op = 0; op <= GT_COUNT; op++)
+ {
+ GenTree::s_gtNodeSizes[op] = TREE_NODE_SZ_SMALL;
+ }
+
+ // Now set all of the appropriate entries to 'large'
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // On ARM32, ARM64 and System V, struct returns generate code that turns
+ // a GT_ASG tree into a CopyObj call.
+ // CopyObj is a large node while GT_ASG is small, which would otherwise trigger an assert.
+ GenTree::s_gtNodeSizes[GT_ASG] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_RETURN] = TREE_NODE_SZ_LARGE;
+#endif // defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ GenTree::s_gtNodeSizes[GT_CALL] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_CAST] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_FTN_ADDR] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_BOX] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_INDEX] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_ARR_BOUNDS_CHECK] = TREE_NODE_SZ_LARGE;
+#ifdef FEATURE_SIMD
+ GenTree::s_gtNodeSizes[GT_SIMD_CHK] = TREE_NODE_SZ_LARGE;
+#endif // FEATURE_SIMD
+ GenTree::s_gtNodeSizes[GT_ARR_ELEM] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_ARR_INDEX] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_ARR_OFFSET] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_RET_EXPR] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_OBJ] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_FIELD] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_STMT] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_CMPXCHG] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_QMARK] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_LEA] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_STORE_OBJ] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_DYN_BLK] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_STORE_DYN_BLK] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_INTRINSIC] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_ALLOCOBJ] = TREE_NODE_SZ_LARGE;
+#if USE_HELPERS_FOR_INT_DIV
+ GenTree::s_gtNodeSizes[GT_DIV] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_UDIV] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_MOD] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_UMOD] = TREE_NODE_SZ_LARGE;
+#endif
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ GenTree::s_gtNodeSizes[GT_PUTARG_STK] = TREE_NODE_SZ_LARGE;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ assert(GenTree::s_gtNodeSizes[GT_RETURN] == GenTree::s_gtNodeSizes[GT_ASG]);
+
+ // This list of assertions should come to contain all GenTree subtypes that are declared
+ // "small".
+ assert(sizeof(GenTreeLclFld) <= GenTree::s_gtNodeSizes[GT_LCL_FLD]);
+ assert(sizeof(GenTreeLclVar) <= GenTree::s_gtNodeSizes[GT_LCL_VAR]);
+
+ static_assert_no_msg(sizeof(GenTree) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeUnOp) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeOp) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeVal) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeIntConCommon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreePhysReg) <= TREE_NODE_SZ_SMALL);
+#ifndef LEGACY_BACKEND
+ static_assert_no_msg(sizeof(GenTreeJumpTable) <= TREE_NODE_SZ_SMALL);
+#endif // !LEGACY_BACKEND
+ static_assert_no_msg(sizeof(GenTreeIntCon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeLngCon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeDblCon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeStrCon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeLclVarCommon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeLclVar) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeLclFld) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeRegVar) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeCast) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeBox) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeField) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArgList) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeColon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeCall) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeCmpXchg) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeFptrVal) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeQmark) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeIntrinsic) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeIndex) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArrLen) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeBoundsChk) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArrElem) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArrIndex) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArrOffs) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeIndir) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeStoreInd) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeAddrMode) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeObj) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeBlk) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeRetExpr) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeStmt) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeClsVar) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeArgPlace) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeLabel) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeAllocObj) <= TREE_NODE_SZ_LARGE); // *** large node
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_SMALL);
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef FEATURE_SIMD
+ static_assert_no_msg(sizeof(GenTreeSIMD) <= TREE_NODE_SZ_SMALL);
+#endif // FEATURE_SIMD
+}
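+
+// Note (editorial): the practical effect of the table above is that a node
+// allocated small can never be mutated in place into an oper registered as
+// TREE_NODE_SZ_LARGE (such as GT_CALL), whereas a large node can always be
+// reused for a small oper; IsNodeProperlySized() below checks exactly this
+// using the GTF_DEBUG_NODE_SMALL / GTF_DEBUG_NODE_LARGE debug flags.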
+
+size_t GenTree::GetNodeSize() const
+{
+ return GenTree::s_gtNodeSizes[gtOper];
+}
+
+#ifdef DEBUG
+bool GenTree::IsNodeProperlySized() const
+{
+ size_t size;
+
+ if (gtDebugFlags & GTF_DEBUG_NODE_SMALL)
+ {
+ size = TREE_NODE_SZ_SMALL;
+ }
+ else
+ {
+ assert(gtDebugFlags & GTF_DEBUG_NODE_LARGE);
+ size = TREE_NODE_SZ_LARGE;
+ }
+
+ return GenTree::s_gtNodeSizes[gtOper] <= size;
+}
+#endif
+
+#else // SMALL_TREE_NODES
+
+#ifdef DEBUG
+bool GenTree::IsNodeProperlySized() const
+{
+ return true;
+}
+#endif
+
+#endif // SMALL_TREE_NODES
+
+/*****************************************************************************/
+
+// Make sure these template functions get instantiated, because they're not defined in a header file
+// (emulating the C++ 'export' keyword here).
+// VC appears to be somewhat unpredictable about whether they end up in the .obj file without this.
+template Compiler::fgWalkResult Compiler::fgWalkTreePostRec<true>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreePostRec<false>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreePreRec<true>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreePreRec<false>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreeRec<true, true>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreeRec<false, false>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreeRec<true, false>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreeRec<false, true>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+
+//******************************************************************************
+// fgWalkTreePreRec - Helper function for fgWalkTreePre.
+// Walks the tree in pre-order, executing the callback on every node.
+// Template parameter 'computeStack' specifies whether to maintain
+// a stack of ancestor nodes which can be viewed in the callback.
+//
+template <bool computeStack>
+// static
+Compiler::fgWalkResult Compiler::fgWalkTreePreRec(GenTreePtr* pTree, fgWalkData* fgWalkData)
+{
+ fgWalkResult result = WALK_CONTINUE;
+ GenTreePtr currentParent = fgWalkData->parent;
+
+ genTreeOps oper;
+ unsigned kind;
+
+ do
+ {
+ GenTreePtr tree = *pTree;
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+ GenTreeArgList* args; // For call node arg lists.
+
+ if (computeStack)
+ {
+ fgWalkData->parentStack->Push(tree);
+ }
+
+ /* Visit this node */
+
+ // if we are not in the mode where we only do the callback for local var nodes,
+ // visit the node unconditionally. Otherwise we will visit it under leaf handling.
+ if (!fgWalkData->wtprLclsOnly)
+ {
+ assert(tree == *pTree);
+ result = fgWalkData->wtprVisitorFn(pTree, fgWalkData);
+ if (result != WALK_CONTINUE)
+ {
+ break;
+ }
+ }
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ if (fgWalkData->wtprLclsOnly && (oper == GT_LCL_VAR || oper == GT_LCL_FLD))
+ {
+ result = fgWalkData->wtprVisitorFn(pTree, fgWalkData);
+ }
+ break;
+ }
+ else if (fgWalkData->wtprLclsOnly && GenTree::OperIsLocalStore(oper))
+ {
+ result = fgWalkData->wtprVisitorFn(pTree, fgWalkData);
+ if (result != WALK_CONTINUE)
+ {
+ break;
+ }
+ }
+
+ fgWalkData->parent = tree;
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ if (tree->gtGetOp2())
+ {
+ if (tree->gtOp.gtOp1 != nullptr)
+ {
+ result = fgWalkTreePreRec<computeStack>(&tree->gtOp.gtOp1, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ else
+ {
+ assert(tree->NullOp1Legal());
+ }
+
+ pTree = &tree->gtOp.gtOp2;
+ continue;
+ }
+ else
+ {
+ pTree = &tree->gtOp.gtOp1;
+ if (*pTree)
+ {
+ continue;
+ }
+
+ break;
+ }
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ pTree = &tree->gtField.gtFldObj;
+ break;
+
+ case GT_CALL:
+
+ assert(tree->gtFlags & GTF_CALL);
+
+ /* Is this a call to unmanaged code ? */
+ if (fgWalkData->wtprLclsOnly && (tree->gtFlags & GTF_CALL_UNMANAGED))
+ {
+ result = fgWalkData->wtprVisitorFn(pTree, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (tree->gtCall.gtCallObjp)
+ {
+ result = fgWalkTreePreRec<computeStack>(&tree->gtCall.gtCallObjp, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ for (args = tree->gtCall.gtCallArgs; args; args = args->Rest())
+ {
+ result = fgWalkTreePreRec<computeStack>(args->pCurrent(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ for (args = tree->gtCall.gtCallLateArgs; args; args = args->Rest())
+ {
+ result = fgWalkTreePreRec<computeStack>(args->pCurrent(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (tree->gtCall.gtControlExpr)
+ {
+ result = fgWalkTreePreRec<computeStack>(&tree->gtCall.gtControlExpr, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ if (tree->gtCall.gtCallCookie)
+ {
+ result = fgWalkTreePreRec<computeStack>(&tree->gtCall.gtCallCookie, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ pTree = &tree->gtCall.gtCallAddr;
+ }
+ else
+ {
+ pTree = nullptr;
+ }
+
+ break;
+
+ case GT_ARR_ELEM:
+
+ result = fgWalkTreePreRec<computeStack>(&tree->gtArrElem.gtArrObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ result = fgWalkTreePreRec<computeStack>(&tree->gtArrElem.gtArrInds[dim], fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ pTree = nullptr;
+ break;
+
+ case GT_ARR_OFFSET:
+ result = fgWalkTreePreRec<computeStack>(&tree->gtArrOffs.gtOffset, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePreRec<computeStack>(&tree->gtArrOffs.gtIndex, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePreRec<computeStack>(&tree->gtArrOffs.gtArrObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ pTree = nullptr;
+ break;
+
+ case GT_CMPXCHG:
+ result = fgWalkTreePreRec<computeStack>(&tree->gtCmpXchg.gtOpLocation, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePreRec<computeStack>(&tree->gtCmpXchg.gtOpValue, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePreRec<computeStack>(&tree->gtCmpXchg.gtOpComparand, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ pTree = nullptr;
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ result = fgWalkTreePreRec<computeStack>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePreRec<computeStack>(&tree->gtBoundsChk.gtIndex, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ pTree = nullptr;
+ break;
+
+ case GT_STORE_DYN_BLK:
+ result = fgWalkTreePreRec<computeStack>(&tree->gtDynBlk.Data(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ __fallthrough;
+
+ case GT_DYN_BLK:
+ result = fgWalkTreePreRec<computeStack>(&tree->gtDynBlk.Addr(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePreRec<computeStack>(&tree->gtDynBlk.gtDynamicSize, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ pTree = nullptr;
+ break;
+
+ default:
+#ifdef DEBUG
+ fgWalkData->compiler->gtDispTree(tree);
+#endif
+ assert(!"unexpected operator");
+ }
+ } while (pTree != nullptr && *pTree != nullptr);
+
+ if (computeStack)
+ {
+ fgWalkData->parentStack->Pop();
+ }
+
+ if (result != WALK_ABORT)
+ {
+ //
+ // Restore fgWalkData->parent
+ //
+ fgWalkData->parent = currentParent;
+ }
+ return result;
+}
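+
+// Usage sketch (editorial note): callers reach this helper through
+// fgWalkTreePre() with a visitor of type fgWalkPreFn plus optional callback
+// data. 'CountLclVarsCB' and 'count' below are hypothetical names used only
+// for illustration; gtHasLocalsWithAddrOpCB() later in this file follows the
+// same pattern.
+//
+//     // hypothetical static member of Compiler, declared alongside the other callbacks
+//     Compiler::fgWalkResult Compiler::CountLclVarsCB(GenTreePtr* pTree, fgWalkData* data)
+//     {
+//         if ((*pTree)->gtOper == GT_LCL_VAR)
+//         {
+//             (*static_cast<unsigned*>(data->pCallbackData))++;
+//         }
+//         return WALK_CONTINUE;
+//     }
+//
+//     // at a call site inside Compiler:
+//     unsigned count = 0;
+//     fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, CountLclVarsCB, &count);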
+
+/*****************************************************************************
+ *
+ * Walk all basic blocks and call the given function pointer for all tree
+ * nodes contained therein.
+ */
+
+void Compiler::fgWalkAllTreesPre(fgWalkPreFn* visitor, void* pCallBackData)
+{
+ BasicBlock* block;
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr tree;
+
+ for (tree = block->bbTreeList; tree; tree = tree->gtNext)
+ {
+ assert(tree->gtOper == GT_STMT);
+
+ fgWalkTreePre(&tree->gtStmt.gtStmtExpr, visitor, pCallBackData);
+ }
+ }
+}
+
+//******************************************************************************
+// fgWalkTreePostRec - Helper function for fgWalkTreePost.
+// Walks the tree in post-order, executing the callback on every node.
+// Template parameter 'computeStack' specifies whether to maintain
+// a stack of ancestor nodes which can be viewed in the callback.
+//
+template <bool computeStack>
+// static
+Compiler::fgWalkResult Compiler::fgWalkTreePostRec(GenTreePtr* pTree, fgWalkData* fgWalkData)
+{
+ fgWalkResult result;
+ GenTreePtr currentParent = fgWalkData->parent;
+
+ genTreeOps oper;
+ unsigned kind;
+
+ GenTree* tree = *pTree;
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+ GenTreeArgList* args;
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ if (computeStack)
+ {
+ fgWalkData->parentStack->Push(tree);
+ }
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ goto DONE;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ fgWalkData->parent = tree;
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ if (tree->gtField.gtFldObj)
+ {
+ result = fgWalkTreePostRec<computeStack>(&tree->gtField.gtFldObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ break;
+
+ case GT_CALL:
+
+ assert(tree->gtFlags & GTF_CALL);
+
+ if (tree->gtCall.gtCallObjp)
+ {
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCall.gtCallObjp, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ for (args = tree->gtCall.gtCallArgs; args; args = args->Rest())
+ {
+ result = fgWalkTreePostRec<computeStack>(args->pCurrent(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ for (args = tree->gtCall.gtCallLateArgs; args; args = args->Rest())
+ {
+ result = fgWalkTreePostRec<computeStack>(args->pCurrent(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ if (tree->gtCall.gtCallCookie)
+ {
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCall.gtCallCookie, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCall.gtCallAddr, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (tree->gtCall.gtControlExpr != nullptr)
+ {
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCall.gtControlExpr, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ break;
+
+ case GT_ARR_ELEM:
+
+ result = fgWalkTreePostRec<computeStack>(&tree->gtArrElem.gtArrObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ result = fgWalkTreePostRec<computeStack>(&tree->gtArrElem.gtArrInds[dim], fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ result = fgWalkTreePostRec<computeStack>(&tree->gtArrOffs.gtOffset, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtArrOffs.gtIndex, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtArrOffs.gtArrObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_CMPXCHG:
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCmpXchg.gtOpComparand, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCmpXchg.gtOpValue, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCmpXchg.gtOpLocation, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ result = fgWalkTreePostRec<computeStack>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtBoundsChk.gtIndex, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_STORE_DYN_BLK:
+ result = fgWalkTreePostRec<computeStack>(&tree->gtDynBlk.Data(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ __fallthrough;
+
+ case GT_DYN_BLK:
+ result = fgWalkTreePostRec<computeStack>(&tree->gtDynBlk.Addr(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtDynBlk.gtDynamicSize, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_PHI:
+ {
+ GenTreeUnOp* phi = tree->AsUnOp();
+ if (phi->gtOp1 != nullptr)
+ {
+ for (GenTreeArgList* args = phi->gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ result = fgWalkTreePostRec<computeStack>(&args->gtOp1, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ }
+ }
+ break;
+
+ case GT_LIST:
+ {
+ GenTreeArgList* list = tree->AsArgList();
+ if (list->IsAggregate())
+ {
+ for (; list != nullptr; list = list->Rest())
+ {
+ result = fgWalkTreePostRec<computeStack>(&list->gtOp1, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ break;
+ }
+
+ // GT_LIST nodes that do not represent aggregate arguments intentionally fall through to the
+ // default node processing below.
+ __fallthrough;
+ }
+
+ default:
+ if (kind & GTK_SMPOP)
+ {
+ GenTree** op1Slot = &tree->gtOp.gtOp1;
+
+ GenTree** op2Slot;
+ if (tree->OperIsBinary())
+ {
+ if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
+ {
+ op2Slot = &tree->gtOp.gtOp2;
+ }
+ else
+ {
+ op2Slot = op1Slot;
+ op1Slot = &tree->gtOp.gtOp2;
+ }
+ }
+ else
+ {
+ op2Slot = nullptr;
+ }
+
+ if (*op1Slot != nullptr)
+ {
+ result = fgWalkTreePostRec<computeStack>(op1Slot, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (op2Slot != nullptr && *op2Slot != nullptr)
+ {
+ result = fgWalkTreePostRec<computeStack>(op2Slot, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ }
+#ifdef DEBUG
+ else
+ {
+ fgWalkData->compiler->gtDispTree(tree);
+ assert(!"unexpected operator");
+ }
+#endif
+ break;
+ }
+
+DONE:
+
+ fgWalkData->parent = currentParent;
+
+ /* Finally, visit the current node */
+ result = fgWalkData->wtpoVisitorFn(pTree, fgWalkData);
+
+ if (computeStack)
+ {
+ fgWalkData->parentStack->Pop();
+ }
+
+ return result;
+}
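+
+// Note (editorial): for simple unary/binary nodes, the default case above
+// honors GTF_REVERSE_OPS by visiting gtOp2 before gtOp1 when the flag is set,
+// so the post-order callback sees operands in their evaluation order.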
+
+// ****************************************************************************
+// walk tree doing callbacks in both pre- and post- order (both optional)
+
+template <bool doPreOrder, bool doPostOrder>
+// static
+Compiler::fgWalkResult Compiler::fgWalkTreeRec(GenTreePtr* pTree, fgWalkData* fgWalkData)
+{
+ fgWalkResult result = WALK_CONTINUE;
+
+ genTreeOps oper;
+ unsigned kind;
+
+ GenTree* tree = *pTree;
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+ GenTreeArgList* args;
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ fgWalkData->parentStack->Push(tree);
+
+ if (doPreOrder)
+ {
+ result = fgWalkData->wtprVisitorFn(pTree, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ else
+ {
+ tree = *pTree;
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+ }
+ }
+
+ // If we're skipping subtrees, we're done.
+ if (result == WALK_SKIP_SUBTREES)
+ {
+ goto DONE;
+ }
+
+ /* Is this a constant or leaf node? */
+
+ if ((kind & (GTK_CONST | GTK_LEAF)) != 0)
+ {
+ goto DONE;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ if (tree->gtOp.gtOp1)
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtOp.gtOp1, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (tree->gtGetOp2())
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtOp.gtOp2, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ goto DONE;
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ if (tree->gtField.gtFldObj)
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtField.gtFldObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ break;
+
+ case GT_CALL:
+
+ assert(tree->gtFlags & GTF_CALL);
+
+ if (tree->gtCall.gtCallObjp)
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCall.gtCallObjp, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ for (args = tree->gtCall.gtCallArgs; args; args = args->Rest())
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(args->pCurrent(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ for (args = tree->gtCall.gtCallLateArgs; args; args = args->Rest())
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(args->pCurrent(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ if (tree->gtCall.gtCallCookie)
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCall.gtCallCookie, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCall.gtCallAddr, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (tree->gtCall.gtControlExpr)
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCall.gtControlExpr, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ break;
+
+ case GT_ARR_ELEM:
+
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtArrElem.gtArrObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtArrElem.gtArrInds[dim], fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtArrOffs.gtOffset, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtArrOffs.gtIndex, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtArrOffs.gtArrObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_CMPXCHG:
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCmpXchg.gtOpComparand, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCmpXchg.gtOpValue, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCmpXchg.gtOpLocation, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtBoundsChk.gtIndex, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_STORE_DYN_BLK:
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtDynBlk.Data(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ __fallthrough;
+
+ case GT_DYN_BLK:
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtDynBlk.Addr(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtDynBlk.gtDynamicSize, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ default:
+#ifdef DEBUG
+ fgWalkData->compiler->gtDispTree(tree);
+#endif
+ assert(!"unexpected operator");
+ }
+
+DONE:
+
+ /* Finally, visit the current node */
+ if (doPostOrder)
+ {
+ result = fgWalkData->wtpoVisitorFn(pTree, fgWalkData);
+ }
+
+ fgWalkData->parentStack->Pop();
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Call the given function pointer for all nodes in the tree. The 'visitor'
+ * fn should return one of the following values:
+ *
+ * WALK_ABORT stop walking and return immediately
+ * WALK_CONTINUE continue walking
+ * WALK_SKIP_SUBTREES don't walk any subtrees of the node just visited
+ */
+
+Compiler::fgWalkResult Compiler::fgWalkTree(GenTreePtr* pTree,
+ fgWalkPreFn* preVisitor,
+ fgWalkPreFn* postVisitor,
+ void* callBackData)
+
+{
+ fgWalkData walkData;
+
+ walkData.compiler = this;
+ walkData.wtprVisitorFn = preVisitor;
+ walkData.wtpoVisitorFn = postVisitor;
+ walkData.pCallbackData = callBackData;
+ walkData.parent = nullptr;
+ walkData.wtprLclsOnly = false;
+#ifdef DEBUG
+ walkData.printModified = false;
+#endif
+ ArrayStack<GenTree*> parentStack(this);
+ walkData.parentStack = &parentStack;
+
+ fgWalkResult result;
+
+ assert(preVisitor || postVisitor);
+
+ if (preVisitor && postVisitor)
+ {
+ result = fgWalkTreeRec<true, true>(pTree, &walkData);
+ }
+ else if (preVisitor)
+ {
+ result = fgWalkTreeRec<true, false>(pTree, &walkData);
+ }
+ else
+ {
+ result = fgWalkTreeRec<false, true>(pTree, &walkData);
+ }
+
+#ifdef DEBUG
+ if (verbose && walkData.printModified)
+ {
+ gtDispTree(*pTree);
+ }
+#endif
+
+ return result;
+}
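+
+// Usage sketch (editorial note): to run both a pre-order and a post-order
+// visitor over a statement's expression, a caller inside Compiler might write
+//
+//     fgWalkTree(&stmt->gtStmt.gtStmtExpr, PreVisitCB, PostVisitCB, &myData);
+//
+// where PreVisitCB/PostVisitCB are hypothetical fgWalkPreFn callbacks and
+// myData is reachable in each callback through fgWalkData::pCallbackData.
+// Passing nullptr for one of the two visitors selects the corresponding
+// single-order instantiation of fgWalkTreeRec above.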
+
+// ------------------------------------------------------------------------------------------
+// gtClearReg: Sets the register to the "no register assignment" value, depending upon
+// the type of the node, and whether it fits any of the special cases for register pairs
+// or multi-reg call nodes.
+//
+// Arguments:
+// compiler - compiler instance
+//
+// Return Value:
+// None
+void GenTree::gtClearReg(Compiler* compiler)
+{
+#if CPU_LONG_USES_REGPAIR
+ if (isRegPairType(TypeGet()) ||
+ // (IsLocal() && isRegPairType(compiler->lvaTable[gtLclVarCommon.gtLclNum].TypeGet())) ||
+ (OperGet() == GT_MUL && (gtFlags & GTF_MUL_64RSLT)))
+ {
+ gtRegPair = REG_PAIR_NONE;
+ }
+ else
+#endif // CPU_LONG_USES_REGPAIR
+ {
+ gtRegNum = REG_NA;
+ }
+
+ // Also clear multi-reg state if this is a call node
+ if (IsCall())
+ {
+ this->AsCall()->ClearOtherRegs();
+ }
+ else if (IsCopyOrReload())
+ {
+ this->AsCopyOrReload()->ClearOtherRegs();
+ }
+}
+
+//-----------------------------------------------------------
+// CopyReg: Copy the _gtRegNum/_gtRegPair/gtRegTag fields.
+//
+// Arguments:
+// from - GenTree node from which to copy
+//
+// Return Value:
+// None
+void GenTree::CopyReg(GenTreePtr from)
+{
+ // To do the copy, use _gtRegPair, which must be bigger than _gtRegNum. Note that the values
+ // might be undefined (so gtRegTag == GT_REGTAG_NONE).
+ _gtRegPair = from->_gtRegPair;
+ C_ASSERT(sizeof(_gtRegPair) >= sizeof(_gtRegNum));
+ INDEBUG(gtRegTag = from->gtRegTag;)
+
+ // Also copy multi-reg state if this is a call node
+ if (IsCall())
+ {
+ assert(from->IsCall());
+ this->AsCall()->CopyOtherRegs(from->AsCall());
+ }
+ else if (IsCopyOrReload())
+ {
+ this->AsCopyOrReload()->CopyOtherRegs(from->AsCopyOrReload());
+ }
+}
+
+//------------------------------------------------------------------
+// gtHasReg: Whether the node has been assigned a register by LSRA
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Returns true if the node was assigned a register.
+//
+// A multi-reg call node is considered to have a reg
+// only if registers have been allocated for all of its
+// return values.
+//
+// A GT_COPY or GT_RELOAD of a multi-reg call is
+// considered to have a reg if a reg is assigned to
+// any of its positions.
+//
+// Assumption:
+// In order for this to work properly, gtClearReg must be called
+// prior to setting the register value.
+//
+bool GenTree::gtHasReg() const
+{
+ bool hasReg;
+
+#if CPU_LONG_USES_REGPAIR
+ if (isRegPairType(TypeGet()))
+ {
+ assert(_gtRegNum != REG_NA);
+ INDEBUG(assert(gtRegTag == GT_REGTAG_REGPAIR));
+ hasReg = (gtRegPair != REG_PAIR_NONE);
+ }
+ else
+#endif
+ {
+ assert(_gtRegNum != REG_PAIR_NONE);
+ INDEBUG(assert(gtRegTag == GT_REGTAG_REG));
+
+ if (IsMultiRegCall())
+ {
+ // Has to cast away const-ness because GetReturnTypeDesc() is a non-const method
+ GenTree* tree = const_cast<GenTree*>(this);
+ GenTreeCall* call = tree->AsCall();
+ unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+ hasReg = false;
+
+ // A Multi-reg call node is said to have regs, if it has
+ // reg assigned to each of its result registers.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ hasReg = (call->GetRegNumByIdx(i) != REG_NA);
+ if (!hasReg)
+ {
+ break;
+ }
+ }
+ }
+ else if (IsCopyOrReloadOfMultiRegCall())
+ {
+ GenTree* tree = const_cast<GenTree*>(this);
+ GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
+ GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall();
+ unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+ hasReg = false;
+
+ // A Multi-reg copy or reload node is said to have regs,
+ // if it has valid regs in any of the positions.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ hasReg = (copyOrReload->GetRegNumByIdx(i) != REG_NA);
+ if (hasReg)
+ {
+ break;
+ }
+ }
+ }
+ else
+ {
+ hasReg = (gtRegNum != REG_NA);
+ }
+ }
+
+ return hasReg;
+}
+
+//---------------------------------------------------------------
+// gtGetRegMask: Get the reg mask of the node.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Reg Mask of GenTree node.
+//
+regMaskTP GenTree::gtGetRegMask() const
+{
+ regMaskTP resultMask;
+
+#if CPU_LONG_USES_REGPAIR
+ if (isRegPairType(TypeGet()))
+ {
+ resultMask = genRegPairMask(gtRegPair);
+ }
+ else
+#endif
+ {
+ if (IsMultiRegCall())
+ {
+ // temporarily cast away const-ness as AsCall() method is not declared const
+ resultMask = genRegMask(gtRegNum);
+ GenTree* temp = const_cast<GenTree*>(this);
+ resultMask |= temp->AsCall()->GetOtherRegMask();
+ }
+ else if (IsCopyOrReloadOfMultiRegCall())
+ {
+ // A multi-reg copy or reload will have valid regs only for those
+ // positions that need to be copied or reloaded. Hence we need
+ // to consider only those registers when computing the reg mask.
+
+ GenTree* tree = const_cast<GenTree*>(this);
+ GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
+ GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall();
+ unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+
+ resultMask = RBM_NONE;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ regNumber reg = copyOrReload->GetRegNumByIdx(i);
+ if (reg != REG_NA)
+ {
+ resultMask |= genRegMask(reg);
+ }
+ }
+ }
+ else
+ {
+ resultMask = genRegMask(gtRegNum);
+ }
+ }
+
+ return resultMask;
+}
+
+//---------------------------------------------------------------
+// GetOtherRegMask: Get the reg mask of gtOtherRegs of call node
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Reg mask of gtOtherRegs of call node.
+//
+regMaskTP GenTreeCall::GetOtherRegMask() const
+{
+ regMaskTP resultMask = RBM_NONE;
+
+#if FEATURE_MULTIREG_RET
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+ {
+ if (gtOtherRegs[i] != REG_NA)
+ {
+ resultMask |= genRegMask(gtOtherRegs[i]);
+ continue;
+ }
+ break;
+ }
+#endif
+
+ return resultMask;
+}
+
+//-------------------------------------------------------------------------
+// IsPure:
+// Returns true if this call is pure. For now, this uses the same
+// definition of "pure" as that used by HelperCallProperties: a
+// pure call does not read or write any aliased (e.g. heap) memory or
+// have other global side effects (e.g. class constructors, finalizers),
+// but is allowed to throw an exception.
+//
+// NOTE: this call currently only returns true if the call target is a
+// helper method that is known to be pure. No other analysis is
+// performed.
+//
+// Arguments:
+// compiler - the compiler context.
+//
+// Returns:
+// True if the call is pure; false otherwise.
+//
+bool GenTreeCall::IsPure(Compiler* compiler) const
+{
+ return (gtCallType == CT_HELPER) &&
+ compiler->s_helperCallProperties.IsPure(compiler->eeGetHelperNum(gtCallMethHnd));
+}
+
+#ifndef LEGACY_BACKEND
+
+//-------------------------------------------------------------------------
+// HasNonStandardAddedArgs: Return true if the method has non-standard args added to the call
+// argument list during argument morphing (fgMorphArgs), e.g., passed in R10 or R11 on AMD64.
+// See also GetNonStandardAddedArgCount().
+//
+// Arguments:
+// compiler - the compiler instance
+//
+// Return Value:
+// true if there are any such args, false otherwise.
+//
+bool GenTreeCall::HasNonStandardAddedArgs(Compiler* compiler) const
+{
+ return GetNonStandardAddedArgCount(compiler) != 0;
+}
+
+//-------------------------------------------------------------------------
+// GetNonStandardAddedArgCount: Get the count of non-standard arguments that have been added
+// during call argument morphing (fgMorphArgs). Do not count non-standard args that are already
+// counted in the argument list prior to morphing.
+//
+// This function is used to help map the caller and callee arguments during tail call setup.
+//
+// Arguments:
+// compiler - the compiler instance
+//
+// Return Value:
+// The count of args, as described.
+//
+// Notes:
+// It would be more general to have fgMorphArgs set a bit on the call node when such
+// args are added to a call, and a bit on each such arg, and then have this code loop
+// over the call args when the special call bit is set, counting the args with the special
+// arg bit. This seems pretty heavyweight, though. Instead, this logic needs to be kept
+// in sync with fgMorphArgs.
+//
+int GenTreeCall::GetNonStandardAddedArgCount(Compiler* compiler) const
+{
+ if (IsUnmanaged() && !compiler->opts.ShouldUsePInvokeHelpers())
+ {
+ // R11 = PInvoke cookie param
+ return 1;
+ }
+ else if (gtCallType == CT_INDIRECT)
+ {
+ if (IsVirtualStub())
+ {
+ // R11 = Virtual stub param
+ return 1;
+ }
+ else if (gtCallCookie != nullptr)
+ {
+ // R10 = PInvoke target param
+ // R11 = PInvoke cookie param
+ return 2;
+ }
+ }
+ return 0;
+}
+
+#endif // !LEGACY_BACKEND
+
+//-------------------------------------------------------------------------
+// TreatAsHasRetBufArg:
+//
+// Arguments:
+// compiler, the compiler instance so that we can call eeGetHelperNum
+//
+// Return Value:
+// Returns true if we treat the call as if it has a retBuf argument.
+// The call may actually have a retBuf argument, or it could be a
+// JIT helper that we are still transforming during the importer phase.
+//
+// Notes:
+// On ARM64 marking the method with the GTF_CALL_M_RETBUFFARG flag
+// will make HasRetBufArg() return true, but will also force the
+// use of register x8 to pass the RetBuf argument.
+//
+// These two Jit Helpers that we handle here by returning true
+// aren't actually defined to return a struct, so they don't expect
+// their RetBuf to be passed in x8; instead, they expect it in x0.
+//
+bool GenTreeCall::TreatAsHasRetBufArg(Compiler* compiler) const
+{
+ if (HasRetBufArg())
+ {
+ return true;
+ }
+ else
+ {
+ // If we see a Jit helper call that returns a TYP_STRUCT we will
+ // transform it as if it has a Return Buffer Argument
+ //
+ if (IsHelperCall() && (gtReturnType == TYP_STRUCT))
+ {
+ // There are two possible helper calls that use this path:
+ // CORINFO_HELP_GETFIELDSTRUCT and CORINFO_HELP_UNBOX_NULLABLE
+ //
+ CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(gtCallMethHnd);
+
+ if (helpFunc == CORINFO_HELP_GETFIELDSTRUCT)
+ {
+ return true;
+ }
+ else if (helpFunc == CORINFO_HELP_UNBOX_NULLABLE)
+ {
+ return true;
+ }
+ else
+ {
+ assert(!"Unexpected JIT helper in TreatAsHasRetBufArg");
+ }
+ }
+ }
+ return false;
+}
+
+//-------------------------------------------------------------------------
+// IsHelperCall: Determine if this GT_CALL node is a specific helper call.
+//
+// Arguments:
+// compiler - the compiler instance so that we can call eeFindHelper
+//
+// Return Value:
+// Returns true if this GT_CALL node is a call to the specified helper.
+//
+bool GenTreeCall::IsHelperCall(Compiler* compiler, unsigned helper) const
+{
+ return IsHelperCall(compiler->eeFindHelper(helper));
+}
+
+/*****************************************************************************
+ *
+ * Returns non-zero if the two trees are identical.
+ */
+
+bool GenTree::Compare(GenTreePtr op1, GenTreePtr op2, bool swapOK)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+// printf("tree1:\n"); gtDispTree(op1);
+// printf("tree2:\n"); gtDispTree(op2);
+
+AGAIN:
+
+ if (op1 == nullptr)
+ {
+ return (op2 == nullptr);
+ }
+ if (op2 == nullptr)
+ {
+ return false;
+ }
+ if (op1 == op2)
+ {
+ return true;
+ }
+
+ assert(op1->gtOper != GT_STMT);
+ assert(op2->gtOper != GT_STMT);
+
+ oper = op1->OperGet();
+
+ /* The operators must be equal */
+
+ if (oper != op2->gtOper)
+ {
+ return false;
+ }
+
+ /* The types must be equal */
+
+ if (op1->gtType != op2->gtType)
+ {
+ return false;
+ }
+
+ /* Overflow must be equal */
+ if (op1->gtOverflowEx() != op2->gtOverflowEx())
+ {
+ return false;
+ }
+
+ /* Sensible flags must be equal */
+ if ((op1->gtFlags & (GTF_UNSIGNED)) != (op2->gtFlags & (GTF_UNSIGNED)))
+ {
+ return false;
+ }
+
+ /* Figure out what kind of nodes we're comparing */
+
+ kind = op1->OperKind();
+
+ /* Is this a constant node? */
+
+ if (kind & GTK_CONST)
+ {
+ switch (oper)
+ {
+ case GT_CNS_INT:
+ if (op1->gtIntCon.gtIconVal == op2->gtIntCon.gtIconVal)
+ {
+ return true;
+ }
+ break;
+#if 0
+ // TODO-CQ: Enable this in the future
+ case GT_CNS_LNG:
+ if (op1->gtLngCon.gtLconVal == op2->gtLngCon.gtLconVal)
+ return true;
+ break;
+
+ case GT_CNS_DBL:
+ if (op1->gtDblCon.gtDconVal == op2->gtDblCon.gtDconVal)
+ return true;
+ break;
+#endif
+ default:
+ break;
+ }
+
+ return false;
+ }
+
+ /* Is this a leaf node? */
+
+ if (kind & GTK_LEAF)
+ {
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+ if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
+ {
+ break;
+ }
+
+ return true;
+
+ case GT_LCL_FLD:
+ if (op1->gtLclFld.gtLclNum != op2->gtLclFld.gtLclNum ||
+ op1->gtLclFld.gtLclOffs != op2->gtLclFld.gtLclOffs)
+ {
+ break;
+ }
+
+ return true;
+
+ case GT_CLS_VAR:
+ if (op1->gtClsVar.gtClsVarHnd != op2->gtClsVar.gtClsVarHnd)
+ {
+ break;
+ }
+
+ return true;
+
+ case GT_LABEL:
+ return true;
+
+ case GT_ARGPLACE:
+ if ((op1->gtType == TYP_STRUCT) &&
+ (op1->gtArgPlace.gtArgPlaceClsHnd != op2->gtArgPlace.gtArgPlaceClsHnd))
+ {
+ break;
+ }
+ return true;
+
+ default:
+ break;
+ }
+
+ return false;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_UNOP)
+ {
+ if (IsExOp(kind))
+ {
+ // ExOp operators extend unary operator with extra, non-GenTreePtr members. In many cases,
+ // these should be included in the comparison.
+ switch (oper)
+ {
+ case GT_ARR_LENGTH:
+ if (op1->gtArrLen.ArrLenOffset() != op2->gtArrLen.ArrLenOffset())
+ {
+ return false;
+ }
+ break;
+ case GT_CAST:
+ if (op1->gtCast.gtCastType != op2->gtCast.gtCastType)
+ {
+ return false;
+ }
+ break;
+ case GT_OBJ:
+ if (op1->AsObj()->gtClass != op2->AsObj()->gtClass)
+ {
+ return false;
+ }
+ break;
+
+ // For the ones below no extra argument matters for comparison.
+ case GT_BOX:
+ break;
+
+ default:
+ assert(!"unexpected unary ExOp operator");
+ }
+ }
+ return Compare(op1->gtOp.gtOp1, op2->gtOp.gtOp1);
+ }
+
+ if (kind & GTK_BINOP)
+ {
+ if (IsExOp(kind))
+ {
+ // ExOp operators extend binary operators with extra, non-GenTreePtr members. In many cases,
+ // these should be included in the comparison.
+ switch (oper)
+ {
+ case GT_INTRINSIC:
+ if (op1->gtIntrinsic.gtIntrinsicId != op2->gtIntrinsic.gtIntrinsicId)
+ {
+ return false;
+ }
+ break;
+ case GT_LEA:
+ if (op1->gtAddrMode.gtScale != op2->gtAddrMode.gtScale)
+ {
+ return false;
+ }
+ if (op1->gtAddrMode.gtOffset != op2->gtAddrMode.gtOffset)
+ {
+ return false;
+ }
+ break;
+ case GT_INDEX:
+ if (op1->gtIndex.gtIndElemSize != op2->gtIndex.gtIndElemSize)
+ {
+ return false;
+ }
+ break;
+
+ // For the ones below no extra argument matters for comparison.
+ case GT_QMARK:
+ break;
+
+ default:
+ assert(!"unexpected binary ExOp operator");
+ }
+ }
+
+ if (op1->gtOp.gtOp2)
+ {
+ if (!Compare(op1->gtOp.gtOp1, op2->gtOp.gtOp1, swapOK))
+ {
+ if (swapOK && OperIsCommutative(oper) &&
+ ((op1->gtOp.gtOp1->gtFlags | op1->gtOp.gtOp2->gtFlags | op2->gtOp.gtOp1->gtFlags |
+ op2->gtOp.gtOp2->gtFlags) &
+ GTF_ALL_EFFECT) == 0)
+ {
+ if (Compare(op1->gtOp.gtOp1, op2->gtOp.gtOp2, swapOK))
+ {
+ op1 = op1->gtOp.gtOp2;
+ op2 = op2->gtOp.gtOp1;
+ goto AGAIN;
+ }
+ }
+
+ return false;
+ }
+
+ op1 = op1->gtOp.gtOp2;
+ op2 = op2->gtOp.gtOp2;
+
+ goto AGAIN;
+ }
+ else
+ {
+
+ op1 = op1->gtOp.gtOp1;
+ op2 = op2->gtOp.gtOp1;
+
+ if (!op1)
+ {
+ return (op2 == nullptr);
+ }
+ if (!op2)
+ {
+ return false;
+ }
+
+ goto AGAIN;
+ }
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ if (op1->gtField.gtFldHnd != op2->gtField.gtFldHnd)
+ {
+ break;
+ }
+
+ op1 = op1->gtField.gtFldObj;
+ op2 = op2->gtField.gtFldObj;
+
+ if (op1 || op2)
+ {
+ if (op1 && op2)
+ {
+ goto AGAIN;
+ }
+ }
+
+ return true;
+
+ case GT_CALL:
+
+ if (op1->gtCall.gtCallType != op2->gtCall.gtCallType)
+ {
+ return false;
+ }
+
+ if (op1->gtCall.gtCallType != CT_INDIRECT)
+ {
+ if (op1->gtCall.gtCallMethHnd != op2->gtCall.gtCallMethHnd)
+ {
+ return false;
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (op1->gtCall.gtEntryPoint.addr != op2->gtCall.gtEntryPoint.addr)
+ return false;
+#endif
+ }
+ else
+ {
+ if (!Compare(op1->gtCall.gtCallAddr, op2->gtCall.gtCallAddr))
+ {
+ return false;
+ }
+ }
+
+ if (Compare(op1->gtCall.gtCallLateArgs, op2->gtCall.gtCallLateArgs) &&
+ Compare(op1->gtCall.gtCallArgs, op2->gtCall.gtCallArgs) &&
+ Compare(op1->gtCall.gtControlExpr, op2->gtCall.gtControlExpr) &&
+ Compare(op1->gtCall.gtCallObjp, op2->gtCall.gtCallObjp))
+ {
+ return true;
+ }
+ break;
+
+ case GT_ARR_ELEM:
+
+ if (op1->gtArrElem.gtArrRank != op2->gtArrElem.gtArrRank)
+ {
+ return false;
+ }
+
+ // NOTE: gtArrElemSize may need to be handled
+
+ unsigned dim;
+ for (dim = 0; dim < op1->gtArrElem.gtArrRank; dim++)
+ {
+ if (!Compare(op1->gtArrElem.gtArrInds[dim], op2->gtArrElem.gtArrInds[dim]))
+ {
+ return false;
+ }
+ }
+
+ op1 = op1->gtArrElem.gtArrObj;
+ op2 = op2->gtArrElem.gtArrObj;
+ goto AGAIN;
+
+ case GT_ARR_OFFSET:
+ if (op1->gtArrOffs.gtCurrDim != op2->gtArrOffs.gtCurrDim ||
+ op1->gtArrOffs.gtArrRank != op2->gtArrOffs.gtArrRank)
+ {
+ return false;
+ }
+ return (Compare(op1->gtArrOffs.gtOffset, op2->gtArrOffs.gtOffset) &&
+ Compare(op1->gtArrOffs.gtIndex, op2->gtArrOffs.gtIndex) &&
+ Compare(op1->gtArrOffs.gtArrObj, op2->gtArrOffs.gtArrObj));
+
+ case GT_CMPXCHG:
+ return Compare(op1->gtCmpXchg.gtOpLocation, op2->gtCmpXchg.gtOpLocation) &&
+ Compare(op1->gtCmpXchg.gtOpValue, op2->gtCmpXchg.gtOpValue) &&
+ Compare(op1->gtCmpXchg.gtOpComparand, op2->gtCmpXchg.gtOpComparand);
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ return Compare(op1->gtBoundsChk.gtArrLen, op2->gtBoundsChk.gtArrLen) &&
+ Compare(op1->gtBoundsChk.gtIndex, op2->gtBoundsChk.gtIndex) &&
+ (op1->gtBoundsChk.gtThrowKind == op2->gtBoundsChk.gtThrowKind);
+
+ case GT_STORE_DYN_BLK:
+ case GT_DYN_BLK:
+ return Compare(op1->gtDynBlk.Addr(), op2->gtDynBlk.Addr()) &&
+ Compare(op1->gtDynBlk.Data(), op2->gtDynBlk.Data()) &&
+ Compare(op1->gtDynBlk.gtDynamicSize, op2->gtDynBlk.gtDynamicSize);
+
+ default:
+ assert(!"unexpected operator");
+ }
+
+ return false;
+}
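+
+// Example (editorial note): two structurally identical trees compare equal even
+// when they are distinct allocations - e.g. two separate GT_LCL_VAR nodes of
+// the same type referring to the same lclNum. With swapOK, an expression such
+// as "V01 + V02" also matches "V02 + V01" for a commutative oper, but only
+// when none of the operands carry GTF_ALL_EFFECT side-effect flags, as checked
+// above.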
+
+/*****************************************************************************
+ *
+ * Returns non-zero if the given tree contains a use of a local #lclNum.
+ */
+
+bool Compiler::gtHasRef(GenTreePtr tree, ssize_t lclNum, bool defOnly)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+AGAIN:
+
+ assert(tree);
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ assert(oper != GT_STMT);
+
+ /* Is this a constant node? */
+
+ if (kind & GTK_CONST)
+ {
+ return false;
+ }
+
+ /* Is this a leaf node? */
+
+ if (kind & GTK_LEAF)
+ {
+ if (oper == GT_LCL_VAR)
+ {
+ if (tree->gtLclVarCommon.gtLclNum == (unsigned)lclNum)
+ {
+ if (!defOnly)
+ {
+ return true;
+ }
+ }
+ }
+ else if (oper == GT_RET_EXPR)
+ {
+ return gtHasRef(tree->gtRetExpr.gtInlineCandidate, lclNum, defOnly);
+ }
+
+ return false;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ if (tree->gtGetOp2())
+ {
+ if (gtHasRef(tree->gtOp.gtOp1, lclNum, defOnly))
+ {
+ return true;
+ }
+
+ tree = tree->gtOp.gtOp2;
+ goto AGAIN;
+ }
+ else
+ {
+ tree = tree->gtOp.gtOp1;
+
+ if (!tree)
+ {
+ return false;
+ }
+
+ if (kind & GTK_ASGOP)
+ {
+ // 'tree' is the gtOp1 of an assignment node. So we can handle
+ // the case where defOnly is either true or false.
+
+ if (tree->gtOper == GT_LCL_VAR && tree->gtLclVarCommon.gtLclNum == (unsigned)lclNum)
+ {
+ return true;
+ }
+ else if (tree->gtOper == GT_FIELD && lclNum == (ssize_t)tree->gtField.gtFldHnd)
+ {
+ return true;
+ }
+ }
+
+ goto AGAIN;
+ }
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ if (lclNum == (ssize_t)tree->gtField.gtFldHnd)
+ {
+ if (!defOnly)
+ {
+ return true;
+ }
+ }
+
+ tree = tree->gtField.gtFldObj;
+ if (tree)
+ {
+ goto AGAIN;
+ }
+ break;
+
+ case GT_CALL:
+
+ if (tree->gtCall.gtCallObjp)
+ {
+ if (gtHasRef(tree->gtCall.gtCallObjp, lclNum, defOnly))
+ {
+ return true;
+ }
+ }
+
+ if (tree->gtCall.gtCallArgs)
+ {
+ if (gtHasRef(tree->gtCall.gtCallArgs, lclNum, defOnly))
+ {
+ return true;
+ }
+ }
+
+ if (tree->gtCall.gtCallLateArgs)
+ {
+ if (gtHasRef(tree->gtCall.gtCallLateArgs, lclNum, defOnly))
+ {
+ return true;
+ }
+ }
+
+ if (tree->gtCall.gtControlExpr)
+ {
+ if (gtHasRef(tree->gtCall.gtControlExpr, lclNum, defOnly))
+ {
+ return true;
+ }
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ // pinvoke-calli cookie is a constant, or constant indirection
+ assert(tree->gtCall.gtCallCookie == nullptr || tree->gtCall.gtCallCookie->gtOper == GT_CNS_INT ||
+ tree->gtCall.gtCallCookie->gtOper == GT_IND);
+
+ tree = tree->gtCall.gtCallAddr;
+ }
+ else
+ {
+ tree = nullptr;
+ }
+
+ if (tree)
+ {
+ goto AGAIN;
+ }
+
+ break;
+
+ case GT_ARR_ELEM:
+ if (gtHasRef(tree->gtArrElem.gtArrObj, lclNum, defOnly))
+ {
+ return true;
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ if (gtHasRef(tree->gtArrElem.gtArrInds[dim], lclNum, defOnly))
+ {
+ return true;
+ }
+ }
+
+ break;
+
+ case GT_ARR_OFFSET:
+ if (gtHasRef(tree->gtArrOffs.gtOffset, lclNum, defOnly) ||
+ gtHasRef(tree->gtArrOffs.gtIndex, lclNum, defOnly) ||
+ gtHasRef(tree->gtArrOffs.gtArrObj, lclNum, defOnly))
+ {
+ return true;
+ }
+ break;
+
+ case GT_CMPXCHG:
+ if (gtHasRef(tree->gtCmpXchg.gtOpLocation, lclNum, defOnly))
+ {
+ return true;
+ }
+ if (gtHasRef(tree->gtCmpXchg.gtOpValue, lclNum, defOnly))
+ {
+ return true;
+ }
+ if (gtHasRef(tree->gtCmpXchg.gtOpComparand, lclNum, defOnly))
+ {
+ return true;
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ if (gtHasRef(tree->gtBoundsChk.gtArrLen, lclNum, defOnly))
+ {
+ return true;
+ }
+ if (gtHasRef(tree->gtBoundsChk.gtIndex, lclNum, defOnly))
+ {
+ return true;
+ }
+ break;
+
+ case GT_STORE_DYN_BLK:
+ if (gtHasRef(tree->gtDynBlk.Data(), lclNum, defOnly))
+ {
+ return true;
+ }
+ __fallthrough;
+ case GT_DYN_BLK:
+ if (gtHasRef(tree->gtDynBlk.Addr(), lclNum, defOnly))
+ {
+ return true;
+ }
+ if (gtHasRef(tree->gtDynBlk.gtDynamicSize, lclNum, defOnly))
+ {
+ return true;
+ }
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ assert(!"unexpected operator");
+ }
+
+ return false;
+}
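+
+// Note (editorial): gtHasRef(tree, lclNum, /*defOnly*/ false) answers "does
+// V<lclNum> (or, when lclNum is really a field handle, that field) appear
+// anywhere in this tree?", while defOnly == true restricts the search to
+// positions where the local or field is the destination of an assignment.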
+
+struct AddrTakenDsc
+{
+ Compiler* comp;
+ bool hasAddrTakenLcl;
+};
+
+/* static */
+Compiler::fgWalkResult Compiler::gtHasLocalsWithAddrOpCB(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* comp = data->compiler;
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &comp->lvaTable[lclNum];
+
+ if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed)
+ {
+ ((AddrTakenDsc*)data->pCallbackData)->hasAddrTakenLcl = true;
+ return WALK_ABORT;
+ }
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Return true if this tree contains locals with lvHasLdAddrOp or lvAddrExposed
+ * flag(s) set.
+ */
+
+bool Compiler::gtHasLocalsWithAddrOp(GenTreePtr tree)
+{
+ AddrTakenDsc desc;
+
+ desc.comp = this;
+ desc.hasAddrTakenLcl = false;
+
+ fgWalkTreePre(&tree, gtHasLocalsWithAddrOpCB, &desc);
+
+ return desc.hasAddrTakenLcl;
+}
+
+/*****************************************************************************
+ *
+ * Helper used to compute hash values for trees.
+ */
+
+inline unsigned genTreeHashAdd(unsigned old, unsigned add)
+{
+ return (old + old / 2) ^ add;
+}
+
+inline unsigned genTreeHashAdd(unsigned old, void* add)
+{
+ return genTreeHashAdd(old, (unsigned)(size_t)add);
+}
+
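+// Combines two operand hash values symmetrically, so that the operands of a
+// commutative operator hash to the same value regardless of their order.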
+inline unsigned genTreeHashAdd(unsigned old, unsigned add1, unsigned add2)
+{
+ return (old + old / 2) ^ add1 ^ add2;
+}
+
+/*****************************************************************************
+ *
+ * Given an arbitrary expression tree, compute a hash value for it.
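+ * Commutative operators combine their operand hashes symmetrically (see
+ * genTreeHashAdd above), so swapping such operands does not change the hash.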
+ */
+
+unsigned Compiler::gtHashValue(GenTree* tree)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+ unsigned hash = 0;
+
+ GenTreePtr temp;
+
+AGAIN:
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ /* Include the operator value in the hash */
+
+ hash = genTreeHashAdd(hash, oper);
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ size_t add;
+
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+ add = tree->gtLclVar.gtLclNum;
+ break;
+ case GT_LCL_FLD:
+ hash = genTreeHashAdd(hash, tree->gtLclFld.gtLclNum);
+ add = tree->gtLclFld.gtLclOffs;
+ break;
+
+ case GT_CNS_INT:
+ add = (int)tree->gtIntCon.gtIconVal;
+ break;
+ case GT_CNS_LNG:
+ add = (int)tree->gtLngCon.gtLconVal;
+ break;
+ case GT_CNS_DBL:
+ add = (int)tree->gtDblCon.gtDconVal;
+ break;
+ case GT_CNS_STR:
+ add = (int)tree->gtStrCon.gtSconCPX;
+ break;
+
+ case GT_JMP:
+ add = tree->gtVal.gtVal1;
+ break;
+
+ default:
+ add = 0;
+ break;
+ }
+
+ // narrowing cast, but for hashing.
+ hash = genTreeHashAdd(hash, (unsigned)add);
+ goto DONE;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ GenTreePtr op1;
+
+ if (kind & GTK_UNOP)
+ {
+ op1 = tree->gtOp.gtOp1;
+ /* Special case: no sub-operand at all */
+
+ if (GenTree::IsExOp(kind))
+ {
+ // ExOp operators extend operators with extra, non-GenTreePtr members. In many cases,
+ // these should be included in the hash code.
+ switch (oper)
+ {
+ case GT_ARR_LENGTH:
+ hash += tree->gtArrLen.ArrLenOffset();
+ break;
+ case GT_CAST:
+ hash ^= tree->gtCast.gtCastType;
+ break;
+ case GT_INDEX:
+ hash += tree->gtIndex.gtIndElemSize;
+ break;
+ case GT_ALLOCOBJ:
+ hash = genTreeHashAdd(hash, static_cast<unsigned>(
+ reinterpret_cast<uintptr_t>(tree->gtAllocObj.gtAllocObjClsHnd)));
+ hash = genTreeHashAdd(hash, tree->gtAllocObj.gtNewHelper);
+ break;
+ case GT_OBJ:
+ hash = genTreeHashAdd(hash, static_cast<unsigned>(
+ reinterpret_cast<uintptr_t>(tree->gtObj.gtClass)));
+ break;
+
+ // For the ones below no extra argument matters for comparison.
+ case GT_BOX:
+ break;
+
+ default:
+ assert(!"unexpected unary ExOp operator");
+ }
+ }
+
+ if (!op1)
+ {
+ goto DONE;
+ }
+
+ tree = op1;
+ goto AGAIN;
+ }
+
+ if (kind & GTK_BINOP)
+ {
+ if (GenTree::IsExOp(kind))
+ {
+ // ExOp operators extend operators with extra, non-GenTreePtr members. In many cases,
+ // these should be included in the hash code.
+ switch (oper)
+ {
+ case GT_INTRINSIC:
+ hash += tree->gtIntrinsic.gtIntrinsicId;
+ break;
+ case GT_LEA:
+ hash += (tree->gtAddrMode.gtOffset << 3) + tree->gtAddrMode.gtScale;
+ break;
+
+ case GT_BLK:
+ case GT_STORE_BLK:
+ hash += tree->gtBlk.gtBlkSize;
+ break;
+
+ case GT_OBJ:
+ case GT_STORE_OBJ:
+ hash ^= static_cast<unsigned>(reinterpret_cast<uintptr_t>(tree->AsObj()->gtClass));
+ break;
+
+ case GT_DYN_BLK:
+ case GT_STORE_DYN_BLK:
+ hash += gtHashValue(tree->AsDynBlk()->gtDynamicSize);
+ break;
+
+ // For the ones below no extra argument matters for comparison.
+ case GT_ARR_INDEX:
+ case GT_QMARK:
+ case GT_INDEX:
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ hash += tree->gtSIMD.gtSIMDIntrinsicID;
+ hash += tree->gtSIMD.gtSIMDBaseType;
+ break;
+#endif // FEATURE_SIMD
+
+ default:
+ assert(!"unexpected binary ExOp operator");
+ }
+ }
+
+ op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ /* Is there a second sub-operand? */
+
+ if (!op2)
+ {
+ /* Special case: no sub-operands at all */
+
+ if (!op1)
+ {
+ goto DONE;
+ }
+
+ /* This is a unary operator */
+
+ tree = op1;
+ goto AGAIN;
+ }
+
+ /* This is a binary operator */
+
+ unsigned hsh1 = gtHashValue(op1);
+
+ /* Special case: addition of two values */
+
+ if (GenTree::OperIsCommutative(oper))
+ {
+ unsigned hsh2 = gtHashValue(op2);
+
+ /* Produce a hash that allows swapping the operands */
+
+ hash = genTreeHashAdd(hash, hsh1, hsh2);
+ goto DONE;
+ }
+
+ /* Add op1's hash to the running value and continue with op2 */
+
+ hash = genTreeHashAdd(hash, hsh1);
+
+ tree = op2;
+ goto AGAIN;
+ }
+
+ /* See what kind of a special operator we have here */
+ switch (tree->gtOper)
+ {
+ case GT_FIELD:
+ if (tree->gtField.gtFldObj)
+ {
+ temp = tree->gtField.gtFldObj;
+ assert(temp);
+ hash = genTreeHashAdd(hash, gtHashValue(temp));
+ }
+ break;
+
+ case GT_STMT:
+ temp = tree->gtStmt.gtStmtExpr;
+ assert(temp);
+ hash = genTreeHashAdd(hash, gtHashValue(temp));
+ break;
+
+ case GT_ARR_ELEM:
+
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtArrElem.gtArrObj));
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtArrElem.gtArrInds[dim]));
+ }
+
+ break;
+
+ case GT_ARR_OFFSET:
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtArrOffs.gtOffset));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtArrOffs.gtIndex));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtArrOffs.gtArrObj));
+ break;
+
+ case GT_CALL:
+
+ if (tree->gtCall.gtCallObjp && tree->gtCall.gtCallObjp->gtOper != GT_NOP)
+ {
+ temp = tree->gtCall.gtCallObjp;
+ assert(temp);
+ hash = genTreeHashAdd(hash, gtHashValue(temp));
+ }
+
+ if (tree->gtCall.gtCallArgs)
+ {
+ temp = tree->gtCall.gtCallArgs;
+ assert(temp);
+ hash = genTreeHashAdd(hash, gtHashValue(temp));
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ temp = tree->gtCall.gtCallAddr;
+ assert(temp);
+ hash = genTreeHashAdd(hash, gtHashValue(temp));
+ }
+ else
+ {
+ hash = genTreeHashAdd(hash, tree->gtCall.gtCallMethHnd);
+ }
+
+ if (tree->gtCall.gtCallLateArgs)
+ {
+ temp = tree->gtCall.gtCallLateArgs;
+ assert(temp);
+ hash = genTreeHashAdd(hash, gtHashValue(temp));
+ }
+ break;
+
+ case GT_CMPXCHG:
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtCmpXchg.gtOpLocation));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtCmpXchg.gtOpValue));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtCmpXchg.gtOpComparand));
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtBoundsChk.gtArrLen));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtBoundsChk.gtIndex));
+ hash = genTreeHashAdd(hash, tree->gtBoundsChk.gtThrowKind);
+ break;
+
+ case GT_STORE_DYN_BLK:
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtDynBlk.Data()));
+ __fallthrough;
+ case GT_DYN_BLK:
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtDynBlk.Addr()));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtDynBlk.gtDynamicSize));
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ assert(!"unexpected operator");
+ break;
+ }
+
+DONE:
+
+ return hash;
+}
+
+/*****************************************************************************
+ *
+ * Given an arbitrary expression tree, attempts to find the set of all local variables
+ * referenced by the tree, and return them as "*result".
+ * If "findPtr" is null, this is a tracked variable set;
+ * if it is non-null, this is an "all var set."
+ * The "*result" value is valid only if the call returns "true." It may return "false"
+ * for several reasons:
+ * If "findPtr" is NULL, and the expression contains an untracked variable.
+ * If "findPtr" is non-NULL, and the expression contains a variable that can't be represented
+ * in an "all var set."
+ * If the expression accesses address-exposed variables.
+ *
+ * If there are any indirections or global refs in the expression, the "*refsPtr"
+ * argument will be assigned the appropriate bit set based on the 'varRefKinds' type.
+ * It won't be assigned anything when there are no indirections or global
+ * references, though, so this value should be initialized before the call.
+ * If we encounter an expression that is equal to *findPtr we set *findPtr
+ * to NULL.
+ */
+bool Compiler::lvaLclVarRefs(GenTreePtr tree, GenTreePtr* findPtr, varRefKinds* refsPtr, void* result)
+{
+ genTreeOps oper;
+ unsigned kind;
+ varRefKinds refs = VR_NONE;
+ ALLVARSET_TP ALLVARSET_INIT_NOCOPY(allVars, AllVarSetOps::UninitVal());
+ VARSET_TP VARSET_INIT_NOCOPY(trkdVars, VarSetOps::UninitVal());
+ if (findPtr)
+ {
+ AllVarSetOps::AssignNoCopy(this, allVars, AllVarSetOps::MakeEmpty(this));
+ }
+ else
+ {
+ VarSetOps::AssignNoCopy(this, trkdVars, VarSetOps::MakeEmpty(this));
+ }
+
+AGAIN:
+
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+
+ /* Remember whether we've come across the expression we're looking for */
+
+ if (findPtr && *findPtr == tree)
+ {
+ *findPtr = nullptr;
+ }
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ if (oper == GT_LCL_VAR)
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ /* Should we use the variable table? */
+
+ if (findPtr)
+ {
+ if (lclNum >= lclMAX_ALLSET_TRACKED)
+ {
+ return false;
+ }
+
+ AllVarSetOps::AddElemD(this, allVars, lclNum);
+ }
+ else
+ {
+ assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvTracked == false)
+ {
+ return false;
+ }
+
+ // Don't deal with expressions with address-exposed variables.
+ if (varDsc->lvAddrExposed)
+ {
+ return false;
+ }
+
+ VarSetOps::AddElemD(this, trkdVars, varDsc->lvVarIndex);
+ }
+ }
+ else if (oper == GT_LCL_FLD)
+ {
+ /* We can't track every field of every var. Moreover, indirections
+ may access different parts of the var as different (but
+ overlapping) fields. So just treat them as indirect accesses */
+
+ if (varTypeIsGC(tree->TypeGet()))
+ {
+ refs = VR_IND_REF;
+ }
+ else
+ {
+ refs = VR_IND_SCL;
+ }
+ }
+ else if (oper == GT_CLS_VAR)
+ {
+ refs = VR_GLB_VAR;
+ }
+
+ if (refs != VR_NONE)
+ {
+ /* Write it back to the caller's parameter using an 'or' */
+ *refsPtr = varRefKinds((*refsPtr) | refs);
+ }
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ return true;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ if (oper == GT_IND)
+ {
+ assert(tree->gtOp.gtOp2 == nullptr);
+
+ /* Set the proper indirection bit */
+
+ if ((tree->gtFlags & GTF_IND_INVARIANT) == 0)
+ {
+ if (varTypeIsGC(tree->TypeGet()))
+ {
+ refs = VR_IND_REF;
+ }
+ else
+ {
+ refs = VR_IND_SCL;
+ }
+
+ // If the flag GTF_IND_TGTANYWHERE is set this indirection
+ // could also point at a global variable
+
+ if (tree->gtFlags & GTF_IND_TGTANYWHERE)
+ {
+ refs = varRefKinds(((int)refs) | ((int)VR_GLB_VAR));
+ }
+ }
+
+ /* Write it back to the caller's parameter using an 'or' */
+ *refsPtr = varRefKinds((*refsPtr) | refs);
+
+ // For IL volatile memory accesses we mark the GT_IND node
+ // with a GTF_DONT_CSE flag.
+ //
+ // This flag is also set for the left hand side of an assignment.
+ //
+ // If this flag is set then we return false
+ //
+ if (tree->gtFlags & GTF_DONT_CSE)
+ {
+ return false;
+ }
+ }
+
+ if (tree->gtGetOp2())
+ {
+ /* It's a binary operator */
+ if (!lvaLclVarRefsAccum(tree->gtOp.gtOp1, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ tree = tree->gtOp.gtOp2;
+ assert(tree);
+ goto AGAIN;
+ }
+ else
+ {
+ /* It's a unary (or nilary) operator */
+
+ tree = tree->gtOp.gtOp1;
+ if (tree)
+ {
+ goto AGAIN;
+ }
+
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ return true;
+ }
+ }
+
+ switch (oper)
+ {
+ case GT_ARR_ELEM:
+ if (!lvaLclVarRefsAccum(tree->gtArrElem.gtArrObj, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(tmpVs, VarSetOps::UninitVal());
+ if (!lvaLclVarRefsAccum(tree->gtArrElem.gtArrInds[dim], findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ }
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ return true;
+
+ case GT_ARR_OFFSET:
+ if (!lvaLclVarRefsAccum(tree->gtArrOffs.gtOffset, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ if (!lvaLclVarRefsAccum(tree->gtArrOffs.gtIndex, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ if (!lvaLclVarRefsAccum(tree->gtArrOffs.gtArrObj, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ return true;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ if (!lvaLclVarRefsAccum(tree->gtBoundsChk.gtArrLen, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ if (!lvaLclVarRefsAccum(tree->gtBoundsChk.gtIndex, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ return true;
+ }
+
+ case GT_STORE_DYN_BLK:
+ if (!lvaLclVarRefsAccum(tree->gtDynBlk.Data(), findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ __fallthrough;
+ case GT_DYN_BLK:
+ if (!lvaLclVarRefsAccum(tree->gtDynBlk.Addr(), findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ if (!lvaLclVarRefsAccum(tree->gtDynBlk.gtDynamicSize, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ break;
+
+ case GT_CALL:
+ /* Allow calls to the Shared Static helper */
+ if (IsSharedStaticHelper(tree))
+ {
+ *refsPtr = varRefKinds((*refsPtr) | VR_INVARIANT);
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ return true;
+ }
+ break;
+ default:
+ break;
+
+ } // end switch (oper)
+
+ return false;
+}
+
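+// Helper for lvaLclVarRefs: computes the variable references of "tree" via a recursive
+// call and unions them into "*allVars" (when searching with findPtr) or "*trkdVars".
+// Returns false if the recursive call fails for any of the reasons described above.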
+bool Compiler::lvaLclVarRefsAccum(
+ GenTreePtr tree, GenTreePtr* findPtr, varRefKinds* refsPtr, ALLVARSET_TP* allVars, VARSET_TP* trkdVars)
+{
+ if (findPtr)
+ {
+ ALLVARSET_TP ALLVARSET_INIT_NOCOPY(tmpVs, AllVarSetOps::UninitVal());
+ if (!lvaLclVarRefs(tree, findPtr, refsPtr, &tmpVs))
+ {
+ return false;
+ }
+ // Otherwise...
+ AllVarSetOps::UnionD(this, *allVars, tmpVs);
+ }
+ else
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(tmpVs, VarSetOps::UninitVal());
+ if (!lvaLclVarRefs(tree, findPtr, refsPtr, &tmpVs))
+ {
+ return false;
+ }
+ // Otherwise...
+ VarSetOps::UnionD(this, *trkdVars, tmpVs);
+ }
+ return true;
+}
+
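+// Helper for lvaLclVarRefs: copies the accumulated variable set into the caller's
+// "result" buffer -- an ALLVARSET_TP when findPtr is non-null, a VARSET_TP otherwise.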
+void Compiler::lvaLclVarRefsAccumIntoRes(GenTreePtr* findPtr,
+ void* result,
+ ALLVARSET_VALARG_TP allVars,
+ VARSET_VALARG_TP trkdVars)
+{
+ if (findPtr)
+ {
+ ALLVARSET_TP* avsPtr = (ALLVARSET_TP*)result;
+ AllVarSetOps::AssignNoCopy(this, (*avsPtr), allVars);
+ }
+ else
+ {
+ VARSET_TP* vsPtr = (VARSET_TP*)result;
+ VarSetOps::AssignNoCopy(this, (*vsPtr), trkdVars);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Return a relational operator that is the reverse of the given one.
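+ * For example, the reverse of GT_LT ("a < b") is GT_GE ("a >= b").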
+ */
+
+/* static */
+genTreeOps GenTree::ReverseRelop(genTreeOps relop)
+{
+ static const genTreeOps reverseOps[] = {
+ GT_NE, // GT_EQ
+ GT_EQ, // GT_NE
+ GT_GE, // GT_LT
+ GT_GT, // GT_LE
+ GT_LT, // GT_GE
+ GT_LE, // GT_GT
+ };
+
+ assert(reverseOps[GT_EQ - GT_EQ] == GT_NE);
+ assert(reverseOps[GT_NE - GT_EQ] == GT_EQ);
+
+ assert(reverseOps[GT_LT - GT_EQ] == GT_GE);
+ assert(reverseOps[GT_LE - GT_EQ] == GT_GT);
+ assert(reverseOps[GT_GE - GT_EQ] == GT_LT);
+ assert(reverseOps[GT_GT - GT_EQ] == GT_LE);
+
+ assert(OperIsCompare(relop));
+ assert(relop >= GT_EQ && (unsigned)(relop - GT_EQ) < sizeof(reverseOps));
+
+ return reverseOps[relop - GT_EQ];
+}
+
+/*****************************************************************************
+ *
+ * Return a relational operator that will work for swapped operands.
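+ * For example, "a < b" is equivalent to "b > a", so swapping GT_LT yields GT_GT.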
+ */
+
+/* static */
+genTreeOps GenTree::SwapRelop(genTreeOps relop)
+{
+ static const genTreeOps swapOps[] = {
+ GT_EQ, // GT_EQ
+ GT_NE, // GT_NE
+ GT_GT, // GT_LT
+ GT_GE, // GT_LE
+ GT_LE, // GT_GE
+ GT_LT, // GT_GT
+ };
+
+ assert(swapOps[GT_EQ - GT_EQ] == GT_EQ);
+ assert(swapOps[GT_NE - GT_EQ] == GT_NE);
+
+ assert(swapOps[GT_LT - GT_EQ] == GT_GT);
+ assert(swapOps[GT_LE - GT_EQ] == GT_GE);
+ assert(swapOps[GT_GE - GT_EQ] == GT_LE);
+ assert(swapOps[GT_GT - GT_EQ] == GT_LT);
+
+ assert(OperIsCompare(relop));
+ assert(relop >= GT_EQ && (unsigned)(relop - GT_EQ) < sizeof(swapOps));
+
+ return swapOps[relop - GT_EQ];
+}
+
+/*****************************************************************************
+ *
+ * Reverse the meaning of the given test condition.
+ */
+
+GenTreePtr Compiler::gtReverseCond(GenTree* tree)
+{
+ if (tree->OperIsCompare())
+ {
+ tree->SetOper(GenTree::ReverseRelop(tree->OperGet()));
+
+ // Flip the GTF_RELOP_NAN_UN bit
+ // a ord b === (a != NaN && b != NaN)
+ // a unord b === (a == NaN || b == NaN)
+ // => !(a ord b) === (a unord b)
+ if (varTypeIsFloating(tree->gtOp.gtOp1->TypeGet()))
+ {
+ tree->gtFlags ^= GTF_RELOP_NAN_UN;
+ }
+ }
+ else
+ {
+ tree = gtNewOperNode(GT_NOT, TYP_INT, tree);
+ }
+
+ return tree;
+}
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+
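+// Checks that a GT_MUL flagged with GTF_MUL_64RSLT has the expected shape: a
+// non-overflow TYP_LONG multiply whose operands are both casts from int, with
+// matching signedness on the casts and on the multiply itself.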
+bool GenTree::gtIsValid64RsltMul()
+{
+ if ((gtOper != GT_MUL) || !(gtFlags & GTF_MUL_64RSLT))
+ {
+ return false;
+ }
+
+ GenTreePtr op1 = gtOp.gtOp1;
+ GenTreePtr op2 = gtOp.gtOp2;
+
+ if (TypeGet() != TYP_LONG || op1->TypeGet() != TYP_LONG || op2->TypeGet() != TYP_LONG)
+ {
+ return false;
+ }
+
+ if (gtOverflow())
+ {
+ return false;
+ }
+
+ // op1 has to be conv.i8(i4Expr)
+ if ((op1->gtOper != GT_CAST) || (genActualType(op1->CastFromType()) != TYP_INT))
+ {
+ return false;
+ }
+
+ // op2 has to be conv.i8(i4Expr)
+ if ((op2->gtOper != GT_CAST) || (genActualType(op2->CastFromType()) != TYP_INT))
+ {
+ return false;
+ }
+
+ // The signedness of both casts must be the same
+ if (((op1->gtFlags & GTF_UNSIGNED) != 0) != ((op2->gtFlags & GTF_UNSIGNED) != 0))
+ {
+ return false;
+ }
+
+ // Do unsigned mul iff both the casts are unsigned
+ if (((op1->gtFlags & GTF_UNSIGNED) != 0) != ((gtFlags & GTF_UNSIGNED) != 0))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Figure out the evaluation order for a list of values.
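+ * Returns the highest Sethi complexity among the list elements, and sets each
+ * GT_LIST node's costs to the accumulated costs of its element and the rest of the list.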
+ */
+
+unsigned Compiler::gtSetListOrder(GenTree* list, bool regs)
+{
+ assert(list && list->IsList());
+
+ unsigned level = 0;
+ unsigned ftreg = 0;
+ unsigned costSz = 0;
+ unsigned costEx = 0;
+
+#if FEATURE_STACK_FP_X87
+ /* Save the current FP stack level since an argument list
+ * will implicitly pop the FP stack when pushing the argument */
+ unsigned FPlvlSave = codeGen->genGetFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+
+ GenTreePtr next = list->gtOp.gtOp2;
+
+ if (next)
+ {
+ unsigned nxtlvl = gtSetListOrder(next, regs);
+
+ ftreg |= next->gtRsvdRegs;
+
+ if (level < nxtlvl)
+ {
+ level = nxtlvl;
+ }
+ costEx += next->gtCostEx;
+ costSz += next->gtCostSz;
+ }
+
+ GenTreePtr op1 = list->gtOp.gtOp1;
+ unsigned lvl = gtSetEvalOrder(op1);
+
+#if FEATURE_STACK_FP_X87
+ /* restore the FP level */
+ codeGen->genResetFPstkLevel(FPlvlSave);
+#endif // FEATURE_STACK_FP_X87
+
+ list->gtRsvdRegs = (regMaskSmall)(ftreg | op1->gtRsvdRegs);
+
+ if (level < lvl)
+ {
+ level = lvl;
+ }
+
+ if (op1->gtCostEx != 0)
+ {
+ costEx += op1->gtCostEx;
+ costEx += regs ? 0 : IND_COST_EX;
+ }
+
+ if (op1->gtCostSz != 0)
+ {
+ costSz += op1->gtCostSz;
+#ifdef _TARGET_XARCH_
+ if (regs) // push is smaller than mov to reg
+#endif
+ {
+ costSz += 1;
+ }
+ }
+
+ list->SetCosts(costEx, costSz);
+
+ return level;
+}
+
+/*****************************************************************************
+ *
+ * This routine is a helper routine for gtSetEvalOrder() and is used to
+ * mark the interior address computation nodes with the GTF_ADDRMODE_NO_CSE flag
+ * which prevents them from being considered for CSE's.
+ *
+ * Furthermore this routine is a factoring of the logic used to walk down
+ * the child nodes of a GT_IND tree, similar to optParseArrayRef().
+ *
+ * Previously we had this logic repeated three times inside of gtSetEvalOrder().
+ * Here we combine those three repeats into this routine and use the
+ * bool constOnly to modify the behavior of this routine for the first call.
+ *
+ * The object here is to mark all of the interior GT_ADD's and GT_NOP's
+ * with the GTF_ADDRMODE_NO_CSE flag and to set op1 and op2 to the terminal nodes
+ * which are later matched against 'adr' and 'idx'.
+ *
+ * *pbHasRangeCheckBelow is set to false if we traverse a range check GT_NOP
+ * node in our walk. It remains unchanged otherwise.
+ *
+ * TODO-Cleanup: It is essentially impossible to determine
+ * what it is supposed to do, or to write a reasonable specification comment
+ * for it that describes what it is supposed to do. There are obviously some
+ * very specific tree patterns that it expects to see, but those are not documented.
+ * The fact that it writes back to its op1WB and op2WB arguments, and traverses
+ * down both op1 and op2 trees, but op2 is only related to op1 in the (!constOnly)
+ * case (which really seems like a bug) is very confusing.
+ */
+
+void Compiler::gtWalkOp(GenTree** op1WB, GenTree** op2WB, GenTree* adr, bool constOnly)
+{
+ GenTreePtr op1 = *op1WB;
+ GenTreePtr op2 = *op2WB;
+ GenTreePtr op1EffectiveVal;
+
+ if (op1->gtOper == GT_COMMA)
+ {
+ op1EffectiveVal = op1->gtEffectiveVal();
+ if ((op1EffectiveVal->gtOper == GT_ADD) && (!op1EffectiveVal->gtOverflow()) &&
+ (!constOnly || (op1EffectiveVal->gtOp.gtOp2->IsCnsIntOrI())))
+ {
+ op1 = op1EffectiveVal;
+ }
+ }
+
+ // Now we look for op1's with non-overflow GT_ADDs [of constants]
+ while ((op1->gtOper == GT_ADD) && (!op1->gtOverflow()) && (!constOnly || (op1->gtOp.gtOp2->IsCnsIntOrI())))
+ {
+ // mark it with GTF_ADDRMODE_NO_CSE
+ op1->gtFlags |= GTF_ADDRMODE_NO_CSE;
+
+ if (!constOnly)
+ { // TODO-Cleanup: It seems bizarre that this is !constOnly
+ op2 = op1->gtOp.gtOp2;
+ }
+ op1 = op1->gtOp.gtOp1;
+
+ // If op1 is a GT_NOP then swap op1 and op2.
+ // (Why? Also, presumably op2 is not a GT_NOP in this case?)
+ if (op1->gtOper == GT_NOP)
+ {
+ GenTreePtr tmp;
+
+ tmp = op1;
+ op1 = op2;
+ op2 = tmp;
+ }
+
+ if (op1->gtOper == GT_COMMA)
+ {
+ op1EffectiveVal = op1->gtEffectiveVal();
+ if ((op1EffectiveVal->gtOper == GT_ADD) && (!op1EffectiveVal->gtOverflow()) &&
+ (!constOnly || (op1EffectiveVal->gtOp.gtOp2->IsCnsIntOrI())))
+ {
+ op1 = op1EffectiveVal;
+ }
+ }
+
+ if (!constOnly && ((op2 == adr) || (!op2->IsCnsIntOrI())))
+ {
+ break;
+ }
+ }
+
+ *op1WB = op1;
+ *op2WB = op2;
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ * This is a workaround. It is to help implement an assert in gtSetEvalOrder() that the values
+ * gtWalkOp() leaves in op1 and op2 correspond with the values of adr, idx, mul, and cns
+ * that are returned by genCreateAddrMode(). It's essentially impossible to determine
+ * what gtWalkOp() *should* return for all possible trees. This simply loosens one assert
+ * to handle the following case:
+
+ indir int
+ const(h) int 4 field
+ + byref
+ lclVar byref V00 this <-- op2
+ comma byref <-- adr (base)
+ indir byte
+ lclVar byref V00 this
+ + byref
+ const int 2 <-- mul == 4
+ << int <-- op1
+ lclVar int V01 arg1 <-- idx
+
+ * Here, we are planning to generate the address mode [edx+4*eax], where eax = idx and edx = the GT_COMMA expression.
+ * To check adr equivalence with op2, we need to walk down the GT_ADD tree just like gtWalkOp() does.
+ */
+GenTreePtr Compiler::gtWalkOpEffectiveVal(GenTreePtr op)
+{
+ for (;;)
+ {
+ if (op->gtOper == GT_COMMA)
+ {
+ GenTreePtr opEffectiveVal = op->gtEffectiveVal();
+ if ((opEffectiveVal->gtOper == GT_ADD) && (!opEffectiveVal->gtOverflow()) &&
+ (opEffectiveVal->gtOp.gtOp2->IsCnsIntOrI()))
+ {
+ op = opEffectiveVal;
+ }
+ }
+
+ if ((op->gtOper != GT_ADD) || op->gtOverflow() || !op->gtOp.gtOp2->IsCnsIntOrI())
+ {
+ break;
+ }
+
+ op = op->gtOp.gtOp1;
+ }
+
+ return op;
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Given a tree, set the gtCostEx and gtCostSz fields which
+ * are used to measure the relative costs of the codegen of the tree
+ *
+ */
+
+void Compiler::gtPrepareCost(GenTree* tree)
+{
+#if FEATURE_STACK_FP_X87
+ codeGen->genResetFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+ gtSetEvalOrder(tree);
+}
+
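+// Returns true if the given tree is a GT_LCL_VAR that is likely to end up in a
+// register: the local must not be marked lvDoNotEnregister and must have a weighted
+// ref count of at least 3 * BB_UNITY_WEIGHT; on x86, floating-point and long locals
+// are excluded.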
+bool Compiler::gtIsLikelyRegVar(GenTree* tree)
+{
+ if (tree->gtOper != GT_LCL_VAR)
+ {
+ return false;
+ }
+
+ assert(tree->gtLclVar.gtLclNum < lvaTableCnt);
+ LclVarDsc* varDsc = lvaTable + tree->gtLclVar.gtLclNum;
+
+ if (varDsc->lvDoNotEnregister)
+ {
+ return false;
+ }
+
+ if (varDsc->lvRefCntWtd < (BB_UNITY_WEIGHT * 3))
+ {
+ return false;
+ }
+
+#ifdef _TARGET_X86_
+ if (varTypeIsFloating(tree->TypeGet()))
+ return false;
+ if (varTypeIsLong(tree->TypeGet()))
+ return false;
+#endif
+
+ return true;
+}
+
+//------------------------------------------------------------------------
+// gtCanSwapOrder: Returns true iff the secondNode can be swapped with firstNode.
+//
+// Arguments:
+// firstNode - An operand of a tree that can have GTF_REVERSE_OPS set.
+// secondNode - The other operand of the tree.
+//
+// Return Value:
+// Returns a boolean indicating whether it is safe to reverse the execution
+// order of the two trees, considering any exception, global effects, or
+// ordering constraints.
+//
+bool Compiler::gtCanSwapOrder(GenTree* firstNode, GenTree* secondNode)
+{
+ // The relative order of global / side effects can't be swapped.
+
+ bool canSwap = true;
+
+ if (optValnumCSE_phase)
+ {
+ canSwap = optCSE_canSwap(firstNode, secondNode);
+ }
+
+ // We cannot swap in the presence of special side effects such as GT_CATCH_ARG.
+
+ if (canSwap && (firstNode->gtFlags & GTF_ORDER_SIDEEFF))
+ {
+ canSwap = false;
+ }
+
+ // When strict side effect order is disabled we allow GTF_REVERSE_OPS to be set
+ // when one or both sides contains a GTF_CALL or GTF_EXCEPT.
+ // Currently only the C and C++ languages allow non strict side effect order.
+
+ unsigned strictEffects = GTF_GLOB_EFFECT;
+
+ if (canSwap && (firstNode->gtFlags & strictEffects))
+ {
+ // op1 has side effects that can't be reordered.
+ // Check for some special cases where we still may be able to swap.
+
+ if (secondNode->gtFlags & strictEffects)
+ {
+ // op2 also has non-reorderable side effects - can't swap.
+ canSwap = false;
+ }
+ else
+ {
+ // No side effects in op2 - we can swap iff op1 has no way of modifying op2,
+ // (i.e. through byref assignments or calls), or if op2 is a constant.
+
+ if (firstNode->gtFlags & strictEffects & GTF_PERSISTENT_SIDE_EFFECTS)
+ {
+ // We have to be conservative - can swap iff op2 is constant.
+ if (!secondNode->OperIsConst())
+ {
+ canSwap = false;
+ }
+ }
+ }
+ }
+ return canSwap;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree, figure out the order in which its sub-operands should be
+ * evaluated. If the second operand of a binary operator is more expensive
+ * than the first operand, then try to swap the operand trees. Updates the
+ * GTF_REVERSE_OPS bit if necessary in this case.
+ *
+ * Returns the Sethi 'complexity' estimate for this tree (the higher
+ * the number, the higher is the tree's resources requirement).
+ *
+ * This function sets:
+ * 1. gtCostEx to the execution complexity estimate
+ * 2. gtCostSz to the code size estimate
+ * 3. gtRsvdRegs to the set of fixed registers trashed by the tree
+ * 4. gtFPlvl to the "floating point depth" value for node, i.e. the max. number
+ * of operands the tree will push on the x87 (coprocessor) stack. Also sets
+ * genFPstkLevel, tmpDoubleSpillMax, and possibly gtFPstLvlRedo.
+ * 5. Sometimes sets GTF_ADDRMODE_NO_CSE on nodes in the tree.
+ * 6. DEBUG-only: clears GTF_DEBUG_NODE_MORPHED.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+unsigned Compiler::gtSetEvalOrder(GenTree* tree)
+{
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+
+#ifdef DEBUG
+ /* Clear the GTF_DEBUG_NODE_MORPHED flag as well */
+ tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ /* Is this a FP value? */
+
+ bool isflt = varTypeIsFloating(tree->TypeGet());
+ unsigned FPlvlSave;
+
+ /* Figure out what kind of a node we have */
+
+ genTreeOps oper = tree->OperGet();
+ unsigned kind = tree->OperKind();
+
+ /* Assume no fixed registers will be trashed */
+
+ regMaskTP ftreg = RBM_NONE; // Set of registers that will be used by the subtree
+ unsigned level;
+ int costEx;
+ int costSz;
+
+ bool bRngChk;
+
+#ifdef DEBUG
+ costEx = -1;
+ costSz = -1;
+#endif
+
+ /* Is this a constant or a leaf node? */
+
+ if (kind & (GTK_LEAF | GTK_CONST))
+ {
+ switch (oper)
+ {
+ bool iconNeedsReloc;
+
+#ifdef _TARGET_ARM_
+ case GT_CNS_LNG:
+ costSz = 9;
+ costEx = 4;
+ goto COMMON_CNS;
+
+ case GT_CNS_STR:
+ // Uses movw/movt
+ costSz = 7;
+ costEx = 3;
+ goto COMMON_CNS;
+
+ case GT_CNS_INT:
+
+ // If the constant is a handle then it will need to have a relocation
+ // applied to it.
+ // Any constant that requires a reloc must use the movw/movt sequence
+ //
+ iconNeedsReloc = opts.compReloc && tree->IsIconHandle() && !tree->IsIconHandle(GTF_ICON_FIELD_HDL);
+
+ if (iconNeedsReloc || !codeGen->validImmForInstr(INS_mov, tree->gtIntCon.gtIconVal))
+ {
+ // Uses movw/movt
+ costSz = 7;
+ costEx = 3;
+ }
+ else if (((unsigned)tree->gtIntCon.gtIconVal) <= 0x00ff)
+ {
+ // mov Rd, <const8>
+ costSz = 1;
+ costEx = 1;
+ }
+ else
+ {
+ // Uses movw/mvn
+ costSz = 3;
+ costEx = 1;
+ }
+ goto COMMON_CNS;
+
+#elif defined _TARGET_XARCH_
+
+ case GT_CNS_LNG:
+ costSz = 10;
+ costEx = 3;
+ goto COMMON_CNS;
+
+ case GT_CNS_STR:
+ costSz = 4;
+ costEx = 1;
+ goto COMMON_CNS;
+
+ case GT_CNS_INT:
+
+ // If the constant is a handle then it will need to have a relocation
+ // applied to it.
+ // Any constant that requires a reloc cannot be encoded as a small immediate
+ //
+ iconNeedsReloc = opts.compReloc && tree->IsIconHandle() && !tree->IsIconHandle(GTF_ICON_FIELD_HDL);
+
+ if (!iconNeedsReloc && (((signed char)tree->gtIntCon.gtIconVal) == tree->gtIntCon.gtIconVal))
+ {
+ costSz = 1;
+ costEx = 1;
+ }
+#if defined(_TARGET_AMD64_)
+ else if (iconNeedsReloc || ((tree->gtIntCon.gtIconVal & 0xFFFFFFFF00000000LL) != 0))
+ {
+ costSz = 10;
+ costEx = 3;
+ }
+#endif // _TARGET_AMD64_
+ else
+ {
+ costSz = 4;
+ costEx = 1;
+ }
+ goto COMMON_CNS;
+
+#elif defined(_TARGET_ARM64_)
+ case GT_CNS_LNG:
+ case GT_CNS_STR:
+ case GT_CNS_INT:
+ // TODO-ARM64-NYI: Need cost estimates.
+ costSz = 1;
+ costEx = 1;
+ goto COMMON_CNS;
+
+#else
+ case GT_CNS_LNG:
+ case GT_CNS_STR:
+ case GT_CNS_INT:
+#error "Unknown _TARGET_"
+#endif
+
+ COMMON_CNS:
+ /*
+ Note that some code below depends on constants always getting
+ moved to be the second operand of a binary operator. This is
+ easily accomplished by giving constants a level of 0, which
+ we do on the next line. If you ever decide to change this, be
+ aware that unless you make other arrangements for integer
+ constants to be moved, stuff will break.
+ */
+
+ level = 0;
+ break;
+
+ case GT_CNS_DBL:
+ level = 0;
+ /* We use fldz and fld1 to load 0.0 and 1.0, but all other */
+ /* floating point constants are loaded using an indirection */
+ if ((*((__int64*)&(tree->gtDblCon.gtDconVal)) == 0) ||
+ (*((__int64*)&(tree->gtDblCon.gtDconVal)) == I64(0x3ff0000000000000)))
+ {
+ costEx = 1;
+ costSz = 1;
+ }
+ else
+ {
+ costEx = IND_COST_EX;
+ costSz = 4;
+ }
+ break;
+
+ case GT_LCL_VAR:
+ level = 1;
+ if (gtIsLikelyRegVar(tree))
+ {
+ costEx = 1;
+ costSz = 1;
+ /* Sign-extend and zero-extend are more expensive to load */
+ if (lvaTable[tree->gtLclVar.gtLclNum].lvNormalizeOnLoad())
+ {
+ costEx += 1;
+ costSz += 1;
+ }
+ }
+ else
+ {
+ costEx = IND_COST_EX;
+ costSz = 2;
+ /* Sign-extend and zero-extend are more expensive to load */
+ if (varTypeIsSmall(tree->TypeGet()))
+ {
+ costEx += 1;
+ costSz += 1;
+ }
+ }
+#if defined(_TARGET_AMD64_)
+ // increase costSz for floating point locals
+ if (isflt)
+ {
+ costSz += 1;
+ if (!gtIsLikelyRegVar(tree))
+ {
+ costSz += 1;
+ }
+ }
+#endif
+#if CPU_LONG_USES_REGPAIR
+ if (varTypeIsLong(tree->TypeGet()))
+ {
+ costEx *= 2; // Longs are twice as expensive
+ costSz *= 2;
+ }
+#endif
+ break;
+
+ case GT_CLS_VAR:
+#ifdef _TARGET_ARM_
+ // We generate movw/movt/ldr
+ level = 1;
+ costEx = 3 + IND_COST_EX; // 6
+ costSz = 4 + 4 + 2; // 10
+ break;
+#endif
+ case GT_LCL_FLD:
+ level = 1;
+ costEx = IND_COST_EX;
+ costSz = 4;
+ if (varTypeIsSmall(tree->TypeGet()))
+ {
+ costEx += 1;
+ costSz += 1;
+ }
+ break;
+
+ case GT_PHI_ARG:
+ case GT_ARGPLACE:
+ level = 0;
+ costEx = 0;
+ costSz = 0;
+ break;
+
+ default:
+ level = 1;
+ costEx = 1;
+ costSz = 1;
+ break;
+ }
+#if FEATURE_STACK_FP_X87
+ if (isflt && (oper != GT_PHI_ARG))
+ {
+ codeGen->genIncrementFPstkLevel();
+ }
+#endif // FEATURE_STACK_FP_X87
+ goto DONE;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ int lvlb; // preference for op2
+ unsigned lvl2; // scratch variable
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ costEx = 0;
+ costSz = 0;
+
+ if (tree->OperIsAddrMode())
+ {
+ if (op1 == nullptr)
+ {
+ op1 = op2;
+ op2 = nullptr;
+ }
+ }
+
+ /* Check for a nilary operator */
+
+ if (op1 == nullptr)
+ {
+ assert(op2 == nullptr);
+
+ level = 0;
+
+ goto DONE;
+ }
+
+ /* Is this a unary operator? */
+
+ if (op2 == nullptr)
+ {
+ /* Process the operand of the operator */
+
+ /* Most Unary ops have costEx of 1 */
+ costEx = 1;
+ costSz = 1;
+
+ level = gtSetEvalOrder(op1);
+ ftreg |= op1->gtRsvdRegs;
+
+ /* Special handling for some operators */
+
+ switch (oper)
+ {
+ case GT_JTRUE:
+ costEx = 2;
+ costSz = 2;
+ break;
+
+ case GT_SWITCH:
+ costEx = 10;
+ costSz = 5;
+ break;
+
+ case GT_CAST:
+#if defined(_TARGET_ARM_)
+ costEx = 1;
+ costSz = 1;
+ if (isflt || varTypeIsFloating(op1->TypeGet()))
+ {
+ costEx = 3;
+ costSz = 4;
+ }
+#elif defined(_TARGET_ARM64_)
+ costEx = 1;
+ costSz = 2;
+ if (isflt || varTypeIsFloating(op1->TypeGet()))
+ {
+ costEx = 2;
+ costSz = 4;
+ }
+#elif defined(_TARGET_XARCH_)
+ costEx = 1;
+ costSz = 2;
+
+ if (isflt || varTypeIsFloating(op1->TypeGet()))
+ {
+ /* cast involving floats always go through memory */
+ costEx = IND_COST_EX * 2;
+ costSz = 6;
+
+#if FEATURE_STACK_FP_X87
+ if (isflt != varTypeIsFloating(op1->TypeGet()))
+ {
+ isflt ? codeGen->genIncrementFPstkLevel() // Cast from int to float
+ : codeGen->genDecrementFPstkLevel(); // Cast from float to int
+ }
+#endif // FEATURE_STACK_FP_X87
+ }
+#else
+#error "Unknown _TARGET_"
+#endif
+
+#if CPU_LONG_USES_REGPAIR
+ if (varTypeIsLong(tree->TypeGet()))
+ {
+ if (varTypeIsUnsigned(tree->TypeGet()))
+ {
+ /* Cast to unsigned long */
+ costEx += 1;
+ costSz += 2;
+ }
+ else
+ {
+ /* Cast to signed long is slightly more costly */
+ costEx += 2;
+ costSz += 3;
+ }
+ }
+#endif // CPU_LONG_USES_REGPAIR
+
+ /* Overflow casts are a lot more expensive */
+ if (tree->gtOverflow())
+ {
+ costEx += 6;
+ costSz += 6;
+ }
+
+ break;
+
+ case GT_LIST:
+ case GT_NOP:
+ costEx = 0;
+ costSz = 0;
+ break;
+
+ case GT_INTRINSIC:
+ // GT_INTRINSIC intrinsics Sin, Cos, Sqrt, Abs ... have higher costs.
+ // TODO: tune these costs per target, as some of these are
+ // target intrinsics and would be cheaper to generate code for.
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ default:
+ assert(!"missing case for gtIntrinsicId");
+ costEx = 12;
+ costSz = 12;
+ break;
+
+ case CORINFO_INTRINSIC_Sin:
+ case CORINFO_INTRINSIC_Cos:
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Cosh:
+ case CORINFO_INTRINSIC_Sinh:
+ case CORINFO_INTRINSIC_Tan:
+ case CORINFO_INTRINSIC_Tanh:
+ case CORINFO_INTRINSIC_Asin:
+ case CORINFO_INTRINSIC_Acos:
+ case CORINFO_INTRINSIC_Atan:
+ case CORINFO_INTRINSIC_Atan2:
+ case CORINFO_INTRINSIC_Log10:
+ case CORINFO_INTRINSIC_Pow:
+ case CORINFO_INTRINSIC_Exp:
+ case CORINFO_INTRINSIC_Ceiling:
+ case CORINFO_INTRINSIC_Floor:
+ case CORINFO_INTRINSIC_Object_GetType:
+ // We give these intrinsics a large fixed execution cost because we'd like to CSE
+ // them, even if they are implemented by calls. This is different from modeling
+ // user calls since we never CSE user calls.
+ costEx = 36;
+ costSz = 4;
+ break;
+
+ case CORINFO_INTRINSIC_Abs:
+ costEx = 5;
+ costSz = 15;
+ break;
+
+ case CORINFO_INTRINSIC_Round:
+ costEx = 3;
+ costSz = 4;
+#if FEATURE_STACK_FP_X87
+ if (tree->TypeGet() == TYP_INT)
+ {
+ // This is a special case to handle the following
+ // optimization: conv.i4(round.d(d)) -> round.i(d)
+ codeGen->genDecrementFPstkLevel();
+ }
+#endif // FEATURE_STACK_FP_X87
+ break;
+ }
+ level++;
+ break;
+
+ case GT_NOT:
+ case GT_NEG:
+ // We need to ensure that -x is evaluated before x or else
+ // we get burned while adjusting genFPstkLevel in x*-x where
+ // the rhs x is the last use of the enregistered x.
+ //
+ // Even in the integer case we want to prefer to
+ // evaluate the side without the GT_NEG node, all other things
+ // being equal. Also a GT_NOT requires a scratch register
+
+ level++;
+ break;
+
+ case GT_ADDR:
+
+#if FEATURE_STACK_FP_X87
+ /* If the operand was floating point, pop the value from the stack */
+
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ codeGen->genDecrementFPstkLevel();
+ }
+#endif // FEATURE_STACK_FP_X87
+ costEx = 0;
+ costSz = 1;
+
+ // If we have a GT_ADDR of a GT_IND we can just copy the costs from indOp1
+ if (op1->OperGet() == GT_IND)
+ {
+ GenTreePtr indOp1 = op1->gtOp.gtOp1;
+ costEx = indOp1->gtCostEx;
+ costSz = indOp1->gtCostSz;
+ }
+ break;
+
+ case GT_ARR_LENGTH:
+ level++;
+
+ /* Array length should cost the same as an indirection, which has a costEx of IND_COST_EX */
+ costEx = IND_COST_EX - 1;
+ costSz = 2;
+ break;
+
+ case GT_MKREFANY:
+ case GT_OBJ:
+ // We estimate the cost of a GT_OBJ or GT_MKREFANY to be two loads (GT_INDs)
+ costEx = 2 * IND_COST_EX;
+ costSz = 2 * 2;
+ break;
+
+ case GT_BOX:
+ // We estimate the cost of a GT_BOX to be two stores (GT_INDs)
+ costEx = 2 * IND_COST_EX;
+ costSz = 2 * 2;
+ break;
+
+ case GT_BLK:
+ case GT_IND:
+
+ /* An indirection should always have a non-zero level.
+ * Only constant leaf nodes have level 0.
+ */
+
+ if (level == 0)
+ {
+ level = 1;
+ }
+
+ /* Indirections have a costEx of IND_COST_EX */
+ costEx = IND_COST_EX;
+ costSz = 2;
+
+ /* If we have to sign-extend or zero-extend, bump the cost */
+ if (varTypeIsSmall(tree->TypeGet()))
+ {
+ costEx += 1;
+ costSz += 1;
+ }
+
+ if (isflt)
+ {
+#if FEATURE_STACK_FP_X87
+ /* Indirect loads of FP values push a new value on the FP stack */
+ codeGen->genIncrementFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+ if (tree->TypeGet() == TYP_DOUBLE)
+ {
+ costEx += 1;
+ }
+#ifdef _TARGET_ARM_
+ costSz += 2;
+#endif // _TARGET_ARM_
+ }
+
+ // Can we form an addressing mode with this indirection?
+ // TODO-CQ: Consider changing this to op1->gtEffectiveVal() to take into account
+ // addressing modes hidden under a comma node.
+
+ if (op1->gtOper == GT_ADD)
+ {
+ bool rev;
+#if SCALED_ADDR_MODES
+ unsigned mul;
+#endif
+ unsigned cns;
+ GenTreePtr base;
+ GenTreePtr idx;
+
+ // See if we can form a complex addressing mode.
+
+ GenTreePtr addr = op1->gtEffectiveVal();
+
+ bool doAddrMode = true;
+ // Always use an addrMode for an array index indirection.
+ // TODO-1stClassStructs: Always do this, but first make sure it's
+ // done in Lowering as well.
+ if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
+ {
+ if (tree->TypeGet() == TYP_STRUCT)
+ {
+ doAddrMode = false;
+ }
+ else if (varTypeIsStruct(tree))
+ {
+ // This is a heuristic attempting to match prior behavior when indirections
+ // under a struct assignment would not be considered for addressing modes.
+ if (compCurStmt != nullptr)
+ {
+ GenTree* expr = compCurStmt->gtStmt.gtStmtExpr;
+ if ((expr->OperGet() == GT_ASG) &&
+ ((expr->gtGetOp1() == tree) || (expr->gtGetOp2() == tree)))
+ {
+ doAddrMode = false;
+ }
+ }
+ }
+ }
+ if ((doAddrMode) &&
+ codeGen->genCreateAddrMode(addr, // address
+ 0, // mode
+ false, // fold
+ RBM_NONE, // reg mask
+ &rev, // reverse ops
+ &base, // base addr
+ &idx, // index val
+#if SCALED_ADDR_MODES
+ &mul, // scaling
+#endif
+ &cns, // displacement
+ true)) // don't generate code
+ {
+ // We can form a complex addressing mode, so mark each of the interior
+ // nodes with GTF_ADDRMODE_NO_CSE and calculate a more accurate cost.
+
+ addr->gtFlags |= GTF_ADDRMODE_NO_CSE;
+#ifdef _TARGET_XARCH_
+ // addrmodeCount is the count of items that we used to form
+ // an addressing mode. The maximum value is 4 when we have
+ // all of these: { base, idx, cns, mul }
+ //
+ unsigned addrmodeCount = 0;
+ if (base)
+ {
+ costEx += base->gtCostEx;
+ costSz += base->gtCostSz;
+ addrmodeCount++;
+ }
+
+ if (idx)
+ {
+ costEx += idx->gtCostEx;
+ costSz += idx->gtCostSz;
+ addrmodeCount++;
+ }
+
+ if (cns)
+ {
+ if (((signed char)cns) == ((int)cns))
+ {
+ costSz += 1;
+ }
+ else
+ {
+ costSz += 4;
+ }
+ addrmodeCount++;
+ }
+ if (mul)
+ {
+ addrmodeCount++;
+ }
+ // When we form a complex addressing mode we can reduce the costs
+ // associated with the interior GT_ADD and GT_LSH nodes:
+ //
+ // GT_ADD -- reduce this interior GT_ADD by (-3,-3)
+ // / \ --
+ // GT_ADD 'cns' -- reduce this interior GT_ADD by (-2,-2)
+ // / \ --
+ // 'base' GT_LSH -- reduce this interior GT_LSH by (-1,-1)
+ // / \ --
+ // 'idx' 'mul'
+ //
+ if (addrmodeCount > 1)
+ {
+ // The number of interior GT_ADD and GT_LSH nodes will always be one less than addrmodeCount
+ //
+ addrmodeCount--;
+
+ GenTreePtr tmp = addr;
+ while (addrmodeCount > 0)
+ {
+ // decrement the gtCosts for the interior GT_ADD or GT_LSH node by the remaining
+ // addrmodeCount
+ tmp->SetCosts(tmp->gtCostEx - addrmodeCount, tmp->gtCostSz - addrmodeCount);
+
+ addrmodeCount--;
+ if (addrmodeCount > 0)
+ {
+ GenTreePtr tmpOp1 = tmp->gtOp.gtOp1;
+ GenTreePtr tmpOp2 = tmp->gtGetOp2();
+ assert(tmpOp2 != nullptr);
+
+ if ((tmpOp1 != base) && (tmpOp1->OperGet() == GT_ADD))
+ {
+ tmp = tmpOp1;
+ }
+ else if (tmpOp2->OperGet() == GT_LSH)
+ {
+ tmp = tmpOp2;
+ }
+ else if (tmpOp1->OperGet() == GT_LSH)
+ {
+ tmp = tmpOp1;
+ }
+ else if (tmpOp2->OperGet() == GT_ADD)
+ {
+ tmp = tmpOp2;
+ }
+ else
+ {
+ // We can very rarely encounter a tree that has a GT_COMMA node
+ // that is difficult to walk, so we just early out without decrementing.
+ addrmodeCount = 0;
+ }
+ }
+ }
+ }
+#elif defined _TARGET_ARM_
+ if (base)
+ {
+ costEx += base->gtCostEx;
+ costSz += base->gtCostSz;
+ if ((base->gtOper == GT_LCL_VAR) && ((idx == NULL) || (cns == 0)))
+ {
+ costSz -= 1;
+ }
+ }
+
+ if (idx)
+ {
+ costEx += idx->gtCostEx;
+ costSz += idx->gtCostSz;
+ if (mul > 0)
+ {
+ costSz += 2;
+ }
+ }
+
+ if (cns)
+ {
+ if (cns >= 128) // small offsets fit into a 16-bit instruction
+ {
+ if (cns < 4096) // medium offsets require a 32-bit instruction
+ {
+ if (!isflt)
+ costSz += 2;
+ }
+ else
+ {
+ costEx += 2; // Very large offsets require movw/movt instructions
+ costSz += 8;
+ }
+ }
+ }
+#elif defined _TARGET_ARM64_
+ if (base)
+ {
+ costEx += base->gtCostEx;
+ costSz += base->gtCostSz;
+ }
+
+ if (idx)
+ {
+ costEx += idx->gtCostEx;
+ costSz += idx->gtCostSz;
+ }
+
+ if (cns != 0)
+ {
+ if (cns >= (4096 * genTypeSize(tree->TypeGet())))
+ {
+ costEx += 1;
+ costSz += 4;
+ }
+ }
+#else
+#error "Unknown _TARGET_"
+#endif
+
+ assert(addr->gtOper == GT_ADD);
+ assert(!addr->gtOverflow());
+ assert(op2 == nullptr);
+ assert(mul != 1);
+
+ // If we have an addressing mode, we have one of:
+ // [base + cns]
+ // [ idx * mul ] // mul >= 2, else we would use base instead of idx
+ // [ idx * mul + cns] // mul >= 2, else we would use base instead of idx
+ // [base + idx * mul ] // mul can be 0, 2, 4, or 8
+ // [base + idx * mul + cns] // mul can be 0, 2, 4, or 8
+ // Note that mul == 0 is semantically equivalent to mul == 1.
+ // Note that cns can be zero.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if SCALED_ADDR_MODES
+ assert((base != nullptr) || (idx != nullptr && mul >= 2));
+#else
+ assert(base != NULL);
+#endif
+
+ INDEBUG(GenTreePtr op1Save = addr);
+
+ /* Walk addr looking for non-overflow GT_ADDs */
+ gtWalkOp(&addr, &op2, base, false);
+
+ // addr and op2 are now children of the root GT_ADD of the addressing mode
+ assert(addr != op1Save);
+ assert(op2 != nullptr);
+
+ /* Walk addr looking for non-overflow GT_ADDs of constants */
+ gtWalkOp(&addr, &op2, nullptr, true);
+
+ // TODO-Cleanup: It seems very strange that we might walk down op2 now, even though
+ // the prior call to gtWalkOp() may have altered op2.
+
+ /* Walk op2 looking for non-overflow GT_ADDs of constants */
+ gtWalkOp(&op2, &addr, nullptr, true);
+
+ // OK we are done walking the tree
+ // Now assert that addr and op2 correspond with base and idx
+ // in one of the several acceptable ways.
+
+ // Note that sometimes addr/op2 is equal to idx/base
+ // and other times addr/op2 is a GT_COMMA node with
+ // an effective value that is idx/base
+
+ if (mul > 1)
+ {
+ if ((addr != base) && (addr->gtOper == GT_LSH))
+ {
+ addr->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ if (addr->gtOp.gtOp1->gtOper == GT_MUL)
+ {
+ addr->gtOp.gtOp1->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ }
+ assert((base == nullptr) || (op2 == base) ||
+ (op2->gtEffectiveVal() == base->gtEffectiveVal()) ||
+ (gtWalkOpEffectiveVal(op2) == gtWalkOpEffectiveVal(base)));
+ }
+ else
+ {
+ assert(op2);
+ assert(op2->gtOper == GT_LSH || op2->gtOper == GT_MUL);
+ op2->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ // We may have eliminated multiple shifts and multiplies in the addressing mode,
+ // so navigate down through them to get to "idx".
+ GenTreePtr op2op1 = op2->gtOp.gtOp1;
+ while ((op2op1->gtOper == GT_LSH || op2op1->gtOper == GT_MUL) && op2op1 != idx)
+ {
+ op2op1->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ op2op1 = op2op1->gtOp.gtOp1;
+ }
+ assert(addr->gtEffectiveVal() == base);
+ assert(op2op1 == idx);
+ }
+ }
+ else
+ {
+ assert(mul == 0);
+
+ if ((addr == idx) || (addr->gtEffectiveVal() == idx))
+ {
+ if (idx != nullptr)
+ {
+ if ((addr->gtOper == GT_MUL) || (addr->gtOper == GT_LSH))
+ {
+ if ((addr->gtOp.gtOp1->gtOper == GT_NOP) ||
+ (addr->gtOp.gtOp1->gtOper == GT_MUL &&
+ addr->gtOp.gtOp1->gtOp.gtOp1->gtOper == GT_NOP))
+ {
+ addr->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ if (addr->gtOp.gtOp1->gtOper == GT_MUL)
+ {
+ addr->gtOp.gtOp1->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ }
+ }
+ }
+ }
+ assert((op2 == base) || (op2->gtEffectiveVal() == base));
+ }
+ else if ((addr == base) || (addr->gtEffectiveVal() == base))
+ {
+ if (idx != nullptr)
+ {
+ assert(op2);
+ if ((op2->gtOper == GT_MUL) || (op2->gtOper == GT_LSH))
+ {
+ if ((op2->gtOp.gtOp1->gtOper == GT_NOP) ||
+ (op2->gtOp.gtOp1->gtOper == GT_MUL &&
+ op2->gtOp.gtOp1->gtOp.gtOp1->gtOper == GT_NOP))
+ {
+ // assert(bRngChk);
+ op2->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ if (op2->gtOp.gtOp1->gtOper == GT_MUL)
+ {
+ op2->gtOp.gtOp1->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ }
+ }
+ }
+ assert((op2 == idx) || (op2->gtEffectiveVal() == idx));
+ }
+ }
+ else
+ {
+ // addr isn't base or idx. Is this possible? Or should there be an assert?
+ }
+ }
+ goto DONE;
+
+ } // end if (genCreateAddrMode(...))
+
+ } // end if (op1->gtOper == GT_ADD)
+ else if (gtIsLikelyRegVar(op1))
+ {
+ /* Indirection of an enregistered LCL_VAR, don't increase costEx/costSz */
+ goto DONE;
+ }
+#ifdef _TARGET_XARCH_
+ else if (op1->IsCnsIntOrI())
+ {
+ // Indirection of a CNS_INT: subtract 1 from costEx,
+ // which makes costEx 3 for x86 and 4 for amd64
+ //
+ costEx += (op1->gtCostEx - 1);
+ costSz += op1->gtCostSz;
+ goto DONE;
+ }
+#endif
+ break;
+
+ default:
+ break;
+ }
+ costEx += op1->gtCostEx;
+ costSz += op1->gtCostSz;
+ goto DONE;
+ }
+
+ /* Binary operator - check for certain special cases */
+
+ lvlb = 0;
+
+ /* Default Binary ops have a cost of 1,1 */
+ costEx = 1;
+ costSz = 1;
+
+#ifdef _TARGET_ARM_
+ if (isflt)
+ {
+ costSz += 2;
+ }
+#endif
+#ifndef _TARGET_64BIT_
+ if (varTypeIsLong(op1->TypeGet()))
+ {
+ /* Operations on longs are more expensive */
+ costEx += 3;
+ costSz += 3;
+ }
+#endif
+ switch (oper)
+ {
+ case GT_MOD:
+ case GT_UMOD:
+
+ /* Modulo by a power of 2 is easy */
+
+ if (op2->IsCnsIntOrI())
+ {
+ size_t ival = op2->gtIntConCommon.IconValue();
+
+ if (ival > 0 && ival == genFindLowestBit(ival))
+ {
+ break;
+ }
+ }
+
+ __fallthrough;
+
+ case GT_DIV:
+ case GT_UDIV:
+
+ if (isflt)
+ {
+ /* fp division is very expensive to execute */
+ costEx = 36; // TYP_DOUBLE
+ costSz += 3;
+ }
+ else
+ {
+ /* integer division is also very expensive */
+ costEx = 20;
+ costSz += 2;
+
+ // Encourage the first operand to be evaluated (into EAX/EDX) first
+ lvlb -= 3;
+
+#ifdef _TARGET_XARCH_
+ // the idiv and div instructions require EAX/EDX
+ ftreg |= RBM_EAX | RBM_EDX;
+#endif
+ }
+ break;
+
+ case GT_MUL:
+
+ if (isflt)
+ {
+ /* FP multiplication instructions are more expensive */
+ costEx += 4;
+ costSz += 3;
+ }
+ else
+ {
+ /* Integer multiplication instructions are more expensive */
+ costEx += 3;
+ costSz += 2;
+
+ if (tree->gtOverflow())
+ {
+ /* Overflow checks are more expensive */
+ costEx += 3;
+ costSz += 3;
+ }
+
+#ifdef _TARGET_X86_
+ if ((tree->gtType == TYP_LONG) || tree->gtOverflow())
+ {
+ /* We use imulEAX for TYP_LONG and overflow multiplications */
+ // Encourage the first operand to be evaluated (into EAX/EDX) first
+ lvlb -= 4;
+
+ // the imulEAX instruction on x86 requires EDX:EAX
+ ftreg |= (RBM_EAX | RBM_EDX);
+
+ /* The 64-bit imul instruction costs more */
+ costEx += 4;
+ }
+#endif // _TARGET_X86_
+ }
+ break;
+
+ case GT_ADD:
+ case GT_SUB:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+
+ if (isflt)
+ {
+ /* FP instructions are a bit more expensive */
+ costEx += 4;
+ costSz += 3;
+ break;
+ }
+
+ /* Overflow checks are more expensive */
+ if (tree->gtOverflow())
+ {
+ costEx += 3;
+ costSz += 3;
+ }
+ break;
+
+ case GT_COMMA:
+
+ /* Comma tosses the result of the left operand */
+ gtSetEvalOrderAndRestoreFPstkLevel(op1);
+ level = gtSetEvalOrder(op2);
+
+ ftreg |= op1->gtRsvdRegs | op2->gtRsvdRegs;
+
+ /* GT_COMMA cost is the sum of op1 and op2 costs */
+ costEx = (op1->gtCostEx + op2->gtCostEx);
+ costSz = (op1->gtCostSz + op2->gtCostSz);
+
+ goto DONE;
+
+ case GT_COLON:
+
+ level = gtSetEvalOrderAndRestoreFPstkLevel(op1);
+ lvl2 = gtSetEvalOrder(op2);
+
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ else if (level == lvl2)
+ {
+ level += 1;
+ }
+
+ ftreg |= op1->gtRsvdRegs | op2->gtRsvdRegs;
+ costEx = op1->gtCostEx + op2->gtCostEx;
+ costSz = op1->gtCostSz + op2->gtCostSz;
+
+ goto DONE;
+
+ default:
+ break;
+ }
+
+ /* Assignments need a bit of special handling */
+
+ if (kind & GTK_ASGOP)
+ {
+ /* Process the target */
+
+ level = gtSetEvalOrder(op1);
+
+#if FEATURE_STACK_FP_X87
+
+ /* If assigning an FP value, the target won't get pushed */
+
+ if (isflt && !tree->IsPhiDefn())
+ {
+ op1->gtFPlvl--;
+ codeGen->genDecrementFPstkLevel();
+ }
+
+#endif // FEATURE_STACK_FP_X87
+
+ if (gtIsLikelyRegVar(op1))
+ {
+ assert(lvlb == 0);
+ lvl2 = gtSetEvalOrder(op2);
+ if (oper != GT_ASG)
+ {
+ ftreg |= op2->gtRsvdRegs;
+ }
+
+ /* Assignment to an enregistered LCL_VAR */
+ costEx = op2->gtCostEx;
+ costSz = max(3, op2->gtCostSz); // 3 is an estimate for a reg-reg assignment
+ goto DONE_OP1_AFTER_COST;
+ }
+ else if (oper != GT_ASG)
+ {
+ // Assign-Op instructions read and write op1
+ //
+ costEx += op1->gtCostEx;
+#ifdef _TARGET_ARM_
+ costSz += op1->gtCostSz;
+#endif
+ }
+
+ goto DONE_OP1;
+ }
+
+ /* Process the sub-operands */
+
+ level = gtSetEvalOrder(op1);
+ if (lvlb < 0)
+ {
+ level -= lvlb; // lvlb is negative, so this increases level
+ lvlb = 0;
+ }
+
+ DONE_OP1:
+ assert(lvlb >= 0);
+ lvl2 = gtSetEvalOrder(op2) + lvlb;
+ ftreg |= op1->gtRsvdRegs;
+ // For assignment, we execute op2 before op1, except that for block
+ // ops the destination address is evaluated first.
+ if ((oper != GT_ASG) || tree->OperIsBlkOp())
+ {
+ ftreg |= op2->gtRsvdRegs;
+ }
+
+ costEx += (op1->gtCostEx + op2->gtCostEx);
+ costSz += (op1->gtCostSz + op2->gtCostSz);
+
+ DONE_OP1_AFTER_COST:
+#if FEATURE_STACK_FP_X87
+ /*
+ Binary FP operators pop 2 operands and produce 1 result;
+ FP comparisons pop 2 operands and produces 0 results.
+ assignments consume 1 value and don't produce anything.
+ */
+
+ if (isflt && !tree->IsPhiDefn())
+ {
+ assert(oper != GT_COMMA);
+ codeGen->genDecrementFPstkLevel();
+ }
+#endif // FEATURE_STACK_FP_X87
+
+ bool bReverseInAssignment = false;
+ if (kind & GTK_ASGOP)
+ {
+ GenTreePtr op1Val = op1;
+
+ if (tree->gtOper == GT_ASG)
+ {
+ // Skip over the GT_IND/GT_ADDR tree (if one exists)
+ //
+ if ((op1->gtOper == GT_IND) && (op1->gtOp.gtOp1->gtOper == GT_ADDR))
+ {
+ op1Val = op1->gtOp.gtOp1->gtOp.gtOp1;
+ }
+ }
+
+ switch (op1Val->gtOper)
+ {
+ case GT_IND:
+
+ // Struct assignments are different from scalar assignments in that semantically
+ // the address of op1 is evaluated prior to op2.
+ if (!varTypeIsStruct(op1))
+ {
+ // If we have any side effects on the GT_IND child node
+ // we have to evaluate op1 first.
+ if (op1Val->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT)
+ {
+ break;
+ }
+ }
+
+ // In case op2 assigns to a local var that is used in op1Val, we have to evaluate op1Val first.
+ if (op2->gtFlags & GTF_ASG)
+ {
+ break;
+ }
+
+ // If op2 is simple then evaluate op1 first
+
+ if (op2->OperKind() & GTK_LEAF)
+ {
+ break;
+ }
+
+ // fall through and set GTF_REVERSE_OPS
+
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_DYN_BLK:
+
+ // We evaluate op2 before op1
+ bReverseInAssignment = true;
+ tree->gtFlags |= GTF_REVERSE_OPS;
+ break;
+
+ default:
+ break;
+ }
+ }
+ else if (kind & GTK_RELOP)
+ {
+ /* Float compares remove both operands from the FP stack */
+ /* Also FP comparison uses EAX for flags */
+
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+#if FEATURE_STACK_FP_X87
+ codeGen->genDecrementFPstkLevel(2);
+#endif // FEATURE_STACK_FP_X87
+#ifdef _TARGET_XARCH_
+ ftreg |= RBM_EAX;
+#endif
+ level++;
+ lvl2++;
+ }
+#if CPU_LONG_USES_REGPAIR
+ if (varTypeIsLong(op1->TypeGet()))
+ {
+ costEx *= 2; // Longs are twice as expensive
+ costSz *= 2;
+ }
+#endif
+ if ((tree->gtFlags & GTF_RELOP_JMP_USED) == 0)
+ {
+ /* Using a setcc instruction is more expensive */
+ costEx += 3;
+ }
+ }
+
+ /* Check for other interesting cases */
+
+ switch (oper)
+ {
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+
+ /* Variable sized shifts are more expensive and use REG_SHIFT */
+
+ if (!op2->IsCnsIntOrI())
+ {
+ costEx += 3;
+ if (REG_SHIFT != REG_NA)
+ {
+ ftreg |= RBM_SHIFT;
+ }
+
+#ifndef _TARGET_64BIT_
+ // Variable sized LONG shifts require the use of a helper call
+ //
+ if (tree->gtType == TYP_LONG)
+ {
+ level += 5;
+ lvl2 += 5;
+ costEx += 3 * IND_COST_EX;
+ costSz += 4;
+ ftreg |= RBM_CALLEE_TRASH;
+ }
+#endif // !_TARGET_64BIT_
+ }
+ break;
+
+ case GT_INTRINSIC:
+
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Atan2:
+ case CORINFO_INTRINSIC_Pow:
+ // These math intrinsics are actually implemented by user calls.
+ // Increase the Sethi 'complexity' by two to reflect the argument
+ // register requirement.
+ level += 2;
+ break;
+ default:
+ assert(!"Unknown binary GT_INTRINSIC operator");
+ break;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ /* We need to evaluate constants later as many places in codegen
+ can't handle op1 being a constant. This is normally naturally
+ enforced as constants have the lowest level of 0. However,
+ sometimes we end up with a tree like "cns1 < nop(cns2)". In
+ such cases, both sides have a level of 0. So encourage constants
+ to be evaluated last in such cases */
+
+ if ((level == 0) && (level == lvl2) && (op1->OperKind() & GTK_CONST) &&
+ (tree->OperIsCommutative() || tree->OperIsCompare()))
+ {
+ lvl2++;
+ }
+
+ /* We try to swap operands if the second one is more expensive */
+ bool tryToSwap;
+ GenTreePtr opA, opB;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ opA = op2;
+ opB = op1;
+ }
+ else
+ {
+ opA = op1;
+ opB = op2;
+ }
+
+ if (fgOrder == FGOrderLinear)
+ {
+ // Don't swap anything if we're in linear order; we're really just interested in the costs.
+ tryToSwap = false;
+ }
+ else if (bReverseInAssignment)
+ {
+ // Assignments are special: we rely on the GTF_REVERSE_OPS flag,
+ // which, if applicable, was already set above.
+ tryToSwap = false;
+ }
+ else
+ {
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tryToSwap = (level > lvl2);
+ }
+ else
+ {
+ tryToSwap = (level < lvl2);
+ }
+
+ // Try to force extra swapping when in the stress mode:
+ if (compStressCompile(STRESS_REVERSE_FLAG, 60) && ((tree->gtFlags & GTF_REVERSE_OPS) == 0) &&
+ ((op2->OperKind() & GTK_CONST) == 0))
+ {
+ tryToSwap = true;
+ }
+ }
+
+ if (tryToSwap)
+ {
+ bool canSwap = gtCanSwapOrder(opA, opB);
+
+ if (canSwap)
+ {
+ /* Can we swap the order by commuting the operands? */
+
+ switch (oper)
+ {
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ if (GenTree::SwapRelop(oper) != oper)
+ {
+ // SetOper will obliterate the VN for the underlying expression.
+ // If we're in VN CSE phase, we don't want to lose that information,
+ // so save the value numbers and put them back after the SetOper.
+ ValueNumPair vnp = tree->gtVNPair;
+ tree->SetOper(GenTree::SwapRelop(oper));
+ if (optValnumCSE_phase)
+ {
+ tree->gtVNPair = vnp;
+ }
+ }
+
+ __fallthrough;
+
+ case GT_ADD:
+ case GT_MUL:
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+
+ /* Swap the operands */
+
+ tree->gtOp.gtOp1 = op2;
+ tree->gtOp.gtOp2 = op1;
+
+#if FEATURE_STACK_FP_X87
+ /* We may have to recompute FP levels */
+ if (op1->gtFPlvl || op2->gtFPlvl)
+ gtFPstLvlRedo = true;
+#endif // FEATURE_STACK_FP_X87
+ break;
+
+ case GT_QMARK:
+ case GT_COLON:
+ case GT_MKREFANY:
+ break;
+
+ case GT_LIST:
+ break;
+
+ case GT_SUB:
+#ifdef LEGACY_BACKEND
+ // For LSRA we require that LclVars be "evaluated" just prior to their use,
+ // so that if they must be reloaded, it is done at the right place.
+ // This means that we allow reverse evaluation for all BINOPs.
+ // (Note that this doesn't affect the order of the operands in the instruction).
+ if (!isflt)
+ break;
+#endif // LEGACY_BACKEND
+
+ __fallthrough;
+
+ default:
+
+ /* Mark the operand's evaluation order to be swapped */
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tree->gtFlags &= ~GTF_REVERSE_OPS;
+ }
+ else
+ {
+ tree->gtFlags |= GTF_REVERSE_OPS;
+ }
+
+#if FEATURE_STACK_FP_X87
+ /* We may have to recompute FP levels */
+ if (op1->gtFPlvl || op2->gtFPlvl)
+ gtFPstLvlRedo = true;
+#endif // FEATURE_STACK_FP_X87
+
+ break;
+ }
+ }
+ }
+
+ /* Swap the level counts */
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ unsigned tmpl;
+
+ tmpl = level;
+ level = lvl2;
+ lvl2 = tmpl;
+ }
+
+ /* Compute the sethi number for this binary operator */
+
+ if (level < 1)
+ {
+ level = lvl2;
+ }
+ else if (level == lvl2)
+ {
+ level += 1;
+ }
+
+ goto DONE;
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ unsigned lvl2; // Scratch variable
+
+ case GT_CALL:
+
+ assert(tree->gtFlags & GTF_CALL);
+
+ level = 0;
+ costEx = 5;
+ costSz = 2;
+
+ /* Evaluate the 'this' argument, if present */
+
+ if (tree->gtCall.gtCallObjp)
+ {
+ GenTreePtr thisVal = tree->gtCall.gtCallObjp;
+
+ lvl2 = gtSetEvalOrder(thisVal);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costEx += thisVal->gtCostEx;
+ costSz += thisVal->gtCostSz + 1;
+ ftreg |= thisVal->gtRsvdRegs;
+ }
+
+ /* Evaluate the arguments, right to left */
+
+ if (tree->gtCall.gtCallArgs)
+ {
+#if FEATURE_STACK_FP_X87
+ FPlvlSave = codeGen->genGetFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+ lvl2 = gtSetListOrder(tree->gtCall.gtCallArgs, false);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costEx += tree->gtCall.gtCallArgs->gtCostEx;
+ costSz += tree->gtCall.gtCallArgs->gtCostSz;
+ ftreg |= tree->gtCall.gtCallArgs->gtRsvdRegs;
+#if FEATURE_STACK_FP_X87
+ codeGen->genResetFPstkLevel(FPlvlSave);
+#endif // FEATURE_STACK_FP_X87
+ }
+
+ /* Evaluate the temp register arguments list
+ * This is a "hidden" list and its only purpose is to
+ * extend the life of temps until we make the call */
+
+ if (tree->gtCall.gtCallLateArgs)
+ {
+#if FEATURE_STACK_FP_X87
+ FPlvlSave = codeGen->genGetFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+ lvl2 = gtSetListOrder(tree->gtCall.gtCallLateArgs, true);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costEx += tree->gtCall.gtCallLateArgs->gtCostEx;
+ costSz += tree->gtCall.gtCallLateArgs->gtCostSz;
+ ftreg |= tree->gtCall.gtCallLateArgs->gtRsvdRegs;
+#if FEATURE_STACK_FP_X87
+ codeGen->genResetFPstkLevel(FPlvlSave);
+#endif // FEATURE_STACK_FP_X87
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ // pinvoke-calli cookie is a constant, or constant indirection
+ assert(tree->gtCall.gtCallCookie == nullptr || tree->gtCall.gtCallCookie->gtOper == GT_CNS_INT ||
+ tree->gtCall.gtCallCookie->gtOper == GT_IND);
+
+ GenTreePtr indirect = tree->gtCall.gtCallAddr;
+
+ lvl2 = gtSetEvalOrder(indirect);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costEx += indirect->gtCostEx + IND_COST_EX;
+ costSz += indirect->gtCostSz;
+ ftreg |= indirect->gtRsvdRegs;
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ if ((tree->gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_STUB)
+ {
+ // We generate movw/movt/ldr
+ costEx += (1 + IND_COST_EX);
+ costSz += 8;
+ if (tree->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
+ {
+ // Must use R12 for the ldr target -- REG_JUMP_THUNK_PARAM
+ costSz += 2;
+ }
+ }
+ else if ((opts.eeFlags & CORJIT_FLG_PREJIT) == 0)
+ {
+ costEx += 2;
+ costSz += 6;
+ }
+ costSz += 2;
+#endif
+#ifdef _TARGET_XARCH_
+ costSz += 3;
+#endif
+ }
+
+ level += 1;
+
+ unsigned callKind;
+ callKind = (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK);
+
+ /* Virtual calls are a bit more expensive */
+ if (callKind != GTF_CALL_NONVIRT)
+ {
+ costEx += 2 * IND_COST_EX;
+ costSz += 2;
+ }
+
+ /* Virtual stub calls also must reserve the VIRTUAL_STUB_PARAM reg */
+ if (callKind == GTF_CALL_VIRT_STUB)
+ {
+ ftreg |= RBM_VIRTUAL_STUB_PARAM;
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+#ifdef _TARGET_ARM64_
+ if (tree->gtCall.IsR2RRelativeIndir())
+ {
+ ftreg |= RBM_R2R_INDIRECT_PARAM;
+ }
+#endif
+#endif
+
+#if GTF_CALL_REG_SAVE
+ // Normally function calls don't preserve caller save registers
+ // and thus are much more expensive.
+ // However a few function calls do preserve these registers
+ // such as the GC WriteBarrier helper calls.
+
+ if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
+#endif
+ {
+ level += 5;
+ costEx += 3 * IND_COST_EX;
+ ftreg |= RBM_CALLEE_TRASH;
+ }
+
+#if FEATURE_STACK_FP_X87
+ if (isflt)
+ codeGen->genIncrementFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+
+ break;
+
+ case GT_ARR_ELEM:
+
+ level = gtSetEvalOrder(tree->gtArrElem.gtArrObj);
+ costEx = tree->gtArrElem.gtArrObj->gtCostEx;
+ costSz = tree->gtArrElem.gtArrObj->gtCostSz;
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ lvl2 = gtSetEvalOrder(tree->gtArrElem.gtArrInds[dim]);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costEx += tree->gtArrElem.gtArrInds[dim]->gtCostEx;
+ costSz += tree->gtArrElem.gtArrInds[dim]->gtCostSz;
+ }
+
+#if FEATURE_STACK_FP_X87
+ if (isflt)
+ codeGen->genIncrementFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+ level += tree->gtArrElem.gtArrRank;
+ costEx += 2 + (tree->gtArrElem.gtArrRank * (IND_COST_EX + 1));
+ costSz += 2 + (tree->gtArrElem.gtArrRank * 2);
+ break;
+
+ case GT_ARR_OFFSET:
+ level = gtSetEvalOrder(tree->gtArrOffs.gtOffset);
+ costEx = tree->gtArrOffs.gtOffset->gtCostEx;
+ costSz = tree->gtArrOffs.gtOffset->gtCostSz;
+ lvl2 = gtSetEvalOrder(tree->gtArrOffs.gtIndex);
+ level = max(level, lvl2);
+ costEx += tree->gtArrOffs.gtIndex->gtCostEx;
+ costSz += tree->gtArrOffs.gtIndex->gtCostSz;
+ lvl2 = gtSetEvalOrder(tree->gtArrOffs.gtArrObj);
+ level = max(level, lvl2);
+ costEx += tree->gtArrOffs.gtArrObj->gtCostEx;
+ costSz += tree->gtArrOffs.gtArrObj->gtCostSz;
+ break;
+
+ case GT_CMPXCHG:
+
+ level = gtSetEvalOrder(tree->gtCmpXchg.gtOpLocation);
+ costSz = tree->gtCmpXchg.gtOpLocation->gtCostSz;
+
+ lvl2 = gtSetEvalOrder(tree->gtCmpXchg.gtOpValue);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costSz += tree->gtCmpXchg.gtOpValue->gtCostSz;
+
+ lvl2 = gtSetEvalOrder(tree->gtCmpXchg.gtOpComparand);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costSz += tree->gtCmpXchg.gtOpComparand->gtCostSz;
+
+ costEx = MAX_COST; // Seriously, what could be more expensive than lock cmpxchg?
+ costSz += 5; // size of lock cmpxchg [reg+C], reg
+#ifdef _TARGET_XARCH_
+ ftreg |= RBM_EAX; // cmpxchg must be evaluated into eax.
+#endif
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ costEx = 4; // cmp reg,reg and jae throw (not taken)
+ costSz = 7; // jump to cold section
+
+ level = gtSetEvalOrder(tree->gtBoundsChk.gtArrLen);
+ costEx += tree->gtBoundsChk.gtArrLen->gtCostEx;
+ costSz += tree->gtBoundsChk.gtArrLen->gtCostSz;
+
+ lvl2 = gtSetEvalOrder(tree->gtBoundsChk.gtIndex);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costEx += tree->gtBoundsChk.gtIndex->gtCostEx;
+ costSz += tree->gtBoundsChk.gtIndex->gtCostSz;
+
+ break;
+
+ case GT_STORE_DYN_BLK:
+ case GT_DYN_BLK:
+ {
+ costEx = 0;
+ costSz = 0;
+ level = 0;
+ if (oper == GT_STORE_DYN_BLK)
+ {
+ lvl2 = gtSetEvalOrder(tree->gtDynBlk.Data());
+ level = max(level, lvl2);
+ costEx += tree->gtDynBlk.Data()->gtCostEx;
+ costSz += tree->gtDynBlk.Data()->gtCostSz;
+ }
+ lvl2 = gtSetEvalOrder(tree->gtDynBlk.Addr());
+ level = max(level, lvl2);
+            costEx += tree->gtDynBlk.Addr()->gtCostEx;
+            costSz += tree->gtDynBlk.Addr()->gtCostSz;
+ unsigned sizeLevel = gtSetEvalOrder(tree->gtDynBlk.gtDynamicSize);
+
+ // Determine whether the size node should be evaluated first.
+ // We would like to do this if the sizeLevel is larger than the current level,
+ // but we have to ensure that we obey ordering constraints.
+ if (tree->AsDynBlk()->gtEvalSizeFirst != (level < sizeLevel))
+ {
+ bool canChange = true;
+
+ GenTree* sizeNode = tree->AsDynBlk()->gtDynamicSize;
+ GenTree* dst = tree->AsDynBlk()->Addr();
+ GenTree* src = tree->AsDynBlk()->Data();
+
+ if (tree->AsDynBlk()->gtEvalSizeFirst)
+ {
+ canChange = gtCanSwapOrder(sizeNode, dst);
+ if (canChange && (src != nullptr))
+ {
+ canChange = gtCanSwapOrder(sizeNode, src);
+ }
+ }
+ else
+ {
+ canChange = gtCanSwapOrder(dst, sizeNode);
+ if (canChange && (src != nullptr))
+ {
+                        canChange = gtCanSwapOrder(src, sizeNode);
+ }
+ }
+ if (canChange)
+ {
+ tree->AsDynBlk()->gtEvalSizeFirst = (level < sizeLevel);
+ }
+ }
+ level = max(level, sizeLevel);
+ costEx += tree->gtDynBlk.gtDynamicSize->gtCostEx;
+ costSz += tree->gtDynBlk.gtDynamicSize->gtCostSz;
+ }
+ break;
+
+ default:
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("unexpected operator in this tree:\n");
+ gtDispTree(tree);
+ }
+#endif
+ NO_WAY("unexpected operator");
+ }
+
+DONE:
+
+#if FEATURE_STACK_FP_X87
+ // printf("[FPlvl=%2u] ", genGetFPstkLevel()); gtDispTree(tree, 0, true);
+ noway_assert((unsigned char)codeGen->genFPstkLevel == codeGen->genFPstkLevel);
+ tree->gtFPlvl = (unsigned char)codeGen->genFPstkLevel;
+
+ if (codeGen->genFPstkLevel > tmpDoubleSpillMax)
+ tmpDoubleSpillMax = codeGen->genFPstkLevel;
+#endif // FEATURE_STACK_FP_X87
+
+ tree->gtRsvdRegs = (regMaskSmall)ftreg;
+
+ // Some path through this function must have set the costs.
+ assert(costEx != -1);
+ assert(costSz != -1);
+
+ tree->SetCosts(costEx, costSz);
+
+ return level;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
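+
+// Note on the level combination at the end of the binary-operator path above (a restatement,
+// not new behavior): with 'level'/'lvl2' already swapped to match the chosen evaluation order,
+// the parent keeps the first operand's level, except that a first operand of level 0 simply
+// adopts the second operand's level, and equal levels cost one extra. Illustrative cases:
+//
+//     level == 0, lvl2 == 2  ->  parent level 2   (a constant/leaf first operand adds no pressure)
+//     level == 2, lvl2 == 2  ->  parent level 3   (both subtrees' values must be live at once)
+//     level == 3, lvl2 == 1  ->  parent level 3   (the deeper first operand dominates)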
+
+#if FEATURE_STACK_FP_X87
+
+/*****************************************************************************/
+void Compiler::gtComputeFPlvls(GenTreePtr tree)
+{
+ genTreeOps oper;
+ unsigned kind;
+ bool isflt;
+ unsigned savFPstkLevel;
+
+ noway_assert(tree);
+ noway_assert(tree->gtOper != GT_STMT);
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+ isflt = varTypeIsFloating(tree->TypeGet()) ? 1 : 0;
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ codeGen->genFPstkLevel += isflt;
+ goto DONE;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ /* Check for some special cases */
+
+ switch (oper)
+ {
+ case GT_IND:
+
+ gtComputeFPlvls(op1);
+
+ /* Indirect loads of FP values push a new value on the FP stack */
+
+ codeGen->genFPstkLevel += isflt;
+ goto DONE;
+
+ case GT_CAST:
+
+ gtComputeFPlvls(op1);
+
+ /* Casts between non-FP and FP push on / pop from the FP stack */
+
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ if (isflt == false)
+ codeGen->genFPstkLevel--;
+ }
+ else
+ {
+ if (isflt != false)
+ codeGen->genFPstkLevel++;
+ }
+
+ goto DONE;
+
+ case GT_LIST: /* GT_LIST presumably part of an argument list */
+ case GT_COMMA: /* Comma tosses the result of the left operand */
+
+ savFPstkLevel = codeGen->genFPstkLevel;
+ gtComputeFPlvls(op1);
+ codeGen->genFPstkLevel = savFPstkLevel;
+
+ if (op2)
+ gtComputeFPlvls(op2);
+
+ goto DONE;
+
+ default:
+ break;
+ }
+
+ if (!op1)
+ {
+ if (!op2)
+ goto DONE;
+
+ gtComputeFPlvls(op2);
+ goto DONE;
+ }
+
+ if (!op2)
+ {
+ gtComputeFPlvls(op1);
+ if (oper == GT_ADDR)
+ {
+ /* If the operand was floating point pop the value from the stack */
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ noway_assert(codeGen->genFPstkLevel);
+ codeGen->genFPstkLevel--;
+ }
+ }
+
+ // This is a special case to handle the following
+ // optimization: conv.i4(round.d(d)) -> round.i(d)
+
+ if (oper == GT_INTRINSIC && tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round &&
+ tree->TypeGet() == TYP_INT)
+ {
+ codeGen->genFPstkLevel--;
+ }
+
+ goto DONE;
+ }
+
+        /* FP assignments need a bit of special handling */
+
+ if (isflt && (kind & GTK_ASGOP))
+ {
+ /* The target of the assignment won't get pushed */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ gtComputeFPlvls(op2);
+ gtComputeFPlvls(op1);
+ op1->gtFPlvl--;
+ codeGen->genFPstkLevel--;
+ }
+ else
+ {
+ gtComputeFPlvls(op1);
+ op1->gtFPlvl--;
+ codeGen->genFPstkLevel--;
+ gtComputeFPlvls(op2);
+ }
+
+ codeGen->genFPstkLevel--;
+ goto DONE;
+ }
+
+ /* Here we have a binary operator; visit operands in proper order */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ gtComputeFPlvls(op2);
+ gtComputeFPlvls(op1);
+ }
+ else
+ {
+ gtComputeFPlvls(op1);
+ gtComputeFPlvls(op2);
+ }
+
+ /*
+ Binary FP operators pop 2 operands and produce 1 result;
+ assignments consume 1 value and don't produce any.
+ */
+
+ if (isflt)
+ codeGen->genFPstkLevel--;
+
+ /* Float compares remove both operands from the FP stack */
+
+ if (kind & GTK_RELOP)
+ {
+ if (varTypeIsFloating(op1->TypeGet()))
+ codeGen->genFPstkLevel -= 2;
+ }
+
+ goto DONE;
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ gtComputeFPlvls(tree->gtField.gtFldObj);
+ codeGen->genFPstkLevel += isflt;
+ break;
+
+ case GT_CALL:
+
+ if (tree->gtCall.gtCallObjp)
+ gtComputeFPlvls(tree->gtCall.gtCallObjp);
+
+ if (tree->gtCall.gtCallArgs)
+ {
+ savFPstkLevel = codeGen->genFPstkLevel;
+ gtComputeFPlvls(tree->gtCall.gtCallArgs);
+ codeGen->genFPstkLevel = savFPstkLevel;
+ }
+
+ if (tree->gtCall.gtCallLateArgs)
+ {
+ savFPstkLevel = codeGen->genFPstkLevel;
+ gtComputeFPlvls(tree->gtCall.gtCallLateArgs);
+ codeGen->genFPstkLevel = savFPstkLevel;
+ }
+
+ codeGen->genFPstkLevel += isflt;
+ break;
+
+ case GT_ARR_ELEM:
+
+ gtComputeFPlvls(tree->gtArrElem.gtArrObj);
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ gtComputeFPlvls(tree->gtArrElem.gtArrInds[dim]);
+
+ /* Loads of FP values push a new value on the FP stack */
+ codeGen->genFPstkLevel += isflt;
+ break;
+
+ case GT_CMPXCHG:
+ // Evaluate the trees left to right
+ gtComputeFPlvls(tree->gtCmpXchg.gtOpLocation);
+ gtComputeFPlvls(tree->gtCmpXchg.gtOpValue);
+ gtComputeFPlvls(tree->gtCmpXchg.gtOpComparand);
+ noway_assert(!isflt);
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ gtComputeFPlvls(tree->gtBoundsChk.gtArrLen);
+ gtComputeFPlvls(tree->gtBoundsChk.gtIndex);
+ noway_assert(!isflt);
+ break;
+
+#ifdef DEBUG
+ default:
+ noway_assert(!"Unhandled special operator in gtComputeFPlvls()");
+ break;
+#endif
+ }
+
+DONE:
+
+ noway_assert((unsigned char)codeGen->genFPstkLevel == codeGen->genFPstkLevel);
+
+ tree->gtFPlvl = (unsigned char)codeGen->genFPstkLevel;
+}
+
+#endif // FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * If the given tree is an integer constant that can be used
+ * in a scaled index address mode as a multiplier (e.g. "[4*index]"), then return
+ * the scale factor: 2, 4, or 8. Otherwise, return 0. Note that we never return 1,
+ * to match the behavior of GetScaleIndexShf().
+ */
+
+unsigned GenTree::GetScaleIndexMul()
+{
+ if (IsCnsIntOrI() && jitIsScaleIndexMul(gtIntConCommon.IconValue()) && gtIntConCommon.IconValue() != 1)
+ {
+ return (unsigned)gtIntConCommon.IconValue();
+ }
+
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ *  If the given tree is the right-hand side of a left shift (that is,
+ *  'y' in the tree 'x' << 'y'), and it is an integer constant shift amount whose
+ *  corresponding multiplier can be used in a scaled index address mode
+ *  (e.g. "[4*index]"), then return that scale factor: 2, 4, or 8. Otherwise, return 0.
+ */
+
+unsigned GenTree::GetScaleIndexShf()
+{
+ if (IsCnsIntOrI() && jitIsScaleIndexShift(gtIntConCommon.IconValue()))
+ {
+ return (unsigned)(1 << gtIntConCommon.IconValue());
+ }
+
+ return 0;
+}
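+
+// Illustrative sketch (comments only, assuming a Compiler instance 'comp'): the two accessors
+// interpret the constant differently: GetScaleIndexMul() treats it as the multiplier itself,
+// while GetScaleIndexShf() treats it as a shift amount and returns the corresponding multiplier.
+//
+//     GenTreePtr mulCns = comp->gtNewIconNode(4);   // constant from an "index * 4" tree
+//     mulCns->GetScaleIndexMul();                   // returns 4
+//
+//     GenTreePtr shfCns = comp->gtNewIconNode(2);   // constant from an "index << 2" tree
+//     shfCns->GetScaleIndexShf();                   // returns 1 << 2 == 4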
+
+/*****************************************************************************
+ *
+ * If the given tree is a scaled index (i.e. "op * 4" or "op << 2"), returns
+ * the multiplier: 2, 4, or 8; otherwise returns 0. Note that "1" is never
+ * returned.
+ */
+
+unsigned GenTree::GetScaledIndex()
+{
+    // With !opts.OptEnabled(CLFLG_CONSTANTFOLD) we can have
+ // CNS_INT * CNS_INT
+ //
+ if (gtOp.gtOp1->IsCnsIntOrI())
+ {
+ return 0;
+ }
+
+ switch (gtOper)
+ {
+ case GT_MUL:
+ return gtOp.gtOp2->GetScaleIndexMul();
+
+ case GT_LSH:
+ return gtOp.gtOp2->GetScaleIndexShf();
+
+ default:
+ assert(!"GenTree::GetScaledIndex() called with illegal gtOper");
+ break;
+ }
+
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Returns true if this node is a GT_ADD, at least one of whose arguments is an integer (<= 32 bit)
+ *  constant. If so, it sets "*offset" to (one of the) constant value(s), and
+ *  "*addr" to the other argument.
+ */
+
+bool GenTree::IsAddWithI32Const(GenTreePtr* addr, int* offset)
+{
+ if (OperGet() == GT_ADD)
+ {
+ if (gtOp.gtOp1->IsIntCnsFitsInI32())
+ {
+ *offset = (int)gtOp.gtOp1->gtIntCon.gtIconVal;
+ *addr = gtOp.gtOp2;
+ return true;
+ }
+ else if (gtOp.gtOp2->IsIntCnsFitsInI32())
+ {
+ *offset = (int)gtOp.gtOp2->gtIntCon.gtIconVal;
+ *addr = gtOp.gtOp1;
+ return true;
+ }
+ }
+ // Otherwise...
+ return false;
+}
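+
+// Usage sketch (illustrative only; 'tree' is a hypothetical address expression):
+//
+//     GenTreePtr base   = tree;
+//     int        offset = 0;
+//     if (tree->IsAddWithI32Const(&base, &offset))
+//     {
+//         // For "x + 16" this leaves base == x and offset == 16; for a non-matching
+//         // tree, base/offset are untouched and the call returns false.
+//     }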
+
+//------------------------------------------------------------------------
+// gtGetChildPointer: If 'parent' is the parent of this node, return the pointer
+// to the child node so that it can be modified; otherwise, return nullptr.
+//
+// Arguments:
+// parent - The possible parent of this node
+//
+// Return Value:
+// If "child" is a child of "parent", returns a pointer to the child node in the parent
+// (i.e. a pointer to a GenTree pointer).
+// Otherwise, returns nullptr.
+//
+// Assumptions:
+// 'parent' must be non-null
+//
+// Notes:
+//    When FEATURE_MULTIREG_ARGS is defined, we can get here with a GT_LDOBJ tree.
+//    This happens when we have a struct that is passed in multiple registers.
+//
+//    Also note that when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined, the GT_LDOBJ
+//    later gets converted to a GT_LIST with two GT_LCL_FLDs in Lower/LowerXArch.
+//
+
+GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent)
+{
+ switch (parent->OperGet())
+ {
+ default:
+ if (!parent->OperIsSimple())
+ {
+ return nullptr;
+ }
+ if (this == parent->gtOp.gtOp1)
+ {
+ return &(parent->gtOp.gtOp1);
+ }
+ if (this == parent->gtOp.gtOp2)
+ {
+ return &(parent->gtOp.gtOp2);
+ }
+ break;
+
+ case GT_CMPXCHG:
+ if (this == parent->gtCmpXchg.gtOpLocation)
+ {
+ return &(parent->gtCmpXchg.gtOpLocation);
+ }
+ if (this == parent->gtCmpXchg.gtOpValue)
+ {
+ return &(parent->gtCmpXchg.gtOpValue);
+ }
+ if (this == parent->gtCmpXchg.gtOpComparand)
+ {
+ return &(parent->gtCmpXchg.gtOpComparand);
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ if (this == parent->gtBoundsChk.gtArrLen)
+ {
+ return &(parent->gtBoundsChk.gtArrLen);
+ }
+ if (this == parent->gtBoundsChk.gtIndex)
+ {
+ return &(parent->gtBoundsChk.gtIndex);
+ }
+ if (this == parent->gtBoundsChk.gtIndRngFailBB)
+ {
+ return &(parent->gtBoundsChk.gtIndRngFailBB);
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ if (this == parent->gtArrElem.gtArrObj)
+ {
+ return &(parent->gtArrElem.gtArrObj);
+ }
+ for (int i = 0; i < GT_ARR_MAX_RANK; i++)
+ {
+ if (this == parent->gtArrElem.gtArrInds[i])
+ {
+ return &(parent->gtArrElem.gtArrInds[i]);
+ }
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ if (this == parent->gtArrOffs.gtOffset)
+ {
+ return &(parent->gtArrOffs.gtOffset);
+ }
+ if (this == parent->gtArrOffs.gtIndex)
+ {
+ return &(parent->gtArrOffs.gtIndex);
+ }
+ if (this == parent->gtArrOffs.gtArrObj)
+ {
+ return &(parent->gtArrOffs.gtArrObj);
+ }
+ break;
+
+ case GT_STORE_DYN_BLK:
+ case GT_DYN_BLK:
+ if (this == parent->gtDynBlk.gtOp1)
+ {
+ return &(parent->gtDynBlk.gtOp1);
+ }
+ if (this == parent->gtDynBlk.gtOp2)
+ {
+ return &(parent->gtDynBlk.gtOp2);
+ }
+ if (this == parent->gtDynBlk.gtDynamicSize)
+ {
+ return &(parent->gtDynBlk.gtDynamicSize);
+ }
+ break;
+
+ case GT_FIELD:
+ if (this == parent->AsField()->gtFldObj)
+ {
+ return &(parent->AsField()->gtFldObj);
+ }
+ break;
+
+ case GT_RET_EXPR:
+ if (this == parent->gtRetExpr.gtInlineCandidate)
+ {
+ return &(parent->gtRetExpr.gtInlineCandidate);
+ }
+ break;
+
+ case GT_CALL:
+ {
+ GenTreeCall* call = parent->AsCall();
+
+ if (this == call->gtCallObjp)
+ {
+ return &(call->gtCallObjp);
+ }
+ if (this == call->gtCallArgs)
+ {
+ return reinterpret_cast<GenTreePtr*>(&(call->gtCallArgs));
+ }
+ if (this == call->gtCallLateArgs)
+ {
+ return reinterpret_cast<GenTreePtr*>(&(call->gtCallLateArgs));
+ }
+ if (this == call->gtControlExpr)
+ {
+ return &(call->gtControlExpr);
+ }
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ if (this == call->gtCallCookie)
+ {
+ return &(call->gtCallCookie);
+ }
+ if (this == call->gtCallAddr)
+ {
+ return &(call->gtCallAddr);
+ }
+ }
+ }
+ break;
+
+ case GT_STMT:
+ noway_assert(!"Illegal node for gtGetChildPointer()");
+ unreached();
+ }
+
+ return nullptr;
+}
+
+bool GenTree::TryGetUse(GenTree* def, GenTree*** use)
+{
+ for (GenTree** useEdge : UseEdges())
+ {
+ if (*useEdge == def)
+ {
+ *use = useEdge;
+ return true;
+ }
+ }
+
+ return false;
+}
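+
+// Usage sketch (illustrative only; 'parent', 'oldChild' and 'newChild' are hypothetical nodes):
+//
+//     GenTree** use = nullptr;
+//     if (parent->TryGetUse(oldChild, &use))
+//     {
+//         *use = newChild; // redirect the parent's operand edge to the replacement node
+//     }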
+
+//------------------------------------------------------------------------
+// gtGetParent: Get the parent of this node, and optionally capture the
+// pointer to the child so that it can be modified.
+//
+// Arguments:
+//    parentChildPtrPtr - A pointer to a GenTreePtr* (yes, that's three
+//                        levels, i.e. GenTree ***), which, if non-null,
+//                        will be set to point to the field in the parent
+//                        that points to this node.
+//
+// Return Value:
+//    The parent of this node.
+//
+// Notes:
+//    This requires that the execution order be defined (i.e. gtSetEvalOrder() has been called).
+//    To enable the child to be replaced, the 'parentChildPtrPtr' argument, if non-null,
+//    will be set to point to the child pointer in the parent that points to this node.
+
+GenTreePtr GenTree::gtGetParent(GenTreePtr** parentChildPtrPtr)
+{
+ // Find the parent node; it must be after this node in the execution order.
+ GenTreePtr* parentChildPtr = nullptr;
+ GenTreePtr parent;
+ for (parent = gtNext; parent != nullptr; parent = parent->gtNext)
+ {
+ parentChildPtr = gtGetChildPointer(parent);
+ if (parentChildPtr != nullptr)
+ {
+ break;
+ }
+ }
+ if (parentChildPtrPtr != nullptr)
+ {
+ *parentChildPtrPtr = parentChildPtr;
+ }
+ return parent;
+}
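+
+// Usage sketch (illustrative only; 'node' and 'replacement' are hypothetical trees in a
+// statement whose execution order has already been set by gtSetEvalOrder()):
+//
+//     GenTreePtr* useEdge = nullptr;
+//     GenTreePtr  parent  = node->gtGetParent(&useEdge);
+//     if ((parent != nullptr) && (useEdge != nullptr))
+//     {
+//         *useEdge = replacement; // splice 'replacement' in where 'node' was referenced
+//     }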
+
+/*****************************************************************************
+ *
+ * Returns true if the given operator may cause an exception.
+ */
+
+bool GenTree::OperMayThrow()
+{
+ GenTreePtr op;
+
+ switch (gtOper)
+ {
+ case GT_MOD:
+ case GT_DIV:
+ case GT_UMOD:
+ case GT_UDIV:
+
+ /* Division with a non-zero, non-minus-one constant does not throw an exception */
+
+ op = gtOp.gtOp2;
+
+ if (varTypeIsFloating(op->TypeGet()))
+ {
+ return false; // Floating point division does not throw.
+ }
+
+ // For integers only division by 0 or by -1 can throw
+ if (op->IsIntegralConst() && !op->IsIntegralConst(0) && !op->IsIntegralConst(-1))
+ {
+ return false;
+ }
+ return true;
+
+ case GT_IND:
+ op = gtOp.gtOp1;
+
+ /* Indirections of handles are known to be safe */
+ if (op->gtOper == GT_CNS_INT)
+ {
+ if (op->IsIconHandle())
+ {
+ /* No exception is thrown on this indirection */
+ return false;
+ }
+ }
+ if (this->gtFlags & GTF_IND_NONFAULTING)
+ {
+ return false;
+ }
+ // Non-Null AssertionProp will remove the GTF_EXCEPT flag and mark the GT_IND with GTF_ORDER_SIDEEFF flag
+ if ((this->gtFlags & GTF_ALL_EFFECT) == GTF_ORDER_SIDEEFF)
+ {
+ return false;
+ }
+
+ return true;
+
+ case GT_INTRINSIC:
+            // If this is an intrinsic that represents the object.GetType(), it can throw a NullReferenceException.
+            // Report it as "may throw".
+            // Note: Some of the other existing intrinsics could potentially throw an exception (for example
+            //       the array and string element access ones). They are handled differently from the GetType
+            //       intrinsic and are not marked with GTF_EXCEPT. If these are revisited at some point to be
+            //       marked as GTF_EXCEPT, the code below might need to be specialized to handle them properly.
+ if ((this->gtFlags & GTF_EXCEPT) != 0)
+ {
+ return true;
+ }
+
+ break;
+
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_DYN_BLK:
+ case GT_STORE_BLK:
+ return !Compiler::fgIsIndirOfAddrOfLocal(this);
+
+ case GT_ARR_BOUNDS_CHECK:
+ case GT_ARR_ELEM:
+ case GT_ARR_INDEX:
+ case GT_CATCH_ARG:
+ case GT_ARR_LENGTH:
+ case GT_LCLHEAP:
+ case GT_CKFINITE:
+ case GT_NULLCHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ return true;
+ default:
+ break;
+ }
+
+ /* Overflow arithmetic operations also throw exceptions */
+
+ if (gtOverflowEx())
+ {
+ return true;
+ }
+
+ return false;
+}
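+
+// For example (restating the cases above): a GT_DIV whose second operand is the constant 7
+// is known not to throw, while a GT_DIV by a variable may throw (divide-by-zero, or overflow
+// for INT_MIN / -1), and a GT_IND of a handle constant is treated as non-faulting.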
+
+#if DEBUGGABLE_GENTREE
+// static
+GenTree::VtablePtr GenTree::s_vtablesForOpers[] = {nullptr};
+GenTree::VtablePtr GenTree::s_vtableForOp = nullptr;
+
+GenTree::VtablePtr GenTree::GetVtableForOper(genTreeOps oper)
+{
+ noway_assert(oper < GT_COUNT);
+
+ if (s_vtablesForOpers[oper] != nullptr)
+ {
+ return s_vtablesForOpers[oper];
+ }
+ // Otherwise...
+ VtablePtr res = nullptr;
+ switch (oper)
+ {
+#define GTSTRUCT_0(nm, tag) /*handle explicitly*/
+#define GTSTRUCT_1(nm, tag) \
+ case tag: \
+ { \
+ GenTree##nm gt; \
+ res = *reinterpret_cast<VtablePtr*>(&gt); \
+ } \
+ break;
+#define GTSTRUCT_2(nm, tag, tag2) /*handle explicitly*/
+#define GTSTRUCT_3(nm, tag, tag2, tag3) /*handle explicitly*/
+#define GTSTRUCT_4(nm, tag, tag2, tag3, tag4) /*handle explicitly*/
+#define GTSTRUCT_N(nm, ...) /*handle explicitly*/
+#include "gtstructs.h"
+
+#if !FEATURE_EH_FUNCLETS
+ // If FEATURE_EH_FUNCLETS is set, then GT_JMP becomes the only member of Val, and will be handled above.
+ case GT_END_LFIN:
+ case GT_JMP:
+ {
+ GenTreeVal gt(GT_JMP, TYP_INT, 0);
+ res = *reinterpret_cast<VtablePtr*>(&gt);
+ break;
+ }
+#endif
+ case GT_OBJ:
+ {
+ GenTreeIntCon dummyOp(TYP_I_IMPL, 0);
+ GenTreeObj obj(TYP_STRUCT, &dummyOp, NO_CLASS_HANDLE, 0);
+ res = *reinterpret_cast<VtablePtr*>(&obj);
+ }
+ break;
+
+ default:
+ {
+ // Should be unary or binary op.
+ if (s_vtableForOp == nullptr)
+ {
+ unsigned opKind = OperKind(oper);
+ assert(!IsExOp(opKind));
+ assert(OperIsSimple(oper) || OperIsLeaf(oper));
+ // Need to provide non-null operands.
+ Compiler* comp = (Compiler*)_alloca(sizeof(Compiler));
+ GenTreeIntCon dummyOp(TYP_INT, 0);
+ GenTreeOp gt(oper, TYP_INT, &dummyOp, ((opKind & GTK_UNOP) ? nullptr : &dummyOp));
+ s_vtableForOp = *reinterpret_cast<VtablePtr*>(&gt);
+ }
+ res = s_vtableForOp;
+ break;
+ }
+ }
+ s_vtablesForOpers[oper] = res;
+ return res;
+}
+
+void GenTree::SetVtableForOper(genTreeOps oper)
+{
+ *reinterpret_cast<VtablePtr*>(this) = GetVtableForOper(oper);
+}
+#endif // DEBUGGABLE_GENTREE
+
+GenTreePtr Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTreePtr op1, GenTreePtr op2)
+{
+ assert(op1 != nullptr);
+ assert(op2 != nullptr);
+
+ // We should not be allocating nodes that extend GenTreeOp with this;
+ // should call the appropriate constructor for the extended type.
+ assert(!GenTree::IsExOp(GenTree::OperKind(oper)));
+
+ GenTreePtr node = new (this, oper) GenTreeOp(oper, type, op1, op2);
+
+ return node;
+}
+
+GenTreePtr Compiler::gtNewQmarkNode(var_types type, GenTreePtr cond, GenTreePtr colon)
+{
+ compQmarkUsed = true;
+ GenTree* result = new (this, GT_QMARK) GenTreeQmark(type, cond, colon, this);
+#ifdef DEBUG
+ if (compQmarkRationalized)
+ {
+ fgCheckQmarkAllowedForm(result);
+ }
+#endif
+ return result;
+}
+
+GenTreeQmark::GenTreeQmark(var_types type, GenTreePtr cond, GenTreePtr colonOp, Compiler* comp)
+ : GenTreeOp(GT_QMARK, type, cond, colonOp)
+ , gtThenLiveSet(VarSetOps::UninitVal())
+ , gtElseLiveSet(VarSetOps::UninitVal())
+{
+ // These must follow a specific form.
+ assert(cond != nullptr && cond->TypeGet() == TYP_INT);
+ assert(colonOp != nullptr && colonOp->OperGet() == GT_COLON);
+
+ comp->impInlineRoot()->compQMarks->Push(this);
+}
+
+GenTreeIntCon* Compiler::gtNewIconNode(ssize_t value, var_types type)
+{
+ return new (this, GT_CNS_INT) GenTreeIntCon(type, value);
+}
+
+// return a new node representing the value in a physical register
+GenTree* Compiler::gtNewPhysRegNode(regNumber reg, var_types type)
+{
+ assert(genIsValidIntReg(reg) || (reg == REG_SPBASE));
+ GenTree* result = new (this, GT_PHYSREG) GenTreePhysReg(reg, type);
+ return result;
+}
+
+// Return a new node representing a store of a value to a physical register
+// modifies: child's gtRegNum
+GenTree* Compiler::gtNewPhysRegNode(regNumber reg, GenTree* src)
+{
+ assert(genIsValidIntReg(reg));
+ GenTree* result = new (this, GT_PHYSREGDST) GenTreeOp(GT_PHYSREGDST, TYP_I_IMPL, src, nullptr);
+ result->gtRegNum = reg;
+ src->gtRegNum = reg;
+ return result;
+}
+
+#ifndef LEGACY_BACKEND
+GenTreePtr Compiler::gtNewJmpTableNode()
+{
+ GenTreePtr node = new (this, GT_JMPTABLE) GenTreeJumpTable(TYP_INT);
+ node->gtJumpTable.gtJumpTableAddr = 0;
+ return node;
+}
+#endif // !LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Converts an annotated token into an icon flags (so that we will later be
+ *  Converts an annotated token into icon flags (so that we will later be
+ *  able to tell the type of the handle that will be embedded in the icon
+ *  node).
+
+unsigned Compiler::gtTokenToIconFlags(unsigned token)
+{
+ unsigned flags = 0;
+
+ switch (TypeFromToken(token))
+ {
+ case mdtTypeRef:
+ case mdtTypeDef:
+ case mdtTypeSpec:
+ flags = GTF_ICON_CLASS_HDL;
+ break;
+
+ case mdtMethodDef:
+ flags = GTF_ICON_METHOD_HDL;
+ break;
+
+ case mdtFieldDef:
+ flags = GTF_ICON_FIELD_HDL;
+ break;
+
+ default:
+ flags = GTF_ICON_TOKEN_HDL;
+ break;
+ }
+
+ return flags;
+}
+
+/*****************************************************************************
+ *
+ *  Allocates an integer constant entry that represents a HANDLE to something.
+ *  It may not be allowed to embed HANDLEs directly into the JITed code (for example,
+ *  as arguments to JIT helpers). Get a corresponding value that can be embedded.
+ * If the handle needs to be accessed via an indirection, pValue points to it.
+ */
+
+GenTreePtr Compiler::gtNewIconEmbHndNode(
+ void* value, void* pValue, unsigned flags, unsigned handle1, void* handle2, void* compileTimeHandle)
+{
+ GenTreePtr node;
+
+ assert((!value) != (!pValue));
+
+ if (value)
+ {
+ node = gtNewIconHandleNode((size_t)value, flags, /*fieldSeq*/ FieldSeqStore::NotAField(), handle1, handle2);
+ node->gtIntCon.gtCompileTimeHandle = (size_t)compileTimeHandle;
+ }
+ else
+ {
+ node = gtNewIconHandleNode((size_t)pValue, flags, /*fieldSeq*/ FieldSeqStore::NotAField(), handle1, handle2);
+ node->gtIntCon.gtCompileTimeHandle = (size_t)compileTimeHandle;
+ node = gtNewOperNode(GT_IND, TYP_I_IMPL, node);
+ }
+
+ return node;
+}
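+
+// Shape of the result (a restatement of the two paths above): when the handle value itself can
+// be embedded, the result is a single handle-flavored GT_CNS_INT; when only an indirection cell
+// is available ('pValue'), the constant holds the cell's address and is wrapped in a GT_IND,
+// i.e. roughly IND(CNS_INT(pValue)).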
+
+/*****************************************************************************/
+GenTreePtr Compiler::gtNewStringLiteralNode(InfoAccessType iat, void* pValue)
+{
+ GenTreePtr tree = nullptr;
+
+ switch (iat)
+ {
+ case IAT_VALUE: // The info value is directly available
+ tree = gtNewIconEmbHndNode(pValue, nullptr, GTF_ICON_STR_HDL);
+ tree->gtType = TYP_REF;
+ tree = gtNewOperNode(GT_NOP, TYP_REF, tree); // prevents constant folding
+ break;
+
+ case IAT_PVALUE: // The value needs to be accessed via an indirection
+ tree = gtNewIconHandleNode((size_t)pValue, GTF_ICON_STR_HDL);
+ // An indirection of a string handle can't cause an exception so don't set GTF_EXCEPT
+ tree = gtNewOperNode(GT_IND, TYP_REF, tree);
+ tree->gtFlags |= GTF_GLOB_REF;
+ break;
+
+ case IAT_PPVALUE: // The value needs to be accessed via a double indirection
+ tree = gtNewIconHandleNode((size_t)pValue, GTF_ICON_PSTR_HDL);
+ tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
+ tree->gtFlags |= GTF_IND_INVARIANT;
+ // An indirection of a string handle can't cause an exception so don't set GTF_EXCEPT
+ tree = gtNewOperNode(GT_IND, TYP_REF, tree);
+ tree->gtFlags |= GTF_GLOB_REF;
+ break;
+
+ default:
+ assert(!"Unexpected InfoAccessType");
+ }
+
+ return tree;
+}
+
+/*****************************************************************************/
+
+GenTreePtr Compiler::gtNewLconNode(__int64 value)
+{
+#ifdef _TARGET_64BIT_
+ GenTreePtr node = new (this, GT_CNS_INT) GenTreeIntCon(TYP_LONG, value);
+#else
+ GenTreePtr node = new (this, GT_CNS_LNG) GenTreeLngCon(value);
+#endif
+
+ return node;
+}
+
+GenTreePtr Compiler::gtNewDconNode(double value)
+{
+ GenTreePtr node = new (this, GT_CNS_DBL) GenTreeDblCon(value);
+
+ return node;
+}
+
+GenTreePtr Compiler::gtNewSconNode(int CPX, CORINFO_MODULE_HANDLE scpHandle)
+{
+
+#if SMALL_TREE_NODES
+
+ /* 'GT_CNS_STR' nodes later get transformed into 'GT_CALL' */
+
+ assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_CNS_STR]);
+
+ GenTreePtr node = new (this, GT_CALL) GenTreeStrCon(CPX, scpHandle DEBUGARG(/*largeNode*/ true));
+#else
+ GenTreePtr node = new (this, GT_CNS_STR) GenTreeStrCon(CPX, scpHandle DEBUGARG(/*largeNode*/ true));
+#endif
+
+ return node;
+}
+
+GenTreePtr Compiler::gtNewZeroConNode(var_types type)
+{
+ GenTreePtr zero;
+ switch (type)
+ {
+ case TYP_INT:
+ zero = gtNewIconNode(0);
+ break;
+
+ case TYP_BYREF:
+ __fallthrough;
+
+ case TYP_REF:
+ zero = gtNewIconNode(0);
+ zero->gtType = type;
+ break;
+
+ case TYP_LONG:
+ zero = gtNewLconNode(0);
+ break;
+
+ case TYP_FLOAT:
+ zero = gtNewDconNode(0.0);
+ zero->gtType = type;
+ break;
+
+ case TYP_DOUBLE:
+ zero = gtNewDconNode(0.0);
+ break;
+
+ default:
+ assert(!"Bad type");
+ zero = nullptr;
+ break;
+ }
+ return zero;
+}
+
+GenTreePtr Compiler::gtNewOneConNode(var_types type)
+{
+ switch (type)
+ {
+ case TYP_INT:
+ case TYP_UINT:
+ return gtNewIconNode(1);
+
+ case TYP_LONG:
+ case TYP_ULONG:
+ return gtNewLconNode(1);
+
+ case TYP_FLOAT:
+ {
+ GenTreePtr one = gtNewDconNode(1.0);
+ one->gtType = type;
+ return one;
+ }
+
+ case TYP_DOUBLE:
+ return gtNewDconNode(1.0);
+
+ default:
+ assert(!"Bad type");
+ return nullptr;
+ }
+}
+
+GenTreeCall* Compiler::gtNewIndCallNode(GenTreePtr addr, var_types type, GenTreeArgList* args, IL_OFFSETX ilOffset)
+{
+ return gtNewCallNode(CT_INDIRECT, (CORINFO_METHOD_HANDLE)addr, type, args, ilOffset);
+}
+
+GenTreeCall* Compiler::gtNewCallNode(
+ gtCallTypes callType, CORINFO_METHOD_HANDLE callHnd, var_types type, GenTreeArgList* args, IL_OFFSETX ilOffset)
+{
+ GenTreeCall* node = new (this, GT_CALL) GenTreeCall(genActualType(type));
+
+ node->gtFlags |= (GTF_CALL | GTF_GLOB_REF);
+ if (args)
+ {
+ node->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
+ }
+ node->gtCallType = callType;
+ node->gtCallMethHnd = callHnd;
+ node->gtCallArgs = args;
+ node->gtCallObjp = nullptr;
+ node->fgArgInfo = nullptr;
+ node->callSig = nullptr;
+ node->gtRetClsHnd = nullptr;
+ node->gtControlExpr = nullptr;
+ node->gtCallMoreFlags = 0;
+
+ if (callType == CT_INDIRECT)
+ {
+ node->gtCallCookie = nullptr;
+ }
+ else
+ {
+ node->gtInlineCandidateInfo = nullptr;
+ }
+ node->gtCallLateArgs = nullptr;
+ node->gtReturnType = type;
+
+#ifdef LEGACY_BACKEND
+ node->gtCallRegUsedMask = RBM_NONE;
+#endif // LEGACY_BACKEND
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ node->gtCall.gtEntryPoint.addr = nullptr;
+#endif
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ // These get updated after call node is built.
+ node->gtCall.gtInlineObservation = InlineObservation::CALLEE_UNUSED_INITIAL;
+ node->gtCall.gtRawILOffset = BAD_IL_OFFSET;
+#endif
+
+#ifdef DEBUGGING_SUPPORT
+    // Spec: Managed Retval sequence points need to be generated while generating debug info for debuggable code.
+ //
+ // Implementation note: if not generating MRV info genCallSite2ILOffsetMap will be NULL and
+ // codegen will pass BAD_IL_OFFSET as IL offset of a call node to emitter, which will cause emitter
+ // not to emit IP mapping entry.
+ if (opts.compDbgCode && opts.compDbgInfo)
+ {
+ // Managed Retval - IL offset of the call. This offset is used to emit a
+ // CALL_INSTRUCTION type sequence point while emitting corresponding native call.
+ //
+ // TODO-Cleanup:
+ // a) (Opt) We need not store this offset if the method doesn't return a
+ // value. Rather it can be made BAD_IL_OFFSET to prevent a sequence
+ // point being emitted.
+ //
+ // b) (Opt) Add new sequence points only if requested by debugger through
+ // a new boundary type - ICorDebugInfo::BoundaryTypes
+ if (genCallSite2ILOffsetMap == nullptr)
+ {
+ genCallSite2ILOffsetMap = new (getAllocator()) CallSiteILOffsetTable(getAllocator());
+ }
+
+ // Make sure that there are no duplicate entries for a given call node
+ IL_OFFSETX value;
+ assert(!genCallSite2ILOffsetMap->Lookup(node, &value));
+ genCallSite2ILOffsetMap->Set(node, ilOffset);
+ }
+#endif
+
+ // Initialize gtOtherRegs
+ node->ClearOtherRegs();
+
+ // Initialize spill flags of gtOtherRegs
+ node->ClearOtherRegFlags();
+
+ return node;
+}
+
+GenTreePtr Compiler::gtNewLclvNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs)
+{
+ // We need to ensure that all struct values are normalized.
+ // It might be nice to assert this in general, but we have assignments of int to long.
+ if (varTypeIsStruct(type))
+ {
+ assert(type == lvaTable[lnum].lvType);
+ }
+ GenTreePtr node = new (this, GT_LCL_VAR) GenTreeLclVar(type, lnum, ILoffs);
+
+ /* Cannot have this assert because the inliner uses this function
+ * to add temporaries */
+
+ // assert(lnum < lvaCount);
+
+ return node;
+}
+
+GenTreePtr Compiler::gtNewLclLNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs)
+{
+ // We need to ensure that all struct values are normalized.
+ // It might be nice to assert this in general, but we have assignments of int to long.
+ if (varTypeIsStruct(type))
+ {
+ assert(type == lvaTable[lnum].lvType);
+ }
+#if SMALL_TREE_NODES
+ /* This local variable node may later get transformed into a large node */
+
+ // assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_LCL_VAR]);
+
+ GenTreePtr node = new (this, GT_CALL) GenTreeLclVar(type, lnum, ILoffs DEBUGARG(/*largeNode*/ true));
+#else
+ GenTreePtr node = new (this, GT_LCL_VAR) GenTreeLclVar(type, lnum, ILoffs DEBUGARG(/*largeNode*/ true));
+#endif
+
+ return node;
+}
+
+GenTreeLclFld* Compiler::gtNewLclFldNode(unsigned lnum, var_types type, unsigned offset)
+{
+ GenTreeLclFld* node = new (this, GT_LCL_FLD) GenTreeLclFld(type, lnum, offset);
+
+ /* Cannot have this assert because the inliner uses this function
+ * to add temporaries */
+
+ // assert(lnum < lvaCount);
+
+ node->gtFieldSeq = FieldSeqStore::NotAField();
+ return node;
+}
+
+GenTreePtr Compiler::gtNewInlineCandidateReturnExpr(GenTreePtr inlineCandidate, var_types type)
+{
+ assert(GenTree::s_gtNodeSizes[GT_RET_EXPR] == TREE_NODE_SZ_LARGE);
+
+ GenTreePtr node = new (this, GT_RET_EXPR) GenTreeRetExpr(type);
+
+ node->gtRetExpr.gtInlineCandidate = inlineCandidate;
+
+ if (varTypeIsStruct(inlineCandidate) && !inlineCandidate->OperIsBlkOp())
+ {
+ node->gtRetExpr.gtRetClsHnd = gtGetStructHandle(inlineCandidate);
+ }
+
+ // GT_RET_EXPR node eventually might be bashed back to GT_CALL (when inlining is aborted for example).
+ // Therefore it should carry the GTF_CALL flag so that all the rules about spilling can apply to it as well.
+    // For example, impImportLeave or CEE_POP need to spill GT_RET_EXPR before emptying the evaluation stack.
+ node->gtFlags |= GTF_CALL;
+
+ return node;
+}
+
+GenTreeArgList* Compiler::gtNewListNode(GenTreePtr op1, GenTreeArgList* op2)
+{
+ assert((op1 != nullptr) && (op1->OperGet() != GT_LIST));
+
+ return new (this, GT_LIST) GenTreeArgList(op1, op2);
+}
+
+/*****************************************************************************
+ *
+ * Create a list out of one value.
+ */
+
+GenTreeArgList* Compiler::gtNewArgList(GenTreePtr arg)
+{
+ return new (this, GT_LIST) GenTreeArgList(arg);
+}
+
+/*****************************************************************************
+ *
+ * Create a list out of the two values.
+ */
+
+GenTreeArgList* Compiler::gtNewArgList(GenTreePtr arg1, GenTreePtr arg2)
+{
+ return new (this, GT_LIST) GenTreeArgList(arg1, gtNewArgList(arg2));
+}
+
+//------------------------------------------------------------------------
+// Compiler::gtNewAggregate:
+// Creates a new aggregate argument node. These nodes are used to
+// represent arguments that are composed of multiple values (e.g.
+// the lclVars that represent the fields of a promoted struct).
+//
+// Note that aggregate arguments are currently represented by GT_LIST
+// nodes that are marked with the GTF_LIST_AGGREGATE flag. This
+// representation may be changed in the future to instead use its own
+// node type (e.g. GT_AGGREGATE).
+//
+// Arguments:
+// firstElement - The first element in the aggregate's list of values.
+//
+// Returns:
+// The newly-created aggregate node.
+GenTreeArgList* Compiler::gtNewAggregate(GenTree* firstElement)
+{
+ GenTreeArgList* agg = gtNewArgList(firstElement);
+ agg->gtFlags |= GTF_LIST_AGGREGATE;
+ return agg;
+}
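+
+// For example (illustrative): a struct argument promoted into two field lclVars would be
+// represented as a GT_LIST carrying the GTF_LIST_AGGREGATE flag whose elements are those two
+// field lclVar nodes, which is what downstream code recognizes as an "aggregate" argument.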
+
+/*****************************************************************************
+ *
+ * Create a list out of the three values.
+ */
+
+GenTreeArgList* Compiler::gtNewArgList(GenTreePtr arg1, GenTreePtr arg2, GenTreePtr arg3)
+{
+ return new (this, GT_LIST) GenTreeArgList(arg1, gtNewArgList(arg2, arg3));
+}
+
+/*****************************************************************************
+ *
+ * Given a GT_CALL node, access the fgArgInfo and find the entry
+ * that has the matching argNum and return the fgArgTableEntryPtr
+ */
+
+fgArgTabEntryPtr Compiler::gtArgEntryByArgNum(GenTreePtr call, unsigned argNum)
+{
+ noway_assert(call->IsCall());
+ fgArgInfoPtr argInfo = call->gtCall.fgArgInfo;
+ noway_assert(argInfo != nullptr);
+
+ unsigned argCount = argInfo->ArgCount();
+ fgArgTabEntryPtr* argTable = argInfo->ArgTable();
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+
+ for (unsigned i = 0; i < argCount; i++)
+ {
+ curArgTabEntry = argTable[i];
+ if (curArgTabEntry->argNum == argNum)
+ {
+ return curArgTabEntry;
+ }
+ }
+ noway_assert(!"gtArgEntryByArgNum: argNum not found");
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Given a GT_CALL node, access the fgArgInfo and find the entry
+ * that has the matching node and return the fgArgTableEntryPtr
+ */
+
+fgArgTabEntryPtr Compiler::gtArgEntryByNode(GenTreePtr call, GenTreePtr node)
+{
+ noway_assert(call->IsCall());
+ fgArgInfoPtr argInfo = call->gtCall.fgArgInfo;
+ noway_assert(argInfo != nullptr);
+
+ unsigned argCount = argInfo->ArgCount();
+ fgArgTabEntryPtr* argTable = argInfo->ArgTable();
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+
+ for (unsigned i = 0; i < argCount; i++)
+ {
+ curArgTabEntry = argTable[i];
+
+ if (curArgTabEntry->node == node)
+ {
+ return curArgTabEntry;
+ }
+#ifdef PROTO_JIT
+ else if (node->OperGet() == GT_RELOAD && node->gtOp.gtOp1 == curArgTabEntry->node)
+ {
+ return curArgTabEntry;
+ }
+#endif // PROTO_JIT
+ else if (curArgTabEntry->parent != nullptr)
+ {
+ assert(curArgTabEntry->parent->IsList());
+ if (curArgTabEntry->parent->Current() == node)
+ {
+ return curArgTabEntry;
+ }
+ }
+ else // (curArgTabEntry->parent == NULL)
+ {
+ if (call->gtCall.gtCallObjp == node)
+ {
+ return curArgTabEntry;
+ }
+ }
+ }
+ noway_assert(!"gtArgEntryByNode: node not found");
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Find and return the entry with the given "lateArgInx". Requires that one is found
+ * (asserts this).
+ */
+fgArgTabEntryPtr Compiler::gtArgEntryByLateArgIndex(GenTreePtr call, unsigned lateArgInx)
+{
+ noway_assert(call->IsCall());
+ fgArgInfoPtr argInfo = call->gtCall.fgArgInfo;
+ noway_assert(argInfo != nullptr);
+
+ unsigned argCount = argInfo->ArgCount();
+ fgArgTabEntryPtr* argTable = argInfo->ArgTable();
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+
+ for (unsigned i = 0; i < argCount; i++)
+ {
+ curArgTabEntry = argTable[i];
+ if (curArgTabEntry->lateArgInx == lateArgInx)
+ {
+ return curArgTabEntry;
+ }
+ }
+    noway_assert(!"gtArgEntryByLateArgIndex: lateArgInx not found");
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Given an fgArgTabEntryPtr, return true if it is the 'this' pointer argument.
+ */
+bool Compiler::gtArgIsThisPtr(fgArgTabEntryPtr argEntry)
+{
+ return (argEntry->parent == nullptr);
+}
+
+/*****************************************************************************
+ *
+ * Create a node that will assign 'src' to 'dst'.
+ */
+
+GenTreePtr Compiler::gtNewAssignNode(GenTreePtr dst, GenTreePtr src)
+{
+ /* Mark the target as being assigned */
+
+ if ((dst->gtOper == GT_LCL_VAR) || (dst->OperGet() == GT_LCL_FLD))
+ {
+ dst->gtFlags |= GTF_VAR_DEF;
+ if (dst->IsPartialLclFld(this))
+ {
+ // We treat these partial writes as combined uses and defs.
+ dst->gtFlags |= GTF_VAR_USEASG;
+ }
+ }
+ dst->gtFlags |= GTF_DONT_CSE;
+
+ /* Create the assignment node */
+
+ GenTreePtr asg = gtNewOperNode(GT_ASG, dst->TypeGet(), dst, src);
+
+ /* Mark the expression as containing an assignment */
+
+ asg->gtFlags |= GTF_ASG;
+
+ return asg;
+}
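+
+// Usage sketch (illustrative only; 'tmpLcl' is a hypothetical int-typed local number):
+//
+//     GenTreePtr dst = gtNewLclvNode(tmpLcl, TYP_INT);
+//     GenTreePtr add = gtNewOperNode(GT_ADD, TYP_INT, gtNewLclvNode(tmpLcl, TYP_INT), gtNewIconNode(1));
+//     GenTreePtr asg = gtNewAssignNode(dst, add); // marks 'dst' as a def and sets GTF_ASG on 'asg'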
+
+//------------------------------------------------------------------------
+// gtNewObjNode: Creates a new Obj node.
+//
+// Arguments:
+// structHnd - The class handle of the struct type.
+// addr - The address of the struct.
+//
+// Return Value:
+// Returns a node representing the struct value at the given address.
+//
+// Notes:
+// It will currently return a GT_OBJ node for any struct type, but may
+// return a GT_IND or a non-indirection for a scalar type.
+// The node will not yet have its GC info initialized. This is because
+// we may not need this info if this is an r-value.
+
+GenTree* Compiler::gtNewObjNode(CORINFO_CLASS_HANDLE structHnd, GenTree* addr)
+{
+ var_types nodeType = impNormStructType(structHnd);
+ assert(varTypeIsStruct(nodeType));
+ unsigned size = info.compCompHnd->getClassSize(structHnd);
+
+ // It would be convenient to set the GC info at this time, but we don't actually require
+ // it unless this is going to be a destination.
+ if (!varTypeIsStruct(nodeType))
+ {
+ if ((addr->gtOper == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == nodeType))
+ {
+ return addr->gtGetOp1();
+ }
+ else
+ {
+ return gtNewOperNode(GT_IND, nodeType, addr);
+ }
+ }
+ GenTreeBlk* newBlkOrObjNode = new (this, GT_OBJ) GenTreeObj(nodeType, addr, structHnd, size);
+
+    // An Obj is not a global reference if it is known to be a local struct.
+ if ((addr->gtFlags & GTF_GLOB_REF) == 0)
+ {
+ GenTreeLclVarCommon* lclNode = addr->IsLocalAddrExpr();
+ if ((lclNode != nullptr) && !lvaIsImplicitByRefLocal(lclNode->gtLclNum))
+ {
+ newBlkOrObjNode->gtFlags &= ~GTF_GLOB_REF;
+ }
+ }
+ return newBlkOrObjNode;
+}
+
+//------------------------------------------------------------------------
+// gtSetObjGcInfo: Set the GC info on an object node
+//
+// Arguments:
+// objNode - The object node of interest
+
+void Compiler::gtSetObjGcInfo(GenTreeObj* objNode)
+{
+ CORINFO_CLASS_HANDLE structHnd = objNode->gtClass;
+ var_types nodeType = objNode->TypeGet();
+ unsigned size = objNode->gtBlkSize;
+ unsigned slots = 0;
+ unsigned gcPtrCount = 0;
+ BYTE* gcPtrs = nullptr;
+
+ assert(varTypeIsStruct(nodeType));
+ assert(size == info.compCompHnd->getClassSize(structHnd));
+ assert(nodeType == impNormStructType(structHnd));
+
+ if (nodeType == TYP_STRUCT)
+ {
+ if (size >= TARGET_POINTER_SIZE)
+ {
+ // Get the GC fields info
+ var_types simdBaseType; // Dummy argument
+ slots = (unsigned)(roundUp(size, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
+ gcPtrs = new (this, CMK_ASTNode) BYTE[slots];
+ nodeType = impNormStructType(structHnd, gcPtrs, &gcPtrCount, &simdBaseType);
+ }
+ }
+ objNode->SetGCInfo(gcPtrs, gcPtrCount, slots);
+ assert(objNode->gtType == nodeType);
+}
+
+//------------------------------------------------------------------------
+// gtNewStructVal: Return a node that represents a struct value
+//
+// Arguments:
+// structHnd - The class for the struct
+// addr - The address of the struct
+//
+// Return Value:
+// A block, object or local node that represents the struct value pointed to by 'addr'.
+
+GenTree* Compiler::gtNewStructVal(CORINFO_CLASS_HANDLE structHnd, GenTreePtr addr)
+{
+ if (addr->gtOper == GT_ADDR)
+ {
+ GenTree* val = addr->gtGetOp1();
+ if (val->OperGet() == GT_LCL_VAR)
+ {
+ unsigned lclNum = addr->gtGetOp1()->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(lvaTable[lclNum]);
+ if (varTypeIsStruct(varDsc) && (varDsc->lvVerTypeInfo.GetClassHandle() == structHnd) &&
+ !lvaIsImplicitByRefLocal(lclNum))
+ {
+ return addr->gtGetOp1();
+ }
+ }
+ }
+ return gtNewObjNode(structHnd, addr);
+}
+
+//------------------------------------------------------------------------
+// gtNewBlockVal: Return a node that represents a possibly untyped block value
+//
+// Arguments:
+// addr - The address of the block
+// size - The size of the block
+//
+// Return Value:
+// A block, object or local node that represents the block value pointed to by 'addr'.
+
+GenTree* Compiler::gtNewBlockVal(GenTreePtr addr, unsigned size)
+{
+ // By default we treat this as an opaque struct type with known size.
+ var_types blkType = TYP_STRUCT;
+#if FEATURE_SIMD
+ if ((addr->gtOper == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
+ {
+ GenTree* val = addr->gtGetOp1();
+ if (varTypeIsSIMD(val) && (genTypeSize(val->TypeGet()) == size))
+ {
+ blkType = val->TypeGet();
+ return addr->gtGetOp1();
+ }
+ }
+#endif // FEATURE_SIMD
+ return new (this, GT_BLK) GenTreeBlk(GT_BLK, blkType, addr, size);
+}
+
+// Creates a new assignment node for a CpObj.
+// Parameters (exactly the same as MSIL CpObj):
+//
+// dstAddr - The target to copy the struct to
+// srcAddr - The source to copy the struct from
+// structHnd - A class token that represents the type of object being copied. May be null
+// if FEATURE_SIMD is enabled and the source has a SIMD type.
+// isVolatile - Is this marked as volatile memory?
+
+GenTree* Compiler::gtNewCpObjNode(GenTreePtr dstAddr, GenTreePtr srcAddr, CORINFO_CLASS_HANDLE structHnd, bool isVolatile)
+{
+ GenTreePtr lhs = gtNewStructVal(structHnd, dstAddr);
+ GenTree* src = nullptr;
+ unsigned size;
+
+ if (lhs->OperIsBlk())
+ {
+ size = lhs->AsBlk()->gtBlkSize;
+ if (lhs->OperGet() == GT_OBJ)
+ {
+ gtSetObjGcInfo(lhs->AsObj());
+ }
+ }
+ else
+ {
+ size = genTypeSize(lhs->gtType);
+ }
+
+ if (srcAddr->OperGet() == GT_ADDR)
+ {
+ src = srcAddr->gtOp.gtOp1;
+ }
+ else
+ {
+ src = gtNewOperNode(GT_IND, lhs->TypeGet(), srcAddr);
+ }
+
+ GenTree* result = gtNewBlkOpNode(lhs, src, size, isVolatile, true);
+ return result;
+}
+
+//------------------------------------------------------------------------
+// FixupInitBlkValue: Fixup the init value for an initBlk operation
+//
+// Arguments:
+// asgType - The type of assignment that the initBlk is being transformed into
+//
+// Return Value:
+// Modifies the constant value on this node to be the appropriate "fill"
+// value for the initblk.
+//
+// Notes:
+// The initBlk MSIL instruction takes a byte value, which must be
+// extended to the size of the assignment when an initBlk is transformed
+// to an assignment of a primitive type.
+// This performs the appropriate extension.
+
+void GenTreeIntCon::FixupInitBlkValue(var_types asgType)
+{
+ assert(varTypeIsIntegralOrI(asgType));
+ unsigned size = genTypeSize(asgType);
+ if (size > 1)
+ {
+ size_t cns = gtIconVal;
+ cns = cns & 0xFF;
+ cns |= cns << 8;
+ if (size >= 4)
+ {
+ cns |= cns << 16;
+#ifdef _TARGET_64BIT_
+ if (size == 8)
+ {
+ cns |= cns << 32;
+ }
+#endif // _TARGET_64BIT_
+
+            // Make the type used in the GT_IND node match the evaluation type of the assignment.
+ gtType = asgType;
+
+            // If we are using a GT_INITBLK on a GC type, the value being assigned must be zero (null).
+ assert(!varTypeIsGC(asgType) || (cns == 0));
+ }
+
+ gtIconVal = cns;
+ }
+}
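+
+// Worked example (restating the widening above): for a fill byte of 0xAB, a 2-byte assignment
+// produces 0xABAB, a 4-byte assignment 0xABABABAB, and (on 64-bit targets) an 8-byte assignment
+// 0xABABABABABABABAB; for sizes of 4 or more the constant is also retyped to the assignment type.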
+
+//------------------------------------------------------------------------
+// gtBlockOpInit: Initializes a BlkOp GenTree
+//
+// Arguments:
+// result - an assignment node that is to be initialized.
+// dst - the target (destination) we want to either initialize or copy to.
+// src - the init value for InitBlk or the source struct for CpBlk/CpObj.
+// isVolatile - specifies whether this node is a volatile memory operation.
+//
+// Assumptions:
+// 'result' is an assignment that is newly constructed.
+// If 'dst' is TYP_STRUCT, then it must be a block node or lclVar.
+//
+// Notes:
+// This procedure centralizes all the logic to both enforce proper structure and
+// to properly construct any InitBlk/CpBlk node.
+
+void Compiler::gtBlockOpInit(GenTreePtr result, GenTreePtr dst, GenTreePtr srcOrFillVal, bool isVolatile)
+{
+ if (!result->OperIsBlkOp())
+ {
+ assert(dst->TypeGet() != TYP_STRUCT);
+ return;
+ }
+#ifdef DEBUG
+    // If the copy involves GC pointers, the caller must have already set
+    // the node's additional members (gtGcPtrs, gtGcPtrCount, gtSlots) on the dst.
+ if ((dst->gtOper == GT_OBJ) && dst->AsBlk()->HasGCPtr())
+ {
+ GenTreeObj* objNode = dst->AsObj();
+ assert(objNode->gtGcPtrs != nullptr);
+ assert(!IsUninitialized(objNode->gtGcPtrs));
+ assert(!IsUninitialized(objNode->gtGcPtrCount));
+ assert(!IsUninitialized(objNode->gtSlots) && objNode->gtSlots > 0);
+
+ for (unsigned i = 0; i < objNode->gtGcPtrCount; ++i)
+ {
+ CorInfoGCType t = (CorInfoGCType)objNode->gtGcPtrs[i];
+ switch (t)
+ {
+ case TYPE_GC_NONE:
+ case TYPE_GC_REF:
+ case TYPE_GC_BYREF:
+ case TYPE_GC_OTHER:
+ break;
+ default:
+ unreached();
+ }
+ }
+ }
+#endif // DEBUG
+
+    /* In the case of CpBlk, we want to avoid generating nodes where the source
+     * and destination are the same, for two reasons: first, such a copy is useless;
+     * second, it introduces liveness issues, and copying memory between overlapping
+     * locations is undefined behavior both per the ECMA standard and per the
+     * semantics of memcpy.
+     *
+     * NOTE: We only detect the case of the address of a local being copied to
+     * the local itself; any other, more complex expressions won't be caught.
+     *
+     * TODO-Cleanup: though having this logic is goodness (i.e. it avoids self-assignment
+     * of struct vars very early), it was added because fgInterBlockLocalVarLiveness()
+     * isn't handling self-assignment of struct variables correctly. This issue may not
+     * surface if struct promotion is ON (which is the case on x86/arm). But the
+     * fundamental issue still exists and needs to be addressed.
+     */
+ if (result->OperIsCopyBlkOp())
+ {
+ GenTreePtr currSrc = srcOrFillVal;
+ GenTreePtr currDst = dst;
+
+ if (currSrc->OperIsBlk() && (currSrc->AsBlk()->Addr()->OperGet() == GT_ADDR))
+ {
+ currSrc = currSrc->AsBlk()->Addr()->gtGetOp1();
+ }
+ if (currDst->OperIsBlk() && (currDst->AsBlk()->Addr()->OperGet() == GT_ADDR))
+ {
+ currDst = currDst->AsBlk()->Addr()->gtGetOp1();
+ }
+
+ if (currSrc->OperGet() == GT_LCL_VAR && currDst->OperGet() == GT_LCL_VAR &&
+ currSrc->gtLclVarCommon.gtLclNum == currDst->gtLclVarCommon.gtLclNum)
+ {
+ // Make this a NOP
+ // TODO-Cleanup: probably doesn't matter, but could do this earlier and avoid creating a GT_ASG
+ result->gtBashToNOP();
+ return;
+ }
+ }
+
+ // Propagate all effect flags from children
+ result->gtFlags |= dst->gtFlags & GTF_ALL_EFFECT;
+ result->gtFlags |= result->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT;
+
+ // TODO-1stClassStructs: This should be done only if the destination is non-local.
+ result->gtFlags |= (GTF_GLOB_REF | GTF_ASG);
+
+ // REVERSE_OPS is necessary because the use must occur before the def
+ result->gtFlags |= GTF_REVERSE_OPS;
+
+ result->gtFlags |= (dst->gtFlags & GTF_EXCEPT) | (srcOrFillVal->gtFlags & GTF_EXCEPT);
+
+ if (isVolatile)
+ {
+ result->gtFlags |= GTF_BLK_VOLATILE;
+ }
+
+#ifdef FEATURE_SIMD
+ if (result->OperIsCopyBlkOp() && varTypeIsSIMD(srcOrFillVal))
+ {
+ // If the source is a GT_SIMD node of SIMD type, then the dst lclvar struct
+ // should be labeled as simd intrinsic related struct.
+ // This is done so that the morpher can transform any field accesses into
+ // intrinsics, thus avoiding conflicting access methods (fields vs. whole-register).
+
+ GenTree* src = srcOrFillVal;
+ if (src->OperIsIndir() && (src->AsIndir()->Addr()->OperGet() == GT_ADDR))
+ {
+ src = src->AsIndir()->Addr()->gtGetOp1();
+ }
+ if (src->OperGet() == GT_SIMD)
+ {
+ if (dst->OperIsBlk() && (dst->AsIndir()->Addr()->OperGet() == GT_ADDR))
+ {
+ dst = dst->AsIndir()->Addr()->gtGetOp1();
+ }
+
+ if (dst->OperIsLocal() && varTypeIsStruct(dst))
+ {
+ unsigned lclNum = dst->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+ lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ }
+ }
+ }
+#endif // FEATURE_SIMD
+}
+
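+// Example (illustrative only; 'lclNum' and 'structSize' are assumed to describe an
+// existing TYP_STRUCT local): one observable effect of the self-copy check above is
+// that a block copy of a struct local onto itself degenerates into a no-op:
+//
+//     GenTreePtr dst = gtNewLclvNode(lclNum, TYP_STRUCT);
+//     GenTreePtr src = gtNewLclvNode(lclNum, TYP_STRUCT);
+//     GenTree*   asg = gtNewBlkOpNode(dst, src, structSize,
+//                                     false /* isVolatile */, true /* isCopyBlock */);
+//     // gtBlockOpInit (called by gtNewBlkOpNode, defined just below) sees that the
+//     // source and destination are the same local and bashes 'asg' to GT_NOP.
+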
+//------------------------------------------------------------------------
+// gtNewBlkOpNode: Creates a GenTree for a block (struct) assignment.
+//
+// Arguments:
+// dst - Destination or target to copy to / initialize the buffer.
+//    srcOrFillVal - the fill value for an init (InitBlk), or the source for a copy (CpBlk/CpObj).
+//    size         - The size of the buffer, or zero for a CpObj (the class is taken from the GT_OBJ dst).
+// isVolatile - Whether this is a volatile memory operation or not.
+// isCopyBlock - True if this is a block copy (rather than a block init).
+//
+// Return Value:
+// Returns the newly constructed and initialized block operation.
+//
+// Notes:
+// If size is zero, the dst must be a GT_OBJ with the class handle.
+// 'dst' must be a block node or lclVar.
+//
+GenTree* Compiler::gtNewBlkOpNode(
+ GenTreePtr dst, GenTreePtr srcOrFillVal, unsigned size, bool isVolatile, bool isCopyBlock)
+{
+ assert(dst->OperIsBlk() || dst->OperIsLocal());
+ if (isCopyBlock)
+ {
+ srcOrFillVal->gtFlags |= GTF_DONT_CSE;
+ if (srcOrFillVal->OperIsIndir() && (srcOrFillVal->gtGetOp1()->gtOper == GT_ADDR))
+ {
+ srcOrFillVal = srcOrFillVal->gtGetOp1()->gtGetOp1();
+ }
+ }
+
+ GenTree* result = gtNewAssignNode(dst, srcOrFillVal);
+ if (!isCopyBlock)
+ {
+ result->gtFlags |= GTF_BLK_INIT;
+ }
+ gtBlockOpInit(result, dst, srcOrFillVal, isVolatile);
+ return result;
+}
+
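+// Example (illustrative sketch; 'lclNum' and 'structSize' are assumed to describe an
+// existing TYP_STRUCT local): zero-initializing a struct local is an InitBlk, i.e.
+// isCopyBlock is false and srcOrFillVal is the fill value:
+//
+//     GenTreePtr dest = gtNewLclvNode(lclNum, TYP_STRUCT);
+//     GenTreePtr zero = gtNewIconNode(0);
+//     GenTree*   init = gtNewBlkOpNode(dest, zero, structSize,
+//                                      false /* isVolatile */, false /* isCopyBlock */);
+//
+// The result is marked with GTF_BLK_INIT here, and gtBlockOpInit (above) takes care
+// of propagating the effect flags.
+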
+/*****************************************************************************
+ *
+ * Clones the given tree and returns a copy of it.
+ * If 'complexOK' is false, the cloning is done only if the tree
+ * is not too complex (whatever that may mean);
+ * if 'complexOK' is true, we try slightly harder to clone the tree.
+ * In either case, nullptr is returned if the tree cannot be cloned.
+ *
+ * Note that there is the function gtCloneExpr() which does a more
+ * complete job if you can't handle this function failing.
+ */
+
+GenTreePtr Compiler::gtClone(GenTree* tree, bool complexOK)
+{
+ GenTreePtr copy;
+
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+
+#if defined(LATE_DISASM)
+ if (tree->IsIconHandle())
+ {
+ copy = gtNewIconHandleNode(tree->gtIntCon.gtIconVal, tree->gtFlags, tree->gtIntCon.gtFieldSeq,
+ tree->gtIntCon.gtIconHdl.gtIconHdl1, tree->gtIntCon.gtIconHdl.gtIconHdl2);
+ copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle;
+ copy->gtType = tree->gtType;
+ }
+ else
+#endif
+ {
+ copy = new (this, GT_CNS_INT)
+ GenTreeIntCon(tree->gtType, tree->gtIntCon.gtIconVal, tree->gtIntCon.gtFieldSeq);
+ copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle;
+ }
+ break;
+
+ case GT_LCL_VAR:
+ // Remember that the LclVar node has been cloned. The flag will be set
+ // on 'copy' as well.
+ tree->gtFlags |= GTF_VAR_CLONED;
+ copy = gtNewLclvNode(tree->gtLclVarCommon.gtLclNum, tree->gtType, tree->gtLclVar.gtLclILoffs);
+ break;
+
+ case GT_LCL_FLD:
+ case GT_LCL_FLD_ADDR:
+ // Remember that the LclVar node has been cloned. The flag will be set
+ // on 'copy' as well.
+ tree->gtFlags |= GTF_VAR_CLONED;
+ copy = new (this, tree->gtOper)
+ GenTreeLclFld(tree->gtOper, tree->TypeGet(), tree->gtLclFld.gtLclNum, tree->gtLclFld.gtLclOffs);
+ copy->gtLclFld.gtFieldSeq = tree->gtLclFld.gtFieldSeq;
+ break;
+
+ case GT_CLS_VAR:
+ copy = new (this, GT_CLS_VAR)
+ GenTreeClsVar(tree->gtType, tree->gtClsVar.gtClsVarHnd, tree->gtClsVar.gtFieldSeq);
+ break;
+
+ case GT_REG_VAR:
+ assert(!"clone regvar");
+
+ default:
+ if (!complexOK)
+ {
+ return nullptr;
+ }
+
+ if (tree->gtOper == GT_FIELD)
+ {
+ GenTreePtr objp;
+
+ // copied from line 9850
+
+ objp = nullptr;
+ if (tree->gtField.gtFldObj)
+ {
+ objp = gtClone(tree->gtField.gtFldObj, false);
+ if (!objp)
+ {
+ return objp;
+ }
+ }
+
+ copy = gtNewFieldRef(tree->TypeGet(), tree->gtField.gtFldHnd, objp, tree->gtField.gtFldOffset);
+ copy->gtField.gtFldMayOverlap = tree->gtField.gtFldMayOverlap;
+ }
+ else if (tree->gtOper == GT_ADD)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ if (op1->OperIsLeaf() && op2->OperIsLeaf())
+ {
+ op1 = gtClone(op1);
+ if (op1 == nullptr)
+ {
+ return nullptr;
+ }
+ op2 = gtClone(op2);
+ if (op2 == nullptr)
+ {
+ return nullptr;
+ }
+
+ copy = gtNewOperNode(GT_ADD, tree->TypeGet(), op1, op2);
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+ else if (tree->gtOper == GT_ADDR)
+ {
+ GenTreePtr op1 = gtClone(tree->gtOp.gtOp1);
+ if (op1 == nullptr)
+ {
+ return nullptr;
+ }
+ copy = gtNewOperNode(GT_ADDR, tree->TypeGet(), op1);
+ }
+ else
+ {
+ return nullptr;
+ }
+
+ break;
+ }
+
+ copy->gtFlags |= tree->gtFlags & ~GTF_NODE_MASK;
+#if defined(DEBUG)
+ copy->gtDebugFlags |= tree->gtDebugFlags & ~GTF_DEBUG_NODE_MASK;
+#endif // defined(DEBUG)
+
+ return copy;
+}
+
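+// Example (illustrative; 'baseLclNum' is an assumed byref local and 'someCall' stands
+// for any call tree): cloning a simple address computation succeeds because both
+// operands are leaves, while an arbitrarily complex tree makes gtClone return nullptr:
+//
+//     GenTreePtr addr = gtNewOperNode(GT_ADD, TYP_BYREF,
+//                                     gtNewLclvNode(baseLclNum, TYP_BYREF),
+//                                     gtNewIconNode(8, TYP_I_IMPL));
+//     GenTreePtr copy = gtClone(addr);      // non-null
+//     GenTreePtr big  = gtClone(someCall);  // nullptr: too complex for gtClone
+//
+// Callers that cannot tolerate failure should use gtCloneExpr() instead.
+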
+/*****************************************************************************
+ *
+ * Clones the given tree and returns a copy of it. Any references to the
+ * local variable 'varNum' are replaced with the integer constant 'varVal'.
+ */
+
+GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
+ unsigned addFlags,
+ unsigned varNum, // = (unsigned)-1
+ int varVal)
+{
+ if (tree == nullptr)
+ {
+ return nullptr;
+ }
+
+ /* Figure out what kind of a node we have */
+
+ genTreeOps oper = tree->OperGet();
+ unsigned kind = tree->OperKind();
+ GenTree* copy;
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ switch (oper)
+ {
+ case GT_CNS_INT:
+
+#if defined(LATE_DISASM)
+ if (tree->IsIconHandle())
+ {
+ copy = gtNewIconHandleNode(tree->gtIntCon.gtIconVal, tree->gtFlags, tree->gtIntCon.gtFieldSeq,
+ tree->gtIntCon.gtIconFld.gtIconCPX, tree->gtIntCon.gtIconFld.gtIconCls);
+ copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle;
+ copy->gtType = tree->gtType;
+ }
+ else
+#endif
+ {
+ copy = gtNewIconNode(tree->gtIntCon.gtIconVal, tree->gtType);
+ copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle;
+ copy->gtIntCon.gtFieldSeq = tree->gtIntCon.gtFieldSeq;
+ }
+ goto DONE;
+
+ case GT_CNS_LNG:
+ copy = gtNewLconNode(tree->gtLngCon.gtLconVal);
+ goto DONE;
+
+ case GT_CNS_DBL:
+ copy = gtNewDconNode(tree->gtDblCon.gtDconVal);
+ copy->gtType = tree->gtType; // keep the same type
+ goto DONE;
+
+ case GT_CNS_STR:
+ copy = gtNewSconNode(tree->gtStrCon.gtSconCPX, tree->gtStrCon.gtScpHnd);
+ goto DONE;
+
+ case GT_LCL_VAR:
+
+ if (tree->gtLclVarCommon.gtLclNum == varNum)
+ {
+ copy = gtNewIconNode(varVal, tree->gtType);
+ }
+ else
+ {
+ // Remember that the LclVar node has been cloned. The flag will
+ // be set on 'copy' as well.
+ tree->gtFlags |= GTF_VAR_CLONED;
+ copy = gtNewLclvNode(tree->gtLclVar.gtLclNum, tree->gtType, tree->gtLclVar.gtLclILoffs);
+ copy->AsLclVarCommon()->SetSsaNum(tree->AsLclVarCommon()->GetSsaNum());
+ }
+ copy->gtFlags = tree->gtFlags;
+ goto DONE;
+
+ case GT_LCL_FLD:
+ if (tree->gtLclFld.gtLclNum == varNum)
+ {
+ IMPL_LIMITATION("replacing GT_LCL_FLD with a constant");
+ }
+ else
+ {
+ // Remember that the LclVar node has been cloned. The flag will
+ // be set on 'copy' as well.
+ tree->gtFlags |= GTF_VAR_CLONED;
+ copy = new (this, GT_LCL_FLD)
+ GenTreeLclFld(tree->TypeGet(), tree->gtLclFld.gtLclNum, tree->gtLclFld.gtLclOffs);
+ copy->gtLclFld.gtFieldSeq = tree->gtLclFld.gtFieldSeq;
+ copy->gtFlags = tree->gtFlags;
+ }
+ goto DONE;
+
+ case GT_CLS_VAR:
+ copy = new (this, GT_CLS_VAR)
+ GenTreeClsVar(tree->TypeGet(), tree->gtClsVar.gtClsVarHnd, tree->gtClsVar.gtFieldSeq);
+ goto DONE;
+
+ case GT_RET_EXPR:
+ copy = gtNewInlineCandidateReturnExpr(tree->gtRetExpr.gtInlineCandidate, tree->gtType);
+ goto DONE;
+
+ case GT_MEMORYBARRIER:
+ copy = new (this, GT_MEMORYBARRIER) GenTree(GT_MEMORYBARRIER, TYP_VOID);
+ goto DONE;
+
+ case GT_ARGPLACE:
+ copy = gtNewArgPlaceHolderNode(tree->gtType, tree->gtArgPlace.gtArgPlaceClsHnd);
+ goto DONE;
+
+ case GT_REG_VAR:
+ NO_WAY("Cloning of GT_REG_VAR node not supported");
+ goto DONE;
+
+ case GT_FTN_ADDR:
+ copy = new (this, oper) GenTreeFptrVal(tree->gtType, tree->gtFptrVal.gtFptrMethod);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ copy->gtFptrVal.gtEntryPoint = tree->gtFptrVal.gtEntryPoint;
+ copy->gtFptrVal.gtLdftnResolvedToken = tree->gtFptrVal.gtLdftnResolvedToken;
+#endif
+ goto DONE;
+
+ case GT_CATCH_ARG:
+ case GT_NO_OP:
+ copy = new (this, oper) GenTree(oper, tree->gtType);
+ goto DONE;
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+#endif // !FEATURE_EH_FUNCLETS
+ case GT_JMP:
+ copy = new (this, oper) GenTreeVal(oper, tree->gtType, tree->gtVal.gtVal1);
+ goto DONE;
+
+ case GT_LABEL:
+ copy = new (this, oper) GenTreeLabel(tree->gtLabel.gtLabBB);
+ goto DONE;
+
+ default:
+ NO_WAY("Cloning of node not supported");
+ goto DONE;
+ }
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ /* If necessary, make sure we allocate a "fat" tree node */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if SMALL_TREE_NODES
+ switch (oper)
+ {
+ /* These nodes sometimes get bashed to "fat" ones */
+
+ case GT_MUL:
+ case GT_DIV:
+ case GT_MOD:
+
+ case GT_UDIV:
+ case GT_UMOD:
+
+ // In the implementation of gtNewLargeOperNode you have
+ // to give an oper that will create a small node,
+ // otherwise it asserts.
+ //
+ if (GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL)
+ {
+ copy = gtNewLargeOperNode(oper, tree->TypeGet(), tree->gtOp.gtOp1,
+ tree->OperIsBinary() ? tree->gtOp.gtOp2 : nullptr);
+ }
+ else // Always a large tree
+ {
+ if (tree->OperIsBinary())
+ {
+ copy = gtNewOperNode(oper, tree->TypeGet(), tree->gtOp.gtOp1, tree->gtOp.gtOp2);
+ }
+ else
+ {
+ copy = gtNewOperNode(oper, tree->TypeGet(), tree->gtOp.gtOp1);
+ }
+ }
+ break;
+
+ case GT_CAST:
+ copy = new (this, LargeOpOpcode()) GenTreeCast(tree->TypeGet(), tree->gtCast.CastOp(),
+ tree->gtCast.gtCastType DEBUGARG(/*largeNode*/ TRUE));
+ break;
+
+ // The nodes below this are not bashed, so they can be allocated at their individual sizes.
+
+ case GT_LIST:
+ // This is ridiculous, but would go away if we made a stronger distinction between argument lists, whose
+ // second argument *must* be an arglist*, and the uses of LIST in copyblk and initblk.
+ if (tree->gtOp.gtOp2 != nullptr && tree->gtOp.gtOp2->OperGet() == GT_LIST)
+ {
+ copy = new (this, GT_LIST) GenTreeArgList(tree->gtOp.gtOp1, tree->gtOp.gtOp2->AsArgList());
+ }
+ else
+ {
+ copy = new (this, GT_LIST) GenTreeOp(GT_LIST, TYP_VOID, tree->gtOp.gtOp1, tree->gtOp.gtOp2);
+ }
+ break;
+
+ case GT_INDEX:
+ {
+ GenTreeIndex* asInd = tree->AsIndex();
+ copy = new (this, GT_INDEX)
+ GenTreeIndex(asInd->TypeGet(), asInd->Arr(), asInd->Index(), asInd->gtIndElemSize);
+ copy->AsIndex()->gtStructElemClass = asInd->gtStructElemClass;
+ }
+ break;
+
+ case GT_ALLOCOBJ:
+ {
+ GenTreeAllocObj* asAllocObj = tree->AsAllocObj();
+ copy = new (this, GT_ALLOCOBJ) GenTreeAllocObj(tree->TypeGet(), asAllocObj->gtNewHelper,
+ asAllocObj->gtAllocObjClsHnd, asAllocObj->gtOp1);
+ }
+ break;
+
+ case GT_ARR_LENGTH:
+ copy = new (this, GT_ARR_LENGTH)
+ GenTreeArrLen(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtArrLen.ArrLenOffset());
+ break;
+
+ case GT_ARR_INDEX:
+ copy = new (this, GT_ARR_INDEX)
+ GenTreeArrIndex(tree->TypeGet(), gtCloneExpr(tree->gtArrIndex.ArrObj(), addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtArrIndex.IndexExpr(), addFlags, varNum, varVal),
+ tree->gtArrIndex.gtCurrDim, tree->gtArrIndex.gtArrRank,
+ tree->gtArrIndex.gtArrElemType);
+ break;
+
+ case GT_QMARK:
+ copy = new (this, GT_QMARK) GenTreeQmark(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtOp.gtOp2, this);
+ VarSetOps::AssignAllowUninitRhs(this, copy->gtQmark.gtThenLiveSet, tree->gtQmark.gtThenLiveSet);
+ VarSetOps::AssignAllowUninitRhs(this, copy->gtQmark.gtElseLiveSet, tree->gtQmark.gtElseLiveSet);
+ break;
+
+ case GT_OBJ:
+ copy = new (this, GT_OBJ)
+ GenTreeObj(tree->TypeGet(), tree->gtOp.gtOp1, tree->AsObj()->gtClass, tree->gtBlk.gtBlkSize);
+ copy->AsObj()->CopyGCInfo(tree->AsObj());
+ copy->gtBlk.gtBlkOpGcUnsafe = tree->gtBlk.gtBlkOpGcUnsafe;
+ break;
+
+ case GT_BLK:
+ copy = new (this, GT_BLK) GenTreeBlk(GT_BLK, tree->TypeGet(), tree->gtOp.gtOp1, tree->gtBlk.gtBlkSize);
+ copy->gtBlk.gtBlkOpGcUnsafe = tree->gtBlk.gtBlkOpGcUnsafe;
+ break;
+
+ case GT_DYN_BLK:
+ copy = new (this, GT_DYN_BLK) GenTreeDynBlk(tree->gtOp.gtOp1, tree->gtDynBlk.gtDynamicSize);
+ copy->gtBlk.gtBlkOpGcUnsafe = tree->gtBlk.gtBlkOpGcUnsafe;
+ break;
+
+ case GT_BOX:
+ copy = new (this, GT_BOX)
+ GenTreeBox(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtBox.gtAsgStmtWhenInlinedBoxValue);
+ break;
+
+ case GT_INTRINSIC:
+ copy = new (this, GT_INTRINSIC)
+ GenTreeIntrinsic(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtOp.gtOp2,
+ tree->gtIntrinsic.gtIntrinsicId, tree->gtIntrinsic.gtMethodHandle);
+#ifdef FEATURE_READYTORUN_COMPILER
+ copy->gtIntrinsic.gtEntryPoint = tree->gtIntrinsic.gtEntryPoint;
+#endif
+ break;
+
+ case GT_LEA:
+ {
+ GenTreeAddrMode* addrModeOp = tree->AsAddrMode();
+ copy =
+ new (this, GT_LEA) GenTreeAddrMode(addrModeOp->TypeGet(), addrModeOp->Base(), addrModeOp->Index(),
+ addrModeOp->gtScale, addrModeOp->gtOffset);
+ }
+ break;
+
+ case GT_COPY:
+ case GT_RELOAD:
+ {
+ copy = new (this, oper) GenTreeCopyOrReload(oper, tree->TypeGet(), tree->gtGetOp1());
+ }
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ {
+ GenTreeSIMD* simdOp = tree->AsSIMD();
+ copy = gtNewSIMDNode(simdOp->TypeGet(), simdOp->gtGetOp1(), simdOp->gtGetOp2(),
+ simdOp->gtSIMDIntrinsicID, simdOp->gtSIMDBaseType, simdOp->gtSIMDSize);
+ }
+ break;
+#endif
+
+ default:
+ assert(!GenTree::IsExOp(tree->OperKind()) && tree->OperIsSimple());
+ // We're in the SimpleOp case, so it's always unary or binary.
+ if (GenTree::OperIsUnary(tree->OperGet()))
+ {
+ copy = gtNewOperNode(oper, tree->TypeGet(), tree->gtOp.gtOp1, /*doSimplifications*/ false);
+ }
+ else
+ {
+ assert(GenTree::OperIsBinary(tree->OperGet()));
+ copy = gtNewOperNode(oper, tree->TypeGet(), tree->gtOp.gtOp1, tree->gtOp.gtOp2);
+ }
+ break;
+ }
+#else
+ // We're in the SimpleOp case, so it's always unary or binary.
+ copy = gtNewOperNode(oper, tree->TypeGet(), tree->gtOp.gtOp1, tree->gtOp.gtOp2);
+#endif
+
+ // Some flags are conceptually part of the gtOper, and should be copied immediately.
+ if (tree->gtOverflowEx())
+ {
+ copy->gtFlags |= GTF_OVERFLOW;
+ }
+ if (copy->OperGet() == GT_CAST)
+ {
+ copy->gtFlags |= (tree->gtFlags & GTF_UNSIGNED);
+ }
+
+ if (tree->gtOp.gtOp1)
+ {
+ copy->gtOp.gtOp1 = gtCloneExpr(tree->gtOp.gtOp1, addFlags, varNum, varVal);
+ }
+
+ if (tree->gtGetOp2())
+ {
+ copy->gtOp.gtOp2 = gtCloneExpr(tree->gtOp.gtOp2, addFlags, varNum, varVal);
+ }
+
+ /* Flags */
+ addFlags |= tree->gtFlags;
+
+ // Copy any node annotations, if necessary.
+ switch (tree->gtOper)
+ {
+ case GT_ASG:
+ {
+ IndirectAssignmentAnnotation* pIndirAnnot = nullptr;
+ if (m_indirAssignMap != nullptr && GetIndirAssignMap()->Lookup(tree, &pIndirAnnot))
+ {
+ IndirectAssignmentAnnotation* pNewIndirAnnot = new (this, CMK_Unknown)
+ IndirectAssignmentAnnotation(pIndirAnnot->m_lclNum, pIndirAnnot->m_fieldSeq,
+ pIndirAnnot->m_isEntire);
+ GetIndirAssignMap()->Set(copy, pNewIndirAnnot);
+ }
+ }
+ break;
+
+ case GT_STOREIND:
+ case GT_IND:
+ case GT_OBJ:
+ case GT_STORE_OBJ:
+ if (tree->gtFlags & GTF_IND_ARR_INDEX)
+ {
+ ArrayInfo arrInfo;
+ bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
+ assert(b);
+ GetArrayInfoMap()->Set(copy, arrInfo);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+#ifdef DEBUG
+ /* GTF_NODE_MASK should not be propagated from 'tree' to 'copy' */
+ addFlags &= ~GTF_NODE_MASK;
+#endif
+
+ // Effects flags propagate upwards.
+ if (copy->gtOp.gtOp1 != nullptr)
+ {
+ copy->gtFlags |= (copy->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+ }
+ if (copy->gtGetOp2() != nullptr)
+ {
+ copy->gtFlags |= (copy->gtGetOp2()->gtFlags & GTF_ALL_EFFECT);
+ }
+
+        // The early morph for TailCall creates a GT_NOP with the GTF_REG_VAL flag set.
+        // Thus we have to copy the gtRegNum/gtRegPair value if we clone it here.
+ //
+ if (addFlags & GTF_REG_VAL)
+ {
+ copy->CopyReg(tree);
+ }
+
+        // We can call gtCloneExpr() before we have called fgMorph, e.g. when we expand a GT_INDEX node in
+        // fgMorphArrayIndex(). The method gtFoldExpr() expects to be run after fgMorph, so it will set the
+        // GTF_DEBUG_NODE_MORPHED flag on nodes that it adds/modifies; when we later call fgMorph we would assert.
+        // We really only need to fold when this method is used to replace references to
+        // a local variable with an integer.
+ //
+ if (varNum != (unsigned)-1)
+ {
+ /* Try to do some folding */
+ copy = gtFoldExpr(copy);
+ }
+
+ goto DONE;
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_STMT:
+ copy = gtCloneExpr(tree->gtStmt.gtStmtExpr, addFlags, varNum, varVal);
+ copy = gtNewStmt(copy, tree->gtStmt.gtStmtILoffsx);
+ goto DONE;
+
+ case GT_CALL:
+
+ copy = new (this, GT_CALL) GenTreeCall(tree->TypeGet());
+
+ copy->gtCall.gtCallObjp =
+ tree->gtCall.gtCallObjp ? gtCloneExpr(tree->gtCall.gtCallObjp, addFlags, varNum, varVal) : nullptr;
+ copy->gtCall.gtCallArgs = tree->gtCall.gtCallArgs
+ ? gtCloneExpr(tree->gtCall.gtCallArgs, addFlags, varNum, varVal)->AsArgList()
+ : nullptr;
+ copy->gtCall.gtCallMoreFlags = tree->gtCall.gtCallMoreFlags;
+ copy->gtCall.gtCallLateArgs =
+ tree->gtCall.gtCallLateArgs
+ ? gtCloneExpr(tree->gtCall.gtCallLateArgs, addFlags, varNum, varVal)->AsArgList()
+ : nullptr;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ copy->gtCall.regArgList = tree->gtCall.regArgList;
+ copy->gtCall.regArgListCount = tree->gtCall.regArgListCount;
+#endif
+
+ // The call sig comes from the EE and doesn't change throughout the compilation process, meaning
+ // we only really need one physical copy of it. Therefore a shallow pointer copy will suffice.
+ // (Note that this still holds even if the tree we are cloning was created by an inlinee compiler,
+ // because the inlinee still uses the inliner's memory allocator anyway.)
+ copy->gtCall.callSig = tree->gtCall.callSig;
+
+ copy->gtCall.gtCallType = tree->gtCall.gtCallType;
+ copy->gtCall.gtReturnType = tree->gtCall.gtReturnType;
+ copy->gtCall.gtControlExpr = tree->gtCall.gtControlExpr;
+
+ /* Copy the union */
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ copy->gtCall.gtCallCookie = tree->gtCall.gtCallCookie
+ ? gtCloneExpr(tree->gtCall.gtCallCookie, addFlags, varNum, varVal)
+ : nullptr;
+ copy->gtCall.gtCallAddr =
+ tree->gtCall.gtCallAddr ? gtCloneExpr(tree->gtCall.gtCallAddr, addFlags, varNum, varVal) : nullptr;
+ }
+ else if (tree->gtFlags & GTF_CALL_VIRT_STUB)
+ {
+ copy->gtCall.gtCallMethHnd = tree->gtCall.gtCallMethHnd;
+ copy->gtCall.gtStubCallStubAddr = tree->gtCall.gtStubCallStubAddr;
+ }
+ else
+ {
+ copy->gtCall.gtCallMethHnd = tree->gtCall.gtCallMethHnd;
+ copy->gtCall.gtInlineCandidateInfo = tree->gtCall.gtInlineCandidateInfo;
+ }
+
+ if (tree->gtCall.fgArgInfo)
+ {
+ // Create and initialize the fgArgInfo for our copy of the call tree
+ copy->gtCall.fgArgInfo = new (this, CMK_Unknown) fgArgInfo(copy, tree);
+ }
+ else
+ {
+ copy->gtCall.fgArgInfo = nullptr;
+ }
+ copy->gtCall.gtRetClsHnd = tree->gtCall.gtRetClsHnd;
+
+#if FEATURE_MULTIREG_RET
+ copy->gtCall.gtReturnTypeDesc = tree->gtCall.gtReturnTypeDesc;
+#endif
+
+#ifdef LEGACY_BACKEND
+ copy->gtCall.gtCallRegUsedMask = tree->gtCall.gtCallRegUsedMask;
+#endif // LEGACY_BACKEND
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ copy->gtCall.setEntryPoint(tree->gtCall.gtEntryPoint);
+#endif
+
+#ifdef DEBUG
+ copy->gtCall.gtInlineObservation = tree->gtCall.gtInlineObservation;
+#endif
+
+ copy->AsCall()->CopyOtherRegFlags(tree->AsCall());
+ break;
+
+ case GT_FIELD:
+
+ copy = gtNewFieldRef(tree->TypeGet(), tree->gtField.gtFldHnd, nullptr, tree->gtField.gtFldOffset);
+
+ copy->gtField.gtFldObj =
+ tree->gtField.gtFldObj ? gtCloneExpr(tree->gtField.gtFldObj, addFlags, varNum, varVal) : nullptr;
+ copy->gtField.gtFldMayOverlap = tree->gtField.gtFldMayOverlap;
+#ifdef FEATURE_READYTORUN_COMPILER
+ copy->gtField.gtFieldLookup = tree->gtField.gtFieldLookup;
+#endif
+
+ break;
+
+ case GT_ARR_ELEM:
+ {
+ GenTreePtr inds[GT_ARR_MAX_RANK];
+ for (unsigned dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ inds[dim] = gtCloneExpr(tree->gtArrElem.gtArrInds[dim], addFlags, varNum, varVal);
+ }
+ copy = new (this, GT_ARR_ELEM)
+ GenTreeArrElem(tree->TypeGet(), gtCloneExpr(tree->gtArrElem.gtArrObj, addFlags, varNum, varVal),
+ tree->gtArrElem.gtArrRank, tree->gtArrElem.gtArrElemSize, tree->gtArrElem.gtArrElemType,
+ &inds[0]);
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ {
+ copy = new (this, GT_ARR_OFFSET)
+ GenTreeArrOffs(tree->TypeGet(), gtCloneExpr(tree->gtArrOffs.gtOffset, addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtArrOffs.gtIndex, addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtArrOffs.gtArrObj, addFlags, varNum, varVal),
+ tree->gtArrOffs.gtCurrDim, tree->gtArrOffs.gtArrRank, tree->gtArrOffs.gtArrElemType);
+ }
+ break;
+
+ case GT_CMPXCHG:
+ copy = new (this, GT_CMPXCHG)
+ GenTreeCmpXchg(tree->TypeGet(), gtCloneExpr(tree->gtCmpXchg.gtOpLocation, addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtCmpXchg.gtOpValue, addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtCmpXchg.gtOpComparand, addFlags, varNum, varVal));
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ copy = new (this, oper) GenTreeBoundsChk(oper, tree->TypeGet(),
+ gtCloneExpr(tree->gtBoundsChk.gtArrLen, addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtBoundsChk.gtIndex, addFlags, varNum, varVal),
+ tree->gtBoundsChk.gtThrowKind);
+ break;
+
+ case GT_STORE_DYN_BLK:
+ case GT_DYN_BLK:
+ copy = new (this, oper) GenTreeDynBlk(gtCloneExpr(tree->gtDynBlk.Addr(), addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtDynBlk.gtDynamicSize, addFlags, varNum, varVal));
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ NO_WAY("unexpected operator");
+ }
+
+DONE:
+
+ // If it has a zero-offset field seq, copy annotation.
+ if (tree->TypeGet() == TYP_BYREF)
+ {
+ FieldSeqNode* fldSeq = nullptr;
+ if (GetZeroOffsetFieldMap()->Lookup(tree, &fldSeq))
+ {
+ GetZeroOffsetFieldMap()->Set(copy, fldSeq);
+ }
+ }
+
+    copy->gtVNPair = tree->gtVNPair; // A cloned tree gets the original's Value number pair
+
+ /* We assume the FP stack level will be identical */
+
+ copy->gtCopyFPlvl(tree);
+
+    /* Compute the flags for the copied node. Note that we can do this only
+       if we didn't call gtFoldExpr(copy) */
+
+ if (copy->gtOper == oper)
+ {
+ addFlags |= tree->gtFlags;
+
+#ifdef DEBUG
+ /* GTF_NODE_MASK should not be propagated from 'tree' to 'copy' */
+ addFlags &= ~GTF_NODE_MASK;
+#endif
+ // Some other flags depend on the context of the expression, and should not be preserved.
+ // For example, GTF_RELOP_QMARK:
+ if (copy->OperKind() & GTK_RELOP)
+ {
+ addFlags &= ~GTF_RELOP_QMARK;
+ }
+ // On the other hand, if we're creating such a context, restore this flag.
+ if (copy->OperGet() == GT_QMARK)
+ {
+ copy->gtOp.gtOp1->gtFlags |= GTF_RELOP_QMARK;
+ }
+
+ copy->gtFlags |= addFlags;
+ }
+
+ /* GTF_COLON_COND should be propagated from 'tree' to 'copy' */
+ copy->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
+
+#if defined(DEBUG)
+ // Non-node debug flags should be propagated from 'tree' to 'copy'
+ copy->gtDebugFlags |= (tree->gtDebugFlags & ~GTF_DEBUG_NODE_MASK);
+#endif
+
+ /* Make sure to copy back fields that may have been initialized */
+
+ copy->CopyRawCosts(tree);
+ copy->gtRsvdRegs = tree->gtRsvdRegs;
+ copy->CopyReg(tree);
+ return copy;
+}
+
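+// Example (illustrative): the varNum/varVal substitution is useful when specializing
+// a tree for a known value of a local, e.g. a loop iteration variable. Assuming
+// 'cond' is a comparison tree that references local V03:
+//
+//     // Clone 'cond', replacing every use of V03 with the constant 0.
+//     GenTreePtr specialized = gtCloneExpr(cond, 0, 3 /* varNum */, 0 /* varVal */);
+//
+// Because a substitution was requested, gtCloneExpr also runs gtFoldExpr on the
+// copy, so the result may already be folded down to a constant.
+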
+//------------------------------------------------------------------------
+// gtReplaceTree: Replace a tree with a new tree.
+//
+// Arguments:
+// stmt - The top-level root stmt of the tree being replaced.
+// Must not be null.
+// tree - The tree being replaced. Must not be null.
+// replacementTree - The replacement tree. Must not be null.
+//
+// Return Value:
+// The tree node that replaces the old tree.
+//
+// Assumptions:
+// The sequencing of the stmt has been done.
+//
+// Notes:
+// The caller must ensure that the original statement has been sequenced,
+// but this method will sequence 'replacementTree', and insert it into the
+// proper place in the statement sequence.
+
+GenTreePtr Compiler::gtReplaceTree(GenTreePtr stmt, GenTreePtr tree, GenTreePtr replacementTree)
+{
+ assert(fgStmtListThreaded);
+ assert(tree != nullptr);
+ assert(stmt != nullptr);
+ assert(replacementTree != nullptr);
+
+ GenTreePtr* treePtr = nullptr;
+ GenTreePtr treeParent = tree->gtGetParent(&treePtr);
+
+ assert(treeParent != nullptr || tree == stmt->gtStmt.gtStmtExpr);
+
+ if (treePtr == nullptr)
+ {
+ // Replace the stmt expr and rebuild the linear order for "stmt".
+ assert(treeParent == nullptr);
+ assert(fgOrder != FGOrderLinear);
+        stmt->gtStmt.gtStmtExpr = replacementTree;
+ fgSetStmtSeq(stmt);
+ }
+ else
+ {
+ assert(treeParent != nullptr);
+
+ GenTreePtr treeFirstNode = fgGetFirstNode(tree);
+ GenTreePtr treeLastNode = tree;
+ GenTreePtr treePrevNode = treeFirstNode->gtPrev;
+ GenTreePtr treeNextNode = treeLastNode->gtNext;
+
+ *treePtr = replacementTree;
+
+ // Build the linear order for "replacementTree".
+ fgSetTreeSeq(replacementTree, treePrevNode);
+
+ // Restore linear-order Prev and Next for "replacementTree".
+ if (treePrevNode != nullptr)
+ {
+ treeFirstNode = fgGetFirstNode(replacementTree);
+ treeFirstNode->gtPrev = treePrevNode;
+ treePrevNode->gtNext = treeFirstNode;
+ }
+ else
+ {
+            // Update the linear order start of "stmt" if treeFirstNode
+            // appears to have replaced the original first node.
+ assert(treeFirstNode == stmt->gtStmt.gtStmtList);
+ stmt->gtStmt.gtStmtList = fgGetFirstNode(replacementTree);
+ }
+
+ if (treeNextNode != nullptr)
+ {
+ treeLastNode = replacementTree;
+ treeLastNode->gtNext = treeNextNode;
+ treeNextNode->gtPrev = treeLastNode;
+ }
+
+ bool needFixupCallArg = false;
+ GenTreePtr node = treeParent;
+
+ // If we have replaced an arg, then update pointers in argtable.
+ do
+ {
+ // Look for the first enclosing callsite
+ switch (node->OperGet())
+ {
+ case GT_LIST:
+ case GT_ARGPLACE:
+ // "tree" is likely an argument of a call.
+ needFixupCallArg = true;
+ break;
+
+ case GT_CALL:
+ if (needFixupCallArg)
+ {
+ // We have replaced an arg, so update pointers in argtable.
+ fgFixupArgTabEntryPtr(node, tree, replacementTree);
+ needFixupCallArg = false;
+ }
+ break;
+
+ default:
+                    // "tree" is unlikely to be an argument of a call.
+ needFixupCallArg = false;
+ break;
+ }
+
+ if (needFixupCallArg)
+ {
+ // Keep tracking to update the first enclosing call.
+ node = node->gtGetParent(nullptr);
+ }
+ else
+ {
+ // Stop tracking.
+ node = nullptr;
+ }
+ } while (node != nullptr);
+
+ // Propagate side-effect flags of "replacementTree" to its parents if needed.
+ gtUpdateSideEffects(treeParent, tree->gtFlags, replacementTree->gtFlags);
+ }
+
+ return replacementTree;
+}
+
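+// Example (illustrative sketch): replacing a subtree with a constant inside an
+// already-sequenced statement, where 'stmt' is the enclosing GT_STMT, 'oldTree' is an
+// integral-typed node somewhere under stmt->gtStmt.gtStmtExpr:
+//
+//     GenTreePtr newTree = gtNewIconNode(0, oldTree->TypeGet());
+//     gtReplaceTree(stmt, oldTree, newTree);
+//
+// gtReplaceTree re-threads the gtPrev/gtNext links around 'newTree', fixes up the
+// call argument table if 'oldTree' was a call argument, and lets gtUpdateSideEffects
+// (below) refresh the effect flags on the ancestors.
+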
+//------------------------------------------------------------------------
+// gtUpdateSideEffects: Update the side effects for ancestors.
+//
+// Arguments:
+// treeParent - The immediate parent node.
+// oldGtFlags - The stale gtFlags.
+// newGtFlags - The new gtFlags.
+//
+// Assumptions:
+// Linear order of the stmt has been established.
+//
+// Notes:
+// The routine is used for updating the stale side effect flags for ancestor
+// nodes starting from treeParent up to the top-level stmt expr.
+
+void Compiler::gtUpdateSideEffects(GenTreePtr treeParent, unsigned oldGtFlags, unsigned newGtFlags)
+{
+ assert(fgStmtListThreaded);
+
+ oldGtFlags = oldGtFlags & GTF_ALL_EFFECT;
+ newGtFlags = newGtFlags & GTF_ALL_EFFECT;
+
+ if (oldGtFlags != newGtFlags)
+ {
+ while (treeParent)
+ {
+ treeParent->gtFlags &= ~oldGtFlags;
+ treeParent->gtFlags |= newGtFlags;
+ treeParent = treeParent->gtGetParent(nullptr);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Compares two trees and returns true when both trees are the same.
+ * Instead of fully comparing the two trees this method may simply return false,
+ * so callers should not assume that the trees are different when false is returned.
+ * Only when true is returned can the caller perform code optimizations.
+ * The current implementation only compares a limited set of LEAF/CONST nodes
+ * and returns false for all other trees.
+ */
+bool Compiler::gtCompareTree(GenTree* op1, GenTree* op2)
+{
+ /* Make sure that both trees are of the same GT node kind */
+ if (op1->OperGet() != op2->OperGet())
+ {
+ return false;
+ }
+
+ /* Make sure that both trees are returning the same type */
+ if (op1->gtType != op2->gtType)
+ {
+ return false;
+ }
+
+ /* Figure out what kind of a node we have */
+
+ genTreeOps oper = op1->OperGet();
+ unsigned kind = op1->OperKind();
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ switch (oper)
+ {
+ case GT_CNS_INT:
+ if ((op1->gtIntCon.gtIconVal == op2->gtIntCon.gtIconVal) && GenTree::SameIconHandleFlag(op1, op2))
+ {
+ return true;
+ }
+ break;
+
+ case GT_CNS_LNG:
+ if (op1->gtLngCon.gtLconVal == op2->gtLngCon.gtLconVal)
+ {
+ return true;
+ }
+ break;
+
+ case GT_CNS_STR:
+ if (op1->gtStrCon.gtSconCPX == op2->gtStrCon.gtSconCPX)
+ {
+ return true;
+ }
+ break;
+
+ case GT_LCL_VAR:
+ if (op1->gtLclVarCommon.gtLclNum == op2->gtLclVarCommon.gtLclNum)
+ {
+ return true;
+ }
+ break;
+
+ case GT_CLS_VAR:
+ if (op1->gtClsVar.gtClsVarHnd == op2->gtClsVar.gtClsVarHnd)
+ {
+ return true;
+ }
+ break;
+
+ default:
+ // we return false for these unhandled 'oper' kinds
+ break;
+ }
+ }
+ return false;
+}
+
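+// Example (illustrative): because the comparison is conservative, only a 'true'
+// result carries information:
+//
+//     if (gtCompareTree(tree1, tree2))
+//     {
+//         // Safe: the two trees are known to denote the same value.
+//     }
+//     else
+//     {
+//         // No conclusion: the trees may still be identical.
+//     }
+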
+GenTreePtr Compiler::gtGetThisArg(GenTreePtr call)
+{
+ assert(call->gtOper == GT_CALL);
+
+ if (call->gtCall.gtCallObjp != nullptr)
+ {
+ if (call->gtCall.gtCallObjp->gtOper != GT_NOP && call->gtCall.gtCallObjp->gtOper != GT_ASG)
+ {
+ if (!(call->gtCall.gtCallObjp->gtFlags & GTF_LATE_ARG))
+ {
+ return call->gtCall.gtCallObjp;
+ }
+ }
+
+ if (call->gtCall.gtCallLateArgs)
+ {
+ regNumber thisReg = REG_ARG_0;
+ unsigned argNum = 0;
+ fgArgTabEntryPtr thisArgTabEntry = gtArgEntryByArgNum(call, argNum);
+ GenTreePtr result = thisArgTabEntry->node;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ GenTreePtr lateArgs = call->gtCall.gtCallLateArgs;
+ regList list = call->gtCall.regArgList;
+ int index = 0;
+            while (lateArgs != nullptr)
+ {
+ assert(lateArgs->gtOper == GT_LIST);
+ assert(index < call->gtCall.regArgListCount);
+ regNumber curArgReg = list[index];
+ if (curArgReg == thisReg)
+ {
+                    if (optAssertionPropagatedCurrentStmt)
+                    {
+                        result = lateArgs->gtOp.gtOp1;
+                    }
+
+ assert(result == lateArgs->gtOp.gtOp1);
+ }
+
+ lateArgs = lateArgs->gtOp.gtOp2;
+ index++;
+ }
+#endif
+ return result;
+ }
+ }
+ return nullptr;
+}
+
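+// Example (illustrative; 'callTree' is an assumed GT_CALL node): gtGetThisArg finds
+// the 'this' argument of a call whether it is still attached as gtCallObjp or has
+// been moved to the late argument list:
+//
+//     GenTreePtr thisPtr = gtGetThisArg(callTree);
+//     if ((thisPtr != nullptr) && (thisPtr->gtOper == GT_LCL_VAR))
+//     {
+//         unsigned thisLclNum = thisPtr->gtLclVarCommon.gtLclNum;
+//     }
+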
+bool GenTree::gtSetFlags() const
+{
+    //
+    // When FEATURE_SET_FLAGS (_TARGET_ARM_) is active, this method returns true
+    // only when gtFlags has the GTF_SET_FLAGS flag set.
+    // Otherwise, the architecture has instructions that typically set the flags
+    // as a side effect, and this method returns true for them.
+    //
+    // Exceptions: GT_IND (load/store) is not allowed to set the flags,
+    // and on XARCH GT_MUL/GT_DIV and all overflow instructions
+    // do not set the condition flags.
+    //
+ // Precondition we have a GTK_SMPOP
+ //
+ assert(OperIsSimple());
+
+ if (!varTypeIsIntegralOrI(TypeGet()))
+ {
+ return false;
+ }
+
+#if FEATURE_SET_FLAGS
+
+ if ((gtFlags & GTF_SET_FLAGS) && gtOper != GT_IND)
+ {
+ // GTF_SET_FLAGS is not valid on GT_IND and is overlaid with GTF_NONFAULTING_IND
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+
+#else // !FEATURE_SET_FLAGS
+
+#ifdef _TARGET_XARCH_
+ // Return true if/when the codegen for this node will set the flags
+ //
+ //
+ if ((gtOper == GT_IND) || (gtOper == GT_MUL) || (gtOper == GT_DIV))
+ {
+ return false;
+ }
+ else if (gtOverflowEx())
+ {
+ return false;
+ }
+ else
+ {
+ return true;
+ }
+#else
+ // Otherwise for other architectures we should return false
+ return false;
+#endif
+
+#endif // !FEATURE_SET_FLAGS
+}
+
+bool GenTree::gtRequestSetFlags()
+{
+ bool result = false;
+
+#if FEATURE_SET_FLAGS
+ // This method is a Nop unless FEATURE_SET_FLAGS is defined
+
+ // In order to set GTF_SET_FLAGS
+ // we must have a GTK_SMPOP
+    // and we have an integer or machine size type (not floating point or TYP_LONG on 32-bit)
+ //
+ if (!OperIsSimple())
+ return false;
+
+ if (!varTypeIsIntegralOrI(TypeGet()))
+ return false;
+
+ switch (gtOper)
+ {
+ case GT_IND:
+ case GT_ARR_LENGTH:
+ // These will turn into simple load from memory instructions
+ // and we can't force the setting of the flags on load from memory
+ break;
+
+ case GT_MUL:
+ case GT_DIV:
+ // These instructions don't set the flags (on x86/x64)
+ //
+ break;
+
+ default:
+ // Otherwise we can set the flags for this gtOper
+ // and codegen must set the condition flags.
+ //
+ gtFlags |= GTF_SET_FLAGS;
+ result = true;
+ break;
+ }
+#endif // FEATURE_SET_FLAGS
+
+ // Codegen for this tree must set the condition flags if
+ // this method returns true.
+ //
+ return result;
+}
+
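+// Example (illustrative): on targets with FEATURE_SET_FLAGS (ARM), a phase that wants
+// to remove a compare-against-zero can ask the operand to produce the flags instead.
+// Assuming 'op' is the integral GTK_SMPOP node feeding the compare:
+//
+//     if (op->gtRequestSetFlags())
+//     {
+//         // GTF_SET_FLAGS is now set on 'op'; gtSetFlags() reports true and codegen
+//         // must emit the flag-setting form of the instruction.
+//         assert(op->gtSetFlags());
+//     }
+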
+/*****************************************************************************/
+void GenTree::CopyTo(class Compiler* comp, const GenTree& gt)
+{
+ gtOper = gt.gtOper;
+ gtType = gt.gtType;
+ gtAssertionNum = gt.gtAssertionNum;
+
+ gtRegNum = gt.gtRegNum; // one union member.
+ CopyCosts(&gt);
+
+ gtFlags = gt.gtFlags;
+ gtVNPair = gt.gtVNPair;
+
+ gtRsvdRegs = gt.gtRsvdRegs;
+
+#ifdef LEGACY_BACKEND
+ gtUsedRegs = gt.gtUsedRegs;
+#endif // LEGACY_BACKEND
+
+#if FEATURE_STACK_FP_X87
+ gtFPlvl = gt.gtFPlvl;
+#endif // FEATURE_STACK_FP_X87
+
+ gtNext = gt.gtNext;
+ gtPrev = gt.gtPrev;
+#ifdef DEBUG
+ gtTreeID = gt.gtTreeID;
+ gtSeqNum = gt.gtSeqNum;
+#endif
+ // Largest node subtype:
+ void* remDst = reinterpret_cast<char*>(this) + sizeof(GenTree);
+ void* remSrc = reinterpret_cast<char*>(const_cast<GenTree*>(&gt)) + sizeof(GenTree);
+ memcpy(remDst, remSrc, TREE_NODE_SZ_LARGE - sizeof(GenTree));
+}
+
+void GenTree::CopyToSmall(const GenTree& gt)
+{
+ // Small node size is defined by GenTreeOp.
+ void* remDst = reinterpret_cast<char*>(this) + sizeof(GenTree);
+ void* remSrc = reinterpret_cast<char*>(const_cast<GenTree*>(&gt)) + sizeof(GenTree);
+ memcpy(remDst, remSrc, TREE_NODE_SZ_SMALL - sizeof(GenTree));
+}
+
+unsigned GenTree::NumChildren()
+{
+ if (OperIsConst() || OperIsLeaf())
+ {
+ return 0;
+ }
+ else if (OperIsUnary())
+ {
+ if (OperGet() == GT_NOP || OperGet() == GT_RETURN || OperGet() == GT_RETFILT)
+ {
+ if (gtOp.gtOp1 == nullptr)
+ {
+ return 0;
+ }
+ else
+ {
+ return 1;
+ }
+ }
+ else
+ {
+ return 1;
+ }
+ }
+ else if (OperIsBinary())
+ {
+ // All binary operators except LEA have at least one arg; the second arg may sometimes be null, however.
+ if (OperGet() == GT_LEA)
+ {
+ unsigned childCount = 0;
+ if (gtOp.gtOp1 != nullptr)
+ {
+ childCount++;
+ }
+ if (gtOp.gtOp2 != nullptr)
+ {
+ childCount++;
+ }
+ return childCount;
+ }
+ // Special case for assignment of dynamic block.
+ // This is here to duplicate the former case where the size may be evaluated prior to the
+ // source and destination addresses. In order to do this, we treat the size as a child of the
+ // assignment.
+ // TODO-1stClassStructs-Cleanup: Remove all this special casing, and ensure that the diffs are reasonable.
+ if ((OperGet() == GT_ASG) && (gtOp.gtOp1->OperGet() == GT_DYN_BLK) && (gtOp.gtOp1->AsDynBlk()->gtEvalSizeFirst))
+ {
+ return 3;
+ }
+ assert(gtOp.gtOp1 != nullptr);
+ if (gtOp.gtOp2 == nullptr)
+ {
+ return 1;
+ }
+ else
+ {
+ return 2;
+ }
+ }
+ else
+ {
+ // Special
+ switch (OperGet())
+ {
+ case GT_CMPXCHG:
+ return 3;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ return 2;
+
+ case GT_FIELD:
+ case GT_STMT:
+ return 1;
+
+ case GT_ARR_ELEM:
+ return 1 + AsArrElem()->gtArrRank;
+
+ // This really has two children, but if the size is evaluated first, we treat it as a child of the
+ // parent assignment.
+ case GT_DYN_BLK:
+ if (AsDynBlk()->gtEvalSizeFirst)
+ {
+ return 1;
+ }
+ else
+ {
+ return 2;
+ }
+
+ case GT_ARR_OFFSET:
+ case GT_STORE_DYN_BLK:
+ return 3;
+
+ case GT_CALL:
+ {
+ GenTreeCall* call = AsCall();
+ unsigned res = 0; // arg list(s) (including late args).
+ if (call->gtCallObjp != nullptr)
+ {
+ res++; // Add objp?
+ }
+ if (call->gtCallArgs != nullptr)
+ {
+ res++; // Add args?
+ }
+ if (call->gtCallLateArgs != nullptr)
+ {
+ res++; // Add late args?
+ }
+ if (call->gtControlExpr != nullptr)
+ {
+ res++;
+ }
+
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ if (call->gtCallCookie != nullptr)
+ {
+ res++;
+ }
+ if (call->gtCallAddr != nullptr)
+ {
+ res++;
+ }
+ }
+ return res;
+ }
+ case GT_NONE:
+ return 0;
+ default:
+ unreached();
+ }
+ }
+}
+
+GenTreePtr GenTree::GetChild(unsigned childNum)
+{
+ assert(childNum < NumChildren()); // Precondition.
+ assert(NumChildren() <= MAX_CHILDREN);
+ assert(!(OperIsConst() || OperIsLeaf()));
+ if (OperIsUnary())
+ {
+ return AsUnOp()->gtOp1;
+ }
+ // Special case for assignment of dynamic block.
+ // This code is here to duplicate the former case where the size may be evaluated prior to the
+ // source and destination addresses. In order to do this, we treat the size as a child of the
+ // assignment.
+ // TODO-1stClassStructs: Revisit the need to duplicate former behavior, so that we can remove
+ // these special cases.
+ if ((OperGet() == GT_ASG) && (gtOp.gtOp1->OperGet() == GT_DYN_BLK) && (childNum == 2))
+ {
+ return gtOp.gtOp1->AsDynBlk()->gtDynamicSize;
+ }
+ else if (OperIsBinary())
+ {
+ if (OperIsAddrMode())
+ {
+ // If this is the first (0th) child, only return op1 if it is non-null
+ // Otherwise, we return gtOp2.
+ if (childNum == 0 && AsOp()->gtOp1 != nullptr)
+ {
+ return AsOp()->gtOp1;
+ }
+ return AsOp()->gtOp2;
+ }
+ // TODO-Cleanup: Consider handling ReverseOps here, and then we wouldn't have to handle it in
+ // fgGetFirstNode(). However, it seems that it causes loop hoisting behavior to change.
+ if (childNum == 0)
+ {
+ return AsOp()->gtOp1;
+ }
+ else
+ {
+ return AsOp()->gtOp2;
+ }
+ }
+ else
+ {
+ // Special
+ switch (OperGet())
+ {
+ case GT_CMPXCHG:
+ switch (childNum)
+ {
+ case 0:
+ return AsCmpXchg()->gtOpLocation;
+ case 1:
+ return AsCmpXchg()->gtOpValue;
+ case 2:
+ return AsCmpXchg()->gtOpComparand;
+ default:
+ unreached();
+ }
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ switch (childNum)
+ {
+ case 0:
+ return AsBoundsChk()->gtArrLen;
+ case 1:
+ return AsBoundsChk()->gtIndex;
+ default:
+ unreached();
+ }
+
+ case GT_STORE_DYN_BLK:
+ switch (childNum)
+ {
+ case 0:
+ return AsDynBlk()->Addr();
+ case 1:
+ return AsDynBlk()->Data();
+ case 2:
+ return AsDynBlk()->gtDynamicSize;
+ default:
+ unreached();
+ }
+ case GT_DYN_BLK:
+ switch (childNum)
+ {
+ case 0:
+ return AsDynBlk()->Addr();
+ case 1:
+ assert(!AsDynBlk()->gtEvalSizeFirst);
+ return AsDynBlk()->gtDynamicSize;
+ default:
+ unreached();
+ }
+
+ case GT_FIELD:
+ return AsField()->gtFldObj;
+
+ case GT_STMT:
+ return AsStmt()->gtStmtExpr;
+
+ case GT_ARR_ELEM:
+ if (childNum == 0)
+ {
+ return AsArrElem()->gtArrObj;
+ }
+ else
+ {
+ return AsArrElem()->gtArrInds[childNum - 1];
+ }
+
+ case GT_ARR_OFFSET:
+ switch (childNum)
+ {
+ case 0:
+ return AsArrOffs()->gtOffset;
+ case 1:
+ return AsArrOffs()->gtIndex;
+ case 2:
+ return AsArrOffs()->gtArrObj;
+ default:
+ unreached();
+ }
+
+ case GT_CALL:
+ {
+                // The if chain below assumes that all possible children are non-null.
+                // If some are null, we "virtually skip" them by bumping 'childNum'
+                // past each missing child before indexing into the remaining ones.
+ GenTreeCall* call = AsCall();
+
+ if (call->gtCallObjp == nullptr)
+ {
+ childNum++;
+ }
+ if (childNum >= 1 && call->gtCallArgs == nullptr)
+ {
+ childNum++;
+ }
+ if (childNum >= 2 && call->gtCallLateArgs == nullptr)
+ {
+ childNum++;
+ }
+ if (childNum >= 3 && call->gtControlExpr == nullptr)
+ {
+ childNum++;
+ }
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ if (childNum >= 4 && call->gtCallCookie == nullptr)
+ {
+ childNum++;
+ }
+ }
+
+ if (childNum == 0)
+ {
+ return call->gtCallObjp;
+ }
+ else if (childNum == 1)
+ {
+ return call->gtCallArgs;
+ }
+ else if (childNum == 2)
+ {
+ return call->gtCallLateArgs;
+ }
+ else if (childNum == 3)
+ {
+ return call->gtControlExpr;
+ }
+ else
+ {
+ assert(call->gtCallType == CT_INDIRECT);
+ if (childNum == 4)
+ {
+ return call->gtCallCookie;
+ }
+ else
+ {
+ assert(childNum == 5);
+ return call->gtCallAddr;
+ }
+ }
+ }
+ case GT_NONE:
+ unreached();
+ default:
+ unreached();
+ }
+ }
+}
+
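+// Example (illustrative): NumChildren/GetChild provide a uniform, index-based view of
+// a node's children, e.g. for a simple recursive walk:
+//
+//     void WalkChildren(GenTree* node)
+//     {
+//         for (unsigned i = 0; i < node->NumChildren(); i++)
+//         {
+//             GenTreePtr child = node->GetChild(i);
+//             if (child != nullptr) // e.g. GT_FIELD may have a null object child
+//             {
+//                 WalkChildren(child);
+//             }
+//         }
+//     }
+//
+// Newer code generally prefers the use-edge/operand iterators defined below, which
+// also expand call argument lists into the individual arguments.
+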
+GenTreeUseEdgeIterator::GenTreeUseEdgeIterator()
+ : m_node(nullptr)
+ , m_edge(nullptr)
+ , m_argList(nullptr)
+ , m_state(-1)
+{
+}
+
+GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
+ : m_node(node)
+ , m_edge(nullptr)
+ , m_argList(nullptr)
+ , m_state(0)
+{
+ assert(m_node != nullptr);
+
+ // Advance to the first operand.
+ ++(*this);
+}
+
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::GetNextUseEdge:
+// Gets the next operand of a node with a fixed number of operands.
+//    This covers all nodes besides GT_CALL, GT_PHI, GT_SIMD, and aggregate GT_LIST
+//    nodes, which have their own MoveToNext*UseEdge methods. For the node types
+//    handled by this method, the `m_state` field indicates the index of the next
+//    operand to produce.
+//
+// Returns:
+// The node's next operand or nullptr if all operands have been
+// produced.
+//
+GenTree** GenTreeUseEdgeIterator::GetNextUseEdge() const
+{
+ switch (m_node->OperGet())
+ {
+ case GT_CMPXCHG:
+ switch (m_state)
+ {
+ case 0:
+ return &m_node->AsCmpXchg()->gtOpLocation;
+ case 1:
+ return &m_node->AsCmpXchg()->gtOpValue;
+ case 2:
+ return &m_node->AsCmpXchg()->gtOpComparand;
+ default:
+ return nullptr;
+ }
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ switch (m_state)
+ {
+ case 0:
+ return &m_node->AsBoundsChk()->gtArrLen;
+ case 1:
+ return &m_node->AsBoundsChk()->gtIndex;
+ default:
+ return nullptr;
+ }
+
+ case GT_FIELD:
+ if (m_state == 0)
+ {
+ return &m_node->AsField()->gtFldObj;
+ }
+ return nullptr;
+
+ case GT_STMT:
+ if (m_state == 0)
+ {
+ return &m_node->AsStmt()->gtStmtExpr;
+ }
+ return nullptr;
+
+ case GT_ARR_ELEM:
+ if (m_state == 0)
+ {
+ return &m_node->AsArrElem()->gtArrObj;
+ }
+ else if (m_state <= m_node->AsArrElem()->gtArrRank)
+ {
+ return &m_node->AsArrElem()->gtArrInds[m_state - 1];
+ }
+ return nullptr;
+
+ case GT_ARR_OFFSET:
+ switch (m_state)
+ {
+ case 0:
+ return &m_node->AsArrOffs()->gtOffset;
+ case 1:
+ return &m_node->AsArrOffs()->gtIndex;
+ case 2:
+ return &m_node->AsArrOffs()->gtArrObj;
+ default:
+ return nullptr;
+ }
+
+        // Call, phi, and SIMD nodes are handled by MoveToNext{Call,Phi,SIMD}UseEdge, respectively.
+ case GT_CALL:
+ case GT_PHI:
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+#endif
+ break;
+
+ case GT_ASG:
+ {
+ bool operandsReversed = (m_node->gtFlags & GTF_REVERSE_OPS) != 0;
+ switch (m_state)
+ {
+ case 0:
+ return !operandsReversed ? &(m_node->AsOp()->gtOp1) : &(m_node->AsOp()->gtOp2);
+ case 1:
+ return !operandsReversed ? &(m_node->AsOp()->gtOp2) : &(m_node->AsOp()->gtOp1);
+ default:
+ return nullptr;
+ }
+ }
+
+ case GT_DYN_BLK:
+ switch (m_state)
+ {
+ case 0:
+ return &(m_node->AsDynBlk()->gtOp1);
+ case 1:
+ return &(m_node->AsDynBlk()->gtDynamicSize);
+ default:
+ return nullptr;
+ }
+ break;
+
+ case GT_STORE_DYN_BLK:
+ switch (m_state)
+ {
+ case 0:
+ return &(m_node->AsDynBlk()->gtOp1);
+ case 1:
+ return &(m_node->AsDynBlk()->gtOp2);
+ case 2:
+ return &(m_node->AsDynBlk()->gtDynamicSize);
+ default:
+ return nullptr;
+ }
+ break;
+
+ case GT_LEA:
+ {
+ GenTreeAddrMode* lea = m_node->AsAddrMode();
+
+ bool hasOp1 = lea->gtOp1 != nullptr;
+ if (!hasOp1)
+ {
+ return m_state == 0 ? &lea->gtOp2 : nullptr;
+ }
+
+ bool operandsReversed = (lea->gtFlags & GTF_REVERSE_OPS) != 0;
+ switch (m_state)
+ {
+ case 0:
+ return !operandsReversed ? &lea->gtOp1 : &lea->gtOp2;
+ case 1:
+ return !operandsReversed ? &lea->gtOp2 : &lea->gtOp1;
+ default:
+ return nullptr;
+ }
+ }
+ break;
+
+ case GT_LIST:
+ if (m_node->AsArgList()->IsAggregate())
+ {
+                // List nodes that represent aggregates are handled by MoveToNextAggregateUseEdge.
+ break;
+ }
+ __fallthrough;
+
+ default:
+ if (m_node->OperIsConst() || m_node->OperIsLeaf())
+ {
+ return nullptr;
+ }
+ else if (m_node->OperIsUnary())
+ {
+ return m_state == 0 ? &m_node->AsUnOp()->gtOp1 : nullptr;
+ }
+ else if (m_node->OperIsBinary())
+ {
+ bool operandsReversed = (m_node->gtFlags & GTF_REVERSE_OPS) != 0;
+ switch (m_state)
+ {
+ case 0:
+ return !operandsReversed ? &m_node->AsOp()->gtOp1 : &m_node->AsOp()->gtOp2;
+ case 1:
+ return !operandsReversed ? &m_node->AsOp()->gtOp2 : &m_node->AsOp()->gtOp1;
+ default:
+ return nullptr;
+ }
+ }
+ }
+
+ unreached();
+}
+
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::MoveToNextCallUseEdge:
+// Moves to the next operand of a call node. Unlike the simple nodes
+// handled by `GetNextUseEdge`, call nodes have a variable number of
+// operands stored in cons lists. This method expands the cons lists
+// into the operands stored within.
+//
+void GenTreeUseEdgeIterator::MoveToNextCallUseEdge()
+{
+ enum
+ {
+ CALL_INSTANCE = 0,
+ CALL_ARGS = 1,
+ CALL_LATE_ARGS = 2,
+ CALL_CONTROL_EXPR = 3,
+ CALL_COOKIE = 4,
+ CALL_ADDRESS = 5,
+ CALL_TERMINAL = 6,
+ };
+
+ GenTreeCall* call = m_node->AsCall();
+
+ for (;;)
+ {
+ switch (m_state)
+ {
+ case CALL_INSTANCE:
+ m_state = CALL_ARGS;
+ m_argList = call->gtCallArgs;
+
+ if (call->gtCallObjp != nullptr)
+ {
+ m_edge = &call->gtCallObjp;
+ return;
+ }
+ break;
+
+ case CALL_ARGS:
+ case CALL_LATE_ARGS:
+ if (m_argList == nullptr)
+ {
+ m_state++;
+
+ if (m_state == CALL_LATE_ARGS)
+ {
+ m_argList = call->gtCallLateArgs;
+ }
+ }
+ else
+ {
+ GenTreeArgList* argNode = m_argList->AsArgList();
+ m_edge = &argNode->gtOp1;
+ m_argList = argNode->Rest();
+ return;
+ }
+ break;
+
+ case CALL_CONTROL_EXPR:
+ m_state = call->gtCallType == CT_INDIRECT ? CALL_COOKIE : CALL_TERMINAL;
+
+ if (call->gtControlExpr != nullptr)
+ {
+ m_edge = &call->gtControlExpr;
+ return;
+ }
+ break;
+
+            case CALL_COOKIE:
+ assert(call->gtCallType == CT_INDIRECT);
+
+ m_state = CALL_ADDRESS;
+
+ if (call->gtCallCookie != nullptr)
+ {
+ m_edge = &call->gtCallCookie;
+ return;
+ }
+ break;
+
+            case CALL_ADDRESS:
+ assert(call->gtCallType == CT_INDIRECT);
+
+ m_state = CALL_TERMINAL;
+ if (call->gtCallAddr != nullptr)
+ {
+ m_edge = &call->gtCallAddr;
+ return;
+ }
+ break;
+
+ default:
+ m_node = nullptr;
+ m_edge = nullptr;
+ m_argList = nullptr;
+ m_state = -1;
+ return;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::MoveToNextPhiUseEdge:
+// Moves to the next operand of a phi node. Unlike the simple nodes
+// handled by `GetNextUseEdge`, phi nodes have a variable number of
+// operands stored in a cons list. This method expands the cons list
+// into the operands stored within.
+//
+void GenTreeUseEdgeIterator::MoveToNextPhiUseEdge()
+{
+ GenTreeUnOp* phi = m_node->AsUnOp();
+
+ for (;;)
+ {
+ switch (m_state)
+ {
+ case 0:
+ m_state = 1;
+ m_argList = phi->gtOp1;
+ break;
+
+ case 1:
+ if (m_argList == nullptr)
+ {
+ m_state = 2;
+ }
+ else
+ {
+ GenTreeArgList* argNode = m_argList->AsArgList();
+ m_edge = &argNode->gtOp1;
+ m_argList = argNode->Rest();
+ return;
+ }
+ break;
+
+ default:
+ m_node = nullptr;
+ m_edge = nullptr;
+ m_argList = nullptr;
+ m_state = -1;
+ return;
+ }
+ }
+}
+
+#ifdef FEATURE_SIMD
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::MoveToNextSIMDUseEdge:
+// Moves to the next operand of a SIMD node. Most SIMD nodes have a
+// fixed number of operands and are handled accordingly.
+// `SIMDIntrinsicInitN` nodes, however, have a variable number of
+// operands stored in a cons list. This method expands the cons list
+// into the operands stored within.
+//
+void GenTreeUseEdgeIterator::MoveToNextSIMDUseEdge()
+{
+ GenTreeSIMD* simd = m_node->AsSIMD();
+
+ if (simd->gtSIMDIntrinsicID != SIMDIntrinsicInitN)
+ {
+ bool operandsReversed = (simd->gtFlags & GTF_REVERSE_OPS) != 0;
+ switch (m_state)
+ {
+ case 0:
+ m_edge = !operandsReversed ? &simd->gtOp1 : &simd->gtOp2;
+ break;
+ case 1:
+ m_edge = !operandsReversed ? &simd->gtOp2 : &simd->gtOp1;
+ break;
+ default:
+ m_edge = nullptr;
+ break;
+ }
+
+ if (m_edge != nullptr && *m_edge != nullptr)
+ {
+ m_state++;
+ }
+ else
+ {
+ m_node = nullptr;
+ m_state = -1;
+ }
+
+ return;
+ }
+
+ for (;;)
+ {
+ switch (m_state)
+ {
+ case 0:
+ m_state = 1;
+ m_argList = simd->gtOp1;
+ break;
+
+ case 1:
+ if (m_argList == nullptr)
+ {
+ m_state = 2;
+ }
+ else
+ {
+ GenTreeArgList* argNode = m_argList->AsArgList();
+ m_edge = &argNode->gtOp1;
+ m_argList = argNode->Rest();
+ return;
+ }
+ break;
+
+ default:
+ m_node = nullptr;
+ m_edge = nullptr;
+ m_argList = nullptr;
+ m_state = -1;
+ return;
+ }
+ }
+}
+#endif // FEATURE_SIMD
+
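+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::MoveToNextAggregateUseEdge:
+//    Moves to the next operand of an aggregate GT_LIST node. Like calls and
+//    phis, these nodes store a variable number of operands in a cons list,
+//    which this method expands into the operands stored within.
+//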
+void GenTreeUseEdgeIterator::MoveToNextAggregateUseEdge()
+{
+ assert(m_node->OperGet() == GT_LIST);
+ assert(m_node->AsArgList()->IsAggregate());
+
+ for (;;)
+ {
+ switch (m_state)
+ {
+ case 0:
+ m_state = 1;
+ m_argList = m_node;
+ break;
+
+ case 1:
+ if (m_argList == nullptr)
+ {
+ m_state = 2;
+ }
+ else
+ {
+ GenTreeArgList* aggNode = m_argList->AsArgList();
+ m_edge = &aggNode->gtOp1;
+ m_argList = aggNode->Rest();
+ return;
+ }
+ break;
+
+ default:
+ m_node = nullptr;
+ m_edge = nullptr;
+ m_argList = nullptr;
+ m_state = -1;
+ return;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::operator++:
+// Advances the iterator to the next operand.
+//
+GenTreeUseEdgeIterator& GenTreeUseEdgeIterator::operator++()
+{
+ if (m_state == -1)
+ {
+ // If we've reached the terminal state, do nothing.
+ assert(m_node == nullptr);
+ assert(m_edge == nullptr);
+ assert(m_argList == nullptr);
+ }
+ else
+ {
+ // Otherwise, move to the next operand in the node.
+ genTreeOps op = m_node->OperGet();
+ if (op == GT_CALL)
+ {
+ MoveToNextCallUseEdge();
+ }
+ else if (op == GT_PHI)
+ {
+ MoveToNextPhiUseEdge();
+ }
+#ifdef FEATURE_SIMD
+ else if (op == GT_SIMD)
+ {
+ MoveToNextSIMDUseEdge();
+ }
+#endif
+ else if ((op == GT_LIST) && (m_node->AsArgList()->IsAggregate()))
+ {
+ MoveToNextAggregateUseEdge();
+ }
+ else
+ {
+ m_edge = GetNextUseEdge();
+ if (m_edge != nullptr && *m_edge != nullptr)
+ {
+ m_state++;
+ }
+ else
+ {
+ m_edge = nullptr;
+ m_node = nullptr;
+ m_state = -1;
+ }
+ }
+ }
+
+ return *this;
+}
+
+GenTreeUseEdgeIterator GenTree::UseEdgesBegin()
+{
+ return GenTreeUseEdgeIterator(this);
+}
+
+GenTreeUseEdgeIterator GenTree::UseEdgesEnd()
+{
+ return GenTreeUseEdgeIterator();
+}
+
+IteratorPair<GenTreeUseEdgeIterator> GenTree::UseEdges()
+{
+ return MakeIteratorPair(UseEdgesBegin(), UseEdgesEnd());
+}
+
+GenTreeOperandIterator GenTree::OperandsBegin()
+{
+ return GenTreeOperandIterator(this);
+}
+
+GenTreeOperandIterator GenTree::OperandsEnd()
+{
+ return GenTreeOperandIterator();
+}
+
+IteratorPair<GenTreeOperandIterator> GenTree::Operands()
+{
+ return MakeIteratorPair(OperandsBegin(), OperandsEnd());
+}
+
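+// Example (illustrative): the Begin/End pairs above make the operands and use edges
+// of a node available to range-based for loops:
+//
+//     for (GenTree* operand : node->Operands())
+//     {
+//         // ... visit each operand; call argument lists are expanded into their
+//         // individual arguments ...
+//     }
+//
+// UseEdges() iterates the corresponding GenTree** edges instead, which lets a caller
+// overwrite an operand in place through the edge.
+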
+bool GenTree::Precedes(GenTree* other)
+{
+ assert(other != nullptr);
+
+ for (GenTree* node = gtNext; node != nullptr; node = node->gtNext)
+ {
+ if (node == other)
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+#ifdef DEBUG
+
+/* static */ int GenTree::gtDispFlags(unsigned flags, unsigned debugFlags)
+{
+ printf("%c", (flags & GTF_ASG) ? 'A' : '-');
+ printf("%c", (flags & GTF_CALL) ? 'C' : '-');
+ printf("%c", (flags & GTF_EXCEPT) ? 'X' : '-');
+ printf("%c", (flags & GTF_GLOB_REF) ? 'G' : '-');
+ printf("%c", (debugFlags & GTF_DEBUG_NODE_MORPHED) ? '+' : // First print '+' if GTF_DEBUG_NODE_MORPHED is set
+ (flags & GTF_ORDER_SIDEEFF) ? 'O' : '-'); // otherwise print 'O' or '-'
+ printf("%c", (flags & GTF_COLON_COND) ? '?' : '-');
+ printf("%c", (flags & GTF_DONT_CSE) ? 'N' : // N is for No cse
+ (flags & GTF_MAKE_CSE) ? 'H' : '-'); // H is for Hoist this expr
+ printf("%c", (flags & GTF_REVERSE_OPS) ? 'R' : '-');
+ printf("%c", (flags & GTF_UNSIGNED) ? 'U' : (flags & GTF_BOOLEAN) ? 'B' : '-');
+#if FEATURE_SET_FLAGS
+ printf("%c", (flags & GTF_SET_FLAGS) ? 'S' : '-');
+#endif
+ printf("%c", (flags & GTF_LATE_ARG) ? 'L' : '-');
+ printf("%c", (flags & GTF_SPILLED) ? 'z' : (flags & GTF_SPILL) ? 'Z' : '-');
+ return 12; // displayed 12 flag characters
+}
+
+/*****************************************************************************/
+
+void Compiler::gtDispNodeName(GenTree* tree)
+{
+ /* print the node name */
+
+ const char* name;
+
+ assert(tree);
+ if (tree->gtOper < GT_COUNT)
+ {
+ name = GenTree::NodeName(tree->OperGet());
+ }
+ else
+ {
+ name = "<ERROR>";
+ }
+ char buf[32];
+ char* bufp = &buf[0];
+
+ if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
+ {
+ sprintf_s(bufp, sizeof(buf), " %s(h)%c", name, 0);
+ }
+ else if (tree->gtOper == GT_PUTARG_STK)
+ {
+ sprintf_s(bufp, sizeof(buf), " %s [+0x%02x]%c", name, tree->AsPutArgStk()->getArgOffset(), 0);
+ }
+ else if (tree->gtOper == GT_CALL)
+ {
+ const char* callType = "call";
+ const char* gtfType = "";
+ const char* ctType = "";
+ char gtfTypeBuf[100];
+
+ if (tree->gtCall.gtCallType == CT_USER_FUNC)
+ {
+ if ((tree->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT)
+ {
+ callType = "callv";
+ }
+ }
+ else if (tree->gtCall.gtCallType == CT_HELPER)
+ {
+ ctType = " help";
+ }
+ else if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ ctType = " ind";
+ }
+ else
+ {
+ assert(!"Unknown gtCallType");
+ }
+
+ if (tree->gtFlags & GTF_CALL_NULLCHECK)
+ {
+ gtfType = " nullcheck";
+ }
+ if (tree->gtFlags & GTF_CALL_VIRT_VTABLE)
+ {
+ gtfType = " ind";
+ }
+ else if (tree->gtFlags & GTF_CALL_VIRT_STUB)
+ {
+ gtfType = " stub";
+ }
+#ifdef FEATURE_READYTORUN_COMPILER
+ else if (tree->gtCall.IsR2RRelativeIndir())
+ {
+ gtfType = " r2r_ind";
+ }
+#endif // FEATURE_READYTORUN_COMPILER
+ else if (tree->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ char* gtfTypeBufWalk = gtfTypeBuf;
+ gtfTypeBufWalk += SimpleSprintf_s(gtfTypeBufWalk, gtfTypeBuf, sizeof(gtfTypeBuf), " unman");
+ if (tree->gtFlags & GTF_CALL_POP_ARGS)
+ {
+ gtfTypeBufWalk += SimpleSprintf_s(gtfTypeBufWalk, gtfTypeBuf, sizeof(gtfTypeBuf), " popargs");
+ }
+ if (tree->gtCall.gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
+ {
+ gtfTypeBufWalk += SimpleSprintf_s(gtfTypeBufWalk, gtfTypeBuf, sizeof(gtfTypeBuf), " thiscall");
+ }
+ gtfType = gtfTypeBuf;
+ }
+
+ sprintf_s(bufp, sizeof(buf), " %s%s%s%c", callType, ctType, gtfType, 0);
+ }
+ else if (tree->gtOper == GT_ARR_ELEM)
+ {
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), " %s[", name);
+ for (unsigned rank = tree->gtArrElem.gtArrRank - 1; rank; rank--)
+ {
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), ",");
+ }
+ SimpleSprintf_s(bufp, buf, sizeof(buf), "]");
+ }
+ else if (tree->gtOper == GT_ARR_OFFSET || tree->gtOper == GT_ARR_INDEX)
+ {
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), " %s[", name);
+ unsigned char currDim;
+ unsigned char rank;
+ if (tree->gtOper == GT_ARR_OFFSET)
+ {
+ currDim = tree->gtArrOffs.gtCurrDim;
+ rank = tree->gtArrOffs.gtArrRank;
+ }
+ else
+ {
+ currDim = tree->gtArrIndex.gtCurrDim;
+ rank = tree->gtArrIndex.gtArrRank;
+ }
+
+ for (unsigned char dim = 0; dim < rank; dim++)
+ {
+            // Use a de facto standard i,j,k for the dimensions.
+ // Note that we only support up to rank 3 arrays with these nodes, so we won't run out of characters.
+ char dimChar = '*';
+ if (dim == currDim)
+ {
+ dimChar = 'i' + dim;
+ }
+ else if (dim > currDim)
+ {
+ dimChar = ' ';
+ }
+
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), "%c", dimChar);
+ if (dim != rank - 1)
+ {
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), ",");
+ }
+ }
+ SimpleSprintf_s(bufp, buf, sizeof(buf), "]");
+ }
+ else if (tree->gtOper == GT_LEA)
+ {
+ GenTreeAddrMode* lea = tree->AsAddrMode();
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), " %s(", name);
+ if (lea->Base() != nullptr)
+ {
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), "b+");
+ }
+ if (lea->Index() != nullptr)
+ {
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), "(i*%d)+", lea->gtScale);
+ }
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), "%d)", lea->gtOffset);
+ }
+ else if (tree->gtOper == GT_ARR_BOUNDS_CHECK)
+ {
+ switch (tree->gtBoundsChk.gtThrowKind)
+ {
+ case SCK_RNGCHK_FAIL:
+ sprintf_s(bufp, sizeof(buf), " %s_Rng", name);
+ break;
+ case SCK_ARG_EXCPN:
+ sprintf_s(bufp, sizeof(buf), " %s_Arg", name);
+ break;
+ case SCK_ARG_RNG_EXCPN:
+ sprintf_s(bufp, sizeof(buf), " %s_ArgRng", name);
+ break;
+ default:
+ unreached();
+ }
+ }
+ else if (tree->gtOverflowEx())
+ {
+ sprintf_s(bufp, sizeof(buf), " %s_ovfl%c", name, 0);
+ }
+ else if (tree->OperIsBlk() && (tree->AsBlk()->gtBlkSize != 0))
+ {
+ sprintf_s(bufp, sizeof(buf), " %s(%d)", name, tree->AsBlk()->gtBlkSize);
+ }
+ else
+ {
+ sprintf_s(bufp, sizeof(buf), " %s%c", name, 0);
+ }
+
+ if (strlen(buf) < 10)
+ {
+ printf(" %-10s", buf);
+ }
+ else
+ {
+ printf(" %s", buf);
+ }
+}
+
+void Compiler::gtDispVN(GenTree* tree)
+{
+ if (tree->gtVNPair.GetLiberal() != ValueNumStore::NoVN)
+ {
+ assert(tree->gtVNPair.GetConservative() != ValueNumStore::NoVN);
+ printf(" ");
+ vnpPrint(tree->gtVNPair, 0);
+ }
+}
+
+//------------------------------------------------------------------------
+// gtDispNode: Print a tree to jitstdout.
+//
+// Arguments:
+// tree - the tree to be printed
+// indentStack - the specification for the current level of indentation & arcs
+//    msg         - a contextual message (i.e. from the parent) to print
+//
+// Return Value:
+// None.
+//
+// Notes:
+// 'indentStack' may be null, in which case no indentation or arcs are printed
+// 'msg' may be null
+
+void Compiler::gtDispNode(GenTreePtr tree, IndentStack* indentStack, __in __in_z __in_opt const char* msg, bool isLIR)
+{
+ bool printPointer = true; // always true..
+ bool printFlags = true; // always true..
+ bool printCost = true; // always true..
+
+ int msgLength = 25;
+
+ GenTree* prev;
+
+ if (tree->gtSeqNum)
+ {
+ printf("N%03u ", tree->gtSeqNum);
+ if (tree->gtCostsInitialized)
+ {
+ printf("(%3u,%3u) ", tree->gtCostEx, tree->gtCostSz);
+ }
+ else
+ {
+ printf("(???"
+ ",???"
+ ") "); // This probably indicates a bug: the node has a sequence number, but not costs.
+ }
+ }
+ else
+ {
+ if (tree->gtOper == GT_STMT)
+ {
+ prev = tree->gtStmt.gtStmtExpr;
+ }
+ else
+ {
+ prev = tree;
+ }
+
+ bool hasSeqNum = true;
+ unsigned dotNum = 0;
+ do
+ {
+ dotNum++;
+ prev = prev->gtPrev;
+
+ if ((prev == nullptr) || (prev == tree))
+ {
+ hasSeqNum = false;
+ break;
+ }
+
+ assert(prev);
+ } while (prev->gtSeqNum == 0);
+
+ // If we have an indent stack, don't add additional characters,
+ // as it will mess up the alignment.
+ bool displayDotNum = tree->gtOper != GT_STMT && hasSeqNum && (indentStack == nullptr);
+ if (displayDotNum)
+ {
+ printf("N%03u.%02u ", prev->gtSeqNum, dotNum);
+ }
+ else
+ {
+ printf(" ");
+ }
+
+ if (tree->gtCostsInitialized)
+ {
+ printf("(%3u,%3u) ", tree->gtCostEx, tree->gtCostSz);
+ }
+ else
+ {
+ if (displayDotNum)
+ {
+ // Do better alignment in this case
+ printf(" ");
+ }
+ else
+ {
+ printf(" ");
+ }
+ }
+ }
+
+ if (optValnumCSE_phase)
+ {
+ if (IS_CSE_INDEX(tree->gtCSEnum))
+ {
+ printf("CSE #%02d (%s)", GET_CSE_INDEX(tree->gtCSEnum), (IS_CSE_USE(tree->gtCSEnum) ? "use" : "def"));
+ }
+ else
+ {
+ printf(" ");
+ }
+ }
+
+ /* Print the node ID */
+ printTreeID(tree);
+ printf(" ");
+
+ if (tree->gtOper >= GT_COUNT)
+ {
+ printf(" **** ILLEGAL NODE ****");
+ return;
+ }
+
+ if (printFlags)
+ {
+ /* First print the flags associated with the node */
+ switch (tree->gtOper)
+ {
+ case GT_LEA:
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_DYN_BLK:
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+
+ case GT_IND:
+ // We prefer printing R, V or U
+ if ((tree->gtFlags & (GTF_IND_REFARR_LAYOUT | GTF_IND_VOLATILE | GTF_IND_UNALIGNED)) == 0)
+ {
+ if (tree->gtFlags & GTF_IND_TGTANYWHERE)
+ {
+ printf("*");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_IND_INVARIANT)
+ {
+ printf("#");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_IND_ARR_INDEX)
+ {
+ printf("a");
+ --msgLength;
+ break;
+ }
+ }
+ __fallthrough;
+
+ case GT_INDEX:
+
+ if ((tree->gtFlags & (GTF_IND_VOLATILE | GTF_IND_UNALIGNED)) == 0) // We prefer printing V or U over R
+ {
+ if (tree->gtFlags & GTF_IND_REFARR_LAYOUT)
+ {
+ printf("R");
+ --msgLength;
+ break;
+ } // R means RefArray
+ }
+ __fallthrough;
+
+ case GT_FIELD:
+ case GT_CLS_VAR:
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ printf("V");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_IND_UNALIGNED)
+ {
+ printf("U");
+ --msgLength;
+ break;
+ }
+ goto DASH;
+
+ case GT_ASG:
+ if (tree->OperIsInitBlkOp())
+ {
+ printf("I");
+ --msgLength;
+ break;
+ }
+ goto DASH;
+
+ case GT_CALL:
+ if (tree->gtFlags & GTF_CALL_INLINE_CANDIDATE)
+ {
+ printf("I");
+ --msgLength;
+ break;
+ }
+ if (tree->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG)
+ {
+ printf("S");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_CALL_HOISTABLE)
+ {
+ printf("H");
+ --msgLength;
+ break;
+ }
+
+ goto DASH;
+
+ case GT_MUL:
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ {
+ printf("L");
+ --msgLength;
+ break;
+ }
+ goto DASH;
+
+ case GT_ADDR:
+ if (tree->gtFlags & GTF_ADDR_ONSTACK)
+ {
+ printf("L");
+ --msgLength;
+ break;
+ } // L means LclVar
+ goto DASH;
+
+ case GT_LCL_FLD:
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ case GT_LCL_FLD_ADDR:
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ case GT_REG_VAR:
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+ printf("U");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_VAR_USEDEF)
+ {
+ printf("B");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_VAR_DEF)
+ {
+ printf("D");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_VAR_CAST)
+ {
+ printf("C");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_VAR_ARR_INDEX)
+ {
+ printf("i");
+ --msgLength;
+ break;
+ }
+ goto DASH;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ if (tree->gtFlags & GTF_RELOP_NAN_UN)
+ {
+ printf("N");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_RELOP_JMP_USED)
+ {
+ printf("J");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_RELOP_QMARK)
+ {
+ printf("Q");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_RELOP_SMALL)
+ {
+ printf("S");
+ --msgLength;
+ break;
+ }
+ goto DASH;
+
+ default:
+ DASH:
+ printf("-");
+ --msgLength;
+ break;
+ }
+
+ /* Then print the general purpose flags */
+ unsigned flags = tree->gtFlags;
+
+ if (tree->OperIsBinary())
+ {
+ genTreeOps oper = tree->OperGet();
+
+ // Check for GTF_ADDRMODE_NO_CSE flag on add/mul/shl Binary Operators
+ if ((oper == GT_ADD) || (oper == GT_MUL) || (oper == GT_LSH))
+ {
+ if ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)
+ {
+ flags |= GTF_DONT_CSE; // Force the GTF_ADDRMODE_NO_CSE flag to print out like GTF_DONT_CSE
+ }
+ }
+ }
+ else // !tree->OperIsBinary()
+ {
+ // the GTF_REVERSE_OPS flag only applies to binary operations
+ flags &= ~GTF_REVERSE_OPS; // this bit is reused as GTF_VAR_ARR_INDEX above
+ }
+
+ msgLength -= GenTree::gtDispFlags(flags, tree->gtDebugFlags);
+/*
+ printf("%c", (flags & GTF_ASG ) ? 'A' : '-');
+ printf("%c", (flags & GTF_CALL ) ? 'C' : '-');
+ printf("%c", (flags & GTF_EXCEPT ) ? 'X' : '-');
+ printf("%c", (flags & GTF_GLOB_REF ) ? 'G' : '-');
+ printf("%c", (flags & GTF_ORDER_SIDEEFF ) ? 'O' : '-');
+ printf("%c", (flags & GTF_COLON_COND ) ? '?' : '-');
+ printf("%c", (flags & GTF_DONT_CSE ) ? 'N' : // N is for No cse
+ (flags & GTF_MAKE_CSE ) ? 'H' : '-'); // H is for Hoist this expr
+ printf("%c", (flags & GTF_REVERSE_OPS ) ? 'R' : '-');
+ printf("%c", (flags & GTF_UNSIGNED ) ? 'U' :
+ (flags & GTF_BOOLEAN ) ? 'B' : '-');
+ printf("%c", (flags & GTF_SET_FLAGS ) ? 'S' : '-');
+ printf("%c", (flags & GTF_SPILLED ) ? 'z' : '-');
+ printf("%c", (flags & GTF_SPILL ) ? 'Z' : '-');
+*/
+
+#if FEATURE_STACK_FP_X87
+ BYTE fpLvl = (BYTE)tree->gtFPlvl;
+ if (IsUninitialized(fpLvl) || fpLvl == 0x00)
+ {
+ printf("-");
+ }
+ else
+ {
+ printf("%1u", tree->gtFPlvl);
+ }
+#endif // FEATURE_STACK_FP_X87
+ }
+
+ // If we're printing a node for LIR, we use the space normally associated with the message
+ // to display the node's temp name (if any)
+ const bool hasOperands = tree->OperandsBegin() != tree->OperandsEnd();
+ if (isLIR)
+ {
+ assert(msg == nullptr);
+
+ // If the tree does not have any operands, we do not display the indent stack. This gives us
+ // two additional characters for alignment.
+ if (!hasOperands)
+ {
+ msgLength += 1;
+ }
+
+ if (tree->IsValue())
+ {
+ const size_t bufLength = msgLength - 1;
+ msg = reinterpret_cast<char*>(alloca(bufLength * sizeof(char)));
+ sprintf_s(const_cast<char*>(msg), bufLength, "t%d = %s", tree->gtTreeID, hasOperands ? "" : " ");
+ }
+ }
+
+ /* print the msg associated with the node */
+
+ if (msg == nullptr)
+ {
+ msg = "";
+ }
+ if (msgLength < 0)
+ {
+ msgLength = 0;
+ }
+
+ printf(isLIR ? " %+*s" : " %-*s", msgLength, msg);
+
+ /* Indent the node accordingly */
+ if (!isLIR || hasOperands)
+ {
+ printIndent(indentStack);
+ }
+
+ gtDispNodeName(tree);
+
+ assert(tree == nullptr || tree->gtOper < GT_COUNT);
+
+ if (tree)
+ {
+ /* print the type of the node */
+ if (tree->gtOper != GT_CAST)
+ {
+ printf(" %-6s", varTypeName(tree->TypeGet()));
+ if (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_STORE_LCL_VAR)
+ {
+ LclVarDsc* varDsc = &lvaTable[tree->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvAddrExposed)
+ {
+ printf("(AX)"); // Variable has address exposed.
+ }
+
+ if (varDsc->lvUnusedStruct)
+ {
+ assert(varDsc->lvPromoted);
+ printf("(U)"); // Unused struct
+ }
+ else if (varDsc->lvPromoted)
+ {
+ assert(varTypeIsPromotable(varDsc));
+ printf("(P)"); // Promoted struct
+ }
+ }
+
+ if (tree->gtOper == GT_STMT)
+ {
+ if (opts.compDbgInfo)
+ {
+ IL_OFFSET endIL = tree->gtStmt.gtStmtLastILoffs;
+
+ printf("(IL ");
+ if (tree->gtStmt.gtStmtILoffsx == BAD_IL_OFFSET)
+ {
+ printf(" ???");
+ }
+ else
+ {
+ printf("0x%03X", jitGetILoffs(tree->gtStmt.gtStmtILoffsx));
+ }
+ printf("...");
+ if (endIL == BAD_IL_OFFSET)
+ {
+ printf(" ???");
+ }
+ else
+ {
+ printf("0x%03X", endIL);
+ }
+ printf(")");
+ }
+ }
+
+ if (tree->IsArgPlaceHolderNode() && (tree->gtArgPlace.gtArgPlaceClsHnd != nullptr))
+ {
+ printf(" => [clsHnd=%08X]", dspPtr(tree->gtArgPlace.gtArgPlaceClsHnd));
+ }
+ }
+
+ // for tracking down problems in reguse prediction or liveness tracking
+
+ if (verbose && 0)
+ {
+ printf(" RR=");
+ dspRegMask(tree->gtRsvdRegs);
+#ifdef LEGACY_BACKEND
+ printf(",UR=");
+ dspRegMask(tree->gtUsedRegs);
+#endif // LEGACY_BACKEND
+ printf("\n");
+ }
+ }
+}
+
+void Compiler::gtDispRegVal(GenTree* tree)
+{
+ switch (tree->GetRegTag())
+ {
+ // Don't display NOREG; the absence of this tag will imply this state
+ // case GenTree::GT_REGTAG_NONE: printf(" NOREG"); break;
+
+ case GenTree::GT_REGTAG_REG:
+ printf(" REG %s", compRegVarName(tree->gtRegNum));
+ break;
+
+#if CPU_LONG_USES_REGPAIR
+ case GenTree::GT_REGTAG_REGPAIR:
+ printf(" PAIR %s", compRegPairName(tree->gtRegPair));
+ break;
+#endif
+
+ default:
+ break;
+ }
+
+ if (tree->IsMultiRegCall())
+ {
+ // 0th reg is gtRegNum, which is already printed above.
+ // Print the remaining regs of a multi-reg call node.
+ GenTreeCall* call = tree->AsCall();
+ unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+ for (unsigned i = 1; i < regCount; ++i)
+ {
+ printf(",%s", compRegVarName(call->GetRegNumByIdx(i)));
+ }
+ }
+ else if (tree->IsCopyOrReloadOfMultiRegCall())
+ {
+ GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
+ GenTreeCall* call = tree->gtGetOp1()->AsCall();
+ unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+ for (unsigned i = 1; i < regCount; ++i)
+ {
+ printf(",%s", compRegVarName(copyOrReload->GetRegNumByIdx(i)));
+ }
+ }
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ printf(" RV");
+ }
+}
+
+// We usually don't expect to print anything longer than this string.
+#define LONGEST_COMMON_LCL_VAR_DISPLAY "V99 PInvokeFrame"
+#define LONGEST_COMMON_LCL_VAR_DISPLAY_LENGTH (sizeof(LONGEST_COMMON_LCL_VAR_DISPLAY))
+#define BUF_SIZE (LONGEST_COMMON_LCL_VAR_DISPLAY_LENGTH * 2)
+
+void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, const char** ilNameOut, unsigned* ilNumOut)
+{
+ const char* ilKind = nullptr;
+ const char* ilName = nullptr;
+
+ unsigned ilNum = compMap2ILvarNum(lclNum);
+
+ if (ilNum == (unsigned)ICorDebugInfo::RETBUF_ILNUM)
+ {
+ ilName = "RetBuf";
+ }
+ else if (ilNum == (unsigned)ICorDebugInfo::VARARGS_HND_ILNUM)
+ {
+ ilName = "VarArgHandle";
+ }
+ else if (ilNum == (unsigned)ICorDebugInfo::TYPECTXT_ILNUM)
+ {
+ ilName = "TypeCtx";
+ }
+ else if (ilNum == (unsigned)ICorDebugInfo::UNKNOWN_ILNUM)
+ {
+#if FEATURE_ANYCSE
+ if (lclNumIsTrueCSE(lclNum))
+ {
+ ilKind = "cse";
+ ilNum = lclNum - optCSEstart;
+ }
+ else if (lclNum >= optCSEstart)
+ {
+ // Currently any new LclVars introduced after the CSE phase
+ // are believed to be created by the "rationalizer"; that is what the "rat" prefix refers to.
+ ilKind = "rat";
+ ilNum = lclNum - (optCSEstart + optCSEcount);
+ }
+ else
+#endif // FEATURE_ANYCSE
+ {
+ if (lclNum == info.compLvFrameListRoot)
+ {
+ ilName = "FramesRoot";
+ }
+ else if (lclNum == lvaInlinedPInvokeFrameVar)
+ {
+ ilName = "PInvokeFrame";
+ }
+ else if (lclNum == lvaGSSecurityCookie)
+ {
+ ilName = "GsCookie";
+ }
+#if FEATURE_FIXED_OUT_ARGS
+ else if (lclNum == lvaPInvokeFrameRegSaveVar)
+ {
+ ilName = "PInvokeFrameRegSave";
+ }
+ else if (lclNum == lvaOutgoingArgSpaceVar)
+ {
+ ilName = "OutArgs";
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+#ifdef _TARGET_ARM_
+ else if (lclNum == lvaPromotedStructAssemblyScratchVar)
+ {
+ ilName = "PromotedStructScratch";
+ }
+#endif // _TARGET_ARM_
+#if !FEATURE_EH_FUNCLETS
+ else if (lclNum == lvaShadowSPslotsVar)
+ {
+ ilName = "EHSlots";
+ }
+#endif // !FEATURE_EH_FUNCLETS
+ else if (lclNum == lvaLocAllocSPvar)
+ {
+ ilName = "LocAllocSP";
+ }
+#if FEATURE_EH_FUNCLETS
+ else if (lclNum == lvaPSPSym)
+ {
+ ilName = "PSPSym";
+ }
+#endif // FEATURE_EH_FUNCLETS
+ else
+ {
+ ilKind = "tmp";
+ if (compIsForInlining())
+ {
+ ilNum = lclNum - impInlineInfo->InlinerCompiler->info.compLocalsCount;
+ }
+ else
+ {
+ ilNum = lclNum - info.compLocalsCount;
+ }
+ }
+ }
+ }
+ else if (lclNum < (compIsForInlining() ? impInlineInfo->InlinerCompiler->info.compArgsCount : info.compArgsCount))
+ {
+ if (ilNum == 0 && !info.compIsStatic)
+ {
+ ilName = "this";
+ }
+ else
+ {
+ ilKind = "arg";
+ }
+ }
+ else
+ {
+ if (!lvaTable[lclNum].lvIsStructField)
+ {
+ ilKind = "loc";
+ }
+ if (compIsForInlining())
+ {
+ ilNum -= impInlineInfo->InlinerCompiler->info.compILargsCount;
+ }
+ else
+ {
+ ilNum -= info.compILargsCount;
+ }
+ }
+
+ *ilKindOut = ilKind;
+ *ilNameOut = ilName;
+ *ilNumOut = ilNum;
+}
+
+/*****************************************************************************/
+int Compiler::gtGetLclVarName(unsigned lclNum, char* buf, unsigned buf_remaining)
+{
+ char* bufp_next = buf;
+ unsigned charsPrinted = 0;
+ int sprintf_result;
+
+ sprintf_result = sprintf_s(bufp_next, buf_remaining, "V%02u", lclNum);
+
+ if (sprintf_result < 0)
+ {
+ return sprintf_result;
+ }
+
+ charsPrinted += sprintf_result;
+ bufp_next += sprintf_result;
+ buf_remaining -= sprintf_result;
+
+ const char* ilKind = nullptr;
+ const char* ilName = nullptr;
+ unsigned ilNum = 0;
+
+ Compiler::gtGetLclVarNameInfo(lclNum, &ilKind, &ilName, &ilNum);
+
+ if (ilName != nullptr)
+ {
+ sprintf_result = sprintf_s(bufp_next, buf_remaining, " %s", ilName);
+ if (sprintf_result < 0)
+ {
+ return sprintf_result;
+ }
+ charsPrinted += sprintf_result;
+ bufp_next += sprintf_result;
+ buf_remaining -= sprintf_result;
+ }
+ else if (ilKind != nullptr)
+ {
+ sprintf_result = sprintf_s(bufp_next, buf_remaining, " %s%d", ilKind, ilNum);
+ if (sprintf_result < 0)
+ {
+ return sprintf_result;
+ }
+ charsPrinted += sprintf_result;
+ bufp_next += sprintf_result;
+ buf_remaining -= sprintf_result;
+ }
+
+ assert(charsPrinted > 0);
+ assert(buf_remaining > 0);
+
+ return (int)charsPrinted;
+}
+
+/*****************************************************************************
+ * Get the local var name, and create a copy of the string that can be used in debug output.
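+ * The result typically looks like "V02 arg1", "V07 tmp0", or "V03 PInvokeFrame".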
+ */
+char* Compiler::gtGetLclVarName(unsigned lclNum)
+{
+ char buf[BUF_SIZE];
+ int charsPrinted = gtGetLclVarName(lclNum, buf, sizeof(buf) / sizeof(buf[0]));
+ if (charsPrinted < 0)
+ {
+ return nullptr;
+ }
+
+ char* retBuf = new (this, CMK_DebugOnly) char[charsPrinted + 1];
+ strcpy_s(retBuf, charsPrinted + 1, buf);
+ return retBuf;
+}
+
+/*****************************************************************************/
+void Compiler::gtDispLclVar(unsigned lclNum, bool padForBiggestDisp)
+{
+ char buf[BUF_SIZE];
+ int charsPrinted = gtGetLclVarName(lclNum, buf, sizeof(buf) / sizeof(buf[0]));
+
+ if (charsPrinted < 0)
+ {
+ return;
+ }
+
+ printf("%s", buf);
+
+ if (padForBiggestDisp && (charsPrinted < LONGEST_COMMON_LCL_VAR_DISPLAY_LENGTH))
+ {
+ printf("%*c", LONGEST_COMMON_LCL_VAR_DISPLAY_LENGTH - charsPrinted, ' ');
+ }
+}
+
+/*****************************************************************************/
+void Compiler::gtDispConst(GenTree* tree)
+{
+ assert(tree->OperKind() & GTK_CONST);
+
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+ if (tree->IsIconHandle(GTF_ICON_STR_HDL))
+ {
+ printf(" 0x%X \"%S\"", dspPtr(tree->gtIntCon.gtIconVal), eeGetCPString(tree->gtIntCon.gtIconVal));
+ }
+ else
+ {
+ ssize_t dspIconVal = tree->IsIconHandle() ? dspPtr(tree->gtIntCon.gtIconVal) : tree->gtIntCon.gtIconVal;
+
+ if (tree->TypeGet() == TYP_REF)
+ {
+ assert(tree->gtIntCon.gtIconVal == 0);
+ printf(" null");
+ }
+ else if ((tree->gtIntCon.gtIconVal > -1000) && (tree->gtIntCon.gtIconVal < 1000))
+ {
+ printf(" %ld", dspIconVal);
+#ifdef _TARGET_64BIT_
+ }
+ else if ((tree->gtIntCon.gtIconVal & 0xFFFFFFFF00000000LL) != 0)
+ {
+ printf(" 0x%llx", dspIconVal);
+#endif
+ }
+ else
+ {
+ printf(" 0x%X", dspIconVal);
+ }
+
+ if (tree->IsIconHandle())
+ {
+ switch (tree->GetIconHandleFlag())
+ {
+ case GTF_ICON_SCOPE_HDL:
+ printf(" scope");
+ break;
+ case GTF_ICON_CLASS_HDL:
+ printf(" class");
+ break;
+ case GTF_ICON_METHOD_HDL:
+ printf(" method");
+ break;
+ case GTF_ICON_FIELD_HDL:
+ printf(" field");
+ break;
+ case GTF_ICON_STATIC_HDL:
+ printf(" static");
+ break;
+ case GTF_ICON_STR_HDL:
+ unreached(); // This case is handled above
+ break;
+ case GTF_ICON_PSTR_HDL:
+ printf(" pstr");
+ break;
+ case GTF_ICON_PTR_HDL:
+ printf(" ptr");
+ break;
+ case GTF_ICON_VARG_HDL:
+ printf(" vararg");
+ break;
+ case GTF_ICON_PINVKI_HDL:
+ printf(" pinvoke");
+ break;
+ case GTF_ICON_TOKEN_HDL:
+ printf(" token");
+ break;
+ case GTF_ICON_TLS_HDL:
+ printf(" tls");
+ break;
+ case GTF_ICON_FTN_ADDR:
+ printf(" ftn");
+ break;
+ case GTF_ICON_CIDMID_HDL:
+ printf(" cid");
+ break;
+ case GTF_ICON_BBC_PTR:
+ printf(" bbc");
+ break;
+ default:
+ printf(" UNKNOWN");
+ break;
+ }
+ }
+
+ if ((tree->gtFlags & GTF_ICON_FIELD_OFF) != 0)
+ {
+ printf(" field offset");
+ }
+
+ if ((tree->IsReuseRegVal()) != 0)
+ {
+ printf(" reuse reg val");
+ }
+ }
+
+ gtDispFieldSeq(tree->gtIntCon.gtFieldSeq);
+
+ break;
+
+ case GT_CNS_LNG:
+ printf(" 0x%016I64x", tree->gtLngCon.gtLconVal);
+ break;
+
+ case GT_CNS_DBL:
+ if (*((__int64*)&tree->gtDblCon.gtDconVal) == (__int64)I64(0x8000000000000000))
+ {
+ printf(" -0.00000");
+ }
+ else
+ {
+ printf(" %#.17g", tree->gtDblCon.gtDconVal);
+ }
+ break;
+ case GT_CNS_STR:
+ printf("<string constant>");
+ break;
+ default:
+ assert(!"unexpected constant node");
+ }
+
+ gtDispRegVal(tree);
+}
+
+void Compiler::gtDispFieldSeq(FieldSeqNode* pfsn)
+{
+ if (pfsn == FieldSeqStore::NotAField() || (pfsn == nullptr))
+ {
+ return;
+ }
+
+ // Otherwise...
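+ // Print the sequence as, e.g., " Fseq[#FirstElem, myField]" (the field name here is illustrative).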
+ printf(" Fseq[");
+ while (pfsn != nullptr)
+ {
+ assert(pfsn != FieldSeqStore::NotAField()); // Can't exist in a field sequence list except alone
+ CORINFO_FIELD_HANDLE fldHnd = pfsn->m_fieldHnd;
+ // First check the "pseudo" field handles...
+ if (fldHnd == FieldSeqStore::FirstElemPseudoField)
+ {
+ printf("#FirstElem");
+ }
+ else if (fldHnd == FieldSeqStore::ConstantIndexPseudoField)
+ {
+ printf("#ConstantIndex");
+ }
+ else
+ {
+ printf("%s", eeGetFieldName(fldHnd));
+ }
+ pfsn = pfsn->m_next;
+ if (pfsn != nullptr)
+ {
+ printf(", ");
+ }
+ }
+ printf("]");
+}
+
+//------------------------------------------------------------------------
+// gtDispLeaf: Print a single leaf node to jitstdout.
+//
+// Arguments:
+// tree - the tree to be printed
+// indentStack - the specification for the current level of indentation & arcs
+//
+// Return Value:
+// None.
+//
+// Notes:
+// 'indentStack' may be null, in which case no indentation or arcs are printed
+
+void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack)
+{
+ if (tree->OperKind() & GTK_CONST)
+ {
+ gtDispConst(tree);
+ return;
+ }
+
+ bool isLclFld = false;
+
+ switch (tree->gtOper)
+ {
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ case GT_LCL_FLD:
+ case GT_LCL_FLD_ADDR:
+ case GT_STORE_LCL_FLD:
+ isLclFld = true;
+ __fallthrough;
+
+ case GT_PHI_ARG:
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ case GT_STORE_LCL_VAR:
+ printf(" ");
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ varDsc = &lvaTable[varNum];
+ gtDispLclVar(varNum);
+ if (tree->gtLclVarCommon.HasSsaName())
+ {
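+ // Print the SSA number as "u:N" (use), "d:N" (def), or "ud:N->M" for a partial def.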
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+ assert(tree->gtFlags & GTF_VAR_DEF);
+ printf("ud:%d->%d", tree->gtLclVarCommon.gtSsaNum, GetSsaNumForLocalVarDef(tree));
+ }
+ else
+ {
+ printf("%s:%d", (tree->gtFlags & GTF_VAR_DEF) ? "d" : "u", tree->gtLclVarCommon.gtSsaNum);
+ }
+ }
+
+ if (isLclFld)
+ {
+ printf("[+%u]", tree->gtLclFld.gtLclOffs);
+ gtDispFieldSeq(tree->gtLclFld.gtFieldSeq);
+ }
+
+ if (varDsc->lvRegister)
+ {
+ printf(" ");
+ varDsc->PrintVarReg();
+ }
+#ifndef LEGACY_BACKEND
+ else if (tree->InReg())
+ {
+#if CPU_LONG_USES_REGPAIR
+ if (isRegPairType(tree->TypeGet()))
+ printf(" %s", compRegPairName(tree->gtRegPair));
+ else
+#endif
+ printf(" %s", compRegVarName(tree->gtRegNum));
+ }
+#endif // !LEGACY_BACKEND
+
+ if (varDsc->lvPromoted)
+ {
+ assert(varTypeIsPromotable(varDsc) || varDsc->lvUnusedStruct);
+
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ CORINFO_FIELD_HANDLE fldHnd;
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ LclVarDsc* fieldVarDsc = &lvaTable[i];
+ const char* fieldName;
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(varDsc))
+ {
+ fieldName = (i == 0) ? "lo" : "hi";
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+ fldHnd = info.compCompHnd->getFieldInClass(typeHnd, fieldVarDsc->lvFldOrdinal);
+ fieldName = eeGetFieldName(fldHnd);
+ }
+
+ printf("\n");
+ printf(" ");
+ printIndent(indentStack);
+ printf(" %-6s V%02u.%s (offs=0x%02x) -> ", varTypeName(fieldVarDsc->TypeGet()),
+ tree->gtLclVarCommon.gtLclNum, fieldName, fieldVarDsc->lvFldOffset);
+ gtDispLclVar(i);
+
+ if (fieldVarDsc->lvRegister)
+ {
+ printf(" ");
+ fieldVarDsc->PrintVarReg();
+ }
+
+ if (fieldVarDsc->lvTracked && fgLocalVarLivenessDone && // Includes local variable liveness
+ ((tree->gtFlags & GTF_VAR_DEATH) != 0))
+ {
+ printf(" (last use)");
+ }
+ }
+ }
+ else // a normal not-promoted lclvar
+ {
+ if (varDsc->lvTracked && fgLocalVarLivenessDone && ((tree->gtFlags & GTF_VAR_DEATH) != 0))
+ {
+ printf(" (last use)");
+ }
+ }
+ break;
+
+ case GT_REG_VAR:
+ printf(" ");
+ gtDispLclVar(tree->gtRegVar.gtLclNum);
+ if (isFloatRegType(tree->gtType))
+ {
+ assert(tree->gtRegVar.gtRegNum == tree->gtRegNum);
+ printf(" FPV%u", tree->gtRegNum);
+ }
+ else
+ {
+ printf(" %s", compRegVarName(tree->gtRegVar.gtRegNum));
+ }
+
+ varNum = tree->gtRegVar.gtLclNum;
+ varDsc = &lvaTable[varNum];
+
+ if (varDsc->lvTracked && fgLocalVarLivenessDone && ((tree->gtFlags & GTF_VAR_DEATH) != 0))
+ {
+ printf(" (last use)");
+ }
+
+ break;
+
+ case GT_JMP:
+ {
+ const char* methodName;
+ const char* className;
+
+ methodName = eeGetMethodName((CORINFO_METHOD_HANDLE)tree->gtVal.gtVal1, &className);
+ printf(" %s.%s\n", className, methodName);
+ }
+ break;
+
+ case GT_CLS_VAR:
+ printf(" Hnd=%#x", dspPtr(tree->gtClsVar.gtClsVarHnd));
+ gtDispFieldSeq(tree->gtClsVar.gtFieldSeq);
+ break;
+
+ case GT_CLS_VAR_ADDR:
+ printf(" Hnd=%#x", dspPtr(tree->gtClsVar.gtClsVarHnd));
+ break;
+
+ case GT_LABEL:
+ if (tree->gtLabel.gtLabBB)
+ {
+ printf(" dst=BB%02u", tree->gtLabel.gtLabBB->bbNum);
+ }
+ else
+ {
+ printf(" dst=<null>");
+ }
+
+ break;
+
+ case GT_FTN_ADDR:
+ {
+ const char* methodName;
+ const char* className;
+
+ methodName = eeGetMethodName((CORINFO_METHOD_HANDLE)tree->gtFptrVal.gtFptrMethod, &className);
+ printf(" %s.%s\n", className, methodName);
+ }
+ break;
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+ printf(" endNstLvl=%d", tree->gtVal.gtVal1);
+ break;
+#endif // !FEATURE_EH_FUNCLETS
+
+ // Vanilla leaves. No qualifying information available. So do nothing
+
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ case GT_CATCH_ARG:
+ case GT_MEMORYBARRIER:
+ case GT_ARGPLACE:
+ case GT_PINVOKE_PROLOG:
+#ifndef LEGACY_BACKEND
+ case GT_JMPTABLE:
+#endif // !LEGACY_BACKEND
+ break;
+
+ case GT_RET_EXPR:
+ printf("(inl return from call ");
+ printTreeID(tree->gtRetExpr.gtInlineCandidate);
+ printf(")");
+ break;
+
+ case GT_PHYSREG:
+ printf(" %s", getRegName(tree->gtPhysReg.gtSrcReg, varTypeIsFloating(tree)));
+ break;
+
+ case GT_IL_OFFSET:
+ printf(" IL offset: ");
+ if (tree->gtStmt.gtStmtILoffsx == BAD_IL_OFFSET)
+ {
+ printf("???");
+ }
+ else
+ {
+ printf("%d", jitGetILoffs(tree->gtStmt.gtStmtILoffsx));
+ }
+ break;
+
+ default:
+ assert(!"don't know how to display tree leaf node");
+ }
+
+ gtDispRegVal(tree);
+}
+
+//------------------------------------------------------------------------
+// gtDispChild: Print a child node to jitstdout.
+//
+// Arguments:
+//    child       - the child node to be printed
+//    indentStack - the specification for the current level of indentation & arcs
+//    arcType     - the type of arc to use for this child
+//    msg         - a contextual message (i.e. from the parent) to print
+//    topOnly     - a boolean indicating whether to print the children, or just the top node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// 'indentStack' may be null, in which case no indentation or arcs are printed
+// 'msg' has a default value of null
+// 'topOnly' is an optional argument that defaults to false
+
+void Compiler::gtDispChild(GenTreePtr child,
+ IndentStack* indentStack,
+ IndentInfo arcType,
+ __in_opt const char* msg, /* = nullptr */
+ bool topOnly) /* = false */
+{
+ IndentInfo info;
+ indentStack->Push(arcType);
+ gtDispTree(child, indentStack, msg, topOnly);
+ indentStack->Pop();
+}
+
+#ifdef FEATURE_SIMD
+// Intrinsic Id to name map
+extern const char* const simdIntrinsicNames[] = {
+#define SIMD_INTRINSIC(mname, inst, id, name, r, ac, arg1, arg2, arg3, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) name,
+#include "simdintrinsiclist.h"
+};
+#endif // FEATURE_SIMD
+
+/*****************************************************************************/
+
+void Compiler::gtDispTree(GenTreePtr tree,
+ IndentStack* indentStack, /* = nullptr */
+ __in __in_z __in_opt const char* msg, /* = nullptr */
+ bool topOnly, /* = false */
+ bool isLIR) /* = false */
+{
+ if (tree == nullptr)
+ {
+ printf(" [%08X] <NULL>\n", tree);
+ printf(""); // null string means flush
+ return;
+ }
+
+ if (indentStack == nullptr)
+ {
+ indentStack = new (this, CMK_DebugOnly) IndentStack(this);
+ }
+
+ if (IsUninitialized(tree))
+ {
+ /* Value used to initialize nodes */
+ printf("Uninitialized tree node!");
+ return;
+ }
+
+ if (tree->gtOper >= GT_COUNT)
+ {
+ gtDispNode(tree, indentStack, msg, isLIR);
+ printf("Bogus operator!");
+ return;
+ }
+
+ /* Is tree a leaf node? */
+
+ if (tree->OperIsLeaf() || tree->OperIsLocalStore()) // local stores used to be leaves
+ {
+ gtDispNode(tree, indentStack, msg, isLIR);
+ gtDispLeaf(tree, indentStack);
+ gtDispVN(tree);
+ printf("\n");
+ if (tree->OperIsLocalStore() && !topOnly)
+ {
+ gtDispChild(tree->gtOp.gtOp1, indentStack, IINone);
+ }
+ return;
+ }
+
+ // Determine what kind of arc to propagate.
+ IndentInfo myArc = IINone;
+ IndentInfo lowerArc = IINone;
+ if (indentStack->Depth() > 0)
+ {
+ myArc = indentStack->Pop();
+ switch (myArc)
+ {
+ case IIArcBottom:
+ indentStack->Push(IIArc);
+ lowerArc = IINone;
+ break;
+ case IIArc:
+ indentStack->Push(IIArc);
+ lowerArc = IIArc;
+ break;
+ case IIArcTop:
+ indentStack->Push(IINone);
+ lowerArc = IIArc;
+ break;
+ case IIEmbedded:
+ indentStack->Push(IIEmbedded);
+ lowerArc = IIEmbedded;
+ break;
+ default:
+ // Should never get here; just use IINone.
+ break;
+ }
+ }
+
+ // Special case formatting for PHI nodes -- arg lists like calls.
+
+ if (tree->OperGet() == GT_PHI)
+ {
+ gtDispNode(tree, indentStack, msg, isLIR);
+ gtDispVN(tree);
+ printf("\n");
+
+ if (!topOnly)
+ {
+ if (tree->gtOp.gtOp1 != nullptr)
+ {
+ IndentInfo arcType = IIArcTop;
+ for (GenTreeArgList* args = tree->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ if (args->Rest() == nullptr)
+ {
+ arcType = IIArcBottom;
+ }
+ gtDispChild(args->Current(), indentStack, arcType);
+ arcType = IIArc;
+ }
+ }
+ }
+ return;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ const char* childMsg = nullptr;
+
+ if (tree->OperIsSimple())
+ {
+ if (!topOnly)
+ {
+ if (tree->gtGetOp2())
+ {
+ // Label the childMsgs of the GT_COLON operator
+ // op2 is the then part
+
+ if (tree->gtOper == GT_COLON)
+ {
+ childMsg = "then";
+ }
+ gtDispChild(tree->gtOp.gtOp2, indentStack, IIArcTop, childMsg, topOnly);
+ }
+ }
+
+ // Now, get the right type of arc for this node
+ if (myArc != IINone)
+ {
+ indentStack->Pop();
+ indentStack->Push(myArc);
+ }
+
+ gtDispNode(tree, indentStack, msg, isLIR);
+
+ // Propagate lowerArc to the lower children.
+ if (indentStack->Depth() > 0)
+ {
+ (void)indentStack->Pop();
+ indentStack->Push(lowerArc);
+ }
+
+ if (tree->gtOper == GT_CAST)
+ {
+ /* Format a message that explains the effect of this GT_CAST */
+
+ var_types fromType = genActualType(tree->gtCast.CastOp()->TypeGet());
+ var_types toType = tree->CastToType();
+ var_types finalType = tree->TypeGet();
+
+ /* if GTF_UNSIGNED is set then force fromType to an unsigned type */
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ fromType = genUnsignedType(fromType);
+ }
+
+ if (finalType != toType)
+ {
+ printf(" %s <-", varTypeName(finalType));
+ }
+
+ printf(" %s <- %s", varTypeName(toType), varTypeName(fromType));
+ }
+
+ if (tree->gtOper == GT_OBJ && (tree->gtFlags & GTF_VAR_DEATH))
+ {
+ printf(" (last use)");
+ }
+ if (tree->OperIsCopyBlkOp())
+ {
+ printf(" (copy)");
+ }
+ else if (tree->OperIsInitBlkOp())
+ {
+ printf(" (init)");
+ }
+
+ IndirectAssignmentAnnotation* pIndirAnnote;
+ if (tree->gtOper == GT_ASG && GetIndirAssignMap()->Lookup(tree, &pIndirAnnote))
+ {
+ printf(" indir assign of V%02d:", pIndirAnnote->m_lclNum);
+ if (pIndirAnnote->m_isEntire)
+ {
+ printf("d:%d", pIndirAnnote->m_defSsaNum);
+ }
+ else
+ {
+ printf("ud:%d->%d", pIndirAnnote->m_useSsaNum, pIndirAnnote->m_defSsaNum);
+ }
+ }
+
+ if (tree->gtOper == GT_INTRINSIC)
+ {
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ printf(" sin");
+ break;
+ case CORINFO_INTRINSIC_Cos:
+ printf(" cos");
+ break;
+ case CORINFO_INTRINSIC_Sqrt:
+ printf(" sqrt");
+ break;
+ case CORINFO_INTRINSIC_Abs:
+ printf(" abs");
+ break;
+ case CORINFO_INTRINSIC_Round:
+ printf(" round");
+ break;
+ case CORINFO_INTRINSIC_Cosh:
+ printf(" cosh");
+ break;
+ case CORINFO_INTRINSIC_Sinh:
+ printf(" sinh");
+ break;
+ case CORINFO_INTRINSIC_Tan:
+ printf(" tan");
+ break;
+ case CORINFO_INTRINSIC_Tanh:
+ printf(" tanh");
+ break;
+ case CORINFO_INTRINSIC_Asin:
+ printf(" asin");
+ break;
+ case CORINFO_INTRINSIC_Acos:
+ printf(" acos");
+ break;
+ case CORINFO_INTRINSIC_Atan:
+ printf(" atan");
+ break;
+ case CORINFO_INTRINSIC_Atan2:
+ printf(" atan2");
+ break;
+ case CORINFO_INTRINSIC_Log10:
+ printf(" log10");
+ break;
+ case CORINFO_INTRINSIC_Pow:
+ printf(" pow");
+ break;
+ case CORINFO_INTRINSIC_Exp:
+ printf(" exp");
+ break;
+ case CORINFO_INTRINSIC_Ceiling:
+ printf(" ceiling");
+ break;
+ case CORINFO_INTRINSIC_Floor:
+ printf(" floor");
+ break;
+ case CORINFO_INTRINSIC_Object_GetType:
+ printf(" objGetType");
+ break;
+
+ default:
+ unreached();
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ if (tree->gtOper == GT_SIMD)
+ {
+ printf(" %s %s", varTypeName(tree->gtSIMD.gtSIMDBaseType),
+ simdIntrinsicNames[tree->gtSIMD.gtSIMDIntrinsicID]);
+ }
+#endif // FEATURE_SIMD
+
+ gtDispRegVal(tree);
+ gtDispVN(tree);
+ printf("\n");
+
+ if (!topOnly && tree->gtOp.gtOp1)
+ {
+
+ // Label the child of the GT_COLON operator
+ // op1 is the else part
+
+ if (tree->gtOper == GT_COLON)
+ {
+ childMsg = "else";
+ }
+ else if (tree->gtOper == GT_QMARK)
+ {
+ childMsg = " if";
+ }
+ gtDispChild(tree->gtOp.gtOp1, indentStack, IIArcBottom, childMsg, topOnly);
+ }
+
+ return;
+ }
+
+ // Now, get the right type of arc for this node
+ if (myArc != IINone)
+ {
+ indentStack->Pop();
+ indentStack->Push(myArc);
+ }
+ gtDispNode(tree, indentStack, msg, isLIR);
+
+ // Propagate lowerArc to the lower children.
+ if (indentStack->Depth() > 0)
+ {
+ (void)indentStack->Pop();
+ indentStack->Push(lowerArc);
+ }
+
+ // See what kind of a special operator we have here, and handle its special children.
+
+ switch (tree->gtOper)
+ {
+ case GT_FIELD:
+ printf(" %s", eeGetFieldName(tree->gtField.gtFldHnd), 0);
+
+ if (tree->gtField.gtFldObj && !topOnly)
+ {
+ gtDispVN(tree);
+ printf("\n");
+ gtDispChild(tree->gtField.gtFldObj, indentStack, IIArcBottom);
+ }
+ else
+ {
+ gtDispRegVal(tree);
+ gtDispVN(tree);
+ printf("\n");
+ }
+ break;
+
+ case GT_CALL:
+ {
+ assert(tree->gtFlags & GTF_CALL);
+ unsigned numChildren = tree->NumChildren();
+ GenTree* lastChild = nullptr;
+ if (numChildren != 0)
+ {
+ lastChild = tree->GetChild(numChildren - 1);
+ }
+
+ if (tree->gtCall.gtCallType != CT_INDIRECT)
+ {
+ const char* methodName;
+ const char* className;
+
+ methodName = eeGetMethodName(tree->gtCall.gtCallMethHnd, &className);
+
+ printf(" %s.%s", className, methodName);
+ }
+
+ if ((tree->gtFlags & GTF_CALL_UNMANAGED) && (tree->gtCall.gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
+ {
+ printf(" (FramesRoot last use)");
+ }
+
+ if (((tree->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0) && (tree->gtCall.gtInlineCandidateInfo != nullptr) &&
+ (tree->gtCall.gtInlineCandidateInfo->exactContextHnd != nullptr))
+ {
+ printf(" (exactContextHnd=0x%p)", dspPtr(tree->gtCall.gtInlineCandidateInfo->exactContextHnd));
+ }
+
+ gtDispVN(tree);
+ if (tree->IsMultiRegCall())
+ {
+ gtDispRegVal(tree);
+ }
+ printf("\n");
+
+ if (!topOnly)
+ {
+ char buf[64];
+ char* bufp;
+
+ bufp = &buf[0];
+
+ if ((tree->gtCall.gtCallObjp != nullptr) && (tree->gtCall.gtCallObjp->gtOper != GT_NOP) &&
+ (!tree->gtCall.gtCallObjp->IsArgPlaceHolderNode()))
+ {
+ if (tree->gtCall.gtCallObjp->gtOper == GT_ASG)
+ {
+ sprintf_s(bufp, sizeof(buf), "this SETUP%c", 0);
+ }
+ else
+ {
+ sprintf_s(bufp, sizeof(buf), "this in %s%c", compRegVarName(REG_ARG_0), 0);
+ }
+ gtDispChild(tree->gtCall.gtCallObjp, indentStack,
+ (tree->gtCall.gtCallObjp == lastChild) ? IIArcBottom : IIArc, bufp, topOnly);
+ }
+
+ if (tree->gtCall.gtCallArgs)
+ {
+ gtDispArgList(tree, indentStack);
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ gtDispChild(tree->gtCall.gtCallAddr, indentStack,
+ (tree->gtCall.gtCallAddr == lastChild) ? IIArcBottom : IIArc, "calli tgt", topOnly);
+ }
+
+ if (tree->gtCall.gtControlExpr != nullptr)
+ {
+ gtDispChild(tree->gtCall.gtControlExpr, indentStack,
+ (tree->gtCall.gtControlExpr == lastChild) ? IIArcBottom : IIArc, "control expr",
+ topOnly);
+ }
+
+#if !FEATURE_FIXED_OUT_ARGS
+ regList list = tree->gtCall.regArgList;
+#endif
+ /* process the late argument list */
+ int lateArgIndex = 0;
+ for (GenTreeArgList* lateArgs = tree->gtCall.gtCallLateArgs; lateArgs;
+ (lateArgIndex++, lateArgs = lateArgs->Rest()))
+ {
+ GenTreePtr argx;
+
+ argx = lateArgs->Current();
+
+ IndentInfo arcType = (lateArgs->Rest() == nullptr) ? IIArcBottom : IIArc;
+ gtGetLateArgMsg(tree, argx, lateArgIndex, -1, bufp, sizeof(buf));
+ gtDispChild(argx, indentStack, arcType, bufp, topOnly);
+ }
+ }
+ }
+ break;
+
+ case GT_STMT:
+ printf("\n");
+
+ if (!topOnly)
+ {
+ gtDispChild(tree->gtStmt.gtStmtExpr, indentStack, IIArcBottom);
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ gtDispVN(tree);
+ printf("\n");
+
+ if (!topOnly)
+ {
+ gtDispChild(tree->gtArrElem.gtArrObj, indentStack, IIArc, nullptr, topOnly);
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ IndentInfo arcType = ((dim + 1) == tree->gtArrElem.gtArrRank) ? IIArcBottom : IIArc;
+ gtDispChild(tree->gtArrElem.gtArrInds[dim], indentStack, arcType, nullptr, topOnly);
+ }
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ gtDispVN(tree);
+ printf("\n");
+ if (!topOnly)
+ {
+ gtDispChild(tree->gtArrOffs.gtOffset, indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtArrOffs.gtIndex, indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtArrOffs.gtArrObj, indentStack, IIArcBottom, nullptr, topOnly);
+ }
+ break;
+
+ case GT_CMPXCHG:
+ gtDispVN(tree);
+ printf("\n");
+ if (!topOnly)
+ {
+ gtDispChild(tree->gtCmpXchg.gtOpLocation, indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtCmpXchg.gtOpValue, indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtCmpXchg.gtOpComparand, indentStack, IIArcBottom, nullptr, topOnly);
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ gtDispVN(tree);
+ printf("\n");
+ if (!topOnly)
+ {
+ gtDispChild(tree->gtBoundsChk.gtArrLen, indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtBoundsChk.gtIndex, indentStack, IIArcBottom, nullptr, topOnly);
+ }
+ break;
+
+ case GT_STORE_DYN_BLK:
+ case GT_DYN_BLK:
+ gtDispVN(tree);
+ printf("\n");
+ if (!topOnly)
+ {
+ if (tree->gtDynBlk.Data() != nullptr)
+ {
+ gtDispChild(tree->gtDynBlk.Data(), indentStack, IIArc, nullptr, topOnly);
+ }
+ gtDispChild(tree->gtDynBlk.Addr(), indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtDynBlk.gtDynamicSize, indentStack, IIArcBottom, nullptr, topOnly);
+ }
+ if (tree->OperIsCopyBlkOp())
+ {
+ printf(" (copy)");
+ }
+ else if (tree->OperIsInitBlkOp())
+ {
+ printf(" (init)");
+ }
+ break;
+
+ default:
+ printf("<DON'T KNOW HOW TO DISPLAY THIS NODE> :");
+ printf(""); // null string means flush
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// gtGetArgMsg: Construct a message about the given argument
+//
+// Arguments:
+// call - The call for which 'arg' is an argument
+// arg - The argument for which a message should be constructed
+// argNum - The ordinal number of the arg in the argument list
+// listCount - When printing in LIR form this is the count for a multireg GT_LIST
+// or -1 if we are not printing in LIR form
+// bufp - A pointer to the buffer into which the message is written
+// bufLength - The length of the buffer pointed to by bufp
+//
+// Return Value:
+// No return value, but bufp is written.
+//
+// Assumptions:
+// 'call' must be a call node
+//    'arg' must be an argument to 'call' (else gtArgEntryByArgNum will assert)
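+//
+// Notes:
+//    For illustration, the resulting message typically reads "arg2 SETUP", "arg3 out+18",
+//    "arg1 on STK", or simply "arg1", depending on how the argument is passed.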
+
+void Compiler::gtGetArgMsg(
+ GenTreePtr call, GenTreePtr arg, unsigned argNum, int listCount, char* bufp, unsigned bufLength)
+{
+ if (call->gtCall.gtCallLateArgs != nullptr)
+ {
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(call, argNum);
+ assert(curArgTabEntry);
+
+ if (arg->gtFlags & GTF_LATE_ARG)
+ {
+ sprintf_s(bufp, bufLength, "arg%d SETUP%c", argNum, 0);
+ }
+ else
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ if (listCount == -1)
+ {
+ sprintf_s(bufp, bufLength, "arg%d out+%02x%c", argNum, curArgTabEntry->slotNum * TARGET_POINTER_SIZE,
+ 0);
+ }
+ else // listCount is 0,1,2 or 3
+ {
+ assert(listCount <= MAX_ARG_REG_COUNT);
+ sprintf_s(bufp, bufLength, "arg%d out+%02x%c", argNum,
+ (curArgTabEntry->slotNum + listCount) * TARGET_POINTER_SIZE, 0);
+ }
+#else
+ sprintf_s(bufp, bufLength, "arg%d on STK%c", argNum, 0);
+#endif
+ }
+ }
+ else
+ {
+ sprintf_s(bufp, bufLength, "arg%d%c", argNum, 0);
+ }
+}
+
+//------------------------------------------------------------------------
+// gtGetLateArgMsg: Construct a message about the given argument
+//
+// Arguments:
+// call - The call for which 'arg' is an argument
+// argx - The argument for which a message should be constructed
+//    lateArgIndex - The ordinal number of the arg in the lateArg list
+// listCount - When printing in LIR form this is the count for a multireg GT_LIST
+// or -1 if we are not printing in LIR form
+// bufp - A pointer to the buffer into which the message is written
+// bufLength - The length of the buffer pointed to by bufp
+//
+// Return Value:
+// No return value, but bufp is written.
+//
+// Assumptions:
+// 'call' must be a call node
+//    'argx' must be an argument to 'call' (else gtArgEntryByLateArgIndex will assert)
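+//
+// Notes:
+//    For illustration, the resulting message typically reads "this in rcx", "arg2 in rdx", or
+//    "arg1 in out+18" (the register names here assume an x64 target).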
+
+void Compiler::gtGetLateArgMsg(
+ GenTreePtr call, GenTreePtr argx, int lateArgIndex, int listCount, char* bufp, unsigned bufLength)
+{
+ assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
+
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByLateArgIndex(call, lateArgIndex);
+ assert(curArgTabEntry);
+ regNumber argReg = curArgTabEntry->regNum;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ assert(lateArgIndex < call->gtCall.regArgListCount);
+ assert(argReg == call->gtCall.regArgList[lateArgIndex]);
+#else
+ if (argReg == REG_STK)
+ {
+ sprintf_s(bufp, bufLength, "arg%d in out+%02x%c", curArgTabEntry->argNum,
+ curArgTabEntry->slotNum * TARGET_POINTER_SIZE, 0);
+ }
+ else
+#endif
+ {
+ if (gtArgIsThisPtr(curArgTabEntry))
+ {
+ sprintf_s(bufp, bufLength, "this in %s%c", compRegVarName(argReg), 0);
+ }
+ else
+ {
+#if FEATURE_MULTIREG_ARGS
+ if (curArgTabEntry->numRegs >= 2)
+ {
+ regNumber otherRegNum;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ assert(curArgTabEntry->numRegs == 2);
+ otherRegNum = curArgTabEntry->otherRegNum;
+#else
+ otherRegNum = (regNumber)(((unsigned)curArgTabEntry->regNum) + curArgTabEntry->numRegs - 1);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (listCount == -1)
+ {
+ char separator = (curArgTabEntry->numRegs == 2) ? ',' : '-';
+
+ sprintf_s(bufp, bufLength, "arg%d %s%c%s%c", curArgTabEntry->argNum, compRegVarName(argReg),
+ separator, compRegVarName(otherRegNum), 0);
+ }
+ else // listCount is 0,1,2 or 3
+ {
+ assert(listCount <= MAX_ARG_REG_COUNT);
+ regNumber curReg = (listCount == 1) ? otherRegNum : (regNumber)((unsigned)(argReg) + listCount);
+ sprintf_s(bufp, bufLength, "arg%d m%d %s%c", curArgTabEntry->argNum, listCount,
+ compRegVarName(curReg), 0);
+ }
+ }
+ else
+#endif
+ {
+ sprintf_s(bufp, bufLength, "arg%d in %s%c", curArgTabEntry->argNum, compRegVarName(argReg), 0);
+ }
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// gtDispArgList: Dump the tree for a call arg list
+//
+// Arguments:
+// tree - The call for which 'arg' is an argument
+// indentStack - the specification for the current level of indentation & arcs
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// 'tree' must be a call node
+
+void Compiler::gtDispArgList(GenTreePtr tree, IndentStack* indentStack)
+{
+ GenTree* args = tree->gtCall.gtCallArgs;
+ unsigned argnum = 0;
+ const int BufLength = 256;
+ char buf[BufLength];
+ char* bufp = &buf[0];
+ unsigned numChildren = tree->NumChildren();
+ assert(numChildren != 0);
+ bool argListIsLastChild = (args == tree->GetChild(numChildren - 1));
+
+ IndentInfo arcType = IIArc;
+ if (tree->gtCall.gtCallObjp != nullptr)
+ {
+ argnum++;
+ }
+
+ while (args != nullptr)
+ {
+ assert(args->gtOper == GT_LIST);
+ GenTree* arg = args->gtOp.gtOp1;
+ if (!arg->IsNothingNode() && !arg->IsArgPlaceHolderNode())
+ {
+ gtGetArgMsg(tree, arg, argnum, -1, bufp, BufLength);
+ if (argListIsLastChild && (args->gtOp.gtOp2 == nullptr))
+ {
+ arcType = IIArcBottom;
+ }
+ gtDispChild(arg, indentStack, arcType, bufp, false);
+ }
+ args = args->gtOp.gtOp2;
+ argnum++;
+ }
+}
+
+//------------------------------------------------------------------------
+// gtDispTreeList: Dump a sequence of trees linked via gtNext
+//
+// Arguments:
+//    tree        - The first tree in the gtNext-linked sequence to be printed
+//    indentStack - the specification for the current level of indentation & arcs
+//
+// Return Value:
+//    None.
+//
+// Assumptions:
+//    'tree' heads a gtNext-linked list of trees (e.g. the statement list of a basic block)
+
+void Compiler::gtDispTreeList(GenTreePtr tree, IndentStack* indentStack /* = nullptr */)
+{
+ for (/*--*/; tree != nullptr; tree = tree->gtNext)
+ {
+ gtDispTree(tree, indentStack);
+ printf("\n");
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::gtDispRange: dumps a range of LIR.
+//
+// Arguments:
+// range - the range of LIR to display.
+//
+void Compiler::gtDispRange(LIR::ReadOnlyRange const& range)
+{
+ for (GenTree* node : range)
+ {
+ gtDispLIRNode(node);
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::gtDispTreeRange: dumps the LIR range that contains all of the
+// nodes in the dataflow tree rooted at a given
+// node.
+//
+// Arguments:
+// containingRange - the LIR range that contains the root node.
+// tree - the root of the dataflow tree.
+//
+void Compiler::gtDispTreeRange(LIR::Range& containingRange, GenTree* tree)
+{
+ bool unused;
+ gtDispRange(containingRange.GetTreeRange(tree, &unused));
+}
+
+//------------------------------------------------------------------------
+// Compiler::gtDispLIRNode: dumps a single LIR node.
+//
+// Arguments:
+// node - the LIR node to dump.
+//
+void Compiler::gtDispLIRNode(GenTree* node)
+{
+ auto displayOperand = [](GenTree* operand, const char* message, IndentInfo operandArc, IndentStack& indentStack) {
+ assert(operand != nullptr);
+ assert(message != nullptr);
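+ // Each operand prints as its temp number, type, and message, e.g. " t42    int    arg1 in rcx".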
+
+ // 49 spaces for alignment
+ printf("%-49s", "");
+
+ indentStack.Push(operandArc);
+ indentStack.print();
+ indentStack.Pop();
+ operandArc = IIArc;
+
+ printf(" t%-5d %-6s %s\n", operand->gtTreeID, varTypeName(operand->TypeGet()), message);
+
+ };
+
+ IndentStack indentStack(this);
+
+ const int bufLength = 256;
+ char buf[bufLength];
+
+ const bool nodeIsCall = node->IsCall();
+
+ int numCallEarlyArgs = 0;
+ if (nodeIsCall)
+ {
+ GenTreeCall* call = node->AsCall();
+ for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
+ {
+ if (!args->Current()->IsArgPlaceHolderNode() && args->Current()->IsValue())
+ {
+ numCallEarlyArgs++;
+ }
+ }
+ }
+
+ // Visit operands
+ IndentInfo operandArc = IIArcTop;
+ int callArgNumber = 0;
+ for (GenTree* operand : node->Operands())
+ {
+ if (operand->IsArgPlaceHolderNode() || !operand->IsValue())
+ {
+ // Either of these situations may happen with calls.
+ continue;
+ }
+
+ if (nodeIsCall)
+ {
+ GenTreeCall* call = node->AsCall();
+ if (operand == call->gtCallObjp)
+ {
+ sprintf_s(buf, sizeof(buf), "this in %s", compRegVarName(REG_ARG_0));
+ displayOperand(operand, buf, operandArc, indentStack);
+ }
+ else if (operand == call->gtCallAddr)
+ {
+ displayOperand(operand, "calli tgt", operandArc, indentStack);
+ }
+ else if (operand == call->gtControlExpr)
+ {
+ displayOperand(operand, "control expr", operandArc, indentStack);
+ }
+ else if (operand == call->gtCallCookie)
+ {
+ displayOperand(operand, "cookie", operandArc, indentStack);
+ }
+ else
+ {
+ int callLateArgNumber = callArgNumber - numCallEarlyArgs;
+ if (operand->OperGet() == GT_LIST)
+ {
+ int listIndex = 0;
+ for (GenTreeArgList* element = operand->AsArgList(); element != nullptr; element = element->Rest())
+ {
+ operand = element->Current();
+ if (callLateArgNumber < 0)
+ {
+ gtGetArgMsg(call, operand, callArgNumber, listIndex, buf, sizeof(buf));
+ }
+ else
+ {
+ gtGetLateArgMsg(call, operand, callLateArgNumber, listIndex, buf, sizeof(buf));
+ }
+
+ displayOperand(operand, buf, operandArc, indentStack);
+ operandArc = IIArc;
+ }
+ }
+ else
+ {
+ if (callLateArgNumber < 0)
+ {
+ gtGetArgMsg(call, operand, callArgNumber, -1, buf, sizeof(buf));
+ }
+ else
+ {
+ gtGetLateArgMsg(call, operand, callLateArgNumber, -1, buf, sizeof(buf));
+ }
+
+ displayOperand(operand, buf, operandArc, indentStack);
+ }
+
+ callArgNumber++;
+ }
+ }
+ else if (node->OperIsDynBlkOp())
+ {
+ if (operand == node->AsBlk()->Addr())
+ {
+ displayOperand(operand, "lhs", operandArc, indentStack);
+ }
+ else if (operand == node->AsBlk()->Data())
+ {
+ displayOperand(operand, "rhs", operandArc, indentStack);
+ }
+ else
+ {
+ assert(operand == node->AsDynBlk()->gtDynamicSize);
+ displayOperand(operand, "size", operandArc, indentStack);
+ }
+ }
+ else if (node->OperGet() == GT_DYN_BLK)
+ {
+ if (operand == node->AsBlk()->Addr())
+ {
+ displayOperand(operand, "lhs", operandArc, indentStack);
+ }
+ else
+ {
+ assert(operand == node->AsDynBlk()->gtDynamicSize);
+ displayOperand(operand, "size", operandArc, indentStack);
+ }
+ }
+ else if (node->OperIsAssignment())
+ {
+ if (operand == node->gtGetOp1())
+ {
+ displayOperand(operand, "lhs", operandArc, indentStack);
+ }
+ else
+ {
+ displayOperand(operand, "rhs", operandArc, indentStack);
+ }
+ }
+ else
+ {
+ displayOperand(operand, "", operandArc, indentStack);
+ }
+
+ operandArc = IIArc;
+ }
+
+ // Visit the operator
+ const bool topOnly = true;
+ const bool isLIR = true;
+ gtDispTree(node, &indentStack, nullptr, topOnly, isLIR);
+
+ printf("\n");
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Check if the given node can be folded,
+ * and call the methods to perform the folding
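+ *
+ *  Folding is dispatched to gtFoldExprConst when all operands are constants,
+ *  to gtFoldExprSpecial when only one operand is a constant, and to
+ *  gtFoldExprCompare for comparisons (which can fold two identical subtrees).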
+ */
+
+GenTreePtr Compiler::gtFoldExpr(GenTreePtr tree)
+{
+ unsigned kind = tree->OperKind();
+
+ /* We must have a simple operation to fold */
+
+ // If we're in CSE, it's not safe to perform tree folding here,
+ // since it could potentially change the set of CSE candidates being considered.
+ if (optValnumCSE_phase)
+ {
+ return tree;
+ }
+
+ if (!(kind & GTK_SMPOP))
+ {
+ return tree;
+ }
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ /* Filter out non-foldable trees that can have constant children */
+
+ assert(kind & (GTK_UNOP | GTK_BINOP));
+ switch (tree->gtOper)
+ {
+ case GT_RETFILT:
+ case GT_RETURN:
+ case GT_IND:
+ return tree;
+ default:
+ break;
+ }
+
+ /* try to fold the current node */
+
+ if ((kind & GTK_UNOP) && op1)
+ {
+ if (op1->OperKind() & GTK_CONST)
+ {
+ return gtFoldExprConst(tree);
+ }
+ }
+ else if ((kind & GTK_BINOP) && op1 && tree->gtOp.gtOp2 &&
+ // Don't take out conditionals for debugging
+ !((opts.compDbgCode || opts.MinOpts()) && tree->OperIsCompare()))
+ {
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ // The atomic operations are exempted here because they are never computable statically;
+ // one of their arguments is an address.
+ if (((op1->OperKind() & op2->OperKind()) & GTK_CONST) && !tree->OperIsAtomicOp())
+ {
+ /* both nodes are constants - fold the expression */
+ return gtFoldExprConst(tree);
+ }
+ else if ((op1->OperKind() | op2->OperKind()) & GTK_CONST)
+ {
+ /* at least one is a constant - see if we have a
+ * special operator that can use only one constant
+ * to fold - e.g. booleans */
+
+ return gtFoldExprSpecial(tree);
+ }
+ else if (tree->OperIsCompare())
+ {
+ /* comparisons of two local variables can sometimes be folded */
+
+ return gtFoldExprCompare(tree);
+ }
+ else if (op2->OperGet() == GT_COLON)
+ {
+ assert(tree->OperGet() == GT_QMARK);
+
+ GenTreePtr colon_op1 = op2->gtOp.gtOp1;
+ GenTreePtr colon_op2 = op2->gtOp.gtOp2;
+
+ if (gtCompareTree(colon_op1, colon_op2))
+ {
+ // Both sides of the GT_COLON are the same tree
+
+ GenTreePtr sideEffList = nullptr;
+ gtExtractSideEffList(op1, &sideEffList);
+
+ fgUpdateRefCntForExtract(op1, sideEffList); // Decrement refcounts for op1, keeping any side effects
+ fgUpdateRefCntForExtract(colon_op1, nullptr); // Decrement refcounts for colon_op1
+
+ // Clear colon flags only if the qmark itself is not conditionally executed
+ if ((tree->gtFlags & GTF_COLON_COND) == 0)
+ {
+ fgWalkTreePre(&colon_op2, gtClearColonCond);
+ }
+
+ if (sideEffList == nullptr)
+ {
+ // No side-effects, just return colon_op2
+ return colon_op2;
+ }
+ else
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nIdentical GT_COLON trees with side effects! Extracting side effects...\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif
+ // Change the GT_COLON into a GT_COMMA node with the side-effects
+ op2->ChangeOper(GT_COMMA);
+ op2->gtFlags |= (sideEffList->gtFlags & GTF_ALL_EFFECT);
+ op2->gtOp.gtOp1 = sideEffList;
+ return op2;
+ }
+ }
+ }
+ }
+
+ /* Return the original node (folded/bashed or not) */
+
+ return tree;
+}
+
+/*****************************************************************************
+ *
+ * Some comparisons can be folded:
+ *
+ * locA == locA
+ * classVarA == classVarA
+ * locA + locB == locB + locA
+ *
+ */
+
+GenTreePtr Compiler::gtFoldExprCompare(GenTreePtr tree)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ assert(tree->OperIsCompare());
+
+ /* Filter out cases that cannot be folded here */
+
+ /* Do not fold floats or doubles (e.g. NaN != NaN) */
+
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ return tree;
+ }
+
+ /* Currently we can only fold when the two subtrees exactly match */
+
+ if ((tree->gtFlags & GTF_SIDE_EFFECT) || GenTree::Compare(op1, op2, true) == false)
+ {
+ return tree; /* return unfolded tree */
+ }
+
+ GenTreePtr cons;
+
+ switch (tree->gtOper)
+ {
+ case GT_EQ:
+ case GT_LE:
+ case GT_GE:
+ cons = gtNewIconNode(true); /* Folds to GT_CNS_INT(true) */
+ break;
+
+ case GT_NE:
+ case GT_LT:
+ case GT_GT:
+ cons = gtNewIconNode(false); /* Folds to GT_CNS_INT(false) */
+ break;
+
+ default:
+ assert(!"Unexpected relOp");
+ return tree;
+ }
+
+ /* The node has been folded into 'cons' */
+
+ if (fgGlobalMorph)
+ {
+ if (!fgIsInlining())
+ {
+ fgMorphTreeDone(cons);
+ }
+ }
+ else
+ {
+ cons->gtNext = tree->gtNext;
+ cons->gtPrev = tree->gtPrev;
+ }
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(tree);
+ }
+ return cons;
+}
+
+/*****************************************************************************
+ *
+ * Some binary operators can be folded even if they have only one
+ * operand constant - e.g. boolean operators, add with 0,
+ * multiply with 1, etc.
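+ *
+ *  For example (when the non-constant operand has no side effects):
+ *    x + 0 ==> x        x * 1 ==> x        x * 0 ==> 0
+ *    x & 0 ==> 0        x | 0 ==> x        x >> 0 ==> x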
+ */
+
+GenTreePtr Compiler::gtFoldExprSpecial(GenTreePtr tree)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ genTreeOps oper = tree->OperGet();
+
+ GenTreePtr op, cons;
+ ssize_t val;
+
+ assert(tree->OperKind() & GTK_BINOP);
+
+ /* Filter out operators that cannot be folded here */
+ if (oper == GT_CAST)
+ {
+ return tree;
+ }
+
+ /* We only consider TYP_INT for folding
+ * Do not fold pointer arithmetic (e.g. addressing modes!) */
+
+ if (oper != GT_QMARK && !varTypeIsIntOrI(tree->gtType))
+ {
+ return tree;
+ }
+
+ /* Find out which is the constant node */
+
+ if (op1->IsCnsIntOrI())
+ {
+ op = op2;
+ cons = op1;
+ }
+ else if (op2->IsCnsIntOrI())
+ {
+ op = op1;
+ cons = op2;
+ }
+ else
+ {
+ return tree;
+ }
+
+ /* Get the constant value */
+
+ val = cons->gtIntConCommon.IconValue();
+
+ /* Here 'op' is the non-constant operand, 'cons' is the constant operand,
+ and 'val' is the constant's value */
+
+ switch (oper)
+ {
+
+ case GT_EQ:
+ case GT_NE:
+ // Optimize boxed value classes; these are always false. This IL is
+ // generated when a generic value is tested against null:
+ // <T> ... foo(T x) { ... if ((object)x == null) ...
+ if (val == 0 && op->IsBoxedValue())
+ {
+ // Change the assignment node so we don't generate any code for it.
+
+ GenTreePtr asgStmt = op->gtBox.gtAsgStmtWhenInlinedBoxValue;
+ assert(asgStmt->gtOper == GT_STMT);
+ GenTreePtr asg = asgStmt->gtStmt.gtStmtExpr;
+ assert(asg->gtOper == GT_ASG);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Bashing ");
+ printTreeID(asg);
+ printf(" to NOP as part of dead box operation\n");
+ gtDispTree(tree);
+ }
+#endif
+ asg->gtBashToNOP();
+
+ op = gtNewIconNode(oper == GT_NE);
+ if (fgGlobalMorph)
+ {
+ if (!fgIsInlining())
+ {
+ fgMorphTreeDone(op);
+ }
+ }
+ else
+ {
+ op->gtNext = tree->gtNext;
+ op->gtPrev = tree->gtPrev;
+ }
+ fgSetStmtSeq(asgStmt);
+ return op;
+ }
+ break;
+
+ case GT_ADD:
+ case GT_ASG_ADD:
+ if (val == 0)
+ {
+ goto DONE_FOLD;
+ }
+ break;
+
+ case GT_MUL:
+ case GT_ASG_MUL:
+ if (val == 1)
+ {
+ goto DONE_FOLD;
+ }
+ else if (val == 0)
+ {
+ /* Multiply by zero - return the 'zero' node, but not if side effects */
+ if (!(op->gtFlags & GTF_SIDE_EFFECT))
+ {
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(op);
+ }
+ op = cons;
+ goto DONE_FOLD;
+ }
+ }
+ break;
+
+ case GT_DIV:
+ case GT_UDIV:
+ case GT_ASG_DIV:
+ if ((op2 == cons) && (val == 1) && !(op1->OperKind() & GTK_CONST))
+ {
+ goto DONE_FOLD;
+ }
+ break;
+
+ case GT_SUB:
+ case GT_ASG_SUB:
+ if ((op2 == cons) && (val == 0) && !(op1->OperKind() & GTK_CONST))
+ {
+ goto DONE_FOLD;
+ }
+ break;
+
+ case GT_AND:
+ if (val == 0)
+ {
+ /* AND with zero - return the 'zero' node, but not if side effects */
+
+ if (!(op->gtFlags & GTF_SIDE_EFFECT))
+ {
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(op);
+ }
+ op = cons;
+ goto DONE_FOLD;
+ }
+ }
+ else
+ {
+ /* The GTF_BOOLEAN flag is set for nodes that are part
+ * of a boolean expression, thus all their children
+ * are known to evaluate to only 0 or 1 */
+
+ if (tree->gtFlags & GTF_BOOLEAN)
+ {
+
+ /* The constant value must be 1
+ * AND with 1 stays the same */
+ assert(val == 1);
+ goto DONE_FOLD;
+ }
+ }
+ break;
+
+ case GT_OR:
+ if (val == 0)
+ {
+ goto DONE_FOLD;
+ }
+ else if (tree->gtFlags & GTF_BOOLEAN)
+ {
+ /* The constant value must be 1 - OR with 1 is 1 */
+
+ assert(val == 1);
+
+ /* OR with one - return the 'one' node, but not if side effects */
+
+ if (!(op->gtFlags & GTF_SIDE_EFFECT))
+ {
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(op);
+ }
+ op = cons;
+ goto DONE_FOLD;
+ }
+ }
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ if (val == 0)
+ {
+ if (op2 == cons)
+ {
+ goto DONE_FOLD;
+ }
+ else if (!(op->gtFlags & GTF_SIDE_EFFECT))
+ {
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(op);
+ }
+ op = cons;
+ goto DONE_FOLD;
+ }
+ }
+ break;
+
+ case GT_QMARK:
+ {
+ assert(op1 == cons && op2 == op && op2->gtOper == GT_COLON);
+ assert(op2->gtOp.gtOp1 && op2->gtOp.gtOp2);
+
+ assert(val == 0 || val == 1);
+
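+ // The qmark condition is a constant, so keep only the arm that will execute and,
+ // if local ref counts are being tracked, decrement the counts in the discarded arm.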
+ GenTree* opToDelete;
+ if (val)
+ {
+ op = op2->AsColon()->ThenNode();
+ opToDelete = op2->AsColon()->ElseNode();
+ }
+ else
+ {
+ op = op2->AsColon()->ElseNode();
+ opToDelete = op2->AsColon()->ThenNode();
+ }
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(opToDelete);
+ }
+
+ // Clear colon flags only if the qmark itself is not conditionally executed
+ if ((tree->gtFlags & GTF_COLON_COND) == 0)
+ {
+ fgWalkTreePre(&op, gtClearColonCond);
+ }
+ }
+
+ goto DONE_FOLD;
+
+ default:
+ break;
+ }
+
+ /* The node is not foldable */
+
+ return tree;
+
+DONE_FOLD:
+
+ /* The node has been folded into 'op' */
+
+ // If there was an assignment update, we just morphed it into
+ // a use; update the flags appropriately
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ assert((tree->OperKind() & GTK_ASGOP) || (op->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF | GTF_VAR_DEF)) == 0);
+
+ op->gtFlags &= ~(GTF_VAR_USEASG | GTF_VAR_USEDEF | GTF_VAR_DEF);
+ }
+
+ op->gtNext = tree->gtNext;
+ op->gtPrev = tree->gtPrev;
+
+ return op;
+}
+
+/*****************************************************************************
+ *
+ * Fold the given constant tree.
+ */
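+// Both operands (or the single operand of a unary oper) must already be constants.
+// On success the original 'tree' node is changed in place into a GT_CNS_INT,
+// GT_CNS_NATIVELONG or GT_CNS_DBL node; overflow cases instead become a
+// GT_COMMA(overflow helper call, dummy constant).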
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+GenTreePtr Compiler::gtFoldExprConst(GenTreePtr tree)
+{
+ unsigned kind = tree->OperKind();
+
+ SSIZE_T i1, i2, itemp;
+ INT64 lval1, lval2, ltemp;
+ float f1, f2;
+ double d1, d2;
+ var_types switchType;
+ FieldSeqNode* fieldSeq = FieldSeqStore::NotAField(); // default unless we override it when folding
+
+ assert(kind & (GTK_UNOP | GTK_BINOP));
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ if (!opts.OptEnabled(CLFLG_CONSTANTFOLD))
+ {
+ return tree;
+ }
+
+ if (tree->OperGet() == GT_NOP)
+ {
+ return tree;
+ }
+
+#ifdef FEATURE_SIMD
+ if (tree->OperGet() == GT_SIMD)
+ {
+ return tree;
+ }
+#endif // FEATURE_SIMD
+
+ if (tree->gtOper == GT_ALLOCOBJ)
+ {
+ return tree;
+ }
+
+ if (kind & GTK_UNOP)
+ {
+ assert(op1->OperKind() & GTK_CONST);
+
+ switch (op1->gtType)
+ {
+ case TYP_INT:
+
+ /* Fold constant INT unary operator */
+ assert(op1->gtIntCon.ImmedValCanBeFolded(this, tree->OperGet()));
+ i1 = (int)op1->gtIntCon.gtIconVal;
+
+ // If we fold a unary oper, then the folded constant
+ // is considered a ConstantIndexField if op1 was one
+ //
+
+ if ((op1->gtIntCon.gtFieldSeq != nullptr) && op1->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
+ {
+ fieldSeq = op1->gtIntCon.gtFieldSeq;
+ }
+
+ switch (tree->gtOper)
+ {
+ case GT_NOT:
+ i1 = ~i1;
+ break;
+
+ case GT_NEG:
+ case GT_CHS:
+ i1 = -i1;
+ break;
+
+ case GT_CAST:
+ // assert (genActualType(tree->CastToType()) == tree->gtType);
+ switch (tree->CastToType())
+ {
+ case TYP_BYTE:
+ itemp = INT32(INT8(i1));
+ goto CHK_OVF;
+
+ case TYP_SHORT:
+ itemp = INT32(INT16(i1));
+ CHK_OVF:
+ if (tree->gtOverflow() && ((itemp != i1) || ((tree->gtFlags & GTF_UNSIGNED) && i1 < 0)))
+ {
+ goto INT_OVF;
+ }
+ i1 = itemp;
+ goto CNS_INT;
+
+ case TYP_CHAR:
+ itemp = INT32(UINT16(i1));
+ if (tree->gtOverflow())
+ {
+ if (itemp != i1)
+ {
+ goto INT_OVF;
+ }
+ }
+ i1 = itemp;
+ goto CNS_INT;
+
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ itemp = INT32(UINT8(i1));
+ if (tree->gtOverflow())
+ {
+ if (itemp != i1)
+ {
+ goto INT_OVF;
+ }
+ }
+ i1 = itemp;
+ goto CNS_INT;
+
+ case TYP_UINT:
+ if (!(tree->gtFlags & GTF_UNSIGNED) && tree->gtOverflow() && i1 < 0)
+ {
+ goto INT_OVF;
+ }
+ goto CNS_INT;
+
+ case TYP_INT:
+ if ((tree->gtFlags & GTF_UNSIGNED) && tree->gtOverflow() && i1 < 0)
+ {
+ goto INT_OVF;
+ }
+ goto CNS_INT;
+
+ case TYP_ULONG:
+ if (!(tree->gtFlags & GTF_UNSIGNED) && tree->gtOverflow() && i1 < 0)
+ {
+ op1->ChangeOperConst(GT_CNS_NATIVELONG); // need type of oper to be same as tree
+ op1->gtType = TYP_LONG;
+ // We don't care about the value as we are throwing an exception
+ goto LNG_OVF;
+ }
+ lval1 = UINT64(UINT32(i1));
+ goto CNS_LONG;
+
+ case TYP_LONG:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ lval1 = INT64(UINT32(i1));
+ }
+ else
+ {
+ lval1 = INT64(INT32(i1));
+ }
+ goto CNS_LONG;
+
+ case TYP_FLOAT:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ f1 = forceCastToFloat(UINT32(i1));
+ }
+ else
+ {
+ f1 = forceCastToFloat(INT32(i1));
+ }
+ d1 = f1;
+ goto CNS_DOUBLE;
+
+ case TYP_DOUBLE:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ d1 = (double)UINT32(i1);
+ }
+ else
+ {
+ d1 = (double)INT32(i1);
+ }
+ goto CNS_DOUBLE;
+
+ default:
+ assert(!"BAD_TYP");
+ break;
+ }
+ return tree;
+
+ default:
+ return tree;
+ }
+
+ goto CNS_INT;
+
+ case TYP_LONG:
+
+ /* Fold constant LONG unary operator */
+
+ assert(op1->gtIntConCommon.ImmedValCanBeFolded(this, tree->OperGet()));
+ lval1 = op1->gtIntConCommon.LngValue();
+
+ switch (tree->gtOper)
+ {
+ case GT_NOT:
+ lval1 = ~lval1;
+ break;
+
+ case GT_NEG:
+ case GT_CHS:
+ lval1 = -lval1;
+ break;
+
+ case GT_CAST:
+ assert(genActualType(tree->CastToType()) == tree->gtType);
+ switch (tree->CastToType())
+ {
+ case TYP_BYTE:
+ i1 = INT32(INT8(lval1));
+ goto CHECK_INT_OVERFLOW;
+
+ case TYP_SHORT:
+ i1 = INT32(INT16(lval1));
+ goto CHECK_INT_OVERFLOW;
+
+ case TYP_CHAR:
+ i1 = INT32(UINT16(lval1));
+ goto CHECK_UINT_OVERFLOW;
+
+ case TYP_UBYTE:
+ i1 = INT32(UINT8(lval1));
+ goto CHECK_UINT_OVERFLOW;
+
+ case TYP_INT:
+ i1 = INT32(lval1);
+
+ CHECK_INT_OVERFLOW:
+ if (tree->gtOverflow())
+ {
+ if (i1 != lval1)
+ {
+ goto INT_OVF;
+ }
+ if ((tree->gtFlags & GTF_UNSIGNED) && i1 < 0)
+ {
+ goto INT_OVF;
+ }
+ }
+ goto CNS_INT;
+
+ case TYP_UINT:
+ i1 = UINT32(lval1);
+
+ CHECK_UINT_OVERFLOW:
+ if (tree->gtOverflow() && UINT32(i1) != lval1)
+ {
+ goto INT_OVF;
+ }
+ goto CNS_INT;
+
+ case TYP_ULONG:
+ if (!(tree->gtFlags & GTF_UNSIGNED) && tree->gtOverflow() && lval1 < 0)
+ {
+ goto LNG_OVF;
+ }
+ goto CNS_LONG;
+
+ case TYP_LONG:
+ if ((tree->gtFlags & GTF_UNSIGNED) && tree->gtOverflow() && lval1 < 0)
+ {
+ goto LNG_OVF;
+ }
+ goto CNS_LONG;
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ if ((tree->gtFlags & GTF_UNSIGNED) && lval1 < 0)
+ {
+ d1 = FloatingPointUtils::convertUInt64ToDouble((unsigned __int64)lval1);
+ }
+ else
+ {
+ d1 = (double)lval1;
+ }
+
+ if (tree->CastToType() == TYP_FLOAT)
+ {
+ f1 = forceCastToFloat(d1); // truncate precision
+ d1 = f1;
+ }
+ goto CNS_DOUBLE;
+ default:
+ assert(!"BAD_TYP");
+ break;
+ }
+ return tree;
+
+ default:
+ return tree;
+ }
+
+ goto CNS_LONG;
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ assert(op1->gtOper == GT_CNS_DBL);
+
+ /* Fold constant DOUBLE unary operator */
+
+ d1 = op1->gtDblCon.gtDconVal;
+
+ switch (tree->gtOper)
+ {
+ case GT_NEG:
+ case GT_CHS:
+ d1 = -d1;
+ break;
+
+ case GT_CAST:
+
+ if (tree->gtOverflowEx())
+ {
+ return tree;
+ }
+
+ assert(genActualType(tree->CastToType()) == tree->gtType);
+
+ if ((op1->gtType == TYP_FLOAT && !_finite(forceCastToFloat(d1))) ||
+ (op1->gtType == TYP_DOUBLE && !_finite(d1)))
+ {
+ // The floating point constant is not finite. The ECMA spec says, in
+ // III 3.27, that "...if overflow occurs converting a floating point type
+ // to an integer, ..., the value returned is unspecified." However, it would
+ // at least be desirable to have the same value returned for casting an overflowing
+ // constant to an int as would obtained by passing that constant as a parameter
+ // then casting that parameter to an int type. We will assume that the C compiler's
+ // cast logic will yield the desired result (and trust testing to tell otherwise).
+ // Cross-compilation is an issue here; if that becomes an important scenario, we should
+ // capture the target-specific values of overflow casts to the various integral types as
+ // constants in a target-specific function.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_XARCH_
+ // Don't fold conversions of +inf/-inf to integral value as the value returned by JIT helper
+ // doesn't match with the C compiler's cast result.
+ return tree;
+#else //!_TARGET_XARCH_
+
+ switch (tree->CastToType())
+ {
+ case TYP_BYTE:
+ i1 = ssize_t(INT8(d1));
+ goto CNS_INT;
+ case TYP_UBYTE:
+ i1 = ssize_t(UINT8(d1));
+ goto CNS_INT;
+ case TYP_SHORT:
+ i1 = ssize_t(INT16(d1));
+ goto CNS_INT;
+ case TYP_CHAR:
+ i1 = ssize_t(UINT16(d1));
+ goto CNS_INT;
+ case TYP_INT:
+ i1 = ssize_t(INT32(d1));
+ goto CNS_INT;
+ case TYP_UINT:
+ i1 = ssize_t(UINT32(d1));
+ goto CNS_INT;
+ case TYP_LONG:
+ lval1 = INT64(d1);
+ goto CNS_LONG;
+ case TYP_ULONG:
+ lval1 = UINT64(d1);
+ goto CNS_LONG;
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ if (op1->gtType == TYP_FLOAT)
+ d1 = forceCastToFloat(d1); // it's only !_finite() after this conversion
+ goto CNS_DOUBLE;
+ default:
+ unreached();
+ }
+#endif //!_TARGET_XARCH_
+ }
+
+ switch (tree->CastToType())
+ {
+ case TYP_BYTE:
+ i1 = INT32(INT8(d1));
+ goto CNS_INT;
+
+ case TYP_SHORT:
+ i1 = INT32(INT16(d1));
+ goto CNS_INT;
+
+ case TYP_CHAR:
+ i1 = INT32(UINT16(d1));
+ goto CNS_INT;
+
+ case TYP_UBYTE:
+ i1 = INT32(UINT8(d1));
+ goto CNS_INT;
+
+ case TYP_INT:
+ i1 = INT32(d1);
+ goto CNS_INT;
+
+ case TYP_UINT:
+ i1 = forceCastToUInt32(d1);
+ goto CNS_INT;
+
+ case TYP_LONG:
+ lval1 = INT64(d1);
+ goto CNS_LONG;
+
+ case TYP_ULONG:
+ lval1 = FloatingPointUtils::convertDoubleToUInt64(d1);
+ goto CNS_LONG;
+
+ case TYP_FLOAT:
+ d1 = forceCastToFloat(d1);
+ goto CNS_DOUBLE;
+
+ case TYP_DOUBLE:
+ if (op1->gtType == TYP_FLOAT)
+ {
+ d1 = forceCastToFloat(d1); // truncate precision
+ }
+ goto CNS_DOUBLE; // redundant cast
+
+ default:
+ assert(!"BAD_TYP");
+ break;
+ }
+ return tree;
+
+ default:
+ return tree;
+ }
+ goto CNS_DOUBLE;
+
+ default:
+ /* not a foldable typ - e.g. RET const */
+ return tree;
+ }
+ }
+
+ /* We have a binary operator */
+
+ assert(kind & GTK_BINOP);
+ assert(op2);
+ assert(op1->OperKind() & GTK_CONST);
+ assert(op2->OperKind() & GTK_CONST);
+
+ if (tree->gtOper == GT_COMMA)
+ {
+ return op2;
+ }
+
+ if (tree->gtOper == GT_LIST)
+ {
+ return tree;
+ }
+
+ switchType = op1->gtType;
+
+ // Normally we will just switch on op1 types, but for the case where
+ // only op2 is a GC type and op1 is not a GC type, we use the op2 type.
+ // This makes us handle this as a case of folding for GC type.
+ //
+ if (varTypeIsGC(op2->gtType) && !varTypeIsGC(op1->gtType))
+ {
+ switchType = op2->gtType;
+ }
+
+ switch (switchType)
+ {
+
+ /*-------------------------------------------------------------------------
+ * Fold constant REF or BYREF binary operator
+ * These can only be comparisons or null pointers
+ */
+
+ case TYP_REF:
+
+ /* String nodes are an RVA at this point */
+
+ if (op1->gtOper == GT_CNS_STR || op2->gtOper == GT_CNS_STR)
+ {
+ return tree;
+ }
+
+ __fallthrough;
+
+ case TYP_BYREF:
+
+ i1 = op1->gtIntConCommon.IconValue();
+ i2 = op2->gtIntConCommon.IconValue();
+
+ switch (tree->gtOper)
+ {
+ case GT_EQ:
+ i1 = (i1 == i2);
+ goto FOLD_COND;
+
+ case GT_NE:
+ i1 = (i1 != i2);
+ goto FOLD_COND;
+
+ case GT_ADD:
+ noway_assert(tree->gtType != TYP_REF);
+ // We only fold a GT_ADD that involves a null reference.
+ if (((op1->TypeGet() == TYP_REF) && (i1 == 0)) || ((op2->TypeGet() == TYP_REF) && (i2 == 0)))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFolding operator with constant nodes into a constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+ // Fold into GT_IND of null byref
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtType = TYP_BYREF;
+ tree->gtIntCon.gtIconVal = 0;
+ tree->gtIntCon.gtFieldSeq = FieldSeqStore::NotAField();
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFolded to null byref:\n");
+ gtDispTree(tree);
+ }
+#endif
+ goto DONE;
+ }
+
+ default:
+ break;
+ }
+
+ return tree;
+
+ /*-------------------------------------------------------------------------
+ * Fold constant INT binary operator
+ */
+
+ case TYP_INT:
+
+ if (tree->OperIsCompare() && (tree->gtType == TYP_BYTE))
+ {
+ tree->gtType = TYP_INT;
+ }
+
+ assert(tree->gtType == TYP_INT || varTypeIsGC(tree->TypeGet()) || tree->gtOper == GT_MKREFANY);
+
+ // No GC pointer types should be folded here...
+ //
+ assert(!varTypeIsGC(op1->gtType) && !varTypeIsGC(op2->gtType));
+
+ assert(op1->gtIntConCommon.ImmedValCanBeFolded(this, tree->OperGet()));
+ assert(op2->gtIntConCommon.ImmedValCanBeFolded(this, tree->OperGet()));
+
+ i1 = op1->gtIntConCommon.IconValue();
+ i2 = op2->gtIntConCommon.IconValue();
+
+ switch (tree->gtOper)
+ {
+ case GT_EQ:
+ i1 = (INT32(i1) == INT32(i2));
+ break;
+ case GT_NE:
+ i1 = (INT32(i1) != INT32(i2));
+ break;
+
+ case GT_LT:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT32(i1) < UINT32(i2));
+ }
+ else
+ {
+ i1 = (INT32(i1) < INT32(i2));
+ }
+ break;
+
+ case GT_LE:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT32(i1) <= UINT32(i2));
+ }
+ else
+ {
+ i1 = (INT32(i1) <= INT32(i2));
+ }
+ break;
+
+ case GT_GE:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT32(i1) >= UINT32(i2));
+ }
+ else
+ {
+ i1 = (INT32(i1) >= INT32(i2));
+ }
+ break;
+
+ case GT_GT:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT32(i1) > UINT32(i2));
+ }
+ else
+ {
+ i1 = (INT32(i1) > INT32(i2));
+ }
+ break;
+
+ case GT_ADD:
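+ // For checked adds, redo the addition in 64 bits and verify that truncating
+ // the result back to 32 bits (signed or unsigned) preserves the value.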
+ itemp = i1 + i2;
+ if (tree->gtOverflow())
+ {
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ if (INT64(UINT32(itemp)) != INT64(UINT32(i1)) + INT64(UINT32(i2)))
+ {
+ goto INT_OVF;
+ }
+ }
+ else
+ {
+ if (INT64(INT32(itemp)) != INT64(INT32(i1)) + INT64(INT32(i2)))
+ {
+ goto INT_OVF;
+ }
+ }
+ }
+ i1 = itemp;
+ fieldSeq = GetFieldSeqStore()->Append(op1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
+ break;
+ case GT_SUB:
+ itemp = i1 - i2;
+ if (tree->gtOverflow())
+ {
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ if (INT64(UINT32(itemp)) != ((INT64)((UINT32)i1) - (INT64)((UINT32)i2)))
+ {
+ goto INT_OVF;
+ }
+ }
+ else
+ {
+ if (INT64(INT32(itemp)) != INT64(INT32(i1)) - INT64(INT32(i2)))
+ {
+ goto INT_OVF;
+ }
+ }
+ }
+ i1 = itemp;
+ break;
+ case GT_MUL:
+ itemp = i1 * i2;
+ if (tree->gtOverflow())
+ {
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ if (INT64(UINT32(itemp)) != ((INT64)((UINT32)i1) * (INT64)((UINT32)i2)))
+ {
+ goto INT_OVF;
+ }
+ }
+ else
+ {
+ if (INT64(INT32(itemp)) != INT64(INT32(i1)) * INT64(INT32(i2)))
+ {
+ goto INT_OVF;
+ }
+ }
+ }
+ // For the very particular case of the "constant array index" pseudo-field, we
+ // assume that multiplication is by the field width, and preserves that field.
+ // This could obviously be made more robust by a more complicated set of annotations...
+ if ((op1->gtIntCon.gtFieldSeq != nullptr) && op1->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
+ {
+ assert(op2->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
+ fieldSeq = op1->gtIntCon.gtFieldSeq;
+ }
+ else if ((op2->gtIntCon.gtFieldSeq != nullptr) &&
+ op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
+ {
+ assert(op1->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
+ fieldSeq = op2->gtIntCon.gtFieldSeq;
+ }
+ i1 = itemp;
+ break;
+
+ case GT_OR:
+ i1 |= i2;
+ break;
+ case GT_XOR:
+ i1 ^= i2;
+ break;
+ case GT_AND:
+ i1 &= i2;
+ break;
+
+ case GT_LSH:
+ i1 <<= (i2 & 0x1f);
+ break;
+ case GT_RSH:
+ i1 >>= (i2 & 0x1f);
+ break;
+ case GT_RSZ:
+ /* logical shift -> make it unsigned to not propagate the sign bit */
+ i1 = UINT32(i1) >> (i2 & 0x1f);
+ break;
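+ // Rotations mask the count to 0..31 and recombine the two shifted halves of the value.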
+ case GT_ROL:
+ i1 = (i1 << (i2 & 0x1f)) | (UINT32(i1) >> ((32 - i2) & 0x1f));
+ break;
+ case GT_ROR:
+ i1 = (i1 << ((32 - i2) & 0x1f)) | (UINT32(i1) >> (i2 & 0x1f));
+ break;
+
+ /* DIV and MOD can throw an exception - for division by 0,
+ * or for overflow when dividing MIN_INT by -1 */
+
+ case GT_DIV:
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ if (INT32(i2) == 0)
+ {
+ // Division by zero:
+ // We have to evaluate this expression and throw an exception
+ return tree;
+ }
+ else if ((INT32(i2) == -1) && (UINT32(i1) == 0x80000000))
+ {
+ // Overflow Division:
+ // We have to evaluate this expression and throw an exception
+ return tree;
+ }
+
+ if (tree->gtOper == GT_DIV)
+ {
+ i1 = INT32(i1) / INT32(i2);
+ }
+ else if (tree->gtOper == GT_MOD)
+ {
+ i1 = INT32(i1) % INT32(i2);
+ }
+ else if (tree->gtOper == GT_UDIV)
+ {
+ i1 = UINT32(i1) / UINT32(i2);
+ }
+ else
+ {
+ assert(tree->gtOper == GT_UMOD);
+ i1 = UINT32(i1) % UINT32(i2);
+ }
+ break;
+
+ default:
+ return tree;
+ }
+
+ /* We get here after folding to a GT_CNS_INT type.
+ * Change the node to the new type / value and make sure the node sizes are OK */
+ CNS_INT:
+ FOLD_COND:
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFolding operator with constant nodes into a constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+
+#ifdef _TARGET_64BIT_
+ // we need to properly re-sign-extend or truncate as needed.
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = UINT32(i1);
+ }
+ else
+ {
+ i1 = INT32(i1);
+ }
+#endif // _TARGET_64BIT_
+
+ /* Also all conditional folding jumps here since the node hanging from
+ * GT_JTRUE has to be a GT_CNS_INT - value 0 or 1 */
+
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtType = TYP_INT;
+ tree->gtIntCon.gtIconVal = i1;
+ tree->gtIntCon.gtFieldSeq = fieldSeq;
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Bashed to int constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+ goto DONE;
+
+ /* This operation is going to cause an overflow exception. Morph into
+ an overflow helper. Put a dummy constant value for code generation.
+
+ We could remove all subsequent trees in the current basic block,
+ unless this node is a child of GT_COLON
+
+ NOTE: Since the folded value is not constant we should not change the
+ "tree" node - otherwise we confuse the logic that checks if the folding
+ was successful - instead use one of the operands, e.g. op1
+ */
+
+ LNG_OVF:
+ // Don't fold overflow operations if not global morph phase.
+ // The reason for this is that this optimization is replacing a gentree node
+ // with another new gentree node. Say a GT_CALL(arglist) has one 'arg'
+ // involving overflow arithmetic. During assertion prop, it is possible
+ // that the 'arg' could be constant folded and the result could lead to an
+ // overflow. In such a case 'arg' will get replaced with GT_COMMA node
+ // but fgMorphArgs() - see the logic around "if(lateArgsComputed)" - doesn't
+ // update args table. For this reason this optimization is enabled only
+ // for global morphing phase.
+ //
+ // X86/Arm32 legacy codegen note: This is not an issue on x86 for the reason that
+ // it doesn't use arg table for calls. In addition x86/arm32 legacy codegen doesn't
+ // expect long constants to show up as an operand of overflow cast operation.
+ //
+ // TODO-CQ: Once fgMorphArgs() is fixed this restriction could be removed.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ if (!fgGlobalMorph)
+ {
+ assert(tree->gtOverflow());
+ return tree;
+ }
+#endif // !LEGACY_BACKEND
+
+ op1 = gtNewLconNode(0);
+ if (vnStore != nullptr)
+ {
+ op1->gtVNPair.SetBoth(vnStore->VNZeroForType(TYP_LONG));
+ }
+ goto OVF;
+
+ INT_OVF:
+#ifndef LEGACY_BACKEND
+ // Don't fold overflow operations if not global morph phase.
+ // The reason for this is that this optimization is replacing a gentree node
+ // with another new gentree node. Say a GT_CALL(arglist) has one 'arg'
+ // involving overflow arithmetic. During assertion prop, it is possible
+ // that the 'arg' could be constant folded and the result could lead to an
+ // overflow. In such a case 'arg' will get replaced with GT_COMMA node
+ // but fgMorphArgs() - see the logic around "if(lateArgsComputed)" - doesn't
+ // update args table. For this reason this optimization is enabled only
+ // for global morphing phase.
+ //
+ // X86/Arm32 legacy codegen note: This is not an issue on x86 for the reason that
+ // it doesn't use arg table for calls. In addition x86/arm32 legacy codegen doesn't
+ // expect long constants to show up as an operand of overflow cast operation.
+ //
+ // TODO-CQ: Once fgMorphArgs() is fixed this restriction could be removed.
+
+ if (!fgGlobalMorph)
+ {
+ assert(tree->gtOverflow());
+ return tree;
+ }
+#endif // !LEGACY_BACKEND
+
+ op1 = gtNewIconNode(0);
+ if (vnStore != nullptr)
+ {
+ op1->gtVNPair.SetBoth(vnStore->VNZeroForType(TYP_INT));
+ }
+ goto OVF;
+
+ OVF:
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFolding binary operator with constant nodes into a comma throw:\n");
+ gtDispTree(tree);
+ }
+#endif
+ /* We will change the cast to a GT_COMMA and attach the exception helper as gtOp.gtOp1.
+ * The constant expression zero becomes op2. */
+
+ assert(tree->gtOverflow());
+ assert(tree->gtOper == GT_ADD || tree->gtOper == GT_SUB || tree->gtOper == GT_CAST ||
+ tree->gtOper == GT_MUL);
+ assert(op1);
+
+ op2 = op1;
+ op1 = gtNewHelperCallNode(CORINFO_HELP_OVERFLOW, TYP_VOID, GTF_EXCEPT,
+ gtNewArgList(gtNewIconNode(compCurBB->bbTryIndex)));
+
+ if (vnStore != nullptr)
+ {
+ op1->gtVNPair =
+ vnStore->VNPWithExc(ValueNumPair(ValueNumStore::VNForVoid(), ValueNumStore::VNForVoid()),
+ vnStore->VNPExcSetSingleton(vnStore->VNPairForFunc(TYP_REF, VNF_OverflowExc)));
+ }
+
+ tree = gtNewOperNode(GT_COMMA, tree->gtType, op1, op2);
+
+ return tree;
+
+ /*-------------------------------------------------------------------------
+ * Fold constant LONG binary operator
+ */
+
+ case TYP_LONG:
+
+ // No GC pointer types should be folded here...
+ //
+ assert(!varTypeIsGC(op1->gtType) && !varTypeIsGC(op2->gtType));
+
+ // op1 is known to be a TYP_LONG, op2 is normally a TYP_LONG, unless we have a shift operator in which case
+ // it is a TYP_INT
+ //
+ assert((op2->gtType == TYP_LONG) || (op2->gtType == TYP_INT));
+
+ assert(op1->gtIntConCommon.ImmedValCanBeFolded(this, tree->OperGet()));
+ assert(op2->gtIntConCommon.ImmedValCanBeFolded(this, tree->OperGet()));
+
+ lval1 = op1->gtIntConCommon.LngValue();
+
+ // For the shift operators we can have a op2 that is a TYP_INT and thus will be GT_CNS_INT
+ if (op2->OperGet() == GT_CNS_INT)
+ {
+ lval2 = op2->gtIntConCommon.IconValue();
+ }
+ else
+ {
+ lval2 = op2->gtIntConCommon.LngValue();
+ }
+
+ switch (tree->gtOper)
+ {
+ case GT_EQ:
+ i1 = (lval1 == lval2);
+ goto FOLD_COND;
+ case GT_NE:
+ i1 = (lval1 != lval2);
+ goto FOLD_COND;
+
+ case GT_LT:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT64(lval1) < UINT64(lval2));
+ }
+ else
+ {
+ i1 = (lval1 < lval2);
+ }
+ goto FOLD_COND;
+
+ case GT_LE:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT64(lval1) <= UINT64(lval2));
+ }
+ else
+ {
+ i1 = (lval1 <= lval2);
+ }
+ goto FOLD_COND;
+
+ case GT_GE:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT64(lval1) >= UINT64(lval2));
+ }
+ else
+ {
+ i1 = (lval1 >= lval2);
+ }
+ goto FOLD_COND;
+
+ case GT_GT:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT64(lval1) > UINT64(lval2));
+ }
+ else
+ {
+ i1 = (lval1 > lval2);
+ }
+ goto FOLD_COND;
+
+ case GT_ADD:
+ ltemp = lval1 + lval2;
+
+ LNG_ADD_CHKOVF:
+ /* For the SIGNED case - If there is one positive and one negative operand, there can
+ * be no overflow. If both are positive, the result has to be positive, and similarly
+ * for negatives.
+ *
+ * For the UNSIGNED case - If either UINT64 operand is bigger than the result then OVF */
+
+ if (tree->gtOverflow())
+ {
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ if ((UINT64(lval1) > UINT64(ltemp)) || (UINT64(lval2) > UINT64(ltemp)))
+ {
+ goto LNG_OVF;
+ }
+ }
+ else if (((lval1 < 0) == (lval2 < 0)) && ((lval1 < 0) != (ltemp < 0)))
+ {
+ goto LNG_OVF;
+ }
+ }
+ lval1 = ltemp;
+ break;
+
+ case GT_SUB:
+ ltemp = lval1 - lval2;
+ if (tree->gtOverflow())
+ {
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ if (UINT64(lval2) > UINT64(lval1))
+ {
+ goto LNG_OVF;
+ }
+ }
+ else
+ {
+ /* If both operands are +ve or both are -ve, there can be no
+ overflow. Else use the logic for : lval1 + (-lval2) */
+
+ if ((lval1 < 0) != (lval2 < 0))
+ {
+ if (lval2 == INT64_MIN)
+ {
+ goto LNG_OVF;
+ }
+ lval2 = -lval2;
+ goto LNG_ADD_CHKOVF;
+ }
+ }
+ }
+ lval1 = ltemp;
+ break;
+
+ case GT_MUL:
+ ltemp = lval1 * lval2;
+
+ if (tree->gtOverflow() && lval2 != 0)
+ {
+
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ UINT64 ultemp = ltemp;
+ UINT64 ulval1 = lval1;
+ UINT64 ulval2 = lval2;
+ if ((ultemp / ulval2) != ulval1)
+ {
+ goto LNG_OVF;
+ }
+ }
+ else
+ {
+ // This does a multiply and then reverses it with a divide. The test works except
+ // for MIN_LONG * -1, where the sign of ltemp comes out wrong, so make sure to
+ // double check the sign.
+ // if either is 0, then no overflow
+ if (lval1 != 0) // lval2 checked above.
+ {
+ if (((lval1 < 0) == (lval2 < 0)) && (ltemp < 0))
+ {
+ goto LNG_OVF;
+ }
+ if (((lval1 < 0) != (lval2 < 0)) && (ltemp > 0))
+ {
+ goto LNG_OVF;
+ }
+
+ // TODO-Amd64-Unix: Remove the code that disables optimizations for this method when
+ // the clang optimizer is fixed and/or the method implementation is refactored into
+ // simpler code.
+ // There is a bug in the clang-3.5 optimizer: in release builds it mistypes the
+ // operands of (ltemp / lval2) and performs a 32 bit divide instead of a 64 bit one
+ // in the corner case of MIN_LONG. When lval1 and lval2 are both equal to MIN_LONG
+ // (0x8000000000000000) this raises a SIGFPE.
+ // Optimizations are disabled for now. See compiler.h.
+ if ((ltemp / lval2) != lval1)
+ {
+ goto LNG_OVF;
+ }
+ }
+ }
+ }
+
+ lval1 = ltemp;
+ break;
+
+ case GT_OR:
+ lval1 |= lval2;
+ break;
+ case GT_XOR:
+ lval1 ^= lval2;
+ break;
+ case GT_AND:
+ lval1 &= lval2;
+ break;
+
+ case GT_LSH:
+ lval1 <<= (lval2 & 0x3f);
+ break;
+ case GT_RSH:
+ lval1 >>= (lval2 & 0x3f);
+ break;
+ case GT_RSZ:
+ /* logical shift -> make it unsigned to not propagate the sign bit */
+ lval1 = UINT64(lval1) >> (lval2 & 0x3f);
+ break;
+ case GT_ROL:
+ lval1 = (lval1 << (lval2 & 0x3f)) | (UINT64(lval1) >> ((64 - lval2) & 0x3f));
+ break;
+ case GT_ROR:
+ lval1 = (lval1 << ((64 - lval2) & 0x3f)) | (UINT64(lval1) >> (lval2 & 0x3f));
+ break;
+
+ // Both DIV and IDIV on x86 raise an exception for min_int (and min_long) / -1. So we preserve
+ // that behavior here.
+ case GT_DIV:
+ if (!lval2)
+ {
+ return tree;
+ }
+
+ if (UINT64(lval1) == UI64(0x8000000000000000) && lval2 == INT64(-1))
+ {
+ return tree;
+ }
+ lval1 /= lval2;
+ break;
+
+ case GT_MOD:
+ if (!lval2)
+ {
+ return tree;
+ }
+ if (UINT64(lval1) == UI64(0x8000000000000000) && lval2 == INT64(-1))
+ {
+ return tree;
+ }
+ lval1 %= lval2;
+ break;
+
+ case GT_UDIV:
+ if (!lval2)
+ {
+ return tree;
+ }
+ if (UINT64(lval1) == UI64(0x8000000000000000) && lval2 == INT64(-1))
+ {
+ return tree;
+ }
+ lval1 = UINT64(lval1) / UINT64(lval2);
+ break;
+
+ case GT_UMOD:
+ if (!lval2)
+ {
+ return tree;
+ }
+ if (UINT64(lval1) == UI64(0x8000000000000000) && lval2 == INT64(-1))
+ {
+ return tree;
+ }
+ lval1 = UINT64(lval1) % UINT64(lval2);
+ break;
+ default:
+ return tree;
+ }
+
+ CNS_LONG:
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFolding long operator with constant nodes into a constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+ assert((GenTree::s_gtNodeSizes[GT_CNS_NATIVELONG] == TREE_NODE_SZ_SMALL) ||
+ (tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE));
+
+ tree->ChangeOperConst(GT_CNS_NATIVELONG);
+ tree->gtIntConCommon.SetLngValue(lval1);
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Bashed to long constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+ goto DONE;
+
+ /*-------------------------------------------------------------------------
+ * Fold constant FLOAT or DOUBLE binary operator
+ */
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+ if (tree->gtOverflowEx())
+ {
+ return tree;
+ }
+
+ assert(op1->gtOper == GT_CNS_DBL);
+ d1 = op1->gtDblCon.gtDconVal;
+
+ assert(varTypeIsFloating(op2->gtType));
+ assert(op2->gtOper == GT_CNS_DBL);
+ d2 = op2->gtDblCon.gtDconVal;
+
+ /* Special case - check if we have NaN operands.
+ * For comparisons, an ordered operation (one without GTF_RELOP_NAN_UN) always returns 0.
+ * For unordered operations (i.e. the GTF_RELOP_NAN_UN flag is set)
+ * the result is always true - return 1. */
+
+ if (_isnan(d1) || _isnan(d2))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Double operator(s) is NaN\n");
+ }
+#endif
+ if (tree->OperKind() & GTK_RELOP)
+ {
+ if (tree->gtFlags & GTF_RELOP_NAN_UN)
+ {
+ /* Unordered comparison with NaN always succeeds */
+ i1 = 1;
+ goto FOLD_COND;
+ }
+ else
+ {
+ /* Normal comparison with NaN always fails */
+ i1 = 0;
+ goto FOLD_COND;
+ }
+ }
+ }
+
+ switch (tree->gtOper)
+ {
+ case GT_EQ:
+ i1 = (d1 == d2);
+ goto FOLD_COND;
+ case GT_NE:
+ i1 = (d1 != d2);
+ goto FOLD_COND;
+
+ case GT_LT:
+ i1 = (d1 < d2);
+ goto FOLD_COND;
+ case GT_LE:
+ i1 = (d1 <= d2);
+ goto FOLD_COND;
+ case GT_GE:
+ i1 = (d1 >= d2);
+ goto FOLD_COND;
+ case GT_GT:
+ i1 = (d1 > d2);
+ goto FOLD_COND;
+
+#if FEATURE_STACK_FP_X87
+ case GT_ADD:
+ d1 += d2;
+ break;
+ case GT_SUB:
+ d1 -= d2;
+ break;
+ case GT_MUL:
+ d1 *= d2;
+ break;
+ case GT_DIV:
+ if (!d2)
+ return tree;
+ d1 /= d2;
+ break;
+#else //! FEATURE_STACK_FP_X87
+ // non-x86 arch: floating point arithmetic should be done in the declared
+ // precision while doing constant folding. Although TYP_FLOAT constants are
+ // stored as double constants, float arithmetic must be performed on values
+ // converted back to float. Here is an example case where performing the
+ // arithmetic in double precision would lead to incorrect results.
+ //
+ // Example:
+ // float a = float.MaxValue;
+ // float b = a*a; This will produce +inf in single precision and 1.1579207543382391e+077 in double
+ // precision.
+ // float c = b/b; This will produce NaN in single precision and 1 in double precision.
+ case GT_ADD:
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ f1 = forceCastToFloat(d1);
+ f2 = forceCastToFloat(d2);
+ d1 = f1 + f2;
+ }
+ else
+ {
+ d1 += d2;
+ }
+ break;
+
+ case GT_SUB:
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ f1 = forceCastToFloat(d1);
+ f2 = forceCastToFloat(d2);
+ d1 = f1 - f2;
+ }
+ else
+ {
+ d1 -= d2;
+ }
+ break;
+
+ case GT_MUL:
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ f1 = forceCastToFloat(d1);
+ f2 = forceCastToFloat(d2);
+ d1 = f1 * f2;
+ }
+ else
+ {
+ d1 *= d2;
+ }
+ break;
+
+ case GT_DIV:
+ if (!d2)
+ {
+ return tree;
+ }
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ f1 = forceCastToFloat(d1);
+ f2 = forceCastToFloat(d2);
+ d1 = f1 / f2;
+ }
+ else
+ {
+ d1 /= d2;
+ }
+ break;
+#endif //! FEATURE_STACK_FP_X87
+
+ default:
+ return tree;
+ }
+
+ CNS_DOUBLE:
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFolding fp operator with constant nodes into a fp constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+
+ assert((GenTree::s_gtNodeSizes[GT_CNS_DBL] == TREE_NODE_SZ_SMALL) ||
+ (tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE));
+
+ tree->ChangeOperConst(GT_CNS_DBL);
+ tree->gtDblCon.gtDconVal = d1;
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Bashed to fp constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+ goto DONE;
+
+ default:
+ /* not a foldable typ */
+ return tree;
+ }
+
+//-------------------------------------------------------------------------
+
+DONE:
+
+ /* Make sure no side effect flags are set on this constant node */
+
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+
+ return tree;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Create an assignment of the given value to a temp.
+ */
+
+GenTreePtr Compiler::gtNewTempAssign(unsigned tmp, GenTreePtr val)
+{
+ LclVarDsc* varDsc = lvaTable + tmp;
+
+ if (varDsc->TypeGet() == TYP_I_IMPL && val->TypeGet() == TYP_BYREF)
+ {
+ impBashVarAddrsToI(val);
+ }
+
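+ // If the value is a use of a normalize-on-load local, re-create the use with the local's real (small) type.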
+ var_types valTyp = val->TypeGet();
+ if (val->OperGet() == GT_LCL_VAR && lvaTable[val->gtLclVar.gtLclNum].lvNormalizeOnLoad())
+ {
+ valTyp = lvaGetRealType(val->gtLclVar.gtLclNum);
+ val = gtNewLclvNode(val->gtLclVar.gtLclNum, valTyp, val->gtLclVar.gtLclILoffs);
+ }
+ var_types dstTyp = varDsc->TypeGet();
+
+ /* If the variable's lvType is not yet set then set it here */
+ if (dstTyp == TYP_UNDEF)
+ {
+ varDsc->lvType = dstTyp = genActualType(valTyp);
+ if (varTypeIsGC(dstTyp))
+ {
+ varDsc->lvStructGcCount = 1;
+ }
+#if FEATURE_SIMD
+ else if (varTypeIsSIMD(dstTyp))
+ {
+ varDsc->lvSIMDType = 1;
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ /* Make sure the actual types match */
+ if (genActualType(valTyp) != genActualType(dstTyp))
+ {
+ // Plus some other exceptions that are apparently legal:
+ // 1) TYP_REF or BYREF = TYP_I_IMPL
+ bool ok = false;
+ if (varTypeIsGC(dstTyp) && (valTyp == TYP_I_IMPL))
+ {
+ ok = true;
+ }
+ // 2) TYP_DOUBLE = TYP_FLOAT or TYP_FLOAT = TYP_DOUBLE
+ else if (varTypeIsFloating(dstTyp) && varTypeIsFloating(valTyp))
+ {
+ ok = true;
+ }
+
+ if (!ok)
+ {
+ gtDispTree(val);
+ assert(!"Incompatible types for gtNewTempAssign");
+ }
+ }
+#endif
+
+ // Floating Point assignments can be created during inlining
+ // see "Zero init inlinee locals:" in fgInlinePrependStatements
+ // thus we may need to set compFloatingPointUsed to true here.
+ //
+ if (varTypeIsFloating(dstTyp) && (compFloatingPointUsed == false))
+ {
+ compFloatingPointUsed = true;
+ }
+
+ /* Create the assignment node */
+
+ GenTreePtr asg;
+ GenTreePtr dest = gtNewLclvNode(tmp, dstTyp);
+ dest->gtFlags |= GTF_VAR_DEF;
+
+ // With first-class structs, we should be propagating the class handle on all non-primitive
+ // struct types. We don't have a convenient way to do that for all SIMD temps, since some
+ // internal trees use SIMD types that are not used by the input IL. In this case, we allow
+ // a null type handle and derive the necessary information about the type from its varType.
+ CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(val);
+ if (varTypeIsStruct(valTyp) && ((structHnd != NO_CLASS_HANDLE) || (varTypeIsSIMD(valTyp))))
+ {
+ // The GT_OBJ may be a child of a GT_COMMA.
+ GenTreePtr valx = val->gtEffectiveVal(/*commaOnly*/ true);
+
+ if (valx->gtOper == GT_OBJ)
+ {
+ assert(structHnd != nullptr);
+ lvaSetStruct(tmp, structHnd, false);
+ }
+ dest->gtFlags |= GTF_DONT_CSE;
+ valx->gtFlags |= GTF_DONT_CSE;
+ asg = impAssignStruct(dest, val, structHnd, (unsigned)CHECK_SPILL_NONE);
+ }
+ else
+ {
+ asg = gtNewAssignNode(dest, val);
+ }
+
+#ifndef LEGACY_BACKEND
+ if (compRationalIRForm)
+ {
+ Rationalizer::RewriteAssignmentIntoStoreLcl(asg->AsOp());
+ }
+#endif // !LEGACY_BACKEND
+
+ return asg;
+}
+
+/*****************************************************************************
+ *
+ * Create a helper call to access a COM field (iff 'assg' is non-null this is
+ * an assignment and 'assg' is the new value).
+ */
+
+GenTreePtr Compiler::gtNewRefCOMfield(GenTreePtr objPtr,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_ACCESS_FLAGS access,
+ CORINFO_FIELD_INFO* pFieldInfo,
+ var_types lclTyp,
+ CORINFO_CLASS_HANDLE structType,
+ GenTreePtr assg)
+{
+ assert(pFieldInfo->fieldAccessor == CORINFO_FIELD_INSTANCE_HELPER ||
+ pFieldInfo->fieldAccessor == CORINFO_FIELD_INSTANCE_ADDR_HELPER ||
+ pFieldInfo->fieldAccessor == CORINFO_FIELD_STATIC_ADDR_HELPER);
+
+ /* If we can't access it directly, we need to call a helper function */
+ GenTreeArgList* args = nullptr;
+ var_types helperType = TYP_BYREF;
+
+ if (pFieldInfo->fieldAccessor == CORINFO_FIELD_INSTANCE_HELPER)
+ {
+ if (access & CORINFO_ACCESS_SET)
+ {
+ assert(assg != nullptr);
+ // helper needs pointer to struct, not struct itself
+ if (pFieldInfo->helper == CORINFO_HELP_SETFIELDSTRUCT)
+ {
+ assert(structType != nullptr);
+ assg = impGetStructAddr(assg, structType, (unsigned)CHECK_SPILL_ALL, true);
+ }
+ else if (lclTyp == TYP_DOUBLE && assg->TypeGet() == TYP_FLOAT)
+ {
+ assg = gtNewCastNode(TYP_DOUBLE, assg, TYP_DOUBLE);
+ }
+ else if (lclTyp == TYP_FLOAT && assg->TypeGet() == TYP_DOUBLE)
+ {
+ assg = gtNewCastNode(TYP_FLOAT, assg, TYP_FLOAT);
+ }
+
+ args = gtNewArgList(assg);
+ helperType = TYP_VOID;
+ }
+ else if (access & CORINFO_ACCESS_GET)
+ {
+ helperType = lclTyp;
+
+ // The calling convention for the helper does not take into
+ // account optimization of primitive structs.
+ if ((pFieldInfo->helper == CORINFO_HELP_GETFIELDSTRUCT) && !varTypeIsStruct(lclTyp))
+ {
+ helperType = TYP_STRUCT;
+ }
+ }
+ }
+
+ if (pFieldInfo->helper == CORINFO_HELP_GETFIELDSTRUCT || pFieldInfo->helper == CORINFO_HELP_SETFIELDSTRUCT)
+ {
+ assert(pFieldInfo->structType != nullptr);
+ args = gtNewListNode(gtNewIconEmbClsHndNode(pFieldInfo->structType), args);
+ }
+
+ GenTreePtr fieldHnd = impTokenToHandle(pResolvedToken);
+ if (fieldHnd == nullptr)
+ { // compDonotInline()
+ return nullptr;
+ }
+
+ args = gtNewListNode(fieldHnd, args);
+
+ // If it's a static field, we shouldn't have an object node
+ // If it's an instance field, we have an object node
+ assert((pFieldInfo->fieldAccessor != CORINFO_FIELD_STATIC_ADDR_HELPER) ^ (objPtr == nullptr));
+
+ if (objPtr != nullptr)
+ {
+ args = gtNewListNode(objPtr, args);
+ }
+
+ GenTreePtr tree = gtNewHelperCallNode(pFieldInfo->helper, genActualType(helperType), 0, args);
+
+ if (pFieldInfo->fieldAccessor == CORINFO_FIELD_INSTANCE_HELPER)
+ {
+ if (access & CORINFO_ACCESS_GET)
+ {
+ if (pFieldInfo->helper == CORINFO_HELP_GETFIELDSTRUCT)
+ {
+ if (!varTypeIsStruct(lclTyp))
+ {
+ // get the result as primitive type
+ tree = impGetStructAddr(tree, structType, (unsigned)CHECK_SPILL_ALL, true);
+ tree = gtNewOperNode(GT_IND, lclTyp, tree);
+ }
+ }
+ else if (varTypeIsIntegral(lclTyp) && genTypeSize(lclTyp) < genTypeSize(TYP_INT))
+ {
+ // The helper does not extend the small return types.
+ tree = gtNewCastNode(genActualType(lclTyp), tree, lclTyp);
+ }
+ }
+ }
+ else
+ {
+ // OK, now do the indirection
+ if (access & CORINFO_ACCESS_GET)
+ {
+ if (varTypeIsStruct(lclTyp))
+ {
+ tree = gtNewObjNode(structType, tree);
+ }
+ else
+ {
+ tree = gtNewOperNode(GT_IND, lclTyp, tree);
+ }
+ tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF);
+ }
+ else if (access & CORINFO_ACCESS_SET)
+ {
+ if (varTypeIsStruct(lclTyp))
+ {
+ tree = impAssignStructPtr(tree, assg, structType, (unsigned)CHECK_SPILL_ALL);
+ }
+ else
+ {
+ tree = gtNewOperNode(GT_IND, lclTyp, tree);
+ tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+ tree = gtNewAssignNode(tree, assg);
+ }
+ }
+ }
+
+ return (tree);
+}
+
+/*****************************************************************************
+ *
+ * Return true if the given node (excluding its child trees) contains side effects.
+ * Note that it does not recurse, and children need to be handled separately.
+ * It may return false even if the node has GTF_SIDE_EFFECT (because of its children).
+ *
+ * Similar to OperMayThrow() (but handles GT_CALLs specially), but considers
+ * assignments too.
+ */
+
+bool Compiler::gtNodeHasSideEffects(GenTreePtr tree, unsigned flags)
+{
+ if (flags & GTF_ASG)
+ {
+ if ((tree->OperKind() & GTK_ASGOP))
+ {
+ return true;
+ }
+ }
+
+ // Are there only GTF_CALL side effects remaining? (and no other side effect kinds)
+ if (flags & GTF_CALL)
+ {
+ if (tree->OperGet() == GT_CALL)
+ {
+ // Generally all GT_CALL nodes are considered to have side-effects.
+ // But we may have a helper call that doesn't have any important side effects.
+ //
+ if (tree->gtCall.gtCallType == CT_HELPER)
+ {
+ // But if this tree is a helper call we may not care about the side-effects
+ //
+ CorInfoHelpFunc helper = eeGetHelperNum(tree->AsCall()->gtCallMethHnd);
+
+ // We definitely care about the side effects if MutatesHeap is true
+ //
+ if (s_helperCallProperties.MutatesHeap(helper))
+ {
+ return true;
+ }
+
+ // with GTF_IS_IN_CSE we will CSE helper calls that can run cctors.
+ //
+ if (((flags & GTF_IS_IN_CSE) == 0) && (s_helperCallProperties.MayRunCctor(helper)))
+ {
+ return true;
+ }
+
+ // If we also care about exceptions then check if the helper can throw
+ //
+ if (((flags & GTF_EXCEPT) != 0) && !s_helperCallProperties.NoThrow(helper))
+ {
+ return true;
+ }
+
+ // If this is a Pure helper call or an allocator (that will not need to run a finalizer)
+ // then we don't need to preserve the side effects (of this call -- we may care about those of the
+ // arguments).
+ if (s_helperCallProperties.IsPure(helper) ||
+ (s_helperCallProperties.IsAllocator(helper) && !s_helperCallProperties.MayFinalize(helper)))
+ {
+ GenTreeCall* call = tree->AsCall();
+ for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
+ {
+ if (gtTreeHasSideEffects(args->Current(), flags))
+ {
+ return true;
+ }
+ }
+ // I'm a little worried that args that assign to temps that are late args will look like
+ // side effects...but better to be conservative for now.
+ for (GenTreeArgList* args = call->gtCallLateArgs; args != nullptr; args = args->Rest())
+ {
+ if (gtTreeHasSideEffects(args->Current(), flags))
+ {
+ return true;
+ }
+ }
+ // Otherwise:
+ return false;
+ }
+ }
+
+ // Otherwise the GT_CALL is considered to have side-effects.
+ return true;
+ }
+ }
+
+ if (flags & GTF_EXCEPT)
+ {
+ if (tree->OperMayThrow())
+ {
+ return true;
+ }
+ }
+
+ // Expressions declared as CSE by (e.g.) hoisting code are considered to have relevant side
+ // effects (if we care about GTF_MAKE_CSE).
+ if ((flags & GTF_MAKE_CSE) && (tree->gtFlags & GTF_MAKE_CSE))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ * Returns true if the expr tree has any side effects.
+ */
+
+bool Compiler::gtTreeHasSideEffects(GenTreePtr tree, unsigned flags /* = GTF_SIDE_EFFECT*/)
+{
+ // These are the side effect flags that we care about for this tree
+ unsigned sideEffectFlags = tree->gtFlags & flags;
+
+ // Does this tree have any Side-effect flags set that we care about?
+ if (sideEffectFlags == 0)
+ {
+ // no it doesn't..
+ return false;
+ }
+
+ if (sideEffectFlags == GTF_CALL)
+ {
+ if (tree->OperGet() == GT_CALL)
+ {
+ // Generally all trees that contain GT_CALL nodes are considered to have side-effects.
+ //
+ if (tree->gtCall.gtCallType == CT_HELPER)
+ {
+ // If this node is a helper call we may not care about the side-effects.
+ // Note that gtNodeHasSideEffects checks the side effects of the helper itself
+ // as well as the side effects of its arguments.
+ return gtNodeHasSideEffects(tree, flags);
+ }
+ }
+ else if (tree->OperGet() == GT_INTRINSIC)
+ {
+ if (gtNodeHasSideEffects(tree, flags))
+ {
+ return true;
+ }
+
+ if (gtNodeHasSideEffects(tree->gtOp.gtOp1, flags))
+ {
+ return true;
+ }
+
+ if ((tree->gtOp.gtOp2 != nullptr) && gtNodeHasSideEffects(tree->gtOp.gtOp2, flags))
+ {
+ return true;
+ }
+
+ return false;
+ }
+ }
+
+ return true;
+}
+
+GenTreePtr Compiler::gtBuildCommaList(GenTreePtr list, GenTreePtr expr)
+{
+ // 'list' starts off as null,
+ // and when it is null we haven't started the list yet.
+ //
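+ // Each call prepends 'expr', so successive calls with e1, e2, e3 produce
+ // GT_COMMA(e3, GT_COMMA(e2, e1)).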
+ if (list != nullptr)
+ {
+ // Create a GT_COMMA that appends 'expr' in front of the remaining set of expressions in (*list)
+ GenTreePtr result = gtNewOperNode(GT_COMMA, TYP_VOID, expr, list);
+
+ // Set the flags in the comma node
+ result->gtFlags |= (list->gtFlags & GTF_ALL_EFFECT);
+ result->gtFlags |= (expr->gtFlags & GTF_ALL_EFFECT);
+
+ // 'list' and 'expr' should have valuenumbers defined for both or for neither one
+ noway_assert(list->gtVNPair.BothDefined() == expr->gtVNPair.BothDefined());
+
+ // Set the ValueNumber 'gtVNPair' for the new GT_COMMA node
+ //
+ if (expr->gtVNPair.BothDefined())
+ {
+ // The result of a GT_COMMA node is op2, the normal value number is op2vnp
+ // But we also need to include the union of side effects from op1 and op2.
+ // we compute this value into exceptions_vnp.
+ ValueNumPair op1vnp;
+ ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ ValueNumPair op2vnp;
+ ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
+
+ vnStore->VNPUnpackExc(expr->gtVNPair, &op1vnp, &op1Xvnp);
+ vnStore->VNPUnpackExc(list->gtVNPair, &op2vnp, &op2Xvnp);
+
+ ValueNumPair exceptions_vnp = ValueNumStore::VNPForEmptyExcSet();
+
+ exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op1Xvnp);
+ exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
+
+ result->gtVNPair = vnStore->VNPWithExc(op2vnp, exceptions_vnp);
+ }
+
+ return result;
+ }
+ else
+ {
+ // The 'expr' will start the list of expressions
+ return expr;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Extracts side effects from the given expression
+ * and appends them to a given list (actually a GT_COMMA list)
+ * If 'ignoreRoot' is specified, the method doesn't treat the top
+ * level tree node as having side effects.
+ */
+
+void Compiler::gtExtractSideEffList(GenTreePtr expr,
+ GenTreePtr* pList,
+ unsigned flags /* = GTF_SIDE_EFFECT*/,
+ bool ignoreRoot /* = false */)
+{
+ assert(expr);
+ assert(expr->gtOper != GT_STMT);
+
+ /* If no side effect in the expression return */
+
+ if (!gtTreeHasSideEffects(expr, flags))
+ {
+ return;
+ }
+
+ genTreeOps oper = expr->OperGet();
+ unsigned kind = expr->OperKind();
+
+ // Look for any side effects that we care about
+ //
+ if (!ignoreRoot && gtNodeHasSideEffects(expr, flags))
+ {
+ // Add the side effect to the list and return
+ //
+ *pList = gtBuildCommaList(*pList, expr);
+ return;
+ }
+
+ if (kind & GTK_LEAF)
+ {
+ return;
+ }
+
+ if (oper == GT_LOCKADD || oper == GT_XADD || oper == GT_XCHG || oper == GT_CMPXCHG)
+ {
+ // XADD both adds to the memory location and also fetches the old value. If we only need the side
+ // effect of this instruction, change it into a GT_LOCKADD node (the add only)
+ if (oper == GT_XADD)
+ {
+ expr->gtOper = GT_LOCKADD;
+ expr->gtType = TYP_VOID;
+ }
+
+ // These operations are kind of important to keep
+ *pList = gtBuildCommaList(*pList, expr);
+ return;
+ }
+
+ if (kind & GTK_SMPOP)
+ {
+ GenTreePtr op1 = expr->gtOp.gtOp1;
+ GenTreePtr op2 = expr->gtGetOp2();
+
+ if (flags & GTF_EXCEPT)
+ {
+ // Special case - GT_ADDR of GT_IND nodes of TYP_STRUCT
+ // have to be kept together
+
+ if (oper == GT_ADDR && op1->OperIsIndir() && op1->gtType == TYP_STRUCT)
+ {
+ *pList = gtBuildCommaList(*pList, expr);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Keep the GT_ADDR and GT_IND together:\n");
+ }
+#endif
+ return;
+ }
+ }
+
+ /* Continue searching for side effects in the subtrees of the expression
+ * NOTE: Be careful to preserve the right ordering - side effects are prepended
+ * to the list */
+
+ if (expr->gtFlags & GTF_REVERSE_OPS)
+ {
+ assert(oper != GT_COMMA);
+ if (op1)
+ {
+ gtExtractSideEffList(op1, pList, flags);
+ }
+ if (op2)
+ {
+ gtExtractSideEffList(op2, pList, flags);
+ }
+ }
+ else
+ {
+ if (op2)
+ {
+ gtExtractSideEffList(op2, pList, flags);
+ }
+ if (op1)
+ {
+ gtExtractSideEffList(op1, pList, flags);
+ }
+ }
+ }
+
+ if (expr->OperGet() == GT_CALL)
+ {
+ // Generally all GT_CALL nodes are considered to have side-effects.
+ // So if we get here it must be a Helper call that we decided does
+ // not have side effects that we needed to keep
+ //
+ assert(expr->gtCall.gtCallType == CT_HELPER);
+
+ // We can remove this Helper call, but there still could be
+ // side-effects in the arguments that we may need to keep
+ //
+ GenTreePtr args;
+ for (args = expr->gtCall.gtCallArgs; args; args = args->gtOp.gtOp2)
+ {
+ assert(args->IsList());
+ gtExtractSideEffList(args->Current(), pList, flags);
+ }
+ for (args = expr->gtCall.gtCallLateArgs; args; args = args->gtOp.gtOp2)
+ {
+ assert(args->IsList());
+ gtExtractSideEffList(args->Current(), pList, flags);
+ }
+ }
+
+ if (expr->OperGet() == GT_ARR_BOUNDS_CHECK
+#ifdef FEATURE_SIMD
+ || expr->OperGet() == GT_SIMD_CHK
+#endif // FEATURE_SIMD
+ )
+ {
+ gtExtractSideEffList(expr->AsBoundsChk()->gtArrLen, pList, flags);
+ gtExtractSideEffList(expr->AsBoundsChk()->gtIndex, pList, flags);
+ }
+
+ if (expr->OperGet() == GT_DYN_BLK || expr->OperGet() == GT_STORE_DYN_BLK)
+ {
+ if (expr->AsDynBlk()->Data() != nullptr)
+ {
+ gtExtractSideEffList(expr->AsDynBlk()->Data(), pList, flags);
+ }
+ gtExtractSideEffList(expr->AsDynBlk()->Addr(), pList, flags);
+ gtExtractSideEffList(expr->AsDynBlk()->gtDynamicSize, pList, flags);
+ }
+}
+
+/*****************************************************************************
+ *
+ * For debugging only - displays a tree node list and makes sure all the
+ * links are correctly set.
+ */
+
+#ifdef DEBUG
+
+void dispNodeList(GenTreePtr list, bool verbose)
+{
+ GenTreePtr last = nullptr;
+ GenTreePtr next;
+
+ if (!list)
+ {
+ return;
+ }
+
+ for (;;)
+ {
+ next = list->gtNext;
+
+ if (verbose)
+ {
+ printf("%08X -> %08X -> %08X\n", last, list, next);
+ }
+
+ assert(!last || last->gtNext == list);
+
+ assert(next == nullptr || next->gtPrev == list);
+
+ if (!next)
+ {
+ break;
+ }
+
+ last = list;
+ list = next;
+ }
+ printf(""); // null string means flush
+}
+
+/*****************************************************************************
+ * Callback to assert that the nodes of a qmark-colon subtree are marked
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::gtAssertColonCond(GenTreePtr* pTree, fgWalkData* data)
+{
+ assert(data->pCallbackData == nullptr);
+
+ assert((*pTree)->gtFlags & GTF_COLON_COND);
+
+ return WALK_CONTINUE;
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ * Callback to mark the nodes of a qmark-colon subtree that are conditionally
+ * executed.
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::gtMarkColonCond(GenTreePtr* pTree, fgWalkData* data)
+{
+ assert(data->pCallbackData == nullptr);
+
+ (*pTree)->gtFlags |= GTF_COLON_COND;
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ * Callback to clear the conditionally executed flags of nodes that no longer
+ will be conditionally executed. Note that when we find another colon we must
+ stop, as the nodes below this one WILL be conditionally executed. This callback
+ is called when folding a qmark condition (i.e. the condition is constant).
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::gtClearColonCond(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ assert(data->pCallbackData == nullptr);
+
+ if (tree->OperGet() == GT_COLON)
+ {
+ // Nodes below this will be conditionally executed.
+ return WALK_SKIP_SUBTREES;
+ }
+
+ tree->gtFlags &= ~GTF_COLON_COND;
+ return WALK_CONTINUE;
+}
+
+struct FindLinkData
+{
+ GenTreePtr nodeToFind;
+ GenTreePtr* result;
+};
+
+/*****************************************************************************
+ *
+ * Callback used by the tree walker to implement fgFindLink()
+ */
+static Compiler::fgWalkResult gtFindLinkCB(GenTreePtr* pTree, Compiler::fgWalkData* cbData)
+{
+ FindLinkData* data = (FindLinkData*)cbData->pCallbackData;
+ if (*pTree == data->nodeToFind)
+ {
+ data->result = pTree;
+ return Compiler::WALK_ABORT;
+ }
+
+ return Compiler::WALK_CONTINUE;
+}
+
+GenTreePtr* Compiler::gtFindLink(GenTreePtr stmt, GenTreePtr node)
+{
+ assert(stmt->gtOper == GT_STMT);
+
+ FindLinkData data = {node, nullptr};
+
+ fgWalkResult result = fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, gtFindLinkCB, &data);
+
+ if (result == WALK_ABORT)
+ {
+ assert(data.nodeToFind == *data.result);
+ return data.result;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Callback that checks if a tree node has oper type GT_CATCH_ARG
+ */
+
+static Compiler::fgWalkResult gtFindCatchArg(GenTreePtr* pTree, Compiler::fgWalkData* /* data */)
+{
+ return ((*pTree)->OperGet() == GT_CATCH_ARG) ? Compiler::WALK_ABORT : Compiler::WALK_CONTINUE;
+}
+
+/*****************************************************************************/
+bool Compiler::gtHasCatchArg(GenTreePtr tree)
+{
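+ // The GTF_ORDER_SIDEEFF flag is used here as a cheap filter; the tree walk then
+ // confirms whether a GT_CATCH_ARG node is actually present.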
+ if (((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0) && (fgWalkTreePre(&tree, gtFindCatchArg) == WALK_ABORT))
+ {
+ return true;
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------
+// gtHasCallOnStack:
+//
+// Arguments:
+// parentStack: a context (stack of parent nodes)
+//
+// Return Value:
+// returns true if any of the parent nodes are a GT_CALL
+//
+// Assumptions:
+// We have a stack of parent nodes. This generally requires that
+// we are performing a recursive tree walk using struct fgWalkData
+//
+//------------------------------------------------------------------------
+/* static */ bool Compiler::gtHasCallOnStack(GenTreeStack* parentStack)
+{
+ for (int i = 0; i < parentStack->Height(); i++)
+ {
+ GenTree* node = parentStack->Index(i);
+ if (node->OperGet() == GT_CALL)
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------
+// gtCheckQuirkAddrExposedLclVar:
+//
+// Arguments:
+// tree: an address taken GenTree node that is a GT_LCL_VAR
+// parentStack: a context (stack of parent nodes)
+// The 'parentStack' is used to ensure that we are in an argument context.
+//
+// Return Value:
+// None
+//
+// Notes:
+// When the allocation size of this LclVar is 32 bits we will quirk the size to 64 bits
+// because some PInvoke signatures incorrectly specify a ByRef to an INT32
+// when they actually write a SIZE_T or INT64. There are cases where overwriting
+// these extra 4 bytes corrupts some data (such as a saved register), which leads to an A/V,
+// whereas the previous JIT64 codegen did not lead to an A/V.
+//
+// Assumptions:
+// 'tree' is known to be address taken and that we have a stack
+// of parent nodes. Both of these generally requires that
+// we are performing a recursive tree walk using struct fgWalkData
+//------------------------------------------------------------------------
+void Compiler::gtCheckQuirkAddrExposedLclVar(GenTreePtr tree, GenTreeStack* parentStack)
+{
+#ifdef _TARGET_64BIT_
+ // We only need to Quirk for _TARGET_64BIT_
+
+ // Do we have a parent node that is a Call?
+ if (!Compiler::gtHasCallOnStack(parentStack))
+ {
+ // No, so we don't apply the Quirk
+ return;
+ }
+ noway_assert(tree->gtOper == GT_LCL_VAR);
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ var_types vartype = varDsc->TypeGet();
+
+ if (varDsc->lvIsParam)
+ {
+ // We can't Quirk the size of an incoming parameter
+ return;
+ }
+
+ // We may need to Quirk the storage size for this LCL_VAR
+ if (genActualType(vartype) == TYP_INT)
+ {
+ varDsc->lvQuirkToLong = true;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAdding a Quirk for the storage size of LclVar V%02d:", lclNum);
+ printf(" (%s ==> %s)\n", varTypeName(vartype), varTypeName(TYP_LONG));
+ }
+#endif // DEBUG
+ }
+#endif
+}
+
+// Checks to see if we're allowed to optimize Type::op_Equality or Type::op_Inequality on this operand.
+// We're allowed to convert to GT_EQ/GT_NE if one of the operands is:
+// 1) The result of Object::GetType
+// 2) The result of typeof(...)
+// 3) a local variable of type RuntimeType.
+bool Compiler::gtCanOptimizeTypeEquality(GenTreePtr tree)
+{
+ if (tree->gtOper == GT_CALL)
+ {
+ if (tree->gtCall.gtCallType == CT_HELPER)
+ {
+ if (gtIsTypeHandleToRuntimeTypeHelper(tree))
+ {
+ return true;
+ }
+ }
+ else if (tree->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
+ {
+ if (info.compCompHnd->getIntrinsicID(tree->gtCall.gtCallMethHnd) == CORINFO_INTRINSIC_Object_GetType)
+ {
+ return true;
+ }
+ }
+ }
+ else if ((tree->gtOper == GT_INTRINSIC) && (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType))
+ {
+ return true;
+ }
+ else if (tree->gtOper == GT_LCL_VAR)
+ {
+ LclVarDsc* lcl = &(lvaTable[tree->gtLclVarCommon.gtLclNum]);
+ if (lcl->TypeGet() == TYP_REF)
+ {
+ if (lcl->lvVerTypeInfo.GetClassHandle() == info.compCompHnd->getBuiltinClass(CLASSID_RUNTIME_TYPE))
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+}
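+
+// Editorial illustration (not part of the original change): the pattern this check enables
+// is the common C# idiom
+//
+//     if (obj.GetType() == typeof(string)) { ... }
+//
+// where each operand is known to be an exact RuntimeType, so the Type::op_Equality call can
+// be converted into a simple GT_EQ/GT_NE comparison as described above.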
+
+bool Compiler::gtIsTypeHandleToRuntimeTypeHelper(GenTreePtr tree)
+{
+ return tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL);
+}
+
+bool Compiler::gtIsActiveCSE_Candidate(GenTreePtr tree)
+{
+ return (optValnumCSE_phase && IS_CSE_INDEX(tree->gtCSEnum));
+}
+
+/*****************************************************************************/
+
+struct ComplexityStruct
+{
+ unsigned m_numNodes;
+ unsigned m_nodeLimit;
+ ComplexityStruct(unsigned nodeLimit) : m_numNodes(0), m_nodeLimit(nodeLimit)
+ {
+ }
+};
+
+static Compiler::fgWalkResult ComplexityExceedsWalker(GenTreePtr* pTree, Compiler::fgWalkData* data)
+{
+ ComplexityStruct* pComplexity = (ComplexityStruct*)data->pCallbackData;
+ if (++pComplexity->m_numNodes > pComplexity->m_nodeLimit)
+ {
+ return Compiler::WALK_ABORT;
+ }
+ else
+ {
+ return Compiler::WALK_CONTINUE;
+ }
+}
+
+bool Compiler::gtComplexityExceeds(GenTreePtr* tree, unsigned limit)
+{
+ ComplexityStruct complexity(limit);
+ if (fgWalkTreePre(tree, &ComplexityExceedsWalker, &complexity) == WALK_ABORT)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
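+
+// Editorial sketch (assumed call site, not from the original change): this is typically used
+// as a cheap size guard before a transformation whose cost grows with tree size, e.g.
+//
+//     if (gtComplexityExceeds(&expr, 100)) // 100 is an arbitrary node budget
+//     {
+//         return; // tree too large; skip the optimization
+//     }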
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX BasicBlock XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#if MEASURE_BLOCK_SIZE
+/* static */
+size_t BasicBlock::s_Size;
+/* static */
+size_t BasicBlock::s_Count;
+#endif // MEASURE_BLOCK_SIZE
+
+#ifdef DEBUG
+// The max # of tree nodes in any BB
+/* static */
+unsigned BasicBlock::s_nMaxTrees;
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Allocate a basic block but don't append it to the current BB list.
+ */
+
+BasicBlock* Compiler::bbNewBasicBlock(BBjumpKinds jumpKind)
+{
+ BasicBlock* block;
+
+ /* Allocate the block descriptor and zero it out */
+ assert(fgSafeBasicBlockCreation);
+
+ block = new (this, CMK_BasicBlock) BasicBlock;
+
+#if MEASURE_BLOCK_SIZE
+ BasicBlock::s_Count += 1;
+ BasicBlock::s_Size += sizeof(*block);
+#endif
+
+#ifdef DEBUG
+ // fgLookupBB() is invalid until fgInitBBLookup() is called again.
+ fgBBs = (BasicBlock**)0xCDCD;
+#endif
+
+ // TODO-Throughput: The following memset is pretty expensive - do something else?
+ // Note that some fields have to be initialized to 0 (like bbFPStateX87)
+ memset(block, 0, sizeof(*block));
+
+ // scopeInfo needs to be able to differentiate between blocks which
+ // correspond to some instrs (and so may have some LocalVarInfo
+ // boundaries) and blocks that have been inserted by the JIT.
+ block->bbCodeOffs = BAD_IL_OFFSET;
+ block->bbCodeOffsEnd = BAD_IL_OFFSET;
+
+ /* Give the block a number, set the ancestor count and weight */
+
+ ++fgBBcount;
+
+ if (compIsForInlining())
+ {
+ block->bbNum = ++impInlineInfo->InlinerCompiler->fgBBNumMax;
+ }
+ else
+ {
+ block->bbNum = ++fgBBNumMax;
+ }
+
+#ifndef LEGACY_BACKEND
+ if (compRationalIRForm)
+ {
+ block->bbFlags |= BBF_IS_LIR;
+ }
+#endif // !LEGACY_BACKEND
+
+ block->bbRefs = 1;
+ block->bbWeight = BB_UNITY_WEIGHT;
+
+ block->bbStkTempsIn = NO_BASE_TMP;
+ block->bbStkTempsOut = NO_BASE_TMP;
+
+ block->bbEntryState = nullptr;
+
+ /* Record the jump kind in the block */
+
+ block->bbJumpKind = jumpKind;
+
+ if (jumpKind == BBJ_THROW)
+ {
+ block->bbSetRunRarely();
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("New Basic Block BB%02u [%p] created.\n", block->bbNum, dspPtr(block));
+ }
+#endif
+
+ // We will give all the blocks var sets after the number of tracked variables
+ // is determined and frozen. After that, if we dynamically create a basic block,
+ // we will initialize its var sets.
+ if (fgBBVarSetsInited)
+ {
+ VarSetOps::AssignNoCopy(this, block->bbVarUse, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, block->bbVarDef, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, block->bbVarTmp, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, block->bbScope, VarSetOps::MakeEmpty(this));
+ }
+ else
+ {
+ VarSetOps::AssignNoCopy(this, block->bbVarUse, VarSetOps::UninitVal());
+ VarSetOps::AssignNoCopy(this, block->bbVarDef, VarSetOps::UninitVal());
+ VarSetOps::AssignNoCopy(this, block->bbVarTmp, VarSetOps::UninitVal());
+ VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::UninitVal());
+ VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::UninitVal());
+ VarSetOps::AssignNoCopy(this, block->bbScope, VarSetOps::UninitVal());
+ }
+
+ block->bbHeapUse = false;
+ block->bbHeapDef = false;
+ block->bbHeapLiveIn = false;
+ block->bbHeapLiveOut = false;
+
+ block->bbHeapSsaPhiFunc = nullptr;
+ block->bbHeapSsaNumIn = 0;
+ block->bbHeapSsaNumOut = 0;
+
+ // Make sure we reserve a NOT_IN_LOOP value that isn't a legal table index.
+ static_assert_no_msg(MAX_LOOP_NUM < BasicBlock::NOT_IN_LOOP);
+
+ block->bbNatLoopNum = BasicBlock::NOT_IN_LOOP;
+
+ return block;
+}
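+
+// Editorial note (illustrative, not from the original change): callers are expected to link
+// the returned block into the flowgraph themselves, e.g.
+//
+//     BasicBlock* newBlk = bbNewBasicBlock(BBJ_NONE);
+//     // ... set bbFlags, EH region, etc. ...
+//     // then insert it via a wrapper such as fgNewBBafter/fgNewBBbefore (assumed helpers).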
+
+//------------------------------------------------------------------------------
+// containsStatement - return true if the block contains the given statement
+//------------------------------------------------------------------------------
+
+bool BasicBlock::containsStatement(GenTree* statement)
+{
+ assert(statement->gtOper == GT_STMT);
+
+ GenTree* curr = bbTreeList;
+ do
+ {
+ if (curr == statement)
+ {
+ break;
+ }
+ curr = curr->gtNext;
+ } while (curr);
+ return curr != nullptr;
+}
+
+GenTreeStmt* BasicBlock::FirstNonPhiDef()
+{
+ GenTreePtr stmt = bbTreeList;
+ if (stmt == nullptr)
+ {
+ return nullptr;
+ }
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ while ((tree->OperGet() == GT_ASG && tree->gtOp.gtOp2->OperGet() == GT_PHI) ||
+ (tree->OperGet() == GT_STORE_LCL_VAR && tree->gtOp.gtOp1->OperGet() == GT_PHI))
+ {
+ stmt = stmt->gtNext;
+ if (stmt == nullptr)
+ {
+ return nullptr;
+ }
+ tree = stmt->gtStmt.gtStmtExpr;
+ }
+ return stmt->AsStmt();
+}
+
+GenTreePtr BasicBlock::FirstNonPhiDefOrCatchArgAsg()
+{
+ GenTreePtr stmt = FirstNonPhiDef();
+ if (stmt == nullptr)
+ {
+ return nullptr;
+ }
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ if ((tree->OperGet() == GT_ASG && tree->gtOp.gtOp2->OperGet() == GT_CATCH_ARG) ||
+ (tree->OperGet() == GT_STORE_LCL_VAR && tree->gtOp.gtOp1->OperGet() == GT_CATCH_ARG))
+ {
+ stmt = stmt->gtNext;
+ }
+ return stmt;
+}
+
+/*****************************************************************************
+ *
+ * Mark a block as rarely run: we set its weight to zero, and we also
+ * don't want to have a loop in a rarely run block.
+ */
+
+void BasicBlock::bbSetRunRarely()
+{
+ setBBWeight(BB_ZERO_WEIGHT);
+ if (bbWeight == BB_ZERO_WEIGHT)
+ {
+ bbFlags |= BBF_RUN_RARELY; // This block is never/rarely run
+ }
+}
+
+/*****************************************************************************
+ *
+ * Can a BasicBlock be inserted after this without altering the flowgraph?
+ */
+
+bool BasicBlock::bbFallsThrough()
+{
+ switch (bbJumpKind)
+ {
+
+ case BBJ_THROW:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_EHCATCHRET:
+ case BBJ_RETURN:
+ case BBJ_ALWAYS:
+ case BBJ_LEAVE:
+ case BBJ_SWITCH:
+ return false;
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ return true;
+
+ case BBJ_CALLFINALLY:
+ return ((bbFlags & BBF_RETLESS_CALL) == 0);
+
+ default:
+ assert(!"Unknown bbJumpKind in bbFallsThrough()");
+ return true;
+ }
+}
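+
+// Editorial example (not part of the original change): a BBJ_COND block implicitly continues
+// into bbNext when its condition is false, so placing a new block immediately after it changes
+// the flow; a BBJ_ALWAYS block transfers control explicitly, so a block can be inserted after
+// it without altering the flowgraph. This is what bbFallsThrough distinguishes.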
+
+unsigned BasicBlock::NumSucc(Compiler* comp)
+{
+ // As described in the spec comment of NumSucc at its declaration, whether "comp" is null determines
+ // whether NumSucc and GetSucc yield successors of finally blocks.
+
+ switch (bbJumpKind)
+ {
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ return 0;
+
+ case BBJ_EHFILTERRET:
+ if (comp == nullptr)
+ {
+ return 0;
+ }
+ else
+ {
+ return 1;
+ }
+
+ case BBJ_EHFINALLYRET:
+ {
+ if (comp == nullptr)
+ {
+ return 0;
+ }
+ else
+ {
+ // The first block of the handler is labelled with the catch type.
+ BasicBlock* hndBeg = comp->fgFirstBlockOfHandler(this);
+ if (hndBeg->bbCatchTyp == BBCT_FINALLY)
+ {
+ return comp->fgNSuccsOfFinallyRet(this);
+ }
+ else
+ {
+ assert(hndBeg->bbCatchTyp == BBCT_FAULT); // We can only BBJ_EHFINALLYRET from FINALLY and FAULT.
+ // A FAULT block has no successors.
+ return 0;
+ }
+ }
+ }
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_LEAVE:
+ case BBJ_NONE:
+ return 1;
+ case BBJ_COND:
+ if (bbJumpDest == bbNext)
+ {
+ return 1;
+ }
+ else
+ {
+ return 2;
+ }
+ case BBJ_SWITCH:
+ if (comp == nullptr)
+ {
+ return bbJumpSwt->bbsCount;
+ }
+ else
+ {
+ Compiler::SwitchUniqueSuccSet sd = comp->GetDescriptorForSwitch(this);
+ return sd.numDistinctSuccs;
+ }
+
+ default:
+ unreached();
+ }
+}
+
+BasicBlock* BasicBlock::GetSucc(unsigned i, Compiler* comp)
+{
+ // As described in the spec comment of GetSucc at its declaration, whether "comp" is null determines
+ // whether NumSucc and GetSucc yield successors of finally blocks.
+
+ assert(i < NumSucc(comp)); // Index bounds check.
+ // printf("bbjk=%d\n", bbJumpKind);
+ switch (bbJumpKind)
+ {
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ unreached(); // Should have been covered by assert above.
+
+ case BBJ_EHFILTERRET:
+ {
+ assert(comp != nullptr); // Or else we're not looking for successors.
+ BasicBlock* result = comp->fgFirstBlockOfHandler(this);
+ noway_assert(result == bbJumpDest);
+ // Handler is the (sole) normal successor of the filter.
+ return result;
+ }
+
+ case BBJ_EHFINALLYRET:
+ return comp->fgSuccOfFinallyRet(this, i);
+
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_LEAVE:
+ return bbJumpDest;
+
+ case BBJ_NONE:
+ return bbNext;
+ case BBJ_COND:
+ if (i == 0)
+ {
+ return bbNext;
+ }
+ else
+ {
+ assert(i == 1);
+ return bbJumpDest;
+ }
+ case BBJ_SWITCH:
+ if (comp == nullptr)
+ {
+ assert(i < bbJumpSwt->bbsCount); // Range check.
+ return bbJumpSwt->bbsDstTab[i];
+ }
+ else
+ {
+ // Remove duplicates.
+ Compiler::SwitchUniqueSuccSet sd = comp->GetDescriptorForSwitch(this);
+ assert(i < sd.numDistinctSuccs); // Range check.
+ return sd.nonDuplicates[i];
+ }
+
+ default:
+ unreached();
+ }
+}
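+
+// Editorial sketch (illustrative only): the usual way to visit a block's successors is to pair
+// NumSucc and GetSucc with the same 'comp' argument, e.g.
+//
+//     for (unsigned i = 0; i < block->NumSucc(comp); i++)
+//     {
+//         BasicBlock* succ = block->GetSucc(i, comp);
+//         // ... process succ ...
+//     }
+//
+// Passing nullptr instead of 'comp' changes how finally/filter and switch successors are
+// reported, as described in the comments above.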
+
+// -------------------------------------------------------------------------
+// IsRegOptional: Returns true if this gentree node is marked by lowering to
+// indicate that codegen can still generate code even if it wasn't allocated
+// a register.
+bool GenTree::IsRegOptional() const
+{
+#ifdef LEGACY_BACKEND
+ return false;
+#else
+ return gtLsraInfo.regOptional;
+#endif
+}
+
+bool GenTree::IsPhiNode()
+{
+ return (OperGet() == GT_PHI_ARG) || (OperGet() == GT_PHI) || IsPhiDefn();
+}
+
+bool GenTree::IsPhiDefn()
+{
+ bool res = ((OperGet() == GT_ASG) && (gtOp.gtOp2 != nullptr) && (gtOp.gtOp2->OperGet() == GT_PHI)) ||
+ ((OperGet() == GT_STORE_LCL_VAR) && (gtOp.gtOp1 != nullptr) && (gtOp.gtOp1->OperGet() == GT_PHI));
+ assert(!res || OperGet() == GT_STORE_LCL_VAR || gtOp.gtOp1->OperGet() == GT_LCL_VAR);
+ return res;
+}
+
+bool GenTree::IsPhiDefnStmt()
+{
+ if (OperGet() != GT_STMT)
+ {
+ return false;
+ }
+ GenTreePtr asg = gtStmt.gtStmtExpr;
+ return asg->IsPhiDefn();
+}
+
+// IsPartialLclFld: Check for a GT_LCL_FLD whose type is a different size than the lclVar.
+//
+// Arguments:
+// comp - the Compiler object.
+//
+// Return Value:
+// Returns "true" iff 'this' is a GT_LCL_FLD or GT_STORE_LCL_FLD on which the type
+// is not the same size as the type of the GT_LCL_VAR
+
+bool GenTree::IsPartialLclFld(Compiler* comp)
+{
+ return ((gtOper == GT_LCL_FLD) &&
+ (comp->lvaTable[this->gtLclVarCommon.gtLclNum].lvExactSize != genTypeSize(gtType)));
+}
+
+bool GenTree::DefinesLocal(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, bool* pIsEntire)
+{
+ GenTreeBlk* blkNode = nullptr;
+ if (OperIsAssignment())
+ {
+ if (gtOp.gtOp1->IsLocal())
+ {
+ GenTreeLclVarCommon* lclVarTree = gtOp.gtOp1->AsLclVarCommon();
+ *pLclVarTree = lclVarTree;
+ if (pIsEntire != nullptr)
+ {
+ if (lclVarTree->IsPartialLclFld(comp))
+ {
+ *pIsEntire = false;
+ }
+ else
+ {
+ *pIsEntire = true;
+ }
+ }
+ return true;
+ }
+ else if (gtOp.gtOp1->OperGet() == GT_IND)
+ {
+ GenTreePtr indArg = gtOp.gtOp1->gtOp.gtOp1;
+ return indArg->DefinesLocalAddr(comp, genTypeSize(gtOp.gtOp1->TypeGet()), pLclVarTree, pIsEntire);
+ }
+ else if (gtOp.gtOp1->OperIsBlk())
+ {
+ blkNode = gtOp.gtOp1->AsBlk();
+ }
+ }
+ else if (OperIsBlk())
+ {
+ blkNode = this->AsBlk();
+ }
+ if (blkNode != nullptr)
+ {
+ GenTreePtr destAddr = blkNode->Addr();
+ unsigned width = blkNode->gtBlkSize;
+ // Do we care about whether this assigns the entire variable?
+ if (pIsEntire != nullptr && width == 0)
+ {
+ assert(blkNode->gtOper == GT_DYN_BLK);
+ GenTreePtr blockWidth = blkNode->AsDynBlk()->gtDynamicSize;
+ if (blockWidth->IsCnsIntOrI())
+ {
+ if (blockWidth->IsIconHandle())
+ {
+ // If it's a handle, it must be a class handle. We only create such block operations
+ // for initialization of struct types, so the type of the argument(s) will match this
+ // type, by construction, and be "entire".
+ assert(blockWidth->IsIconHandle(GTF_ICON_CLASS_HDL));
+ width = comp->info.compCompHnd->getClassSize(
+ CORINFO_CLASS_HANDLE(blockWidth->gtIntConCommon.IconValue()));
+ }
+ else
+ {
+ ssize_t swidth = blockWidth->AsIntConCommon()->IconValue();
+ assert(swidth >= 0);
+ // cpblk of size zero exists in the wild (in yacc-generated code in SQL) and is valid IL.
+ if (swidth == 0)
+ {
+ return false;
+ }
+ width = unsigned(swidth);
+ }
+ }
+ }
+ return destAddr->DefinesLocalAddr(comp, width, pLclVarTree, pIsEntire);
+ }
+ // Otherwise...
+ return false;
+}
+
+// Returns true if this GenTree defines a result which is based on the address of a local.
+bool GenTree::DefinesLocalAddr(Compiler* comp, unsigned width, GenTreeLclVarCommon** pLclVarTree, bool* pIsEntire)
+{
+ if (OperGet() == GT_ADDR || OperGet() == GT_LCL_VAR_ADDR)
+ {
+ GenTreePtr addrArg = this;
+ if (OperGet() == GT_ADDR)
+ {
+ addrArg = gtOp.gtOp1;
+ }
+
+ if (addrArg->IsLocal() || addrArg->OperIsLocalAddr())
+ {
+ GenTreeLclVarCommon* addrArgLcl = addrArg->AsLclVarCommon();
+ *pLclVarTree = addrArgLcl;
+ if (pIsEntire != nullptr)
+ {
+ unsigned lclOffset = 0;
+ if (addrArg->OperIsLocalField())
+ {
+ lclOffset = addrArg->gtLclFld.gtLclOffs;
+ }
+
+ if (lclOffset != 0)
+ {
+ // We aren't updating the bytes at [0..lclOffset-1] so *pIsEntire should be set to false
+ *pIsEntire = false;
+ }
+ else
+ {
+ unsigned lclNum = addrArgLcl->GetLclNum();
+ unsigned varWidth = comp->lvaLclExactSize(lclNum);
+ if (comp->lvaTable[lclNum].lvNormalizeOnStore())
+ {
+ // It's normalize on store, so use the full storage width -- writing to low bytes won't
+ // necessarily yield a normalized value.
+ varWidth = genTypeStSz(var_types(comp->lvaTable[lclNum].lvType)) * sizeof(int);
+ }
+ *pIsEntire = (varWidth == width);
+ }
+ }
+ return true;
+ }
+ else if (addrArg->OperGet() == GT_IND)
+ {
+ // A GT_ADDR of a GT_IND can both be optimized away, recurse using the child of the GT_IND
+ return addrArg->gtOp.gtOp1->DefinesLocalAddr(comp, width, pLclVarTree, pIsEntire);
+ }
+ }
+ else if (OperGet() == GT_ADD)
+ {
+ if (gtOp.gtOp1->IsCnsIntOrI())
+ {
+ // If we are just adding a zero then we allow an IsEntire match against width;
+ // otherwise we change width to zero to disallow an IsEntire match.
+ return gtOp.gtOp2->DefinesLocalAddr(comp, gtOp.gtOp1->IsIntegralConst(0) ? width : 0, pLclVarTree,
+ pIsEntire);
+ }
+ else if (gtOp.gtOp2->IsCnsIntOrI())
+ {
+ // If we are just adding a zero then we allow an IsEntire match against width;
+ // otherwise we change width to zero to disallow an IsEntire match.
+ return gtOp.gtOp1->DefinesLocalAddr(comp, gtOp.gtOp2->IsIntegralConst(0) ? width : 0, pLclVarTree,
+ pIsEntire);
+ }
+ }
+ // Post rationalization we could have GT_IND(GT_LEA(..)) trees.
+ else if (OperGet() == GT_LEA)
+ {
+ // This method gets invoked during liveness computation and therefore it is critical
+ // that we don't miss the 'use' of any local. The logic below assumes
+ // that in the case of LEA(base, index, offset) only the base can be a GT_LCL_VAR_ADDR;
+ // the index cannot.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ GenTreePtr index = gtOp.gtOp2;
+ if (index != nullptr)
+ {
+ assert(!index->DefinesLocalAddr(comp, width, pLclVarTree, pIsEntire));
+ }
+#endif // DEBUG
+
+ // base
+ GenTreePtr base = gtOp.gtOp1;
+ if (base != nullptr)
+ {
+ // Lea could have an Indir as its base.
+ if (base->OperGet() == GT_IND)
+ {
+ base = base->gtOp.gtOp1->gtEffectiveVal(/*commas only*/ true);
+ }
+ return base->DefinesLocalAddr(comp, width, pLclVarTree, pIsEntire);
+ }
+ }
+ // Otherwise...
+ return false;
+}
+
+//------------------------------------------------------------------------
+// IsLocalExpr: Determine if this is a LclVarCommon node and return some
+// additional info about it in the two out parameters.
+//
+// Arguments:
+// comp - The Compiler instance
+// pLclVarTree - An "out" argument that returns the local tree as a
+// LclVarCommon, if it is indeed local.
+// pFldSeq - An "out" argument that returns the value numbering field
+// sequence for the node, if any.
+//
+// Return Value:
+// Returns true, and sets the out arguments accordingly, if this is
+// a LclVarCommon node.
+
+bool GenTree::IsLocalExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, FieldSeqNode** pFldSeq)
+{
+ if (IsLocal()) // Note that this covers "GT_LCL_FLD."
+ {
+ *pLclVarTree = AsLclVarCommon();
+ if (OperGet() == GT_LCL_FLD)
+ {
+ // Otherwise, prepend this field to whatever we've already accumulated outside in.
+ *pFldSeq = comp->GetFieldSeqStore()->Append(AsLclFld()->gtFieldSeq, *pFldSeq);
+ }
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+// If this tree evaluates some sum of a local address and some constants,
+// return the node for the local being addressed
+
+GenTreeLclVarCommon* GenTree::IsLocalAddrExpr()
+{
+ if (OperGet() == GT_ADDR)
+ {
+ return gtOp.gtOp1->IsLocal() ? gtOp.gtOp1->AsLclVarCommon() : nullptr;
+ }
+ else if (OperIsLocalAddr())
+ {
+ return this->AsLclVarCommon();
+ }
+ else if (OperGet() == GT_ADD)
+ {
+ if (gtOp.gtOp1->OperGet() == GT_CNS_INT)
+ {
+ return gtOp.gtOp2->IsLocalAddrExpr();
+ }
+ else if (gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ return gtOp.gtOp1->IsLocalAddrExpr();
+ }
+ }
+ // Otherwise...
+ return nullptr;
+}
+
+bool GenTree::IsLocalAddrExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, FieldSeqNode** pFldSeq)
+{
+ if (OperGet() == GT_ADDR)
+ {
+ assert(!comp->compRationalIRForm);
+ GenTreePtr addrArg = gtOp.gtOp1;
+ if (addrArg->IsLocal()) // Note that this covers "GT_LCL_FLD."
+ {
+ *pLclVarTree = addrArg->AsLclVarCommon();
+ if (addrArg->OperGet() == GT_LCL_FLD)
+ {
+ // Otherwise, prepend this field to whatever we've already accumulated outside in.
+ *pFldSeq = comp->GetFieldSeqStore()->Append(addrArg->AsLclFld()->gtFieldSeq, *pFldSeq);
+ }
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ else if (OperIsLocalAddr())
+ {
+ *pLclVarTree = this->AsLclVarCommon();
+ if (this->OperGet() == GT_LCL_FLD_ADDR)
+ {
+ *pFldSeq = comp->GetFieldSeqStore()->Append(this->AsLclFld()->gtFieldSeq, *pFldSeq);
+ }
+ return true;
+ }
+ else if (OperGet() == GT_ADD)
+ {
+ if (gtOp.gtOp1->OperGet() == GT_CNS_INT)
+ {
+ if (gtOp.gtOp1->AsIntCon()->gtFieldSeq == nullptr)
+ {
+ return false;
+ }
+ // Otherwise, prepend this field to whatever we've already accumulated outside in.
+ *pFldSeq = comp->GetFieldSeqStore()->Append(gtOp.gtOp1->AsIntCon()->gtFieldSeq, *pFldSeq);
+ return gtOp.gtOp2->IsLocalAddrExpr(comp, pLclVarTree, pFldSeq);
+ }
+ else if (gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ if (gtOp.gtOp2->AsIntCon()->gtFieldSeq == nullptr)
+ {
+ return false;
+ }
+ // Otherwise, prepend this field to whatever we've already accumulated outside in.
+ *pFldSeq = comp->GetFieldSeqStore()->Append(gtOp.gtOp2->AsIntCon()->gtFieldSeq, *pFldSeq);
+ return gtOp.gtOp1->IsLocalAddrExpr(comp, pLclVarTree, pFldSeq);
+ }
+ }
+ // Otherwise...
+ return false;
+}
+
+//------------------------------------------------------------------------
+// IsLclVarUpdateTree: Determine whether this is an assignment tree of the
+// form Vn = Vn 'oper' 'otherTree' where Vn is a lclVar
+//
+// Arguments:
+// pOtherTree - An "out" argument in which 'otherTree' will be returned.
+// pOper - An "out" argument in which 'oper' will be returned.
+//
+// Return Value:
+// If the tree is of the above form, the lclNum of the variable being
+// updated is returned, and 'pOtherTree' and 'pOper' are set.
+// Otherwise, returns BAD_VAR_NUM.
+//
+// Notes:
+// 'otherTree' can have any shape.
+// We avoid worrying about whether the op is commutative by only considering the
+// first operand of the rhs. It is expected that most trees of this form will
+// already have the lclVar on the lhs.
+// TODO-CQ: Evaluate whether there are missed opportunities due to this, or
+// whether gtSetEvalOrder will already have put the lclVar on the lhs in
+// the cases of interest.
+
+unsigned GenTree::IsLclVarUpdateTree(GenTree** pOtherTree, genTreeOps* pOper)
+{
+ unsigned lclNum = BAD_VAR_NUM;
+ if (OperIsAssignment())
+ {
+ GenTree* lhs = gtOp.gtOp1;
+ if (lhs->OperGet() == GT_LCL_VAR)
+ {
+ unsigned lhsLclNum = lhs->AsLclVarCommon()->gtLclNum;
+ if (gtOper == GT_ASG)
+ {
+ GenTree* rhs = gtOp.gtOp2;
+ if (rhs->OperIsBinary() && (rhs->gtOp.gtOp1->gtOper == GT_LCL_VAR) &&
+ (rhs->gtOp.gtOp1->AsLclVarCommon()->gtLclNum == lhsLclNum))
+ {
+ lclNum = lhsLclNum;
+ *pOtherTree = rhs->gtOp.gtOp2;
+ *pOper = rhs->gtOper;
+ }
+ }
+ else
+ {
+ lclNum = lhsLclNum;
+ *pOper = GenTree::OpAsgToOper(gtOper);
+ *pOtherTree = gtOp.gtOp2;
+ }
+ }
+ }
+ return lclNum;
+}
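+
+// Editorial illustration (not part of the original change): for source like "x = x + y" the
+// matched shape is
+//
+//     ASG(LCL_VAR V01, ADD(LCL_VAR V01, <otherTree>))
+//
+// and IsLclVarUpdateTree returns V01's lclNum with *pOper set to GT_ADD and *pOtherTree set
+// to the tree for 'y'.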
+
+// return true if this tree node is a subcomponent of parent for codegen purposes
+// (essentially, will be rolled into the same instruction)
+// Note that this method relies upon the value of the gtRegNum field to determine
+// if the treenode is contained or not. Therefore you cannot call this method
+// until after the LSRA phase has allocated physical registers to the treenodes.
+bool GenTree::isContained() const
+{
+ if (isContainedSpillTemp())
+ {
+ return true;
+ }
+
+ if (gtHasReg())
+ {
+ return false;
+ }
+
+ // these actually produce a register (the flags reg, we just don't model it)
+ // and are a separate instruction from the branch that consumes the result
+ if (OperKind() & GTK_RELOP)
+ {
+ return false;
+ }
+
+ // TODO-Cleanup: this is not clean; it would be nice to have some way of marking this.
+ switch (OperGet())
+ {
+ case GT_STOREIND:
+ case GT_JTRUE:
+ case GT_RETURN:
+ case GT_RETFILT:
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ case GT_ARR_BOUNDS_CHECK:
+ case GT_LOCKADD:
+ case GT_NOP:
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ case GT_RETURNTRAP:
+ case GT_COMMA:
+ case GT_PINVOKE_PROLOG:
+ case GT_PHYSREGDST:
+ case GT_PUTARG_STK:
+ case GT_MEMORYBARRIER:
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ case GT_SWITCH:
+ case GT_JMPTABLE:
+ case GT_SWITCH_TABLE:
+ case GT_SWAP:
+ case GT_LCLHEAP:
+ case GT_CKFINITE:
+ case GT_JMP:
+ case GT_IL_OFFSET:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+#endif
+ return false;
+
+#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
+ case GT_LONG:
+ // GT_LONG nodes are normally contained. The only exception is when the result
+ // of a TYP_LONG operation is not used and this can only happen if the GT_LONG
+ // is the last node in the statement (in linear order).
+ return gtNext != nullptr;
+#endif
+
+ case GT_CALL:
+ // Note: if you hit this assert you are probably calling isContained()
+ // before the LSRA phase has allocated physical registers to the tree nodes.
+ //
+ assert(gtType == TYP_VOID);
+ return false;
+
+ default:
+ // if it's contained it better have a parent
+ assert(gtNext || OperIsLocal());
+ return true;
+ }
+}
+
+// return true if node is contained and an indir
+bool GenTree::isContainedIndir() const
+{
+ return isContained() && isIndir();
+}
+
+bool GenTree::isIndirAddrMode()
+{
+ return isIndir() && AsIndir()->Addr()->OperIsAddrMode() && AsIndir()->Addr()->isContained();
+}
+
+bool GenTree::isIndir() const
+{
+ return OperGet() == GT_IND || OperGet() == GT_STOREIND;
+}
+
+bool GenTreeIndir::HasBase()
+{
+ return Base() != nullptr;
+}
+
+bool GenTreeIndir::HasIndex()
+{
+ return Index() != nullptr;
+}
+
+GenTreePtr GenTreeIndir::Base()
+{
+ GenTreePtr addr = Addr();
+
+ if (isIndirAddrMode())
+ {
+ GenTree* result = addr->AsAddrMode()->Base();
+ if (result != nullptr)
+ {
+ result = result->gtEffectiveVal();
+ }
+ return result;
+ }
+ else
+ {
+ return addr; // TODO: why do we return 'addr' here, but we return 'nullptr' in the equivalent Index() case?
+ }
+}
+
+GenTree* GenTreeIndir::Index()
+{
+ if (isIndirAddrMode())
+ {
+ GenTree* result = Addr()->AsAddrMode()->Index();
+ if (result != nullptr)
+ {
+ result = result->gtEffectiveVal();
+ }
+ return result;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+unsigned GenTreeIndir::Scale()
+{
+ if (HasIndex())
+ {
+ return Addr()->AsAddrMode()->gtScale;
+ }
+ else
+ {
+ return 1;
+ }
+}
+
+size_t GenTreeIndir::Offset()
+{
+ if (isIndirAddrMode())
+ {
+ return Addr()->AsAddrMode()->gtOffset;
+ }
+ else if (Addr()->gtOper == GT_CLS_VAR_ADDR)
+ {
+ return (size_t)Addr()->gtClsVar.gtClsVarHnd;
+ }
+ else if (Addr()->IsCnsIntOrI() && Addr()->isContained())
+ {
+ return Addr()->AsIntConCommon()->IconValue();
+ }
+ else
+ {
+ return 0;
+ }
+}
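+
+// Editorial example (assumed values, not from the original change): for an indirection whose
+// contained address mode computes [rbx + rsi*4 + 24], Base() yields the rbx tree, Index() the
+// rsi tree, Scale() returns 4 and Offset() returns 24; a plain indirection with no address
+// mode reports Scale() == 1 and Offset() == 0.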
+
+//------------------------------------------------------------------------
+// GenTreeIntConCommon::ImmedValNeedsReloc: does this immediate value need recording a relocation with the VM?
+//
+// Arguments:
+// comp - Compiler instance
+//
+// Return Value:
+// True if this immediate value needs recording a relocation with the VM; false otherwise.
+
+bool GenTreeIntConCommon::ImmedValNeedsReloc(Compiler* comp)
+{
+#ifdef RELOC_SUPPORT
+ return comp->opts.compReloc && (gtOper == GT_CNS_INT) && IsIconHandle();
+#else
+ return false;
+#endif
+}
+
+//------------------------------------------------------------------------
+// ImmedValCanBeFolded: can this immediate value be folded for op?
+//
+// Arguments:
+// comp - Compiler instance
+// op - Tree operator
+//
+// Return Value:
+// True if this immediate value can be folded for op; false otherwise.
+
+bool GenTreeIntConCommon::ImmedValCanBeFolded(Compiler* comp, genTreeOps op)
+{
+ // In general, immediate values that need relocations can't be folded.
+ // There are cases where we do want to allow folding of handle comparisons
+ // (e.g., typeof(T) == typeof(int)).
+ return !ImmedValNeedsReloc(comp) || (op == GT_EQ) || (op == GT_NE);
+}
+
+#ifdef _TARGET_AMD64_
+// Returns true if this absolute address fits within the base of an addr mode.
+// On Amd64 this effectively means whether an absolute indirect address can
+// be encoded as a 32-bit offset relative to IP or to zero.
+bool GenTreeIntConCommon::FitsInAddrBase(Compiler* comp)
+{
+#ifndef LEGACY_BACKEND
+#ifdef DEBUG
+ // Early out if PC-rel encoding of absolute addr is disabled.
+ if (!comp->opts.compEnablePCRelAddr)
+ {
+ return false;
+ }
+#endif
+#endif //! LEGACY_BACKEND
+
+ if (comp->opts.compReloc)
+ {
+ // During Ngen the JIT is always asked to generate relocatable code.
+ // Hence the JIT will try to encode only icon handles as pc-relative offsets.
+ return IsIconHandle() && (IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void*)IconValue()));
+ }
+ else
+ {
+ // During Jitting, we are allowed to generate non-relocatable code.
+ // On Amd64 we can encode an absolute indirect addr as an offset relative to zero or RIP.
+ // An absolute indir addr that can fit within 32-bits can be encoded as an offset relative
+ // to zero. All other absolute indir addr could be attempted to be encoded as RIP relative
+ // based on reloc hint provided by VM. RIP relative encoding is preferred over relative
+ // to zero, because the former is one byte smaller than the latter. For this reason
+ // we check for reloc hint first and then whether addr fits in 32-bits next.
+ //
+ // VM starts off with an initial state to allow both data and code address to be encoded as
+ // pc-relative offsets. Hence JIT will attempt to encode all absolute addresses as pc-relative
+ // offsets. It is possible that while jitting a method, an address cannot be encoded as a
+ // pc-relative offset. In that case VM will note the overflow and will trigger re-jitting
+ // of the method with reloc hints turned off for all future methods. Second time around
+ // jitting will succeed since JIT will not attempt to encode data addresses as pc-relative
+ // offsets. Note that the JIT will always attempt to relocate code addresses (e.g. call addr).
+ // After an overflow, VM will assume any relocation recorded is for a code address and will
+ // emit jump thunk if it cannot be encoded as pc-relative offset.
+ return (IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void*)IconValue())) || FitsInI32();
+ }
+}
+
+// Returns true if this icon value, encoded as an address, needs recording a relocation with the VM
+bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp)
+{
+ if (comp->opts.compReloc)
+ {
+ // During Ngen the JIT is always asked to generate relocatable code.
+ // Hence the JIT will try to encode only icon handles as pc-relative offsets.
+ return IsIconHandle() && (IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void*)IconValue()));
+ }
+ else
+ {
+ return IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void*)IconValue());
+ }
+}
+
+#elif defined(_TARGET_X86_)
+// Returns true if this absolute address fits within the base of an addr mode.
+// On x86 all addresses are 4-bytes and can be directly encoded in an addr mode.
+bool GenTreeIntConCommon::FitsInAddrBase(Compiler* comp)
+{
+#ifndef LEGACY_BACKEND
+#ifdef DEBUG
+ // Early out if PC-rel encoding of absolute addr is disabled.
+ if (!comp->opts.compEnablePCRelAddr)
+ {
+ return false;
+ }
+#endif
+#endif //! LEGACY_BACKEND
+
+ // TODO-x86 - TLS field handles are excluded for now as they are accessed relative to FS segment.
+ // Handling of TLS field handles is a NYI and this needs to be revisited after implementing it.
+ return IsCnsIntOrI() && !IsIconHandle(GTF_ICON_TLS_HDL);
+}
+
+// Returns true if this icon value, encoded as an address, needs recording a relocation with the VM
+bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp)
+{
+ // If generating relocatable code, icons should be reported for recording relocations.
+ return comp->opts.compReloc && IsIconHandle();
+}
+#endif //_TARGET_X86_
+
+bool GenTree::IsFieldAddr(Compiler* comp, GenTreePtr* pObj, GenTreePtr* pStatic, FieldSeqNode** pFldSeq)
+{
+ FieldSeqNode* newFldSeq = nullptr;
+ GenTreePtr baseAddr = nullptr;
+ bool mustBeStatic = false;
+
+ FieldSeqNode* statStructFldSeq = nullptr;
+ if (TypeGet() == TYP_REF)
+ {
+ // Recognize struct static field patterns...
+ if (OperGet() == GT_IND)
+ {
+ GenTreePtr addr = gtOp.gtOp1;
+ GenTreeIntCon* icon = nullptr;
+ if (addr->OperGet() == GT_CNS_INT)
+ {
+ icon = addr->AsIntCon();
+ }
+ else if (addr->OperGet() == GT_ADD)
+ {
+ // op1 should never be a field sequence (or any other kind of handle)
+ assert((addr->gtOp.gtOp1->gtOper != GT_CNS_INT) || !addr->gtOp.gtOp1->IsIconHandle());
+ if (addr->gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ icon = addr->gtOp.gtOp2->AsIntCon();
+ }
+ }
+ if (icon != nullptr && !icon->IsIconHandle(GTF_ICON_STR_HDL) // String handles are a source of TYP_REFs.
+ && icon->gtFieldSeq != nullptr &&
+ icon->gtFieldSeq->m_next == nullptr // A static field should be a singleton
+ // TODO-Review: A pseudoField here indicates an issue - this requires investigation
+ // See test case src\ddsuites\src\clr\x86\CoreMangLib\Dev\Globalization\CalendarRegressions.exe
+ && !(FieldSeqStore::IsPseudoField(icon->gtFieldSeq->m_fieldHnd)) &&
+ icon->gtFieldSeq != FieldSeqStore::NotAField()) // Ignore non-fields.
+ {
+ statStructFldSeq = icon->gtFieldSeq;
+ }
+ else
+ {
+ addr = addr->gtEffectiveVal();
+
+ // Perhaps it's a direct indirection of a helper call or a cse with a zero offset annotation.
+ if ((addr->OperGet() == GT_CALL) || (addr->OperGet() == GT_LCL_VAR))
+ {
+ FieldSeqNode* zeroFieldSeq = nullptr;
+ if (comp->GetZeroOffsetFieldMap()->Lookup(addr, &zeroFieldSeq))
+ {
+ if (zeroFieldSeq->m_next == nullptr)
+ {
+ statStructFldSeq = zeroFieldSeq;
+ }
+ }
+ }
+ }
+ }
+ else if (OperGet() == GT_CLS_VAR)
+ {
+ GenTreeClsVar* clsVar = AsClsVar();
+ if (clsVar->gtFieldSeq != nullptr && clsVar->gtFieldSeq->m_next == nullptr)
+ {
+ statStructFldSeq = clsVar->gtFieldSeq;
+ }
+ }
+ else if (OperIsLocal())
+ {
+ // If we have a GT_LCL_VAR, it can be result of a CSE substitution
+ // If it is then the CSE assignment will have a ValueNum that
+ // describes the RHS of the CSE assignment.
+ //
+ // The CSE could be a pointer to a boxed struct
+ //
+ GenTreeLclVarCommon* lclVar = AsLclVarCommon();
+ ValueNum vn = gtVNPair.GetLiberal();
+ if (vn != ValueNumStore::NoVN)
+ {
+ // Is the ValueNum a MapSelect involving a SharedStatic helper?
+ VNFuncApp funcApp1;
+ if (comp->vnStore->GetVNFunc(vn, &funcApp1) && (funcApp1.m_func == VNF_MapSelect) &&
+ (comp->vnStore->IsSharedStatic(funcApp1.m_args[1])))
+ {
+ ValueNum mapVN = funcApp1.m_args[0];
+ // Is this new 'mapVN' ValueNum, a MapSelect involving a handle?
+ VNFuncApp funcApp2;
+ if (comp->vnStore->GetVNFunc(mapVN, &funcApp2) && (funcApp2.m_func == VNF_MapSelect) &&
+ (comp->vnStore->IsVNHandle(funcApp2.m_args[1])))
+ {
+ ValueNum fldHndVN = funcApp2.m_args[1];
+ // Is this new 'fldHndVN' VNhandle a FieldHandle?
+ unsigned flags = comp->vnStore->GetHandleFlags(fldHndVN);
+ if (flags == GTF_ICON_FIELD_HDL)
+ {
+ CORINFO_FIELD_HANDLE fieldHnd =
+ CORINFO_FIELD_HANDLE(comp->vnStore->ConstantValue<ssize_t>(fldHndVN));
+
+ // Record this field sequence in 'statStructFldSeq' as it is likely to be a Boxed Struct
+ // field access.
+ statStructFldSeq = comp->GetFieldSeqStore()->CreateSingleton(fieldHnd);
+ }
+ }
+ }
+ }
+ }
+
+ if (statStructFldSeq != nullptr)
+ {
+ assert(statStructFldSeq->m_next == nullptr);
+ // Is this a pointer to a boxed struct?
+ if (comp->gtIsStaticFieldPtrToBoxedStruct(TYP_REF, statStructFldSeq->m_fieldHnd))
+ {
+ *pFldSeq = comp->GetFieldSeqStore()->Append(statStructFldSeq, *pFldSeq);
+ *pObj = nullptr;
+ *pStatic = this;
+ return true;
+ }
+ }
+
+ // Otherwise...
+ *pObj = this;
+ *pStatic = nullptr;
+ return true;
+ }
+ else if (OperGet() == GT_ADD)
+ {
+ // op1 should never be a field sequence (or any other kind of handle)
+ assert((gtOp.gtOp1->gtOper != GT_CNS_INT) || !gtOp.gtOp1->IsIconHandle());
+ if (gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ newFldSeq = gtOp.gtOp2->AsIntCon()->gtFieldSeq;
+ baseAddr = gtOp.gtOp1;
+ }
+ }
+ else
+ {
+ // Check if "this" has a zero-offset annotation.
+ if (!comp->GetZeroOffsetFieldMap()->Lookup(this, &newFldSeq))
+ {
+ // If not, this is not a field address.
+ return false;
+ }
+ else
+ {
+ baseAddr = this;
+ mustBeStatic = true;
+ }
+ }
+
+ // If we don't have a field seq, it's not a field address.
+ if (newFldSeq == nullptr || newFldSeq == FieldSeqStore::NotAField())
+ {
+ return false;
+ }
+
+ // Prepend this field to whatever we've already accumulated (outside-in).
+ *pFldSeq = comp->GetFieldSeqStore()->Append(newFldSeq, *pFldSeq);
+
+ // Is it a static or instance field?
+ if (!FieldSeqStore::IsPseudoField(newFldSeq->m_fieldHnd) &&
+ comp->info.compCompHnd->isFieldStatic(newFldSeq->m_fieldHnd))
+ {
+ // It is a static field. We're done.
+ *pObj = nullptr;
+ *pStatic = baseAddr;
+ return true;
+ }
+ else if ((baseAddr != nullptr) && !mustBeStatic)
+ {
+ // It's an instance field...but it must be for a struct field, since we've not yet encountered
+ // a "TYP_REF" address. Analyze the reset of the address.
+ return baseAddr->gtEffectiveVal()->IsFieldAddr(comp, pObj, pStatic, pFldSeq);
+ }
+
+ // Otherwise...
+ return false;
+}
+
+bool Compiler::gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_FIELD_HANDLE fldHnd)
+{
+ if (fieldNodeType != TYP_REF)
+ {
+ return false;
+ }
+ CORINFO_CLASS_HANDLE fldCls = nullptr;
+ noway_assert(fldHnd != nullptr);
+ CorInfoType cit = info.compCompHnd->getFieldType(fldHnd, &fldCls);
+ var_types fieldTyp = JITtype2varType(cit);
+ return fieldTyp != TYP_REF;
+}
+
+CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
+{
+ CORINFO_CLASS_HANDLE structHnd = NO_CLASS_HANDLE;
+ tree = tree->gtEffectiveVal();
+ if (varTypeIsStruct(tree->gtType))
+ {
+ switch (tree->gtOper)
+ {
+ default:
+ break;
+ case GT_MKREFANY:
+ structHnd = impGetRefAnyClass();
+ break;
+ case GT_OBJ:
+ structHnd = tree->gtObj.gtClass;
+ break;
+ case GT_CALL:
+ structHnd = tree->gtCall.gtRetClsHnd;
+ break;
+ case GT_RET_EXPR:
+ structHnd = tree->gtRetExpr.gtRetClsHnd;
+ break;
+ case GT_ARGPLACE:
+ structHnd = tree->gtArgPlace.gtArgPlaceClsHnd;
+ break;
+ case GT_INDEX:
+ structHnd = tree->gtIndex.gtStructElemClass;
+ break;
+ case GT_FIELD:
+ info.compCompHnd->getFieldType(tree->gtField.gtFldHnd, &structHnd);
+ break;
+ case GT_ASG:
+ structHnd = gtGetStructHandleIfPresent(tree->gtGetOp1());
+ break;
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ structHnd = lvaTable[tree->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
+ break;
+ case GT_RETURN:
+ structHnd = gtGetStructHandleIfPresent(tree->gtOp.gtOp1);
+ break;
+ case GT_IND:
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(tree))
+ {
+ structHnd = gtGetStructHandleForSIMD(tree->gtType, TYP_FLOAT);
+ }
+ else
+#endif
+ if (tree->gtFlags & GTF_IND_ARR_INDEX)
+ {
+ ArrayInfo arrInfo;
+ bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
+ assert(b);
+ structHnd = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+ }
+ break;
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ structHnd = gtGetStructHandleForSIMD(tree->gtType, tree->AsSIMD()->gtSIMDBaseType);
+#endif // FEATURE_SIMD
+ break;
+ }
+ }
+ return structHnd;
+}
+
+CORINFO_CLASS_HANDLE Compiler::gtGetStructHandle(GenTree* tree)
+{
+ CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(tree);
+ assert(structHnd != NO_CLASS_HANDLE);
+ return structHnd;
+}
+
+void GenTree::ParseArrayAddress(
+ Compiler* comp, ArrayInfo* arrayInfo, GenTreePtr* pArr, ValueNum* pInxVN, FieldSeqNode** pFldSeq)
+{
+ *pArr = nullptr;
+ ValueNum inxVN = ValueNumStore::NoVN;
+ ssize_t offset = 0;
+ FieldSeqNode* fldSeq = nullptr;
+
+ ParseArrayAddressWork(comp, 1, pArr, &inxVN, &offset, &fldSeq);
+
+ // If we didn't find an array reference (perhaps it is the constant null?) we will give up.
+ if (*pArr == nullptr)
+ {
+ return;
+ }
+
+ // OK, now we have to figure out if any part of the "offset" is a constant contribution to the index.
+ // First, sum the offsets of any fields in fldSeq.
+ unsigned fieldOffsets = 0;
+ FieldSeqNode* fldSeqIter = fldSeq;
+ // Also, find the first non-pseudo field...
+ assert(*pFldSeq == nullptr);
+ while (fldSeqIter != nullptr)
+ {
+ if (fldSeqIter == FieldSeqStore::NotAField())
+ {
+ // TODO-Review: A NotAField here indicates a failure to properly maintain the field sequence
+ // See test case self_host_tests_x86\jit\regression\CLR-x86-JIT\v1-m12-beta2\ b70992\ b70992.exe
+ // Safest thing to do here is to drop back to MinOpts
+ noway_assert(!"fldSeqIter is NotAField() in ParseArrayAddress");
+ }
+
+ if (!FieldSeqStore::IsPseudoField(fldSeqIter->m_fieldHnd))
+ {
+ if (*pFldSeq == nullptr)
+ {
+ *pFldSeq = fldSeqIter;
+ }
+ CORINFO_CLASS_HANDLE fldCls = nullptr;
+ noway_assert(fldSeqIter->m_fieldHnd != nullptr);
+ CorInfoType cit = comp->info.compCompHnd->getFieldType(fldSeqIter->m_fieldHnd, &fldCls);
+ fieldOffsets += comp->compGetTypeSize(cit, fldCls);
+ }
+ fldSeqIter = fldSeqIter->m_next;
+ }
+
+ // Is there some portion of the "offset" beyond the first-elem offset and the struct field suffix we just computed?
+ if (!FitsIn<ssize_t>(fieldOffsets + arrayInfo->m_elemOffset) || !FitsIn<ssize_t>(arrayInfo->m_elemSize))
+ {
+ // This seems unlikely, but no harm in being safe...
+ *pInxVN = comp->GetValueNumStore()->VNForExpr(nullptr, TYP_INT);
+ return;
+ }
+ // Otherwise...
+ ssize_t offsetAccountedFor = static_cast<ssize_t>(fieldOffsets + arrayInfo->m_elemOffset);
+ ssize_t elemSize = static_cast<ssize_t>(arrayInfo->m_elemSize);
+
+ ssize_t constIndOffset = offset - offsetAccountedFor;
+ // This should be divisible by the element size...
+ assert((constIndOffset % elemSize) == 0);
+ ssize_t constInd = constIndOffset / elemSize;
+
+ ValueNumStore* vnStore = comp->GetValueNumStore();
+
+ if (inxVN == ValueNumStore::NoVN)
+ {
+ // Must be a constant index.
+ *pInxVN = vnStore->VNForPtrSizeIntCon(constInd);
+ }
+ else
+ {
+ //
+ // Perform ((inxVN / elemSizeVN) + vnForConstInd)
+ //
+
+ // The value associated with the index value number (inxVN) is the offset into the array,
+ // which has been scaled by element size. We need to recover the array index from that offset
+ if (vnStore->IsVNConstant(inxVN))
+ {
+ ssize_t index = vnStore->CoercedConstantValue<ssize_t>(inxVN);
+ noway_assert(elemSize > 0 && ((index % elemSize) == 0));
+ *pInxVN = vnStore->VNForPtrSizeIntCon((index / elemSize) + constInd);
+ }
+ else
+ {
+ bool canFoldDiv = false;
+
+ // If the index VN is a MUL by elemSize, see if we can eliminate it instead of adding
+ // the division by elemSize.
+ VNFuncApp funcApp;
+ if (vnStore->GetVNFunc(inxVN, &funcApp) && funcApp.m_func == (VNFunc)GT_MUL)
+ {
+ ValueNum vnForElemSize = vnStore->VNForLongCon(elemSize);
+
+ // One of the multiply operands is elemSize, so the resulting
+ // index VN should simply be the other operand.
+ if (funcApp.m_args[1] == vnForElemSize)
+ {
+ *pInxVN = funcApp.m_args[0];
+ canFoldDiv = true;
+ }
+ else if (funcApp.m_args[0] == vnForElemSize)
+ {
+ *pInxVN = funcApp.m_args[1];
+ canFoldDiv = true;
+ }
+ }
+
+ // Perform ((inxVN / elemSizeVN) + vnForConstInd)
+ if (!canFoldDiv)
+ {
+ ValueNum vnForElemSize = vnStore->VNForPtrSizeIntCon(elemSize);
+ ValueNum vnForScaledInx =
+ vnStore->VNForFunc(TYP_I_IMPL, GetVNFuncForOper(GT_DIV, false), inxVN, vnForElemSize);
+ *pInxVN = vnForScaledInx;
+ }
+
+ if (constInd != 0)
+ {
+ ValueNum vnForConstInd = comp->GetValueNumStore()->VNForPtrSizeIntCon(constInd);
+ *pInxVN = comp->GetValueNumStore()->VNForFunc(TYP_I_IMPL,
+ GetVNFuncForOper(GT_ADD, (gtFlags & GTF_UNSIGNED) != 0),
+ *pInxVN, vnForConstInd);
+ }
+ }
+ }
+}
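+
+// Editorial worked example (hypothetical numbers): suppose the element size is 4, the
+// first-element offset is 16, and the address tree is "arr + 16 + 4*i + 8". The walk yields
+// offset == 24 and inxVN == VN(4*i); with no field suffix, offsetAccountedFor == 16, so
+// constIndOffset == 8 and constInd == 2. The MUL by the element size is then folded away,
+// producing an overall index value number of i + 2.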
+
+void GenTree::ParseArrayAddressWork(
+ Compiler* comp, ssize_t inputMul, GenTreePtr* pArr, ValueNum* pInxVN, ssize_t* pOffset, FieldSeqNode** pFldSeq)
+{
+ if (TypeGet() == TYP_REF)
+ {
+ // This must be the array pointer.
+ *pArr = this;
+ assert(inputMul == 1); // Can't multiply the array pointer by anything.
+ }
+ else
+ {
+ switch (OperGet())
+ {
+ case GT_CNS_INT:
+ *pFldSeq = comp->GetFieldSeqStore()->Append(*pFldSeq, gtIntCon.gtFieldSeq);
+ *pOffset += (inputMul * gtIntCon.gtIconVal);
+ return;
+
+ case GT_ADD:
+ case GT_SUB:
+ gtOp.gtOp1->ParseArrayAddressWork(comp, inputMul, pArr, pInxVN, pOffset, pFldSeq);
+ if (OperGet() == GT_SUB)
+ {
+ inputMul = -inputMul;
+ }
+ gtOp.gtOp2->ParseArrayAddressWork(comp, inputMul, pArr, pInxVN, pOffset, pFldSeq);
+ return;
+
+ case GT_MUL:
+ {
+ // If one op is a constant, continue parsing down.
+ ssize_t subMul = 0;
+ GenTreePtr nonConst = nullptr;
+ if (gtOp.gtOp1->IsCnsIntOrI())
+ {
+ // If the other arg is an int constant, and is a "not-a-field", choose
+ // that as the multiplier, thus preserving constant index offsets...
+ if (gtOp.gtOp2->OperGet() == GT_CNS_INT &&
+ gtOp.gtOp2->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField())
+ {
+ subMul = gtOp.gtOp2->gtIntConCommon.IconValue();
+ nonConst = gtOp.gtOp1;
+ }
+ else
+ {
+ subMul = gtOp.gtOp1->gtIntConCommon.IconValue();
+ nonConst = gtOp.gtOp2;
+ }
+ }
+ else if (gtOp.gtOp2->IsCnsIntOrI())
+ {
+ subMul = gtOp.gtOp2->gtIntConCommon.IconValue();
+ nonConst = gtOp.gtOp1;
+ }
+ if (nonConst != nullptr)
+ {
+ nonConst->ParseArrayAddressWork(comp, inputMul * subMul, pArr, pInxVN, pOffset, pFldSeq);
+ return;
+ }
+ // Otherwise, exit the switch, treat as a contribution to the index.
+ }
+ break;
+
+ case GT_LSH:
+ // If one op is a constant, continue parsing down.
+ if (gtOp.gtOp2->IsCnsIntOrI())
+ {
+ ssize_t subMul = 1 << gtOp.gtOp2->gtIntConCommon.IconValue();
+ gtOp.gtOp1->ParseArrayAddressWork(comp, inputMul * subMul, pArr, pInxVN, pOffset, pFldSeq);
+ return;
+ }
+ // Otherwise, exit the switch, treat as a contribution to the index.
+ break;
+
+ case GT_COMMA:
+ // We don't care about exceptions for this purpose.
+ if ((gtOp.gtOp1->OperGet() == GT_ARR_BOUNDS_CHECK) || gtOp.gtOp1->IsNothingNode())
+ {
+ gtOp.gtOp2->ParseArrayAddressWork(comp, inputMul, pArr, pInxVN, pOffset, pFldSeq);
+ return;
+ }
+ break;
+
+ default:
+ break;
+ }
+ // If we didn't return above, this must be a contribution to the non-constant part of the index VN.
+ ValueNum vn = comp->GetValueNumStore()->VNNormVal(gtVNPair.GetLiberal()); // We don't care about exceptions for
+ // this purpose.
+ if (inputMul != 1)
+ {
+ ValueNum mulVN = comp->GetValueNumStore()->VNForLongCon(inputMul);
+ vn = comp->GetValueNumStore()->VNForFunc(TypeGet(), GetVNFuncForOper(GT_MUL, false), mulVN, vn);
+ }
+ if (*pInxVN == ValueNumStore::NoVN)
+ {
+ *pInxVN = vn;
+ }
+ else
+ {
+ *pInxVN = comp->GetValueNumStore()->VNForFunc(TypeGet(), GetVNFuncForOper(GT_ADD, false), *pInxVN, vn);
+ }
+ }
+}
+
+bool GenTree::ParseArrayElemForm(Compiler* comp, ArrayInfo* arrayInfo, FieldSeqNode** pFldSeq)
+{
+ if (OperIsIndir())
+ {
+ if (gtFlags & GTF_IND_ARR_INDEX)
+ {
+ bool b = comp->GetArrayInfoMap()->Lookup(this, arrayInfo);
+ assert(b);
+ return true;
+ }
+
+ // Otherwise...
+ GenTreePtr addr = AsIndir()->Addr();
+ return addr->ParseArrayElemAddrForm(comp, arrayInfo, pFldSeq);
+ }
+ else
+ {
+ return false;
+ }
+}
+
+bool GenTree::ParseArrayElemAddrForm(Compiler* comp, ArrayInfo* arrayInfo, FieldSeqNode** pFldSeq)
+{
+ switch (OperGet())
+ {
+ case GT_ADD:
+ {
+ GenTreePtr arrAddr = nullptr;
+ GenTreePtr offset = nullptr;
+ if (gtOp.gtOp1->TypeGet() == TYP_BYREF)
+ {
+ arrAddr = gtOp.gtOp1;
+ offset = gtOp.gtOp2;
+ }
+ else if (gtOp.gtOp2->TypeGet() == TYP_BYREF)
+ {
+ arrAddr = gtOp.gtOp2;
+ offset = gtOp.gtOp1;
+ }
+ else
+ {
+ return false;
+ }
+ if (!offset->ParseOffsetForm(comp, pFldSeq))
+ {
+ return false;
+ }
+ return arrAddr->ParseArrayElemAddrForm(comp, arrayInfo, pFldSeq);
+ }
+
+ case GT_ADDR:
+ {
+ GenTreePtr addrArg = gtOp.gtOp1;
+ if (addrArg->OperGet() != GT_IND)
+ {
+ return false;
+ }
+ else
+ {
+ // The "Addr" node might be annotated with a zero-offset field sequence.
+ FieldSeqNode* zeroOffsetFldSeq = nullptr;
+ if (comp->GetZeroOffsetFieldMap()->Lookup(this, &zeroOffsetFldSeq))
+ {
+ *pFldSeq = comp->GetFieldSeqStore()->Append(*pFldSeq, zeroOffsetFldSeq);
+ }
+ return addrArg->ParseArrayElemForm(comp, arrayInfo, pFldSeq);
+ }
+ }
+
+ default:
+ return false;
+ }
+}
+
+bool GenTree::ParseOffsetForm(Compiler* comp, FieldSeqNode** pFldSeq)
+{
+ switch (OperGet())
+ {
+ case GT_CNS_INT:
+ {
+ GenTreeIntCon* icon = AsIntCon();
+ *pFldSeq = comp->GetFieldSeqStore()->Append(*pFldSeq, icon->gtFieldSeq);
+ return true;
+ }
+
+ case GT_ADD:
+ if (!gtOp.gtOp1->ParseOffsetForm(comp, pFldSeq))
+ {
+ return false;
+ }
+ return gtOp.gtOp2->ParseOffsetForm(comp, pFldSeq);
+
+ default:
+ return false;
+ }
+}
+
+void GenTree::LabelIndex(Compiler* comp, bool isConst)
+{
+ switch (OperGet())
+ {
+ case GT_CNS_INT:
+ // If we got here, this is a contribution to the constant part of the index.
+ if (isConst)
+ {
+ gtIntCon.gtFieldSeq =
+ comp->GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
+ }
+ return;
+
+ case GT_LCL_VAR:
+ gtFlags |= GTF_VAR_ARR_INDEX;
+ return;
+
+ case GT_ADD:
+ case GT_SUB:
+ gtOp.gtOp1->LabelIndex(comp, isConst);
+ gtOp.gtOp2->LabelIndex(comp, isConst);
+ break;
+
+ case GT_CAST:
+ gtOp.gtOp1->LabelIndex(comp, isConst);
+ break;
+
+ case GT_ARR_LENGTH:
+ gtFlags |= GTF_ARRLEN_ARR_IDX;
+ return;
+
+ default:
+ // For all other operators, peel off one constant; and then label the other if it's also a constant.
+ if (OperIsArithmetic() || OperIsCompare())
+ {
+ if (gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ gtOp.gtOp1->LabelIndex(comp, isConst);
+ break;
+ }
+ else if (gtOp.gtOp1->OperGet() == GT_CNS_INT)
+ {
+ gtOp.gtOp2->LabelIndex(comp, isConst);
+ break;
+ }
+ // Otherwise continue downward on both, labeling vars.
+ gtOp.gtOp1->LabelIndex(comp, false);
+ gtOp.gtOp2->LabelIndex(comp, false);
+ }
+ break;
+ }
+}
+
+// Note that the value of the below field doesn't matter; it exists only to provide a distinguished address.
+//
+// static
+FieldSeqNode FieldSeqStore::s_notAField(nullptr, nullptr);
+
+// FieldSeqStore methods.
+FieldSeqStore::FieldSeqStore(IAllocator* alloc) : m_alloc(alloc), m_canonMap(new (alloc) FieldSeqNodeCanonMap(alloc))
+{
+}
+
+FieldSeqNode* FieldSeqStore::CreateSingleton(CORINFO_FIELD_HANDLE fieldHnd)
+{
+ FieldSeqNode fsn(fieldHnd, nullptr);
+ FieldSeqNode* res = nullptr;
+ if (m_canonMap->Lookup(fsn, &res))
+ {
+ return res;
+ }
+ else
+ {
+ res = reinterpret_cast<FieldSeqNode*>(m_alloc->Alloc(sizeof(FieldSeqNode)));
+ *res = fsn;
+ m_canonMap->Set(fsn, res);
+ return res;
+ }
+}
+
+FieldSeqNode* FieldSeqStore::Append(FieldSeqNode* a, FieldSeqNode* b)
+{
+ if (a == nullptr)
+ {
+ return b;
+ }
+ else if (a == NotAField())
+ {
+ return NotAField();
+ }
+ else if (b == nullptr)
+ {
+ return a;
+ }
+ else if (b == NotAField())
+ {
+ return NotAField();
+ // Extremely special case for ConstantIndex pseudo-fields -- appending two consecutive
+ // ones collapses them to one.
+ }
+ else if (a->m_next == nullptr && a->m_fieldHnd == ConstantIndexPseudoField &&
+ b->m_fieldHnd == ConstantIndexPseudoField)
+ {
+ return b;
+ }
+ else
+ {
+ FieldSeqNode* tmp = Append(a->m_next, b);
+ FieldSeqNode fsn(a->m_fieldHnd, tmp);
+ FieldSeqNode* res = nullptr;
+ if (m_canonMap->Lookup(fsn, &res))
+ {
+ return res;
+ }
+ else
+ {
+ res = reinterpret_cast<FieldSeqNode*>(m_alloc->Alloc(sizeof(FieldSeqNode)));
+ *res = fsn;
+ m_canonMap->Set(fsn, res);
+ return res;
+ }
+ }
+}
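+
+// Editorial illustration (not part of the original change): Append returns canonical
+// (interned) sequences, e.g. Append([a,b], [c]) yields the shared [a,b,c] node chain;
+// Append with NotAField() on either side yields NotAField(); and two adjacent ConstantIndex
+// pseudo-fields collapse into one, as noted above.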
+
+// Static vars.
+int FieldSeqStore::FirstElemPseudoFieldStruct;
+int FieldSeqStore::ConstantIndexPseudoFieldStruct;
+
+CORINFO_FIELD_HANDLE FieldSeqStore::FirstElemPseudoField =
+ (CORINFO_FIELD_HANDLE)&FieldSeqStore::FirstElemPseudoFieldStruct;
+CORINFO_FIELD_HANDLE FieldSeqStore::ConstantIndexPseudoField =
+ (CORINFO_FIELD_HANDLE)&FieldSeqStore::ConstantIndexPseudoFieldStruct;
+
+bool FieldSeqNode::IsFirstElemFieldSeq()
+{
+ // this must be non-null per ISO C++
+ return m_fieldHnd == FieldSeqStore::FirstElemPseudoField;
+}
+
+bool FieldSeqNode::IsConstantIndexFieldSeq()
+{
+ // this must be non-null per ISO C++
+ return m_fieldHnd == FieldSeqStore::ConstantIndexPseudoField;
+}
+
+bool FieldSeqNode::IsPseudoField()
+{
+ if (this == nullptr)
+ {
+ return false;
+ }
+ return m_fieldHnd == FieldSeqStore::FirstElemPseudoField || m_fieldHnd == FieldSeqStore::ConstantIndexPseudoField;
+}
+
+#ifdef FEATURE_SIMD
+GenTreeSIMD* Compiler::gtNewSIMDNode(
+ var_types type, GenTreePtr op1, SIMDIntrinsicID simdIntrinsicID, var_types baseType, unsigned size)
+{
+ // TODO-CQ: An operand may be a GT_OBJ(GT_ADDR(GT_LCL_VAR)), in which case it should be
+ // marked lvUsedInSIMDIntrinsic.
+ assert(op1 != nullptr);
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+ lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ }
+
+ return new (this, GT_SIMD) GenTreeSIMD(type, op1, simdIntrinsicID, baseType, size);
+}
+
+GenTreeSIMD* Compiler::gtNewSIMDNode(
+ var_types type, GenTreePtr op1, GenTreePtr op2, SIMDIntrinsicID simdIntrinsicID, var_types baseType, unsigned size)
+{
+ // TODO-CQ: An operand may be a GT_OBJ(GT_ADDR(GT_LCL_VAR)), in which case it should be
+ // marked lvUsedInSIMDIntrinsic.
+ assert(op1 != nullptr);
+ if (op1->OperIsLocal())
+ {
+ unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+ lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ }
+
+ if (op2 != nullptr && op2->OperIsLocal())
+ {
+ unsigned lclNum = op2->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+ lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ }
+
+ return new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size);
+}
+
+bool GenTree::isCommutativeSIMDIntrinsic()
+{
+ assert(gtOper == GT_SIMD);
+ switch (AsSIMD()->gtSIMDIntrinsicID)
+ {
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ case SIMDIntrinsicEqual:
+ case SIMDIntrinsicMax:
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ return true;
+ default:
+ return false;
+ }
+}
+#endif // FEATURE_SIMD
+
+//---------------------------------------------------------------------------------------
+// GenTreeArgList::Prepend:
+// Prepends an element to a GT_LIST.
+//
+// Arguments:
+// compiler - The compiler context.
+// element - The element to prepend.
+//
+// Returns:
+// The new head of the list.
+GenTreeArgList* GenTreeArgList::Prepend(Compiler* compiler, GenTree* element)
+{
+ GenTreeArgList* head = compiler->gtNewListNode(element, this);
+ head->gtFlags |= (gtFlags & GTF_LIST_AGGREGATE);
+ gtFlags &= ~GTF_LIST_AGGREGATE;
+ return head;
+}
+
+//---------------------------------------------------------------------------------------
+// InitializeStructReturnType:
+// Initialize the Return Type Descriptor for a method that returns a struct type
+//
+// Arguments
+// comp - Compiler Instance
+// retClsHnd - VM handle to the struct type returned by the method
+//
+// Return Value
+// None
+//
+void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, CORINFO_CLASS_HANDLE retClsHnd)
+{
+ assert(!m_inited);
+
+#if FEATURE_MULTIREG_RET
+
+ assert(retClsHnd != NO_CLASS_HANDLE);
+ unsigned structSize = comp->info.compCompHnd->getClassSize(retClsHnd);
+
+ Compiler::structPassingKind howToReturnStruct;
+ var_types returnType = comp->getReturnTypeForStruct(retClsHnd, &howToReturnStruct, structSize);
+
+ switch (howToReturnStruct)
+ {
+ case Compiler::SPK_PrimitiveType:
+ {
+ assert(returnType != TYP_UNKNOWN);
+ assert(returnType != TYP_STRUCT);
+ m_regType[0] = returnType;
+ break;
+ }
+
+ case Compiler::SPK_ByValueAsHfa:
+ {
+ assert(returnType == TYP_STRUCT);
+ var_types hfaType = comp->GetHfaType(retClsHnd);
+
+ // We should have an hfa struct type
+ assert(varTypeIsFloating(hfaType));
+
+ // Note that the retail build issues a warning about a potential division by zero without this Max function
+ unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
+
+ // The size of this struct should be evenly divisible by elemSize
+ assert((structSize % elemSize) == 0);
+
+ unsigned hfaCount = (structSize / elemSize);
+ for (unsigned i = 0; i < hfaCount; ++i)
+ {
+ m_regType[i] = hfaType;
+ }
+
+ if (comp->compFloatingPointUsed == false)
+ {
+ comp->compFloatingPointUsed = true;
+ }
+ break;
+ }
+
+ case Compiler::SPK_ByValue:
+ {
+ assert(returnType == TYP_STRUCT);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ comp->eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+
+ assert(structDesc.passedInRegisters);
+ for (int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ assert(i < MAX_RET_REG_COUNT);
+ m_regType[i] = comp->GetEightByteType(structDesc, i);
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ // a non-HFA struct returned using two registers
+ //
+ assert((structSize > TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE)));
+
+ BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
+ comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]);
+ for (unsigned i = 0; i < 2; ++i)
+ {
+ m_regType[i] = comp->getJitGCType(gcPtrs[i]);
+ }
+
+#else // _TARGET_XXX_
+
+ // This target needs support here!
+ //
+ NYI("Unsupported TARGET returning a TYP_STRUCT in InitializeStructReturnType");
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ break; // for case SPK_ByValue
+ }
+
+ case Compiler::SPK_ByReference:
+
+ // We are returning using the return buffer argument
+ // There are no return registers
+ break;
+
+ default:
+
+ unreached(); // By the contract of getReturnTypeForStruct we should never get here.
+
+ } // end of switch (howToReturnStruct)
+
+#endif // FEATURE_MULTIREG_RET
+
+#ifdef DEBUG
+ m_inited = true;
+#endif
+}
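+
+// Illustrative note (not part of the original source): on Arm64, for example, an HFA of four floats
+// (structSize == 16, hfaType == TYP_FLOAT) ends up with m_regType[0..3] == TYP_FLOAT, while a
+// 16-byte non-HFA struct gets two entries derived from its GC layout (e.g. TYP_I_IMPL or TYP_REF
+// per returned register).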
+
+//---------------------------------------------------------------------------------------
+// InitializeLongReturnType:
+// Initialize the Return Type Descriptor for a method that returns a TYP_LONG
+//
+// Arguments:
+// comp - Compiler instance
+//
+// Return Value:
+// None
+//
+void ReturnTypeDesc::InitializeLongReturnType(Compiler* comp)
+{
+#if defined(_TARGET_X86_)
+
+ // Sets up a ReturnTypeDesc for returning a long using two registers
+ //
+ assert(MAX_RET_REG_COUNT >= 2);
+ m_regType[0] = TYP_INT;
+ m_regType[1] = TYP_INT;
+
+#else // not _TARGET_X86_
+
+ m_regType[0] = TYP_LONG;
+
+#endif // _TARGET_X86_
+
+#ifdef DEBUG
+ m_inited = true;
+#endif
+}
+
+//-------------------------------------------------------------------
+// GetABIReturnReg: Return ith return register as per target ABI
+//
+// Arguments:
+// idx - Index of the return register.
+// The first return register has an index of 0 and so on.
+//
+// Return Value:
+// Returns ith return register as per target ABI.
+//
+// Notes:
+// Implemented for x64 Unix (SysV multi-reg struct returns), x86 (long returns)
+// and Arm64 (two-register struct and HFA returns); Arm32 HFA returns are not
+// yet supported.
+//
+// TODO-ARM: Implement this routine to support HFA returns.
+regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx)
+{
+ unsigned count = GetReturnRegCount();
+ assert(idx < count);
+
+ regNumber resultReg = REG_NA;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ var_types regType0 = GetReturnRegType(0);
+
+ if (idx == 0)
+ {
+ if (varTypeIsIntegralOrI(regType0))
+ {
+ resultReg = REG_INTRET;
+ }
+ else
+ {
+ noway_assert(varTypeIsFloating(regType0));
+ resultReg = REG_FLOATRET;
+ }
+ }
+ else if (idx == 1)
+ {
+ var_types regType1 = GetReturnRegType(1);
+
+ if (varTypeIsIntegralOrI(regType1))
+ {
+ if (varTypeIsIntegralOrI(regType0))
+ {
+ resultReg = REG_INTRET_1;
+ }
+ else
+ {
+ resultReg = REG_INTRET;
+ }
+ }
+ else
+ {
+ noway_assert(varTypeIsFloating(regType1));
+
+ if (varTypeIsFloating(regType0))
+ {
+ resultReg = REG_FLOATRET_1;
+ }
+ else
+ {
+ resultReg = REG_FLOATRET;
+ }
+ }
+ }
+
+#elif defined(_TARGET_X86_)
+
+ if (idx == 0)
+ {
+ resultReg = REG_LNGRET_LO;
+ }
+ else if (idx == 1)
+ {
+ resultReg = REG_LNGRET_HI;
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ var_types regType = GetReturnRegType(idx);
+ if (varTypeIsIntegralOrI(regType))
+ {
+ noway_assert(idx < 2); // Up to 2 return registers for 16-byte structs
+ resultReg = (idx == 0) ? REG_INTRET : REG_INTRET_1; // X0 or X1
+ }
+ else
+ {
+ noway_assert(idx < 4); // Up to 4 return registers for HFA's
+ resultReg = (regNumber)((unsigned)(REG_FLOATRET) + idx); // V0, V1, V2 or V3
+ }
+
+#endif // TARGET_XXX
+
+ assert(resultReg != REG_NA);
+ return resultReg;
+}
+
+//--------------------------------------------------------------------------------
+// GetABIReturnRegs: get the mask of return registers as per target arch ABI.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// reg mask of return registers in which the return type is returned.
+//
+// Note:
+// This simply accumulates the registers returned by GetABIReturnReg, so it covers
+// the same targets (x64 Unix, x86 long returns and Arm64); Arm32 HFA returns are
+// not yet supported.
+//
+// This routine can be used when the caller is not particular about the order
+// of return registers and only wants to know the set of return registers.
+//
+// TODO-ARM: Implement this routine to support HFA returns.
+//
+regMaskTP ReturnTypeDesc::GetABIReturnRegs()
+{
+ regMaskTP resultMask = RBM_NONE;
+
+ unsigned count = GetReturnRegCount();
+ for (unsigned i = 0; i < count; ++i)
+ {
+ resultMask |= genRegMask(GetABIReturnReg(i));
+ }
+
+ return resultMask;
+}
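+
+// Illustrative sketch (not part of the original source): a hypothetical consumer that fills in a
+// descriptor for a struct-returning method and then queries the ABI return registers
+// ('comp' and 'retClsHnd' are assumed to be in scope):
+//
+//     ReturnTypeDesc retDesc;
+//     retDesc.InitializeStructReturnType(comp, retClsHnd);
+//     for (unsigned i = 0; i < retDesc.GetReturnRegCount(); ++i)
+//     {
+//         regNumber reg = retDesc.GetABIReturnReg(i);    // i'th register, in ABI order
+//     }
+//     regMaskTP regs = retDesc.GetABIReturnRegs();       // the same registers as a mask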
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
new file mode 100644
index 0000000000..4efeeae620
--- /dev/null
+++ b/src/jit/gentree.h
@@ -0,0 +1,5124 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX GenTree XX
+XX XX
+XX This is the node in the semantic tree graph. It represents the operation XX
+XX corresponding to the node, and other information during code-gen. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef _GENTREE_H_
+#define _GENTREE_H_
+/*****************************************************************************/
+
+#include "vartype.h" // For "var_types"
+#include "target.h" // For "regNumber"
+#include "ssaconfig.h" // For "SsaConfig::RESERVED_SSA_NUM"
+#include "reglist.h"
+#include "valuenumtype.h"
+#include "simplerhash.h"
+#include "nodeinfo.h"
+#include "simd.h"
+
+// Debugging GenTree is much easier if we add a magic virtual function to make the debugger able to figure out what type
+// it's got. This is enabled by default in DEBUG. To enable it in retail (non-DEBUG) builds (temporarily!), you need to change the
+// build to define DEBUGGABLE_GENTREE=1, as well as pass /OPT:NOICF to the linker (or else all the vtables get merged,
+// making the debugging value supplied by them useless). See protojit.nativeproj for a commented example of setting the
+// build flags correctly.
+#ifndef DEBUGGABLE_GENTREE
+#ifdef DEBUG
+#define DEBUGGABLE_GENTREE 1
+#else // !DEBUG
+#define DEBUGGABLE_GENTREE 0
+#endif // !DEBUG
+#endif // !DEBUGGABLE_GENTREE
+
+// The SpecialCodeKind enum is used to indicate the type of special (unique)
+// target block that will be targeted by an instruction.
+// These are used by:
+// GenTreeBoundsChk nodes (SCK_RNGCHK_FAIL, SCK_ARG_EXCPN, SCK_ARG_RNG_EXCPN)
+// - these nodes have a field (gtThrowKind) to indicate which kind
+// GenTreeOps nodes, for which codegen will generate the branch
+// - it will use the appropriate kind based on the opcode, though it's not
+// clear why SCK_OVERFLOW == SCK_ARITH_EXCPN
+// SCK_PAUSE_EXEC is not currently used.
+//
+enum SpecialCodeKind
+{
+ SCK_NONE,
+ SCK_RNGCHK_FAIL, // target when range check fails
+ SCK_PAUSE_EXEC, // target to stop (e.g. to allow GC)
+ SCK_DIV_BY_ZERO, // target for divide by zero (Not used on X86/X64)
+ SCK_ARITH_EXCPN, // target on arithmetic exception
+ SCK_OVERFLOW = SCK_ARITH_EXCPN, // target on overflow
+ SCK_ARG_EXCPN, // target on ArgumentException (currently used only for SIMD intrinsics)
+ SCK_ARG_RNG_EXCPN, // target on ArgumentOutOfRangeException (currently used only for SIMD intrinsics)
+ SCK_COUNT
+};
+
+/*****************************************************************************/
+
+DECLARE_TYPED_ENUM(genTreeOps, BYTE)
+{
+#define GTNODE(en, sn, cm, ok) GT_##en,
+#include "gtlist.h"
+
+ GT_COUNT,
+
+#ifdef _TARGET_64BIT_
+ // GT_CNS_NATIVELONG is the gtOper symbol for GT_CNS_LNG or GT_CNS_INT, depending on the target.
+ // For the 64-bit targets we will only use GT_CNS_INT as it is used to represent all the possible sizes
+ GT_CNS_NATIVELONG = GT_CNS_INT,
+#else
+ // For the 32-bit targets we use a GT_CNS_LNG to hold a 64-bit integer constant and GT_CNS_INT for all others.
+ // In the future when we retarget the JIT for x86 we should consider eliminating GT_CNS_LNG
+ GT_CNS_NATIVELONG = GT_CNS_LNG,
+#endif
+}
+END_DECLARE_TYPED_ENUM(genTreeOps, BYTE)
+
+/*****************************************************************************
+ *
+ * The following enum defines a set of bit flags that can be used
+ * to classify expression tree nodes. Note that some operators will
+ * have more than one bit set, as follows:
+ *
+ * GTK_CONST implies GTK_LEAF
+ * GTK_RELOP implies GTK_BINOP
+ * GTK_LOGOP implies GTK_BINOP
+ */
+
+enum genTreeKinds
+{
+ GTK_SPECIAL = 0x0000, // unclassified operator (special handling reqd)
+
+ GTK_CONST = 0x0001, // constant operator
+ GTK_LEAF = 0x0002, // leaf operator
+ GTK_UNOP = 0x0004, // unary operator
+ GTK_BINOP = 0x0008, // binary operator
+ GTK_RELOP = 0x0010, // comparison operator
+ GTK_LOGOP = 0x0020, // logical operator
+ GTK_ASGOP = 0x0040, // assignment operator
+
+ GTK_KINDMASK = 0x007F, // operator kind mask
+
+ GTK_COMMUTE = 0x0080, // commutative operator
+
+ GTK_EXOP = 0x0100, // Indicates an oper for a node type that extends GenTreeOp (or GenTreeUnOp)
+ // by adding non-node fields to a unary or binary operator.
+
+ GTK_LOCAL = 0x0200, // is a local access (load, store, phi)
+
+ GTK_NOVALUE = 0x0400, // node does not produce a value
+ GTK_NOTLIR = 0x0800, // node is not allowed in LIR
+
+ /* Define composite value(s) */
+
+ GTK_SMPOP = (GTK_UNOP | GTK_BINOP | GTK_RELOP | GTK_LOGOP)
+};
+
+/*****************************************************************************/
+
+#define SMALL_TREE_NODES 1
+
+/*****************************************************************************/
+
+DECLARE_TYPED_ENUM(gtCallTypes, BYTE)
+{
+ CT_USER_FUNC, // User function
+ CT_HELPER, // Jit-helper
+ CT_INDIRECT, // Indirect call
+
+ CT_COUNT // fake entry (must be last)
+}
+END_DECLARE_TYPED_ENUM(gtCallTypes, BYTE)
+
+/*****************************************************************************/
+
+struct BasicBlock;
+
+struct InlineCandidateInfo;
+
+/*****************************************************************************/
+
+// GT_FIELD nodes will be lowered into more "code-gen-able" representations, like
+// GT_IND's of addresses, or GT_LCL_FLD nodes. We'd like to preserve the more abstract
+// information, and will therefore annotate such lowered nodes with FieldSeq's. A FieldSeq
+// represents a (possibly) empty sequence of fields. The fields are in the order
+// in which they are dereferenced. The first field may be an object field or a struct field;
+// all subsequent fields must be struct fields.
+struct FieldSeqNode
+{
+ CORINFO_FIELD_HANDLE m_fieldHnd;
+ FieldSeqNode* m_next;
+
+ FieldSeqNode(CORINFO_FIELD_HANDLE fieldHnd, FieldSeqNode* next) : m_fieldHnd(fieldHnd), m_next(next)
+ {
+ }
+
+ // returns true when this is the pseudo #FirstElem field sequence
+ bool IsFirstElemFieldSeq();
+
+ // returns true when this is the pseudo #ConstantIndex field sequence
+ bool IsConstantIndexFieldSeq();
+
+ // returns true when this is the pseudo #FirstElem field sequence or the pseudo #ConstantIndex field sequence
+ bool IsPseudoField();
+
+ // Make sure this provides methods that allow it to be used as a KeyFuncs type in SimplerHash.
+ static int GetHashCode(FieldSeqNode fsn)
+ {
+ return static_cast<int>(reinterpret_cast<intptr_t>(fsn.m_fieldHnd)) ^
+ static_cast<int>(reinterpret_cast<intptr_t>(fsn.m_next));
+ }
+
+ static bool Equals(FieldSeqNode fsn1, FieldSeqNode fsn2)
+ {
+ return fsn1.m_fieldHnd == fsn2.m_fieldHnd && fsn1.m_next == fsn2.m_next;
+ }
+};
+
+// This class canonicalizes field sequences.
+class FieldSeqStore
+{
+ typedef SimplerHashTable<FieldSeqNode, /*KeyFuncs*/ FieldSeqNode, FieldSeqNode*, JitSimplerHashBehavior>
+ FieldSeqNodeCanonMap;
+
+ IAllocator* m_alloc;
+ FieldSeqNodeCanonMap* m_canonMap;
+
+ static FieldSeqNode s_notAField; // No value, just exists to provide an address.
+
+ // Dummy variables to provide the addresses for the "pseudo field handle" statics below.
+ static int FirstElemPseudoFieldStruct;
+ static int ConstantIndexPseudoFieldStruct;
+
+public:
+ FieldSeqStore(IAllocator* alloc);
+
+ // Returns the (canonical in the store) singleton field sequence for the given handle.
+ FieldSeqNode* CreateSingleton(CORINFO_FIELD_HANDLE fieldHnd);
+
+ // This is a special distinguished FieldSeqNode indicating that a constant does *not*
+ // represent a valid field sequence. This is "infectious", in the sense that appending it
+ // (on either side) to any field sequence yields the "NotAField()" sequence.
+ static FieldSeqNode* NotAField()
+ {
+ return &s_notAField;
+ }
+
+ // Returns the (canonical in the store) field sequence representing the concatenation of
+ // the sequences represented by "a" and "b". Assumes that "a" and "b" are canonical; that is,
+ // they are the results of CreateSingleton, NotAField, or Append calls. If either of the arguments
+ // are the "NotAField" value, so is the result.
+ FieldSeqNode* Append(FieldSeqNode* a, FieldSeqNode* b);
+
+ // We have a few "pseudo" field handles:
+
+ // This treats the constant offset of the first element of something as if it were a field.
+ // Works for method table offsets of boxed structs, or first elem offset of arrays/strings.
+ static CORINFO_FIELD_HANDLE FirstElemPseudoField;
+
+ // If there is a constant index, we make a pseudo field to correspond to the constant added to
+ // offset of the indexed field. This keeps the field sequence structure "normalized", especially in the
+ // case where the element type is a struct, so we might add a further struct field offset.
+ static CORINFO_FIELD_HANDLE ConstantIndexPseudoField;
+
+ static bool IsPseudoField(CORINFO_FIELD_HANDLE hnd)
+ {
+ return hnd == FirstElemPseudoField || hnd == ConstantIndexPseudoField;
+ }
+};
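+
+// Illustrative sketch (not part of the original source): building the canonical field sequence for
+// an access like "obj.a.b" ('store' is a hypothetical FieldSeqStore*, 'hndA'/'hndB' are
+// hypothetical CORINFO_FIELD_HANDLEs):
+//
+//     FieldSeqNode* seqA  = store->CreateSingleton(hndA);
+//     FieldSeqNode* seqAB = store->Append(seqA, store->CreateSingleton(hndB));
+//
+//     // Appending NotAField() on either side yields NotAField() again:
+//     assert(store->Append(seqAB, FieldSeqStore::NotAField()) == FieldSeqStore::NotAField());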
+
+class GenTreeUseEdgeIterator;
+class GenTreeOperandIterator;
+
+/*****************************************************************************/
+
+typedef struct GenTree* GenTreePtr;
+struct GenTreeArgList;
+
+// Forward declarations of the subtypes
+#define GTSTRUCT_0(fn, en) struct GenTree##fn;
+#define GTSTRUCT_1(fn, en) struct GenTree##fn;
+#define GTSTRUCT_2(fn, en, en2) struct GenTree##fn;
+#define GTSTRUCT_3(fn, en, en2, en3) struct GenTree##fn;
+#define GTSTRUCT_4(fn, en, en2, en3, en4) struct GenTree##fn;
+#define GTSTRUCT_N(fn, ...) struct GenTree##fn;
+#include "gtstructs.h"
+
+/*****************************************************************************/
+
+#ifndef _HOST_64BIT_
+#include <pshpack4.h>
+#endif
+
+struct GenTree
+{
+// We use GTSTRUCT_0 only for the category of simple ops.
+#define GTSTRUCT_0(fn, en) \
+ GenTree##fn* As##fn() \
+ { \
+ assert(this->OperIsSimple()); \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+#define GTSTRUCT_1(fn, en) \
+ GenTree##fn* As##fn() \
+ { \
+ assert(this->gtOper == en); \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+#define GTSTRUCT_2(fn, en, en2) \
+ GenTree##fn* As##fn() \
+ { \
+ assert(this->gtOper == en || this->gtOper == en2); \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+#define GTSTRUCT_3(fn, en, en2, en3) \
+ GenTree##fn* As##fn() \
+ { \
+ assert(this->gtOper == en || this->gtOper == en2 || this->gtOper == en3); \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+
+#define GTSTRUCT_4(fn, en, en2, en3, en4) \
+ GenTree##fn* As##fn() \
+ { \
+ assert(this->gtOper == en || this->gtOper == en2 || this->gtOper == en3 || this->gtOper == en4); \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+
+#ifdef DEBUG
+// VC does not optimize out this loop in retail even though the value it computes is unused
+// so we need a separate version for non-debug
+#define GTSTRUCT_N(fn, ...) \
+ GenTree##fn* As##fn() \
+ { \
+ genTreeOps validOps[] = {__VA_ARGS__}; \
+ bool found = false; \
+ for (unsigned i = 0; i < ArrLen(validOps); i++) \
+ { \
+ if (this->gtOper == validOps[i]) \
+ { \
+ found = true; \
+ break; \
+ } \
+ } \
+ assert(found); \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+#else
+#define GTSTRUCT_N(fn, ...) \
+ GenTree##fn* As##fn() \
+ { \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+#endif
+
+#include "gtstructs.h"
+
+ genTreeOps gtOper; // enum subtype BYTE
+ var_types gtType; // enum subtype BYTE
+
+ genTreeOps OperGet() const
+ {
+ return gtOper;
+ }
+ var_types TypeGet() const
+ {
+ return gtType;
+ }
+
+#ifdef DEBUG
+ genTreeOps gtOperSave; // Only used to save gtOper when we destroy a node, to aid debugging.
+#endif
+
+#if FEATURE_ANYCSE
+
+#define NO_CSE (0)
+
+#define IS_CSE_INDEX(x) (x != 0)
+#define IS_CSE_USE(x) (x > 0)
+#define IS_CSE_DEF(x) (x < 0)
+#define GET_CSE_INDEX(x) ((x > 0) ? x : -x)
+#define TO_CSE_DEF(x) (-x)
+
+ signed char gtCSEnum; // 0 or the CSE index (negated if def)
+ // valid only for CSE expressions
+
+#endif // FEATURE_ANYCSE
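+
+ // Illustrative note (not part of the original source): with this encoding, a node that is a use of
+ // CSE #3 carries gtCSEnum == 3 and the corresponding def carries gtCSEnum == -3, so (for a
+ // hypothetical 'tree'):
+ //
+ //     if (IS_CSE_INDEX(tree->gtCSEnum))
+ //     {
+ //         unsigned index = GET_CSE_INDEX(tree->gtCSEnum);   // 3 for both the use and the def
+ //         bool     isDef = IS_CSE_DEF(tree->gtCSEnum);
+ //     }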
+
+ unsigned char gtLIRFlags; // Used for nodes that are in LIR. See LIR::Flags in lir.h for the various flags.
+
+#if ASSERTION_PROP
+ unsigned short gtAssertionNum; // 0 or Assertion table index
+ // valid only for non-GT_STMT nodes
+
+ bool HasAssertion() const
+ {
+ return gtAssertionNum != 0;
+ }
+ void ClearAssertion()
+ {
+ gtAssertionNum = 0;
+ }
+
+ unsigned short GetAssertion() const
+ {
+ return gtAssertionNum;
+ }
+ void SetAssertion(unsigned short value)
+ {
+ assert((unsigned short)value == value);
+ gtAssertionNum = (unsigned short)value;
+ }
+
+#endif
+
+#if FEATURE_STACK_FP_X87
+ unsigned char gtFPlvl; // x87 stack depth at this node
+ void gtCopyFPlvl(GenTree* other)
+ {
+ gtFPlvl = other->gtFPlvl;
+ }
+ void gtSetFPlvl(unsigned level)
+ {
+ noway_assert(FitsIn<unsigned char>(level));
+ gtFPlvl = (unsigned char)level;
+ }
+#else // FEATURE_STACK_FP_X87
+ void gtCopyFPlvl(GenTree* other)
+ {
+ }
+ void gtSetFPlvl(unsigned level)
+ {
+ }
+#endif // FEATURE_STACK_FP_X87
+
+ //
+ // Cost metrics on the node. Don't allow direct access to the variable for setting.
+ //
+
+public:
+#ifdef DEBUG
+ // You are not allowed to read the cost values before they have been set in gtSetEvalOrder().
+ // Keep track of whether the costs have been initialized, and assert if they are read before being initialized.
+ // Obviously, this information does need to be initialized when a node is created.
+ // This is public so the dumpers can see it.
+
+ bool gtCostsInitialized;
+#endif // DEBUG
+
+#define MAX_COST UCHAR_MAX
+#define IND_COST_EX 3 // execution cost for an indirection
+
+ __declspec(property(get = GetCostEx)) unsigned char gtCostEx; // estimate of expression execution cost
+
+ __declspec(property(get = GetCostSz)) unsigned char gtCostSz; // estimate of expression code size cost
+
+ unsigned char GetCostEx() const
+ {
+ assert(gtCostsInitialized);
+ return _gtCostEx;
+ }
+ unsigned char GetCostSz() const
+ {
+ assert(gtCostsInitialized);
+ return _gtCostSz;
+ }
+
+ // Set the costs. They are always both set at the same time.
+ // Don't use the "put" property: force calling this function, to make it more obvious in the few places
+ // that set the values.
+ // Note that costs are only set in gtSetEvalOrder() and its callees.
+ void SetCosts(unsigned costEx, unsigned costSz)
+ {
+ assert(costEx != (unsigned)-1); // looks bogus
+ assert(costSz != (unsigned)-1); // looks bogus
+ INDEBUG(gtCostsInitialized = true;)
+
+ _gtCostEx = (costEx > MAX_COST) ? MAX_COST : (unsigned char)costEx;
+ _gtCostSz = (costSz > MAX_COST) ? MAX_COST : (unsigned char)costSz;
+ }
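+
+ // Illustrative note (not part of the original source): costs are written only via SetCosts() and
+ // read back through the gtCostEx/gtCostSz properties, which assert in DEBUG if the costs were
+ // never initialized (hypothetical 'tree'):
+ //
+ //     tree->SetCosts(IND_COST_EX, 2);
+ //     unsigned ex = tree->gtCostEx;   // 3
+ //     unsigned sz = tree->gtCostSz;   // 2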
+
+ // Optimized copy function, to avoid the SetCosts() function comparisons, and make it more clear that a node copy is
+ // happening.
+ void CopyCosts(const GenTree* const tree)
+ {
+ INDEBUG(gtCostsInitialized =
+ tree->gtCostsInitialized;) // If the 'tree' costs aren't initialized, we'll hit an assert below.
+ _gtCostEx = tree->gtCostEx;
+ _gtCostSz = tree->gtCostSz;
+ }
+
+ // Same as CopyCosts, but avoids asserts if the costs we are copying have not been initialized.
+ // This is because the importer, for example, clones nodes, before these costs have been initialized.
+ // Note that we directly access the 'tree' costs, not going through the accessor functions (either
+ // directly or through the properties).
+ void CopyRawCosts(const GenTree* const tree)
+ {
+ INDEBUG(gtCostsInitialized = tree->gtCostsInitialized;)
+ _gtCostEx = tree->_gtCostEx;
+ _gtCostSz = tree->_gtCostSz;
+ }
+
+private:
+ unsigned char _gtCostEx; // estimate of expression execution cost
+ unsigned char _gtCostSz; // estimate of expression code size cost
+
+ //
+ // Register or register pair number of the node.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+public:
+ enum genRegTag
+ {
+ GT_REGTAG_NONE, // Nothing has been assigned to _gtRegNum/_gtRegPair
+ GT_REGTAG_REG, // _gtRegNum has been assigned
+#if CPU_LONG_USES_REGPAIR
+ GT_REGTAG_REGPAIR // _gtRegPair has been assigned
+#endif
+ };
+ genRegTag GetRegTag() const
+ {
+#if CPU_LONG_USES_REGPAIR
+ assert(gtRegTag == GT_REGTAG_NONE || gtRegTag == GT_REGTAG_REG || gtRegTag == GT_REGTAG_REGPAIR);
+#else
+ assert(gtRegTag == GT_REGTAG_NONE || gtRegTag == GT_REGTAG_REG);
+#endif
+ return gtRegTag;
+ }
+
+private:
+ genRegTag gtRegTag; // What is in _gtRegNum/_gtRegPair?
+#endif // DEBUG
+
+private:
+ union {
+ // NOTE: After LSRA, one of these values may be valid even if GTF_REG_VAL is not set in gtFlags.
+ // They store the register assigned to the node. If a register is not assigned, _gtRegNum is set to REG_NA
+ // or _gtRegPair is set to REG_PAIR_NONE, depending on the node type.
+ regNumberSmall _gtRegNum; // which register the value is in
+ regPairNoSmall _gtRegPair; // which register pair the value is in
+ };
+
+public:
+ // The register number is stored in a small format (8 bits), but the getters return and the setters take
+ // a full-size (unsigned) format, to localize the casts here.
+
+ __declspec(property(get = GetRegNum, put = SetRegNum)) regNumber gtRegNum;
+
+ // for codegen purposes, is this node a subnode of its parent
+ bool isContained() const;
+
+ bool isContainedIndir() const;
+
+ bool isIndirAddrMode();
+
+ bool isIndir() const;
+
+ bool isContainedIntOrIImmed() const
+ {
+ return isContained() && IsCnsIntOrI();
+ }
+
+ bool isContainedFltOrDblImmed() const
+ {
+ return isContained() && (OperGet() == GT_CNS_DBL);
+ }
+
+ bool isLclField() const
+ {
+ return OperGet() == GT_LCL_FLD || OperGet() == GT_STORE_LCL_FLD;
+ }
+
+ bool isContainedLclField() const
+ {
+ return isContained() && isLclField();
+ }
+
+ bool isContainedLclVar() const
+ {
+ return isContained() && (OperGet() == GT_LCL_VAR);
+ }
+
+ bool isContainedSpillTemp() const;
+
+ // Indicates whether it is a memory op.
+ // Right now it includes Indir and LclField ops.
+ bool isMemoryOp() const
+ {
+ return isIndir() || isLclField();
+ }
+
+ bool isContainedMemoryOp() const
+ {
+ return (isContained() && isMemoryOp()) || isContainedLclVar() || isContainedSpillTemp();
+ }
+
+ regNumber GetRegNum() const
+ {
+ assert((gtRegTag == GT_REGTAG_REG) || (gtRegTag == GT_REGTAG_NONE)); // TODO-Cleanup: get rid of the NONE case,
+ // and fix everyplace that reads undefined
+ // values
+ regNumber reg = (regNumber)_gtRegNum;
+ assert((gtRegTag == GT_REGTAG_NONE) || // TODO-Cleanup: get rid of the NONE case, and fix everyplace that reads
+ // undefined values
+ (reg >= REG_FIRST && reg <= REG_COUNT));
+ return reg;
+ }
+
+ void SetRegNum(regNumber reg)
+ {
+ assert(reg >= REG_FIRST && reg <= REG_COUNT);
+ // Make sure the upper bits of _gtRegPair are clear
+ _gtRegPair = (regPairNoSmall)0;
+ _gtRegNum = (regNumberSmall)reg;
+ INDEBUG(gtRegTag = GT_REGTAG_REG;)
+ assert(_gtRegNum == reg);
+ }
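+
+ // Illustrative note (not part of the original source): because gtRegNum is declared as a
+ // __declspec(property) above, reads and writes route through these accessors (hypothetical
+ // 'tree' and 'reg'):
+ //
+ //     tree->gtRegNum = reg;              // invokes SetRegNum(reg)
+ //     regNumber r = tree->gtRegNum;      // invokes GetRegNum()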
+
+#if CPU_LONG_USES_REGPAIR
+ __declspec(property(get = GetRegPair, put = SetRegPair)) regPairNo gtRegPair;
+
+ regPairNo GetRegPair() const
+ {
+ assert((gtRegTag == GT_REGTAG_REGPAIR) || (gtRegTag == GT_REGTAG_NONE)); // TODO-Cleanup: get rid of the NONE
+ // case, and fix everyplace that reads
+ // undefined values
+ regPairNo regPair = (regPairNo)_gtRegPair;
+ assert((gtRegTag == GT_REGTAG_NONE) || // TODO-Cleanup: get rid of the NONE case, and fix everyplace that reads
+ // undefined values
+ (regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST) ||
+ (regPair == REG_PAIR_NONE)); // allow initializing to an undefined value
+ return regPair;
+ }
+
+ void SetRegPair(regPairNo regPair)
+ {
+ assert((regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST) ||
+ (regPair == REG_PAIR_NONE)); // allow initializing to an undefined value
+ _gtRegPair = (regPairNoSmall)regPair;
+ INDEBUG(gtRegTag = GT_REGTAG_REGPAIR;)
+ assert(_gtRegPair == regPair);
+ }
+#endif
+
+ // Copy the _gtRegNum/_gtRegPair/gtRegTag fields
+ void CopyReg(GenTreePtr from);
+
+ void gtClearReg(Compiler* compiler);
+
+ bool gtHasReg() const;
+
+ regMaskTP gtGetRegMask() const;
+
+ unsigned gtFlags; // see GTF_xxxx below
+
+#if defined(DEBUG)
+ unsigned gtDebugFlags; // see GTF_DEBUG_xxx below
+#endif // defined(DEBUG)
+
+ ValueNumPair gtVNPair;
+
+ regMaskSmall gtRsvdRegs; // set of fixed trashed registers
+#ifdef LEGACY_BACKEND
+ regMaskSmall gtUsedRegs; // set of used (trashed) registers
+#endif // LEGACY_BACKEND
+
+#ifndef LEGACY_BACKEND
+ TreeNodeInfo gtLsraInfo;
+#endif // !LEGACY_BACKEND
+
+ void SetVNsFromNode(GenTreePtr tree)
+ {
+ gtVNPair = tree->gtVNPair;
+ }
+
+ ValueNum GetVN(ValueNumKind vnk) const
+ {
+ if (vnk == VNK_Liberal)
+ {
+ return gtVNPair.GetLiberal();
+ }
+ else
+ {
+ assert(vnk == VNK_Conservative);
+ return gtVNPair.GetConservative();
+ }
+ }
+ void SetVN(ValueNumKind vnk, ValueNum vn)
+ {
+ if (vnk == VNK_Liberal)
+ {
+ return gtVNPair.SetLiberal(vn);
+ }
+ else
+ {
+ assert(vnk == VNK_Conservative);
+ return gtVNPair.SetConservative(vn);
+ }
+ }
+ void SetVNs(ValueNumPair vnp)
+ {
+ gtVNPair = vnp;
+ }
+ void ClearVN()
+ {
+ gtVNPair = ValueNumPair(); // Initializes both elements to "NoVN".
+ }
+
+//---------------------------------------------------------------------
+// The first set of flags can be used with a large set of nodes, and
+// thus they must all have distinct values. That is, one can test any
+// expression node for one of these flags.
+//---------------------------------------------------------------------
+
+#define GTF_ASG 0x00000001 // sub-expression contains an assignment
+#define GTF_CALL 0x00000002 // sub-expression contains a func. call
+#define GTF_EXCEPT 0x00000004 // sub-expression might throw an exception
+#define GTF_GLOB_REF 0x00000008 // sub-expression uses global variable(s)
+#define GTF_ORDER_SIDEEFF 0x00000010 // sub-expression has a re-ordering side effect
+
+// If you set these flags, make sure that code:gtExtractSideEffList knows how to find the tree,
+// otherwise C# code such as (compiled with csc /o-)
+//     var v = side_eff_operation;
+// with no use of v will drop your tree on the floor.
+#define GTF_PERSISTENT_SIDE_EFFECTS (GTF_ASG | GTF_CALL)
+#define GTF_SIDE_EFFECT (GTF_PERSISTENT_SIDE_EFFECTS | GTF_EXCEPT)
+#define GTF_GLOB_EFFECT (GTF_SIDE_EFFECT | GTF_GLOB_REF)
+#define GTF_ALL_EFFECT (GTF_GLOB_EFFECT | GTF_ORDER_SIDEEFF)
+
+// The extra flag GTF_IS_IN_CSE is used to tell the consumer of these flags
+// that we are calling in the context of performing a CSE, thus we
+// should allow the run-once side effects of running a class constructor.
+//
+// The only requirement of this flag is that it not overlap any of the
+// side-effect flags. The actual bit used is otherwise arbitrary.
+#define GTF_IS_IN_CSE GTF_BOOLEAN
+#define GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE (GTF_ASG | GTF_CALL | GTF_IS_IN_CSE)
+
+// Can any side-effects be observed externally, say by a caller method?
+// For assignments, only assignments to global memory can be observed
+// externally, whereas simple assignments to local variables can not.
+//
+// Be careful when using this inside a "try" protected region as the
+// order of assignments to local variables would need to be preserved
+// wrt side effects if the variables are alive on entry to the
+// "catch/finally" region. In such cases, even assignments to locals
+// will have to be restricted.
+#define GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(flags) \
+ (((flags) & (GTF_CALL | GTF_EXCEPT)) || (((flags) & (GTF_ASG | GTF_GLOB_REF)) == (GTF_ASG | GTF_GLOB_REF)))
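+
+// Illustrative note (not part of the original source): for example, a store that only writes a
+// local variable (gtFlags == GTF_ASG) is not globally visible, while one that also references
+// global memory, or any tree containing a call or a possible exception, is:
+//
+//     GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(GTF_ASG)                  // false
+//     GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(GTF_ASG | GTF_GLOB_REF)   // true
+//     GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(GTF_CALL)                 // true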
+
+#define GTF_REVERSE_OPS \
+ 0x00000020 // operand op2 should be evaluated before op1 (normally, op1 is evaluated first and op2 is evaluated
+ // second)
+#define GTF_REG_VAL \
+ 0x00000040 // operand is sitting in a register (or part of a TYP_LONG operand is sitting in a register)
+
+#define GTF_SPILLED 0x00000080 // the value has been spilled
+
+#ifdef LEGACY_BACKEND
+#define GTF_SPILLED_OPER 0x00000100 // op1 has been spilled
+#define GTF_SPILLED_OP2 0x00000200 // op2 has been spilled
+#else
+#define GTF_NOREG_AT_USE 0x00000100 // tree node is in memory at the point of use
+#endif // LEGACY_BACKEND
+
+#define GTF_ZSF_SET 0x00000400 // the zero (ZF) and sign (SF) flags are set to match the value of this operand
+#if FEATURE_SET_FLAGS
+#define GTF_SET_FLAGS 0x00000800 // Requires that codegen for this node set the flags
+ // Use gtSetFlags() to check this flag
+#endif
+#define GTF_IND_NONFAULTING 0x00000800 // An indir that cannot fault. GTF_SET_FLAGS is not used on indirs
+
+#define GTF_MAKE_CSE 0x00002000 // Hoisted Expression: try hard to make this into CSE (see optPerformHoistExpr)
+#define GTF_DONT_CSE 0x00004000 // don't bother CSE'ing this expr
+#define GTF_COLON_COND 0x00008000 // this node is conditionally executed (part of ? :)
+
+#define GTF_NODE_MASK (GTF_COLON_COND)
+
+#define GTF_BOOLEAN 0x00040000 // value is known to be 0/1
+
+#define GTF_SMALL_OK 0x00080000 // actual small int sufficient
+
+#define GTF_UNSIGNED 0x00100000 // with GT_CAST: the source operand is an unsigned type
+ // with operators: the specified node is an unsigned operator
+
+#define GTF_LATE_ARG \
+ 0x00200000 // the specified node is evaluated to a temp in the arg list, and this temp is added to gtCallLateArgs.
+
+#define GTF_SPILL 0x00400000 // needs to be spilled here
+#define GTF_SPILL_HIGH 0x00040000 // shared with GTF_BOOLEAN
+
+#define GTF_COMMON_MASK 0x007FFFFF // mask of all the flags above
+
+#define GTF_REUSE_REG_VAL 0x00800000 // This is set by the register allocator on nodes whose value already exists in the
+ // register assigned to this node, so the code generator does not have to generate
+ // code to produce the value.
+ // It is currently used only on constant nodes.
+// It CANNOT be set on var (GT_LCL*) nodes, or on indir (GT_IND or GT_STOREIND) nodes, since
+// it is not needed for lclVars and is highly unlikely to be useful for indir nodes
+
+//---------------------------------------------------------------------
+// The following flags can be used only with a small set of nodes, and
+// thus their values need not be distinct (other than within the set
+// that goes with a particular node/nodes, of course). That is, one can
+// only test for one of these flags if the 'gtOper' value is tested as
+// well to make sure it's the right operator for the particular flag.
+//---------------------------------------------------------------------
+
+// NB: GTF_VAR_* and GTF_REG_* share the same namespace of flags, because
+// GT_LCL_VAR nodes may be changed to GT_REG_VAR nodes without resetting
+// the flags. These are also used by GT_LCL_FLD.
+#define GTF_VAR_DEF 0x80000000 // GT_LCL_VAR -- this is a definition
+#define GTF_VAR_USEASG 0x40000000 // GT_LCL_VAR -- this is a use/def for a x<op>=y
+#define GTF_VAR_USEDEF 0x20000000 // GT_LCL_VAR -- this is a use/def as in x=x+y (only the lhs x is tagged)
+#define GTF_VAR_CAST 0x10000000 // GT_LCL_VAR -- has been explicitly cast (variable node may not be type of local)
+#define GTF_VAR_ITERATOR 0x08000000 // GT_LCL_VAR -- this is an iterator reference in the loop condition
+#define GTF_VAR_CLONED 0x01000000 // GT_LCL_VAR -- this node has been cloned or is a clone
+ // Relevant for inlining optimizations (see fgInlinePrependStatements)
+
+// TODO-Cleanup: Currently, GTF_REG_BIRTH is used only by stackfp
+// We should consider using it more generally for VAR_BIRTH, instead of
+// GTF_VAR_DEF && !GTF_VAR_USEASG
+#define GTF_REG_BIRTH 0x04000000 // GT_REG_VAR -- enregistered variable born here
+#define GTF_VAR_DEATH 0x02000000 // GT_LCL_VAR, GT_REG_VAR -- variable dies here (last use)
+
+#define GTF_VAR_ARR_INDEX 0x00000020 // The variable is part of (the index portion of) an array index expression.
+ // Shares a value with GTF_REVERSE_OPS, which is meaningless for local var.
+
+#define GTF_LIVENESS_MASK (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF | GTF_REG_BIRTH | GTF_VAR_DEATH)
+
+#define GTF_CALL_UNMANAGED 0x80000000 // GT_CALL -- direct call to unmanaged code
+#define GTF_CALL_INLINE_CANDIDATE 0x40000000 // GT_CALL -- this call has been marked as an inline candidate
+
+#define GTF_CALL_VIRT_KIND_MASK 0x30000000
+#define GTF_CALL_NONVIRT 0x00000000 // GT_CALL -- a non virtual call
+#define GTF_CALL_VIRT_STUB 0x10000000 // GT_CALL -- a stub-dispatch virtual call
+#define GTF_CALL_VIRT_VTABLE 0x20000000 // GT_CALL -- a vtable-based virtual call
+
+#define GTF_CALL_NULLCHECK 0x08000000 // GT_CALL -- must check instance pointer for null
+#define GTF_CALL_POP_ARGS 0x04000000 // GT_CALL -- caller pop arguments?
+#define GTF_CALL_HOISTABLE 0x02000000 // GT_CALL -- call is hoistable
+#define GTF_CALL_REG_SAVE 0x01000000 // GT_CALL -- This call preserves all integer regs
+ // For additional flags for GT_CALL node see GTF_CALL_M_
+
+#define GTF_NOP_DEATH 0x40000000 // GT_NOP -- operand dies here
+
+#define GTF_FLD_NULLCHECK 0x80000000 // GT_FIELD -- need to nullcheck the "this" pointer
+#define GTF_FLD_VOLATILE 0x40000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_IND_VOLATILE
+
+#define GTF_INX_RNGCHK 0x80000000 // GT_INDEX -- the array reference should be range-checked.
+#define GTF_INX_REFARR_LAYOUT 0x20000000 // GT_INDEX -- same as GTF_IND_REFARR_LAYOUT
+#define GTF_INX_STRING_LAYOUT 0x40000000 // GT_INDEX -- this uses the special string array layout
+
+#define GTF_IND_VOLATILE 0x40000000 // GT_IND -- the load or store must use volatile semantics (this is a nop
+ // on X86)
+#define GTF_IND_REFARR_LAYOUT 0x20000000 // GT_IND -- the array holds object refs (only affects layout of Arrays)
+#define GTF_IND_TGTANYWHERE 0x10000000 // GT_IND -- the target could be anywhere
+#define GTF_IND_TLS_REF 0x08000000 // GT_IND -- the target is accessed via TLS
+#define GTF_IND_ASG_LHS 0x04000000 // GT_IND -- this GT_IND node is (the effective val) of the LHS of an
+ // assignment; don't evaluate it independently.
+#define GTF_IND_UNALIGNED 0x02000000 // GT_IND -- the load or store is unaligned (we assume worst case
+ // alignment of 1 byte)
+#define GTF_IND_INVARIANT 0x01000000 // GT_IND -- the target is invariant (a prejit indirection)
+#define GTF_IND_ARR_LEN 0x80000000 // GT_IND -- the indirection represents an array length (of the REF
+ // contribution to its argument).
+#define GTF_IND_ARR_INDEX 0x00800000 // GT_IND -- the indirection represents an (SZ) array index
+
+#define GTF_IND_FLAGS \
+ (GTF_IND_VOLATILE | GTF_IND_REFARR_LAYOUT | GTF_IND_TGTANYWHERE | GTF_IND_NONFAULTING | GTF_IND_TLS_REF | \
+ GTF_IND_UNALIGNED | GTF_IND_INVARIANT | GTF_IND_ARR_INDEX)
+
+#define GTF_CLS_VAR_ASG_LHS 0x04000000 // GT_CLS_VAR -- this GT_CLS_VAR node is (the effective val) of the LHS
+ // of an assignment; don't evaluate it independently.
+
+#define GTF_ADDR_ONSTACK 0x80000000 // GT_ADDR -- this expression is guaranteed to be on the stack
+
+#define GTF_ADDRMODE_NO_CSE 0x80000000 // GT_ADD/GT_MUL/GT_LSH -- Do not CSE this node only, forms complex
+ // addressing mode
+
+#define GTF_MUL_64RSLT 0x40000000 // GT_MUL -- produce 64-bit result
+
+#define GTF_MOD_INT_RESULT 0x80000000 // GT_MOD, -- the real tree represented by this
+ // GT_UMOD node evaluates to an int even though
+ // its type is long. The result is
+ // placed in the low member of the
+ // reg pair
+
+#define GTF_RELOP_NAN_UN 0x80000000 // GT_<relop> -- Is branch taken if ops are NaN?
+#define GTF_RELOP_JMP_USED 0x40000000 // GT_<relop> -- result of compare used for jump or ?:
+#define GTF_RELOP_QMARK 0x20000000 // GT_<relop> -- the node is the condition for ?:
+#define GTF_RELOP_SMALL 0x10000000 // GT_<relop> -- We should use a byte or short sized compare (op1->gtType
+ // is the small type)
+#define GTF_RELOP_ZTT 0x08000000 // GT_<relop> -- Loop test cloned for converting while-loops into do-while
+ // with explicit "loop test" in the header block.
+
+#define GTF_QMARK_CAST_INSTOF 0x80000000 // GT_QMARK -- Is this a top (not nested) level qmark created for
+ // castclass or instanceof?
+
+#define GTF_BOX_VALUE 0x80000000 // GT_BOX -- "box" is on a value type
+
+#define GTF_ICON_HDL_MASK 0xF0000000 // Bits used by handle types below
+
+#define GTF_ICON_SCOPE_HDL 0x10000000 // GT_CNS_INT -- constant is a scope handle
+#define GTF_ICON_CLASS_HDL 0x20000000 // GT_CNS_INT -- constant is a class handle
+#define GTF_ICON_METHOD_HDL 0x30000000 // GT_CNS_INT -- constant is a method handle
+#define GTF_ICON_FIELD_HDL 0x40000000 // GT_CNS_INT -- constant is a field handle
+#define GTF_ICON_STATIC_HDL 0x50000000 // GT_CNS_INT -- constant is a handle to static data
+#define GTF_ICON_STR_HDL 0x60000000 // GT_CNS_INT -- constant is a string handle
+#define GTF_ICON_PSTR_HDL 0x70000000 // GT_CNS_INT -- constant is a ptr to a string handle
+#define GTF_ICON_PTR_HDL 0x80000000 // GT_CNS_INT -- constant is a ldptr handle
+#define GTF_ICON_VARG_HDL 0x90000000 // GT_CNS_INT -- constant is a var arg cookie handle
+#define GTF_ICON_PINVKI_HDL 0xA0000000 // GT_CNS_INT -- constant is a pinvoke calli handle
+#define GTF_ICON_TOKEN_HDL 0xB0000000 // GT_CNS_INT -- constant is a token handle
+#define GTF_ICON_TLS_HDL 0xC0000000 // GT_CNS_INT -- constant is a TLS ref with offset
+#define GTF_ICON_FTN_ADDR 0xD0000000 // GT_CNS_INT -- constant is a function address
+#define GTF_ICON_CIDMID_HDL 0xE0000000 // GT_CNS_INT -- constant is a class or module ID handle
+#define GTF_ICON_BBC_PTR 0xF0000000 // GT_CNS_INT -- constant is a basic block count pointer
+
+#define GTF_ICON_FIELD_OFF 0x08000000 // GT_CNS_INT -- constant is a field offset
+
+#define GTF_BLK_VOLATILE 0x40000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK
+ // -- is a volatile block operation
+#define GTF_BLK_UNALIGNED 0x02000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK
+ // -- is an unaligned block operation
+#define GTF_BLK_INIT 0x01000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK -- is an init block operation
+
+#define GTF_OVERFLOW 0x10000000 // GT_ADD, GT_SUB, GT_MUL, - Need overflow check
+ // GT_ASG_ADD, GT_ASG_SUB,
+ // GT_CAST
+ // Use gtOverflow(Ex)() to check this flag
+
+#define GTF_NO_OP_NO 0x80000000 // GT_NO_OP -- have the code generator generate a special nop
+
+#define GTF_ARR_BOUND_INBND 0x80000000 // GT_ARR_BOUNDS_CHECK -- have proved this check is always in-bounds
+
+#define GTF_ARRLEN_ARR_IDX 0x80000000 // GT_ARR_LENGTH -- Length which feeds into an array index expression
+
+#define GTF_LIST_AGGREGATE 0x80000000 // GT_LIST -- Indicates that this list should be treated as an
+ // anonymous aggregate value (e.g. a multi-value argument).
+
+//----------------------------------------------------------------
+
+#define GTF_STMT_CMPADD 0x80000000 // GT_STMT -- added by compiler
+#define GTF_STMT_HAS_CSE 0x40000000 // GT_STMT -- CSE def or use was substituted
+
+//----------------------------------------------------------------
+
+#if defined(DEBUG)
+#define GTF_DEBUG_NONE 0x00000000 // No debug flags.
+
+#define GTF_DEBUG_NODE_MORPHED 0x00000001 // the node has been morphed (in the global morphing phase)
+#define GTF_DEBUG_NODE_SMALL 0x00000002
+#define GTF_DEBUG_NODE_LARGE 0x00000004
+
+#define GTF_DEBUG_NODE_MASK 0x00000007 // These flags are all node (rather than operation) properties.
+
+#define GTF_DEBUG_VAR_CSE_REF 0x00800000 // GT_LCL_VAR -- This is a CSE LCL_VAR node
+#endif // defined(DEBUG)
+
+ GenTreePtr gtNext;
+ GenTreePtr gtPrev;
+
+#ifdef DEBUG
+ unsigned gtTreeID;
+ unsigned gtSeqNum; // liveness traversal order within the current statement
+#endif
+
+ static const unsigned short gtOperKindTable[];
+
+ static unsigned OperKind(unsigned gtOper)
+ {
+ assert(gtOper < GT_COUNT);
+
+ return gtOperKindTable[gtOper];
+ }
+
+ unsigned OperKind() const
+ {
+ assert(gtOper < GT_COUNT);
+
+ return gtOperKindTable[gtOper];
+ }
+
+ static bool IsExOp(unsigned opKind)
+ {
+ return (opKind & GTK_EXOP) != 0;
+ }
+ // Returns the operKind with the GTK_EXOP bit removed (the
+ // kind of operator, unary or binary, that is extended).
+ static unsigned StripExOp(unsigned opKind)
+ {
+ return opKind & ~GTK_EXOP;
+ }
+
+ bool IsValue() const
+ {
+ if ((OperKind(gtOper) & GTK_NOVALUE) != 0)
+ {
+ return false;
+ }
+
+ if (gtOper == GT_NOP || gtOper == GT_CALL)
+ {
+ return gtType != TYP_VOID;
+ }
+
+ if (gtOper == GT_LIST)
+ {
+ return (gtFlags & GTF_LIST_AGGREGATE) != 0;
+ }
+
+ return true;
+ }
+
+ bool IsLIR() const
+ {
+ if ((OperKind(gtOper) & GTK_NOTLIR) != 0)
+ {
+ return false;
+ }
+
+ switch (gtOper)
+ {
+ case GT_NOP:
+ // NOPs may only be present in LIR if they do not produce a value.
+ return IsNothingNode();
+
+ case GT_ARGPLACE:
+ // ARGPLACE nodes may not be present in a block's LIR sequence, but they may
+ // be present as children of an LIR node.
+ return (gtNext == nullptr) && (gtPrev == nullptr);
+
+ case GT_LIST:
+ // LIST nodes may only be present in an LIR sequence if they represent aggregates.
+ // They are always allowed, however, as children of an LIR node.
+ return ((gtFlags & GTF_LIST_AGGREGATE) != 0) || ((gtNext == nullptr) && (gtPrev == nullptr));
+
+ case GT_ADDR:
+ {
+ // ADDR nodes may only be present in LIR if the location they refer to is not a
+ // local, class variable, or IND node.
+ GenTree* location = const_cast<GenTree*>(this)->gtGetOp1();
+ genTreeOps locationOp = location->OperGet();
+ return !location->IsLocal() && (locationOp != GT_CLS_VAR) && (locationOp != GT_IND);
+ }
+
+ default:
+ // All other nodes are assumed to be correct.
+ return true;
+ }
+ }
+
+ static bool OperIsConst(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_CONST) != 0;
+ }
+
+ bool OperIsConst() const
+ {
+ return (OperKind(gtOper) & GTK_CONST) != 0;
+ }
+
+ static bool OperIsLeaf(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_LEAF) != 0;
+ }
+
+ bool OperIsLeaf() const
+ {
+ return (OperKind(gtOper) & GTK_LEAF) != 0;
+ }
+
+ static bool OperIsCompare(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_RELOP) != 0;
+ }
+
+ static bool OperIsLocal(genTreeOps gtOper)
+ {
+ bool result = (OperKind(gtOper) & GTK_LOCAL) != 0;
+ assert(result == (gtOper == GT_LCL_VAR || gtOper == GT_PHI_ARG || gtOper == GT_REG_VAR ||
+ gtOper == GT_LCL_FLD || gtOper == GT_STORE_LCL_VAR || gtOper == GT_STORE_LCL_FLD));
+ return result;
+ }
+
+ static bool OperIsLocalAddr(genTreeOps gtOper)
+ {
+ return (gtOper == GT_LCL_VAR_ADDR || gtOper == GT_LCL_FLD_ADDR);
+ }
+
+ static bool OperIsLocalField(genTreeOps gtOper)
+ {
+ return (gtOper == GT_LCL_FLD || gtOper == GT_LCL_FLD_ADDR || gtOper == GT_STORE_LCL_FLD);
+ }
+
+ inline bool OperIsLocalField() const
+ {
+ return OperIsLocalField(gtOper);
+ }
+
+ static bool OperIsScalarLocal(genTreeOps gtOper)
+ {
+ return (gtOper == GT_LCL_VAR || gtOper == GT_REG_VAR || gtOper == GT_STORE_LCL_VAR);
+ }
+
+ static bool OperIsNonPhiLocal(genTreeOps gtOper)
+ {
+ return OperIsLocal(gtOper) && (gtOper != GT_PHI_ARG);
+ }
+
+ static bool OperIsLocalRead(genTreeOps gtOper)
+ {
+ return (OperIsLocal(gtOper) && !OperIsLocalStore(gtOper));
+ }
+
+ static bool OperIsLocalStore(genTreeOps gtOper)
+ {
+ return (gtOper == GT_STORE_LCL_VAR || gtOper == GT_STORE_LCL_FLD);
+ }
+
+ static bool OperIsAddrMode(genTreeOps gtOper)
+ {
+ return (gtOper == GT_LEA);
+ }
+
+ bool OperIsBlkOp();
+ bool OperIsCopyBlkOp();
+ bool OperIsInitBlkOp();
+ bool OperIsDynBlkOp();
+
+ static bool OperIsBlk(genTreeOps gtOper)
+ {
+ return ((gtOper == GT_BLK) || (gtOper == GT_OBJ) || (gtOper == GT_DYN_BLK) || (gtOper == GT_STORE_BLK) ||
+ (gtOper == GT_STORE_OBJ) || (gtOper == GT_STORE_DYN_BLK));
+ }
+
+ bool OperIsBlk() const
+ {
+ return OperIsBlk(OperGet());
+ }
+
+ static bool OperIsStoreBlk(genTreeOps gtOper)
+ {
+ return ((gtOper == GT_STORE_BLK) || (gtOper == GT_STORE_OBJ) || (gtOper == GT_STORE_DYN_BLK));
+ }
+
+ bool OperIsStoreBlk() const
+ {
+ return OperIsStoreBlk(OperGet());
+ }
+
+ bool OperIsPutArgStk() const
+ {
+ return gtOper == GT_PUTARG_STK;
+ }
+
+ bool OperIsPutArgReg() const
+ {
+ return gtOper == GT_PUTARG_REG;
+ }
+
+ bool OperIsPutArg() const
+ {
+ return OperIsPutArgStk() || OperIsPutArgReg();
+ }
+
+ bool OperIsAddrMode() const
+ {
+ return OperIsAddrMode(OperGet());
+ }
+
+ bool OperIsLocal() const
+ {
+ return OperIsLocal(OperGet());
+ }
+
+ bool OperIsLocalAddr() const
+ {
+ return OperIsLocalAddr(OperGet());
+ }
+
+ bool OperIsScalarLocal() const
+ {
+ return OperIsScalarLocal(OperGet());
+ }
+
+ bool OperIsNonPhiLocal() const
+ {
+ return OperIsNonPhiLocal(OperGet());
+ }
+
+ bool OperIsLocalStore() const
+ {
+ return OperIsLocalStore(OperGet());
+ }
+
+ bool OperIsLocalRead() const
+ {
+ return OperIsLocalRead(OperGet());
+ }
+
+ bool OperIsCompare()
+ {
+ return (OperKind(gtOper) & GTK_RELOP) != 0;
+ }
+
+ static bool OperIsLogical(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_LOGOP) != 0;
+ }
+
+ bool OperIsLogical() const
+ {
+ return (OperKind(gtOper) & GTK_LOGOP) != 0;
+ }
+
+ static bool OperIsShift(genTreeOps gtOper)
+ {
+ return (gtOper == GT_LSH) || (gtOper == GT_RSH) || (gtOper == GT_RSZ);
+ }
+
+ bool OperIsShift() const
+ {
+ return OperIsShift(OperGet());
+ }
+
+ static bool OperIsRotate(genTreeOps gtOper)
+ {
+ return (gtOper == GT_ROL) || (gtOper == GT_ROR);
+ }
+
+ bool OperIsRotate() const
+ {
+ return OperIsRotate(OperGet());
+ }
+
+ static bool OperIsShiftOrRotate(genTreeOps gtOper)
+ {
+ return OperIsShift(gtOper) || OperIsRotate(gtOper);
+ }
+
+ bool OperIsShiftOrRotate() const
+ {
+ return OperIsShiftOrRotate(OperGet());
+ }
+
+ bool OperIsArithmetic() const
+ {
+ genTreeOps op = OperGet();
+ return op == GT_ADD || op == GT_SUB || op == GT_MUL || op == GT_DIV || op == GT_MOD
+
+ || op == GT_UDIV || op == GT_UMOD
+
+ || op == GT_OR || op == GT_XOR || op == GT_AND
+
+ || OperIsShiftOrRotate(op);
+ }
+
+#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
+ static bool OperIsHigh(genTreeOps gtOper)
+ {
+ switch (gtOper)
+ {
+ case GT_ADD_HI:
+ case GT_SUB_HI:
+ case GT_MUL_HI:
+ case GT_DIV_HI:
+ case GT_MOD_HI:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool OperIsHigh() const
+ {
+ return OperIsHigh(OperGet());
+ }
+#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
+
+ static bool OperIsUnary(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_UNOP) != 0;
+ }
+
+ bool OperIsUnary() const
+ {
+ return OperIsUnary(gtOper);
+ }
+
+ static bool OperIsBinary(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_BINOP) != 0;
+ }
+
+ bool OperIsBinary() const
+ {
+ return OperIsBinary(gtOper);
+ }
+
+ static bool OperIsSimple(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_SMPOP) != 0;
+ }
+
+ static bool OperIsSpecial(genTreeOps gtOper)
+ {
+ return ((OperKind(gtOper) & GTK_KINDMASK) == GTK_SPECIAL);
+ }
+
+ bool OperIsSimple() const
+ {
+ return OperIsSimple(gtOper);
+ }
+
+#ifdef FEATURE_SIMD
+ bool isCommutativeSIMDIntrinsic();
+#else // !
+ bool isCommutativeSIMDIntrinsic()
+ {
+ return false;
+ }
+#endif // FEATURE_SIMD
+
+ static bool OperIsCommutative(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_COMMUTE) != 0;
+ }
+
+ bool OperIsCommutative()
+ {
+ return OperIsCommutative(gtOper) || (OperIsSIMD(gtOper) && isCommutativeSIMDIntrinsic());
+ }
+
+ static bool OperIsAssignment(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_ASGOP) != 0;
+ }
+
+ bool OperIsAssignment() const
+ {
+ return OperIsAssignment(gtOper);
+ }
+
+ static bool OperIsIndir(genTreeOps gtOper)
+ {
+ return gtOper == GT_IND || gtOper == GT_STOREIND || gtOper == GT_NULLCHECK || OperIsBlk(gtOper);
+ }
+
+ bool OperIsIndir() const
+ {
+ return OperIsIndir(gtOper);
+ }
+
+ static bool OperIsImplicitIndir(genTreeOps gtOper)
+ {
+ switch (gtOper)
+ {
+ case GT_LOCKADD:
+ case GT_XADD:
+ case GT_XCHG:
+ case GT_CMPXCHG:
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_DYN_BLK:
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ case GT_BOX:
+ case GT_ARR_INDEX:
+ case GT_ARR_ELEM:
+ case GT_ARR_OFFSET:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool OperIsImplicitIndir() const
+ {
+ return OperIsImplicitIndir(gtOper);
+ }
+
+ bool OperIsStore() const
+ {
+ return OperIsStore(gtOper);
+ }
+
+ static bool OperIsStore(genTreeOps gtOper)
+ {
+ return (gtOper == GT_STOREIND || gtOper == GT_STORE_LCL_VAR || gtOper == GT_STORE_LCL_FLD ||
+ gtOper == GT_STORE_CLS_VAR || gtOper == GT_STORE_BLK || gtOper == GT_STORE_OBJ ||
+ gtOper == GT_STORE_DYN_BLK);
+ }
+
+ static bool OperIsAtomicOp(genTreeOps gtOper)
+ {
+ return (gtOper == GT_XADD || gtOper == GT_XCHG || gtOper == GT_LOCKADD || gtOper == GT_CMPXCHG);
+ }
+
+ bool OperIsAtomicOp() const
+ {
+ return OperIsAtomicOp(gtOper);
+ }
+
+ // This is basically here for cleaner FEATURE_SIMD #ifdefs.
+ static bool OperIsSIMD(genTreeOps gtOper)
+ {
+#ifdef FEATURE_SIMD
+ return gtOper == GT_SIMD;
+#else // !FEATURE_SIMD
+ return false;
+#endif // !FEATURE_SIMD
+ }
+
+ bool OperIsSIMD()
+ {
+ return OperIsSIMD(gtOper);
+ }
+
+ bool OperIsAggregate()
+ {
+ return (gtOper == GT_LIST) && ((gtFlags & GTF_LIST_AGGREGATE) != 0);
+ }
+
+ // Requires that "op" is an op= operator. Returns
+ // the corresponding "op".
+ static genTreeOps OpAsgToOper(genTreeOps op);
+
+#ifdef DEBUG
+ bool NullOp1Legal() const
+ {
+ assert(OperIsSimple(gtOper));
+ switch (gtOper)
+ {
+ case GT_PHI:
+ case GT_LEA:
+ case GT_RETFILT:
+ case GT_NOP:
+ return true;
+ case GT_RETURN:
+ return gtType == TYP_VOID;
+ default:
+ return false;
+ }
+ }
+
+ bool NullOp2Legal() const
+ {
+ assert(OperIsSimple(gtOper) || OperIsBlk(gtOper));
+ if (!OperIsBinary(gtOper))
+ {
+ return true;
+ }
+ switch (gtOper)
+ {
+ case GT_LIST:
+ case GT_INTRINSIC:
+ case GT_LEA:
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+#endif // FEATURE_SIMD
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ static inline bool RequiresNonNullOp2(genTreeOps oper);
+ bool IsListForMultiRegArg();
+#endif // DEBUG
+
+ inline bool IsFPZero();
+ inline bool IsIntegralConst(ssize_t constVal);
+
+ inline bool IsBoxedValue();
+
+ bool IsList() const
+ {
+ return gtOper == GT_LIST;
+ }
+
+ inline GenTreePtr MoveNext();
+
+ inline GenTreePtr Current();
+
+ inline GenTreePtr* pCurrent();
+
+ inline GenTreePtr gtGetOp1();
+
+ inline GenTreePtr gtGetOp2();
+
+ // Given a tree node, if this is a child of that node, return the pointer to the child node so that it
+ // can be modified; otherwise, return null.
+ GenTreePtr* gtGetChildPointer(GenTreePtr parent);
+
+ // Given a tree node, if this node uses that node, return the use as an out parameter and return true.
+ // Otherwise, return false.
+ bool TryGetUse(GenTree* def, GenTree*** use);
+
+ // Get the parent of this node, and optionally capture the pointer to the child so that it can be modified.
+ GenTreePtr gtGetParent(GenTreePtr** parentChildPtrPtr);
+
+ inline GenTreePtr gtEffectiveVal(bool commaOnly = false);
+
+ // Return the child of this node if it is a GT_RELOAD or GT_COPY; otherwise simply return the node itself
+ inline GenTree* gtSkipReloadOrCopy();
+
+ // Returns true if it is a call node returning its value in more than one register
+ inline bool IsMultiRegCall() const;
+
+ // Returns true if it is a GT_COPY or GT_RELOAD node
+ inline bool IsCopyOrReload() const;
+
+ // Returns true if it is a GT_COPY or GT_RELOAD of a multi-reg call node
+ inline bool IsCopyOrReloadOfMultiRegCall() const;
+
+ bool OperMayThrow();
+
+ unsigned GetScaleIndexMul();
+ unsigned GetScaleIndexShf();
+ unsigned GetScaledIndex();
+
+ // Returns true if "addr" is a GT_ADD node, at least one of whose arguments is an integer
+ // (<= 32 bit) constant. If it returns true, it sets "*offset" to (one of the) constant value(s), and
+ // "*addr" to the other argument.
+ bool IsAddWithI32Const(GenTreePtr* addr, int* offset);
+
+public:
+#if SMALL_TREE_NODES
+ static unsigned char s_gtNodeSizes[];
+#endif
+
+ static void InitNodeSize();
+
+ size_t GetNodeSize() const;
+
+ bool IsNodeProperlySized() const;
+
+ void CopyFrom(const GenTree* src, Compiler* comp);
+
+ static genTreeOps ReverseRelop(genTreeOps relop);
+
+ static genTreeOps SwapRelop(genTreeOps relop);
+
+ //---------------------------------------------------------------------
+
+ static bool Compare(GenTreePtr op1, GenTreePtr op2, bool swapOK = false);
+
+//---------------------------------------------------------------------
+#ifdef DEBUG
+ //---------------------------------------------------------------------
+
+ static const char* NodeName(genTreeOps op);
+
+ static const char* OpName(genTreeOps op);
+
+//---------------------------------------------------------------------
+#endif
+ //---------------------------------------------------------------------
+
+ bool IsNothingNode() const;
+ void gtBashToNOP();
+
+ // Value number update action enumeration
+ enum ValueNumberUpdate
+ {
+ CLEAR_VN, // Clear value number
+ PRESERVE_VN // Preserve value number
+ };
+
+ void SetOper(genTreeOps oper, ValueNumberUpdate vnUpdate = CLEAR_VN); // set gtOper
+ void SetOperResetFlags(genTreeOps oper); // set gtOper and reset flags
+
+ void ChangeOperConst(genTreeOps oper); // ChangeOper(constOper)
+ // set gtOper and only keep GTF_COMMON_MASK flags
+ void ChangeOper(genTreeOps oper, ValueNumberUpdate vnUpdate = CLEAR_VN);
+ void ChangeOperUnchecked(genTreeOps oper);
+
+ void ChangeType(var_types newType)
+ {
+ var_types oldType = gtType;
+ gtType = newType;
+ GenTree* node = this;
+ while (node->gtOper == GT_COMMA)
+ {
+ node = node->gtGetOp2();
+ assert(node->gtType == oldType);
+ node->gtType = newType;
+ }
+ }
+
+ bool IsLocal() const
+ {
+ return OperIsLocal(OperGet());
+ }
+
+ // Returns "true" iff 'this' is a GT_LCL_FLD or GT_STORE_LCL_FLD on which the type
+ // is not the same size as the type of the GT_LCL_VAR.
+ bool IsPartialLclFld(Compiler* comp);
+
+ // Returns "true" iff "this" defines a local variable. Requires "comp" to be the
+ // current compilation. If returns "true", sets "*pLclVarTree" to the
+ // tree for the local that is defined, and, if "pIsEntire" is non-null, sets "*pIsEntire" to
+ // true or false, depending on whether the assignment writes to the entirety of the local
+ // variable, or just a portion of it.
+ bool DefinesLocal(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, bool* pIsEntire = nullptr);
+
+ // Returns true if "this" represents the address of a local, or a field of a local. If returns true, sets
+ // "*pLclVarTree" to the node indicating the local variable. If the address is that of a field of this node,
+ // sets "*pFldSeq" to the field sequence representing that field, else null.
+ bool IsLocalAddrExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, FieldSeqNode** pFldSeq);
+
+ // Simpler variant of the above which just returns the local node if this is an expression that
+ // yields an address into a local
+ GenTreeLclVarCommon* IsLocalAddrExpr();
+
+ // Determine if this is a LclVarCommon node and return some additional info about it in the
+ // two out parameters.
+ bool IsLocalExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, FieldSeqNode** pFldSeq);
+
+ // Determine whether this is an assignment tree of the form X = X (op) Y,
+ // where Y is an arbitrary tree, and X is a lclVar.
+ unsigned IsLclVarUpdateTree(GenTree** otherTree, genTreeOps* updateOper);
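+ // Example (illustrative only): for a tree representing "x = x + y", where 'x' is a lclVar, this
+ // recognizes the update form and reports 'y' through "otherTree" and GT_ADD through "updateOper".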
+
+ // If returns "true", "this" may represent the address of a static or instance field
+ // (or a field of such a field, in the case of an object field of type struct).
+ // If returns "true", then either "*pObj" is set to the object reference,
+ // or "*pStatic" is set to the baseAddr or offset to be added to the "*pFldSeq"
+ // or "*pStatic" is set to the baseAddr or offset to be added to the "*pFldSeq".
+ // The boolean return value only indicates that "this" *may* be a field address
+ // -- the field sequence must also be checked.
+ // If it is a field address, the field sequence will be a sequence of length >= 1,
+ // starting with an instance or static field, and optionally continuing with struct fields.
+ bool IsFieldAddr(Compiler* comp, GenTreePtr* pObj, GenTreePtr* pStatic, FieldSeqNode** pFldSeq);
+
+ // Requires "this" to be the address of an array (the child of a GT_IND labeled with GTF_IND_ARR_INDEX).
+ // Sets "pArr" to the node representing the array (either an array object pointer, or perhaps a byref to some
+ // element).
+ // Sets "*pArrayType" to the class handle for the array type.
+ // Sets "*inxVN" to the value number inferred for the array index.
+ // Sets "*pFldSeq" to the sequence, if any, of struct fields used to index into the array element.
+ void ParseArrayAddress(
+ Compiler* comp, struct ArrayInfo* arrayInfo, GenTreePtr* pArr, ValueNum* pInxVN, FieldSeqNode** pFldSeq);
+
+ // Helper method for the above.
+ void ParseArrayAddressWork(
+ Compiler* comp, ssize_t inputMul, GenTreePtr* pArr, ValueNum* pInxVN, ssize_t* pOffset, FieldSeqNode** pFldSeq);
+
+ // Requires "this" to be a GT_IND. Requires the outermost caller to set "*pFldSeq" to nullptr.
+ // Returns true if it is an array index expression, or access to a (sequence of) struct field(s)
+ // within a struct array element. If it returns true, sets *arrayInfo to the array information, and sets *pFldSeq
+ // to the sequence of struct field accesses.
+ bool ParseArrayElemForm(Compiler* comp, ArrayInfo* arrayInfo, FieldSeqNode** pFldSeq);
+
+ // Requires "this" to be the address of a (possible) array element (or struct field within that).
+ // If it is, sets "*arrayInfo" to the array access info, "*pFldSeq" to the sequence of struct fields
+ // accessed within the array element, and returns true. If not, returns "false".
+ bool ParseArrayElemAddrForm(Compiler* comp, ArrayInfo* arrayInfo, FieldSeqNode** pFldSeq);
+
+ // Requires "this" to be an int expression. If it is a sequence of one or more integer constants added together,
+ // returns true and sets "*pFldSeq" to the sequence of fields with which those constants are annotated.
+ bool ParseOffsetForm(Compiler* comp, FieldSeqNode** pFldSeq);
+
+ // Labels "*this" as an array index expression: label all constants and variables that could contribute, as part of
+ // an affine expression, to the value of the index.
+ void LabelIndex(Compiler* comp, bool isConst = true);
+
+ // Assumes that "this" occurs in a context where it is being dereferenced as the LHS of an assignment-like
+ // statement (assignment, initblk, or copyblk). The "width" should be the number of bytes copied by the
+ // operation. Returns "true" if "this" is an address of (or within)
+ // a local variable; sets "*pLclVarTree" to that local variable instance; and, if "pIsEntire" is non-null,
+ // sets "*pIsEntire" to true if this assignment writes the full width of the local.
+ bool DefinesLocalAddr(Compiler* comp, unsigned width, GenTreeLclVarCommon** pLclVarTree, bool* pIsEntire);
+
+ bool IsRegVar() const
+ {
+ return OperGet() == GT_REG_VAR ? true : false;
+ }
+ bool InReg() const
+ {
+ return (gtFlags & GTF_REG_VAL) ? true : false;
+ }
+ void SetInReg()
+ {
+ gtFlags |= GTF_REG_VAL;
+ }
+
+ regNumber GetReg() const
+ {
+ return InReg() ? gtRegNum : REG_NA;
+ }
+ bool IsRegVarDeath() const
+ {
+ assert(OperGet() == GT_REG_VAR);
+ return (gtFlags & GTF_VAR_DEATH) ? true : false;
+ }
+ bool IsRegVarBirth() const
+ {
+ assert(OperGet() == GT_REG_VAR);
+ return (gtFlags & GTF_REG_BIRTH) ? true : false;
+ }
+ bool IsReverseOp() const
+ {
+ return (gtFlags & GTF_REVERSE_OPS) ? true : false;
+ }
+
+ inline bool IsCnsIntOrI() const;
+
+ inline bool IsIntegralConst() const;
+
+ inline bool IsIntCnsFitsInI32();
+
+ inline bool IsCnsFltOrDbl() const;
+
+ inline bool IsCnsNonZeroFltOrDbl();
+
+ bool IsIconHandle() const
+ {
+ assert(gtOper == GT_CNS_INT);
+ return (gtFlags & GTF_ICON_HDL_MASK) ? true : false;
+ }
+
+ bool IsIconHandle(unsigned handleType) const
+ {
+ assert(gtOper == GT_CNS_INT);
+ assert((handleType & GTF_ICON_HDL_MASK) != 0); // check that handleType is one of the valid GTF_ICON_* values
+ assert((handleType & ~GTF_ICON_HDL_MASK) == 0);
+ return (gtFlags & GTF_ICON_HDL_MASK) == handleType;
+ }
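+ // Example (illustrative only): IsIconHandle(GTF_ICON_SCOPE_HDL) tests whether this constant carries
+ // a scope handle specifically, whereas the parameterless IsIconHandle() above tests whether it
+ // carries any handle kind at all.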
+
+ // Return just the part of the flags corresponding to the GTF_ICON_*_HDL flag. For example,
+ // GTF_ICON_SCOPE_HDL. The tree node must be a const int, but it might not be a handle, in which
+ // case we'll return zero.
+ unsigned GetIconHandleFlag() const
+ {
+ assert(gtOper == GT_CNS_INT);
+ return (gtFlags & GTF_ICON_HDL_MASK);
+ }
+
+ // Mark this node as no longer being a handle; clear its GTF_ICON_*_HDL bits.
+ void ClearIconHandleMask()
+ {
+ assert(gtOper == GT_CNS_INT);
+ gtFlags &= ~GTF_ICON_HDL_MASK;
+ }
+
+ // Return true if the two GT_CNS_INT trees have the same handle flag (GTF_ICON_*_HDL).
+ static bool SameIconHandleFlag(GenTree* t1, GenTree* t2)
+ {
+ return t1->GetIconHandleFlag() == t2->GetIconHandleFlag();
+ }
+
+ bool IsArgPlaceHolderNode() const
+ {
+ return OperGet() == GT_ARGPLACE;
+ }
+ bool IsCall() const
+ {
+ return OperGet() == GT_CALL;
+ }
+ bool IsStatement() const
+ {
+ return OperGet() == GT_STMT;
+ }
+ inline bool IsHelperCall();
+
+ bool IsVarAddr() const;
+ bool gtOverflow() const;
+ bool gtOverflowEx() const;
+ bool gtSetFlags() const;
+ bool gtRequestSetFlags();
+#ifdef DEBUG
+ bool gtIsValid64RsltMul();
+ static int gtDispFlags(unsigned flags, unsigned debugFlags);
+#endif
+
+ // cast operations
+ inline var_types CastFromType();
+ inline var_types& CastToType();
+
+ // Returns true if this gentree node is marked by lowering to indicate
+ // that codegen can still generate code even if it wasn't allocated a
+ // register.
+ bool IsRegOptional() const;
+
+ // Returns "true" iff "this" is a phi-related node (i.e. a GT_PHI_ARG, GT_PHI, or a PhiDefn).
+ bool IsPhiNode();
+
+ // Returns "true" iff "*this" is an assignment (GT_ASG) tree that defines an SSA name (lcl = phi(...));
+ bool IsPhiDefn();
+
+ // Returns "true" iff "*this" is a statement containing an assignment that defines an SSA name (lcl = phi(...));
+ bool IsPhiDefnStmt();
+
+ // Can't use an assignment operator, because we need the extra "comp" argument
+ // (to provide the allocator necessary for the VarSet assignment).
+ // TODO-Cleanup: Not really needed now, w/o liveset on tree nodes
+ void CopyTo(class Compiler* comp, const GenTree& gt);
+
+ // Like the above, except it assumes copying from a small node to a small node.
+ // (Following the code it replaces, it does *not* copy the GenTree fields,
+ // which CopyTo does.)
+ void CopyToSmall(const GenTree& gt);
+
+ // Because of the fact that we hid the assignment operator of "BitSet" (in DEBUG),
+ // we can't synthesize an assignment operator.
+ // TODO-Cleanup: Could change this w/o liveset on tree nodes
+ // (This is also necessary for the VTable trick.)
+ GenTree()
+ {
+ }
+
+ // Returns the number of children of the current node.
+ unsigned NumChildren();
+
+ // Requires "childNum < NumChildren()". Returns the "n"th child of "this."
+ GenTreePtr GetChild(unsigned childNum);
+
+ // Returns an iterator that will produce the use edge to each operand of this node. Differs
+ // from the sequence of nodes produced by a loop over `GetChild` in its handling of call, phi,
+ // and block op nodes.
+ GenTreeUseEdgeIterator UseEdgesBegin();
+ GenTreeUseEdgeIterator UseEdgesEnd();
+
+ IteratorPair<GenTreeUseEdgeIterator> UseEdges();
+
+ // Returns an iterator that will produce each operand of this node. Differs from the sequence
+ // of nodes produced by a loop over `GetChild` in its handling of call, phi, and block op
+ // nodes.
+ GenTreeOperandIterator OperandsBegin();
+ GenTreeOperandIterator OperandsEnd();
+
+ // Returns a range that will produce the operands of this node in use order.
+ IteratorPair<GenTreeOperandIterator> Operands();
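+
+ // Usage sketch (illustrative only, assuming IteratorPair exposes begin()/end() and 'node' is some
+ // GenTree*): the ranges above support range-based iteration, e.g.
+ //
+ //     for (GenTree* operand : node->Operands())
+ //     {
+ //         // visit each operand in use order
+ //     }
+ //
+ //     for (GenTree** use : node->UseEdges())
+ //     {
+ //         // '*use' is the operand; the use edge itself can be rewritten to replace it
+ //     }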
+
+ bool Precedes(GenTree* other);
+
+ // The maximum possible # of children of any node.
+ static const int MAX_CHILDREN = 6;
+
+ bool IsReuseRegVal() const
+ {
+ // This can be extended to non-constant nodes, but not to local or indir nodes.
+ if (OperIsConst() && ((gtFlags & GTF_REUSE_REG_VAL) != 0))
+ {
+ return true;
+ }
+ return false;
+ }
+ void SetReuseRegVal()
+ {
+ assert(OperIsConst());
+ gtFlags |= GTF_REUSE_REG_VAL;
+ }
+ void ResetReuseRegVal()
+ {
+ assert(OperIsConst());
+ gtFlags &= ~GTF_REUSE_REG_VAL;
+ }
+
+#ifdef DEBUG
+
+private:
+ GenTree& operator=(const GenTree& gt)
+ {
+ assert(!"Don't copy");
+ return *this;
+ }
+#endif // DEBUG
+
+#if DEBUGGABLE_GENTREE
+ // In DEBUG builds, add a dummy virtual method, to give the debugger run-time type information.
+ virtual void DummyVirt()
+ {
+ }
+
+ typedef void* VtablePtr;
+
+ VtablePtr GetVtableForOper(genTreeOps oper);
+ void SetVtableForOper(genTreeOps oper);
+
+ static VtablePtr s_vtablesForOpers[GT_COUNT];
+ static VtablePtr s_vtableForOp;
+#endif // DEBUGGABLE_GENTREE
+
+public:
+ inline void* operator new(size_t sz, class Compiler*, genTreeOps oper);
+
+ inline GenTree(genTreeOps oper, var_types type DEBUGARG(bool largeNode = false));
+};
+
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator: an iterator that will produce each use edge of a
+// GenTree node in the order in which they are
+// used. Note that the use edges of a node may not
+// correspond exactly to the nodes on the other
+// ends of its use edges: in particular, GT_LIST
+// nodes are expanded into their component parts
+// (with the optional exception of multi-reg
+// arguments). This differs from the behavior of
+// GenTree::GetChildPointer(), which does not expand
+// lists.
+//
+// Note: valid values of this type may be obtained by calling
+// `GenTree::UseEdgesBegin` and `GenTree::UseEdgesEnd`.
+//
+class GenTreeUseEdgeIterator final
+{
+ friend class GenTreeOperandIterator;
+ friend GenTreeUseEdgeIterator GenTree::UseEdgesBegin();
+ friend GenTreeUseEdgeIterator GenTree::UseEdgesEnd();
+
+ GenTree* m_node;
+ GenTree** m_edge;
+ GenTree* m_argList;
+ int m_state;
+
+ GenTreeUseEdgeIterator(GenTree* node);
+
+ GenTree** GetNextUseEdge() const;
+ void MoveToNextCallUseEdge();
+ void MoveToNextPhiUseEdge();
+#ifdef FEATURE_SIMD
+ void MoveToNextSIMDUseEdge();
+#endif
+ void MoveToNextAggregateUseEdge();
+
+public:
+ GenTreeUseEdgeIterator();
+
+ inline GenTree** operator*()
+ {
+ return m_edge;
+ }
+
+ inline GenTree** operator->()
+ {
+ return m_edge;
+ }
+
+ inline bool operator==(const GenTreeUseEdgeIterator& other) const
+ {
+ if (m_state == -1 || other.m_state == -1)
+ {
+ return m_state == other.m_state;
+ }
+
+ return (m_node == other.m_node) && (m_edge == other.m_edge) && (m_argList == other.m_argList) &&
+ (m_state == other.m_state);
+ }
+
+ inline bool operator!=(const GenTreeUseEdgeIterator& other) const
+ {
+ return !(operator==(other));
+ }
+
+ GenTreeUseEdgeIterator& operator++();
+};
+
+//------------------------------------------------------------------------
+// GenTreeOperandIterator: an iterator that will produce each operand of a
+// GenTree node in the order in which they are
+// used. This uses `GenTreeUseEdgeIterator` under
+// the covers and comes with the same caveats
+// w.r.t. `GetChild`.
+//
+// Note: valid values of this type may be obtained by calling
+// `GenTree::OperandsBegin` and `GenTree::OperandsEnd`.
+class GenTreeOperandIterator final
+{
+ friend GenTreeOperandIterator GenTree::OperandsBegin();
+ friend GenTreeOperandIterator GenTree::OperandsEnd();
+
+ GenTreeUseEdgeIterator m_useEdges;
+
+ GenTreeOperandIterator(GenTree* node) : m_useEdges(node)
+ {
+ }
+
+public:
+ GenTreeOperandIterator() : m_useEdges()
+ {
+ }
+
+ inline GenTree* operator*()
+ {
+ return *(*m_useEdges);
+ }
+
+ inline GenTree* operator->()
+ {
+ return *(*m_useEdges);
+ }
+
+ inline bool operator==(const GenTreeOperandIterator& other) const
+ {
+ return m_useEdges == other.m_useEdges;
+ }
+
+ inline bool operator!=(const GenTreeOperandIterator& other) const
+ {
+ return !(operator==(other));
+ }
+
+ inline GenTreeOperandIterator& operator++()
+ {
+ ++m_useEdges;
+ return *this;
+ }
+};
+
+/*****************************************************************************/
+// In the current design, we never instantiate GenTreeUnOp: it exists only to be
+// used as a base class. For unary operators, we instantiate GenTreeOp, with a NULL second
+// argument. We check that this is true dynamically. We could tighten this and get static
+// checking, but that would entail accessing the first child of a unary operator via something
+// like gtUnOp.gtOp1 instead of gtOp.gtOp1.
+struct GenTreeUnOp : public GenTree
+{
+ GenTreePtr gtOp1;
+
+protected:
+ GenTreeUnOp(genTreeOps oper, var_types type DEBUGARG(bool largeNode = false))
+ : GenTree(oper, type DEBUGARG(largeNode)), gtOp1(nullptr)
+ {
+ }
+
+ GenTreeUnOp(genTreeOps oper, var_types type, GenTreePtr op1 DEBUGARG(bool largeNode = false))
+ : GenTree(oper, type DEBUGARG(largeNode)), gtOp1(op1)
+ {
+ assert(op1 != nullptr || NullOp1Legal());
+ if (op1 != nullptr)
+ { // Propagate effects flags from child.
+ gtFlags |= op1->gtFlags & GTF_ALL_EFFECT;
+ }
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeUnOp() : GenTree(), gtOp1(nullptr)
+ {
+ }
+#endif
+};
+
+struct GenTreeOp : public GenTreeUnOp
+{
+ GenTreePtr gtOp2;
+
+ GenTreeOp(genTreeOps oper, var_types type, GenTreePtr op1, GenTreePtr op2 DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode)), gtOp2(op2)
+ {
+ // comparisons are always integral types
+ assert(!GenTree::OperIsCompare(oper) || varTypeIsIntegral(type));
+ // Binary operators, with a few exceptions, require a non-nullptr
+ // second argument.
+ assert(op2 != nullptr || NullOp2Legal());
+ // Unary operators, on the other hand, require a null second argument.
+ assert(!OperIsUnary(oper) || op2 == nullptr);
+ // Propagate effects flags from child. (UnOp handled this for first child.)
+ if (op2 != nullptr)
+ {
+ gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
+ }
+ }
+
+ // A small set of types are unary operators with optional arguments. We use
+ // this constructor to build those.
+ GenTreeOp(genTreeOps oper, var_types type DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type DEBUGARG(largeNode)), gtOp2(nullptr)
+ {
+ // Unary operators with optional arguments:
+ assert(oper == GT_NOP || oper == GT_RETURN || oper == GT_RETFILT || OperIsBlk(oper));
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeOp() : GenTreeUnOp(), gtOp2(nullptr)
+ {
+ }
+#endif
+};
+
+struct GenTreeVal : public GenTree
+{
+ size_t gtVal1;
+
+ GenTreeVal(genTreeOps oper, var_types type, ssize_t val) : GenTree(oper, type), gtVal1(val)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeVal() : GenTree()
+ {
+ }
+#endif
+};
+
+struct GenTreeIntConCommon : public GenTree
+{
+ inline INT64 LngValue();
+ inline void SetLngValue(INT64 val);
+ inline ssize_t IconValue();
+ inline void SetIconValue(ssize_t val);
+
+ GenTreeIntConCommon(genTreeOps oper, var_types type DEBUGARG(bool largeNode = false))
+ : GenTree(oper, type DEBUGARG(largeNode))
+ {
+ }
+
+ bool FitsInI32()
+ {
+ return FitsInI32(IconValue());
+ }
+
+ static bool FitsInI32(ssize_t val)
+ {
+#ifdef _TARGET_64BIT_
+ return (int)val == val;
+#else
+ return true;
+#endif
+ }
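+
+ // Example: on a 64-bit target, FitsInI32(0x7FFFFFFF) and FitsInI32(-1) return true, while
+ // FitsInI32(0x80000000) returns false, since truncating that value to 32 bits and sign-extending
+ // it back does not reproduce the original.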
+
+ bool ImmedValNeedsReloc(Compiler* comp);
+ bool ImmedValCanBeFolded(Compiler* comp, genTreeOps op);
+
+#ifdef _TARGET_XARCH_
+ bool FitsInAddrBase(Compiler* comp);
+ bool AddrNeedsReloc(Compiler* comp);
+#endif
+
+#if DEBUGGABLE_GENTREE
+ GenTreeIntConCommon() : GenTree()
+ {
+ }
+#endif
+};
+
+// node representing a read from a physical register
+struct GenTreePhysReg : public GenTree
+{
+ // physregs need a field beyond gtRegNum because
+ // gtRegNum indicates the destination (and can be changed)
+ // whereas reg indicates the source
+ regNumber gtSrcReg;
+ GenTreePhysReg(regNumber r, var_types type = TYP_I_IMPL) : GenTree(GT_PHYSREG, type), gtSrcReg(r)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreePhysReg() : GenTree()
+ {
+ }
+#endif
+};
+
+#ifndef LEGACY_BACKEND
+// gtJumpTable - Switch Jump Table
+//
+// This node stores a DWORD constant that represents the
+// absolute address of a jump table for switches. The code
+// generator uses this table to emit the destination for every case;
+// the table is an array of addresses whose starting position is stored
+// in this constant.
+struct GenTreeJumpTable : public GenTreeIntConCommon
+{
+ ssize_t gtJumpTableAddr;
+
+ GenTreeJumpTable(var_types type DEBUGARG(bool largeNode = false))
+ : GenTreeIntConCommon(GT_JMPTABLE, type DEBUGARG(largeNode))
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeJumpTable() : GenTreeIntConCommon()
+ {
+ }
+#endif // DEBUG
+};
+#endif // !LEGACY_BACKEND
+
+/* gtIntCon -- integer constant (GT_CNS_INT) */
+struct GenTreeIntCon : public GenTreeIntConCommon
+{
+ /*
+ * This is the GT_CNS_INT struct definition.
+ * It's used to hold both int constants and pointer handle constants.
+ * For 64-bit targets we use only GT_CNS_INT, as it can represent all the possible sizes.
+ * For 32-bit targets we use GT_CNS_LNG to hold a 64-bit integer constant and GT_CNS_INT for all others.
+ * In the future when we retarget the JIT for x86 we should consider eliminating GT_CNS_LNG
+ */
+ ssize_t gtIconVal; // Must overlap and have the same offset with the gtLconVal field in GenTreeLngCon below.
+
+ /* The InitializeArray intrinsic needs to go back to the newarray statement
+ to find the class handle of the array so that we can get its size. However,
+ in ngen mode, the handle in that statement does not correspond to the compile
+ time handle (rather it lets you get a handle at run-time). In that case, we also
+ need to store a compile time handle, which goes in this gtCompileTimeHandle field.
+ */
+ ssize_t gtCompileTimeHandle;
+
+ // TODO-Cleanup: It's not clear what characterizes the cases where the field
+ // above is used. It may be that its uses and those of the "gtFieldSeq" field below
+ // are mutually exclusive, and they could be put in a union. Or else we should separate
+ // this type into three subtypes.
+
+ // If this constant represents the offset of one or more fields, "gtFieldSeq" represents that
+ // sequence of fields.
+ FieldSeqNode* gtFieldSeq;
+
+#if defined(LATE_DISASM)
+
+ /* If the constant was morphed from some other node,
+ these fields enable us to get back to what the node
+ originally represented. See use of gtNewIconHandleNode()
+ */
+
+ union {
+ /* Template struct - The significant field of the other
+ * structs should overlap exactly with this struct
+ */
+
+ struct
+ {
+ unsigned gtIconHdl1;
+ void* gtIconHdl2;
+ } gtIconHdl;
+
+ /* GT_FIELD, etc */
+
+ struct
+ {
+ unsigned gtIconCPX;
+ CORINFO_CLASS_HANDLE gtIconCls;
+ } gtIconFld;
+ };
+#endif
+
+ GenTreeIntCon(var_types type, ssize_t value DEBUGARG(bool largeNode = false))
+ : GenTreeIntConCommon(GT_CNS_INT, type DEBUGARG(largeNode))
+ , gtIconVal(value)
+ , gtCompileTimeHandle(0)
+ , gtFieldSeq(FieldSeqStore::NotAField())
+ {
+ }
+
+ GenTreeIntCon(var_types type, ssize_t value, FieldSeqNode* fields DEBUGARG(bool largeNode = false))
+ : GenTreeIntConCommon(GT_CNS_INT, type DEBUGARG(largeNode))
+ , gtIconVal(value)
+ , gtCompileTimeHandle(0)
+ , gtFieldSeq(fields)
+ {
+ assert(fields != nullptr);
+ }
+
+ void FixupInitBlkValue(var_types asgType);
+
+#ifdef _TARGET_64BIT_
+ void TruncateOrSignExtend32()
+ {
+ if (gtFlags & GTF_UNSIGNED)
+ {
+ gtIconVal = UINT32(gtIconVal);
+ }
+ else
+ {
+ gtIconVal = INT32(gtIconVal);
+ }
+ }
+#endif // _TARGET_64BIT_
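+
+ // Example (illustrative only): with gtIconVal == 0xFFFFFFFF on a 64-bit target,
+ // TruncateOrSignExtend32() leaves an unsigned (GTF_UNSIGNED) constant as 4294967295 but turns a
+ // signed constant into -1, sign-extending from the low 32 bits.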
+
+#if DEBUGGABLE_GENTREE
+ GenTreeIntCon() : GenTreeIntConCommon()
+ {
+ }
+#endif
+};
+
+/* gtLngCon -- long constant (GT_CNS_LNG) */
+
+struct GenTreeLngCon : public GenTreeIntConCommon
+{
+ INT64 gtLconVal; // Must overlap and have the same offset with the gtIconVal field in GenTreeIntCon above.
+ INT32 LoVal()
+ {
+ return (INT32)(gtLconVal & 0xffffffff);
+ }
+
+ INT32 HiVal()
+ {
+ return (INT32)(gtLconVal >> 32);
+ }
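+
+ // Example: for gtLconVal == 0x1122334455667788LL, HiVal() returns 0x11223344 and LoVal()
+ // returns 0x55667788.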
+
+ GenTreeLngCon(INT64 val) : GenTreeIntConCommon(GT_CNS_NATIVELONG, TYP_LONG)
+ {
+ SetLngValue(val);
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeLngCon() : GenTreeIntConCommon()
+ {
+ }
+#endif
+};
+
+inline INT64 GenTreeIntConCommon::LngValue()
+{
+#ifndef _TARGET_64BIT_
+ assert(gtOper == GT_CNS_LNG);
+ return AsLngCon()->gtLconVal;
+#else
+ return IconValue();
+#endif
+}
+
+inline void GenTreeIntConCommon::SetLngValue(INT64 val)
+{
+#ifndef _TARGET_64BIT_
+ assert(gtOper == GT_CNS_LNG);
+ AsLngCon()->gtLconVal = val;
+#else
+ // Compile time asserts that these two fields overlap and have the same offsets: gtIconVal and gtLconVal
+ C_ASSERT(offsetof(GenTreeLngCon, gtLconVal) == offsetof(GenTreeIntCon, gtIconVal));
+ C_ASSERT(sizeof(AsLngCon()->gtLconVal) == sizeof(AsIntCon()->gtIconVal));
+
+ SetIconValue(ssize_t(val));
+#endif
+}
+
+inline ssize_t GenTreeIntConCommon::IconValue()
+{
+ assert(gtOper == GT_CNS_INT); // We should never see a GT_CNS_LNG for a 64-bit target!
+ return AsIntCon()->gtIconVal;
+}
+
+inline void GenTreeIntConCommon::SetIconValue(ssize_t val)
+{
+ assert(gtOper == GT_CNS_INT); // We should never see a GT_CNS_LNG for a 64-bit target!
+ AsIntCon()->gtIconVal = val;
+}
+
+/* gtDblCon -- double constant (GT_CNS_DBL) */
+
+struct GenTreeDblCon : public GenTree
+{
+ double gtDconVal;
+
+ bool isBitwiseEqual(GenTreeDblCon* other)
+ {
+ unsigned __int64 bits = *(unsigned __int64*)(&gtDconVal);
+ unsigned __int64 otherBits = *(unsigned __int64*)(&(other->gtDconVal));
+ return (bits == otherBits);
+ }
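+
+ // Note: this is a raw bit-pattern comparison, so +0.0 and -0.0 compare as different, while two
+ // NaNs with identical bit patterns compare as equal.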
+
+ GenTreeDblCon(double val) : GenTree(GT_CNS_DBL, TYP_DOUBLE), gtDconVal(val)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeDblCon() : GenTree()
+ {
+ }
+#endif
+};
+
+/* gtStrCon -- string constant (GT_CNS_STR) */
+
+struct GenTreeStrCon : public GenTree
+{
+ unsigned gtSconCPX;
+ CORINFO_MODULE_HANDLE gtScpHnd;
+
+ // Because this node can come from an inlined method we need to
+ // have the scope handle, since it will become a helper call.
+ GenTreeStrCon(unsigned sconCPX, CORINFO_MODULE_HANDLE mod DEBUGARG(bool largeNode = false))
+ : GenTree(GT_CNS_STR, TYP_REF DEBUGARG(largeNode)), gtSconCPX(sconCPX), gtScpHnd(mod)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeStrCon() : GenTree()
+ {
+ }
+#endif
+};
+
+// Common supertype of LCL_VAR, LCL_FLD, REG_VAR, PHI_ARG
+// This inherits from UnOp because lclvar stores are Unops
+struct GenTreeLclVarCommon : public GenTreeUnOp
+{
+private:
+ unsigned _gtLclNum; // The local number. An index into the Compiler::lvaTable array.
+ unsigned _gtSsaNum; // The SSA number.
+
+public:
+ GenTreeLclVarCommon(genTreeOps oper, var_types type, unsigned lclNum DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type DEBUGARG(largeNode))
+ {
+ SetLclNum(lclNum);
+ }
+
+ unsigned GetLclNum() const
+ {
+ return _gtLclNum;
+ }
+ __declspec(property(get = GetLclNum)) unsigned gtLclNum;
+
+ void SetLclNum(unsigned lclNum)
+ {
+ _gtLclNum = lclNum;
+ _gtSsaNum = SsaConfig::RESERVED_SSA_NUM;
+ }
+
+ unsigned GetSsaNum() const
+ {
+ return _gtSsaNum;
+ }
+ __declspec(property(get = GetSsaNum)) unsigned gtSsaNum;
+
+ void SetSsaNum(unsigned ssaNum)
+ {
+ _gtSsaNum = ssaNum;
+ }
+
+ bool HasSsaName()
+ {
+ return (gtSsaNum != SsaConfig::RESERVED_SSA_NUM);
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeLclVarCommon() : GenTreeUnOp()
+ {
+ }
+#endif
+};
+
+// gtLclVar -- load/store/addr of local variable
+
+struct GenTreeLclVar : public GenTreeLclVarCommon
+{
+ IL_OFFSET gtLclILoffs; // instr offset of ref (only for debug info)
+
+ GenTreeLclVar(var_types type, unsigned lclNum, IL_OFFSET ilOffs DEBUGARG(bool largeNode = false))
+ : GenTreeLclVarCommon(GT_LCL_VAR, type, lclNum DEBUGARG(largeNode)), gtLclILoffs(ilOffs)
+ {
+ }
+
+ GenTreeLclVar(genTreeOps oper, var_types type, unsigned lclNum, IL_OFFSET ilOffs DEBUGARG(bool largeNode = false))
+ : GenTreeLclVarCommon(oper, type, lclNum DEBUGARG(largeNode)), gtLclILoffs(ilOffs)
+ {
+ assert(OperIsLocal(oper) || OperIsLocalAddr(oper));
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeLclVar() : GenTreeLclVarCommon()
+ {
+ }
+#endif
+};
+
+// gtLclFld -- load/store/addr of local variable field
+
+struct GenTreeLclFld : public GenTreeLclVarCommon
+{
+ unsigned gtLclOffs; // offset into the variable to access
+
+ FieldSeqNode* gtFieldSeq; // This LclFld node represents some sequences of accesses.
+
+ // old/FE style constructor where load/store/addr share same opcode
+ GenTreeLclFld(var_types type, unsigned lclNum, unsigned lclOffs)
+ : GenTreeLclVarCommon(GT_LCL_FLD, type, lclNum), gtLclOffs(lclOffs), gtFieldSeq(nullptr)
+ {
+ assert(sizeof(*this) <= s_gtNodeSizes[GT_LCL_FLD]);
+ }
+
+ GenTreeLclFld(genTreeOps oper, var_types type, unsigned lclNum, unsigned lclOffs)
+ : GenTreeLclVarCommon(oper, type, lclNum), gtLclOffs(lclOffs), gtFieldSeq(nullptr)
+ {
+ assert(sizeof(*this) <= s_gtNodeSizes[GT_LCL_FLD]);
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeLclFld() : GenTreeLclVarCommon()
+ {
+ }
+#endif
+};
+
+struct GenTreeRegVar : public GenTreeLclVarCommon
+{
+ // TODO-Cleanup: Note that the base class GenTree already has a gtRegNum field.
+ // It's not clear exactly why a GT_REG_VAR has a separate field. When
+ // GT_REG_VAR is created, the two are identical. It appears that they may
+ // or may not remain so. In particular, there is a comment in stackfp.cpp
+ // that states:
+ //
+ // There used to be an assertion: assert(src->gtRegNum == src->gtRegVar.gtRegNum, ...)
+ // here, but there's actually no reason to assume that. AFAICT, for FP vars under stack FP,
+ // src->gtRegVar.gtRegNum is the allocated stack pseudo-register, but src->gtRegNum is the
+ // FP stack position into which that is loaded to represent a particular use of the variable.
+ //
+ // It might be the case that only for stackfp do they ever differ.
+ //
+ // The following might be possible: the GT_REG_VAR node has a last use prior to a complex
+ // subtree being evaluated. It could then be spilled from the register. Later,
+ // it could be unspilled into a different register, which would be recorded at
+ // the unspill time in the GenTree::gtRegNum, whereas GenTreeRegVar::gtRegNum
+ // is left alone. It's not clear why that is useful.
+ //
+ // Assuming there is a particular use, like stack fp, that requires it, maybe we
+ // can get rid of GT_REG_VAR and just leave it as GT_LCL_VAR, using the base class gtRegNum field.
+ // If we need it for stackfp, we could add a GenTreeStackFPRegVar type, which carries both the
+ // pieces of information, in a clearer and more specific way (in particular, with
+ // a different member name).
+ //
+
+private:
+ regNumberSmall _gtRegNum;
+
+public:
+ GenTreeRegVar(var_types type, unsigned lclNum, regNumber regNum) : GenTreeLclVarCommon(GT_REG_VAR, type, lclNum)
+ {
+ gtRegNum = regNum;
+ }
+
+ // The register number is stored in a small format (8 bits), but the getters return and the setters take
+ // a full-size (unsigned) format, to localize the casts here.
+
+ __declspec(property(get = GetRegNum, put = SetRegNum)) regNumber gtRegNum;
+
+ regNumber GetRegNum() const
+ {
+ return (regNumber)_gtRegNum;
+ }
+
+ void SetRegNum(regNumber reg)
+ {
+ _gtRegNum = (regNumberSmall)reg;
+ assert(_gtRegNum == reg);
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeRegVar() : GenTreeLclVarCommon()
+ {
+ }
+#endif
+};
+
+/* gtCast -- conversion to a different type (GT_CAST) */
+
+struct GenTreeCast : public GenTreeOp
+{
+ GenTreePtr& CastOp()
+ {
+ return gtOp1;
+ }
+ var_types gtCastType;
+
+ GenTreeCast(var_types type, GenTreePtr op, var_types castType DEBUGARG(bool largeNode = false))
+ : GenTreeOp(GT_CAST, type, op, nullptr DEBUGARG(largeNode)), gtCastType(castType)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeCast() : GenTreeOp()
+ {
+ }
+#endif
+};
+
+// GT_BOX nodes are place markers for boxed values. The "real" tree
+// for most purposes is in gtBoxOp.
+struct GenTreeBox : public GenTreeUnOp
+{
+ // An expanded helper call to implement the "box" if we don't get
+ // rid of it any other way. Must be in same position as op1.
+
+ GenTreePtr& BoxOp()
+ {
+ return gtOp1;
+ }
+ // This is the statement that contains the assignment tree when the node is an inlined GT_BOX on a value
+ // type
+ GenTreePtr gtAsgStmtWhenInlinedBoxValue;
+
+ GenTreeBox(var_types type, GenTreePtr boxOp, GenTreePtr asgStmtWhenInlinedBoxValue)
+ : GenTreeUnOp(GT_BOX, type, boxOp), gtAsgStmtWhenInlinedBoxValue(asgStmtWhenInlinedBoxValue)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeBox() : GenTreeUnOp()
+ {
+ }
+#endif
+};
+
+/* gtField -- data member ref (GT_FIELD) */
+
+struct GenTreeField : public GenTree
+{
+ GenTreePtr gtFldObj;
+ CORINFO_FIELD_HANDLE gtFldHnd;
+ DWORD gtFldOffset;
+ bool gtFldMayOverlap;
+#ifdef FEATURE_READYTORUN_COMPILER
+ CORINFO_CONST_LOOKUP gtFieldLookup;
+#endif
+
+ GenTreeField(var_types type) : GenTree(GT_FIELD, type)
+ {
+ gtFldMayOverlap = false;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeField() : GenTree()
+ {
+ }
+#endif
+};
+
+// Represents the Argument list of a call node, as a Lisp-style linked list.
+// (Originally I had hoped that this could have *only* the m_arg/m_rest fields, but it turns out
+// that enough of the GenTree mechanism is used that it makes sense just to make it a subtype. But
+// note that in many ways, this is *not* a "real" node of the tree, but rather a mechanism for
+// giving call nodes a flexible number of children. GenTreeArgListNodes never evaluate to registers,
+// for example.)
+
+// Note that while this extends GenTreeOp, it is *not* an EXOP. We don't add any new fields, and one
+// is free to allocate a GenTreeOp of type GT_LIST. If you use this type, you get the convenient Current/Rest
+// method names for the arguments.
+struct GenTreeArgList : public GenTreeOp
+{
+ bool IsAggregate() const
+ {
+ return (gtFlags & GTF_LIST_AGGREGATE) != 0;
+ }
+
+ GenTreePtr& Current()
+ {
+ return gtOp1;
+ }
+ GenTreeArgList*& Rest()
+ {
+ assert(gtOp2 == nullptr || gtOp2->OperGet() == GT_LIST);
+ return *reinterpret_cast<GenTreeArgList**>(&gtOp2);
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeArgList() : GenTreeOp()
+ {
+ }
+#endif
+
+ GenTreeArgList(GenTreePtr arg) : GenTreeArgList(arg, nullptr)
+ {
+ }
+
+ GenTreeArgList(GenTreePtr arg, GenTreeArgList* rest) : GenTreeOp(GT_LIST, TYP_VOID, arg, rest)
+ {
+ // With structs passed in multiple args we could have an arg
+ // GT_LIST containing a list of LCL_FLDs, see IsListForMultiRegArg()
+ //
+ assert((arg != nullptr) && ((!arg->IsList()) || (arg->IsListForMultiRegArg())));
+ gtFlags |= arg->gtFlags & GTF_ALL_EFFECT;
+ if (rest != nullptr)
+ {
+ gtFlags |= rest->gtFlags & GTF_ALL_EFFECT;
+ }
+ }
+
+ GenTreeArgList* Prepend(Compiler* compiler, GenTree* element);
+};
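+
+// Usage sketch (illustrative only): the Lisp-style list above is typically walked via
+// Current()/Rest(); e.g., given some GenTreeCall* 'call', its original argument list can be
+// visited as:
+//
+//     for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
+//     {
+//         GenTree* arg = args->Current();
+//         // process 'arg'
+//     }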
+
+// There was quite a bit of confusion in the code base about which of gtOp1 and gtOp2 was the
+// 'then' and 'else' clause of a colon node. Adding these accessors, while not enforcing anything,
+// at least *allows* the programmer to be obviously correct.
+// However, these conventions seem backward.
+// TODO-Cleanup: If we could get these accessors used everywhere, then we could switch them.
+struct GenTreeColon : public GenTreeOp
+{
+ GenTreePtr& ThenNode()
+ {
+ return gtOp2;
+ }
+ GenTreePtr& ElseNode()
+ {
+ return gtOp1;
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeColon() : GenTreeOp()
+ {
+ }
+#endif
+
+ GenTreeColon(var_types typ, GenTreePtr thenNode, GenTreePtr elseNode) : GenTreeOp(GT_COLON, typ, elseNode, thenNode)
+ {
+ }
+};
+
+// gtCall -- method call (GT_CALL)
+typedef class fgArgInfo* fgArgInfoPtr;
+enum class InlineObservation;
+
+// Return type descriptor of a GT_CALL node.
+// x64 Unix, Arm64, Arm32 and x86 allow a value to be returned in multiple
+// registers. For such calls this struct provides the following info
+// on their return type
+// - type of value returned in each return register
+// - ABI return register numbers in which the value is returned
+// - count of return registers in which the value is returned
+//
+// TODO-ARM: Update this to meet the needs of Arm64 and Arm32
+//
+// TODO-AllArch: Right now it is used for describing multi-reg returned types.
+// Eventually we would want to use it for describing even single-reg
+// returned types (e.g. structs returned in single register x64/arm).
+// This would allow us not to lie or normalize single struct return
+// values in importer/morph.
+struct ReturnTypeDesc
+{
+private:
+ var_types m_regType[MAX_RET_REG_COUNT];
+
+#ifdef DEBUG
+ bool m_inited;
+#endif
+
+public:
+ ReturnTypeDesc()
+ {
+ Reset();
+ }
+
+ // Initialize the Return Type Descriptor for a method that returns a struct type
+ void InitializeStructReturnType(Compiler* comp, CORINFO_CLASS_HANDLE retClsHnd);
+
+ // Initialize the Return Type Descriptor for a method that returns a TYP_LONG
+ // Only needed for X86
+ void InitializeLongReturnType(Compiler* comp);
+
+ // Reset type descriptor to defaults
+ void Reset()
+ {
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i)
+ {
+ m_regType[i] = TYP_UNKNOWN;
+ }
+#ifdef DEBUG
+ m_inited = false;
+#endif
+ }
+
+ //--------------------------------------------------------------------------------------------
+ // GetReturnRegCount: Get the count of return registers in which the return value is returned.
+ //
+ // Arguments:
+ // None
+ //
+ // Return Value:
+ // Count of return registers.
+ // Returns 0 if the return type is not returned in registers.
+ unsigned GetReturnRegCount() const
+ {
+ assert(m_inited);
+
+ int regCount = 0;
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i)
+ {
+ if (m_regType[i] == TYP_UNKNOWN)
+ {
+ break;
+ }
+ // otherwise
+ regCount++;
+ }
+
+#ifdef DEBUG
+ // Any remaining elements in m_regType[] should also be TYP_UNKNOWN
+ for (unsigned i = regCount + 1; i < MAX_RET_REG_COUNT; ++i)
+ {
+ assert(m_regType[i] == TYP_UNKNOWN);
+ }
+#endif
+
+ return regCount;
+ }
+
+ //-----------------------------------------------------------------------
+ // IsMultiRegRetType: check whether the type is returned in multiple
+ // return registers.
+ //
+ // Arguments:
+ // None
+ //
+ // Return Value:
+ // Returns true if the type is returned in multiple return registers.
+ // False otherwise.
+ // Note that we only have to examine the first two values to determine this
+ //
+ bool IsMultiRegRetType() const
+ {
+ if (MAX_RET_REG_COUNT < 2)
+ {
+ return false;
+ }
+ else
+ {
+ return ((m_regType[0] != TYP_UNKNOWN) && (m_regType[1] != TYP_UNKNOWN));
+ }
+ }
+
+ //--------------------------------------------------------------------------
+ // GetReturnRegType: Get var_type of the return register specified by index.
+ //
+ // Arguments:
+ // index - Index of the return register.
+ // First return register will have an index 0 and so on.
+ //
+ // Return Value:
+ // var_type of the return register specified by its index.
+ // asserts if the index does not have a valid register return type.
+
+ var_types GetReturnRegType(unsigned index)
+ {
+ var_types result = m_regType[index];
+ assert(result != TYP_UNKNOWN);
+
+ return result;
+ }
+
+ // Get ith ABI return register
+ regNumber GetABIReturnReg(unsigned idx);
+
+ // Get reg mask of ABI return registers
+ regMaskTP GetABIReturnRegs();
+};
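+
+// Usage sketch (illustrative only): given a GenTreeCall* 'call' whose value comes back in multiple
+// registers, the descriptor above can be queried roughly as follows (note that GetReturnTypeDesc()
+// returns nullptr when FEATURE_MULTIREG_RET is not defined):
+//
+//     ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+//     unsigned        regCount    = retTypeDesc->GetReturnRegCount();
+//     for (unsigned i = 0; i < regCount; ++i)
+//     {
+//         var_types regType = retTypeDesc->GetReturnRegType(i);
+//         regNumber reg     = retTypeDesc->GetABIReturnReg(i);
+//     }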
+
+struct GenTreeCall final : public GenTree
+{
+ GenTreePtr gtCallObjp; // The instance argument ('this' pointer)
+ GenTreeArgList* gtCallArgs; // The list of arguments in original evaluation order
+ GenTreeArgList* gtCallLateArgs; // On x86: The register arguments in an optimal order
+ // On ARM/x64: - also includes any outgoing arg space arguments
+ // - that were evaluated into a temp LclVar
+ fgArgInfoPtr fgArgInfo;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ int regArgListCount;
+ regList regArgList;
+#endif
+
+ // TODO-Throughput: Revisit this (this used to be only defined if
+ // FEATURE_FIXED_OUT_ARGS was enabled, so this makes GenTreeCall 4 bytes bigger on x86).
+ CORINFO_SIG_INFO* callSig; // Used by tail calls and to register callsites with the EE
+
+#ifdef LEGACY_BACKEND
+ regMaskTP gtCallRegUsedMask; // mask of registers used to pass parameters
+#endif // LEGACY_BACKEND
+
+#if FEATURE_MULTIREG_RET
+ // State required to support multi-reg returning call nodes.
+ // For now it is enabled only for x64 unix.
+ //
+ // TODO-AllArch: enable for all call nodes to unify single-reg and multi-reg returns.
+ ReturnTypeDesc gtReturnTypeDesc;
+
+ // gtRegNum would always be the first return reg.
+ // The following array holds the other reg numbers of multi-reg return.
+ regNumber gtOtherRegs[MAX_RET_REG_COUNT - 1];
+
+ // GTF_SPILL or GTF_SPILLED flag on a multi-reg call node indicates that one or
+ // more of its result regs are in that state. The spill flag of each of the
+ // return register is stored in the below array.
+ unsigned gtSpillFlags[MAX_RET_REG_COUNT];
+#endif
+
+ //-----------------------------------------------------------------------
+ // GetReturnTypeDesc: get the type descriptor of return value of the call
+ //
+ // Arguments:
+ // None
+ //
+ // Returns
+ // Type descriptor of the value returned by call
+ //
+ // Note:
+ // Right now this is implemented only for x64 Unix; it has yet to be
+ // implemented for other multi-reg target architectures (Arm64/Arm32/x86).
+ //
+ // TODO-AllArch: enable for all call nodes to unify single-reg and multi-reg returns.
+ ReturnTypeDesc* GetReturnTypeDesc()
+ {
+#if FEATURE_MULTIREG_RET
+ return &gtReturnTypeDesc;
+#else
+ return nullptr;
+#endif
+ }
+
+ //---------------------------------------------------------------------------
+ // GetRegNumByIdx: get ith return register allocated to this call node.
+ //
+ // Arguments:
+ // idx - index of the return register
+ //
+ // Return Value:
+ // Return regNumber of ith return register of call node.
+ // Returns REG_NA if there is no valid return register for the given index.
+ //
+ regNumber GetRegNumByIdx(unsigned idx) const
+ {
+ assert(idx < MAX_RET_REG_COUNT);
+
+ if (idx == 0)
+ {
+ return gtRegNum;
+ }
+
+#if FEATURE_MULTIREG_RET
+ return gtOtherRegs[idx - 1];
+#else
+ return REG_NA;
+#endif
+ }
+
+ //----------------------------------------------------------------------
+ // SetRegNumByIdx: set ith return register of this call node
+ //
+ // Arguments:
+ // reg - reg number
+ // idx - index of the return register
+ //
+ // Return Value:
+ // None
+ //
+ void SetRegNumByIdx(regNumber reg, unsigned idx)
+ {
+ assert(idx < MAX_RET_REG_COUNT);
+
+ if (idx == 0)
+ {
+ gtRegNum = reg;
+ }
+#if FEATURE_MULTIREG_RET
+ else
+ {
+ gtOtherRegs[idx - 1] = reg;
+ assert(gtOtherRegs[idx - 1] == reg);
+ }
+#else
+ unreached();
+#endif
+ }
+
+ //----------------------------------------------------------------------------
+ // ClearOtherRegs: clear multi-reg state to indicate no regs are allocated
+ //
+ // Arguments:
+ // None
+ //
+ // Return Value:
+ // None
+ //
+ void ClearOtherRegs()
+ {
+#if FEATURE_MULTIREG_RET
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+ {
+ gtOtherRegs[i] = REG_NA;
+ }
+#endif
+ }
+
+ //----------------------------------------------------------------------------
+ // CopyOtherRegs: copy multi-reg state from the given call node to this node
+ //
+ // Arguments:
+ // fromCall - GenTreeCall node from which to copy multi-reg state
+ //
+ // Return Value:
+ // None
+ //
+ void CopyOtherRegs(GenTreeCall* fromCall)
+ {
+#if FEATURE_MULTIREG_RET
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+ {
+ this->gtOtherRegs[i] = fromCall->gtOtherRegs[i];
+ }
+#endif
+ }
+
+ // Get reg mask of all the valid registers of gtOtherRegs array
+ regMaskTP GetOtherRegMask() const;
+
+ //----------------------------------------------------------------------
+ // GetRegSpillFlagByIdx: get spill flag associated with the return register
+ // specified by its index.
+ //
+ // Arguments:
+ // idx - Position or index of the return register
+ //
+ // Return Value:
+ // Returns the GTF_* spill flags associated with the return register at that index.
+ unsigned GetRegSpillFlagByIdx(unsigned idx) const
+ {
+ assert(idx < MAX_RET_REG_COUNT);
+
+#if FEATURE_MULTIREG_RET
+ return gtSpillFlags[idx];
+#else
+ assert(!"unreached");
+ return 0;
+#endif
+ }
+
+ //----------------------------------------------------------------------
+ // SetRegSpillFlagByIdx: set spill flags for the return register
+ // specified by its index.
+ //
+ // Arguments:
+ // flags - GTF_* flags
+ // idx - Position or index of the return register
+ //
+ // Return Value:
+ // None
+ void SetRegSpillFlagByIdx(unsigned flags, unsigned idx)
+ {
+ assert(idx < MAX_RET_REG_COUNT);
+
+#if FEATURE_MULTIREG_RET
+ gtSpillFlags[idx] = flags;
+#else
+ unreached();
+#endif
+ }
+
+ //-------------------------------------------------------------------
+ // clearOtherRegFlags: clear GTF_* flags associated with gtOtherRegs
+ //
+ // Arguments:
+ // None
+ //
+ // Return Value:
+ // None
+ void ClearOtherRegFlags()
+ {
+#if FEATURE_MULTIREG_RET
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i)
+ {
+ gtSpillFlags[i] = 0;
+ }
+#endif
+ }
+
+ //-------------------------------------------------------------------------
+ // CopyOtherRegFlags: copy GTF_* flags associated with gtOtherRegs from
+ // the given call node.
+ //
+ // Arguments:
+ // fromCall - GenTreeCall node from which to copy
+ //
+ // Return Value:
+ // None
+ //
+ void CopyOtherRegFlags(GenTreeCall* fromCall)
+ {
+#if FEATURE_MULTIREG_RET
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i)
+ {
+ this->gtSpillFlags[i] = fromCall->gtSpillFlags[i];
+ }
+#endif
+ }
+
+#define GTF_CALL_M_EXPLICIT_TAILCALL \
+ 0x0001 // GT_CALL -- the call is "tail" prefixed and importer has performed tail call checks
+#define GTF_CALL_M_TAILCALL 0x0002 // GT_CALL -- the call is a tailcall
+#define GTF_CALL_M_VARARGS 0x0004 // GT_CALL -- the call uses varargs ABI
+#define GTF_CALL_M_RETBUFFARG 0x0008 // GT_CALL -- first parameter is the return buffer argument
+#define GTF_CALL_M_DELEGATE_INV 0x0010 // GT_CALL -- call to Delegate.Invoke
+#define GTF_CALL_M_NOGCCHECK 0x0020 // GT_CALL -- not a call for computing full interruptibility
+#define GTF_CALL_M_SPECIAL_INTRINSIC 0x0040 // GT_CALL -- function that could be optimized as an intrinsic
+ // in special cases. Used to optimize fast way out in morphing
+#define GTF_CALL_M_UNMGD_THISCALL \
+ 0x0080 // "this" pointer (first argument) should be enregistered (only for GTF_CALL_UNMANAGED)
+#define GTF_CALL_M_VIRTSTUB_REL_INDIRECT \
+ 0x0080 // the virtstub is indirected through a relative address (only for GTF_CALL_VIRT_STUB)
+#define GTF_CALL_M_NONVIRT_SAME_THIS \
+ 0x0080 // callee "this" pointer is equal to caller this pointer (only for GTF_CALL_NONVIRT)
+#define GTF_CALL_M_FRAME_VAR_DEATH 0x0100 // GT_CALL -- the compLvFrameListRoot variable dies here (last use)
+
+#ifndef LEGACY_BACKEND
+#define GTF_CALL_M_TAILCALL_VIA_HELPER 0x0200 // GT_CALL -- call is a tail call dispatched via tail call JIT helper.
+#endif // !LEGACY_BACKEND
+
+#if FEATURE_TAILCALL_OPT
+#define GTF_CALL_M_IMPLICIT_TAILCALL \
+ 0x0400 // GT_CALL -- call is an opportunistic tail call and importer has performed tail call checks
+#define GTF_CALL_M_TAILCALL_TO_LOOP \
+ 0x0800 // GT_CALL -- call is a fast recursive tail call that can be converted into a loop
+#endif
+
+#define GTF_CALL_M_PINVOKE 0x1000 // GT_CALL -- call is a pinvoke. This mirrors VM flag CORINFO_FLG_PINVOKE.
+ // A call marked as Pinvoke is not necessarily a GT_CALL_UNMANAGED. For e.g.
+ // an IL Stub dynamically generated for a PInvoke declaration is flagged as
+ // a Pinvoke but not as an unmanaged call. See impCheckForPInvokeCall() to
+ // know when these flags are set.
+
+#define GTF_CALL_M_R2R_REL_INDIRECT 0x2000 // GT_CALL -- ready to run call is indirected through a relative address
+#define GTF_CALL_M_DOES_NOT_RETURN 0x4000 // GT_CALL -- call does not return
+#define GTF_CALL_M_SECURE_DELEGATE_INV 0x8000 // GT_CALL -- call is in secure delegate
+
+ bool IsUnmanaged() const
+ {
+ return (gtFlags & GTF_CALL_UNMANAGED) != 0;
+ }
+ bool NeedsNullCheck() const
+ {
+ return (gtFlags & GTF_CALL_NULLCHECK) != 0;
+ }
+ bool CallerPop() const
+ {
+ return (gtFlags & GTF_CALL_POP_ARGS) != 0;
+ }
+ bool IsVirtual() const
+ {
+ return (gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT;
+ }
+ bool IsVirtualStub() const
+ {
+ return (gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_STUB;
+ }
+ bool IsVirtualVtable() const
+ {
+ return (gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_VTABLE;
+ }
+ bool IsInlineCandidate() const
+ {
+ return (gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0;
+ }
+
+#ifndef LEGACY_BACKEND
+ bool HasNonStandardAddedArgs(Compiler* compiler) const;
+ int GetNonStandardAddedArgCount(Compiler* compiler) const;
+#endif // !LEGACY_BACKEND
+
+ // Returns true if this call uses a retBuf argument as part of its calling convention.
+ bool HasRetBufArg() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) != 0;
+ }
+
+ //-------------------------------------------------------------------------
+ // TreatAsHasRetBufArg:
+ //
+ // Arguments:
+ // compiler, the compiler instance so that we can call eeGetHelperNum
+ //
+ // Return Value:
+ // Returns true if we treat the call as if it has a retBuf argument.
+ // The call may actually have a retBuf argument, or it could be a JIT
+ // helper that we are still transforming during the importer phase.
+ //
+ // Notes:
+ // On ARM64 marking the method with the GTF_CALL_M_RETBUFFARG flag
+ // will make HasRetBufArg() return true, but will also force the
+ // use of register x8 to pass the RetBuf argument.
+ //
+ bool TreatAsHasRetBufArg(Compiler* compiler) const;
+
+ //-----------------------------------------------------------------------------------------
+ // HasMultiRegRetVal: whether the call node returns its value in multiple return registers.
+ //
+ // Arguments:
+ // None
+ //
+ // Return Value:
+ // True if the call is returning a multi-reg return value. False otherwise.
+ //
+ // Note:
+ // This is implemented only for x64 Unix; it has yet to be implemented for
+ // other multi-reg return target architectures (arm64/arm32/x86).
+ //
+ bool HasMultiRegRetVal() const
+ {
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ // LEGACY_BACKEND does not use multi reg returns for calls with long return types
+ return varTypeIsLong(gtType);
+#elif FEATURE_MULTIREG_RET
+ return varTypeIsStruct(gtType) && !HasRetBufArg();
+#else
+ return false;
+#endif
+ }
+
+ // Returns true if VM has flagged this method as CORINFO_FLG_PINVOKE.
+ bool IsPInvoke() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_PINVOKE) != 0;
+ }
+
+ // Note that the distinction between a "tail" prefixed call and an implicit tail call
+ // is maintained on the call node until fgMorphCall(), after which it will be
+ // either a tail call (i.e. IsTailCall() is true) or a non-tail call.
+ bool IsTailPrefixedCall() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_EXPLICIT_TAILCALL) != 0;
+ }
+
+ // This method returning "true" implies that tail call flowgraph morphing has
+ // performed final checks and committed to making a tail call.
+ bool IsTailCall() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
+ }
+
+ // This method returning "true" implies that the importer has performed tail call checks
+ // and is providing a hint that this can be converted to a tail call.
+ bool CanTailCall() const
+ {
+ return IsTailPrefixedCall() || IsImplicitTailCall();
+ }
+
+#ifndef LEGACY_BACKEND
+ bool IsTailCallViaHelper() const
+ {
+ return IsTailCall() && (gtCallMoreFlags & GTF_CALL_M_TAILCALL_VIA_HELPER);
+ }
+#else // LEGACY_BACKEND
+ bool IsTailCallViaHelper() const
+ {
+ return true;
+ }
+#endif // LEGACY_BACKEND
+
+#if FEATURE_FASTTAILCALL
+ bool IsFastTailCall() const
+ {
+ return IsTailCall() && !(gtCallMoreFlags & GTF_CALL_M_TAILCALL_VIA_HELPER);
+ }
+#else // !FEATURE_FASTTAILCALL
+ bool IsFastTailCall() const
+ {
+ return false;
+ }
+#endif // !FEATURE_FASTTAILCALL
+
+#if FEATURE_TAILCALL_OPT
+ // Returns true if this is marked for opportunistic tail calling.
+ // That is, can be tail called though not explicitly prefixed with "tail" prefix.
+ bool IsImplicitTailCall() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_IMPLICIT_TAILCALL) != 0;
+ }
+ bool IsTailCallConvertibleToLoop() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_TAILCALL_TO_LOOP) != 0;
+ }
+#else // !FEATURE_TAILCALL_OPT
+ bool IsImplicitTailCall() const
+ {
+ return false;
+ }
+ bool IsTailCallConvertibleToLoop() const
+ {
+ return false;
+ }
+#endif // !FEATURE_TAILCALL_OPT
+
+ bool IsSameThis() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS) != 0;
+ }
+ bool IsDelegateInvoke() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) != 0;
+ }
+ bool IsVirtualStubRelativeIndir() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0;
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ bool IsR2RRelativeIndir() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_R2R_REL_INDIRECT) != 0;
+ }
+ void setEntryPoint(CORINFO_CONST_LOOKUP entryPoint)
+ {
+ gtEntryPoint = entryPoint;
+ if (gtEntryPoint.accessType == IAT_PVALUE)
+ {
+ gtCallMoreFlags |= GTF_CALL_M_R2R_REL_INDIRECT;
+ }
+ }
+#endif // FEATURE_READYTORUN_COMPILER
+
+ bool IsVarargs() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_VARARGS) != 0;
+ }
+
+ bool IsNoReturn() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN) != 0;
+ }
+
+ bool IsPure(Compiler* compiler) const;
+
+ unsigned short gtCallMoreFlags; // in addition to gtFlags
+
+ unsigned char gtCallType : 3; // value from the gtCallTypes enumeration
+ unsigned char gtReturnType : 5; // exact return type
+
+ CORINFO_CLASS_HANDLE gtRetClsHnd; // The return type handle of the call if it is a struct; always available
+
+ union {
+ // only used for CALLI unmanaged calls (CT_INDIRECT)
+ GenTreePtr gtCallCookie;
+ // gtInlineCandidateInfo is only used when inlining methods
+ InlineCandidateInfo* gtInlineCandidateInfo;
+ void* gtStubCallStubAddr; // GTF_CALL_VIRT_STUB - these are never inlined
+ CORINFO_GENERIC_HANDLE compileTimeHelperArgumentHandle; // Used to track type handle argument of dynamic helpers
+ void* gtDirectCallAddress; // Used to pass direct call address between lower and codegen
+ };
+
+ // expression evaluated after args are placed which determines the control target
+ GenTree* gtControlExpr;
+
+ union {
+ CORINFO_METHOD_HANDLE gtCallMethHnd; // CT_USER_FUNC
+ GenTreePtr gtCallAddr; // CT_INDIRECT
+ };
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ // Call target lookup info for method call from a Ready To Run module
+ CORINFO_CONST_LOOKUP gtEntryPoint;
+#endif
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ // For non-inline candidates, track the first observation
+ // that blocks candidacy.
+ InlineObservation gtInlineObservation;
+
+ // IL offset of the call wrt its parent method.
+ IL_OFFSET gtRawILOffset;
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ bool IsHelperCall() const
+ {
+ return gtCallType == CT_HELPER;
+ }
+
+ bool IsHelperCall(CORINFO_METHOD_HANDLE callMethHnd) const
+ {
+ return IsHelperCall() && (callMethHnd == gtCallMethHnd);
+ }
+
+ bool IsHelperCall(Compiler* compiler, unsigned helper) const;
+
+ GenTreeCall(var_types type) : GenTree(GT_CALL, type)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeCall() : GenTree()
+ {
+ }
+#endif
+};
+
+struct GenTreeCmpXchg : public GenTree
+{
+ GenTreePtr gtOpLocation;
+ GenTreePtr gtOpValue;
+ GenTreePtr gtOpComparand;
+
+ GenTreeCmpXchg(var_types type, GenTreePtr loc, GenTreePtr val, GenTreePtr comparand)
+ : GenTree(GT_CMPXCHG, type), gtOpLocation(loc), gtOpValue(val), gtOpComparand(comparand)
+ {
+ // There's no reason to do a compare-exchange on a local location, so we'll assume that all of these
+ // have global effects.
+ gtFlags |= GTF_GLOB_EFFECT;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeCmpXchg() : GenTree()
+ {
+ }
+#endif
+};
+
+struct GenTreeFptrVal : public GenTree
+{
+ CORINFO_METHOD_HANDLE gtFptrMethod;
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ CORINFO_CONST_LOOKUP gtEntryPoint;
+ CORINFO_RESOLVED_TOKEN* gtLdftnResolvedToken;
+#endif
+
+ GenTreeFptrVal(var_types type, CORINFO_METHOD_HANDLE meth) : GenTree(GT_FTN_ADDR, type), gtFptrMethod(meth)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeFptrVal() : GenTree()
+ {
+ }
+#endif
+};
+
+/* gtQmark */
+struct GenTreeQmark : public GenTreeOp
+{
+ // Livesets on entry to then and else subtrees
+ VARSET_TP gtThenLiveSet;
+ VARSET_TP gtElseLiveSet;
+
+ // The "Compiler*" argument is not a DEBUGARG here because we use it to keep track of the set of
+ // (possible) QMark nodes.
+ GenTreeQmark(var_types type, GenTreePtr cond, GenTreePtr colonOp, class Compiler* comp);
+
+#if DEBUGGABLE_GENTREE
+ GenTreeQmark() : GenTreeOp(GT_QMARK, TYP_INT, nullptr, nullptr)
+ {
+ }
+#endif
+};
+
+/* gtIntrinsic -- intrinsic (possibly-binary op [NULL op2 is allowed] with an additional field) */
+
+struct GenTreeIntrinsic : public GenTreeOp
+{
+ CorInfoIntrinsics gtIntrinsicId;
+ CORINFO_METHOD_HANDLE gtMethodHandle; // Method handle of the method which is treated as an intrinsic.
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ // Call target lookup info for method call from a Ready To Run module
+ CORINFO_CONST_LOOKUP gtEntryPoint;
+#endif
+
+ GenTreeIntrinsic(var_types type, GenTreePtr op1, CorInfoIntrinsics intrinsicId, CORINFO_METHOD_HANDLE methodHandle)
+ : GenTreeOp(GT_INTRINSIC, type, op1, nullptr), gtIntrinsicId(intrinsicId), gtMethodHandle(methodHandle)
+ {
+ }
+
+ GenTreeIntrinsic(var_types type,
+ GenTreePtr op1,
+ GenTreePtr op2,
+ CorInfoIntrinsics intrinsicId,
+ CORINFO_METHOD_HANDLE methodHandle)
+ : GenTreeOp(GT_INTRINSIC, type, op1, op2), gtIntrinsicId(intrinsicId), gtMethodHandle(methodHandle)
+ {
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeIntrinsic() : GenTreeOp()
+ {
+ }
+#endif
+};
+
+#ifdef FEATURE_SIMD
+
+/* gtSIMD -- SIMD intrinsic (possibly-binary op [NULL op2 is allowed] with additional fields) */
+struct GenTreeSIMD : public GenTreeOp
+{
+ SIMDIntrinsicID gtSIMDIntrinsicID; // operation Id
+ var_types gtSIMDBaseType; // SIMD vector base type
+ unsigned gtSIMDSize; // SIMD vector size in bytes
+
+ GenTreeSIMD(var_types type, GenTreePtr op1, SIMDIntrinsicID simdIntrinsicID, var_types baseType, unsigned size)
+ : GenTreeOp(GT_SIMD, type, op1, nullptr)
+ , gtSIMDIntrinsicID(simdIntrinsicID)
+ , gtSIMDBaseType(baseType)
+ , gtSIMDSize(size)
+ {
+ }
+
+ GenTreeSIMD(var_types type,
+ GenTreePtr op1,
+ GenTreePtr op2,
+ SIMDIntrinsicID simdIntrinsicID,
+ var_types baseType,
+ unsigned size)
+ : GenTreeOp(GT_SIMD, type, op1, op2)
+ , gtSIMDIntrinsicID(simdIntrinsicID)
+ , gtSIMDBaseType(baseType)
+ , gtSIMDSize(size)
+ {
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeSIMD() : GenTreeOp()
+ {
+ }
+#endif
+};
+#endif // FEATURE_SIMD
+
+/* gtIndex -- array access */
+
+struct GenTreeIndex : public GenTreeOp
+{
+ GenTreePtr& Arr()
+ {
+ return gtOp1;
+ }
+ GenTreePtr& Index()
+ {
+ return gtOp2;
+ }
+
+ unsigned gtIndElemSize; // size of elements in the array
+ CORINFO_CLASS_HANDLE gtStructElemClass; // If the element type is a struct, this is the struct type.
+
+ GenTreeIndex(var_types type, GenTreePtr arr, GenTreePtr ind, unsigned indElemSize)
+ : GenTreeOp(GT_INDEX, type, arr, ind)
+ , gtIndElemSize(indElemSize)
+ , gtStructElemClass(nullptr) // We always initialize this after construction.
+ {
+#ifdef DEBUG
+ if (JitConfig.JitSkipArrayBoundCheck() == 1)
+ {
+ // Skip bounds check
+ }
+ else
+#endif
+ {
+ // Do bounds check
+ gtFlags |= GTF_INX_RNGCHK;
+ }
+
+ if (type == TYP_REF)
+ {
+ gtFlags |= GTF_INX_REFARR_LAYOUT;
+ }
+
+ gtFlags |= GTF_EXCEPT | GTF_GLOB_REF;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeIndex() : GenTreeOp()
+ {
+ }
+#endif
+};
+
+/* gtArrLen -- array length (GT_ARR_LENGTH)
+ GT_ARR_LENGTH is used for "arr.length" */
+
+struct GenTreeArrLen : public GenTreeUnOp
+{
+ GenTreePtr& ArrRef()
+ {
+ return gtOp1;
+ } // the array address node
+private:
+ int gtArrLenOffset; // constant to add to "gtArrRef" to get the address of the array length.
+
+public:
+ inline int ArrLenOffset()
+ {
+ return gtArrLenOffset;
+ }
+
+ GenTreeArrLen(var_types type, GenTreePtr arrRef, int lenOffset)
+ : GenTreeUnOp(GT_ARR_LENGTH, type, arrRef), gtArrLenOffset(lenOffset)
+ {
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeArrLen() : GenTreeUnOp()
+ {
+ }
+#endif
+};
+
+// This takes:
+// - a comparison value (generally an array length),
+// - an index value,
+// - the label to jump to if the index is out of range, and
+// - the "kind" of the throw block to branch to on failure.
+// It generates no result.
+
+struct GenTreeBoundsChk : public GenTree
+{
+ GenTreePtr gtArrLen; // An expression for the length of the array being indexed.
+ GenTreePtr gtIndex; // The index expression.
+
+ GenTreePtr gtIndRngFailBB; // Label to jump to for array-index-out-of-range
+ SpecialCodeKind gtThrowKind; // Kind of throw block to branch to on failure
+
+ /* Only out-of-range checks at the same stack depth can jump to the same label (finding the return
+ address is easier). Used for delayed calling of fgSetRngChkTarget() so that the
+ optimizer has a chance of eliminating some of the range checks. */
+ unsigned gtStkDepth;
+
+ GenTreeBoundsChk(genTreeOps oper, var_types type, GenTreePtr arrLen, GenTreePtr index, SpecialCodeKind kind)
+ : GenTree(oper, type)
+ , gtArrLen(arrLen)
+ , gtIndex(index)
+ , gtIndRngFailBB(nullptr)
+ , gtThrowKind(kind)
+ , gtStkDepth(0)
+ {
+ // Effects flags propagate upwards.
+ gtFlags |= (arrLen->gtFlags & GTF_ALL_EFFECT);
+ gtFlags |= GTF_EXCEPT;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeBoundsChk() : GenTree()
+ {
+ }
+#endif
+
+ // If gtArrLen is really an array length (GT_ARR_LENGTH), returns the array reference; otherwise returns nullptr.
+ GenTreePtr GetArray()
+ {
+ if (gtArrLen->OperGet() == GT_ARR_LENGTH)
+ {
+ return gtArrLen->gtArrLen.ArrRef();
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+};
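+
+// A minimal construction sketch (illustrative; "comp", "arr", "index", and "lengthOffset" are
+// assumed names, and GT_ARR_BOUNDS_CHECK / SCK_RNGCHK_FAIL are the usual oper and throw kind):
+//
+//     GenTreeArrLen* arrLen = new (comp, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arr, lengthOffset);
+//     GenTreeBoundsChk* chk = new (comp, GT_ARR_BOUNDS_CHECK)
+//         GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, index, SCK_RNGCHK_FAIL);
+//
+// Because gtArrLen is a GT_ARR_LENGTH node here, chk->GetArray() returns "arr".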
+
+// gtArrElem -- general array element (GT_ARR_ELEM), for non "SZ_ARRAYS"
+// -- multidimensional arrays, or 1-d arrays with non-zero lower bounds.
+
+struct GenTreeArrElem : public GenTree
+{
+ GenTreePtr gtArrObj;
+
+#define GT_ARR_MAX_RANK 3
+ GenTreePtr gtArrInds[GT_ARR_MAX_RANK]; // Indices
+ unsigned char gtArrRank; // Rank of the array
+
+ unsigned char gtArrElemSize; // !!! Caution, this is an "unsigned char", it is used only
+ // on the optimization path of array intrinsics.
+ // It stores the size of array elements WHEN it can fit
+ // into an "unsigned char".
+ // This has caused VSW 571394.
+ var_types gtArrElemType; // The array element type
+
+ // Requires that "inds" is a pointer to an array of "rank" GenTreePtrs for the indices.
+ GenTreeArrElem(var_types type,
+ GenTreePtr arr,
+ unsigned char rank,
+ unsigned char elemSize,
+ var_types elemType,
+ GenTreePtr* inds)
+ : GenTree(GT_ARR_ELEM, type), gtArrObj(arr), gtArrRank(rank), gtArrElemSize(elemSize), gtArrElemType(elemType)
+ {
+ for (unsigned char i = 0; i < rank; i++)
+ {
+ gtArrInds[i] = inds[i];
+ }
+ gtFlags |= GTF_EXCEPT;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeArrElem() : GenTree()
+ {
+ }
+#endif
+};
+
+//--------------------------------------------
+//
+// GenTreeArrIndex (gtArrIndex): Expression to bounds-check the index for one dimension of a
+// multi-dimensional or non-zero-based array, and to compute the effective index
+// (i.e., subtracting the lower bound).
+//
+// Notes:
+// This node is similar in some ways to GenTreeBoundsChk, which ONLY performs the check.
+// The reason that this node incorporates the check into the effective index computation is
+// to avoid duplicating the codegen, as the effective index is required to compute the
+// offset anyway.
+// TODO-CQ: Enable optimization of the lower bound and length by replacing this:
+// /--* <arrObj>
+// +--* <index0>
+// +--* ArrIndex[i, ]
+// with something like:
+// /--* <arrObj>
+// /--* ArrLowerBound[i, ]
+// | /--* <arrObj>
+// +--* ArrLen[i, ] (either generalize GT_ARR_LENGTH or add a new node)
+// +--* <index0>
+// +--* ArrIndex[i, ]
+// Which could, for example, be optimized to the following when known to be within bounds:
+// /--* TempForLowerBoundDim0
+// +--* <index0>
+// +--* - (GT_SUB)
+//
+struct GenTreeArrIndex : public GenTreeOp
+{
+ // The array object - may be any expression producing an Array reference, but is likely to be a lclVar.
+ GenTreePtr& ArrObj()
+ {
+ return gtOp1;
+ }
+ // The index expression - may be any integral expression.
+ GenTreePtr& IndexExpr()
+ {
+ return gtOp2;
+ }
+ unsigned char gtCurrDim; // The current dimension
+ unsigned char gtArrRank; // Rank of the array
+ var_types gtArrElemType; // The array element type
+
+ GenTreeArrIndex(var_types type,
+ GenTreePtr arrObj,
+ GenTreePtr indexExpr,
+ unsigned char currDim,
+ unsigned char arrRank,
+ var_types elemType)
+ : GenTreeOp(GT_ARR_INDEX, type, arrObj, indexExpr)
+ , gtCurrDim(currDim)
+ , gtArrRank(arrRank)
+ , gtArrElemType(elemType)
+ {
+ gtFlags |= GTF_EXCEPT;
+ }
+#if DEBUGGABLE_GENTREE
+protected:
+ friend GenTree;
+ // Used only for GenTree::GetVtableForOper()
+ GenTreeArrIndex() : GenTreeOp()
+ {
+ }
+#endif
+};
+
+//--------------------------------------------
+//
+// GenTreeArrOffset (gtArrOffset): Expression to compute the accumulated offset for the address
+// of an element of a multi-dimensional or non-zero-based array.
+//
+// Notes:
+// The result of this expression is (gtOffset * dimSize) + gtIndex
+// where dimSize is the length/stride/size of the dimension, and is obtained from gtArrObj.
+// This node is generated in conjunction with the GenTreeArrIndex node, which computes the
+// effective index for a single dimension. The sub-trees can be separately optimized, e.g.
+// within a loop body where the expression for the 0th dimension may be invariant.
+//
+// Here is an example of how the tree might look for a two-dimension array reference:
+// /--* const 0
+// | /--* <arrObj>
+// | +--* <index0>
+// +--* ArrIndex[i, ]
+// +--* <arrObj>
+// /--| arrOffs[i, ]
+// | +--* <arrObj>
+// | +--* <index1>
+// +--* ArrIndex[*,j]
+// +--* <arrObj>
+// /--| arrOffs[*,j]
+// TODO-CQ: see comment on GenTreeArrIndex for how its representation may change. When that
+// is done, we will also want to replace the <arrObj> argument to arrOffs with the
+// ArrLen as for GenTreeArrIndex.
+//
+struct GenTreeArrOffs : public GenTree
+{
+ GenTreePtr gtOffset; // The accumulated offset for lower dimensions - must be TYP_I_IMPL, and
+ // will either be a CSE temp, the constant 0, or another GenTreeArrOffs node.
+ GenTreePtr gtIndex; // The effective index for the current dimension - must be non-negative
+ // and can be any expression (though it is likely to be either a GenTreeArrIndex
+ // node, a lclVar, or a constant).
+ GenTreePtr gtArrObj; // The array object - may be any expression producing an Array reference,
+ // but is likely to be a lclVar.
+ unsigned char gtCurrDim; // The current dimension
+ unsigned char gtArrRank; // Rank of the array
+ var_types gtArrElemType; // The array element type
+
+ GenTreeArrOffs(var_types type,
+ GenTreePtr offset,
+ GenTreePtr index,
+ GenTreePtr arrObj,
+ unsigned char currDim,
+ unsigned char rank,
+ var_types elemType)
+ : GenTree(GT_ARR_OFFSET, type)
+ , gtOffset(offset)
+ , gtIndex(index)
+ , gtArrObj(arrObj)
+ , gtCurrDim(currDim)
+ , gtArrRank(rank)
+ , gtArrElemType(elemType)
+ {
+ assert(index->gtFlags & GTF_EXCEPT);
+ gtFlags |= GTF_EXCEPT;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeArrOffs() : GenTree()
+ {
+ }
+#endif
+};
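+
+// Worked example (illustrative): for a 2-D access arr[i][j], the first GT_ARR_OFFSET has
+// gtOffset == 0 and gtIndex == the effective index for i, so its value is
+//     0 * dimSize0 + effIndex(i) = effIndex(i).
+// The second GT_ARR_OFFSET takes that result as gtOffset and effIndex(j) as gtIndex, yielding
+//     effIndex(i) * dimSize1 + effIndex(j)
+// which is the flattened element index used to form the final element address.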
+
+/* gtAddrMode -- Target-specific canonicalized addressing expression (GT_LEA) */
+
+struct GenTreeAddrMode : public GenTreeOp
+{
+ // Address is Base + Index*Scale + Offset.
+ // These are the legal patterns:
+ //
+ // Base // Base != nullptr && Index == nullptr && Scale == 0 && Offset == 0
+ // Base + Index*Scale // Base != nullptr && Index != nullptr && Scale != 0 && Offset == 0
+ // Base + Offset // Base != nullptr && Index == nullptr && Scale == 0 && Offset != 0
+ // Base + Index*Scale + Offset // Base != nullptr && Index != nullptr && Scale != 0 && Offset != 0
+ // Index*Scale // Base == nullptr && Index != nullptr && Scale > 1 && Offset == 0
+ // Index*Scale + Offset // Base == nullptr && Index != nullptr && Scale > 1 && Offset != 0
+ // Offset // Base == nullptr && Index == nullptr && Scale == 0 && Offset != 0
+ //
+ // So, for example:
+ // 1. Base + Index is legal with Scale==1
+ // 2. If Index is null, Scale should be zero (or uninitialized / unused)
+ // 3. If Scale==1, then we should have "Base" instead of "Index*Scale", and "Base + Offset" instead of
+ // "Index*Scale + Offset".
+
+ // First operand is base address/pointer
+ bool HasBase() const
+ {
+ return gtOp1 != nullptr;
+ }
+ GenTreePtr& Base()
+ {
+ return gtOp1;
+ }
+
+ // Second operand is scaled index value
+ bool HasIndex() const
+ {
+ return gtOp2 != nullptr;
+ }
+ GenTreePtr& Index()
+ {
+ return gtOp2;
+ }
+
+ unsigned gtScale; // The scale factor
+ unsigned gtOffset; // The offset to add
+
+ GenTreeAddrMode(var_types type, GenTreePtr base, GenTreePtr index, unsigned scale, unsigned offset)
+ : GenTreeOp(GT_LEA, type, base, index)
+ {
+ gtScale = scale;
+ gtOffset = offset;
+ }
+#if DEBUGGABLE_GENTREE
+protected:
+ friend GenTree;
+ // Used only for GenTree::GetVtableForOper()
+ GenTreeAddrMode() : GenTreeOp()
+ {
+ }
+#endif
+};
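+
+// Illustrative mapping (a sketch; "comp", "base", and "index" are assumed names): the x86/x64
+// address form [base + index*8 + 24] corresponds to
+//
+//     GenTreeAddrMode* lea = new (comp, GT_LEA) GenTreeAddrMode(TYP_BYREF, base, index, 8, 24);
+//
+// i.e. HasBase() and HasIndex() are both true, gtScale == 8 and gtOffset == 24, matching the
+// "Base + Index*Scale + Offset" pattern documented above.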
+
+// Indir is just an op, no additional data, but some additional abstractions
+struct GenTreeIndir : public GenTreeOp
+{
+ // The address for the indirection.
+ // Since GenTreeDynBlk derives from this, but is an "EXOP" (i.e. it has extra fields),
+ // we can't access Op1 and Op2 in the normal manner if we may have a DynBlk.
+ GenTreePtr& Addr()
+ {
+ return gtOp1;
+ }
+
+ // These methods provide an interface to the indirection's address, exposing its base, index, scale, and offset components when the address is an address mode (GT_LEA).
+ bool HasBase();
+ bool HasIndex();
+ GenTree* Base();
+ GenTree* Index();
+ unsigned Scale();
+ size_t Offset();
+
+ GenTreeIndir(genTreeOps oper, var_types type, GenTree* addr, GenTree* data) : GenTreeOp(oper, type, addr, data)
+ {
+ }
+
+#if DEBUGGABLE_GENTREE
+protected:
+ friend GenTree;
+ // Used only for GenTree::GetVtableForOper()
+ GenTreeIndir() : GenTreeOp()
+ {
+ }
+#endif
+};
+
+// gtBlk -- 'block' (GT_BLK, GT_STORE_BLK).
+//
+// This is the base type for all of the nodes that represent block or struct
+// values.
+// Since it can be a store, it includes gtBlkOpKind to specify the type of
+// code generation that will be used for the block operation.
+
+struct GenTreeBlk : public GenTreeIndir
+{
+public:
+ // The data to be stored (null for GT_BLK)
+ GenTree*& Data()
+ {
+ return gtOp2;
+ }
+ void SetData(GenTree* dataNode)
+ {
+ gtOp2 = dataNode;
+ }
+
+ // The size of the buffer to be copied.
+ unsigned Size() const
+ {
+ return gtBlkSize;
+ }
+
+ unsigned gtBlkSize;
+
+ // Return true iff the object being copied contains one or more GC pointers.
+ bool HasGCPtr();
+
+ // True if this BlkOpNode is a volatile memory operation.
+ bool IsVolatile() const
+ {
+ return (gtFlags & GTF_BLK_VOLATILE) != 0;
+ }
+
+ // True if this BlkOpNode is an unaligned memory operation.
+ bool IsUnaligned() const
+ {
+ return (gtFlags & GTF_BLK_UNALIGNED) != 0;
+ }
+
+ // Instruction selection: during codegen time, what code sequence we will be using
+ // to encode this operation.
+ enum
+ {
+ BlkOpKindInvalid,
+ BlkOpKindHelper,
+ BlkOpKindRepInstr,
+ BlkOpKindUnroll,
+ } gtBlkOpKind;
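+
+ // For example (illustrative; the exact size thresholds are target-dependent): lowering
+ // typically chooses BlkOpKindUnroll for small fixed-size copies, BlkOpKindRepInstr when a
+ // rep movs/stos sequence is profitable, and BlkOpKindHelper for large or dynamically-sized copies.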
+
+ bool gtBlkOpGcUnsafe;
+
+ GenTreeBlk(genTreeOps oper, var_types type, GenTreePtr addr, unsigned size)
+ : GenTreeIndir(oper, type, addr, nullptr)
+ , gtBlkSize(size)
+ , gtBlkOpKind(BlkOpKindInvalid)
+ , gtBlkOpGcUnsafe(false)
+ {
+ assert(OperIsBlk(oper));
+ gtFlags |= (addr->gtFlags & GTF_ALL_EFFECT);
+ }
+
+ GenTreeBlk(genTreeOps oper, var_types type, GenTreePtr addr, GenTreePtr data, unsigned size)
+ : GenTreeIndir(oper, type, addr, data), gtBlkSize(size), gtBlkOpKind(BlkOpKindInvalid), gtBlkOpGcUnsafe(false)
+ {
+ assert(OperIsBlk(oper));
+ gtFlags |= (addr->gtFlags & GTF_ALL_EFFECT);
+ gtFlags |= (data->gtFlags & GTF_ALL_EFFECT);
+ }
+
+#if DEBUGGABLE_GENTREE
+protected:
+ friend GenTree;
+ GenTreeBlk() : GenTreeIndir()
+ {
+ }
+#endif // DEBUGGABLE_GENTREE
+};
+
+// gtObj -- 'object' (GT_OBJ).
+//
+// This node is used for block values that may have GC pointers.
+
+struct GenTreeObj : public GenTreeBlk
+{
+ CORINFO_CLASS_HANDLE gtClass; // the class of the object
+
+ // If non-null, this array represents the gc-layout of the class.
+ // This may be simply copied when cloning this node, because it is not changed once computed.
+ BYTE* gtGcPtrs;
+
+ // If non-zero, this is the number of slots in the class layout that
+ // contain gc-pointers.
+ __declspec(property(get = GetGcPtrCount)) unsigned gtGcPtrCount;
+ unsigned GetGcPtrCount() const
+ {
+ assert(_gtGcPtrCount != UINT32_MAX);
+ return _gtGcPtrCount;
+ }
+ unsigned _gtGcPtrCount;
+
+ // If non-zero, the number of pointer-sized slots that make up the class layout.
+ unsigned gtSlots;
+
+ bool IsGCInfoInitialized()
+ {
+ return (_gtGcPtrCount != UINT32_MAX);
+ }
+
+ void SetGCInfo(BYTE* gcPtrs, unsigned gcPtrCount, unsigned slots)
+ {
+ gtGcPtrs = gcPtrs;
+ _gtGcPtrCount = gcPtrCount;
+ gtSlots = slots;
+ if (gtGcPtrCount != 0)
+ {
+ // We assume that we cannot have a struct with GC pointers that is not a multiple
+ // of the register size.
+ // The EE currently does not allow this, but it could change.
+ // Let's assert it just to be safe.
+ noway_assert(roundUp(gtBlkSize, REGSIZE_BYTES) == gtBlkSize);
+ }
+ }
+
+ void CopyGCInfo(GenTreeObj* srcObj)
+ {
+ if (srcObj->IsGCInfoInitialized())
+ {
+ gtGcPtrs = srcObj->gtGcPtrs;
+ _gtGcPtrCount = srcObj->gtGcPtrCount;
+ gtSlots = srcObj->gtSlots;
+ }
+ }
+
+ GenTreeObj(var_types type, GenTreePtr addr, CORINFO_CLASS_HANDLE cls, unsigned size)
+ : GenTreeBlk(GT_OBJ, type, addr, size), gtClass(cls)
+ {
+ // By default, an OBJ is assumed to be a global reference.
+ gtFlags |= GTF_GLOB_REF;
+ noway_assert(cls != NO_CLASS_HANDLE);
+ _gtGcPtrCount = UINT32_MAX;
+ }
+
+ GenTreeObj(var_types type, GenTreePtr addr, GenTreePtr data, CORINFO_CLASS_HANDLE cls, unsigned size)
+ : GenTreeBlk(GT_STORE_OBJ, type, addr, data, size), gtClass(cls)
+ {
+ // By default, an OBJ is assumed to be a global reference.
+ gtFlags |= GTF_GLOB_REF;
+ noway_assert(cls != NO_CLASS_HANDLE);
+ _gtGcPtrCount = UINT32_MAX;
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeObj() : GenTreeBlk()
+ {
+ }
+#endif
+};
+
+// gtDynBlk -- 'dynamic block' (GT_DYN_BLK).
+//
+// This node is used for block values that have a dynamic size.
+// Note that such a value can never have GC pointers.
+
+struct GenTreeDynBlk : public GenTreeBlk
+{
+public:
+ GenTreePtr gtDynamicSize;
+ bool gtEvalSizeFirst;
+
+ GenTreeDynBlk(GenTreePtr addr, GenTreePtr dynamicSize)
+ : GenTreeBlk(GT_DYN_BLK, TYP_STRUCT, addr, 0), gtDynamicSize(dynamicSize), gtEvalSizeFirst(false)
+ {
+ gtFlags |= (dynamicSize->gtFlags & GTF_ALL_EFFECT);
+ }
+
+#if DEBUGGABLE_GENTREE
+protected:
+ friend GenTree;
+ GenTreeDynBlk() : GenTreeBlk()
+ {
+ }
+#endif // DEBUGGABLE_GENTREE
+};
+
+// Read-modify-write status of a RMW memory op rooted at a storeInd
+enum RMWStatus
+{
+ STOREIND_RMW_STATUS_UNKNOWN, // RMW status of storeInd unknown
+ // Default status unless modified by IsRMWMemOpRootedAtStoreInd()
+
+ // One of these denotes that the storeInd is an RMW memory operation.
+ STOREIND_RMW_DST_IS_OP1, // StoreInd is known to be a RMW memory op and dst candidate is op1
+ STOREIND_RMW_DST_IS_OP2, // StoreInd is known to be a RMW memory op and dst candidate is op2
+
+ // One of these denotes the reason the storeInd is marked as a non-RMW operation.
+ STOREIND_RMW_UNSUPPORTED_ADDR, // Addr mode is not yet supported for RMW memory
+ STOREIND_RMW_UNSUPPORTED_OPER, // Operation is not supported for RMW memory
+ STOREIND_RMW_UNSUPPORTED_TYPE, // Type is not supported for RMW memory
+ STOREIND_RMW_INDIR_UNEQUAL // Indir to read value is not equivalent to indir that writes the value
+};
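+
+// Illustrative example of the pattern this status tracks (a sketch, not taken from the source):
+// on xarch, a tree of the shape
+//
+//     GT_STOREIND(addr, GT_ADD(GT_IND(addr), value))
+//
+// can be encoded as a single "add [mem], reg" instruction. The indirection that re-reads the
+// destination is op1 of the ADD, so lowering would mark the storeInd STOREIND_RMW_DST_IS_OP1;
+// with the operands swapped it would be STOREIND_RMW_DST_IS_OP2.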
+
+// StoreInd is just a BinOp, with additional RMW status
+struct GenTreeStoreInd : public GenTreeIndir
+{
+#if !CPU_LOAD_STORE_ARCH
+ // The below flag is set and used during lowering
+ RMWStatus gtRMWStatus;
+
+ bool IsRMWStatusUnknown()
+ {
+ return gtRMWStatus == STOREIND_RMW_STATUS_UNKNOWN;
+ }
+ bool IsNonRMWMemoryOp()
+ {
+ return gtRMWStatus == STOREIND_RMW_UNSUPPORTED_ADDR || gtRMWStatus == STOREIND_RMW_UNSUPPORTED_OPER ||
+ gtRMWStatus == STOREIND_RMW_UNSUPPORTED_TYPE || gtRMWStatus == STOREIND_RMW_INDIR_UNEQUAL;
+ }
+ bool IsRMWMemoryOp()
+ {
+ return gtRMWStatus == STOREIND_RMW_DST_IS_OP1 || gtRMWStatus == STOREIND_RMW_DST_IS_OP2;
+ }
+ bool IsRMWDstOp1()
+ {
+ return gtRMWStatus == STOREIND_RMW_DST_IS_OP1;
+ }
+ bool IsRMWDstOp2()
+ {
+ return gtRMWStatus == STOREIND_RMW_DST_IS_OP2;
+ }
+#endif //! CPU_LOAD_STORE_ARCH
+
+ RMWStatus GetRMWStatus()
+ {
+#if !CPU_LOAD_STORE_ARCH
+ return gtRMWStatus;
+#else
+ return STOREIND_RMW_STATUS_UNKNOWN;
+#endif
+ }
+
+ void SetRMWStatusDefault()
+ {
+#if !CPU_LOAD_STORE_ARCH
+ gtRMWStatus = STOREIND_RMW_STATUS_UNKNOWN;
+#endif
+ }
+
+ void SetRMWStatus(RMWStatus status)
+ {
+#if !CPU_LOAD_STORE_ARCH
+ gtRMWStatus = status;
+#endif
+ }
+
+ GenTreePtr& Data()
+ {
+ return gtOp2;
+ }
+
+ GenTreeStoreInd(var_types type, GenTree* destPtr, GenTree* data) : GenTreeIndir(GT_STOREIND, type, destPtr, data)
+ {
+ SetRMWStatusDefault();
+ }
+
+#if DEBUGGABLE_GENTREE
+protected:
+ friend GenTree;
+ // Used only for GenTree::GetVtableForOper()
+ GenTreeStoreInd() : GenTreeIndir()
+ {
+ SetRMWStatusDefault();
+ }
+#endif
+};
+
+/* gtRetExp -- Place holder for the return expression from an inline candidate (GT_RET_EXPR) */
+
+struct GenTreeRetExpr : public GenTree
+{
+ GenTreePtr gtInlineCandidate;
+
+ CORINFO_CLASS_HANDLE gtRetClsHnd;
+
+ GenTreeRetExpr(var_types type) : GenTree(GT_RET_EXPR, type)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeRetExpr() : GenTree()
+ {
+ }
+#endif
+};
+
+/* gtStmt -- 'statement expr' (GT_STMT) */
+
+class InlineContext;
+
+struct GenTreeStmt : public GenTree
+{
+ GenTreePtr gtStmtExpr; // root of the expression tree
+ GenTreePtr gtStmtList; // first node (for forward walks)
+ InlineContext* gtInlineContext; // The inline context for this statement.
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+ IL_OFFSETX gtStmtILoffsx; // instr offset (if available)
+#endif
+
+#ifdef DEBUG
+ IL_OFFSET gtStmtLastILoffs; // instr offset at end of stmt
+#endif
+
+ __declspec(property(get = getNextStmt)) GenTreeStmt* gtNextStmt;
+
+ __declspec(property(get = getPrevStmt)) GenTreeStmt* gtPrevStmt;
+
+ GenTreeStmt* getNextStmt()
+ {
+ if (gtNext == nullptr)
+ {
+ return nullptr;
+ }
+ else
+ {
+ return gtNext->AsStmt();
+ }
+ }
+
+ GenTreeStmt* getPrevStmt()
+ {
+ if (gtPrev == nullptr)
+ {
+ return nullptr;
+ }
+ else
+ {
+ return gtPrev->AsStmt();
+ }
+ }
+
+ GenTreeStmt(GenTreePtr expr, IL_OFFSETX offset)
+ : GenTree(GT_STMT, TYP_VOID)
+ , gtStmtExpr(expr)
+ , gtStmtList(nullptr)
+ , gtInlineContext(nullptr)
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+ , gtStmtILoffsx(offset)
+#endif
+#ifdef DEBUG
+ , gtStmtLastILoffs(BAD_IL_OFFSET)
+#endif
+ {
+ // Statements can't have statements as part of their expression tree.
+ assert(expr->gtOper != GT_STMT);
+
+ // Set the statement to have the same costs as the top node of the tree.
+ // This is used long before costs have been assigned, so we need to copy
+ // the raw costs.
+ CopyRawCosts(expr);
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeStmt() : GenTree(GT_STMT, TYP_VOID)
+ {
+ }
+#endif
+};
+
+/* NOTE: Any tree nodes that are larger than 8 bytes (two ints or
+ pointers) must be flagged as 'large' in GenTree::InitNodeSize().
+ */
+
+/* gtClsVar -- 'static data member' (GT_CLS_VAR) */
+
+struct GenTreeClsVar : public GenTree
+{
+ CORINFO_FIELD_HANDLE gtClsVarHnd;
+ FieldSeqNode* gtFieldSeq;
+
+ GenTreeClsVar(var_types type, CORINFO_FIELD_HANDLE clsVarHnd, FieldSeqNode* fldSeq)
+ : GenTree(GT_CLS_VAR, type), gtClsVarHnd(clsVarHnd), gtFieldSeq(fldSeq)
+ {
+ gtFlags |= GTF_GLOB_REF;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeClsVar() : GenTree()
+ {
+ }
+#endif
+};
+
+/* gtArgPlace -- 'register argument placeholder' (GT_ARGPLACE) */
+
+struct GenTreeArgPlace : public GenTree
+{
+ CORINFO_CLASS_HANDLE gtArgPlaceClsHnd; // Needed when we have a TYP_STRUCT argument
+
+ GenTreeArgPlace(var_types type, CORINFO_CLASS_HANDLE clsHnd) : GenTree(GT_ARGPLACE, type), gtArgPlaceClsHnd(clsHnd)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeArgPlace() : GenTree()
+ {
+ }
+#endif
+};
+
+/* gtLabel -- code label target (GT_LABEL) */
+
+struct GenTreeLabel : public GenTree
+{
+ BasicBlock* gtLabBB;
+
+ GenTreeLabel(BasicBlock* bb) : GenTree(GT_LABEL, TYP_VOID), gtLabBB(bb)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeLabel() : GenTree()
+ {
+ }
+#endif
+};
+
+/* gtPhiArg -- phi node rhs argument, var = phi(phiarg, phiarg, phiarg...); GT_PHI_ARG */
+struct GenTreePhiArg : public GenTreeLclVarCommon
+{
+ BasicBlock* gtPredBB;
+
+ GenTreePhiArg(var_types type, unsigned lclNum, unsigned snum, BasicBlock* block)
+ : GenTreeLclVarCommon(GT_PHI_ARG, type, lclNum), gtPredBB(block)
+ {
+ SetSsaNum(snum);
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreePhiArg() : GenTreeLclVarCommon()
+ {
+ }
+#endif
+};
+
+/* gtPutArgStk -- Argument passed on stack */
+
+struct GenTreePutArgStk : public GenTreeUnOp
+{
+ unsigned gtSlotNum; // Slot number of the argument to be passed on stack
+
+#if FEATURE_FASTTAILCALL
+ bool putInIncomingArgArea; // Whether this arg needs to be placed in incoming arg area.
+ // By default this is false and will be placed in out-going arg area.
+ // Fast tail calls set this to true.
+ // In the future, if we need to add more such bool fields, consider using bit fields.
+
+ GenTreePutArgStk(genTreeOps oper,
+ var_types type,
+ unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct),
+ bool _putInIncomingArgArea = false DEBUGARG(GenTreePtr callNode = nullptr)
+ DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type DEBUGARG(largeNode))
+ , gtSlotNum(slotNum)
+ , putInIncomingArgArea(_putInIncomingArgArea)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid)
+ , gtNumSlots(numSlots)
+ , gtIsStruct(isStruct)
+ , gtNumberReferenceSlots(0)
+ , gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+#ifdef DEBUG
+ gtCall = callNode;
+#endif
+ }
+
+ GenTreePutArgStk(genTreeOps oper,
+ var_types type,
+ GenTreePtr op1,
+ unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct),
+ bool _putInIncomingArgArea = false DEBUGARG(GenTreePtr callNode = nullptr)
+ DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
+ , gtSlotNum(slotNum)
+ , putInIncomingArgArea(_putInIncomingArgArea)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid)
+ , gtNumSlots(numSlots)
+ , gtIsStruct(isStruct)
+ , gtNumberReferenceSlots(0)
+ , gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+#ifdef DEBUG
+ gtCall = callNode;
+#endif
+ }
+
+#else // !FEATURE_FASTTAILCALL
+
+ GenTreePutArgStk(genTreeOps oper,
+ var_types type,
+ unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct) DEBUGARG(GenTreePtr callNode = NULL)
+ DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type DEBUGARG(largeNode))
+ , gtSlotNum(slotNum)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid)
+ , gtNumSlots(numSlots)
+ , gtIsStruct(isStruct)
+ , gtNumberReferenceSlots(0)
+ , gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+#ifdef DEBUG
+ gtCall = callNode;
+#endif
+ }
+
+ GenTreePutArgStk(genTreeOps oper,
+ var_types type,
+ GenTreePtr op1,
+ unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct) DEBUGARG(GenTreePtr callNode = NULL)
+ DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
+ , gtSlotNum(slotNum)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid)
+ , gtNumSlots(numSlots)
+ , gtIsStruct(isStruct)
+ , gtNumberReferenceSlots(0)
+ , gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+#ifdef DEBUG
+ gtCall = callNode;
+#endif
+ }
+#endif // FEATURE_FASTTAILCALL
+
+ unsigned getArgOffset()
+ {
+ return gtSlotNum * TARGET_POINTER_SIZE;
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ unsigned getArgSize()
+ {
+ return gtNumSlots * TARGET_POINTER_SIZE;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
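+
+ // For example (illustrative): with TARGET_POINTER_SIZE == 8, an argument with gtSlotNum == 3
+ // and gtNumSlots == 2 starts at stack offset 24 (getArgOffset) and spans 16 bytes (getArgSize).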
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ //------------------------------------------------------------------------
+ // setGcPointers: Sets the number of references and the layout of the struct object returned by the VM.
+ //
+ // Arguments:
+ // numPointers - Number of pointer references.
+ // pointers - layout of the struct (with pointers marked.)
+ //
+ // Return Value:
+ // None
+ //
+ // Notes:
+ // This data is used in the codegen for GT_PUTARG_STK to decide how to copy the struct to the stack by value.
+ // If no pointer references are present, block copying instructions are used.
+ // Otherwise the pointer reference slots are copied atomically, so that GC info is emitted for them.
+ // Any non-pointer slots between the pointer reference slots are copied in block fashion.
+ //
+ void setGcPointers(unsigned numPointers, BYTE* pointers)
+ {
+ gtNumberReferenceSlots = numPointers;
+ gtGcPtrs = pointers;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef DEBUG
+ GenTreePtr gtCall; // the call node to which this argument belongs
+#endif
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Instruction selection: during codegen time, what code sequence we will be using
+ // to encode this operation.
+
+ enum PutArgStkKind : __int8{
+ PutArgStkKindInvalid, PutArgStkKindRepInstr, PutArgStkKindUnroll,
+ };
+
+ PutArgStkKind gtPutArgStkKind;
+
+ unsigned gtNumSlots; // Number of slots for the argument to be passed on stack
+ bool gtIsStruct; // This stack arg is a struct.
+ unsigned gtNumberReferenceSlots; // Number of reference slots.
+ BYTE* gtGcPtrs; // gcPointers
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#if DEBUGGABLE_GENTREE
+ GenTreePutArgStk() : GenTreeUnOp()
+ {
+ }
+#endif
+};
+
+// Represents GT_COPY or GT_RELOAD node
+struct GenTreeCopyOrReload : public GenTreeUnOp
+{
+#if FEATURE_MULTIREG_RET
+ // State required to support copy/reload of a multi-reg call node.
+ // The first register is always given by gtRegNum.
+ //
+ regNumber gtOtherRegs[MAX_RET_REG_COUNT - 1];
+#endif
+
+ //----------------------------------------------------------
+ // ClearOtherRegs: set gtOtherRegs to REG_NA.
+ //
+ // Arguments:
+ // None
+ //
+ // Return Value:
+ // None
+ //
+ void ClearOtherRegs()
+ {
+#if FEATURE_MULTIREG_RET
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+ {
+ gtOtherRegs[i] = REG_NA;
+ }
+#endif
+ }
+
+ //-----------------------------------------------------------
+ // GetRegNumByIdx: Get regNumber of ith position.
+ //
+ // Arguments:
+ // idx - register position.
+ //
+ // Return Value:
+ // Returns regNumber assigned to ith position.
+ //
+ regNumber GetRegNumByIdx(unsigned idx) const
+ {
+ assert(idx < MAX_RET_REG_COUNT);
+
+ if (idx == 0)
+ {
+ return gtRegNum;
+ }
+
+#if FEATURE_MULTIREG_RET
+ return gtOtherRegs[idx - 1];
+#else
+ return REG_NA;
+#endif
+ }
+
+ //-----------------------------------------------------------
+ // SetRegNumByIdx: Set the regNumber for ith position.
+ //
+ // Arguments:
+ // reg - reg number
+ // idx - register position.
+ //
+ // Return Value:
+ // None.
+ //
+ void SetRegNumByIdx(regNumber reg, unsigned idx)
+ {
+ assert(idx < MAX_RET_REG_COUNT);
+
+ if (idx == 0)
+ {
+ gtRegNum = reg;
+ }
+#if FEATURE_MULTIREG_RET
+ else
+ {
+ gtOtherRegs[idx - 1] = reg;
+ assert(gtOtherRegs[idx - 1] == reg);
+ }
+#else
+ else
+ {
+ unreached();
+ }
+#endif
+ }
+
+ //----------------------------------------------------------------------------
+ // CopyOtherRegs: copy multi-reg state from the given copy/reload node to this
+ // node.
+ //
+ // Arguments:
+ // from - GenTree node from which to copy multi-reg state
+ //
+ // Return Value:
+ // None
+ //
+ // TODO-ARM: Implement this routine for Arm64 and Arm32
+ // TODO-X86: Implement this routine for x86
+ void CopyOtherRegs(GenTreeCopyOrReload* from)
+ {
+ assert(OperGet() == from->OperGet());
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+ {
+ gtOtherRegs[i] = from->gtOtherRegs[i];
+ }
+#endif
+ }
+
+ GenTreeCopyOrReload(genTreeOps oper, var_types type, GenTree* op1) : GenTreeUnOp(oper, type, op1)
+ {
+ gtRegNum = REG_NA;
+ ClearOtherRegs();
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeCopyOrReload() : GenTreeUnOp()
+ {
+ }
+#endif
+};
+
+// Represents GT_ALLOCOBJ node
+
+struct GenTreeAllocObj final : public GenTreeUnOp
+{
+ unsigned int gtNewHelper; // Value returned by ICorJitInfo::getNewHelper
+ CORINFO_CLASS_HANDLE gtAllocObjClsHnd;
+
+ GenTreeAllocObj(var_types type, unsigned int helper, CORINFO_CLASS_HANDLE clsHnd, GenTreePtr op)
+ : GenTreeUnOp(GT_ALLOCOBJ, type, op DEBUGARG(/*largeNode*/ TRUE))
+ , // This node in most cases will be changed to a call node
+ gtNewHelper(helper)
+ , gtAllocObjClsHnd(clsHnd)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeAllocObj() : GenTreeUnOp()
+ {
+ }
+#endif
+};
+
+//------------------------------------------------------------------------
+// Deferred inline functions of GenTree -- these need the subtypes above to
+// be defined already.
+//------------------------------------------------------------------------
+
+inline bool GenTree::OperIsBlkOp()
+{
+ return (((gtOper == GT_ASG) && varTypeIsStruct(gtOp.gtOp1))
+#ifndef LEGACY_BACKEND
+ || (OperIsBlk() && (AsBlk()->Data() != nullptr))
+#endif
+ );
+}
+
+inline bool GenTree::OperIsDynBlkOp()
+{
+ if (gtOper == GT_ASG)
+ {
+ return gtGetOp1()->OperGet() == GT_DYN_BLK;
+ }
+#ifndef LEGACY_BACKEND
+ else if (gtOper == GT_STORE_DYN_BLK)
+ {
+ return true;
+ }
+#endif
+ return false;
+}
+
+inline bool GenTree::OperIsCopyBlkOp()
+{
+ if (gtOper == GT_ASG)
+ {
+ return (varTypeIsStruct(gtGetOp1()) && ((gtFlags & GTF_BLK_INIT) == 0));
+ }
+#ifndef LEGACY_BACKEND
+ else if (OperIsStoreBlk())
+ {
+ return ((gtFlags & GTF_BLK_INIT) == 0);
+ }
+#endif
+ return false;
+}
+
+inline bool GenTree::OperIsInitBlkOp()
+{
+ if (gtOper == GT_ASG)
+ {
+ return (varTypeIsStruct(gtGetOp1()) && ((gtFlags & GTF_BLK_INIT) != 0));
+ }
+#ifndef LEGACY_BACKEND
+ else if (OperIsStoreBlk())
+ {
+ return ((gtFlags & GTF_BLK_INIT) != 0);
+ }
+#endif
+ return false;
+}
+
+//------------------------------------------------------------------------
+// IsFPZero: Checks whether this is a floating point constant with value 0.0
+//
+// Return Value:
+// Returns true iff the tree is a GT_CNS_DBL with a value of 0.0.
+
+inline bool GenTree::IsFPZero()
+{
+ if ((gtOper == GT_CNS_DBL) && (gtDblCon.gtDconVal == 0.0))
+ {
+ return true;
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------
+// IsIntegralConst: Checks whether this is a constant node with the given value
+//
+// Arguments:
+// constVal - the value of interest
+//
+// Return Value:
+// Returns true iff the tree is an integral constant opcode, with
+// the given value.
+//
+// Notes:
+// Like gtIconVal, the argument is of type ssize_t, so this cannot check for
+// long constants in a target-independent way.
+
+inline bool GenTree::IsIntegralConst(ssize_t constVal)
+
+{
+ if ((gtOper == GT_CNS_INT) && (gtIntConCommon.IconValue() == constVal))
+ {
+ return true;
+ }
+
+ if ((gtOper == GT_CNS_LNG) && (gtIntConCommon.LngValue() == constVal))
+ {
+ return true;
+ }
+
+ return false;
+}
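+
+// A minimal usage sketch (illustrative; "op2" is an assumed GenTreePtr): constant folding and
+// lowering code often guards on specific constant values, e.g.
+//
+//     if (op2->IsIntegralConst(0)) { /* handle "x op 0" */ }
+//     if (op2->IsIntegralConst(1)) { /* handle "x op 1" */ }
+//
+// which matches both GT_CNS_INT nodes and (on 32-bit targets) GT_CNS_LNG nodes with that value.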
+
+inline bool GenTree::IsBoxedValue()
+{
+ assert(gtOper != GT_BOX || gtBox.BoxOp() != nullptr);
+ return (gtOper == GT_BOX) && (gtFlags & GTF_BOX_VALUE);
+}
+
+inline GenTreePtr GenTree::MoveNext()
+{
+ assert(IsList());
+ return gtOp.gtOp2;
+}
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// IsListForMultiRegArg: Given a GenTree node that represents an argument,
+// check that it satisfies the following invariant.
+//
+// For LEGACY_BACKEND or architectures that don't support MultiReg args
+// we don't allow a GT_LIST at all.
+//
+// Currently for AMD64 UNIX we allow a limited case where a GT_LIST is
+// allowed but every element must be a GT_LCL_FLD.
+//
+// For the future targets that allow for Multireg args (and this includes
+// the current ARM64 target) we allow a GT_LIST of arbitrary nodes, these
+// would typically start out as GT_LCL_VARs or GT_LCL_FLDS or GT_INDs,
+// but could be changed into constants or GT_COMMA trees by the later
+// optimization phases.
+//
+// Arguments:
+// 'this' - the GenTree argument node being checked (instance method)
+//
+// Return values:
+// true: the GenTree node is accepted as a valid argument
+// false: the GenTree node is not accepted as a valid argument
+//
+inline bool GenTree::IsListForMultiRegArg()
+{
+ if (!IsList())
+ {
+ // We don't have a GT_LIST, so just return true.
+ return true;
+ }
+ else // We do have a GT_LIST
+ {
+#if defined(LEGACY_BACKEND) || !FEATURE_MULTIREG_ARGS
+
+ // Not allowed to have a GT_LIST for an argument
+ // unless we have a RyuJIT backend and FEATURE_MULTIREG_ARGS
+
+ return false;
+
+#else // we have RyuJIT backend and FEATURE_MULTIREG_ARGS
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // For UNIX ABI we currently only allow a GT_LIST of GT_LCL_FLDs nodes
+ GenTree* gtListPtr = this;
+ while (gtListPtr != nullptr)
+ {
+ // ToDo: fix UNIX_AMD64 so that we do not generate this kind of list
+ // Note the list as currently created is malformed, as the last entry is a nullptr
+ if (gtListPtr->Current() == nullptr)
+ break;
+
+ // Only a list of GT_LCL_FLDs is allowed
+ if (gtListPtr->Current()->OperGet() != GT_LCL_FLD)
+ {
+ return false;
+ }
+ gtListPtr = gtListPtr->MoveNext();
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // Note that for non-UNIX ABI the GT_LIST may contain any node
+ //
+ // We allow this GT_LIST as an argument
+ return true;
+
+#endif // RyuJIT backend and FEATURE_MULTIREG_ARGS
+ }
+}
+#endif // DEBUG
+
+inline GenTreePtr GenTree::Current()
+{
+ assert(IsList());
+ return gtOp.gtOp1;
+}
+
+inline GenTreePtr* GenTree::pCurrent()
+{
+ assert(IsList());
+ return &(gtOp.gtOp1);
+}
+
+inline GenTreePtr GenTree::gtGetOp1()
+{
+ return gtOp.gtOp1;
+}
+
+#ifdef DEBUG
+/* static */
+inline bool GenTree::RequiresNonNullOp2(genTreeOps oper)
+{
+ switch (oper)
+ {
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ case GT_DIV:
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ case GT_INDEX:
+ case GT_ASG:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ case GT_ASG_MOD:
+ case GT_ASG_UDIV:
+ case GT_ASG_UMOD:
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_AND:
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ case GT_COMMA:
+ case GT_QMARK:
+ case GT_COLON:
+ case GT_MKREFANY:
+ return true;
+ default:
+ return false;
+ }
+}
+#endif // DEBUG
+
+inline GenTreePtr GenTree::gtGetOp2()
+{
+ /* gtOp.gtOp2 is only valid for GTK_BINOP nodes. */
+
+ GenTreePtr op2 = OperIsBinary() ? gtOp.gtOp2 : nullptr;
+
+ // This documents the genTreeOps for which gtOp.gtOp2 cannot be nullptr.
+ // This helps prefix in its analysis of code which calls gtGetOp2()
+
+ assert((op2 != nullptr) || !RequiresNonNullOp2(gtOper));
+
+ return op2;
+}
+
+inline GenTreePtr GenTree::gtEffectiveVal(bool commaOnly)
+{
+ switch (gtOper)
+ {
+ case GT_COMMA:
+ return gtOp.gtOp2->gtEffectiveVal(commaOnly);
+
+ case GT_NOP:
+ if (!commaOnly && gtOp.gtOp1 != nullptr)
+ {
+ return gtOp.gtOp1->gtEffectiveVal();
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return this;
+}
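+
+// For example (illustrative): given the tree GT_COMMA(sideEffect, GT_COMMA(other, lclVar)),
+// gtEffectiveVal() follows the second operand of each comma and returns the lclVar node; with
+// commaOnly == false it additionally looks through a GT_NOP wrapper that has a non-null operand.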
+
+inline GenTree* GenTree::gtSkipReloadOrCopy()
+{
+ // There can be only one reload or copy (we can't have a reload/copy of a reload/copy)
+ if (gtOper == GT_RELOAD || gtOper == GT_COPY)
+ {
+ assert(gtGetOp1()->OperGet() != GT_RELOAD && gtGetOp1()->OperGet() != GT_COPY);
+ return gtGetOp1();
+ }
+ return this;
+}
+
+//-----------------------------------------------------------------------------------
+// IsMultiRegCall: whether this is a call node that returns its value in more than one register
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Returns true if this GenTree is a multi register returning call
+inline bool GenTree::IsMultiRegCall() const
+{
+ if (this->IsCall())
+ {
+ // We cannot use AsCall() as it is not declared const
+ const GenTreeCall* call = reinterpret_cast<const GenTreeCall*>(this);
+ return call->HasMultiRegRetVal();
+ }
+
+ return false;
+}
+
+//-------------------------------------------------------------------------
+// IsCopyOrReload: whether this is a GT_COPY or GT_RELOAD node.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Returns true if this GenTree is a copy or reload node.
+inline bool GenTree::IsCopyOrReload() const
+{
+ return (gtOper == GT_COPY || gtOper == GT_RELOAD);
+}
+
+//-----------------------------------------------------------------------------------
+// IsCopyOrReloadOfMultiRegCall: whether this is a GT_COPY or GT_RELOAD of a multi-reg
+// call node.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Returns true if this GenTree is a copy or reload of multi-reg call node.
+inline bool GenTree::IsCopyOrReloadOfMultiRegCall() const
+{
+ if (IsCopyOrReload())
+ {
+ GenTree* t = const_cast<GenTree*>(this);
+ return t->gtGetOp1()->IsMultiRegCall();
+ }
+
+ return false;
+}
+
+inline bool GenTree::IsCnsIntOrI() const
+{
+ return (gtOper == GT_CNS_INT);
+}
+
+inline bool GenTree::IsIntegralConst() const
+{
+#ifdef _TARGET_64BIT_
+ return IsCnsIntOrI();
+#else // !_TARGET_64BIT_
+ return ((gtOper == GT_CNS_INT) || (gtOper == GT_CNS_LNG));
+#endif // !_TARGET_64BIT_
+}
+
+inline bool GenTree::IsIntCnsFitsInI32()
+{
+#ifdef _TARGET_64BIT_
+ return IsCnsIntOrI() && ((int)gtIntConCommon.IconValue() == gtIntConCommon.IconValue());
+#else // !_TARGET_64BIT_
+ return IsCnsIntOrI();
+#endif // !_TARGET_64BIT_
+}
+
+inline bool GenTree::IsCnsFltOrDbl() const
+{
+ return OperGet() == GT_CNS_DBL;
+}
+
+inline bool GenTree::IsCnsNonZeroFltOrDbl()
+{
+ if (OperGet() == GT_CNS_DBL)
+ {
+ double constValue = gtDblCon.gtDconVal;
+ return *(__int64*)&constValue != 0;
+ }
+
+ return false;
+}
+
+inline bool GenTree::IsHelperCall()
+{
+ return OperGet() == GT_CALL && gtCall.gtCallType == CT_HELPER;
+}
+
+inline var_types GenTree::CastFromType()
+{
+ return this->gtCast.CastOp()->TypeGet();
+}
+inline var_types& GenTree::CastToType()
+{
+ return this->gtCast.gtCastType;
+}
+
+//-----------------------------------------------------------------------------------
+// HasGCPtr: determine whether this block op involves GC pointers
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Returns true iff the object being copied contains one or more GC pointers.
+//
+// Notes:
+// Of the block nodes, only GT_OBJ and GT_STORE_OBJ are allowed to have GC pointers.
+//
+inline bool GenTreeBlk::HasGCPtr()
+{
+ if ((gtOper == GT_OBJ) || (gtOper == GT_STORE_OBJ))
+ {
+ return (AsObj()->gtGcPtrCount != 0);
+ }
+ return false;
+}
+
+inline bool GenTree::isContainedSpillTemp() const
+{
+#if !defined(LEGACY_BACKEND)
+ // If spilled and no reg at use, then it is treated as contained.
+ if (((gtFlags & GTF_SPILLED) != 0) && ((gtFlags & GTF_NOREG_AT_USE) != 0))
+ {
+ return true;
+ }
+#endif //! LEGACY_BACKEND
+
+ return false;
+}
+
+/*****************************************************************************/
+
+#ifndef _HOST_64BIT_
+#include <poppack.h>
+#endif
+
+/*****************************************************************************/
+
+#if SMALL_TREE_NODES
+
+// In debug, on some platforms (e.g., when LATE_DISASM is defined), GenTreeIntCon is bigger than GenTreeLclFld.
+const size_t TREE_NODE_SZ_SMALL = max(sizeof(GenTreeIntCon), sizeof(GenTreeLclFld));
+
+#endif // SMALL_TREE_NODES
+
+const size_t TREE_NODE_SZ_LARGE = sizeof(GenTreeCall);
+
+/*****************************************************************************
+ * Types returned by GenTree::lvaLclVarRefs()
+ */
+
+enum varRefKinds
+{
+ VR_INVARIANT = 0x00, // an invariant value
+ VR_NONE = 0x00,
+ VR_IND_REF = 0x01, // an object reference
+ VR_IND_SCL = 0x02, // a non-object reference
+ VR_GLB_VAR = 0x04, // a global (clsVar)
+};
+// Add a temp define to avoid merge conflict.
+#define VR_IND_PTR VR_IND_REF
+
+/*****************************************************************************/
+#endif // !GENTREE_H
+/*****************************************************************************/
diff --git a/src/jit/gschecks.cpp b/src/jit/gschecks.cpp
new file mode 100644
index 0000000000..43cbb892e9
--- /dev/null
+++ b/src/jit/gschecks.cpp
@@ -0,0 +1,583 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX GSChecks XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/*****************************************************************************
+ * gsGSChecksInitCookie
+ * Grabs the cookie for detecting overflow of unsafe buffers.
+ */
+void Compiler::gsGSChecksInitCookie()
+{
+ var_types type = TYP_I_IMPL;
+
+ lvaGSSecurityCookie = lvaGrabTemp(false DEBUGARG("GSSecurityCookie"));
+
+ // Prevent cookie init/check from being optimized
+ lvaSetVarAddrExposed(lvaGSSecurityCookie);
+ lvaTable[lvaGSSecurityCookie].lvType = type;
+
+ info.compCompHnd->getGSCookie(&gsGlobalSecurityCookieVal, &gsGlobalSecurityCookieAddr);
+}
+
+const unsigned NO_SHADOW_COPY = UINT_MAX;
+
+/*****************************************************************************
+ * gsCopyShadowParams
+ * The current function has an unsafe buffer on the stack. Search for vulnerable
+ * parameters which could be used to modify a code address and take over the process
+ * in the case of a buffer overrun. Create a safe local copy for each vulnerable parameter,
+ * which will be allocated below the unsafe buffer. Change uses of the param to the
+ * shadow copy.
+ *
+ * A pointer under indirection is considered vulnerable. A malicious user could read from
+ * protected memory or write to it. If a parameter is assigned/computed into another variable,
+ * and is a pointer (i.e., under indirection), then we consider the variable to be part of the
+ * equivalence class with the parameter. All parameters in the equivalence class are shadowed.
+ */
+void Compiler::gsCopyShadowParams()
+{
+ if (info.compIsVarArgs)
+ {
+ return;
+ }
+
+ // Allocate array for shadow param info
+ gsShadowVarInfo = new (this, CMK_Unknown) ShadowParamVarInfo[lvaCount]();
+
+ // Find groups of variables assigned to each other, and also
+ // tracks variables which are dereferenced and marks them as ptrs.
+ // Look for assignments to *p, and ptrs passed to functions
+ if (gsFindVulnerableParams())
+ {
+ // Replace vulnerable params by shadow copies.
+ gsParamsToShadows();
+ }
+}
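+
+// Illustrative scenario (a sketch, not from the source): for a method along the lines of
+//
+//     void M(char* p, int i) { char buf[16]; /* ... */ *p = (char)i; }
+//
+// "buf" is an unsafe buffer and "p" is dereferenced, so gsFindVulnerableParams() marks "p" as
+// lvIsPtr. gsParamsToShadows() then grabs a shadow local for "p", copies the incoming value into
+// it at the start of the scratch first block, and rewrites uses of "p" to the shadow copy, which
+// the frame layout places below the unsafe buffer.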
+
+// This struct tracks how a tree is being used
+
+struct MarkPtrsInfo
+{
+ Compiler* comp;
+ unsigned lvAssignDef; // Which local variable is the tree being assigned to?
+ bool isAssignSrc; // Is this the source value for an assignment?
+ bool isUnderIndir; // Is this a pointer value tree that is being dereferenced?
+ bool skipNextNode; // Skip a single node during the tree-walk
+
+#ifdef DEBUG
+ void Print()
+ {
+ printf(
+ "[MarkPtrsInfo] = {comp = %p, lvAssignDef = %d, isAssignSrc = %d, isUnderIndir = %d, skipNextNode = %d}\n",
+ comp, lvAssignDef, isAssignSrc, isUnderIndir, skipNextNode);
+ }
+#endif
+};
+
+/*****************************************************************************
+ * gsMarkPtrsAndAssignGroups
+ * Walk a tree looking for assignment groups, variables whose value is used
+ * in a *p store or use, and variables passed to calls. This info is then used
+ * to determine which parameters are vulnerable.
+ * This function carries state indicating whether it is under an assign node, call node,
+ * or indirection node. It starts a new tree walk for its subtrees when the state
+ * changes.
+ */
+Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fgWalkData* data)
+{
+ struct MarkPtrsInfo* pState = (MarkPtrsInfo*)data->pCallbackData;
+ struct MarkPtrsInfo newState = *pState;
+ Compiler* comp = data->compiler;
+ GenTreePtr tree = *pTree;
+ ShadowParamVarInfo* shadowVarInfo = pState->comp->gsShadowVarInfo;
+ assert(shadowVarInfo);
+ bool fIsBlk = false;
+ unsigned lclNum;
+
+ assert(!pState->isAssignSrc || pState->lvAssignDef != (unsigned)-1);
+
+ if (pState->skipNextNode)
+ {
+ pState->skipNextNode = false;
+ return WALK_CONTINUE;
+ }
+
+ switch (tree->OperGet())
+ {
+ // Indirections - look for *p uses and defs
+ case GT_IND:
+ case GT_OBJ:
+ case GT_ARR_ELEM:
+ case GT_ARR_INDEX:
+ case GT_ARR_OFFSET:
+ case GT_FIELD:
+
+ newState.isUnderIndir = true;
+ {
+ newState.skipNextNode = true; // Don't have to worry about which kind of node we're dealing with
+ comp->fgWalkTreePre(&tree, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+
+ return WALK_SKIP_SUBTREES;
+
+ // local vars and param uses
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ if (pState->isUnderIndir)
+ {
+ // The variable is being dereferenced for a read or a write.
+ comp->lvaTable[lclNum].lvIsPtr = 1;
+ }
+
+ if (pState->isAssignSrc)
+ {
+ //
+ // Add lvAssignDef and lclNum to a common assign group
+ if (shadowVarInfo[pState->lvAssignDef].assignGroup)
+ {
+ if (shadowVarInfo[lclNum].assignGroup)
+ {
+ // OR both bit vector
+ shadowVarInfo[pState->lvAssignDef].assignGroup->bitVectOr(shadowVarInfo[lclNum].assignGroup);
+ }
+ else
+ {
+ shadowVarInfo[pState->lvAssignDef].assignGroup->bitVectSet(lclNum);
+ }
+
+ // Point both to the same bit vector
+ shadowVarInfo[lclNum].assignGroup = shadowVarInfo[pState->lvAssignDef].assignGroup;
+ }
+ else if (shadowVarInfo[lclNum].assignGroup)
+ {
+ shadowVarInfo[lclNum].assignGroup->bitVectSet(pState->lvAssignDef);
+
+ // Point both to the same bit vector
+ shadowVarInfo[pState->lvAssignDef].assignGroup = shadowVarInfo[lclNum].assignGroup;
+ }
+ else
+ {
+ FixedBitVect* bv = FixedBitVect::bitVectInit(pState->comp->lvaCount, pState->comp);
+
+ // (shadowVarInfo[pState->lvAssignDef] == NULL && shadowVarInfo[lclNew] == NULL);
+ // Neither of them has an assign group yet. Make a new one.
+ shadowVarInfo[pState->lvAssignDef].assignGroup = bv;
+ shadowVarInfo[lclNum].assignGroup = bv;
+ bv->bitVectSet(pState->lvAssignDef);
+ bv->bitVectSet(lclNum);
+ }
+ }
+ return WALK_CONTINUE;
+
+ // Calls - Mark arg variables
+ case GT_CALL:
+
+ newState.isUnderIndir = false;
+ newState.isAssignSrc = false;
+ {
+ if (tree->gtCall.gtCallObjp)
+ {
+ newState.isUnderIndir = true;
+ comp->fgWalkTreePre(&tree->gtCall.gtCallObjp, gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+
+ for (GenTreeArgList* args = tree->gtCall.gtCallArgs; args; args = args->Rest())
+ {
+ comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+ for (GenTreeArgList* args = tree->gtCall.gtCallLateArgs; args; args = args->Rest())
+ {
+ comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ newState.isUnderIndir = true;
+
+ // A function pointer is treated like a write-through pointer since
+ // it controls what code gets executed, and so indirectly can cause
+ // a write to memory.
+ comp->fgWalkTreePre(&tree->gtCall.gtCallAddr, gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+ }
+ return WALK_SKIP_SUBTREES;
+
+ case GT_ADDR:
+ newState.isUnderIndir = false;
+ // We'll assume p in "**p = " can be vulnerable because by changing 'p', someone
+ // could control where **p stores to.
+ {
+ comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+ return WALK_SKIP_SUBTREES;
+
+ default:
+ // Assignments - track assign groups and *p defs.
+ if (tree->OperIsAssignment())
+ {
+ bool isLocVar;
+ bool isLocFld;
+
+ if (tree->OperIsBlkOp())
+ {
+ // Blk assignments are always handled as if they have implicit indirections.
+ // TODO-1stClassStructs: improve this.
+ newState.isUnderIndir = true;
+ comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
+
+ if (tree->OperIsInitBlkOp())
+ {
+ newState.isUnderIndir = false;
+ }
+ comp->fgWalkTreePre(&tree->gtOp.gtOp2, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+ else
+ {
+ // Walk dst side
+ comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
+
+ // Now handle src side
+ isLocVar = tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR;
+ isLocFld = tree->gtOp.gtOp1->OperGet() == GT_LCL_FLD;
+
+ if ((isLocVar || isLocFld) && tree->gtOp.gtOp2)
+ {
+ lclNum = tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ newState.lvAssignDef = lclNum;
+ newState.isAssignSrc = true;
+ }
+
+ comp->fgWalkTreePre(&tree->gtOp.gtOp2, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+
+ return WALK_CONTINUE;
+}
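+
+// Worked example (illustrative): for the statements "a = p; b = a; *b = 0;", the first assignment
+// places {a, p} in one assign-group bit vector, the second merges "b" into the same vector, and
+// the "*b = 0" store sets lvIsPtr on "b". gsFindVulnerableParams() later propagates lvIsPtr across
+// the whole group, so the parameter "p" is also treated as vulnerable and gets a shadow copy.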
+
+/*****************************************************************************
+ * gsFindVulnerableParams
+ * Walk all the trees looking for ptrs, args, assign groups, *p stores, etc.
+ * Then use that info to figure out vulnerable pointers.
+ *
+ * It returns true if it found at least one vulnerable pointer parameter that
+ * needs to be shadow-copied.
+ */
+
+bool Compiler::gsFindVulnerableParams()
+{
+ MarkPtrsInfo info;
+
+ info.comp = this;
+ info.lvAssignDef = (unsigned)-1;
+ info.isUnderIndir = false;
+ info.isAssignSrc = false;
+ info.skipNextNode = false;
+
+ // Walk all the trees setting lvIsWritePtr, lvIsOutgoingArg, lvIsPtr and assignGroup.
+ fgWalkAllTreesPre(gsMarkPtrsAndAssignGroups, &info);
+
+ // Track whether at least one vulnerable local was found by the end of the loop.
+ bool hasOneVulnerable = false;
+
+ // Initialize propagated[v0...vn] = {0}^n, so we can skip the ones propagated through
+ // some assign group.
+ FixedBitVect* propagated = (lvaCount > 0) ? FixedBitVect::bitVectInit(lvaCount, this) : nullptr;
+
+ for (UINT lclNum = 0; lclNum < lvaCount; lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ ShadowParamVarInfo* shadowInfo = &gsShadowVarInfo[lclNum];
+
+ // If the local was dereferenced (lvIsPtr) or is an unsafe buffer, we consider it vulnerable.
+ if (varDsc->lvIsPtr || varDsc->lvIsUnsafeBuffer)
+ {
+ hasOneVulnerable = true;
+ }
+
+ // Now, propagate the info through the assign group (an equivalence class of vars transitively assigned.)
+ if (shadowInfo->assignGroup == nullptr || propagated->bitVectTest(lclNum))
+ {
+ continue;
+ }
+
+ // Propagate lvIsPtr, so that:
+ // 1. Any parameter in the equivalence class can be identified as lvIsPtr and hence shadowed.
+ // 2. Buffers with pointers are placed at lower memory addresses than buffers without pointers.
+ UINT isUnderIndir = varDsc->lvIsPtr;
+
+ // First pass -- find if any variable is vulnerable.
+ FixedBitVect* assignGroup = shadowInfo->assignGroup;
+ for (UINT lclNum = assignGroup->bitVectGetFirst(); lclNum != (unsigned)-1 && !isUnderIndir;
+ lclNum = assignGroup->bitVectGetNext(lclNum))
+ {
+ isUnderIndir |= lvaTable[lclNum].lvIsPtr;
+ }
+
+ // Vulnerable, so propagate to all members of the equivalence class.
+ if (isUnderIndir)
+ {
+ hasOneVulnerable = true;
+ }
+ // Nothing to propagate.
+ else
+ {
+ continue;
+ }
+
+ // Second pass -- mark all members of the group as vulnerable.
+ assert(isUnderIndir);
+ for (UINT lclNum = assignGroup->bitVectGetFirst(); lclNum != (unsigned)-1;
+ lclNum = assignGroup->bitVectGetNext(lclNum))
+ {
+ lvaTable[lclNum].lvIsPtr = TRUE;
+ propagated->bitVectSet(lclNum);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Equivalence assign group %s: ", isUnderIndir ? "isPtr " : "");
+ for (UINT lclNum = assignGroup->bitVectGetFirst(); lclNum != (unsigned)-1;
+ lclNum = assignGroup->bitVectGetNext(lclNum))
+ {
+ gtDispLclVar(lclNum, false);
+ printf(" ");
+ }
+ printf("\n");
+ }
+#endif
+ }
+
+ return hasOneVulnerable;
+}
+
+/*****************************************************************************
+ * gsParamsToShadows
+ * Copy each vulnerable param ptr or buffer to a local shadow copy and replace
+ * uses of the param by the shadow copy
+ */
+void Compiler::gsParamsToShadows()
+{
+ // Cache old count since we'll add new variables, and
+ // gsShadowVarInfo will not grow to accommodate the new ones.
+ UINT lvaOldCount = lvaCount;
+
+ // Create shadow copy for each param candidate
+ for (UINT lclNum = 0; lclNum < lvaOldCount; lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ gsShadowVarInfo[lclNum].shadowCopy = NO_SHADOW_COPY;
+
+ // Only care about params whose values are on the stack
+ if (!ShadowParamVarInfo::mayNeedShadowCopy(varDsc))
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsPtr && !varDsc->lvIsUnsafeBuffer)
+ {
+ continue;
+ }
+
+ int shadowVar = lvaGrabTemp(false DEBUGARG("shadowVar"));
+ // Copy some info
+
+ var_types type = varTypeIsSmall(varDsc->TypeGet()) ? TYP_INT : varDsc->TypeGet();
+ lvaTable[shadowVar].lvType = type;
+
+#ifdef FEATURE_SIMD
+ lvaTable[shadowVar].lvSIMDType = varDsc->lvSIMDType;
+ lvaTable[shadowVar].lvUsedInSIMDIntrinsic = varDsc->lvUsedInSIMDIntrinsic;
+ if (varDsc->lvSIMDType)
+ {
+ lvaTable[shadowVar].lvBaseType = varDsc->lvBaseType;
+ }
+#endif
+ lvaTable[shadowVar].lvRegStruct = varDsc->lvRegStruct;
+
+ lvaTable[shadowVar].lvAddrExposed = varDsc->lvAddrExposed;
+ lvaTable[shadowVar].lvDoNotEnregister = varDsc->lvDoNotEnregister;
+#ifdef DEBUG
+ lvaTable[shadowVar].lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr;
+ lvaTable[shadowVar].lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr;
+ lvaTable[shadowVar].lvLclFieldExpr = varDsc->lvLclFieldExpr;
+ lvaTable[shadowVar].lvLiveAcrossUCall = varDsc->lvLiveAcrossUCall;
+#endif
+ lvaTable[shadowVar].lvVerTypeInfo = varDsc->lvVerTypeInfo;
+ lvaTable[shadowVar].lvGcLayout = varDsc->lvGcLayout;
+ lvaTable[shadowVar].lvIsUnsafeBuffer = varDsc->lvIsUnsafeBuffer;
+ lvaTable[shadowVar].lvIsPtr = varDsc->lvIsPtr;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Var V%02u is shadow param candidate. Shadow copy is V%02u.\n", lclNum, shadowVar);
+ }
+#endif
+
+ gsShadowVarInfo[lclNum].shadowCopy = shadowVar;
+ }
+
+ // Replace param uses with shadow copy
+ fgWalkAllTreesPre(gsReplaceShadowParams, (void*)this);
+
+ // Now insert code to copy the params to their shadow copy.
+ for (UINT lclNum = 0; lclNum < lvaOldCount; lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ unsigned shadowVar = gsShadowVarInfo[lclNum].shadowCopy;
+ if (shadowVar == NO_SHADOW_COPY)
+ {
+ continue;
+ }
+
+ var_types type = lvaTable[shadowVar].TypeGet();
+
+ GenTreePtr src = gtNewLclvNode(lclNum, varDsc->TypeGet());
+ GenTreePtr dst = gtNewLclvNode(shadowVar, type);
+
+ src->gtFlags |= GTF_DONT_CSE;
+ dst->gtFlags |= GTF_DONT_CSE;
+
+ GenTreePtr opAssign = nullptr;
+ if (type == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+
+ // We don't need unsafe value cls check here since we are copying the params and this flag
+ // would have been set on the original param before reaching here.
+ lvaSetStruct(shadowVar, clsHnd, false);
+
+ src = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
+ dst = gtNewOperNode(GT_ADDR, TYP_BYREF, dst);
+
+ opAssign = gtNewCpObjNode(dst, src, clsHnd, false);
+ lvaTable[shadowVar].lvIsMultiRegArg = lvaTable[lclNum].lvIsMultiRegArg;
+ lvaTable[shadowVar].lvIsMultiRegRet = lvaTable[lclNum].lvIsMultiRegRet;
+ }
+ else
+ {
+ opAssign = gtNewAssignNode(dst, src);
+ }
+ fgEnsureFirstBBisScratch();
+ (void)fgInsertStmtAtBeg(fgFirstBB, fgMorphTree(opAssign));
+ }
+
+ // If the method has "Jmp CalleeMethod", then we need to copy shadow params back to original
+ // params before "jmp" to CalleeMethod.
+ if (compJmpOpUsed)
+ {
+ // There could be more than one basic block ending with a "Jmp" type tail call.
+ // We would have to insert assignments in all such blocks, just before the GT_JMP statement.
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbJumpKind != BBJ_RETURN)
+ {
+ continue;
+ }
+
+ if ((block->bbFlags & BBF_HAS_JMP) == 0)
+ {
+ continue;
+ }
+
+ for (UINT lclNum = 0; lclNum < info.compArgsCount; lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ unsigned shadowVar = gsShadowVarInfo[lclNum].shadowCopy;
+ if (shadowVar == NO_SHADOW_COPY)
+ {
+ continue;
+ }
+
+ GenTreePtr src = gtNewLclvNode(shadowVar, lvaTable[shadowVar].TypeGet());
+ GenTreePtr dst = gtNewLclvNode(lclNum, varDsc->TypeGet());
+
+ src->gtFlags |= GTF_DONT_CSE;
+ dst->gtFlags |= GTF_DONT_CSE;
+
+ GenTreePtr opAssign = nullptr;
+ if (varDsc->TypeGet() == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ src = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
+ dst = gtNewOperNode(GT_ADDR, TYP_BYREF, dst);
+
+ opAssign = gtNewCpObjNode(dst, src, clsHnd, false);
+ }
+ else
+ {
+ opAssign = gtNewAssignNode(dst, src);
+ }
+
+ (void)fgInsertStmtNearEnd(block, fgMorphTree(opAssign));
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ * gsReplaceShadowParams (tree-walk call-back)
+ * Replace all vulnerable param uses with their shadow copies.
+ */
+
+Compiler::fgWalkResult Compiler::gsReplaceShadowParams(GenTreePtr* pTree, fgWalkData* data)
+{
+ Compiler* comp = data->compiler;
+ GenTreePtr tree = *pTree;
+ GenTreePtr asg = nullptr;
+
+ if (tree->gtOper == GT_ASG)
+ {
+ asg = tree; // "asg" is the assignment tree.
+ tree = tree->gtOp.gtOp1; // "tree" is the local var tree at the left-hand side of the assignment.
+ }
+
+ if (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_FLD)
+ {
+ UINT paramNum = tree->gtLclVarCommon.gtLclNum;
+
+ if (!ShadowParamVarInfo::mayNeedShadowCopy(&comp->lvaTable[paramNum]) ||
+ comp->gsShadowVarInfo[paramNum].shadowCopy == NO_SHADOW_COPY)
+ {
+ return WALK_CONTINUE;
+ }
+
+ tree->gtLclVarCommon.SetLclNum(comp->gsShadowVarInfo[paramNum].shadowCopy);
+
+ // In gsParamsToShadows(), we create a shadow var of TYP_INT for every small type param.
+ // Make sure we update the type of the local var tree as well.
+ if (varTypeIsSmall(comp->lvaTable[paramNum].TypeGet()))
+ {
+ tree->gtType = TYP_INT;
+ if (asg)
+ {
+ // If this is an assignment tree, propagate the type to it as well.
+ asg->gtType = TYP_INT;
+ }
+ }
+ }
+
+ return WALK_CONTINUE;
+}
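+
+// For illustration only (hypothetical local numbers): if V01 is a vulnerable TYP_SHORT
+// param whose shadow copy is V05, the walk above rewrites a use such as
+//
+//     GT_ASG(short)
+//         GT_LCL_VAR(short) V01
+//         <expr>
+//
+// into
+//
+//     GT_ASG(int)
+//         GT_LCL_VAR(int) V05
+//         <expr>
+//
+// since shadow copies of small-typed params are created as TYP_INT and the type of the
+// assignment node is widened to match.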
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
new file mode 100644
index 0000000000..a03bcfe4b0
--- /dev/null
+++ b/src/jit/gtlist.h
@@ -0,0 +1,255 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+/*****************************************************************************/
+#ifndef GTNODE
+#error Define GTNODE before including this file.
+#endif
+/*****************************************************************************/
+//
+// Node enum
+// , "Node name"
+// ,commutative
+// ,operKind
+
+GTNODE(NONE , "<none>" ,0,GTK_SPECIAL)
+
+//-----------------------------------------------------------------------------
+// Leaf nodes (i.e. these nodes have no sub-operands):
+//-----------------------------------------------------------------------------
+
+GTNODE(LCL_VAR , "lclVar" ,0,GTK_LEAF|GTK_LOCAL) // local variable
+GTNODE(LCL_FLD , "lclFld" ,0,GTK_LEAF|GTK_LOCAL) // field in a non-primitive variable
+GTNODE(LCL_VAR_ADDR , "&lclVar" ,0,GTK_LEAF) // address of local variable
+GTNODE(LCL_FLD_ADDR , "&lclFld" ,0,GTK_LEAF) // address of field in a non-primitive variable
+GTNODE(STORE_LCL_VAR , "st.lclVar" ,0,GTK_UNOP|GTK_LOCAL|GTK_NOVALUE) // store to local variable
+GTNODE(STORE_LCL_FLD , "st.lclFld" ,0,GTK_UNOP|GTK_LOCAL|GTK_NOVALUE) // store to field in a non-primitive variable
+GTNODE(CATCH_ARG , "catchArg" ,0,GTK_LEAF) // Exception object in a catch block
+GTNODE(LABEL , "codeLabel" ,0,GTK_LEAF) // Jump-target
+GTNODE(FTN_ADDR , "ftnAddr" ,0,GTK_LEAF) // Address of a function
+GTNODE(RET_EXPR , "retExpr" ,0,GTK_LEAF) // Place holder for the return expression from an inline candidate
+
+//-----------------------------------------------------------------------------
+// Constant nodes:
+//-----------------------------------------------------------------------------
+
+GTNODE(CNS_INT , "const" ,0,GTK_LEAF|GTK_CONST)
+GTNODE(CNS_LNG , "lconst" ,0,GTK_LEAF|GTK_CONST)
+GTNODE(CNS_DBL , "dconst" ,0,GTK_LEAF|GTK_CONST)
+GTNODE(CNS_STR , "sconst" ,0,GTK_LEAF|GTK_CONST)
+
+//-----------------------------------------------------------------------------
+// Unary operators (1 operand):
+//-----------------------------------------------------------------------------
+
+GTNODE(NOT , "~" ,0,GTK_UNOP)
+GTNODE(NOP , "nop" ,0,GTK_UNOP)
+GTNODE(NEG , "unary -" ,0,GTK_UNOP)
+GTNODE(COPY , "copy" ,0,GTK_UNOP) // Copies a variable from its current location to a register that satisfies
+ // code generation constraints. The child is the actual lclVar node.
+GTNODE(RELOAD , "reload" ,0,GTK_UNOP)
+GTNODE(CHS , "flipsign" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) // GT_CHS is actually unary -- op2 is ignored.
+ // Changing it to unary presently causes problems, though -- it would take a little work to fix.
+
+GTNODE(ARR_LENGTH , "arrLen" ,0,GTK_UNOP|GTK_EXOP) // array-length
+
+GTNODE(INTRINSIC , "intrinsic" ,0,GTK_BINOP|GTK_EXOP) // intrinsics
+
+GTNODE(LOCKADD , "lockAdd" ,0,GTK_BINOP|GTK_NOVALUE)
+GTNODE(XADD , "XAdd" ,0,GTK_BINOP)
+GTNODE(XCHG , "Xchg" ,0,GTK_BINOP)
+GTNODE(CMPXCHG , "cmpxchg" ,0,GTK_SPECIAL)
+GTNODE(MEMORYBARRIER , "memoryBarrier" ,0,GTK_LEAF|GTK_NOVALUE)
+
+GTNODE(CAST , "cast" ,0,GTK_UNOP|GTK_EXOP) // conversion to another type
+GTNODE(CKFINITE , "ckfinite" ,0,GTK_UNOP) // Check for NaN
+GTNODE(LCLHEAP , "lclHeap" ,0,GTK_UNOP) // alloca()
+GTNODE(JMP , "jump" ,0,GTK_LEAF|GTK_NOVALUE) // Jump to another function
+
+
+GTNODE(ADDR , "addr" ,0,GTK_UNOP) // address of
+GTNODE(IND , "indir" ,0,GTK_UNOP) // load indirection
+GTNODE(STOREIND , "storeIndir" ,0,GTK_BINOP|GTK_NOVALUE) // store indirection
+
+ // TODO-Cleanup: GT_ARR_BOUNDS_CHECK should be made a GTK_BINOP now that it has only two child nodes
+GTNODE(ARR_BOUNDS_CHECK , "arrBndsChk" ,0,GTK_SPECIAL|GTK_NOVALUE) // array bounds check
+GTNODE(OBJ , "obj" ,0,GTK_UNOP|GTK_EXOP) // Object that MAY have gc pointers, and thus includes the relevant gc layout info.
+GTNODE(STORE_OBJ , "storeObj" ,0,GTK_BINOP|GTK_EXOP|GTK_NOVALUE) // Object that MAY have gc pointers, and thus includes the relevant gc layout info.
+GTNODE(BLK , "blk" ,0,GTK_UNOP) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields)
+GTNODE(STORE_BLK , "storeBlk" ,0,GTK_BINOP|GTK_NOVALUE) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields)
+GTNODE(DYN_BLK , "DynBlk" ,0,GTK_SPECIAL) // Dynamically sized block object
+GTNODE(STORE_DYN_BLK , "storeDynBlk" ,0,GTK_SPECIAL|GTK_NOVALUE) // Dynamically sized block object
+GTNODE(BOX , "box" ,0,GTK_UNOP|GTK_EXOP|GTK_NOTLIR)
+
+#ifdef FEATURE_SIMD
+GTNODE(SIMD_CHK , "simdChk" ,0,GTK_SPECIAL|GTK_NOVALUE) // Compare whether an index is less than the given SIMD vector length, and call CORINFO_HELP_RNGCHKFAIL if not.
+ // TODO-CQ: In future may want to add a field that specifies different exceptions but we'll
+ // need VM assistance for that.
+ // TODO-CQ: It would actually be very nice to make this an unconditional throw, and expose the control flow that
+ // does the compare, so that it can be more easily optimized. But that involves generating qmarks at import time...
+#endif // FEATURE_SIMD
+
+GTNODE(ALLOCOBJ , "allocObj" ,0,GTK_UNOP|GTK_EXOP) // object allocator
+
+//-----------------------------------------------------------------------------
+// Binary operators (2 operands):
+//-----------------------------------------------------------------------------
+
+GTNODE(ADD , "+" ,1,GTK_BINOP)
+GTNODE(SUB , "-" ,0,GTK_BINOP)
+GTNODE(MUL , "*" ,1,GTK_BINOP)
+GTNODE(DIV , "/" ,0,GTK_BINOP)
+GTNODE(MOD , "%" ,0,GTK_BINOP)
+
+GTNODE(UDIV , "un-/" ,0,GTK_BINOP)
+GTNODE(UMOD , "un-%" ,0,GTK_BINOP)
+
+GTNODE(OR , "|" ,1,GTK_BINOP|GTK_LOGOP)
+GTNODE(XOR , "^" ,1,GTK_BINOP|GTK_LOGOP)
+GTNODE(AND , "&" ,1,GTK_BINOP|GTK_LOGOP)
+
+GTNODE(LSH , "<<" ,0,GTK_BINOP)
+GTNODE(RSH , ">>" ,0,GTK_BINOP)
+GTNODE(RSZ , ">>>" ,0,GTK_BINOP)
+GTNODE(ROL , "rol" ,0,GTK_BINOP)
+GTNODE(ROR , "ror" ,0,GTK_BINOP)
+GTNODE(MULHI , "mulhi" ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply)
+
+GTNODE(ASG , "=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_ADD , "+=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_SUB , "-=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_MUL , "*=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_DIV , "/=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_MOD , "%=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+
+GTNODE(ASG_UDIV , "/=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_UMOD , "%=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+
+GTNODE(ASG_OR , "|=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_XOR , "^=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_AND , "&=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_LSH , "<<=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_RSH , ">>=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_RSZ , ">>>=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+
+GTNODE(EQ , "==" ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(NE , "!=" ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(LT , "<" ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(LE , "<=" ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(GE , ">=" ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(GT , ">" ,0,GTK_BINOP|GTK_RELOP)
+
+GTNODE(COMMA , "comma" ,0,GTK_BINOP|GTK_NOTLIR)
+
+GTNODE(QMARK , "qmark" ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR)
+GTNODE(COLON , "colon" ,0,GTK_BINOP|GTK_NOTLIR)
+
+GTNODE(INDEX , "[]" ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR) // SZ-array-element
+
+GTNODE(MKREFANY , "mkrefany" ,0,GTK_BINOP)
+
+GTNODE(LEA , "lea" ,0,GTK_BINOP|GTK_EXOP)
+
+#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
+// A GT_LONG node simply represents the long value produced by the concatenation
+// of its two (lower and upper half) operands. Some GT_LONG nodes are transient,
+// created during the decomposition of longs; others are handled by codegen as
+// operands of nodes such as calls, returns and stores of long lclVars.
+GTNODE(LONG , "gt_long" ,0,GTK_BINOP)
+
+// The following nodes represent the lower and upper halves of a 64-bit operation
+// that requires a carry/borrow; the upper-half nodes are named GT_XXX_HI for
+// consistency.
+GTNODE(ADD_LO , "+Lo" ,1,GTK_BINOP)
+GTNODE(ADD_HI , "+Hi" ,1,GTK_BINOP)
+GTNODE(SUB_LO , "-Lo" ,0,GTK_BINOP)
+GTNODE(SUB_HI , "-Hi" ,0,GTK_BINOP)
+GTNODE(MUL_HI , "*Hi" ,1,GTK_BINOP)
+GTNODE(DIV_HI , "/Hi" ,0,GTK_BINOP)
+GTNODE(MOD_HI , "%Hi" ,0,GTK_BINOP)
+#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
+
+#ifdef FEATURE_SIMD
+GTNODE(SIMD , "simd" ,0,GTK_BINOP|GTK_EXOP) // SIMD functions/operators/intrinsics
+#endif // FEATURE_SIMD
+
+//-----------------------------------------------------------------------------
+// Other nodes that look like unary/binary operators:
+//-----------------------------------------------------------------------------
+
+GTNODE(JTRUE , "jmpTrue" ,0,GTK_UNOP|GTK_NOVALUE)
+
+GTNODE(LIST , "<list>" ,0,GTK_BINOP)
+
+//-----------------------------------------------------------------------------
+// Other nodes that have special structure:
+//-----------------------------------------------------------------------------
+
+GTNODE(FIELD , "field" ,0,GTK_SPECIAL) // Member-field
+GTNODE(ARR_ELEM , "arrMD&" ,0,GTK_SPECIAL) // Multi-dimensional array-element address
+GTNODE(ARR_INDEX , "arrMDIdx" ,0,GTK_BINOP|GTK_EXOP) // Effective, bounds-checked index for one dimension of a multi-dimensional array element
+GTNODE(ARR_OFFSET , "arrMDOffs" ,0,GTK_SPECIAL) // Flattened offset of multi-dimensional array element
+GTNODE(CALL , "call()" ,0,GTK_SPECIAL)
+
+//-----------------------------------------------------------------------------
+// Statement operator nodes:
+//-----------------------------------------------------------------------------
+
+GTNODE(BEG_STMTS , "begStmts" ,0,GTK_SPECIAL|GTK_NOVALUE) // used only temporarily in importer by impBegin/EndTreeList()
+GTNODE(STMT , "stmtExpr" ,0,GTK_SPECIAL|GTK_NOVALUE) // top-level list nodes in bbTreeList
+
+GTNODE(RETURN , "return" ,0,GTK_UNOP|GTK_NOVALUE) // return from current function
+GTNODE(SWITCH , "switch" ,0,GTK_UNOP|GTK_NOVALUE) // switch
+
+GTNODE(NO_OP , "no_op" ,0,GTK_LEAF|GTK_NOVALUE) // nop!
+
+GTNODE(START_NONGC, "start_nongc",0,GTK_LEAF|GTK_NOVALUE) // starts a new instruction group that will be non-gc interruptible
+
+GTNODE(PROF_HOOK , "prof_hook" ,0,GTK_LEAF|GTK_NOVALUE) // profiler Enter/Leave/TailCall hook
+
+GTNODE(RETFILT , "retfilt", 0,GTK_UNOP|GTK_NOVALUE) // end filter with TYP_I_IMPL return value
+#if !FEATURE_EH_FUNCLETS
+GTNODE(END_LFIN , "endLFin" ,0,GTK_LEAF|GTK_NOVALUE) // end locally-invoked finally
+#endif // !FEATURE_EH_FUNCLETS
+
+//-----------------------------------------------------------------------------
+// Nodes used for optimizations.
+//-----------------------------------------------------------------------------
+
+GTNODE(PHI , "phi" ,0,GTK_UNOP) // phi node for ssa.
+GTNODE(PHI_ARG , "phiArg" ,0,GTK_LEAF|GTK_LOCAL) // phi(phiarg, phiarg, phiarg)
+
+//-----------------------------------------------------------------------------
+// Nodes used by Lower to generate a closer CPU representation of other nodes
+//-----------------------------------------------------------------------------
+
+GTNODE(JMPTABLE , "jumpTable" , 0, GTK_LEAF) // Generates the jump table for switches
+GTNODE(SWITCH_TABLE, "tableSwitch", 0, GTK_BINOP|GTK_NOVALUE) // Jump Table based switch construct
+
+//-----------------------------------------------------------------------------
+// Nodes used only within the code generator:
+//-----------------------------------------------------------------------------
+
+GTNODE(REG_VAR , "regVar" ,0,GTK_LEAF|GTK_LOCAL) // register variable
+GTNODE(CLS_VAR , "clsVar" ,0,GTK_LEAF) // static data member
+GTNODE(CLS_VAR_ADDR , "&clsVar" ,0,GTK_LEAF) // static data member address
+GTNODE(STORE_CLS_VAR, "st.clsVar" ,0,GTK_LEAF|GTK_NOVALUE) // store to static data member
+GTNODE(ARGPLACE , "argPlace" ,0,GTK_LEAF) // placeholder for a register arg
+GTNODE(NULLCHECK , "nullcheck" ,0,GTK_UNOP|GTK_NOVALUE) // null checks the source
+GTNODE(PHYSREG , "physregSrc" ,0,GTK_LEAF) // read from a physical register
+GTNODE(PHYSREGDST , "physregDst" ,0,GTK_UNOP|GTK_NOVALUE) // write to a physical register
+GTNODE(EMITNOP , "emitnop" ,0,GTK_LEAF|GTK_NOVALUE) // emitter-placed nop
+GTNODE(PINVOKE_PROLOG,"pinvoke_prolog",0,GTK_LEAF|GTK_NOVALUE) // pinvoke prolog seq
+GTNODE(PINVOKE_EPILOG,"pinvoke_epilog",0,GTK_LEAF|GTK_NOVALUE) // pinvoke epilog seq
+GTNODE(PUTARG_REG , "putarg_reg" ,0,GTK_UNOP) // operator that places outgoing arg in register
+GTNODE(PUTARG_STK , "putarg_stk" ,0,GTK_UNOP) // operator that places outgoing arg in stack
+GTNODE(RETURNTRAP , "returnTrap" ,0,GTK_UNOP|GTK_NOVALUE) // a conditional call to wait on gc
+GTNODE(SWAP , "swap" ,0,GTK_BINOP|GTK_NOVALUE) // op1 and op2 swap (registers)
+GTNODE(IL_OFFSET , "il_offset" ,0,GTK_LEAF|GTK_NOVALUE) // marks an IL offset for debugging purposes
+
+/*****************************************************************************/
+#undef GTNODE
+/*****************************************************************************/
+// clang-format on
diff --git a/src/jit/gtstructs.h b/src/jit/gtstructs.h
new file mode 100644
index 0000000000..895d3b6598
--- /dev/null
+++ b/src/jit/gtstructs.h
@@ -0,0 +1,112 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+
+/*****************************************************************************/
+
+#ifndef GTSTRUCT_0
+#error Define GTSTRUCT_0 before including this file.
+#endif
+
+#ifndef GTSTRUCT_1
+#error Define GTSTRUCT_1 before including this file.
+#endif
+
+#ifndef GTSTRUCT_2
+#error Define GTSTRUCT_2 before including this file.
+#endif
+
+#ifndef GTSTRUCT_3
+#error Define GTSTRUCT_3 before including this file.
+#endif
+
+#ifndef GTSTRUCT_4
+#error Define GTSTRUCT_4 before including this file.
+#endif
+
+#ifndef GTSTRUCT_N
+#error Define GTSTRUCT_N before including this file.
+#endif
+
+/*****************************************************************************/
+
+//
+// Field name , Allowed node enum(s)
+//
+
+GTSTRUCT_0(UnOp , GT_OP)
+GTSTRUCT_0(Op , GT_OP)
+#if !FEATURE_EH_FUNCLETS
+GTSTRUCT_2(Val , GT_END_LFIN, GT_JMP)
+#else
+GTSTRUCT_1(Val , GT_JMP)
+#endif
+#ifndef LEGACY_BACKEND
+GTSTRUCT_3(IntConCommon, GT_CNS_INT, GT_CNS_LNG, GT_JMPTABLE)
+GTSTRUCT_1(JumpTable , GT_JMPTABLE)
+#else // LEGACY_BACKEND
+GTSTRUCT_2(IntConCommon, GT_CNS_INT, GT_CNS_LNG)
+#endif // LEGACY_BACKEND
+GTSTRUCT_1(IntCon , GT_CNS_INT)
+GTSTRUCT_1(LngCon , GT_CNS_LNG)
+GTSTRUCT_1(DblCon , GT_CNS_DBL)
+GTSTRUCT_1(StrCon , GT_CNS_STR)
+GTSTRUCT_N(LclVarCommon, GT_LCL_VAR, GT_LCL_FLD, GT_REG_VAR, GT_PHI_ARG, GT_STORE_LCL_VAR, GT_STORE_LCL_FLD, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)
+GTSTRUCT_3(LclVar , GT_LCL_VAR, GT_LCL_VAR_ADDR, GT_STORE_LCL_VAR)
+#ifndef LEGACY_BACKEND
+GTSTRUCT_3(LclFld , GT_LCL_FLD, GT_STORE_LCL_FLD, GT_LCL_FLD_ADDR)
+#else // LEGACY_BACKEND
+GTSTRUCT_1(LclFld , GT_LCL_FLD)
+#endif // LEGACY_BACKEND
+GTSTRUCT_1(RegVar , GT_REG_VAR)
+GTSTRUCT_1(Cast , GT_CAST)
+GTSTRUCT_1(Box , GT_BOX)
+GTSTRUCT_1(Field , GT_FIELD)
+GTSTRUCT_1(Call , GT_CALL)
+GTSTRUCT_1(ArgList , GT_LIST)
+GTSTRUCT_1(Colon , GT_COLON)
+GTSTRUCT_1(FptrVal , GT_FTN_ADDR)
+GTSTRUCT_1(Intrinsic , GT_INTRINSIC)
+GTSTRUCT_1(Index , GT_INDEX)
+#ifdef FEATURE_SIMD
+GTSTRUCT_2(BoundsChk , GT_ARR_BOUNDS_CHECK, GT_SIMD_CHK)
+#else // !FEATURE_SIMD
+GTSTRUCT_1(BoundsChk , GT_ARR_BOUNDS_CHECK)
+#endif // !FEATURE_SIMD
+GTSTRUCT_1(ArrLen , GT_ARR_LENGTH)
+GTSTRUCT_1(ArrElem , GT_ARR_ELEM)
+GTSTRUCT_1(ArrOffs , GT_ARR_OFFSET)
+GTSTRUCT_1(ArrIndex , GT_ARR_INDEX)
+GTSTRUCT_1(RetExpr , GT_RET_EXPR)
+GTSTRUCT_2(Stmt , GT_STMT, GT_IL_OFFSET)
+GTSTRUCT_2(CopyOrReload, GT_COPY, GT_RELOAD)
+GTSTRUCT_2(ClsVar , GT_CLS_VAR, GT_CLS_VAR_ADDR)
+GTSTRUCT_1(ArgPlace , GT_ARGPLACE)
+GTSTRUCT_1(Label , GT_LABEL)
+GTSTRUCT_1(CmpXchg , GT_CMPXCHG)
+GTSTRUCT_1(AddrMode , GT_LEA)
+GTSTRUCT_N(Blk , GT_BLK, GT_STORE_BLK, GT_OBJ, GT_STORE_OBJ, GT_DYN_BLK, GT_STORE_DYN_BLK)
+GTSTRUCT_2(Obj , GT_OBJ, GT_STORE_OBJ)
+GTSTRUCT_2(DynBlk , GT_DYN_BLK, GT_STORE_DYN_BLK)
+GTSTRUCT_1(Qmark , GT_QMARK)
+GTSTRUCT_1(PhiArg , GT_PHI_ARG)
+GTSTRUCT_1(StoreInd , GT_STOREIND)
+GTSTRUCT_N(Indir , GT_STOREIND, GT_IND, GT_NULLCHECK, GT_BLK, GT_STORE_BLK, GT_OBJ, GT_STORE_OBJ, GT_DYN_BLK, GT_STORE_DYN_BLK)
+GTSTRUCT_1(PutArgStk , GT_PUTARG_STK)
+GTSTRUCT_1(PhysReg , GT_PHYSREG)
+#ifdef FEATURE_SIMD
+GTSTRUCT_1(SIMD , GT_SIMD)
+#endif // FEATURE_SIMD
+GTSTRUCT_1(AllocObj , GT_ALLOCOBJ)
+/*****************************************************************************/
+#undef GTSTRUCT_0
+#undef GTSTRUCT_1
+#undef GTSTRUCT_2
+#undef GTSTRUCT_3
+#undef GTSTRUCT_4
+#undef GTSTRUCT_N
+/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/hashbv.cpp b/src/jit/hashbv.cpp
new file mode 100644
index 0000000000..fa06ec7b1e
--- /dev/null
+++ b/src/jit/hashbv.cpp
@@ -0,0 +1,2028 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+// --------------------------------------------------------------------
+// --------------------------------------------------------------------
+
+#ifdef DEBUG
+void hashBvNode::dump()
+{
+ printf("base: %d { ", baseIndex);
+ this->foreachBit(pBit);
+ printf("}\n");
+}
+#endif // DEBUG
+
+void hashBvNode::Reconstruct(indexType base)
+{
+ baseIndex = base;
+
+ assert(!(baseIndex % BITS_PER_NODE));
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ elements[i] = 0;
+ }
+ next = nullptr;
+}
+
+hashBvNode::hashBvNode(indexType base)
+{
+ this->Reconstruct(base);
+}
+
+hashBvNode* hashBvNode::Create(indexType base, Compiler* compiler)
+{
+ hashBvNode* result = nullptr;
+
+ if (compiler->hbvGlobalData.hbvNodeFreeList)
+ {
+ result = compiler->hbvGlobalData.hbvNodeFreeList;
+ compiler->hbvGlobalData.hbvNodeFreeList = result->next;
+ }
+ else
+ {
+ result = new (compiler, CMK_hashBv) hashBvNode;
+ }
+ result->Reconstruct(base);
+ return result;
+}
+
+void hashBvNode::freeNode(hashBvGlobalData* glob)
+{
+ this->next = glob->hbvNodeFreeList;
+ glob->hbvNodeFreeList = this;
+}
+
+void hashBvNode::setBit(indexType base)
+{
+ assert(base >= baseIndex);
+ assert(base - baseIndex < BITS_PER_NODE);
+
+ base -= baseIndex;
+ indexType elem = base / BITS_PER_ELEMENT;
+ indexType posi = base % BITS_PER_ELEMENT;
+
+ elements[elem] |= indexType(1) << posi;
+}
+
+void hashBvNode::setLowest(indexType numToSet)
+{
+ assert(numToSet <= BITS_PER_NODE);
+
+ int elemIndex = 0;
+ while (numToSet > BITS_PER_ELEMENT)
+ {
+ elements[elemIndex] = ~(elemType(0));
+ numToSet -= BITS_PER_ELEMENT;
+ elemIndex++;
+ }
+ if (numToSet)
+ {
+ elemType allOnes = ~(elemType(0));
+ int numToShift = (int)(BITS_PER_ELEMENT - numToSet);
+ elements[elemIndex] = allOnes >> numToShift;
+ }
+}
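+
+// Worked example (assuming BITS_PER_ELEMENT == 32, for illustration only): setLowest(40)
+// first fills elements[0] with ~elemType(0) == 0xFFFFFFFF and reduces numToSet to 8,
+// then sets elements[1] = allOnes >> (32 - 8) == 0x000000FF, leaving exactly the
+// lowest 40 bits of the node set.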
+
+void hashBvNode::clrBit(indexType base)
+{
+ assert(base >= baseIndex);
+ assert(base - baseIndex < BITS_PER_NODE);
+
+ base -= baseIndex;
+ indexType elem = base / BITS_PER_ELEMENT;
+ indexType posi = base % BITS_PER_ELEMENT;
+
+ elements[elem] &= ~(indexType(1) << posi);
+}
+
+bool hashBvNode::belongsIn(indexType index)
+{
+ if (index < baseIndex)
+ {
+ return false;
+ }
+ if (index >= baseIndex + BITS_PER_NODE)
+ {
+ return false;
+ }
+ return true;
+}
+
+int countBitsInWord(unsigned int bits)
+{
+ // In-place adder tree: perform 16 1-bit adds, 8 2-bit adds,
+ // 4 4-bit adds, 2 8-bit adds, and 1 16-bit add.
+ bits = ((bits >> 1) & 0x55555555) + (bits & 0x55555555);
+ bits = ((bits >> 2) & 0x33333333) + (bits & 0x33333333);
+ bits = ((bits >> 4) & 0x0F0F0F0F) + (bits & 0x0F0F0F0F);
+ bits = ((bits >> 8) & 0x00FF00FF) + (bits & 0x00FF00FF);
+ bits = ((bits >> 16) & 0x0000FFFF) + (bits & 0x0000FFFF);
+ return (int)bits;
+}
+
+int countBitsInWord(unsigned __int64 bits)
+{
+ bits = ((bits >> 1) & 0x5555555555555555) + (bits & 0x5555555555555555);
+ bits = ((bits >> 2) & 0x3333333333333333) + (bits & 0x3333333333333333);
+ bits = ((bits >> 4) & 0x0F0F0F0F0F0F0F0F) + (bits & 0x0F0F0F0F0F0F0F0F);
+ bits = ((bits >> 8) & 0x00FF00FF00FF00FF) + (bits & 0x00FF00FF00FF00FF);
+ bits = ((bits >> 16) & 0x0000FFFF0000FFFF) + (bits & 0x0000FFFF0000FFFF);
+ bits = ((bits >> 32) & 0x00000000FFFFFFFF) + (bits & 0x00000000FFFFFFFF);
+ return (int)bits;
+}
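+
+// Worked example of the adder tree above (illustration only), using an 8-bit value
+// 0b11010110 and the same fold pattern:
+//     1-bit adds : pairs 11|01|01|10 become 10|01|01|01 (each pair now holds its count)
+//     2-bit adds : 10+01 = 0011, 01+01 = 0010
+//     4-bit add  : 0011 + 0010 = 0101 == 5, the number of set bits
+// The 32-bit and 64-bit overloads apply the identical masks and shifts, just widened.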
+
+int hashBvNode::countBits()
+{
+ int result = 0;
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ elemType bits = elements[i];
+
+ result += countBitsInWord(bits);
+ }
+ return result;
+}
+
+bool hashBvNode::anyBits()
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ if (elements[i])
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool hashBvNode::getBit(indexType base)
+{
+ assert(base >= baseIndex);
+ assert(base - baseIndex < BITS_PER_NODE);
+ base -= baseIndex;
+
+ indexType elem = base / BITS_PER_ELEMENT;
+ indexType posi = base % BITS_PER_ELEMENT;
+
+ if (elements[elem] & (indexType(1) << posi))
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+bool hashBvNode::anySet()
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ if (elements[i])
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+void hashBvNode::copyFrom(hashBvNode* other)
+{
+ this->baseIndex = other->baseIndex;
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ this->elements[i] = other->elements[i];
+ }
+}
+
+void hashBvNode::foreachBit(bitAction a)
+{
+ indexType base;
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ base = baseIndex + i * BITS_PER_ELEMENT;
+ elemType e = elements[i];
+ while (e)
+ {
+ if (e & 1)
+ {
+ a(base);
+ }
+ e >>= 1;
+ base++;
+ }
+ }
+}
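+
+// For illustration: with baseIndex == 64 and elements[0] == 0b101, foreachBit invokes
+// the action with indices 64 and 66 (bits 0 and 2 of the first element); elements that
+// are zero contribute nothing because the inner loop exits as soon as e becomes 0.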
+
+elemType hashBvNode::AndWithChange(hashBvNode* other)
+{
+ elemType result = 0;
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ elemType src = this->elements[i];
+ elemType dst;
+
+ dst = src & other->elements[i];
+ result |= src ^ dst;
+ this->elements[i] = dst;
+ }
+ return result;
+}
+
+elemType hashBvNode::OrWithChange(hashBvNode* other)
+{
+ elemType result = 0;
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ elemType src = this->elements[i];
+ elemType dst;
+
+ dst = src | other->elements[i];
+ result |= src ^ dst;
+ this->elements[i] = dst;
+ }
+ return result;
+}
+
+elemType hashBvNode::XorWithChange(hashBvNode* other)
+{
+ elemType result = 0;
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ elemType src = this->elements[i];
+ elemType dst;
+
+ dst = src ^ other->elements[i];
+ result |= src ^ dst;
+ this->elements[i] = dst;
+ }
+ return result;
+}
+
+elemType hashBvNode::SubtractWithChange(hashBvNode* other)
+{
+ elemType result = 0;
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ elemType src = this->elements[i];
+ elemType dst;
+
+ dst = src & ~other->elements[i];
+ result |= src ^ dst;
+ this->elements[i] = dst;
+ }
+ return result;
+}
+
+bool hashBvNode::Intersects(hashBvNode* other)
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ if ((this->elements[i] & other->elements[i]) != 0)
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void hashBvNode::AndWith(hashBvNode* other)
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ this->elements[i] &= other->elements[i];
+ }
+}
+
+void hashBvNode::OrWith(hashBvNode* other)
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ this->elements[i] |= other->elements[i];
+ }
+}
+
+void hashBvNode::XorWith(hashBvNode* other)
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ this->elements[i] ^= other->elements[i];
+ }
+}
+
+void hashBvNode::Subtract(hashBvNode* other)
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ this->elements[i] &= ~other->elements[i];
+ }
+}
+
+bool hashBvNode::sameAs(hashBvNode* other)
+{
+ if (this->baseIndex != other->baseIndex)
+ {
+ return false;
+ }
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ if (this->elements[i] != other->elements[i])
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// --------------------------------------------------------------------
+// --------------------------------------------------------------------
+
+hashBv::hashBv(Compiler* comp)
+{
+ this->compiler = comp;
+ this->log2_hashSize = globalData()->hbvHashSizeLog2;
+
+ int hts = hashtable_size();
+ nodeArr = getNewVector(hts);
+
+ for (int i = 0; i < hts; i++)
+ {
+ nodeArr[i] = nullptr;
+ }
+ this->numNodes = 0;
+}
+
+hashBv* hashBv::Create(Compiler* compiler)
+{
+ hashBv* result;
+ hashBvGlobalData* gd = &compiler->hbvGlobalData;
+
+ if (hbvFreeList(gd))
+ {
+ result = hbvFreeList(gd);
+ hbvFreeList(gd) = result->next;
+ assert(result->nodeArr);
+ }
+ else
+ {
+ result = new (compiler, CMK_hashBv) hashBv(compiler);
+ memset(result, 0, sizeof(hashBv));
+ result->nodeArr = result->initialVector;
+ }
+
+ result->compiler = compiler;
+ result->log2_hashSize = 0;
+ result->numNodes = 0;
+
+ return result;
+}
+
+void hashBv::Init(Compiler* compiler)
+{
+ memset(&compiler->hbvGlobalData, 0, sizeof(hashBvGlobalData));
+}
+
+hashBvGlobalData* hashBv::globalData()
+{
+ return &compiler->hbvGlobalData;
+}
+
+hashBvNode** hashBv::getNewVector(int vectorLength)
+{
+ assert(vectorLength > 0);
+ assert(isPow2(vectorLength));
+
+ hashBvNode** newVector = new (compiler, CMK_hashBv) hashBvNode*[vectorLength]();
+ return newVector;
+}
+
+hashBvNode*& hashBv::nodeFreeList(hashBvGlobalData* data)
+{
+ return data->hbvNodeFreeList;
+}
+
+hashBv*& hashBv::hbvFreeList(hashBvGlobalData* data)
+{
+ return data->hbvFreeList;
+}
+
+void hashBv::freeVector(hashBvNode* vect, int vectorLength)
+{
+ // not enough space to do anything with it
+ if (vectorLength < 2)
+ {
+ return;
+ }
+
+ hbvFreeListNode* f = (hbvFreeListNode*)vect;
+ f->next = globalData()->hbvFreeVectorList;
+ globalData()->hbvFreeVectorList = f;
+ f->size = vectorLength;
+}
+
+void hashBv::hbvFree()
+{
+ Compiler* comp = this->compiler;
+
+ int hts = hashtable_size();
+ for (int i = 0; i < hts; i++)
+ {
+ while (nodeArr[i])
+ {
+ hashBvNode* curr = nodeArr[i];
+ nodeArr[i] = curr->next;
+ curr->freeNode(globalData());
+ }
+ }
+ // keep the vector attached because the whole thing is freelisted
+ // plus you don't even know if it's freeable
+
+ this->next = hbvFreeList(globalData());
+ hbvFreeList(globalData()) = this;
+}
+
+hashBv* hashBv::CreateFrom(hashBv* other, Compiler* comp)
+{
+ hashBv* result = hashBv::Create(comp);
+ result->copyFrom(other, comp);
+ return result;
+}
+
+void hashBv::MergeLists(hashBvNode** root1, hashBvNode** root2)
+{
+}
+
+bool hashBv::TooSmall()
+{
+ return this->numNodes > this->hashtable_size() * 4;
+}
+
+bool hashBv::TooBig()
+{
+ return this->hashtable_size() > this->numNodes * 4;
+}
+
+int hashBv::getNodeCount()
+{
+ int size = hashtable_size();
+ int result = 0;
+
+ for (int i = 0; i < size; i++)
+ {
+ hashBvNode* last = nodeArr[i];
+
+ while (last)
+ {
+ last = last->next;
+ result++;
+ }
+ }
+ return result;
+}
+
+bool hashBv::IsValid()
+{
+ int size = hashtable_size();
+ // is power of 2
+ assert(((size - 1) & size) == 0);
+
+ for (int i = 0; i < size; i++)
+ {
+ hashBvNode* last = nodeArr[i];
+ hashBvNode* curr;
+ int lastIndex = -1;
+
+ while (last)
+ {
+ // the node has been hashed correctly
+ assert((int)last->baseIndex > lastIndex);
+ lastIndex = (int)last->baseIndex;
+ assert(i == getHashForIndex(last->baseIndex, size));
+ curr = last->next;
+ // the order is monotonically increasing bases
+ if (curr)
+ {
+ assert(curr->baseIndex > last->baseIndex);
+ }
+ last = curr;
+ }
+ }
+ return true;
+}
+
+void hashBv::Resize()
+{
+ // resize to 'optimal' size
+
+ this->Resize(this->numNodes);
+}
+
+void hashBv::Resize(int newSize)
+{
+ assert(newSize > 0);
+ newSize = nearest_pow2(newSize);
+
+ int oldSize = hashtable_size();
+
+ if (newSize == oldSize)
+ {
+ return;
+ }
+
+ int oldSizeLog2 = log2_hashSize;
+ int log2_newSize = genLog2((unsigned)newSize);
+ int size;
+
+ hashBvNode** newNodes = this->getNewVector(newSize);
+
+ hashBvNode*** insertionPoints = (hashBvNode***)alloca(sizeof(hashBvNode*) * newSize);
+ memset(insertionPoints, 0, sizeof(hashBvNode*) * newSize);
+
+ for (int i = 0; i < newSize; i++)
+ {
+ insertionPoints[i] = &(newNodes[i]);
+ }
+
+ if (newSize > oldSize)
+ {
+ // for each src list, expand it into multiple dst lists
+ for (int i = 0; i < oldSize; i++)
+ {
+ hashBvNode* next = nodeArr[i];
+
+ while (next)
+ {
+ hashBvNode* curr = next;
+ next = curr->next;
+ int destination = getHashForIndex(curr->baseIndex, newSize);
+
+ // ...
+
+ // stick the current node on the end of the selected list
+ *(insertionPoints[destination]) = curr;
+ insertionPoints[destination] = &(curr->next);
+ curr->next = nullptr;
+ }
+ }
+ nodeArr = newNodes;
+ log2_hashSize = (unsigned short)log2_newSize;
+ }
+ else if (oldSize > newSize)
+ {
+ int shrinkFactor = oldSize / newSize;
+
+ // shrink multiple lists into one list
+ // There are more efficient ways to do this, but if the lists are long,
+ // you shouldn't be shrinking.
+ for (int i = 0; i < oldSize; i++)
+ {
+ hashBvNode* next = nodeArr[i];
+
+ if (next)
+ {
+ // all nodes in this list should have the same destination list
+ int destination = getHashForIndex(next->baseIndex, newSize);
+ hashBvNode** insertionPoint = &newNodes[destination];
+ do
+ {
+ hashBvNode* curr = next;
+ // figure out where to insert it
+ while (*insertionPoint && (*insertionPoint)->baseIndex < curr->baseIndex)
+ {
+ insertionPoint = &((*insertionPoint)->next);
+ }
+ next = curr->next;
+
+ hashBvNode* temp = *insertionPoint;
+ *insertionPoint = curr;
+ curr->next = temp;
+
+ } while (next);
+ }
+ }
+ nodeArr = newNodes;
+ log2_hashSize = (unsigned short)log2_newSize;
+ }
+ else
+ {
+ // same size
+ assert(oldSize == newSize);
+ }
+ assert(this->IsValid());
+}
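+
+// For illustration: since getHashForIndex hashes on (baseIndex >> LOG2_BITS_PER_NODE)
+// masked by (table_size - 1), growing from 2 buckets to 8 splits old bucket 0 across
+// new buckets 0, 2, 4 and 6 (the buckets that alias it), while shrinking from 8 back
+// to 2 merges those four lists into one, keeping the nodes sorted by baseIndex as it
+// splices.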
+
+#ifdef DEBUG
+void hashBv::dump()
+{
+ bool first = true;
+ indexType index;
+
+ // uncomment to print internal implementation details
+ // DBEXEC(TRUE, printf("[%d(%d)(nodes:%d)]{ ", hashtable_size(), countBits(), this->numNodes));
+
+ printf("{");
+ FOREACH_HBV_BIT_SET(index, this)
+ {
+ if (!first)
+ {
+ printf(" ");
+ }
+ printf("%d", index);
+ first = false;
+ }
+ NEXT_HBV_BIT_SET;
+ printf("}\n");
+}
+
+void hashBv::dumpFancy()
+{
+ indexType index;
+ indexType last_1 = -1;
+ indexType last_0 = -1;
+
+ printf("{");
+ printf("count:%d", this->countBits());
+ FOREACH_HBV_BIT_SET(index, this)
+ {
+ if (last_1 != index - 1)
+ {
+ if (last_0 + 1 != last_1)
+ {
+ printf(" %d-%d", last_0 + 1, last_1);
+ }
+ else
+ {
+ printf(" %d", last_1);
+ }
+ last_0 = index - 1;
+ }
+ last_1 = index;
+ }
+ NEXT_HBV_BIT_SET;
+
+ // Print the last one
+ if (last_0 + 1 != last_1)
+ {
+ printf(" %d-%d", last_0 + 1, last_1);
+ }
+ else
+ {
+ printf(" %d", last_1);
+ }
+
+ printf("}\n");
+}
+#endif // DEBUG
+
+void hashBv::removeNodeAtBase(indexType index)
+{
+ hashBvNode** insertionPoint = this->getInsertionPointForIndex(index);
+
+ hashBvNode* node = *insertionPoint;
+
+ // make sure that we were called to remove something
+ // that really was there
+ assert(node);
+
+ // splice it out
+ *insertionPoint = node->next;
+ this->numNodes--;
+}
+
+int hashBv::getHashForIndex(indexType index, int table_size)
+{
+ indexType hashIndex;
+
+ hashIndex = index >> LOG2_BITS_PER_NODE;
+ hashIndex &= (table_size - 1);
+
+ return (int)hashIndex;
+}
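+
+// Worked example (hypothetical sizes): if BITS_PER_NODE were 64 (LOG2_BITS_PER_NODE == 6)
+// and table_size were 4, then index 200 lives in the node with baseIndex 192 and hashes
+// to (200 >> 6) & (4 - 1) == 3 & 3 == 3, i.e. bucket nodeArr[3].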
+
+int hashBv::getRehashForIndex(indexType thisIndex, int thisTableSize, int newTableSize)
+{
+ assert(0);
+ return 0;
+}
+
+hashBvNode** hashBv::getInsertionPointForIndex(indexType index)
+{
+ indexType indexInNode;
+ indexType hashIndex;
+ indexType baseIndex;
+
+ hashBvNode* result;
+
+ hashIndex = getHashForIndex(index, hashtable_size());
+
+ baseIndex = index & ~(BITS_PER_NODE - 1);
+ indexInNode = index & (BITS_PER_NODE - 1);
+
+ // printf("(%x) : hsh=%x, base=%x, index=%x\n", index,
+ // hashIndex, baseIndex, indexInNode);
+
+ // find the node
+ hashBvNode** prev = &nodeArr[hashIndex];
+ result = nodeArr[hashIndex];
+
+ while (result)
+ {
+ if (result->baseIndex == baseIndex)
+ {
+ return prev;
+ }
+ else if (result->baseIndex > baseIndex)
+ {
+ return prev;
+ }
+ else
+ {
+ prev = &(result->next);
+ result = result->next;
+ }
+ }
+ return prev;
+}
+
+hashBvNode* hashBv::getNodeForIndexHelper(indexType index, bool canAdd)
+{
+ // determine the base index of the node containing this index
+ index = index & ~(BITS_PER_NODE - 1);
+
+ hashBvNode** prev = getInsertionPointForIndex(index);
+
+ hashBvNode* node = *prev;
+
+ if (node && node->belongsIn(index))
+ {
+ return node;
+ }
+ else if (canAdd)
+ {
+ // missing node, insert it before the current one
+ hashBvNode* temp = hashBvNode::Create(index, this->compiler);
+ temp->next = node;
+ *prev = temp;
+ this->numNodes++;
+ return temp;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+hashBvNode* hashBv::getNodeForIndex(indexType index)
+{
+ // determine the base index of the node containing this index
+ index = index & ~(BITS_PER_NODE - 1);
+
+ hashBvNode** prev = getInsertionPointForIndex(index);
+
+ hashBvNode* node = *prev;
+
+ if (node && node->belongsIn(index))
+ {
+ return node;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+void hashBv::setBit(indexType index)
+{
+ assert(index >= 0);
+ assert(this->numNodes == this->getNodeCount());
+ hashBvNode* result = nullptr;
+
+ indexType baseIndex = index & ~(BITS_PER_NODE - 1);
+ indexType base = index - baseIndex;
+ indexType elem = base / BITS_PER_ELEMENT;
+ indexType posi = base % BITS_PER_ELEMENT;
+
+ // this should be the 99% case: when there is only one node in the structure
+ if ((result = nodeArr[0]) && result->baseIndex == baseIndex)
+ {
+ result->elements[elem] |= indexType(1) << posi;
+ return;
+ }
+
+ result = getOrAddNodeForIndex(index);
+ result->setBit(index);
+
+ assert(this->numNodes == this->getNodeCount());
+
+ // if it's getting out of control resize it
+ if (this->numNodes > this->hashtable_size() * 4)
+ {
+ this->Resize();
+ }
+
+ return;
+}
+
+void hashBv::setAll(indexType numToSet)
+{
+ // TODO-Throughput: this could be more efficient
+ for (unsigned int i = 0; i < numToSet; i += BITS_PER_NODE)
+ {
+ hashBvNode* node = getOrAddNodeForIndex(i);
+ indexType bits_to_set = min(BITS_PER_NODE, numToSet - i);
+ node->setLowest(bits_to_set);
+ }
+}
+
+void hashBv::clearBit(indexType index)
+{
+ assert(index >= 0);
+ assert(this->numNodes == this->getNodeCount());
+ hashBvNode* result = nullptr;
+
+ indexType baseIndex = index & ~(BITS_PER_NODE - 1);
+ indexType hashIndex = getHashForIndex(index, hashtable_size());
+
+ hashBvNode** prev = &nodeArr[hashIndex];
+ result = nodeArr[hashIndex];
+
+ while (result)
+ {
+ if (result->baseIndex == baseIndex)
+ {
+ result->clrBit(index);
+ // if nothing left set free it
+ if (!result->anySet())
+ {
+ *prev = result->next;
+ result->freeNode(globalData());
+ this->numNodes--;
+ }
+ return;
+ }
+ else if (result->baseIndex > baseIndex)
+ {
+ return;
+ }
+ else
+ {
+ prev = &(result->next);
+ result = result->next;
+ }
+ }
+ assert(this->numNodes == this->getNodeCount());
+ return;
+}
+
+bool hashBv::testBit(indexType index)
+{
+ // determine the base index of the node containing this index
+ indexType baseIndex = index & ~(BITS_PER_NODE - 1);
+ // 99% case
+ if (nodeArr[0] && nodeArr[0]->baseIndex == baseIndex)
+ {
+ return nodeArr[0]->getBit(index);
+ }
+
+ indexType hashIndex = getHashForIndex(baseIndex, hashtable_size());
+
+ hashBvNode* iter = nodeArr[hashIndex];
+
+ while (iter)
+ {
+ if (iter->baseIndex == baseIndex)
+ {
+ return iter->getBit(index);
+ }
+ else
+ {
+ iter = iter->next;
+ }
+ }
+ return false;
+}
+
+int hashBv::countBits()
+{
+ int result = 0;
+ int hts = this->hashtable_size();
+ for (int hashNum = 0; hashNum < hts; hashNum++)
+ {
+ hashBvNode* node = nodeArr[hashNum];
+ while (node)
+ {
+ result += node->countBits();
+ node = node->next;
+ }
+ }
+ return result;
+}
+
+bool hashBv::anySet()
+{
+ int result = 0;
+
+ int hts = this->hashtable_size();
+ for (int hashNum = 0; hashNum < hts; hashNum++)
+ {
+ hashBvNode* node = nodeArr[hashNum];
+ while (node)
+ {
+ if (node->anySet())
+ {
+ return true;
+ }
+ node = node->next;
+ }
+ }
+ return false;
+}
+
+class AndAction
+{
+public:
+ static inline void PreAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline void PostAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline bool DefaultResult()
+ {
+ return false;
+ }
+
+ static inline void LeftGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in other, not this
+ // so skip it
+ r = r->next;
+ }
+ static inline void RightGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in LHS, not RHS
+ // so have to remove it
+ hashBvNode* old = *l;
+ *l = (*l)->next;
+ // splice it out
+ old->freeNode(lhs->globalData());
+ lhs->numNodes--;
+ result = true;
+ }
+ static inline void BothPresent(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ if ((*l)->AndWithChange(r))
+ {
+ r = r->next;
+ result = true;
+
+ if ((*l)->anySet())
+ {
+ l = &((*l)->next);
+ }
+ else
+ {
+ hashBvNode* old = *l;
+ *l = (*l)->next;
+ old->freeNode(lhs->globalData());
+ lhs->numNodes--;
+ }
+ }
+ else
+ {
+ r = r->next;
+ l = &((*l)->next);
+ }
+ }
+ static inline void LeftEmpty(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ r = r->next;
+ }
+};
+
+class SubtractAction
+{
+public:
+ static inline void PreAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline void PostAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline bool DefaultResult()
+ {
+ return false;
+ }
+ static inline void LeftGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in other, not this
+ // so skip it
+ r = r->next;
+ }
+ static inline void RightGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // in lhs, not rhs
+ // so skip lhs
+ l = &((*l)->next);
+ }
+ static inline void BothPresent(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ if ((*l)->SubtractWithChange(r))
+ {
+ r = r->next;
+ result = true;
+
+ if ((*l)->anySet())
+ {
+ l = &((*l)->next);
+ }
+ else
+ {
+ hashBvNode* old = *l;
+ *l = (*l)->next;
+ old->freeNode(lhs->globalData());
+ lhs->numNodes--;
+ }
+ }
+ else
+ {
+ r = r->next;
+ l = &((*l)->next);
+ }
+ }
+ static inline void LeftEmpty(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ r = r->next;
+ }
+};
+
+class XorAction
+{
+public:
+ static inline void PreAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline void PostAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline bool DefaultResult()
+ {
+ return false;
+ }
+
+ static inline void LeftGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in other, not this
+ // so put one in
+ result = true;
+ hashBvNode* temp = hashBvNode::Create(r->baseIndex, lhs->compiler);
+ lhs->numNodes++;
+ temp->XorWith(r);
+ temp->next = *l; // keep the existing lhs node after the newly inserted one
+ *l = temp;
+ l = &(temp->next);
+
+ r = r->next;
+ }
+
+ static inline void RightGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in LHS, not RHS
+ // so LHS remains the same
+ l = &((*l)->next);
+ }
+
+ static inline void BothPresent(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ if ((*l)->XorWithChange(r))
+ {
+ result = true;
+ }
+ l = &((*l)->next);
+ r = r->next;
+ }
+
+ static inline void LeftEmpty(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in other, not this
+ // so put one in
+ result = true;
+ hashBvNode* temp = hashBvNode::Create(r->baseIndex, lhs->compiler);
+ lhs->numNodes++;
+ temp->XorWith(r);
+ temp->next = nullptr;
+ *l = temp;
+ l = &(temp->next);
+
+ r = r->next;
+ }
+};
+
+class OrAction
+{
+public:
+ static inline void PreAction(hashBv* lhs, hashBv* rhs)
+ {
+ if (lhs->log2_hashSize + 2 < rhs->log2_hashSize)
+ {
+ lhs->Resize(rhs->numNodes);
+ }
+ if (rhs->numNodes > rhs->hashtable_size() * 4)
+ {
+ rhs->Resize(rhs->numNodes);
+ }
+ }
+ static inline void PostAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline bool DefaultResult()
+ {
+ return false;
+ }
+
+ static inline void LeftGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in other, not this
+ // so put one in
+ result = true;
+ hashBvNode* temp = hashBvNode::Create(r->baseIndex, lhs->compiler);
+ lhs->numNodes++;
+ temp->OrWith(r);
+ temp->next = *l;
+ *l = temp;
+ l = &(temp->next);
+
+ r = r->next;
+ }
+ static inline void RightGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // in lhs, not rhs
+ // so skip lhs
+ l = &((*l)->next);
+ }
+ static inline void BothPresent(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ if ((*l)->OrWithChange(r))
+ {
+ result = true;
+ }
+ l = &((*l)->next);
+ r = r->next;
+ }
+ static inline void LeftEmpty(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // other contains something this does not
+ // copy it
+ // LeftGap(lhs, l, r, result, terminate);
+ result = true;
+ hashBvNode* temp = hashBvNode::Create(r->baseIndex, lhs->compiler);
+ lhs->numNodes++;
+ temp->OrWith(r);
+ temp->next = nullptr;
+ *l = temp;
+ l = &(temp->next);
+
+ r = r->next;
+ }
+};
+
+class CompareAction
+{
+public:
+ static inline void PreAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline void PostAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline bool DefaultResult()
+ {
+ return true;
+ }
+
+ static inline void LeftGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ terminate = true;
+ result = false;
+ }
+ static inline void RightGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // in lhs, not rhs
+ // so skip lhs
+ terminate = true;
+ result = false;
+ }
+ static inline void BothPresent(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ if (!(*l)->sameAs(r))
+ {
+ terminate = true;
+ result = false;
+ }
+ l = &((*l)->next);
+ r = r->next;
+ }
+ static inline void LeftEmpty(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ terminate = true;
+ result = false;
+ }
+};
+
+class IntersectsAction
+{
+public:
+ static inline void PreAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline void PostAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline bool DefaultResult()
+ {
+ return false;
+ }
+
+ static inline void LeftGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // in rhs, not lhs
+ // so skip rhs
+ r = r->next;
+ }
+ static inline void RightGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // in lhs, not rhs
+ // so skip lhs
+ l = &((*l)->next);
+ }
+ static inline void BothPresent(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ if ((*l)->Intersects(r))
+ {
+ terminate = true;
+ result = true;
+ }
+ else
+ {
+ // disjoint nodes: advance both sides so the traversal makes progress
+ l = &((*l)->next);
+ r = r->next;
+ }
+ }
+ static inline void LeftEmpty(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ r = r->next;
+ }
+};
+
+template <typename Action>
+bool hashBv::MultiTraverseLHSBigger(hashBv* other)
+{
+ int hts = this->hashtable_size();
+ int ots = other->hashtable_size();
+
+ bool result = Action::DefaultResult();
+ bool terminate = false;
+
+ // this is larger
+ hashBvNode*** cursors;
+ int shiftFactor = this->log2_hashSize - other->log2_hashSize;
+ int expansionFactor = hts / ots;
+ cursors = (hashBvNode***)alloca(expansionFactor * sizeof(void*));
+
+ for (int h = 0; h < other->hashtable_size(); h++)
+ {
+ // set up cursors for the expansion of nodes
+ for (int i = 0; i < expansionFactor; i++)
+ {
+ // ex: for [1024] &= [8]
+ // for rhs in bin 0
+ // cursors point to lhs: 0, 8, 16, 24, ...
+ cursors[i] = &nodeArr[ots * i + h];
+ }
+
+ hashBvNode* o = other->nodeArr[h];
+ while (o)
+ {
+ hashBvNode* next = o->next;
+ // figure out what dst list this goes to
+ int hash = getHashForIndex(o->baseIndex, hts);
+ int dstIndex = (hash - h) >> other->log2_hashSize;
+ hashBvNode** cursor = cursors[dstIndex];
+ hashBvNode* c = *cursor;
+
+ // figure out where o fits in the cursor
+
+ if (!c)
+ {
+ Action::LeftEmpty(this, cursors[dstIndex], o, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else if (c->baseIndex == o->baseIndex)
+ {
+ Action::BothPresent(this, cursors[dstIndex], o, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else if (c->baseIndex > o->baseIndex)
+ {
+ Action::LeftGap(this, cursors[dstIndex], o, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else if (c->baseIndex < o->baseIndex)
+ {
+ Action::RightGap(this, cursors[dstIndex], o, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ }
+ for (int i = 0; i < expansionFactor; i++)
+ {
+ while (*(cursors[i]))
+ {
+ Action::RightGap(this, cursors[i], o, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ }
+ }
+ return result;
+}
+
+template <typename Action>
+bool hashBv::MultiTraverseRHSBigger(hashBv* other)
+{
+ int hts = this->hashtable_size();
+ int ots = other->hashtable_size();
+
+ bool result = Action::DefaultResult();
+ bool terminate = false;
+
+ for (int hashNum = 0; hashNum < ots; hashNum++)
+ {
+ int destination = getHashForIndex(BITS_PER_NODE * hashNum, this->hashtable_size());
+ assert(hashNum == getHashForIndex(BITS_PER_NODE * hashNum, other->hashtable_size()));
+
+ hashBvNode** pa = &this->nodeArr[destination];
+ hashBvNode** pb = &other->nodeArr[hashNum];
+ hashBvNode* b = *pb;
+
+ while (*pa && b)
+ {
+ hashBvNode* a = *pa;
+ if (a->baseIndex < b->baseIndex)
+ {
+ // in a but not in b
+ // but maybe it's someplace else in b
+ if (getHashForIndex(a->baseIndex, ots) == hashNum)
+ {
+ // this contains something other does not
+ // need to erase it
+ Action::RightGap(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else
+ {
+ // other might contain this, we don't know yet
+ pa = &a->next;
+ }
+ }
+ else if (a->baseIndex == b->baseIndex)
+ {
+ Action::BothPresent(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else if (a->baseIndex > b->baseIndex)
+ {
+ // other contains something this does not
+ Action::LeftGap(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ }
+ while (*pa)
+ {
+ // if it's in the dest but not in src
+ // then make sure it's expected to be in this list
+ if (getHashForIndex((*pa)->baseIndex, ots) == hashNum)
+ {
+ Action::RightGap(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else
+ {
+ pa = &((*pa)->next);
+ }
+ }
+ while (b)
+ {
+ Action::LeftEmpty(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ }
+ assert(this->numNodes == this->getNodeCount());
+ return result;
+}
+
+// The LHSBigger and RHSBigger algorithms both work when the sizes are equal.
+// This is a specialized version of RHSBigger that is simpler (and faster),
+// because equal sizes are the 99% case.
+template <typename Action>
+bool hashBv::MultiTraverseEqual(hashBv* other)
+{
+ int hts = this->hashtable_size();
+ assert(other->hashtable_size() == hts);
+
+ bool result = Action::DefaultResult();
+ bool terminate = false;
+
+ for (int hashNum = 0; hashNum < hts; hashNum++)
+ {
+ int destination = getHashForIndex(BITS_PER_NODE * hashNum, this->hashtable_size());
+
+ hashBvNode** pa = &this->nodeArr[hashNum];
+ hashBvNode** pb = &other->nodeArr[hashNum];
+ hashBvNode* b = *pb;
+
+ while (*pa && b)
+ {
+ hashBvNode* a = *pa;
+ if (a->baseIndex < b->baseIndex)
+ {
+ // in a but not in b
+ Action::RightGap(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else if (a->baseIndex == b->baseIndex)
+ {
+ Action::BothPresent(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else if (a->baseIndex > b->baseIndex)
+ {
+ // other contains something this does not
+ Action::LeftGap(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ }
+ while (*pa)
+ {
+ // if it's in the dest but not in src
+ Action::RightGap(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ while (b)
+ {
+ Action::LeftEmpty(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ }
+ assert(this->numNodes == this->getNodeCount());
+ return result;
+}
+
+template <class Action>
+bool hashBv::MultiTraverse(hashBv* other)
+{
+ bool result = false;
+
+ assert(this->numNodes == this->getNodeCount());
+
+ Action::PreAction(this, other);
+
+ int hts = this->log2_hashSize;
+ int ots = other->log2_hashSize;
+
+ if (hts == ots)
+ {
+ return MultiTraverseEqual<Action>(other);
+ }
+ else if (hts > ots)
+ {
+ return MultiTraverseLHSBigger<Action>(other);
+ }
+ else
+ {
+ return MultiTraverseRHSBigger<Action>(other);
+ }
+}
+
+bool hashBv::Intersects(hashBv* other)
+{
+ return MultiTraverse<IntersectsAction>(other);
+}
+
+bool hashBv::AndWithChange(hashBv* other)
+{
+ return MultiTraverse<AndAction>(other);
+}
+
+// same as AND ~x
+bool hashBv::SubtractWithChange(hashBv* other)
+{
+ return MultiTraverse<SubtractAction>(other);
+}
+
+void hashBv::Subtract(hashBv* other)
+{
+ this->SubtractWithChange(other);
+}
+
+void hashBv::Subtract3(hashBv* o1, hashBv* o2)
+{
+ this->copyFrom(o1, compiler);
+ this->Subtract(o2);
+}
+
+void hashBv::UnionMinus(hashBv* src1, hashBv* src2, hashBv* src3)
+{
+ this->Subtract3(src1, src2);
+ this->OrWithChange(src3);
+}
+
+void hashBv::ZeroAll()
+{
+ int hts = this->hashtable_size();
+
+ for (int hashNum = 0; hashNum < hts; hashNum++)
+ {
+ while (nodeArr[hashNum])
+ {
+ hashBvNode* n = nodeArr[hashNum];
+ nodeArr[hashNum] = n->next;
+ n->freeNode(globalData());
+ }
+ }
+ this->numNodes = 0;
+}
+
+bool hashBv::OrWithChange(hashBv* other)
+{
+ return MultiTraverse<OrAction>(other);
+}
+
+bool hashBv::XorWithChange(hashBv* other)
+{
+ return MultiTraverse<XorAction>(other);
+}
+void hashBv::OrWith(hashBv* other)
+{
+ this->OrWithChange(other);
+}
+
+void hashBv::AndWith(hashBv* other)
+{
+ this->AndWithChange(other);
+}
+
+bool hashBv::CompareWith(hashBv* other)
+{
+ return MultiTraverse<CompareAction>(other);
+}
+
+void hashBv::copyFrom(hashBv* other, Compiler* comp)
+{
+ assert(this != other);
+
+ hashBvNode* freeList = nullptr;
+
+ this->ZeroAll();
+
+ if (this->log2_hashSize != other->log2_hashSize)
+ {
+ this->nodeArr = this->getNewVector(other->hashtable_size());
+ this->log2_hashSize = other->log2_hashSize;
+ assert(this->hashtable_size() == other->hashtable_size());
+ }
+
+ int hts = this->hashtable_size();
+ // printf("in copyfrom\n");
+ for (int h = 0; h < hts; h++)
+ {
+ // put the current list on the free list
+ freeList = this->nodeArr[h];
+ this->nodeArr[h] = nullptr;
+
+ hashBvNode** splicePoint = &(this->nodeArr[h]);
+ hashBvNode* otherNode = other->nodeArr[h];
+ hashBvNode* newNode = nullptr;
+
+ while (otherNode)
+ {
+ // printf("otherNode is True...\n");
+ hashBvNode* next = *splicePoint;
+
+ this->numNodes++;
+
+ if (freeList)
+ {
+ newNode = freeList;
+ freeList = freeList->next;
+ newNode->Reconstruct(otherNode->baseIndex);
+ }
+ else
+ {
+ newNode = hashBvNode::Create(otherNode->baseIndex, this->compiler);
+ }
+ newNode->copyFrom(otherNode);
+
+ newNode->next = *splicePoint;
+ *splicePoint = newNode;
+ splicePoint = &(newNode->next);
+
+ otherNode = otherNode->next;
+ }
+ }
+ while (freeList)
+ {
+ hashBvNode* next = freeList->next;
+ freeList->freeNode(globalData());
+ freeList = next;
+ }
+#if 0
+ for (int h=0; h<hashtable_size(); h++)
+ {
+ printf("%p %p\n", this->nodeArr[h], other->nodeArr[h]);
+ }
+#endif
+}
+
+int nodeSort(const void* x, const void* y)
+{
+ hashBvNode* a = (hashBvNode*)x;
+ hashBvNode* b = (hashBvNode*)y;
+ return (int)(b->baseIndex - a->baseIndex);
+}
+
+void hashBv::InorderTraverse(nodeAction n)
+{
+ int hts = hashtable_size();
+
+ hashBvNode** x = new (compiler, CMK_hashBv) hashBvNode*[hts];
+
+ {
+ // keep an array of the current pointers
+ // into each of the bitvector lists
+ // in the hashtable
+ for (int i = 0; i < hts; i++)
+ {
+ x[i] = nodeArr[i];
+ }
+
+ while (1)
+ {
+ // pick the lowest node in the hashtable
+
+ indexType lowest = INT_MAX;
+ int lowest_index = -1;
+ for (int i = 0; i < hts; i++)
+ {
+ if (x[i] && x[i]->baseIndex < lowest)
+ {
+ lowest = x[i]->baseIndex;
+ lowest_index = i;
+ }
+ }
+ // if there was anything left, use it and update
+ // the list pointers; otherwise we are done
+ if (lowest_index != -1)
+ {
+ n(x[lowest_index]);
+ x[lowest_index] = x[lowest_index]->next;
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+
+ delete[] x;
+}
+
+void hashBv::InorderTraverseTwo(hashBv* other, dualNodeAction a)
+{
+ int sizeThis, sizeOther;
+ hashBvNode **nodesThis, **nodesOther;
+
+ sizeThis = this->hashtable_size();
+ sizeOther = other->hashtable_size();
+
+ nodesThis = new (compiler, CMK_hashBv) hashBvNode*[sizeThis];
+ nodesOther = new (compiler, CMK_hashBv) hashBvNode*[sizeOther];
+
+ // populate the arrays
+ for (int i = 0; i < sizeThis; i++)
+ {
+ nodesThis[i] = this->nodeArr[i];
+ }
+
+ for (int i = 0; i < sizeOther; i++)
+ {
+ nodesOther[i] = other->nodeArr[i];
+ }
+
+ while (1)
+ {
+ indexType lowestThis = INT_MAX;
+ indexType lowestOther = INT_MAX;
+ int lowestHashIndexThis = -1;
+ int lowestHashIndexOther = -1;
+
+ // find the lowest remaining node in each BV
+ for (int i = 0; i < sizeThis; i++)
+ {
+ if (nodesThis[i] && nodesThis[i]->baseIndex < lowestThis)
+ {
+ lowestHashIndexThis = i;
+ lowestThis = nodesThis[i]->baseIndex;
+ }
+ }
+ for (int i = 0; i < sizeOther; i++)
+ {
+ if (nodesOther[i] && nodesOther[i]->baseIndex < lowestOther)
+ {
+ lowestHashIndexOther = i;
+ lowestOther = nodesOther[i]->baseIndex;
+ }
+ }
+ hashBvNode *nodeThis, *nodeOther;
+ nodeThis = lowestHashIndexThis == -1 ? nullptr : nodesThis[lowestHashIndexThis];
+ nodeOther = lowestHashIndexOther == -1 ? nullptr : nodesOther[lowestHashIndexOther];
+ // no nodes left in either, so return
+ if ((!nodeThis) && (!nodeOther))
+ {
+ break;
+
+ // there are only nodes left in one bitvector
+ }
+ else if ((!nodeThis) || (!nodeOther))
+ {
+ a(this, other, nodeThis, nodeOther);
+ if (nodeThis)
+ {
+ nodesThis[lowestHashIndexThis] = nodesThis[lowestHashIndexThis]->next;
+ }
+ if (nodeOther)
+ {
+ nodesOther[lowestHashIndexOther] = nodesOther[lowestHashIndexOther]->next;
+ }
+ }
+ // nodes are left in both, so determine whether the lowest ones
+ // match; if so, process them as a pair, otherwise
+ // process the lower of the two alone
+ else if (nodeThis && nodeOther)
+ {
+ if (nodeThis->baseIndex == nodeOther->baseIndex)
+ {
+ a(this, other, nodeThis, nodeOther);
+ nodesThis[lowestHashIndexThis] = nodesThis[lowestHashIndexThis]->next;
+ nodesOther[lowestHashIndexOther] = nodesOther[lowestHashIndexOther]->next;
+ }
+ else if (nodeThis->baseIndex < nodeOther->baseIndex)
+ {
+ a(this, other, nodeThis, nullptr);
+ nodesThis[lowestHashIndexThis] = nodesThis[lowestHashIndexThis]->next;
+ }
+ else if (nodeOther->baseIndex < nodeThis->baseIndex)
+ {
+ a(this, other, nullptr, nodeOther);
+ nodesOther[lowestHashIndexOther] = nodesOther[lowestHashIndexOther]->next;
+ }
+ }
+ }
+ delete[] nodesThis;
+ delete[] nodesOther;
+}
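
A hedged usage sketch (not taken from this change) of how the dual traversal above can be driven: InorderTraverseTwo takes a dualNodeAction callback, and SimpleDumpDualNode, defined a few lines below, is one such callback. The function name and the liveIn/liveOut pointers are illustrative assumptions.

    #ifdef DEBUG
    // Visits the nodes of both vectors in ascending baseIndex order, pairing
    // nodes that share a base index and passing nullptr for the missing side.
    void DumpPairedLiveness(hashBv* liveIn, hashBv* liveOut)
    {
        liveIn->InorderTraverseTwo(liveOut, SimpleDumpDualNode);
    }
    #endif // DEBUG
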
+
+// --------------------------------------------------------------------
+// --------------------------------------------------------------------
+
+#ifdef DEBUG
+void SimpleDumpNode(hashBvNode* n)
+{
+ printf("base: %d\n", n->baseIndex);
+}
+
+void DumpNode(hashBvNode* n)
+{
+ n->dump();
+}
+
+void SimpleDumpDualNode(hashBv* a, hashBv* b, hashBvNode* n, hashBvNode* m)
+{
+ printf("nodes: ");
+ if (n)
+ {
+ printf("%d,", n->baseIndex);
+ }
+ else
+ {
+ printf("----,");
+ }
+ if (m)
+ {
+ printf("%d\n", m->baseIndex);
+ }
+ else
+ {
+ printf("----\n");
+ }
+}
+#endif // DEBUG
+
+hashBvIterator::hashBvIterator()
+{
+ this->bv = nullptr;
+}
+
+hashBvIterator::hashBvIterator(hashBv* bv)
+{
+ this->bv = bv;
+ this->hashtable_index = 0;
+ this->current_element = 0;
+ this->current_base = 0;
+ this->current_data = 0;
+
+ if (bv)
+ {
+ this->hashtable_size = bv->hashtable_size();
+ this->currNode = bv->nodeArr[0];
+
+ if (!this->currNode)
+ {
+ this->nextNode();
+ }
+ }
+}
+
+void hashBvIterator::initFrom(hashBv* bv)
+{
+ this->bv = bv;
+ this->hashtable_size = bv->hashtable_size();
+ this->hashtable_index = 0;
+ this->currNode = bv->nodeArr[0];
+ this->current_element = 0;
+ this->current_base = 0;
+ this->current_data = 0;
+
+ if (!this->currNode)
+ {
+ this->nextNode();
+ }
+ if (this->currNode)
+ {
+ this->current_data = this->currNode->elements[0];
+ }
+}
+
+void hashBvIterator::nextNode()
+{
+ // if we have a valid node then just get the next one in the chain
+ if (this->currNode)
+ {
+ this->currNode = this->currNode->next;
+ }
+
+ // else step to the next one in the hash table
+ while (!this->currNode)
+ {
+ hashtable_index++;
+ // no more
+ if (hashtable_index >= hashtable_size)
+ {
+ // printf("nextnode bailed\n");
+ return;
+ }
+
+ this->currNode = bv->nodeArr[hashtable_index];
+ }
+ // first element in the new node
+ this->current_element = 0;
+ this->current_base = this->currNode->baseIndex;
+ this->current_data = this->currNode->elements[0];
+ // printf("nextnode returned base %d\n", this->current_base);
+ // printf("hti = %d ", hashtable_index);
+}
+
+indexType hashBvIterator::nextBit()
+{
+
+ // printf("in nextbit for bv:\n");
+ // this->bv->dump();
+
+ if (!this->currNode)
+ {
+ this->nextNode();
+ }
+
+top:
+
+ if (!this->currNode)
+ {
+ return NOMOREBITS;
+ }
+
+more_data:
+ if (!this->current_data)
+ {
+ current_element++;
+ // printf("current element is %d\n", current_element);
+ // reached the end of this node
+ if (current_element == (indexType) this->currNode->numElements())
+ {
+ // printf("going to next node\n");
+ this->nextNode();
+ goto top;
+ }
+ else
+ {
+ assert(current_element < (indexType) this->currNode->numElements());
+ // printf("getting more data\n");
+ current_data = this->currNode->elements[current_element];
+ current_base = this->currNode->baseIndex + current_element * BITS_PER_ELEMENT;
+ goto more_data;
+ }
+ }
+ else
+ {
+ while (current_data)
+ {
+ if (current_data & 1)
+ {
+ current_data >>= 1;
+ current_base++;
+
+ return current_base - 1;
+ }
+ else
+ {
+ current_data >>= 1;
+ current_base++;
+ }
+ }
+ goto more_data;
+ }
+}
+
+indexType HbvNext(hashBv* bv, Compiler* comp)
+{
+ if (bv)
+ {
+ bv->globalData()->hashBvNextIterator.initFrom(bv);
+ }
+ return bv->globalData()->hashBvNextIterator.nextBit();
+}
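
A minimal, standalone sketch (not part of the commit) of the bit-walking loop at the core of hashBvIterator::nextBit above: shift the element right one bit at a time and report the running base index whenever the low bit is set. The function name and the fixed 32-bit element width are assumptions made for the example.

    #include <cstdint>
    #include <cstdio>

    // Walk the set bits of a single 32-bit element the same way nextBit does:
    // shift right one bit at a time, reporting the running base index
    // whenever the low bit is set.
    static void enumerateElementBits(uint32_t data, uint32_t base)
    {
        while (data)
        {
            if (data & 1)
            {
                printf("bit %u is set\n", base);
            }
            data >>= 1;
            base++;
        }
    }

    int main()
    {
        enumerateElementBits(0x16, 64); // bits 1, 2, 4 -> prints 65, 66, 68
        return 0;
    }
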
diff --git a/src/jit/hashbv.h b/src/jit/hashbv.h
new file mode 100644
index 0000000000..cadb182cc6
--- /dev/null
+++ b/src/jit/hashbv.h
@@ -0,0 +1,363 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef HASHBV_H
+#define HASHBV_H
+
+#if defined(_M_AMD64) || defined(_M_X86)
+#include <xmmintrin.h>
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <windows.h>
+
+//#define TESTING 1
+
+#define LOG2_BITS_PER_ELEMENT 5
+#define LOG2_ELEMENTS_PER_NODE 2
+#define LOG2_BITS_PER_NODE (LOG2_BITS_PER_ELEMENT + LOG2_ELEMENTS_PER_NODE)
+
+#define BITS_PER_ELEMENT (1 << LOG2_BITS_PER_ELEMENT)
+#define ELEMENTS_PER_NODE (1 << LOG2_ELEMENTS_PER_NODE)
+#define BITS_PER_NODE (1 << LOG2_BITS_PER_NODE)
+
+#ifdef _TARGET_AMD64_
+typedef unsigned __int64 elemType;
+typedef unsigned __int64 indexType;
+#else
+typedef unsigned int elemType;
+typedef unsigned int indexType;
+#endif
+
+class hashBvNode;
+class hashBv;
+class hashBvIterator;
+class hashBvGlobalData;
+
+typedef void bitAction(indexType);
+typedef void nodeAction(hashBvNode*);
+typedef void dualNodeAction(hashBv* left, hashBv* right, hashBvNode* a, hashBvNode* b);
+
+#define NOMOREBITS -1
+
+#ifdef DEBUG
+inline void pBit(indexType i)
+{
+ printf("%d ", i);
+}
+#endif // DEBUG
+
+// ------------------------------------------------------------
+// this is essentially a hashtable of small, fixed-size bitvectors.
+// for any index, the bits of the index select a position as follows:
+// 32 0
+// ------------------------------------------------------------
+// | ... ... ... | hash | element in node | index in element |
+// ------------------------------------------------------------
+//
+//
+// hashBv
+// | // hashtable
+// v
+// []->node->node->node
+// []->node
+// []
+// []->node->node
+//
+//
+
+#if TESTING
+inline int log2(int number)
+{
+ int result = 0;
+ number >>= 1;
+ while (number)
+ {
+ result++;
+ number >>= 1;
+ }
+ return result;
+}
+#endif
+
+// return the greatest power of 2 that is less than or equal to the given number
+// (e.g., nearest_pow2(100) == 64)
+inline int nearest_pow2(unsigned number)
+{
+ int result = 0;
+
+ if (number > 0xffff)
+ {
+ number >>= 16;
+ result += 16;
+ }
+ if (number > 0xff)
+ {
+ number >>= 8;
+ result += 8;
+ }
+ if (number > 0xf)
+ {
+ number >>= 4;
+ result += 4;
+ }
+ if (number > 0x3)
+ {
+ number >>= 2;
+ result += 2;
+ }
+ if (number > 0x1)
+ {
+ number >>= 1;
+ result += 1;
+ }
+ return 1 << result;
+}
+
+class hashBvNode
+{
+public:
+ hashBvNode* next;
+ indexType baseIndex;
+ elemType elements[ELEMENTS_PER_NODE];
+
+public:
+ hashBvNode(indexType base);
+ hashBvNode()
+ {
+ }
+ static hashBvNode* Create(indexType base, Compiler* comp);
+ void Reconstruct(indexType base);
+ int numElements()
+ {
+ return ELEMENTS_PER_NODE;
+ }
+ void setBit(indexType base);
+ void setLowest(indexType numToSet);
+ bool getBit(indexType base);
+ void clrBit(indexType base);
+ bool anySet();
+ bool belongsIn(indexType index);
+ int countBits();
+ bool anyBits();
+ void foreachBit(bitAction x);
+ void freeNode(hashBvGlobalData* glob);
+ bool sameAs(hashBvNode* other);
+ void copyFrom(hashBvNode* other);
+
+ void AndWith(hashBvNode* other);
+ void OrWith(hashBvNode* other);
+ void XorWith(hashBvNode* other);
+ void Subtract(hashBvNode* other);
+
+ elemType AndWithChange(hashBvNode* other);
+ elemType OrWithChange(hashBvNode* other);
+ elemType XorWithChange(hashBvNode* other);
+ elemType SubtractWithChange(hashBvNode* other);
+
+ bool Intersects(hashBvNode* other);
+
+#ifdef DEBUG
+ void dump();
+#endif // DEBUG
+};
+
+class hashBv
+{
+public:
+ // --------------------------------------
+ // data
+ // --------------------------------------
+ hashBvNode** nodeArr;
+ hashBvNode* initialVector[1];
+
+ union {
+ Compiler* compiler;
+ // for freelist
+ hashBv* next;
+ };
+
+ unsigned short log2_hashSize;
+ // used for heuristic resizing... could be overflowed in rare circumstances
+ // but should not affect correctness
+ unsigned short numNodes;
+
+public:
+ hashBv(Compiler* comp);
+ hashBv(hashBv* other);
+ // hashBv() {}
+ static hashBv* Create(Compiler* comp);
+ static void Init(Compiler* comp);
+ static hashBv* CreateFrom(hashBv* other, Compiler* comp);
+ void hbvFree();
+#ifdef DEBUG
+ void dump();
+ void dumpFancy();
+#endif // DEBUG
+ __forceinline int hashtable_size()
+ {
+ return 1 << this->log2_hashSize;
+ }
+
+ hashBvGlobalData* globalData();
+
+ static hashBvNode*& nodeFreeList(hashBvGlobalData* globalData);
+ static hashBv*& hbvFreeList(hashBvGlobalData* data);
+
+ hashBvNode** getInsertionPointForIndex(indexType index);
+
+private:
+ hashBvNode* getNodeForIndexHelper(indexType index, bool canAdd);
+ int getHashForIndex(indexType index, int table_size);
+ int getRehashForIndex(indexType thisIndex, int thisTableSize, int newTableSize);
+
+ // maintain free lists for vectors
+ hashBvNode** getNewVector(int vectorLength);
+ void freeVector(hashBvNode* vect, int vectorLength);
+ int getNodeCount();
+
+ hashBvNode* getFreeList();
+
+public:
+ inline hashBvNode* getOrAddNodeForIndex(indexType index)
+ {
+ hashBvNode* temp = getNodeForIndexHelper(index, true);
+ return temp;
+ }
+ hashBvNode* getNodeForIndex(indexType index);
+ void removeNodeAtBase(indexType index);
+
+public:
+ void setBit(indexType index);
+ void setAll(indexType numToSet);
+ bool testBit(indexType index);
+ void clearBit(indexType index);
+ int countBits();
+ bool anySet();
+ void copyFrom(hashBv* other, Compiler* comp);
+ void ZeroAll();
+ bool CompareWith(hashBv* other);
+
+ void AndWith(hashBv* other);
+ void OrWith(hashBv* other);
+ void XorWith(hashBv* other);
+ void Subtract(hashBv* other);
+ void Subtract3(hashBv* other, hashBv* other2);
+
+ void UnionMinus(hashBv* a, hashBv* b, hashBv* c);
+
+ bool AndWithChange(hashBv* other);
+ bool OrWithChange(hashBv* other);
+ bool OrWithChangeRight(hashBv* other);
+ bool OrWithChangeLeft(hashBv* other);
+ bool XorWithChange(hashBv* other);
+ bool SubtractWithChange(hashBv* other);
+
+ bool Intersects(hashBv* other);
+
+ template <class Action>
+ bool MultiTraverseLHSBigger(hashBv* other);
+ template <class Action>
+ bool MultiTraverseRHSBigger(hashBv* other);
+ template <class Action>
+ bool MultiTraverseEqual(hashBv* other);
+ template <class Action>
+ bool MultiTraverse(hashBv* other);
+
+ void InorderTraverse(nodeAction a);
+ void InorderTraverseTwo(hashBv* other, dualNodeAction a);
+
+ void Resize(int newSize);
+ void Resize();
+ void MergeLists(hashBvNode** a, hashBvNode** b);
+
+ bool TooSmall();
+ bool TooBig();
+ bool IsValid();
+};
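
A hedged sketch of how the interface declared above is typically exercised; "comp" stands in for a Compiler instance, and the function itself is illustrative rather than part of the header.

    inline void hashBvUsageSketch(Compiler* comp)
    {
        hashBv* live = hashBv::Create(comp); // starts empty
        live->setBit(100);
        live->setBit(1000000);               // sparse indices only allocate the nodes they touch
        assert(live->testBit(100));

        hashBv* arg = hashBv::Create(comp);
        arg->setBit(7);

        bool changed = live->OrWithChange(arg); // reports whether any new bit was added
        assert(changed && (live->countBits() == 3));
    }
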
+
+// --------------------------------------------------------------------
+// --------------------------------------------------------------------
+
+class hbvFreeListNode
+{
+public:
+ hbvFreeListNode* next;
+ int size;
+};
+
+// --------------------------------------------------------------------
+// --------------------------------------------------------------------
+
+class hashBvIterator
+{
+public:
+ unsigned hashtable_size;
+ unsigned hashtable_index;
+ hashBv* bv;
+ hashBvNode* currNode;
+ indexType current_element;
+ // base index of current node
+ indexType current_base;
+ // working data of current element
+ elemType current_data;
+
+ hashBvIterator(hashBv* bv);
+ void initFrom(hashBv* bv);
+ hashBvIterator();
+ indexType nextBit();
+
+private:
+ void nextNode();
+};
+
+class hashBvGlobalData
+{
+ friend class hashBv;
+ friend class hashBvNode;
+
+ hashBvNode* hbvNodeFreeList;
+ hashBv* hbvFreeList;
+ unsigned short hbvHashSizeLog2;
+ hbvFreeListNode* hbvFreeVectorList;
+
+public:
+ hashBvIterator hashBvNextIterator;
+};
+
+indexType HbvNext(hashBv* bv, Compiler* comp);
+
+// clang-format off
+#define FOREACH_HBV_BIT_SET(index, bv) \
+ { \
+ for (int hashNum=0; hashNum<(bv)->hashtable_size(); hashNum++) {\
+ hashBvNode *node = (bv)->nodeArr[hashNum];\
+ while (node) { \
+ indexType base = node->baseIndex; \
+ for (int el=0; el<node->numElements(); el++) {\
+ elemType _i = 0; \
+ elemType _e = node->elements[el]; \
+ while (_e) { \
+ int _result = BitScanForwardPtr((DWORD *) &_i, _e); \
+ assert(_result); \
+ (index) = base + (el*BITS_PER_ELEMENT) + _i; \
+ _e ^= (elemType(1) << _i);
+
+#define NEXT_HBV_BIT_SET \
+ }\
+ }\
+ node = node->next; \
+ }\
+ }\
+ } \
+//clang-format on
+
+#ifdef DEBUG
+void SimpleDumpNode(hashBvNode *n);
+void DumpNode(hashBvNode *n);
+void SimpleDumpDualNode(hashBv *a, hashBv *b, hashBvNode *n, hashBvNode *m);
+#endif // DEBUG
+
+#endif
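
The FOREACH_HBV_BIT_SET / NEXT_HBV_BIT_SET pair above is meant to read like a for-each over the set bits of a vector. A hedged sketch of the intended usage, with liveSet as a hypothetical hashBv pointer:

    void foreachBitSketch(hashBv* liveSet)
    {
        indexType lclNum;
        FOREACH_HBV_BIT_SET(lclNum, liveSet)
        {
            // runs once per set bit, with lclNum bound to that bit's index
            printf("V%02u is live\n", (unsigned)lclNum);
        }
        NEXT_HBV_BIT_SET;
    }
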
diff --git a/src/jit/host.h b/src/jit/host.h
new file mode 100644
index 0000000000..87e13d4180
--- /dev/null
+++ b/src/jit/host.h
@@ -0,0 +1,68 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+#ifndef printf
+#define printf logf
+#endif
+
+#ifndef fprintf
+#define fprintf flogf
+#endif
+
+class Compiler;
+class LogEnv
+{
+public:
+ LogEnv(ICorJitInfo* aCompHnd);
+ void setCompiler(Compiler* val)
+ {
+ const_cast<Compiler*&>(compiler) = val;
+ }
+
+ ICorJitInfo* const compHnd;
+ Compiler* const compiler;
+};
+
+BOOL vlogf(unsigned level, const char* fmt, va_list args);
+int vflogf(FILE* file, const char* fmt, va_list args);
+
+int logf(const char* fmt, ...);
+int flogf(FILE* file, const char* fmt, ...);
+void gcDump_logf(const char* fmt, ...);
+
+void logf(unsigned level, const char* fmt, ...);
+
+extern "C" void __cdecl assertAbort(const char* why, const char* file, unsigned line);
+
+#undef assert
+#define assert(p) (void)((p) || (assertAbort(#p, __FILE__, __LINE__), 0))
+
+#else // DEBUG
+
+#undef assert
+#define assert(p) (void)0
+#endif // DEBUG
+
+/*****************************************************************************/
+#ifndef _HOST_H_
+#define _HOST_H_
+/*****************************************************************************/
+
+const size_t OS_page_size = (4 * 1024);
+
+extern FILE* jitstdout;
+
+inline FILE* procstdout()
+{
+ return stdout;
+}
+#undef stdout
+#define stdout use_jitstdout
+
+/*****************************************************************************/
+#endif
+/*****************************************************************************/
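
Under DEBUG, the printf/fprintf redirection above routes diagnostic output through the JIT's own logging (logf/flogf) rather than the process stdout. A hedged sketch of the effect; dumpPhaseName is an illustrative helper, not part of this header.

    #ifdef DEBUG
    void dumpPhaseName(const char* name)
    {
        // With host.h included, this printf expands to logf(...), so the text
        // ends up in the JIT log stream (jitstdout) instead of raw stdout.
        printf("*** phase: %s\n", name);
    }
    #endif // DEBUG
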
diff --git a/src/jit/hostallocator.cpp b/src/jit/hostallocator.cpp
new file mode 100644
index 0000000000..b737424ee8
--- /dev/null
+++ b/src/jit/hostallocator.cpp
@@ -0,0 +1,40 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#include "hostallocator.h"
+
+HostAllocator HostAllocator::s_hostAllocator;
+
+void* HostAllocator::Alloc(size_t size)
+{
+ assert(g_jitHost != nullptr);
+ return g_jitHost->allocateMemory(size, false);
+}
+
+void* HostAllocator::ArrayAlloc(size_t elemSize, size_t numElems)
+{
+ assert(g_jitHost != nullptr);
+
+ ClrSafeInt<size_t> safeElemSize(elemSize);
+ ClrSafeInt<size_t> safeNumElems(numElems);
+ ClrSafeInt<size_t> size = safeElemSize * safeNumElems;
+ if (size.IsOverflow())
+ {
+ return nullptr;
+ }
+
+ return g_jitHost->allocateMemory(size.Value(), false);
+}
+
+void HostAllocator::Free(void* p)
+{
+ assert(g_jitHost != nullptr);
+ g_jitHost->freeMemory(p, false);
+}
+
+HostAllocator* HostAllocator::getHostAllocator()
+{
+ return &s_hostAllocator;
+}
diff --git a/src/jit/hostallocator.h b/src/jit/hostallocator.h
new file mode 100644
index 0000000000..c48ed45b8c
--- /dev/null
+++ b/src/jit/hostallocator.h
@@ -0,0 +1,22 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+class HostAllocator : public IAllocator
+{
+private:
+ static HostAllocator s_hostAllocator;
+
+ HostAllocator()
+ {
+ }
+
+public:
+ void* Alloc(size_t size) override;
+
+ void* ArrayAlloc(size_t elemSize, size_t numElems) override;
+
+ void Free(void* p) override;
+
+ static HostAllocator* getHostAllocator();
+};
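
A hedged usage sketch (illustrative only) of the host allocator declared above; because ArrayAlloc does its element math with ClrSafeInt, an impossible request yields nullptr rather than an undersized block.

    void hostAllocSketch()
    {
        HostAllocator* alloc = HostAllocator::getHostAllocator();

        // elemSize * numElems is computed with overflow checking, so overflow
        // surfaces as a nullptr return instead of a short allocation.
        void* buffer = alloc->ArrayAlloc(sizeof(int), 1024);
        if (buffer != nullptr)
        {
            // ... use the buffer ...
            alloc->Free(buffer);
        }
    }
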
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
new file mode 100644
index 0000000000..d04ded78fa
--- /dev/null
+++ b/src/jit/importer.cpp
@@ -0,0 +1,17997 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Importer XX
+XX XX
+XX Imports the given method and converts it to semantic trees XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "corexcep.h"
+
+#define Verify(cond, msg) \
+ do \
+ { \
+ if (!(cond)) \
+ { \
+ verRaiseVerifyExceptionIfNeeded(INDEBUG(msg) DEBUGARG(__FILE__) DEBUGARG(__LINE__)); \
+ } \
+ } while (0)
+
+#define VerifyOrReturn(cond, msg) \
+ do \
+ { \
+ if (!(cond)) \
+ { \
+ verRaiseVerifyExceptionIfNeeded(INDEBUG(msg) DEBUGARG(__FILE__) DEBUGARG(__LINE__)); \
+ return; \
+ } \
+ } while (0)
+
+#define VerifyOrReturnSpeculative(cond, msg, speculative) \
+ do \
+ { \
+ if (speculative) \
+ { \
+ if (!(cond)) \
+ { \
+ return false; \
+ } \
+ } \
+ else \
+ { \
+ if (!(cond)) \
+ { \
+ verRaiseVerifyExceptionIfNeeded(INDEBUG(msg) DEBUGARG(__FILE__) DEBUGARG(__LINE__)); \
+ return false; \
+ } \
+ } \
+ } while (0)
+
+/*****************************************************************************/
+
+void Compiler::impInit()
+{
+#ifdef DEBUG
+ impTreeList = impTreeLast = nullptr;
+#endif
+
+#if defined(DEBUG)
+ impInlinedCodeSize = 0;
+#endif
+
+ seenConditionalJump = false;
+}
+
+/*****************************************************************************
+ *
+ * Pushes the given tree on the stack.
+ */
+
+void Compiler::impPushOnStack(GenTreePtr tree, typeInfo ti)
+{
+ /* Check for overflow. If inlining, we may be using a bigger stack */
+
+ if ((verCurrentState.esStackDepth >= info.compMaxStack) &&
+ (verCurrentState.esStackDepth >= impStkSize || ((compCurBB->bbFlags & BBF_IMPORTED) == 0)))
+ {
+ BADCODE("stack overflow");
+ }
+
+#ifdef DEBUG
+ // If we are pushing a struct, make certain we know the precise type!
+ if (tree->TypeGet() == TYP_STRUCT)
+ {
+ assert(ti.IsType(TI_STRUCT));
+ CORINFO_CLASS_HANDLE clsHnd = ti.GetClassHandle();
+ assert(clsHnd != NO_CLASS_HANDLE);
+ }
+
+ if (tiVerificationNeeded && !ti.IsDead())
+ {
+ assert(typeInfo::AreEquivalent(NormaliseForStack(ti), ti)); // types are normalized
+
+ // The ti type is consistent with the tree type.
+ //
+
+ // On 64-bit systems, nodes whose "proper" type is "native int" get labeled TYP_LONG.
+ // In the verification type system, we always transform "native int" to "TI_INT".
+ // Ideally, we would keep track of which nodes labeled "TYP_LONG" are really "native int", but
+ // attempts to do that have proved too difficult. Instead, we'll assume that in checks like this,
+ // when there's a mismatch, it's because of this reason -- the typeInfo::AreEquivalentModuloNativeInt
+ // method used in the last disjunct allows exactly this mismatch.
+ assert(ti.IsDead() || ti.IsByRef() && (tree->TypeGet() == TYP_I_IMPL || tree->TypeGet() == TYP_BYREF) ||
+ ti.IsUnboxedGenericTypeVar() && tree->TypeGet() == TYP_REF ||
+ ti.IsObjRef() && tree->TypeGet() == TYP_REF || ti.IsMethod() && tree->TypeGet() == TYP_I_IMPL ||
+ ti.IsType(TI_STRUCT) && tree->TypeGet() != TYP_REF ||
+ typeInfo::AreEquivalentModuloNativeInt(NormaliseForStack(ti),
+ NormaliseForStack(typeInfo(tree->TypeGet()))));
+
+ // If it is a struct type, make certain we normalized the primitive types
+ assert(!ti.IsType(TI_STRUCT) ||
+ info.compCompHnd->getTypeForPrimitiveValueClass(ti.GetClassHandle()) == CORINFO_TYPE_UNDEF);
+ }
+
+#if VERBOSE_VERIFY
+ if (VERBOSE && tiVerificationNeeded)
+ {
+ printf("\n");
+ printf(TI_DUMP_PADDING);
+ printf("About to push to stack: ");
+ ti.Dump();
+ }
+#endif // VERBOSE_VERIFY
+
+#endif // DEBUG
+
+ verCurrentState.esStack[verCurrentState.esStackDepth].seTypeInfo = ti;
+ verCurrentState.esStack[verCurrentState.esStackDepth++].val = tree;
+
+ if ((tree->gtType == TYP_LONG) && (compLongUsed == false))
+ {
+ compLongUsed = true;
+ }
+ else if (((tree->gtType == TYP_FLOAT) || (tree->gtType == TYP_DOUBLE)) && (compFloatingPointUsed == false))
+ {
+ compFloatingPointUsed = true;
+ }
+}
+
+/******************************************************************************/
+// used in the inliner, where we can assume typesafe code. please don't use in the importer!!
+inline void Compiler::impPushOnStackNoType(GenTreePtr tree)
+{
+ assert(verCurrentState.esStackDepth < impStkSize);
+ INDEBUG(verCurrentState.esStack[verCurrentState.esStackDepth].seTypeInfo = typeInfo());
+ verCurrentState.esStack[verCurrentState.esStackDepth++].val = tree;
+
+ if ((tree->gtType == TYP_LONG) && (compLongUsed == false))
+ {
+ compLongUsed = true;
+ }
+ else if (((tree->gtType == TYP_FLOAT) || (tree->gtType == TYP_DOUBLE)) && (compFloatingPointUsed == false))
+ {
+ compFloatingPointUsed = true;
+ }
+}
+
+inline void Compiler::impPushNullObjRefOnStack()
+{
+ impPushOnStack(gtNewIconNode(0, TYP_REF), typeInfo(TI_NULL));
+}
+
+// This method gets called when we run into unverifiable code
+// (and we are verifying the method)
+
+inline void Compiler::verRaiseVerifyExceptionIfNeeded(INDEBUG(const char* msg) DEBUGARG(const char* file)
+ DEBUGARG(unsigned line))
+{
+ // Remember that the code is not verifiable
+ // Note that the method may yet pass canSkipMethodVerification(),
+ // and so the presence of unverifiable code may not be an issue.
+ tiIsVerifiableCode = FALSE;
+
+#ifdef DEBUG
+ const char* tail = strrchr(file, '\\');
+ if (tail)
+ {
+ file = tail + 1;
+ }
+
+ if (JitConfig.JitBreakOnUnsafeCode())
+ {
+ assert(!"Unsafe code detected");
+ }
+#endif
+
+ JITLOG((LL_INFO10000, "Detected unsafe code: %s:%d : %s, while compiling %s opcode %s, IL offset %x\n", file, line,
+ msg, info.compFullName, impCurOpcName, impCurOpcOffs));
+
+ if (verNeedsVerification() || compIsForImportOnly())
+ {
+ JITLOG((LL_ERROR, "Verification failure: %s:%d : %s, while compiling %s opcode %s, IL offset %x\n", file, line,
+ msg, info.compFullName, impCurOpcName, impCurOpcOffs));
+ verRaiseVerifyException(INDEBUG(msg) DEBUGARG(file) DEBUGARG(line));
+ }
+}
+
+inline void DECLSPEC_NORETURN Compiler::verRaiseVerifyException(INDEBUG(const char* msg) DEBUGARG(const char* file)
+ DEBUGARG(unsigned line))
+{
+ JITLOG((LL_ERROR, "Verification failure: %s:%d : %s, while compiling %s opcode %s, IL offset %x\n", file, line,
+ msg, info.compFullName, impCurOpcName, impCurOpcOffs));
+
+#ifdef DEBUG
+ // BreakIfDebuggerPresent();
+ if (getBreakOnBadCode())
+ {
+ assert(!"Typechecking error");
+ }
+#endif
+
+ RaiseException(SEH_VERIFICATION_EXCEPTION, EXCEPTION_NONCONTINUABLE, 0, nullptr);
+ UNREACHABLE();
+}
+
+// helper function that tells us whether the IL instruction at the given address
+// consumes an address from the top of the stack. We use it to avoid marking
+// locals as lvAddrTaken unnecessarily
+bool Compiler::impILConsumesAddr(const BYTE* codeAddr, CORINFO_METHOD_HANDLE fncHandle, CORINFO_MODULE_HANDLE scpHandle)
+{
+ assert(!compIsForInlining());
+
+ OPCODE opcode;
+
+ opcode = (OPCODE)getU1LittleEndian(codeAddr);
+
+ switch (opcode)
+ {
+ // case CEE_LDFLDA: We're taking this one out as if you have a sequence
+ // like
+ //
+ // ldloca.0
+ // ldflda whatever
+ //
+        // of a primitive-like struct, you end up after morphing with the address of a local
+        // that's not marked as addrtaken, which is wrong. Also, ldflda is usually used
+        // for structs that contain other structs, which isn't a case we handle very
+        // well now for other reasons.
+
+ case CEE_LDFLD:
+ {
+ // We won't collapse small fields. This is probably not the right place to have this
+            // check, but we're only using the function for this purpose, and it is easy to factor
+ // out if we need to do so.
+
+ CORINFO_RESOLVED_TOKEN resolvedToken;
+ impResolveToken(codeAddr + sizeof(__int8), &resolvedToken, CORINFO_TOKENKIND_Field);
+
+ CORINFO_CLASS_HANDLE clsHnd;
+ var_types lclTyp = JITtype2varType(info.compCompHnd->getFieldType(resolvedToken.hField, &clsHnd));
+
+ // Preserve 'small' int types
+ if (lclTyp > TYP_INT)
+ {
+ lclTyp = genActualType(lclTyp);
+ }
+
+ if (varTypeIsSmall(lclTyp))
+ {
+ return false;
+ }
+
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
+
+void Compiler::impResolveToken(const BYTE* addr, CORINFO_RESOLVED_TOKEN* pResolvedToken, CorInfoTokenKind kind)
+{
+ pResolvedToken->tokenContext = impTokenLookupContextHandle;
+ pResolvedToken->tokenScope = info.compScopeHnd;
+ pResolvedToken->token = getU4LittleEndian(addr);
+ pResolvedToken->tokenType = kind;
+
+ if (!tiVerificationNeeded)
+ {
+ info.compCompHnd->resolveToken(pResolvedToken);
+ }
+ else
+ {
+ Verify(eeTryResolveToken(pResolvedToken), "Token resolution failed");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Pop one tree from the stack.
+ */
+
+StackEntry Compiler::impPopStack()
+{
+ if (verCurrentState.esStackDepth == 0)
+ {
+ BADCODE("stack underflow");
+ }
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+ if (VERBOSE && tiVerificationNeeded)
+ {
+ JITDUMP("\n");
+ printf(TI_DUMP_PADDING);
+ printf("About to pop from the stack: ");
+ const typeInfo& ti = verCurrentState.esStack[verCurrentState.esStackDepth - 1].seTypeInfo;
+ ti.Dump();
+ }
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+ return verCurrentState.esStack[--verCurrentState.esStackDepth];
+}
+
+StackEntry Compiler::impPopStack(CORINFO_CLASS_HANDLE& structType)
+{
+ StackEntry ret = impPopStack();
+ structType = verCurrentState.esStack[verCurrentState.esStackDepth].seTypeInfo.GetClassHandle();
+ return (ret);
+}
+
+GenTreePtr Compiler::impPopStack(typeInfo& ti)
+{
+ StackEntry ret = impPopStack();
+ ti = ret.seTypeInfo;
+ return (ret.val);
+}
+
+/*****************************************************************************
+ *
+ * Peek at the n'th (0-based) tree from the top of the stack.
+ */
+
+StackEntry& Compiler::impStackTop(unsigned n)
+{
+ if (verCurrentState.esStackDepth <= n)
+ {
+ BADCODE("stack underflow");
+ }
+
+ return verCurrentState.esStack[verCurrentState.esStackDepth - n - 1];
+}
+/*****************************************************************************
+ * Some of the trees are spilled in a special way. While unspilling them, or
+ * making a copy, they need to be handled specially. The following function
+ * enumerates the operators possible after spilling.
+ */
+
+#ifdef DEBUG // only used in asserts
+static bool impValidSpilledStackEntry(GenTreePtr tree)
+{
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ return true;
+ }
+
+ if (tree->OperIsConst())
+ {
+ return true;
+ }
+
+ return false;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * The following logic is used to save/restore stack contents.
+ * If 'copy' is true, then we make a copy of the trees on the stack. These
+ * have to all be cloneable/spilled values.
+ */
+
+void Compiler::impSaveStackState(SavedStack* savePtr, bool copy)
+{
+ savePtr->ssDepth = verCurrentState.esStackDepth;
+
+ if (verCurrentState.esStackDepth)
+ {
+ savePtr->ssTrees = new (this, CMK_ImpStack) StackEntry[verCurrentState.esStackDepth];
+ size_t saveSize = verCurrentState.esStackDepth * sizeof(*savePtr->ssTrees);
+
+ if (copy)
+ {
+ StackEntry* table = savePtr->ssTrees;
+
+ /* Make a fresh copy of all the stack entries */
+
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++, table++)
+ {
+ table->seTypeInfo = verCurrentState.esStack[level].seTypeInfo;
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+
+ assert(impValidSpilledStackEntry(tree));
+
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
+ case GT_CNS_DBL:
+ case GT_CNS_STR:
+ case GT_LCL_VAR:
+ table->val = gtCloneExpr(tree);
+ break;
+
+ default:
+ assert(!"Bad oper - Not covered by impValidSpilledStackEntry()");
+ break;
+ }
+ }
+ }
+ else
+ {
+ memcpy(savePtr->ssTrees, verCurrentState.esStack, saveSize);
+ }
+ }
+}
+
+void Compiler::impRestoreStackState(SavedStack* savePtr)
+{
+ verCurrentState.esStackDepth = savePtr->ssDepth;
+
+ if (verCurrentState.esStackDepth)
+ {
+ memcpy(verCurrentState.esStack, savePtr->ssTrees,
+ verCurrentState.esStackDepth * sizeof(*verCurrentState.esStack));
+ }
+}
+
+/*****************************************************************************
+ *
+ * Get the tree list started for a new basic block.
+ */
+inline void Compiler::impBeginTreeList()
+{
+ assert(impTreeList == nullptr && impTreeLast == nullptr);
+
+ impTreeList = impTreeLast = new (this, GT_BEG_STMTS) GenTree(GT_BEG_STMTS, TYP_VOID);
+}
+
+/*****************************************************************************
+ *
+ * Store the given start and end stmt in the given basic block. This is
+ * mostly called by impEndTreeList(BasicBlock *block). It is called
+ * directly only for handling CEE_LEAVEs out of finally-protected try's.
+ */
+
+inline void Compiler::impEndTreeList(BasicBlock* block, GenTreePtr firstStmt, GenTreePtr lastStmt)
+{
+ assert(firstStmt->gtOper == GT_STMT);
+ assert(lastStmt->gtOper == GT_STMT);
+
+ /* Make the list circular, so that we can easily walk it backwards */
+
+ firstStmt->gtPrev = lastStmt;
+
+ /* Store the tree list in the basic block */
+
+ block->bbTreeList = firstStmt;
+
+ /* The block should not already be marked as imported */
+ assert((block->bbFlags & BBF_IMPORTED) == 0);
+
+ block->bbFlags |= BBF_IMPORTED;
+}
+
+/*****************************************************************************
+ *
+ * Store the current tree list in the given basic block.
+ */
+
+inline void Compiler::impEndTreeList(BasicBlock* block)
+{
+ assert(impTreeList->gtOper == GT_BEG_STMTS);
+
+ GenTreePtr firstTree = impTreeList->gtNext;
+
+ if (!firstTree)
+ {
+ /* The block should not already be marked as imported */
+ assert((block->bbFlags & BBF_IMPORTED) == 0);
+
+ // Empty block. Just mark it as imported
+ block->bbFlags |= BBF_IMPORTED;
+ }
+ else
+ {
+ // Ignore the GT_BEG_STMTS
+ assert(firstTree->gtPrev == impTreeList);
+
+ impEndTreeList(block, firstTree, impTreeLast);
+ }
+
+#ifdef DEBUG
+ if (impLastILoffsStmt != nullptr)
+ {
+ impLastILoffsStmt->gtStmt.gtStmtLastILoffs = compIsForInlining() ? BAD_IL_OFFSET : impCurOpcOffs;
+ impLastILoffsStmt = nullptr;
+ }
+
+ impTreeList = impTreeLast = nullptr;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Check that storing the given tree doesn't mess up the semantic order. Note
+ * that this has only limited value as we can only check [0..chkLevel).
+ */
+
+inline void Compiler::impAppendStmtCheck(GenTreePtr stmt, unsigned chkLevel)
+{
+#ifndef DEBUG
+ return;
+#else
+ assert(stmt->gtOper == GT_STMT);
+
+ if (chkLevel == (unsigned)CHECK_SPILL_ALL)
+ {
+ chkLevel = verCurrentState.esStackDepth;
+ }
+
+ if (verCurrentState.esStackDepth == 0 || chkLevel == 0 || chkLevel == (unsigned)CHECK_SPILL_NONE)
+ {
+ return;
+ }
+
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+    // Calls can only be appended if there are no trees with GTF_GLOB_EFFECT on the stack
+
+ if (tree->gtFlags & GTF_CALL)
+ {
+ for (unsigned level = 0; level < chkLevel; level++)
+ {
+ assert((verCurrentState.esStack[level].val->gtFlags & GTF_GLOB_EFFECT) == 0);
+ }
+ }
+
+ if (tree->gtOper == GT_ASG)
+ {
+ // For an assignment to a local variable, all references of that
+ // variable have to be spilled. If it is aliased, all calls and
+ // indirect accesses have to be spilled
+
+ if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ for (unsigned level = 0; level < chkLevel; level++)
+ {
+ assert(!gtHasRef(verCurrentState.esStack[level].val, lclNum, false));
+ assert(!lvaTable[lclNum].lvAddrExposed ||
+ (verCurrentState.esStack[level].val->gtFlags & GTF_SIDE_EFFECT) == 0);
+ }
+ }
+
+ // If the access may be to global memory, all side effects have to be spilled.
+
+ else if (tree->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
+ {
+ for (unsigned level = 0; level < chkLevel; level++)
+ {
+ assert((verCurrentState.esStack[level].val->gtFlags & GTF_GLOB_REF) == 0);
+ }
+ }
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Append the given GT_STMT node to the current block's tree list.
+ * [0..chkLevel) is the portion of the stack which we will check for
+ * interference with stmt and spill if needed.
+ */
+
+inline void Compiler::impAppendStmt(GenTreePtr stmt, unsigned chkLevel)
+{
+ assert(stmt->gtOper == GT_STMT);
+ noway_assert(impTreeLast != nullptr);
+
+ /* If the statement being appended has any side-effects, check the stack
+ to see if anything needs to be spilled to preserve correct ordering. */
+
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+ unsigned flags = expr->gtFlags & GTF_GLOB_EFFECT;
+
+    // Assignments to (unaliased) locals don't count as side-effects, as
+ // we handle them specially using impSpillLclRefs(). Temp locals should
+ // be fine too.
+ // TODO-1stClassStructs: The check below should apply equally to struct assignments,
+ // but previously the block ops were always being marked GTF_GLOB_REF, even if
+ // the operands could not be global refs.
+
+ if ((expr->gtOper == GT_ASG) && (expr->gtOp.gtOp1->gtOper == GT_LCL_VAR) &&
+ !(expr->gtOp.gtOp1->gtFlags & GTF_GLOB_REF) && !gtHasLocalsWithAddrOp(expr->gtOp.gtOp2) &&
+ !varTypeIsStruct(expr->gtOp.gtOp1))
+ {
+ unsigned op2Flags = expr->gtOp.gtOp2->gtFlags & GTF_GLOB_EFFECT;
+ assert(flags == (op2Flags | GTF_ASG));
+ flags = op2Flags;
+ }
+
+ if (chkLevel == (unsigned)CHECK_SPILL_ALL)
+ {
+ chkLevel = verCurrentState.esStackDepth;
+ }
+
+ if (chkLevel && chkLevel != (unsigned)CHECK_SPILL_NONE)
+ {
+ assert(chkLevel <= verCurrentState.esStackDepth);
+
+ if (flags)
+ {
+ // If there is a call, we have to spill global refs
+ bool spillGlobEffects = (flags & GTF_CALL) ? true : false;
+
+ if (expr->gtOper == GT_ASG)
+ {
+ GenTree* lhs = expr->gtGetOp1();
+ // If we are assigning to a global ref, we have to spill global refs on stack.
+ // TODO-1stClassStructs: Previously, spillGlobEffects was set to true for
+ // GT_INITBLK and GT_COPYBLK, but this is overly conservative, and should be
+ // revisited. (Note that it was NOT set to true for GT_COPYOBJ.)
+ if (!expr->OperIsBlkOp())
+ {
+ // If we are assigning to a global ref, we have to spill global refs on stack
+ if ((lhs->gtFlags & GTF_GLOB_REF) != 0)
+ {
+ spillGlobEffects = true;
+ }
+ }
+ else if ((lhs->OperIsBlk() && !lhs->AsBlk()->HasGCPtr()) ||
+ ((lhs->OperGet() == GT_LCL_VAR) &&
+ (lvaTable[lhs->AsLclVarCommon()->gtLclNum].lvStructGcCount == 0)))
+ {
+ spillGlobEffects = true;
+ }
+ }
+
+ impSpillSideEffects(spillGlobEffects, chkLevel DEBUGARG("impAppendStmt"));
+ }
+ else
+ {
+ impSpillSpecialSideEff();
+ }
+ }
+
+ impAppendStmtCheck(stmt, chkLevel);
+
+ /* Point 'prev' at the previous node, so that we can walk backwards */
+
+ stmt->gtPrev = impTreeLast;
+
+ /* Append the expression statement to the list */
+
+ impTreeLast->gtNext = stmt;
+ impTreeLast = stmt;
+
+#ifdef FEATURE_SIMD
+ impMarkContiguousSIMDFieldAssignments(stmt);
+#endif
+
+#ifdef DEBUGGING_SUPPORT
+
+ /* Once we set impCurStmtOffs in an appended tree, we are ready to
+ report the following offsets. So reset impCurStmtOffs */
+
+ if (impTreeLast->gtStmt.gtStmtILoffsx == impCurStmtOffs)
+ {
+ impCurStmtOffsSet(BAD_IL_OFFSET);
+ }
+
+#endif
+
+#ifdef DEBUG
+ if (impLastILoffsStmt == nullptr)
+ {
+ impLastILoffsStmt = stmt;
+ }
+
+ if (verbose)
+ {
+ printf("\n\n");
+ gtDispTree(stmt);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Insert the given GT_STMT "stmt" before GT_STMT "stmtBefore"
+ */
+
+inline void Compiler::impInsertStmtBefore(GenTreePtr stmt, GenTreePtr stmtBefore)
+{
+ assert(stmt->gtOper == GT_STMT);
+ assert(stmtBefore->gtOper == GT_STMT);
+
+ GenTreePtr stmtPrev = stmtBefore->gtPrev;
+ stmt->gtPrev = stmtPrev;
+ stmt->gtNext = stmtBefore;
+ stmtPrev->gtNext = stmt;
+ stmtBefore->gtPrev = stmt;
+}
+
+/*****************************************************************************
+ *
+ * Append the given expression tree to the current block's tree list.
+ * Return the newly created statement.
+ */
+
+GenTreePtr Compiler::impAppendTree(GenTreePtr tree, unsigned chkLevel, IL_OFFSETX offset)
+{
+ assert(tree);
+
+ /* Allocate an 'expression statement' node */
+
+ GenTreePtr expr = gtNewStmt(tree, offset);
+
+ /* Append the statement to the current block's stmt list */
+
+ impAppendStmt(expr, chkLevel);
+
+ return expr;
+}
+
+/*****************************************************************************
+ *
+ * Insert the given expression tree before GT_STMT "stmtBefore"
+ */
+
+void Compiler::impInsertTreeBefore(GenTreePtr tree, IL_OFFSETX offset, GenTreePtr stmtBefore)
+{
+ assert(stmtBefore->gtOper == GT_STMT);
+
+ /* Allocate an 'expression statement' node */
+
+ GenTreePtr expr = gtNewStmt(tree, offset);
+
+ /* Append the statement to the current block's stmt list */
+
+ impInsertStmtBefore(expr, stmtBefore);
+}
+
+/*****************************************************************************
+ *
+ * Append an assignment of the given value to a temp to the current tree list.
+ * curLevel is the stack level for which the spill to the temp is being done.
+ */
+
+void Compiler::impAssignTempGen(unsigned tmp,
+ GenTreePtr val,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt, /* = NULL */
+ IL_OFFSETX ilOffset, /* = BAD_IL_OFFSET */
+ BasicBlock* block /* = NULL */
+ )
+{
+ GenTreePtr asg = gtNewTempAssign(tmp, val);
+
+ if (!asg->IsNothingNode())
+ {
+ if (pAfterStmt)
+ {
+ GenTreePtr asgStmt = gtNewStmt(asg, ilOffset);
+ *pAfterStmt = fgInsertStmtAfter(block, *pAfterStmt, asgStmt);
+ }
+ else
+ {
+ impAppendTree(asg, curLevel, impCurStmtOffs);
+ }
+ }
+}
+
+/*****************************************************************************
+ * same as above, but handle the valueclass case too
+ */
+
+void Compiler::impAssignTempGen(unsigned tmpNum,
+ GenTreePtr val,
+ CORINFO_CLASS_HANDLE structType,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt, /* = NULL */
+ IL_OFFSETX ilOffset, /* = BAD_IL_OFFSET */
+ BasicBlock* block /* = NULL */
+ )
+{
+ GenTreePtr asg;
+
+ if (varTypeIsStruct(val))
+ {
+ assert(tmpNum < lvaCount);
+ assert(structType != NO_CLASS_HANDLE);
+
+ // if the method is non-verifiable the assert is not true
+ // so at least ignore it in the case when verification is turned on
+ // since any block that tries to use the temp would have failed verification.
+ var_types varType = lvaTable[tmpNum].lvType;
+ assert(tiVerificationNeeded || varType == TYP_UNDEF || varTypeIsStruct(varType));
+ lvaSetStruct(tmpNum, structType, false);
+
+ // Now, set the type of the struct value. Note that lvaSetStruct may modify the type
+ // of the lclVar to a specialized type (e.g. TYP_SIMD), based on the handle (structType)
+ // that has been passed in for the value being assigned to the temp, in which case we
+ // need to set 'val' to that same type.
+ // Note also that if we always normalized the types of any node that might be a struct
+ // type, this would not be necessary - but that requires additional JIT/EE interface
+ // calls that may not actually be required - e.g. if we only access a field of a struct.
+
+ val->gtType = lvaTable[tmpNum].lvType;
+
+ GenTreePtr dst = gtNewLclvNode(tmpNum, val->gtType);
+ asg = impAssignStruct(dst, val, structType, curLevel, pAfterStmt, block);
+ }
+ else
+ {
+ asg = gtNewTempAssign(tmpNum, val);
+ }
+
+ if (!asg->IsNothingNode())
+ {
+ if (pAfterStmt)
+ {
+ GenTreePtr asgStmt = gtNewStmt(asg, ilOffset);
+ *pAfterStmt = fgInsertStmtAfter(block, *pAfterStmt, asgStmt);
+ }
+ else
+ {
+ impAppendTree(asg, curLevel, impCurStmtOffs);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Pop the given number of values from the stack and return a list node with
+ * their values.
+ * The 'prefixTree' argument may optionally contain an argument
+ * list that is prepended to the list returned from this function.
+ *
+ * The notion of prepended is a bit misleading in that the list is backwards
+ * from the way I would expect: The first element popped is at the end of
+ * the returned list, and prefixTree is 'before' that, meaning closer to
+ * the end of the list. To get to prefixTree, you have to walk to the
+ * end of the list.
+ *
+ * For ARG_ORDER_R2L prefixTree is only used to insert extra arguments, as
+ * such we reverse its meaning such that returnValue has a reversed
+ * prefixTree at the head of the list.
+ */
+
+GenTreeArgList* Compiler::impPopList(unsigned count,
+ unsigned* flagsPtr,
+ CORINFO_SIG_INFO* sig,
+ GenTreeArgList* prefixTree)
+{
+ assert(sig == nullptr || count == sig->numArgs);
+
+ unsigned flags = 0;
+ CORINFO_CLASS_HANDLE structType;
+ GenTreeArgList* treeList;
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L)
+ {
+ treeList = nullptr;
+ }
+ else
+ { // ARG_ORDER_L2R
+ treeList = prefixTree;
+ }
+
+ while (count--)
+ {
+ StackEntry se = impPopStack();
+ typeInfo ti = se.seTypeInfo;
+ GenTreePtr temp = se.val;
+
+ if (varTypeIsStruct(temp))
+ {
+ // Morph trees that aren't already OBJs or MKREFANY to be OBJs
+ assert(ti.IsType(TI_STRUCT));
+ structType = ti.GetClassHandleForValueClass();
+ temp = impNormStructVal(temp, structType, (unsigned)CHECK_SPILL_ALL);
+ }
+
+ /* NOTE: we defer bashing the type for I_IMPL to fgMorphArgs */
+ flags |= temp->gtFlags;
+ treeList = gtNewListNode(temp, treeList);
+ }
+
+ *flagsPtr = flags;
+
+ if (sig != nullptr)
+ {
+ if (sig->retTypeSigClass != nullptr && sig->retType != CORINFO_TYPE_CLASS &&
+ sig->retType != CORINFO_TYPE_BYREF && sig->retType != CORINFO_TYPE_PTR && sig->retType != CORINFO_TYPE_VAR)
+ {
+ // Make sure that all valuetypes (including enums) that we push are loaded.
+            // This is to guarantee that if a GC is triggered from the prestub of this method,
+ // all valuetypes in the method signature are already loaded.
+ // We need to be able to find the size of the valuetypes, but we cannot
+ // do a class-load from within GC.
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(sig->retTypeSigClass);
+ }
+
+ CORINFO_ARG_LIST_HANDLE argLst = sig->args;
+ CORINFO_CLASS_HANDLE argClass;
+ CORINFO_CLASS_HANDLE argRealClass;
+ GenTreeArgList* args;
+ unsigned sigSize;
+
+ for (args = treeList, count = sig->numArgs; count > 0; args = args->Rest(), count--)
+ {
+ PREFIX_ASSUME(args != nullptr);
+
+ CorInfoType corType = strip(info.compCompHnd->getArgType(sig, argLst, &argClass));
+
+ // insert implied casts (from float to double or double to float)
+
+ if (corType == CORINFO_TYPE_DOUBLE && args->Current()->TypeGet() == TYP_FLOAT)
+ {
+ args->Current() = gtNewCastNode(TYP_DOUBLE, args->Current(), TYP_DOUBLE);
+ }
+ else if (corType == CORINFO_TYPE_FLOAT && args->Current()->TypeGet() == TYP_DOUBLE)
+ {
+ args->Current() = gtNewCastNode(TYP_FLOAT, args->Current(), TYP_FLOAT);
+ }
+
+ // insert any widening or narrowing casts for backwards compatibility
+
+ args->Current() = impImplicitIorI4Cast(args->Current(), JITtype2varType(corType));
+
+ if (corType != CORINFO_TYPE_CLASS && corType != CORINFO_TYPE_BYREF && corType != CORINFO_TYPE_PTR &&
+ corType != CORINFO_TYPE_VAR && (argRealClass = info.compCompHnd->getArgClass(sig, argLst)) != nullptr)
+ {
+                // Everett MC++ could generate IL with mismatched valuetypes. It used to work with the Everett JIT,
+                // but it stopped working in Whidbey when we started passing simple valuetypes as underlying
+ // primitive types.
+ // We will try to adjust for this case here to avoid breaking customers code (see VSW 485789 for
+ // details).
+ if (corType == CORINFO_TYPE_VALUECLASS && !varTypeIsStruct(args->Current()))
+ {
+ args->Current() = impNormStructVal(args->Current(), argRealClass, (unsigned)CHECK_SPILL_ALL, true);
+ }
+
+ // Make sure that all valuetypes (including enums) that we push are loaded.
+                // This is to guarantee that if a GC is triggered from the prestub of this method,
+ // all valuetypes in the method signature are already loaded.
+ // We need to be able to find the size of the valuetypes, but we cannot
+ // do a class-load from within GC.
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(argRealClass);
+ }
+
+ argLst = info.compCompHnd->getArgNext(argLst);
+ }
+ }
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L)
+ {
+ // Prepend the prefixTree
+
+ // Simple in-place reversal to place treeList
+ // at the end of a reversed prefixTree
+ while (prefixTree != nullptr)
+ {
+ GenTreeArgList* next = prefixTree->Rest();
+ prefixTree->Rest() = treeList;
+ treeList = prefixTree;
+ prefixTree = next;
+ }
+ }
+ return treeList;
+}
+
+/*****************************************************************************
+ *
+ * Pop the given number of values from the stack in reverse order (STDCALL/CDECL etc.)
+ * The first "skipReverseCount" items are not reversed.
+ */
+
+GenTreeArgList* Compiler::impPopRevList(unsigned count,
+ unsigned* flagsPtr,
+ CORINFO_SIG_INFO* sig,
+ unsigned skipReverseCount)
+
+{
+ assert(skipReverseCount <= count);
+
+ GenTreeArgList* list = impPopList(count, flagsPtr, sig);
+
+ // reverse the list
+ if (list == nullptr || skipReverseCount == count)
+ {
+ return list;
+ }
+
+ GenTreeArgList* ptr = nullptr; // Initialized to the first node that needs to be reversed
+ GenTreeArgList* lastSkipNode = nullptr; // Will be set to the last node that does not need to be reversed
+
+ if (skipReverseCount == 0)
+ {
+ ptr = list;
+ }
+ else
+ {
+ lastSkipNode = list;
+ // Get to the first node that needs to be reversed
+ for (unsigned i = 0; i < skipReverseCount - 1; i++)
+ {
+ lastSkipNode = lastSkipNode->Rest();
+ }
+
+ PREFIX_ASSUME(lastSkipNode != nullptr);
+ ptr = lastSkipNode->Rest();
+ }
+
+ GenTreeArgList* reversedList = nullptr;
+
+ do
+ {
+ GenTreeArgList* tmp = ptr->Rest();
+ ptr->Rest() = reversedList;
+ reversedList = ptr;
+ ptr = tmp;
+ } while (ptr != nullptr);
+
+ if (skipReverseCount)
+ {
+ lastSkipNode->Rest() = reversedList;
+ return list;
+ }
+ else
+ {
+ return reversedList;
+ }
+}
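
The core of impPopRevList above is a classic in-place reversal of a singly linked list. A minimal standalone sketch of that pointer dance (Node and reverseList are illustrative names; the skip-prefix handling is omitted for brevity):

    #include <cstdio>

    struct Node
    {
        int   val;
        Node* next;
    };

    // In-place reversal of a singly linked list: the same pointer dance the
    // do/while loop in impPopRevList performs on GenTreeArgList nodes.
    static Node* reverseList(Node* head)
    {
        Node* reversed = nullptr;
        while (head != nullptr)
        {
            Node* rest = head->next; // remember the rest of the list
            head->next = reversed;   // hook the current node onto the reversed part
            reversed   = head;
            head       = rest;
        }
        return reversed;
    }

    int main()
    {
        Node c = {3, nullptr}, b = {2, &c}, a = {1, &b};
        for (Node* n = reverseList(&a); n != nullptr; n = n->next)
        {
            printf("%d ", n->val); // prints: 3 2 1
        }
        return 0;
    }
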
+
+/*****************************************************************************
+ Assign (copy) the structure from 'src' to 'dest'. The structure is a value
+ class of type 'clsHnd'. It returns the tree that should be appended to the
+ statement list that represents the assignment.
+ Temp assignments may be appended to impTreeList if spilling is necessary.
+ curLevel is the stack level for which a spill may be being done.
+ */
+
+GenTreePtr Compiler::impAssignStruct(GenTreePtr dest,
+ GenTreePtr src,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt, /* = NULL */
+ BasicBlock* block /* = NULL */
+ )
+{
+ assert(varTypeIsStruct(dest));
+
+ while (dest->gtOper == GT_COMMA)
+ {
+ assert(varTypeIsStruct(dest->gtOp.gtOp2)); // Second thing is the struct
+
+ // Append all the op1 of GT_COMMA trees before we evaluate op2 of the GT_COMMA tree.
+ if (pAfterStmt)
+ {
+ *pAfterStmt = fgInsertStmtAfter(block, *pAfterStmt, gtNewStmt(dest->gtOp.gtOp1, impCurStmtOffs));
+ }
+ else
+ {
+ impAppendTree(dest->gtOp.gtOp1, curLevel, impCurStmtOffs); // do the side effect
+ }
+
+ // set dest to the second thing
+ dest = dest->gtOp.gtOp2;
+ }
+
+ assert(dest->gtOper == GT_LCL_VAR || dest->gtOper == GT_RETURN || dest->gtOper == GT_FIELD ||
+ dest->gtOper == GT_IND || dest->gtOper == GT_OBJ || dest->gtOper == GT_INDEX);
+
+ if (dest->OperGet() == GT_LCL_VAR && src->OperGet() == GT_LCL_VAR &&
+ src->gtLclVarCommon.gtLclNum == dest->gtLclVarCommon.gtLclNum)
+ {
+ // Make this a NOP
+ return gtNewNothingNode();
+ }
+
+ // TODO-1stClassStructs: Avoid creating an address if it is not needed,
+ // or re-creating a Blk node if it is.
+ GenTreePtr destAddr;
+
+ if (dest->gtOper == GT_IND || dest->OperIsBlk())
+ {
+ destAddr = dest->gtOp.gtOp1;
+ }
+ else
+ {
+ destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ }
+
+ return (impAssignStructPtr(destAddr, src, structHnd, curLevel, pAfterStmt, block));
+}
+
+/*****************************************************************************/
+
+GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr,
+ GenTreePtr src,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt, /* = NULL */
+ BasicBlock* block /* = NULL */
+ )
+{
+ var_types destType;
+ GenTreePtr dest = nullptr;
+ unsigned destFlags = 0;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ assert(varTypeIsStruct(src) || (src->gtOper == GT_ADDR && src->TypeGet() == TYP_BYREF));
+ // TODO-ARM-BUG: Does ARM need this?
+ // TODO-ARM64-BUG: Does ARM64 need this?
+ assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_OBJ ||
+ src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || src->gtOper == GT_RET_EXPR ||
+ src->gtOper == GT_COMMA || src->gtOper == GT_ADDR ||
+ (src->TypeGet() != TYP_STRUCT && (GenTree::OperIsSIMD(src->gtOper) || src->gtOper == GT_LCL_FLD)));
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ assert(varTypeIsStruct(src));
+
+ assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_OBJ ||
+ src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || src->gtOper == GT_RET_EXPR ||
+ src->gtOper == GT_COMMA ||
+ (src->TypeGet() != TYP_STRUCT && (GenTree::OperIsSIMD(src->gtOper) || src->gtOper == GT_LCL_FLD)));
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (destAddr->OperGet() == GT_ADDR)
+ {
+ GenTree* destNode = destAddr->gtGetOp1();
+ // If the actual destination is already a block node, or is a node that
+ // will be morphed, don't insert an OBJ(ADDR).
+ if (destNode->gtOper == GT_INDEX || destNode->OperIsBlk())
+ {
+ dest = destNode;
+ }
+ destType = destNode->TypeGet();
+ }
+ else
+ {
+ destType = src->TypeGet();
+ }
+
+ var_types asgType = src->TypeGet();
+
+ if (src->gtOper == GT_CALL)
+ {
+ if (src->AsCall()->TreatAsHasRetBufArg(this))
+ {
+ // Case of call returning a struct via hidden retbuf arg
+
+ // insert the return value buffer into the argument list as first byref parameter
+ src->gtCall.gtCallArgs = gtNewListNode(destAddr, src->gtCall.gtCallArgs);
+
+ // now returns void, not a struct
+ src->gtType = TYP_VOID;
+
+ // return the morphed call node
+ return src;
+ }
+ else
+ {
+ // Case of call returning a struct in one or more registers.
+
+ var_types returnType = (var_types)src->gtCall.gtReturnType;
+
+ // We won't use a return buffer, so change the type of src->gtType to 'returnType'
+ src->gtType = genActualType(returnType);
+
+ // First we try to change this to "LclVar/LclFld = call"
+ //
+ if ((destAddr->gtOper == GT_ADDR) && (destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR))
+ {
+ // If it is a multi-reg struct return, don't change the oper to GT_LCL_FLD.
+ // That is, the IR will be of the form lclVar = call for multi-reg return
+ //
+ GenTreePtr lcl = destAddr->gtOp.gtOp1;
+ if (src->AsCall()->HasMultiRegRetVal())
+ {
+ // Mark the struct LclVar as used in a MultiReg return context
+ // which currently makes it non promotable.
+ lvaTable[lcl->gtLclVarCommon.gtLclNum].lvIsMultiRegRet = true;
+ }
+ else // The call result is not a multireg return
+ {
+ // We change this to a GT_LCL_FLD (from a GT_ADDR of a GT_LCL_VAR)
+ lcl->ChangeOper(GT_LCL_FLD);
+ fgLclFldAssign(lcl->gtLclVarCommon.gtLclNum);
+ }
+
+ lcl->gtType = src->gtType;
+ asgType = src->gtType;
+ dest = lcl;
+
+#if defined(_TARGET_ARM_)
+ impMarkLclDstNotPromotable(lcl->gtLclVarCommon.gtLclNum, src, structHnd);
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Not allowed for FEATURE_CORCLR which is the only SKU available for System V OSs.
+ assert(!src->gtCall.IsVarargs() && "varargs not allowed for System V OSs.");
+
+ // Make the struct non promotable. The eightbytes could contain multiple fields.
+ lvaTable[lcl->gtLclVarCommon.gtLclNum].lvIsMultiRegRet = true;
+#endif
+ }
+ else // we don't have a GT_ADDR of a GT_LCL_VAR
+ {
+ // !!! The destination could be on stack. !!!
+ // This flag will let us choose the correct write barrier.
+ asgType = returnType;
+ destFlags = GTF_IND_TGTANYWHERE;
+ }
+ }
+ }
+ else if (src->gtOper == GT_RET_EXPR)
+ {
+ GenTreePtr call = src->gtRetExpr.gtInlineCandidate;
+ noway_assert(call->gtOper == GT_CALL);
+
+ if (call->AsCall()->HasRetBufArg())
+ {
+ // insert the return value buffer into the argument list as first byref parameter
+ call->gtCall.gtCallArgs = gtNewListNode(destAddr, call->gtCall.gtCallArgs);
+
+ // now returns void, not a struct
+ src->gtType = TYP_VOID;
+ call->gtType = TYP_VOID;
+
+ // We already have appended the write to 'dest' GT_CALL's args
+ // So now we just return an empty node (pruning the GT_RET_EXPR)
+ return src;
+ }
+ else
+ {
+ // Case of inline method returning a struct in one or more registers.
+ //
+ var_types returnType = (var_types)call->gtCall.gtReturnType;
+
+ // We won't need a return buffer
+ asgType = returnType;
+ src->gtType = genActualType(returnType);
+ call->gtType = src->gtType;
+
+ // 1stClassStructToDo: We shouldn't necessarily need this.
+ if (dest != nullptr)
+ {
+ dest = gtNewOperNode(GT_IND, returnType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
+ }
+
+ // !!! The destination could be on stack. !!!
+ // This flag will let us choose the correct write barrier.
+ destFlags = GTF_IND_TGTANYWHERE;
+ }
+ }
+ else if (src->OperIsBlk())
+ {
+ asgType = impNormStructType(structHnd);
+ if (src->gtOper == GT_OBJ)
+ {
+ assert(src->gtObj.gtClass == structHnd);
+ }
+ }
+ else if (src->gtOper == GT_INDEX)
+ {
+ asgType = impNormStructType(structHnd);
+ assert(src->gtIndex.gtStructElemClass == structHnd);
+ }
+ else if (src->gtOper == GT_MKREFANY)
+ {
+ // Since we are assigning the result of a GT_MKREFANY,
+ // "destAddr" must point to a refany.
+
+ GenTreePtr destAddrClone;
+ destAddr =
+ impCloneExpr(destAddr, &destAddrClone, structHnd, curLevel, pAfterStmt DEBUGARG("MKREFANY assignment"));
+
+ assert(offsetof(CORINFO_RefAny, dataPtr) == 0);
+ assert(destAddr->gtType == TYP_I_IMPL || destAddr->gtType == TYP_BYREF);
+ GetZeroOffsetFieldMap()->Set(destAddr, GetFieldSeqStore()->CreateSingleton(GetRefanyDataField()));
+ GenTreePtr ptrSlot = gtNewOperNode(GT_IND, TYP_I_IMPL, destAddr);
+ GenTreeIntCon* typeFieldOffset = gtNewIconNode(offsetof(CORINFO_RefAny, type), TYP_I_IMPL);
+ typeFieldOffset->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
+ GenTreePtr typeSlot =
+ gtNewOperNode(GT_IND, TYP_I_IMPL, gtNewOperNode(GT_ADD, destAddr->gtType, destAddrClone, typeFieldOffset));
+
+ // append the assign of the pointer value
+ GenTreePtr asg = gtNewAssignNode(ptrSlot, src->gtOp.gtOp1);
+ if (pAfterStmt)
+ {
+ *pAfterStmt = fgInsertStmtAfter(block, *pAfterStmt, gtNewStmt(asg, impCurStmtOffs));
+ }
+ else
+ {
+ impAppendTree(asg, curLevel, impCurStmtOffs);
+ }
+
+ // return the assign of the type value, to be appended
+ return gtNewAssignNode(typeSlot, src->gtOp.gtOp2);
+ }
+ else if (src->gtOper == GT_COMMA)
+ {
+ // The second thing is the struct or its address.
+ assert(varTypeIsStruct(src->gtOp.gtOp2) || src->gtOp.gtOp2->gtType == TYP_BYREF);
+ if (pAfterStmt)
+ {
+ *pAfterStmt = fgInsertStmtAfter(block, *pAfterStmt, gtNewStmt(src->gtOp.gtOp1, impCurStmtOffs));
+ }
+ else
+ {
+ impAppendTree(src->gtOp.gtOp1, curLevel, impCurStmtOffs); // do the side effect
+ }
+
+ // Evaluate the second thing using recursion.
+ return impAssignStructPtr(destAddr, src->gtOp.gtOp2, structHnd, curLevel, pAfterStmt, block);
+ }
+ else if (src->IsLocal())
+ {
+ // TODO-1stClassStructs: Eliminate this; it is only here to minimize diffs in the
+ // initial implementation. Previously the source would have been under a GT_ADDR, which
+ // would cause it to be marked GTF_DONT_CSE.
+ asgType = src->TypeGet();
+ src->gtFlags |= GTF_DONT_CSE;
+ if (asgType == TYP_STRUCT)
+ {
+ GenTree* srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
+ src = gtNewOperNode(GT_IND, TYP_STRUCT, srcAddr);
+ }
+ }
+ else if (asgType == TYP_STRUCT)
+ {
+ asgType = impNormStructType(structHnd);
+ src->gtType = asgType;
+ }
+ if (dest == nullptr)
+ {
+ // TODO-1stClassStructs: We shouldn't really need a block node as the destination
+ // if this is a known struct type.
+ if (asgType == TYP_STRUCT)
+ {
+ dest = gtNewObjNode(structHnd, destAddr);
+ gtSetObjGcInfo(dest->AsObj());
+ // Although an obj as a call argument was always assumed to be a globRef
+ // (which is itself overly conservative), that is not true of the operands
+ // of a block assignment.
+ dest->gtFlags &= ~GTF_GLOB_REF;
+ dest->gtFlags |= (destAddr->gtFlags & GTF_GLOB_REF);
+ }
+ else if (varTypeIsStruct(asgType))
+ {
+ dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, destAddr, genTypeSize(asgType));
+ }
+ else
+ {
+ dest = gtNewOperNode(GT_IND, asgType, destAddr);
+ }
+ }
+ else
+ {
+ dest->gtType = asgType;
+ }
+
+ dest->gtFlags |= destFlags;
+ destFlags = dest->gtFlags;
+
+ // return an assignment node, to be appended
+ GenTree* asgNode = gtNewAssignNode(dest, src);
+ gtBlockOpInit(asgNode, dest, src, false);
+
+ // TODO-1stClassStructs: Clean up the settings of GTF_DONT_CSE on the lhs
+ // of assignments.
+ if ((destFlags & GTF_DONT_CSE) == 0)
+ {
+ dest->gtFlags &= ~(GTF_DONT_CSE);
+ }
+ return asgNode;
+}
+
+/*****************************************************************************
+ Given a struct value, and the class handle for that structure, return
+ the expression for the address of that struct value.
+
+ willDeref - true if the caller guarantees that it will dereference the returned pointer.
+*/
+
+GenTreePtr Compiler::impGetStructAddr(GenTreePtr structVal,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ bool willDeref)
+{
+ assert(varTypeIsStruct(structVal) || eeIsValueClass(structHnd));
+
+ var_types type = structVal->TypeGet();
+
+ genTreeOps oper = structVal->gtOper;
+
+ if (oper == GT_OBJ && willDeref)
+ {
+ assert(structVal->gtObj.gtClass == structHnd);
+ return (structVal->gtObj.Addr());
+ }
+ else if (oper == GT_CALL || oper == GT_RET_EXPR || oper == GT_OBJ || oper == GT_MKREFANY)
+ {
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("struct address for call/obj"));
+
+ impAssignTempGen(tmpNum, structVal, structHnd, curLevel);
+
+ // The 'return value' is now the temp itself
+
+ type = genActualType(lvaTable[tmpNum].TypeGet());
+ GenTreePtr temp = gtNewLclvNode(tmpNum, type);
+ temp = gtNewOperNode(GT_ADDR, TYP_BYREF, temp);
+ return temp;
+ }
+ else if (oper == GT_COMMA)
+ {
+ assert(structVal->gtOp.gtOp2->gtType == type); // Second thing is the struct
+
+ GenTreePtr oldTreeLast = impTreeLast;
+ structVal->gtOp.gtOp2 = impGetStructAddr(structVal->gtOp.gtOp2, structHnd, curLevel, willDeref);
+ structVal->gtType = TYP_BYREF;
+
+ if (oldTreeLast != impTreeLast)
+ {
+ // Some temp assignment statement was placed on the statement list
+ // for Op2, but that would be out of order with op1, so we need to
+ // spill op1 onto the statement list after whatever was last
+ // before we recursed on Op2 (i.e. before whatever Op2 appended).
+ impInsertTreeBefore(structVal->gtOp.gtOp1, impCurStmtOffs, oldTreeLast->gtNext);
+ structVal->gtOp.gtOp1 = gtNewNothingNode();
+ }
+
+ return (structVal);
+ }
+
+ return (gtNewOperNode(GT_ADDR, TYP_BYREF, structVal));
+}
+
+//------------------------------------------------------------------------
+// impNormStructType: Given a (known to be) struct class handle structHnd, normalize its type,
+// and optionally determine the GC layout of the struct.
+//
+// Arguments:
+// structHnd - The class handle for the struct type of interest.
+// gcLayout - (optional, default nullptr) - a BYTE pointer, allocated by the caller,
+// into which the gcLayout will be written.
+// pNumGCVars - (optional, default nullptr) - if non-null, a pointer to an unsigned,
+// which will be set to the number of GC fields in the struct.
+//
+// Return Value:
+// The JIT type for the struct (e.g. TYP_STRUCT, or TYP_SIMD*).
+// The gcLayout will be returned using the pointers provided by the caller, if non-null.
+// It may also modify the compFloatingPointUsed flag if the type is a SIMD type.
+//
+// Assumptions:
+// The caller must set gcLayout to nullptr OR ensure that it is large enough
+// (see ICorStaticInfo::getClassGClayout in corinfo.h).
+//
+// Notes:
+// Normalizing the type involves examining the struct type to determine if it should
+// be modified to one that is handled specially by the JIT, possibly being a candidate
+// for full enregistration, e.g. TYP_SIMD16.
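+//
+// As an illustrative example (assuming FEATURE_SIMD and 16-byte vector registers):
+// a struct such as System.Numerics.Vector4 (four floats, 16 bytes, no GC fields) is
+// normalized from TYP_STRUCT to TYP_SIMD16, whereas a struct containing an object
+// reference keeps TYP_STRUCT because structs with GC pointers are not considered for
+// SIMD handling.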
+
+var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd,
+ BYTE* gcLayout,
+ unsigned* pNumGCVars,
+ var_types* pSimdBaseType)
+{
+ assert(structHnd != NO_CLASS_HANDLE);
+ unsigned originalSize = info.compCompHnd->getClassSize(structHnd);
+ unsigned numGCVars = 0;
+ var_types structType = TYP_STRUCT;
+ var_types simdBaseType = TYP_UNKNOWN;
+ bool definitelyHasGCPtrs = false;
+
+#ifdef FEATURE_SIMD
+ // We don't want to consider this as a possible SIMD type if it has GC pointers.
+ // (Saves querying about the SIMD assembly.)
+ BYTE gcBytes[maxPossibleSIMDStructBytes / TARGET_POINTER_SIZE];
+ if ((gcLayout == nullptr) && (originalSize >= minSIMDStructBytes()) && (originalSize <= maxSIMDStructBytes()))
+ {
+ gcLayout = gcBytes;
+ }
+#endif // FEATURE_SIMD
+
+ if (gcLayout != nullptr)
+ {
+ numGCVars = info.compCompHnd->getClassGClayout(structHnd, gcLayout);
+ definitelyHasGCPtrs = (numGCVars != 0);
+ }
+#ifdef FEATURE_SIMD
+ // Check to see if this is a SIMD type.
+ if (featureSIMD && (originalSize <= getSIMDVectorRegisterByteLength()) && (originalSize >= TARGET_POINTER_SIZE) &&
+ !definitelyHasGCPtrs)
+ {
+ unsigned int sizeBytes;
+ simdBaseType = getBaseTypeAndSizeOfSIMDType(structHnd, &sizeBytes);
+ if (simdBaseType != TYP_UNKNOWN)
+ {
+ assert(sizeBytes == originalSize);
+ structType = getSIMDTypeForSize(sizeBytes);
+ if (pSimdBaseType != nullptr)
+ {
+ *pSimdBaseType = simdBaseType;
+ }
+#ifdef _TARGET_AMD64_
+ // Amd64: also indicate that we use floating point registers
+ compFloatingPointUsed = true;
+#endif
+ }
+ }
+#endif // FEATURE_SIMD
+ if (pNumGCVars != nullptr)
+ {
+ *pNumGCVars = numGCVars;
+ }
+ return structType;
+}
+
+//****************************************************************************
+// Given a TYP_STRUCT value 'structVal', make sure it is 'canonical'; that is, it is either
+// an OBJ or a MKREFANY node, or a node (e.g. GT_INDEX) that will be morphed.
+//
+GenTreePtr Compiler::impNormStructVal(GenTreePtr structVal,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ bool forceNormalization /*=false*/)
+{
+ assert(forceNormalization || varTypeIsStruct(structVal));
+ assert(structHnd != NO_CLASS_HANDLE);
+ var_types structType = structVal->TypeGet();
+ bool makeTemp = false;
+ if (structType == TYP_STRUCT)
+ {
+ structType = impNormStructType(structHnd);
+ }
+ bool alreadyNormalized = false;
+ GenTreeLclVarCommon* structLcl = nullptr;
+
+ genTreeOps oper = structVal->OperGet();
+ switch (oper)
+ {
+ // GT_RETURN and GT_MKREFANY don't capture the handle.
+ case GT_RETURN:
+ break;
+ case GT_MKREFANY:
+ alreadyNormalized = true;
+ break;
+
+ case GT_CALL:
+ structVal->gtCall.gtRetClsHnd = structHnd;
+ makeTemp = true;
+ break;
+
+ case GT_RET_EXPR:
+ structVal->gtRetExpr.gtRetClsHnd = structHnd;
+ makeTemp = true;
+ break;
+
+ case GT_ARGPLACE:
+ structVal->gtArgPlace.gtArgPlaceClsHnd = structHnd;
+ break;
+
+ case GT_INDEX:
+ // This will be transformed to an OBJ later.
+ alreadyNormalized = true;
+ structVal->gtIndex.gtStructElemClass = structHnd;
+ structVal->gtIndex.gtIndElemSize = info.compCompHnd->getClassSize(structHnd);
+ break;
+
+ case GT_FIELD:
+ // Wrap it in a GT_OBJ.
+ structVal->gtType = structType;
+ structVal = gtNewObjNode(structHnd, gtNewOperNode(GT_ADDR, TYP_BYREF, structVal));
+ break;
+
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ structLcl = structVal->AsLclVarCommon();
+ // Wrap it in a GT_OBJ.
+ structVal = gtNewObjNode(structHnd, gtNewOperNode(GT_ADDR, TYP_BYREF, structVal));
+ __fallthrough;
+
+ case GT_OBJ:
+ case GT_BLK:
+ case GT_DYN_BLK:
+ case GT_ASG:
+ // These should already have the appropriate type.
+ assert(structVal->gtType == structType);
+ alreadyNormalized = true;
+ break;
+
+ case GT_IND:
+ assert(structVal->gtType == structType);
+ structVal = gtNewObjNode(structHnd, structVal->gtGetOp1());
+ alreadyNormalized = true;
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType));
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_COMMA:
+ {
+ // The second thing is the block node.
+ GenTree* blockNode = structVal->gtOp.gtOp2;
+ assert(blockNode->gtType == structType);
+ // It had better be a block node - any others should not occur here.
+ assert(blockNode->OperIsBlk());
+
+ // Sink the GT_COMMA below the blockNode addr.
+ GenTree* blockNodeAddr = blockNode->gtOp.gtOp1;
+ assert(blockNodeAddr->gtType == TYP_BYREF);
+ GenTree* commaNode = structVal;
+ commaNode->gtType = TYP_BYREF;
+ commaNode->gtOp.gtOp2 = blockNodeAddr;
+ blockNode->gtOp.gtOp1 = commaNode;
+ structVal = blockNode;
+ alreadyNormalized = true;
+ }
+ break;
+
+ default:
+ assert(!"Unexpected node in impNormStructVal()");
+ break;
+ }
+ structVal->gtType = structType;
+ GenTree* structObj = structVal;
+
+ if (!alreadyNormalized || forceNormalization)
+ {
+ if (makeTemp)
+ {
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("struct address for call/obj"));
+
+ impAssignTempGen(tmpNum, structVal, structHnd, curLevel);
+
+ // The structVal is now the temp itself
+
+ structLcl = gtNewLclvNode(tmpNum, structType)->AsLclVarCommon();
+ // TODO-1stClassStructs: Avoid always wrapping in GT_OBJ.
+ structObj = gtNewObjNode(structHnd, gtNewOperNode(GT_ADDR, TYP_BYREF, structLcl));
+ }
+ else if (varTypeIsStruct(structType) && !structVal->OperIsBlk())
+ {
+ // Wrap it in a GT_OBJ
+ structObj = gtNewObjNode(structHnd, gtNewOperNode(GT_ADDR, TYP_BYREF, structVal));
+ }
+ }
+
+ if (structLcl != nullptr)
+ {
+ // An OBJ on an ADDR(LCL_VAR) can never raise an exception
+ // so we don't set GTF_EXCEPT here.
+ if (!lvaIsImplicitByRefLocal(structLcl->gtLclNum))
+ {
+ structObj->gtFlags &= ~GTF_GLOB_REF;
+ }
+ }
+ else
+ {
+ // In general an OBJ is an indirection and could raise an exception.
+ structObj->gtFlags |= GTF_EXCEPT;
+ }
+ return (structObj);
+}
+
+/******************************************************************************/
+// Given a type token, generate code that will evaluate to the correct
+// handle representation of that token (type handle, field handle, or method handle)
+//
+// For most cases, the handle is determined at compile-time, and the code
+// generated is simply an embedded handle.
+//
+// Run-time lookup is required if the enclosing method is shared between instantiations
+// and the token refers to formal type parameters whose instantiation is not known
+// at compile-time.
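+//
+// As an illustrative example (not tied to a particular method): in code shared across
+// instantiations, such as a method of List<T> compiled once for all reference-type T,
+// a token that refers to the formal type parameter T (e.g. from 'typeof(T)') requires a
+// runtime lookup, while a token for System.String in the same method can be embedded
+// directly as a compile-time handle.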
+//
+GenTreePtr Compiler::impTokenToHandle(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ BOOL* pRuntimeLookup /* = NULL */,
+ BOOL mustRestoreHandle /* = FALSE */,
+ BOOL importParent /* = FALSE */)
+{
+ assert(!fgGlobalMorph);
+
+ CORINFO_GENERICHANDLE_RESULT embedInfo;
+ info.compCompHnd->embedGenericHandle(pResolvedToken, importParent, &embedInfo);
+
+ if (pRuntimeLookup)
+ {
+ *pRuntimeLookup = embedInfo.lookup.lookupKind.needsRuntimeLookup;
+ }
+
+ if (mustRestoreHandle && !embedInfo.lookup.lookupKind.needsRuntimeLookup)
+ {
+ switch (embedInfo.handleType)
+ {
+ case CORINFO_HANDLETYPE_CLASS:
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun((CORINFO_CLASS_HANDLE)embedInfo.compileTimeHandle);
+ break;
+
+ case CORINFO_HANDLETYPE_METHOD:
+ info.compCompHnd->methodMustBeLoadedBeforeCodeIsRun((CORINFO_METHOD_HANDLE)embedInfo.compileTimeHandle);
+ break;
+
+ case CORINFO_HANDLETYPE_FIELD:
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(
+ info.compCompHnd->getFieldClass((CORINFO_FIELD_HANDLE)embedInfo.compileTimeHandle));
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return impLookupToTree(pResolvedToken, &embedInfo.lookup, gtTokenToIconFlags(pResolvedToken->token),
+ embedInfo.compileTimeHandle);
+}
+
+GenTreePtr Compiler::impLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_LOOKUP* pLookup,
+ unsigned handleFlags,
+ void* compileTimeHandle)
+{
+ if (!pLookup->lookupKind.needsRuntimeLookup)
+ {
+ // No runtime lookup is required.
+ // Access is direct or memory-indirect (of a fixed address) reference
+
+ CORINFO_GENERIC_HANDLE handle = nullptr;
+ void* pIndirection = nullptr;
+ assert(pLookup->constLookup.accessType != IAT_PPVALUE);
+
+ if (pLookup->constLookup.accessType == IAT_VALUE)
+ {
+ handle = pLookup->constLookup.handle;
+ }
+ else if (pLookup->constLookup.accessType == IAT_PVALUE)
+ {
+ pIndirection = pLookup->constLookup.addr;
+ }
+ return gtNewIconEmbHndNode(handle, pIndirection, handleFlags, 0, nullptr, compileTimeHandle);
+ }
+ else if (compIsForInlining())
+ {
+ // Don't import runtime lookups when inlining
+ // Inlining has to be aborted in such a case
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_GENERIC_DICTIONARY_LOOKUP);
+ return nullptr;
+ }
+ else
+ {
+ // Need to use dictionary-based access which depends on the typeContext
+ // which is only available at runtime, not at compile-time.
+
+ return impRuntimeLookupToTree(pResolvedToken, pLookup, compileTimeHandle);
+ }
+}
+
+#ifdef FEATURE_READYTORUN_COMPILER
+GenTreePtr Compiler::impReadyToRunLookupToTree(CORINFO_CONST_LOOKUP* pLookup,
+ unsigned handleFlags,
+ void* compileTimeHandle)
+{
+ CORINFO_GENERIC_HANDLE handle = 0;
+ void* pIndirection = 0;
+ assert(pLookup->accessType != IAT_PPVALUE);
+
+ if (pLookup->accessType == IAT_VALUE)
+ handle = pLookup->handle;
+ else if (pLookup->accessType == IAT_PVALUE)
+ pIndirection = pLookup->addr;
+ return gtNewIconEmbHndNode(handle, pIndirection, handleFlags, 0, 0, compileTimeHandle);
+}
+
+GenTreePtr Compiler::impReadyToRunHelperToTree(
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CorInfoHelpFunc helper,
+ var_types type,
+ GenTreeArgList* args /* =NULL*/,
+ CORINFO_LOOKUP_KIND* pGenericLookupKind /* =NULL. Only used with generics */)
+{
+ CORINFO_CONST_LOOKUP lookup;
+#if COR_JIT_EE_VERSION > 460
+ if (!info.compCompHnd->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, helper, &lookup))
+ return NULL;
+#else
+ info.compCompHnd->getReadyToRunHelper(pResolvedToken, helper, &lookup);
+#endif
+
+ GenTreePtr op1 = gtNewHelperCallNode(helper, type, GTF_EXCEPT, args);
+
+ op1->gtCall.setEntryPoint(lookup);
+
+ return op1;
+}
+#endif
+
+GenTreePtr Compiler::impMethodPointer(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo)
+{
+ GenTreePtr op1 = nullptr;
+
+ switch (pCallInfo->kind)
+ {
+ case CORINFO_CALL:
+ op1 = new (this, GT_FTN_ADDR) GenTreeFptrVal(TYP_I_IMPL, pCallInfo->hMethod);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ op1->gtFptrVal.gtEntryPoint = pCallInfo->codePointerLookup.constLookup;
+ op1->gtFptrVal.gtLdftnResolvedToken = new (this, CMK_Unknown) CORINFO_RESOLVED_TOKEN;
+ *op1->gtFptrVal.gtLdftnResolvedToken = *pResolvedToken;
+ }
+ else
+ op1->gtFptrVal.gtEntryPoint.addr = nullptr;
+#endif
+ break;
+
+ case CORINFO_CALL_CODE_POINTER:
+ if (compIsForInlining())
+ {
+ // Don't import runtime lookups when inlining
+ // Inlining has to be aborted in such a case
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_GENERIC_DICTIONARY_LOOKUP);
+ return nullptr;
+ }
+
+ op1 = impLookupToTree(pResolvedToken, &pCallInfo->codePointerLookup, GTF_ICON_FTN_ADDR, pCallInfo->hMethod);
+ break;
+
+ default:
+ noway_assert(!"unknown call kind");
+ break;
+ }
+
+ return op1;
+}
+
+/*****************************************************************************/
+/* Import a dictionary lookup to access a handle in code shared between
+ generic instantiations.
+ The lookup depends on the typeContext which is only available at
+ runtime, and not at compile-time.
+ pLookup->token1 and pLookup->token2 specify the handle that is needed.
+ The cases are:
+
+ 1. pLookup->indirections == CORINFO_USEHELPER : Call a helper passing it the
+ instantiation-specific handle, and the tokens to look up the handle.
+ 2. pLookup->indirections != CORINFO_USEHELPER :
+ 2a. pLookup->testForNull == false : Dereference the instantiation-specific handle
+ to get the handle.
+ 2b. pLookup->testForNull == true : Dereference the instantiation-specific handle.
+ If it is non-NULL, it is the handle required. Else, call a helper
+ to look up the handle.
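+
+ As an illustrative sketch of case 2a with two indirections, the importer builds a tree
+ equivalent to
+
+ *( *(ctx + offsets[0]) + offsets[1] )
+
+ where ctx is the generic context (the vtable pointer of 'this', or the type/method
+ context argument). Case 2b additionally tests the dereferenced handle for null under a
+ QMARK and calls the helper only when it is null.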
+ */
+
+GenTreePtr Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_LOOKUP* pLookup,
+ void* compileTimeHandle)
+{
+ CORINFO_RUNTIME_LOOKUP_KIND kind = pLookup->lookupKind.runtimeLookupKind;
+ CORINFO_RUNTIME_LOOKUP* pRuntimeLookup = &pLookup->runtimeLookup;
+
+ // This method can only be called from the importer instance of the Compiler.
+ // In other words, it cannot be called on the Compiler instance created for the inlinee.
+ assert(!compIsForInlining());
+
+ GenTreePtr ctxTree;
+
+ // Collectible types require that, for shared generic code, any use of the generic context parameter
+ // be reported. (This is a conservative approach; in some cases, particularly when the context
+ // parameter is 'this', we could detect that the eager reporting logic is not needed.)
+ lvaGenericsContextUsed = true;
+
+ if (kind == CORINFO_LOOKUP_THISOBJ)
+ {
+ // this Object
+ ctxTree = gtNewLclvNode(info.compThisArg, TYP_REF);
+
+ // Vtable pointer of this object
+ ctxTree = gtNewOperNode(GT_IND, TYP_I_IMPL, ctxTree);
+ ctxTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
+ ctxTree->gtFlags |= GTF_IND_INVARIANT;
+ }
+ else
+ {
+ assert(kind == CORINFO_LOOKUP_METHODPARAM || kind == CORINFO_LOOKUP_CLASSPARAM);
+
+ ctxTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); // Exact method descriptor as passed in as last arg
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ return impReadyToRunHelperToTree(pResolvedToken, CORINFO_HELP_READYTORUN_GENERIC_HANDLE, TYP_I_IMPL,
+ gtNewArgList(ctxTree), &pLookup->lookupKind);
+ }
+#endif
+
+ // It's available only via the run-time helper function
+ if (pRuntimeLookup->indirections == CORINFO_USEHELPER)
+ {
+ GenTreeArgList* helperArgs =
+ gtNewArgList(ctxTree, gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_TOKEN_HDL, 0,
+ nullptr, compileTimeHandle));
+
+ return gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, GTF_EXCEPT, helperArgs);
+ }
+
+ // Slot pointer
+ GenTreePtr slotPtrTree = ctxTree;
+
+ if (pRuntimeLookup->testForNull)
+ {
+ slotPtrTree = impCloneExpr(ctxTree, &ctxTree, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("impRuntimeLookup slot"));
+ }
+
+ // Apply repeated indirections
+ for (WORD i = 0; i < pRuntimeLookup->indirections; i++)
+ {
+ if (i != 0)
+ {
+ slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree);
+ slotPtrTree->gtFlags |= GTF_IND_NONFAULTING;
+ slotPtrTree->gtFlags |= GTF_IND_INVARIANT;
+ }
+ if (pRuntimeLookup->offsets[i] != 0)
+ {
+ slotPtrTree =
+ gtNewOperNode(GT_ADD, TYP_I_IMPL, slotPtrTree, gtNewIconNode(pRuntimeLookup->offsets[i], TYP_I_IMPL));
+ }
+ }
+
+ // No null test required
+ if (!pRuntimeLookup->testForNull)
+ {
+ if (pRuntimeLookup->indirections == 0)
+ {
+ return slotPtrTree;
+ }
+
+ slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree);
+ slotPtrTree->gtFlags |= GTF_IND_NONFAULTING;
+
+ if (!pRuntimeLookup->testForFixup)
+ {
+ return slotPtrTree;
+ }
+
+ impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark0"));
+
+ GenTreePtr op1 = impCloneExpr(slotPtrTree, &slotPtrTree, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("impRuntimeLookup test"));
+ op1 = impImplicitIorI4Cast(op1, TYP_INT); // downcast the pointer to a TYP_INT on 64-bit targets
+
+ // Use a GT_AND to check for the lowest bit and indirect if it is set
+ GenTreePtr testTree = gtNewOperNode(GT_AND, TYP_INT, op1, gtNewIconNode(1));
+ GenTreePtr relop = gtNewOperNode(GT_EQ, TYP_INT, testTree, gtNewIconNode(0));
+ relop->gtFlags |= GTF_RELOP_QMARK;
+
+ op1 = impCloneExpr(slotPtrTree, &slotPtrTree, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("impRuntimeLookup indir"));
+ op1 = gtNewOperNode(GT_ADD, TYP_I_IMPL, op1, gtNewIconNode(-1, TYP_I_IMPL)); // subtract 1 from the pointer
+ GenTreePtr indirTree = gtNewOperNode(GT_IND, TYP_I_IMPL, op1);
+ GenTreePtr colon = new (this, GT_COLON) GenTreeColon(TYP_I_IMPL, slotPtrTree, indirTree);
+
+ GenTreePtr qmark = gtNewQmarkNode(TYP_I_IMPL, relop, colon);
+
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("spilling QMark0"));
+ impAssignTempGen(tmp, qmark, (unsigned)CHECK_SPILL_NONE);
+ return gtNewLclvNode(tmp, TYP_I_IMPL);
+ }
+
+ assert(pRuntimeLookup->indirections != 0);
+
+ impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark1"));
+
+ // Extract the handle
+ GenTreePtr handle = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree);
+ handle->gtFlags |= GTF_IND_NONFAULTING;
+
+ GenTreePtr handleCopy = impCloneExpr(handle, &handle, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("impRuntimeLookup typehandle"));
+
+ // Call to helper
+ GenTreeArgList* helperArgs =
+ gtNewArgList(ctxTree, gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_TOKEN_HDL, 0, nullptr,
+ compileTimeHandle));
+ GenTreePtr helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, GTF_EXCEPT, helperArgs);
+
+ // Check for null and possibly call helper
+ GenTreePtr relop = gtNewOperNode(GT_NE, TYP_INT, handle, gtNewIconNode(0, TYP_I_IMPL));
+ relop->gtFlags |= GTF_RELOP_QMARK;
+
+ GenTreePtr colon = new (this, GT_COLON) GenTreeColon(TYP_I_IMPL,
+ gtNewNothingNode(), // do nothing if nonnull
+ helperCall);
+
+ GenTreePtr qmark = gtNewQmarkNode(TYP_I_IMPL, relop, colon);
+
+ unsigned tmp;
+ if (handleCopy->IsLocal())
+ {
+ tmp = handleCopy->gtLclVarCommon.gtLclNum;
+ }
+ else
+ {
+ tmp = lvaGrabTemp(true DEBUGARG("spilling QMark1"));
+ }
+
+ impAssignTempGen(tmp, qmark, (unsigned)CHECK_SPILL_NONE);
+ return gtNewLclvNode(tmp, TYP_I_IMPL);
+}
+
+/******************************************************************************
+ * Spills the stack at verCurrentState.esStack[level] and replaces it with a temp.
+ * If tnum!=BAD_VAR_NUM, the temp var used to replace the tree is tnum,
+ * else a new temp is grabbed.
+ * For structs (which can be pushed on the stack using obj, etc.),
+ * special handling is needed.
+ */
+
+struct RecursiveGuard
+{
+public:
+ RecursiveGuard()
+ {
+ m_pAddress = nullptr;
+ }
+
+ ~RecursiveGuard()
+ {
+ if (m_pAddress)
+ {
+ *m_pAddress = false;
+ }
+ }
+
+ void Init(bool* pAddress, bool bInitialize)
+ {
+ assert(pAddress && *pAddress == false && "Recursive guard violation");
+ m_pAddress = pAddress;
+
+ if (bInitialize)
+ {
+ *m_pAddress = true;
+ }
+ }
+
+protected:
+ bool* m_pAddress;
+};
+
+bool Compiler::impSpillStackEntry(unsigned level,
+ unsigned tnum
+#ifdef DEBUG
+ ,
+ bool bAssertOnRecursion,
+ const char* reason
+#endif
+ )
+{
+
+#ifdef DEBUG
+ RecursiveGuard guard;
+ guard.Init(&impNestedStackSpill, bAssertOnRecursion);
+#endif
+
+ assert(!fgGlobalMorph); // use impInlineSpillStackEntry() during inlining
+
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+
+ /* Allocate a temp if we haven't been asked to use a particular one */
+
+ if (tiVerificationNeeded)
+ {
+ // Ignore bad temp requests (they will happen with bad code and will be
+ // caught when importing the dest block)
+ if ((tnum != BAD_VAR_NUM && tnum >= lvaCount) && verNeedsVerification())
+ {
+ return false;
+ }
+ }
+ else
+ {
+ if (tnum != BAD_VAR_NUM && (tnum >= lvaCount))
+ {
+ return false;
+ }
+ }
+
+ if (tnum == BAD_VAR_NUM)
+ {
+ tnum = lvaGrabTemp(true DEBUGARG(reason));
+ }
+ else if (tiVerificationNeeded && lvaTable[tnum].TypeGet() != TYP_UNDEF)
+ {
+ // if verification is needed and tnum's type is incompatible with
+ // the type on the stack, we grab a new temp. This is safe since
+ // we will throw a verification exception in the dest block.
+
+ var_types valTyp = tree->TypeGet();
+ var_types dstTyp = lvaTable[tnum].TypeGet();
+
+ // if the two types are different, we return. This will only happen with bad code and will
+ // be caught when importing the dest block. We still allow int/byref and float/double differences.
+ if ((genActualType(valTyp) != genActualType(dstTyp)) &&
+ !(
+#ifndef _TARGET_64BIT_
+ (valTyp == TYP_I_IMPL && dstTyp == TYP_BYREF) || (valTyp == TYP_BYREF && dstTyp == TYP_I_IMPL) ||
+#endif // !_TARGET_64BIT_
+ (varTypeIsFloating(dstTyp) && varTypeIsFloating(valTyp))))
+ {
+ if (verNeedsVerification())
+ {
+ return false;
+ }
+ }
+ }
+
+ /* Assign the spilled entry to the temp */
+ impAssignTempGen(tnum, tree, verCurrentState.esStack[level].seTypeInfo.GetClassHandle(), level);
+
+ // The tree type may be modified by impAssignTempGen, so use the type of the lclVar.
+ var_types type = genActualType(lvaTable[tnum].TypeGet());
+ GenTreePtr temp = gtNewLclvNode(tnum, type);
+ verCurrentState.esStack[level].val = temp;
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Ensure that the stack has only spilled values
+ */
+
+void Compiler::impSpillStackEnsure(bool spillLeaves)
+{
+ assert(!spillLeaves || opts.compDbgCode);
+
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+ {
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+
+ if (!spillLeaves && tree->OperIsLeaf())
+ {
+ continue;
+ }
+
+ // Temps introduced by the importer itself don't need to be spilled
+
+ bool isTempLcl = (tree->OperGet() == GT_LCL_VAR) && (tree->gtLclVarCommon.gtLclNum >= info.compLocalsCount);
+
+ if (isTempLcl)
+ {
+ continue;
+ }
+
+ impSpillStackEntry(level, BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impSpillStackEnsure"));
+ }
+}
+
+void Compiler::impSpillEvalStack()
+{
+ assert(!fgGlobalMorph); // use impInlineSpillEvalStack() during inlining
+
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+ {
+ impSpillStackEntry(level, BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impSpillEvalStack"));
+ }
+}
+
+/*****************************************************************************
+ *
+ * If the stack contains any trees with side effects in them, assign those
+ * trees to temps and append the assignments to the statement list.
+ * On return the stack is guaranteed to be empty.
+ */
+
+inline void Compiler::impEvalSideEffects()
+{
+ impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG("impEvalSideEffects"));
+ verCurrentState.esStackDepth = 0;
+}
+
+/*****************************************************************************
+ *
+ * If the stack contains any trees with side effects in them, assign those
+ * trees to temps and replace them on the stack with refs to their temps.
+ * [0..chkLevel) is the portion of the stack which will be checked and spilled.
+ */
+
+inline void Compiler::impSpillSideEffects(bool spillGlobEffects, unsigned chkLevel DEBUGARG(const char* reason))
+{
+ assert(chkLevel != (unsigned)CHECK_SPILL_NONE);
+
+ /* Before we make any appends to the tree list we must spill the
+ * "special" side effects (GTF_ORDER_SIDEEFF on a GT_CATCH_ARG) */
+
+ impSpillSpecialSideEff();
+
+ if (chkLevel == (unsigned)CHECK_SPILL_ALL)
+ {
+ chkLevel = verCurrentState.esStackDepth;
+ }
+
+ assert(chkLevel <= verCurrentState.esStackDepth);
+
+ unsigned spillFlags = spillGlobEffects ? GTF_GLOB_EFFECT : GTF_SIDE_EFFECT;
+
+ for (unsigned i = 0; i < chkLevel; i++)
+ {
+ GenTreePtr tree = verCurrentState.esStack[i].val;
+
+ GenTreePtr lclVarTree;
+
+ if ((tree->gtFlags & spillFlags) != 0 ||
+ (spillGlobEffects && // Only consider the following when spillGlobEffects == TRUE
+ !impIsAddressInLocal(tree, &lclVarTree) && // No need to spill the GT_ADDR node on a local.
+ gtHasLocalsWithAddrOp(tree))) // Spill if we still see a GT_LCL_VAR that has the lvHasLdAddrOp or
+ // lvAddrTaken flag set.
+ {
+ impSpillStackEntry(i, BAD_VAR_NUM DEBUGARG(false) DEBUGARG(reason));
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * If the stack contains any trees with special side effects in them, assign
+ * those trees to temps and replace them on the stack with refs to their temps.
+ */
+
+inline void Compiler::impSpillSpecialSideEff()
+{
+ // Only exception objects need to be carefully handled
+
+ if (!compCurBB->bbCatchTyp)
+ {
+ return;
+ }
+
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+ {
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+ // If the tree contains an exception object (GT_CATCH_ARG), make sure we spill this stack entry.
+ if (gtHasCatchArg(tree))
+ {
+ impSpillStackEntry(level, BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impSpillSpecialSideEff"));
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Spill all stack references to value classes (TYP_STRUCT nodes)
+ */
+
+void Compiler::impSpillValueClasses()
+{
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+ {
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+
+ if (fgWalkTreePre(&tree, impFindValueClasses) == WALK_ABORT)
+ {
+ // Tree walk was aborted, which means that we found a
+ // value class on the stack. Need to spill that
+ // stack entry.
+
+ impSpillStackEntry(level, BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impSpillValueClasses"));
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Callback that checks if a tree node is TYP_STRUCT
+ */
+
+Compiler::fgWalkResult Compiler::impFindValueClasses(GenTreePtr* pTree, fgWalkData* data)
+{
+ fgWalkResult walkResult = WALK_CONTINUE;
+
+ if ((*pTree)->gtType == TYP_STRUCT)
+ {
+ // Abort the walk and indicate that we found a value class
+
+ walkResult = WALK_ABORT;
+ }
+
+ return walkResult;
+}
+
+/*****************************************************************************
+ *
+ * If the stack contains any trees with references to local #lclNum, assign
+ * those trees to temps and replace them on the stack with refs to
+ * their temps.
+ */
+
+void Compiler::impSpillLclRefs(ssize_t lclNum)
+{
+ assert(!fgGlobalMorph); // use impInlineSpillLclRefs() during inlining
+
+ /* Before we make any appends to the tree list we must spill the
+ * "special" side effects (GTF_ORDER_SIDEEFF) - GT_CATCH_ARG */
+
+ impSpillSpecialSideEff();
+
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+ {
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+
+ /* If the tree may throw an exception, and the block has a handler,
+ then we need to spill assignments to the local if the local is
+ live on entry to the handler.
+ Just spill them all without considering liveness. */
+
+ bool xcptnCaught = ehBlockHasExnFlowDsc(compCurBB) && (tree->gtFlags & (GTF_CALL | GTF_EXCEPT));
+
+ /* Skip the tree if it doesn't have an affected reference,
+ unless xcptnCaught */
+
+ if (xcptnCaught || gtHasRef(tree, lclNum, false))
+ {
+ impSpillStackEntry(level, BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impSpillLclRefs"));
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Push catch arg onto the stack.
+ * If there are jumps to the beginning of the handler, insert basic block
+ * and spill catch arg to a temp. Update the handler block if necessary.
+ *
+ * Returns the basic block of the actual handler.
+ */
+
+BasicBlock* Compiler::impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_HANDLE clsHnd)
+{
+ // Do not inject the basic block twice on reimport. This should be
+ // hit only under JIT stress. See if the block is the one we injected.
+ // Note that EH canonicalization can inject internal blocks here. We might
+ // be able to re-use such a block (but we don't, right now).
+ if ((hndBlk->bbFlags & (BBF_IMPORTED | BBF_INTERNAL | BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_JMP_TARGET)) ==
+ (BBF_IMPORTED | BBF_INTERNAL | BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_JMP_TARGET))
+ {
+ GenTreePtr tree = hndBlk->bbTreeList;
+
+ if (tree != nullptr && tree->gtOper == GT_STMT)
+ {
+ tree = tree->gtStmt.gtStmtExpr;
+ assert(tree != nullptr);
+
+ if ((tree->gtOper == GT_ASG) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR) &&
+ (tree->gtOp.gtOp2->gtOper == GT_CATCH_ARG))
+ {
+ tree = gtNewLclvNode(tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum, TYP_REF);
+
+ impPushOnStack(tree, typeInfo(TI_REF, clsHnd));
+
+ return hndBlk->bbNext;
+ }
+ }
+
+ // If we get here, it must have been some other kind of internal block. It's possible that
+ // someone prepended something to our injected block, but that's unlikely.
+ }
+
+ /* Push the exception address value on the stack */
+ GenTreePtr arg = new (this, GT_CATCH_ARG) GenTree(GT_CATCH_ARG, TYP_REF);
+
+ /* Mark the node as having a side-effect - i.e. cannot be
+ * moved around since it is tied to a fixed location (EAX) */
+ arg->gtFlags |= GTF_ORDER_SIDEEFF;
+
+ /* Spill GT_CATCH_ARG to a temp if there are jumps to the beginning of the handler */
+ if (hndBlk->bbRefs > 1 || compStressCompile(STRESS_CATCH_ARG, 5))
+ {
+ if (hndBlk->bbRefs == 1)
+ {
+ hndBlk->bbRefs++;
+ }
+
+ /* Create extra basic block for the spill */
+ BasicBlock* newBlk = fgNewBBbefore(BBJ_NONE, hndBlk, /* extendRegion */ true);
+ newBlk->bbFlags |= BBF_IMPORTED | BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_JMP_TARGET;
+ newBlk->setBBWeight(hndBlk->bbWeight);
+ newBlk->bbCodeOffs = hndBlk->bbCodeOffs;
+
+ /* Account for the new link we are about to create */
+ hndBlk->bbRefs++;
+
+ /* Spill into a temp */
+ unsigned tempNum = lvaGrabTemp(false DEBUGARG("SpillCatchArg"));
+ lvaTable[tempNum].lvType = TYP_REF;
+ arg = gtNewTempAssign(tempNum, arg);
+
+ hndBlk->bbStkTempsIn = tempNum;
+
+ /* Report the debug info. impImportBlockCode won't treat
+ * the actual handler as an exception block and thus won't do it for us. */
+ if (info.compStmtOffsetsImplicit & ICorDebugInfo::CALL_SITE_BOUNDARIES)
+ {
+ impCurStmtOffs = newBlk->bbCodeOffs | IL_OFFSETX_STKBIT;
+ arg = gtNewStmt(arg, impCurStmtOffs);
+ }
+
+ fgInsertStmtAtEnd(newBlk, arg);
+
+ arg = gtNewLclvNode(tempNum, TYP_REF);
+ }
+
+ impPushOnStack(arg, typeInfo(TI_REF, clsHnd));
+
+ return hndBlk;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree, clone it. *pClone is set to the cloned tree.
+ * Returns the original tree if the cloning was easy,
+ * else returns a use of the temp to which the tree had to be spilled.
+ * If the tree has side-effects, it will be spilled to a temp.
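+ *
+ * Illustrative sketch of the behavior implemented below: cloning a side-effect-free
+ * GT_LCL_VAR returns the original tree and sets *pClone to a fresh copy of the node,
+ * whereas cloning a GT_CALL spills it to a new temp and returns a GT_LCL_VAR use of
+ * that temp for both the original and the clone.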
+ */
+
+GenTreePtr Compiler::impCloneExpr(GenTreePtr tree,
+ GenTreePtr* pClone,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt DEBUGARG(const char* reason))
+{
+ if (!(tree->gtFlags & GTF_GLOB_EFFECT))
+ {
+ GenTreePtr clone = gtClone(tree, true);
+
+ if (clone)
+ {
+ *pClone = clone;
+ return tree;
+ }
+ }
+
+ /* Store the operand in a temp and return the temp */
+
+ unsigned temp = lvaGrabTemp(true DEBUGARG(reason));
+
+ // impAssignTempGen() may change tree->gtType to TYP_VOID for calls which
+ // return a struct type. It also may modify the struct type to a more
+ // specialized type (e.g. a SIMD type). So we will get the type from
+ // the lclVar AFTER calling impAssignTempGen().
+
+ impAssignTempGen(temp, tree, structHnd, curLevel, pAfterStmt, impCurStmtOffs);
+ var_types type = genActualType(lvaTable[temp].TypeGet());
+
+ *pClone = gtNewLclvNode(temp, type);
+ return gtNewLclvNode(temp, type);
+}
+
+/*****************************************************************************
+ * Remember the IL offset (including stack-empty info) for the trees we will
+ * generate now.
+ */
+
+inline void Compiler::impCurStmtOffsSet(IL_OFFSET offs)
+{
+ if (compIsForInlining())
+ {
+ GenTreePtr callStmt = impInlineInfo->iciStmt;
+ assert(callStmt->gtOper == GT_STMT);
+ impCurStmtOffs = callStmt->gtStmt.gtStmtILoffsx;
+ }
+ else
+ {
+ assert(offs == BAD_IL_OFFSET || (offs & IL_OFFSETX_BITS) == 0);
+ IL_OFFSETX stkBit = (verCurrentState.esStackDepth > 0) ? IL_OFFSETX_STKBIT : 0;
+ impCurStmtOffs = offs | stkBit;
+ }
+}
+
+/*****************************************************************************
+ * Returns current IL offset with stack-empty and call-instruction info incorporated
+ */
+inline IL_OFFSETX Compiler::impCurILOffset(IL_OFFSET offs, bool callInstruction)
+{
+ if (compIsForInlining())
+ {
+ return BAD_IL_OFFSET;
+ }
+ else
+ {
+ assert(offs == BAD_IL_OFFSET || (offs & IL_OFFSETX_BITS) == 0);
+ IL_OFFSETX stkBit = (verCurrentState.esStackDepth > 0) ? IL_OFFSETX_STKBIT : 0;
+ IL_OFFSETX callInstructionBit = callInstruction ? IL_OFFSETX_CALLINSTRUCTIONBIT : 0;
+ return offs | stkBit | callInstructionBit;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Remember the instr offset for the statements
+ *
+ * When we do impAppendTree(tree), we can't set tree->gtStmtLastILoffs to
+ * impCurOpcOffs, if the append was done because of a partial stack spill,
+ * as some of the trees corresponding to code up to impCurOpcOffs might
+ * still be sitting on the stack.
+ * So we delay marking of gtStmtLastILoffs until impNoteLastILoffs().
+ * This should be called when an opcode finally/explicitly causes
+ * impAppendTree(tree) to be called (as opposed to being called because of
+ * a spill caused by the opcode)
+ */
+
+#ifdef DEBUG
+
+void Compiler::impNoteLastILoffs()
+{
+ if (impLastILoffsStmt == nullptr)
+ {
+ // We should have added a statement for the current basic block
+ // Is this assert correct?
+
+ assert(impTreeLast);
+ assert(impTreeLast->gtOper == GT_STMT);
+
+ impTreeLast->gtStmt.gtStmtLastILoffs = compIsForInlining() ? BAD_IL_OFFSET : impCurOpcOffs;
+ }
+ else
+ {
+ impLastILoffsStmt->gtStmt.gtStmtLastILoffs = compIsForInlining() ? BAD_IL_OFFSET : impCurOpcOffs;
+ impLastILoffsStmt = nullptr;
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ * We don't create any GenTree (excluding spills) for a branch.
+ * For debugging info, we need a placeholder so that we can note
+ * the IL offset in gtStmt.gtStmtOffs. So append an empty statement.
+ */
+
+void Compiler::impNoteBranchOffs()
+{
+ if (opts.compDbgCode)
+ {
+ impAppendTree(gtNewNothingNode(), (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+ }
+}
+
+/*****************************************************************************
+ * Locate the next stmt boundary for which we need to record info.
+ * We will have to spill the stack at such boundaries if it is not
+ * already empty.
+ * Returns the next stmt boundary (after the start of the block)
+ */
+
+unsigned Compiler::impInitBlockLineInfo()
+{
+ /* Assume the block does not correspond with any IL offset. This prevents
+ us from reporting extra offsets. Extra mappings can cause confusing
+ stepping, especially if the extra mapping is a jump-target, and the
+ debugger does not ignore extra mappings, but instead rewinds to the
+ nearest known offset */
+
+ impCurStmtOffsSet(BAD_IL_OFFSET);
+
+ if (compIsForInlining())
+ {
+ return ~0;
+ }
+
+ IL_OFFSET blockOffs = compCurBB->bbCodeOffs;
+
+ if ((verCurrentState.esStackDepth == 0) && (info.compStmtOffsetsImplicit & ICorDebugInfo::STACK_EMPTY_BOUNDARIES))
+ {
+ impCurStmtOffsSet(blockOffs);
+ }
+
+ if (false && (info.compStmtOffsetsImplicit & ICorDebugInfo::CALL_SITE_BOUNDARIES))
+ {
+ impCurStmtOffsSet(blockOffs);
+ }
+
+ /* Always report IL offset 0 or some tests get confused.
+ Probably a good idea anyway. */
+
+ if (blockOffs == 0)
+ {
+ impCurStmtOffsSet(blockOffs);
+ }
+
+ if (!info.compStmtOffsetsCount)
+ {
+ return ~0;
+ }
+
+ /* Find the lowest explicit stmt boundary within the block */
+
+ /* Start looking at an entry that is based on our instr offset */
+
+ unsigned index = (info.compStmtOffsetsCount * blockOffs) / info.compILCodeSize;
+
+ if (index >= info.compStmtOffsetsCount)
+ {
+ index = info.compStmtOffsetsCount - 1;
+ }
+
+ /* If we've guessed too far, back up */
+
+ while (index > 0 && info.compStmtOffsets[index - 1] >= blockOffs)
+ {
+ index--;
+ }
+
+ /* If we guessed short, advance ahead */
+
+ while (info.compStmtOffsets[index] < blockOffs)
+ {
+ index++;
+
+ if (index == info.compStmtOffsetsCount)
+ {
+ return info.compStmtOffsetsCount;
+ }
+ }
+
+ assert(index < info.compStmtOffsetsCount);
+
+ if (info.compStmtOffsets[index] == blockOffs)
+ {
+ /* There is an explicit boundary for the start of this basic block.
+ So we will start with bbCodeOffs. Else we will wait until we
+ get to the next explicit boundary */
+
+ impCurStmtOffsSet(blockOffs);
+
+ index++;
+ }
+
+ return index;
+}
+
+/*****************************************************************************/
+
+static inline bool impOpcodeIsCallOpcode(OPCODE opcode)
+{
+ switch (opcode)
+ {
+ case CEE_CALL:
+ case CEE_CALLI:
+ case CEE_CALLVIRT:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+
+static inline bool impOpcodeIsCallSiteBoundary(OPCODE opcode)
+{
+ switch (opcode)
+ {
+ case CEE_CALL:
+ case CEE_CALLI:
+ case CEE_CALLVIRT:
+ case CEE_JMP:
+ case CEE_NEWOBJ:
+ case CEE_NEWARR:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+#endif // DEBUGGING_SUPPORT
+
+/*****************************************************************************/
+
+// One might think it would be worth caching these values, but results indicate
+// that it is not.
+// In addition, caching them causes SuperPMI to be unable to completely
+// encapsulate an individual method context.
+CORINFO_CLASS_HANDLE Compiler::impGetRefAnyClass()
+{
+ CORINFO_CLASS_HANDLE refAnyClass = info.compCompHnd->getBuiltinClass(CLASSID_TYPED_BYREF);
+ assert(refAnyClass != (CORINFO_CLASS_HANDLE) nullptr);
+ return refAnyClass;
+}
+
+CORINFO_CLASS_HANDLE Compiler::impGetTypeHandleClass()
+{
+ CORINFO_CLASS_HANDLE typeHandleClass = info.compCompHnd->getBuiltinClass(CLASSID_TYPE_HANDLE);
+ assert(typeHandleClass != (CORINFO_CLASS_HANDLE) nullptr);
+ return typeHandleClass;
+}
+
+CORINFO_CLASS_HANDLE Compiler::impGetRuntimeArgumentHandle()
+{
+ CORINFO_CLASS_HANDLE argIteratorClass = info.compCompHnd->getBuiltinClass(CLASSID_ARGUMENT_HANDLE);
+ assert(argIteratorClass != (CORINFO_CLASS_HANDLE) nullptr);
+ return argIteratorClass;
+}
+
+CORINFO_CLASS_HANDLE Compiler::impGetStringClass()
+{
+ CORINFO_CLASS_HANDLE stringClass = info.compCompHnd->getBuiltinClass(CLASSID_STRING);
+ assert(stringClass != (CORINFO_CLASS_HANDLE) nullptr);
+ return stringClass;
+}
+
+CORINFO_CLASS_HANDLE Compiler::impGetObjectClass()
+{
+ CORINFO_CLASS_HANDLE objectClass = info.compCompHnd->getBuiltinClass(CLASSID_SYSTEM_OBJECT);
+ assert(objectClass != (CORINFO_CLASS_HANDLE) nullptr);
+ return objectClass;
+}
+
+/*****************************************************************************
+ * "&var" can be used either as TYP_BYREF or TYP_I_IMPL, but we
+ * set its type to TYP_BYREF when we create it. Only at the point where we
+ * use it do we know whether it can be changed to TYP_I_IMPL.
+ */
+
+/* static */
+void Compiler::impBashVarAddrsToI(GenTreePtr tree1, GenTreePtr tree2)
+{
+ if (tree1->IsVarAddr())
+ {
+ tree1->gtType = TYP_I_IMPL;
+ }
+
+ if (tree2 && tree2->IsVarAddr())
+ {
+ tree2->gtType = TYP_I_IMPL;
+ }
+}
+
+/*****************************************************************************
+ * TYP_INT and TYP_I_IMPL can be used almost interchangeably, but we want
+ * to make that an explicit cast in our trees, so any implicit casts that
+ * exist in the IL (at least on 64-bit where TYP_I_IMPL != TYP_INT) are
+ * turned into explicit casts here.
+ * We also allow an implicit conversion of an ldnull into a TYP_I_IMPL(0).
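+ *
+ * For example (illustrative, 64-bit targets only): when a TYP_INT value is used where
+ * a native int (TYP_I_IMPL) is expected, it is wrapped in an explicit GT_CAST to
+ * TYP_I_IMPL here rather than being converted implicitly.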
+ */
+
+GenTreePtr Compiler::impImplicitIorI4Cast(GenTreePtr tree, var_types dstTyp)
+{
+ var_types currType = genActualType(tree->gtType);
+ var_types wantedType = genActualType(dstTyp);
+
+ if (wantedType != currType)
+ {
+ // Automatic upcast for a GT_CNS_INT into TYP_I_IMPL
+ if ((tree->OperGet() == GT_CNS_INT) && varTypeIsI(dstTyp))
+ {
+ if (!varTypeIsI(tree->gtType) || ((tree->gtType == TYP_REF) && (tree->gtIntCon.gtIconVal == 0)))
+ {
+ tree->gtType = TYP_I_IMPL;
+ }
+ }
+#ifdef _TARGET_64BIT_
+ else if (varTypeIsI(wantedType) && (currType == TYP_INT))
+ {
+ // Note that this allows TYP_INT to be cast to a TYP_I_IMPL when wantedType is a TYP_BYREF or TYP_REF
+ tree = gtNewCastNode(TYP_I_IMPL, tree, TYP_I_IMPL);
+ }
+ else if ((wantedType == TYP_INT) && varTypeIsI(currType))
+ {
+ // Note that this allows TYP_BYREF or TYP_REF to be cast to a TYP_INT
+ tree = gtNewCastNode(TYP_INT, tree, TYP_INT);
+ }
+#endif // _TARGET_64BIT_
+ }
+
+ return tree;
+}
+
+/*****************************************************************************
+ * TYP_FLOAT and TYP_DOUBLE can be used almost interchangeably in some cases,
+ * but we want to make that an explicit cast in our trees, so any implicit casts
+ * that exist in the IL are turned into explicit casts here.
+ */
+
+GenTreePtr Compiler::impImplicitR4orR8Cast(GenTreePtr tree, var_types dstTyp)
+{
+#ifndef LEGACY_BACKEND
+ if (varTypeIsFloating(tree) && varTypeIsFloating(dstTyp) && (dstTyp != tree->gtType))
+ {
+ tree = gtNewCastNode(dstTyp, tree, dstTyp);
+ }
+#endif // !LEGACY_BACKEND
+
+ return tree;
+}
+
+/*****************************************************************************/
+BOOL Compiler::impLocAllocOnStack()
+{
+ if (!compLocallocUsed)
+ {
+ return (FALSE);
+ }
+
+ // Returns true if a GT_LCLHEAP node is encountered in any of the trees
+ // that have been pushed on the importer evaluation stack.
+ //
+ for (unsigned i = 0; i < verCurrentState.esStackDepth; i++)
+ {
+ if (fgWalkTreePre(&verCurrentState.esStack[i].val, Compiler::fgChkLocAllocCB) == WALK_ABORT)
+ {
+ return (TRUE);
+ }
+ }
+ return (FALSE);
+}
+
+//------------------------------------------------------------------------
+// impInitializeArrayIntrinsic: Attempts to replace a call to InitializeArray
+// with a GT_COPYBLK node.
+//
+// Arguments:
+// sig - The InitializeArray signature.
+//
+// Return Value:
+// A pointer to the newly created GT_COPYBLK node if the replacement succeeds or
+// nullptr otherwise.
+//
+// Notes:
+// The function recognizes the following IL pattern:
+// ldc <length> or a list of ldc <lower bound>/<length>
+// newarr or newobj
+// dup
+// ldtoken <field handle>
+// call InitializeArray
+// The lower bounds need not be constant except when the array rank is 1.
+// The function recognizes all kinds of arrays thus enabling a small runtime
+// such as CoreRT to skip providing an implementation for InitializeArray.
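+//
+// As an illustrative example (hypothetical C# source, not taken from a specific test):
+// static readonly int[] Primes = { 2, 3, 5, 7, 11 };
+// typically compiles to the newarr/dup/ldtoken/call InitializeArray sequence above, and
+// this routine replaces the call with a block copy from the static data blob into the
+// newly allocated array.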
+
+GenTreePtr Compiler::impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig)
+{
+ assert(sig->numArgs == 2);
+
+ GenTreePtr fieldTokenNode = impStackTop(0).val;
+ GenTreePtr arrayLocalNode = impStackTop(1).val;
+
+ //
+ // Verify that the field token is known and valid. Note that it's also
+ // possible for the token to come from reflection, in which case we cannot do
+ // the optimization and must therefore revert to calling the helper. You can
+ // see an example of this in bvt\DynIL\initarray2.exe (in Main).
+ //
+
+ // Check to see if the ldtoken helper call is what we see here.
+ if (fieldTokenNode->gtOper != GT_CALL || (fieldTokenNode->gtCall.gtCallType != CT_HELPER) ||
+ (fieldTokenNode->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD)))
+ {
+ return nullptr;
+ }
+
+ // Strip helper call away
+ fieldTokenNode = fieldTokenNode->gtCall.gtCallArgs->Current();
+
+ if (fieldTokenNode->gtOper == GT_IND)
+ {
+ fieldTokenNode = fieldTokenNode->gtOp.gtOp1;
+ }
+
+ // Check for constant
+ if (fieldTokenNode->gtOper != GT_CNS_INT)
+ {
+ return nullptr;
+ }
+
+ CORINFO_FIELD_HANDLE fieldToken = (CORINFO_FIELD_HANDLE)fieldTokenNode->gtIntCon.gtCompileTimeHandle;
+ if (!fieldTokenNode->IsIconHandle(GTF_ICON_FIELD_HDL) || (fieldToken == nullptr))
+ {
+ return nullptr;
+ }
+
+ //
+ // We need to get the number of elements in the array and the size of each element.
+ // We verify that the newarr statement is exactly what we expect it to be.
+ // If it's not, then we just return nullptr and don't optimize this call.
+ //
+
+ //
+ // It is possible that we don't have any statements in the block yet.
+ //
+ if (impTreeLast->gtOper != GT_STMT)
+ {
+ assert(impTreeLast->gtOper == GT_BEG_STMTS);
+ return nullptr;
+ }
+
+ //
+ // We start by looking at the last statement, making sure it's an assignment, and
+ // that the target of the assignment is the array passed to InitializeArray.
+ //
+ GenTreePtr arrayAssignment = impTreeLast->gtStmt.gtStmtExpr;
+ if ((arrayAssignment->gtOper != GT_ASG) || (arrayAssignment->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
+ (arrayLocalNode->gtOper != GT_LCL_VAR) ||
+ (arrayAssignment->gtOp.gtOp1->gtLclVarCommon.gtLclNum != arrayLocalNode->gtLclVarCommon.gtLclNum))
+ {
+ return nullptr;
+ }
+
+ //
+ // Make sure that the object being assigned is a helper call.
+ //
+
+ GenTreePtr newArrayCall = arrayAssignment->gtOp.gtOp2;
+ if ((newArrayCall->gtOper != GT_CALL) || (newArrayCall->gtCall.gtCallType != CT_HELPER))
+ {
+ return nullptr;
+ }
+
+ //
+ // Verify that it is one of the new array helpers.
+ //
+
+ bool isMDArray = false;
+
+ if (newArrayCall->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEWARR_1_DIRECT) &&
+ newArrayCall->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEWARR_1_OBJ) &&
+ newArrayCall->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEWARR_1_VC) &&
+ newArrayCall->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEWARR_1_ALIGN8)
+#ifdef FEATURE_READYTORUN_COMPILER
+ && newArrayCall->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_READYTORUN_NEWARR_1)
+#endif
+ )
+ {
+#if COR_JIT_EE_VERSION > 460
+ if (newArrayCall->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEW_MDARR_NONVARARG))
+ {
+ return nullptr;
+ }
+
+ isMDArray = true;
+#endif
+ }
+
+ CORINFO_CLASS_HANDLE arrayClsHnd = (CORINFO_CLASS_HANDLE)newArrayCall->gtCall.compileTimeHelperArgumentHandle;
+
+ //
+ // Make sure we found a compile time handle to the array
+ //
+
+ if (!arrayClsHnd)
+ {
+ return nullptr;
+ }
+
+ unsigned rank = 0;
+ S_UINT32 numElements;
+
+ if (isMDArray)
+ {
+ rank = info.compCompHnd->getArrayRank(arrayClsHnd);
+
+ if (rank == 0)
+ {
+ return nullptr;
+ }
+
+ GenTreeArgList* tokenArg = newArrayCall->gtCall.gtCallArgs;
+ assert(tokenArg != nullptr);
+ GenTreeArgList* numArgsArg = tokenArg->Rest();
+ assert(numArgsArg != nullptr);
+ GenTreeArgList* argsArg = numArgsArg->Rest();
+ assert(argsArg != nullptr);
+
+ //
+ // The number of arguments should be a constant between 1 and 64. The rank can't be 0
+ // so at least one length must be present and the rank can't exceed 32 so there can
+ // be at most 64 arguments - 32 lengths and 32 lower bounds.
+ //
+
+ if ((!numArgsArg->Current()->IsCnsIntOrI()) || (numArgsArg->Current()->AsIntCon()->IconValue() < 1) ||
+ (numArgsArg->Current()->AsIntCon()->IconValue() > 64))
+ {
+ return nullptr;
+ }
+
+ unsigned numArgs = static_cast<unsigned>(numArgsArg->Current()->AsIntCon()->IconValue());
+ bool lowerBoundsSpecified;
+
+ if (numArgs == rank * 2)
+ {
+ lowerBoundsSpecified = true;
+ }
+ else if (numArgs == rank)
+ {
+ lowerBoundsSpecified = false;
+
+ //
+ // If the rank is 1 and a lower bound isn't specified then the runtime creates
+ // an SDArray. Note that even if a lower bound is specified it can be 0, and then
+ // we get an SDArray as well; see the for loop below.
+ //
+
+ if (rank == 1)
+ {
+ isMDArray = false;
+ }
+ }
+ else
+ {
+ return nullptr;
+ }
+
+ //
+ // The rank is known to be at least 1 so we can start with numElements being 1
+ // to avoid the need to special case the first dimension.
+ //
+
+ numElements = S_UINT32(1);
+
+ struct Match
+ {
+ static bool IsArgsFieldInit(GenTree* tree, unsigned index, unsigned lvaNewObjArrayArgs)
+ {
+ return (tree->OperGet() == GT_ASG) && IsArgsFieldIndir(tree->gtGetOp1(), index, lvaNewObjArrayArgs) &&
+ IsArgsAddr(tree->gtGetOp1()->gtGetOp1()->gtGetOp1(), lvaNewObjArrayArgs);
+ }
+
+ static bool IsArgsFieldIndir(GenTree* tree, unsigned index, unsigned lvaNewObjArrayArgs)
+ {
+ return (tree->OperGet() == GT_IND) && (tree->gtGetOp1()->OperGet() == GT_ADD) &&
+ (tree->gtGetOp1()->gtGetOp2()->IsIntegralConst(sizeof(INT32) * index)) &&
+ IsArgsAddr(tree->gtGetOp1()->gtGetOp1(), lvaNewObjArrayArgs);
+ }
+
+ static bool IsArgsAddr(GenTree* tree, unsigned lvaNewObjArrayArgs)
+ {
+ return (tree->OperGet() == GT_ADDR) && (tree->gtGetOp1()->OperGet() == GT_LCL_VAR) &&
+ (tree->gtGetOp1()->AsLclVar()->GetLclNum() == lvaNewObjArrayArgs);
+ }
+
+ static bool IsComma(GenTree* tree)
+ {
+ return (tree != nullptr) && (tree->OperGet() == GT_COMMA);
+ }
+ };
+
+ unsigned argIndex = 0;
+ GenTree* comma;
+
+ for (comma = argsArg->Current(); Match::IsComma(comma); comma = comma->gtGetOp2())
+ {
+ if (lowerBoundsSpecified)
+ {
+ //
+ // In general lower bounds can be ignored because they're not needed to
+ // calculate the total number of elements. But for single-dimensional arrays
+ // we need to know if the lower bound is 0 because in this case the runtime
+ // creates an SDArray and this affects the way the array data offset is calculated.
+ //
+
+ if (rank == 1)
+ {
+ GenTree* lowerBoundAssign = comma->gtGetOp1();
+ assert(Match::IsArgsFieldInit(lowerBoundAssign, argIndex, lvaNewObjArrayArgs));
+ GenTree* lowerBoundNode = lowerBoundAssign->gtGetOp2();
+
+ if (lowerBoundNode->IsIntegralConst(0))
+ {
+ isMDArray = false;
+ }
+ }
+
+ comma = comma->gtGetOp2();
+ argIndex++;
+ }
+
+ GenTree* lengthNodeAssign = comma->gtGetOp1();
+ assert(Match::IsArgsFieldInit(lengthNodeAssign, argIndex, lvaNewObjArrayArgs));
+ GenTree* lengthNode = lengthNodeAssign->gtGetOp2();
+
+ if (!lengthNode->IsCnsIntOrI())
+ {
+ return nullptr;
+ }
+
+ numElements *= S_SIZE_T(lengthNode->AsIntCon()->IconValue());
+ argIndex++;
+ }
+
+ assert((comma != nullptr) && Match::IsArgsAddr(comma, lvaNewObjArrayArgs));
+
+ if (argIndex != numArgs)
+ {
+ return nullptr;
+ }
+ }
+ else
+ {
+ //
+ // Make sure there are exactly two arguments: the array class and
+ // the number of elements.
+ //
+
+ GenTreePtr arrayLengthNode;
+
+ GenTreeArgList* args = newArrayCall->gtCall.gtCallArgs;
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (newArrayCall->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_NEWARR_1))
+ {
+ // Array length is 1st argument for readytorun helper
+ arrayLengthNode = args->Current();
+ }
+ else
+#endif
+ {
+ // Array length is 2nd argument for regular helper
+ arrayLengthNode = args->Rest()->Current();
+ }
+
+ //
+ // Make sure that the number of elements looks valid.
+ //
+ if (arrayLengthNode->gtOper != GT_CNS_INT)
+ {
+ return nullptr;
+ }
+
+ numElements = S_SIZE_T(arrayLengthNode->gtIntCon.gtIconVal);
+
+ if (!info.compCompHnd->isSDArray(arrayClsHnd))
+ {
+ return nullptr;
+ }
+ }
+
+ CORINFO_CLASS_HANDLE elemClsHnd;
+ var_types elementType = JITtype2varType(info.compCompHnd->getChildType(arrayClsHnd, &elemClsHnd));
+
+ //
+ // Note that genTypeSize will return zero for non-primitive types, which is exactly
+ // what we want (size will then be 0, and we will catch this in the conditional below).
+ // Note that we don't expect this to fail for valid binaries, so we assert in the
+ // non-verification case (the verification case should not assert but rather correctly
+ // handle bad binaries). This assert is not guarding any specific invariant, but rather
+ // saying that we don't expect this to happen, and if it is hit, we need to investigate
+ // why.
+ //
+
+ S_UINT32 elemSize(genTypeSize(elementType));
+ S_UINT32 size = elemSize * S_UINT32(numElements);
+
+ if (size.IsOverflow())
+ {
+ return nullptr;
+ }
+
+ if ((size.Value() == 0) || (varTypeIsGC(elementType)))
+ {
+ assert(verNeedsVerification());
+ return nullptr;
+ }
+
+ void* initData = info.compCompHnd->getArrayInitializationData(fieldToken, size.Value());
+ if (!initData)
+ {
+ return nullptr;
+ }
+
+ //
+ // At this point we are ready to commit to implementing the InitializeArray
+ // intrinsic using a struct assignment. Pop the arguments from the stack and
+ // return the struct assignment node.
+ //
+
+ impPopStack();
+ impPopStack();
+
+ const unsigned blkSize = size.Value();
+ GenTreePtr dst;
+
+ if (isMDArray)
+ {
+ unsigned dataOffset = eeGetMDArrayDataOffset(elementType, rank);
+
+ dst = gtNewOperNode(GT_ADD, TYP_BYREF, arrayLocalNode, gtNewIconNode(dataOffset, TYP_I_IMPL));
+ }
+ else
+ {
+ dst = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewIndexRef(elementType, arrayLocalNode, gtNewIconNode(0)));
+ }
+ GenTreePtr blk = gtNewBlockVal(dst, blkSize);
+ GenTreePtr srcAddr = gtNewIconHandleNode((size_t)initData, GTF_ICON_STATIC_HDL);
+ GenTreePtr src = gtNewOperNode(GT_IND, TYP_STRUCT, srcAddr);
+
+ return gtNewBlkOpNode(blk, // dst
+ src, // src
+ blkSize, // size
+ false, // volatil
+ true); // copyBlock
+}
+
+/*****************************************************************************/
+// Returns the GenTree that should be used to do the intrinsic instead of the call.
+// Returns NULL if an intrinsic cannot be used
+
+GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_SIG_INFO* sig,
+ int memberRef,
+ bool readonlyCall,
+ bool tailCall,
+ CorInfoIntrinsics* pIntrinsicID)
+{
+ bool mustExpand = false;
+#if COR_JIT_EE_VERSION > 460
+ CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method, &mustExpand);
+#else
+ CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method);
+#endif
+ *pIntrinsicID = intrinsicID;
+
+#ifndef _TARGET_ARM_
+ genTreeOps interlockedOperator;
+#endif
+
+ if (intrinsicID == CORINFO_INTRINSIC_StubHelpers_GetStubContext)
+ {
+ // must be done regardless of DbgCode and MinOpts
+ return gtNewLclvNode(lvaStubArgumentVar, TYP_I_IMPL);
+ }
+#ifdef _TARGET_64BIT_
+ if (intrinsicID == CORINFO_INTRINSIC_StubHelpers_GetStubContextAddr)
+ {
+ // must be done regardless of DbgCode and MinOpts
+ return gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(lvaStubArgumentVar, TYP_I_IMPL));
+ }
+#else
+ assert(intrinsicID != CORINFO_INTRINSIC_StubHelpers_GetStubContextAddr);
+#endif
+
+ GenTreePtr retNode = nullptr;
+
+ //
+    // We disable the inlining of intrinsics for MinOpts.
+ //
+ if (!mustExpand && (opts.compDbgCode || opts.MinOpts()))
+ {
+ *pIntrinsicID = CORINFO_INTRINSIC_Illegal;
+ return retNode;
+ }
+
+    // Currently we don't expand CORINFO_INTRINSIC_Exp because it does not
+    // seem to work properly for Infinity values, and we don't expand
+    // CORINFO_INTRINSIC_Pow because it needs a helper which we currently don't have.
+
+ var_types callType = JITtype2varType(sig->retType);
+
+ /* First do the intrinsics which are always smaller than a call */
+
+ switch (intrinsicID)
+ {
+ GenTreePtr op1, op2;
+
+ case CORINFO_INTRINSIC_Sin:
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ case CORINFO_INTRINSIC_Cos:
+ case CORINFO_INTRINSIC_Round:
+ case CORINFO_INTRINSIC_Cosh:
+ case CORINFO_INTRINSIC_Sinh:
+ case CORINFO_INTRINSIC_Tan:
+ case CORINFO_INTRINSIC_Tanh:
+ case CORINFO_INTRINSIC_Asin:
+ case CORINFO_INTRINSIC_Acos:
+ case CORINFO_INTRINSIC_Atan:
+ case CORINFO_INTRINSIC_Atan2:
+ case CORINFO_INTRINSIC_Log10:
+ case CORINFO_INTRINSIC_Pow:
+ case CORINFO_INTRINSIC_Exp:
+ case CORINFO_INTRINSIC_Ceiling:
+ case CORINFO_INTRINSIC_Floor:
+
+ // These are math intrinsics
+
+ assert(callType != TYP_STRUCT);
+
+ op1 = nullptr;
+
+#ifdef LEGACY_BACKEND
+ if (IsTargetIntrinsic(intrinsicID))
+#else
+        // Intrinsics that are not implemented directly by target instructions will
+        // be re-materialized as user calls in the rationalizer. For tail-prefixed calls,
+        // don't do this optimization because
+        //  a) we want back compatibility with desktop .NET 4.6 / 4.6.1, and
+        //  b) it would be a non-trivial task, or too late, to re-materialize a surviving
+        //     tail-prefixed GT_INTRINSIC as a tail call in the rationalizer.
+ if (!IsIntrinsicImplementedByUserCall(intrinsicID) || !tailCall)
+#endif
+ {
+ switch (sig->numArgs)
+ {
+ case 1:
+ op1 = impPopStack().val;
+
+#if FEATURE_X87_DOUBLES
+
+ // X87 stack doesn't differentiate between float/double
+ // so it doesn't need a cast, but everybody else does
+ // Just double check it is at least a FP type
+ noway_assert(varTypeIsFloating(op1));
+
+#else // FEATURE_X87_DOUBLES
+
+ if (op1->TypeGet() != callType)
+ {
+ op1 = gtNewCastNode(callType, op1, callType);
+ }
+
+#endif // FEATURE_X87_DOUBLES
+
+ op1 = new (this, GT_INTRINSIC)
+ GenTreeIntrinsic(genActualType(callType), op1, intrinsicID, method);
+ break;
+
+ case 2:
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+
+#if FEATURE_X87_DOUBLES
+
+ // X87 stack doesn't differentiate between float/double
+ // so it doesn't need a cast, but everybody else does
+ // Just double check it is at least a FP type
+ noway_assert(varTypeIsFloating(op2));
+ noway_assert(varTypeIsFloating(op1));
+
+#else // FEATURE_X87_DOUBLES
+
+ if (op2->TypeGet() != callType)
+ {
+ op2 = gtNewCastNode(callType, op2, callType);
+ }
+ if (op1->TypeGet() != callType)
+ {
+ op1 = gtNewCastNode(callType, op1, callType);
+ }
+
+#endif // FEATURE_X87_DOUBLES
+
+ op1 = new (this, GT_INTRINSIC)
+ GenTreeIntrinsic(genActualType(callType), op1, op2, intrinsicID, method);
+ break;
+
+ default:
+                    NO_WAY("Unsupported number of args for Math Intrinsic");
+ }
+
+#ifndef LEGACY_BACKEND
+ if (IsIntrinsicImplementedByUserCall(intrinsicID))
+ {
+ op1->gtFlags |= GTF_CALL;
+ }
+#endif
+ }
+
+ retNode = op1;
+ break;
+
+#ifdef _TARGET_XARCH_
+ // TODO-ARM-CQ: reenable treating Interlocked operation as intrinsic
+ case CORINFO_INTRINSIC_InterlockedAdd32:
+ interlockedOperator = GT_LOCKADD;
+ goto InterlockedBinOpCommon;
+ case CORINFO_INTRINSIC_InterlockedXAdd32:
+ interlockedOperator = GT_XADD;
+ goto InterlockedBinOpCommon;
+ case CORINFO_INTRINSIC_InterlockedXchg32:
+ interlockedOperator = GT_XCHG;
+ goto InterlockedBinOpCommon;
+
+#ifdef _TARGET_AMD64_
+ case CORINFO_INTRINSIC_InterlockedAdd64:
+ interlockedOperator = GT_LOCKADD;
+ goto InterlockedBinOpCommon;
+ case CORINFO_INTRINSIC_InterlockedXAdd64:
+ interlockedOperator = GT_XADD;
+ goto InterlockedBinOpCommon;
+ case CORINFO_INTRINSIC_InterlockedXchg64:
+ interlockedOperator = GT_XCHG;
+ goto InterlockedBinOpCommon;
+#endif // _TARGET_AMD64_
+
+ InterlockedBinOpCommon:
+ assert(callType != TYP_STRUCT);
+ assert(sig->numArgs == 2);
+
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+
+ // This creates:
+ // val
+ // XAdd
+ // addr
+ // field (for example)
+ //
+ // In the case where the first argument is the address of a local, we might
+ // want to make this *not* make the var address-taken -- but atomic instructions
+ // on a local are probably pretty useless anyway, so we probably don't care.
+
+ op1 = gtNewOperNode(interlockedOperator, genActualType(callType), op1, op2);
+ op1->gtFlags |= GTF_GLOB_EFFECT;
+ retNode = op1;
+ break;
+#endif // _TARGET_XARCH_
+
+ case CORINFO_INTRINSIC_MemoryBarrier:
+
+ assert(sig->numArgs == 0);
+
+ op1 = new (this, GT_MEMORYBARRIER) GenTree(GT_MEMORYBARRIER, TYP_VOID);
+ op1->gtFlags |= GTF_GLOB_EFFECT;
+ retNode = op1;
+ break;
+
+#ifdef _TARGET_XARCH_
+ // TODO-ARM-CQ: reenable treating InterlockedCmpXchg32 operation as intrinsic
+ case CORINFO_INTRINSIC_InterlockedCmpXchg32:
+#ifdef _TARGET_AMD64_
+ case CORINFO_INTRINSIC_InterlockedCmpXchg64:
+#endif
+ {
+ assert(callType != TYP_STRUCT);
+ assert(sig->numArgs == 3);
+ GenTreePtr op3;
+
+ op3 = impPopStack().val; // comparand
+ op2 = impPopStack().val; // value
+ op1 = impPopStack().val; // location
+
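+            // Build the compare-exchange node; the operand order is (location, value, comparand),
+            // matching the pops above.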
+ GenTreePtr node = new (this, GT_CMPXCHG) GenTreeCmpXchg(genActualType(callType), op1, op2, op3);
+
+ node->gtCmpXchg.gtOpLocation->gtFlags |= GTF_DONT_CSE;
+ retNode = node;
+ break;
+ }
+#endif
+
+ case CORINFO_INTRINSIC_StringLength:
+ op1 = impPopStack().val;
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ GenTreeArrLen* arrLen =
+ new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, op1, offsetof(CORINFO_String, stringLen));
+ op1 = arrLen;
+ }
+ else
+ {
+ /* Create the expression "*(str_addr + stringLengthOffset)" */
+ op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1,
+ gtNewIconNode(offsetof(CORINFO_String, stringLen), TYP_I_IMPL));
+ op1 = gtNewOperNode(GT_IND, TYP_INT, op1);
+ }
+ retNode = op1;
+ break;
+
+ case CORINFO_INTRINSIC_StringGetChar:
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
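+            // Index into the string's characters; GTF_INX_STRING_LAYOUT marks the index node
+            // as using the string layout rather than the array layout.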
+ op1 = gtNewIndexRef(TYP_CHAR, op1, op2);
+ op1->gtFlags |= GTF_INX_STRING_LAYOUT;
+ retNode = op1;
+ break;
+
+ case CORINFO_INTRINSIC_InitializeArray:
+ retNode = impInitializeArrayIntrinsic(sig);
+ break;
+
+ case CORINFO_INTRINSIC_Array_Address:
+ case CORINFO_INTRINSIC_Array_Get:
+ case CORINFO_INTRINSIC_Array_Set:
+ retNode = impArrayAccessIntrinsic(clsHnd, sig, memberRef, readonlyCall, intrinsicID);
+ break;
+
+ case CORINFO_INTRINSIC_GetTypeFromHandle:
+ op1 = impStackTop(0).val;
+ if (op1->gtOper == GT_CALL && (op1->gtCall.gtCallType == CT_HELPER) &&
+ gtIsTypeHandleToRuntimeTypeHelper(op1))
+ {
+ op1 = impPopStack().val;
+ // Change call to return RuntimeType directly.
+ op1->gtType = TYP_REF;
+ retNode = op1;
+ }
+ // Call the regular function.
+ break;
+
+ case CORINFO_INTRINSIC_RTH_GetValueInternal:
+ op1 = impStackTop(0).val;
+ if (op1->gtOper == GT_CALL && (op1->gtCall.gtCallType == CT_HELPER) &&
+ gtIsTypeHandleToRuntimeTypeHelper(op1))
+ {
+ // Old tree
+ // Helper-RuntimeTypeHandle -> TreeToGetNativeTypeHandle
+ //
+ // New tree
+ // TreeToGetNativeTypeHandle
+
+ // Remove call to helper and return the native TypeHandle pointer that was the parameter
+ // to that helper.
+
+ op1 = impPopStack().val;
+
+ // Get native TypeHandle argument to old helper
+ op1 = op1->gtCall.gtCallArgs;
+ assert(op1->IsList());
+ assert(op1->gtOp.gtOp2 == nullptr);
+ op1 = op1->gtOp.gtOp1;
+ retNode = op1;
+ }
+ // Call the regular function.
+ break;
+
+#ifndef LEGACY_BACKEND
+ case CORINFO_INTRINSIC_Object_GetType:
+
+ op1 = impPopStack().val;
+ op1 = new (this, GT_INTRINSIC) GenTreeIntrinsic(genActualType(callType), op1, intrinsicID, method);
+
+ // Set the CALL flag to indicate that the operator is implemented by a call.
+ // Set also the EXCEPTION flag because the native implementation of
+ // CORINFO_INTRINSIC_Object_GetType intrinsic can throw NullReferenceException.
+ op1->gtFlags |= (GTF_CALL | GTF_EXCEPT);
+ retNode = op1;
+ break;
+#endif
+
+ default:
+ /* Unknown intrinsic */
+ break;
+ }
+
+ if (mustExpand)
+ {
+ if (retNode == nullptr)
+ {
+ NO_WAY("JIT must expand the intrinsic!");
+ }
+ }
+
+ return retNode;
+}
+
+/*****************************************************************************/
+
+GenTreePtr Compiler::impArrayAccessIntrinsic(
+ CORINFO_CLASS_HANDLE clsHnd, CORINFO_SIG_INFO* sig, int memberRef, bool readonlyCall, CorInfoIntrinsics intrinsicID)
+{
+ /* If we are generating SMALL_CODE, we don't want to use intrinsics for
+ the following, as it generates fatter code.
+ */
+
+ if (compCodeOpt() == SMALL_CODE)
+ {
+ return nullptr;
+ }
+
+ /* These intrinsics generate fatter (but faster) code and are only
+ done if we don't need SMALL_CODE */
+
+ unsigned rank = (intrinsicID == CORINFO_INTRINSIC_Array_Set) ? (sig->numArgs - 1) : sig->numArgs;
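+    // For Array_Set the last argument is the value being stored, so it does not count
+    // towards the rank.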
+
+    // The rank 1 case is special because it has to handle two array formats,
+    // so we simply don't handle that case.
+ if (rank > GT_ARR_MAX_RANK || rank <= 1)
+ {
+ return nullptr;
+ }
+
+ CORINFO_CLASS_HANDLE arrElemClsHnd = nullptr;
+ var_types elemType = JITtype2varType(info.compCompHnd->getChildType(clsHnd, &arrElemClsHnd));
+
+    // For the ref case, we will only be able to inline if the types match
+    // (the verifier checks for this; we don't care for the nonverified case) and the
+    // type is final (so we don't need to do the cast).
+ if ((intrinsicID != CORINFO_INTRINSIC_Array_Get) && !readonlyCall && varTypeIsGC(elemType))
+ {
+ // Get the call site signature
+ CORINFO_SIG_INFO LocalSig;
+ eeGetCallSiteSig(memberRef, info.compScopeHnd, impTokenLookupContextHandle, &LocalSig);
+ assert(LocalSig.hasThis());
+
+ CORINFO_CLASS_HANDLE actualElemClsHnd;
+
+ if (intrinsicID == CORINFO_INTRINSIC_Array_Set)
+ {
+ // Fetch the last argument, the one that indicates the type we are setting.
+ CORINFO_ARG_LIST_HANDLE argType = LocalSig.args;
+ for (unsigned r = 0; r < rank; r++)
+ {
+ argType = info.compCompHnd->getArgNext(argType);
+ }
+
+ typeInfo argInfo = verParseArgSigToTypeInfo(&LocalSig, argType);
+ actualElemClsHnd = argInfo.GetClassHandle();
+ }
+ else
+ {
+ assert(intrinsicID == CORINFO_INTRINSIC_Array_Address);
+
+ // Fetch the return type
+ typeInfo retInfo = verMakeTypeInfo(LocalSig.retType, LocalSig.retTypeClass);
+ assert(retInfo.IsByRef());
+ actualElemClsHnd = retInfo.GetClassHandle();
+ }
+
+ // if it's not final, we can't do the optimization
+ if (!(info.compCompHnd->getClassAttribs(actualElemClsHnd) & CORINFO_FLG_FINAL))
+ {
+ return nullptr;
+ }
+ }
+
+ unsigned arrayElemSize;
+ if (elemType == TYP_STRUCT)
+ {
+ assert(arrElemClsHnd);
+
+ arrayElemSize = info.compCompHnd->getClassSize(arrElemClsHnd);
+ }
+ else
+ {
+ arrayElemSize = genTypeSize(elemType);
+ }
+
+ if ((unsigned char)arrayElemSize != arrayElemSize)
+ {
+ // arrayElemSize would be truncated as an unsigned char.
+ // This means the array element is too large. Don't do the optimization.
+ return nullptr;
+ }
+
+ GenTreePtr val = nullptr;
+
+ if (intrinsicID == CORINFO_INTRINSIC_Array_Set)
+ {
+ // Assignment of a struct is more work, and there are more gets than sets.
+ if (elemType == TYP_STRUCT)
+ {
+ return nullptr;
+ }
+
+ val = impPopStack().val;
+ assert(genActualType(elemType) == genActualType(val->gtType) ||
+ (elemType == TYP_FLOAT && val->gtType == TYP_DOUBLE) ||
+ (elemType == TYP_INT && val->gtType == TYP_BYREF) ||
+ (elemType == TYP_DOUBLE && val->gtType == TYP_FLOAT));
+ }
+
+ noway_assert((unsigned char)GT_ARR_MAX_RANK == GT_ARR_MAX_RANK);
+
+ GenTreePtr inds[GT_ARR_MAX_RANK];
+ for (unsigned k = rank; k > 0; k--)
+ {
+ inds[k - 1] = impPopStack().val;
+ }
+
+ GenTreePtr arr = impPopStack().val;
+ assert(arr->gtType == TYP_REF);
+
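+    // Build a GT_ARR_ELEM node that yields the byref address of arr[inds[0], ..., inds[rank-1]].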
+ GenTreePtr arrElem =
+ new (this, GT_ARR_ELEM) GenTreeArrElem(TYP_BYREF, arr, static_cast<unsigned char>(rank),
+ static_cast<unsigned char>(arrayElemSize), elemType, &inds[0]);
+
+ if (intrinsicID != CORINFO_INTRINSIC_Array_Address)
+ {
+ arrElem = gtNewOperNode(GT_IND, elemType, arrElem);
+ }
+
+ if (intrinsicID == CORINFO_INTRINSIC_Array_Set)
+ {
+ assert(val != nullptr);
+ return gtNewAssignNode(arrElem, val);
+ }
+ else
+ {
+ return arrElem;
+ }
+}
+
+BOOL Compiler::verMergeEntryStates(BasicBlock* block, bool* changed)
+{
+ unsigned i;
+
+ // do some basic checks first
+ if (block->bbStackDepthOnEntry() != verCurrentState.esStackDepth)
+ {
+ return FALSE;
+ }
+
+ if (verCurrentState.esStackDepth > 0)
+ {
+ // merge stack types
+ StackEntry* parentStack = block->bbStackOnEntry();
+ StackEntry* childStack = verCurrentState.esStack;
+
+ for (i = 0; i < verCurrentState.esStackDepth; i++, parentStack++, childStack++)
+ {
+ if (tiMergeToCommonParent(&parentStack->seTypeInfo, &childStack->seTypeInfo, changed) == FALSE)
+ {
+ return FALSE;
+ }
+ }
+ }
+
+ // merge initialization status of this ptr
+
+ if (verTrackObjCtorInitState)
+ {
+ // If we're tracking the CtorInitState, then it must not be unknown in the current state.
+ assert(verCurrentState.thisInitialized != TIS_Bottom);
+
+ // If the successor block's thisInit state is unknown, copy it from the current state.
+ if (block->bbThisOnEntry() == TIS_Bottom)
+ {
+ *changed = true;
+ verSetThisInit(block, verCurrentState.thisInitialized);
+ }
+ else if (verCurrentState.thisInitialized != block->bbThisOnEntry())
+ {
+ if (block->bbThisOnEntry() != TIS_Top)
+ {
+ *changed = true;
+ verSetThisInit(block, TIS_Top);
+
+ if (block->bbFlags & BBF_FAILED_VERIFICATION)
+ {
+ // The block is bad. Control can flow through the block to any handler that catches the
+ // verification exception, but the importer ignores bad blocks and therefore won't model
+ // this flow in the normal way. To complete the merge into the bad block, the new state
+ // needs to be manually pushed to the handlers that may be reached after the verification
+ // exception occurs.
+ //
+ // Usually, the new state was already propagated to the relevant handlers while processing
+ // the predecessors of the bad block. The exception is when the bad block is at the start
+ // of a try region, meaning it is protected by additional handlers that do not protect its
+ // predecessors.
+ //
+ if (block->hasTryIndex() && ((block->bbFlags & BBF_TRY_BEG) != 0))
+ {
+ // Push TIS_Top to the handlers that protect the bad block. Note that this can cause
+ // recursive calls back into this code path (if successors of the current bad block are
+ // also bad blocks).
+ //
+ ThisInitState origTIS = verCurrentState.thisInitialized;
+ verCurrentState.thisInitialized = TIS_Top;
+ impVerifyEHBlock(block, true);
+ verCurrentState.thisInitialized = origTIS;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ assert(verCurrentState.thisInitialized == TIS_Bottom && block->bbThisOnEntry() == TIS_Bottom);
+ }
+
+ return TRUE;
+}
+
+/*****************************************************************************
+ * 'logMsg' is true if a log message needs to be logged. false if the caller has
+ * already logged it (presumably in a more detailed fashion than done here)
+ * 'bVerificationException' is true for a verification exception, false for a
+ * "call unauthorized by host" exception.
+ */
+
+void Compiler::verConvertBBToThrowVerificationException(BasicBlock* block DEBUGARG(bool logMsg))
+{
+ block->bbJumpKind = BBJ_THROW;
+ block->bbFlags |= BBF_FAILED_VERIFICATION;
+
+ impCurStmtOffsSet(block->bbCodeOffs);
+
+#ifdef DEBUG
+ // we need this since BeginTreeList asserts otherwise
+ impTreeList = impTreeLast = nullptr;
+ block->bbFlags &= ~BBF_IMPORTED;
+
+ if (logMsg)
+ {
+ JITLOG((LL_ERROR, "Verification failure: while compiling %s near IL offset %x..%xh \n", info.compFullName,
+ block->bbCodeOffs, block->bbCodeOffsEnd));
+ if (verbose)
+ {
+ printf("\n\nVerification failure: %s near IL %xh \n", info.compFullName, block->bbCodeOffs);
+ }
+ }
+
+ if (JitConfig.DebugBreakOnVerificationFailure())
+ {
+ DebugBreak();
+ }
+#endif
+
+ impBeginTreeList();
+
+ // if the stack is non-empty evaluate all the side-effects
+ if (verCurrentState.esStackDepth > 0)
+ {
+ impEvalSideEffects();
+ }
+ assert(verCurrentState.esStackDepth == 0);
+
+ GenTreePtr op1 = gtNewHelperCallNode(CORINFO_HELP_VERIFICATION, TYP_VOID, GTF_EXCEPT,
+ gtNewArgList(gtNewIconNode(block->bbCodeOffs)));
+ // verCurrentState.esStackDepth = 0;
+ impAppendTree(op1, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+    // The inliner is not able to handle methods that require a throw block, so
+    // make sure this method never gets inlined.
+ info.compCompHnd->setMethodAttribs(info.compMethodHnd, CORINFO_FLG_BAD_INLINEE);
+}
+
+/*****************************************************************************
+ *
+ */
+void Compiler::verHandleVerificationFailure(BasicBlock* block DEBUGARG(bool logMsg))
+
+{
+ // In AMD64, for historical reasons involving design limitations of JIT64, the VM has a
+ // slightly different mechanism in which it calls the JIT to perform IL verification:
+ // in the case of transparent methods the VM calls for a predicate IsVerifiable()
+ // that consists of calling the JIT with the IMPORT_ONLY flag and with the IL verify flag on.
+ // If the JIT determines the method is not verifiable, it should raise the exception to the VM and let
+    // it bubble up until reported by the runtime. Currently in RyuJIT, this method doesn't bubble
+    // up the exception; instead it embeds a throw inside the offending basic block and lets the
+    // failure occur when the jitted method runs.
+ //
+ // For AMD64 we don't want this behavior when the JIT has been called only for verification (i.e.
+ // with the IMPORT_ONLY and IL Verification flag set) because this won't actually generate code,
+ // just try to find out whether to fail this method before even actually jitting it. So, in case
+ // we detect these two conditions, instead of generating a throw statement inside the offending
+    // basic block, we immediately fail to JIT and notify the VM so that the IsVerifiable() predicate
+    // returns false, making RyuJIT behave the same way JIT64 does.
+ //
+ // The rationale behind this workaround is to avoid modifying the VM and maintain compatibility between JIT64 and
+ // RyuJIT for the time being until we completely replace JIT64.
+    // TODO-ARM64-Cleanup: We probably want to actually modify the VM in the future to avoid the unnecessary two passes.
+
+ // In AMD64 we must make sure we're behaving the same way as JIT64, meaning we should only raise the verification
+ // exception if we are only importing and verifying. The method verNeedsVerification() can also modify the
+ // tiVerificationNeeded flag in the case it determines it can 'skip verification' during importation and defer it
+ // to a runtime check. That's why we must assert one or the other (since the flag tiVerificationNeeded can
+ // be turned off during importation).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+
+#ifdef DEBUG
+ bool canSkipVerificationResult =
+ info.compCompHnd->canSkipMethodVerification(info.compMethodHnd) != CORINFO_VERIFICATION_CANNOT_SKIP;
+ assert(tiVerificationNeeded || canSkipVerificationResult);
+#endif // DEBUG
+
+ // Add the non verifiable flag to the compiler
+ if ((opts.eeFlags & CORJIT_FLG_IMPORT_ONLY) != 0)
+ {
+ tiIsVerifiableCode = FALSE;
+ }
+#endif //_TARGET_64BIT_
+ verResetCurrentState(block, &verCurrentState);
+ verConvertBBToThrowVerificationException(block DEBUGARG(logMsg));
+
+#ifdef DEBUG
+ impNoteLastILoffs(); // Remember at which BC offset the tree was finished
+#endif // DEBUG
+}
+
+/******************************************************************************/
+typeInfo Compiler::verMakeTypeInfo(CorInfoType ciType, CORINFO_CLASS_HANDLE clsHnd)
+{
+ assert(ciType < CORINFO_TYPE_COUNT);
+
+ typeInfo tiResult;
+ switch (ciType)
+ {
+ case CORINFO_TYPE_STRING:
+ case CORINFO_TYPE_CLASS:
+ tiResult = verMakeTypeInfo(clsHnd);
+ if (!tiResult.IsType(TI_REF))
+ { // type must be consistent with element type
+ return typeInfo();
+ }
+ break;
+
+#ifdef _TARGET_64BIT_
+ case CORINFO_TYPE_NATIVEINT:
+ case CORINFO_TYPE_NATIVEUINT:
+ if (clsHnd)
+ {
+ // If we have more precise information, use it
+ return verMakeTypeInfo(clsHnd);
+ }
+ else
+ {
+ return typeInfo::nativeInt();
+ }
+ break;
+#endif // _TARGET_64BIT_
+
+ case CORINFO_TYPE_VALUECLASS:
+ case CORINFO_TYPE_REFANY:
+ tiResult = verMakeTypeInfo(clsHnd);
+            // type must be consistent with element type;
+ if (!tiResult.IsValueClass())
+ {
+ return typeInfo();
+ }
+ break;
+ case CORINFO_TYPE_VAR:
+ return verMakeTypeInfo(clsHnd);
+
+ case CORINFO_TYPE_PTR: // for now, pointers are treated as an error
+ case CORINFO_TYPE_VOID:
+ return typeInfo();
+ break;
+
+ case CORINFO_TYPE_BYREF:
+ {
+ CORINFO_CLASS_HANDLE childClassHandle;
+ CorInfoType childType = info.compCompHnd->getChildType(clsHnd, &childClassHandle);
+ return ByRef(verMakeTypeInfo(childType, childClassHandle));
+ }
+ break;
+
+ default:
+ if (clsHnd)
+ { // If we have more precise information, use it
+ return typeInfo(TI_STRUCT, clsHnd);
+ }
+ else
+ {
+ return typeInfo(JITtype2tiType(ciType));
+ }
+ }
+ return tiResult;
+}
+
+/******************************************************************************/
+
+typeInfo Compiler::verMakeTypeInfo(CORINFO_CLASS_HANDLE clsHnd, bool bashStructToRef /* = false */)
+{
+ if (clsHnd == nullptr)
+ {
+ return typeInfo();
+ }
+
+ // Byrefs should only occur in method and local signatures, which are accessed
+ // using ICorClassInfo and ICorClassInfo.getChildType.
+ // So findClass() and getClassAttribs() should not be called for byrefs
+
+ if (JITtype2varType(info.compCompHnd->asCorInfoType(clsHnd)) == TYP_BYREF)
+ {
+ assert(!"Did findClass() return a Byref?");
+ return typeInfo();
+ }
+
+ unsigned attribs = info.compCompHnd->getClassAttribs(clsHnd);
+
+ if (attribs & CORINFO_FLG_VALUECLASS)
+ {
+ CorInfoType t = info.compCompHnd->getTypeForPrimitiveValueClass(clsHnd);
+
+        // Meta-data validation should ensure that CORINFO_TYPE_BYREF does
+        // not occur here, so we may want to change this to an assert instead.
+ if (t == CORINFO_TYPE_VOID || t == CORINFO_TYPE_BYREF || t == CORINFO_TYPE_PTR)
+ {
+ return typeInfo();
+ }
+
+#ifdef _TARGET_64BIT_
+ if (t == CORINFO_TYPE_NATIVEINT || t == CORINFO_TYPE_NATIVEUINT)
+ {
+ return typeInfo::nativeInt();
+ }
+#endif // _TARGET_64BIT_
+
+ if (t != CORINFO_TYPE_UNDEF)
+ {
+ return (typeInfo(JITtype2tiType(t)));
+ }
+ else if (bashStructToRef)
+ {
+ return (typeInfo(TI_REF, clsHnd));
+ }
+ else
+ {
+ return (typeInfo(TI_STRUCT, clsHnd));
+ }
+ }
+ else if (attribs & CORINFO_FLG_GENERIC_TYPE_VARIABLE)
+ {
+ // See comment in _typeInfo.h for why we do it this way.
+ return (typeInfo(TI_REF, clsHnd, true));
+ }
+ else
+ {
+ return (typeInfo(TI_REF, clsHnd));
+ }
+}
+
+/******************************************************************************/
+BOOL Compiler::verIsSDArray(typeInfo ti)
+{
+ if (ti.IsNullObjRef())
+ { // nulls are SD arrays
+ return TRUE;
+ }
+
+ if (!ti.IsType(TI_REF))
+ {
+ return FALSE;
+ }
+
+ if (!info.compCompHnd->isSDArray(ti.GetClassHandleForObjRef()))
+ {
+ return FALSE;
+ }
+ return TRUE;
+}
+
+/******************************************************************************/
+/* Given 'arrayObjectType' which is an array type, fetch the element type. */
+/* Returns an error type if anything goes wrong */
+
+typeInfo Compiler::verGetArrayElemType(typeInfo arrayObjectType)
+{
+    assert(!arrayObjectType.IsNullObjRef()); // you need to check for null explicitly since that is a success case
+
+ if (!verIsSDArray(arrayObjectType))
+ {
+ return typeInfo();
+ }
+
+ CORINFO_CLASS_HANDLE childClassHandle = nullptr;
+ CorInfoType ciType = info.compCompHnd->getChildType(arrayObjectType.GetClassHandleForObjRef(), &childClassHandle);
+
+ return verMakeTypeInfo(ciType, childClassHandle);
+}
+
+/*****************************************************************************
+ */
+typeInfo Compiler::verParseArgSigToTypeInfo(CORINFO_SIG_INFO* sig, CORINFO_ARG_LIST_HANDLE args)
+{
+ CORINFO_CLASS_HANDLE classHandle;
+ CorInfoType ciType = strip(info.compCompHnd->getArgType(sig, args, &classHandle));
+
+ var_types type = JITtype2varType(ciType);
+ if (varTypeIsGC(type))
+ {
+ // For efficiency, getArgType only returns something in classHandle for
+        // value types.  For other types that have additional type info, you
+ // have to call back explicitly
+ classHandle = info.compCompHnd->getArgClass(sig, args);
+ if (!classHandle)
+ {
+ NO_WAY("Could not figure out Class specified in argument or local signature");
+ }
+ }
+
+ return verMakeTypeInfo(ciType, classHandle);
+}
+
+/*****************************************************************************/
+
+// This does the expensive check to figure out whether the method
+// needs to be verified. It is called only when we fail verification,
+// just before throwing the verification exception.
+
+BOOL Compiler::verNeedsVerification()
+{
+ // If we have previously determined that verification is NOT needed
+ // (for example in Compiler::compCompile), that means verification is really not needed.
+ // Return the same decision we made before.
+ // (Note: This literally means that tiVerificationNeeded can never go from 0 to 1.)
+
+ if (!tiVerificationNeeded)
+ {
+ return tiVerificationNeeded;
+ }
+
+ assert(tiVerificationNeeded);
+
+ // Ok, we haven't concluded that verification is NOT needed. Consult the EE now to
+ // obtain the answer.
+ CorInfoCanSkipVerificationResult canSkipVerificationResult =
+ info.compCompHnd->canSkipMethodVerification(info.compMethodHnd);
+
+ // canSkipVerification will return one of the following three values:
+ // CORINFO_VERIFICATION_CANNOT_SKIP = 0, // Cannot skip verification during jit time.
+ // CORINFO_VERIFICATION_CAN_SKIP = 1, // Can skip verification during jit time.
+ // CORINFO_VERIFICATION_RUNTIME_CHECK = 2, // Skip verification during jit time,
+ // but need to insert a callout to the VM to ask during runtime
+ // whether to skip verification or not.
+
+ // Set tiRuntimeCalloutNeeded if canSkipVerification() instructs us to insert a callout for runtime check
+ if (canSkipVerificationResult == CORINFO_VERIFICATION_RUNTIME_CHECK)
+ {
+ tiRuntimeCalloutNeeded = true;
+ }
+
+ if (canSkipVerificationResult == CORINFO_VERIFICATION_DONT_JIT)
+ {
+ // Dev10 706080 - Testers don't like the assert, so just silence it
+ // by not using the macros that invoke debugAssert.
+ badCode();
+ }
+
+ // When tiVerificationNeeded is true, JIT will do the verification during JIT time.
+ // The following line means we will NOT do jit time verification if canSkipVerification
+ // returns CORINFO_VERIFICATION_CAN_SKIP or CORINFO_VERIFICATION_RUNTIME_CHECK.
+ tiVerificationNeeded = (canSkipVerificationResult == CORINFO_VERIFICATION_CANNOT_SKIP);
+ return tiVerificationNeeded;
+}
+
+BOOL Compiler::verIsByRefLike(const typeInfo& ti)
+{
+ if (ti.IsByRef())
+ {
+ return TRUE;
+ }
+ if (!ti.IsType(TI_STRUCT))
+ {
+ return FALSE;
+ }
+ return info.compCompHnd->getClassAttribs(ti.GetClassHandleForValueClass()) & CORINFO_FLG_CONTAINS_STACK_PTR;
+}
+
+BOOL Compiler::verIsSafeToReturnByRef(const typeInfo& ti)
+{
+ if (ti.IsPermanentHomeByRef())
+ {
+ return TRUE;
+ }
+ else
+ {
+ return FALSE;
+ }
+}
+
+BOOL Compiler::verIsBoxable(const typeInfo& ti)
+{
+ return (ti.IsPrimitiveType() || ti.IsObjRef() // includes boxed generic type variables
+ || ti.IsUnboxedGenericTypeVar() ||
+ (ti.IsType(TI_STRUCT) &&
+ // exclude byreflike structs
+ !(info.compCompHnd->getClassAttribs(ti.GetClassHandleForValueClass()) & CORINFO_FLG_CONTAINS_STACK_PTR)));
+}
+
+// Is it a boxed value type?
+bool Compiler::verIsBoxedValueType(typeInfo ti)
+{
+ if (ti.GetType() == TI_REF)
+ {
+ CORINFO_CLASS_HANDLE clsHnd = ti.GetClassHandleForObjRef();
+ return !!eeIsValueClass(clsHnd);
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Check if a TailCall is legal.
+ */
+
+bool Compiler::verCheckTailCallConstraint(
+ OPCODE opcode,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken, // Is this a "constrained." call on a type parameter?
+                                        bool                    speculative // If true, won't throw if verification fails. Instead it will
+ // return false to the caller.
+ // If false, it will throw.
+ )
+{
+ DWORD mflags;
+ CORINFO_SIG_INFO sig;
+ unsigned int popCount = 0; // we can't pop the stack since impImportCall needs it, so
+ // this counter is used to keep track of how many items have been
+ // virtually popped
+
+ CORINFO_METHOD_HANDLE methodHnd = nullptr;
+ CORINFO_CLASS_HANDLE methodClassHnd = nullptr;
+ unsigned methodClassFlgs = 0;
+
+ assert(impOpcodeIsCallOpcode(opcode));
+
+ if (compIsForInlining())
+ {
+ return false;
+ }
+
+ // for calli, VerifyOrReturn that this is not a virtual method
+ if (opcode == CEE_CALLI)
+ {
+ /* Get the call sig */
+ eeGetSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, &sig);
+
+ // We don't know the target method, so we have to infer the flags, or
+ // assume the worst-case.
+ mflags = (sig.callConv & CORINFO_CALLCONV_HASTHIS) ? 0 : CORINFO_FLG_STATIC;
+ }
+ else
+ {
+ methodHnd = pResolvedToken->hMethod;
+
+ mflags = info.compCompHnd->getMethodAttribs(methodHnd);
+
+ // When verifying generic code we pair the method handle with its
+ // owning class to get the exact method signature.
+ methodClassHnd = pResolvedToken->hClass;
+ assert(methodClassHnd);
+
+ eeGetMethodSig(methodHnd, &sig, methodClassHnd);
+
+ // opcode specific check
+ methodClassFlgs = info.compCompHnd->getClassAttribs(methodClassHnd);
+ }
+
+ // We must have got the methodClassHnd if opcode is not CEE_CALLI
+ assert((methodHnd != nullptr && methodClassHnd != nullptr) || opcode == CEE_CALLI);
+
+ if ((sig.callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG)
+ {
+ eeGetCallSiteSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, &sig);
+ }
+
+ // check compatibility of the arguments
+ unsigned int argCount;
+ argCount = sig.numArgs;
+ CORINFO_ARG_LIST_HANDLE args;
+ args = sig.args;
+ while (argCount--)
+ {
+ typeInfo tiDeclared = verParseArgSigToTypeInfo(&sig, args).NormaliseForStack();
+
+ // check that the argument is not a byref for tailcalls
+ VerifyOrReturnSpeculative(!verIsByRefLike(tiDeclared), "tailcall on byrefs", speculative);
+
+        // For unsafe code, we might have parameters containing a pointer to a stack location.
+ // Disallow the tailcall for this kind.
+ CORINFO_CLASS_HANDLE classHandle;
+ CorInfoType ciType = strip(info.compCompHnd->getArgType(&sig, args, &classHandle));
+ VerifyOrReturnSpeculative(ciType != CORINFO_TYPE_PTR, "tailcall on CORINFO_TYPE_PTR", speculative);
+
+ args = info.compCompHnd->getArgNext(args);
+ }
+
+ // update popCount
+ popCount += sig.numArgs;
+
+    // check for 'this', which is present on non-static methods not called via NEWOBJ
+ if (!(mflags & CORINFO_FLG_STATIC))
+ {
+ // Always update the popCount.
+ // This is crucial for the stack calculation to be correct.
+ typeInfo tiThis = impStackTop(popCount).seTypeInfo;
+ popCount++;
+
+ if (opcode == CEE_CALLI)
+ {
+ // For CALLI, we don't know the methodClassHnd. Therefore, let's check the "this" object
+ // on the stack.
+ if (tiThis.IsValueClass())
+ {
+ tiThis.MakeByRef();
+ }
+ VerifyOrReturnSpeculative(!verIsByRefLike(tiThis), "byref in tailcall", speculative);
+ }
+ else
+ {
+ // Check type compatibility of the this argument
+ typeInfo tiDeclaredThis = verMakeTypeInfo(methodClassHnd);
+ if (tiDeclaredThis.IsValueClass())
+ {
+ tiDeclaredThis.MakeByRef();
+ }
+
+ VerifyOrReturnSpeculative(!verIsByRefLike(tiDeclaredThis), "byref in tailcall", speculative);
+ }
+ }
+
+ // Tail calls on constrained calls should be illegal too:
+ // when instantiated at a value type, a constrained call may pass the address of a stack allocated value
+ VerifyOrReturnSpeculative(!pConstrainedResolvedToken, "byref in constrained tailcall", speculative);
+
+ // Get the exact view of the signature for an array method
+ if (sig.retType != CORINFO_TYPE_VOID)
+ {
+ if (methodClassFlgs & CORINFO_FLG_ARRAY)
+ {
+ assert(opcode != CEE_CALLI);
+ eeGetCallSiteSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, &sig);
+ }
+ }
+
+ typeInfo tiCalleeRetType = verMakeTypeInfo(sig.retType, sig.retTypeClass);
+ typeInfo tiCallerRetType =
+ verMakeTypeInfo(info.compMethodInfo->args.retType, info.compMethodInfo->args.retTypeClass);
+
+ // void return type gets morphed into the error type, so we have to treat them specially here
+ if (sig.retType == CORINFO_TYPE_VOID)
+ {
+ VerifyOrReturnSpeculative(info.compMethodInfo->args.retType == CORINFO_TYPE_VOID, "tailcall return mismatch",
+ speculative);
+ }
+ else
+ {
+ VerifyOrReturnSpeculative(tiCompatibleWith(NormaliseForStack(tiCalleeRetType),
+ NormaliseForStack(tiCallerRetType), true),
+ "tailcall return mismatch", speculative);
+ }
+
+ // for tailcall, stack must be empty
+ VerifyOrReturnSpeculative(verCurrentState.esStackDepth == popCount, "stack non-empty on tailcall", speculative);
+
+ return true; // Yes, tailcall is legal
+}
+
+/*****************************************************************************
+ *
+ * Checks the IL verification rules for the call
+ */
+
+void Compiler::verVerifyCall(OPCODE opcode,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken,
+ bool tailCall,
+ bool readonlyCall,
+ const BYTE* delegateCreateStart,
+ const BYTE* codeAddr,
+ CORINFO_CALL_INFO* callInfo DEBUGARG(const char* methodName))
+{
+ DWORD mflags;
+ CORINFO_SIG_INFO* sig = nullptr;
+ unsigned int popCount = 0; // we can't pop the stack since impImportCall needs it, so
+ // this counter is used to keep track of how many items have been
+ // virtually popped
+
+ // for calli, VerifyOrReturn that this is not a virtual method
+ if (opcode == CEE_CALLI)
+ {
+ Verify(false, "Calli not verifiable");
+ return;
+ }
+
+ //<NICE> It would be nice to cache the rest of it, but eeFindMethod is the big ticket item.
+ mflags = callInfo->verMethodFlags;
+
+ sig = &callInfo->verSig;
+
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG)
+ {
+ eeGetCallSiteSig(pResolvedToken->token, pResolvedToken->tokenScope, pResolvedToken->tokenContext, sig);
+ }
+
+ // opcode specific check
+ unsigned methodClassFlgs = callInfo->classFlags;
+ switch (opcode)
+ {
+ case CEE_CALLVIRT:
+ // cannot do callvirt on valuetypes
+ VerifyOrReturn(!(methodClassFlgs & CORINFO_FLG_VALUECLASS), "callVirt on value class");
+ VerifyOrReturn(sig->hasThis(), "CallVirt on static method");
+ break;
+
+ case CEE_NEWOBJ:
+ {
+ assert(!tailCall); // Importer should not allow this
+ VerifyOrReturn((mflags & CORINFO_FLG_CONSTRUCTOR) && !(mflags & CORINFO_FLG_STATIC),
+ "newobj must be on instance");
+
+ if (methodClassFlgs & CORINFO_FLG_DELEGATE)
+ {
+ VerifyOrReturn(sig->numArgs == 2, "wrong number args to delegate ctor");
+ typeInfo tiDeclaredObj = verParseArgSigToTypeInfo(sig, sig->args).NormaliseForStack();
+ typeInfo tiDeclaredFtn =
+ verParseArgSigToTypeInfo(sig, info.compCompHnd->getArgNext(sig->args)).NormaliseForStack();
+ VerifyOrReturn(tiDeclaredFtn.IsNativeIntType(), "ftn arg needs to be a native int type");
+
+ assert(popCount == 0);
+ typeInfo tiActualObj = impStackTop(1).seTypeInfo;
+ typeInfo tiActualFtn = impStackTop(0).seTypeInfo;
+
+ VerifyOrReturn(tiActualFtn.IsMethod(), "delegate needs method as first arg");
+ VerifyOrReturn(tiCompatibleWith(tiActualObj, tiDeclaredObj, true), "delegate object type mismatch");
+ VerifyOrReturn(tiActualObj.IsNullObjRef() || tiActualObj.IsType(TI_REF),
+ "delegate object type mismatch");
+
+ CORINFO_CLASS_HANDLE objTypeHandle =
+ tiActualObj.IsNullObjRef() ? nullptr : tiActualObj.GetClassHandleForObjRef();
+
+ // the method signature must be compatible with the delegate's invoke method
+
+ // check that for virtual functions, the type of the object used to get the
+ // ftn ptr is the same as the type of the object passed to the delegate ctor.
+ // since this is a bit of work to determine in general, we pattern match stylized
+ // code sequences
+
+ // the delegate creation code check, which used to be done later, is now done here
+                // so we can read delegateMethodRef directly
+ // from the preceding LDFTN or CEE_LDVIRTFN instruction sequence;
+ // we then use it in our call to isCompatibleDelegate().
+
+ mdMemberRef delegateMethodRef = mdMemberRefNil;
+ VerifyOrReturn(verCheckDelegateCreation(delegateCreateStart, codeAddr, delegateMethodRef),
+ "must create delegates with certain IL");
+
+ CORINFO_RESOLVED_TOKEN delegateResolvedToken;
+ delegateResolvedToken.tokenContext = impTokenLookupContextHandle;
+ delegateResolvedToken.tokenScope = info.compScopeHnd;
+ delegateResolvedToken.token = delegateMethodRef;
+ delegateResolvedToken.tokenType = CORINFO_TOKENKIND_Method;
+ info.compCompHnd->resolveToken(&delegateResolvedToken);
+
+ CORINFO_CALL_INFO delegateCallInfo;
+ eeGetCallInfo(&delegateResolvedToken, nullptr /* constraint typeRef */,
+ addVerifyFlag(CORINFO_CALLINFO_SECURITYCHECKS), &delegateCallInfo);
+
+ BOOL isOpenDelegate = FALSE;
+ VerifyOrReturn(info.compCompHnd->isCompatibleDelegate(objTypeHandle, delegateResolvedToken.hClass,
+ tiActualFtn.GetMethod(), pResolvedToken->hClass,
+ &isOpenDelegate),
+ "function incompatible with delegate");
+
+ // check the constraints on the target method
+ VerifyOrReturn(info.compCompHnd->satisfiesClassConstraints(delegateResolvedToken.hClass),
+ "delegate target has unsatisfied class constraints");
+ VerifyOrReturn(info.compCompHnd->satisfiesMethodConstraints(delegateResolvedToken.hClass,
+ tiActualFtn.GetMethod()),
+ "delegate target has unsatisfied method constraints");
+
+ // See ECMA spec section 1.8.1.5.2 (Delegating via instance dispatch)
+ // for additional verification rules for delegates
+ CORINFO_METHOD_HANDLE actualMethodHandle = tiActualFtn.GetMethod();
+ DWORD actualMethodAttribs = info.compCompHnd->getMethodAttribs(actualMethodHandle);
+ if (impIsLDFTN_TOKEN(delegateCreateStart, codeAddr))
+ {
+
+ if ((actualMethodAttribs & CORINFO_FLG_VIRTUAL) && ((actualMethodAttribs & CORINFO_FLG_FINAL) == 0)
+#ifdef DEBUG
+ && StrictCheckForNonVirtualCallToVirtualMethod()
+#endif
+ )
+ {
+ if (info.compCompHnd->shouldEnforceCallvirtRestriction(info.compScopeHnd))
+ {
+ VerifyOrReturn(tiActualObj.IsThisPtr() && lvaIsOriginalThisReadOnly() ||
+ verIsBoxedValueType(tiActualObj),
+ "The 'this' parameter to the call must be either the calling method's "
+ "'this' parameter or "
+ "a boxed value type.");
+ }
+ }
+ }
+
+ if (actualMethodAttribs & CORINFO_FLG_PROTECTED)
+ {
+ BOOL targetIsStatic = actualMethodAttribs & CORINFO_FLG_STATIC;
+
+ Verify(targetIsStatic || !isOpenDelegate,
+ "Unverifiable creation of an open instance delegate for a protected member.");
+
+ CORINFO_CLASS_HANDLE instanceClassHnd = (tiActualObj.IsNullObjRef() || targetIsStatic)
+ ? info.compClassHnd
+ : tiActualObj.GetClassHandleForObjRef();
+
+ // In the case of protected methods, it is a requirement that the 'this'
+ // pointer be a subclass of the current context. Perform this check.
+ Verify(info.compCompHnd->canAccessFamily(info.compMethodHnd, instanceClassHnd),
+ "Accessing protected method through wrong type.");
+ }
+ goto DONE_ARGS;
+ }
+ }
+ // fall thru to default checks
+ default:
+ VerifyOrReturn(!(mflags & CORINFO_FLG_ABSTRACT), "method abstract");
+ }
+ VerifyOrReturn(!((mflags & CORINFO_FLG_CONSTRUCTOR) && (methodClassFlgs & CORINFO_FLG_DELEGATE)),
+ "can only newobj a delegate constructor");
+
+ // check compatibility of the arguments
+ unsigned int argCount;
+ argCount = sig->numArgs;
+ CORINFO_ARG_LIST_HANDLE args;
+ args = sig->args;
+ while (argCount--)
+ {
+ typeInfo tiActual = impStackTop(popCount + argCount).seTypeInfo;
+
+ typeInfo tiDeclared = verParseArgSigToTypeInfo(sig, args).NormaliseForStack();
+ VerifyOrReturn(tiCompatibleWith(tiActual, tiDeclared, true), "type mismatch");
+
+ args = info.compCompHnd->getArgNext(args);
+ }
+
+DONE_ARGS:
+
+ // update popCount
+ popCount += sig->numArgs;
+
+    // check for 'this', which is present on non-static methods not called via NEWOBJ
+ CORINFO_CLASS_HANDLE instanceClassHnd = info.compClassHnd;
+ if (!(mflags & CORINFO_FLG_STATIC) && (opcode != CEE_NEWOBJ))
+ {
+ typeInfo tiThis = impStackTop(popCount).seTypeInfo;
+ popCount++;
+
+ // If it is null, we assume we can access it (since it will AV shortly)
+ // If it is anything but a reference class, there is no hierarchy, so
+ // again, we don't need the precise instance class to compute 'protected' access
+ if (tiThis.IsType(TI_REF))
+ {
+ instanceClassHnd = tiThis.GetClassHandleForObjRef();
+ }
+
+ // Check type compatibility of the this argument
+ typeInfo tiDeclaredThis = verMakeTypeInfo(pResolvedToken->hClass);
+ if (tiDeclaredThis.IsValueClass())
+ {
+ tiDeclaredThis.MakeByRef();
+ }
+
+ // If this is a call to the base class .ctor, set thisPtr Init for
+ // this block.
+ if (mflags & CORINFO_FLG_CONSTRUCTOR)
+ {
+ if (verTrackObjCtorInitState && tiThis.IsThisPtr() &&
+ verIsCallToInitThisPtr(info.compClassHnd, pResolvedToken->hClass))
+ {
+ assert(verCurrentState.thisInitialized !=
+ TIS_Bottom); // This should never be the case just from the logic of the verifier.
+ VerifyOrReturn(verCurrentState.thisInitialized == TIS_Uninit,
+ "Call to base class constructor when 'this' is possibly initialized");
+ // Otherwise, 'this' is now initialized.
+ verCurrentState.thisInitialized = TIS_Init;
+ tiThis.SetInitialisedObjRef();
+ }
+ else
+ {
+ // We allow direct calls to value type constructors
+ // NB: we have to check that the contents of tiThis is a value type, otherwise we could use a
+ // constrained callvirt to illegally re-enter a .ctor on a value of reference type.
+ VerifyOrReturn(tiThis.IsByRef() && DereferenceByRef(tiThis).IsValueClass(),
+ "Bad call to a constructor");
+ }
+ }
+
+ if (pConstrainedResolvedToken != nullptr)
+ {
+ VerifyOrReturn(tiThis.IsByRef(), "non-byref this type in constrained call");
+
+ typeInfo tiConstraint = verMakeTypeInfo(pConstrainedResolvedToken->hClass);
+
+ // We just dereference this and test for equality
+ tiThis.DereferenceByRef();
+ VerifyOrReturn(typeInfo::AreEquivalent(tiThis, tiConstraint),
+ "this type mismatch with constrained type operand");
+
+ // Now pretend the this type is the boxed constrained type, for the sake of subsequent checks
+ tiThis = typeInfo(TI_REF, pConstrainedResolvedToken->hClass);
+ }
+
+ // To support direct calls on readonly byrefs, just pretend tiDeclaredThis is readonly too
+ if (tiDeclaredThis.IsByRef() && tiThis.IsReadonlyByRef())
+ {
+ tiDeclaredThis.SetIsReadonlyByRef();
+ }
+
+ VerifyOrReturn(tiCompatibleWith(tiThis, tiDeclaredThis, true), "this type mismatch");
+
+ if (tiThis.IsByRef())
+ {
+ // Find the actual type where the method exists (as opposed to what is declared
+ // in the metadata). This is to prevent passing a byref as the "this" argument
+ // while calling methods like System.ValueType.GetHashCode() which expect boxed objects.
+
+ CORINFO_CLASS_HANDLE actualClassHnd = info.compCompHnd->getMethodClass(pResolvedToken->hMethod);
+ VerifyOrReturn(eeIsValueClass(actualClassHnd),
+ "Call to base type of valuetype (which is never a valuetype)");
+ }
+
+ // Rules for non-virtual call to a non-final virtual method:
+
+ // Define:
+ // The "this" pointer is considered to be "possibly written" if
+            //   1. Its address has been taken (LDARGA 0) anywhere in the method.
+ // (or)
+ // 2. It has been stored to (STARG.0) anywhere in the method.
+
+ // A non-virtual call to a non-final virtual method is only allowed if
+ // 1. The this pointer passed to the callee is an instance of a boxed value type.
+ // (or)
+ // 2. The this pointer passed to the callee is the current method's this pointer.
+ // (and) The current method's this pointer is not "possibly written".
+
+ // Thus the rule is that if you assign to this ANYWHERE you can't make "base" calls to
+            // virtual methods.  (Luckily this does not affect .ctors, since they are not virtual.)
+            // This is stronger than is strictly needed, but implementing a laxer rule is significantly
+            // harder and more error prone.
+
+ if (opcode == CEE_CALL && (mflags & CORINFO_FLG_VIRTUAL) && ((mflags & CORINFO_FLG_FINAL) == 0)
+#ifdef DEBUG
+ && StrictCheckForNonVirtualCallToVirtualMethod()
+#endif
+ )
+ {
+ if (info.compCompHnd->shouldEnforceCallvirtRestriction(info.compScopeHnd))
+ {
+ VerifyOrReturn(
+ tiThis.IsThisPtr() && lvaIsOriginalThisReadOnly() || verIsBoxedValueType(tiThis),
+ "The 'this' parameter to the call must be either the calling method's 'this' parameter or "
+ "a boxed value type.");
+ }
+ }
+ }
+
+ // check any constraints on the callee's class and type parameters
+ VerifyOrReturn(info.compCompHnd->satisfiesClassConstraints(pResolvedToken->hClass),
+ "method has unsatisfied class constraints");
+ VerifyOrReturn(info.compCompHnd->satisfiesMethodConstraints(pResolvedToken->hClass, pResolvedToken->hMethod),
+ "method has unsatisfied method constraints");
+
+ if (mflags & CORINFO_FLG_PROTECTED)
+ {
+ VerifyOrReturn(info.compCompHnd->canAccessFamily(info.compMethodHnd, instanceClassHnd),
+ "Can't access protected method");
+ }
+
+ // Get the exact view of the signature for an array method
+ if (sig->retType != CORINFO_TYPE_VOID)
+ {
+ eeGetMethodSig(pResolvedToken->hMethod, sig, pResolvedToken->hClass);
+ }
+
+ // "readonly." prefixed calls only allowed for the Address operation on arrays.
+ // The methods supported by array types are under the control of the EE
+ // so we can trust that only the Address operation returns a byref.
+ if (readonlyCall)
+ {
+ typeInfo tiCalleeRetType = verMakeTypeInfo(sig->retType, sig->retTypeClass);
+ VerifyOrReturn((methodClassFlgs & CORINFO_FLG_ARRAY) && tiCalleeRetType.IsByRef(),
+ "unexpected use of readonly prefix");
+ }
+
+ // Verify the tailcall
+ if (tailCall)
+ {
+ verCheckTailCallConstraint(opcode, pResolvedToken, pConstrainedResolvedToken, false);
+ }
+}
+
+/*****************************************************************************
+ * Checks that a delegate creation is done using the following pattern:
+ * dup
+ * ldvirtftn targetMemberRef
+ * OR
+ * ldftn targetMemberRef
+ *
+ * 'delegateCreateStart' points at the last dup or ldftn in this basic block (null if
+ * not in this basic block)
+ *
+ * targetMemberRef is read from the code sequence.
+ * targetMemberRef is validated iff verificationNeeded.
+ */
+
+BOOL Compiler::verCheckDelegateCreation(const BYTE* delegateCreateStart,
+ const BYTE* codeAddr,
+ mdMemberRef& targetMemberRef)
+{
+ if (impIsLDFTN_TOKEN(delegateCreateStart, codeAddr))
+ {
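+        // ldftn is a two-byte (0xFE-prefixed) opcode, so the metadata token follows at offset 2.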
+ targetMemberRef = getU4LittleEndian(&delegateCreateStart[2]);
+ return TRUE;
+ }
+ else if (impIsDUP_LDVIRTFTN_TOKEN(delegateCreateStart, codeAddr))
+ {
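+        // dup is one byte and ldvirtftn is two bytes, so the metadata token follows at offset 3.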
+ targetMemberRef = getU4LittleEndian(&delegateCreateStart[3]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+typeInfo Compiler::verVerifySTIND(const typeInfo& tiTo, const typeInfo& value, const typeInfo& instrType)
+{
+ Verify(!tiTo.IsReadonlyByRef(), "write to readonly byref");
+ typeInfo ptrVal = verVerifyLDIND(tiTo, instrType);
+ typeInfo normPtrVal = typeInfo(ptrVal).NormaliseForStack();
+ if (!tiCompatibleWith(value, normPtrVal, true))
+ {
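+        // The compatibility check failed: report the verification failure via Verify and
+        // record that an unverifiable cast was used.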
+ Verify(tiCompatibleWith(value, normPtrVal, true), "type mismatch");
+ compUnsafeCastUsed = true;
+ }
+ return ptrVal;
+}
+
+typeInfo Compiler::verVerifyLDIND(const typeInfo& ptr, const typeInfo& instrType)
+{
+ assert(!instrType.IsStruct());
+
+ typeInfo ptrVal;
+ if (ptr.IsByRef())
+ {
+ ptrVal = DereferenceByRef(ptr);
+ if (instrType.IsObjRef() && !ptrVal.IsObjRef())
+ {
+ Verify(false, "bad pointer");
+ compUnsafeCastUsed = true;
+ }
+ else if (!instrType.IsObjRef() && !typeInfo::AreEquivalent(instrType, ptrVal))
+ {
+ Verify(false, "pointer not consistent with instr");
+ compUnsafeCastUsed = true;
+ }
+ }
+ else
+ {
+ Verify(false, "pointer not byref");
+ compUnsafeCastUsed = true;
+ }
+
+ return ptrVal;
+}
+
+// Verify that the field is used properly. 'tiThis' is NULL for statics,
+// 'fieldFlags' is the field's attributes, and mutator is TRUE if it is a
+// ld*flda or a st*fld.
+// 'enclosingClass' is given if we are accessing a field in some specific type.
+
+void Compiler::verVerifyField(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ const CORINFO_FIELD_INFO& fieldInfo,
+ const typeInfo* tiThis,
+ BOOL mutator,
+ BOOL allowPlainStructAsThis)
+{
+ CORINFO_CLASS_HANDLE enclosingClass = pResolvedToken->hClass;
+ unsigned fieldFlags = fieldInfo.fieldFlags;
+ CORINFO_CLASS_HANDLE instanceClass =
+ info.compClassHnd; // for statics, we imagine the instance is the current class.
+
+ bool isStaticField = ((fieldFlags & CORINFO_FLG_FIELD_STATIC) != 0);
+ if (mutator)
+ {
+        Verify(!(fieldFlags & CORINFO_FLG_FIELD_UNMANAGED), "mutating an RVA based static");
+ if ((fieldFlags & CORINFO_FLG_FIELD_FINAL))
+ {
+ Verify((info.compFlags & CORINFO_FLG_CONSTRUCTOR) && enclosingClass == info.compClassHnd &&
+ info.compIsStatic == isStaticField,
+ "bad use of initonly field (set or address taken)");
+ }
+ }
+
+ if (tiThis == nullptr)
+ {
+ Verify(isStaticField, "used static opcode with non-static field");
+ }
+ else
+ {
+ typeInfo tThis = *tiThis;
+
+ if (allowPlainStructAsThis && tThis.IsValueClass())
+ {
+ tThis.MakeByRef();
+ }
+
+ // If it is null, we assume we can access it (since it will AV shortly)
+        // If it is anything but a reference class, there is no hierarchy, so
+ // again, we don't need the precise instance class to compute 'protected' access
+ if (tiThis->IsType(TI_REF))
+ {
+ instanceClass = tiThis->GetClassHandleForObjRef();
+ }
+
+ // Note that even if the field is static, we require that the this pointer
+        // satisfy the same constraints as a non-static field.  This happens to
+        // be simpler and seems reasonable.
+ typeInfo tiDeclaredThis = verMakeTypeInfo(enclosingClass);
+ if (tiDeclaredThis.IsValueClass())
+ {
+ tiDeclaredThis.MakeByRef();
+
+ // we allow read-only tThis, on any field access (even stores!), because if the
+ // class implementor wants to prohibit stores he should make the field private.
+ // we do this by setting the read-only bit on the type we compare tThis to.
+ tiDeclaredThis.SetIsReadonlyByRef();
+ }
+ else if (verTrackObjCtorInitState && tThis.IsThisPtr())
+ {
+ // Any field access is legal on "uninitialized" this pointers.
+ // The easiest way to implement this is to simply set the
+ // initialized bit for the duration of the type check on the
+ // field access only. It does not change the state of the "this"
+ // for the function as a whole. Note that the "tThis" is a copy
+ // of the original "this" type (*tiThis) passed in.
+ tThis.SetInitialisedObjRef();
+ }
+
+ Verify(tiCompatibleWith(tThis, tiDeclaredThis, true), "this type mismatch");
+ }
+
+ // Presently the JIT does not check that we don't store or take the address of init-only fields
+ // since we cannot guarantee their immutability and it is not a security issue.
+
+    // check any constraints on the field's class --- accessing the field might cause a class constructor to run.
+ VerifyOrReturn(info.compCompHnd->satisfiesClassConstraints(enclosingClass),
+ "field has unsatisfied class constraints");
+ if (fieldFlags & CORINFO_FLG_FIELD_PROTECTED)
+ {
+ Verify(info.compCompHnd->canAccessFamily(info.compMethodHnd, instanceClass),
+ "Accessing protected method through wrong type.");
+ }
+}
+
+void Compiler::verVerifyCond(const typeInfo& tiOp1, const typeInfo& tiOp2, unsigned opcode)
+{
+ if (tiOp1.IsNumberType())
+ {
+#ifdef _TARGET_64BIT_
+ Verify(tiCompatibleWith(tiOp1, tiOp2, true), "Cond type mismatch");
+#else // _TARGET_64BIT
+ // [10/17/2013] Consider changing this: to put on my verification lawyer hat,
+ // this is non-conforming to the ECMA Spec: types don't have to be equivalent,
+ // but compatible, since we can coalesce native int with int32 (see section III.1.5).
+ Verify(typeInfo::AreEquivalent(tiOp1, tiOp2), "Cond type mismatch");
+#endif // !_TARGET_64BIT_
+ }
+ else if (tiOp1.IsObjRef())
+ {
+ switch (opcode)
+ {
+ case CEE_BEQ_S:
+ case CEE_BEQ:
+ case CEE_BNE_UN_S:
+ case CEE_BNE_UN:
+ case CEE_CEQ:
+ case CEE_CGT_UN:
+ break;
+ default:
+ Verify(FALSE, "Cond not allowed on object types");
+ }
+ Verify(tiOp2.IsObjRef(), "Cond type mismatch");
+ }
+ else if (tiOp1.IsByRef())
+ {
+ Verify(tiOp2.IsByRef(), "Cond type mismatch");
+ }
+ else
+ {
+ Verify(tiOp1.IsMethod() && tiOp2.IsMethod(), "Cond type mismatch");
+ }
+}
+
+void Compiler::verVerifyThisPtrInitialised()
+{
+ if (verTrackObjCtorInitState)
+ {
+ Verify(verCurrentState.thisInitialized == TIS_Init, "this ptr is not initialized");
+ }
+}
+
+BOOL Compiler::verIsCallToInitThisPtr(CORINFO_CLASS_HANDLE context, CORINFO_CLASS_HANDLE target)
+{
+ // Either target == context, in this case calling an alternate .ctor
+ // Or target is the immediate parent of context
+
+ return ((target == context) || (target == info.compCompHnd->getParentType(context)));
+}
+
+GenTreePtr Compiler::impImportLdvirtftn(GenTreePtr thisPtr,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_CALL_INFO* pCallInfo)
+{
+ if ((pCallInfo->methodFlags & CORINFO_FLG_EnC) && !(pCallInfo->classFlags & CORINFO_FLG_INTERFACE))
+ {
+ NO_WAY("Virtual call to a function added via EnC is not supported");
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun() && !pCallInfo->exactContextNeedsRuntimeLookup)
+ {
+ GenTreeCall* call = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR, TYP_I_IMPL, GTF_EXCEPT,
+ gtNewArgList(thisPtr));
+
+ call->setEntryPoint(pCallInfo->codePointerLookup.constLookup);
+
+ return call;
+ }
+#endif
+
+ // Get the exact descriptor for the static callsite
+ GenTreePtr exactTypeDesc = impParentClassTokenToHandle(pResolvedToken);
+ if (exactTypeDesc == nullptr)
+ { // compDonotInline()
+ return nullptr;
+ }
+
+ GenTreePtr exactMethodDesc = impTokenToHandle(pResolvedToken);
+ if (exactMethodDesc == nullptr)
+ { // compDonotInline()
+ return nullptr;
+ }
+
+ GenTreeArgList* helpArgs = gtNewArgList(exactMethodDesc);
+
+ helpArgs = gtNewListNode(exactTypeDesc, helpArgs);
+
+ helpArgs = gtNewListNode(thisPtr, helpArgs);
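+    // gtNewListNode prepends, so the helper argument list ends up as
+    // (thisPtr, exactTypeDesc, exactMethodDesc).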
+
+ // Call helper function. This gets the target address of the final destination callsite.
+
+ return gtNewHelperCallNode(CORINFO_HELP_VIRTUAL_FUNC_PTR, TYP_I_IMPL, GTF_EXCEPT, helpArgs);
+}
+
+/*****************************************************************************
+ *
+ * Build and import a box node
+ */
+
+void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken)
+{
+ // Get the tree for the type handle for the boxed object. In the case
+ // of shared generic code or ngen'd code this might be an embedded
+ // computation.
+    // Note we can only do this if the class constructor has been called.
+    // We can always do it on primitive types.
+
+ GenTreePtr op1 = nullptr;
+ GenTreePtr op2 = nullptr;
+ var_types lclTyp;
+
+ impSpillSpecialSideEff();
+
+ // Now get the expression to box from the stack.
+ CORINFO_CLASS_HANDLE operCls;
+ GenTreePtr exprToBox = impPopStack(operCls).val;
+
+ CorInfoHelpFunc boxHelper = info.compCompHnd->getBoxHelper(pResolvedToken->hClass);
+ if (boxHelper == CORINFO_HELP_BOX)
+ {
+ // we are doing 'normal' boxing. This means that we can inline the box operation
+ // Box(expr) gets morphed into
+ // temp = new(clsHnd)
+ // cpobj(temp+4, expr, clsHnd)
+ // push temp
+ // The code paths differ slightly below for structs and primitives because
+ // "cpobj" differs in these cases. In one case you get
+ // impAssignStructPtr(temp+4, expr, clsHnd)
+ // and the other you get
+ // *(temp+4) = expr
+
+ if (impBoxTempInUse || impBoxTemp == BAD_VAR_NUM)
+ {
+ impBoxTemp = lvaGrabTemp(true DEBUGARG("Box Helper"));
+ }
+
+ // The box temp needs to stay in use until this box expression is appended to
+ // some other node. We approximate this by keeping it alive until
+ // the opcode stack becomes empty.
+ impBoxTempInUse = true;
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ bool usingReadyToRunHelper = false;
+
+ if (opts.IsReadyToRun())
+ {
+ op1 = impReadyToRunHelperToTree(pResolvedToken, CORINFO_HELP_READYTORUN_NEW, TYP_REF);
+ usingReadyToRunHelper = (op1 != NULL);
+ }
+
+ if (!usingReadyToRunHelper)
+#endif
+ {
+ // TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
+ // and the newfast call with a single call to a dynamic R2R cell that will:
+ // 1) Load the context
+ // 2) Perform the generic dictionary lookup and caching, and generate the appropriate stub
+ // 3) Allocate and return the new object for boxing
+ // Reason: performance (today, we'll always use the slow helper for the R2R generics case)
+
+ // Ensure that the value class is restored
+ op2 = impTokenToHandle(pResolvedToken, nullptr, TRUE /* mustRestoreHandle */);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ op1 = gtNewHelperCallNode(info.compCompHnd->getNewHelper(pResolvedToken, info.compMethodHnd), TYP_REF, 0,
+ gtNewArgList(op2));
+ }
+
+ /* Remember that this basic block contains 'new' of an object */
+ compCurBB->bbFlags |= BBF_HAS_NEWOBJ;
+
+ GenTreePtr asg = gtNewTempAssign(impBoxTemp, op1);
+
+ GenTreePtr asgStmt = impAppendTree(asg, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+ op1 = gtNewLclvNode(impBoxTemp, TYP_REF);
+ op2 = gtNewIconNode(sizeof(void*), TYP_I_IMPL);
+ op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1, op2);
+
+ if (varTypeIsStruct(exprToBox))
+ {
+ assert(info.compCompHnd->getClassSize(pResolvedToken->hClass) == info.compCompHnd->getClassSize(operCls));
+ op1 = impAssignStructPtr(op1, exprToBox, operCls, (unsigned)CHECK_SPILL_ALL);
+ }
+ else
+ {
+ lclTyp = exprToBox->TypeGet();
+ if (lclTyp == TYP_BYREF)
+ {
+ lclTyp = TYP_I_IMPL;
+ }
+ CorInfoType jitType = info.compCompHnd->asCorInfoType(pResolvedToken->hClass);
+ if (impIsPrimitive(jitType))
+ {
+ lclTyp = JITtype2varType(jitType);
+ }
+ assert(genActualType(exprToBox->TypeGet()) == genActualType(lclTyp) ||
+ varTypeIsFloating(lclTyp) == varTypeIsFloating(exprToBox->TypeGet()));
+ var_types srcTyp = exprToBox->TypeGet();
+ var_types dstTyp = lclTyp;
+
+ if (srcTyp != dstTyp)
+ {
+ assert((varTypeIsFloating(srcTyp) && varTypeIsFloating(dstTyp)) ||
+ (varTypeIsIntegral(srcTyp) && varTypeIsIntegral(dstTyp)));
+ exprToBox = gtNewCastNode(dstTyp, exprToBox, dstTyp);
+ }
+ op1 = gtNewAssignNode(gtNewOperNode(GT_IND, lclTyp, op1), exprToBox);
+ }
+
+ op2 = gtNewLclvNode(impBoxTemp, TYP_REF);
+ op1 = gtNewOperNode(GT_COMMA, TYP_REF, op1, op2);
+
+ // Record that this is a "box" node.
+ op1 = new (this, GT_BOX) GenTreeBox(TYP_REF, op1, asgStmt);
+
+ // If it is a value class, mark the "box" node. We can use this information
+ // to optimise several cases:
+ // "box(x) == null" --> false
+ // "(box(x)).CallAnInterfaceMethod(...)" --> "(&x).CallAValueTypeMethod"
+ // "(box(x)).CallAnObjectMethod(...)" --> "(&x).CallAValueTypeMethod"
+
+ op1->gtFlags |= GTF_BOX_VALUE;
+ assert(op1->IsBoxedValue());
+ assert(asg->gtOper == GT_ASG);
+ }
+ else
+ {
+ // Don't optimize, just call the helper and be done with it
+
+ // Ensure that the value class is restored
+ op2 = impTokenToHandle(pResolvedToken, nullptr, TRUE /* mustRestoreHandle */);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ GenTreeArgList* args = gtNewArgList(op2, impGetStructAddr(exprToBox, operCls, (unsigned)CHECK_SPILL_ALL, true));
+ op1 = gtNewHelperCallNode(boxHelper, TYP_REF, GTF_EXCEPT, args);
+ }
+
+ /* Push the result back on the stack, */
+ /* even if clsHnd is a value class we want the TI_REF */
+ typeInfo tiRetVal = typeInfo(TI_REF, info.compCompHnd->getTypeForBox(pResolvedToken->hClass));
+ impPushOnStack(op1, tiRetVal);
+}
+
+//------------------------------------------------------------------------
+ // impImportNewObjArray: Build and import `new` of multi-dimensional array
+//
+// Arguments:
+// pResolvedToken - The CORINFO_RESOLVED_TOKEN that has been initialized
+// by a call to CEEInfo::resolveToken().
+// pCallInfo - The CORINFO_CALL_INFO that has been initialized
+// by a call to CEEInfo::getCallInfo().
+//
+// Assumptions:
+// The multi-dimensional array constructor arguments (array dimensions) are
+// pushed on the IL stack on entry to this method.
+//
+// Notes:
+// Multi-dimensional array constructors are imported as calls to a JIT
+// helper, not as regular calls.
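+ //
+ // Illustrative example (assuming typical C# compiler output): for
+ //     var a = new int[2, 3];
+ // the IL is roughly
+ //     ldc.i4.2
+ //     ldc.i4.3
+ //     newobj instance void int32[0...,0...]::.ctor(int32, int32)
+ // and the two dimension values are what this method finds on the IL
+ // stack on entry.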
+
+void Compiler::impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo)
+{
+ GenTreePtr classHandle = impParentClassTokenToHandle(pResolvedToken);
+ if (classHandle == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ assert(pCallInfo->sig.numArgs);
+
+ GenTreePtr node;
+ GenTreeArgList* args;
+
+ //
+ // There are two different JIT helpers that can be used to allocate
+ // multi-dimensional arrays:
+ //
+ // - CORINFO_HELP_NEW_MDARR - takes the array dimensions as varargs.
+ // This variant is deprecated. It should be eventually removed.
+ //
+ // - CORINFO_HELP_NEW_MDARR_NONVARARG - takes the array dimensions as
+ // pointer to block of int32s. This variant is more portable.
+ //
+ // The non-varargs helper is enabled for CoreRT only for now. Enabling this
+ // unconditionally would require ReadyToRun version bump.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if COR_JIT_EE_VERSION > 460
+ if (!opts.IsReadyToRun() || (eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI))
+ {
+ LclVarDsc* newObjArrayArgsVar;
+
+ // Reuse the temp used to pass the array dimensions to avoid bloating
+ // the stack frame in case there are multiple calls to multi-dim array
+ // constructors within a single method.
+ if (lvaNewObjArrayArgs == BAD_VAR_NUM)
+ {
+ lvaNewObjArrayArgs = lvaGrabTemp(false DEBUGARG("NewObjArrayArgs"));
+ lvaTable[lvaNewObjArrayArgs].lvType = TYP_BLK;
+ lvaTable[lvaNewObjArrayArgs].lvExactSize = 0;
+ }
+
+ // Increase size of lvaNewObjArrayArgs to be the largest size needed to hold 'numArgs' integers
+ // for our call to CORINFO_HELP_NEW_MDARR_NONVARARG.
+ lvaTable[lvaNewObjArrayArgs].lvExactSize =
+ max(lvaTable[lvaNewObjArrayArgs].lvExactSize, pCallInfo->sig.numArgs * sizeof(INT32));
+
+ // The side-effects may include allocation of more multi-dimensional arrays. Spill all side-effects
+ // to ensure that the shared lvaNewObjArrayArgs local variable is only ever used to pass arguments
+ // to one allocation at a time.
+ impSpillSideEffects(true, (unsigned)CHECK_SPILL_ALL DEBUGARG("impImportNewObjArray"));
+
+ //
+ // The arguments of the CORINFO_HELP_NEW_MDARR_NONVARARG helper are:
+ // - Array class handle
+ // - Number of dimension arguments
+ // - Pointer to block of int32 dimensions - address of lvaNewObjArrayArgs temp.
+ //
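+ //
+ // Illustrative layout (hypothetical values): for 'new int[2, 3]' the loop
+ // below fills the TYP_BLK temp with two int32 slots, { 2, 3 }, and the
+ // resulting helper call is conceptually
+ //     CORINFO_HELP_NEW_MDARR_NONVARARG(arrayClsHnd, 2, &lvaNewObjArrayArgs)
+ //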
+
+ node = gtNewLclvNode(lvaNewObjArrayArgs, TYP_BLK);
+ node = gtNewOperNode(GT_ADDR, TYP_I_IMPL, node);
+
+ // Pop the dimension arguments from the stack one at a time and store them
+ // into the lvaNewObjArrayArgs temp.
+ for (int i = pCallInfo->sig.numArgs - 1; i >= 0; i--)
+ {
+ GenTreePtr arg = impImplicitIorI4Cast(impPopStack().val, TYP_INT);
+
+ GenTreePtr dest = gtNewLclvNode(lvaNewObjArrayArgs, TYP_BLK);
+ dest = gtNewOperNode(GT_ADDR, TYP_I_IMPL, dest);
+ dest = gtNewOperNode(GT_ADD, TYP_I_IMPL, dest,
+ new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, sizeof(INT32) * i));
+ dest = gtNewOperNode(GT_IND, TYP_INT, dest);
+
+ node = gtNewOperNode(GT_COMMA, node->TypeGet(), gtNewAssignNode(dest, arg), node);
+ }
+
+ args = gtNewArgList(node);
+
+ // pass number of arguments to the helper
+ args = gtNewListNode(gtNewIconNode(pCallInfo->sig.numArgs), args);
+
+ args = gtNewListNode(classHandle, args);
+
+ node = gtNewHelperCallNode(CORINFO_HELP_NEW_MDARR_NONVARARG, TYP_REF, 0, args);
+ }
+ else
+#endif
+ {
+ //
+ // The varargs helper needs the type and method handles as last
+ // and last-1 param (this is a cdecl call, so args will be
+ // pushed in reverse order on the CPU stack)
+ //
+
+ args = gtNewArgList(classHandle);
+
+ // pass number of arguments to the helper
+ args = gtNewListNode(gtNewIconNode(pCallInfo->sig.numArgs), args);
+
+ unsigned argFlags = 0;
+ args = impPopList(pCallInfo->sig.numArgs, &argFlags, &pCallInfo->sig, args);
+
+ node = gtNewHelperCallNode(CORINFO_HELP_NEW_MDARR, TYP_REF, 0, args);
+
+ // varargs, so we pop the arguments
+ node->gtFlags |= GTF_CALL_POP_ARGS;
+
+#ifdef DEBUG
+ // At the present time we don't track Caller pop arguments
+ // that have GC references in them
+ for (GenTreeArgList* temp = args; temp; temp = temp->Rest())
+ {
+ assert(temp->Current()->gtType != TYP_REF);
+ }
+#endif
+ }
+
+ node->gtFlags |= args->gtFlags & GTF_GLOB_EFFECT;
+ node->gtCall.compileTimeHelperArgumentHandle = (CORINFO_GENERIC_HANDLE)pResolvedToken->hClass;
+
+ // Remember that this basic block contains 'new' of a md array
+ compCurBB->bbFlags |= BBF_HAS_NEWARRAY;
+
+ impPushOnStack(node, typeInfo(TI_REF, pResolvedToken->hClass));
+}
+
+GenTreePtr Compiler::impTransformThis(GenTreePtr thisPtr,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken,
+ CORINFO_THIS_TRANSFORM transform)
+{
+ switch (transform)
+ {
+ case CORINFO_DEREF_THIS:
+ {
+ GenTreePtr obj = thisPtr;
+
+ // This does an LDIND on the obj, which should be a byref pointing to a ref.
+ impBashVarAddrsToI(obj);
+ assert(genActualType(obj->gtType) == TYP_I_IMPL || obj->gtType == TYP_BYREF);
+ CorInfoType constraintTyp = info.compCompHnd->asCorInfoType(pConstrainedResolvedToken->hClass);
+
+ obj = gtNewOperNode(GT_IND, JITtype2varType(constraintTyp), obj);
+ // The ldind could point anywhere, for example a boxed class static int.
+ obj->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+
+ return obj;
+ }
+
+ case CORINFO_BOX_THIS:
+ {
+ // Constraint calls where there might be no
+ // unboxed entry point require us to implement the call via helper.
+ // These only occur when a possible target of the call
+ // may have inherited an implementation of an interface
+ // method from System.Object or System.ValueType. The EE does not provide us with
+ // "unboxed" versions of these methods.
+
+ GenTreePtr obj = thisPtr;
+
+ assert(obj->TypeGet() == TYP_BYREF || obj->TypeGet() == TYP_I_IMPL);
+ obj = gtNewObjNode(pConstrainedResolvedToken->hClass, obj);
+ obj->gtFlags |= GTF_EXCEPT;
+
+ CorInfoType jitTyp = info.compCompHnd->asCorInfoType(pConstrainedResolvedToken->hClass);
+ var_types objType = JITtype2varType(jitTyp);
+ if (impIsPrimitive(jitTyp))
+ {
+ if (obj->OperIsBlk())
+ {
+ obj->ChangeOperUnchecked(GT_IND);
+
+ // Obj could point anywhere, for example a boxed class static int.
+ obj->gtFlags |= GTF_IND_TGTANYWHERE;
+ obj->gtOp.gtOp2 = nullptr; // must be zero for tree walkers
+ }
+
+ obj->gtType = JITtype2varType(jitTyp);
+ assert(varTypeIsArithmetic(obj->gtType));
+ }
+
+ // This pushes on the dereferenced byref
+ // This is then used immediately to box.
+ impPushOnStack(obj, verMakeTypeInfo(pConstrainedResolvedToken->hClass).NormaliseForStack());
+
+ // This pops off the byref-to-a-value-type remaining on the stack and
+ // replaces it with a boxed object.
+ // This is then used as the object to the virtual call immediately below.
+ impImportAndPushBox(pConstrainedResolvedToken);
+ if (compDonotInline())
+ {
+ return nullptr;
+ }
+
+ obj = impPopStack().val;
+ return obj;
+ }
+ case CORINFO_NO_THIS_TRANSFORM:
+ default:
+ return thisPtr;
+ }
+}
+
+bool Compiler::impCanPInvokeInline(var_types callRetTyp)
+{
+ return impCanPInvokeInlineCallSite(callRetTyp) && getInlinePInvokeEnabled() && (!opts.compDbgCode) &&
+ (compCodeOpt() != SMALL_CODE) && (!opts.compNoPInvokeInlineCB) // profiler is preventing inline pinvoke
+ ;
+}
+
+ // Returns false only if the callsite really cannot be inlined. Ignores global
+ // conditions such as the debugger and profiler settings.
+bool Compiler::impCanPInvokeInlineCallSite(var_types callRetTyp)
+{
+ return
+ // We have to disable pinvoke inlining inside of filters
+ // because in case the main execution (i.e. in the try block) is inside
+ // unmanaged code, we cannot reuse the inlined stub (we still need the
+ // original state until we are in the catch handler)
+ (!bbInFilterILRange(compCurBB)) &&
+ // We disable pinvoke inlining inside handlers since the GSCookie is
+ // in the inlined Frame (see CORINFO_EE_INFO::InlinedCallFrameInfo::offsetOfGSCookie),
+ // but this would not protect framelets/return-address of handlers.
+ !compCurBB->hasHndIndex() &&
+#ifdef _TARGET_AMD64_
+ // Turns out JIT64 doesn't perform PInvoke inlining inside try regions, here's an excerpt of
+ // the comment from JIT64 explaining why:
+ //
+ //// [VSWhidbey: 611015] - because the jitted code links in the Frame (instead
+ //// of the stub) we rely on the Frame not being 'active' until inside the
+ //// stub. This normally happens by the stub setting the return address
+ //// pointer in the Frame object inside the stub. On a normal return, the
+ //// return address pointer is zeroed out so the Frame can be safely re-used,
+ //// but if an exception occurs, nobody zeros out the return address pointer.
+ //// Thus if we re-used the Frame object, it would go 'active' as soon as we
+ //// link it into the Frame chain.
+ ////
+ //// Technically we only need to disable PInvoke inlining if we're in a
+ //// handler or if we're
+ //// in a try body with a catch or filter/except where other non-handler code
+ //// in this method might run and try to re-use the dirty Frame object.
+ //
+ // Now, because of this, the VM actually assumes that in 64 bit we never PInvoke
+ // inline calls on any EH construct, you can verify that on VM\ExceptionHandling.cpp:203
+ // The method responsible for resuming execution is UpdateObjectRefInResumeContextCallback
+ // you can see how it aligns with JIT64 policy of not inlining PInvoke calls almost right
+ // at the beginning of the body of the method.
+ !compCurBB->hasTryIndex() &&
+#endif
+ (!impLocAllocOnStack()) && (callRetTyp != TYP_STRUCT);
+}
+
+void Compiler::impCheckForPInvokeCall(GenTreePtr call,
+ CORINFO_METHOD_HANDLE methHnd,
+ CORINFO_SIG_INFO* sig,
+ unsigned mflags)
+{
+ var_types callRetTyp = JITtype2varType(sig->retType);
+ CorInfoUnmanagedCallConv unmanagedCallConv;
+
+ // If VM flagged it as Pinvoke, flag the call node accordingly
+ if ((mflags & CORINFO_FLG_PINVOKE) != 0)
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_PINVOKE;
+ }
+
+ if (methHnd)
+ {
+ if ((mflags & CORINFO_FLG_PINVOKE) == 0 || (mflags & CORINFO_FLG_NOSECURITYWRAP) == 0)
+ {
+ return;
+ }
+
+ unmanagedCallConv = info.compCompHnd->getUnmanagedCallConv(methHnd);
+ }
+ else
+ {
+ CorInfoCallConv callConv = CorInfoCallConv(sig->callConv & CORINFO_CALLCONV_MASK);
+ if (callConv == CORINFO_CALLCONV_NATIVEVARARG)
+ {
+ // Used by the IL Stubs.
+ callConv = CORINFO_CALLCONV_C;
+ }
+ static_assert_no_msg((unsigned)CORINFO_CALLCONV_C == (unsigned)CORINFO_UNMANAGED_CALLCONV_C);
+ static_assert_no_msg((unsigned)CORINFO_CALLCONV_STDCALL == (unsigned)CORINFO_UNMANAGED_CALLCONV_STDCALL);
+ static_assert_no_msg((unsigned)CORINFO_CALLCONV_THISCALL == (unsigned)CORINFO_UNMANAGED_CALLCONV_THISCALL);
+ unmanagedCallConv = CorInfoUnmanagedCallConv(callConv);
+
+ assert(!call->gtCall.gtCallCookie);
+ }
+
+ if (unmanagedCallConv != CORINFO_UNMANAGED_CALLCONV_C && unmanagedCallConv != CORINFO_UNMANAGED_CALLCONV_STDCALL &&
+ unmanagedCallConv != CORINFO_UNMANAGED_CALLCONV_THISCALL)
+ {
+ return;
+ }
+ optNativeCallCount++;
+
+ if (opts.compMustInlinePInvokeCalli && methHnd == nullptr)
+ {
+#ifdef _TARGET_X86_
+ // CALLI in IL stubs must be inlined
+ assert(impCanPInvokeInlineCallSite(callRetTyp));
+ assert(!info.compCompHnd->pInvokeMarshalingRequired(methHnd, sig));
+#endif // _TARGET_X86_
+ }
+ else
+ {
+ if (!impCanPInvokeInline(callRetTyp))
+ {
+ return;
+ }
+
+ if (info.compCompHnd->pInvokeMarshalingRequired(methHnd, sig))
+ {
+ return;
+ }
+ }
+
+ JITLOG((LL_INFO1000000, "\nInline a CALLI PINVOKE call from method %s", info.compFullName));
+
+ call->gtFlags |= GTF_CALL_UNMANAGED;
+ info.compCallUnmanaged++;
+
+ assert(!compIsForInlining());
+
+ // AMD64 convention is same for native and managed
+ if (unmanagedCallConv == CORINFO_UNMANAGED_CALLCONV_C)
+ {
+ call->gtFlags |= GTF_CALL_POP_ARGS;
+ }
+
+ if (unmanagedCallConv == CORINFO_UNMANAGED_CALLCONV_THISCALL)
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_UNMGD_THISCALL;
+ }
+}
+
+GenTreePtr Compiler::impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilOffset)
+{
+ var_types callRetTyp = JITtype2varType(sig->retType);
+
+ /* The function pointer is on top of the stack - It may be a
+ * complex expression. As it is evaluated after the args,
+ * it may cause registered args to be spilled. Simply spill it.
+ */
+
+ // Ignore this trivial case.
+ if (impStackTop().val->gtOper != GT_LCL_VAR)
+ {
+ impSpillStackEntry(verCurrentState.esStackDepth - 1,
+ BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impImportIndirectCall"));
+ }
+
+ /* Get the function pointer */
+
+ GenTreePtr fptr = impPopStack().val;
+ assert(genActualType(fptr->gtType) == TYP_I_IMPL);
+
+#ifdef DEBUG
+ // This temporary must never be converted to a double in stress mode,
+ // because that can introduce a call to the cast helper after the
+ // arguments have already been evaluated.
+
+ if (fptr->OperGet() == GT_LCL_VAR)
+ {
+ lvaTable[fptr->gtLclVarCommon.gtLclNum].lvKeepType = 1;
+ }
+#endif
+
+ /* Create the call node */
+
+ GenTreePtr call = gtNewIndCallNode(fptr, callRetTyp, nullptr, ilOffset);
+
+ call->gtFlags |= GTF_EXCEPT | (fptr->gtFlags & GTF_GLOB_EFFECT);
+
+ return call;
+}
+
+/*****************************************************************************/
+
+void Compiler::impPopArgsForUnmanagedCall(GenTreePtr call, CORINFO_SIG_INFO* sig)
+{
+ assert(call->gtFlags & GTF_CALL_UNMANAGED);
+
+ /* Since we push the arguments in reverse order (i.e. right -> left)
+ * spill any side effects from the stack
+ *
+ * OBS: If there is only one side effect, we do not need to spill it;
+ * thus we have to spill all side effects except the last one
+ */
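+
+ // Illustrative (hypothetical) example of why the spilling below matters:
+ // for an unmanaged call f(g(), h()) the IL evaluates g() and then h(), but on
+ // x86 the argument list is reversed to match the native right-to-left push
+ // order, so h() would end up evaluated before g() unless the earlier side
+ // effect (here g()) is spilled to a temp first.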
+
+ unsigned lastLevelWithSideEffects = UINT_MAX;
+
+ unsigned argsToReverse = sig->numArgs;
+
+ // For "thiscall", the first argument goes in a register. Since its
+ // order does not need to be changed, we do not need to spill it
+
+ if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
+ {
+ assert(argsToReverse);
+ argsToReverse--;
+ }
+
+#ifndef _TARGET_X86_
+ // Don't reverse args on ARM or x64 - first four args always placed in regs in order
+ argsToReverse = 0;
+#endif
+
+ for (unsigned level = verCurrentState.esStackDepth - argsToReverse; level < verCurrentState.esStackDepth; level++)
+ {
+ if (verCurrentState.esStack[level].val->gtFlags & GTF_ORDER_SIDEEFF)
+ {
+ assert(lastLevelWithSideEffects == UINT_MAX);
+
+ impSpillStackEntry(level,
+ BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impPopArgsForUnmanagedCall - other side effect"));
+ }
+ else if (verCurrentState.esStack[level].val->gtFlags & GTF_SIDE_EFFECT)
+ {
+ if (lastLevelWithSideEffects != UINT_MAX)
+ {
+ /* We had a previous side effect - must spill it */
+ impSpillStackEntry(lastLevelWithSideEffects,
+ BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impPopArgsForUnmanagedCall - side effect"));
+
+ /* Record the level for the current side effect in case we will spill it */
+ lastLevelWithSideEffects = level;
+ }
+ else
+ {
+ /* This is the first side effect encountered - record its level */
+
+ lastLevelWithSideEffects = level;
+ }
+ }
+ }
+
+ /* The argument list is now "clean" - no out-of-order side effects
+ * Pop the argument list in reverse order */
+
+ unsigned argFlags = 0;
+ GenTreePtr args = call->gtCall.gtCallArgs =
+ impPopRevList(sig->numArgs, &argFlags, sig, sig->numArgs - argsToReverse);
+
+ if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
+ {
+ GenTreePtr thisPtr = args->Current();
+ impBashVarAddrsToI(thisPtr);
+ assert(thisPtr->TypeGet() == TYP_I_IMPL || thisPtr->TypeGet() == TYP_BYREF);
+ }
+
+ if (args)
+ {
+ call->gtFlags |= args->gtFlags & GTF_GLOB_EFFECT;
+ }
+}
+
+//------------------------------------------------------------------------
+// impInitClass: Build a node to initialize the class before accessing the
+// field if necessary
+//
+// Arguments:
+// pResolvedToken - The CORINFO_RESOLVED_TOKEN that has been initialized
+// by a call to CEEInfo::resolveToken().
+//
+// Return Value: If needed, a pointer to the node that will perform the class
+ // initialization. Otherwise, nullptr.
+//
+
+GenTreePtr Compiler::impInitClass(CORINFO_RESOLVED_TOKEN* pResolvedToken)
+{
+ CorInfoInitClassResult initClassResult =
+ info.compCompHnd->initClass(pResolvedToken->hField, info.compMethodHnd, impTokenLookupContextHandle);
+
+ if ((initClassResult & CORINFO_INITCLASS_USE_HELPER) == 0)
+ {
+ return nullptr;
+ }
+ BOOL runtimeLookup;
+
+ GenTreePtr node = impParentClassTokenToHandle(pResolvedToken, &runtimeLookup);
+
+ if (node == nullptr)
+ {
+ assert(compDonotInline());
+ return nullptr;
+ }
+
+ if (runtimeLookup)
+ {
+ node = gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, 0, gtNewArgList(node));
+ }
+ else
+ {
+ // Call the shared non-GC static helper, as it's the fastest.
+ node = fgGetSharedCCtor(pResolvedToken->hClass);
+ }
+
+ return node;
+}
+
+GenTreePtr Compiler::impImportStaticReadOnlyField(void* fldAddr, var_types lclTyp)
+{
+ GenTreePtr op1 = nullptr;
+
+ switch (lclTyp)
+ {
+ int ival;
+ __int64 lval;
+ double dval;
+
+ case TYP_BOOL:
+ ival = *((bool*)fldAddr);
+ goto IVAL_COMMON;
+
+ case TYP_BYTE:
+ ival = *((signed char*)fldAddr);
+ goto IVAL_COMMON;
+
+ case TYP_UBYTE:
+ ival = *((unsigned char*)fldAddr);
+ goto IVAL_COMMON;
+
+ case TYP_SHORT:
+ ival = *((short*)fldAddr);
+ goto IVAL_COMMON;
+
+ case TYP_CHAR:
+ case TYP_USHORT:
+ ival = *((unsigned short*)fldAddr);
+ goto IVAL_COMMON;
+
+ case TYP_UINT:
+ case TYP_INT:
+ ival = *((int*)fldAddr);
+ IVAL_COMMON:
+ op1 = gtNewIconNode(ival);
+ break;
+
+ case TYP_LONG:
+ case TYP_ULONG:
+ lval = *((__int64*)fldAddr);
+ op1 = gtNewLconNode(lval);
+ break;
+
+ case TYP_FLOAT:
+ dval = *((float*)fldAddr);
+ op1 = gtNewDconNode(dval);
+#if !FEATURE_X87_DOUBLES
+ // X87 stack doesn't differentiate between float/double
+ // so R4 is treated as R8, but everybody else does
+ op1->gtType = TYP_FLOAT;
+#endif // FEATURE_X87_DOUBLES
+ break;
+
+ case TYP_DOUBLE:
+ dval = *((double*)fldAddr);
+ op1 = gtNewDconNode(dval);
+ break;
+
+ default:
+ assert(!"Unexpected lclTyp");
+ break;
+ }
+
+ return op1;
+}
+
+GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_ACCESS_FLAGS access,
+ CORINFO_FIELD_INFO* pFieldInfo,
+ var_types lclTyp)
+{
+ GenTreePtr op1;
+
+ switch (pFieldInfo->fieldAccessor)
+ {
+ case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
+ {
+ assert(!compIsForInlining());
+
+ // We first call a special helper to get the statics base pointer
+ op1 = impParentClassTokenToHandle(pResolvedToken);
+
+ // compIsForInlining() is false, so we should never get NULL here.
+ assert(op1 != nullptr);
+
+ var_types type = TYP_BYREF;
+
+ switch (pFieldInfo->helper)
+ {
+ case CORINFO_HELP_GETGENERICS_NONGCTHREADSTATIC_BASE:
+ type = TYP_I_IMPL;
+ break;
+ case CORINFO_HELP_GETGENERICS_GCSTATIC_BASE:
+ case CORINFO_HELP_GETGENERICS_NONGCSTATIC_BASE:
+ case CORINFO_HELP_GETGENERICS_GCTHREADSTATIC_BASE:
+ break;
+ default:
+ assert(!"unknown generic statics helper");
+ break;
+ }
+
+ op1 = gtNewHelperCallNode(pFieldInfo->helper, type, 0, gtNewArgList(op1));
+
+ FieldSeqNode* fs = GetFieldSeqStore()->CreateSingleton(pResolvedToken->hField);
+ op1 = gtNewOperNode(GT_ADD, type, op1,
+ new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, pFieldInfo->offset, fs));
+ }
+ break;
+
+ case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER:
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ unsigned callFlags = 0;
+
+ if (info.compCompHnd->getClassAttribs(pResolvedToken->hClass) & CORINFO_FLG_BEFOREFIELDINIT)
+ {
+ callFlags |= GTF_CALL_HOISTABLE;
+ }
+
+ op1 = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF, callFlags);
+
+ op1->gtCall.setEntryPoint(pFieldInfo->fieldLookup);
+ }
+ else
+#endif
+ {
+ op1 = fgGetStaticsCCtorHelper(pResolvedToken->hClass, pFieldInfo->helper);
+ }
+
+ {
+ FieldSeqNode* fs = GetFieldSeqStore()->CreateSingleton(pResolvedToken->hField);
+ op1 = gtNewOperNode(GT_ADD, op1->TypeGet(), op1,
+ new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, pFieldInfo->offset, fs));
+ }
+ break;
+
+ default:
+ if (!(access & CORINFO_ACCESS_ADDRESS))
+ {
+ // In future, it may be better to just create the right tree here instead of folding it later.
+ op1 = gtNewFieldRef(lclTyp, pResolvedToken->hField);
+
+ if (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_STATIC_IN_HEAP)
+ {
+ op1->gtType = TYP_REF; // points at boxed object
+ FieldSeqNode* firstElemFldSeq =
+ GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
+ op1 =
+ gtNewOperNode(GT_ADD, TYP_BYREF, op1,
+ new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, sizeof(void*), firstElemFldSeq));
+
+ if (varTypeIsStruct(lclTyp))
+ {
+ // Constructor adds GTF_GLOB_REF. Note that this is *not* GTF_EXCEPT.
+ op1 = gtNewObjNode(pFieldInfo->structType, op1);
+ }
+ else
+ {
+ op1 = gtNewOperNode(GT_IND, lclTyp, op1);
+ op1->gtFlags |= GTF_GLOB_REF | GTF_IND_NONFAULTING;
+ }
+ }
+
+ return op1;
+ }
+ else
+ {
+ void** pFldAddr = nullptr;
+ void* fldAddr = info.compCompHnd->getFieldAddress(pResolvedToken->hField, (void**)&pFldAddr);
+
+ FieldSeqNode* fldSeq = GetFieldSeqStore()->CreateSingleton(pResolvedToken->hField);
+
+ /* Create the data member node */
+ if (pFldAddr == nullptr)
+ {
+ op1 = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL, fldSeq);
+ }
+ else
+ {
+ op1 = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL, fldSeq);
+
+ // There are two cases here: either the static is RVA-based, in which case
+ // the type of the FIELD node is not a GC type and the handle to the RVA is
+ // a TYP_I_IMPL; or the FIELD node is a GC type and the handle to it is a
+ // TYP_BYREF into the GC heap, because handles to statics now go into the
+ // large object heap.
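+ //
+ // Sketch of the two shapes built below (illustrative, not from the sources):
+ //   RVA-based static (non-GC field type): GT_IND(TYP_I_IMPL, icon(pFldAddr))
+ //   GC-type static:                        GT_IND(TYP_BYREF,  icon(pFldAddr))
+ // In both cases the indirection yields the address of the actual field.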
+
+ var_types handleTyp = (var_types)(varTypeIsGC(lclTyp) ? TYP_BYREF : TYP_I_IMPL);
+ op1 = gtNewOperNode(GT_IND, handleTyp, op1);
+ op1->gtFlags |= GTF_IND_INVARIANT | GTF_IND_NONFAULTING;
+ }
+ }
+ break;
+ }
+
+ if (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_STATIC_IN_HEAP)
+ {
+ op1 = gtNewOperNode(GT_IND, TYP_REF, op1);
+
+ FieldSeqNode* fldSeq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
+
+ op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1,
+ new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, sizeof(void*), fldSeq));
+ }
+
+ if (!(access & CORINFO_ACCESS_ADDRESS))
+ {
+ op1 = gtNewOperNode(GT_IND, lclTyp, op1);
+ op1->gtFlags |= GTF_GLOB_REF;
+ }
+
+ return op1;
+}
+
+ // In general, try to call this before most of the verification work, because most people expect the access
+ // exceptions before the verification exceptions. If you do this afterwards, that usually doesn't happen: it turns
+ // out that if you can't access something, we also consider you unverifiable for other reasons.
+void Compiler::impHandleAccessAllowed(CorInfoIsAccessAllowedResult result, CORINFO_HELPER_DESC* helperCall)
+{
+ if (result != CORINFO_ACCESS_ALLOWED)
+ {
+ impHandleAccessAllowedInternal(result, helperCall);
+ }
+}
+
+void Compiler::impHandleAccessAllowedInternal(CorInfoIsAccessAllowedResult result, CORINFO_HELPER_DESC* helperCall)
+{
+ switch (result)
+ {
+ case CORINFO_ACCESS_ALLOWED:
+ break;
+ case CORINFO_ACCESS_ILLEGAL:
+ // if we're verifying, then we need to reject the illegal access to ensure that we don't think the
+ // method is verifiable. Otherwise, delay the exception to runtime.
+ if (compIsForImportOnly())
+ {
+ info.compCompHnd->ThrowExceptionForHelper(helperCall);
+ }
+ else
+ {
+ impInsertHelperCall(helperCall);
+ }
+ break;
+ case CORINFO_ACCESS_RUNTIME_CHECK:
+ impInsertHelperCall(helperCall);
+ break;
+ }
+}
+
+void Compiler::impInsertHelperCall(CORINFO_HELPER_DESC* helperInfo)
+{
+ // Construct the argument list
+ GenTreeArgList* args = nullptr;
+ assert(helperInfo->helperNum != CORINFO_HELP_UNDEF);
+ for (unsigned i = helperInfo->numArgs; i > 0; --i)
+ {
+ const CORINFO_HELPER_ARG& helperArg = helperInfo->args[i - 1];
+ GenTreePtr currentArg = nullptr;
+ switch (helperArg.argType)
+ {
+ case CORINFO_HELPER_ARG_TYPE_Field:
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(
+ info.compCompHnd->getFieldClass(helperArg.fieldHandle));
+ currentArg = gtNewIconEmbFldHndNode(helperArg.fieldHandle);
+ break;
+ case CORINFO_HELPER_ARG_TYPE_Method:
+ info.compCompHnd->methodMustBeLoadedBeforeCodeIsRun(helperArg.methodHandle);
+ currentArg = gtNewIconEmbMethHndNode(helperArg.methodHandle);
+ break;
+ case CORINFO_HELPER_ARG_TYPE_Class:
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(helperArg.classHandle);
+ currentArg = gtNewIconEmbClsHndNode(helperArg.classHandle);
+ break;
+ case CORINFO_HELPER_ARG_TYPE_Module:
+ currentArg = gtNewIconEmbScpHndNode(helperArg.moduleHandle);
+ break;
+ case CORINFO_HELPER_ARG_TYPE_Const:
+ currentArg = gtNewIconNode(helperArg.constant);
+ break;
+ default:
+ NO_WAY("Illegal helper arg type");
+ }
+ args = (currentArg == nullptr) ? gtNewArgList(currentArg) : gtNewListNode(currentArg, args);
+ }
+
+ /* TODO-Review:
+ * Mark as CSE'able, and hoistable. Consider marking hoistable unless you're in the inlinee.
+ * Also, consider sticking this in the first basic block.
+ */
+ GenTreePtr callout = gtNewHelperCallNode(helperInfo->helperNum, TYP_VOID, GTF_EXCEPT, args);
+ impAppendTree(callout, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+}
+
+void Compiler::impInsertCalloutForDelegate(CORINFO_METHOD_HANDLE callerMethodHnd,
+ CORINFO_METHOD_HANDLE calleeMethodHnd,
+ CORINFO_CLASS_HANDLE delegateTypeHnd)
+{
+#ifdef FEATURE_CORECLR
+ if (!info.compCompHnd->isDelegateCreationAllowed(delegateTypeHnd, calleeMethodHnd))
+ {
+ // Call the JIT_DelegateSecurityCheck helper before calling the actual function.
+ // This helper throws an exception if the CLR host disallows the call.
+
+ GenTreePtr helper = gtNewHelperCallNode(CORINFO_HELP_DELEGATE_SECURITY_CHECK, TYP_VOID, GTF_EXCEPT,
+ gtNewArgList(gtNewIconEmbClsHndNode(delegateTypeHnd),
+ gtNewIconEmbMethHndNode(calleeMethodHnd)));
+ // Append the callout statement
+ impAppendTree(helper, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+ }
+#endif // FEATURE_CORECLR
+}
+
+ // Checks whether the return types of caller and callee are compatible
+ // so that the callee can be tail called. Note that here we don't check
+ // compatibility in the IL Verifier sense, but rather whether the return type
+ // sizes are equal and the values are returned in the same return register.
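+ //
+ // Illustrative (hypothetical) examples:
+ //   caller returns int,    callee returns int -> compatible (exact match)
+ //   caller returns void,   callee returns int -> compatible on AMD64/ARM64 only
+ //                                                (Jit64 compat, call+pop+ret pattern)
+ //   caller returns double, callee returns int -> not compatible (different return registers)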
+bool Compiler::impTailCallRetTypeCompatible(var_types callerRetType,
+ CORINFO_CLASS_HANDLE callerRetTypeClass,
+ var_types calleeRetType,
+ CORINFO_CLASS_HANDLE calleeRetTypeClass)
+{
+ // Note that we cannot relax this condition with genActualType() as the
+ // calling convention dictates that the caller of a function with a small
+ // typed return value is responsible for normalizing the return value.
+ if (callerRetType == calleeRetType)
+ {
+ return true;
+ }
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // Jit64 compat:
+ if (callerRetType == TYP_VOID)
+ {
+ // This needs to be allowed to support the following IL pattern that Jit64 allows:
+ // tail.call
+ // pop
+ // ret
+ //
+ // Note that the above IL pattern is not valid as per IL verification rules.
+ // Therefore, only full trust code can take advantage of this pattern.
+ return true;
+ }
+
+ // These checks return true if the return value type sizes are the same and
+ // get returned in the same return register i.e. caller doesn't need to normalize
+ // return value. Some of the tail calls permitted by below checks would have
+ // been rejected by IL Verifier before we reached here. Therefore, only full
+ // trust code can make those tail calls.
+ unsigned callerRetTypeSize = 0;
+ unsigned calleeRetTypeSize = 0;
+ bool isCallerRetTypMBEnreg =
+ VarTypeIsMultiByteAndCanEnreg(callerRetType, callerRetTypeClass, &callerRetTypeSize, true);
+ bool isCalleeRetTypMBEnreg =
+ VarTypeIsMultiByteAndCanEnreg(calleeRetType, calleeRetTypeClass, &calleeRetTypeSize, true);
+
+ if (varTypeIsIntegral(callerRetType) || isCallerRetTypMBEnreg)
+ {
+ return (varTypeIsIntegral(calleeRetType) || isCalleeRetTypMBEnreg) && (callerRetTypeSize == calleeRetTypeSize);
+ }
+#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+
+ return false;
+}
+
+// For prefixFlags
+enum
+{
+ PREFIX_TAILCALL_EXPLICIT = 0x00000001, // call has "tail" IL prefix
+ PREFIX_TAILCALL_IMPLICIT =
+ 0x00000010, // call is treated as having "tail" prefix even though there is no "tail" IL prefix
+ PREFIX_TAILCALL = (PREFIX_TAILCALL_EXPLICIT | PREFIX_TAILCALL_IMPLICIT),
+ PREFIX_VOLATILE = 0x00000100,
+ PREFIX_UNALIGNED = 0x00001000,
+ PREFIX_CONSTRAINED = 0x00010000,
+ PREFIX_READONLY = 0x00100000
+};
+
+/********************************************************************************
+ *
+ * Returns true if the current opcode and the opcodes following it correspond
+ * to a supported tail call IL pattern.
+ *
+ */
+bool Compiler::impIsTailCallILPattern(bool tailPrefixed,
+ OPCODE curOpcode,
+ const BYTE* codeAddrOfNextOpcode,
+ const BYTE* codeEnd,
+ bool isRecursive,
+ bool* isCallPopAndRet /* = nullptr */)
+{
+ // Bail out if the current opcode is not a call.
+ if (!impOpcodeIsCallOpcode(curOpcode))
+ {
+ return false;
+ }
+
+#if !FEATURE_TAILCALL_OPT_SHARED_RETURN
+ // If shared ret tail opt is not enabled, we will enable
+ // it for recursive methods.
+ if (isRecursive)
+#endif
+ {
+ // We can actually handle the case where the ret is in a fall-through block, as long as that is the only
+ // remaining part of the sequence. Make sure we don't go past the end of the IL, however.
+ codeEnd = min(codeEnd + 1, info.compCode + info.compILCodeSize);
+ }
+
+ // Bail out if there is no next opcode after call
+ if (codeAddrOfNextOpcode >= codeEnd)
+ {
+ return false;
+ }
+
+ // Scan the opcodes to look for the following IL patterns if either
+ // i) the call is not tail prefixed (i.e. implicit tail call) or
+ // ii) if tail prefixed, IL verification is not needed for the method.
+ //
+ // Only in the above two cases we can allow the below tail call patterns
+ // violating ECMA spec.
+ //
+ // Pattern1:
+ // call
+ // nop*
+ // ret
+ //
+ // Pattern2:
+ // call
+ // nop*
+ // pop
+ // nop*
+ // ret
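+ //
+ // Illustrative (hypothetical) IL for Pattern2, i.e. an implicit tail call in
+ // a method returning void that discards the callee's result:
+ //     call int32 SomeClass::SomeMethod()
+ //     pop
+ //     ret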
+ int cntPop = 0;
+ OPCODE nextOpcode;
+
+#ifdef _TARGET_AMD64_
+ do
+ {
+ nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode);
+ codeAddrOfNextOpcode += sizeof(__int8);
+ } while ((codeAddrOfNextOpcode < codeEnd) && // Haven't reached end of method
+ (!tailPrefixed || !tiVerificationNeeded) && // Not ".tail" prefixed or method requires no IL verification
+ ((nextOpcode == CEE_NOP) || ((nextOpcode == CEE_POP) && (++cntPop == 1)))); // Next opcode = nop or exactly
+ // one pop seen so far.
+#else
+ nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode);
+#endif
+
+ if (isCallPopAndRet)
+ {
+ // Allow call+pop+ret to be tail call optimized if caller ret type is void
+ *isCallPopAndRet = (nextOpcode == CEE_RET) && (cntPop == 1);
+ }
+
+#ifdef _TARGET_AMD64_
+ // Jit64 Compat:
+ // Tail call IL pattern could be either of the following
+ // 1) call/callvirt/calli + ret
+ // 2) call/callvirt/calli + pop + ret in a method returning void.
+ return (nextOpcode == CEE_RET) && ((cntPop == 0) || ((cntPop == 1) && (info.compRetType == TYP_VOID)));
+#else //!_TARGET_AMD64_
+ return (nextOpcode == CEE_RET) && (cntPop == 0);
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Determine whether the call could be converted to an implicit tail call
+ *
+ */
+bool Compiler::impIsImplicitTailCallCandidate(
+ OPCODE opcode, const BYTE* codeAddrOfNextOpcode, const BYTE* codeEnd, int prefixFlags, bool isRecursive)
+{
+
+#if FEATURE_TAILCALL_OPT
+ if (!opts.compTailCallOpt)
+ {
+ return false;
+ }
+
+ if (opts.compDbgCode || opts.MinOpts())
+ {
+ return false;
+ }
+
+ // must not be tail prefixed
+ if (prefixFlags & PREFIX_TAILCALL_EXPLICIT)
+ {
+ return false;
+ }
+
+#if !FEATURE_TAILCALL_OPT_SHARED_RETURN
+ // the block containing call is marked as BBJ_RETURN
+ // We allow shared ret tail call optimization on recursive calls even under
+ // !FEATURE_TAILCALL_OPT_SHARED_RETURN.
+ if (!isRecursive && (compCurBB->bbJumpKind != BBJ_RETURN))
+ return false;
+#endif // !FEATURE_TAILCALL_OPT_SHARED_RETURN
+
+ // must be call+ret or call+pop+ret
+ if (!impIsTailCallILPattern(false, opcode, codeAddrOfNextOpcode, codeEnd, isRecursive))
+ {
+ return false;
+ }
+
+ return true;
+#else
+ return false;
+#endif // FEATURE_TAILCALL_OPT
+}
+
+//------------------------------------------------------------------------
+// impImportCall: import a call-inspiring opcode
+//
+// Arguments:
+// opcode - opcode that inspires the call
+// pResolvedToken - resolved token for the call target
+// pConstrainedResolvedToken - resolved constraint token (or nullptr)
+ // newObjThis - tree for this pointer or uninitialized newobj temp (or nullptr)
+// prefixFlags - IL prefix flags for the call
+// callInfo - EE supplied info for the call
+// rawILOffset - IL offset of the opcode
+//
+// Returns:
+// Type of the call's return value.
+//
+// Notes:
+// opcode can be CEE_CALL, CEE_CALLI, CEE_CALLVIRT, or CEE_NEWOBJ.
+//
+// For CEE_NEWOBJ, newobjThis should be the temp grabbed for the allocated
+ // uninitialized object.
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+
+var_types Compiler::impImportCall(OPCODE opcode,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken,
+ GenTreePtr newobjThis,
+ int prefixFlags,
+ CORINFO_CALL_INFO* callInfo,
+ IL_OFFSET rawILOffset)
+{
+ assert(opcode == CEE_CALL || opcode == CEE_CALLVIRT || opcode == CEE_NEWOBJ || opcode == CEE_CALLI);
+
+ IL_OFFSETX ilOffset = impCurILOffset(rawILOffset, true);
+ var_types callRetTyp = TYP_COUNT;
+ CORINFO_SIG_INFO* sig = nullptr;
+ CORINFO_METHOD_HANDLE methHnd = nullptr;
+ CORINFO_CLASS_HANDLE clsHnd = nullptr;
+ unsigned clsFlags = 0;
+ unsigned mflags = 0;
+ unsigned argFlags = 0;
+ GenTreePtr call = nullptr;
+ GenTreeArgList* args = nullptr;
+ CORINFO_THIS_TRANSFORM constraintCallThisTransform = CORINFO_NO_THIS_TRANSFORM;
+ CORINFO_CONTEXT_HANDLE exactContextHnd = nullptr;
+ BOOL exactContextNeedsRuntimeLookup = FALSE;
+ bool canTailCall = true;
+ const char* szCanTailCallFailReason = nullptr;
+ int tailCall = prefixFlags & PREFIX_TAILCALL;
+ bool readonlyCall = (prefixFlags & PREFIX_READONLY) != 0;
+
+ // Synchronized methods need to call CORINFO_HELP_MON_EXIT at the end. We could
+ // do that before tailcalls, but that is probably not the intended
+ // semantic. So just disallow tailcalls from synchronized methods.
+ // Also, popping arguments in a varargs function is more work and NYI
+ // If we have a security object, we have to keep our frame around for callers
+ // to see any imperative security.
+ if (info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "Caller is synchronized";
+ }
+#if !FEATURE_FIXED_OUT_ARGS
+ else if (info.compIsVarArgs)
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "Caller is varargs";
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+ else if (opts.compNeedSecurityCheck)
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "Caller requires a security check.";
+ }
+
+ // We only need to cast the return value of pinvoke inlined calls that return small types
+
+ // TODO-AMD64-Cleanup: Remove this when we stop interoperating with JIT64, or if we decide to stop
+ // widening everything! CoreCLR does not support JIT64 interoperation so no need to widen there.
+ // The existing x64 JIT doesn't bother widening all types to int, so we have to assume for
+ // the time being that the callee might be compiled by the other JIT and thus the return
+ // value will need to be widened by us (or not widened at all...)
+
+ // ReadyToRun code sticks with default calling convention that does not widen small return types.
+
+ bool checkForSmallType = opts.IsJit64Compat() || opts.IsReadyToRun();
+ bool bIntrinsicImported = false;
+
+ CORINFO_SIG_INFO calliSig;
+ GenTreeArgList* extraArg = nullptr;
+
+ /*-------------------------------------------------------------------------
+ * First create the call node
+ */
+
+ if (opcode == CEE_CALLI)
+ {
+ /* Get the call site sig */
+ eeGetSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, &calliSig);
+
+ callRetTyp = JITtype2varType(calliSig.retType);
+
+ call = impImportIndirectCall(&calliSig, ilOffset);
+
+ // We don't know the target method, so we have to infer the flags, or
+ // assume the worst-case.
+ mflags = (calliSig.callConv & CORINFO_CALLCONV_HASTHIS) ? 0 : CORINFO_FLG_STATIC;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ unsigned structSize =
+ (callRetTyp == TYP_STRUCT) ? info.compCompHnd->getClassSize(calliSig.retTypeSigClass) : 0;
+ printf("\nIn Compiler::impImportCall: opcode is %s, kind=%d, callRetType is %s, structSize is %d\n",
+ opcodeNames[opcode], callInfo->kind, varTypeName(callRetTyp), structSize);
+ }
+#endif
+ // This should be checked in impImportBlockCode.
+ assert(!compIsForInlining() || !(impInlineInfo->inlineCandidateInfo->dwRestrictions & INLINE_RESPECT_BOUNDARY));
+
+ sig = &calliSig;
+
+#ifdef DEBUG
+ // We cannot lazily obtain the signature of a CALLI call because it has no method
+ // handle that we can use, so we need to save its full call signature here.
+ assert(call->gtCall.callSig == nullptr);
+ call->gtCall.callSig = new (this, CMK_CorSig) CORINFO_SIG_INFO;
+ *call->gtCall.callSig = calliSig;
+#endif // DEBUG
+ }
+ else // (opcode != CEE_CALLI)
+ {
+ CorInfoIntrinsics intrinsicID = CORINFO_INTRINSIC_Count;
+
+ // Passing CORINFO_CALLINFO_ALLOWINSTPARAM indicates that this JIT is prepared to
+ // supply the instantiation parameters necessary to make direct calls to underlying
+ // shared generic code, rather than calling through instantiating stubs. If the
+ // returned signature has CORINFO_CALLCONV_PARAMTYPE then this indicates that the JIT
+ // must indeed pass an instantiation parameter.
+
+ methHnd = callInfo->hMethod;
+
+ sig = &(callInfo->sig);
+ callRetTyp = JITtype2varType(sig->retType);
+
+ mflags = callInfo->methodFlags;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ unsigned structSize = (callRetTyp == TYP_STRUCT) ? info.compCompHnd->getClassSize(sig->retTypeSigClass) : 0;
+ printf("\nIn Compiler::impImportCall: opcode is %s, kind=%d, callRetType is %s, structSize is %d\n",
+ opcodeNames[opcode], callInfo->kind, varTypeName(callRetTyp), structSize);
+ }
+#endif
+ if (compIsForInlining())
+ {
+ /* Does this call site have security boundary restrictions? */
+
+ if (impInlineInfo->inlineCandidateInfo->dwRestrictions & INLINE_RESPECT_BOUNDARY)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_CROSS_BOUNDARY_SECURITY);
+ return callRetTyp;
+ }
+
+ /* Does the inlinee need a security check token on the frame */
+
+ if (mflags & CORINFO_FLG_SECURITYCHECK)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_NEEDS_SECURITY_CHECK);
+ return callRetTyp;
+ }
+
+ /* Does the inlinee use StackCrawlMark */
+
+ if (mflags & CORINFO_FLG_DONT_INLINE_CALLER)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_STACK_CRAWL_MARK);
+ return callRetTyp;
+ }
+
+ /* For now ignore delegate invoke */
+
+ if (mflags & CORINFO_FLG_DELEGATE_INVOKE)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_DELEGATE_INVOKE);
+ return callRetTyp;
+ }
+
+ /* For now ignore varargs */
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_NATIVE_VARARGS);
+ return callRetTyp;
+ }
+
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_MANAGED_VARARGS);
+ return callRetTyp;
+ }
+
+ if ((mflags & CORINFO_FLG_VIRTUAL) && (sig->sigInst.methInstCount != 0) && (opcode == CEE_CALLVIRT))
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_IS_GENERIC_VIRTUAL);
+ return callRetTyp;
+ }
+ }
+
+ clsHnd = pResolvedToken->hClass;
+
+ clsFlags = callInfo->classFlags;
+
+#ifdef DEBUG
+ // If this is a call to JitTestLabel.Mark, do "early inlining", and record the test attribute.
+
+ // This recognition should really be done by knowing the methHnd of the relevant Mark method(s).
+ // These should be in mscorlib.h, and available through a JIT/EE interface call.
+ const char* modName;
+ const char* className;
+ const char* methodName;
+ if ((className = eeGetClassName(clsHnd)) != nullptr &&
+ strcmp(className, "System.Runtime.CompilerServices.JitTestLabel") == 0 &&
+ (methodName = eeGetMethodName(methHnd, &modName)) != nullptr && strcmp(methodName, "Mark") == 0)
+ {
+ return impImportJitTestLabelMark(sig->numArgs);
+ }
+#endif // DEBUG
+
+ // <NICE> Factor this into getCallInfo </NICE>
+ if ((mflags & CORINFO_FLG_INTRINSIC) && !pConstrainedResolvedToken)
+ {
+ call = impIntrinsic(clsHnd, methHnd, sig, pResolvedToken->token, readonlyCall,
+ (canTailCall && (tailCall != 0)), &intrinsicID);
+
+ if (call != nullptr)
+ {
+ assert(!(mflags & CORINFO_FLG_VIRTUAL) || (mflags & CORINFO_FLG_FINAL) ||
+ (clsFlags & CORINFO_FLG_FINAL));
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (call->OperGet() == GT_INTRINSIC)
+ {
+ if (opts.IsReadyToRun())
+ {
+ noway_assert(callInfo->kind == CORINFO_CALL);
+ call->gtIntrinsic.gtEntryPoint = callInfo->codePointerLookup.constLookup;
+ }
+ else
+ {
+ call->gtIntrinsic.gtEntryPoint.addr = nullptr;
+ }
+ }
+#endif
+
+ bIntrinsicImported = true;
+ goto DONE_CALL;
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ if (featureSIMD)
+ {
+ call = impSIMDIntrinsic(opcode, newobjThis, clsHnd, methHnd, sig, pResolvedToken->token);
+ if (call != nullptr)
+ {
+ bIntrinsicImported = true;
+ goto DONE_CALL;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ if ((mflags & CORINFO_FLG_VIRTUAL) && (mflags & CORINFO_FLG_EnC) && (opcode == CEE_CALLVIRT))
+ {
+ NO_WAY("Virtual call to a function added via EnC is not supported");
+ goto DONE_CALL;
+ }
+
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_DEFAULT &&
+ (sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_VARARG &&
+ (sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_NATIVEVARARG)
+ {
+ BADCODE("Bad calling convention");
+ }
+
+ //-------------------------------------------------------------------------
+ // Construct the call node
+ //
+ // Work out what sort of call we're making.
+ // Dispense with virtual calls implemented via LDVIRTFTN immediately.
+
+ constraintCallThisTransform = callInfo->thisTransform;
+
+ exactContextHnd = callInfo->contextHandle;
+ exactContextNeedsRuntimeLookup = callInfo->exactContextNeedsRuntimeLookup;
+
+ // A recursive call is treated as a loop back to the beginning of the method.
+ if (methHnd == info.compMethodHnd)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("\nFound recursive call in the method. Mark BB%02u to BB%02u as having a backward branch.\n",
+ fgFirstBB->bbNum, compCurBB->bbNum);
+ }
+#endif
+ fgMarkBackwardJump(fgFirstBB, compCurBB);
+ }
+
+ switch (callInfo->kind)
+ {
+
+ case CORINFO_VIRTUALCALL_STUB:
+ {
+ assert(!(mflags & CORINFO_FLG_STATIC)); // can't call a static method
+ assert(!(clsFlags & CORINFO_FLG_VALUECLASS));
+ if (callInfo->stubLookup.lookupKind.needsRuntimeLookup)
+ {
+
+ if (compIsForInlining())
+ {
+ // Don't import runtime lookups when inlining
+ // Inlining has to be aborted in such a case
+ /* XXX Fri 3/20/2009
+ * By the way, this would never succeed. If the handle lookup is into the generic
+ * dictionary for a candidate, you'll generate different dictionary offsets and the
+ * inlined code will crash.
+ *
+ * To anyone reviewing this code: when could this ever succeed in the future? It'll
+ * always have a handle lookup. These lookups are safe intra-module, but we're just
+ * failing here.
+ */
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_HAS_COMPLEX_HANDLE);
+ return callRetTyp;
+ }
+
+ GenTreePtr stubAddr = impRuntimeLookupToTree(pResolvedToken, &callInfo->stubLookup, methHnd);
+ assert(!compDonotInline());
+
+ // This is the rough code to set up an indirect stub call
+ assert(stubAddr != nullptr);
+
+ // The stubAddr may be a
+ // complex expression. As it is evaluated after the args,
+ // it may cause registered args to be spilled. Simply spill it.
+
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("VirtualCall with runtime lookup"));
+ impAssignTempGen(lclNum, stubAddr, (unsigned)CHECK_SPILL_ALL);
+ stubAddr = gtNewLclvNode(lclNum, TYP_I_IMPL);
+
+ // Create the actual call node
+
+ assert((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_VARARG &&
+ (sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_NATIVEVARARG);
+
+ call = gtNewIndCallNode(stubAddr, callRetTyp, nullptr);
+
+ call->gtFlags |= GTF_EXCEPT | (stubAddr->gtFlags & GTF_GLOB_EFFECT);
+ call->gtFlags |= GTF_CALL_VIRT_STUB;
+
+#ifdef _TARGET_X86_
+ // No tailcalls allowed for these yet...
+ canTailCall = false;
+ szCanTailCallFailReason = "VirtualCall with runtime lookup";
+#endif
+ }
+ else
+ {
+ // OK, the stub is available at compile time.
+
+ call = gtNewCallNode(CT_USER_FUNC, callInfo->hMethod, callRetTyp, nullptr, ilOffset);
+ call->gtCall.gtStubCallStubAddr = callInfo->stubLookup.constLookup.addr;
+ call->gtFlags |= GTF_CALL_VIRT_STUB;
+ assert(callInfo->stubLookup.constLookup.accessType != IAT_PPVALUE);
+ if (callInfo->stubLookup.constLookup.accessType == IAT_PVALUE)
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_VIRTSTUB_REL_INDIRECT;
+ }
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ // Null check is sometimes needed for ready to run to handle
+ // non-virtual <-> virtual changes between versions
+ if (callInfo->nullInstanceCheck)
+ {
+ call->gtFlags |= GTF_CALL_NULLCHECK;
+ }
+ }
+#endif
+
+ break;
+ }
+
+ case CORINFO_VIRTUALCALL_VTABLE:
+ {
+ assert(!(mflags & CORINFO_FLG_STATIC)); // can't call a static method
+ assert(!(clsFlags & CORINFO_FLG_VALUECLASS));
+ call = gtNewCallNode(CT_USER_FUNC, callInfo->hMethod, callRetTyp, nullptr, ilOffset);
+ call->gtFlags |= GTF_CALL_VIRT_VTABLE;
+ break;
+ }
+
+ case CORINFO_VIRTUALCALL_LDVIRTFTN:
+ {
+ if (compIsForInlining())
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_HAS_CALL_VIA_LDVIRTFTN);
+ return callRetTyp;
+ }
+
+ assert(!(mflags & CORINFO_FLG_STATIC)); // can't call a static method
+ assert(!(clsFlags & CORINFO_FLG_VALUECLASS));
+ // OK, we've been told to call via LDVIRTFTN, so just
+ // make the call now....
+
+ args = impPopList(sig->numArgs, &argFlags, sig);
+
+ GenTreePtr thisPtr = impPopStack().val;
+ thisPtr = impTransformThis(thisPtr, pConstrainedResolvedToken, callInfo->thisTransform);
+ if (compDonotInline())
+ {
+ return callRetTyp;
+ }
+
+ // Clone the (possibly transformed) "this" pointer
+ GenTreePtr thisPtrCopy;
+ thisPtr = impCloneExpr(thisPtr, &thisPtrCopy, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("LDVIRTFTN this pointer"));
+
+ GenTreePtr fptr = impImportLdvirtftn(thisPtr, pResolvedToken, callInfo);
+ if (compDonotInline())
+ {
+ return callRetTyp;
+ }
+
+ thisPtr = nullptr; // can't reuse it
+
+ // Now make an indirect call through the function pointer
+
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("VirtualCall through function pointer"));
+ impAssignTempGen(lclNum, fptr, (unsigned)CHECK_SPILL_ALL);
+ fptr = gtNewLclvNode(lclNum, TYP_I_IMPL);
+
+ // Create the actual call node
+
+ call = gtNewIndCallNode(fptr, callRetTyp, args, ilOffset);
+ call->gtCall.gtCallObjp = thisPtrCopy;
+ call->gtFlags |= GTF_EXCEPT | (fptr->gtFlags & GTF_GLOB_EFFECT);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ // Null check is needed for ready to run to handle
+ // non-virtual <-> virtual changes between versions
+ call->gtFlags |= GTF_CALL_NULLCHECK;
+ }
+#endif
+
+ // Since we are jumping over some code, check that it's OK to skip that code.
+ assert((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_VARARG &&
+ (sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_NATIVEVARARG);
+ goto DONE;
+ }
+
+ case CORINFO_CALL:
+ {
+ // This is for a non-virtual, non-interface etc. call
+ call = gtNewCallNode(CT_USER_FUNC, callInfo->hMethod, callRetTyp, nullptr, ilOffset);
+
+ // We remove the nullcheck for the GetType call intrinsic.
+ // TODO-CQ: JIT64 does not introduce the null check for many more helper calls
+ // and intrinsics.
+ if (callInfo->nullInstanceCheck &&
+ !((mflags & CORINFO_FLG_INTRINSIC) != 0 && (intrinsicID == CORINFO_INTRINSIC_Object_GetType)))
+ {
+ call->gtFlags |= GTF_CALL_NULLCHECK;
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ call->gtCall.setEntryPoint(callInfo->codePointerLookup.constLookup);
+ }
+#endif
+ break;
+ }
+
+ case CORINFO_CALL_CODE_POINTER:
+ {
+ // The EE has asked us to call by computing a code pointer and then doing an
+ // indirect call. This is because a runtime lookup is required to get the code entry point.
+
+ // These calls always follow a uniform calling convention, i.e. no extra hidden params
+ assert((sig->callConv & CORINFO_CALLCONV_PARAMTYPE) == 0);
+
+ assert((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_VARARG);
+ assert((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_NATIVEVARARG);
+
+ GenTreePtr fptr =
+ impLookupToTree(pResolvedToken, &callInfo->codePointerLookup, GTF_ICON_FTN_ADDR, callInfo->hMethod);
+
+ if (compDonotInline())
+ {
+ return callRetTyp;
+ }
+
+ // Now make an indirect call through the function pointer
+
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("Indirect call through function pointer"));
+ impAssignTempGen(lclNum, fptr, (unsigned)CHECK_SPILL_ALL);
+ fptr = gtNewLclvNode(lclNum, TYP_I_IMPL);
+
+ call = gtNewIndCallNode(fptr, callRetTyp, nullptr, ilOffset);
+ call->gtFlags |= GTF_EXCEPT | (fptr->gtFlags & GTF_GLOB_EFFECT);
+ if (callInfo->nullInstanceCheck)
+ {
+ call->gtFlags |= GTF_CALL_NULLCHECK;
+ }
+
+ break;
+ }
+
+ default:
+ assert(!"unknown call kind");
+ break;
+ }
+
+ //-------------------------------------------------------------------------
+ // Set more flags
+
+ PREFIX_ASSUME(call != nullptr);
+
+ if (mflags & CORINFO_FLG_NOGCCHECK)
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_NOGCCHECK;
+ }
+
+ // Mark call if it's one of the ones we will maybe treat as an intrinsic
+ if (intrinsicID == CORINFO_INTRINSIC_Object_GetType || intrinsicID == CORINFO_INTRINSIC_TypeEQ ||
+ intrinsicID == CORINFO_INTRINSIC_TypeNEQ || intrinsicID == CORINFO_INTRINSIC_GetCurrentManagedThread ||
+ intrinsicID == CORINFO_INTRINSIC_GetManagedThreadId)
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_SPECIAL_INTRINSIC;
+ }
+ }
+ assert(sig);
+ assert(clsHnd || (opcode == CEE_CALLI)); // We're never verifying for CALLI, so this is not set.
+
+ /* Some sanity checks */
+
+ // CALL_VIRT and NEWOBJ must have a THIS pointer
+ assert((opcode != CEE_CALLVIRT && opcode != CEE_NEWOBJ) || (sig->callConv & CORINFO_CALLCONV_HASTHIS));
+ // static bit and hasThis are negations of one another
+ assert(((mflags & CORINFO_FLG_STATIC) != 0) == ((sig->callConv & CORINFO_CALLCONV_HASTHIS) == 0));
+ assert(call != nullptr);
+
+ /*-------------------------------------------------------------------------
+ * Check special-cases etc
+ */
+
+ /* Special case - Check if it is a call to Delegate.Invoke(). */
+
+ if (mflags & CORINFO_FLG_DELEGATE_INVOKE)
+ {
+ assert(!compIsForInlining());
+ assert(!(mflags & CORINFO_FLG_STATIC)); // can't call a static method
+ assert(mflags & CORINFO_FLG_FINAL);
+
+ /* Set the delegate flag */
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_DELEGATE_INV;
+
+ if (callInfo->secureDelegateInvoke)
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_SECURE_DELEGATE_INV;
+ }
+
+ if (opcode == CEE_CALLVIRT)
+ {
+ assert(mflags & CORINFO_FLG_FINAL);
+
+ /* It should have the GTF_CALL_NULLCHECK flag set. Reset it */
+ assert(call->gtFlags & GTF_CALL_NULLCHECK);
+ call->gtFlags &= ~GTF_CALL_NULLCHECK;
+ }
+ }
+
+ CORINFO_CLASS_HANDLE actualMethodRetTypeSigClass;
+ actualMethodRetTypeSigClass = sig->retTypeSigClass;
+ if (varTypeIsStruct(callRetTyp))
+ {
+ callRetTyp = impNormStructType(actualMethodRetTypeSigClass);
+ call->gtType = callRetTyp;
+ }
+
+#if !FEATURE_VARARG
+ /* Check for varargs */
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG ||
+ (sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
+ {
+ BADCODE("Varargs not supported.");
+ }
+#endif // !FEATURE_VARARG
+
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG ||
+ (sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
+ {
+ assert(!compIsForInlining());
+
+ /* Set the right flags */
+
+ call->gtFlags |= GTF_CALL_POP_ARGS;
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_VARARGS;
+
+ /* Can't allow tailcall for varargs as it is caller-pop. The caller
+ will be expecting to pop a certain number of arguments, but if we
+ tailcall to a function with a different number of arguments, we
+ are hosed. There are ways around this (caller remembers esp value,
+ varargs is not caller-pop, etc), but not worth it. */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ if (canTailCall)
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "Callee is varargs";
+ }
+#endif
+
+ /* Get the total number of arguments - this is already correct
+ * for CALLI - for methods we have to get it from the call site */
+
+ if (opcode != CEE_CALLI)
+ {
+#ifdef DEBUG
+ unsigned numArgsDef = sig->numArgs;
+#endif
+ eeGetCallSiteSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, sig);
+
+#ifdef DEBUG
+ // We cannot lazily obtain the signature of a vararg call because using its method
+ // handle will give us only the declared argument list, not the full argument list.
+ assert(call->gtCall.callSig == nullptr);
+ call->gtCall.callSig = new (this, CMK_CorSig) CORINFO_SIG_INFO;
+ *call->gtCall.callSig = *sig;
+#endif
+
+            // For vararg calls we must be sure to load the return type of the
+            // method actually being called, as well as the return type specified
+            // in the vararg signature. With type equivalency, these types may not
+            // be the same.
+ if (sig->retTypeSigClass != actualMethodRetTypeSigClass)
+ {
+ if (actualMethodRetTypeSigClass != nullptr && sig->retType != CORINFO_TYPE_CLASS &&
+ sig->retType != CORINFO_TYPE_BYREF && sig->retType != CORINFO_TYPE_PTR &&
+ sig->retType != CORINFO_TYPE_VAR)
+ {
+ // Make sure that all valuetypes (including enums) that we push are loaded.
+                    // This is to guarantee that if a GC is triggered from the prestub of this method,
+ // all valuetypes in the method signature are already loaded.
+ // We need to be able to find the size of the valuetypes, but we cannot
+ // do a class-load from within GC.
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(actualMethodRetTypeSigClass);
+ }
+ }
+
+ assert(numArgsDef <= sig->numArgs);
+ }
+
+        /* We will have "cookie" as the last argument but we cannot push
+         * it on the operand stack because we may overflow, so we append it
+         * to the arg list after we pop the other arguments */
+ }
+
+ if (mflags & CORINFO_FLG_SECURITYCHECK)
+ {
+ assert(!compIsForInlining());
+
+ // Need security prolog/epilog callouts when there is
+ // imperative security in the method. This is to give security a
+ // chance to do any setup in the prolog and cleanup in the epilog if needed.
+
+ if (compIsForInlining())
+ {
+            // Cannot handle this if the method being imported is itself an inlinee,
+            // because an inlinee method does not have its own frame.
+
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_NEEDS_SECURITY_CHECK);
+ return callRetTyp;
+ }
+ else
+ {
+ tiSecurityCalloutNeeded = true;
+
+ // If the current method calls a method which needs a security check,
+ // (i.e. the method being compiled has imperative security)
+ // we need to reserve a slot for the security object in
+ // the current method's stack frame
+ opts.compNeedSecurityCheck = true;
+ }
+ }
+
+ //--------------------------- Inline NDirect ------------------------------
+
+ if (!compIsForInlining())
+ {
+ impCheckForPInvokeCall(call, methHnd, sig, mflags);
+ }
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ // We set up the unmanaged call by linking the frame, disabling GC, etc
+ // This needs to be cleaned up on return
+ if (canTailCall)
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "Callee is native";
+ }
+
+ checkForSmallType = true;
+
+ impPopArgsForUnmanagedCall(call, sig);
+
+ goto DONE;
+ }
+ else if ((opcode == CEE_CALLI) && (((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_STDCALL) ||
+ ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_C) ||
+ ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_THISCALL) ||
+ ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_FASTCALL)))
+ {
+ if (!info.compCompHnd->canGetCookieForPInvokeCalliSig(sig))
+ {
+ // Normally this only happens with inlining.
+ // However, a generic method (or type) being NGENd into another module
+            // can run into this issue as well. There's not an easy fall-back for NGEN,
+            // so instead we fall back to JIT.
+ if (compIsForInlining())
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_CANT_EMBED_PINVOKE_COOKIE);
+ }
+ else
+ {
+ IMPL_LIMITATION("Can't get PInvoke cookie (cross module generics)");
+ }
+
+ return callRetTyp;
+ }
+
+ GenTreePtr cookie = eeGetPInvokeCookie(sig);
+
+ // This cookie is required to be either a simple GT_CNS_INT or
+ // an indirection of a GT_CNS_INT
+ //
+ GenTreePtr cookieConst = cookie;
+ if (cookie->gtOper == GT_IND)
+ {
+ cookieConst = cookie->gtOp.gtOp1;
+ }
+ assert(cookieConst->gtOper == GT_CNS_INT);
+
+ // Setting GTF_DONT_CSE on the GT_CNS_INT as well as on the GT_IND (if it exists) will ensure that
+ // we won't allow this tree to participate in any CSE logic
+ //
+ cookie->gtFlags |= GTF_DONT_CSE;
+ cookieConst->gtFlags |= GTF_DONT_CSE;
+
+ call->gtCall.gtCallCookie = cookie;
+
+ if (canTailCall)
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "PInvoke calli";
+ }
+ }
+
+ /*-------------------------------------------------------------------------
+ * Create the argument list
+ */
+
+ //-------------------------------------------------------------------------
+ // Special case - for varargs we have an implicit last argument
+
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG)
+ {
+ assert(!compIsForInlining());
+
+ void *varCookie, *pVarCookie;
+ if (!info.compCompHnd->canGetVarArgsHandle(sig))
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_CANT_EMBED_VARARGS_COOKIE);
+ return callRetTyp;
+ }
+
+ varCookie = info.compCompHnd->getVarArgsHandle(sig, &pVarCookie);
+ assert((!varCookie) != (!pVarCookie));
+ GenTreePtr cookie = gtNewIconEmbHndNode(varCookie, pVarCookie, GTF_ICON_VARG_HDL);
+
+ assert(extraArg == nullptr);
+ extraArg = gtNewArgList(cookie);
+ }
+
+ //-------------------------------------------------------------------------
+ // Extra arg for shared generic code and array methods
+ //
+ // Extra argument containing instantiation information is passed in the
+ // following circumstances:
+ // (a) To the "Address" method on array classes; the extra parameter is
+ // the array's type handle (a TypeDesc)
+ // (b) To shared-code instance methods in generic structs; the extra parameter
+ // is the struct's type handle (a vtable ptr)
+ // (c) To shared-code per-instantiation non-generic static methods in generic
+ // classes and structs; the extra parameter is the type handle
+ // (d) To shared-code generic methods; the extra parameter is an
+ // exact-instantiation MethodDesc
+ //
+ // We also set the exact type context associated with the call so we can
+ // inline the call correctly later on.
+
+ if (sig->callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ assert(call->gtCall.gtCallType == CT_USER_FUNC);
+ if (clsHnd == nullptr)
+ {
+ NO_WAY("CALLI on parameterized type");
+ }
+
+ assert(opcode != CEE_CALLI);
+
+ GenTreePtr instParam;
+ BOOL runtimeLookup;
+
+ // Instantiated generic method
+ if (((SIZE_T)exactContextHnd & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_METHOD)
+ {
+ CORINFO_METHOD_HANDLE exactMethodHandle =
+ (CORINFO_METHOD_HANDLE)((SIZE_T)exactContextHnd & ~CORINFO_CONTEXTFLAGS_MASK);
+
+ if (!exactContextNeedsRuntimeLookup)
+ {
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ instParam =
+ impReadyToRunLookupToTree(&callInfo->instParamLookup, GTF_ICON_METHOD_HDL, exactMethodHandle);
+ if (instParam == nullptr)
+ {
+ return callRetTyp;
+ }
+ }
+ else
+#endif
+ {
+ instParam = gtNewIconEmbMethHndNode(exactMethodHandle);
+ info.compCompHnd->methodMustBeLoadedBeforeCodeIsRun(exactMethodHandle);
+ }
+ }
+ else
+ {
+ instParam = impTokenToHandle(pResolvedToken, &runtimeLookup, TRUE /*mustRestoreHandle*/);
+ if (instParam == nullptr)
+ {
+ return callRetTyp;
+ }
+ }
+ }
+
+ // otherwise must be an instance method in a generic struct,
+ // a static method in a generic type, or a runtime-generated array method
+ else
+ {
+ assert(((SIZE_T)exactContextHnd & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_CLASS);
+ CORINFO_CLASS_HANDLE exactClassHandle =
+ (CORINFO_CLASS_HANDLE)((SIZE_T)exactContextHnd & ~CORINFO_CONTEXTFLAGS_MASK);
+
+ if (compIsForInlining() && (clsFlags & CORINFO_FLG_ARRAY) != 0)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_IS_ARRAY_METHOD);
+ return callRetTyp;
+ }
+
+ if ((clsFlags & CORINFO_FLG_ARRAY) && readonlyCall)
+ {
+ // We indicate "readonly" to the Address operation by using a null
+ // instParam.
+ instParam = gtNewIconNode(0, TYP_REF);
+ }
+
+ if (!exactContextNeedsRuntimeLookup)
+ {
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ instParam =
+ impReadyToRunLookupToTree(&callInfo->instParamLookup, GTF_ICON_CLASS_HDL, exactClassHandle);
+ if (instParam == NULL)
+ {
+ return callRetTyp;
+ }
+ }
+ else
+#endif
+ {
+ instParam = gtNewIconEmbClsHndNode(exactClassHandle);
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(exactClassHandle);
+ }
+ }
+ else
+ {
+ instParam = impParentClassTokenToHandle(pResolvedToken, &runtimeLookup, TRUE /*mustRestoreHandle*/);
+ if (instParam == nullptr)
+ {
+ return callRetTyp;
+ }
+ }
+ }
+
+ assert(extraArg == nullptr);
+ extraArg = gtNewArgList(instParam);
+ }
+
+ // Inlining may need the exact type context (exactContextHnd) if we're inlining shared generic code, in particular
+ // to inline 'polytypic' operations such as static field accesses, type tests and method calls which
+ // rely on the exact context. The exactContextHnd is passed back to the JitInterface at appropriate points.
+ // exactContextHnd is not currently required when inlining shared generic code into shared
+ // generic code, since the inliner aborts whenever shared code polytypic operations are encountered
+ // (e.g. anything marked needsRuntimeLookup)
+ if (exactContextNeedsRuntimeLookup)
+ {
+ exactContextHnd = nullptr;
+ }
+
+ //-------------------------------------------------------------------------
+ // The main group of arguments
+
+ args = call->gtCall.gtCallArgs = impPopList(sig->numArgs, &argFlags, sig, extraArg);
+
+ if (args)
+ {
+ call->gtFlags |= args->gtFlags & GTF_GLOB_EFFECT;
+ }
+
+ //-------------------------------------------------------------------------
+ // The "this" pointer
+
+ if (!(mflags & CORINFO_FLG_STATIC) && !((opcode == CEE_NEWOBJ) && (newobjThis == nullptr)))
+ {
+ GenTreePtr obj;
+
+ if (opcode == CEE_NEWOBJ)
+ {
+ obj = newobjThis;
+ }
+ else
+ {
+ obj = impPopStack().val;
+ obj = impTransformThis(obj, pConstrainedResolvedToken, constraintCallThisTransform);
+ if (compDonotInline())
+ {
+ return callRetTyp;
+ }
+ }
+
+ /* Is this a virtual or interface call? */
+
+ if ((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT)
+ {
+ /* only true object pointers can be virtual */
+
+ assert(obj->gtType == TYP_REF);
+ }
+ else
+ {
+ if (impIsThis(obj))
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_NONVIRT_SAME_THIS;
+ }
+ }
+
+ /* Store the "this" value in the call */
+
+ call->gtFlags |= obj->gtFlags & GTF_GLOB_EFFECT;
+ call->gtCall.gtCallObjp = obj;
+ }
+
+ //-------------------------------------------------------------------------
+ // The "this" pointer for "newobj"
+
+ if (opcode == CEE_NEWOBJ)
+ {
+ if (clsFlags & CORINFO_FLG_VAROBJSIZE)
+ {
+ assert(!(clsFlags & CORINFO_FLG_ARRAY)); // arrays handled separately
+            // This is a 'new' of a variable-sized object, where
+            // the constructor is to return the object. In this case
+            // the constructor claims to return VOID but we know it
+            // actually returns the new object.
+ assert(callRetTyp == TYP_VOID);
+ callRetTyp = TYP_REF;
+ call->gtType = TYP_REF;
+ impSpillSpecialSideEff();
+
+ impPushOnStack(call, typeInfo(TI_REF, clsHnd));
+ }
+ else
+ {
+ if (clsFlags & CORINFO_FLG_DELEGATE)
+ {
+                // The new inliner morphs it in impImportCall.
+ // This will allow us to inline the call to the delegate constructor.
+ call = fgOptimizeDelegateConstructor(call, &exactContextHnd);
+ }
+
+ if (!bIntrinsicImported)
+ {
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Keep track of the raw IL offset of the call
+ call->gtCall.gtRawILOffset = rawILOffset;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ // Is it an inline candidate?
+ impMarkInlineCandidate(call, exactContextHnd, callInfo);
+ }
+
+ // append the call node.
+ impAppendTree(call, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+
+            // Now push the value of the 'new' onto the stack.
+
+            // This is a 'new' of a non-variable-sized object.
+ // Append the new node (op1) to the statement list,
+ // and then push the local holding the value of this
+ // new instruction on the stack.
+
+ if (clsFlags & CORINFO_FLG_VALUECLASS)
+ {
+ assert(newobjThis->gtOper == GT_ADDR && newobjThis->gtOp.gtOp1->gtOper == GT_LCL_VAR);
+
+ unsigned tmp = newobjThis->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ impPushOnStack(gtNewLclvNode(tmp, lvaGetRealType(tmp)), verMakeTypeInfo(clsHnd).NormaliseForStack());
+ }
+ else
+ {
+ if (newobjThis->gtOper == GT_COMMA)
+ {
+ // In coreclr the callout can be inserted even if verification is disabled
+ // so we cannot rely on tiVerificationNeeded alone
+
+ // We must have inserted the callout. Get the real newobj.
+ newobjThis = newobjThis->gtOp.gtOp2;
+ }
+
+ assert(newobjThis->gtOper == GT_LCL_VAR);
+ impPushOnStack(gtNewLclvNode(newobjThis->gtLclVarCommon.gtLclNum, TYP_REF), typeInfo(TI_REF, clsHnd));
+ }
+ }
+ return callRetTyp;
+ }
+
+DONE:
+
+ if (tailCall)
+ {
+        // This check cannot be performed for implicit tail calls because
+        // impIsImplicitTailCallCandidate() does not check whether return
+        // types are compatible before marking a call node with PREFIX_TAILCALL_IMPLICIT.
+        // As a result, in the following case we may find that the type stack
+        // is non-empty if Callee() is considered for implicit tail calling:
+        // int Caller(..) { .... void Callee(); ret val; ... }
+        //
+        // Note that we cannot check return type compatibility before impImportCall()
+        // as we don't have the required info, or we would need to duplicate some of
+        // the logic of impImportCall().
+ //
+ // For implicit tail calls, we perform this check after return types are
+ // known to be compatible.
+ if ((tailCall & PREFIX_TAILCALL_EXPLICIT) && (verCurrentState.esStackDepth != 0))
+ {
+ BADCODE("Stack should be empty after tailcall");
+ }
+
+        // Note that we cannot relax this condition with genActualType() as
+        // the calling convention dictates that the caller of a function with
+        // a small-typed return value is responsible for normalizing the return value.
+
+ if (canTailCall &&
+ !impTailCallRetTypeCompatible(info.compRetType, info.compMethodInfo->args.retTypeClass, callRetTyp,
+ callInfo->sig.retTypeClass))
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "Return types are not tail call compatible";
+ }
+
+ // Stack empty check for implicit tail calls.
+ if (canTailCall && (tailCall & PREFIX_TAILCALL_IMPLICIT) && (verCurrentState.esStackDepth != 0))
+ {
+#ifdef _TARGET_AMD64_
+ // JIT64 Compatibility: Opportunistic tail call stack mismatch throws a VerificationException
+ // in JIT64, not an InvalidProgramException.
+ Verify(false, "Stack should be empty after tailcall");
+#else // _TARGET_64BIT_
+ BADCODE("Stack should be empty after tailcall");
+#endif //!_TARGET_64BIT_
+ }
+
+ // assert(compCurBB is not a catch, finally or filter block);
+ // assert(compCurBB is not a try block protected by a finally block);
+
+ // Check for permission to tailcall
+ bool explicitTailCall = (tailCall & PREFIX_TAILCALL_EXPLICIT) != 0;
+
+ assert(!explicitTailCall || compCurBB->bbJumpKind == BBJ_RETURN);
+
+ if (canTailCall)
+ {
+            // True virtual or indirect calls shouldn't pass in a callee handle.
+ CORINFO_METHOD_HANDLE exactCalleeHnd = ((call->gtCall.gtCallType != CT_USER_FUNC) ||
+ ((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT))
+ ? nullptr
+ : methHnd;
+ GenTreePtr thisArg = call->gtCall.gtCallObjp;
+
+ if (info.compCompHnd->canTailCall(info.compMethodHnd, methHnd, exactCalleeHnd, explicitTailCall))
+ {
+ canTailCall = true;
+ if (explicitTailCall)
+ {
+ // In case of explicit tail calls, mark it so that it is not considered
+ // for in-lining.
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_EXPLICIT_TAILCALL;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nGTF_CALL_M_EXPLICIT_TAILCALL bit set for call ");
+ printTreeID(call);
+ printf("\n");
+ }
+#endif
+ }
+ else
+ {
+#if FEATURE_TAILCALL_OPT
+ // Must be an implicit tail call.
+ assert((tailCall & PREFIX_TAILCALL_IMPLICIT) != 0);
+
+ // It is possible that a call node is both an inline candidate and marked
+                    // for opportunistic tail calling. In-lining happens before morphing of
+ // trees. If in-lining of an in-line candidate gets aborted for whatever
+ // reason, it will survive to the morphing stage at which point it will be
+ // transformed into a tail call after performing additional checks.
+
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_IMPLICIT_TAILCALL;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nGTF_CALL_M_IMPLICIT_TAILCALL bit set for call ");
+ printTreeID(call);
+ printf("\n");
+ }
+#endif
+
+#else //! FEATURE_TAILCALL_OPT
+ NYI("Implicit tail call prefix on a target which doesn't support opportunistic tail calls");
+
+#endif // FEATURE_TAILCALL_OPT
+ }
+
+ // we can't report success just yet...
+ }
+ else
+ {
+ canTailCall = false;
+// canTailCall reported its reasons already
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\ninfo.compCompHnd->canTailCall returned false for call ");
+ printTreeID(call);
+ printf("\n");
+ }
+#endif
+ }
+ }
+ else
+ {
+ // If this assert fires it means that canTailCall was set to false without setting a reason!
+ assert(szCanTailCallFailReason != nullptr);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRejecting %splicit tail call for call ", explicitTailCall ? "ex" : "im");
+ printTreeID(call);
+ printf(": %s\n", szCanTailCallFailReason);
+ }
+#endif
+ info.compCompHnd->reportTailCallDecision(info.compMethodHnd, methHnd, explicitTailCall, TAILCALL_FAIL,
+ szCanTailCallFailReason);
+ }
+ }
+
+// Note: we assume that small return types are already normalized by the managed callee
+// or by the pinvoke stub for calls to unmanaged code.
+
+DONE_CALL:
+
+ if (!bIntrinsicImported)
+ {
+ //
+ // Things needed to be checked when bIntrinsicImported is false.
+ //
+
+ assert(call->gtOper == GT_CALL);
+ assert(sig != nullptr);
+
+ // Tail calls require us to save the call site's sig info so we can obtain an argument
+ // copying thunk from the EE later on.
+ if (call->gtCall.callSig == nullptr)
+ {
+ call->gtCall.callSig = new (this, CMK_CorSig) CORINFO_SIG_INFO;
+ *call->gtCall.callSig = *sig;
+ }
+
+ if (compIsForInlining() && opcode == CEE_CALLVIRT)
+ {
+ GenTreePtr callObj = call->gtCall.gtCallObjp;
+ assert(callObj != nullptr);
+
+ unsigned callKind = call->gtFlags & GTF_CALL_VIRT_KIND_MASK;
+
+ if (((callKind != GTF_CALL_NONVIRT) || (call->gtFlags & GTF_CALL_NULLCHECK)) &&
+ impInlineIsGuaranteedThisDerefBeforeAnySideEffects(call->gtCall.gtCallArgs, callObj,
+ impInlineInfo->inlArgInfo))
+ {
+ impInlineInfo->thisDereferencedFirst = true;
+ }
+ }
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Keep track of the raw IL offset of the call
+ call->gtCall.gtRawILOffset = rawILOffset;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ // Is it an inline candidate?
+ impMarkInlineCandidate(call, exactContextHnd, callInfo);
+ }
+
+ // Push or append the result of the call
+ if (callRetTyp == TYP_VOID)
+ {
+ if (opcode == CEE_NEWOBJ)
+ {
+ // we actually did push something, so don't spill the thing we just pushed.
+ assert(verCurrentState.esStackDepth > 0);
+ impAppendTree(call, verCurrentState.esStackDepth - 1, impCurStmtOffs);
+ }
+ else
+ {
+ impAppendTree(call, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+ }
+ }
+ else
+ {
+ impSpillSpecialSideEff();
+
+ if (clsFlags & CORINFO_FLG_ARRAY)
+ {
+ eeGetCallSiteSig(pResolvedToken->token, pResolvedToken->tokenScope, pResolvedToken->tokenContext, sig);
+ }
+
+ // Find the return type used for verification by interpreting the method signature.
+ // NB: we are clobbering the already established sig.
+ if (tiVerificationNeeded)
+ {
+ // Actually, we never get the sig for the original method.
+ sig = &(callInfo->verSig);
+ }
+
+ typeInfo tiRetVal = verMakeTypeInfo(sig->retType, sig->retTypeClass);
+ tiRetVal.NormaliseForStack();
+
+ // The CEE_READONLY prefix modifies the verification semantics of an Address
+ // operation on an array type.
+ if ((clsFlags & CORINFO_FLG_ARRAY) && readonlyCall && tiRetVal.IsByRef())
+ {
+ tiRetVal.SetIsReadonlyByRef();
+ }
+
+ if (tiVerificationNeeded)
+ {
+ // We assume all calls return permanent home byrefs. If they
+ // didn't they wouldn't be verifiable. This is also covering
+ // the Address() helper for multidimensional arrays.
+ if (tiRetVal.IsByRef())
+ {
+ tiRetVal.SetIsPermanentHomeByRef();
+ }
+ }
+
+ if (call->gtOper == GT_CALL)
+ {
+ // Sometimes "call" is not a GT_CALL (if we imported an intrinsic that didn't turn into a call)
+ if (varTypeIsStruct(callRetTyp))
+ {
+ call = impFixupCallStructReturn(call, sig->retTypeClass);
+ }
+ else if (varTypeIsLong(callRetTyp))
+ {
+ call = impInitCallLongReturn(call);
+ }
+
+ if ((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0)
+ {
+ assert(opts.OptEnabled(CLFLG_INLINING));
+
+ // Make the call its own tree (spill the stack if needed).
+ impAppendTree(call, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+
+ // TODO: Still using the widened type.
+ call = gtNewInlineCandidateReturnExpr(call, genActualType(callRetTyp));
+ }
+ }
+
+ if (!bIntrinsicImported)
+ {
+ //-------------------------------------------------------------------------
+ //
+ /* If the call is of a small type and the callee is managed, the callee will normalize the result
+ before returning.
+ However, we need to normalize small type values returned by unmanaged
+ functions (pinvoke). The pinvoke stub does the normalization, but we need to do it here
+ if we use the shorter inlined pinvoke stub. */
+
+ if (checkForSmallType && varTypeIsIntegral(callRetTyp) && genTypeSize(callRetTyp) < genTypeSize(TYP_INT))
+ {
+ call = gtNewCastNode(genActualType(callRetTyp), call, callRetTyp);
+ }
+ }
+
+ impPushOnStack(call, tiRetVal);
+ }
+
+ // VSD functions get a new call target each time we getCallInfo, so clear the cache.
+ // Also, the call info cache for CALLI instructions is largely incomplete, so clear it out.
+ // if ( (opcode == CEE_CALLI) || (callInfoCache.fetchCallInfo().kind == CORINFO_VIRTUALCALL_STUB))
+ // callInfoCache.uncacheCallInfo();
+
+ return callRetTyp;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO* methInfo)
+{
+ CorInfoType corType = methInfo->args.retType;
+
+ if ((corType == CORINFO_TYPE_VALUECLASS) || (corType == CORINFO_TYPE_REFANY))
+ {
+ // We have some kind of STRUCT being returned
+
+ structPassingKind howToReturnStruct = SPK_Unknown;
+
+ var_types returnType = getReturnTypeForStruct(methInfo->args.retTypeClass, &howToReturnStruct);
+
+ if (howToReturnStruct == SPK_ByReference)
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
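+// Illustrative sketch (not part of the original sources): the check above ultimately asks whether
+// a struct return uses a hidden return buffer or comes back in registers. The real answer comes
+// from getReturnTypeForStruct() and is target/ABI specific; the guarded snippet below only models
+// the idea for a hypothetical ABI, and sketchNeedsRetBuffArg is a made-up name.
+#if 0 // illustrative only
+static bool sketchNeedsRetBuffArg(unsigned structSize)
+{
+    // Assumed rule for the sketch: 1/2/4/8-byte structs are returned in a register;
+    // anything else is returned through a caller-supplied hidden buffer.
+    bool fitsInReg = (structSize == 1) || (structSize == 2) || (structSize == 4) || (structSize == 8);
+    return !fitsInReg;
+}
+#endif // illustrative only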
+
+#ifdef DEBUG
+//
+var_types Compiler::impImportJitTestLabelMark(int numArgs)
+{
+ TestLabelAndNum tlAndN;
+ if (numArgs == 2)
+ {
+ tlAndN.m_num = 0;
+ StackEntry se = impPopStack();
+ assert(se.seTypeInfo.GetType() == TI_INT);
+ GenTreePtr val = se.val;
+ assert(val->IsCnsIntOrI());
+ tlAndN.m_tl = (TestLabel)val->AsIntConCommon()->IconValue();
+ }
+ else if (numArgs == 3)
+ {
+ StackEntry se = impPopStack();
+ assert(se.seTypeInfo.GetType() == TI_INT);
+ GenTreePtr val = se.val;
+ assert(val->IsCnsIntOrI());
+ tlAndN.m_num = val->AsIntConCommon()->IconValue();
+ se = impPopStack();
+ assert(se.seTypeInfo.GetType() == TI_INT);
+ val = se.val;
+ assert(val->IsCnsIntOrI());
+ tlAndN.m_tl = (TestLabel)val->AsIntConCommon()->IconValue();
+ }
+ else
+ {
+ assert(false);
+ }
+
+ StackEntry expSe = impPopStack();
+ GenTreePtr node = expSe.val;
+
+ // There are a small number of special cases, where we actually put the annotation on a subnode.
+ if (tlAndN.m_tl == TL_LoopHoist && tlAndN.m_num >= 100)
+ {
+ // A loop hoist annotation with value >= 100 means that the expression should be a static field access,
+ // a GT_IND of a static field address, which should be the sum of a (hoistable) helper call and possibly some
+        // offset within the static field block whose address is returned by the helper call.
+ // The annotation is saying that this address calculation, but not the entire access, should be hoisted.
+ GenTreePtr helperCall = nullptr;
+ assert(node->OperGet() == GT_IND);
+ tlAndN.m_num -= 100;
+ GetNodeTestData()->Set(node->gtOp.gtOp1, tlAndN);
+ GetNodeTestData()->Remove(node);
+ }
+ else
+ {
+ GetNodeTestData()->Set(node, tlAndN);
+ }
+
+ impPushOnStack(node, expSe.seTypeInfo);
+ return node->TypeGet();
+}
+#endif // DEBUG
+
+//-----------------------------------------------------------------------------------
+// impFixupCallStructReturn: For a call node that returns a struct type either
+// adjust the return type to an enregisterable type, or set the flag to indicate
+// struct return via retbuf arg.
+//
+// Arguments:
+// call - GT_CALL GenTree node
+// retClsHnd - Class handle of return type of the call
+//
+// Return Value:
+// Returns new GenTree node after fixing struct return of call node
+//
+GenTreePtr Compiler::impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HANDLE retClsHnd)
+{
+ assert(call->gtOper == GT_CALL);
+
+ if (!varTypeIsStruct(call))
+ {
+ return call;
+ }
+
+ call->gtCall.gtRetClsHnd = retClsHnd;
+
+ GenTreeCall* callNode = call->AsCall();
+
+#if FEATURE_MULTIREG_RET
+ // Initialize Return type descriptor of call node
+ ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
+ retTypeDesc->InitializeStructReturnType(this, retClsHnd);
+#endif // FEATURE_MULTIREG_RET
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+    // Not allowed for FEATURE_CORECLR, which is the only SKU available for System V OSs.
+ assert(!callNode->IsVarargs() && "varargs not allowed for System V OSs.");
+
+ // The return type will remain as the incoming struct type unless normalized to a
+ // single eightbyte return type below.
+ callNode->gtReturnType = call->gtType;
+
+ unsigned retRegCount = retTypeDesc->GetReturnRegCount();
+ if (retRegCount != 0)
+ {
+ if (retRegCount == 1)
+ {
+ // struct returned in a single register
+ callNode->gtReturnType = retTypeDesc->GetReturnRegType(0);
+ }
+ else
+ {
+ // must be a struct returned in two registers
+ assert(retRegCount == 2);
+
+ if ((!callNode->CanTailCall()) && (!callNode->IsInlineCandidate()))
+ {
+ // Force a call returning multi-reg struct to be always of the IR form
+ // tmp = call
+ //
+ // No need to assign a multi-reg struct to a local var if:
+ // - It is a tail call or
+ // - The call is marked for in-lining later
+ return impAssignMultiRegTypeToVar(call, retClsHnd);
+ }
+ }
+ }
+ else
+ {
+        // Struct not returned in registers, i.e., returned via a hidden retbuf arg.
+ callNode->gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG;
+ }
+
+#else // not FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#if FEATURE_MULTIREG_RET && defined(_TARGET_ARM_)
+    // There is no fixup necessary if the return type is an HFA struct.
+    // HFA structs are returned in registers for ARM32 and ARM64.
+ //
+ if (!call->gtCall.IsVarargs() && IsHfa(retClsHnd))
+ {
+ if (call->gtCall.CanTailCall())
+ {
+ if (info.compIsVarArgs)
+ {
+ // We cannot tail call because control needs to return to fixup the calling
+ // convention for result return.
+ call->gtCall.gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
+ }
+ else
+ {
+                // If we can tail call a method returning an HFA, then don't bounce the
+                // result through a variable and back.
+ return call;
+ }
+ }
+
+ if (call->gtFlags & GTF_CALL_INLINE_CANDIDATE)
+ {
+ return call;
+ }
+
+ unsigned retRegCount = retTypeDesc->GetReturnRegCount();
+ if (retRegCount >= 2)
+ {
+ return impAssignMultiRegTypeToVar(call, retClsHnd);
+ }
+ }
+#endif // _TARGET_ARM_
+
+    // Check for a TYP_STRUCT type that wraps a primitive type.
+    // Such structs are returned using a single register,
+    // and we change the return type on those calls here.
+ //
+ structPassingKind howToReturnStruct;
+ var_types returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
+
+ if (howToReturnStruct == SPK_ByReference)
+ {
+ assert(returnType == TYP_UNKNOWN);
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG;
+ }
+ else
+ {
+ assert(returnType != TYP_UNKNOWN);
+ call->gtCall.gtReturnType = returnType;
+
+ // ToDo: Refactor this common code sequence into its own method as it is used 4+ times
+ if ((returnType == TYP_LONG) && (compLongUsed == false))
+ {
+ compLongUsed = true;
+ }
+ else if (((returnType == TYP_FLOAT) || (returnType == TYP_DOUBLE)) && (compFloatingPointUsed == false))
+ {
+ compFloatingPointUsed = true;
+ }
+
+#if FEATURE_MULTIREG_RET
+ unsigned retRegCount = retTypeDesc->GetReturnRegCount();
+ assert(retRegCount != 0);
+
+ if (retRegCount >= 2)
+ {
+ if ((!callNode->CanTailCall()) && (!callNode->IsInlineCandidate()))
+ {
+ // Force a call returning multi-reg struct to be always of the IR form
+ // tmp = call
+ //
+ // No need to assign a multi-reg struct to a local var if:
+ // - It is a tail call or
+ // - The call is marked for in-lining later
+ return impAssignMultiRegTypeToVar(call, retClsHnd);
+ }
+ }
+#endif // FEATURE_MULTIREG_RET
+ }
+
+#endif // not FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ return call;
+}
+
+//-------------------------------------------------------------------------------------
+// impInitCallLongReturn:
+//    Initialize the ReturnTypeDesc for a call that returns a TYP_LONG
+//
+// Arguments:
+// call - GT_CALL GenTree node
+//
+// Return Value:
+// Returns new GenTree node after initializing the ReturnTypeDesc of call node
+//
+GenTreePtr Compiler::impInitCallLongReturn(GenTreePtr call)
+{
+ assert(call->gtOper == GT_CALL);
+
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ // LEGACY_BACKEND does not use multi reg returns for calls with long return types
+
+ if (varTypeIsLong(call))
+ {
+ GenTreeCall* callNode = call->AsCall();
+
+ // The return type will remain as the incoming long type
+ callNode->gtReturnType = call->gtType;
+
+ // Initialize Return type descriptor of call node
+ ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
+ retTypeDesc->InitializeLongReturnType(this);
+
+ // must be a long returned in two registers
+ assert(retTypeDesc->GetReturnRegCount() == 2);
+ }
+#endif // _TARGET_X86_ && !LEGACY_BACKEND
+
+ return call;
+}
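+// Illustrative sketch (not part of the original sources): on 32-bit x86 a 64-bit integer return
+// value is split across a register pair (conventionally EDX:EAX), which is why the ReturnTypeDesc
+// above reports two return registers. The guarded snippet below only shows the lo/hi split; the
+// names are made up for the sketch.
+#if 0 // illustrative only
+static void sketchSplitLongReturn(unsigned long long value, unsigned* lo, unsigned* hi)
+{
+    // Low 32 bits go in the first return register, high 32 bits in the second.
+    *lo = (unsigned)(value & 0xFFFFFFFFu);
+    *hi = (unsigned)(value >> 32);
+}
+#endif // illustrative only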
+
+/*****************************************************************************
+   For struct return values, re-type the operand in the case where the ABI
+   does not use a struct return buffer.
+   Note that this method is only called for !_TARGET_X86_.
+ */
+
+GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDLE retClsHnd)
+{
+ assert(varTypeIsStruct(info.compRetType));
+ assert(info.compRetBuffArg == BAD_VAR_NUM);
+
+#if defined(_TARGET_XARCH_)
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // No VarArgs for CoreCLR on x64 Unix
+ assert(!info.compIsVarArgs);
+
+ // Is method returning a multi-reg struct?
+ if (varTypeIsStruct(info.compRetNativeType) && IsMultiRegReturnedType(retClsHnd))
+ {
+ // In case of multi-reg struct return, we force IR to be one of the following:
+ // GT_RETURN(lclvar) or GT_RETURN(call). If op is anything other than a
+ // lclvar or call, it is assigned to a temp to create: temp = op and GT_RETURN(tmp).
+
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ // Make sure that this struct stays in memory and doesn't get promoted.
+ unsigned lclNum = op->gtLclVarCommon.gtLclNum;
+ lvaTable[lclNum].lvIsMultiRegRet = true;
+
+ return op;
+ }
+
+ if (op->gtOper == GT_CALL)
+ {
+ return op;
+ }
+
+ return impAssignMultiRegTypeToVar(op, retClsHnd);
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(info.compRetNativeType != TYP_STRUCT);
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#elif FEATURE_MULTIREG_RET && defined(_TARGET_ARM_)
+
+ if (varTypeIsStruct(info.compRetNativeType) && !info.compIsVarArgs && IsHfa(retClsHnd))
+ {
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ // This LCL_VAR is an HFA return value, it stays as a TYP_STRUCT
+ unsigned lclNum = op->gtLclVarCommon.gtLclNum;
+ // Make sure this struct type stays as struct so that we can return it as an HFA
+ lvaTable[lclNum].lvIsMultiRegRet = true;
+ return op;
+ }
+
+ if (op->gtOper == GT_CALL)
+ {
+ if (op->gtCall.IsVarargs())
+ {
+ // We cannot tail call because control needs to return to fixup the calling
+ // convention for result return.
+ op->gtCall.gtCallMoreFlags &= ~GTF_CALL_M_TAILCALL;
+ op->gtCall.gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
+ }
+ else
+ {
+ return op;
+ }
+ }
+ return impAssignMultiRegTypeToVar(op, retClsHnd);
+ }
+
+#elif FEATURE_MULTIREG_RET && defined(_TARGET_ARM64_)
+
+ // Is method returning a multi-reg struct?
+ if (IsMultiRegReturnedType(retClsHnd))
+ {
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ // This LCL_VAR stays as a TYP_STRUCT
+ unsigned lclNum = op->gtLclVarCommon.gtLclNum;
+
+ // Make sure this struct type is not struct promoted
+ lvaTable[lclNum].lvIsMultiRegRet = true;
+ return op;
+ }
+
+ if (op->gtOper == GT_CALL)
+ {
+ if (op->gtCall.IsVarargs())
+ {
+ // We cannot tail call because control needs to return to fixup the calling
+ // convention for result return.
+ op->gtCall.gtCallMoreFlags &= ~GTF_CALL_M_TAILCALL;
+ op->gtCall.gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
+ }
+ else
+ {
+ return op;
+ }
+ }
+ return impAssignMultiRegTypeToVar(op, retClsHnd);
+ }
+
+#endif // FEATURE_MULTIREG_RET && FEATURE_HFA
+
+REDO_RETURN_NODE:
+    // Adjust the type away from struct to integral,
+    // with no normalizing.
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ op->ChangeOper(GT_LCL_FLD);
+ }
+ else if (op->gtOper == GT_OBJ)
+ {
+ GenTreePtr op1 = op->AsObj()->Addr();
+
+ // We will fold away OBJ/ADDR
+ // except for OBJ/ADDR/INDEX
+ // as the array type influences the array element's offset
+ // Later in this method we change op->gtType to info.compRetNativeType
+ // This is not correct when op is a GT_INDEX as the starting offset
+ // for the array elements 'elemOffs' is different for an array of
+ // TYP_REF than an array of TYP_STRUCT (which simply wraps a TYP_REF)
+ // Also refer to the GTF_INX_REFARR_LAYOUT flag
+ //
+ if ((op1->gtOper == GT_ADDR) && (op1->gtOp.gtOp1->gtOper != GT_INDEX))
+ {
+ // Change '*(&X)' to 'X' and see if we can do better
+ op = op1->gtOp.gtOp1;
+ goto REDO_RETURN_NODE;
+ }
+ op->gtObj.gtClass = NO_CLASS_HANDLE;
+ op->ChangeOperUnchecked(GT_IND);
+ op->gtFlags |= GTF_IND_TGTANYWHERE;
+ }
+ else if (op->gtOper == GT_CALL)
+ {
+ if (op->AsCall()->TreatAsHasRetBufArg(this))
+ {
+ // This must be one of those 'special' helpers that don't
+ // really have a return buffer, but instead use it as a way
+ // to keep the trees cleaner with fewer address-taken temps.
+ //
+            // Well, now we have to materialize the return buffer as
+ // an address-taken temp. Then we can return the temp.
+ //
+ // NOTE: this code assumes that since the call directly
+ // feeds the return, then the call must be returning the
+ // same structure/class/type.
+ //
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("pseudo return buffer"));
+
+ // No need to spill anything as we're about to return.
+ impAssignTempGen(tmpNum, op, info.compMethodInfo->args.retTypeClass, (unsigned)CHECK_SPILL_NONE);
+
+ // Don't create both a GT_ADDR & GT_OBJ just to undo all of that; instead,
+ // jump directly to a GT_LCL_FLD.
+ op = gtNewLclvNode(tmpNum, info.compRetNativeType);
+ op->ChangeOper(GT_LCL_FLD);
+ }
+ else
+ {
+ assert(info.compRetNativeType == op->gtCall.gtReturnType);
+
+ // Don't change the gtType of the node just yet, it will get changed later.
+ return op;
+ }
+ }
+ else if (op->gtOper == GT_COMMA)
+ {
+ op->gtOp.gtOp2 = impFixupStructReturnType(op->gtOp.gtOp2, retClsHnd);
+ }
+
+ op->gtType = info.compRetNativeType;
+
+ return op;
+}
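+// Illustrative sketch (not part of the original sources): the retyping above treats a struct that
+// wraps a single primitive as that primitive for return purposes, since the bits are identical.
+// The guarded snippet below shows the same reinterpretation in isolation; SketchIntWrapper and
+// sketchReturnAsPrimitive are made-up names for the sketch, not JIT code.
+#if 0 // illustrative only
+#include <cstring>
+struct SketchIntWrapper // assumed single-field struct, same size and layout as its payload
+{
+    int value;
+};
+static int sketchReturnAsPrimitive(SketchIntWrapper wrapper)
+{
+    // Reinterpret the 4-byte struct as the 4-byte integer it wraps; no conversion beyond a
+    // copy is required, which is the property the retyping relies on.
+    int asInt;
+    std::memcpy(&asInt, &wrapper, sizeof(asInt));
+    return asInt;
+}
+#endif // illustrative only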
+
+/*****************************************************************************
+ CEE_LEAVE may be jumping out of a protected block, viz, a catch or a
+ finally-protected try. We find the finally blocks protecting the current
+ offset (in order) by walking over the complete exception table and
+ finding enclosing clauses. This assumes that the table is sorted.
+ This will create a series of BBJ_CALLFINALLY -> BBJ_CALLFINALLY ... -> BBJ_ALWAYS.
+
+ If we are leaving a catch handler, we need to attach the
+ CPX_ENDCATCHes to the correct BBJ_CALLFINALLY blocks.
+
+ After this function, the BBJ_LEAVE block has been converted to a different type.
+ */
+
+#if !FEATURE_EH_FUNCLETS
+
+void Compiler::impImportLeave(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nBefore import CEE_LEAVE:\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+
+ bool invalidatePreds = false; // If we create new blocks, invalidate the predecessor lists (if created)
+ unsigned blkAddr = block->bbCodeOffs;
+ BasicBlock* leaveTarget = block->bbJumpDest;
+ unsigned jmpAddr = leaveTarget->bbCodeOffs;
+
+    // LEAVE clears the stack: spill side effects and set the stack depth to 0
+
+ impSpillSideEffects(true, (unsigned)CHECK_SPILL_ALL DEBUGARG("impImportLeave"));
+ verCurrentState.esStackDepth = 0;
+
+ assert(block->bbJumpKind == BBJ_LEAVE);
+ assert(fgBBs == (BasicBlock**)0xCDCD || fgLookupBB(jmpAddr) != NULL); // should be a BB boundary
+
+ BasicBlock* step = DUMMY_INIT(NULL);
+ unsigned encFinallies = 0; // Number of enclosing finallies.
+ GenTreePtr endCatches = NULL;
+ GenTreePtr endLFin = NULL; // The statement tree to indicate the end of locally-invoked finally.
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ // Grab the handler offsets
+
+ IL_OFFSET tryBeg = HBtab->ebdTryBegOffs();
+ IL_OFFSET tryEnd = HBtab->ebdTryEndOffs();
+ IL_OFFSET hndBeg = HBtab->ebdHndBegOffs();
+ IL_OFFSET hndEnd = HBtab->ebdHndEndOffs();
+
+ /* Is this a catch-handler we are CEE_LEAVEing out of?
+ * If so, we need to call CORINFO_HELP_ENDCATCH.
+ */
+
+ if (jitIsBetween(blkAddr, hndBeg, hndEnd) && !jitIsBetween(jmpAddr, hndBeg, hndEnd))
+ {
+ // Can't CEE_LEAVE out of a finally/fault handler
+ if (HBtab->HasFinallyOrFaultHandler())
+ BADCODE("leave out of fault/finally block");
+
+ // Create the call to CORINFO_HELP_ENDCATCH
+ GenTreePtr endCatch = gtNewHelperCallNode(CORINFO_HELP_ENDCATCH, TYP_VOID);
+
+ // Make a list of all the currently pending endCatches
+ if (endCatches)
+ endCatches = gtNewOperNode(GT_COMMA, TYP_VOID, endCatches, endCatch);
+ else
+ endCatches = endCatch;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - BB%02u jumping out of catch handler EH#%u, adding call to "
+ "CORINFO_HELP_ENDCATCH\n",
+ block->bbNum, XTnum);
+ }
+#endif
+ }
+ else if (HBtab->HasFinallyHandler() && jitIsBetween(blkAddr, tryBeg, tryEnd) &&
+ !jitIsBetween(jmpAddr, tryBeg, tryEnd))
+ {
+ /* This is a finally-protected try we are jumping out of */
+
+ /* If there are any pending endCatches, and we have already
+ jumped out of a finally-protected try, then the endCatches
+ have to be put in a block in an outer try for async
+ exceptions to work correctly.
+               Else, just append to the original block */
+
+ BasicBlock* callBlock;
+
+ assert(!encFinallies == !endLFin); // if we have finallies, we better have an endLFin tree, and vice-versa
+
+ if (encFinallies == 0)
+ {
+ assert(step == DUMMY_INIT(NULL));
+ callBlock = block;
+ callBlock->bbJumpKind = BBJ_CALLFINALLY; // convert the BBJ_LEAVE to BBJ_CALLFINALLY
+
+ if (endCatches)
+ impAppendTree(endCatches, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try, convert block to BBJ_CALLFINALLY "
+ "block BB%02u [%08p]\n",
+ callBlock->bbNum, dspPtr(callBlock));
+ }
+#endif
+ }
+ else
+ {
+ assert(step != DUMMY_INIT(NULL));
+
+ /* Calling the finally block */
+ callBlock = fgNewBBinRegion(BBJ_CALLFINALLY, XTnum + 1, 0, step);
+ assert(step->bbJumpKind == BBJ_ALWAYS);
+ step->bbJumpDest = callBlock; // the previous call to a finally returns to this call (to the next
+ // finally in the chain)
+ step->bbJumpDest->bbRefs++;
+
+ /* The new block will inherit this block's weight */
+ callBlock->setBBWeight(block->bbWeight);
+ callBlock->bbFlags |= block->bbFlags & BBF_RUN_RARELY;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try, new BBJ_CALLFINALLY block BB%02u "
+ "[%08p]\n",
+ callBlock->bbNum, dspPtr(callBlock));
+ }
+#endif
+
+ GenTreePtr lastStmt;
+
+ if (endCatches)
+ {
+ lastStmt = gtNewStmt(endCatches);
+ endLFin->gtNext = lastStmt;
+ lastStmt->gtPrev = endLFin;
+ }
+ else
+ {
+ lastStmt = endLFin;
+ }
+
+ // note that this sets BBF_IMPORTED on the block
+ impEndTreeList(callBlock, endLFin, lastStmt);
+ }
+
+ step = fgNewBBafter(BBJ_ALWAYS, callBlock, true);
+ /* The new block will inherit this block's weight */
+ step->setBBWeight(block->bbWeight);
+ step->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try, created step (BBJ_ALWAYS) block "
+ "BB%02u [%08p]\n",
+ step->bbNum, dspPtr(step));
+ }
+#endif
+
+ unsigned finallyNesting = compHndBBtab[XTnum].ebdHandlerNestingLevel;
+ assert(finallyNesting <= compHndBBtabCount);
+
+ callBlock->bbJumpDest = HBtab->ebdHndBeg; // This callBlock will call the "finally" handler.
+ endLFin = new (this, GT_END_LFIN) GenTreeVal(GT_END_LFIN, TYP_VOID, finallyNesting);
+ endLFin = gtNewStmt(endLFin);
+ endCatches = NULL;
+
+ encFinallies++;
+
+ invalidatePreds = true;
+ }
+ }
+
+ /* Append any remaining endCatches, if any */
+
+ assert(!encFinallies == !endLFin);
+
+ if (encFinallies == 0)
+ {
+ assert(step == DUMMY_INIT(NULL));
+ block->bbJumpKind = BBJ_ALWAYS; // convert the BBJ_LEAVE to a BBJ_ALWAYS
+
+ if (endCatches)
+ impAppendTree(endCatches, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - no enclosing finally-protected try blocks; convert CEE_LEAVE block to BBJ_ALWAYS "
+ "block BB%02u [%08p]\n",
+ block->bbNum, dspPtr(block));
+ }
+#endif
+ }
+ else
+ {
+ // If leaveTarget is the start of another try block, we want to make sure that
+ // we do not insert finalStep into that try block. Hence, we find the enclosing
+ // try block.
+ unsigned tryIndex = bbFindInnermostCommonTryRegion(step, leaveTarget);
+
+ // Insert a new BB either in the try region indicated by tryIndex or
+ // the handler region indicated by leaveTarget->bbHndIndex,
+ // depending on which is the inner region.
+ BasicBlock* finalStep = fgNewBBinRegion(BBJ_ALWAYS, tryIndex, leaveTarget->bbHndIndex, step);
+ finalStep->bbFlags |= BBF_KEEP_BBJ_ALWAYS;
+ step->bbJumpDest = finalStep;
+
+ /* The new block will inherit this block's weight */
+ finalStep->setBBWeight(block->bbWeight);
+ finalStep->bbFlags |= block->bbFlags & BBF_RUN_RARELY;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - finalStep block required (encFinallies(%d) > 0), new block BB%02u [%08p]\n",
+ encFinallies, finalStep->bbNum, dspPtr(finalStep));
+ }
+#endif
+
+ GenTreePtr lastStmt;
+
+ if (endCatches)
+ {
+ lastStmt = gtNewStmt(endCatches);
+ endLFin->gtNext = lastStmt;
+ lastStmt->gtPrev = endLFin;
+ }
+ else
+ {
+ lastStmt = endLFin;
+ }
+
+ impEndTreeList(finalStep, endLFin, lastStmt);
+
+ finalStep->bbJumpDest = leaveTarget; // this is the ultimate destination of the LEAVE
+
+ // Queue up the jump target for importing
+
+ impImportBlockPending(leaveTarget);
+
+ invalidatePreds = true;
+ }
+
+ if (invalidatePreds && fgComputePredsDone)
+ {
+ JITDUMP("\n**** impImportLeave - Removing preds after creating new blocks\n");
+ fgRemovePreds();
+ }
+
+#ifdef DEBUG
+ fgVerifyHandlerTab();
+
+ if (verbose)
+ {
+ printf("\nAfter import CEE_LEAVE:\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+}
+
+#else // FEATURE_EH_FUNCLETS
+
+void Compiler::impImportLeave(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+        printf("\nBefore import CEE_LEAVE in BB%02u (targeting BB%02u):\n", block->bbNum, block->bbJumpDest->bbNum);
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+
+ bool invalidatePreds = false; // If we create new blocks, invalidate the predecessor lists (if created)
+ unsigned blkAddr = block->bbCodeOffs;
+ BasicBlock* leaveTarget = block->bbJumpDest;
+ unsigned jmpAddr = leaveTarget->bbCodeOffs;
+
+    // LEAVE clears the stack: spill side effects and set the stack depth to 0
+
+ impSpillSideEffects(true, (unsigned)CHECK_SPILL_ALL DEBUGARG("impImportLeave"));
+ verCurrentState.esStackDepth = 0;
+
+ assert(block->bbJumpKind == BBJ_LEAVE);
+ assert(fgBBs == (BasicBlock**)0xCDCD || fgLookupBB(jmpAddr) != nullptr); // should be a BB boundary
+
+ BasicBlock* step = nullptr;
+
+ enum StepType
+ {
+ // No step type; step == NULL.
+ ST_None,
+
+ // Is the step block the BBJ_ALWAYS block of a BBJ_CALLFINALLY/BBJ_ALWAYS pair?
+ // That is, is step->bbJumpDest where a finally will return to?
+ ST_FinallyReturn,
+
+ // The step block is a catch return.
+ ST_Catch,
+
+ // The step block is in a "try", created as the target for a finally return or the target for a catch return.
+ ST_Try
+ };
+ StepType stepType = ST_None;
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ // Grab the handler offsets
+
+ IL_OFFSET tryBeg = HBtab->ebdTryBegOffs();
+ IL_OFFSET tryEnd = HBtab->ebdTryEndOffs();
+ IL_OFFSET hndBeg = HBtab->ebdHndBegOffs();
+ IL_OFFSET hndEnd = HBtab->ebdHndEndOffs();
+
+ /* Is this a catch-handler we are CEE_LEAVEing out of?
+ */
+
+ if (jitIsBetween(blkAddr, hndBeg, hndEnd) && !jitIsBetween(jmpAddr, hndBeg, hndEnd))
+ {
+ // Can't CEE_LEAVE out of a finally/fault handler
+ if (HBtab->HasFinallyOrFaultHandler())
+ {
+ BADCODE("leave out of fault/finally block");
+ }
+
+ /* We are jumping out of a catch */
+
+ if (step == nullptr)
+ {
+ step = block;
+ step->bbJumpKind = BBJ_EHCATCHRET; // convert the BBJ_LEAVE to BBJ_EHCATCHRET
+ stepType = ST_Catch;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a catch (EH#%u), convert block BB%02u to BBJ_EHCATCHRET "
+ "block\n",
+ XTnum, step->bbNum);
+ }
+#endif
+ }
+ else
+ {
+ BasicBlock* exitBlock;
+
+ /* Create a new catch exit block in the catch region for the existing step block to jump to in this
+ * scope */
+ exitBlock = fgNewBBinRegion(BBJ_EHCATCHRET, 0, XTnum + 1, step);
+
+ assert(step->bbJumpKind == BBJ_ALWAYS || step->bbJumpKind == BBJ_EHCATCHRET);
+ step->bbJumpDest = exitBlock; // the previous step (maybe a call to a nested finally, or a nested catch
+ // exit) returns to this block
+ step->bbJumpDest->bbRefs++;
+
+#if defined(_TARGET_ARM_)
+ if (stepType == ST_FinallyReturn)
+ {
+ assert(step->bbJumpKind == BBJ_ALWAYS);
+ // Mark the target of a finally return
+ step->bbJumpDest->bbFlags |= BBF_FINALLY_TARGET;
+ }
+#endif // defined(_TARGET_ARM_)
+
+ /* The new block will inherit this block's weight */
+ exitBlock->setBBWeight(block->bbWeight);
+ exitBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
+
+ /* This exit block is the new step */
+ step = exitBlock;
+ stepType = ST_Catch;
+
+ invalidatePreds = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a catch (EH#%u), new BBJ_EHCATCHRET block BB%02u\n", XTnum,
+ exitBlock->bbNum);
+ }
+#endif
+ }
+ }
+ else if (HBtab->HasFinallyHandler() && jitIsBetween(blkAddr, tryBeg, tryEnd) &&
+ !jitIsBetween(jmpAddr, tryBeg, tryEnd))
+ {
+ /* We are jumping out of a finally-protected try */
+
+ BasicBlock* callBlock;
+
+ if (step == nullptr)
+ {
+#if FEATURE_EH_CALLFINALLY_THUNKS
+
+ // Put the call to the finally in the enclosing region.
+ unsigned callFinallyTryIndex =
+ (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : HBtab->ebdEnclosingTryIndex + 1;
+ unsigned callFinallyHndIndex =
+ (HBtab->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : HBtab->ebdEnclosingHndIndex + 1;
+ callBlock = fgNewBBinRegion(BBJ_CALLFINALLY, callFinallyTryIndex, callFinallyHndIndex, block);
+
+ // Convert the BBJ_LEAVE to BBJ_ALWAYS, jumping to the new BBJ_CALLFINALLY. This is because
+ // the new BBJ_CALLFINALLY is in a different EH region, thus it can't just replace the BBJ_LEAVE,
+ // which might be in the middle of the "try". In most cases, the BBJ_ALWAYS will jump to the
+ // next block, and flow optimizations will remove it.
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = callBlock;
+ block->bbJumpDest->bbRefs++;
+
+ /* The new block will inherit this block's weight */
+ callBlock->setBBWeight(block->bbWeight);
+ callBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try (EH#%u), convert block BB%02u to "
+ "BBJ_ALWAYS, add BBJ_CALLFINALLY block BB%02u\n",
+ XTnum, block->bbNum, callBlock->bbNum);
+ }
+#endif
+
+#else // !FEATURE_EH_CALLFINALLY_THUNKS
+
+ callBlock = block;
+ callBlock->bbJumpKind = BBJ_CALLFINALLY; // convert the BBJ_LEAVE to BBJ_CALLFINALLY
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try (EH#%u), convert block BB%02u to "
+ "BBJ_CALLFINALLY block\n",
+ XTnum, callBlock->bbNum);
+ }
+#endif
+
+#endif // !FEATURE_EH_CALLFINALLY_THUNKS
+ }
+ else
+ {
+ // Calling the finally block. We already have a step block that is either the call-to-finally from a
+ // more nested try/finally (thus we are jumping out of multiple nested 'try' blocks, each protected by
+ // a 'finally'), or the step block is the return from a catch.
+ //
+ // Due to ThreadAbortException, we can't have the catch return target the call-to-finally block
+ // directly. Note that if a 'catch' ends without resetting the ThreadAbortException, the VM will
+ // automatically re-raise the exception, using the return address of the catch (that is, the target
+ // block of the BBJ_EHCATCHRET) as the re-raise address. If this address is in a finally, the VM will
+ // refuse to do the re-raise, and the ThreadAbortException will get eaten (and lost). On AMD64/ARM64,
+ // we put the call-to-finally thunk in a special "cloned finally" EH region that does look like a
+ // finally clause to the VM. Thus, on these platforms, we can't have BBJ_EHCATCHRET target a
+ // BBJ_CALLFINALLY directly. (Note that on ARM32, we don't mark the thunk specially -- it lives directly
+ // within the 'try' region protected by the finally, since we generate code in such a way that execution
+ // never returns to the call-to-finally call, and the finally-protected 'try' region doesn't appear on
+ // stack walks.)
+
+ assert(step->bbJumpKind == BBJ_ALWAYS || step->bbJumpKind == BBJ_EHCATCHRET);
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ if (step->bbJumpKind == BBJ_EHCATCHRET)
+ {
+ // Need to create another step block in the 'try' region that will actually branch to the
+ // call-to-finally thunk.
+ BasicBlock* step2 = fgNewBBinRegion(BBJ_ALWAYS, XTnum + 1, 0, step);
+ step->bbJumpDest = step2;
+ step->bbJumpDest->bbRefs++;
+ step2->setBBWeight(block->bbWeight);
+ step2->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try (EH#%u), step block is "
+ "BBJ_EHCATCHRET (BB%02u), new BBJ_ALWAYS step-step block BB%02u\n",
+ XTnum, step->bbNum, step2->bbNum);
+ }
+#endif
+
+ step = step2;
+ assert(stepType == ST_Catch); // Leave it as catch type for now.
+ }
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ unsigned callFinallyTryIndex =
+ (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : HBtab->ebdEnclosingTryIndex + 1;
+ unsigned callFinallyHndIndex =
+ (HBtab->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : HBtab->ebdEnclosingHndIndex + 1;
+#else // !FEATURE_EH_CALLFINALLY_THUNKS
+ unsigned callFinallyTryIndex = XTnum + 1;
+ unsigned callFinallyHndIndex = 0; // don't care
+#endif // !FEATURE_EH_CALLFINALLY_THUNKS
+
+ callBlock = fgNewBBinRegion(BBJ_CALLFINALLY, callFinallyTryIndex, callFinallyHndIndex, step);
+ step->bbJumpDest = callBlock; // the previous call to a finally returns to this call (to the next
+ // finally in the chain)
+ step->bbJumpDest->bbRefs++;
+
+#if defined(_TARGET_ARM_)
+ if (stepType == ST_FinallyReturn)
+ {
+ assert(step->bbJumpKind == BBJ_ALWAYS);
+ // Mark the target of a finally return
+ step->bbJumpDest->bbFlags |= BBF_FINALLY_TARGET;
+ }
+#endif // defined(_TARGET_ARM_)
+
+ /* The new block will inherit this block's weight */
+ callBlock->setBBWeight(block->bbWeight);
+ callBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try (EH#%u), new BBJ_CALLFINALLY block "
+ "BB%02u\n",
+ XTnum, callBlock->bbNum);
+ }
+#endif
+ }
+
+ step = fgNewBBafter(BBJ_ALWAYS, callBlock, true);
+ stepType = ST_FinallyReturn;
+
+ /* The new block will inherit this block's weight */
+ step->setBBWeight(block->bbWeight);
+ step->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try (EH#%u), created step (BBJ_ALWAYS) "
+ "block BB%02u\n",
+ XTnum, step->bbNum);
+ }
+#endif
+
+ callBlock->bbJumpDest = HBtab->ebdHndBeg; // This callBlock will call the "finally" handler.
+
+ invalidatePreds = true;
+ }
+ else if (HBtab->HasCatchHandler() && jitIsBetween(blkAddr, tryBeg, tryEnd) &&
+ !jitIsBetween(jmpAddr, tryBeg, tryEnd))
+ {
+ // We are jumping out of a catch-protected try.
+ //
+ // If we are returning from a call to a finally, then we must have a step block within a try
+ // that is protected by a catch. This is so when unwinding from that finally (e.g., if code within the
+ // finally raises an exception), the VM will find this step block, notice that it is in a protected region,
+ // and invoke the appropriate catch.
+ //
+ // We also need to handle a special case with the handling of ThreadAbortException. If a try/catch
+ // catches a ThreadAbortException (which might be because it catches a parent, e.g. System.Exception),
+ // and the catch doesn't call System.Threading.Thread::ResetAbort(), then when the catch returns to the VM,
+ // the VM will automatically re-raise the ThreadAbortException. When it does this, it uses the target
+ // address of the catch return as the new exception address. That is, the re-raised exception appears to
+ // occur at the catch return address. If this exception return address skips an enclosing try/catch that
+ // catches ThreadAbortException, then the enclosing try/catch will not catch the exception, as it should.
+ // For example:
+ //
+ // try {
+ // try {
+ // // something here raises ThreadAbortException
+ // LEAVE LABEL_1; // no need to stop at LABEL_2
+ // } catch (Exception) {
+ // // This catches ThreadAbortException, but doesn't call System.Threading.Thread::ResetAbort(), so
+ // // ThreadAbortException is re-raised by the VM at the address specified by the LEAVE opcode.
+ // // This is bad, since it means the outer try/catch won't get a chance to catch the re-raised
+ // // ThreadAbortException. So, instead, create step block LABEL_2 and LEAVE to that. We only
+ // // need to do this transformation if the current EH block is a try/catch that catches
+ // // ThreadAbortException (or one of its parents), however we might not be able to find that
+ // // information, so currently we do it for all catch types.
+ // LEAVE LABEL_1; // Convert this to LEAVE LABEL2;
+ // }
+ // LABEL_2: LEAVE LABEL_1; // inserted by this step creation code
+ // } catch (ThreadAbortException) {
+ // }
+ // LABEL_1:
+ //
+ // Note that this pattern isn't theoretical: it occurs in ASP.NET, in IL code generated by the Roslyn C#
+ // compiler.
+
+ if ((stepType == ST_FinallyReturn) || (stepType == ST_Catch))
+ {
+ BasicBlock* catchStep;
+
+ assert(step);
+
+ if (stepType == ST_FinallyReturn)
+ {
+ assert(step->bbJumpKind == BBJ_ALWAYS);
+ }
+ else
+ {
+ assert(stepType == ST_Catch);
+ assert(step->bbJumpKind == BBJ_EHCATCHRET);
+ }
+
+ /* Create a new exit block in the try region for the existing step block to jump to in this scope */
+ catchStep = fgNewBBinRegion(BBJ_ALWAYS, XTnum + 1, 0, step);
+ step->bbJumpDest = catchStep;
+ step->bbJumpDest->bbRefs++;
+
+#if defined(_TARGET_ARM_)
+ if (stepType == ST_FinallyReturn)
+ {
+ // Mark the target of a finally return
+ step->bbJumpDest->bbFlags |= BBF_FINALLY_TARGET;
+ }
+#endif // defined(_TARGET_ARM_)
+
+ /* The new block will inherit this block's weight */
+ catchStep->setBBWeight(block->bbWeight);
+ catchStep->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (stepType == ST_FinallyReturn)
+ {
+ printf("impImportLeave - return from finally jumping out of a catch-protected try (EH#%u), new "
+ "BBJ_ALWAYS block BB%02u\n",
+ XTnum, catchStep->bbNum);
+ }
+ else
+ {
+ assert(stepType == ST_Catch);
+ printf("impImportLeave - return from catch jumping out of a catch-protected try (EH#%u), new "
+ "BBJ_ALWAYS block BB%02u\n",
+ XTnum, catchStep->bbNum);
+ }
+ }
+#endif // DEBUG
+
+ /* This block is the new step */
+ step = catchStep;
+ stepType = ST_Try;
+
+ invalidatePreds = true;
+ }
+ }
+ }
+
+ if (step == nullptr)
+ {
+ block->bbJumpKind = BBJ_ALWAYS; // convert the BBJ_LEAVE to a BBJ_ALWAYS
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - no enclosing finally-protected try blocks or catch handlers; convert CEE_LEAVE "
+ "block BB%02u to BBJ_ALWAYS\n",
+ block->bbNum);
+ }
+#endif
+ }
+ else
+ {
+ step->bbJumpDest = leaveTarget; // this is the ultimate destination of the LEAVE
+
+#if defined(_TARGET_ARM_)
+ if (stepType == ST_FinallyReturn)
+ {
+ assert(step->bbJumpKind == BBJ_ALWAYS);
+ // Mark the target of a finally return
+ step->bbJumpDest->bbFlags |= BBF_FINALLY_TARGET;
+ }
+#endif // defined(_TARGET_ARM_)
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - final destination of step blocks set to BB%02u\n", leaveTarget->bbNum);
+ }
+#endif
+
+ // Queue up the jump target for importing
+
+ impImportBlockPending(leaveTarget);
+ }
+
+ if (invalidatePreds && fgComputePredsDone)
+ {
+ JITDUMP("\n**** impImportLeave - Removing preds after creating new blocks\n");
+ fgRemovePreds();
+ }
+
+#ifdef DEBUG
+ fgVerifyHandlerTab();
+
+ if (verbose)
+ {
+ printf("\nAfter import CEE_LEAVE:\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+/*****************************************************************************/
+// This is called when reimporting a leave block. It resets the JumpKind,
+// JumpDest, and bbNext to the original values
+
+void Compiler::impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr)
+{
+#if FEATURE_EH_FUNCLETS
+    // With EH funclets, while importing the leave opcode we create another block ending with BBJ_ALWAYS (call it B1),
+    // and the block containing the leave (say B0) is marked as BBJ_CALLFINALLY. If for some reason we reimport B0,
+    // it is reset (in this routine) to end with BBJ_LEAVE; further down, when B0 is reimported, we create another
+    // BBJ_ALWAYS (call it B2). In this process B1 gets orphaned, and any blocks for which B1 is the only predecessor
+    // are also considered orphans and attempted to be deleted.
+ //
+ // try {
+ // ....
+ // try
+ // {
+ // ....
+ // leave OUTSIDE; // B0 is the block containing this leave, following this would be B1
+ // } finally { }
+ // } finally { }
+ // OUTSIDE:
+ //
+    // In the above nested try-finally example, we create a step block (call it Bstep) which branches to the block
+    // that the finally would branch to (and that block is marked as a finally target). Block B1 branches to the step
+    // block. Because of the re-import of B0, Bstep is also orphaned. Since Bstep is a finally target, it cannot be
+    // removed. To work around this we duplicate B0 (call it B0Dup) before resetting. B0Dup is marked as
+    // BBJ_CALLFINALLY and only serves to pair up with B1 (the BBJ_ALWAYS that got orphaned). Now, during orphan block
+    // deletion, B0Dup and B1 will be treated as a pair and handled correctly.
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ BasicBlock* dupBlock = bbNewBasicBlock(block->bbJumpKind);
+ dupBlock->bbFlags = block->bbFlags;
+ dupBlock->bbJumpDest = block->bbJumpDest;
+ dupBlock->copyEHRegion(block);
+ dupBlock->bbCatchTyp = block->bbCatchTyp;
+
+        // Mark this block:
+        // a) as not referenced by any other block, to make sure that it gets deleted
+        // b) with weight zero
+        // c) as already imported, to prevent it from being imported again
+        // d) as internal
+        // e) as rarely run
+ dupBlock->bbRefs = 0;
+ dupBlock->bbWeight = 0;
+ dupBlock->bbFlags |= BBF_IMPORTED | BBF_INTERNAL | BBF_RUN_RARELY;
+
+ // Insert the block right after the block which is getting reset so that BBJ_CALLFINALLY and BBJ_ALWAYS
+ // will be next to each other.
+ fgInsertBBafter(block, dupBlock);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("New Basic Block BB%02u duplicate of BB%02u created.\n", dupBlock->bbNum, block->bbNum);
+ }
+#endif
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ block->bbJumpKind = BBJ_LEAVE;
+ fgInitBBLookup();
+ block->bbJumpDest = fgLookupBB(jmpAddr);
+
+    // We will leave the BBJ_ALWAYS block we introduced. When it's reimported
+    // the BBJ_ALWAYS block will be unreachable, and will be removed afterwards. The
+    // reason we don't want to remove the block at this point is that if we call
+    // fgInitBBLookup() again we will do it wrong, as the BBJ_ALWAYS block won't be
+    // added and the linked list length will be different from fgBBcount.
+}
+
+/*****************************************************************************/
+// Get the first non-prefix opcode. Used for verification of valid combinations
+// of prefixes and actual opcodes.
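+//
+// As an illustrative (assumed) IL fragment: for the byte sequence
+//     unaligned. 1 volatile. ldind.i4
+// the loop below steps over both prefixes (skipping their operands via opcodeSizes[])
+// and returns CEE_LDIND_I4; if no non-prefix opcode is found before codeEndp, it
+// returns CEE_ILLEGAL.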
+
+static OPCODE impGetNonPrefixOpcode(const BYTE* codeAddr, const BYTE* codeEndp)
+{
+ while (codeAddr < codeEndp)
+ {
+ OPCODE opcode = (OPCODE)getU1LittleEndian(codeAddr);
+ codeAddr += sizeof(__int8);
+
+ if (opcode == CEE_PREFIX1)
+ {
+ if (codeAddr >= codeEndp)
+ {
+ break;
+ }
+ opcode = (OPCODE)(getU1LittleEndian(codeAddr) + 256);
+ codeAddr += sizeof(__int8);
+ }
+
+ switch (opcode)
+ {
+ case CEE_UNALIGNED:
+ case CEE_VOLATILE:
+ case CEE_TAILCALL:
+ case CEE_CONSTRAINED:
+ case CEE_READONLY:
+ break;
+ default:
+ return opcode;
+ }
+
+ codeAddr += opcodeSizes[opcode];
+ }
+
+ return CEE_ILLEGAL;
+}
+
+/*****************************************************************************/
+// Checks whether the opcode is a valid opcode for volatile. and unaligned. prefixes
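+//
+// For example (a sketch of the check below): "volatile. ldsfld" passes because ldsfld/stsfld
+// are explicitly allowed when volatilePrefix is true, whereas "unaligned. ldsfld" fails with
+// BADCODE, since that allowance does not apply to the unaligned. prefix.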
+
+static void impValidateMemoryAccessOpcode(const BYTE* codeAddr, const BYTE* codeEndp, bool volatilePrefix)
+{
+ OPCODE opcode = impGetNonPrefixOpcode(codeAddr, codeEndp);
+
+ if (!(
+            // The opcodes for all the ldind and stind variants happen to be contiguous, except for stind.i.
+ ((CEE_LDIND_I1 <= opcode) && (opcode <= CEE_STIND_R8)) || (opcode == CEE_STIND_I) ||
+ (opcode == CEE_LDFLD) || (opcode == CEE_STFLD) || (opcode == CEE_LDOBJ) || (opcode == CEE_STOBJ) ||
+ (opcode == CEE_INITBLK) || (opcode == CEE_CPBLK) ||
+ // volatile. prefix is allowed with the ldsfld and stsfld
+ (volatilePrefix && ((opcode == CEE_LDSFLD) || (opcode == CEE_STSFLD)))))
+ {
+ BADCODE("Invalid opcode for unaligned. or volatile. prefix");
+ }
+}
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+#undef RETURN // undef contracts RETURN macro
+
+enum controlFlow_t
+{
+ NEXT,
+ CALL,
+ RETURN,
+ THROW,
+ BRANCH,
+ COND_BRANCH,
+ BREAK,
+ PHI,
+ META,
+};
+
+const static controlFlow_t controlFlow[] = {
+#define OPDEF(c, s, pop, push, args, type, l, s1, s2, flow) flow,
+#include "opcode.def"
+#undef OPDEF
+};
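+
+// The X-macro above keeps only the control-flow column of each OPDEF entry in opcode.def, so
+// (assuming the OPCODE enumeration is generated from the same file in the same order) the table
+// can be indexed by opcode to get its control-flow kind, e.g. controlFlow[CEE_NOP] == NEXT.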
+
+#endif // DEBUG
+
+/*****************************************************************************
+ * Determine the result type of an arithmetic operation.
+ * On 64-bit targets, inserts upcasts when native int is mixed with int32.
+ */
+var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTreePtr* pOp1, GenTreePtr* pOp2)
+{
+ var_types type = TYP_UNDEF;
+ GenTreePtr op1 = *pOp1, op2 = *pOp2;
+
+    // Arithmetic operations are generally only allowed with
+    // primitive types, but certain operations are allowed
+    // with byrefs
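+    //
+    // A rough summary of the cases handled below (assuming valid IL; the code has the exact checks):
+    //
+    //    byref - byref            => native int
+    //    byref - [native] int     => byref
+    //    [native] int - byref     => native int
+    //    byref + [native] int     => byref      (either operand order)
+    //    int32 op native int      => native int (on 64-bit targets, after upcasting the int32 operand)
+    //    int op int, float op float, long op long => the operands' actual type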
+
+ if ((oper == GT_SUB) && (genActualType(op1->TypeGet()) == TYP_BYREF || genActualType(op2->TypeGet()) == TYP_BYREF))
+ {
+ if ((genActualType(op1->TypeGet()) == TYP_BYREF) && (genActualType(op2->TypeGet()) == TYP_BYREF))
+ {
+ // byref1-byref2 => gives a native int
+ type = TYP_I_IMPL;
+ }
+ else if (genActualTypeIsIntOrI(op1->TypeGet()) && (genActualType(op2->TypeGet()) == TYP_BYREF))
+ {
+ // [native] int - byref => gives a native int
+
+ //
+ // The reason is that it is possible, in managed C++,
+ // to have a tree like this:
+ //
+ // -
+ // / \
+ // / \
+ // / \
+ // / \
+ // const(h) int addr byref
+ //
+ // <BUGNUM> VSW 318822 </BUGNUM>
+ //
+ // So here we decide to make the resulting type to be a native int.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+ if (genActualType(op1->TypeGet()) != TYP_I_IMPL)
+ {
+ // insert an explicit upcast
+ op1 = *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, (var_types)(fUnsigned ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+#endif // _TARGET_64BIT_
+
+ type = TYP_I_IMPL;
+ }
+ else
+ {
+ // byref - [native] int => gives a byref
+ assert(genActualType(op1->TypeGet()) == TYP_BYREF && genActualTypeIsIntOrI(op2->TypeGet()));
+
+#ifdef _TARGET_64BIT_
+ if ((genActualType(op2->TypeGet()) != TYP_I_IMPL))
+ {
+ // insert an explicit upcast
+ op2 = *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, (var_types)(fUnsigned ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+#endif // _TARGET_64BIT_
+
+ type = TYP_BYREF;
+ }
+ }
+ else if ((oper == GT_ADD) &&
+ (genActualType(op1->TypeGet()) == TYP_BYREF || genActualType(op2->TypeGet()) == TYP_BYREF))
+ {
+ // byref + [native] int => gives a byref
+ // (or)
+ // [native] int + byref => gives a byref
+
+ // only one can be a byref : byref op byref not allowed
+ assert(genActualType(op1->TypeGet()) != TYP_BYREF || genActualType(op2->TypeGet()) != TYP_BYREF);
+ assert(genActualTypeIsIntOrI(op1->TypeGet()) || genActualTypeIsIntOrI(op2->TypeGet()));
+
+#ifdef _TARGET_64BIT_
+ if (genActualType(op2->TypeGet()) == TYP_BYREF)
+ {
+ if (genActualType(op1->TypeGet()) != TYP_I_IMPL)
+ {
+ // insert an explicit upcast
+ op1 = *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, (var_types)(fUnsigned ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+ }
+ else if (genActualType(op2->TypeGet()) != TYP_I_IMPL)
+ {
+ // insert an explicit upcast
+ op2 = *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, (var_types)(fUnsigned ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+#endif // _TARGET_64BIT_
+
+ type = TYP_BYREF;
+ }
+#ifdef _TARGET_64BIT_
+ else if (genActualType(op1->TypeGet()) == TYP_I_IMPL || genActualType(op2->TypeGet()) == TYP_I_IMPL)
+ {
+ assert(!varTypeIsFloating(op1->gtType) && !varTypeIsFloating(op2->gtType));
+
+        // int + native int => gives native int
+        // native int + int => gives native int
+        // we get this mixing because in the IL the "long" here isn't an Int64, it's just IntPtr (native int)
+
+ if (genActualType(op1->TypeGet()) != TYP_I_IMPL)
+ {
+ // insert an explicit upcast
+ op1 = *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, (var_types)(fUnsigned ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+ else if (genActualType(op2->TypeGet()) != TYP_I_IMPL)
+ {
+ // insert an explicit upcast
+ op2 = *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, (var_types)(fUnsigned ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+
+ type = TYP_I_IMPL;
+ }
+#else // 32-bit TARGET
+ else if (genActualType(op1->TypeGet()) == TYP_LONG || genActualType(op2->TypeGet()) == TYP_LONG)
+ {
+ assert(!varTypeIsFloating(op1->gtType) && !varTypeIsFloating(op2->gtType));
+
+ // int + long => gives long
+ // long + int => gives long
+
+ type = TYP_LONG;
+ }
+#endif // _TARGET_64BIT_
+ else
+ {
+ // int + int => gives an int
+ assert(genActualType(op1->TypeGet()) != TYP_BYREF && genActualType(op2->TypeGet()) != TYP_BYREF);
+
+ assert(genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) ||
+ varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType));
+
+ type = genActualType(op1->gtType);
+
+#if FEATURE_X87_DOUBLES
+
+ // For x87, since we only have 1 size of registers, prefer double
+ // For everybody else, be more precise
+ if (type == TYP_FLOAT)
+ type = TYP_DOUBLE;
+
+#else // !FEATURE_X87_DOUBLES
+
+ // If both operands are TYP_FLOAT, then leave it as TYP_FLOAT.
+ // Otherwise, turn floats into doubles
+ if ((type == TYP_FLOAT) && (genActualType(op2->gtType) != TYP_FLOAT))
+ {
+ assert(genActualType(op2->gtType) == TYP_DOUBLE);
+ type = TYP_DOUBLE;
+ }
+
+#endif // FEATURE_X87_DOUBLES
+ }
+
+#if FEATURE_X87_DOUBLES
+ assert(type == TYP_BYREF || type == TYP_DOUBLE || type == TYP_LONG || type == TYP_INT);
+#else // FEATURE_X87_DOUBLES
+ assert(type == TYP_BYREF || type == TYP_DOUBLE || type == TYP_FLOAT || type == TYP_LONG || type == TYP_INT);
+#endif // FEATURE_X87_DOUBLES
+
+ return type;
+}
+
+/*****************************************************************************
+ * Casting Helper Function to service both CEE_CASTCLASS and CEE_ISINST
+ *
+ *  pResolvedToken contains the token, op1 contains the value being cast,
+ *  and op2 contains code that creates the type handle corresponding to that token;
+ * isCastClass = true means CEE_CASTCLASS, false means CEE_ISINST
+ */
+GenTreePtr Compiler::impCastClassOrIsInstToTree(GenTreePtr op1,
+ GenTreePtr op2,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ bool isCastClass)
+{
+ bool expandInline;
+
+ assert(op1->TypeGet() == TYP_REF);
+
+ CorInfoHelpFunc helper = info.compCompHnd->getCastingHelper(pResolvedToken, isCastClass);
+
+ if (isCastClass)
+ {
+ // We only want to expand inline the normal CHKCASTCLASS helper;
+ expandInline = (helper == CORINFO_HELP_CHKCASTCLASS);
+ }
+ else
+ {
+ if (helper == CORINFO_HELP_ISINSTANCEOFCLASS)
+ {
+            // Get the class handle and class attributes for the type we are casting to
+ //
+ DWORD flags = info.compCompHnd->getClassAttribs(pResolvedToken->hClass);
+
+ //
+ // If the class handle is marked as final we can also expand the IsInst check inline
+ //
+ expandInline = ((flags & CORINFO_FLG_FINAL) != 0);
+
+ //
+ // But don't expand inline these two cases
+ //
+ if (flags & CORINFO_FLG_MARSHAL_BYREF)
+ {
+ expandInline = false;
+ }
+ else if (flags & CORINFO_FLG_CONTEXTFUL)
+ {
+ expandInline = false;
+ }
+ }
+ else
+ {
+ //
+ // We can't expand inline any other helpers
+ //
+ expandInline = false;
+ }
+ }
+
+ if (expandInline)
+ {
+ if (compCurBB->isRunRarely())
+ {
+ expandInline = false; // not worth the code expansion in a rarely run block
+ }
+
+ if ((op1->gtFlags & GTF_GLOB_EFFECT) && lvaHaveManyLocals())
+ {
+ expandInline = false; // not worth creating an untracked local variable
+ }
+ }
+
+ if (!expandInline)
+ {
+ // If we CSE this class handle we prevent assertionProp from making SubType assertions
+ // so instead we force the CSE logic to not consider CSE-ing this class handle.
+ //
+ op2->gtFlags |= GTF_DONT_CSE;
+
+ return gtNewHelperCallNode(helper, TYP_REF, 0, gtNewArgList(op2, op1));
+ }
+
+ impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark2"));
+
+ GenTreePtr temp;
+ GenTreePtr condMT;
+ //
+ // expand the methodtable match:
+ //
+ // condMT ==> GT_NE
+ // / \
+ // GT_IND op2 (typically CNS_INT)
+ // |
+ // op1Copy
+ //
+
+ // This can replace op1 with a GT_COMMA that evaluates op1 into a local
+ //
+ op1 = impCloneExpr(op1, &temp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("CASTCLASS eval op1"));
+ //
+ // op1 is now known to be a non-complex tree
+ // thus we can use gtClone(op1) from now on
+ //
+
+ GenTreePtr op2Var = op2;
+ if (isCastClass)
+ {
+ op2Var = fgInsertCommaFormTemp(&op2);
+ lvaTable[op2Var->AsLclVarCommon()->GetLclNum()].lvIsCSE = true;
+ }
+ temp = gtNewOperNode(GT_IND, TYP_I_IMPL, temp);
+ temp->gtFlags |= GTF_EXCEPT;
+ condMT = gtNewOperNode(GT_NE, TYP_INT, temp, op2);
+
+ GenTreePtr condNull;
+ //
+ // expand the null check:
+ //
+ // condNull ==> GT_EQ
+ // / \
+ // op1Copy CNS_INT
+ // null
+ //
+ condNull = gtNewOperNode(GT_EQ, TYP_INT, gtClone(op1), gtNewIconNode(0, TYP_REF));
+
+ //
+ // expand the true and false trees for the condMT
+ //
+ GenTreePtr condFalse = gtClone(op1);
+ GenTreePtr condTrue;
+ if (isCastClass)
+ {
+ //
+ // use the special helper that skips the cases checked by our inlined cast
+ //
+ helper = CORINFO_HELP_CHKCASTCLASS_SPECIAL;
+
+ condTrue = gtNewHelperCallNode(helper, TYP_REF, 0, gtNewArgList(op2Var, gtClone(op1)));
+ }
+ else
+ {
+ condTrue = gtNewIconNode(0, TYP_REF);
+ }
+
+#define USE_QMARK_TREES
+
+#ifdef USE_QMARK_TREES
+ GenTreePtr qmarkMT;
+ //
+ // Generate first QMARK - COLON tree
+ //
+ // qmarkMT ==> GT_QMARK
+ // / \
+ // condMT GT_COLON
+ // / \
+ // condFalse condTrue
+ //
+ temp = new (this, GT_COLON) GenTreeColon(TYP_REF, condTrue, condFalse);
+ qmarkMT = gtNewQmarkNode(TYP_REF, condMT, temp);
+ condMT->gtFlags |= GTF_RELOP_QMARK;
+
+ GenTreePtr qmarkNull;
+ //
+ // Generate second QMARK - COLON tree
+ //
+ // qmarkNull ==> GT_QMARK
+ // / \
+ // condNull GT_COLON
+ // / \
+ // qmarkMT op1Copy
+ //
+ temp = new (this, GT_COLON) GenTreeColon(TYP_REF, gtClone(op1), qmarkMT);
+ qmarkNull = gtNewQmarkNode(TYP_REF, condNull, temp);
+ qmarkNull->gtFlags |= GTF_QMARK_CAST_INSTOF;
+ condNull->gtFlags |= GTF_RELOP_QMARK;
+
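+    // Taken together, the two QMARK trees evaluate to something like the following sketch
+    // (pseudo-code, not a literal tree dump; "mt(op1)" stands for the method table loaded by
+    // the GT_IND above, and "clsHnd" for op2):
+    //
+    //    (op1 == null) ? op1
+    //                  : (mt(op1) != clsHnd)
+    //                        ? (isCastClass ? CORINFO_HELP_CHKCASTCLASS_SPECIAL(clsHnd, op1) : null)
+    //                        : op1
+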
+ // Make QMark node a top level node by spilling it.
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("spilling QMark2"));
+ impAssignTempGen(tmp, qmarkNull, (unsigned)CHECK_SPILL_NONE);
+ return gtNewLclvNode(tmp, TYP_REF);
+#endif
+}
+
+#ifndef DEBUG
+#define assertImp(cond) ((void)0)
+#else
+#define assertImp(cond) \
+ do \
+ { \
+ if (!(cond)) \
+ { \
+ const int cchAssertImpBuf = 600; \
+ char* assertImpBuf = (char*)alloca(cchAssertImpBuf); \
+ _snprintf_s(assertImpBuf, cchAssertImpBuf, cchAssertImpBuf - 1, \
+ "%s : Possibly bad IL with CEE_%s at offset %04Xh (op1=%s op2=%s stkDepth=%d)", #cond, \
+ impCurOpcName, impCurOpcOffs, op1 ? varTypeName(op1->TypeGet()) : "NULL", \
+ op2 ? varTypeName(op2->TypeGet()) : "NULL", verCurrentState.esStackDepth); \
+ assertAbort(assertImpBuf, __FILE__, __LINE__); \
+ } \
+ } while (0)
+#endif // DEBUG
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+/*****************************************************************************
+ * Import the instr for the given basic block
+ */
+void Compiler::impImportBlockCode(BasicBlock* block)
+{
+#define _impResolveToken(kind) impResolveToken(codeAddr, &resolvedToken, kind)
+
+#ifdef DEBUG
+
+ if (verbose)
+ {
+ printf("\nImporting BB%02u (PC=%03u) of '%s'", block->bbNum, block->bbCodeOffs, info.compFullName);
+ }
+#endif
+
+ unsigned nxtStmtIndex = impInitBlockLineInfo();
+ IL_OFFSET nxtStmtOffs;
+
+ GenTreePtr arrayNodeFrom, arrayNodeTo, arrayNodeToIndex;
+ bool expandInline;
+ CorInfoHelpFunc helper;
+ CorInfoIsAccessAllowedResult accessAllowedResult;
+ CORINFO_HELPER_DESC calloutHelper;
+ const BYTE* lastLoadToken = nullptr;
+
+ // reject cyclic constraints
+ if (tiVerificationNeeded)
+ {
+ Verify(!info.hasCircularClassConstraints, "Method parent has circular class type parameter constraints.");
+ Verify(!info.hasCircularMethodConstraints, "Method has circular method type parameter constraints.");
+ }
+
+ /* Get the tree list started */
+
+ impBeginTreeList();
+
+ /* Walk the opcodes that comprise the basic block */
+
+ const BYTE* codeAddr = info.compCode + block->bbCodeOffs;
+ const BYTE* codeEndp = info.compCode + block->bbCodeOffsEnd;
+
+ IL_OFFSET opcodeOffs = block->bbCodeOffs;
+ IL_OFFSET lastSpillOffs = opcodeOffs;
+
+ signed jmpDist;
+
+ /* remember the start of the delegate creation sequence (used for verification) */
+ const BYTE* delegateCreateStart = nullptr;
+
+ int prefixFlags = 0;
+ bool explicitTailCall, constraintCall, readonlyCall;
+
+ bool insertLdloc = false; // set by CEE_DUP and cleared by following store
+ typeInfo tiRetVal;
+
+ unsigned numArgs = info.compArgsCount;
+
+ /* Now process all the opcodes in the block */
+
+ var_types callTyp = TYP_COUNT;
+ OPCODE prevOpcode = CEE_ILLEGAL;
+
+ if (block->bbCatchTyp)
+ {
+ if (info.compStmtOffsetsImplicit & ICorDebugInfo::CALL_SITE_BOUNDARIES)
+ {
+ impCurStmtOffsSet(block->bbCodeOffs);
+ }
+
+ // We will spill the GT_CATCH_ARG and the input of the BB_QMARK block
+ // to a temp. This is a trade off for code simplicity
+ impSpillSpecialSideEff();
+ }
+
+ while (codeAddr < codeEndp)
+ {
+ bool usingReadyToRunHelper = false;
+ CORINFO_RESOLVED_TOKEN resolvedToken;
+ CORINFO_RESOLVED_TOKEN constrainedResolvedToken;
+ CORINFO_CALL_INFO callInfo;
+ CORINFO_FIELD_INFO fieldInfo;
+
+ tiRetVal = typeInfo(); // Default type info
+
+ //---------------------------------------------------------------------
+
+ /* We need to restrict the max tree depth as many of the Compiler
+ functions are recursive. We do this by spilling the stack */
+
+ if (verCurrentState.esStackDepth)
+ {
+        /* Has it been a while since we last saw an empty stack or spilled (which
+           would guarantee that the tree depth isn't accumulating)? */
+
+ if ((opcodeOffs - lastSpillOffs) > 200)
+ {
+ impSpillStackEnsure();
+ lastSpillOffs = opcodeOffs;
+ }
+ }
+ else
+ {
+ lastSpillOffs = opcodeOffs;
+ impBoxTempInUse = false; // nothing on the stack, box temp OK to use again
+ }
+
+ /* Compute the current instr offset */
+
+ opcodeOffs = (IL_OFFSET)(codeAddr - info.compCode);
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+#ifndef DEBUG
+ if (opts.compDbgInfo)
+#endif
+ {
+ if (!compIsForInlining())
+ {
+ nxtStmtOffs =
+ (nxtStmtIndex < info.compStmtOffsetsCount) ? info.compStmtOffsets[nxtStmtIndex] : BAD_IL_OFFSET;
+
+ /* Have we reached the next stmt boundary ? */
+
+ if (nxtStmtOffs != BAD_IL_OFFSET && opcodeOffs >= nxtStmtOffs)
+ {
+ assert(nxtStmtOffs == info.compStmtOffsets[nxtStmtIndex]);
+
+ if (verCurrentState.esStackDepth != 0 && opts.compDbgCode)
+ {
+ /* We need to provide accurate IP-mapping at this point.
+ So spill anything on the stack so that it will form
+ gtStmts with the correct stmt offset noted */
+
+ impSpillStackEnsure(true);
+ }
+
+ // Has impCurStmtOffs been reported in any tree?
+
+ if (impCurStmtOffs != BAD_IL_OFFSET && opts.compDbgCode)
+ {
+ GenTreePtr placeHolder = new (this, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
+ impAppendTree(placeHolder, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+ assert(impCurStmtOffs == BAD_IL_OFFSET);
+ }
+
+ if (impCurStmtOffs == BAD_IL_OFFSET)
+ {
+ /* Make sure that nxtStmtIndex is in sync with opcodeOffs.
+ If opcodeOffs has gone past nxtStmtIndex, catch up */
+
+ while ((nxtStmtIndex + 1) < info.compStmtOffsetsCount &&
+ info.compStmtOffsets[nxtStmtIndex + 1] <= opcodeOffs)
+ {
+ nxtStmtIndex++;
+ }
+
+ /* Go to the new stmt */
+
+ impCurStmtOffsSet(info.compStmtOffsets[nxtStmtIndex]);
+
+ /* Update the stmt boundary index */
+
+ nxtStmtIndex++;
+ assert(nxtStmtIndex <= info.compStmtOffsetsCount);
+
+ /* Are there any more line# entries after this one? */
+
+ if (nxtStmtIndex < info.compStmtOffsetsCount)
+ {
+ /* Remember where the next line# starts */
+
+ nxtStmtOffs = info.compStmtOffsets[nxtStmtIndex];
+ }
+ else
+ {
+ /* No more line# entries */
+
+ nxtStmtOffs = BAD_IL_OFFSET;
+ }
+ }
+ }
+ else if ((info.compStmtOffsetsImplicit & ICorDebugInfo::STACK_EMPTY_BOUNDARIES) &&
+ (verCurrentState.esStackDepth == 0))
+ {
+ /* At stack-empty locations, we have already added the tree to
+ the stmt list with the last offset. We just need to update
+ impCurStmtOffs
+ */
+
+ impCurStmtOffsSet(opcodeOffs);
+ }
+ else if ((info.compStmtOffsetsImplicit & ICorDebugInfo::CALL_SITE_BOUNDARIES) &&
+ impOpcodeIsCallSiteBoundary(prevOpcode))
+ {
+ /* Make sure we have a type cached */
+ assert(callTyp != TYP_COUNT);
+
+ if (callTyp == TYP_VOID)
+ {
+ impCurStmtOffsSet(opcodeOffs);
+ }
+ else if (opts.compDbgCode)
+ {
+ impSpillStackEnsure(true);
+ impCurStmtOffsSet(opcodeOffs);
+ }
+ }
+ else if ((info.compStmtOffsetsImplicit & ICorDebugInfo::NOP_BOUNDARIES) && (prevOpcode == CEE_NOP))
+ {
+ if (opts.compDbgCode)
+ {
+ impSpillStackEnsure(true);
+ }
+
+ impCurStmtOffsSet(opcodeOffs);
+ }
+
+ assert(impCurStmtOffs == BAD_IL_OFFSET || nxtStmtOffs == BAD_IL_OFFSET ||
+ jitGetILoffs(impCurStmtOffs) <= nxtStmtOffs);
+ }
+ }
+
+#endif // defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+ CORINFO_CLASS_HANDLE clsHnd = DUMMY_INIT(NULL);
+ CORINFO_CLASS_HANDLE ldelemClsHnd = DUMMY_INIT(NULL);
+ CORINFO_CLASS_HANDLE stelemClsHnd = DUMMY_INIT(NULL);
+
+ var_types lclTyp, ovflType = TYP_UNKNOWN;
+ GenTreePtr op1 = DUMMY_INIT(NULL);
+ GenTreePtr op2 = DUMMY_INIT(NULL);
+ GenTreeArgList* args = nullptr; // What good do these "DUMMY_INIT"s do?
+ GenTreePtr newObjThisPtr = DUMMY_INIT(NULL);
+ bool uns = DUMMY_INIT(false);
+
+ /* Get the next opcode and the size of its parameters */
+
+ OPCODE opcode = (OPCODE)getU1LittleEndian(codeAddr);
+ codeAddr += sizeof(__int8);
+
+#ifdef DEBUG
+ impCurOpcOffs = (IL_OFFSET)(codeAddr - info.compCode - 1);
+ JITDUMP("\n [%2u] %3u (0x%03x) ", verCurrentState.esStackDepth, impCurOpcOffs, impCurOpcOffs);
+#endif
+
+ DECODE_OPCODE:
+
+ // Return if any previous code has caused inline to fail.
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ /* Get the size of additional parameters */
+
+ signed int sz = opcodeSizes[opcode];
+
+#ifdef DEBUG
+ clsHnd = NO_CLASS_HANDLE;
+ lclTyp = TYP_COUNT;
+ callTyp = TYP_COUNT;
+
+ impCurOpcOffs = (IL_OFFSET)(codeAddr - info.compCode - 1);
+ impCurOpcName = opcodeNames[opcode];
+
+ if (verbose && (opcode != CEE_PREFIX1))
+ {
+ printf("%s", impCurOpcName);
+ }
+
+ /* Use assertImp() to display the opcode */
+
+ op1 = op2 = nullptr;
+#endif
+
+ /* See what kind of an opcode we have, then */
+
+ unsigned mflags = 0;
+ unsigned clsFlags = 0;
+
+ switch (opcode)
+ {
+ unsigned lclNum;
+ var_types type;
+
+ GenTreePtr op3;
+ genTreeOps oper;
+ unsigned size;
+
+ int val;
+
+ CORINFO_SIG_INFO sig;
+ unsigned flags;
+ IL_OFFSET jmpAddr;
+ bool ovfl, unordered, callNode;
+ bool ldstruct;
+ CORINFO_CLASS_HANDLE tokenType;
+
+ union {
+ int intVal;
+ float fltVal;
+ __int64 lngVal;
+ double dblVal;
+ } cval;
+
+ case CEE_PREFIX1:
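+                // Two-byte opcodes are encoded as the CEE_PREFIX1 escape byte (0xFE) followed by a
+                // second byte; biasing that second byte by 256 maps it into the two-byte range of
+                // the OPCODE enumeration (the same decoding impGetNonPrefixOpcode uses above).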
+ opcode = (OPCODE)(getU1LittleEndian(codeAddr) + 256);
+ codeAddr += sizeof(__int8);
+ opcodeOffs = (IL_OFFSET)(codeAddr - info.compCode);
+ goto DECODE_OPCODE;
+
+ SPILL_APPEND:
+
+ /* Append 'op1' to the list of statements */
+ impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+ goto DONE_APPEND;
+
+ APPEND:
+
+ /* Append 'op1' to the list of statements */
+
+ impAppendTree(op1, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+ goto DONE_APPEND;
+
+ DONE_APPEND:
+
+#ifdef DEBUG
+ // Remember at which BC offset the tree was finished
+ impNoteLastILoffs();
+#endif
+ break;
+
+ case CEE_LDNULL:
+ impPushNullObjRefOnStack();
+ break;
+
+ case CEE_LDC_I4_M1:
+ case CEE_LDC_I4_0:
+ case CEE_LDC_I4_1:
+ case CEE_LDC_I4_2:
+ case CEE_LDC_I4_3:
+ case CEE_LDC_I4_4:
+ case CEE_LDC_I4_5:
+ case CEE_LDC_I4_6:
+ case CEE_LDC_I4_7:
+ case CEE_LDC_I4_8:
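+                // CEE_LDC_I4_M1 through CEE_LDC_I4_8 are consecutive opcode values, with
+                // CEE_LDC_I4_M1 immediately preceding CEE_LDC_I4_0, so the subtraction below
+                // yields -1 through 8 (as the assert checks).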
+ cval.intVal = (opcode - CEE_LDC_I4_0);
+ assert(-1 <= cval.intVal && cval.intVal <= 8);
+ goto PUSH_I4CON;
+
+ case CEE_LDC_I4_S:
+ cval.intVal = getI1LittleEndian(codeAddr);
+ goto PUSH_I4CON;
+ case CEE_LDC_I4:
+ cval.intVal = getI4LittleEndian(codeAddr);
+ goto PUSH_I4CON;
+ PUSH_I4CON:
+ JITDUMP(" %d", cval.intVal);
+ impPushOnStack(gtNewIconNode(cval.intVal), typeInfo(TI_INT));
+ break;
+
+ case CEE_LDC_I8:
+ cval.lngVal = getI8LittleEndian(codeAddr);
+ JITDUMP(" 0x%016llx", cval.lngVal);
+ impPushOnStack(gtNewLconNode(cval.lngVal), typeInfo(TI_LONG));
+ break;
+
+ case CEE_LDC_R8:
+ cval.dblVal = getR8LittleEndian(codeAddr);
+ JITDUMP(" %#.17g", cval.dblVal);
+ impPushOnStack(gtNewDconNode(cval.dblVal), typeInfo(TI_DOUBLE));
+ break;
+
+ case CEE_LDC_R4:
+ cval.dblVal = getR4LittleEndian(codeAddr);
+ JITDUMP(" %#.17g", cval.dblVal);
+ {
+ GenTreePtr cnsOp = gtNewDconNode(cval.dblVal);
+#if !FEATURE_X87_DOUBLES
+ // X87 stack doesn't differentiate between float/double
+ // so R4 is treated as R8, but everybody else does
+ cnsOp->gtType = TYP_FLOAT;
+#endif // FEATURE_X87_DOUBLES
+ impPushOnStack(cnsOp, typeInfo(TI_DOUBLE));
+ }
+ break;
+
+ case CEE_LDSTR:
+
+ if (compIsForInlining())
+ {
+ if (impInlineInfo->inlineCandidateInfo->dwRestrictions & INLINE_NO_CALLEE_LDSTR)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_HAS_LDSTR_RESTRICTION);
+ return;
+ }
+ }
+
+ val = getU4LittleEndian(codeAddr);
+ JITDUMP(" %08X", val);
+ if (tiVerificationNeeded)
+ {
+ Verify(info.compCompHnd->isValidStringRef(info.compScopeHnd, val), "bad string");
+ tiRetVal = typeInfo(TI_REF, impGetStringClass());
+ }
+ impPushOnStack(gtNewSconNode(val, info.compScopeHnd), tiRetVal);
+
+ break;
+
+ case CEE_LDARG:
+ lclNum = getU2LittleEndian(codeAddr);
+ JITDUMP(" %u", lclNum);
+ impLoadArg(lclNum, opcodeOffs + sz + 1);
+ break;
+
+ case CEE_LDARG_S:
+ lclNum = getU1LittleEndian(codeAddr);
+ JITDUMP(" %u", lclNum);
+ impLoadArg(lclNum, opcodeOffs + sz + 1);
+ break;
+
+ case CEE_LDARG_0:
+ case CEE_LDARG_1:
+ case CEE_LDARG_2:
+ case CEE_LDARG_3:
+ lclNum = (opcode - CEE_LDARG_0);
+ assert(lclNum >= 0 && lclNum < 4);
+ impLoadArg(lclNum, opcodeOffs + sz + 1);
+ break;
+
+ case CEE_LDLOC:
+ lclNum = getU2LittleEndian(codeAddr);
+ JITDUMP(" %u", lclNum);
+ impLoadLoc(lclNum, opcodeOffs + sz + 1);
+ break;
+
+ case CEE_LDLOC_S:
+ lclNum = getU1LittleEndian(codeAddr);
+ JITDUMP(" %u", lclNum);
+ impLoadLoc(lclNum, opcodeOffs + sz + 1);
+ break;
+
+ case CEE_LDLOC_0:
+ case CEE_LDLOC_1:
+ case CEE_LDLOC_2:
+ case CEE_LDLOC_3:
+ lclNum = (opcode - CEE_LDLOC_0);
+ assert(lclNum >= 0 && lclNum < 4);
+ impLoadLoc(lclNum, opcodeOffs + sz + 1);
+ break;
+
+ case CEE_STARG:
+ lclNum = getU2LittleEndian(codeAddr);
+ goto STARG;
+
+ case CEE_STARG_S:
+ lclNum = getU1LittleEndian(codeAddr);
+ STARG:
+ JITDUMP(" %u", lclNum);
+
+ if (tiVerificationNeeded)
+ {
+ Verify(lclNum < info.compILargsCount, "bad arg num");
+ }
+
+ if (compIsForInlining())
+ {
+ op1 = impInlineFetchArg(lclNum, impInlineInfo->inlArgInfo, impInlineInfo->lclVarInfo);
+ noway_assert(op1->gtOper == GT_LCL_VAR);
+ lclNum = op1->AsLclVar()->gtLclNum;
+
+ goto VAR_ST_VALID;
+ }
+
+ lclNum = compMapILargNum(lclNum); // account for possible hidden param
+ assertImp(lclNum < numArgs);
+
+ if (lclNum == info.compThisArg)
+ {
+ lclNum = lvaArg0Var;
+ }
+ lvaTable[lclNum].lvArgWrite = 1;
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo& tiLclVar = lvaTable[lclNum].lvVerTypeInfo;
+ Verify(tiCompatibleWith(impStackTop().seTypeInfo, NormaliseForStack(tiLclVar), true),
+ "type mismatch");
+
+ if (verTrackObjCtorInitState && (verCurrentState.thisInitialized != TIS_Init))
+ {
+ Verify(!tiLclVar.IsThisPtr(), "storing to uninit this ptr");
+ }
+ }
+
+ goto VAR_ST;
+
+ case CEE_STLOC:
+ lclNum = getU2LittleEndian(codeAddr);
+ JITDUMP(" %u", lclNum);
+ goto LOC_ST;
+
+ case CEE_STLOC_S:
+ lclNum = getU1LittleEndian(codeAddr);
+ JITDUMP(" %u", lclNum);
+ goto LOC_ST;
+
+ case CEE_STLOC_0:
+ case CEE_STLOC_1:
+ case CEE_STLOC_2:
+ case CEE_STLOC_3:
+ lclNum = (opcode - CEE_STLOC_0);
+ assert(lclNum >= 0 && lclNum < 4);
+
+ LOC_ST:
+ if (tiVerificationNeeded)
+ {
+ Verify(lclNum < info.compMethodInfo->locals.numArgs, "bad local num");
+ Verify(tiCompatibleWith(impStackTop().seTypeInfo,
+ NormaliseForStack(lvaTable[lclNum + numArgs].lvVerTypeInfo), true),
+ "type mismatch");
+ }
+
+ if (compIsForInlining())
+ {
+ lclTyp = impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclTypeInfo;
+
+ /* Have we allocated a temp for this local? */
+
+ lclNum = impInlineFetchLocal(lclNum DEBUGARG("Inline stloc first use temp"));
+
+ goto _PopValue;
+ }
+
+ lclNum += numArgs;
+
+ VAR_ST:
+
+ if (lclNum >= info.compLocalsCount && lclNum != lvaArg0Var)
+ {
+ assert(!tiVerificationNeeded); // We should have thrown the VerificationException before.
+ BADCODE("Bad IL");
+ }
+
+ VAR_ST_VALID:
+
+ /* if it is a struct assignment, make certain we don't overflow the buffer */
+ assert(lclTyp != TYP_STRUCT || lvaLclSize(lclNum) >= info.compCompHnd->getClassSize(clsHnd));
+
+ if (lvaTable[lclNum].lvNormalizeOnLoad())
+ {
+ lclTyp = lvaGetRealType(lclNum);
+ }
+ else
+ {
+ lclTyp = lvaGetActualType(lclNum);
+ }
+
+ _PopValue:
+ /* Pop the value being assigned */
+
+ {
+ StackEntry se = impPopStack(clsHnd);
+ op1 = se.val;
+ tiRetVal = se.seTypeInfo;
+ }
+
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(lclTyp) && (lclTyp != op1->TypeGet()))
+ {
+ assert(op1->TypeGet() == TYP_STRUCT);
+ op1->gtType = lclTyp;
+ }
+#endif // FEATURE_SIMD
+
+ op1 = impImplicitIorI4Cast(op1, lclTyp);
+
+#ifdef _TARGET_64BIT_
+                // Downcast the TYP_I_IMPL into a 32-bit Int for x86 JIT compatibility
+ if (varTypeIsI(op1->TypeGet()) && (genActualType(lclTyp) == TYP_INT))
+ {
+ assert(!tiVerificationNeeded); // We should have thrown the VerificationException before.
+ op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
+ }
+#endif // _TARGET_64BIT_
+
+ // We had better assign it a value of the correct type
+ assertImp(
+ genActualType(lclTyp) == genActualType(op1->gtType) ||
+ genActualType(lclTyp) == TYP_I_IMPL && op1->IsVarAddr() ||
+ (genActualType(lclTyp) == TYP_I_IMPL && (op1->gtType == TYP_BYREF || op1->gtType == TYP_REF)) ||
+ (genActualType(op1->gtType) == TYP_I_IMPL && lclTyp == TYP_BYREF) ||
+ (varTypeIsFloating(lclTyp) && varTypeIsFloating(op1->TypeGet())) ||
+ ((genActualType(lclTyp) == TYP_BYREF) && genActualType(op1->TypeGet()) == TYP_REF));
+
+ /* If op1 is "&var" then its type is the transient "*" and it can
+ be used either as TYP_BYREF or TYP_I_IMPL */
+
+ if (op1->IsVarAddr())
+ {
+ assertImp(genActualType(lclTyp) == TYP_I_IMPL || lclTyp == TYP_BYREF);
+
+ /* When "&var" is created, we assume it is a byref. If it is
+ being assigned to a TYP_I_IMPL var, change the type to
+ prevent unnecessary GC info */
+
+ if (genActualType(lclTyp) == TYP_I_IMPL)
+ {
+ op1->gtType = TYP_I_IMPL;
+ }
+ }
+
+ /* Filter out simple assignments to itself */
+
+ if (op1->gtOper == GT_LCL_VAR && lclNum == op1->gtLclVarCommon.gtLclNum)
+ {
+ if (insertLdloc)
+ {
+ // This is a sequence of (ldloc, dup, stloc). Can simplify
+ // to (ldloc, stloc). Goto LDVAR to reconstruct the ldloc node.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (tiVerificationNeeded)
+ {
+ assert(
+ typeInfo::AreEquivalent(tiRetVal, NormaliseForStack(lvaTable[lclNum].lvVerTypeInfo)));
+ }
+#endif
+
+ op1 = nullptr;
+ insertLdloc = false;
+
+ impLoadVar(lclNum, opcodeOffs + sz + 1);
+ break;
+ }
+ else if (opts.compDbgCode)
+ {
+ op1 = gtNewNothingNode();
+ goto SPILL_APPEND;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ /* Create the assignment node */
+
+ op2 = gtNewLclvNode(lclNum, lclTyp, opcodeOffs + sz + 1);
+
+ /* If the local is aliased, we need to spill calls and
+ indirections from the stack. */
+
+ if ((lvaTable[lclNum].lvAddrExposed || lvaTable[lclNum].lvHasLdAddrOp) &&
+ verCurrentState.esStackDepth > 0)
+ {
+ impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG("Local could be aliased"));
+ }
+
+ /* Spill any refs to the local from the stack */
+
+ impSpillLclRefs(lclNum);
+
+#if !FEATURE_X87_DOUBLES
+ // We can generate an assignment to a TYP_FLOAT from a TYP_DOUBLE
+ // We insert a cast to the dest 'op2' type
+ //
+ if ((op1->TypeGet() != op2->TypeGet()) && varTypeIsFloating(op1->gtType) &&
+ varTypeIsFloating(op2->gtType))
+ {
+ op1 = gtNewCastNode(op2->TypeGet(), op1, op2->TypeGet());
+ }
+#endif // !FEATURE_X87_DOUBLES
+
+ if (varTypeIsStruct(lclTyp))
+ {
+ op1 = impAssignStruct(op2, op1, clsHnd, (unsigned)CHECK_SPILL_ALL);
+ }
+ else
+ {
+                        // The code generator generates GC tracking information
+                        // based on the RHS of the assignment. Later the LHS (which
+                        // is a BYREF) gets used and the emitter checks that that variable
+                        // is being tracked. It is not (since the RHS was an int and did
+                        // not need tracking). To keep this assert happy, we change the RHS.
+ if (lclTyp == TYP_BYREF && !varTypeIsGC(op1->gtType))
+ {
+ op1->gtType = TYP_BYREF;
+ }
+ op1 = gtNewAssignNode(op2, op1);
+ }
+
+ /* If insertLdloc is true, then we need to insert a ldloc following the
+ stloc. This is done when converting a (dup, stloc) sequence into
+ a (stloc, ldloc) sequence. */
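+                /* For example, the IL sequence "dup; stloc.1" leaves the stored value on the stack;
+                   rather than cloning the value tree, the importer emits the store and then reloads
+                   local 1 via the ldloc inserted here (insertLdloc is set when importing CEE_DUP). */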
+
+ if (insertLdloc)
+ {
+ // From SPILL_APPEND
+ impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+
+#ifdef DEBUG
+ // From DONE_APPEND
+ impNoteLastILoffs();
+#endif
+ op1 = nullptr;
+ insertLdloc = false;
+
+ impLoadVar(lclNum, opcodeOffs + sz + 1, tiRetVal);
+ break;
+ }
+
+ goto SPILL_APPEND;
+
+ case CEE_LDLOCA:
+ lclNum = getU2LittleEndian(codeAddr);
+ goto LDLOCA;
+
+ case CEE_LDLOCA_S:
+ lclNum = getU1LittleEndian(codeAddr);
+ LDLOCA:
+ JITDUMP(" %u", lclNum);
+ if (tiVerificationNeeded)
+ {
+ Verify(lclNum < info.compMethodInfo->locals.numArgs, "bad local num");
+ Verify(info.compInitMem, "initLocals not set");
+ }
+
+ if (compIsForInlining())
+ {
+ // Get the local type
+ lclTyp = impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclTypeInfo;
+
+ /* Have we allocated a temp for this local? */
+
+ lclNum = impInlineFetchLocal(lclNum DEBUGARG("Inline ldloca(s) first use temp"));
+
+ op1 = gtNewLclvNode(lclNum, lvaGetActualType(lclNum));
+
+ goto _PUSH_ADRVAR;
+ }
+
+ lclNum += numArgs;
+ assertImp(lclNum < info.compLocalsCount);
+ goto ADRVAR;
+
+ case CEE_LDARGA:
+ lclNum = getU2LittleEndian(codeAddr);
+ goto LDARGA;
+
+ case CEE_LDARGA_S:
+ lclNum = getU1LittleEndian(codeAddr);
+ LDARGA:
+ JITDUMP(" %u", lclNum);
+ Verify(lclNum < info.compILargsCount, "bad arg num");
+
+ if (compIsForInlining())
+ {
+ // In IL, LDARGA(_S) is used to load the byref managed pointer of struct argument,
+ // followed by a ldfld to load the field.
+
+ op1 = impInlineFetchArg(lclNum, impInlineInfo->inlArgInfo, impInlineInfo->lclVarInfo);
+ if (op1->gtOper != GT_LCL_VAR)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_LDARGA_NOT_LOCAL_VAR);
+ return;
+ }
+
+ assert(op1->gtOper == GT_LCL_VAR);
+
+ goto _PUSH_ADRVAR;
+ }
+
+ lclNum = compMapILargNum(lclNum); // account for possible hidden param
+ assertImp(lclNum < numArgs);
+
+ if (lclNum == info.compThisArg)
+ {
+ lclNum = lvaArg0Var;
+ }
+
+ goto ADRVAR;
+
+ ADRVAR:
+
+ op1 = gtNewLclvNode(lclNum, lvaGetActualType(lclNum), opcodeOffs + sz + 1);
+
+ _PUSH_ADRVAR:
+ assert(op1->gtOper == GT_LCL_VAR);
+
+ /* Note that this is supposed to create the transient type "*"
+ which may be used as a TYP_I_IMPL. However we catch places
+ where it is used as a TYP_I_IMPL and change the node if needed.
+ Thus we are pessimistic and may report byrefs in the GC info
+ where it was not absolutely needed, but it is safer this way.
+ */
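+                // For example, "ldloca.s 0" of an int local produces a TYP_BYREF GT_ADDR here; if that
+                // address is later stored to a native-int-typed location, the IsVarAddr() handling on
+                // the store paths above retypes it to TYP_I_IMPL so no byref is reported unnecessarily.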
+ op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, op1);
+
+                // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does
+ assert((op1->gtFlags & GTF_GLOB_REF) == 0);
+
+ tiRetVal = lvaTable[lclNum].lvVerTypeInfo;
+ if (tiVerificationNeeded)
+ {
+ // Don't allow taking address of uninit this ptr.
+ if (verTrackObjCtorInitState && (verCurrentState.thisInitialized != TIS_Init))
+ {
+ Verify(!tiRetVal.IsThisPtr(), "address of uninit this ptr");
+ }
+
+ if (!tiRetVal.IsByRef())
+ {
+ tiRetVal.MakeByRef();
+ }
+ else
+ {
+ Verify(false, "byref to byref");
+ }
+ }
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_ARGLIST:
+
+ if (!info.compIsVarArgs)
+ {
+ BADCODE("arglist in non-vararg method");
+ }
+
+ if (tiVerificationNeeded)
+ {
+ tiRetVal = typeInfo(TI_STRUCT, impGetRuntimeArgumentHandle());
+ }
+ assertImp((info.compMethodInfo->args.callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG);
+
+                /* The ARGLIST cookie is a hidden 'last' parameter; we have already
+                   adjusted the arg count because this is like fetching the last param */
+ assertImp(0 < numArgs);
+ assert(lvaTable[lvaVarargsHandleArg].lvAddrExposed);
+ lclNum = lvaVarargsHandleArg;
+ op1 = gtNewLclvNode(lclNum, TYP_I_IMPL, opcodeOffs + sz + 1);
+ op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, op1);
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_ENDFINALLY:
+
+ if (compIsForInlining())
+ {
+ assert(!"Shouldn't have exception handlers in the inliner!");
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_ENDFINALLY);
+ return;
+ }
+
+ if (verCurrentState.esStackDepth > 0)
+ {
+ impEvalSideEffects();
+ }
+
+ if (info.compXcptnsCount == 0)
+ {
+ BADCODE("endfinally outside finally");
+ }
+
+ assert(verCurrentState.esStackDepth == 0);
+
+ op1 = gtNewOperNode(GT_RETFILT, TYP_VOID, nullptr);
+ goto APPEND;
+
+ case CEE_ENDFILTER:
+
+ if (compIsForInlining())
+ {
+ assert(!"Shouldn't have exception handlers in the inliner!");
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_ENDFILTER);
+ return;
+ }
+
+ block->bbSetRunRarely(); // filters are rare
+
+ if (info.compXcptnsCount == 0)
+ {
+ BADCODE("endfilter outside filter");
+ }
+
+ if (tiVerificationNeeded)
+ {
+ Verify(impStackTop().seTypeInfo.IsType(TI_INT), "bad endfilt arg");
+ }
+
+ op1 = impPopStack().val;
+ assertImp(op1->gtType == TYP_INT);
+ if (!bbInFilterILRange(block))
+ {
+ BADCODE("EndFilter outside a filter handler");
+ }
+
+ /* Mark current bb as end of filter */
+
+ assert(compCurBB->bbFlags & BBF_DONT_REMOVE);
+ assert(compCurBB->bbJumpKind == BBJ_EHFILTERRET);
+
+ /* Mark catch handler as successor */
+
+ op1 = gtNewOperNode(GT_RETFILT, op1->TypeGet(), op1);
+ if (verCurrentState.esStackDepth != 0)
+ {
+ verRaiseVerifyException(INDEBUG("stack must be 1 on end of filter") DEBUGARG(__FILE__)
+ DEBUGARG(__LINE__));
+ }
+ goto APPEND;
+
+ case CEE_RET:
+ prefixFlags &= ~PREFIX_TAILCALL; // ret without call before it
+ RET:
+ if (!impReturnInstruction(block, prefixFlags, opcode))
+ {
+ return; // abort
+ }
+ else
+ {
+ break;
+ }
+
+ case CEE_JMP:
+
+ assert(!compIsForInlining());
+
+ if (tiVerificationNeeded)
+ {
+ Verify(false, "Invalid opcode: CEE_JMP");
+ }
+
+ if ((info.compFlags & CORINFO_FLG_SYNCH) || block->hasTryIndex() || block->hasHndIndex())
+ {
+ /* CEE_JMP does not make sense in some "protected" regions. */
+
+ BADCODE("Jmp not allowed in protected region");
+ }
+
+ if (verCurrentState.esStackDepth != 0)
+ {
+ BADCODE("Stack must be empty after CEE_JMPs");
+ }
+
+ _impResolveToken(CORINFO_TOKENKIND_Method);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ /* The signature of the target has to be identical to ours.
+ At least check that argCnt and returnType match */
+
+ eeGetMethodSig(resolvedToken.hMethod, &sig);
+ if (sig.numArgs != info.compMethodInfo->args.numArgs ||
+ sig.retType != info.compMethodInfo->args.retType ||
+ sig.callConv != info.compMethodInfo->args.callConv)
+ {
+ BADCODE("Incompatible target for CEE_JMPs");
+ }
+
+#if defined(_TARGET_XARCH_) || defined(_TARGET_ARMARCH_)
+
+ op1 = new (this, GT_JMP) GenTreeVal(GT_JMP, TYP_VOID, (size_t)resolvedToken.hMethod);
+
+ /* Mark the basic block as being a JUMP instead of RETURN */
+
+ block->bbFlags |= BBF_HAS_JMP;
+
+ /* Set this flag to make sure register arguments have a location assigned
+ * even if we don't use them inside the method */
+
+ compJmpOpUsed = true;
+
+ fgNoStructPromotion = true;
+
+ goto APPEND;
+
+#else // !_TARGET_XARCH_ && !_TARGET_ARMARCH_
+
+ // Import this just like a series of LDARGs + tail. + call + ret
+
+ if (info.compIsVarArgs)
+ {
+ // For now we don't implement true tail calls, so this breaks varargs.
+ // So warn the user instead of generating bad code.
+ // This is a semi-temporary workaround for DevDiv 173860, until we can properly
+ // implement true tail calls.
+                    IMPL_LIMITATION("varargs + CEE_JMP doesn't work yet");
+ }
+
+ // First load up the arguments (0 - N)
+ for (unsigned argNum = 0; argNum < info.compILargsCount; argNum++)
+ {
+ impLoadArg(argNum, opcodeOffs + sz + 1);
+ }
+
+ // Now generate the tail call
+ noway_assert(prefixFlags == 0);
+ prefixFlags = PREFIX_TAILCALL_EXPLICIT;
+ opcode = CEE_CALL;
+
+ eeGetCallInfo(&resolvedToken, NULL,
+ combine(CORINFO_CALLINFO_ALLOWINSTPARAM, CORINFO_CALLINFO_SECURITYCHECKS), &callInfo);
+
+ // All calls and delegates need a security callout.
+ impHandleAccessAllowed(callInfo.accessAllowed, &callInfo.callsiteCalloutHelper);
+
+ callTyp = impImportCall(CEE_CALL, &resolvedToken, NULL, NULL, PREFIX_TAILCALL_EXPLICIT, &callInfo,
+ opcodeOffs);
+
+ // And finish with the ret
+ goto RET;
+
+#endif // _TARGET_XARCH_ || _TARGET_ARMARCH_
+
+ case CEE_LDELEMA:
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ ldelemClsHnd = resolvedToken.hClass;
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop(1).seTypeInfo;
+ typeInfo tiIndex = impStackTop().seTypeInfo;
+
+ // As per ECMA 'index' specified can be either int32 or native int.
+ Verify(tiIndex.IsIntOrNativeIntType(), "bad index");
+
+ typeInfo arrayElemType = verMakeTypeInfo(ldelemClsHnd);
+ Verify(tiArray.IsNullObjRef() ||
+ typeInfo::AreEquivalent(verGetArrayElemType(tiArray), arrayElemType),
+ "bad array");
+
+ tiRetVal = arrayElemType;
+ tiRetVal.MakeByRef();
+ if (prefixFlags & PREFIX_READONLY)
+ {
+ tiRetVal.SetIsReadonlyByRef();
+ }
+
+ // an array interior pointer is always in the heap
+ tiRetVal.SetIsPermanentHomeByRef();
+ }
+
+ // If it's a value class array we just do a simple address-of
+ if (eeIsValueClass(ldelemClsHnd))
+ {
+ CorInfoType cit = info.compCompHnd->getTypeForPrimitiveValueClass(ldelemClsHnd);
+ if (cit == CORINFO_TYPE_UNDEF)
+ {
+ lclTyp = TYP_STRUCT;
+ }
+ else
+ {
+ lclTyp = JITtype2varType(cit);
+ }
+ goto ARR_LD_POST_VERIFY;
+ }
+
+                // Similarly, if it's a readonly access, we can do a simple address-of
+ // without doing a runtime type-check
+ if (prefixFlags & PREFIX_READONLY)
+ {
+ lclTyp = TYP_REF;
+ goto ARR_LD_POST_VERIFY;
+ }
+
+ // Otherwise we need the full helper function with run-time type check
+ op1 = impTokenToHandle(&resolvedToken);
+ if (op1 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ args = gtNewArgList(op1); // Type
+ args = gtNewListNode(impPopStack().val, args); // index
+ args = gtNewListNode(impPopStack().val, args); // array
+ op1 = gtNewHelperCallNode(CORINFO_HELP_LDELEMA_REF, TYP_BYREF, GTF_EXCEPT, args);
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ // ldelem for reference and value types
+ case CEE_LDELEM:
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ ldelemClsHnd = resolvedToken.hClass;
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop(1).seTypeInfo;
+ typeInfo tiIndex = impStackTop().seTypeInfo;
+
+ // As per ECMA 'index' specified can be either int32 or native int.
+ Verify(tiIndex.IsIntOrNativeIntType(), "bad index");
+ tiRetVal = verMakeTypeInfo(ldelemClsHnd);
+
+ Verify(tiArray.IsNullObjRef() || tiCompatibleWith(verGetArrayElemType(tiArray), tiRetVal, false),
+ "type of array incompatible with type operand");
+ tiRetVal.NormaliseForStack();
+ }
+
+ // If it's a reference type or generic variable type
+ // then just generate code as though it's a ldelem.ref instruction
+ if (!eeIsValueClass(ldelemClsHnd))
+ {
+ lclTyp = TYP_REF;
+ opcode = CEE_LDELEM_REF;
+ }
+ else
+ {
+ CorInfoType jitTyp = info.compCompHnd->asCorInfoType(ldelemClsHnd);
+ lclTyp = JITtype2varType(jitTyp);
+ tiRetVal = verMakeTypeInfo(ldelemClsHnd); // precise type always needed for struct
+ tiRetVal.NormaliseForStack();
+ }
+ goto ARR_LD_POST_VERIFY;
+
+ case CEE_LDELEM_I1:
+ lclTyp = TYP_BYTE;
+ goto ARR_LD;
+ case CEE_LDELEM_I2:
+ lclTyp = TYP_SHORT;
+ goto ARR_LD;
+ case CEE_LDELEM_I:
+ lclTyp = TYP_I_IMPL;
+ goto ARR_LD;
+
+ // Should be UINT, but since no platform widens 4->8 bytes it doesn't matter
+ // and treating it as TYP_INT avoids other asserts.
+ case CEE_LDELEM_U4:
+ lclTyp = TYP_INT;
+ goto ARR_LD;
+
+ case CEE_LDELEM_I4:
+ lclTyp = TYP_INT;
+ goto ARR_LD;
+ case CEE_LDELEM_I8:
+ lclTyp = TYP_LONG;
+ goto ARR_LD;
+ case CEE_LDELEM_REF:
+ lclTyp = TYP_REF;
+ goto ARR_LD;
+ case CEE_LDELEM_R4:
+ lclTyp = TYP_FLOAT;
+ goto ARR_LD;
+ case CEE_LDELEM_R8:
+ lclTyp = TYP_DOUBLE;
+ goto ARR_LD;
+ case CEE_LDELEM_U1:
+ lclTyp = TYP_UBYTE;
+ goto ARR_LD;
+ case CEE_LDELEM_U2:
+ lclTyp = TYP_CHAR;
+ goto ARR_LD;
+
+ ARR_LD:
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop(1).seTypeInfo;
+ typeInfo tiIndex = impStackTop().seTypeInfo;
+
+ // As per ECMA 'index' specified can be either int32 or native int.
+ Verify(tiIndex.IsIntOrNativeIntType(), "bad index");
+ if (tiArray.IsNullObjRef())
+ {
+ if (lclTyp == TYP_REF)
+ { // we will say a deref of a null array yields a null ref
+ tiRetVal = typeInfo(TI_NULL);
+ }
+ else
+ {
+ tiRetVal = typeInfo(lclTyp);
+ }
+ }
+ else
+ {
+ tiRetVal = verGetArrayElemType(tiArray);
+ typeInfo arrayElemTi = typeInfo(lclTyp);
+#ifdef _TARGET_64BIT_
+ if (opcode == CEE_LDELEM_I)
+ {
+ arrayElemTi = typeInfo::nativeInt();
+ }
+
+ if (lclTyp != TYP_REF && lclTyp != TYP_STRUCT)
+ {
+ Verify(typeInfo::AreEquivalent(tiRetVal, arrayElemTi), "bad array");
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+ Verify(tiRetVal.IsType(arrayElemTi.GetType()), "bad array");
+ }
+ }
+ tiRetVal.NormaliseForStack();
+ }
+ ARR_LD_POST_VERIFY:
+
+ /* Pull the index value and array address */
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+ assertImp(op1->gtType == TYP_REF);
+
+ /* Check for null pointer - in the inliner case we simply abort */
+
+ if (compIsForInlining())
+ {
+ if (op1->gtOper == GT_CNS_INT)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_NULL_FOR_LDELEM);
+ return;
+ }
+ }
+
+ op1 = impCheckForNullPointer(op1);
+
+ /* Mark the block as containing an index expression */
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_CNS_INT || op2->gtOper == GT_ADD)
+ {
+ block->bbFlags |= BBF_HAS_IDX_LEN;
+ optMethodFlags |= OMF_HAS_ARRAYREF;
+ }
+ }
+
+ /* Create the index node and push it on the stack */
+
+ op1 = gtNewIndexRef(lclTyp, op1, op2);
+
+ ldstruct = (opcode == CEE_LDELEM && lclTyp == TYP_STRUCT);
+
+ if ((opcode == CEE_LDELEMA) || ldstruct ||
+ (ldelemClsHnd != DUMMY_INIT(NULL) && eeIsValueClass(ldelemClsHnd)))
+ {
+ assert(ldelemClsHnd != DUMMY_INIT(NULL));
+
+ // remember the element size
+ if (lclTyp == TYP_REF)
+ {
+ op1->gtIndex.gtIndElemSize = sizeof(void*);
+ }
+ else
+ {
+                            // If ldelemClsHnd is precisely a primitive type, use that; otherwise, preserve the struct type.
+ if (info.compCompHnd->getTypeForPrimitiveValueClass(ldelemClsHnd) == CORINFO_TYPE_UNDEF)
+ {
+ op1->gtIndex.gtStructElemClass = ldelemClsHnd;
+ }
+ assert(lclTyp != TYP_STRUCT || op1->gtIndex.gtStructElemClass != nullptr);
+ if (lclTyp == TYP_STRUCT)
+ {
+ size = info.compCompHnd->getClassSize(ldelemClsHnd);
+ op1->gtIndex.gtIndElemSize = size;
+ op1->gtType = lclTyp;
+ }
+ }
+
+ if ((opcode == CEE_LDELEMA) || ldstruct)
+ {
+ // wrap it in a &
+ lclTyp = TYP_BYREF;
+
+ op1 = gtNewOperNode(GT_ADDR, lclTyp, op1);
+ }
+ else
+ {
+ assert(lclTyp != TYP_STRUCT);
+ }
+ }
+
+ if (ldstruct)
+ {
+ // Create an OBJ for the result
+ op1 = gtNewObjNode(ldelemClsHnd, op1);
+ op1->gtFlags |= GTF_EXCEPT;
+ }
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ // stelem for reference and value types
+ case CEE_STELEM:
+
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ stelemClsHnd = resolvedToken.hClass;
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop(2).seTypeInfo;
+ typeInfo tiIndex = impStackTop(1).seTypeInfo;
+ typeInfo tiValue = impStackTop().seTypeInfo;
+
+ // As per ECMA 'index' specified can be either int32 or native int.
+ Verify(tiIndex.IsIntOrNativeIntType(), "bad index");
+ typeInfo arrayElem = verMakeTypeInfo(stelemClsHnd);
+
+ Verify(tiArray.IsNullObjRef() || tiCompatibleWith(arrayElem, verGetArrayElemType(tiArray), false),
+ "type operand incompatible with array element type");
+ arrayElem.NormaliseForStack();
+ Verify(tiCompatibleWith(tiValue, arrayElem, true), "value incompatible with type operand");
+ }
+
+ // If it's a reference type just behave as though it's a stelem.ref instruction
+ if (!eeIsValueClass(stelemClsHnd))
+ {
+ goto STELEM_REF_POST_VERIFY;
+ }
+
+ // Otherwise extract the type
+ {
+ CorInfoType jitTyp = info.compCompHnd->asCorInfoType(stelemClsHnd);
+ lclTyp = JITtype2varType(jitTyp);
+ goto ARR_ST_POST_VERIFY;
+ }
+
+ case CEE_STELEM_REF:
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop(2).seTypeInfo;
+ typeInfo tiIndex = impStackTop(1).seTypeInfo;
+ typeInfo tiValue = impStackTop().seTypeInfo;
+
+ // As per ECMA 'index' specified can be either int32 or native int.
+ Verify(tiIndex.IsIntOrNativeIntType(), "bad index");
+ Verify(tiValue.IsObjRef(), "bad value");
+
+                    // We only check that it is an object reference; the helper does additional checks
+ Verify(tiArray.IsNullObjRef() || verGetArrayElemType(tiArray).IsType(TI_REF), "bad array");
+ }
+
+ arrayNodeTo = impStackTop(2).val;
+ arrayNodeToIndex = impStackTop(1).val;
+ arrayNodeFrom = impStackTop().val;
+
+ //
+ // Note that it is not legal to optimize away CORINFO_HELP_ARRADDR_ST in a
+                // lot of cases because of covariance, i.e. foo[] can be cast to object[].
+ //
+
+                // Check for assignment to the same array, i.e. arrLcl[i] = arrLcl[j].
+ // This does not need CORINFO_HELP_ARRADDR_ST
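+                // (The value being stored was just loaded from the same, non-address-exposed array
+                // local, so it necessarily has that array's element type and the covariance check
+                // done by the helper would be redundant.)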
+
+ if (arrayNodeFrom->OperGet() == GT_INDEX && arrayNodeFrom->gtOp.gtOp1->gtOper == GT_LCL_VAR &&
+ arrayNodeTo->gtOper == GT_LCL_VAR &&
+ arrayNodeTo->gtLclVarCommon.gtLclNum == arrayNodeFrom->gtOp.gtOp1->gtLclVarCommon.gtLclNum &&
+ !lvaTable[arrayNodeTo->gtLclVarCommon.gtLclNum].lvAddrExposed)
+ {
+ lclTyp = TYP_REF;
+ goto ARR_ST_POST_VERIFY;
+ }
+
+ // Check for assignment of NULL. This does not need CORINFO_HELP_ARRADDR_ST
+
+ if (arrayNodeFrom->OperGet() == GT_CNS_INT)
+ {
+ assert(arrayNodeFrom->gtType == TYP_REF && arrayNodeFrom->gtIntCon.gtIconVal == 0);
+
+ lclTyp = TYP_REF;
+ goto ARR_ST_POST_VERIFY;
+ }
+
+ STELEM_REF_POST_VERIFY:
+
+ /* Call a helper function to do the assignment */
+ op1 = gtNewHelperCallNode(CORINFO_HELP_ARRADDR_ST, TYP_VOID, 0, impPopList(3, &flags, nullptr));
+
+ goto SPILL_APPEND;
+
+ case CEE_STELEM_I1:
+ lclTyp = TYP_BYTE;
+ goto ARR_ST;
+ case CEE_STELEM_I2:
+ lclTyp = TYP_SHORT;
+ goto ARR_ST;
+ case CEE_STELEM_I:
+ lclTyp = TYP_I_IMPL;
+ goto ARR_ST;
+ case CEE_STELEM_I4:
+ lclTyp = TYP_INT;
+ goto ARR_ST;
+ case CEE_STELEM_I8:
+ lclTyp = TYP_LONG;
+ goto ARR_ST;
+ case CEE_STELEM_R4:
+ lclTyp = TYP_FLOAT;
+ goto ARR_ST;
+ case CEE_STELEM_R8:
+ lclTyp = TYP_DOUBLE;
+ goto ARR_ST;
+
+ ARR_ST:
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop(2).seTypeInfo;
+ typeInfo tiIndex = impStackTop(1).seTypeInfo;
+ typeInfo tiValue = impStackTop().seTypeInfo;
+
+ // As per ECMA 'index' specified can be either int32 or native int.
+ Verify(tiIndex.IsIntOrNativeIntType(), "bad index");
+ typeInfo arrayElem = typeInfo(lclTyp);
+#ifdef _TARGET_64BIT_
+ if (opcode == CEE_STELEM_I)
+ {
+ arrayElem = typeInfo::nativeInt();
+ }
+#endif // _TARGET_64BIT_
+ Verify(tiArray.IsNullObjRef() || typeInfo::AreEquivalent(verGetArrayElemType(tiArray), arrayElem),
+ "bad array");
+
+ Verify(tiCompatibleWith(NormaliseForStack(tiValue), arrayElem.NormaliseForStack(), true),
+ "bad value");
+ }
+
+ ARR_ST_POST_VERIFY:
+ /* The strict order of evaluation is LHS-operands, RHS-operands,
+ range-check, and then assignment. However, codegen currently
+                   does the range-check before evaluating the RHS-operands. So to
+ maintain strict ordering, we spill the stack. */
+
+ if (impStackTop().val->gtFlags & GTF_SIDE_EFFECT)
+ {
+ impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG(
+ "Strict ordering of exceptions for Array store"));
+ }
+
+ /* Pull the new value from the stack */
+ op2 = impPopStack().val;
+
+ /* Pull the index value */
+ op1 = impPopStack().val;
+
+ /* Pull the array address */
+ op3 = impPopStack().val;
+
+ assertImp(op3->gtType == TYP_REF);
+ if (op2->IsVarAddr())
+ {
+ op2->gtType = TYP_I_IMPL;
+ }
+
+ op3 = impCheckForNullPointer(op3);
+
+ // Mark the block as containing an index expression
+
+ if (op3->gtOper == GT_LCL_VAR)
+ {
+ if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CNS_INT || op1->gtOper == GT_ADD)
+ {
+ block->bbFlags |= BBF_HAS_IDX_LEN;
+ optMethodFlags |= OMF_HAS_ARRAYREF;
+ }
+ }
+
+ /* Create the index node */
+
+ op1 = gtNewIndexRef(lclTyp, op3, op1);
+
+ /* Create the assignment node and append it */
+
+ if (lclTyp == TYP_STRUCT)
+ {
+ assert(stelemClsHnd != DUMMY_INIT(NULL));
+
+ op1->gtIndex.gtStructElemClass = stelemClsHnd;
+ op1->gtIndex.gtIndElemSize = info.compCompHnd->getClassSize(stelemClsHnd);
+ }
+ if (varTypeIsStruct(op1))
+ {
+ op1 = impAssignStruct(op1, op2, stelemClsHnd, (unsigned)CHECK_SPILL_ALL);
+ }
+ else
+ {
+ op2 = impImplicitR4orR8Cast(op2, op1->TypeGet());
+ op1 = gtNewAssignNode(op1, op2);
+ }
+
+ /* Mark the expression as containing an assignment */
+
+ op1->gtFlags |= GTF_ASG;
+
+ goto SPILL_APPEND;
+
+ case CEE_ADD:
+ oper = GT_ADD;
+ goto MATH_OP2;
+
+ case CEE_ADD_OVF:
+ uns = false;
+ goto ADD_OVF;
+ case CEE_ADD_OVF_UN:
+ uns = true;
+ goto ADD_OVF;
+
+ ADD_OVF:
+ ovfl = true;
+ callNode = false;
+ oper = GT_ADD;
+ goto MATH_OP2_FLAGS;
+
+ case CEE_SUB:
+ oper = GT_SUB;
+ goto MATH_OP2;
+
+ case CEE_SUB_OVF:
+ uns = false;
+ goto SUB_OVF;
+ case CEE_SUB_OVF_UN:
+ uns = true;
+ goto SUB_OVF;
+
+ SUB_OVF:
+ ovfl = true;
+ callNode = false;
+ oper = GT_SUB;
+ goto MATH_OP2_FLAGS;
+
+ case CEE_MUL:
+ oper = GT_MUL;
+ goto MATH_MAYBE_CALL_NO_OVF;
+
+ case CEE_MUL_OVF:
+ uns = false;
+ goto MUL_OVF;
+ case CEE_MUL_OVF_UN:
+ uns = true;
+ goto MUL_OVF;
+
+ MUL_OVF:
+ ovfl = true;
+ oper = GT_MUL;
+ goto MATH_MAYBE_CALL_OVF;
+
+ // Other binary math operations
+
+ case CEE_DIV:
+ oper = GT_DIV;
+ goto MATH_MAYBE_CALL_NO_OVF;
+
+ case CEE_DIV_UN:
+ oper = GT_UDIV;
+ goto MATH_MAYBE_CALL_NO_OVF;
+
+ case CEE_REM:
+ oper = GT_MOD;
+ goto MATH_MAYBE_CALL_NO_OVF;
+
+ case CEE_REM_UN:
+ oper = GT_UMOD;
+ goto MATH_MAYBE_CALL_NO_OVF;
+
+ MATH_MAYBE_CALL_NO_OVF:
+ ovfl = false;
+ MATH_MAYBE_CALL_OVF:
+ // Morpher has some complex logic about when to turn different
+ // typed nodes on different platforms into helper calls. We
+ // need to either duplicate that logic here, or just
+ // pessimistically make all the nodes large enough to become
+ // call nodes. Since call nodes aren't that much larger and
+ // these opcodes are infrequent enough I chose the latter.
+ callNode = true;
+ goto MATH_OP2_FLAGS;
+
+ case CEE_AND:
+ oper = GT_AND;
+ goto MATH_OP2;
+ case CEE_OR:
+ oper = GT_OR;
+ goto MATH_OP2;
+ case CEE_XOR:
+ oper = GT_XOR;
+ goto MATH_OP2;
+
+ MATH_OP2: // For default values of 'ovfl' and 'callNode'
+
+ ovfl = false;
+ callNode = false;
+
+ MATH_OP2_FLAGS: // If 'ovfl' and 'callNode' have already been set
+
+ /* Pull two values and push back the result */
+
+ if (tiVerificationNeeded)
+ {
+ const typeInfo& tiOp1 = impStackTop(1).seTypeInfo;
+ const typeInfo& tiOp2 = impStackTop().seTypeInfo;
+
+ Verify(tiCompatibleWith(tiOp1, tiOp2, true), "different arg type");
+ if (oper == GT_ADD || oper == GT_DIV || oper == GT_SUB || oper == GT_MUL || oper == GT_MOD)
+ {
+ Verify(tiOp1.IsNumberType(), "not number");
+ }
+ else
+ {
+ Verify(tiOp1.IsIntegerType(), "not integer");
+ }
+
+ Verify(!ovfl || tiOp1.IsIntegerType(), "not integer");
+
+ tiRetVal = tiOp1;
+
+#ifdef _TARGET_64BIT_
+ if (tiOp2.IsNativeIntType())
+ {
+ tiRetVal = tiOp2;
+ }
+#endif // _TARGET_64BIT_
+ }
+
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+
+#if !CPU_HAS_FP_SUPPORT
+ if (varTypeIsFloating(op1->gtType))
+ {
+ callNode = true;
+ }
+#endif
+ /* Can't do arithmetic with references */
+ assertImp(genActualType(op1->TypeGet()) != TYP_REF && genActualType(op2->TypeGet()) != TYP_REF);
+
+                // Change both to TYP_I_IMPL (impBashVarAddrsToI won't change a true byref; it only
+                // changes ones that point into the stack)
+ impBashVarAddrsToI(op1, op2);
+
+ type = impGetByRefResultType(oper, uns, &op1, &op2);
+
+ assert(!ovfl || !varTypeIsFloating(op1->gtType));
+
+ /* Special case: "int+0", "int-0", "int*1", "int/1" */
+
+ if (op2->gtOper == GT_CNS_INT)
+ {
+ if ((op2->IsIntegralConst(0) && (oper == GT_ADD || oper == GT_SUB)) ||
+ (op2->IsIntegralConst(1) && (oper == GT_MUL || oper == GT_DIV)))
+
+ {
+ impPushOnStack(op1, tiRetVal);
+ break;
+ }
+ }
+
+#if !FEATURE_X87_DOUBLES
+ // We can generate a TYP_FLOAT operation that has a TYP_DOUBLE operand
+ //
+ if (varTypeIsFloating(type) && varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType))
+ {
+ if (op1->TypeGet() != type)
+ {
+ // We insert a cast of op1 to 'type'
+ op1 = gtNewCastNode(type, op1, type);
+ }
+ if (op2->TypeGet() != type)
+ {
+ // We insert a cast of op2 to 'type'
+ op2 = gtNewCastNode(type, op2, type);
+ }
+ }
+#endif // !FEATURE_X87_DOUBLES
+
+#if SMALL_TREE_NODES
+ if (callNode)
+ {
+ /* These operators can later be transformed into 'GT_CALL' */
+
+ assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_MUL]);
+#ifndef _TARGET_ARM_
+ assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_DIV]);
+ assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_UDIV]);
+ assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_MOD]);
+ assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_UMOD]);
+#endif
+ // It's tempting to use LargeOpOpcode() here, but this logic is *not* saying
+ // that we'll need to transform into a general large node, but rather specifically
+ // to a call: by doing it this way, things keep working if there are multiple sizes,
+ // and a CALL is no longer the largest.
+ // That said, as of now it *is* a large node, so we'll do this with an assert rather
+ // than an "if".
+ assert(GenTree::s_gtNodeSizes[GT_CALL] == TREE_NODE_SZ_LARGE);
+ op1 = new (this, GT_CALL) GenTreeOp(oper, type, op1, op2 DEBUGARG(/*largeNode*/ true));
+ }
+ else
+#endif // SMALL_TREE_NODES
+ {
+ op1 = gtNewOperNode(oper, type, op1, op2);
+ }
+
+ /* Special case: integer/long division may throw an exception */
+
+ if (varTypeIsIntegral(op1->TypeGet()) && op1->OperMayThrow())
+ {
+ op1->gtFlags |= GTF_EXCEPT;
+ }
+
+ if (ovfl)
+ {
+ assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL);
+ if (ovflType != TYP_UNKNOWN)
+ {
+ op1->gtType = ovflType;
+ }
+ op1->gtFlags |= (GTF_EXCEPT | GTF_OVERFLOW);
+ if (uns)
+ {
+ op1->gtFlags |= GTF_UNSIGNED;
+ }
+ }
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_SHL:
+ oper = GT_LSH;
+ goto CEE_SH_OP2;
+
+ case CEE_SHR:
+ oper = GT_RSH;
+ goto CEE_SH_OP2;
+ case CEE_SHR_UN:
+ oper = GT_RSZ;
+ goto CEE_SH_OP2;
+
+ CEE_SH_OP2:
+ if (tiVerificationNeeded)
+ {
+ const typeInfo& tiVal = impStackTop(1).seTypeInfo;
+ const typeInfo& tiShift = impStackTop(0).seTypeInfo;
+ Verify(tiVal.IsIntegerType() && tiShift.IsType(TI_INT), "Bad shift args");
+ tiRetVal = tiVal;
+ }
+ op2 = impPopStack().val;
+ op1 = impPopStack().val; // operand to be shifted
+ impBashVarAddrsToI(op1, op2);
+
+ type = genActualType(op1->TypeGet());
+ op1 = gtNewOperNode(oper, type, op1, op2);
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_NOT:
+ if (tiVerificationNeeded)
+ {
+ tiRetVal = impStackTop().seTypeInfo;
+ Verify(tiRetVal.IsIntegerType(), "bad int value");
+ }
+
+ op1 = impPopStack().val;
+ impBashVarAddrsToI(op1, nullptr);
+ type = genActualType(op1->TypeGet());
+ impPushOnStack(gtNewOperNode(GT_NOT, type, op1), tiRetVal);
+ break;
+
+ case CEE_CKFINITE:
+ if (tiVerificationNeeded)
+ {
+ tiRetVal = impStackTop().seTypeInfo;
+ Verify(tiRetVal.IsType(TI_DOUBLE), "bad R value");
+ }
+ op1 = impPopStack().val;
+ type = op1->TypeGet();
+ op1 = gtNewOperNode(GT_CKFINITE, type, op1);
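+                    // ckfinite throws an ArithmeticException at run time for NaN or infinity,
+                    // so mark the node as throwing.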
+ op1->gtFlags |= GTF_EXCEPT;
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_LEAVE:
+
+ val = getI4LittleEndian(codeAddr); // jump distance
+ jmpAddr = (IL_OFFSET)((codeAddr - info.compCode + sizeof(__int32)) + val);
+ goto LEAVE;
+
+ case CEE_LEAVE_S:
+ val = getI1LittleEndian(codeAddr); // jump distance
+ jmpAddr = (IL_OFFSET)((codeAddr - info.compCode + sizeof(__int8)) + val);
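+                    // In both forms the branch offset is relative to the start of the next IL instruction.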
+
+ LEAVE:
+
+ if (compIsForInlining())
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_LEAVE);
+ return;
+ }
+
+ JITDUMP(" %04X", jmpAddr);
+ if (block->bbJumpKind != BBJ_LEAVE)
+ {
+ impResetLeaveBlock(block, jmpAddr);
+ }
+
+ assert(jmpAddr == block->bbJumpDest->bbCodeOffs);
+ impImportLeave(block);
+ impNoteBranchOffs();
+
+ break;
+
+ case CEE_BR:
+ case CEE_BR_S:
+ jmpDist = (sz == 1) ? getI1LittleEndian(codeAddr) : getI4LittleEndian(codeAddr);
+
+ if (compIsForInlining() && jmpDist == 0)
+ {
+ break; /* NOP */
+ }
+
+ impNoteBranchOffs();
+ break;
+
+ case CEE_BRTRUE:
+ case CEE_BRTRUE_S:
+ case CEE_BRFALSE:
+ case CEE_BRFALSE_S:
+
+ /* Pop the comparand (now there's a neat term) from the stack */
+ if (tiVerificationNeeded)
+ {
+ typeInfo& tiVal = impStackTop().seTypeInfo;
+ Verify(tiVal.IsObjRef() || tiVal.IsByRef() || tiVal.IsIntegerType() || tiVal.IsMethod(),
+ "bad value");
+ }
+
+ op1 = impPopStack().val;
+ type = op1->TypeGet();
+
+                // brfalse and brtrue are only allowed on I4, refs, and byrefs.
+ if (!opts.MinOpts() && !opts.compDbgCode && block->bbJumpDest == block->bbNext)
+ {
+ block->bbJumpKind = BBJ_NONE;
+
+ if (op1->gtFlags & GTF_GLOB_EFFECT)
+ {
+ op1 = gtUnusedValNode(op1);
+ goto SPILL_APPEND;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ if (op1->OperIsCompare())
+ {
+ if (opcode == CEE_BRFALSE || opcode == CEE_BRFALSE_S)
+ {
+ // Flip the sense of the compare
+
+ op1 = gtReverseCond(op1);
+ }
+ }
+ else
+ {
+ /* We'll compare against an equally-sized integer 0 */
+ /* For small types, we always compare against int */
+ op2 = gtNewZeroConNode(genActualType(op1->gtType));
+
+ /* Create the comparison operator and try to fold it */
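+                        // (e.g. 'brtrue x' on a plain value becomes the relop NE(x, 0), which feeds
+                        // the conditional jump handling below)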
+
+ oper = (opcode == CEE_BRTRUE || opcode == CEE_BRTRUE_S) ? GT_NE : GT_EQ;
+ op1 = gtNewOperNode(oper, TYP_INT, op1, op2);
+ }
+
+ // fall through
+
+ COND_JUMP:
+
+ seenConditionalJump = true;
+
+ /* Fold comparison if we can */
+
+ op1 = gtFoldExpr(op1);
+
+ /* Try to fold the really simple cases like 'iconst *, ifne/ifeq'*/
+ /* Don't make any blocks unreachable in import only mode */
+
+ if ((op1->gtOper == GT_CNS_INT) && !compIsForImportOnly())
+ {
+ /* gtFoldExpr() should prevent this as we don't want to make any blocks
+ unreachable under compDbgCode */
+ assert(!opts.compDbgCode);
+
+ BBjumpKinds foldedJumpKind = (BBjumpKinds)(op1->gtIntCon.gtIconVal ? BBJ_ALWAYS : BBJ_NONE);
+ assertImp((block->bbJumpKind == BBJ_COND) // normal case
+ || (block->bbJumpKind == foldedJumpKind)); // this can happen if we are reimporting the
+ // block for the second time
+
+ block->bbJumpKind = foldedJumpKind;
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (op1->gtIntCon.gtIconVal)
+ {
+ printf("\nThe conditional jump becomes an unconditional jump to BB%02u\n",
+ block->bbJumpDest->bbNum);
+ }
+ else
+ {
+ printf("\nThe block falls through into the next BB%02u\n", block->bbNext->bbNum);
+ }
+ }
+#endif
+ break;
+ }
+
+ op1 = gtNewOperNode(GT_JTRUE, TYP_VOID, op1);
+
+ /* GT_JTRUE is handled specially for non-empty stacks. See 'addStmt'
+ in impImportBlock(block). For correct line numbers, spill stack. */
+
+ if (opts.compDbgCode && impCurStmtOffs != BAD_IL_OFFSET)
+ {
+ impSpillStackEnsure(true);
+ }
+
+ goto SPILL_APPEND;
+
+ case CEE_CEQ:
+ oper = GT_EQ;
+ uns = false;
+ goto CMP_2_OPs;
+ case CEE_CGT_UN:
+ oper = GT_GT;
+ uns = true;
+ goto CMP_2_OPs;
+ case CEE_CGT:
+ oper = GT_GT;
+ uns = false;
+ goto CMP_2_OPs;
+ case CEE_CLT_UN:
+ oper = GT_LT;
+ uns = true;
+ goto CMP_2_OPs;
+ case CEE_CLT:
+ oper = GT_LT;
+ uns = false;
+ goto CMP_2_OPs;
+
+ CMP_2_OPs:
+ if (tiVerificationNeeded)
+ {
+ verVerifyCond(impStackTop(1).seTypeInfo, impStackTop().seTypeInfo, opcode);
+ tiRetVal = typeInfo(TI_INT);
+ }
+
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+
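+                    // If one operand is native-int sized and the other is a 32-bit int, widen the int
+                    // side so both operands of the compare have the same width.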
+#ifdef _TARGET_64BIT_
+ if (varTypeIsI(op1->TypeGet()) && (genActualType(op2->TypeGet()) == TYP_INT))
+ {
+ op2 = gtNewCastNode(TYP_I_IMPL, op2, (var_types)(uns ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+ else if (varTypeIsI(op2->TypeGet()) && (genActualType(op1->TypeGet()) == TYP_INT))
+ {
+ op1 = gtNewCastNode(TYP_I_IMPL, op1, (var_types)(uns ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+#endif // _TARGET_64BIT_
+
+ assertImp(genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) ||
+ varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet()) ||
+ varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType));
+
+ /* Create the comparison node */
+
+ op1 = gtNewOperNode(oper, TYP_INT, op1, op2);
+
+ /* TODO: setting both flags when only one is appropriate */
+ if (opcode == CEE_CGT_UN || opcode == CEE_CLT_UN)
+ {
+ op1->gtFlags |= GTF_RELOP_NAN_UN | GTF_UNSIGNED;
+ }
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_BEQ_S:
+ case CEE_BEQ:
+ oper = GT_EQ;
+ goto CMP_2_OPs_AND_BR;
+
+ case CEE_BGE_S:
+ case CEE_BGE:
+ oper = GT_GE;
+ goto CMP_2_OPs_AND_BR;
+
+ case CEE_BGE_UN_S:
+ case CEE_BGE_UN:
+ oper = GT_GE;
+ goto CMP_2_OPs_AND_BR_UN;
+
+ case CEE_BGT_S:
+ case CEE_BGT:
+ oper = GT_GT;
+ goto CMP_2_OPs_AND_BR;
+
+ case CEE_BGT_UN_S:
+ case CEE_BGT_UN:
+ oper = GT_GT;
+ goto CMP_2_OPs_AND_BR_UN;
+
+ case CEE_BLE_S:
+ case CEE_BLE:
+ oper = GT_LE;
+ goto CMP_2_OPs_AND_BR;
+
+ case CEE_BLE_UN_S:
+ case CEE_BLE_UN:
+ oper = GT_LE;
+ goto CMP_2_OPs_AND_BR_UN;
+
+ case CEE_BLT_S:
+ case CEE_BLT:
+ oper = GT_LT;
+ goto CMP_2_OPs_AND_BR;
+
+ case CEE_BLT_UN_S:
+ case CEE_BLT_UN:
+ oper = GT_LT;
+ goto CMP_2_OPs_AND_BR_UN;
+
+ case CEE_BNE_UN_S:
+ case CEE_BNE_UN:
+ oper = GT_NE;
+ goto CMP_2_OPs_AND_BR_UN;
+
+ CMP_2_OPs_AND_BR_UN:
+ uns = true;
+ unordered = true;
+ goto CMP_2_OPs_AND_BR_ALL;
+ CMP_2_OPs_AND_BR:
+ uns = false;
+ unordered = false;
+ goto CMP_2_OPs_AND_BR_ALL;
+ CMP_2_OPs_AND_BR_ALL:
+
+ if (tiVerificationNeeded)
+ {
+ verVerifyCond(impStackTop(1).seTypeInfo, impStackTop().seTypeInfo, opcode);
+ }
+
+ /* Pull two values */
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+
+#ifdef _TARGET_64BIT_
+ if ((op1->TypeGet() == TYP_I_IMPL) && (genActualType(op2->TypeGet()) == TYP_INT))
+ {
+ op2 = gtNewCastNode(TYP_I_IMPL, op2, (var_types)(uns ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+ else if ((op2->TypeGet() == TYP_I_IMPL) && (genActualType(op1->TypeGet()) == TYP_INT))
+ {
+ op1 = gtNewCastNode(TYP_I_IMPL, op1, (var_types)(uns ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+#endif // _TARGET_64BIT_
+
+ assertImp(genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) ||
+ varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet()) ||
+ varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType));
+
+ if (!opts.MinOpts() && !opts.compDbgCode && block->bbJumpDest == block->bbNext)
+ {
+ block->bbJumpKind = BBJ_NONE;
+
+ if (op1->gtFlags & GTF_GLOB_EFFECT)
+ {
+ impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG(
+ "Branch to next Optimization, op1 side effect"));
+ impAppendTree(gtUnusedValNode(op1), (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+ }
+ if (op2->gtFlags & GTF_GLOB_EFFECT)
+ {
+ impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG(
+ "Branch to next Optimization, op2 side effect"));
+ impAppendTree(gtUnusedValNode(op2), (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+ }
+
+#ifdef DEBUG
+ if ((op1->gtFlags | op2->gtFlags) & GTF_GLOB_EFFECT)
+ {
+ impNoteLastILoffs();
+ }
+#endif
+ break;
+ }
+#if !FEATURE_X87_DOUBLES
+                // We can generate a compare of differently sized floating point op1 and op2.
+                // We insert a cast to make the sizes match.
+ //
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ if (op1->TypeGet() != op2->TypeGet())
+ {
+ assert(varTypeIsFloating(op2->TypeGet()));
+
+ // say op1=double, op2=float. To avoid loss of precision
+ // while comparing, op2 is converted to double and double
+ // comparison is done.
+ if (op1->TypeGet() == TYP_DOUBLE)
+ {
+ // We insert a cast of op2 to TYP_DOUBLE
+ op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
+ }
+ else if (op2->TypeGet() == TYP_DOUBLE)
+ {
+ // We insert a cast of op1 to TYP_DOUBLE
+ op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
+ }
+ }
+ }
+#endif // !FEATURE_X87_DOUBLES
+
+ /* Create and append the operator */
+
+ op1 = gtNewOperNode(oper, TYP_INT, op1, op2);
+
+ if (uns)
+ {
+ op1->gtFlags |= GTF_UNSIGNED;
+ }
+
+ if (unordered)
+ {
+ op1->gtFlags |= GTF_RELOP_NAN_UN;
+ }
+
+ goto COND_JUMP;
+
+ case CEE_SWITCH:
+ assert(!compIsForInlining());
+
+ if (tiVerificationNeeded)
+ {
+ Verify(impStackTop().seTypeInfo.IsType(TI_INT), "Bad switch val");
+ }
+ /* Pop the switch value off the stack */
+ op1 = impPopStack().val;
+ assertImp(genActualTypeIsIntOrI(op1->TypeGet()));
+
+#ifdef _TARGET_64BIT_
+ // Widen 'op1' on 64-bit targets
+ if (op1->TypeGet() != TYP_I_IMPL)
+ {
+ if (op1->OperGet() == GT_CNS_INT)
+ {
+ op1->gtType = TYP_I_IMPL;
+ }
+ else
+ {
+ op1 = gtNewCastNode(TYP_I_IMPL, op1, TYP_I_IMPL);
+ }
+ }
+#endif // _TARGET_64BIT_
+ assert(genActualType(op1->TypeGet()) == TYP_I_IMPL);
+
+ /* We can create a switch node */
+
+ op1 = gtNewOperNode(GT_SWITCH, TYP_VOID, op1);
+
+ val = (int)getU4LittleEndian(codeAddr);
+ codeAddr += 4 + val * 4; // skip over the switch-table
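+                    // (4 bytes for the case count just read plus 4 bytes per branch target)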
+
+ goto SPILL_APPEND;
+
+ /************************** Casting OPCODES ***************************/
+
+ case CEE_CONV_OVF_I1:
+ lclTyp = TYP_BYTE;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_I2:
+ lclTyp = TYP_SHORT;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_I:
+ lclTyp = TYP_I_IMPL;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_I4:
+ lclTyp = TYP_INT;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_I8:
+ lclTyp = TYP_LONG;
+ goto CONV_OVF;
+
+ case CEE_CONV_OVF_U1:
+ lclTyp = TYP_UBYTE;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_U2:
+ lclTyp = TYP_CHAR;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_U:
+ lclTyp = TYP_U_IMPL;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_U4:
+ lclTyp = TYP_UINT;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_U8:
+ lclTyp = TYP_ULONG;
+ goto CONV_OVF;
+
+ case CEE_CONV_OVF_I1_UN:
+ lclTyp = TYP_BYTE;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_I2_UN:
+ lclTyp = TYP_SHORT;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_I_UN:
+ lclTyp = TYP_I_IMPL;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_I4_UN:
+ lclTyp = TYP_INT;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_I8_UN:
+ lclTyp = TYP_LONG;
+ goto CONV_OVF_UN;
+
+ case CEE_CONV_OVF_U1_UN:
+ lclTyp = TYP_UBYTE;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_U2_UN:
+ lclTyp = TYP_CHAR;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_U_UN:
+ lclTyp = TYP_U_IMPL;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_U4_UN:
+ lclTyp = TYP_UINT;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_U8_UN:
+ lclTyp = TYP_ULONG;
+ goto CONV_OVF_UN;
+
+ CONV_OVF_UN:
+ uns = true;
+ goto CONV_OVF_COMMON;
+ CONV_OVF:
+ uns = false;
+ goto CONV_OVF_COMMON;
+
+ CONV_OVF_COMMON:
+ ovfl = true;
+ goto _CONV;
+
+ case CEE_CONV_I1:
+ lclTyp = TYP_BYTE;
+ goto CONV;
+ case CEE_CONV_I2:
+ lclTyp = TYP_SHORT;
+ goto CONV;
+ case CEE_CONV_I:
+ lclTyp = TYP_I_IMPL;
+ goto CONV;
+ case CEE_CONV_I4:
+ lclTyp = TYP_INT;
+ goto CONV;
+ case CEE_CONV_I8:
+ lclTyp = TYP_LONG;
+ goto CONV;
+
+ case CEE_CONV_U1:
+ lclTyp = TYP_UBYTE;
+ goto CONV;
+ case CEE_CONV_U2:
+ lclTyp = TYP_CHAR;
+ goto CONV;
+#if (REGSIZE_BYTES == 8)
+ case CEE_CONV_U:
+ lclTyp = TYP_U_IMPL;
+ goto CONV_UN;
+#else
+ case CEE_CONV_U:
+ lclTyp = TYP_U_IMPL;
+ goto CONV;
+#endif
+ case CEE_CONV_U4:
+ lclTyp = TYP_UINT;
+ goto CONV;
+ case CEE_CONV_U8:
+ lclTyp = TYP_ULONG;
+ goto CONV_UN;
+
+ case CEE_CONV_R4:
+ lclTyp = TYP_FLOAT;
+ goto CONV;
+ case CEE_CONV_R8:
+ lclTyp = TYP_DOUBLE;
+ goto CONV;
+
+ case CEE_CONV_R_UN:
+ lclTyp = TYP_DOUBLE;
+ goto CONV_UN;
+
+ CONV_UN:
+ uns = true;
+ ovfl = false;
+ goto _CONV;
+
+ CONV:
+ uns = false;
+ ovfl = false;
+ goto _CONV;
+
+ _CONV:
+ // just check that we have a number on the stack
+ if (tiVerificationNeeded)
+ {
+ const typeInfo& tiVal = impStackTop().seTypeInfo;
+ Verify(tiVal.IsNumberType(), "bad arg");
+
+#ifdef _TARGET_64BIT_
+ bool isNative = false;
+
+ switch (opcode)
+ {
+ case CEE_CONV_OVF_I:
+ case CEE_CONV_OVF_I_UN:
+ case CEE_CONV_I:
+ case CEE_CONV_OVF_U:
+ case CEE_CONV_OVF_U_UN:
+ case CEE_CONV_U:
+ isNative = true;
+ default:
+ // leave 'isNative' = false;
+ break;
+ }
+ if (isNative)
+ {
+ tiRetVal = typeInfo::nativeInt();
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+ tiRetVal = typeInfo(lclTyp).NormaliseForStack();
+ }
+ }
+
+                // Only conversions from FLOAT or DOUBLE to an integer type, and conversions from
+                // ULONG (or LONG on ARM) to DOUBLE, are morphed into helper calls.
+
+ if (varTypeIsFloating(lclTyp))
+ {
+ callNode = varTypeIsLong(impStackTop().val) || uns // uint->dbl gets turned into uint->long->dbl
+#ifdef _TARGET_64BIT_
+ // TODO-ARM64-Bug?: This was AMD64; I enabled it for ARM64 also. OK?
+ // TYP_BYREF could be used as TYP_I_IMPL which is long.
+ // TODO-CQ: remove this when we lower casts long/ulong --> float/double
+ // and generate SSE2 code instead of going through helper calls.
+ || (impStackTop().val->TypeGet() == TYP_BYREF)
+#endif
+ ;
+ }
+ else
+ {
+ callNode = varTypeIsFloating(impStackTop().val->TypeGet());
+ }
+
+ // At this point uns, ovf, callNode all set
+
+ op1 = impPopStack().val;
+ impBashVarAddrsToI(op1);
+
+ if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtType == TYP_INT && op1->gtOper == GT_AND)
+ {
+ op2 = op1->gtOp.gtOp2;
+
+ if (op2->gtOper == GT_CNS_INT)
+ {
+ ssize_t ival = op2->gtIntCon.gtIconVal;
+ ssize_t mask, umask;
+
+ switch (lclTyp)
+ {
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ mask = 0x00FF;
+ umask = 0x007F;
+ break;
+ case TYP_CHAR:
+ case TYP_SHORT:
+ mask = 0xFFFF;
+ umask = 0x7FFF;
+ break;
+
+ default:
+ assert(!"unexpected type");
+ return;
+ }
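+                            // If the AND mask already confines the value to the target small type's range
+                            // (e.g. (x & 0x7F) before a conv.i1), the cast below is redundant.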
+
+ if (((ival & umask) == ival) || ((ival & mask) == ival && uns))
+ {
+ /* Toss the cast, it's a waste of time */
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+ }
+ else if (ival == mask)
+ {
+                            /* Toss the masking, it's a waste of time, since
+                               we sign-extend from the small value anyway */
+
+ op1 = op1->gtOp.gtOp1;
+ }
+ }
+ }
+
+ /* The 'op2' sub-operand of a cast is the 'real' type number,
+ since the result of a cast to one of the 'small' integer
+ types is an integer.
+ */
+
+ type = genActualType(lclTyp);
+
+#if SMALL_TREE_NODES
+ if (callNode)
+ {
+ op1 = gtNewCastNodeL(type, op1, lclTyp);
+ }
+ else
+#endif // SMALL_TREE_NODES
+ {
+ op1 = gtNewCastNode(type, op1, lclTyp);
+ }
+
+ if (ovfl)
+ {
+ op1->gtFlags |= (GTF_OVERFLOW | GTF_EXCEPT);
+ }
+ if (uns)
+ {
+ op1->gtFlags |= GTF_UNSIGNED;
+ }
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_NEG:
+ if (tiVerificationNeeded)
+ {
+ tiRetVal = impStackTop().seTypeInfo;
+ Verify(tiRetVal.IsNumberType(), "Bad arg");
+ }
+
+ op1 = impPopStack().val;
+ impBashVarAddrsToI(op1, nullptr);
+ impPushOnStack(gtNewOperNode(GT_NEG, genActualType(op1->gtType), op1), tiRetVal);
+ break;
+
+ case CEE_POP:
+ if (tiVerificationNeeded)
+ {
+ impStackTop(0);
+ }
+
+ /* Pull the top value from the stack */
+
+ op1 = impPopStack(clsHnd).val;
+
+ /* Get hold of the type of the value being duplicated */
+
+ lclTyp = genActualType(op1->gtType);
+
+ /* Does the value have any side effects? */
+
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) || opts.compDbgCode)
+ {
+ // Since we are throwing away the value, just normalize
+ // it to its address. This is more efficient.
+
+ if (varTypeIsStruct(op1))
+ {
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Non-calls, such as obj or ret_expr, have to go through this.
+ // Calls with large struct return value have to go through this.
+ // Helper calls with small struct return value also have to go
+ // through this since they do not follow Unix calling convention.
+ if (op1->gtOper != GT_CALL || !IsMultiRegReturnedType(clsHnd) ||
+ op1->AsCall()->gtCallType == CT_HELPER)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ op1 = impGetStructAddr(op1, clsHnd, (unsigned)CHECK_SPILL_ALL, false);
+ }
+ }
+
+                    // If op1 is a non-overflow cast, throw it away since it is useless.
+ // Another reason for throwing away the useless cast is in the context of
+ // implicit tail calls when the operand of pop is GT_CAST(GT_CALL(..)).
+ // The cast gets added as part of importing GT_CALL, which gets in the way
+ // of fgMorphCall() on the forms of tail call nodes that we assert.
+ if ((op1->gtOper == GT_CAST) && !op1->gtOverflow())
+ {
+ op1 = op1->gtOp.gtOp1;
+ }
+
+ // If 'op1' is an expression, create an assignment node.
+ // Helps analyses (like CSE) to work fine.
+
+ if (op1->gtOper != GT_CALL)
+ {
+ op1 = gtUnusedValNode(op1);
+ }
+
+ /* Append the value to the tree list */
+ goto SPILL_APPEND;
+ }
+
+ /* No side effects - just throw the <BEEP> thing away */
+ break;
+
+ case CEE_DUP:
+
+ if (tiVerificationNeeded)
+ {
+                        // Dup could start the beginning of a delegate creation sequence; remember that
+ delegateCreateStart = codeAddr - 1;
+ impStackTop(0);
+ }
+
+ // Convert a (dup, stloc) sequence into a (stloc, ldloc) sequence in the following cases:
+ // - If this is non-debug code - so that CSE will recognize the two as equal.
+ // This helps eliminate a redundant bounds check in cases such as:
+ // ariba[i+3] += some_value;
+ // - If the top of the stack is a non-leaf that may be expensive to clone.
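+                //   For example, the IL sequence 'dup; stloc.0' is imported as 'stloc.0; ldloc.0'.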
+
+ if (codeAddr < codeEndp)
+ {
+ OPCODE nextOpcode = (OPCODE)getU1LittleEndian(codeAddr);
+ if (impIsAnySTLOC(nextOpcode))
+ {
+ if (!opts.compDbgCode)
+ {
+ insertLdloc = true;
+ break;
+ }
+ GenTree* stackTop = impStackTop().val;
+ if (!stackTop->IsIntegralConst(0) && !stackTop->IsFPZero() && !stackTop->IsLocal())
+ {
+ insertLdloc = true;
+ break;
+ }
+ }
+ }
+
+ /* Pull the top value from the stack */
+ op1 = impPopStack(tiRetVal);
+
+ /* Clone the value */
+ op1 = impCloneExpr(op1, &op2, tiRetVal.GetClassHandle(), (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("DUP instruction"));
+
+ /* Either the tree started with no global effects, or impCloneExpr
+ evaluated the tree to a temp and returned two copies of that
+ temp. Either way, neither op1 nor op2 should have side effects.
+ */
+ assert(!(op1->gtFlags & GTF_GLOB_EFFECT) && !(op2->gtFlags & GTF_GLOB_EFFECT));
+
+ /* Push the tree/temp back on the stack */
+ impPushOnStack(op1, tiRetVal);
+
+ /* Push the copy on the stack */
+ impPushOnStack(op2, tiRetVal);
+
+ break;
+
+ case CEE_STIND_I1:
+ lclTyp = TYP_BYTE;
+ goto STIND;
+ case CEE_STIND_I2:
+ lclTyp = TYP_SHORT;
+ goto STIND;
+ case CEE_STIND_I4:
+ lclTyp = TYP_INT;
+ goto STIND;
+ case CEE_STIND_I8:
+ lclTyp = TYP_LONG;
+ goto STIND;
+ case CEE_STIND_I:
+ lclTyp = TYP_I_IMPL;
+ goto STIND;
+ case CEE_STIND_REF:
+ lclTyp = TYP_REF;
+ goto STIND;
+ case CEE_STIND_R4:
+ lclTyp = TYP_FLOAT;
+ goto STIND;
+ case CEE_STIND_R8:
+ lclTyp = TYP_DOUBLE;
+ goto STIND;
+ STIND:
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo instrType(lclTyp);
+#ifdef _TARGET_64BIT_
+ if (opcode == CEE_STIND_I)
+ {
+ instrType = typeInfo::nativeInt();
+ }
+#endif // _TARGET_64BIT_
+ verVerifySTIND(impStackTop(1).seTypeInfo, impStackTop(0).seTypeInfo, instrType);
+ }
+ else
+ {
+ compUnsafeCastUsed = true; // Have to go conservative
+ }
+
+ STIND_POST_VERIFY:
+
+ op2 = impPopStack().val; // value to store
+ op1 = impPopStack().val; // address to store to
+
+ // you can indirect off of a TYP_I_IMPL (if we are in C) or a BYREF
+ assertImp(genActualType(op1->gtType) == TYP_I_IMPL || op1->gtType == TYP_BYREF);
+
+ impBashVarAddrsToI(op1, op2);
+
+ op2 = impImplicitR4orR8Cast(op2, lclTyp);
+
+#ifdef _TARGET_64BIT_
+ // Automatic upcast for a GT_CNS_INT into TYP_I_IMPL
+ if ((op2->OperGet() == GT_CNS_INT) && varTypeIsI(lclTyp) && !varTypeIsI(op2->gtType))
+ {
+ op2->gtType = TYP_I_IMPL;
+ }
+ else
+ {
+                    // Allow a downcast of op2 from TYP_I_IMPL into a 32-bit Int for x86 JIT compatibility
+ //
+ if (varTypeIsI(op2->gtType) && (genActualType(lclTyp) == TYP_INT))
+ {
+ assert(!tiVerificationNeeded); // We should have thrown the VerificationException before.
+ op2 = gtNewCastNode(TYP_INT, op2, TYP_INT);
+ }
+                    // Allow an upcast of op2 from a 32-bit Int into TYP_I_IMPL for x86 JIT compatibility
+ //
+ if (varTypeIsI(lclTyp) && (genActualType(op2->gtType) == TYP_INT))
+ {
+ assert(!tiVerificationNeeded); // We should have thrown the VerificationException before.
+ op2 = gtNewCastNode(TYP_I_IMPL, op2, TYP_I_IMPL);
+ }
+ }
+#endif // _TARGET_64BIT_
+
+ if (opcode == CEE_STIND_REF)
+ {
+ // STIND_REF can be used to store TYP_INT, TYP_I_IMPL, TYP_REF, or TYP_BYREF
+ assertImp(varTypeIsIntOrI(op2->gtType) || varTypeIsGC(op2->gtType));
+ lclTyp = genActualType(op2->TypeGet());
+ }
+
+// Check target type.
+#ifdef DEBUG
+ if (op2->gtType == TYP_BYREF || lclTyp == TYP_BYREF)
+ {
+ if (op2->gtType == TYP_BYREF)
+ {
+ assertImp(lclTyp == TYP_BYREF || lclTyp == TYP_I_IMPL);
+ }
+ else if (lclTyp == TYP_BYREF)
+ {
+ assertImp(op2->gtType == TYP_BYREF || varTypeIsIntOrI(op2->gtType));
+ }
+ }
+ else
+ {
+ assertImp(genActualType(op2->gtType) == genActualType(lclTyp) ||
+ ((lclTyp == TYP_I_IMPL) && (genActualType(op2->gtType) == TYP_INT)) ||
+ (varTypeIsFloating(op2->gtType) && varTypeIsFloating(lclTyp)));
+ }
+#endif
+
+ op1 = gtNewOperNode(GT_IND, lclTyp, op1);
+
+                // stind could point anywhere, for example a boxed class static int
+ op1->gtFlags |= GTF_IND_TGTANYWHERE;
+
+ if (prefixFlags & PREFIX_VOLATILE)
+ {
+ assert(op1->OperGet() == GT_IND);
+ op1->gtFlags |= GTF_DONT_CSE; // Can't CSE a volatile
+ op1->gtFlags |= GTF_ORDER_SIDEEFF; // Prevent this from being reordered
+ op1->gtFlags |= GTF_IND_VOLATILE;
+ }
+
+ if (prefixFlags & PREFIX_UNALIGNED)
+ {
+ assert(op1->OperGet() == GT_IND);
+ op1->gtFlags |= GTF_IND_UNALIGNED;
+ }
+
+ op1 = gtNewAssignNode(op1, op2);
+ op1->gtFlags |= GTF_EXCEPT | GTF_GLOB_REF;
+
+ // Spill side-effects AND global-data-accesses
+ if (verCurrentState.esStackDepth > 0)
+ {
+ impSpillSideEffects(true, (unsigned)CHECK_SPILL_ALL DEBUGARG("spill side effects before STIND"));
+ }
+
+ goto APPEND;
+
+ case CEE_LDIND_I1:
+ lclTyp = TYP_BYTE;
+ goto LDIND;
+ case CEE_LDIND_I2:
+ lclTyp = TYP_SHORT;
+ goto LDIND;
+ case CEE_LDIND_U4:
+ case CEE_LDIND_I4:
+ lclTyp = TYP_INT;
+ goto LDIND;
+ case CEE_LDIND_I8:
+ lclTyp = TYP_LONG;
+ goto LDIND;
+ case CEE_LDIND_REF:
+ lclTyp = TYP_REF;
+ goto LDIND;
+ case CEE_LDIND_I:
+ lclTyp = TYP_I_IMPL;
+ goto LDIND;
+ case CEE_LDIND_R4:
+ lclTyp = TYP_FLOAT;
+ goto LDIND;
+ case CEE_LDIND_R8:
+ lclTyp = TYP_DOUBLE;
+ goto LDIND;
+ case CEE_LDIND_U1:
+ lclTyp = TYP_UBYTE;
+ goto LDIND;
+ case CEE_LDIND_U2:
+ lclTyp = TYP_CHAR;
+ goto LDIND;
+ LDIND:
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo lclTiType(lclTyp);
+#ifdef _TARGET_64BIT_
+ if (opcode == CEE_LDIND_I)
+ {
+ lclTiType = typeInfo::nativeInt();
+ }
+#endif // _TARGET_64BIT_
+ tiRetVal = verVerifyLDIND(impStackTop().seTypeInfo, lclTiType);
+ tiRetVal.NormaliseForStack();
+ }
+ else
+ {
+ compUnsafeCastUsed = true; // Have to go conservative
+ }
+
+ LDIND_POST_VERIFY:
+
+ op1 = impPopStack().val; // address to load from
+ impBashVarAddrsToI(op1);
+
+#ifdef _TARGET_64BIT_
+                // Allow an upcast of op1 from a 32-bit Int into TYP_I_IMPL for x86 JIT compatibility
+ //
+ if (genActualType(op1->gtType) == TYP_INT)
+ {
+ assert(!tiVerificationNeeded); // We should have thrown the VerificationException before.
+ op1 = gtNewCastNode(TYP_I_IMPL, op1, TYP_I_IMPL);
+ }
+#endif
+
+ assertImp(genActualType(op1->gtType) == TYP_I_IMPL || op1->gtType == TYP_BYREF);
+
+ op1 = gtNewOperNode(GT_IND, lclTyp, op1);
+
+                // ldind could point anywhere, for example a boxed class static int
+ op1->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+
+ if (prefixFlags & PREFIX_VOLATILE)
+ {
+ assert(op1->OperGet() == GT_IND);
+ op1->gtFlags |= GTF_DONT_CSE; // Can't CSE a volatile
+ op1->gtFlags |= GTF_ORDER_SIDEEFF; // Prevent this from being reordered
+ op1->gtFlags |= GTF_IND_VOLATILE;
+ }
+
+ if (prefixFlags & PREFIX_UNALIGNED)
+ {
+ assert(op1->OperGet() == GT_IND);
+ op1->gtFlags |= GTF_IND_UNALIGNED;
+ }
+
+ impPushOnStack(op1, tiRetVal);
+
+ break;
+
+ case CEE_UNALIGNED:
+
+ assert(sz == 1);
+ val = getU1LittleEndian(codeAddr);
+ ++codeAddr;
+ JITDUMP(" %u", val);
+ if ((val != 1) && (val != 2) && (val != 4))
+ {
+ BADCODE("Alignment unaligned. must be 1, 2, or 4");
+ }
+
+ Verify(!(prefixFlags & PREFIX_UNALIGNED), "Multiple unaligned. prefixes");
+ prefixFlags |= PREFIX_UNALIGNED;
+
+ impValidateMemoryAccessOpcode(codeAddr, codeEndp, false);
+
+ PREFIX:
+ opcode = (OPCODE)getU1LittleEndian(codeAddr);
+ codeAddr += sizeof(__int8);
+ opcodeOffs = (IL_OFFSET)(codeAddr - info.compCode);
+ goto DECODE_OPCODE;
+
+ case CEE_VOLATILE:
+
+ Verify(!(prefixFlags & PREFIX_VOLATILE), "Multiple volatile. prefixes");
+ prefixFlags |= PREFIX_VOLATILE;
+
+ impValidateMemoryAccessOpcode(codeAddr, codeEndp, true);
+
+ assert(sz == 0);
+ goto PREFIX;
+
+ case CEE_LDFTN:
+ {
+ // Need to do a lookup here so that we perform an access check
+ // and do a NOWAY if protections are violated
+ _impResolveToken(CORINFO_TOKENKIND_Method);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ eeGetCallInfo(&resolvedToken, nullptr /* constraint typeRef*/,
+ addVerifyFlag(combine(CORINFO_CALLINFO_SECURITYCHECKS, CORINFO_CALLINFO_LDFTN)),
+ &callInfo);
+
+ // This check really only applies to intrinsic Array.Address methods
+ if (callInfo.sig.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ NO_WAY("Currently do not support LDFTN of Parameterized functions");
+ }
+
+ // Do this before DO_LDFTN since CEE_LDVIRTFN does it on its own.
+ impHandleAccessAllowed(callInfo.accessAllowed, &callInfo.callsiteCalloutHelper);
+
+ if (tiVerificationNeeded)
+ {
+                    // LDFTN could start the beginning of a delegate creation sequence; remember that
+ delegateCreateStart = codeAddr - 2;
+
+ // check any constraints on the callee's class and type parameters
+ VerifyOrReturn(info.compCompHnd->satisfiesClassConstraints(resolvedToken.hClass),
+ "method has unsatisfied class constraints");
+ VerifyOrReturn(info.compCompHnd->satisfiesMethodConstraints(resolvedToken.hClass,
+ resolvedToken.hMethod),
+ "method has unsatisfied method constraints");
+
+ mflags = callInfo.verMethodFlags;
+ Verify(!(mflags & CORINFO_FLG_CONSTRUCTOR), "LDFTN on a constructor");
+ }
+
+ DO_LDFTN:
+ op1 = impMethodPointer(&resolvedToken, &callInfo);
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ impPushOnStack(op1, typeInfo(resolvedToken.hMethod));
+
+ break;
+ }
+
+ case CEE_LDVIRTFTN:
+ {
+ /* Get the method token */
+
+ _impResolveToken(CORINFO_TOKENKIND_Method);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ eeGetCallInfo(&resolvedToken, nullptr /* constraint typeRef */,
+ addVerifyFlag(combine(combine(CORINFO_CALLINFO_SECURITYCHECKS, CORINFO_CALLINFO_LDFTN),
+ CORINFO_CALLINFO_CALLVIRT)),
+ &callInfo);
+
+ // This check really only applies to intrinsic Array.Address methods
+ if (callInfo.sig.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ NO_WAY("Currently do not support LDFTN of Parameterized functions");
+ }
+
+ mflags = callInfo.methodFlags;
+
+ impHandleAccessAllowed(callInfo.accessAllowed, &callInfo.callsiteCalloutHelper);
+
+ if (compIsForInlining())
+ {
+ if (mflags & (CORINFO_FLG_FINAL | CORINFO_FLG_STATIC) || !(mflags & CORINFO_FLG_VIRTUAL))
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_LDVIRTFN_ON_NON_VIRTUAL);
+ return;
+ }
+ }
+
+ CORINFO_SIG_INFO& ftnSig = callInfo.sig;
+
+ if (tiVerificationNeeded)
+ {
+
+ Verify(ftnSig.hasThis(), "ldvirtftn on a static method");
+ Verify(!(mflags & CORINFO_FLG_CONSTRUCTOR), "LDVIRTFTN on a constructor");
+
+ // JIT32 verifier rejects verifiable ldvirtftn pattern
+ typeInfo declType =
+ verMakeTypeInfo(resolvedToken.hClass, true); // Change TI_STRUCT to TI_REF when necessary
+
+ typeInfo arg = impStackTop().seTypeInfo;
+ Verify((arg.IsType(TI_REF) || arg.IsType(TI_NULL)) && tiCompatibleWith(arg, declType, true),
+ "bad ldvirtftn");
+
+ CORINFO_CLASS_HANDLE instanceClassHnd = info.compClassHnd;
+ if (!(arg.IsType(TI_NULL) || (mflags & CORINFO_FLG_STATIC)))
+ {
+ instanceClassHnd = arg.GetClassHandleForObjRef();
+ }
+
+ // check any constraints on the method's class and type parameters
+ VerifyOrReturn(info.compCompHnd->satisfiesClassConstraints(resolvedToken.hClass),
+ "method has unsatisfied class constraints");
+ VerifyOrReturn(info.compCompHnd->satisfiesMethodConstraints(resolvedToken.hClass,
+ resolvedToken.hMethod),
+ "method has unsatisfied method constraints");
+
+ if (mflags & CORINFO_FLG_PROTECTED)
+ {
+ Verify(info.compCompHnd->canAccessFamily(info.compMethodHnd, instanceClassHnd),
+ "Accessing protected method through wrong type.");
+ }
+ }
+
+ /* Get the object-ref */
+ op1 = impPopStack().val;
+ assertImp(op1->gtType == TYP_REF);
+
+ if (opts.IsReadyToRun())
+ {
+ if (callInfo.kind != CORINFO_VIRTUALCALL_LDVIRTFTN)
+ {
+ if (op1->gtFlags & GTF_SIDE_EFFECT)
+ {
+ op1 = gtUnusedValNode(op1);
+ impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+ }
+ goto DO_LDFTN;
+ }
+ }
+ else if (mflags & (CORINFO_FLG_FINAL | CORINFO_FLG_STATIC) || !(mflags & CORINFO_FLG_VIRTUAL))
+ {
+ if (op1->gtFlags & GTF_SIDE_EFFECT)
+ {
+ op1 = gtUnusedValNode(op1);
+ impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+ }
+ goto DO_LDFTN;
+ }
+
+ GenTreePtr fptr = impImportLdvirtftn(op1, &resolvedToken, &callInfo);
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ impPushOnStack(fptr, typeInfo(resolvedToken.hMethod));
+
+ break;
+ }
+
+ case CEE_CONSTRAINED:
+
+ assertImp(sz == sizeof(unsigned));
+ impResolveToken(codeAddr, &constrainedResolvedToken, CORINFO_TOKENKIND_Constrained);
+ codeAddr += sizeof(unsigned); // prefix instructions must increment codeAddr manually
+ JITDUMP(" (%08X) ", constrainedResolvedToken.token);
+
+ Verify(!(prefixFlags & PREFIX_CONSTRAINED), "Multiple constrained. prefixes");
+ prefixFlags |= PREFIX_CONSTRAINED;
+
+ {
+ OPCODE actualOpcode = impGetNonPrefixOpcode(codeAddr, codeEndp);
+ if (actualOpcode != CEE_CALLVIRT)
+ {
+ BADCODE("constrained. has to be followed by callvirt");
+ }
+ }
+
+ goto PREFIX;
+
+ case CEE_READONLY:
+ JITDUMP(" readonly.");
+
+ Verify(!(prefixFlags & PREFIX_READONLY), "Multiple readonly. prefixes");
+ prefixFlags |= PREFIX_READONLY;
+
+ {
+ OPCODE actualOpcode = impGetNonPrefixOpcode(codeAddr, codeEndp);
+ if (actualOpcode != CEE_LDELEMA && !impOpcodeIsCallOpcode(actualOpcode))
+ {
+ BADCODE("readonly. has to be followed by ldelema or call");
+ }
+ }
+
+ assert(sz == 0);
+ goto PREFIX;
+
+ case CEE_TAILCALL:
+ JITDUMP(" tail.");
+
+ Verify(!(prefixFlags & PREFIX_TAILCALL_EXPLICIT), "Multiple tailcall. prefixes");
+ prefixFlags |= PREFIX_TAILCALL_EXPLICIT;
+
+ {
+ OPCODE actualOpcode = impGetNonPrefixOpcode(codeAddr, codeEndp);
+ if (!impOpcodeIsCallOpcode(actualOpcode))
+ {
+ BADCODE("tailcall. has to be followed by call, callvirt or calli");
+ }
+ }
+ assert(sz == 0);
+ goto PREFIX;
+
+ case CEE_NEWOBJ:
+
+ /* Since we will implicitly insert newObjThisPtr at the start of the
+ argument list, spill any GTF_ORDER_SIDEEFF */
+ impSpillSpecialSideEff();
+
+ /* NEWOBJ does not respond to TAIL */
+ prefixFlags &= ~PREFIX_TAILCALL_EXPLICIT;
+
+ /* NEWOBJ does not respond to CONSTRAINED */
+ prefixFlags &= ~PREFIX_CONSTRAINED;
+
+#if COR_JIT_EE_VERSION > 460
+ _impResolveToken(CORINFO_TOKENKIND_NewObj);
+#else
+ _impResolveToken(CORINFO_TOKENKIND_Method);
+#endif
+
+ eeGetCallInfo(&resolvedToken, nullptr /* constraint typeRef*/,
+ addVerifyFlag(combine(CORINFO_CALLINFO_SECURITYCHECKS, CORINFO_CALLINFO_ALLOWINSTPARAM)),
+ &callInfo);
+
+ if (compIsForInlining())
+ {
+ if (impInlineInfo->inlineCandidateInfo->dwRestrictions & INLINE_RESPECT_BOUNDARY)
+ {
+ // Check to see if this call violates the boundary.
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_CROSS_BOUNDARY_SECURITY);
+ return;
+ }
+ }
+
+ mflags = callInfo.methodFlags;
+
+ if ((mflags & (CORINFO_FLG_STATIC | CORINFO_FLG_ABSTRACT)) != 0)
+ {
+ BADCODE("newobj on static or abstract method");
+ }
+
+ // Insert the security callout before any actual code is generated
+ impHandleAccessAllowed(callInfo.accessAllowed, &callInfo.callsiteCalloutHelper);
+
+                // There are three different cases for 'new'. In the first two the object size
+                // is variable (it depends on the arguments):
+                // 1) The object is an array (arrays are treated specially by the EE)
+                // 2) The object is some other variable-sized object (e.g. String)
+                // 3) The class size can be determined beforehand (the normal case)
+                // In the first case we need to call a NEWOBJ helper (multinewarray),
+                // in the second case we call the constructor with a null 'this' pointer,
+                // and in the third case we allocate the memory and then call the constructor.
+
+ clsFlags = callInfo.classFlags;
+ if (clsFlags & CORINFO_FLG_ARRAY)
+ {
+ if (tiVerificationNeeded)
+ {
+ CORINFO_CLASS_HANDLE elemTypeHnd;
+ INDEBUG(CorInfoType corType =)
+ info.compCompHnd->getChildType(resolvedToken.hClass, &elemTypeHnd);
+ assert(!(elemTypeHnd == nullptr && corType == CORINFO_TYPE_VALUECLASS));
+ Verify(elemTypeHnd == nullptr ||
+ !(info.compCompHnd->getClassAttribs(elemTypeHnd) & CORINFO_FLG_CONTAINS_STACK_PTR),
+ "newarr of byref-like objects");
+ verVerifyCall(opcode, &resolvedToken, nullptr, ((prefixFlags & PREFIX_TAILCALL_EXPLICIT) != 0),
+ ((prefixFlags & PREFIX_READONLY) != 0), delegateCreateStart, codeAddr - 1,
+ &callInfo DEBUGARG(info.compFullName));
+ }
+ // Arrays need to call the NEWOBJ helper.
+ assertImp(clsFlags & CORINFO_FLG_VAROBJSIZE);
+
+ impImportNewObjArray(&resolvedToken, &callInfo);
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ callTyp = TYP_REF;
+ break;
+ }
+ // At present this can only be String
+ else if (clsFlags & CORINFO_FLG_VAROBJSIZE)
+ {
+#if COR_JIT_EE_VERSION > 460
+ if (eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI)
+ {
+ // The dummy argument does not exist in CoreRT
+ newObjThisPtr = nullptr;
+ }
+ else
+#endif
+ {
+ // This is the case for variable-sized objects that are not
+ // arrays. In this case, call the constructor with a null 'this'
+ // pointer
+ newObjThisPtr = gtNewIconNode(0, TYP_REF);
+ }
+
+ /* Remember that this basic block contains 'new' of an object */
+ block->bbFlags |= BBF_HAS_NEWOBJ;
+ optMethodFlags |= OMF_HAS_NEWOBJ;
+ }
+ else
+ {
+ // This is the normal case where the size of the object is
+ // fixed. Allocate the memory and call the constructor.
+
+                    // Note: We cannot add a peephole to avoid the use of a temp here
+                    // because we don't have enough interference info to detect when the
+                    // source and destination interfere, e.g.: s = new S(ref);
+
+                    // TODO: Find the correct place to introduce a general
+                    // reverse copy prop for struct return values from newobj or
+                    // any function returning structs.
+
+ /* get a temporary for the new object */
+ lclNum = lvaGrabTemp(true DEBUGARG("NewObj constructor temp"));
+
+ // In the value class case we only need clsHnd for size calcs.
+ //
+ // The lookup of the code pointer will be handled by CALL in this case
+ if (clsFlags & CORINFO_FLG_VALUECLASS)
+ {
+ CorInfoType jitTyp = info.compCompHnd->asCorInfoType(resolvedToken.hClass);
+ unsigned size = info.compCompHnd->getClassSize(resolvedToken.hClass);
+
+ if (impIsPrimitive(jitTyp))
+ {
+ lvaTable[lclNum].lvType = JITtype2varType(jitTyp);
+ }
+ else
+ {
+                            // The local variable itself is the allocated space.
+                            // Here we need the unsafe value class check, since the address of the struct
+                            // is taken for further use and is potentially exploitable.
+ lvaSetStruct(lclNum, resolvedToken.hClass, true /* unsafe value cls check */);
+ }
+
+ // Append a tree to zero-out the temp
+ newObjThisPtr = gtNewLclvNode(lclNum, lvaTable[lclNum].TypeGet());
+
+ newObjThisPtr = gtNewBlkOpNode(newObjThisPtr, // Dest
+ gtNewIconNode(0), // Value
+ size, // Size
+ false, // isVolatile
+ false); // not copyBlock
+ impAppendTree(newObjThisPtr, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+ // Obtain the address of the temp
+ newObjThisPtr =
+ gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(lclNum, lvaTable[lclNum].TypeGet()));
+ }
+ else
+ {
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ op1 = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_NEW, TYP_REF);
+ usingReadyToRunHelper = (op1 != NULL);
+ }
+
+ if (!usingReadyToRunHelper)
+#endif
+ {
+ op1 = impParentClassTokenToHandle(&resolvedToken, nullptr, TRUE);
+ if (op1 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ // TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
+ // and the newfast call with a single call to a dynamic R2R cell that will:
+ // 1) Load the context
+ // 2) Perform the generic dictionary lookup and caching, and generate the appropriate
+ // stub
+ // 3) Allocate and return the new object
+ // Reason: performance (today, we'll always use the slow helper for the R2R generics case)
+
+ op1 = gtNewAllocObjNode(info.compCompHnd->getNewHelper(&resolvedToken, info.compMethodHnd),
+ resolvedToken.hClass, TYP_REF, op1);
+ }
+
+ // Remember that this basic block contains 'new' of an object
+ block->bbFlags |= BBF_HAS_NEWOBJ;
+ optMethodFlags |= OMF_HAS_NEWOBJ;
+
+                        // Append the assignment to the temp/local. We don't need to spill
+                        // at all as we are just calling an EE-Jit helper which can only
+                        // cause an (async) OutOfMemoryException.
+
+                        // We assign the newly allocated object (by a GT_ALLOCOBJ node)
+                        // to a temp. Note that the pattern "temp = allocObj" is required
+                        // by the ObjectAllocator phase to be able to determine GT_ALLOCOBJ nodes
+                        // without an exhaustive walk over all expressions.
+
+ impAssignTempGen(lclNum, op1, (unsigned)CHECK_SPILL_NONE);
+
+ newObjThisPtr = gtNewLclvNode(lclNum, TYP_REF);
+ }
+ }
+ goto CALL;
+
+ case CEE_CALLI:
+
+ /* CALLI does not respond to CONSTRAINED */
+ prefixFlags &= ~PREFIX_CONSTRAINED;
+
+ if (compIsForInlining())
+ {
+ // CALLI doesn't have a method handle, so assume the worst.
+ if (impInlineInfo->inlineCandidateInfo->dwRestrictions & INLINE_RESPECT_BOUNDARY)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_CROSS_BOUNDARY_CALLI);
+ return;
+ }
+ }
+
+ // fall through
+
+ case CEE_CALLVIRT:
+ case CEE_CALL:
+
+ // We can't call getCallInfo on the token from a CALLI, but we need it in
+ // many other places. We unfortunately embed that knowledge here.
+ if (opcode != CEE_CALLI)
+ {
+ _impResolveToken(CORINFO_TOKENKIND_Method);
+
+ eeGetCallInfo(&resolvedToken,
+ (prefixFlags & PREFIX_CONSTRAINED) ? &constrainedResolvedToken : nullptr,
+ // this is how impImportCall invokes getCallInfo
+ addVerifyFlag(
+ combine(combine(CORINFO_CALLINFO_ALLOWINSTPARAM, CORINFO_CALLINFO_SECURITYCHECKS),
+ (opcode == CEE_CALLVIRT) ? CORINFO_CALLINFO_CALLVIRT
+ : CORINFO_CALLINFO_NONE)),
+ &callInfo);
+ }
+ else
+ {
+ // Suppress uninitialized use warning.
+ memset(&resolvedToken, 0, sizeof(resolvedToken));
+ memset(&callInfo, 0, sizeof(callInfo));
+
+ resolvedToken.token = getU4LittleEndian(codeAddr);
+ }
+
+ CALL: // memberRef should be set.
+ // newObjThisPtr should be set for CEE_NEWOBJ
+
+ JITDUMP(" %08X", resolvedToken.token);
+ constraintCall = (prefixFlags & PREFIX_CONSTRAINED) != 0;
+
+ bool newBBcreatedForTailcallStress;
+
+ newBBcreatedForTailcallStress = false;
+
+ if (compIsForInlining())
+ {
+ // We rule out inlinees with explicit tail calls in fgMakeBasicBlocks.
+ assert((prefixFlags & PREFIX_TAILCALL_EXPLICIT) == 0);
+ }
+ else
+ {
+ if (compTailCallStress())
+ {
+                        // Have we created a new BB after the "call" instruction in fgMakeBasicBlocks()?
+                        // Tail call stress only recognizes call+ret patterns and forces them to be
+                        // explicit tail prefixed calls. Also, under tail call stress fgMakeBasicBlocks()
+                        // doesn't import the 'ret' opcode following the call into the basic block containing
+                        // the call; instead it imports it into a new basic block. Note that fgMakeBasicBlocks()
+                        // already checks that there is an opcode following the call, hence it is safe here
+                        // to read the next opcode without a bounds check.
+ newBBcreatedForTailcallStress =
+ impOpcodeIsCallOpcode(opcode) && // Current opcode is a CALL, (not a CEE_NEWOBJ). So, don't
+ // make it jump to RET.
+ (OPCODE)getU1LittleEndian(codeAddr + sz) == CEE_RET; // Next opcode is a CEE_RET
+
+ if (newBBcreatedForTailcallStress &&
+ !(prefixFlags & PREFIX_TAILCALL_EXPLICIT) && // User hasn't set "tail." prefix yet.
+ verCheckTailCallConstraint(opcode, &resolvedToken,
+ constraintCall ? &constrainedResolvedToken : nullptr,
+                                                       true) // Is it legal to do a tailcall?
+ )
+ {
+ // Stress the tailcall.
+ JITDUMP(" (Tailcall stress: prefixFlags |= PREFIX_TAILCALL_EXPLICIT)");
+ prefixFlags |= PREFIX_TAILCALL_EXPLICIT;
+ }
+ }
+
+ // Note that when running under tail call stress, a call will be marked as explicit tail prefixed
+ // hence will not be considered for implicit tail calling.
+ bool isRecursive = (callInfo.hMethod == info.compMethodHnd);
+ if (impIsImplicitTailCallCandidate(opcode, codeAddr + sz, codeEndp, prefixFlags, isRecursive))
+ {
+ JITDUMP(" (Implicit Tail call: prefixFlags |= PREFIX_TAILCALL_IMPLICIT)");
+ prefixFlags |= PREFIX_TAILCALL_IMPLICIT;
+ }
+ }
+
+ // Treat this call as tail call for verification only if "tail" prefixed (i.e. explicit tail call).
+ explicitTailCall = (prefixFlags & PREFIX_TAILCALL_EXPLICIT) != 0;
+ readonlyCall = (prefixFlags & PREFIX_READONLY) != 0;
+
+ if (opcode != CEE_CALLI && opcode != CEE_NEWOBJ)
+ {
+ // All calls and delegates need a security callout.
+ // For delegates, this is the call to the delegate constructor, not the access check on the
+ // LD(virt)FTN.
+ impHandleAccessAllowed(callInfo.accessAllowed, &callInfo.callsiteCalloutHelper);
+
+#if 0 // DevDiv 410397 - This breaks too many obfuscated apps to do this in an in-place release
+
+                    // DevDiv 291703 - we need to check for accessibility between the caller of InitializeArray
+                    // and the field it is reading; thus it is now unverifiable not to immediately precede it with
+                    // ldtoken <field token>, and we now check accessibility
+ if ((callInfo.methodFlags & CORINFO_FLG_INTRINSIC) &&
+ (info.compCompHnd->getIntrinsicID(callInfo.hMethod) == CORINFO_INTRINSIC_InitializeArray))
+ {
+ if (prevOpcode != CEE_LDTOKEN)
+ {
+ Verify(prevOpcode == CEE_LDTOKEN, "Need ldtoken for InitializeArray");
+ }
+ else
+ {
+ assert(lastLoadToken != NULL);
+ // Now that we know we have a token, verify that it is accessible for loading
+ CORINFO_RESOLVED_TOKEN resolvedLoadField;
+ impResolveToken(lastLoadToken, &resolvedLoadField, CORINFO_TOKENKIND_Field);
+ eeGetFieldInfo(&resolvedLoadField, CORINFO_ACCESS_INIT_ARRAY, &fieldInfo);
+ impHandleAccessAllowed(fieldInfo.accessAllowed, &fieldInfo.accessCalloutHelper);
+ }
+ }
+
+#endif // DevDiv 410397
+ }
+
+ if (tiVerificationNeeded)
+ {
+ verVerifyCall(opcode, &resolvedToken, constraintCall ? &constrainedResolvedToken : nullptr,
+ explicitTailCall, readonlyCall, delegateCreateStart, codeAddr - 1,
+ &callInfo DEBUGARG(info.compFullName));
+ }
+
+ // Insert delegate callout here.
+ if (opcode == CEE_NEWOBJ && (mflags & CORINFO_FLG_CONSTRUCTOR) && (clsFlags & CORINFO_FLG_DELEGATE))
+ {
+#ifdef DEBUG
+ // We should do this only if verification is enabled
+ // If verification is disabled, delegateCreateStart will not be initialized correctly
+ if (tiVerificationNeeded)
+ {
+ mdMemberRef delegateMethodRef = mdMemberRefNil;
+ // We should get here only for well formed delegate creation.
+ assert(verCheckDelegateCreation(delegateCreateStart, codeAddr - 1, delegateMethodRef));
+ }
+#endif
+
+#ifdef FEATURE_CORECLR
+ // In coreclr the delegate transparency rule needs to be enforced even if verification is disabled
+ typeInfo tiActualFtn = impStackTop(0).seTypeInfo;
+ CORINFO_METHOD_HANDLE delegateMethodHandle = tiActualFtn.GetMethod2();
+
+ impInsertCalloutForDelegate(info.compMethodHnd, delegateMethodHandle, resolvedToken.hClass);
+#endif // FEATURE_CORECLR
+ }
+
+ callTyp = impImportCall(opcode, &resolvedToken, constraintCall ? &constrainedResolvedToken : nullptr,
+ newObjThisPtr, prefixFlags, &callInfo, opcodeOffs);
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ if (explicitTailCall || newBBcreatedForTailcallStress) // If newBBcreatedForTailcallStress is true, we
+ // have created a new BB after the "call"
+ // instruction in fgMakeBasicBlocks(). So we need to jump to RET regardless.
+ {
+ assert(!compIsForInlining());
+ goto RET;
+ }
+
+ break;
+
+ case CEE_LDFLD:
+ case CEE_LDSFLD:
+ case CEE_LDFLDA:
+ case CEE_LDSFLDA:
+ {
+
+ BOOL isLoadAddress = (opcode == CEE_LDFLDA || opcode == CEE_LDSFLDA);
+ BOOL isLoadStatic = (opcode == CEE_LDSFLD || opcode == CEE_LDSFLDA);
+
+ /* Get the CP_Fieldref index */
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Field);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ int aflags = isLoadAddress ? CORINFO_ACCESS_ADDRESS : CORINFO_ACCESS_GET;
+
+ GenTreePtr obj = nullptr;
+ typeInfo* tiObj = nullptr;
+ CORINFO_CLASS_HANDLE objType = nullptr; // used for fields
+
+ if (opcode == CEE_LDFLD || opcode == CEE_LDFLDA)
+ {
+ tiObj = &impStackTop().seTypeInfo;
+ obj = impPopStack(objType).val;
+
+ if (impIsThis(obj))
+ {
+ aflags |= CORINFO_ACCESS_THIS;
+
+ // An optimization for Contextful classes:
+ // we unwrap the proxy when we have a 'this reference'
+
+ if (info.compUnwrapContextful)
+ {
+ aflags |= CORINFO_ACCESS_UNWRAP;
+ }
+ }
+ }
+
+ eeGetFieldInfo(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo);
+
+ // Figure out the type of the member. We always call canAccessField, so you always need this
+ // handle
+ CorInfoType ciType = fieldInfo.fieldType;
+ clsHnd = fieldInfo.structType;
+
+ lclTyp = JITtype2varType(ciType);
+
+#ifdef _TARGET_AMD64_
+                noway_assert(varTypeIsIntegralOrI(lclTyp) || varTypeIsFloating(lclTyp) || lclTyp == TYP_STRUCT);
+#endif // _TARGET_AMD64_
+
+ if (compIsForInlining())
+ {
+ switch (fieldInfo.fieldAccessor)
+ {
+ case CORINFO_FIELD_INSTANCE_HELPER:
+ case CORINFO_FIELD_INSTANCE_ADDR_HELPER:
+ case CORINFO_FIELD_STATIC_ADDR_HELPER:
+ case CORINFO_FIELD_STATIC_TLS:
+
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_LDFLD_NEEDS_HELPER);
+ return;
+
+ case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
+
+ /* We may be able to inline the field accessors in specific instantiations of generic
+ * methods */
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_LDFLD_NEEDS_HELPER);
+ return;
+
+ default:
+ break;
+ }
+
+ if (!isLoadAddress && (fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC) && lclTyp == TYP_STRUCT &&
+ clsHnd)
+ {
+ if ((info.compCompHnd->getTypeForPrimitiveValueClass(clsHnd) == CORINFO_TYPE_UNDEF) &&
+ !(info.compFlags & CORINFO_FLG_FORCEINLINE))
+ {
+ // Loading a static valuetype field usually will cause a JitHelper to be called
+ // for the static base. This will bloat the code.
+ compInlineResult->Note(InlineObservation::CALLEE_LDFLD_STATIC_VALUECLASS);
+
+ if (compInlineResult->IsFailure())
+ {
+ return;
+ }
+ }
+ }
+ }
+
+ tiRetVal = verMakeTypeInfo(ciType, clsHnd);
+ if (isLoadAddress)
+ {
+ tiRetVal.MakeByRef();
+ }
+ else
+ {
+ tiRetVal.NormaliseForStack();
+ }
+
+ // Perform this check always to ensure that we get field access exceptions even with
+ // SkipVerification.
+ impHandleAccessAllowed(fieldInfo.accessAllowed, &fieldInfo.accessCalloutHelper);
+
+ if (tiVerificationNeeded)
+ {
+ // You can also pass the unboxed struct to LDFLD
+ BOOL bAllowPlainValueTypeAsThis = FALSE;
+ if (opcode == CEE_LDFLD && impIsValueType(tiObj))
+ {
+ bAllowPlainValueTypeAsThis = TRUE;
+ }
+
+ verVerifyField(&resolvedToken, fieldInfo, tiObj, isLoadAddress, bAllowPlainValueTypeAsThis);
+
+ // If we're doing this on a heap object or from a 'safe' byref
+ // then the result is a safe byref too
+ if (isLoadAddress) // load address
+ {
+ if (fieldInfo.fieldFlags &
+ CORINFO_FLG_FIELD_STATIC) // statics marked as safe will have permanent home
+ {
+ if (fieldInfo.fieldFlags & CORINFO_FLG_FIELD_SAFESTATIC_BYREF_RETURN)
+ {
+ tiRetVal.SetIsPermanentHomeByRef();
+ }
+ }
+ else if (tiObj->IsObjRef() || tiObj->IsPermanentHomeByRef())
+ {
+ // ldflda of byref is safe if done on a gc object or on a
+ // safe byref
+ tiRetVal.SetIsPermanentHomeByRef();
+ }
+ }
+ }
+ else
+ {
+ // tiVerificationNeeded is false.
+ // Raise InvalidProgramException if static load accesses non-static field
+ if (isLoadStatic && ((fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC) == 0))
+ {
+ BADCODE("static access on an instance field");
+ }
+ }
+
+                // We are using ldfld/a on a static field. We allow it, but need to evaluate any side effects of obj.
+ if ((fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC) && obj != nullptr)
+ {
+ if (obj->gtFlags & GTF_SIDE_EFFECT)
+ {
+ obj = gtUnusedValNode(obj);
+ impAppendTree(obj, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+ }
+ obj = nullptr;
+ }
+
+ /* Preserve 'small' int types */
+ if (lclTyp > TYP_INT)
+ {
+ lclTyp = genActualType(lclTyp);
+ }
+
+ bool usesHelper = false;
+
+ switch (fieldInfo.fieldAccessor)
+ {
+ case CORINFO_FIELD_INSTANCE:
+#ifdef FEATURE_READYTORUN_COMPILER
+ case CORINFO_FIELD_INSTANCE_WITH_BASE:
+#endif
+ {
+ bool nullcheckNeeded = false;
+
+ obj = impCheckForNullPointer(obj);
+
+ if (isLoadAddress && (obj->gtType == TYP_BYREF) && fgAddrCouldBeNull(obj))
+ {
+ nullcheckNeeded = true;
+ }
+
+ // If the object is a struct, what we really want is
+ // for the field to operate on the address of the struct.
+ if (!varTypeGCtype(obj->TypeGet()) && impIsValueType(tiObj))
+ {
+ assert(opcode == CEE_LDFLD && objType != nullptr);
+
+ obj = impGetStructAddr(obj, objType, (unsigned)CHECK_SPILL_ALL, true);
+ }
+
+ /* Create the data member node */
+ op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, obj, fieldInfo.offset, nullcheckNeeded);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (fieldInfo.fieldAccessor == CORINFO_FIELD_INSTANCE_WITH_BASE)
+ op1->gtField.gtFieldLookup = fieldInfo.fieldLookup;
+#endif
+
+ op1->gtFlags |= (obj->gtFlags & GTF_GLOB_EFFECT);
+
+ if (fgAddrCouldBeNull(obj))
+ {
+ op1->gtFlags |= GTF_EXCEPT;
+ }
+
+ // If gtFldObj is a BYREF then our target is a value class and
+                        // it could point anywhere, for example a boxed class static int
+ if (obj->gtType == TYP_BYREF)
+ {
+ op1->gtFlags |= GTF_IND_TGTANYWHERE;
+ }
+
+ DWORD typeFlags = info.compCompHnd->getClassAttribs(resolvedToken.hClass);
+ if (StructHasOverlappingFields(typeFlags))
+ {
+ op1->gtField.gtFldMayOverlap = true;
+ }
+
+ // wrap it in an address-of operator if necessary
+ if (isLoadAddress)
+ {
+ op1 = gtNewOperNode(GT_ADDR,
+ (var_types)(varTypeIsGC(obj->TypeGet()) ? TYP_BYREF : TYP_I_IMPL), op1);
+ }
+ else
+ {
+ if (compIsForInlining() &&
+ impInlineIsGuaranteedThisDerefBeforeAnySideEffects(nullptr, obj,
+ impInlineInfo->inlArgInfo))
+ {
+ impInlineInfo->thisDereferencedFirst = true;
+ }
+ }
+ }
+ break;
+
+ case CORINFO_FIELD_STATIC_TLS:
+#ifdef _TARGET_X86_
+ // Legacy TLS access is implemented as intrinsic on x86 only
+
+ /* Create the data member node */
+ op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, NULL, fieldInfo.offset);
+ op1->gtFlags |= GTF_IND_TLS_REF; // fgMorphField will handle the transformation
+
+ if (isLoadAddress)
+ {
+ op1 = gtNewOperNode(GT_ADDR, (var_types)TYP_I_IMPL, op1);
+ }
+ break;
+#else
+ fieldInfo.fieldAccessor = CORINFO_FIELD_STATIC_ADDR_HELPER;
+
+ __fallthrough;
+#endif
+
+ case CORINFO_FIELD_STATIC_ADDR_HELPER:
+ case CORINFO_FIELD_INSTANCE_HELPER:
+ case CORINFO_FIELD_INSTANCE_ADDR_HELPER:
+ op1 = gtNewRefCOMfield(obj, &resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo, lclTyp,
+ clsHnd, nullptr);
+ usesHelper = true;
+ break;
+
+ case CORINFO_FIELD_STATIC_ADDRESS:
+ // Replace static read-only fields with constant if possible
+ if ((aflags & CORINFO_ACCESS_GET) && (fieldInfo.fieldFlags & CORINFO_FLG_FIELD_FINAL) &&
+ !(fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC_IN_HEAP) &&
+ (varTypeIsIntegral(lclTyp) || varTypeIsFloating(lclTyp)))
+ {
+ CorInfoInitClassResult initClassResult =
+ info.compCompHnd->initClass(resolvedToken.hField, info.compMethodHnd,
+ impTokenLookupContextHandle);
+
+ if (initClassResult & CORINFO_INITCLASS_INITIALIZED)
+ {
+ void** pFldAddr = nullptr;
+ void* fldAddr =
+ info.compCompHnd->getFieldAddress(resolvedToken.hField, (void**)&pFldAddr);
+
+ // We should always be able to access this static's address directly
+ assert(pFldAddr == nullptr);
+
+ op1 = impImportStaticReadOnlyField(fldAddr, lclTyp);
+ goto FIELD_DONE;
+ }
+ }
+
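+ // Not folded to a constant: fall through to the generic static field access path.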
+ __fallthrough;
+
+ case CORINFO_FIELD_STATIC_RVA_ADDRESS:
+ case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER:
+ case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
+ op1 = impImportStaticFieldAccess(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo,
+ lclTyp);
+ break;
+
+ case CORINFO_FIELD_INTRINSIC_ZERO:
+ {
+ assert(aflags & CORINFO_ACCESS_GET);
+ op1 = gtNewIconNode(0, lclTyp);
+ goto FIELD_DONE;
+ }
+ break;
+
+ case CORINFO_FIELD_INTRINSIC_EMPTY_STRING:
+ {
+ assert(aflags & CORINFO_ACCESS_GET);
+
+ LPVOID pValue;
+ InfoAccessType iat = info.compCompHnd->emptyStringLiteral(&pValue);
+ op1 = gtNewStringLiteralNode(iat, pValue);
+ goto FIELD_DONE;
+ }
+ break;
+
+ default:
+ assert(!"Unexpected fieldAccessor");
+ }
+
+ if (!isLoadAddress)
+ {
+
+ if (prefixFlags & PREFIX_VOLATILE)
+ {
+ op1->gtFlags |= GTF_DONT_CSE; // Can't CSE a volatile
+ op1->gtFlags |= GTF_ORDER_SIDEEFF; // Prevent this from being reordered
+
+ if (!usesHelper)
+ {
+ assert((op1->OperGet() == GT_FIELD) || (op1->OperGet() == GT_IND) ||
+ (op1->OperGet() == GT_OBJ));
+ op1->gtFlags |= GTF_IND_VOLATILE;
+ }
+ }
+
+ if (prefixFlags & PREFIX_UNALIGNED)
+ {
+ if (!usesHelper)
+ {
+ assert((op1->OperGet() == GT_FIELD) || (op1->OperGet() == GT_IND) ||
+ (op1->OperGet() == GT_OBJ));
+ op1->gtFlags |= GTF_IND_UNALIGNED;
+ }
+ }
+ }
+
+ /* Check if the class needs explicit initialization */
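+ // If so, the class-init helper call is prepended via a GT_COMMA so the static constructor
+ // runs before the field access.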
+
+ if (fieldInfo.fieldFlags & CORINFO_FLG_FIELD_INITCLASS)
+ {
+ GenTreePtr helperNode = impInitClass(&resolvedToken);
+ if (compDonotInline())
+ {
+ return;
+ }
+ if (helperNode != nullptr)
+ {
+ op1 = gtNewOperNode(GT_COMMA, op1->TypeGet(), helperNode, op1);
+ }
+ }
+
+ FIELD_DONE:
+ impPushOnStack(op1, tiRetVal);
+ }
+ break;
+
+ case CEE_STFLD:
+ case CEE_STSFLD:
+ {
+
+ BOOL isStoreStatic = (opcode == CEE_STSFLD);
+
+ CORINFO_CLASS_HANDLE fieldClsHnd; // class of the field (if it's a ref type)
+
+ /* Get the CP_Fieldref index */
+
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Field);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ int aflags = CORINFO_ACCESS_SET;
+ GenTreePtr obj = nullptr;
+ typeInfo* tiObj = nullptr;
+ typeInfo tiVal;
+
+ /* Pull the value from the stack */
+ op2 = impPopStack(tiVal);
+ clsHnd = tiVal.GetClassHandle();
+
+ if (opcode == CEE_STFLD)
+ {
+ tiObj = &impStackTop().seTypeInfo;
+ obj = impPopStack().val;
+
+ if (impIsThis(obj))
+ {
+ aflags |= CORINFO_ACCESS_THIS;
+
+ // An optimization for Contextful classes:
+ // we unwrap the proxy when we have a 'this reference'
+
+ if (info.compUnwrapContextful)
+ {
+ aflags |= CORINFO_ACCESS_UNWRAP;
+ }
+ }
+ }
+
+ eeGetFieldInfo(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo);
+
+ // Figure out the type of the member. We always call canAccessField, so you always need this
+ // handle
+ CorInfoType ciType = fieldInfo.fieldType;
+ fieldClsHnd = fieldInfo.structType;
+
+ lclTyp = JITtype2varType(ciType);
+
+ if (compIsForInlining())
+ {
+ /* Is this a 'special' (COM) field? a TLS ref static field? a field stored in the GC heap?
+    or a per-instantiation static? */
+
+ switch (fieldInfo.fieldAccessor)
+ {
+ case CORINFO_FIELD_INSTANCE_HELPER:
+ case CORINFO_FIELD_INSTANCE_ADDR_HELPER:
+ case CORINFO_FIELD_STATIC_ADDR_HELPER:
+ case CORINFO_FIELD_STATIC_TLS:
+
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_STFLD_NEEDS_HELPER);
+ return;
+
+ case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
+
+ /* We may be able to inline the field accessors in specific instantiations of generic
+ * methods */
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_STFLD_NEEDS_HELPER);
+ return;
+
+ default:
+ break;
+ }
+ }
+
+ impHandleAccessAllowed(fieldInfo.accessAllowed, &fieldInfo.accessCalloutHelper);
+
+ if (tiVerificationNeeded)
+ {
+ verVerifyField(&resolvedToken, fieldInfo, tiObj, TRUE);
+ typeInfo fieldType = verMakeTypeInfo(ciType, fieldClsHnd);
+ Verify(tiCompatibleWith(tiVal, fieldType.NormaliseForStack(), true), "type mismatch");
+ }
+ else
+ {
+ // tiVerificationNeeded is false.
+ // Raise InvalidProgramException if static store accesses non-static field
+ if (isStoreStatic && ((fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC) == 0))
+ {
+ BADCODE("static access on an instance field");
+ }
+ }
+
+ // We are using stfld on a static field.
+ // We allow it, but need to eval any side-effects for obj
+ if ((fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC) && obj != nullptr)
+ {
+ if (obj->gtFlags & GTF_SIDE_EFFECT)
+ {
+ obj = gtUnusedValNode(obj);
+ impAppendTree(obj, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+ }
+ obj = nullptr;
+ }
+
+ /* Preserve 'small' int types */
+ if (lclTyp > TYP_INT)
+ {
+ lclTyp = genActualType(lclTyp);
+ }
+
+ switch (fieldInfo.fieldAccessor)
+ {
+ case CORINFO_FIELD_INSTANCE:
+#ifdef FEATURE_READYTORUN_COMPILER
+ case CORINFO_FIELD_INSTANCE_WITH_BASE:
+#endif
+ {
+ obj = impCheckForNullPointer(obj);
+
+ /* Create the data member node */
+ op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, obj, fieldInfo.offset);
+ DWORD typeFlags = info.compCompHnd->getClassAttribs(resolvedToken.hClass);
+ if (StructHasOverlappingFields(typeFlags))
+ {
+ op1->gtField.gtFldMayOverlap = true;
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (fieldInfo.fieldAccessor == CORINFO_FIELD_INSTANCE_WITH_BASE)
+ op1->gtField.gtFieldLookup = fieldInfo.fieldLookup;
+#endif
+
+ op1->gtFlags |= (obj->gtFlags & GTF_GLOB_EFFECT);
+
+ if (fgAddrCouldBeNull(obj))
+ {
+ op1->gtFlags |= GTF_EXCEPT;
+ }
+
+ // If gtFldObj is a BYREF then our target is a value class and
+ // it could point anywhere, for example a boxed class static int
+ if (obj->gtType == TYP_BYREF)
+ {
+ op1->gtFlags |= GTF_IND_TGTANYWHERE;
+ }
+
+ if (compIsForInlining() &&
+ impInlineIsGuaranteedThisDerefBeforeAnySideEffects(op2, obj, impInlineInfo->inlArgInfo))
+ {
+ impInlineInfo->thisDereferencedFirst = true;
+ }
+ }
+ break;
+
+ case CORINFO_FIELD_STATIC_TLS:
+#ifdef _TARGET_X86_
+ // Legacy TLS access is implemented as intrinsic on x86 only
+
+ /* Create the data member node */
+ op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, NULL, fieldInfo.offset);
+ op1->gtFlags |= GTF_IND_TLS_REF; // fgMorphField will handle the transformation
+
+ break;
+#else
+ fieldInfo.fieldAccessor = CORINFO_FIELD_STATIC_ADDR_HELPER;
+
+ __fallthrough;
+#endif
+
+ case CORINFO_FIELD_STATIC_ADDR_HELPER:
+ case CORINFO_FIELD_INSTANCE_HELPER:
+ case CORINFO_FIELD_INSTANCE_ADDR_HELPER:
+ op1 = gtNewRefCOMfield(obj, &resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo, lclTyp,
+ clsHnd, op2);
+ goto SPILL_APPEND;
+
+ case CORINFO_FIELD_STATIC_ADDRESS:
+ case CORINFO_FIELD_STATIC_RVA_ADDRESS:
+ case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER:
+ case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
+ op1 = impImportStaticFieldAccess(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo,
+ lclTyp);
+ break;
+
+ default:
+ assert(!"Unexpected fieldAccessor");
+ }
+
+ // Create the member assignment, unless we have a struct.
+ // TODO-1stClassStructs: This could be limited to TYP_STRUCT, to avoid extra copies.
+ bool deferStructAssign = varTypeIsStruct(lclTyp);
+
+ if (!deferStructAssign)
+ {
+ if (prefixFlags & PREFIX_VOLATILE)
+ {
+ assert((op1->OperGet() == GT_FIELD) || (op1->OperGet() == GT_IND));
+ op1->gtFlags |= GTF_DONT_CSE; // Can't CSE a volatile
+ op1->gtFlags |= GTF_ORDER_SIDEEFF; // Prevent this from being reordered
+ op1->gtFlags |= GTF_IND_VOLATILE;
+ }
+ if (prefixFlags & PREFIX_UNALIGNED)
+ {
+ assert((op1->OperGet() == GT_FIELD) || (op1->OperGet() == GT_IND));
+ op1->gtFlags |= GTF_IND_UNALIGNED;
+ }
+
+ /* V4.0 allows assignment of i4 constant values to i8 type vars when the IL verifier is
+    bypassed (full-trust apps). The reason this works is that the JIT stores an i4 constant
+    in the GenTree union during importation and reads from the union as if it were a long
+    during code generation. Though this can potentially read garbage, one can get lucky and
+    have it work correctly.
+
+    This code pattern is generated by the Dev10 MC++ compiler when storing to fields compiled
+    with the /O2 switch (the default for retail configs in Dev10), and a customer app has
+    taken a dependency on it. To be backward compatible, we explicitly add an upward cast
+    here so that it always works correctly.
+
+    Note that this is limited to x86 alone, as there is no back-compat to be addressed for
+    the ARM JIT for V4.0.
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ if (op1->TypeGet() != op2->TypeGet() && op2->OperIsConst() && varTypeIsIntOrI(op2->TypeGet()) &&
+ varTypeIsLong(op1->TypeGet()))
+ {
+ op2 = gtNewCastNode(op1->TypeGet(), op2, op1->TypeGet());
+ }
+#endif
+
+#ifdef _TARGET_64BIT_
+ // Automatic upcast for a GT_CNS_INT into TYP_I_IMPL
+ if ((op2->OperGet() == GT_CNS_INT) && varTypeIsI(lclTyp) && !varTypeIsI(op2->gtType))
+ {
+ op2->gtType = TYP_I_IMPL;
+ }
+ else
+ {
+ // Allow a downcast of op2 from TYP_I_IMPL into a 32-bit Int for x86 JIT compatibility
+ //
+ if (varTypeIsI(op2->gtType) && (genActualType(lclTyp) == TYP_INT))
+ {
+ op2 = gtNewCastNode(TYP_INT, op2, TYP_INT);
+ }
+ // Allow an upcast of op2 from a 32-bit Int into TYP_I_IMPL for x86 JIT compatibility
+ //
+ if (varTypeIsI(lclTyp) && (genActualType(op2->gtType) == TYP_INT))
+ {
+ op2 = gtNewCastNode(TYP_I_IMPL, op2, TYP_I_IMPL);
+ }
+ }
+#endif
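+ // Example: on a 64-bit target, storing a 32-bit int value into a native-int sized field gets
+ // an explicit upcast here instead of relying on an implicit widening later.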
+
+#if !FEATURE_X87_DOUBLES
+ // We can generate an assignment to a TYP_FLOAT from a TYP_DOUBLE
+ // We insert a cast to the dest 'op1' type
+ //
+ if ((op1->TypeGet() != op2->TypeGet()) && varTypeIsFloating(op1->gtType) &&
+ varTypeIsFloating(op2->gtType))
+ {
+ op2 = gtNewCastNode(op1->TypeGet(), op2, op1->TypeGet());
+ }
+#endif // !FEATURE_X87_DOUBLES
+
+ op1 = gtNewAssignNode(op1, op2);
+
+ /* Mark the expression as containing an assignment */
+
+ op1->gtFlags |= GTF_ASG;
+ }
+
+ /* Check if the class needs explicit initialization */
+
+ if (fieldInfo.fieldFlags & CORINFO_FLG_FIELD_INITCLASS)
+ {
+ GenTreePtr helperNode = impInitClass(&resolvedToken);
+ if (compDonotInline())
+ {
+ return;
+ }
+ if (helperNode != nullptr)
+ {
+ op1 = gtNewOperNode(GT_COMMA, op1->TypeGet(), helperNode, op1);
+ }
+ }
+
+ /* stfld can interfere with value classes (consider the sequence
+ ldloc, ldloca, ..., stfld, stloc). We will be conservative and
+ spill all value class references from the stack. */
+
+ if (obj && ((obj->gtType == TYP_BYREF) || (obj->gtType == TYP_I_IMPL)))
+ {
+ assert(tiObj);
+
+ if (impIsValueType(tiObj))
+ {
+ impSpillEvalStack();
+ }
+ else
+ {
+ impSpillValueClasses();
+ }
+ }
+
+ /* Spill any refs to the same member from the stack */
+
+ impSpillLclRefs((ssize_t)resolvedToken.hField);
+
+ /* stsfld also interferes with indirect accesses (for aliased
+ statics) and calls. But don't need to spill other statics
+ as we have explicitly spilled this particular static field. */
+
+ impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG("spill side effects before STFLD"));
+
+ if (deferStructAssign)
+ {
+ op1 = impAssignStruct(op1, op2, clsHnd, (unsigned)CHECK_SPILL_ALL);
+ }
+ }
+ goto APPEND;
+
+ case CEE_NEWARR:
+ {
+
+ /* Get the class type index operand */
+
+ _impResolveToken(CORINFO_TOKENKIND_Newarr);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (!opts.IsReadyToRun())
+ {
+ // Need to restore array classes before creating array objects on the heap
+ op1 = impTokenToHandle(&resolvedToken, nullptr, TRUE /*mustRestoreHandle*/);
+ if (op1 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+ }
+
+ if (tiVerificationNeeded)
+ {
+ // As per ECMA, the 'numElems' operand can be either an int32 or a native int.
+ Verify(impStackTop().seTypeInfo.IsIntOrNativeIntType(), "bad bound");
+
+ CORINFO_CLASS_HANDLE elemTypeHnd;
+ info.compCompHnd->getChildType(resolvedToken.hClass, &elemTypeHnd);
+ Verify(elemTypeHnd == nullptr ||
+ !(info.compCompHnd->getClassAttribs(elemTypeHnd) & CORINFO_FLG_CONTAINS_STACK_PTR),
+ "array of byref-like type");
+ tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
+ }
+
+ accessAllowedResult =
+ info.compCompHnd->canAccessClass(&resolvedToken, info.compMethodHnd, &calloutHelper);
+ impHandleAccessAllowed(accessAllowedResult, &calloutHelper);
+
+ /* Form the arglist: array class handle, size */
+ op2 = impPopStack().val;
+ assertImp(genActualTypeIsIntOrI(op2->gtType));
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ op1 = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_NEWARR_1, TYP_REF,
+ gtNewArgList(op2));
+ usingReadyToRunHelper = (op1 != NULL);
+
+ if (!usingReadyToRunHelper)
+ {
+ // TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
+ // and the newarr call with a single call to a dynamic R2R cell that will:
+ // 1) Load the context
+ // 2) Perform the generic dictionary lookup and caching, and generate the appropriate stub
+ // 3) Allocate the new array
+ // Reason: performance (today, we'll always use the slow helper for the R2R generics case)
+
+ // Need to restore array classes before creating array objects on the heap
+ op1 = impTokenToHandle(&resolvedToken, NULL, TRUE /*mustRestoreHandle*/);
+ if (op1 == NULL) // compDonotInline()
+ return;
+ }
+ }
+
+ if (!usingReadyToRunHelper)
+#endif
+ {
+ args = gtNewArgList(op1, op2);
+
+ /* Create a call to 'new' */
+
+ // Note that this only works for shared generic code because the same helper is used for all
+ // reference array types
+ op1 =
+ gtNewHelperCallNode(info.compCompHnd->getNewArrHelper(resolvedToken.hClass), TYP_REF, 0, args);
+ }
+
+ op1->gtCall.compileTimeHelperArgumentHandle = (CORINFO_GENERIC_HANDLE)resolvedToken.hClass;
+
+ /* Remember that this basic block contains a 'new' of a single-dimension (SD) array */
+
+ block->bbFlags |= BBF_HAS_NEWARRAY;
+ optMethodFlags |= OMF_HAS_NEWARRAY;
+
+ /* Push the result of the call on the stack */
+
+ impPushOnStack(op1, tiRetVal);
+
+ callTyp = TYP_REF;
+ }
+ break;
+
+ case CEE_LOCALLOC:
+ assert(!compIsForInlining());
+
+ if (tiVerificationNeeded)
+ {
+ Verify(false, "bad opcode");
+ }
+
+ // We don't allow locallocs inside handlers
+ if (block->hasHndIndex())
+ {
+ BADCODE("Localloc can't be inside handler");
+ }
+
+ /* The FP register may not be back to the original value at the end
+ of the method, even if the frame size is 0, as localloc may
+ have modified it. So we will HAVE to reset it */
+
+ compLocallocUsed = true;
+ setNeedsGSSecurityCookie();
+
+ // Get the size to allocate
+
+ op2 = impPopStack().val;
+ assertImp(genActualTypeIsIntOrI(op2->gtType));
+
+ if (verCurrentState.esStackDepth != 0)
+ {
+ BADCODE("Localloc can only be used when the stack is empty");
+ }
+
+ op1 = gtNewOperNode(GT_LCLHEAP, TYP_I_IMPL, op2);
+
+ // May throw a stack overflow exception. Obviously, we don't want locallocs to be CSE'd.
+
+ op1->gtFlags |= (GTF_EXCEPT | GTF_DONT_CSE);
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_ISINST:
+
+ /* Get the type token */
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Casting);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (!opts.IsReadyToRun())
+ {
+ op2 = impTokenToHandle(&resolvedToken, nullptr, FALSE);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+ }
+
+ if (tiVerificationNeeded)
+ {
+ Verify(impStackTop().seTypeInfo.IsObjRef(), "obj reference needed");
+ // Even if this is a value class, we know it is boxed.
+ tiRetVal = typeInfo(TI_REF, resolvedToken.hClass);
+ }
+ accessAllowedResult =
+ info.compCompHnd->canAccessClass(&resolvedToken, info.compMethodHnd, &calloutHelper);
+ impHandleAccessAllowed(accessAllowedResult, &calloutHelper);
+
+ op1 = impPopStack().val;
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ GenTreePtr opLookup =
+ impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_ISINSTANCEOF, TYP_REF,
+ gtNewArgList(op1));
+ usingReadyToRunHelper = (opLookup != NULL);
+ op1 = (usingReadyToRunHelper ? opLookup : op1);
+
+ if (!usingReadyToRunHelper)
+ {
+ // TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
+ // and the isinstanceof_any call with a single call to a dynamic R2R cell that will:
+ // 1) Load the context
+ // 2) Perform the generic dictionary lookup and caching, and generate the appropriate stub
+ // 3) Perform the 'is instance' check on the input object
+ // Reason: performance (today, we'll always use the slow helper for the R2R generics case)
+
+ op2 = impTokenToHandle(&resolvedToken, NULL, FALSE);
+ if (op2 == NULL) // compDonotInline()
+ return;
+ }
+ }
+
+ if (!usingReadyToRunHelper)
+#endif
+ {
+ op1 = impCastClassOrIsInstToTree(op1, op2, &resolvedToken, false);
+ }
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ impPushOnStack(op1, tiRetVal);
+
+ break;
+
+ case CEE_REFANYVAL:
+
+ // get the class handle and make a ICON node out of it
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ op2 = impTokenToHandle(&resolvedToken);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ if (tiVerificationNeeded)
+ {
+ Verify(typeInfo::AreEquivalent(impStackTop().seTypeInfo, verMakeTypeInfo(impGetRefAnyClass())),
+ "need refany");
+ tiRetVal = verMakeTypeInfo(resolvedToken.hClass).MakeByRef();
+ }
+
+ op1 = impPopStack().val;
+ // make certain it is normalized;
+ op1 = impNormStructVal(op1, impGetRefAnyClass(), (unsigned)CHECK_SPILL_ALL);
+
+ // Call helper GETREFANY(classHandle, op1);
+ args = gtNewArgList(op2, op1);
+ op1 = gtNewHelperCallNode(CORINFO_HELP_GETREFANY, TYP_BYREF, 0, args);
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_REFANYTYPE:
+
+ if (tiVerificationNeeded)
+ {
+ Verify(typeInfo::AreEquivalent(impStackTop().seTypeInfo, verMakeTypeInfo(impGetRefAnyClass())),
+ "need refany");
+ }
+
+ op1 = impPopStack().val;
+
+ // make certain it is normalized;
+ op1 = impNormStructVal(op1, impGetRefAnyClass(), (unsigned)CHECK_SPILL_ALL);
+
+ if (op1->gtOper == GT_OBJ)
+ {
+ // Get the address of the refany
+ op1 = op1->gtOp.gtOp1;
+
+ // Fetch the type from the correct slot
+ op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1,
+ gtNewIconNode(offsetof(CORINFO_RefAny, type), TYP_I_IMPL));
+ op1 = gtNewOperNode(GT_IND, TYP_BYREF, op1);
+ }
+ else
+ {
+ assertImp(op1->gtOper == GT_MKREFANY);
+
+ // The pointer may have side-effects
+ if (op1->gtOp.gtOp1->gtFlags & GTF_SIDE_EFFECT)
+ {
+ impAppendTree(op1->gtOp.gtOp1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+#ifdef DEBUG
+ impNoteLastILoffs();
+#endif
+ }
+
+ // We already have the class handle
+ op1 = op1->gtOp.gtOp2;
+ }
+
+ // convert native TypeHandle to RuntimeTypeHandle
+ {
+ GenTreeArgList* helperArgs = gtNewArgList(op1);
+
+ op1 = gtNewHelperCallNode(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL, TYP_STRUCT, GTF_EXCEPT,
+ helperArgs);
+
+ // The handle struct is returned in register
+ op1->gtCall.gtReturnType = TYP_REF;
+
+ tiRetVal = typeInfo(TI_STRUCT, impGetTypeHandleClass());
+ }
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_LDTOKEN:
+ {
+ /* Get the Class index */
+ assertImp(sz == sizeof(unsigned));
+ lastLoadToken = codeAddr;
+ _impResolveToken(CORINFO_TOKENKIND_Ldtoken);
+
+ tokenType = info.compCompHnd->getTokenTypeAsHandle(&resolvedToken);
+
+ op1 = impTokenToHandle(&resolvedToken, nullptr, TRUE);
+ if (op1 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ helper = CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE;
+ assert(resolvedToken.hClass != nullptr);
+
+ if (resolvedToken.hMethod != nullptr)
+ {
+ helper = CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD;
+ }
+ else if (resolvedToken.hField != nullptr)
+ {
+ helper = CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD;
+ }
+
+ GenTreeArgList* helperArgs = gtNewArgList(op1);
+
+ op1 = gtNewHelperCallNode(helper, TYP_STRUCT, GTF_EXCEPT, helperArgs);
+
+ // The handle struct is returned in register
+ op1->gtCall.gtReturnType = TYP_REF;
+
+ tiRetVal = verMakeTypeInfo(tokenType);
+ impPushOnStack(op1, tiRetVal);
+ }
+ break;
+
+ case CEE_UNBOX:
+ case CEE_UNBOX_ANY:
+ {
+ /* Get the Class index */
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ BOOL runtimeLookup;
+ op2 = impTokenToHandle(&resolvedToken, &runtimeLookup);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ // Run this always so we can get access exceptions even with SkipVerification.
+ accessAllowedResult =
+ info.compCompHnd->canAccessClass(&resolvedToken, info.compMethodHnd, &calloutHelper);
+ impHandleAccessAllowed(accessAllowedResult, &calloutHelper);
+
+ if (opcode == CEE_UNBOX_ANY && !eeIsValueClass(resolvedToken.hClass))
+ {
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiUnbox = impStackTop().seTypeInfo;
+ Verify(tiUnbox.IsObjRef(), "bad unbox.any arg");
+ tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
+ tiRetVal.NormaliseForStack();
+ }
+ op1 = impPopStack().val;
+ goto CASTCLASS;
+ }
+
+ /* Pop the object and create the unbox helper call */
+ /* You might think that for UNBOX_ANY we need to push a different */
+ /* (non-byref) type, but here we're making the tiRetVal that is used */
+ /* for the intermediate pointer which we then transfer onto the OBJ */
+ /* instruction. OBJ then creates the appropriate tiRetVal. */
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiUnbox = impStackTop().seTypeInfo;
+ Verify(tiUnbox.IsObjRef(), "Bad unbox arg");
+
+ tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
+ Verify(tiRetVal.IsValueClass(), "not value class");
+ tiRetVal.MakeByRef();
+
+ // We always come from an objref, so this is safe byref
+ tiRetVal.SetIsPermanentHomeByRef();
+ tiRetVal.SetIsReadonlyByRef();
+ }
+
+ op1 = impPopStack().val;
+ assertImp(op1->gtType == TYP_REF);
+
+ helper = info.compCompHnd->getUnBoxHelper(resolvedToken.hClass);
+ assert(helper == CORINFO_HELP_UNBOX || helper == CORINFO_HELP_UNBOX_NULLABLE);
+
+ // We only want to expand inline the normal UNBOX helper;
+ expandInline = (helper == CORINFO_HELP_UNBOX);
+
+ if (expandInline)
+ {
+ if (compCurBB->isRunRarely())
+ {
+ expandInline = false; // not worth the code expansion
+ }
+ }
+
+ if (expandInline)
+ {
+ // we are doing normal unboxing
+ // inline the common case of the unbox helper
+ // UNBOX(exp) morphs into
+ // clone = pop(exp);
+ // ((*clone == typeToken) ? nop : helper(clone, typeToken));
+ // push(clone + sizeof(void*))
+ //
+ GenTreePtr cloneOperand;
+ op1 = impCloneExpr(op1, &cloneOperand, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("inline UNBOX clone1"));
+ op1 = gtNewOperNode(GT_IND, TYP_I_IMPL, op1);
+
+ GenTreePtr condBox = gtNewOperNode(GT_EQ, TYP_INT, op1, op2);
+
+ op1 = impCloneExpr(cloneOperand, &cloneOperand, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("inline UNBOX clone2"));
+ op2 = impTokenToHandle(&resolvedToken);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+ args = gtNewArgList(op2, op1);
+ op1 = gtNewHelperCallNode(helper, TYP_VOID, 0, args);
+
+ op1 = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), op1);
+ op1 = gtNewQmarkNode(TYP_VOID, condBox, op1);
+ condBox->gtFlags |= GTF_RELOP_QMARK;
+
+ // QMARK nodes cannot reside on the evaluation stack. Because there
+ // may be other trees on the evaluation stack that side-effect the
+ // sources of the UNBOX operation we must spill the stack.
+
+ impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+
+ // Create the address-expression to reference past the object header
+ // to the beginning of the value-type. Today this means adjusting
+ // past the base of the object's vtable field, which is pointer sized.
+
+ op2 = gtNewIconNode(sizeof(void*), TYP_I_IMPL);
+ op1 = gtNewOperNode(GT_ADD, TYP_BYREF, cloneOperand, op2);
+ }
+ else
+ {
+ unsigned callFlags = (helper == CORINFO_HELP_UNBOX) ? 0 : GTF_EXCEPT;
+
+ // Don't optimize, just call the helper and be done with it
+ args = gtNewArgList(op2, op1);
+ op1 = gtNewHelperCallNode(helper,
+ (var_types)((helper == CORINFO_HELP_UNBOX) ? TYP_BYREF : TYP_STRUCT),
+ callFlags, args);
+ }
+
+ assert(((helper == CORINFO_HELP_UNBOX) && (op1->gtType == TYP_BYREF)) || // Unbox helper returns a byref.
+        ((helper == CORINFO_HELP_UNBOX_NULLABLE) &&
+         varTypeIsStruct(op1))); // UnboxNullable helper returns a struct.
+
+ /*
+   -------------------------------------------------------------------------------
+   |  \ helper   |                           |                                    |
+   |   \         |   CORINFO_HELP_UNBOX      |   CORINFO_HELP_UNBOX_NULLABLE      |
+   |    \        |   (which returns a BYREF) |   (which returns a STRUCT)         |
+   | opcode \    |                           |                                    |
+   |-----------------------------------------------------------------------------|
+   | UNBOX       | push the BYREF            | spill the STRUCT to a local,       |
+   |             |                           | push the BYREF to this local       |
+   |-----------------------------------------------------------------------------|
+   | UNBOX_ANY   | push a GT_OBJ of          | push the STRUCT                    |
+   |             | the BYREF                 | For Linux, when the struct is      |
+   |             |                           | returned in two registers, create  |
+   |             |                           | a temp whose address is passed to  |
+   |             |                           | the unbox_nullable helper.         |
+   -------------------------------------------------------------------------------
+ */
+
+ if (opcode == CEE_UNBOX)
+ {
+ if (helper == CORINFO_HELP_UNBOX_NULLABLE)
+ {
+ // Unbox nullable helper returns a struct type.
+ // We need to spill it to a temp so that we can take its address.
+ // Here we need the unsafe value cls check, since the address of the struct is taken to be
+ // used further along and could potentially be exploited.
+
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("UNBOXing a nullable"));
+ lvaSetStruct(tmp, resolvedToken.hClass, true /* unsafe value cls check */);
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op1 = impAssignStruct(op2, op1, resolvedToken.hClass, (unsigned)CHECK_SPILL_ALL);
+ assert(op1->gtType == TYP_VOID); // We must be assigning the return struct to the temp.
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op2 = gtNewOperNode(GT_ADDR, TYP_BYREF, op2);
+ op1 = gtNewOperNode(GT_COMMA, TYP_BYREF, op1, op2);
+ }
+
+ assert(op1->gtType == TYP_BYREF);
+ assert(!tiVerificationNeeded || tiRetVal.IsByRef());
+ }
+ else
+ {
+ assert(opcode == CEE_UNBOX_ANY);
+
+ if (helper == CORINFO_HELP_UNBOX)
+ {
+ // Normal unbox helper returns a TYP_BYREF.
+ impPushOnStack(op1, tiRetVal);
+ oper = GT_OBJ;
+ goto OBJ;
+ }
+
+ assert(helper == CORINFO_HELP_UNBOX_NULLABLE && "Make sure the helper is nullable!");
+
+#if FEATURE_MULTIREG_RET
+
+ if (varTypeIsStruct(op1) && IsMultiRegReturnedType(resolvedToken.hClass))
+ {
+ // Unbox nullable helper returns a TYP_STRUCT.
+ // For the multi-reg case we need to spill it to a temp so that
+ // we can pass the address to the unbox_nullable jit helper.
+
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("UNBOXing a register returnable nullable"));
+ lvaTable[tmp].lvIsMultiRegArg = true;
+ lvaSetStruct(tmp, resolvedToken.hClass, true /* unsafe value cls check */);
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op1 = impAssignStruct(op2, op1, resolvedToken.hClass, (unsigned)CHECK_SPILL_ALL);
+ assert(op1->gtType == TYP_VOID); // We must be assigning the return struct to the temp.
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op2 = gtNewOperNode(GT_ADDR, TYP_BYREF, op2);
+ op1 = gtNewOperNode(GT_COMMA, TYP_BYREF, op1, op2);
+
+ // In this case the return value of the unbox helper is TYP_BYREF.
+ // Make sure the right type is placed on the operand type stack.
+ impPushOnStack(op1, tiRetVal);
+
+ // Load the struct.
+ oper = GT_OBJ;
+
+ assert(op1->gtType == TYP_BYREF);
+ assert(!tiVerificationNeeded || tiRetVal.IsByRef());
+
+ goto OBJ;
+ }
+ else
+
+#endif // !FEATURE_MULTIREG_RET
+
+ {
+ // If the struct is not returned in registers, it has been materialized in the RetBuf.
+ assert(op1->gtType == TYP_STRUCT);
+ tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
+ assert(tiRetVal.IsValueClass());
+ }
+ }
+
+ impPushOnStack(op1, tiRetVal);
+ }
+ break;
+
+ case CEE_BOX:
+ {
+ /* Get the Class index */
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Box);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiActual = impStackTop().seTypeInfo;
+ typeInfo tiBox = verMakeTypeInfo(resolvedToken.hClass);
+
+ Verify(verIsBoxable(tiBox), "boxable type expected");
+
+ // check the class constraints of the boxed type in case we are boxing an uninitialized value
+ Verify(info.compCompHnd->satisfiesClassConstraints(resolvedToken.hClass),
+ "boxed type has unsatisfied class constraints");
+
+ Verify(tiCompatibleWith(tiActual, tiBox.NormaliseForStack(), true), "type mismatch");
+
+ // Observation: the following code introduces a boxed value class on the stack, but,
+ // according to the ECMA spec, one would simply expect: tiRetVal =
+ // typeInfo(TI_REF,impGetObjectClass());
+
+ // Push the result back on the stack: even if clsHnd is a value class we want the TI_REF.
+ // We call back to the EE to find out what type we should push (for Nullable<T> we push T).
+ tiRetVal = typeInfo(TI_REF, info.compCompHnd->getTypeForBox(resolvedToken.hClass));
+ }
+
+ accessAllowedResult =
+ info.compCompHnd->canAccessClass(&resolvedToken, info.compMethodHnd, &calloutHelper);
+ impHandleAccessAllowed(accessAllowedResult, &calloutHelper);
+
+ // Note BOX can be used on things that are not value classes, in which
+ // case we get a NOP. However the verifier's view of the type on the
+ // stack changes (in generic code a 'T' becomes a 'boxed T')
+ if (!eeIsValueClass(resolvedToken.hClass))
+ {
+ verCurrentState.esStack[verCurrentState.esStackDepth - 1].seTypeInfo = tiRetVal;
+ break;
+ }
+
+ // Look ahead for a following unbox.any: "box T; unbox.any T" on a non-shared T cancels
+ // out, so both instructions can be elided and the value left on the stack unchanged.
+ if (codeAddr + (sz + 1 + sizeof(mdToken)) <= codeEndp && codeAddr[sz] == CEE_UNBOX_ANY)
+ {
+ DWORD classAttribs = info.compCompHnd->getClassAttribs(resolvedToken.hClass);
+ if (!(classAttribs & CORINFO_FLG_SHAREDINST))
+ {
+ CORINFO_RESOLVED_TOKEN unboxResolvedToken;
+
+ impResolveToken(codeAddr + (sz + 1), &unboxResolvedToken, CORINFO_TOKENKIND_Class);
+
+ if (unboxResolvedToken.hClass == resolvedToken.hClass)
+ {
+ // Skip the next unbox.any instruction
+ sz += sizeof(mdToken) + 1;
+ break;
+ }
+ }
+ }
+
+ impImportAndPushBox(&resolvedToken);
+ if (compDonotInline())
+ {
+ return;
+ }
+ }
+ break;
+
+ case CEE_SIZEOF:
+
+ /* Get the Class index */
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (tiVerificationNeeded)
+ {
+ tiRetVal = typeInfo(TI_INT);
+ }
+
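+ // The size is known at JIT time, so sizeof imports as an integer constant node.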
+ op1 = gtNewIconNode(info.compCompHnd->getClassSize(resolvedToken.hClass));
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_CASTCLASS:
+
+ /* Get the Class index */
+
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Casting);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (!opts.IsReadyToRun())
+ {
+ op2 = impTokenToHandle(&resolvedToken, nullptr, FALSE);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+ }
+
+ if (tiVerificationNeeded)
+ {
+ Verify(impStackTop().seTypeInfo.IsObjRef(), "object ref expected");
+ // box it
+ tiRetVal = typeInfo(TI_REF, resolvedToken.hClass);
+ }
+
+ accessAllowedResult =
+ info.compCompHnd->canAccessClass(&resolvedToken, info.compMethodHnd, &calloutHelper);
+ impHandleAccessAllowed(accessAllowedResult, &calloutHelper);
+
+ op1 = impPopStack().val;
+
+ /* Pop the address and create the 'checked cast' helper call */
+
+ // At this point we expect typeRef to contain the token, op1 to contain the value being cast,
+ // and op2 to contain code that creates the type handle corresponding to typeRef
+ CASTCLASS:
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ GenTreePtr opLookup = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_CHKCAST,
+ TYP_REF, gtNewArgList(op1));
+ usingReadyToRunHelper = (opLookup != NULL);
+ op1 = (usingReadyToRunHelper ? opLookup : op1);
+
+ if (!usingReadyToRunHelper)
+ {
+ // TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
+ // and the chkcastany call with a single call to a dynamic R2R cell that will:
+ // 1) Load the context
+ // 2) Perform the generic dictionary lookup and caching, and generate the appropriate stub
+ // 3) Check the object on the stack for the type-cast
+ // Reason: performance (today, we'll always use the slow helper for the R2R generics case)
+
+ op2 = impTokenToHandle(&resolvedToken, NULL, FALSE);
+ if (op2 == NULL) // compDonotInline()
+ return;
+ }
+ }
+
+ if (!usingReadyToRunHelper)
+#endif
+ {
+ op1 = impCastClassOrIsInstToTree(op1, op2, &resolvedToken, true);
+ }
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ /* Push the result back on the stack */
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_THROW:
+
+ if (compIsForInlining())
+ {
+ // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ // TODO: Will this be too strict, given that we will inline many basic blocks?
+ // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+ /* Do we have just the exception on the stack ?*/
+
+ if (verCurrentState.esStackDepth != 1)
+ {
+ /* if not, just don't inline the method */
+
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_THROW_WITH_INVALID_STACK);
+ return;
+ }
+
+ /* Don't inline non-void conditionals that have a throw in one of the branches */
+
+ /* NOTE: If we do allow this, note that we can't simply do a
+ checkLiveness() to match the liveness at the end of the "then"
+ and "else" branches of the GT_COLON. The branch with the throw
+ will keep nothing live, so we should use the liveness at the
+ end of the non-throw branch. */
+
+ if (seenConditionalJump && (impInlineInfo->inlineCandidateInfo->fncRetType != TYP_VOID))
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_CONDITIONAL_THROW);
+ return;
+ }
+ }
+
+ if (tiVerificationNeeded)
+ {
+ tiRetVal = impStackTop().seTypeInfo;
+ Verify(tiRetVal.IsObjRef(), "object ref expected");
+ if (verTrackObjCtorInitState && (verCurrentState.thisInitialized != TIS_Init))
+ {
+ Verify(!tiRetVal.IsThisPtr(), "throw uninitialized this");
+ }
+ }
+
+ block->bbSetRunRarely(); // any block with a throw is rare
+ /* Pop the exception object and create the 'throw' helper call */
+
+ op1 = gtNewHelperCallNode(CORINFO_HELP_THROW, TYP_VOID, GTF_EXCEPT, gtNewArgList(impPopStack().val));
+
+ EVAL_APPEND:
+ if (verCurrentState.esStackDepth > 0)
+ {
+ impEvalSideEffects();
+ }
+
+ assert(verCurrentState.esStackDepth == 0);
+
+ goto APPEND;
+
+ case CEE_RETHROW:
+
+ assert(!compIsForInlining());
+
+ if (info.compXcptnsCount == 0)
+ {
+ BADCODE("rethrow outside catch");
+ }
+
+ if (tiVerificationNeeded)
+ {
+ Verify(block->hasHndIndex(), "rethrow outside catch");
+ if (block->hasHndIndex())
+ {
+ EHblkDsc* HBtab = ehGetDsc(block->getHndIndex());
+ Verify(!HBtab->HasFinallyOrFaultHandler(), "rethrow in finally or fault");
+ if (HBtab->HasFilter())
+ {
+ // we better be in the handler clause part, not the filter part
+ Verify(jitIsBetween(compCurBB->bbCodeOffs, HBtab->ebdHndBegOffs(), HBtab->ebdHndEndOffs()),
+ "rethrow in filter");
+ }
+ }
+ }
+
+ /* Create the 'rethrow' helper call */
+
+ op1 = gtNewHelperCallNode(CORINFO_HELP_RETHROW, TYP_VOID, GTF_EXCEPT);
+
+ goto EVAL_APPEND;
+
+ case CEE_INITOBJ:
+
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiTo = impStackTop().seTypeInfo;
+ typeInfo tiInstr = verMakeTypeInfo(resolvedToken.hClass);
+
+ Verify(tiTo.IsByRef(), "byref expected");
+ Verify(!tiTo.IsReadonlyByRef(), "write to readonly byref");
+
+ Verify(tiCompatibleWith(tiInstr, tiTo.DereferenceByRef(), false),
+ "type operand incompatible with type of address");
+ }
+
+ size = info.compCompHnd->getClassSize(resolvedToken.hClass); // Size
+ op2 = gtNewIconNode(0); // Value
+ op1 = impPopStack().val; // Dest
+ op1 = gtNewBlockVal(op1, size);
+ op1 = gtNewBlkOpNode(op1, op2, size, (prefixFlags & PREFIX_VOLATILE) != 0, false);
+ goto SPILL_APPEND;
+
+ case CEE_INITBLK:
+
+ if (tiVerificationNeeded)
+ {
+ Verify(false, "bad opcode");
+ }
+
+ op3 = impPopStack().val; // Size
+ op2 = impPopStack().val; // Value
+ op1 = impPopStack().val; // Dest
+
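+ // Use a fixed-size GT_BLK when the size is a constant; otherwise a GT_DYN_BLK carries the
+ // size tree.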
+ if (op3->IsCnsIntOrI())
+ {
+ size = (unsigned)op3->AsIntConCommon()->IconValue();
+ op1 = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, op1, size);
+ }
+ else
+ {
+ op1 = new (this, GT_DYN_BLK) GenTreeDynBlk(op1, op3);
+ size = 0;
+ }
+ op1 = gtNewBlkOpNode(op1, op2, size, (prefixFlags & PREFIX_VOLATILE) != 0, false);
+
+ goto SPILL_APPEND;
+
+ case CEE_CPBLK:
+
+ if (tiVerificationNeeded)
+ {
+ Verify(false, "bad opcode");
+ }
+ op3 = impPopStack().val; // Size
+ op2 = impPopStack().val; // Src
+ op1 = impPopStack().val; // Dest
+
+ if (op3->IsCnsIntOrI())
+ {
+ size = (unsigned)op3->AsIntConCommon()->IconValue();
+ op1 = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, op1, size);
+ }
+ else
+ {
+ op1 = new (this, GT_DYN_BLK) GenTreeDynBlk(op1, op3);
+ size = 0;
+ }
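+ // The source must be a location rather than an address: strip a GT_ADDR if present,
+ // otherwise wrap the address in a GT_IND.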
+ if (op2->OperGet() == GT_ADDR)
+ {
+ op2 = op2->gtOp.gtOp1;
+ }
+ else
+ {
+ op2 = gtNewOperNode(GT_IND, TYP_STRUCT, op2);
+ }
+
+ op1 = gtNewBlkOpNode(op1, op2, size, (prefixFlags & PREFIX_VOLATILE) != 0, true);
+ goto SPILL_APPEND;
+
+ case CEE_CPOBJ:
+
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiFrom = impStackTop().seTypeInfo;
+ typeInfo tiTo = impStackTop(1).seTypeInfo;
+ typeInfo tiInstr = verMakeTypeInfo(resolvedToken.hClass);
+
+ Verify(tiFrom.IsByRef(), "expected byref source");
+ Verify(tiTo.IsByRef(), "expected byref destination");
+
+ Verify(tiCompatibleWith(tiFrom.DereferenceByRef(), tiInstr, false),
+ "type of source address incompatible with type operand");
+ Verify(!tiTo.IsReadonlyByRef(), "write to readonly byref");
+ Verify(tiCompatibleWith(tiInstr, tiTo.DereferenceByRef(), false),
+ "type operand incompatible with type of destination address");
+ }
+
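+ // cpobj on a reference type is just an object-reference copy, so reduce it to a ldind.ref
+ // of the source followed by the stind.ref path.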
+ if (!eeIsValueClass(resolvedToken.hClass))
+ {
+ op1 = impPopStack().val; // address to load from
+
+ impBashVarAddrsToI(op1);
+
+ assertImp(genActualType(op1->gtType) == TYP_I_IMPL || op1->gtType == TYP_BYREF);
+
+ op1 = gtNewOperNode(GT_IND, TYP_REF, op1);
+ op1->gtFlags |= GTF_EXCEPT | GTF_GLOB_REF;
+
+ impPushOnStackNoType(op1);
+ opcode = CEE_STIND_REF;
+ lclTyp = TYP_REF;
+ goto STIND_POST_VERIFY;
+ }
+
+ op2 = impPopStack().val; // Src
+ op1 = impPopStack().val; // Dest
+ op1 = gtNewCpObjNode(op1, op2, resolvedToken.hClass, ((prefixFlags & PREFIX_VOLATILE) != 0));
+ goto SPILL_APPEND;
+
+ case CEE_STOBJ:
+ {
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (eeIsValueClass(resolvedToken.hClass))
+ {
+ lclTyp = TYP_STRUCT;
+ }
+ else
+ {
+ lclTyp = TYP_REF;
+ }
+
+ if (tiVerificationNeeded)
+ {
+
+ typeInfo tiPtr = impStackTop(1).seTypeInfo;
+
+ // Make sure we have a good looking byref
+ Verify(tiPtr.IsByRef(), "pointer not byref");
+ Verify(!tiPtr.IsReadonlyByRef(), "write to readonly byref");
+ if (!tiPtr.IsByRef() || tiPtr.IsReadonlyByRef())
+ {
+ compUnsafeCastUsed = true;
+ }
+
+ typeInfo ptrVal = DereferenceByRef(tiPtr);
+ typeInfo argVal = verMakeTypeInfo(resolvedToken.hClass);
+
+ if (!tiCompatibleWith(impStackTop(0).seTypeInfo, NormaliseForStack(argVal), true))
+ {
+ Verify(false, "type of value incompatible with type operand");
+ compUnsafeCastUsed = true;
+ }
+
+ if (!tiCompatibleWith(argVal, ptrVal, false))
+ {
+ Verify(false, "type operand incompatible with type of address");
+ compUnsafeCastUsed = true;
+ }
+ }
+ else
+ {
+ compUnsafeCastUsed = true;
+ }
+
+ if (lclTyp == TYP_REF)
+ {
+ opcode = CEE_STIND_REF;
+ goto STIND_POST_VERIFY;
+ }
+
+ CorInfoType jitTyp = info.compCompHnd->asCorInfoType(resolvedToken.hClass);
+ if (impIsPrimitive(jitTyp))
+ {
+ lclTyp = JITtype2varType(jitTyp);
+ goto STIND_POST_VERIFY;
+ }
+
+ op2 = impPopStack().val; // Value
+ op1 = impPopStack().val; // Ptr
+
+ assertImp(varTypeIsStruct(op2));
+
+ op1 = impAssignStructPtr(op1, op2, resolvedToken.hClass, (unsigned)CHECK_SPILL_ALL);
+ goto SPILL_APPEND;
+ }
+
+ case CEE_MKREFANY:
+
+ assert(!compIsForInlining());
+
+ // Being lazy here. Refanys are tricky in terms of gc tracking.
+ // Since it is uncommon, just don't perform struct promotion in any method that contains mkrefany.
+
+ JITDUMP("disabling struct promotion because of mkrefany\n");
+ fgNoStructPromotion = true;
+
+ oper = GT_MKREFANY;
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ op2 = impTokenToHandle(&resolvedToken, nullptr, TRUE);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiPtr = impStackTop().seTypeInfo;
+ typeInfo tiInstr = verMakeTypeInfo(resolvedToken.hClass);
+
+ Verify(!verIsByRefLike(tiInstr), "mkrefany of byref-like class");
+ Verify(!tiPtr.IsReadonlyByRef(), "readonly byref used with mkrefany");
+ Verify(typeInfo::AreEquivalent(tiPtr.DereferenceByRef(), tiInstr), "type mismatch");
+ }
+
+ accessAllowedResult =
+ info.compCompHnd->canAccessClass(&resolvedToken, info.compMethodHnd, &calloutHelper);
+ impHandleAccessAllowed(accessAllowedResult, &calloutHelper);
+
+ op1 = impPopStack().val;
+
+ // @SPECVIOLATION: TYP_INT should not be allowed here by a strict reading of the spec.
+ // But JIT32 allowed it, so we continue to allow it.
+ assertImp(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_INT);
+
+ // MKREFANY returns a struct. op2 is the class token.
+ op1 = gtNewOperNode(oper, TYP_STRUCT, op1, op2);
+
+ impPushOnStack(op1, verMakeTypeInfo(impGetRefAnyClass()));
+ break;
+
+ case CEE_LDOBJ:
+ {
+ oper = GT_OBJ;
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ OBJ:
+
+ tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiPtr = impStackTop().seTypeInfo;
+
+ // Make sure we have a byref
+ if (!tiPtr.IsByRef())
+ {
+ Verify(false, "pointer not byref");
+ compUnsafeCastUsed = true;
+ }
+ typeInfo tiPtrVal = DereferenceByRef(tiPtr);
+
+ if (!tiCompatibleWith(tiPtrVal, tiRetVal, false))
+ {
+ Verify(false, "type of address incompatible with type operand");
+ compUnsafeCastUsed = true;
+ }
+ tiRetVal.NormaliseForStack();
+ }
+ else
+ {
+ compUnsafeCastUsed = true;
+ }
+
+ if (eeIsValueClass(resolvedToken.hClass))
+ {
+ lclTyp = TYP_STRUCT;
+ }
+ else
+ {
+ lclTyp = TYP_REF;
+ opcode = CEE_LDIND_REF;
+ goto LDIND_POST_VERIFY;
+ }
+
+ op1 = impPopStack().val;
+
+ assertImp(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL);
+
+ CorInfoType jitTyp = info.compCompHnd->asCorInfoType(resolvedToken.hClass);
+ if (impIsPrimitive(jitTyp))
+ {
+ op1 = gtNewOperNode(GT_IND, JITtype2varType(jitTyp), op1);
+
+ // Could point anywhere, for example a boxed class static int
+ op1->gtFlags |= GTF_IND_TGTANYWHERE | GTF_GLOB_REF;
+ assertImp(varTypeIsArithmetic(op1->gtType));
+ }
+ else
+ {
+ // OBJ returns a struct
+ // and an inline argument which is the class token of the loaded obj
+ op1 = gtNewObjNode(resolvedToken.hClass, op1);
+ }
+ op1->gtFlags |= GTF_EXCEPT;
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+ }
+
+ case CEE_LDLEN:
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop().seTypeInfo;
+ Verify(verIsSDArray(tiArray), "bad array");
+ tiRetVal = typeInfo(TI_INT);
+ }
+
+ op1 = impPopStack().val;
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ /* Use GT_ARR_LENGTH operator so rng check opts see this */
+ GenTreeArrLen* arrLen =
+ new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, op1, offsetof(CORINFO_Array, length));
+
+ /* Mark the block as containing a length expression */
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ block->bbFlags |= BBF_HAS_IDX_LEN;
+ }
+
+ op1 = arrLen;
+ }
+ else
+ {
+ /* Create the expression "*(array_addr + ArrLenOffs)" */
+ op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1,
+ gtNewIconNode(offsetof(CORINFO_Array, length), TYP_I_IMPL));
+ op1 = gtNewOperNode(GT_IND, TYP_INT, op1);
+ op1->gtFlags |= GTF_IND_ARR_LEN;
+ }
+
+ /* An indirection will cause a GPF if the address is null */
+ op1->gtFlags |= GTF_EXCEPT;
+
+ /* Push the result back on the stack */
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_BREAK:
+ op1 = gtNewHelperCallNode(CORINFO_HELP_USER_BREAKPOINT, TYP_VOID);
+ goto SPILL_APPEND;
+
+ case CEE_NOP:
+ if (opts.compDbgCode)
+ {
+ op1 = new (this, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
+ goto SPILL_APPEND;
+ }
+ break;
+
+ /******************************** NYI *******************************/
+
+ case 0xCC:
+ OutputDebugStringA("CLR: Invalid x86 breakpoint in IL stream\n");
+
+ case CEE_ILLEGAL:
+ case CEE_MACRO_END:
+
+ default:
+ BADCODE3("unknown opcode", ": %02X", (int)opcode);
+ }
+
+ codeAddr += sz;
+ prevOpcode = opcode;
+
+ prefixFlags = 0;
+ assert(!insertLdloc || opcode == CEE_DUP);
+ }
+
+ assert(!insertLdloc);
+
+ return;
+#undef _impResolveToken
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+// Push a local/argument tree on the operand stack
+void Compiler::impPushVar(GenTree* op, typeInfo tiRetVal)
+{
+ tiRetVal.NormaliseForStack();
+
+ if (verTrackObjCtorInitState && (verCurrentState.thisInitialized != TIS_Init) && tiRetVal.IsThisPtr())
+ {
+ tiRetVal.SetUninitialisedObjRef();
+ }
+
+ impPushOnStack(op, tiRetVal);
+}
+
+// Load a local/argument on the operand stack
+// lclNum is an index into lvaTable *NOT* the arg/lcl index in the IL
+void Compiler::impLoadVar(unsigned lclNum, IL_OFFSET offset, typeInfo tiRetVal)
+{
+ var_types lclTyp;
+
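+ // Normalize-on-load locals are loaded with their small (e.g. byte/short) type so the load
+ // itself re-normalizes the value; other locals are loaded with their widened stack type.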
+ if (lvaTable[lclNum].lvNormalizeOnLoad())
+ {
+ lclTyp = lvaGetRealType(lclNum);
+ }
+ else
+ {
+ lclTyp = lvaGetActualType(lclNum);
+ }
+
+ impPushVar(gtNewLclvNode(lclNum, lclTyp, offset), tiRetVal);
+}
+
+// Load an argument on the operand stack
+// Shared by the various CEE_LDARG opcodes
+// ilArgNum is the argument index as specified in IL.
+// It will be mapped to the correct lvaTable index
+void Compiler::impLoadArg(unsigned ilArgNum, IL_OFFSET offset)
+{
+ Verify(ilArgNum < info.compILargsCount, "bad arg num");
+
+ if (compIsForInlining())
+ {
+ if (ilArgNum >= info.compArgsCount)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_BAD_ARGUMENT_NUMBER);
+ return;
+ }
+
+ impPushVar(impInlineFetchArg(ilArgNum, impInlineInfo->inlArgInfo, impInlineInfo->lclVarInfo),
+ impInlineInfo->lclVarInfo[ilArgNum].lclVerTypeInfo);
+ }
+ else
+ {
+ if (ilArgNum >= info.compArgsCount)
+ {
+ BADCODE("Bad IL");
+ }
+
+ unsigned lclNum = compMapILargNum(ilArgNum); // account for possible hidden param
+
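+ // Loads of the incoming 'this' argument are redirected to lvaArg0Var, which may be a
+ // shadow copy of 'this'.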
+ if (lclNum == info.compThisArg)
+ {
+ lclNum = lvaArg0Var;
+ }
+
+ impLoadVar(lclNum, offset);
+ }
+}
+
+// Load a local on the operand stack
+// Shared by the various CEE_LDLOC opcodes
+// ilLclNum is the local index as specified in IL.
+// It will be mapped to the correct lvaTable index
+void Compiler::impLoadLoc(unsigned ilLclNum, IL_OFFSET offset)
+{
+ if (tiVerificationNeeded)
+ {
+ Verify(ilLclNum < info.compMethodInfo->locals.numArgs, "bad loc num");
+ Verify(info.compInitMem, "initLocals not set");
+ }
+
+ if (compIsForInlining())
+ {
+ if (ilLclNum >= info.compMethodInfo->locals.numArgs)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_BAD_LOCAL_NUMBER);
+ return;
+ }
+
+ // Get the local type
+ var_types lclTyp = impInlineInfo->lclVarInfo[ilLclNum + impInlineInfo->argCnt].lclTypeInfo;
+
+ typeInfo tiRetVal = impInlineInfo->lclVarInfo[ilLclNum + impInlineInfo->argCnt].lclVerTypeInfo;
+
+ /* Have we allocated a temp for this local? */
+
+ unsigned lclNum = impInlineFetchLocal(ilLclNum DEBUGARG("Inline ldloc first use temp"));
+
+ // All vars of inlined methods should be !lvNormalizeOnLoad()
+
+ assert(!lvaTable[lclNum].lvNormalizeOnLoad());
+ lclTyp = genActualType(lclTyp);
+
+ impPushVar(gtNewLclvNode(lclNum, lclTyp), tiRetVal);
+ }
+ else
+ {
+ if (ilLclNum >= info.compMethodInfo->locals.numArgs)
+ {
+ BADCODE("Bad IL");
+ }
+
+ unsigned lclNum = info.compArgsCount + ilLclNum;
+
+ impLoadVar(lclNum, offset);
+ }
+}
+
+#ifdef _TARGET_ARM_
+/**************************************************************************************
+ *
+ * When assigning a vararg call src to a HFA lcl dest, mark that we cannot promote the
+ * dst struct, because struct promotion will turn it into a float/double variable while
+ * the rhs will be an int/long variable. We don't generate code for assigning an int into
+ * a float register, yet nothing would prevent such a tree from being produced. The tree
+ * would look like: (=, (typ_float, typ_int)) or (GT_TRANSFER, (typ_float, typ_int))
+ *
+ * tmpNum - the lcl dst variable num that is a struct.
+ * src - the src tree assigned to the dest that is a struct/int (when varargs call.)
+ * hClass - the type handle for the struct variable.
+ *
+ * TODO-ARM-CQ: [301608] This is a rare scenario with varargs and struct promotion coming into play,
+ * however, we could do a codegen of transferring from int to float registers
+ * (transfer, not a cast.)
+ *
+ */
+void Compiler::impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr src, CORINFO_CLASS_HANDLE hClass)
+{
+ if (src->gtOper == GT_CALL && src->gtCall.IsVarargs() && IsHfa(hClass))
+ {
+ int hfaSlots = GetHfaCount(hClass);
+ var_types hfaType = GetHfaType(hClass);
+
+ // If we have varargs, the importer morphs the method's return type to "int" irrespective of
+ // its original struct/float type, because the ABI specifies that such returns are in integer registers.
+ // We don't want struct promotion to replace an expression like this:
+ // lclFld_int = callvar_int() into lclFld_float = callvar_int();
+ // This means an int is getting assigned to a float without a cast. Prevent the promotion.
+ if ((hfaType == TYP_DOUBLE && hfaSlots == sizeof(double) / REGSIZE_BYTES) ||
+ (hfaType == TYP_FLOAT && hfaSlots == sizeof(float) / REGSIZE_BYTES))
+ {
+ // Make sure this struct type stays as struct so we can receive the call in a struct.
+ lvaTable[tmpNum].lvIsMultiRegRet = true;
+ }
+ }
+}
+#endif // _TARGET_ARM_
+
+#if FEATURE_MULTIREG_RET
+GenTreePtr Compiler::impAssignMultiRegTypeToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass)
+{
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for multireg return."));
+ impAssignTempGen(tmpNum, op, hClass, (unsigned)CHECK_SPILL_NONE);
+ GenTreePtr ret = gtNewLclvNode(tmpNum, op->gtType);
+ assert(IsMultiRegReturnedType(hClass));
+
+ // Mark the var so that fields are not promoted and stay together.
+ lvaTable[tmpNum].lvIsMultiRegRet = true;
+
+ return ret;
+}
+#endif // FEATURE_MULTIREG_RET
+
+// Do the import for a return instruction.
+// Returns false if inlining was aborted.
+// 'opcode' can be CEE_RET, or a call opcode in the case of a tail call.
+bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE& opcode)
+{
+ if (tiVerificationNeeded)
+ {
+ verVerifyThisPtrInitialised();
+
+ unsigned expectedStack = 0;
+ if (info.compRetType != TYP_VOID)
+ {
+ typeInfo tiVal = impStackTop().seTypeInfo;
+ typeInfo tiDeclared =
+ verMakeTypeInfo(info.compMethodInfo->args.retType, info.compMethodInfo->args.retTypeClass);
+
+ Verify(!verIsByRefLike(tiDeclared) || verIsSafeToReturnByRef(tiVal), "byref return");
+
+ Verify(tiCompatibleWith(tiVal, tiDeclared.NormaliseForStack(), true), "type mismatch");
+ expectedStack = 1;
+ }
+ Verify(verCurrentState.esStackDepth == expectedStack, "stack non-empty on return");
+ }
+
+ GenTree* op2 = nullptr;
+ GenTree* op1 = nullptr;
+ CORINFO_CLASS_HANDLE retClsHnd = nullptr;
+
+ if (info.compRetType != TYP_VOID)
+ {
+ StackEntry se = impPopStack(retClsHnd);
+ op2 = se.val;
+
+ if (!compIsForInlining())
+ {
+ impBashVarAddrsToI(op2);
+ op2 = impImplicitIorI4Cast(op2, info.compRetType);
+ op2 = impImplicitR4orR8Cast(op2, info.compRetType);
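+ // The implicit casts above reconcile int/native-int and float/double mismatches between
+ // the value on the stack and the declared return type before the assert below.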
+ assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) ||
+ ((op2->TypeGet() == TYP_I_IMPL) && (info.compRetType == TYP_BYREF)) ||
+ ((op2->TypeGet() == TYP_BYREF) && (info.compRetType == TYP_I_IMPL)) ||
+ (varTypeIsFloating(op2->gtType) && varTypeIsFloating(info.compRetType)) ||
+ (varTypeIsStruct(op2) && varTypeIsStruct(info.compRetType)));
+
+#ifdef DEBUG
+ if (opts.compGcChecks && info.compRetType == TYP_REF)
+ {
+ // DDB 3483 : JIT Stress: early termination of GC ref's life time in exception code path
+ // VSW 440513: Incorrect gcinfo on the return value under COMPlus_JitGCChecks=1 for methods with
+ // one-return BB.
+
+ assert(op2->gtType == TYP_REF);
+
+ // confirm that the argument is a GC pointer (for debugging (GC stress))
+ GenTreeArgList* args = gtNewArgList(op2);
+ op2 = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_REF, 0, args);
+
+ if (verbose)
+ {
+ printf("\ncompGcChecks tree:\n");
+ gtDispTree(op2);
+ }
+ }
+#endif
+ }
+ else
+ {
+ // inlinee's stack should be empty now.
+ assert(verCurrentState.esStackDepth == 0);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n\n Inlinee Return expression (before normalization) =>\n");
+ gtDispTree(op2);
+ }
+#endif
+
+ // Make sure the type matches the original call.
+
+ var_types returnType = genActualType(op2->gtType);
+ var_types originalCallType = impInlineInfo->inlineCandidateInfo->fncRetType;
+ if ((returnType != originalCallType) && (originalCallType == TYP_STRUCT))
+ {
+ originalCallType = impNormStructType(impInlineInfo->inlineCandidateInfo->methInfo.args.retTypeClass);
+ }
+
+ if (returnType != originalCallType)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_RETURN_TYPE_MISMATCH);
+ return false;
+ }
+
+ // Below, we are going to set impInlineInfo->retExpr to the tree with the return
+ // expression. At this point, retExpr could already be set if there are multiple
+ // return blocks (meaning lvaInlineeReturnSpillTemp != BAD_VAR_NUM) and one of
+ // the other blocks already set it. If there is only a single return block,
+ // retExpr shouldn't be set. However, this is not true if we reimport a block
+ // with a return. In that case, retExpr will be set, then the block will be
+ // reimported, but retExpr won't get cleared as part of setting the block to
+ // be reimported. The reimported retExpr value should be the same, so even if
+ // we don't unconditionally overwrite it, it shouldn't matter.
+ if (info.compRetNativeType != TYP_STRUCT)
+ {
+ // compRetNativeType is not TYP_STRUCT.
+ // This implies it could be either a scalar type or SIMD vector type or
+ // a struct type that can be normalized to a scalar type.
+
+ if (varTypeIsStruct(info.compRetType))
+ {
+ noway_assert(info.compRetBuffArg == BAD_VAR_NUM);
+ // adjust the type away from struct to integral
+ // and no normalizing
+ op2 = impFixupStructReturnType(op2, retClsHnd);
+ }
+ else
+ {
+ // Do we have to normalize?
+ var_types fncRealRetType = JITtype2varType(info.compMethodInfo->args.retType);
+ if ((varTypeIsSmall(op2->TypeGet()) || varTypeIsSmall(fncRealRetType)) &&
+ fgCastNeeded(op2, fncRealRetType))
+ {
+ // Small-typed return values are normalized by the callee
+ op2 = gtNewCastNode(TYP_INT, op2, fncRealRetType);
+ }
+ }
+
+ if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
+ {
+ assert(info.compRetNativeType != TYP_VOID && fgMoreThanOneReturnBlock());
+
+ // This is a bit of a workaround...
+ // If we are inlining a call that returns a struct, where the actual "native" return type is
+ // not a struct (for example, the struct is composed of exactly one int, and the native
+ // return type is thus an int), and the inlinee has multiple return blocks (thus,
+ // lvaInlineeReturnSpillTemp is != BAD_VAR_NUM, and is the index of a local var that is set
+ // to the *native* return type), and at least one of the return blocks is the result of
+ // a call, then we have a problem. The situation is like this (from a failed test case):
+ //
+ // inliner:
+ // // Note: valuetype plinq_devtests.LazyTests/LIX is a struct with only a single int
+ // call !!0 [mscorlib]System.Threading.LazyInitializer::EnsureInitialized<valuetype
+ // plinq_devtests.LazyTests/LIX>(!!0&, bool&, object&, class [mscorlib]System.Func`1<!!0>)
+ //
+ // inlinee:
+ // ...
+ // ldobj !!T // this gets bashed to a GT_LCL_FLD, type TYP_INT
+ // ret
+ // ...
+ // call !!0 System.Threading.LazyInitializer::EnsureInitializedCore<!!0>(!!0&, bool&,
+ // object&, class System.Func`1<!!0>)
+ // ret
+ //
+ // In the code above, when we call impFixupStructReturnType(), we will change the op2 return type
+ // of the inlinee return node, but we don't do that for GT_CALL nodes, which we delay until
+ // morphing when we call fgFixupStructReturn(). We do this, apparently, to handle nested
+ // inlining properly by leaving the correct type on the GT_CALL node through importing.
+ //
+ // To fix this, for this case, we temporarily change the GT_CALL node type to the
+ // native return type, which is what it will be set to eventually. We generate the
+ // assignment to the return temp, using the correct type, and then restore the GT_CALL
+ // node type. During morphing, the GT_CALL will get the correct, final, native return type.
+
+ bool restoreType = false;
+ if ((op2->OperGet() == GT_CALL) && (info.compRetType == TYP_STRUCT))
+ {
+ noway_assert(op2->TypeGet() == TYP_STRUCT);
+ op2->gtType = info.compRetNativeType;
+ restoreType = true;
+ }
+
+ impAssignTempGen(lvaInlineeReturnSpillTemp, op2, se.seTypeInfo.GetClassHandle(),
+ (unsigned)CHECK_SPILL_ALL);
+
+ GenTreePtr tmpOp2 = gtNewLclvNode(lvaInlineeReturnSpillTemp, op2->TypeGet());
+
+ if (restoreType)
+ {
+ op2->gtType = TYP_STRUCT; // restore it to what it was
+ }
+
+ op2 = tmpOp2;
+
+#ifdef DEBUG
+ if (impInlineInfo->retExpr)
+ {
+ // Some other block(s) have seen the CEE_RET first.
+ // Better they spilled to the same temp.
+ assert(impInlineInfo->retExpr->gtOper == GT_LCL_VAR);
+ assert(impInlineInfo->retExpr->gtLclVarCommon.gtLclNum == op2->gtLclVarCommon.gtLclNum);
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n\n Inlinee Return expression (after normalization) =>\n");
+ gtDispTree(op2);
+ }
+#endif
+
+ // Report the return expression
+ impInlineInfo->retExpr = op2;
+ }
+ else
+ {
+ // compRetNativeType is TYP_STRUCT.
+ // This implies a struct return via a RetBuf arg or a multi-reg struct return.
+
+ GenTreePtr iciCall = impInlineInfo->iciCall;
+ assert(iciCall->gtOper == GT_CALL);
+
+ // Assign the inlinee return into a spill temp.
+ // spill temp only exists if there are multiple return points
+ if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
+ {
+ // in this case we have to insert multiple struct copies to the temp
+ // and the retexpr is just the temp.
+ assert(info.compRetNativeType != TYP_VOID);
+ assert(fgMoreThanOneReturnBlock());
+
+ impAssignTempGen(lvaInlineeReturnSpillTemp, op2, se.seTypeInfo.GetClassHandle(),
+ (unsigned)CHECK_SPILL_ALL);
+ }
+
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if defined(_TARGET_ARM_)
+ // TODO-ARM64-NYI: HFA
+ // TODO-AMD64-Unix and TODO-ARM: once the ARM64 functionality is implemented, the
+ // next ifdefs could be refactored into a single method with the ifdef inside.
+ if (IsHfa(retClsHnd))
+ {
+// Same as !IsHfa but just don't bother with impAssignStructPtr.
+#else // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ ReturnTypeDesc retTypeDesc;
+ retTypeDesc.InitializeStructReturnType(this, retClsHnd);
+ unsigned retRegCount = retTypeDesc.GetReturnRegCount();
+
+ if (retRegCount != 0)
+ {
+ // If single eightbyte, the return type would have been normalized and there won't be a temp var.
+ // This code will be called only if the struct return has not been normalized (i.e., 2 eightbytes,
+ // the max allowed).
+ assert(retRegCount == MAX_RET_REG_COUNT);
+ // Same as !structDesc.passedInRegisters but just don't bother with impAssignStructPtr.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
+ {
+ if (!impInlineInfo->retExpr)
+ {
+#if defined(_TARGET_ARM_)
+ impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, info.compRetType);
+#else // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // The inlinee compiler has figured out the type of the temp already. Use it here.
+ impInlineInfo->retExpr =
+ gtNewLclvNode(lvaInlineeReturnSpillTemp, lvaTable[lvaInlineeReturnSpillTemp].lvType);
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ }
+ }
+ else
+ {
+ impInlineInfo->retExpr = op2;
+ }
+ }
+ else
+#elif defined(_TARGET_ARM64_)
+ ReturnTypeDesc retTypeDesc;
+ retTypeDesc.InitializeStructReturnType(this, retClsHnd);
+ unsigned retRegCount = retTypeDesc.GetReturnRegCount();
+
+ if (retRegCount != 0)
+ {
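+ // Multi-register struct return on ARM64: no return buffer and at least two return registers.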
+ assert(!iciCall->AsCall()->HasRetBufArg());
+ assert(retRegCount >= 2);
+ if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
+ {
+ if (!impInlineInfo->retExpr)
+ {
+ // The inlinee compiler has figured out the type of the temp already. Use it here.
+ impInlineInfo->retExpr =
+ gtNewLclvNode(lvaInlineeReturnSpillTemp, lvaTable[lvaInlineeReturnSpillTemp].lvType);
+ }
+ }
+ else
+ {
+ impInlineInfo->retExpr = op2;
+ }
+ }
+ else
+#endif // defined(_TARGET_ARM64_)
+ {
+ assert(iciCall->AsCall()->HasRetBufArg());
+ GenTreePtr dest = gtCloneExpr(iciCall->gtCall.gtCallArgs->gtOp.gtOp1);
+ // spill temp only exists if there are multiple return points
+ if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
+ {
+ // if this is the first return we have seen set the retExpr
+ if (!impInlineInfo->retExpr)
+ {
+ impInlineInfo->retExpr =
+ impAssignStructPtr(dest, gtNewLclvNode(lvaInlineeReturnSpillTemp, info.compRetType),
+ retClsHnd, (unsigned)CHECK_SPILL_ALL);
+ }
+ }
+ else
+ {
+ impInlineInfo->retExpr = impAssignStructPtr(dest, op2, retClsHnd, (unsigned)CHECK_SPILL_ALL);
+ }
+ }
+ }
+ }
+ }
+
+ if (compIsForInlining())
+ {
+ return true;
+ }
+
+ if (info.compRetType == TYP_VOID)
+ {
+ // return void
+ op1 = new (this, GT_RETURN) GenTreeOp(GT_RETURN, TYP_VOID);
+ }
+ else if (info.compRetBuffArg != BAD_VAR_NUM)
+ {
+ // Assign value to return buff (first param)
+ GenTreePtr retBuffAddr = gtNewLclvNode(info.compRetBuffArg, TYP_BYREF, impCurStmtOffs);
+
+ op2 = impAssignStructPtr(retBuffAddr, op2, retClsHnd, (unsigned)CHECK_SPILL_ALL);
+ impAppendTree(op2, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+ // There are cases where the address of the implicit RetBuf should be returned explicitly (in RAX).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_AMD64_)
+
+ // The x64 (System V and Win64) calling conventions require the
+ // implicit return buffer to be returned explicitly (in RAX).
+ // Change the return type to be BYREF.
+ op1 = gtNewOperNode(GT_RETURN, TYP_BYREF, gtNewLclvNode(info.compRetBuffArg, TYP_BYREF));
+#else // !defined(_TARGET_AMD64_)
+ // On non-AMD64 targets, the profiler hook requires the implicit RetBuf to be returned
+ // explicitly (in the return register); in that case the return type of the function is changed to BYREF.
+ // If the profiler hook is not needed, the return type of the function is TYP_VOID.
+ if (compIsProfilerHookNeeded())
+ {
+ op1 = gtNewOperNode(GT_RETURN, TYP_BYREF, gtNewLclvNode(info.compRetBuffArg, TYP_BYREF));
+ }
+ else
+ {
+ // return void
+ op1 = new (this, GT_RETURN) GenTreeOp(GT_RETURN, TYP_VOID);
+ }
+#endif // !defined(_TARGET_AMD64_)
+ }
+ else if (varTypeIsStruct(info.compRetType))
+ {
+#if !FEATURE_MULTIREG_RET
+ // For both ARM architectures the HFA native types are maintained as structs.
+ // Also on System V AMD64 the multireg struct returns are left as structs.
+ noway_assert(info.compRetNativeType != TYP_STRUCT);
+#endif
+ op2 = impFixupStructReturnType(op2, retClsHnd);
+ // return op2
+ op1 = gtNewOperNode(GT_RETURN, genActualType(info.compRetNativeType), op2);
+ }
+ else
+ {
+ // return op2
+ op1 = gtNewOperNode(GT_RETURN, genActualType(info.compRetType), op2);
+ }
+
+ // We must have imported a tailcall and jumped to RET
+ if (prefixFlags & PREFIX_TAILCALL)
+ {
+#ifndef _TARGET_AMD64_
+ // Jit64 compat:
+ // This cannot be asserted on Amd64 since we permit the following IL pattern:
+ // tail.call
+ // pop
+ // ret
+ assert(verCurrentState.esStackDepth == 0 && impOpcodeIsCallOpcode(opcode));
+#endif
+
+ opcode = CEE_RET; // To prevent trying to spill if CALL_SITE_BOUNDARIES
+
+ // impImportCall() would have already appended TYP_VOID calls
+ if (info.compRetType == TYP_VOID)
+ {
+ return true;
+ }
+ }
+
+ impAppendTree(op1, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+#ifdef DEBUG
+ // Remember at which BC offset the tree was finished
+ impNoteLastILoffs();
+#endif
+ return true;
+}
+
+/*****************************************************************************
+ * Mark the block as unimported.
+ * Note that the caller is responsible for calling impImportBlockPending(),
+ * with the appropriate stack-state
+ */
+
+inline void Compiler::impReimportMarkBlock(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose && (block->bbFlags & BBF_IMPORTED))
+ {
+ printf("\nBB%02u will be reimported\n", block->bbNum);
+ }
+#endif
+
+ block->bbFlags &= ~BBF_IMPORTED;
+}
+
+/*****************************************************************************
+ * Mark the successors of the given block as unimported.
+ * Note that the caller is responsible for calling impImportBlockPending()
+ * for all the successors, with the appropriate stack-state.
+ */
+
+void Compiler::impReimportMarkSuccessors(BasicBlock* block)
+{
+ for (unsigned i = 0; i < block->NumSucc(); i++)
+ {
+ impReimportMarkBlock(block->GetSucc(i));
+ }
+}
+
+/*****************************************************************************
+ *
+ * Filter wrapper that handles only the verification exception code
+ * (any other exception continues the search).
+ */
+
+LONG FilterVerificationExceptions(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam)
+{
+ if (pExceptionPointers->ExceptionRecord->ExceptionCode == SEH_VERIFICATION_EXCEPTION)
+ {
+ return EXCEPTION_EXECUTE_HANDLER;
+ }
+
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
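+// Propagate verification state from "block" into the handlers of its enclosing EH regions.
+// When "isTryStart", the try-entry constraints are checked (empty evaluation stack; 'this'
+// initialized except for try/fault regions) and the handler and filter begin blocks are
+// queued for importing. Otherwise only enclosing fault handlers are re-queued, so that the
+// 'this'-init state observed after this block reaches them.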
+void Compiler::impVerifyEHBlock(BasicBlock* block, bool isTryStart)
+{
+ assert(block->hasTryIndex());
+ assert(!compIsForInlining());
+
+ unsigned tryIndex = block->getTryIndex();
+ EHblkDsc* HBtab = ehGetDsc(tryIndex);
+
+ if (isTryStart)
+ {
+ assert(block->bbFlags & BBF_TRY_BEG);
+
+ // The Stack must be empty
+ //
+ if (block->bbStkDepth != 0)
+ {
+ BADCODE("Evaluation stack must be empty on entry into a try block");
+ }
+ }
+
+ // Save the stack contents, we'll need to restore it later
+ //
+ SavedStack blockState;
+ impSaveStackState(&blockState, false);
+
+ while (HBtab != nullptr)
+ {
+ if (isTryStart)
+ {
+ // Are we verifying that an instance constructor properly initializes its 'this' pointer once?
+ // We do not allow the 'this' pointer to be uninitialized when entering most kinds of try regions
+ //
+ if (verTrackObjCtorInitState && (verCurrentState.thisInitialized != TIS_Init))
+ {
+ // We trigger an invalid program exception here unless we have a try/fault region.
+ //
+ if (HBtab->HasCatchHandler() || HBtab->HasFinallyHandler() || HBtab->HasFilter())
+ {
+ BADCODE(
+ "The 'this' pointer of an instance constructor is not initialized upon entry to a try region");
+ }
+ else
+ {
+ // Allow a try/fault region to proceed.
+ assert(HBtab->HasFaultHandler());
+ }
+ }
+
+ /* Recursively process the handler block */
+ BasicBlock* hndBegBB = HBtab->ebdHndBeg;
+
+ // Construct the proper verification stack state
+ // either empty or one that contains just
+ // the Exception Object that we are dealing with
+ //
+ verCurrentState.esStackDepth = 0;
+
+ if (handlerGetsXcptnObj(hndBegBB->bbCatchTyp))
+ {
+ CORINFO_CLASS_HANDLE clsHnd;
+
+ if (HBtab->HasFilter())
+ {
+ clsHnd = impGetObjectClass();
+ }
+ else
+ {
+ CORINFO_RESOLVED_TOKEN resolvedToken;
+
+ resolvedToken.tokenContext = impTokenLookupContextHandle;
+ resolvedToken.tokenScope = info.compScopeHnd;
+ resolvedToken.token = HBtab->ebdTyp;
+ resolvedToken.tokenType = CORINFO_TOKENKIND_Class;
+ info.compCompHnd->resolveToken(&resolvedToken);
+
+ clsHnd = resolvedToken.hClass;
+ }
+
+ // push the catch arg on the stack, spilling to a temp if necessary
+ // Note: can update HBtab->ebdHndBeg!
+ hndBegBB = impPushCatchArgOnStack(hndBegBB, clsHnd);
+ }
+
+ // Queue up the handler for importing
+ //
+ impImportBlockPending(hndBegBB);
+
+ if (HBtab->HasFilter())
+ {
+ /* @VERIFICATION : Ideally the end of filter state should get
+ propagated to the catch handler; this is an incompleteness,
+ but not a security/compliance issue, since the only
+ interesting state is the 'thisInit' state.
+ */
+
+ verCurrentState.esStackDepth = 0;
+
+ BasicBlock* filterBB = HBtab->ebdFilter;
+
+ // push the catch arg on the stack, spilling to a temp if necessary
+ // Note: can update HBtab->ebdFilter!
+ filterBB = impPushCatchArgOnStack(filterBB, impGetObjectClass());
+
+ impImportBlockPending(filterBB);
+ }
+ }
+ else if (verTrackObjCtorInitState && HBtab->HasFaultHandler())
+ {
+ /* Recursively process the handler block */
+
+ verCurrentState.esStackDepth = 0;
+
+ // Queue up the fault handler for importing
+ //
+ impImportBlockPending(HBtab->ebdHndBeg);
+ }
+
+ // Now process our enclosing try index (if any)
+ //
+ tryIndex = HBtab->ebdEnclosingTryIndex;
+ if (tryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ HBtab = nullptr;
+ }
+ else
+ {
+ HBtab = ehGetDsc(tryIndex);
+ }
+ }
+
+ // Restore the stack contents
+ impRestoreStackState(&blockState);
+}
+
+//***************************************************************
+// Import the instructions for the given basic block. Perform
+// verification, throwing an exception on failure. Push any successor blocks that are enabled for the first
+// time, or whose verification pre-state is changed.
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void Compiler::impImportBlock(BasicBlock* block)
+{
+ // BBF_INTERNAL blocks only exist during importation due to EH canonicalization. We need to
+ // handle them specially. In particular, there is no IL to import for them, but we do need
+ // to mark them as imported and put their successors on the pending import list.
+ if (block->bbFlags & BBF_INTERNAL)
+ {
+ JITDUMP("Marking BBF_INTERNAL block BB%02u as BBF_IMPORTED\n", block->bbNum);
+ block->bbFlags |= BBF_IMPORTED;
+
+ for (unsigned i = 0; i < block->NumSucc(); i++)
+ {
+ impImportBlockPending(block->GetSucc(i));
+ }
+
+ return;
+ }
+
+ bool markImport;
+
+ assert(block);
+
+ /* Make the block globally available */
+
+ compCurBB = block;
+
+#ifdef DEBUG
+ /* Initialize the debug variables */
+ impCurOpcName = "unknown";
+ impCurOpcOffs = block->bbCodeOffs;
+#endif
+
+ /* Set the current stack state to the merged result */
+ verResetCurrentState(block, &verCurrentState);
+
+ /* Now walk the code and import the IL into GenTrees */
+
+ struct FilterVerificationExceptionsParam
+ {
+ Compiler* pThis;
+ BasicBlock* block;
+ };
+ FilterVerificationExceptionsParam param;
+
+ param.pThis = this;
+ param.block = block;
+
+ PAL_TRY(FilterVerificationExceptionsParam*, pParam, &param)
+ {
+ /* @VERIFICATION : For now, the only state propagation from try
+ to its handler is the "thisInit" state (the stack is empty at the start of a try).
+ In general, for state that we track in verification, we need to
+ model the possibility that an exception might happen at any IL
+ instruction, so we really need to merge all states that obtain
+ between IL instructions in a try block into the start states of
+ all handlers.
+
+ However we do not allow the 'this' pointer to be uninitialized when
+ entering most kinds of try regions (only try/fault are allowed to have
+ an uninitialized this pointer on entry to the try)
+
+ Fortunately, the stack is thrown away when an exception
+ leads to a handler, so we don't have to worry about that.
+ We DO, however, have to worry about the "thisInit" state.
+ But only for the try/fault case.
+
+ The only allowed transition is from TIS_Uninit to TIS_Init.
+
+ So, for the fault handler block of a try/fault region,
+ we will merge the start state of the try begin
+ and the post-state of each block that is part of this try region
+ */
+
+ // merge the start state of the try begin
+ //
+ if (pParam->block->bbFlags & BBF_TRY_BEG)
+ {
+ pParam->pThis->impVerifyEHBlock(pParam->block, true);
+ }
+
+ pParam->pThis->impImportBlockCode(pParam->block);
+
+ // As discussed above:
+ // merge the post-state of each block that is part of this try region
+ //
+ if (pParam->block->hasTryIndex())
+ {
+ pParam->pThis->impVerifyEHBlock(pParam->block, false);
+ }
+ }
+ PAL_EXCEPT_FILTER(FilterVerificationExceptions)
+ {
+ verHandleVerificationFailure(block DEBUGARG(false));
+ }
+ PAL_ENDTRY
+
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ assert(!compDonotInline());
+
+ markImport = false;
+
+SPILLSTACK:
+
+ unsigned baseTmp = NO_BASE_TMP; // input temps assigned to successor blocks
+ bool reimportSpillClique = false;
+ BasicBlock* tgtBlock = nullptr;
+
+ /* If the stack is non-empty, we might have to spill its contents */
+
+ if (verCurrentState.esStackDepth != 0)
+ {
+ impBoxTemp = BAD_VAR_NUM; // if a box temp is used in a block that leaves something
+ // on the stack, its lifetime is hard to determine, simply
+ // don't reuse such temps.
+
+ GenTreePtr addStmt = nullptr;
+
+ /* Do the successors of 'block' have any other predecessors ?
+ We do not want to do some of the optimizations related to multiRef
+ if we can reimport blocks */
+
+ unsigned multRef = impCanReimport ? unsigned(~0) : 0;
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+
+ /* Temporarily remove the 'jtrue' from the end of the tree list */
+
+ assert(impTreeLast);
+ assert(impTreeLast->gtOper == GT_STMT);
+ assert(impTreeLast->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+
+ addStmt = impTreeLast;
+ impTreeLast = impTreeLast->gtPrev;
+
+ /* Note if the next block has more than one ancestor */
+
+ multRef |= block->bbNext->bbRefs;
+
+ /* Does the next block have temps assigned? */
+
+ baseTmp = block->bbNext->bbStkTempsIn;
+ tgtBlock = block->bbNext;
+
+ if (baseTmp != NO_BASE_TMP)
+ {
+ break;
+ }
+
+ /* Try the target of the jump then */
+
+ multRef |= block->bbJumpDest->bbRefs;
+ baseTmp = block->bbJumpDest->bbStkTempsIn;
+ tgtBlock = block->bbJumpDest;
+ break;
+
+ case BBJ_ALWAYS:
+ multRef |= block->bbJumpDest->bbRefs;
+ baseTmp = block->bbJumpDest->bbStkTempsIn;
+ tgtBlock = block->bbJumpDest;
+ break;
+
+ case BBJ_NONE:
+ multRef |= block->bbNext->bbRefs;
+ baseTmp = block->bbNext->bbStkTempsIn;
+ tgtBlock = block->bbNext;
+ break;
+
+ case BBJ_SWITCH:
+
+ BasicBlock** jmpTab;
+ unsigned jmpCnt;
+
+ /* Temporarily remove the GT_SWITCH from the end of the tree list */
+
+ assert(impTreeLast);
+ assert(impTreeLast->gtOper == GT_STMT);
+ assert(impTreeLast->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
+
+ addStmt = impTreeLast;
+ impTreeLast = impTreeLast->gtPrev;
+
+ jmpCnt = block->bbJumpSwt->bbsCount;
+ jmpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ tgtBlock = (*jmpTab);
+
+ multRef |= tgtBlock->bbRefs;
+
+ // Thanks to spill cliques, we should have assigned all or none
+ assert((baseTmp == NO_BASE_TMP) || (baseTmp == tgtBlock->bbStkTempsIn));
+ baseTmp = tgtBlock->bbStkTempsIn;
+ if (multRef > 1)
+ {
+ break;
+ }
+ } while (++jmpTab, --jmpCnt);
+
+ break;
+
+ case BBJ_CALLFINALLY:
+ case BBJ_EHCATCHRET:
+ case BBJ_RETURN:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_THROW:
+ NO_WAY("can't have 'unreached' end of BB with non-empty stack");
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+ assert(multRef >= 1);
+
+ /* Do we have a base temp number? */
+
+ bool newTemps = (baseTmp == NO_BASE_TMP);
+
+ if (newTemps)
+ {
+ /* Grab enough temps for the whole stack */
+ baseTmp = impGetSpillTmpBase(block);
+ }
+
+ /* Spill all stack entries into temps */
+ unsigned level, tempNum;
+
+ JITDUMP("\nSpilling stack entries into temps\n");
+ for (level = 0, tempNum = baseTmp; level < verCurrentState.esStackDepth; level++, tempNum++)
+ {
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+
+ /* VC generates code where it pushes a byref from one branch, and an int (ldc.i4 0) from
+ the other. This should merge to a byref in unverifiable code.
+ However, if the branch which leaves the TYP_I_IMPL on the stack is imported first, the
+ successor would be imported assuming there was a TYP_I_IMPL on
+ the stack. Thus the value would not get GC-tracked. Hence,
+ change the temp to TYP_BYREF and reimport the successors.
+ Note: We should only allow this in unverifiable code.
+ */
+ if (tree->gtType == TYP_BYREF && lvaTable[tempNum].lvType == TYP_I_IMPL && !verNeedsVerification())
+ {
+ lvaTable[tempNum].lvType = TYP_BYREF;
+ impReimportMarkSuccessors(block);
+ markImport = true;
+ }
+
+#ifdef _TARGET_64BIT_
+ if (genActualType(tree->gtType) == TYP_I_IMPL && lvaTable[tempNum].lvType == TYP_INT)
+ {
+ if (tiVerificationNeeded && tgtBlock->bbEntryState != nullptr &&
+ (tgtBlock->bbFlags & BBF_FAILED_VERIFICATION) == 0)
+ {
+ // Merge the current state into the entry state of block;
+ // the call to verMergeEntryStates must have changed
+ // the entry state of the block by merging the int local var
+ // and the native-int stack entry.
+ bool changed = false;
+ if (verMergeEntryStates(tgtBlock, &changed))
+ {
+ impRetypeEntryStateTemps(tgtBlock);
+ impReimportBlockPending(tgtBlock);
+ assert(changed);
+ }
+ else
+ {
+ tgtBlock->bbFlags |= BBF_FAILED_VERIFICATION;
+ break;
+ }
+ }
+
+ // Some other block in the spill clique set this to "int", but now we have "native int".
+ // Change the type and go back to re-import any blocks that used the wrong type.
+ lvaTable[tempNum].lvType = TYP_I_IMPL;
+ reimportSpillClique = true;
+ }
+ else if (genActualType(tree->gtType) == TYP_INT && lvaTable[tempNum].lvType == TYP_I_IMPL)
+ {
+ // Spill clique has decided this should be "native int", but this block only pushes an "int".
+ // Insert a sign-extension to "native int" so we match the clique.
+ verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, TYP_I_IMPL);
+ }
+
+ // Consider the case where one branch left a 'byref' on the stack and the other leaves
+ // an 'int'. On 32-bit, this is allowed (in non-verifiable code) since they are the same
+ // size. JIT64 managed to make this work on 64-bit. For compatibility, we support JIT64
+ // behavior instead of asserting and then generating bad code (where we save/restore the
+ // low 32 bits of a byref pointer to an 'int' sized local). If the 'int' side has been
+ // imported already, we need to change the type of the local and reimport the spill clique.
+ // If the 'byref' side has imported, we insert a cast from int to 'native int' to match
+ // the 'byref' size.
+ if (!tiVerificationNeeded)
+ {
+ if (genActualType(tree->gtType) == TYP_BYREF && lvaTable[tempNum].lvType == TYP_INT)
+ {
+ // Some other block in the spill clique set this to "int", but now we have "byref".
+ // Change the type and go back to re-import any blocks that used the wrong type.
+ lvaTable[tempNum].lvType = TYP_BYREF;
+ reimportSpillClique = true;
+ }
+ else if (genActualType(tree->gtType) == TYP_INT && lvaTable[tempNum].lvType == TYP_BYREF)
+ {
+ // Spill clique has decided this should be "byref", but this block only pushes an "int".
+ // Insert a sign-extension to "native int" so we match the clique size.
+ verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, TYP_I_IMPL);
+ }
+ }
+#endif // _TARGET_64BIT_
+
+#if FEATURE_X87_DOUBLES
+ // X87 stack doesn't differentiate between float/double
+ // so promoting is no big deal.
+ // For everybody else, keep it as float until we have a collision and then promote,
+ // just like x64's TYP_INT<->TYP_I_IMPL.
+
+ if (multRef > 1 && tree->gtType == TYP_FLOAT)
+ {
+ verCurrentState.esStack[level].val = gtNewCastNode(TYP_DOUBLE, tree, TYP_DOUBLE);
+ }
+
+#else // !FEATURE_X87_DOUBLES
+
+ if (tree->gtType == TYP_DOUBLE && lvaTable[tempNum].lvType == TYP_FLOAT)
+ {
+ // Some other block in the spill clique set this to "float", but now we have "double".
+ // Change the type and go back to re-import any blocks that used the wrong type.
+ lvaTable[tempNum].lvType = TYP_DOUBLE;
+ reimportSpillClique = true;
+ }
+ else if (tree->gtType == TYP_FLOAT && lvaTable[tempNum].lvType == TYP_DOUBLE)
+ {
+ // Spill clique has decided this should be "double", but this block only pushes a "float".
+ // Insert a cast to "double" so we match the clique.
+ verCurrentState.esStack[level].val = gtNewCastNode(TYP_DOUBLE, tree, TYP_DOUBLE);
+ }
+
+#endif // FEATURE_X87_DOUBLES
+
+ /* If addStmt has a reference to tempNum (can only happen if we
+ are spilling to the temps already used by a previous block),
+ we need to spill addStmt */
+
+ if (addStmt && !newTemps && gtHasRef(addStmt->gtStmt.gtStmtExpr, tempNum, false))
+ {
+ GenTreePtr addTree = addStmt->gtStmt.gtStmtExpr;
+
+ if (addTree->gtOper == GT_JTRUE)
+ {
+ GenTreePtr relOp = addTree->gtOp.gtOp1;
+ assert(relOp->OperIsCompare());
+
+ var_types type = genActualType(relOp->gtOp.gtOp1->TypeGet());
+
+ if (gtHasRef(relOp->gtOp.gtOp1, tempNum, false))
+ {
+ unsigned temp = lvaGrabTemp(true DEBUGARG("spill addStmt JTRUE ref Op1"));
+ impAssignTempGen(temp, relOp->gtOp.gtOp1, level);
+ type = genActualType(lvaTable[temp].TypeGet());
+ relOp->gtOp.gtOp1 = gtNewLclvNode(temp, type);
+ }
+
+ if (gtHasRef(relOp->gtOp.gtOp2, tempNum, false))
+ {
+ unsigned temp = lvaGrabTemp(true DEBUGARG("spill addStmt JTRUE ref Op2"));
+ impAssignTempGen(temp, relOp->gtOp.gtOp2, level);
+ type = genActualType(lvaTable[temp].TypeGet());
+ relOp->gtOp.gtOp2 = gtNewLclvNode(temp, type);
+ }
+ }
+ else
+ {
+ assert(addTree->gtOper == GT_SWITCH && genActualType(addTree->gtOp.gtOp1->gtType) == TYP_I_IMPL);
+
+ unsigned temp = lvaGrabTemp(true DEBUGARG("spill addStmt SWITCH"));
+ impAssignTempGen(temp, addTree->gtOp.gtOp1, level);
+ addTree->gtOp.gtOp1 = gtNewLclvNode(temp, TYP_I_IMPL);
+ }
+ }
+
+ /* Spill the stack entry, and replace with the temp */
+
+ if (!impSpillStackEntry(level, tempNum
+#ifdef DEBUG
+ ,
+ true, "Spill Stack Entry"
+#endif
+ ))
+ {
+ if (markImport)
+ {
+ BADCODE("bad stack state");
+ }
+
+ // Oops. Something went wrong when spilling. Bad code.
+ verHandleVerificationFailure(block DEBUGARG(true));
+
+ goto SPILLSTACK;
+ }
+ }
+
+ /* Put back the 'jtrue'/'switch' if we removed it earlier */
+
+ if (addStmt)
+ {
+ impAppendStmt(addStmt, (unsigned)CHECK_SPILL_NONE);
+ }
+ }
+
+ // Some of the append/spill logic works on compCurBB
+
+ assert(compCurBB == block);
+
+ /* Save the tree list in the block */
+ impEndTreeList(block);
+
+ // impEndTreeList sets BBF_IMPORTED on the block
+ // We do *NOT* want to set it later than this because
+ // impReimportSpillClique might clear it if this block is both a
+ // predecessor and successor in the current spill clique
+ assert(block->bbFlags & BBF_IMPORTED);
+
+ // If we had a int/native int, or float/double collision, we need to re-import
+ if (reimportSpillClique)
+ {
+ // This will re-import all the successors of block (as well as each of their predecessors)
+ impReimportSpillClique(block);
+
+ // For blocks that haven't been imported yet, we still need to mark them as pending import.
+ for (unsigned i = 0; i < block->NumSucc(); i++)
+ {
+ BasicBlock* succ = block->GetSucc(i);
+ if ((succ->bbFlags & BBF_IMPORTED) == 0)
+ {
+ impImportBlockPending(succ);
+ }
+ }
+ }
+ else // the normal case
+ {
+ // otherwise just import the successors of block
+
+ /* Does this block jump to any other blocks? */
+ for (unsigned i = 0; i < block->NumSucc(); i++)
+ {
+ impImportBlockPending(block->GetSucc(i));
+ }
+ }
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************/
+//
+// Ensures that "block" is a member of the list of BBs waiting to be imported, pushing it on the list if
+// necessary (and ensures that it is a member of the set of BBs on the list, by setting its byte in
+// impPendingBlockMembers). Merges the current verification state into the verification state of "block"
+// (its "pre-state").
+
+void Compiler::impImportBlockPending(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nimpImportBlockPending for BB%02u\n", block->bbNum);
+ }
+#endif
+
+ // We will add a block to the pending set if it has not already been imported (or needs to be re-imported),
+ // or if it has, but merging in a predecessor's post-state changes the block's pre-state.
+ // (When we're doing verification, we always attempt the merge to detect verification errors.)
+
+ // If the block has not been imported, add to pending set.
+ bool addToPending = ((block->bbFlags & BBF_IMPORTED) == 0);
+
+ // Initialize bbEntryState just the first time we try to add this block to the pending list
+ // Just because bbEntryState is NULL doesn't mean the pre-state wasn't previously set.
+ // We use NULL to indicate the 'common' state to avoid memory allocation
+ if ((block->bbEntryState == nullptr) && ((block->bbFlags & (BBF_IMPORTED | BBF_FAILED_VERIFICATION)) == 0) &&
+ (impGetPendingBlockMember(block) == 0))
+ {
+ verInitBBEntryState(block, &verCurrentState);
+ assert(block->bbStkDepth == 0);
+ block->bbStkDepth = static_cast<unsigned short>(verCurrentState.esStackDepth);
+ assert(addToPending);
+ assert(impGetPendingBlockMember(block) == 0);
+ }
+ else
+ {
+ // The stack should have the same height on entry to the block from all its predecessors.
+ if (block->bbStkDepth != verCurrentState.esStackDepth)
+ {
+#ifdef DEBUG
+ char buffer[400];
+ sprintf_s(buffer, sizeof(buffer),
+ "Block at offset %4.4x to %4.4x in %s entered with different stack depths.\n"
+ "Previous depth was %d, current depth is %d",
+ block->bbCodeOffs, block->bbCodeOffsEnd, info.compFullName, block->bbStkDepth,
+ verCurrentState.esStackDepth);
+ buffer[sizeof(buffer) - 1] = 0;
+ NO_WAY(buffer);
+#else
+ NO_WAY("Block entered with different stack depths");
+#endif
+ }
+
+ // Additionally, if we need to verify, merge the verification state.
+ if (tiVerificationNeeded)
+ {
+ // Merge the current state into the entry state of block; if this does not change the entry state
+ // by merging, do not add the block to the pending-list.
+ bool changed = false;
+ if (!verMergeEntryStates(block, &changed))
+ {
+ block->bbFlags |= BBF_FAILED_VERIFICATION;
+ addToPending = true; // We will pop it off, and check the flag set above.
+ }
+ else if (changed)
+ {
+ addToPending = true;
+
+ JITDUMP("Adding BB%02u to pending set due to new merge result\n", block->bbNum);
+ }
+ }
+
+ if (!addToPending)
+ {
+ return;
+ }
+
+ if (block->bbStkDepth > 0)
+ {
+ // We need to fix the types of any spill temps that might have changed:
+ // int->native int, float->double, int->byref, etc.
+ impRetypeEntryStateTemps(block);
+ }
+
+ // OK, we must add to the pending list, if it's not already in it.
+ if (impGetPendingBlockMember(block) != 0)
+ {
+ return;
+ }
+ }
+
+ // Get an entry to add to the pending list
+
+ PendingDsc* dsc;
+
+ if (impPendingFree)
+ {
+ // We can reuse one of the freed up dscs.
+ dsc = impPendingFree;
+ impPendingFree = dsc->pdNext;
+ }
+ else
+ {
+ // We have to create a new dsc
+ dsc = new (this, CMK_Unknown) PendingDsc;
+ }
+
+ dsc->pdBB = block;
+ dsc->pdSavedStack.ssDepth = verCurrentState.esStackDepth;
+ dsc->pdThisPtrInit = verCurrentState.thisInitialized;
+
+ // Save the stack trees for later
+
+ if (verCurrentState.esStackDepth)
+ {
+ impSaveStackState(&dsc->pdSavedStack, false);
+ }
+
+ // Add the entry to the pending list
+
+ dsc->pdNext = impPendingList;
+ impPendingList = dsc;
+ impSetPendingBlockMember(block, 1); // And indicate that it's now a member of the set.
+
+ // Various assertions require us to now consider the block as not imported (at least for
+ // the final time...)
+ block->bbFlags &= ~BBF_IMPORTED;
+
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ printf("Added PendingDsc - %08p for BB%02u\n", dspPtr(dsc), block->bbNum);
+ }
+#endif
+}
+
+/*****************************************************************************/
+//
+// Ensures that "block" is a member of the list of BBs waiting to be imported, pushing it on the list if
+// necessary (and ensures that it is a member of the set of BBs on the list, by setting its byte in
+// impPendingBlockMembers). Does *NOT* change the existing "pre-state" of the block.
+
+void Compiler::impReimportBlockPending(BasicBlock* block)
+{
+ JITDUMP("\nimpReimportBlockPending for BB%02u", block->bbNum);
+
+ assert(block->bbFlags & BBF_IMPORTED);
+
+ // OK, we must add to the pending list, if it's not already in it.
+ if (impGetPendingBlockMember(block) != 0)
+ {
+ return;
+ }
+
+ // Get an entry to add to the pending list
+
+ PendingDsc* dsc;
+
+ if (impPendingFree)
+ {
+ // We can reuse one of the freed up dscs.
+ dsc = impPendingFree;
+ impPendingFree = dsc->pdNext;
+ }
+ else
+ {
+ // We have to create a new dsc
+ dsc = new (this, CMK_ImpStack) PendingDsc;
+ }
+
+ dsc->pdBB = block;
+
+ if (block->bbEntryState)
+ {
+ dsc->pdThisPtrInit = block->bbEntryState->thisInitialized;
+ dsc->pdSavedStack.ssDepth = block->bbEntryState->esStackDepth;
+ dsc->pdSavedStack.ssTrees = block->bbEntryState->esStack;
+ }
+ else
+ {
+ dsc->pdThisPtrInit = TIS_Bottom;
+ dsc->pdSavedStack.ssDepth = 0;
+ dsc->pdSavedStack.ssTrees = nullptr;
+ }
+
+ // Add the entry to the pending list
+
+ dsc->pdNext = impPendingList;
+ impPendingList = dsc;
+ impSetPendingBlockMember(block, 1); // And indicate that it's now a member of the set.
+
+ // Various assertions require us to now consider the block as not imported (at least for
+ // the final time...)
+ block->bbFlags &= ~BBF_IMPORTED;
+
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ printf("Added PendingDsc - %08p for BB%02u\n", dspPtr(dsc), block->bbNum);
+ }
+#endif
+}
+
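+// Allocate a BlockListNode, reusing an entry from the compiler's free list when available.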
+void* Compiler::BlockListNode::operator new(size_t sz, Compiler* comp)
+{
+ if (comp->impBlockListNodeFreeList == nullptr)
+ {
+ return (BlockListNode*)comp->compGetMem(sizeof(BlockListNode), CMK_BasicBlock);
+ }
+ else
+ {
+ BlockListNode* res = comp->impBlockListNodeFreeList;
+ comp->impBlockListNodeFreeList = res->m_next;
+ return res;
+ }
+}
+
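+// Return a BlockListNode to the free list so a later allocation can reuse it.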
+void Compiler::FreeBlockListNode(Compiler::BlockListNode* node)
+{
+ node->m_next = impBlockListNodeFreeList;
+ impBlockListNodeFreeList = node;
+}
+
+void Compiler::impWalkSpillCliqueFromPred(BasicBlock* block, SpillCliqueWalker* callback)
+{
+ bool toDo = true;
+
+ noway_assert(!fgComputePredsDone);
+ if (!fgCheapPredsValid)
+ {
+ fgComputeCheapPreds();
+ }
+
+ BlockListNode* succCliqueToDo = nullptr;
+ BlockListNode* predCliqueToDo = new (this) BlockListNode(block);
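+ // Compute the spill clique as a fixpoint: alternately add the successors of every block
+ // on the predecessor to-do list and the predecessors of every block on the successor
+ // to-do list, visiting each newly discovered member, until neither set grows.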
+ while (toDo)
+ {
+ toDo = false;
+ // Look at the successors of every member of the predecessor to-do list.
+ while (predCliqueToDo != nullptr)
+ {
+ BlockListNode* node = predCliqueToDo;
+ predCliqueToDo = node->m_next;
+ BasicBlock* blk = node->m_blk;
+ FreeBlockListNode(node);
+
+ for (unsigned succNum = 0; succNum < blk->NumSucc(); succNum++)
+ {
+ BasicBlock* succ = blk->GetSucc(succNum);
+ // If it's not already in the clique, add it, and also add it
+ // as a member of the successor "toDo" set.
+ if (impSpillCliqueGetMember(SpillCliqueSucc, succ) == 0)
+ {
+ callback->Visit(SpillCliqueSucc, succ);
+ impSpillCliqueSetMember(SpillCliqueSucc, succ, 1);
+ succCliqueToDo = new (this) BlockListNode(succ, succCliqueToDo);
+ toDo = true;
+ }
+ }
+ }
+ // Look at the predecessors of every member of the successor to-do list.
+ while (succCliqueToDo != nullptr)
+ {
+ BlockListNode* node = succCliqueToDo;
+ succCliqueToDo = node->m_next;
+ BasicBlock* blk = node->m_blk;
+ FreeBlockListNode(node);
+
+ for (BasicBlockList* pred = blk->bbCheapPreds; pred != nullptr; pred = pred->next)
+ {
+ BasicBlock* predBlock = pred->block;
+ // If it's not already in the clique, add it, and also add it
+ // as a member of the predecessor "toDo" set.
+ if (impSpillCliqueGetMember(SpillCliquePred, predBlock) == 0)
+ {
+ callback->Visit(SpillCliquePred, predBlock);
+ impSpillCliqueSetMember(SpillCliquePred, predBlock, 1);
+ predCliqueToDo = new (this) BlockListNode(predBlock, predCliqueToDo);
+ toDo = true;
+ }
+ }
+ }
+ }
+
+ // If this fails, it means we didn't walk the spill clique properly and somehow managed
+ // to miss walking back to include the predecessor we started from.
+ // The most likely cause: missing or out-of-date bbPreds.
+ assert(impSpillCliqueGetMember(SpillCliquePred, block) != 0);
+}
+
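+// Record the chosen spill temp base on a clique member: successors record it as the base
+// of their incoming stack temps, predecessors as the base of their outgoing stack temps.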
+void Compiler::SetSpillTempsBase::Visit(SpillCliqueDir predOrSucc, BasicBlock* blk)
+{
+ if (predOrSucc == SpillCliqueSucc)
+ {
+ assert(blk->bbStkTempsIn == NO_BASE_TMP); // Should not already be a member of a clique as a successor.
+ blk->bbStkTempsIn = m_baseTmp;
+ }
+ else
+ {
+ assert(predOrSucc == SpillCliquePred);
+ assert(blk->bbStkTempsOut == NO_BASE_TMP); // Should not already be a member of a clique as a predecessor.
+ blk->bbStkTempsOut = m_baseTmp;
+ }
+}
+
+void Compiler::ReimportSpillClique::Visit(SpillCliqueDir predOrSucc, BasicBlock* blk)
+{
+ // For Preds we could be a little smarter and just find the existing store
+ // and re-type it/add a cast, but that is complicated and hopefully very rare, so
+ // just re-import the whole block (just like we do for successors)
+
+ if (((blk->bbFlags & BBF_IMPORTED) == 0) && (m_pComp->impGetPendingBlockMember(blk) == 0))
+ {
+ // If we haven't imported this block and we're not going to (because it isn't on
+ // the pending list) then just ignore it for now.
+
+ // This block has either never been imported (EntryState == NULL) or it failed
+ // verification. Neither state requires us to force it to be imported now.
+ assert((blk->bbEntryState == nullptr) || (blk->bbFlags & BBF_FAILED_VERIFICATION));
+ return;
+ }
+
+ // For successors we have a valid verCurrentState, so just mark them for reimport
+ // the 'normal' way
+ // Unlike predecessors, we *DO* need to reimport the current block because the
+ // initial import had the wrong entry state types.
+ // Similarly, blocks that are currently on the pending list, still need to call
+ // impImportBlockPending to fixup their entry state.
+ if (predOrSucc == SpillCliqueSucc)
+ {
+ m_pComp->impReimportMarkBlock(blk);
+
+ // Set the current stack state to that of the blk->bbEntryState
+ m_pComp->verResetCurrentState(blk, &m_pComp->verCurrentState);
+ assert(m_pComp->verCurrentState.thisInitialized == blk->bbThisOnEntry());
+
+ m_pComp->impImportBlockPending(blk);
+ }
+ else if ((blk != m_pComp->compCurBB) && ((blk->bbFlags & BBF_IMPORTED) != 0))
+ {
+ // As described above, we are only visiting predecessors so they can
+ // add the appropriate casts, since we have already done that for the current
+ // block, it does not need to be reimported.
+ // Nor do we need to reimport blocks that are still pending, but not yet
+ // imported.
+ //
+ // For predecessors, we have no state to seed the EntryState, so we just have
+ // to assume the existing one is correct.
+ // If the block is also a successor, it will get the EntryState properly
+ // updated when it is visited as a successor in the above "if" block.
+ assert(predOrSucc == SpillCliquePred);
+ m_pComp->impReimportBlockPending(blk);
+ }
+}
+
+// Re-type the incoming lclVar nodes to match the varDsc.
+void Compiler::impRetypeEntryStateTemps(BasicBlock* blk)
+{
+ if (blk->bbEntryState != nullptr)
+ {
+ EntryState* es = blk->bbEntryState;
+ for (unsigned level = 0; level < es->esStackDepth; level++)
+ {
+ GenTreePtr tree = es->esStack[level].val;
+ if ((tree->gtOper == GT_LCL_VAR) || (tree->gtOper == GT_LCL_FLD))
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+ es->esStack[level].val->gtType = varDsc->TypeGet();
+ }
+ }
+ }
+}
+
+unsigned Compiler::impGetSpillTmpBase(BasicBlock* block)
+{
+ if (block->bbStkTempsOut != NO_BASE_TMP)
+ {
+ return block->bbStkTempsOut;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In impGetSpillTmpBase(BB%02u)\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ // Otherwise, choose one, and propagate to all members of the spill clique.
+ // Grab enough temps for the whole stack.
+ unsigned baseTmp = lvaGrabTemps(verCurrentState.esStackDepth DEBUGARG("IL Stack Entries"));
+ SetSpillTempsBase callback(baseTmp);
+
+ // We do *NOT* need to reset the SpillClique*Members because a block can only be the predecessor
+ // to one spill clique, and similarly can only be the successor to one spill clique
+ impWalkSpillCliqueFromPred(block, &callback);
+
+ return baseTmp;
+}
+
+void Compiler::impReimportSpillClique(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In impReimportSpillClique(BB%02u)\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ // If we get here, it is because this block is already part of a spill clique
+ // and one predecessor had an outgoing live stack slot of type int, and this
+ // block has an outgoing live stack slot of type native int.
+ // We need to reset these before traversal because they have already been set
+ // by the previous walk to determine all the members of the spill clique.
+ impInlineRoot()->impSpillCliquePredMembers.Reset();
+ impInlineRoot()->impSpillCliqueSuccMembers.Reset();
+
+ ReimportSpillClique callback(this);
+
+ impWalkSpillCliqueFromPred(block, &callback);
+}
+
+// Set the pre-state of "block" (which should not have a pre-state allocated) to
+// a copy of "srcState", cloning tree pointers as required.
+void Compiler::verInitBBEntryState(BasicBlock* block, EntryState* srcState)
+{
+ if (srcState->esStackDepth == 0 && srcState->thisInitialized == TIS_Bottom)
+ {
+ block->bbEntryState = nullptr;
+ return;
+ }
+
+ block->bbEntryState = (EntryState*)compGetMemA(sizeof(EntryState));
+
+ // block->bbEntryState.esRefcount = 1;
+
+ block->bbEntryState->esStackDepth = srcState->esStackDepth;
+ block->bbEntryState->thisInitialized = TIS_Bottom;
+
+ if (srcState->esStackDepth > 0)
+ {
+ block->bbSetStack(new (this, CMK_Unknown) StackEntry[srcState->esStackDepth]);
+ unsigned stackSize = srcState->esStackDepth * sizeof(StackEntry);
+
+ memcpy(block->bbEntryState->esStack, srcState->esStack, stackSize);
+ for (unsigned level = 0; level < srcState->esStackDepth; level++)
+ {
+ GenTreePtr tree = srcState->esStack[level].val;
+ block->bbEntryState->esStack[level].val = gtCloneExpr(tree);
+ }
+ }
+
+ if (verTrackObjCtorInitState)
+ {
+ verSetThisInit(block, srcState->thisInitialized);
+ }
+
+ return;
+}
+
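+// Record the 'this'-initialization state on entry to "block", allocating an entry state if needed.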
+void Compiler::verSetThisInit(BasicBlock* block, ThisInitState tis)
+{
+ assert(tis != TIS_Bottom); // Precondition.
+ if (block->bbEntryState == nullptr)
+ {
+ block->bbEntryState = new (this, CMK_Unknown) EntryState();
+ }
+
+ block->bbEntryState->thisInitialized = tis;
+}
+
+/*
+ * Resets the current state to the state at the start of the basic block
+ */
+void Compiler::verResetCurrentState(BasicBlock* block, EntryState* destState)
+{
+
+ if (block->bbEntryState == nullptr)
+ {
+ destState->esStackDepth = 0;
+ destState->thisInitialized = TIS_Bottom;
+ return;
+ }
+
+ destState->esStackDepth = block->bbEntryState->esStackDepth;
+
+ if (destState->esStackDepth > 0)
+ {
+ unsigned stackSize = destState->esStackDepth * sizeof(StackEntry);
+
+ memcpy(destState->esStack, block->bbStackOnEntry(), stackSize);
+ }
+
+ destState->thisInitialized = block->bbThisOnEntry();
+
+ return;
+}
+
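+// Accessors for the importer/verifier entry state stored on a BasicBlock.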
+ThisInitState BasicBlock::bbThisOnEntry()
+{
+ return bbEntryState ? bbEntryState->thisInitialized : TIS_Bottom;
+}
+
+unsigned BasicBlock::bbStackDepthOnEntry()
+{
+ return (bbEntryState ? bbEntryState->esStackDepth : 0);
+}
+
+void BasicBlock::bbSetStack(void* stackBuffer)
+{
+ assert(bbEntryState);
+ assert(stackBuffer);
+ bbEntryState->esStack = (StackEntry*)stackBuffer;
+}
+
+StackEntry* BasicBlock::bbStackOnEntry()
+{
+ assert(bbEntryState);
+ return bbEntryState->esStack;
+}
+
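+// Initialize the importer's verification state for the start of the method: enable
+// 'this'-init tracking for instance constructors when verifying, reset the evaluation
+// stack, and seed the entry state of the first basic block.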
+void Compiler::verInitCurrentState()
+{
+ verTrackObjCtorInitState = FALSE;
+ verCurrentState.thisInitialized = TIS_Bottom;
+
+ if (tiVerificationNeeded)
+ {
+ // Track this ptr initialization
+ if (!info.compIsStatic && (info.compFlags & CORINFO_FLG_CONSTRUCTOR) && lvaTable[0].lvVerTypeInfo.IsObjRef())
+ {
+ verTrackObjCtorInitState = TRUE;
+ verCurrentState.thisInitialized = TIS_Uninit;
+ }
+ }
+
+ // initialize stack info
+
+ verCurrentState.esStackDepth = 0;
+ assert(verCurrentState.esStack != nullptr);
+
+ // copy current state to entry state of first BB
+ verInitBBEntryState(fgFirstBB, &verCurrentState);
+}
+
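+// Return the Compiler instance at the root of the inlining tree ("this" when not inlining).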
+Compiler* Compiler::impInlineRoot()
+{
+ if (impInlineInfo == nullptr)
+ {
+ return this;
+ }
+ else
+ {
+ return impInlineInfo->InlineRoot;
+ }
+}
+
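+// Get the spill-clique membership byte for "blk" in the given direction; membership is
+// tracked on the root compiler of the inlining tree.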
+BYTE Compiler::impSpillCliqueGetMember(SpillCliqueDir predOrSucc, BasicBlock* blk)
+{
+ if (predOrSucc == SpillCliquePred)
+ {
+ return impInlineRoot()->impSpillCliquePredMembers.Get(blk->bbInd());
+ }
+ else
+ {
+ assert(predOrSucc == SpillCliqueSucc);
+ return impInlineRoot()->impSpillCliqueSuccMembers.Get(blk->bbInd());
+ }
+}
+
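+// Set the spill-clique membership byte for "blk" in the given direction.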
+void Compiler::impSpillCliqueSetMember(SpillCliqueDir predOrSucc, BasicBlock* blk, BYTE val)
+{
+ if (predOrSucc == SpillCliquePred)
+ {
+ impInlineRoot()->impSpillCliquePredMembers.Set(blk->bbInd(), val);
+ }
+ else
+ {
+ assert(predOrSucc == SpillCliqueSucc);
+ impInlineRoot()->impSpillCliqueSuccMembers.Set(blk->bbInd(), val);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Convert the instrs ("import") into our internal format (trees). The
+ * basic flowgraph has already been constructed and is passed in.
+ */
+
+void Compiler::impImport(BasicBlock* method)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In impImport() for %s\n", info.compFullName);
+ }
+#endif
+
+ /* Allocate the stack contents */
+
+ if (info.compMaxStack <= sizeof(impSmallStack) / sizeof(impSmallStack[0]))
+ {
+ /* Use local variable, don't waste time allocating on the heap */
+
+ impStkSize = sizeof(impSmallStack) / sizeof(impSmallStack[0]);
+ verCurrentState.esStack = impSmallStack;
+ }
+ else
+ {
+ impStkSize = info.compMaxStack;
+ verCurrentState.esStack = new (this, CMK_ImpStack) StackEntry[impStkSize];
+ }
+
+ // initialize the entry state at start of method
+ verInitCurrentState();
+
+ // Initialize stuff related to figuring "spill cliques" (see spec comment for impGetSpillTmpBase).
+ Compiler* inlineRoot = impInlineRoot();
+ if (this == inlineRoot) // These are only used on the root of the inlining tree.
+ {
+ // We have initialized these previously, but to size 0. Make them larger.
+ impPendingBlockMembers.Init(getAllocator(), fgBBNumMax * 2);
+ impSpillCliquePredMembers.Init(getAllocator(), fgBBNumMax * 2);
+ impSpillCliqueSuccMembers.Init(getAllocator(), fgBBNumMax * 2);
+ }
+ inlineRoot->impPendingBlockMembers.Reset(fgBBNumMax * 2);
+ inlineRoot->impSpillCliquePredMembers.Reset(fgBBNumMax * 2);
+ inlineRoot->impSpillCliqueSuccMembers.Reset(fgBBNumMax * 2);
+ impBlockListNodeFreeList = nullptr;
+
+#ifdef DEBUG
+ impLastILoffsStmt = nullptr;
+ impNestedStackSpill = false;
+#endif
+ impBoxTemp = BAD_VAR_NUM;
+
+ impPendingList = impPendingFree = nullptr;
+
+ /* Add the entry-point to the worker-list */
+
+ // Skip leading internal blocks. There can be one as a leading scratch BB, and more
+ // from EH normalization.
+ // NOTE: It might be possible to always just put fgFirstBB on the pending list, and let everything else just fall
+ // out.
+ for (; method->bbFlags & BBF_INTERNAL; method = method->bbNext)
+ {
+ // Treat these as imported.
+ assert(method->bbJumpKind == BBJ_NONE); // We assume all the leading ones are fallthrough.
+ JITDUMP("Marking leading BBF_INTERNAL block BB%02u as BBF_IMPORTED\n", method->bbNum);
+ method->bbFlags |= BBF_IMPORTED;
+ }
+
+ impImportBlockPending(method);
+
+ /* Import blocks in the worker-list until there are no more */
+
+ while (impPendingList)
+ {
+ /* Remove the entry at the front of the list */
+
+ PendingDsc* dsc = impPendingList;
+ impPendingList = impPendingList->pdNext;
+ impSetPendingBlockMember(dsc->pdBB, 0);
+
+ /* Restore the stack state */
+
+ verCurrentState.thisInitialized = dsc->pdThisPtrInit;
+ verCurrentState.esStackDepth = dsc->pdSavedStack.ssDepth;
+ if (verCurrentState.esStackDepth)
+ {
+ impRestoreStackState(&dsc->pdSavedStack);
+ }
+
+ /* Add the entry to the free list for reuse */
+
+ dsc->pdNext = impPendingFree;
+ impPendingFree = dsc;
+
+ /* Now import the block */
+
+ if (dsc->pdBB->bbFlags & BBF_FAILED_VERIFICATION)
+ {
+
+#ifdef _TARGET_64BIT_
+ // On AMD64, during verification we have to match JIT64 behavior since the VM is very tightly
+ // coupled with the JIT64 IL Verification logic. Look inside verHandleVerificationFailure
+ // method for further explanation on why we raise this exception instead of making the jitted
+ // code throw the verification exception during execution.
+ if (tiVerificationNeeded && (opts.eeFlags & CORJIT_FLG_IMPORT_ONLY) != 0)
+ {
+ BADCODE("Basic block marked as not verifiable");
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+ verConvertBBToThrowVerificationException(dsc->pdBB DEBUGARG(true));
+ impEndTreeList(dsc->pdBB);
+ }
+ }
+ else
+ {
+ impImportBlock(dsc->pdBB);
+
+ if (compDonotInline())
+ {
+ return;
+ }
+ if (compIsForImportOnly() && !tiVerificationNeeded)
+ {
+ return;
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose && info.compXcptnsCount)
+ {
+ printf("\nAfter impImport() added block for try,catch,finally");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+
+ // Used in impImportBlockPending() for STRESS_CHK_REIMPORT
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ block->bbFlags &= ~BBF_VISITED;
+ }
+#endif
+
+ assert(!compIsForInlining() || !tiVerificationNeeded);
+}
+
+// Checks if a typeinfo (usually stored in the type stack) is a struct.
+// The invariant here is that if it's not a ref or a method and has a class handle,
+// it's a value type.
+bool Compiler::impIsValueType(typeInfo* pTypeInfo)
+{
+ if (pTypeInfo && pTypeInfo->IsValueClassWithClsHnd())
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************
+ * Check to see if the tree is the address of a local or
+ * the address of a field in a local.
+ *
+ * *lclVarTreeOut will contain the GT_LCL_VAR tree when it returns TRUE.
+ */
+
+BOOL Compiler::impIsAddressInLocal(GenTreePtr tree, GenTreePtr* lclVarTreeOut)
+{
+ if (tree->gtOper != GT_ADDR)
+ {
+ return FALSE;
+ }
+
+ GenTreePtr op = tree->gtOp.gtOp1;
+ while (op->gtOper == GT_FIELD)
+ {
+ op = op->gtField.gtFldObj;
+ if (op && op->gtOper == GT_ADDR) // Skip static fields where op will be NULL.
+ {
+ op = op->gtOp.gtOp1;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ *lclVarTreeOut = op;
+ return TRUE;
+ }
+ else
+ {
+ return FALSE;
+ }
+}
+
+//------------------------------------------------------------------------
+// impMakeDiscretionaryInlineObservations: make observations that help
+// determine the profitability of a discretionary inline
+//
+// Arguments:
+// pInlineInfo -- InlineInfo for the inline, or null for the prejit root
+// inlineResult -- InlineResult accumulating information about this inline
+//
+// Notes:
+// If inlining or prejitting the root, this method also makes
+// various observations about the method that factor into inline
+// decisions. It sets `compNativeSizeEstimate` as a side effect.
+
+void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, InlineResult* inlineResult)
+{
+ assert((pInlineInfo != nullptr && compIsForInlining()) || // Perform the actual inlining.
+ (pInlineInfo == nullptr && !compIsForInlining()) // Calculate the static inlining hint for ngen.
+ );
+
+ // If we're really inlining, we should just have one result in play.
+ assert((pInlineInfo == nullptr) || (inlineResult == pInlineInfo->inlineResult));
+
+ // If this is a "forceinline" method, the JIT probably shouldn't have gone
+ // to the trouble of estimating the native code size. Even if it did, it
+ // shouldn't be relying on the result of this method.
+ assert(inlineResult->GetObservation() == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+
+ // Note if the caller contains NEWOBJ or NEWARR.
+ Compiler* rootCompiler = impInlineRoot();
+
+ if ((rootCompiler->optMethodFlags & OMF_HAS_NEWARRAY) != 0)
+ {
+ inlineResult->Note(InlineObservation::CALLER_HAS_NEWARRAY);
+ }
+
+ if ((rootCompiler->optMethodFlags & OMF_HAS_NEWOBJ) != 0)
+ {
+ inlineResult->Note(InlineObservation::CALLER_HAS_NEWOBJ);
+ }
+
+ bool calleeIsStatic = (info.compFlags & CORINFO_FLG_STATIC) != 0;
+ bool isSpecialMethod = (info.compFlags & CORINFO_FLG_CONSTRUCTOR) != 0;
+
+ if (isSpecialMethod)
+ {
+ if (calleeIsStatic)
+ {
+ inlineResult->Note(InlineObservation::CALLEE_IS_CLASS_CTOR);
+ }
+ else
+ {
+ inlineResult->Note(InlineObservation::CALLEE_IS_INSTANCE_CTOR);
+ }
+ }
+ else if (!calleeIsStatic)
+ {
+ // Callee is an instance method.
+ //
+ // Check if the callee has the same 'this' as the root.
+ if (pInlineInfo != nullptr)
+ {
+ GenTreePtr thisArg = pInlineInfo->iciCall->gtCall.gtCallObjp;
+ assert(thisArg);
+ bool isSameThis = impIsThis(thisArg);
+ inlineResult->NoteBool(InlineObservation::CALLSITE_IS_SAME_THIS, isSameThis);
+ }
+ }
+
+ // Note if the callee's class is a promotable struct
+ if ((info.compClassAttr & CORINFO_FLG_VALUECLASS) != 0)
+ {
+ lvaStructPromotionInfo structPromotionInfo;
+ lvaCanPromoteStructType(info.compClassHnd, &structPromotionInfo, false);
+ if (structPromotionInfo.canPromote)
+ {
+ inlineResult->Note(InlineObservation::CALLEE_CLASS_PROMOTABLE);
+ }
+ }
+
+#ifdef FEATURE_SIMD
+
+ // Note if this method has SIMD args or a SIMD return value
+ if (pInlineInfo != nullptr && pInlineInfo->hasSIMDTypeArgLocalOrReturn)
+ {
+ inlineResult->Note(InlineObservation::CALLEE_HAS_SIMD);
+ }
+
+#endif // FEATURE_SIMD
+
+ // Roughly classify callsite frequency.
+ InlineCallsiteFrequency frequency = InlineCallsiteFrequency::UNUSED;
+
+ // If this is a prejit root, or a maximally hot block...
+ if ((pInlineInfo == nullptr) || (pInlineInfo->iciBlock->bbWeight >= BB_MAX_WEIGHT))
+ {
+ frequency = InlineCallsiteFrequency::HOT;
+ }
+ // No training data. Look for loop-like things.
+ // We consider a recursive call loop-like. Do not give the inlining boost to the method itself.
+ // However, give it to things nearby.
+ else if ((pInlineInfo->iciBlock->bbFlags & BBF_BACKWARD_JUMP) &&
+ (pInlineInfo->fncHandle != pInlineInfo->inlineCandidateInfo->ilCallerHandle))
+ {
+ frequency = InlineCallsiteFrequency::LOOP;
+ }
+ else if ((pInlineInfo->iciBlock->bbFlags & BBF_PROF_WEIGHT) && (pInlineInfo->iciBlock->bbWeight > BB_ZERO_WEIGHT))
+ {
+ frequency = InlineCallsiteFrequency::WARM;
+ }
+ // Now modify the multiplier based on where we're called from.
+ else if (pInlineInfo->iciBlock->isRunRarely() || ((info.compFlags & FLG_CCTOR) == FLG_CCTOR))
+ {
+ frequency = InlineCallsiteFrequency::RARE;
+ }
+ else
+ {
+ frequency = InlineCallsiteFrequency::BORING;
+ }
+
+ // Also capture the block weight of the call site. In the prejit
+ // root case, assume there's some hot call site for this method.
+ unsigned weight = 0;
+
+ if (pInlineInfo != nullptr)
+ {
+ weight = pInlineInfo->iciBlock->bbWeight;
+ }
+ else
+ {
+ weight = BB_MAX_WEIGHT;
+ }
+
+ inlineResult->NoteInt(InlineObservation::CALLSITE_FREQUENCY, static_cast<int>(frequency));
+ inlineResult->NoteInt(InlineObservation::CALLSITE_WEIGHT, static_cast<int>(weight));
+}
+
+/*****************************************************************************
+ This method makes STATIC inlining decision based on the IL code.
+ It should not make any inlining decision based on the context.
+ If forceInline is true, then the inlining decision should not depend on
+ performance heuristics (code size, etc.).
+ */
+
+void Compiler::impCanInlineIL(CORINFO_METHOD_HANDLE fncHandle,
+ CORINFO_METHOD_INFO* methInfo,
+ bool forceInline,
+ InlineResult* inlineResult)
+{
+ unsigned codeSize = methInfo->ILCodeSize;
+
+ // We shouldn't have made up our minds yet...
+ assert(!inlineResult->IsDecided());
+
+ if (methInfo->EHcount)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_EH);
+ return;
+ }
+
+ if ((methInfo->ILCode == nullptr) || (codeSize == 0))
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_NO_BODY);
+ return;
+ }
+
+ // For now we don't inline varargs (import code can't handle it)
+
+ if (methInfo->args.isVarArg())
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_MANAGED_VARARGS);
+ return;
+ }
+
+ // Reject if it has too many locals.
+ // This is currently an implementation limit due to fixed-size arrays in the
+ // inline info, rather than a performance heuristic.
+
+ inlineResult->NoteInt(InlineObservation::CALLEE_NUMBER_OF_LOCALS, methInfo->locals.numArgs);
+
+ if (methInfo->locals.numArgs > MAX_INL_LCLS)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_TOO_MANY_LOCALS);
+ return;
+ }
+
+ // Make sure there aren't too many arguments.
+ // This is currently an implementation limit due to fixed-size arrays in the
+ // inline info, rather than a performance heuristic.
+
+ inlineResult->NoteInt(InlineObservation::CALLEE_NUMBER_OF_ARGUMENTS, methInfo->args.numArgs);
+
+ if (methInfo->args.numArgs > MAX_INL_ARGS)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_TOO_MANY_ARGUMENTS);
+ return;
+ }
+
+ // Note force inline state
+
+ inlineResult->NoteBool(InlineObservation::CALLEE_IS_FORCE_INLINE, forceInline);
+
+ // Note IL code size
+
+ inlineResult->NoteInt(InlineObservation::CALLEE_IL_CODE_SIZE, codeSize);
+
+ if (inlineResult->IsFailure())
+ {
+ return;
+ }
+
+ // Make sure maxstack is not too big
+
+ inlineResult->NoteInt(InlineObservation::CALLEE_MAXSTACK, methInfo->maxStack);
+
+ if (inlineResult->IsFailure())
+ {
+ return;
+ }
+}
+
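+// Minimal usage sketch (mirrors the call made from impCheckCanInline below; the
+// surrounding setup is elided):
+//
+//     CORINFO_METHOD_INFO methInfo;
+//     if (info.compCompHnd->getMethodInfo(fncHandle, &methInfo))
+//     {
+//         impCanInlineIL(fncHandle, &methInfo, forceInline, inlineResult);
+//         if (inlineResult->IsFailure())
+//         {
+//             // Every failure reported here is a "never" failure.
+//             assert(inlineResult->IsNever());
+//         }
+//     }
+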
+/*****************************************************************************
+ */
+
+void Compiler::impCheckCanInline(GenTreePtr call,
+ CORINFO_METHOD_HANDLE fncHandle,
+ unsigned methAttr,
+ CORINFO_CONTEXT_HANDLE exactContextHnd,
+ InlineCandidateInfo** ppInlineCandidateInfo,
+ InlineResult* inlineResult)
+{
+ // Either EE or JIT might throw exceptions below.
+ // If that happens, just don't inline the method.
+
+ struct Param
+ {
+ Compiler* pThis;
+ GenTreePtr call;
+ CORINFO_METHOD_HANDLE fncHandle;
+ unsigned methAttr;
+ CORINFO_CONTEXT_HANDLE exactContextHnd;
+ InlineResult* result;
+ InlineCandidateInfo** ppInlineCandidateInfo;
+ } param = {nullptr};
+
+ param.pThis = this;
+ param.call = call;
+ param.fncHandle = fncHandle;
+ param.methAttr = methAttr;
+ param.exactContextHnd = (exactContextHnd != nullptr) ? exactContextHnd : MAKE_METHODCONTEXT(fncHandle);
+ param.result = inlineResult;
+ param.ppInlineCandidateInfo = ppInlineCandidateInfo;
+
+ bool success = eeRunWithErrorTrap<Param>(
+ [](Param* pParam) {
+ DWORD dwRestrictions = 0;
+ CorInfoInitClassResult initClassResult;
+
+#ifdef DEBUG
+ const char* methodName;
+ const char* className;
+ methodName = pParam->pThis->eeGetMethodName(pParam->fncHandle, &className);
+
+ if (JitConfig.JitNoInline())
+ {
+ pParam->result->NoteFatal(InlineObservation::CALLEE_IS_JIT_NOINLINE);
+ goto _exit;
+ }
+#endif
+
+ /* Try to get the code address/size for the method */
+
+ CORINFO_METHOD_INFO methInfo;
+ if (!pParam->pThis->info.compCompHnd->getMethodInfo(pParam->fncHandle, &methInfo))
+ {
+ pParam->result->NoteFatal(InlineObservation::CALLEE_NO_METHOD_INFO);
+ goto _exit;
+ }
+
+ bool forceInline;
+ forceInline = !!(pParam->methAttr & CORINFO_FLG_FORCEINLINE);
+
+ pParam->pThis->impCanInlineIL(pParam->fncHandle, &methInfo, forceInline, pParam->result);
+
+ if (pParam->result->IsFailure())
+ {
+ assert(pParam->result->IsNever());
+ goto _exit;
+ }
+
+ // Speculatively check if initClass() can be done.
+ // If it can be done, we will try to inline the method. If inlining
+ // succeeds, then we will do the non-speculative initClass() and commit it.
+ // If this speculative call to initClass() fails, there is no point
+ // trying to inline this method.
+ initClassResult =
+ pParam->pThis->info.compCompHnd->initClass(nullptr /* field */, pParam->fncHandle /* method */,
+ pParam->exactContextHnd /* context */,
+ TRUE /* speculative */);
+
+ if (initClassResult & CORINFO_INITCLASS_DONT_INLINE)
+ {
+ pParam->result->NoteFatal(InlineObservation::CALLSITE_CLASS_INIT_FAILURE_SPEC);
+ goto _exit;
+ }
+
+ // Give the EE the final say in whether to inline or not.
+ // This should be last since for verifiable code, this can be expensive
+
+ /* VM Inline check also ensures that the method is verifiable if needed */
+ CorInfoInline vmResult;
+ vmResult = pParam->pThis->info.compCompHnd->canInline(pParam->pThis->info.compMethodHnd, pParam->fncHandle,
+ &dwRestrictions);
+
+ if (vmResult == INLINE_FAIL)
+ {
+ pParam->result->NoteFatal(InlineObservation::CALLSITE_IS_VM_NOINLINE);
+ }
+ else if (vmResult == INLINE_NEVER)
+ {
+ pParam->result->NoteFatal(InlineObservation::CALLEE_IS_VM_NOINLINE);
+ }
+
+ if (pParam->result->IsFailure())
+ {
+ // Make sure not to report this one. It was already reported by the VM.
+ pParam->result->SetReported();
+ goto _exit;
+ }
+
+ // check for unsupported inlining restrictions
+ assert((dwRestrictions & ~(INLINE_RESPECT_BOUNDARY | INLINE_NO_CALLEE_LDSTR | INLINE_SAME_THIS)) == 0);
+
+ if (dwRestrictions & INLINE_SAME_THIS)
+ {
+ GenTreePtr thisArg = pParam->call->gtCall.gtCallObjp;
+ assert(thisArg);
+
+ if (!pParam->pThis->impIsThis(thisArg))
+ {
+ pParam->result->NoteFatal(InlineObservation::CALLSITE_REQUIRES_SAME_THIS);
+ goto _exit;
+ }
+ }
+
+ /* Get the method properties */
+
+ CORINFO_CLASS_HANDLE clsHandle;
+ clsHandle = pParam->pThis->info.compCompHnd->getMethodClass(pParam->fncHandle);
+ unsigned clsAttr;
+ clsAttr = pParam->pThis->info.compCompHnd->getClassAttribs(clsHandle);
+
+ /* Get the return type */
+
+ var_types fncRetType;
+ fncRetType = pParam->call->TypeGet();
+
+#ifdef DEBUG
+ var_types fncRealRetType;
+ fncRealRetType = JITtype2varType(methInfo.args.retType);
+
+ assert((genActualType(fncRealRetType) == genActualType(fncRetType)) ||
+ // <BUGNUM> VSW 288602 </BUGNUM>
+ // In case of IJW, we allow to assign a native pointer to a BYREF.
+ (fncRetType == TYP_BYREF && methInfo.args.retType == CORINFO_TYPE_PTR) ||
+ (varTypeIsStruct(fncRetType) && (fncRealRetType == TYP_STRUCT)));
+#endif
+
+ //
+ // Allocate an InlineCandidateInfo structure
+ //
+ InlineCandidateInfo* pInfo;
+ pInfo = new (pParam->pThis, CMK_Inlining) InlineCandidateInfo;
+
+ pInfo->dwRestrictions = dwRestrictions;
+ pInfo->methInfo = methInfo;
+ pInfo->methAttr = pParam->methAttr;
+ pInfo->clsHandle = clsHandle;
+ pInfo->clsAttr = clsAttr;
+ pInfo->fncRetType = fncRetType;
+ pInfo->exactContextHnd = pParam->exactContextHnd;
+ pInfo->ilCallerHandle = pParam->pThis->info.compMethodHnd;
+ pInfo->initClassResult = initClassResult;
+
+ *(pParam->ppInlineCandidateInfo) = pInfo;
+
+ _exit:;
+ },
+ &param);
+ if (!success)
+ {
+ param.result->NoteFatal(InlineObservation::CALLSITE_COMPILATION_ERROR);
+ }
+}
+
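+// The eeRunWithErrorTrap pattern used above, reduced to a skeleton (a sketch;
+// "DoChecks" is a placeholder): state is passed through a local struct so the
+// lambda can stay capture-free, and any EE or JIT exception converts into a
+// CALLSITE_COMPILATION_ERROR observation.
+//
+//     struct Param
+//     {
+//         Compiler*     pThis;
+//         InlineResult* result;
+//     } param = {this, inlineResult};
+//
+//     bool success = eeRunWithErrorTrap<Param>([](Param* p) { /* DoChecks(p); */ }, &param);
+//     if (!success)
+//     {
+//         param.result->NoteFatal(InlineObservation::CALLSITE_COMPILATION_ERROR);
+//     }
+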
+void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo,
+ GenTreePtr curArgVal,
+ unsigned argNum,
+ InlineResult* inlineResult)
+{
+ InlArgInfo* inlCurArgInfo = &pInlineInfo->inlArgInfo[argNum];
+
+ if (curArgVal->gtOper == GT_MKREFANY)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_IS_MKREFANY);
+ return;
+ }
+
+ inlCurArgInfo->argNode = curArgVal;
+
+ GenTreePtr lclVarTree;
+ if (impIsAddressInLocal(curArgVal, &lclVarTree) && varTypeIsStruct(lclVarTree))
+ {
+ inlCurArgInfo->argIsByRefToStructLocal = true;
+#ifdef FEATURE_SIMD
+ if (lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum].lvSIMDType)
+ {
+ pInlineInfo->hasSIMDTypeArgLocalOrReturn = true;
+ }
+#endif // FEATURE_SIMD
+ }
+
+ if (curArgVal->gtFlags & GTF_ORDER_SIDEEFF)
+ {
+ // Right now impInlineSpillLclRefs and impInlineSpillGlobEffects don't take
+ // into account special side effects, so we disallow them during inlining.
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_HAS_SIDE_EFFECT);
+ return;
+ }
+
+ if (curArgVal->gtFlags & GTF_GLOB_EFFECT)
+ {
+ inlCurArgInfo->argHasGlobRef = (curArgVal->gtFlags & GTF_GLOB_REF) != 0;
+ inlCurArgInfo->argHasSideEff = (curArgVal->gtFlags & GTF_SIDE_EFFECT) != 0;
+ }
+
+ if (curArgVal->gtOper == GT_LCL_VAR)
+ {
+ inlCurArgInfo->argIsLclVar = true;
+
+ /* Remember the "original" argument number */
+ curArgVal->gtLclVar.gtLclILoffs = argNum;
+ }
+
+ if ((curArgVal->OperKind() & GTK_CONST) ||
+ ((curArgVal->gtOper == GT_ADDR) && (curArgVal->gtOp.gtOp1->gtOper == GT_LCL_VAR)))
+ {
+ inlCurArgInfo->argIsInvariant = true;
+ if (inlCurArgInfo->argIsThis && (curArgVal->gtOper == GT_CNS_INT) && (curArgVal->gtIntCon.gtIconVal == 0))
+ {
+ /* Abort, but do not mark as not inlinable */
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_HAS_NULL_THIS);
+ return;
+ }
+ }
+
+ if (!inlCurArgInfo->argIsInvariant && gtHasLocalsWithAddrOp(curArgVal))
+ {
+ inlCurArgInfo->argHasLdargaOp = true;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (inlCurArgInfo->argIsThis)
+ {
+ printf("thisArg:");
+ }
+ else
+ {
+ printf("\nArgument #%u:", argNum);
+ }
+ if (inlCurArgInfo->argIsLclVar)
+ {
+ printf(" is a local var");
+ }
+ if (inlCurArgInfo->argIsInvariant)
+ {
+ printf(" is a constant");
+ }
+ if (inlCurArgInfo->argHasGlobRef)
+ {
+ printf(" has global refs");
+ }
+ if (inlCurArgInfo->argHasSideEff)
+ {
+ printf(" has side effects");
+ }
+ if (inlCurArgInfo->argHasLdargaOp)
+ {
+ printf(" has ldarga effect");
+ }
+ if (inlCurArgInfo->argHasStargOp)
+ {
+ printf(" has starg effect");
+ }
+ if (inlCurArgInfo->argIsByRefToStructLocal)
+ {
+ printf(" is byref to a struct local");
+ }
+
+ printf("\n");
+ gtDispTree(curArgVal);
+ printf("\n");
+ }
+#endif
+}
+
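+// A few argument shapes and the flags they pick up above (the caller-side IL is
+// hypothetical):
+//
+//     ldc.i4 42         -> constant node:          argIsInvariant
+//     ldloca.s V_0      -> GT_ADDR(GT_LCL_VAR):    argIsInvariant, plus
+//                          argIsByRefToStructLocal if V_0 is a struct
+//     ldloc.0           -> GT_LCL_VAR:             argIsLclVar
+//     call Foo::Bar()   -> side-effecting tree:    argHasSideEff
+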
+/*****************************************************************************
+ *
+ */
+
+void Compiler::impInlineInitVars(InlineInfo* pInlineInfo)
+{
+ assert(!compIsForInlining());
+
+ GenTreePtr call = pInlineInfo->iciCall;
+ CORINFO_METHOD_INFO* methInfo = &pInlineInfo->inlineCandidateInfo->methInfo;
+ unsigned clsAttr = pInlineInfo->inlineCandidateInfo->clsAttr;
+ InlArgInfo* inlArgInfo = pInlineInfo->inlArgInfo;
+ InlLclVarInfo* lclVarInfo = pInlineInfo->lclVarInfo;
+ InlineResult* inlineResult = pInlineInfo->inlineResult;
+
+ const bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(methInfo);
+
+ /* Init the argument info structs */
+
+ memset(inlArgInfo, 0, (MAX_INL_ARGS + 1) * sizeof(inlArgInfo[0]));
+
+ /* Get hold of the 'this' pointer and the argument list proper */
+
+ GenTreePtr thisArg = call->gtCall.gtCallObjp;
+ GenTreePtr argList = call->gtCall.gtCallArgs;
+ unsigned argCnt = 0; // Count of the arguments
+
+ assert((methInfo->args.hasThis()) == (thisArg != nullptr));
+
+ if (thisArg)
+ {
+ inlArgInfo[0].argIsThis = true;
+
+ impInlineRecordArgInfo(pInlineInfo, thisArg, argCnt, inlineResult);
+
+ if (inlineResult->IsFailure())
+ {
+ return;
+ }
+
+ /* Increment the argument count */
+ argCnt++;
+ }
+
+ /* Record some information about each of the arguments */
+ bool hasTypeCtxtArg = (methInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) != 0;
+
+#if USER_ARGS_COME_LAST
+ unsigned typeCtxtArg = thisArg ? 1 : 0;
+#else // USER_ARGS_COME_LAST
+ unsigned typeCtxtArg = methInfo->args.totalILArgs();
+#endif // USER_ARGS_COME_LAST
+
+ for (GenTreePtr argTmp = argList; argTmp; argTmp = argTmp->gtOp.gtOp2)
+ {
+ if (argTmp == argList && hasRetBuffArg)
+ {
+ continue;
+ }
+
+ // Ignore the type context argument
+ if (hasTypeCtxtArg && (argCnt == typeCtxtArg))
+ {
+ typeCtxtArg = 0xFFFFFFFF;
+ continue;
+ }
+
+ assert(argTmp->gtOper == GT_LIST);
+ GenTreePtr argVal = argTmp->gtOp.gtOp1;
+
+ impInlineRecordArgInfo(pInlineInfo, argVal, argCnt, inlineResult);
+
+ if (inlineResult->IsFailure())
+ {
+ return;
+ }
+
+ /* Increment the argument count */
+ argCnt++;
+ }
+
+ /* Make sure we got the arg number right */
+ assert(argCnt == methInfo->args.totalILArgs());
+
+#ifdef FEATURE_SIMD
+ bool foundSIMDType = pInlineInfo->hasSIMDTypeArgLocalOrReturn;
+#endif // FEATURE_SIMD
+
+ /* We have typeless opcodes, get type information from the signature */
+
+ if (thisArg)
+ {
+ var_types sigType;
+
+ if (clsAttr & CORINFO_FLG_VALUECLASS)
+ {
+ sigType = TYP_BYREF;
+ }
+ else
+ {
+ sigType = TYP_REF;
+ }
+
+ lclVarInfo[0].lclVerTypeInfo = verMakeTypeInfo(pInlineInfo->inlineCandidateInfo->clsHandle);
+ lclVarInfo[0].lclHasLdlocaOp = false;
+
+#ifdef FEATURE_SIMD
+ // We always want to check isSIMDClass, since we want to set foundSIMDType (to increase
+ // the inlining multiplier) for anything in that assembly.
+ // But we only need to normalize it if it is a TYP_STRUCT
+ // (which we need to do even if we have already set foundSIMDType).
+ if ((!foundSIMDType || (sigType == TYP_STRUCT)) && isSIMDClass(&(lclVarInfo[0].lclVerTypeInfo)))
+ {
+ if (sigType == TYP_STRUCT)
+ {
+ sigType = impNormStructType(lclVarInfo[0].lclVerTypeInfo.GetClassHandle());
+ }
+ foundSIMDType = true;
+ }
+#endif // FEATURE_SIMD
+ lclVarInfo[0].lclTypeInfo = sigType;
+
+ assert(varTypeIsGC(thisArg->gtType) || // "this" is managed
+ (thisArg->gtType == TYP_I_IMPL && // "this" is unmanaged but the method's class doesn't care
+ (clsAttr & CORINFO_FLG_VALUECLASS)));
+
+ if (genActualType(thisArg->gtType) != genActualType(sigType))
+ {
+ if (sigType == TYP_REF)
+ {
+ /* The argument cannot be bashed into a ref (see bug 750871) */
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_NO_BASH_TO_REF);
+ return;
+ }
+
+ /* This can only happen with byrefs <-> ints/shorts */
+
+ assert(genActualType(sigType) == TYP_I_IMPL || sigType == TYP_BYREF);
+ assert(genActualType(thisArg->gtType) == TYP_I_IMPL || thisArg->gtType == TYP_BYREF);
+
+ if (sigType == TYP_BYREF)
+ {
+ lclVarInfo[0].lclVerTypeInfo = typeInfo(varType2tiType(TYP_I_IMPL));
+ }
+ else if (thisArg->gtType == TYP_BYREF)
+ {
+ assert(sigType == TYP_I_IMPL);
+
+ /* If possible change the BYREF to an int */
+ if (thisArg->IsVarAddr())
+ {
+ thisArg->gtType = TYP_I_IMPL;
+ lclVarInfo[0].lclVerTypeInfo = typeInfo(varType2tiType(TYP_I_IMPL));
+ }
+ else
+ {
+ /* Arguments 'int <- byref' cannot be bashed */
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_NO_BASH_TO_INT);
+ return;
+ }
+ }
+ }
+ }
+
+ /* Init the types of the arguments and make sure the types
+ * from the trees match the types in the signature */
+
+ CORINFO_ARG_LIST_HANDLE argLst;
+ argLst = methInfo->args.args;
+
+ unsigned i;
+ for (i = (thisArg ? 1 : 0); i < argCnt; i++, argLst = info.compCompHnd->getArgNext(argLst))
+ {
+ var_types sigType = (var_types)eeGetArgType(argLst, &methInfo->args);
+
+ lclVarInfo[i].lclVerTypeInfo = verParseArgSigToTypeInfo(&methInfo->args, argLst);
+#ifdef FEATURE_SIMD
+ if ((!foundSIMDType || (sigType == TYP_STRUCT)) && isSIMDClass(&(lclVarInfo[i].lclVerTypeInfo)))
+ {
+ // If this is a SIMD class (i.e. in the SIMD assembly), then we will consider that we've
+ // found a SIMD type, even if this may not be a type we recognize (the assumption is that
+ // it is likely to use a SIMD type, and therefore we want to increase the inlining multiplier).
+ foundSIMDType = true;
+ if (sigType == TYP_STRUCT)
+ {
+ var_types structType = impNormStructType(lclVarInfo[i].lclVerTypeInfo.GetClassHandle());
+ sigType = structType;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ lclVarInfo[i].lclTypeInfo = sigType;
+ lclVarInfo[i].lclHasLdlocaOp = false;
+
+ /* Does the tree type match the signature type? */
+
+ GenTreePtr inlArgNode = inlArgInfo[i].argNode;
+
+ if (sigType != inlArgNode->gtType)
+ {
+ /* In valid IL, this can only happen for short integer types or byrefs <-> [native] ints,
+ but in bad IL cases with caller-callee signature mismatches we can see other types.
+ Intentionally reject mismatched cases so the jit remains robust when
+ encountering bad IL. */
+
+ bool isPlausibleTypeMatch = (genActualType(sigType) == genActualType(inlArgNode->gtType)) ||
+ (genActualTypeIsIntOrI(sigType) && inlArgNode->gtType == TYP_BYREF) ||
+ (sigType == TYP_BYREF && genActualTypeIsIntOrI(inlArgNode->gtType));
+
+ if (!isPlausibleTypeMatch)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_TYPES_INCOMPATIBLE);
+ return;
+ }
+
+ /* Is it a narrowing or widening cast?
+ * Widening casts are ok since the value computed is already
+ * normalized to an int (on the IL stack) */
+
+ if (genTypeSize(inlArgNode->gtType) >= genTypeSize(sigType))
+ {
+ if (sigType == TYP_BYREF)
+ {
+ lclVarInfo[i].lclVerTypeInfo = typeInfo(varType2tiType(TYP_I_IMPL));
+ }
+ else if (inlArgNode->gtType == TYP_BYREF)
+ {
+ assert(varTypeIsIntOrI(sigType));
+
+ /* If possible bash the BYREF to an int */
+ if (inlArgNode->IsVarAddr())
+ {
+ inlArgNode->gtType = TYP_I_IMPL;
+ lclVarInfo[i].lclVerTypeInfo = typeInfo(varType2tiType(TYP_I_IMPL));
+ }
+ else
+ {
+ /* Arguments 'int <- byref' cannot be changed */
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_NO_BASH_TO_INT);
+ return;
+ }
+ }
+ else if (genTypeSize(sigType) < EA_PTRSIZE)
+ {
+ /* Narrowing cast */
+
+ if (inlArgNode->gtOper == GT_LCL_VAR &&
+ !lvaTable[inlArgNode->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad() &&
+ sigType == lvaGetRealType(inlArgNode->gtLclVarCommon.gtLclNum))
+ {
+ /* We don't need to insert a cast here as the variable
+ was assigned a normalized value of the right type */
+
+ continue;
+ }
+
+ inlArgNode = inlArgInfo[i].argNode = gtNewCastNode(TYP_INT, inlArgNode, sigType);
+
+ inlArgInfo[i].argIsLclVar = false;
+
+ /* Try to fold the node in case we have constant arguments */
+
+ if (inlArgInfo[i].argIsInvariant)
+ {
+ inlArgNode = gtFoldExprConst(inlArgNode);
+ inlArgInfo[i].argNode = inlArgNode;
+ assert(inlArgNode->OperIsConst());
+ }
+ }
+#ifdef _TARGET_64BIT_
+ else if (genTypeSize(genActualType(inlArgNode->gtType)) < genTypeSize(sigType))
+ {
+ // This should only happen for int -> native int widening
+ inlArgNode = inlArgInfo[i].argNode = gtNewCastNode(genActualType(sigType), inlArgNode, sigType);
+
+ inlArgInfo[i].argIsLclVar = false;
+
+ /* Try to fold the node in case we have constant arguments */
+
+ if (inlArgInfo[i].argIsInvariant)
+ {
+ inlArgNode = gtFoldExprConst(inlArgNode);
+ inlArgInfo[i].argNode = inlArgNode;
+ assert(inlArgNode->OperIsConst());
+ }
+ }
+#endif // _TARGET_64BIT_
+ }
+ }
+ }
+
+ /* Init the types of the local variables */
+
+ CORINFO_ARG_LIST_HANDLE localsSig;
+ localsSig = methInfo->locals.args;
+
+ for (i = 0; i < methInfo->locals.numArgs; i++)
+ {
+ bool isPinned;
+ var_types type = (var_types)eeGetArgType(localsSig, &methInfo->locals, &isPinned);
+
+ lclVarInfo[i + argCnt].lclHasLdlocaOp = false;
+ lclVarInfo[i + argCnt].lclTypeInfo = type;
+
+ if (isPinned)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_PINNED_LOCALS);
+ return;
+ }
+
+ lclVarInfo[i + argCnt].lclVerTypeInfo = verParseArgSigToTypeInfo(&methInfo->locals, localsSig);
+
+ localsSig = info.compCompHnd->getArgNext(localsSig);
+
+#ifdef FEATURE_SIMD
+ if ((!foundSIMDType || (type == TYP_STRUCT)) && isSIMDClass(&(lclVarInfo[i + argCnt].lclVerTypeInfo)))
+ {
+ foundSIMDType = true;
+ if (featureSIMD && type == TYP_STRUCT)
+ {
+ var_types structType = impNormStructType(lclVarInfo[i + argCnt].lclVerTypeInfo.GetClassHandle());
+ lclVarInfo[i + argCnt].lclTypeInfo = structType;
+ }
+ }
+#endif // FEATURE_SIMD
+ }
+
+#ifdef FEATURE_SIMD
+ if (!foundSIMDType && (call->AsCall()->gtRetClsHnd != nullptr) && isSIMDClass(call->AsCall()->gtRetClsHnd))
+ {
+ foundSIMDType = true;
+ }
+ pInlineInfo->hasSIMDTypeArgLocalOrReturn = foundSIMDType;
+#endif // FEATURE_SIMD
+}
+
+unsigned Compiler::impInlineFetchLocal(unsigned lclNum DEBUGARG(const char* reason))
+{
+ assert(compIsForInlining());
+
+ unsigned tmpNum = impInlineInfo->lclTmpNum[lclNum];
+
+ if (tmpNum == BAD_VAR_NUM)
+ {
+ var_types lclTyp = impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclTypeInfo;
+
+ // The lifetime of this local might span multiple BBs.
+ // So it is a long lifetime local.
+ impInlineInfo->lclTmpNum[lclNum] = tmpNum = lvaGrabTemp(false DEBUGARG(reason));
+
+ lvaTable[tmpNum].lvType = lclTyp;
+ if (impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclHasLdlocaOp)
+ {
+ lvaTable[tmpNum].lvHasLdAddrOp = 1;
+ }
+
+ if (impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclVerTypeInfo.IsStruct())
+ {
+ if (varTypeIsStruct(lclTyp))
+ {
+ lvaSetStruct(tmpNum,
+ impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclVerTypeInfo.GetClassHandle(),
+ true /* unsafe value cls check */);
+ }
+ else
+ {
+ // This is a wrapped primitive. Make sure the verstate knows that
+ lvaTable[tmpNum].lvVerTypeInfo =
+ impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclVerTypeInfo;
+ }
+ }
+ }
+
+ return tmpNum;
+}
+
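+// Usage sketch (the local number and reason string are illustrative): callers
+// fetch the temp lazily, so repeated requests for the same inlinee local return
+// the same lvaTable slot.
+//
+//     unsigned   tmpNum = impInlineFetchLocal(0 DEBUGARG("inlinee local V_0"));
+//     GenTreePtr use    = gtNewLclvNode(tmpNum, lvaTable[tmpNum].lvType);
+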
+// Returns the GenTree (usually a GT_LCL_VAR) representing an argument of the inlined method.
+// Only use this method for the arguments of the inlinee method.
+// !!! Do not use it for the locals of the inlinee method; use impInlineFetchLocal for those. !!!
+
+GenTreePtr Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, InlLclVarInfo* lclVarInfo)
+{
+ /* Get the argument type */
+ var_types lclTyp = lclVarInfo[lclNum].lclTypeInfo;
+
+ GenTreePtr op1 = nullptr;
+
+ // constant or address of local
+ if (inlArgInfo[lclNum].argIsInvariant && !inlArgInfo[lclNum].argHasLdargaOp && !inlArgInfo[lclNum].argHasStargOp)
+ {
+ /* Clone the constant. Note that we cannot directly use argNode
+ in the trees even if inlArgInfo[lclNum].argIsUsed==false as this
+ would introduce aliasing between inlArgInfo[].argNode and
+ impInlineExpr. Then gtFoldExpr() could change it, causing further
+ references to the argument to work off of the bashed copy. */
+
+ op1 = gtCloneExpr(inlArgInfo[lclNum].argNode);
+ PREFIX_ASSUME(op1 != nullptr);
+ inlArgInfo[lclNum].argTmpNum = (unsigned)-1; // illegal temp
+ }
+ else if (inlArgInfo[lclNum].argIsLclVar && !inlArgInfo[lclNum].argHasLdargaOp && !inlArgInfo[lclNum].argHasStargOp)
+ {
+ /* Argument is a local variable (of the caller)
+ * Can we re-use the passed argument node? */
+
+ op1 = inlArgInfo[lclNum].argNode;
+ inlArgInfo[lclNum].argTmpNum = op1->gtLclVarCommon.gtLclNum;
+
+ if (inlArgInfo[lclNum].argIsUsed)
+ {
+ assert(op1->gtOper == GT_LCL_VAR);
+ assert(lclNum == op1->gtLclVar.gtLclILoffs);
+
+ if (!lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad())
+ {
+ lclTyp = genActualType(lclTyp);
+ }
+
+ /* Create a new lcl var node - remember the argument lclNum */
+ op1 = gtNewLclvNode(op1->gtLclVarCommon.gtLclNum, lclTyp, op1->gtLclVar.gtLclILoffs);
+ }
+ }
+ else if (inlArgInfo[lclNum].argIsByRefToStructLocal && !inlArgInfo[lclNum].argHasStargOp)
+ {
+ /* Argument is a by-ref address to a struct, a normed struct, or its field.
+ In these cases, don't spill the byref to a local; simply clone the tree and use it.
+ This way we increase the chance that this byref will be optimized away by
+ a subsequent "dereference" operation.
+
+ From Dev11 bug #139955: Argument node can also be TYP_I_IMPL if we've bashed the tree
+ (in impInlineInitVars()), if the arg has argHasLdargaOp as well as argIsByRefToStructLocal.
+ For example, if the caller is:
+ ldloca.s V_1 // V_1 is a local struct
+ call void Test.ILPart::RunLdargaOnPointerArg(int32*)
+ and the callee being inlined has:
+ .method public static void RunLdargaOnPointerArg(int32* ptrToInts) cil managed
+ ldarga.s ptrToInts
+ call void Test.FourInts::NotInlined_SetExpectedValuesThroughPointerToPointer(int32**)
+ then we change the argument tree (of "ldloca.s V_1") to TYP_I_IMPL to match the callee signature. We'll
+ soon afterwards reject the inlining anyway, since the tree we return isn't a GT_LCL_VAR.
+ */
+ assert(inlArgInfo[lclNum].argNode->TypeGet() == TYP_BYREF ||
+ inlArgInfo[lclNum].argNode->TypeGet() == TYP_I_IMPL);
+ op1 = gtCloneExpr(inlArgInfo[lclNum].argNode);
+ }
+ else
+ {
+ /* Argument is a complex expression - it must be evaluated into a temp */
+
+ if (inlArgInfo[lclNum].argHasTmp)
+ {
+ assert(inlArgInfo[lclNum].argIsUsed);
+ assert(inlArgInfo[lclNum].argTmpNum < lvaCount);
+
+ /* Create a new lcl var node - remember the argument lclNum */
+ op1 = gtNewLclvNode(inlArgInfo[lclNum].argTmpNum, genActualType(lclTyp));
+
+ /* This is the second or later use of this argument,
+ so we have to use the temp (instead of the actual arg) */
+ inlArgInfo[lclNum].argBashTmpNode = nullptr;
+ }
+ else
+ {
+ /* First time use */
+ assert(inlArgInfo[lclNum].argIsUsed == false);
+
+ /* Reserve a temp for the expression.
+ * Use a large size node as we may change it later */
+
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Inlining Arg"));
+
+ lvaTable[tmpNum].lvType = lclTyp;
+ assert(lvaTable[tmpNum].lvAddrExposed == 0);
+ if (inlArgInfo[lclNum].argHasLdargaOp)
+ {
+ lvaTable[tmpNum].lvHasLdAddrOp = 1;
+ }
+
+ if (lclVarInfo[lclNum].lclVerTypeInfo.IsStruct())
+ {
+ if (varTypeIsStruct(lclTyp))
+ {
+ lvaSetStruct(tmpNum, impInlineInfo->lclVarInfo[lclNum].lclVerTypeInfo.GetClassHandle(),
+ true /* unsafe value cls check */);
+ }
+ else
+ {
+ // This is a wrapped primitive. Make sure the verstate knows that
+ lvaTable[tmpNum].lvVerTypeInfo = impInlineInfo->lclVarInfo[lclNum].lclVerTypeInfo;
+ }
+ }
+
+ inlArgInfo[lclNum].argHasTmp = true;
+ inlArgInfo[lclNum].argTmpNum = tmpNum;
+
+ // If we require strict exception order, then arguments must
+ // be evaluated in sequence before the body of the inlined method.
+ // So we need to evaluate them to a temp.
+ // Also, if arguments have global references, we need to
+ // evaluate them to a temp before the inlined body as the
+ // inlined body may be modifying the global ref.
+ // TODO-1stClassStructs: We currently do not reuse an existing lclVar
+ // if it is a struct, because it requires some additional handling.
+
+ if (!varTypeIsStruct(lclTyp) && (!inlArgInfo[lclNum].argHasSideEff) && (!inlArgInfo[lclNum].argHasGlobRef))
+ {
+ /* Get a *LARGE* LCL_VAR node */
+ op1 = gtNewLclLNode(tmpNum, genActualType(lclTyp), lclNum);
+
+ /* Record op1 as the very first use of this argument.
+ If there are no further uses of the arg, we may be
+ able to use the actual arg node instead of the temp.
+ If we do see any further uses, we will clear this. */
+ inlArgInfo[lclNum].argBashTmpNode = op1;
+ }
+ else
+ {
+ /* Get a small LCL_VAR node */
+ op1 = gtNewLclvNode(tmpNum, genActualType(lclTyp));
+ /* No bashing of this argument */
+ inlArgInfo[lclNum].argBashTmpNode = nullptr;
+ }
+ }
+ }
+
+ /* Mark the argument as used */
+
+ inlArgInfo[lclNum].argIsUsed = true;
+
+ return op1;
+}
+
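+// Summary of the cases above (for reference): an invariant argument is cloned at
+// each use; a caller local is re-used directly; a byref to a struct local is
+// cloned so later dereferences can fold; anything else is spilled to a temp,
+// with argBashTmpNode remembering the first use so a single-use temp can later
+// be rewritten back to the actual argument tree. A typical call from the inlinee
+// importer looks like (ilArgNum is the IL argument number):
+//
+//     GenTreePtr argUse = impInlineFetchArg(ilArgNum, inlArgInfo, lclVarInfo);
+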
+/******************************************************************************
+ Is this the original "this" argument to the call being inlined?
+
+ Note that we do not inline methods with "starg 0", and so we do not need to
+ worry about it.
+*/
+
+BOOL Compiler::impInlineIsThis(GenTreePtr tree, InlArgInfo* inlArgInfo)
+{
+ assert(compIsForInlining());
+ return (tree->gtOper == GT_LCL_VAR && tree->gtLclVarCommon.gtLclNum == inlArgInfo[0].argTmpNum);
+}
+
+//-----------------------------------------------------------------------------
+// This function checks if a dereference in the inlinee can guarantee that
+// the "this" is non-NULL.
+// If we haven't hit a branch or a side effect, and we are dereferencing
+// from 'this' to access a field or make a GTF_CALL_NULLCHECK call,
+// then we can avoid a separate null pointer check.
+//
+// "additionalTreesToBeEvaluatedBefore"
+// is the set of pending trees that have not yet been added to the statement list,
+// and which have been removed from verCurrentState.esStack[]
+
+BOOL Compiler::impInlineIsGuaranteedThisDerefBeforeAnySideEffects(GenTreePtr additionalTreesToBeEvaluatedBefore,
+ GenTreePtr variableBeingDereferenced,
+ InlArgInfo* inlArgInfo)
+{
+ assert(compIsForInlining());
+ assert(opts.OptEnabled(CLFLG_INLINING));
+
+ BasicBlock* block = compCurBB;
+
+ GenTreePtr stmt;
+ GenTreePtr expr;
+
+ if (block != fgFirstBB)
+ {
+ return FALSE;
+ }
+
+ if (!impInlineIsThis(variableBeingDereferenced, inlArgInfo))
+ {
+ return FALSE;
+ }
+
+ if (additionalTreesToBeEvaluatedBefore &&
+ GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(additionalTreesToBeEvaluatedBefore->gtFlags))
+ {
+ return FALSE;
+ }
+
+ for (stmt = impTreeList->gtNext; stmt; stmt = stmt->gtNext)
+ {
+ expr = stmt->gtStmt.gtStmtExpr;
+
+ if (GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(expr->gtFlags))
+ {
+ return FALSE;
+ }
+ }
+
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+ {
+ unsigned stackTreeFlags = verCurrentState.esStack[level].val->gtFlags;
+ if (GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(stackTreeFlags))
+ {
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+/******************************************************************************/
+// Check the inlining eligibility of this GT_CALL node.
+// Mark GTF_CALL_INLINE_CANDIDATE on the GT_CALL node
+
+// Todo: find a way to record the failure reasons in the IR (or
+// otherwise build tree context) so when we do the inlining pass we
+// can capture these reasons
+
+void Compiler::impMarkInlineCandidate(GenTreePtr callNode,
+ CORINFO_CONTEXT_HANDLE exactContextHnd,
+ CORINFO_CALL_INFO* callInfo)
+{
+ // Let the strategy know there's another call
+ impInlineRoot()->m_inlineStrategy->NoteCall();
+
+ if (!opts.OptEnabled(CLFLG_INLINING))
+ {
+ /* XXX Mon 8/18/2008
+ * This assert is misleading. The caller does not ensure that we have CLFLG_INLINING set before
+ * calling impMarkInlineCandidate. However, if this assert trips it means that we're an inlinee and
+ * CLFLG_MINOPT is set. That doesn't make a lot of sense. If you hit this assert, work back and
+ * figure out why we did not set MAXOPT for this compile.
+ */
+ assert(!compIsForInlining());
+ return;
+ }
+
+ if (compIsForImportOnly())
+ {
+ // Don't bother creating the inline candidate during verification.
+ // Otherwise the call to info.compCompHnd->canInline will trigger a recursive verification
+ // that leads to the creation of multiple instances of Compiler.
+ return;
+ }
+
+ GenTreeCall* call = callNode->AsCall();
+ InlineResult inlineResult(this, call, nullptr, "impMarkInlineCandidate");
+
+ // Don't inline if not optimizing root method
+ if (opts.compDbgCode)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLER_DEBUG_CODEGEN);
+ return;
+ }
+
+ // Don't inline if inlining into root method is disabled.
+ if (InlineStrategy::IsNoInline(info.compCompHnd, info.compMethodHnd))
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLER_IS_JIT_NOINLINE);
+ return;
+ }
+
+ // Inlining candidate determination needs to honor only the IL tail prefix.
+ // Inlining takes precedence over implicit tail call optimization (if the call is not directly recursive).
+ if (call->IsTailPrefixedCall())
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_EXPLICIT_TAIL_PREFIX);
+ return;
+ }
+
+ // Tail recursion elimination takes precedence over inlining.
+ // TODO: We may want to do some of the additional checks from fgMorphCall
+ // here to reduce the chance we don't inline a call that won't be optimized
+ // as a fast tail call or turned into a loop.
+ if (gtIsRecursiveCall(call) && call->IsImplicitTailCall())
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_IMPLICIT_REC_TAIL_CALL);
+ return;
+ }
+
+ if ((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_IS_NOT_DIRECT);
+ return;
+ }
+
+ /* Ignore helper calls */
+
+ if (call->gtCallType == CT_HELPER)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_IS_CALL_TO_HELPER);
+ return;
+ }
+
+ /* Ignore indirect calls */
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_IS_NOT_DIRECT_MANAGED);
+ return;
+ }
+
+ /* I removed the check for BBJ_THROW. BBJ_THROW is usually marked as rarely run. This more or less
+ * restricts the inliner to non-expanding inlines. I removed the check to allow for non-expanding
+ * inlining in throw blocks. I should consider the same thing for catch and filter regions. */
+
+ CORINFO_METHOD_HANDLE fncHandle = call->gtCallMethHnd;
+ unsigned methAttr;
+
+ // Reuse method flags from the original callInfo if possible
+ if (fncHandle == callInfo->hMethod)
+ {
+ methAttr = callInfo->methodFlags;
+ }
+ else
+ {
+ methAttr = info.compCompHnd->getMethodAttribs(fncHandle);
+ }
+
+#ifdef DEBUG
+ if (compStressCompile(STRESS_FORCE_INLINE, 0))
+ {
+ methAttr |= CORINFO_FLG_FORCEINLINE;
+ }
+#endif
+
+ // Check for COMPlus_AggressiveInlining
+ if (compDoAggressiveInlining)
+ {
+ methAttr |= CORINFO_FLG_FORCEINLINE;
+ }
+
+ if (!(methAttr & CORINFO_FLG_FORCEINLINE))
+ {
+ /* Don't bother inlining calls in blocks that are in the catch handler region */
+ if (bbInCatchHandlerILRange(compCurBB))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nWill not inline blocks that are in the catch handler region\n");
+ }
+
+#endif
+
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_IS_WITHIN_CATCH);
+ return;
+ }
+
+ if (bbInFilterILRange(compCurBB))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nWill not inline blocks that are in the filter region\n");
+ }
+#endif
+
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_IS_WITHIN_FILTER);
+ return;
+ }
+ }
+
+ /* If the caller's stack frame is marked, then we can't do any inlining. Period. */
+
+ if (opts.compNeedSecurityCheck)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
+ return;
+ }
+
+ /* Check if we tried to inline this method before */
+
+ if (methAttr & CORINFO_FLG_DONT_INLINE)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLEE_IS_NOINLINE);
+ return;
+ }
+
+ /* Cannot inline synchronized methods */
+
+ if (methAttr & CORINFO_FLG_SYNCH)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLEE_IS_SYNCHRONIZED);
+ return;
+ }
+
+ /* Do not inline if callee needs security checks (since they would then mark the wrong frame) */
+
+ if (methAttr & CORINFO_FLG_SECURITYCHECK)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLEE_NEEDS_SECURITY_CHECK);
+ return;
+ }
+
+ InlineCandidateInfo* inlineCandidateInfo = nullptr;
+ impCheckCanInline(call, fncHandle, methAttr, exactContextHnd, &inlineCandidateInfo, &inlineResult);
+
+ if (inlineResult.IsFailure())
+ {
+ return;
+ }
+
+ // The old value should be NULL
+ assert(call->gtInlineCandidateInfo == nullptr);
+
+ call->gtInlineCandidateInfo = inlineCandidateInfo;
+
+ // Mark the call node as inline candidate.
+ call->gtFlags |= GTF_CALL_INLINE_CANDIDATE;
+
+ // Let the strategy know there's another candidate.
+ impInlineRoot()->m_inlineStrategy->NoteCandidate();
+
+ // Since we're not actually inlining yet, and this call site is
+ // still just an inline candidate, there's nothing to report.
+ inlineResult.SetReported();
+}
+
+/******************************************************************************/
+// Returns true if the given intrinsic will be implemented by target-specific
+// instructions
+
+bool Compiler::IsTargetIntrinsic(CorInfoIntrinsics intrinsicId)
+{
+#if defined(_TARGET_AMD64_)
+ switch (intrinsicId)
+ {
+ // Amd64 only has SSE2 instructions to directly compute sqrt/abs.
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ return true;
+
+ default:
+ return false;
+ }
+#elif defined(_TARGET_ARM64_)
+ switch (intrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ case CORINFO_INTRINSIC_Round:
+ return true;
+
+ default:
+ return false;
+ }
+#elif defined(_TARGET_ARM_)
+ switch (intrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ case CORINFO_INTRINSIC_Round:
+ return true;
+
+ default:
+ return false;
+ }
+#elif defined(_TARGET_X86_)
+ switch (intrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ case CORINFO_INTRINSIC_Cos:
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ case CORINFO_INTRINSIC_Round:
+ return true;
+
+ default:
+ return false;
+ }
+#else
+ // TODO: This portion of logic is not implemented for other architectures.
+ // The reason for returning true is that on all other architectures the only
+ // intrinsics enabled are target intrinsics.
+ return true;
+#endif //_TARGET_AMD64_
+}
+
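+// Example (illustrative): on AMD64, CORINFO_INTRINSIC_Sqrt returns true here and
+// is expanded with SSE2, while CORINFO_INTRINSIC_Sin returns false and is left
+// as a call to its System.Math implementation (see
+// IsIntrinsicImplementedByUserCall below).
+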
+/******************************************************************************/
+// Returns true if the given intrinsic will be implemented by calling System.Math
+// methods.
+
+bool Compiler::IsIntrinsicImplementedByUserCall(CorInfoIntrinsics intrinsicId)
+{
+ // Currently, if a math intrinsic is not implemented by target-specific
+ // instructions, it will be implemented by a System.Math call. In the
+ // future, if we turn to implementing some of them with helper calls,
+ // this predicate needs to be revisited.
+ return !IsTargetIntrinsic(intrinsicId);
+}
+
+bool Compiler::IsMathIntrinsic(CorInfoIntrinsics intrinsicId)
+{
+ switch (intrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ case CORINFO_INTRINSIC_Cos:
+ case CORINFO_INTRINSIC_Round:
+ case CORINFO_INTRINSIC_Cosh:
+ case CORINFO_INTRINSIC_Sinh:
+ case CORINFO_INTRINSIC_Tan:
+ case CORINFO_INTRINSIC_Tanh:
+ case CORINFO_INTRINSIC_Asin:
+ case CORINFO_INTRINSIC_Acos:
+ case CORINFO_INTRINSIC_Atan:
+ case CORINFO_INTRINSIC_Atan2:
+ case CORINFO_INTRINSIC_Log10:
+ case CORINFO_INTRINSIC_Pow:
+ case CORINFO_INTRINSIC_Exp:
+ case CORINFO_INTRINSIC_Ceiling:
+ case CORINFO_INTRINSIC_Floor:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool Compiler::IsMathIntrinsic(GenTreePtr tree)
+{
+ return (tree->OperGet() == GT_INTRINSIC) && IsMathIntrinsic(tree->gtIntrinsic.gtIntrinsicId);
+}
+/*****************************************************************************/
diff --git a/src/jit/inline.cpp b/src/jit/inline.cpp
new file mode 100644
index 0000000000..deccc0e84b
--- /dev/null
+++ b/src/jit/inline.cpp
@@ -0,0 +1,1640 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "inlinepolicy.h"
+
+// Lookup table for inline description strings
+
+static const char* InlineDescriptions[] = {
+#define INLINE_OBSERVATION(name, type, description, impact, target) description,
+#include "inline.def"
+#undef INLINE_OBSERVATION
+};
+
+// Lookup table for inline targets
+
+static const InlineTarget InlineTargets[] = {
+#define INLINE_OBSERVATION(name, type, description, impact, target) InlineTarget::target,
+#include "inline.def"
+#undef INLINE_OBSERVATION
+};
+
+// Lookup table for inline impacts
+
+static const InlineImpact InlineImpacts[] = {
+#define INLINE_OBSERVATION(name, type, description, impact, target) InlineImpact::impact,
+#include "inline.def"
+#undef INLINE_OBSERVATION
+};
+
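+// The three tables above are generated by re-expanding inline.def. For a
+// hypothetical entry such as
+//
+//     INLINE_OBSERVATION(CALLEE_HAS_EH, bool, "callee has EH", FATAL, CALLEE)
+//
+// the expansions contribute "callee has EH" to InlineDescriptions,
+// InlineTarget::CALLEE to InlineTargets, and InlineImpact::FATAL to
+// InlineImpacts, all at the index corresponding to the enum value.
+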
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// InlIsValidObservation: run a validity check on an inline observation
+//
+// Arguments:
+// obs - the observation in question
+//
+// Return Value:
+// true if the observation is valid
+
+bool InlIsValidObservation(InlineObservation obs)
+{
+ return ((obs > InlineObservation::CALLEE_UNUSED_INITIAL) && (obs < InlineObservation::CALLEE_UNUSED_FINAL));
+}
+
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// InlGetObservationString: get a string describing this inline observation
+//
+// Arguments:
+// obs - the observation in question
+//
+// Return Value:
+// string describing the observation
+
+const char* InlGetObservationString(InlineObservation obs)
+{
+ assert(InlIsValidObservation(obs));
+ return InlineDescriptions[static_cast<int>(obs)];
+}
+
+//------------------------------------------------------------------------
+// InlGetTarget: get the target of an inline observation
+//
+// Arguments:
+// obs - the observation in question
+//
+// Return Value:
+// enum describing the target
+
+InlineTarget InlGetTarget(InlineObservation obs)
+{
+ assert(InlIsValidObservation(obs));
+ return InlineTargets[static_cast<int>(obs)];
+}
+
+//------------------------------------------------------------------------
+// InlGetTargetString: get a string describing the target of an inline observation
+//
+// Arguments:
+// obs - the observation in question
+//
+// Return Value:
+// string describing the target
+
+const char* InlGetTargetString(InlineObservation obs)
+{
+ InlineTarget t = InlGetTarget(obs);
+ switch (t)
+ {
+ case InlineTarget::CALLER:
+ return "caller";
+ case InlineTarget::CALLEE:
+ return "callee";
+ case InlineTarget::CALLSITE:
+ return "call site";
+ default:
+ return "unexpected target";
+ }
+}
+
+//------------------------------------------------------------------------
+// InlGetImpact: get the impact of an inline observation
+//
+// Arguments:
+// obs - the observation in question
+//
+// Return Value:
+// enum value describing the impact
+
+InlineImpact InlGetImpact(InlineObservation obs)
+{
+ assert(InlIsValidObservation(obs));
+ return InlineImpacts[static_cast<int>(obs)];
+}
+
+//------------------------------------------------------------------------
+// InlGetImpactString: get a string describing the impact of an inline observation
+//
+// Arguments:
+// obs - the observation in question
+//
+// Return Value:
+// string describing the impact
+
+const char* InlGetImpactString(InlineObservation obs)
+{
+ InlineImpact i = InlGetImpact(obs);
+ switch (i)
+ {
+ case InlineImpact::FATAL:
+ return "correctness -- fatal";
+ case InlineImpact::FUNDAMENTAL:
+ return "correctness -- fundamental limitation";
+ case InlineImpact::LIMITATION:
+ return "correctness -- jit limitation";
+ case InlineImpact::PERFORMANCE:
+ return "performance";
+ case InlineImpact::INFORMATION:
+ return "information";
+ default:
+ return "unexpected impact";
+ }
+}
+
+//------------------------------------------------------------------------
+// InlGetCorInfoInlineDecision: translate decision into a CorInfoInline
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// CorInfoInline value representing the decision
+
+CorInfoInline InlGetCorInfoInlineDecision(InlineDecision d)
+{
+ switch (d)
+ {
+ case InlineDecision::SUCCESS:
+ return INLINE_PASS;
+ case InlineDecision::FAILURE:
+ return INLINE_FAIL;
+ case InlineDecision::NEVER:
+ return INLINE_NEVER;
+ default:
+ assert(!"Unexpected InlineDecision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// InlGetDecisionString: get a string representing this decision
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// string representing the decision
+
+const char* InlGetDecisionString(InlineDecision d)
+{
+ switch (d)
+ {
+ case InlineDecision::SUCCESS:
+ return "success";
+ case InlineDecision::FAILURE:
+ return "failed this call site";
+ case InlineDecision::NEVER:
+ return "failed this callee";
+ case InlineDecision::CANDIDATE:
+ return "candidate";
+ case InlineDecision::UNDECIDED:
+ return "undecided";
+ default:
+ assert(!"Unexpected InlineDecision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// InlDecisionIsFailure: check if this decision describes a failing inline
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// true if the inline is definitely a failure
+
+bool InlDecisionIsFailure(InlineDecision d)
+{
+ switch (d)
+ {
+ case InlineDecision::SUCCESS:
+ case InlineDecision::UNDECIDED:
+ case InlineDecision::CANDIDATE:
+ return false;
+ case InlineDecision::FAILURE:
+ case InlineDecision::NEVER:
+ return true;
+ default:
+ assert(!"Unexpected InlineDecision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// InlDecisionIsSuccess: check if this decision describes a successful inline
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// true if the inline is definitely a success
+
+bool InlDecisionIsSuccess(InlineDecision d)
+{
+ switch (d)
+ {
+ case InlineDecision::SUCCESS:
+ return true;
+ case InlineDecision::FAILURE:
+ case InlineDecision::NEVER:
+ case InlineDecision::UNDECIDED:
+ case InlineDecision::CANDIDATE:
+ return false;
+ default:
+ assert(!"Unexpected InlineDecision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// InlDecisionIsNever: check if this decision describes a never inline
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// true if the inline is a never inline case
+
+bool InlDecisionIsNever(InlineDecision d)
+{
+ switch (d)
+ {
+ case InlineDecision::NEVER:
+ return true;
+ case InlineDecision::FAILURE:
+ case InlineDecision::SUCCESS:
+ case InlineDecision::UNDECIDED:
+ case InlineDecision::CANDIDATE:
+ return false;
+ default:
+ assert(!"Unexpected InlineDecision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// InlDecisionIsCandidate: check if this decision describes a viable candidate
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// true if this inline still might happen
+
+bool InlDecisionIsCandidate(InlineDecision d)
+{
+ return !InlDecisionIsFailure(d);
+}
+
+//------------------------------------------------------------------------
+// InlDecisionIsDecided: check if this decision has been made
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// true if this inline has been decided one way or another
+
+bool InlDecisionIsDecided(InlineDecision d)
+{
+ switch (d)
+ {
+ case InlineDecision::NEVER:
+ case InlineDecision::FAILURE:
+ case InlineDecision::SUCCESS:
+ return true;
+ case InlineDecision::UNDECIDED:
+ case InlineDecision::CANDIDATE:
+ return false;
+ default:
+ assert(!"Unexpected InlineDecision");
+ unreached();
+ }
+}
+
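+// How the predicates above partition InlineDecision (derived from the switches;
+// for reference):
+//
+//                 IsFailure  IsSuccess  IsNever  IsCandidate  IsDecided
+//   UNDECIDED       false      false     false      true        false
+//   CANDIDATE       false      false     false      true        false
+//   SUCCESS         false      true      false      true        true
+//   FAILURE         true       false     false      false       true
+//   NEVER           true       false     true       false       true
+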
+//------------------------------------------------------------------------
+// InlineContext: default constructor
+
+InlineContext::InlineContext(InlineStrategy* strategy)
+ : m_InlineStrategy(strategy)
+ , m_Parent(nullptr)
+ , m_Child(nullptr)
+ , m_Sibling(nullptr)
+ , m_Code(nullptr)
+ , m_ILSize(0)
+ , m_Offset(BAD_IL_OFFSET)
+ , m_Observation(InlineObservation::CALLEE_UNUSED_INITIAL)
+ , m_CodeSizeEstimate(0)
+ , m_Success(true)
+#if defined(DEBUG) || defined(INLINE_DATA)
+ , m_Policy(nullptr)
+ , m_Callee(nullptr)
+ , m_TreeID(0)
+ , m_Ordinal(0)
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+{
+ // Empty
+}
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------
+// Dump: Dump an InlineContext entry and all descendants to jitstdout
+//
+// Arguments:
+// indent - indentation level for this node
+
+void InlineContext::Dump(unsigned indent)
+{
+ // Handle fact that siblings are in reverse order.
+ if (m_Sibling != nullptr)
+ {
+ m_Sibling->Dump(indent);
+ }
+
+ // We may not know callee name in some of the failing cases
+ Compiler* compiler = m_InlineStrategy->GetCompiler();
+ const char* calleeName = nullptr;
+
+ if (m_Callee == nullptr)
+ {
+ assert(!m_Success);
+ calleeName = "<unknown>";
+ }
+ else
+ {
+
+#if defined(DEBUG)
+ calleeName = compiler->eeGetMethodFullName(m_Callee);
+#else
+ calleeName = "callee";
+#endif // defined(DEBUG)
+ }
+
+ mdMethodDef calleeToken = compiler->info.compCompHnd->getMethodDefFromMethod(m_Callee);
+
+ // Dump this node
+ if (m_Parent == nullptr)
+ {
+ // Root method
+ printf("Inlines into %08X %s\n", calleeToken, calleeName);
+ }
+ else
+ {
+ // Inline attempt.
+ const char* inlineReason = InlGetObservationString(m_Observation);
+ const char* inlineResult = m_Success ? "" : "FAILED: ";
+
+ if (m_Offset == BAD_IL_OFFSET)
+ {
+ printf("%*s[%u IL=???? TR=%06u %08X] [%s%s] %s\n", indent, "", m_Ordinal, m_TreeID, calleeToken,
+ inlineResult, inlineReason, calleeName);
+ }
+ else
+ {
+ IL_OFFSET offset = jitGetILoffs(m_Offset);
+ printf("%*s[%u IL=%04d TR=%06u %08X] [%s%s] %s\n", indent, "", m_Ordinal, offset, m_TreeID, calleeToken,
+ inlineResult, inlineReason, calleeName);
+ }
+ }
+
+ // Recurse to first child
+ if (m_Child != nullptr)
+ {
+ m_Child->Dump(indent + 2);
+ }
+}
+
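+// Shape of the Dump() output (method names, tokens, tree IDs, and reason strings
+// below are illustrative):
+//
+//     Inlines into 06000001 MyNamespace.MyClass:Main()
+//       [1 IL=0005 TR=000123 06000002] [some observation] MyNamespace.Helper:Add()
+//       [2 IL=???? TR=000456 06000003] [FAILED: some observation] MyNamespace.Helper:Risky()
+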
+//------------------------------------------------------------------------
+// DumpData: Dump a successful InlineContext entry, detailed data, and
+// any successful descendant inlines
+//
+// Arguments:
+// indent - indentation level for this node
+
+void InlineContext::DumpData(unsigned indent)
+{
+ // Handle fact that siblings are in reverse order.
+ if (m_Sibling != nullptr)
+ {
+ m_Sibling->DumpData(indent);
+ }
+
+ Compiler* compiler = m_InlineStrategy->GetCompiler();
+
+#if defined(DEBUG)
+ const char* calleeName = compiler->eeGetMethodFullName(m_Callee);
+#else
+ const char* calleeName = "callee";
+#endif // defined(DEBUG)
+
+ if (m_Parent == nullptr)
+ {
+ // Root method... cons up a policy so we can display the name
+ InlinePolicy* policy = InlinePolicy::GetPolicy(compiler, true);
+ printf("\nInlines [%u] into \"%s\" [%s]\n", m_InlineStrategy->GetInlineCount(), calleeName, policy->GetName());
+ }
+ else if (m_Success)
+ {
+ const char* inlineReason = InlGetObservationString(m_Observation);
+ printf("%*s%u,\"%s\",\"%s\"", indent, "", m_Ordinal, inlineReason, calleeName);
+ m_Policy->DumpData(jitstdout);
+ printf("\n");
+ }
+
+ // Recurse to first child
+ if (m_Child != nullptr)
+ {
+ m_Child->DumpData(indent + 2);
+ }
+}
+
+//------------------------------------------------------------------------
+// DumpXml: Dump an InlineContext entry and all descendants in xml format
+//
+// Arguments:
+// file - file for output
+// indent - indentation level for this node
+
+void InlineContext::DumpXml(FILE* file, unsigned indent)
+{
+ // Handle fact that siblings are in reverse order.
+ if (m_Sibling != nullptr)
+ {
+ m_Sibling->DumpXml(file, indent);
+ }
+
+ const bool isRoot = m_Parent == nullptr;
+ const bool hasChild = m_Child != nullptr;
+ const char* inlineType = m_Success ? "Inline" : "FailedInline";
+ unsigned newIndent = indent;
+
+ if (!isRoot)
+ {
+ Compiler* compiler = m_InlineStrategy->GetCompiler();
+
+ mdMethodDef calleeToken = compiler->info.compCompHnd->getMethodDefFromMethod(m_Callee);
+ unsigned calleeHash = compiler->info.compCompHnd->getMethodHash(m_Callee);
+
+ const char* inlineReason = InlGetObservationString(m_Observation);
+
+ int offset = -1;
+ if (m_Offset != BAD_IL_OFFSET)
+ {
+ offset = (int)jitGetILoffs(m_Offset);
+ }
+
+ fprintf(file, "%*s<%s>\n", indent, "", inlineType);
+ fprintf(file, "%*s<Token>%u</Token>\n", indent + 2, "", calleeToken);
+ fprintf(file, "%*s<Hash>%u</Hash>\n", indent + 2, "", calleeHash);
+ fprintf(file, "%*s<Offset>%u</Offset>\n", indent + 2, "", offset);
+ fprintf(file, "%*s<Reason>%s</Reason>\n", indent + 2, "", inlineReason);
+
+ // Optionally, dump data about the last inline
+ if ((JitConfig.JitInlineDumpData() != 0) && (this == m_InlineStrategy->GetLastContext()))
+ {
+ fprintf(file, "%*s<Data>", indent + 2, "");
+ m_InlineStrategy->DumpDataContents(file);
+ fprintf(file, "</Data>\n");
+ }
+
+ newIndent = indent + 2;
+ }
+
+ // Handle children
+
+ if (hasChild)
+ {
+ fprintf(file, "%*s<Inlines>\n", newIndent, "");
+ m_Child->DumpXml(file, newIndent + 2);
+ fprintf(file, "%*s</Inlines>\n", newIndent, "");
+ }
+ else
+ {
+ fprintf(file, "%*s<Inlines />\n", newIndent, "");
+ }
+
+ // Close out
+
+ if (!isRoot)
+ {
+ fprintf(file, "%*s</%s>\n", indent, "", inlineType);
+ }
+}
+
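+// Shape of the XML produced by DumpXml() (token, hash, and reason values are
+// illustrative; failed attempts use a <FailedInline> element instead):
+//
+//     <Inline>
+//       <Token>100663298</Token>
+//       <Hash>12345678</Hash>
+//       <Offset>5</Offset>
+//       <Reason>some observation</Reason>
+//       <Inlines />
+//     </Inline>
+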
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------
+// InlineResult: Construct an InlineResult to evaluate a particular call
+// for inlining.
+//
+// Arguments:
+// compiler - the compiler instance examining a call for inlining
+// call - the call in question
+// stmt - statement containing the call (if known)
+// description - string describing the context of the decision
+
+InlineResult::InlineResult(Compiler* compiler, GenTreeCall* call, GenTreeStmt* stmt, const char* description)
+ : m_RootCompiler(nullptr)
+ , m_Policy(nullptr)
+ , m_Call(call)
+ , m_InlineContext(nullptr)
+ , m_Caller(nullptr)
+ , m_Callee(nullptr)
+ , m_Description(description)
+ , m_Reported(false)
+{
+ // Set the compiler instance
+ m_RootCompiler = compiler->impInlineRoot();
+
+ // Set the policy
+ const bool isPrejitRoot = false;
+ m_Policy = InlinePolicy::GetPolicy(m_RootCompiler, isPrejitRoot);
+
+ // Pass along some optional information to the policy.
+ if (stmt != nullptr)
+ {
+ m_InlineContext = stmt->gtInlineContext;
+ m_Policy->NoteContext(m_InlineContext);
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ m_Policy->NoteOffset(call->gtRawILOffset);
+#else
+ m_Policy->NoteOffset(stmt->gtStmtILoffsx);
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+ }
+
+ // Get method handle for caller. Note we use the
+ // handle for the "immediate" caller here.
+ m_Caller = compiler->info.compMethodHnd;
+
+ // Get method handle for callee, if known
+ if (m_Call->gtCall.gtCallType == CT_USER_FUNC)
+ {
+ m_Callee = m_Call->gtCall.gtCallMethHnd;
+ }
+}
+
+//------------------------------------------------------------------------
+// InlineResult: Construct an InlineResult to evaluate a particular
+// method as a possible inline candidate, while prejitting.
+//
+// Arguments:
+// compiler - the compiler instance doing the prejitting
+// method - the method in question
+// description - string describing the context of the decision
+//
+// Notes:
+// Used only during prejitting to try to pre-identify methods that
+// cannot be inlined, to improve subsequent jit throughput.
+//
+// We use the m_Callee member to track the method since logically
+// it is the callee here.
+
+InlineResult::InlineResult(Compiler* compiler, CORINFO_METHOD_HANDLE method, const char* description)
+ : m_RootCompiler(nullptr)
+ , m_Policy(nullptr)
+ , m_Call(nullptr)
+ , m_InlineContext(nullptr)
+ , m_Caller(nullptr)
+ , m_Callee(method)
+ , m_Description(description)
+ , m_Reported(false)
+{
+ // Set the compiler instance
+ m_RootCompiler = compiler->impInlineRoot();
+
+ // Set the policy
+ const bool isPrejitRoot = true;
+ m_Policy = InlinePolicy::GetPolicy(m_RootCompiler, isPrejitRoot);
+}
+
+//------------------------------------------------------------------------
+// Report: Dump, log, and report information about an inline decision.
+//
+// Notes:
+// Called (automatically via the InlineResult dtor) when the
+// inliner is done evaluating a candidate.
+//
+// Dumps state of the inline candidate, and if a decision was
+// reached, sends it to the log and reports the decision back to the
+// EE. Optionally update the method attribute to NOINLINE if
+// observation and policy warrant.
+//
+// All this can be suppressed if desired by calling SetReported()
+// before the InlineResult goes out of scope.
+
+void InlineResult::Report()
+{
+ // If we weren't actually inlining, the user may have suppressed
+ // reporting via SetReported(). If so, do nothing.
+ if (m_Reported)
+ {
+ return;
+ }
+
+ m_Reported = true;
+
+#ifdef DEBUG
+ const char* callee = nullptr;
+
+ // Optionally dump the result
+ if (VERBOSE)
+ {
+ const char* format = "INLINER: during '%s' result '%s' reason '%s' for '%s' calling '%s'\n";
+ const char* caller = (m_Caller == nullptr) ? "n/a" : m_RootCompiler->eeGetMethodFullName(m_Caller);
+
+ callee = (m_Callee == nullptr) ? "n/a" : m_RootCompiler->eeGetMethodFullName(m_Callee);
+
+ JITDUMP(format, m_Description, ResultString(), ReasonString(), caller, callee);
+ }
+
+ // If the inline failed, leave information on the call so we can
+ // later recover which observation led to the failure.
+ if (IsFailure() && (m_Call != nullptr))
+ {
+ // compiler should have revoked candidacy on the call by now
+ assert((m_Call->gtFlags & GTF_CALL_INLINE_CANDIDATE) == 0);
+
+ m_Call->gtInlineObservation = m_Policy->GetObservation();
+ }
+
+#endif // DEBUG
+
+ // Was the result NEVER? If so we might want to propagate this to
+ // the runtime.
+
+ if (IsNever() && m_Policy->PropagateNeverToRuntime())
+ {
+ // If we know the callee, and if the observation that got us
+ // to this Never inline state is something *other* than
+ // IS_NOINLINE, then we've uncovered a reason why this method
+ // can't ever be inlined. Update the callee method attributes
+ // so that future inline attempts for this callee fail faster.
+
+ InlineObservation obs = m_Policy->GetObservation();
+
+ if ((m_Callee != nullptr) && (obs != InlineObservation::CALLEE_IS_NOINLINE))
+ {
+
+#ifdef DEBUG
+
+ if (VERBOSE)
+ {
+ const char* obsString = InlGetObservationString(obs);
+ JITDUMP("\nINLINER: Marking %s as NOINLINE because of %s\n", callee, obsString);
+ }
+
+#endif // DEBUG
+
+ COMP_HANDLE comp = m_RootCompiler->info.compCompHnd;
+ comp->setMethodAttribs(m_Callee, CORINFO_FLG_BAD_INLINEE);
+ }
+ }
+
+ if (IsDecided())
+ {
+ const char* format = "INLINER: during '%s' result '%s' reason '%s'\n";
+ JITLOG_THIS(m_RootCompiler, (LL_INFO100000, format, m_Description, ResultString(), ReasonString()));
+ COMP_HANDLE comp = m_RootCompiler->info.compCompHnd;
+ comp->reportInliningDecision(m_Caller, m_Callee, Result(), ReasonString());
+ }
+}
+
+//------------------------------------------------------------------------
+// InlineStrategy constructor
+//
+// Arguments:
+// compiler - root compiler instance
+
+InlineStrategy::InlineStrategy(Compiler* compiler)
+ : m_Compiler(compiler)
+ , m_RootContext(nullptr)
+ , m_LastSuccessfulPolicy(nullptr)
+ , m_CallCount(0)
+ , m_CandidateCount(0)
+ , m_AlwaysCandidateCount(0)
+ , m_ForceCandidateCount(0)
+ , m_DiscretionaryCandidateCount(0)
+ , m_UnprofitableCandidateCount(0)
+ , m_ImportCount(0)
+ , m_InlineCount(0)
+ , m_MaxInlineSize(DEFAULT_MAX_INLINE_SIZE)
+ , m_MaxInlineDepth(DEFAULT_MAX_INLINE_DEPTH)
+ , m_InitialTimeBudget(0)
+ , m_InitialTimeEstimate(0)
+ , m_CurrentTimeBudget(0)
+ , m_CurrentTimeEstimate(0)
+ , m_InitialSizeEstimate(0)
+ , m_CurrentSizeEstimate(0)
+ , m_HasForceViaDiscretionary(false)
+#if defined(DEBUG) || defined(INLINE_DATA)
+ , m_MethodXmlFilePosition(0)
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+{
+ // Verify compiler is a root compiler instance
+ assert(m_Compiler->impInlineRoot() == m_Compiler);
+
+#ifdef DEBUG
+
+ // Possibly modify the max inline size.
+ //
+ // Default value of JitInlineSize is the same as our default.
+ // So normally this next line does not change the size.
+ m_MaxInlineSize = JitConfig.JitInlineSize();
+
+ // Up the max size under stress
+ if (m_Compiler->compInlineStress())
+ {
+ m_MaxInlineSize *= 10;
+ }
+
+ // But don't overdo it
+ if (m_MaxInlineSize > IMPLEMENTATION_MAX_INLINE_SIZE)
+ {
+ m_MaxInlineSize = IMPLEMENTATION_MAX_INLINE_SIZE;
+ }
+
+ // Verify: not too small, not too big.
+ assert(m_MaxInlineSize >= ALWAYS_INLINE_SIZE);
+ assert(m_MaxInlineSize <= IMPLEMENTATION_MAX_INLINE_SIZE);
+
+ // Possibly modify the max inline depth
+ //
+ // Default value of JitInlineDepth is the same as our default.
+    // So normally this next line does not change the depth.
+ m_MaxInlineDepth = JitConfig.JitInlineDepth();
+
+ // But don't overdo it
+ if (m_MaxInlineDepth > IMPLEMENTATION_MAX_INLINE_DEPTH)
+ {
+ m_MaxInlineDepth = IMPLEMENTATION_MAX_INLINE_DEPTH;
+ }
+
+#endif // DEBUG
+}
+
+//------------------------------------------------------------------------
+// GetRootContext: get the InlineContext for the root method
+//
+// Return Value:
+// Root context; describes the method being jitted.
+//
+// Note:
+// Also initializes the jit time estimate and budget.
+
+InlineContext* InlineStrategy::GetRootContext()
+{
+ if (m_RootContext == nullptr)
+ {
+ // Allocate on first demand.
+ m_RootContext = NewRoot();
+
+ // Estimate how long the jit will take if there's no inlining
+ // done to this method.
+ m_InitialTimeEstimate = EstimateTime(m_RootContext);
+ m_CurrentTimeEstimate = m_InitialTimeEstimate;
+
+ // Set the initial budget for inlining. Note this is
+ // deliberately set very high and is intended to catch
+ // only pathological runaway inline cases.
+ m_InitialTimeBudget = BUDGET * m_InitialTimeEstimate;
+ m_CurrentTimeBudget = m_InitialTimeBudget;
+
+ // Estimate the code size if there's no inlining
+ m_InitialSizeEstimate = EstimateSize(m_RootContext);
+ m_CurrentSizeEstimate = m_InitialSizeEstimate;
+
+ // Sanity check
+ assert(m_CurrentTimeEstimate > 0);
+ assert(m_CurrentSizeEstimate > 0);
+
+ // Cache as the "last" context created
+ m_LastContext = m_RootContext;
+ }
+
+ return m_RootContext;
+}
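+
+// Illustrative numbers (the BUDGET multiplier is defined elsewhere; assume
+// it is 10 purely for the sake of the example): a root method estimated at
+// 360 would get an initial time budget of 10 * 360 = 3600, which only a
+// pathological amount of inlining should ever exhaust.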
+
+//------------------------------------------------------------------------
+// NoteAttempt: do bookkeeping for an inline attempt
+//
+// Arguments:
+// result -- InlineResult for successful inline candidate
+
+void InlineStrategy::NoteAttempt(InlineResult* result)
+{
+ assert(result->IsCandidate());
+ InlineObservation obs = result->GetObservation();
+
+ if (obs == InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE)
+ {
+ m_AlwaysCandidateCount++;
+ }
+ else if (obs == InlineObservation::CALLEE_IS_FORCE_INLINE)
+ {
+ m_ForceCandidateCount++;
+ }
+ else
+ {
+ m_DiscretionaryCandidateCount++;
+ }
+}
+
+//------------------------------------------------------------------------
+// DumpCsvHeader: dump header for csv inline stats
+//
+// Argument:
+// fp -- file for dump output
+
+void InlineStrategy::DumpCsvHeader(FILE* fp)
+{
+ fprintf(fp, "\"InlineCalls\",");
+ fprintf(fp, "\"InlineCandidates\",");
+ fprintf(fp, "\"InlineAlways\",");
+ fprintf(fp, "\"InlineForce\",");
+ fprintf(fp, "\"InlineDiscretionary\",");
+ fprintf(fp, "\"InlineUnprofitable\",");
+ fprintf(fp, "\"InlineEarlyFail\",");
+ fprintf(fp, "\"InlineImport\",");
+ fprintf(fp, "\"InlineLateFail\",");
+ fprintf(fp, "\"InlineSuccess\",");
+}
+
+//------------------------------------------------------------------------
+// DumpCsvData: dump data for csv inline stats
+//
+// Argument:
+// fp -- file for dump output
+
+void InlineStrategy::DumpCsvData(FILE* fp)
+{
+ fprintf(fp, "%u,", m_CallCount);
+ fprintf(fp, "%u,", m_CandidateCount);
+ fprintf(fp, "%u,", m_AlwaysCandidateCount);
+ fprintf(fp, "%u,", m_ForceCandidateCount);
+ fprintf(fp, "%u,", m_DiscretionaryCandidateCount);
+ fprintf(fp, "%u,", m_UnprofitableCandidateCount);
+
+    // Early failures are cases where candidates are rejected between
+    // the time the jit invokes the inlinee compiler and the time it
+    // starts to import the inlinee IL.
+    //
+    // So they are "cheaper" than late failures.
+
+ unsigned profitableCandidateCount = m_DiscretionaryCandidateCount - m_UnprofitableCandidateCount;
+
+ unsigned earlyFailCount =
+ m_CandidateCount - m_AlwaysCandidateCount - m_ForceCandidateCount - profitableCandidateCount;
+
+ fprintf(fp, "%u,", earlyFailCount);
+
+ unsigned lateFailCount = m_ImportCount - m_InlineCount;
+
+ fprintf(fp, "%u,", m_ImportCount);
+ fprintf(fp, "%u,", lateFailCount);
+ fprintf(fp, "%u,", m_InlineCount);
+}
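+
+// Illustrative accounting (hypothetical counts, not from a real run): with
+// 10 candidates of which 2 are always, 1 is force, and 5 are discretionary
+// (1 of those unprofitable), the profitable discretionary count is 4 and
+// earlyFailCount = 10 - 2 - 1 - 4 = 3. If 6 candidates were then imported
+// and 5 inlined, lateFailCount = 6 - 5 = 1.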
+
+//------------------------------------------------------------------------
+// EstimateTime: estimate impact of this inline on the method jit time
+//
+// Arguments:
+// context - context describing this inline
+//
+// Return Value:
+// Nominal estimate of jit time.
+
+int InlineStrategy::EstimateTime(InlineContext* context)
+{
+ // Simple linear models based on observations
+ // show time is fairly well predicted by IL size.
+ unsigned ilSize = context->GetILSize();
+
+ // Prediction varies for root and inlines.
+ if (context == m_RootContext)
+ {
+ return EstimateRootTime(ilSize);
+ }
+ else
+ {
+ return EstimateInlineTime(ilSize);
+ }
+}
+
+//------------------------------------------------------------------------
+// EstimateRootTime: estimate jit time for a method of this size with
+// no inlining.
+//
+// Arguments:
+// ilSize - size of the method's IL
+//
+// Return Value:
+// Nominal estimate of jit time.
+//
+// Notes:
+// Based on observational data. Time is nominally microseconds.
+
+int InlineStrategy::EstimateRootTime(unsigned ilSize)
+{
+ return 60 + 3 * ilSize;
+}
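+
+// Worked example (illustrative only): a root method with 100 bytes of IL
+// gets a nominal estimate of 60 + 3 * 100 = 360, roughly microseconds.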
+
+//------------------------------------------------------------------------
+// EstimateInlineTime: estimate time impact on jitting for an inline
+// of this size.
+//
+// Arguments:
+// ilSize - size of the method's IL
+//
+// Return Value:
+// Nominal increase in jit time.
+//
+// Notes:
+// Based on observational data. Time is nominally microseconds.
+// Small inlines will make the jit a bit faster.
+
+int InlineStrategy::EstimateInlineTime(unsigned ilSize)
+{
+ return -14 + 2 * ilSize;
+}
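+
+// Worked example (illustrative only): a 50-byte inlinee adds a nominal
+// -14 + 2 * 50 = 86 to the estimate, while a tiny 5-byte inlinee yields
+// -14 + 2 * 5 = -4, reflecting that very small inlines can make the jit
+// slightly faster.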
+
+//------------------------------------------------------------------------
+// EstimateSize: estimate impact of this inline on the method size
+//
+// Arguments:
+// context - context describing this inline
+//
+// Return Value:
+// Nominal estimate of method size (bytes * 10)
+
+int InlineStrategy::EstimateSize(InlineContext* context)
+{
+ // Prediction varies for root and inlines.
+ if (context == m_RootContext)
+ {
+ // Simple linear models based on observations show root method
+ // native code size is fairly well predicted by IL size.
+ //
+ // Model below is for x64 on windows.
+ unsigned ilSize = context->GetILSize();
+ int estimate = (1312 + 228 * ilSize) / 10;
+
+ return estimate;
+ }
+ else
+ {
+ // Use context's code size estimate.
+ return context->GetCodeSizeEstimate();
+ }
+}
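+
+// Worked example (illustrative only): a root method with 100 bytes of IL
+// gets a size estimate of (1312 + 228 * 100) / 10 = 2411 in the bytes * 10
+// scale used here, or roughly 241 bytes of native code.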
+
+//------------------------------------------------------------------------
+// NoteOutcome: do bookkeeping for an inline
+//
+// Arguments:
+//    context - context for the inline
+
+void InlineStrategy::NoteOutcome(InlineContext* context)
+{
+ // Note we can't generally count up failures here -- we only
+ // create contexts for failures in debug modes, and even then
+ // we may not get them all.
+ if (context->IsSuccess())
+ {
+ m_InlineCount++;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Keep track of the inline targeted for data collection or,
+ // if we don't have one (yet), the last successful inline.
+ bool updateLast = (m_LastSuccessfulPolicy == nullptr) || !m_LastSuccessfulPolicy->IsDataCollectionTarget();
+
+ if (updateLast)
+ {
+ m_LastContext = context;
+ m_LastSuccessfulPolicy = context->m_Policy;
+ }
+ else
+ {
+ // We only expect one inline to be a data collection
+ // target.
+ assert(!context->m_Policy->IsDataCollectionTarget());
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ // Budget update.
+ //
+ // If callee is a force inline, increase budget, provided all
+ // parent contexts are likewise force inlines.
+ //
+ // If callee is discretionary or has a discretionary ancestor,
+ // increase expense.
+
+ InlineContext* currentContext = context;
+ bool isForceInline = false;
+
+ while (currentContext != m_RootContext)
+ {
+ InlineObservation observation = currentContext->GetObservation();
+
+ if (observation != InlineObservation::CALLEE_IS_FORCE_INLINE)
+ {
+ if (isForceInline)
+ {
+ // Interesting case where discretionary inlines pull
+ // in a force inline...
+ m_HasForceViaDiscretionary = true;
+ }
+
+ isForceInline = false;
+ break;
+ }
+
+ isForceInline = true;
+ currentContext = currentContext->GetParent();
+ }
+
+ int timeDelta = EstimateTime(context);
+
+ if (isForceInline)
+ {
+ // Update budget since this inline was forced. Only allow
+ // budget to increase.
+ if (timeDelta > 0)
+ {
+ m_CurrentTimeBudget += timeDelta;
+ }
+ }
+
+ // Update time estimate.
+ m_CurrentTimeEstimate += timeDelta;
+
+ // Update size estimate.
+ //
+ // Sometimes estimates don't make sense. Don't let the method
+ // size go negative.
+ int sizeDelta = EstimateSize(context);
+
+ if (m_CurrentSizeEstimate + sizeDelta <= 0)
+ {
+ sizeDelta = 0;
+ }
+
+ // Update the code size estimate.
+ m_CurrentSizeEstimate += sizeDelta;
+ }
+}
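+
+// Illustrative walk of the budget update above (hypothetical call tree): if
+// the root inlines force-inline A and A inlines force-inline B, noting B's
+// outcome walks B -> A, sees only CALLEE_IS_FORCE_INLINE, and grows the time
+// budget by B's (positive) time estimate. If instead the root inlines
+// discretionary C and C inlines force-inline D, the walk from D stops at C,
+// the budget is unchanged, and m_HasForceViaDiscretionary is set.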
+
+//------------------------------------------------------------------------
+// BudgetCheck: return true if an inline of this size would exceed the
+// jit time budget for this method
+//
+// Arguments:
+// ilSize - size of the method's IL
+//
+// Return Value:
+// true if the inline would go over budget
+
+bool InlineStrategy::BudgetCheck(unsigned ilSize)
+{
+ int timeDelta = EstimateInlineTime(ilSize);
+ return (timeDelta + m_CurrentTimeEstimate > m_CurrentTimeBudget);
+}
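+
+// Illustrative check (hypothetical numbers): with m_CurrentTimeEstimate at
+// 360 and m_CurrentTimeBudget at 3600, a prospective 200-byte inlinee adds
+// EstimateInlineTime(200) = -14 + 2 * 200 = 386; since 360 + 386 does not
+// exceed 3600, BudgetCheck returns false and the inline stays within budget.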
+
+//------------------------------------------------------------------------
+// NewRoot: construct an InlineContext for the root method
+//
+// Return Value:
+// InlineContext for use as the root context
+//
+// Notes:
+// We leave m_Code as nullptr here (rather than the IL buffer
+// address of the root method) to preserve existing behavior, which
+// is to allow one recursive inline.
+
+InlineContext* InlineStrategy::NewRoot()
+{
+ InlineContext* rootContext = new (m_Compiler, CMK_Inlining) InlineContext(this);
+
+ rootContext->m_ILSize = m_Compiler->info.compILCodeSize;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ rootContext->m_Callee = m_Compiler->info.compMethodHnd;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ return rootContext;
+}
+
+//------------------------------------------------------------------------
+// NewSuccess: construct an InlineContext for a successful inline
+// and link it into the context tree
+//
+// Arguments:
+// stmt - statement containing call being inlined
+// inlineInfo - information about this inline
+//
+// Return Value:
+// A new InlineContext for statements brought into the method by
+// this inline.
+
+InlineContext* InlineStrategy::NewSuccess(InlineInfo* inlineInfo)
+{
+ InlineContext* calleeContext = new (m_Compiler, CMK_Inlining) InlineContext(this);
+ GenTree* stmt = inlineInfo->iciStmt;
+ BYTE* calleeIL = inlineInfo->inlineCandidateInfo->methInfo.ILCode;
+ unsigned calleeILSize = inlineInfo->inlineCandidateInfo->methInfo.ILCodeSize;
+ InlineContext* parentContext = stmt->gtStmt.gtInlineContext;
+
+ noway_assert(parentContext != nullptr);
+
+ calleeContext->m_Code = calleeIL;
+ calleeContext->m_ILSize = calleeILSize;
+ calleeContext->m_Parent = parentContext;
+    // Pushing on the front here puts siblings in reverse lexical
+    // order, which we undo in the dumper.
+ calleeContext->m_Sibling = parentContext->m_Child;
+ parentContext->m_Child = calleeContext;
+ calleeContext->m_Child = nullptr;
+ calleeContext->m_Offset = stmt->AsStmt()->gtStmtILoffsx;
+ calleeContext->m_Observation = inlineInfo->inlineResult->GetObservation();
+ calleeContext->m_Success = true;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ InlinePolicy* policy = inlineInfo->inlineResult->GetPolicy();
+
+ calleeContext->m_Policy = policy;
+ calleeContext->m_CodeSizeEstimate = policy->CodeSizeEstimate();
+ calleeContext->m_Callee = inlineInfo->fncHandle;
+ // +1 here since we set this before calling NoteOutcome.
+ calleeContext->m_Ordinal = m_InlineCount + 1;
+ // Update offset with more accurate info
+ calleeContext->m_Offset = inlineInfo->inlineResult->GetCall()->gtRawILOffset;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#if defined(DEBUG)
+
+ calleeContext->m_TreeID = inlineInfo->inlineResult->GetCall()->gtTreeID;
+
+#endif // defined(DEBUG)
+
+ NoteOutcome(calleeContext);
+
+ return calleeContext;
+}
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------
+// NewFailure: construct an InlineContext for a failing inline
+// and link it into the context tree
+//
+// Arguments:
+// stmt - statement containing the attempted inline
+// inlineResult - inlineResult for the attempt
+//
+// Return Value:
+// A new InlineContext for diagnostic purposes, or nullptr if
+// the desired context could not be created.
+
+InlineContext* InlineStrategy::NewFailure(GenTree* stmt, InlineResult* inlineResult)
+{
+ // Check for a parent context first. We may insert new statements
+ // between the caller and callee that do not pick up either's
+ // context, and these statements may have calls that we later
+ // examine and fail to inline.
+ //
+ // See fgInlinePrependStatements for examples.
+
+ InlineContext* parentContext = stmt->gtStmt.gtInlineContext;
+
+ if (parentContext == nullptr)
+ {
+ // Assume for now this is a failure to inline a call in a
+ // statement inserted between caller and callee. Just ignore
+ // it for the time being.
+
+ return nullptr;
+ }
+
+ InlineContext* failedContext = new (m_Compiler, CMK_Inlining) InlineContext(this);
+
+ failedContext->m_Parent = parentContext;
+    // Pushing on the front here puts siblings in reverse lexical
+    // order, which we undo in the dumper.
+ failedContext->m_Sibling = parentContext->m_Child;
+ parentContext->m_Child = failedContext;
+ failedContext->m_Child = nullptr;
+ failedContext->m_Offset = stmt->AsStmt()->gtStmtILoffsx;
+ failedContext->m_Observation = inlineResult->GetObservation();
+ failedContext->m_Callee = inlineResult->GetCallee();
+ failedContext->m_Success = false;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Update offset with more accurate info
+ failedContext->m_Offset = inlineResult->GetCall()->gtRawILOffset;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#if defined(DEBUG)
+
+ failedContext->m_TreeID = inlineResult->GetCall()->gtTreeID;
+
+#endif // defined(DEBUG)
+
+ NoteOutcome(failedContext);
+
+ return failedContext;
+}
+
+//------------------------------------------------------------------------
+// Dump: dump description of inline behavior
+
+void InlineStrategy::Dump()
+{
+ m_RootContext->Dump();
+
+ printf("Budget: initialTime=%d, finalTime=%d, initialBudget=%d, currentBudget=%d\n", m_InitialTimeEstimate,
+ m_CurrentTimeEstimate, m_InitialTimeBudget, m_CurrentTimeBudget);
+
+ if (m_CurrentTimeBudget > m_InitialTimeBudget)
+ {
+ printf("Budget: increased by %d because of force inlines\n", m_CurrentTimeBudget - m_InitialTimeBudget);
+ }
+
+ if (m_CurrentTimeEstimate > m_CurrentTimeBudget)
+ {
+ printf("Budget: went over budget by %d\n", m_CurrentTimeEstimate - m_CurrentTimeBudget);
+ }
+
+ if (m_HasForceViaDiscretionary)
+ {
+ printf("Budget: discretionary inline caused a force inline\n");
+ }
+
+ printf("Budget: initialSize=%d, finalSize=%d\n", m_InitialSizeEstimate, m_CurrentSizeEstimate);
+}
+
+// Static to track emission of the inline data header
+
+bool InlineStrategy::s_HasDumpedDataHeader = false;
+
+//------------------------------------------------------------------------
+// DumpData: dump data about the last successful inline into this method
+// in a format suitable for automated analysis.
+
+void InlineStrategy::DumpData()
+{
+ // Is dumping enabled? If not, nothing to do.
+ if (JitConfig.JitInlineDumpData() == 0)
+ {
+ return;
+ }
+
+ // If we're also dumping inline XML, we'll let it dump the data.
+ if (JitConfig.JitInlineDumpXml() != 0)
+ {
+ return;
+ }
+
+ // Don't dump anything if limiting is on and we didn't reach
+ // the limit while inlining.
+ //
+ // This serves to filter out duplicate data.
+ const int limit = JitConfig.JitInlineLimit();
+
+ if ((limit >= 0) && (m_InlineCount < static_cast<unsigned>(limit)))
+ {
+ return;
+ }
+
+ // Dump header, if not already dumped
+ if (!s_HasDumpedDataHeader)
+ {
+ DumpDataHeader(stderr);
+ s_HasDumpedDataHeader = true;
+ }
+
+ // Dump contents
+ DumpDataContents(stderr);
+ fprintf(stderr, "\n");
+}
+
+//------------------------------------------------------------------------
+// DumpDataEnsurePolicyIsSet: ensure m_LastSuccessfulPolicy describes the
+// inline policy in effect.
+//
+// Notes:
+// Needed for methods that don't have any successful inlines.
+
+void InlineStrategy::DumpDataEnsurePolicyIsSet()
+{
+ // Cache references to compiler substructures.
+ const Compiler::Info& info = m_Compiler->info;
+ const Compiler::Options& opts = m_Compiler->opts;
+
+ // If there weren't any successful inlines, we won't have a
+ // successful policy, so fake one up.
+ if (m_LastSuccessfulPolicy == nullptr)
+ {
+ const bool isPrejitRoot = (opts.eeFlags & CORJIT_FLG_PREJIT) != 0;
+ m_LastSuccessfulPolicy = InlinePolicy::GetPolicy(m_Compiler, isPrejitRoot);
+
+ // Add in a bit of data....
+ const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
+ m_LastSuccessfulPolicy->NoteBool(InlineObservation::CALLEE_IS_FORCE_INLINE, isForceInline);
+ m_LastSuccessfulPolicy->NoteInt(InlineObservation::CALLEE_IL_CODE_SIZE, info.compMethodInfo->ILCodeSize);
+ }
+}
+
+//------------------------------------------------------------------------
+// DumpDataHeader: dump header for inline data.
+//
+// Arguments:
+// file - file for data output
+
+void InlineStrategy::DumpDataHeader(FILE* file)
+{
+ DumpDataEnsurePolicyIsSet();
+ const int limit = JitConfig.JitInlineLimit();
+ fprintf(file, "*** Inline Data: Policy=%s JitInlineLimit=%d ***\n", m_LastSuccessfulPolicy->GetName(), limit);
+ DumpDataSchema(file);
+ fprintf(file, "\n");
+}
+
+//------------------------------------------------------------------------
+// DumpDataSchema: dump schema for inline data.
+//
+// Arguments:
+// file - file for data output
+
+void InlineStrategy::DumpDataSchema(FILE* file)
+{
+ DumpDataEnsurePolicyIsSet();
+ fprintf(file, "Method,Version,HotSize,ColdSize,JitTime,SizeEstimate,TimeEstimate");
+ m_LastSuccessfulPolicy->DumpSchema(file);
+}
+
+//------------------------------------------------------------------------
+// DumpDataContents: dump contents of inline data
+//
+// Arguments:
+// file - file for data output
+
+void InlineStrategy::DumpDataContents(FILE* file)
+{
+ DumpDataEnsurePolicyIsSet();
+
+ // Cache references to compiler substructures.
+ const Compiler::Info& info = m_Compiler->info;
+ const Compiler::Options& opts = m_Compiler->opts;
+
+ // We'd really like the method identifier to be unique and
+ // durable across crossgen invocations. Not clear how to
+ // accomplish this, so we'll use the token for now.
+ //
+ // Post processing will have to filter out all data from
+ // methods where the root entry appears multiple times.
+ mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
+
+ // Convert time spent jitting into microseconds
+ unsigned microsecondsSpentJitting = 0;
+ unsigned __int64 compCycles = m_Compiler->getInlineCycleCount();
+ if (compCycles > 0)
+ {
+ double countsPerSec = CycleTimer::CyclesPerSecond();
+ double counts = (double)compCycles;
+ microsecondsSpentJitting = (unsigned)((counts / countsPerSec) * 1000 * 1000);
+ }
+
+ fprintf(file, "%08X,%u,%u,%u,%u,%d,%d", currentMethodToken, m_InlineCount, info.compTotalHotCodeSize,
+ info.compTotalColdCodeSize, microsecondsSpentJitting, m_CurrentSizeEstimate / 10, m_CurrentTimeEstimate);
+ m_LastSuccessfulPolicy->DumpData(file);
+}
+
+// Static to track emission of the xml data header
+// and lock to prevent interleaved file writes
+
+bool InlineStrategy::s_HasDumpedXmlHeader = false;
+CritSecObject InlineStrategy::s_XmlWriterLock;
+
+//------------------------------------------------------------------------
+// DumpXml: dump xml-formatted version of the inline tree.
+//
+// Arguments:
+// file - file for data output
+// indent - indent level of this element
+
+void InlineStrategy::DumpXml(FILE* file, unsigned indent)
+{
+ if (JitConfig.JitInlineDumpXml() == 0)
+ {
+ return;
+ }
+
+ // Lock to prevent interleaving of trees.
+ CritSecHolder writeLock(s_XmlWriterLock);
+
+ // Dump header
+ if (!s_HasDumpedXmlHeader)
+ {
+ DumpDataEnsurePolicyIsSet();
+
+ fprintf(file, "<?xml version=\"1.0\"?>\n");
+ fprintf(file, "<InlineForest>\n");
+ fprintf(file, "<Policy>%s</Policy>\n", m_LastSuccessfulPolicy->GetName());
+
+ if (JitConfig.JitInlineDumpData() != 0)
+ {
+ fprintf(file, "<DataSchema>");
+ DumpDataSchema(file);
+ fprintf(file, "</DataSchema>\n");
+ }
+
+ fprintf(file, "<Methods>\n");
+ s_HasDumpedXmlHeader = true;
+ }
+
+ // If we're dumping "minimal" Xml, and we didn't do
+ // any inlines into this method, then there's nothing
+ // to emit here.
+ if ((m_InlineCount == 0) && (JitConfig.JitInlineDumpXml() == 2))
+ {
+ return;
+ }
+
+ // Cache references to compiler substructures.
+ const Compiler::Info& info = m_Compiler->info;
+ const Compiler::Options& opts = m_Compiler->opts;
+
+ const bool isPrejitRoot = (opts.eeFlags & CORJIT_FLG_PREJIT) != 0;
+ const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
+
+ // We'd really like the method identifier to be unique and
+ // durable across crossgen invocations. Not clear how to
+ // accomplish this, so we'll use the token for now.
+ //
+ // Post processing will have to filter out all data from
+ // methods where the root entry appears multiple times.
+ mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
+
+ unsigned hash = info.compMethodHash();
+
+ // Convert time spent jitting into microseconds
+ unsigned microsecondsSpentJitting = 0;
+ unsigned __int64 compCycles = m_Compiler->getInlineCycleCount();
+ if (compCycles > 0)
+ {
+ double countsPerSec = CycleTimer::CyclesPerSecond();
+ double counts = (double)compCycles;
+ microsecondsSpentJitting = (unsigned)((counts / countsPerSec) * 1000 * 1000);
+ }
+
+ // Get method name just for root method, to make it a bit easier
+ // to search for things in the inline xml.
+ const char* methodName = info.compCompHnd->getMethodName(info.compMethodHnd, nullptr);
+
+    // Cheap xml quoting for values. Only < and & are troublemakers,
+    // but we also change > for symmetry.
+    //
+    // Ok to truncate the name, just ensure it's null terminated.
+ char buf[64];
+ strncpy(buf, methodName, sizeof(buf));
+ buf[sizeof(buf) - 1] = 0;
+
+ for (int i = 0; i < sizeof(buf); i++)
+ {
+ switch (buf[i])
+ {
+ case '<':
+ buf[i] = '[';
+ break;
+ case '>':
+ buf[i] = ']';
+ break;
+ case '&':
+ buf[i] = '#';
+ break;
+ default:
+ break;
+ }
+ }
+
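+    // For illustration (hypothetical name): a compiler-generated method
+    // name such as "<Foo>b__0" is emitted as "[Foo]b__0" after the
+    // substitution above, keeping the XML well formed.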
+ fprintf(file, "%*s<Method>\n", indent, "");
+ fprintf(file, "%*s<Token>%u</Token>\n", indent + 2, "", currentMethodToken);
+ fprintf(file, "%*s<Hash>%u</Hash>\n", indent + 2, "", hash);
+ fprintf(file, "%*s<Name>%s</Name>\n", indent + 2, "", buf);
+ fprintf(file, "%*s<InlineCount>%u</InlineCount>\n", indent + 2, "", m_InlineCount);
+ fprintf(file, "%*s<HotSize>%u</HotSize>\n", indent + 2, "", info.compTotalHotCodeSize);
+ fprintf(file, "%*s<ColdSize>%u</ColdSize>\n", indent + 2, "", info.compTotalColdCodeSize);
+ fprintf(file, "%*s<JitTime>%u</JitTime>\n", indent + 2, "", microsecondsSpentJitting);
+ fprintf(file, "%*s<SizeEstimate>%u</SizeEstimate>\n", indent + 2, "", m_CurrentSizeEstimate / 10);
+ fprintf(file, "%*s<TimeEstimate>%u</TimeEstimate>\n", indent + 2, "", m_CurrentTimeEstimate);
+
+ // Root context will be null if we're not optimizing the method.
+ //
+ // Note there are cases of this in mscorlib even in release builds,
+ // eg Task.NotifyDebuggerOfWaitCompletion.
+ //
+ // For such methods there aren't any inlines.
+ if (m_RootContext != nullptr)
+ {
+ m_RootContext->DumpXml(file, indent + 2);
+ }
+ else
+ {
+ fprintf(file, "%*s<Inlines/>\n", indent + 2, "");
+ }
+
+ fprintf(file, "%*s</Method>\n", indent, "");
+}
+
+//------------------------------------------------------------------------
+// FinalizeXml: finalize the xml-formatted version of the inline tree.
+//
+// Arguments:
+// file - file for data output
+
+void InlineStrategy::FinalizeXml(FILE* file)
+{
+ // If we dumped the header, dump a footer
+ if (s_HasDumpedXmlHeader)
+ {
+ fprintf(file, "</Methods>\n");
+ fprintf(file, "</InlineForest>\n");
+ fflush(file);
+
+        // Workaround for compShutdown getting called twice.
+ s_HasDumpedXmlHeader = false;
+ }
+
+ // Finalize reading inline xml
+ ReplayPolicy::FinalizeXml();
+}
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------
+// IsNoInline: allow strategy to disable inlining in a method
+//
+// Arguments:
+// info -- compiler interface from the EE
+// method -- handle for the root method
+//
+// Notes:
+//    Will only return true in debug or special release builds.
+// Expects JitNoInlineRange to be set to the hashes of methods
+// where inlining is disabled.
+
+bool InlineStrategy::IsNoInline(ICorJitInfo* info, CORINFO_METHOD_HANDLE method)
+{
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ static ConfigMethodRange range;
+ const wchar_t* noInlineRange = JitConfig.JitNoInlineRange();
+
+ if (noInlineRange == nullptr)
+ {
+ return false;
+ }
+
+ // If we have a config string we have at least one entry. Count
+ // number of spaces in our config string to see if there are
+ // more. Number of ranges we need is 2x that value.
+ unsigned entryCount = 1;
+ for (const wchar_t* p = noInlineRange; *p != 0; p++)
+ {
+ if (*p == L' ')
+ {
+ entryCount++;
+ }
+ }
+
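+    // Illustrative count (hypothetical config value): a JitNoInlineRange
+    // string with two space-separated entries contains one space, so
+    // entryCount becomes 2 and we reserve room for 2 * 2 = 4 ranges.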
+ range.EnsureInit(noInlineRange, 2 * entryCount);
+ assert(!range.Error());
+ return range.Contains(info, method);
+
+#else
+
+ return false;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+}
diff --git a/src/jit/inline.def b/src/jit/inline.def
new file mode 100644
index 0000000000..2c933fb8a9
--- /dev/null
+++ b/src/jit/inline.def
@@ -0,0 +1,176 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Macro template for inline observations
+//
+// INLINE_OBSERVATION(name, type, description, impact, target)
+//
+// name will be used to create an InlineObservation enum member
+// (enum name prepends scope, eg CALLEE_MARKED_AS_SKIPPED)
+// type is the data type for the observation
+// description is a user string for diagnostics
+// impact is one of the members of InlineImpact
+// target is one of the members of InlineTarget
+//
+// Note: the impact classification is work in progress.
+//
+// Some subset of the FATAL cases here can be refined to SERIOUS,
+// LIMITATION, or PERFORMANCE. While the refined observations may
+// eventually veto inlining, the jit can safely keep making more
+// observations.
+
+// ------ Initial Sentinel -------
+
+INLINE_OBSERVATION(UNUSED_INITIAL, bool, "unused initial observation", FATAL, CALLEE)
+
+// ------ Callee Fatal -------
+
+INLINE_OBSERVATION(BAD_ARGUMENT_NUMBER, bool, "invalid argument number", FATAL, CALLEE)
+INLINE_OBSERVATION(BAD_LOCAL_NUMBER, bool, "invalid local number", FATAL, CALLEE)
+INLINE_OBSERVATION(CLASS_INIT_FAILURE, bool, "class init failed", FATAL, CALLEE)
+INLINE_OBSERVATION(COMPILATION_ERROR, bool, "compilation error", FATAL, CALLEE)
+INLINE_OBSERVATION(EXCEEDS_THRESHOLD, bool, "exceeds profit threshold", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_DELEGATE_INVOKE, bool, "delegate invoke", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_EH, bool, "has exception handling", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_ENDFILTER, bool, "has endfilter", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_ENDFINALLY, bool, "has endfinally", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_LEAVE, bool, "has leave", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_MANAGED_VARARGS, bool, "managed varargs", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_NATIVE_VARARGS, bool, "native varargs", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_NO_BODY, bool, "has no body", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_NULL_FOR_LDELEM, bool, "has null pointer for ldelem", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_PINNED_LOCALS, bool, "has pinned locals", FATAL, CALLEE)
+INLINE_OBSERVATION(IS_ARRAY_METHOD, bool, "is array method", FATAL, CALLEE)
+INLINE_OBSERVATION(IS_GENERIC_VIRTUAL, bool, "generic virtual", FATAL, CALLEE)
+INLINE_OBSERVATION(IS_JIT_NOINLINE, bool, "noinline per JitNoinline", FATAL, CALLEE)
+INLINE_OBSERVATION(IS_NOINLINE, bool, "noinline per IL/cached result", FATAL, CALLEE)
+INLINE_OBSERVATION(IS_SYNCHRONIZED, bool, "is synchronized", FATAL, CALLEE)
+INLINE_OBSERVATION(IS_VM_NOINLINE, bool, "noinline per VM", FATAL, CALLEE)
+INLINE_OBSERVATION(LACKS_RETURN, bool, "no return opcode", FATAL, CALLEE)
+INLINE_OBSERVATION(LDFLD_NEEDS_HELPER, bool, "ldfld needs helper", FATAL, CALLEE)
+INLINE_OBSERVATION(LOG_REPLAY_REJECT, bool, "rejected by log replay", FATAL, CALLEE)
+INLINE_OBSERVATION(MARKED_AS_SKIPPED, bool, "skipped by complus request", FATAL, CALLEE)
+INLINE_OBSERVATION(MAXSTACK_TOO_BIG, bool, "maxstack too big" , FATAL, CALLEE)
+INLINE_OBSERVATION(NEEDS_SECURITY_CHECK, bool, "needs security check", FATAL, CALLEE)
+INLINE_OBSERVATION(NO_METHOD_INFO, bool, "cannot get method info", FATAL, CALLEE)
+INLINE_OBSERVATION(NOT_PROFITABLE_INLINE, bool, "unprofitable inline", FATAL, CALLEE)
+INLINE_OBSERVATION(RANDOM_REJECT, bool, "random reject", FATAL, CALLEE)
+INLINE_OBSERVATION(STACK_CRAWL_MARK, bool, "uses stack crawl mark", FATAL, CALLEE)
+INLINE_OBSERVATION(STFLD_NEEDS_HELPER, bool, "stfld needs helper", FATAL, CALLEE)
+INLINE_OBSERVATION(THROW_WITH_INVALID_STACK, bool, "throw with invalid stack", FATAL, CALLEE)
+INLINE_OBSERVATION(TOO_MANY_ARGUMENTS, bool, "too many arguments", FATAL, CALLEE)
+INLINE_OBSERVATION(TOO_MANY_LOCALS, bool, "too many locals", FATAL, CALLEE)
+INLINE_OBSERVATION(EXPLICIT_TAIL_PREFIX, bool, "explicit tail prefix in callee",FATAL, CALLEE)
+
+// ------ Callee Performance -------
+
+INLINE_OBSERVATION(LDFLD_STATIC_VALUECLASS, bool, "ldsfld of value class", PERFORMANCE, CALLEE)
+INLINE_OBSERVATION(TOO_MANY_BASIC_BLOCKS, bool, "too many basic blocks", PERFORMANCE, CALLEE)
+INLINE_OBSERVATION(TOO_MUCH_IL, bool, "too many il bytes", PERFORMANCE, CALLEE)
+
+// ------ Callee Information -------
+
+INLINE_OBSERVATION(ARG_FEEDS_CONSTANT_TEST, bool, "argument feeds constant test", INFORMATION, CALLEE)
+INLINE_OBSERVATION(ARG_FEEDS_RANGE_CHECK, bool, "argument feeds range check", INFORMATION, CALLEE)
+INLINE_OBSERVATION(BEGIN_OPCODE_SCAN, bool, "prepare to look at opcodes", INFORMATION, CALLEE)
+INLINE_OBSERVATION(BELOW_ALWAYS_INLINE_SIZE, bool, "below ALWAYS_INLINE size", INFORMATION, CALLEE)
+INLINE_OBSERVATION(CLASS_PROMOTABLE, bool, "promotable value class", INFORMATION, CALLEE)
+INLINE_OBSERVATION(DOES_NOT_RETURN, bool, "does not return", INFORMATION, CALLEE)
+INLINE_OBSERVATION(END_OPCODE_SCAN, bool, "done looking at opcodes", INFORMATION, CALLEE)
+INLINE_OBSERVATION(HAS_SIMD, bool, "has SIMD arg, local, or ret", INFORMATION, CALLEE)
+INLINE_OBSERVATION(HAS_SWITCH, bool, "has switch", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IL_CODE_SIZE, int, "number of bytes of IL", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IS_CLASS_CTOR, bool, "class constructor", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IS_DISCRETIONARY_INLINE, bool, "can inline, check heuristics", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IS_FORCE_INLINE, bool, "aggressive inline attribute", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IS_INSTANCE_CTOR, bool, "instance constructor", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IS_PROFITABLE_INLINE, bool, "profitable inline", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IS_SIZE_DECREASING_INLINE, bool, "size decreasing inline", INFORMATION, CALLEE)
+INLINE_OBSERVATION(LOG_REPLAY_ACCEPT, bool, "accepted by log replay", INFORMATION, CALLEE)
+INLINE_OBSERVATION(LOOKS_LIKE_WRAPPER, bool, "thin wrapper around a call", INFORMATION, CALLEE)
+INLINE_OBSERVATION(MAXSTACK, int, "maxstack", INFORMATION, CALLEE)
+INLINE_OBSERVATION(OPCODE, int, "next opcode in IL stream", INFORMATION, CALLEE)
+INLINE_OBSERVATION(OPCODE_NORMED, int, "next opcode in IL stream", INFORMATION, CALLEE)
+INLINE_OBSERVATION(NUMBER_OF_ARGUMENTS, int, "number of arguments", INFORMATION, CALLEE)
+INLINE_OBSERVATION(NUMBER_OF_BASIC_BLOCKS, int, "number of basic blocks", INFORMATION, CALLEE)
+INLINE_OBSERVATION(NUMBER_OF_LOCALS, int, "number of locals", INFORMATION, CALLEE)
+INLINE_OBSERVATION(RANDOM_ACCEPT, bool, "random accept", INFORMATION, CALLEE)
+INLINE_OBSERVATION(UNSUPPORTED_OPCODE, bool, "unsupported opcode", INFORMATION, CALLEE)
+
+// ------ Caller Correctness -------
+
+INLINE_OBSERVATION(DEBUG_CODEGEN, bool, "debug codegen", FATAL, CALLER)
+INLINE_OBSERVATION(IS_JIT_NOINLINE, bool, "noinline per JitNoInlineRange", FATAL, CALLER)
+INLINE_OBSERVATION(NEEDS_SECURITY_CHECK, bool, "needs security check", FATAL, CALLER)
+
+// ------ Caller Information -------
+
+INLINE_OBSERVATION(HAS_NEWARRAY, bool, "has newarray", INFORMATION, CALLER)
+INLINE_OBSERVATION(HAS_NEWOBJ, bool, "has newobj", INFORMATION, CALLER)
+
+// ------ Call Site Correctness -------
+
+INLINE_OBSERVATION(ARG_HAS_NULL_THIS, bool, "this pointer argument is null", FATAL, CALLSITE)
+INLINE_OBSERVATION(ARG_HAS_SIDE_EFFECT, bool, "argument has side effect", FATAL, CALLSITE)
+INLINE_OBSERVATION(ARG_IS_MKREFANY, bool, "argument is mkrefany", FATAL, CALLSITE)
+INLINE_OBSERVATION(ARG_NO_BASH_TO_INT, bool, "argument can't bash to int", FATAL, CALLSITE)
+INLINE_OBSERVATION(ARG_NO_BASH_TO_REF, bool, "argument can't bash to ref", FATAL, CALLSITE)
+INLINE_OBSERVATION(ARG_TYPES_INCOMPATIBLE, bool, "argument types incompatible", FATAL, CALLSITE)
+INLINE_OBSERVATION(CANT_EMBED_PINVOKE_COOKIE, bool, "can't embed pinvoke cookie", FATAL, CALLSITE)
+INLINE_OBSERVATION(CANT_EMBED_VARARGS_COOKIE, bool, "can't embed varargs cookie", FATAL, CALLSITE)
+INLINE_OBSERVATION(CLASS_INIT_FAILURE_SPEC, bool, "speculative class init failed", FATAL, CALLSITE)
+INLINE_OBSERVATION(COMPILATION_ERROR, bool, "compilation error", FATAL, CALLSITE)
+INLINE_OBSERVATION(COMPILATION_FAILURE, bool, "failed to compile", FATAL, CALLSITE)
+INLINE_OBSERVATION(CONDITIONAL_THROW, bool, "conditional throw", FATAL, CALLSITE)
+INLINE_OBSERVATION(CROSS_BOUNDARY_CALLI, bool, "cross-boundary calli", FATAL, CALLSITE)
+INLINE_OBSERVATION(CROSS_BOUNDARY_SECURITY, bool, "cross-boundary security check", FATAL, CALLSITE)
+INLINE_OBSERVATION(EXCEEDS_THRESHOLD, bool, "exceeds profit threshold", FATAL, CALLSITE)
+INLINE_OBSERVATION(EXPLICIT_TAIL_PREFIX, bool, "explicit tail prefix", FATAL, CALLSITE)
+INLINE_OBSERVATION(GENERIC_DICTIONARY_LOOKUP, bool, "runtime dictionary lookup", FATAL, CALLSITE)
+INLINE_OBSERVATION(HAS_CALL_VIA_LDVIRTFTN, bool, "call via ldvirtftn", FATAL, CALLSITE)
+INLINE_OBSERVATION(HAS_COMPLEX_HANDLE, bool, "complex handle access", FATAL, CALLSITE)
+INLINE_OBSERVATION(HAS_LDSTR_RESTRICTION, bool, "has ldstr VM restriction", FATAL, CALLSITE)
+INLINE_OBSERVATION(IMPLICIT_REC_TAIL_CALL, bool, "implicit recursive tail call", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_CALL_TO_HELPER, bool, "target is helper", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_NOT_DIRECT, bool, "target not direct", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_NOT_DIRECT_MANAGED, bool, "target not direct managed", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_RECURSIVE, bool, "recursive", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_TOO_DEEP, bool, "too deep", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_VIRTUAL, bool, "virtual", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_VM_NOINLINE, bool, "noinline per VM", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_WITHIN_CATCH, bool, "within catch region", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_WITHIN_FILTER,         bool, "within filter region",        FATAL, CALLSITE)
+INLINE_OBSERVATION(LDARGA_NOT_LOCAL_VAR, bool, "ldarga not on local var", FATAL, CALLSITE)
+INLINE_OBSERVATION(LDFLD_NEEDS_HELPER, bool, "ldfld needs helper", FATAL, CALLSITE)
+INLINE_OBSERVATION(LDVIRTFN_ON_NON_VIRTUAL, bool, "ldvirtfn on non-virtual", FATAL, CALLSITE)
+INLINE_OBSERVATION(LOG_REPLAY_REJECT, bool, "rejected by log replay", FATAL, CALLSITE)
+INLINE_OBSERVATION(NOT_CANDIDATE, bool, "not inline candidate", FATAL, CALLSITE)
+INLINE_OBSERVATION(NOT_PROFITABLE_INLINE, bool, "unprofitable inline", FATAL, CALLSITE)
+INLINE_OBSERVATION(OVER_BUDGET, bool, "inline exceeds budget", FATAL, CALLSITE)
+INLINE_OBSERVATION(OVER_INLINE_LIMIT, bool, "limited by JitInlineLimit", FATAL, CALLSITE)
+INLINE_OBSERVATION(RANDOM_REJECT, bool, "random reject", FATAL, CALLSITE)
+INLINE_OBSERVATION(REQUIRES_SAME_THIS, bool, "requires same this", FATAL, CALLSITE)
+INLINE_OBSERVATION(RETURN_TYPE_MISMATCH, bool, "return type mismatch", FATAL, CALLSITE)
+INLINE_OBSERVATION(STFLD_NEEDS_HELPER, bool, "stfld needs helper", FATAL, CALLSITE)
+INLINE_OBSERVATION(TOO_MANY_LOCALS, bool, "too many locals", FATAL, CALLSITE)
+
+// ------ Call Site Performance -------
+
+
+// ------ Call Site Information -------
+
+INLINE_OBSERVATION(CONSTANT_ARG_FEEDS_TEST, bool, "constant argument feeds test", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(DEPTH, int, "depth", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(FREQUENCY, int, "rough call site frequency", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(IS_PROFITABLE_INLINE, bool, "profitable inline", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(IS_SAME_THIS, bool, "same this as root caller", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(IS_SIZE_DECREASING_INLINE, bool, "size decreasing inline", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(LOG_REPLAY_ACCEPT, bool, "accepted by log replay", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(RANDOM_ACCEPT, bool, "random accept", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(WEIGHT, int, "call site frequency", INFORMATION, CALLSITE)
+
+// ------ Final Sentinel -------
+
+INLINE_OBSERVATION(UNUSED_FINAL, bool, "unused final observation", FATAL, CALLEE)
+
diff --git a/src/jit/inline.h b/src/jit/inline.h
new file mode 100644
index 0000000000..e3d5750754
--- /dev/null
+++ b/src/jit/inline.h
@@ -0,0 +1,894 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Inlining Support
+//
+// This file contains enum and class definitions and related
+// information that the jit uses to make inlining decisions.
+//
+// -- ENUMS --
+//
+// InlineCallsiteFrequency - rough assessment of call site frequency
+// InlineDecision - overall decision made about an inline
+// InlineTarget - target of a particular observation
+// InlineImpact - impact of a particular observation
+// InlineObservation - facts observed when considering an inline
+//
+// -- CLASSES --
+//
+// InlineResult - accumulates observations, consults with policy
+// InlineCandidateInfo - basic information needed for inlining
+// InlArgInfo - information about a candidate's argument
+// InlLclVarInfo - information about a candidate's local variable
+// InlineInfo - detailed information needed for inlining
+// InlineContext - class, remembers what inlines happened
+// InlinePolicy - class, determines policy for inlining
+// InlineStrategy - class, determines overall inline strategy
+//
+// Enums are used throughout to provide various descriptions.
+//
+// There are 4 situations where inline candidacy is evaluated. In each
+// case an InlineResult is allocated on the stack to collect
+// information about the inline candidate. Each InlineResult refers
+// to an InlinePolicy.
+//
+// 1. Importer Candidate Screen (impMarkInlineCandidate)
+//
+// Creates: InlineCandidateInfo
+//
+// During importing, the IL being imported is scanned to identify
+// inline candidates. This happens both when the root method is being
+// imported as well as when prospective inlines are being imported.
+// Candidates are marked in the IL and given an InlineCandidateInfo.
+//
+// 2. Inlining Optimization Pass -- candidates (fgInline)
+//
+// Creates / Uses: InlineContext
+//   Creates: InlineInfo, InlArgInfo, InlLclVarInfo
+//
+// During the inlining optimization pass, each candidate is further
+// analyzed. Viable candidates will eventually inspire creation of an
+// InlineInfo and a set of InlArgInfos (for call arguments) and
+// InlLclVarInfos (for callee locals).
+//
+// The analysis will also examine InlineContexts from relevant prior
+// inlines. If the inline is successful, a new InlineContext will be
+// created to remember this inline. In DEBUG builds, failing inlines
+// also create InlineContexts.
+//
+// 3. Inlining Optimization Pass -- non-candidates (fgNoteNotInlineCandidate)
+//
+// Creates / Uses: InlineContext
+//
+// In DEBUG, the jit also searches for non-candidate calls to try
+// and get a complete picture of the set of failed inlines.
+//
+// 4. Prejit suitability screen (compCompileHelper)
+//
+// When prejitting, each method is scanned to see if it is a viable
+// inline candidate.
+
+#ifndef _INLINE_H_
+#define _INLINE_H_
+
+#include "jit.h"
+#include "gentree.h"
+
+// Implementation limits
+
+#ifndef LEGACY_BACKEND
+const unsigned int MAX_INL_ARGS = 32; // does not include obj pointer
+const unsigned int MAX_INL_LCLS = 32;
+#else // LEGACY_BACKEND
+const unsigned int MAX_INL_ARGS = 10; // does not include obj pointer
+const unsigned int MAX_INL_LCLS = 8;
+#endif // LEGACY_BACKEND
+
+// Flags lost during inlining.
+
+#define CORJIT_FLG_LOST_WHEN_INLINING \
+ (CORJIT_FLG_BBOPT | CORJIT_FLG_BBINSTR | CORJIT_FLG_PROF_ENTERLEAVE | CORJIT_FLG_DEBUG_EnC | CORJIT_FLG_DEBUG_INFO)
+
+// Forward declarations
+
+class InlineStrategy;
+
+// InlineCallsiteFrequency gives a rough classification of how
+// often a call site will be executed at runtime.
+
+enum class InlineCallsiteFrequency
+{
+ UNUSED, // n/a
+ RARE, // once in a blue moon
+ BORING, // normal call site
+ WARM, // seen during profiling
+ LOOP, // in a loop
+ HOT // very frequent
+};
+
+// InlineDecision describes the various states the jit goes through when
+// evaluating an inline candidate. It is distinct from CorInfoInline
+// because it must capture internal states that don't get reported back
+// to the runtime.
+
+enum class InlineDecision
+{
+ UNDECIDED,
+ CANDIDATE,
+ SUCCESS,
+ FAILURE,
+ NEVER
+};
+
+// Translate a decision into a CorInfoInline for reporting back to the runtime.
+
+CorInfoInline InlGetCorInfoInlineDecision(InlineDecision d);
+
+// Get a string describing this InlineDecision
+
+const char* InlGetDecisionString(InlineDecision d);
+
+// True if this InlineDecision describes a failing inline
+
+bool InlDecisionIsFailure(InlineDecision d);
+
+// True if this decision describes a successful inline
+
+bool InlDecisionIsSuccess(InlineDecision d);
+
+// True if this InlineDecision is a never inline decision
+
+bool InlDecisionIsNever(InlineDecision d);
+
+// True if this InlineDecision describes a viable candidate
+
+bool InlDecisionIsCandidate(InlineDecision d);
+
+// True if this InlineDecision describes a decision
+
+bool InlDecisionIsDecided(InlineDecision d);
+
+// InlineTarget describes the possible targets of an inline observation.
+
+enum class InlineTarget
+{
+ CALLEE, // observation applies to all calls to this callee
+ CALLER, // observation applies to all calls made by this caller
+ CALLSITE // observation applies to a specific call site
+};
+
+// InlineImpact describes the possible impact of an inline observation.
+
+enum class InlineImpact
+{
+ FATAL, // inlining impossible, unsafe to evaluate further
+ FUNDAMENTAL, // inlining impossible for fundamental reasons, deeper exploration safe
+ LIMITATION, // inlining impossible because of jit limitations, deeper exploration safe
+ PERFORMANCE, // inlining inadvisable because of performance concerns
+ INFORMATION // policy-free observation to provide data for later decision making
+};
+
+// InlineObservation describes the set of possible inline observations.
+
+enum class InlineObservation
+{
+#define INLINE_OBSERVATION(name, type, description, impact, scope) scope##_##name,
+#include "inline.def"
+#undef INLINE_OBSERVATION
+};
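+
+// For illustration: an entry in inline.def such as
+//
+//    INLINE_OBSERVATION(HAS_SWITCH, bool, "has switch", INFORMATION, CALLEE)
+//
+// expands via the macro above to the enumerator CALLEE_HAS_SWITCH.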
+
+#ifdef DEBUG
+
+// Sanity check the observation value
+
+bool InlIsValidObservation(InlineObservation obs);
+
+#endif // DEBUG
+
+// Get a string describing this observation
+
+const char* InlGetObservationString(InlineObservation obs);
+
+// Get a string describing the target of this observation
+
+const char* InlGetTargetString(InlineObservation obs);
+
+// Get a string describing the impact of this observation
+
+const char* InlGetImpactString(InlineObservation obs);
+
+// Get the target of this observation
+
+InlineTarget InlGetTarget(InlineObservation obs);
+
+// Get the impact of this observation
+
+InlineImpact InlGetImpact(InlineObservation obs);
+
+// InlinePolicy is an abstract base class for a family of inline
+// policies.
+
+class InlinePolicy
+{
+public:
+ // Factory method for getting policies
+ static InlinePolicy* GetPolicy(Compiler* compiler, bool isPrejitRoot);
+
+ // Obligatory virtual dtor
+ virtual ~InlinePolicy()
+ {
+ }
+
+ // Get the current decision
+ InlineDecision GetDecision() const
+ {
+ return m_Decision;
+ }
+
+ // Get the observation responsible for the result
+ InlineObservation GetObservation() const
+ {
+ return m_Observation;
+ }
+
+ // Policy observations
+ virtual void NoteSuccess() = 0;
+ virtual void NoteBool(InlineObservation obs, bool value) = 0;
+ virtual void NoteFatal(InlineObservation obs) = 0;
+ virtual void NoteInt(InlineObservation obs, int value) = 0;
+
+ // Optional observations. Most policies ignore these.
+ virtual void NoteContext(InlineContext* context)
+ {
+ (void)context;
+ }
+ virtual void NoteOffset(IL_OFFSETX offset)
+ {
+ (void)offset;
+ }
+
+ // Policy determinations
+ virtual void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) = 0;
+
+ // Policy policies
+ virtual bool PropagateNeverToRuntime() const = 0;
+ virtual bool IsLegacyPolicy() const = 0;
+
+ // Policy estimates
+ virtual int CodeSizeEstimate() = 0;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Name of the policy
+ virtual const char* GetName() const = 0;
+ // Detailed data value dump
+ virtual void DumpData(FILE* file) const
+ {
+ }
+ // Detailed data name dump
+ virtual void DumpSchema(FILE* file) const
+ {
+ }
+ // True if this is the inline targeted by data collection
+ bool IsDataCollectionTarget()
+ {
+ return m_IsDataCollectionTarget;
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+protected:
+ InlinePolicy(bool isPrejitRoot)
+ : m_Decision(InlineDecision::UNDECIDED)
+ , m_Observation(InlineObservation::CALLEE_UNUSED_INITIAL)
+ , m_IsPrejitRoot(isPrejitRoot)
+#if defined(DEBUG) || defined(INLINE_DATA)
+ , m_IsDataCollectionTarget(false)
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ {
+ // empty
+ }
+
+private:
+ // No copying or assignment supported
+ InlinePolicy(const InlinePolicy&) = delete;
+ InlinePolicy& operator=(const InlinePolicy&) = delete;
+
+protected:
+ InlineDecision m_Decision;
+ InlineObservation m_Observation;
+ bool m_IsPrejitRoot;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ bool m_IsDataCollectionTarget;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+};
+
+// InlineResult summarizes what is known about the viability of a
+// particular inline candidate.
+
+class InlineResult
+{
+public:
+ // Construct a new InlineResult to help evaluate a
+ // particular call for inlining.
+ InlineResult(Compiler* compiler, GenTreeCall* call, GenTreeStmt* stmt, const char* description);
+
+ // Construct a new InlineResult to evaluate a particular
+ // method to see if it is inlineable.
+ InlineResult(Compiler* compiler, CORINFO_METHOD_HANDLE method, const char* description);
+
+ // Has the policy determined this inline should fail?
+ bool IsFailure() const
+ {
+ return InlDecisionIsFailure(m_Policy->GetDecision());
+ }
+
+ // Has the policy determined this inline will succeed?
+ bool IsSuccess() const
+ {
+ return InlDecisionIsSuccess(m_Policy->GetDecision());
+ }
+
+ // Has the policy determined this inline will fail,
+ // and that the callee should never be inlined?
+ bool IsNever() const
+ {
+ return InlDecisionIsNever(m_Policy->GetDecision());
+ }
+
+ // Has the policy determined this inline attempt is still viable?
+ bool IsCandidate() const
+ {
+ return InlDecisionIsCandidate(m_Policy->GetDecision());
+ }
+
+ // Has the policy determined this inline attempt is still viable
+ // and is a discretionary inline?
+ bool IsDiscretionaryCandidate() const
+ {
+ bool result = InlDecisionIsCandidate(m_Policy->GetDecision()) &&
+ (m_Policy->GetObservation() == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+
+ return result;
+ }
+
+ // Has the policy made a determination?
+ bool IsDecided() const
+ {
+ return InlDecisionIsDecided(m_Policy->GetDecision());
+ }
+
+    // NoteSuccess means that all the various checks have passed and
+ // the inline can happen.
+ void NoteSuccess()
+ {
+ assert(IsCandidate());
+ m_Policy->NoteSuccess();
+ }
+
+ // Make a true observation, and update internal state
+ // appropriately.
+ //
+    // Caller is expected to call IsFailure after this to see whether
+ // more observation is desired.
+ void Note(InlineObservation obs)
+ {
+ m_Policy->NoteBool(obs, true);
+ }
+
+ // Make a boolean observation, and update internal state
+ // appropriately.
+ //
+    // Caller is expected to call IsFailure after this to see whether
+ // more observation is desired.
+ void NoteBool(InlineObservation obs, bool value)
+ {
+ m_Policy->NoteBool(obs, value);
+ }
+
+ // Make an observation that must lead to immediate failure.
+ void NoteFatal(InlineObservation obs)
+ {
+ m_Policy->NoteFatal(obs);
+ assert(IsFailure());
+ }
+
+ // Make an observation with an int value
+ void NoteInt(InlineObservation obs, int value)
+ {
+ m_Policy->NoteInt(obs, value);
+ }
+
+ // Determine if this inline is profitable
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+ {
+ m_Policy->DetermineProfitability(methodInfo);
+ }
+
+ // Ensure details of this inlining process are appropriately
+ // reported when the result goes out of scope.
+ ~InlineResult()
+ {
+ Report();
+ }
+
+ // The observation leading to this particular result
+ InlineObservation GetObservation() const
+ {
+ return m_Policy->GetObservation();
+ }
+
+ // The callee handle for this result
+ CORINFO_METHOD_HANDLE GetCallee() const
+ {
+ return m_Callee;
+ }
+
+ // The call being considered
+ GenTreeCall* GetCall() const
+ {
+ return m_Call;
+ }
+
+ // Result that can be reported back to the runtime
+ CorInfoInline Result() const
+ {
+ return InlGetCorInfoInlineDecision(m_Policy->GetDecision());
+ }
+
+ // String describing the decision made
+ const char* ResultString() const
+ {
+ return InlGetDecisionString(m_Policy->GetDecision());
+ }
+
+ // String describing the reason for the decision
+ const char* ReasonString() const
+ {
+ return InlGetObservationString(m_Policy->GetObservation());
+ }
+
+ // Get the policy that evaluated this result.
+ InlinePolicy* GetPolicy() const
+ {
+ return m_Policy;
+ }
+
+ // True if the policy used for this result is (exactly) the legacy
+ // policy.
+ bool UsesLegacyPolicy() const
+ {
+ return m_Policy->IsLegacyPolicy();
+ }
+
+ // SetReported indicates that this particular result doesn't need
+ // to be reported back to the runtime, either because the runtime
+ // already knows, or we aren't actually inlining yet.
+ void SetReported()
+ {
+ m_Reported = true;
+ }
+
+ // Get the InlineContext for this inline
+ InlineContext* GetInlineContext() const
+ {
+ return m_InlineContext;
+ }
+
+private:
+ // No copying or assignment allowed.
+ InlineResult(const InlineResult&) = delete;
+ InlineResult& operator=(const InlineResult&) = delete;
+
+ // Report/log/dump decision as appropriate
+ void Report();
+
+ Compiler* m_RootCompiler;
+ InlinePolicy* m_Policy;
+ GenTreeCall* m_Call;
+ InlineContext* m_InlineContext;
+ CORINFO_METHOD_HANDLE m_Caller; // immediate caller's handle
+ CORINFO_METHOD_HANDLE m_Callee;
+ const char* m_Description;
+ bool m_Reported;
+};
+
+// InlineCandidateInfo provides basic information about a particular
+// inline candidate.
+
+struct InlineCandidateInfo
+{
+ DWORD dwRestrictions;
+ CORINFO_METHOD_INFO methInfo;
+ unsigned methAttr;
+ CORINFO_CLASS_HANDLE clsHandle;
+ unsigned clsAttr;
+ var_types fncRetType;
+ CORINFO_METHOD_HANDLE ilCallerHandle; // the logical IL caller of this inlinee.
+ CORINFO_CONTEXT_HANDLE exactContextHnd;
+ CorInfoInitClassResult initClassResult;
+};
+
+// InlArgInfo describes inline candidate argument properties.
+
+struct InlArgInfo
+{
+ unsigned argIsUsed : 1; // is this arg used at all?
+ unsigned argIsInvariant : 1; // the argument is a constant or a local variable address
+ unsigned argIsLclVar : 1; // the argument is a local variable
+ unsigned argIsThis : 1; // the argument is the 'this' pointer
+ unsigned argHasSideEff : 1; // the argument has side effects
+ unsigned argHasGlobRef : 1; // the argument has a global ref
+ unsigned argHasTmp : 1; // the argument will be evaluated to a temp
+ unsigned argIsByRefToStructLocal : 1; // Is this arg an address of a struct local or a normed struct local or a
+ // field in them?
+ unsigned argHasLdargaOp : 1; // Is there LDARGA(s) operation on this argument?
+ unsigned argHasStargOp : 1; // Is there STARG(s) operation on this argument?
+
+ unsigned argTmpNum; // the argument tmp number
+ GenTreePtr argNode;
+ GenTreePtr argBashTmpNode; // tmp node created, if it may be replaced with actual arg
+};
+
+// InlLclVarInfo describes inline candidate local variable properties.
+
+struct InlLclVarInfo
+{
+ var_types lclTypeInfo;
+ typeInfo lclVerTypeInfo;
+    bool      lclHasLdlocaOp; // Is there LDLOCA(s) operation on this local variable?
+};
+
+// InlineInfo provides detailed information about a particular inline candidate.
+
+struct InlineInfo
+{
+ Compiler* InlinerCompiler; // The Compiler instance for the caller (i.e. the inliner)
+ Compiler* InlineRoot; // The Compiler instance that is the root of the inlining tree of which the owner of "this" is
+ // a member.
+
+ CORINFO_METHOD_HANDLE fncHandle;
+ InlineCandidateInfo* inlineCandidateInfo;
+
+ InlineResult* inlineResult;
+
+ GenTreePtr retExpr; // The return expression of the inlined candidate.
+
+ CORINFO_CONTEXT_HANDLE tokenLookupContextHandle; // The context handle that will be passed to
+ // impTokenLookupContextHandle in Inlinee's Compiler.
+
+ unsigned argCnt;
+ InlArgInfo inlArgInfo[MAX_INL_ARGS + 1];
+ int lclTmpNum[MAX_INL_LCLS]; // map local# -> temp# (-1 if unused)
+ InlLclVarInfo lclVarInfo[MAX_INL_LCLS + MAX_INL_ARGS + 1]; // type information from local sig
+
+ bool thisDereferencedFirst;
+#ifdef FEATURE_SIMD
+ bool hasSIMDTypeArgLocalOrReturn;
+#endif // FEATURE_SIMD
+
+ GenTreeCall* iciCall; // The GT_CALL node to be inlined.
+ GenTree* iciStmt; // The statement iciCall is in.
+ BasicBlock* iciBlock; // The basic block iciStmt is in.
+};
+
+// InlineContext tracks the inline history in a method.
+//
+// Notes:
+//
+// InlineContexts form a tree with the root method as the root and
+// inlines as children. Nested inlines are represented as grandchildren
+// and so on.
+//
+// Leaves in the tree represent successful inlines of leaf methods.
+// In DEBUG builds we also keep track of failed inline attempts.
+//
+// During inlining, all statements in the IR refer back to the
+// InlineContext that is responsible for those statements existing.
+// This makes it possible to detect recursion and to keep track of the
+// depth of each inline attempt.
+
+class InlineContext
+{
+ // InlineContexts are created by InlineStrategies
+ friend class InlineStrategy;
+
+public:
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Dump the full subtree, including failures
+ void Dump(unsigned indent = 0);
+
+ // Dump only the success subtree, with rich data
+ void DumpData(unsigned indent = 0);
+
+ // Dump full subtree in xml format
+ void DumpXml(FILE* file = stderr, unsigned indent = 0);
+
+ // Get callee handle
+ CORINFO_METHOD_HANDLE GetCallee() const
+ {
+ return m_Callee;
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ // Get the parent context for this context.
+ InlineContext* GetParent() const
+ {
+ return m_Parent;
+ }
+
+ // Get the code pointer for this context.
+ BYTE* GetCode() const
+ {
+ return m_Code;
+ }
+
+ // True if this context describes a successful inline.
+ bool IsSuccess() const
+ {
+ return m_Success;
+ }
+
+ // Get the observation that supported or disqualified this inline.
+ InlineObservation GetObservation() const
+ {
+ return m_Observation;
+ }
+
+ // Get the IL code size for this inline.
+ unsigned GetILSize() const
+ {
+ return m_ILSize;
+ }
+
+ // Get the native code size estimate for this inline.
+ unsigned GetCodeSizeEstimate() const
+ {
+ return m_CodeSizeEstimate;
+ }
+
+ // Get the offset of the call site
+ IL_OFFSETX GetOffset() const
+ {
+ return m_Offset;
+ }
+
+ // True if this is the root context
+ bool IsRoot() const
+ {
+ return m_Parent == nullptr;
+ }
+
+private:
+ InlineContext(InlineStrategy* strategy);
+
+private:
+ InlineStrategy* m_InlineStrategy; // overall strategy
+ InlineContext* m_Parent; // logical caller (parent)
+ InlineContext* m_Child; // first child
+ InlineContext* m_Sibling; // next child of the parent
+ BYTE* m_Code; // address of IL buffer for the method
+ unsigned m_ILSize; // size of IL buffer for the method
+ IL_OFFSETX m_Offset; // call site location within parent
+    InlineObservation m_Observation; // what led to this inline
+ int m_CodeSizeEstimate; // in bytes * 10
+ bool m_Success; // true if this was a successful inline
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ InlinePolicy* m_Policy; // policy that evaluated this inline
+ CORINFO_METHOD_HANDLE m_Callee; // handle to the method
+ unsigned m_TreeID; // ID of the GenTreeCall
+ unsigned m_Ordinal; // Ordinal number of this inline
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+};
+
+// The InlineStrategy holds the per-method persistent inline state.
+// It is responsible for providing information that applies to
+// multiple inlining decisions.
+
+class InlineStrategy
+{
+
+public:
+ // Construct a new inline strategy.
+ InlineStrategy(Compiler* compiler);
+
+ // Create context for a successful inline.
+ InlineContext* NewSuccess(InlineInfo* inlineInfo);
+
+ // Create context for a failing inline.
+ InlineContext* NewFailure(GenTree* stmt, InlineResult* inlineResult);
+
+ // Compiler associated with this strategy
+ Compiler* GetCompiler() const
+ {
+ return m_Compiler;
+ }
+
+ // Root context
+ InlineContext* GetRootContext();
+
+    // Context for the last successful inline
+ // (or root if no inlines)
+ InlineContext* GetLastContext() const
+ {
+ return m_LastContext;
+ }
+
+ // Get IL size for maximum allowable inline
+ unsigned GetMaxInlineILSize() const
+ {
+ return m_MaxInlineSize;
+ }
+
+ // Get depth of maximum allowable inline
+ unsigned GetMaxInlineDepth() const
+ {
+ return m_MaxInlineDepth;
+ }
+
+ // Number of successful inlines into the root
+ unsigned GetInlineCount() const
+ {
+ return m_InlineCount;
+ }
+
+ // Return the current code size estimate for this method
+ int GetCurrentSizeEstimate() const
+ {
+ return m_CurrentSizeEstimate;
+ }
+
+ // Return the initial code size estimate for this method
+ int GetInitialSizeEstimate() const
+ {
+ return m_InitialSizeEstimate;
+ }
+
+ // Inform strategy that there's another call
+ void NoteCall()
+ {
+ m_CallCount++;
+ }
+
+ // Inform strategy that there's a new inline candidate.
+ void NoteCandidate()
+ {
+ m_CandidateCount++;
+ }
+
+ // Inform strategy that a candidate was assessed and determined to
+ // be unprofitable.
+ void NoteUnprofitable()
+ {
+ m_UnprofitableCandidateCount++;
+ }
+
+ // Inform strategy that a candidate has passed screening
+ // and that the jit will attempt to inline.
+ void NoteAttempt(InlineResult* result);
+
+ // Inform strategy that jit is about to import the inlinee IL.
+ void NoteImport()
+ {
+ m_ImportCount++;
+ }
+
+ // Dump csv header for inline stats to indicated file.
+ static void DumpCsvHeader(FILE* f);
+
+ // Dump csv data for inline stats to indicated file.
+ void DumpCsvData(FILE* f);
+
+ // See if an inline of this size would fit within the current jit
+ // time budget.
+ bool BudgetCheck(unsigned ilSize);
+
+ // Check if this method is not allowing inlines.
+ static bool IsNoInline(ICorJitInfo* info, CORINFO_METHOD_HANDLE method);
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Dump textual description of inlines done so far.
+ void Dump();
+
+ // Dump data-format description of inlines done so far.
+ void DumpData();
+ void DumpDataEnsurePolicyIsSet();
+ void DumpDataHeader(FILE* file);
+ void DumpDataSchema(FILE* file);
+ void DumpDataContents(FILE* file);
+
+ // Dump xml-formatted description of inlines
+ void DumpXml(FILE* file = stderr, unsigned indent = 0);
+ static void FinalizeXml(FILE* file = stderr);
+
+ // Cache for file position of this method in the inline xml
+ long GetMethodXmlFilePosition()
+ {
+ return m_MethodXmlFilePosition;
+ }
+
+ void SetMethodXmlFilePosition(long val)
+ {
+ m_MethodXmlFilePosition = val;
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ // Some inline limit values
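+    //
+    // For example, a callee whose IL size is at or below ALWAYS_INLINE_SIZE
+    // (16 bytes) is marked as an inline candidate based on size alone; see
+    // the CALLEE_IL_CODE_SIZE handling in LegacyPolicy::NoteInt.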
+ enum
+ {
+ ALWAYS_INLINE_SIZE = 16,
+ IMPLEMENTATION_MAX_INLINE_SIZE = _UI16_MAX,
+ IMPLEMENTATION_MAX_INLINE_DEPTH = 1000
+ };
+
+private:
+ // Create a context for the root method.
+ InlineContext* NewRoot();
+
+ // Accounting updates for a successful or failed inline.
+ void NoteOutcome(InlineContext* context);
+
+ // Cap on allowable increase in jit time due to inlining.
+ // Multiplicative, so BUDGET = 10 means up to 10x increase
+ // in jit time.
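+    //
+    // Illustrative sketch: if the root method's initial jit time estimate
+    // is T, the overall budget is roughly BUDGET * T, and BudgetCheck
+    // reports an inline as over budget once the accumulated time estimates
+    // for the method and its inlinees would exceed that cap.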
+ enum
+ {
+ BUDGET = 10
+ };
+
+ // Estimate the jit time change because of this inline.
+ int EstimateTime(InlineContext* context);
+
+ // EstimateTime helpers
+ int EstimateRootTime(unsigned ilSize);
+ int EstimateInlineTime(unsigned ilSize);
+
+ // Estimate native code size change because of this inline.
+ int EstimateSize(InlineContext* context);
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ static bool s_HasDumpedDataHeader;
+ static bool s_HasDumpedXmlHeader;
+ static CritSecObject s_XmlWriterLock;
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ Compiler* m_Compiler;
+ InlineContext* m_RootContext;
+ InlinePolicy* m_LastSuccessfulPolicy;
+ InlineContext* m_LastContext;
+ unsigned m_CallCount;
+ unsigned m_CandidateCount;
+ unsigned m_AlwaysCandidateCount;
+ unsigned m_ForceCandidateCount;
+ unsigned m_DiscretionaryCandidateCount;
+ unsigned m_UnprofitableCandidateCount;
+ unsigned m_ImportCount;
+ unsigned m_InlineCount;
+ unsigned m_MaxInlineSize;
+ unsigned m_MaxInlineDepth;
+ int m_InitialTimeBudget;
+ int m_InitialTimeEstimate;
+ int m_CurrentTimeBudget;
+ int m_CurrentTimeEstimate;
+ int m_InitialSizeEstimate;
+ int m_CurrentSizeEstimate;
+ bool m_HasForceViaDiscretionary;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ long m_MethodXmlFilePosition;
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+};
+
+#endif // _INLINE_H_
diff --git a/src/jit/inlinepolicy.cpp b/src/jit/inlinepolicy.cpp
new file mode 100644
index 0000000000..f80f3a5ec0
--- /dev/null
+++ b/src/jit/inlinepolicy.cpp
@@ -0,0 +1,2857 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "inlinepolicy.h"
+#include "sm.h"
+
+//------------------------------------------------------------------------
+// getPolicy: Factory method for getting an InlinePolicy
+//
+// Arguments:
+// compiler - the compiler instance that will evaluate inlines
+// isPrejitRoot - true if this policy is evaluating a prejit root
+//
+// Return Value:
+// InlinePolicy to use in evaluating an inline.
+//
+// Notes:
+// Determines which of the various policies should apply,
+// and creates (or reuses) a policy instance to use.
+
+InlinePolicy* InlinePolicy::GetPolicy(Compiler* compiler, bool isPrejitRoot)
+{
+
+#ifdef DEBUG
+
+ // Optionally install the RandomPolicy.
+ bool useRandomPolicy = compiler->compRandomInlineStress();
+
+ if (useRandomPolicy)
+ {
+ unsigned seed = getJitStressLevel();
+ assert(seed != 0);
+ return new (compiler, CMK_Inlining) RandomPolicy(compiler, isPrejitRoot, seed);
+ }
+
+#endif // DEBUG
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Optionally install the ReplayPolicy.
+ bool useReplayPolicy = JitConfig.JitInlinePolicyReplay() != 0;
+
+ if (useReplayPolicy)
+ {
+ return new (compiler, CMK_Inlining) ReplayPolicy(compiler, isPrejitRoot);
+ }
+
+ // Optionally install the SizePolicy.
+ bool useSizePolicy = JitConfig.JitInlinePolicySize() != 0;
+
+ if (useSizePolicy)
+ {
+ return new (compiler, CMK_Inlining) SizePolicy(compiler, isPrejitRoot);
+ }
+
+ // Optionally install the FullPolicy.
+ bool useFullPolicy = JitConfig.JitInlinePolicyFull() != 0;
+
+ if (useFullPolicy)
+ {
+ return new (compiler, CMK_Inlining) FullPolicy(compiler, isPrejitRoot);
+ }
+
+ // Optionally install the DiscretionaryPolicy.
+ bool useDiscretionaryPolicy = JitConfig.JitInlinePolicyDiscretionary() != 0;
+
+ if (useDiscretionaryPolicy)
+ {
+ return new (compiler, CMK_Inlining) DiscretionaryPolicy(compiler, isPrejitRoot);
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ // Optionally install the ModelPolicy.
+ bool useModelPolicy = JitConfig.JitInlinePolicyModel() != 0;
+
+ if (useModelPolicy)
+ {
+ return new (compiler, CMK_Inlining) ModelPolicy(compiler, isPrejitRoot);
+ }
+
+ // Optionally fallback to the original legacy policy
+ bool useLegacyPolicy = JitConfig.JitInlinePolicyLegacy() != 0;
+
+ if (useLegacyPolicy)
+ {
+ return new (compiler, CMK_Inlining) LegacyPolicy(compiler, isPrejitRoot);
+ }
+
+ // Use the enhanced legacy policy by default
+ return new (compiler, CMK_Inlining) EnhancedLegacyPolicy(compiler, isPrejitRoot);
+}
+
+//------------------------------------------------------------------------
+// NoteFatal: handle an observation with fatal impact
+//
+// Arguments:
+//    obs      - the current observation
+
+void LegalPolicy::NoteFatal(InlineObservation obs)
+{
+ // As a safeguard, all fatal impact must be
+ // reported via noteFatal.
+ assert(InlGetImpact(obs) == InlineImpact::FATAL);
+ NoteInternal(obs);
+ assert(InlDecisionIsFailure(m_Decision));
+}
+
+//------------------------------------------------------------------------
+// NoteInternal: helper for handling an observation
+//
+// Arguments:
+//    obs      - the current observation
+
+void LegalPolicy::NoteInternal(InlineObservation obs)
+{
+ // Note any INFORMATION that reaches here will now cause failure.
+ // Non-fatal INFORMATION observations must be handled higher up.
+ InlineTarget target = InlGetTarget(obs);
+
+ if (target == InlineTarget::CALLEE)
+ {
+ this->SetNever(obs);
+ }
+ else
+ {
+ this->SetFailure(obs);
+ }
+}
+
+//------------------------------------------------------------------------
+// SetFailure: helper for setting a failing decision
+//
+// Arguments:
+//    obs      - the current observation
+
+void LegalPolicy::SetFailure(InlineObservation obs)
+{
+ // Expect a valid observation
+ assert(InlIsValidObservation(obs));
+
+ switch (m_Decision)
+ {
+ case InlineDecision::FAILURE:
+ // Repeated failure only ok if evaluating a prejit root
+ // (since we can't fail fast because we're not inlining)
+ // or if inlining and the observation is CALLSITE_TOO_MANY_LOCALS
+ // (since we can't fail fast from lvaGrabTemp).
+ assert(m_IsPrejitRoot || (obs == InlineObservation::CALLSITE_TOO_MANY_LOCALS));
+ break;
+ case InlineDecision::UNDECIDED:
+ case InlineDecision::CANDIDATE:
+ m_Decision = InlineDecision::FAILURE;
+ m_Observation = obs;
+ break;
+ default:
+ // SUCCESS, NEVER, or ??
+ assert(!"Unexpected m_Decision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// SetNever: helper for setting a never decision
+//
+// Arguments:
+//    obs      - the current observation
+
+void LegalPolicy::SetNever(InlineObservation obs)
+{
+ // Expect a valid observation
+ assert(InlIsValidObservation(obs));
+
+ switch (m_Decision)
+ {
+ case InlineDecision::NEVER:
+ // Repeated never only ok if evaluating a prejit root
+ assert(m_IsPrejitRoot);
+ break;
+ case InlineDecision::UNDECIDED:
+ case InlineDecision::CANDIDATE:
+ m_Decision = InlineDecision::NEVER;
+ m_Observation = obs;
+ break;
+ default:
+ // SUCCESS, FAILURE or ??
+ assert(!"Unexpected m_Decision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// SetCandidate: helper updating candidacy
+//
+// Arguments:
+//    obs      - the current observation
+//
+// Note:
+// Candidate observations are handled here. If the inline has already
+// failed, they're ignored. If there's already a candidate reason,
+// this new reason trumps it.
+
+void LegalPolicy::SetCandidate(InlineObservation obs)
+{
+ // Ignore if this inline is going to fail.
+ if (InlDecisionIsFailure(m_Decision))
+ {
+ return;
+ }
+
+ // We should not have declared success yet.
+ assert(!InlDecisionIsSuccess(m_Decision));
+
+ // Update, overriding any previous candidacy.
+ m_Decision = InlineDecision::CANDIDATE;
+ m_Observation = obs;
+}
+
+//------------------------------------------------------------------------
+// NoteSuccess: handle finishing all the inlining checks successfully
+
+void LegacyPolicy::NoteSuccess()
+{
+ assert(InlDecisionIsCandidate(m_Decision));
+ m_Decision = InlineDecision::SUCCESS;
+}
+
+//------------------------------------------------------------------------
+// NoteBool: handle a boolean observation with non-fatal impact
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value of the observation
+void LegacyPolicy::NoteBool(InlineObservation obs, bool value)
+{
+ // Check the impact
+ InlineImpact impact = InlGetImpact(obs);
+
+ // As a safeguard, all fatal impact must be
+ // reported via noteFatal.
+ assert(impact != InlineImpact::FATAL);
+
+ // Handle most information here
+ bool isInformation = (impact == InlineImpact::INFORMATION);
+ bool propagate = !isInformation;
+
+ if (isInformation)
+ {
+ switch (obs)
+ {
+ case InlineObservation::CALLEE_IS_FORCE_INLINE:
+ // We may make the force-inline observation more than
+ // once. All observations should agree.
+ assert(!m_IsForceInlineKnown || (m_IsForceInline == value));
+ m_IsForceInline = value;
+ m_IsForceInlineKnown = true;
+ break;
+
+ case InlineObservation::CALLEE_IS_INSTANCE_CTOR:
+ m_IsInstanceCtor = value;
+ break;
+
+ case InlineObservation::CALLEE_CLASS_PROMOTABLE:
+ m_IsFromPromotableValueClass = value;
+ break;
+
+ case InlineObservation::CALLEE_HAS_SIMD:
+ m_HasSimd = value;
+ break;
+
+ case InlineObservation::CALLEE_LOOKS_LIKE_WRAPPER:
+ // LegacyPolicy ignores this for prejit roots.
+ if (!m_IsPrejitRoot)
+ {
+ m_LooksLikeWrapperMethod = value;
+ }
+ break;
+
+ case InlineObservation::CALLEE_ARG_FEEDS_CONSTANT_TEST:
+ // LegacyPolicy ignores this for prejit roots.
+ if (!m_IsPrejitRoot)
+ {
+ m_ArgFeedsConstantTest++;
+ }
+ break;
+
+ case InlineObservation::CALLEE_ARG_FEEDS_RANGE_CHECK:
+ // LegacyPolicy ignores this for prejit roots.
+ if (!m_IsPrejitRoot)
+ {
+ m_ArgFeedsRangeCheck++;
+ }
+ break;
+
+ case InlineObservation::CALLEE_HAS_SWITCH:
+ case InlineObservation::CALLEE_UNSUPPORTED_OPCODE:
+ // LegacyPolicy ignores these for prejit roots.
+ if (!m_IsPrejitRoot)
+ {
+ // Pass these on, they should cause inlining to fail.
+ propagate = true;
+ }
+ break;
+
+ case InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST:
+ // We shouldn't see this for a prejit root since
+ // we don't know anything about callers.
+ assert(!m_IsPrejitRoot);
+ m_ConstantArgFeedsConstantTest++;
+ break;
+
+ case InlineObservation::CALLEE_BEGIN_OPCODE_SCAN:
+ {
+ // Set up the state machine, if this inline is
+ // discretionary and is still a candidate.
+ if (InlDecisionIsCandidate(m_Decision) &&
+ (m_Observation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE))
+ {
+ // Better not have a state machine already.
+ assert(m_StateMachine == nullptr);
+ m_StateMachine = new (m_RootCompiler, CMK_Inlining) CodeSeqSM;
+ m_StateMachine->Start(m_RootCompiler);
+ }
+ break;
+ }
+
+ case InlineObservation::CALLEE_END_OPCODE_SCAN:
+ {
+ if (m_StateMachine != nullptr)
+ {
+ m_StateMachine->End();
+ }
+
+ // If this function is mostly loads and stores, we
+ // should try harder to inline it. You can't just use
+ // the percentage test because if the method has 8
+ // instructions and 6 are loads, it's only 75% loads.
+ // This allows for CALL, RET, and one more non-ld/st
+ // instruction.
+ if (((m_InstructionCount - m_LoadStoreCount) < 4) ||
+ (((double)m_LoadStoreCount / (double)m_InstructionCount) > .90))
+ {
+ m_MethodIsMostlyLoadStore = true;
+ }
+
+ // Budget check.
+ //
+ // Conceptually this should happen when we
+ // observe the candidate's IL size.
+ //
+ // However, we do this here to avoid potential
+ // inconsistency between the state of the budget
+ // during candidate scan and the state when the IL is
+ // being scanned.
+ //
+ // Consider the case where we're just below the budget
+ // during candidate scan, and we have three possible
+ // inlines, any two of which put us over budget. We
+ // allow them all to become candidates. We then move
+ // on to inlining and the first two get inlined and
+ // put us over budget. Now the third can't be inlined
+ // anymore, but we have a policy that when we replay
+ // the candidate IL size during the inlining pass it
+ // "reestablishes" candidacy rather than alters
+ // candidacy ... so instead we bail out here.
+
+ if (!m_IsPrejitRoot)
+ {
+ InlineStrategy* strategy = m_RootCompiler->m_inlineStrategy;
+ bool overBudget = strategy->BudgetCheck(m_CodeSize);
+ if (overBudget)
+ {
+ SetFailure(InlineObservation::CALLSITE_OVER_BUDGET);
+ }
+ }
+
+ break;
+ }
+
+ default:
+ // Ignore the remainder for now
+ break;
+ }
+ }
+
+ if (propagate)
+ {
+ NoteInternal(obs);
+ }
+}
+
+//------------------------------------------------------------------------
+// NoteInt: handle an observed integer value
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value being observed
+
+void LegacyPolicy::NoteInt(InlineObservation obs, int value)
+{
+ switch (obs)
+ {
+ case InlineObservation::CALLEE_MAXSTACK:
+ {
+ assert(m_IsForceInlineKnown);
+
+ unsigned calleeMaxStack = static_cast<unsigned>(value);
+
+ if (!m_IsForceInline && (calleeMaxStack > SMALL_STACK_SIZE))
+ {
+ SetNever(InlineObservation::CALLEE_MAXSTACK_TOO_BIG);
+ }
+
+ break;
+ }
+
+ case InlineObservation::CALLEE_NUMBER_OF_BASIC_BLOCKS:
+ {
+ assert(m_IsForceInlineKnown);
+ assert(value != 0);
+
+ unsigned basicBlockCount = static_cast<unsigned>(value);
+
+ if (!m_IsForceInline && (basicBlockCount > MAX_BASIC_BLOCKS))
+ {
+ SetNever(InlineObservation::CALLEE_TOO_MANY_BASIC_BLOCKS);
+ }
+
+ break;
+ }
+
+ case InlineObservation::CALLEE_IL_CODE_SIZE:
+ {
+ assert(m_IsForceInlineKnown);
+ assert(value != 0);
+ m_CodeSize = static_cast<unsigned>(value);
+
+ // Now that we know size and forceinline state,
+ // update candidacy.
+ if (m_CodeSize <= InlineStrategy::ALWAYS_INLINE_SIZE)
+ {
+ // Candidate based on small size
+ SetCandidate(InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE);
+ }
+ else if (m_IsForceInline)
+ {
+ // Candidate based on force inline
+ SetCandidate(InlineObservation::CALLEE_IS_FORCE_INLINE);
+ }
+ else if (m_CodeSize <= m_RootCompiler->m_inlineStrategy->GetMaxInlineILSize())
+ {
+ // Candidate, pending profitability evaluation
+ SetCandidate(InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+ }
+ else
+ {
+ // Callee too big, not a candidate
+ SetNever(InlineObservation::CALLEE_TOO_MUCH_IL);
+ }
+
+ break;
+ }
+
+ case InlineObservation::CALLSITE_DEPTH:
+ {
+ unsigned depth = static_cast<unsigned>(value);
+
+ if (depth > m_RootCompiler->m_inlineStrategy->GetMaxInlineDepth())
+ {
+ SetFailure(InlineObservation::CALLSITE_IS_TOO_DEEP);
+ }
+
+ break;
+ }
+
+ case InlineObservation::CALLEE_OPCODE_NORMED:
+ case InlineObservation::CALLEE_OPCODE:
+ {
+ m_InstructionCount++;
+ OPCODE opcode = static_cast<OPCODE>(value);
+
+ if (m_StateMachine != nullptr)
+ {
+ SM_OPCODE smOpcode = CodeSeqSM::MapToSMOpcode(opcode);
+ noway_assert(smOpcode < SM_COUNT);
+ noway_assert(smOpcode != SM_PREFIX_N);
+ if (obs == InlineObservation::CALLEE_OPCODE_NORMED)
+ {
+ if (smOpcode == SM_LDARGA_S)
+ {
+ smOpcode = SM_LDARGA_S_NORMED;
+ }
+ else if (smOpcode == SM_LDLOCA_S)
+ {
+ smOpcode = SM_LDLOCA_S_NORMED;
+ }
+ }
+
+ m_StateMachine->Run(smOpcode DEBUGARG(0));
+ }
+
+ // Look for opcodes that imply loads and stores.
+            // The logic is kept as-is to match legacy behavior.
+ if ((opcode >= CEE_LDARG_0 && opcode <= CEE_STLOC_S) || (opcode >= CEE_LDARG && opcode <= CEE_STLOC) ||
+ (opcode >= CEE_LDNULL && opcode <= CEE_LDC_R8) || (opcode >= CEE_LDIND_I1 && opcode <= CEE_STIND_R8) ||
+ (opcode >= CEE_LDFLD && opcode <= CEE_STOBJ) || (opcode >= CEE_LDELEMA && opcode <= CEE_STELEM) ||
+ (opcode == CEE_POP))
+ {
+ m_LoadStoreCount++;
+ }
+
+ break;
+ }
+
+ case InlineObservation::CALLSITE_FREQUENCY:
+ assert(m_CallsiteFrequency == InlineCallsiteFrequency::UNUSED);
+ m_CallsiteFrequency = static_cast<InlineCallsiteFrequency>(value);
+ assert(m_CallsiteFrequency != InlineCallsiteFrequency::UNUSED);
+ break;
+
+ default:
+ // Ignore all other information
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// DetermineMultiplier: determine benefit multiplier for this inline
+//
+// Notes: uses the accumulated set of observations to compute a
+// profitability boost for the inline candidate.
+
+double LegacyPolicy::DetermineMultiplier()
+{
+ double multiplier = 0;
+
+ // Bump up the multiplier for instance constructors
+
+ if (m_IsInstanceCtor)
+ {
+ multiplier += 1.5;
+ JITDUMP("\nmultiplier in instance constructors increased to %g.", multiplier);
+ }
+
+ // Bump up the multiplier for methods in promotable struct
+
+ if (m_IsFromPromotableValueClass)
+ {
+ multiplier += 3;
+ JITDUMP("\nmultiplier in methods of promotable struct increased to %g.", multiplier);
+ }
+
+#ifdef FEATURE_SIMD
+
+ if (m_HasSimd)
+ {
+ multiplier += JitConfig.JitInlineSIMDMultiplier();
+ JITDUMP("\nInline candidate has SIMD type args, locals or return value. Multiplier increased to %g.",
+ multiplier);
+ }
+
+#endif // FEATURE_SIMD
+
+ if (m_LooksLikeWrapperMethod)
+ {
+ multiplier += 1.0;
+ JITDUMP("\nInline candidate looks like a wrapper method. Multiplier increased to %g.", multiplier);
+ }
+
+ if (m_ArgFeedsConstantTest > 0)
+ {
+ multiplier += 1.0;
+ JITDUMP("\nInline candidate has an arg that feeds a constant test. Multiplier increased to %g.", multiplier);
+ }
+
+ if (m_MethodIsMostlyLoadStore)
+ {
+ multiplier += 3.0;
+ JITDUMP("\nInline candidate is mostly loads and stores. Multiplier increased to %g.", multiplier);
+ }
+
+ if (m_ArgFeedsRangeCheck > 0)
+ {
+ multiplier += 0.5;
+ JITDUMP("\nInline candidate has arg that feeds range check. Multiplier increased to %g.", multiplier);
+ }
+
+ if (m_ConstantArgFeedsConstantTest > 0)
+ {
+ multiplier += 3.0;
+ JITDUMP("\nInline candidate has const arg that feeds a conditional. Multiplier increased to %g.", multiplier);
+ }
+
+ switch (m_CallsiteFrequency)
+ {
+ case InlineCallsiteFrequency::RARE:
+ // Note this one is not additive, it uses '=' instead of '+='
+ multiplier = 1.3;
+ JITDUMP("\nInline candidate callsite is rare. Multiplier limited to %g.", multiplier);
+ break;
+ case InlineCallsiteFrequency::BORING:
+ multiplier += 1.3;
+ JITDUMP("\nInline candidate callsite is boring. Multiplier increased to %g.", multiplier);
+ break;
+ case InlineCallsiteFrequency::WARM:
+ multiplier += 2.0;
+ JITDUMP("\nInline candidate callsite is warm. Multiplier increased to %g.", multiplier);
+ break;
+ case InlineCallsiteFrequency::LOOP:
+ multiplier += 3.0;
+ JITDUMP("\nInline candidate callsite is in a loop. Multiplier increased to %g.", multiplier);
+ break;
+ case InlineCallsiteFrequency::HOT:
+ multiplier += 3.0;
+ JITDUMP("\nInline candidate callsite is hot. Multiplier increased to %g.", multiplier);
+ break;
+ default:
+ assert(!"Unexpected callsite frequency");
+ break;
+ }
+
+#ifdef DEBUG
+
+ int additionalMultiplier = JitConfig.JitInlineAdditionalMultiplier();
+
+ if (additionalMultiplier != 0)
+ {
+ multiplier += additionalMultiplier;
+        JITDUMP("\nmultiplier increased via JitInlineAdditionalMultiplier=%d to %g.", additionalMultiplier, multiplier);
+ }
+
+ if (m_RootCompiler->compInlineStress())
+ {
+ multiplier += 10;
+ JITDUMP("\nmultiplier increased via inline stress to %g.", multiplier);
+ }
+
+#endif // DEBUG
+
+ return multiplier;
+}
+
+//------------------------------------------------------------------------
+// DetermineNativeSizeEstimate: return estimated native code size for
+// this inline candidate.
+//
+// Notes:
+// This is an estimate for the size of the inlined callee.
+// It does not include size impact on the caller side.
+//
+// Uses the results of a state machine model for discretionary
+// candidates. Should not be needed for forced or always
+// candidates.
+
+int LegacyPolicy::DetermineNativeSizeEstimate()
+{
+ // Should be a discretionary candidate.
+ assert(m_StateMachine != nullptr);
+
+ return m_StateMachine->NativeSize;
+}
+
+//------------------------------------------------------------------------
+// DetermineCallsiteNativeSizeEstimate: estimate native size for the
+// callsite.
+//
+// Arguments:
+// methInfo -- method info for the callee
+//
+// Notes:
+// Estimates the native size (in bytes, scaled up by 10x) for the
+// call site. While the quality of the estimate here is questionable
+// (especially for x64) it is being left as is for legacy compatibility.
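+//
+//    Illustrative example based on the constants below: a call that passes
+//    a 'this' pointer and one non-struct argument is estimated at
+//    55 + 30 + 30 = 115, i.e. roughly 11.5 native bytes.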
+
+int LegacyPolicy::DetermineCallsiteNativeSizeEstimate(CORINFO_METHOD_INFO* methInfo)
+{
+    int callsiteSize = 55; // Direct call takes 5 native bytes; indirect call takes 6 native bytes.
+
+ bool hasThis = methInfo->args.hasThis();
+
+ if (hasThis)
+ {
+ callsiteSize += 30; // "mov" or "lea"
+ }
+
+ CORINFO_ARG_LIST_HANDLE argLst = methInfo->args.args;
+ COMP_HANDLE comp = m_RootCompiler->info.compCompHnd;
+
+ for (unsigned i = (hasThis ? 1 : 0); i < methInfo->args.totalILArgs(); i++, argLst = comp->getArgNext(argLst))
+ {
+ var_types sigType = (var_types)m_RootCompiler->eeGetArgType(argLst, &methInfo->args);
+
+ if (sigType == TYP_STRUCT)
+ {
+ typeInfo verType = m_RootCompiler->verParseArgSigToTypeInfo(&methInfo->args, argLst);
+
+ /*
+
+ IN0028: 00009B lea EAX, bword ptr [EBP-14H]
+ IN0029: 00009E push dword ptr [EAX+4]
+ IN002a: 0000A1 push gword ptr [EAX]
+ IN002b: 0000A3 call [MyStruct.staticGetX2(struct):int]
+
+ */
+
+ callsiteSize += 10; // "lea EAX, bword ptr [EBP-14H]"
+
+ // NB sizeof (void*) fails to convey intent when cross-jitting.
+
+ unsigned opsz = (unsigned)(roundUp(comp->getClassSize(verType.GetClassHandle()), sizeof(void*)));
+ unsigned slots = opsz / sizeof(void*);
+
+ callsiteSize += slots * 20; // "push gword ptr [EAX+offs] "
+ }
+ else
+ {
+ callsiteSize += 30; // push by average takes 3 bytes.
+ }
+ }
+
+ return callsiteSize;
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+//
+// Notes:
+// A profitable inline is one that is projected to have a beneficial
+// size/speed tradeoff.
+//
+// It is expected that this method is only invoked for discretionary
+// candidates, since it does not make sense to do this assessment for
+// failed, always, or forced inlines.
+
+void LegacyPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+
+#if defined(DEBUG)
+
+ // Punt if we're inlining and we've reached the acceptance limit.
+ int limit = JitConfig.JitInlineLimit();
+ unsigned current = m_RootCompiler->m_inlineStrategy->GetInlineCount();
+
+ if (!m_IsPrejitRoot && (limit >= 0) && (current >= static_cast<unsigned>(limit)))
+ {
+ SetFailure(InlineObservation::CALLSITE_OVER_INLINE_LIMIT);
+ return;
+ }
+
+#endif // defined(DEBUG)
+
+ assert(InlDecisionIsCandidate(m_Decision));
+ assert(m_Observation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+
+ m_CalleeNativeSizeEstimate = DetermineNativeSizeEstimate();
+ m_CallsiteNativeSizeEstimate = DetermineCallsiteNativeSizeEstimate(methodInfo);
+ m_Multiplier = DetermineMultiplier();
+ const int threshold = (int)(m_CallsiteNativeSizeEstimate * m_Multiplier);
+
+ // Note the LegacyPolicy estimates are scaled up by SIZE_SCALE
+    JITDUMP("\ncalleeNativeSizeEstimate=%d\n", m_CalleeNativeSizeEstimate);
+ JITDUMP("callsiteNativeSizeEstimate=%d\n", m_CallsiteNativeSizeEstimate);
+ JITDUMP("benefit multiplier=%g\n", m_Multiplier);
+ JITDUMP("threshold=%d\n", threshold);
+
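+    // Worked example with illustrative numbers: a callsite estimate of 115
+    // (~11.5 bytes) and a multiplier of 2.0 give a threshold of 230, so a
+    // callee estimate of 200 (~20 bytes) is accepted as profitable, while an
+    // estimate of 300 would be rejected.
+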
+ // Reject if callee size is over the threshold
+ if (m_CalleeNativeSizeEstimate > threshold)
+ {
+ // Inline appears to be unprofitable
+ JITLOG_THIS(m_RootCompiler,
+ (LL_INFO100000, "Native estimate for function size exceeds threshold"
+ " for inlining %g > %g (multiplier = %g)\n",
+ (double)m_CalleeNativeSizeEstimate / SIZE_SCALE, (double)threshold / SIZE_SCALE, m_Multiplier));
+
+ // Fail the inline
+ if (m_IsPrejitRoot)
+ {
+ SetNever(InlineObservation::CALLEE_NOT_PROFITABLE_INLINE);
+ }
+ else
+ {
+ SetFailure(InlineObservation::CALLSITE_NOT_PROFITABLE_INLINE);
+ }
+ }
+ else
+ {
+ // Inline appears to be profitable
+ JITLOG_THIS(m_RootCompiler,
+ (LL_INFO100000, "Native estimate for function size is within threshold"
+ " for inlining %g <= %g (multiplier = %g)\n",
+ (double)m_CalleeNativeSizeEstimate / SIZE_SCALE, (double)threshold / SIZE_SCALE, m_Multiplier));
+
+ // Update candidacy
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_IS_PROFITABLE_INLINE);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_IS_PROFITABLE_INLINE);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// CodeSizeEstimate: estimated code size impact of the inline
+//
+// Return Value:
+// Estimated code size impact, in bytes * 10
+//
+// Notes:
+// Only meaningful for discretionary inlines (whether successful or
+// not). For always or force inlines the legacy policy doesn't
+// estimate size impact.
+
+int LegacyPolicy::CodeSizeEstimate()
+{
+ if (m_StateMachine != nullptr)
+ {
+ // This is not something the LegacyPolicy explicitly computed,
+ // since it uses a blended evaluation model (mixing size and time
+        // together for overall profitability). But it's effectively an
+ // estimate of the size impact.
+ return (m_CalleeNativeSizeEstimate - m_CallsiteNativeSizeEstimate);
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+//------------------------------------------------------------------------
+// NoteBool: handle a boolean observation with non-fatal impact
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value of the observation
+
+void EnhancedLegacyPolicy::NoteBool(InlineObservation obs, bool value)
+{
+ switch (obs)
+ {
+ case InlineObservation::CALLEE_DOES_NOT_RETURN:
+ m_IsNoReturn = value;
+ m_IsNoReturnKnown = true;
+ break;
+
+ default:
+ // Pass all other information to the legacy policy
+ LegacyPolicy::NoteBool(obs, value);
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// NoteInt: handle an observed integer value
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value being observed
+
+void EnhancedLegacyPolicy::NoteInt(InlineObservation obs, int value)
+{
+ switch (obs)
+ {
+ case InlineObservation::CALLEE_NUMBER_OF_BASIC_BLOCKS:
+ {
+ assert(value != 0);
+ assert(m_IsNoReturnKnown);
+
+ //
+ // Let's be conservative for now and reject inlining of "no return" methods only
+ // if the callee contains a single basic block. This covers most of the use cases
+ // (typical throw helpers simply do "throw new X();" and so they have a single block)
+ // without affecting more exotic cases (loops that do actual work for example) where
+ // failure to inline could negatively impact code quality.
+ //
+
+ unsigned basicBlockCount = static_cast<unsigned>(value);
+
+ if (m_IsNoReturn && (basicBlockCount == 1))
+ {
+ SetNever(InlineObservation::CALLEE_DOES_NOT_RETURN);
+ }
+ else
+ {
+ LegacyPolicy::NoteInt(obs, value);
+ }
+
+ break;
+ }
+
+ default:
+ // Pass all other information to the legacy policy
+ LegacyPolicy::NoteInt(obs, value);
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// PropagateNeverToRuntime: determine if a never result should cause the
+// method to be marked as un-inlinable.
+
+bool EnhancedLegacyPolicy::PropagateNeverToRuntime() const
+{
+ //
+ // Do not propagate the "no return" observation. If we do this then future inlining
+ // attempts will fail immediately without marking the call node as "no return".
+ // This can have an adverse impact on caller's code quality as it may have to preserve
+ // registers across the call.
+ // TODO-Throughput: We should persist the "no return" information in the runtime
+ // so we don't need to re-analyze the inlinee all the time.
+ //
+
+ bool propagate = (m_Observation != InlineObservation::CALLEE_DOES_NOT_RETURN);
+
+ propagate &= LegacyPolicy::PropagateNeverToRuntime();
+
+ return propagate;
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// RandomPolicy: construct a new RandomPolicy
+//
+// Arguments:
+// compiler -- compiler instance doing the inlining (root compiler)
+// isPrejitRoot -- true if this compiler is prejitting the root method
+// seed -- seed value for the random number generator
+
+RandomPolicy::RandomPolicy(Compiler* compiler, bool isPrejitRoot, unsigned seed)
+ : LegalPolicy(isPrejitRoot)
+ , m_RootCompiler(compiler)
+ , m_Random(nullptr)
+ , m_CodeSize(0)
+ , m_IsForceInline(false)
+ , m_IsForceInlineKnown(false)
+{
+ // If necessary, setup and seed the random state.
+ if (compiler->inlRNG == nullptr)
+ {
+ compiler->inlRNG = new (compiler, CMK_Inlining) CLRRandom();
+
+ unsigned hash = m_RootCompiler->info.compMethodHash();
+ assert(hash != 0);
+ assert(seed != 0);
+ int hashSeed = static_cast<int>(hash ^ seed);
+ compiler->inlRNG->Init(hashSeed);
+ }
+
+ m_Random = compiler->inlRNG;
+}
+
+//------------------------------------------------------------------------
+// NoteSuccess: handle finishing all the inlining checks successfully
+
+void RandomPolicy::NoteSuccess()
+{
+ assert(InlDecisionIsCandidate(m_Decision));
+ m_Decision = InlineDecision::SUCCESS;
+}
+
+//------------------------------------------------------------------------
+// NoteBool: handle a boolean observation with non-fatal impact
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value of the observation
+void RandomPolicy::NoteBool(InlineObservation obs, bool value)
+{
+ // Check the impact
+ InlineImpact impact = InlGetImpact(obs);
+
+ // As a safeguard, all fatal impact must be
+ // reported via noteFatal.
+ assert(impact != InlineImpact::FATAL);
+
+ // Handle most information here
+ bool isInformation = (impact == InlineImpact::INFORMATION);
+ bool propagate = !isInformation;
+
+ if (isInformation)
+ {
+ switch (obs)
+ {
+ case InlineObservation::CALLEE_IS_FORCE_INLINE:
+ // The RandomPolicy still honors force inlines.
+ //
+ // We may make the force-inline observation more than
+ // once. All observations should agree.
+ assert(!m_IsForceInlineKnown || (m_IsForceInline == value));
+ m_IsForceInline = value;
+ m_IsForceInlineKnown = true;
+ break;
+
+ case InlineObservation::CALLEE_HAS_SWITCH:
+ case InlineObservation::CALLEE_UNSUPPORTED_OPCODE:
+ // Pass these on, they should cause inlining to fail.
+ propagate = true;
+ break;
+
+ default:
+ // Ignore the remainder for now
+ break;
+ }
+ }
+
+ if (propagate)
+ {
+ NoteInternal(obs);
+ }
+}
+
+//------------------------------------------------------------------------
+// NoteInt: handle an observed integer value
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value being observed
+
+void RandomPolicy::NoteInt(InlineObservation obs, int value)
+{
+ switch (obs)
+ {
+
+ case InlineObservation::CALLEE_IL_CODE_SIZE:
+ {
+ assert(m_IsForceInlineKnown);
+ assert(value != 0);
+ m_CodeSize = static_cast<unsigned>(value);
+
+ if (m_IsForceInline)
+ {
+ // Candidate based on force inline
+ SetCandidate(InlineObservation::CALLEE_IS_FORCE_INLINE);
+ }
+ else
+ {
+ // Candidate, pending profitability evaluation
+ SetCandidate(InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+ }
+
+ break;
+ }
+
+ default:
+ // Ignore all other information
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+//
+// Notes:
+//    The random policy makes random decisions about profitability.
+// Generally we aspire to inline differently, not necessarily to
+// inline more.
+
+void RandomPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+ assert(InlDecisionIsCandidate(m_Decision));
+ assert(m_Observation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+
+ // Budget check.
+ if (!m_IsPrejitRoot)
+ {
+ InlineStrategy* strategy = m_RootCompiler->m_inlineStrategy;
+ bool overBudget = strategy->BudgetCheck(m_CodeSize);
+ if (overBudget)
+ {
+ SetFailure(InlineObservation::CALLSITE_OVER_BUDGET);
+ return;
+ }
+ }
+
+ // Use a probability curve that roughly matches the observed
+ // behavior of the LegacyPolicy. That way we're inlining
+ // differently but not creating enormous methods.
+ //
+ // We vary a bit at the extremes. The RandomPolicy won't always
+ // inline the small methods (<= 16 IL bytes) and won't always
+ // reject the large methods (> 100 IL bytes).
+
+ unsigned threshold = 0;
+
+ if (m_CodeSize <= 16)
+ {
+ threshold = 75;
+ }
+ else if (m_CodeSize <= 30)
+ {
+ threshold = 50;
+ }
+ else if (m_CodeSize <= 40)
+ {
+ threshold = 40;
+ }
+ else if (m_CodeSize <= 50)
+ {
+ threshold = 30;
+ }
+ else if (m_CodeSize <= 75)
+ {
+ threshold = 20;
+ }
+ else if (m_CodeSize <= 100)
+ {
+ threshold = 10;
+ }
+ else if (m_CodeSize <= 200)
+ {
+ threshold = 5;
+ }
+ else
+ {
+ threshold = 1;
+ }
+
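+    // For example (illustrative): a callee with 60 bytes of IL falls into
+    // the "<= 75" bucket above and so is accepted roughly 20% of the time.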
+ unsigned randomValue = m_Random->Next(1, 100);
+
+ // Reject if callee size is over the threshold
+ if (randomValue > threshold)
+ {
+ // Inline appears to be unprofitable
+ JITLOG_THIS(m_RootCompiler, (LL_INFO100000, "Random rejection (r=%d > t=%d)\n", randomValue, threshold));
+
+ // Fail the inline
+ if (m_IsPrejitRoot)
+ {
+ SetNever(InlineObservation::CALLEE_RANDOM_REJECT);
+ }
+ else
+ {
+ SetFailure(InlineObservation::CALLSITE_RANDOM_REJECT);
+ }
+ }
+ else
+ {
+ // Inline appears to be profitable
+ JITLOG_THIS(m_RootCompiler, (LL_INFO100000, "Random acceptance (r=%d <= t=%d)\n", randomValue, threshold));
+
+ // Update candidacy
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_RANDOM_ACCEPT);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_RANDOM_ACCEPT);
+ }
+ }
+}
+
+#endif // DEBUG
+
+#ifdef _MSC_VER
+// Disable warning about new array member initialization behavior
+#pragma warning(disable : 4351)
+#endif
+
+//------------------------------------------------------------------------
+// DiscretionaryPolicy: construct a new DiscretionaryPolicy
+//
+// Arguments:
+// compiler -- compiler instance doing the inlining (root compiler)
+// isPrejitRoot -- true if this compiler is prejitting the root method
+
+// clang-format off
+DiscretionaryPolicy::DiscretionaryPolicy(Compiler* compiler, bool isPrejitRoot)
+ : LegacyPolicy(compiler, isPrejitRoot)
+ , m_Depth(0)
+ , m_BlockCount(0)
+ , m_Maxstack(0)
+ , m_ArgCount(0)
+ , m_ArgType()
+ , m_ArgSize()
+ , m_LocalCount(0)
+ , m_ReturnType(CORINFO_TYPE_UNDEF)
+ , m_ReturnSize(0)
+ , m_ArgAccessCount(0)
+ , m_LocalAccessCount(0)
+ , m_IntConstantCount(0)
+ , m_FloatConstantCount(0)
+ , m_IntLoadCount(0)
+ , m_FloatLoadCount(0)
+ , m_IntStoreCount(0)
+ , m_FloatStoreCount(0)
+ , m_SimpleMathCount(0)
+ , m_ComplexMathCount(0)
+ , m_OverflowMathCount(0)
+ , m_IntArrayLoadCount(0)
+ , m_FloatArrayLoadCount(0)
+ , m_RefArrayLoadCount(0)
+ , m_StructArrayLoadCount(0)
+ , m_IntArrayStoreCount(0)
+ , m_FloatArrayStoreCount(0)
+ , m_RefArrayStoreCount(0)
+ , m_StructArrayStoreCount(0)
+ , m_StructOperationCount(0)
+ , m_ObjectModelCount(0)
+ , m_FieldLoadCount(0)
+ , m_FieldStoreCount(0)
+ , m_StaticFieldLoadCount(0)
+ , m_StaticFieldStoreCount(0)
+ , m_LoadAddressCount(0)
+ , m_ThrowCount(0)
+ , m_ReturnCount(0)
+ , m_CallCount(0)
+ , m_CallSiteWeight(0)
+ , m_ModelCodeSizeEstimate(0)
+ , m_PerCallInstructionEstimate(0)
+ , m_IsClassCtor(false)
+ , m_IsSameThis(false)
+ , m_CallerHasNewArray(false)
+ , m_CallerHasNewObj(false)
+{
+ // Empty
+}
+// clang-format on
+
+//------------------------------------------------------------------------
+// NoteBool: handle an observed boolean value
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value being observed
+
+void DiscretionaryPolicy::NoteBool(InlineObservation obs, bool value)
+{
+ switch (obs)
+ {
+ case InlineObservation::CALLEE_LOOKS_LIKE_WRAPPER:
+ m_LooksLikeWrapperMethod = value;
+ break;
+
+ case InlineObservation::CALLEE_ARG_FEEDS_CONSTANT_TEST:
+ assert(value);
+ m_ArgFeedsConstantTest++;
+ break;
+
+ case InlineObservation::CALLEE_ARG_FEEDS_RANGE_CHECK:
+ assert(value);
+ m_ArgFeedsRangeCheck++;
+ break;
+
+ case InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST:
+ assert(value);
+ m_ConstantArgFeedsConstantTest++;
+ break;
+
+ case InlineObservation::CALLEE_IS_CLASS_CTOR:
+ m_IsClassCtor = value;
+ break;
+
+ case InlineObservation::CALLSITE_IS_SAME_THIS:
+ m_IsSameThis = value;
+ break;
+
+ case InlineObservation::CALLER_HAS_NEWARRAY:
+ m_CallerHasNewArray = value;
+ break;
+
+ case InlineObservation::CALLER_HAS_NEWOBJ:
+ m_CallerHasNewObj = value;
+ break;
+
+ default:
+ LegacyPolicy::NoteBool(obs, value);
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// NoteInt: handle an observed integer value
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value being observed
+
+void DiscretionaryPolicy::NoteInt(InlineObservation obs, int value)
+{
+ switch (obs)
+ {
+
+ case InlineObservation::CALLEE_IL_CODE_SIZE:
+ // Override how code size is handled
+ {
+ assert(m_IsForceInlineKnown);
+ assert(value != 0);
+ m_CodeSize = static_cast<unsigned>(value);
+
+ if (m_IsForceInline)
+ {
+ // Candidate based on force inline
+ SetCandidate(InlineObservation::CALLEE_IS_FORCE_INLINE);
+ }
+ else
+ {
+ // Candidate, pending profitability evaluation
+ SetCandidate(InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+ }
+
+ break;
+ }
+
+ case InlineObservation::CALLEE_OPCODE:
+ {
+ // This tries to do a rough binning of opcodes based
+ // on similarity of impact on codegen.
+ OPCODE opcode = static_cast<OPCODE>(value);
+ ComputeOpcodeBin(opcode);
+ LegacyPolicy::NoteInt(obs, value);
+ break;
+ }
+
+ case InlineObservation::CALLEE_MAXSTACK:
+ m_Maxstack = value;
+ break;
+
+ case InlineObservation::CALLEE_NUMBER_OF_BASIC_BLOCKS:
+ m_BlockCount = value;
+ break;
+
+ case InlineObservation::CALLSITE_DEPTH:
+ m_Depth = value;
+ break;
+
+ case InlineObservation::CALLSITE_WEIGHT:
+ m_CallSiteWeight = static_cast<unsigned>(value);
+ break;
+
+ default:
+ // Delegate remainder to the LegacyPolicy.
+ LegacyPolicy::NoteInt(obs, value);
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// ComputeOpcodeBin: simple histogramming of opcodes based on presumably
+// similar codegen impact.
+//
+// Arguments:
+// opcode - an MSIL opcode from the callee
+
+void DiscretionaryPolicy::ComputeOpcodeBin(OPCODE opcode)
+{
+ switch (opcode)
+ {
+ case CEE_LDARG_0:
+ case CEE_LDARG_1:
+ case CEE_LDARG_2:
+ case CEE_LDARG_3:
+ case CEE_LDARG_S:
+ case CEE_LDARG:
+ case CEE_STARG_S:
+ case CEE_STARG:
+ m_ArgAccessCount++;
+ break;
+
+ case CEE_LDLOC_0:
+ case CEE_LDLOC_1:
+ case CEE_LDLOC_2:
+ case CEE_LDLOC_3:
+ case CEE_LDLOC_S:
+ case CEE_STLOC_0:
+ case CEE_STLOC_1:
+ case CEE_STLOC_2:
+ case CEE_STLOC_3:
+ case CEE_STLOC_S:
+ case CEE_LDLOC:
+ case CEE_STLOC:
+ m_LocalAccessCount++;
+ break;
+
+ case CEE_LDNULL:
+ case CEE_LDC_I4_M1:
+ case CEE_LDC_I4_0:
+ case CEE_LDC_I4_1:
+ case CEE_LDC_I4_2:
+ case CEE_LDC_I4_3:
+ case CEE_LDC_I4_4:
+ case CEE_LDC_I4_5:
+ case CEE_LDC_I4_6:
+ case CEE_LDC_I4_7:
+ case CEE_LDC_I4_8:
+ case CEE_LDC_I4_S:
+ m_IntConstantCount++;
+ break;
+
+ case CEE_LDC_R4:
+ case CEE_LDC_R8:
+ m_FloatConstantCount++;
+ break;
+
+ case CEE_LDIND_I1:
+ case CEE_LDIND_U1:
+ case CEE_LDIND_I2:
+ case CEE_LDIND_U2:
+ case CEE_LDIND_I4:
+ case CEE_LDIND_U4:
+ case CEE_LDIND_I8:
+ case CEE_LDIND_I:
+ m_IntLoadCount++;
+ break;
+
+ case CEE_LDIND_R4:
+ case CEE_LDIND_R8:
+ m_FloatLoadCount++;
+ break;
+
+ case CEE_STIND_I1:
+ case CEE_STIND_I2:
+ case CEE_STIND_I4:
+ case CEE_STIND_I8:
+ case CEE_STIND_I:
+ m_IntStoreCount++;
+ break;
+
+ case CEE_STIND_R4:
+ case CEE_STIND_R8:
+ m_FloatStoreCount++;
+ break;
+
+ case CEE_SUB:
+ case CEE_AND:
+ case CEE_OR:
+ case CEE_XOR:
+ case CEE_SHL:
+ case CEE_SHR:
+ case CEE_SHR_UN:
+ case CEE_NEG:
+ case CEE_NOT:
+ case CEE_CONV_I1:
+ case CEE_CONV_I2:
+ case CEE_CONV_I4:
+ case CEE_CONV_I8:
+ case CEE_CONV_U4:
+ case CEE_CONV_U8:
+ case CEE_CONV_U2:
+ case CEE_CONV_U1:
+ case CEE_CONV_I:
+ case CEE_CONV_U:
+ m_SimpleMathCount++;
+ break;
+
+ case CEE_MUL:
+ case CEE_DIV:
+ case CEE_DIV_UN:
+ case CEE_REM:
+ case CEE_REM_UN:
+ case CEE_CONV_R4:
+ case CEE_CONV_R8:
+ case CEE_CONV_R_UN:
+ m_ComplexMathCount++;
+ break;
+
+ case CEE_CONV_OVF_I1_UN:
+ case CEE_CONV_OVF_I2_UN:
+ case CEE_CONV_OVF_I4_UN:
+ case CEE_CONV_OVF_I8_UN:
+ case CEE_CONV_OVF_U1_UN:
+ case CEE_CONV_OVF_U2_UN:
+ case CEE_CONV_OVF_U4_UN:
+ case CEE_CONV_OVF_U8_UN:
+ case CEE_CONV_OVF_I_UN:
+ case CEE_CONV_OVF_U_UN:
+ case CEE_CONV_OVF_I1:
+ case CEE_CONV_OVF_U1:
+ case CEE_CONV_OVF_I2:
+ case CEE_CONV_OVF_U2:
+ case CEE_CONV_OVF_I4:
+ case CEE_CONV_OVF_U4:
+ case CEE_CONV_OVF_I8:
+ case CEE_CONV_OVF_U8:
+ case CEE_ADD_OVF:
+ case CEE_ADD_OVF_UN:
+ case CEE_MUL_OVF:
+ case CEE_MUL_OVF_UN:
+ case CEE_SUB_OVF:
+ case CEE_SUB_OVF_UN:
+ case CEE_CKFINITE:
+ m_OverflowMathCount++;
+ break;
+
+ case CEE_LDELEM_I1:
+ case CEE_LDELEM_U1:
+ case CEE_LDELEM_I2:
+ case CEE_LDELEM_U2:
+ case CEE_LDELEM_I4:
+ case CEE_LDELEM_U4:
+ case CEE_LDELEM_I8:
+ case CEE_LDELEM_I:
+ m_IntArrayLoadCount++;
+ break;
+
+ case CEE_LDELEM_R4:
+ case CEE_LDELEM_R8:
+ m_FloatArrayLoadCount++;
+ break;
+
+ case CEE_LDELEM_REF:
+ m_RefArrayLoadCount++;
+ break;
+
+ case CEE_LDELEM:
+ m_StructArrayLoadCount++;
+ break;
+
+ case CEE_STELEM_I:
+ case CEE_STELEM_I1:
+ case CEE_STELEM_I2:
+ case CEE_STELEM_I4:
+ case CEE_STELEM_I8:
+ m_IntArrayStoreCount++;
+ break;
+
+ case CEE_STELEM_R4:
+ case CEE_STELEM_R8:
+ m_FloatArrayStoreCount++;
+ break;
+
+ case CEE_STELEM_REF:
+ m_RefArrayStoreCount++;
+ break;
+
+ case CEE_STELEM:
+ m_StructArrayStoreCount++;
+ break;
+
+ case CEE_CPOBJ:
+ case CEE_LDOBJ:
+ case CEE_CPBLK:
+ case CEE_INITBLK:
+ case CEE_STOBJ:
+ m_StructOperationCount++;
+ break;
+
+ case CEE_CASTCLASS:
+ case CEE_ISINST:
+ case CEE_UNBOX:
+ case CEE_BOX:
+ case CEE_UNBOX_ANY:
+ case CEE_LDFTN:
+ case CEE_LDVIRTFTN:
+ case CEE_SIZEOF:
+ m_ObjectModelCount++;
+ break;
+
+ case CEE_LDFLD:
+ case CEE_LDLEN:
+ case CEE_REFANYTYPE:
+ case CEE_REFANYVAL:
+ m_FieldLoadCount++;
+ break;
+
+ case CEE_STFLD:
+ m_FieldStoreCount++;
+ break;
+
+ case CEE_LDSFLD:
+ m_StaticFieldLoadCount++;
+ break;
+
+ case CEE_STSFLD:
+ m_StaticFieldStoreCount++;
+ break;
+
+ case CEE_LDELEMA:
+ case CEE_LDSFLDA:
+ case CEE_LDFLDA:
+ case CEE_LDSTR:
+ case CEE_LDARGA:
+ case CEE_LDLOCA:
+ m_LoadAddressCount++;
+ break;
+
+ case CEE_CALL:
+ case CEE_CALLI:
+ case CEE_CALLVIRT:
+ case CEE_NEWOBJ:
+ case CEE_NEWARR:
+ case CEE_JMP:
+ m_CallCount++;
+ break;
+
+ case CEE_THROW:
+ case CEE_RETHROW:
+ m_ThrowCount++;
+ break;
+
+ case CEE_RET:
+            m_ReturnCount++;
+            break;
+
+ default:
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// PropagateNeverToRuntime: determine if a never result should cause the
+// method to be marked as un-inlinable.
+
+bool DiscretionaryPolicy::PropagateNeverToRuntime() const
+{
+ // Propagate most failures, but don't propagate when the inline
+ // was viable but unprofitable.
+ bool propagate = (m_Observation != InlineObservation::CALLEE_NOT_PROFITABLE_INLINE);
+
+ return propagate;
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+
+void DiscretionaryPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+
+#if defined(DEBUG)
+
+ // Punt if we're inlining and we've reached the acceptance limit.
+ int limit = JitConfig.JitInlineLimit();
+ unsigned current = m_RootCompiler->m_inlineStrategy->GetInlineCount();
+
+ if (!m_IsPrejitRoot && (limit >= 0) && (current >= static_cast<unsigned>(limit)))
+ {
+ SetFailure(InlineObservation::CALLSITE_OVER_INLINE_LIMIT);
+ return;
+ }
+
+#endif // defined(DEBUG)
+
+ // Make additional observations based on the method info
+ MethodInfoObservations(methodInfo);
+
+ // Estimate the code size impact. This is just for model
+ // evaluation purposes -- we'll still use the legacy policy's
+ // model for actual inlining.
+ EstimateCodeSize();
+
+    // Estimate performance impact. This is just for model
+ // evaluation purposes -- we'll still use the legacy policy's
+ // model for actual inlining.
+ EstimatePerformanceImpact();
+
+ // Delegate to LegacyPolicy for the rest
+ LegacyPolicy::DetermineProfitability(methodInfo);
+}
+
+//------------------------------------------------------------------------
+// MethodInfoObservations: make observations based on information from
+// the method info for the callee.
+//
+// Arguments:
+// methodInfo -- method info for the callee
+
+void DiscretionaryPolicy::MethodInfoObservations(CORINFO_METHOD_INFO* methodInfo)
+{
+ CORINFO_SIG_INFO& locals = methodInfo->locals;
+ m_LocalCount = locals.numArgs;
+
+ CORINFO_SIG_INFO& args = methodInfo->args;
+ const unsigned argCount = args.numArgs;
+ m_ArgCount = argCount;
+
+ const unsigned pointerSize = sizeof(void*);
+ unsigned i = 0;
+
+ // Implicit arguments
+
+ const bool hasThis = args.hasThis();
+
+ if (hasThis)
+ {
+ m_ArgType[i] = CORINFO_TYPE_CLASS;
+ m_ArgSize[i] = pointerSize;
+ i++;
+ m_ArgCount++;
+ }
+
+ const bool hasTypeArg = args.hasTypeArg();
+
+ if (hasTypeArg)
+ {
+ m_ArgType[i] = CORINFO_TYPE_NATIVEINT;
+ m_ArgSize[i] = pointerSize;
+ i++;
+ m_ArgCount++;
+ }
+
+ // Explicit arguments
+
+ unsigned j = 0;
+ CORINFO_ARG_LIST_HANDLE argListHandle = args.args;
+ COMP_HANDLE comp = m_RootCompiler->info.compCompHnd;
+
+ while ((i < MAX_ARGS) && (j < argCount))
+ {
+ CORINFO_CLASS_HANDLE classHandle;
+ CorInfoType type = strip(comp->getArgType(&args, argListHandle, &classHandle));
+
+ m_ArgType[i] = type;
+
+ if (type == CORINFO_TYPE_VALUECLASS)
+ {
+ assert(classHandle != nullptr);
+ m_ArgSize[i] = roundUp(comp->getClassSize(classHandle), pointerSize);
+ }
+ else
+ {
+ m_ArgSize[i] = pointerSize;
+ }
+
+ argListHandle = comp->getArgNext(argListHandle);
+ i++;
+ j++;
+ }
+
+ while (i < MAX_ARGS)
+ {
+ m_ArgType[i] = CORINFO_TYPE_UNDEF;
+ m_ArgSize[i] = 0;
+ i++;
+ }
+
+ // Return Type
+
+ m_ReturnType = args.retType;
+
+ if (m_ReturnType == CORINFO_TYPE_VALUECLASS)
+ {
+ assert(args.retTypeClass != nullptr);
+ m_ReturnSize = roundUp(comp->getClassSize(args.retTypeClass), pointerSize);
+ }
+ else if (m_ReturnType == CORINFO_TYPE_VOID)
+ {
+ m_ReturnSize = 0;
+ }
+ else
+ {
+ m_ReturnSize = pointerSize;
+ }
+}
+
+//------------------------------------------------------------------------
+// EstimateCodeSize: produce (various) code size estimates based on
+// observations.
+//
+// The "Baseline" code size model used by the legacy policy is
+// effectively
+//
+// 0.100 * m_CalleeNativeSizeEstimate +
+// -0.100 * m_CallsiteNativeSizeEstimate
+//
+// On the inlines in CoreCLR's mscorlib, release windows x64, this
+// yields scores of R=0.42, MSE=228, and MAE=7.25.
+//
+// This estimate can be improved slightly by refitting, resulting in
+//
+// -1.451 +
+// 0.095 * m_CalleeNativeSizeEstimate +
+// -0.104 * m_CallsiteNativeSizeEstimate
+//
+// With R=0.44, MSE=220, and MAE=6.93.
+
+void DiscretionaryPolicy::EstimateCodeSize()
+{
+ // Ensure we have this available.
+ m_CalleeNativeSizeEstimate = DetermineNativeSizeEstimate();
+
+ // Size estimate based on GLMNET model.
+ // R=0.55, MSE=177, MAE=6.59
+ //
+ // Suspect it doesn't handle factors properly...
+ // clang-format off
+ double sizeEstimate =
+ -13.532 +
+ 0.359 * (int) m_CallsiteFrequency +
+ -0.015 * m_ArgCount +
+ -1.553 * m_ArgSize[5] +
+ 2.326 * m_LocalCount +
+ 0.287 * m_ReturnSize +
+ 0.561 * m_IntConstantCount +
+ 1.932 * m_FloatConstantCount +
+ -0.822 * m_SimpleMathCount +
+ -7.591 * m_IntArrayLoadCount +
+ 4.784 * m_RefArrayLoadCount +
+ 12.778 * m_StructArrayLoadCount +
+ 1.452 * m_FieldLoadCount +
+ 8.811 * m_StaticFieldLoadCount +
+ 2.752 * m_StaticFieldStoreCount +
+ -6.566 * m_ThrowCount +
+ 6.021 * m_CallCount +
+ -0.238 * m_IsInstanceCtor +
+ -5.357 * m_IsFromPromotableValueClass +
+ -7.901 * (m_ConstantArgFeedsConstantTest > 0 ? 1 : 0) +
+ 0.065 * m_CalleeNativeSizeEstimate;
+ // clang-format on
+
+ // Scaled up and reported as an integer value.
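+    // For example, assuming SIZE_SCALE is 10 (estimates are kept in
+    // bytes * 10), a model estimate of 6.5 bytes is stored as 65.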
+ m_ModelCodeSizeEstimate = (int)(SIZE_SCALE * sizeEstimate);
+}
+
+//------------------------------------------------------------------------
+// EstimatePerformanceImpact: produce performance estimates based on
+// observations.
+//
+// Notes:
+// Attempts to predict the per-call savings in instructions executed.
+//
+//    A negative value indicates that doing the inline will save instructions
+// and likely time.
+
+void DiscretionaryPolicy::EstimatePerformanceImpact()
+{
+ // Performance estimate based on GLMNET model.
+ // R=0.24, RMSE=16.1, MAE=8.9.
+ // clang-format off
+ double perCallSavingsEstimate =
+ -7.35
+ + (m_CallsiteFrequency == InlineCallsiteFrequency::BORING ? 0.76 : 0)
+ + (m_CallsiteFrequency == InlineCallsiteFrequency::LOOP ? -2.02 : 0)
+ + (m_ArgType[0] == CORINFO_TYPE_CLASS ? 3.51 : 0)
+ + (m_ArgType[3] == CORINFO_TYPE_BOOL ? 20.7 : 0)
+ + (m_ArgType[4] == CORINFO_TYPE_CLASS ? 0.38 : 0)
+ + (m_ReturnType == CORINFO_TYPE_CLASS ? 2.32 : 0);
+ // clang-format on
+
+ // Scaled up and reported as an integer value.
+ m_PerCallInstructionEstimate = (int)(SIZE_SCALE * perCallSavingsEstimate);
+}
+
+//------------------------------------------------------------------------
+// CodeSizeEstimate: estimated code size impact of the inline
+//
+// Return Value:
+// Estimated code size impact, in bytes * 10
+
+int DiscretionaryPolicy::CodeSizeEstimate()
+{
+ return m_ModelCodeSizeEstimate;
+}
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------
+// DumpSchema: dump names for all the supporting data for the
+// inline decision in CSV format.
+//
+// Arguments:
+// file -- file to write to
+
+void DiscretionaryPolicy::DumpSchema(FILE* file) const
+{
+ fprintf(file, ",ILSize");
+ fprintf(file, ",CallsiteFrequency");
+ fprintf(file, ",InstructionCount");
+ fprintf(file, ",LoadStoreCount");
+ fprintf(file, ",Depth");
+ fprintf(file, ",BlockCount");
+ fprintf(file, ",Maxstack");
+ fprintf(file, ",ArgCount");
+
+ for (unsigned i = 0; i < MAX_ARGS; i++)
+ {
+ fprintf(file, ",Arg%uType", i);
+ }
+
+ for (unsigned i = 0; i < MAX_ARGS; i++)
+ {
+ fprintf(file, ",Arg%uSize", i);
+ }
+
+ fprintf(file, ",LocalCount");
+ fprintf(file, ",ReturnType");
+ fprintf(file, ",ReturnSize");
+ fprintf(file, ",ArgAccessCount");
+ fprintf(file, ",LocalAccessCount");
+ fprintf(file, ",IntConstantCount");
+ fprintf(file, ",FloatConstantCount");
+ fprintf(file, ",IntLoadCount");
+ fprintf(file, ",FloatLoadCount");
+ fprintf(file, ",IntStoreCount");
+ fprintf(file, ",FloatStoreCount");
+ fprintf(file, ",SimpleMathCount");
+ fprintf(file, ",ComplexMathCount");
+ fprintf(file, ",OverflowMathCount");
+ fprintf(file, ",IntArrayLoadCount");
+ fprintf(file, ",FloatArrayLoadCount");
+ fprintf(file, ",RefArrayLoadCount");
+ fprintf(file, ",StructArrayLoadCount");
+ fprintf(file, ",IntArrayStoreCount");
+ fprintf(file, ",FloatArrayStoreCount");
+ fprintf(file, ",RefArrayStoreCount");
+ fprintf(file, ",StructArrayStoreCount");
+ fprintf(file, ",StructOperationCount");
+ fprintf(file, ",ObjectModelCount");
+ fprintf(file, ",FieldLoadCount");
+ fprintf(file, ",FieldStoreCount");
+ fprintf(file, ",StaticFieldLoadCount");
+ fprintf(file, ",StaticFieldStoreCount");
+ fprintf(file, ",LoadAddressCount");
+ fprintf(file, ",ThrowCount");
+ fprintf(file, ",ReturnCount");
+ fprintf(file, ",CallCount");
+ fprintf(file, ",CallSiteWeight");
+ fprintf(file, ",IsForceInline");
+ fprintf(file, ",IsInstanceCtor");
+ fprintf(file, ",IsFromPromotableValueClass");
+ fprintf(file, ",HasSimd");
+ fprintf(file, ",LooksLikeWrapperMethod");
+ fprintf(file, ",ArgFeedsConstantTest");
+ fprintf(file, ",IsMostlyLoadStore");
+ fprintf(file, ",ArgFeedsRangeCheck");
+ fprintf(file, ",ConstantArgFeedsConstantTest");
+ fprintf(file, ",CalleeNativeSizeEstimate");
+ fprintf(file, ",CallsiteNativeSizeEstimate");
+ fprintf(file, ",ModelCodeSizeEstimate");
+ fprintf(file, ",ModelPerCallInstructionEstimate");
+ fprintf(file, ",IsClassCtor");
+ fprintf(file, ",IsSameThis");
+ fprintf(file, ",CallerHasNewArray");
+ fprintf(file, ",CallerHasNewObj");
+}
+
+//------------------------------------------------------------------------
+// DumpData: dump all the supporting data for the inline decision
+// in CSV format.
+//
+// Arguments:
+// file -- file to write to
+
+void DiscretionaryPolicy::DumpData(FILE* file) const
+{
+ fprintf(file, ",%u", m_CodeSize);
+ fprintf(file, ",%u", m_CallsiteFrequency);
+ fprintf(file, ",%u", m_InstructionCount);
+ fprintf(file, ",%u", m_LoadStoreCount);
+ fprintf(file, ",%u", m_Depth);
+ fprintf(file, ",%u", m_BlockCount);
+ fprintf(file, ",%u", m_Maxstack);
+ fprintf(file, ",%u", m_ArgCount);
+
+ for (unsigned i = 0; i < MAX_ARGS; i++)
+ {
+ fprintf(file, ",%u", m_ArgType[i]);
+ }
+
+ for (unsigned i = 0; i < MAX_ARGS; i++)
+ {
+ fprintf(file, ",%u", (unsigned)m_ArgSize[i]);
+ }
+
+ fprintf(file, ",%u", m_LocalCount);
+ fprintf(file, ",%u", m_ReturnType);
+ fprintf(file, ",%u", (unsigned)m_ReturnSize);
+ fprintf(file, ",%u", m_ArgAccessCount);
+ fprintf(file, ",%u", m_LocalAccessCount);
+ fprintf(file, ",%u", m_IntConstantCount);
+ fprintf(file, ",%u", m_FloatConstantCount);
+ fprintf(file, ",%u", m_IntLoadCount);
+ fprintf(file, ",%u", m_FloatLoadCount);
+ fprintf(file, ",%u", m_IntStoreCount);
+ fprintf(file, ",%u", m_FloatStoreCount);
+ fprintf(file, ",%u", m_SimpleMathCount);
+ fprintf(file, ",%u", m_ComplexMathCount);
+ fprintf(file, ",%u", m_OverflowMathCount);
+ fprintf(file, ",%u", m_IntArrayLoadCount);
+ fprintf(file, ",%u", m_FloatArrayLoadCount);
+ fprintf(file, ",%u", m_RefArrayLoadCount);
+ fprintf(file, ",%u", m_StructArrayLoadCount);
+ fprintf(file, ",%u", m_IntArrayStoreCount);
+ fprintf(file, ",%u", m_FloatArrayStoreCount);
+ fprintf(file, ",%u", m_RefArrayStoreCount);
+ fprintf(file, ",%u", m_StructArrayStoreCount);
+ fprintf(file, ",%u", m_StructOperationCount);
+ fprintf(file, ",%u", m_ObjectModelCount);
+ fprintf(file, ",%u", m_FieldLoadCount);
+ fprintf(file, ",%u", m_FieldStoreCount);
+ fprintf(file, ",%u", m_StaticFieldLoadCount);
+ fprintf(file, ",%u", m_StaticFieldStoreCount);
+ fprintf(file, ",%u", m_LoadAddressCount);
+ fprintf(file, ",%u", m_ReturnCount);
+ fprintf(file, ",%u", m_ThrowCount);
+ fprintf(file, ",%u", m_CallCount);
+ fprintf(file, ",%u", m_CallSiteWeight);
+ fprintf(file, ",%u", m_IsForceInline ? 1 : 0);
+ fprintf(file, ",%u", m_IsInstanceCtor ? 1 : 0);
+ fprintf(file, ",%u", m_IsFromPromotableValueClass ? 1 : 0);
+ fprintf(file, ",%u", m_HasSimd ? 1 : 0);
+ fprintf(file, ",%u", m_LooksLikeWrapperMethod ? 1 : 0);
+ fprintf(file, ",%u", m_ArgFeedsConstantTest);
+ fprintf(file, ",%u", m_MethodIsMostlyLoadStore ? 1 : 0);
+ fprintf(file, ",%u", m_ArgFeedsRangeCheck);
+ fprintf(file, ",%u", m_ConstantArgFeedsConstantTest);
+ fprintf(file, ",%d", m_CalleeNativeSizeEstimate);
+ fprintf(file, ",%d", m_CallsiteNativeSizeEstimate);
+ fprintf(file, ",%d", m_ModelCodeSizeEstimate);
+ fprintf(file, ",%d", m_PerCallInstructionEstimate);
+ fprintf(file, ",%u", m_IsClassCtor ? 1 : 0);
+ fprintf(file, ",%u", m_IsSameThis ? 1 : 0);
+ fprintf(file, ",%u", m_CallerHasNewArray ? 1 : 0);
+ fprintf(file, ",%u", m_CallerHasNewObj ? 1 : 0);
+}
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------/
+// ModelPolicy: construct a new ModelPolicy
+//
+// Arguments:
+// compiler -- compiler instance doing the inlining (root compiler)
+// isPrejitRoot -- true if this compiler is prejitting the root method
+
+ModelPolicy::ModelPolicy(Compiler* compiler, bool isPrejitRoot) : DiscretionaryPolicy(compiler, isPrejitRoot)
+{
+ // Empty
+}
+
+//------------------------------------------------------------------------
+// NoteInt: handle an observed integer value
+//
+// Arguments:
+// obs - the current observation
+// value - the value being observed
+//
+// Notes:
+// The ILSize threshold used here should be large enough that
+// it does not generally influence inlining decisions -- it only
+// helps to make them faster.
+//
+// The value is determined as follows. We figure out the maximum
+// possible code size estimate that will lead to an inline. This is
+// found by determining the maximum possible inline benefit and
+// working backwards.
+//
+// In the current ModelPolicy, the maximum benefit is -28.1, which
+// comes from a CallSiteWeight of 3 and a per call benefit of
+// -9.37. This implies that any candidate with code size larger
+// than (28.1/0.2) will not pass the threshold. So the maximum code
+// size estimate (in bytes) for any inlinee is 140.55, and hence the
+// maximum scaled estimate is 1405.
+//
+// Since we are trying to short circuit early in the evaluation
+// process we don't have the code size estimate in hand. We need to
+// estimate the possible code size estimate based on something we
+// know cheaply and early -- the ILSize. So we use quantile
+// regression to project how ILSize predicts the model code size
+// estimate. Note that ILSize does not currently directly enter
+// into the model.
+//
+// The median value for the model code size estimate based on
+// ILSize is given by -107 + 12.6 * ILSize for the V9 data. This
+// means an ILSize of 120 or more leads to a scaled size estimate of
+// at least 1405 at least 50% of the time. So we choose 120 as the
+// early rejection threshold.
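+//
+// Spelled out, the arithmetic behind the 120 threshold (using the
+// constants quoted above and SIZE_SCALE = 10) is roughly:
+//
+// max benefit = 3 * 9.37 = 28.11
+// max inlinee size (bytes) = 28.11 / 0.20 = 140.55
+// max scaled size estimate = 140.55 * 10 ~= 1405
+// -107 + 12.6 * ILSize >= 1405 ==> ILSize >= 120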
+
+void ModelPolicy::NoteInt(InlineObservation obs, int value)
+{
+ // Let underlying policy do its thing.
+ DiscretionaryPolicy::NoteInt(obs, value);
+
+ // Fail fast for inlinees that are too large to ever inline.
+ // The value of 120 is model-dependent; see notes above.
+ if (!m_IsForceInline && (obs == InlineObservation::CALLEE_IL_CODE_SIZE) && (value >= 120))
+ {
+ // Callee too big, not a candidate
+ SetNever(InlineObservation::CALLEE_TOO_MUCH_IL);
+ return;
+ }
+
+ // Safeguard against overly deep inlines
+ if (obs == InlineObservation::CALLSITE_DEPTH)
+ {
+ unsigned depthLimit = m_RootCompiler->m_inlineStrategy->GetMaxInlineDepth();
+
+ if (m_Depth > depthLimit)
+ {
+ SetFailure(InlineObservation::CALLSITE_IS_TOO_DEEP);
+ return;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+//
+// Notes:
+// There are currently two parameters that are ad-hoc: the
+// per-call-site weight and the size/speed threshold. Ideally this
+// policy would have just one tunable parameter, the threshold,
+// which describes how willing we are to trade size for speed.
+
+void ModelPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+ // Do some homework
+ MethodInfoObservations(methodInfo);
+ EstimateCodeSize();
+ EstimatePerformanceImpact();
+
+ // Preliminary inline model.
+ //
+ // If code size is estimated to increase, look at
+ // the profitability model for guidance.
+ //
+ // If code size will decrease, just inline.
+
+ if (m_ModelCodeSizeEstimate <= 0)
+ {
+ // Inline will likely decrease code size
+ JITLOG_THIS(m_RootCompiler, (LL_INFO100000, "Inline profitable, will decrease code size by %g bytes\n",
+ (double)-m_ModelCodeSizeEstimate / SIZE_SCALE));
+
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_IS_SIZE_DECREASING_INLINE);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_IS_SIZE_DECREASING_INLINE);
+ }
+ }
+ else
+ {
+ // We estimate that this inline will increase code size. Only
+ // inline if the performance win is sufficiently large to
+ // justify bigger code.
+
+ // First compute the number of instruction executions saved
+ // via inlining per call to the callee per byte of code size
+ // impact.
+ //
+ // The per call instruction estimate is negative if the inline
+ // will reduce instruction count. Flip the sign here to make
+ // positive be better and negative worse.
+ double perCallBenefit = -((double)m_PerCallInstructionEstimate / (double)m_ModelCodeSizeEstimate);
+
+ // Now estimate the local call frequency.
+ //
+ // Todo: use IBC data, or a better local profile estimate, or
+ // try and incorporate this into the model. For instance if we
+ // tried to predict the benefit per call to the root method
+ // then the model would have to incorporate the local call
+ // frequency, somehow.
+ double callSiteWeight = 1.0;
+
+ switch (m_CallsiteFrequency)
+ {
+ case InlineCallsiteFrequency::RARE:
+ callSiteWeight = 0.1;
+ break;
+ case InlineCallsiteFrequency::BORING:
+ callSiteWeight = 1.0;
+ break;
+ case InlineCallsiteFrequency::WARM:
+ callSiteWeight = 1.5;
+ break;
+ case InlineCallsiteFrequency::LOOP:
+ case InlineCallsiteFrequency::HOT:
+ callSiteWeight = 3.0;
+ break;
+ default:
+ assert(false);
+ break;
+ }
+
+ // Determine the estimated number of instructions saved per
+ // call to the root method per byte of code size impact. This
+ // is our benefit figure of merit.
+ double benefit = callSiteWeight * perCallBenefit;
+
+ // Compare this to the threshold, and inline if greater.
+ //
+ // The threshold is interpretable as a size/speed tradeoff:
+ // the value of 0.2 below indicates we'll allow inlines that
+ // grow code by as many as 5 bytes to save 1 instruction
+ // execution (per call to the root method).
+ double threshold = 0.20;
+ bool shouldInline = (benefit > threshold);
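+
+ // For example (illustrative numbers only): at a LOOP call site
+ // (weight 3.0), an inline with a per call estimate of -2 instructions
+ // and a code size estimate of +20 bytes gives perCallBenefit =
+ // 2/20 = 0.1 and benefit = 3.0 * 0.1 = 0.3 > 0.2, so it is accepted;
+ // at a BORING call site (weight 1.0) the same inline scores 0.1 and
+ // is rejected.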
+
+ JITLOG_THIS(m_RootCompiler,
+ (LL_INFO100000, "Inline %s profitable: benefit=%g (weight=%g, percall=%g, size=%g)\n",
+ shouldInline ? "is" : "is not", benefit, callSiteWeight,
+ (double)m_PerCallInstructionEstimate / SIZE_SCALE, (double)m_ModelCodeSizeEstimate / SIZE_SCALE));
+
+ if (!shouldInline)
+ {
+ // Fail the inline
+ if (m_IsPrejitRoot)
+ {
+ SetNever(InlineObservation::CALLEE_NOT_PROFITABLE_INLINE);
+ }
+ else
+ {
+ SetFailure(InlineObservation::CALLSITE_NOT_PROFITABLE_INLINE);
+ }
+ }
+ else
+ {
+ // Update candidacy
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_IS_PROFITABLE_INLINE);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_IS_PROFITABLE_INLINE);
+ }
+ }
+ }
+}
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------/
+// FullPolicy: construct a new FullPolicy
+//
+// Arguments:
+// compiler -- compiler instance doing the inlining (root compiler)
+// isPrejitRoot -- true if this compiler is prejitting the root method
+
+FullPolicy::FullPolicy(Compiler* compiler, bool isPrejitRoot) : DiscretionaryPolicy(compiler, isPrejitRoot)
+{
+ // Empty
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+
+void FullPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+ // Check depth
+
+ unsigned depthLimit = m_RootCompiler->m_inlineStrategy->GetMaxInlineDepth();
+
+ if (m_Depth > depthLimit)
+ {
+ SetFailure(InlineObservation::CALLSITE_IS_TOO_DEEP);
+ return;
+ }
+
+ // Check size
+
+ unsigned sizeLimit = m_RootCompiler->m_inlineStrategy->GetMaxInlineILSize();
+
+ if (m_CodeSize > sizeLimit)
+ {
+ SetFailure(InlineObservation::CALLEE_TOO_MUCH_IL);
+ return;
+ }
+
+ // Otherwise, we're good to go
+
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_IS_PROFITABLE_INLINE);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_IS_PROFITABLE_INLINE);
+ }
+
+ return;
+}
+
+//------------------------------------------------------------------------/
+// SizePolicy: construct a new SizePolicy
+//
+// Arguments:
+// compiler -- compiler instance doing the inlining (root compiler)
+// isPrejitRoot -- true if this compiler is prejitting the root method
+
+SizePolicy::SizePolicy(Compiler* compiler, bool isPrejitRoot) : DiscretionaryPolicy(compiler, isPrejitRoot)
+{
+ // Empty
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+
+void SizePolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+ // Do some homework
+ MethodInfoObservations(methodInfo);
+ EstimateCodeSize();
+
+ // Does this inline increase the estimated size beyond
+ // the original size estimate?
+ const InlineStrategy* strategy = m_RootCompiler->m_inlineStrategy;
+ const int initialSize = strategy->GetInitialSizeEstimate();
+ const int currentSize = strategy->GetCurrentSizeEstimate();
+ const int newSize = currentSize + m_ModelCodeSizeEstimate;
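+
+ // Note: the model estimate and the strategy's size estimates are all
+ // kept in the same scaled units (bytes * SIZE_SCALE), so they can be
+ // added and compared directly; the log message below divides by
+ // SIZE_SCALE to report plain bytes.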
+
+ if (newSize <= initialSize)
+ {
+ // Estimated size impact is acceptable, so inline here.
+ JITLOG_THIS(m_RootCompiler,
+ (LL_INFO100000, "Inline profitable, root size estimate %d is less than initial size %d\n",
+ newSize / SIZE_SCALE, initialSize / SIZE_SCALE));
+
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_IS_SIZE_DECREASING_INLINE);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_IS_SIZE_DECREASING_INLINE);
+ }
+ }
+ else
+ {
+ // Estimated size increase is too large, so no inline here.
+ //
+ // Note that we ought to reconsider this inline if we make
+ // room in the budget by inlining a bunch of size decreasing
+ // inlines after this one. But for now, we won't do this.
+ if (m_IsPrejitRoot)
+ {
+ SetNever(InlineObservation::CALLEE_NOT_PROFITABLE_INLINE);
+ }
+ else
+ {
+ SetFailure(InlineObservation::CALLSITE_NOT_PROFITABLE_INLINE);
+ }
+ }
+
+ return;
+}
+
+// Statics to track emission of the replay banner
+// and provide file access to the inline xml
+
+bool ReplayPolicy::s_WroteReplayBanner = false;
+FILE* ReplayPolicy::s_ReplayFile = nullptr;
+CritSecObject ReplayPolicy::s_XmlReaderLock;
+
+//------------------------------------------------------------------------/
+// ReplayPolicy: construct a new ReplayPolicy
+//
+// Arguments:
+// compiler -- compiler instance doing the inlining (root compiler)
+// isPrejitRoot -- true if this compiler is prejitting the root method
+
+ReplayPolicy::ReplayPolicy(Compiler* compiler, bool isPrejitRoot)
+ : DiscretionaryPolicy(compiler, isPrejitRoot)
+ , m_InlineContext(nullptr)
+ , m_Offset(BAD_IL_OFFSET)
+ , m_WasForceInline(false)
+{
+ // Is there a log file open already? If so, we can use it.
+ if (s_ReplayFile == nullptr)
+ {
+ // Did we already try and open and fail?
+ if (!s_WroteReplayBanner)
+ {
+ // Nope, open it up.
+ const wchar_t* replayFileName = JitConfig.JitInlineReplayFile();
+ s_ReplayFile = _wfopen(replayFileName, W("r"));
+
+ // Display banner to stderr, unless we're dumping inline Xml,
+ // in which case the policy name is captured in the Xml.
+ if (JitConfig.JitInlineDumpXml() == 0)
+ {
+ fprintf(stderr, "*** %s inlines from %ws\n", s_ReplayFile == nullptr ? "Unable to replay" : "Replaying",
+ replayFileName);
+ }
+
+ s_WroteReplayBanner = true;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// FinalizeXml: finalize reading of inline Xml
+//
+// Notes:
+// Called during jitShutdown()
+
+void ReplayPolicy::FinalizeXml()
+{
+ if (s_ReplayFile != nullptr)
+ {
+ fclose(s_ReplayFile);
+ s_ReplayFile = nullptr;
+ }
+}
+
+//------------------------------------------------------------------------
+// FindMethod: find the root method in the inline Xml
+//
+// ReturnValue:
+// true if found. File position left pointing just after the
+// <Token> entry for the method.
+
+bool ReplayPolicy::FindMethod()
+{
+ if (s_ReplayFile == nullptr)
+ {
+ return false;
+ }
+
+ // See if we've already found this method.
+ InlineStrategy* inlineStrategy = m_RootCompiler->m_inlineStrategy;
+ long filePosition = inlineStrategy->GetMethodXmlFilePosition();
+
+ if (filePosition == -1)
+ {
+ // Past lookup failed
+ return false;
+ }
+ else if (filePosition > 0)
+ {
+ // Past lookup succeeded, jump there
+ fseek(s_ReplayFile, filePosition, SEEK_SET);
+ return true;
+ }
+
+ // Else, scan the file. Might be nice to build an index
+ // or something, someday.
+ const mdMethodDef methodToken =
+ m_RootCompiler->info.compCompHnd->getMethodDefFromMethod(m_RootCompiler->info.compMethodHnd);
+ const unsigned methodHash = m_RootCompiler->info.compMethodHash();
+
+ bool foundMethod = false;
+ char buffer[256];
+ fseek(s_ReplayFile, 0, SEEK_SET);
+
+ while (!foundMethod)
+ {
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // Look for next method entry
+ if (strstr(buffer, "<Method>") == nullptr)
+ {
+ continue;
+ }
+
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // See if token matches
+ unsigned token = 0;
+ int count = sscanf(buffer, " <Token>%u</Token> ", &token);
+ if ((count != 1) || (token != methodToken))
+ {
+ continue;
+ }
+
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // See if hash matches
+ unsigned hash = 0;
+ count = sscanf(buffer, " <Hash>%u</Hash> ", &hash);
+ if ((count != 1) || (hash != methodHash))
+ {
+ continue;
+ }
+
+ // Found a match...
+ foundMethod = true;
+ break;
+ }
+
+ // Update file position cache for this method
+ long foundPosition = -1;
+
+ if (foundMethod)
+ {
+ foundPosition = ftell(s_ReplayFile);
+ }
+
+ inlineStrategy->SetMethodXmlFilePosition(foundPosition);
+
+ return foundMethod;
+}
+
+//------------------------------------------------------------------------
+// FindContext: find an inline context in the inline Xml
+//
+// Arguments:
+// context -- context of interest
+//
+// ReturnValue:
+// true if found. File position left pointing just after the
+// <Token> entry for the context.
+//
+// Notes:
+// Assumes file position within the relevant method has just been
+// set by a successful call to FindMethod().
+
+bool ReplayPolicy::FindContext(InlineContext* context)
+{
+ // Make sure we've found the parent context.
+ if (context->IsRoot())
+ {
+ // We've already found the method context so we're good.
+ return true;
+ }
+
+ bool foundParent = FindContext(context->GetParent());
+
+ if (!foundParent)
+ {
+ return false;
+ }
+
+ // File pointer should be pointing at the parent context level.
+ // See if we see an inline entry for this context.
+ //
+ // Token and Hash we're looking for.
+ mdMethodDef contextToken = m_RootCompiler->info.compCompHnd->getMethodDefFromMethod(context->GetCallee());
+ unsigned contextHash = m_RootCompiler->info.compCompHnd->getMethodHash(context->GetCallee());
+ unsigned contextOffset = (unsigned)context->GetOffset();
+
+ return FindInline(contextToken, contextHash, contextOffset);
+}
+
+//------------------------------------------------------------------------
+// FindInline: find entry for the current inline in inline Xml.
+//
+// Arguments:
+// token -- token describing the inline
+// hash -- hash describing the inline
+// offset -- IL offset of the call site in the parent method
+//
+// ReturnValue:
+// true if the inline entry was found
+//
+// Notes:
+// Assumes file position has just been set by a successful call to
+// FindMethod or FindContext.
+//
+// Token and Hash will not be sufficiently unique to identify a
+// particular inline, if there are multiple calls to the same
+// method.
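+//
+// The replay Xml is expected to look roughly like the sketch below
+// (inferred from the sscanf patterns used here and in FindMethod; not
+// a formal schema):
+//
+//   <Method>
+//     <Token>...</Token>
+//     <Hash>...</Hash>
+//     <Inlines>
+//       <Inline>
+//         <Token>...</Token>
+//         <Hash>...</Hash>
+//         <Offset>...</Offset>
+//         <CollectData>...</CollectData>   (optional)
+//         <Inlines> ... </Inlines>         (nested inlines)
+//       </Inline>
+//     </Inlines>
+//   </Method>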
+
+bool ReplayPolicy::FindInline(unsigned token, unsigned hash, unsigned offset)
+{
+ char buffer[256];
+ bool foundInline = false;
+ int depth = 0;
+
+ while (!foundInline)
+ {
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // If we hit </Method> we've gone too far,
+ // and the XML is messed up.
+ if (strstr(buffer, "</Method>") != nullptr)
+ {
+ break;
+ }
+
+ // Look for <Inlines />....
+ if (strstr(buffer, "<Inlines />") != nullptr)
+ {
+ if (depth == 0)
+ {
+ // Exited depth 1, failed to find the context
+ break;
+ }
+ else
+ {
+ // Exited nested, keep looking
+ continue;
+ }
+ }
+
+ // Look for <Inlines>....
+ if (strstr(buffer, "<Inlines>") != nullptr)
+ {
+ depth++;
+ continue;
+ }
+
+ // If we hit </Inlines> we've exited a nested entry
+ // or the current entry.
+ if (strstr(buffer, "</Inlines>") != nullptr)
+ {
+ depth--;
+
+ if (depth == 0)
+ {
+ // Exited depth 1, failed to find the context
+ break;
+ }
+ else
+ {
+ // Exited nested, keep looking
+ continue;
+ }
+ }
+
+ // Look for start of inline section at the right depth
+ if ((depth != 1) || (strstr(buffer, "<Inline>") == nullptr))
+ {
+ continue;
+ }
+
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // Match token
+ unsigned inlineToken = 0;
+ int count = sscanf(buffer, " <Token>%u</Token> ", &inlineToken);
+
+ if ((count != 1) || (inlineToken != token))
+ {
+ continue;
+ }
+
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // Match hash
+ unsigned inlineHash = 0;
+ count = sscanf(buffer, " <Hash>%u</Hash> ", &inlineHash);
+
+ if ((count != 1) || (inlineHash != hash))
+ {
+ continue;
+ }
+
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // Match offset
+ unsigned inlineOffset = 0;
+ count = sscanf(buffer, " <Offset>%u</Offset> ", &inlineOffset);
+ if ((count != 1) || (inlineOffset != offset))
+ {
+ continue;
+ }
+
+ // Token,Hash,Offset may still not be unique enough, but it's
+ // all we have right now.
+
+ // We're good!
+ foundInline = true;
+
+ // Check for a data collection marker. This does not affect
+ // matching...
+
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) != nullptr)
+ {
+ unsigned collectData = 0;
+ count = sscanf(buffer, " <CollectData>%u</CollectData> ", &collectData);
+
+ if (count == 1)
+ {
+ m_IsDataCollectionTarget = (collectData == 1);
+ }
+ }
+
+ break;
+ }
+
+ return foundInline;
+}
+
+//------------------------------------------------------------------------
+// FindInline: find entry for a particular callee in inline Xml.
+//
+// Arguments:
+// callee -- handle for the callee method
+//
+// ReturnValue:
+// true if the inline should be performed.
+//
+// Notes:
+// Assumes file position has just been set by a successful call to
+// FindContext(...);
+//
+// callee handle will not be sufficiently unique to identify a
+// particular inline, if there are multiple calls to the same
+// method.
+
+bool ReplayPolicy::FindInline(CORINFO_METHOD_HANDLE callee)
+{
+ // Token and Hash we're looking for
+ mdMethodDef calleeToken = m_RootCompiler->info.compCompHnd->getMethodDefFromMethod(callee);
+ unsigned calleeHash = m_RootCompiler->info.compCompHnd->getMethodHash(callee);
+
+ // Abstract this or just pass through raw bits
+ // See matching code in xml writer
+ int offset = -1;
+ if (m_Offset != BAD_IL_OFFSET)
+ {
+ offset = (int)jitGetILoffs(m_Offset);
+ }
+
+ unsigned calleeOffset = (unsigned)offset;
+
+ bool foundInline = FindInline(calleeToken, calleeHash, calleeOffset);
+
+ return foundInline;
+}
+
+//------------------------------------------------------------------------
+// NoteBool: handle an observed boolean value
+//
+// Arguments:
+// obs - the current observation
+// value - the value being observed
+//
+// Notes:
+// Overrides parent so Replay can control force inlines.
+
+void ReplayPolicy::NoteBool(InlineObservation obs, bool value)
+{
+ // When inlining, let log override force inline.
+ // Make a note of the actual value for later reporting during observations.
+ if (!m_IsPrejitRoot && (obs == InlineObservation::CALLEE_IS_FORCE_INLINE))
+ {
+ m_WasForceInline = value;
+ value = false;
+ }
+
+ DiscretionaryPolicy::NoteBool(obs, value);
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+
+void ReplayPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+ // TODO: handle prejit root case....need to record this in the
+ // root method XML.
+ if (m_IsPrejitRoot)
+ {
+ // Fall back to discretionary policy for now.
+ return DiscretionaryPolicy::DetermineProfitability(methodInfo);
+ }
+
+ // If we're also dumping inline data, make additional observations
+ // based on the method info, and estimate code size and perf
+ // impact, so that the reports have the necessary data.
+ if (JitConfig.JitInlineDumpData() != 0)
+ {
+ MethodInfoObservations(methodInfo);
+ EstimateCodeSize();
+ EstimatePerformanceImpact();
+ m_IsForceInline = m_WasForceInline;
+ }
+
+ // Try to find this candidate in the Xml.
+ // If we fail to find it, then don't inline.
+ bool accept = false;
+
+ // Grab the reader lock, since we'll be manipulating
+ // the file pointer as we look for the relevant inline xml.
+ {
+ CritSecHolder readerLock(s_XmlReaderLock);
+
+ // First, locate the entries for the root method.
+ bool foundMethod = FindMethod();
+
+ if (foundMethod && (m_InlineContext != nullptr))
+ {
+ // Next, navigate the context tree to find the entries
+ // for the context that contains this candidate.
+ bool foundContext = FindContext(m_InlineContext);
+
+ if (foundContext)
+ {
+ // Finally, find this candidate within its context
+ CORINFO_METHOD_HANDLE calleeHandle = methodInfo->ftn;
+ accept = FindInline(calleeHandle);
+ }
+ }
+ }
+
+ if (accept)
+ {
+ JITLOG_THIS(m_RootCompiler, (LL_INFO100000, "Inline accepted via log replay"));
+
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_LOG_REPLAY_ACCEPT);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_LOG_REPLAY_ACCEPT);
+ }
+ }
+ else
+ {
+ JITLOG_THIS(m_RootCompiler, (LL_INFO100000, "Inline rejected via log replay"));
+
+ if (m_IsPrejitRoot)
+ {
+ SetNever(InlineObservation::CALLEE_LOG_REPLAY_REJECT);
+ }
+ else
+ {
+ SetFailure(InlineObservation::CALLSITE_LOG_REPLAY_REJECT);
+ }
+ }
+
+ return;
+}
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
diff --git a/src/jit/inlinepolicy.h b/src/jit/inlinepolicy.h
new file mode 100644
index 0000000000..62031c86a0
--- /dev/null
+++ b/src/jit/inlinepolicy.h
@@ -0,0 +1,479 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Inlining Policies
+//
+// This file contains class definitions for various inlining
+// policies used by the jit.
+//
+// -- CLASSES --
+//
+// LegalPolicy - partial class providing common legality checks
+// LegacyPolicy - policy that provides legacy inline behavior
+// EnhancedLegacyPolicy - legacy variant with some enhancements
+// DiscretionaryPolicy - legacy variant with uniform size policy
+// ModelPolicy - policy based on statistical modelling
+//
+// These experimental policies are available only in
+// DEBUG or release+INLINE_DATA builds of the jit.
+//
+// RandomPolicy - randomized inlining
+// FullPolicy - inlines everything up to size and depth limits
+// SizePolicy - tries not to increase method sizes
+// ReplayPolicy - only performs inlines recorded in an external Xml replay log
+//
+// The default policy in use is the EnhancedLegacyPolicy.
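+//
+// Rough inheritance sketch of the classes declared in this file:
+//
+// InlinePolicy
+//   +-- LegalPolicy
+//        +-- LegacyPolicy
+//        |     +-- EnhancedLegacyPolicy
+//        |     +-- DiscretionaryPolicy
+//        |          +-- ModelPolicy
+//        |          +-- FullPolicy         (DEBUG / INLINE_DATA)
+//        |          +-- SizePolicy         (DEBUG / INLINE_DATA)
+//        |          +-- ReplayPolicy       (DEBUG / INLINE_DATA)
+//        +-- RandomPolicy                  (DEBUG)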
+
+#ifndef _INLINE_POLICY_H_
+#define _INLINE_POLICY_H_
+
+#include "jit.h"
+#include "inline.h"
+
+// LegalPolicy is a partial policy that encapsulates the common
+// legality and ability checks the inliner must make.
+//
+// Generally speaking, the legal policy expects the inlining attempt
+// to fail fast when a fatal or equivalent observation is made. So
+// once an observation causes failure, no more observations are
+// expected. However for the prejit scan case (where the jit is not
+// actually inlining, but is assessing a method's general
+// inlinability) the legal policy allows multiple failing
+// observations provided they have the same impact. Only the first
+// observation that puts the policy into a failing state is
+// remembered. Transitions from failing states to candidate or success
+// states are not allowed.
+
+class LegalPolicy : public InlinePolicy
+{
+
+public:
+ // Constructor
+ LegalPolicy(bool isPrejitRoot) : InlinePolicy(isPrejitRoot)
+ {
+ // empty
+ }
+
+ // Handle an observation that must cause inlining to fail.
+ void NoteFatal(InlineObservation obs) override;
+
+protected:
+ // Helper methods
+ void NoteInternal(InlineObservation obs);
+ void SetCandidate(InlineObservation obs);
+ void SetFailure(InlineObservation obs);
+ void SetNever(InlineObservation obs);
+};
+
+// Forward declaration for the state machine class used by the
+// LegacyPolicy
+
+class CodeSeqSM;
+
+// LegacyPolicy implements the inlining policy used by the jit in its
+// initial release.
+
+class LegacyPolicy : public LegalPolicy
+{
+public:
+ // Construct a LegacyPolicy
+ LegacyPolicy(Compiler* compiler, bool isPrejitRoot)
+ : LegalPolicy(isPrejitRoot)
+ , m_RootCompiler(compiler)
+ , m_StateMachine(nullptr)
+ , m_Multiplier(0.0)
+ , m_CodeSize(0)
+ , m_CallsiteFrequency(InlineCallsiteFrequency::UNUSED)
+ , m_InstructionCount(0)
+ , m_LoadStoreCount(0)
+ , m_ArgFeedsConstantTest(0)
+ , m_ArgFeedsRangeCheck(0)
+ , m_ConstantArgFeedsConstantTest(0)
+ , m_CalleeNativeSizeEstimate(0)
+ , m_CallsiteNativeSizeEstimate(0)
+ , m_IsForceInline(false)
+ , m_IsForceInlineKnown(false)
+ , m_IsInstanceCtor(false)
+ , m_IsFromPromotableValueClass(false)
+ , m_HasSimd(false)
+ , m_LooksLikeWrapperMethod(false)
+ , m_MethodIsMostlyLoadStore(false)
+ {
+ // empty
+ }
+
+ // Policy observations
+ void NoteSuccess() override;
+ void NoteBool(InlineObservation obs, bool value) override;
+ void NoteInt(InlineObservation obs, int value) override;
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Policy policies
+ bool PropagateNeverToRuntime() const override
+ {
+ return true;
+ }
+ bool IsLegacyPolicy() const override
+ {
+ return true;
+ }
+
+ // Policy estimates
+ int CodeSizeEstimate() override;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ const char* GetName() const override
+ {
+ return "LegacyPolicy";
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+protected:
+ // Constants
+ enum
+ {
+ MAX_BASIC_BLOCKS = 5,
+ SIZE_SCALE = 10
+ };
+
+ // Helper methods
+ double DetermineMultiplier();
+ int DetermineNativeSizeEstimate();
+ int DetermineCallsiteNativeSizeEstimate(CORINFO_METHOD_INFO* methodInfo);
+
+ // Data members
+ Compiler* m_RootCompiler; // root compiler instance
+ CodeSeqSM* m_StateMachine;
+ double m_Multiplier;
+ unsigned m_CodeSize;
+ InlineCallsiteFrequency m_CallsiteFrequency;
+ unsigned m_InstructionCount;
+ unsigned m_LoadStoreCount;
+ unsigned m_ArgFeedsConstantTest;
+ unsigned m_ArgFeedsRangeCheck;
+ unsigned m_ConstantArgFeedsConstantTest;
+ int m_CalleeNativeSizeEstimate;
+ int m_CallsiteNativeSizeEstimate;
+ bool m_IsForceInline : 1;
+ bool m_IsForceInlineKnown : 1;
+ bool m_IsInstanceCtor : 1;
+ bool m_IsFromPromotableValueClass : 1;
+ bool m_HasSimd : 1;
+ bool m_LooksLikeWrapperMethod : 1;
+ bool m_MethodIsMostlyLoadStore : 1;
+};
+
+// EnhancedLegacyPolicy extends the legacy policy by rejecting
+// inlining of methods that never return because they throw.
+
+class EnhancedLegacyPolicy : public LegacyPolicy
+{
+public:
+ EnhancedLegacyPolicy(Compiler* compiler, bool isPrejitRoot)
+ : LegacyPolicy(compiler, isPrejitRoot), m_IsNoReturn(false), m_IsNoReturnKnown(false)
+ {
+ // empty
+ }
+
+ // Policy observations
+ void NoteBool(InlineObservation obs, bool value) override;
+ void NoteInt(InlineObservation obs, int value) override;
+
+ // Policy policies
+ bool PropagateNeverToRuntime() const override;
+ bool IsLegacyPolicy() const override
+ {
+ return false;
+ }
+
+protected:
+ // Data members
+ bool m_IsNoReturn : 1;
+ bool m_IsNoReturnKnown : 1;
+};
+
+#ifdef DEBUG
+
+// RandomPolicy implements a policy that inlines at random.
+// It is mostly useful for stress testing.
+
+class RandomPolicy : public LegalPolicy
+{
+public:
+ // Construct a RandomPolicy
+ RandomPolicy(Compiler* compiler, bool isPrejitRoot, unsigned seed);
+
+ // Policy observations
+ void NoteSuccess() override;
+ void NoteBool(InlineObservation obs, bool value) override;
+ void NoteInt(InlineObservation obs, int value) override;
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Policy policies
+ bool PropagateNeverToRuntime() const override
+ {
+ return true;
+ }
+ bool IsLegacyPolicy() const override
+ {
+ return false;
+ }
+
+ // Policy estimates
+ int CodeSizeEstimate() override
+ {
+ return 0;
+ }
+
+ const char* GetName() const override
+ {
+ return "RandomPolicy";
+ }
+
+private:
+ // Data members
+ Compiler* m_RootCompiler;
+ CLRRandom* m_Random;
+ unsigned m_CodeSize;
+ bool m_IsForceInline : 1;
+ bool m_IsForceInlineKnown : 1;
+};
+
+#endif // DEBUG
+
+// DiscretionaryPolicy is a variant of the legacy policy. It differs
+// in that there is no ALWAYS_INLINE class, there is no IL size limit,
+// it does not try to maintain legacy compatibility, and in prejit mode,
+// discretionary failures do not set the "NEVER" inline bit.
+//
+// It is useful for gathering data about inline costs.
+
+class DiscretionaryPolicy : public LegacyPolicy
+{
+public:
+ // Construct a DiscretionaryPolicy
+ DiscretionaryPolicy(Compiler* compiler, bool isPrejitRoot);
+
+ // Policy observations
+ void NoteBool(InlineObservation obs, bool value) override;
+ void NoteInt(InlineObservation obs, int value) override;
+
+ // Policy policies
+ bool PropagateNeverToRuntime() const override;
+ bool IsLegacyPolicy() const override
+ {
+ return false;
+ }
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Policy estimates
+ int CodeSizeEstimate() override;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Externalize data
+ void DumpData(FILE* file) const override;
+ void DumpSchema(FILE* file) const override;
+
+ // Miscellaneous
+ const char* GetName() const override
+ {
+ return "DiscretionaryPolicy";
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+protected:
+ void ComputeOpcodeBin(OPCODE opcode);
+ void EstimateCodeSize();
+ void EstimatePerformanceImpact();
+ void MethodInfoObservations(CORINFO_METHOD_INFO* methodInfo);
+ enum
+ {
+ MAX_ARGS = 6
+ };
+
+ unsigned m_Depth;
+ unsigned m_BlockCount;
+ unsigned m_Maxstack;
+ unsigned m_ArgCount;
+ CorInfoType m_ArgType[MAX_ARGS];
+ size_t m_ArgSize[MAX_ARGS];
+ unsigned m_LocalCount;
+ CorInfoType m_ReturnType;
+ size_t m_ReturnSize;
+ unsigned m_ArgAccessCount;
+ unsigned m_LocalAccessCount;
+ unsigned m_IntConstantCount;
+ unsigned m_FloatConstantCount;
+ unsigned m_IntLoadCount;
+ unsigned m_FloatLoadCount;
+ unsigned m_IntStoreCount;
+ unsigned m_FloatStoreCount;
+ unsigned m_SimpleMathCount;
+ unsigned m_ComplexMathCount;
+ unsigned m_OverflowMathCount;
+ unsigned m_IntArrayLoadCount;
+ unsigned m_FloatArrayLoadCount;
+ unsigned m_RefArrayLoadCount;
+ unsigned m_StructArrayLoadCount;
+ unsigned m_IntArrayStoreCount;
+ unsigned m_FloatArrayStoreCount;
+ unsigned m_RefArrayStoreCount;
+ unsigned m_StructArrayStoreCount;
+ unsigned m_StructOperationCount;
+ unsigned m_ObjectModelCount;
+ unsigned m_FieldLoadCount;
+ unsigned m_FieldStoreCount;
+ unsigned m_StaticFieldLoadCount;
+ unsigned m_StaticFieldStoreCount;
+ unsigned m_LoadAddressCount;
+ unsigned m_ThrowCount;
+ unsigned m_ReturnCount;
+ unsigned m_CallCount;
+ unsigned m_CallSiteWeight;
+ int m_ModelCodeSizeEstimate;
+ int m_PerCallInstructionEstimate;
+ bool m_IsClassCtor;
+ bool m_IsSameThis;
+ bool m_CallerHasNewArray;
+ bool m_CallerHasNewObj;
+};
+
+// ModelPolicy is an experimental policy that uses the results
+// of data modelling to make estimates.
+
+class ModelPolicy : public DiscretionaryPolicy
+{
+public:
+ // Construct a ModelPolicy
+ ModelPolicy(Compiler* compiler, bool isPrejitRoot);
+
+ // Policy observations
+ void NoteInt(InlineObservation obs, int value) override;
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Policy policies
+ bool PropagateNeverToRuntime() const override
+ {
+ return true;
+ }
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Miscellaneous
+ const char* GetName() const override
+ {
+ return "ModelPolicy";
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+};
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+// FullPolicy is an experimental policy that will always inline if
+// possible, subject to externally settable depth and size limits.
+//
+// It's useful for uncovering the full set of possible inlines for
+// methods.
+
+class FullPolicy : public DiscretionaryPolicy
+{
+public:
+ // Construct a FullPolicy
+ FullPolicy(Compiler* compiler, bool isPrejitRoot);
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Miscellaneous
+ const char* GetName() const override
+ {
+ return "FullPolicy";
+ }
+};
+
+// SizePolicy is an experimental policy that will inline as much
+// as possible without increasing the (estimated) method size.
+//
+// It may be useful down the road as a policy to use for methods
+// that are rarely executed (e.g. class constructors).
+
+class SizePolicy : public DiscretionaryPolicy
+{
+public:
+ // Construct a SizePolicy
+ SizePolicy(Compiler* compiler, bool isPrejitRoot);
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Miscellaneous
+ const char* GetName() const override
+ {
+ return "SizePolicy";
+ }
+};
+
+// The ReplayPolicy performs only inlines specified by an external
+// inline replay log.
+
+class ReplayPolicy : public DiscretionaryPolicy
+{
+public:
+ // Construct a ReplayPolicy
+ ReplayPolicy(Compiler* compiler, bool isPrejitRoot);
+
+ // Policy observations
+ void NoteBool(InlineObservation obs, bool value) override;
+
+ // Optional observations
+ void NoteContext(InlineContext* context) override
+ {
+ m_InlineContext = context;
+ }
+
+ void NoteOffset(IL_OFFSETX offset) override
+ {
+ m_Offset = offset;
+ }
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Miscellaneous
+ const char* GetName() const override
+ {
+ return "ReplayPolicy";
+ }
+
+ static void FinalizeXml();
+
+private:
+ bool FindMethod();
+ bool FindContext(InlineContext* context);
+ bool FindInline(CORINFO_METHOD_HANDLE callee);
+ bool FindInline(unsigned token, unsigned hash, unsigned offset);
+
+ static bool s_WroteReplayBanner;
+ static FILE* s_ReplayFile;
+ static CritSecObject s_XmlReaderLock;
+ InlineContext* m_InlineContext;
+ IL_OFFSETX m_Offset;
+ bool m_WasForceInline;
+};
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#endif // _INLINE_POLICY_H_
diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp
new file mode 100644
index 0000000000..d516e0dea4
--- /dev/null
+++ b/src/jit/instr.cpp
@@ -0,0 +1,4086 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Instruction XX
+XX XX
+XX The interface to generate a machine-instruction. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "codegen.h"
+#include "instr.h"
+#include "emit.h"
+
+/*****************************************************************************/
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Returns the string representation of the given CPU instruction.
+ */
+
+const char* CodeGen::genInsName(instruction ins)
+{
+ // clang-format off
+ static
+ const char * const insNames[] =
+ {
+#if defined(_TARGET_XARCH_)
+ #define INST0(id, nm, fp, um, rf, wf, mr ) nm,
+ #define INST1(id, nm, fp, um, rf, wf, mr ) nm,
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) nm,
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) nm,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) nm,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr ) nm,
+ #include "instrs.h"
+
+#elif defined(_TARGET_ARM_)
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) nm,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) nm,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) nm,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) nm,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) nm,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) nm,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) nm,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) nm,
+ #include "instrs.h"
+
+#elif defined(_TARGET_ARM64_)
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) nm,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) nm,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) nm,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) nm,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) nm,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) nm,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) nm,
+ #include "instrs.h"
+
+#else
+#error "Unknown _TARGET_"
+#endif
+ };
+ // clang-format on
+
+ assert((unsigned)ins < sizeof(insNames) / sizeof(insNames[0]));
+ assert(insNames[ins] != nullptr);
+
+ return insNames[ins];
+}
+
+void __cdecl CodeGen::instDisp(instruction ins, bool noNL, const char* fmt, ...)
+{
+ if (compiler->opts.dspCode)
+ {
+ /* Display the instruction offset within the emit block */
+
+ // printf("[%08X:%04X]", getEmitter().emitCodeCurBlock(), getEmitter().emitCodeOffsInBlock());
+
+ /* Display the FP stack depth (before the instruction is executed) */
+
+ // printf("[FP=%02u] ", genGetFPstkLevel());
+
+ /* Display the instruction mnemonic */
+ printf(" ");
+
+ printf(" %-8s", genInsName(ins));
+
+ if (fmt)
+ {
+ va_list args;
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ va_end(args);
+ }
+
+ if (!noNL)
+ {
+ printf("\n");
+ }
+ }
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+/*****************************************************************************/
+
+void CodeGen::instInit()
+{
+}
+
+/*****************************************************************************
+ *
+ * Return the size string (e.g. "word ptr") appropriate for the given size.
+ */
+
+#ifdef DEBUG
+
+const char* CodeGen::genSizeStr(emitAttr attr)
+{
+ // clang-format off
+ static
+ const char * const sizes[] =
+ {
+ "",
+ "byte ptr ",
+ "word ptr ",
+ nullptr,
+ "dword ptr ",
+ nullptr,
+ nullptr,
+ nullptr,
+ "qword ptr ",
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ "xmmword ptr ",
+ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ "ymmword ptr"
+ };
+ // clang-format on
+
+ unsigned size = EA_SIZE(attr);
+
+ assert(size == 0 || size == 1 || size == 2 || size == 4 || size == 8 || size == 16 || size == 32);
+
+ if (EA_ATTR(size) == attr)
+ {
+ return sizes[size];
+ }
+ else if (attr == EA_GCREF)
+ {
+ return "gword ptr ";
+ }
+ else if (attr == EA_BYREF)
+ {
+ return "bword ptr ";
+ }
+ else if (EA_IS_DSP_RELOC(attr))
+ {
+ return "rword ptr ";
+ }
+ else
+ {
+ assert(!"Unexpected");
+ return "unknw ptr ";
+ }
+}
+
+#endif
+
+/*****************************************************************************
+ *
+ * Generate an instruction.
+ */
+
+void CodeGen::instGen(instruction ins)
+{
+
+ getEmitter()->emitIns(ins);
+
+#ifdef _TARGET_XARCH_
+ // A workaround necessitated by limitations of the emitter:
+ // if we are scheduled to insert a nop here, we have to delay it.
+ // Hopefully we have not missed any other prefix instructions or
+ // places where they could be inserted.
+ if (ins == INS_lock && getEmitter()->emitNextNop == 0)
+ {
+ getEmitter()->emitNextNop = 1;
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Returns non-zero if the given CPU instruction is a floating-point ins.
+ */
+
+// static inline
+bool CodeGenInterface::instIsFP(instruction ins)
+{
+ assert((unsigned)ins < sizeof(instInfo) / sizeof(instInfo[0]));
+
+ return (instInfo[ins] & INST_FP) != 0;
+}
+
+#ifdef _TARGET_XARCH_
+/*****************************************************************************
+ *
+ * Generate a multi-byte NOP instruction.
+ */
+
+void CodeGen::instNop(unsigned size)
+{
+ assert(size <= 15);
+ getEmitter()->emitIns_Nop(size);
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Generate a jump instruction.
+ */
+
+void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock)
+{
+#if !FEATURE_FIXED_OUT_ARGS
+ // On the x86 we are pushing (and changing the stack level), but on x64 and other archs we have
+ // a fixed outgoing args area that we store into and we never change the stack level when calling methods.
+ //
+ // Thus only on x86 do we need to assert that the stack level at the target block matches the current stack level.
+ //
+ assert(tgtBlock->bbTgtStkDepth * sizeof(int) == genStackLevel || compiler->rpFrameType != FT_ESP_FRAME);
+#endif
+
+ getEmitter()->emitIns_J(emitter::emitJumpKindToIns(jmp), tgtBlock);
+}
+
+/*****************************************************************************
+ *
+ * Generate a set instruction.
+ */
+
+void CodeGen::inst_SET(emitJumpKind condition, regNumber reg)
+{
+#ifdef _TARGET_XARCH_
+ instruction ins;
+
+ /* Convert the condition to an instruction opcode */
+
+ switch (condition)
+ {
+ case EJ_js:
+ ins = INS_sets;
+ break;
+ case EJ_jns:
+ ins = INS_setns;
+ break;
+ case EJ_je:
+ ins = INS_sete;
+ break;
+ case EJ_jne:
+ ins = INS_setne;
+ break;
+
+ case EJ_jl:
+ ins = INS_setl;
+ break;
+ case EJ_jle:
+ ins = INS_setle;
+ break;
+ case EJ_jge:
+ ins = INS_setge;
+ break;
+ case EJ_jg:
+ ins = INS_setg;
+ break;
+
+ case EJ_jb:
+ ins = INS_setb;
+ break;
+ case EJ_jbe:
+ ins = INS_setbe;
+ break;
+ case EJ_jae:
+ ins = INS_setae;
+ break;
+ case EJ_ja:
+ ins = INS_seta;
+ break;
+
+ case EJ_jpe:
+ ins = INS_setpe;
+ break;
+ case EJ_jpo:
+ ins = INS_setpo;
+ break;
+
+ default:
+ NO_WAY("unexpected condition type");
+ return;
+ }
+
+ assert(genRegMask(reg) & RBM_BYTE_REGS);
+
+ // These instructions only write the low byte of 'reg'
+ getEmitter()->emitIns_R(ins, EA_1BYTE, reg);
+#elif defined(_TARGET_ARM64_)
+ insCond cond;
+ /* Convert the condition to an insCond value */
+ switch (condition)
+ {
+ case EJ_eq:
+ cond = INS_COND_EQ;
+ break;
+ case EJ_ne:
+ cond = INS_COND_NE;
+ break;
+ case EJ_hs:
+ cond = INS_COND_HS;
+ break;
+ case EJ_lo:
+ cond = INS_COND_LO;
+ break;
+
+ case EJ_mi:
+ cond = INS_COND_MI;
+ break;
+ case EJ_pl:
+ cond = INS_COND_PL;
+ break;
+ case EJ_vs:
+ cond = INS_COND_VS;
+ break;
+ case EJ_vc:
+ cond = INS_COND_VC;
+ break;
+
+ case EJ_hi:
+ cond = INS_COND_HI;
+ break;
+ case EJ_ls:
+ cond = INS_COND_LS;
+ break;
+ case EJ_ge:
+ cond = INS_COND_GE;
+ break;
+ case EJ_lt:
+ cond = INS_COND_LT;
+ break;
+
+ case EJ_gt:
+ cond = INS_COND_GT;
+ break;
+ case EJ_le:
+ cond = INS_COND_LE;
+ break;
+
+ default:
+ NO_WAY("unexpected condition type");
+ return;
+ }
+ getEmitter()->emitIns_R_COND(INS_cset, EA_8BYTE, reg, cond);
+#else
+ NYI("inst_SET");
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate a "op reg" instruction.
+ */
+
+void CodeGen::inst_RV(instruction ins, regNumber reg, var_types type, emitAttr size)
+{
+ if (size == EA_UNKNOWN)
+ {
+ size = emitActualTypeSize(type);
+ }
+
+ getEmitter()->emitIns_R(ins, size, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate a "op reg1, reg2" instruction.
+ */
+
+void CodeGen::inst_RV_RV(instruction ins,
+ regNumber reg1,
+ regNumber reg2,
+ var_types type,
+ emitAttr size,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+ if (size == EA_UNKNOWN)
+ {
+ size = emitActualTypeSize(type);
+ }
+
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_R(ins, size, reg1, reg2, flags);
+#else
+ getEmitter()->emitIns_R_R(ins, size, reg1, reg2);
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate a "op reg1, reg2, reg3" instruction.
+ */
+
+void CodeGen::inst_RV_RV_RV(instruction ins,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ emitAttr size,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3, flags);
+#elif defined(_TARGET_XARCH_) && defined(FEATURE_AVX_SUPPORT)
+ getEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3);
+#else
+ NYI("inst_RV_RV_RV");
+#endif
+}
+/*****************************************************************************
+ *
+ * Generate a "op icon" instruction.
+ */
+
+void CodeGen::inst_IV(instruction ins, int val)
+{
+ getEmitter()->emitIns_I(ins, EA_PTRSIZE, val);
+}
+
+/*****************************************************************************
+ *
+ * Generate a "op icon" instruction where icon is a handle of type specified
+ * by 'flags'
+ */
+
+void CodeGen::inst_IV_handle(instruction ins, int val)
+{
+ getEmitter()->emitIns_I(ins, EA_HANDLE_CNS_RELOC, val);
+}
+
+#if FEATURE_STACK_FP_X87
+/*****************************************************************************
+ *
+ * Generate a "op ST(n), ST(0)" instruction.
+ */
+
+void CodeGen::inst_FS(instruction ins, unsigned stk)
+{
+ assert(stk < 8);
+
+#ifdef DEBUG
+
+ switch (ins)
+ {
+ case INS_fcompp:
+ assert(stk == 1);
+ break; // Implicit operand of compp is ST(1)
+ case INS_fld:
+ case INS_fxch:
+ assert(!"don't do this. Do you want to use inst_FN() instead?");
+ break;
+ default:
+ break;
+ }
+
+#endif
+
+ getEmitter()->emitIns_F_F0(ins, stk);
+}
+
+/*****************************************************************************
+ *
+ * Generate a "op ST(0), ST(n)" instruction
+ */
+
+void CodeGenInterface::inst_FN(instruction ins, unsigned stk)
+{
+ assert(stk < 8);
+
+#ifdef DEBUG
+
+ switch (ins)
+ {
+ case INS_fst:
+ case INS_fstp:
+ case INS_faddp:
+ case INS_fsubp:
+ case INS_fsubrp:
+ case INS_fmulp:
+ case INS_fdivp:
+ case INS_fdivrp:
+ case INS_fcompp:
+ assert(!"don't do this. Do you want to use inst_FS() instead?");
+ break;
+ default:
+ break;
+ }
+
+#endif // DEBUG
+
+ getEmitter()->emitIns_F0_F(ins, stk);
+}
+#endif // FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * Display a stack frame reference.
+ */
+
+void CodeGen::inst_set_SV_var(GenTreePtr tree)
+{
+#ifdef DEBUG
+ assert(tree && (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_VAR_ADDR || tree->gtOper == GT_STORE_LCL_VAR));
+ assert(tree->gtLclVarCommon.gtLclNum < compiler->lvaCount);
+
+ getEmitter()->emitVarRefOffs = tree->gtLclVar.gtLclILoffs;
+
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Generate a "op reg, icon" instruction.
+ */
+
+void CodeGen::inst_RV_IV(
+ instruction ins, regNumber reg, ssize_t val, emitAttr size, insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+#if !defined(_TARGET_64BIT_)
+ assert(size != EA_8BYTE);
+#endif
+
+#ifdef _TARGET_ARM_
+ if (arm_Valid_Imm_For_Instr(ins, val, flags))
+ {
+ getEmitter()->emitIns_R_I(ins, size, reg, val, flags);
+ }
+ else if (ins == INS_mov)
+ {
+ instGen_Set_Reg_To_Imm(size, reg, val);
+ }
+ else
+ {
+#ifndef LEGACY_BACKEND
+ // TODO-Cleanup: Add a comment about why this is unreached() for RyuJIT backend.
+ unreached();
+#else // LEGACY_BACKEND
+ regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(size, tmpReg, val);
+ getEmitter()->emitIns_R_R(ins, size, reg, tmpReg, flags);
+#endif // LEGACY_BACKEND
+ }
+#elif defined(_TARGET_ARM64_)
+ // TODO-Arm64-Bug: handle large constants!
+ // Probably need something like the ARM case above: if (arm_Valid_Imm_For_Instr(ins, val)) ...
+ assert(ins != INS_cmp);
+ assert(ins != INS_tst);
+ assert(ins != INS_mov);
+ getEmitter()->emitIns_R_R_I(ins, size, reg, reg, val);
+#else // !_TARGET_ARM_
+#ifdef _TARGET_AMD64_
+ // Instead of an 8-byte immediate load, a 4-byte immediate will do fine
+ // as the high 4 bytes will be zero anyway.
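+ // For example, 'mov rax, 0x12345678' can be emitted as 'mov eax, 0x12345678';
+ // a 32-bit register write zero-extends into the full 64-bit register on x64.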
+ if (size == EA_8BYTE && ins == INS_mov && ((val & 0xFFFFFFFF00000000LL) == 0))
+ {
+ size = EA_4BYTE;
+ getEmitter()->emitIns_R_I(ins, size, reg, val);
+ }
+ else if (EA_SIZE(size) == EA_8BYTE && ins != INS_mov && (((int)val != val) || EA_IS_CNS_RELOC(size)))
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"Invalid immediate for inst_RV_IV");
+#else // LEGACY_BACKEND
+ // We can't fit the immediate into this instruction, so move it into
+ // a register first
+ regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(size, tmpReg, val);
+
+ // We might have to switch back from 3-operand imul to two operand form
+ if (instrIs3opImul(ins))
+ {
+ assert(getEmitter()->inst3opImulReg(ins) == reg);
+ ins = INS_imul;
+ }
+ getEmitter()->emitIns_R_R(ins, EA_TYPE(size), reg, tmpReg);
+#endif // LEGACY_BACKEND
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ getEmitter()->emitIns_R_I(ins, size, reg, val);
+ }
+#endif // !_TARGET_ARM_
+}
+
+#if defined(LEGACY_BACKEND)
+/*****************************************************************************
+ * Figure out the operands to address the tree.
+ * 'addr' can be one of (1) a pointer to be indirected
+ * (2) a calculation to be done with LEA_AVAILABLE
+ * (3) GT_ARR_ELEM
+ *
+ * On return, *baseReg, *indScale, *indReg, and *cns are set.
+ */
+
+void CodeGen::instGetAddrMode(GenTreePtr addr, regNumber* baseReg, unsigned* indScale, regNumber* indReg, unsigned* cns)
+{
+ if (addr->gtOper == GT_ARR_ELEM)
+ {
+ /* For GT_ARR_ELEM, the addressability registers are marked on
+ gtArrObj and gtArrInds[0] */
+
+ assert(addr->gtArrElem.gtArrObj->gtFlags & GTF_REG_VAL);
+ *baseReg = addr->gtArrElem.gtArrObj->gtRegNum;
+
+ assert(addr->gtArrElem.gtArrInds[0]->gtFlags & GTF_REG_VAL);
+ *indReg = addr->gtArrElem.gtArrInds[0]->gtRegNum;
+
+ if (jitIsScaleIndexMul(addr->gtArrElem.gtArrElemSize))
+ *indScale = addr->gtArrElem.gtArrElemSize;
+ else
+ *indScale = 0;
+
+ *cns = compiler->eeGetMDArrayDataOffset(addr->gtArrElem.gtArrElemType, addr->gtArrElem.gtArrRank);
+ }
+ else if (addr->gtOper == GT_LEA)
+ {
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ GenTreePtr base = lea->Base();
+ assert(!base || (base->gtFlags & GTF_REG_VAL));
+ GenTreePtr index = lea->Index();
+ assert(!index || (index->gtFlags & GTF_REG_VAL));
+
+ *baseReg = base ? base->gtRegNum : REG_NA;
+ *indReg = index ? index->gtRegNum : REG_NA;
+ *indScale = lea->gtScale;
+ *cns = lea->gtOffset;
+ return;
+ }
+ else
+ {
+ /* Figure out what complex address mode to use */
+
+ GenTreePtr rv1 = NULL;
+ GenTreePtr rv2 = NULL;
+ bool rev = false;
+
+ INDEBUG(bool yes =)
+ genCreateAddrMode(addr, -1, true, RBM_NONE, &rev, &rv1, &rv2,
+#if SCALED_ADDR_MODES
+ indScale,
+#endif
+ cns);
+
+ assert(yes); // since we have called genMakeAddressable() on addr
+ // Ensure that the base and index, if used, are in registers.
+ if (rv1 && ((rv1->gtFlags & GTF_REG_VAL) == 0))
+ {
+ if (rv1->gtFlags & GTF_SPILLED)
+ {
+ genRecoverReg(rv1, RBM_ALLINT, RegSet::KEEP_REG);
+ }
+ else
+ {
+ genCodeForTree(rv1, RBM_NONE);
+ regSet.rsMarkRegUsed(rv1, addr);
+ }
+ assert(rv1->gtFlags & GTF_REG_VAL);
+ }
+ if (rv2 && ((rv2->gtFlags & GTF_REG_VAL) == 0))
+ {
+ if (rv2->gtFlags & GTF_SPILLED)
+ {
+ genRecoverReg(rv2, ~genRegMask(rv1->gtRegNum), RegSet::KEEP_REG);
+ }
+ else
+ {
+ genCodeForTree(rv2, RBM_NONE);
+ regSet.rsMarkRegUsed(rv2, addr);
+ }
+ assert(rv2->gtFlags & GTF_REG_VAL);
+ }
+ // If we did both, we might have spilled rv1.
+ if (rv1 && ((rv1->gtFlags & GTF_SPILLED) != 0))
+ {
+ regSet.rsLockUsedReg(genRegMask(rv2->gtRegNum));
+ genRecoverReg(rv1, ~genRegMask(rv2->gtRegNum), RegSet::KEEP_REG);
+ regSet.rsUnlockReg(genRegMask(rv2->gtRegNum));
+ }
+
+ *baseReg = rv1 ? rv1->gtRegNum : REG_NA;
+ *indReg = rv2 ? rv2->gtRegNum : REG_NA;
+ }
+}
+
+#if CPU_LOAD_STORE_ARCH
+/*****************************************************************************
+ *
+ * Originally this was somewhat specific to the x86 instruction format.
+ * For a Load/Store arch we generate the 1-8 instructions necessary to
+ * implement the single addressing mode instruction used on x86.
+ * We currently don't have an instruction scheduler enabled on any target.
+ *
+ * [Schedule] an "ins reg, [r/m]" (rdst=true), or "ins [r/m], reg" (rdst=false)
+ * instruction (the r/m operand given by a tree). We also allow instructions
+ * of the form "ins [r/m], icon", these are signaled by setting 'cons' to
+ * true.
+ *
+ * The longest instruction sequence emitted on the ARM is as follows:
+ *
+ * - the "addr" represents an array addressing mode,
+ * with a baseReg, indReg with a shift and a large offset
+ * (Note that typically array addressing modes do NOT have a large offset)
+ * - "ins" is an ALU instruction,
+ * - cons=true, and imm is a large constant that cannot be directly encoded with "ins"
+ * - We may need to grab up to four additional registers: regT, regVal, regOffs and regImm
+ *
+ * add regT, baseReg, indReg<<shift
+ * movw regOffs, offsLo
+ * movt regOffs, offsHi
+ * ldr regVal, [regT + regOffs]
+ * movw regImm, consLo
+ * movt regImm, consHi
+ * "ins" regVal, regImm
+ * str regVal, [regT + regOffs]
+ *
+ */
+
+void CodeGen::sched_AM(instruction ins,
+ emitAttr size,
+ regNumber ireg,
+ bool rdst,
+ GenTreePtr addr,
+ unsigned offs,
+ bool cons,
+ int imm,
+ insFlags flags)
+{
+ assert(addr);
+ assert(size != EA_UNKNOWN);
+
+ enum INS_TYPE
+ {
+ eIT_Lea,
+ eIT_Load,
+ eIT_Store,
+ eIT_Other
+ };
+ INS_TYPE insType = eIT_Other;
+
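+ // Classify the instruction: an LEA is rewritten as an "add", loads and stores
+ // can address memory directly, and anything else ("other") will need a
+ // load/modify/store sequence on a load/store architecture.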
+ if (ins == INS_lea)
+ {
+ insType = eIT_Lea;
+ ins = INS_add;
+ }
+ else if (getEmitter()->emitInsIsLoad(ins))
+ {
+ insType = eIT_Load;
+ }
+ else if (getEmitter()->emitInsIsStore(ins))
+ {
+ insType = eIT_Store;
+ }
+
+ regNumber baseReg = REG_NA;
+ regNumber indReg = REG_NA;
+ unsigned indScale = 0;
+
+ regMaskTP avoidMask = RBM_NONE;
+
+ if (addr->gtFlags & GTF_REG_VAL)
+ {
+ /* The address is "[reg+offs]" */
+ baseReg = addr->gtRegNum;
+ }
+ else if (addr->IsCnsIntOrI())
+ {
+#ifdef RELOC_SUPPORT
+ // Do we need relocations?
+ if (compiler->opts.compReloc && addr->IsIconHandle())
+ {
+ size = EA_SET_FLG(size, EA_DSP_RELOC_FLG);
+ // offs should be smaller than ZapperModule::FixupPlaceHolder
+ // so that we can uniquely identify the handle
+ assert(offs <= 4);
+ }
+#endif
+ ssize_t disp = addr->gtIntCon.gtIconVal + offs;
+ if ((insType == eIT_Store) && (ireg != REG_NA))
+ {
+ // Can't use the ireg as the baseReg when we have a store instruction
+ avoidMask |= genRegMask(ireg);
+ }
+ baseReg = regSet.rsPickFreeReg(RBM_ALLINT & ~avoidMask);
+
+ avoidMask |= genRegMask(baseReg);
+ instGen_Set_Reg_To_Imm(size, baseReg, disp);
+ offs = 0;
+ }
+ else
+ {
+ unsigned cns = 0;
+
+ instGetAddrMode(addr, &baseReg, &indScale, &indReg, &cns);
+
+ /* Add the constant offset value, if present */
+
+ offs += cns;
+
+#if SCALED_ADDR_MODES
+ noway_assert((baseReg != REG_NA) || (indReg != REG_NA));
+ if (baseReg != REG_NA)
+#endif
+ {
+ avoidMask |= genRegMask(baseReg);
+ }
+
+ // I don't think this is necessary even in the non-proto-jit case, but better to be
+ // conservative here. It is only necessary to avoid using ireg if it is used as regT,
+ // in which case it will be added to avoidMask below.
+
+ if (ireg != REG_NA)
+ {
+ avoidMask |= genRegMask(ireg);
+ }
+
+ if (indReg != REG_NA)
+ {
+ avoidMask |= genRegMask(indReg);
+ }
+ }
+
+ unsigned shift = (indScale > 0) ? genLog2((unsigned)indScale) : 0;
+
+ regNumber regT = REG_NA; // the register where the address is computed into
+ regNumber regOffs = REG_NA; // a temporary register to use for the offs when it can't be directly encoded
+ regNumber regImm = REG_NA; // a temporary register to use for the imm when it can't be directly encoded
+ regNumber regVal = REG_NA; // a temporary register to use when we have to do a load/modify/store operation
+
+ // Setup regT
+ if (indReg == REG_NA)
+ {
+ regT = baseReg; // We can use the baseReg, regT is read-only
+ }
+ else // We have an index register (indReg != REG_NA)
+ {
+ // Check for special case that we can encode using one instruction
+ if ((offs == 0) && (insType != eIT_Other) && !instIsFP(ins) && baseReg != REG_NA)
+ {
+ // ins ireg, [baseReg + indReg << shift]
+ getEmitter()->emitIns_R_R_R_I(ins, size, ireg, baseReg, indReg, shift, flags, INS_OPTS_LSL);
+ return;
+ }
+
+ // Otherwise setup regT, regT is written once here
+ //
+ if (insType == eIT_Lea || (insType == eIT_Load && !instIsFP(ins)))
+ {
+ assert(ireg != REG_NA);
+ // ireg will be written, so we can take it as our temporary register
+ regT = ireg;
+ }
+ else
+ {
+ // need a new temporary reg
+ regT = regSet.rsPickFreeReg(RBM_ALLINT & ~avoidMask);
+ regTracker.rsTrackRegTrash(regT);
+ }
+
+#if SCALED_ADDR_MODES
+ if (baseReg == REG_NA)
+ {
+ assert(shift > 0);
+ // LSL regT, indReg, shift.
+ getEmitter()->emitIns_R_R_I(INS_lsl, EA_PTRSIZE, regT, indReg, shift & ((TARGET_POINTER_SIZE * 8) - 1));
+ }
+ else
+#endif // SCALED_ADDR_MODES
+ {
+ assert(baseReg != REG_NA);
+
+ // add regT, baseReg, indReg<<shift.
+ getEmitter()->emitIns_R_R_R_I(INS_add,
+ // The "add" operation will yield either a pointer or byref, depending on the
+ // type of "addr."
+ varTypeIsGC(addr->TypeGet()) ? EA_BYREF : EA_PTRSIZE, regT, baseReg, indReg,
+ shift, INS_FLAGS_NOT_SET, INS_OPTS_LSL);
+ }
+ }
+
+ // regT is the base register for a load/store or an operand for add when insType is eIT_Lea
+ //
+ assert(regT != REG_NA);
+ avoidMask |= genRegMask(regT);
+
+ if (insType != eIT_Other)
+ {
+ assert((flags != INS_FLAGS_SET) || (insType == eIT_Lea));
+ if ((insType == eIT_Lea) && (offs == 0))
+ {
+ // If we have the same register as src and dst and we do not need to set the flags
+ // then we can skip emitting the instruction
+ if ((ireg != regT) || (flags == INS_FLAGS_SET))
+ {
+ // mov ireg, regT
+ getEmitter()->emitIns_R_R(INS_mov, size, ireg, regT, flags);
+ }
+ }
+ else if (arm_Valid_Imm_For_Instr(ins, offs, flags))
+ {
+ // ins ireg, [regT + offs]
+ getEmitter()->emitIns_R_R_I(ins, size, ireg, regT, offs, flags);
+ }
+ else
+ {
+ regOffs = regSet.rsPickFreeReg(RBM_ALLINT & ~avoidMask);
+
+ // We cannot use [regT + regOffs] to load/store a floating register
+ if (emitter::isFloatReg(ireg))
+ {
+ if (arm_Valid_Imm_For_Instr(INS_add, offs, flags))
+ {
+ // add regOffs, regT, offs
+ getEmitter()->emitIns_R_R_I(INS_add, EA_4BYTE, regOffs, regT, offs, flags);
+ }
+ else
+ {
+ // movw regOffs, offs_lo16
+ // movt regOffs, offs_hi16
+ // add regOffs, regOffs, regT
+ instGen_Set_Reg_To_Imm(EA_4BYTE, regOffs, offs);
+ getEmitter()->emitIns_R_R_R(INS_add, EA_4BYTE, regOffs, regOffs, regT, flags);
+ }
+ // ins ireg, [regOffs]
+ getEmitter()->emitIns_R_R_I(ins, size, ireg, regOffs, 0, flags);
+
+ regTracker.rsTrackRegTrash(regOffs);
+ }
+ else
+ {
+ // mov regOffs, offs
+ // ins ireg, [regT + regOffs]
+ instGen_Set_Reg_To_Imm(EA_4BYTE, regOffs, offs);
+ getEmitter()->emitIns_R_R_R(ins, size, ireg, regT, regOffs, flags);
+ }
+ }
+ }
+ else // (insType == eIT_Other);
+ {
+ // Setup regVal
+ //
+
+ regVal = regSet.rsPickReg(RBM_ALLINT & ~avoidMask);
+ regTracker.rsTrackRegTrash(regVal);
+ avoidMask |= genRegMask(regVal);
+ var_types load_store_type;
+ switch (size)
+ {
+ case EA_4BYTE:
+ load_store_type = TYP_INT;
+ break;
+
+ case EA_2BYTE:
+ load_store_type = TYP_SHORT;
+ break;
+
+ case EA_1BYTE:
+ load_store_type = TYP_BYTE;
+ break;
+
+ default:
+ assert(!"Unexpected size in sched_AM, eIT_Other");
+ load_store_type = TYP_INT;
+ break;
+ }
+
+ // Load the content at addr into regVal using regT + offs
+ if (arm_Valid_Disp_For_LdSt(offs, load_store_type))
+ {
+ // ldrX regVal, [regT + offs]
+ getEmitter()->emitIns_R_R_I(ins_Load(load_store_type), size, regVal, regT, offs);
+ }
+ else
+ {
+ // mov regOffs, offs
+ // ldrX regVal, [regT + regOffs]
+ regOffs = regSet.rsPickFreeReg(RBM_ALLINT & ~avoidMask);
+ avoidMask |= genRegMask(regOffs);
+ instGen_Set_Reg_To_Imm(EA_4BYTE, regOffs, offs);
+ getEmitter()->emitIns_R_R_R(ins_Load(load_store_type), size, regVal, regT, regOffs);
+ }
+
+ if (cons)
+ {
+ if (arm_Valid_Imm_For_Instr(ins, imm, flags))
+ {
+ getEmitter()->emitIns_R_I(ins, size, regVal, imm, flags);
+ }
+ else
+ {
+ assert(regOffs == REG_NA);
+ regImm = regSet.rsPickFreeReg(RBM_ALLINT & ~avoidMask);
+ avoidMask |= genRegMask(regImm);
+ instGen_Set_Reg_To_Imm(size, regImm, imm);
+ getEmitter()->emitIns_R_R(ins, size, regVal, regImm, flags);
+ }
+ }
+ else if (rdst)
+ {
+ getEmitter()->emitIns_R_R(ins, size, ireg, regVal, flags);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R(ins, size, regVal, ireg, flags);
+ }
+
+ // If we do not have a register destination we must perform the write-back store instruction
+ // (unless we have an instruction like INS_cmp that does not write a destination)
+ //
+ if (!rdst && ins_Writes_Dest(ins))
+ {
+ // Store regVal into [addr]
+ if (regOffs == REG_NA)
+ {
+ // strX regVal, [regT + offs]
+ getEmitter()->emitIns_R_R_I(ins_Store(load_store_type), size, regVal, regT, offs);
+ }
+ else
+ {
+ // strX regVal, [regT + regOffs]
+ getEmitter()->emitIns_R_R_R(ins_Store(load_store_type), size, regVal, regT, regOffs);
+ }
+ }
+ }
+}
+
+#else // !CPU_LOAD_STORE_ARCH
+
+/*****************************************************************************
+ *
+ * This is somewhat specific to the x86 instruction format.
+ * We currently don't have an instruction scheduler enabled on any target.
+ *
+ * [Schedule] an "ins reg, [r/m]" (rdst=true), or "ins [r/m], reg" (rdst=false)
+ * instruction (the r/m operand given by a tree). We also allow instructions
+ * of the form "ins [r/m], icon", these are signalled by setting 'cons' to
+ * true.
+ */
+
+void CodeGen::sched_AM(instruction ins,
+ emitAttr size,
+ regNumber ireg,
+ bool rdst,
+ GenTreePtr addr,
+ unsigned offs,
+ bool cons,
+ int imm,
+ insFlags flags)
+{
+#ifdef _TARGET_XARCH_
+ /* Don't use this method for issuing calls. Use instEmit_xxxCall() */
+ assert(ins != INS_call);
+#endif
+
+ assert(addr);
+ assert(size != EA_UNKNOWN);
+
+ regNumber reg;
+
+ /* Has the address been conveniently loaded into a register,
+ or is it an absolute value ? */
+
+ if ((addr->gtFlags & GTF_REG_VAL) || (addr->IsCnsIntOrI()))
+ {
+ if (addr->gtFlags & GTF_REG_VAL)
+ {
+ /* The address is "[reg+offs]" */
+
+ reg = addr->gtRegNum;
+
+ if (cons)
+ getEmitter()->emitIns_I_AR(ins, size, imm, reg, offs);
+ else if (rdst)
+ getEmitter()->emitIns_R_AR(ins, size, ireg, reg, offs);
+ else
+ getEmitter()->emitIns_AR_R(ins, size, ireg, reg, offs);
+ }
+ else
+ {
+ /* The address is an absolute value */
+
+ assert(addr->IsCnsIntOrI());
+
+#ifdef RELOC_SUPPORT
+ // Do we need relocations?
+ if (compiler->opts.compReloc && addr->IsIconHandle())
+ {
+ size = EA_SET_FLG(size, EA_DSP_RELOC_FLG);
+ // offs should be smaller than ZapperModule::FixupPlaceHolder
+ // so that we can uniquely identify the handle
+ assert(offs <= 4);
+ }
+#endif
+ reg = REG_NA;
+ ssize_t disp = addr->gtIntCon.gtIconVal + offs;
+
+ // Cross our fingers and hope the code generator did the right
+ // thing and the constant address can be RIP-relative
+
+ if (cons)
+ getEmitter()->emitIns_I_AI(ins, size, imm, disp);
+ else if (rdst)
+ getEmitter()->emitIns_R_AI(ins, size, ireg, disp);
+ else
+ getEmitter()->emitIns_AI_R(ins, size, ireg, disp);
+ }
+
+ return;
+ }
+
+ /* Figure out what complex address mode to use */
+
+ regNumber baseReg, indReg;
+ unsigned indScale = 0, cns = 0;
+
+ instGetAddrMode(addr, &baseReg, &indScale, &indReg, &cns);
+
+ /* Add the constant offset value, if present */
+
+ offs += cns;
+
+ /* Is there an index reg operand? */
+
+ if (indReg != REG_NA)
+ {
+ /* Is the index reg operand scaled? */
+
+ if (indScale)
+ {
+ /* Is there a base address operand? */
+
+ if (baseReg != REG_NA)
+ {
+ reg = baseReg;
+
+ /* The address is "[reg + {2/4/8} * indReg + offs]" */
+
+ if (cons)
+ getEmitter()->emitIns_I_ARX(ins, size, imm, reg, indReg, indScale, offs);
+ else if (rdst)
+ getEmitter()->emitIns_R_ARX(ins, size, ireg, reg, indReg, indScale, offs);
+ else
+ getEmitter()->emitIns_ARX_R(ins, size, ireg, reg, indReg, indScale, offs);
+ }
+ else
+ {
+ /* The address is "[{2/4/8} * indReg + offs]" */
+
+ if (cons)
+ getEmitter()->emitIns_I_AX(ins, size, imm, indReg, indScale, offs);
+ else if (rdst)
+ getEmitter()->emitIns_R_AX(ins, size, ireg, indReg, indScale, offs);
+ else
+ getEmitter()->emitIns_AX_R(ins, size, ireg, indReg, indScale, offs);
+ }
+ }
+ else
+ {
+ assert(baseReg != REG_NA);
+ reg = baseReg;
+
+ /* The address is "[reg + indReg + offs]" */
+ if (cons)
+ getEmitter()->emitIns_I_ARR(ins, size, imm, reg, indReg, offs);
+ else if (rdst)
+ getEmitter()->emitIns_R_ARR(ins, size, ireg, reg, indReg, offs);
+ else
+ getEmitter()->emitIns_ARR_R(ins, size, ireg, reg, indReg, offs);
+ }
+ }
+ else
+ {
+ unsigned cpx = 0;
+ CORINFO_CLASS_HANDLE cls = 0;
+
+ /* No second operand: the address is "[reg + icon]" */
+
+ assert(baseReg != REG_NA);
+ reg = baseReg;
+
+#ifdef LATE_DISASM
+ /*
+ Keep in mind that non-static data members (GT_FIELD nodes) were
+ transformed into GT_IND nodes - we keep the CLS/CPX information
+ in the GT_CNS_INT node representing the field offset of the
+ class member
+ */
+
+ if (addr->gtOper != GT_LEA && (addr->gtOp.gtOp2->gtOper == GT_CNS_INT) &&
+ addr->gtOp.gtOp2->IsIconHandle(GTF_ICON_FIELD_HDL))
+ {
+ /* This is a field offset - set the CPX/CLS values to emit a fixup */
+
+ cpx = addr->gtOp.gtOp2->gtIntCon.gtIconFld.gtIconCPX;
+ cls = addr->gtOp.gtOp2->gtIntCon.gtIconFld.gtIconCls;
+ }
+#endif
+
+ if (cons)
+ {
+ getEmitter()->emitIns_I_AR(ins, size, imm, reg, offs, cpx, cls);
+ }
+ else if (rdst)
+ {
+ getEmitter()->emitIns_R_AR(ins, size, ireg, reg, offs, cpx, cls);
+ }
+ else
+ {
+ getEmitter()->emitIns_AR_R(ins, size, ireg, reg, offs, cpx, cls);
+ }
+ }
+}
+
+#endif // !CPU_LOAD_STORE_ARCH
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Emit a "call [r/m]" instruction (the r/m operand given by a tree).
+ */
+
+void CodeGen::instEmit_indCall(GenTreePtr call,
+ size_t argSize,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize))
+{
+ GenTreePtr addr;
+
+ emitter::EmitCallType emitCallType;
+
+ regNumber brg = REG_NA;
+ regNumber xrg = REG_NA;
+ unsigned mul = 0;
+ unsigned cns = 0;
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+
+ assert(call->gtOper == GT_CALL);
+
+ /* Get hold of the function address */
+
+ assert(call->gtCall.gtCallType == CT_INDIRECT);
+ addr = call->gtCall.gtCallAddr;
+ assert(addr);
+
+#ifdef DEBUG
+ // Pass the call signature information from the GenTree node so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ sigInfo = call->gtCall.callSig;
+#endif // DEBUG
+
+#if CPU_LOAD_STORE_ARCH
+
+ emitCallType = emitter::EC_INDIR_R;
+
+ if (!addr->OperIsIndir())
+ {
+ if (!(addr->gtFlags & GTF_REG_VAL) && (addr->OperGet() == GT_CNS_INT))
+ {
+ ssize_t funcPtr = addr->gtIntCon.gtIconVal;
+
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_ADDR,
+ NULL, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo)(void*) funcPtr, argSize,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ return;
+ }
+ }
+ else
+ {
+ /* Get hold of the address of the function pointer */
+
+ addr = addr->gtOp.gtOp1;
+ }
+
+ if (addr->gtFlags & GTF_REG_VAL)
+ {
+ /* The address is "reg" */
+
+ brg = addr->gtRegNum;
+ }
+ else
+ {
+ // Force the address into a register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef LEGACY_BACKEND
+ genCodeForTree(addr, RBM_NONE);
+#endif // LEGACY_BACKEND
+ assert(addr->gtFlags & GTF_REG_VAL);
+ brg = addr->gtRegNum;
+ }
+
+#else // CPU_LOAD_STORE_ARCH
+
+ /* Is there an indirection? */
+
+ if (!addr->OperIsIndir())
+ {
+ if (addr->gtFlags & GTF_REG_VAL)
+ {
+ emitCallType = emitter::EC_INDIR_R;
+ brg = addr->gtRegNum;
+ }
+ else
+ {
+ if (addr->OperGet() != GT_CNS_INT)
+ {
+ assert(addr->OperGet() == GT_LCL_VAR);
+
+ emitCallType = emitter::EC_INDIR_SR;
+ cns = addr->gtLclVarCommon.gtLclNum;
+ }
+ else
+ {
+ ssize_t funcPtr = addr->gtIntCon.gtIconVal;
+
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_ADDR,
+ nullptr, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo)(void*) funcPtr, argSize,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ return;
+ }
+ }
+ }
+ else
+ {
+ /* This is an indirect call */
+
+ emitCallType = emitter::EC_INDIR_ARD;
+
+ /* Get hold of the address of the function pointer */
+
+ addr = addr->gtOp.gtOp1;
+
+ /* Has the address been conveniently loaded into a register? */
+
+ if (addr->gtFlags & GTF_REG_VAL)
+ {
+ /* The address is "reg" */
+
+ brg = addr->gtRegNum;
+ }
+ else
+ {
+ bool rev = false;
+
+ GenTreePtr rv1 = nullptr;
+ GenTreePtr rv2 = nullptr;
+
+ /* Figure out what complex address mode to use */
+
+ INDEBUG(bool yes =)
+ genCreateAddrMode(addr, -1, true, RBM_NONE, &rev, &rv1, &rv2, &mul, &cns);
+
+ INDEBUG(PREFIX_ASSUME(yes)); // since we have called genMakeAddressable() on call->gtCall.gtCallAddr
+
+ /* Get the additional operands if any */
+
+ if (rv1)
+ {
+ assert(rv1->gtFlags & GTF_REG_VAL);
+ brg = rv1->gtRegNum;
+ }
+
+ if (rv2)
+ {
+ assert(rv2->gtFlags & GTF_REG_VAL);
+ xrg = rv2->gtRegNum;
+ }
+ }
+ }
+
+ assert(emitCallType == emitter::EC_INDIR_R || emitCallType == emitter::EC_INDIR_SR ||
+ emitCallType == emitter::EC_INDIR_C || emitCallType == emitter::EC_INDIR_ARD);
+
+#endif // CPU_LOAD_STORE_ARCH
+
+ getEmitter()->emitIns_Call(emitCallType,
+ nullptr, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
+ argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, // ilOffset
+ brg, xrg, mul,
+ cns); // addressing mode values
+}
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ *
+ * Emit an "op [r/m]" instruction (the r/m operand given by a tree).
+ */
+
+void CodeGen::instEmit_RM(instruction ins, GenTreePtr tree, GenTreePtr addr, unsigned offs)
+{
+ emitAttr size;
+
+ if (!instIsFP(ins))
+ size = emitTypeSize(tree->TypeGet());
+ else
+ size = EA_ATTR(genTypeSize(tree->TypeGet()));
+
+ sched_AM(ins, size, REG_NA, false, addr, offs);
+}
+
+/*****************************************************************************
+ *
+ * Emit an "op [r/m], reg" instruction (the r/m operand given by a tree).
+ */
+
+void CodeGen::instEmit_RM_RV(instruction ins, emitAttr size, GenTreePtr tree, regNumber reg, unsigned offs)
+{
+#ifdef _TARGET_XARCH_
+ assert(instIsFP(ins) == 0);
+#endif
+ sched_AM(ins, size, reg, false, tree, offs);
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Generate an instruction that has one operand given by a tree (which has
+ * been made addressable).
+ */
+
+void CodeGen::inst_TT(instruction ins, GenTreePtr tree, unsigned offs, int shfv, emitAttr size)
+{
+ bool sizeInferred = false;
+
+ if (size == EA_UNKNOWN)
+ {
+ sizeInferred = true;
+ if (instIsFP(ins))
+ {
+ size = EA_ATTR(genTypeSize(tree->TypeGet()));
+ }
+ else
+ {
+ size = emitTypeSize(tree->TypeGet());
+ }
+ }
+
+AGAIN:
+
+ /* Is the value sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ regNumber reg;
+
+#ifndef _TARGET_64BIT_
+#ifdef LEGACY_BACKEND
+ LONGREG_TT:
+#endif // LEGACY_BACKEND
+#endif
+
+#if FEATURE_STACK_FP_X87
+
+ /* Is this a floating-point instruction? */
+
+ if (isFloatRegType(tree->gtType))
+ {
+ reg = tree->gtRegNum;
+
+ assert(instIsFP(ins) && ins != INS_fst && ins != INS_fstp);
+ assert(shfv == 0);
+
+ inst_FS(ins, reg + genGetFPstkLevel());
+ return;
+ }
+#endif // FEATURE_STACK_FP_X87
+
+ assert(!instIsFP(ins));
+
+#if CPU_LONG_USES_REGPAIR
+ if (tree->gtType == TYP_LONG)
+ {
+ if (offs)
+ {
+ assert(offs == sizeof(int));
+ reg = genRegPairHi(tree->gtRegPair);
+ }
+ else
+ {
+ reg = genRegPairLo(tree->gtRegPair);
+ }
+ }
+ else
+#endif // CPU_LONG_USES_REGPAIR
+ {
+ reg = tree->gtRegNum;
+ }
+
+ /* Make sure it is not the "stack-half" of an enregistered long */
+
+ if (reg != REG_STK)
+ {
+ // For short types, indicate that the value is promoted to 4 bytes.
+ // For longs, we are only emitting half of it so again set it to 4 bytes,
+ // but leave the GC tracking information alone.
+ if (sizeInferred && EA_SIZE(size) < EA_4BYTE)
+ {
+ size = EA_SET_SIZE(size, 4);
+ }
+
+ if (shfv)
+ {
+ getEmitter()->emitIns_R_I(ins, size, reg, shfv);
+ }
+ else
+ {
+ inst_RV(ins, reg, tree->TypeGet(), size);
+ }
+
+ return;
+ }
+ }
+
+ /* Is this a spilled value? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ assert(!"ISSUE: If this can happen, we need to generate 'ins [ebp+spill]'");
+ }
+
+ switch (tree->gtOper)
+ {
+ unsigned varNum;
+
+ case GT_LCL_VAR:
+
+#ifdef LEGACY_BACKEND
+ /* Is this an enregistered long ? */
+
+ if (tree->gtType == TYP_LONG && !(tree->gtFlags & GTF_REG_VAL))
+ {
+ /* Avoid infinite loop */
+
+ if (genMarkLclVar(tree))
+ goto LONGREG_TT;
+ }
+#endif // LEGACY_BACKEND
+
+ inst_set_SV_var(tree);
+ goto LCL;
+
+ case GT_LCL_FLD:
+
+ offs += tree->gtLclFld.gtLclOffs;
+ goto LCL;
+
+ LCL:
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ if (shfv)
+ {
+ getEmitter()->emitIns_S_I(ins, size, varNum, offs, shfv);
+ }
+ else
+ {
+ getEmitter()->emitIns_S(ins, size, varNum, offs);
+ }
+
+ return;
+
+ case GT_CLS_VAR:
+ // Make sure FP instruction size matches the operand size
+ // (We optimize constant doubles to floats when we can; we just want to
+ // make sure that we don't mistakenly use 8 bytes when the constant
+ // is smaller.)
+ assert(!isFloatRegType(tree->gtType) || genTypeSize(tree->gtType) == EA_SIZE_IN_BYTES(size));
+
+ if (shfv)
+ {
+ getEmitter()->emitIns_C_I(ins, size, tree->gtClsVar.gtClsVarHnd, offs, shfv);
+ }
+ else
+ {
+ getEmitter()->emitIns_C(ins, size, tree->gtClsVar.gtClsVarHnd, offs);
+ }
+ return;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+ case GT_ARR_ELEM:
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"inst_TT not supported for GT_IND, GT_NULLCHECK or GT_ARR_ELEM in !LEGACY_BACKEND");
+#else // LEGACY_BACKEND
+ GenTreePtr addr = tree->OperIsIndir() ? tree->gtOp.gtOp1 : tree;
+ if (shfv)
+ sched_AM(ins, size, REG_NA, false, addr, offs, true, shfv);
+ else
+ instEmit_RM(ins, tree, addr, offs);
+#endif // LEGACY_BACKEND
+ }
+ break;
+
+#ifdef _TARGET_X86_
+ case GT_CNS_INT:
+ // We will get here for GT_MKREFANY from CodeGen::genPushArgList
+ assert(offs == 0);
+ assert(!shfv);
+ if (tree->IsIconHandle())
+ inst_IV_handle(ins, tree->gtIntCon.gtIconVal);
+ else
+ inst_IV(ins, tree->gtIntCon.gtIconVal);
+ break;
+#endif
+
+ case GT_COMMA:
+ // tree->gtOp.gtOp1 - already processed by genCreateAddrMode()
+ tree = tree->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ assert(!"invalid address");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate an instruction that has one operand given by a tree (which has
+ * been made addressable) and another that is a register.
+ */
+
+void CodeGen::inst_TT_RV(instruction ins, GenTreePtr tree, regNumber reg, unsigned offs, emitAttr size, insFlags flags)
+{
+ assert(reg != REG_STK);
+
+AGAIN:
+
+ /* Is the value sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ regNumber rg2;
+
+#ifdef _TARGET_64BIT_
+ assert(!instIsFP(ins));
+
+ rg2 = tree->gtRegNum;
+
+ assert(offs == 0);
+ assert(rg2 != REG_STK);
+
+ if (ins != INS_mov || rg2 != reg)
+ {
+ inst_RV_RV(ins, rg2, reg, tree->TypeGet());
+ }
+ return;
+
+#else // !_TARGET_64BIT_
+
+#ifdef LEGACY_BACKEND
+ LONGREG_TT_RV:
+#endif // LEGACY_BACKEND
+
+#ifdef _TARGET_XARCH_
+ assert(!instIsFP(ins));
+#endif
+
+#if CPU_LONG_USES_REGPAIR
+ if (tree->gtType == TYP_LONG)
+ {
+ if (offs)
+ {
+ assert(offs == sizeof(int));
+ rg2 = genRegPairHi(tree->gtRegPair);
+ }
+ else
+ {
+ rg2 = genRegPairLo(tree->gtRegPair);
+ }
+ }
+ else
+#endif // CPU_LONG_USES_REGPAIR
+ {
+ rg2 = tree->gtRegNum;
+ }
+
+ if (rg2 != REG_STK)
+ {
+ if (ins != INS_mov || rg2 != reg)
+ inst_RV_RV(ins, rg2, reg, tree->TypeGet(), size, flags);
+ return;
+ }
+
+#endif // _TARGET_64BIT_
+ }
+
+ /* Is this a spilled value? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ assert(!"ISSUE: If this can happen, we need to generate 'ins [ebp+spill]'");
+ }
+
+ if (size == EA_UNKNOWN)
+ {
+ if (instIsFP(ins))
+ {
+ size = EA_ATTR(genTypeSize(tree->TypeGet()));
+ }
+ else
+ {
+ size = emitTypeSize(tree->TypeGet());
+ }
+ }
+
+ switch (tree->gtOper)
+ {
+ unsigned varNum;
+
+ case GT_LCL_VAR:
+
+#ifdef LEGACY_BACKEND
+ if (tree->gtType == TYP_LONG && !(tree->gtFlags & GTF_REG_VAL))
+ {
+ /* Avoid infinite loop */
+
+ if (genMarkLclVar(tree))
+ goto LONGREG_TT_RV;
+ }
+#endif // LEGACY_BACKEND
+
+ inst_set_SV_var(tree);
+ goto LCL;
+
+ case GT_LCL_FLD:
+ case GT_STORE_LCL_FLD:
+ offs += tree->gtLclFld.gtLclOffs;
+ goto LCL;
+
+ LCL:
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+#if CPU_LOAD_STORE_ARCH
+ if (!getEmitter()->emitInsIsStore(ins))
+ {
+#ifndef LEGACY_BACKEND
+ // TODO-LdStArch-Bug: Should regTmp be a dst on the node or an internal reg?
+ // Either way, it is not currently being handled by Lowering.
+ regNumber regTmp = tree->gtRegNum;
+ assert(regTmp != REG_NA);
+#else // LEGACY_BACKEND
+ regNumber regTmp = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg));
+#endif // LEGACY_BACKEND
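+ // Read-modify-write: load the local into regTmp, apply the operation
+ // with 'reg', then store the result back to the local.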
+ getEmitter()->emitIns_R_S(ins_Load(tree->TypeGet()), size, regTmp, varNum, offs);
+ getEmitter()->emitIns_R_R(ins, size, regTmp, reg, flags);
+ getEmitter()->emitIns_S_R(ins_Store(tree->TypeGet()), size, regTmp, varNum, offs);
+
+ regTracker.rsTrackRegTrash(regTmp);
+ }
+ else
+#endif
+ {
+ // ins is a Store instruction
+ //
+ getEmitter()->emitIns_S_R(ins, size, reg, varNum, offs);
+#ifdef _TARGET_ARM_
+ // If we need to set the flags then add an extra movs reg,reg instruction
+ if (flags == INS_FLAGS_SET)
+ getEmitter()->emitIns_R_R(INS_mov, size, reg, reg, INS_FLAGS_SET);
+#endif
+ }
+ return;
+
+ case GT_CLS_VAR:
+ // Make sure FP instruction size matches the operand size
+ // (We optimize constant doubles to floats when we can; we just want to
+ // make sure that we don't mistakenly use 8 bytes when the constant
+ // is smaller.)
+ assert(!isFloatRegType(tree->gtType) || genTypeSize(tree->gtType) == EA_SIZE_IN_BYTES(size));
+
+#if CPU_LOAD_STORE_ARCH
+ if (!getEmitter()->emitInsIsStore(ins))
+ {
+#ifndef LEGACY_BACKEND
+ NYI("Store of GT_CLS_VAR not supported for ARM RyuJIT Backend");
+#else // LEGACY_BACKEND
+ regNumber regTmpAddr = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg));
+ regNumber regTmpArith = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg) & ~genRegMask(regTmpAddr));
+
+ getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, regTmpAddr, tree->gtClsVar.gtClsVarHnd, offs);
+ getEmitter()->emitIns_R_R(ins_Load(tree->TypeGet()), size, regTmpArith, regTmpAddr);
+ getEmitter()->emitIns_R_R(ins, size, regTmpArith, reg, flags);
+ getEmitter()->emitIns_R_R(ins_Store(tree->TypeGet()), size, regTmpArith, regTmpAddr);
+
+ regTracker.rsTrackRegTrash(regTmpAddr);
+ regTracker.rsTrackRegTrash(regTmpArith);
+#endif // LEGACY_BACKEND
+ }
+ else
+#endif // CPU_LOAD_STORE_ARCH
+ {
+ getEmitter()->emitIns_C_R(ins, size, tree->gtClsVar.gtClsVarHnd, reg, offs);
+ }
+ return;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+ case GT_ARR_ELEM:
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"inst_TT_RV not supported for GT_IND, GT_NULLCHECK or GT_ARR_ELEM in RyuJIT Backend");
+#else // LEGACY_BACKEND
+ GenTreePtr addr = tree->OperIsIndir() ? tree->gtOp.gtOp1 : tree;
+ sched_AM(ins, size, reg, false, addr, offs, false, 0, flags);
+#endif // LEGACY_BACKEND
+ }
+ break;
+
+ case GT_COMMA:
+ // tree->gtOp.gtOp1 - already processed by genCreateAddrMode()
+ tree = tree->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ assert(!"invalid address");
+ }
+}
+
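+/*****************************************************************************
+ *
+ * Return a register that is known to currently hold the constant zero, or
+ * REG_NA if no such register exists. On the legacy backend, when not
+ * optimizing for speed, we may also zero out a free register here so that
+ * it can be reused for subsequent zero stores.
+ */
+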
+regNumber CodeGen::genGetZeroRegister()
+{
+ regNumber zeroReg = REG_NA;
+
+#if REDUNDANT_LOAD
+
+ // Is the constant already in some register?
+
+ zeroReg = regTracker.rsIconIsInReg(0);
+#endif
+
+#ifdef LEGACY_BACKEND
+ if (zeroReg == REG_NA)
+ {
+ regMaskTP freeMask = regSet.rsRegMaskFree();
+
+ if ((freeMask != 0) && (compiler->compCodeOpt() != Compiler::FAST_CODE))
+ {
+ // For SMALL_CODE and BLENDED_CODE,
+ // we try to generate:
+ //
+ // xor reg, reg
+ // mov dest, reg
+ //
+ // When selecting a register to xor we try to avoid REG_TMP_0
+ // when we have another CALLEE_TRASH register available.
+ // This will often let us reuse the zeroed register in
+ // several back-to-back assignments
+ //
+ if ((freeMask & RBM_CALLEE_TRASH) != RBM_TMP_0)
+ freeMask &= ~RBM_TMP_0;
+ zeroReg = regSet.rsGrabReg(freeMask); // PickReg in stress will pick 'random' registers
+ // We want one in the freeMask set, so just use GrabReg
+ genSetRegToIcon(zeroReg, 0, TYP_INT);
+ }
+ }
+#endif // LEGACY_BACKEND
+
+ return zeroReg;
+}
+
+/*****************************************************************************
+ *
+ * Generate an instruction that has one operand given by a tree (which has
+ * been made addressable) and another that is an integer constant.
+ */
+#ifdef LEGACY_BACKEND
+void CodeGen::inst_TT_IV(instruction ins, GenTreePtr tree, ssize_t val, unsigned offs, emitAttr size, insFlags flags)
+{
+ bool sizeInferred = false;
+
+ if (size == EA_UNKNOWN)
+ {
+ sizeInferred = true;
+ if (instIsFP(ins))
+ size = EA_ATTR(genTypeSize(tree->TypeGet()));
+ else
+ size = emitTypeSize(tree->TypeGet());
+ }
+
+AGAIN:
+
+ /* Is the value sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+#ifndef _TARGET_64BIT_
+ LONGREG_TT_IV:
+#endif
+ regNumber reg;
+
+ assert(instIsFP(ins) == 0);
+
+#if CPU_LONG_USES_REGPAIR
+ if (tree->gtType == TYP_LONG)
+ {
+ if (offs == 0)
+ {
+ reg = genRegPairLo(tree->gtRegPair);
+ }
+ else // offs == 4
+ {
+ assert(offs == sizeof(int));
+ reg = genRegPairHi(tree->gtRegPair);
+ }
+#if CPU_LOAD_STORE_ARCH
+ if (reg == REG_STK && !getEmitter()->emitInsIsLoadOrStore(ins))
+ {
+ reg = regSet.rsPickFreeReg();
+ inst_RV_TT(INS_mov, reg, tree, offs, EA_4BYTE, flags);
+ regTracker.rsTrackRegTrash(reg);
+ }
+#endif
+ }
+ else
+#endif // CPU_LONG_USES_REGPAIR
+ {
+ reg = tree->gtRegNum;
+ }
+
+ if (reg != REG_STK)
+ {
+ // We always widen as part of enregistering,
+ // so a smaller tree in a register can be
+ // treated as 4 bytes
+ if (sizeInferred && (size < EA_4BYTE))
+ {
+ size = EA_SET_SIZE(size, EA_4BYTE);
+ }
+
+ if ((ins == INS_mov) && !EA_IS_CNS_RELOC(size))
+ {
+ genSetRegToIcon(reg, val, tree->TypeGet(), flags);
+ }
+ else
+ {
+#if defined(_TARGET_XARCH_)
+ inst_RV_IV(ins, reg, val, size);
+#elif defined(_TARGET_ARM_)
+ if (!EA_IS_CNS_RELOC(size) && arm_Valid_Imm_For_Instr(ins, val, flags))
+ {
+ getEmitter()->emitIns_R_I(ins, size, reg, val, flags);
+ }
+ else // We need a scratch register
+ {
+ // Load imm into a register
+ regMaskTP usedMask;
+ if (tree->gtType == TYP_LONG)
+ {
+ usedMask = genRegPairMask(tree->gtRegPair);
+#if CPU_LOAD_STORE_ARCH
+ // In gtRegPair, this part of the long may have been on the stack
+ // in which case, the code above would have loaded it into 'reg'
+ // and so we need to also include 'reg' in the set of registers
+ // that are already in use.
+ usedMask |= genRegMask(reg);
+#endif // CPU_LOAD_STORE_ARCH
+ }
+ else
+ {
+ usedMask = genRegMask(tree->gtRegNum);
+ }
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~usedMask);
+ noway_assert(reg != immReg);
+ instGen_Set_Reg_To_Imm(size, immReg, val);
+ if (getEmitter()->emitInsIsStore(ins))
+ ins = INS_mov;
+ getEmitter()->emitIns_R_R(ins, size, reg, immReg, flags);
+ }
+#else
+ NYI("inst_TT_IV - unknown target");
+#endif
+ }
+ return;
+ }
+ }
+
+#ifdef _TARGET_XARCH_
+ /* Are we storing a zero? */
+
+ if ((ins == INS_mov) && (val == 0) &&
+ ((genTypeSize(tree->gtType) == sizeof(int)) || (genTypeSize(tree->gtType) == REGSIZE_BYTES)))
+ {
+ regNumber zeroReg;
+
+ zeroReg = genGetZeroRegister();
+
+ if (zeroReg != REG_NA)
+ {
+ inst_TT_RV(INS_mov, tree, zeroReg, offs);
+ return;
+ }
+ }
+#endif
+
+#if CPU_LOAD_STORE_ARCH
+ /* Are we storing/comparing with a constant? */
+
+ if (getEmitter()->emitInsIsStore(ins) || getEmitter()->emitInsIsCompare(ins))
+ {
+ // Load val into a register
+
+ regNumber valReg;
+ valReg = regSet.rsGrabReg(RBM_ALLINT);
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, valReg, val);
+ inst_TT_RV(ins, tree, valReg, offs, size, flags);
+ return;
+ }
+ else if (ins == INS_mov)
+ {
+ assert(!"Please call ins_Store(type) to get the store instruction");
+ }
+ assert(!getEmitter()->emitInsIsLoad(ins));
+#endif // CPU_LOAD_STORE_ARCH
+
+ /* Is this a spilled value? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ assert(!"ISSUE: If this can happen, we need to generate 'ins [ebp+spill], icon'");
+ }
+
+#ifdef _TARGET_AMD64_
+ if ((EA_SIZE(size) == EA_8BYTE) && (((int)val != (ssize_t)val) || EA_IS_CNS_RELOC(size)))
+ {
+ // Load imm into a register
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT);
+ instGen_Set_Reg_To_Imm(size, immReg, val);
+ inst_TT_RV(ins, tree, immReg, offs);
+ return;
+ }
+#endif // _TARGET_AMD64_
+
+ int ival = (int)val;
+
+ switch (tree->gtOper)
+ {
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ case GT_LCL_FLD:
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ offs += tree->gtLclFld.gtLclOffs;
+
+ goto LCL;
+
+ case GT_LCL_VAR:
+
+#ifndef _TARGET_64BIT_
+ /* Is this an enregistered long ? */
+
+ if (tree->gtType == TYP_LONG && !(tree->gtFlags & GTF_REG_VAL))
+ {
+ /* Avoid infinite loop */
+
+ if (genMarkLclVar(tree))
+ goto LONGREG_TT_IV;
+ }
+#endif // !_TARGET_64BIT_
+
+ inst_set_SV_var(tree);
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ varDsc = &compiler->lvaTable[varNum];
+
+ // Fix the immediate by sign extending if needed
+ if (size < EA_4BYTE && !varTypeIsUnsigned(varDsc->TypeGet()))
+ {
+ if (size == EA_1BYTE)
+ {
+ if ((ival & 0x7f) != ival)
+ ival = ival | 0xffffff00;
+ }
+ else
+ {
+ assert(size == EA_2BYTE);
+ if ((ival & 0x7fff) != ival)
+ ival = ival | 0xffff0000;
+ }
+ }
+
+ // A local stack slot is at least 4 bytes in size, regardless of
+ // what the local var is typed as, so auto-promote it here
+ // unless the code generator told us a size, or it is a field
+ // of a promoted struct
+ if (sizeInferred && (size < EA_4BYTE) && !varDsc->lvIsStructField)
+ {
+ size = EA_SET_SIZE(size, EA_4BYTE);
+ }
+
+ LCL:
+
+ /* Integer instructions never operate on more than EA_PTRSIZE */
+
+ assert(instIsFP(ins) == false);
+
+#if CPU_LOAD_STORE_ARCH
+ if (!getEmitter()->emitInsIsStore(ins))
+ {
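+ // Read-modify-write: load the local into regTmp, apply the operation with
+ // the immediate (or a register holding it), then store the result back.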
+ regNumber regTmp = regSet.rsPickFreeReg(RBM_ALLINT);
+ getEmitter()->emitIns_R_S(ins_Load(tree->TypeGet()), size, regTmp, varNum, offs);
+ regTracker.rsTrackRegTrash(regTmp);
+
+ if (arm_Valid_Imm_For_Instr(ins, val, flags))
+ {
+ getEmitter()->emitIns_R_I(ins, size, regTmp, ival, flags);
+ }
+ else // We need a scratch register
+ {
+ // Load imm into a register
+ regNumber regImm = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regTmp));
+
+ instGen_Set_Reg_To_Imm(size, regImm, val);
+ getEmitter()->emitIns_R_R(ins, size, regTmp, regImm, flags);
+ }
+ getEmitter()->emitIns_S_R(ins_Store(tree->TypeGet()), size, regTmp, varNum, offs);
+ }
+ else
+#endif
+ {
+ getEmitter()->emitIns_S_I(ins, size, varNum, offs, ival);
+ }
+ return;
+
+ case GT_CLS_VAR:
+ // Make sure FP instruction size matches the operand size
+ // (We optimize constant doubles to floats when we can)
+ // We just want to make sure that we don't mistakenly
+ // use 8 bytes when the constant is smaller.
+ //
+ assert(!isFloatRegType(tree->gtType) || genTypeSize(tree->gtType) == EA_SIZE_IN_BYTES(size));
+
+#if CPU_LOAD_STORE_ARCH
+ regNumber regTmpAddr;
+ regTmpAddr = regSet.rsPickFreeReg(RBM_ALLINT);
+
+ getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, regTmpAddr, tree->gtClsVar.gtClsVarHnd, offs);
+ regTracker.rsTrackRegTrash(regTmpAddr);
+
+ if (!getEmitter()->emitInsIsStore(ins))
+ {
+ regNumber regTmpArith = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(regTmpAddr));
+
+ getEmitter()->emitIns_R_R(ins_Load(tree->TypeGet()), size, regTmpArith, regTmpAddr);
+
+ if (arm_Valid_Imm_For_Instr(ins, ival, flags))
+ {
+ getEmitter()->emitIns_R_R_I(ins, size, regTmpArith, regTmpArith, ival, flags);
+ }
+ else
+ {
+ regNumber regTmpImm =
+ regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(regTmpAddr) & ~genRegMask(regTmpArith));
+ instGen_Set_Reg_To_Imm(EA_4BYTE, regTmpImm, (ssize_t)ival);
+ getEmitter()->emitIns_R_R(ins, size, regTmpArith, regTmpImm, flags);
+ }
+ regTracker.rsTrackRegTrash(regTmpArith);
+
+ getEmitter()->emitIns_R_R(ins_Store(tree->TypeGet()), size, regTmpArith, regTmpAddr);
+ }
+ else
+ {
+ regNumber regTmpImm = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(regTmpAddr));
+
+ instGen_Set_Reg_To_Imm(EA_4BYTE, regTmpImm, (ssize_t)ival, flags);
+ getEmitter()->emitIns_R_R(ins_Store(tree->TypeGet()), size, regTmpImm, regTmpAddr);
+ }
+#else // !CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_C_I(ins, size, tree->gtClsVar.gtClsVarHnd, offs, ival);
+#endif
+ return;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+ case GT_ARR_ELEM:
+ {
+ GenTreePtr addr = tree->OperIsIndir() ? tree->gtOp.gtOp1 : tree;
+ sched_AM(ins, size, REG_NA, false, addr, offs, true, ival, flags);
+ }
+ return;
+
+ case GT_COMMA:
+ // tree->gtOp.gtOp1 - already processed by genCreateAddrMode()
+ tree = tree->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ assert(!"invalid address");
+ }
+}
+#endif // LEGACY_BACKEND
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ *
+ * Generate an instruction that has one operand given by a register and the
+ * other one by an indirection tree (which has been made addressable).
+ */
+
+void CodeGen::inst_RV_AT(
+ instruction ins, emitAttr size, var_types type, regNumber reg, GenTreePtr tree, unsigned offs, insFlags flags)
+{
+#ifdef _TARGET_XARCH_
+#ifdef DEBUG
+ // If it is a GC type and the result is not, then either
+ // 1) it is an LEA
+ // 2) optOptimizeBools() optimized if (ref != 0 && ref != 0) to if (ref & ref)
+ // 3) optOptimizeBools() optimized if (ref == 0 || ref == 0) to if (ref | ref)
+ // 4) byref - byref = int
+ if (type == TYP_REF && !EA_IS_GCREF(size))
+ assert((EA_IS_BYREF(size) && ins == INS_add) || (ins == INS_lea || ins == INS_and || ins == INS_or));
+ if (type == TYP_BYREF && !EA_IS_BYREF(size))
+ assert(ins == INS_lea || ins == INS_and || ins == INS_or || ins == INS_sub);
+ assert(!instIsFP(ins));
+#endif
+#endif
+
+ // Integer instructions never operate on more than EA_PTRSIZE.
+ if (EA_SIZE(size) > EA_PTRSIZE && !instIsFP(ins))
+ size = EA_SET_SIZE(size, EA_PTRSIZE);
+
+ GenTreePtr addr = tree;
+ sched_AM(ins, size, reg, true, addr, offs, false, 0, flags);
+}
+
+/*****************************************************************************
+ *
+ * Generate an instruction that has one operand given by an indirection tree
+ * (which has been made addressable) and an integer constant.
+ */
+
+void CodeGen::inst_AT_IV(instruction ins, emitAttr size, GenTreePtr baseTree, int icon, unsigned offs)
+{
+ sched_AM(ins, size, REG_NA, false, baseTree, offs, true, icon);
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Generate an instruction that has one operand given by a register and the
+ * other one by a tree (which has been made addressable).
+ */
+
+void CodeGen::inst_RV_TT(instruction ins,
+ regNumber reg,
+ GenTreePtr tree,
+ unsigned offs,
+ emitAttr size,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+ assert(reg != REG_STK);
+
+ if (size == EA_UNKNOWN)
+ {
+ if (!instIsFP(ins))
+ {
+ size = emitTypeSize(tree->TypeGet());
+ }
+ else
+ {
+ size = EA_ATTR(genTypeSize(tree->TypeGet()));
+ }
+ }
+
+#ifdef _TARGET_XARCH_
+#ifdef DEBUG
+ // If it is a GC type and the result is not, then either
+ // 1) it is an LEA
+ // 2) optOptimizeBools() optimized if (ref != 0 && ref != 0) to if (ref & ref)
+ // 3) optOptimizeBools() optimized if (ref == 0 || ref == 0) to if (ref | ref)
+ // 4) byref - byref = int
+ if (tree->gtType == TYP_REF && !EA_IS_GCREF(size))
+ {
+ assert((EA_IS_BYREF(size) && ins == INS_add) || (ins == INS_lea || ins == INS_and || ins == INS_or));
+ }
+ if (tree->gtType == TYP_BYREF && !EA_IS_BYREF(size))
+ {
+ assert(ins == INS_lea || ins == INS_and || ins == INS_or || ins == INS_sub);
+ }
+#endif
+#endif
+
+#if CPU_LOAD_STORE_ARCH
+ if (ins == INS_mov)
+ {
+#if defined(_TARGET_ARM_)
+ if (tree->TypeGet() != TYP_LONG)
+ {
+ ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0);
+ }
+ else if (offs == 0)
+ {
+ ins = ins_Move_Extend(TYP_INT,
+ (tree->gtFlags & GTF_REG_VAL) != 0 && genRegPairLo(tree->gtRegPair) != REG_STK);
+ }
+ else
+ {
+ ins = ins_Move_Extend(TYP_INT,
+ (tree->gtFlags & GTF_REG_VAL) != 0 && genRegPairHi(tree->gtRegPair) != REG_STK);
+ }
+#elif defined(_TARGET_ARM64_)
+ ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0);
+#else
+ NYI("CodeGen::inst_RV_TT with INS_mov");
+#endif
+ }
+#endif // CPU_LOAD_STORE_ARCH
+
+AGAIN:
+
+ /* Is the value sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+#ifdef _TARGET_64BIT_
+ assert(instIsFP(ins) == 0);
+
+ regNumber rg2 = tree->gtRegNum;
+
+ assert(offs == 0);
+ assert(rg2 != REG_STK);
+
+ if ((ins != INS_mov) || (rg2 != reg))
+ {
+ inst_RV_RV(ins, reg, rg2, tree->TypeGet(), size);
+ }
+ return;
+
+#else // !_TARGET_64BIT_
+
+#ifdef LEGACY_BACKEND
+ LONGREG_RVTT:
+#endif // LEGACY_BACKEND
+
+#ifdef _TARGET_XARCH_
+ assert(instIsFP(ins) == 0);
+#endif
+
+ regNumber rg2;
+
+#if CPU_LONG_USES_REGPAIR
+ if (tree->gtType == TYP_LONG)
+ {
+ if (offs)
+ {
+ assert(offs == sizeof(int));
+
+ rg2 = genRegPairHi(tree->gtRegPair);
+ }
+ else
+ {
+ rg2 = genRegPairLo(tree->gtRegPair);
+ }
+ }
+ else
+#endif // CPU_LONG_USES_REGPAIR
+ {
+ rg2 = tree->gtRegNum;
+ }
+
+ if (rg2 != REG_STK)
+ {
+#ifdef _TARGET_ARM_
+ if (getEmitter()->emitInsIsLoad(ins) || (ins == INS_lea))
+ {
+ ins = ins_Copy(tree->TypeGet());
+ }
+#endif
+
+ bool isMoveIns = (ins == INS_mov);
+#ifdef _TARGET_ARM_
+ if (ins == INS_vmov)
+ isMoveIns = true;
+#endif
+ if (!isMoveIns || (rg2 != reg))
+ {
+ inst_RV_RV(ins, reg, rg2, tree->TypeGet(), size, flags);
+ }
+ return;
+ }
+
+#endif // _TARGET_64BIT_
+ }
+
+ /* Is this a spilled value? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ assert(!"ISSUE: If this can happen, we need to generate 'ins [ebp+spill]'");
+ }
+
+ switch (tree->gtOper)
+ {
+ unsigned varNum;
+
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+
+#ifdef LEGACY_BACKEND
+ /* Is this an enregistered long ? */
+
+ if (tree->gtType == TYP_LONG && !(tree->gtFlags & GTF_REG_VAL))
+ {
+
+ /* Avoid infinite loop */
+
+ if (genMarkLclVar(tree))
+ goto LONGREG_RVTT;
+ }
+#endif // LEGACY_BACKEND
+
+ inst_set_SV_var(tree);
+ goto LCL;
+
+ case GT_LCL_FLD_ADDR:
+ case GT_LCL_FLD:
+ offs += tree->gtLclFld.gtLclOffs;
+ goto LCL;
+
+ LCL:
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+#ifdef _TARGET_ARM_
+ switch (ins)
+ {
+ case INS_mov:
+ ins = ins_Load(tree->TypeGet());
+ __fallthrough;
+
+ case INS_lea:
+ case INS_ldr:
+ case INS_ldrh:
+ case INS_ldrb:
+ case INS_ldrsh:
+ case INS_ldrsb:
+ case INS_vldr:
+ assert(flags != INS_FLAGS_SET);
+ getEmitter()->emitIns_R_S(ins, size, reg, varNum, offs);
+ return;
+
+ default:
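+ // The instruction cannot take a stack operand directly, so load the
+ // local into a temporary register and apply the instruction
+ // register-to-register.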
+ regNumber regTmp;
+#ifndef LEGACY_BACKEND
+ if (tree->TypeGet() == TYP_LONG)
+ regTmp = (offs == 0) ? genRegPairLo(tree->gtRegPair) : genRegPairHi(tree->gtRegPair);
+ else
+ regTmp = tree->gtRegNum;
+#else // LEGACY_BACKEND
+ if (varTypeIsFloating(tree))
+ {
+ regTmp = regSet.PickRegFloat(tree->TypeGet());
+ }
+ else
+ {
+ regTmp = regSet.rsPickReg(RBM_ALLINT & ~genRegMask(reg));
+ }
+#endif // LEGACY_BACKEND
+
+ getEmitter()->emitIns_R_S(ins_Load(tree->TypeGet()), size, regTmp, varNum, offs);
+ getEmitter()->emitIns_R_R(ins, size, reg, regTmp, flags);
+
+ regTracker.rsTrackRegTrash(regTmp);
+ return;
+ }
+#else // !_TARGET_ARM_
+ getEmitter()->emitIns_R_S(ins, size, reg, varNum, offs);
+ return;
+#endif // !_TARGET_ARM_
+
+ case GT_CLS_VAR:
+ // Make sure FP instruction size matches the operand size
+ // (We optimize constant doubles to floats when we can; we just want to
+ // make sure that we don't mistakenly use 8 bytes when the constant
+ // is smaller.)
+ assert(!isFloatRegType(tree->gtType) || genTypeSize(tree->gtType) == EA_SIZE_IN_BYTES(size));
+
+#if CPU_LOAD_STORE_ARCH
+#ifndef LEGACY_BACKEND
+ assert(!"GT_CLS_VAR not supported in ARM RyuJIT backend");
+#else // LEGACY_BACKEND
+ switch (ins)
+ {
+ case INS_mov:
+ ins = ins_Load(tree->TypeGet());
+
+ __fallthrough;
+
+ case INS_lea:
+ case INS_ldr:
+ case INS_ldrh:
+ case INS_ldrb:
+ case INS_ldrsh:
+ case INS_ldrsb:
+ case INS_vldr:
+ assert(flags != INS_FLAGS_SET);
+ getEmitter()->emitIns_R_C(ins, size, reg, tree->gtClsVar.gtClsVarHnd, offs);
+ return;
+
+ default:
+ regNumber regTmp = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg));
+ getEmitter()->emitIns_R_C(ins_Load(tree->TypeGet()), size, regTmp, tree->gtClsVar.gtClsVarHnd,
+ offs);
+ getEmitter()->emitIns_R_R(ins, size, reg, regTmp, flags);
+ regTracker.rsTrackRegTrash(regTmp);
+ return;
+ }
+#endif // LEGACY_BACKEND
+#else // CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_C(ins, size, reg, tree->gtClsVar.gtClsVarHnd, offs);
+#endif // CPU_LOAD_STORE_ARCH
+ return;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+ case GT_ARR_ELEM:
+ case GT_LEA:
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"inst_RV_TT not supported for GT_IND, GT_NULLCHECK, GT_ARR_ELEM or GT_LEA in !LEGACY_BACKEND");
+#else // LEGACY_BACKEND
+ GenTreePtr addr = tree->OperIsIndir() ? tree->gtOp.gtOp1 : tree;
+ inst_RV_AT(ins, size, tree->TypeGet(), reg, addr, offs, flags);
+#endif // LEGACY_BACKEND
+ }
+ break;
+
+ case GT_CNS_INT:
+
+ assert(offs == 0);
+
+ inst_RV_IV(ins, reg, tree->gtIntCon.gtIconVal, emitActualTypeSize(tree->TypeGet()), flags);
+ break;
+
+ case GT_CNS_LNG:
+
+ assert(size == EA_4BYTE || size == EA_8BYTE);
+
+#ifdef _TARGET_AMD64_
+ assert(offs == 0);
+#endif // _TARGET_AMD64_
+
+ ssize_t constVal;
+ emitAttr size;
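+ // Select the low half (offs == 0) or the high 32 bits of the 64-bit
+ // constant, along with the emit size to use for it.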
+ if (offs == 0)
+ {
+ constVal = (ssize_t)(tree->gtLngCon.gtLconVal);
+ size = EA_PTRSIZE;
+ }
+ else
+ {
+ constVal = (ssize_t)(tree->gtLngCon.gtLconVal >> 32);
+ size = EA_4BYTE;
+ }
+#ifndef LEGACY_BACKEND
+#ifdef _TARGET_ARM_
+ if ((ins != INS_mov) && !arm_Valid_Imm_For_Instr(ins, constVal, flags))
+ {
+ regNumber constReg = (offs == 0) ? genRegPairLo(tree->gtRegPair) : genRegPairHi(tree->gtRegPair);
+ instGen_Set_Reg_To_Imm(size, constReg, constVal);
+ getEmitter()->emitIns_R_R(ins, size, reg, constReg, flags);
+ break;
+ }
+#endif // _TARGET_ARM_
+#endif // !LEGACY_BACKEND
+
+ inst_RV_IV(ins, reg, constVal, size, flags);
+ break;
+
+ case GT_COMMA:
+ tree = tree->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ assert(!"invalid address");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate the 3-operand imul instruction "imul reg, [tree], icon"
+ * which is reg=[tree]*icon
+ */
+#ifdef LEGACY_BACKEND
+void CodeGen::inst_RV_TT_IV(instruction ins, regNumber reg, GenTreePtr tree, int val)
+{
+ assert(tree->gtType <= TYP_I_IMPL);
+
+#ifdef _TARGET_XARCH_
+ /* Only 'imul' uses this instruction format. Since we don't represent
+ three operands for an instruction, we encode the target register as
+ an implicit operand */
+
+ assert(ins == INS_imul);
+ ins = getEmitter()->inst3opImulForReg(reg);
+
+ genUpdateLife(tree);
+ inst_TT_IV(ins, tree, val);
+#else
+ NYI("inst_RV_TT_IV - unknown target");
+#endif
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Generate a "shift reg, icon" instruction.
+ */
+
+void CodeGen::inst_RV_SH(
+ instruction ins, emitAttr size, regNumber reg, unsigned val, insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+#if defined(_TARGET_ARM_)
+
+ if (val >= 32)
+ val &= 0x1f;
+
+ getEmitter()->emitIns_R_I(ins, size, reg, val, flags);
+
+#elif defined(_TARGET_XARCH_)
+
+#ifdef _TARGET_AMD64_
+ // X64 JB BE ensures only encodable values make it here.
+ // x86 can encode 8 bits, though it masks down to 5 or 6
+ // depending on whether 32-bit or 64-bit registers are used.
+ // Here we will allow anything that is encodable.
+ assert(val < 256);
+#endif
+
+ ins = genMapShiftInsToShiftByConstantIns(ins, val);
+
+ if (val == 1)
+ {
+ getEmitter()->emitIns_R(ins, size, reg);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(ins, size, reg, val);
+ }
+
+#else
+ NYI("inst_RV_SH - unknown target");
+#endif // _TARGET_*
+}
+
+/*****************************************************************************
+ *
+ * Generate a "shift [r/m], icon" instruction.
+ */
+
+void CodeGen::inst_TT_SH(instruction ins, GenTreePtr tree, unsigned val, unsigned offs)
+{
+#ifdef _TARGET_XARCH_
+ if (val == 0)
+ {
+ // Shift by 0 - nothing to do.
+ return;
+ }
+
+ ins = genMapShiftInsToShiftByConstantIns(ins, val);
+ if (val == 1)
+ {
+ inst_TT(ins, tree, offs, 0, emitTypeSize(tree->TypeGet()));
+ }
+ else
+ {
+ inst_TT(ins, tree, offs, val, emitTypeSize(tree->TypeGet()));
+ }
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
+ inst_TT(ins, tree, offs, val, emitTypeSize(tree->TypeGet()));
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate a "shift [addr], cl" instruction.
+ */
+
+void CodeGen::inst_TT_CL(instruction ins, GenTreePtr tree, unsigned offs)
+{
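+ // The caller passes the "shift by CL" form of the instruction; a zero
+ // shift count tells inst_TT not to append an immediate operand.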
+ inst_TT(ins, tree, offs, 0, emitTypeSize(tree->TypeGet()));
+}
+
+/*****************************************************************************
+ *
+ * Generate an instruction of the form "op reg1, reg2, icon".
+ */
+
+#if defined(_TARGET_XARCH_)
+void CodeGen::inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regNumber reg2, unsigned ival)
+{
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ assert(ins == INS_shld || ins == INS_shrd || ins == INS_shufps || ins == INS_shufpd || ins == INS_pshufd ||
+ ins == INS_cmpps || ins == INS_cmppd || ins == INS_dppd || ins == INS_dpps || ins == INS_insertps);
+#else // LEGACY_BACKEND
+ assert(ins == INS_shld || ins == INS_shrd);
+#endif // defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+
+ getEmitter()->emitIns_R_R_I(ins, size, reg1, reg2, ival);
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Generate an instruction with two registers, the second one being a byte
+ * or word register (i.e. this is something like "movzx eax, cl").
+ */
+
+void CodeGen::inst_RV_RR(instruction ins, emitAttr size, regNumber reg1, regNumber reg2)
+{
+ assert(size == EA_1BYTE || size == EA_2BYTE);
+#ifdef _TARGET_XARCH_
+ assert(ins == INS_movsx || ins == INS_movzx);
+ assert(size != EA_1BYTE || (genRegMask(reg2) & RBM_BYTE_REGS));
+#endif
+
+ getEmitter()->emitIns_R_R(ins, size, reg1, reg2);
+}
+
+/*****************************************************************************
+ *
+ * The following should all end up inline in compiler.hpp at some point.
+ */
+
+void CodeGen::inst_ST_RV(instruction ins, TempDsc* tmp, unsigned ofs, regNumber reg, var_types type)
+{
+ getEmitter()->emitIns_S_R(ins, emitActualTypeSize(type), reg, tmp->tdTempNum(), ofs);
+}
+
+void CodeGen::inst_ST_IV(instruction ins, TempDsc* tmp, unsigned ofs, int val, var_types type)
+{
+ getEmitter()->emitIns_S_I(ins, emitActualTypeSize(type), tmp->tdTempNum(), ofs, val);
+}
+
+#if FEATURE_FIXED_OUT_ARGS
+/*****************************************************************************
+ *
+ * Generate an instruction that references the outgoing argument space
+ * like "str r3, [sp+0x04]"
+ */
+
+void CodeGen::inst_SA_RV(instruction ins, unsigned ofs, regNumber reg, var_types type)
+{
+ assert(ofs < compiler->lvaOutgoingArgSpaceSize);
+
+ getEmitter()->emitIns_S_R(ins, emitActualTypeSize(type), reg, compiler->lvaOutgoingArgSpaceVar, ofs);
+}
+
+void CodeGen::inst_SA_IV(instruction ins, unsigned ofs, int val, var_types type)
+{
+ assert(ofs < compiler->lvaOutgoingArgSpaceSize);
+
+ getEmitter()->emitIns_S_I(ins, emitActualTypeSize(type), compiler->lvaOutgoingArgSpaceVar, ofs, val);
+}
+#endif // FEATURE_FIXED_OUT_ARGS
+
+/*****************************************************************************
+ *
+ * Generate an instruction with one register and one operand that is byte
+ * or short (e.g. something like "movzx eax, byte ptr [edx]").
+ */
+
+void CodeGen::inst_RV_ST(instruction ins, emitAttr size, regNumber reg, GenTreePtr tree)
+{
+ assert(size == EA_1BYTE || size == EA_2BYTE);
+
+ /* "movsx erx, rl" must be handled as a special case */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ inst_RV_RR(ins, size, reg, tree->gtRegNum);
+ }
+ else
+ {
+ inst_RV_TT(ins, reg, tree, 0, size);
+ }
+}
+
+void CodeGen::inst_RV_ST(instruction ins, regNumber reg, TempDsc* tmp, unsigned ofs, var_types type, emitAttr size)
+{
+ if (size == EA_UNKNOWN)
+ {
+ size = emitActualTypeSize(type);
+ }
+
+#ifdef _TARGET_ARM_
+ switch (ins)
+ {
+ case INS_mov:
+ assert(!"Please call ins_Load(type) to get the load instruction");
+ break;
+
+ case INS_add:
+ case INS_ldr:
+ case INS_ldrh:
+ case INS_ldrb:
+ case INS_ldrsh:
+ case INS_ldrsb:
+ case INS_lea:
+ case INS_vldr:
+ getEmitter()->emitIns_R_S(ins, size, reg, tmp->tdTempNum(), ofs);
+ break;
+
+ default:
+#ifndef LEGACY_BACKEND
+ assert(!"Default inst_RV_ST case not supported for Arm !LEGACY_BACKEND");
+#else // LEGACY_BACKEND
+ regNumber regTmp;
+ if (varTypeIsFloating(type))
+ {
+ regTmp = regSet.PickRegFloat(type);
+ }
+ else
+ {
+ regTmp = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg));
+ }
+ getEmitter()->emitIns_R_S(ins_Load(type), size, regTmp, tmp->tdTempNum(), ofs);
+ regTracker.rsTrackRegTrash(regTmp);
+ getEmitter()->emitIns_R_R(ins, size, reg, regTmp);
+#endif // LEGACY_BACKEND
+ break;
+ }
+#else // !_TARGET_ARM_
+ getEmitter()->emitIns_R_S(ins, size, reg, tmp->tdTempNum(), ofs);
+#endif // !_TARGET_ARM_
+}
+
+void CodeGen::inst_mov_RV_ST(regNumber reg, GenTreePtr tree)
+{
+ /* Figure out the size of the value being loaded */
+
+ emitAttr size = EA_ATTR(genTypeSize(tree->gtType));
+ instruction loadIns = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0);
+
+ if (size < EA_4BYTE)
+ {
+ if ((tree->gtFlags & GTF_SMALL_OK) && (size == EA_1BYTE)
+#if CPU_HAS_BYTE_REGS
+ && (genRegMask(reg) & RBM_BYTE_REGS)
+#endif
+ )
+ {
+ /* We only need to load the actual size */
+
+ inst_RV_TT(INS_mov, reg, tree, 0, EA_1BYTE);
+ }
+ else
+ {
+ /* Generate the "movsx/movzx" opcode */
+
+ inst_RV_ST(loadIns, size, reg, tree);
+ }
+ }
+ else
+ {
+ /* Compute op1 into the target register */
+
+ inst_RV_TT(loadIns, reg, tree);
+ }
+}
+#ifdef _TARGET_XARCH_
+void CodeGen::inst_FS_ST(instruction ins, emitAttr size, TempDsc* tmp, unsigned ofs)
+{
+ getEmitter()->emitIns_S(ins, size, tmp->tdTempNum(), ofs);
+}
+#endif
+
+#ifdef _TARGET_ARM_
+bool CodeGenInterface::validImmForInstr(instruction ins, ssize_t imm, insFlags flags)
+{
+ if (getEmitter()->emitInsIsLoadOrStore(ins) && !instIsFP(ins))
+ {
+ return validDispForLdSt(imm, TYP_INT);
+ }
+
+ bool result = false;
+ switch (ins)
+ {
+ case INS_cmp:
+ case INS_cmn:
+ if (validImmForAlu(imm) || validImmForAlu(-imm))
+ result = true;
+ break;
+
+ case INS_and:
+ case INS_bic:
+ case INS_orr:
+ case INS_orn:
+ case INS_mvn:
+ if (validImmForAlu(imm) || validImmForAlu(~imm))
+ result = true;
+ break;
+
+ case INS_mov:
+ if (validImmForMov(imm))
+ result = true;
+ break;
+
+ case INS_addw:
+ case INS_subw:
+ if ((unsigned_abs(imm) <= 0x00000fff) && (flags != INS_FLAGS_SET)) // 12-bit immediate
+ result = true;
+ break;
+
+ case INS_add:
+ case INS_sub:
+ if (validImmForAdd(imm, flags))
+ result = true;
+ break;
+
+ case INS_tst:
+ case INS_eor:
+ case INS_teq:
+ case INS_adc:
+ case INS_sbc:
+ case INS_rsb:
+ if (validImmForAlu(imm))
+ result = true;
+ break;
+
+ case INS_asr:
+ case INS_lsl:
+ case INS_lsr:
+ case INS_ror:
+ if (imm > 0 && imm <= 32)
+ result = true;
+ break;
+
+ case INS_vstr:
+ case INS_vldr:
+ if ((imm & 0x3FC) == imm)
+ result = true;
+ break;
+
+ default:
+ break;
+ }
+ return result;
+}
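+
+// For example (illustrative only): validImmForInstr(INS_lsl, 33, INS_FLAGS_DONT_CARE)
+// is false because shift immediates must fall in [1..32], while
+// validImmForInstr(INS_vldr, 0x3FC, INS_FLAGS_DONT_CARE) is true because FP
+// load/store offsets must be word aligned and fit within the 0x3FC mask.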
+bool CodeGen::arm_Valid_Imm_For_Instr(instruction ins, ssize_t imm, insFlags flags)
+{
+ return validImmForInstr(ins, imm, flags);
+}
+
+bool CodeGenInterface::validDispForLdSt(ssize_t disp, var_types type)
+{
+ if (varTypeIsFloating(type))
+ {
+ if ((disp & 0x3FC) == disp)
+ return true;
+ else
+ return false;
+ }
+ else
+ {
+ if ((disp >= -0x00ff) && (disp <= 0x0fff))
+ return true;
+ else
+ return false;
+ }
+}
+bool CodeGen::arm_Valid_Disp_For_LdSt(ssize_t disp, var_types type)
+{
+ return validDispForLdSt(disp, type);
+}
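+
+// For example (illustrative only): integer loads/stores accept displacements in
+// [-255, 4095], so validDispForLdSt(4095, TYP_INT) is true and
+// validDispForLdSt(-256, TYP_INT) is false; FP displacements must additionally
+// be a multiple of 4 and no larger than 0x3FC.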
+
+bool CodeGenInterface::validImmForAlu(ssize_t imm)
+{
+ return emitter::emitIns_valid_imm_for_alu(imm);
+}
+bool CodeGen::arm_Valid_Imm_For_Alu(ssize_t imm)
+{
+ return validImmForAlu(imm);
+}
+
+bool CodeGenInterface::validImmForMov(ssize_t imm)
+{
+ return emitter::emitIns_valid_imm_for_mov(imm);
+}
+bool CodeGen::arm_Valid_Imm_For_Mov(ssize_t imm)
+{
+ return validImmForMov(imm);
+}
+
+bool CodeGen::arm_Valid_Imm_For_Small_Mov(regNumber reg, ssize_t imm, insFlags flags)
+{
+ return emitter::emitIns_valid_imm_for_small_mov(reg, imm, flags);
+}
+
+bool CodeGenInterface::validImmForAdd(ssize_t imm, insFlags flags)
+{
+ return emitter::emitIns_valid_imm_for_add(imm, flags);
+}
+bool CodeGen::arm_Valid_Imm_For_Add(ssize_t imm, insFlags flags)
+{
+ return emitter::emitIns_valid_imm_for_add(imm, flags);
+}
+
+// Check "add Rd,SP,i10"
+bool CodeGen::arm_Valid_Imm_For_Add_SP(ssize_t imm)
+{
+ return emitter::emitIns_valid_imm_for_add_sp(imm);
+}
+
+bool CodeGenInterface::validImmForBL(ssize_t addr)
+{
+ return
+ // If we are running the altjit for NGEN, then assume we can use the "BL" instruction.
+ // This matches the usual behavior for NGEN, since we normally do generate "BL".
+ (!compiler->info.compMatchedVM && (compiler->opts.eeFlags & CORJIT_FLG_PREJIT)) ||
+ (compiler->eeGetRelocTypeHint((void*)addr) == IMAGE_REL_BASED_THUMB_BRANCH24);
+}
+bool CodeGen::arm_Valid_Imm_For_BL(ssize_t addr)
+{
+ return validImmForBL(addr);
+}
+
+// Returns true if this instruction writes to a destination register
+//
+bool CodeGen::ins_Writes_Dest(instruction ins)
+{
+ switch (ins)
+ {
+
+ case INS_cmp:
+ case INS_cmn:
+ case INS_tst:
+ case INS_teq:
+ return false;
+
+ default:
+ return true;
+ }
+}
+#endif // _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Get the machine dependent instruction for performing sign/zero extension.
+ *
+ * Parameters
+ * srcType - source type
+ * srcInReg - whether source is in a register
+ */
+instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg)
+{
+ instruction ins = INS_invalid;
+
+ if (varTypeIsSIMD(srcType))
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ // SSE2/AVX always requires the destination to be a register.
+ // If the source is already in a register, this is a reg-reg move.
+ //
+ // SSE2 Note: always prefer movaps/movups over movapd/movupd since the
+ // former doesn't require a 66h prefix and is one byte smaller than the
+ // latter.
+ //
+ // TODO-CQ: based on whether src type is aligned use movaps instead
+
+ return (srcInReg) ? INS_movaps : INS_movups;
+#else // !defined(_TARGET_XARCH_) || defined(LEGACY_BACKEND)
+ assert(!"unhandled SIMD type");
+#endif // !defined(_TARGET_XARCH_) || defined(LEGACY_BACKEND)
+ }
+
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (varTypeIsFloating(srcType))
+ {
+ if (srcType == TYP_DOUBLE)
+ {
+ return (srcInReg) ? INS_movaps : INS_movsdsse2;
+ }
+ else if (srcType == TYP_FLOAT)
+ {
+ return (srcInReg) ? INS_movaps : INS_movss;
+ }
+ else
+ {
+ assert(!"unhandled floating type");
+ }
+ }
+#elif defined(_TARGET_ARM_)
+ if (varTypeIsFloating(srcType))
+ return INS_vmov;
+#else
+ assert(!varTypeIsFloating(srcType));
+#endif
+
+#if defined(_TARGET_XARCH_)
+ if (!varTypeIsSmall(srcType))
+ {
+ ins = INS_mov;
+ }
+ else if (varTypeIsUnsigned(srcType))
+ {
+ ins = INS_movzx;
+ }
+ else
+ {
+ ins = INS_movsx;
+ }
+#elif defined(_TARGET_ARM_)
+ //
+ // Register to Register zero/sign extend operation
+ //
+ if (srcInReg)
+ {
+ if (!varTypeIsSmall(srcType))
+ {
+ ins = INS_mov;
+ }
+ else if (varTypeIsUnsigned(srcType))
+ {
+ if (varTypeIsByte(srcType))
+ ins = INS_uxtb;
+ else
+ ins = INS_uxth;
+ }
+ else
+ {
+ if (varTypeIsByte(srcType))
+ ins = INS_sxtb;
+ else
+ ins = INS_sxth;
+ }
+ }
+ else
+ {
+ ins = ins_Load(srcType);
+ }
+#elif defined(_TARGET_ARM64_)
+ //
+ // Register to Register zero/sign extend operation
+ //
+ if (srcInReg)
+ {
+ if (varTypeIsUnsigned(srcType))
+ {
+ if (varTypeIsByte(srcType))
+ {
+ ins = INS_uxtb;
+ }
+ else if (varTypeIsShort(srcType))
+ {
+ ins = INS_uxth;
+ }
+ else
+ {
+ // A mov Rd, Rm instruction performs the zero extend
+ // for the upper 32 bits when the size is EA_4BYTE
+
+ ins = INS_mov;
+ }
+ }
+ else
+ {
+ if (varTypeIsByte(srcType))
+ {
+ ins = INS_sxtb;
+ }
+ else if (varTypeIsShort(srcType))
+ {
+ ins = INS_sxth;
+ }
+ else
+ {
+ if (srcType == TYP_INT)
+ {
+ ins = INS_sxtw;
+ }
+ else
+ {
+ ins = INS_mov;
+ }
+ }
+ }
+ }
+ else
+ {
+ ins = ins_Load(srcType);
+ }
+#else
+ NYI("ins_Move_Extend");
+#endif
+ assert(ins != INS_invalid);
+ return ins;
+}
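+
+// For example (illustrative only), a few of the mappings implied by the cases above:
+// ins_Move_Extend(TYP_UBYTE, true) -> INS_movzx on xarch, INS_uxtb on arm/arm64
+// ins_Move_Extend(TYP_SHORT, true) -> INS_movsx on xarch, INS_sxth on arm/arm64
+// ins_Move_Extend(TYP_INT, true) -> INS_mov on xarch/arm, INS_sxtw on arm64
+// On arm/arm64 a small-typed source that is not in a register falls back to ins_Load().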
+
+/*****************************************************************************
+ *
+ * Get the machine dependent instruction for performing a load for srcType
+ *
+ * Parameters
+ * srcType - source type
+ * aligned - whether source is 16-byte aligned if srcType is a SIMD type
+ */
+instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false*/)
+{
+ instruction ins = INS_invalid;
+
+ if (varTypeIsSIMD(srcType))
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+#ifdef FEATURE_SIMD
+ if (srcType == TYP_SIMD8)
+ {
+ return INS_movsdsse2;
+ }
+ else
+#endif // FEATURE_SIMD
+ if (compiler->canUseAVX())
+ {
+ // TODO-CQ: consider alignment of AVX vectors.
+ return INS_movupd;
+ }
+ else
+ {
+ // SSE2 Note: always prefer movaps/movups over movapd/movupd since the
+ // former doesn't require a 66h prefix and is one byte smaller than the
+ // latter.
+ return (aligned) ? INS_movaps : INS_movups;
+ }
+#else
+ assert(!"ins_Load with SIMD type");
+#endif
+ }
+
+ if (varTypeIsFloating(srcType))
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (srcType == TYP_DOUBLE)
+ {
+ return INS_movsdsse2;
+ }
+ else if (srcType == TYP_FLOAT)
+ {
+ return INS_movss;
+ }
+ else
+ {
+ assert(!"unhandled floating type");
+ }
+#elif defined(_TARGET_ARM64_)
+ return INS_ldr;
+#elif defined(_TARGET_ARM_)
+ return INS_vldr;
+#else
+ assert(!varTypeIsFloating(srcType));
+#endif
+ }
+
+#if defined(_TARGET_XARCH_)
+ if (!varTypeIsSmall(srcType))
+ {
+ ins = INS_mov;
+ }
+ else if (varTypeIsUnsigned(srcType))
+ {
+ ins = INS_movzx;
+ }
+ else
+ {
+ ins = INS_movsx;
+ }
+
+#elif defined(_TARGET_ARMARCH_)
+ if (!varTypeIsSmall(srcType))
+ {
+#if defined(_TARGET_ARM64_)
+ if (!varTypeIsI(srcType) && !varTypeIsUnsigned(srcType))
+ {
+ ins = INS_ldrsw;
+ }
+ else
+#endif // defined(_TARGET_ARM64_)
+ {
+ ins = INS_ldr;
+ }
+ }
+ else if (varTypeIsByte(srcType))
+ {
+ if (varTypeIsUnsigned(srcType))
+ ins = INS_ldrb;
+ else
+ ins = INS_ldrsb;
+ }
+ else if (varTypeIsShort(srcType))
+ {
+ if (varTypeIsUnsigned(srcType))
+ ins = INS_ldrh;
+ else
+ ins = INS_ldrsh;
+ }
+#else
+ NYI("ins_Load");
+#endif
+
+ assert(ins != INS_invalid);
+ return ins;
+}
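+
+// For example (illustrative only): ins_Load(TYP_UBYTE) yields INS_movzx on xarch
+// and INS_ldrb on arm/arm64; ins_Load(TYP_SHORT) yields INS_movsx on xarch and
+// INS_ldrsh on arm/arm64; ins_Load(TYP_DOUBLE) yields INS_movsdsse2 on xarch,
+// INS_vldr on arm, and INS_ldr on arm64.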
+
+/*****************************************************************************
+ *
+ * Get the machine dependent instruction for performing a reg-reg copy for dstType
+ *
+ * Parameters
+ * dstType - destination type
+ */
+instruction CodeGen::ins_Copy(var_types dstType)
+{
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (varTypeIsSIMD(dstType))
+ {
+ return INS_movaps;
+ }
+ else if (varTypeIsFloating(dstType))
+ {
+ // Both float and double copy can use movaps
+ return INS_movaps;
+ }
+ else
+ {
+ return INS_mov;
+ }
+#elif defined(_TARGET_ARM64_)
+ if (varTypeIsFloating(dstType))
+ {
+ return INS_fmov;
+ }
+ else
+ {
+ return INS_mov;
+ }
+#elif defined(_TARGET_ARM_)
+ assert(!varTypeIsSIMD(dstType));
+ if (varTypeIsFloating(dstType))
+ {
+ return INS_vmov;
+ }
+ else
+ {
+ return INS_mov;
+ }
+#elif defined(_TARGET_X86_)
+ assert(!varTypeIsSIMD(dstType));
+ assert(!varTypeIsFloating(dstType));
+ return INS_mov;
+#else // _TARGET_*
+#error "Unknown _TARGET_"
+#endif
+}
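+
+// For example (illustrative only): ins_Copy(TYP_INT) is INS_mov on every target,
+// while ins_Copy(TYP_DOUBLE) is INS_movaps on xarch, INS_fmov on arm64, and
+// INS_vmov on arm.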
+
+/*****************************************************************************
+ *
+ * Get the machine dependent instruction for performing a store for dstType
+ *
+ * Parameters
+ * dstType - destination type
+ * aligned - whether destination is 16-byte aligned if dstType is a SIMD type
+ */
+instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false*/)
+{
+ instruction ins = INS_invalid;
+
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (varTypeIsSIMD(dstType))
+ {
+#ifdef FEATURE_SIMD
+ if (dstType == TYP_SIMD8)
+ {
+ return INS_movsdsse2;
+ }
+ else
+#endif // FEATURE_SIMD
+ if (compiler->canUseAVX())
+ {
+ // TODO-CQ: consider alignment of AVX vectors.
+ return INS_movupd;
+ }
+ else
+ {
+ // SSE2 Note: always prefer movaps/movups over movapd/movupd since the
+ // former doesn't require a 66h prefix and is one byte smaller than the
+ // latter.
+ return (aligned) ? INS_movaps : INS_movups;
+ }
+ }
+ else if (varTypeIsFloating(dstType))
+ {
+ if (dstType == TYP_DOUBLE)
+ {
+ return INS_movsdsse2;
+ }
+ else if (dstType == TYP_FLOAT)
+ {
+ return INS_movss;
+ }
+ else
+ {
+ assert(!"unhandled floating type");
+ }
+ }
+#elif defined(_TARGET_ARM64_)
+ if (varTypeIsSIMD(dstType) || varTypeIsFloating(dstType))
+ {
+ // All sizes of SIMD and FP instructions use INS_str
+ return INS_str;
+ }
+#elif defined(_TARGET_ARM_)
+ assert(!varTypeIsSIMD(dstType));
+ if (varTypeIsFloating(dstType))
+ {
+ return INS_vstr;
+ }
+#else
+ assert(!varTypeIsSIMD(dstType));
+ assert(!varTypeIsFloating(dstType));
+#endif
+
+#if defined(_TARGET_XARCH_)
+ ins = INS_mov;
+#elif defined(_TARGET_ARMARCH_)
+ if (!varTypeIsSmall(dstType))
+ ins = INS_str;
+ else if (varTypeIsByte(dstType))
+ ins = INS_strb;
+ else if (varTypeIsShort(dstType))
+ ins = INS_strh;
+#else
+ NYI("ins_Store");
+#endif
+
+ assert(ins != INS_invalid);
+ return ins;
+}
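+
+// For example (illustrative only): ins_Store(TYP_INT) yields INS_mov on xarch and
+// INS_str on arm/arm64; ins_Store(TYP_UBYTE) yields INS_mov on xarch and INS_strb
+// on arm/arm64; ins_Store(TYP_FLOAT) yields INS_movss on xarch, INS_vstr on arm,
+// and INS_str on arm64.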
+
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+
+bool CodeGen::isMoveIns(instruction ins)
+{
+ return (ins == INS_mov);
+}
+
+instruction CodeGenInterface::ins_FloatLoad(var_types type)
+{
+ // Do Not use this routine in RyuJIT backend. Instead use ins_Load()/ins_Store()
+ unreached();
+}
+
+// everything is just an addressing mode variation on x64
+instruction CodeGen::ins_FloatStore(var_types type)
+{
+ // Do Not use this routine in RyuJIT backend. Instead use ins_Store()
+ unreached();
+}
+
+instruction CodeGen::ins_FloatCopy(var_types type)
+{
+ // Do Not use this routine in RyuJIT backend. Instead use ins_Copy().
+ unreached();
+}
+
+instruction CodeGen::ins_FloatCompare(var_types type)
+{
+ return (type == TYP_FLOAT) ? INS_ucomiss : INS_ucomisd;
+}
+
+instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType)
+{
+ // On SSE2/AVX - the same instruction is used for moving double/quad word to XMM/YMM register.
+ assert((srcType == TYP_INT) || (srcType == TYP_UINT) || (srcType == TYP_LONG) || (srcType == TYP_ULONG));
+ return INS_mov_i2xmm;
+}
+
+instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType)
+{
+ // On SSE2/AVX - the same instruction is used for moving double/quad word of XMM/YMM to an integer register.
+ assert((dstType == TYP_INT) || (dstType == TYP_UINT) || (dstType == TYP_LONG) || (dstType == TYP_ULONG));
+ return INS_mov_xmm2i;
+}
+
+instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type)
+{
+ switch (oper)
+ {
+ case GT_ADD:
+ case GT_ASG_ADD:
+ return type == TYP_DOUBLE ? INS_addsd : INS_addss;
+ break;
+ case GT_SUB:
+ case GT_ASG_SUB:
+ return type == TYP_DOUBLE ? INS_subsd : INS_subss;
+ break;
+ case GT_MUL:
+ case GT_ASG_MUL:
+ return type == TYP_DOUBLE ? INS_mulsd : INS_mulss;
+ break;
+ case GT_DIV:
+ case GT_ASG_DIV:
+ return type == TYP_DOUBLE ? INS_divsd : INS_divss;
+ case GT_AND:
+ return type == TYP_DOUBLE ? INS_andpd : INS_andps;
+ case GT_OR:
+ return type == TYP_DOUBLE ? INS_orpd : INS_orps;
+ case GT_XOR:
+ return type == TYP_DOUBLE ? INS_xorpd : INS_xorps;
+ default:
+ unreached();
+ }
+}
+
+instruction CodeGen::ins_FloatSqrt(var_types type)
+{
+ instruction ins = INS_invalid;
+
+ if (type == TYP_DOUBLE)
+ {
+ ins = INS_sqrtsd;
+ }
+ else
+ {
+ // Right now sqrt of scalar single is not needed.
+ unreached();
+ }
+
+ return ins;
+}
+
+// Conversions to or from floating point values
+instruction CodeGen::ins_FloatConv(var_types to, var_types from)
+{
+ // AVX: For now we support only conversion from Int/Long -> float
+
+ switch (from)
+ {
+ // int/long -> float/double use the same instruction but type size would be different.
+ case TYP_INT:
+ case TYP_LONG:
+ switch (to)
+ {
+ case TYP_FLOAT:
+ return INS_cvtsi2ss;
+ case TYP_DOUBLE:
+ return INS_cvtsi2sd;
+ default:
+ unreached();
+ }
+ break;
+
+ case TYP_FLOAT:
+ switch (to)
+ {
+ case TYP_INT:
+ return INS_cvttss2si;
+ case TYP_LONG:
+ return INS_cvttss2si;
+ case TYP_FLOAT:
+ return ins_Move_Extend(TYP_FLOAT, false);
+ case TYP_DOUBLE:
+ return INS_cvtss2sd;
+ default:
+ unreached();
+ }
+ break;
+
+ case TYP_DOUBLE:
+ switch (to)
+ {
+ case TYP_INT:
+ return INS_cvttsd2si;
+ case TYP_LONG:
+ return INS_cvttsd2si;
+ case TYP_FLOAT:
+ return INS_cvtsd2ss;
+ case TYP_DOUBLE:
+ return ins_Move_Extend(TYP_DOUBLE, false);
+ default:
+ unreached();
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
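+
+// For example (illustrative only): ins_FloatConv(TYP_DOUBLE, TYP_INT) yields
+// INS_cvtsi2sd, ins_FloatConv(TYP_INT, TYP_DOUBLE) yields INS_cvttsd2si, and
+// ins_FloatConv(TYP_DOUBLE, TYP_FLOAT) yields INS_cvtss2sd; the emitAttr passed
+// to the emitter is what distinguishes the int and long flavors.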
+
+#elif defined(_TARGET_ARM_)
+
+bool CodeGen::isMoveIns(instruction ins)
+{
+ return (ins == INS_vmov) || (ins == INS_mov);
+}
+
+instruction CodeGenInterface::ins_FloatLoad(var_types type)
+{
+ assert(type == TYP_DOUBLE || type == TYP_FLOAT);
+ return INS_vldr;
+}
+instruction CodeGen::ins_FloatStore(var_types type)
+{
+ assert(type == TYP_DOUBLE || type == TYP_FLOAT);
+ return INS_vstr;
+}
+instruction CodeGen::ins_FloatCopy(var_types type)
+{
+ assert(type == TYP_DOUBLE || type == TYP_FLOAT);
+ return INS_vmov;
+}
+
+instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType)
+{
+ // Not used and not implemented
+ unreached();
+}
+
+instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType)
+{
+ // Not used and not implemented
+ unreached();
+}
+
+instruction CodeGen::ins_FloatCompare(var_types type)
+{
+ // Not used and not implemented
+ unreached();
+}
+
+instruction CodeGen::ins_FloatSqrt(var_types type)
+{
+ // Not used and not implemented
+ unreached();
+}
+
+instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type)
+{
+ switch (oper)
+ {
+ case GT_ADD:
+ case GT_ASG_ADD:
+ return INS_vadd;
+ break;
+ case GT_SUB:
+ case GT_ASG_SUB:
+ return INS_vsub;
+ break;
+ case GT_MUL:
+ case GT_ASG_MUL:
+ return INS_vmul;
+ break;
+ case GT_DIV:
+ case GT_ASG_DIV:
+ return INS_vdiv;
+ case GT_NEG:
+ return INS_vneg;
+ default:
+ unreached();
+ }
+}
+
+instruction CodeGen::ins_FloatConv(var_types to, var_types from)
+{
+ switch (from)
+ {
+ case TYP_INT:
+ switch (to)
+ {
+ case TYP_FLOAT:
+ return INS_vcvt_i2f;
+ case TYP_DOUBLE:
+ return INS_vcvt_i2d;
+ default:
+ unreached();
+ }
+ break;
+ case TYP_UINT:
+ switch (to)
+ {
+ case TYP_FLOAT:
+ return INS_vcvt_u2f;
+ case TYP_DOUBLE:
+ return INS_vcvt_u2d;
+ default:
+ unreached();
+ }
+ break;
+ case TYP_LONG:
+ switch (to)
+ {
+ case TYP_FLOAT:
+ NYI("long to float");
+ case TYP_DOUBLE:
+ NYI("long to double");
+ default:
+ unreached();
+ }
+ break;
+ case TYP_FLOAT:
+ switch (to)
+ {
+ case TYP_INT:
+ return INS_vcvt_f2i;
+ case TYP_UINT:
+ return INS_vcvt_f2u;
+ case TYP_LONG:
+ NYI("float to long");
+ case TYP_DOUBLE:
+ return INS_vcvt_f2d;
+ case TYP_FLOAT:
+ return INS_vmov;
+ default:
+ unreached();
+ }
+ break;
+ case TYP_DOUBLE:
+ switch (to)
+ {
+ case TYP_INT:
+ return INS_vcvt_d2i;
+ case TYP_UINT:
+ return INS_vcvt_d2u;
+ case TYP_LONG:
+ NYI("double to long");
+ case TYP_FLOAT:
+ return INS_vcvt_d2f;
+ case TYP_DOUBLE:
+ return INS_vmov;
+ default:
+ unreached();
+ }
+ break;
+ default:
+ unreached();
+ }
+}
+
+#endif // #elif defined(_TARGET_ARM_)
+
+/*****************************************************************************
+ *
+ * Machine independent way to return
+ */
+void CodeGen::instGen_Return(unsigned stkArgSize)
+{
+#if defined(_TARGET_XARCH_)
+ if (stkArgSize == 0)
+ {
+ instGen(INS_ret);
+ }
+ else
+ {
+ inst_IV(INS_ret, stkArgSize);
+ }
+#elif defined(_TARGET_ARM_)
+//
+// The return on ARM is folded into the "pop multiple" instruction, and since
+// we do not yet know the exact set of registers that will need to be restored
+// (popped) when instGen_Return is first called, we emit nothing here. The
+// return becomes part of the pop multiple emitted in the epilog generated by
+// genFnEpilog().
+#elif defined(_TARGET_ARM64_)
+ // This function shouldn't be used on ARM64.
+ unreached();
+#else
+ NYI("instGen_Return");
+#endif
+}
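+
+// For example (illustrative only), on x86 instGen_Return(8) emits "ret 8" to pop
+// the 8 bytes of caller-pushed arguments, while instGen_Return(0) emits a plain "ret".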
+
+/*****************************************************************************
+ *
+ * Emit a MemoryBarrier instruction
+ *
+ * Note: all MemoryBarrier instructions can be removed by setting
+ * COMPlus_JitNoMemoryBarriers=1 (checked only in DEBUG builds)
+ */
+void CodeGen::instGen_MemoryBarrier()
+{
+#ifdef DEBUG
+ if (JitConfig.JitNoMemoryBarriers() == 1)
+ {
+ return;
+ }
+#endif // DEBUG
+
+#if defined(_TARGET_XARCH_)
+ instGen(INS_lock);
+ getEmitter()->emitIns_I_AR(INS_or, EA_4BYTE, 0, REG_SPBASE, 0);
+#elif defined(_TARGET_ARM_)
+ getEmitter()->emitIns_I(INS_dmb, EA_4BYTE, 0xf);
+#elif defined(_TARGET_ARM64_)
+ getEmitter()->emitIns_BARR(INS_dmb, INS_BARRIER_SY);
+#else
+#error "Unknown _TARGET_"
+#endif
+}
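+
+// For example (illustrative only), the barriers emitted above are
+// "lock or dword ptr [esp/rsp], 0" on xarch, "dmb" with option 0xf (SY, full
+// system) on arm, and "dmb sy" on arm64.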
+
+/*****************************************************************************
+ *
+ * Machine independent way to move a Zero value into a register
+ */
+void CodeGen::instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags flags)
+{
+#if defined(_TARGET_XARCH_)
+ getEmitter()->emitIns_R_R(INS_xor, size, reg, reg);
+#elif defined(_TARGET_ARMARCH_)
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, 0 ARM_ARG(flags));
+#else
+#error "Unknown _TARGET_"
+#endif
+ regTracker.rsTrackRegIntCns(reg, 0);
+}
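+
+// For example (illustrative only), zeroing a register this way produces
+// "xor reg, reg" on xarch and "mov reg, #0" on arm/arm64.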
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ *
+ * Machine independent way to move an immediate value into a register
+ */
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
+{
+#if RELOC_SUPPORT
+ if (!compiler->opts.compReloc)
+#endif // RELOC_SUPPORT
+ {
+ size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
+ }
+
+ if ((imm == 0) && !EA_IS_RELOC(size))
+ {
+ instGen_Set_Reg_To_Zero(size, reg, flags);
+ }
+ else
+ {
+#if defined(_TARGET_XARCH_)
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
+#elif defined(_TARGET_ARM_)
+
+ if (EA_IS_RELOC(size))
+ {
+ getEmitter()->emitIns_R_I(INS_movw, size, reg, imm);
+ getEmitter()->emitIns_R_I(INS_movt, size, reg, imm);
+ }
+ else if (arm_Valid_Imm_For_Mov(imm))
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm, flags);
+ }
+ else // We have to use a movw/movt pair of instructions
+ {
+ ssize_t imm_lo16 = (imm & 0xffff);
+ ssize_t imm_hi16 = (imm >> 16) & 0xffff;
+
+ assert(arm_Valid_Imm_For_Mov(imm_lo16));
+ assert(imm_hi16 != 0);
+
+ getEmitter()->emitIns_R_I(INS_movw, size, reg, imm_lo16);
+
+ // If we've got a low register, the high word is all bits set, and
+ // the high bit of the low word is set, then we can sign-extend the
+ // halfword and save two bytes of encoding. This can happen for
+ // small-magnitude negative numbers 'n' with -32768 <= n <= -1.
+
+ if (getEmitter()->isLowRegister(reg) && (imm_hi16 == 0xffff) && ((imm_lo16 & 0x8000) == 0x8000))
+ {
+ getEmitter()->emitIns_R_R(INS_sxth, EA_2BYTE, reg, reg);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_movt, size, reg, imm_hi16);
+ }
+
+ if (flags == INS_FLAGS_SET)
+ getEmitter()->emitIns_R_R(INS_mov, size, reg, reg, INS_FLAGS_SET);
+ }
+#elif defined(_TARGET_ARM64_)
+ NYI_ARM64("instGen_Set_Reg_To_Imm");
+#else
+#error "Unknown _TARGET_"
+#endif
+ }
+ regTracker.rsTrackRegIntCns(reg, imm);
+}
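+
+// For example (illustrative only), loading 0x12345678 on ARM takes the movw/movt
+// path above: imm_lo16 = 0x5678 and imm_hi16 = 0x1234, so we emit
+// "movw reg, #0x5678" followed by "movt reg, #0x1234".
+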
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Machine independent way to set the flags based on
+ * comparing a register with zero
+ */
+void CodeGen::instGen_Compare_Reg_To_Zero(emitAttr size, regNumber reg)
+{
+#if defined(_TARGET_XARCH_)
+ getEmitter()->emitIns_R_R(INS_test, size, reg, reg);
+#elif defined(_TARGET_ARMARCH_)
+ getEmitter()->emitIns_R_I(INS_cmp, size, reg, 0);
+#else
+#error "Unknown _TARGET_"
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Machine independent way to set the flags based upon
+ * comparing a register with another register
+ */
+void CodeGen::instGen_Compare_Reg_To_Reg(emitAttr size, regNumber reg1, regNumber reg2)
+{
+#if defined(_TARGET_XARCH_) || defined(_TARGET_ARMARCH_)
+ getEmitter()->emitIns_R_R(INS_cmp, size, reg1, reg2);
+#else
+#error "Unknown _TARGET_"
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Machine independent way to set the flags based upon
+ * comparing a register with an immediate
+ */
+void CodeGen::instGen_Compare_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm)
+{
+ if (imm == 0)
+ {
+ instGen_Compare_Reg_To_Zero(size, reg);
+ }
+ else
+ {
+#if defined(_TARGET_XARCH_)
+#if defined(_TARGET_AMD64_)
+ if ((EA_SIZE(size) == EA_8BYTE) && (((int)imm != (ssize_t)imm) || EA_IS_CNS_RELOC(size)))
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"Invalid immediate for instGen_Compare_Reg_To_Imm");
+#else // LEGACY_BACKEND
+ // Load imm into a register
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(size, immReg, (ssize_t)imm);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_TYPE(size), reg, immReg);
+#endif // LEGACY_BACKEND
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, size, reg, imm);
+ }
+#elif defined(_TARGET_ARM_)
+ if (arm_Valid_Imm_For_Alu(imm) || arm_Valid_Imm_For_Alu(-imm))
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, size, reg, imm);
+ }
+ else // We need a scratch register
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"Invalid immediate for instGen_Compare_Reg_To_Imm");
+#else // LEGACY_BACKEND
+ // Load imm into a register
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(size, immReg, (ssize_t)imm);
+ getEmitter()->emitIns_R_R(INS_cmp, size, reg, immReg);
+#endif // !LEGACY_BACKEND
+ }
+#elif defined(_TARGET_ARM64_)
+ if (true) // TODO-ARM64-NYI: arm_Valid_Imm_For_Alu(imm) || arm_Valid_Imm_For_Alu(-imm))
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, size, reg, imm);
+ }
+ else // We need a scratch register
+ {
+ assert(!"Invalid immediate for instGen_Compare_Reg_To_Imm");
+ }
+#else
+#error "Unknown _TARGET_"
+#endif
+ }
+}
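+
+// For example (illustrative only), instGen_Compare_Reg_To_Imm(EA_4BYTE, reg, 10)
+// emits a "cmp reg, 10" style compare on all targets, while an immediate of zero
+// is routed through instGen_Compare_Reg_To_Zero and becomes "test reg, reg" on xarch.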
+
+/*****************************************************************************
+ *
+ * Machine independent way to move a stack based local variable into a register
+ */
+void CodeGen::instGen_Load_Reg_From_Lcl(var_types srcType, regNumber dstReg, int varNum, int offs)
+{
+ emitAttr size = emitTypeSize(srcType);
+
+ getEmitter()->emitIns_R_S(ins_Load(srcType), size, dstReg, varNum, offs);
+}
+
+/*****************************************************************************
+ *
+ * Machine independent way to move a register into a stack based local variable
+ */
+void CodeGen::instGen_Store_Reg_Into_Lcl(var_types dstType, regNumber srcReg, int varNum, int offs)
+{
+ emitAttr size = emitTypeSize(dstType);
+
+ getEmitter()->emitIns_S_R(ins_Store(dstType), size, srcReg, varNum, offs);
+}
+
+/*****************************************************************************
+ *
+ * Machine independent way to move an immediate into a stack based local variable
+ */
+void CodeGen::instGen_Store_Imm_Into_Lcl(
+ var_types dstType, emitAttr sizeAttr, ssize_t imm, int varNum, int offs, regNumber regToUse)
+{
+#ifdef _TARGET_XARCH_
+#ifdef _TARGET_AMD64_
+ if ((EA_SIZE(sizeAttr) == EA_8BYTE) && (((int)imm != (ssize_t)imm) || EA_IS_CNS_RELOC(sizeAttr)))
+ {
+ assert(!"Invalid immediate for instGen_Store_Imm_Into_Lcl");
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ getEmitter()->emitIns_S_I(ins_Store(dstType), sizeAttr, varNum, offs, (int)imm);
+ }
+#elif defined(_TARGET_ARMARCH_)
+ // Load imm into a register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ regNumber immReg = regToUse;
+ assert(regToUse != REG_NA);
+#else // LEGACY_BACKEND
+ regNumber immReg = (regToUse == REG_NA) ? regSet.rsGrabReg(RBM_ALLINT) : regToUse;
+#endif // LEGACY_BACKEND
+ instGen_Set_Reg_To_Imm(sizeAttr, immReg, (ssize_t)imm);
+ instGen_Store_Reg_Into_Lcl(dstType, immReg, varNum, offs);
+ if (EA_IS_RELOC(sizeAttr))
+ {
+ regTracker.rsTrackRegTrash(immReg);
+ }
+#else // _TARGET_*
+#error "Unknown _TARGET_"
+#endif // _TARGET_*
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+/*****************************************************************************/
diff --git a/src/jit/instr.h b/src/jit/instr.h
new file mode 100644
index 0000000000..c38f8d2073
--- /dev/null
+++ b/src/jit/instr.h
@@ -0,0 +1,301 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#ifndef _INSTR_H_
+#define _INSTR_H_
+/*****************************************************************************/
+
+#define BAD_CODE 0x0BADC0DE // better not match a real encoding!
+
+/*****************************************************************************/
+
+// clang-format off
+DECLARE_TYPED_ENUM(instruction,unsigned)
+{
+#if defined(_TARGET_XARCH_)
+ #define INST0(id, nm, fp, um, rf, wf, mr ) INS_##id,
+ #define INST1(id, nm, fp, um, rf, wf, mr ) INS_##id,
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) INS_##id,
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) INS_##id,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) INS_##id,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) INS_##id,
+ #include "instrs.h"
+
+#elif defined(_TARGET_ARM_)
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) INS_##id,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) INS_##id,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) INS_##id,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) INS_##id,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) INS_##id,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) INS_##id,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) INS_##id,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) INS_##id,
+ #include "instrs.h"
+ #include "x86_instrs.h"
+
+#elif defined(_TARGET_ARM64_)
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) INS_##id,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) INS_##id,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) INS_##id,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) INS_##id,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) INS_##id,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) INS_##id,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) INS_##id,
+ #include "instrs.h"
+
+ INS_lea, // Not a real instruction. It is used to load the address of stack locals.
+
+#else
+#error Unsupported target architecture
+#endif
+
+ INS_none,
+ INS_count = INS_none
+}
+END_DECLARE_TYPED_ENUM(instruction,unsigned)
+
+/*****************************************************************************/
+
+enum insUpdateModes
+{
+ IUM_RD,
+ IUM_WR,
+ IUM_RW,
+};
+
+/*****************************************************************************/
+
+enum emitJumpKind
+{
+ EJ_NONE,
+
+ #define JMP_SMALL(en, rev, ins) EJ_##en,
+ #include "emitjmps.h"
+
+ EJ_COUNT
+};
+
+/*****************************************************************************/
+
+DECLARE_TYPED_ENUM(GCtype,unsigned)
+{
+ GCT_NONE,
+ GCT_GCREF,
+ GCT_BYREF
+}
+END_DECLARE_TYPED_ENUM(GCtype,unsigned)
+
+// TODO-Cleanup: Move 'insFlags' under _TARGET_ARM_
+DECLARE_TYPED_ENUM(insFlags,unsigned)
+{
+ INS_FLAGS_NOT_SET,
+ INS_FLAGS_SET,
+ INS_FLAGS_DONT_CARE
+}
+END_DECLARE_TYPED_ENUM(insFlags,unsigned)
+
+#if defined(_TARGET_ARM_)
+DECLARE_TYPED_ENUM(insOpts,unsigned)
+{
+ INS_OPTS_NONE,
+ INS_OPTS_LDST_PRE_DEC,
+ INS_OPTS_LDST_POST_INC,
+
+ INS_OPTS_RRX,
+ INS_OPTS_LSL,
+ INS_OPTS_LSR,
+ INS_OPTS_ASR,
+ INS_OPTS_ROR
+}
+END_DECLARE_TYPED_ENUM(insOpts,unsigned)
+#elif defined(_TARGET_ARM64_)
+DECLARE_TYPED_ENUM(insOpts,unsigned)
+{
+ INS_OPTS_NONE,
+
+ INS_OPTS_PRE_INDEX,
+ INS_OPTS_POST_INDEX,
+
+ INS_OPTS_LSL12,
+
+ INS_OPTS_LSL = 4,
+ INS_OPTS_LSR,
+ INS_OPTS_ASR,
+ INS_OPTS_ROR,
+
+ INS_OPTS_UXTB = 8,
+ INS_OPTS_UXTH,
+ INS_OPTS_UXTW,
+ INS_OPTS_UXTX,
+ INS_OPTS_SXTB,
+ INS_OPTS_SXTH,
+ INS_OPTS_SXTW,
+ INS_OPTS_SXTX,
+
+ INS_OPTS_8B = 16,
+ INS_OPTS_16B,
+ INS_OPTS_4H,
+ INS_OPTS_8H,
+ INS_OPTS_2S,
+ INS_OPTS_4S,
+ INS_OPTS_1D,
+ INS_OPTS_2D,
+
+ INS_OPTS_MSL, // Vector Immediate (shifting ones variant)
+
+ INS_OPTS_S_TO_4BYTE, // Single to INT32
+ INS_OPTS_D_TO_4BYTE, // Double to INT32
+
+ INS_OPTS_S_TO_8BYTE, // Single to INT64
+ INS_OPTS_D_TO_8BYTE, // Double to INT64
+
+ INS_OPTS_4BYTE_TO_S, // INT32 to Single
+ INS_OPTS_4BYTE_TO_D, // INT32 to Double
+
+ INS_OPTS_8BYTE_TO_S, // INT64 to Single
+ INS_OPTS_8BYTE_TO_D, // INT64 to Double
+
+ INS_OPTS_S_TO_D, // Single to Double
+ INS_OPTS_D_TO_S, // Double to Single
+
+ INS_OPTS_H_TO_S, // Half to Single
+ INS_OPTS_H_TO_D, // Half to Double
+
+ INS_OPTS_S_TO_H, // Single to Half
+ INS_OPTS_D_TO_H, // Double to Half
+}
+END_DECLARE_TYPED_ENUM(insOpts,unsigned)
+
+DECLARE_TYPED_ENUM(insCond,unsigned)
+{
+ INS_COND_EQ,
+ INS_COND_NE,
+ INS_COND_HS,
+ INS_COND_LO,
+
+ INS_COND_MI,
+ INS_COND_PL,
+ INS_COND_VS,
+ INS_COND_VC,
+
+ INS_COND_HI,
+ INS_COND_LS,
+ INS_COND_GE,
+ INS_COND_LT,
+
+ INS_COND_GT,
+ INS_COND_LE,
+}
+END_DECLARE_TYPED_ENUM(insCond,unsigned)
+
+DECLARE_TYPED_ENUM(insCflags,unsigned)
+{
+ INS_FLAGS_NONE,
+ INS_FLAGS_V,
+ INS_FLAGS_C,
+ INS_FLAGS_CV,
+
+ INS_FLAGS_Z,
+ INS_FLAGS_ZV,
+ INS_FLAGS_ZC,
+ INS_FLAGS_ZCV,
+
+ INS_FLAGS_N,
+ INS_FLAGS_NV,
+ INS_FLAGS_NC,
+ INS_FLAGS_NCV,
+
+ INS_FLAGS_NZ,
+ INS_FLAGS_NZV,
+ INS_FLAGS_NZC,
+ INS_FLAGS_NZCV,
+}
+END_DECLARE_TYPED_ENUM(insCflags,unsigned)
+
+DECLARE_TYPED_ENUM(insBarrier,unsigned)
+{
+ INS_BARRIER_OSHLD = 1,
+ INS_BARRIER_OSHST = 2,
+ INS_BARRIER_OSH = 3,
+
+ INS_BARRIER_NSHLD = 5,
+ INS_BARRIER_NSHST = 6,
+ INS_BARRIER_NSH = 7,
+
+ INS_BARRIER_ISHLD = 9,
+ INS_BARRIER_ISHST = 10,
+ INS_BARRIER_ISH = 11,
+
+ INS_BARRIER_LD = 13,
+ INS_BARRIER_ST = 14,
+ INS_BARRIER_SY = 15,
+}
+END_DECLARE_TYPED_ENUM(insBarrier,unsigned)
+#endif
+
+#undef EA_UNKNOWN
+DECLARE_TYPED_ENUM(emitAttr,unsigned)
+{
+ EA_UNKNOWN = 0x000,
+ EA_1BYTE = 0x001,
+ EA_2BYTE = 0x002,
+ EA_4BYTE = 0x004,
+ EA_8BYTE = 0x008,
+ EA_16BYTE = 0x010,
+ EA_32BYTE = 0x020,
+ EA_SIZE_MASK = 0x03F,
+
+#ifdef _TARGET_64BIT_
+ EA_PTRSIZE = EA_8BYTE,
+#else
+ EA_PTRSIZE = EA_4BYTE,
+#endif
+
+ EA_OFFSET_FLG = 0x040,
+ EA_OFFSET = EA_OFFSET_FLG | EA_PTRSIZE, /* size == 0 */
+ EA_GCREF_FLG = 0x080,
+ EA_GCREF = EA_GCREF_FLG | EA_PTRSIZE, /* size == -1 */
+ EA_BYREF_FLG = 0x100,
+ EA_BYREF = EA_BYREF_FLG | EA_PTRSIZE, /* size == -2 */
+ EA_DSP_RELOC_FLG = 0x200,
+ EA_CNS_RELOC_FLG = 0x400,
+}
+END_DECLARE_TYPED_ENUM(emitAttr,unsigned)
+
+#define EA_ATTR(x) ((emitAttr)(x))
+#define EA_SIZE(x) ((emitAttr)(((unsigned)(x)) & EA_SIZE_MASK))
+#define EA_SIZE_IN_BYTES(x) ((UNATIVE_OFFSET)(EA_SIZE(x)))
+#define EA_SET_SIZE(x, sz) ((emitAttr)((((unsigned)(x)) & ~EA_SIZE_MASK) | sz))
+#define EA_SET_FLG(x, flg) ((emitAttr)(((unsigned)(x)) | flg))
+#define EA_4BYTE_DSP_RELOC (EA_SET_FLG(EA_4BYTE, EA_DSP_RELOC_FLG))
+#define EA_PTR_DSP_RELOC (EA_SET_FLG(EA_PTRSIZE, EA_DSP_RELOC_FLG))
+#define EA_HANDLE_CNS_RELOC (EA_SET_FLG(EA_PTRSIZE, EA_CNS_RELOC_FLG))
+#define EA_IS_OFFSET(x) ((((unsigned)(x)) & ((unsigned)EA_OFFSET_FLG)) != 0)
+#define EA_IS_GCREF(x) ((((unsigned)(x)) & ((unsigned)EA_GCREF_FLG)) != 0)
+#define EA_IS_BYREF(x) ((((unsigned)(x)) & ((unsigned)EA_BYREF_FLG)) != 0)
+#define EA_IS_GCREF_OR_BYREF(x) ((((unsigned)(x)) & ((unsigned)(EA_BYREF_FLG | EA_GCREF_FLG))) != 0)
+#define EA_IS_DSP_RELOC(x) ((((unsigned)(x)) & ((unsigned)EA_DSP_RELOC_FLG)) != 0)
+#define EA_IS_CNS_RELOC(x) ((((unsigned)(x)) & ((unsigned)EA_CNS_RELOC_FLG)) != 0)
+#define EA_IS_RELOC(x) (EA_IS_DSP_RELOC(x) || EA_IS_CNS_RELOC(x))
+#define EA_TYPE(x) ((emitAttr)(((unsigned)(x)) & ~(EA_OFFSET_FLG | EA_DSP_RELOC_FLG | EA_CNS_RELOC_FLG)))
+
+#define EmitSize(x) (EA_ATTR(genTypeSize(TypeGet(x))))
+
+// Enum specifying the instruction set for generating floating point or SIMD code.
+enum InstructionSet
+{
+#ifdef _TARGET_XARCH_
+ InstructionSet_SSE2,
+ InstructionSet_AVX,
+#elif defined(_TARGET_ARM_)
+ InstructionSet_NEON,
+#endif
+ InstructionSet_NONE
+};
+// clang-format on
+
+/*****************************************************************************/
+#endif //_INSTR_H_
+/*****************************************************************************/
diff --git a/src/jit/instrs.h b/src/jit/instrs.h
new file mode 100644
index 0000000000..2f5c14fc6f
--- /dev/null
+++ b/src/jit/instrs.h
@@ -0,0 +1,13 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if defined(_TARGET_XARCH_)
+#include "instrsxarch.h"
+#elif defined(_TARGET_ARM_)
+#include "instrsarm.h"
+#elif defined(_TARGET_ARM64_)
+#include "instrsarm64.h"
+#else
+#error Unsupported or unset target architecture
+#endif // target type
diff --git a/src/jit/instrsarm.h b/src/jit/instrsarm.h
new file mode 100644
index 0000000000..d1a77f8ebb
--- /dev/null
+++ b/src/jit/instrsarm.h
@@ -0,0 +1,557 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************
+ * Arm Thumb1/Thumb2 instructions for JIT compiler
+ *
+ * id -- the enum name for the instruction
+ * nm -- textual name (for assembly display)
+ * fp -- floating point instruction
+ * ld/st/cmp -- load/store/compare instruction
+ * fmt -- encoding format used by this instruction
+ * e1 -- encoding 1
+ * e2 -- encoding 2
+ * e3 -- encoding 3
+ * e4 -- encoding 4
+ * e5 -- encoding 5
+ * e6 -- encoding 6
+ * e7 -- encoding 7
+ * e8 -- encoding 8
+ * e9 -- encoding 9
+ *
+******************************************************************************/
+
+#if !defined(_TARGET_ARM_)
+#error Unexpected target type
+#endif
+
+#ifndef INST1
+#error INST1 must be defined before including this file.
+#endif
+#ifndef INST2
+#error INST2 must be defined before including this file.
+#endif
+#ifndef INST3
+#error INST3 must be defined before including this file.
+#endif
+#ifndef INST4
+#error INST4 must be defined before including this file.
+#endif
+#ifndef INST5
+#error INST5 must be defined before including this file.
+#endif
+#ifndef INST6
+#error INST6 must be defined before including this file.
+#endif
+// No INST7
+// #ifndef INST7
+// #error INST7 must be defined before including this file.
+// #endif
+#ifndef INST8
+#error INST8 must be defined before including this file.
+#endif
+#ifndef INST9
+#error INST9 must be defined before including this file.
+#endif
+
+/*****************************************************************************/
+/* The following is ARM-specific */
+/*****************************************************************************/
+
+// If you're adding a new instruction:
+// You need not only to fill in one of these macros describing the instruction, but also:
+// * If the instruction writes to more than one destination register, update the function
+// emitInsMayWriteMultipleRegs in emitArm.cpp.
+
+// clang-format off
+INST9(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE)
+
+// enum name FP LD/ST Rdn,Rm Rd,Rn,Rm Rdn,i8 Rd,Rn,i3 Rd,Rn,+i8<<i4 Rd,Rn,Rm{,sh} SP,i9 Rd,SP,i10 Rd,PC,i10
+// T1_D0 T1_H T1_J0 T1_G T2_L0 T2_C0 T1_F T1_J2 T1_J3
+INST9(add, "add", 0, 0, IF_EN9, 0x4400, 0x1800, 0x3000, 0x1C00, 0xF1000000, 0xEB000000, 0xB000, 0xA800, 0xA000)
+ // add Rdn,Rm T1_D0 01000100Dmmmmddd 4400 high
+ // adds Rd,Rn,Rm T1_H 0001100mmmnnnddd 1800 low
+ // adds Rdn,i8 T1_J0 00110dddiiiiiiii 3000 low imm(0-255)
+ // adds Rd,Rn,i3 T1_G 0001110iiinnnddd 1C00 low imm(0-7)
+ // add{s} Rd,Rn,Rm{,sh} T2_C0 11101011000Snnnn 0iiiddddiishmmmm EB00 0000
+ // add{s} Rd,Rn,+i8<<i4 T2_L0 11110i01000Snnnn 0iiiddddiiiiiiii F100 0000 imm(i8<<i4) *pref
+ // add SP,i9 T1_F 101100000iiiiiii B000 SP imm(0-508)
+ // add Rd,SP,i10 T1_J2 10101dddiiiiiiii A800 low imm(0-1020)
+ // add Rd,PC,i10 T1_J3 10100dddiiiiiiii A000 low imm(0-1020)
+INST9(sub, "sub", 0, 0, IF_EN9, BAD_CODE, 0x1A00, 0x3800, 0x1E00, 0xF1A00000, 0xEBA00000, 0xB080, BAD_CODE, BAD_CODE)
+ // subs Rd,Rn,Rm T1_H 0001101mmmnnnddd 1A00 low
+ // subs Rdn,i8 T1_J0 00111dddiiiiiiii 3800 low imm(0-255)
+ // subs Rd,Rn,i3 T1_G 0001111iiinnnddd 1E00 low imm(0-7)
+ // sub{s} Rd,Rn,+i8<<i4 T2_L0 11110i01101Snnnn 0iiiddddiiiiiiii F1A0 0000 imm(i8<<i4) *pref
+ // sub{s} Rd,Rn,Rm{,sh} T2_C0 11101011101Snnnn 0iiiddddiishmmmm EBA0 0000
+ // sub SP,i9 T1_F 101100001iiiiiii B080 SP imm(0-508)
+
+// enum name FP LD/ST Rt,[Rn+Rm] Rt,[Rn+i7] Rt,[Rn+Rm,sh] Rt,[Rn+=i8] Rt,[Rn+i12] Rt,[PC+-i12] Rd,[SP+i10] Rd,[PC+i10]
+// T1_H T1_C T2_E0 T2_H0 T2_K1 T2_K4 T1_J2 T1_J3
+INST8(ldr, "ldr", 0,LD, IF_EN8, 0x5800, 0x6800, 0xF8500000, 0xF8500800, 0xF8D00000, 0xF85F0000, 0x9800, 0x4800)
+ // ldr Rt,[Rn+Rm] T1_H 0101100mmmnnnttt 5800 low
+ // ldr Rt,[Rn+i7] T1_C 01101iiiiinnnttt 6800 low imm(0-124)
+ // ldr Rt,[Rn+Rm{,sh}] T2_E0 111110000101nnnn tttt000000shmmmm F850 0000 sh=(0,1,2,3)
+ // ldr Rt,[Rn],+-i8{!} T2_H0 111110000101nnnn tttt1PUWiiiiiiii F850 0800 imm(0-255)
+ // ldr Rt,[Rn+i12] T2_K1 111110001101nnnn ttttiiiiiiiiiiii F8D0 0000 imm(0-4095)
+ // ldr Rt,[PC+-i12] T2_K4 11111000U1011111 ttttiiiiiiiiiiii F85F 0000 imm(+-4095)
+ // ldr Rt,[SP+i10] T1_J2 10011tttiiiiiiii 9800 low imm(0-1020)
+ // ldr Rt,[PC+i10] T1_J3 01001tttiiiiiiii 4800 low imm(0-1020)
+
+// enum name FP LD/ST Rt,[Rn+Rm] Rt,[Rn+i7] Rt,[Rn+Rm,sh] Rt,[Rn+=i8] Rt,[Rn+i12] Rt,[PC+-i12] or Rt,[SP+-i10]
+// T1_H T1_C T2_E0 T2_H0 T2_K1 T2_K4 or T1_J2
+INST6(str, "str", 0,ST, IF_EN6B, 0x5000, 0x6000, 0xF8400000, 0xF8400800, 0xF8C00000, 0x9000)
+ // str Rt,[Rn+Rm] T1_H 0101000mmmnnnttt 5000 low
+ // str Rt,[Rn+i7] T1_C 01100iiiiinnnttt 6000 low imm(0-124)
+ // str Rt,[Rn+Rm{,sh}] T2_E0 111110000100nnnn tttt000000shmmmm F840 0000 sh=(0,1,2,3)
+ // str Rt,[Rn],+-i8{!} T2_H0 111110000100nnnn tttt1PUWiiiiiiii F840 0800 imm(0-255)
+ // str Rt,[Rn+i12] T2_K1 111110001100nnnn ttttiiiiiiiiiiii F8C0 0000 imm(0-4095)
+ // str Rt,[SP+-i10] T1_J2 10010tttiiiiiiii 9000 low imm(0-1020)
+INST6(ldrb, "ldrb", 0,LD, IF_EN6A, 0x5C00, 0x7800, 0xF8100000, 0xF8100800, 0xF8900000, 0xF81F0000)
+ // ldrb Rt,[Rn+Rm] T1_H 0101110mmmnnnttt 5C00 low
+ // ldrb Rt,[Rn+i5] T1_C 01111iiiiinnnttt 7800 low imm(0-31)
+ // ldrb Rt,[Rn+Rm{,sh}] T2_E0 111110000001nnnn tttt000000shmmmm F810 0000 sh=(0,1,2,3)
+ // ldrb Rt,[Rn],+-i8{!} T2_H0 111110000001nnnn tttt1PUWiiiiiiii F810 0800 imm(0-255)
+ // ldrb Rt,[Rn+i12] T2_K1 111110001001nnnn ttttiiiiiiiiiiii F890 0000 imm(0-4095)
+ // ldrb Rt,[PC+i12] T2_K4 11111000U0011111 ttttiiiiiiiiiiii F81F 0000 imm(+-4095)
+INST6(strb, "strb", 0,ST, IF_EN6B, 0x5400, 0x7000, 0xF8000000, 0xF8000800, 0xF8800000, BAD_CODE)
+ // strb Rt,[Rn+Rm] T1_H 0101010mmmnnnttt 5400 low
+ // strb Rt,[Rn+i5] T1_C 01110iiiiinnnttt 7000 low imm(0-31)
+ // strb Rt,[Rn+Rm{,sh}] T2_E0 111110000000nnnn tttt000000shmmmm F800 0000 sh=(0,1,2,3)
+ // strb Rt,[Rn],+-i8{!} T2_H0 111110000000nnnn tttt1PUWiiiiiiii F800 0800 imm(0-255)
+ // strb Rt,[Rn+i12] T2_K1 111110001000nnnn ttttiiiiiiiiiiii F880 0000 imm(0-4095)
+INST6(ldrh, "ldrh", 0,LD, IF_EN6A, 0x5A00, 0x8800, 0xF8300000, 0xF8300800, 0xF8B00000, 0xF83F0000)
+ // ldrh Rt,[Rn+Rm] T1_H 0101101mmmnnnttt 5A00 low
+ // ldrh Rt,[Rn+i6] T1_C 10001iiiiinnnttt 8800 low imm(0-62)
+ // ldrh Rt,[Rn+Rm{,sh}] T2_E0 111110000011nnnn tttt000000shmmmm F830 0000 sh=(0,1,2,3)
+ // ldrh Rt,[Rn],+-i8{!} T2_H0 111110000011nnnn tttt1PUWiiiiiiii F830 0800 imm(0-255)
+ // ldrh Rt,[Rn+i12] T2_K1 111110001011nnnn ttttiiiiiiiiiiii F8B0 0000 imm(0-4095)
+ // ldrh Rt,[PC+i12] T2_K4 11111000U0111111 ttttiiiiiiiiiiii F83F 0000 imm(+-4095)
+INST6(strh, "strh", 0,ST, IF_EN6B, 0x5200, 0x8000, 0xF8200000, 0xF8200800, 0xF8a00000, BAD_CODE)
+ // strh Rt,[Rn+Rm] T1_H 0101001mmmnnnttt 5200 low
+ // strh Rt,[Rn+i6] T1_C 10000iiiiinnnttt 8000 low imm(0-62)
+ // strh Rt,[Rn+Rm{,sh}] T2_E0 111110000010nnnn tttt000000shmmmm F820 0000 sh=(0,1,2,3)
+ // strh Rt,[Rn],+-i8{!} T2_H0 111110000010nnnn tttt1PUWiiiiiiii F820 0800 imm(0-255)
+ // strh Rt,[Rn+i12] T2_K1 111110001010nnnn ttttiiiiiiiiiiii F8A0 0000 imm(0-4095)
+INST6(ldrsb, "ldrsb", 0,LD, IF_EN6A, 0x5600, BAD_CODE, 0xF9100000, 0xF9100800, 0xF9900000, 0xF91F0000)
+ // ldrsb Rt,[Rn+Rm] T1_H 0101011mmmnnnttt 5600 low
+ // ldrsb Rt,[Rn+Rm{,sh}] T2_E0 111110010001nnnn tttt000000shmmmm F910 0000 sh=(0,1,2,3)
+ // ldrsb Rt,[Rn],+-i8{!} T2_H0 111110010001nnnn tttt1PUWiiiiiiii F910 0800 imm(0-255)
+ // ldrsb Rt,[Rn+i12] T2_K1 111110011001nnnn ttttiiiiiiiiiiii F990 0000 imm(0-4095)
+ // ldrsb Rt,[PC+i12] T2_K4 11111001U0011111 ttttiiiiiiiiiiii F91F 0000 imm(+-4095)
+INST6(ldrsh, "ldrsh", 0,LD, IF_EN6A, 0x5E00, BAD_CODE, 0xF9300000, 0xF9300800, 0xF9B00000, 0xF93F0000)
+ // ldrsh Rt,[Rn+Rm] T1_H 0101111mmmnnnttt 5E00 low
+ // ldrsh Rt,[Rn+Rm{,sh}] T2_E0 111110010011nnnn tttt000000shmmmm F930 0000 sh=(0,1,2,3)
+ // ldrsh Rt,[Rn],+-i8{!} T2_H0 111110010011nnnn tttt1PUWiiiiiiii F930 0800 imm(0-255)
+ // ldrsh Rt,[Rn+i12] T2_K1 111110011011nnnn ttttiiiiiiiiiiii F9B0 0000 imm(0-4095)
+ // ldrsh Rt,[PC+i12] T2_K4 11111001U0111111 ttttiiiiiiiiiiii F93F 0000 imm(+-4095)
+
+// enum name FP LD/ST Rd, Rm Rd,Rm Rd,i8 Rd,+i8<<i4 S / Rn,Rm{,sh}
+// T1_E T1_D0 T1_J0 T2_L1/L2 T2_C3/C8
+INST5(mov, "mov", 0, 0, IF_EN5A, 0x0000, 0x4600, 0x2000, 0xF04F0000, 0xEA5F0000)
+ // movs Rd,Rm T1_E 0000000000mmmddd 0000 low
+ // mov Rd,Rm T1_D0 01000110Dmmmmddd 4600 high
+ // movs Rd,i8 T1_J0 00100dddiiiiiiii 2000 low imm(0-255)
+ // mov{s} Rd,+i8<<i4 T2_L1 11110i00010S1111 0iiiddddiiiiiiii F04F 0000 imm(i8<<i4)
+ // mov{s} Rd,Rm T2_C3 1110101001011111 0000dddd0000mmmm EA5F 0000
+INST5(cmp, "cmp", 0,CMP,IF_EN5B, 0x4280, 0x4500, 0x2800, 0xF1B00F00, 0xEBB00F00)
+ // cmp Rn,Rm T1_E 0100001010mmmnnn 4280 low
+ // cmp Rn,Rm T1_D0 01000101Nmmmmnnn 4500 high
+ // cmp Rn,i8 T1_J0 00101nnniiiiiiii 2800 low imm(0-255)
+ // cmp Rn,+i8<<i4 T2_L2 11110i011011nnnn 0iii1111iiiiiiii F1B0 0F00 imm(i8<<i4)
+ // cmp Rn,Rm{,sh} T2_C8 111010111011nnnn 0iii1111iishmmmm EBB0 0F00
+
+// enum name FP LD/ST Rdn, Rn Rd,Rn,i5 Rd,Rn,Rm Rd,Rn,i5
+// T1_E T2_C T2_C4 T2_C2
+INST4(lsl, "lsl", 0, 0, IF_EN4A, 0x4080, 0x0000, 0xFA00F000, 0xEA4F0000)
+ // lsls Rdn,Rm T1_E 0100000010mmmddd 4080 low
+ // lsls Rd,Rm,i5 T1_C 00000iiiiimmmddd 0000 low imm(0-31)
+ // lsl{s} Rd,Rn,Rm T2_C4 11111010000Snnnn 1111dddd0000mmmm FA00 F000
+ // lsl{s} Rd,Rm,i5 T2_C2 11101010010S1111 0iiiddddii00mmmm EA4F 0000 imm(0-31)
+INST4(lsr, "lsr", 0, 0, IF_EN4A, 0x40C0, 0x0800, 0xFA20F000, 0xEA4F0010)
+ // lsrs Rdn,Rm T1_E 0100000011mmmddd 40C0 low
+ // lsrs Rd,Rm,i5 T1_C 00001iiiiimmmddd 0800 low imm(0-31)
+ // lsr{s} Rd,Rn,Rm T2_C4 11111010001Snnnn 1111dddd0000mmmm FA20 F000
+ // lsr{s} Rd,Rm,i5 T2_C2 11101010010S1111 0iiiddddii01mmmm EA4F 0010 imm(0-31)
+INST4(asr, "asr", 0, 0, IF_EN4A, 0x4100, 0x1000, 0xFA40F000, 0xEA4F0020)
+ // asrs Rdn,Rm T1_E 0100000100mmmddd 4100 low shift by Rm
+ // asrs Rd,Rm,i5 T1_C 00010iiiiimmmddd 1000 low imm(0-31)
+ // asr{s} Rd,Rn,Rm T2_C4 11111010010Snnnn 1111dddd0000mmmm FA40 F000
+ // asr{s} Rd,Rm,i5 T2_C2 11101010010S1111 0iiiddddii10mmmm EA4F 0020 imm(0-31)
+INST4(ror, "ror", 0, 0, IF_EN4A, 0x41C0, BAD_CODE, 0xFA60F000, 0xEA4F0030)
+ // rors Rdn,Rm T1_E 0100000111mmmddd 41C0 low
+ // ror{s} Rd,Rn,Rm T2_C4 11111010011Snnnn 1111dddd0000mmmm FA60 F000
+ // ror{s} Rd,Rm,i5 T2_C2 11101010010S1111 0iiiddddii11mmmm EA4F 0030 imm(0-31)
+
+// enum name FP LD/ST Rdn, Rn Rd,Rn,i5 Rd,Rn,Rm Rd,Rn,i5
+// T2_K2 T2_H2 T2_C7 T2_K3
+INST4(pld, "pld", 0,LD, IF_EN4B, 0xF890F000, 0xF810FC00, 0xF810F000, 0xF81FF000) // Cache Prefetch Data for Read
+ // pld [Rn+i12] T2_K2 111110001001nnnn 1111iiiiiiiiiiii F890 F000 imm(0-4095)
+ // pld [Rn-i8] T2_H2 111110000001nnnn 11111100iiiiiiii F810 FC00 imm(0-255)
+ // pld [Rn+Rm{,sh}] T2_C7 111110000001nnnn 1111000000shmmmm F810 F000 sh=(0,1,2,3)
+ // pld [PC+-i12] T2_K3 11111001U0011111 1111iiiiiiiiiiii F81F F000 imm(+-4095)
+INST4(pldw, "pldw", 0,LD, IF_EN4B, 0xF8B0F000, 0xF830FC00, 0xF830F000, BAD_CODE) // Cache Prefetch Data for Write
+ // pldw [Rn+i12] T2_K2 111110001011nnnn 1111iiiiiiiiiiii F8B0 F000 imm(0-4095)
+ // pldw [Rn-i8] T2_H2 111110000011nnnn 11111100iiiiiiii F830 FC00 imm(0-255)
+ // pldw [Rn+Rm{,sh}] T2_C7 111110000011nnnn 1111000000shmmmm F830 F000 sh=(0,1,2,3)
+#ifdef FEATURE_PLI_INSTRUCTION
+// NOTE: The PLI instruction had an erratum in early Krait implementations, so even though it's unlikely we would ever generate it, it is
+// #ifdef'ed out to prevent its use.
+INST4(pli, "pli", 0,LD, IF_EN4B, 0xF990F000, 0xF910FC00, 0xF910F000, 0xF91FF000) // Cache Prefetch Instructions for Execute
+ // pli [Rn+i12] T2_K2 111110011001nnnn 1111iiiiiiiiiiii F990 F000 imm(0-4095)
+ // pli [Rn-i8] T2_H2 111110010001nnnn 11111100iiiiiiii F910 FC00 imm(0-255)
+ // pli [Rn+Rm{,sh}] T2_C7 111110010001nnnn 1111000000shmmmm F910 F000 sh=(0,1,2,3)
+ // pli [PC+-i12] T2_K3 11111001U0011111 1111iiiiiiiiiiii F91F F000 imm(+-4095)
+#endif // FEATURE_PLI_INSTRUCTION
+
+// enum name FP LD/ST Rdn, Rm Rd,Rn,Rm,sh Rd,Rn,i12
+// T1_E T2_C0 T2_L0
+INST3(and, "and", 0, 0, IF_EN3A, 0x4000, 0xEA000000, 0xF0000000)
+ // ands Rdn,Rm T1_E 0100000000mmmddd 4000 low
+ // and{s} Rd,Rn,Rm{,sh} T2_C0 11101010000Snnnn 0iiiddddiishmmmm EA00 0000
+ // and{s} Rd,Rn,i12 T2_L0 11110i00000Snnnn 0iiiddddiiiiiiii F000 0000 imm(i8<<i4)
+INST3(eor, "eor", 0, 0, IF_EN3A, 0x4040, 0xEA800000, 0xF0800000)
+ // eors Rd,Rm T1_E 0100000001mmmddd 4040 low
+ // eor{s} Rd,Rn,Rm{,sh} T2_C0 11101010100Snnnn 0iiiddddiishmmmm EA80 0000
+ // eor{s} Rd,Rn,i12 T2_L0 11110i00100Snnnn 0iiiddddiiiiiiii F080 0000 imm(i8<<i4)
+INST3(orr, "orr", 0, 0, IF_EN3A, 0x4300, 0xEA400000, 0xF0400000)
+ // orrs Rdn,Rm T1_E 0100001100mmmddd 4300 low
+ // orr{s} Rd,Rn,Rm{,sh} T2_C0 11101010010Snnnn 0iiiddddiishmmmm EA40 0000
+ // orr{s} Rd,Rn,i12 T2_L0 11110i00010Snnnn 0iiiddddiiiiiiii F040 0000 imm(i8<<i4)
+INST3(orn, "orn", 0, 0, IF_EN3A, BAD_CODE, 0xEA600000, 0xF0600000)
+ // orn{s} Rd,Rn,Rm{,sh} T2_C0 11101010011Snnnn 0iiiddddiishmmmm EA60 0000
+ // orn{s} Rd,Rn,i12 T2_L0 11110i00011Snnnn 0iiiddddiiiiiiii F060 0000 imm(i8<<i4)
+INST3(bic, "bic", 0, 0, IF_EN3A, 0x4380, 0xEA200000, 0xF0200000)
+ // bics Rdn,Rm T1_E 0100001110mmmddd 4380 low
+ // bic{s} Rd,Rn,Rm{,sh} T2_C0 11101010001Snnnn 0iiiddddiishmmmm EA20 0000
+ // bic{s} Rd,Rn,i12 T2_L0 11110i00001Snnnn 0iiiddddiiiiiiii F020 0000 imm(i8<<i4)
+INST3(adc, "adc", 0, 0, IF_EN3A, 0x4140, 0xEB400000, 0xF1400000)
+ // adcs Rdn,Rm T1_E 0100000101mmmddd 4140 low
+ // adcs Rd,Rn,Rm{,sh} T2_C0 11101011010Snnnn 0iiiddddiishmmmm EB40 0000
+ // adcs Rd,Rn,i12 T2_L0 11110i01010Snnnn 0iiiddddiiiiiiii F140 0000 imm(0-4095)
+INST3(sbc, "sbc", 0, 0, IF_EN3A, 0x4180, 0xEB600000, 0xF1600000)
+ // sbcs Rd,Rm T1_E 0100000110mmmddd 4180 low
+ // sbc{s} Rd,Rn,Rm{,sh} T2_C0 11101011011Snnnn 0iiiddddiishmmmm EB60 0000
+ // sbc{s} Rd,Rn,+i8<<i4 T2_L0 11110i01011Snnnn 0iiiddddiiiiiiii F160 0000 imm(i8<<i4)
+INST3(rsb, "rsb", 0, 0, IF_EN3A, 0x4240, 0xEBC00000, 0xF1C00000)
+ // rsbs Rd,Rn,#0 T1_E 0100001001nnnddd 4240 low (Note: x86 NEG instr)
+ // rsb{s} Rd,Rn,Rm{,sh} T2_C0 11101011110Snnnn 0iiiddddiishmmmm EBC0 0000
+ // rsb{s} Rd,Rn,+i8<<i4 T2_L0 11110i01110Snnnn 0iiiddddiiiiiiii F1C0 0000 imm(i8<<i4)
+
+// enum name FP LD/ST Rn,Rm Rn,Rm,sh Rn,i12
+// T1_E T2_C8 T2_L2
+INST3(tst, "tst", 0,CMP,IF_EN3B, 0x4200, 0xEA100F00, 0xF0100F00)
+ // tst Rn,Rm T1_E 0100001000mmmnnn 4200 low
+ // tst Rn,Rm{,sh} T2_C8 111010100001nnnn 0iii1111iishmmmm EA10 0F00
+ // tst Rn,+i8<<i4 T2_L2 11110i000001nnnn 0iii1111iiiiiiii F010 0F00 imm(i8<<i4)
+INST3(teq, "teq", 0,CMP,IF_EN3B, BAD_CODE, 0xEA900F00, 0xF0900F00)
+ // teq Rn,Rm{,sh} T2_C8 111010101001nnnn 0iii1111iishmmmm EA90 0F00
+ // teq Rn,+i8<<i4 T2_L2 11110i001001nnnn 0iii1111iiiiiiii F090 0F00 imm(i8<<i4)
+INST3(cmn, "cmn", 0,CMP,IF_EN3B, 0x42C0, 0xEB100F00, 0xF1100F00)
+ // cmn Rn,Rm T1_E 0100001011mmmnnn 42C0 low
+ // cmn Rn,Rm{,sh} T2_C8 111010110001nnnn 0iii1111iishmmmm EB10 0F00
+ // cmn Rn,+i8<<i4 T2_L2 11110i010001nnnn 0iii1111iiiiiiii F110 0F00 imm(i8<<i4)
+
+// enum name FP LD/ST Rd,Rm Rd,Rm,sh Rd,Rn,i12
+// T1_E T2_C1 T2_L1
+INST3(mvn, "mvn", 0, 0, IF_EN3C, 0x43C0, 0xEA6F0000, 0xF06F0000)
+ // mvns Rd,Rm T1_E 0100001111mmmddd 43C0 low
+ // mvn{s} Rd,Rm{,sh} T2_C1 11101010011S1111 0iiiddddiishmmmm EA6F 0000
+ // mvn{s} Rd,+i8<<i4 T2_L1 11110i00011S1111 0iiiddddiiiiiiii F06F 0000 imm(i8<<i4)
+
+// enum name FP LD/ST SP,reg8 rT reg,reg16
+// T1_L1 T2_E2 T2_I1
+INST3(push, "push", 0, 0, IF_EN3D, 0xB400, 0xF84D0D04, 0xE92D0000)
+ // push {LR,}<reglist8> T1_L1 1011010Mrrrrrrrr B400 low
+ // push rT T2_E2 1111100001001101 tttt110100000100 F84D 0D04
+ // push <reglist16> T2_I1 1110100100101101 0M0rrrrrrrrrrrrr E92D 0000
+INST3(pop, "pop", 0, 0, IF_EN3D, 0xBC00, 0xF85D0B04, 0xE8BD0000)
+ // pop {PC,}<reglist8> T1_L1 1011110Prrrrrrrr BC00 low
+ // pop rT T2_E2 1111100001011101 tttt101100000100 F85D 0B04
+ // pop <reglist16> T2_I1 1110100010111101 PM0rrrrrrrrrrrrr E8BD 0000
+
+// enum name FP LD/ST Rd,i16 Rd,i16 Rd,i16
+// T2_N T2_N1 T2_N2
+INST3(movt, "movt", 0, 0, IF_EN3E, 0xF2C00000,0xF2C00000,0xF2C00000)
+ // Rd,i16 T2_N 11110i101100iiii 0iiiddddiiiiiiii F2C0 0000 imm(0-65535)
+ // Rd,i16 T2_N1 11110i101100iiii 0iiiddddiiiiiiii F2C0 0000 imm(0-65535)
+ // Rd,i16 T2_N2 11110i101100iiii 0iiiddddiiiiiiii F2C0 0000 imm(0-65535)
+INST3(movw, "movw", 0, 0, IF_EN3E, 0xF2400000,0xF2400000,0xF2400000)
+ // Rd,+i16 T2_N 11110i100100iiii 0iiiddddiiiiiiii F240 0000 imm(0-65535)
+ // Rd,+i16 T2_N1 11110i100100iiii 0iiiddddiiiiiiii F240 0000 imm(0-65535)
+ // Rd,+i16 T2_N2 11110i100100iiii 0iiiddddiiiiiiii F240 0000 imm(0-65535)
+
+// enum name FP LD/ST PC+-imm11 PC+-imm24 PC+-imm24
+// T1_M T2_J2 T2_J3
+INST3(b, "b", 0, 0, IF_EN3F, 0xE000, 0xF0009000, 0xF0009000)
+ // b PC+-i11 T1_M 11100iiiiiiiiiii E000 imm(-2048..2046)
+ // b PC+-i24 T2_J2 11110Siiiiiiiiii 10j1jiiiiiiiiiii F000 9000 imm(-16777216..16777214) (intra-procedure offset)
+ // b PC+-i24 T2_J3 11110Siiiiiiiiii 10j1jiiiiiiiiiii F000 9000 imm(-16777216..16777214) (inter-procedure offset)
+
+
+// enum name FP LD/ST PC+-imm8 PC+-imm20
+// T1_K T2_J1
+INST2(beq, "beq", 0, 0, IF_EN2A, 0xD000, 0xF0008000)
+ // beq PC+-i8 T1_K 11010000iiiiiiii D000 imm(-256..254)
+ // beq PC+-i20 T2_J1 11110S0000iiiiii 10j0jiiiiiiiiiii F000 8000 imm(-1048576..1048574)
+INST2(bne, "bne", 0, 0, IF_EN2A, 0xD100, 0xF0408000)
+ // bne PC+-i8 T1_K 11010001iiiiiiii D100 imm(-256..254)
+ // bne PC+-i20 T2_J1 11110S0001iiiiii 10j0jiiiiiiiiiii F040 8000 imm(-1048576..1048574)
+INST2(bhs, "bhs", 0, 0, IF_EN2A, 0xD200, 0xF0808000)
+ // bhs PC+-i8 T1_K 11010010iiiiiiii D200 imm(-256..254)
+ // bhs PC+-i20 T2_J1 11110S0010iiiiii 10j0jiiiiiiiiiii F080 8000 imm(-1048576..1048574)
+INST2(blo, "blo", 0, 0, IF_EN2A, 0xD300, 0xF0C08000)
+ // blo PC+-i8 T1_K 11010011iiiiiiii D300 imm(-256..254)
+ // blo PC+-i20 T2_J1 11110S0011iiiiii 10j0jiiiiiiiiiii F0C0 8000 imm(-1048576..1048574)
+INST2(bmi, "bmi", 0, 0, IF_EN2A, 0xD400, 0xF1008000)
+ // bmi PC+-i8 T1_K 11010100iiiiiiii D400 imm(-256..254)
+ // bmi PC+-i20 T2_J1 11110S0100iiiiii 10j0jiiiiiiiiiii F100 8000 imm(-1048576..1048574)
+INST2(bpl, "bpl", 0, 0, IF_EN2A, 0xD500, 0xF1408000)
+ // bpl PC+-i8 T1_K 11010101iiiiiiii D500 imm(-256..254)
+ // bpl PC+-i20 T2_J1 11110S0101iiiiii 10j0jiiiiiiiiiii F140 8000 imm(-1048576..1048574)
+INST2(bvs, "bvs", 0, 0, IF_EN2A, 0xD600, 0xF1808000)
+ // bvs PC+-i8 T1_K 11010110iiiiiiii D600 imm(-256..254)
+ // bvs PC+-i20 T2_J1 11110S0110iiiiii 10j0jiiiiiiiiiii F180 8000 imm(-1048576..1048574)
+INST2(bvc, "bvc", 0, 0, IF_EN2A, 0xD700, 0xF1C08000)
+ // bvc PC+-i8 T1_K 11010111iiiiiiii D700 imm(-256..254)
+ // bvc PC+-i20 T2_J1 11110S0111iiiiii 10j0jiiiiiiiiiii F1C0 8000 imm(-1048576..1048574)
+INST2(bhi, "bhi", 0, 0, IF_EN2A, 0xD800, 0xF2008000)
+ // bhi PC+-i8 T1_K 11011000iiiiiiii D800 imm(-256..254)
+ // bhi PC+-i20 T2_J1 11110S1000iiiiii 10j0jiiiiiiiiiii F200 8000 imm(-1048576..1048574)
+INST2(bls, "bls", 0, 0, IF_EN2A, 0xD900, 0xF2408000)
+ // bls PC+-i8 T1_K 11011001iiiiiiii D900 imm(-256..254)
+ // bls PC+-i20 T2_J1 11110S1001iiiiii 10j0jiiiiiiiiiii F240 8000 imm(-1048576..1048574)
+INST2(bge, "bge", 0, 0, IF_EN2A, 0xDA00, 0xF2808000)
+ // bge PC+-i8 T1_K 11011010iiiiiiii DA00 imm(-256..254)
+ // bge PC+-i20 T2_J1 11110S1010iiiiii 10j0jiiiiiiiiiii F280 8000 imm(-1048576..1048574)
+INST2(blt, "blt", 0, 0, IF_EN2A, 0xDB00, 0xF2C08000)
+ // blt PC+-i8 T1_K 11011011iiiiiiii DB00 imm(-256..254)
+ // blt PC+-i20 T2_J1 11110S1011iiiiii 10j0jiiiiiiiiiii F2C0 8000 imm(-1048576..1048574)
+INST2(bgt, "bgt", 0, 0, IF_EN2A, 0xDC00, 0xF3008000)
+ // bgt PC+-i8 T1_K 11011100iiiiiiii DC00 imm(-256..254)
+ // bgt PC+-i20 T2_J1 11110S1100iiiiii 10j0jiiiiiiiiiii F300 8000 imm(-1048576..1048574)
+INST2(ble, "ble", 0, 0, IF_EN2A, 0xDD00, 0xF3408000)
+ // ble PC+-i8 T1_K 11011101iiiiiiii DD00 imm(-256..254)
+ // ble PC+-i20 T2_J1 11110S1101iiiiii 10j0jiiiiiiiiiii F340 8000 imm(-1048576..1048574)
+
+// enum name FP LD/ST Rm Rm
+// T1_D1 T1_D2
+INST2(bx, "bx", 0, 0, IF_EN2B, 0x4700, 0x4700)
+ // bx Rm T1_D1 010001110mmmm000 4700 high
+ // bx Rm T1_D2 010001110mmmm000 4700 high
+
+// enum name FP LD/ST rM PC+-imm24
+// T1_D2 T2_J3
+INST2(blx, "blx", 0, 0, IF_EN2C, 0x4780, 0xF000C000)
+ // blx Rm T1_D2 010001111mmmm000 4780 high
+ // blx PC+-i24 T2_J3 11110Siiiiiiiiii 11j0jiiiiiiiiii0 F000 C000 imm(-16777216..16777214)
+
+// enum name FP LD/ST Rn,<reg8> Rn,<reg16>
+// T1_J1 T2_I0
+INST2(ldm, "ldm", 0,LD, IF_EN2D, 0xC800, 0xE8900000)
+ // ldm Rn,<reglist8> T1_J1 11001nnnrrrrrrrr C800 low
+ // ldm Rn{!},<reglist16> T2_I0 1110100010W1nnnn rr0rrrrrrrrrrrrr E890 0000
+INST2(stm, "stm", 0,ST, IF_EN2D, 0xC000, 0xE8800000)
+ // stm Rn!,<reglist8> T1_J1 11000nnnrrrrrrrr C000 low
+ // stm Rn{!},<reglist16> T2_I0 1110100010W0nnnn 0r0rrrrrrrrrrrrr E880 0000
+
+// enum name FP LD/ST Rn,Rm Rn,Rm,{sb}
+// T1_E T2_C6
+INST2(sxtb, "sxtb", 0, 0, IF_EN2E, 0xB240, 0xFA4FF080)
+ // Rd,Rm T1_E 1011001001mmmddd B240 low
+ // Rd,Rm{,sb} T2_C6 1111101001001111 1111dddd10sbmmmm FA4F F080
+INST2(sxth, "sxth", 0, 0, IF_EN2E, 0xB200, 0xFA0FF080)
+ // Rd,Rm T1_E 1011001000mmmddd B200 low
+ // Rd,Rm{,sb} T2_C6 1111101000001111 1111dddd10sbmmmm FA0F F080
+INST2(uxtb, "uxtb", 0, 0, IF_EN2E, 0xB2C0, 0xFA5FF080)
+ // Rd,Rm T1_E 1011001011mmmddd B2C0 low
+ // Rd,Rm{,sb} T2_C6 1111101001011111 1111dddd10sbmmmm FA5F F080
+INST2(uxth, "uxth", 0, 0, IF_EN2E, 0xB280, 0xFA1FF080)
+ // Rd,Rm T1_E 1011001010mmmddd B280 low
+ // Rd,Rm{,sb} T2_C6 1111101000011111 1111dddd10sbmmmm FA1F F080
+
+// enum name FP LD/ST Rdn,Rm Rd,Rn,Rm
+// T1_E T2_C5
+INST2(mul, "mul", 0, 0, IF_EN2F, 0x4340, 0xFB00F000)
+ // Rd,Rm T1_E 0100001101nnnddd 4340 low
+ // Rd,Rn,Rm T2_C5 111110110000nnnn 1111dddd0000mmmm FB00 F000
+
+// enum name FP LD/ST Rd,PC,i10 Rd,PC,+-i12
+// T1_J3 T2_M1
+INST2(adr, "adr", 0, 0, IF_EN2G, 0xA000, 0xF20F0000)
+ // Rd,PC+i10 T1_J3 10100dddiiiiiiii A000 low imm(0-1020)
+ // Rd,PC+-i12 T2_M1 11110i10U0U01111 0iiiddddiiiiiiii F20F 0000 imm(+-4095)
+
+INST1(addw, "addw", 0, 0, IF_T2_M0, 0xF2000000)
+ // Rd,Rn,i12 T2_M0 11110i100000nnnn 0iiiddddiiiiiiii F200 0000 imm(0-4095)
+INST1(bfc, "bfc", 0, 0, IF_T2_D1, 0xF36F0000)
+ // Rd,#b,#w T2_D1 1111001101101111 0iiiddddii0wwwww F36F 0000 imm(0-31),imm(0-31)
+INST1(bfi, "bfi", 0, 0, IF_T2_D0, 0xF3600000)
+ // Rd,Rn,#b,#w T2_D0 111100110110nnnn 0iiiddddii0wwwww F360 0000 imm(0-31),imm(0-31)
+INST1(bl, "bl", 0, 0, IF_T2_J3, 0xF000D000)
+ // PC+-i24 T2_J3 11110Siiiiiiiiii 11j1jiiiiiiiiiii F000 D000 imm(-16777216..16777214)
+INST1(bkpt, "bkpt", 0, 0, IF_T1_A, 0xDEFE)
+ // T1_A 1101111011111110 DEFE // Windows uses this
+ // i8 T1_L0 10111110iiiiiiii BE00 imm(0-255)
+INST1(cbnz, "cbnz", 0, 0, IF_T1_I, 0xB900)
+ // Rn,PC+i7 T1_I 101110i1iiiiinnn B900 low imm(0-126)
+INST1(cbz, "cbz", 0, 0, IF_T1_I, 0xB100)
+ // Rn,PC+i7 T1_I 101100i1iiiiinnn B100 low imm(0-126)
+INST1(clz, "clz", 0, 0, IF_T2_C10, 0xFAB0F080)
+ // Rd,Rm T2_C10 111110101011mmmm 1111dddd1000mmmm FAB0 F080
+INST1(dmb, "dmb", 0, 0, IF_T2_B, 0xF3BF8F50)
+ // #i4 T2_B 1111001110111111 100011110101iiii F3BF 8F50 imm(0-15)
+INST1(ism, "ism", 0, 0, IF_T2_B, 0xF3BF8F60)
+ // #i4 T2_B 1111001110111111 100011110110iiii F3BF 8F60 imm(0-15)
+INST1(ldmdb, "ldmdb", 0,LD, IF_T2_I0, 0xE9100000)
+ // Rn{!},<reglist16> T2_I0 1110100100W1nnnn rr0rrrrrrrrrrrrr E910 0000
+INST1(ldrd, "ldrd", 0,LD, IF_T2_G0, 0xE8500000)
+ // Rt,RT,[Rn],+-i8{!}T2_G0 1110100PU1W1nnnn ttttTTTTiiiiiiii E850 0000
+INST1(ldrex, "ldrex", 0,LD, IF_T2_H1, 0xE8500F00)
+ // Rt,[Rn+i8] T2_H1 111010000101nnnn tttt1111iiiiiiii E850 0F00 imm(0-1020)
+INST1(ldrexb, "ldrexb", 0,LD, IF_T2_E1, 0xE8D00F4F)
+ // Rt,[Rn] T2_E1 111010001101nnnn tttt111101001111 E8D0 0F4F
+INST1(ldrexd, "ldrexd", 0,LD, IF_T2_G1, 0xE8D0007F)
+ // Rt,RT,[Rn] T2_G1 111010001101nnnn ttttTTTT01111111 E8D0 007F
+INST1(ldrexh, "ldrexh", 0,LD, IF_T2_E1, 0xE8D00F5F)
+ // Rt,[Rn] T2_E1 111010001101nnnn tttt111101011111 E8D0 0F5F
+INST1(mla, "mla", 0, 0, IF_T2_F2, 0xFB000000)
+ // Rd,Rn,Rm,Ra T2_F2 111110110000nnnn aaaadddd0000mmmm FB00 0000
+INST1(mls, "mls", 0, 0, IF_T2_F2, 0xFB000010)
+ // Rd,Rn,Rm,Ra T2_F2 111110110000nnnn aaaadddd0001mmmm FB00 0010
+INST1(nop, "nop", 0, 0, IF_T1_A, 0xBF00)
+ // T1_A 1011111100000000 BF00
+INST1(nopw, "nop.w", 0, 0, IF_T2_A, 0xF3AF8000)
+ // T2_A 1111001110101111 1000000000000000 F3AF 8000
+INST1(sbfx, "sbfx", 0, 0, IF_T2_D0, 0xF3400000)
+ // Rd,Rn,#b,#w T2_D0 111100110100nnnn 0iiiddddii0wwwww F340 0000 imm(0-31),imm(0-31)
+INST1(sdiv, "sdiv", 0, 0, IF_T2_C5, 0xFB90F0F0)
+ // Rd,Rn,Rm T2_C5 111110111001nnnn 1111dddd1111mmmm FB90 F0F0
+INST1(smlal, "smlal", 0, 0, IF_T2_F1, 0xFBC00000)
+ // Rl,Rh,Rn,Rm T2_F1 111110111100nnnn llllhhhh0000mmmm FBC0 0000
+INST1(smull, "smull", 0, 0, IF_T2_F1, 0xFB800000)
+ // Rl,Rh,Rn,Rm T2_F1 111110111000nnnn llllhhhh0000mmmm FB80 0000
+INST1(stmdb, "stmdb", 0,ST, IF_T2_I0, 0xE9000000)
+ // Rn{!},<reglist16> T2_I0 1110100100W0nnnn 0r0rrrrrrrrrrrrr E900 0000
+INST1(strd, "strd", 0,ST, IF_T2_G0, 0xE8400000)
+ // Rt,RT,[Rn],+-i8{!}T2_G0 1110100PU1W0nnnn ttttTTTTiiiiiiii E840 0000
+INST1(strex, "strex", 0,ST, IF_T2_H1, 0xE8400F00)
+ // Rt,[Rn+i8] T2_H1 111010000100nnnn tttt1111iiiiiiii E840 0F00 imm(0-1020)
+INST1(strexb, "strexb", 0,ST, IF_T2_E1, 0xE8C00F4F)
+ // Rt,[Rn] T2_E1 111010001100nnnn tttt111101001111 E8C0 0F4F
+INST1(strexd, "strexd", 0,ST, IF_T2_G1, 0xE8C0007F)
+ // Rt,RT,[Rn] T2_G1 111010001100nnnn ttttTTTT01111111 E8C0 007F
+INST1(strexh, "strexh", 0,ST, IF_T2_E1, 0xE8C00F5F)
+ // Rt,[Rn] T2_E1 111010001100nnnn tttt111101011111 E8C0 0F5F
+INST1(subw, "subw", 0, 0, IF_T2_M0, 0xF2A00000)
+ // Rd,Rn,+i12 T2_M0 11110i101010nnnn 0iiiddddiiiiiiii F2A0 0000 imm(0-4095)
+INST1(tbb, "tbb", 0, 0, IF_T2_C9, 0xE8D0F000)
+ // Rn,Rm T2_C9 111010001101nnnn 111100000000mmmm E8D0 F000
+INST1(tbh, "tbh", 0, 0, IF_T2_C9, 0xE8D0F010)
+ // Rn,Rm,LSL #1 T2_C9 111010001101nnnn 111100000001mmmm E8D0 F010
+INST1(ubfx, "ubfx", 0, 0, IF_T2_D0, 0xF3C00000)
+ // Rd,Rn,#b,#w T2_D0 111100111100nnnn 0iiiddddii0wwwww F3C0 0000 imm(0-31),imm(0-31)
+INST1(udiv, "udiv", 0, 0, IF_T2_C5, 0xFBB0F0F0)
+ // Rd,Rn,Rm T2_C5 111110111011nnnn 1111dddd1111mmmm FBB0 F0F0
+INST1(umlal, "umlal", 0, 0, IF_T2_F1, 0xFBE00000)
+ // Rl,Rh,Rn,Rm T2_F1 111110111110nnnn llllhhhh0000mmmm FBE0 0000
+INST1(umull, "umull", 0, 0, IF_T2_F1, 0xFBA00000)
+ // Rl,Rh,Rn,Rm T2_F1 111110111010nnnn llllhhhh0000mmmm FBA0 0000
+
+#ifdef FEATURE_ITINSTRUCTION
+INST1(it, "it", 0, 0, IF_T1_B, 0xBF08)
+ // cond T1_B 10111111cond1000 BF08 cond
+INST1(itt, "itt", 0, 0, IF_T1_B, 0xBF04)
+ // cond T1_B 10111111cond0100 BF04 cond
+INST1(ite, "ite", 0, 0, IF_T1_B, 0xBF0C)
+ // cond T1_B 10111111cond1100 BF0C cond
+INST1(ittt, "ittt", 0, 0, IF_T1_B, 0xBF02)
+ // cond T1_B 10111111cond0010 BF02 cond
+INST1(itte, "itte", 0, 0, IF_T1_B, 0xBF06)
+ // cond T1_B 10111111cond0110 BF06 cond
+INST1(itet, "itet", 0, 0, IF_T1_B, 0xBF0A)
+ // cond T1_B 10111111cond1010 BF0A cond
+INST1(itee, "itee", 0, 0, IF_T1_B, 0xBF0E)
+ // cond T1_B 10111111cond1110 BF0E cond
+INST1(itttt, "itttt", 0, 0, IF_T1_B, 0xBF01)
+ // cond T1_B 10111111cond0001 BF01 cond
+INST1(ittte, "ittte", 0, 0, IF_T1_B, 0xBF03)
+ // cond T1_B 10111111cond0011 BF03 cond
+INST1(ittet, "ittet", 0, 0, IF_T1_B, 0xBF05)
+ // cond T1_B 10111111cond0101 BF05 cond
+INST1(ittee, "ittee", 0, 0, IF_T1_B, 0xBF07)
+ // cond T1_B 10111111cond0111 BF07 cond
+INST1(itett, "itett", 0, 0, IF_T1_B, 0xBF09)
+ // cond T1_B 10111111cond1001 BF09 cond
+INST1(itete, "itete", 0, 0, IF_T1_B, 0xBF0B)
+ // cond T1_B 10111111cond1011 BF0B cond
+INST1(iteet, "iteet", 0, 0, IF_T1_B, 0xBF0D)
+ // cond T1_B 10111111cond1101 BF0D cond
+INST1(iteee, "iteee", 0, 0, IF_T1_B, 0xBF0F)
+ // cond T1_B 10111111cond1111 BF0F cond
+#endif // FEATURE_ITINSTRUCTION
+
+
+/*****************************************************************************/
+/* Floating Point Instructions */
+/*****************************************************************************/
+// enum name FP LD/ST
+ // Dd,[Rn+imm8] T2_VLDST 11101101UD0Lnnnn dddd101Ziiiiiiii ED00 0A00 imm(+-1020)
+INST1(vstr, "vstr", 1,ST, IF_T2_VLDST, 0xED000A00)
+INST1(vldr, "vldr", 1,LD, IF_T2_VLDST, 0xED100A00)
+INST1(vstm, "vstm", 1,ST, IF_T2_VLDST, 0xEC800A00) // A8.6.399 VSTM (to an address in ARM core register from consecutive floats)
+INST1(vldm, "vldm", 1,LD, IF_T2_VLDST, 0xEC900A00) // A8.6.399 VLDM (from an address in ARM core register to consecutive floats)
+INST1(vpush, "vpush", 1,ST, IF_T2_VLDST, 0xED2D0A00)
+INST1(vpop, "vpop", 1,LD, IF_T2_VLDST, 0xECBD0A00)
+
+ // vmrs rT T2_E2 1110111011110001 tttt101000010000 EEF1 0A10
+INST1(vmrs, "vmrs", 1, 0, IF_T2_E2, 0xEEF10A10)
+
+ // Dd,Dn,Dm T2_VFP3 11101110-D--nnnn dddd101ZN-M0mmmm EE30 0A00
+INST1(vadd, "vadd", 1, 0, IF_T2_VFP3, 0xEE300A00)
+INST1(vsub, "vsub", 1, 0, IF_T2_VFP3, 0xEE300A40)
+INST1(vmul, "vmul", 1, 0, IF_T2_VFP3, 0xEE200A00)
+INST1(vdiv, "vdiv", 1, 0, IF_T2_VFP3, 0xEE800A00)
+
+ // Dd,Dm T2_VFP2 111011101D110--- dddd101zp1M0mmmm EEB0 0A40
+INST1(vmov, "vmov", 1, 0, IF_T2_VFP2, 0xEEB00A40) // opc2 = '000', zp = 00
+INST1(vabs, "vabs", 1, 0, IF_T2_VFP2, 0xEEB00AC0) // opc2 = '000', zp = 01
+INST1(vsqrt, "vsqrt", 1, 0, IF_T2_VFP2, 0xEEB10AC0) // opc2 = '001', zp = 01
+INST1(vneg, "vneg", 1, 0, IF_T2_VFP2, 0xEEB10A40) // opc2 = '001', zp = 00
+INST1(vcmp, "vcmp", 1, CMP, IF_T2_VFP2, 0xEEB40A40) // opc2 = '100', zp = 00
+INST1(vcmp0, "vcmp.0", 1, CMP, IF_T2_VFP2, 0xEEB50A40) // opc2 = '101', zp = 00
+
+ // Dd,Dm T2_VFP2 111011101D111--- dddd101zp1M0mmmm EEB8 0A40
+INST1(vcvt_d2i, "vcvt.d2i", 1, 0, IF_T2_VFP2, 0xEEBD0BC0) // opc2 = '101', zp = 11
+INST1(vcvt_f2i, "vcvt.f2i", 1, 0, IF_T2_VFP2, 0xEEBD0AC0) // opc2 = '101', zp = 01
+INST1(vcvt_d2u, "vcvt.d2u", 1, 0, IF_T2_VFP2, 0xEEBC0BC0) // opc2 = '100', zp = 11
+INST1(vcvt_f2u, "vcvt.f2u", 1, 0, IF_T2_VFP2, 0xEEBC0AC0) // opc2 = '100', zp = 01
+
+INST1(vcvt_i2f, "vcvt.i2f", 1, 0, IF_T2_VFP2, 0xEEB80AC0) // opc2 = '000', zp = 01
+INST1(vcvt_i2d, "vcvt.i2d", 1, 0, IF_T2_VFP2, 0xEEB80BC0) // opc2 = '000', zp = 11
+INST1(vcvt_u2f, "vcvt.u2f", 1, 0, IF_T2_VFP2, 0xEEB80A40) // opc2 = '000', zp = 00
+INST1(vcvt_u2d, "vcvt.u2d", 1, 0, IF_T2_VFP2, 0xEEB80B40) // opc2 = '000', zp = 10
+
+ // Dd,Dm T2_VFP2 111011101D110111 dddd101z11M0mmmm EEB7 0AC0
+INST1(vcvt_d2f, "vcvt.d2f", 1, 0, IF_T2_VFP2, 0xEEB70BC0) // opc2 = '111' zp = 01
+INST1(vcvt_f2d, "vcvt.f2d", 1, 0, IF_T2_VFP2, 0xEEB70AC0) // opc2 = '111' zp = 11
+
+ // Dd,Dm T2_VMOVD 111011F100D0V0000
+INST1(vmov_i2d, "vmov.i2d", 1, 0, IF_T2_VMOVD, 0xEC400B10) // A8.6.332 VMOV from 2 int regs to a double
+INST1(vmov_d2i, "vmov.d2i", 1, 0, IF_T2_VMOVD, 0xEC500B10) // A8.6.332 VMOV from a double to 2 int regs
+INST1(vmov_i2f, "vmov.i2f", 1, 0, IF_T2_VMOVS, 0xEE000A10) // A8.6.330 VMOV (between ARM core register and single-precision register)
+INST1(vmov_f2i, "vmov.f2i", 1, 0, IF_T2_VMOVS, 0xEE100A10) // A8.6.330 VMOV (between ARM core register and single-precision register)
+// clang-format on
+
+/*****************************************************************************/
+#undef INST1
+#undef INST2
+#undef INST3
+#undef INST4
+#undef INST5
+#undef INST6
+#undef INST7
+#undef INST8
+#undef INST9
+/*****************************************************************************/
diff --git a/src/jit/instrsarm64.h b/src/jit/instrsarm64.h
new file mode 100644
index 0000000000..e91aaa6836
--- /dev/null
+++ b/src/jit/instrsarm64.h
@@ -0,0 +1,954 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************
+ * Arm64 instructions for JIT compiler
+ *
+ * id -- the enum name for the instruction
+ * nm -- textual name (for assembly display)
+ * fp -- floating point instruction
+ * ld/st/cmp -- load/store/compare instruction
+ * fmt -- encoding format used by this instruction
+ * e1 -- encoding 1
+ * e2 -- encoding 2
+ * e3 -- encoding 3
+ * e4 -- encoding 4
+ * e5 -- encoding 5
+ * e6 -- encoding 6
+ * e7 -- encoding 7
+ * e8 -- encoding 8
+ * e9 -- encoding 9
+ *
+******************************************************************************/
+
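+// The INSTn lines below form an X-macro table: a client #defines INST1..INST6
+// and INST9 with the argument shapes documented above and then #includes this
+// file, so each entry expands once per instruction. A minimal illustrative
+// sketch of such a client, building a name table (the 'armInsNames' array is
+// hypothetical; the real emitter expansions differ):
+//
+//   #define INST1(id, nm, fp, ldst, fmt, e1)                                 nm,
+//   #define INST2(id, nm, fp, ldst, fmt, e1, e2)                             nm,
+//   #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3)                         nm,
+//   #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4)                     nm,
+//   #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5)                 nm,
+//   #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6)             nm,
+//   #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) nm,
+//
+//   static const char* const armInsNames[] = {
+//   #include "instrsarm64.h"
+//   };
+//
+//   // ... and then #undef each INSTn again, as instrsarm.h does at its end.
+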
+#if !defined(_TARGET_ARM64_)
+#error Unexpected target type
+#endif
+
+#ifndef INST1
+#error INST1 must be defined before including this file.
+#endif
+#ifndef INST2
+#error INST2 must be defined before including this file.
+#endif
+#ifndef INST3
+#error INST3 must be defined before including this file.
+#endif
+#ifndef INST4
+#error INST4 must be defined before including this file.
+#endif
+#ifndef INST5
+#error INST5 must be defined before including this file.
+#endif
+#ifndef INST6
+#error INST6 must be defined before including this file.
+#endif
+#ifndef INST9
+#error INST9 must be defined before including this file.
+#endif
+
+/*****************************************************************************/
+/* The following is ARM64-specific */
+/*****************************************************************************/
+
+// If you're adding a new instruction:
+// You need not only to fill in one of these macros describing the instruction, but also:
+// * If the instruction writes to more than one destination register, update the function
+// emitInsMayWriteMultipleRegs in emitArm64.cpp.
+
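+// A hedged sketch of the kind of check emitInsMayWriteMultipleRegs performs;
+// the exact signature and case list live in emitarm64.cpp and may differ, but
+// the pair loads named here do come from the table below:
+//
+//   bool emitter::emitInsMayWriteMultipleRegs(instrDesc* id)
+//   {
+//       switch (id->idIns())
+//       {
+//           case INS_ldp:   // load-pair forms write two destination registers
+//           case INS_ldpsw:
+//           case INS_ldnp:
+//               return true;
+//           default:
+//               return false;
+//       }
+//   }
+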
+// clang-format off
+INST9(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE)
+
+// enum name FP LD/ST DR_2E DR_2G DI_1B DI_1D DV_3C DV_2B DV_2C DV_2E DV_2F
+INST9(mov, "mov", 0, 0, IF_EN9, 0x2A0003E0, 0x11000000, 0x52800000, 0x320003E0, 0x0EA01C00, 0x0E003C00, 0x4E001C00, 0x5E000400, 0x6E000400)
+ // mov Rd,Rm DR_2E X0101010000mmmmm 00000011111ddddd 2A00 03E0
+ // mov Rd,Rn DR_2G X001000100000000 000000nnnnnddddd 1100 0000 mov to/from SP only
+ // mov Rd,imm(i16,hw) DI_1B X10100101hwiiiii iiiiiiiiiiiddddd 5280 0000 imm(i16,hw)
+ // mov Rd,imm(N,r,s) DI_1D X01100100Nrrrrrr ssssss11111ddddd 3200 03E0 imm(N,r,s)
+ // mov Vd,Vn DV_3C 0Q001110101nnnnn 000111nnnnnddddd 0EA0 1C00 Vd,Vn
+ // mov Rd,Vn[0] DV_2B 0Q001110000iiiii 001111nnnnnddddd 0E00 3C00 Rd,Vn[] (to general)
+ // mov Vd[],Rn DV_2C 01001110000iiiii 000111nnnnnddddd 4E00 1C00 Vd[],Rn (from general)
+ // mov Vd,Vn[] DV_2E 01011110000iiiii 000001nnnnnddddd 5E00 0400 Vd,Vn[] (scalar by elem)
+ // mov Vd[],Vn[] DV_2F 01101110000iiiii 0jjjj1nnnnnddddd 6E00 0400 Vd[],Vn[] (from/to elem)
+
+// enum name FP LD/ST DR_3A DR_3B DR_3C DI_2A DV_3A DV_3E
+INST6(add, "add", 0, 0, IF_EN6A, 0x0B000000, 0x0B000000, 0x0B200000, 0x11000000, 0x0E208400, 0x5EE08400)
+ // add Rd,Rn,Rm DR_3A X0001011000mmmmm 000000nnnnnddddd 0B00 0000 Rd,Rn,Rm
+ // add Rd,Rn,(Rm,shk,imm) DR_3B X0001011sh0mmmmm ssssssnnnnnddddd 0B00 0000 Rm {LSL,LSR,ASR} imm(0-63)
+ // add Rd,Rn,(Rm,ext,shl) DR_3C X0001011001mmmmm ooosssnnnnnddddd 0B20 0000 ext(Rm) LSL imm(0-4)
+ // add Rd,Rn,i12 DI_2A X0010001shiiiiii iiiiiinnnnnddddd 1100 0000 imm(i12,sh)
+ // add Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 100001nnnnnddddd 0E20 8400 Vd,Vn,Vm (vector)
+ // add Vd,Vn,Vm DV_3E 01011110111mmmmm 100001nnnnnddddd 5EE0 8400 Vd,Vn,Vm (scalar)
+
+INST6(sub, "sub", 0, 0, IF_EN6A, 0x4B000000, 0x4B000000, 0x4B200000, 0x51000000, 0x2E208400, 0x7EE08400)
+ // sub Rd,Rn,Rm DR_3A X1001011000mmmmm 000000nnnnnddddd 4B00 0000 Rd,Rn,Rm
+ // sub Rd,Rn,(Rm,shk,imm) DR_3B X1001011sh0mmmmm ssssssnnnnnddddd 4B00 0000 Rm {LSL,LSR,ASR} imm(0-63)
+ // sub Rd,Rn,(Rm,ext,shl) DR_3C X1001011001mmmmm ooosssnnnnnddddd 4B20 0000 ext(Rm) LSL imm(0-4)
+ // sub Rd,Rn,i12 DI_2A X1010001shiiiiii iiiiiinnnnnddddd 5100 0000 imm(i12,sh)
+ // sub Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 100001nnnnnddddd 2E20 8400 Vd,Vn,Vm (vector)
+ // sub Vd,Vn,Vm DV_3E 01111110111mmmmm 100001nnnnnddddd 7EE0 8400 Vd,Vn,Vm (scalar)
+
+// enum name FP LD/ST LS_2A LS_2B LS_2C LS_3A LS_1A
+INST5(ldr, "ldr", 0,LD, IF_EN5A, 0xB9400000, 0xB9400000, 0xB8400000, 0xB8600800, 0x18000000)
+ // ldr Rt,[Xn] LS_2A 1X11100101000000 000000nnnnnttttt B940 0000
+ // ldr Rt,[Xn+pimm12] LS_2B 1X11100101iiiiii iiiiiinnnnnttttt B940 0000 imm(0-4095<<{2,3})
+ // ldr Rt,[Xn+simm9] LS_2C 1X111000010iiiii iiiiPPnnnnnttttt B840 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // ldr Rt,[Xn,(Rm,ext,shl)] LS_3A 1X111000011mmmmm oooS10nnnnnttttt B860 0800 [Xn, ext(Rm) LSL {0,2,3}]
+ // ldr Vt/Rt,[PC+simm19<<2] LS_1A XX011V00iiiiiiii iiiiiiiiiiittttt 1800 0000 [PC +- imm(1MB)]
+
+INST5(ldrsw, "ldrsw", 0,LD, IF_EN5A, 0xB9800000, 0xB9800000, 0xB8800000, 0xB8A00800, 0x98000000)
+ // ldrsw Rt,[Xn] LS_2A 1011100110000000 000000nnnnnttttt B980 0000
+ // ldrsw Rt,[Xn+pimm12] LS_2B 1011100110iiiiii iiiiiinnnnnttttt B980 0000 imm(0-4095<<2)
+ // ldrsw Rt,[Xn+simm9] LS_2C 10111000100iiiii iiiiPPnnnnnttttt B880 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // ldrsw Rt,[Xn,(Rm,ext,shl)] LS_3A 10111000101mmmmm oooS10nnnnnttttt B8A0 0800 [Xn, ext(Rm) LSL {0,2}]
+ // ldrsw Rt,[PC+simm19<<2] LS_1A 10011000iiiiiiii iiiiiiiiiiittttt 9800 0000 [PC +- imm(1MB)]
+
+// enum name FP LD/ST DV_2G DV_2H DV_2I DV_1A DV_1B
+INST5(fmov, "fmov", 0, 0, IF_EN5B, 0x1E204000, 0x1E260000, 0x1E270000, 0x1E201000, 0x0F00F400)
+ // fmov Vd,Vn DV_2G 000111100X100000 010000nnnnnddddd 1E20 4000 Vd,Vn (scalar)
+ // fmov Rd,Vn DV_2H X00111100X100110 000000nnnnnddddd 1E26 0000 Rd,Vn (scalar, to general)
+ // fmov Vd,Rn DV_2I X00111100X100111 000000nnnnnddddd 1E27 0000 Vd,Rn (scalar, from general)
+ // fmov Vd,immfp DV_1A 000111100X1iiiii iii10000000ddddd 1E20 1000 Vd,immfp (scalar)
+ // fmov Vd,immfp DV_1B 0QX0111100000iii 111101iiiiiddddd 0F00 F400 Vd,immfp (immediate vector)
+
+// enum name FP LD/ST DR_3A DR_3B DI_2C DV_3C DV_1B
+INST5(orr, "orr", 0, 0, IF_EN5C, 0x2A000000, 0x2A000000, 0x32000000, 0x0EA01C00, 0x0F001400)
+ // orr Rd,Rn,Rm DR_3A X0101010000mmmmm 000000nnnnnddddd 2A00 0000
+ // orr Rd,Rn,(Rm,shk,imm) DR_3B X0101010sh0mmmmm iiiiiinnnnnddddd 2A00 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // orr Rd,Rn,imm(N,r,s) DI_2C X01100100Nrrrrrr ssssssnnnnnddddd 3200 0000 imm(N,r,s)
+ // orr Vd,Vn,Vm DV_3C 0Q001110101mmmmm 000111nnnnnddddd 0EA0 1C00 Vd,Vn,Vm
+ // orr Vd,imm8 DV_1B 0Q00111100000iii ---101iiiiiddddd 0F00 1400 Vd imm8 (immediate vector)
+
+// enum name FP LD/ST LS_2A LS_2B LS_2C LS_3A
+INST4(ldrb, "ldrb", 0,LD, IF_EN4A, 0x39400000, 0x39400000, 0x38400000, 0x38600800)
+ // ldrb Rt,[Xn] LS_2A 0011100101000000 000000nnnnnttttt 3940 0000
+ // ldrb Rt,[Xn+pimm12] LS_2B 0011100101iiiiii iiiiiinnnnnttttt 3940 0000 imm(0-4095)
+ // ldrb Rt,[Xn+simm9] LS_2C 00111000010iiiii iiiiPPnnnnnttttt 3840 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // ldrb Rt,[Xn,(Rm,ext,shl)] LS_3A 00111000011mmmmm oooS10nnnnnttttt 3860 0800 [Xn, ext(Rm)]
+
+INST4(ldrh, "ldrh", 0,LD, IF_EN4A, 0x79400000, 0x79400000, 0x78400000, 0x78600800)
+ // ldrh Rt,[Xn] LS_2A 0111100101000000 000000nnnnnttttt 7940 0000
+ // ldrh Rt,[Xn+pimm12] LS_2B 0111100101iiiiii iiiiiinnnnnttttt 7940 0000 imm(0-4095<<1)
+ // ldrh Rt,[Xn+simm9] LS_2C 01111000010iiiii iiiiPPnnnnnttttt 7840 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // ldrh Rt,[Xn,(Rm,ext,shl)] LS_3A 01111000011mmmmm oooS10nnnnnttttt 7860 0800 [Xn, ext(Rm) LSL {0,1}]
+
+INST4(ldrsb, "ldrsb", 0,LD, IF_EN4A, 0x39800000, 0x39800000, 0x38800000, 0x38A00800)
+ // ldrsb Rt,[Xn] LS_2A 001110011X000000 000000nnnnnttttt 3980 0000
+ // ldrsb Rt,[Xn+pimm12] LS_2B 001110011Xiiiiii iiiiiinnnnnttttt 3980 0000 imm(0-4095)
+ // ldrsb Rt,[Xn+simm9] LS_2C 001110001X0iiiii iiiiPPnnnnnttttt 3880 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // ldrsb Rt,[Xn,(Rm,ext,shl)] LS_3A 001110001X1mmmmm oooS10nnnnnttttt 38A0 0800 [Xn, ext(Rm)]
+
+INST4(ldrsh, "ldrsh", 0,LD, IF_EN4A, 0x79800000, 0x79800000, 0x78800000, 0x78A00800)
+ // ldrsh Rt,[Xn] LS_2A 011110011X000000 000000nnnnnttttt 7980 0000
+ // ldrsh Rt,[Xn+pimm12] LS_2B 011110011Xiiiiii iiiiiinnnnnttttt 7980 0000 imm(0-4095<<1)
+ // ldrsh Rt,[Xn+simm9] LS_2C 011110001X0iiiii iiiiPPnnnnnttttt 7880 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // ldrsh Rt,[Xn,(Rm,ext,shl)] LS_3A 011110001X1mmmmm oooS10nnnnnttttt 78A0 0800 [Xn, ext(Rm) LSL {0,1}]
+
+INST4(str, "str", 0,ST, IF_EN4A, 0xB9000000, 0xB9000000, 0xB8000000, 0xB8200800)
+ // str Rt,[Xn] LS_2A 1X11100100000000 000000nnnnnttttt B900 0000
+ // str Rt,[Xn+pimm12] LS_2B 1X11100100iiiiii iiiiiinnnnnttttt B900 0000 imm(0-4095<<{2,3})
+ // str Rt,[Xn+simm9] LS_2C 1X111000000iiiii iiiiPPnnnnnttttt B800 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // str Rt,[Xn,(Rm,ext,shl)] LS_3A 1X111000001mmmmm oooS10nnnnnttttt B820 0800 [Xn, ext(Rm)]
+
+INST4(strb, "strb", 0,ST, IF_EN4A, 0x39000000, 0x39000000, 0x38000000, 0x38200800)
+ // strb Rt,[Xn] LS_2A 0011100100000000 000000nnnnnttttt 3900 0000
+ // strb Rt,[Xn+pimm12] LS_2B 0011100100iiiiii iiiiiinnnnnttttt 3900 0000 imm(0-4095)
+ // strb Rt,[Xn+simm9] LS_2C 00111000000iiiii iiiiPPnnnnnttttt 3800 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // strb Rt,[Xn,(Rm,ext,shl)] LS_3A 00111000001mmmmm oooS10nnnnnttttt 3820 0800 [Xn, ext(Rm)]
+
+INST4(strh, "strh", 0,ST, IF_EN4A, 0x79000000, 0x79000000, 0x78000000, 0x78200800)
+ // strh Rt,[Xn] LS_2A 0111100100000000 000000nnnnnttttt 7900 0000
+ // strh Rt,[Xn+pimm12] LS_2B 0111100100iiiiii iiiiiinnnnnttttt 7900 0000 imm(0-4095<<1)
+ // strh Rt,[Xn+simm9] LS_2C 01111000000iiiii iiiiPPnnnnnttttt 7800 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // strh Rt,[Xn,(Rm,ext,shl)] LS_3A 01111000001mmmmm oooS10nnnnnttttt 7820 0800 [Xn, ext(Rm)]
+
+// enum name FP LD/ST DR_3A DR_3B DR_3C DI_2A
+INST4(adds, "adds", 0, 0, IF_EN4B, 0x2B000000, 0x2B000000, 0x2B200000, 0x31000000)
+ // adds Rd,Rn,Rm DR_3A X0101011000mmmmm 000000nnnnnddddd 2B00 0000
+ // adds Rd,Rn,(Rm,shk,imm) DR_3B X0101011sh0mmmmm ssssssnnnnnddddd 2B00 0000 Rm {LSL,LSR,ASR} imm(0-63)
+ // adds Rd,Rn,(Rm,ext,shl) DR_3C X0101011001mmmmm ooosssnnnnnddddd 2B20 0000 ext(Rm) LSL imm(0-4)
+ // adds Rd,Rn,i12 DI_2A X0110001shiiiiii iiiiiinnnnnddddd 3100 0000 imm(i12,sh)
+
+INST4(subs, "subs", 0, 0, IF_EN4B, 0x6B000000, 0x6B000000, 0x6B200000, 0x71000000)
+ // subs Rd,Rn,Rm DR_3A X1101011000mmmmm 000000nnnnnddddd 6B00 0000
+ // subs Rd,Rn,(Rm,shk,imm) DR_3B X1101011sh0mmmmm ssssssnnnnnddddd 6B00 0000 Rm {LSL,LSR,ASR} imm(0-63)
+ // subs Rd,Rn,(Rm,ext,shl) DR_3C X1101011001mmmmm ooosssnnnnnddddd 6B20 0000 ext(Rm) LSL imm(0-4)
+ // subs Rd,Rn,i12 DI_2A X1110001shiiiiii iiiiiinnnnnddddd 7100 0000 imm(i12,sh)
+
+// enum name FP LD/ST DR_2A DR_2B DR_2C DI_1A
+INST4(cmp, "cmp", 0,CMP,IF_EN4C, 0x6B00001F, 0x6B00001F, 0x6B20001F, 0x7100001F)
+ // cmp Rn,Rm DR_2A X1101011000mmmmm 000000nnnnn11111 6B00 001F
+ // cmp Rn,(Rm,shk,imm) DR_2B X1101011sh0mmmmm ssssssnnnnn11111 6B00 001F Rm {LSL,LSR,ASR} imm(0-63)
+ // cmp Rn,(Rm,ext,shl) DR_2C X1101011001mmmmm ooosssnnnnn11111 6B20 001F ext(Rm) LSL imm(0-4)
+ // cmp Rn,i12 DI_1A X111000100iiiiii iiiiiinnnnn11111 7100 001F imm(i12,sh)
+
+INST4(cmn, "cmn", 0,CMP,IF_EN4C, 0x2B00001F, 0x2B00001F, 0x2B20001F, 0x3100001F)
+ // cmn Rn,Rm DR_2A X0101011000mmmmm 000000nnnnn11111 2B00 001F
+ // cmn Rn,(Rm,shk,imm) DR_2B X0101011sh0mmmmm ssssssnnnnn11111 2B00 001F Rm {LSL,LSR,ASR} imm(0-63)
+ // cmn Rn,(Rm,ext,shl) DR_2C X0101011001mmmmm ooosssnnnnn11111 2B20 001F ext(Rm) LSL imm(0-4)
+ // cmn Rn,i12 DI_1A X0110001shiiiiii iiiiiinnnnn11111 3100 001F imm(0-4095)
+
+// enum name FP LD/ST DV_3B DV_3D DV_3BI DV_3DI
+INST4(fmul, "fmul", 0, 0, IF_EN4D, 0x2E20DC00, 0x1E200800, 0x0F809000, 0x5F809000)
+ // fmul Vd,Vn,Vm DV_3B 0Q1011100X1mmmmm 110111nnnnnddddd 2E20 DC00 Vd,Vn,Vm (vector)
+ // fmul Vd,Vn,Vm DV_3D 000111100X1mmmmm 000010nnnnnddddd 1E20 0800 Vd,Vn,Vm (scalar)
+ // fmul Vd,Vn,Vm[] DV_3BI 0Q0011111XLmmmmm 1001H0nnnnnddddd 0F80 9000 Vd,Vn,Vm[] (vector by elem)
+ // fmul Vd,Vn,Vm[] DV_3DI 010111111XLmmmmm 1001H0nnnnnddddd 5F80 9000 Vd,Vn,Vm[] (scalar by elem)
+
+INST4(fmulx, "fmulx", 0, 0, IF_EN4D, 0x0E20DC00, 0x5E20DC00, 0x2F809000, 0x7F809000)
+ // fmulx Vd,Vn,Vm DV_3B 0Q0011100X1mmmmm 110111nnnnnddddd 0E20 DC00 Vd,Vn,Vm (vector)
+ // fmulx Vd,Vn,Vm DV_3D 010111100X1mmmmm 110111nnnnnddddd 5E20 DC00 Vd,Vn,Vm (scalar)
+ // fmulx Vd,Vn,Vm[] DV_3BI 0Q1011111XLmmmmm 1001H0nnnnnddddd 2F80 9000 Vd,Vn,Vm[] (vector by elem)
+ // fmulx Vd,Vn,Vm[] DV_3DI 011111111XLmmmmm 1001H0nnnnnddddd 7F80 9000 Vd,Vn,Vm[] (scalar by elem)
+
+// enum name FP LD/ST DR_3A DR_3B DI_2C DV_3C
+INST4(and, "and", 0, 0, IF_EN4E, 0x0A000000, 0x0A000000, 0x12000000, 0x0E201C00)
+ // and Rd,Rn,Rm DR_3A X0001010000mmmmm 000000nnnnnddddd 0A00 0000
+ // and Rd,Rn,(Rm,shk,imm) DR_3B X0001010sh0mmmmm iiiiiinnnnnddddd 0A00 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // and Rd,Rn,imm(N,r,s) DI_2C X00100100Nrrrrrr ssssssnnnnnddddd 1200 0000 imm(N,r,s)
+ // and Vd,Vn,Vm DV_3C 0Q001110001mmmmm 000111nnnnnddddd 0E20 1C00 Vd,Vn,Vm
+
+INST4(eor, "eor", 0, 0, IF_EN4E, 0x4A000000, 0x4A000000, 0x52000000, 0x2E201C00)
+ // eor Rd,Rn,Rm DR_3A X1001010000mmmmm 000000nnnnnddddd 4A00 0000
+ // eor Rd,Rn,(Rm,shk,imm) DR_3B X1001010sh0mmmmm iiiiiinnnnnddddd 4A00 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // eor Rd,Rn,imm(N,r,s) DI_2C X10100100Nrrrrrr ssssssnnnnnddddd 5200 0000 imm(N,r,s)
+ // eor Vd,Vn,Vm DV_3C 0Q101110001mmmmm 000111nnnnnddddd 2E20 1C00 Vd,Vn,Vm
+
+// enum name FP LD/ST DR_3A DR_3B DV_3C DV_1B
+INST4(bic, "bic", 0, 0, IF_EN4F, 0x0A200000, 0x0A200000, 0x0E601C00, 0x2F001400)
+ // bic Rd,Rn,Rm DR_3A X0001010001mmmmm 000000nnnnnddddd 0A20 0000
+ // bic Rd,Rn,(Rm,shk,imm) DR_3B X0001010sh1mmmmm iiiiiinnnnnddddd 0A20 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // bic Vd,Vn,Vm DV_3C 0Q001110011mmmmm 000111nnnnnddddd 0E60 1C00 Vd,Vn,Vm
+ // bic Vd,imm8 DV_1B 0Q10111100000iii ---101iiiiiddddd 2F00 1400 Vd imm8 (immediate vector)
+
+// enum name FP LD/ST DR_2E DR_2F DV_2M DV_2L
+INST4(neg, "neg", 0, 0, IF_EN4G, 0x4B0003E0, 0x4B0003E0, 0x2E20B800, 0x7E20B800)
+ // neg Rd,Rm DR_2E X1001011000mmmmm 00000011111ddddd 4B00 03E0
+ // neg Rd,(Rm,shk,imm) DR_2F X1001011sh0mmmmm ssssss11111ddddd 4B00 03E0 Rm {LSL,LSR,ASR} imm(0-63)
+ // neg Vd,Vn DV_2M 0Q101110XX100000 101110nnnnnddddd 2E20 B800 Vd,Vn (vector)
+ // neg Vd,Vn DV_2L 01111110XX100000 101110nnnnnddddd 7E20 B800 Vd,Vn (scalar)
+
+// enum name FP LD/ST DR_3A DR_3B DI_2C
+INST3(ands, "ands", 0, 0, IF_EN3A, 0x6A000000, 0x6A000000, 0x72000000)
+ // ands Rd,Rn,Rm DR_3A X1101010000mmmmm 000000nnnnnddddd 6A00 0000
+ // ands Rd,Rn,(Rm,shk,imm) DR_3B X1101010sh0mmmmm iiiiiinnnnnddddd 6A00 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // ands Rd,Rn,imm(N,r,s) DI_2C X11100100Nrrrrrr ssssssnnnnnddddd 7200 0000 imm(N,r,s)
+
+// enum name FP LD/ST DR_2A DR_2B DI_1C
+INST3(tst, "tst", 0, 0, IF_EN3B, 0x6A00001F, 0x6A00001F, 0x7200001F)
+ // tst Rn,Rm DR_2A X1101010000mmmmm 000000nnnnn11111 6A00 001F
+ // tst Rn,(Rm,shk,imm) DR_2B X1101010sh0mmmmm iiiiiinnnnn11111 6A00 001F Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // tst Rn,imm(N,r,s) DI_1C X11100100Nrrrrrr ssssssnnnnn11111 7200 001F imm(N,r,s)
+
+// enum name FP LD/ST DR_3A DR_3B DV_3C
+INST3(orn, "orn", 0, 0, IF_EN3C, 0x2A200000, 0x2A200000, 0x0EE01C00)
+ // orn Rd,Rn,Rm DR_3A X0101010001mmmmm 000000nnnnnddddd 2A20 0000
+ // orn Rd,Rn,(Rm,shk,imm) DR_3B X0101010sh1mmmmm iiiiiinnnnnddddd 2A20 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // orn Vd,Vn,Vm DV_3C 0Q001110111mmmmm 000111nnnnnddddd 0EE0 1C00 Vd,Vn,Vm
+
+// enum name FP LD/ST DV_2C DV_2D DV_2E
+INST3(dup, "dup", 0, 0, IF_EN3D, 0x0E000C00, 0x0E000400, 0x5E000400)
+ // dup Vd,Rn DV_2C 0Q001110000iiiii 000011nnnnnddddd 0E00 0C00 Vd,Rn (vector from general)
+ // dup Vd,Vn[] DV_2D 0Q001110000iiiii 000001nnnnnddddd 0E00 0400 Vd,Vn[] (vector by elem)
+ // dup Vd,Vn[] DV_2E 01011110000iiiii 000001nnnnnddddd 5E00 0400 Vd,Vn[] (scalar by elem)
+
+// enum name FP LD/ST DV_3B DV_3BI DV_3DI
+INST3(fmla, "fmla", 0, 0, IF_EN3E, 0x0E20CC00, 0x0F801000, 0x5F801000)
+ // fmla Vd,Vn,Vm DV_3B 0Q0011100X1mmmmm 110011nnnnnddddd 0E20 CC00 Vd,Vn,Vm (vector)
+ // fmla Vd,Vn,Vm[] DV_3BI 0Q0011111XLmmmmm 0001H0nnnnnddddd 0F80 1000 Vd,Vn,Vm[] (vector by elem)
+ // fmla Vd,Vn,Vm[] DV_3DI 010111111XLmmmmm 0001H0nnnnnddddd 5F80 1000 Vd,Vn,Vm[] (scalar by elem)
+
+INST3(fmls, "fmls", 0, 0, IF_EN3E, 0x0EA0CC00, 0x0F805000, 0x5F805000)
+ // fmls Vd,Vn,Vm DV_3B 0Q0011101X1mmmmm 110011nnnnnddddd 0EA0 CC00 Vd,Vn,Vm (vector)
+ // fmls Vd,Vn,Vm[] DV_3BI 0Q0011111XLmmmmm 0101H0nnnnnddddd 0F80 5000 Vd,Vn,Vm[] (vector by elem)
+ // fmls Vd,Vn,Vm[] DV_3DI 010111111XLmmmmm 0101H0nnnnnddddd 5F80 5000 Vd,Vn,Vm[] (scalar by elem)
+
+// enum name FP LD/ST DV_2A DV_2G DV_2H
+INST3(fcvtas, "fcvtas", 0, 0, IF_EN3F, 0x0E21C800, 0x5E21C800, 0x1E240000)
+ // fcvtas Vd,Vn DV_2A 0Q0011100X100001 110010nnnnnddddd 0E21 C800 Vd,Vn (vector)
+ // fcvtas Vd,Vn DV_2G 010111100X100001 110010nnnnnddddd 5E21 C800 Vd,Vn (scalar)
+ // fcvtas Rd,Vn DV_2H X00111100X100100 000000nnnnnddddd 1E24 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtau, "fcvtau", 0, 0, IF_EN3F, 0x2E21C800, 0x7E21C800, 0x1E250000)
+ // fcvtau Vd,Vn DV_2A 0Q1011100X100001 110010nnnnnddddd 2E21 C800 Vd,Vn (vector)
+ // fcvtau Vd,Vn DV_2G 011111100X100001 110010nnnnnddddd 7E21 C800 Vd,Vn (scalar)
+ // fcvtau Rd,Vn DV_2H X00111100X100101 000000nnnnnddddd 1E25 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtms, "fcvtms", 0, 0, IF_EN3F, 0x0E21B800, 0x5E21B800, 0x1E300000)
+ // fcvtms Vd,Vn DV_2A 0Q0011100X100001 101110nnnnnddddd 0E21 B800 Vd,Vn (vector)
+ // fcvtms Vd,Vn DV_2G 010111100X100001 101110nnnnnddddd 5E21 B800 Vd,Vn (scalar)
+ // fcvtms Rd,Vn DV_2H X00111100X110000 000000nnnnnddddd 1E30 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtmu, "fcvtmu", 0, 0, IF_EN3F, 0x2E21B800, 0x7E21B800, 0x1E310000)
+ // fcvtmu Vd,Vn DV_2A 0Q1011100X100001 101110nnnnnddddd 2E21 B800 Vd,Vn (vector)
+ // fcvtmu Vd,Vn DV_2G 011111100X100001 101110nnnnnddddd 7E21 B800 Vd,Vn (scalar)
+ // fcvtmu Rd,Vn DV_2H X00111100X110001 000000nnnnnddddd 1E31 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtns, "fcvtns", 0, 0, IF_EN3F, 0x0E21A800, 0x5E21A800, 0x1E200000)
+ // fcvtns Vd,Vn DV_2A 0Q0011100X100001 101010nnnnnddddd 0E21 A800 Vd,Vn (vector)
+ // fcvtns Vd,Vn DV_2G 010111100X100001 101010nnnnnddddd 5E21 A800 Vd,Vn (scalar)
+ // fcvtns Rd,Vn DV_2H X00111100X100000 000000nnnnnddddd 1E20 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtnu, "fcvtnu", 0, 0, IF_EN3F, 0x2E21A800, 0x7E21A800, 0x1E210000)
+ // fcvtnu Vd,Vn DV_2A 0Q1011100X100001 101010nnnnnddddd 2E21 A800 Vd,Vn (vector)
+ // fcvtnu Vd,Vn DV_2G 011111100X100001 101010nnnnnddddd 7E21 A800 Vd,Vn (scalar)
+ // fcvtnu Rd,Vn DV_2H X00111100X100001 000000nnnnnddddd 1E21 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtps, "fcvtps", 0, 0, IF_EN3F, 0x0EA1A800, 0x5EA1A800, 0x1E280000)
+ // fcvtps Vd,Vn DV_2A 0Q0011101X100001 101010nnnnnddddd 0EA1 A800 Vd,Vn (vector)
+ // fcvtps Vd,Vn DV_2G 010111101X100001 101010nnnnnddddd 5EA1 A800 Vd,Vn (scalar)
+ // fcvtps Rd,Vn DV_2H X00111100X101000 000000nnnnnddddd 1E28 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtpu, "fcvtpu", 0, 0, IF_EN3F, 0x2EA1A800, 0x7EA1A800, 0x1E290000)
+ // fcvtpu Vd,Vn DV_2A 0Q1011101X100001 101010nnnnnddddd 2EA1 A800 Vd,Vn (vector)
+ // fcvtpu Vd,Vn DV_2G 011111101X100001 101010nnnnnddddd 7EA1 A800 Vd,Vn (scalar)
+ // fcvtpu Rd,Vn DV_2H X00111100X101001 000000nnnnnddddd 1E29 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtzs, "fcvtzs", 0, 0, IF_EN3F, 0x0EA1B800, 0x5EA1B800, 0x1E380000)
+ // fcvtzs Vd,Vn DV_2A 0Q0011101X100001 101110nnnnnddddd 0EA1 B800 Vd,Vn (vector)
+ // fcvtzs Vd,Vn DV_2G 010111101X100001 101110nnnnnddddd 5EA1 B800 Vd,Vn (scalar)
+ // fcvtzs Rd,Vn DV_2H X00111100X111000 000000nnnnnddddd 1E38 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtzu, "fcvtzu", 0, 0, IF_EN3F, 0x2EA1B800, 0x7EA1B800, 0x1E390000)
+ // fcvtzu Vd,Vn DV_2A 0Q1011101X100001 101110nnnnnddddd 2EA1 B800 Vd,Vn (vector)
+ // fcvtzu Vd,Vn DV_2G 011111101X100001 101110nnnnnddddd 7EA1 B800 Vd,Vn (scalar)
+ // fcvtzu Rd,Vn DV_2H X00111100X111001 000000nnnnnddddd 1E39 0000 Rd,Vn (scalar, to general)
+
+// enum name FP LD/ST DV_2A DV_2G DV_2I
+INST3(scvtf, "scvtf", 0, 0, IF_EN3G, 0x0E21D800, 0x5E21D800, 0x1E220000)
+ // scvtf Vd,Vn DV_2A 0Q0011100X100001 110110nnnnnddddd 0E21 D800 Vd,Vn (vector)
+ // scvtf Vd,Vn DV_2G 010111100X100001 110110nnnnnddddd 5E21 D800 Vd,Vn (scalar)
+ // scvtf Rd,Vn DV_2I X00111100X100010 000000nnnnnddddd 1E22 0000 Vd,Rn (scalar, from general)
+
+INST3(ucvtf, "ucvtf", 0, 0, IF_EN3G, 0x2E21D800, 0x7E21D800, 0x1E230000)
+ // ucvtf Vd,Vn DV_2A 0Q1011100X100001 110110nnnnnddddd 2E21 D800 Vd,Vn (vector)
+ // ucvtf Vd,Vn DV_2G 011111100X100001 110110nnnnnddddd 7E21 D800 Vd,Vn (scalar)
+ // ucvtf Rd,Vn DV_2I X00111100X100011 000000nnnnnddddd 1E23 0000 Vd,Rn (scalar, from general)
+
+INST3(mul, "mul", 0, 0, IF_EN3H, 0x1B007C00, 0x0E209C00, 0x0F008000)
+ // mul Rd,Rn,Rm DR_3A X0011011000mmmmm 011111nnnnnddddd 1B00 7C00
+ // mul Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 100111nnnnnddddd 0E20 9C00 Vd,Vn,Vm (vector)
+ // mul Vd,Vn,Vm[] DV_3AI 0Q001111XXLMmmmm 1000H0nnnnnddddd 0F00 8000 Vd,Vn,Vm[] (vector by elem)
+
+// enum name FP LD/ST DR_2E DR_2F DV_2M
+INST3(mvn, "mvn", 0, 0, IF_EN3I, 0x2A2003E0, 0x2A2003E0, 0x2E205800)
+ // mvn Rd,Rm DR_2E X0101010001mmmmm 00000011111ddddd 2A20 03E0
+ // mvn Rd,(Rm,shk,imm) DR_2F X0101010sh1mmmmm iiiiii11111ddddd 2A20 03E0 Rm {LSL,LSR,ASR} imm(0-63)
+ // mvn Vd,Vn DV_2M 0Q10111000100000 010110nnnnnddddd 2E20 5800 Vd,Vn (vector)
+
+
+// enum name FP LD/ST DR_2E DR_2F
+INST2(negs, "negs", 0, 0, IF_EN2A, 0x6B0003E0, 0x6B0003E0)
+ // negs Rd,Rm DR_2E X1101011000mmmmm 00000011111ddddd 6B00 03E0
+ // negs Rd,(Rm,shk,imm) DR_2F X1101011sh0mmmmm ssssss11111ddddd 6B00 03E0 Rm {LSL,LSR,ASR} imm(0-63)
+
+// enum name FP LD/ST DR_3A DR_3B
+INST2(bics, "bics", 0, 0, IF_EN2B, 0x6A200000, 0x6A200000)
+ // bics Rd,Rn,Rm DR_3A X1101010001mmmmm 000000nnnnnddddd 6A20 0000
+ // bics Rd,Rn,(Rm,shk,imm) DR_3B X1101010sh1mmmmm iiiiiinnnnnddddd 6A20 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+
+INST2(eon, "eon", 0, 0, IF_EN2B, 0x4A200000, 0x4A200000)
+ // eon Rd,Rn,Rm DR_3A X1001010001mmmmm 000000nnnnnddddd 4A20 0000
+ // eon Rd,Rn,(Rm,shk,imm) DR_3B X1001010sh1mmmmm iiiiiinnnnnddddd 4A20 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+
+// enum name FP LD/ST DR_3A DI_2D
+INST2(lsl, "lsl", 0, 0, IF_EN2C, 0x1AC02000, 0x53000000)
+ // lsl Rd,Rn,Rm DR_3A X0011010110mmmmm 001000nnnnnddddd 1AC0 2000
+ // lsl Rd,Rn,imm6 DI_2D X10100110Xrrrrrr ssssssnnnnnddddd 5300 0000 imm(N,r,s)
+
+INST2(lsr, "lsr", 0, 0, IF_EN2C, 0x1AC02400, 0x53000000)
+ // lsr Rd,Rn,Rm DR_3A X0011010110mmmmm 001001nnnnnddddd 1AC0 2400
+ // lsr Rd,Rn,imm6 DI_2D X10100110Xrrrrrr ssssssnnnnnddddd 5300 0000 imm(N,r,s)
+
+INST2(asr, "asr", 0, 0, IF_EN2C, 0x1AC02800, 0x13000000)
+ // asr Rd,Rn,Rm DR_3A X0011010110mmmmm 001010nnnnnddddd 1AC0 2800
+ // asr Rd,Rn,imm6 DI_2D X00100110Xrrrrrr ssssssnnnnnddddd 1300 0000 imm(N,r,s)
+
+// enum name FP LD/ST DR_3A DI_2B
+INST2(ror, "ror", 0, 0, IF_EN2D, 0x1AC02C00, 0x13800000)
+ // ror Rd,Rn,Rm DR_3A X0011010110mmmmm 001011nnnnnddddd 1AC0 2C00
+ // ror Rd,Rn,imm6 DI_2B X00100111X0nnnnn ssssssnnnnnddddd 1380 0000 imm(0-63)
+
+// enum name FP LD/ST LS_3B LS_3C
+INST2(ldp, "ldp", 0,LD, IF_EN2E, 0x29400000, 0x28400000)
+ // ldp Rt,Ra,[Xn] LS_3B X010100101000000 0aaaaannnnnttttt 2940 0000 [Xn imm7]
+ // ldp Rt,Ra,[Xn+simm7] LS_3C X010100PP1iiiiii iaaaaannnnnttttt 2840 0000 [Xn imm7 LSL {} pre/post/no inc]
+
+INST2(ldpsw, "ldpsw", 0,LD, IF_EN2E, 0x69400000, 0x68400000)
+ // ldpsw Rt,Ra,[Xn] LS_3B 0110100101000000 0aaaaannnnnttttt 6940 0000 [Xn imm7]
+ // ldpsw Rt,Ra,[Xn+simm7] LS_3C 0110100PP1iiiiii iaaaaannnnnttttt 6840 0000 [Xn imm7 LSL {} pre/post/no inc]
+
+INST2(stp, "stp", 0,ST, IF_EN2E, 0x29000000, 0x28000000)
+ // stp Rt,Ra,[Xn] LS_3B X010100100000000 0aaaaannnnnttttt 2900 0000 [Xn imm7]
+ // stp Rt,Ra,[Xn+simm7] LS_3C X010100PP0iiiiii iaaaaannnnnttttt 2800 0000 [Xn imm7 LSL {} pre/post/no inc]
+
+INST2(ldnp, "ldnp", 0,LD, IF_EN2E, 0x28400000, 0x28400000)
+ // ldnp Rt,Ra,[Xn] LS_3B X010100001000000 0aaaaannnnnttttt 2840 0000 [Xn imm7]
+ // ldnp Rt,Ra,[Xn+simm7] LS_3C X010100001iiiiii iaaaaannnnnttttt 2840 0000 [Xn imm7 LSL {}]
+
+INST2(stnp, "stnp", 0,ST, IF_EN2E, 0x28000000, 0x28000000)
+ // stnp Rt,Ra,[Xn] LS_3B X010100000000000 0aaaaannnnnttttt 2800 0000 [Xn imm7]
+ // stnp Rt,Ra,[Xn+simm7] LS_3C X010100000iiiiii iaaaaannnnnttttt 2800 0000 [Xn imm7 LSL {}]
+
+INST2(ccmp, "ccmp", 0,CMP,IF_EN2F, 0x7A400000, 0x7A400800)
+ // ccmp Rn,Rm, nzcv,cond DR_2I X1111010010mmmmm cccc00nnnnn0nzcv 7A40 0000 nzcv, cond
+ // ccmp Rn,imm5,nzcv,cond DI_1F X1111010010iiiii cccc10nnnnn0nzcv 7A40 0800 imm5, nzcv, cond
+
+INST2(ccmn, "ccmn", 0,CMP,IF_EN2F, 0x3A400000, 0x3A400800)
+ // ccmn Rn,Rm, nzcv,cond DR_2I X0111010010mmmmm cccc00nnnnn0nzcv 3A40 0000 nzcv, cond
+ // ccmn Rn,imm5,nzcv,cond DI_1F X0111010010iiiii cccc10nnnnn0nzcv 3A40 0800 imm5, nzcv, cond
+
+// enum name FP LD/ST DV_2C DV_2F
+INST2(ins, "ins", 0, 0, IF_EN2H, 0x4E001C00, 0x6E000400)
+ // ins Vd[],Rn DV_2C 01001110000iiiii 000111nnnnnddddd 4E00 1C00 Vd[],Rn (from general)
+ // ins Vd[],Vn[] DV_2F 01101110000iiiii 0jjjj1nnnnnddddd 6E00 0400 Vd[],Vn[] (from/to elem)
+
+// enum name FP LD/ST DV_3B DV_3D
+INST2(fadd, "fadd", 0, 0, IF_EN2G, 0x0E20D400, 0x1E202800)
+ // fadd Vd,Vn,Vm DV_3B 0Q0011100X1mmmmm 110101nnnnnddddd 0E20 D400 Vd,Vn,Vm (vector)
+ // fadd Vd,Vn,Vm DV_3D 000111100X1mmmmm 001010nnnnnddddd 1E20 2800 Vd,Vn,Vm (scalar)
+
+INST2(fsub, "fsub", 0, 0, IF_EN2G, 0x0EA0D400, 0x1E203800)
+ // fsub Vd,Vn,Vm DV_3B 0Q0011101X1mmmmm 110101nnnnnddddd 0EA0 D400 Vd,Vn,Vm (vector)
+ // fsub Vd,Vn,Vm DV_3D 000111100X1mmmmm 001110nnnnnddddd 1E20 3800 Vd,Vn,Vm (scalar)
+
+INST2(fdiv, "fdiv", 0, 0, IF_EN2G, 0x2E20FC00, 0x1E201800)
+ // fdiv Vd,Vn,Vm DV_3B 0Q1011100X1mmmmm 111111nnnnnddddd 2E20 FC00 Vd,Vn,Vm (vector)
+ // fdiv Vd,Vn,Vm DV_3D 000111100X1mmmmm 000110nnnnnddddd 1E20 1800 Vd,Vn,Vm (scalar)
+
+INST2(fmax, "fmax", 0, 0, IF_EN2G, 0x0E20F400, 0x1E204800)
+ // fmax Vd,Vn,Vm DV_3B 0Q0011100X1mmmmm 111101nnnnnddddd 0E20 F400 Vd,Vn,Vm (vector)
+ // fmax Vd,Vn,Vm DV_3D 000111100X1mmmmm 010010nnnnnddddd 1E20 4800 Vd,Vn,Vm (scalar)
+
+INST2(fmin, "fmin", 0, 0, IF_EN2G, 0x0EA0F400, 0x1E205800)
+ // fmin Vd,Vn,Vm DV_3B 0Q0011101X1mmmmm 111101nnnnnddddd 0EA0 F400 Vd,Vn,Vm (vector)
+ // fmin Vd,Vn,Vm DV_3D 000111100X1mmmmm 010110nnnnnddddd 1E20 5800 Vd,Vn,Vm (scalar)
+
+INST2(fabd, "fabd", 0, 0, IF_EN2G, 0x2EA0D400, 0x7EA0D400)
+ // fabd Vd,Vn,Vm DV_3B 0Q1011101X1mmmmm 110101nnnnnddddd 2EA0 D400 Vd,Vn,Vm (vector)
+ // fabd Vd,Vn,Vm DV_3D 011111101X1mmmmm 110101nnnnnddddd 7EA0 D400 Vd,Vn,Vm (scalar)
+
+// enum name FP LD/ST DV_2K DV_1C
+INST2(fcmp, "fcmp", 0, 0, IF_EN2I, 0x1E202000, 0x1E202008)
+ // fcmp Vn,Vm DV_2K 000111100X1mmmmm 001000nnnnn00000 1E20 2000 Vn Vm
+ // fcmp Vn,#0.0 DV_1C 000111100X100000 001000nnnnn01000 1E20 2008 Vn #0.0
+
+INST2(fcmpe, "fcmpe", 0, 0, IF_EN2I, 0x1E202010, 0x1E202018)
+ // fcmpe Vn,Vm DV_2K 000111100X1mmmmm 001000nnnnn10000 1E20 2010 Vn Vm
+ // fcmpe Vn,#0.0 DV_1C 000111100X100000 001000nnnnn11000 1E20 2018 Vn #0.0
+
+// enum name FP LD/ST DV_2A DV_2G
+INST2(fabs, "fabs", 0, 0, IF_EN2J, 0x0EA0F800, 0x1E20C000)
+ // fabs Vd,Vn DV_2A 0Q0011101X100000 111110nnnnnddddd 0EA0 F800 Vd,Vn (vector)
+ // fabs Vd,Vn DV_2G 000111100X100000 110000nnnnnddddd 1E20 C000 Vd,Vn (scalar)
+
+INST2(fneg, "fneg", 0, 0, IF_EN2J, 0x2EA0F800, 0x1E214000)
+ // fneg Vd,Vn DV_2A 0Q1011101X100000 111110nnnnnddddd 2EA0 F800 Vd,Vn (vector)
+ // fneg Vd,Vn DV_2G 000111100X100001 010000nnnnnddddd 1E21 4000 Vd,Vn (scalar)
+
+INST2(fsqrt, "fsqrt", 0, 0, IF_EN2J, 0x2EA1F800, 0x1E21C000)
+ // fsqrt Vd,Vn DV_2A 0Q1011101X100001 111110nnnnnddddd 2EA1 F800 Vd,Vn (vector)
+ // fsqrt Vd,Vn DV_2G 000111100X100001 110000nnnnnddddd 1E21 C000 Vd,Vn (scalar)
+
+INST2(frintn, "frintn", 0, 0, IF_EN2J, 0x0E218800, 0x1E244000)
+ // frintn Vd,Vn DV_2A 0Q0011100X100001 100010nnnnnddddd 0E21 8800 Vd,Vn (vector)
+ // frintn Vd,Vn DV_2G 000111100X100100 010000nnnnnddddd 1E24 4000 Vd,Vn (scalar)
+
+INST2(frintp, "frintp", 0, 0, IF_EN2J, 0x0EA18800, 0x1E24C000)
+ // frintp Vd,Vn DV_2A 0Q0011101X100001 100010nnnnnddddd 0EA1 8800 Vd,Vn (vector)
+ // frintp Vd,Vn DV_2G 000111100X100100 110000nnnnnddddd 1E24 C000 Vd,Vn (scalar)
+
+INST2(frintm, "frintm", 0, 0, IF_EN2J, 0x0E219800, 0x1E254000)
+ // frintm Vd,Vn DV_2A 0Q0011100X100001 100110nnnnnddddd 0E21 9800 Vd,Vn (vector)
+ // frintm Vd,Vn DV_2G 000111100X100101 010000nnnnnddddd 1E25 4000 Vd,Vn (scalar)
+
+INST2(frintz, "frintz", 0, 0, IF_EN2J, 0x0EA19800, 0x1E25C000)
+ // frintz Vd,Vn DV_2A 0Q0011101X100001 100110nnnnnddddd 0EA1 9800 Vd,Vn (vector)
+ // frintz Vd,Vn DV_2G 000111100X100101 110000nnnnnddddd 1E25 C000 Vd,Vn (scalar)
+
+INST2(frinta, "frinta", 0, 0, IF_EN2J, 0x2E218800, 0x1E264000)
+ // frinta Vd,Vn DV_2A 0Q1011100X100001 100010nnnnnddddd 2E21 8800 Vd,Vn (vector)
+ // frinta Vd,Vn DV_2G 000111100X100110 010000nnnnnddddd 1E26 4000 Vd,Vn (scalar)
+
+INST2(frintx, "frintx", 0, 0, IF_EN2J, 0x2E219800, 0x1E274000)
+ // frintx Vd,Vn DV_2A 0Q1011100X100001 100110nnnnnddddd 2E21 9800 Vd,Vn (vector)
+ // frintx Vd,Vn DV_2G 000111100X100111 010000nnnnnddddd 1E27 4000 Vd,Vn (scalar)
+
+INST2(frinti, "frinti", 0, 0, IF_EN2J, 0x2EA19800, 0x1E27C000)
+ // frinti Vd,Vn DV_2A 0Q1011101X100001 100110nnnnnddddd 2EA1 9800 Vd,Vn (vector)
+ // frinti Vd,Vn DV_2G 000111100X100111 110000nnnnnddddd 1E27 C000 Vd,Vn (scalar)
+
+// enum name FP LD/ST DV_2M DV_2L
+INST2(abs, "abs", 0, 0, IF_EN2K, 0x0E20B800, 0x5E20B800)
+ // abs Vd,Vn DV_2M 0Q001110XX100000 101110nnnnnddddd 0E20 B800 Vd,Vn (vector)
+ // abs Vd,Vn DV_2L 01011110XX100000 101110nnnnnddddd 5E20 B800 Vd,Vn (scalar)
+
+// enum name FP LD/ST DR_2G DV_2M
+INST2(cls, "cls", 0, 0, IF_EN2L, 0x5AC01400, 0x0E204800)
+ // cls Rd,Rm DR_2G X101101011000000 000101nnnnnddddd 5AC0 1400 Rd Rn (general)
+ // cls Vd,Vn DV_2M 0Q00111000100000 010010nnnnnddddd 0E20 4800 Vd,Vn (vector)
+
+INST2(clz, "clz", 0, 0, IF_EN2L, 0x5AC01000, 0x2E204800)
+ // clz Rd,Rm DR_2G X101101011000000 000100nnnnnddddd 5AC0 1000 Rd Rn (general)
+ // clz Vd,Vn DV_2M 0Q10111000100000 010010nnnnnddddd 2E20 4800 Vd,Vn (vector)
+
+INST2(rbit, "rbit", 0, 0, IF_EN2L, 0x5AC00000, 0x2E605800)
+ // rbit Rd,Rm DR_2G X101101011000000 000000nnnnnddddd 5AC0 0000 Rd Rn (general)
+ // rbit Vd,Vn DV_2M 0Q10111001100000 010110nnnnnddddd 2E60 5800 Vd,Vn (vector)
+
+INST2(rev16, "rev16", 0, 0, IF_EN2L, 0x5AC00400, 0x0E201800)
+ // rev16 Rd,Rm DR_2G X101101011000000 000001nnnnnddddd 5AC0 0400 Rd Rn (general)
+ // rev16 Vd,Vn DV_2M 0Q001110XX100000 000110nnnnnddddd 0E20 1800 Vd,Vn (vector)
+
+INST2(rev32, "rev32", 0, 0, IF_EN2L, 0xDAC00800, 0x2E200800)
+ // rev32 Rd,Rm DR_2G 1101101011000000 000010nnnnnddddd DAC0 0800 Rd Rn (general)
+ // rev32 Vd,Vn DV_2M 0Q101110XX100000 000010nnnnnddddd 2E20 0800 Vd,Vn (vector)
+
+// enum name FP LD/ST DV_3A DV_3AI
+INST2(mla, "mla", 0, 0, IF_EN2M, 0x0E209400, 0x2F000000)
+ // mla Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 100101nnnnnddddd 0E20 9400 Vd,Vn,Vm (vector)
+ // mla Vd,Vn,Vm[] DV_3AI 0Q101111XXLMmmmm 0000H0nnnnnddddd 2F00 0000 Vd,Vn,Vm[] (vector by elem)
+
+INST2(mls, "mls", 0, 0, IF_EN2M, 0x2E209400, 0x2F004000)
+ // mls Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 100101nnnnnddddd 2E20 9400 Vd,Vn,Vm (vector)
+ // mls Vd,Vn,Vm[] DV_3AI 0Q101111XXLMmmmm 0100H0nnnnnddddd 2F00 4000 Vd,Vn,Vm[] (vector by elem)
+
+// enum name FP LD/ST DV_2N DV_2O
+INST2(sshr, "sshr", 0, 0, IF_EN2N, 0x5F000400, 0x0F000400)
+ // sshr Vd,Vn,imm DV_2N 010111110iiiiiii 000001nnnnnddddd 5F00 0400 Vd Vn imm (shift - scalar)
+ // sshr Vd,Vn,imm DV_2O 0Q0011110iiiiiii 000001nnnnnddddd 0F00 0400 Vd,Vn imm (shift - vector)
+
+INST2(ssra, "ssra", 0, 0, IF_EN2N, 0x5F001400, 0x0F001400)
+ // ssra Vd,Vn,imm DV_2N 010111110iiiiiii 000101nnnnnddddd 5F00 1400 Vd Vn imm (shift - scalar)
+ // ssra Vd,Vn,imm DV_2O 0Q0011110iiiiiii 000101nnnnnddddd 0F00 1400 Vd,Vn imm (shift - vector)
+
+INST2(srshr, "srshr", 0, 0, IF_EN2N, 0x5F002400, 0x0F002400)
+ // srshr Vd,Vn,imm DV_2N 010111110iiiiiii 001001nnnnnddddd 5F00 2400 Vd Vn imm (shift - scalar)
+ // srshr Vd,Vn,imm DV_2O 0Q0011110iiiiiii 001001nnnnnddddd 0F00 2400 Vd,Vn imm (shift - vector)
+
+INST2(srsra, "srsra", 0, 0, IF_EN2N, 0x5F003400, 0x0F003400)
+ // srsra Vd,Vn,imm DV_2N 010111110iiiiiii 001101nnnnnddddd 5F00 3400 Vd Vn imm (shift - scalar)
+ // srsra Vd,Vn,imm DV_2O 0Q0011110iiiiiii 001101nnnnnddddd 0F00 3400 Vd,Vn imm (shift - vector)
+
+INST2(shl, "shl", 0, 0, IF_EN2N, 0x5F005400, 0x0F005400)
+ // shl Vd,Vn,imm DV_2N 010111110iiiiiii 010101nnnnnddddd 5F00 5400 Vd Vn imm (shift - scalar)
+ // shl Vd,Vn,imm DV_2O 0Q0011110iiiiiii 010101nnnnnddddd 0F00 5400 Vd,Vn imm (shift - vector)
+
+INST2(ushr, "ushr", 0, 0, IF_EN2N, 0x7F000400, 0x2F000400)
+ // ushr Vd,Vn,imm DV_2N 011111110iiiiiii 000001nnnnnddddd 7F00 0400 Vd Vn imm (shift - scalar)
+ // ushr Vd,Vn,imm DV_2O 0Q1011110iiiiiii 000001nnnnnddddd 2F00 0400 Vd,Vn imm (shift - vector)
+
+INST2(usra, "usra", 0, 0, IF_EN2N, 0x7F001400, 0x2F001400)
+ // usra Vd,Vn,imm DV_2N 011111110iiiiiii 000101nnnnnddddd 7F00 1400 Vd Vn imm (shift - scalar)
+ // usra Vd,Vn,imm DV_2O 0Q1011110iiiiiii 000101nnnnnddddd 2F00 1400 Vd,Vn imm (shift - vector)
+
+INST2(urshr, "urshr", 0, 0, IF_EN2N, 0x7F002400, 0x2F002400)
+ // urshr Vd,Vn,imm DV_2N 011111110iiiiiii 001001nnnnnddddd 7F00 2400 Vd Vn imm (shift - scalar)
+ // urshr Vd,Vn,imm DV_2O 0Q1011110iiiiiii 001001nnnnnddddd 2F00 2400 Vd,Vn imm (shift - vector)
+
+INST2(ursra, "ursra", 0, 0, IF_EN2N, 0x7F003400, 0x2F003400)
+ // ursra Vd,Vn,imm DV_2N 011111110iiiiiii 001101nnnnnddddd 7F00 3400 Vd Vn imm (shift - scalar)
+ // ursra Vd,Vn,imm DV_2O 0Q1011110iiiiiii 001101nnnnnddddd 2F00 3400 Vd,Vn imm (shift - vector)
+
+INST2(sri, "sri", 0, 0, IF_EN2N, 0x7F004400, 0x2F004400)
+ // sri Vd,Vn,imm DV_2N 011111110iiiiiii 010001nnnnnddddd 7F00 4400 Vd Vn imm (shift - scalar)
+ // sri Vd,Vn,imm DV_2O 0Q1011110iiiiiii 010001nnnnnddddd 2F00 4400 Vd,Vn imm (shift - vector)
+
+INST2(sli, "sli", 0, 0, IF_EN2N, 0x7F005400, 0x2F005400)
+ // sli Vd,Vn,imm DV_2N 011111110iiiiiii 010101nnnnnddddd 7F00 5400 Vd Vn imm (shift - scalar)
+ // sli Vd,Vn,imm DV_2O 0Q1011110iiiiiii 010101nnnnnddddd 2F00 5400 Vd,Vn imm (shift - vector)
+
+INST1(ldur, "ldur", 0,LD, IF_LS_2C, 0xB8400000)
+ // ldur Rt,[Xn+simm9] LS_2C 1X111000010iiiii iiii00nnnnnttttt B840 0000 [Xn imm(-256..+255)]
+
+INST1(ldurb, "ldurb", 0,LD, IF_LS_2C, 0x38400000)
+ // ldurb Rt,[Xn+simm9] LS_2C 00111000010iiiii iiii00nnnnnttttt 3840 0000 [Xn imm(-256..+255)]
+
+INST1(ldurh, "ldurh", 0,LD, IF_LS_2C, 0x78400000)
+ // ldurh Rt,[Xn+simm9] LS_2C 01111000010iiiii iiii00nnnnnttttt 7840 0000 [Xn imm(-256..+255)]
+
+INST1(ldursb, "ldursb", 0,LD, IF_LS_2C, 0x38800000)
+ // ldursb Rt,[Xn+simm9] LS_2C 001110001X0iiiii iiii00nnnnnttttt 3880 0000 [Xn imm(-256..+255)]
+
+INST1(ldursh, "ldursh", 0,LD, IF_LS_2C, 0x78800000)
+ // ldursh Rt,[Xn+simm9] LS_2C 011110001X0iiiii iiii00nnnnnttttt 7880 0000 [Xn imm(-256..+255)]
+
+INST1(ldursw, "ldursw", 0,LD, IF_LS_2C, 0xB8800000)
+ // ldursw Rt,[Xn+simm9] LS_2C 10111000100iiiii iiii00nnnnnttttt B880 0000 [Xn imm(-256..+255)]
+
+INST1(stur, "stur", 0,ST, IF_LS_2C, 0xB8000000)
+ // stur Rt,[Xn+simm9] LS_2C 1X111000000iiiii iiii00nnnnnttttt B800 0000 [Xn imm(-256..+255)]
+
+INST1(sturb, "sturb", 0,ST, IF_LS_2C, 0x38000000)
+ // sturb Rt,[Xn+simm9] LS_2C 00111000000iiiii iiii00nnnnnttttt 3800 0000 [Xn imm(-256..+255)]
+
+INST1(sturh, "sturh", 0,ST, IF_LS_2C, 0x78000000)
+ // sturh Rt,[Xn+simm9] LS_2C 01111000000iiiii iiii00nnnnnttttt 7800 0000 [Xn imm(-256..+255)]
+
+INST1(adr, "adr", 0, 0, IF_DI_1E, 0x10000000)
+ // adr Rd, simm21 DI_1E 0ii10000iiiiiiii iiiiiiiiiiiddddd 1000 0000 Rd simm21
+
+INST1(adrp, "adrp", 0, 0, IF_DI_1E, 0x90000000)
+ // adrp Rd, simm21 DI_1E 1ii10000iiiiiiii iiiiiiiiiiiddddd 9000 0000 Rd simm21
+
+INST1(b, "b", 0, 0, IF_BI_0A, 0x14000000)
+ // b simm26 BI_0A 000101iiiiiiiiii iiiiiiiiiiiiiiii 1400 0000 simm26:00
+
+INST1(b_tail, "b", 0, 0, IF_BI_0C, 0x14000000)
+ // b simm26 BI_0A 000101iiiiiiiiii iiiiiiiiiiiiiiii 1400 0000 simm26:00, same as b representing a tail call of bl.
+
+INST1(bl_local,"bl", 0, 0, IF_BI_0A, 0x94000000)
+ // bl simm26 BI_0A 100101iiiiiiiiii iiiiiiiiiiiiiiii 9400 0000 simm26:00, same as bl, but with a BasicBlock target.
+
+INST1(bl, "bl", 0, 0, IF_BI_0C, 0x94000000)
+ // bl simm26 BI_0C 100101iiiiiiiiii iiiiiiiiiiiiiiii 9400 0000 simm26:00
+
+INST1(br, "br", 0, 0, IF_BR_1A, 0xD61F0000)
+ // br Rn BR_1A 1101011000011111 000000nnnnn00000 D61F 0000, an indirect branch like switch expansion
+
+INST1(br_tail, "br", 0, 0, IF_BR_1B, 0xD61F0000)
+ // br Rn BR_1B 1101011000011111 000000nnnnn00000 D61F 0000, same as br representing a tail call of blr. Encode target with Reg3.
+
+INST1(blr, "blr", 0, 0, IF_BR_1B, 0xD63F0000)
+ // blr Rn BR_1B 1101011000111111 000000nnnnn00000 D63F 0000, Encode target with Reg3.
+
+INST1(ret, "ret", 0, 0, IF_BR_1A, 0xD65F0000)
+ // ret Rn BR_1A 1101011001011111 000000nnnnn00000 D65F 0000
+
+INST1(beq, "beq", 0, 0, IF_BI_0B, 0x54000000)
+ // beq simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00000 5400 0000 simm19:00
+
+INST1(bne, "bne", 0, 0, IF_BI_0B, 0x54000001)
+ // bne simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00001 5400 0001 simm19:00
+
+INST1(bhs, "bhs", 0, 0, IF_BI_0B, 0x54000002)
+ // bhs simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00010 5400 0002 simm19:00
+
+INST1(blo, "blo", 0, 0, IF_BI_0B, 0x54000003)
+ // blo simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00011 5400 0003 simm19:00
+
+INST1(bmi, "bmi", 0, 0, IF_BI_0B, 0x54000004)
+ // bmi simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00100 5400 0004 simm19:00
+
+INST1(bpl, "bpl", 0, 0, IF_BI_0B, 0x54000005)
+ // bpl simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00101 5400 0005 simm19:00
+
+INST1(bvs, "bvs", 0, 0, IF_BI_0B, 0x54000006)
+ // bvs simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00110 5400 0006 simm19:00
+
+INST1(bvc, "bvc", 0, 0, IF_BI_0B, 0x54000007)
+ // bvc simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00111 5400 0007 simm19:00
+
+INST1(bhi, "bhi", 0, 0, IF_BI_0B, 0x54000008)
+ // bhi simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii01000 5400 0008 simm19:00
+
+INST1(bls, "bls", 0, 0, IF_BI_0B, 0x54000009)
+ // bls simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii01001 5400 0009 simm19:00
+
+INST1(bge, "bge", 0, 0, IF_BI_0B, 0x5400000A)
+ // bge simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii01010 5400 000A simm19:00
+
+INST1(blt, "blt", 0, 0, IF_BI_0B, 0x5400000B)
+ // blt simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii01011 5400 000B simm19:00
+
+INST1(bgt, "bgt", 0, 0, IF_BI_0B, 0x5400000C)
+ // bgt simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii01100 5400 000C simm19:00
+
+INST1(ble, "ble", 0, 0, IF_BI_0B, 0x5400000D)
+ // ble simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii01101 5400 000D simm19:00
+
+INST1(cbz, "cbz", 0, 0, IF_BI_1A, 0x34000000)
+ // cbz Rt, simm19 BI_1A X0110100iiiiiiii iiiiiiiiiiittttt 3400 0000 Rt simm19:00
+
+INST1(cbnz, "cbnz", 0, 0, IF_BI_1A, 0x35000000)
+ // cbnz Rt, simm19 BI_1A X0110101iiiiiiii iiiiiiiiiiittttt 3500 0000 Rt simm19:00
+
+INST1(tbz, "tbz", 0, 0, IF_BI_1B, 0x36000000)
+ // tbz Rt, imm6, simm14 BI_1B B0110110bbbbbiii iiiiiiiiiiittttt 3600 0000 Rt imm6, simm14:00
+
+INST1(tbnz, "tbnz", 0, 0, IF_BI_1B, 0x37000000)
+ // tbnz Rt, imm6, simm14 BI_1B B0110111bbbbbiii iiiiiiiiiiittttt 3700 0000 Rt imm6, simm14:00
+
+INST1(movk, "movk", 0, 0, IF_DI_1B, 0x72800000)
+ // movk Rd,imm(i16,hw) DI_1B X11100101hwiiiii iiiiiiiiiiiddddd 7280 0000 imm(i16,hw)
+
+INST1(movn, "movn", 0, 0, IF_DI_1B, 0x12800000)
+ // movn Rd,imm(i16,hw) DI_1B X00100101hwiiiii iiiiiiiiiiiddddd 1280 0000 imm(i16,hw)
+
+INST1(movz, "movz", 0, 0, IF_DI_1B, 0x52800000)
+ // movz Rd,imm(i16,hw) DI_1B X10100101hwiiiii iiiiiiiiiiiddddd 5280 0000 imm(i16,hw)
+
+INST1(csel, "csel", 0, 0, IF_DR_3D, 0x1A800000)
+ // csel Rd,Rn,Rm,cond DR_3D X0011010100mmmmm cccc00nnnnnddddd 1A80 0000 cond
+
+INST1(csinc, "csinc", 0, 0, IF_DR_3D, 0x1A800400)
+ // csinc Rd,Rn,Rm,cond DR_3D X0011010100mmmmm cccc01nnnnnddddd 1A80 0400 cond
+
+INST1(csinv, "csinv", 0, 0, IF_DR_3D, 0x5A800000)
+ // csinv Rd,Rn,Rm,cond DR_3D X1011010100mmmmm cccc00nnnnnddddd 5A80 0000 cond
+
+INST1(csneg, "csneg", 0, 0, IF_DR_3D, 0x5A800400)
+ // csneg Rd,Rn,Rm,cond DR_3D X1011010100mmmmm cccc01nnnnnddddd 5A80 0400 cond
+
+INST1(cinc, "cinc", 0, 0, IF_DR_2D, 0x1A800400)
+ // cinc Rd,Rn,cond DR_2D X0011010100nnnnn cccc01nnnnnddddd 1A80 0400 cond
+
+INST1(cinv, "cinv", 0, 0, IF_DR_2D, 0x5A800000)
+ // cinv Rd,Rn,cond DR_2D X1011010100nnnnn cccc00nnnnnddddd 5A80 0000 cond
+
+INST1(cneg, "cneg", 0, 0, IF_DR_2D, 0x5A800400)
+ // cneg Rd,Rn,cond DR_2D X1011010100nnnnn cccc01nnnnnddddd 5A80 0400 cond
+
+INST1(cset, "cset", 0, 0, IF_DR_1D, 0x1A9F07E0)
+ // cset Rd,cond DR_1D X001101010011111 cccc0111111ddddd 1A9F 07E0 Rd cond
+
+INST1(csetm, "csetm", 0, 0, IF_DR_1D, 0x5A9F03E0)
+ // csetm Rd,cond DR_1D X101101010011111 cccc0011111ddddd 5A9F 03E0 Rd cond
+
+INST1(rev, "rev", 0, 0, IF_DR_2G, 0x5AC00800)
+ // rev Rd,Rm DR_2G X101101011000000 00001Xnnnnnddddd 5AC0 0800 Rd Rn
+
+INST1(rev64, "rev64", 0, 0, IF_DV_2M, 0x0E200800)
+ // rev64 Vd,Vn DV_2M 0Q001110XX100000 000010nnnnnddddd 0E20 0800 Vd,Vn (vector)
+
+INST1(adc, "adc", 0, 0, IF_DR_3A, 0x1A000000)
+ // adc Rd,Rn,Rm DR_3A X0011010000mmmmm 000000nnnnnddddd 1A00 0000
+
+INST1(adcs, "adcs", 0, 0, IF_DR_3A, 0x3A000000)
+ // adcs Rd,Rn,Rm DR_3A X0111010000mmmmm 000000nnnnnddddd 3A00 0000
+
+INST1(sbc, "sbc", 0, 0, IF_DR_3A, 0x5A000000)
+ // sbc Rd,Rn,Rm DR_3A X1011010000mmmmm 000000nnnnnddddd 5A00 0000
+
+INST1(sbcs, "sbcs", 0, 0, IF_DR_3A, 0x7A000000)
+ // sbcs Rd,Rn,Rm DR_3A X1111010000mmmmm 000000nnnnnddddd 7A00 0000
+
+INST1(udiv, "udiv", 0, 0, IF_DR_3A, 0x1AC00800)
+ // udiv Rd,Rn,Rm DR_3A X0011010110mmmmm 000010nnnnnddddd 1AC0 0800
+
+INST1(sdiv, "sdiv", 0, 0, IF_DR_3A, 0x1AC00C00)
+ // sdiv Rd,Rn,Rm DR_3A X0011010110mmmmm 000011nnnnnddddd 1AC0 0C00
+
+INST1(mneg, "mneg", 0, 0, IF_DR_3A, 0x1B00FC00)
+ // mneg Rd,Rn,Rm DR_3A X0011011000mmmmm 111111nnnnnddddd 1B00 FC00
+
+INST1(madd, "madd", 0, 0, IF_DR_4A, 0x1B000000)
+ // madd Rd,Rn,Rm,Ra DR_4A X0011011000mmmmm 0aaaaannnnnddddd 1B00 0000
+
+INST1(msub, "msub", 0, 0, IF_DR_4A, 0x1B008000)
+ // msub Rd,Rn,Rm,Ra DR_4A X0011011000mmmmm 1aaaaannnnnddddd 1B00 8000
+
+INST1(smull, "smull", 0, 0, IF_DR_3A, 0x9B207C00)
+ // smull Rd,Rn,Rm DR_3A 10011011001mmmmm 011111nnnnnddddd 9B20 7C00
+
+INST1(smaddl, "smaddl", 0, 0, IF_DR_4A, 0x9B200000)
+ // smaddl Rd,Rn,Rm,Ra DR_4A 10011011001mmmmm 0aaaaannnnnddddd 9B20 0000
+
+INST1(smnegl, "smnegl", 0, 0, IF_DR_3A, 0x9B20FC00)
+ // smnegl Rd,Rn,Rm DR_3A 10011011001mmmmm 111111nnnnnddddd 9B20 FC00
+
+INST1(smsubl, "smsubl", 0, 0, IF_DR_4A, 0x9B208000)
+ // smsubl Rd,Rn,Rm,Ra DR_4A 10011011001mmmmm 1aaaaannnnnddddd 9B20 8000
+
+INST1(smulh, "smulh", 0, 0, IF_DR_3A, 0x9B407C00)
+ // smulh Rd,Rn,Rm DR_3A 10011011010mmmmm 011111nnnnnddddd 9B40 7C00
+
+INST1(umull, "umull", 0, 0, IF_DR_3A, 0x9BA07C00)
+ // umull Rd,Rn,Rm DR_3A 10011011101mmmmm 011111nnnnnddddd 9BA0 7C00
+
+INST1(umaddl, "umaddl", 0, 0, IF_DR_4A, 0x9BA00000)
+ // umaddl Rd,Rn,Rm,Ra DR_4A 10011011101mmmmm 0aaaaannnnnddddd 9BA0 0000
+
+INST1(umnegl, "umnegl", 0, 0, IF_DR_3A, 0x9BA0FC00)
+ // umnegl Rd,Rn,Rm DR_3A 10011011101mmmmm 111111nnnnnddddd 9BA0 FC00
+
+INST1(umsubl, "umsubl", 0, 0, IF_DR_4A, 0x9BA08000)
+ // umsubl Rd,Rn,Rm,Ra DR_4A 10011011101mmmmm 1aaaaannnnnddddd 9BA0 8000
+
+INST1(umulh, "umulh", 0, 0, IF_DR_3A, 0x9BC07C00)
+ // umulh Rd,Rn,Rm DR_3A 10011011110mmmmm 011111nnnnnddddd 9BC0 7C00
+
+INST1(extr, "extr", 0, 0, IF_DR_3E, 0x13800000)
+ // extr Rd,Rn,Rm,imm6 DR_3E X00100111X0mmmmm ssssssnnnnnddddd 1380 0000 imm(0-63)
+
+INST1(lslv, "lslv", 0, 0, IF_DR_3A, 0x1AC02000)
+ // lslv Rd,Rn,Rm DR_3A X0011010110mmmmm 001000nnnnnddddd 1AC0 2000
+
+INST1(lsrv, "lsrv", 0, 0, IF_DR_3A, 0x1AC02400)
+ // lsrv Rd,Rn,Rm DR_3A X0011010110mmmmm 001001nnnnnddddd 1AC0 2400
+
+INST1(asrv, "asrv", 0, 0, IF_DR_3A, 0x1AC02800)
+ // asrv Rd,Rn,Rm DR_3A X0011010110mmmmm 001010nnnnnddddd 1AC0 2800
+
+INST1(rorv, "rorv", 0, 0, IF_DR_3A, 0x1AC02C00)
+ // rorv Rd,Rn,Rm DR_3A X0011010110mmmmm 001011nnnnnddddd 1AC0 2C00
+
+INST1(sbfm, "sbfm", 0, 0, IF_DI_2D, 0x13000000)
+ // sbfm Rd,Rn,imr,ims DI_2D X00100110Nrrrrrr ssssssnnnnnddddd 1300 0000 imr, ims
+
+INST1(bfm, "bfm", 0, 0, IF_DI_2D, 0x33000000)
+ // bfm Rd,Rn,imr,ims DI_2D X01100110Nrrrrrr ssssssnnnnnddddd 3300 0000 imr, ims
+
+INST1(ubfm, "ubfm", 0, 0, IF_DI_2D, 0x53000000)
+ // ubfm Rd,Rn,imr,ims DI_2D X10100110Nrrrrrr ssssssnnnnnddddd 5300 0000 imr, ims
+
+INST1(sbfiz, "sbfiz", 0, 0, IF_DI_2D, 0x13000000)
+ // sbfiz Rd,Rn,lsb,width DI_2D X00100110Nrrrrrr ssssssnnnnnddddd 1300 0000 imr, ims
+
+INST1(bfi, "bfi", 0, 0, IF_DI_2D, 0x33000000)
+ // bfi Rd,Rn,lsb,width DI_2D X01100110Nrrrrrr ssssssnnnnnddddd 3300 0000 imr, ims
+
+INST1(ubfiz, "ubfiz", 0, 0, IF_DI_2D, 0x53000000)
+ // ubfiz Rd,Rn,lsb,width DI_2D X10100110Nrrrrrr ssssssnnnnnddddd 5300 0000 imr, ims
+
+INST1(sbfx, "sbfx", 0, 0, IF_DI_2D, 0x13000000)
+ // sbfx Rd,Rn,lsb,width DI_2D X00100110Nrrrrrr ssssssnnnnnddddd 1300 0000 imr, ims
+
+INST1(bfxil, "bfxil", 0, 0, IF_DI_2D, 0x33000000)
+ // bfxil Rd,Rn,lsb,width DI_2D X01100110Nrrrrrr ssssssnnnnnddddd 3300 0000 imr, ims
+
+INST1(ubfx, "ubfx", 0, 0, IF_DI_2D, 0x53000000)
+ // ubfx Rd,Rn,lsb,width DI_2D X10100110Nrrrrrr ssssssnnnnnddddd 5300 0000 imr, ims
+
+INST1(sxtb, "sxtb", 0, 0, IF_DR_2H, 0x13001C00)
+ // sxtb Rd,Rn DR_2H X00100110X000000 000111nnnnnddddd 1300 1C00
+
+INST1(sxth, "sxth", 0, 0, IF_DR_2H, 0x13003C00)
+ // sxth Rd,Rn DR_2H X00100110X000000 001111nnnnnddddd 1300 3C00
+
+INST1(sxtw, "sxtw", 0, 0, IF_DR_2H, 0x13007C00)
+ // sxtw Rd,Rn DR_2H X00100110X000000 011111nnnnnddddd 1300 7C00
+
+INST1(uxtb, "uxtb", 0, 0, IF_DR_2H, 0x53001C00)
+ // uxtb Rd,Rn DR_2H 0101001100000000 000111nnnnnddddd 5300 1C00
+
+INST1(uxth, "uxth", 0, 0, IF_DR_2H, 0x53003C00)
+ // uxth Rd,Rn DR_2H 0101001100000000 001111nnnnnddddd 5300 3C00
+
+INST1(nop, "nop", 0, 0, IF_SN_0A, 0xD503201F)
+ // nop SN_0A 1101010100000011 0010000000011111 D503 201F
+
+INST1(bkpt, "bkpt", 0, 0, IF_SN_0A, 0xD43E0000)
+ // bkpt SN_0A 1101010000111110 0000000000000000 D43E 0000 0xF000
+
+INST1(brk, "brk", 0, 0, IF_SI_0A, 0xD4200000)
+ // brk imm16 SI_0A 11010100001iiiii iiiiiiiiiii00000 D420 0000 imm16
+
+INST1(dsb, "dsb", 0, 0, IF_SI_0B, 0xD503309F)
+ // dsb barrierKind SI_0B 1101010100000011 0011bbbb10011111 D503 309F imm4 - barrier kind
+
+INST1(dmb, "dmb", 0, 0, IF_SI_0B, 0xD50330BF)
+ // dmb barrierKind SI_0B 1101010100000011 0011bbbb10111111 D503 30BF imm4 - barrier kind
+
+INST1(isb, "isb", 0, 0, IF_SI_0B, 0xD50330DF)
+ // isb barrierKind SI_0B 1101010100000011 0011bbbb11011111 D503 30DF imm4 - barrier kind
+
+INST1(umov, "umov", 0, 0, IF_DV_2B, 0x0E003C00)
+ // umov Rd,Vn[] DV_2B 0Q001110000iiiii 001111nnnnnddddd 0E00 3C00 Rd,Vn[]
+
+INST1(smov, "smov", 0, 0, IF_DV_2B, 0x0E002C00)
+ // smov Rd,Vn[] DV_2B 0Q001110000iiiii 001011nnnnnddddd 0E00 2C00 Rd,Vn[]
+
+INST1(movi, "movi", 0, 0, IF_DV_1B, 0x0F000400)
+ // movi Vd,imm8 DV_1B 0QX0111100000iii cmod01iiiiiddddd 0F00 0400 Vd imm8 (immediate vector)
+
+INST1(mvni, "mvni", 0, 0, IF_DV_1B, 0x2F000400)
+ // mvni Vd,imm8 DV_1B 0Q10111100000iii cmod01iiiiiddddd 2F00 0400 Vd imm8 (immediate vector)
+
+INST1(bsl, "bsl", 0, 0, IF_DV_3C, 0x2E601C00)
+ // bsl Vd,Vn,Vm DV_3C 0Q101110011mmmmm 000111nnnnnddddd 2E60 1C00 Vd,Vn,Vm
+
+INST1(bit, "bit", 0, 0, IF_DV_3C, 0x2EA01C00)
+ // bit Vd,Vn,Vm DV_3C 0Q101110101mmmmm 000111nnnnnddddd 2EA0 1C00 Vd,Vn,Vm
+
+INST1(bif, "bif", 0, 0, IF_DV_3C, 0x2EE01C00)
+ // bif Vd,Vn,Vm DV_3C 0Q101110111mmmmm 000111nnnnnddddd 2EE0 1C00 Vd,Vn,Vm
+
+INST1(cnt, "cnt", 0, 0, IF_DV_2M, 0x0E205800)
+ // cnt Vd,Vn DV_2M 0Q00111000100000 010110nnnnnddddd 0E20 5800 Vd,Vn (vector)
+
+INST1(not, "not", 0, 0, IF_DV_2M, 0x2E205800)
+ // not Vd,Vn DV_2M 0Q10111000100000 010110nnnnnddddd 2E20 5800 Vd,Vn (vector)
+
+INST1(fnmul, "fnmul", 0, 0, IF_DV_3D, 0x1E208800)
+ // fnmul Vd,Vn,Vm DV_3D 000111100X1mmmmm 100010nnnnnddddd 1E20 8800 Vd,Vn,Vm (scalar)
+
+INST1(fmadd, "fmadd", 0, 0, IF_DV_4A, 0x1F000000)
+ // fmadd Vd,Va,Vn,Vm DV_4A 000111110X0mmmmm 0aaaaannnnnddddd 1F00 0000 Vd Vn Vm Va (scalar)
+
+INST1(fmsub, "fmsub", 0, 0, IF_DV_4A, 0x1F008000)
+ // fmsub Vd,Va,Vn,Vm DV_4A 000111110X0mmmmm 1aaaaannnnnddddd 1F00 8000 Vd Vn Vm Va (scalar)
+
+INST1(fnmadd, "fnmadd", 0, 0, IF_DV_4A, 0x1F200000)
+ // fnmadd Vd,Va,Vn,Vm DV_4A 000111110X1mmmmm 0aaaaannnnnddddd 1F20 0000 Vd Vn Vm Va (scalar)
+
+INST1(fnmsub, "fnmsub", 0, 0, IF_DV_4A, 0x1F208000)
+ // fnmsub Vd,Va,Vn,Vm DV_4A 000111110X1mmmmm 1aaaaannnnnddddd 1F20 8000 Vd Vn Vm Va (scalar)
+
+INST1(fcvt, "fcvt", 0, 0, IF_DV_2J, 0x1E224000)
+ // fcvt Vd,Vn DV_2J 00011110SS10001D D10000nnnnnddddd 1E22 4000 Vd,Vn
+
+INST1(pmul, "pmul", 0, 0, IF_DV_3A, 0x2E209C00)
+ // pmul Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 100111nnnnnddddd 2E20 9C00 Vd,Vn,Vm (vector)
+
+INST1(saba, "saba", 0, 0, IF_DV_3A, 0x0E207C00)
+ // saba Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 011111nnnnnddddd 0E20 7C00 Vd,Vn,Vm (vector)
+
+INST1(sabd, "sabd", 0, 0, IF_DV_3A, 0x0E207400)
+ // sabd Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 011101nnnnnddddd 0E20 7400 Vd,Vn,Vm (vector)
+
+INST1(uaba, "uaba", 0, 0, IF_DV_3A, 0x2E207C00)
+ // uaba Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 011111nnnnnddddd 2E20 7C00 Vd,Vn,Vm (vector)
+
+INST1(uabd, "uabd", 0, 0, IF_DV_3A, 0x2E207400)
+ // uabd Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 011101nnnnnddddd 2E20 7400 Vd,Vn,Vm (vector)
+
+INST1(shll, "shll", 0, 0, IF_DV_2M, 0x2E213800)
+ // shll Vd,Vn,imm DV_2M 0Q101110XX100001 001110nnnnnddddd 2E21 3800 Vd,Vn, {8/16/32}
+
+INST1(shll2, "shll2", 0, 0, IF_DV_2M, 0x6E213800)
+ // shll2 Vd,Vn,imm DV_2M 0Q101110XX100001 001110nnnnnddddd 6E21 3800 Vd,Vn, {8/16/32}
+
+INST1(sshll, "sshll", 0, 0, IF_DV_2O, 0x0F00A400)
+ // sshll Vd,Vn,imm DV_2O 000011110iiiiiii 101001nnnnnddddd 0F00 A400 Vd,Vn imm (shift - vector)
+
+INST1(sshll2, "sshll2", 0, 0, IF_DV_2O, 0x4F00A400)
+ // sshll2 Vd,Vn,imm DV_2O 010011110iiiiiii 101001nnnnnddddd 4F00 A400 Vd,Vn imm (shift - vector)
+
+INST1(ushll, "ushll", 0, 0, IF_DV_2O, 0x2F00A400)
+ // ushll Vd,Vn,imm DV_2O 001011110iiiiiii 101001nnnnnddddd 2F00 A400 Vd,Vn imm (shift - vector)
+
+INST1(ushll2, "ushll2", 0, 0, IF_DV_2O, 0x6F00A400)
+ // ushll2 Vd,Vn,imm DV_2O 011011110iiiiiii 101001nnnnnddddd 6F00 A400 Vd,Vn imm (shift - vector)
+
+INST1(shrn, "shrn", 0, 0, IF_DV_2O, 0x0F008400)
+ // shrn Vd,Vn,imm DV_2O 000011110iiiiiii 100001nnnnnddddd 0F00 8400 Vd,Vn imm (shift - vector)
+
+INST1(shrn2, "shrn2", 0, 0, IF_DV_2O, 0x4F008400)
+ // shrn2 Vd,Vn,imm DV_2O 010011110iiiiiii 100001nnnnnddddd 4F00 8400 Vd,Vn imm (shift - vector)
+
+INST1(rshrn, "rshrn", 0, 0, IF_DV_2O, 0x0F008C00)
+ // rshrn Vd,Vn,imm DV_2O 000011110iiiiiii 100011nnnnnddddd 0F00 8C00 Vd,Vn imm (shift - vector)
+
+INST1(rshrn2, "rshrn2", 0, 0, IF_DV_2O, 0x4F008C00)
+ // rshrn2 Vd,Vn,imm DV_2O 010011110iiiiiii 100011nnnnnddddd 4F00 8C00 Vd,Vn imm (shift - vector)
+
+INST1(sxtl, "sxtl", 0, 0, IF_DV_2O, 0x0F00A400)
+ // sxtl Vd,Vn DV_2O 000011110iiiiiii 101001nnnnnddddd 0F00 A400 Vd,Vn (shift - vector)
+
+INST1(sxtl2, "sxtl2", 0, 0, IF_DV_2O, 0x4F00A400)
+ // sxtl2 Vd,Vn DV_2O 010011110iiiiiii 101001nnnnnddddd 4F00 A400 Vd,Vn (shift - vector)
+
+INST1(uxtl, "uxtl", 0, 0, IF_DV_2O, 0x2F00A400)
+ // uxtl Vd,Vn DV_2O 001011110iiiiiii 101001nnnnnddddd 2F00 A400 Vd,Vn (shift - vector)
+
+INST1(uxtl2, "uxtl2", 0, 0, IF_DV_2O, 0x6F00A400)
+ // uxtl2 Vd,Vn DV_2O 011011110iiiiiii 101001nnnnnddddd 6F00 A400 Vd,Vn (shift - vector)
+// clang-format on
+
+/*****************************************************************************/
+#undef INST1
+#undef INST2
+#undef INST3
+#undef INST4
+#undef INST5
+#undef INST6
+#undef INST9
+/*****************************************************************************/
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
new file mode 100644
index 0000000000..436563babf
--- /dev/null
+++ b/src/jit/instrsxarch.h
@@ -0,0 +1,540 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This file was previously known as instrs.h
+//
+/*****************************************************************************
+ * x86 instructions for the JIT compiler
+ *
+ * id -- the enum name for the instruction
+ * nm -- textual name (for assembly display)
+ * fp -- 1 = floating point instruction, 0 = not floating point instruction
+ * um -- update mode, see IUM_xx enum (rd, wr, or rw)
+ * rf -- 1 = reads flags, 0 = doesn't read flags
+ * wf -- 1 = writes flags, 0 = doesn't write flags
+ * mr -- base encoding for R/M[reg] addressing mode
+ * mi -- base encoding for R/M,icon addressing mode
+ * rm -- base encoding for reg,R/M addressing mode
+ * a4 -- base encoding for eax,i32 addressing mode
+ * rr -- base encoding for register addressing mode
+ *
+******************************************************************************/
+
+// clang-format off
+#if !defined(_TARGET_XARCH_)
+ #error Unexpected target type
+#endif
+
+#ifndef INST1
+#error At least INST1 must be defined before including this file.
+#endif
+/*****************************************************************************/
+#ifndef INST0
+#define INST0(id, nm, fp, um, rf, wf, mr )
+#endif
+#ifndef INST2
+#define INST2(id, nm, fp, um, rf, wf, mr, mi )
+#endif
+#ifndef INST3
+#define INST3(id, nm, fp, um, rf, wf, mr, mi, rm )
+#endif
+#ifndef INST4
+#define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 )
+#endif
+#ifndef INST5
+#define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr)
+#endif
+
+/*****************************************************************************/
+/* The following is x86-specific */
+/*****************************************************************************/
+
+// enum name FP updmode rf wf R/M[reg] R/M,icon reg,R/M eax,i32 register
+INST5(invalid, "INVALID" , 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE)
+
+INST5(push , "push" , 0, IUM_RD, 0, 0, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050)
+INST5(pop , "pop" , 0, IUM_WR, 0, 0, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058)
+// Does not affect the stack tracking in the emitter
+INST5(push_hide, "push" , 0, IUM_RD, 0, 0, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050)
+INST5(pop_hide, "pop" , 0, IUM_WR, 0, 0, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058)
+
+INST5(inc , "inc" , 0, IUM_RW, 0, 1, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000040)
+INST5(inc_l , "inc" , 0, IUM_RW, 0, 1, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C0FE)
+INST5(dec , "dec" , 0, IUM_RW, 0, 1, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000048)
+INST5(dec_l , "dec" , 0, IUM_RW, 0, 1, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C8FE)
+
+// enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M eax,i32
+
+INST4(add , "add" , 0, IUM_RW, 0, 1, 0x000000, 0x000080, 0x000002, 0x000004)
+INST4(or , "or" , 0, IUM_RW, 0, 1, 0x000008, 0x000880, 0x00000A, 0x00000C)
+INST4(adc , "adc" , 0, IUM_RW, 1, 1, 0x000010, 0x001080, 0x000012, 0x000014)
+INST4(sbb , "sbb" , 0, IUM_RW, 1, 1, 0x000018, 0x001880, 0x00001A, 0x00001C)
+INST4(and , "and" , 0, IUM_RW, 0, 1, 0x000020, 0x002080, 0x000022, 0x000024)
+INST4(sub , "sub" , 0, IUM_RW, 0, 1, 0x000028, 0x002880, 0x00002A, 0x00002C)
+INST4(xor , "xor" , 0, IUM_RW, 0, 1, 0x000030, 0x003080, 0x000032, 0x000034)
+INST4(cmp , "cmp" , 0, IUM_RD, 0, 1, 0x000038, 0x003880, 0x00003A, 0x00003C)
+INST4(test , "test" , 0, IUM_RD, 0, 1, 0x000084, 0x0000F6, 0x000084, 0x0000A8)
+INST4(mov , "mov" , 0, IUM_WR, 0, 0, 0x000088, 0x0000C6, 0x00008A, 0x0000B0)
+
+INST4(lea , "lea" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x00008D, BAD_CODE)
+
+// enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M
+
+INST3(movsx , "movsx" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x0F00BE)
+#ifdef _TARGET_AMD64_
+INST3(movsxd , "movsxd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x4800000063LL )
+#endif
+INST3(movzx , "movzx" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x0F00B6)
+
+INST3(cmovo , "cmovo" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0040)
+INST3(cmovno , "cmovno" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0041)
+INST3(cmovb , "cmovb" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0042)
+INST3(cmovae , "cmovae" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0043)
+INST3(cmove , "cmove" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0044)
+INST3(cmovne , "cmovne" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0045)
+INST3(cmovbe , "cmovbe" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0046)
+INST3(cmova , "cmova" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0047)
+INST3(cmovs , "cmovs" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0048)
+INST3(cmovns , "cmovns" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0049)
+INST3(cmovpe , "cmovpe" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004A)
+INST3(cmovpo , "cmovpo" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004B)
+INST3(cmovl , "cmovl" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004C)
+INST3(cmovge , "cmovge" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004D)
+INST3(cmovle , "cmovle" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004E)
+INST3(cmovg , "cmovg" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004F)
+
+INST3(xchg , "xchg" , 0, IUM_RW, 0, 0, 0x000086, BAD_CODE, 0x000086)
+INST3(imul , "imul" , 0, IUM_RW, 0, 1, 0x0F00AC, BAD_CODE, 0x0F00AF) // op1 *= op2
+
+// enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M
+
+// Instead of encoding these as 3-operand instructions, we encode them
+// as 2-operand instructions with the target register being implicit
+// implicit_reg = op1*op2_icon
+#define INSTMUL INST3
+INSTMUL(imul_AX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x000068, BAD_CODE)
+INSTMUL(imul_CX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x000868, BAD_CODE)
+INSTMUL(imul_DX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x001068, BAD_CODE)
+INSTMUL(imul_BX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x001868, BAD_CODE)
+INSTMUL(imul_SP, "imul", 0, IUM_RD, 0, 1, BAD_CODE, BAD_CODE, BAD_CODE)
+INSTMUL(imul_BP, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x002868, BAD_CODE)
+INSTMUL(imul_SI, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x003068, BAD_CODE)
+INSTMUL(imul_DI, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x003868, BAD_CODE)
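+// Illustrative note: the register named in each entry above is the implicit destination, so
+// imul_CX, for instance, stands for an encoding of the form "imul ecx, <op1>, <icon>"; the hex
+// codes differ only in the modrm reg field (the 0x0000ff00 position) that selects that register.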
+
+#ifdef _TARGET_AMD64_
+
+INSTMUL(imul_08, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400000068, BAD_CODE)
+INSTMUL(imul_09, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400000868, BAD_CODE)
+INSTMUL(imul_10, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400001068, BAD_CODE)
+INSTMUL(imul_11, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400001868, BAD_CODE)
+INSTMUL(imul_12, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400002068, BAD_CODE)
+INSTMUL(imul_13, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400002868, BAD_CODE)
+INSTMUL(imul_14, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400003068, BAD_CODE)
+INSTMUL(imul_15, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400003868, BAD_CODE)
+
+#endif // _TARGET_AMD64_
+
+// the hex codes in this file represent the instruction encoding as follows:
+// 0x0000ff00 - modrm byte position
+// 0x000000ff - last byte of opcode (before modrm)
+// 0x00ff0000 - first byte of opcode
+// 0xff000000 - middle byte of opcode, if needed (after first, before last)
+//
+// So a 1-byte opcode is: and with modrm:
+// 0x00000011 0x0000RM11
+//
+// So a 2-byte opcode is: and with modrm:
+// 0x00002211 0x0011RM22
+//
+// So a 3-byte opcode is: and with modrm:
+// 0x00113322 0x2211RM33
+//
+// So a 4-byte opcode would be something like this:
+// 0x22114433
+
+#define PACK3(byte1,byte2,byte3) ((byte1 << 16) | (byte2 << 24) | byte3)
+#define PACK2(byte1,byte2) ((byte1 << 16) | byte2)
+#define SSEFLT(c) PACK3(0xf3, 0x0f, c)
+#define SSEDBL(c) PACK3(0xf2, 0x0f, c)
+#define PCKDBL(c) PACK3(0x66, 0x0f, c)
+#define PCKFLT(c) PACK2(0x0f,c)
+
+// These macros encode an extra byte that is implicit in the macro.
+#define PACK4(byte1,byte2,byte3,byte4) ((byte1 << 16) | (byte2 << 24) | byte3 | (byte4 << 8))
+#define SSE38(c) PACK4(0x66, 0x0f, 0x38, c)
+#define SSE3A(c) PACK4(0x66, 0x0f, 0x3A, c)
+
+// VEX* encodes the implied leading opcode bytes in c1:
+// 1: implied 0f, 2: implied 0f 38, 3: implied 0f 3a
+#define VEX2INT(c1,c2) PACK3(c1, 0xc5, c2)
+#define VEX3INT(c1,c2) PACK4(c1, 0xc5, 0x02, c2)
+#define VEX3FLT(c1,c2) PACK4(c1, 0xc5, 0x02, c2)
+
+// Please insert any SSE2 instructions between FIRST_SSE2_INSTRUCTION and LAST_SSE2_INSTRUCTION
+INST3(FIRST_SSE2_INSTRUCTION, "FIRST_SSE2_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+
+// These are the SSE instructions used on x86
+INST3( mov_i2xmm, "movd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6E)) // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg
+INST3( mov_xmm2i, "movd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7E)) // Move xmm reg to an int reg. reg1=xmm reg, reg2=int reg
+INST3( movq, "movq" , 0, IUM_WR, 0, 0, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E))
+INST3( movsdsse2, "movsd" , 0, IUM_WR, 0, 0, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10))
+
+INST3( punpckldq, "punpckldq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x62))
+
+INST3( xorps, "xorps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x57)) // XOR packed singles
+
+INST3( cvttsd2si, "cvttsd2si" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x2C)) // cvt with trunc scalar double to signed DWORDs
+
+#ifndef LEGACY_BACKEND
+INST3( movdqu, "movdqu" , 0, IUM_WR, 0, 0, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F))
+INST3( movdqa, "movdqa" , 0, IUM_WR, 0, 0, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F))
+INST3( movlpd, "movlpd" , 0, IUM_WR, 0, 0, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12))
+INST3( movlps, "movlps" , 0, IUM_WR, 0, 0, PCKFLT(0x13), BAD_CODE, PCKFLT(0x12))
+INST3( movhpd, "movhpd" , 0, IUM_WR, 0, 0, PCKDBL(0x17), BAD_CODE, PCKDBL(0x16))
+INST3( movhps, "movhps" , 0, IUM_WR, 0, 0, PCKFLT(0x17), BAD_CODE, PCKFLT(0x16))
+INST3( movss, "movss" , 0, IUM_WR, 0, 0, SSEFLT(0x11), BAD_CODE, SSEFLT(0x10))
+INST3( movapd, "movapd" , 0, IUM_WR, 0, 0, PCKDBL(0x29), BAD_CODE, PCKDBL(0x28))
+INST3( movaps, "movaps" , 0, IUM_WR, 0, 0, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28))
+INST3( movupd, "movupd" , 0, IUM_WR, 0, 0, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10))
+INST3( movups, "movups" , 0, IUM_WR, 0, 0, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10))
+
+INST3( shufps, "shufps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0xC6))
+INST3( shufpd, "shufpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC6))
+
+// SSE 2 arith
+INST3( addps, "addps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x58)) // Add packed singles
+INST3( addss, "addss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x58)) // Add scalar singles
+INST3( addpd, "addpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x58)) // Add packed doubles
+INST3( addsd, "addsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x58)) // Add scalar doubles
+INST3( mulps, "mulps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x59)) // Multiply packed singles
+INST3( mulss, "mulss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x59)) // Multiply scalar single
+INST3( mulpd, "mulpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x59)) // Multiply packed doubles
+INST3( mulsd, "mulsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x59)) // Multiply scalar doubles
+INST3( subps, "subps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5C)) // Subtract packed singles
+INST3( subss, "subss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5C)) // Subtract scalar singles
+INST3( subpd, "subpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5C)) // Subtract packed doubles
+INST3( subsd, "subsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5C)) // Subtract scalar doubles
+INST3( minps, "minps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5D)) // Return Minimum packed singles
+INST3( minss, "minss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5D)) // Return Minimum scalar single
+INST3( minpd, "minpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5D)) // Return Minimum packed doubles
+INST3( minsd, "minsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5D)) // Return Minimum scalar double
+INST3( divps, "divps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5E)) // Divide packed singles
+INST3( divss, "divss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5E)) // Divide scalar singles
+INST3( divpd, "divpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5E)) // Divide packed doubles
+INST3( divsd, "divsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5E)) // Divide scalar doubles
+INST3( maxps, "maxps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5F)) // Return Maximum packed singles
+INST3( maxss, "maxss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5F)) // Return Maximum scalar single
+INST3( maxpd, "maxpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5F)) // Return Maximum packed doubles
+INST3( maxsd, "maxsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5F)) // Return Maximum scalar double
+INST3( xorpd, "xorpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x57)) // XOR packed doubles
+INST3( andps, "andps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x54)) // AND packed singles
+INST3( andpd, "andpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x54)) // AND packed doubles
+INST3( sqrtsd, "sqrtsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x51)) // Sqrt of a scalar double
+INST3( sqrtps, "sqrtps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x51)) // Sqrt of a packed float
+INST3( sqrtpd, "sqrtpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x51)) // Sqrt of a packed double
+INST3( andnps, "andnps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x55)) // And-Not packed singles
+INST3( andnpd, "andnpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x55)) // And-Not packed doubles
+INST3( orps, "orps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x56)) // Or packed singles
+INST3( orpd, "orpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x56)) // Or packed doubles
+INST3( haddpd, "haddpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7C)) // Horizontal add packed doubles
+
+// SSE2 conversions
+INST3( cvtpi2ps, "cvtpi2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2A)) // cvt packed DWORDs to singles
+INST3( cvtsi2ss, "cvtsi2ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2A)) // cvt DWORD to scalar single
+INST3( cvtpi2pd, "cvtpi2pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2A)) // cvt packed DWORDs to doubles
+INST3( cvtsi2sd, "cvtsi2sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x2A)) // cvt DWORD to scalar double
+INST3( cvttps2pi, "cvttps2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2C)) // cvt with trunc packed singles to DWORDs
+INST3( cvttss2si, "cvttss2si", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2C)) // cvt with trunc scalar single to DWORD
+INST3( cvttpd2pi, "cvttpd2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2C)) // cvt with trunc packed doubles to DWORDs
+INST3( cvtps2pi, "cvtps2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2D)) // cvt packed singles to DWORDs
+INST3( cvtss2si, "cvtss2si", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2D)) // cvt scalar single to DWORD
+INST3( cvtpd2pi, "cvtpd2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2D)) // cvt packed doubles to DWORDs
+INST3( cvtsd2si, "cvtsd2si", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x2D)) // cvt scalar double to DWORD
+INST3( cvtps2pd, "cvtps2pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5A)) // cvt packed singles to doubles
+INST3( cvtpd2ps, "cvtpd2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5A)) // cvt packed doubles to singles
+INST3( cvtss2sd, "cvtss2sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5A)) // cvt scalar single to scalar doubles
+INST3( cvtsd2ss, "cvtsd2ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5A)) // cvt scalar double to scalar singles
+INST3( cvtdq2ps, "cvtdq2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5B)) // cvt packed DWORDs to singles
+INST3( cvtps2dq, "cvtps2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5B)) // cvt packed singles to DWORDs
+INST3( cvttps2dq, "cvttps2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5B)) // cvt with trunc packed singles to DWORDs
+INST3( cvtpd2dq, "cvtpd2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xE6)) // cvt packed doubles to DWORDs
+INST3( cvttpd2dq, "cvttpd2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE6)) // cvt with trunc packed doubles to DWORDs
+INST3( cvtdq2pd, "cvtdq2pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0xE6)) // cvt packed DWORDs to doubles
+
+// SSE2 comparison instructions
+INST3( ucomiss, "ucomiss", 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2E)) // unordered compare singles
+INST3( ucomisd, "ucomisd", 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2E)) // unordered compare doubles
+
+// SSE2 packed single/double comparison operations.
+// Note that these instructions not only compare but also overwrite the first source.
+INST3( cmpps, "cmpps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0xC2)) // compare packed singles
+INST3( cmppd, "cmppd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC2)) // compare packed doubles
+
+//SSE2 packed integer operations
+INST3( paddb, "paddb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFC)) // Add packed byte integers
+INST3( paddw, "paddw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFD)) // Add packed word (16-bit) integers
+INST3( paddd, "paddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFE)) // Add packed double-word (32-bit) integers
+INST3( paddq, "paddq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD4)) // Add packed quad-word (64-bit) integers
+INST3( psubb, "psubb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF8)) // Subtract packed byte integers
+INST3( psubw, "psubw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF9)) // Subtract packed word (16-bit) integers
+INST3( psubd, "psubd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFA)) // Subtract packed double-word (32-bit) integers
+INST3( psubq, "psubq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFB)) // subtract packed quad-word (64-bit) integers
+INST3( pmuludq, "pmuludq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF4)) // packed multiply 32-bit unsigned integers and store 64-bit result
+INST3( pmullw, "pmullw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD5)) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result
+INST3( pand, "pand" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDB)) // Packed bit-wise AND of two xmm regs
+INST3( pandn, "pandn" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDF)) // Packed bit-wise AND NOT of two xmm regs
+INST3( por, "por" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEB)) // Packed bit-wise OR of two xmm regs
+INST3( pxor, "pxor" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEF)) // Packed bit-wise XOR of two xmm regs
+INST3( psrldq, "psrldq" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x73), BAD_CODE ) // Shift right logical of xmm reg by given number of bytes
+INST3( pslldq, "pslldq" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x73), BAD_CODE ) // Shift left logical of xmm reg by given number of bytes
+INST3( pmaxub, "pmaxub" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDE)) // packed maximum unsigned bytes
+INST3( pminub, "pminub" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDA)) // packed minimum unsigned bytes
+INST3( pmaxsw, "pmaxsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEE)) // packed maximum signed words
+INST3( pminsw, "pminsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEA)) // packed minimum signed words
+INST3( pcmpeqd, "pcmpeqd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x76)) // Packed compare 32-bit integers for equality
+INST3( pcmpgtd, "pcmpgtd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x66)) // Packed compare 32-bit signed integers for greater than
+INST3( pcmpeqw, "pcmpeqw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x75)) // Packed compare 16-bit integers for equality
+INST3( pcmpgtw, "pcmpgtw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x65)) // Packed compare 16-bit signed integers for greater than
+INST3( pcmpeqb, "pcmpeqb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x74)) // Packed compare 8-bit integers for equality
+INST3( pcmpgtb, "pcmpgtb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x64)) // Packed compare 8-bit signed integers for greater than
+
+INST3( pshufd, "pshufd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x70)) // Packed shuffle of 32-bit integers
+INST3( pextrw, "pextrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC5)) // Extract 16-bit value into a r32 with zero extended to 32-bits
+INST3( pinsrw, "pinsrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC4)) // packed insert word
+
+#endif // !LEGACY_BACKEND
+INST3(LAST_SSE2_INSTRUCTION, "LAST_SSE2_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+
+#ifndef LEGACY_BACKEND
+INST3(FIRST_SSE4_INSTRUCTION, "FIRST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+// Most of the following instructions should be included in the method Is4ByteAVXInstruction()
+// enum name FP updmode rf wf MR MI RM
+INST3( dpps, "dpps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x40)) // Packed dot product of packed single precision floating point values
+INST3( dppd, "dppd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x41)) // Packed dot product of packed double precision floating point values
+INST3( insertps, "insertps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x21)) // Insert packed single precision float value
+INST3( pcmpeqq, "pcmpeqq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x29)) // Packed compare 64-bit integers for equality
+INST3( pcmpgtq, "pcmpgtq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x37)) // Packed compare 64-bit signed integers for greater than
+INST3( pmulld, "pmulld" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x40)) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
+INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+
+INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+// AVX only instructions
+INST3( vbroadcastss, "broadcastss" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x18)) // Broadcast float value read from memory to entire ymm register
+INST3( vbroadcastsd, "broadcastsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x19)) // Broadcast double value read from memory to entire ymm register
+INST3( vpbroadcastb, "pbroadcastb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x78)) // Broadcast int8 value from reg/memory to entire ymm register
+INST3( vpbroadcastw, "pbroadcastw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x79)) // Broadcast int16 value from reg/memory to entire ymm register
+INST3( vpbroadcastd, "pbroadcastd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x58)) // Broadcast int32 value from reg/memory to entire ymm register
+INST3( vpbroadcastq, "pbroadcastq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x59)) // Broadcast int64 value from reg/memory to entire ymm register
+INST3( vextractf128, "extractf128" , 0, IUM_WR, 0, 0, SSE3A(0x19), BAD_CODE, BAD_CODE) // Extract 128-bit packed floating point values
+INST3( vinsertf128, "insertf128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x18)) // Insert 128-bit packed floating point values
+INST3( vzeroupper, "zeroupper" , 0, IUM_WR, 0, 0, 0xC577F8, BAD_CODE, BAD_CODE) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix)
+
+INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+#endif // !LEGACY_BACKEND
+// enum name FP updmode rf wf R/M,R/M[reg] R/M,icon
+
+INST2(ret , "ret" , 0, IUM_RD, 0, 0, 0x0000C3, 0x0000C2)
+INST2(loop , "loop" , 0, IUM_RD, 0, 0, BAD_CODE, 0x0000E2)
+INST2(call , "call" , 0, IUM_RD, 0, 1, 0x0010FF, 0x0000E8)
+
+INST2(rol , "rol" , 0, IUM_RW, 0, 1, 0x0000D2, BAD_CODE)
+INST2(rol_1 , "rol" , 0, IUM_RW, 0, 1, 0x0000D0, 0x0000D0)
+INST2(rol_N , "rol" , 0, IUM_RW, 0, 1, 0x0000C0, 0x0000C0)
+INST2(ror , "ror" , 0, IUM_RW, 0, 1, 0x0008D2, BAD_CODE)
+INST2(ror_1 , "ror" , 0, IUM_RW, 0, 1, 0x0008D0, 0x0008D0)
+INST2(ror_N , "ror" , 0, IUM_RW, 0, 1, 0x0008C0, 0x0008C0)
+
+INST2(rcl , "rcl" , 0, IUM_RW, 1, 1, 0x0010D2, BAD_CODE)
+INST2(rcl_1 , "rcl" , 0, IUM_RW, 1, 1, 0x0010D0, 0x0010D0)
+INST2(rcl_N , "rcl" , 0, IUM_RW, 1, 1, 0x0010C0, 0x0010C0)
+INST2(rcr , "rcr" , 0, IUM_RW, 1, 1, 0x0018D2, BAD_CODE)
+INST2(rcr_1 , "rcr" , 0, IUM_RW, 1, 1, 0x0018D0, 0x0018D0)
+INST2(rcr_N , "rcr" , 0, IUM_RW, 1, 1, 0x0018C0, 0x0018C0)
+INST2(shl , "shl" , 0, IUM_RW, 0, 1, 0x0020D2, BAD_CODE)
+INST2(shl_1 , "shl" , 0, IUM_RW, 0, 1, 0x0020D0, 0x0020D0)
+INST2(shl_N , "shl" , 0, IUM_RW, 0, 1, 0x0020C0, 0x0020C0)
+INST2(shr , "shr" , 0, IUM_RW, 0, 1, 0x0028D2, BAD_CODE)
+INST2(shr_1 , "shr" , 0, IUM_RW, 0, 1, 0x0028D0, 0x0028D0)
+INST2(shr_N , "shr" , 0, IUM_RW, 0, 1, 0x0028C0, 0x0028C0)
+INST2(sar , "sar" , 0, IUM_RW, 0, 1, 0x0038D2, BAD_CODE)
+INST2(sar_1 , "sar" , 0, IUM_RW, 0, 1, 0x0038D0, 0x0038D0)
+INST2(sar_N , "sar" , 0, IUM_RW, 0, 1, 0x0038C0, 0x0038C0)
+
+
+// enum name FP updmode rf wf R/M,R/M[reg]
+
+INST1(r_movsb, "rep movsb" , 0, IUM_RD, 0, 0, 0x00A4F3)
+INST1(r_movsd, "rep movsd" , 0, IUM_RD, 0, 0, 0x00A5F3)
+#ifndef LEGACY_BACKEND
+INST1(r_movsq, "rep movsq" , 0, IUM_RD, 0, 0, 0xF3A548)
+#endif // !LEGACY_BACKEND
+INST1(movsb , "movsb" , 0, IUM_RD, 0, 0, 0x0000A4)
+INST1(movsd , "movsd" , 0, IUM_RD, 0, 0, 0x0000A5)
+#ifndef LEGACY_BACKEND
+INST1(movsq, "movsq" , 0, IUM_RD, 0, 0, 0x00A548)
+#endif // !LEGACY_BACKEND
+
+INST1(r_stosb, "rep stosb" , 0, IUM_RD, 0, 0, 0x00AAF3)
+INST1(r_stosd, "rep stosd" , 0, IUM_RD, 0, 0, 0x00ABF3)
+#ifndef LEGACY_BACKEND
+INST1(r_stosq, "rep stosq" , 0, IUM_RD, 0, 0, 0xF3AB48)
+#endif // !LEGACY_BACKEND
+INST1(stosb, "stosb" , 0, IUM_RD, 0, 0, 0x0000AA)
+INST1(stosd, "stosd" , 0, IUM_RD, 0, 0, 0x0000AB)
+#ifndef LEGACY_BACKEND
+INST1(stosq, "stosq" , 0, IUM_RD, 0, 0, 0x00AB48)
+#endif // !LEGACY_BACKEND
+
+INST1(int3 , "int3" , 0, IUM_RD, 0, 0, 0x0000CC)
+INST1(nop , "nop" , 0, IUM_RD, 0, 0, 0x000090)
+INST1(lock , "lock" , 0, IUM_RD, 0, 0, 0x0000F0)
+INST1(leave , "leave" , 0, IUM_RD, 0, 0, 0x0000C9)
+
+
+INST1(neg , "neg" , 0, IUM_RW, 0, 1, 0x0018F6)
+INST1(not , "not" , 0, IUM_RW, 0, 1, 0x0010F6)
+
+INST1(cdq , "cdq" , 0, IUM_RD, 0, 1, 0x000099)
+INST1(idiv , "idiv" , 0, IUM_RD, 0, 1, 0x0038F6)
+INST1(imulEAX, "imul" , 0, IUM_RD, 0, 1, 0x0028F6) // edx:eax = eax*op1
+INST1(div , "div" , 0, IUM_RD, 0, 1, 0x0030F6)
+INST1(mulEAX , "mul" , 0, IUM_RD, 0, 1, 0x0020F6)
+
+INST1(sahf , "sahf" , 0, IUM_RD, 0, 1, 0x00009E)
+
+INST1(xadd , "xadd" , 0, IUM_RW, 0, 1, 0x0F00C0)
+INST1(cmpxchg, "cmpxchg" , 0, IUM_RW, 0, 1, 0x0F00B0)
+
+INST1(shld , "shld" , 0, IUM_RW, 0, 1, 0x0F00A4)
+INST1(shrd , "shrd" , 0, IUM_RW, 0, 1, 0x0F00AC)
+
+// For RyuJIT/x86, we follow the x86 calling convention that requires
+// us to return floating point value on the x87 FP stack, so we need
+// these instructions regardless of whether we're using full stack fp.
+#ifdef _TARGET_X86_
+INST1(fld , "fld" , 1, IUM_WR, 0, 0, 0x0000D9)
+INST1(fstp , "fstp" , 1, IUM_WR, 0, 0, 0x0018D9)
+#endif // _TARGET_X86
+
+#if FEATURE_STACK_FP_X87
+INST1(fnstsw , "fnstsw" , 1, IUM_WR, 1, 0, 0x0020DF)
+INST1(fcom , "fcom" , 1, IUM_RD, 0, 1, 0x0010D8)
+INST1(fcomp , "fcomp" , 1, IUM_RD, 0, 1, 0x0018D8)
+INST1(fcompp , "fcompp" , 1, IUM_RD, 0, 1, 0x00D9DE)
+INST1(fcomi , "fcomi" , 1, IUM_RD, 0, 1, 0x00F0DB)
+INST1(fcomip , "fcomip" , 1, IUM_RD, 0, 1, 0x00F0DF)
+
+INST1(fchs , "fchs" , 1, IUM_RW, 0, 1, 0x00E0D9)
+INST1(fabs , "fabs" , 1, IUM_RW, 0, 1, 0x00E1D9)
+INST1(fsin , "fsin" , 1, IUM_RW, 0, 1, 0x00FED9)
+INST1(fcos , "fcos" , 1, IUM_RW, 0, 1, 0x00FFD9)
+INST1(fsqrt , "fsqrt" , 1, IUM_RW, 0, 1, 0x00FAD9)
+INST1(fldl2e , "fldl2e" , 1, IUM_RW, 0, 1, 0x00EAD9)
+INST1(frndint, "frndint" , 1, IUM_RW, 0, 1, 0x00FCD9)
+INST1(f2xm1 , "f2xm1" , 1, IUM_RW, 0, 1, 0x00F0D9)
+INST1(fscale , "fscale" , 1, IUM_RW, 0, 1, 0x00FDD9)
+
+INST1(fld1 , "fld1" , 1, IUM_WR, 0, 0, 0x00E8D9)
+INST1(fldz , "fldz" , 1, IUM_WR, 0, 0, 0x00EED9)
+INST1(fst , "fst" , 1, IUM_WR, 0, 0, 0x0010D9)
+
+INST1(fadd , "fadd" , 1, IUM_RW, 0, 0, 0x0000D8)
+INST1(faddp , "faddp" , 1, IUM_RW, 0, 0, 0x0000DA)
+INST1(fsub , "fsub" , 1, IUM_RW, 0, 0, 0x0020D8)
+INST1(fsubp , "fsubp" , 1, IUM_RW, 0, 0, 0x0028DA)
+INST1(fsubr , "fsubr" , 1, IUM_RW, 0, 0, 0x0028D8)
+INST1(fsubrp , "fsubrp" , 1, IUM_RW, 0, 0, 0x0020DA)
+INST1(fmul , "fmul" , 1, IUM_RW, 0, 0, 0x0008D8)
+INST1(fmulp , "fmulp" , 1, IUM_RW, 0, 0, 0x0008DA)
+INST1(fdiv , "fdiv" , 1, IUM_RW, 0, 0, 0x0030D8)
+INST1(fdivp , "fdivp" , 1, IUM_RW, 0, 0, 0x0038DA)
+INST1(fdivr , "fdivr" , 1, IUM_RW, 0, 0, 0x0038D8)
+INST1(fdivrp , "fdivrp" , 1, IUM_RW, 0, 0, 0x0030DA)
+
+INST1(fxch , "fxch" , 1, IUM_RW, 0, 0, 0x00C8D9)
+INST1(fprem , "fprem" , 0, IUM_RW, 0, 1, 0x00F8D9)
+
+INST1(fild , "fild" , 1, IUM_RD, 0, 0, 0x0000DB)
+INST1(fildl , "fild" , 1, IUM_RD, 0, 0, 0x0028DB)
+INST1(fistp , "fistp" , 1, IUM_WR, 0, 0, 0x0018DB)
+INST1(fistpl , "fistp" , 1, IUM_WR, 0, 0, 0x0038DB)
+
+INST1(fldcw , "fldcw" , 1, IUM_RD, 0, 0, 0x0028D9)
+INST1(fnstcw , "fnstcw" , 1, IUM_WR, 0, 0, 0x0038D9)
+#endif // FEATURE_STACK_FP_X87
+
+INST1(seto , "seto" , 0, IUM_WR, 1, 0, 0x0F0090)
+INST1(setno , "setno" , 0, IUM_WR, 1, 0, 0x0F0091)
+INST1(setb , "setb" , 0, IUM_WR, 1, 0, 0x0F0092)
+INST1(setae , "setae" , 0, IUM_WR, 1, 0, 0x0F0093)
+INST1(sete , "sete" , 0, IUM_WR, 1, 0, 0x0F0094)
+INST1(setne , "setne" , 0, IUM_WR, 1, 0, 0x0F0095)
+INST1(setbe , "setbe" , 0, IUM_WR, 1, 0, 0x0F0096)
+INST1(seta , "seta" , 0, IUM_WR, 1, 0, 0x0F0097)
+INST1(sets , "sets" , 0, IUM_WR, 1, 0, 0x0F0098)
+INST1(setns , "setns" , 0, IUM_WR, 1, 0, 0x0F0099)
+INST1(setpe , "setpe" , 0, IUM_WR, 1, 0, 0x0F009A)
+INST1(setpo , "setpo" , 0, IUM_WR, 1, 0, 0x0F009B)
+INST1(setl , "setl" , 0, IUM_WR, 1, 0, 0x0F009C)
+INST1(setge , "setge" , 0, IUM_WR, 1, 0, 0x0F009D)
+INST1(setle , "setle" , 0, IUM_WR, 1, 0, 0x0F009E)
+INST1(setg , "setg" , 0, IUM_WR, 1, 0, 0x0F009F)
+
+#ifdef _TARGET_AMD64_
+// A jump with rex prefix. This is used for register indirect
+// tail calls.
+INST1(rex_jmp, "rex.jmp" , 0, IUM_RD, 0, 0, 0x0020FE)
+#endif
+
+INST1(i_jmp , "jmp" , 0, IUM_RD, 0, 0, 0x0020FE)
+
+INST0(jmp , "jmp" , 0, IUM_RD, 0, 0, 0x0000EB)
+INST0(jo , "jo" , 0, IUM_RD, 1, 0, 0x000070)
+INST0(jno , "jno" , 0, IUM_RD, 1, 0, 0x000071)
+INST0(jb , "jb" , 0, IUM_RD, 1, 0, 0x000072)
+INST0(jae , "jae" , 0, IUM_RD, 1, 0, 0x000073)
+INST0(je , "je" , 0, IUM_RD, 1, 0, 0x000074)
+INST0(jne , "jne" , 0, IUM_RD, 1, 0, 0x000075)
+INST0(jbe , "jbe" , 0, IUM_RD, 1, 0, 0x000076)
+INST0(ja , "ja" , 0, IUM_RD, 1, 0, 0x000077)
+INST0(js , "js" , 0, IUM_RD, 1, 0, 0x000078)
+INST0(jns , "jns" , 0, IUM_RD, 1, 0, 0x000079)
+INST0(jpe , "jpe" , 0, IUM_RD, 1, 0, 0x00007A)
+INST0(jpo , "jpo" , 0, IUM_RD, 1, 0, 0x00007B)
+INST0(jl , "jl" , 0, IUM_RD, 1, 0, 0x00007C)
+INST0(jge , "jge" , 0, IUM_RD, 1, 0, 0x00007D)
+INST0(jle , "jle" , 0, IUM_RD, 1, 0, 0x00007E)
+INST0(jg , "jg" , 0, IUM_RD, 1, 0, 0x00007F)
+
+INST0(l_jmp , "jmp" , 0, IUM_RD, 0, 0, 0x0000E9)
+INST0(l_jo , "jo" , 0, IUM_RD, 1, 0, 0x00800F)
+INST0(l_jno , "jno" , 0, IUM_RD, 1, 0, 0x00810F)
+INST0(l_jb , "jb" , 0, IUM_RD, 1, 0, 0x00820F)
+INST0(l_jae , "jae" , 0, IUM_RD, 1, 0, 0x00830F)
+INST0(l_je , "je" , 0, IUM_RD, 1, 0, 0x00840F)
+INST0(l_jne , "jne" , 0, IUM_RD, 1, 0, 0x00850F)
+INST0(l_jbe , "jbe" , 0, IUM_RD, 1, 0, 0x00860F)
+INST0(l_ja , "ja" , 0, IUM_RD, 1, 0, 0x00870F)
+INST0(l_js , "js" , 0, IUM_RD, 1, 0, 0x00880F)
+INST0(l_jns , "jns" , 0, IUM_RD, 1, 0, 0x00890F)
+INST0(l_jpe , "jpe" , 0, IUM_RD, 1, 0, 0x008A0F)
+INST0(l_jpo , "jpo" , 0, IUM_RD, 1, 0, 0x008B0F)
+INST0(l_jl , "jl" , 0, IUM_RD, 1, 0, 0x008C0F)
+INST0(l_jge , "jge" , 0, IUM_RD, 1, 0, 0x008D0F)
+INST0(l_jle , "jle" , 0, IUM_RD, 1, 0, 0x008E0F)
+INST0(l_jg , "jg" , 0, IUM_RD, 1, 0, 0x008F0F)
+
+INST0(align , "align" , 0, IUM_RD, 0, 0, BAD_CODE)
+
+/*****************************************************************************/
+#undef INST0
+#undef INST1
+#undef INST2
+#undef INST3
+#undef INST4
+#undef INST5
+/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/jit.h b/src/jit/jit.h
new file mode 100644
index 0000000000..7bf5cd4051
--- /dev/null
+++ b/src/jit/jit.h
@@ -0,0 +1,891 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef _JIT_H_
+#define _JIT_H_
+/*****************************************************************************/
+
+//
+// clr.sln only defines _DEBUG
+// The jit uses DEBUG rather than _DEBUG
+// So we make sure that _DEBUG implies DEBUG
+//
+#ifdef _DEBUG
+#ifndef DEBUG
+#define DEBUG 1
+#endif
+#endif
+
+// Clang-format messes with the indentation of comments if they directly precede an
+// ifdef. This macro allows us to anchor the comments to the regular flow of code.
+#define CLANG_FORMAT_COMMENT_ANCHOR ;
+
+// Clang-tidy replaces 0 with nullptr in some templated functions, causing a build
+// break. Replacing those instances with ZERO avoids this change
+#define ZERO 0
+
+#ifdef _MSC_VER
+// These don't seem useful, so turning them off is no big deal
+#pragma warning(disable : 4510) // can't generate default constructor
+#pragma warning(disable : 4511) // can't generate copy constructor
+#pragma warning(disable : 4512) // can't generate assignment operator
+#pragma warning(disable : 4610) // user defined constructor required
+#pragma warning(disable : 4211) // nonstandard extension used (char name[0] in structs)
+#pragma warning(disable : 4127) // conditional expression constant
+#pragma warning(disable : 4201) // "nonstandard extension used : nameless struct/union"
+
+// Depending on the code base, you may want to not disable these
+#pragma warning(disable : 4245) // assigning signed / unsigned
+#pragma warning(disable : 4146) // unary minus applied to unsigned
+
+#pragma warning(disable : 4100) // unreferenced formal parameter
+#pragma warning(disable : 4291) // new operator without delete (only in emitX86.cpp)
+#endif
+
+#ifdef _MSC_VER
+#define CHECK_STRUCT_PADDING 0 // Set this to '1' to enable warning C4820 "'bytes' bytes padding added after
+ // construct 'member_name'" on interesting structs/classes
+#else
+#define CHECK_STRUCT_PADDING 0 // Never enable it for non-MSFT compilers
+#endif
+
+#if defined(_X86_)
+#if defined(_ARM_)
+#error Cannot define both _X86_ and _ARM_
+#endif
+#if defined(_AMD64_)
+#error Cannot define both _X86_ and _AMD64_
+#endif
+#if defined(_ARM64_)
+#error Cannot define both _X86_ and _ARM64_
+#endif
+#define _HOST_X86_
+#elif defined(_AMD64_)
+#if defined(_X86_)
+#error Cannot define both _AMD64_ and _X86_
+#endif
+#if defined(_ARM_)
+#error Cannot define both _AMD64_ and _ARM_
+#endif
+#if defined(_ARM64_)
+#error Cannot define both _AMD64_ and _ARM64_
+#endif
+#define _HOST_AMD64_
+#elif defined(_ARM_)
+#if defined(_X86_)
+#error Cannot define both _ARM_ and _X86_
+#endif
+#if defined(_AMD64_)
+#error Cannot define both _ARM_ and _AMD64_
+#endif
+#if defined(_ARM64_)
+#error Cannot define both _ARM_ and _ARM64_
+#endif
+#define _HOST_ARM_
+#elif defined(_ARM64_)
+#if defined(_X86_)
+#error Cannot define both _ARM64_ and _X86_
+#endif
+#if defined(_AMD64_)
+#error Cannot define both _ARM64_ and _AMD64_
+#endif
+#if defined(_ARM_)
+#error Cannot define both _ARM64_ and _ARM_
+#endif
+#define _HOST_ARM64_
+#else
+#error Unsupported or unset host architecture
+#endif
+
+#if defined(_HOST_AMD64_) || defined(_HOST_ARM64_)
+#define _HOST_64BIT_
+#endif
+
+#if defined(_TARGET_X86_)
+#if defined(_TARGET_ARM_)
+#error Cannot define both _TARGET_X86_ and _TARGET_ARM_
+#endif
+#if defined(_TARGET_AMD64_)
+#error Cannot define both _TARGET_X86_ and _TARGET_AMD64_
+#endif
+#if defined(_TARGET_ARM64_)
+#error Cannot define both _TARGET_X86_ and _TARGET_ARM64_
+#endif
+#if !defined(_HOST_X86_)
+#define _CROSS_COMPILER_
+#endif
+#elif defined(_TARGET_AMD64_)
+#if defined(_TARGET_X86_)
+#error Cannot define both _TARGET_AMD64_ and _TARGET_X86_
+#endif
+#if defined(_TARGET_ARM_)
+#error Cannot define both _TARGET_AMD64_ and _TARGET_ARM_
+#endif
+#if defined(_TARGET_ARM64_)
+#error Cannot define both _TARGET_AMD64_ and _TARGET_ARM64_
+#endif
+#if !defined(_HOST_AMD64_)
+#define _CROSS_COMPILER_
+#endif
+#elif defined(_TARGET_ARM_)
+#if defined(_TARGET_X86_)
+#error Cannot define both _TARGET_ARM_ and _TARGET_X86_
+#endif
+#if defined(_TARGET_AMD64_)
+#error Cannot define both _TARGET_ARM_ and _TARGET_AMD64_
+#endif
+#if defined(_TARGET_ARM64_)
+#error Cannot define both _TARGET_ARM_ and _TARGET_ARM64_
+#endif
+#if !defined(_HOST_ARM_)
+#define _CROSS_COMPILER_
+#endif
+#elif defined(_TARGET_ARM64_)
+#if defined(_TARGET_X86_)
+#error Cannot define both _TARGET_ARM64_ and _TARGET_X86_
+#endif
+#if defined(_TARGET_AMD64_)
+#error Cannot define both _TARGET_ARM64_ and _TARGET_AMD64_
+#endif
+#if defined(_TARGET_ARM_)
+#error Cannot define both _TARGET_ARM64_ and _TARGET_ARM_
+#endif
+#if !defined(_HOST_ARM64_)
+#define _CROSS_COMPILER_
+#endif
+#else
+#error Unsupported or unset target architecture
+#endif
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+#define _TARGET_64BIT_
+#endif
+
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+#define _TARGET_XARCH_
+#endif
+
+#if defined(_TARGET_ARM_) || defined(_TARGET_ARM64_)
+#define _TARGET_ARMARCH_
+#endif
+
+// --------------------------------------------------------------------------------
+// IMAGE_FILE_MACHINE_TARGET
+// --------------------------------------------------------------------------------
+
+#if defined(_TARGET_X86_)
+#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_I386
+#elif defined(_TARGET_AMD64_)
+#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_AMD64
+#elif defined(_TARGET_ARM_)
+#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARMNT
+#elif defined(_TARGET_ARM64_)
+#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARM64 // 0xAA64
+#else
+#error Unsupported or unset target architecture
+#endif
+
+// Include the AMD64 unwind codes when appropriate.
+#if defined(_TARGET_AMD64_)
+#include "win64unwind.h"
+#endif
+
+// Macros for defining strongly-typed enums. Use as follows:
+//
+// DECLARE_TYPED_ENUM(FooEnum,BYTE)
+// {
+// fooTag1, fooTag2
+// }
+// END_DECLARE_TYPED_ENUM(FooEnum, BYTE)
+//
+// VC++ understands the syntax to declare these directly, e.g., "enum FooEnum : BYTE",
+// but GCC does not, so we use typedefs.
+
+#define DECLARE_TYPED_ENUM(tag, baseType) enum tag : baseType
+
+#define END_DECLARE_TYPED_ENUM(tag, baseType) ;
+
+#include "corhdr.h"
+#include "corjit.h"
+
+#define __OPERATOR_NEW_INLINE 1 // indicate that I will define these
+#define __PLACEMENT_NEW_INLINE // don't bring in the global placement new, it is easy to make a mistake
+ // with our new(compiler*) pattern.
+
+#if COR_JIT_EE_VER > 460
+#define NO_CLRCONFIG // Don't bring in the usual CLRConfig infrastructure, since the JIT uses the JIT/EE
+ // interface to retrieve config values.
+
+// This is needed for contract.inl when FEATURE_STACK_PROBE is enabled.
+struct CLRConfig
+{
+ static struct ConfigKey
+ {
+ } EXTERNAL_NO_SO_NOT_MAINLINE;
+ static DWORD GetConfigValue(const ConfigKey& key)
+ {
+ return 0;
+ }
+};
+#endif
+
+#include "utilcode.h" // this defines assert as _ASSERTE
+#include "host.h" // this redefines assert for the JIT to use assertAbort
+#include "utils.h"
+
+#ifdef DEBUG
+#define INDEBUG(x) x
+#define INDEBUG_COMMA(x) x,
+#define DEBUGARG(x) , x
+#else
+#define INDEBUG(x)
+#define INDEBUG_COMMA(x)
+#define DEBUGARG(x)
+#endif
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+#define INDEBUG_LDISASM_COMMA(x) x,
+#else
+#define INDEBUG_LDISASM_COMMA(x)
+#endif
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x) , x
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x) x
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x)
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x)
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#if defined(UNIX_AMD64_ABI)
+#define UNIX_AMD64_ABI_ONLY_ARG(x) , x
+#define UNIX_AMD64_ABI_ONLY(x) x
+#else // !defined(UNIX_AMD64_ABI)
+#define UNIX_AMD64_ABI_ONLY_ARG(x)
+#define UNIX_AMD64_ABI_ONLY(x)
+#endif // defined(UNIX_AMD64_ABI)
+
+#if defined(UNIX_AMD64_ABI) || defined(_TARGET_ARM64_)
+#define MULTIREG_HAS_SECOND_GC_RET 1
+#define MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(x) , x
+#define MULTIREG_HAS_SECOND_GC_RET_ONLY(x) x
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#define MULTIREG_HAS_SECOND_GC_RET 0
+#define MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(x)
+#define MULTIREG_HAS_SECOND_GC_RET_ONLY(x)
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+// To get rid of warning 4701 : local variable may be used without being initialized
+#define DUMMY_INIT(x) (x)
+
+#define REGEN_SHORTCUTS 0
+#define REGEN_CALLPAT 0
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX jit.h XX
+XX XX
+XX Interface of the JIT with jit.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#if defined(DEBUG)
+#include "log.h"
+
+#define INFO6 LL_INFO10000 // Did Jit or Inline succeed?
+#define INFO7 LL_INFO100000 // NYI stuff
+#define INFO8 LL_INFO1000000 // Weird failures
+#define INFO9 LL_EVERYTHING // Info about incoming settings
+#define INFO10 LL_EVERYTHING // Totally verbose
+
+#endif // DEBUG
+
+typedef class ICorJitInfo* COMP_HANDLE;
+
+const CORINFO_CLASS_HANDLE NO_CLASS_HANDLE = (CORINFO_CLASS_HANDLE) nullptr;
+
+/*****************************************************************************/
+
+inline bool False()
+{
+ return false;
+} // Use to disable code while keeping prefast happy
+
+// We define two IL offset types, as follows:
+//
+// IL_OFFSET: either a distinguished value, or an IL offset.
+// IL_OFFSETX: either a distinguished value, or the top two bits are flags, and the remaining bottom
+// bits are an IL offset.
+//
+// In both cases, the set of legal distinguished values is:
+// BAD_IL_OFFSET -- A unique illegal IL offset number. Note that it must be different from
+// the ICorDebugInfo values, below, and must also not be a legal IL offset.
+// ICorDebugInfo::NO_MAPPING -- The IL offset corresponds to no source code (such as EH step blocks).
+// ICorDebugInfo::PROLOG -- The IL offset indicates a prolog
+// ICorDebugInfo::EPILOG -- The IL offset indicates an epilog
+//
+// The IL offset must be in the range [0 .. 0x3fffffff]. This is because we steal
+// the top two bits in IL_OFFSETX for flags, but we want the maximum range to be the same
+// for both types. The IL value can't be larger than the maximum IL offset of the function
+// being compiled.
+//
+// Blocks and statements never store one of the ICorDebugInfo values, even for IL_OFFSETX types. These are
+// only stored in the IPmappingDsc struct, ipmdILoffsx field.
+
+typedef unsigned IL_OFFSET;
+
+const IL_OFFSET BAD_IL_OFFSET = 0x80000000;
+const IL_OFFSET MAX_IL_OFFSET = 0x3fffffff;
+
+typedef unsigned IL_OFFSETX; // IL_OFFSET with stack-empty or call-instruction bit
+const IL_OFFSETX IL_OFFSETX_STKBIT = 0x80000000; // Note: this bit is set when the stack is NOT empty!
+const IL_OFFSETX IL_OFFSETX_CALLINSTRUCTIONBIT = 0x40000000; // Set when the IL offset is for a call instruction.
+const IL_OFFSETX IL_OFFSETX_BITS = IL_OFFSETX_STKBIT | IL_OFFSETX_CALLINSTRUCTIONBIT;
+
+IL_OFFSET jitGetILoffs(IL_OFFSETX offsx);
+IL_OFFSET jitGetILoffsAny(IL_OFFSETX offsx);
+bool jitIsStackEmpty(IL_OFFSETX offsx);
+bool jitIsCallInstruction(IL_OFFSETX offsx);
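+
+// A minimal illustrative sketch of the bit manipulation these accessors imply (not the actual
+// definitions, which live elsewhere in the jit sources and also handle the distinguished
+// ICorDebugInfo values described above); the helper names here are hypothetical:
+//
+//   inline IL_OFFSET StripILOffsetFlags(IL_OFFSETX offsx) // keep bits [0..29], drop the flag bits
+//   {
+//       return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
+//   }
+//
+//   inline bool HasEmptyStack(IL_OFFSETX offsx) // note: the STKBIT is set when the stack is NOT empty
+//   {
+//       return (offsx & IL_OFFSETX_STKBIT) == 0;
+//   }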
+
+const unsigned BAD_VAR_NUM = UINT_MAX;
+
+// Code can't be more than 2^31 in any direction. This is signed, so it should be used for anything that is
+// relative to something else.
+typedef int NATIVE_OFFSET;
+
+// This is the same as the above, but it's used in absolute contexts (i.e. offset from the start). Also,
+// this is used for native code sizes.
+typedef unsigned UNATIVE_OFFSET;
+
+typedef ptrdiff_t ssize_t;
+
+// For the following specially handled FIELD_HANDLES we need
+// values that are negative and have the low two bits zero
+// See eeFindJitDataOffs and eeGetJitDataOffs in Compiler.hpp
+#define FLD_GLOBAL_DS ((CORINFO_FIELD_HANDLE)-4)
+#define FLD_GLOBAL_FS ((CORINFO_FIELD_HANDLE)-8)
+
+/*****************************************************************************/
+
+#include "vartype.h"
+
+/*****************************************************************************/
+
+// Debugging support is ON by default. Can be turned OFF by
+// adding /DDEBUGGING_SUPPORT=0 on the command line.
+
+#ifndef DEBUGGING_SUPPORT
+#define DEBUGGING_SUPPORT
+#elif !DEBUGGING_SUPPORT
+#undef DEBUGGING_SUPPORT
+#endif
+
+/*****************************************************************************/
+
+// Late disassembly is OFF by default. Can be turned ON by
+// adding /DLATE_DISASM=1 on the command line.
+// Always OFF in the non-debug version
+
+#if defined(LATE_DISASM) && (LATE_DISASM == 0)
+#undef LATE_DISASM
+#endif
+
+/*****************************************************************************/
+
+/*****************************************************************************/
+
+#define FEATURE_VALNUM_CSE 1 // enable the Value Number CSE optimization logic
+
+// true if Value Number CSE is enabled
+#define FEATURE_ANYCSE FEATURE_VALNUM_CSE
+
+#define CSE_INTO_HANDLERS 0
+
+#define CAN_DISABLE_DFA 1 // disable data flow for minopts
+
+#define LARGE_EXPSET 1 // Track 64 or 32 assertions/copies/consts/rangechecks
+#define ASSERTION_PROP 1 // Enable value/assertion propagation
+
+#define LOCAL_ASSERTION_PROP ASSERTION_PROP // Enable local assertion propagation
+
+//=============================================================================
+
+#define FANCY_ARRAY_OPT 0 // optimize more complex index checks
+
+//=============================================================================
+
+#define LONG_ASG_OPS 0 // implementation isn't complete yet
+
+//=============================================================================
+
+#define OPT_MULT_ADDSUB 1 // optimize consecutive "lclVar += or -= icon"
+#define OPT_BOOL_OPS 1 // optimize boolean operations
+
+//=============================================================================
+
+#define REDUNDANT_LOAD 1 // track locals in regs, suppress loads
+#define STACK_PROBES 0 // Support for stack probes
+#define DUMP_FLOWGRAPHS DEBUG // Support for creating Xml Flowgraph reports in *.fgx files
+
+#define HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION 1 // if 1 we must have all handler entry points in the Hot code section
+
+/*****************************************************************************/
+
+#define VPTR_OFFS 0 // offset of vtable pointer from obj ptr
+
+/*****************************************************************************/
+
+#define DUMP_GC_TABLES DEBUG
+#define VERIFY_GC_TABLES 0
+#define REARRANGE_ADDS 1
+
+#define FUNC_INFO_LOGGING 1 // Support dumping function info to a file. In retail, only NYIs, with no function name,
+ // are dumped.
+
+/*****************************************************************************/
+/*****************************************************************************/
+/* Set these to 1 to collect and output various statistics about the JIT */
+
+#define CALL_ARG_STATS 0 // Collect stats about calls and call arguments.
+#define COUNT_BASIC_BLOCKS 0 // Create a histogram of basic block sizes, and a histogram of IL sizes in the simple
+ // case of single block methods.
+#define COUNT_LOOPS 0 // Collect stats about loops, such as the total number of natural loops, a histogram of
+ // the number of loop exits, etc.
+#define COUNT_RANGECHECKS 0 // Count range checks removed (in lexical CSE?).
+#define DATAFLOW_ITER 0 // Count iterations in lexical CSE and constant folding dataflow.
+#define DISPLAY_SIZES 0 // Display generated code, data, and GC information sizes.
+#define MEASURE_BLOCK_SIZE 0 // Collect stats about basic block and flowList node sizes and memory allocations.
+#define MEASURE_FATAL 0 // Count the number of calls to fatal(), including NYIs and noway_asserts.
+#define MEASURE_NODE_SIZE 0 // Collect stats about GenTree node allocations.
+#define MEASURE_PTRTAB_SIZE 0 // Collect stats about GC pointer table allocations.
+#define EMITTER_STATS 0 // Collect stats on the emitter.
+
+#define VERBOSE_SIZES 0 // Always display GC info sizes. If set, DISPLAY_SIZES must also be set.
+#define VERBOSE_VERIFY 0 // Dump additional information when verifying code. Useful to debug verification bugs.
+
+#ifdef DEBUG
+#define MEASURE_MEM_ALLOC 1 // Collect memory allocation stats.
+#define LOOP_HOIST_STATS 1 // Collect loop hoisting stats.
+#else
+#define MEASURE_MEM_ALLOC 0 // You can set this to 1 to get memory stats in retail, as well
+#define LOOP_HOIST_STATS 0 // You can set this to 1 to get loop hoist stats in retail, as well
+#endif
+
+/*****************************************************************************/
+/* Portability Defines */
+/*****************************************************************************/
+#ifdef _TARGET_X86_
+#define JIT32_GCENCODER
+#endif
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************/
+
+#define DUMPER
+
+#else // !DEBUG
+
+#if DUMP_GC_TABLES
+#pragma message("NOTE: this non-debug build has GC ptr table dumping always enabled!")
+const bool dspGCtbls = true;
+#endif
+
+/*****************************************************************************/
+#endif // !DEBUG
+
+#ifdef DEBUG
+void JitDump(const char* pcFormat, ...);
+#define JITDUMP(...) \
+ { \
+ if (JitTls::GetCompiler()->verbose) \
+ JitDump(__VA_ARGS__); \
+ }
+#define JITLOG(x) \
+ { \
+ JitLogEE x; \
+ }
+#define JITLOG_THIS(t, x) \
+ { \
+ (t)->JitLogEE x; \
+ }
+#define DBEXEC(flg, expr) \
+ if (flg) \
+ { \
+ expr; \
+ }
+#define DISPNODE(t) \
+ if (JitTls::GetCompiler()->verbose) \
+ JitTls::GetCompiler()->gtDispTree(t, nullptr, nullptr, true);
+#define DISPTREE(t) \
+ if (JitTls::GetCompiler()->verbose) \
+ JitTls::GetCompiler()->gtDispTree(t);
+#define DISPRANGE(range) \
+ if (JitTls::GetCompiler()->verbose) \
+ JitTls::GetCompiler()->gtDispRange(range);
+#define DISPTREERANGE(range, t) \
+ if (JitTls::GetCompiler()->verbose) \
+ JitTls::GetCompiler()->gtDispTreeRange(range, t);
+#define VERBOSE JitTls::GetCompiler()->verbose
+#else // !DEBUG
+#define JITDUMP(...)
+#define JITLOG(x)
+#define JITLOG_THIS(t, x)
+#define DBEXEC(flg, expr)
+#define DISPNODE(t)
+#define DISPTREE(t)
+#define DISPRANGE(range)
+#define DISPTREERANGE(range, t)
+#define VERBOSE 0
+#endif // !DEBUG
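As a quick illustration of how these dump macros are meant to be used, here is a minimal sketch; the helper below is hypothetical and not part of the sources.

// Hypothetical helper, for illustration only.
static void ExampleDumpUsage(GenTree* tree)
{
    // Emitted only when the current compiler's 'verbose' flag is set (e.g. via
    // COMPlus_JitDump for this method); in non-DEBUG builds both macros expand
    // to nothing, so the calls cost nothing in release code.
    JITDUMP("About to display a tree:\n");
    DISPTREE(tree);
}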
+
+/*****************************************************************************
+ *
+ * Double alignment. This aligns ESP to 0 mod 8 in the function prolog, then uses ESP
+ * to reference locals and EBP to reference parameters.
+ * It only makes sense if frameless method support is on
+ * (frameless method support is now always on).
+ */
+
+#ifdef _TARGET_X86_
+#define DOUBLE_ALIGN 1 // permit the double alignment of ESP in prolog,
+ // and permit the double alignment of local offsets
+#else
+#define DOUBLE_ALIGN 0 // no special handling for double alignment
+#endif
+/*****************************************************************************/
+#ifdef DEBUG
+extern void _cdecl debugStop(const char* why, ...);
+#endif
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+struct JitOptions
+{
+ const char* methodName; // Method to display output for
+ const char* className; // Class to display output for
+
+ double CGknob; // Tweakable knob for testing
+ unsigned testMask; // Tweakable mask for testing
+
+ JitOptions* lastDummyField; // Ensures instantiation uses right order of arguments
+};
+
+extern JitOptions jitOpts;
+
+/*****************************************************************************
+*
+* Returns a word filled with the JIT allocator's CHK fill value.
+*
+*/
+template <typename T>
+inline T UninitializedWord()
+{
+ __int64 word = 0x0101010101010101LL * (JitConfig.JitDefaultFill() & 0xFF);
+ return (T)word;
+}
+
+/*****************************************************************************
+*
+* Determines whether this value came from uninitialized JIT memory.
+*
+*/
+
+template <typename T>
+inline bool IsUninitialized(T data)
+{
+ return data == UninitializedWord<T>();
+}
+#endif // DEBUG
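A small DEBUG-only sanity-check sketch, assuming the default JitDefaultFill value of 0xFF (see jitconfigvalues.h); the helper name is made up.

// Hypothetical check: with JitDefaultFill left at its default of 0xFF,
// UninitializedWord<int>() yields 0xFFFFFFFF, so a value that still carries the
// allocator's fill pattern is flagged by IsUninitialized().
static void ExampleUninitializedCheck()
{
    int suspect = UninitializedWord<int>(); // stands in for never-written CHK memory
    assert(IsUninitialized(suspect));
}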
+
+/*****************************************************************************/
+
+enum accessLevel
+{
+ ACL_NONE,
+ ACL_PRIVATE,
+ ACL_DEFAULT,
+ ACL_PROTECTED,
+ ACL_PUBLIC,
+};
+
+/*****************************************************************************/
+
+#define castto(var, typ) (*(typ*)&var)
+
+#define sizeto(typ, mem) (offsetof(typ, mem) + sizeof(((typ*)0)->mem))
+
+/*****************************************************************************/
+
+#ifdef NO_MISALIGNED_ACCESS
+
+#define MISALIGNED_RD_I2(src) (*castto(src, char*) | *castto(src + 1, char*) << 8)
+
+#define MISALIGNED_RD_U2(src) (*castto(src, char*) | *castto(src + 1, char*) << 8)
+
+#define MISALIGNED_WR_I2(dst, val) \
+ *castto(dst, char*) = val; \
+ *castto(dst + 1, char*) = val >> 8;
+
+#define MISALIGNED_WR_I4(dst, val) \
+ *castto(dst, char*) = val; \
+ *castto(dst + 1, char*) = val >> 8; \
+ *castto(dst + 2, char*) = val >> 16; \
+ *castto(dst + 3, char*) = val >> 24;
+
+#else
+
+#define MISALIGNED_RD_I2(src) (*castto(src, short*))
+#define MISALIGNED_RD_U2(src) (*castto(src, unsigned short*))
+
+#define MISALIGNED_WR_I2(dst, val) *castto(dst, short*) = val;
+#define MISALIGNED_WR_I4(dst, val) *castto(dst, int*) = val;
+
+#define MISALIGNED_WR_ST(dst, val) *castto(dst, ssize_t*) = val;
+
+#endif
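For reference, a sketch of what the NO_MISALIGNED_ACCESS form of MISALIGNED_WR_I4 amounts to: a little-endian 4-byte store performed one byte at a time so that an unaligned destination never faults. The helper below is illustrative only.

// Illustrative equivalent of MISALIGNED_WR_I4 under NO_MISALIGNED_ACCESS.
static void ExampleMisalignedWriteI4(char* dst, int val) // 'dst' may be unaligned
{
    dst[0] = (char)(val);       // e.g. 0x78 for val == 0x12345678
    dst[1] = (char)(val >> 8);  //      0x56
    dst[2] = (char)(val >> 16); //      0x34
    dst[3] = (char)(val >> 24); //      0x12
}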
+
+/*****************************************************************************/
+
+inline size_t roundUp(size_t size, size_t mult = sizeof(size_t))
+{
+ assert(mult && ((mult & (mult - 1)) == 0)); // power of two test
+
+ return (size + (mult - 1)) & ~(mult - 1);
+}
+
+inline size_t roundDn(size_t size, size_t mult = sizeof(size_t))
+{
+ assert(mult && ((mult & (mult - 1)) == 0)); // power of two test
+
+ return (size) & ~(mult - 1);
+}
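A few worked values for the rounding helpers (illustrative only; both require a power-of-two multiple):

// Illustrative only.
static void ExampleRounding()
{
    assert(roundUp(13, 8) == 16); // bump up to the next multiple of 8
    assert(roundUp(16, 8) == 16); // already a multiple, unchanged
    assert(roundDn(13, 8) == 8);  // truncate down to the previous multiple
    assert(roundUp(13) == 16);    // default multiple is sizeof(size_t) (8 on 64-bit hosts)
}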
+
+inline unsigned int unsigned_abs(int x)
+{
+ return ((unsigned int)abs(x));
+}
+
+#ifdef _TARGET_64BIT_
+inline size_t unsigned_abs(ssize_t x)
+{
+#ifndef FEATURE_PAL
+ return ((size_t)abs(x));
+#else // !FEATURE_PAL
+ return ((size_t)labs(x));
+#endif // !FEATURE_PAL
+}
+#endif // _TARGET_64BIT_
+
+/*****************************************************************************/
+
+#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE
+
+class Histogram
+{
+public:
+ Histogram(IAllocator* allocator, const unsigned* const sizeTable);
+ ~Histogram();
+
+ void dump(FILE* output);
+ void record(unsigned size);
+
+private:
+ void ensureAllocated();
+
+ IAllocator* m_allocator;
+ unsigned m_sizeCount;
+ const unsigned* const m_sizeTable;
+ unsigned* m_counts;
+};
+
+#endif // CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE
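A minimal sketch of how one of these statistics histograms is typically set up. The bucket values and helper are hypothetical, and the size table is assumed to be zero-terminated, following the tables used with this class elsewhere in the JIT.

// Hypothetical usage sketch; 'alloc' and 'out' come from the caller.
static unsigned exampleSizeBuckets[] = {10, 20, 50, 100, 0}; // 0 terminates the table

static void ExampleHistogramUsage(IAllocator* alloc, FILE* out)
{
    Histogram hist(alloc, exampleSizeBuckets);
    hist.record(7);  // lands in the bucket bounded by 10
    hist.record(42); // lands in the bucket bounded by 50
    hist.dump(out);  // print one line per bucket with its count
}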
+
+/*****************************************************************************/
+#ifdef ICECAP
+#include "icapexp.h"
+#include "icapctrl.h"
+#endif
+
+/*****************************************************************************/
+
+#define SECURITY_CHECK 1
+#define VERIFY_IMPORTER 1
+
+/*****************************************************************************/
+
+#if !defined(RELOC_SUPPORT)
+#define RELOC_SUPPORT 1
+#endif
+
+/*****************************************************************************/
+
+#include "error.h"
+
+/*****************************************************************************/
+
+#if CHECK_STRUCT_PADDING
+#pragma warning(push)
+#pragma warning(default : 4820) // 'bytes' bytes padding added after construct 'member_name'
+#endif // CHECK_STRUCT_PADDING
+
+#include "alloc.h"
+#include "target.h"
+
+#if FEATURE_TAILCALL_OPT
+
+#ifdef FEATURE_CORECLR
+// CoreCLR - enable tail call opt for the following IL pattern
+//
+// call someFunc
+// jmp/jcc RetBlock
+// ...
+// RetBlock:
+// ret
+#define FEATURE_TAILCALL_OPT_SHARED_RETURN 1
+#else
+// Desktop: Keep this set to zero, as one of the app-compat apps that uses GetCallingAssembly()
+// has an issue when this is turned ON.
+//
+// Refer to TF: Bug: 824625 and its associated regression TF Bug: 1113265
+#define FEATURE_TAILCALL_OPT_SHARED_RETURN 0
+#endif // FEATURE_CORECLR
+
+#else // !FEATURE_TAILCALL_OPT
+#define FEATURE_TAILCALL_OPT_SHARED_RETURN 0
+#endif // !FEATURE_TAILCALL_OPT
+
+#define CLFLG_CODESIZE 0x00001
+#define CLFLG_CODESPEED 0x00002
+#define CLFLG_CSE 0x00004
+#define CLFLG_REGVAR 0x00008
+#define CLFLG_RNGCHKOPT 0x00010
+#define CLFLG_DEADASGN 0x00020
+#define CLFLG_CODEMOTION 0x00040
+#define CLFLG_QMARK 0x00080
+#define CLFLG_TREETRANS 0x00100
+#define CLFLG_INLINING 0x00200
+#define CLFLG_CONSTANTFOLD 0x00800
+
+#if FEATURE_STRUCTPROMOTE
+#define CLFLG_STRUCTPROMOTE 0x00400
+#else
+#define CLFLG_STRUCTPROMOTE 0x00000
+#endif
+
+#define CLFLG_MAXOPT \
+ (CLFLG_CSE | CLFLG_REGVAR | CLFLG_RNGCHKOPT | CLFLG_DEADASGN | CLFLG_CODEMOTION | CLFLG_QMARK | CLFLG_TREETRANS | \
+ CLFLG_INLINING | CLFLG_STRUCTPROMOTE | CLFLG_CONSTANTFOLD)
+
+#define CLFLG_MINOPT (CLFLG_TREETRANS)
+
+#define JIT_RESERVED_STACK 64 // Reserved for arguments of calls and hidden
+ // pushes for finallys so that we don't
+ // probe on every call site. See the comment
+ // for CORINFO_STACKPROBE_DEPTH in corjit.h.
+
+/*****************************************************************************/
+
+extern void dumpILBytes(const BYTE* const codeAddr, unsigned codeSize, unsigned alignSize);
+
+extern unsigned dumpSingleInstr(const BYTE* const codeAddr, IL_OFFSET offs, const char* prefix = nullptr);
+
+extern void dumpILRange(const BYTE* const codeAddr, unsigned codeSize); // in bytes
+
+/*****************************************************************************/
+
+extern int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_MODULE_HANDLE classHnd,
+ COMP_HANDLE compHnd,
+ CORINFO_METHOD_INFO* methodInfo,
+ void** methodCodePtr,
+ ULONG* methodCodeSize,
+ CORJIT_FLAGS* compileFlags,
+ void* inlineInfoPtr);
+
+#ifdef _HOST_64BIT_
+const size_t INVALID_POINTER_VALUE = 0xFEEDFACEABADF00D;
+#else
+const size_t INVALID_POINTER_VALUE = 0xFEEDFACE;
+#endif
+
+// Constants for making sure size_t fit into smaller types.
+const size_t MAX_USHORT_SIZE_T = static_cast<size_t>(static_cast<unsigned short>(-1));
+const size_t MAX_UNSIGNED_SIZE_T = static_cast<size_t>(static_cast<unsigned>(-1));
+
+// These assume 2's complement...
+const int MAX_SHORT_AS_INT = 32767;
+const int MIN_SHORT_AS_INT = -32768;
+
+/*****************************************************************************/
+
+enum CompMemKind
+{
+#define CompMemKindMacro(kind) CMK_##kind,
+#include "compmemkind.h"
+ CMK_Count
+};
+
+class Compiler;
+class JitTls
+{
+#ifdef DEBUG
+ Compiler* m_compiler;
+ LogEnv m_logEnv;
+ JitTls* m_next;
+#endif
+
+public:
+ JitTls(ICorJitInfo* jitInfo);
+ ~JitTls();
+
+#ifdef DEBUG
+ static LogEnv* GetLogEnv();
+#endif
+
+ static Compiler* GetCompiler();
+ static void SetCompiler(Compiler* compiler);
+};
+
+#if defined(DEBUG)
+
+#include "compiler.h"
+
+template <typename T>
+T dspPtr(T p)
+{
+ return (p == ZERO) ? ZERO : (JitTls::GetCompiler()->opts.dspDiffable ? T(0xD1FFAB1E) : p);
+}
+
+template <typename T>
+T dspOffset(T o)
+{
+ return (o == ZERO) ? ZERO : (JitTls::GetCompiler()->opts.dspDiffable ? T(0xD1FFAB1E) : o);
+}
+
+#else // !defined(DEBUG)
+
+template <typename T>
+T dspPtr(T p)
+{
+ return p;
+}
+
+template <typename T>
+T dspOffset(T o)
+{
+ return o;
+}
+
+#endif // !defined(DEBUG)
+
+/*****************************************************************************/
+#endif //_JIT_H_
+/*****************************************************************************/
diff --git a/src/jit/jit.settings.targets b/src/jit/jit.settings.targets
new file mode 100644
index 0000000000..9dbc225843
--- /dev/null
+++ b/src/jit/jit.settings.targets
@@ -0,0 +1,136 @@
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="dogfood">
+
+ <PropertyGroup>
+ <ClWarningLevel>4</ClWarningLevel>
+
+ <UserIncludes>
+ $(UserIncludes);
+ ..;
+ ..\jitstd\;
+ $(Clrbase)\src\TraceLog;
+ </UserIncludes>
+
+ <!-- PCH baloney -->
+ <EnableCxxPCHHeaders>true</EnableCxxPCHHeaders>
+ <PCHCompile>..\jitpch.cpp</PCHCompile>
+ <PCHHeader>jitpch.h</PCHHeader>
+
+ <!-- JIT_BUILD disables certain PAL_TRY debugging features -->
+ <ClDefines>$(ClDefines);JIT_BUILD=1</ClDefines>
+
+ <ClDefines Condition="'$(DebugBuild)' == 'false'">$(ClDefines);FAST=1</ClDefines>
+ <ClDefines Condition="'$(DebugBuild)' == 'true'">$(ClDefines);DEBUG=1</ClDefines>
+ </PropertyGroup>
+
+ <!-- For debugging purposes only, temporarily enable these in RET builds so GenTree debugging is easier. -->
+ <!-- We need to link with /OPT:NOICF or our magic vtable debugging system for GenTree doesn't work. -->
+ <PropertyGroup Condition="'$(DebugBuild)' == 'true'">
+ <!-- This is already automatically defined in DEBUG builds.
+ <ClDefines>$(ClDefines);DEBUGGABLE_GENTREE=1</ClDefines>
+ -->
+ <LinkEnableCOMDATFolding>false</LinkEnableCOMDATFolding> <!-- /OPT:NOICF -->
+ <ClAdditionalOptions>$(ClAdditionalOptions) /Ob0</ClAdditionalOptions> <!-- no inlining -->
+ </PropertyGroup>
+
+ <!-- Leaf Project Items -->
+ <ItemGroup>
+ <CppCompile Include="..\alloc.cpp" />
+ <CppCompile Include="..\earlyprop.cpp" />
+ <CppCompile Include="..\bitset.cpp" />
+ <CppCompile Include="..\block.cpp" />
+ <CppCompile Include="..\Compiler.cpp" />
+ <CppCompile Include="..\DisAsm.cpp" />
+ <CppCompile Include="..\eeInterface.cpp" />
+ <CppCompile Include="..\ee_il_dll.cpp" />
+ <CppCompile Include="..\jiteh.cpp" />
+ <CppCompile Include="..\error.cpp" />
+ <CppCompile Include="..\FlowGraph.cpp" />
+ <CppCompile Include="..\GCInfo.cpp" />
+ <CppCompile Include="..\GCDecode.cpp" />
+ <CppCompile Include="..\GCEncode.cpp" />
+ <CppCompile Include="..\GenTree.cpp" />
+ <CppCompile Include="..\GSChecks.cpp" />
+ <CppCompile Include="..\hashbv.cpp" />
+ <CppCompile Include="..\Importer.cpp" />
+ <CppCompile Include="..\Instr.cpp" />
+ <CppCompile Include="..\JitTelemetry.cpp" />
+ <CppCompile Include="..\LclVars.cpp" />
+ <CppCompile Include="..\LIR.cpp" />
+ <CppCompile Include="..\Liveness.cpp" />
+ <CppCompile Include="..\Morph.cpp" />
+ <CppCompile Include="..\Optimizer.cpp" />
+ <CppCompile Include="..\OptCSE.cpp" />
+ <CppCompile Include="..\rationalize.cpp" />
+ <CppCompile Include="..\RegAlloc.cpp" />
+ <CppCompile Include="..\RegSet.cpp" />
+ <CppCompile Include="..\register_arg_convention.cpp" />
+ <CppCompile Include="..\emit.cpp" />
+ <CppCompile Include="..\ScopeInfo.cpp" />
+ <CppCompile Include="..\SharedFloat.cpp" />
+ <CppCompile Include="..\SM.cpp" />
+ <CppCompile Include="..\SMData.cpp" />
+ <CppCompile Include="..\SMWeights.cpp" />
+ <CppCompile Include="..\typeInfo.cpp" />
+ <CppCompile Include="..\unwind.cpp" />
+ <CppCompile Include="..\Utils.cpp" />
+ <CppCompile Include="..\SsaBuilder.cpp" />
+ <CppCompile Include="..\SsaRenameState.cpp" />
+ <CppCompile Include="..\ValueNum.cpp" />
+ <CppCompile Include="..\CopyProp.cpp" />
+ <CppCompile Include="..\CodeGenCommon.cpp" />
+ <CppCompile Include="..\AssertionProp.cpp" />
+ <CppCompile Include="..\RangeCheck.cpp" />
+ <CppCompile Include="..\LoopCloning.cpp" />
+ <CppCompile Include="..\inline.cpp" />
+ <CppCompile Include="..\inlinepolicy.cpp" />
+ <CppCompile Include="..\jitconfig.cpp" />
+ <CppCompile Include="..\hostallocator.cpp" />
+ <CppCompile Include="..\objectalloc.cpp" />
+ <CppCompile Include="..\sideeffects.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\CodeGenLegacy.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\Lower.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LSRA.cpp" />
+ </ItemGroup>
+ <ItemGroup Condition="'$(TargetArch)'=='i386'">
+ <CppCompile Include="..\emitXArch.cpp" />
+ <CppCompile Include="..\TargetX86.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\stackfp.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\DecomposeLongs.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LowerXArch.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\CodeGenXArch.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\SIMD.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\SIMDCodeGenXArch.cpp" />
+ </ItemGroup>
+ <ItemGroup Condition="'$(TargetArch)'=='amd64'">
+ <!-- AMD64 target is always RyuJIT backend -->
+ <CppCompile Include="..\emitXArch.cpp" />
+ <CppCompile Include="..\TargetAmd64.cpp" />
+ <CppCompile Include="..\LowerXArch.cpp" />
+ <CppCompile Include="..\CodeGenXArch.cpp" />
+ <CppCompile Include="..\SIMD.cpp" />
+ <CppCompile Include="..\SIMDCodeGenXArch.cpp" />
+ <CppCompile Include="..\unwindAmd64.cpp" />
+ </ItemGroup>
+ <ItemGroup Condition="'$(TargetArch)'=='arm'">
+ <CppCompile Include="..\emitarm.cpp" />
+ <CppCompile Include="..\TargetArm.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\registerfp.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\DecomposeLongs.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LowerArm.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\CodeGenArm.cpp" />
+ <CppCompile Include="..\unwindArm.cpp" />
+ </ItemGroup>
+ <ItemGroup Condition="'$(TargetArch)'=='arm64'">
+ <!-- ARM64 target is always RyuJIT backend -->
+ <CppCompile Include="..\emitarm64.cpp" />
+ <CppCompile Include="..\TargetArm64.cpp" />
+ <CppCompile Include="..\LowerArm64.cpp" />
+ <CppCompile Include="..\CodeGenArm64.cpp" />
+ <CppCompile Include="..\unwindArm.cpp" />
+ <CppCompile Include="..\unwindArm64.cpp" />
+ </ItemGroup>
+
+ <!-- Import the targets - this actually contains the full build rules -->
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\clr.targets" />
+
+</Project>
diff --git a/src/jit/jitconfig.cpp b/src/jit/jitconfig.cpp
new file mode 100644
index 0000000000..9f0e226e3a
--- /dev/null
+++ b/src/jit/jitconfig.cpp
@@ -0,0 +1,344 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "jitconfig.h"
+
+JitConfigValues JitConfig;
+
+void JitConfigValues::MethodSet::initialize(const wchar_t* list, ICorJitHost* host)
+{
+ assert(m_list == nullptr);
+
+ enum State
+ {
+ NO_NAME,
+ CLS_NAME,
+ FUNC_NAME,
+ ARG_LIST
+ }; // parsing state machine
+
+ const char SEP_CHAR = ' '; // character used to separate each entry
+
+ wchar_t lastChar = '?'; // dummy
+ int nameStart = -1; // Index of the start of the current class or method name
+ MethodName currentName; // Buffer used while parsing the current entry
+ MethodName** lastName = &m_names; // Last entry inserted into the list
+ bool isQuoted = false;
+
+ currentName.m_next = nullptr;
+ currentName.m_methodNameStart = -1;
+ currentName.m_methodNameLen = -1;
+ currentName.m_classNameStart = -1;
+ currentName.m_classNameLen = -1;
+ currentName.m_numArgs = -1;
+
+ // Convert the input list to UTF-8
+ int utf8ListLen = WszWideCharToMultiByte(CP_UTF8, 0, list, -1, nullptr, 0, nullptr, nullptr);
+ m_list = (char*)host->allocateMemory(utf8ListLen);
+ if (WszWideCharToMultiByte(CP_UTF8, 0, list, -1, const_cast<LPSTR>(m_list), utf8ListLen, nullptr, nullptr) == 0)
+ {
+ // Failed to convert the list. Free the memory and ignore the list.
+ host->freeMemory(reinterpret_cast<void*>(const_cast<char*>(m_list)));
+ m_list = "";
+ return;
+ }
+
+ State state = NO_NAME;
+ for (int i = 0; lastChar != '\0'; i++)
+ {
+ lastChar = m_list[i];
+
+ switch (state)
+ {
+ case NO_NAME:
+ if (m_list[i] != SEP_CHAR)
+ {
+ nameStart = i;
+ state = CLS_NAME; // we have found the start of the next entry
+ }
+ break;
+
+ case CLS_NAME:
+ if (m_list[nameStart] == '"')
+ {
+ for (; m_list[i] != '\0' && m_list[i] != '"'; i++)
+ {
+ ;
+ }
+
+ nameStart++;
+ isQuoted = true;
+ }
+
+ if (m_list[i] == ':')
+ {
+ if (m_list[nameStart] == '*' && !isQuoted)
+ {
+ // The class name is a wildcard; mark it invalid.
+ currentName.m_classNameStart = -1;
+ currentName.m_classNameLen = -1;
+ }
+ else
+ {
+ currentName.m_classNameStart = nameStart;
+ currentName.m_classNameLen = i - nameStart;
+
+ // Remove the trailing quote, if any
+ if (isQuoted)
+ {
+ currentName.m_classNameLen--;
+ isQuoted = false;
+ }
+ }
+
+ // Accept class::name syntax as well
+ if (m_list[i + 1] == ':')
+ {
+ i++;
+ }
+
+ nameStart = i + 1;
+ state = FUNC_NAME;
+ }
+ else if (m_list[i] == '\0' || m_list[i] == SEP_CHAR || m_list[i] == '(')
+ {
+ // Treat this as a method name without a class name.
+ currentName.m_classNameStart = -1;
+ currentName.m_classNameLen = -1;
+ goto DONE_FUNC_NAME;
+ }
+ break;
+
+ case FUNC_NAME:
+ if (m_list[nameStart] == '"')
+ {
+ // The first half of the outer condition handles the case where the
+ // class name is valid.
+ for (; nameStart == i || (m_list[i] != '\0' && m_list[i] != '"'); i++)
+ {
+ ;
+ }
+
+ nameStart++;
+ isQuoted = true;
+ }
+
+ if (m_list[i] == '\0' || m_list[i] == SEP_CHAR || m_list[i] == '(')
+ {
+ DONE_FUNC_NAME:
+ assert(m_list[i] == '\0' || m_list[i] == SEP_CHAR || m_list[i] == '(');
+
+ if (m_list[nameStart] == '*' && !isQuoted)
+ {
+ // The method name is a wildcard; mark it invalid.
+ currentName.m_methodNameStart = -1;
+ currentName.m_methodNameLen = -1;
+ }
+ else
+ {
+ currentName.m_methodNameStart = nameStart;
+ currentName.m_methodNameLen = i - nameStart;
+
+ // Remove the trailing quote, if any
+ if (isQuoted)
+ {
+ currentName.m_methodNameLen--;
+ isQuoted = false;
+ }
+ }
+
+ if (m_list[i] == '\0' || m_list[i] == SEP_CHAR)
+ {
+ currentName.m_numArgs = -1;
+ goto DONE_ARG_LIST;
+ }
+ else
+ {
+ assert(m_list[i] == '(');
+ currentName.m_numArgs = -1;
+ state = ARG_LIST;
+ }
+ }
+ break;
+
+ case ARG_LIST:
+ if (m_list[i] == '\0' || m_list[i] == ')')
+ {
+ if (currentName.m_numArgs == -1)
+ {
+ currentName.m_numArgs = 0;
+ }
+
+ DONE_ARG_LIST:
+ assert(m_list[i] == '\0' || m_list[i] == SEP_CHAR || m_list[i] == ')');
+
+ // We have parsed an entire method name; create a new entry in the list for it.
+ MethodName* name = (MethodName*)host->allocateMemory(sizeof(MethodName));
+ *name = currentName;
+
+ assert(name->m_next == nullptr);
+ *lastName = name;
+ lastName = &name->m_next;
+
+ state = NO_NAME;
+
+ // Skip anything after the argument list until we find the next
+ // separator character. Otherwise, if we saw "func(a,b):foo" we would
+ // create entries for both "func(a,b)" and ":foo".
+ if (m_list[i] == ')')
+ {
+ for (; m_list[i] && m_list[i] != SEP_CHAR; i++)
+ {
+ ;
+ }
+
+ lastChar = m_list[i];
+ }
+ }
+ else
+ {
+ if (m_list[i] != SEP_CHAR && currentName.m_numArgs == -1)
+ {
+ currentName.m_numArgs = 1;
+ }
+
+ if (m_list[i] == ',')
+ {
+ currentName.m_numArgs++;
+ }
+ }
+ break;
+
+ default:
+ assert(!"Bad state");
+ break;
+ }
+ }
+}
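To make the accepted syntax concrete, here are a few hypothetical entries this parser handles, followed by a DEBUG-only query sketch against the resulting set; the names and helper are made up.

// Entries are space separated; '*' is a wildcard for the class or method name,
// quotes allow embedded spaces, and a parenthesized argument list constrains the
// argument count. For example (hypothetical names):
//
//   Main                any method named Main, on any class, any signature
//   Program:Main        Main on class Program ("Program::Main" also parses)
//   *:Helper(int,int)   any Helper overload with exactly two arguments (only the
//                       count is compared, not the types)
//
// A DEBUG-only query against the global JitConfig then looks like this:
static bool ExampleIsDumpEnabledFor(const char* methodName, const char* className)
{
    return JitConfig.JitDump().contains(methodName, className, nullptr);
}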
+
+void JitConfigValues::MethodSet::destroy(ICorJitHost* host)
+{
+ // Free method names, free the list string, and reset our state
+ for (MethodName *name = m_names, *next = nullptr; name != nullptr; name = next)
+ {
+ next = name->m_next;
+ host->freeMemory(reinterpret_cast<void*>(const_cast<MethodName*>(name)));
+ }
+
+ host->freeMemory(reinterpret_cast<void*>(const_cast<char*>(m_list)));
+
+ m_names = nullptr;
+ m_list = nullptr;
+}
+
+static bool matchesName(const char* const name, int nameLen, const char* const s2)
+{
+ return strncmp(name, s2, nameLen) == 0 && s2[nameLen] == '\0';
+}
+
+bool JitConfigValues::MethodSet::contains(const char* methodName,
+ const char* className,
+ CORINFO_SIG_INFO* sigInfo) const
+{
+ int numArgs = sigInfo != nullptr ? sigInfo->numArgs : -1;
+
+ // Try to match any of the entries in the list.
+ for (MethodName* name = m_names; name != nullptr; name = name->m_next)
+ {
+ // If m_numArgs is valid, check for a mismatch
+ if (name->m_numArgs != -1 && name->m_numArgs != numArgs)
+ {
+ continue;
+ }
+
+ // If m_methodNameStart is valid, check for a mismatch
+ if (name->m_methodNameStart != -1)
+ {
+ const char* expectedMethodName = &m_list[name->m_methodNameStart];
+ if (!matchesName(expectedMethodName, name->m_methodNameLen, methodName))
+ {
+ // C++ embeds the class name into the method name ("Class::Method");
+ // match the expected method name against the part after the "::".
+ const char* colon = strchr(methodName, ':');
+ if (colon != nullptr && colon[1] == ':' &&
+ matchesName(expectedMethodName, name->m_methodNameLen, colon + 2))
+ {
+ int classLen = (int)(colon - methodName);
+ if (name->m_classNameStart == -1 ||
+ (classLen == name->m_classNameLen &&
+ strncmp(&m_list[name->m_classNameStart], methodName, classLen) == 0))
+ {
+ return true;
+ }
+ }
+ continue;
+ }
+ }
+
+ // If m_classNameStart is valid, check for a mismatch
+ if (className == nullptr || name->m_classNameStart == -1 ||
+ matchesName(&m_list[name->m_classNameStart], name->m_classNameLen, className))
+ {
+ return true;
+ }
+
+ // Check for a class name with a trailing wildcard, e.g. "System.*"
+ if (name->m_classNameLen > 0 && m_list[name->m_classNameStart + name->m_classNameLen - 1] == '*' &&
+ strncmp(&m_list[name->m_classNameStart], className, name->m_classNameLen - 1) == 0)
+ {
+ return true;
+ }
+
+#ifdef _DEBUG
+ // Maybe className doesn't include the namespace. Try to match that
+ const char* nsSep = strrchr(className, '.');
+ if (nsSep != nullptr && nsSep != className)
+ {
+ const char* onlyClass = nsSep[-1] == '.' ? nsSep : &nsSep[1];
+ if (matchesName(&m_list[name->m_classNameStart], name->m_classNameLen, onlyClass))
+ {
+ return true;
+ }
+ }
+#endif
+ }
+
+ return false;
+}
+
+void JitConfigValues::initialize(ICorJitHost* host)
+{
+ assert(!m_isInitialized);
+
+#define CONFIG_INTEGER(name, key, defaultValue) m_##name = host->getIntConfigValue(key, defaultValue);
+#define CONFIG_STRING(name, key) m_##name = host->getStringConfigValue(key);
+#define CONFIG_METHODSET(name, key) \
+ const wchar_t* name##value = host->getStringConfigValue(key); \
+ m_##name.initialize(name##value, host); \
+ host->freeStringConfigValue(name##value);
+
+#include "jitconfigvalues.h"
+
+ m_isInitialized = true;
+}
+
+void JitConfigValues::destroy(ICorJitHost* host)
+{
+ if (!m_isInitialized)
+ {
+ return;
+ }
+
+#define CONFIG_INTEGER(name, key, defaultValue)
+#define CONFIG_STRING(name, key) host->freeStringConfigValue(m_##name);
+#define CONFIG_METHODSET(name, key) m_##name.destroy(host);
+
+#include "jitconfigvalues.h"
+
+ m_isInitialized = false;
+}
diff --git a/src/jit/jitconfig.h b/src/jit/jitconfig.h
new file mode 100644
index 0000000000..d5b4e30796
--- /dev/null
+++ b/src/jit/jitconfig.h
@@ -0,0 +1,97 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _JITCONFIG_H_
+#define _JITCONFIG_H_
+
+struct CORINFO_SIG_INFO;
+class ICorJitHost;
+
+class JitConfigValues
+{
+public:
+ class MethodSet
+ {
+ private:
+ struct MethodName
+ {
+ MethodName* m_next;
+ int m_methodNameStart;
+ int m_methodNameLen;
+ int m_classNameStart;
+ int m_classNameLen;
+ int m_numArgs;
+ };
+
+ const char* m_list;
+ MethodName* m_names;
+
+ MethodSet(const MethodSet& other) = delete;
+ MethodSet& operator=(const MethodSet& other) = delete;
+
+ public:
+ MethodSet()
+ {
+ }
+ inline const char* list() const
+ {
+ return m_list;
+ }
+
+ void initialize(const wchar_t* list, ICorJitHost* host);
+ void destroy(ICorJitHost* host);
+
+ inline bool isEmpty() const
+ {
+ return m_names == nullptr;
+ }
+ bool contains(const char* methodName, const char* className, CORINFO_SIG_INFO* sigInfo) const;
+ };
+
+private:
+#define CONFIG_INTEGER(name, key, defaultValue) int m_##name;
+#define CONFIG_STRING(name, key) const wchar_t* m_##name;
+#define CONFIG_METHODSET(name, key) MethodSet m_##name;
+#include "jitconfigvalues.h"
+
+public:
+#define CONFIG_INTEGER(name, key, defaultValue) \
+ inline int name() const \
+ { \
+ return m_##name; \
+ }
+#define CONFIG_STRING(name, key) \
+ inline const wchar_t* name() const \
+ { \
+ return m_##name; \
+ }
+#define CONFIG_METHODSET(name, key) \
+ inline const MethodSet& name() const \
+ { \
+ return m_##name; \
+ }
+#include "jitconfigvalues.h"
+
+private:
+ bool m_isInitialized;
+
+ JitConfigValues(const JitConfigValues& other) = delete;
+ JitConfigValues& operator=(const JitConfigValues& other) = delete;
+
+public:
+ JitConfigValues()
+ {
+ }
+
+ inline bool isInitialized() const
+ {
+ return m_isInitialized != 0;
+ }
+ void initialize(ICorJitHost* host);
+ void destroy(ICorJitHost* host);
+};
+
+extern JitConfigValues JitConfig;
+
+#endif
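The CONFIG_* macros above implement an X-macro pattern: jitconfigvalues.h is included once to declare the backing fields and once to declare the accessors, and jitconfig.cpp includes it again to generate the initialization code. Below is a simplified sketch of what a single entry expands to, using the real JitMinOpts knob; the class name is made up and this is not literal preprocessor output.

// Illustrative expansion of CONFIG_INTEGER(JitMinOpts, W("JITMinOpts"), 0).
class JitConfigValuesSketch
{
    int m_JitMinOpts; // produced by the first include (field-declaring form)

public:
    int JitMinOpts() const // produced by the second include (accessor form)
    {
        return m_JitMinOpts;
    }

    void initialize(ICorJitHost* host) // the include in jitconfig.cpp fills the field
    {
        m_JitMinOpts = host->getIntConfigValue(W("JITMinOpts"), 0);
    }
};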
diff --git a/src/jit/jitconfigvalues.h b/src/jit/jitconfigvalues.h
new file mode 100644
index 0000000000..6579817249
--- /dev/null
+++ b/src/jit/jitconfigvalues.h
@@ -0,0 +1,255 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if !defined(CONFIG_INTEGER) || !defined(CONFIG_STRING) || !defined(CONFIG_METHODSET)
+#error CONFIG_INTEGER, CONFIG_STRING, and CONFIG_METHODSET must be defined before including this file.
+#endif // !defined(CONFIG_INTEGER) || !defined(CONFIG_STRING) || !defined(CONFIG_METHODSET)
+
+#if defined(DEBUG)
+CONFIG_INTEGER(AltJitLimit, W("AltJitLimit"), 0) // Max number of functions to use altjit for (decimal)
+CONFIG_INTEGER(AltJitSkipOnAssert, W("AltJitSkipOnAssert"), 0) // If AltJit hits an assert, fall back to the fallback
+ // JIT. Useful in conjunction with
+ // COMPlus_ContinueOnAssert=1
+CONFIG_INTEGER(BreakOnDumpToken, W("BreakOnDumpToken"), 0xffffffff) // Breaks when using internal logging on a
+ // particular token value.
+CONFIG_INTEGER(DebugBreakOnVerificationFailure, W("DebugBreakOnVerificationFailure"), 0) // Halts the jit on
+ // verification failure
+CONFIG_INTEGER(DiffableDasm, W("JitDiffableDasm"), 0) // Make the disassembly diff-able
+CONFIG_INTEGER(DisplayLoopHoistStats, W("JitLoopHoistStats"), 0) // Display JIT loop hoisting statistics
+CONFIG_INTEGER(DisplayMemStats, W("JitMemStats"), 0) // Display JIT memory usage statistics
+CONFIG_INTEGER(DumpJittedMethods, W("DumpJittedMethods"), 0) // Prints all jitted methods to the console
+CONFIG_INTEGER(EnablePCRelAddr, W("JitEnablePCRelAddr"), 1) // Whether an absolute address may be encoded as a
+ // PC-relative offset by RyuJIT where possible
+CONFIG_INTEGER(InterpreterFallback, W("InterpreterFallback"), 0) // Fallback to the interpreter when the JIT compiler
+ // fails
+CONFIG_INTEGER(JitAssertOnMaxRAPasses, W("JitAssertOnMaxRAPasses"), 0)
+CONFIG_INTEGER(JitBreakEmitOutputInstr, W("JitBreakEmitOutputInstr"), -1)
+CONFIG_INTEGER(JitBreakMorphTree, W("JitBreakMorphTree"), 0xffffffff)
+CONFIG_INTEGER(JitBreakOnBadCode, W("JitBreakOnBadCode"), 0)
+CONFIG_INTEGER(JitBreakOnMinOpts, W("JITBreakOnMinOpts"), 0) // Halt if jit switches to MinOpts
+CONFIG_INTEGER(JitBreakOnUnsafeCode, W("JitBreakOnUnsafeCode"), 0)
+CONFIG_INTEGER(JitCanUseSSE2, W("JitCanUseSSE2"), -1)
+CONFIG_INTEGER(JitCloneLoops, W("JitCloneLoops"), 1) // If 0, don't clone. Otherwise clone loops for optimizations.
+CONFIG_INTEGER(JitDebugLogLoopCloning, W("JitDebugLogLoopCloning"), 0) // In debug builds log places where loop cloning
+ // optimizations are performed on the fast path.
+CONFIG_INTEGER(JitDefaultFill, W("JitDefaultFill"), 0xff) // In debug builds, initialize the memory allocated by the nra
+ // with this byte.
+CONFIG_INTEGER(JitDirectAlloc, W("JitDirectAlloc"), 0)
+CONFIG_INTEGER(JitDoAssertionProp, W("JitDoAssertionProp"), 1) // Perform assertion propagation optimization
+CONFIG_INTEGER(JitDoCopyProp, W("JitDoCopyProp"), 1) // Perform copy propagation on variables that appear redundant
+CONFIG_INTEGER(JitDoEarlyProp, W("JitDoEarlyProp"), 1) // Perform early value propagation
+CONFIG_INTEGER(JitDoLoopHoisting, W("JitDoLoopHoisting"), 1) // Perform loop hoisting on loop invariant values
+CONFIG_INTEGER(JitDoRangeAnalysis, W("JitDoRangeAnalysis"), 1) // Perform range check analysis
+CONFIG_INTEGER(JitDoSsa, W("JitDoSsa"), 1) // Perform Static Single Assignment (SSA) numbering on the variables
+CONFIG_INTEGER(JitDoValueNumber, W("JitDoValueNumber"), 1) // Perform value numbering on method expressions
+CONFIG_INTEGER(JitDoubleAlign, W("JitDoubleAlign"), 1)
+CONFIG_INTEGER(JitDumpASCII, W("JitDumpASCII"), 1) // Uses only ASCII characters in tree dumps
+CONFIG_INTEGER(JitDumpFgDot, W("JitDumpFgDot"), 0) // Set to non-zero to emit Dot instead of Xml Flowgraph dump
+CONFIG_INTEGER(JitDumpTerseLsra, W("JitDumpTerseLsra"), 1) // Produce terse dump output for LSRA
+CONFIG_INTEGER(JitDumpToDebugger, W("JitDumpToDebugger"), 0) // Output JitDump output to the debugger
+CONFIG_INTEGER(JitDumpVerboseSsa, W("JitDumpVerboseSsa"), 0) // Produce especially verbose dump output for SSA
+CONFIG_INTEGER(JitDumpVerboseTrees, W("JitDumpVerboseTrees"), 0) // Enable more verbose tree dumps
+CONFIG_INTEGER(JitEmitPrintRefRegs, W("JitEmitPrintRefRegs"), 0)
+CONFIG_INTEGER(JitExpensiveDebugCheckLevel, W("JitExpensiveDebugCheckLevel"), 0) // Level indicates how much checking
+ // beyond the default to do in debug
+ // builds (currently 1-2)
+CONFIG_INTEGER(JitForceFallback, W("JitForceFallback"), 0) // Set to non-zero to test NOWAY assert by forcing a retry
+CONFIG_INTEGER(JitForceVer, W("JitForceVer"), 0)
+CONFIG_INTEGER(JitFullyInt, W("JitFullyInt"), 0) // Forces fully interruptible code
+CONFIG_INTEGER(JitFunctionTrace, W("JitFunctionTrace"), 0) // If non-zero, print JIT start/end logging
+CONFIG_INTEGER(JitGCChecks, W("JitGCChecks"), 0)
+CONFIG_INTEGER(JitGCInfoLogging, W("JitGCInfoLogging"), 0) // If true, prints GCInfo-related output to standard output.
+CONFIG_INTEGER(JitHashBreak, W("JitHashBreak"), -1) // Same as JitBreak, but for a method hash
+CONFIG_INTEGER(JitHashDump, W("JitHashDump"), -1) // Same as JitDump, but for a method hash
+CONFIG_INTEGER(JitHashDumpIR, W("JitHashDumpIR"), -1) // Same as JitDumpIR, but for a method hash
+CONFIG_INTEGER(JitHashHalt, W("JitHashHalt"), -1) // Same as JitHalt, but for a method hash
+CONFIG_INTEGER(JitInlineAdditionalMultiplier, W("JitInlineAdditionalMultiplier"), 0)
+CONFIG_INTEGER(JitInlinePrintStats, W("JitInlinePrintStats"), 0)
+CONFIG_INTEGER(JitInlineSize, W("JITInlineSize"), DEFAULT_MAX_INLINE_SIZE)
+CONFIG_INTEGER(JitInlineDepth, W("JITInlineDepth"), DEFAULT_MAX_INLINE_DEPTH)
+CONFIG_INTEGER(JitLongAddress, W("JitLongAddress"), 0) // Force using the large pseudo-instruction form for long addresses
+CONFIG_INTEGER(JitMaxTempAssert, W("JITMaxTempAssert"), 1)
+CONFIG_INTEGER(JitMaxUncheckedOffset, W("JitMaxUncheckedOffset"), 8)
+CONFIG_INTEGER(JitMinOpts, W("JITMinOpts"), 0) // Forces MinOpts
+CONFIG_INTEGER(JitMinOptsBbCount, W("JITMinOptsBbCount"), DEFAULT_MIN_OPTS_BB_COUNT) // Internal jit control of MinOpts
+CONFIG_INTEGER(JitMinOptsCodeSize, W("JITMinOptsCodeSize"), DEFAULT_MIN_OPTS_CODE_SIZE) // Internal jit control of
+ // MinOpts
+CONFIG_INTEGER(JitMinOptsInstrCount, W("JITMinOptsInstrCount"), DEFAULT_MIN_OPTS_INSTR_COUNT) // Internal jit control of
+ // MinOpts
+CONFIG_INTEGER(JitMinOptsLvNumCount, W("JITMinOptsLvNumcount"), DEFAULT_MIN_OPTS_LV_NUM_COUNT) // Internal jit control
+ // of MinOpts
+CONFIG_INTEGER(JitMinOptsLvRefCount, W("JITMinOptsLvRefcount"), DEFAULT_MIN_OPTS_LV_REF_COUNT) // Internal jit control
+ // of MinOpts
+CONFIG_INTEGER(JitNoCMOV, W("JitNoCMOV"), 0)
+CONFIG_INTEGER(JitNoCSE, W("JitNoCSE"), 0)
+CONFIG_INTEGER(JitNoCSE2, W("JitNoCSE2"), 0)
+CONFIG_INTEGER(JitNoForceFallback, W("JitNoForceFallback"), 0) // Set to non-zero to prevent NOWAY assert testing.
+ // Overrides COMPlus_JitForceFallback and JIT stress
+ // flags.
+CONFIG_INTEGER(JitNoHoist, W("JitNoHoist"), 0)
+CONFIG_INTEGER(JitNoInline, W("JitNoInline"), 0) // Disables inlining of all methods
+CONFIG_INTEGER(JitNoMemoryBarriers, W("JitNoMemoryBarriers"), 0) // If 1, don't generate memory barriers
+CONFIG_INTEGER(JitNoRegLoc, W("JitNoRegLoc"), 0)
+CONFIG_INTEGER(JitNoStructPromotion, W("JitNoStructPromotion"), 0) // Disables struct promotion in Jit32
+CONFIG_INTEGER(JitNoUnroll, W("JitNoUnroll"), 0)
+CONFIG_INTEGER(JitOrder, W("JitOrder"), 0)
+CONFIG_INTEGER(JitPInvokeCheckEnabled, W("JITPInvokeCheckEnabled"), 0)
+CONFIG_INTEGER(JitPInvokeEnabled, W("JITPInvokeEnabled"), 1)
+CONFIG_INTEGER(JitPrintInlinedMethods, W("JitPrintInlinedMethods"), 0)
+CONFIG_INTEGER(JitRequired, W("JITRequired"), -1)
+CONFIG_INTEGER(JitRoundFloat, W("JITRoundFloat"), DEFAULT_ROUND_LEVEL)
+CONFIG_INTEGER(JitSkipArrayBoundCheck, W("JitSkipArrayBoundCheck"), 0)
+CONFIG_INTEGER(JitSlowDebugChecksEnabled, W("JitSlowDebugChecksEnabled"), 1) // Turn on slow debug checks
+CONFIG_INTEGER(JitSplitFunctionSize, W("JitSplitFunctionSize"), 0) // On ARM, use this as the maximum function/funclet
+ // size for creating function fragments (and creating
+ // multiple RUNTIME_FUNCTION entries)
+CONFIG_INTEGER(JitSsaStress, W("JitSsaStress"), 0) // Perturb order of processing of blocks in SSA; 0 = no stress; 1 =
+ // use method hash; * = supplied value as random hash
+CONFIG_INTEGER(JitStackChecks, W("JitStackChecks"), 0)
+CONFIG_INTEGER(JitStress, W("JitStress"), 0) // Internal Jit stress mode: 0 = no stress, 2 = all stress, other = vary
+ // stress based on a hash of the method and this value
+CONFIG_INTEGER(JitStressBBProf, W("JitStressBBProf"), 0) // Internal Jit stress mode
+CONFIG_INTEGER(JitStressBiasedCSE, W("JitStressBiasedCSE"), 0x101) // Internal Jit stress mode: decimal bias value
+ // between (0,100) to perform CSE on a candidate.
+ // 100% = All CSEs. 0% = 0 CSE. (> 100) means no
+ // stress.
+CONFIG_INTEGER(JitStressFP, W("JitStressFP"), 0) // Internal Jit stress mode
+CONFIG_INTEGER(JitStressModeNamesOnly, W("JitStressModeNamesOnly"), 0) // Internal Jit stress: if nonzero, only enable
+ // stress modes listed in JitStressModeNames
+CONFIG_INTEGER(JitStressRegs, W("JitStressRegs"), 0)
+CONFIG_INTEGER(JitStrictCheckForNonVirtualCallToVirtualMethod, W("JitStrictCheckForNonVirtualCallToVirtualMethod"), 1)
+CONFIG_INTEGER(JitVNMapSelLimit, W("JitVNMapSelLimit"), 0) // If non-zero, assert if # of VNF_MapSelect applications
+ // considered reaches this
+CONFIG_INTEGER(NgenHashDump, W("NgenHashDump"), -1) // same as JitHashDump, but for ngen
+CONFIG_INTEGER(NgenHashDumpIR, W("NgenHashDumpIR"), -1) // same as JitHashDumpIR, but for ngen
+CONFIG_INTEGER(NgenOrder, W("NgenOrder"), 0)
+CONFIG_INTEGER(RunAltJitCode, W("RunAltJitCode"), 1) // If non-zero, and the compilation succeeds for an AltJit, then
+ // use the code. If zero, then we always throw away the generated
+ // code and fall back to the default compiler.
+CONFIG_INTEGER(RunComponentUnitTests, W("JitComponentUnitTests"), 0) // Run JIT component unit tests
+CONFIG_INTEGER(ShouldInjectFault, W("InjectFault"), 0)
+CONFIG_INTEGER(StackProbesOverride, W("JitStackProbes"), 0)
+CONFIG_INTEGER(StressCOMCall, W("StressCOMCall"), 0)
+CONFIG_INTEGER(TailcallStress, W("TailcallStress"), 0)
+CONFIG_INTEGER(TreesBeforeAfterMorph, W("JitDumpBeforeAfterMorph"), 0) // If 1, display each tree before/after morphing
+CONFIG_METHODSET(JitBreak, W("JitBreak")) // Stops in the importer when compiling a specified method
+CONFIG_METHODSET(JitDebugBreak, W("JitDebugBreak"))
+CONFIG_METHODSET(JitDisasm, W("JitDisasm")) // Dumps disassembly for specified method
+CONFIG_METHODSET(JitDump, W("JitDump")) // Dumps trees for specified method
+CONFIG_METHODSET(JitDumpIR, W("JitDumpIR")) // Dumps trees (in linear IR form) for specified method
+CONFIG_METHODSET(JitEHDump, W("JitEHDump")) // Dump the EH table for the method, as reported to the VM
+CONFIG_METHODSET(JitExclude, W("JitExclude"))
+CONFIG_METHODSET(JitForceProcedureSplitting, W("JitForceProcedureSplitting"))
+CONFIG_METHODSET(JitGCDump, W("JitGCDump"))
+CONFIG_METHODSET(JitHalt, W("JitHalt")) // Emits break instruction into jitted code
+CONFIG_METHODSET(JitImportBreak, W("JitImportBreak"))
+CONFIG_METHODSET(JitInclude, W("JitInclude"))
+CONFIG_METHODSET(JitLateDisasm, W("JitLateDisasm"))
+CONFIG_METHODSET(JitMinOptsName, W("JITMinOptsName")) // Forces MinOpts for a named function
+CONFIG_METHODSET(JitNoProcedureSplitting, W("JitNoProcedureSplitting")) // Disallow procedure splitting for specified
+ // methods
+CONFIG_METHODSET(JitNoProcedureSplittingEH, W("JitNoProcedureSplittingEH")) // Disallow procedure splitting for
+ // specified methods if they contain
+ // exception handling
+CONFIG_METHODSET(JitStressOnly, W("JitStressOnly")) // Internal Jit stress mode: stress only the specified method(s)
+CONFIG_METHODSET(JitUnwindDump, W("JitUnwindDump")) // Dump the unwind codes for the method
+CONFIG_METHODSET(NgenDisasm, W("NgenDisasm")) // Same as JitDisasm, but for ngen
+CONFIG_METHODSET(NgenDump, W("NgenDump")) // Same as JitDump, but for ngen
+CONFIG_METHODSET(NgenDumpIR, W("NgenDumpIR")) // Same as JitDumpIR, but for ngen
+CONFIG_METHODSET(NgenEHDump, W("NgenEHDump")) // Dump the EH table for the method, as reported to the VM
+CONFIG_METHODSET(NgenGCDump, W("NgenGCDump"))
+CONFIG_METHODSET(NgenUnwindDump, W("NgenUnwindDump")) // Dump the unwind codes for the method
+CONFIG_STRING(JitDumpFg, W("JitDumpFg")) // Dumps Xml/Dot Flowgraph for specified method
+CONFIG_STRING(JitDumpFgDir, W("JitDumpFgDir")) // Directory for Xml/Dot flowgraph dump(s)
+CONFIG_STRING(JitDumpFgFile, W("JitDumpFgFile")) // Filename for Xml/Dot flowgraph dump(s)
+CONFIG_STRING(JitDumpFgPhase, W("JitDumpFgPhase")) // Phase-based Xml/Dot flowgraph support. Set to the short name of a
+ // phase to see the flowgraph after that phase. Leave unset to dump
+ // after COLD-BLK (determine first cold block) or set to * for all
+ // phases
+CONFIG_STRING(JitDumpIRFormat, W("JitDumpIRFormat")) // Comma separated format control for JitDumpIR, values = {types |
+ // locals | ssa | valnums | kinds | flags | nodes | nolists |
+ // nostmts | noleafs | trees | dataflow}
+CONFIG_STRING(JitDumpIRPhase, W("JitDumpIRPhase")) // Phase control for JitDumpIR, values = {* | phasename}
+CONFIG_STRING(JitLateDisasmTo, W("JITLateDisasmTo"))
+CONFIG_STRING(JitRange, W("JitRange"))
+CONFIG_STRING(JitStressModeNames, W("JitStressModeNames")) // Internal Jit stress mode: stress using the given set of
+ // stress mode names, e.g. STRESS_REGS, STRESS_TAILCALL
+CONFIG_STRING(JitStressModeNamesNot, W("JitStressModeNamesNot")) // Internal Jit stress mode: do NOT stress using the
+ // given set of stress mode names, e.g. STRESS_REGS,
+ // STRESS_TAILCALL
+CONFIG_STRING(JitStressRange, W("JitStressRange")) // Internal Jit stress mode
+CONFIG_STRING(NgenDumpFg, W("NgenDumpFg")) // Ngen Xml Flowgraph support
+CONFIG_STRING(NgenDumpFgDir, W("NgenDumpFgDir")) // Ngen Xml Flowgraph support
+CONFIG_STRING(NgenDumpFgFile, W("NgenDumpFgFile")) // Ngen Xml Flowgraph support
+CONFIG_STRING(NgenDumpIRFormat, W("NgenDumpIRFormat")) // Same as JitDumpIRFormat, but for ngen
+CONFIG_STRING(NgenDumpIRPhase, W("NgenDumpIRPhase")) // Same as JitDumpIRPhase, but for ngen
+#endif // defined(DEBUG)
+
+// AltJitAssertOnNYI should be 0 on targets where the JIT is under development or in a bring-up stage, so as to
+// facilitate falling back to the main JIT on hitting an NYI.
+#if defined(_TARGET_ARM64_) || defined(_TARGET_X86_)
+CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 0) // Controls the AltJit behavior of NYI stuff
+#else // !defined(_TARGET_ARM64_) && !defined(_TARGET_X86_)
+CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 1) // Controls the AltJit behavior of NYI stuff
+#endif // defined(_TARGET_ARM64_) || defined(_TARGET_X86_)
+
+#if defined(_TARGET_AMD64_)
+CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Enable AVX instruction set for wide operations as default
+#else // !defined(_TARGET_AMD64_)
+CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0) // Enable AVX instruction set for wide operations as default
+#endif // defined(_TARGET_AMD64_)
+
+#if !defined(DEBUG) && !defined(_DEBUG)
+CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 0)
+#else // defined(DEBUG) || defined(_DEBUG)
+CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 1)
+#endif // !defined(DEBUG) && !defined(_DEBUG)
+
+CONFIG_INTEGER(JitAggressiveInlining, W("JitAggressiveInlining"), 0) // Aggressive inlining of all methods
+CONFIG_INTEGER(JitELTHookEnabled, W("JitELTHookEnabled"), 0) // On ARM, setting this will emit Enter/Leave/TailCall
+ // callbacks
+CONFIG_INTEGER(JitInlineSIMDMultiplier, W("JitInlineSIMDMultiplier"), 3)
+
+#if defined(FEATURE_ENABLE_NO_RANGE_CHECKS)
+CONFIG_INTEGER(JitNoRngChks, W("JitNoRngChks"), 0) // If 1, don't generate range checks
+#endif // defined(FEATURE_ENABLE_NO_RANGE_CHECKS)
+
+CONFIG_INTEGER(JitRegisterFP, W("JitRegisterFP"), 3) // Control FP enregistration
+CONFIG_INTEGER(JitTelemetry, W("JitTelemetry"), 1) // If non-zero, gather JIT telemetry data
+CONFIG_INTEGER(JitVNMapSelBudget, W("JitVNMapSelBudget"), 100) // Max # of MapSelect's considered for a particular
+ // top-level invocation.
+CONFIG_INTEGER(TailCallLoopOpt, W("TailCallLoopOpt"), 1) // Convert recursive tail calls to loops
+CONFIG_METHODSET(AltJit, W("AltJit")) // Enables AltJit and selectively limits it to the specified methods.
+CONFIG_METHODSET(AltJitNgen,
+ W("AltJitNgen")) // Enables AltJit for NGEN and selectively limits it to the specified methods.
+
+#if defined(ALT_JIT)
+CONFIG_STRING(AltJitExcludeAssemblies,
+ W("AltJitExcludeAssemblies")) // Do not use AltJit on this semicolon-delimited list of assemblies.
+#endif // defined(ALT_JIT)
+
+CONFIG_STRING(JitFuncInfoFile, W("JitFuncInfoLogFile")) // If set, gather JIT function info and write to this file.
+CONFIG_STRING(JitTimeLogCsv, W("JitTimeLogCsv")) // If set, gather JIT throughput data and write to a CSV file. This
+ // mode must be used in internal retail builds.
+CONFIG_STRING(TailCallOpt, W("TailCallOpt"))
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+CONFIG_INTEGER(JitInlineDumpData, W("JitInlineDumpData"), 0)
+CONFIG_INTEGER(JitInlineDumpXml, W("JitInlineDumpXml"), 0) // 1 = full xml (all methods), 2 = minimal xml (only method
+ // with inlines)
+CONFIG_INTEGER(JitInlineLimit, W("JitInlineLimit"), -1)
+CONFIG_INTEGER(JitInlinePolicyDiscretionary, W("JitInlinePolicyDiscretionary"), 0)
+CONFIG_INTEGER(JitInlinePolicyFull, W("JitInlinePolicyFull"), 0)
+CONFIG_INTEGER(JitInlinePolicySize, W("JitInlinePolicySize"), 0)
+CONFIG_INTEGER(JitInlinePolicyReplay, W("JitInlinePolicyReplay"), 0)
+CONFIG_STRING(JitNoInlineRange, W("JitNoInlineRange"))
+CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile"))
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+CONFIG_INTEGER(JitInlinePolicyLegacy, W("JitInlinePolicyLegacy"), 0)
+CONFIG_INTEGER(JitInlinePolicyModel, W("JitInlinePolicyModel"), 0)
+
+#undef CONFIG_INTEGER
+#undef CONFIG_STRING
+#undef CONFIG_METHODSET
diff --git a/src/jit/jiteh.cpp b/src/jit/jiteh.cpp
new file mode 100644
index 0000000000..b20c2f8a9a
--- /dev/null
+++ b/src/jit/jiteh.cpp
@@ -0,0 +1,4056 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Exception Handling XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX "EHblkDsc" functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+
+BasicBlock* EHblkDsc::BBFilterLast()
+{
+ noway_assert(HasFilter());
+ noway_assert(ebdFilter != nullptr);
+ noway_assert(ebdHndBeg != nullptr);
+
+ // The last block of the filter is the block immediately preceding the first block of the handler.
+ return ebdHndBeg->bbPrev;
+}
+
+BasicBlock* EHblkDsc::ExFlowBlock()
+{
+ if (HasFilter())
+ {
+ return ebdFilter;
+ }
+ else
+ {
+ return ebdHndBeg;
+ }
+}
+
+bool EHblkDsc::InTryRegionILRange(BasicBlock* pBlk)
+{
+ // BBF_INTERNAL blocks may not have a valid bbCodeOffs. This function
+ // should only be used before any BBF_INTERNAL blocks have been added.
+ assert(!(pBlk->bbFlags & BBF_INTERNAL));
+
+ return Compiler::jitIsBetween(pBlk->bbCodeOffs, ebdTryBegOffs(), ebdTryEndOffs());
+}
+
+bool EHblkDsc::InFilterRegionILRange(BasicBlock* pBlk)
+{
+ // BBF_INTERNAL blocks may not have a valid bbCodeOffs. This function
+ // should only be used before any BBF_INTERNAL blocks have been added.
+ assert(!(pBlk->bbFlags & BBF_INTERNAL));
+
+ return HasFilter() && Compiler::jitIsBetween(pBlk->bbCodeOffs, ebdFilterBegOffs(), ebdFilterEndOffs());
+}
+
+bool EHblkDsc::InHndRegionILRange(BasicBlock* pBlk)
+{
+ // BBF_INTERNAL blocks may not have a valid bbCodeOffs. This function
+ // should only be used before any BBF_INTERNAL blocks have been added.
+ assert(!(pBlk->bbFlags & BBF_INTERNAL));
+
+ return Compiler::jitIsBetween(pBlk->bbCodeOffs, ebdHndBegOffs(), ebdHndEndOffs());
+}
+
+// HasCatchHandler: returns 'true' for either try/catch, or try/filter/filter-handler.
+bool EHblkDsc::HasCatchHandler()
+{
+ return (ebdHandlerType == EH_HANDLER_CATCH) || (ebdHandlerType == EH_HANDLER_FILTER);
+}
+
+bool EHblkDsc::HasFilter()
+{
+ return ebdHandlerType == EH_HANDLER_FILTER;
+}
+
+bool EHblkDsc::HasFinallyHandler()
+{
+ return ebdHandlerType == EH_HANDLER_FINALLY;
+}
+
+bool EHblkDsc::HasFaultHandler()
+{
+ return ebdHandlerType == EH_HANDLER_FAULT;
+}
+
+bool EHblkDsc::HasFinallyOrFaultHandler()
+{
+ return HasFinallyHandler() || HasFaultHandler();
+}
+
+/*****************************************************************************
+ * Returns true if pBlk is a block in the range [pStart..pEnd).
+ * The check is inclusive of pStart, exclusive of pEnd.
+ */
+
+bool EHblkDsc::InBBRange(BasicBlock* pBlk, BasicBlock* pStart, BasicBlock* pEnd)
+{
+ for (BasicBlock* pWalk = pStart; pWalk != pEnd; pWalk = pWalk->bbNext)
+ {
+ if (pWalk == pBlk)
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool EHblkDsc::InTryRegionBBRange(BasicBlock* pBlk)
+{
+ return InBBRange(pBlk, ebdTryBeg, ebdTryLast->bbNext);
+}
+
+bool EHblkDsc::InFilterRegionBBRange(BasicBlock* pBlk)
+{
+ return HasFilter() && InBBRange(pBlk, ebdFilter, ebdHndBeg);
+}
+
+bool EHblkDsc::InHndRegionBBRange(BasicBlock* pBlk)
+{
+ return InBBRange(pBlk, ebdHndBeg, ebdHndLast->bbNext);
+}
+
+unsigned EHblkDsc::ebdGetEnclosingRegionIndex(bool* inTryRegion)
+{
+ if ((ebdEnclosingTryIndex == NO_ENCLOSING_INDEX) && (ebdEnclosingHndIndex == NO_ENCLOSING_INDEX))
+ {
+ return NO_ENCLOSING_INDEX;
+ }
+ else if (ebdEnclosingTryIndex == NO_ENCLOSING_INDEX)
+ {
+ assert(ebdEnclosingHndIndex != NO_ENCLOSING_INDEX);
+ *inTryRegion = false;
+ return ebdEnclosingHndIndex;
+ }
+ else if (ebdEnclosingHndIndex == NO_ENCLOSING_INDEX)
+ {
+ assert(ebdEnclosingTryIndex != NO_ENCLOSING_INDEX);
+ *inTryRegion = true;
+ return ebdEnclosingTryIndex;
+ }
+ else
+ {
+ assert(ebdEnclosingTryIndex != NO_ENCLOSING_INDEX);
+ assert(ebdEnclosingHndIndex != NO_ENCLOSING_INDEX);
+ assert(ebdEnclosingTryIndex != ebdEnclosingHndIndex);
+ if (ebdEnclosingTryIndex < ebdEnclosingHndIndex)
+ {
+ *inTryRegion = true;
+ return ebdEnclosingTryIndex;
+ }
+ else
+ {
+ *inTryRegion = false;
+ return ebdEnclosingHndIndex;
+ }
+ }
+}
+
+/*****************************************************************************/
+
+// We used to assert that the IL offsets in the EH table matched the IL offset stored
+// on the blocks pointed to by the try/filter/handler block pointers. This is true at
+// import time, but can fail to be true later in compilation when we start doing
+// flow optimizations.
+//
+// That being said, the IL offsets in the EH table should only be examined early,
+// during importing. After importing, use block info instead.
+
+IL_OFFSET EHblkDsc::ebdTryBegOffs()
+{
+ return ebdTryBegOffset;
+}
+
+IL_OFFSET EHblkDsc::ebdTryEndOffs()
+{
+ return ebdTryEndOffset;
+}
+
+IL_OFFSET EHblkDsc::ebdHndBegOffs()
+{
+ return ebdHndBegOffset;
+}
+
+IL_OFFSET EHblkDsc::ebdHndEndOffs()
+{
+ return ebdHndEndOffset;
+}
+
+IL_OFFSET EHblkDsc::ebdFilterBegOffs()
+{
+ assert(HasFilter());
+ return ebdFilterBegOffset;
+}
+
+IL_OFFSET EHblkDsc::ebdFilterEndOffs()
+{
+ assert(HasFilter());
+ return ebdHndBegOffs(); // end of filter is beginning of handler
+}
+
+/* static */
+bool EHblkDsc::ebdIsSameILTry(EHblkDsc* h1, EHblkDsc* h2)
+{
+ return ((h1->ebdTryBegOffset == h2->ebdTryBegOffset) && (h1->ebdTryEndOffset == h2->ebdTryEndOffset));
+}
+
+/*****************************************************************************/
+
+/* static */
+bool EHblkDsc::ebdIsSameTry(EHblkDsc* h1, EHblkDsc* h2)
+{
+ return ((h1->ebdTryBeg == h2->ebdTryBeg) && (h1->ebdTryLast == h2->ebdTryLast));
+}
+
+bool EHblkDsc::ebdIsSameTry(Compiler* comp, unsigned t2)
+{
+ EHblkDsc* h2 = comp->ehGetDsc(t2);
+ return ebdIsSameTry(this, h2);
+}
+
+bool EHblkDsc::ebdIsSameTry(BasicBlock* ebdTryBeg, BasicBlock* ebdTryLast)
+{
+ return ((this->ebdTryBeg == ebdTryBeg) && (this->ebdTryLast == ebdTryLast));
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************/
+
+void EHblkDsc::DispEntry(unsigned XTnum)
+{
+ printf(" %2u ::", XTnum);
+
+#if !FEATURE_EH_FUNCLETS
+ printf(" %2u ", XTnum, ebdHandlerNestingLevel);
+#endif // !FEATURE_EH_FUNCLETS
+
+ if (ebdEnclosingTryIndex == NO_ENCLOSING_INDEX)
+ {
+ printf(" ");
+ }
+ else
+ {
+ printf(" %2u ", ebdEnclosingTryIndex);
+ }
+
+ if (ebdEnclosingHndIndex == NO_ENCLOSING_INDEX)
+ {
+ printf(" ");
+ }
+ else
+ {
+ printf(" %2u ", ebdEnclosingHndIndex);
+ }
+
+ //////////////
+ ////////////// Protected (try) region
+ //////////////
+
+ printf("- Try at BB%02u..BB%02u", ebdTryBeg->bbNum, ebdTryLast->bbNum);
+
+ /* ( brace matching editor workaround to compensate for the following line */
+ printf(" [%03X..%03X), ", ebdTryBegOffset, ebdTryEndOffset);
+
+ //////////////
+ ////////////// Filter region
+ //////////////
+
+ if (HasFilter())
+ {
+ /* ( brace matching editor workaround to compensate for the following line */
+ printf("Filter at BB%02u..BB%02u [%03X..%03X), ", ebdFilter->bbNum, BBFilterLast()->bbNum, ebdFilterBegOffset,
+ ebdHndBegOffset);
+ }
+
+ //////////////
+ ////////////// Handler region
+ //////////////
+
+ if (ebdHndBeg->bbCatchTyp == BBCT_FINALLY)
+ {
+ printf("Finally");
+ }
+ else if (ebdHndBeg->bbCatchTyp == BBCT_FAULT)
+ {
+ printf("Fault ");
+ }
+ else
+ {
+ printf("Handler");
+ }
+
+ printf(" at BB%02u..BB%02u", ebdHndBeg->bbNum, ebdHndLast->bbNum);
+
+ /* ( brace matching editor workaround to compensate for the following line */
+ printf(" [%03X..%03X)", ebdHndBegOffset, ebdHndEndOffset);
+
+ printf("\n");
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+/*****************************************************************************/
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX "Compiler" functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+bool Compiler::bbInCatchHandlerILRange(BasicBlock* blk)
+{
+ EHblkDsc* HBtab = ehGetBlockHndDsc(blk);
+
+ if (HBtab == nullptr)
+ {
+ return false;
+ }
+
+ return HBtab->HasCatchHandler() && HBtab->InHndRegionILRange(blk);
+}
+
+bool Compiler::bbInFilterILRange(BasicBlock* blk)
+{
+ EHblkDsc* HBtab = ehGetBlockHndDsc(blk);
+
+ if (HBtab == nullptr)
+ {
+ return false;
+ }
+
+ return HBtab->InFilterRegionILRange(blk);
+}
+
+// Given a handler region, find the innermost try region that contains it.
+// NOTE: handlerIndex is 1-based (0 means no handler).
+unsigned short Compiler::bbFindInnermostTryRegionContainingHandlerRegion(unsigned handlerIndex)
+{
+ if (handlerIndex > 0)
+ {
+ unsigned XTnum;
+ EHblkDsc* ehDsc;
+ BasicBlock* blk = ehGetDsc(handlerIndex - 1)->ebdHndBeg;
+
+ // handlerIndex is 1 based, therefore our interesting clauses start from clause compHndBBtab[handlerIndex]
+ EHblkDsc* ehDscEnd = compHndBBtab + compHndBBtabCount;
+ for (ehDsc = compHndBBtab + handlerIndex, XTnum = handlerIndex; ehDsc < ehDscEnd; ehDsc++, XTnum++)
+ {
+ if (bbInTryRegions(XTnum, blk))
+ {
+ noway_assert(XTnum < MAX_XCPTN_INDEX);
+ return (unsigned short)(XTnum + 1); // Return the tryIndex
+ }
+ }
+ }
+
+ return 0;
+}
+
+// Given a try region, find the innermost handler region that contains it.
+// NOTE: tryIndex is 1-based (0 means no handler).
+unsigned short Compiler::bbFindInnermostHandlerRegionContainingTryRegion(unsigned tryIndex)
+{
+ if (tryIndex > 0)
+ {
+ unsigned XTnum;
+ EHblkDsc* ehDsc;
+ BasicBlock* blk = ehGetDsc(tryIndex - 1)->ebdTryBeg;
+
+ // tryIndex is 1 based, our interesting clauses start from clause compHndBBtab[tryIndex]
+ EHblkDsc* ehDscEnd = compHndBBtab + compHndBBtabCount;
+ for (ehDsc = compHndBBtab + tryIndex, XTnum = tryIndex; ehDsc < ehDscEnd; ehDsc++, XTnum++)
+ {
+ if (bbInHandlerRegions(XTnum, blk))
+ {
+ noway_assert(XTnum < MAX_XCPTN_INDEX);
+ return (unsigned short)(XTnum + 1); // Return the handlerIndex
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ Given a block and a try region index, check to see if the block is within
+ the try body. For this check, a funclet is considered to be in the region
+ it was extracted from.
+*/
+bool Compiler::bbInTryRegions(unsigned regionIndex, BasicBlock* blk)
+{
+ assert(regionIndex < EHblkDsc::NO_ENCLOSING_INDEX);
+ unsigned tryIndex = blk->hasTryIndex() ? blk->getTryIndex() : EHblkDsc::NO_ENCLOSING_INDEX;
+
+ // Loop outward until we find an enclosing try that is the same as the one
+ // we are looking for or an outer/later one
+ while (tryIndex < regionIndex)
+ {
+ tryIndex = ehGetEnclosingTryIndex(tryIndex);
+ }
+
+ // Now we have the indices of two try regions; either they match or they don't.
+ return (tryIndex == regionIndex);
+}
+
+//------------------------------------------------------------------------
+// bbInExnFlowRegions:
+// Check to see if an exception raised in the given block could be
+// handled by the given region (possibly after inner regions).
+//
+// Arguments:
+// regionIndex - Check if this region can handle exceptions from 'blk'
+// blk - Consider exceptions raised from this block
+//
+// Return Value:
+// true - The region with index 'regionIndex' can handle exceptions from 'blk'
+// false - The region with index 'regionIndex' can't handle exceptions from 'blk'
+//
+// Notes:
+// For this check, a funclet is considered to be in the region it was
+// extracted from.
+
+bool Compiler::bbInExnFlowRegions(unsigned regionIndex, BasicBlock* blk)
+{
+ assert(regionIndex < EHblkDsc::NO_ENCLOSING_INDEX);
+ EHblkDsc* ExnFlowRegion = ehGetBlockExnFlowDsc(blk);
+ unsigned tryIndex = (ExnFlowRegion == nullptr ? EHblkDsc::NO_ENCLOSING_INDEX : ehGetIndex(ExnFlowRegion));
+
+ // Loop outward until we find an enclosing try that is the same as the one
+ // we are looking for or an outer/later one
+ while (tryIndex < regionIndex)
+ {
+ tryIndex = ehGetEnclosingTryIndex(tryIndex);
+ }
+
+ // Now we have the indices of two 'try' regions; either they match or they don't.
+ return (tryIndex == regionIndex);
+}
+
+/*
+ Given a block, check to see if it is in the handler region of the EH descriptor with the given index.
+ For this check, a funclet is considered to be in the region it was extracted from.
+*/
+bool Compiler::bbInHandlerRegions(unsigned regionIndex, BasicBlock* blk)
+{
+ assert(regionIndex < EHblkDsc::NO_ENCLOSING_INDEX);
+ unsigned hndIndex = blk->hasHndIndex() ? blk->getHndIndex() : EHblkDsc::NO_ENCLOSING_INDEX;
+
+ // We can't use the same simple trick here because there is no required ordering
+ // of handlers (which also have no required ordering with respect to their try
+ // bodies).
+ while (hndIndex < EHblkDsc::NO_ENCLOSING_INDEX && hndIndex != regionIndex)
+ {
+ hndIndex = ehGetEnclosingHndIndex(hndIndex);
+ }
+
+ // Now we have the indices of two handler regions; either they match or they don't.
+ return (hndIndex == regionIndex);
+}
+
+/*
+ Given a hndBlk, see if it is in one of tryBlk's catch handler regions.
+
+ Since we create one EHblkDsc for each "catch" of a "try", we might end up
+ with multiple EHblkDsc's that have the same ebdTryBeg and ebdTryLast, but different
+ ebdHndBeg and ebdHndLast. Unfortunately getTryIndex() only returns the index of the first EHblkDsc.
+
+ E.g. The following example shows that BB02 has a catch in BB03 and another catch in BB04.
+
+ index nest, enclosing
+ 0 :: 0, 1 - Try at BB01..BB02 [000..008], Handler at BB03 [009..016]
+ 1 :: 0, - - Try at BB01..BB02 [000..008], Handler at BB04 [017..022]
+
+ This function will return true for
+ bbInCatchHandlerRegions(BB02, BB03) and bbInCatchHandlerRegions(BB02, BB04)
+
+*/
+bool Compiler::bbInCatchHandlerRegions(BasicBlock* tryBlk, BasicBlock* hndBlk)
+{
+ assert(tryBlk->hasTryIndex());
+ if (!hndBlk->hasHndIndex())
+ {
+ return false;
+ }
+
+ unsigned XTnum = tryBlk->getTryIndex();
+ EHblkDsc* firstEHblkDsc = ehGetDsc(XTnum);
+ EHblkDsc* ehDsc = firstEHblkDsc;
+
+ // Rather than searching the whole list, take advantage of our sorting.
+ // We will only match against blocks with the same try body (mutually
+ // protect regions). Because of our sort ordering, such regions will
+ // always be immediately adjacent, any nested regions will be before the
+ // first of the set, and any outer regions will be after the last.
+ // Also siblings will be before or after according to their location,
+ // but never in between.
+
+ while (XTnum > 0)
+ {
+ assert(EHblkDsc::ebdIsSameTry(firstEHblkDsc, ehDsc));
+
+ // Stop when the previous region is not mutually protect
+ if (!EHblkDsc::ebdIsSameTry(firstEHblkDsc, ehDsc - 1))
+ {
+ break;
+ }
+
+ ehDsc--;
+ XTnum--;
+ }
+
+ // XTnum and ehDsc are now referring to the first region in the set of
+ // mutually protect regions.
+ assert(EHblkDsc::ebdIsSameTry(firstEHblkDsc, ehDsc));
+ assert((ehDsc == compHndBBtab) || !EHblkDsc::ebdIsSameTry(firstEHblkDsc, ehDsc - 1));
+
+ do
+ {
+ if (ehDsc->HasCatchHandler() && bbInHandlerRegions(XTnum, hndBlk))
+ {
+ return true;
+ }
+ XTnum++;
+ ehDsc++;
+ } while (XTnum < compHndBBtabCount && EHblkDsc::ebdIsSameTry(firstEHblkDsc, ehDsc));
+
+ return false;
+}
+
+/******************************************************************************************
+ * Given two blocks, return the innermost enclosing try region that contains both of them.
+ * Return 0 if there is no such try region (which means the innermost common region
+ * is the method itself).
+ */
+
+unsigned short Compiler::bbFindInnermostCommonTryRegion(BasicBlock* bbOne, BasicBlock* bbTwo)
+{
+ unsigned XTnum;
+
+ for (XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ if (bbInTryRegions(XTnum, bbOne) && bbInTryRegions(XTnum, bbTwo))
+ {
+ noway_assert(XTnum < MAX_XCPTN_INDEX);
+ return (unsigned short)(XTnum + 1); // Return the tryIndex
+ }
+ }
+
+ return 0;
+}
+
+// bbIsTryBeg() returns true if this block is the start of any try region.
+// This is computed by examining the current values in the
+// EH table rather than just looking at the block->bbFlags.
+//
+// Note that a block is the beginning of any try region if it is the beginning of the
+// most nested try region it is a member of. Thus, we only need to check the EH
+// table entry related to the try index stored on the block.
+//
+bool Compiler::bbIsTryBeg(BasicBlock* block)
+{
+ EHblkDsc* ehDsc = ehGetBlockTryDsc(block);
+ return (ehDsc != nullptr) && (block == ehDsc->ebdTryBeg);
+}
+
+// bbIsHandlerBeg() returns true if "block" is the start of any handler or filter.
+// Note that if a block is the beginning of a handler or filter, it must be the beginning
+// of the most nested handler or filter region it is in. Thus, we only need to look at the EH
+// descriptor corresponding to the handler index on the block.
+//
+bool Compiler::bbIsHandlerBeg(BasicBlock* block)
+{
+ EHblkDsc* ehDsc = ehGetBlockHndDsc(block);
+ return (ehDsc != nullptr) && ((block == ehDsc->ebdHndBeg) || (ehDsc->HasFilter() && (block == ehDsc->ebdFilter)));
+}
+
+bool Compiler::bbIsExFlowBlock(BasicBlock* block, unsigned* regionIndex)
+{
+ if (block->hasHndIndex())
+ {
+ *regionIndex = block->getHndIndex();
+ return block == ehGetDsc(*regionIndex)->ExFlowBlock();
+ }
+ else
+ {
+ return false;
+ }
+}
+
+bool Compiler::ehHasCallableHandlers()
+{
+#if FEATURE_EH_FUNCLETS
+
+ // Any EH in the function?
+
+ return compHndBBtabCount > 0;
+
+#else // FEATURE_EH_FUNCLETS
+
+ return ehNeedsShadowSPslots();
+
+#endif // FEATURE_EH_FUNCLETS
+}
+
+/******************************************************************************************
+ * Determine if 'block' is the last block of an EH 'try' or handler (ignoring filters). If so,
+ * return the EH descriptor pointer for that EH region. Otherwise, return nullptr.
+ */
+EHblkDsc* Compiler::ehIsBlockTryLast(BasicBlock* block)
+{
+ EHblkDsc* HBtab = ehGetBlockTryDsc(block);
+ if ((HBtab != nullptr) && (HBtab->ebdTryLast == block))
+ {
+ return HBtab;
+ }
+ return nullptr;
+}
+
+EHblkDsc* Compiler::ehIsBlockHndLast(BasicBlock* block)
+{
+ EHblkDsc* HBtab = ehGetBlockHndDsc(block);
+ if ((HBtab != nullptr) && (HBtab->ebdHndLast == block))
+ {
+ return HBtab;
+ }
+ return nullptr;
+}
+
+bool Compiler::ehIsBlockEHLast(BasicBlock* block)
+{
+ return (ehIsBlockTryLast(block) != nullptr) || (ehIsBlockHndLast(block) != nullptr);
+}
+
+//------------------------------------------------------------------------
+// ehGetBlockExnFlowDsc:
+// Get the EH descriptor for the most nested region (if any) that may
+// handle exceptions raised in the given block
+//
+// Arguments:
+// block - Consider exceptions raised from this block
+//
+// Return Value:
+// nullptr - The given block's exceptions propagate to caller
+// non-null - This region is the innermost handler for exceptions raised in
+// the given block
+
+EHblkDsc* Compiler::ehGetBlockExnFlowDsc(BasicBlock* block)
+{
+ EHblkDsc* hndDesc = ehGetBlockHndDsc(block);
+
+ if ((hndDesc != nullptr) && hndDesc->InFilterRegionBBRange(block))
+ {
+ // If an exception is thrown in a filter (or escapes a callee in a filter),
+ // or if exception_continue_search (0/false) is returned at
+ // the end of a filter, the (original) exception is propagated to
+ // the next outer handler. The "next outer handler" is the handler
+ // of the try region enclosing the try that the filter protects.
+ // This may not be the same as the try region enclosing the filter,
+ // e.g. in cases like this:
+ // try {
+ // ...
+ // } filter (filter-part) {
+ // handler-part
+ // } catch { (or finally/fault/filter)
+ // which is represented as two EHblkDscs with the same try range,
+ // the inner protected by a filter and the outer protected by the
+ // other handler; exceptions in the filter-part propagate to the
+ // other handler, even though the other handler's try region does not
+ // enclose the filter.
+
+ unsigned outerIndex = hndDesc->ebdEnclosingTryIndex;
+
+ if (outerIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ assert(!block->hasTryIndex());
+ return nullptr;
+ }
+ return ehGetDsc(outerIndex);
+ }
+
+ return ehGetBlockTryDsc(block);
+}
+
+bool Compiler::ehBlockHasExnFlowDsc(BasicBlock* block)
+{
+ if (block->hasTryIndex())
+ {
+ return true;
+ }
+
+ EHblkDsc* hndDesc = ehGetBlockHndDsc(block);
+
+ return ((hndDesc != nullptr) && hndDesc->InFilterRegionBBRange(block) &&
+ (hndDesc->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX));
+}
+
+//------------------------------------------------------------------------
+// ehGetMostNestedRegionIndex: Return the region index of the most nested EH region this block is in.
+// The return value is in the range [0..compHndBBtabCount]. It is the same scale as bbTryIndex/bbHndIndex:
+// 0 means the main method body, and N corresponds to compHndBBtab[N - 1]. If the return value is non-zero,
+// *inTryRegion indicates whether the most nested region containing the block is a 'try' clause or a
+// filter/handler clause. For a return value of 0, *inTryRegion is set to true.
+//
+// Arguments:
+// block - the BasicBlock we want the region index for.
+// inTryRegion - an out parameter. As described above.
+//
+// Return Value:
+// As described above.
+//
+unsigned Compiler::ehGetMostNestedRegionIndex(BasicBlock* block, bool* inTryRegion)
+{
+ assert(block != nullptr);
+ assert(inTryRegion != nullptr);
+
+ unsigned mostNestedRegion;
+ if (block->bbHndIndex == 0)
+ {
+ mostNestedRegion = block->bbTryIndex;
+ *inTryRegion = true;
+ }
+ else if (block->bbTryIndex == 0)
+ {
+ mostNestedRegion = block->bbHndIndex;
+ *inTryRegion = false;
+ }
+ else
+ {
+ if (block->bbTryIndex < block->bbHndIndex)
+ {
+ mostNestedRegion = block->bbTryIndex;
+ *inTryRegion = true;
+ }
+ else
+ {
+ assert(block->bbTryIndex != block->bbHndIndex); // A block can't be both in the 'try' and 'handler' region
+ // of the same EH region
+ mostNestedRegion = block->bbHndIndex;
+ *inTryRegion = false;
+ }
+ }
+
+ assert(mostNestedRegion <= compHndBBtabCount);
+ return mostNestedRegion;
+}
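+
+// Illustrative sketch (not part of the source): a typical caller converts the 1-based
+// return value back to an EH descriptor; the local names here are hypothetical.
+//
+//     bool     inTryRegion;
+//     unsigned region = ehGetMostNestedRegionIndex(block, &inTryRegion);
+//     if (region != 0)
+//     {
+//         EHblkDsc* ehDsc = ehGetDsc(region - 1); // same scale as bbTryIndex/bbHndIndex
+//         // 'inTryRegion' says whether 'block' is in the 'try' or in the filter/handler
+//     }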
+
+/*****************************************************************************
+ * Returns the try index of the enclosing try, skipping all EH regions with the
+ * same try region (that is, all 'mutual protect' regions). If there is no such
+ * enclosing try, returns EHblkDsc::NO_ENCLOSING_INDEX.
+ */
+unsigned Compiler::ehTrueEnclosingTryIndexIL(unsigned regionIndex)
+{
+ assert(regionIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+
+ EHblkDsc* ehDscRoot = ehGetDsc(regionIndex);
+ EHblkDsc* HBtab = ehDscRoot;
+
+ for (;;)
+ {
+ regionIndex = HBtab->ebdEnclosingTryIndex;
+ if (regionIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ // No enclosing 'try'; we're done
+ break;
+ }
+
+ HBtab = ehGetDsc(regionIndex);
+ if (!EHblkDsc::ebdIsSameILTry(ehDscRoot, HBtab))
+ {
+ // Found an enclosing 'try' that has a different 'try' region (is not mutually-protect with the
+ // original region). Return it.
+ break;
+ }
+ }
+
+ return regionIndex;
+}
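+
+// Illustrative example (hypothetical EH table): a C# "try / catch / catch" produces two
+// EH entries with identical 'try' regions (mutually protect). If entries 0 and 1 share
+// the same 'try' and both are nested inside entry 2's 'try', then:
+//
+//     ehGetEnclosingTryIndex(0)    == 1   // the raw enclosing link points at the mutually-protect sibling
+//     ehTrueEnclosingTryIndexIL(0) == 2   // the "true" variant skips it
+//
+// so callers that care about distinct IL 'try' bodies use the "true" variant.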
+
+unsigned Compiler::ehGetEnclosingRegionIndex(unsigned regionIndex, bool* inTryRegion)
+{
+ assert(regionIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+
+ EHblkDsc* ehDsc = ehGetDsc(regionIndex);
+ return ehDsc->ebdGetEnclosingRegionIndex(inTryRegion);
+}
+
+/*****************************************************************************
+ * The argument 'block' has been deleted. Update the EH table so 'block' is no longer listed
+ * as a 'last' block. You can't delete a 'begin' block this way.
+ */
+void Compiler::ehUpdateForDeletedBlock(BasicBlock* block)
+{
+ assert(block->bbFlags & BBF_REMOVED);
+
+ if (!block->hasTryIndex() && !block->hasHndIndex())
+ {
+ // The block is not part of any EH region, so there is nothing to do.
+ return;
+ }
+
+ BasicBlock* bPrev = block->bbPrev;
+ assert(bPrev != nullptr);
+
+ ehUpdateLastBlocks(block, bPrev);
+}
+
+/*****************************************************************************
+ * Determine if an empty block can be deleted, and still preserve the EH normalization
+ * rules on blocks.
+ *
+ * We only consider the case where the block to be deleted is the last block of a region,
+ * and the region is being contracted such that the previous block will become the new
+ * 'last' block. If this previous block is already a 'last' block, then we can't do the
+ * delete, as that would cause a single block to be the 'last' block of multiple regions.
+ */
+bool Compiler::ehCanDeleteEmptyBlock(BasicBlock* block)
+{
+ assert(block->isEmpty());
+
+ return true;
+
+#if 0 // This is disabled while the "multiple last block" normalization is disabled
+ if (!fgNormalizeEHDone)
+ {
+ return true;
+ }
+
+ if (ehIsBlockEHLast(block))
+ {
+ BasicBlock* bPrev = block->bbPrev;
+ if ((bPrev != nullptr) && ehIsBlockEHLast(bPrev))
+ {
+ return false;
+ }
+ }
+
+ return true;
+#endif // 0
+}
+
+/*****************************************************************************
+ * The 'last' block of one or more EH regions might have changed. Update the EH table.
+ * This can happen if the EH region shrinks, where one or more blocks have been removed
+ * from the region. It can happen if the EH region grows, where one or more blocks
+ * have been added at the end of the region.
+ *
+ * We might like to verify the handler table integrity after doing this update, but we
+ * can't because this might just be one step by the caller in a transformation back to
+ * a legal state.
+ *
+ * Arguments:
+ * oldLast -- Search for this block as the 'last' block of one or more EH regions.
+ * newLast -- If 'oldLast' is found to be the 'last' block of an EH region, replace it by 'newLast'.
+ */
+void Compiler::ehUpdateLastBlocks(BasicBlock* oldLast, BasicBlock* newLast)
+{
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+ if (HBtab->ebdTryLast == oldLast)
+ {
+ fgSetTryEnd(HBtab, newLast);
+ }
+ if (HBtab->ebdHndLast == oldLast)
+ {
+ fgSetHndEnd(HBtab, newLast);
+ }
+ }
+}
+
+unsigned Compiler::ehGetCallFinallyRegionIndex(unsigned finallyIndex, bool* inTryRegion)
+{
+ assert(finallyIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+ assert(ehGetDsc(finallyIndex)->HasFinallyHandler());
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ return ehGetDsc(finallyIndex)->ebdGetEnclosingRegionIndex(inTryRegion);
+#else
+ *inTryRegion = true;
+ return finallyIndex;
+#endif
+}
+
+void Compiler::ehGetCallFinallyBlockRange(unsigned finallyIndex, BasicBlock** begBlk, BasicBlock** endBlk)
+{
+ assert(finallyIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+ assert(ehGetDsc(finallyIndex)->HasFinallyHandler());
+ assert(begBlk != nullptr);
+ assert(endBlk != nullptr);
+
+ EHblkDsc* ehDsc = ehGetDsc(finallyIndex);
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ bool inTryRegion;
+ unsigned callFinallyRegionIndex = ehGetCallFinallyRegionIndex(finallyIndex, &inTryRegion);
+
+ if (callFinallyRegionIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ *begBlk = fgFirstBB;
+ *endBlk = fgEndBBAfterMainFunction();
+ }
+ else
+ {
+ EHblkDsc* ehDsc = ehGetDsc(callFinallyRegionIndex);
+
+ if (inTryRegion)
+ {
+ *begBlk = ehDsc->ebdTryBeg;
+ *endBlk = ehDsc->ebdTryLast->bbNext;
+ }
+ else
+ {
+ *begBlk = ehDsc->ebdHndBeg;
+ *endBlk = ehDsc->ebdHndLast->bbNext;
+ }
+ }
+#else // !FEATURE_EH_CALLFINALLY_THUNKS
+ *begBlk = ehDsc->ebdTryBeg;
+ *endBlk = ehDsc->ebdTryLast->bbNext;
+#endif // !FEATURE_EH_CALLFINALLY_THUNKS
+}
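+
+// Illustrative usage (not part of the source; names are hypothetical): a caller can use
+// the helper above to ask where a BBJ_CALLFINALLY for 'finallyIndex' may legally live:
+//
+//     BasicBlock* begBlk;
+//     BasicBlock* endBlk;
+//     ehGetCallFinallyBlockRange(finallyIndex, &begBlk, &endBlk);
+//     // a legal BBJ_CALLFINALLY for 'finallyIndex' lies in the range [begBlk, endBlk)
+//
+// ehCallFinallyInCorrectRegion() below performs the equivalent region-index-based check in DEBUG.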
+
+#ifdef DEBUG
+
+bool Compiler::ehCallFinallyInCorrectRegion(BasicBlock* blockCallFinally, unsigned finallyIndex)
+{
+ assert(blockCallFinally->bbJumpKind == BBJ_CALLFINALLY);
+ assert(finallyIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+ assert(finallyIndex < compHndBBtabCount);
+ assert(ehGetDsc(finallyIndex)->HasFinallyHandler());
+
+ bool inTryRegion;
+ unsigned callFinallyIndex = ehGetCallFinallyRegionIndex(finallyIndex, &inTryRegion);
+ if (callFinallyIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ if (blockCallFinally->hasTryIndex() || blockCallFinally->hasHndIndex())
+ {
+ // The BBJ_CALLFINALLY is supposed to be in the main function body, not in any EH region.
+ return false;
+ }
+ else
+ {
+ return true;
+ }
+ }
+ else
+ {
+ if (inTryRegion)
+ {
+ if (bbInTryRegions(callFinallyIndex, blockCallFinally))
+ {
+ return true;
+ }
+ }
+ else
+ {
+ if (bbInHandlerRegions(callFinallyIndex, blockCallFinally))
+ {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+#endif // DEBUG
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Are there (or will there be) any funclets in the function?
+ */
+
+bool Compiler::ehAnyFunclets()
+{
+ return compHndBBtabCount > 0; // if there is any EH, there will be funclets
+}
+
+/*****************************************************************************
+ *
+ * Count the number of EH funclets in the function. This will return the number
+ * there will be after funclets have been created, but because it runs over the
+ * EH table, it is accurate at any time.
+ */
+
+unsigned Compiler::ehFuncletCount()
+{
+ unsigned funcletCnt = 0;
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+ if (HBtab->HasFilter())
+ {
+ ++funcletCnt;
+ }
+ ++funcletCnt;
+ }
+ return funcletCnt;
+}
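+
+// Illustrative example: a method with one try/catch and one try/filter/filter-handler has an
+// ehFuncletCount() of 3 -- one funclet for the catch, plus one each for the filter and its
+// filter-handler (the filter is the extra increment in the loop above).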
+
+/*****************************************************************************
+ *
+ * Get the index to use as the cache key for sharing throw blocks.
+ * For non-funclet platforms, this is just the block's bbTryIndex, to ensure
+ * that the throw is protected by the correct set of 'try' regions. However, when we have
+ * funclets we also have to ensure that the throw blocks are *not* shared
+ * across funclets, so we use the EHblkDsc index of either the funclet or
+ * the containing try region, whichever is inner-most. We differentiate
+ * between the 3 cases by setting the high bits (0 = try, 1 = filter,
+ * 2 = handler).
+ *
+ */
+unsigned Compiler::bbThrowIndex(BasicBlock* blk)
+{
+ if (!blk->hasTryIndex() && !blk->hasHndIndex())
+ {
+ return -1;
+ }
+
+ const unsigned tryIndex = blk->hasTryIndex() ? blk->getTryIndex() : USHRT_MAX;
+ const unsigned hndIndex = blk->hasHndIndex() ? blk->getHndIndex() : USHRT_MAX;
+ assert(tryIndex != hndIndex);
+ assert(tryIndex != USHRT_MAX || hndIndex != USHRT_MAX);
+
+ if (tryIndex < hndIndex)
+ {
+ // The most enclosing region is a try body, use it
+ assert(tryIndex <= 0x3FFFFFFF);
+ return tryIndex;
+ }
+
+ // The most enclosing region is a handler which will be a funclet
+ // Now we have to figure out if blk is in the filter or handler
+ assert(hndIndex <= 0x3FFFFFFF);
+ if (ehGetDsc(hndIndex)->InFilterRegionBBRange(blk))
+ {
+ return hndIndex | 0x40000000;
+ }
+
+ return hndIndex | 0x80000000;
+}
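+
+// Illustrative sketch (not part of the source): the cache key computed above can be decomposed
+// as follows; the local names are hypothetical and only show the bit layout used when comparing
+// keys for throw-block sharing.
+//
+//     unsigned key      = bbThrowIndex(blk);
+//     unsigned ehIndex  = key & 0x3FFFFFFF;          // try or handler/filter index
+//     bool     inFilter = (key & 0x40000000) != 0;   // block lives in a filter funclet
+//     bool     inHnd    = (key & 0x80000000) != 0;   // block lives in a handler funclet
+//
+// Blocks outside any EH region all map to the single key (unsigned)-1. Per the comment above,
+// the intent is that throw blocks are shared only between blocks with equal keys, i.e. blocks
+// protected by the same region and placed in the same funclet.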
+
+#endif // FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ * Determine the emitter code cookie for a block, for unwind purposes.
+ */
+
+void* Compiler::ehEmitCookie(BasicBlock* block)
+{
+ noway_assert(block);
+
+ void* cookie;
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (block->bbFlags & BBF_FINALLY_TARGET)
+ {
+ // Use the offset of the beginning of the NOP padding, not the main block.
+ // This might include loop head padding, too, if this is a loop head.
+ assert(block->bbUnwindNopEmitCookie); // probably not null-initialized, though, so this might not tell us
+ // anything
+ cookie = block->bbUnwindNopEmitCookie;
+ }
+ else
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ {
+ cookie = block->bbEmitCookie;
+ }
+
+ noway_assert(cookie != nullptr);
+ return cookie;
+}
+
+/*****************************************************************************
+ * Determine the emitter code offset for a block. If the block is a finally
+ * target, choose the offset of the NOP padding that precedes the block.
+ */
+
+UNATIVE_OFFSET Compiler::ehCodeOffset(BasicBlock* block)
+{
+ return genEmitter->emitCodeOffset(ehEmitCookie(block), 0);
+}
+
+/****************************************************************************/
+
+EHblkDsc* Compiler::ehInitHndRange(BasicBlock* blk, IL_OFFSET* hndBeg, IL_OFFSET* hndEnd, bool* inFilter)
+{
+ EHblkDsc* hndTab = ehGetBlockHndDsc(blk);
+ if (hndTab != nullptr)
+ {
+ if (hndTab->InFilterRegionILRange(blk))
+ {
+ *hndBeg = hndTab->ebdFilterBegOffs();
+ *hndEnd = hndTab->ebdFilterEndOffs();
+ *inFilter = true;
+ }
+ else
+ {
+ *hndBeg = hndTab->ebdHndBegOffs();
+ *hndEnd = hndTab->ebdHndEndOffs();
+ *inFilter = false;
+ }
+ }
+ else
+ {
+ *hndBeg = 0;
+ *hndEnd = info.compILCodeSize;
+ *inFilter = false;
+ }
+ return hndTab;
+}
+
+/****************************************************************************/
+
+EHblkDsc* Compiler::ehInitTryRange(BasicBlock* blk, IL_OFFSET* tryBeg, IL_OFFSET* tryEnd)
+{
+ EHblkDsc* tryTab = ehGetBlockTryDsc(blk);
+ if (tryTab != nullptr)
+ {
+ *tryBeg = tryTab->ebdTryBegOffs();
+ *tryEnd = tryTab->ebdTryEndOffs();
+ }
+ else
+ {
+ *tryBeg = 0;
+ *tryEnd = info.compILCodeSize;
+ }
+ return tryTab;
+}
+
+/****************************************************************************/
+
+EHblkDsc* Compiler::ehInitHndBlockRange(BasicBlock* blk, BasicBlock** hndBeg, BasicBlock** hndLast, bool* inFilter)
+{
+ EHblkDsc* hndTab = ehGetBlockHndDsc(blk);
+ if (hndTab != nullptr)
+ {
+ if (hndTab->InFilterRegionBBRange(blk))
+ {
+ *hndBeg = hndTab->ebdFilter;
+ if (hndLast != nullptr)
+ {
+ *hndLast = hndTab->BBFilterLast();
+ }
+ *inFilter = true;
+ }
+ else
+ {
+ *hndBeg = hndTab->ebdHndBeg;
+ if (hndLast != nullptr)
+ {
+ *hndLast = hndTab->ebdHndLast;
+ }
+ *inFilter = false;
+ }
+ }
+ else
+ {
+ *hndBeg = nullptr;
+ if (hndLast != nullptr)
+ {
+ *hndLast = nullptr;
+ }
+ *inFilter = false;
+ }
+ return hndTab;
+}
+
+/****************************************************************************/
+
+EHblkDsc* Compiler::ehInitTryBlockRange(BasicBlock* blk, BasicBlock** tryBeg, BasicBlock** tryLast)
+{
+ EHblkDsc* tryTab = ehGetBlockTryDsc(blk);
+ if (tryTab != nullptr)
+ {
+ *tryBeg = tryTab->ebdTryBeg;
+ if (tryLast != nullptr)
+ {
+ *tryLast = tryTab->ebdTryLast;
+ }
+ }
+ else
+ {
+ *tryBeg = nullptr;
+ if (tryLast != nullptr)
+ {
+ *tryLast = nullptr;
+ }
+ }
+ return tryTab;
+}
+
+/*****************************************************************************
+ * This method updates the value of ebdTryLast.
+ */
+
+void Compiler::fgSetTryEnd(EHblkDsc* handlerTab, BasicBlock* newTryLast)
+{
+ assert(newTryLast != nullptr);
+
+ //
+ // Check if we are going to change the existing value of ebdTryLast
+ //
+ if (handlerTab->ebdTryLast != newTryLast)
+ {
+ // Update the EH table with the newTryLast block
+ handlerTab->ebdTryLast = newTryLast;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("EH#%u: New last block of try: BB%02u\n", ehGetIndex(handlerTab), newTryLast->bbNum);
+ }
+#endif // DEBUG
+ }
+}
+
+/*****************************************************************************
+ *
+ * This method updates the value of ebdHndLast.
+ */
+
+void Compiler::fgSetHndEnd(EHblkDsc* handlerTab, BasicBlock* newHndLast)
+{
+ assert(newHndLast != nullptr);
+
+ //
+ // Check if we are going to change the existing value of ebdHndLast
+ //
+ if (handlerTab->ebdHndLast != newHndLast)
+ {
+ // Update the EH table with the newHndLast block
+ handlerTab->ebdHndLast = newHndLast;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("EH#%u: New last block of handler: BB%02u\n", ehGetIndex(handlerTab), newHndLast->bbNum);
+ }
+#endif // DEBUG
+ }
+}
+
+/*****************************************************************************
+ *
+ * Given an EH handler table entry, update the ebdTryLast and ebdHndLast pointers
+ * to skip basic blocks that have been removed. They are set to the last
+ * non-removed block in the 'try' and handler regions, respectively.
+ *
+ * Note that removed blocks are not in the global list of blocks (no block in the
+ * global list points to them). However, their pointers are still valid. We use
+ * this fact when we walk lists of removed blocks until we find a non-removed
+ * block, to be used for ending our iteration.
+ */
+
+void Compiler::fgSkipRmvdBlocks(EHblkDsc* handlerTab)
+{
+ BasicBlock* block;
+ BasicBlock* bEnd;
+ BasicBlock* bLast;
+
+ // Update ebdTryLast
+ bLast = nullptr;
+
+ // Find the first non-removed block after the 'try' region to end our iteration.
+ bEnd = handlerTab->ebdTryLast->bbNext;
+ while ((bEnd != nullptr) && (bEnd->bbFlags & BBF_REMOVED))
+ {
+ bEnd = bEnd->bbNext;
+ }
+
+ // Update bLast to account for any removed blocks
+ block = handlerTab->ebdTryBeg;
+ while (block != nullptr)
+ {
+ if ((block->bbFlags & BBF_REMOVED) == 0)
+ {
+ bLast = block;
+ }
+
+ block = block->bbNext;
+
+ if (block == bEnd)
+ {
+ break;
+ }
+ }
+
+ fgSetTryEnd(handlerTab, bLast);
+
+ // Update ebdHndLast
+ bLast = nullptr;
+
+ // Find the first non-removed block after the handler region to end our iteration.
+ bEnd = handlerTab->ebdHndLast->bbNext;
+ while ((bEnd != nullptr) && (bEnd->bbFlags & BBF_REMOVED))
+ {
+ bEnd = bEnd->bbNext;
+ }
+
+ // Update bLast to account for any removed blocks
+ block = handlerTab->ebdHndBeg;
+ while (block != nullptr)
+ {
+ if ((block->bbFlags & BBF_REMOVED) == 0)
+ {
+ bLast = block;
+ }
+
+ block = block->bbNext;
+ if (block == bEnd)
+ {
+ break;
+ }
+ }
+
+ fgSetHndEnd(handlerTab, bLast);
+}
+
+/*****************************************************************************
+ *
+ * Allocate the EH table
+ */
+void Compiler::fgAllocEHTable()
+{
+#if FEATURE_EH_FUNCLETS
+
+ // We need to allocate space for EH clauses that will be used by funclets
+ // as well as one for each EH clause from the IL. Nested EH clauses pulled
+ // out as funclets create one EH clause for each enclosing region. Thus,
+ // the maximum number of clauses we will need might be very large. We allocate
+ // twice the number of EH clauses in the IL, which should be good in practice.
+ // In extreme cases, we might need to abandon this and reallocate. See
+ // fgAddEHTableEntry() for more details.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ compHndBBtabAllocCount = info.compXcptnsCount; // force the resizing code to hit more frequently in DEBUG
+#else // DEBUG
+ compHndBBtabAllocCount = info.compXcptnsCount * 2;
+#endif // DEBUG
+
+#else // FEATURE_EH_FUNCLETS
+
+ compHndBBtabAllocCount = info.compXcptnsCount;
+
+#endif // FEATURE_EH_FUNCLETS
+
+ compHndBBtab = new (this, CMK_BasicBlock) EHblkDsc[compHndBBtabAllocCount];
+
+ compHndBBtabCount = info.compXcptnsCount;
+}
+
+/*****************************************************************************
+ *
+ * Remove a single exception table entry. Note that this changes the size of
+ * the exception table. If calling this within a loop over the exception table,
+ * be careful to iterate again on the current entry (at index XTnum) so that no entry is skipped.
+ */
+void Compiler::fgRemoveEHTableEntry(unsigned XTnum)
+{
+ assert(compHndBBtabCount > 0);
+ assert(XTnum < compHndBBtabCount);
+
+ EHblkDsc* HBtab;
+
+ /* Reduce the number of entries in the EH table by one */
+ compHndBBtabCount--;
+
+ if (compHndBBtabCount == 0)
+ {
+ // No more entries remaining.
+ INDEBUG(compHndBBtab = (EHblkDsc*)INVALID_POINTER_VALUE;)
+ }
+ else
+ {
+ /* If we recorded an enclosing index for xtab then see
+ * if it needs to be updated due to the removal of this entry
+ */
+
+ HBtab = compHndBBtab + XTnum;
+
+ EHblkDsc* xtabEnd;
+ EHblkDsc* xtab;
+ for (xtab = compHndBBtab, xtabEnd = compHndBBtab + compHndBBtabCount; xtab < xtabEnd; xtab++)
+ {
+ if ((xtab != HBtab) && (xtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) &&
+ (xtab->ebdEnclosingTryIndex >= XTnum))
+ {
+ // Update the enclosing scope link
+ if (xtab->ebdEnclosingTryIndex == XTnum)
+ {
+ xtab->ebdEnclosingTryIndex = HBtab->ebdEnclosingTryIndex;
+ }
+ if ((xtab->ebdEnclosingTryIndex > XTnum) &&
+ (xtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX))
+ {
+ xtab->ebdEnclosingTryIndex--;
+ }
+ }
+
+ if ((xtab != HBtab) && (xtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX) &&
+ (xtab->ebdEnclosingHndIndex >= XTnum))
+ {
+ // Update the enclosing scope link
+ if (xtab->ebdEnclosingHndIndex == XTnum)
+ {
+ xtab->ebdEnclosingHndIndex = HBtab->ebdEnclosingHndIndex;
+ }
+ if ((xtab->ebdEnclosingHndIndex > XTnum) &&
+ (xtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX))
+ {
+ xtab->ebdEnclosingHndIndex--;
+ }
+ }
+ }
+
+ /* We need to update all of the blocks' bbTryIndex and bbHndIndex */
+
+ for (BasicBlock* blk = fgFirstBB; blk; blk = blk->bbNext)
+ {
+ if (blk->hasTryIndex())
+ {
+ if (blk->getTryIndex() == XTnum)
+ {
+ noway_assert(blk->bbFlags & BBF_REMOVED);
+ INDEBUG(blk->setTryIndex(MAX_XCPTN_INDEX);) // Note: this is still a legal index, just unlikely
+ }
+ else if (blk->getTryIndex() > XTnum)
+ {
+ blk->setTryIndex(blk->getTryIndex() - 1);
+ }
+ }
+
+ if (blk->hasHndIndex())
+ {
+ if (blk->getHndIndex() == XTnum)
+ {
+ noway_assert(blk->bbFlags & BBF_REMOVED);
+ INDEBUG(blk->setHndIndex(MAX_XCPTN_INDEX);) // Note: this is still a legal index, just unlikely
+ }
+ else if (blk->getHndIndex() > XTnum)
+ {
+ blk->setHndIndex(blk->getHndIndex() - 1);
+ }
+ }
+ }
+
+ /* Now remove the unused entry from the table */
+
+ if (XTnum < compHndBBtabCount)
+ {
+ /* We copy over the old entry */
+ memmove(HBtab, HBtab + 1, (compHndBBtabCount - XTnum) * sizeof(*HBtab));
+ }
+ else
+ {
+ /* Last entry. Don't need to do anything */
+ noway_assert(XTnum == compHndBBtabCount);
+ }
+ }
+}
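+
+// Illustrative example (hypothetical 5-entry table): removing entry 2 shifts entries 3 and 4
+// down to become entries 2 and 3. Any ebdEnclosingTryIndex / ebdEnclosingHndIndex or block
+// bbTryIndex / bbHndIndex that referred to an index greater than 2 is decremented by one; an
+// enclosing link that referred to exactly 2 is redirected to the removed entry's own enclosing
+// index, and any block that referred to exactly 2 must already be marked BBF_REMOVED, as
+// asserted above.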
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Add a single exception table entry at index 'XTnum', [0 <= XTnum <= compHndBBtabCount].
+ * If 'XTnum' is compHndBBtabCount, then add the entry at the end.
+ * Note that this changes the size of the exception table.
+ * All the blocks referring to the various index values are updated.
+ * The table entry itself is not filled in.
+ * Returns a pointer to the new entry.
+ */
+EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum)
+{
+ if (XTnum != compHndBBtabCount)
+ {
+ // Update all enclosing links that will get invalidated by inserting an entry at 'XTnum'
+
+ EHblkDsc* xtabEnd;
+ EHblkDsc* xtab;
+ for (xtab = compHndBBtab, xtabEnd = compHndBBtab + compHndBBtabCount; xtab < xtabEnd; xtab++)
+ {
+ if ((xtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) && (xtab->ebdEnclosingTryIndex >= XTnum))
+ {
+ // Update the enclosing scope link
+ xtab->ebdEnclosingTryIndex++;
+ }
+ if ((xtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX) && (xtab->ebdEnclosingHndIndex >= XTnum))
+ {
+ // Update the enclosing scope link
+ xtab->ebdEnclosingHndIndex++;
+ }
+ }
+
+ // We need to update the BasicBlock bbTryIndex and bbHndIndex fields for all blocks
+
+ for (BasicBlock* blk = fgFirstBB; blk; blk = blk->bbNext)
+ {
+ if (blk->hasTryIndex() && (blk->getTryIndex() >= XTnum))
+ {
+ blk->setTryIndex(blk->getTryIndex() + 1);
+ }
+
+ if (blk->hasHndIndex() && (blk->getHndIndex() >= XTnum))
+ {
+ blk->setHndIndex(blk->getHndIndex() + 1);
+ }
+ }
+ }
+
+ // Increase the number of entries in the EH table by one
+
+ if (compHndBBtabCount == compHndBBtabAllocCount)
+ {
+ // We need to reallocate the table
+
+ if (compHndBBtabAllocCount == MAX_XCPTN_INDEX)
+ { // We're already at the max size for indices to be unsigned short
+ IMPL_LIMITATION("too many exception clauses");
+ }
+
+ // Double the table size. For stress, we could use +1. Note that if the table isn't allocated
+ // yet, such as when we add an EH region for synchronized methods that don't already have one,
+ // we start at zero, so we need to make sure the new table has at least one entry.
+ unsigned newHndBBtabAllocCount = max(1, compHndBBtabAllocCount * 2);
+ noway_assert(compHndBBtabAllocCount < newHndBBtabAllocCount); // check for overflow
+
+ if (newHndBBtabAllocCount > MAX_XCPTN_INDEX)
+ {
+ newHndBBtabAllocCount = MAX_XCPTN_INDEX; // increase to the maximum size we allow
+ }
+
+ JITDUMP("*********** fgAddEHTableEntry: increasing EH table size from %d to %d\n", compHndBBtabAllocCount,
+ newHndBBtabAllocCount);
+
+ compHndBBtabAllocCount = newHndBBtabAllocCount;
+
+ EHblkDsc* newTable = new (this, CMK_BasicBlock) EHblkDsc[compHndBBtabAllocCount];
+
+ // Move over the stuff before the new entry
+
+ memcpy_s(newTable, compHndBBtabAllocCount * sizeof(*compHndBBtab), compHndBBtab, XTnum * sizeof(*compHndBBtab));
+
+ if (XTnum != compHndBBtabCount)
+ {
+ // Move over the stuff after the new entry
+ memcpy_s(newTable + XTnum + 1, (compHndBBtabAllocCount - XTnum - 1) * sizeof(*compHndBBtab),
+ compHndBBtab + XTnum, (compHndBBtabCount - XTnum) * sizeof(*compHndBBtab));
+ }
+
+ // Now set the new table as the table to use. The old one gets lost, but we can't
+ // free it because we don't have a freeing allocator.
+
+ compHndBBtab = newTable;
+ }
+ else if (XTnum != compHndBBtabCount)
+ {
+ // Leave the elements before the new element alone. Move the ones after it, to make space.
+
+ EHblkDsc* HBtab = compHndBBtab + XTnum;
+
+ memmove_s(HBtab + 1, (compHndBBtabAllocCount - XTnum - 1) * sizeof(*compHndBBtab), HBtab,
+ (compHndBBtabCount - XTnum) * sizeof(*compHndBBtab));
+ }
+
+ // Now the entry is there, but not filled in
+
+ compHndBBtabCount++;
+ return compHndBBtab + XTnum;
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+#if !FEATURE_EH
+
+/*****************************************************************************
+ * fgRemoveEH: To facilitate the bring-up of new platforms without having to
+ * worry about fully implementing EH, we want to simply remove EH constructs
+ * from the IR. This works because a large percentage of our tests contain
+ * EH constructs but don't actually throw exceptions. This function removes
+ * 'catch', 'filter', 'filter-handler', and 'fault' clauses completely.
+ * It requires that the importer has created the EH table, and that normal
+ * EH well-formedness tests have been done, and 'leave' opcodes have been
+ * imported.
+ *
+ * It currently does not handle 'finally' clauses, so tests that include
+ * 'finally' will NYI(). To handle 'finally', we would need to inline the
+ * 'finally' clause IL at each exit from a finally-protected 'try', or
+ * else call the 'finally' clause, like normal.
+ *
+ * Walk the EH table from beginning to end. If a table entry is nested within
+ * a handler, we skip it, as we'll delete its code when we get to the enclosing
+ * handler. If a clause is enclosed within a 'try', or has no nesting, then we delete
+ * it (and its range of code blocks). We don't need to worry about cleaning up
+ * the EH table entries as we remove the individual handlers (such as calling
+ * fgRemoveEHTableEntry()), as we'll null out the entire table at the end.
+ *
+ * This function assumes FEATURE_EH_FUNCLETS is defined.
+ */
+void Compiler::fgRemoveEH()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("\n*************** In fgRemoveEH()\n");
+#endif // DEBUG
+
+ if (compHndBBtabCount == 0)
+ {
+ JITDUMP("No EH to remove\n\n");
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** Before fgRemoveEH()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Make sure we're early in compilation, so we don't need to update lots of data structures.
+ assert(!fgComputePredsDone);
+ assert(!fgDomsComputed);
+ assert(!fgFuncletsCreated);
+ assert(fgFirstFuncletBB == nullptr); // this should follow from "!fgFuncletsCreated"
+ assert(!optLoopsMarked);
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ if (HBtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ // This entry is nested within some other handler. So, don't delete the
+ // EH entry here; let the enclosing handler delete it. Note that for this
+ // EH entry, both the 'try' and handler portions are fully nested within
+ // the enclosing handler region, due to proper nesting rules.
+ continue;
+ }
+
+ if (HBtab->HasCatchHandler() || HBtab->HasFilter() || HBtab->HasFaultHandler())
+ {
+ // Remove all the blocks associated with the handler. Note that there is no
+ // fall-through into the handler, or fall-through out of the handler, so
+ // just deleting the blocks is sufficient. Note, however, that for every
+ // BBJ_EHCATCHRET we delete, we need to fix up the reference count of the
+ // block it points to (by subtracting one from its reference count).
+ // Note that the blocks for a filter immediately precede the blocks for its associated filter-handler.
+
+ BasicBlock* blkBeg = HBtab->HasFilter() ? HBtab->ebdFilter : HBtab->ebdHndBeg;
+ BasicBlock* blkLast = HBtab->ebdHndLast;
+
+ // Splice out the range of blocks from blkBeg to blkLast (inclusive).
+ fgUnlinkRange(blkBeg, blkLast);
+
+ BasicBlock* blk;
+
+ // Walk the unlinked blocks and mark them as having been removed.
+ for (blk = blkBeg; blk != blkLast->bbNext; blk = blk->bbNext)
+ {
+ blk->bbFlags |= BBF_REMOVED;
+
+ if (blk->bbJumpKind == BBJ_EHCATCHRET)
+ {
+ assert(blk->bbJumpDest->bbRefs > 0);
+ blk->bbJumpDest->bbRefs -= 1;
+ }
+ }
+
+ // Walk the blocks of the 'try' and clear data that makes them appear to be within a 'try'.
+ for (blk = HBtab->ebdTryBeg; blk != HBtab->ebdTryLast->bbNext; blk = blk->bbNext)
+ {
+ blk->clearTryIndex();
+ blk->bbFlags &= ~BBF_TRY_BEG;
+ }
+
+ // If we are deleting a range of blocks whose last block is
+ // the 'last' block of an enclosing try/hnd region, we need to
+ // fix up the EH table. We only care about less nested
+ // EH table entries, since we've already deleted everything up to XTnum.
+
+ unsigned XTnum2;
+ EHblkDsc* HBtab2;
+ for (XTnum2 = XTnum + 1, HBtab2 = compHndBBtab + XTnum2; XTnum2 < compHndBBtabCount; XTnum2++, HBtab2++)
+ {
+ // Handle case where deleted range is at the end of a 'try'.
+ if (HBtab2->ebdTryLast == blkLast)
+ {
+ fgSetTryEnd(HBtab2, blkBeg->bbPrev);
+ }
+ // Handle case where deleted range is at the end of a handler.
+ // (This shouldn't happen, though, because we don't delete handlers
+ // nested within other handlers; we wait until we get to the
+ // enclosing handler.)
+ if (HBtab2->ebdHndLast == blkLast)
+ {
+ unreached();
+ }
+ }
+ }
+ else
+ {
+ // It must be a 'finally'. We still need to call the finally. Note that the
+ // 'finally' can be "called" from multiple locations (e.g., the 'try' block
+ // can have multiple 'leave' instructions, each leaving to different targets,
+ // and each going through the 'finally'). We could inline the 'finally' at each
+ // LEAVE site within a 'try'. If the 'try' exits at all (that is, no infinite loop),
+ // there will be at least one since there is no "fall through" at the end of
+ // the 'try'.
+
+ assert(HBtab->HasFinallyHandler());
+
+ NYI("remove finally blocks");
+ }
+ } /* end of the for loop over XTnum */
+
+#ifdef DEBUG
+ // Make sure none of the remaining blocks have any EH.
+
+ BasicBlock* blk;
+ foreach_block(this, blk)
+ {
+ assert(!blk->hasTryIndex());
+ assert(!blk->hasHndIndex());
+ assert((blk->bbFlags & BBF_TRY_BEG) == 0);
+ assert((blk->bbFlags & BBF_FUNCLET_BEG) == 0);
+ assert((blk->bbFlags & BBF_REMOVED) == 0);
+ assert(blk->bbCatchTyp == BBCT_NONE);
+ }
+#endif // DEBUG
+
+ // Delete the EH table
+
+ compHndBBtab = nullptr;
+ compHndBBtabCount = 0;
+ // Leave compHndBBtabAllocCount alone.
+
+ // Renumber the basic blocks
+ JITDUMP("\nRenumbering the basic blocks for fgRemoveEH\n");
+ fgRenumberBlocks();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgRemoveEH()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+#endif
+}
+
+#endif // !FEATURE_EH
+
+/*****************************************************************************
+ *
+ * Sort the EH table if necessary.
+ */
+
+void Compiler::fgSortEHTable()
+{
+ if (!fgNeedToSortEHTable)
+ {
+ return;
+ }
+
+ // Now, all fields of the EH table are set except for those that are related
+ // to nesting. We need to first sort the table to ensure that an EH clause
+ // appears before any try or handler that it is nested within. The CLI spec
+ // requires this for nesting in 'try' clauses, but does not require this
+ // for handler clauses. However, parts of the JIT do assume this ordering.
+ //
+ // For example:
+ //
+ // try { // A
+ // } catch {
+ // try { // B
+ // } catch {
+ // }
+ // }
+ //
+ // In this case, the EH clauses for A and B have no required ordering: the
+ // clause for either A or B can come first, despite B being nested within
+ // the catch clause for A.
+ //
+ // The CLI spec, section 12.4.2.5 "Overview of exception handling", states:
+ // "The ordering of the exception clauses in the Exception Handler Table is
+ // important. If handlers are nested, the most deeply nested try blocks shall
+ // come before the try blocks that enclose them."
+ //
+ // Note, in particular, that it doesn't say "shall come before the *handler*
+ // blocks that enclose them".
+ //
+ // Also, the same section states, "When an exception occurs, the CLI searches
+ // the array for the first protected block that (1) Protects a region including the
+ // current instruction pointer and (2) Is a catch handler block and (3) Whose
+ // filter wishes to handle the exception."
+ //
+ // Once again, nothing about the ordering of the catch blocks.
+ //
+ // A more complicated example:
+ //
+ // try { // A
+ // } catch {
+ // try { // B
+ // try { // C
+ // } catch {
+ // }
+ // } catch {
+ // }
+ // }
+ //
+ // The clause for C must come before the clause for B, but the clause for A can
+ // be anywhere. Thus, we could have these orderings: ACB, CAB, CBA.
+ //
+ // One more example:
+ //
+ // try { // A
+ // } catch {
+ // try { // B
+ // } catch {
+ // try { // C
+ // } catch {
+ // }
+ // }
+ // }
+ //
+ // There is no ordering requirement: the EH clauses can come in any order.
+ //
+ // In Dev11 (Visual Studio 2012), x86 did not sort the EH table (it never had before)
+ // but ARM did. It turns out that not sorting the table can cause the bbHndIndex value to be
+ // set incorrectly in some nested cases, and that can lead to a security exploit
+ // that allows the execution of arbitrary code.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgSortEHTable: Sorting EH table\n");
+ }
+#endif // DEBUG
+
+ EHblkDsc* xtab1;
+ EHblkDsc* xtab2;
+ unsigned xtabnum1, xtabnum2;
+
+ for (xtabnum1 = 0, xtab1 = compHndBBtab; xtabnum1 < compHndBBtabCount; xtabnum1++, xtab1++)
+ {
+ for (xtabnum2 = xtabnum1 + 1, xtab2 = xtab1 + 1; xtabnum2 < compHndBBtabCount; xtabnum2++, xtab2++)
+ {
+ // If the nesting is wrong, swap them. The nesting is wrong if
+ // EH region 2 is nested in the try, handler, or filter of EH region 1.
+ // Note that due to proper nesting rules, if any of 2 is nested in
+ // the try or handler or filter of 1, then all of 2 is nested.
+ // We must be careful when comparing the offsets of the 'try' clause, because
+ // for "mutually-protect" try/catch, the 'try' bodies will be identical.
+ // For this reason, we use the handler region to check nesting. Note
+ // that we must check both beginning and end: a nested region can have a 'try'
+ // body that starts at the beginning of a handler. Thus, if we just compared the
+ // handler begin offset, we might get confused and think it is nested.
+
+ IL_OFFSET hndBegOff = xtab2->ebdHndBegOffset;
+ IL_OFFSET hndEndOff = xtab2->ebdHndEndOffset;
+ assert(hndEndOff > hndBegOff);
+
+ if ((hndBegOff >= xtab1->ebdTryBegOffset && hndEndOff <= xtab1->ebdTryEndOffset) ||
+ (hndBegOff >= xtab1->ebdHndBegOffset && hndEndOff <= xtab1->ebdHndEndOffset) ||
+ (xtab1->HasFilter() && (hndBegOff >= xtab1->ebdFilterBegOffset && hndEndOff <= xtab1->ebdHndBegOffset))
+ // Note that end of filter is beginning of handler
+ )
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgSortEHTable: Swapping out-of-order EH#%u and EH#%u\n", xtabnum1, xtabnum2);
+ }
+
+ // Assert that the 'try' region is also nested in the same place as the handler
+
+ IL_OFFSET tryBegOff = xtab2->ebdTryBegOffset;
+ IL_OFFSET tryEndOff = xtab2->ebdTryEndOffset;
+ assert(tryEndOff > tryBegOff);
+
+ if (hndBegOff >= xtab1->ebdTryBegOffset && hndEndOff <= xtab1->ebdTryEndOffset)
+ {
+ assert(tryBegOff >= xtab1->ebdTryBegOffset && tryEndOff <= xtab1->ebdTryEndOffset);
+ }
+ if (hndBegOff >= xtab1->ebdHndBegOffset && hndEndOff <= xtab1->ebdHndEndOffset)
+ {
+ assert(tryBegOff >= xtab1->ebdHndBegOffset && tryEndOff <= xtab1->ebdHndEndOffset);
+ }
+ if (xtab1->HasFilter() &&
+ (hndBegOff >= xtab1->ebdFilterBegOffset && hndEndOff <= xtab1->ebdHndBegOffset))
+ {
+ assert(tryBegOff >= xtab1->ebdFilterBegOffset && tryEndOff <= xtab1->ebdHndBegOffset);
+ }
+#endif // DEBUG
+
+ // Swap them!
+ EHblkDsc tmp = *xtab1;
+ *xtab1 = *xtab2;
+ *xtab2 = tmp;
+ }
+ }
+ }
+}
+
+// fgNormalizeEH: Enforce the following invariants:
+//
+// 1. No block is both the first block of a handler and the first block of a try. In IL (and on entry
+// to this function), this can happen if the "try" is more nested than the handler.
+//
+// For example, consider:
+//
+// try1 ----------------- BB01
+// | BB02
+// |--------------------- BB03
+// handler1
+// |----- try2 ---------- BB04
+// | | BB05
+// | handler2 ------ BB06
+// | | BB07
+// | --------------- BB08
+// |--------------------- BB09
+//
+// Thus, the start of handler1 and the start of try2 are the same block. We will transform this to:
+//
+// try1 ----------------- BB01
+// | BB02
+// |--------------------- BB03
+// handler1 ------------- BB10 // empty block
+// | try2 ---------- BB04
+// | | BB05
+// | handler2 ------ BB06
+// | | BB07
+// | --------------- BB08
+// |--------------------- BB09
+//
+// 2. No block is the first block of more than one try or handler region.
+// (Note that filters cannot have EH constructs nested within them, so there can be no nested try or
+// handler that shares the filter begin or last block. For try/filter/filter-handler constructs nested
+// within a try or handler region, note that the filter block cannot be the first block of the try,
+// nor can it be the first block of the handler, since you can't "fall into" a filter, which that situation
+// would require.)
+//
+// For example, we will transform this:
+//
+// try3 try2 try1
+// |--- |--- |--- BB01
+// | | | BB02
+// | | |--- BB03
+// | | BB04
+// | |------------ BB05
+// | BB06
+// |------------------- BB07
+//
+// to this:
+//
+// try3 ------------- BB08 // empty BBJ_NONE block
+// | try2 ------ BB09 // empty BBJ_NONE block
+// | | try1
+// | | |--- BB01
+// | | | BB02
+// | | |--- BB03
+// | | BB04
+// | |------------ BB05
+// | BB06
+// |------------------- BB07
+//
+// The benefit of this is that adding a block to an EH region will not require examining every EH region,
+// looking for possible shared "first" blocks to adjust. It also makes it easier to put code at the top
+// of a particular EH region, especially for loop optimizations.
+//
+// These empty blocks (BB08, BB09) will generate no code (unless some code is subsequently placed into them),
+// and will have the same native code offset as BB01 after code is generated. There may be labels generated
+// for them, if they are branch targets, so it is possible to have multiple labels targeting the same native
+// code offset. The blocks will not be merged with the blocks they are split from, because they will have a
+// different EH region, and we don't merge blocks from two different EH regions.
+//
+// In the example, if there are branches to BB01, we need to distribute them to BB01, BB08, or BB09, appropriately.
+// 1. A branch from BB01/BB02/BB03 to BB01 will still go to BB01. Branching to BB09 or BB08 would not be legal,
+// since it would branch out of a try region.
+// 2. A branch from BB04/BB05 to BB01 will instead branch to BB09. Branching to BB08 would not be legal. Note
+// that branching to BB01 would still be legal, so we have a choice. It makes the most sense to branch to BB09,
+// so the source and target of a branch are in the same EH region.
+// 3. Similarly, a branch from BB06/BB07 to BB01 will go to BB08, even though branching to BB09 would be legal.
+// 4. A branch from outside this loop (at the top-level) to BB01 will go to BB08. This is one case where the
+// source and target of the branch are not in the same EH region.
+//
+// The EH nesting rules for IL branches are described in the ECMA spec section 12.4.2.8.2.7 "Branches" and
+// section 12.4.2.8.2.9 "Examples".
+//
+// There is one exception to this normalization rule: we do not change "mutually protect" regions. These are cases
+// where two EH table entries have exactly the same 'try' region, used to implement C# "try / catch / catch".
+// The first handler appears by our nesting to be an "inner" handler, with ebdEnclosingTryIndex pointing to the
+// second one. It is not true nesting, though, since they both protect the same "try". Both of these EH table
+// entries must keep the same "try" region begin/last block pointers. A block in this "try" region has a try index
+// of the first ("most nested") EH table entry.
+//
+// 3. No block is the last block of more than one try or handler region. Again, as described above,
+// filters need not be considered.
+//
+// For example, we will transform this:
+//
+// try3 ----------------- BB01
+// | try2 ---------- BB02
+// | | handler1 BB03
+// | | | BB04
+// |----- |----- |------- BB05
+//
+// (where all three try regions end at BB05) to this:
+//
+// try3 ----------------- BB01
+// | try2 ---------- BB02
+// | | handler1 BB03
+// | | | BB04
+// | | |------- BB05
+// | |-------------- BB06 // empty BBJ_NONE block
+// |--------------------- BB07 // empty BBJ_NONE block
+//
+// No branches need to change: if something branched to BB05, it will still branch to BB05. If BB05 is a
+// BBJ_NONE block, then control flow will fall through the newly added blocks as well. If it is anything
+// else, it will retain that block branch type and BB06 and BB07 will be unreachable.
+//
+// The benefit of this is, once again, to remove the need to consider every EH region when adding new blocks.
+//
+// Overall, a block can appear in the EH table exactly once: as the begin or last block of a single try, filter, or
+// handler. There is one exception: for a single-block EH region, the block can appear as both the "begin" and "last"
+// block of the try, or the "begin" and "last" block of the handler (note that filters don't have a "last" block stored,
+// so this case doesn't apply.)
+// (Note: we could remove this special case if we wanted, and if it helps anything, but it doesn't appear that it will
+// help.)
+//
+// These invariants simplify a number of things. When inserting a new block into a region, it is not necessary to
+// traverse the entire EH table looking to see if any EH region needs to be updated. You only ever need to update a
+// single region (except for mutually-protect "try" regions).
+//
+// Also, for example, when we're trying to determine the successors of a block B1 that leads into a try T1, if a block
+// B2 violates invariant #3 by being the first block of both the handler of T1, and an enclosed try T2, inserting a
+// block to enforce this invariant prevents us from having to consider the first block of T2's handler as a possible
+// successor of B1. This is somewhat akin to breaking of "critical edges" in a flowgraph.
+
+void Compiler::fgNormalizeEH()
+{
+ if (compHndBBtabCount == 0)
+ {
+ // No EH? Nothing to do.
+ INDEBUG(fgNormalizeEHDone = true;)
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgNormalizeEH()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif
+
+ bool modified = false;
+
+ // Case #1: Prevent the first block of a handler from also being the first block of a 'try'.
+ if (fgNormalizeEHCase1())
+ {
+ modified = true;
+ }
+
+ // Case #2: Prevent any two EH regions from starting with the same block (after case #1, we only need to worry about
+ // 'try' begin blocks).
+ if (fgNormalizeEHCase2())
+ {
+ modified = true;
+ }
+
+#if 0
+ // Case 3 normalization is disabled. The JIT really doesn't like having extra empty blocks around, especially
+ // blocks that are unreachable. There are lots of asserts when such things occur. We will re-evaluate whether we
+ // can do this normalization.
+ // Note: there are cases in fgVerifyHandlerTab() that are also disabled to match this.
+
+ // Case #3: Prevent any two EH regions from ending with the same block.
+ if (fgNormalizeEHCase3())
+ {
+ modified = true;
+ }
+
+#endif // 0
+
+ INDEBUG(fgNormalizeEHDone = true;)
+
+ if (modified)
+ {
+ // If we computed the cheap preds, don't let them leak out, in case other code doesn't maintain them properly.
+ if (fgCheapPredsValid)
+ {
+ fgRemovePreds();
+ }
+
+ JITDUMP("Added at least one basic block in fgNormalizeEH.\n");
+ fgRenumberBlocks();
+#ifdef DEBUG
+ // fgRenumberBlocks() will dump all the blocks and the handler table, so we don't need to do it here.
+ fgVerifyHandlerTab();
+#endif
+ }
+ else
+ {
+ JITDUMP("No EH normalization performed.\n");
+ }
+}
+
+bool Compiler::fgNormalizeEHCase1()
+{
+ bool modified = false;
+
+ //
+ // Case #1: Is the first block of a handler also the first block of any try?
+ //
+ // Do this as a separate loop from case #2 to simplify the logic for cases where we have both multiple identical
+ // 'try' begin blocks as well as this case, e.g.:
+ // try {
+ // } finally { try { try {
+ // } catch {}
+ // } catch {}
+ // }
+ // where the finally/try/try are all the same block.
+ // We also do this before case #2, so when we get to case #2, we only need to worry about updating 'try' begin
+ // blocks (and only those within the 'try' region's parents), not handler begin blocks, when we are inserting new
+ // header blocks.
+ //
+
+ for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ EHblkDsc* eh = ehGetDsc(XTnum);
+
+ BasicBlock* handlerStart = eh->ebdHndBeg;
+ EHblkDsc* handlerStartContainingTry = ehGetBlockTryDsc(handlerStart);
+ // If the handler start block is in a try, and is in fact the first block of that try...
+ if (handlerStartContainingTry != nullptr && handlerStartContainingTry->ebdTryBeg == handlerStart)
+ {
+ // ...then we want to insert an empty, non-removable block outside the try to be the new first block of the
+ // handler.
+ BasicBlock* newHndStart = bbNewBasicBlock(BBJ_NONE);
+ fgInsertBBbefore(eh->ebdHndBeg, newHndStart);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Handler begin for EH#%02u and 'try' begin for EH#%02u are the same block; inserted new BB%02u "
+ "before BB%02u as new handler begin for EH#%u.\n",
+ XTnum, ehGetIndex(handlerStartContainingTry), newHndStart->bbNum, eh->ebdHndBeg->bbNum, XTnum);
+ }
+#endif // DEBUG
+
+ // The new block is the new handler begin.
+ eh->ebdHndBeg = newHndStart;
+
+ // Try index is the same as the enclosing try, if any, of eh:
+ if (eh->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ newHndStart->clearTryIndex();
+ }
+ else
+ {
+ newHndStart->setTryIndex(eh->ebdEnclosingTryIndex);
+ }
+ newHndStart->setHndIndex(XTnum);
+ newHndStart->bbCatchTyp = handlerStart->bbCatchTyp;
+ handlerStart->bbCatchTyp = BBCT_NONE; // Now handlerStart is no longer the start of a handler...
+ newHndStart->bbCodeOffs = handlerStart->bbCodeOffs;
+ newHndStart->bbCodeOffsEnd = newHndStart->bbCodeOffs; // code size = 0. TODO: use BAD_IL_OFFSET instead?
+ newHndStart->inheritWeight(handlerStart);
+#if FEATURE_STACK_FP_X87
+ newHndStart->bbFPStateX87 = codeGen->FlatFPAllocFPState(handlerStart->bbFPStateX87);
+#endif // FEATURE_STACK_FP_X87
+ newHndStart->bbFlags |= (BBF_DONT_REMOVE | BBF_INTERNAL | BBF_HAS_LABEL);
+ modified = true;
+
+#ifdef DEBUG
+ if (0 && verbose) // Normally this is way too verbose, but it is useful for debugging
+ {
+ printf("*************** fgNormalizeEH() made a change\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+ }
+ }
+
+ return modified;
+}
+
+bool Compiler::fgNormalizeEHCase2()
+{
+ bool modified = false;
+
+ //
+ // Case #2: Make sure no two 'try' have the same begin block (except for mutually-protect regions).
+ // Note that this can only happen for nested 'try' regions, so we only need to look through the
+ // 'try' nesting hierarchy.
+ //
+
+ for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ EHblkDsc* eh = ehGetDsc(XTnum);
+
+ if (eh->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ BasicBlock* tryStart = eh->ebdTryBeg;
+ BasicBlock* insertBeforeBlk = tryStart; // If we need to insert new blocks, we insert before this block.
+
+ // We need to keep track of the last "mutually protect" region so we can properly not add additional header
+ // blocks to the second and subsequent mutually protect try blocks. We can't just keep track of the EH
+ // region pointer, because we're updating the 'try' begin blocks as we go. So, we need to keep track of the
+ // pre-update 'try' begin/last blocks themselves.
+ BasicBlock* mutualTryBeg = eh->ebdTryBeg;
+ BasicBlock* mutualTryLast = eh->ebdTryLast;
+ unsigned mutualProtectIndex = XTnum;
+
+ EHblkDsc* ehOuter = eh;
+ do
+ {
+ unsigned ehOuterTryIndex = ehOuter->ebdEnclosingTryIndex;
+ ehOuter = ehGetDsc(ehOuterTryIndex);
+ BasicBlock* outerTryStart = ehOuter->ebdTryBeg;
+ if (outerTryStart == tryStart)
+ {
+ // We found two EH regions with the same 'try' begin! Should we do something about it?
+
+ if (ehOuter->ebdIsSameTry(mutualTryBeg, mutualTryLast))
+ {
+// clang-format off
+ // Don't touch mutually-protect regions: their 'try' regions must remain identical!
+ // We want to continue the looping outwards, in case we have something like this:
+ //
+ // try3 try2 try1
+ // |--- |---- |---- BB01
+ // | | | BB02
+ // | |---- |---- BB03
+ // | BB04
+ // |------------------- BB05
+ //
+ // (Thus, try1 & try2 are mutually-protect 'try' regions from BB01 to BB03. They are nested inside try3,
+ // which also starts at BB01. The 'catch' clauses have been elided.)
+ // In this case, we'll decline to add a new header block for try2, but we will add a new one for try3, ending with:
+ //
+ // try3 try2 try1
+ // |------------------- BB06
+ // | |---- |---- BB01
+ // | | | BB02
+ // | |---- |---- BB03
+ // | BB04
+ // |------------------- BB05
+ //
+ // More complicated (yes, this is real):
+ //
+ // try {
+ // try {
+ // try {
+ // try {
+ // try {
+ // try {
+ // try {
+ // try {
+ // }
+ // catch {} // mutually-protect set #1
+ // catch {}
+ // } finally {}
+ // }
+ // catch {} // mutually-protect set #2
+ // catch {}
+ // catch {}
+ // } finally {}
+ // } catch {}
+ // } finally {}
+ // } catch {}
+ // } finally {}
+ //
+ // In this case, all the 'try' start at the same block! Note that there are two sets of mutually-protect regions,
+ // separated by some nesting.
+// clang-format on
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Mutually protect regions EH#%u and EH#%u; leaving identical 'try' begin blocks.\n",
+ mutualProtectIndex, ehGetIndex(ehOuter));
+ }
+#endif // DEBUG
+
+ // We still need to update the tryBeg, in case a more nested region already inserted a new
+ // 'try' begin block (and thus moved insertBeforeBlk).
+ ehOuter->ebdTryBeg = insertBeforeBlk;
+ }
+ else
+ {
+ // We're in a new set of mutual protect regions, so don't compare against the original.
+ mutualTryBeg = ehOuter->ebdTryBeg;
+ mutualTryLast = ehOuter->ebdTryLast;
+ mutualProtectIndex = ehOuterTryIndex;
+
+ // We're going to need the preds. We compute them here, before inserting the new block,
+ // so our logic to add/remove preds below is the same for both the first time preds are
+ // created and subsequent times.
+ if (!fgCheapPredsValid)
+ {
+ fgComputeCheapPreds();
+ }
+
+ // We've got multiple 'try' blocks starting at the same place!
+ // Add a new first 'try' block for 'ehOuter' that will be outside 'eh'.
+
+ BasicBlock* newTryStart = bbNewBasicBlock(BBJ_NONE);
+ fgInsertBBbefore(insertBeforeBlk, newTryStart);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("'try' begin for EH#%u and EH#%u are same block; inserted new BB%02u before BB%02u "
+ "as new 'try' begin for EH#%u.\n",
+ ehOuterTryIndex, XTnum, newTryStart->bbNum, insertBeforeBlk->bbNum, ehOuterTryIndex);
+ }
+#endif // DEBUG
+
+ // The new block is the new 'try' begin.
+ ehOuter->ebdTryBeg = newTryStart;
+
+ newTryStart->copyEHRegion(tryStart); // Copy the EH region info
+ newTryStart->setTryIndex(ehOuterTryIndex); // ... but overwrite the 'try' index
+ newTryStart->bbCatchTyp = BBCT_NONE;
+ newTryStart->bbCodeOffs = tryStart->bbCodeOffs;
+ newTryStart->bbCodeOffsEnd =
+ newTryStart->bbCodeOffs; // code size = 0. TODO: use BAD_IL_OFFSET instead?
+ newTryStart->inheritWeight(tryStart);
+#if FEATURE_STACK_FP_X87
+ newTryStart->bbFPStateX87 = codeGen->FlatFPAllocFPState(tryStart->bbFPStateX87);
+#endif // FEATURE_STACK_FP_X87
+
+ // Note that we don't need to clear any flags on the old try start, since it is still a 'try'
+ // start.
+ newTryStart->bbFlags |= (BBF_TRY_BEG | BBF_DONT_REMOVE | BBF_INTERNAL | BBF_HAS_LABEL);
+
+ // Now we need to split any flow edges targeting the old try begin block between the old
+ // and new block. Note that if we are handling a multiply-nested 'try', we may have already
+ // split the inner set. So we need to split again, from the most enclosing block that we've
+ // already created, namely, insertBeforeBlk.
+ //
+ // For example:
+ //
+ // try3 try2 try1
+ // |---- |---- |---- BB01
+ // | | | BB02
+ // | | |---- BB03
+ // | |----------- BB04
+ // |------------------ BB05
+ //
+ // We'll loop twice, to create two header blocks, one for try2, and the second time for try3
+ // (in that order).
+ // After the first loop, we have:
+ //
+ // try3 try2 try1
+ // |---- BB06
+ // |---- | |---- BB01
+ // | | | BB02
+ // | | |---- BB03
+ // | |----------- BB04
+ // |------------------ BB05
+ //
+ // And all the external edges have been changed to point at try2. On the next loop, we'll create
+ // a unique header block for try3, and split the edges between try2 and try3, leaving us with:
+ //
+ // try3 try2 try1
+ // |---- BB07
+ // | |---- BB06
+ // | | |---- BB01
+ // | | | BB02
+ // | | |---- BB03
+ // | |----------- BB04
+ // |------------------ BB05
+
+ BasicBlockList* nextPred; // we're going to update the pred list as we go, so we need to keep
+ // track of the next pred in case it gets deleted.
+ for (BasicBlockList* pred = insertBeforeBlk->bbCheapPreds; pred != nullptr; pred = nextPred)
+ {
+ nextPred = pred->next;
+
+ // Who gets this predecessor?
+ BasicBlock* predBlock = pred->block;
+
+ if (!BasicBlock::sameTryRegion(insertBeforeBlk, predBlock))
+ {
+ // Move the edge to target newTryStart instead of insertBeforeBlk.
+ fgAddCheapPred(newTryStart, predBlock);
+ fgRemoveCheapPred(insertBeforeBlk, predBlock);
+
+ // Now change the branch. If it was a BBJ_NONE fall-through to the top block, this will
+ // do nothing. Since cheap preds contains dups (for switch duplicates), we will call
+ // this once per dup.
+ fgReplaceJumpTarget(predBlock, newTryStart, insertBeforeBlk);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Redirect BB%02u target from BB%02u to BB%02u.\n", predBlock->bbNum,
+ insertBeforeBlk->bbNum, newTryStart->bbNum);
+ }
+#endif // DEBUG
+ }
+ }
+
+ // The new block (a fall-through block) is a new predecessor.
+ fgAddCheapPred(insertBeforeBlk, newTryStart);
+
+ // We don't need to update the tryBeg block of other EH regions here because we are looping
+ // outwards in enclosing try index order, and we'll get to them later.
+
+ // Move the insert block backwards, to the one we just inserted.
+ insertBeforeBlk = insertBeforeBlk->bbPrev;
+ assert(insertBeforeBlk == newTryStart);
+
+ modified = true;
+
+#ifdef DEBUG
+ if (0 && verbose) // Normally this is way too verbose, but it is useful for debugging
+ {
+ printf("*************** fgNormalizeEH() made a change\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+ }
+ }
+ else
+ {
+ // If the outer region's 'try' start block isn't the same, then none of the more-enclosing
+ // 'try' regions (if any) can have the same 'try' start block, so we're done.
+ // Note that we could have a situation like this:
+ //
+ // try4 try3 try2 try1
+ // |--- |--- | | BB01
+ // | | | | BB02
+ // | | |---- |---- BB03
+ // | | | BB04
+ // | | |------------ BB05
+ // | | BB06
+ // | |------------------- BB07
+ // |-------------------------- BB08
+ //
+ // (Thus, try1 & try2 start at BB03, and are nested inside try3 & try4, which both start at BB01.)
+ // In this case, we'll process try1 and try2, then break out. Later, we'll get to try3 and process
+ // it and try4.
+
+ break;
+ }
+ } while (ehOuter->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+ }
+ }
+
+ return modified;
+}
+
+bool Compiler::fgNormalizeEHCase3()
+{
+ bool modified = false;
+
+ //
+ // Case #3: Make sure no two 'try' or handler regions have the same 'last' block (except for mutually protect 'try'
+ // regions). As above, there has to be EH region nesting for this to occur. However, since we need to consider
+ // handlers, there are more cases.
+ //
+ // There are four cases to consider:
+ // (1) try nested in try
+ // (2) handler nested in try
+ // (3) try nested in handler
+ // (4) handler nested in handler
+ //
+ // Note that, before funclet generation, it would be unusual, though legal IL, for a 'try' to come at the end
+ // of an EH region (either 'try' or handler region), since that implies that its corresponding handler precedes it.
+ // That will never happen in C#, but is legal in IL.
+ //
+ // Only one of these cases can happen. For example, if we have case (2), where a try/catch is nested in a 'try' and
+ // the nested handler has the same 'last' block as the outer handler, then, due to nesting rules, the nested 'try'
+ // must also be within the outer handler, and obviously cannot share the same 'last' block.
+ //
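+ // As a minimal sketch for case (1), with hypothetical block numbers: if inner 'try' EH#0 and outer
+ // 'try' EH#1 both end at BB10, we insert a new empty BBJ_NONE block BB11 after BB10 and make it the
+ // new 'try' last block of EH#1 only, so the two regions no longer share a 'last' block. The other
+ // cases are handled analogously, updating the outer region's handler 'last' block where appropriate.
+ //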
+
+ for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ EHblkDsc* eh = ehGetDsc(XTnum);
+
+ // Find the EH region 'eh' is most nested within, either 'try' or handler or none.
+ bool outerIsTryRegion;
+ unsigned ehOuterIndex = eh->ebdGetEnclosingRegionIndex(&outerIsTryRegion);
+
+ if (ehOuterIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ EHblkDsc* ehInner = eh; // This gets updated as we loop outwards in the EH nesting
+ unsigned ehInnerIndex = XTnum; // This gets updated as we loop outwards in the EH nesting
+ bool innerIsTryRegion;
+
+ EHblkDsc* ehOuter = ehGetDsc(ehOuterIndex);
+
+ // Debugging: say what type of block we're updating.
+ INDEBUG(const char* outerType = ""; const char* innerType = "";)
+
+ // 'insertAfterBlk' is the place we will insert new "normalization" blocks. We don't know yet if we will
+ // insert them after the innermost 'try' or handler's "last" block, so we set it to nullptr. Once we
+ // determine the innermost region that is equivalent, we set this, and then update it incrementally as we
+ // loop outwards.
+ BasicBlock* insertAfterBlk = nullptr;
+
+ bool foundMatchingLastBlock = false;
+
+ // This is set to 'false' for mutual protect regions for which we will not insert a normalization block.
+ bool insertNormalizationBlock = true;
+
+ // Keep track of what the 'try' index and handler index should be for any new normalization block that we
+ // insert. If we have a sequence of alternating nested 'try' and handlers with the same 'last' block, we'll
+ // need to update these as we go. For example:
+ // try { // EH#5
+ // ...
+ // catch { // EH#4
+ // ...
+ // try { // EH#3
+ // ...
+ // catch { // EH#2
+ // ...
+ // try { // EH#1
+ // BB01 // try=1, hnd=2
+ // } } } } } // all the 'last' blocks are the same
+ //
+ // after normalization:
+ //
+ // try { // EH#5
+ // ...
+ // catch { // EH#4
+ // ...
+ // try { // EH#3
+ // ...
+ // catch { // EH#2
+ // ...
+ // try { // EH#1
+ // BB01 // try=1, hnd=2
+ // }
+ // BB02 // try=3, hnd=2
+ // }
+ // BB03 // try=3, hnd=4
+ // }
+ // BB04 // try=5, hnd=4
+ // }
+ // BB05 // try=5, hnd=0 (no enclosing hnd)
+ // }
+ //
+ unsigned nextTryIndex = EHblkDsc::NO_ENCLOSING_INDEX; // Initialization only needed to quell compiler
+ // warnings.
+ unsigned nextHndIndex = EHblkDsc::NO_ENCLOSING_INDEX;
+
+ // We compare the outer region against the inner region's 'try' or handler, determined by the
+ // 'outerIsTryRegion' variable. Once we decide that, we know exactly the 'last' pointer that we will use to
+ // compare against all enclosing EH regions.
+ //
+ // For example, if we have these nested EH regions (omitting some corresponding try/catch clauses for each
+ // nesting level):
+ //
+ // try {
+ // ...
+ // catch {
+ // ...
+ // try {
+ // } } } // all the 'last' blocks are the same
+ //
+ // then we determine that the innermost region we are going to compare against is the 'try' region. There's
+ // no reason to compare against its handler region for any enclosing region (since it couldn't possibly
+ // share a 'last' block with the enclosing region). However, there's no harm, either (and it simplifies
+ // the code for the first set of comparisons to be the same as subsequent, more enclosing cases).
+ BasicBlock* lastBlockPtrToCompare = nullptr;
+
+ // We need to keep track of the last "mutual protect" region so we can properly not add additional blocks
+ // to the second and subsequent mutual protect try blocks. We can't just keep track of the EH region
+ // pointer, because we're updating the last blocks as we go. So, we need to keep track of the
+ // pre-update 'try' begin/last blocks themselves. These only matter if the "last" blocks that match are
+ // from two (or more) nested 'try' regions.
+ BasicBlock* mutualTryBeg = nullptr;
+ BasicBlock* mutualTryLast = nullptr;
+
+ if (outerIsTryRegion)
+ {
+ nextTryIndex = EHblkDsc::NO_ENCLOSING_INDEX; // unused, since the outer block is a 'try' region.
+
+ // The outer (enclosing) region is a 'try'
+ if (ehOuter->ebdTryLast == ehInner->ebdTryLast)
+ {
+ // Case (1) try nested in try.
+ foundMatchingLastBlock = true;
+ INDEBUG(innerType = "try"; outerType = "try";)
+ insertAfterBlk = ehOuter->ebdTryLast;
+ lastBlockPtrToCompare = insertAfterBlk;
+
+ if (EHblkDsc::ebdIsSameTry(ehOuter, ehInner))
+ {
+ // We can't touch this 'try', since it's mutual protect.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Mutual protect regions EH#%u and EH#%u; leaving identical 'try' last blocks.\n",
+ ehOuterIndex, ehInnerIndex);
+ }
+#endif // DEBUG
+
+ insertNormalizationBlock = false;
+ }
+ else
+ {
+ nextHndIndex = ehInner->ebdTryLast->hasHndIndex() ? ehInner->ebdTryLast->getHndIndex()
+ : EHblkDsc::NO_ENCLOSING_INDEX;
+ }
+ }
+ else if (ehOuter->ebdTryLast == ehInner->ebdHndLast)
+ {
+ // Case (2) handler nested in try.
+ foundMatchingLastBlock = true;
+ INDEBUG(innerType = "handler"; outerType = "try";)
+ insertAfterBlk = ehOuter->ebdTryLast;
+ lastBlockPtrToCompare = insertAfterBlk;
+
+ assert(ehInner->ebdHndLast->getHndIndex() == ehInnerIndex);
+ nextHndIndex = ehInner->ebdEnclosingHndIndex;
+ }
+ else
+ {
+ // No "last" pointers match!
+ }
+
+ if (foundMatchingLastBlock)
+ {
+ // The outer might be part of a new set of mutual protect regions (if it isn't part of one already).
+ mutualTryBeg = ehOuter->ebdTryBeg;
+ mutualTryLast = ehOuter->ebdTryLast;
+ }
+ }
+ else
+ {
+ nextHndIndex = EHblkDsc::NO_ENCLOSING_INDEX; // unused, since the outer block is a handler region.
+
+ // The outer (enclosing) region is a handler (note that it can't be a filter; there is no nesting
+ // within a filter).
+ if (ehOuter->ebdHndLast == ehInner->ebdTryLast)
+ {
+ // Case (3) try nested in handler.
+ foundMatchingLastBlock = true;
+ INDEBUG(innerType = "try"; outerType = "handler";)
+ insertAfterBlk = ehOuter->ebdHndLast;
+ lastBlockPtrToCompare = insertAfterBlk;
+
+ assert(ehInner->ebdTryLast->getTryIndex() == ehInnerIndex);
+ nextTryIndex = ehInner->ebdEnclosingTryIndex;
+ }
+ else if (ehOuter->ebdHndLast == ehInner->ebdHndLast)
+ {
+ // Case (4) handler nested in handler.
+ foundMatchingLastBlock = true;
+ INDEBUG(innerType = "handler"; outerType = "handler";)
+ insertAfterBlk = ehOuter->ebdHndLast;
+ lastBlockPtrToCompare = insertAfterBlk;
+
+ nextTryIndex = ehInner->ebdTryLast->hasTryIndex() ? ehInner->ebdTryLast->getTryIndex()
+ : EHblkDsc::NO_ENCLOSING_INDEX;
+ }
+ else
+ {
+ // No "last" pointers match!
+ }
+ }
+
+ while (foundMatchingLastBlock)
+ {
+ assert(lastBlockPtrToCompare != nullptr);
+ assert(insertAfterBlk != nullptr);
+ assert(ehOuterIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+ assert(ehOuter != nullptr);
+
+ // Add a normalization block
+
+ if (insertNormalizationBlock)
+ {
+ // Add a new last block for 'ehOuter' that will be outside the EH region that it encloses and
+ // with which it shares a 'last' pointer.
+
+ BasicBlock* newLast = bbNewBasicBlock(BBJ_NONE);
+ assert(insertAfterBlk != nullptr);
+ fgInsertBBafter(insertAfterBlk, newLast);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("last %s block for EH#%u and last %s block for EH#%u are same block; inserted new "
+ "BB%02u after BB%02u as new last %s block for EH#%u.\n",
+ outerType, ehOuterIndex, innerType, ehInnerIndex, newLast->bbNum, insertAfterBlk->bbNum,
+ outerType, ehOuterIndex);
+ }
+#endif // DEBUG
+
+ if (outerIsTryRegion)
+ {
+ ehOuter->ebdTryLast = newLast;
+ newLast->setTryIndex(ehOuterIndex);
+ if (nextHndIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ newLast->clearHndIndex();
+ }
+ else
+ {
+ newLast->setHndIndex(nextHndIndex);
+ }
+ }
+ else
+ {
+ ehOuter->ebdHndLast = newLast;
+ if (nextTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ newLast->clearTryIndex();
+ }
+ else
+ {
+ newLast->setTryIndex(nextTryIndex);
+ }
+ newLast->setHndIndex(ehOuterIndex);
+ }
+
+ newLast->bbCatchTyp =
+ BBCT_NONE; // bbCatchTyp is only set on the first block of a handler, which this is not
+ newLast->bbCodeOffs = insertAfterBlk->bbCodeOffsEnd;
+ newLast->bbCodeOffsEnd = newLast->bbCodeOffs; // code size = 0. TODO: use BAD_IL_OFFSET instead?
+ newLast->inheritWeight(insertAfterBlk);
+#if FEATURE_STACK_FP_X87
+ newLast->bbFPStateX87 = codeGen->FlatFPAllocFPState(insertAfterBlk->bbFPStateX87);
+#endif // FEATURE_STACK_FP_X87
+
+ newLast->bbFlags |= BBF_INTERNAL;
+
+ // The new block (a fall-through block) is a new predecessor.
+ if (fgCheapPredsValid)
+ {
+ fgAddCheapPred(newLast, insertAfterBlk);
+ }
+
+ // Move the insert pointer. More enclosing equivalent 'last' blocks will be inserted after this.
+ insertAfterBlk = newLast;
+
+ modified = true;
+
+#ifdef DEBUG
+ if (verbose) // Normally this is way too verbose, but it is useful for debugging
+ {
+ printf("*************** fgNormalizeEH() made a change\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+ }
+
+ // Now find the next outer enclosing EH region and see if it also shares the last block.
+ foundMatchingLastBlock = false; // assume nothing will match
+ ehInner = ehOuter;
+ ehInnerIndex = ehOuterIndex;
+ innerIsTryRegion = outerIsTryRegion;
+
+ ehOuterIndex =
+ ehOuter->ebdGetEnclosingRegionIndex(&outerIsTryRegion); // Loop outwards in the EH nesting.
+ if (ehOuterIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ // There are more enclosing regions; check for equivalent 'last' pointers.
+
+ INDEBUG(innerType = outerType; outerType = "";)
+
+ ehOuter = ehGetDsc(ehOuterIndex);
+
+ insertNormalizationBlock = true; // assume it's not mutual protect
+
+ if (outerIsTryRegion)
+ {
+ nextTryIndex = EHblkDsc::NO_ENCLOSING_INDEX; // unused, since the outer block is a 'try' region.
+
+ // The outer (enclosing) region is a 'try'
+ if (ehOuter->ebdTryLast == lastBlockPtrToCompare)
+ {
+ // Case (1) and (2): try or handler nested in try.
+ foundMatchingLastBlock = true;
+ INDEBUG(outerType = "try";)
+
+ if (innerIsTryRegion && ehOuter->ebdIsSameTry(mutualTryBeg, mutualTryLast))
+ {
+ // We can't touch this 'try', since it's mutual protect.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Mutual protect regions EH#%u and EH#%u; leaving identical 'try' last "
+ "blocks.\n",
+ ehOuterIndex, ehInnerIndex);
+ }
+#endif // DEBUG
+
+ insertNormalizationBlock = false;
+
+ // We still need to update the 'last' pointer, in case someone inserted a normalization
+ // block before the start of the mutual protect 'try' region.
+ ehOuter->ebdTryLast = insertAfterBlk;
+ }
+ else
+ {
+ if (innerIsTryRegion)
+ {
+ // Case (1) try nested in try.
+ nextHndIndex = ehInner->ebdTryLast->hasHndIndex()
+ ? ehInner->ebdTryLast->getHndIndex()
+ : EHblkDsc::NO_ENCLOSING_INDEX;
+ }
+ else
+ {
+ // Case (2) handler nested in try.
+ assert(ehInner->ebdHndLast->getHndIndex() == ehInnerIndex);
+ nextHndIndex = ehInner->ebdEnclosingHndIndex;
+ }
+ }
+
+ // The outer might be part of a new set of mutual protect regions (if it isn't part of one
+ // already).
+ mutualTryBeg = ehOuter->ebdTryBeg;
+ mutualTryLast = ehOuter->ebdTryLast;
+ }
+ }
+ else
+ {
+ nextHndIndex =
+ EHblkDsc::NO_ENCLOSING_INDEX; // unused, since the outer block is a handler region.
+
+ // The outer (enclosing) region is a handler (note that it can't be a filter; there is no
+ // nesting within a filter).
+ if (ehOuter->ebdHndLast == lastBlockPtrToCompare)
+ {
+ // Case (3) and (4): try nested in try or handler.
+ foundMatchingLastBlock = true;
+ INDEBUG(outerType = "handler";)
+
+ if (innerIsTryRegion)
+ {
+ // Case (3) try nested in handler.
+ assert(ehInner->ebdTryLast->getTryIndex() == ehInnerIndex);
+ nextTryIndex = ehInner->ebdEnclosingTryIndex;
+ }
+ else
+ {
+ // Case (4) handler nested in handler.
+ nextTryIndex = ehInner->ebdTryLast->hasTryIndex() ? ehInner->ebdTryLast->getTryIndex()
+ : EHblkDsc::NO_ENCLOSING_INDEX;
+ }
+ }
+ }
+ }
+
+ // If we get to here and foundMatchingLastBlock is false, then the inner and outer region don't share
+ // any 'last' blocks, so we're done. Note that we could have a situation like this:
+ //
+ // try4 try3 try2 try1
+ // |---- | | | BB01
+ // | |---- | | BB02
+ // | | |---- | BB03
+ // | | | |----- BB04
+ // | | |----- |----- BB05
+ // |---- |------------------- BB06
+ //
+ // (Thus, try1 & try2 end at BB05, and are nested inside try3 & try4, which both end at BB06.)
+ // In this case, we'll process try1 and try2, then break out. Later, as we iterate through the EH table,
+ // we'll get to try3 and process it and try4.
+
+ } // end while (foundMatchingLastBlock)
+ } // if (ehOuterIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ } // EH table iteration
+
+ return modified;
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+
+void Compiler::dispIncomingEHClause(unsigned num, const CORINFO_EH_CLAUSE& clause)
+{
+ printf("EH clause #%u:\n", num);
+ printf(" Flags: 0x%x", clause.Flags);
+
+ // Note: the flags field is kind of weird. It should be compared for equality
+ // to determine the type of clause, even though it looks like a bitfield. In
+ // particular, CORINFO_EH_CLAUSE_NONE is zero, so you can't use "&" to check it.
+ const DWORD CORINFO_EH_CLAUSE_TYPE_MASK = 0x7;
+ switch (clause.Flags & CORINFO_EH_CLAUSE_TYPE_MASK)
+ {
+ case CORINFO_EH_CLAUSE_NONE:
+ printf(" (catch)");
+ break;
+ case CORINFO_EH_CLAUSE_FILTER:
+ printf(" (filter)");
+ break;
+ case CORINFO_EH_CLAUSE_FINALLY:
+ printf(" (finally)");
+ break;
+ case CORINFO_EH_CLAUSE_FAULT:
+ printf(" (fault)");
+ break;
+ default:
+ printf(" (UNKNOWN type %u!)", clause.Flags & CORINFO_EH_CLAUSE_TYPE_MASK);
+ break;
+ }
+ if (clause.Flags & ~CORINFO_EH_CLAUSE_TYPE_MASK)
+ {
+ printf(" (extra unknown bits: 0x%x)", clause.Flags & ~CORINFO_EH_CLAUSE_TYPE_MASK);
+ }
+ printf("\n");
+
+ printf(" TryOffset: 0x%x\n", clause.TryOffset);
+ printf(" TryLength: 0x%x\n", clause.TryLength);
+ printf(" HandlerOffset: 0x%x\n", clause.HandlerOffset);
+ printf(" HandlerLength: 0x%x\n", clause.HandlerLength);
+ if (clause.Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ printf(" FilterOffset: 0x%x\n", clause.FilterOffset);
+ }
+ else
+ {
+ printf(" ClassToken: 0x%x\n", clause.ClassToken);
+ }
+}
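+
+// For reference, the routine above produces output of this shape for a plain catch clause
+// (the offsets and class token shown are hypothetical):
+//
+//   EH clause #0:
+//     Flags: 0x0 (catch)
+//     TryOffset: 0x2
+//     TryLength: 0x1a
+//     HandlerOffset: 0x1c
+//     HandlerLength: 0xa
+//     ClassToken: 0x1000002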
+
+void Compiler::dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& clause)
+{
+ if (opts.dspDiffable)
+ {
+ /* (( brace matching editor workaround to compensate for the following line */
+ printf("EH#%u: try [%s..%s) handled by [%s..%s) ", num, genEmitter->emitOffsetToLabel(clause.TryOffset),
+ genEmitter->emitOffsetToLabel(clause.TryLength), genEmitter->emitOffsetToLabel(clause.HandlerOffset),
+ genEmitter->emitOffsetToLabel(clause.HandlerLength));
+ }
+ else
+ {
+ /* (( brace matching editor workaround to compensate for the following line */
+ printf("EH#%u: try [%04X..%04X) handled by [%04X..%04X) ", num, dspOffset(clause.TryOffset),
+ dspOffset(clause.TryLength), dspOffset(clause.HandlerOffset), dspOffset(clause.HandlerLength));
+ }
+
+ // Note: the flags field is kind of weird. It should be compared for equality
+ // to determine the type of clause, even though it looks like a bitfield. In
+ // particular, CORINFO_EH_CLAUSE_NONE is zero, so you can't use "&" to check it.
+ // You do need to mask off the bits, though, because COR_ILEXCEPTION_CLAUSE_DUPLICATED
+ // is and'ed in.
+ const DWORD CORINFO_EH_CLAUSE_TYPE_MASK = 0x7;
+ switch (clause.Flags & CORINFO_EH_CLAUSE_TYPE_MASK)
+ {
+ case CORINFO_EH_CLAUSE_NONE:
+ printf("(class: %04X)", clause.ClassToken);
+ break;
+ case CORINFO_EH_CLAUSE_FILTER:
+ if (opts.dspDiffable)
+ {
+ /* ( brace matching editor workaround to compensate for the following line */
+ printf("filter at [%s..%s)", genEmitter->emitOffsetToLabel(clause.ClassToken),
+ genEmitter->emitOffsetToLabel(clause.HandlerOffset));
+ }
+ else
+ {
+ /* ( brace matching editor workaround to compensate for the following line */
+ printf("filter at [%04X..%04X)", dspOffset(clause.ClassToken), dspOffset(clause.HandlerOffset));
+ }
+ break;
+ case CORINFO_EH_CLAUSE_FINALLY:
+ printf("(finally)");
+ break;
+ case CORINFO_EH_CLAUSE_FAULT:
+ printf("(fault)");
+ break;
+ default:
+ printf("(UNKNOWN type %u!)", clause.Flags & CORINFO_EH_CLAUSE_TYPE_MASK);
+ assert(!"unknown type");
+ break;
+ }
+
+ if ((clause.TryOffset == clause.TryLength) && (clause.TryOffset == clause.HandlerOffset) &&
+ ((clause.Flags & (COR_ILEXCEPTION_CLAUSE_DUPLICATED | COR_ILEXCEPTION_CLAUSE_FINALLY)) ==
+ (COR_ILEXCEPTION_CLAUSE_DUPLICATED | COR_ILEXCEPTION_CLAUSE_FINALLY)))
+ {
+ printf(" cloned finally");
+ }
+ else if (clause.Flags & COR_ILEXCEPTION_CLAUSE_DUPLICATED)
+ {
+ printf(" duplicated");
+ }
+ printf("\n");
+}
+
+/*****************************************************************************/
+
+void Compiler::fgVerifyHandlerTab()
+{
+ if (compIsForInlining())
+ {
+ // We don't inline functions with EH. Don't bother verifying the EH table in the inlinee Compiler.
+ return;
+ }
+
+ if (compHndBBtabCount == 0)
+ {
+ return;
+ }
+
+ // Did we do the normalization that prevents the first block of a handler from being a 'try' block (case 1)?
+ bool handlerBegIsTryBegNormalizationDone = fgNormalizeEHDone;
+
+ // Did we do the normalization that prevents multiple EH regions (namely, 'try' blocks) from starting on the same
+ // block (case 2)?
+ bool multipleBegBlockNormalizationDone = fgNormalizeEHDone;
+
+ // Did we do the normalization that prevents multiple EH regions ('try' or handler blocks) from ending on the same
+ // block (case 3)?
+ bool multipleLastBlockNormalizationDone = false; // Currently disabled
+
+ assert(compHndBBtabCount <= compHndBBtabAllocCount);
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ assert(HBtab->ebdTryBeg != nullptr);
+ assert(HBtab->ebdTryLast != nullptr);
+ assert(HBtab->ebdHndBeg != nullptr);
+ assert(HBtab->ebdHndLast != nullptr);
+
+ assert(HBtab->ebdTryBeg->bbFlags & BBF_TRY_BEG);
+ assert(HBtab->ebdTryBeg->bbFlags & BBF_DONT_REMOVE);
+ assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL);
+
+ assert(HBtab->ebdHndBeg->bbFlags & BBF_DONT_REMOVE);
+ assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL);
+
+ assert((HBtab->ebdTryBeg->bbFlags & BBF_REMOVED) == 0);
+ assert((HBtab->ebdTryLast->bbFlags & BBF_REMOVED) == 0);
+ assert((HBtab->ebdHndBeg->bbFlags & BBF_REMOVED) == 0);
+ assert((HBtab->ebdHndLast->bbFlags & BBF_REMOVED) == 0);
+
+ if (HBtab->HasFilter())
+ {
+ assert(HBtab->ebdFilter != nullptr);
+ assert(HBtab->ebdFilter->bbFlags & BBF_DONT_REMOVE);
+ assert((HBtab->ebdFilter->bbFlags & BBF_REMOVED) == 0);
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (fgFuncletsCreated)
+ {
+ assert(HBtab->ebdHndBeg->bbFlags & BBF_FUNCLET_BEG);
+
+ if (HBtab->HasFilter())
+ {
+ assert(HBtab->ebdFilter->bbFlags & BBF_FUNCLET_BEG);
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+ }
+
+ // I want to assert things about the relative ordering of blocks in the block list using
+ // block number, but I don't want to renumber the basic blocks, which might cause a difference
+ // between debug and non-debug code paths. So, create a renumbered block mapping: map the
+ // existing block number to a renumbered block number that is ordered by block list order.
+
+ unsigned bbNumMax = compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax;
+
+ // blockNumMap[old block number] => new block number
+ size_t blockNumBytes = (bbNumMax + 1) * sizeof(unsigned);
+ unsigned* blockNumMap = (unsigned*)_alloca(blockNumBytes);
+ memset(blockNumMap, 0, blockNumBytes);
+
+ BasicBlock* block;
+ unsigned newBBnum = 1;
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ assert((block->bbFlags & BBF_REMOVED) == 0);
+ assert(1 <= block->bbNum && block->bbNum <= bbNumMax);
+ assert(blockNumMap[block->bbNum] == 0); // If this fails, we have two blocks with the same block number.
+ blockNumMap[block->bbNum] = newBBnum++;
+ }
+// Note that there may be some blockNumMap[x] == 0, for a block number 'x' that has been deleted, if the blocks
+// haven't been renumbered since the deletion.
+
+#if 0 // Useful for debugging, but don't want to put this in the dump all the time
+ if (verbose)
+ {
+ printf("fgVerifyHandlerTab block number map: BB current => BB new\n");
+ for (unsigned i = 0; i <= bbNumMax; i++)
+ {
+ if (blockNumMap[i] != 0)
+ {
+ printf("BB%02u => BB%02u\n", i, blockNumMap[i]);
+ }
+ }
+ }
+#endif
+
+ // To verify that bbCatchTyp is set properly on all blocks, and that some BBF_* flags are only set on the
+ // first block of 'try' regions or handlers, create two bool arrays indexed by block number: one for the set
+ // of blocks that are the beginning blocks of 'try' regions, and one for blocks that are the beginning of
+ // handlers (including filters). Note that since this checking function runs before EH normalization, we have
+ // to handle the case where a block can be both the beginning of a 'try' as well as the beginning of a
+ // handler. After we've iterated over the EH table, loop over all blocks and verify that only handler begin
+ // blocks have a bbCatchTyp other than BBCT_NONE, and some other things.
+
+ size_t blockBoolSetBytes = (bbNumMax + 1) * sizeof(bool);
+ bool* blockTryBegSet = (bool*)_alloca(blockBoolSetBytes);
+ bool* blockHndBegSet = (bool*)_alloca(blockBoolSetBytes);
+ for (unsigned i = 0; i <= bbNumMax; i++)
+ {
+ blockTryBegSet[i] = false;
+ blockHndBegSet[i] = false;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ bool isLegalFirstFunclet = false;
+ unsigned bbNumFirstFunclet = 0;
+
+ if (fgFuncletsCreated)
+ {
+ // Assert some things about the "first funclet block" pointer.
+ assert(fgFirstFuncletBB != nullptr);
+ assert((fgFirstFuncletBB->bbFlags & BBF_REMOVED) == 0);
+ bbNumFirstFunclet = blockNumMap[fgFirstFuncletBB->bbNum];
+ assert(bbNumFirstFunclet != 0);
+ }
+ else
+ {
+ assert(fgFirstFuncletBB == nullptr);
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ unsigned bbNumTryBeg = blockNumMap[HBtab->ebdTryBeg->bbNum];
+ unsigned bbNumTryLast = blockNumMap[HBtab->ebdTryLast->bbNum];
+ unsigned bbNumHndBeg = blockNumMap[HBtab->ebdHndBeg->bbNum];
+ unsigned bbNumHndLast = blockNumMap[HBtab->ebdHndLast->bbNum];
+ unsigned bbNumFilter = 0; // This should never get used except under "if (HBtab->HasFilter())"
+ if (HBtab->HasFilter())
+ {
+ bbNumFilter = blockNumMap[HBtab->ebdFilter->bbNum];
+ }
+
+ // Assert that the EH blocks are in the main block list
+ assert(bbNumTryBeg != 0);
+ assert(bbNumTryLast != 0);
+ assert(bbNumHndBeg != 0);
+ assert(bbNumHndLast != 0);
+ if (HBtab->HasFilter())
+ {
+ assert(bbNumFilter != 0);
+ }
+
+ // Check relative ordering of the 'beg' and 'last' blocks. Note that in IL (and in our initial block list)
+ // there is no required ordering between the 'try' and handler regions: the handler might come first!
+ // After funclets have been created, all the handler blocks come in sequence at the end of the
+ // function (this is checked below, with checks for the first funclet block). Note that a handler
+ // might contain a nested 'try', which will also then be in the "funclet region".
+ // Also, the 'try' and handler regions do not need to be adjacent.
+ assert(bbNumTryBeg <= bbNumTryLast);
+ assert(bbNumHndBeg <= bbNumHndLast);
+ if (HBtab->HasFilter())
+ {
+ // Since the filter block must be different from the handler, this condition is "<", not "<=".
+ assert(bbNumFilter < bbNumHndBeg);
+ }
+
+ // The EH regions are disjoint: the handler (including the filter, if applicable) is strictly before or after
+ // the 'try'.
+ if (HBtab->HasFilter())
+ {
+ assert((bbNumHndLast < bbNumTryBeg) || (bbNumTryLast < bbNumFilter));
+ }
+ else
+ {
+ assert((bbNumHndLast < bbNumTryBeg) || (bbNumTryLast < bbNumHndBeg));
+ }
+
+#if FEATURE_EH_FUNCLETS
+ // If funclets have been created, check the first funclet block. The first funclet block must be the
+ // first block of a filter or handler. All filter/handler blocks must come after it.
+ // Note that 'try' blocks might come either before or after it. If after, they will be nested within
+ // a handler. If before, they might be nested within a try, but not within a handler.
+
+ if (fgFuncletsCreated)
+ {
+ if (bbNumTryLast < bbNumFirstFunclet)
+ {
+ // This EH region can't be nested in a handler, or else it would be in the funclet region.
+ assert(HBtab->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX);
+ }
+ else
+ {
+ // The last block of the 'try' is in the funclet region; make sure the whole thing is.
+ if (multipleBegBlockNormalizationDone)
+ {
+ assert(bbNumTryBeg > bbNumFirstFunclet); // ">" because a 'try' can't be the first block of a
+ // handler (by EH normalization).
+ }
+ else
+ {
+ assert(bbNumTryBeg >= bbNumFirstFunclet);
+ }
+
+ // This EH region must be nested in a handler.
+ assert(HBtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+ }
+
+ if (HBtab->HasFilter())
+ {
+ assert(bbNumFirstFunclet <= bbNumFilter);
+ if (fgFirstFuncletBB == HBtab->ebdFilter)
+ {
+ assert(!isLegalFirstFunclet); // We can't have already found a matching block for the first funclet.
+ isLegalFirstFunclet = true;
+ }
+ }
+ else
+ {
+ assert(bbNumFirstFunclet <= bbNumHndBeg);
+ if (fgFirstFuncletBB == HBtab->ebdHndBeg)
+ {
+ assert(!isLegalFirstFunclet); // We can't have already found a matching block for the first funclet.
+ isLegalFirstFunclet = true;
+ }
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // Check the 'try' region nesting, using ebdEnclosingTryIndex.
+ // Only check one level of nesting, since we'll check the outer EH region (and its nesting) when we get to it
+ // later.
+
+ if (HBtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ assert(HBtab->ebdEnclosingTryIndex > XTnum); // The enclosing region must come after this one in the table
+ EHblkDsc* HBtabOuter = ehGetDsc(HBtab->ebdEnclosingTryIndex);
+ unsigned bbNumOuterTryBeg = blockNumMap[HBtabOuter->ebdTryBeg->bbNum];
+ unsigned bbNumOuterTryLast = blockNumMap[HBtabOuter->ebdTryLast->bbNum];
+
+ // A few basic asserts (that will also get covered later, when this outer region gets handled).
+ assert(bbNumOuterTryBeg != 0);
+ assert(bbNumOuterTryLast != 0);
+ assert(bbNumOuterTryBeg <= bbNumOuterTryLast);
+
+ if (!EHblkDsc::ebdIsSameTry(HBtab, HBtabOuter))
+ {
+ // If it's not a mutually protect region, then the outer 'try' must completely lexically contain all the
+ // blocks in the nested EH region. However, if funclets have been created, this is no longer true, since
+ // this 'try' might be in a handler that is pulled out to the funclet region, while the outer 'try'
+ // remains in the main function region.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+ if (fgFuncletsCreated)
+ {
+ // If both the 'try' region and the outer 'try' region are in the main function area, then we can
+ // do the normal nesting check. Otherwise, it's harder to find a useful assert to make about their
+ // relationship.
+ if ((bbNumTryLast < bbNumFirstFunclet) && (bbNumOuterTryLast < bbNumFirstFunclet))
+ {
+ if (multipleBegBlockNormalizationDone)
+ {
+ assert(bbNumOuterTryBeg < bbNumTryBeg); // Two 'try' regions can't start at the same
+ // block (by EH normalization).
+ }
+ else
+ {
+ assert(bbNumOuterTryBeg <= bbNumTryBeg);
+ }
+ if (multipleLastBlockNormalizationDone)
+ {
+ assert(bbNumTryLast < bbNumOuterTryLast); // Two 'try' regions can't end at the same block
+ //(by EH normalization).
+ }
+ else
+ {
+ assert(bbNumTryLast <= bbNumOuterTryLast);
+ }
+ }
+
+ // With funclets, all we can say about the handler blocks is that they are disjoint from the
+ // enclosing try.
+ assert((bbNumHndLast < bbNumOuterTryBeg) || (bbNumOuterTryLast < bbNumHndBeg));
+ }
+ else
+#endif // FEATURE_EH_FUNCLETS
+ {
+ if (multipleBegBlockNormalizationDone)
+ {
+ assert(bbNumOuterTryBeg < bbNumTryBeg); // Two 'try' regions can't start at the same block
+ // (by EH normalization).
+ }
+ else
+ {
+ assert(bbNumOuterTryBeg <= bbNumTryBeg);
+ }
+ assert(bbNumOuterTryBeg < bbNumHndBeg); // An inner handler can never start at the same
+ // block as an outer 'try' (by IL rules).
+ if (multipleLastBlockNormalizationDone)
+ {
+ // An inner EH region can't share a 'last' block with the outer 'try' (by EH normalization).
+ assert(bbNumTryLast < bbNumOuterTryLast);
+ assert(bbNumHndLast < bbNumOuterTryLast);
+ }
+ else
+ {
+ assert(bbNumTryLast <= bbNumOuterTryLast);
+ assert(bbNumHndLast <= bbNumOuterTryLast);
+ }
+ }
+ }
+ }
+
+ // Check the handler region nesting, using ebdEnclosingHndIndex.
+ // Only check one level of nesting, since we'll check the outer EH region (and its nesting) when we get to it
+ // later.
+
+ if (HBtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ assert(HBtab->ebdEnclosingHndIndex > XTnum); // The enclosing region must come after this one in the table
+ EHblkDsc* HBtabOuter = ehGetDsc(HBtab->ebdEnclosingHndIndex);
+ unsigned bbNumOuterHndBeg = blockNumMap[HBtabOuter->ebdHndBeg->bbNum];
+ unsigned bbNumOuterHndLast = blockNumMap[HBtabOuter->ebdHndLast->bbNum];
+
+ // A few basic asserts (that will also get covered later, when this outer region gets handled).
+ assert(bbNumOuterHndBeg != 0);
+ assert(bbNumOuterHndLast != 0);
+ assert(bbNumOuterHndBeg <= bbNumOuterHndLast);
+
+// The outer handler must completely contain all the blocks in the EH region nested within it. However, if
+// funclets have been created, it's harder to make any relationship asserts about the order of nested
+// handlers, which also have been made into funclets.
+
+#if FEATURE_EH_FUNCLETS
+ if (fgFuncletsCreated)
+ {
+ if (handlerBegIsTryBegNormalizationDone)
+ {
+ assert(bbNumOuterHndBeg < bbNumTryBeg); // An inner 'try' can't start at the same block as an
+ // outer handler (by EH normalization).
+ }
+ else
+ {
+ assert(bbNumOuterHndBeg <= bbNumTryBeg);
+ }
+ if (multipleLastBlockNormalizationDone)
+ {
+ assert(bbNumTryLast < bbNumOuterHndLast); // An inner 'try' can't end at the same block as an
+ // outer handler (by EH normalization).
+ }
+ else
+ {
+ assert(bbNumTryLast <= bbNumOuterHndLast);
+ }
+
+ // With funclets, all we can say about the handler blocks is that they are disjoint from the enclosing
+ // handler.
+ assert((bbNumHndLast < bbNumOuterHndBeg) || (bbNumOuterHndLast < bbNumHndBeg));
+ }
+ else
+#endif // FEATURE_EH_FUNCLETS
+ {
+ if (handlerBegIsTryBegNormalizationDone)
+ {
+ assert(bbNumOuterHndBeg < bbNumTryBeg); // An inner 'try' can't start at the same block as an
+ // outer handler (by EH normalization).
+ }
+ else
+ {
+ assert(bbNumOuterHndBeg <= bbNumTryBeg);
+ }
+ assert(bbNumOuterHndBeg < bbNumHndBeg); // An inner handler can never start at the same block
+ // as an outer handler (by IL rules).
+ if (multipleLastBlockNormalizationDone)
+ {
+ // An inner EH region can't share a 'last' block with the outer handler (by EH normalization).
+ assert(bbNumTryLast < bbNumOuterHndLast);
+ assert(bbNumHndLast < bbNumOuterHndLast);
+ }
+ else
+ {
+ assert(bbNumTryLast <= bbNumOuterHndLast);
+ assert(bbNumHndLast <= bbNumOuterHndLast);
+ }
+ }
+ }
+
+ // Set up blockTryBegSet and blockHndBegSet.
+ // We might want to have this assert:
+ // if (fgNormalizeEHDone) assert(!blockTryBegSet[HBtab->ebdTryBeg->bbNum]);
+ // But we can't, because if we have mutually-protect 'try' regions, we'll see exactly the same tryBeg twice
+ // (or more).
+ blockTryBegSet[HBtab->ebdTryBeg->bbNum] = true;
+ assert(!blockHndBegSet[HBtab->ebdHndBeg->bbNum]);
+ blockHndBegSet[HBtab->ebdHndBeg->bbNum] = true;
+
+ if (HBtab->HasFilter())
+ {
+ assert(HBtab->ebdFilter->bbCatchTyp == BBCT_FILTER);
+ assert(!blockHndBegSet[HBtab->ebdFilter->bbNum]);
+ blockHndBegSet[HBtab->ebdFilter->bbNum] = true;
+ }
+
+ // Check the block bbCatchTyp for this EH region's filter and handler.
+
+ if (HBtab->HasFilter())
+ {
+ assert(HBtab->ebdHndBeg->bbCatchTyp == BBCT_FILTER_HANDLER);
+ }
+ else if (HBtab->HasCatchHandler())
+ {
+ assert((HBtab->ebdHndBeg->bbCatchTyp != BBCT_NONE) && (HBtab->ebdHndBeg->bbCatchTyp != BBCT_FAULT) &&
+ (HBtab->ebdHndBeg->bbCatchTyp != BBCT_FINALLY) && (HBtab->ebdHndBeg->bbCatchTyp != BBCT_FILTER) &&
+ (HBtab->ebdHndBeg->bbCatchTyp != BBCT_FILTER_HANDLER));
+ }
+ else if (HBtab->HasFaultHandler())
+ {
+ assert(HBtab->ebdHndBeg->bbCatchTyp == BBCT_FAULT);
+ }
+ else if (HBtab->HasFinallyHandler())
+ {
+ assert(HBtab->ebdHndBeg->bbCatchTyp == BBCT_FINALLY);
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS
+ assert(!fgFuncletsCreated || isLegalFirstFunclet);
+#endif // FEATURE_EH_FUNCLETS
+
+ // Figure out what 'try' and handler index each basic block should have,
+ // and check the blocks against that. This depends on the more nested EH
+ // clauses appearing first. For duplicate clauses, we use the duplicate
+ // clause 'try' region to set the try index, since a handler that has
+ // been pulled out of an enclosing 'try' wouldn't have had its try index
+ // otherwise set. The duplicate clause handler is truly a duplicate of
+ // a previously processed handler, so we ignore it.
+
+ size_t blockIndexBytes = (bbNumMax + 1) * sizeof(unsigned short);
+ unsigned short* blockTryIndex = (unsigned short*)_alloca(blockIndexBytes);
+ unsigned short* blockHndIndex = (unsigned short*)_alloca(blockIndexBytes);
+ memset(blockTryIndex, 0, blockIndexBytes);
+ memset(blockHndIndex, 0, blockIndexBytes);
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ BasicBlock* blockEnd;
+
+ for (block = HBtab->ebdTryBeg, blockEnd = HBtab->ebdTryLast->bbNext; block != blockEnd; block = block->bbNext)
+ {
+ if (blockTryIndex[block->bbNum] == 0)
+ {
+ blockTryIndex[block->bbNum] = (unsigned short)(XTnum + 1);
+ }
+ }
+
+ for (block = (HBtab->HasFilter() ? HBtab->ebdFilter : HBtab->ebdHndBeg), blockEnd = HBtab->ebdHndLast->bbNext;
+ block != blockEnd; block = block->bbNext)
+ {
+ if (blockHndIndex[block->bbNum] == 0)
+ {
+ blockHndIndex[block->bbNum] = (unsigned short)(XTnum + 1);
+ }
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (fgFuncletsCreated)
+ {
+ // Mark all the funclet 'try' indices correctly, since they do not exist in the linear 'try' region that
+ // we looped over above. This is similar to duplicate clause logic, but we only need to look at the most
+ // nested enclosing try index, not the entire set of enclosing try indices, since that is what we store
+ // on the block.
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ unsigned enclosingTryIndex = ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
+ // ignoring 'mutual protect' trys
+ if (enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ // The handler funclet for 'XTnum' has a try index of 'enclosingTryIndex' (at least, the parts of the
+ // funclet that don't already have a more nested 'try' index because a 'try' is nested within the
+ // handler).
+
+ BasicBlock* blockEnd;
+ for (block = (HBtab->HasFilter() ? HBtab->ebdFilter : HBtab->ebdHndBeg),
+ blockEnd = HBtab->ebdHndLast->bbNext;
+ block != blockEnd; block = block->bbNext)
+ {
+ if (blockTryIndex[block->bbNum] == 0)
+ {
+ blockTryIndex[block->bbNum] = (unsigned short)(enclosingTryIndex + 1);
+ }
+ }
+ }
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // Make sure that all blocks have the right index, including those blocks that should have zero (no EH region).
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ assert(block->bbTryIndex == blockTryIndex[block->bbNum]);
+ assert(block->bbHndIndex == blockHndIndex[block->bbNum]);
+
+ // Also, since we're walking the blocks, check that all blocks we didn't mark as EH handler 'begin' blocks
+ // already have bbCatchTyp set properly.
+ if (!blockHndBegSet[block->bbNum])
+ {
+ assert(block->bbCatchTyp == BBCT_NONE);
+
+#if FEATURE_EH_FUNCLETS
+ if (fgFuncletsCreated)
+ {
+ // Make sure blocks that aren't the first block of a funclet do not have the BBF_FUNCLET_BEG flag set.
+ assert((block->bbFlags & BBF_FUNCLET_BEG) == 0);
+ }
+#endif // FEATURE_EH_FUNCLETS
+ }
+
+ // Only the first block of 'try' regions should have BBF_TRY_BEG set.
+ if (!blockTryBegSet[block->bbNum])
+ {
+ assert((block->bbFlags & BBF_TRY_BEG) == 0);
+ }
+ }
+}
+
+void Compiler::fgDispHandlerTab()
+{
+ printf("\n*************** Exception Handling table");
+
+ if (compHndBBtabCount == 0)
+ {
+ printf(" is empty\n");
+ return;
+ }
+
+ printf("\nindex ");
+#if !FEATURE_EH_FUNCLETS
+ printf("nest, ");
+#endif // !FEATURE_EH_FUNCLETS
+ printf("eTry, eHnd\n");
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ HBtab->DispEntry(XTnum);
+ }
+}
+
+#endif // DEBUG
+/*****************************************************************************/
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX "Compiler" functions: EH tree verification XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+ * The following code checks the following rules for the EH table:
+ * 1. Overlapping of try blocks is not allowed.
+ * 2. Handler blocks cannot be shared between different try blocks.
+ * 3. Try blocks with Finally or Fault blocks cannot have other handlers.
+ * 4. If block A contains block B, A should also contain B's try/filter/handler.
+ * 5. A block cannot contain its related try/filter/handler.
+ * 6. A nested block must appear before its containing block.
+ *
+ */
+
+void Compiler::verInitEHTree(unsigned numEHClauses)
+{
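+ // Each EH clause contributes at most three nodes to the tree: a 'try' node, a handler node, and an
+ // optional filter node (see verInsertEhNode), hence the "* 3" allocation below.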
+ ehnNext = new (this, CMK_BasicBlock) EHNodeDsc[numEHClauses * 3];
+ ehnTree = nullptr;
+}
+
+/* Inserts the try, handler and filter (optional) clause information in a tree structure
+ * in order to catch incorrect eh formatting (e.g. illegal overlaps, incorrect order)
+ */
+
+void Compiler::verInsertEhNode(CORINFO_EH_CLAUSE* clause, EHblkDsc* handlerTab)
+{
+ EHNodeDsc* tryNode = ehnNext++;
+ EHNodeDsc* handlerNode = ehnNext++;
+ EHNodeDsc* filterNode = nullptr; // optional
+
+ tryNode->ehnSetTryNodeType();
+ tryNode->ehnStartOffset = clause->TryOffset;
+ tryNode->ehnEndOffset = clause->TryOffset + clause->TryLength - 1;
+ tryNode->ehnHandlerNode = handlerNode;
+
+ if (clause->Flags & CORINFO_EH_CLAUSE_FINALLY)
+ {
+ handlerNode->ehnSetFinallyNodeType();
+ }
+ else if (clause->Flags & CORINFO_EH_CLAUSE_FAULT)
+ {
+ handlerNode->ehnSetFaultNodeType();
+ }
+ else
+ {
+ handlerNode->ehnSetHandlerNodeType();
+ }
+
+ handlerNode->ehnStartOffset = clause->HandlerOffset;
+ handlerNode->ehnEndOffset = clause->HandlerOffset + clause->HandlerLength - 1;
+ handlerNode->ehnTryNode = tryNode;
+
+ if (clause->Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ filterNode = ehnNext++;
+ filterNode->ehnStartOffset = clause->FilterOffset;
+ BasicBlock* blk = handlerTab->BBFilterLast();
+ filterNode->ehnEndOffset = blk->bbCodeOffsEnd - 1;
+
+ noway_assert(filterNode->ehnEndOffset != 0);
+ filterNode->ehnSetFilterNodeType();
+ filterNode->ehnTryNode = tryNode;
+ tryNode->ehnFilterNode = filterNode;
+ }
+
+ verInsertEhNodeInTree(&ehnTree, tryNode);
+ verInsertEhNodeInTree(&ehnTree, handlerNode);
+ if (filterNode)
+ {
+ verInsertEhNodeInTree(&ehnTree, filterNode);
+ }
+}
+
+/*
+ The root node could be changed by this method.
+
+ node is inserted to
+
+ (a) right of root (root.right <-- node)
+ (b) left of root (node.right <-- root; node becomes root)
+ (c) child of root (root.child <-- node)
+ (d) parent of root (node.child <-- root; node becomes root)
+ (e) equivalent of root (root.equivalent <-- node)
+
+ such that siblings are ordered from left to right
+ child parent relationship and equivalence relationship are not violated
+
+
+ Here is a list of all possible cases
+
+ Case 1 2 3 4 5 6 7 8 9 10 11 12 13
+
+ | | | | |
+ | | | | |
+ .......|.|.|.|..................... [ root start ] .....
+ | | | | | | |
+ | | | | | | |
+ r| | | | | | | |
+ o| | | | | |
+ o| | | | | |
+ t| | | | | |
+ | | | | | | | |
+ | | | | | | |
+ |..........|.|.|.|.....|........|.. [ root end ] ........
+ | | | |
+ | | | | |
+ | | | | |
+
+ |<-- - - - n o d e - - - -->|
+
+
+ Case Operation
+ --------------
+ 1 (b)
+ 2 Error
+ 3 Error
+ 4 (d)
+ 5 (d)
+ 6 (d)
+ 7 Error
+ 8 Error
+ 9 (a)
+ 10 (c)
+ 11 (c)
+ 12 (c)
+ 13 (e)
+
+
+*/
+
+void Compiler::verInsertEhNodeInTree(EHNodeDsc** ppRoot, EHNodeDsc* node)
+{
+ unsigned nStart = node->ehnStartOffset;
+ unsigned nEnd = node->ehnEndOffset;
+
+ if (nStart > nEnd)
+ {
+ BADCODE("start offset greater or equal to end offset");
+ }
+ node->ehnNext = nullptr;
+ node->ehnChild = nullptr;
+ node->ehnEquivalent = nullptr;
+
+ while (TRUE)
+ {
+ if (*ppRoot == nullptr)
+ {
+ *ppRoot = node;
+ break;
+ }
+ unsigned rStart = (*ppRoot)->ehnStartOffset;
+ unsigned rEnd = (*ppRoot)->ehnEndOffset;
+
+ if (nStart < rStart)
+ {
+ // Case 1
+ if (nEnd < rStart)
+ {
+ // Left sibling
+ node->ehnNext = *ppRoot;
+ *ppRoot = node;
+ return;
+ }
+ // Case 2, 3
+ if (nEnd < rEnd)
+ {
+ //[Error]
+ BADCODE("Overlapping try regions");
+ }
+
+ // Case 4, 5
+ //[Parent]
+ verInsertEhNodeParent(ppRoot, node);
+ return;
+ }
+
+ // Cases 6 - 13 (nStart >= rStart)
+
+ if (nEnd > rEnd)
+ { // Case 6, 7, 8, 9
+
+ // Case 9
+ if (nStart > rEnd)
+ {
+ //[RightSibling]
+
+ // Recurse with Root.Sibling as the new root
+ ppRoot = &((*ppRoot)->ehnNext);
+ continue;
+ }
+
+ // Case 6
+ if (nStart == rStart)
+ {
+ //[Parent]
+ if (node->ehnIsTryBlock() || (*ppRoot)->ehnIsTryBlock())
+ {
+ verInsertEhNodeParent(ppRoot, node);
+ return;
+ }
+
+ // non try blocks are not allowed to start at the same offset
+ BADCODE("Handlers start at the same offset");
+ }
+
+ // Case 7, 8
+ BADCODE("Overlapping try regions");
+ }
+
+ // Case 10-13 (nStart >= rStart && nEnd <= rEnd)
+ if ((nStart != rStart) || (nEnd != rEnd))
+ { // Cases 10,11,12
+ //[Child]
+
+ if ((*ppRoot)->ehnIsTryBlock())
+ {
+ BADCODE("Inner try appears after outer try in exception handling table");
+ }
+ else
+ {
+ // We have an EH clause nested within a handler, but the parent
+ // handler clause came first in the table. The rest of the compiler
+ // doesn't expect this, so sort the EH table.
+
+ fgNeedToSortEHTable = true;
+
+ // Case 12 (nStart == rStart)
+ // non try blocks are not allowed to start at the same offset
+ if ((nStart == rStart) && !node->ehnIsTryBlock())
+ {
+ BADCODE("Handlers start at the same offset");
+ }
+
+ // check this!
+ ppRoot = &((*ppRoot)->ehnChild);
+ continue;
+ }
+ }
+
+ // Case 13
+ //[Equivalent]
+ if (!node->ehnIsTryBlock() && !(*ppRoot)->ehnIsTryBlock())
+ {
+ BADCODE("Handlers cannot be shared");
+ }
+
+ if (!node->ehnIsTryBlock() || !(*ppRoot)->ehnIsTryBlock())
+ {
+ // Equivalent is only allowed for try bodies
+ // If one is a handler, this means the nesting is wrong
+ BADCODE("Handler and try with the same offset");
+ }
+
+ node->ehnEquivalent = node->ehnNext = *ppRoot;
+
+ // check that the corresponding handler is either a catch handler
+ // or a filter
+ if (node->ehnHandlerNode->ehnIsFaultBlock() || node->ehnHandlerNode->ehnIsFinallyBlock() ||
+ (*ppRoot)->ehnHandlerNode->ehnIsFaultBlock() || (*ppRoot)->ehnHandlerNode->ehnIsFinallyBlock())
+ {
+ BADCODE("Try block with multiple non-filter/non-handler blocks");
+ }
+
+ break;
+ }
+}
+
+/**********************************************************************
+ * Make node the parent of *ppRoot. All siblings of *ppRoot that are
+ * fully or partially nested in node remain siblings of *ppRoot
+ */
+
+void Compiler::verInsertEhNodeParent(EHNodeDsc** ppRoot, EHNodeDsc* node)
+{
+ noway_assert(node->ehnNext == nullptr);
+ noway_assert(node->ehnChild == nullptr);
+
+ // Root is nested in Node
+ noway_assert(node->ehnStartOffset <= (*ppRoot)->ehnStartOffset);
+ noway_assert(node->ehnEndOffset >= (*ppRoot)->ehnEndOffset);
+
+ // Root is not the same as Node
+ noway_assert(node->ehnStartOffset != (*ppRoot)->ehnStartOffset || node->ehnEndOffset != (*ppRoot)->ehnEndOffset);
+
+ if (node->ehnIsFilterBlock())
+ {
+ BADCODE("Protected block appearing within filter block");
+ }
+
+ EHNodeDsc* lastChild = nullptr;
+ EHNodeDsc* sibling = (*ppRoot)->ehnNext;
+
+ while (sibling)
+ {
+ // siblings are ordered left to right, largest right.
+ // nodes have a width of at least one.
+ // Hence sibling start will always be after Node start.
+
+ noway_assert(sibling->ehnStartOffset > node->ehnStartOffset); // (1)
+
+ // disjoint
+ if (sibling->ehnStartOffset > node->ehnEndOffset)
+ {
+ break;
+ }
+
+ // partial containment.
+ if (sibling->ehnEndOffset > node->ehnEndOffset) // (2)
+ {
+ BADCODE("Overlapping try regions");
+ }
+ // else full containment (follows from (1) and (2))
+
+ lastChild = sibling;
+ sibling = sibling->ehnNext;
+ }
+
+ // All siblings of Root up to and including lastChild will continue to be
+ // siblings of Root (and children of Node). The node to the right of
+ // lastChild will become the first sibling of Node.
+ //
+
+ if (lastChild)
+ {
+ // Node has more than one child including Root
+
+ node->ehnNext = lastChild->ehnNext;
+ lastChild->ehnNext = nullptr;
+ }
+ else
+ {
+ // Root is the only child of Node
+ node->ehnNext = (*ppRoot)->ehnNext;
+ (*ppRoot)->ehnNext = nullptr;
+ }
+
+ node->ehnChild = *ppRoot;
+ *ppRoot = node;
+}
+
+/*****************************************************************************
+ * Checks the following two conditions:
+ * 1) If block A contains block B, A should also contain B's try/filter/handler.
+ * 2) A block cannot contain its related try/filter/handler.
+ * Both these conditions are checked by making sure that all the blocks for an
+ * exception clause are at the same level.
+ * The algorithm is: for each exception clause, determine the first block and
+ * search through the next links for its corresponding try/handler/filter as the
+ * case may be. If not found, then fail.
+ */
+void Compiler::verCheckNestingLevel(EHNodeDsc* root)
+{
+ EHNodeDsc* ehnNode = root;
+
+#define exchange(a, b) \
+ { \
+ temp = a; \
+ a = b; \
+ b = temp; \
+ }
+
+ for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ EHNodeDsc *p1, *p2, *p3, *temp, *search;
+
+ p1 = ehnNode++;
+ p2 = ehnNode++;
+
+ // we are relying on the fact that ehn nodes are allocated sequentially.
+ noway_assert(p1->ehnHandlerNode == p2);
+ noway_assert(p2->ehnTryNode == p1);
+
+ // arrange p1 and p2 in sequential order
+ if (p1->ehnStartOffset == p2->ehnStartOffset)
+ {
+ BADCODE("shared exception handler");
+ }
+
+ if (p1->ehnStartOffset > p2->ehnStartOffset)
+ exchange(p1, p2);
+
+ temp = p1->ehnNext;
+ unsigned numSiblings = 0;
+
+ search = p2;
+ if (search->ehnEquivalent)
+ {
+ search = search->ehnEquivalent;
+ }
+
+ do
+ {
+ if (temp == search)
+ {
+ numSiblings++;
+ break;
+ }
+ if (temp)
+ {
+ temp = temp->ehnNext;
+ }
+ } while (temp);
+
+ CORINFO_EH_CLAUSE clause;
+ info.compCompHnd->getEHinfo(info.compMethodHnd, XTnum, &clause);
+
+ if (clause.Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ p3 = ehnNode++;
+
+ noway_assert(p3->ehnTryNode == p1 || p3->ehnTryNode == p2);
+ noway_assert(p1->ehnFilterNode == p3 || p2->ehnFilterNode == p3);
+
+ if (p3->ehnStartOffset < p1->ehnStartOffset)
+ {
+ temp = p3;
+ search = p1;
+ }
+ else if (p3->ehnStartOffset < p2->ehnStartOffset)
+ {
+ temp = p1;
+ search = p3;
+ }
+ else
+ {
+ temp = p2;
+ search = p3;
+ }
+ if (search->ehnEquivalent)
+ {
+ search = search->ehnEquivalent;
+ }
+ do
+ {
+ if (temp == search)
+ {
+ numSiblings++;
+ break;
+ }
+ temp = temp->ehnNext;
+ } while (temp);
+ }
+ else
+ {
+ numSiblings++;
+ }
+
+ if (numSiblings != 2)
+ {
+ BADCODE("Outer block does not contain all code in inner handler");
+ }
+ }
+}
diff --git a/src/jit/jiteh.h b/src/jit/jiteh.h
new file mode 100644
index 0000000000..573116282c
--- /dev/null
+++ b/src/jit/jiteh.h
@@ -0,0 +1,180 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Exception Handling XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef _EH_H_
+#define _EH_H_
+
+struct BasicBlock;
+class Compiler;
+
+/*****************************************************************************/
+
+// The following holds the table of exception handlers.
+
+enum EHHandlerType
+{
+ EH_HANDLER_CATCH = 0x1, // Don't use zero (to aid debugging uninitialized memory)
+ EH_HANDLER_FILTER,
+ EH_HANDLER_FAULT,
+ EH_HANDLER_FINALLY
+};
+
+// ToCORINFO_EH_CLAUSE_FLAGS: Convert an internal EHHandlerType to a CORINFO_EH_CLAUSE_FLAGS value
+// to pass back to the VM.
+inline CORINFO_EH_CLAUSE_FLAGS ToCORINFO_EH_CLAUSE_FLAGS(EHHandlerType type)
+{
+ switch (type)
+ {
+ case EH_HANDLER_CATCH:
+ return CORINFO_EH_CLAUSE_NONE;
+ case EH_HANDLER_FILTER:
+ return CORINFO_EH_CLAUSE_FILTER;
+ case EH_HANDLER_FAULT:
+ return CORINFO_EH_CLAUSE_FAULT;
+ case EH_HANDLER_FINALLY:
+ return CORINFO_EH_CLAUSE_FINALLY;
+ default:
+ unreached();
+ }
+}
+
+// ToEHHandlerType: Convert a CORINFO_EH_CLAUSE_FLAGS value obtained from the VM in the EH clause structure
+// to the internal EHHandlerType type.
+inline EHHandlerType ToEHHandlerType(CORINFO_EH_CLAUSE_FLAGS flags)
+{
+ if (flags & CORINFO_EH_CLAUSE_FAULT)
+ {
+ return EH_HANDLER_FAULT;
+ }
+ else if (flags & CORINFO_EH_CLAUSE_FINALLY)
+ {
+ return EH_HANDLER_FINALLY;
+ }
+ else if (flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ return EH_HANDLER_FILTER;
+ }
+ else
+ {
+ // If it's none of the others, assume it is a try/catch.
+ /* XXX Fri 11/7/2008
+ * The VM (and apparently VC) stick in extra bits in the flags field. We ignore any flags
+ * we don't know about.
+ */
+ return EH_HANDLER_CATCH;
+ }
+}
+
+struct EHblkDsc
+{
+ BasicBlock* ebdTryBeg; // First block of the try
+ BasicBlock* ebdTryLast; // Last block of the try
+ BasicBlock* ebdHndBeg; // First block of the handler
+ BasicBlock* ebdHndLast; // Last block of the handler
+ union {
+ BasicBlock* ebdFilter; // First block of filter, if HasFilter()
+ unsigned ebdTyp; // Exception type (a class token), otherwise
+ };
+
+ EHHandlerType ebdHandlerType;
+
+#if !FEATURE_EH_FUNCLETS
+ // How nested is the try/handler within other *handlers* - 0 for outermost clauses, 1 when nested within one
+ // handler, etc.
+ unsigned short ebdHandlerNestingLevel;
+#endif // !FEATURE_EH_FUNCLETS
+
+ static const unsigned short NO_ENCLOSING_INDEX = USHRT_MAX;
+
+ // The index of the enclosing outer try region, NO_ENCLOSING_INDEX if none.
+ // Be careful of 'mutually protect' catch and filter clauses (multiple
+ // handlers with the same try region): the try regions 'nest' so we set
+ // ebdEnclosingTryIndex, but the inner catch is *NOT* nested within the outer catch!
+ // That is, if the "inner catch" throws an exception, it won't be caught by
+ // the "outer catch" for mutually protect handlers.
+ unsigned short ebdEnclosingTryIndex;
+
+ // The index of the enclosing outer handler region, NO_ENCLOSING_INDEX if none.
+ unsigned short ebdEnclosingHndIndex;
+
+#if FEATURE_EH_FUNCLETS
+
+ // After funclets are created, this is the index of corresponding FuncInfoDsc
+ // Special case for Filter/Filter-handler:
+ // As in the IL, the filter funclet immediately precedes the filter-handler funclet.
+ // So this index points to the filter-handler funclet. If you want the filter
+ // funclet index, just subtract 1.
+ unsigned short ebdFuncIndex;
+
+#endif // FEATURE_EH_FUNCLETS
+
+ IL_OFFSET ebdTryBegOffset; // IL offsets of EH try/end regions as they are imported
+ IL_OFFSET ebdTryEndOffset;
+ IL_OFFSET ebdFilterBegOffset; // only set if HasFilter()
+ IL_OFFSET ebdHndBegOffset;
+ IL_OFFSET ebdHndEndOffset;
+
+ // Returns the last block of the filter. Assumes the EH clause is a try/filter/filter-handler type.
+ BasicBlock* BBFilterLast();
+
+ bool HasCatchHandler();
+ bool HasFilter();
+ bool HasFinallyHandler();
+ bool HasFaultHandler();
+ bool HasFinallyOrFaultHandler();
+
+ // Returns the block to which control will flow if an (otherwise-uncaught) exception is raised
+ // in the try. This is normally "ebdHndBeg", unless the try region has a filter, in which case that is returned.
+ // (This is, in some sense, the "true handler," at least in the sense of control flow. Note
+ // that we model the transition from a filter to its handler as normal, non-exceptional control flow.)
+ BasicBlock* ExFlowBlock();
+
+ bool InTryRegionILRange(BasicBlock* pBlk);
+ bool InFilterRegionILRange(BasicBlock* pBlk);
+ bool InHndRegionILRange(BasicBlock* pBlk);
+
+ bool InTryRegionBBRange(BasicBlock* pBlk);
+ bool InFilterRegionBBRange(BasicBlock* pBlk);
+ bool InHndRegionBBRange(BasicBlock* pBlk);
+
+ IL_OFFSET ebdTryBegOffs();
+ IL_OFFSET ebdTryEndOffs();
+ IL_OFFSET ebdFilterBegOffs();
+ IL_OFFSET ebdFilterEndOffs();
+ IL_OFFSET ebdHndBegOffs();
+ IL_OFFSET ebdHndEndOffs();
+
+ static bool ebdIsSameILTry(EHblkDsc* h1, EHblkDsc* h2); // Same 'try' region? Compare IL range.
+
+ // Return the region index of the most nested EH region that encloses this region, or NO_ENCLOSING_INDEX
+ // if this region is directly in the main function body. Set '*inTryRegion' to 'true' if this region is
+ // most nested within a 'try' region, or 'false' if this region is most nested within a handler. (Note
+ // that filters cannot contain nested EH regions.)
+ unsigned ebdGetEnclosingRegionIndex(bool* inTryRegion);
+
+ static bool ebdIsSameTry(EHblkDsc* h1, EHblkDsc* h2); // Same 'try' region? Compare begin/last blocks.
+ bool ebdIsSameTry(Compiler* comp, unsigned t2);
+ bool ebdIsSameTry(BasicBlock* ebdTryBeg, BasicBlock* ebdTryLast);
+
+#ifdef DEBUG
+ void DispEntry(unsigned num); // Display this table entry
+#endif // DEBUG
+
+private:
+ static bool InBBRange(BasicBlock* pBlk, BasicBlock* pStart, BasicBlock* pEnd);
+};
+
+/*****************************************************************************/
+#endif // _EH_H_
+/*****************************************************************************/
diff --git a/src/jit/jitgcinfo.h b/src/jit/jitgcinfo.h
new file mode 100644
index 0000000000..b93ac3376c
--- /dev/null
+++ b/src/jit/jitgcinfo.h
@@ -0,0 +1,452 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Garbage-collector information
+// Keeps track of which variables hold pointers.
+// Generates the GC-tables
+
+#ifndef _JITGCINFO_H_
+#define _JITGCINFO_H_
+
+#include "gcinfotypes.h"
+
+#ifndef JIT32_GCENCODER
+#include "gcinfoencoder.h"
+#endif
+
+/*****************************************************************************/
+
+#ifndef JIT32_GCENCODER
+// Shash typedefs
+struct RegSlotIdKey
+{
+ unsigned short m_regNum;
+ unsigned short m_flags;
+
+ RegSlotIdKey()
+ {
+ }
+
+ RegSlotIdKey(unsigned short regNum, unsigned short flags) : m_regNum(regNum), m_flags(flags)
+ {
+ }
+
+ static unsigned GetHashCode(RegSlotIdKey rsk)
+ {
+ return (rsk.m_flags << (8 * sizeof(unsigned short))) + rsk.m_regNum;
+ }
+
+ static bool Equals(RegSlotIdKey rsk1, RegSlotIdKey rsk2)
+ {
+ return rsk1.m_regNum == rsk2.m_regNum && rsk1.m_flags == rsk2.m_flags;
+ }
+};
+
+struct StackSlotIdKey
+{
+ int m_offset;
+ bool m_fpRel;
+ unsigned short m_flags;
+
+ StackSlotIdKey()
+ {
+ }
+
+ StackSlotIdKey(int offset, bool fpRel, unsigned short flags) : m_offset(offset), m_fpRel(fpRel), m_flags(flags)
+ {
+ }
+
+ static unsigned GetHashCode(StackSlotIdKey ssk)
+ {
+ return (ssk.m_flags << (8 * sizeof(unsigned short))) ^ (unsigned)ssk.m_offset ^ (ssk.m_fpRel ? 0x1000000 : 0);
+ }
+
+ static bool Equals(StackSlotIdKey ssk1, StackSlotIdKey ssk2)
+ {
+ return ssk1.m_offset == ssk2.m_offset && ssk1.m_fpRel == ssk2.m_fpRel && ssk1.m_flags == ssk2.m_flags;
+ }
+};
+
+typedef SimplerHashTable<RegSlotIdKey, RegSlotIdKey, GcSlotId, JitSimplerHashBehavior> RegSlotMap;
+typedef SimplerHashTable<StackSlotIdKey, StackSlotIdKey, GcSlotId, JitSimplerHashBehavior> StackSlotMap;
+#endif
+
+typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, VARSET_TP*, JitSimplerHashBehavior> NodeToVarsetPtrMap;
+
+class GCInfo
+{
+ friend class CodeGen;
+
+private:
+ Compiler* compiler;
+ RegSet* regSet;
+
+public:
+ GCInfo(Compiler* theCompiler);
+
+ void gcResetForBB();
+
+ void gcMarkRegSetGCref(regMaskTP regMask DEBUGARG(bool forceOutput = false));
+ void gcMarkRegSetByref(regMaskTP regMask DEBUGARG(bool forceOutput = false));
+ void gcMarkRegSetNpt(regMaskTP regMask DEBUGARG(bool forceOutput = false));
+ void gcMarkRegPtrVal(regNumber reg, var_types type);
+ void gcMarkRegPtrVal(GenTreePtr tree);
+
+#ifdef DEBUG
+ void gcDspGCrefSetChanges(regMaskTP gcRegGCrefSetNew DEBUGARG(bool forceOutput = false));
+ void gcDspByrefSetChanges(regMaskTP gcRegByrefSetNew DEBUGARG(bool forceOutput = false));
+#endif // DEBUG
+
+ /*****************************************************************************/
+
+ //-------------------------------------------------------------------------
+ //
+ // The following keeps track of which registers currently hold pointer
+ // values.
+ //
+
+ regMaskTP gcRegGCrefSetCur; // current regs holding GCrefs
+ regMaskTP gcRegByrefSetCur; // current regs holding Byrefs
+
+ VARSET_TP gcTrkStkPtrLcls; // set of tracked stack ptr lcls (GCref and Byref) - no args
+ VARSET_TP gcVarPtrSetCur; // currently live part of "gcTrkStkPtrLcls"
+
+ //-------------------------------------------------------------------------
+ //
+ // The following keeps track of the lifetimes of non-register variables that
+ // hold pointers.
+ //
+
+ struct varPtrDsc
+ {
+ varPtrDsc* vpdNext;
+
+ unsigned vpdVarNum; // which variable is this about?
+
+ unsigned vpdBegOfs; // the offset where life starts
+ unsigned vpdEndOfs; // the offset where life ends
+ };
+
+ varPtrDsc* gcVarPtrList;
+ varPtrDsc* gcVarPtrLast;
+
+ void gcVarPtrSetInit();
+
+ /*****************************************************************************/
+
+ // 'pointer value' register tracking and argument pushes/pops tracking.
+
+ enum rpdArgType_t
+ {
+ rpdARG_POP,
+ rpdARG_PUSH,
+ rpdARG_KILL
+ };
+
+ struct regPtrDsc
+ {
+ regPtrDsc* rpdNext; // next entry in the list
+ unsigned rpdOffs; // the offset of the instruction
+
+ union // 2-16 byte union (depending on architecture)
+ {
+ struct // 2-16 byte structure (depending on architecture)
+ {
+ regMaskSmall rpdAdd; // regptr bitset being added
+ regMaskSmall rpdDel; // regptr bitset being removed
+ } rpdCompiler;
+
+ unsigned short rpdPtrArg; // arg offset or popped arg count
+ };
+
+#ifndef JIT32_GCENCODER
+ unsigned char rpdCallInstrSize; // Length of the call instruction.
+#endif
+
+ unsigned short rpdArg : 1; // is this an argument descriptor?
+ unsigned short rpdArgType : 2; // is this an argument push, pop, or kill?
+ rpdArgType_t rpdArgTypeGet()
+ {
+ return (rpdArgType_t)rpdArgType;
+ }
+
+ unsigned short rpdGCtype : 2; // is this a pointer, after all?
+ GCtype rpdGCtypeGet()
+ {
+ return (GCtype)rpdGCtype;
+ }
+
+ unsigned short rpdIsThis : 1; // is it the 'this' pointer
+ unsigned short rpdCall : 1; // is this a true call site?
+ unsigned short : 1; // Padding bit, so next two start on a byte boundary
+ unsigned short rpdCallGCrefRegs : CNT_CALLEE_SAVED; // Callee-saved registers containing GC pointers.
+ unsigned short rpdCallByrefRegs : CNT_CALLEE_SAVED; // Callee-saved registers containing byrefs.
+
+#ifndef JIT32_GCENCODER
+ bool rpdIsCallInstr()
+ {
+ return rpdCall && rpdCallInstrSize != 0;
+ }
+#endif
+ };
+
+ regPtrDsc* gcRegPtrList;
+ regPtrDsc* gcRegPtrLast;
+ unsigned gcPtrArgCnt;
+
+#ifndef JIT32_GCENCODER
+ enum MakeRegPtrMode
+ {
+ MAKE_REG_PTR_MODE_ASSIGN_SLOTS,
+ MAKE_REG_PTR_MODE_DO_WORK
+ };
+
+ // This method has two modes. In the "assign slots" mode, it figures out what stack locations are
+ // used to contain GC references, and whether those locations contain byrefs or pinning references,
+ // building up mappings from tuples of <offset X byref/pinning> to the corresponding slot id.
+ // In the "do work" mode, we use these slot ids to actually declare live ranges to the encoder.
+ void gcMakeVarPtrTable(GcInfoEncoder* gcInfoEncoder, MakeRegPtrMode mode);
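+ //
+ // A minimal sketch of the assumed two-pass call shape (not the actual caller):
+ //     gcMakeVarPtrTable(gcInfoEncoder, MAKE_REG_PTR_MODE_ASSIGN_SLOTS); // discover stack slots, assign slot ids
+ //     gcMakeVarPtrTable(gcInfoEncoder, MAKE_REG_PTR_MODE_DO_WORK);      // report live ranges using those ids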
+
+ // This method expands the tracked stack variables lifetimes so that any lifetimes within filters
+ // are reported as pinned.
+ void gcMarkFilterVarsPinned();
+
+ // At instruction offset "instrOffset," the set of registers indicated by "regMask" is becoming live or dead,
+ // depending on whether "newState" is "GC_SLOT_DEAD" or "GC_SLOT_LIVE". The subset of registers whose corresponding
+ // bits are set in "byRefMask" contain by-refs rather than regular GC pointers. "*pPtrRegs" is the set of
+ // registers currently known to contain pointers. If "mode" is "ASSIGN_SLOTS", computes and records slot
+ // ids for the registers. If "mode" is "DO_WORK", informs "gcInfoEncoder" about the state transition,
+ // using the previously assigned slot ids, and updates "*pPtrRegs" appropriately.
+ void gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder,
+ MakeRegPtrMode mode,
+ unsigned instrOffset,
+ regMaskSmall regMask,
+ GcSlotState newState,
+ regMaskSmall byRefMask,
+ regMaskSmall* pPtrRegs);
+
+ // regPtrDsc is also used to encode writes to the outgoing argument space (as if they were pushes)
+ void gcInfoRecordGCStackArgLive(GcInfoEncoder* gcInfoEncoder, MakeRegPtrMode mode, regPtrDsc* genStackPtr);
+
+ // Walk all the pushes between genStackPtrFirst (inclusive) and genStackPtrLast (exclusive)
+ // and mark them as going dead at instrOffset
+ void gcInfoRecordGCStackArgsDead(GcInfoEncoder* gcInfoEncoder,
+ unsigned instrOffset,
+ regPtrDsc* genStackPtrFirst,
+ regPtrDsc* genStackPtrLast);
+
+#endif
+
+#if MEASURE_PTRTAB_SIZE
+ static size_t s_gcRegPtrDscSize;
+ static size_t s_gcTotalPtrTabSize;
+#endif
+
+ regPtrDsc* gcRegPtrAllocDsc();
+
+ /*****************************************************************************/
+
+ //-------------------------------------------------------------------------
+ //
+ // If we're not generating fully interruptible code, we create a simple
+ // linked list of call descriptors.
+ //
+
+ struct CallDsc
+ {
+ CallDsc* cdNext;
+ void* cdBlock; // the code block of the call
+ unsigned cdOffs; // the offset of the call
+#ifndef JIT32_GCENCODER
+ unsigned short cdCallInstrSize; // the size of the call instruction.
+#endif
+
+ unsigned short cdArgCnt;
+
+ union {
+ struct // used if cdArgCnt == 0
+ {
+ unsigned cdArgMask; // ptr arg bitfield
+ unsigned cdByrefArgMask; // byref qualifier for cdArgMask
+ } u1;
+
+ unsigned* cdArgTable; // used if cdArgCnt != 0
+ };
+
+ regMaskSmall cdGCrefRegs;
+ regMaskSmall cdByrefRegs;
+ };
+
+ CallDsc* gcCallDescList;
+ CallDsc* gcCallDescLast;
+
+ //-------------------------------------------------------------------------
+
+ void gcCountForHeader(UNALIGNED unsigned int* untrackedCount, UNALIGNED unsigned int* varPtrTableSize);
+
+#ifdef JIT32_GCENCODER
+ size_t gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset);
+#else
+ RegSlotMap* m_regSlotMap;
+ StackSlotMap* m_stackSlotMap;
+ // This method has two modes. In the "assign slots" mode, it figures out what registers and stack
+ // locations are used to contain GC references, and whether those locations contain byrefs or pinning
+ // references, building up mappings from tuples of <reg/offset X byref/pinning> to the corresponding
+ // slot id (in the two member fields declared above). In the "do work" mode, we use these slot ids to
+ // actually declare live ranges to the encoder.
+ void gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder, unsigned codeSize, unsigned prologSize, MakeRegPtrMode mode);
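+ //
+ // As with gcMakeVarPtrTable above, the assumed usage is two passes over the
+ // same data (a sketch, not the actual emission path):
+ //     gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+ //     gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, MAKE_REG_PTR_MODE_DO_WORK);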
+#endif
+
+#ifdef JIT32_GCENCODER
+ size_t gcPtrTableSize(const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset);
+ BYTE* gcPtrTableSave(BYTE* destPtr, const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset);
+#endif
+ void gcRegPtrSetInit();
+ /*****************************************************************************/
+
+ // This enumeration yields the result of the analysis below, whether a store
+ // requires a write barrier:
+ enum WriteBarrierForm
+ {
+ WBF_NoBarrier, // No barrier is required
+ WBF_BarrierUnknown, // A barrier is required, no information on checked/unchecked.
+ WBF_BarrierChecked, // A checked barrier is required.
+ WBF_BarrierUnchecked, // An unchecked barrier is required.
+ WBF_NoBarrier_CheckNotHeapInDebug, // We believe that no barrier is required because the
+ // target is not in the heap -- but in debug build use a
+ // barrier call that verifies this property. (Because the
+ // target not being in the heap relies on a convention that
+ // might accidentally be violated in the future.)
+ };
+
+ WriteBarrierForm gcIsWriteBarrierCandidate(GenTreePtr tgt, GenTreePtr assignVal);
+ bool gcIsWriteBarrierAsgNode(GenTreePtr op);
+
+ // Returns a WriteBarrierForm decision based on the form of "tgtAddr", which is assumed to be the
+ // argument of a GT_IND LHS.
+ WriteBarrierForm gcWriteBarrierFormFromTargetAddress(GenTreePtr tgtAddr);
+
+ //-------------------------------------------------------------------------
+ //
+ // These record the info about the procedure in the info-block
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef JIT32_GCENCODER
+private:
+ BYTE* gcEpilogTable;
+
+ unsigned gcEpilogPrevOffset;
+
+ size_t gcInfoBlockHdrSave(BYTE* dest,
+ int mask,
+ unsigned methodSize,
+ unsigned prologSize,
+ unsigned epilogSize,
+ InfoHdr* header,
+ int* s_cached);
+
+public:
+ static void gcInitEncoderLookupTable();
+
+private:
+ static size_t gcRecordEpilog(void* pCallBackData, unsigned offset);
+#else // JIT32_GCENCODER
+ void gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSize, unsigned prologSize);
+
+#ifdef DEBUG
+ void gcDumpVarPtrDsc(varPtrDsc* desc);
+#endif // DEBUG
+
+#endif // JIT32_GCENCODER
+
+#if DUMP_GC_TABLES
+
+ void gcFindPtrsInFrame(const void* infoBlock, const void* codeBlock, unsigned offs);
+
+#ifdef JIT32_GCENCODER
+ unsigned gcInfoBlockHdrDump(const BYTE* table,
+ InfoHdr* header, /* OUT */
+ unsigned* methodSize); /* OUT */
+
+ unsigned gcDumpPtrTable(const BYTE* table, const InfoHdr& header, unsigned methodSize);
+
+#endif // JIT32_GCENCODER
+#endif // DUMP_GC_TABLES
+
+#ifndef LEGACY_BACKEND
+ // This method updates the appropriate reg masks when a variable is moved.
+public:
+ void gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarDsc* varDsc);
+#endif // !LEGACY_BACKEND
+};
+
+inline unsigned char encodeUnsigned(BYTE* dest, unsigned value)
+{
+ unsigned char size = 1;
+ unsigned tmp = value;
+ while (tmp > 0x7F)
+ {
+ tmp >>= 7;
+ assert(size < 6); // A 32-bit unsigned value needs at most 5 bytes of 7-bit groups.
+ size++;
+ }
+ if (dest)
+ {
+ // write the bytes starting at the end of dest in LSB to MSB order
+ BYTE* p = dest + size;
+ BYTE cont = 0; // The last byte has no continuation flag
+ while (value > 0x7F)
+ {
+ *--p = cont | (value & 0x7f);
+ value >>= 7;
+ cont = 0x80; // Non last bytes have a continuation flag
+ }
+ *--p = cont | (BYTE)value; // Now write the first byte
+ assert(p == dest);
+ }
+ return size;
+}
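+
+// Worked example (derived from the code above): encodeUnsigned(dest, 300)
+// returns 2 and, when dest is non-null, writes 0x82 then 0x2C. The value is
+// split into 7-bit groups (2 and 44), emitted most-significant group first,
+// with the 0x80 continuation flag set on every byte except the last.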
+
+inline unsigned char encodeUDelta(BYTE* dest, unsigned value, unsigned lastValue)
+{
+ assert(value >= lastValue);
+ return encodeUnsigned(dest, value - lastValue);
+}
+
+inline unsigned char encodeSigned(BYTE* dest, int val)
+{
+ unsigned char size = 1;
+ unsigned value = val;
+ BYTE neg = 0;
+ if (val < 0)
+ {
+ value = -val;
+ neg = 0x40;
+ }
+ unsigned tmp = value;
+ while (tmp > 0x3F)
+ {
+ tmp >>= 7;
+ assert(size < 16); // Loose bound: a 32-bit magnitude needs at most 5 bytes, so this never fires.
+ size++;
+ }
+ if (dest)
+ {
+ // write the bytes starting at the end of dest in LSB to MSB order
+ BYTE* p = dest + size;
+ BYTE cont = 0; // The last byte has no continuation flag
+ while (value > 0x3F)
+ {
+ *--p = cont | (value & 0x7f);
+ value >>= 7;
+ cont = 0x80; // Non last bytes have a continuation flag
+ }
+ *--p = neg | cont | (BYTE)value; // Now write the first byte
+ assert(p == dest);
+ }
+ return size;
+}
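+
+// Worked example (derived from the code above): encodeSigned(dest, -300)
+// returns 2 and writes 0xC2 then 0x2C. The magnitude 300 is split into 7-bit
+// groups as for encodeUnsigned, and the first byte carries both the 0x80
+// continuation flag and the 0x40 sign flag.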
+
+#endif // _JITGCINFO_H_
diff --git a/src/jit/jitpch.cpp b/src/jit/jitpch.cpp
new file mode 100644
index 0000000000..74300aefc3
--- /dev/null
+++ b/src/jit/jitpch.cpp
@@ -0,0 +1,6 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#include "jitpch.h"
diff --git a/src/jit/jitpch.h b/src/jit/jitpch.h
new file mode 100644
index 0000000000..2e69e79208
--- /dev/null
+++ b/src/jit/jitpch.h
@@ -0,0 +1,36 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include <stdint.h>
+#include <windows.h>
+#include <wchar.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <float.h>
+#include <share.h>
+#include <cstdlib>
+#include <intrin.h>
+
+#if COR_JIT_EE_VERSION <= 460
+#include "corjithost.h"
+#include "jithost.h"
+#endif
+#include "jitconfig.h"
+#include "jit.h"
+#include "iallocator.h"
+#include "hashbv.h"
+#include "compiler.h"
+#include "dataflow.h"
+#include "block.h"
+#include "jiteh.h"
+#include "rationalize.h"
+#include "jitstd.h"
+#include "ssaconfig.h"
+#include "blockset.h"
+#include "bitvec.h"
+#include "inline.h"
+#include "objectalloc.h"
diff --git a/src/jit/jitstd.h b/src/jit/jitstd.h
new file mode 100644
index 0000000000..6b428679f0
--- /dev/null
+++ b/src/jit/jitstd.h
@@ -0,0 +1,10 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "allocator.h"
+#include "type_traits.h"
+#include "pair.h"
+#include "utility.h"
+#include "unordered_map.h"
+#include "unordered_set.h"
diff --git a/src/jit/jitstd/.gitmirror b/src/jit/jitstd/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/jitstd/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror. \ No newline at end of file
diff --git a/src/jit/jitstd/algorithm.h b/src/jit/jitstd/algorithm.h
new file mode 100644
index 0000000000..7b6447e1a5
--- /dev/null
+++ b/src/jit/jitstd/algorithm.h
@@ -0,0 +1,49 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+namespace jitstd
+{
+
+template <typename InputIterator, typename CompareValue>
+InputIterator find(InputIterator first, InputIterator last,
+ const CompareValue& value)
+{
+ for (; first != last; ++first)
+ {
+ if (*first == value)
+ {
+ return first;
+ }
+ }
+ return last;
+}
+
+template <typename InputIterator, typename Pred>
+InputIterator find_if(InputIterator first, InputIterator last, const Pred& pred)
+{
+ for (; first != last; ++first)
+ {
+ if (pred(*first))
+ {
+ return first;
+ }
+ }
+ return last;
+}
+
+template<typename InputIterator, typename Function>
+Function for_each(InputIterator first, InputIterator last, Function func)
+{
+ for (; first != last; ++first)
+ {
+ func(*first);
+ }
+ return func;
+}
+
+}
diff --git a/src/jit/jitstd/allocator.h b/src/jit/jitstd/allocator.h
new file mode 100644
index 0000000000..2bd33daa98
--- /dev/null
+++ b/src/jit/jitstd/allocator.h
@@ -0,0 +1,211 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX allocator<T> XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#pragma once
+
+#include "iallocator.h"
+#include "new.h"
+
+namespace jitstd
+{
+
+template <typename T>
+class allocator;
+
+template <>
+class allocator<void>
+{
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef void* pointer;
+ typedef const void* const_pointer;
+ typedef void value_type;
+
+ template <typename U>
+ struct rebind
+ {
+ typedef allocator<U> allocator;
+ };
+
+private:
+ allocator();
+
+public:
+ inline allocator(IAllocator* pAlloc);
+
+ template <typename U>
+ inline allocator(const allocator<U>& alloc);
+
+ inline allocator(const allocator& alloc);
+
+ template <typename U>
+ inline allocator& operator=(const allocator<U>& alloc);
+
+private:
+ IAllocator* m_pAlloc;
+ template <typename U>
+ friend class allocator;
+};
+
+allocator<void>::allocator(IAllocator* pAlloc)
+ : m_pAlloc(pAlloc)
+{
+}
+
+allocator<void>::allocator(const allocator& alloc)
+ : m_pAlloc(alloc.m_pAlloc)
+{
+}
+
+template <typename U>
+allocator<void>::allocator(const allocator<U>& alloc)
+ : m_pAlloc(alloc.m_pAlloc)
+{
+}
+
+template <typename U>
+allocator<void>& allocator<void>::operator=(const allocator<U>& alloc)
+{
+ m_pAlloc = alloc.m_pAlloc;
+ return *this;
+}
+
+template <typename T>
+class allocator
+{
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef T& reference;
+ typedef const T* const_pointer;
+ typedef const T& const_reference;
+ typedef T value_type;
+
+private:
+ allocator();
+public:
+ allocator(IAllocator* pAlloc);
+
+ template <typename U>
+ allocator(const allocator<U>& alloc);
+
+ allocator(const allocator& alloc);
+
+ template <typename U>
+ allocator& operator=(const allocator<U>& alloc);
+
+ pointer address(reference val);
+ const_pointer address(const_reference val) const;
+ pointer allocate(size_type count, allocator<void>::const_pointer hint = nullptr);
+ void construct(pointer ptr, const_reference val);
+ void deallocate(pointer ptr, size_type size);
+ void destroy(pointer ptr);
+ size_type max_size() const;
+ template <typename U>
+ struct rebind
+ {
+ typedef allocator<U> allocator;
+ };
+
+private:
+ IAllocator* m_pAlloc;
+ template <typename U>
+ friend class allocator;
+};
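+
+// Example use (a sketch; assumes the usual allocator-taking container
+// constructors in this library, and a hypothetical IAllocator* named pIAllocator):
+//     jitstd::allocator<int> alloc(pIAllocator);
+//     jitstd::vector<int>    ints(alloc);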
+
+} // end of namespace jitstd
+
+
+namespace jitstd
+{
+
+template <typename T>
+allocator<T>::allocator(IAllocator* pAlloc)
+ : m_pAlloc(pAlloc)
+{
+}
+
+template <typename T>
+template <typename U>
+allocator<T>::allocator(const allocator<U>& alloc)
+ : m_pAlloc(alloc.m_pAlloc)
+{
+}
+
+template <typename T>
+allocator<T>::allocator(const allocator<T>& alloc)
+ : m_pAlloc(alloc.m_pAlloc)
+{
+}
+
+template <typename T>
+template <typename U>
+allocator<T>& allocator<T>::operator=(const allocator<U>& alloc)
+{
+ m_pAlloc = alloc.m_pAlloc;
+ return *this;
+}
+
+template <typename T>
+typename allocator<T>::pointer allocator<T>::address(reference val)
+{
+ return &val;
+}
+
+template <typename T>
+typename allocator<T>::const_pointer allocator<T>::address(const_reference val) const
+{
+ return &val;
+}
+
+template <typename T>
+T* allocator<T>::allocate(size_type count, allocator<void>::const_pointer hint)
+{
+ return (pointer) m_pAlloc->Alloc(sizeof(value_type) * count);
+}
+
+template <typename T>
+void allocator<T>::construct(pointer ptr, const_reference val)
+{
+ new (ptr, placement_t()) value_type(val);
+}
+
+template <typename T>
+void allocator<T>::deallocate(pointer ptr, size_type size)
+{
+ // m_pAlloc->Free(ptr);
+}
+
+template <typename T>
+void allocator<T>::destroy(pointer ptr)
+{
+ ptr->~T();
+}
+
+template <typename T>
+typename allocator<T>::size_type allocator<T>::max_size() const
+{
+ return (size_type) -1;
+}
+
+} // end of namespace jitstd
diff --git a/src/jit/jitstd/functional.h b/src/jit/jitstd/functional.h
new file mode 100644
index 0000000000..31456a870e
--- /dev/null
+++ b/src/jit/jitstd/functional.h
@@ -0,0 +1,62 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+namespace jitstd
+{
+
+template <typename T>
+void swap(T& a, T& b)
+{
+ T t(a);
+ a = b;
+ b = t;
+}
+
+template <typename Arg, typename Result>
+struct unary_function
+{
+ typedef Arg argument_type;
+ typedef Result result_type;
+};
+
+template <typename Arg1, typename Arg2, typename Result>
+struct binary_function
+{
+ typedef Arg1 first_argument_type;
+ typedef Arg2 second_argument_type;
+ typedef Result result_type;
+};
+
+template <typename T>
+struct greater : binary_function<T, T, bool>
+{
+ bool operator()(const T& lhs, const T& rhs) const
+ {
+ return lhs > rhs;
+ }
+};
+
+template <typename T>
+struct equal_to : binary_function<T, T, bool>
+{
+ bool operator()(const T& lhs, const T& rhs) const
+ {
+ return lhs == rhs;
+ }
+};
+
+template <typename T>
+struct identity : unary_function<T, T>
+{
+ const T& operator()(const T& op) const
+ {
+ return op;
+ }
+};
+
+} // end of namespace jitstd.
diff --git a/src/jit/jitstd/hash.h b/src/jit/jitstd/hash.h
new file mode 100644
index 0000000000..18db74fc9f
--- /dev/null
+++ b/src/jit/jitstd/hash.h
@@ -0,0 +1,103 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+#include "type_traits.h"
+#include <stdio.h>
+
+namespace jitstd
+{
+template<typename Type>
+class hash
+{
+public:
+ size_t operator()(const Type& val) const
+ {
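+ // One step of the Park-Miller "minimal standard" LCG (multiplier 16807,
+ // modulus 2147483647), evaluated with Schrage's method to avoid overflow,
+ // used here purely as a bit mixer.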
+ div_t qrem = ::div((int)(size_t) val, 127773);
+ qrem.rem = 16807 * qrem.rem - 2836 * qrem.quot;
+ if (qrem.rem < 0)
+ {
+ qrem.rem += 2147483647;
+ }
+ return ((size_t) qrem.rem);
+ }
+};
+
+template<>
+class hash<int>
+{
+public:
+ size_t operator()(const int& val) const
+ {
+ return val;
+ }
+};
+
+template<>
+class hash<unsigned __int64>
+{
+private:
+ typedef unsigned __int64 Type;
+
+public:
+ size_t operator()(const Type& val) const
+ {
+ return (hash<int>()((int)(val & 0xffffffffUL)) ^ hash<int>()((int)(val >> 32)));
+ }
+};
+
+template<>
+class hash<__int64>
+{
+private:
+ typedef __int64 Type;
+
+public:
+ size_t operator()(const Type& val) const
+ {
+ return (hash<unsigned __int64>()((unsigned __int64) val));
+ }
+};
+
+template<typename Type>
+class hash<Type*>
+{
+private:
+ typedef typename conditional<sizeof (Type*) <= sizeof (int), int, __int64>::type TInteger;
+public:
+ size_t operator()(const Type* val) const
+ {
+ return (hash<TInteger>()((TInteger) val));
+ }
+};
+
+template<>
+class hash<float>
+{
+private:
+ typedef float Type;
+public:
+ size_t operator()(const Type& val) const
+ {
+ unsigned long bits = *(unsigned long*) &val;
+ return (hash<unsigned long>()(bits == 0x80000000 ? 0 : bits));
+ }
+};
+
+template<>
+class hash<double>
+{
+public:
+ typedef double Type;
+ size_t operator()(const Type& val) const
+ {
+ unsigned __int64 bits = *(unsigned __int64*)&val;
+ return (hash<unsigned __int64>()((bits & (((unsigned __int64) -1) >> 1)) == 0 ? 0 : bits));
+ }
+};
+
+}
diff --git a/src/jit/jitstd/hashtable.h b/src/jit/jitstd/hashtable.h
new file mode 100644
index 0000000000..05b033a746
--- /dev/null
+++ b/src/jit/jitstd/hashtable.h
@@ -0,0 +1,822 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX hashtable<K,V,H,P,A,KO> XX
+XX XX
+XX Implemented using a vector of (begin, end) list-iterator pairs, each of XX
+XX which delimits a single bucket. The key-value pairs themselves live in XX
+XX a doubly linked list, with each bucket's elements kept contiguous. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
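+
+// Illustrative layout (a sketch): with three buckets and elements A, B, C,
+// where A and C hash to bucket 0 and B hashes to bucket 2, the element list
+// might be A <-> C <-> B, and m_buckets would then hold the iterator ranges
+//     bucket 0: [A, B)    bucket 1: empty (first == second)    bucket 2: [B, end)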
+
+#pragma once
+
+#include "hash.h"
+#include "functional.h"
+#include "allocator.h"
+#include "vector.h"
+#include "list.h"
+#include "pair.h"
+
+namespace jitstd
+{
+
+static const float kflDefaultLoadFactor = 3.0f;
+
+template <typename Key,
+ typename Value = Key,
+ typename Hash = jitstd::hash<Key>,
+ typename Pred = jitstd::equal_to<Key>,
+ typename Alloc = jitstd::allocator<Value>,
+ typename KeyOf = jitstd::identity<Value>>
+class hashtable
+{
+public:
+ typedef Key key_type;
+ typedef Value value_type;
+ typedef Hash hasher;
+ typedef Pred key_equal;
+ typedef Alloc allocator_type;
+ typedef typename allocator_type::pointer pointer;
+ typedef typename allocator_type::const_pointer const_pointer;
+ typedef typename allocator_type::reference reference;
+ typedef typename allocator_type::const_reference const_reference;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef typename list<Value, Alloc>::iterator iterator;
+ typedef typename list<Value, Alloc>::reverse_iterator reverse_iterator;
+ typedef typename list<Value, Alloc>::const_iterator const_iterator;
+ typedef typename list<Value, Alloc>::iterator local_iterator;
+
+protected:
+ hashtable();
+
+ typedef pair<iterator, iterator> BucketEntry;
+ typedef vector<BucketEntry, typename Alloc::template rebind<BucketEntry>::allocator> Buckets;
+ typedef list<Value, typename Alloc::template rebind<Value>::allocator> Elements;
+
+protected:
+ explicit hashtable(size_type,
+ const allocator_type& a,
+ const KeyOf& keyOf = KeyOf());
+
+ hashtable(size_type n,
+ const hasher& hf,
+ const key_equal& eq,
+ const allocator_type& a,
+ const KeyOf& keyOf = KeyOf());
+
+ template<typename InputIterator>
+ hashtable(
+ InputIterator f, InputIterator l,
+ size_type n,
+ const hasher& hf,
+ const key_equal& eq,
+ const allocator_type& a,
+ const KeyOf& keyOf = KeyOf());
+
+ explicit hashtable(const allocator_type& a, const KeyOf& keyOf = KeyOf());
+
+ hashtable(const hashtable& other);
+
+ ~hashtable();
+
+public:
+ hashtable& operator=(const hashtable& other);
+
+ allocator_type get_allocator() const;
+
+ bool empty() const;
+
+ size_type size() const;
+ size_type max_size() const;
+
+ iterator begin();
+ iterator end();
+
+ // Even though we have an unordered set and there is no concept of forward and
+ // reverse, rbegin will just return the first element inserted. This is not in STL.
+ reverse_iterator rbegin();
+ reverse_iterator rend();
+
+ const_iterator begin() const;
+ const_iterator end() const;
+ const_iterator cbegin() const;
+ const_iterator cend() const;
+ local_iterator begin(size_type size);
+ local_iterator end(size_type size);
+
+ pair<iterator, bool> insert(const value_type& value);
+ iterator insert(const_iterator, const value_type& value);
+ template<typename InputIterator>
+ void insert(InputIterator first, InputIterator last);
+
+ iterator erase(iterator position);
+ size_type erase(const key_type& key);
+ iterator erase(iterator first, iterator last);
+
+ void clear();
+ void swap(hashtable& table);
+
+ hasher hash_function() const;
+ key_equal key_eq() const;
+
+ const_iterator find(const key_type& key) const;
+ iterator find(const key_type& key);
+
+ size_type count(const key_type& key) const;
+
+ size_type bucket_count() const;
+ size_type max_bucket_count() const;
+
+ size_type bucket_size(size_type size) const;
+ size_type bucket(const key_type& key) const;
+
+ float load_factor() const;
+ float max_load_factor() const;
+ void max_load_factor(float);
+
+ void rehash(size_type);
+
+protected:
+ template <typename Compare>
+ iterator find(const key_type&, const Compare& comp);
+
+ // helpers
+ bool check_load();
+ void copy_helper(const hashtable& other);
+ size_type hash_helper(const key_type& value, size_type buckets) const;
+ pair<iterator, bool> insert_helper(const value_type& value, Buckets& buckets, Elements& elements, bool fRehashing);
+ iterator erase_helper(const_iterator position);
+ void dump_helper();
+ void debug_check();
+
+private:
+
+ // member objects
+ Hash m_hasher;
+ Alloc m_allocator;
+ Pred m_pred;
+
+ Buckets m_buckets;
+ Elements m_elements;
+ size_type m_nSize;
+ KeyOf m_keyOf;
+
+ // metadata
+ float m_flMaxLoadFactor;
+};
+
+} // end of namespace jitstd
+
+
+namespace jitstd
+{
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::dump_helper()
+{
+ for (size_type i = 0; i < m_buckets.size(); ++i)
+ {
+ printf("\n");
+ printf("--------------=BEGIN=--------------\n");
+ printf("Load factor = %f\n", load_factor());
+ printf("-----------------------------------\n");
+ printf("Bucket number = %d %p %p\n", i, *((ptrdiff_t*)&(m_buckets[i].first)), *((ptrdiff_t*)&(m_buckets[i].second)));
+ printf("-----------------------------------\n");
+ for (typename Elements::iterator value = (m_buckets[i]).first; value != (m_buckets[i]).second; ++value)
+ {
+ printf("%d, ", *((ptrdiff_t*)&value), *value);
+ }
+ printf("-----------------------------------\n");
+ }
+}
+
+// We can't leave this permanently enabled -- it makes algorithms cubic, and causes tests to time out.
+// Enable when/if you have reason to believe there's a problem in hashtable.
+#define JITSTD_DO_HASHTABLE_DEBUGCHECK 0
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::debug_check()
+{
+#if JITSTD_DO_HASHTABLE_DEBUGCHECK
+ for (iterator iter = m_elements.begin(); iter != m_elements.end(); ++iter)
+ {
+ size_type nHash = hash_helper(m_keyOf(*iter), m_buckets.size());
+ BucketEntry& entry = m_buckets[nHash];
+ iterator iter2 = entry.first;
+ bool present = false;
+ while (iter2 != entry.second)
+ {
+ if (iter2 == iter)
+ {
+ present = true;
+ }
+ iter2++;
+ }
+ if (!present)
+ {
+ present = false;
+ }
+ assert(present);
+ }
+#endif
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+template <typename Compare>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::find(const key_type& key, const Compare& comp)
+{
+ if (empty())
+ {
+ return end();
+ }
+ size_type nHash = hash_helper(key, m_buckets.size());
+ BucketEntry& entry = m_buckets[nHash];
+ for (iterator i = entry.first; i != entry.second; ++i)
+ {
+ if (comp(m_keyOf(*i), key))
+ {
+ return i;
+ }
+ }
+ return end();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+bool hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::check_load()
+{
+ float flLoadFactor = load_factor();
+ if (flLoadFactor > m_flMaxLoadFactor)
+ {
+ rehash(m_buckets.size());
+ return true;
+ }
+ return false;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::erase_helper(const_iterator position)
+{
+ const Key& key = m_keyOf(*position);
+ size_type nHash = hash_helper(key, m_buckets.size());
+ BucketEntry& entry = m_buckets[nHash];
+ iterator eraseNext = end();
+ for (iterator first = entry.first; first != entry.second; ++first)
+ {
+ if (m_pred(m_keyOf(*first), key))
+ {
+ if (first == entry.first)
+ {
+ if (first != m_elements.begin())
+ {
+ iterator update = first;
+ update--;
+ size_type nUpdateHash = hash_helper(m_keyOf(*update), m_buckets.size());
+ if (nUpdateHash != nHash)
+ {
+ BucketEntry& updateEntry = m_buckets[nUpdateHash];
+ if (updateEntry.second == first)
+ {
+ updateEntry.second = first;
+ updateEntry.second++;
+ }
+ if (updateEntry.first == first)
+ {
+ updateEntry.first = first;
+ updateEntry.first++;
+ }
+ }
+ }
+ entry.first = m_elements.erase(first);
+ eraseNext = entry.first;
+ }
+ else
+ {
+ eraseNext = m_elements.erase(first);
+ }
+
+ --m_nSize;
+#ifdef DEBUG
+ debug_check();
+#endif
+ return eraseNext;
+ }
+ }
+ return end();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+pair<typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator, bool>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::insert_helper(
+ const Value& value, Buckets& buckets, Elements& elements, bool fRehashing)
+{
+ const Key& key = m_keyOf(value);
+ size_t nHash = hash_helper(key, buckets.size());
+ BucketEntry& entry = buckets[nHash];
+
+ iterator ret;
+ if (entry.first == entry.second)
+ {
+ entry.first = elements.insert(elements.begin(), value);
+ entry.second = entry.first;
+ entry.second++; // end iterator is one past always.
+ ret = entry.first;
+ }
+ else
+ {
+ for (iterator first = entry.first; first != entry.second; ++first)
+ {
+ if (m_pred(m_keyOf(*first), key))
+ {
+ return pair<iterator, bool>(first, false);
+ }
+ }
+ iterator firstNext = entry.first;
+ firstNext++;
+ ret = elements.insert(firstNext, value);
+ if (entry.second == entry.first)
+ {
+ entry.second = firstNext;
+ }
+ }
+ bool fRehashed = false;
+ if (!fRehashing)
+ {
+ m_nSize += 1;
+ fRehashed = check_load();
+ }
+
+#ifdef DEBUG
+ debug_check();
+#endif
+
+ return pair<iterator, bool>(fRehashed ? find(key, m_pred) : ret, true);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hash_helper(
+ const key_type& key, size_type buckets) const
+{
+ return m_hasher(key) % buckets;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::rehash(size_type n)
+{
+ size_type nCurBuckets = m_buckets.size();
+ float flLoadFactor = load_factor();
+ if (nCurBuckets >= n && flLoadFactor <= m_flMaxLoadFactor)
+ {
+ return;
+ }
+
+ size_type nBuckets = max(nCurBuckets, 1);
+ if (flLoadFactor > m_flMaxLoadFactor)
+ {
+ nBuckets *= 2;
+ }
+
+ if (nBuckets < n)
+ {
+ nBuckets = n;
+ }
+
+ Buckets buckets(m_allocator);
+ Elements elements(m_allocator);
+
+ buckets.resize(nBuckets, BucketEntry(m_elements.end(), m_elements.end())); // both equal means empty.
+ for (typename Elements::iterator iter = m_elements.begin(); iter != m_elements.end(); ++iter)
+ {
+ (void) insert_helper(*iter, buckets, elements, true);
+ }
+ m_buckets.swap(buckets);
+ m_elements.swap(elements);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hashtable(
+ size_type n,
+ allocator_type const& allocator,
+ const KeyOf& keyOf)
+ : m_allocator(allocator)
+ , m_buckets(Alloc::template rebind<hashtable::BucketEntry>::allocator(allocator))
+ , m_elements(allocator)
+ , m_flMaxLoadFactor(kflDefaultLoadFactor)
+ , m_nSize(0)
+ , m_keyOf(keyOf)
+{
+ rehash(n);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hashtable(
+ size_type n,
+ hasher const& hf,
+ key_equal const& eq,
+ allocator_type const& allocator,
+ const KeyOf& keyOf)
+ : m_hasher(hf)
+ , m_pred(eq)
+ , m_allocator(allocator)
+ , m_buckets(Alloc::template rebind<BucketEntry>::allocator(allocator))
+ , m_elements(allocator)
+ , m_flMaxLoadFactor(kflDefaultLoadFactor)
+ , m_nSize(0)
+ , m_keyOf(keyOf)
+{
+ rehash(n);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+template<typename InputIterator>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hashtable(
+ InputIterator f, InputIterator l,
+ size_type n,
+ const hasher& hf,
+ const key_equal& eq,
+ const allocator_type& allocator,
+ const KeyOf& keyOf)
+ : m_hasher(hf)
+ , m_pred(eq)
+ , m_allocator(allocator)
+ , m_buckets(Alloc::template rebind<BucketEntry>::allocator(allocator))
+ , m_elements(allocator)
+ , m_flMaxLoadFactor(kflDefaultLoadFactor)
+ , m_nSize(0)
+ , m_keyOf(keyOf)
+{
+ rehash(n);
+ insert(f, l);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hashtable(const allocator_type& allocator, const KeyOf& keyOf)
+ : m_allocator(allocator)
+ , m_buckets(Alloc::template rebind<BucketEntry>::allocator(allocator))
+ , m_elements(allocator)
+ , m_flMaxLoadFactor(kflDefaultLoadFactor)
+ , m_nSize(0)
+ , m_keyOf(keyOf)
+{
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::copy_helper(const hashtable& other)
+{
+ m_buckets.clear();
+ m_elements.clear();
+ m_nSize = 0;
+
+ rehash(other.m_buckets.size());
+ for (const_iterator i = other.m_elements.begin(); i != other.m_elements.end(); ++i)
+ {
+ insert_helper(*i, m_buckets, m_elements, false);
+ }
+ m_nSize = other.m_nSize;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hashtable(const hashtable& other)
+ : m_hasher(other.m_hasher)
+ , m_pred(other.m_pred)
+ , m_allocator(other.m_allocator)
+ , m_flMaxLoadFactor(other.m_flMaxLoadFactor)
+ , m_keyOf(other.m_keyOf)
+ , m_elements(other.m_allocator)
+ , m_buckets(other.m_allocator)
+{
+ copy_helper(other);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::~hashtable()
+{
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>&
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::operator=(hashtable const& other)
+{
+ m_hasher = other.m_hasher;
+ m_pred = other.m_pred;
+ m_allocator = other.m_allocator;
+ m_flMaxLoadFactor = other.m_flMaxLoadFactor;
+ m_keyOf = other.m_keyOf;
+ copy_helper(other);
+ return *this;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::allocator_type
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::get_allocator() const
+{
+ return m_allocator;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+bool hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::empty() const
+{
+ return m_nSize == 0;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size() const
+{
+ return m_nSize;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::max_size() const
+{
+ return ((size_type)(-1)) >> 1;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::begin()
+{
+ return m_elements.begin();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::reverse_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::rbegin()
+{
+ return m_elements.rbegin();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::end()
+{
+ return m_elements.end();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::reverse_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::rend()
+{
+ return m_elements.rend();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::const_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::begin() const
+{
+ return m_elements.begin();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::const_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::end() const
+{
+ return m_elements.end();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::const_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::cbegin() const
+{
+ return m_elements.begin();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::const_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::cend() const
+{
+ return m_elements.end();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+jitstd::pair<typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator, bool>
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::insert(const Value& val)
+{
+ // Allocate some space first.
+ rehash(2);
+ return insert_helper(val, m_buckets, m_elements, false);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::insert(const_iterator position, const Value& value)
+{
+ // Allocate some space first.
+ rehash(2);
+
+ // We do not use the hint here; we may consider doing so later.
+ return insert_helper(value, m_buckets, m_elements, false).first;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+template<typename InputIterator>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::insert(InputIterator first, InputIterator last)
+{
+ // Allocate some space first.
+ rehash(2);
+ while (first != last)
+ {
+ (void) insert_helper(*first, m_buckets, m_elements, false);
+ ++first;
+ }
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::erase(iterator position)
+{
+ return erase_helper(position);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::erase(const key_type& key)
+{
+ iterator iter = erase_helper(find(key));
+ return iter == end() ? 0 : 1;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::erase(iterator first, iterator last)
+{
+ iterator iter = end();
+ while (first != last)
+ {
+ iter = erase_helper(find(m_keyOf(*first)));
+ ++first;
+ }
+ return iter;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::clear()
+{
+ m_buckets.clear();
+ m_elements.clear();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::swap(hashtable& set)
+{
+ std::swap(set.m_buckets, m_buckets);
+ std::swap(set.m_elements, m_elements);
+ std::swap(set.m_nSize, this->m_nSize);
+ std::swap(set.m_flMaxLoadFactor, this->m_flMaxLoadFactor);
+ std::swap(set.m_keyOf, this->m_keyOf);
+}
+
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hasher
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hash_function() const
+{
+ return m_hasher;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::key_equal
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::key_eq() const
+{
+ return m_pred;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::const_iterator
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::find(const key_type& key) const
+{
+ if (empty())
+ {
+ return end();
+ }
+ size_type nHash = hash_helper(key, m_buckets.size());
+ BucketEntry& entry = m_buckets[nHash];
+ for (iterator i = entry.first; i != entry.second; ++i)
+ {
+ if (m_pred(m_keyOf(*i), key))
+ {
+ return i;
+ }
+ }
+ return end();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::find(const key_type& key)
+{
+ if (empty())
+ {
+ return end();
+ }
+ size_type nHash = hash_helper(key, m_buckets.size());
+ BucketEntry& entry = m_buckets[nHash];
+ for (iterator i = entry.first; i != entry.second; ++i)
+ {
+ if (m_pred(m_keyOf(*i), key))
+ {
+ return i;
+ }
+ }
+ return end();
+}
+
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::count(const key_type& key) const
+{
+ size_type nCount = 0;
+ size_type nHash = hash_helper(key, m_buckets.size());
+ BucketEntry& bucket = m_buckets[nHash];
+ for (iterator i = bucket.first; i != bucket.second; ++i)
+ {
+ if (m_pred(m_keyOf(*i), key))
+ {
+ ++nCount;
+ }
+ }
+ return nCount;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::bucket_count() const
+{
+ return m_buckets.size();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::max_bucket_count() const
+{
+ return m_buckets.size();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::bucket_size(size_type size) const
+{
+ size_type nCount = 0;
+ BucketEntry& bucket = m_buckets[size];
+ for (iterator i = bucket.first; i != bucket.second; ++i)
+ {
+ ++nCount;
+ }
+ return nCount;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::bucket(const key_type& key) const
+{
+ return hash_helper(key, m_buckets.size());
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::local_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::begin(size_type size)
+{
+ return m_buckets[size].first;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::local_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::end(size_type size)
+{
+ return m_buckets[size].second;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+float hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::load_factor() const
+{
+ return m_nSize ? (((float) m_nSize) / m_buckets.size()) : 0;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+float hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::max_load_factor() const
+{
+ return m_flMaxLoadFactor;
+}
+
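+// Record the new bound and rehash with the current bucket count so that the
+// bound takes effect immediately.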
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::max_load_factor(float flLoadFactor)
+{
+ m_flMaxLoadFactor = flLoadFactor;
+ rehash(m_buckets.size());
+}
+
+} // end of namespace jitstd.
diff --git a/src/jit/jitstd/iterator.h b/src/jit/jitstd/iterator.h
new file mode 100644
index 0000000000..975755c59c
--- /dev/null
+++ b/src/jit/jitstd/iterator.h
@@ -0,0 +1,144 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+namespace jitstd
+{
+
+template <class Category, class T, class Distance = ptrdiff_t, class Pointer = T*, class Reference = T&>
+struct iterator
+{
+ typedef T value_type;
+ typedef Distance difference_type;
+ typedef Pointer pointer;
+ typedef Reference reference;
+ typedef Category iterator_category;
+};
+
+struct input_iterator_tag
+{
+};
+
+struct forward_iterator_tag : public input_iterator_tag
+{
+};
+
+struct bidirectional_iterator_tag : public forward_iterator_tag
+{
+};
+
+struct random_access_iterator_tag : public bidirectional_iterator_tag
+{
+};
+
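+// Tag produced for integral types (see the specializations below) so that
+// tag-dispatching helpers can tell a (count, value) call apart from a genuine
+// iterator-range call.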
+struct int_not_an_iterator_tag
+{
+};
+
+template <typename Iterator>
+struct iterator_traits
+{
+ typedef typename Iterator::difference_type difference_type;
+ typedef typename Iterator::value_type value_type;
+ typedef typename Iterator::pointer pointer;
+ typedef typename Iterator::reference reference;
+ typedef typename Iterator::iterator_category iterator_category;
+};
+
+template <typename T>
+struct iterator_traits<T*>
+{
+ typedef ptrdiff_t difference_type;
+ typedef T value_type;
+ typedef T* pointer;
+ typedef T& reference;
+ typedef random_access_iterator_tag iterator_category;
+};
+
+template <typename T>
+struct iterator_traits<const T*>
+{
+ typedef ptrdiff_t difference_type;
+ typedef T value_type;
+ typedef const T* pointer;
+ typedef const T& reference;
+ typedef random_access_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<bool>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<char>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<signed char>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<unsigned char>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<short>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<unsigned short>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<int>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<unsigned int>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<__int64>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<unsigned __int64>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+namespace util
+{
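+// Returns a default-constructed category tag object for the given iterator,
+// convenient for tag-dispatching to the appropriate overload.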
+template<class Iterator>
+inline
+typename iterator_traits<Iterator>::iterator_category
+ iterator_category(const Iterator&)
+{
+ typename iterator_traits<Iterator>::iterator_category categ;
+ return categ;
+}
+} // end of namespace util.
+
+} // end of namespace jitstd.
diff --git a/src/jit/jitstd/jitstd.cpp b/src/jit/jitstd/jitstd.cpp
new file mode 100644
index 0000000000..48d80e2245
--- /dev/null
+++ b/src/jit/jitstd/jitstd.cpp
@@ -0,0 +1,34 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// jitstd.cpp : Defines the entry point for the console application.
+//
+
+
+#include "stdafx.h"
+
+#include <iostream>
+#include <windows.h>
+#include <string>
+
+#include "iallocator.h"
+
+#include "algorithm.h"
+#include "functional.h"
+#include "hash.h"
+
+#include "unordered_map.h"
+#include "unordered_set.h"
+#include "hashtable.h"
+#include "list.h"
+#include "vector.h"
+#include "pair.h"
+
+int _tmain(int argc, _TCHAR* argv[])
+{
+ // return test1();
+ return 0;
+}
+
+
diff --git a/src/jit/jitstd/jitstd.sln b/src/jit/jitstd/jitstd.sln
new file mode 100644
index 0000000000..6aa099ce3c
--- /dev/null
+++ b/src/jit/jitstd/jitstd.sln
@@ -0,0 +1,20 @@
+
+Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual Studio 2010
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jitstd", "jitstd.vcxproj", "{A4576E91-78F0-4FD1-8323-8FA3BACE0581}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Release|Win32 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {A4576E91-78F0-4FD1-8323-8FA3BACE0581}.Debug|Win32.ActiveCfg = Debug|Win32
+ {A4576E91-78F0-4FD1-8323-8FA3BACE0581}.Debug|Win32.Build.0 = Debug|Win32
+ {A4576E91-78F0-4FD1-8323-8FA3BACE0581}.Release|Win32.ActiveCfg = Release|Win32
+ {A4576E91-78F0-4FD1-8323-8FA3BACE0581}.Release|Win32.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/src/jit/jitstd/jitstd.vcxproj b/src/jit/jitstd/jitstd.vcxproj
new file mode 100644
index 0000000000..bed1b3a12d
--- /dev/null
+++ b/src/jit/jitstd/jitstd.vcxproj
@@ -0,0 +1,103 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{A4576E91-78F0-4FD1-8323-8FA3BACE0581}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>jitstd</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>Unicode</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <None Include="ReadMe.txt" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="algorithm.h" />
+ <ClInclude Include="allocator.h" />
+ <ClInclude Include="functional.h" />
+ <ClInclude Include="hash.h" />
+ <ClInclude Include="hashtable.h" />
+ <ClInclude Include="iterator.h" />
+ <ClInclude Include="list.h" />
+ <ClInclude Include="pair.h" />
+ <ClInclude Include="stdafx.h" />
+ <ClInclude Include="targetver.h" />
+ <ClInclude Include="type_traits.h" />
+ <ClInclude Include="unordered_map.h" />
+ <ClInclude Include="unordered_set.h" />
+ <ClInclude Include="vector.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="jitstd.cpp" />
+ <ClCompile Include="stdafx.cpp">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
+ </ClCompile>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/src/jit/jitstd/list.h b/src/jit/jitstd/list.h
new file mode 100644
index 0000000000..85545f741e
--- /dev/null
+++ b/src/jit/jitstd/list.h
@@ -0,0 +1,1243 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX list<T> XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
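+
+// A minimal usage sketch (assuming an already-constructed jitstd::allocator<int>
+// instance named "alloc"; obtaining such an allocator is outside this header):
+//
+//   jitstd::list<int> numbers(alloc);
+//   numbers.push_back(1);
+//   numbers.push_front(0);
+//   for (jitstd::list<int>::iterator i = numbers.begin(); i != numbers.end(); ++i)
+//   {
+//       // visits 0, then 1
+//   }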
+
+#pragma once
+
+#include "iterator.h"
+#include "functional.h"
+
+namespace jitstd
+{
+
+template <typename T, typename Allocator = jitstd::allocator<T>>
+class list
+{
+public:
+ typedef Allocator allocator_type;
+ typedef T* pointer;
+ typedef T& reference;
+ typedef const T* const_pointer;
+ typedef const T& const_reference;
+
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T value_type;
+
+ // Forward declaration
+private:
+ struct Node;
+
+public:
+ // nested classes
+ class iterator;
+ class const_iterator : public jitstd::iterator<bidirectional_iterator_tag, T>
+ {
+ private:
+ const_iterator(Node* ptr);
+ const_iterator();
+ public:
+ const_iterator(const const_iterator& it);
+ const_iterator(const typename list<T, Allocator>::iterator& it);
+
+ const_iterator& operator++();
+ const_iterator& operator++(int);
+ const_iterator& operator--();
+ const_iterator& operator--(int);
+ const_iterator operator+(difference_type n);
+ const_iterator operator-(difference_type n);
+ size_type operator-(const const_iterator& that);
+ bool operator==(const const_iterator& it) const;
+ bool operator!=(const const_iterator& it) const;
+ const T& operator*() const;
+ const T* operator&() const;
+ const T* operator->() const;
+ operator const T*() const;
+
+ private:
+ friend class list<T, Allocator>;
+ Node* m_pNode;
+ };
+
+ class iterator : public jitstd::iterator<bidirectional_iterator_tag, T>
+ {
+ iterator(Node* ptr);
+ public:
+ iterator();
+ iterator(const iterator& it);
+
+ iterator& operator++();
+ iterator& operator++(int);
+ iterator& operator--();
+ iterator& operator--(int);
+ iterator operator+(difference_type n);
+ iterator operator-(difference_type n);
+ size_type operator-(const iterator& that);
+ bool operator==(const iterator& it);
+ bool operator!=(const iterator& it);
+ T& operator*();
+ T* operator&();
+ T* operator->();
+ operator T*();
+
+ private:
+ friend class list<T, Allocator>;
+ friend class list<T, Allocator>::const_iterator;
+ Node* m_pNode;
+ };
+
+ class reverse_iterator;
+ class const_reverse_iterator : public jitstd::iterator<bidirectional_iterator_tag, T>
+ {
+ private:
+ const_reverse_iterator(Node* ptr);
+ public:
+ const_reverse_iterator();
+ const_reverse_iterator(const const_reverse_iterator& it);
+ const_reverse_iterator(const reverse_iterator& it);
+
+ const_reverse_iterator& operator++();
+ const_reverse_iterator& operator++(int);
+ const_reverse_iterator& operator--();
+ const_reverse_iterator& operator--(int);
+ const_reverse_iterator operator+(difference_type n);
+ const_reverse_iterator operator-(difference_type n);
+ size_type operator-(const const_reverse_iterator& that);
+ bool operator==(const const_reverse_iterator& it) const;
+ bool operator!=(const const_reverse_iterator& it) const;
+ const T& operator*() const;
+ const T* operator&() const;
+ const T* operator->() const;
+ operator const T*() const;
+
+ private:
+ friend class list<T, Allocator>;
+ Node* m_pNode;
+ };
+
+ class reverse_iterator : public jitstd::iterator<bidirectional_iterator_tag, T>
+ {
+ private:
+ reverse_iterator(Node* ptr);
+ public:
+ reverse_iterator();
+ reverse_iterator(const reverse_iterator& it);
+
+ reverse_iterator& operator++();
+ reverse_iterator& operator++(int);
+ reverse_iterator& operator--();
+ reverse_iterator& operator--(int);
+ reverse_iterator operator+(difference_type n);
+ reverse_iterator operator-(difference_type n);
+ size_type operator-(const reverse_iterator& that);
+ bool operator==(const reverse_iterator& it);
+ bool operator!=(const reverse_iterator& it);
+ T& operator*();
+ T* operator&();
+ T* operator->();
+ operator T*();
+ friend class list<T, Allocator>::const_reverse_iterator;
+
+ private:
+ friend class list<T, Allocator>;
+ Node* m_pNode;
+ };
+
+ explicit list(const Allocator&);
+ list(size_type n, const T& value, const Allocator&);
+
+ template <typename InputIterator>
+ list(InputIterator first, InputIterator last, const Allocator&);
+
+ list(const list<T, Allocator>&);
+
+ ~list();
+
+ template <class InputIterator>
+ void assign(InputIterator first, InputIterator last);
+
+ void assign(size_type size, const T& val);
+
+ reference back();
+ const_reference back() const;
+
+ iterator begin();
+ const_iterator begin() const;
+
+ void clear();
+ bool empty() const;
+
+ iterator end();
+ const_iterator end() const;
+
+ iterator erase(iterator position);
+ iterator erase(iterator first, iterator last);
+
+ reference front();
+ const_reference front() const;
+
+ allocator_type get_allocator() const;
+
+ iterator insert(iterator position, const T& x);
+ template <class... Args>
+ iterator emplace(iterator position, Args&&... args);
+ void insert(iterator position, size_type n, const T& x);
+ template <class InputIterator>
+ void insert(iterator position, InputIterator first, InputIterator last);
+
+ size_type max_size() const;
+
+ void merge(list<T, Allocator>& lst);
+ template <class Compare>
+ void merge (list<T, Allocator>& lst, Compare comp);
+
+ list<T, Allocator>& operator=(const list<T, Allocator>& lst);
+
+ void pop_back();
+ void pop_front();
+
+ void push_back(const T& val);
+ template <class... Args>
+ void emplace_back(Args&&... args);
+ void push_front (const T& val);
+ template <class... Args>
+ void emplace_front(Args&&... args);
+
+ reverse_iterator rbegin();
+ const_reverse_iterator rbegin() const;
+
+ void remove(const T& val);
+ template <class Predicate>
+ void remove_if(Predicate pred);
+
+ reverse_iterator rend();
+ const_reverse_iterator rend() const;
+
+ void resize(size_type sz, const T& c);
+ void reverse();
+
+ size_type size() const;
+ void sort();
+
+ template <class Compare>
+ void sort(Compare comp);
+
+ void splice(iterator position, list& lst);
+ void splice(iterator position, list& lst, iterator i);
+ void splice(iterator position, list& x, iterator first, iterator last);
+
+ void swap(list<T,Allocator>& lst);
+
+ void unique();
+
+ template <class BinaryPredicate>
+ void unique(const BinaryPredicate& binary_pred);
+
+private:
+ struct Node
+ {
+ T m_value;
+ Node* m_pNext;
+ Node* m_pPrev;
+
+ template <class... Args>
+ Node(Args&&... args)
+ : m_value(jitstd::forward<Args>(args)...)
+ {
+ }
+ };
+
+ void destroy_helper();
+
+ void construct_helper(size_type n, const T& value, int_not_an_iterator_tag);
+ template <typename InputIterator>
+ void construct_helper(InputIterator first, InputIterator last, forward_iterator_tag);
+
+ void assign_helper(size_type n, const T& value, int_not_an_iterator_tag);
+ template <typename InputIterator>
+ void assign_helper(InputIterator first, InputIterator last, forward_iterator_tag);
+
+ void insert_helper(iterator position, size_type n, const T& value, int_not_an_iterator_tag);
+ template <typename InputIterator>
+ void insert_helper(iterator position, InputIterator first, InputIterator last, forward_iterator_tag);
+
+ void insert_new_node_helper(Node* pInsert, Node* pNewNode);
+
+ Node* m_pHead;
+ Node* m_pTail;
+ size_type m_nSize;
+ typename Allocator::template rebind<T>::allocator m_allocator;
+ typename Allocator::template rebind<Node>::allocator m_nodeAllocator;
+};
+
+}
+
+namespace jitstd
+{
+template <typename T, typename Allocator>
+list<T, Allocator>::list(const Allocator& allocator)
+ : m_pHead(nullptr)
+ , m_pTail(nullptr)
+ , m_nSize(0)
+ , m_allocator(allocator)
+ , m_nodeAllocator(allocator)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::list(size_type n, const T& value, const Allocator& allocator)
+ : m_pHead(NULL)
+ , m_pTail(NULL)
+ , m_nSize(0)
+ , m_allocator(allocator)
+ , m_nodeAllocator(allocator)
+{
+ construct_helper(n, value, int_not_an_iterator_tag());
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+list<T, Allocator>::list(InputIterator first, InputIterator last, const Allocator& allocator)
+ : m_pHead(NULL)
+ , m_pTail(NULL)
+ , m_nSize(0)
+ , m_allocator(allocator)
+ , m_nodeAllocator(allocator)
+{
+ construct_helper(first, last, typename iterator_traits<InputIterator>::iterator_category());
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::list(const list<T, Allocator>& other)
+ : m_pHead(NULL)
+ , m_pTail(NULL)
+ , m_nSize(0)
+ , m_allocator(other.m_allocator)
+ , m_nodeAllocator(other.m_nodeAllocator)
+{
+ construct_helper(other.begin(), other.end(), forward_iterator_tag());
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::~list()
+{
+ destroy_helper();
+}
+
+template <typename T, typename Allocator>
+template <class InputIterator>
+void list<T, Allocator>::assign(InputIterator first, InputIterator last)
+{
+ assign_helper(first, last, typename iterator_traits<InputIterator>::iterator_category());
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::assign(size_type size, const T& val)
+{
+ assign_helper(size, val, int_not_an_iterator_tag());
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reference list<T, Allocator>::back()
+{
+ return m_pTail->m_value;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reference list<T, Allocator>::back() const
+{
+ return m_pTail->m_value;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator list<T, Allocator>::begin()
+{
+ return iterator(m_pHead);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_iterator list<T, Allocator>::begin() const
+{
+ return const_iterator(m_pHead);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::clear()
+{
+ destroy_helper();
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::empty() const
+{
+ return (m_nSize == 0);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator list<T, Allocator>::end()
+{
+ return iterator(nullptr);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_iterator list<T, Allocator>::end() const
+{
+ return const_iterator(NULL);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator list<T, Allocator>::erase(iterator position)
+{
+ // Nothing to erase.
+ assert(position.m_pNode != nullptr);
+
+ --m_nSize;
+
+ Node* pNode = position.m_pNode;
+ Node* pPrev = pNode->m_pPrev;
+ Node* pNext = pNode->m_pNext;
+
+ if (pPrev != nullptr)
+ {
+ pPrev->m_pNext = pNext;
+ }
+ else
+ {
+ m_pHead = pNext;
+ }
+
+ if (pNext != nullptr)
+ {
+ pNext->m_pPrev = pPrev;
+ }
+ else
+ {
+ m_pTail = pPrev;
+ }
+
+ pNode->~Node();
+ m_nodeAllocator.deallocate(pNode, 1);
+ return iterator(pNext);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator list<T, Allocator>::erase(iterator first, iterator last)
+{
+ while (first != last)
+ {
+ first = erase(first);
+ }
+ return first;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reference list<T, Allocator>::front()
+{
+ return m_pHead->m_value;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reference list<T, Allocator>::front() const
+{
+ return m_pHead->m_value;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::allocator_type list<T, Allocator>::get_allocator() const
+{
+ return m_allocator;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator
+ list<T, Allocator>::insert(iterator position, const T& val)
+{
+ Node* pNewNode = new (m_nodeAllocator.allocate(1), placement_t()) Node(val);
+ insert_new_node_helper(position.m_pNode, pNewNode);
+ return iterator(pNewNode);
+}
+
+template <typename T, typename Allocator>
+template <typename... Args>
+typename list<T, Allocator>::iterator
+ list<T, Allocator>::emplace(iterator position, Args&&... args)
+{
+ Node* pNewNode = new (m_nodeAllocator.allocate(1), placement_t()) Node(jitstd::forward<Args>(args)...);
+ insert_new_node_helper(position.m_pNode, pNewNode);
+ return iterator(pNewNode);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::insert(iterator position, size_type n, const T& val)
+{
+ insert_helper(position, n, val, int_not_an_iterator_tag());
+}
+
+template <typename T, typename Allocator>
+template <class InputIterator>
+void list<T, Allocator>::insert(iterator position, InputIterator first, InputIterator last)
+{
+ insert_helper(position, first, last, typename iterator_traits<InputIterator>::iterator_category());
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::size_type list<T, Allocator>::max_size() const
+{
+ return (((size_type)-1) >> 1) / sizeof(Node);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::merge(list<T, Allocator>& lst)
+{
+ merge(lst, jitstd::greater<T>());
+}
+
+template <typename T, typename Allocator>
+template <class Compare>
+void list<T, Allocator>::merge(list<T, Allocator>& lst, Compare comp)
+{
+ // Merge the elements of "lst" into this list, preserving the order implied
+ // by "comp". Note: unlike std::list::merge, the elements are copied into
+ // this list rather than spliced, and "lst" is left empty on return.
+ iterator i = begin();
+ iterator j = lst.begin();
+ while (i != end() && j != lst.end())
+ {
+ if (comp(*i, *j))
+ {
+ insert(i, *j);
+ ++j;
+ }
+ else
+ {
+ ++i;
+ }
+ }
+
+ while (j != lst.end())
+ {
+ push_back(*j);
+ ++j;
+ }
+
+ lst.clear();
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>& list<T, Allocator>::operator=(const list<T, Allocator>& lst)
+{
+ destroy_helper();
+ construct_helper(lst.begin(), lst.end(), forward_iterator_tag());
+ return *this;
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::pop_back()
+{
+ assert(m_nSize != 0);
+
+ --m_nSize;
+
+ Node* pDelete = m_pTail;
+ if (m_pHead != m_pTail)
+ {
+ m_pTail = m_pTail->m_pPrev;
+ m_pTail->m_pNext = nullptr;
+ }
+ else
+ {
+ m_pHead = nullptr;
+ m_pTail = nullptr;
+ }
+ pDelete->~Node();
+ m_nodeAllocator.deallocate(pDelete, 1);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::pop_front()
+{
+ assert(m_nSize != 0);
+
+ --m_nSize;
+
+ Node* pDelete = m_pHead;
+ if (m_pHead != m_pTail)
+ {
+ m_pHead = m_pHead->m_pNext;
+ m_pHead->m_pPrev = NULL;
+ }
+ else
+ {
+ m_pHead = NULL;
+ m_pTail = NULL;
+ }
+ pDelete->~Node();
+ m_nodeAllocator.deallocate(pDelete, 1);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::push_back(const T& val)
+{
+ insert(end(), val);
+}
+
+template <typename T, typename Allocator>
+template <typename... Args>
+void list<T, Allocator>::emplace_back(Args&&... args)
+{
+ emplace(end(), jitstd::forward<Args>(args)...);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::push_front(const T& val)
+{
+ insert(begin(), val);
+}
+
+template <typename T, typename Allocator>
+template <typename... Args>
+void list<T, Allocator>::emplace_front(Args&&... args)
+{
+ emplace(begin(), jitstd::forward<Args>(args)...);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reverse_iterator
+ list<T, Allocator>::rbegin()
+{
+ return reverse_iterator(m_pTail);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reverse_iterator
+ list<T, Allocator>::rbegin() const
+{
+ return const_reverse_iterator(m_pTail);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::remove(const T& val)
+{
+ for (iterator i = begin(); i != end();)
+ {
+ if (*i == val)
+ {
+ i = erase(i);
+ }
+ else
+ {
+ ++i;
+ }
+ }
+}
+
+template <typename T, typename Allocator>
+template <class Predicate>
+void list<T, Allocator>::remove_if(Predicate pred)
+{
+ for (iterator i = begin(); i != end();)
+ {
+ if (pred(*i))
+ {
+ i = erase(i);
+ }
+ else
+ {
+ ++i;
+ }
+ }
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reverse_iterator list<T, Allocator>::rend()
+{
+ return reverse_iterator(nullptr);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reverse_iterator list<T, Allocator>::rend() const
+{
+ return const_reverse_iterator(nullptr);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::resize(size_type sz, const T& c)
+{
+ while (m_nSize < sz)
+ {
+ insert(end(), c);
+ }
+
+ while (m_nSize > sz)
+ {
+ pop_back();
+ }
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::reverse()
+{
+ for (Node* p = m_pHead; p != nullptr;)
+ {
+ Node* pNext = p->m_pNext;
+ jitstd::swap(p->m_pPrev, p->m_pNext);
+ p = pNext;
+ }
+ jitstd::swap(m_pHead, m_pTail);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::size_type list<T, Allocator>::size() const
+{
+ return m_nSize;
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::sort()
+{
+ assert(false && !"template method not implemented.");
+}
+
+template <typename T, typename Allocator>
+template <class Compare>
+void list<T, Allocator>::sort(Compare comp)
+{
+ assert(false && !"template method not implemented.");
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::splice(iterator position, list& lst)
+{
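+ // Note: only splicing an entire list into an empty destination list is
+ // supported (in which case "position" is necessarily end()); splicing into
+ // a non-empty list is not implemented and leaves both lists unchanged.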
+ if (lst.m_nSize == 0)
+ {
+ return;
+ }
+ if (m_nSize == 0)
+ {
+ std::swap(lst.m_pHead, m_pHead);
+ std::swap(lst.m_pTail, m_pTail);
+ std::swap(lst.m_nSize, m_nSize);
+ }
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::splice(iterator position, list& lst, iterator i)
+{
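+ // Not implemented; currently a no-op.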
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::splice(iterator position, list& x, iterator first, iterator last)
+{
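+ // Not implemented; currently a no-op.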
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::swap(list<T, Allocator>& lst)
+{
+ jitstd::swap(lst.m_pHead, m_pHead);
+ jitstd::swap(lst.m_pTail, m_pTail);
+ jitstd::swap(lst.m_nSize, m_nSize);
+ jitstd::swap(lst.m_allocator, m_allocator);
+ jitstd::swap(lst.m_nodeAllocator, m_nodeAllocator);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::unique()
+{
+ assert(false && !"template method not implemented.");
+}
+
+template <typename T, typename Allocator>
+template <class BinaryPredicate>
+void list<T, Allocator>::unique(const BinaryPredicate& binary_pred)
+{
+ assert(false && !"template method not implemented.");
+}
+
+// private
+template <typename T, typename Allocator>
+void list<T, Allocator>::destroy_helper()
+{
+ while (m_pTail != nullptr)
+ {
+ Node* prev = m_pTail->m_pPrev;
+ m_pTail->~Node();
+ m_nodeAllocator.deallocate(m_pTail, 1);
+ m_pTail = prev;
+ }
+ m_pHead = nullptr;
+ m_nSize = 0;
+}
+
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::construct_helper(size_type n, const T& value, int_not_an_iterator_tag)
+{
+ for (size_type i = 0; i < n; ++i)
+ {
+ insert(end(), value);
+ }
+ assert(m_nSize == n);
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void list<T, Allocator>::construct_helper(InputIterator first, InputIterator last, forward_iterator_tag)
+{
+ while (first != last)
+ {
+ insert(end(), *first);
+ ++first;
+ }
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::assign_helper(size_type n, const T& value, int_not_an_iterator_tag)
+{
+ destroy_helper();
+ for (size_type i = 0; i < n; ++i)
+ {
+ insert(end(), value);
+ }
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void list<T, Allocator>::assign_helper(InputIterator first, InputIterator last, forward_iterator_tag)
+{
+ destroy_helper();
+ while (first != last)
+ {
+ insert(end(), *first);
+ ++first;
+ }
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::insert_helper(iterator position, size_type n, const T& value, int_not_an_iterator_tag)
+{
+ for (size_type i = 0; i < n; ++i)
+ {
+ insert(position, value);
+ }
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void list<T, Allocator>::insert_helper(iterator position, InputIterator first, InputIterator last, forward_iterator_tag)
+{
+ while (first != last)
+ {
+ insert(position, *first);
+ ++first;
+ }
+}
+
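+// Links "pNewNode" into the list immediately before "pInsert"; a null
+// "pInsert" means "insert at the end", i.e. the new node becomes the tail.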
+template <typename T, typename Allocator>
+void list<T, Allocator>::insert_new_node_helper(Node* pInsert, Node* pNewNode)
+{
+ ++m_nSize;
+
+ if (pInsert == nullptr)
+ {
+ pNewNode->m_pPrev = m_pTail;
+ pNewNode->m_pNext = nullptr;
+ if (m_pHead == nullptr)
+ {
+ m_pHead = pNewNode;
+ }
+ else
+ {
+ m_pTail->m_pNext = pNewNode;
+ }
+ m_pTail = pNewNode;
+ }
+ else
+ {
+ pNewNode->m_pPrev = pInsert->m_pPrev;
+ pNewNode->m_pNext = pInsert;
+ if (pInsert->m_pPrev == nullptr)
+ {
+ m_pHead = pNewNode;
+ }
+ else
+ {
+ pInsert->m_pPrev->m_pNext = pNewNode;
+ }
+ pInsert->m_pPrev = pNewNode;
+ }
+}
+
+} // end of namespace jitstd.
+
+
+
+
+
+// Implementation of list iterators
+
+namespace jitstd
+{
+
+// iterator
+template <typename T, typename Allocator>
+list<T, Allocator>::iterator::iterator()
+ : m_pNode(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::iterator::iterator(Node* pNode)
+ : m_pNode(pNode)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::iterator::iterator(const iterator& it)
+ : m_pNode(it.m_pNode)
+{
+}
+
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator& list<T, Allocator>::iterator::operator++()
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator& list<T, Allocator>::iterator::operator++(int)
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator& list<T, Allocator>::iterator::operator--()
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator& list<T, Allocator>::iterator::operator--(int)
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::iterator::operator==(const iterator& it)
+{
+ return (m_pNode == it.m_pNode);
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::iterator::operator!=(const iterator& it)
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+T& list<T, Allocator>::iterator::operator*()
+{
+ return m_pNode->m_value;
+}
+
+template <typename T, typename Allocator>
+T* list<T, Allocator>::iterator::operator&()
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+T* list<T, Allocator>::iterator::operator->()
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::iterator::operator T*()
+{
+ return &(m_pNode->m_value);
+}
+
+
+
+
+// const_iterator
+template <typename T, typename Allocator>
+list<T, Allocator>::const_iterator::const_iterator()
+ : m_pNode(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_iterator::const_iterator(Node* pNode)
+ : m_pNode(pNode)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_iterator::const_iterator(const const_iterator& it)
+ : m_pNode(it.m_pNode)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_iterator::const_iterator(const typename list<T, Allocator>::iterator& it)
+ : m_pNode(it.m_pNode)
+{
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_iterator& list<T, Allocator>::const_iterator::operator++()
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_iterator& list<T, Allocator>::const_iterator::operator++(int)
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_iterator& list<T, Allocator>::const_iterator::operator--()
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_iterator& list<T, Allocator>::const_iterator::operator--(int)
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::const_iterator::operator==(const const_iterator& it) const
+{
+ return (m_pNode == it.m_pNode);
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::const_iterator::operator!=(const const_iterator& it) const
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+const T& list<T, Allocator>::const_iterator::operator*() const
+{
+ return m_pNode->m_value;
+}
+
+template <typename T, typename Allocator>
+const T* list<T, Allocator>::const_iterator::operator&() const
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+const T* list<T, Allocator>::const_iterator::operator->() const
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_iterator::operator const T*() const
+{
+ return &(m_pNode->m_value);
+}
+
+
+// reverse_iterator
+template <typename T, typename Allocator>
+list<T, Allocator>::reverse_iterator::reverse_iterator()
+ : m_pNode(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::reverse_iterator::reverse_iterator(Node* pNode)
+ : m_pNode(pNode)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::reverse_iterator::reverse_iterator(const reverse_iterator& it)
+ : m_pNode(it.m_pNode)
+{
+}
+
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reverse_iterator& list<T, Allocator>::reverse_iterator::operator++()
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reverse_iterator& list<T, Allocator>::reverse_iterator::operator++(int)
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reverse_iterator& list<T, Allocator>::reverse_iterator::operator--()
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reverse_iterator& list<T, Allocator>::reverse_iterator::operator--(int)
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::reverse_iterator::operator==(const reverse_iterator& it)
+{
+ return (m_pNode == it.m_pNode);
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::reverse_iterator::operator!=(const reverse_iterator& it)
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+T& list<T, Allocator>::reverse_iterator::operator*()
+{
+ return m_pNode->m_value;
+}
+
+template <typename T, typename Allocator>
+T* list<T, Allocator>::reverse_iterator::operator&()
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+T* list<T, Allocator>::reverse_iterator::operator->()
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::reverse_iterator::operator T*()
+{
+ return &(m_pNode->m_value);
+}
+
+// const_reverse_iterator
+template <typename T, typename Allocator>
+list<T, Allocator>::const_reverse_iterator::const_reverse_iterator()
+ : m_pNode(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_reverse_iterator::const_reverse_iterator(Node* pNode)
+ : m_pNode(pNode)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_reverse_iterator::const_reverse_iterator(const const_reverse_iterator& it)
+ : m_pNode(it.m_pNode)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_reverse_iterator::const_reverse_iterator(const reverse_iterator& it)
+ : m_pNode(it.m_pNode)
+{
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reverse_iterator& list<T, Allocator>::const_reverse_iterator::operator++()
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reverse_iterator& list<T, Allocator>::const_reverse_iterator::operator++(int)
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reverse_iterator& list<T, Allocator>::const_reverse_iterator::operator--()
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reverse_iterator& list<T, Allocator>::const_reverse_iterator::operator--(int)
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::const_reverse_iterator::operator==(const const_reverse_iterator& it) const
+{
+ return (m_pNode == it.m_pNode);
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::const_reverse_iterator::operator!=(const const_reverse_iterator& it) const
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+const T& list<T, Allocator>::const_reverse_iterator::operator*() const
+{
+ return m_pNode->m_value;
+}
+
+template <typename T, typename Allocator>
+const T* list<T, Allocator>::const_reverse_iterator::operator&() const
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+const T* list<T, Allocator>::const_reverse_iterator::operator->() const
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_reverse_iterator::operator const T*() const
+{
+ return &(m_pNode->m_value);
+}
+
+}
+
diff --git a/src/jit/jitstd/new.h b/src/jit/jitstd/new.h
new file mode 100644
index 0000000000..7054fbea0b
--- /dev/null
+++ b/src/jit/jitstd/new.h
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+namespace jitstd
+{
+
+struct placement_t
+{
+};
+
+}
diff --git a/src/jit/jitstd/pair.h b/src/jit/jitstd/pair.h
new file mode 100644
index 0000000000..f306000048
--- /dev/null
+++ b/src/jit/jitstd/pair.h
@@ -0,0 +1,57 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+namespace jitstd
+{
+template <typename Type1, typename Type2>
+class pair
+{
+public:
+ Type1 first;
+ Type2 second;
+
+ pair(const Type1& fst, const Type2& sec)
+ : first(fst)
+ , second(sec)
+ {
+ }
+
+ template <typename AltType1, typename AltType2>
+ pair(const AltType1& fst, const AltType2& sec)
+ : first((Type1) fst)
+ , second((Type2) sec)
+ {
+ }
+
+ template <typename AltType1, typename AltType2>
+ pair(const pair<AltType1, AltType2>& that)
+ : first((Type1) that.first)
+ , second((Type2) that.second)
+ {
+ }
+
+ pair(const pair& that)
+ : first(that.first)
+ , second(that.second)
+ {
+ }
+
+ template <typename AltType1, typename AltType2>
+ const pair<Type1, Type2>& operator=(const pair<AltType1, AltType2>& pair)
+ {
+ first = pair.first;
+ second = pair.second;
+ return *this;
+ }
+
+ bool operator==(const pair<Type1, Type2>& other) const
+ {
+ return (other.first == first && other.second == second);
+ }
+};
+}
diff --git a/src/jit/jitstd/stdafx.cpp b/src/jit/jitstd/stdafx.cpp
new file mode 100644
index 0000000000..1012ef98b8
--- /dev/null
+++ b/src/jit/jitstd/stdafx.cpp
@@ -0,0 +1,14 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// stdafx.cpp : source file that includes just the standard includes
+// jitstd.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/src/jit/jitstd/stdafx.h b/src/jit/jitstd/stdafx.h
new file mode 100644
index 0000000000..6d2519d3dc
--- /dev/null
+++ b/src/jit/jitstd/stdafx.h
@@ -0,0 +1,20 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+
+#pragma once
+
+#include "targetver.h"
+
+#include <stdio.h>
+#include <tchar.h>
+
+
+
+// TODO: reference additional headers your program requires here
diff --git a/src/jit/jitstd/targetver.h b/src/jit/jitstd/targetver.h
new file mode 100644
index 0000000000..260ee4a21c
--- /dev/null
+++ b/src/jit/jitstd/targetver.h
@@ -0,0 +1,14 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+// Including SDKDDKVer.h defines the highest available Windows platform.
+
+// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
+// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
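+//
+// For example (illustrative only), to target Windows 7 one could write:
+//
+//   #include <WinSDKVer.h>
+//   #define _WIN32_WINNT _WIN32_WINNT_WIN7
+//   #include <SDKDDKVer.h>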
+
+#include <sdkddkver.h>
diff --git a/src/jit/jitstd/type_traits.h b/src/jit/jitstd/type_traits.h
new file mode 100644
index 0000000000..1e853e8cca
--- /dev/null
+++ b/src/jit/jitstd/type_traits.h
@@ -0,0 +1,196 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+#pragma once
+
+namespace jitstd
+{
+template <typename T>
+struct remove_const
+{
+ typedef T type;
+};
+
+template <typename T>
+struct remove_const<const T>
+{
+ typedef T type;
+};
+
+template <typename T>
+struct remove_volatile
+{
+ typedef T type;
+};
+
+template <typename T>
+struct remove_volatile<volatile T>
+{
+ typedef T type;
+};
+
+template <typename T>
+struct remove_cv : remove_const<typename remove_volatile<T>::type>
+{
+};
+
+template <typename T>
+struct remove_reference
+{
+ typedef T type;
+};
+
+template <typename T>
+struct remove_reference<T&>
+{
+ typedef T type;
+};
+
+template <typename T>
+struct remove_reference<T&&>
+{
+ typedef T type;
+};
+
+template <typename T>
+struct is_lvalue_reference
+{
+ enum { value = false };
+};
+
+template <typename T>
+struct is_lvalue_reference<T&>
+{
+ enum { value = true };
+};
+
+template <typename T>
+struct is_unqualified_pointer
+{
+ enum { value = false };
+};
+
+template <typename T>
+struct is_unqualified_pointer<T*>
+{
+ enum { value = true };
+};
+
+template <typename T>
+struct is_pointer : is_unqualified_pointer<typename remove_cv<T>::type>
+{
+};
+
+template <typename T>
+struct is_integral
+{
+ enum { value = false };
+};
+
+template<>
+struct is_integral<bool>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<char>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<unsigned char>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<signed char>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<unsigned short>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<signed short>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<unsigned int>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<signed int>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<unsigned __int64>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<signed __int64>
+{
+ enum { value = true };
+};
+
+
+template<bool Pred, typename Type1, typename Type2>
+struct conditional
+{
+};
+
+template<typename Type1, typename Type2>
+struct conditional<true, Type1, Type2>
+{
+ typedef Type1 type;
+};
+
+template<typename Type1, typename Type2>
+struct conditional<false, Type1, Type2>
+{
+ typedef Type2 type;
+};
+
+template<typename Type1>
+struct make_unsigned
+{
+};
+
+template<>
+struct make_unsigned<int>
+{
+ typedef unsigned int type;
+};
+
+#ifndef PLATFORM_UNIX
+
+template<>
+struct make_unsigned<long>
+{
+ typedef unsigned long type;
+};
+#endif // !PLATFORM_UNIX
+
+template<>
+struct make_unsigned<__int64>
+{
+ typedef unsigned __int64 type;
+};
+
+} // end of namespace jitstd.
diff --git a/src/jit/jitstd/unordered_map.h b/src/jit/jitstd/unordered_map.h
new file mode 100644
index 0000000000..05e97f450c
--- /dev/null
+++ b/src/jit/jitstd/unordered_map.h
@@ -0,0 +1,179 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX unordered_map<K,V,H,P,A> XX
+XX Derives from hashtable for most implementation. Inserted elements are XX
+XX value pairs and the hash key is provided by the helper method that XX
+XX extracts the key from the key value pair XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
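+
+// A minimal usage sketch (assuming an already-constructed
+// jitstd::allocator<jitstd::pair<const int, int>> instance named "alloc";
+// obtaining the allocator is outside this header):
+//
+//   jitstd::unordered_map<int, int> ages(/* size */ 16, alloc);
+//   ages.insert(1, 100);
+//   if (ages.find(1) != ages.end())
+//   {
+//       // found
+//   }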
+
+#pragma once
+
+#include "hashtable.h"
+
+namespace jitstd
+{
+
+template <typename Key, typename Value>
+struct pair_key
+{
+ Key& operator()(const jitstd::pair<Key, Value>& pair) const
+ {
+ return pair.first;
+ }
+};
+
+template<typename Key,
+ typename Value,
+ typename Hash = jitstd::hash<Key>,
+ typename Pred = jitstd::equal_to<Key>,
+ typename Alloc = jitstd::allocator<jitstd::pair<const Key, Value> > >
+class unordered_map
+ : public hashtable<Key, pair<const Key, Value>, Hash, Pred, Alloc, pair_key<const Key, Value>>
+{
+public:
+
+ typedef Key key_type;
+ typedef Value mapped_type;
+ typedef jitstd::pair<const Key, Value> value_type;
+ typedef Hash hasher;
+ typedef Pred key_equal;
+ typedef Alloc allocator_type;
+ typedef typename allocator_type::pointer pointer;
+ typedef typename allocator_type::const_pointer const_pointer;
+ typedef typename allocator_type::reference reference;
+ typedef typename allocator_type::const_reference const_reference;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+
+ explicit unordered_map(size_type size, const hasher& hasher, const key_equal& pred, const allocator_type& allocator);
+ explicit unordered_map(size_type size, const allocator_type& allocator);
+ template<typename InputIterator>
+ unordered_map(InputIterator, InputIterator,
+ size_type size,
+ const hasher& hasher,
+ const key_equal& pred,
+ const allocator_type& allocator);
+
+ unordered_map(const unordered_map& map);
+ explicit unordered_map(const allocator_type& allocator);
+ unordered_map(const unordered_map& map, const allocator_type& allocator);
+ ~unordered_map();
+
+ unordered_map& operator=(unordered_map const&);
+ mapped_type& operator[](const Key& key);
+ mapped_type& operator[](key_type&& key);
+
+ typename unordered_map<Key, Value, Hash, Pred, Alloc>::iterator insert(const key_type& key, const mapped_type& value);
+
+private:
+ typedef hashtable<Key, pair<const Key, Value>, Hash, Pred, Alloc, pair_key<const Key, Value>> base_type;
+};
+
+}
+
+
+namespace jitstd
+{
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>::unordered_map(size_type size, const hasher& hasher, const key_equal& pred, const allocator_type& allocator)
+ : base_type(size, hasher, pred, allocator)
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>::unordered_map(size_type size, const allocator_type& allocator)
+ : base_type(size, allocator)
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+template<typename InputIterator>
+unordered_map<Key, Value, Hash, Pred, Alloc>::unordered_map(InputIterator first, InputIterator last,
+ size_type size,
+ const hasher& hasher,
+ const key_equal& pred,
+ const allocator_type& allocator)
+ : base_type(first, last, size, hasher, pred, allocator)
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>::unordered_map(const unordered_map& map)
+ : base_type(map)
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>::unordered_map(const allocator_type& allocator)
+ : base_type(allocator)
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>::unordered_map(const unordered_map& map, const allocator_type& allocator)
+ : base_type(map, allocator)
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>::~unordered_map()
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>& unordered_map<Key, Value, Hash, Pred, Alloc>::operator=(const unordered_map& map)
+{
+ base_type::operator=(map);
+ return *this;
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+Value& unordered_map<Key, Value, Hash, Pred, Alloc>::operator[](const Key& key)
+{
+ typename base_type::iterator iter = base_type::find(key);
+ if (iter == this->end())
+ {
+ iter = base_type::insert(jitstd::pair<const Key, mapped_type>(key, mapped_type())).first;
+ }
+ return (*iter).second;
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+Value& unordered_map<Key, Value, Hash, Pred, Alloc>::operator[](key_type&& key)
+{
+ typename base_type::iterator iter = base_type::find(key);
+ if (iter == this->end())
+ {
+ iter = base_type::insert(jitstd::pair<const Key, mapped_type>(key, mapped_type())).first;
+ }
+ return (*iter).second;
+}
+
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+typename unordered_map<Key, Value, Hash, Pred, Alloc>::iterator
+unordered_map<Key, Value, Hash, Pred, Alloc>::insert(const key_type& key, const mapped_type& value)
+{
+ typename unordered_map<Key, Value, Hash, Pred, Alloc>::iterator iter = base_type::find(key);
+ if (iter == this->end())
+ {
+ iter = base_type::insert(jitstd::pair<const Key, mapped_type>(key, value)).first;
+ }
+ return iter;
+}
+
+}
diff --git a/src/jit/jitstd/unordered_set.h b/src/jit/jitstd/unordered_set.h
new file mode 100644
index 0000000000..388e72426c
--- /dev/null
+++ b/src/jit/jitstd/unordered_set.h
@@ -0,0 +1,156 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX unordered_set<V,H,P,A> XX
+XX XX
+XX Derives from hashtable for most implementation. The hash key is the XX
+XX elements themselves XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
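+
+// A minimal usage sketch (assuming an already-constructed jitstd::allocator<int>
+// instance named "alloc"):
+//
+//   jitstd::unordered_set<int> seen(/* size */ 16, alloc);
+//   seen.insert(42);
+//   bool found = (seen.find(42) != seen.end());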
+
+#pragma once
+
+#include "allocator.h"
+#include "hashtable.h"
+
+namespace jitstd
+{
+
+template <typename Value,
+ typename Hash = jitstd::hash<Value>,
+ typename Pred = jitstd::equal_to<Value>,
+ typename Alloc = jitstd::allocator<Value>>
+class unordered_set
+ : public hashtable<Value, Value, Hash, Pred, Alloc>
+{
+public:
+ typedef Value key_type;
+ typedef Value value_type;
+ typedef Hash hasher;
+ typedef Pred key_equal;
+ typedef Alloc allocator_type;
+ typedef typename allocator_type::pointer pointer;
+ typedef typename allocator_type::const_pointer const_pointer;
+ typedef typename allocator_type::reference reference;
+ typedef typename allocator_type::const_reference const_reference;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef typename list<Value, Alloc>::iterator iterator;
+ typedef typename list<Value, Alloc>::const_iterator const_iterator;
+ typedef typename list<Value, Alloc>::iterator local_iterator;
+
+private:
+ typedef hashtable<Value, Value, Hash, Pred, Alloc> base_type;
+ unordered_set();
+
+ typedef pair<iterator, iterator> BucketEntry;
+ typedef vector<BucketEntry, typename Alloc::template rebind<BucketEntry>::allocator> Buckets;
+ typedef list<Value, Alloc> Elements;
+
+public:
+ explicit unordered_set(size_type,
+ const allocator_type& a);
+
+ unordered_set(size_type n,
+ const hasher& hf,
+ const key_equal& eq,
+ const allocator_type&);
+
+ template<typename InputIterator>
+ unordered_set(
+ InputIterator f, InputIterator l,
+ size_type n,
+ const hasher& hf,
+ const key_equal& eq,
+ const allocator_type&);
+
+ explicit unordered_set(const allocator_type&);
+
+ unordered_set(const unordered_set& other);
+
+ ~unordered_set();
+
+ unordered_set& operator=(unordered_set const&);
+};
+
+} // end of namespace jitstd
+
+
+namespace jitstd
+{
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_set<Value, Hash, Pred, Alloc>::unordered_set(
+ size_type n,
+ allocator_type const& allocator)
+ : hashtable<Value>(n, allocator)
+{
+ this->rehash(n);
+}
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_set<Value, Hash, Pred, Alloc>::unordered_set(
+ size_type n,
+ hasher const& hf,
+ key_equal const& eq,
+ allocator_type const& allocator)
+ : hashtable<Value>(n, hf, eq, allocator)
+{
+ this->rehash(n);
+}
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+template<typename InputIterator>
+unordered_set<Value, Hash, Pred, Alloc>::unordered_set(
+ InputIterator f, InputIterator l,
+ size_type n,
+ const hasher& hf,
+ const key_equal& eq,
+ const allocator_type& allocator)
+ : hashtable<Value>(f, l, n, hf, eq, allocator)
+{
+ this->rehash(n);
+    this->insert(f, l);
+}
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_set<Value, Hash, Pred, Alloc>::unordered_set(const allocator_type& allocator)
+: hashtable<Value>(allocator)
+{
+}
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_set<Value, Hash, Pred, Alloc>::unordered_set(const unordered_set& other)
+: hashtable<Value>(other)
+{
+}
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_set<Value, Hash, Pred, Alloc>::~unordered_set()
+{
+}
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_set<Value, Hash, Pred, Alloc>&
+ unordered_set<Value, Hash, Pred, Alloc>::operator=(unordered_set const& other)
+{
+ base_type::operator=(other);
+ return *this;
+}
+
+} // end of namespace jitstd.
diff --git a/src/jit/jitstd/utility.h b/src/jit/jitstd/utility.h
new file mode 100644
index 0000000000..80ce58e4d7
--- /dev/null
+++ b/src/jit/jitstd/utility.h
@@ -0,0 +1,108 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+namespace jitstd
+{
+
+template <typename T>
+inline
+T&& forward(typename jitstd::remove_reference<T>::type& arg)
+{
+ return static_cast<T&&>(arg);
+}
+
+template <typename T>
+inline
+T&& forward(typename jitstd::remove_reference<T>::type&& arg)
+{
+ static_assert(!jitstd::is_lvalue_reference<T>::value, "unexpected lvalue reference");
+ return static_cast<T&&>(arg);
+}
+
+namespace utility
+{
+ // Template class for scoped execution of a lambda.
+ // Usage:
+ //
+ // auto code = [&]
+ // {
+ // JITDUMP("finally()");
+ // };
+ // jitstd::utility::scoped_code<decltype(code)> finally(code);
+ // "code" will execute when "finally" goes out of scope.
+ template <typename T>
+ class scoped_code
+ {
+ public:
+ const T& l;
+ scoped_code(const T& l) : l(l) { }
+ ~scoped_code() { l(); }
+ };
+
+
+ // Helper to allocate objects of any type, given an allocator of void type.
+ //
+ // @param alloc An allocator of void type used to create an allocator of type T.
+ // @param count The number of objects of type T that need to be allocated.
+ //
+ // @return A pointer to an object or an array of objects that was allocated.
+ template <typename T>
+ inline
+ static T* allocate(jitstd::allocator<void>& alloc, size_t count = 1)
+ {
+ return jitstd::allocator<T>(alloc).allocate(count);
+ }
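+
+    // Illustrative usage (hypothetical variables, not part of the original change):
+    // given a jitstd::allocator<void> named "alloc", the helper rebinds it to the
+    // requested element type so callers need not spell out jitstd::allocator<T>:
+    //
+    //   int*    scratch = jitstd::utility::allocate<int>(alloc, 16); // 16 ints
+    //   double* single  = jitstd::utility::allocate<double>(alloc);  // one element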
+
+ // Ensures that "wset" is the union of the initial state of "wset" and "rset".
+ // Elements from "rset" that were not in "wset" are added to "cset."
+ template <typename Set>
+ bool set_union(Set& wset, const Set& rset, Set& cset)
+ {
+ bool change = false;
+ for (typename Set::const_iterator i = rset.begin(); i != rset.end(); ++i)
+ {
+ jitstd::pair<typename Set::iterator, bool> result = wset.insert(*i);
+ if (result.second)
+ {
+ change = true;
+ cset.insert(*i);
+ }
+ }
+ return change;
+ }
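+
+    // Worked example (hypothetical contents, for illustration only): with
+    // wset = {1, 2}, rset = {2, 3} and an empty cset, set_union(wset, rset, cset)
+    // leaves wset = {1, 2, 3}, adds the newly inserted element 3 to cset, and
+    // returns true because wset changed.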
+
+ template <typename Set>
+ bool set_union(Set& wset, const Set& rset)
+ {
+ bool change = false;
+ for (typename Set::const_iterator i = rset.begin(); i != rset.end(); ++i)
+ {
+ jitstd::pair<typename Set::iterator, bool> result = wset.insert(*i);
+ change |= result.second;
+ }
+ return change;
+ }
+
+ template <typename Set>
+ bool set_difference(Set& wset, const Set& rset)
+ {
+ bool change = false;
+ for (typename Set::const_iterator i = rset.begin(); i != rset.end(); ++i)
+ {
+ if (wset.find(*i) != wset.end())
+ {
+ wset.erase(*i);
+ change = true;
+ }
+ }
+
+ return change;
+ }
+} // end of namespace utility.
+
+} // end of namespace jitstd.
diff --git a/src/jit/jitstd/vector.h b/src/jit/jitstd/vector.h
new file mode 100644
index 0000000000..d252e18253
--- /dev/null
+++ b/src/jit/jitstd/vector.h
@@ -0,0 +1,1254 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX vector<T> XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#pragma once
+
+#include "allocator.h"
+#include "iterator.h"
+
+namespace jitstd
+{
+
+template <typename T, typename Allocator = allocator<T> >
+class vector
+{
+public:
+ typedef Allocator allocator_type;
+ typedef T* pointer;
+ typedef T& reference;
+ typedef const T* const_pointer;
+ typedef const T& const_reference;
+
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T value_type;
+
+ // nested classes
+ class iterator : public jitstd::iterator<random_access_iterator_tag, T>
+ {
+ iterator(T* ptr);
+ public:
+ iterator();
+ iterator(const iterator& it);
+
+ iterator& operator++();
+ iterator& operator++(int);
+ iterator& operator--();
+ iterator& operator--(int);
+ iterator operator+(difference_type n);
+ iterator operator-(difference_type n);
+ size_type operator-(const iterator& that);
+ bool operator==(const iterator& it);
+ bool operator!=(const iterator& it);
+ T& operator*();
+ T* operator&();
+ operator T*();
+
+ private:
+ friend class vector<T, Allocator>;
+ pointer m_pElem;
+ };
+
+ class const_iterator : public jitstd::iterator<random_access_iterator_tag, T>
+ {
+ private:
+ const_iterator(T* ptr);
+ const_iterator();
+ public:
+ const_iterator(const const_iterator& it);
+
+ const_iterator& operator++();
+ const_iterator& operator++(int);
+ const_iterator& operator--();
+ const_iterator& operator--(int);
+ const_iterator operator+(difference_type n);
+ const_iterator operator-(difference_type n);
+ size_type operator-(const const_iterator& that);
+ bool operator==(const const_iterator& it) const;
+ bool operator!=(const const_iterator& it) const;
+ const T& operator*() const;
+ const T* operator&() const;
+ operator const T*() const;
+
+ private:
+ friend class vector<T, Allocator>;
+ pointer m_pElem;
+ };
+
+ class reverse_iterator : public jitstd::iterator<random_access_iterator_tag, T>
+ {
+ private:
+ reverse_iterator(T* ptr);
+ public:
+ reverse_iterator();
+ reverse_iterator(const reverse_iterator& it);
+
+ reverse_iterator& operator++();
+ reverse_iterator& operator++(int);
+ reverse_iterator& operator--();
+ reverse_iterator& operator--(int);
+ reverse_iterator operator+(difference_type n);
+ reverse_iterator operator-(difference_type n);
+ size_type operator-(const reverse_iterator& that);
+ bool operator==(const reverse_iterator& it);
+ bool operator!=(const reverse_iterator& it);
+ T& operator*();
+ T* operator&();
+ operator T*();
+
+ private:
+ friend class vector<T, Allocator>;
+ pointer m_pElem;
+ };
+
+ class const_reverse_iterator : public jitstd::iterator<random_access_iterator_tag, T>
+ {
+ private:
+ const_reverse_iterator(T* ptr);
+ public:
+ const_reverse_iterator();
+ const_reverse_iterator(const const_reverse_iterator& it);
+
+ const_reverse_iterator& operator++();
+ const_reverse_iterator& operator++(int);
+ const_reverse_iterator& operator--();
+ const_reverse_iterator& operator--(int);
+ const_reverse_iterator operator+(difference_type n);
+ const_reverse_iterator operator-(difference_type n);
+ size_type operator-(const const_reverse_iterator& that);
+ bool operator==(const const_reverse_iterator& it) const;
+ bool operator!=(const const_reverse_iterator& it) const;
+ const T& operator*() const;
+ const T* operator&() const;
+ operator const T*() const;
+
+ private:
+ friend class vector<T, Allocator>;
+ pointer m_pElem;
+ };
+
+ // ctors
+ explicit vector(const Allocator& allocator);
+ explicit vector(size_type n, const T& value, const Allocator& allocator);
+
+ template <typename InputIterator>
+ vector(InputIterator first, InputIterator last, const Allocator& allocator);
+
+ // cctors
+ vector(const vector& vec);
+
+ template <typename Alt, typename AltAllocator>
+ explicit vector(const vector<Alt, AltAllocator>& vec);
+
+ // dtor
+ ~vector();
+
+ template <class InputIterator>
+ void assign(InputIterator first, InputIterator last);
+ void assign(size_type size, const T& value);
+
+ const_reference at(size_type n) const;
+ reference at(size_type n);
+
+ reference back();
+ const_reference back() const;
+
+ iterator begin();
+ const_iterator begin() const;
+ const_iterator cbegin() const;
+
+ size_type capacity() const;
+
+ void clear();
+ bool empty() const;
+
+ iterator end();
+ const_iterator end() const;
+ const_iterator cend() const;
+
+ iterator erase(iterator position);
+ iterator erase(iterator first, iterator last);
+
+ reference front();
+ const_reference front() const;
+
+ allocator_type get_allocator() const;
+
+ iterator insert(iterator position, const T& value);
+ void insert(iterator position, size_type size, const T& value);
+
+ template <typename InputIterator>
+ void insert(iterator position, InputIterator first, InputIterator last);
+
+ size_type max_size() const;
+
+ vector& operator=(const vector& vec);
+ template <typename Alt, typename AltAllocator>
+ vector<T, Allocator>& operator=(const vector<Alt, AltAllocator>& vec);
+
+ reference operator[](size_type n);
+ const_reference operator[](size_type n) const;
+
+ void pop_back();
+ void push_back(const T& value);
+
+ reverse_iterator rbegin();
+ const_reverse_iterator rbegin() const;
+
+ reverse_iterator rend();
+ const_reverse_iterator rend() const;
+
+ void reserve(size_type n);
+
+ void resize(size_type sz, const T&);
+
+ size_type size() const;
+
+ void swap(vector<T, Allocator>& vec);
+
+private:
+
+ typename Allocator::template rebind<T>::allocator m_allocator;
+ T* m_pArray;
+ size_type m_nSize;
+ size_type m_nCapacity;
+
+ inline
+ bool ensure_capacity(size_type capacity);
+
+ template <typename InputIterator>
+ void construct_helper(InputIterator first, InputIterator last, forward_iterator_tag);
+ template <typename InputIterator>
+ void construct_helper(InputIterator first, InputIterator last, int_not_an_iterator_tag);
+ void construct_helper(size_type size, const T& value);
+
+ template <typename InputIterator>
+ void insert_helper(iterator iter, InputIterator first, InputIterator last, forward_iterator_tag);
+ template <typename InputIterator>
+ void insert_helper(iterator iter, InputIterator first, InputIterator last, int_not_an_iterator_tag);
+ void insert_elements_helper(iterator iter, size_type size, const T& value);
+
+ template <typename InputIterator>
+ void assign_helper(InputIterator first, InputIterator last, forward_iterator_tag);
+ template <typename InputIterator>
+ void assign_helper(InputIterator first, InputIterator last, int_not_an_iterator_tag);
+
+ template <typename Alt, typename AltAllocator>
+ friend class vector;
+};
+
+} // end of namespace jitstd.
+
+
+
+// Implementation of vector.
+
+namespace jitstd
+{
+
+namespace
+{
+
+template <typename InputIterator>
+size_t iterator_difference(InputIterator first, const InputIterator& last)
+{
+ size_t size = 0;
+ for (; first != last; ++first, ++size);
+ return size;
+}
+
+}
+
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::vector(const Allocator& allocator)
+ : m_allocator(allocator)
+ , m_pArray(nullptr)
+ , m_nSize(0)
+ , m_nCapacity(0)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::vector(size_type size, const T& value, const Allocator& allocator)
+ : m_allocator(allocator)
+ , m_pArray(NULL)
+ , m_nSize(0)
+ , m_nCapacity(0)
+{
+ construct_helper(size, value);
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+vector<T, Allocator>::vector(InputIterator first, InputIterator last, const Allocator& allocator)
+ : m_allocator(allocator)
+ , m_pArray(NULL)
+ , m_nSize(0)
+ , m_nCapacity(0)
+{
+ construct_helper(first, last, iterator_traits<InputIterator>::iterator_category());
+}
+
+template <typename T, typename Allocator>
+template <typename Alt, typename AltAllocator>
+vector<T, Allocator>::vector(const vector<Alt, AltAllocator>& vec)
+ : m_allocator(vec.m_allocator)
+ , m_pArray(NULL)
+ , m_nSize(0)
+ , m_nCapacity(0)
+{
+ ensure_capacity(vec.m_nSize);
+ for (size_type i = 0, j = 0; i < vec.m_nSize; ++i, ++j)
+ {
+ new (m_pArray + i, placement_t()) T((T) vec.m_pArray[j]);
+ }
+
+ m_nSize = vec.m_nSize;
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::vector(const vector<T, Allocator>& vec)
+ : m_allocator(vec.m_allocator)
+ , m_pArray(NULL)
+ , m_nSize(0)
+ , m_nCapacity(0)
+{
+ ensure_capacity(vec.m_nSize);
+ for (size_type i = 0, j = 0; i < vec.m_nSize; ++i, ++j)
+ {
+ new (m_pArray + i, placement_t()) T(vec.m_pArray[j]);
+ }
+
+ m_nSize = vec.m_nSize;
+}
+
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::~vector()
+{
+ for (size_type i = 0; i < m_nSize; ++i)
+ {
+ m_pArray[i].~T();
+ }
+ m_allocator.deallocate(m_pArray, m_nCapacity);
+ m_nSize = 0;
+ m_nCapacity = 0;
+}
+
+
+// public methods
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::assign(InputIterator first, InputIterator last)
+{
+ construct_helper(first, last, iterator_traits<InputIterator>::iterator_category());
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::assign(size_type size, const T& value)
+{
+ ensure_capacity(size);
+    for (size_type i = 0; i < size; ++i)
+ {
+ m_pArray[i] = value;
+ }
+ m_nSize = size;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reference
+ vector<T, Allocator>::at(size_type i) const
+{
+ return operator[](i);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reference
+ vector<T, Allocator>::at(size_type i)
+{
+ return operator[](i);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reference
+ vector<T, Allocator>::back()
+{
+ return operator[](m_nSize - 1);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reference
+ vector<T, Allocator>::back() const
+{
+ return operator[](m_nSize - 1);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator
+ vector<T, Allocator>::begin()
+{
+ return iterator(m_pArray);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator
+ vector<T, Allocator>::begin() const
+{
+ return const_iterator(m_pArray);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator
+ vector<T, Allocator>::cbegin() const
+{
+ return const_iterator(m_pArray);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type
+ vector<T, Allocator>::capacity() const
+{
+ return m_nCapacity;
+}
+
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::clear()
+{
+ for (size_type i = 0; i < m_nSize; ++i)
+ {
+ m_pArray[i].~T();
+ }
+ m_allocator.deallocate(m_pArray, m_nCapacity);
+ m_pArray = NULL;
+ m_nSize = 0;
+ m_nCapacity = 0;
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::empty() const
+{
+ return m_nSize == 0;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator vector<T, Allocator>::end()
+{
+ return iterator(m_pArray + m_nSize);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator
+ vector<T, Allocator>::end() const
+{
+ return const_iterator(m_pArray + m_nSize);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator vector<T, Allocator>::cend() const
+{
+ return const_iterator(m_pArray + m_nSize);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator
+ vector<T, Allocator>::erase(
+ typename vector<T, Allocator>::iterator position)
+{
+ return erase(position, position + 1);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator
+ vector<T, Allocator>::erase(
+ typename vector<T, Allocator>::iterator first,
+ typename vector<T, Allocator>::iterator last)
+{
+ assert(m_nSize > 0);
+ assert(first.m_pElem >= m_pArray);
+ assert(last.m_pElem >= m_pArray);
+ assert(first.m_pElem <= m_pArray + m_nSize);
+ assert(last.m_pElem <= m_pArray + m_nSize);
+ assert(last.m_pElem > first.m_pElem);
+
+ pointer fptr = first.m_pElem;
+ pointer lptr = last.m_pElem;
+ pointer eptr = m_pArray + m_nSize;
+ for (; lptr != eptr; ++lptr, fptr++)
+ {
+ (*fptr).~T();
+ *fptr = *lptr;
+ }
+ m_nSize -= (size_type)(lptr - fptr);
+ return first;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reference
+ vector<T, Allocator>::front()
+{
+ return operator[](0);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reference
+ vector<T, Allocator>::front() const
+{
+ return operator[](0);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::allocator_type
+ vector<T, Allocator>::get_allocator() const
+{
+ return m_allocator;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator
+ vector<T, Allocator>::insert(
+ typename vector<T, Allocator>::iterator iter,
+ const T& value)
+{
+ size_type pos = (size_type) (iter.m_pElem - m_pArray);
+ insert_elements_helper(iter, 1, value);
+ return iterator(m_pArray + pos);
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::insert(
+ iterator iter,
+ size_type size,
+ const T& value)
+{
+ insert_elements_helper(iter, size, value);
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::insert(
+ iterator iter,
+ InputIterator first,
+ InputIterator last)
+{
+ insert_helper(iter, first, last, iterator_traits<InputIterator>::iterator_category());
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type
+ vector<T, Allocator>::max_size() const
+{
+ return ((size_type) -1) >> 1;
+}
+
+template <typename T, typename Allocator>
+template <typename Alt, typename AltAllocator>
+vector<T, Allocator>& vector<T, Allocator>::operator=(const vector<Alt, AltAllocator>& vec)
+{
+ // We'll not observe copy-on-write for now.
+ m_allocator = vec.m_allocator;
+ ensure_capacity(vec.m_nSize);
+ m_nSize = vec.m_nSize;
+ for (size_type i = 0; i < m_nSize; ++i)
+ {
+ m_pArray[i] = (T) vec.m_pArray[i];
+ }
+ return *this;
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>& vector<T, Allocator>::operator=(const vector<T, Allocator>& vec)
+{
+ // We'll not observe copy-on-write for now.
+ m_allocator = vec.m_allocator;
+ ensure_capacity(vec.m_nSize);
+ m_nSize = vec.m_nSize;
+ for (size_type i = 0; i < m_nSize; ++i)
+ {
+ new (m_pArray + i, placement_t()) T(vec.m_pArray[i]);
+ }
+ return *this;
+}
+
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reference vector<T, Allocator>::operator[](size_type n)
+{
+ return m_pArray[n];
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reference
+ vector<T, Allocator>::operator[](size_type n) const
+{
+ return m_pArray[n];
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::pop_back()
+{
+ m_pArray[m_nSize - 1].~T();
+ --m_nSize;
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::push_back(const T& value)
+{
+ ensure_capacity(m_nSize + 1);
+ new (m_pArray + m_nSize, placement_t()) T(value);
+ ++m_nSize;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator vector<T, Allocator>::rbegin()
+{
+ return reverse_iterator(m_pArray + m_nSize - 1);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator
+ vector<T, Allocator>::rbegin() const
+{
+ return const_reverse_iterator(m_pArray + m_nSize - 1);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator
+ vector<T, Allocator>::rend()
+{
+ return reverse_iterator(m_pArray - 1);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator
+ vector<T, Allocator>::rend() const
+{
+ return const_reverse_iterator(m_pArray - 1);
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::reserve(size_type n)
+{
+ ensure_capacity(n);
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::resize(
+ size_type sz,
+ const T& c)
+{
+ for (; m_nSize > sz; m_nSize--)
+ {
+ m_pArray[m_nSize - 1].~T();
+ }
+ ensure_capacity(sz);
+ for (; m_nSize < sz; m_nSize++)
+ {
+ new (m_pArray + m_nSize, placement_t()) T(c);
+ }
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type vector<T, Allocator>::size() const
+{
+ return m_nSize;
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::swap(vector<T, Allocator>& vec)
+{
+ jitstd::swap(m_pArray, vec.m_pArray);
+ jitstd::swap(m_nSize, vec.m_nSize);
+    jitstd::swap(m_nCapacity, vec.m_nCapacity);
+ jitstd::swap(m_allocator, vec.m_allocator);
+}
+
+// =======================================================================================
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::construct_helper(size_type size, const T& value)
+{
+ ensure_capacity(size);
+
+ for (size_type i = 0; i < size; ++i)
+ {
+ new (m_pArray + i, placement_t()) T(value);
+ }
+
+ m_nSize = size;
+}
+
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::construct_helper(InputIterator first, InputIterator last, int_not_an_iterator_tag)
+{
+ construct_helper(first, last);
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::construct_helper(InputIterator first, InputIterator last, forward_iterator_tag)
+{
+ size_type size = iterator_difference(first, last);
+
+ ensure_capacity(size);
+ for (size_type i = 0; i < size; ++i)
+ {
+ new (m_pArray + i, placement_t()) T(*first);
+ first++;
+ }
+
+ m_nSize = size;
+}
+
+// =======================================================================================
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::insert_elements_helper(iterator iter, size_type size, const T& value)
+{
+ assert(size < max_size());
+
+    // If m_pElem is NULL, then m_pArray would be NULL too.
+ size_type pos = iter.m_pElem - m_pArray;
+
+ assert(pos <= m_nSize); // <= could insert at end.
+ assert(pos >= 0);
+
+ ensure_capacity(m_nSize + size);
+
+ for (int src = m_nSize - 1, dst = m_nSize + size - 1; src >= (int) pos; --src, --dst)
+ {
+ m_pArray[dst] = m_pArray[src];
+ }
+
+ for (size_type i = 0; i < size; ++i)
+ {
+ new (m_pArray + pos + i, placement_t()) T(value);
+ }
+
+ m_nSize += size;
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::insert_helper(iterator iter, InputIterator first, InputIterator last, int_not_an_iterator_tag)
+{
+ insert_elements_helper(iter, first, last);
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::insert_helper(iterator iter, InputIterator first, InputIterator last, forward_iterator_tag)
+{
+    // If m_pElem is NULL, then m_pArray would be NULL too.
+ size_type pos = iter.m_pElem - m_pArray;
+
+ assert(pos <= m_nSize); // <= could insert at end.
+ assert(pos >= 0);
+
+ size_type size = iterator_difference(first, last);
+ assert(size < max_size());
+
+ ensure_capacity(m_nSize + size);
+
+ pointer lst = m_pArray + m_nSize + size - 1;
+ for (size_type i = pos; i < m_nSize; ++i)
+ {
+ *lst-- = m_pArray[i];
+ }
+ for (size_type i = 0; i < size; ++i, ++first)
+ {
+ m_pArray[pos + i] = *first;
+ }
+
+ m_nSize += size;
+}
+
+// =======================================================================================
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::assign_helper(InputIterator first, InputIterator last, forward_iterator_tag)
+{
+ size_type size = iterator_difference(first, last);
+
+ ensure_capacity(size);
+ for (size_type i = 0; i < size; ++i)
+ {
+ m_pArray[i] = *first;
+ first++;
+ }
+
+ m_nSize = size;
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::assign_helper(InputIterator first, InputIterator last, int_not_an_iterator_tag)
+{
+ assign_helper(first, last);
+}
+
+// =======================================================================================
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::ensure_capacity(size_type newCap)
+{
+ if (newCap <= m_nCapacity)
+ {
+ return false;
+ }
+
+ // Double the alloc capacity based on size.
+ size_type allocCap = m_nSize * 2;
+
+ // Is it still not sufficient?
+ if (allocCap < newCap)
+ {
+ allocCap = newCap;
+ }
+
+ // Allocate space.
+ pointer ptr = m_allocator.allocate(allocCap);
+
+ // Copy over.
+ for (size_type i = 0; i < m_nSize; ++i)
+ {
+ new (ptr + i, placement_t()) T(m_pArray[i]);
+ }
+
+ // Deallocate currently allocated space.
+ m_allocator.deallocate(m_pArray, m_nCapacity);
+
+ // Update the pointers and capacity;
+ m_pArray = ptr;
+ m_nCapacity = allocCap;
+ return true;
+}
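+
+// Growth illustration (hypothetical numbers, derived from the logic above): with
+// m_nSize == 4 and m_nCapacity == 4, a push_back triggers ensure_capacity(5); the
+// doubled size 2 * 4 = 8 already covers 5, so 8 elements are allocated. Starting
+// from an empty vector, the doubled size 0 is insufficient, so exactly the
+// requested capacity is allocated.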
+
+} // end of namespace jitstd.
+
+
+
+// Implementation of vector iterators
+
+namespace jitstd
+{
+
+// iterator
+template <typename T, typename Allocator>
+vector<T, Allocator>::iterator::iterator()
+ : m_pElem(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::iterator::iterator(T* ptr)
+ : m_pElem(ptr)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::iterator::iterator(const iterator& it)
+ : m_pElem(it.m_pElem)
+{
+}
+
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator& vector<T, Allocator>::iterator::operator++()
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator& vector<T, Allocator>::iterator::operator++(int)
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator& vector<T, Allocator>::iterator::operator--()
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator& vector<T, Allocator>::iterator::operator--(int)
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator vector<T, Allocator>::iterator::operator+(difference_type n)
+{
+ return iterator(m_pElem + n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator vector<T, Allocator>::iterator::operator-(difference_type n)
+{
+ return iterator(m_pElem - n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type
+ vector<T, Allocator>::iterator::operator-(
+ const typename vector<T, Allocator>::iterator& that)
+{
+ return m_pElem - that.m_pElem;
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::iterator::operator==(const iterator& it)
+{
+ return (m_pElem == it.m_pElem);
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::iterator::operator!=(const iterator& it)
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+T& vector<T, Allocator>::iterator::operator*()
+{
+ return *m_pElem;
+}
+
+template <typename T, typename Allocator>
+T* vector<T, Allocator>::iterator::operator&()
+{
+    return m_pElem;
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::iterator::operator T*()
+{
+    return m_pElem;
+}
+
+// const_iterator
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_iterator::const_iterator()
+ : m_pElem(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_iterator::const_iterator(T* ptr)
+ : m_pElem(ptr)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_iterator::const_iterator(const const_iterator& it)
+ : m_pElem(it.m_pElem)
+{
+}
+
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator& vector<T, Allocator>::const_iterator::operator++()
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator& vector<T, Allocator>::const_iterator::operator++(int)
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator& vector<T, Allocator>::const_iterator::operator--()
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator& vector<T, Allocator>::const_iterator::operator--(int)
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator vector<T, Allocator>::const_iterator::operator+(difference_type n)
+{
+ return const_iterator(m_pElem + n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator vector<T, Allocator>::const_iterator::operator-(difference_type n)
+{
+ return const_iterator(m_pElem - n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type
+ vector<T, Allocator>::const_iterator::operator-(
+ const typename vector<T, Allocator>::const_iterator& that)
+{
+ return m_pElem - that.m_pElem;
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::const_iterator::operator==(const const_iterator& it) const
+{
+ return (m_pElem == it.m_pElem);
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::const_iterator::operator!=(const const_iterator& it) const
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+const T& vector<T, Allocator>::const_iterator::operator*() const
+{
+ return *m_pElem;
+}
+
+
+template <typename T, typename Allocator>
+const T* vector<T, Allocator>::const_iterator::operator&() const
+{
+    return m_pElem;
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_iterator::operator const T*() const
+{
+    return m_pElem;
+}
+
+
+// reverse_iterator
+template <typename T, typename Allocator>
+vector<T, Allocator>::reverse_iterator::reverse_iterator()
+ : m_pElem(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::reverse_iterator::reverse_iterator(T* ptr)
+ : m_pElem(ptr)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::reverse_iterator::reverse_iterator(const reverse_iterator& it)
+ : m_pElem(it.m_pElem)
+{
+}
+
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator& vector<T, Allocator>::reverse_iterator::operator++()
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator& vector<T, Allocator>::reverse_iterator::operator++(int)
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator& vector<T, Allocator>::reverse_iterator::operator--()
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator& vector<T, Allocator>::reverse_iterator::operator--(int)
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator vector<T, Allocator>::reverse_iterator::operator+(difference_type n)
+{
+ return reverse_iterator(m_pElem + n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator vector<T, Allocator>::reverse_iterator::operator-(difference_type n)
+{
+ return reverse_iterator(m_pElem - n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type
+ vector<T, Allocator>::reverse_iterator::operator-(
+ const typename vector<T, Allocator>::reverse_iterator& that)
+{
+ return m_pElem - that.m_pElem;
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::reverse_iterator::operator==(const reverse_iterator& it)
+{
+ return (m_pElem == it.m_pElem);
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::reverse_iterator::operator!=(const reverse_iterator& it)
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+T& vector<T, Allocator>::reverse_iterator::operator*()
+{
+ return *m_pElem;
+}
+
+template <typename T, typename Allocator>
+T* vector<T, Allocator>::reverse_iterator::operator&()
+{
+    return m_pElem;
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::reverse_iterator::operator T*()
+{
+    return m_pElem;
+}
+
+// const_reverse_iterator
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_reverse_iterator::const_reverse_iterator()
+ : m_pElem(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_reverse_iterator::const_reverse_iterator(T* ptr)
+ : m_pElem(ptr)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_reverse_iterator::const_reverse_iterator(const const_reverse_iterator& it)
+ : m_pElem(it.m_pElem)
+{
+}
+
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator& vector<T, Allocator>::const_reverse_iterator::operator++()
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator& vector<T, Allocator>::const_reverse_iterator::operator++(int)
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator& vector<T, Allocator>::const_reverse_iterator::operator--()
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator& vector<T, Allocator>::const_reverse_iterator::operator--(int)
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator vector<T, Allocator>::const_reverse_iterator::operator+(difference_type n)
+{
+ return const_reverse_iterator(m_pElem + n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator vector<T, Allocator>::const_reverse_iterator::operator-(difference_type n)
+{
+ return const_reverse_iterator(m_pElem - n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type
+ vector<T, Allocator>::const_reverse_iterator::operator-(
+ const typename vector<T, Allocator>::const_reverse_iterator& that)
+{
+ return m_pElem - that.m_pElem;
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::const_reverse_iterator::operator==(const const_reverse_iterator& it) const
+{
+ return (m_pElem == it.m_pElem);
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::const_reverse_iterator::operator!=(const const_reverse_iterator& it) const
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+const T& vector<T, Allocator>::const_reverse_iterator::operator*() const
+{
+ return *m_pElem;
+}
+
+template <typename T, typename Allocator>
+const T* vector<T, Allocator>::const_reverse_iterator::operator&() const
+{
+    return m_pElem;
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_reverse_iterator::operator const T*() const
+{
+    return m_pElem;
+}
+
+}
diff --git a/src/jit/jittelemetry.cpp b/src/jit/jittelemetry.cpp
new file mode 100644
index 0000000000..2d5a2102d1
--- /dev/null
+++ b/src/jit/jittelemetry.cpp
@@ -0,0 +1,390 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+// <OWNER>clrjit</OWNER>
+//
+// This class abstracts the telemetry information collected for the JIT.
+//
+// Goals:
+// 1. Telemetry information should be a NO-op when JIT level telemetry is disabled.
+// 2. Data collection should be actionable.
+// 3. Data collection should comply to privacy rules.
+// 4. Data collection cannot impact JIT/OS performance.
+// 5. Data collection volume should be manageable by our remote services.
+//
+// DESIGN CONCERNS:
+//
+// > To collect data, we use the TraceLogging API provided by Windows.
+//
+// The brief workflow suggested is:
+// #include <TraceLoggingProvider.h>
+// TRACELOGGING_DEFINE_PROVIDER( // defines g_hProvider
+// g_hProvider, // Name of the provider variable
+// "MyProvider", // Human-readable name of the provider
+// (0xb3864c38, 0x4273, 0x58c5, 0x54, 0x5b, 0x8b, 0x36, 0x08, 0x34, 0x34, 0x71)); // Provider GUID
+// int main(int argc, char* argv[]) // or DriverEntry for kernel-mode.
+// {
+// TraceLoggingRegister(g_hProvider, NULL, NULL, NULL); // NULLs only needed for C. Please do not include the
+// // NULLs in C++ code.
+// TraceLoggingWrite(g_hProvider,
+// "MyEvent1",
+// TraceLoggingString(argv[0], "arg0"),
+// TraceLoggingInt32(argc));
+// TraceLoggingUnregister(g_hProvider);
+// return 0;
+// }
+//
+// In summary, this involves:
+// 1. Creating a binary/DLL local provider using:
+// TRACELOGGING_DEFINE_PROVIDER(g_hProvider, "ProviderName", providerId, [option])
+// 2. Registering the provider instance
+// TraceLoggingRegister(g_hProvider)
+// 3. Perform TraceLoggingWrite operations to write out data.
+// 4. Unregister the provider instance.
+// TraceLoggingUnregister(g_hProvider)
+//
+// A. Determining where to create the provider instance?
+// 1) We use the same provider name/GUID as the CLR and the CLR creates its own DLL local provider handle.
+// For CLRJIT.dll, the question is, can the same provider name/GUIDs be shared across binaries?
+//
+// Answer:
+// "For TraceLogging providers, it is okay to use the same provider GUID / name
+// in different binaries. Do not share the same provider handle across DLLs.
+// As long as you do not pass an hProvider from one DLL to another, TraceLogging
+// will properly keep track of the events."
+//
+// 2) CoreCLR is linked into the CLR. CLR already creates an instance, so where do we create the JIT's instance?
+// Answer:
+// "Ideally you would have one provider per DLL, but if you're folding distinct sets
+// of functionality into one DLL (like shell32.dll or similar sort of catch-all things)
+// you can have perhaps a few more providers per binary."
+//
+// B. Determining where to register and unregister the provider instance?
+// 1) For CLRJIT.dll we can register the provider instance during jitDllOnProcessAttach.
+// Since one of our goals is to turn telemetry off, we need to be careful about
+// referencing environment variables during the DLL load and unload path.
+// Referencing environment variables through ConfigDWORD uses UtilCode.
+// This roughly translates to InitUtilcode() being called before jitDllOnProcessAttach.
+//
+// For CLRJIT.dll, compStartup is called on jitOnDllProcessAttach().
+// This can be called twice through sxsJitStartup -- so prevent double initialization.
+// UtilCode is init-ed by this time. The same is true for CoreCLR.
+//
+// 2) For CLRJIT.dll and CoreCLR, compShutdown will be called during jitOnDllProcessDetach().
+//
+// C. Determining the data to collect:
+//
+// IMPORTANT: Since telemetry data can be collected at any time after DLL load,
+// make sure you initialize the compiler state variables you access in telemetry
+// data collection. For example, if you are transmitting method names, then
+// make sure info.compMethodHnd is initialized at that point.
+//
+// 1) Tracking noway assert count:
+// After a noway assert is hit, in both min-opts and non-min-opts, we collect
+// info such as the JIT version, method hash being compiled, filename and
+// line number etc.
+//
+// 2) Tracking baseline for the noway asserts:
+// During DLL unload, we report the number of methods that were compiled by
+// the JIT per process both under normal mode and during min-opts. NOTE that
+// this is ON for all processes.
+//
+// 3) For the future, be aware of privacy, performance and actionability of the data.
+//
+
+#include "jitpch.h"
+#include "compiler.h"
+
+#ifdef FEATURE_TRACELOGGING
+#include "TraceLoggingProvider.h"
+#include "MicrosoftTelemetry.h"
+#include "clrtraceloggingcommon.h"
+#include "fxver.h"
+
+// Since telemetry code could be called under a noway_assert, make sure
+// we don't call noway_assert again.
+#undef noway_assert
+
+#define BUILD_STR1(x) #x
+#define BUILD_STR2(x) BUILD_STR1(x)
+#define BUILD_MACHINE BUILD_STR2(__BUILDMACHINE__)
+
+// A DLL local instance of the DotNet provider
+TRACELOGGING_DEFINE_PROVIDER(g_hClrJitProvider,
+ CLRJIT_PROVIDER_NAME,
+ CLRJIT_PROVIDER_ID,
+ TraceLoggingOptionMicrosoftTelemetry());
+
+// Threshold to detect if we are hitting too many bad (noway) methods
+// over good methods per process to prevent logging too much data.
+static const double NOWAY_NOISE_RATIO = 0.6; // Threshold of (bad / total) beyond which we'd stop
+ // logging. We'd restart if the pass rate improves.
+static const unsigned NOWAY_SUFFICIENCY_THRESHOLD = 25; // Count of methods beyond which we'd apply percent
+ // threshold
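+
+// Worked example (hypothetical counts, for illustration only): with 30 noway hits
+// and 10 successfully compiled methods, attempts = 40 and the ratio is 0.75; since
+// 30 > NOWAY_SUFFICIENCY_THRESHOLD (25) and 0.75 > NOWAY_NOISE_RATIO (0.6),
+// NotifyNowayAssert below returns early instead of logging.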
+
+// Initialize Telemetry State
+volatile bool JitTelemetry::s_fProviderRegistered = false;
+volatile UINT32 JitTelemetry::s_uMethodsCompiled = 0;
+volatile UINT32 JitTelemetry::s_uMethodsHitNowayAssert = 0;
+
+// Constructor for telemetry state per compiler instance
+JitTelemetry::JitTelemetry()
+{
+ Initialize(nullptr);
+}
+
+//------------------------------------------------------------------------
+// Initialize: Initialize the object with the compiler instance
+//
+// Description:
+//    The compiler instance may not be fully initialized yet. If you are
+//    tracking object data for telemetry, make sure that data is initialized
+//    by the time the compiler is ready.
+//
+void JitTelemetry::Initialize(Compiler* c)
+{
+ comp = c;
+ m_pszAssemblyName = "";
+ m_pszScopeName = "";
+ m_pszMethodName = "";
+ m_uMethodHash = 0;
+ m_fMethodInfoCached = false;
+}
+
+//------------------------------------------------------------------------
+// IsTelemetryEnabled: Can we perform JIT telemetry
+//
+// Return Value:
+// Returns "true" if COMPlus_JitTelemetry environment flag is
+//    non-zero. Otherwise, returns "false".
+//
+/* static */
+bool JitTelemetry::IsTelemetryEnabled()
+{
+ return JitConfig.JitTelemetry() != 0;
+}
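+
+// Usage note (based on the description above, not an exhaustive statement of the
+// configuration mechanism): a non-zero COMPlus_JitTelemetry environment variable
+// enables collection and a zero value disables it, e.g. "set COMPlus_JitTelemetry=0"
+// before launching the process turns JIT telemetry off.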
+
+//------------------------------------------------------------------------
+// NotifyDllProcessAttach: Notification for DLL load and static initializations
+//
+// Description:
+// Register telemetry provider with the OS.
+//
+// Note:
+//    This method can be called twice in the NGEN scenario.
+//
+void JitTelemetry::NotifyDllProcessAttach()
+{
+ if (!IsTelemetryEnabled())
+ {
+ return;
+ }
+
+ if (!s_fProviderRegistered)
+ {
+ // Register the provider.
+ TraceLoggingRegister(g_hClrJitProvider);
+ s_fProviderRegistered = true;
+ }
+}
+
+//------------------------------------------------------------------------
+// NotifyDllProcessDetach: Notification for DLL unload and teardown
+//
+// Description:
+// Log the methods compiled data if telemetry is enabled and
+// Unregister telemetry provider with the OS.
+//
+void JitTelemetry::NotifyDllProcessDetach()
+{
+ if (!IsTelemetryEnabled())
+ {
+ return;
+ }
+
+ assert(s_fProviderRegistered); // volatile read
+
+ // Unregister the provider.
+ TraceLoggingUnregister(g_hClrJitProvider);
+}
+
+//------------------------------------------------------------------------
+// NotifyEndOfCompilation: Notification for end of current method
+// compilation.
+//
+// Description:
+// Increment static volatile counters for the current compiled method.
+// This is slightly inaccurate due to lack of synchronization around
+//    the counters. The inaccuracy is a tradeoff to keep JITting cost low.
+//
+// Note:
+// 1. Must be called post fully successful compilation of the method.
+//    2. This serves as an effective baseline for how many methods compiled
+// successfully.
+void JitTelemetry::NotifyEndOfCompilation()
+{
+ if (!IsTelemetryEnabled())
+ {
+ return;
+ }
+
+ s_uMethodsCompiled++; // volatile increment
+}
+
+//------------------------------------------------------------------------
+// NotifyNowayAssert: Notification that noway assert handling is under way.
+//
+// Arguments:
+// filename - The JIT source file name's absolute path at the time of
+// building the JIT.
+// line - The line number where the noway assert was hit.
+//
+// Description:
+// If telemetry is enabled, then obtain data to collect from the
+// compiler or the VM and use the tracelogging APIs to write out.
+//
+void JitTelemetry::NotifyNowayAssert(const char* filename, unsigned line)
+{
+ if (!IsTelemetryEnabled())
+ {
+ return;
+ }
+
+ s_uMethodsHitNowayAssert++;
+
+ // Check if our assumption that noways are rare is invalid for this
+    // process. If so, return early rather than logging too much data.
+ unsigned noways = s_uMethodsHitNowayAssert;
+ unsigned attempts = max(1, s_uMethodsCompiled + noways);
+ double ratio = (noways / ((double)attempts));
+ if (noways > NOWAY_SUFFICIENCY_THRESHOLD && ratio > NOWAY_NOISE_RATIO)
+ {
+ return;
+ }
+
+ assert(comp);
+
+ UINT32 nowayIndex = s_uMethodsHitNowayAssert;
+ UINT32 codeSize = 0;
+ INT32 minOpts = -1;
+ const char* lastPhase = "";
+ if (comp != nullptr)
+ {
+ codeSize = comp->info.compILCodeSize;
+ minOpts = comp->opts.IsMinOptsSet() ? comp->opts.MinOpts() : -1;
+ lastPhase = PhaseNames[comp->previousCompletedPhase];
+ }
+
+ CacheCurrentMethodInfo();
+
+ TraceLoggingWrite(g_hClrJitProvider, "CLRJIT.NowayAssert",
+
+ TraceLoggingUInt32(codeSize, "IL_CODE_SIZE"), TraceLoggingInt32(minOpts, "MINOPTS_MODE"),
+ TraceLoggingString(lastPhase, "PREVIOUS_COMPLETED_PHASE"),
+
+ TraceLoggingString(m_pszAssemblyName, "ASSEMBLY_NAME"),
+ TraceLoggingString(m_pszMethodName, "METHOD_NAME"),
+ TraceLoggingString(m_pszScopeName, "METHOD_SCOPE"),
+ TraceLoggingUInt32(m_uMethodHash, "METHOD_HASH"),
+
+ TraceLoggingString(filename, "FILENAME"), TraceLoggingUInt32(line, "LINE"),
+ TraceLoggingUInt32(nowayIndex, "NOWAY_INDEX"),
+
+ TraceLoggingString(TARGET_READABLE_NAME, "ARCH"),
+ TraceLoggingString(VER_FILEVERSION_STR, "VERSION"), TraceLoggingString(BUILD_MACHINE, "BUILD"),
+ TraceLoggingString(VER_COMMENTS_STR, "FLAVOR"),
+
+ TraceLoggingKeyword(MICROSOFT_KEYWORD_TELEMETRY));
+}
+
+//------------------------------------------------------------------------
+// CacheCurrentMethodInfo: Cache the method/assembly/scope name info.
+//
+// Description:
+//    Obtain the information for the method under compilation from the
+//    compiler, if it is not already cached. This includes:
+//
+// Method name, assembly name, scope name, method hash.
+//
+void JitTelemetry::CacheCurrentMethodInfo()
+{
+ if (m_fMethodInfoCached)
+ {
+ return;
+ }
+
+ assert(comp);
+ if (comp != nullptr)
+ {
+ comp->compGetTelemetryDefaults(&m_pszAssemblyName, &m_pszScopeName, &m_pszMethodName, &m_uMethodHash);
+ assert(m_pszAssemblyName);
+ assert(m_pszScopeName);
+ assert(m_pszMethodName);
+ }
+
+ // Set cached to prevent getting this twice.
+ m_fMethodInfoCached = true;
+}
+
+//------------------------------------------------------------------------
+// compGetTelemetryDefaults: Obtain information specific to telemetry
+// from the JIT-interface.
+//
+// Arguments:
+// assemblyName - Pointer to hold assembly name upon return
+// scopeName - Pointer to hold scope name upon return
+// methodName - Pointer to hold method name upon return
+// methodHash - Pointer to hold method hash upon return
+//
+// Description:
+// Obtains from the JIT EE interface the information for the
+// current method under compilation.
+//
+// Warning:
+// The eeGetMethodName call could be expensive for generic
+// methods, so call this method only when there is less impact
+// to throughput.
+//
+void Compiler::compGetTelemetryDefaults(const char** assemblyName,
+ const char** scopeName,
+ const char** methodName,
+ unsigned* methodHash)
+{
+ if (info.compMethodHnd != nullptr)
+ {
+ __try
+ {
+
+ // Expensive calls, call infrequently or in exceptional scenarios.
+ *methodHash = info.compCompHnd->getMethodHash(info.compMethodHnd);
+ *methodName = eeGetMethodName(info.compMethodHnd, scopeName);
+
+ // SuperPMI needs to implement record/replay of these method calls.
+ *assemblyName = info.compCompHnd->getAssemblyName(
+ info.compCompHnd->getModuleAssembly(info.compCompHnd->getClassModule(info.compClassHnd)));
+ }
+ __except (EXCEPTION_EXECUTE_HANDLER)
+ {
+ }
+ }
+
+ // If the JIT interface methods init-ed these values to nullptr,
+ // make sure they are set to empty string.
+ if (*methodName == nullptr)
+ {
+ *methodName = "";
+ }
+ if (*scopeName == nullptr)
+ {
+ *scopeName = "";
+ }
+ if (*assemblyName == nullptr)
+ {
+ *assemblyName = "";
+ }
+}
+
+#endif // FEATURE_TRACELOGGING
diff --git a/src/jit/jittelemetry.h b/src/jit/jittelemetry.h
new file mode 100644
index 0000000000..24a0ce7b5d
--- /dev/null
+++ b/src/jit/jittelemetry.h
@@ -0,0 +1,78 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+// <OWNER>clrjit</OWNER>
+#pragma once
+
+#ifdef FEATURE_TRACELOGGING
+
+class Compiler;
+
+class JitTelemetry
+{
+public:
+ // Notify DLL load.
+ static void NotifyDllProcessAttach();
+
+ // Notify DLL unload.
+ static void NotifyDllProcessDetach();
+
+ // Constructor
+ JitTelemetry();
+
+ // Initialize with compiler instance
+ void Initialize(Compiler* comp);
+
+ // Notification of end of compilation of the current method.
+ void NotifyEndOfCompilation();
+
+ // Notification of noway_assert.
+ void NotifyNowayAssert(const char* filename, unsigned line);
+
+ // Is telemetry enabled through COMPlus_JitTelemetry?
+ static bool IsTelemetryEnabled();
+
+private:
+ // Obtain current method information from VM and cache for
+ // future uses.
+ void CacheCurrentMethodInfo();
+
+ //
+ //--------------------------------------------------------------------------------
+ // The below per process counters are updated without synchronization or
+    // The per-process counters below are updated without synchronization or
+    // thread-safety guarantees, to avoid interfering with JIT throughput.
+    // Accuracy of these counters is traded off for throughput.
+
+ // Methods compiled per DLL unload
+ static volatile UINT32 s_uMethodsCompiled;
+
+ // Methods compiled per DLL unload that hit noway assert (per process)
+ static volatile UINT32 s_uMethodsHitNowayAssert;
+ //--------------------------------------------------------------------------------
+
+ // Has the provider been registered already (per process)
+ static volatile bool s_fProviderRegistered;
+
+ // Cached value of current method hash.
+ unsigned m_uMethodHash;
+
+ // Cached value of current assembly name.
+ const char* m_pszAssemblyName;
+
+ // Cached value of current scope name, i.e., "Program.Foo" in "Program.Foo:Main"
+ const char* m_pszScopeName;
+
+ // Cached value of current method name, i.e., "Main" in "Program.Foo:Main"
+ const char* m_pszMethodName;
+
+ // Have we already cached the method/scope/assembly names?
+ bool m_fMethodInfoCached;
+
+ // Compiler instance.
+ Compiler* comp;
+};
+
+#endif // FEATURE_TRACELOGGING
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
new file mode 100644
index 0000000000..369c96322d
--- /dev/null
+++ b/src/jit/lclvars.cpp
@@ -0,0 +1,6788 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX LclVarsInfo XX
+XX XX
+XX The variables to be used by the code generator. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "emit.h"
+
+#include "register_arg_convention.h"
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+#if DOUBLE_ALIGN
+/* static */
+unsigned Compiler::s_lvaDoubleAlignedProcsCount = 0;
+#endif
+#endif
+
+/*****************************************************************************/
+
+void Compiler::lvaInit()
+{
+ /* We haven't allocated stack variables yet */
+ lvaRefCountingStarted = false;
+ lvaLocalVarRefCounted = false;
+
+ lvaSortAgain = false; // false: We don't need to call lvaSortOnly()
+ lvaTrackedFixed = false; // false: We can still add new tracked variables
+
+ lvaDoneFrameLayout = NO_FRAME_LAYOUT;
+#if !FEATURE_EH_FUNCLETS
+ lvaShadowSPslotsVar = BAD_VAR_NUM;
+#endif // !FEATURE_EH_FUNCLETS
+ lvaInlinedPInvokeFrameVar = BAD_VAR_NUM;
+ lvaReversePInvokeFrameVar = BAD_VAR_NUM;
+#if FEATURE_FIXED_OUT_ARGS
+ lvaPInvokeFrameRegSaveVar = BAD_VAR_NUM;
+ lvaOutgoingArgSpaceVar = BAD_VAR_NUM;
+#endif // FEATURE_FIXED_OUT_ARGS
+#ifdef _TARGET_ARM_
+ lvaPromotedStructAssemblyScratchVar = BAD_VAR_NUM;
+#endif // _TARGET_ARM_
+ lvaLocAllocSPvar = BAD_VAR_NUM;
+ lvaNewObjArrayArgs = BAD_VAR_NUM;
+ lvaGSSecurityCookie = BAD_VAR_NUM;
+#ifdef _TARGET_X86_
+ lvaVarargsBaseOfStkArgs = BAD_VAR_NUM;
+#endif // _TARGET_X86_
+ lvaVarargsHandleArg = BAD_VAR_NUM;
+ lvaSecurityObject = BAD_VAR_NUM;
+ lvaStubArgumentVar = BAD_VAR_NUM;
+ lvaArg0Var = BAD_VAR_NUM;
+ lvaMonAcquired = BAD_VAR_NUM;
+
+ lvaInlineeReturnSpillTemp = BAD_VAR_NUM;
+
+ gsShadowVarInfo = nullptr;
+#if FEATURE_EH_FUNCLETS
+ lvaPSPSym = BAD_VAR_NUM;
+#endif
+#if FEATURE_SIMD
+ lvaSIMDInitTempVarNum = BAD_VAR_NUM;
+#endif // FEATURE_SIMD
+ lvaCurEpoch = 0;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ lvaFirstStackIncomingArgNum = BAD_VAR_NUM;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+}
+
+/*****************************************************************************/
+
+void Compiler::lvaInitTypeRef()
+{
+
+ /* x86 args look something like this:
+ [this ptr] [hidden return buffer] [declared arguments]* [generic context] [var arg cookie]
+
+ x64 is closer to the native ABI:
+ [this ptr] [hidden return buffer] [generic context] [var arg cookie] [declared arguments]*
+ (Note: prior to .NET Framework 4.5.1 for Windows 8.1 (but not .NET Framework 4.5.1 "downlevel"),
+ the "hidden return buffer" came before the "this ptr". Now, the "this ptr" comes first. This
+ is different from the C++ order, where the "hidden return buffer" always comes first.)
+
+ ARM and ARM64 are the same as the current x64 convention:
+ [this ptr] [hidden return buffer] [generic context] [var arg cookie] [declared arguments]*
+
+ Key difference:
+ The var arg cookie and generic context are swapped with respect to the user arguments
+ */
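+
+    /* Illustrative example (hypothetical signature, for orientation only):
+       for an x64 instance method "int Foo(int a, long b)" the incoming lvaTable
+       entries are numbered 0: 'this', 1: a, 2: b. If the method instead returned
+       a large struct by value, a hidden return buffer would become entry 1 and
+       the declared arguments would move to entries 2 and 3. */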
+
+ /* Set compArgsCount and compLocalsCount */
+
+ info.compArgsCount = info.compMethodInfo->args.numArgs;
+
+ // Is there a 'this' pointer
+
+ if (!info.compIsStatic)
+ {
+ info.compArgsCount++;
+ }
+ else
+ {
+ info.compThisArg = BAD_VAR_NUM;
+ }
+
+ info.compILargsCount = info.compArgsCount;
+
+#ifdef FEATURE_SIMD
+ if (featureSIMD && (info.compRetNativeType == TYP_STRUCT))
+ {
+ var_types structType = impNormStructType(info.compMethodInfo->args.retTypeClass);
+ info.compRetType = structType;
+ }
+#endif // FEATURE_SIMD
+
+ // Are we returning a struct using a return buffer argument?
+ //
+ const bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(info.compMethodInfo);
+
+ // Possibly change the compRetNativeType from TYP_STRUCT to a "primitive" type
+ // when we are returning a struct by value and it fits in one register
+ //
+ if (!hasRetBuffArg && varTypeIsStruct(info.compRetNativeType))
+ {
+ CORINFO_CLASS_HANDLE retClsHnd = info.compMethodInfo->args.retTypeClass;
+
+ Compiler::structPassingKind howToReturnStruct;
+ var_types returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
+
+ if (howToReturnStruct == SPK_PrimitiveType)
+ {
+ assert(returnType != TYP_UNKNOWN);
+ assert(returnType != TYP_STRUCT);
+
+ info.compRetNativeType = returnType;
+
+ // ToDo: Refactor this common code sequence into its own method as it is used 4+ times
+ if ((returnType == TYP_LONG) && (compLongUsed == false))
+ {
+ compLongUsed = true;
+ }
+ else if (((returnType == TYP_FLOAT) || (returnType == TYP_DOUBLE)) && (compFloatingPointUsed == false))
+ {
+ compFloatingPointUsed = true;
+ }
+ }
+ }
+
+ // Do we have a RetBuffArg?
+
+ if (hasRetBuffArg)
+ {
+ info.compArgsCount++;
+ }
+ else
+ {
+ info.compRetBuffArg = BAD_VAR_NUM;
+ }
+
+ /* There is a 'hidden' cookie pushed last when the
+ calling convention is varargs */
+
+ if (info.compIsVarArgs)
+ {
+ info.compArgsCount++;
+ }
+
+ // Is there an extra parameter used to pass instantiation info to
+ // shared generic methods and shared generic struct instance methods?
+ if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ info.compArgsCount++;
+ }
+ else
+ {
+ info.compTypeCtxtArg = BAD_VAR_NUM;
+ }
+
+ lvaCount = info.compLocalsCount = info.compArgsCount + info.compMethodInfo->locals.numArgs;
+
+ info.compILlocalsCount = info.compILargsCount + info.compMethodInfo->locals.numArgs;
+
+ /* Now allocate the variable descriptor table */
+
+ if (compIsForInlining())
+ {
+ lvaTable = impInlineInfo->InlinerCompiler->lvaTable;
+ lvaCount = impInlineInfo->InlinerCompiler->lvaCount;
+ lvaTableCnt = impInlineInfo->InlinerCompiler->lvaTableCnt;
+
+ // No more stuff needs to be done.
+ return;
+ }
+
+ lvaTableCnt = lvaCount * 2;
+
+ if (lvaTableCnt < 16)
+ {
+ lvaTableCnt = 16;
+ }
+
+ lvaTable = (LclVarDsc*)compGetMemArray(lvaTableCnt, sizeof(*lvaTable), CMK_LvaTable);
+ size_t tableSize = lvaTableCnt * sizeof(*lvaTable);
+ memset(lvaTable, 0, tableSize);
+ for (unsigned i = 0; i < lvaTableCnt; i++)
+ {
+ new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
+ }
+
+ //-------------------------------------------------------------------------
+ // Count the arguments and initialize the respective lvaTable[] entries
+ //
+ // First the implicit arguments
+ //-------------------------------------------------------------------------
+
+ InitVarDscInfo varDscInfo;
+ varDscInfo.Init(lvaTable, hasRetBuffArg);
+
+ lvaInitArgs(&varDscInfo);
+
+ //-------------------------------------------------------------------------
+ // Finally the local variables
+ //-------------------------------------------------------------------------
+
+ unsigned varNum = varDscInfo.varNum;
+ LclVarDsc* varDsc = varDscInfo.varDsc;
+ CORINFO_ARG_LIST_HANDLE localsSig = info.compMethodInfo->locals.args;
+
+ for (unsigned i = 0; i < info.compMethodInfo->locals.numArgs;
+ i++, varNum++, varDsc++, localsSig = info.compCompHnd->getArgNext(localsSig))
+ {
+ CORINFO_CLASS_HANDLE typeHnd;
+ CorInfoTypeWithMod corInfoType =
+ info.compCompHnd->getArgType(&info.compMethodInfo->locals, localsSig, &typeHnd);
+ lvaInitVarDsc(varDsc, varNum, strip(corInfoType), typeHnd, localsSig, &info.compMethodInfo->locals);
+
+ varDsc->lvPinned = ((corInfoType & CORINFO_TYPE_MOD_PINNED) != 0);
+ varDsc->lvOnFrame = true; // The final home for this local variable might be our local stack frame
+ }
+
+ if ( // If there already exist unsafe buffers, don't mark more structs as unsafe
+ // as that will cause them to be placed along with the real unsafe buffers,
+ // unnecessarily exposing them to overruns. This can affect GS tests which
+ // intentionally do buffer-overruns.
+ !getNeedsGSSecurityCookie() &&
+ // GS checks require the stack to be re-ordered, which can't be done with EnC
+ !opts.compDbgEnC && compStressCompile(STRESS_UNSAFE_BUFFER_CHECKS, 25))
+ {
+ setNeedsGSSecurityCookie();
+ compGSReorderStackLayout = true;
+
+ for (unsigned i = 0; i < lvaCount; i++)
+ {
+ if ((lvaTable[i].lvType == TYP_STRUCT) && compStressCompile(STRESS_GENERIC_VARN, 60))
+ {
+ lvaTable[i].lvIsUnsafeBuffer = true;
+ }
+ }
+ }
+
+ if (getNeedsGSSecurityCookie())
+ {
+ // Ensure that there will be at least one stack variable since
+ // we require that the GSCookie does not have a 0 stack offset.
+ unsigned dummy = lvaGrabTempWithImplicitUse(false DEBUGARG("GSCookie dummy"));
+ lvaTable[dummy].lvType = TYP_INT;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ lvaTableDump(INITIAL_FRAME_LAYOUT);
+ }
+#endif
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitArgs(InitVarDscInfo* varDscInfo)
+{
+ compArgSize = 0;
+
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ // Prespill all argument regs on to stack in case of Arm when under profiler.
+ if (compIsProfilerHookNeeded())
+ {
+ codeGen->regSet.rsMaskPreSpillRegArg |= RBM_ARG_REGS;
+ }
+#endif
+
+ //----------------------------------------------------------------------
+
+ /* Is there a "this" pointer ? */
+ lvaInitThisPtr(varDscInfo);
+
+ /* If we have a hidden return-buffer parameter, that comes here */
+ lvaInitRetBuffArg(varDscInfo);
+
+//======================================================================
+
+#if USER_ARGS_COME_LAST
+ //@GENERICS: final instantiation-info argument for shared generic methods
+ // and shared generic struct instance methods
+ lvaInitGenericsCtxt(varDscInfo);
+
+ /* If the method is varargs, process the varargs cookie */
+ lvaInitVarArgsHandle(varDscInfo);
+#endif
+
+ //-------------------------------------------------------------------------
+ // Now walk the function signature for the explicit user arguments
+ //-------------------------------------------------------------------------
+ lvaInitUserArgs(varDscInfo);
+
+#if !USER_ARGS_COME_LAST
+ //@GENERICS: final instantiation-info argument for shared generic methods
+ // and shared generic struct instance methods
+ lvaInitGenericsCtxt(varDscInfo);
+
+ /* If the method is varargs, process the varargs cookie */
+ lvaInitVarArgsHandle(varDscInfo);
+#endif
+
+ //----------------------------------------------------------------------
+
+ // We have set info.compArgsCount in compCompile()
+ noway_assert(varDscInfo->varNum == info.compArgsCount);
+ assert(varDscInfo->intRegArgNum <= MAX_REG_ARG);
+
+ codeGen->intRegState.rsCalleeRegArgCount = varDscInfo->intRegArgNum;
+#if !FEATURE_STACK_FP_X87
+ codeGen->floatRegState.rsCalleeRegArgCount = varDscInfo->floatRegArgNum;
+#endif // !FEATURE_STACK_FP_X87
+
+ // The total argument size must be aligned.
+ noway_assert((compArgSize % sizeof(void*)) == 0);
+
+#ifdef _TARGET_X86_
+    /* We cannot pass more than 2^16 bytes of arguments, as the "ret"
+       instruction can only pop a 16-bit immediate number of bytes. This could
+       be handled, but it would be very difficult for fully interruptible code */
+
+ if (compArgSize != (size_t)(unsigned short)compArgSize)
+ NO_WAY("Too many arguments for the \"ret\" instruction to pop");
+#endif
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitThisPtr(InitVarDscInfo* varDscInfo)
+{
+ LclVarDsc* varDsc = varDscInfo->varDsc;
+ if (!info.compIsStatic)
+ {
+ varDsc->lvIsParam = 1;
+#if ASSERTION_PROP
+ varDsc->lvSingleDef = 1;
+#endif
+
+ varDsc->lvIsPtr = 1;
+
+ lvaArg0Var = info.compThisArg = varDscInfo->varNum;
+ noway_assert(info.compThisArg == 0);
+
+ if (eeIsValueClass(info.compClassHnd))
+ {
+ varDsc->lvType = TYP_BYREF;
+#ifdef FEATURE_SIMD
+ if (featureSIMD)
+ {
+ var_types simdBaseType = TYP_UNKNOWN;
+ var_types type = impNormStructType(info.compClassHnd, nullptr, nullptr, &simdBaseType);
+ if (simdBaseType != TYP_UNKNOWN)
+ {
+ assert(varTypeIsSIMD(type));
+ varDsc->lvSIMDType = true;
+ varDsc->lvBaseType = simdBaseType;
+ }
+ }
+#endif // FEATURE_SIMD
+ }
+ else
+ {
+ varDsc->lvType = TYP_REF;
+ }
+
+ if (tiVerificationNeeded)
+ {
+ varDsc->lvVerTypeInfo = verMakeTypeInfo(info.compClassHnd);
+
+ if (varDsc->lvVerTypeInfo.IsValueClass())
+ {
+ varDsc->lvVerTypeInfo.MakeByRef();
+ }
+ }
+ else
+ {
+ varDsc->lvVerTypeInfo = typeInfo();
+ }
+
+ // Mark the 'this' pointer for the method
+ varDsc->lvVerTypeInfo.SetIsThisPtr();
+
+ varDsc->lvIsRegArg = 1;
+ noway_assert(varDscInfo->intRegArgNum == 0);
+
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(varDscInfo->allocRegArg(TYP_INT), varDsc->TypeGet());
+#if FEATURE_MULTIREG_ARGS
+ varDsc->lvOtherArgReg = REG_NA;
+#endif
+ varDsc->setPrefReg(varDsc->lvArgReg, this);
+ varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("'this' passed in register %s\n", getRegName(varDsc->lvArgReg));
+ }
+#endif
+ compArgSize += TARGET_POINTER_SIZE;
+
+ varDscInfo->varNum++;
+ varDscInfo->varDsc++;
+ }
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo)
+{
+ LclVarDsc* varDsc = varDscInfo->varDsc;
+ bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(info.compMethodInfo);
+
+ // These two should always match
+ noway_assert(hasRetBuffArg == varDscInfo->hasRetBufArg);
+
+ if (hasRetBuffArg)
+ {
+ info.compRetBuffArg = varDscInfo->varNum;
+ varDsc->lvType = TYP_BYREF;
+ varDsc->lvIsParam = 1;
+ varDsc->lvIsRegArg = 1;
+#if ASSERTION_PROP
+ varDsc->lvSingleDef = 1;
+#endif
+ if (hasFixedRetBuffReg())
+ {
+ varDsc->lvArgReg = theFixedRetBuffReg();
+ }
+ else
+ {
+ unsigned retBuffArgNum = varDscInfo->allocRegArg(TYP_INT);
+ varDsc->lvArgReg = genMapIntRegArgNumToRegNum(retBuffArgNum);
+ }
+
+#if FEATURE_MULTIREG_ARGS
+ varDsc->lvOtherArgReg = REG_NA;
+#endif
+ varDsc->setPrefReg(varDsc->lvArgReg, this);
+ varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+
+ info.compRetBuffDefStack = 0;
+ if (info.compRetType == TYP_STRUCT)
+ {
+ CORINFO_SIG_INFO sigInfo;
+ info.compCompHnd->getMethodSig(info.compMethodHnd, &sigInfo);
+ assert(JITtype2varType(sigInfo.retType) == info.compRetType); // Else shouldn't have a ret buff.
+
+ info.compRetBuffDefStack =
+ (info.compCompHnd->isStructRequiringStackAllocRetBuf(sigInfo.retTypeClass) == TRUE);
+ if (info.compRetBuffDefStack)
+ {
+                // If we're assured that the ret buff argument points into the caller's stack, we will type it
+                // as "TYP_I_IMPL" (native int/unmanaged pointer) so that it's not tracked as a GC ref.
+ varDsc->lvType = TYP_I_IMPL;
+ }
+ }
+ assert(isValidIntArgReg(varDsc->lvArgReg));
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("'__retBuf' passed in register %s\n", getRegName(varDsc->lvArgReg));
+ }
+#endif
+
+ /* Update the total argument size, count and varDsc */
+
+ compArgSize += TARGET_POINTER_SIZE;
+ varDscInfo->varNum++;
+ varDscInfo->varDsc++;
+ }
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
+{
+//-------------------------------------------------------------------------
+// Walk the function signature for the explicit arguments
+//-------------------------------------------------------------------------
+
+#if defined(_TARGET_X86_)
+ // Only (some of) the implicit args are enregistered for varargs
+ varDscInfo->maxIntRegArgNum = info.compIsVarArgs ? varDscInfo->intRegArgNum : MAX_REG_ARG;
+#elif defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
+    // On Windows AMD64 the int and float argument registers share a single index, so keep the float
+    // index in sync with the int index. (On System V the float registers are indexed separately.)
+ varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum;
+#endif // _TARGET_*
+
+ CORINFO_ARG_LIST_HANDLE argLst = info.compMethodInfo->args.args;
+
+ const unsigned argSigLen = info.compMethodInfo->args.numArgs;
+
+ regMaskTP doubleAlignMask = RBM_NONE;
+ for (unsigned i = 0; i < argSigLen;
+ i++, varDscInfo->varNum++, varDscInfo->varDsc++, argLst = info.compCompHnd->getArgNext(argLst))
+ {
+ LclVarDsc* varDsc = varDscInfo->varDsc;
+ CORINFO_CLASS_HANDLE typeHnd = nullptr;
+
+ CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd);
+ varDsc->lvIsParam = 1;
+#if ASSERTION_PROP
+ varDsc->lvSingleDef = 1;
+#endif
+
+ lvaInitVarDsc(varDsc, varDscInfo->varNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args);
+
+ // For ARM, ARM64, and AMD64 varargs, all arguments go in integer registers
+ var_types argType = mangleVarArgsType(varDsc->TypeGet());
+ var_types origArgType = argType;
+        // The ARM softfp calling convention should affect only the floating point arguments.
+        // Otherwise we would generate many surplus pre-spills and other memory operations
+        // on the associated locations.
+ bool isSoftFPPreSpill = opts.compUseSoftFP && varTypeIsFloating(varDsc->TypeGet());
+ unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
+ unsigned cSlots = argSize / TARGET_POINTER_SIZE; // the total number of slots of this argument
+ bool isHfaArg = false;
+ var_types hfaType = TYP_UNDEF;
+
+ // Methods that use VarArg or SoftFP cannot have HFA arguments
+ if (!info.compIsVarArgs && !opts.compUseSoftFP)
+ {
+ // If the argType is a struct, then check if it is an HFA
+ if (varTypeIsStruct(argType))
+ {
+ hfaType = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF
+ isHfaArg = varTypeIsFloating(hfaType);
+ }
+ }
+ if (isHfaArg)
+ {
+ // We have an HFA argument, so from here on out treat the type as a float or double.
+            // The original struct type is still available via origArgType.
+ // We also update the cSlots to be the number of float/double fields in the HFA
+ argType = hfaType;
+ cSlots = varDsc->lvHfaSlots();
+ }
+ // The number of slots that must be enregistered if we are to consider this argument enregistered.
+ // This is normally the same as cSlots, since we normally either enregister the entire object,
+ // or none of it. For structs on ARM, however, we only need to enregister a single slot to consider
+ // it enregistered, as long as we can split the rest onto the stack.
+ unsigned cSlotsToEnregister = cSlots;
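+        // Illustrative example (hypothetical): on ARM, a 12-byte non-HFA struct (cSlots == 3) arriving when
+        // only r3 is still free can still be treated as enregistered, because for such structs
+        // cSlotsToEnregister is later reduced to 1 (see the ARM-specific code below); r3 takes the first
+        // slot and the remaining two slots are split onto the stack.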
+
+#ifdef _TARGET_ARM_
+ // On ARM we pass the first 4 words of integer arguments and non-HFA structs in registers.
+ // But we pre-spill user arguments in varargs methods and structs.
+ //
+ unsigned cAlign;
+ bool preSpill = info.compIsVarArgs || isSoftFPPreSpill;
+
+ switch (origArgType)
+ {
+ case TYP_STRUCT:
+ assert(varDsc->lvSize() == argSize);
+ cAlign = varDsc->lvStructDoubleAlign ? 2 : 1;
+
+ // HFA arguments go on the stack frame. They don't get spilled in the prolog like struct
+ // arguments passed in the integer registers but get homed immediately after the prolog.
+ if (!isHfaArg)
+ {
+                    cSlotsToEnregister = 1; // HFAs must be enregistered wholly or not at all; other structs can be split.
+ preSpill = true;
+ }
+ break;
+
+ case TYP_DOUBLE:
+ case TYP_LONG:
+ cAlign = 2;
+ break;
+
+ default:
+ cAlign = 1;
+ break;
+ }
+
+ if (isRegParamType(argType))
+ {
+ compArgSize += varDscInfo->alignReg(argType, cAlign) * REGSIZE_BYTES;
+ }
+
+ if (argType == TYP_STRUCT)
+ {
+ // Are we going to split the struct between registers and stack? We can do that as long as
+ // no floating-point arguments have been put on the stack.
+ //
+ // From the ARM Procedure Call Standard:
+ // Rule C.5: "If the NCRN is less than r4 **and** the NSAA is equal to the SP,"
+ // then split the argument between registers and stack. Implication: if something
+ // has already been spilled to the stack, then anything that would normally be
+ // split between the core registers and the stack will be put on the stack.
+ // Anything that follows will also be on the stack. However, if something from
+ // floating point regs has been spilled to the stack, we can still use r0-r3 until they are full.
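+            //
+            // Illustrative scenario (hypothetical): r0 and r1 are already taken and a 12-byte struct
+            // (3 slots) would normally be split across r2, r3 and one stack slot. If a floating-point
+            // argument has already been placed on the stack, the split is not performed: the remaining
+            // integer argument registers are retired and the struct goes entirely to the stack below.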
+
+ if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register
+ !varDscInfo->canEnreg(TYP_INT, cSlots) && // The end of the struct can't fit in a register
+ varDscInfo->existAnyFloatStackArgs()) // There's at least one stack-based FP arg already
+ {
+ varDscInfo->setAllRegArgUsed(TYP_INT); // Prevent all future use of integer registers
+ preSpill = false; // This struct won't be prespilled, since it will go on the stack
+ }
+ }
+
+ if (preSpill)
+ {
+ for (unsigned ix = 0; ix < cSlots; ix++)
+ {
+ if (!varDscInfo->canEnreg(TYP_INT, ix + 1))
+ {
+ break;
+ }
+ regMaskTP regMask = genMapArgNumToRegMask(varDscInfo->regArgNum(TYP_INT) + ix, TYP_INT);
+ if (cAlign == 2)
+ {
+ doubleAlignMask |= regMask;
+ }
+ codeGen->regSet.rsMaskPreSpillRegArg |= regMask;
+ }
+ }
+ else
+ {
+ varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+ }
+
+#else // !_TARGET_ARM_
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ if (varTypeIsStruct(argType))
+ {
+ assert(typeHnd != nullptr);
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ if (structDesc.passedInRegisters)
+ {
+ unsigned intRegCount = 0;
+ unsigned floatRegCount = 0;
+
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ if (structDesc.IsIntegralSlot(i))
+ {
+ intRegCount++;
+ }
+ else if (structDesc.IsSseSlot(i))
+ {
+ floatRegCount++;
+ }
+ else
+ {
+ assert(false && "Invalid eightbyte classification type.");
+ break;
+ }
+ }
+
+ if (intRegCount != 0 && !varDscInfo->canEnreg(TYP_INT, intRegCount))
+ {
+ structDesc.passedInRegisters = false; // No register to enregister the eightbytes.
+ }
+
+ if (floatRegCount != 0 && !varDscInfo->canEnreg(TYP_FLOAT, floatRegCount))
+ {
+ structDesc.passedInRegisters = false; // No register to enregister the eightbytes.
+ }
+ }
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // The final home for this incoming register might be our local stack frame.
+ // For System V platforms the final home will always be on the local stack frame.
+ varDsc->lvOnFrame = true;
+
+#endif // !_TARGET_ARM_
+
+ bool canPassArgInRegisters = false;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varTypeIsStruct(argType))
+ {
+ canPassArgInRegisters = structDesc.passedInRegisters;
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister);
+ }
+
+ if (canPassArgInRegisters)
+ {
+ /* Another register argument */
+
+ // Allocate the registers we need. allocRegArg() returns the first argument register number of the set.
+ // For non-HFA structs, we still "try" to enregister the whole thing; it will just max out if splitting
+ // to the stack happens.
+ unsigned firstAllocatedRegArgNum = 0;
+
+#if FEATURE_MULTIREG_ARGS
+ varDsc->lvOtherArgReg = REG_NA;
+#endif // FEATURE_MULTIREG_ARGS
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned secondAllocatedRegArgNum = 0;
+ var_types firstEightByteType = TYP_UNDEF;
+ var_types secondEightByteType = TYP_UNDEF;
+
+ if (varTypeIsStruct(argType))
+ {
+ if (structDesc.eightByteCount >= 1)
+ {
+ firstEightByteType = GetEightByteType(structDesc, 0);
+ firstAllocatedRegArgNum = varDscInfo->allocRegArg(firstEightByteType, 1);
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots);
+ }
+
+ if (isHfaArg)
+ {
+ // We need to save the fact that this HFA is enregistered
+ varDsc->lvSetIsHfa();
+ varDsc->lvSetIsHfaRegArg();
+ varDsc->SetHfaType(hfaType);
+ varDsc->lvIsMultiRegArg = (varDsc->lvHfaSlots() > 1);
+ }
+
+ varDsc->lvIsRegArg = 1;
+
+#if FEATURE_MULTIREG_ARGS
+ if (varTypeIsStruct(argType))
+ {
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType);
+
+ // If there is a second eightbyte, get a register for it too and map the arg to the reg number.
+ if (structDesc.eightByteCount >= 2)
+ {
+ secondEightByteType = GetEightByteType(structDesc, 1);
+ secondAllocatedRegArgNum = varDscInfo->allocRegArg(secondEightByteType, 1);
+ }
+
+ if (secondEightByteType != TYP_UNDEF)
+ {
+ varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType);
+ varDsc->addPrefReg(genRegMask(varDsc->lvOtherArgReg), this);
+ }
+#else // ARM32 or ARM64
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL);
+#ifdef _TARGET_ARM64_
+ if (cSlots == 2)
+ {
+ varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL);
+ varDsc->addPrefReg(genRegMask(varDsc->lvOtherArgReg), this);
+ }
+#endif // _TARGET_ARM64_
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ }
+ else
+#endif // FEATURE_MULTIREG_ARGS
+ {
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType);
+ }
+
+ varDsc->setPrefReg(varDsc->lvArgReg, this);
+
+#ifdef _TARGET_ARM_
+ if (varDsc->TypeGet() == TYP_LONG)
+ {
+ varDsc->lvOtherReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_INT);
+ varDsc->addPrefReg(genRegMask(varDsc->lvOtherReg), this);
+ }
+#endif // _TARGET_ARM_
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Arg #%u passed in register(s) ", varDscInfo->varNum);
+ bool isFloat = false;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // In case of one eightbyte struct the type is already normalized earlier.
+ // The varTypeIsFloating(argType) is good for this case.
+ if (varTypeIsStruct(argType) && (structDesc.eightByteCount >= 1))
+ {
+ isFloat = varTypeIsFloating(firstEightByteType);
+ }
+ else
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ isFloat = varTypeIsFloating(argType);
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varTypeIsStruct(argType))
+ {
+ // Print both registers, just to be clear
+ if (firstEightByteType == TYP_UNDEF)
+ {
+ printf("firstEightByte: <not used>");
+ }
+ else
+ {
+ printf("firstEightByte: %s",
+ getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType),
+ isFloat));
+ }
+
+ if (secondEightByteType == TYP_UNDEF)
+ {
+ printf(", secondEightByte: <not used>");
+ }
+ else
+ {
+ printf(", secondEightByte: %s",
+ getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType),
+ varTypeIsFloating(secondEightByteType)));
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, argType);
+
+ for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++)
+ {
+ if (ix > 0)
+ {
+ printf(",");
+ }
+
+ if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum)) // a struct has been split between
+ // registers and stack
+ {
+ printf(" stack slots:%d", cSlots - ix);
+ break;
+ }
+
+#ifdef _TARGET_ARM_
+ if (isFloat)
+ {
+ // Print register size prefix
+ if (argType == TYP_DOUBLE)
+ {
+ // Print both registers, just to be clear
+ printf("%s/%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat),
+ getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType), isFloat));
+
+ // doubles take 2 slots
+ assert(ix + 1 < cSlots);
+ ++ix;
+ ++regArgNum;
+ }
+ else
+ {
+ printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat));
+ }
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+ printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat));
+ }
+ }
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+ } // end if (canPassArgInRegisters)
+ else
+ {
+#if defined(_TARGET_ARM_)
+
+ varDscInfo->setAllRegArgUsed(argType);
+ if (varTypeIsFloating(argType))
+ {
+ varDscInfo->setAnyFloatStackArgs();
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ // If we needed to use the stack in order to pass this argument then
+ // record the fact that we have used up any remaining registers of this 'type'
+            // This prevents any 'backfilling' from occurring on ARM64
+ //
+ varDscInfo->setAllRegArgUsed(argType);
+
+#endif // _TARGET_XXX_
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        // The arg size is the number of bytes of the argument. For a struct it might not be a multiple of
+        // TARGET_POINTER_SIZE. The stack-allocated space must always be a multiple of TARGET_POINTER_SIZE,
+        // so round it up.
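+        // For example (illustrative): a 12-byte struct on this 64-bit target contributes
+        // roundUp(12, 8) == 16 bytes to compArgSize.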
+ compArgSize += (unsigned)roundUp(argSize, TARGET_POINTER_SIZE);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ compArgSize += argSize;
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (info.compIsVarArgs || isHfaArg || isSoftFPPreSpill)
+ {
+#if defined(_TARGET_X86_)
+ varDsc->lvStkOffs = compArgSize;
+#else // !_TARGET_X86_
+ // TODO-CQ: We shouldn't have to go as far as to declare these
+ // address-exposed -- DoNotEnregister should suffice.
+ lvaSetVarAddrExposed(varDscInfo->varNum);
+#endif // !_TARGET_X86_
+ }
+ } // for each user arg
+
+#ifdef _TARGET_ARM_
+ if (doubleAlignMask != RBM_NONE)
+ {
+ assert(RBM_ARG_REGS == 0xF);
+ assert((doubleAlignMask & RBM_ARG_REGS) == doubleAlignMask);
+ if (doubleAlignMask != RBM_NONE && doubleAlignMask != RBM_ARG_REGS)
+ {
+ // doubleAlignMask can only be 0011 and/or 1100 as 'double aligned types' can
+ // begin at r0 or r2.
+ assert(doubleAlignMask == 0x3 || doubleAlignMask == 0xC /* || 0xF is if'ed out */);
+
+ // Now if doubleAlignMask is 0011 i.e., {r0,r1} and we prespill r2 or r3
+ // but not both, then the stack would be misaligned for r0. So spill both
+ // r2 and r3.
+ //
+ // ; +0 --- caller SP double aligned ----
+ // ; -4 r2 r3
+ // ; -8 r1 r1
+ // ; -c r0 r0 <-- misaligned.
+ // ; callee saved regs
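+            //
+            // Worked example (illustrative): doubleAlignMask == 0x3 ({r0,r1}) with
+            // rsMaskPreSpillRegArg == 0x7 ({r0,r1,r2}): prespilling only r2 would leave the
+            // double in {r0,r1} misaligned, so r3 is added as well:
+            //     (~0x7 & ~0x3) & RBM_ARG_REGS == 0x8 (r3)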
+ if (doubleAlignMask == 0x3 && doubleAlignMask != codeGen->regSet.rsMaskPreSpillRegArg)
+ {
+ codeGen->regSet.rsMaskPreSpillAlign =
+ (~codeGen->regSet.rsMaskPreSpillRegArg & ~doubleAlignMask) & RBM_ARG_REGS;
+ }
+ }
+ }
+#endif // _TARGET_ARM_
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitGenericsCtxt(InitVarDscInfo* varDscInfo)
+{
+ //@GENERICS: final instantiation-info argument for shared generic methods
+ // and shared generic struct instance methods
+ if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ info.compTypeCtxtArg = varDscInfo->varNum;
+
+ LclVarDsc* varDsc = varDscInfo->varDsc;
+ varDsc->lvIsParam = 1;
+#if ASSERTION_PROP
+ varDsc->lvSingleDef = 1;
+#endif
+
+ varDsc->lvType = TYP_I_IMPL;
+
+ if (varDscInfo->canEnreg(TYP_I_IMPL))
+ {
+ /* Another register argument */
+
+ varDsc->lvIsRegArg = 1;
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(varDscInfo->regArgNum(TYP_INT), varDsc->TypeGet());
+#if FEATURE_MULTIREG_ARGS
+ varDsc->lvOtherArgReg = REG_NA;
+#endif
+ varDsc->setPrefReg(varDsc->lvArgReg, this);
+ varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+
+ varDscInfo->intRegArgNum++;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("'GenCtxt' passed in register %s\n", getRegName(varDsc->lvArgReg));
+ }
+#endif
+ }
+#ifndef LEGACY_BACKEND
+ else
+ {
+ // For the RyuJIT backend, we need to mark these as being on the stack,
+ // as this is not done elsewhere in the case that canEnreg returns false.
+ varDsc->lvOnFrame = true;
+ }
+#endif // !LEGACY_BACKEND
+
+ compArgSize += TARGET_POINTER_SIZE;
+
+#if defined(_TARGET_X86_)
+ if (info.compIsVarArgs)
+ varDsc->lvStkOffs = compArgSize;
+#endif // _TARGET_X86_
+
+ varDscInfo->varNum++;
+ varDscInfo->varDsc++;
+ }
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitVarArgsHandle(InitVarDscInfo* varDscInfo)
+{
+ if (info.compIsVarArgs)
+ {
+ lvaVarargsHandleArg = varDscInfo->varNum;
+
+ LclVarDsc* varDsc = varDscInfo->varDsc;
+ varDsc->lvType = TYP_I_IMPL;
+ varDsc->lvIsParam = 1;
+ // Make sure this lives in the stack -- address may be reported to the VM.
+ // TODO-CQ: This should probably be:
+ // lvaSetVarDoNotEnregister(varDscInfo->varNum DEBUGARG(DNER_VMNeedsStackAddr));
+ // But that causes problems, so, for expedience, I switched back to this heavyweight
+ // hammer. But I think it should be possible to switch; it may just work now
+ // that other problems are fixed.
+ lvaSetVarAddrExposed(varDscInfo->varNum);
+
+#if ASSERTION_PROP
+ varDsc->lvSingleDef = 1;
+#endif
+
+ if (varDscInfo->canEnreg(TYP_I_IMPL))
+ {
+ /* Another register argument */
+
+ unsigned varArgHndArgNum = varDscInfo->allocRegArg(TYP_I_IMPL);
+
+ varDsc->lvIsRegArg = 1;
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(varArgHndArgNum, TYP_I_IMPL);
+#if FEATURE_MULTIREG_ARGS
+ varDsc->lvOtherArgReg = REG_NA;
+#endif
+ varDsc->setPrefReg(varDsc->lvArgReg, this);
+ varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+#ifdef _TARGET_ARM_
+ // This has to be spilled right in front of the real arguments and we have
+            // to pre-spill all the argument registers explicitly because we only have
+            // symbols for the declared ones, not any potential variadic ones.
+ for (unsigned ix = varArgHndArgNum; ix < ArrLen(intArgMasks); ix++)
+ {
+ codeGen->regSet.rsMaskPreSpillRegArg |= intArgMasks[ix];
+ }
+#endif // _TARGET_ARM_
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("'VarArgHnd' passed in register %s\n", getRegName(varDsc->lvArgReg));
+ }
+#endif // DEBUG
+ }
+#ifndef LEGACY_BACKEND
+ else
+ {
+ // For the RyuJIT backend, we need to mark these as being on the stack,
+ // as this is not done elsewhere in the case that canEnreg returns false.
+ varDsc->lvOnFrame = true;
+ }
+#endif // !LEGACY_BACKEND
+
+ /* Update the total argument size, count and varDsc */
+
+ compArgSize += TARGET_POINTER_SIZE;
+
+ varDscInfo->varNum++;
+ varDscInfo->varDsc++;
+
+#if defined(_TARGET_X86_)
+ varDsc->lvStkOffs = compArgSize;
+
+ // Allocate a temp to point at the beginning of the args
+
+ lvaVarargsBaseOfStkArgs = lvaGrabTemp(false DEBUGARG("Varargs BaseOfStkArgs"));
+ lvaTable[lvaVarargsBaseOfStkArgs].lvType = TYP_I_IMPL;
+
+#endif // _TARGET_X86_
+ }
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitVarDsc(LclVarDsc* varDsc,
+ unsigned varNum,
+ CorInfoType corInfoType,
+ CORINFO_CLASS_HANDLE typeHnd,
+ CORINFO_ARG_LIST_HANDLE varList,
+ CORINFO_SIG_INFO* varSig)
+{
+ noway_assert(varDsc == &lvaTable[varNum]);
+
+ switch (corInfoType)
+ {
+ // Mark types that looks like a pointer for doing shadow-copying of
+ // parameters if we have an unsafe buffer.
+ // Note that this does not handle structs with pointer fields. Instead,
+ // we rely on using the assign-groups/equivalence-groups in
+ // gsFindVulnerableParams() to determine if a buffer-struct contains a
+ // pointer. We could do better by having the EE determine this for us.
+ // Note that we want to keep buffers without pointers at lower memory
+ // addresses than buffers with pointers.
+ case CORINFO_TYPE_PTR:
+ case CORINFO_TYPE_BYREF:
+ case CORINFO_TYPE_CLASS:
+ case CORINFO_TYPE_STRING:
+ case CORINFO_TYPE_VAR:
+ case CORINFO_TYPE_REFANY:
+ varDsc->lvIsPtr = 1;
+ break;
+ default:
+ break;
+ }
+
+ var_types type = JITtype2varType(corInfoType);
+ if (varTypeIsFloating(type))
+ {
+ compFloatingPointUsed = true;
+ }
+
+ if (tiVerificationNeeded)
+ {
+ varDsc->lvVerTypeInfo = verParseArgSigToTypeInfo(varSig, varList);
+ }
+
+ if (tiVerificationNeeded)
+ {
+ if (varDsc->lvIsParam)
+ {
+ // For an incoming ValueType we better be able to have the full type information
+ // so that we can layout the parameter offsets correctly
+
+ if (varTypeIsStruct(type) && varDsc->lvVerTypeInfo.IsDead())
+ {
+ BADCODE("invalid ValueType parameter");
+ }
+
+ // For an incoming reference type we need to verify that the actual type is
+ // a reference type and not a valuetype.
+
+ if (type == TYP_REF &&
+ !(varDsc->lvVerTypeInfo.IsType(TI_REF) || varDsc->lvVerTypeInfo.IsUnboxedGenericTypeVar()))
+ {
+ BADCODE("parameter type mismatch");
+ }
+ }
+
+        // Disallow byrefs to byref-like objects (ArgTypeHandle).
+        // Technically we could get away with just not setting them.
+ if (varDsc->lvVerTypeInfo.IsByRef() && verIsByRefLike(DereferenceByRef(varDsc->lvVerTypeInfo)))
+ {
+ varDsc->lvVerTypeInfo = typeInfo();
+ }
+
+ // we don't want the EE to assert in lvaSetStruct on bad sigs, so change
+ // the JIT type to avoid even trying to call back
+ if (varTypeIsStruct(type) && varDsc->lvVerTypeInfo.IsDead())
+ {
+ type = TYP_VOID;
+ }
+ }
+
+ if (typeHnd)
+ {
+ unsigned cFlags = info.compCompHnd->getClassAttribs(typeHnd);
+
+ // We can get typeHnds for primitive types, these are value types which only contain
+ // a primitive. We will need the typeHnd to distinguish them, so we store it here.
+ if ((cFlags & CORINFO_FLG_VALUECLASS) && !varTypeIsStruct(type))
+ {
+ if (tiVerificationNeeded == false)
+ {
+ // printf("This is a struct that the JIT will treat as a primitive\n");
+ varDsc->lvVerTypeInfo = verMakeTypeInfo(typeHnd);
+ }
+ }
+
+ varDsc->lvOverlappingFields = StructHasOverlappingFields(cFlags);
+ }
+
+ if (varTypeIsGC(type))
+ {
+ varDsc->lvStructGcCount = 1;
+ }
+
+ // Set the lvType (before this point it is TYP_UNDEF).
+ if ((varTypeIsStruct(type)))
+ {
+ lvaSetStruct(varNum, typeHnd, typeHnd != nullptr, !tiVerificationNeeded);
+ }
+ else
+ {
+ varDsc->lvType = type;
+ }
+
+#if OPT_BOOL_OPS
+ if (type == TYP_BOOL)
+ {
+ varDsc->lvIsBoolean = true;
+ }
+#endif
+
+#ifdef DEBUG
+ varDsc->lvStkOffs = BAD_STK_OFFS;
+#endif
+}
+
+/*****************************************************************************
+ * Returns our internal varNum for a given IL variable.
+ * Asserts assume it is called after lvaTable[] has been set up.
+ */
+
+unsigned Compiler::compMapILvarNum(unsigned ILvarNum)
+{
+ noway_assert(ILvarNum < info.compILlocalsCount || ILvarNum > unsigned(ICorDebugInfo::UNKNOWN_ILNUM));
+
+ unsigned varNum;
+
+ if (ILvarNum == (unsigned)ICorDebugInfo::VARARGS_HND_ILNUM)
+ {
+ // The varargs cookie is the last argument in lvaTable[]
+ noway_assert(info.compIsVarArgs);
+
+ varNum = lvaVarargsHandleArg;
+ noway_assert(lvaTable[varNum].lvIsParam);
+ }
+ else if (ILvarNum == (unsigned)ICorDebugInfo::RETBUF_ILNUM)
+ {
+ noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
+ varNum = info.compRetBuffArg;
+ }
+ else if (ILvarNum == (unsigned)ICorDebugInfo::TYPECTXT_ILNUM)
+ {
+ noway_assert(info.compTypeCtxtArg >= 0);
+ varNum = unsigned(info.compTypeCtxtArg);
+ }
+ else if (ILvarNum < info.compILargsCount)
+ {
+ // Parameter
+ varNum = compMapILargNum(ILvarNum);
+ noway_assert(lvaTable[varNum].lvIsParam);
+ }
+ else if (ILvarNum < info.compILlocalsCount)
+ {
+ // Local variable
+ unsigned lclNum = ILvarNum - info.compILargsCount;
+ varNum = info.compArgsCount + lclNum;
+ noway_assert(!lvaTable[varNum].lvIsParam);
+ }
+ else
+ {
+ unreached();
+ }
+
+ noway_assert(varNum < info.compLocalsCount);
+ return varNum;
+}
+
+/*****************************************************************************
+ * Returns the IL variable number given our internal varNum.
+ * Special return values are VARG_ILNUM, RETBUF_ILNUM, TYPECTXT_ILNUM.
+ *
+ * Returns UNKNOWN_ILNUM if it can't be mapped.
+ */
+
+unsigned Compiler::compMap2ILvarNum(unsigned varNum)
+{
+ if (compIsForInlining())
+ {
+ return impInlineInfo->InlinerCompiler->compMap2ILvarNum(varNum);
+ }
+
+ noway_assert(varNum < lvaCount);
+
+ if (varNum == info.compRetBuffArg)
+ {
+ return (unsigned)ICorDebugInfo::RETBUF_ILNUM;
+ }
+
+ // Is this a varargs function?
+ if (info.compIsVarArgs && varNum == lvaVarargsHandleArg)
+ {
+ return (unsigned)ICorDebugInfo::VARARGS_HND_ILNUM;
+ }
+
+ // We create an extra argument for the type context parameter
+ // needed for shared generic code.
+ if ((info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) && varNum == (unsigned)info.compTypeCtxtArg)
+ {
+ return (unsigned)ICorDebugInfo::TYPECTXT_ILNUM;
+ }
+
+ // Now mutate varNum to remove extra parameters from the count.
+ if ((info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) && varNum > (unsigned)info.compTypeCtxtArg)
+ {
+ varNum--;
+ }
+
+ if (info.compIsVarArgs && varNum > lvaVarargsHandleArg)
+ {
+ varNum--;
+ }
+
+    /* Is there a hidden argument for the return buffer?
+ Note that this code works because if the RetBuffArg is not present,
+ compRetBuffArg will be BAD_VAR_NUM */
+ if (info.compRetBuffArg != BAD_VAR_NUM && varNum > info.compRetBuffArg)
+ {
+ varNum--;
+ }
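+    // Illustrative example (hypothetical): for an instance method with a hidden return buffer at
+    // lvaTable index 1 ([this][retBuf][arg1][arg2]), an internal varNum of 3 (arg2) maps to IL
+    // variable number 2 after the decrement above.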
+
+ if (varNum >= info.compLocalsCount)
+ {
+ return (unsigned)ICorDebugInfo::UNKNOWN_ILNUM; // Cannot be mapped
+ }
+
+ return varNum;
+}
+
+/*****************************************************************************
+ * Returns true if variable "varNum" may be address-exposed.
+ */
+
+bool Compiler::lvaVarAddrExposed(unsigned varNum)
+{
+ noway_assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ return varDsc->lvAddrExposed;
+}
+
+/*****************************************************************************
+ * Returns true iff variable "varNum" should not be enregistered (for one of several reasons).
+ */
+
+bool Compiler::lvaVarDoNotEnregister(unsigned varNum)
+{
+ noway_assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ return varDsc->lvDoNotEnregister;
+}
+
+/*****************************************************************************
+ * Returns the handle to the class of the local variable varNum
+ */
+
+CORINFO_CLASS_HANDLE Compiler::lvaGetStruct(unsigned varNum)
+{
+ noway_assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ return varDsc->lvVerTypeInfo.GetClassHandleForValueClass();
+}
+
+/*****************************************************************************
+ *
+ * Compare function passed to qsort() by Compiler::lvaCanPromoteStructVar().
+ */
+
+/* static */
+int __cdecl Compiler::lvaFieldOffsetCmp(const void* field1, const void* field2)
+{
+ lvaStructFieldInfo* pFieldInfo1 = (lvaStructFieldInfo*)field1;
+ lvaStructFieldInfo* pFieldInfo2 = (lvaStructFieldInfo*)field2;
+
+ if (pFieldInfo1->fldOffset == pFieldInfo2->fldOffset)
+ {
+ return 0;
+ }
+ else
+ {
+ return (pFieldInfo1->fldOffset > pFieldInfo2->fldOffset) ? +1 : -1;
+ }
+}
+
+/*****************************************************************************
+ * Is this type promotable? */
+
+void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
+ lvaStructPromotionInfo* StructPromotionInfo,
+ bool sortFields)
+{
+ assert(eeIsValueClass(typeHnd));
+
+ if (typeHnd != StructPromotionInfo->typeHnd)
+ {
+ // sizeof(double) represents the size of the largest primitive type that we can struct promote
+ // In the future this may be changing to XMM_REGSIZE_BYTES
+ const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double); // must be a compile time constant
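+        // For example (illustrative, assuming MAX_NumOfFieldsInPromotableStruct == 4): MaxOffset is
+        // 4 * sizeof(double) == 32, so any struct of 32 or more bytes is rejected below as too large.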
+
+ assert((BYTE)MaxOffset == MaxOffset); // because lvaStructFieldInfo.fldOffset is byte-sized
+ assert((BYTE)MAX_NumOfFieldsInPromotableStruct ==
+ MAX_NumOfFieldsInPromotableStruct); // because lvaStructFieldInfo.fieldCnt is byte-sized
+
+ bool requiresScratchVar = false;
+ bool containsHoles = false;
+ bool customLayout = false;
+ bool containsGCpointers = false;
+
+ StructPromotionInfo->typeHnd = typeHnd;
+ StructPromotionInfo->canPromote = false;
+
+ unsigned structSize = info.compCompHnd->getClassSize(typeHnd);
+ if (structSize >= MaxOffset)
+ {
+ return; // struct is too large
+ }
+
+ unsigned fieldCnt = info.compCompHnd->getClassNumInstanceFields(typeHnd);
+ if (fieldCnt == 0 || fieldCnt > MAX_NumOfFieldsInPromotableStruct)
+ {
+ return; // struct must have between 1 and MAX_NumOfFieldsInPromotableStruct fields
+ }
+
+ StructPromotionInfo->fieldCnt = (BYTE)fieldCnt;
+ DWORD typeFlags = info.compCompHnd->getClassAttribs(typeHnd);
+
+ bool treatAsOverlapping = StructHasOverlappingFields(typeFlags);
+
+#if 1 // TODO-Cleanup: Consider removing this entire #if block in the future
+
+ // This method has two callers. The one in Importer.cpp passes sortFields == false
+ // and the other passes sortFields == true.
+        // This is a workaround that leaves the inlining behavior the same as before, while still
+        // performing extra struct promotions when compiling the method.
+ //
+ if (!sortFields) // the condition "!sortFields" really means "we are inlining"
+ {
+ treatAsOverlapping = StructHasCustomLayout(typeFlags);
+ }
+#endif
+
+ if (treatAsOverlapping)
+ {
+ return;
+ }
+
+ // Don't struct promote if we have an CUSTOMLAYOUT flag on an HFA type
+ if (StructHasCustomLayout(typeFlags) && IsHfa(typeHnd))
+ {
+ return;
+ }
+
+#ifdef _TARGET_ARM_
+ // On ARM, we have a requirement on the struct alignment; see below.
+ unsigned structAlignment =
+ roundUp(info.compCompHnd->getClassAlignmentRequirement(typeHnd), TARGET_POINTER_SIZE);
+#endif // _TARGET_ARM_
+
+ bool isHole[MaxOffset]; // isHole[] is initialized to true for every valid offset in the struct and false for
+ // the rest
+ unsigned i; // then as we process the fields we clear the isHole[] values that the field spans.
+ for (i = 0; i < MaxOffset; i++)
+ {
+ isHole[i] = (i < structSize) ? true : false;
+ }
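+        // Illustrative example (hypothetical): for a struct with an explicit size of 8 whose only field is
+        // an int at offset 0, the field loop below clears isHole[0..3], isHole[4..7] stay true, and the
+        // struct is later flagged as containing holes.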
+
+ for (BYTE ordinal = 0; ordinal < fieldCnt; ++ordinal)
+ {
+ lvaStructFieldInfo* pFieldInfo = &StructPromotionInfo->fields[ordinal];
+ pFieldInfo->fldHnd = info.compCompHnd->getFieldInClass(typeHnd, ordinal);
+ unsigned fldOffset = info.compCompHnd->getFieldOffset(pFieldInfo->fldHnd);
+
+ // The fldOffset value should never be larger than our structSize.
+ if (fldOffset >= structSize)
+ {
+ noway_assert(false);
+ return;
+ }
+
+ pFieldInfo->fldOffset = (BYTE)fldOffset;
+ pFieldInfo->fldOrdinal = ordinal;
+ CorInfoType corType = info.compCompHnd->getFieldType(pFieldInfo->fldHnd, &pFieldInfo->fldTypeHnd);
+ var_types varType = JITtype2varType(corType);
+ pFieldInfo->fldType = varType;
+ pFieldInfo->fldSize = genTypeSize(varType);
+
+ if (varTypeIsGC(varType))
+ {
+ containsGCpointers = true;
+ }
+
+ if (pFieldInfo->fldSize == 0)
+ {
+ // Non-primitive struct field. Don't promote.
+ return;
+ }
+
+ if ((pFieldInfo->fldOffset % pFieldInfo->fldSize) != 0)
+ {
+ // The code in Compiler::genPushArgList that reconstitutes
+ // struct values on the stack from promoted fields expects
+ // those fields to be at their natural alignment.
+ return;
+ }
+
+ // The end offset for this field should never be larger than our structSize.
+ noway_assert(fldOffset + pFieldInfo->fldSize <= structSize);
+
+ for (i = 0; i < pFieldInfo->fldSize; i++)
+ {
+ isHole[fldOffset + i] = false;
+ }
+
+#ifdef _TARGET_ARM_
+ // On ARM, for struct types that don't use explicit layout, the alignment of the struct is
+ // at least the max alignment of its fields. We take advantage of this invariant in struct promotion,
+ // so verify it here.
+ if (pFieldInfo->fldSize > structAlignment)
+ {
+                // Don't promote vars whose struct types violate the invariant. (Alignment == size for primitives.)
+ return;
+ }
+ // If we have any small fields we will allocate a single PromotedStructScratch local var for the method.
+ // This is a stack area that we use to assemble the small fields in order to place them in a register
+ // argument.
+ //
+ if (pFieldInfo->fldSize < TARGET_POINTER_SIZE)
+ {
+ requiresScratchVar = true;
+ }
+#endif // _TARGET_ARM_
+ }
+
+ // If we saw any GC pointer fields above then the CORINFO_FLG_CONTAINS_GC_PTR has to be set!
+ noway_assert((containsGCpointers == false) || ((typeFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0));
+
+ // If we have "Custom Layout" then we might have an explicit Size attribute
+        // Managed C++ uses this for its structs; such C++ types will not contain GC pointers.
+ //
+ // The current VM implementation also incorrectly sets the CORINFO_FLG_CUSTOMLAYOUT
+ // whenever a managed value class contains any GC pointers.
+ // (See the comment for VMFLAG_NOT_TIGHTLY_PACKED in class.h)
+ //
+ // It is important to struct promote managed value classes that have GC pointers
+ // So we compute the correct value for "CustomLayout" here
+ //
+ if (StructHasCustomLayout(typeFlags) && ((typeFlags & CORINFO_FLG_CONTAINS_GC_PTR) == 0))
+ {
+ customLayout = true;
+ }
+
+ // Check if this promoted struct contains any holes
+ //
+ for (i = 0; i < structSize; i++)
+ {
+ if (isHole[i])
+ {
+ containsHoles = true;
+ break;
+ }
+ }
+
+ // Cool, this struct is promotable.
+ StructPromotionInfo->canPromote = true;
+ StructPromotionInfo->requiresScratchVar = requiresScratchVar;
+ StructPromotionInfo->containsHoles = containsHoles;
+ StructPromotionInfo->customLayout = customLayout;
+
+ if (sortFields)
+ {
+ // Sort the fields according to the increasing order of the field offset.
+ // This is needed because the fields need to be pushed on stack (when referenced
+ // as a struct) in order.
+ qsort(StructPromotionInfo->fields, StructPromotionInfo->fieldCnt, sizeof(*StructPromotionInfo->fields),
+ lvaFieldOffsetCmp);
+ }
+ }
+ else
+ {
+ // Asking for the same type of struct as the last time.
+        // Nothing needs to be done.
+ // Fall through ...
+ }
+}
+
+/*****************************************************************************
+ * Is this struct-typed local variable promotable? */
+
+void Compiler::lvaCanPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* StructPromotionInfo)
+{
+ noway_assert(lclNum < lvaCount);
+
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ noway_assert(varTypeIsStruct(varDsc));
+ noway_assert(!varDsc->lvPromoted); // Don't ask again :)
+
+#ifdef FEATURE_SIMD
+ // If this lclVar is used in a SIMD intrinsic, then we don't want to struct promote it.
+ // Note, however, that SIMD lclVars that are NOT used in a SIMD intrinsic may be
+ // profitably promoted.
+ if (varDsc->lvIsUsedInSIMDIntrinsic())
+ {
+ StructPromotionInfo->canPromote = false;
+ return;
+ }
+
+#endif
+
+ // TODO-PERF - Allow struct promotion for HFA register arguments
+
+ // Explicitly check for HFA reg args and reject them for promotion here.
+ // Promoting HFA args will fire an assert in lvaAssignFrameOffsets
+ // when the HFA reg arg is struct promoted.
+ //
+ if (varDsc->lvIsHfaRegArg())
+ {
+ StructPromotionInfo->canPromote = false;
+ return;
+ }
+
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ lvaCanPromoteStructType(typeHnd, StructPromotionInfo, true);
+}
+
+/*****************************************************************************
+ * Promote a struct type local */
+
+void Compiler::lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* StructPromotionInfo)
+{
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ // We should never see a reg-sized non-field-addressed struct here.
+ noway_assert(!varDsc->lvRegStruct);
+
+ noway_assert(StructPromotionInfo->canPromote);
+ noway_assert(StructPromotionInfo->typeHnd == varDsc->lvVerTypeInfo.GetClassHandle());
+
+ varDsc->lvFieldCnt = StructPromotionInfo->fieldCnt;
+ varDsc->lvFieldLclStart = lvaCount;
+ varDsc->lvPromoted = true;
+ varDsc->lvContainsHoles = StructPromotionInfo->containsHoles;
+ varDsc->lvCustomLayout = StructPromotionInfo->customLayout;
+
+#ifdef DEBUG
+ // Don't change the source to a TYP_BLK either.
+ varDsc->lvKeepType = 1;
+#endif
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nPromoting struct local V%02u (%s):", lclNum, eeGetClassName(StructPromotionInfo->typeHnd));
+ }
+#endif
+
+ for (unsigned index = 0; index < StructPromotionInfo->fieldCnt; ++index)
+ {
+ lvaStructFieldInfo* pFieldInfo = &StructPromotionInfo->fields[index];
+
+ if (varTypeIsFloating(pFieldInfo->fldType))
+ {
+ lvaTable[lclNum].lvContainsFloatingFields = 1;
+ // Whenever we promote a struct that contains a floating point field
+ // it's possible we transition from a method that originally only had integer
+ // local vars to start having FP. We have to communicate this through this flag
+ // since LSRA later on will use this flag to determine whether or not to track FP register sets.
+ compFloatingPointUsed = true;
+ }
+
+// Now grab the temp for the field local.
+
+#ifdef DEBUG
+ char buf[200];
+ char* bufp = &buf[0];
+
+ sprintf_s(bufp, sizeof(buf), "%s V%02u.%s (fldOffset=0x%x)", "field", lclNum,
+ eeGetFieldName(pFieldInfo->fldHnd), pFieldInfo->fldOffset);
+
+ if (index > 0)
+ {
+ noway_assert(pFieldInfo->fldOffset > (pFieldInfo - 1)->fldOffset);
+ }
+#endif
+
+ unsigned varNum = lvaGrabTemp(false DEBUGARG(bufp)); // Lifetime of field locals might span multiple BBs, so
+ // they are long lifetime temps.
+
+ LclVarDsc* fieldVarDsc = &lvaTable[varNum];
+ fieldVarDsc->lvType = pFieldInfo->fldType;
+ fieldVarDsc->lvExactSize = pFieldInfo->fldSize;
+ fieldVarDsc->lvIsStructField = true;
+ fieldVarDsc->lvFldOffset = pFieldInfo->fldOffset;
+ fieldVarDsc->lvFldOrdinal = pFieldInfo->fldOrdinal;
+ fieldVarDsc->lvParentLcl = lclNum;
+ fieldVarDsc->lvIsParam = varDsc->lvIsParam;
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // Do we have a parameter that can be enregistered?
+ //
+ if (varDsc->lvIsRegArg)
+ {
+ fieldVarDsc->lvIsRegArg = true;
+ fieldVarDsc->lvArgReg = varDsc->lvArgReg;
+ fieldVarDsc->setPrefReg(varDsc->lvArgReg, this); // Set the preferred register
+
+ lvaMarkRefsWeight = BB_UNITY_WEIGHT; // incRefCnts can use this compiler global variable
+ fieldVarDsc->incRefCnts(BB_UNITY_WEIGHT, this); // increment the ref count for prolog initialization
+ }
+#endif
+
+#ifdef DEBUG
+ // This temporary should not be converted to a double in stress mode,
+ // because we introduce assigns to it after the stress conversion
+ fieldVarDsc->lvKeepType = 1;
+#endif
+ }
+}
+
+#if !defined(_TARGET_64BIT_)
+//------------------------------------------------------------------------
+// lvaPromoteLongVars: "Struct promote" all register candidate longs as if they are structs of two ints.
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+//
+void Compiler::lvaPromoteLongVars()
+{
+ if ((opts.compFlags & CLFLG_REGVAR) == 0)
+ {
+ return;
+ }
+ // The lvaTable might grow as we grab temps. Make a local copy here.
+ unsigned startLvaCount = lvaCount;
+ for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ if (!varTypeIsLong(varDsc) || varDsc->lvDoNotEnregister || varDsc->lvIsMultiRegArgOrRet() ||
+ (varDsc->lvRefCnt == 0))
+ {
+ continue;
+ }
+
+ // Will this work ???
+ // We can't have nested promoted structs.
+ if (varDsc->lvIsStructField)
+ {
+ if (lvaGetPromotionType(varDsc->lvParentLcl) != PROMOTION_TYPE_INDEPENDENT)
+ {
+ continue;
+ }
+ varDsc->lvIsStructField = false;
+ varDsc->lvTracked = false;
+ }
+
+ varDsc->lvFieldCnt = 2;
+ varDsc->lvFieldLclStart = lvaCount;
+ varDsc->lvPromoted = true;
+ varDsc->lvContainsHoles = false;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nPromoting long local V%02u:", lclNum);
+ }
+#endif
+
+ bool isParam = varDsc->lvIsParam;
+
+ for (unsigned index = 0; index < 2; ++index)
+ {
+ // Grab the temp for the field local.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ char buf[200];
+ char* bufp = &buf[0];
+
+ sprintf_s(bufp, sizeof(buf), "%s V%02u.%s (fldOffset=0x%x)", "field", lclNum, index == 0 ? "lo" : "hi",
+ index * 4);
+#endif
+ unsigned varNum = lvaGrabTemp(false DEBUGARG(bufp)); // Lifetime of field locals might span multiple BBs, so
+ // they are long lifetime temps.
+
+ LclVarDsc* fieldVarDsc = &lvaTable[varNum];
+ fieldVarDsc->lvType = TYP_INT;
+ fieldVarDsc->lvExactSize = genTypeSize(TYP_INT);
+ fieldVarDsc->lvIsStructField = true;
+ fieldVarDsc->lvFldOffset = (unsigned char)(index * genTypeSize(TYP_INT));
+ fieldVarDsc->lvFldOrdinal = (unsigned char)index;
+ fieldVarDsc->lvParentLcl = lclNum;
+ fieldVarDsc->lvIsParam = isParam;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nlvaTable after lvaPromoteLongVars\n");
+ lvaTableDump();
+ }
+#endif // DEBUG
+}
+#endif // !_TARGET_64BIT_
+
+/*****************************************************************************
+ * Given a fldOffset in a promoted struct var, return the index of the local
+ * that represents this field.
+ */
+
+unsigned Compiler::lvaGetFieldLocal(LclVarDsc* varDsc, unsigned int fldOffset)
+{
+ noway_assert(varTypeIsStruct(varDsc));
+ noway_assert(varDsc->lvPromoted);
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ noway_assert(lvaTable[i].lvIsStructField);
+ noway_assert(lvaTable[i].lvParentLcl == (unsigned)(varDsc - lvaTable));
+ if (lvaTable[i].lvFldOffset == fldOffset)
+ {
+ return i;
+ }
+ }
+
+ // This is the not-found error return path, the caller should check for BAD_VAR_NUM
+ return BAD_VAR_NUM;
+}
+
+/*****************************************************************************
+ *
+ * Set the local var "varNum" as address-exposed.
+ * If this is a promoted struct, label its fields the same way.
+ */
+
+void Compiler::lvaSetVarAddrExposed(unsigned varNum)
+{
+ noway_assert(varNum < lvaCount);
+
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ varDsc->lvAddrExposed = 1;
+
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varTypeIsStruct(varDsc));
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ noway_assert(lvaTable[i].lvIsStructField);
+ lvaTable[i].lvAddrExposed = 1; // Make field local as address-exposed.
+ lvaSetVarDoNotEnregister(i DEBUGARG(DNER_AddrExposed));
+ }
+ }
+
+ lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_AddrExposed));
+}
+
+/*****************************************************************************
+ *
+ * Record that the local var "varNum" should not be enregistered (for one of several reasons.)
+ */
+
+void Compiler::lvaSetVarDoNotEnregister(unsigned varNum DEBUGARG(DoNotEnregisterReason reason))
+{
+ noway_assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ varDsc->lvDoNotEnregister = 1;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nLocal V%02u should not be enregistered because: ", varNum);
+ }
+ switch (reason)
+ {
+ case DNER_AddrExposed:
+ JITDUMP("it is address exposed\n");
+ assert(varDsc->lvAddrExposed);
+ break;
+ case DNER_IsStruct:
+ JITDUMP("it is a struct\n");
+ assert(varTypeIsStruct(varDsc));
+ break;
+ case DNER_BlockOp:
+ JITDUMP("written in a block op\n");
+ varDsc->lvLclBlockOpAddr = 1;
+ break;
+ case DNER_LocalField:
+ JITDUMP("was accessed as a local field\n");
+ varDsc->lvLclFieldExpr = 1;
+ break;
+ case DNER_VMNeedsStackAddr:
+ JITDUMP("needs stack addr\n");
+ varDsc->lvVMNeedsStackAddr = 1;
+ break;
+ case DNER_LiveInOutOfHandler:
+ JITDUMP("live in/out of a handler\n");
+ varDsc->lvLiveInOutOfHndlr = 1;
+ break;
+ case DNER_LiveAcrossUnmanagedCall:
+ JITDUMP("live across unmanaged call\n");
+ varDsc->lvLiveAcrossUCall = 1;
+ break;
+#ifdef JIT32_GCENCODER
+ case DNER_PinningRef:
+ JITDUMP("pinning ref\n");
+ assert(varDsc->lvPinned);
+ break;
+#endif
+ default:
+ unreached();
+ break;
+ }
+#endif
+}
+
+// Returns true if this local var is a multireg struct
+bool Compiler::lvaIsMultiregStruct(LclVarDsc* varDsc)
+{
+ if (varDsc->TypeGet() == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandleForValueClass();
+ structPassingKind howToPassStruct;
+
+ var_types type = getArgTypeForStruct(clsHnd, &howToPassStruct, varDsc->lvExactSize);
+
+ if (howToPassStruct == SPK_ByValueAsHfa)
+ {
+            assert(type == TYP_STRUCT);
+ return true;
+ }
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
+ if (howToPassStruct == SPK_ByValue)
+ {
+            assert(type == TYP_STRUCT);
+ return true;
+ }
+#endif
+ }
+ return false;
+}
+
+/*****************************************************************************
+ * Set the lvClass for a local variable of a struct type */
+
+void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool unsafeValueClsCheck, bool setTypeInfo)
+{
+ noway_assert(varNum < lvaCount);
+
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ if (setTypeInfo)
+ {
+ varDsc->lvVerTypeInfo = typeInfo(TI_STRUCT, typeHnd);
+ }
+
+ // Set the type and associated info if we haven't already set it.
+ var_types structType = varDsc->lvType;
+ if (varDsc->lvType == TYP_UNDEF)
+ {
+ varDsc->lvType = TYP_STRUCT;
+ }
+ if (varDsc->lvExactSize == 0)
+ {
+ varDsc->lvExactSize = info.compCompHnd->getClassSize(typeHnd);
+
+ size_t lvSize = varDsc->lvSize();
+ assert((lvSize % sizeof(void*)) ==
+ 0); // The struct needs to be a multiple of sizeof(void*) bytes for getClassGClayout() to be valid.
+ varDsc->lvGcLayout = (BYTE*)compGetMemA((lvSize / sizeof(void*)) * sizeof(BYTE), CMK_LvaTable);
+ unsigned numGCVars;
+ var_types simdBaseType = TYP_UNKNOWN;
+ varDsc->lvType = impNormStructType(typeHnd, varDsc->lvGcLayout, &numGCVars, &simdBaseType);
+
+ // We only save the count of GC vars in a struct up to 7.
+ if (numGCVars >= 8)
+ {
+ numGCVars = 7;
+ }
+ varDsc->lvStructGcCount = numGCVars;
+#if FEATURE_SIMD
+ if (simdBaseType != TYP_UNKNOWN)
+ {
+ assert(varTypeIsSIMD(varDsc));
+ varDsc->lvSIMDType = true;
+ varDsc->lvBaseType = simdBaseType;
+ }
+#endif // FEATURE_SIMD
+#ifdef FEATURE_HFA
+ // for structs that are small enough, we check and set lvIsHfa and lvHfaTypeIsFloat
+ if (varDsc->lvExactSize <= MAX_PASS_MULTIREG_BYTES)
+ {
+ var_types hfaType = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF
+ if (varTypeIsFloating(hfaType))
+ {
+ varDsc->_lvIsHfa = true;
+ varDsc->lvSetHfaTypeIsFloat(hfaType == TYP_FLOAT);
+
+ // hfa variables can never contain GC pointers
+ assert(varDsc->lvStructGcCount == 0);
+ // The size of this struct should be evenly divisible by 4 or 8
+ assert((varDsc->lvExactSize % genTypeSize(hfaType)) == 0);
+ // The number of elements in the HFA should fit into our MAX_ARG_REG_COUNT limit
+ assert((varDsc->lvExactSize / genTypeSize(hfaType)) <= MAX_ARG_REG_COUNT);
+ }
+ }
+#endif // FEATURE_HFA
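+
+ // Worked example (hypothetical HFA of three floats): lvExactSize is 12 and
+ // genTypeSize(hfaType) is 4, so the size divides evenly (12 % 4 == 0) and the element
+ // count (12 / 4 == 3) stays within the MAX_ARG_REG_COUNT limit asserted above.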
+ }
+ else
+ {
+ assert(varDsc->lvExactSize != 0);
+#if FEATURE_SIMD
+ assert(!varTypeIsSIMD(varDsc) || (varDsc->lvBaseType != TYP_UNKNOWN));
+#endif // FEATURE_SIMD
+ }
+
+#ifndef _TARGET_64BIT_
+ bool fDoubleAlignHint = FALSE;
+#ifdef _TARGET_X86_
+ fDoubleAlignHint = TRUE;
+#endif
+
+ if (info.compCompHnd->getClassAlignmentRequirement(typeHnd, fDoubleAlignHint) == 8)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Marking struct in V%02i with double align flag\n", varNum);
+ }
+#endif
+ varDsc->lvStructDoubleAlign = 1;
+ }
+#endif // not _TARGET_64BIT_
+
+ unsigned classAttribs = info.compCompHnd->getClassAttribs(typeHnd);
+
+ varDsc->lvOverlappingFields = StructHasOverlappingFields(classAttribs);
+
+ // Check whether this local is an unsafe value type and requires GS cookie protection.
+ // GS checks require the stack to be re-ordered, which can't be done with EnC.
+ if (unsafeValueClsCheck && (classAttribs & CORINFO_FLG_UNSAFE_VALUECLASS) && !opts.compDbgEnC)
+ {
+ setNeedsGSSecurityCookie();
+ compGSReorderStackLayout = true;
+ varDsc->lvIsUnsafeBuffer = true;
+ }
+}
+
+/*****************************************************************************
+ * Returns the array of BYTEs containing the GC layout information
+ */
+
+BYTE* Compiler::lvaGetGcLayout(unsigned varNum)
+{
+ noway_assert(varTypeIsStruct(lvaTable[varNum].lvType) && (lvaTable[varNum].lvExactSize >= TARGET_POINTER_SIZE));
+
+ return lvaTable[varNum].lvGcLayout;
+}
+
+/*****************************************************************************
+ * Return the number of bytes needed for a local variable
+ */
+
+unsigned Compiler::lvaLclSize(unsigned varNum)
+{
+ noway_assert(varNum < lvaCount);
+
+ var_types varType = lvaTable[varNum].TypeGet();
+
+ switch (varType)
+ {
+ case TYP_STRUCT:
+ case TYP_BLK:
+ return lvaTable[varNum].lvSize();
+
+ case TYP_LCLBLK:
+#if FEATURE_FIXED_OUT_ARGS
+ noway_assert(lvaOutgoingArgSpaceSize >= 0);
+ noway_assert(varNum == lvaOutgoingArgSpaceVar);
+ return lvaOutgoingArgSpaceSize;
+
+#else // FEATURE_FIXED_OUT_ARGS
+ assert(!"Unknown size");
+ NO_WAY("Target doesn't support TYP_LCLBLK");
+
+ // Keep prefast happy
+ __fallthrough;
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ default: // This must be a primitive var. Fall out of switch statement
+ break;
+ }
+#ifdef _TARGET_64BIT_
+ // We only need this Quirk for _TARGET_64BIT_
+ if (lvaTable[varNum].lvQuirkToLong)
+ {
+ noway_assert(lvaTable[varNum].lvAddrExposed);
+ return genTypeStSz(TYP_LONG) * sizeof(int); // return 8 (2 * 4)
+ }
+#endif
+ return genTypeStSz(varType) * sizeof(int);
+}
+
+//
+// Return the exact width of local variable "varNum" -- the number of bytes
+// you'd need to copy in order to overwrite the value.
+//
+unsigned Compiler::lvaLclExactSize(unsigned varNum)
+{
+ noway_assert(varNum < lvaCount);
+
+ var_types varType = lvaTable[varNum].TypeGet();
+
+ switch (varType)
+ {
+ case TYP_STRUCT:
+ case TYP_BLK:
+ return lvaTable[varNum].lvExactSize;
+
+ case TYP_LCLBLK:
+#if FEATURE_FIXED_OUT_ARGS
+ noway_assert(lvaOutgoingArgSpaceSize >= 0);
+ noway_assert(varNum == lvaOutgoingArgSpaceVar);
+ return lvaOutgoingArgSpaceSize;
+
+#else // FEATURE_FIXED_OUT_ARGS
+ assert(!"Unknown size");
+ NO_WAY("Target doesn't support TYP_LCLBLK");
+
+ // Keep prefast happy
+ __fallthrough;
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ default: // This must be a primitive var. Fall out of switch statement
+ break;
+ }
+
+ return genTypeSize(varType);
+}
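+
+// Worked example (hypothetical 6-byte struct local): lvaLclExactSize returns 6, while
+// lvaLclSize returns the rounded-up allocation size from lvSize(), a multiple of the pointer
+// size (8 on 64-bit targets).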
+
+// getBBWeight -- get the normalized weight of this block
+unsigned BasicBlock::getBBWeight(Compiler* comp)
+{
+ if (this->bbWeight == 0)
+ {
+ return 0;
+ }
+ else
+ {
+ unsigned calledWeight = comp->fgCalledWeight;
+ if (calledWeight == 0)
+ {
+ calledWeight = comp->fgFirstBB->bbWeight;
+ if (calledWeight == 0)
+ {
+ calledWeight = BB_UNITY_WEIGHT;
+ }
+ }
+ if (this->bbWeight < (BB_MAX_WEIGHT / BB_UNITY_WEIGHT))
+ {
+ return max(1, (((this->bbWeight * BB_UNITY_WEIGHT) + (calledWeight / 2)) / calledWeight));
+ }
+ else
+ {
+ return (unsigned)((((double)this->bbWeight * (double)BB_UNITY_WEIGHT) / (double)calledWeight) + 0.5);
+ }
+ }
+}
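+
+// Worked example (assuming BB_UNITY_WEIGHT is 100): a block with bbWeight = 3 in a method
+// whose called weight is 2 normalizes to max(1, (3 * 100 + 2 / 2) / 2) = 150, i.e. the block
+// is treated as 1.5x as hot as a single invocation of the method.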
+
+/*****************************************************************************
+ *
+ * Callback used by the tree walker to call lvaDecRefCnts
+ */
+Compiler::fgWalkResult Compiler::lvaDecRefCntsCB(GenTreePtr* pTree, fgWalkData* data)
+{
+ data->compiler->lvaDecRefCnts(*pTree);
+ return WALK_CONTINUE;
+}
+
+// Decrement the ref counts for all locals contained in the tree and its children.
+void Compiler::lvaRecursiveDecRefCounts(GenTreePtr tree)
+{
+ assert(lvaLocalVarRefCounted);
+
+ // We could just use the recursive walker for all cases but that is a
+ // fairly heavyweight thing to spin up when we're usually just handling a leaf.
+ if (tree->OperIsLeaf())
+ {
+ if (tree->OperIsLocal())
+ {
+ lvaDecRefCnts(tree);
+ }
+ }
+ else
+ {
+ fgWalkTreePre(&tree, Compiler::lvaDecRefCntsCB, (void*)this, true);
+ }
+}
+
+// Increment the ref counts for all locals contained in the tree and its children.
+void Compiler::lvaRecursiveIncRefCounts(GenTreePtr tree)
+{
+ assert(lvaLocalVarRefCounted);
+
+ // We could just use the recursive walker for all cases but that is a
+ // fairly heavyweight thing to spin up when we're usually just handling a leaf.
+ if (tree->OperIsLeaf())
+ {
+ if (tree->OperIsLocal())
+ {
+ lvaIncRefCnts(tree);
+ }
+ }
+ else
+ {
+ fgWalkTreePre(&tree, Compiler::lvaIncRefCntsCB, (void*)this, true);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Helper passed to the tree walker to decrement the refCnts for
+ * all local variables in an expression
+ */
+void Compiler::lvaDecRefCnts(GenTreePtr tree)
+{
+ assert(compCurBB != nullptr);
+ lvaDecRefCnts(compCurBB, tree);
+}
+
+void Compiler::lvaDecRefCnts(BasicBlock* block, GenTreePtr tree)
+{
+ assert(block != nullptr);
+ assert(tree != nullptr);
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ noway_assert(lvaRefCountingStarted || lvaLocalVarRefCounted);
+
+ if ((tree->gtOper == GT_CALL) && (tree->gtFlags & GTF_CALL_UNMANAGED))
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ /* Get the special variable descriptor */
+
+ lclNum = info.compLvFrameListRoot;
+
+ noway_assert(lclNum <= lvaCount);
+ varDsc = lvaTable + lclNum;
+
+ /* Decrement the reference counts twice */
+
+ varDsc->decRefCnts(block->getBBWeight(this), this);
+ varDsc->decRefCnts(block->getBBWeight(this), this);
+ }
+ }
+ else
+ {
+ /* This must be a local variable */
+
+ noway_assert(tree->OperIsLocal());
+
+ /* Get the variable descriptor */
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ noway_assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+
+ /* Decrement its lvRefCnt and lvRefCntWtd */
+
+ varDsc->decRefCnts(block->getBBWeight(this), this);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Callback used by the tree walker to call lvaIncRefCnts
+ */
+Compiler::fgWalkResult Compiler::lvaIncRefCntsCB(GenTreePtr* pTree, fgWalkData* data)
+{
+ data->compiler->lvaIncRefCnts(*pTree);
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Helper passed to the tree walker to increment the refCnts for
+ * all local variables in an expression
+ */
+void Compiler::lvaIncRefCnts(GenTreePtr tree)
+{
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ noway_assert(lvaRefCountingStarted || lvaLocalVarRefCounted);
+
+ if ((tree->gtOper == GT_CALL) && (tree->gtFlags & GTF_CALL_UNMANAGED))
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ /* Get the special variable descriptor */
+
+ lclNum = info.compLvFrameListRoot;
+
+ noway_assert(lclNum <= lvaCount);
+ varDsc = lvaTable + lclNum;
+
+ /* Increment the reference counts twice */
+
+ varDsc->incRefCnts(compCurBB->getBBWeight(this), this);
+ varDsc->incRefCnts(compCurBB->getBBWeight(this), this);
+ }
+ }
+ else
+ {
+ /* This must be a local variable */
+
+ noway_assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_FLD || tree->gtOper == GT_STORE_LCL_VAR ||
+ tree->gtOper == GT_STORE_LCL_FLD);
+
+ /* Get the variable descriptor */
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ noway_assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+
+ /* Increment its lvRefCnt and lvRefCntWtd */
+
+ varDsc->incRefCnts(compCurBB->getBBWeight(this), this);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Compare function passed to qsort() by Compiler::lvaSortByRefCount()
+ * when generating SMALL_CODE.
+ * Return positive if dsc2 has a higher ref count
+ * Return negative if dsc1 has a higher ref count
+ * Return zero if the ref counts are the same
+ * lvPrefReg is only used to break ties
+ */
+
+/* static */
+int __cdecl Compiler::RefCntCmp(const void* op1, const void* op2)
+{
+ LclVarDsc* dsc1 = *(LclVarDsc**)op1;
+ LclVarDsc* dsc2 = *(LclVarDsc**)op2;
+
+ /* Make sure we preference tracked variables over untracked variables */
+
+ if (dsc1->lvTracked != dsc2->lvTracked)
+ {
+ return (dsc2->lvTracked) ? +1 : -1;
+ }
+
+ unsigned weight1 = dsc1->lvRefCnt;
+ unsigned weight2 = dsc2->lvRefCnt;
+
+#if !FEATURE_FP_REGALLOC
+ /* Force integer candidates to sort above float candidates */
+
+ bool isFloat1 = isFloatRegType(dsc1->lvType);
+ bool isFloat2 = isFloatRegType(dsc2->lvType);
+
+ if (isFloat1 != isFloat2)
+ {
+ if (weight2 && isFloat1)
+ {
+ return +1;
+ }
+ if (weight1 && isFloat2)
+ {
+ return -1;
+ }
+ }
+#endif
+
+ int diff = weight2 - weight1;
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ /* The unweighted ref counts were the same */
+ /* If the weighted ref counts are different then use their difference */
+ diff = dsc2->lvRefCntWtd - dsc1->lvRefCntWtd;
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ /* We have equal ref counts and weighted ref counts */
+
+ /* Break the tie by: */
+ /* Increasing the weight by 2 if we have exactly one bit set in lvPrefReg */
+ /* Increasing the weight by 1 if we have more than one bit set in lvPrefReg */
+ /* Increasing the weight by 0.5 if we are a GC type */
+ /* Increasing the weight by 0.5 if we were enregistered in the previous pass */
+
+ if (weight1)
+ {
+ if (dsc1->lvPrefReg)
+ {
+ if ((dsc1->lvPrefReg & ~RBM_BYTE_REG_FLAG) && genMaxOneBit((unsigned)dsc1->lvPrefReg))
+ {
+ weight1 += 2 * BB_UNITY_WEIGHT;
+ }
+ else
+ {
+ weight1 += 1 * BB_UNITY_WEIGHT;
+ }
+ }
+ if (varTypeIsGC(dsc1->TypeGet()))
+ {
+ weight1 += BB_UNITY_WEIGHT / 2;
+ }
+
+ if (dsc1->lvRegister)
+ {
+ weight1 += BB_UNITY_WEIGHT / 2;
+ }
+ }
+
+ if (weight2)
+ {
+ if (dsc2->lvPrefReg)
+ {
+ if ((dsc2->lvPrefReg & ~RBM_BYTE_REG_FLAG) && genMaxOneBit((unsigned)dsc2->lvPrefReg))
+ {
+ weight2 += 2 * BB_UNITY_WEIGHT;
+ }
+ else
+ {
+ weight2 += 1 * BB_UNITY_WEIGHT;
+ }
+ }
+ if (varTypeIsGC(dsc2->TypeGet()))
+ {
+ weight2 += BB_UNITY_WEIGHT / 2;
+ }
+
+ if (dsc2->lvRegister)
+ {
+ weight2 += BB_UNITY_WEIGHT / 2;
+ }
+ }
+
+ diff = weight2 - weight1;
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ /* To achieve a Stable Sort we use the LclNum (by way of the pointer address) */
+
+ if (dsc1 < dsc2)
+ {
+ return -1;
+ }
+ if (dsc1 > dsc2)
+ {
+ return +1;
+ }
+
+ return 0;
+}
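+
+// Tie-break sketch (hypothetical values): if two tracked locals tie on both lvRefCnt and
+// lvRefCntWtd, but dsc1 has exactly one bit set in lvPrefReg and is a GC ref while dsc2 has
+// neither, then weight1 gains 2 * BB_UNITY_WEIGHT + BB_UNITY_WEIGHT / 2, the final diff is
+// negative, and dsc1 sorts ahead of dsc2.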
+
+/*****************************************************************************
+ *
+ * Compare function passed to qsort() by Compiler::lvaSortByRefCount()
+ * when not generating SMALL_CODE.
+ * Return positive if dsc2 has a higher weighted ref count
+ * Return negative if dsc1 has a higher weighted ref count
+ * Return zero if the ref counts are the same
+ */
+
+/* static */
+int __cdecl Compiler::WtdRefCntCmp(const void* op1, const void* op2)
+{
+ LclVarDsc* dsc1 = *(LclVarDsc**)op1;
+ LclVarDsc* dsc2 = *(LclVarDsc**)op2;
+
+ /* Make sure we preference tracked variables over untracked variables */
+
+ if (dsc1->lvTracked != dsc2->lvTracked)
+ {
+ return (dsc2->lvTracked) ? +1 : -1;
+ }
+
+ unsigned weight1 = dsc1->lvRefCntWtd;
+ unsigned weight2 = dsc2->lvRefCntWtd;
+
+#if !FEATURE_FP_REGALLOC
+ /* Force integer candidates to sort above float candidates */
+
+ bool isFloat1 = isFloatRegType(dsc1->lvType);
+ bool isFloat2 = isFloatRegType(dsc2->lvType);
+
+ if (isFloat1 != isFloat2)
+ {
+ if (weight2 && isFloat1)
+ {
+ return +1;
+ }
+ if (weight1 && isFloat2)
+ {
+ return -1;
+ }
+ }
+#endif
+
+ /* Increase the weight by 2 if we have exactly one bit set in lvPrefReg */
+ /* Increase the weight by 1 if we have more than one bit set in lvPrefReg */
+
+ if (weight1 && dsc1->lvPrefReg)
+ {
+ if ((dsc1->lvPrefReg & ~RBM_BYTE_REG_FLAG) && genMaxOneBit((unsigned)dsc1->lvPrefReg))
+ {
+ weight1 += 2 * BB_UNITY_WEIGHT;
+ }
+ else
+ {
+ weight1 += 1 * BB_UNITY_WEIGHT;
+ }
+ }
+
+ if (weight2 && dsc2->lvPrefReg)
+ {
+ if ((dsc2->lvPrefReg & ~RBM_BYTE_REG_FLAG) && genMaxOneBit((unsigned)dsc2->lvPrefReg))
+ {
+ weight2 += 2 * BB_UNITY_WEIGHT;
+ }
+ else
+ {
+ weight2 += 1 * BB_UNITY_WEIGHT;
+ }
+ }
+
+ if (weight2 > weight1)
+ {
+ return 1;
+ }
+ else if (weight2 < weight1)
+ {
+ return -1;
+ }
+
+ // Otherwise, we have equal weighted ref counts.
+
+ /* If the unweighted ref counts are different then use their difference */
+ int diff = (int)dsc2->lvRefCnt - (int)dsc1->lvRefCnt;
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ /* If one is a GC type and the other is not, the GC type wins */
+ if (varTypeIsGC(dsc1->TypeGet()) != varTypeIsGC(dsc2->TypeGet()))
+ {
+ if (varTypeIsGC(dsc1->TypeGet()))
+ {
+ diff = -1;
+ }
+ else
+ {
+ diff = +1;
+ }
+
+ return diff;
+ }
+
+ /* If one was enregistered in the previous pass then it wins */
+ if (dsc1->lvRegister != dsc2->lvRegister)
+ {
+ if (dsc1->lvRegister)
+ {
+ diff = -1;
+ }
+ else
+ {
+ diff = +1;
+ }
+
+ return diff;
+ }
+
+ /* We have a tie! */
+
+ /* To achieve a Stable Sort we use the LclNum (by way of the pointer address) */
+
+ if (dsc1 < dsc2)
+ {
+ return -1;
+ }
+ if (dsc1 > dsc2)
+ {
+ return +1;
+ }
+
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ * Sort the local variable table by refcount and assign tracking indices.
+ */
+
+void Compiler::lvaSortOnly()
+{
+ /* Now sort the variable table by ref-count */
+
+ qsort(lvaRefSorted, lvaCount, sizeof(*lvaRefSorted), (compCodeOpt() == SMALL_CODE) ? RefCntCmp : WtdRefCntCmp);
+
+ lvaSortAgain = false;
+
+ lvaDumpRefCounts();
+}
+
+void Compiler::lvaDumpRefCounts()
+{
+#ifdef DEBUG
+
+ if (verbose && lvaCount)
+ {
+ printf("refCnt table for '%s':\n", info.compMethodName);
+
+ for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++)
+ {
+ unsigned refCnt = lvaRefSorted[lclNum]->lvRefCnt;
+ if (refCnt == 0)
+ {
+ break;
+ }
+ unsigned refCntWtd = lvaRefSorted[lclNum]->lvRefCntWtd;
+
+ printf(" ");
+ gtDispLclVar((unsigned)(lvaRefSorted[lclNum] - lvaTable));
+ printf(" [%6s]: refCnt = %4u, refCntWtd = %6s", varTypeName(lvaRefSorted[lclNum]->TypeGet()), refCnt,
+ refCntWtd2str(refCntWtd));
+
+ regMaskSmall pref = lvaRefSorted[lclNum]->lvPrefReg;
+ if (pref)
+ {
+ printf(" pref ");
+ dspRegMask(pref);
+ }
+ printf("\n");
+ }
+
+ printf("\n");
+ }
+
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Sort the local variable table by refcount and assign tracking indices.
+ */
+
+void Compiler::lvaSortByRefCount()
+{
+ lvaTrackedCount = 0;
+ lvaTrackedCountInSizeTUnits = 0;
+
+ if (lvaCount == 0)
+ {
+ return;
+ }
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ LclVarDsc** refTab;
+
+ /* We'll sort the variables by ref count - allocate the sorted table */
+
+ lvaRefSorted = refTab = new (this, CMK_LvaTable) LclVarDsc*[lvaCount];
+
+ /* Fill in the table used for sorting */
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Append this variable to the table for sorting */
+
+ *refTab++ = varDsc;
+
+ /* If we have JMP, all arguments must have a location
+ * even if we don't use them inside the method */
+
+ if (compJmpOpUsed && varDsc->lvIsParam)
+ {
+ /* ...except when we have varargs and the argument is
+ passed on the stack. In that case, it's important
+ for the ref count to be zero, so that we don't attempt
+ to track them for GC info (which is not possible since we
+ don't know their offset in the stack). See the assert at the
+ end of raMarkStkVars and bug #28949 for more info. */
+
+ if (!raIsVarargsStackArg(lclNum))
+ {
+ varDsc->incRefCnts(1, this);
+ }
+ }
+
+ /* For now assume we'll be able to track all locals */
+
+ varDsc->lvTracked = 1;
+
+ /* If the ref count is zero */
+ if (varDsc->lvRefCnt == 0)
+ {
+ /* Zero ref count, make this untracked */
+ varDsc->lvTracked = 0;
+ varDsc->lvRefCntWtd = 0;
+ }
+
+#if !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+ if (varTypeIsLong(varDsc) && varDsc->lvPromoted)
+ {
+ varDsc->lvTracked = 0;
+ }
+#endif // !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+
+ // Variables that are address-exposed, and all struct locals, are never enregistered, or tracked.
+ // (The struct may be promoted, and its field variables enregistered/tracked, or the VM may "normalize"
+ // its type so that it's not seen by the JIT as a struct.)
+ // Pinned variables may not be tracked (a condition of the GCInfo representation)
+ // or enregistered, on x86 -- it is believed that we can enregister pinned (more properly, "pinning")
+ // references when using the general GC encoding.
+ if (varDsc->lvAddrExposed)
+ {
+ varDsc->lvTracked = 0;
+ assert(varDsc->lvType != TYP_STRUCT ||
+ varDsc->lvDoNotEnregister); // For structs, should have set this when we set lvAddrExposed.
+ }
+ else if (varTypeIsStruct(varDsc))
+ {
+ // Promoted structs will never be considered for enregistration anyway,
+ // and the DoNotEnregister flag was used to indicate whether promotion was
+ // independent or dependent.
+ if (varDsc->lvPromoted)
+ {
+ varDsc->lvTracked = 0;
+ }
+ else if ((varDsc->lvType == TYP_STRUCT) && !varDsc->lvRegStruct)
+ {
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_IsStruct));
+ }
+ }
+ else if (varDsc->lvIsStructField && (lvaGetParentPromotionType(lclNum) != PROMOTION_TYPE_INDEPENDENT))
+ {
+ // SSA must exclude struct fields that are not independently promoted
+ // as dependent fields could be assigned using a CopyBlock
+ // resulting in a single node causing multiple SSA definitions
+ // which isn't currently supported by SSA
+ //
+ // TODO-CQ: Consider using lvLclBlockOpAddr and only marking these LclVars
+ // untracked when a blockOp is used to assign the struct.
+ //
+ varDsc->lvTracked = 0; // so, don't mark as tracked
+ }
+ else if (varDsc->lvPinned)
+ {
+ varDsc->lvTracked = 0;
+#ifdef JIT32_GCENCODER
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_PinningRef));
+#endif
+ }
+
+ // Are we not optimizing, and do we have exception handlers?
+ // If so, mark all args and locals "do not enregister".
+ //
+ if (opts.MinOpts() && compHndBBtabCount > 0)
+ {
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LiveInOutOfHandler));
+ continue;
+ }
+
+ var_types type = genActualType(varDsc->TypeGet());
+
+ switch (type)
+ {
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+#endif
+ case TYP_INT:
+ case TYP_LONG:
+ case TYP_REF:
+ case TYP_BYREF:
+#ifdef FEATURE_SIMD
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+#endif // FEATURE_SIMD
+ case TYP_STRUCT:
+ break;
+
+ case TYP_UNDEF:
+ case TYP_UNKNOWN:
+ noway_assert(!"lvType not set correctly");
+ varDsc->lvType = TYP_INT;
+
+ __fallthrough;
+
+ default:
+ varDsc->lvTracked = 0;
+ }
+ }
+
+ /* Now sort the variable table by ref-count */
+
+ lvaSortOnly();
+
+ /* Decide which variables will be worth tracking */
+
+ if (lvaCount > lclMAX_TRACKED)
+ {
+ /* Mark all variables past the first 'lclMAX_TRACKED' as untracked */
+
+ for (lclNum = lclMAX_TRACKED; lclNum < lvaCount; lclNum++)
+ {
+ lvaRefSorted[lclNum]->lvTracked = 0;
+ }
+ }
+
+#ifdef DEBUG
+ // Re-Initialize to -1 for safety in debug build.
+ memset(lvaTrackedToVarNum, -1, sizeof(lvaTrackedToVarNum));
+#endif
+
+ /* Assign indices to all the variables we've decided to track */
+
+ for (lclNum = 0; lclNum < min(lvaCount, lclMAX_TRACKED); lclNum++)
+ {
+ varDsc = lvaRefSorted[lclNum];
+ if (varDsc->lvTracked)
+ {
+ noway_assert(varDsc->lvRefCnt > 0);
+
+ /* This variable will be tracked - assign it an index */
+
+ lvaTrackedToVarNum[lvaTrackedCount] = (unsigned)(varDsc - lvaTable); // The type of varDsc and lvaTable
+ // is LclVarDsc. Subtraction will give us
+ // the index.
+ varDsc->lvVarIndex = lvaTrackedCount++;
+ }
+ }
+
+ // We have a new epoch, and also cache the tracked var count in terms of size_t's sufficient to hold that many bits.
+ lvaCurEpoch++;
+ lvaTrackedCountInSizeTUnits = unsigned(roundUp(lvaTrackedCount, sizeof(size_t) * 8)) / unsigned(sizeof(size_t) * 8);
+
+#ifdef DEBUG
+ VarSetOps::AssignNoCopy(this, lvaTrackedVars, VarSetOps::MakeFull(this));
+#endif
+}
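+
+// Worked example: on a 64-bit host, sizeof(size_t) * 8 is 64, so tracking 70 locals gives
+// lvaTrackedCountInSizeTUnits = roundUp(70, 64) / 64 = 128 / 64 = 2 size_t chunks per
+// tracked-variable bit set.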
+
+#if ASSERTION_PROP
+/*****************************************************************************
+ *
+ * This is called by lvaMarkLclRefs to disqualify a variable from being
+ * considered by optAddCopies()
+ */
+void LclVarDsc::lvaDisqualifyVar()
+{
+ this->lvDisqualify = true;
+ this->lvSingleDef = false;
+ this->lvDefStmt = nullptr;
+}
+#endif // ASSERTION_PROP
+
+#ifndef LEGACY_BACKEND
+/**********************************************************************************
+* Get type of a variable when passed as an argument.
+*/
+var_types LclVarDsc::lvaArgType()
+{
+ var_types type = TypeGet();
+
+#ifdef _TARGET_AMD64_
+ if (type == TYP_STRUCT)
+ {
+ switch (lvExactSize)
+ {
+ case 1:
+ type = TYP_BYTE;
+ break;
+ case 2:
+ type = TYP_SHORT;
+ break;
+ case 4:
+ type = TYP_INT;
+ break;
+ case 8:
+ switch (*lvGcLayout)
+ {
+ case TYPE_GC_NONE:
+ type = TYP_I_IMPL;
+ break;
+
+ case TYPE_GC_REF:
+ type = TYP_REF;
+ break;
+
+ case TYPE_GC_BYREF:
+ type = TYP_BYREF;
+ break;
+
+ default:
+ unreached();
+ }
+ break;
+
+ default:
+ type = TYP_BYREF;
+ break;
+ }
+ }
+#elif defined(_TARGET_X86_)
+// Nothing to do; use the type as is.
+#else
+ NYI("lvaArgType");
+#endif //_TARGET_AMD64_
+
+ return type;
+}
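+
+// Illustrative mapping (hypothetical AMD64 arguments): a 4-byte struct is retyped to TYP_INT,
+// an 8-byte struct whose single slot holds an object reference becomes TYP_REF, and a 12-byte
+// struct falls through to the default case and is passed as TYP_BYREF.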
+#endif // !LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * This is called by lvaMarkLclRefsCallback() to do variable ref marking
+ */
+
+void Compiler::lvaMarkLclRefs(GenTreePtr tree)
+{
+ /* Is this a call to unmanaged code ? */
+ if (tree->gtOper == GT_CALL && tree->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ /* Get the special variable descriptor */
+
+ unsigned lclNum = info.compLvFrameListRoot;
+
+ noway_assert(lclNum <= lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ /* Increment the ref counts twice */
+ varDsc->incRefCnts(lvaMarkRefsWeight, this);
+ varDsc->incRefCnts(lvaMarkRefsWeight, this);
+ }
+ }
+
+ /* Is this an assignment? */
+
+ if (tree->OperKind() & GTK_ASGOP)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ /* Set target register for RHS local if assignment is of a "small" type */
+
+ if (varTypeIsByte(tree->gtType))
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc = nullptr;
+
+ /* GT_CHS is special it doesn't have a valid op2 */
+ if (tree->gtOper == GT_CHS)
+ {
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ lclNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ varDsc = &lvaTable[lclNum];
+ }
+ }
+ else
+ {
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ lclNum = op2->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ varDsc = &lvaTable[lclNum];
+ }
+ }
+#if CPU_HAS_BYTE_REGS
+ if (varDsc)
+ varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
+#endif
+ }
+
+#if OPT_BOOL_OPS
+
+ /* Is this an assignment to a local variable? */
+
+ if (op1->gtOper == GT_LCL_VAR && op2->gtType != TYP_BOOL)
+ {
+ /* Only simple assignments allowed for booleans */
+
+ if (tree->gtOper != GT_ASG)
+ {
+ goto NOT_BOOL;
+ }
+
+ /* Is the RHS clearly a boolean value? */
+
+ switch (op2->gtOper)
+ {
+ unsigned lclNum;
+
+ case GT_CNS_INT:
+
+ if (op2->gtIntCon.gtIconVal == 0)
+ {
+ break;
+ }
+ if (op2->gtIntCon.gtIconVal == 1)
+ {
+ break;
+ }
+
+ // Not 0 or 1, fall through ....
+ __fallthrough;
+
+ default:
+
+ if (op2->OperIsCompare())
+ {
+ break;
+ }
+
+ NOT_BOOL:
+
+ lclNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+
+ lvaTable[lclNum].lvIsBoolean = false;
+ break;
+ }
+ }
+#endif
+ }
+
+#if FANCY_ARRAY_OPT
+
+ /* Special case: assignment node */
+
+ if (tree->gtOper == GT_ASG)
+ {
+ if (tree->gtType == TYP_INT)
+ {
+ unsigned lclNum1;
+ LclVarDsc* varDsc1;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ if (op1->gtOper != GT_LCL_VAR)
+ return;
+
+ lclNum1 = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum1 < lvaCount);
+ varDsc1 = lvaTable + lclNum1;
+
+ if (varDsc1->lvAssignOne)
+ varDsc1->lvAssignTwo = true;
+ else
+ varDsc1->lvAssignOne = true;
+ }
+
+ return;
+ }
+
+#endif
+
+#ifdef _TARGET_XARCH_
+ /* Special case: integer shift node by a variable amount */
+
+ if (tree->OperIsShiftOrRotate())
+ {
+ if (tree->gtType == TYP_INT)
+ {
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ lvaTable[lclNum].setPrefReg(REG_ECX, this);
+ }
+ }
+
+ return;
+ }
+#endif
+
+ if ((tree->gtOper != GT_LCL_VAR) && (tree->gtOper != GT_LCL_FLD))
+ {
+ return;
+ }
+
+ /* This must be a local variable reference */
+
+ noway_assert((tree->gtOper == GT_LCL_VAR) || (tree->gtOper == GT_LCL_FLD));
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ /* Increment the reference counts */
+
+ varDsc->incRefCnts(lvaMarkRefsWeight, this);
+
+ if (lvaVarAddrExposed(lclNum))
+ {
+ varDsc->lvIsBoolean = false;
+ }
+
+ if (tree->gtOper == GT_LCL_FLD)
+ {
+#if ASSERTION_PROP
+ // variables that have uses inside a GT_LCL_FLD
+ // cause problems, so we will disqualify them here
+ varDsc->lvaDisqualifyVar();
+#endif // ASSERTION_PROP
+ return;
+ }
+
+#if ASSERTION_PROP
+ /* Exclude the normal entry block */
+ if (fgDomsComputed && (lvaMarkRefsCurBlock->bbNum != 1) && lvaMarkRefsCurBlock->bbIDom != nullptr)
+ {
+ // If any entry block except the normal entry block dominates the block, then mark the local with the
+ // lvVolatileHint flag.
+
+ if (BlockSetOps::MayBeUninit(lvaMarkRefsCurBlock->bbDoms))
+ {
+ // Lazy init (If a block is not dominated by any other block, we'll redo this every time, but it'll be fast)
+ BlockSetOps::AssignNoCopy(this, lvaMarkRefsCurBlock->bbDoms, fgGetDominatorSet(lvaMarkRefsCurBlock));
+ BlockSetOps::RemoveElemD(this, lvaMarkRefsCurBlock->bbDoms, fgFirstBB->bbNum);
+ }
+ assert(fgEnterBlksSetValid);
+ if (!BlockSetOps::IsEmptyIntersection(this, lvaMarkRefsCurBlock->bbDoms, fgEnterBlks))
+ {
+ varDsc->lvVolatileHint = 1;
+ }
+ }
+
+ /* Record if the variable has a single def or not */
+
+ if (!varDsc->lvDisqualify) // If this variable is already disqualified we can skip this
+ {
+ if (tree->gtFlags & GTF_VAR_DEF) // Is this a def of our variable?
+ {
+ /*
+ If we have one of these cases:
+ 1. We have already seen a definition (i.e lvSingleDef is true)
+ 2. or info.compInitMem is true (thus this would be the second definition)
+ 3. or we have an assignment inside QMARK-COLON trees
+ 4. or we have an update form of assignment (i.e. +=, -=, *=)
+ Then we must disqualify this variable for use in optAddCopies()
+
+ Note that all parameters start out with lvSingleDef set to true
+ */
+ if ((varDsc->lvSingleDef == true) || (info.compInitMem == true) || (tree->gtFlags & GTF_COLON_COND) ||
+ (tree->gtFlags & GTF_VAR_USEASG))
+ {
+ varDsc->lvaDisqualifyVar();
+ }
+ else
+ {
+ varDsc->lvSingleDef = true;
+ varDsc->lvDefStmt = lvaMarkRefsCurStmt;
+ }
+ }
+ else // otherwise this is a ref of our variable
+ {
+ if (BlockSetOps::MayBeUninit(varDsc->lvRefBlks))
+ {
+ // Lazy initialization
+ BlockSetOps::AssignNoCopy(this, varDsc->lvRefBlks, BlockSetOps::MakeEmpty(this));
+ }
+ BlockSetOps::AddElemD(this, varDsc->lvRefBlks, lvaMarkRefsCurBlock->bbNum);
+ }
+ }
+#endif // ASSERTION_PROP
+
+ bool allowStructs = false;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On System V the type of the var could be a struct type.
+ allowStructs = varTypeIsStruct(varDsc);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ /* Variables must be used as the same type throughout the method */
+ noway_assert(tiVerificationNeeded || varDsc->lvType == TYP_UNDEF || tree->gtType == TYP_UNKNOWN || allowStructs ||
+ genActualType(varDsc->TypeGet()) == genActualType(tree->gtType) ||
+ (tree->gtType == TYP_BYREF && varDsc->TypeGet() == TYP_I_IMPL) ||
+ (tree->gtType == TYP_I_IMPL && varDsc->TypeGet() == TYP_BYREF) || (tree->gtFlags & GTF_VAR_CAST) ||
+ varTypeIsFloating(varDsc->TypeGet()) && varTypeIsFloating(tree->gtType));
+
+ /* Remember the type of the reference */
+
+ if (tree->gtType == TYP_UNKNOWN || varDsc->lvType == TYP_UNDEF)
+ {
+ varDsc->lvType = tree->gtType;
+ noway_assert(genActualType(varDsc->TypeGet()) == tree->gtType); // no truncation
+ }
+
+#ifdef DEBUG
+ if (tree->gtFlags & GTF_VAR_CAST)
+ {
+ // it should never be bigger than the variable slot
+
+ // Trees don't store the full information about structs
+ // so we can't check them.
+ if (tree->TypeGet() != TYP_STRUCT)
+ {
+ unsigned treeSize = genTypeSize(tree->TypeGet());
+ unsigned varSize = genTypeSize(varDsc->TypeGet());
+ if (varDsc->TypeGet() == TYP_STRUCT)
+ {
+ varSize = varDsc->lvSize();
+ }
+
+ assert(treeSize <= varSize);
+ }
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Helper passed to Compiler::fgWalkTreePre() to do variable ref marking.
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::lvaMarkLclRefsCallback(GenTreePtr* pTree, fgWalkData* data)
+{
+ data->compiler->lvaMarkLclRefs(*pTree);
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Update the local variable reference counts for one basic block
+ */
+
+void Compiler::lvaMarkLocalVars(BasicBlock* block)
+{
+#if ASSERTION_PROP
+ lvaMarkRefsCurBlock = block;
+#endif
+ lvaMarkRefsWeight = block->getBBWeight(this);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*** marking local variables in block BB%02u (weight=%s)\n", block->bbNum,
+ refCntWtd2str(lvaMarkRefsWeight));
+ }
+#endif
+
+ for (GenTreePtr tree = block->FirstNonPhiDef(); tree; tree = tree->gtNext)
+ {
+ noway_assert(tree->gtOper == GT_STMT);
+
+#if ASSERTION_PROP
+ lvaMarkRefsCurStmt = tree;
+#endif
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ gtDispTree(tree);
+ }
+#endif
+
+ fgWalkTreePre(&tree->gtStmt.gtStmtExpr, Compiler::lvaMarkLclRefsCallback, (void*)this, false);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Create the local variable table and compute local variable reference
+ * counts.
+ */
+
+void Compiler::lvaMarkLocalVars()
+{
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In lvaMarkLocalVars()");
+ }
+#endif
+
+ /* If there is a call to an unmanaged target, we already grabbed a
+ local slot for the current thread control block.
+ */
+
+ if (info.compCallUnmanaged != 0)
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ noway_assert(info.compLvFrameListRoot >= info.compLocalsCount && info.compLvFrameListRoot < lvaCount);
+
+ lvaTable[info.compLvFrameListRoot].lvType = TYP_I_IMPL;
+
+ /* Set the refCnt, it is used in the prolog and return block(s) */
+
+ lvaTable[info.compLvFrameListRoot].lvRefCnt = 2;
+ lvaTable[info.compLvFrameListRoot].lvRefCntWtd = 2 * BB_UNITY_WEIGHT;
+ }
+ }
+
+ lvaAllocOutgoingArgSpace();
+
+#if !FEATURE_EH_FUNCLETS
+
+ // Grab space for exception handling
+
+ if (ehNeedsShadowSPslots())
+ {
+ // The first slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ // ie. the offset of the end-of-last-executed-filter
+ unsigned slotsNeeded = 1;
+
+ unsigned handlerNestingLevel = ehMaxHndNestingCount;
+
+ if (opts.compDbgEnC && (handlerNestingLevel < (unsigned)MAX_EnC_HANDLER_NESTING_LEVEL))
+ handlerNestingLevel = (unsigned)MAX_EnC_HANDLER_NESTING_LEVEL;
+
+ slotsNeeded += handlerNestingLevel;
+
+ // For a filter (which can be active at the same time as a catch/finally handler)
+ slotsNeeded++;
+ // For zero-termination of the shadow-Stack-pointer chain
+ slotsNeeded++;
+
+ lvaShadowSPslotsVar = lvaGrabTempWithImplicitUse(false DEBUGARG("lvaShadowSPslotsVar"));
+ LclVarDsc* shadowSPslotsVar = &lvaTable[lvaShadowSPslotsVar];
+ shadowSPslotsVar->lvType = TYP_BLK;
+ shadowSPslotsVar->lvExactSize = (slotsNeeded * TARGET_POINTER_SIZE);
+ }
+
+#endif // !FEATURE_EH_FUNCLETS
+
+#if FEATURE_EH_FUNCLETS
+ if (ehNeedsPSPSym())
+ {
+ lvaPSPSym = lvaGrabTempWithImplicitUse(false DEBUGARG("PSPSym"));
+ LclVarDsc* lclPSPSym = &lvaTable[lvaPSPSym];
+ lclPSPSym->lvType = TYP_I_IMPL;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ if (compLocallocUsed)
+ {
+ lvaLocAllocSPvar = lvaGrabTempWithImplicitUse(false DEBUGARG("LocAllocSPvar"));
+ LclVarDsc* locAllocSPvar = &lvaTable[lvaLocAllocSPvar];
+ locAllocSPvar->lvType = TYP_I_IMPL;
+ }
+
+ BasicBlock* block;
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+#ifndef DEBUG
+ // Assign slot numbers to all variables.
+ // For compiler-generated local variables, slot numbers will be
+ // invalid (out of range of info.compVarScopes).
+
+ // Also have to check if variable was not reallocated to another
+ // slot in which case we have to register the original slot #.
+
+ // We don't need to do this for IL, but this keeps lvSlotNum consistent.
+
+ if (opts.compScopeInfo && (info.compVarScopesCount > 0))
+#endif
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ varDsc->lvSlotNum = lclNum;
+ }
+ }
+
+#endif // defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+ /* Mark all local variable references */
+
+ lvaRefCountingStarted = true;
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ lvaMarkLocalVars(block);
+ }
+
+ /* For incoming register arguments, if there are references in the body
+ * then we will have to copy them to the final home in the prolog
+ * This counts as an extra reference with a weight of 2
+ */
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (lclNum >= info.compArgsCount)
+ {
+ break; // early exit for loop
+ }
+
+ if ((varDsc->lvIsRegArg) && (varDsc->lvRefCnt > 0))
+ {
+ // Fix 388376 ARM JitStress WP7
+ varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
+ varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
+ }
+ }
+
+#if ASSERTION_PROP
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ // Note: optAddCopies() depends on lvaRefBlks, which is set in lvaMarkLocalVars(BasicBlock*), called above.
+ optAddCopies();
+ }
+#endif
+
+ if (lvaKeepAliveAndReportThis() && lvaTable[0].lvRefCnt == 0)
+ {
+ lvaTable[0].lvRefCnt = 1;
+ // This isn't strictly needed as we will make a copy of the param-type-arg
+ // in the prolog. However, this ensures that the LclVarDsc corresponding to
+ // info.compTypeCtxtArg is valid.
+ }
+ else if (lvaReportParamTypeArg() && lvaTable[info.compTypeCtxtArg].lvRefCnt == 0)
+ {
+ lvaTable[info.compTypeCtxtArg].lvRefCnt = 1;
+ }
+
+ lvaLocalVarRefCounted = true;
+ lvaRefCountingStarted = false;
+
+ lvaSortByRefCount();
+}
+
+void Compiler::lvaAllocOutgoingArgSpace()
+{
+#if FEATURE_FIXED_OUT_ARGS
+
+ // Setup the outgoing argument region, in case we end up using it later
+
+ if (lvaOutgoingArgSpaceVar == BAD_VAR_NUM)
+ {
+ lvaOutgoingArgSpaceVar = lvaGrabTemp(false DEBUGARG("OutgoingArgSpace"));
+
+ lvaTable[lvaOutgoingArgSpaceVar].lvType = TYP_LCLBLK;
+
+ /* Set the refCnts */
+
+ lvaTable[lvaOutgoingArgSpaceVar].lvRefCnt = 1;
+ lvaTable[lvaOutgoingArgSpaceVar].lvRefCntWtd = BB_UNITY_WEIGHT;
+
+ if (lvaOutgoingArgSpaceSize == 0)
+ {
+ if (compUsesThrowHelper || compIsProfilerHookNeeded())
+ {
+ // Need to make sure the MIN_ARG_AREA_FOR_CALL space is added to the frame if:
+ // 1. there are calls to THROW_HELPER methods.
+ // 2. we are generating profiling Enter/Leave/TailCall hooks. This will ensure
+ // that even methods without any calls will have outgoing arg area space allocated.
+ //
+ // An example of these two cases is Windows Amd64, where the ABI requires 4 slots for
+ // the outgoing arg space if the method makes any calls.
+ lvaOutgoingArgSpaceSize = MIN_ARG_AREA_FOR_CALL;
+ }
+ }
+ }
+
+ noway_assert(lvaOutgoingArgSpaceVar >= info.compLocalsCount && lvaOutgoingArgSpaceVar < lvaCount);
+
+#endif // FEATURE_FIXED_OUT_ARGS
+}
+
+inline void Compiler::lvaIncrementFrameSize(unsigned size)
+{
+ if (size > MAX_FrameSize || compLclFrameSize + size > MAX_FrameSize)
+ {
+ BADCODE("Frame size overflow");
+ }
+
+ compLclFrameSize += size;
+}
+
+/****************************************************************************
+*
+* Return true if absolute offsets of temps are larger than those of vars; in other
+* words, whether we allocated temps before or after vars. The /GS buffer overrun
+* checks want temps to be at lower stack addresses than buffers.
+*/
+bool Compiler::lvaTempsHaveLargerOffsetThanVars()
+{
+#ifdef _TARGET_ARM_
+ // We never want to place the temps with larger offsets for ARM
+ return false;
+#else
+ if (compGSReorderStackLayout)
+ {
+ return codeGen->isFramePointerUsed();
+ }
+ else
+ {
+ return true;
+ }
+#endif
+}
+
+/****************************************************************************
+*
+* Return an upper bound estimate for the size of the compiler spill temps
+*
+*/
+unsigned Compiler::lvaGetMaxSpillTempSize()
+{
+ unsigned result = 0;
+
+#ifndef LEGACY_BACKEND
+ if (lvaDoneFrameLayout >= REGALLOC_FRAME_LAYOUT)
+ {
+ result = tmpSize;
+ }
+ else
+ {
+ result = MAX_SPILL_TEMP_SIZE;
+ }
+#else // LEGACY_BACKEND
+ if (lvaDoneFrameLayout >= FINAL_FRAME_LAYOUT)
+ {
+ result = tmpSize;
+ }
+ else
+ {
+ if (lvaDoneFrameLayout >= REGALLOC_FRAME_LAYOUT)
+ {
+ unsigned maxTmpSize = sizeof(double) + sizeof(int);
+
+ maxTmpSize += (tmpDoubleSpillMax * sizeof(double)) + (tmpIntSpillMax * sizeof(int));
+
+ result = maxTmpSize;
+ }
+ else
+ {
+ result = MAX_SPILL_TEMP_SIZE;
+ }
+#ifdef DEBUG
+ // When StressRegs is >=1, there can be a bunch of spills that are not
+ // predicted by the predictor (see logic in rsPickReg). It is very hard
+ // to teach the predictor about the behavior of rsPickReg for StressRegs >= 1,
+ // so instead let's make MaxTmpSize large enough so that we won't be wrong.
+
+ if (codeGen->regSet.rsStressRegs() >= 1)
+ {
+ result += (REG_TMP_ORDER_COUNT * REGSIZE_BYTES);
+ }
+#endif // DEBUG
+ }
+#endif // LEGACY_BACKEND
+ return result;
+}
+
+// clang-format off
+/*****************************************************************************
+ *
+ * Compute stack frame offsets for arguments, locals and optionally temps.
+ *
+ * The frame is laid out as follows for x86:
+ *
+ * ESP frames
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * |-----------------------| <---- Virtual '0'
+ * | return address |
+ * +=======================+
+ * |Callee saved registers |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------| <---- Ambient ESP
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
+ * | | |
+ * | | Stack grows |
+ * | downward
+ * V
+ *
+ *
+ * EBP frames
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * |-----------------------| <---- Virtual '0'
+ * | return address |
+ * +=======================+
+ * | incoming EBP |
+ * |-----------------------| <---- EBP
+ * |Callee saved registers |
+ * |-----------------------|
+ * | security object |
+ * |-----------------------|
+ * | ParamTypeArg |
+ * |-----------------------|
+ * | Last-executed-filter |
+ * |-----------------------|
+ * | |
+ * ~ Shadow SPs ~
+ * | |
+ * |-----------------------|
+ * | |
+ * ~ Variables ~
+ * | |
+ * ~-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | localloc |
+ * |-----------------------| <---- Ambient ESP
+ * | Arguments for the |
+ * | next function ~
+ * | |
+ * | | |
+ * | | Stack grows |
+ * | downward
+ * V
+ *
+ *
+ * The frame is laid out as follows for x64:
+ *
+ * RSP frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * |-----------------------|
+ * | 4 fixed incoming |
+ * | argument slots |
+ * |-----------------------| <---- Caller's SP & Virtual '0'
+ * | return address |
+ * +=======================+
+ * | Callee saved Int regs |
+ * -------------------------
+ * | Padding | <---- this padding (0 or 8 bytes) is to ensure flt registers are saved at a mem location aligned at 16-bytes
+ * | | so that we can save 128-bit callee saved xmm regs using performant "movaps" instruction instead of "movups"
+ * -------------------------
+ * | Callee saved Flt regs | <----- entire 128-bits of callee saved xmm registers are stored here
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------|
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
+ * |-----------------------|
+ * | 4 fixed outgoing |
+ * | argument slots |
+ * |-----------------------| <---- Ambient RSP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ *
+ * RBP frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * |-----------------------|
+ * | 4 fixed incoming |
+ * | argument slots |
+ * |-----------------------| <---- Caller's SP & Virtual '0'
+ * | return address |
+ * +=======================+
+ * | Callee saved Int regs |
+ * -------------------------
+ * | Padding |
+ * -------------------------
+ * | Callee saved Flt regs |
+ * |-----------------------|
+ * | security object |
+ * |-----------------------|
+ * | ParamTypeArg |
+ * |-----------------------|
+ * | |
+ * | |
+ * ~ Variables ~
+ * | |
+ * | |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | |
+ * ~ localloc ~ // not in frames with EH
+ * | |
+ * |-----------------------|
+ * | PSPSym | // only in frames with EH (thus no localloc)
+ * | |
+ * |-----------------------| <---- RBP in localloc frames (max 240 bytes from Initial-SP)
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
+ * |-----------------------|
+ * | 4 fixed outgoing |
+ * | argument slots |
+ * |-----------------------| <---- Ambient RSP (before localloc, this is Initial-SP)
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ *
+ * The frame is laid out as follows for ARM (this is a general picture; details may differ for different conditions):
+ *
+ * SP frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * | Pre-spill registers |
+ * |-----------------------| <---- Virtual '0'
+ * |Callee saved registers |
+ * |-----------------------|
+ * ~ possible double align ~
+ * |-----------------------|
+ * | security object |
+ * |-----------------------|
+ * | ParamTypeArg |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Stub Argument Var |
+ * |-----------------------|
+ * |Inlined PInvoke Frame V|
+ * |-----------------------|
+ * ~ possible double align ~
+ * |-----------------------|
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ *
+ * FP / R11 frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * | Pre-spill registers |
+ * |-----------------------| <---- Virtual '0'
+ * |Callee saved registers |
+ * |-----------------------|
+ * | PSPSym | // Only for frames with EH, which means FP-based frames
+ * |-----------------------|
+ * ~ possible double align ~
+ * |-----------------------|
+ * | security object |
+ * |-----------------------|
+ * | ParamTypeArg |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Stub Argument Var |
+ * |-----------------------|
+ * |Inlined PInvoke Frame V|
+ * |-----------------------|
+ * ~ possible double align ~
+ * |-----------------------|
+ * | localloc |
+ * |-----------------------|
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ *
+ * The frame is laid out as follows for ARM64 (this is a general picture; details may differ for different conditions):
+ * TODO-ARM64-NYI: this is preliminary (copied from ARM and modified), and needs to be reviewed.
+ * NOTE: SP must be 16-byte aligned, so there may be alignment slots in the frame.
+ * We will often save and establish a frame pointer to create better ETW stack walks.
+ *
+ * SP frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * | homed | // this is only needed if register arguments need to be homed, e.g., for varargs
+ * | register arguments |
+ * |-----------------------| <---- Virtual '0'
+ * |Callee saved registers |
+ * | except fp/lr |
+ * |-----------------------|
+ * | security object |
+ * |-----------------------|
+ * | ParamTypeArg |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Stub Argument Var |
+ * |-----------------------|
+ * |Inlined PInvoke Frame V|
+ * |-----------------------|
+ * | Saved LR |
+ * |-----------------------|
+ * | Saved FP | <---- Frame pointer
+ * |-----------------------|
+ * | Stack arguments for |
+ * | the next function |
+ * |-----------------------| <---- SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ *
+ * FP (R29 / x29) frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * | optional homed | // this is only needed if register arguments need to be homed, e.g., for varargs
+ * | register arguments |
+ * |-----------------------| <---- Virtual '0'
+ * |Callee saved registers |
+ * | except fp/lr |
+ * |-----------------------|
+ * | PSPSym | // Only for frames with EH, which requires FP-based frames
+ * |-----------------------|
+ * | security object |
+ * |-----------------------|
+ * | ParamTypeArg |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Stub Argument Var |
+ * |-----------------------|
+ * |Inlined PInvoke Frame V|
+ * |-----------------------|
+ * | Saved LR |
+ * |-----------------------|
+ * | Saved FP | <---- Frame pointer
+ * |-----------------------|
+ * ~ localloc ~
+ * |-----------------------|
+ * | Stack arguments for |
+ * | the next function |
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ *
+ * Doing this all in one pass is 'hard'. So instead we do it in 2 basic passes:
+ * 1. Assign all the offsets relative to the Virtual '0'. Offsets above (the
+ * incoming arguments) are positive. Offsets below (everything else) are
+ * negative. This pass also calculates the total frame size (between Caller's
+ * SP/return address and the Ambient SP).
+ * 2. Figure out where to place the frame pointer, and then adjust the offsets
+ * as needed for the final stack size and whether the offset is frame pointer
+ * relative or stack pointer relative.
+ *
+ */
+// clang-format on
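+
+// Illustrative numbers (hypothetical): pass 1 might give a local the virtual offset -8 (below
+// Virtual '0') and an incoming stack argument +16. Pass 2 then adds a single delta, computed
+// from the final frame size and the frame-pointer decision, to every offset; if that delta is
+// 0x40 for an SP-relative frame, the local's final offset becomes SP+0x38.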
+
+void Compiler::lvaAssignFrameOffsets(FrameLayoutState curState)
+{
+ noway_assert(lvaDoneFrameLayout < curState);
+
+ lvaDoneFrameLayout = curState;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+
+ printf("*************** In lvaAssignFrameOffsets");
+ if (curState == INITIAL_FRAME_LAYOUT)
+ {
+ printf("(INITIAL_FRAME_LAYOUT)");
+ }
+ else if (curState == PRE_REGALLOC_FRAME_LAYOUT)
+ {
+ printf("(PRE_REGALLOC_FRAME_LAYOUT)");
+ }
+ else if (curState == REGALLOC_FRAME_LAYOUT)
+ {
+ printf("(REGALLOC_FRAME_LAYOUT)");
+ }
+ else if (curState == TENTATIVE_FRAME_LAYOUT)
+ {
+ printf("(TENTATIVE_FRAME_LAYOUT)");
+ }
+ else if (curState == FINAL_FRAME_LAYOUT)
+ {
+ printf("(FINAL_FRAME_LAYOUT)");
+ }
+ else
+ {
+ printf("(UNKNOWN)");
+ unreached();
+ }
+ printf("\n");
+ }
+#endif
+
+#if FEATURE_FIXED_OUT_ARGS
+ assert(lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ /*-------------------------------------------------------------------------
+ *
+ * First process the arguments.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ lvaAssignVirtualFrameOffsetsToArgs();
+
+ /*-------------------------------------------------------------------------
+ *
+ * Now compute stack offsets for any variables that don't live in registers
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ lvaAssignVirtualFrameOffsetsToLocals();
+
+ lvaAlignFrame();
+
+ /*-------------------------------------------------------------------------
+ *
+ * Now patch the offsets
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ lvaFixVirtualFrameOffsets();
+
+ // Modify the stack offset for fields of promoted structs.
+ lvaAssignFrameOffsetsToPromotedStructs();
+
+ /*-------------------------------------------------------------------------
+ *
+ * Finalize
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ // If it's not the final frame layout, then it's just an estimate. This means
+ // we're allowed to once again write to these variables, even if we've read
+ // from them to make tentative code generation or frame layout decisions.
+ if (curState < FINAL_FRAME_LAYOUT)
+ {
+ codeGen->resetFramePointerUsedWritePhase();
+ }
+}
+
+/*****************************************************************************
+ * lvaFixVirtualFrameOffsets() : Now that everything has a virtual offset,
+ * determine the final value for the frame pointer (if needed) and then
+ * adjust all the offsets appropriately.
+ *
+ * This routine fixes virtual offset to be relative to frame pointer or SP
+ * based on whether varDsc->lvFramePointerBased is true or false respectively.
+ */
+void Compiler::lvaFixVirtualFrameOffsets()
+{
+ LclVarDsc* varDsc;
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_AMD64_)
+ if (ehNeedsPSPSym())
+ {
+ // We need to fix the offset of the PSPSym so there is no padding between it and the outgoing argument space.
+ // Without this code, lvaAlignFrame might have put the padding lower than the PSPSym, which would be between
+ // the PSPSym and the outgoing argument space.
+ assert(lvaPSPSym != BAD_VAR_NUM);
+ varDsc = &lvaTable[lvaPSPSym];
+ assert(varDsc->lvFramePointerBased); // We always access it RBP-relative.
+ assert(!varDsc->lvMustInit); // It is never "must init".
+ varDsc->lvStkOffs = codeGen->genCallerSPtoInitialSPdelta() + lvaLclSize(lvaOutgoingArgSpaceVar);
+ }
+#endif
+
+ // The delta to be added to virtual offset to adjust it relative to frame pointer or SP
+ int delta = 0;
+
+#ifdef _TARGET_XARCH_
+ delta += REGSIZE_BYTES; // pushed PC (return address) for x86/x64
+
+ if (codeGen->doubleAlignOrFramePointerUsed())
+ {
+ delta += REGSIZE_BYTES; // pushed EBP (frame pointer)
+ }
+#endif
+
+ if (!codeGen->isFramePointerUsed())
+ {
+ // pushed registers, return address, and padding
+ delta += codeGen->genTotalFrameSize();
+ }
+#if defined(_TARGET_ARM_)
+ else
+ {
+ // We set FP to be after LR, FP
+ delta += 2 * REGSIZE_BYTES;
+ }
+#elif defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ else
+ {
+ // FP is used.
+ delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta();
+ }
+#endif //_TARGET_AMD64_
+
+ unsigned lclNum;
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ bool doAssignStkOffs = true;
+
+ // Can't be relative to EBP unless we have an EBP
+ noway_assert(!varDsc->lvFramePointerBased || codeGen->doubleAlignOrFramePointerUsed());
+
+ // Is this a non-param promoted struct field?
+ // if so then set doAssignStkOffs to false.
+ //
+ if (varDsc->lvIsStructField && !varDsc->lvIsParam)
+ {
+ LclVarDsc* parentvarDsc = &lvaTable[varDsc->lvParentLcl];
+ lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc);
+
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ doAssignStkOffs = false; // Assigned later in lvaAssignFrameOffsetsToPromotedStructs()
+ }
+ }
+
+ if (!varDsc->lvOnFrame)
+ {
+ if (!varDsc->lvIsParam
+#if !defined(_TARGET_AMD64_)
+ || (varDsc->lvIsRegArg
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ && compIsProfilerHookNeeded() &&
+ !lvaIsPreSpilled(lclNum, codeGen->regSet.rsMaskPreSpillRegs(false)) // We need assign stack offsets
+ // for prespilled arguments
+#endif
+ )
+#endif // !defined(_TARGET_AMD64_)
+ )
+ {
+ doAssignStkOffs = false; // Not on frame or an incoming stack arg
+ }
+ }
+
+ if (doAssignStkOffs)
+ {
+ varDsc->lvStkOffs += delta;
+
+#if DOUBLE_ALIGN
+ if (genDoubleAlign() && !codeGen->isFramePointerUsed())
+ {
+ if (varDsc->lvFramePointerBased)
+ {
+ varDsc->lvStkOffs -= delta;
+
+ // We need to re-adjust the offsets of the parameters so they are EBP
+ // relative rather than stack/frame pointer relative
+
+ varDsc->lvStkOffs += (2 * sizeof(void*)); // return address and pushed EBP
+
+ noway_assert(varDsc->lvStkOffs >= FIRST_ARG_STACK_OFFS);
+ }
+ }
+#endif
+ // On System V environments the stkOffs could be 0 for params passed in registers.
+ assert(codeGen->isFramePointerUsed() ||
+ varDsc->lvStkOffs >= 0); // Only EBP relative references can have negative offsets
+ }
+ }
+
+ assert(tmpAllFree());
+ for (TempDsc* temp = tmpListBeg(); temp != nullptr; temp = tmpListNxt(temp))
+ {
+ temp->tdAdjustTempOffs(delta);
+ }
+
+ lvaCachedGenericContextArgOffs += delta;
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ if (lvaOutgoingArgSpaceVar != BAD_VAR_NUM)
+ {
+ varDsc = &lvaTable[lvaOutgoingArgSpaceVar];
+ varDsc->lvStkOffs = 0;
+ varDsc->lvFramePointerBased = false;
+ varDsc->lvMustInit = false;
+ }
+
+#endif // FEATURE_FIXED_OUT_ARGS
+}
+
+#ifdef _TARGET_ARM_
+bool Compiler::lvaIsPreSpilled(unsigned lclNum, regMaskTP preSpillMask)
+{
+ const LclVarDsc& desc = lvaTable[lclNum];
+ return desc.lvIsRegArg && (preSpillMask & genRegMask(desc.lvArgReg));
+}
+#endif // _TARGET_ARM_
+
+#ifndef LEGACY_BACKEND
+/*****************************************************************************
+ * lvaUpdateArgsWithInitialReg() : For each argument variable descriptor, update
+ * its current register with the initial register as assigned by LSRA.
+ */
+void Compiler::lvaUpdateArgsWithInitialReg()
+{
+ if (!compLSRADone)
+ {
+ return;
+ }
+
+ for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
+ {
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvPromotedStruct())
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = lvaTable + fieldVarNum;
+ }
+
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegCandidate())
+ {
+ if (varTypeIsMultiReg(varDsc))
+ {
+ regPairNo initialRegPair = varDsc->lvArgInitRegPair;
+ varDsc->lvRegNum = genRegPairLo(initialRegPair);
+ varDsc->lvOtherReg = genRegPairHi(initialRegPair);
+ }
+ else
+ {
+ varDsc->lvRegNum = varDsc->lvArgInitReg;
+ }
+ }
+ }
+}
+#endif // !LEGACY_BACKEND
+
+/*****************************************************************************
+ * lvaAssignVirtualFrameOffsetsToArgs() : Assign virtual stack offsets to the
+ * arguments, and implicit arguments (this ptr, return buffer, generics,
+ * and varargs).
+ */
+void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
+{
+ unsigned lclNum = 0;
+ int argOffs = 0;
+#ifdef UNIX_AMD64_ABI
+ int callerArgOffset = 0;
+#endif // UNIX_AMD64_ABI
+
+ /*
+ Assign stack offsets to arguments (in reverse order of passing).
+
+ This means that if we pass arguments left->right, we start at
+ the end of the list and work backwards, for right->left we start
+ with the first argument and move forward.
+
+ This is all relative to our Virtual '0'
+ */
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R)
+ {
+ argOffs = compArgSize;
+ }
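+
+ // For example, on an ARG_ORDER_L2R target argOffs starts at compArgSize and
+ // lvaAssignVirtualFrameOffsetToArg subtracts each argument's size up front; on an
+ // ARG_ORDER_R2L target it starts at 0 and each stack-passed argument's size is added
+ // after its offset has been assigned.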
+
+ /* Update the argOffs to reflect arguments that are passed in registers */
+
+ noway_assert(codeGen->intRegState.rsCalleeRegArgCount <= MAX_REG_ARG);
+ noway_assert(compArgSize >= codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*));
+
+#ifdef _TARGET_X86_
+ argOffs -= codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*);
+#endif
+
+#ifndef LEGACY_BACKEND
+ // Update the arg initial register locations.
+ lvaUpdateArgsWithInitialReg();
+#endif // !LEGACY_BACKEND
+
+ /* Is there a "this" argument? */
+
+ if (!info.compIsStatic)
+ {
+ noway_assert(lclNum == info.compThisArg);
+#ifndef _TARGET_X86_
+ argOffs =
+ lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+#endif // _TARGET_X86_
+ lclNum++;
+ }
+
+ /* if we have a hidden buffer parameter, that comes here */
+
+ if (info.compRetBuffArg != BAD_VAR_NUM)
+ {
+ noway_assert(lclNum == info.compRetBuffArg);
+ noway_assert(lvaTable[lclNum].lvIsRegArg);
+#ifndef _TARGET_X86_
+ argOffs =
+ lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+#endif // _TARGET_X86_
+ lclNum++;
+ }
+
+#if USER_ARGS_COME_LAST
+
+ //@GENERICS: extra argument for instantiation info
+ if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ noway_assert(lclNum == (unsigned)info.compTypeCtxtArg);
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void*),
+ argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+ }
+
+ if (info.compIsVarArgs)
+ {
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void*),
+ argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+ }
+
+#endif // USER_ARGS_COME_LAST
+
+ CORINFO_ARG_LIST_HANDLE argLst = info.compMethodInfo->args.args;
+ unsigned argSigLen = info.compMethodInfo->args.numArgs;
+
+#ifdef _TARGET_ARM_
+ //
+ // struct_n { int; int; ... n times };
+ //
+ // Consider signature:
+ //
+ // Foo (float a,double b,float c,double d,float e,double f,float g,double h,
+ // float i,double j,float k,double l,struct_3 m) { }
+ //
+ // Basically the signature is: (all float regs full, 1 double, struct_3);
+ //
+ // The double argument occurs before pre spill in the argument iteration and
+ // computes an argOffset of 0, and struct_3's offset becomes 8. This is wrong,
+ // because struct_3 is prespilled and the double occurs after the prespill area.
+ // The correct offsets are double = 16 (aligned stk) and struct_3 = 0..12;
+ // offset 12 will be skipped to double-align the double.
+ //
+ // Another example is (struct_2, all float regs full, double, struct_2);
+ // Here, notice the order is similarly messed up because of 2 pre-spilled
+ // struct_2.
+ //
+ // Succinctly,
+ // ARG_INDEX(i) > ARG_INDEX(j) DOES NOT IMPLY |ARG_OFFSET(i)| > |ARG_OFFSET(j)|
+ //
+ // Therefore, we'll do a two pass offset calculation, one that considers pre-spill
+ // and the next, stack args.
+ //
+
+ unsigned argLcls = 0;
+
+ // Take care of pre spill registers first.
+ regMaskTP preSpillMask = codeGen->regSet.rsMaskPreSpillRegs(false);
+ regMaskTP tempMask = RBM_NONE;
+ for (unsigned i = 0, preSpillLclNum = lclNum; i < argSigLen; ++i, ++preSpillLclNum)
+ {
+ if (lvaIsPreSpilled(preSpillLclNum, preSpillMask))
+ {
+ unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
+ argOffs = lvaAssignVirtualFrameOffsetToArg(preSpillLclNum, argSize, argOffs);
+ argLcls++;
+
+ // Early out if we can. If size is 8 and base reg is 2, then the mask is 0b1100 (r2 and r3)
+ tempMask |= ((((1 << (roundUp(argSize) / REGSIZE_BYTES))) - 1) << lvaTable[preSpillLclNum].lvArgReg);
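+ // For example, an 8-byte argument homed in r2 contributes ((1 << 2) - 1) << 2 == 0b1100,
+ // i.e. the bits for r2 and r3.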
+ if (tempMask == preSpillMask)
+ {
+ // We won't encounter more pre-spilled registers,
+ // so don't bother iterating further.
+ break;
+ }
+ }
+ argLst = info.compCompHnd->getArgNext(argLst);
+ }
+
+ // Take care of non pre-spilled stack arguments.
+ argLst = info.compMethodInfo->args.args;
+ for (unsigned i = 0, stkLclNum = lclNum; i < argSigLen; ++i, ++stkLclNum)
+ {
+ if (!lvaIsPreSpilled(stkLclNum, preSpillMask))
+ {
+ argOffs =
+ lvaAssignVirtualFrameOffsetToArg(stkLclNum, eeGetArgSize(argLst, &info.compMethodInfo->args), argOffs);
+ argLcls++;
+ }
+ argLst = info.compCompHnd->getArgNext(argLst);
+ }
+
+ lclNum += argLcls;
+#else // !_TARGET_ARM_
+ for (unsigned i = 0; i < argSigLen; i++)
+ {
+ unsigned argumentSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On the stack frame the homed arg always takes a full number of slots
+ // for proper stack alignment. Make sure the real struct size is properly rounded up.
+ argumentSize = (unsigned)roundUp(argumentSize, TARGET_POINTER_SIZE);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ argOffs =
+ lvaAssignVirtualFrameOffsetToArg(lclNum++, argumentSize, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+ argLst = info.compCompHnd->getArgNext(argLst);
+ }
+#endif // !_TARGET_ARM_
+
+#if !USER_ARGS_COME_LAST
+
+ //@GENERICS: extra argument for instantiation info
+ if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ noway_assert(lclNum == (unsigned)info.compTypeCtxtArg);
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void*),
+ argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+ }
+
+ if (info.compIsVarArgs)
+ {
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void*),
+ argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+ }
+
+#endif // USER_ARGS_COME_LAST
+}
+
+#ifdef UNIX_AMD64_ABI
+//
+// lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an
+// individual argument, and return the offset for the next argument.
+// Note: This method only calculates the initial offset of the stack passed/spilled arguments
+// (if any - the RA might decide to spill (home on the stack) register passed arguments, if rarely used.)
+// The final offset is calculated in the lvaFixVirtualFrameOffsets method. It accounts for FP existence,
+// ret address slot, stack frame padding, alloca instructions, etc.
+// Note: This is the implementation for UNIX_AMD64 System V platforms.
+//
+int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
+ unsigned argSize,
+ int argOffs UNIX_AMD64_ABI_ONLY_ARG(int* callerArgOffset))
+{
+ noway_assert(lclNum < info.compArgsCount);
+ noway_assert(argSize);
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R)
+ argOffs -= argSize;
+
+ unsigned fieldVarNum = BAD_VAR_NUM;
+
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvPromotedStruct())
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+ fieldVarNum = varDsc->lvFieldLclStart;
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_INDEPENDENT)
+ {
+ lclNum = fieldVarNum;
+ noway_assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+ assert(varDsc->lvIsStructField);
+ }
+ }
+
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegArg)
+ {
+ // Argument is passed in a register, don't count it
+ // when updating the current offset on the stack.
+
+ if (varDsc->lvOnFrame)
+ {
+ // The offset for args needs to be set only for the stack homed arguments for System V.
+ varDsc->lvStkOffs = argOffs;
+ }
+ else
+ {
+ varDsc->lvStkOffs = 0;
+ }
+ }
+ else
+ {
+ // For Windows AMD64 there are 4 slots for the register passed arguments on the top of the caller's stack.
+ // This is where they are always homed. So, they can be accessed with positive offset.
+ // On System V platforms, if the RA decides to home a register passed arg on the stack, it creates a stack
+ // location on the callee stack (like any other local var.) In such a case, the register passed, stack homed
+ // arguments are accessed using negative offsets and the stack passed arguments are accessed using positive
+ // offset (from the caller's stack.)
+ // For System V platforms, if there is no frame pointer the caller stack parameter offset should include the
+ // callee allocated space. If a frame register is used, the callee allocated space should not be included when
+ // accessing the caller stack parameters. The last two requirements are met in the lvaFixVirtualFrameOffsets
+ // method, which fixes the offsets based on frame pointer existence, presence of alloca instructions, return
+ // address pushed, etc.
+
+ varDsc->lvStkOffs = *callerArgOffset;
+ // Structs passed on stack could be of size less than TARGET_POINTER_SIZE.
+ // Make sure they get at least TARGET_POINTER_SIZE on the stack - this is required for alignment.
+ if (argSize > TARGET_POINTER_SIZE)
+ {
+ *callerArgOffset += (int)roundUp(argSize, TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ *callerArgOffset += TARGET_POINTER_SIZE;
+ }
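+ // For example, a 12-byte struct passed on the caller's stack advances *callerArgOffset
+ // by 16, while a 4-byte int still advances it by a full TARGET_POINTER_SIZE slot.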
+ }
+
+ // For struct promoted parameters we need to set the offsets for both LclVars.
+ //
+ // For a dependent promoted struct we also assign the struct fields stack offset
+ if (varDsc->lvPromotedStruct())
+ {
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ assert(fieldVarNum == varDsc->lvFieldLclStart);
+ lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ }
+ }
+ // For an independent promoted struct field we also assign the parent struct stack offset
+ else if (varDsc->lvIsStructField)
+ {
+ noway_assert(varDsc->lvParentLcl < lvaCount);
+ lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs;
+ }
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
+ argOffs += argSize;
+
+ return argOffs;
+}
+
+#else // !UNIX_AMD64_ABI
+
+//
+// lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an
+// individual argument, and return the offset for the next argument.
+// Note: This method only calculates the initial offset of the stack passed/spilled arguments
+// (if any - the RA might decide to spill (home on the stack) register passed arguments, if rarely used.)
+// The final offset is calculated in the lvaFixVirtualFrameOffsets method. It accounts for FP existence,
+// ret address slot, stack frame padding, alloca instructions, etc.
+// Note: This is the implementation for all platforms except the UNIX_AMD64 OSs (System V 64 bit).
+int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
+ unsigned argSize,
+ int argOffs UNIX_AMD64_ABI_ONLY_ARG(int* callerArgOffset))
+{
+ noway_assert(lclNum < info.compArgsCount);
+ noway_assert(argSize);
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R)
+ {
+ argOffs -= argSize;
+ }
+
+ unsigned fieldVarNum = BAD_VAR_NUM;
+
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvPromotedStruct())
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+ fieldVarNum = varDsc->lvFieldLclStart;
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_INDEPENDENT)
+ {
+ lclNum = fieldVarNum;
+ noway_assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+ assert(varDsc->lvIsStructField);
+ }
+ }
+
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegArg)
+ {
+ /* Argument is passed in a register, don't count it
+ * when updating the current offset on the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !defined(_TARGET_ARMARCH_)
+#if DEBUG
+ // TODO: Remove this noway_assert and replace occurrences of sizeof(void *) with argSize
+ // Also investigate why we are incrementing argOffs for X86 as this seems incorrect
+ //
+ noway_assert(argSize == sizeof(void*));
+#endif // DEBUG
+#endif
+
+#if defined(_TARGET_X86_)
+ argOffs += sizeof(void*);
+#elif defined(_TARGET_AMD64_)
+ // Register arguments on AMD64 also take stack space (in the backing store).
+ varDsc->lvStkOffs = argOffs;
+ argOffs += sizeof(void*);
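+ // For example, the first four arguments each have an 8-byte home slot reserved by the
+ // caller, so argOffs advances by a full slot even though the argument arrives in a register.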
+#elif defined(_TARGET_ARM64_)
+// Register arguments on ARM64 only take stack space when they have a frame home.
+#elif defined(_TARGET_ARM_)
+ // On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg
+ // in the prolog, so we have to fill in lvStkOffs here
+ //
+ regMaskTP regMask = genRegMask(varDsc->lvArgReg);
+ if (codeGen->regSet.rsMaskPreSpillRegArg & regMask)
+ {
+ // Signature: void foo(struct_8, int, struct_4)
+ // ------- CALLER SP -------
+ // r3 struct_4
+ // r2 int - not prespilled, but added for alignment. argOffs should skip this.
+ // r1 struct_8
+ // r0 struct_8
+ // -------------------------
+ // If we added alignment we need to fix argOffs for all registers above alignment.
+ if (codeGen->regSet.rsMaskPreSpillAlign != RBM_NONE)
+ {
+ assert(genCountBits(codeGen->regSet.rsMaskPreSpillAlign) == 1);
+ // Is register beyond the alignment pos?
+ if (regMask > codeGen->regSet.rsMaskPreSpillAlign)
+ {
+ // Increment argOffs just once for the _first_ register after alignment pos
+ // in the prespill mask.
+ if (!BitsBetween(codeGen->regSet.rsMaskPreSpillRegArg, regMask,
+ codeGen->regSet.rsMaskPreSpillAlign))
+ {
+ argOffs += TARGET_POINTER_SIZE;
+ }
+ }
+ }
+
+ switch (varDsc->lvType)
+ {
+ case TYP_STRUCT:
+ if (!varDsc->lvStructDoubleAlign)
+ {
+ break;
+ }
+ __fallthrough;
+
+ case TYP_DOUBLE:
+ case TYP_LONG:
+ {
+ //
+ // Let's assign offsets to arg1, a double in r2. argOffs has to be 4 not 8.
+ //
+ // ------- CALLER SP -------
+ // r3
+ // r2 double -- argOffs = 4, but it doesn't need to be skipped, because there is no skipping.
+ // r1 VACookie -- argOffs = 0
+ // -------------------------
+ //
+ // Consider argOffs as if it accounts for the number of prespilled registers before the current
+ // register. In the above example, for r2, it is r1 that is prespilled, but since r1 is
+ // accounted for by argOffs being 4, there should have been no skipping. Instead, if we didn't
+ // assign r1 to any variable, then argOffs would still be 0 which implies it is not accounting
+ // for r1, equivalently r1 is skipped.
+ //
+ // If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register.
+ int prevRegsSize =
+ genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE;
+ if (argOffs < prevRegsSize)
+ {
+ // We must align up the argOffset to a multiple of 8 to account for skipped registers.
+ argOffs = roundUp(argOffs, 2 * TARGET_POINTER_SIZE);
+ }
+ // We should've skipped only a single register.
+ assert(argOffs == prevRegsSize);
+ }
+ break;
+
+ default:
+ // No alignment of argOffs required
+ break;
+ }
+ varDsc->lvStkOffs = argOffs;
+ argOffs += argSize;
+ }
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+ }
+ else
+ {
+#if defined(_TARGET_ARM_)
+ // Dev11 Bug 42817: incorrect codegen for DrawFlatCheckBox causes A/V in WinForms
+ //
+ // Here we have method with a signature (int a1, struct a2, struct a3, int a4, int a5).
+ // Struct parameter 'a2' is 16-bytes with no alignment requirements;
+ // it uses r1,r2,r3 and [OutArg+0] when passed.
+ // Struct parameter 'a3' is 16-bytes that is required to be double aligned;
+ // the caller skips [OutArg+4] and starts the argument at [OutArg+8].
+ // Thus the caller generates the correct code to pass the arguments.
+ // When generating code to receive the arguments we set codeGen->regSet.rsMaskPreSpillRegArg to [r1,r2,r3]
+ // and spill these three registers as the first instruction in the prolog.
+ // Then when we layout the arguments' stack offsets we have an argOffs 0 which
+ // points at the location that we spilled r1 into the stack. For this first
+ // struct we take the lvIsRegArg path above with "codeGen->regSet.rsMaskPreSpillRegArg &" matching.
+ // Next when we calculate the argOffs for the second 16-byte struct we have an argOffs
+ // of 16, which appears to be aligned properly so we don't skip a stack slot.
+ //
+ // To fix this we must recover the actual OutArg offset by subtracting off the
+ // size of the PreSpill register args.
+ // Then we align this offset to a multiple of 8 and add back the size
+ // of the PreSpill register args.
+ //
+ // Dev11 Bug 71767: failure of assert(sizeofPreSpillRegArgs <= argOffs)
+ //
+ // We have a method with 'this' passed in r0, RetBuf arg in r1, VarArgs cookie
+ // in r2. The first user arg is a 144 byte struct with double alignment required,
+ // r3 is skipped, and the struct is passed on the stack. However, 'r3' is added
+ // to the codeGen->regSet.rsMaskPreSpillRegArg mask by the VarArgs cookie code, since we need to
+ // home all the potential varargs arguments in registers, even if we don't have
+ // signature type information for the variadic arguments. However, due to alignment,
+ // we have skipped a register that doesn't have a corresponding symbol. Make up
+ // for that by increasing argOffs here.
+ //
+
+ int sizeofPreSpillRegArgs = genCountBits(codeGen->regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+
+ if (argOffs < sizeofPreSpillRegArgs)
+ {
+ // This can only happen if we skipped the last register spot because the current stk arg
+ // is a struct requiring alignment, or a pre-spill alignment was required because the
+ // first reg arg needed alignment.
+ //
+ // Example 1: First Stk Argument requiring alignment in vararg case (same as above comment.)
+ // Signature (int a0, int a1, int a2, struct {long} a3, ...)
+ //
+ // stk arg a3 --> argOffs here will be 12 (r0-r2) but pre-spill will be 16.
+ // ---- Caller SP ----
+ // r3 --> Stack slot is skipped in this case.
+ // r2 int a2
+ // r1 int a1
+ // r0 int a0
+ //
+ // Example 2: First Reg Argument requiring alignment in no-vararg case.
+ // Signature (struct {long} a0, struct {int} a1, int a2, int a3)
+ //
+ // stk arg --> argOffs here will be 12 {r0-r2} but pre-spill will be 16.
+ // ---- Caller SP ----
+ // r3 int a2 --> pushed (not pre-spilled) for alignment of a0 by lvaInitUserArgs.
+ // r2 struct { int } a1
+ // r0-r1 struct { long } a0
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef PROFILING_SUPPORTED
+ // On Arm under profiler, r0-r3 are always prespilled on stack.
+ // It is possible to have methods that accept only HFAs as parameters e.g. Signature(struct hfa1, struct
+ // hfa2), in which case hfa1 and hfa2 will be enregistered in co-processor registers and will have an
+ // argument offset less than the size of the prespill area.
+ //
+ // For this reason the following conditions are asserted when not under profiler.
+ if (!compIsProfilerHookNeeded())
+#endif
+ {
+ bool cond = ((info.compIsVarArgs || opts.compUseSoftFP) &&
+ // Does cur stk arg require double alignment?
+ ((varDsc->lvType == TYP_STRUCT && varDsc->lvStructDoubleAlign) ||
+ (varDsc->lvType == TYP_DOUBLE) || (varDsc->lvType == TYP_LONG))) ||
+ // Did first reg arg require alignment?
+ (codeGen->regSet.rsMaskPreSpillAlign & genRegMask(REG_ARG_LAST));
+
+ noway_assert(cond);
+ noway_assert(sizeofPreSpillRegArgs <=
+ argOffs + TARGET_POINTER_SIZE); // at most one register of alignment
+ }
+ argOffs = sizeofPreSpillRegArgs;
+ }
+
+ noway_assert(argOffs >= sizeofPreSpillRegArgs);
+ int argOffsWithoutPreSpillRegArgs = argOffs - sizeofPreSpillRegArgs;
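+
+ // For example, with 16 bytes of prespilled registers and argOffs == 20,
+ // argOffsWithoutPreSpillRegArgs == 4; a double or long below then rounds that up to 8,
+ // yielding a final argOffs of 24.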
+
+ switch (varDsc->lvType)
+ {
+ case TYP_STRUCT:
+ if (!varDsc->lvStructDoubleAlign)
+ break;
+
+ __fallthrough;
+
+ case TYP_DOUBLE:
+ case TYP_LONG:
+ // We must align up the argOffset to a multiple of 8
+ argOffs = roundUp(argOffsWithoutPreSpillRegArgs, 2 * TARGET_POINTER_SIZE) + sizeofPreSpillRegArgs;
+ break;
+
+ default:
+ // No alignment of argOffs required
+ break;
+ }
+#endif // _TARGET_ARM_
+
+ varDsc->lvStkOffs = argOffs;
+ }
+
+ // For struct promoted parameters we need to set the offsets for both LclVars.
+ //
+ // For a dependent promoted struct we also assign the struct fields stack offset
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !defined(_TARGET_64BIT_)
+ if ((varDsc->TypeGet() == TYP_LONG) && varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 2);
+ fieldVarNum = varDsc->lvFieldLclStart;
+ lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ lvaTable[fieldVarNum + 1].lvStkOffs = varDsc->lvStkOffs + genTypeSize(TYP_INT);
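+ // For example, a promoted TYP_LONG argument at stack offset 8 places its two field
+ // locals at offsets 8 and 12 respectively.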
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ if (varDsc->lvPromotedStruct())
+ {
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ assert(fieldVarNum == varDsc->lvFieldLclStart);
+ lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ }
+ }
+ // For an independent promoted struct field we also assign the parent struct stack offset
+ else if (varDsc->lvIsStructField)
+ {
+ noway_assert(varDsc->lvParentLcl < lvaCount);
+ lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs;
+ }
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
+ {
+ argOffs += argSize;
+ }
+
+ return argOffs;
+}
+#endif // !UNIX_AMD64_ABI
+
+/*****************************************************************************
+ * lvaAssignVirtualFrameOffsetsToLocals() : Assign virtual stack offsets to
+ * locals, temps, and anything else. These will all be negative offsets
+ * (stack grows down) relative to the virtual '0'/return address
+ */
+void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
+{
+ int stkOffs = 0;
+ // codeGen->isFramePointerUsed is set in regalloc phase. Initialize it to a guess for pre-regalloc layout.
+ if (lvaDoneFrameLayout <= PRE_REGALLOC_FRAME_LAYOUT)
+ {
+ codeGen->setFramePointerUsed(codeGen->isFramePointerRequired());
+ }
+
+#ifdef _TARGET_XARCH_
+ // On x86/amd64, the return address has already been pushed by the call instruction in the caller.
+ stkOffs -= sizeof(void*); // return address;
+
+ // TODO-AMD64-CQ: for X64 eventually this should be pushed with all the other
+ // calleeregs. When you fix this, you'll also need to fix
+ // the assert at the bottom of this method
+ if (codeGen->doubleAlignOrFramePointerUsed())
+ {
+ stkOffs -= REGSIZE_BYTES;
+ }
+#endif //_TARGET_XARCH_
+
+ int preSpillSize = 0;
+ bool mustDoubleAlign = false;
+
+#ifdef _TARGET_ARM_
+ mustDoubleAlign = true;
+ preSpillSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+#else // !_TARGET_ARM_
+#if DOUBLE_ALIGN
+ if (genDoubleAlign())
+ {
+ mustDoubleAlign = true; // X86 only
+ }
+#endif
+#endif // !_TARGET_ARM_
+
+#ifdef _TARGET_ARM64_
+ // If the frame pointer is used, then we'll save FP/LR at the bottom of the stack.
+ // Otherwise, we won't store FP, and we'll store LR at the top, with the other callee-save
+ // registers (if any).
+
+ int initialStkOffs = 0;
+ if (info.compIsVarArgs)
+ {
+ // For varargs we always save all of the integer register arguments
+ // so that they are contiguous with the incoming stack arguments.
+ initialStkOffs = MAX_REG_ARG * REGSIZE_BYTES;
+ stkOffs -= initialStkOffs;
+ }
+
+ if (isFramePointerUsed())
+ {
+ // Subtract off FP and LR.
+ assert(compCalleeRegsPushed >= 2);
+ stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES;
+ }
+ else
+ {
+ stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;
+ }
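+
+ // For example, with a frame pointer in use and 10 callee-saved registers pushed
+ // (including FP and LR), stkOffs moves down by 64 bytes here; the remaining 16 bytes
+ // for FP/LR are reserved near the end of this method.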
+
+#else // !_TARGET_ARM64_
+ stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;
+#endif // !_TARGET_ARM64_
+
+ compLclFrameSize = 0;
+
+#ifdef _TARGET_AMD64_
+ // On AMD64, compCalleeRegsPushed includes the float regs (xmm6-xmm15) that
+ // need to be saved, but AMD64 doesn't support push/pop of xmm registers.
+ // Instead we need to allocate space for them on the stack and save them in prolog.
+ // Therefore, we consider xmm registers being saved while computing stack offsets
+ // but space for xmm registers is considered part of compLclFrameSize.
+ // Notes
+ // 1) We need to save the entire 128-bits of xmm register to stack, since amd64
+ // prolog unwind codes allow encoding of an instruction that stores the entire xmm reg
+ // at an offset relative to SP
+ // 2) We adjust frame size so that SP is aligned at 16-bytes after pushing integer registers.
+ // This means while saving the first xmm register to its allocated stack location we might
+ // have to skip 8-bytes. The reason for padding is to use efficient "movaps" to save/restore
+ // xmm registers to/from stack to match Jit64 codegen. Without the aligning on 16-byte
+ // boundary we would have to use movups when offset turns out unaligned. Movaps is more
+ // performant than movups.
+ unsigned calleeFPRegsSavedSize = genCountBits(compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
+ if (calleeFPRegsSavedSize > 0 && ((stkOffs % XMM_REGSIZE_BYTES) != 0))
+ {
+ // Take care of alignment
+ int alignPad = (int)AlignmentPad((unsigned)-stkOffs, XMM_REGSIZE_BYTES);
+ stkOffs -= alignPad;
+ lvaIncrementFrameSize(alignPad);
+ }
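+ // For example, if stkOffs were -40 after the integer register pushes, alignPad would be 8,
+ // bringing stkOffs to -48 so that the xmm save area starts 16-byte aligned.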
+
+ stkOffs -= calleeFPRegsSavedSize;
+ lvaIncrementFrameSize(calleeFPRegsSavedSize);
+
+ // Quirk for VS debug-launch scenario to work
+ if (compVSQuirkStackPaddingNeeded > 0)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAdding VS quirk stack padding of %d bytes between save-reg area and locals\n",
+ compVSQuirkStackPaddingNeeded);
+ }
+#endif // DEBUG
+
+ stkOffs -= compVSQuirkStackPaddingNeeded;
+ lvaIncrementFrameSize(compVSQuirkStackPaddingNeeded);
+ }
+#endif //_TARGET_AMD64_
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARMARCH_)
+ if (ehNeedsPSPSym())
+ {
+ // On ARM/ARM64, if we need a PSPSym, allocate it first, before anything else, including
+ // padding (so we can avoid computing the same padding in the funclet
+ // frame). Note that there is no special padding requirement for the PSPSym.
+ noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer
+ assert(lvaPSPSym != BAD_VAR_NUM); // We should have created the PSPSym variable
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs);
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARMARCH_)
+
+ if (mustDoubleAlign)
+ {
+ if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ // Allocate a pointer sized stack slot, since we may need to double align here
+ // when lvaDoneFrameLayout == FINAL_FRAME_LAYOUT
+ //
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+
+ // If we have any TYP_LONG, TYP_DOUBLE or double aligned structs
+ // then we need to allocate a second pointer sized stack slot,
+ // since we may need to double align that LclVar when we see it
+ // in the loop below. We will just always do this so that the
+ // offsets that we calculate for the stack frame will always
+ // be greater (or equal) to what they can be in the final layout.
+ //
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ else // FINAL_FRAME_LAYOUT
+ {
+ if (((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) != 0)
+ {
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ // We should now have a double-aligned (stkOffs+preSpillSize)
+ noway_assert(((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) == 0);
+ }
+ }
+
+ if (lvaMonAcquired != BAD_VAR_NUM)
+ {
+ // This var must go first, in what is called the 'frame header' for EnC so that it is
+ // preserved when remapping occurs. See vm\eetwain.cpp for detailed comment specifying frame
+ // layout requirements for EnC to work.
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaMonAcquired, lvaLclSize(lvaMonAcquired), stkOffs);
+ }
+
+ if (opts.compNeedSecurityCheck)
+ {
+#ifdef JIT32_GCENCODER
+ /* This can't work without an explicit frame, so make sure */
+ noway_assert(codeGen->isFramePointerUsed());
+#endif
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaSecurityObject, TARGET_POINTER_SIZE, stkOffs);
+ }
+
+ if (compLocallocUsed)
+ {
+#ifdef JIT32_GCENCODER
+ noway_assert(codeGen->isFramePointerUsed()); // else offsets of locals of frameless methods will be incorrect
+#endif
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaLocAllocSPvar, TARGET_POINTER_SIZE, stkOffs);
+ }
+
+ if (lvaReportParamTypeArg())
+ {
+#ifdef JIT32_GCENCODER
+ noway_assert(codeGen->isFramePointerUsed());
+#endif
+ // For CORINFO_CALLCONV_PARAMTYPE (if needed)
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ lvaCachedGenericContextArgOffs = stkOffs;
+ }
+#ifndef JIT32_GCENCODER
+ else if (lvaKeepAliveAndReportThis())
+ {
+ // When "this" is also used as generic context arg.
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ lvaCachedGenericContextArgOffs = stkOffs;
+ }
+#endif
+
+#if !FEATURE_EH_FUNCLETS
+ /* If we need space for slots for shadow SP, reserve it now */
+ if (ehNeedsShadowSPslots())
+ {
+ noway_assert(codeGen->isFramePointerUsed()); // else offsets of locals of frameless methods will be incorrect
+ if (!lvaReportParamTypeArg())
+ {
+#ifndef JIT32_GCENCODER
+ if (!lvaKeepAliveAndReportThis())
+#endif
+ {
+ // In order to keep the gc info encoding smaller, the VM assumes that all methods with EH
+ // have also saved space for a ParamTypeArg, so we need to do that here
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ }
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaShadowSPslotsVar, lvaLclSize(lvaShadowSPslotsVar), stkOffs);
+ }
+#endif // !FEATURE_EH_FUNCLETS
+
+ if (compGSReorderStackLayout)
+ {
+ assert(getNeedsGSSecurityCookie());
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs);
+ }
+
+ /*
+ If we're supposed to track lifetimes of pointer temps, we'll
+ assign frame offsets in the following order:
+
+ non-pointer local variables (also untracked pointer variables)
+ pointer local variables
+ pointer temps
+ non-pointer temps
+ */
+
+ enum Allocation
+ {
+ ALLOC_NON_PTRS = 0x1, // assign offsets to non-ptr
+ ALLOC_PTRS = 0x2, // Second pass, assign offsets to tracked ptrs
+ ALLOC_UNSAFE_BUFFERS = 0x4,
+ ALLOC_UNSAFE_BUFFERS_WITH_PTRS = 0x8
+ };
+ UINT alloc_order[5];
+
+ unsigned int cur = 0;
+
+ if (compGSReorderStackLayout)
+ {
+ noway_assert(getNeedsGSSecurityCookie());
+
+ if (codeGen->isFramePointerUsed())
+ {
+ alloc_order[cur++] = ALLOC_UNSAFE_BUFFERS;
+ alloc_order[cur++] = ALLOC_UNSAFE_BUFFERS_WITH_PTRS;
+ }
+ }
+
+ bool tempsAllocated = false;
+
+#ifdef _TARGET_ARM_
+ // On ARM, SP based offsets use smaller encoding. Since temps are relatively
+ // rarer than lcl usage, allocate them farther from SP.
+ if (!opts.MinOpts() && !compLocallocUsed)
+#else
+ if (lvaTempsHaveLargerOffsetThanVars() && !codeGen->isFramePointerUsed())
+#endif
+ {
+ // Because we want the temps to have a larger offset than locals
+ // and we're not using a frame pointer, we have to place the temps
+ // above the vars. Otherwise we place them after the vars (at the
+ // bottom of the frame).
+ noway_assert(!tempsAllocated);
+ stkOffs = lvaAllocateTemps(stkOffs, mustDoubleAlign);
+ tempsAllocated = true;
+ }
+
+ alloc_order[cur++] = ALLOC_NON_PTRS;
+
+ if (opts.compDbgEnC)
+ {
+ /* We will use just one pass, and assign offsets to all variables */
+ alloc_order[cur - 1] |= ALLOC_PTRS;
+ noway_assert(compGSReorderStackLayout == false);
+ }
+ else
+ {
+ alloc_order[cur++] = ALLOC_PTRS;
+ }
+
+ if (!codeGen->isFramePointerUsed() && compGSReorderStackLayout)
+ {
+ alloc_order[cur++] = ALLOC_UNSAFE_BUFFERS_WITH_PTRS;
+ alloc_order[cur++] = ALLOC_UNSAFE_BUFFERS;
+ }
+
+ alloc_order[cur] = 0;
+
+ noway_assert(cur < sizeof(alloc_order) / sizeof(alloc_order[0]));
+
+ // Force first pass to happen
+ UINT assignMore = 0xFFFFFFFF;
+ bool have_LclVarDoubleAlign = false;
+
+ for (cur = 0; alloc_order[cur]; cur++)
+ {
+ if ((assignMore & alloc_order[cur]) == 0)
+ {
+ continue;
+ }
+
+ assignMore = 0;
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Ignore field locals of the promotion type PROMOTION_TYPE_FIELD_DEPENDENT.
+ In other words, we will not calculate the "base" address of the struct local if
+ the promotion type is PROMOTION_TYPE_FIELD_DEPENDENT.
+ */
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ continue;
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+ // The scratch mem is used for the outgoing arguments, and it must be absolutely last
+ if (lclNum == lvaOutgoingArgSpaceVar)
+ {
+ continue;
+ }
+#endif
+
+ bool allocateOnFrame = varDsc->lvOnFrame;
+
+ if (varDsc->lvRegister && (lvaDoneFrameLayout == REGALLOC_FRAME_LAYOUT) &&
+ ((varDsc->TypeGet() != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)))
+ {
+ allocateOnFrame = false;
+ }
+
+ /* Ignore variables that are not on the stack frame */
+
+ if (!allocateOnFrame)
+ {
+ /* For EnC, all variables have to be allocated space on the
+ stack, even though they may actually be enregistered. This
+ way, the frame layout can be directly inferred from the
+ locals-sig.
+ */
+
+ if (!opts.compDbgEnC)
+ {
+ continue;
+ }
+ else if (lclNum >= info.compLocalsCount)
+ { // ignore temps for EnC
+ continue;
+ }
+ }
+ else if (lvaGSSecurityCookie == lclNum && getNeedsGSSecurityCookie())
+ {
+ continue; // This is allocated outside of this loop.
+ }
+
+ // These need to be located as the very first variables (highest memory address)
+ // and so they have already been assigned an offset
+ if (
+#if FEATURE_EH_FUNCLETS
+ lclNum == lvaPSPSym ||
+#else
+ lclNum == lvaShadowSPslotsVar ||
+#endif // FEATURE_EH_FUNCLETS
+ lclNum == lvaLocAllocSPvar || lclNum == lvaSecurityObject)
+ {
+ assert(varDsc->lvStkOffs != BAD_STK_OFFS);
+ continue;
+ }
+
+ if (lclNum == lvaMonAcquired)
+ {
+ continue;
+ }
+
+ // This should be low on the stack. Hence, it will be assigned later.
+ if (lclNum == lvaStubArgumentVar)
+ {
+#ifdef JIT32_GCENCODER
+ noway_assert(codeGen->isFramePointerUsed());
+#endif
+ continue;
+ }
+
+ // This should be low on the stack. Hence, it will be assigned later.
+ if (lclNum == lvaInlinedPInvokeFrameVar)
+ {
+ noway_assert(codeGen->isFramePointerUsed());
+ continue;
+ }
+
+ if (varDsc->lvIsParam)
+ {
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
+
+ // On Windows AMD64 we can use the caller-reserved stack area that is already setup
+ assert(varDsc->lvStkOffs != BAD_STK_OFFS);
+ continue;
+
+#else // !_TARGET_AMD64_
+
+ // A register argument that is not enregistered ends up as
+ // a local variable which will need stack frame space.
+ //
+ if (!varDsc->lvIsRegArg)
+ continue;
+
+#ifdef _TARGET_ARM64_
+ if (info.compIsVarArgs)
+ {
+ // Stack offset to varargs (parameters) should point to home area which will be preallocated.
+ varDsc->lvStkOffs =
+ -initialStkOffs + genMapIntRegNumToRegArgNum(varDsc->GetArgReg()) * REGSIZE_BYTES;
+ continue;
+ }
+#endif
+
+#ifdef _TARGET_ARM_
+ // On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg
+ // in the prolog, thus they don't need stack frame space.
+ //
+ if ((codeGen->regSet.rsMaskPreSpillRegs(false) & genRegMask(varDsc->lvArgReg)) != 0)
+ {
+ assert(varDsc->lvStkOffs != BAD_STK_OFFS);
+ continue;
+ }
+#endif
+
+#endif // !_TARGET_AMD64_
+ }
+
+ /* Make sure the type is appropriate */
+
+ if (varDsc->lvIsUnsafeBuffer && compGSReorderStackLayout)
+ {
+ if (varDsc->lvIsPtr)
+ {
+ if ((alloc_order[cur] & ALLOC_UNSAFE_BUFFERS_WITH_PTRS) == 0)
+ {
+ assignMore |= ALLOC_UNSAFE_BUFFERS_WITH_PTRS;
+ continue;
+ }
+ }
+ else
+ {
+ if ((alloc_order[cur] & ALLOC_UNSAFE_BUFFERS) == 0)
+ {
+ assignMore |= ALLOC_UNSAFE_BUFFERS;
+ continue;
+ }
+ }
+ }
+ else if (varTypeIsGC(varDsc->TypeGet()) && varDsc->lvTracked)
+ {
+ if ((alloc_order[cur] & ALLOC_PTRS) == 0)
+ {
+ assignMore |= ALLOC_PTRS;
+ continue;
+ }
+ }
+ else
+ {
+ if ((alloc_order[cur] & ALLOC_NON_PTRS) == 0)
+ {
+ assignMore |= ALLOC_NON_PTRS;
+ continue;
+ }
+ }
+
+ /* Need to align the offset? */
+
+ if (mustDoubleAlign && (varDsc->lvType == TYP_DOUBLE // Align doubles for ARM and x86
+#ifdef _TARGET_ARM_
+ || varDsc->lvType == TYP_LONG // Align longs for ARM
+#endif
+#ifndef _TARGET_64BIT_
+ || varDsc->lvStructDoubleAlign // Align when lvStructDoubleAlign is true
+#endif // !_TARGET_64BIT_
+ ))
+ {
+ noway_assert((compLclFrameSize % TARGET_POINTER_SIZE) == 0);
+
+ if ((lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) && !have_LclVarDoubleAlign)
+ {
+ // If this is the first TYP_LONG, TYP_DOUBLE or double aligned struct
+ // that we have seen in this loop, then we allocate a pointer sized
+ // stack slot since we may need to double align this LclVar
+ // when lvaDoneFrameLayout == FINAL_FRAME_LAYOUT
+ //
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ if (((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) != 0)
+ {
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+
+ // We should now have a double-aligned (stkOffs+preSpillSize)
+ noway_assert(((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) == 0);
+ }
+
+ // Remember that we had to double align a LclVar
+ have_LclVarDoubleAlign = true;
+ }
+
+ // Reserve the stack space for this variable
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclSize(lclNum), stkOffs);
+#ifdef _TARGET_ARM64_
+ // If we have an incoming register argument that has a struct promoted field
+ // then we need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar
+ //
+ if (varDsc->lvIsRegArg && varDsc->lvPromotedStruct())
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ }
+#endif
+ }
+ }
+
+ if (getNeedsGSSecurityCookie() && !compGSReorderStackLayout)
+ {
+ // LOCALLOC used, but we have no unsafe buffer. Allocate the cookie last, close to the localloc buffer.
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs);
+ }
+
+ if (tempsAllocated == false)
+ {
+ /*-------------------------------------------------------------------------
+ *
+ * Now the temps
+ *
+ *-------------------------------------------------------------------------
+ */
+ stkOffs = lvaAllocateTemps(stkOffs, mustDoubleAlign);
+ }
+
+ /*-------------------------------------------------------------------------
+ *
+ * Now do some final stuff
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ // lvaInlinedPInvokeFrameVar and lvaStubArgumentVar need to be assigned last
+ // Important: The stack walker depends on lvaStubArgumentVar immediately
+ // following lvaInlinedPInvokeFrameVar in the frame.
+
+ if (lvaStubArgumentVar != BAD_VAR_NUM)
+ {
+#ifdef JIT32_GCENCODER
+ noway_assert(codeGen->isFramePointerUsed());
+#endif
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaStubArgumentVar, lvaLclSize(lvaStubArgumentVar), stkOffs);
+ }
+
+ if (lvaInlinedPInvokeFrameVar != BAD_VAR_NUM)
+ {
+ noway_assert(codeGen->isFramePointerUsed());
+ stkOffs =
+ lvaAllocLocalAndSetVirtualOffset(lvaInlinedPInvokeFrameVar, lvaLclSize(lvaInlinedPInvokeFrameVar), stkOffs);
+ }
+
+ if (mustDoubleAlign)
+ {
+ if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ // Allocate a pointer sized stack slot, since we may need to double align here
+ // when lvaDoneFrameLayout == FINAL_FRAME_LAYOUT
+ //
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+
+ if (have_LclVarDoubleAlign)
+ {
+ // If we have any TYP_LONG, TYP_DOUBLE or double aligned structs
+ // then we need to allocate a second pointer sized stack slot,
+ // since we may need to double align the last LclVar that we saw
+ // in the loop above. We do this so that the offsets that we
+ // calculate for the stack frame are always greater than they will
+ // be in the final layout.
+ //
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ }
+ else // FINAL_FRAME_LAYOUT
+ {
+ if (((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) != 0)
+ {
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ // We should now have a double-aligned (stkOffs+preSpillSize)
+ noway_assert(((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) == 0);
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_AMD64_)
+ if (ehNeedsPSPSym())
+ {
+ // On AMD64, if we need a PSPSym, allocate it last, immediately above the outgoing argument
+ // space. Any padding will be higher on the stack than this
+ // (including the padding added by lvaAlignFrame()).
+ noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer
+ assert(lvaPSPSym != BAD_VAR_NUM); // We should have created the PSPSym variable
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs);
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_AMD64_)
+
+#ifdef _TARGET_ARM64_
+ if (isFramePointerUsed())
+ {
+ // Create space for saving FP and LR.
+ stkOffs -= 2 * REGSIZE_BYTES;
+ }
+#endif // _TARGET_ARM64_
+
+#if FEATURE_FIXED_OUT_ARGS
+ if (lvaOutgoingArgSpaceSize > 0)
+ {
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No 4 slots for outgoing params on System V.
+ noway_assert(lvaOutgoingArgSpaceSize >= (4 * sizeof(void*)));
+#endif
+ noway_assert((lvaOutgoingArgSpaceSize % sizeof(void*)) == 0);
+
+ // Give it a value so we can avoid asserts in CHK builds.
+ // Since this will always use an SP relative offset of zero
+ // at the end of lvaFixVirtualFrameOffsets, it will be set to absolute '0'
+
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaOutgoingArgSpaceVar, lvaLclSize(lvaOutgoingArgSpaceVar), stkOffs);
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ // compLclFrameSize equals our negated virtual stack offset minus the pushed registers and return address
+ // and the pushed frame pointer register which for some strange reason isn't part of 'compCalleeRegsPushed'.
+ int pushedCount = compCalleeRegsPushed;
+
+#ifdef _TARGET_ARM64_
+ if (info.compIsVarArgs)
+ {
+ pushedCount += MAX_REG_ARG;
+ }
+#endif
+
+#ifdef _TARGET_XARCH_
+ if (codeGen->doubleAlignOrFramePointerUsed())
+ {
+ pushedCount += 1; // pushed EBP (frame pointer)
+ }
+ pushedCount += 1; // pushed PC (return address)
+#endif
+
+ noway_assert(compLclFrameSize == (unsigned)-(stkOffs + (pushedCount * (int)sizeof(void*))));
+}
+
+int Compiler::lvaAllocLocalAndSetVirtualOffset(unsigned lclNum, unsigned size, int stkOffs)
+{
+ noway_assert(lclNum != BAD_VAR_NUM);
+
+#ifdef _TARGET_64BIT_
+ // Before final frame layout, assume the worst case, that every >=8 byte local will need
+ // maximum padding to be aligned. This is because we generate code based on the stack offset
+ // computed during tentative frame layout. These offsets cannot get bigger during final
+ // frame layout, as that would possibly require different code generation (for example,
+ // using a 4-byte offset instead of a 1-byte offset in an instruction). The offsets can get
+ // smaller. It is possible there is different alignment at the point locals are allocated
+ // between tentative and final frame layout which would introduce padding between locals
+ // and thus increase the offset (from the stack pointer) of one of the locals. Hence the
+ // need to assume the worst alignment before final frame layout.
+ // We could probably improve this by sorting all the objects by alignment,
+ // such that all 8 byte objects are together, 4 byte objects are together, etc., which
+ // would require at most one alignment padding per group.
+ //
+ // TYP_SIMD structs locals have alignment preference given by getSIMDTypeAlignment() for
+ // better performance.
+ if ((size >= 8) && ((lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) || ((stkOffs % 8) != 0)
+#if defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES
+ || lclVarIsSIMDType(lclNum)
+#endif
+ ))
+ {
+ // Note that stack offsets are negative
+ assert(stkOffs < 0);
+
+ // alignment padding
+ unsigned pad = 0;
+#if defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES
+ if (lclVarIsSIMDType(lclNum) && !lvaIsImplicitByRefLocal(lclNum))
+ {
+ int alignment = getSIMDTypeAlignment(lvaTable[lclNum].lvType);
+
+ if (stkOffs % alignment != 0)
+ {
+ if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ pad = alignment - 1;
+ // Note that all the objects will probably be misaligned, but we'll fix that in final layout.
+ }
+ else
+ {
+ pad = alignment + (stkOffs % alignment); // +1 to +(alignment-1) bytes
+ }
+ }
+ }
+ else
+#endif // FEATURE_SIMD && ALIGN_SIMD_TYPES
+ {
+ if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ pad = 7;
+ // Note that all the objects will probably be misaligned, but we'll fix that in final layout.
+ }
+ else
+ {
+ pad = 8 + (stkOffs % 8); // +1 to +7 bytes
+ }
+ }
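+ // For example, at final layout a local reached with stkOffs == -20 gets pad == 4
+ // (8 + (-20 % 8)), moving it to -24, an 8-byte boundary; before final layout the
+ // worst-case pad of 7 is assumed instead.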
+ // Will the pad ever be anything except 4? Do we put smaller-than-4-sized objects on the stack?
+ lvaIncrementFrameSize(pad);
+ stkOffs -= pad;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Pad ");
+ gtDispLclVar(lclNum, /*pad*/ false);
+ printf(", size=%d, stkOffs=%c0x%x, pad=%d\n", size, stkOffs < 0 ? '-' : '+',
+ stkOffs < 0 ? -stkOffs : stkOffs, pad);
+ }
+#endif
+ }
+#endif // _TARGET_64BIT_
+
+ /* Reserve space on the stack by bumping the frame size */
+
+ lvaIncrementFrameSize(size);
+ stkOffs -= size;
+ lvaTable[lclNum].lvStkOffs = stkOffs;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Assign ");
+ gtDispLclVar(lclNum, /*pad*/ false);
+ printf(", size=%d, stkOffs=%c0x%x\n", size, stkOffs < 0 ? '-' : '+', stkOffs < 0 ? -stkOffs : stkOffs);
+ }
+#endif
+
+ return stkOffs;
+}
+
+#ifdef _TARGET_AMD64_
+/*****************************************************************************
+ * lvaIsCalleeSavedIntRegCountEven() : returns true if the number of integer registers
+ * pushed onto the stack is even, including RBP if it is used as the frame pointer
+ *
+ * Note that this excludes the return address (PC) pushed by the caller. To know whether
+ * the SP offset after pushing the integer registers is aligned, we need to negate
+ * the result of this routine.
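+ * For example, pushing RBP plus five other callee-saved registers gives regsPushed == 6
+ * (even), so SP remains 8 bytes short of 16-byte alignment after the pushes, just as it
+ * was right after the call pushed the return address.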
+ */
+bool Compiler::lvaIsCalleeSavedIntRegCountEven()
+{
+ unsigned regsPushed = compCalleeRegsPushed + (codeGen->isFramePointerUsed() ? 1 : 0);
+ return (regsPushed % (16 / REGSIZE_BYTES)) == 0;
+}
+#endif //_TARGET_AMD64_
+
+/*****************************************************************************
+ * lvaAlignFrame() : After allocating everything on the frame, reserve any
+ * extra space needed to keep the frame aligned
+ */
+void Compiler::lvaAlignFrame()
+{
+#if defined(_TARGET_AMD64_)
+
+ // Leaf frames do not need full alignment, but the unwind info is smaller if we
+ // are at least 8 byte aligned (and we assert as much)
+ if ((compLclFrameSize % 8) != 0)
+ {
+ lvaIncrementFrameSize(8 - (compLclFrameSize % 8));
+ }
+ else if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ // If we are not doing final layout, we don't know the exact value of compLclFrameSize
+ // and thus do not know how much we will need to add in order to be aligned.
+ // We add 8 so compLclFrameSize is still a multiple of 8.
+ lvaIncrementFrameSize(8);
+ }
+ assert((compLclFrameSize % 8) == 0);
+
+ // Ensure that the stack is always 16-byte aligned by grabbing an unused QWORD
+ // if needed, keeping in mind that SP is off by 8 because of the pushed return address.
+ // And don't forget that compCalleeRegsPushed does *not* include RBP if we are
+ // using it as the frame pointer.
+ //
+ bool regPushedCountAligned = lvaIsCalleeSavedIntRegCountEven();
+ bool lclFrameSizeAligned = (compLclFrameSize % 16) == 0;
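+
+ // For example, an even number of pushed integer registers (counting RBP) together with a
+ // non-zero compLclFrameSize that is a multiple of 16 leaves SP 8 bytes off 16-byte
+ // alignment, so the check below adds one more QWORD of padding.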
+
+ // If this isn't the final frame layout, assume we have to push an extra QWORD
+ // Just so the offsets are true upper limits.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef UNIX_AMD64_ABI
+ // The compNeedToAlignFrame flag indicates whether there is a need to align the frame.
+ // On AMD64-Windows, if there are calls, 4 slots for the outgoing args are allocated, except for
+ // FastTailCall. These slots make the frame size non-zero, so the alignment logic will be called.
+ // On AMD64-Unix, there are no such slots, so it is possible to have calls in a method with a frame size of 0,
+ // in which case the frame alignment logic won't kick in. This flag takes care of the AMD64-Unix case by
+ // remembering that there are calls and making sure the frame alignment logic is executed.
+ bool stackNeedsAlignment = (compLclFrameSize != 0 || opts.compNeedToAlignFrame);
+#else // !UNIX_AMD64_ABI
+ bool stackNeedsAlignment = compLclFrameSize != 0;
+#endif // !UNIX_AMD64_ABI
+ if ((!codeGen->isFramePointerUsed() && (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)) ||
+ (stackNeedsAlignment && (regPushedCountAligned == lclFrameSizeAligned)))
+ {
+ lvaIncrementFrameSize(REGSIZE_BYTES);
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ // The stack on ARM64 must be 16 byte aligned.
+
+ // First, align up to 8.
+ if ((compLclFrameSize % 8) != 0)
+ {
+ lvaIncrementFrameSize(8 - (compLclFrameSize % 8));
+ }
+ else if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ // If we are not doing final layout, we don't know the exact value of compLclFrameSize
+ // and thus do not know how much we will need to add in order to be aligned.
+ // We add 8 so compLclFrameSize is still a multiple of 8.
+ lvaIncrementFrameSize(8);
+ }
+ assert((compLclFrameSize % 8) == 0);
+
+ // Ensure that the stack is always 16-byte aligned by grabbing an unused QWORD
+ // if needed.
+ bool regPushedCountAligned = (compCalleeRegsPushed % (16 / REGSIZE_BYTES)) == 0;
+ bool lclFrameSizeAligned = (compLclFrameSize % 16) == 0;
+
+ // If this isn't the final frame layout, assume we have to push an extra QWORD
+ // Just so the offsets are true upper limits.
+ if ((lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) || (regPushedCountAligned != lclFrameSizeAligned))
+ {
+ lvaIncrementFrameSize(REGSIZE_BYTES);
+ }
+
+#elif defined(_TARGET_ARM_)
+
+ // Ensure that stack offsets will be double-aligned by grabbing an unused DWORD if needed.
+ //
+ bool lclFrameSizeAligned = (compLclFrameSize % sizeof(double)) == 0;
+ bool regPushedCountAligned = ((compCalleeRegsPushed + genCountBits(codeGen->regSet.rsMaskPreSpillRegs(true))) %
+ (sizeof(double) / sizeof(void*))) == 0;
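+
+ // For example, 5 callee-saved registers plus 4 prespilled registers is an odd count; if
+ // compLclFrameSize is already a multiple of 8, the two flags disagree and a 4-byte pad
+ // is added below.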
+
+ if (regPushedCountAligned != lclFrameSizeAligned)
+ {
+ lvaIncrementFrameSize(sizeof(void*));
+ }
+
+#elif defined(_TARGET_X86_)
+
+ if (genDoubleAlign())
+ {
+ // Double Frame Alignment for x86 is handled in Compiler::lvaAssignVirtualFrameOffsetsToLocals()
+
+ if (compLclFrameSize == 0)
+ {
+ // This can only happen with JitStress=1 or JitDoubleAlign=2
+ lvaIncrementFrameSize(sizeof(void*));
+ }
+ }
+
+#else
+ NYI("TARGET specific lvaAlignFrame");
+#endif // !_TARGET_AMD64_
+}
+
+/*****************************************************************************
+ * lvaAssignFrameOffsetsToPromotedStructs() : Assign offsets to fields
+ * within a promoted struct (worker for lvaAssignFrameOffsets).
+ */
+void Compiler::lvaAssignFrameOffsetsToPromotedStructs()
+{
+ LclVarDsc* varDsc = lvaTable;
+ for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ // For promoted struct fields that are params, we will
+ // assign their offsets in lvaAssignVirtualFrameOffsetToArg().
+ // This is not true for the System V systems since there is no
+ // outgoing args space. Assign the dependently promoted fields properly.
+ //
+ if (varDsc->lvIsStructField
+#ifndef UNIX_AMD64_ABI
+ // For System V platforms there is no outgoing args space.
+ // A register passed struct arg is homed on the stack in a separate local var.
+ // The offset of these structs is already calculated in the lvaAssignVirtualFrameOffsetToArg method.
+ // Make sure the code below is not executed for these structs and the offset is not changed.
+ && !varDsc->lvIsParam
+#endif // UNIX_AMD64_ABI
+ )
+ {
+ LclVarDsc* parentvarDsc = &lvaTable[varDsc->lvParentLcl];
+ lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc);
+
+ if (promotionType == PROMOTION_TYPE_INDEPENDENT)
+ {
+ // The stack offset for these field locals must have been calculated
+ // by the normal frame offset assignment.
+ continue;
+ }
+ else
+ {
+ noway_assert(promotionType == PROMOTION_TYPE_DEPENDENT);
+ noway_assert(varDsc->lvOnFrame);
+ varDsc->lvStkOffs = parentvarDsc->lvStkOffs + varDsc->lvFldOffset;
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ * lvaAllocateTemps() : Assign virtual offsets to temps (always negative).
+ */
+int Compiler::lvaAllocateTemps(int stkOffs, bool mustDoubleAlign)
+{
+ unsigned spillTempSize = 0;
+
+ if (lvaDoneFrameLayout == FINAL_FRAME_LAYOUT)
+ {
+ int preSpillSize = 0;
+#ifdef _TARGET_ARM_
+ preSpillSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegs(true)) * TARGET_POINTER_SIZE;
+#endif
+ bool assignDone;
+ bool assignNptr;
+ bool assignPtrs = true;
+
+ /* Allocate temps */
+
+ if (TRACK_GC_TEMP_LIFETIMES)
+ {
+ /* first pointers, then non-pointers in second pass */
+ assignNptr = false;
+ assignDone = false;
+ }
+ else
+ {
+ /* Pointers and non-pointers together in single pass */
+ assignNptr = true;
+ assignDone = true;
+ }
+
+ assert(tmpAllFree());
+
+ AGAIN2:
+
+ for (TempDsc* temp = tmpListBeg(); temp != nullptr; temp = tmpListNxt(temp))
+ {
+ var_types tempType = temp->tdTempType();
+ unsigned size;
+
+ /* Make sure the type is appropriate */
+
+ if (!assignPtrs && varTypeIsGC(tempType))
+ {
+ continue;
+ }
+ if (!assignNptr && !varTypeIsGC(tempType))
+ {
+ continue;
+ }
+
+ size = temp->tdTempSize();
+
+ /* Figure out and record the stack offset of the temp */
+
+ /* Need to align the offset? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+ if (varTypeIsGC(tempType) && ((stkOffs % TARGET_POINTER_SIZE) != 0))
+ {
+ // Calculate 'pad' as the number of bytes to align up 'stkOffs' to be a multiple of TARGET_POINTER_SIZE
+ // In practice this is really just a fancy way of writing 4 (as all stack locations are at least 4-byte
+ // aligned). Note stkOffs is always negative, so (stkOffs % TARGET_POINTER_SIZE) yields a negative
+ // value.
+ //
+ int alignPad = (int)AlignmentPad((unsigned)-stkOffs, TARGET_POINTER_SIZE);
+
+ spillTempSize += alignPad;
+ lvaIncrementFrameSize(alignPad);
+ stkOffs -= alignPad;
+
+ noway_assert((stkOffs % TARGET_POINTER_SIZE) == 0);
+ }
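+ // For example, if a preceding 4-byte temp left stkOffs at -12, a GC temp gets a 4-byte
+ // alignPad first, so after its own 8 bytes are allocated its offset (-24) is pointer aligned.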
+#endif
+
+ if (mustDoubleAlign && (tempType == TYP_DOUBLE)) // Align doubles for x86 and ARM
+ {
+ noway_assert((compLclFrameSize % TARGET_POINTER_SIZE) == 0);
+
+ if (((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) != 0)
+ {
+ spillTempSize += TARGET_POINTER_SIZE;
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ // We should now have a double-aligned (stkOffs+preSpillSize)
+ noway_assert(((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) == 0);
+ }
+
+ spillTempSize += size;
+ lvaIncrementFrameSize(size);
+ stkOffs -= size;
+ temp->tdSetTempOffs(stkOffs);
+ }
+#ifdef _TARGET_ARM_
+ // Only the ARM platform requires that we have an accurate estimate for spillTempSize
+ noway_assert(spillTempSize <= lvaGetMaxSpillTempSize());
+#endif
+
+ /* If we've only assigned some temps, go back and do the rest now */
+
+ if (!assignDone)
+ {
+ assignNptr = !assignNptr;
+ assignPtrs = !assignPtrs;
+ assignDone = true;
+
+ goto AGAIN2;
+ }
+ }
+ else // We haven't run codegen, so there are no Spill temps yet!
+ {
+ unsigned size = lvaGetMaxSpillTempSize();
+
+ lvaIncrementFrameSize(size);
+ stkOffs -= size;
+ }
+
+ return stkOffs;
+}
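+
+// Illustrative sketch of the GC-temp alignment step in lvaAllocateTemps above
+// (numbers are hypothetical, assuming TARGET_POINTER_SIZE == 8 on a 64-bit target):
+//
+//     stkOffs == -20  ->  AlignmentPad(20, 8) == 4, so stkOffs becomes -24
+//     stkOffs == -24  ->  already a multiple of 8, so no padding is added
+//
+// The frame then grows by the pad plus the temp's size, and the temp's offset is
+// recorded at the new (more negative) stkOffs.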
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Dump the register a local is in right now.
+ * For non-LSRA, this will be the register it is always in. For LSRA, it's only the current
+ * location, since the location changes and it is updated throughout code generation based on
+ * LSRA register assignments.
+ */
+
+void Compiler::lvaDumpRegLocation(unsigned lclNum)
+{
+ LclVarDsc* varDsc = lvaTable + lclNum;
+ var_types type = varDsc->TypeGet();
+
+#if FEATURE_STACK_FP_X87
+ if (varTypeIsFloating(type))
+ {
+ printf("fpu stack ");
+ }
+ else
+#endif
+ if (isRegPairType(type))
+ {
+ if (!doLSRA())
+ {
+ noway_assert(varDsc->lvRegNum != REG_STK);
+ }
+ if (doLSRA() && varDsc->lvRegNum == REG_STK)
+ {
+ /* Hi-only enregistered long */
+ int offset = varDsc->lvStkOffs;
+ printf("%-3s:[%1s0x%02X]",
+ getRegName(varDsc->lvOtherReg), // hi32
+ (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
+ }
+ else if (varDsc->lvOtherReg != REG_STK)
+ {
+ /* Fully enregistered long */
+ printf("%3s:%-3s ",
+ getRegName(varDsc->lvOtherReg), // hi32
+ getRegName(varDsc->lvRegNum)); // lo32
+ }
+ else
+ {
+ /* Partially enregistered long */
+ int offset = varDsc->lvStkOffs + 4;
+ printf("[%1s0x%02X]:%-3s", (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset),
+ getRegName(varDsc->lvRegNum)); // lo32
+ }
+ }
+#ifdef _TARGET_ARM_
+ else if (varDsc->TypeGet() == TYP_DOUBLE)
+ {
+ printf("%3s:%-3s ", getRegName(varDsc->lvRegNum), getRegName(varDsc->lvOtherReg));
+ }
+#endif
+ else
+ {
+ printf("%3s ", getRegName(varDsc->lvRegNum));
+ }
+}
+
+/*****************************************************************************
+ *
+ * Dump the frame location assigned to a local.
+ * For non-LSRA, this will only be valid if there is no assigned register.
+ * For LSRA, it's the home location, even though the variable doesn't always live
+ * in its home location.
+ */
+
+void Compiler::lvaDumpFrameLocation(unsigned lclNum)
+{
+ int offset;
+ regNumber baseReg;
+
+#ifdef _TARGET_ARM_
+ offset = lvaFrameAddress(lclNum, compLocallocUsed, &baseReg, 0);
+#else
+ bool EBPbased;
+ offset = lvaFrameAddress(lclNum, &EBPbased);
+ baseReg = EBPbased ? REG_FPBASE : REG_SPBASE;
+#endif
+
+ printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
+}
+
+/*****************************************************************************
+ *
+ * dump a single lvaTable entry
+ */
+
+void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t refCntWtdWidth)
+{
+ LclVarDsc* varDsc = lvaTable + lclNum;
+ var_types type = varDsc->TypeGet();
+
+ if (curState == INITIAL_FRAME_LAYOUT)
+ {
+ printf("; ");
+ gtDispLclVar(lclNum);
+
+ printf(" %7s ", varTypeName(type));
+ if (genTypeSize(type) == 0)
+ {
+ printf("(%2d) ", lvaLclSize(lclNum));
+ }
+ }
+ else
+ {
+ if (varDsc->lvRefCnt == 0)
+ {
+ // Print this with a special indicator that the variable is unused. Even though the
+ // variable itself is unused, it might be a struct that is promoted, so seeing it
+ // can be useful when looking at the promoted struct fields. It's also weird to see
+ // missing var numbers if these aren't printed.
+ printf(";* ");
+ }
+ else
+#if FEATURE_FIXED_OUT_ARGS
+ if ((lclNum == lvaOutgoingArgSpaceVar) && (lvaLclSize(lclNum) == 0))
+ {
+ // Similar to above; print this anyway.
+ printf(";# ");
+ }
+ else
+#endif
+ {
+ printf("; ");
+ }
+
+ gtDispLclVar(lclNum);
+
+ printf("[V%02u", lclNum);
+ if (varDsc->lvTracked)
+ {
+ printf(",T%02u]", varDsc->lvVarIndex);
+ }
+ else
+ {
+ printf(" ]");
+ }
+
+ printf(" (%3u,%*s)", varDsc->lvRefCnt, (int)refCntWtdWidth, refCntWtd2str(varDsc->lvRefCntWtd));
+
+ printf(" %7s ", varTypeName(type));
+ if (genTypeSize(type) == 0)
+ {
+ printf("(%2d) ", lvaLclSize(lclNum));
+ }
+ else
+ {
+ printf(" -> ");
+ }
+
+ // The register or stack location field is 11 characters wide.
+ if (varDsc->lvRefCnt == 0)
+ {
+ printf("zero-ref ");
+ }
+ else if (varDsc->lvRegister != 0)
+ {
+ // It's always a register, and always in the same register.
+ lvaDumpRegLocation(lclNum);
+ }
+ else if (varDsc->lvOnFrame == 0)
+ {
+ printf("registers ");
+ }
+ else
+ {
+ // For RyuJIT backend, it might be in a register part of the time, but it will definitely have a stack home
+ // location. Otherwise, it's always on the stack.
+ if (lvaDoneFrameLayout != NO_FRAME_LAYOUT)
+ {
+ lvaDumpFrameLocation(lclNum);
+ }
+ }
+ }
+
+ if (varDsc->lvIsHfaRegArg())
+ {
+ if (varDsc->lvHfaTypeIsFloat())
+ {
+ printf(" (enregistered HFA: float) ");
+ }
+ else
+ {
+ printf(" (enregistered HFA: double)");
+ }
+ }
+
+ if (varDsc->lvDoNotEnregister)
+ {
+ printf(" do-not-enreg[");
+ if (varDsc->lvAddrExposed)
+ {
+ printf("X");
+ }
+ if (varTypeIsStruct(varDsc))
+ {
+ printf("S");
+ }
+ if (varDsc->lvVMNeedsStackAddr)
+ {
+ printf("V");
+ }
+ if (varDsc->lvLiveInOutOfHndlr)
+ {
+ printf("H");
+ }
+ if (varDsc->lvLclFieldExpr)
+ {
+ printf("F");
+ }
+ if (varDsc->lvLclBlockOpAddr)
+ {
+ printf("B");
+ }
+ if (varDsc->lvLiveAcrossUCall)
+ {
+ printf("U");
+ }
+ if (varDsc->lvIsMultiRegArg)
+ {
+ printf("A");
+ }
+ if (varDsc->lvIsMultiRegRet)
+ {
+ printf("R");
+ }
+#ifdef JIT32_GCENCODER
+ if (varDsc->lvPinned)
+ printf("P");
+#endif // JIT32_GCENCODER
+ printf("]");
+ }
+
+ if (varDsc->lvIsMultiRegArg)
+ {
+ printf(" multireg-arg");
+ }
+ if (varDsc->lvIsMultiRegRet)
+ {
+ printf(" multireg-ret");
+ }
+ if (varDsc->lvMustInit)
+ {
+ printf(" must-init");
+ }
+ if (varDsc->lvAddrExposed)
+ {
+ printf(" addr-exposed");
+ }
+ if (varDsc->lvHasLdAddrOp)
+ {
+ printf(" ld-addr-op");
+ }
+ if (varDsc->lvVerTypeInfo.IsThisPtr())
+ {
+ printf(" this");
+ }
+ if (varDsc->lvPinned)
+ {
+ printf(" pinned");
+ }
+ if (varDsc->lvRefAssign)
+ {
+ printf(" ref-asgn");
+ }
+ if (varDsc->lvStackByref)
+ {
+ printf(" stack-byref");
+ }
+#ifndef _TARGET_64BIT_
+ if (varDsc->lvStructDoubleAlign)
+ printf(" double-align");
+#endif // !_TARGET_64BIT_
+ if (varDsc->lvOverlappingFields)
+ {
+ printf(" overlapping-fields");
+ }
+
+ if (compGSReorderStackLayout && !varDsc->lvRegister)
+ {
+ if (varDsc->lvIsPtr)
+ {
+ printf(" ptr");
+ }
+ if (varDsc->lvIsUnsafeBuffer)
+ {
+ printf(" unsafe-buffer");
+ }
+ }
+ if (varDsc->lvIsStructField)
+ {
+ LclVarDsc* parentvarDsc = &lvaTable[varDsc->lvParentLcl];
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(parentvarDsc))
+ {
+ bool isLo = (lclNum == parentvarDsc->lvFieldLclStart);
+ printf(" V%02u.%s(offs=0x%02x)", varDsc->lvParentLcl, isLo ? "lo" : "hi", isLo ? 0 : genTypeSize(TYP_INT));
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+ CORINFO_CLASS_HANDLE typeHnd = parentvarDsc->lvVerTypeInfo.GetClassHandle();
+ CORINFO_FIELD_HANDLE fldHnd = info.compCompHnd->getFieldInClass(typeHnd, varDsc->lvFldOrdinal);
+
+ printf(" V%02u.%s(offs=0x%02x)", varDsc->lvParentLcl, eeGetFieldName(fldHnd), varDsc->lvFldOffset);
+
+ lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc);
+ // We should never have lvIsStructField set if it is a reg-sized non-field-addressed struct.
+ assert(!varDsc->lvRegStruct);
+ switch (promotionType)
+ {
+ case PROMOTION_TYPE_NONE:
+ printf(" P-NONE");
+ break;
+ case PROMOTION_TYPE_DEPENDENT:
+ printf(" P-DEP");
+ break;
+ case PROMOTION_TYPE_INDEPENDENT:
+ printf(" P-INDEP");
+ break;
+ }
+ }
+ }
+
+ printf("\n");
+}
+
+/*****************************************************************************
+ *
+ * dump the lvaTable
+ */
+
+void Compiler::lvaTableDump(FrameLayoutState curState)
+{
+ if (curState == NO_FRAME_LAYOUT)
+ {
+ curState = lvaDoneFrameLayout;
+ if (curState == NO_FRAME_LAYOUT)
+ {
+ // Still no layout? Could be a bug, but just display the initial layout
+ curState = INITIAL_FRAME_LAYOUT;
+ }
+ }
+
+ if (curState == INITIAL_FRAME_LAYOUT)
+ {
+ printf("; Initial");
+ }
+ else if (curState == PRE_REGALLOC_FRAME_LAYOUT)
+ {
+ printf("; Pre-RegAlloc");
+ }
+ else if (curState == REGALLOC_FRAME_LAYOUT)
+ {
+ printf("; RegAlloc");
+ }
+ else if (curState == TENTATIVE_FRAME_LAYOUT)
+ {
+ printf("; Tentative");
+ }
+ else if (curState == FINAL_FRAME_LAYOUT)
+ {
+ printf("; Final");
+ }
+ else
+ {
+ printf("UNKNOWN FrameLayoutState!");
+ unreached();
+ }
+
+ printf(" local variable assignments\n");
+ printf(";\n");
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ // Figure out some sizes, to help line things up
+
+ size_t refCntWtdWidth = 6; // Use 6 as the minimum width
+
+ if (curState != INITIAL_FRAME_LAYOUT) // don't need this info for INITIAL_FRAME_LAYOUT
+ {
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ size_t width = strlen(refCntWtd2str(varDsc->lvRefCntWtd));
+ if (width > refCntWtdWidth)
+ {
+ refCntWtdWidth = width;
+ }
+ }
+ }
+
+ // Do the actual output
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ lvaDumpEntry(lclNum, curState, refCntWtdWidth);
+ }
+
+ //-------------------------------------------------------------------------
+ // Display the code-gen temps
+
+ assert(tmpAllFree());
+ for (TempDsc* temp = tmpListBeg(); temp != nullptr; temp = tmpListNxt(temp))
+ {
+ printf("; TEMP_%02u %26s%*s%7s -> ", -temp->tdTempNum(), " ", refCntWtdWidth, " ",
+ varTypeName(temp->tdTempType()));
+ int offset = temp->tdTempOffs();
+ printf(" [%2s%1s0x%02X]\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE, (offset < 0 ? "-" : "+"),
+ (offset < 0 ? -offset : offset));
+ }
+
+ if (curState >= TENTATIVE_FRAME_LAYOUT)
+ {
+ printf(";\n");
+ printf("; Lcl frame size = %d\n", compLclFrameSize);
+ }
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Conservatively estimate the layout of the stack frame.
+ *
+ * This function is only used before final frame layout. It conservatively estimates the
+ * number of callee-saved registers that must be saved, then calls lvaAssignFrameOffsets().
+ * To do final frame layout, the callee-saved registers are known precisely, so
+ * lvaAssignFrameOffsets() is called directly.
+ *
+ * Returns the (conservative, that is, overly large) estimated size of the frame,
+ * including the callee-saved registers. This is only used by the emitter during code
+ * generation when estimating the size of the offset of instructions accessing temps,
+ * and only if temps have a larger offset than variables.
+ */
+
+unsigned Compiler::lvaFrameSize(FrameLayoutState curState)
+{
+ assert(curState < FINAL_FRAME_LAYOUT);
+
+ unsigned result;
+
+ /* Layout the stack frame conservatively.
+ Assume all callee-saved registers are spilled to stack */
+
+ compCalleeRegsPushed = CNT_CALLEE_SAVED;
+
+#if defined(_TARGET_ARMARCH_)
+ if (compFloatingPointUsed)
+ compCalleeRegsPushed += CNT_CALLEE_SAVED_FLOAT;
+
+ compCalleeRegsPushed++; // we always push LR. See genPushCalleeSavedRegisters
+#elif defined(_TARGET_AMD64_)
+ if (compFloatingPointUsed)
+ {
+ compCalleeFPRegsSavedMask = RBM_FLT_CALLEE_SAVED;
+ }
+ else
+ {
+ compCalleeFPRegsSavedMask = RBM_NONE;
+ }
+#endif
+
+#if DOUBLE_ALIGN
+ if (genDoubleAlign())
+ {
+ // X86 only - account for extra 4-byte pad that may be created by "and esp, -8" instruction
+ compCalleeRegsPushed++;
+ }
+#endif
+
+#ifdef _TARGET_XARCH_
+ // Since FP/EBP is included in the SAVED_REG_MAXSZ we need to
+ // subtract 1 register if codeGen->isFramePointerUsed() is true.
+ if (codeGen->isFramePointerUsed())
+ {
+ compCalleeRegsPushed--;
+ }
+#endif
+
+ lvaAssignFrameOffsets(curState);
+
+ unsigned calleeSavedRegMaxSz = CALLEE_SAVED_REG_MAXSZ;
+#if defined(_TARGET_ARMARCH_)
+ if (compFloatingPointUsed)
+ {
+ calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ;
+ }
+ calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR. See genPushCalleeSavedRegisters
+#endif
+
+ result = compLclFrameSize + calleeSavedRegMaxSz;
+ return result;
+}
+
+//------------------------------------------------------------------------
+// lvaGetSPRelativeOffset: Given a variable, return the offset of that
+// variable in the frame from the stack pointer. This number will be positive,
+// since the stack pointer must be at a lower address than everything on the
+// stack.
+//
+// This can't be called for localloc functions, since the stack pointer
+// varies, and thus there is no fixed offset to a variable from the stack pointer.
+//
+// Arguments:
+// varNum - the variable number
+//
+// Return Value:
+// The offset.
+
+int Compiler::lvaGetSPRelativeOffset(unsigned varNum)
+{
+ assert(!compLocallocUsed);
+ assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
+ assert(varNum < lvaCount);
+ const LclVarDsc* varDsc = lvaTable + varNum;
+ assert(varDsc->lvOnFrame);
+ int spRelativeOffset;
+
+ if (varDsc->lvFramePointerBased)
+ {
+ // The stack offset is relative to the frame pointer, so convert it to be
+ // relative to the stack pointer (which makes no sense for localloc functions).
+ spRelativeOffset = varDsc->lvStkOffs + codeGen->genSPtoFPdelta();
+ }
+ else
+ {
+ spRelativeOffset = varDsc->lvStkOffs;
+ }
+
+ assert(spRelativeOffset >= 0);
+ return spRelativeOffset;
+}
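+
+// Illustrative example of the conversion above (purely hypothetical numbers): for
+// a frame-pointer-based local with lvStkOffs == -8 and genSPtoFPdelta() == 32, the
+// SP-relative offset is -8 + 32 == 24, which is non-negative as the assert requires.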
+
+/*****************************************************************************
+ *
+ * Return the caller-SP-relative stack offset of a local/parameter.
+ * Requires the local to be on the stack and frame layout to be complete.
+ */
+
+int Compiler::lvaGetCallerSPRelativeOffset(unsigned varNum)
+{
+ assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + varNum;
+ assert(varDsc->lvOnFrame);
+
+ return lvaToCallerSPRelativeOffset(varDsc->lvStkOffs, varDsc->lvFramePointerBased);
+}
+
+int Compiler::lvaToCallerSPRelativeOffset(int offset, bool isFpBased)
+{
+ assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
+
+ if (isFpBased)
+ {
+ offset += codeGen->genCallerSPtoFPdelta();
+ }
+ else
+ {
+ offset += codeGen->genCallerSPtoInitialSPdelta();
+ }
+
+ return offset;
+}
+
+/*****************************************************************************
+ *
+ * Return the Initial-SP-relative stack offset of a local/parameter.
+ * Requires the local to be on the stack and frame layout to be complete.
+ */
+
+int Compiler::lvaGetInitialSPRelativeOffset(unsigned varNum)
+{
+ assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + varNum;
+ assert(varDsc->lvOnFrame);
+
+ return lvaToInitialSPRelativeOffset(varDsc->lvStkOffs, varDsc->lvFramePointerBased);
+}
+
+// Given a local variable offset, and whether that offset is frame-pointer based, return its offset from Initial-SP.
+// This is used, for example, to figure out the offset of the frame pointer from Initial-SP.
+int Compiler::lvaToInitialSPRelativeOffset(unsigned offset, bool isFpBased)
+{
+ assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
+#ifdef _TARGET_AMD64_
+ if (isFpBased)
+ {
+ // Currently, the frame starts by pushing ebp, ebp points to the saved ebp
+ // (so we have ebp pointer chaining). Add the fixed-size frame size plus the
+ // size of the callee-saved regs (not including ebp itself) to find Initial-SP.
+
+ assert(codeGen->isFramePointerUsed());
+ offset += codeGen->genSPtoFPdelta();
+ }
+ else
+ {
+ // The offset is correct already!
+ }
+#else // !_TARGET_AMD64_
+ NYI("lvaToInitialSPRelativeOffset");
+#endif // !_TARGET_AMD64_
+
+ return offset;
+}
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+/*****************************************************************************
+ * Pick a padding size at "random" for the local.
+ * 0 means that it should not be converted to a GT_LCL_FLD
+ */
+
+static unsigned LCL_FLD_PADDING(unsigned lclNum)
+{
+ // Convert every 2nd variable
+ if (lclNum % 2)
+ {
+ return 0;
+ }
+
+ // Pick a padding size at "random"
+ unsigned size = lclNum % 7;
+
+ return size;
+}
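+
+// Illustrative examples of the padding selection above:
+//     lclNum == 3  -> odd, so padding is 0 and the local is not converted
+//     lclNum == 4  -> 4 % 7 == 4, so the local is converted with 4 bytes of padding
+//     lclNum == 14 -> even, but 14 % 7 == 0, so the local is still not converted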
+
+/*****************************************************************************
+ *
+ * Callback for fgWalkAllTreesPre()
+ * Convert as many GT_LCL_VAR nodes as possible to GT_LCL_FLD nodes.
+ */
+
+/* static */
+/*
+ The stress mode does 2 passes.
+
+ In the first pass we mark the locals to which we CANNOT apply the stress mode.
+ In the second pass we do the appropriate morphing for every local that was not ruled out in the first pass.
+*/
+Compiler::fgWalkResult Compiler::lvaStressLclFldCB(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ genTreeOps oper = tree->OperGet();
+ GenTreePtr lcl;
+
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+ lcl = tree;
+ break;
+
+ case GT_ADDR:
+ if (tree->gtOp.gtOp1->gtOper != GT_LCL_VAR)
+ {
+ return WALK_CONTINUE;
+ }
+ lcl = tree->gtOp.gtOp1;
+ break;
+
+ default:
+ return WALK_CONTINUE;
+ }
+
+ Compiler* pComp = ((lvaStressLclFldArgs*)data->pCallbackData)->m_pCompiler;
+ bool bFirstPass = ((lvaStressLclFldArgs*)data->pCallbackData)->m_bFirstPass;
+ noway_assert(lcl->gtOper == GT_LCL_VAR);
+ unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
+ var_types type = lcl->TypeGet();
+ LclVarDsc* varDsc = &pComp->lvaTable[lclNum];
+
+ if (varDsc->lvNoLclFldStress)
+ {
+ // Already determined we can't do anything for this var
+ return WALK_SKIP_SUBTREES;
+ }
+
+ if (bFirstPass)
+ {
+ // Ignore arguments and temps
+ if (varDsc->lvIsParam || lclNum >= pComp->info.compLocalsCount)
+ {
+ varDsc->lvNoLclFldStress = true;
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // Fix for lcl_fld stress mode
+ if (varDsc->lvKeepType)
+ {
+ varDsc->lvNoLclFldStress = true;
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // Can't have GC ptrs in TYP_BLK.
+ if (!varTypeIsArithmetic(type))
+ {
+ varDsc->lvNoLclFldStress = true;
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // Weed out "small" types like TYP_BYTE as we don't mark the GT_LCL_VAR
+ // node with the accurate small type. If we bash lvaTable[].lvType,
+ // then there will be no indication that it was ever a small type.
+ var_types varType = varDsc->TypeGet();
+ if (varType != TYP_BLK && genTypeSize(varType) != genTypeSize(genActualType(varType)))
+ {
+ varDsc->lvNoLclFldStress = true;
+ return WALK_SKIP_SUBTREES;
+ }
+
+        // Offset some of the local variables by a "random" non-zero amount
+ unsigned padding = LCL_FLD_PADDING(lclNum);
+ if (padding == 0)
+ {
+ varDsc->lvNoLclFldStress = true;
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+ else
+ {
+ // Do the morphing
+ noway_assert(varDsc->lvType == lcl->gtType || varDsc->lvType == TYP_BLK);
+ var_types varType = varDsc->TypeGet();
+
+ // Calculate padding
+ unsigned padding = LCL_FLD_PADDING(lclNum);
+
+ // Change the variable to a TYP_BLK
+ if (varType != TYP_BLK)
+ {
+ varDsc->lvExactSize = (unsigned)(roundUp(padding + pComp->lvaLclSize(lclNum)));
+ varDsc->lvType = TYP_BLK;
+ pComp->lvaSetVarAddrExposed(lclNum);
+ }
+
+ tree->gtFlags |= GTF_GLOB_REF;
+
+ /* Now morph the tree appropriately */
+ if (oper == GT_LCL_VAR)
+ {
+ /* Change lclVar(lclNum) to lclFld(lclNum,padding) */
+
+ tree->ChangeOper(GT_LCL_FLD);
+ tree->gtLclFld.gtLclOffs = padding;
+ }
+ else
+ {
+ /* Change addr(lclVar) to addr(lclVar)+padding */
+
+ noway_assert(oper == GT_ADDR);
+ GenTreePtr newAddr = new (pComp, GT_NONE) GenTreeOp(*tree->AsOp());
+
+ tree->ChangeOper(GT_ADD);
+ tree->gtOp.gtOp1 = newAddr;
+ tree->gtOp.gtOp2 = pComp->gtNewIconNode(padding);
+
+ lcl->gtType = TYP_BLK;
+ }
+ }
+
+ return WALK_SKIP_SUBTREES;
+}
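+
+// Illustrative sketch of the second-pass morph above (hypothetical local V03 with
+// padding == 4):
+//
+//     lclVar V03           becomes    lclFld V03 [+4]
+//     addr(lclVar V03)     becomes    addr(lclVar V03) + 4
+//
+// with V03 itself widened to a TYP_BLK of its original size plus the padding.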
+
+/*****************************************************************************/
+
+void Compiler::lvaStressLclFld()
+{
+ if (!compStressCompile(STRESS_LCL_FLDS, 5))
+ {
+ return;
+ }
+
+ lvaStressLclFldArgs Args;
+ Args.m_pCompiler = this;
+ Args.m_bFirstPass = true;
+
+ // Do First pass
+ fgWalkAllTreesPre(lvaStressLclFldCB, &Args);
+
+ // Second pass
+ Args.m_bFirstPass = false;
+ fgWalkAllTreesPre(lvaStressLclFldCB, &Args);
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * A little routine that displays a local variable bitset.
+ * 'set' is mask of variables that have to be displayed
+ * 'allVars' is the complete set of interesting variables (blank space is
+ * inserted if its corresponding bit is not in 'set').
+ */
+
+#ifdef DEBUG
+void Compiler::lvaDispVarSet(VARSET_VALARG_TP set)
+{
+ VARSET_TP VARSET_INIT_NOCOPY(allVars, VarSetOps::MakeEmpty(this));
+ lvaDispVarSet(set, allVars);
+}
+
+void Compiler::lvaDispVarSet(VARSET_VALARG_TP set, VARSET_VALARG_TP allVars)
+{
+ printf("{");
+
+ bool needSpace = false;
+
+ for (unsigned index = 0; index < lvaTrackedCount; index++)
+ {
+ if (VarSetOps::IsMember(this, set, index))
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ /* Look for the matching variable */
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if ((varDsc->lvVarIndex == index) && varDsc->lvTracked)
+ {
+ break;
+ }
+ }
+
+ if (needSpace)
+ {
+ printf(" ");
+ }
+ else
+ {
+ needSpace = true;
+ }
+
+ printf("V%02u", lclNum);
+ }
+ else if (VarSetOps::IsMember(this, allVars, index))
+ {
+ if (needSpace)
+ {
+ printf(" ");
+ }
+ else
+ {
+ needSpace = true;
+ }
+
+ printf(" ");
+ }
+ }
+
+ printf("}");
+}
+
+#endif // DEBUG
diff --git a/src/jit/lir.cpp b/src/jit/lir.cpp
new file mode 100644
index 0000000000..94206def1c
--- /dev/null
+++ b/src/jit/lir.cpp
@@ -0,0 +1,1640 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#include "smallhash.h"
+
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+LIR::Use::Use() : m_range(nullptr), m_edge(nullptr), m_user(nullptr)
+{
+}
+
+LIR::Use::Use(const Use& other)
+{
+ *this = other;
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::Use: Constructs a use <-> def edge given the range that
+// contains the use and the def, the use -> def edge, and
+// the user.
+//
+// Arguments:
+// range - The range that contains the use and the def.
+// edge - The use -> def edge.
+// user - The node that uses the def.
+//
+// Return Value:
+//
+LIR::Use::Use(Range& range, GenTree** edge, GenTree* user) : m_range(&range), m_edge(edge), m_user(user)
+{
+ AssertIsValid();
+}
+
+LIR::Use& LIR::Use::operator=(const Use& other)
+{
+ m_range = other.m_range;
+ m_user = other.m_user;
+ m_edge = other.IsDummyUse() ? &m_user : other.m_edge;
+
+ assert(IsDummyUse() == other.IsDummyUse());
+ return *this;
+}
+
+LIR::Use& LIR::Use::operator=(Use&& other)
+{
+ *this = other;
+ return *this;
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::GetDummyUse: Returns a dummy use for a node.
+//
+// This method is provided as a convenience to allow transforms to work
+// uniformly over Use values. It allows the creation of a Use given a node
+// that is not used.
+//
+// Arguments:
+// range - The range that contains the node.
+// node - The node for which to create a dummy use.
+//
+// Return Value:
+//    A dummy use for `node`.
+//
+LIR::Use LIR::Use::GetDummyUse(Range& range, GenTree* node)
+{
+ assert(node != nullptr);
+
+ Use dummyUse;
+ dummyUse.m_range = &range;
+ dummyUse.m_user = node;
+ dummyUse.m_edge = &dummyUse.m_user;
+
+ assert(dummyUse.IsInitialized());
+ return dummyUse;
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::IsDummyUse: Indicates whether or not a use is a dummy use.
+//
+// This method must be called before attempting to call the User() method
+// below: for dummy uses, the user is the same node as the def.
+//
+// Return Value: true if this use is a dummy use; false otherwise.
+//
+bool LIR::Use::IsDummyUse() const
+{
+ return m_edge == &m_user;
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::Def: Returns the node that produces the def for this use.
+//
+GenTree* LIR::Use::Def() const
+{
+ assert(IsInitialized());
+
+ return *m_edge;
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::User: Returns the node that uses the def for this use.
+//
+GenTree* LIR::Use::User() const
+{
+ assert(IsInitialized());
+ assert(!IsDummyUse());
+
+ return m_user;
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::IsInitialized: Returns true if the use is minimally valid; false otherwise.
+//
+bool LIR::Use::IsInitialized() const
+{
+ return (m_range != nullptr) && (m_user != nullptr) && (m_edge != nullptr);
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::AssertIsValid: DEBUG function to assert on many validity conditions.
+//
+void LIR::Use::AssertIsValid() const
+{
+ assert(IsInitialized());
+ assert(m_range->Contains(m_user));
+ assert(Def() != nullptr);
+
+ GenTree** useEdge = nullptr;
+ assert(m_user->TryGetUse(Def(), &useEdge));
+ assert(useEdge == m_edge);
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::ReplaceWith: Changes the use to point to a new value.
+//
+// For example, given the following LIR:
+//
+// t15 = lclVar int arg1
+// t16 = lclVar int arg1
+//
+// /--* t15 int
+// +--* t16 int
+// t17 = * == int
+//
+// /--* t17 int
+// * jmpTrue void
+//
+// If we wanted to replace the use of t17 with a use of the constant "1", we
+// might do the following (where `opEq` is a `Use` value that represents the
+// use of t17):
+//
+// GenTree* constantOne = compiler->gtNewIconNode(1);
+// range.InsertAfter(opEq.Def(), constantOne);
+// opEq.ReplaceWith(compiler, constantOne);
+//
+// Which would produce something like the following LIR:
+//
+// t15 = lclVar int arg1
+// t16 = lclVar int arg1
+//
+// /--* t15 int
+// +--* t16 int
+// t17 = * == int
+//
+// t18 = const int 1
+//
+// /--* t18 int
+// * jmpTrue void
+//
+// Eliminating the now-dead compare and its operands using `LIR::Range::Remove`
+// would then give us:
+//
+// t18 = const int 1
+//
+// /--* t18 int
+// * jmpTrue void
+//
+// Arguments:
+// compiler - The Compiler context.
+// replacement - The replacement node.
+//
+void LIR::Use::ReplaceWith(Compiler* compiler, GenTree* replacement)
+{
+ assert(IsInitialized());
+ assert(compiler != nullptr);
+ assert(replacement != nullptr);
+ assert(IsDummyUse() || m_range->Contains(m_user));
+ assert(m_range->Contains(replacement));
+
+ GenTree* replacedNode = *m_edge;
+
+ *m_edge = replacement;
+ if (!IsDummyUse() && m_user->IsCall())
+ {
+ compiler->fgFixupArgTabEntryPtr(m_user, replacedNode, replacement);
+ }
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::ReplaceWithLclVar: Assigns the def for this use to a local
+// var and points the use to a use of that
+// local var. If no local number is provided,
+// creates a new local var.
+//
+// For example, given the following IR:
+//
+// t15 = lclVar int arg1
+// t16 = lclVar int arg1
+//
+// /--* t15 int
+// +--* t16 int
+// t17 = * == int
+//
+// /--* t17 int
+// * jmpTrue void
+//
+// If we wanted to replace the use of t17 with a use of a new local var
+// that holds the value represented by t17, we might do the following
+// (where `opEq` is a `Use` value that represents the use of t17):
+//
+//    opEq.ReplaceWithLclVar(compiler, block->getBBWeight(compiler));
+//
+// This would produce the following LIR:
+//
+// t15 = lclVar int arg1
+// t16 = lclVar int arg1
+//
+// /--* t15 int
+// +--* t16 int
+// t17 = * == int
+//
+// /--* t17 int
+// * st.lclVar int tmp0
+//
+// t18 = lclVar int tmp0
+//
+// /--* t18 int
+// * jmpTrue void
+//
+// Arguments:
+// compiler - The Compiler context.
+// blockWeight - The weight of the basic block that contains the use.
+// lclNum - The local to use for temporary storage. If BAD_VAR_NUM (the
+// default) is provided, this method will create and use a new
+// local var.
+//
+// Return Value: The number of the local var used for temporary storage.
+//
+unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned blockWeight, unsigned lclNum)
+{
+ assert(IsInitialized());
+ assert(compiler != nullptr);
+ assert(m_range->Contains(m_user));
+ assert(m_range->Contains(*m_edge));
+
+ GenTree* node = *m_edge;
+
+ if (lclNum == BAD_VAR_NUM)
+ {
+ lclNum = compiler->lvaGrabTemp(true DEBUGARG("ReplaceWithLclVar is creating a new local variable"));
+ }
+
+ // Increment its lvRefCnt and lvRefCntWtd twice, one for the def and one for the use
+ compiler->lvaTable[lclNum].incRefCnts(blockWeight, compiler);
+ compiler->lvaTable[lclNum].incRefCnts(blockWeight, compiler);
+
+ GenTreeLclVar* store = compiler->gtNewTempAssign(lclNum, node)->AsLclVar();
+
+ GenTree* load =
+ new (compiler, GT_LCL_VAR) GenTreeLclVar(store->TypeGet(), store->AsLclVarCommon()->GetLclNum(), BAD_IL_OFFSET);
+
+ m_range->InsertAfter(node, store, load);
+
+ ReplaceWith(compiler, load);
+
+ JITDUMP("ReplaceWithLclVar created store :\n");
+ DISPNODE(store);
+
+ return lclNum;
+}
+
+LIR::ReadOnlyRange::ReadOnlyRange() : m_firstNode(nullptr), m_lastNode(nullptr)
+{
+}
+
+LIR::ReadOnlyRange::ReadOnlyRange(ReadOnlyRange&& other) : m_firstNode(other.m_firstNode), m_lastNode(other.m_lastNode)
+{
+#ifdef DEBUG
+ other.m_firstNode = nullptr;
+ other.m_lastNode = nullptr;
+#endif
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::ReadOnlyRange:
+// Creates a `ReadOnlyRange` value given the first and last node in
+// the range.
+//
+// Arguments:
+// firstNode - The first node in the range.
+// lastNode - The last node in the range.
+//
+LIR::ReadOnlyRange::ReadOnlyRange(GenTree* firstNode, GenTree* lastNode) : m_firstNode(firstNode), m_lastNode(lastNode)
+{
+ assert((m_firstNode == nullptr) == (m_lastNode == nullptr));
+ assert((m_firstNode == m_lastNode) || (Contains(m_lastNode)));
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::FirstNode: Returns the first node in the range.
+//
+GenTree* LIR::ReadOnlyRange::FirstNode() const
+{
+ return m_firstNode;
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::LastNode: Returns the last node in the range.
+//
+GenTree* LIR::ReadOnlyRange::LastNode() const
+{
+ return m_lastNode;
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::IsEmpty: Returns true if the range is empty; false
+// otherwise.
+//
+bool LIR::ReadOnlyRange::IsEmpty() const
+{
+ assert((m_firstNode == nullptr) == (m_lastNode == nullptr));
+ return m_firstNode == nullptr;
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::begin: Returns an iterator positioned at the first
+// node in the range.
+//
+LIR::ReadOnlyRange::Iterator LIR::ReadOnlyRange::begin() const
+{
+ return Iterator(m_firstNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::end: Returns an iterator positioned after the last
+// node in the range.
+//
+LIR::ReadOnlyRange::Iterator LIR::ReadOnlyRange::end() const
+{
+ return Iterator(m_lastNode == nullptr ? nullptr : m_lastNode->gtNext);
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::rbegin: Returns an iterator positioned at the last
+// node in the range.
+//
+LIR::ReadOnlyRange::ReverseIterator LIR::ReadOnlyRange::rbegin() const
+{
+ return ReverseIterator(m_lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::rend: Returns an iterator positioned before the first
+// node in the range.
+//
+LIR::ReadOnlyRange::ReverseIterator LIR::ReadOnlyRange::rend() const
+{
+ return ReverseIterator(m_firstNode == nullptr ? nullptr : m_firstNode->gtPrev);
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::Contains: Indicates whether or not this range
+// contains a given node.
+//
+// Arguments:
+// node - The node to find.
+//
+// Return Value: True if this range contains the given node; false
+// otherwise.
+//
+bool LIR::ReadOnlyRange::Contains(GenTree* node) const
+{
+ assert(node != nullptr);
+
+ // TODO-LIR: derive this from the # of nodes in the function as well as
+ // the debug level. Checking small functions is pretty cheap; checking
+ // large functions is not.
+ if (JitConfig.JitExpensiveDebugCheckLevel() < 2)
+ {
+ return true;
+ }
+
+ for (GenTree* n : *this)
+ {
+ if (n == node)
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+#endif
+
+LIR::Range::Range() : ReadOnlyRange()
+{
+}
+
+LIR::Range::Range(Range&& other) : ReadOnlyRange(std::move(other))
+{
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::Range: Creates a `Range` value given the first and last
+// node in the range.
+//
+// Arguments:
+// firstNode - The first node in the range.
+// lastNode - The last node in the range.
+//
+LIR::Range::Range(GenTree* firstNode, GenTree* lastNode) : ReadOnlyRange(firstNode, lastNode)
+{
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::LastPhiNode: Returns the last phi node in the range or
+// `nullptr` if no phis exist.
+//
+GenTree* LIR::Range::LastPhiNode() const
+{
+ GenTree* lastPhiNode = nullptr;
+ for (GenTree* node : *this)
+ {
+ if (!node->IsPhiNode())
+ {
+ break;
+ }
+
+ lastPhiNode = node;
+ }
+
+ return lastPhiNode;
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::FirstNonPhiNode: Returns the first non-phi node in the
+// range or `nullptr` if no non-phi nodes
+// exist.
+//
+GenTree* LIR::Range::FirstNonPhiNode() const
+{
+ for (GenTree* node : *this)
+ {
+ if (!node->IsPhiNode())
+ {
+ return node;
+ }
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::FirstNonPhiOrCatchArgNode: Returns the first node after all
+// phi or catch arg nodes in this
+// range.
+//
+GenTree* LIR::Range::FirstNonPhiOrCatchArgNode() const
+{
+ for (GenTree* node : NonPhiNodes())
+ {
+ if (node->OperGet() == GT_CATCH_ARG)
+ {
+ continue;
+ }
+ else if ((node->OperGet() == GT_STORE_LCL_VAR) && (node->gtGetOp1()->OperGet() == GT_CATCH_ARG))
+ {
+ continue;
+ }
+
+ return node;
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::PhiNodes: Returns the range of phi nodes inside this range.
+//
+LIR::ReadOnlyRange LIR::Range::PhiNodes() const
+{
+ GenTree* lastPhiNode = LastPhiNode();
+ if (lastPhiNode == nullptr)
+ {
+ return ReadOnlyRange();
+ }
+
+ return ReadOnlyRange(m_firstNode, lastPhiNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::NonPhiNodes: Returns the range of non-phi nodes inside this
+//                          range.
+//
+LIR::ReadOnlyRange LIR::Range::NonPhiNodes() const
+{
+ GenTree* firstNonPhiNode = FirstNonPhiNode();
+ if (firstNonPhiNode == nullptr)
+ {
+ return ReadOnlyRange();
+ }
+
+ return ReadOnlyRange(firstNonPhiNode, m_lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertBefore: Inserts a node before another node in this range.
+//
+// Arguments:
+// insertionPoint - The node before which `node` will be inserted. If non-null, must be part
+// of this range. If null, insert at the end of the range.
+// node - The node to insert. Must not be part of any range.
+//
+void LIR::Range::InsertBefore(GenTree* insertionPoint, GenTree* node)
+{
+ assert(node != nullptr);
+ assert(node->gtPrev == nullptr);
+ assert(node->gtNext == nullptr);
+
+ FinishInsertBefore(insertionPoint, node, node);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertBefore: Inserts 2 nodes before another node in this range.
+//
+// Arguments:
+// insertionPoint - The node before which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the end of the range.
+// node1 - The first node to insert. Must not be part of any range.
+// node2 - The second node to insert. Must not be part of any range.
+//
+// Notes:
+// Resulting order:
+// previous insertionPoint->gtPrev <-> node1 <-> node2 <-> insertionPoint
+//
+void LIR::Range::InsertBefore(GenTree* insertionPoint, GenTree* node1, GenTree* node2)
+{
+ assert(node1 != nullptr);
+ assert(node2 != nullptr);
+
+ assert(node1->gtNext == nullptr);
+ assert(node1->gtPrev == nullptr);
+ assert(node2->gtNext == nullptr);
+ assert(node2->gtPrev == nullptr);
+
+ node1->gtNext = node2;
+ node2->gtPrev = node1;
+
+ FinishInsertBefore(insertionPoint, node1, node2);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertBefore: Inserts 3 nodes before another node in this range.
+//
+// Arguments:
+// insertionPoint - The node before which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the end of the range.
+// node1 - The first node to insert. Must not be part of any range.
+// node2 - The second node to insert. Must not be part of any range.
+// node3 - The third node to insert. Must not be part of any range.
+//
+// Notes:
+// Resulting order:
+// previous insertionPoint->gtPrev <-> node1 <-> node2 <-> node3 <-> insertionPoint
+//
+void LIR::Range::InsertBefore(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3)
+{
+ assert(node1 != nullptr);
+ assert(node2 != nullptr);
+ assert(node3 != nullptr);
+
+ assert(node1->gtNext == nullptr);
+ assert(node1->gtPrev == nullptr);
+ assert(node2->gtNext == nullptr);
+ assert(node2->gtPrev == nullptr);
+ assert(node3->gtNext == nullptr);
+ assert(node3->gtPrev == nullptr);
+
+ node1->gtNext = node2;
+
+ node2->gtPrev = node1;
+ node2->gtNext = node3;
+
+ node3->gtPrev = node2;
+
+ FinishInsertBefore(insertionPoint, node1, node3);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertBefore: Inserts 4 nodes before another node in this range.
+//
+// Arguments:
+// insertionPoint - The node before which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the end of the range.
+// node1 - The first node to insert. Must not be part of any range.
+// node2 - The second node to insert. Must not be part of any range.
+// node3 - The third node to insert. Must not be part of any range.
+// node4 - The fourth node to insert. Must not be part of any range.
+//
+// Notes:
+// Resulting order:
+// previous insertionPoint->gtPrev <-> node1 <-> node2 <-> node3 <-> node4 <-> insertionPoint
+//
+void LIR::Range::InsertBefore(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3, GenTree* node4)
+{
+ assert(node1 != nullptr);
+ assert(node2 != nullptr);
+ assert(node3 != nullptr);
+ assert(node4 != nullptr);
+
+ assert(node1->gtNext == nullptr);
+ assert(node1->gtPrev == nullptr);
+ assert(node2->gtNext == nullptr);
+ assert(node2->gtPrev == nullptr);
+ assert(node3->gtNext == nullptr);
+ assert(node3->gtPrev == nullptr);
+ assert(node4->gtNext == nullptr);
+ assert(node4->gtPrev == nullptr);
+
+ node1->gtNext = node2;
+
+ node2->gtPrev = node1;
+ node2->gtNext = node3;
+
+ node3->gtPrev = node2;
+ node3->gtNext = node4;
+
+ node4->gtPrev = node3;
+
+ FinishInsertBefore(insertionPoint, node1, node4);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::FinishInsertBefore: Helper function to finalize InsertBefore processing: link the
+// range to insertionPoint. gtNext/gtPrev links between first and last are already set.
+//
+// Arguments:
+// insertionPoint - The node before which the nodes will be inserted. If non-null, must be part
+// of this range. If null, indicates to insert at the end of the range.
+// first - The first node of the range to insert.
+// last - The last node of the range to insert.
+//
+// Notes:
+// Resulting order:
+// previous insertionPoint->gtPrev <-> first <-> ... <-> last <-> insertionPoint
+//
+void LIR::Range::FinishInsertBefore(GenTree* insertionPoint, GenTree* first, GenTree* last)
+{
+ assert(first != nullptr);
+ assert(last != nullptr);
+ assert(first->gtPrev == nullptr);
+ assert(last->gtNext == nullptr);
+
+ if (insertionPoint == nullptr)
+ {
+ if (m_firstNode == nullptr)
+ {
+ m_firstNode = first;
+ }
+ else
+ {
+ assert(m_lastNode != nullptr);
+ assert(m_lastNode->gtNext == nullptr);
+ m_lastNode->gtNext = first;
+ first->gtPrev = m_lastNode;
+ }
+ m_lastNode = last;
+ }
+ else
+ {
+ assert(Contains(insertionPoint));
+
+ first->gtPrev = insertionPoint->gtPrev;
+ if (first->gtPrev == nullptr)
+ {
+ assert(insertionPoint == m_firstNode);
+ m_firstNode = first;
+ }
+ else
+ {
+ first->gtPrev->gtNext = first;
+ }
+
+ last->gtNext = insertionPoint;
+ insertionPoint->gtPrev = last;
+ }
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAfter: Inserts a node after another node in this range.
+//
+// Arguments:
+// insertionPoint - The node after which `node` will be inserted. If non-null, must be part
+// of this range. If null, insert at the beginning of the range.
+// node - The node to insert. Must not be part of any range.
+//
+// Notes:
+// Resulting order:
+// insertionPoint <-> node <-> previous insertionPoint->gtNext
+//
+void LIR::Range::InsertAfter(GenTree* insertionPoint, GenTree* node)
+{
+ assert(node != nullptr);
+
+ assert(node->gtNext == nullptr);
+ assert(node->gtPrev == nullptr);
+
+ FinishInsertAfter(insertionPoint, node, node);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAfter: Inserts 2 nodes after another node in this range.
+//
+// Arguments:
+// insertionPoint - The node after which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the beginning of the range.
+// node1 - The first node to insert. Must not be part of any range.
+// node2 - The second node to insert. Must not be part of any range. Inserted after node1.
+//
+// Notes:
+// Resulting order:
+// insertionPoint <-> node1 <-> node2 <-> previous insertionPoint->gtNext
+//
+void LIR::Range::InsertAfter(GenTree* insertionPoint, GenTree* node1, GenTree* node2)
+{
+ assert(node1 != nullptr);
+ assert(node2 != nullptr);
+
+ assert(node1->gtNext == nullptr);
+ assert(node1->gtPrev == nullptr);
+ assert(node2->gtNext == nullptr);
+ assert(node2->gtPrev == nullptr);
+
+ node1->gtNext = node2;
+ node2->gtPrev = node1;
+
+ FinishInsertAfter(insertionPoint, node1, node2);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAfter: Inserts 3 nodes after another node in this range.
+//
+// Arguments:
+// insertionPoint - The node after which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the beginning of the range.
+// node1 - The first node to insert. Must not be part of any range.
+// node2 - The second node to insert. Must not be part of any range. Inserted after node1.
+// node3 - The third node to insert. Must not be part of any range. Inserted after node2.
+//
+// Notes:
+// Resulting order:
+// insertionPoint <-> node1 <-> node2 <-> node3 <-> previous insertionPoint->gtNext
+//
+void LIR::Range::InsertAfter(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3)
+{
+ assert(node1 != nullptr);
+ assert(node2 != nullptr);
+ assert(node3 != nullptr);
+
+ assert(node1->gtNext == nullptr);
+ assert(node1->gtPrev == nullptr);
+ assert(node2->gtNext == nullptr);
+ assert(node2->gtPrev == nullptr);
+ assert(node3->gtNext == nullptr);
+ assert(node3->gtPrev == nullptr);
+
+ node1->gtNext = node2;
+
+ node2->gtPrev = node1;
+ node2->gtNext = node3;
+
+ node3->gtPrev = node2;
+
+ FinishInsertAfter(insertionPoint, node1, node3);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAfter: Inserts 4 nodes after another node in this range.
+//
+// Arguments:
+// insertionPoint - The node after which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the beginning of the range.
+// node1 - The first node to insert. Must not be part of any range.
+// node2 - The second node to insert. Must not be part of any range. Inserted after node1.
+// node3 - The third node to insert. Must not be part of any range. Inserted after node2.
+// node4 - The fourth node to insert. Must not be part of any range. Inserted after node3.
+//
+// Notes:
+// Resulting order:
+// insertionPoint <-> node1 <-> node2 <-> node3 <-> node4 <-> previous insertionPoint->gtNext
+//
+void LIR::Range::InsertAfter(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3, GenTree* node4)
+{
+ assert(node1 != nullptr);
+ assert(node2 != nullptr);
+ assert(node3 != nullptr);
+ assert(node4 != nullptr);
+
+ assert(node1->gtNext == nullptr);
+ assert(node1->gtPrev == nullptr);
+ assert(node2->gtNext == nullptr);
+ assert(node2->gtPrev == nullptr);
+ assert(node3->gtNext == nullptr);
+ assert(node3->gtPrev == nullptr);
+ assert(node4->gtNext == nullptr);
+ assert(node4->gtPrev == nullptr);
+
+ node1->gtNext = node2;
+
+ node2->gtPrev = node1;
+ node2->gtNext = node3;
+
+ node3->gtPrev = node2;
+ node3->gtNext = node4;
+
+ node4->gtPrev = node3;
+
+ FinishInsertAfter(insertionPoint, node1, node4);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::FinishInsertAfter: Helper function to finalize InsertAfter processing: link the
+// range to insertionPoint. gtNext/gtPrev links between first and last are already set.
+//
+// Arguments:
+// insertionPoint - The node after which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the beginning of the range.
+// first - The first node of the range to insert.
+// last - The last node of the range to insert.
+//
+// Notes:
+// Resulting order:
+// insertionPoint <-> first <-> ... <-> last <-> previous insertionPoint->gtNext
+//
+void LIR::Range::FinishInsertAfter(GenTree* insertionPoint, GenTree* first, GenTree* last)
+{
+ assert(first != nullptr);
+ assert(last != nullptr);
+ assert(first->gtPrev == nullptr);
+ assert(last->gtNext == nullptr);
+
+ if (insertionPoint == nullptr)
+ {
+ if (m_lastNode == nullptr)
+ {
+ m_lastNode = last;
+ }
+ else
+ {
+ assert(m_firstNode != nullptr);
+ assert(m_firstNode->gtPrev == nullptr);
+ m_firstNode->gtPrev = last;
+ last->gtNext = m_firstNode;
+ }
+ m_firstNode = first;
+ }
+ else
+ {
+ assert(Contains(insertionPoint));
+
+ last->gtNext = insertionPoint->gtNext;
+ if (last->gtNext == nullptr)
+ {
+ assert(insertionPoint == m_lastNode);
+ m_lastNode = last;
+ }
+ else
+ {
+ last->gtNext->gtPrev = last;
+ }
+
+ first->gtPrev = insertionPoint;
+ insertionPoint->gtNext = first;
+ }
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertBefore: Inserts a range before another node in `this` range.
+//
+// Arguments:
+// insertionPoint - The node before which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the end of the range.
+// range - The range to splice in.
+//
+void LIR::Range::InsertBefore(GenTree* insertionPoint, Range&& range)
+{
+ assert(!range.IsEmpty());
+ FinishInsertBefore(insertionPoint, range.m_firstNode, range.m_lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAfter: Inserts a range after another node in `this` range.
+//
+// Arguments:
+// insertionPoint - The node after which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the beginning of the range.
+// range - The range to splice in.
+//
+void LIR::Range::InsertAfter(GenTree* insertionPoint, Range&& range)
+{
+ assert(!range.IsEmpty());
+ FinishInsertAfter(insertionPoint, range.m_firstNode, range.m_lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAtBeginning: Inserts a node at the beginning of this range.
+//
+// Arguments:
+// node - The node to insert. Must not be part of any range.
+//
+void LIR::Range::InsertAtBeginning(GenTree* node)
+{
+ InsertBefore(m_firstNode, node);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAtEnd: Inserts a node at the end of this range.
+//
+// Arguments:
+// node - The node to insert. Must not be part of any range.
+//
+void LIR::Range::InsertAtEnd(GenTree* node)
+{
+ InsertAfter(m_lastNode, node);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAtBeginning: Inserts a range at the beginning of `this` range.
+//
+// Arguments:
+// range - The range to splice in.
+//
+void LIR::Range::InsertAtBeginning(Range&& range)
+{
+ InsertBefore(m_firstNode, std::move(range));
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAtEnd: Inserts a range at the end of `this` range.
+//
+// Arguments:
+// range - The range to splice in.
+//
+void LIR::Range::InsertAtEnd(Range&& range)
+{
+ InsertAfter(m_lastNode, std::move(range));
+}
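+
+// Illustrative sketch (hypothetical variables): splicing the nodes of one range
+// onto the end of another, assuming `blockRange` and `otherRange` are LIR::Range
+// values and `otherRange` is non-empty as required by the splicing overloads:
+//
+//     blockRange.InsertAtEnd(std::move(otherRange));
+//
+// After the splice, the moved-from range's nodes belong to `blockRange`.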
+
+//------------------------------------------------------------------------
+// LIR::Range::Remove: Removes a node from this range.
+//
+// Arguments:
+// node - The node to remove. Must be part of this range.
+//
+void LIR::Range::Remove(GenTree* node)
+{
+ assert(node != nullptr);
+ assert(Contains(node));
+
+ GenTree* prev = node->gtPrev;
+ GenTree* next = node->gtNext;
+
+ if (prev != nullptr)
+ {
+ prev->gtNext = next;
+ }
+ else
+ {
+ assert(node == m_firstNode);
+ m_firstNode = next;
+ }
+
+ if (next != nullptr)
+ {
+ next->gtPrev = prev;
+ }
+ else
+ {
+ assert(node == m_lastNode);
+ m_lastNode = prev;
+ }
+
+ node->gtPrev = nullptr;
+ node->gtNext = nullptr;
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::Remove: Removes a subrange from this range.
+//
+// Both the start and the end of the subrange must be part of this range.
+//
+// Arguments:
+// firstNode - The first node in the subrange.
+// lastNode - The last node in the subrange.
+//
+// Returns:
+// A mutable range containing the removed nodes.
+//
+LIR::Range LIR::Range::Remove(GenTree* firstNode, GenTree* lastNode)
+{
+ assert(firstNode != nullptr);
+ assert(lastNode != nullptr);
+ assert(Contains(firstNode));
+ assert((firstNode == lastNode) || firstNode->Precedes(lastNode));
+
+ GenTree* prev = firstNode->gtPrev;
+ GenTree* next = lastNode->gtNext;
+
+ if (prev != nullptr)
+ {
+ prev->gtNext = next;
+ }
+ else
+ {
+ assert(firstNode == m_firstNode);
+ m_firstNode = next;
+ }
+
+ if (next != nullptr)
+ {
+ next->gtPrev = prev;
+ }
+ else
+ {
+ assert(lastNode == m_lastNode);
+ m_lastNode = prev;
+ }
+
+ firstNode->gtPrev = nullptr;
+ lastNode->gtNext = nullptr;
+
+ return Range(firstNode, lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::Remove: Removes a subrange from this range.
+//
+// Arguments:
+// range - The subrange to remove. Must be part of this range.
+//
+// Returns:
+// A mutable range containing the removed nodes.
+//
+LIR::Range LIR::Range::Remove(ReadOnlyRange&& range)
+{
+ return Remove(range.m_firstNode, range.m_lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::Delete: Deletes a node from this range.
+//
+// Note that the deleted node must not be used after this function has
+// been called. If the deleted node is part of a block, this function also
+// calls `Compiler::lvaDecRefCnts` as necessary.
+//
+// Arguments:
+// node - The node to delete. Must be part of this range.
+// block - The block that contains the node, if any. May be null.
+// compiler - The compiler context. May be null if block is null.
+//
+void LIR::Range::Delete(Compiler* compiler, BasicBlock* block, GenTree* node)
+{
+ assert(node != nullptr);
+ assert((block == nullptr) == (compiler == nullptr));
+
+ Remove(node);
+
+ if (block != nullptr)
+ {
+ if (((node->OperGet() == GT_CALL) && ((node->gtFlags & GTF_CALL_UNMANAGED) != 0)) ||
+ (node->OperIsLocal() && !node->IsPhiNode()))
+ {
+ compiler->lvaDecRefCnts(block, node);
+ }
+ }
+
+ DEBUG_DESTROY_NODE(node);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::Delete: Deletes a subrange from this range.
+//
+// Both the start and the end of the subrange must be part of this range.
+// Note that the deleted nodes must not be used after this function has
+// been called. If the deleted nodes are part of a block, this function
+// also calls `Compiler::lvaDecRefCnts` as necessary.
+//
+// Arguments:
+// firstNode - The first node in the subrange.
+// lastNode - The last node in the subrange.
+// block - The block that contains the subrange, if any. May be null.
+// compiler - The compiler context. May be null if block is null.
+//
+void LIR::Range::Delete(Compiler* compiler, BasicBlock* block, GenTree* firstNode, GenTree* lastNode)
+{
+ assert(firstNode != nullptr);
+ assert(lastNode != nullptr);
+ assert((block == nullptr) == (compiler == nullptr));
+
+ Remove(firstNode, lastNode);
+
+ assert(lastNode->gtNext == nullptr);
+
+ if (block != nullptr)
+ {
+ for (GenTree* node = firstNode; node != nullptr; node = node->gtNext)
+ {
+ if (((node->OperGet() == GT_CALL) && ((node->gtFlags & GTF_CALL_UNMANAGED) != 0)) ||
+ (node->OperIsLocal() && !node->IsPhiNode()))
+ {
+ compiler->lvaDecRefCnts(block, node);
+ }
+ }
+ }
+
+#ifdef DEBUG
+ // We can't do this in the loop above because it causes `IsPhiNode` to return a false negative
+ // for `GT_STORE_LCL_VAR` nodes that participate in phi definitions.
+ for (GenTree* node = firstNode; node != nullptr; node = node->gtNext)
+ {
+ DEBUG_DESTROY_NODE(node);
+ }
+#endif
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::Delete: Deletes a subrange from this range.
+//
+// Both the start and the end of the subrange must be part of this range.
+// Note that the deleted nodes must not be used after this function has
+// been called. If the deleted nodes are part of a block, this function
+// also calls `Compiler::lvaDecRefCnts` as necessary.
+//
+// Arguments:
+// range - The subrange to delete.
+// block - The block that contains the subrange, if any. May be null.
+// compiler - The compiler context. May be null if block is null.
+//
+void LIR::Range::Delete(Compiler* compiler, BasicBlock* block, ReadOnlyRange&& range)
+{
+ Delete(compiler, block, range.m_firstNode, range.m_lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::TryGetUse: Try to find the use for a given node.
+//
+// Arguments:
+// node - The node for which to find the corresponding use.
+// use (out) - The use of the corresponding node, if any. Invalid if
+// this method returns false.
+//
+// Return Value: Returns true if a use was found; false otherwise.
+//
+bool LIR::Range::TryGetUse(GenTree* node, Use* use)
+{
+ assert(node != nullptr);
+ assert(use != nullptr);
+ assert(Contains(node));
+
+ // Don't bother looking for uses of nodes that are not values.
+ // If the node is the last node, we won't find a use (and we would
+ // end up creating an illegal range if we tried).
+ if (node->IsValue() && (node != LastNode()))
+ {
+ for (GenTree* n : ReadOnlyRange(node->gtNext, m_lastNode))
+ {
+ GenTree** edge;
+ if (n->TryGetUse(node, &edge))
+ {
+ *use = Use(*this, edge, n);
+ return true;
+ }
+ }
+ }
+
+ *use = Use();
+ return false;
+}
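+
+// Illustrative sketch (hypothetical variables): locating the use of a node and
+// redirecting it to a new constant, using the TryGetUse/ReplaceWith pattern from
+// this file. `range` is assumed to be the LIR::Range for a block and `node` a
+// value-producing node it contains:
+//
+//     LIR::Use use;
+//     if (range.TryGetUse(node, &use))
+//     {
+//         GenTree* constantOne = compiler->gtNewIconNode(1);
+//         range.InsertAfter(node, constantOne);
+//         use.ReplaceWith(compiler, constantOne);
+//     }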
+
+//------------------------------------------------------------------------
+// LIR::Range::GetMarkedRange: Computes the subrange that includes all nodes
+//                             in the dataflow trees rooted at the currently
+//                             marked set of nodes.
+//
+// This method logically uses the following algorithm to compute the
+// range:
+//
+// worklist = { set }
+// firstNode = start
+// isClosed = true
+//
+// while not worklist.isEmpty:
+// if not worklist.contains(firstNode):
+// isClosed = false
+// else:
+// for operand in firstNode:
+// worklist.add(operand)
+//
+// worklist.remove(firstNode)
+//
+// firstNode = firstNode.previousNode
+//
+// return firstNode
+//
+// Instead of using a set for the worklist, the implementation uses the
+// `LIR::Mark` bit of the `GenTree::LIRFlags` field to track whether or
+// not a node is in the worklist.
+//
+// Note also that this algorithm depends on LIR nodes being SDSU, on SDSU defs
+// and uses occurring in the same block, and on correct dataflow (i.e. defs
+// occurring before uses).
+//
+// Arguments:
+// root - The root of the dataflow tree.
+// isClosed - An output parameter that is set to true if the returned
+// range contains only nodes in the dataflow tree and false
+// otherwise.
+//
+// Returns:
+// The computed subrange.
+//
+LIR::ReadOnlyRange LIR::Range::GetMarkedRange(unsigned markCount,
+ GenTree* start,
+ bool* isClosed,
+ unsigned* sideEffects) const
+{
+ assert(markCount != 0);
+ assert(start != nullptr);
+ assert(isClosed != nullptr);
+ assert(sideEffects != nullptr);
+
+ bool sawUnmarkedNode = false;
+ unsigned sideEffectsInRange = 0;
+
+ GenTree* firstNode = start;
+ GenTree* lastNode = nullptr;
+ for (;;)
+ {
+ if ((firstNode->gtLIRFlags & LIR::Flags::Mark) != 0)
+ {
+ if (lastNode == nullptr)
+ {
+ lastNode = firstNode;
+ }
+
+ // Mark the node's operands
+ for (GenTree* operand : firstNode->Operands())
+ {
+ // Do not mark nodes that do not appear in the execution order
+ if (operand->OperGet() == GT_ARGPLACE)
+ {
+ continue;
+ }
+
+ operand->gtLIRFlags |= LIR::Flags::Mark;
+ markCount++;
+ }
+
+ // Unmark the node and update `firstNode`
+ firstNode->gtLIRFlags &= ~LIR::Flags::Mark;
+ markCount--;
+ }
+ else if (lastNode != nullptr)
+ {
+ sawUnmarkedNode = true;
+ }
+
+ if (lastNode != nullptr)
+ {
+ sideEffectsInRange |= (firstNode->gtFlags & GTF_ALL_EFFECT);
+ }
+
+ if (markCount == 0)
+ {
+ break;
+ }
+
+ firstNode = firstNode->gtPrev;
+
+ // This assert will fail if the dataflow that feeds the root node
+ // is incorrect in that it crosses a block boundary or if it involves
+ // a use that occurs before its corresponding def.
+ assert(firstNode != nullptr);
+ }
+
+ assert(lastNode != nullptr);
+
+ *isClosed = !sawUnmarkedNode;
+ *sideEffects = sideEffectsInRange;
+ return ReadOnlyRange(firstNode, lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::GetTreeRange: Computes the subrange that includes all nodes
+// in the dataflow tree rooted at a particular
+// node.
+//
+// Arguments:
+// root - The root of the dataflow tree.
+// isClosed - An output parameter that is set to true if the returned
+// range contains only nodes in the dataflow tree and false
+// otherwise.
+//
+// Returns:
+// The computed subrange.
+LIR::ReadOnlyRange LIR::Range::GetTreeRange(GenTree* root, bool* isClosed) const
+{
+ unsigned unused;
+ return GetTreeRange(root, isClosed, &unused);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::GetTreeRange: Computes the subrange that includes all nodes
+// in the dataflow tree rooted at a particular
+// node.
+//
+// Arguments:
+// root - The root of the dataflow tree.
+// isClosed - An output parameter that is set to true if the returned
+// range contains only nodes in the dataflow tree and false
+// otherwise.
+// sideEffects - An output parameter that summarizes the side effects
+// contained in the returned range.
+//
+// Returns:
+// The computed subrange.
+LIR::ReadOnlyRange LIR::Range::GetTreeRange(GenTree* root, bool* isClosed, unsigned* sideEffects) const
+{
+ assert(root != nullptr);
+
+ // Mark the root of the tree
+ const unsigned markCount = 1;
+ root->gtLIRFlags |= LIR::Flags::Mark;
+
+ return GetMarkedRange(markCount, root, isClosed, sideEffects);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::GetRangeOfOperandTrees: Computes the subrange that includes
+//                                     all nodes in the dataflow trees rooted
+//                                     by the operands to a particular node.
+//
+// Arguments:
+// root - The root of the dataflow tree.
+// isClosed - An output parameter that is set to true if the returned
+// range contains only nodes in the dataflow tree and false
+// otherwise.
+// sideEffects - An output parameter that summarizes the side effects
+// contained in the returned range.
+//
+// Returns:
+// The computed subrange.
+//
+LIR::ReadOnlyRange LIR::Range::GetRangeOfOperandTrees(GenTree* root, bool* isClosed, unsigned* sideEffects) const
+{
+ assert(root != nullptr);
+ assert(isClosed != nullptr);
+ assert(sideEffects != nullptr);
+
+ // Mark the root node's operands
+ unsigned markCount = 0;
+ for (GenTree* operand : root->Operands())
+ {
+ operand->gtLIRFlags |= LIR::Flags::Mark;
+ markCount++;
+ }
+
+ if (markCount == 0)
+ {
+ *isClosed = true;
+ *sideEffects = 0;
+ return ReadOnlyRange();
+ }
+
+ return GetMarkedRange(markCount, root, isClosed, sideEffects);
+}
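+
+// Example usage (illustrative sketch; `blockRange`, `targetRange`, `node`, and
+// `insertionPoint` are placeholder names): `GetTreeRange` composes with the
+// range-based `Remove` and `InsertBefore` overloads to relocate an entire
+// computation when the computed range is closed:
+//
+//     bool isClosed;
+//     LIR::ReadOnlyRange treeRange = blockRange.GetTreeRange(node, &isClosed);
+//     if (isClosed)
+//     {
+//         targetRange.InsertBefore(insertionPoint, blockRange.Remove(std::move(treeRange)));
+//     }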
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// LIR::Range::CheckLIR: Performs a set of correctness checks on the LIR
+// contained in this range.
+//
+// This method checks the following properties:
+// - Defs are singly-used
+// - Uses follow defs
+// - Uses are correctly linked into the block
+// - Nodes that do not produce values are not used
+// - Only LIR nodes are present in the block
+// - If any phi nodes are present in the range, they precede all other
+// nodes
+//
+// The first four properties are verified by walking the range's LIR in execution order,
+// inserting defs into a set as they are visited, and removing them as they are used. The
+// different cases are distinguished only when an error is detected.
+//
+// Arguments:
+// compiler - A compiler context.
+// checkUnusedValues - If true, verify that all unused values are flagged
+// with `LIR::Flags::IsUnusedValue`.
+//
+// Return Value:
+// 'true' if the LIR for the specified range is legal.
+//
+bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const
+{
+ if (IsEmpty())
+ {
+ // Nothing more to check.
+ return true;
+ }
+
+ // Check the gtNext/gtPrev links: (1) ensure there are no circularities, (2) ensure the gtPrev list is
+ // precisely the inverse of the gtNext list.
+ //
+ // To detect circularity, use the "tortoise and hare" 2-pointer algorithm.
+
+ GenTree* slowNode = FirstNode();
+ assert(slowNode != nullptr); // because it's a non-empty range
+ GenTree* fastNode1 = nullptr;
+ GenTree* fastNode2 = slowNode;
+ GenTree* prevSlowNode = nullptr;
+ while (((fastNode1 = fastNode2->gtNext) != nullptr) && ((fastNode2 = fastNode1->gtNext) != nullptr))
+ {
+ if ((slowNode == fastNode1) || (slowNode == fastNode2))
+ {
+ assert(!"gtNext nodes have a circularity!");
+ }
+ assert(slowNode->gtPrev == prevSlowNode);
+ prevSlowNode = slowNode;
+ slowNode = slowNode->gtNext;
+ assert(slowNode != nullptr); // the fastNodes would have gone null first.
+ }
+ // If we get here, the list had no circularities, so either fastNode1 or fastNode2 must be nullptr.
+ assert((fastNode1 == nullptr) || (fastNode2 == nullptr));
+
+ // Need to check the rest of the gtPrev links.
+ while (slowNode != nullptr)
+ {
+ assert(slowNode->gtPrev == prevSlowNode);
+ prevSlowNode = slowNode;
+ slowNode = slowNode->gtNext;
+ }
+
+ SmallHashTable<GenTree*, bool, 32> unusedDefs(compiler);
+
+ bool pastPhis = false;
+ GenTree* prev = nullptr;
+ for (Iterator node = begin(), end = this->end(); node != end; prev = *node, ++node)
+ {
+ // Verify that the node is allowed in LIR.
+ assert(node->IsLIR());
+
+ // TODO: validate catch arg stores
+
+ // Check that all phi nodes (if any) occur at the start of the range.
+ if ((node->OperGet() == GT_PHI_ARG) || (node->OperGet() == GT_PHI) || node->IsPhiDefn())
+ {
+ assert(!pastPhis);
+ }
+ else
+ {
+ pastPhis = true;
+ }
+
+ for (GenTree** useEdge : node->UseEdges())
+ {
+ GenTree* def = *useEdge;
+
+ assert((!checkUnusedValues || ((def->gtLIRFlags & LIR::Flags::IsUnusedValue) == 0)) &&
+ "operands should never be marked as unused values");
+
+ if (def->OperGet() == GT_ARGPLACE)
+ {
+ // ARGPLACE nodes are not represented in the LIR sequence. Ignore them.
+ continue;
+ }
+ else if (!def->IsValue())
+ {
+ // Calls may contain "uses" of nodes that do not produce a value. This is an artifact of
+ // the HIR and should probably be fixed, but doing so is an unknown amount of work.
+ assert(node->OperGet() == GT_CALL);
+ continue;
+ }
+
+ bool v;
+ bool foundDef = unusedDefs.TryRemove(def, &v);
+ if (!foundDef)
+ {
+ // First, scan backwards and look for a preceding use.
+ for (GenTree* prev = *node; prev != nullptr; prev = prev->gtPrev)
+ {
+ // TODO: dump the users and the def
+ GenTree** earlierUseEdge;
+ bool foundEarlierUse = prev->TryGetUse(def, &earlierUseEdge) && earlierUseEdge != useEdge;
+ assert(!foundEarlierUse && "found multiply-used LIR node");
+ }
+
+ // The def did not precede the use. Check to see if it exists in the block at all.
+ for (GenTree* next = node->gtNext; next != nullptr; next = next->gtNext)
+ {
+ // TODO: dump the user and the def
+ assert(next != def && "found def after use");
+ }
+
+ // The def might not be a node that produces a value.
+ assert(def->IsValue() && "found use of a node that does not produce a value");
+
+ // By this point, the only possibility is that the def is not threaded into the LIR sequence.
+ assert(false && "found use of a node that is not in the LIR sequence");
+ }
+ }
+
+ if (node->IsValue())
+ {
+ bool added = unusedDefs.AddOrUpdate(*node, true);
+ assert(added);
+ }
+ }
+
+ assert(prev == m_lastNode);
+
+ // At this point the unusedDefs map should contain only unused values.
+ if (checkUnusedValues)
+ {
+ for (auto kvp : unusedDefs)
+ {
+ GenTree* node = kvp.Key();
+ assert(((node->gtLIRFlags & LIR::Flags::IsUnusedValue) != 0) && "found an unmarked unused value");
+ }
+ }
+
+ return true;
+}
+
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// LIR::AsRange: Returns an LIR view of the given basic block.
+//
+LIR::Range& LIR::AsRange(BasicBlock* block)
+{
+ return *static_cast<Range*>(block);
+}
+
+//------------------------------------------------------------------------
+// LIR::EmptyRange: Constructs and returns an empty range.
+//
+// static
+LIR::Range LIR::EmptyRange()
+{
+ return Range(nullptr, nullptr);
+}
+
+//------------------------------------------------------------------------
+// LIR::SeqTree:
+// Given a newly created, unsequenced HIR tree, set the evaluation
+// order (call gtSetEvalOrder) and sequence the tree (set gtNext/gtPrev
+// pointers by calling fgSetTreeSeq), and return a Range representing
+// the list of nodes. It is expected this will later be spliced into
+// an LIR range.
+//
+// Arguments:
+// compiler - The Compiler context.
+// tree - The tree to sequence.
+//
+// Return Value: The newly constructed range.
+//
+// static
+LIR::Range LIR::SeqTree(Compiler* compiler, GenTree* tree)
+{
+ // TODO-LIR: it would be great to assert that the tree has not already been
+ // threaded into an order, but I'm not sure that will be practical at this
+ // point.
+
+ compiler->gtSetEvalOrder(tree);
+ return Range(compiler->fgSetTreeSeq(tree, nullptr, true), tree);
+}
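+
+// Example usage (illustrative sketch; `block`, `tree`, and `insertionPoint`
+// are placeholder names): a newly built, unsequenced tree is usually
+// sequenced with `SeqTree` and immediately spliced into a block's range:
+//
+//     LIR::Range& blockRange = LIR::AsRange(block);
+//     blockRange.InsertAfter(insertionPoint, LIR::SeqTree(compiler, tree));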
+
+//------------------------------------------------------------------------
+// LIR::InsertBeforeTerminator:
+// Insert an LIR range before the terminating instruction in the given
+// basic block. If the basic block has no terminating instruction (i.e.
+// it has a jump kind that is not `BBJ_RETURN`, `BBJ_COND`, or
+// `BBJ_SWITCH`), the range is inserted at the end of the block.
+//
+// Arguments:
+// block - The block in which to insert the range.
+// range - The range to insert.
+//
+void LIR::InsertBeforeTerminator(BasicBlock* block, LIR::Range&& range)
+{
+ LIR::Range& blockRange = LIR::AsRange(block);
+
+ GenTree* insertionPoint = nullptr;
+ if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH) || (block->bbJumpKind == BBJ_RETURN))
+ {
+ insertionPoint = blockRange.LastNode();
+ assert(insertionPoint != nullptr);
+
+#if DEBUG
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ assert(insertionPoint->OperGet() == GT_JTRUE);
+ break;
+
+ case BBJ_SWITCH:
+ assert((insertionPoint->OperGet() == GT_SWITCH) || (insertionPoint->OperGet() == GT_SWITCH_TABLE));
+ break;
+
+ case BBJ_RETURN:
+ assert((insertionPoint->OperGet() == GT_RETURN) ||
+ (insertionPoint->OperGet() == GT_JMP) ||
+ (insertionPoint->OperGet() == GT_CALL));
+ break;
+
+ default:
+ unreached();
+ }
+#endif
+ }
+
+ blockRange.InsertBefore(insertionPoint, std::move(range));
+}
diff --git a/src/jit/lir.h b/src/jit/lir.h
new file mode 100644
index 0000000000..e633303244
--- /dev/null
+++ b/src/jit/lir.h
@@ -0,0 +1,310 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _LIR_H_
+#define _LIR_H_
+
+class Compiler;
+struct GenTree;
+struct BasicBlock;
+
+class LIR final
+{
+public:
+ class Range;
+
+ //------------------------------------------------------------------------
+ // LIR::Flags: Defines the set of flags that may appear in the
+ // GenTree::gtLIRFlags field.
+ class Flags final
+ {
+ // Disallow the creation of values of this type.
+ Flags() = delete;
+
+ public:
+ enum : unsigned char
+ {
+ None = 0x00,
+
+ Mark = 0x01, // An arbitrary "mark" bit that can be used in place of
+ // a more expensive data structure when processing a set
+ // of LIR nodes. See for example `LIR::GetTreeRange`.
+
+ IsUnusedValue = 0x02, // Set on a node if it produces a value that is not
+ // subsequently used. Should never be set on nodes
+ // that return `false` for `GenTree::IsValue`. Note
+ // that this bit should not be assumed to be valid
+ // at all points during compilation: it is currently
+ // only computed during target-dependent lowering.
+ };
+ };
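+
+ // Example (illustrative; `node` is a placeholder): the flags are manipulated
+ // directly on `GenTree::gtLIRFlags`:
+ //
+ //     node->gtLIRFlags |= LIR::Flags::Mark;
+ //     bool marked = (node->gtLIRFlags & LIR::Flags::Mark) != 0;
+ //     node->gtLIRFlags &= ~LIR::Flags::Mark;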
+
+ //------------------------------------------------------------------------
+ // LIR::Use: Represents a use <-> def edge between two nodes in a range
+ // of LIR. Provides utilities to point the use to a different
+ // def. Note that because this type deals in edges between
+ // nodes, it represents the single use of the def.
+ //
+ class Use final
+ {
+ private:
+ Range* m_range;
+ GenTree** m_edge;
+ GenTree* m_user;
+
+ public:
+ Use();
+ Use(const Use& other);
+ Use(Range& range, GenTree** edge, GenTree* user);
+
+ Use& operator=(const Use& other);
+ Use& operator=(Use&& other);
+
+ static Use GetDummyUse(Range& range, GenTree* node);
+
+ GenTree* Def() const;
+ GenTree* User() const;
+
+ bool IsInitialized() const;
+ void AssertIsValid() const;
+ bool IsDummyUse() const;
+
+ void ReplaceWith(Compiler* compiler, GenTree* replacement);
+ unsigned ReplaceWithLclVar(Compiler* compiler, unsigned blockWeight, unsigned lclNum = BAD_VAR_NUM);
+ };
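+
+ // Example usage (illustrative sketch; `blockRange`, `def`, and `block` are
+ // placeholder names): a common pattern is to spill a def to a new local
+ // through its use:
+ //
+ //     LIR::Use use;
+ //     if (blockRange.TryGetUse(def, &use))
+ //     {
+ //         unsigned lclNum = use.ReplaceWithLclVar(compiler, block->getBBWeight(compiler));
+ //     }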
+
+ //------------------------------------------------------------------------
+ // LIR::ReadOnlyRange:
+ //
+ // Represents a contiguous range of LIR nodes that may be a subrange of
+ // a containing range. Provides a small set of utilities for iteration.
+ // Instances of this type are primarily created by and provided to
+ // analysis and utility methods on LIR::Range.
+ //
+ // Although some pains have been taken to help guard against the existence
+ // of invalid subranges, it remains possible to create them. For example,
+ // consider the following:
+ //
+ // // View the block as a range
+ // LIR::Range& blockRange = LIR::AsRange(block);
+ //
+ // // Create a range from the first non-phi node in the block to the
+ // // last node in the block
+ // LIR::ReadOnlyRange nonPhis = blockRange.NonPhiNodes();
+ //
+ // // Remove the last node from the block
+ // blockRange.Remove(blockRange.LastNode());
+ //
+ // After the removal of the last node in the block, the last node of
+ // nonPhis is no longer linked to any of the other nodes in nonPhis. Due
+ // to issues such as the above, some care must be taken in order to
+ // ensure that ranges are not used once they have been invalidated.
+ //
+ class ReadOnlyRange
+ {
+ friend class LIR;
+ friend class Range;
+ friend struct BasicBlock;
+
+ private:
+ GenTree* m_firstNode;
+ GenTree* m_lastNode;
+
+ ReadOnlyRange(GenTree* firstNode, GenTree* lastNode);
+
+ ReadOnlyRange(const ReadOnlyRange& other) = delete;
+ ReadOnlyRange& operator=(const ReadOnlyRange& other) = delete;
+
+ public:
+ class Iterator
+ {
+ friend class ReadOnlyRange;
+
+ GenTree* m_node;
+
+ Iterator(GenTree* begin) : m_node(begin)
+ {
+ }
+
+ public:
+ Iterator() : m_node(nullptr)
+ {
+ }
+
+ inline GenTree* operator*()
+ {
+ return m_node;
+ }
+
+ inline GenTree* operator->()
+ {
+ return m_node;
+ }
+
+ inline bool operator==(const Iterator& other) const
+ {
+ return m_node == other.m_node;
+ }
+
+ inline bool operator!=(const Iterator& other) const
+ {
+ return m_node != other.m_node;
+ }
+
+ inline Iterator& operator++()
+ {
+ m_node = (m_node == nullptr) ? nullptr : m_node->gtNext;
+ return *this;
+ }
+ };
+
+ class ReverseIterator
+ {
+ friend class ReadOnlyRange;
+
+ GenTree* m_node;
+
+ ReverseIterator(GenTree* begin) : m_node(begin)
+ {
+ }
+
+ public:
+ ReverseIterator() : m_node(nullptr)
+ {
+ }
+
+ inline GenTree* operator*()
+ {
+ return m_node;
+ }
+
+ inline GenTree* operator->()
+ {
+ return m_node;
+ }
+
+ inline bool operator==(const ReverseIterator& other) const
+ {
+ return m_node == other.m_node;
+ }
+
+ inline bool operator!=(const ReverseIterator& other) const
+ {
+ return m_node != other.m_node;
+ }
+
+ inline ReverseIterator& operator++()
+ {
+ m_node = (m_node == nullptr) ? nullptr : m_node->gtPrev;
+ return *this;
+ }
+ };
+
+ ReadOnlyRange();
+ ReadOnlyRange(ReadOnlyRange&& other);
+
+ GenTree* FirstNode() const;
+ GenTree* LastNode() const;
+
+ bool IsEmpty() const;
+
+ Iterator begin() const;
+ Iterator end() const;
+
+ ReverseIterator rbegin() const;
+ ReverseIterator rend() const;
+
+#ifdef DEBUG
+ bool Contains(GenTree* node) const;
+#endif
+ };
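+
+ // Example (illustrative; `blockRange` is a placeholder): the iterators above
+ // allow a range to be traversed with a range-based for loop in execution
+ // order:
+ //
+ //     for (GenTree* node : blockRange)
+ //     {
+ //         // visit `node`
+ //     }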
+
+ //------------------------------------------------------------------------
+ // LIR::Range:
+ //
+ // Represents a contiguous range of LIR nodes. Provides a variety of
+ // utilities that modify the LIR contained in the range. Unlike
+ // `ReadOnlyRange`, values of this type may be edited.
+ //
+ // Because it is not a final class, it is possible to slice values of this
+ // type; this is especially dangerous when the Range value is actually of
+ // type `BasicBlock`. As a result, this type is not copyable and it is
+ // not possible to view a `BasicBlock` as anything other than a `Range&`.
+ //
+ class Range : public ReadOnlyRange
+ {
+ friend class LIR;
+ friend struct BasicBlock;
+
+ private:
+ Range(GenTree* firstNode, GenTree* lastNode);
+
+ Range(const Range& other) = delete;
+ Range& operator=(const Range& other) = delete;
+
+ ReadOnlyRange GetMarkedRange(unsigned markCount, GenTree* start, bool* isClosed, unsigned* sideEffects) const;
+
+ void FinishInsertBefore(GenTree* insertionPoint, GenTree* first, GenTree* last);
+ void FinishInsertAfter(GenTree* insertionPoint, GenTree* first, GenTree* last);
+
+ public:
+ Range();
+ Range(Range&& other);
+
+ GenTree* LastPhiNode() const;
+ GenTree* FirstNonPhiNode() const;
+ GenTree* FirstNonPhiOrCatchArgNode() const;
+
+ ReadOnlyRange PhiNodes() const;
+ ReadOnlyRange NonPhiNodes() const;
+
+ void InsertBefore(GenTree* insertionPoint, GenTree* node);
+ void InsertAfter(GenTree* insertionPoint, GenTree* node);
+
+ void InsertBefore(GenTree* insertionPoint, GenTree* node1, GenTree* node2);
+ void InsertBefore(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3);
+ void InsertBefore(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3, GenTree* node4);
+
+ void InsertAfter(GenTree* insertionPoint, GenTree* node1, GenTree* node2);
+ void InsertAfter(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3);
+ void InsertAfter(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3, GenTree* node4);
+
+ void InsertBefore(GenTree* insertionPoint, Range&& range);
+ void InsertAfter(GenTree* insertionPoint, Range&& range);
+
+ void InsertAtBeginning(GenTree* node);
+ void InsertAtEnd(GenTree* node);
+
+ void InsertAtBeginning(Range&& range);
+ void InsertAtEnd(Range&& range);
+
+ void Remove(GenTree* node);
+ Range Remove(GenTree* firstNode, GenTree* lastNode);
+ Range Remove(ReadOnlyRange&& range);
+
+ void Delete(Compiler* compiler, BasicBlock* block, GenTree* node);
+ void Delete(Compiler* compiler, BasicBlock* block, GenTree* firstNode, GenTree* lastNode);
+ void Delete(Compiler* compiler, BasicBlock* block, ReadOnlyRange&& range);
+
+ bool TryGetUse(GenTree* node, Use* use);
+
+ ReadOnlyRange GetTreeRange(GenTree* root, bool* isClosed) const;
+ ReadOnlyRange GetTreeRange(GenTree* root, bool* isClosed, unsigned* sideEffects) const;
+ ReadOnlyRange GetRangeOfOperandTrees(GenTree* root, bool* isClosed, unsigned* sideEffects) const;
+
+#ifdef DEBUG
+ bool CheckLIR(Compiler* compiler, bool checkUnusedValues = false) const;
+#endif
+ };
+
+public:
+ static Range& AsRange(BasicBlock* block);
+
+ static Range EmptyRange();
+ static Range SeqTree(Compiler* compiler, GenTree* tree);
+
+ static void InsertBeforeTerminator(BasicBlock* block, LIR::Range&& range);
+};
+
+#endif // _LIR_H_
diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp
new file mode 100644
index 0000000000..19d326303e
--- /dev/null
+++ b/src/jit/liveness.cpp
@@ -0,0 +1,3133 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// =================================================================================
+// Code that works with liveness and related concepts (interference, debug scope)
+// =================================================================================
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if !defined(_TARGET_64BIT_)
+#include "decomposelongs.h"
+#endif
+
+/*****************************************************************************
+ *
+ * Helper for Compiler::fgPerBlockLocalVarLiveness().
+ * The goal is to compute the USE and DEF sets for a basic block.
+ * However, with the improved data flow analysis (DFA),
+ * we do not mark x as used in x = f(x) when there are no side effects in f(x).
+ * 'asgdLclVar' is set when 'tree' is part of an expression with no side effects
+ * which is assigned to asgdLclVar, i.e. asgdLclVar = (... tree ...)
+ */
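+// For example (illustrative): given a statement of the form x = x + y where
+// the RHS has no side effects, the caller passes the store to x as 'asgdLclVar';
+// the store is then flagged GTF_VAR_USEDEF and, when optimizing, the RHS
+// appearance of x is not added to the block's use-before-def set, while y is
+// recorded as an ordinary use.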
+void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar)
+{
+ bool rhsUSEDEF = false;
+ unsigned lclNum;
+ unsigned lhsLclNum;
+ LclVarDsc* varDsc;
+
+ noway_assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_VAR_ADDR || tree->gtOper == GT_LCL_FLD ||
+ tree->gtOper == GT_LCL_FLD_ADDR || tree->gtOper == GT_STORE_LCL_VAR ||
+ tree->gtOper == GT_STORE_LCL_FLD);
+
+ if (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_VAR_ADDR || tree->gtOper == GT_STORE_LCL_VAR)
+ {
+ lclNum = tree->gtLclNum;
+ }
+ else
+ {
+ noway_assert(tree->OperIsLocalField());
+ lclNum = tree->gtLclFld.gtLclNum;
+ }
+
+ noway_assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+
+ // We should never encounter a reference to a lclVar that has a zero refCnt.
+ if (varDsc->lvRefCnt == 0 && (!varTypeIsPromotable(varDsc) || !varDsc->lvPromoted))
+ {
+ JITDUMP("Found reference to V%02u with zero refCnt.\n", lclNum);
+ assert(!"We should never encounter a reference to a lclVar that has a zero refCnt.");
+ varDsc->lvRefCnt = 1;
+ }
+
+ // NOTE: the analysis done below is neither necessary nor correct for LIR: it depends on
+ // the nodes that precede `asgdLclVar` in execution order to factor into the dataflow for the
+ // value being assigned to the local var, which is not necessarily the case without tree
+ // order. Furthermore, LIR is always traversed in an order that reflects the dataflow for the
+ // block.
+ if (asgdLclVar != nullptr)
+ {
+ assert(!compCurBB->IsLIR());
+
+ /* we have an assignment to a local var : asgdLclVar = ... tree ...
+ * check for x = f(x) case */
+
+ noway_assert(asgdLclVar->gtOper == GT_LCL_VAR || asgdLclVar->gtOper == GT_STORE_LCL_VAR);
+ noway_assert(asgdLclVar->gtFlags & GTF_VAR_DEF);
+
+ lhsLclNum = asgdLclVar->gtLclVarCommon.gtLclNum;
+
+ if ((lhsLclNum == lclNum) && ((tree->gtFlags & GTF_VAR_DEF) == 0) && (tree != asgdLclVar))
+ {
+ /* bingo - we have an x = f(x) case */
+ noway_assert(lvaTable[lhsLclNum].lvType != TYP_STRUCT);
+ asgdLclVar->gtFlags |= GTF_VAR_USEDEF;
+ rhsUSEDEF = true;
+ }
+ }
+
+ /* Is this a tracked variable? */
+
+ if (varDsc->lvTracked)
+ {
+ noway_assert(varDsc->lvVarIndex < lvaTrackedCount);
+
+ if ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
+ {
+ // if (!(fgCurUseSet & bitMask)) printf("V%02u,T%02u def at %08p\n", lclNum, varDsc->lvVarIndex, tree);
+ VarSetOps::AddElemD(this, fgCurDefSet, varDsc->lvVarIndex);
+ }
+ else
+ {
+ // if (!(fgCurDefSet & bitMask))
+ // {
+ // printf("V%02u,T%02u use at ", lclNum, varDsc->lvVarIndex);
+ // printTreeID(tree);
+ // printf("\n");
+ // }
+
+ /* We have the following scenarios:
+ * 1. "x += something" - in this case x is flagged GTF_VAR_USEASG
+ * 2. "x = ... x ..." - the LHS x is flagged GTF_VAR_USEDEF,
+ * the RHS x has rhsUSEDEF = true
+ * (both set by the code above)
+ *
+ * We should not mark an USE of x in the above cases provided the value "x" is not used
+ * further up in the tree. For example "while (i++)" is required to mark i as used.
+ */
+
+ /* make sure we don't include USEDEF variables in the USE set
+ * The first test is for the LHS, the second (!rhsUSEDEF) is for any var in the RHS */
+
+ if ((tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
+ {
+ /* Not a special flag - check to see if used to assign to itself */
+
+ if (rhsUSEDEF)
+ {
+ /* assign to itself - do not include it in the USE set */
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ return;
+ }
+ }
+ }
+
+ /* Fall through for the "good" cases above - add the variable to the USE set */
+
+ if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
+ {
+ VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
+ }
+
+ // For defs, also add to the (all) def set.
+ if ((tree->gtFlags & GTF_VAR_DEF) != 0)
+ {
+ VarSetOps::AddElemD(this, fgCurDefSet, varDsc->lvVarIndex);
+ }
+ }
+ }
+ else if (varTypeIsStruct(varDsc))
+ {
+ noway_assert(!varDsc->lvTracked);
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType != PROMOTION_TYPE_NONE)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(bitMask, VarSetOps::MakeEmpty(this));
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ noway_assert(lvaTable[i].lvIsStructField);
+ if (lvaTable[i].lvTracked)
+ {
+ noway_assert(lvaTable[i].lvVarIndex < lvaTrackedCount);
+ VarSetOps::AddElemD(this, bitMask, lvaTable[i].lvVarIndex);
+ }
+ }
+
+ // For pure defs (i.e. not an "update" def which is also a use), add to the (all) def set.
+ if ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
+ {
+ VarSetOps::UnionD(this, fgCurDefSet, bitMask);
+ }
+ else if (!VarSetOps::IsSubset(this, bitMask, fgCurDefSet))
+ {
+ // Mark as used any struct fields that are not yet defined.
+ VarSetOps::UnionD(this, fgCurUseSet, bitMask);
+ }
+ }
+ }
+}
+
+/*****************************************************************************/
+void Compiler::fgLocalVarLiveness()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgLocalVarLiveness()\n");
+
+#ifndef LEGACY_BACKEND
+ if (compRationalIRForm)
+ {
+ lvaTableDump();
+ }
+#endif // !LEGACY_BACKEND
+ }
+#endif // DEBUG
+
+ // Init liveness data structures.
+ fgLocalVarLivenessInit();
+ assert(lvaSortAgain == false); // Set to false by lvaSortOnly()
+
+ EndPhase(PHASE_LCLVARLIVENESS_INIT);
+
+ // Make sure we haven't noted any partial last uses of promoted structs.
+ GetPromotedStructDeathVars()->RemoveAll();
+
+ // Initialize the per-block var sets.
+ fgInitBlockVarSets();
+
+ fgLocalVarLivenessChanged = false;
+ do
+ {
+ /* Figure out use/def info for all basic blocks */
+ fgPerBlockLocalVarLiveness();
+ EndPhase(PHASE_LCLVARLIVENESS_PERBLOCK);
+
+ /* Live variable analysis. */
+
+ fgStmtRemoved = false;
+ fgInterBlockLocalVarLiveness();
+ } while (fgStmtRemoved && fgLocalVarLivenessChanged);
+
+ // If we removed any dead code we will have set 'lvaSortAgain' via decRefCnts
+ if (lvaSortAgain)
+ {
+ JITDUMP("In fgLocalVarLiveness, setting lvaSortAgain back to false (set during dead-code removal)\n");
+ lvaSortAgain = false; // We don't re-Sort because we just performed LclVar liveness.
+ }
+
+ EndPhase(PHASE_LCLVARLIVENESS_INTERBLOCK);
+}
+
+/*****************************************************************************/
+void Compiler::fgLocalVarLivenessInit()
+{
+ // If necessary, re-sort the variable table by ref-count...before creating any varsets using this sorting.
+ if (lvaSortAgain)
+ {
+ JITDUMP("In fgLocalVarLivenessInit, sorting locals\n");
+ lvaSortByRefCount();
+ assert(lvaSortAgain == false); // Set to false by lvaSortOnly()
+ }
+
+#ifdef LEGACY_BACKEND // RyuJIT backend does not use interference info
+
+ for (unsigned i = 0; i < lclMAX_TRACKED; i++)
+ {
+ VarSetOps::AssignNoCopy(this, lvaVarIntf[i], VarSetOps::MakeEmpty(this));
+ }
+
+ /* If we're not optimizing at all, things are simple */
+ if (opts.MinOpts())
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(allOnes, VarSetOps::MakeFull(this));
+ for (unsigned i = 0; i < lvaTrackedCount; i++)
+ {
+ VarSetOps::Assign(this, lvaVarIntf[i], allOnes);
+ }
+ return;
+ }
+#endif // LEGACY_BACKEND
+
+ // We mark a lcl as must-init in a first pass of local variable
+ // liveness (Liveness1), then assertion prop eliminates the
+ // uninit-use of a variable Vk, asserting it will be init'ed to
+ // null. Then, in a second local-var liveness (Liveness2), the
+ // variable Vk is no longer live on entry to the method, since its
+ // uses have been replaced via constant propagation.
+ //
+ // This leads to a bug: since Vk is no longer live on entry, the
+ // register allocator sees Vk and an argument Vj as having
+ // disjoint lifetimes, and allocates them to the same register.
+ // But Vk is still marked "must-init", and this initialization (of
+ // the register) trashes the value in Vj.
+ //
+ // Therefore, initialize must-init to false for all variables in
+ // each liveness phase.
+ for (unsigned lclNum = 0; lclNum < lvaCount; ++lclNum)
+ {
+ lvaTable[lclNum].lvMustInit = false;
+ }
+}
+
+// Note that for the LEGACY_BACKEND this method is replaced with
+// fgLegacyPerStatementLocalVarLiveness and it lives in codegenlegacy.cpp
+//
+#ifndef LEGACY_BACKEND
+//------------------------------------------------------------------------
+// fgPerNodeLocalVarLiveness:
+// Set fgCurHeapUse and fgCurHeapDef when the global heap is read or updated
+// Call fgMarkUseDef for any Local variables encountered
+//
+// Arguments:
+// tree - The current node.
+// asgdLclVar - Either nullptr or the assignment's left-hand-side GT_LCL_VAR.
+// Used as an argument to fgMarkUseDef(); only valid for HIR blocks.
+//
+void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
+{
+ assert(tree != nullptr);
+ assert(asgdLclVar == nullptr || !compCurBB->IsLIR());
+
+ switch (tree->gtOper)
+ {
+ case GT_QMARK:
+ case GT_COLON:
+ // We never should encounter a GT_QMARK or GT_COLON node
+ noway_assert(!"unexpected GT_QMARK/GT_COLON");
+ break;
+
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_LCL_VAR_ADDR:
+ case GT_LCL_FLD_ADDR:
+ case GT_STORE_LCL_VAR:
+ case GT_STORE_LCL_FLD:
+ fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
+ break;
+
+ case GT_CLS_VAR:
+ // For Volatile indirection, first mutate the global heap
+ // see comments in ValueNum.cpp (under case GT_CLS_VAR)
+ // This models Volatile reads as def-then-use of the heap
+ // and allows for a CSE of a subsequent non-volatile read.
+ if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
+ {
+ // For any Volatile indirection, we must handle it as a
+ // definition of the global heap
+ fgCurHeapDef = true;
+ }
+ // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to assignment.
+ // Otherwise, we treat it as a use here.
+ if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
+ {
+ fgCurHeapUse = true;
+ }
+ break;
+
+ case GT_IND:
+ // For Volatile indirection, first mutate the global heap
+ // see comments in ValueNum.cpp (under case GT_CLS_VAR)
+ // This models Volatile reads as def-then-use of the heap
+ // and allows for a CSE of a subsequent non-volatile read.
+ if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+ {
+ // For any Volatile indirection, we must handle it as a
+ // definition of the global heap
+ fgCurHeapDef = true;
+ }
+
+ // If the GT_IND is the lhs of an assignment, we'll handle it
+ // as a heap def, when we get to assignment.
+ // Otherwise, we treat it as a use here.
+ if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
+ {
+ GenTreeLclVarCommon* dummyLclVarTree = nullptr;
+ bool dummyIsEntire = false;
+ GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+ if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
+ {
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ }
+ else
+ {
+ // Defines a local addr
+ assert(dummyLclVarTree != nullptr);
+ fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
+ }
+ }
+ break;
+
+ // These should have been morphed away to become GT_INDs:
+ case GT_FIELD:
+ case GT_INDEX:
+ unreached();
+ break;
+
+ // We'll assume these are use-then-defs of the heap.
+ case GT_LOCKADD:
+ case GT_XADD:
+ case GT_XCHG:
+ case GT_CMPXCHG:
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ fgCurHeapDef = true;
+ fgCurHeapHavoc = true;
+ break;
+
+ case GT_MEMORYBARRIER:
+ // Similar to any Volatile indirection, we must handle this as a definition of the global heap
+ fgCurHeapDef = true;
+ break;
+
+ // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later.
+ case GT_CALL:
+ {
+ GenTreeCall* call = tree->AsCall();
+ bool modHeap = true;
+ if (call->gtCallType == CT_HELPER)
+ {
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+
+ if (!s_helperCallProperties.MutatesHeap(helpFunc) && !s_helperCallProperties.MayRunCctor(helpFunc))
+ {
+ modHeap = false;
+ }
+ }
+ if (modHeap)
+ {
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ fgCurHeapDef = true;
+ fgCurHeapHavoc = true;
+ }
+ }
+
+ // If this is a p/invoke unmanaged call or if this is a tail-call
+ // and we have an unmanaged p/invoke call in the method,
+ // then we're going to run the p/invoke epilog.
+ // So we mark the FrameRoot as used by this instruction.
+ // This ensures that the block->bbVarUse will contain
+ // the FrameRoot local var if is it a tracked variable.
+
+ if ((tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged)))
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ /* Get the TCB local and mark it as used */
+
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (varDsc->lvTracked)
+ {
+ if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
+ {
+ VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
+ }
+ }
+ }
+ }
+
+ break;
+
+ default:
+
+ // Determine whether it defines a heap location.
+ if (tree->OperIsAssignment() || tree->OperIsBlkOp())
+ {
+ GenTreeLclVarCommon* dummyLclVarTree = nullptr;
+ if (!tree->DefinesLocal(this, &dummyLclVarTree))
+ {
+ // If it doesn't define a local, then it might update the heap.
+ fgCurHeapDef = true;
+ }
+ }
+ break;
+ }
+}
+
+void Compiler::fgPerStatementLocalVarLiveness(GenTree* startNode, GenTree* asgdLclVar)
+{
+ // The startNode must be the 1st node of the statement.
+ assert(startNode == compCurStmt->gtStmt.gtStmtList);
+
+ // The asgdLclVar node must be either nullptr or a GT_LCL_VAR or GT_STORE_LCL_VAR
+ assert((asgdLclVar == nullptr) || (asgdLclVar->gtOper == GT_LCL_VAR || asgdLclVar->gtOper == GT_STORE_LCL_VAR));
+
+ // We always walk every node in statement list
+ for (GenTreePtr node = startNode; node != nullptr; node = node->gtNext)
+ {
+ fgPerNodeLocalVarLiveness(node, asgdLclVar);
+ }
+}
+
+#endif // !LEGACY_BACKEND
+
+/*****************************************************************************/
+void Compiler::fgPerBlockLocalVarLiveness()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgPerBlockLocalVarLiveness()\n");
+ }
+#endif // DEBUG
+
+ BasicBlock* block;
+
+#if CAN_DISABLE_DFA
+
+ /* If we're not optimizing at all, things are simple */
+
+ if (opts.MinOpts())
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveAll, VarSetOps::MakeEmpty(this));
+
+ /* We simply make everything live everywhere */
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(this, liveAll, varDsc->lvVarIndex);
+ }
+ }
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ // Strictly speaking, the assignments for the "Def" cases aren't necessary here.
+ // The empty set would do as well. Use means "use-before-def", so as long as that's
+ // "all", this has the right effect.
+ VarSetOps::Assign(this, block->bbVarUse, liveAll);
+ VarSetOps::Assign(this, block->bbVarDef, liveAll);
+ VarSetOps::Assign(this, block->bbLiveIn, liveAll);
+ VarSetOps::Assign(this, block->bbLiveOut, liveAll);
+ block->bbHeapUse = true;
+ block->bbHeapDef = true;
+ block->bbHeapLiveIn = true;
+ block->bbHeapLiveOut = true;
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_EHFINALLYRET:
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::MakeEmpty(this));
+ break;
+ default:
+ break;
+ }
+ }
+ return;
+ }
+
+#endif // CAN_DISABLE_DFA
+
+ // Avoid allocations in the long case.
+ VarSetOps::AssignNoCopy(this, fgCurUseSet, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, fgCurDefSet, VarSetOps::MakeEmpty(this));
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ GenTreePtr tree;
+ GenTreePtr asgdLclVar;
+
+ VarSetOps::ClearD(this, fgCurUseSet);
+ VarSetOps::ClearD(this, fgCurDefSet);
+
+ fgCurHeapUse = false;
+ fgCurHeapDef = false;
+ fgCurHeapHavoc = false;
+
+ compCurBB = block;
+
+ if (!block->IsLIR())
+ {
+ for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ compCurStmt = stmt;
+
+ asgdLclVar = nullptr;
+ tree = stmt->gtStmt.gtStmtExpr;
+ noway_assert(tree);
+
+ // The following code checks if we have an assignment expression
+ // which may become a GTF_VAR_USEDEF - x=f(x).
+ // consider if LHS is local var - ignore if RHS contains SIDE_EFFECTS
+
+ if ((tree->gtOper == GT_ASG && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR) ||
+ tree->gtOper == GT_STORE_LCL_VAR)
+ {
+ noway_assert(tree->gtOp.gtOp1);
+ GenTreePtr rhsNode;
+ if (tree->gtOper == GT_ASG)
+ {
+ noway_assert(tree->gtOp.gtOp2);
+ asgdLclVar = tree->gtOp.gtOp1;
+ rhsNode = tree->gtOp.gtOp2;
+ }
+ else
+ {
+ asgdLclVar = tree;
+ rhsNode = tree->gtOp.gtOp1;
+ }
+
+ // If this is an assignment to local var with no SIDE EFFECTS,
+ // set asgdLclVar so that genMarkUseDef will flag potential
+ // x=f(x) expressions as GTF_VAR_USEDEF.
+ // Reset the flag before recomputing it - it may have been set before,
+ // but subsequent optimizations could have removed the rhs reference.
+ asgdLclVar->gtFlags &= ~GTF_VAR_USEDEF;
+ if ((rhsNode->gtFlags & GTF_SIDE_EFFECT) == 0)
+ {
+ noway_assert(asgdLclVar->gtFlags & GTF_VAR_DEF);
+ }
+ else
+ {
+ asgdLclVar = nullptr;
+ }
+ }
+
+#ifdef LEGACY_BACKEND
+ tree = fgLegacyPerStatementLocalVarLiveness(stmt->gtStmt.gtStmtList, NULL, asgdLclVar);
+
+ // We must have walked to the end of this statement.
+ noway_assert(!tree);
+#else // !LEGACY_BACKEND
+ fgPerStatementLocalVarLiveness(stmt->gtStmt.gtStmtList, asgdLclVar);
+#endif // !LEGACY_BACKEND
+ }
+ }
+ else
+ {
+#ifdef LEGACY_BACKEND
+ unreached();
+#else // !LEGACY_BACKEND
+ // NOTE: the `asgdLclVar` analysis done above is not correct for LIR: it depends
+ // on all of the nodes that precede `asgdLclVar` in execution order to factor into the
+ // dataflow for the value being assigned to the local var, which is not necessarily the
+ // case without tree order. As a result, we simply pass `nullptr` for `asgdLclVar`.
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+ fgPerNodeLocalVarLiveness(node, nullptr);
+ }
+#endif // !LEGACY_BACKEND
+ }
+
+ /* Get the TCB local and mark it as used */
+
+ if (block->bbJumpKind == BBJ_RETURN && info.compCallUnmanaged)
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (varDsc->lvTracked)
+ {
+ if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
+ {
+ VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(allVars, VarSetOps::Union(this, fgCurUseSet, fgCurDefSet));
+ printf("BB%02u", block->bbNum);
+ printf(" USE(%d)=", VarSetOps::Count(this, fgCurUseSet));
+ lvaDispVarSet(fgCurUseSet, allVars);
+ if (fgCurHeapUse)
+ {
+ printf(" + HEAP");
+ }
+ printf("\n DEF(%d)=", VarSetOps::Count(this, fgCurDefSet));
+ lvaDispVarSet(fgCurDefSet, allVars);
+ if (fgCurHeapDef)
+ {
+ printf(" + HEAP");
+ }
+ if (fgCurHeapHavoc)
+ {
+ printf("*");
+ }
+ printf("\n\n");
+ }
+#endif // DEBUG
+
+ VarSetOps::Assign(this, block->bbVarUse, fgCurUseSet);
+ VarSetOps::Assign(this, block->bbVarDef, fgCurDefSet);
+ block->bbHeapUse = fgCurHeapUse;
+ block->bbHeapDef = fgCurHeapDef;
+ block->bbHeapHavoc = fgCurHeapHavoc;
+
+ /* also initialize the IN set, just in case we will do multiple DFAs */
+
+ VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::MakeEmpty(this));
+ block->bbHeapLiveIn = false;
+ }
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+// Helper functions to mark variables live over their entire scope
+
+void Compiler::fgBeginScopeLife(VARSET_TP* inScope, VarScopeDsc* var)
+{
+ assert(var);
+
+ LclVarDsc* lclVarDsc1 = &lvaTable[var->vsdVarNum];
+
+ if (lclVarDsc1->lvTracked)
+ {
+ VarSetOps::AddElemD(this, *inScope, lclVarDsc1->lvVarIndex);
+ }
+}
+
+void Compiler::fgEndScopeLife(VARSET_TP* inScope, VarScopeDsc* var)
+{
+ assert(var);
+
+ LclVarDsc* lclVarDsc1 = &lvaTable[var->vsdVarNum];
+
+ if (lclVarDsc1->lvTracked)
+ {
+ VarSetOps::RemoveElemD(this, *inScope, lclVarDsc1->lvVarIndex);
+ }
+}
+
+/*****************************************************************************/
+
+void Compiler::fgMarkInScope(BasicBlock* block, VARSET_VALARG_TP inScope)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Scope info: block BB%02u marking in scope: ", block->bbNum);
+ dumpConvertedVarSet(this, inScope);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* Record which vars are artificially kept alive for debugging */
+
+ VarSetOps::Assign(this, block->bbScope, inScope);
+
+ /* Being in scope implies a use of the variable. Add the var to bbVarUse
+ so that redoing fgLiveVarAnalysis() will work correctly */
+
+ VarSetOps::UnionD(this, block->bbVarUse, inScope);
+
+ /* Artificially mark all vars in scope as alive */
+
+ VarSetOps::UnionD(this, block->bbLiveIn, inScope);
+ VarSetOps::UnionD(this, block->bbLiveOut, inScope);
+}
+
+void Compiler::fgUnmarkInScope(BasicBlock* block, VARSET_VALARG_TP unmarkScope)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Scope info: block BB%02u UNmarking in scope: ", block->bbNum);
+ dumpConvertedVarSet(this, unmarkScope);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ assert(VarSetOps::IsSubset(this, unmarkScope, block->bbScope));
+
+ VarSetOps::DiffD(this, block->bbScope, unmarkScope);
+ VarSetOps::DiffD(this, block->bbVarUse, unmarkScope);
+ VarSetOps::DiffD(this, block->bbLiveIn, unmarkScope);
+ VarSetOps::DiffD(this, block->bbLiveOut, unmarkScope);
+}
+
+#ifdef DEBUG
+
+void Compiler::fgDispDebugScopes()
+{
+ printf("\nDebug scopes:\n");
+
+ BasicBlock* block;
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ printf("BB%02u: ", block->bbNum);
+ dumpConvertedVarSet(this, block->bbScope);
+ printf("\n");
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Mark variables live across their entire scope.
+ */
+
+#if FEATURE_EH_FUNCLETS
+
+void Compiler::fgExtendDbgScopes()
+{
+ compResetScopeLists();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nMarking vars alive over their entire scope :\n\n");
+ }
+
+ if (verbose)
+ {
+ compDispScopeLists();
+ }
+#endif // DEBUG
+
+ VARSET_TP VARSET_INIT_NOCOPY(inScope, VarSetOps::MakeEmpty(this));
+
+ // Mark all tracked LocalVars live over their scope - walk the blocks
+ // keeping track of the current life, and assign it to the blocks.
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ // If we get to a funclet, reset the scope lists and start again, since the block
+ // offsets will be out of order compared to the previous block.
+
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ compResetScopeLists();
+ VarSetOps::ClearD(this, inScope);
+ }
+
+ // Process all scopes up to the current offset
+
+ if (block->bbCodeOffs != BAD_IL_OFFSET)
+ {
+ compProcessScopesUntil(block->bbCodeOffs, &inScope, &Compiler::fgBeginScopeLife, &Compiler::fgEndScopeLife);
+ }
+
+ // Assign the current set of variables that are in scope to the block variables tracking this.
+
+ fgMarkInScope(block, inScope);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ fgDispDebugScopes();
+ }
+#endif // DEBUG
+}
+
+#else // !FEATURE_EH_FUNCLETS
+
+void Compiler::fgExtendDbgScopes()
+{
+ compResetScopeLists();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nMarking vars alive over their entire scope :\n\n");
+ compDispScopeLists();
+ }
+#endif // DEBUG
+
+ VARSET_TP VARSET_INIT_NOCOPY(inScope, VarSetOps::MakeEmpty(this));
+ compProcessScopesUntil(0, &inScope, &Compiler::fgBeginScopeLife, &Compiler::fgEndScopeLife);
+
+ IL_OFFSET lastEndOffs = 0;
+
+ // Mark all tracked LocalVars live over their scope - walk the blocks
+ // keeping track of the current life, and assign it to the blocks.
+
+ BasicBlock* block;
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ // Find scopes becoming alive. If there is a gap in the instr
+ // sequence, we need to process any scopes on those missing offsets.
+
+ if (block->bbCodeOffs != BAD_IL_OFFSET)
+ {
+ if (lastEndOffs != block->bbCodeOffs)
+ {
+ noway_assert(lastEndOffs < block->bbCodeOffs);
+
+ compProcessScopesUntil(block->bbCodeOffs, &inScope, &Compiler::fgBeginScopeLife,
+ &Compiler::fgEndScopeLife);
+ }
+ else
+ {
+ while (VarScopeDsc* varScope = compGetNextEnterScope(block->bbCodeOffs))
+ {
+ fgBeginScopeLife(&inScope, varScope);
+ }
+ }
+ }
+
+ // Assign the current set of variables that are in scope to the block variables tracking this.
+
+ fgMarkInScope(block, inScope);
+
+ // Find scopes going dead.
+
+ if (block->bbCodeOffsEnd != BAD_IL_OFFSET)
+ {
+ VarScopeDsc* varScope;
+ while ((varScope = compGetNextExitScope(block->bbCodeOffsEnd)) != nullptr)
+ {
+ fgEndScopeLife(&inScope, varScope);
+ }
+
+ lastEndOffs = block->bbCodeOffsEnd;
+ }
+ }
+
+ /* Everything should be out of scope by the end of the method. But if the
+ last BB got removed, then inScope may not be empty. */
+
+ noway_assert(VarSetOps::IsEmpty(this, inScope) || lastEndOffs < info.compILCodeSize);
+}
+
+#endif // !FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * For debuggable code, we allow redundant assignments to vars
+ * by marking them live over their entire scope.
+ */
+
+void Compiler::fgExtendDbgLifetimes()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgExtendDbgLifetimes()\n");
+ }
+#endif // DEBUG
+
+ noway_assert(opts.compDbgCode && (info.compVarScopesCount > 0));
+
+ /*-------------------------------------------------------------------------
+ * Extend the lifetimes over the entire reported scope of the variable.
+ */
+
+ fgExtendDbgScopes();
+
+/*-------------------------------------------------------------------------
+ * Partly update liveness info so that we handle any funky BBF_INTERNAL
+ * blocks inserted out of sequence.
+ */
+
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ fgDispBBLiveness();
+ }
+#endif
+
+ fgLiveVarAnalysis(true);
+
+ /* For compDbgCode, we prepend an empty BB which will hold the
+ initializations of variables which are in scope at IL offset 0 (but
+ not initialized by the IL code). Since they will currently be
+ marked as live on entry to fgFirstBB, unmark the liveness so that
+ the following code will know to add the initializations. */
+
+ assert(fgFirstBBisScratch());
+
+ VARSET_TP VARSET_INIT_NOCOPY(trackedArgs, VarSetOps::MakeEmpty(this));
+
+ for (unsigned argNum = 0; argNum < info.compArgsCount; argNum++)
+ {
+ LclVarDsc* argDsc = lvaTable + argNum;
+ if (argDsc->lvPromoted)
+ {
+ lvaPromotionType promotionType = lvaGetPromotionType(argDsc);
+
+ if (promotionType == PROMOTION_TYPE_INDEPENDENT)
+ {
+ noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = argDsc->lvFieldLclStart;
+ argDsc = lvaTable + fieldVarNum;
+ }
+ }
+ noway_assert(argDsc->lvIsParam);
+ if (argDsc->lvTracked)
+ {
+ noway_assert(!VarSetOps::IsMember(this, trackedArgs, argDsc->lvVarIndex)); // Each arg should define a
+ // different bit.
+ VarSetOps::AddElemD(this, trackedArgs, argDsc->lvVarIndex);
+ }
+ }
+
+ // Don't unmark struct locals, either.
+ VARSET_TP VARSET_INIT_NOCOPY(noUnmarkVars, trackedArgs);
+
+ for (unsigned i = 0; i < lvaCount; i++)
+ {
+ LclVarDsc* varDsc = &lvaTable[i];
+ if (varTypeIsStruct(varDsc) && varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(this, noUnmarkVars, varDsc->lvVarIndex);
+ }
+ }
+ fgUnmarkInScope(fgFirstBB, VarSetOps::Diff(this, fgFirstBB->bbScope, noUnmarkVars));
+
+ /*-------------------------------------------------------------------------
+ * As we keep variables artificially alive over their entire scope,
+ * we need to also artificially initialize them if the scope does
+ * not exactly match the real lifetimes, or they will contain
+ * garbage until they are initialized by the IL code.
+ */
+
+ VARSET_TP VARSET_INIT_NOCOPY(initVars, VarSetOps::MakeEmpty(this)); // Vars which are artificially made alive
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ VarSetOps::ClearD(this, initVars);
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_NONE:
+ PREFIX_ASSUME(block->bbNext != nullptr);
+ VarSetOps::UnionD(this, initVars, block->bbNext->bbScope);
+ break;
+
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_EHFILTERRET:
+ VarSetOps::UnionD(this, initVars, block->bbJumpDest->bbScope);
+ break;
+
+ case BBJ_CALLFINALLY:
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+ PREFIX_ASSUME(block->bbNext != nullptr);
+ VarSetOps::UnionD(this, initVars, block->bbNext->bbScope);
+ }
+ VarSetOps::UnionD(this, initVars, block->bbJumpDest->bbScope);
+ break;
+
+ case BBJ_COND:
+ PREFIX_ASSUME(block->bbNext != nullptr);
+ VarSetOps::UnionD(this, initVars, block->bbNext->bbScope);
+ VarSetOps::UnionD(this, initVars, block->bbJumpDest->bbScope);
+ break;
+
+ case BBJ_SWITCH:
+ {
+ BasicBlock** jmpTab;
+ unsigned jmpCnt;
+
+ jmpCnt = block->bbJumpSwt->bbsCount;
+ jmpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ VarSetOps::UnionD(this, initVars, (*jmpTab)->bbScope);
+ } while (++jmpTab, --jmpCnt);
+ }
+ break;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_RETURN:
+ break;
+
+ case BBJ_THROW:
+ /* We don't have to do anything as we mark
+ * all vars live on entry to a catch handler as
+ * volatile anyway
+ */
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+ /* If the var is already live on entry to the current BB,
+ we would have already initialized it. So ignore bbLiveIn */
+
+ VarSetOps::DiffD(this, initVars, block->bbLiveIn);
+
+ /* Add statements initializing the vars, if there are any to initialize */
+ unsigned blockWeight = block->getBBWeight(this);
+
+ VARSET_ITER_INIT(this, iter, initVars, varIndex);
+ while (iter.NextElem(this, &varIndex))
+ {
+ /* Create initialization tree */
+
+ unsigned varNum = lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ var_types type = varDsc->TypeGet();
+
+ // Don't extend struct lifetimes -- they aren't enregistered, anyway.
+ if (type == TYP_STRUCT)
+ {
+ continue;
+ }
+
+ // If we haven't already done this ...
+ if (!fgLocalVarLivenessDone)
+ {
+ // Create a "zero" node
+ GenTree* zero = gtNewZeroConNode(genActualType(type));
+
+ // Create initialization node
+ if (!block->IsLIR())
+ {
+ GenTree* varNode = gtNewLclvNode(varNum, type);
+ GenTree* initNode = gtNewAssignNode(varNode, zero);
+
+ // Create a statement for the initializer, sequence it, and append it to the current BB.
+ GenTree* initStmt = gtNewStmt(initNode);
+ gtSetStmtInfo(initStmt);
+ fgSetStmtSeq(initStmt);
+ fgInsertStmtNearEnd(block, initStmt);
+ }
+ else
+ {
+ GenTree* store = new (this, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, type, varNum, BAD_IL_OFFSET);
+ store->gtOp.gtOp1 = zero;
+ store->gtFlags |= (GTF_VAR_DEF | GTF_ASG);
+
+ LIR::Range initRange = LIR::EmptyRange();
+ initRange.InsertBefore(nullptr, zero, store);
+
+#if !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+ DecomposeLongs::DecomposeRange(this, blockWeight, initRange);
+#endif
+
+ // Naively inserting the initializer at the end of the block may add code after the block's
+ // terminator, in which case the inserted code will never be executed (and the IR for the
+ // block will be invalid). Use `LIR::InsertBeforeTerminator` to avoid this problem.
+ LIR::InsertBeforeTerminator(block, std::move(initRange));
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Created zero-init of V%02u in BB%02u\n", varNum, block->bbNum);
+ }
+#endif // DEBUG
+
+ varDsc->incRefCnts(block->getBBWeight(this), this);
+
+ block->bbFlags |= BBF_CHANGED; // indicates that the contents of the block have changed.
+ }
+
+ /* Update liveness information so that redoing fgLiveVarAnalysis()
+ will work correctly if needed */
+
+ VarSetOps::AddElemD(this, block->bbVarDef, varIndex);
+ VarSetOps::AddElemD(this, block->bbLiveOut, varIndex);
+ }
+ }
+
+ // raMarkStkVars() reserves stack space for unused variables (which
+ // need to be initialized). However, arguments don't need to be initialized.
+ // So just ensure that they don't have a zero ref count.
+
+ unsigned lclNum = 0;
+ for (LclVarDsc *varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvRefCnt == 0 && varDsc->lvIsRegArg)
+ {
+ varDsc->lvRefCnt = 1;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nBB liveness after fgExtendDbgLifetimes():\n\n");
+ fgDispBBLiveness();
+ printf("\n");
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************/
+#endif // DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+VARSET_VALRET_TP Compiler::fgGetHandlerLiveVars(BasicBlock* block)
+{
+ noway_assert(block);
+ noway_assert(ehBlockHasExnFlowDsc(block));
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveVars, VarSetOps::MakeEmpty(this));
+ EHblkDsc* HBtab = ehGetBlockExnFlowDsc(block);
+
+ do
+ {
+ /* Either we enter the filter first or the catch/finally */
+
+ if (HBtab->HasFilter())
+ {
+ VarSetOps::UnionD(this, liveVars, HBtab->ebdFilter->bbLiveIn);
+#if FEATURE_EH_FUNCLETS
+ // The EH subsystem can trigger a stack walk after the filter
+ // has returned, but before invoking the handler, and the only
+ // IP address reported from this method will be the original
+ // faulting instruction, thus everything in the try body
+ // must report as live any variables live-out of the filter
+ // (which is the same as those live-in to the handler)
+ VarSetOps::UnionD(this, liveVars, HBtab->ebdHndBeg->bbLiveIn);
+#endif // FEATURE_EH_FUNCLETS
+ }
+ else
+ {
+ VarSetOps::UnionD(this, liveVars, HBtab->ebdHndBeg->bbLiveIn);
+ }
+
+ /* If we have nested try's edbEnclosing will provide them */
+ noway_assert((HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) ||
+ (HBtab->ebdEnclosingTryIndex > ehGetIndex(HBtab)));
+
+ unsigned outerIndex = HBtab->ebdEnclosingTryIndex;
+ if (outerIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ break;
+ }
+ HBtab = ehGetDsc(outerIndex);
+
+ } while (true);
+
+ return liveVars;
+}
+
+/*****************************************************************************
+ *
+ * This is the classic algorithm for Live Variable Analysis.
+ * If updateInternalOnly==true, only update BBF_INTERNAL blocks.
+ */
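+// Reader's note (added for clarity; an informal restatement, not part of the original
+// comment): the loop below iterates the classic backward dataflow equations to a
+// fixed point,
+//
+//     liveOut(B) = union of liveIn(S) over all successors S of B
+//     liveIn(B)  = bbVarUse(B) | (liveOut(B) & ~bbVarDef(B))
+//
+// with additional terms for JMP blocks, the "this" pointer, and exception handlers.
+// The do/while exits after a single pass when hasPossibleBackEdge remains false,
+// since every block was then processed after all of its successors.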
+
+void Compiler::fgLiveVarAnalysis(bool updateInternalOnly)
+{
+ BasicBlock* block;
+ bool change;
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT_NOCOPY(extraLiveOutFromFinally, VarSetOps::MakeEmpty(this));
+#endif // DEBUG
+ bool keepAliveThis = lvaKeepAliveAndReportThis() && lvaTable[info.compThisArg].lvTracked;
+
+ /* Live Variable Analysis - Backward dataflow */
+
+ bool hasPossibleBackEdge = false;
+
+ do
+ {
+ change = false;
+
+ /* Visit all blocks and compute new data flow values */
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveIn, VarSetOps::MakeEmpty(this));
+ VARSET_TP VARSET_INIT_NOCOPY(liveOut, VarSetOps::MakeEmpty(this));
+
+ bool heapLiveIn = false;
+ bool heapLiveOut = false;
+
+ for (block = fgLastBB; block; block = block->bbPrev)
+ {
+ // Sometimes block numbers are not monotonically increasing, which
+ // would cause us to fail to identify backedges.
+ if (block->bbNext && block->bbNext->bbNum <= block->bbNum)
+ {
+ hasPossibleBackEdge = true;
+ }
+
+ if (updateInternalOnly)
+ {
+ /* Only update BBF_INTERNAL blocks as they may be
+ syntactically out of sequence. */
+
+ noway_assert(opts.compDbgCode && (info.compVarScopesCount > 0));
+
+ if (!(block->bbFlags & BBF_INTERNAL))
+ {
+ continue;
+ }
+ }
+
+ /* Compute the 'liveOut' set */
+
+ VarSetOps::ClearD(this, liveOut);
+ heapLiveOut = false;
+ if (block->endsWithJmpMethod(this))
+ {
+ // A JMP uses all the arguments, so mark them all
+ // as live at the JMP instruction
+ //
+ const LclVarDsc* varDscEndParams = lvaTable + info.compArgsCount;
+ for (LclVarDsc* varDsc = lvaTable; varDsc < varDscEndParams; varDsc++)
+ {
+ noway_assert(!varDsc->lvPromoted);
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(this, liveOut, varDsc->lvVarIndex);
+ }
+ }
+ }
+
+ // Additionally, union in all the live-in tracked vars of successors.
+ AllSuccessorIter succsEnd = block->GetAllSuccs(this).end();
+ for (AllSuccessorIter succs = block->GetAllSuccs(this).begin(); succs != succsEnd; ++succs)
+ {
+ BasicBlock* succ = (*succs);
+ VarSetOps::UnionD(this, liveOut, succ->bbLiveIn);
+ heapLiveOut = heapLiveOut || (*succs)->bbHeapLiveIn;
+ if (succ->bbNum <= block->bbNum)
+ {
+ hasPossibleBackEdge = true;
+ }
+ }
+
+ /* For lvaKeepAliveAndReportThis methods, "this" has to be kept alive everywhere.
+ Note that a function may end in a throw or an infinite loop (as opposed to a return);
+ "this" has to be alive everywhere even in such methods. */
+
+ if (keepAliveThis)
+ {
+ VarSetOps::AddElemD(this, liveOut, lvaTable[info.compThisArg].lvVarIndex);
+ }
+
+ /* Compute the 'liveIn' set */
+
+ VarSetOps::Assign(this, liveIn, liveOut);
+ VarSetOps::DiffD(this, liveIn, block->bbVarDef);
+ VarSetOps::UnionD(this, liveIn, block->bbVarUse);
+
+ heapLiveIn = (heapLiveOut && !block->bbHeapDef) || block->bbHeapUse;
+
+ /* Can exceptions from this block be handled (in this function)? */
+
+ if (ehBlockHasExnFlowDsc(block))
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(liveVars, fgGetHandlerLiveVars(block));
+
+ VarSetOps::UnionD(this, liveIn, liveVars);
+ VarSetOps::UnionD(this, liveOut, liveVars);
+ }
+
+ /* Has there been any change in either live set? */
+
+ if (!VarSetOps::Equal(this, block->bbLiveIn, liveIn) || !VarSetOps::Equal(this, block->bbLiveOut, liveOut))
+ {
+ if (updateInternalOnly)
+ {
+ // Only "extend" liveness over BBF_INTERNAL blocks
+
+ noway_assert(block->bbFlags & BBF_INTERNAL);
+
+ if (!VarSetOps::Equal(this, VarSetOps::Intersection(this, block->bbLiveIn, liveIn), liveIn) ||
+ !VarSetOps::Equal(this, VarSetOps::Intersection(this, block->bbLiveOut, liveOut), liveOut))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Scope info: block BB%02u LiveIn+ ", block->bbNum);
+ dumpConvertedVarSet(this, VarSetOps::Diff(this, liveIn, block->bbLiveIn));
+ printf(", LiveOut+ ");
+ dumpConvertedVarSet(this, VarSetOps::Diff(this, liveOut, block->bbLiveOut));
+ printf("\n");
+ }
+#endif // DEBUG
+
+ VarSetOps::UnionD(this, block->bbLiveIn, liveIn);
+ VarSetOps::UnionD(this, block->bbLiveOut, liveOut);
+ change = true;
+ }
+ }
+ else
+ {
+ VarSetOps::Assign(this, block->bbLiveIn, liveIn);
+ VarSetOps::Assign(this, block->bbLiveOut, liveOut);
+ change = true;
+ }
+ }
+
+ if ((block->bbHeapLiveIn == 1) != heapLiveIn || (block->bbHeapLiveOut == 1) != heapLiveOut)
+ {
+ block->bbHeapLiveIn = heapLiveIn;
+ block->bbHeapLiveOut = heapLiveOut;
+ change = true;
+ }
+ }
+ // If there is no way we could have processed a block without having already seen all of
+ // its successors, then there is no need to iterate.
+ if (!hasPossibleBackEdge)
+ {
+ break;
+ }
+ } while (change);
+
+//-------------------------------------------------------------------------
+
+#ifdef DEBUG
+
+ if (verbose && !updateInternalOnly)
+ {
+ printf("\nBB liveness after fgLiveVarAnalysis():\n\n");
+ fgDispBBLiveness();
+ }
+
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Mark any variables in varSet1 as interfering with any variables
+ * specified in varSet2.
+ * We ensure that the interference graph is symmetric:
+ * (if T11 interferes with T16, then T16 interferes with T11)
+ * This function returns true if any new interferences were added
+ * and false if no new interferences were added.
+ */
+bool Compiler::fgMarkIntf(VARSET_VALARG_TP varSet1, VARSET_VALARG_TP varSet2)
+{
+#ifdef LEGACY_BACKEND
+ /* If either set has no bits set (or we are not optimizing), take an early out */
+ if (VarSetOps::IsEmpty(this, varSet2) || VarSetOps::IsEmpty(this, varSet1) || opts.MinOpts())
+ {
+ return false;
+ }
+
+ bool addedIntf = false; // This is set to true if we add any new interferences
+
+ VarSetOps::Assign(this, fgMarkIntfUnionVS, varSet1);
+ VarSetOps::UnionD(this, fgMarkIntfUnionVS, varSet2);
+
+ VARSET_ITER_INIT(this, iter, fgMarkIntfUnionVS, refIndex);
+ while (iter.NextElem(this, &refIndex))
+ {
+ // if varSet1 has this bit set then it interferes with varSet2
+ if (VarSetOps::IsMember(this, varSet1, refIndex))
+ {
+ // Calculate the set of new interference to add
+ VARSET_TP VARSET_INIT_NOCOPY(newIntf, VarSetOps::Diff(this, varSet2, lvaVarIntf[refIndex]));
+ if (!VarSetOps::IsEmpty(this, newIntf))
+ {
+ addedIntf = true;
+ VarSetOps::UnionD(this, lvaVarIntf[refIndex], newIntf);
+ }
+ }
+
+ // if varSet2 has this bit set then it interferes with varSet1
+ if (VarSetOps::IsMember(this, varSet2, refIndex))
+ {
+ // Calculate the set of new interference to add
+ VARSET_TP VARSET_INIT_NOCOPY(newIntf, VarSetOps::Diff(this, varSet1, lvaVarIntf[refIndex]));
+ if (!VarSetOps::IsEmpty(this, newIntf))
+ {
+ addedIntf = true;
+ VarSetOps::UnionD(this, lvaVarIntf[refIndex], newIntf);
+ }
+ }
+ }
+
+ return addedIntf;
+#else
+ return false;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Mark any variables in varSet as interfering with each other.
+ * This is a specialized version of the above for when both arguments are the same set.
+ * We ensure that the interference graph is symmetric:
+ * (if T11 interferes with T16, then T16 interferes with T11)
+ * This function returns true if any new interferences were added
+ * and false if no new interferences were added.
+ */
+
+bool Compiler::fgMarkIntf(VARSET_VALARG_TP varSet)
+{
+#ifdef LEGACY_BACKEND
+ /* No bits set or we are not optimizing, take an early out */
+ if (VarSetOps::IsEmpty(this, varSet) || opts.MinOpts())
+ return false;
+
+ bool addedIntf = false; // This is set to true if we add any new interferences
+
+ VARSET_ITER_INIT(this, iter, varSet, refIndex);
+ while (iter.NextElem(this, &refIndex))
+ {
+ // Calculate the set of new interference to add
+ VARSET_TP VARSET_INIT_NOCOPY(newIntf, VarSetOps::Diff(this, varSet, lvaVarIntf[refIndex]));
+ if (!VarSetOps::IsEmpty(this, newIntf))
+ {
+ addedIntf = true;
+ VarSetOps::UnionD(this, lvaVarIntf[refIndex], newIntf);
+ }
+ }
+
+ return addedIntf;
+#else // !LEGACY_BACKEND
+ return false;
+#endif // !LEGACY_BACKEND
+}
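+// Illustrative usage sketch (added for clarity; it mirrors the call sites later in this
+// file and introduces nothing new): interference with everything currently live is
+// typically recorded for a single variable as
+//
+//     VARSET_TP VARSET_INIT_NOCOPY(varBit, VarSetOps::MakeSingleton(this, varIndex));
+//     fgMarkIntf(life, varBit); // 'varIndex' now interferes with every member of 'life'
+//
+// As the #ifdefs above show, both overloads simply return false when LEGACY_BACKEND is
+// not defined.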
+
+/*****************************************************************************
+ * For updating liveset during traversal AFTER fgComputeLife has completed
+ */
+
+VARSET_VALRET_TP Compiler::fgUpdateLiveSet(VARSET_VALARG_TP liveSet, GenTreePtr tree)
+{
+ VARSET_TP VARSET_INIT(this, newLiveSet, liveSet);
+ assert(fgLocalVarLivenessDone == true);
+ GenTreePtr lclVarTree = tree; // After the tests below, "lclVarTree" will be the local variable.
+ if (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_FLD || tree->gtOper == GT_REG_VAR ||
+ (lclVarTree = fgIsIndirOfAddrOfLocal(tree)) != nullptr)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(varBits, fgGetVarBits(lclVarTree));
+
+ if (!VarSetOps::IsEmpty(this, varBits))
+ {
+ if (tree->gtFlags & GTF_VAR_DEATH)
+ {
+ // We'd like to be able to assert the following, however if we are walking
+ // through a qmark/colon tree, we may encounter multiple last-use nodes.
+ // assert (VarSetOps::IsSubset(this, varBits, newLiveSet));
+
+ // We maintain the invariant that if the lclVarTree is a promoted struct but the
+ // lookup fails, then all the field vars (i.e., "varBits") are dying.
+ VARSET_TP* deadVarBits = nullptr;
+ if (varTypeIsStruct(lclVarTree) && GetPromotedStructDeathVars()->Lookup(lclVarTree, &deadVarBits))
+ {
+ VarSetOps::DiffD(this, newLiveSet, *deadVarBits);
+ }
+ else
+ {
+ VarSetOps::DiffD(this, newLiveSet, varBits);
+ }
+ }
+ else if ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0)
+ {
+ assert(tree == lclVarTree); // LDOBJ case should only be a use.
+
+ // This shouldn't be in newLiveSet, unless this is debug code, in which
+ // case we keep vars live everywhere, OR it is address-exposed, OR this block
+ // is part of a try block, in which case it may be live at the handler
+ // Could add a check that, if it's in the newLiveSet, that it's also in
+ // fgGetHandlerLiveVars(compCurBB), but seems excessive
+ //
+ assert(VarSetOps::IsEmptyIntersection(this, newLiveSet, varBits) || opts.compDbgCode ||
+ lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed ||
+ (compCurBB != nullptr && ehBlockHasExnFlowDsc(compCurBB)));
+ VarSetOps::UnionD(this, newLiveSet, varBits);
+ }
+ }
+ }
+ return newLiveSet;
+}
+
+//------------------------------------------------------------------------
+// Compiler::fgComputeLifeCall: compute the changes to local var liveness
+// due to a GT_CALL node.
+//
+// Arguments:
+// life - The live set that is being computed.
+// call - The call node in question.
+//
+void Compiler::fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call)
+{
+ assert(call != nullptr);
+
+ // If this is a tail call and we have any unmanaged p/invoke calls in
+ // the method, then we're going to run the p/invoke epilog,
+ // so we mark the FrameRoot as used by this instruction.
+ // This ensures that the variable is kept alive at the tail call.
+ if (call->IsTailCall() && info.compCallUnmanaged)
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ /* Get the TCB local and make it live */
+
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* frameVarDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (frameVarDsc->lvTracked)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(varBit, VarSetOps::MakeSingleton(this, frameVarDsc->lvVarIndex));
+
+ VarSetOps::AddElemD(this, life, frameVarDsc->lvVarIndex);
+
+ /* Record interference with other live variables */
+
+ fgMarkIntf(life, varBit);
+ }
+ }
+ }
+
+ /* GC refs cannot be enregistered across an unmanaged call */
+
+ // TODO: we should generate the code for saving to/restoring
+ // from the inlined N/Direct frame instead.
+
+ /* Is this call to unmanaged code? */
+ if (call->IsUnmanaged())
+ {
+ /* Get the TCB local and make it live */
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* frameVarDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (frameVarDsc->lvTracked)
+ {
+ unsigned varIndex = frameVarDsc->lvVarIndex;
+ noway_assert(varIndex < lvaTrackedCount);
+
+ // Is the variable already known to be alive?
+ //
+ if (VarSetOps::IsMember(this, life, varIndex))
+ {
+ // Since we may call this multiple times, clear the GTF_CALL_M_FRAME_VAR_DEATH if set.
+ //
+ call->gtCallMoreFlags &= ~GTF_CALL_M_FRAME_VAR_DEATH;
+ }
+ else
+ {
+ // The variable is just coming to life
+ // Since this is a backwards walk of the trees
+ // that makes this change in liveness a 'last-use'
+ //
+ VarSetOps::AddElemD(this, life, varIndex);
+ call->gtCallMoreFlags |= GTF_CALL_M_FRAME_VAR_DEATH;
+ }
+
+ // Record an interference with the other live variables
+ //
+ VARSET_TP VARSET_INIT_NOCOPY(varBit, VarSetOps::MakeSingleton(this, varIndex));
+ fgMarkIntf(life, varBit);
+ }
+ }
+
+ /* Do we have any live variables? */
+
+ if (!VarSetOps::IsEmpty(this, life))
+ {
+ // For each live variable, if it is a GC-ref type, we
+ // mark it DoNotEnregister to prevent it from being enregistered
+ // across the unmanaged call.
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ {
+ continue;
+ }
+
+ unsigned varNum = varDsc->lvVarIndex;
+
+ /* Ignore the variable if it's not live here */
+
+ if (!VarSetOps::IsMember(this, life, varDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ // If it is a GC-ref type then mark it DoNotEnregister.
+ if (varTypeIsGC(varDsc->TypeGet()))
+ {
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LiveAcrossUnmanagedCall));
+ }
+ }
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::fgComputeLifeLocal: compute the changes to local var liveness
+// due to a use or a def of a local var and
+ // indicates whether the use/def is a dead
+// store.
+//
+// Arguments:
+// life - The live set that is being computed.
+ // keepAliveVars - The current set of variables to keep alive
+// regardless of their actual lifetime.
+// lclVarNode - The node that corresponds to the local var def or
+// use. Only differs from `node` when targeting the
+// legacy backend.
+// node - The actual tree node being processed.
+//
+// Returns:
+// `true` if the local var node corresponds to a dead store; `false`
+// otherwise.
+//
+bool Compiler::fgComputeLifeLocal(VARSET_TP& life, VARSET_TP& keepAliveVars, GenTree* lclVarNode, GenTree* node)
+{
+ unsigned lclNum = lclVarNode->gtLclVarCommon.gtLclNum;
+
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ unsigned varIndex;
+ VARSET_TP varBit;
+
+ // Is this a tracked variable?
+ if (varDsc->lvTracked)
+ {
+ varIndex = varDsc->lvVarIndex;
+ noway_assert(varIndex < lvaTrackedCount);
+
+ /* Is this a definition or use? */
+
+ if (lclVarNode->gtFlags & GTF_VAR_DEF)
+ {
+ /*
+ The variable is being defined here. The variable
+ should be marked dead from here until its closest
+ previous use.
+
+ IMPORTANT OBSERVATION:
+
+ For GTF_VAR_USEASG (i.e. x <op>= a) we cannot
+ consider it a "pure" definition because it would
+ kill x (which would be wrong because x is
+ "used" in such a construct) -> see below the case when x is live
+ */
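+ // Illustrative example (added for clarity, restating the observation above): a
+ // compound assignment such as "x += a" marks the 'x' node with both GTF_VAR_DEF
+ // and GTF_VAR_USEASG, so it does not kill the liveness of x here; only a plain
+ // definition such as "x = a" does.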
+
+ if (VarSetOps::IsMember(this, life, varIndex))
+ {
+ /* The variable is live */
+
+ if ((lclVarNode->gtFlags & GTF_VAR_USEASG) == 0)
+ {
+ /* Mark variable as dead from here to its closest use */
+
+ if (!VarSetOps::IsMember(this, keepAliveVars, varIndex))
+ {
+ VarSetOps::RemoveElemD(this, life, varIndex);
+ }
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ printf("Def V%02u,T%02u at ", lclNum, varIndex);
+ printTreeID(lclVarNode);
+ printf(" life %s -> %s\n",
+ VarSetOps::ToString(this, VarSetOps::Union(this, life,
+ VarSetOps::MakeSingleton(this, varIndex))),
+ VarSetOps::ToString(this, life));
+ }
+#endif // DEBUG
+ }
+ }
+ else
+ {
+ /* Dead assignment to the variable */
+ lclVarNode->gtFlags |= GTF_VAR_DEATH;
+
+ if (!opts.MinOpts())
+ {
+ // keepAliveVars always stay alive
+ noway_assert(!VarSetOps::IsMember(this, keepAliveVars, varIndex));
+
+ /* This is a dead store unless the variable is marked
+ GTF_VAR_USEASG and we are in an interior statement
+ that will be used (e.g. while (i++) or a GT_COMMA) */
+
+ // Do not consider this store dead if the target local variable represents
+ // a promoted struct field of an address exposed local or if the address
+ // of the variable has been exposed. Improved alias analysis could allow
+ // stores to these sorts of variables to be removed at the cost of compile
+ // time.
+ return !varDsc->lvAddrExposed &&
+ !(varDsc->lvIsStructField && lvaTable[varDsc->lvParentLcl].lvAddrExposed);
+ }
+ }
+
+ return false;
+ }
+ else // it is a use
+ {
+ // Is the variable already known to be alive?
+ if (VarSetOps::IsMember(this, life, varIndex))
+ {
+ // Since we may do liveness analysis multiple times, clear the GTF_VAR_DEATH if set.
+ lclVarNode->gtFlags &= ~GTF_VAR_DEATH;
+ return false;
+ }
+
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ printf("Ref V%02u,T%02u] at ", lclNum, varIndex);
+ printTreeID(node);
+ printf(" life %s -> %s\n", VarSetOps::ToString(this, life),
+ VarSetOps::ToString(this, VarSetOps::Union(this, life, varBit)));
+ }
+#endif // DEBUG
+
+ // The variable is being used, and it is not currently live.
+ // So the variable is just coming to life
+ lclVarNode->gtFlags |= GTF_VAR_DEATH;
+ VarSetOps::AddElemD(this, life, varIndex);
+
+ // Record interference with other live variables
+ fgMarkIntf(life, VarSetOps::MakeSingleton(this, varIndex));
+ }
+ }
+ // Note that promoted implies not tracked (i.e. only the fields are tracked).
+ else if (varTypeIsStruct(varDsc->lvType))
+ {
+ noway_assert(!varDsc->lvTracked);
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType != PROMOTION_TYPE_NONE)
+ {
+ VarSetOps::AssignNoCopy(this, varBit, VarSetOps::MakeEmpty(this));
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+#if !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+ if (!varTypeIsLong(lvaTable[i].lvType) || !lvaTable[i].lvPromoted)
+#endif // !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+ {
+ noway_assert(lvaTable[i].lvIsStructField);
+ }
+ if (lvaTable[i].lvTracked)
+ {
+ varIndex = lvaTable[i].lvVarIndex;
+ noway_assert(varIndex < lvaTrackedCount);
+ VarSetOps::AddElemD(this, varBit, varIndex);
+ }
+ }
+ if (node->gtFlags & GTF_VAR_DEF)
+ {
+ VarSetOps::DiffD(this, varBit, keepAliveVars);
+ VarSetOps::DiffD(this, life, varBit);
+ return false;
+ }
+ // This is a use.
+
+ // Are the variables already known to be alive?
+ if (VarSetOps::IsSubset(this, varBit, life))
+ {
+ node->gtFlags &= ~GTF_VAR_DEATH; // Since we may now call this multiple times, reset if live.
+ return false;
+ }
+
+ // Some variables are being used, and they are not currently live.
+ // So they are just coming to life, in the backwards traversal; in a forwards
+ // traversal, one or more are dying. Mark this.
+
+ node->gtFlags |= GTF_VAR_DEATH;
+
+ // Are all the variables becoming alive (in the backwards traversal), or just a subset?
+ if (!VarSetOps::IsEmptyIntersection(this, varBit, life))
+ {
+ // Only a subset of the variables becomes live; we must record that subset.
+ // (Lack of an entry for "lclVarNode" will be considered to imply all become dead in the
+ // forward traversal.)
+ VARSET_TP* deadVarSet = new (this, CMK_bitset) VARSET_TP;
+ VarSetOps::AssignNoCopy(this, *deadVarSet, VarSetOps::Diff(this, varBit, life));
+ GetPromotedStructDeathVars()->Set(lclVarNode, deadVarSet);
+ }
+
+ // In any case, all the field vars are now live (in the backwards traversal).
+ VarSetOps::UnionD(this, life, varBit);
+
+ // Record interference with other live variables
+ fgMarkIntf(life, varBit);
+ }
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Compute the set of live variables at each node in a given statement
+ * or subtree of a statement moving backward from startNode to endNode
+ */
+
+#ifndef LEGACY_BACKEND
+VARSET_VALRET_TP Compiler::fgComputeLife(VARSET_VALARG_TP lifeArg,
+ GenTreePtr startNode,
+ GenTreePtr endNode,
+ VARSET_VALARG_TP volatileVars,
+ bool* pStmtInfoDirty DEBUGARG(bool* treeModf))
+{
+ GenTreePtr tree;
+ unsigned lclNum;
+
+ VARSET_TP VARSET_INIT(this, life, lifeArg); // lifeArg is const ref; copy to allow modification.
+
+ VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars);
+#ifdef DEBUGGING_SUPPORT
+ VarSetOps::UnionD(this, keepAliveVars, compCurBB->bbScope); // Don't kill vars in scope
+#endif
+
+ noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars));
+ noway_assert(compCurStmt->gtOper == GT_STMT);
+ noway_assert(endNode || (startNode == compCurStmt->gtStmt.gtStmtExpr));
+
+ // NOTE: Live variable analysis will not work if you try
+ // to use the result of an assignment node directly!
+ for (tree = startNode; tree != endNode; tree = tree->gtPrev)
+ {
+ AGAIN:
+ assert(tree->OperGet() != GT_QMARK);
+
+ if (tree->gtOper == GT_CALL)
+ {
+ fgComputeLifeCall(life, tree->AsCall());
+ }
+ else if (tree->OperIsNonPhiLocal() || tree->OperIsLocalAddr())
+ {
+ bool isDeadStore = fgComputeLifeLocal(life, keepAliveVars, tree, tree);
+ if (isDeadStore)
+ {
+ LclVarDsc* varDsc = &lvaTable[tree->gtLclVarCommon.gtLclNum];
+
+ bool doAgain = false;
+ if (fgRemoveDeadStore(&tree, varDsc, life, &doAgain, pStmtInfoDirty DEBUGARG(treeModf)))
+ {
+ assert(!doAgain);
+ break;
+ }
+
+ if (doAgain)
+ {
+ goto AGAIN;
+ }
+ }
+ }
+ }
+
+ // Return the set of live variables out of this statement
+ return life;
+}
+
+VARSET_VALRET_TP Compiler::fgComputeLifeLIR(VARSET_VALARG_TP lifeArg, BasicBlock* block, VARSET_VALARG_TP volatileVars)
+{
+ VARSET_TP VARSET_INIT(this, life, lifeArg); // lifeArg is const ref; copy to allow modification.
+
+ VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars);
+#ifdef DEBUGGING_SUPPORT
+ VarSetOps::UnionD(this, keepAliveVars, block->bbScope); // Don't kill vars in scope
+#endif
+
+ noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars));
+
+ LIR::Range& blockRange = LIR::AsRange(block);
+ GenTree* firstNonPhiNode = blockRange.FirstNonPhiNode();
+ if (firstNonPhiNode == nullptr)
+ {
+ return life;
+ }
+
+ for (GenTree *node = blockRange.LastNode(), *next = nullptr, *end = firstNonPhiNode->gtPrev; node != end;
+ node = next)
+ {
+ next = node->gtPrev;
+
+ if (node->OperGet() == GT_CALL)
+ {
+ fgComputeLifeCall(life, node->AsCall());
+ }
+ else if (node->OperIsNonPhiLocal() || node->OperIsLocalAddr())
+ {
+ bool isDeadStore = fgComputeLifeLocal(life, keepAliveVars, node, node);
+ if (isDeadStore)
+ {
+ fgTryRemoveDeadLIRStore(blockRange, node, &next);
+ }
+ }
+ }
+
+ return life;
+}
+
+#else // LEGACY_BACKEND
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+
+VARSET_VALRET_TP Compiler::fgComputeLife(VARSET_VALARG_TP lifeArg,
+ GenTreePtr startNode,
+ GenTreePtr endNode,
+ VARSET_VALARG_TP volatileVars,
+ bool* pStmtInfoDirty DEBUGARG(bool* treeModf))
+{
+ GenTreePtr tree;
+ unsigned lclNum;
+
+ GenTreePtr gtQMark = NULL; // current GT_QMARK node (walking the trees backwards)
+ GenTreePtr nextColonExit = 0; // gtQMark->gtOp.gtOp2 while walking the 'else' branch.
+ // gtQMark->gtOp.gtOp1 while walking the 'then' branch
+
+ VARSET_TP VARSET_INIT(this, life, lifeArg); // lifeArg is const ref; copy to allow modification.
+
+ // TBD: This used to be an initialization to VARSET_NOT_ACCEPTABLE. Try to figure out what's going on here.
+ VARSET_TP VARSET_INIT_NOCOPY(entryLiveSet, VarSetOps::MakeFull(this)); // liveness when we see gtQMark
+ VARSET_TP VARSET_INIT_NOCOPY(gtColonLiveSet, VarSetOps::MakeFull(this)); // liveness when we see gtColon
+ GenTreePtr gtColon = NULL;
+
+ VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars);
+#ifdef DEBUGGING_SUPPORT
+ VarSetOps::UnionD(this, keepAliveVars, compCurBB->bbScope); /* Don't kill vars in scope */
+#endif
+ noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars));
+ noway_assert(compCurStmt->gtOper == GT_STMT);
+ noway_assert(endNode || (startNode == compCurStmt->gtStmt.gtStmtExpr));
+
+ /* NOTE: Live variable analysis will not work if you try
+ * to use the result of an assignment node directly */
+
+ for (tree = startNode; tree != endNode; tree = tree->gtPrev)
+ {
+ AGAIN:
+ /* For ?: nodes if we're done with the then branch, remember
+ * the liveness */
+ if (gtQMark && (tree == gtColon))
+ {
+ VarSetOps::Assign(this, gtColonLiveSet, life);
+ VarSetOps::Assign(this, gtQMark->gtQmark.gtThenLiveSet, gtColonLiveSet);
+ }
+
+ /* For ?: nodes if we're done with the else branch
+ * then set the correct life as the union of the two branches */
+
+ if (gtQMark && (tree == gtQMark->gtOp.gtOp1))
+ {
+ noway_assert(tree->gtFlags & GTF_RELOP_QMARK);
+ noway_assert(gtQMark->gtOp.gtOp2->gtOper == GT_COLON);
+
+ GenTreePtr thenNode = gtColon->AsColon()->ThenNode();
+ GenTreePtr elseNode = gtColon->AsColon()->ElseNode();
+
+ noway_assert(thenNode && elseNode);
+
+ VarSetOps::Assign(this, gtQMark->gtQmark.gtElseLiveSet, life);
+
+ /* Check if we optimized away the ?: */
+
+ if (elseNode->IsNothingNode())
+ {
+ if (thenNode->IsNothingNode())
+ {
+ /* This can only happen for VOID ?: */
+ noway_assert(gtColon->gtType == TYP_VOID);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("BB%02u - Removing dead QMark - Colon ...\n", compCurBB->bbNum);
+ gtDispTree(gtQMark);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* Remove the '?:' - keep the side effects in the condition */
+
+ noway_assert(tree->OperKind() & GTK_RELOP);
+
+ /* Change the node to a NOP */
+
+ gtQMark->gtBashToNOP();
+#ifdef DEBUG
+ *treeModf = true;
+#endif // DEBUG
+
+ /* Extract and keep the side effects */
+
+ if (tree->gtFlags & GTF_SIDE_EFFECT)
+ {
+ GenTreePtr sideEffList = NULL;
+
+ gtExtractSideEffList(tree, &sideEffList);
+
+ if (sideEffList)
+ {
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Extracted side effects list from condition...\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif // DEBUG
+ fgUpdateRefCntForExtract(tree, sideEffList);
+
+ /* The NOP node becomes a GT_COMMA holding the side effect list */
+
+ gtQMark->ChangeOper(GT_COMMA);
+ gtQMark->gtFlags |= sideEffList->gtFlags & GTF_ALL_EFFECT;
+
+ if (sideEffList->gtOper == GT_COMMA)
+ {
+ gtQMark->gtOp.gtOp1 = sideEffList->gtOp.gtOp1;
+ gtQMark->gtOp.gtOp2 = sideEffList->gtOp.gtOp2;
+ }
+ else
+ {
+ gtQMark->gtOp.gtOp1 = sideEffList;
+ gtQMark->gtOp.gtOp2 = gtNewNothingNode();
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRemoving tree ");
+ printTreeID(tree);
+ printf(" in BB%02u as useless\n", compCurBB->bbNum);
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+ fgUpdateRefCntForExtract(tree, NULL);
+ }
+ }
+
+ /* If top node without side effects remove it */
+
+ if ((gtQMark == compCurStmt->gtStmt.gtStmtExpr) && gtQMark->IsNothingNode())
+ {
+ fgRemoveStmt(compCurBB, compCurStmt);
+ break;
+ }
+
+ /* Re-link the nodes for this statement */
+
+ fgSetStmtSeq(compCurStmt);
+
+ /* Continue analysis from this node */
+
+ tree = gtQMark;
+
+ /* As the 'then' and 'else' branches are empty, liveness
+ should not have changed */
+
+ noway_assert(VarSetOps::Equal(this, life, entryLiveSet));
+ goto SKIP_QMARK;
+ }
+ else
+ {
+ // The 'else' branch is empty and the 'then' branch is non-empty
+ // so swap the two branches and reverse the condition. If one is
+ // non-empty, we want it to be the 'else'
+
+ GenTreePtr tmp = thenNode;
+
+ gtColon->AsColon()->ThenNode() = thenNode = elseNode;
+ gtColon->AsColon()->ElseNode() = elseNode = tmp;
+ noway_assert(tree == gtQMark->gtOp.gtOp1);
+ gtReverseCond(tree);
+
+ // Remember to also swap the live sets of the two branches.
+ VARSET_TP VARSET_INIT_NOCOPY(tmpVS, gtQMark->gtQmark.gtElseLiveSet);
+ VarSetOps::AssignNoCopy(this, gtQMark->gtQmark.gtElseLiveSet, gtQMark->gtQmark.gtThenLiveSet);
+ VarSetOps::AssignNoCopy(this, gtQMark->gtQmark.gtThenLiveSet, tmpVS);
+
+ /* Re-link the nodes for this statement */
+
+ fgSetStmtSeq(compCurStmt);
+ }
+ }
+
+ /* Variables in the two branches that are live at the split
+ * must interfere with each other */
+
+ fgMarkIntf(life, gtColonLiveSet);
+
+ /* The live set at the split is the union of the two branches */
+
+ VarSetOps::UnionD(this, life, gtColonLiveSet);
+
+ SKIP_QMARK:
+
+ /* We are out of the parallel branches, the rest is sequential */
+
+ gtQMark = NULL;
+ }
+
+ if (tree->gtOper == GT_CALL)
+ {
+ fgComputeLifeCall(life, tree->AsCall());
+ continue;
+ }
+
+ // Is this a use/def of a local variable?
+ // Generally, the last use information is associated with the lclVar node.
+ // However, for LEGACY_BACKEND, the information must be associated
+ // with the OBJ itself for promoted structs.
+ // In that case, the LDOBJ may require an implementation that might itself allocate registers,
+ // so the variable(s) should stay live until the end of the LDOBJ.
+ // Note that for promoted structs lvTracked is false.
+
+ GenTreePtr lclVarTree = nullptr;
+ if (tree->gtOper == GT_OBJ)
+ {
+ // fgIsIndirOfAddrOfLocal returns nullptr if the tree is
+ // not an indir(addr(local)), in which case we will set lclVarTree
+ // back to the original tree, and not handle it as a use/def.
+ lclVarTree = fgIsIndirOfAddrOfLocal(tree);
+ if ((lclVarTree != nullptr) && lvaTable[lclVarTree->gtLclVarCommon.gtLclNum].lvTracked)
+ {
+ lclVarTree = nullptr;
+ }
+ }
+ if (lclVarTree == nullptr)
+ {
+ lclVarTree = tree;
+ }
+
+ if (lclVarTree->OperIsNonPhiLocal() || lclVarTree->OperIsLocalAddr())
+ {
+ bool isDeadStore = fgComputeLifeLocal(life, keepAliveVars, lclVarTree, tree);
+ if (isDeadStore)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
+
+ bool doAgain = false;
+ if (fgRemoveDeadStore(&tree, varDsc, life, &doAgain, pStmtInfoDirty DEBUGARG(treeModf)))
+ {
+ assert(!doAgain);
+ break;
+ }
+
+ if (doAgain)
+ {
+ goto AGAIN;
+ }
+ }
+ }
+ else
+ {
+ if (tree->gtOper == GT_QMARK && tree->gtOp.gtOp1)
+ {
+ /* Special cases - "? :" operators.
+
+ The trees are threaded as shown below with nodes 1 to 11 linked
+ by gtNext. Both GT_<cond>->gtLiveSet and GT_COLON->gtLiveSet are
+ the union of the liveness on entry to thenTree and elseTree.
+
+ +--------------------+
+ | GT_QMARK 11|
+ +----------+---------+
+ |
+ *
+ / \
+ / \
+ / \
+ +---------------------+ +--------------------+
+ | GT_<cond> 3 | | GT_COLON 7 |
+ | w/ GTF_RELOP_QMARK | | w/ GTF_COLON_COND |
+ +----------+----------+ +---------+----------+
+ | |
+ * *
+ / \ / \
+ / \ / \
+ / \ / \
+ 2 1 thenTree 6 elseTree 10
+ x | |
+ / * *
+ +----------------+ / / \ / \
+ |prevExpr->gtNext+------/ / \ / \
+ +----------------+ / \ / \
+ 5 4 9 8
+
+ */
+
+ noway_assert(tree->gtOp.gtOp1->OperKind() & GTK_RELOP);
+ noway_assert(tree->gtOp.gtOp1->gtFlags & GTF_RELOP_QMARK);
+ noway_assert(tree->gtOp.gtOp2->gtOper == GT_COLON);
+
+ if (gtQMark)
+ {
+ /* This is a nested QMARK sequence - we need to use recursion.
+ * Compute the liveness for each node of the COLON branches
+ * The new computation starts from the GT_QMARK node and ends
+ * when the COLON branch of the enclosing QMARK ends */
+
+ noway_assert(nextColonExit &&
+ (nextColonExit == gtQMark->gtOp.gtOp1 || nextColonExit == gtQMark->gtOp.gtOp2));
+
+ VarSetOps::AssignNoCopy(this, life, fgComputeLife(life, tree, nextColonExit, volatileVars,
+ pStmtInfoDirty DEBUGARG(treeModf)));
+
+ /* Continue with exit node (the last node in the enclosing colon branch) */
+
+ tree = nextColonExit;
+ goto AGAIN;
+ }
+ else
+ {
+ gtQMark = tree;
+ VarSetOps::Assign(this, entryLiveSet, life);
+ gtColon = gtQMark->gtOp.gtOp2;
+ nextColonExit = gtColon;
+ }
+ }
+
+ /* If found the GT_COLON, start the new branch with the original life */
+
+ if (gtQMark && tree == gtQMark->gtOp.gtOp2)
+ {
+ /* The node better be a COLON. */
+ noway_assert(tree->gtOper == GT_COLON);
+
+ VarSetOps::Assign(this, life, entryLiveSet);
+ nextColonExit = gtQMark->gtOp.gtOp1;
+ }
+ }
+ }
+
+ /* Return the set of live variables out of this statement */
+
+ return life;
+}
+
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+#endif // !LEGACY_BACKEND
+
+bool Compiler::fgTryRemoveDeadLIRStore(LIR::Range& blockRange, GenTree* node, GenTree** next)
+{
+ assert(node != nullptr);
+ assert(next != nullptr);
+
+ assert(node->OperIsLocalStore() || node->OperIsLocalAddr());
+
+ GenTree* store = nullptr;
+ GenTree* value = nullptr;
+ if (node->OperIsLocalStore())
+ {
+ store = node;
+ value = store->gtGetOp1();
+ }
+ else if (node->OperIsLocalAddr())
+ {
+ LIR::Use addrUse;
+ if (!blockRange.TryGetUse(node, &addrUse) || (addrUse.User()->OperGet() != GT_STOREIND))
+ {
+ *next = node->gtPrev;
+ return false;
+ }
+
+ store = addrUse.User();
+ value = store->gtGetOp2();
+ }
+
+ bool isClosed = false;
+ unsigned sideEffects = 0;
+ LIR::ReadOnlyRange operandsRange = blockRange.GetRangeOfOperandTrees(store, &isClosed, &sideEffects);
+ if (!isClosed || ((sideEffects & GTF_SIDE_EFFECT) != 0) ||
+ (((sideEffects & GTF_ORDER_SIDEEFF) != 0) && (value->OperGet() == GT_CATCH_ARG)))
+ {
+ // If the range of the operands contains unrelated code or if it contains any side effects,
+ // do not remove it. Instead, just remove the store.
+
+ *next = node->gtPrev;
+ }
+ else
+ {
+ // Okay, the operands to the store form a contiguous range that has no side effects. Remove the
+ // range containing the operands and decrement the local var ref counts appropriately.
+
+ // Compute the next node to process. Note that we must be careful not to set the next node to
+ // process to a node that we are about to remove.
+ if (node->OperIsLocalStore())
+ {
+ assert(node == store);
+ *next = (operandsRange.LastNode()->gtNext == store) ? operandsRange.FirstNode()->gtPrev : node->gtPrev;
+ }
+ else
+ {
+ assert(operandsRange.Contains(node));
+ *next = operandsRange.FirstNode()->gtPrev;
+ }
+
+ blockRange.Delete(this, compCurBB, std::move(operandsRange));
+ }
+
+ // If the store is marked as a late argument, it is referenced by a call. Instead of removing it,
+ // bash it to a NOP.
+ if ((store->gtFlags & GTF_LATE_ARG) != 0)
+ {
+ if (store->IsLocal())
+ {
+ lvaDecRefCnts(compCurBB, store);
+ }
+
+ store->gtBashToNOP();
+ }
+ else
+ {
+ blockRange.Delete(this, compCurBB, store);
+ }
+
+ return true;
+}
+
+// fgRemoveDeadStore - remove a store to a local which has no exposed uses.
+//
+// pTree - GenTree** to local, including store-form local or local addr (post-rationalize)
+// varDsc - var that is being stored to
+// life - current live tracked vars (maintained as we walk backwards)
+// doAgain - out parameter, true if we should restart the statement
+// pStmtInfoDirty - should defer the cost computation to the point after the reverse walk is completed?
+//
+// Returns: true if we should skip the rest of the statement, false if we should continue
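+//
+// Example (added for clarity; illustrative only): for a statement whose entire tree is
+// "V03 = V01 + V02" and where V03 is not live afterwards, the whole statement is removed.
+// If the RHS has side effects (e.g. a call, or GT_CATCH_ARG under GTF_ORDER_SIDEEFF),
+// only the extracted side-effect list is kept; see the EXTRACT_SIDE_EFFECTS path below.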
+
+bool Compiler::fgRemoveDeadStore(
+ GenTree** pTree, LclVarDsc* varDsc, VARSET_TP life, bool* doAgain, bool* pStmtInfoDirty DEBUGARG(bool* treeModf))
+{
+ assert(!compRationalIRForm);
+
+ // Vars should have already been checked for address exposure by this point.
+ assert(!varDsc->lvIsStructField || !lvaTable[varDsc->lvParentLcl].lvAddrExposed);
+ assert(!varDsc->lvAddrExposed);
+
+ GenTree* asgNode = nullptr;
+ GenTree* rhsNode = nullptr;
+ GenTree* addrNode = nullptr;
+ GenTree* const tree = *pTree;
+
+ GenTree* nextNode = tree->gtNext;
+
+ // First, characterize the lclVarTree and see if we are taking its address.
+ if (tree->OperIsLocalStore())
+ {
+ rhsNode = tree->gtOp.gtOp1;
+ asgNode = tree;
+ }
+ else if (tree->OperIsLocal())
+ {
+ if (nextNode == nullptr)
+ {
+ return false;
+ }
+ if (nextNode->OperGet() == GT_ADDR)
+ {
+ addrNode = nextNode;
+ nextNode = nextNode->gtNext;
+ }
+ }
+ else
+ {
+ assert(tree->OperIsLocalAddr());
+ addrNode = tree;
+ }
+
+ // Next, find the assignment.
+ if (asgNode == nullptr)
+ {
+ if (addrNode == nullptr)
+ {
+ asgNode = nextNode;
+ }
+ else if (asgNode == nullptr)
+ {
+ // This may be followed by GT_IND/assign or GT_STOREIND.
+ if (nextNode == nullptr)
+ {
+ return false;
+ }
+ if (nextNode->OperIsIndir())
+ {
+ // This must be a non-nullcheck form of indir, or it would not be a def.
+ assert(nextNode->OperGet() != GT_NULLCHECK);
+ if (nextNode->OperIsStore())
+ {
+ asgNode = nextNode;
+ if (asgNode->OperIsBlk())
+ {
+ rhsNode = asgNode->AsBlk()->Data();
+ }
+ // TODO-1stClassStructs: There should be an else clause here to handle
+ // the non-block forms of store ops (GT_STORE_LCL_VAR, etc.) for which
+ // rhsNode is op1. (This isn't really a 1stClassStructs item, but the
+ // above was added to catch what used to be dead block ops, and that
+ // made this omission apparent.)
+ }
+ else
+ {
+ asgNode = nextNode->gtNext;
+ }
+ }
+ }
+ }
+
+ if (asgNode == nullptr)
+ {
+ return false;
+ }
+
+ if (asgNode->OperIsAssignment())
+ {
+ rhsNode = asgNode->gtGetOp2();
+ }
+ else if (rhsNode == nullptr)
+ {
+ return false;
+ }
+
+ if (asgNode && (asgNode->gtFlags & GTF_ASG))
+ {
+ noway_assert(rhsNode);
+ noway_assert(tree->gtFlags & GTF_VAR_DEF);
+
+ if (asgNode->gtOper != GT_ASG && asgNode->gtOverflowEx())
+ {
+ // asgNode may be <op_ovf>= (with GTF_OVERFLOW). In that case, we need to keep the <op_ovf>
+
+ // Dead <OpOvf>= assignment. We change it to the right operation (taking out the assignment),
+ // update the flags, update order of statement, as we have changed the order of the operation
+ // and we start computing life again from the op_ovf node (we go backwards). Note that we
+ // don't need to update ref counts because we don't change them, we're only changing the
+ // operation.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nChanging dead <asgop> ovf to <op> ovf...\n");
+ }
+#endif // DEBUG
+
+ switch (asgNode->gtOper)
+ {
+ case GT_ASG_ADD:
+ asgNode->gtOper = GT_ADD;
+ break;
+ case GT_ASG_SUB:
+ asgNode->gtOper = GT_SUB;
+ break;
+ default:
+ // Only add and sub allowed, we don't have ASG_MUL and ASG_DIV for ints, and
+ // floats don't allow OVF forms.
+ noway_assert(!"Unexpected ASG_OP");
+ }
+
+ asgNode->gtFlags &= ~GTF_REVERSE_OPS;
+ if (!((asgNode->gtOp.gtOp1->gtFlags | rhsNode->gtFlags) & GTF_ASG))
+ {
+ asgNode->gtFlags &= ~GTF_ASG;
+ }
+ asgNode->gtOp.gtOp1->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
+
+#ifdef DEBUG
+ *treeModf = true;
+#endif // DEBUG
+
+ // Make sure no previous cousin subtree rooted at a common ancestor has
+ // asked to defer the recomputation of costs.
+ if (!*pStmtInfoDirty)
+ {
+ /* Update ordering, costs, FP levels, etc. */
+ gtSetStmtInfo(compCurStmt);
+
+ /* Re-link the nodes for this statement */
+ fgSetStmtSeq(compCurStmt);
+
+ // Start from the old assign node, as we have changed the order of its operands.
+ // No need to update liveness, as nothing has changed (the target of the asgNode
+ // either goes dead here, in which case the whole expression is now dead, or it
+ // was already live).
+
+ // TODO-Throughput: Redo this so that the graph is modified BEFORE traversing it!
+ // We can determine this case when we first see the asgNode
+
+ *pTree = asgNode;
+
+ *doAgain = true;
+ }
+ return false;
+ }
+
+ // Do not remove if this local variable represents
+ // a promoted struct field of an address exposed local.
+ if (varDsc->lvIsStructField && lvaTable[varDsc->lvParentLcl].lvAddrExposed)
+ {
+ return false;
+ }
+
+ // Do not remove if the address of the variable has been exposed.
+ if (varDsc->lvAddrExposed)
+ {
+ return false;
+ }
+
+ /* Test for interior statement */
+
+ if (asgNode->gtNext == nullptr)
+ {
+ /* This is a "NORMAL" statement with the
+ * assignment node hanging from the GT_STMT node */
+
+ noway_assert(compCurStmt->gtStmt.gtStmtExpr == asgNode);
+ JITDUMP("top level assign\n");
+
+ /* Check for side effects */
+
+ if (rhsNode->gtFlags & GTF_SIDE_EFFECT)
+ {
+ EXTRACT_SIDE_EFFECTS:
+ /* Extract the side effects */
+
+ GenTreePtr sideEffList = nullptr;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("BB%02u - Dead assignment has side effects...\n", compCurBB->bbNum);
+ gtDispTree(asgNode);
+ printf("\n");
+ }
+#endif // DEBUG
+ if (rhsNode->TypeGet() == TYP_STRUCT)
+ {
+ // This is a block assignment. An indirection of the rhs is not considered to
+ // happen until the assignment, so we will extract the side effects from only
+ // the address.
+ if (rhsNode->OperIsIndir())
+ {
+ assert(rhsNode->OperGet() != GT_NULLCHECK);
+ rhsNode = rhsNode->AsIndir()->Addr();
+ }
+ }
+ gtExtractSideEffList(rhsNode, &sideEffList);
+
+ if (sideEffList)
+ {
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Extracted side effects list...\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif // DEBUG
+ fgUpdateRefCntForExtract(asgNode, sideEffList);
+
+ /* Replace the assignment statement with the list of side effects */
+ noway_assert(sideEffList->gtOper != GT_STMT);
+
+ *pTree = compCurStmt->gtStmt.gtStmtExpr = sideEffList;
+#ifdef DEBUG
+ *treeModf = true;
+#endif // DEBUG
+ /* Update ordering, costs, FP levels, etc. */
+ gtSetStmtInfo(compCurStmt);
+
+ /* Re-link the nodes for this statement */
+ fgSetStmtSeq(compCurStmt);
+
+ // Since the whole statement gets replaced it is safe to
+ // re-thread and update order. No need to compute costs again.
+ *pStmtInfoDirty = false;
+
+ /* Compute the live set for the new statement */
+ *doAgain = true;
+ return false;
+ }
+ else
+ {
+ /* No side effects, most likely we forgot to reset some flags */
+ fgRemoveStmt(compCurBB, compCurStmt);
+
+ return true;
+ }
+ }
+ else
+ {
+ /* If this is GT_CATCH_ARG saved to a local var don't bother */
+
+ JITDUMP("removing stmt with no side effects\n");
+
+ if (asgNode->gtFlags & GTF_ORDER_SIDEEFF)
+ {
+ if (rhsNode->gtOper == GT_CATCH_ARG)
+ {
+ goto EXTRACT_SIDE_EFFECTS;
+ }
+ }
+
+ /* No side effects - remove the whole statement from the block->bbTreeList */
+
+ fgRemoveStmt(compCurBB, compCurStmt);
+
+ /* Since we removed it, do not process the rest (i.e. the RHS) of the statement;
+ * variables in the RHS will not be marked as live, so we get the benefit of
+ * propagating dead variables up the chain */
+
+ return true;
+ }
+ }
+ else
+ {
+ /* This is an INTERIOR STATEMENT with a dead assignment - remove it */
+
+ noway_assert(!VarSetOps::IsMember(this, life, varDsc->lvVarIndex));
+
+ if (rhsNode->gtFlags & GTF_SIDE_EFFECT)
+ {
+ /* :-( we have side effects */
+
+ GenTreePtr sideEffList = nullptr;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("BB%02u - INTERIOR dead assignment has side effects...\n", compCurBB->bbNum);
+ gtDispTree(asgNode);
+ printf("\n");
+ }
+#endif // DEBUG
+ gtExtractSideEffList(rhsNode, &sideEffList);
+
+ if (!sideEffList)
+ {
+ goto NO_SIDE_EFFECTS;
+ }
+
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Extracted side effects list from condition...\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif // DEBUG
+ if (sideEffList->gtOper == asgNode->gtOper)
+ {
+ fgUpdateRefCntForExtract(asgNode, sideEffList);
+#ifdef DEBUG
+ *treeModf = true;
+#endif // DEBUG
+ asgNode->gtOp.gtOp1 = sideEffList->gtOp.gtOp1;
+ asgNode->gtOp.gtOp2 = sideEffList->gtOp.gtOp2;
+ asgNode->gtType = sideEffList->gtType;
+ }
+ else
+ {
+ fgUpdateRefCntForExtract(asgNode, sideEffList);
+#ifdef DEBUG
+ *treeModf = true;
+#endif // DEBUG
+ /* Change the node to a GT_COMMA holding the side effect list */
+ asgNode->gtBashToNOP();
+
+ asgNode->ChangeOper(GT_COMMA);
+ asgNode->gtFlags |= sideEffList->gtFlags & GTF_ALL_EFFECT;
+
+ if (sideEffList->gtOper == GT_COMMA)
+ {
+ asgNode->gtOp.gtOp1 = sideEffList->gtOp.gtOp1;
+ asgNode->gtOp.gtOp2 = sideEffList->gtOp.gtOp2;
+ }
+ else
+ {
+ asgNode->gtOp.gtOp1 = sideEffList;
+ asgNode->gtOp.gtOp2 = gtNewNothingNode();
+ }
+ }
+ }
+ else
+ {
+ NO_SIDE_EFFECTS:
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRemoving tree ");
+ printTreeID(asgNode);
+ printf(" in BB%02u as useless\n", compCurBB->bbNum);
+ gtDispTree(asgNode);
+ printf("\n");
+ }
+#endif // DEBUG
+ /* No side effects - Remove the interior statement */
+ fgUpdateRefCntForExtract(asgNode, nullptr);
+
+ /* Change the assignment to a GT_NOP node */
+
+ asgNode->gtBashToNOP();
+
+#ifdef DEBUG
+ *treeModf = true;
+#endif // DEBUG
+ }
+
+ /* Re-link the nodes for this statement - Do not update ordering! */
+
+ // Do not update costs by calling gtSetStmtInfo. fgSetStmtSeq modifies
+ // the tree threading based on the new costs. Removing nodes could
+ // cause a subtree to get evaluated first (earlier second) during the
+ // liveness walk. Instead just set a flag that costs are dirty and
+ // caller has to call gtSetStmtInfo.
+ *pStmtInfoDirty = true;
+
+ fgSetStmtSeq(compCurStmt);
+
+ /* Continue analysis from this node */
+
+ *pTree = asgNode;
+
+ return false;
+ }
+ }
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Iterative data flow for live variable info and availability of range
+ * check index expressions.
+ */
+void Compiler::fgInterBlockLocalVarLiveness()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgInterBlockLocalVarLiveness()\n");
+ }
+#endif
+
+ /* This global flag is set whenever we remove a statement */
+
+ fgStmtRemoved = false;
+
+ // keep track if a bbLiveIn changed due to dead store removal
+ fgLocalVarLivenessChanged = false;
+
+ /* Compute the IN and OUT sets for tracked variables */
+
+ fgLiveVarAnalysis();
+
+//-------------------------------------------------------------------------
+
+#ifdef DEBUGGING_SUPPORT
+
+ /* For debuggable code, we mark vars as live over their entire
+ * reported scope, so that they will be visible over the entire scope
+ */
+
+ if (opts.compDbgCode && (info.compVarScopesCount > 0))
+ {
+ fgExtendDbgLifetimes();
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ /*-------------------------------------------------------------------------
+ * Variables involved in exception-handlers and finally blocks need
+ * to be specially marked
+ */
+ BasicBlock* block;
+
+ VARSET_TP VARSET_INIT_NOCOPY(exceptVars, VarSetOps::MakeEmpty(this)); // vars live on entry to a handler
+ VARSET_TP VARSET_INIT_NOCOPY(finallyVars, VarSetOps::MakeEmpty(this)); // vars live on exit of a 'finally' block
+ VARSET_TP VARSET_INIT_NOCOPY(filterVars, VarSetOps::MakeEmpty(this)); // vars live on exit from a 'filter'
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbCatchTyp != BBCT_NONE)
+ {
+ /* Note the set of variables live on entry to exception handler */
+
+ VarSetOps::UnionD(this, exceptVars, block->bbLiveIn);
+ }
+
+ if (block->bbJumpKind == BBJ_EHFILTERRET)
+ {
+ /* Get the set of live variables on exit from a 'filter' */
+ VarSetOps::UnionD(this, filterVars, block->bbLiveOut);
+ }
+ else if (block->bbJumpKind == BBJ_EHFINALLYRET)
+ {
+ /* Get the set of live variables on exit from a 'finally' block */
+
+ VarSetOps::UnionD(this, finallyVars, block->bbLiveOut);
+ }
+#if FEATURE_EH_FUNCLETS
+ // Funclets are called and returned from; as such, we can only count on the frame
+ // pointer being restored, and thus everything live-in or live-out must be on the
+ // stack.
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ VarSetOps::UnionD(this, exceptVars, block->bbLiveIn);
+ }
+ if ((block->bbJumpKind == BBJ_EHFINALLYRET) || (block->bbJumpKind == BBJ_EHFILTERRET) ||
+ (block->bbJumpKind == BBJ_EHCATCHRET))
+ {
+ VarSetOps::UnionD(this, exceptVars, block->bbLiveOut);
+ }
+#endif // FEATURE_EH_FUNCLETS
+ }
+
+ LclVarDsc* varDsc;
+ unsigned varNum;
+
+ for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ {
+ continue;
+ }
+
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ continue;
+ }
+
+ /* Un-init locals may need auto-initialization. Note that the
+ liveness of such locals will bubble to the top (fgFirstBB)
+ in fgInterBlockLocalVarLiveness() */
+
+ if (!varDsc->lvIsParam && VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, varDsc->lvVarIndex) &&
+ (info.compInitMem || varTypeIsGC(varDsc->TypeGet())))
+ {
+ varDsc->lvMustInit = true;
+ }
+
+ // Mark all variables that are live on entry to an exception handler
+ // or on exit from a filter handler or finally as DoNotEnregister.
+
+ if (VarSetOps::IsMember(this, exceptVars, varDsc->lvVarIndex) ||
+ VarSetOps::IsMember(this, filterVars, varDsc->lvVarIndex))
+ {
+ /* Mark the variable appropriately */
+ lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler));
+ }
+
+ /* Mark all pointer variables live on exit from a 'finally'
+ block as either volatile for non-GC ref types or as
+ 'explicitly initialized' (volatile and must-init) for GC-ref types */
+
+ if (VarSetOps::IsMember(this, finallyVars, varDsc->lvVarIndex))
+ {
+ lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler));
+
+ /* Don't set lvMustInit unless we have a non-arg, GC pointer */
+
+ if (varDsc->lvIsParam)
+ {
+ continue;
+ }
+
+ if (!varTypeIsGC(varDsc->TypeGet()))
+ {
+ continue;
+ }
+
+ /* Mark it */
+ varDsc->lvMustInit = true;
+ }
+ }
+
+ /*-------------------------------------------------------------------------
+ * Now fill in liveness info within each basic block - Backward DataFlow
+ */
+
+ // This is used in the liveness computation, as a temporary.
+ VarSetOps::AssignNoCopy(this, fgMarkIntfUnionVS, VarSetOps::MakeEmpty(this));
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ /* Tell everyone what block we're working on */
+
+ compCurBB = block;
+
+ /* Remember those vars live on entry to exception handlers */
+ /* if we are part of a try block */
+
+ VARSET_TP VARSET_INIT_NOCOPY(volatileVars, VarSetOps::MakeEmpty(this));
+
+ if (ehBlockHasExnFlowDsc(block))
+ {
+ VarSetOps::Assign(this, volatileVars, fgGetHandlerLiveVars(block));
+
+ // volatileVars is a subset of exceptVars
+ noway_assert(VarSetOps::IsSubset(this, volatileVars, exceptVars));
+ }
+
+ /* Start with the variables live on exit from the block */
+
+ VARSET_TP VARSET_INIT(this, life, block->bbLiveOut);
+
+ /* Mark any interference we might have at the end of the block */
+
+ fgMarkIntf(life);
+
+ if (!block->IsLIR())
+ {
+ /* Get the first statement in the block */
+
+ GenTreePtr firstStmt = block->FirstNonPhiDef();
+
+ if (!firstStmt)
+ {
+ continue;
+ }
+
+ /* Walk all the statements of the block backwards - Get the LAST stmt */
+
+ GenTreePtr nextStmt = block->bbTreeList->gtPrev;
+
+ do
+ {
+#ifdef DEBUG
+ bool treeModf = false;
+#endif // DEBUG
+ noway_assert(nextStmt);
+ noway_assert(nextStmt->gtOper == GT_STMT);
+
+ compCurStmt = nextStmt;
+ nextStmt = nextStmt->gtPrev;
+
+ /* Compute the liveness for each tree node in the statement */
+ bool stmtInfoDirty = false;
+
+ VarSetOps::AssignNoCopy(this, life, fgComputeLife(life, compCurStmt->gtStmt.gtStmtExpr, nullptr,
+ volatileVars, &stmtInfoDirty DEBUGARG(&treeModf)));
+
+ if (stmtInfoDirty)
+ {
+ gtSetStmtInfo(compCurStmt);
+ fgSetStmtSeq(compCurStmt);
+ }
+
+#ifdef DEBUG
+ if (verbose && treeModf)
+ {
+ printf("\nfgComputeLife modified tree:\n");
+ gtDispTree(compCurStmt->gtStmt.gtStmtExpr);
+ printf("\n");
+ }
+#endif // DEBUG
+ } while (compCurStmt != firstStmt);
+ }
+ else
+ {
+#ifdef LEGACY_BACKEND
+ unreached();
+#else // !LEGACY_BACKEND
+ VarSetOps::AssignNoCopy(this, life, fgComputeLifeLIR(life, block, volatileVars));
+#endif // !LEGACY_BACKEND
+ }
+
+ /* Done with the current block - if we removed any statements, some
+ * variables may have become dead at the beginning of the block
+ * -> have to update bbLiveIn */
+
+ if (!VarSetOps::Equal(this, life, block->bbLiveIn))
+ {
+ /* Some variables have become dead all across the block,
+ so life should be a subset of block->bbLiveIn */
+
+ // We changed the liveIn of the block, which may affect liveOut of others,
+ // which may expose more dead stores.
+ fgLocalVarLivenessChanged = true;
+
+ noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, life, block->bbLiveIn), life));
+
+ /* set the new bbLiveIn */
+
+ VarSetOps::Assign(this, block->bbLiveIn, life);
+
+ /* compute the new bbLiveOut for all the predecessors of this block */
+ }
+
+ noway_assert(compCurBB == block);
+#ifdef DEBUG
+ compCurBB = nullptr;
+#endif
+ }
+
+ fgLocalVarLivenessDone = true;
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************/
+
+void Compiler::fgDispBBLiveness(BasicBlock* block)
+{
+ VARSET_TP VARSET_INIT_NOCOPY(allVars, VarSetOps::Union(this, block->bbLiveIn, block->bbLiveOut));
+ printf("BB%02u", block->bbNum);
+ printf(" IN (%d)=", VarSetOps::Count(this, block->bbLiveIn));
+ lvaDispVarSet(block->bbLiveIn, allVars);
+ if (block->bbHeapLiveIn)
+ {
+ printf(" + HEAP");
+ }
+ printf("\n OUT(%d)=", VarSetOps::Count(this, block->bbLiveOut));
+ lvaDispVarSet(block->bbLiveOut, allVars);
+ if (block->bbHeapLiveOut)
+ {
+ printf(" + HEAP");
+ }
+ printf("\n\n");
+}
+
+void Compiler::fgDispBBLiveness()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ fgDispBBLiveness(block);
+ }
+}
+
+#endif // DEBUG
diff --git a/src/jit/loopcloning.cpp b/src/jit/loopcloning.cpp
new file mode 100644
index 0000000000..8ce015e607
--- /dev/null
+++ b/src/jit/loopcloning.cpp
@@ -0,0 +1,845 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX LoopCloning XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+
+//--------------------------------------------------------------------------------------------------
+// ToGenTree - Convert an arrLen operation into a gentree node.
+//
+// Arguments:
+// comp Compiler instance to allocate trees
+//
+// Return Values:
+// Returns the gen tree representation for arrLen or MD Array node as defined by
+// the "type" member
+//
+// Notes:
+// This tree produces a GT_INDEX node; the caller is supposed to morph it appropriately
+// so it can be codegen'ed.
+//
+GenTreePtr LC_Array::ToGenTree(Compiler* comp)
+{
+ // If jagged array
+ if (type == Jagged)
+ {
+ // Create an a[i][j][k].length type node.
+ GenTreePtr arr = comp->gtNewLclvNode(arrIndex->arrLcl, comp->lvaTable[arrIndex->arrLcl].lvType);
+ int rank = GetDimRank();
+ for (int i = 0; i < rank; ++i)
+ {
+ arr = comp->gtNewIndexRef(TYP_REF, arr, comp->gtNewLclvNode(arrIndex->indLcls[i],
+ comp->lvaTable[arrIndex->indLcls[i]].lvType));
+ }
+ // If asked for arrLen, invoke the array length operator.
+ if (oper == ArrLen)
+ {
+ GenTreePtr arrLen = new (comp, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arr, offsetof(CORINFO_Array, length));
+ return arrLen;
+ }
+ else
+ {
+ assert(oper == None);
+ return arr;
+ }
+ }
+ else
+ {
+ // TODO-CQ: Optimize for MD Array.
+ assert(!"Optimize for MD Array");
+ }
+ return nullptr;
+}
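+// Illustrative note (added for clarity; hypothetical example, not code generated here):
+// for a jagged access "a[i][j]" with oper == ArrLen, the loop above produces
+// GT_ARR_LENGTH(GT_INDEX(GT_INDEX(a, i), j)), which the caller is expected to morph
+// before codegen as described in the header comment.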
+
+//--------------------------------------------------------------------------------------------------
+// ToGenTree - Convert an "identifier" into a gentree node.
+//
+// Arguments:
+// comp Compiler instance to allocate trees
+//
+// Return Values:
+// Returns the gen tree representation for either a constant or a variable or an arrLen operation
+// defined by the "type" member
+//
+GenTreePtr LC_Ident::ToGenTree(Compiler* comp)
+{
+ // Convert to GenTree nodes.
+ switch (type)
+ {
+ case Const:
+#ifdef _TARGET_64BIT_
+ return comp->gtNewLconNode(constant);
+#else
+ return comp->gtNewIconNode((ssize_t)constant);
+#endif
+ case Var:
+ return comp->gtNewLclvNode((unsigned)constant, comp->lvaTable[constant].lvType);
+ case ArrLen:
+ return arrLen.ToGenTree(comp);
+ case Null:
+ return comp->gtNewIconNode(0, TYP_REF);
+ default:
+ assert(!"Could not convert LC_Ident to GenTree");
+ unreached();
+ break;
+ }
+}
+
+//--------------------------------------------------------------------------------------------------
+// ToGenTree - Convert an "expression" into a gentree node.
+//
+// Arguments:
+// comp Compiler instance to allocate trees
+//
+// Return Values:
+// Returns the gen tree representation for the expression (an identifier, or an identifier plus a
+// constant) as defined by the "type" member
+//
+GenTreePtr LC_Expr::ToGenTree(Compiler* comp)
+{
+ // Convert to GenTree nodes.
+ switch (type)
+ {
+ case Ident:
+ return ident.ToGenTree(comp);
+ case IdentPlusConst:
+#ifdef _TARGET_64BIT_
+ return comp->gtNewOperNode(GT_ADD, TYP_LONG, ident.ToGenTree(comp), comp->gtNewLconNode(constant));
+#else
+ return comp->gtNewOperNode(GT_ADD, TYP_INT, ident.ToGenTree(comp), comp->gtNewIconNode((ssize_t)constant));
+#endif
+ default:
+ assert(!"Could not convert LC_Expr to GenTree");
+ unreached();
+ break;
+ }
+}
+
+//--------------------------------------------------------------------------------------------------
+// ToGenTree - Convert a "condition" into a gentree node.
+//
+// Arguments:
+// comp Compiler instance to allocate trees
+//
+// Return Values:
+// Returns the gen tree representation for the conditional operator on lhs and rhs trees
+//
+GenTreePtr LC_Condition::ToGenTree(Compiler* comp)
+{
+ return comp->gtNewOperNode(oper, TYP_INT, op1.ToGenTree(comp), op2.ToGenTree(comp));
+}
+
+//--------------------------------------------------------------------------------------------------
+// Evaluates - Evaluate a given loop cloning condition if it can be statically evaluated.
+//
+// Arguments:
+// pResult The evaluation result
+//
+// Return Values:
+// Returns true if the condition can be statically evaluated. If the condition's result
+// is statically unknown then return false. In other words, true if "pResult" is valid.
+//
+bool LC_Condition::Evaluates(bool* pResult)
+{
+ switch (oper)
+ {
+ case GT_EQ:
+ case GT_GE:
+ case GT_LE:
+ // If op1 == op2 then equality should result in true.
+ if (op1 == op2)
+ {
+ *pResult = true;
+ return true;
+ }
+ break;
+
+ case GT_GT:
+ case GT_LT:
+ case GT_NE:
+ // If op1 == op2 then inequality should result in false.
+ if (op1 == op2)
+ {
+ *pResult = false;
+ return true;
+ }
+ break;
+
+ default:
+ // for all other 'oper' kinds, we will return false
+ break;
+ }
+ return false;
+}
+
+//--------------------------------------------------------------------------------------------------
+// Combines - Check whether two conditions would combine to yield a single new condition.
+//
+// Arguments:
+// cond The condition that is checked if it would combine with "*this" condition.
+// newCond The resulting combined condition.
+//
+// Return Values:
+// Returns true if "cond" combines with the "this" condition.
+// "newCond" contains the combines condition.
+//
+// Operation:
+// Check if both conditions are equal. If so, return just one of them.
+// Otherwise, check whether one condition is the reverse relop of the other with its operands
+// swapped (e.g., "x < y" vs. "y > x"). If so, return either of them.
+//
+// Notes:
+// This is not a full-fledged expression optimizer, it is supposed
+// to remove redundant conditions that are generated for optimization
+// opportunities. Anything further should be implemented as needed.
+// For example, for (i = beg; i < end; i += inc) a[i]. Then, the conditions
+// would be: "beg >= 0, end <= a.len, inc > 0"
+bool LC_Condition::Combines(const LC_Condition& cond, LC_Condition* newCond)
+{
+ if (oper == cond.oper && op1 == cond.op1 && op2 == cond.op2)
+ {
+ *newCond = *this;
+ return true;
+ }
+ else if ((oper == GT_LT || oper == GT_LE || oper == GT_GT || oper == GT_GE) &&
+ GenTree::ReverseRelop(oper) == cond.oper && op1 == cond.op2 && op2 == cond.op1)
+ {
+ *newCond = *this;
+ return true;
+ }
+ return false;
+}
+
+//--------------------------------------------------------------------------------------------------
+// GetLoopOptInfo - Retrieve the loop opt info candidate array.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// Return the optInfo array member. The method doesn't allocate memory.
+//
+ExpandArrayStack<LcOptInfo*>* LoopCloneContext::GetLoopOptInfo(unsigned loopNum)
+{
+ return optInfo[loopNum];
+}
+
+//--------------------------------------------------------------------------------------------------
+// CancelLoopOptInfo - Cancel loop cloning optimization for this loop.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// None.
+//
+void LoopCloneContext::CancelLoopOptInfo(unsigned loopNum)
+{
+ JITDUMP("Cancelling loop cloning for loop L_%02u\n", loopNum);
+ optInfo[loopNum] = nullptr;
+ if (conditions[loopNum] != nullptr)
+ {
+ conditions[loopNum]->Reset();
+ conditions[loopNum] = nullptr;
+ }
+}
+
+//--------------------------------------------------------------------------------------------------
+// EnsureLoopOptInfo - Retrieve the loop opt info candidate array; if it is not present, allocate
+// memory.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// The array of optimization candidates for the loop.
+//
+ExpandArrayStack<LcOptInfo*>* LoopCloneContext::EnsureLoopOptInfo(unsigned loopNum)
+{
+ if (optInfo[loopNum] == nullptr)
+ {
+ optInfo[loopNum] = new (alloc) ExpandArrayStack<LcOptInfo*>(alloc, 4);
+ }
+ return optInfo[loopNum];
+}
+
+//--------------------------------------------------------------------------------------------------
+// EnsureConditions - Retrieve the loop cloning conditions candidate array;
+// if it is not present, allocate memory.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// The array of cloning conditions for the loop.
+//
+ExpandArrayStack<LC_Condition>* LoopCloneContext::EnsureConditions(unsigned loopNum)
+{
+ if (conditions[loopNum] == nullptr)
+ {
+ conditions[loopNum] = new (alloc) ExpandArrayStack<LC_Condition>(alloc, 4);
+ }
+ return conditions[loopNum];
+}
+
+//--------------------------------------------------------------------------------------------------
+// GetConditions - Get the cloning conditions array for the loop, no allocation.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// The array of cloning conditions for the loop.
+//
+ExpandArrayStack<LC_Condition>* LoopCloneContext::GetConditions(unsigned loopNum)
+{
+ return conditions[loopNum];
+}
+
+//--------------------------------------------------------------------------------------------------
+// EnsureDerefs - Ensure an array of dereferences is created if it doesn't exist.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// The array of dereferences for the loop.
+//
+ExpandArrayStack<LC_Array>* LoopCloneContext::EnsureDerefs(unsigned loopNum)
+{
+ if (derefs[loopNum] == nullptr)
+ {
+ derefs[loopNum] = new (alloc) ExpandArrayStack<LC_Array>(alloc, 4);
+ }
+ return derefs[loopNum];
+}
+
+//--------------------------------------------------------------------------------------------------
+// HasBlockConditions - Check if there are block level conditions for the loop.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// Return true if there are any block level conditions.
+//
+bool LoopCloneContext::HasBlockConditions(unsigned loopNum)
+{
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = blockConditions[loopNum];
+ if (levelCond == nullptr)
+ {
+ return false;
+ }
+
+ // Walk through each block to check if any of them has conditions.
+ for (unsigned i = 0; i < levelCond->Size(); ++i)
+ {
+ if ((*levelCond)[i]->Size() > 0)
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+//--------------------------------------------------------------------------------------------------
+// GetBlockConditions - Return block level conditions for the loop.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// Return block conditions.
+//
+ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* LoopCloneContext::GetBlockConditions(unsigned loopNum)
+{
+ assert(HasBlockConditions(loopNum));
+ return blockConditions[loopNum];
+}
+
+//--------------------------------------------------------------------------------------------------
+// EnsureBlockConditions - Allocate block level conditions for the loop if they do not exist.
+//
+// Arguments:
+// loopNum the loop index.
+// condBlocks the number of block-level conditions for each loop, corresponding to the blocks
+// created.
+//
+// Return Values:
+// Return block conditions.
+//
+ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* LoopCloneContext::EnsureBlockConditions(unsigned loopNum,
+ unsigned condBlocks)
+{
+ if (blockConditions[loopNum] == nullptr)
+ {
+ blockConditions[loopNum] = new (alloc) ExpandArrayStack<ExpandArrayStack<LC_Condition>*>(alloc, condBlocks);
+ }
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = blockConditions[loopNum];
+ for (unsigned i = 0; i < condBlocks; ++i)
+ {
+ levelCond->Set(i, new (alloc) ExpandArrayStack<LC_Condition>(alloc));
+ }
+ return levelCond;
+}
+
+#ifdef DEBUG
+void LoopCloneContext::PrintBlockConditions(unsigned loopNum)
+{
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = blockConditions[loopNum];
+ if (levelCond == nullptr || levelCond->Size() == 0)
+ {
+ JITDUMP("No block conditions\n");
+ return;
+ }
+
+ for (unsigned i = 0; i < levelCond->Size(); ++i)
+ {
+ JITDUMP("%d = {", i);
+ for (unsigned j = 0; j < ((*levelCond)[i])->Size(); ++j)
+ {
+ if (j != 0)
+ {
+ JITDUMP(" & ");
+ }
+ (*((*levelCond)[i]))[j].Print();
+ }
+ JITDUMP("}\n");
+ }
+}
+#endif
+
+//--------------------------------------------------------------------------------------------------
+// EvaluateConditions - Evaluate the loop cloning conditions statically, if it can be evaluated.
+//
+// Arguments:
+// loopNum the loop index.
+// pAllTrue all the cloning conditions evaluated to "true" statically.
+// pAnyFalse some cloning condition evaluated to "false" statically.
+// verbose verbose logging required.
+//
+// Return Values:
+// None.
+//
+// Operation:
+// For example, a condition like "V02 >= V02" statically evaluates to true. The caller should detect such
+// conditions and remove them from the "conditions" array.
+//
+// Similarly, conditions like "V02 > V02" will evaluate to "false". In this case the caller has to abort
+// loop cloning optimization for the loop. Note that the assumption for conditions is that they will
+// all be "AND"ed, so statically we know we will never take the fast path.
+//
+// Sometimes we simply can't say statically whether "V02 > V01.length" is true or false.
+// In that case, the "pAllTrue" will be false because this condition doesn't evaluate to "true" and
+// "pAnyFalse" could be false if no other condition statically evaluates to "false".
+void LoopCloneContext::EvaluateConditions(unsigned loopNum, bool* pAllTrue, bool* pAnyFalse DEBUGARG(bool verbose))
+{
+ bool allTrue = true;
+ bool anyFalse = false;
+
+ ExpandArrayStack<LC_Condition>& conds = *conditions[loopNum];
+
+ JITDUMP("Evaluating %d loop cloning conditions for loop %d\n", conds.Size(), loopNum);
+
+ assert(conds.Size() > 0);
+ for (unsigned i = 0; i < conds.Size(); ++i)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Considering condition %d: (", i);
+ conds[i].Print();
+ }
+#endif
+
+ bool res = false;
+ // Check if this condition evaluates to true or false.
+ if (conds[i].Evaluates(&res))
+ {
+ JITDUMP(") evaluates to %d\n", res);
+ if (!res)
+ {
+ anyFalse = true;
+ return;
+ }
+ }
+ else
+ {
+ JITDUMP("), could not be evaluated\n");
+ allTrue = false;
+ }
+ }
+
+ JITDUMP("Evaluation result allTrue = %d, anyFalse = %d\n", allTrue, anyFalse);
+ *pAllTrue = allTrue;
+ *pAnyFalse = anyFalse;
+}
+
+//--------------------------------------------------------------------------------------------------
+// OptimizeConditions - Evaluate the loop cloning conditions statically; if they can be evaluated,
+// optimize the "conditions" array accordingly.
+//
+// Arguments:
+// conds The conditions array to optimize.
+//
+// Return Values:
+// None.
+//
+// Operation:
+// For example, a condition like "V02 >= V02" statically evaluates to true. Remove such conditions
+// from the "conditions" array.
+//
+// Similarly, conditions like "V02 > V02" will evaluate to "false". In this case abort loop cloning
+// optimization for the loop.
+//
+// Sometimes, two conditions combine to yield a single condition; in that case the duplicate
+// condition is removed.
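+//
+// Example:
+//   An illustrative sketch (not taken from the original sources); "i" and "n" stand for arbitrary
+//   local variable numbers. The conditions "i < n" and "n > i" combine, because
+//   GenTree::ReverseRelop(GT_LT) == GT_GT and the operands appear swapped, so only one of the two
+//   is kept:
+//
+//       LC_Condition c1(GT_LT, LC_Expr(LC_Ident(i, LC_Ident::Var)), LC_Expr(LC_Ident(n, LC_Ident::Var)));
+//       LC_Condition c2(GT_GT, LC_Expr(LC_Ident(n, LC_Ident::Var)), LC_Expr(LC_Ident(i, LC_Ident::Var)));
+//       LC_Condition combined;
+//       bool merged = c1.Combines(c2, &combined); // true; "combined" is simply c1
+//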
+void LoopCloneContext::OptimizeConditions(ExpandArrayStack<LC_Condition>& conds)
+{
+ for (unsigned i = 0; i < conds.Size(); ++i)
+ {
+ // Check if the conditions evaluate.
+ bool result = false;
+ if (conds[i].Evaluates(&result))
+ {
+ // If statically known to be true, then remove this condition.
+ if (result)
+ {
+ conds.Remove(i);
+ --i;
+ continue;
+ }
+ else
+ {
+ // Some condition is statically false, then simply indicate
+ // not to clone this loop.
+ CancelLoopOptInfo(i);
+ break;
+ }
+ }
+
+ // Check for all other conditions[j], if it would combine with
+ // conditions[i].
+ for (unsigned j = i + 1; j < conds.Size(); ++j)
+ {
+ LC_Condition newCond;
+ if (conds[i].Combines(conds[j], &newCond))
+ {
+ conds.Remove(j);
+ conds[i] = newCond;
+ i = -1;
+ break;
+ }
+ }
+ }
+#ifdef DEBUG
+ // Make sure we didn't miss some combining.
+ for (unsigned i = 0; i < conds.Size(); ++i)
+ {
+ for (unsigned j = 0; j < conds.Size(); ++j)
+ {
+ LC_Condition newCond;
+ if ((i != j) && conds[i].Combines(conds[j], &newCond))
+ {
+ assert(!"Loop cloning conditions can still be optimized further.");
+ }
+ }
+ }
+#endif
+}
+
+//--------------------------------------------------------------------------------------------------
+// OptimizeBlockConditions - Optimize block level conditions.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Operation:
+// Calls OptimizeConditions helper on block level conditions.
+//
+// Return Values:
+// None.
+//
+void LoopCloneContext::OptimizeBlockConditions(unsigned loopNum DEBUGARG(bool verbose))
+{
+ if (!HasBlockConditions(loopNum))
+ {
+ return;
+ }
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = blockConditions[loopNum];
+ for (unsigned i = 0; i < levelCond->Size(); ++i)
+ {
+ OptimizeConditions(*((*levelCond)[i]));
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("After optimizing block-level cloning conditions\n\t");
+ PrintConditions(loopNum);
+ printf("\n");
+ }
+#endif
+}
+
+//--------------------------------------------------------------------------------------------------
+// OptimizeConditions - Optimize cloning conditions.
+//
+// Arguments:
+// loopNum the loop index.
+// verbose verbose logging required.
+//
+// Operation:
+// Calls OptimizeConditions helper on cloning conditions.
+//
+// Return Values:
+// None.
+//
+void LoopCloneContext::OptimizeConditions(unsigned loopNum DEBUGARG(bool verbose))
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Before optimizing cloning conditions\n\t");
+ PrintConditions(loopNum);
+ printf("\n");
+ }
+#endif
+ ExpandArrayStack<LC_Condition>& conds = *conditions[loopNum];
+ OptimizeConditions(conds);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("After optimizing cloning conditions\n\t");
+ PrintConditions(loopNum);
+ printf("\n");
+ }
+#endif
+}
+
+#ifdef DEBUG
+//--------------------------------------------------------------------------------------------------
+// PrintConditions - Print loop cloning conditions necessary to clone the loop.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// None.
+//
+void LoopCloneContext::PrintConditions(unsigned loopNum)
+{
+ if (conditions[loopNum] == nullptr)
+ {
+ JITDUMP("NO conditions");
+ return;
+ }
+ if (conditions[loopNum]->Size() == 0)
+ {
+ JITDUMP("Conditions were optimized away! Will always take cloned path.");
+ }
+ for (unsigned i = 0; i < conditions[loopNum]->Size(); ++i)
+ {
+ if (i != 0)
+ {
+ JITDUMP(" & ");
+ }
+ (*conditions[loopNum])[i].Print();
+ }
+}
+#endif
+
+//--------------------------------------------------------------------------------------------------
+// CondToStmtInBlock - Convert an array of conditions into a JTRUE stmt and add it to
+// the block.
+//
+// Arguments:
+// comp Compiler instance
+// conds Array of conditions to evaluate into a JTRUE stmt
+// block Block to insert the stmt into
+// reverse Reverse conditions if true.
+//
+// Note:
+// The condition that will be generated: jmpTrue(cond1 & cond2 ... == 0)
+//
+// Return Values:
+// None.
+//
+void LoopCloneContext::CondToStmtInBlock(Compiler* comp,
+ ExpandArrayStack<LC_Condition>& conds,
+ BasicBlock* block,
+ bool reverse)
+{
+ noway_assert(conds.Size() > 0);
+
+ // Get the first condition.
+ GenTreePtr cond = conds[0].ToGenTree(comp);
+ for (unsigned i = 1; i < conds.Size(); ++i)
+ {
+ // Append all conditions using AND operator.
+ cond = comp->gtNewOperNode(GT_AND, TYP_INT, cond, conds[i].ToGenTree(comp));
+ }
+
+ // Add "cond == 0" node
+ cond = comp->gtNewOperNode(reverse ? GT_NE : GT_EQ, TYP_INT, cond, comp->gtNewIconNode(0));
+
+ // Add jmpTrue "cond == 0" to slow path.
+ GenTreePtr stmt = comp->fgNewStmtFromTree(comp->gtNewOperNode(GT_JTRUE, TYP_VOID, cond));
+
+ // Add stmt to the block.
+ comp->fgInsertStmtAtEnd(block, stmt);
+
+ // Remorph.
+ comp->fgMorphBlockStmt(block, stmt DEBUGARG("Loop cloning condition"));
+}
+
+//--------------------------------------------------------------------------------------------------
+// Lcl - the current node's local variable.
+//
+// Arguments:
+// None.
+//
+// Operation:
+// If level is 0, then just return the array base. Else return the index variable on dim 'level'
+//
+// Return Values:
+// The local variable in the node's level.
+//
+unsigned LC_Deref::Lcl()
+{
+ unsigned lvl = level;
+ if (lvl == 0)
+ {
+ return array.arrIndex->arrLcl;
+ }
+ lvl--;
+ return array.arrIndex->indLcls[lvl];
+}
+
+//--------------------------------------------------------------------------------------------------
+// HasChildren - Check if there are children to 'this' node.
+//
+// Arguments:
+// None.
+//
+// Return Values:
+// Return true if children are present.
+//
+bool LC_Deref::HasChildren()
+{
+ return children != nullptr && children->Size() > 0;
+}
+
+//--------------------------------------------------------------------------------------------------
+// DeriveLevelConditions - Generate conditions for each level of the tree.
+//
+// Arguments:
+// conds An array of conditions for each level i.e., (level x conditions). This array will
+// contain the conditions for the tree at the end of the method.
+//
+// Operation:
+// level0 yields only (a != null) condition. All other levels yield two conditions:
+// (level < a[...].length && a[...][level] != null)
+//
+// Return Values:
+// None
+//
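+// Example:
+//   An illustrative sketch (not from the original sources), assuming a deref tree with nodes
+//   "a" at level 0, "i" at level 1 and "j" at level 2 (i.e., the access a[i][j]); the conditions
+//   land in "conds" at slots (level * 2 - 1) and (level * 2):
+//
+//       conds[0] : (a != null)
+//       conds[1] : (i < a.length)        conds[2] : (a[i] != null)
+//       conds[3] : (j < a[i].length)     conds[4] : (a[i][j] != null)
+//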
+void LC_Deref::DeriveLevelConditions(ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* conds)
+{
+ if (level == 0)
+ {
+ // For level 0, just push (a != null).
+ (*conds)[level]->Push(
+ LC_Condition(GT_NE, LC_Expr(LC_Ident(Lcl(), LC_Ident::Var)), LC_Expr(LC_Ident(LC_Ident::Null))));
+ }
+ else
+ {
+ // Adjust for level0 having just 1 condition and push condition (i < a.len).
+ LC_Array arrLen = array;
+ arrLen.oper = LC_Array::ArrLen;
+ arrLen.dim = level - 1;
+ (*conds)[level * 2 - 1]->Push(
+ LC_Condition(GT_LT, LC_Expr(LC_Ident(Lcl(), LC_Ident::Var)), LC_Expr(LC_Ident(arrLen))));
+
+ // Push condition (a[i] != null)
+ LC_Array arrTmp = array;
+ arrTmp.dim = level;
+ (*conds)[level * 2]->Push(LC_Condition(GT_NE, LC_Expr(LC_Ident(arrTmp)), LC_Expr(LC_Ident(LC_Ident::Null))));
+ }
+
+ // Invoke on the children recursively.
+ if (HasChildren())
+ {
+ for (unsigned i = 0; i < children->Size(); ++i)
+ {
+ (*children)[i]->DeriveLevelConditions(conds);
+ }
+ }
+}
+
+//--------------------------------------------------------------------------------------------------
+// EnsureChildren - Create an array of child nodes if nullptr.
+//
+// Arguments:
+// alloc IAllocator instance
+//
+// Return Values:
+// None
+//
+void LC_Deref::EnsureChildren(IAllocator* alloc)
+{
+ if (children == nullptr)
+ {
+ children = new (alloc) ExpandArrayStack<LC_Deref*>(alloc);
+ }
+}
+
+//--------------------------------------------------------------------------------------------------
+// Find - Find the node representing the local variable in child nodes of the 'this' node.
+//
+// Arguments:
+// lcl the local to find in the children array
+//
+// Return Values:
+// The child node if found or nullptr.
+//
+LC_Deref* LC_Deref::Find(unsigned lcl)
+{
+ return Find(children, lcl);
+}
+
+//--------------------------------------------------------------------------------------------------
+// Find - Find the node representing the local variable in a list of nodes.
+//
+// Arguments:
+// lcl the local to find.
+// children the list of nodes in which to search for the node representing the lcl.
+//
+// Return Values:
+// The node if found or nullptr.
+//
+
+// static
+LC_Deref* LC_Deref::Find(ExpandArrayStack<LC_Deref*>* children, unsigned lcl)
+{
+ if (children == nullptr)
+ {
+ return nullptr;
+ }
+ for (unsigned i = 0; i < children->Size(); ++i)
+ {
+ if ((*children)[i]->Lcl() == lcl)
+ {
+ return (*children)[i];
+ }
+ }
+ return nullptr;
+}
diff --git a/src/jit/loopcloning.h b/src/jit/loopcloning.h
new file mode 100644
index 0000000000..40793afcf1
--- /dev/null
+++ b/src/jit/loopcloning.h
@@ -0,0 +1,667 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX LoopCloning XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+ Loop cloning optimizations comprise the following steps:
+ - Loop detection, which is existing logic in the JIT that records
+ loop information with loop flags.
+ - The next step is to identify loop optimization candidates. This is done
+ by optObtainLoopCloningOpts. The loop context variable is updated with
+ all the necessary information (for ex: block, stmt, tree information)
+ to do the optimization later.
+ a) This involves checking if the loop is well-formed with respect to
+ the optimization being performed.
+ b) In array bounds check case, reconstructing the morphed GT_INDEX
+ nodes back to their array representation.
+ i) The array index is stored in the "context" variable with
+ additional block, tree, stmt info.
+ - Once the optimization candidates are identified, we derive cloning conditions.
+ For example, to clone a simple "for (i=0; i<n; ++i) { a[i] }" loop, we need the
+ following conditions:
+ (a != null) && ((n >= 0) & (n <= a.length) & (stride > 0))
+ a) Note the short circuit AND for (a != null). These are called block
+ conditions or deref-conditions since these conditions need to be in their
+ own blocks to be able to short-circuit.
+ i) For a doubly nested loop on i, j, we would then have
+ conditions like
+ (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len)
+ all short-circuiting, creating blocks.
+
+ Advantage:
+ All conditions are checked before we enter the fast path, so the fast
+ path is as fast as it can be.
+
+ Disadvantage:
+ Creation of blocks.
+
+ Heuristic:
+ Therefore we will not clone if doing so would create more than 4 blocks.
+
+ b) The other conditions, called cloning conditions, are transformed into LC_Condition
+ structs, which are then optimized.
+ i) Optimization of conditions involves removing redundant condition checks.
+ ii) If some conditions evaluate to true statically, then they are removed.
+ iii) If any condition evaluates to false statically, then loop cloning is
+ aborted for that loop.
+ - Then the block splitting occurs, and the loop cloning conditions are transformed into
+ GenTrees and added to the loop cloning choice block.
+
+ Preconditions
+ - Loop detection should have completed and the loop table should be
+ populated with the loop dscs.
+ - The loops that will be considered are the ones with the LPFLG_ITER
+ marked on them.
+
+ Limitations
+ - For array based optimizations the loop choice condition is checked
+ before the loop body. This implies that the loop initializer statement
+ has not executed at the time of the check. So any loop cloning condition
+ involving the initial value of the loop counter cannot be checked, because the
+ counter has not been assigned yet at the time of the check. Therefore
+ the initial value has to be statically known. This can be fixed with further
+ effort.
+
+ Assumption
+ - The assumption is that the optimization candidates collected during the
+ identification phase will be the ones that will be optimized. In other words,
+ the loop that is present originally will be the fast path. Explicitly, the cloned
+ path will be the slow path and will be unoptimized. This allows us to
+ collect additional information at the same time as identifying the optimization
+ candidates. This later helps us to perform the optimizations during actual cloning.
+ - All loop cloning choice conditions will automatically be "AND"-ed. These are
+ bitwise AND operations.
+ - Perform short circuit AND for (array != null) side effect check
+ before hoisting (limit <= a.length) check.
+ For example, to clone a simple "for (i=0; i<n; ++i) { a[i] }" loop, we need the
+ following conditions:
+ (a != null) && ((n >= 0) & (n <= a.length) & (stride > 0))
+
+*/
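+
+// Illustrative sketch (not part of the original sources): after cloning, the simple loop from the
+// example above conceptually becomes the following, where the original loop serves as the fast path
+// (bounds checks elided) and the clone is the unoptimized slow path:
+//
+//      if ((a != null) && ((n >= 0) & (n <= a.length) & (stride > 0)))
+//      {
+//          for (i = 0; i < n; ++i) { a[i]; }   // fast path: bounds checks removed
+//      }
+//      else
+//      {
+//          for (i = 0; i < n; ++i) { a[i]; }   // slow path: original, fully checked loop
+//      }
+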
+#pragma once
+
+class Compiler;
+
+/**
+ *
+ * Represents an array access and associated bounds checks.
+ * An array access is required to have the array and indices in local variables.
+ * This struct is constructed using a GT_INDEX node that is broken into
+ * its sub trees.
+ *
+ */
+struct ArrIndex
+{
+ unsigned arrLcl; // The array base local num
+ ExpandArrayStack<unsigned> indLcls; // The indices local nums
+ ExpandArrayStack<GenTree*> bndsChks; // The bounds checks nodes along each dimension.
+ unsigned rank; // Rank of the array
+ BasicBlock* useBlock; // Block where the [] occurs
+
+ ArrIndex(IAllocator* alloc) : arrLcl(BAD_VAR_NUM), indLcls(alloc), bndsChks(alloc), rank(0), useBlock(nullptr)
+ {
+ }
+
+#ifdef DEBUG
+ void Print(unsigned dim = -1)
+ {
+ printf("V%02d", arrLcl);
+ for (unsigned i = 0; i < ((dim == -1) ? rank : dim); ++i)
+ {
+ printf("[V%02d]", indLcls.GetRef(i));
+ }
+ }
+#endif
+};
+
+// Forward declarations
+#define LC_OPT(en) struct en##OptInfo;
+#include "loopcloningopts.h"
+
+/**
+ *
+ * LcOptInfo represents the optimization information for loop cloning,
+ * other classes are supposed to derive from this base class.
+ *
+ * Example usage:
+ * LcMdArrayOptInfo is multi-dimensional array optimization for which the
+ * loop can be cloned.
+ * LcArrIndexOptInfo is a jagged array optimization for which the loop
+ * can be cloned.
+ *
+ * So LcOptInfo represents any type of optimization opportunity that
+ * occurs in a loop and the metadata for the optimization is stored in
+ * this class.
+ */
+struct LcOptInfo
+{
+ enum OptType
+ {
+#undef LC_OPT
+#define LC_OPT(en) en,
+#include "loopcloningopts.h"
+ };
+
+ void* optInfo;
+ OptType optType;
+ LcOptInfo(void* optInfo, OptType optType) : optInfo(optInfo), optType(optType)
+ {
+ }
+
+ OptType GetOptType()
+ {
+ return optType;
+ }
+#undef LC_OPT
+#define LC_OPT(en) \
+ en##OptInfo* As##en##OptInfo() \
+ { \
+ assert(optType == en); \
+ return reinterpret_cast<en##OptInfo*>(this); \
+ }
+#include "loopcloningopts.h"
+};
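+
+// Illustrative usage (not from the original sources): code holding a generic LcOptInfo* can
+// dispatch on GetOptType() and then downcast with one of the generated As*OptInfo() helpers.
+// "DumpJaggedRank" is a hypothetical helper used only for this sketch:
+//
+//      unsigned DumpJaggedRank(LcOptInfo* info)
+//      {
+//          if (info->GetOptType() == LcOptInfo::LcJaggedArray)
+//          {
+//              LcJaggedArrayOptInfo* jag = info->AsLcJaggedArrayOptInfo();
+//              return jag->arrIndex.rank;
+//          }
+//          return 0;
+//      }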
+
+/**
+ *
+ * Optimization info for a multi-dimensional array.
+ */
+struct LcMdArrayOptInfo : public LcOptInfo
+{
+ GenTreeArrElem* arrElem; // "arrElem" node of an MD array.
+ unsigned dim; // "dim" represents upto what level of the rank this optimization applies to.
+ // For example, a[i,j,k] could be the MD array "arrElem" but if "dim" is 2,
+ // then this node is treated as though it were a[i,j]
+ ArrIndex* index; // "index" cached computation in the form of an ArrIndex representation.
+
+ LcMdArrayOptInfo(GenTreeArrElem* arrElem, unsigned dim)
+ : LcOptInfo(this, LcMdArray), arrElem(arrElem), dim(dim), index(nullptr)
+ {
+ }
+
+ ArrIndex* GetArrIndexForDim(IAllocator* alloc)
+ {
+ if (index == nullptr)
+ {
+ index = new (alloc) ArrIndex(alloc);
+ index->rank = arrElem->gtArrRank;
+ for (unsigned i = 0; i < dim; ++i)
+ {
+ index->indLcls.Push(arrElem->gtArrInds[i]->gtLclVarCommon.gtLclNum);
+ }
+ index->arrLcl = arrElem->gtArrObj->gtLclVarCommon.gtLclNum;
+ }
+ return index;
+ }
+};
+
+/**
+ *
+ * Optimization info for a jagged array.
+ */
+struct LcJaggedArrayOptInfo : public LcOptInfo
+{
+ unsigned dim; // "dim" represents upto what level of the rank this optimization applies to.
+ // For example, a[i][j][k] could be the jagged array but if "dim" is 2,
+ // then this node is treated as though it were a[i][j]
+ ArrIndex arrIndex; // ArrIndex representation of the array.
+ GenTreePtr stmt; // "stmt" where the optimization opportunity occurs.
+
+ LcJaggedArrayOptInfo(ArrIndex& arrIndex, unsigned dim, GenTreePtr stmt)
+ : LcOptInfo(this, LcJaggedArray), dim(dim), arrIndex(arrIndex), stmt(stmt)
+ {
+ }
+};
+
+/**
+ *
+ * Symbolic representation of a.length, or a[i][j].length or a[i,j].length and so on.
+ * OperType decides whether "arrLength" is invoked on the array or if it is just an array.
+ */
+struct LC_Array
+{
+ enum ArrType
+ {
+ Invalid,
+ Jagged,
+ MdArray
+ };
+
+ enum OperType
+ {
+ None,
+ ArrLen,
+ };
+
+ ArrType type; // The type of the array on which to invoke length operator.
+ ArrIndex* arrIndex; // ArrIndex representation of this array.
+
+ OperType oper;
+
+#ifdef DEBUG
+ void Print()
+ {
+ arrIndex->Print(dim);
+ if (oper == ArrLen)
+ {
+ printf(".Length");
+ }
+ }
+#endif
+
+ int dim; // "dim" = which index to invoke arrLen on, if -1 invoke on the whole array
+ // Example 1: a[0][1][2] and dim = 2 implies a[0][1].length
+ // Example 2: a[0][1][2] and dim = -1 implies a[0][1][2].length
+ LC_Array() : type(Invalid), dim(-1)
+ {
+ }
+ LC_Array(ArrType type, ArrIndex* arrIndex, int dim, OperType oper)
+ : type(type), arrIndex(arrIndex), oper(oper), dim(dim)
+ {
+ }
+
+ LC_Array(ArrType type, ArrIndex* arrIndex, OperType oper) : type(type), arrIndex(arrIndex), oper(oper), dim(-1)
+ {
+ }
+
+ // Equality operator
+ bool operator==(const LC_Array& that) const
+ {
+ assert(type != Invalid && that.type != Invalid);
+
+ // Types match and the array base matches.
+ if (type != that.type || arrIndex->arrLcl != that.arrIndex->arrLcl || oper != that.oper)
+ {
+ return false;
+ }
+
+ // If the dim ranks are not matching, quit.
+ int rank1 = GetDimRank();
+ int rank2 = that.GetDimRank();
+ if (rank1 != rank2)
+ {
+ return false;
+ }
+
+ // Check for the indices.
+ for (int i = 0; i < rank1; ++i)
+ {
+ if (arrIndex->indLcls[i] != that.arrIndex->indLcls[i])
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // The max dim on which length is invoked.
+ int GetDimRank() const
+ {
+ return (dim < 0) ? (int)arrIndex->rank : dim;
+ }
+
+ // Get a tree representation for this symbolic a.length
+ GenTreePtr ToGenTree(Compiler* comp);
+};
+
+/**
+ *
+ * Symbolic representation of either a constant like 1, 2 or a variable V02, V03 etc. or an "LC_Array" or the null
+ * constant.
+ */
+struct LC_Ident
+{
+ enum IdentType
+ {
+ Invalid,
+ Const,
+ Var,
+ ArrLen,
+ Null,
+ };
+
+ INT64 constant; // The constant value if this node is of type "Const", or the lcl num if "Var"
+ LC_Array arrLen; // The LC_Array if the type is "ArrLen"
+ IdentType type; // The type of this object
+
+ // Equality operator
+ bool operator==(const LC_Ident& that) const
+ {
+ switch (type)
+ {
+ case Const:
+ case Var:
+ return (type == that.type) && constant == that.constant;
+ case ArrLen:
+ return (type == that.type) && (arrLen == that.arrLen);
+ case Null:
+ return (type == that.type);
+ default:
+ assert(!"Unknown LC_Ident type");
+ unreached();
+ }
+ }
+
+#ifdef DEBUG
+ void Print()
+ {
+ switch (type)
+ {
+ case Const:
+ printf("%I64d", constant);
+ break;
+ case Var:
+ printf("V%02d", constant);
+ break;
+ case ArrLen:
+ arrLen.Print();
+ break;
+ case Null:
+ printf("null");
+ break;
+ default:
+ assert(false);
+ break;
+ }
+ }
+#endif
+
+ LC_Ident() : type(Invalid)
+ {
+ }
+ LC_Ident(INT64 constant, IdentType type) : constant(constant), type(type)
+ {
+ }
+ explicit LC_Ident(IdentType type) : type(type)
+ {
+ }
+ explicit LC_Ident(const LC_Array& arrLen) : arrLen(arrLen), type(ArrLen)
+ {
+ }
+
+ // Convert this symbolic representation into a tree node.
+ GenTreePtr ToGenTree(Compiler* comp);
+};
+
+/**
+ *
+ * Symbolic representation of an expr that involves an "LC_Ident" or an "LC_Ident + constant"
+ */
+struct LC_Expr
+{
+ enum ExprType
+ {
+ Invalid,
+ Ident,
+ IdentPlusConst
+ };
+
+ LC_Ident ident;
+ INT64 constant;
+ ExprType type;
+
+ // Equality operator
+ bool operator==(const LC_Expr& that) const
+ {
+ assert(type != Invalid && that.type != Invalid);
+
+ // If the types don't match quit.
+ if (type != that.type)
+ {
+ return false;
+ }
+
+ // If the type involves arithmetic, the constant should match.
+ if (type == IdentPlusConst && constant != that.constant)
+ {
+ return false;
+ }
+
+ // Check if the idents match.
+ return (ident == that.ident);
+ }
+
+#ifdef DEBUG
+ void Print()
+ {
+ if (type == IdentPlusConst)
+ {
+ printf("(%I64d - ", constant);
+ ident.Print();
+ printf(")");
+ }
+ else
+ {
+ ident.Print();
+ }
+ }
+#endif
+
+ LC_Expr() : type(Invalid)
+ {
+ }
+ explicit LC_Expr(const LC_Ident& ident) : ident(ident), type(Ident)
+ {
+ }
+ LC_Expr(const LC_Ident& ident, INT64 constant) : ident(ident), constant(constant), type(IdentPlusConst)
+ {
+ }
+
+ // Convert LC_Expr into a tree node.
+ GenTreePtr ToGenTree(Compiler* comp);
+};
+
+/**
+ *
+ * Symbolic representation of a conditional operation involving two "LC_Expr":
+ * LC_Expr < LC_Expr, for example: i > 0, i < a.length
+ */
+struct LC_Condition
+{
+ LC_Expr op1;
+ LC_Expr op2;
+ genTreeOps oper;
+
+#ifdef DEBUG
+ void Print()
+ {
+ op1.Print();
+ printf(" %s ", GenTree::NodeName(oper));
+ op2.Print();
+ }
+#endif
+
+ // Check if the condition evaluates statically to true or false, i < i => false, a.length > 0 => true
+ // The result is put in "pResult" parameter and is valid if the method returns "true". Otherwise, the
+ // condition could not be evaluated.
+ bool Evaluates(bool* pResult);
+
+ // Check if two conditions can be combined to yield one condition.
+ bool Combines(const LC_Condition& cond, LC_Condition* newCond);
+
+ LC_Condition()
+ {
+ }
+ LC_Condition(genTreeOps oper, const LC_Expr& op1, const LC_Expr& op2) : op1(op1), op2(op2), oper(oper)
+ {
+ }
+
+ // Convert this conditional operation into a GenTree.
+ GenTreePtr ToGenTree(Compiler* comp);
+};
+
+/**
+ * A deref tree of an array expression.
+ * If a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop, then the tree would be:
+ * a => {
+ * i => {
+ * j => {
+ * k => {}
+ * },
+ * y => {
+ * k => {}
+ * },
+ * }
+ * },
+ * b => {
+ * i => {}
+ * }
+ */
+struct LC_Deref
+{
+ const LC_Array array;
+ ExpandArrayStack<LC_Deref*>* children;
+
+ unsigned level;
+
+ LC_Deref(const LC_Array& array, unsigned level) : array(array), children(nullptr), level(level)
+ {
+ }
+
+ LC_Deref* Find(unsigned lcl);
+
+ unsigned Lcl();
+
+ bool HasChildren();
+ void EnsureChildren(IAllocator* alloc);
+ static LC_Deref* Find(ExpandArrayStack<LC_Deref*>* children, unsigned lcl);
+
+ void DeriveLevelConditions(ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* len);
+#ifdef DEBUG
+ void Print(unsigned indent = 0)
+ {
+ unsigned tab = 4 * indent;
+ printf("%*s%d,%d => {", tab, "", Lcl(), level);
+ if (children != nullptr)
+ {
+ for (unsigned i = 0; i < children->Size(); ++i)
+ {
+ if (i > 0)
+ {
+ printf(",");
+ }
+ printf("\n");
+#ifdef _MSC_VER
+ (*children)[i]->Print(indent + 1);
+#else // _MSC_VER
+ (*((ExpandArray<LC_Deref*>*)children))[i]->Print(indent + 1);
+#endif // _MSC_VER
+ }
+ }
+ printf("\n%*s}", tab, "");
+ }
+#endif
+};
+
+/**
+ *
+ * The "context" represents data that is used for making loop-cloning decisions.
+ * - The data is the collection of optimization opportunities
+ * - and the conditions (LC_Condition) that decide between the fast
+ * path or the slow path.
+ *
+ * BNF for LC_Condition:
+ * LC_Condition : LC_Expr genTreeOps LC_Expr
+ * LC_Expr : LC_Ident | LC_Ident + Constant
+ * LC_Ident : Constant | Var | LC_Array
+ * LC_Array : .
+ * genTreeOps : GT_GE | GT_LE | GT_GT | GT_LT
+ *
+ */
+struct LoopCloneContext
+{
+ IAllocator* alloc; // The allocator
+ ExpandArrayStack<LcOptInfo*>** optInfo; // The array of optimization opportunities found in each loop. (loop x
+ // optimization-opportunities)
+ ExpandArrayStack<LC_Condition>** conditions; // The array of conditions that influence which path to take for each
+ // loop. (loop x cloning-conditions)
+ ExpandArrayStack<LC_Array>** derefs; // The array of dereference conditions found in each loop. (loop x
+ // deref-conditions)
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>** blockConditions; // The array of block levels of conditions for
+ // each loop. (loop x level x conditions)
+
+ LoopCloneContext(unsigned loopCount, IAllocator* alloc) : alloc(alloc)
+ {
+ optInfo = new (alloc) ExpandArrayStack<LcOptInfo*>*[loopCount];
+ conditions = new (alloc) ExpandArrayStack<LC_Condition>*[loopCount];
+ derefs = new (alloc) ExpandArrayStack<LC_Array>*[loopCount];
+ blockConditions = new (alloc) ExpandArrayStack<ExpandArrayStack<LC_Condition>*>*[loopCount];
+ for (unsigned i = 0; i < loopCount; ++i)
+ {
+ optInfo[i] = nullptr;
+ conditions[i] = nullptr;
+ derefs[i] = nullptr;
+ blockConditions[i] = nullptr;
+ }
+ }
+
+ // Evaluate conditions into a JTRUE stmt and put it in the block. Reverse condition if 'reverse' is true.
+ void CondToStmtInBlock(Compiler* comp, ExpandArrayStack<LC_Condition>& conds, BasicBlock* block, bool reverse);
+
+ // Get all the optimization information for loop "loopNum"; This information is held in "optInfo" array.
+ // If NULL this allocates the optInfo[loopNum] array for "loopNum"
+ ExpandArrayStack<LcOptInfo*>* EnsureLoopOptInfo(unsigned loopNum);
+
+ // Get all the optimization information for loop "loopNum"; This information is held in "optInfo" array.
+ // If NULL this does not allocate the optInfo[loopNum] array for "loopNum"
+ ExpandArrayStack<LcOptInfo*>* GetLoopOptInfo(unsigned loopNum);
+
+ // Cancel all optimizations for loop "loopNum" by clearing out the "conditions" member if non-null
+ // and setting the optInfo to "null". If "null", then the user of this class is not supposed to
+ // clone this loop.
+ void CancelLoopOptInfo(unsigned loopNum);
+
+ // Get the conditions that decide which loop to take for "loopNum." If NULL allocate an empty array.
+ ExpandArrayStack<LC_Condition>* EnsureConditions(unsigned loopNum);
+
+ // Get the conditions for loop. No allocation is performed.
+ ExpandArrayStack<LC_Condition>* GetConditions(unsigned loopNum);
+
+ // Ensure that the "deref" conditions array is allocated.
+ ExpandArrayStack<LC_Array>* EnsureDerefs(unsigned loopNum);
+
+ // Get block conditions for each loop, no allocation is performed.
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* GetBlockConditions(unsigned loopNum);
+
+ // Ensure that the block condition is present, if not allocate space.
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* EnsureBlockConditions(unsigned loopNum, unsigned totalBlocks);
+
+ // Print the block conditions for the loop.
+ void PrintBlockConditions(unsigned loopNum);
+
+ // Does the loop have block conditions?
+ bool HasBlockConditions(unsigned loopNum);
+
+ // Evaluate the conditions for "loopNum" and indicate if they are either all true or any of them are false.
+ // "pAllTrue" implies all the conditions are statically known to be true.
+ // "pAnyFalse" implies at least one condition is statically known to be false.
+ // If neither of them is true, then some conditions' evaluations are statically unknown.
+ //
+ // If all conditions yield true, then the caller doesn't need to clone the loop, but it can perform
+ // fast path optimizations.
+ // If any condition yields false, then the caller needs to abort cloning the loop (neither clone it nor
+ // perform fast path optimizations).
+ //
+ // Assumes the conditions involve an AND join operator.
+ void EvaluateConditions(unsigned loopNum, bool* pAllTrue, bool* pAnyFalse DEBUGARG(bool verbose));
+
+private:
+ void OptimizeConditions(ExpandArrayStack<LC_Condition>& conds);
+
+public:
+ // Optimize conditions to remove redundant conditions.
+ void OptimizeConditions(unsigned loopNum DEBUGARG(bool verbose));
+
+ void OptimizeBlockConditions(unsigned loopNum DEBUGARG(bool verbose));
+
+#ifdef DEBUG
+ void PrintConditions(unsigned loopNum);
+#endif
+};
diff --git a/src/jit/loopcloningopts.h b/src/jit/loopcloningopts.h
new file mode 100644
index 0000000000..9048a41a14
--- /dev/null
+++ b/src/jit/loopcloningopts.h
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+/*****************************************************************************/
+
+#ifndef LC_OPT
+#error Define LC_OPT before including this file.
+#endif
+
+// Types of Loop Cloning based optimizations.
+LC_OPT(LcMdArray)
+LC_OPT(LcJaggedArray)
+
+#undef LC_OPT
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
new file mode 100644
index 0000000000..09eb9146ac
--- /dev/null
+++ b/src/jit/lower.cpp
@@ -0,0 +1,4196 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Lower XX
+XX XX
+XX Preconditions: XX
+XX XX
+XX Postconditions (for the nodes currently handled): XX
+XX - All operands requiring a register are explicit in the graph XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#include "lower.h"
+
+#if !defined(_TARGET_64BIT_)
+#include "decomposelongs.h"
+#endif // !defined(_TARGET_64BIT_)
+
+//------------------------------------------------------------------------
+// MakeSrcContained: Make "childNode" a contained node
+//
+// Arguments:
+// parentNode - is a non-leaf node that can contain its 'childNode'
+// childNode - is an op that will now be contained by its parent.
+//
+// Notes:
+// If 'childNode' has any existing sources, they will now be sources for the parent.
+//
+void Lowering::MakeSrcContained(GenTreePtr parentNode, GenTreePtr childNode)
+{
+ assert(!parentNode->OperIsLeaf());
+ int srcCount = childNode->gtLsraInfo.srcCount;
+ assert(srcCount >= 0);
+ m_lsra->clearOperandCounts(childNode);
+ assert(parentNode->gtLsraInfo.srcCount > 0);
+ parentNode->gtLsraInfo.srcCount += srcCount - 1;
+}
+
+//------------------------------------------------------------------------
+// CheckImmedAndMakeContained: Checks if the 'childNode' is a containable immediate
+// and, if so, makes it contained.
+//
+// Arguments:
+// parentNode - is any non-leaf node
+// childNode - is a child op of 'parentNode'
+//
+// Return value:
+// true if we are able to make childNode a contained immediate
+//
+bool Lowering::CheckImmedAndMakeContained(GenTree* parentNode, GenTree* childNode)
+{
+ assert(!parentNode->OperIsLeaf());
+ // If childNode is a containable immediate
+ if (IsContainableImmed(parentNode, childNode))
+ {
+ // then make it contained within the parentNode
+ MakeSrcContained(parentNode, childNode);
+ return true;
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------
+// IsSafeToContainMem: Checks for conflicts between childNode and parentNode,
+// and returns 'true' iff memory operand childNode can be contained in parentNode.
+//
+// Arguments:
+// parentNode - any non-leaf node
+// childNode - some node that is an input to `parentNode`
+//
+// Return value:
+// true if it is safe to make childNode a contained memory operand.
+//
+bool Lowering::IsSafeToContainMem(GenTree* parentNode, GenTree* childNode)
+{
+ m_scratchSideEffects.Clear();
+ m_scratchSideEffects.AddNode(comp, childNode);
+
+ for (GenTree* node = childNode->gtNext; node != parentNode; node = node->gtNext)
+ {
+ if (m_scratchSideEffects.InterferesWith(comp, node, false))
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+//------------------------------------------------------------------------
+
+// This is the main entry point for Lowering.
+GenTree* Lowering::LowerNode(GenTree* node)
+{
+ assert(node != nullptr);
+ switch (node->gtOper)
+ {
+ case GT_IND:
+ TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
+ break;
+
+ case GT_STOREIND:
+ LowerStoreInd(node);
+ break;
+
+ case GT_ADD:
+ return LowerAdd(node);
+
+ case GT_UDIV:
+ case GT_UMOD:
+ LowerUnsignedDivOrMod(node);
+ break;
+
+ case GT_DIV:
+ case GT_MOD:
+ return LowerSignedDivOrMod(node);
+
+ case GT_SWITCH:
+ return LowerSwitch(node);
+
+ case GT_CALL:
+ LowerCall(node);
+ break;
+
+ case GT_JMP:
+ LowerJmpMethod(node);
+ break;
+
+ case GT_RETURN:
+ LowerRet(node);
+ break;
+
+ case GT_CAST:
+ LowerCast(node);
+ break;
+
+ case GT_ARR_ELEM:
+ return LowerArrElem(node);
+
+ case GT_ROL:
+ case GT_ROR:
+ LowerRotate(node);
+ break;
+
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ LowerBlockStore(node->AsBlk());
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ if (node->TypeGet() == TYP_SIMD12)
+ {
+ // A GT_SIMD node that is required to produce TYP_SIMD12 in fact
+ // produces a TYP_SIMD16 result
+ node->gtType = TYP_SIMD16;
+ }
+ break;
+
+ case GT_LCL_VAR:
+ case GT_STORE_LCL_VAR:
+ if (node->TypeGet() == TYP_SIMD12)
+ {
+#ifdef _TARGET_64BIT_
+ // Assumption 1:
+ // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
+ // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
+ // reading and writing purposes.
+ //
+ // Assumption 2:
+ // The RyuJIT backend makes another implicit assumption: when Vector3 type args are passed in
+ // registers or on the stack, the uppermost 4 bytes will be zero.
+ //
+ // For P/Invoke returns and Reverse P/Invoke argument passing, the native compiler doesn't guarantee
+ // that the upper 4 bytes of a Vector3 type struct are zero initialized, and hence assumption 2 is
+ // invalid.
+ //
+ // RyuJIT x64 Windows: arguments are treated as passed by ref and hence only 12 bytes are
+ // read/written. In case of Vector3 returns, the caller allocates a zero-initialized Vector3 local and
+ // passes it as the retBuf arg, and the callee writes only 12 bytes to retBuf. For this reason,
+ // there is no need to clear the upper 4 bytes of Vector3 type args.
+ //
+ // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
+ // Vector3 return values are returned in two return registers and the caller assembles them into a
+ // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
+ // type args in the prolog and of the Vector3 type return value of a call.
+ node->gtType = TYP_SIMD16;
+#else
+ NYI("Lowering of TYP_SIMD12 locals");
+#endif // _TARGET_64BIT_
+ }
+#endif // FEATURE_SIMD
+ __fallthrough;
+
+ case GT_STORE_LCL_FLD:
+ // TODO-1stClassStructs: Once we remove the requirement that all struct stores
+ // are block stores (GT_STORE_BLK or GT_STORE_OBJ), here is where we would put the local
+ // store under a block store if codegen will require it.
+ if (node->OperIsStore() && (node->TypeGet() == TYP_STRUCT) && (node->gtGetOp1()->OperGet() != GT_PHI))
+ {
+#if FEATURE_MULTIREG_RET
+ GenTree* src = node->gtGetOp1();
+ assert((src->OperGet() == GT_CALL) && src->AsCall()->HasMultiRegRetVal());
+#else // !FEATURE_MULTIREG_RET
+ assert(!"Unexpected struct local store in Lowering");
+#endif // !FEATURE_MULTIREG_RET
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return node->gtNext;
+}
+
+/** -- Switch Lowering --
+ * The main idea of switch lowering is to keep transparency of the register requirements of this node
+ * downstream in LSRA. Given that the switch instruction is inherently a control statement which in the JIT
+ * is represented as a simple tree node, at the time we actually generate code for it we end up
+ * generating instructions that actually modify the flow of execution, which imposes complicated
+ * register requirements and lifetimes.
+ *
+ * So, for the purpose of LSRA, we want to have a more detailed specification of what a switch node actually
+ * means and, more importantly, which registers we need, and when, for each instruction we want to issue,
+ * so that they can be correctly allocated downstream.
+ *
+ * For this purpose, this procedure performs switch lowering in two different ways:
+ *
+ * a) Represent the switch statement as a zero-index jump table construct. This means that for every destination
+ * of the switch, we will store this destination in an array of addresses and the code generator will issue
+ * a data section where this array will live and will emit code that based on the switch index, will indirect and
+ * jump to the destination specified in the jump table.
+ *
+ * For this transformation we introduce a new GT node called GT_SWITCH_TABLE that is a specialization of the switch
+ * node for jump table based switches.
+ * The overall structure of a GT_SWITCH_TABLE is:
+ *
+ * GT_SWITCH_TABLE
+ * |_________ localVar (a temporary local that holds the switch index)
+ * |_________ jumpTable (this is a special node that holds the address of the jump table array)
+ *
+ * Now, the way we morph a GT_SWITCH node into this lowered switch table node form is the following:
+ *
+ * Input: GT_SWITCH (inside a basic block whose Branch Type is BBJ_SWITCH)
+ * |_____ expr (an arbitrarily complex GT_NODE that represents the switch index)
+ *
+ * This gets transformed into the following statements inside a BBJ_COND basic block (the target would be
+ * the default case of the switch in case the conditional is evaluated to true).
+ *
+ * ----- original block, transformed
+ * GT_ASG
+ * |_____ tempLocal (a new temporary local variable used to store the switch index)
+ * |_____ expr (the index expression)
+ *
+ * GT_JTRUE
+ * |_____ GT_COND
+ * |_____ GT_GE
+ * |___ Int_Constant (This constant is the index of the default case
+ * that happens to be the highest index in the jump table).
+ * |___ tempLocal (The local variable where we stored the index expression).
+ *
+ * ----- new basic block
+ * GT_SWITCH_TABLE
+ * |_____ tempLocal
+ * |_____ jumpTable (a new jump table node that now LSRA can allocate registers for explicitly
+ * and LinearCodeGen will be responsible to generate downstream).
+ *
+ * This way there are no implicit temporaries.
+ *
+ * b) For small-sized switches, we will actually morph them into a series of conditionals of the form
+ * if (case falls into the default){ goto jumpTable[size]; // last entry in the jump table is the default case }
+ * (For the default case conditional, we'll be constructing the exact same code as the jump table case one).
+ * else if (case == firstCase){ goto jumpTable[1]; }
+ * else if (case == secondCase) { goto jumptable[2]; } and so on.
+ *
+ * This transformation is of course made in JIT-IR, not downstream to CodeGen level, so this way we no longer
+ * require internal temporaries to maintain the index we're evaluating, and we reuse existing code from
+ * LinearCodeGen to implement this instead of implementing all the control flow constructs using InstrDscs and
+ * InstrGroups downstream.
+ */
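+
+// For example (an illustrative sketch, not from the original sources): for a switch whose jump
+// table has jumpCnt == 5 entries, with jumpTab[4] being the default case, the shared guard built
+// below is the unsigned comparison
+//
+//      if ((unsigned)temp > 3)     // i.e. temp > jumpCnt - 2; also catches "negative" indices
+//          goto jumpTab[4];        // the default case
+//
+// and the remaining four cases are then expanded either as a GT_SWITCH_TABLE or as a chain of
+// compare/branch blocks, depending on minSwitchTabJumpCnt.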
+
+GenTree* Lowering::LowerSwitch(GenTree* node)
+{
+ unsigned jumpCnt;
+ unsigned targetCnt;
+ BasicBlock** jumpTab;
+
+ assert(node->gtOper == GT_SWITCH);
+
+ // The first step is to build the default case conditional construct that is
+ // shared between both kinds of expansion of the switch node.
+
+ // To avoid confusion, we'll alias m_block to originalSwitchBB
+ // that represents the node we're morphing.
+ BasicBlock* originalSwitchBB = m_block;
+ LIR::Range& switchBBRange = LIR::AsRange(originalSwitchBB);
+
+ // jumpCnt is the number of elements in the jump table array.
+ // jumpTab is the actual pointer to the jump table array.
+ // targetCnt is the number of unique targets in the jump table array.
+ jumpCnt = originalSwitchBB->bbJumpSwt->bbsCount;
+ jumpTab = originalSwitchBB->bbJumpSwt->bbsDstTab;
+ targetCnt = originalSwitchBB->NumSucc(comp);
+
+// GT_SWITCH must be a top-level node with no use.
+#ifdef DEBUG
+ {
+ LIR::Use use;
+ assert(!switchBBRange.TryGetUse(node, &use));
+ }
+#endif
+
+ JITDUMP("Lowering switch BB%02u, %d cases\n", originalSwitchBB->bbNum, jumpCnt);
+
+ // Handle a degenerate case: if the switch has only a default case, just convert it
+ // to an unconditional branch. This should only happen in minopts or with debuggable
+ // code.
+ if (targetCnt == 1)
+ {
+ JITDUMP("Lowering switch BB%02u: single target; converting to BBJ_ALWAYS\n", originalSwitchBB->bbNum);
+ noway_assert(comp->opts.MinOpts() || comp->opts.compDbgCode);
+ if (originalSwitchBB->bbNext == jumpTab[0])
+ {
+ originalSwitchBB->bbJumpKind = BBJ_NONE;
+ originalSwitchBB->bbJumpDest = nullptr;
+ }
+ else
+ {
+ originalSwitchBB->bbJumpKind = BBJ_ALWAYS;
+ originalSwitchBB->bbJumpDest = jumpTab[0];
+ }
+ // Remove extra predecessor links if there was more than one case.
+ for (unsigned i = 1; i < jumpCnt; ++i)
+ {
+ (void)comp->fgRemoveRefPred(jumpTab[i], originalSwitchBB);
+ }
+
+ // We have to get rid of the GT_SWITCH node but a child might have side effects so just assign
+ // the result of the child subtree to a temp.
+ GenTree* rhs = node->gtOp.gtOp1;
+
+ unsigned lclNum = comp->lvaGrabTemp(true DEBUGARG("Lowering is creating a new local variable"));
+ comp->lvaSortAgain = true;
+ comp->lvaTable[lclNum].lvType = rhs->TypeGet();
+ comp->lvaTable[lclNum].lvRefCnt = 1;
+
+ GenTreeLclVar* store =
+ new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, rhs->TypeGet(), lclNum, BAD_IL_OFFSET);
+ store->gtOp1 = rhs;
+ store->gtFlags = (rhs->gtFlags & GTF_COMMON_MASK);
+ store->gtFlags |= GTF_VAR_DEF;
+
+ switchBBRange.InsertAfter(node, store);
+ switchBBRange.Remove(node);
+
+ return store;
+ }
+
+ noway_assert(jumpCnt >= 2);
+
+ // Spill the argument to the switch node into a local so that it can be used later.
+ unsigned blockWeight = originalSwitchBB->getBBWeight(comp);
+
+ LIR::Use use(switchBBRange, &(node->gtOp.gtOp1), node);
+ use.ReplaceWithLclVar(comp, blockWeight);
+
+ // GT_SWITCH(indexExpression) is now two statements:
+ // 1. a statement containing 'asg' (for temp = indexExpression)
+ // 2. and a statement with GT_SWITCH(temp)
+
+ assert(node->gtOper == GT_SWITCH);
+ GenTreePtr temp = node->gtOp.gtOp1;
+ assert(temp->gtOper == GT_LCL_VAR);
+ unsigned tempLclNum = temp->gtLclVarCommon.gtLclNum;
+ LclVarDsc* tempVarDsc = comp->lvaTable + tempLclNum;
+ var_types tempLclType = tempVarDsc->TypeGet();
+
+ BasicBlock* defaultBB = jumpTab[jumpCnt - 1];
+ BasicBlock* followingBB = originalSwitchBB->bbNext;
+
+ /* Is the number of cases right for a test and jump switch? */
+ const bool fFirstCaseFollows = (followingBB == jumpTab[0]);
+ const bool fDefaultFollows = (followingBB == defaultBB);
+
+ unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
+
+ // This means really just a single cmp/jcc (aka a simple if/else)
+ if (fFirstCaseFollows || fDefaultFollows)
+ {
+ minSwitchTabJumpCnt++;
+ }
+
+#if defined(_TARGET_ARM_)
+ // On ARM for small switch tables we will
+ // generate a sequence of compare and branch instructions
+ // because the code to load the base of the switch
+ // table is huge and hideous due to the relocation... :(
+ minSwitchTabJumpCnt += 2;
+#endif // _TARGET_ARM_
+
+ // Once we have the temporary variable, we construct the conditional branch for
+ // the default case. As stated above, this conditional is being shared between
+ // both GT_SWITCH lowering code paths.
+ // This condition is of the form: if (temp > jumpTableLength - 2){ goto jumpTable[jumpTableLength - 1]; }
+ GenTreePtr gtDefaultCaseCond = comp->gtNewOperNode(GT_GT, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
+ comp->gtNewIconNode(jumpCnt - 2, TYP_INT));
+
+ // Make sure we perform an unsigned comparison, just in case the switch index in 'temp'
+ // is now less than zero (that would also hit the default case).
+ gtDefaultCaseCond->gtFlags |= GTF_UNSIGNED;
+
+ /* Increment the lvRefCnt and lvRefCntWtd for temp */
+ tempVarDsc->incRefCnts(blockWeight, comp);
+
+ GenTreePtr gtDefaultCaseJump = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtDefaultCaseCond);
+ gtDefaultCaseJump->gtFlags = node->gtFlags;
+
+ LIR::Range condRange = LIR::SeqTree(comp, gtDefaultCaseJump);
+ switchBBRange.InsertAtEnd(std::move(condRange));
+
+ BasicBlock* afterDefaultCondBlock = comp->fgSplitBlockAfterNode(originalSwitchBB, condRange.LastNode());
+
+ // afterDefaultCondBlock is now the switch, and all the switch targets have it as a predecessor.
+ // originalSwitchBB is now a BBJ_NONE, and there is a predecessor edge in afterDefaultCondBlock
+ // representing the fall-through flow from originalSwitchBB.
+ assert(originalSwitchBB->bbJumpKind == BBJ_NONE);
+ assert(originalSwitchBB->bbNext == afterDefaultCondBlock);
+ assert(afterDefaultCondBlock->bbJumpKind == BBJ_SWITCH);
+ assert(afterDefaultCondBlock->bbJumpSwt->bbsHasDefault);
+ assert(afterDefaultCondBlock->isEmpty()); // Nothing here yet.
+
+ // The GT_SWITCH code is still in originalSwitchBB (it will be removed later).
+
+ // Turn originalSwitchBB into a BBJ_COND.
+ originalSwitchBB->bbJumpKind = BBJ_COND;
+ originalSwitchBB->bbJumpDest = jumpTab[jumpCnt - 1];
+
+ // Fix the pred for the default case: the default block target still has originalSwitchBB
+ // as a predecessor, but the fgSplitBlockAfterNode() call above moved all predecessors to point
+ // to afterDefaultCondBlock.
+ flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[jumpCnt - 1], afterDefaultCondBlock);
+ comp->fgAddRefPred(jumpTab[jumpCnt - 1], originalSwitchBB, oldEdge);
+
+ // If we originally had 2 unique successors, check to see whether there is a unique
+ // non-default case, in which case we can eliminate the switch altogether.
+ // Note that the single unique successor case is handled above.
+ BasicBlock* uniqueSucc = nullptr;
+ if (targetCnt == 2)
+ {
+ uniqueSucc = jumpTab[0];
+ noway_assert(jumpCnt >= 2);
+ for (unsigned i = 1; i < jumpCnt - 1; i++)
+ {
+ if (jumpTab[i] != uniqueSucc)
+ {
+ uniqueSucc = nullptr;
+ break;
+ }
+ }
+ }
+ if (uniqueSucc != nullptr)
+ {
+ // If the unique successor immediately follows this block, we have nothing to do -
+ // it will simply fall-through after we remove the switch, below.
+ // Otherwise, make this a BBJ_ALWAYS.
+ // Now, fixup the predecessor links to uniqueSucc. In the original jumpTab:
+ // jumpTab[jumpCnt - 1] was the default target, which we handled above,
+ // jumpTab[0] is the first target, and we'll leave that predecessor link.
+ // Remove any additional predecessor links to uniqueSucc.
+ for (unsigned i = 1; i < jumpCnt - 1; ++i)
+ {
+ assert(jumpTab[i] == uniqueSucc);
+ (void)comp->fgRemoveRefPred(uniqueSucc, afterDefaultCondBlock);
+ }
+ if (afterDefaultCondBlock->bbNext == uniqueSucc)
+ {
+ afterDefaultCondBlock->bbJumpKind = BBJ_NONE;
+ afterDefaultCondBlock->bbJumpDest = nullptr;
+ }
+ else
+ {
+ afterDefaultCondBlock->bbJumpKind = BBJ_ALWAYS;
+ afterDefaultCondBlock->bbJumpDest = uniqueSucc;
+ }
+ }
+ // If the number of possible destinations is small enough, we proceed to expand the switch
+ // into a series of conditional branches, otherwise we follow the jump table based switch
+ // transformation.
+ else if (jumpCnt < minSwitchTabJumpCnt)
+ {
+ // Lower the switch into a series of compare and branch IR trees.
+ //
+ // In this case we will morph the node in the following way:
+ // 1. Generate a JTRUE statement to evaluate the default case. (This happens above.)
+ // 2. Start splitting the switch basic block into subsequent basic blocks, each of which will contain
+ // a statement that is responsible for performing a comparison of the table index and conditional
+ // branch if equal.
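+ //
+ // As an illustrative sketch (block numbers are made up), a switch on 'temp' with cases {0, 1, 2}
+ // plus a default lowers roughly to:
+ //   BB01: if ((unsigned)temp > 2) goto BBdefault   // originalSwitchBB, now BBJ_COND
+ //   BB02: if (temp == 0) goto BBcase0              // afterDefaultCondBlock
+ //   BB03: if (temp == 1) goto BBcase1
+ //   BB04: goto BBcase2                             // the last case needs no compare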
+
+ JITDUMP("Lowering switch BB%02u: using compare/branch expansion\n", originalSwitchBB->bbNum);
+
+ // We'll use 'afterDefaultCondBlock' for the first conditional. After that, we'll add new
+ // blocks. If we end up not needing it at all (say, if all the non-default cases just fall through),
+ // we'll delete it.
+ bool fUsedAfterDefaultCondBlock = false;
+ BasicBlock* currentBlock = afterDefaultCondBlock;
+ LIR::Range* currentBBRange = &LIR::AsRange(currentBlock);
+
+ // Walk entries 0 through jumpCnt - 2 (all cases except the default). If a case target follows, ignore it and let it fall through.
+ // If no case target follows, the last one doesn't need to be a compare/branch: it can be an
+ // unconditional branch.
+ bool fAnyTargetFollows = false;
+ for (unsigned i = 0; i < jumpCnt - 1; ++i)
+ {
+ assert(currentBlock != nullptr);
+
+ // Remove the switch from the predecessor list of this case target's block.
+ // We'll add the proper new predecessor edge later.
+ flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[i], afterDefaultCondBlock);
+
+ if (jumpTab[i] == followingBB)
+ {
+ // This case label follows the switch; let it fall through.
+ fAnyTargetFollows = true;
+ continue;
+ }
+
+ // We need a block to put in the new compare and/or branch.
+ // If we haven't used the afterDefaultCondBlock yet, then use that.
+ if (fUsedAfterDefaultCondBlock)
+ {
+ BasicBlock* newBlock = comp->fgNewBBafter(BBJ_NONE, currentBlock, true);
+ comp->fgAddRefPred(newBlock, currentBlock); // The fall-through predecessor.
+ currentBlock = newBlock;
+ currentBBRange = &LIR::AsRange(currentBlock);
+ }
+ else
+ {
+ assert(currentBlock == afterDefaultCondBlock);
+ fUsedAfterDefaultCondBlock = true;
+ }
+
+ // We're going to have a branch, either a conditional or unconditional,
+ // to the target. Set the target.
+ currentBlock->bbJumpDest = jumpTab[i];
+
+ // Wire up the predecessor list for the "branch" case.
+ comp->fgAddRefPred(jumpTab[i], currentBlock, oldEdge);
+
+ if (!fAnyTargetFollows && (i == jumpCnt - 2))
+ {
+ // We're processing the last one, and there is no fall through from any case
+ // to the following block, so we can use an unconditional branch to the final
+ // case: there is no need to compare against the case index, since it's
+ // guaranteed to be taken (since the default case was handled first, above).
+
+ currentBlock->bbJumpKind = BBJ_ALWAYS;
+ }
+ else
+ {
+ // Otherwise, it's a conditional branch. Set the branch kind, then add the
+ // condition statement.
+ currentBlock->bbJumpKind = BBJ_COND;
+
+ // Now, build the conditional statement for the current case that is
+ // being evaluated:
+ // GT_JTRUE
+ // |__ GT_COND
+ // |____GT_EQ
+ // |____ (switchIndex) (The temp variable)
+ // |____ (ICon) (The actual case constant)
+ GenTreePtr gtCaseCond =
+ comp->gtNewOperNode(GT_EQ, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
+ comp->gtNewIconNode(i, TYP_INT));
+ /* Increment the lvRefCnt and lvRefCntWtd for temp */
+ tempVarDsc->incRefCnts(blockWeight, comp);
+
+ GenTreePtr gtCaseBranch = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtCaseCond);
+ LIR::Range caseRange = LIR::SeqTree(comp, gtCaseBranch);
+ currentBBRange->InsertAtEnd(std::move(caseRange));
+ }
+ }
+
+ if (fAnyTargetFollows)
+ {
+ // There is a fall-through to the following block. In the loop
+ // above, we deleted all the predecessor edges from the switch.
+ // In this case, we need to add one back.
+ comp->fgAddRefPred(currentBlock->bbNext, currentBlock);
+ }
+
+ if (!fUsedAfterDefaultCondBlock)
+ {
+ // All the cases were fall-through! We don't need this block.
+ // Convert it from BBJ_SWITCH to BBJ_NONE and unset the BBF_DONT_REMOVE flag
+ // so fgRemoveBlock() doesn't complain.
+ JITDUMP("Lowering switch BB%02u: all switch cases were fall-through\n", originalSwitchBB->bbNum);
+ assert(currentBlock == afterDefaultCondBlock);
+ assert(currentBlock->bbJumpKind == BBJ_SWITCH);
+ currentBlock->bbJumpKind = BBJ_NONE;
+ currentBlock->bbFlags &= ~BBF_DONT_REMOVE;
+ comp->fgRemoveBlock(currentBlock, /* unreachable */ false); // It's an empty block.
+ }
+ }
+ else
+ {
+ // Lower the switch into an indirect branch using a jump table:
+ //
+ // 1. Create the constant for the default case
+ // 2. Generate a GT_GT condition to compare against the default case
+ // 3. Generate a GT_JTRUE to jump.
+ // 4. Load the jump table address into a local (presumably the just
+ // created constant for GT_SWITCH).
+ // 5. Create a new node for the lowered switch, this will both generate
+ // the branch table and also will be responsible for the indirect
+ // branch.
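+ //
+ // As a rough sketch (names are illustrative), the switch block ends up containing:
+ //   GT_SWITCH_TABLE
+ //     +-- GT_LCL_VAR temp    (the spilled switch index)
+ //     +-- GT_JMPTABLE        (the jump table node created by gtNewJmpTableNode)
+ // and codegen later emits the table itself plus the indirect branch through it.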
+
+ JITDUMP("Lowering switch BB%02u: using jump table expansion\n", originalSwitchBB->bbNum);
+
+ GenTreePtr gtTableSwitch =
+ comp->gtNewOperNode(GT_SWITCH_TABLE, TYP_VOID, comp->gtNewLclvNode(tempLclNum, tempLclType),
+ comp->gtNewJmpTableNode());
+ /* Increment the lvRefCnt and lvRefCntWtd for temp */
+ tempVarDsc->incRefCnts(blockWeight, comp);
+
+ // this block no longer branches to the default block
+ afterDefaultCondBlock->bbJumpSwt->removeDefault();
+ comp->fgInvalidateSwitchDescMapEntry(afterDefaultCondBlock);
+
+ LIR::Range& afterDefaultCondBBRange = LIR::AsRange(afterDefaultCondBlock);
+ afterDefaultCondBBRange.InsertAtEnd(LIR::SeqTree(comp, gtTableSwitch));
+ }
+
+ GenTree* next = node->gtNext;
+
+ // Get rid of the GT_SWITCH(temp).
+ switchBBRange.Remove(node->gtOp.gtOp1);
+ switchBBRange.Remove(node);
+
+ return next;
+}
+
+// NOTE: this method deliberately does not update the call arg table. It must only
+// be used by NewPutArg and LowerArg; these functions are responsible for updating
+// the call arg table as necessary.
+void Lowering::ReplaceArgWithPutArgOrCopy(GenTree** argSlot, GenTree* putArgOrCopy)
+{
+ assert(argSlot != nullptr);
+ assert(*argSlot != nullptr);
+ assert(putArgOrCopy->OperGet() == GT_PUTARG_REG || putArgOrCopy->OperGet() == GT_PUTARG_STK ||
+ putArgOrCopy->OperGet() == GT_COPY);
+
+ GenTree* arg = *argSlot;
+
+ // Replace the argument with the putarg/copy
+ *argSlot = putArgOrCopy;
+ putArgOrCopy->gtOp.gtOp1 = arg;
+
+ // Insert the putarg/copy into the block
+ BlockRange().InsertAfter(arg, putArgOrCopy);
+}
+
+//------------------------------------------------------------------------
+// NewPutArg: rewrites the tree to put an arg in a register or on the stack.
+//
+// Arguments:
+// call - the call whose arg is being rewritten.
+// arg - the arg being rewritten.
+// info - the ArgTabEntry information for the argument.
+// type - the type of the argument.
+//
+// Return Value:
+// The new tree that was created to put the arg in the right place
+// or the incoming arg if the arg tree was not rewritten.
+//
+// Assumptions:
+// call, arg, and info must be non-null.
+//
+// Notes:
+// For System V systems with native struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined)
+// this method allocates a single GT_PUTARG_REG for a one-eightbyte struct and a GT_LIST of two GT_PUTARG_REGs
+// for two eightbyte structs.
+//
+// For stack-passed structs the method generates a GT_PUTARG_STK tree. For System V systems with native struct passing
+// (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined) this method also sets the GC pointer count and the pointer
+// layout object, so the codegen of the GT_PUTARG_STK can use this to optimize copying the struct to the stack by value
+// (using block copy primitives for non-GC pointers, and a single TARGET_POINTER_SIZE copy, with GC info recorded, for GC slots).
+//
+GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryPtr info, var_types type)
+{
+ assert(call != nullptr);
+ assert(arg != nullptr);
+ assert(info != nullptr);
+
+ GenTreePtr putArg = nullptr;
+ bool updateArgTable = true;
+
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(type))
+ {
+ // For TYP_LONG, we leave the GT_LONG as the arg, and put the putArg below it.
+ // Therefore, we don't update the arg table entry.
+ updateArgTable = false;
+ type = TYP_INT;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ bool isOnStack = true;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (varTypeIsStruct(type))
+ {
+ isOnStack = !info->structDesc.passedInRegisters;
+ }
+ else
+ {
+ isOnStack = info->regNum == REG_STK;
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ isOnStack = info->regNum == REG_STK;
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (!isOnStack)
+ {
+#ifdef FEATURE_SIMD
+ // TYP_SIMD8 is passed in an integer register. We need the putArg node to be of the int type.
+ if (type == TYP_SIMD8 && genIsValidIntReg(info->regNum))
+ {
+ type = TYP_LONG;
+ }
+#endif // FEATURE_SIMD
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (info->isStruct)
+ {
+ // The following code makes sure a register passed struct arg is moved to
+ // the register before the call is made.
+ // There are two cases (comments added in the code below.)
+ // 1. The struct is of size one eightbyte:
+ // In this case a new tree is created that is GT_PUTARG_REG
+ // with a op1 the original argument.
+ // 2. The struct is contained in 2 eightbytes:
+ // in this case the arg comes as a GT_LIST of two GT_LCL_FLDs - the two eightbytes of the struct.
+ // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_LIST
+ // and splices it in the list with the corresponding original GT_LCL_FLD tree as op1.
+
+ assert(info->structDesc.eightByteCount != 0);
+
+ if (info->structDesc.eightByteCount == 1)
+ {
+ // clang-format off
+ // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree.
+ //
+ // Here the IR for this operation:
+ // lowering call :
+ // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
+ // N003(6, 5)[000052] * --XG------ - / --* indir int
+ // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
+ // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
+ // N009(3, 4)[000054] ------ - N----arg0 in rdi + --* lclFld int V02 tmp0[+0](last use)
+ // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
+ //
+ // args :
+ // lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int
+ //
+ // late :
+ // lowering arg : N009(3, 4)[000054] ------ - N---- * lclFld int V02 tmp0[+0](last use)
+ // new node is : (3, 4)[000071] ------------ * putarg_reg int RV
+ //
+ // after :
+ // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
+ // N003(6, 5)[000052] * --XG------ - / --* indir int
+ // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
+ // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
+ // N009(3, 4)[000054] ------ - N---- | / --* lclFld int V02 tmp0[+0](last use)
+ // (3, 4)[000071] ------------arg0 in rdi + --* putarg_reg int RV
+ // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
+ //
+ // clang-format on
+
+ putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+ }
+ else if (info->structDesc.eightByteCount == 2)
+ {
+ // clang-format off
+ // Case 2 above: Convert the LCL_FLDs to PUTARG_REG
+ //
+ // lowering call :
+ // N001(3, 2) [000025] ------ - N----Source / --* &lclVar byref V01 loc1
+ // N003(3, 2) [000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
+ // N006(1, 1) [000058] ------------ + --* const int 16
+ // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
+ // N009(3, 4) [000061] ------ - N----arg0 in rdi + --* lclFld long V03 tmp1[+0]
+ // N010(3, 4) [000063] ------------arg0 in rsi + --* lclFld long V03 tmp1[+8](last use)
+ // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
+ //
+ // args :
+ // lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk void
+ //
+ // late :
+ // lowering arg : N012(11, 13)[000065] ------------ * <list> struct
+ //
+ // after :
+ // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1
+ // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
+ // N006(1, 1)[000058] ------------ + --* const int 16
+ // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
+ // N009(3, 4)[000061] ------ - N---- | / --* lclFld long V03 tmp1[+0]
+ // (3, 4)[000072] ------------arg0 in rdi + --* putarg_reg long
+ // N010(3, 4)[000063] ------------ | / --* lclFld long V03 tmp1[+8](last use)
+ // (3, 4)[000073] ------------arg0 in rsi + --* putarg_reg long
+ // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
+ //
+ // clang-format on
+
+ assert(arg->OperGet() == GT_LIST);
+
+ GenTreeArgList* argListPtr = arg->AsArgList();
+ assert(argListPtr->IsAggregate());
+
+ for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++)
+ {
+ // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD.
+ GenTreePtr newOper = comp->gtNewOperNode(
+ GT_PUTARG_REG,
+ comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr],
+ info->structDesc.eightByteSizes[ctr]),
+ argListPtr->gtOp.gtOp1);
+
+ // Splice in the new GT_PUTARG_REG node in the GT_LIST
+ ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper);
+ }
+
+ // Just return arg. The GT_LIST is not replaced.
+ // Nothing more to do.
+ return arg;
+ }
+ else
+ {
+ assert(false &&
+ "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes for the CLR.
+ }
+ }
+ else
+#else // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if FEATURE_MULTIREG_ARGS
+ if ((info->numRegs > 1) && (arg->OperGet() == GT_LIST))
+ {
+ assert(arg->OperGet() == GT_LIST);
+
+ GenTreeArgList* argListPtr = arg->AsArgList();
+ assert(argListPtr->IsAggregate());
+
+ for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++)
+ {
+ GenTreePtr curOp = argListPtr->gtOp.gtOp1;
+ var_types curTyp = curOp->TypeGet();
+
+ // Create a new GT_PUTARG_REG node with op1
+ GenTreePtr newOper = comp->gtNewOperNode(GT_PUTARG_REG, curTyp, curOp);
+
+ // Splice in the new GT_PUTARG_REG node in the GT_LIST
+ ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper);
+ }
+
+ // Just return arg. The GT_LIST is not replaced.
+ // Nothing more to do.
+ return arg;
+ }
+ else
+#endif // FEATURE_MULTIREG_ARGS
+#endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+ }
+ }
+ else
+ {
+ // Mark this one as tail call arg if it is a fast tail call.
+ // This provides the info to put this argument in the incoming arg area slot
+ // instead of the outgoing arg area slot.
+
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is
+ // correct
+
+#if FEATURE_FASTTAILCALL
+ putArg = new (comp, GT_PUTARG_STK)
+ GenTreePutArgStk(GT_PUTARG_STK, type, arg,
+ info->slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->isStruct),
+ call->IsFastTailCall() DEBUGARG(call));
+#else
+ putArg = new (comp, GT_PUTARG_STK)
+ GenTreePutArgStk(GT_PUTARG_STK, type, arg,
+ info->slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->isStruct) DEBUGARG(call));
+#endif
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If the ArgTabEntry indicates that this arg is a struct
+ // get and store the number of slots that are references.
+ // This is later used by the PUT_ARG_STK codegen for structs to decide whether, and for how many
+ // slots, single eight-byte copies must be done (only for reference slots), so that GC info is emitted.
+ // For non-reference slots, faster/smaller instructions are used -
+ // pair copying using XMM registers or rep mov instructions.
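+ // For example (illustrative): for a struct laid out as { object a; long b; object c } the layout
+ // would be { GC, NonGC, GC } with numRefs == 2, so codegen copies slots 0 and 2 with GC-reporting
+ // moves and is free to use a plain block copy for slot 1.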
+ if (info->isStruct)
+ {
+ unsigned numRefs = 0;
+ BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots];
+ // We use GT_OBJ for non-SIMD struct arguments. However, for
+ // SIMD arguments the GT_OBJ has already been transformed.
+ if (arg->gtOper != GT_OBJ)
+ {
+ assert(varTypeIsSIMD(arg));
+ }
+ else
+ {
+ assert(!varTypeIsSIMD(arg));
+ numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
+ }
+
+ putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ }
+
+ if (arg->InReg())
+ {
+ putArg->SetInReg();
+ }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (info->isStruct)
+ {
+ if (info->structDesc.passedInRegisters)
+ {
+ putArg->SetInReg();
+ }
+ }
+#endif
+
+ JITDUMP("new node is : ");
+ DISPNODE(putArg);
+ JITDUMP("\n");
+
+ if (arg->gtFlags & GTF_LATE_ARG)
+ {
+ putArg->gtFlags |= GTF_LATE_ARG;
+ }
+ else if (updateArgTable)
+ {
+ info->node = putArg;
+ }
+ return putArg;
+}
+
+//------------------------------------------------------------------------
+// LowerArg: Lower one argument of a call. This entails splicing a "putarg" node between
+// the argument evaluation and the call. This is the point at which the source is
+// consumed and the value transitions from control of the register allocator to the calling
+// convention.
+//
+// Arguments:
+// call - The call node
+// ppArg - Pointer to the call argument pointer. We might replace the call argument by
+// changing *ppArg.
+//
+// Return Value:
+// None.
+//
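+// Notes:
+//    As an illustrative sketch (not actual dump output), for a register-passed argument the LIR goes from
+//        t1 = LCL_VAR V01  ...  CALL foo(t1)
+//    to
+//        t1 = LCL_VAR V01 ; t2 = PUTARG_REG t1  ...  CALL foo(t2)
+//    i.e. the putarg node consumes the evaluated argument and becomes the node the arg table entry points at.
+//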
+void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
+{
+ GenTreePtr arg = *ppArg;
+
+ JITDUMP("lowering arg : ");
+ DISPNODE(arg);
+
+ // No assignments should remain by Lowering.
+ assert(!arg->OperIsAssignment());
+ assert(!arg->OperIsPutArgStk());
+
+ // Assignments/stores at this level are not really placing an argument.
+ // They are setting up temporary locals that will later be placed into
+ // outgoing regs or stack.
+ if (arg->OperIsStore() || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || arg->OperIsCopyBlkOp())
+ {
+ return;
+ }
+
+ fgArgTabEntryPtr info = comp->gtArgEntryByNode(call, arg);
+ assert(info->node == arg);
+ bool isReg = (info->regNum != REG_STK);
+ var_types type = arg->TypeGet();
+
+ if (varTypeIsSmall(type))
+ {
+ // Normalize 'type'; it represents the item that we will be storing in the outgoing args area.
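+ // (For example, a TYP_UBYTE or TYP_SHORT argument is widened here so the putarg node is typed as a
+ // full TYP_INT.)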
+ type = TYP_INT;
+ }
+
+ GenTreePtr putArg;
+
+ // If we hit this we are probably double-lowering.
+ assert(!arg->OperIsPutArg());
+
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(type))
+ {
+ if (isReg)
+ {
+ NYI("Lowering of long register argument");
+ }
+
+ // For longs, we will create two PUTARG_STKs below the GT_LONG. The hi argument needs to
+ // be pushed first, so the hi PUTARG_STK will precede the lo PUTARG_STK in execution order.
+ noway_assert(arg->OperGet() == GT_LONG);
+ GenTreePtr argLo = arg->gtGetOp1();
+ GenTreePtr argHi = arg->gtGetOp2();
+
+ GenTreePtr putArgLo = NewPutArg(call, argLo, info, type);
+ GenTreePtr putArgHi = NewPutArg(call, argHi, info, type);
+
+ arg->gtOp.gtOp1 = putArgLo;
+ arg->gtOp.gtOp2 = putArgHi;
+
+ BlockRange().InsertBefore(arg, putArgHi, putArgLo);
+
+ // The execution order now looks like this:
+ // argLoPrev <-> argLoFirst ... argLo <-> argHiFirst ... argHi <-> putArgHi <-> putArgLo <-> arg(GT_LONG)
+
+ assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
+ arg->gtFlags |= GTF_REVERSE_OPS; // We consume the high arg (op2) first.
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+
+#ifdef _TARGET_ARM64_
+ // For a vararg call, reg args should all be integer.
+ // Insert a copy to move the float value to an integer register.
+ if (call->IsVarargs() && varTypeIsFloating(type))
+ {
+ var_types intType = (type == TYP_DOUBLE) ? TYP_LONG : TYP_INT;
+ GenTreePtr intArg = comp->gtNewOperNode(GT_COPY, intType, arg);
+
+ info->node = intArg;
+ ReplaceArgWithPutArgOrCopy(ppArg, intArg);
+
+ // Update arg/type with new ones.
+ arg = intArg;
+ type = intType;
+ }
+#endif
+
+ putArg = NewPutArg(call, arg, info, type);
+
+ // In the case of a register-passable struct (in one or two registers),
+ // NewPutArg returns a new node (a GT_PUTARG_REG, or a GT_LIST with two GT_PUTARG_REGs).
+ // If an extra node is returned, splice it in the right place in the tree.
+ if (arg != putArg)
+ {
+ ReplaceArgWithPutArgOrCopy(ppArg, putArg);
+ }
+ }
+}
+
+// do lowering steps for each arg of a call
+void Lowering::LowerArgsForCall(GenTreeCall* call)
+{
+ JITDUMP("objp:\n======\n");
+ if (call->gtCallObjp)
+ {
+ LowerArg(call, &call->gtCallObjp);
+ }
+
+ GenTreeArgList* args = call->gtCallArgs;
+
+ JITDUMP("\nargs:\n======\n");
+ for (; args; args = args->Rest())
+ {
+ LowerArg(call, &args->Current());
+ }
+
+ JITDUMP("\nlate:\n======\n");
+ for (args = call->gtCallLateArgs; args; args = args->Rest())
+ {
+ LowerArg(call, &args->Current());
+ }
+}
+
+// helper that creates a node representing a relocatable physical address computation
+// (optionally specifying the register to place it in)
+GenTree* Lowering::AddrGen(ssize_t addr, regNumber reg)
+{
+ // this should end up in codegen as : instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, addr)
+ GenTree* result = comp->gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
+
+ result->gtRegNum = reg;
+
+ return result;
+}
+
+// variant that takes a void*
+GenTree* Lowering::AddrGen(void* addr, regNumber reg)
+{
+ return AddrGen((ssize_t)addr, reg);
+}
+
+// do lowering steps for a call
+// this includes:
+// - adding the placement nodes (either stack or register variety) for arguments
+// - lowering the expression that calculates the target address
+// - adding nodes for other operations that occur after the call sequence starts and before
+// control transfer occurs (profiling and tail call helpers, pinvoke incantations)
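+// For example (a rough sketch, not an exact dump): a direct call whose target is reached through a
+// single indirection cell ends up with call->gtControlExpr = IND(CNS_INT<handle> cellAddr), and that
+// control expression is sequenced and spliced into the LIR range just before the call node.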
+//
+void Lowering::LowerCall(GenTree* node)
+{
+ GenTreeCall* call = node->AsCall();
+
+ JITDUMP("lowering call (before):\n");
+ DISPTREERANGE(BlockRange(), call);
+ JITDUMP("\n");
+
+ LowerArgsForCall(call);
+
+// RyuJIT arm is not set up for lowered call control
+#ifndef _TARGET_ARM_
+
+ // note that everything generated from this point on runs AFTER the outgoing args are placed
+ GenTree* result = nullptr;
+
+ // for x86, this is where we record ESP for checking later to make sure stack is balanced
+
+ // Check for Delegate.Invoke(). If so, we inline it. We get the
+ // target-object and target-function from the delegate-object, and do
+ // an indirect call.
+ if (call->IsDelegateInvoke())
+ {
+ result = LowerDelegateInvoke(call);
+ }
+ else
+ {
+ // Virtual and interface calls
+ switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
+ {
+ case GTF_CALL_VIRT_STUB:
+ result = LowerVirtualStubCall(call);
+ break;
+
+ case GTF_CALL_VIRT_VTABLE:
+ // stub dispatching is off or this is not a virtual call (could be a tailcall)
+ result = LowerVirtualVtableCall(call);
+ break;
+
+ case GTF_CALL_NONVIRT:
+ if (call->IsUnmanaged())
+ {
+ result = LowerNonvirtPinvokeCall(call);
+ }
+ else if (call->gtCallType == CT_INDIRECT)
+ {
+ result = LowerIndirectNonvirtCall(call);
+ }
+ else
+ {
+ result = LowerDirectCall(call);
+ }
+ break;
+
+ default:
+ noway_assert(!"strange call type");
+ break;
+ }
+ }
+
+ if (call->IsTailCallViaHelper())
+ {
+ // Either controlExpr or gtCallAddr must contain real call target.
+ if (result == nullptr)
+ {
+ assert(call->gtCallType == CT_INDIRECT);
+ assert(call->gtCallAddr != nullptr);
+ result = call->gtCallAddr;
+ }
+
+ result = LowerTailCallViaHelper(call, result);
+ }
+ else if (call->IsFastTailCall())
+ {
+ LowerFastTailCall(call);
+ }
+
+ if (result != nullptr)
+ {
+ LIR::Range resultRange = LIR::SeqTree(comp, result);
+
+ JITDUMP("results of lowering call:\n");
+ DISPRANGE(resultRange);
+
+ GenTree* insertionPoint = call;
+ if (!call->IsTailCallViaHelper())
+ {
+ // The controlExpr should go before the gtCallCookie and the gtCallAddr, if they exist
+ //
+ // TODO-LIR: find out what's really required here, as this is currently a tree order
+ // dependency.
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ bool isClosed = false;
+ if (call->gtCallCookie != nullptr)
+ {
+#ifdef DEBUG
+ GenTree* firstCallAddrNode = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
+ assert(isClosed);
+ assert(call->gtCallCookie->Precedes(firstCallAddrNode));
+#endif // DEBUG
+
+ insertionPoint = BlockRange().GetTreeRange(call->gtCallCookie, &isClosed).FirstNode();
+ assert(isClosed);
+ }
+ else if (call->gtCallAddr != nullptr)
+ {
+ insertionPoint = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
+ assert(isClosed);
+ }
+ }
+ }
+
+ BlockRange().InsertBefore(insertionPoint, std::move(resultRange));
+
+ call->gtControlExpr = result;
+ }
+#endif //!_TARGET_ARM_
+
+ if (comp->opts.IsJit64Compat())
+ {
+ CheckVSQuirkStackPaddingNeeded(call);
+ }
+
+ JITDUMP("lowering call (after):\n");
+ DISPTREERANGE(BlockRange(), call);
+ JITDUMP("\n");
+}
+
+// Though the issue described below is fixed in the IntelliTrace dll of VS2015 (a.k.a. Dev14),
+// we still need this quirk for desktop so that older versions of VS (e.g. VS2010/2012)
+// continue to work.
+// This quirk is excluded from other targets that have no back compat burden.
+//
+// Quirk for VS debug-launch scenario to work:
+// See if this is a PInvoke call with exactly one param that is the address of a struct local.
+// In such a case indicate to frame-layout logic to add 16-bytes of padding
+// between save-reg area and locals. This is to protect against the buffer
+// overrun bug in microsoft.intellitrace.11.0.0.dll!ProfilerInterop.InitInterop().
+//
+// A work-around to this bug is to disable IntelliTrace debugging
+// (VS->Tools->Options->IntelliTrace->Enable IntelliTrace - uncheck this option).
+// The reason why this works on Jit64 is that at the point of AV the call stack is
+//
+// GetSystemInfo() Native call
+// IL_Stub generated for PInvoke declaration.
+// ProfilerInterface::InitInterop()
+// ProfilerInterface.Cctor()
+// VM asm worker
+//
+// The cctor body has just the call to InitInterop(). VM asm worker is holding
+// something in rbx that is used immediately after the Cctor call. Jit64 generated
+// InitInterop() method is pushing the registers in the following order
+//
+// rbx
+// rbp
+// rsi
+// rdi
+// r12
+// r13
+// Struct local
+//
+// Due to buffer overrun, rbx doesn't get impacted. Whereas RyuJIT jitted code of
+// the same method is pushing regs in the following order
+//
+// rbp
+// rdi
+// rsi
+// rbx
+// struct local
+//
+// Therefore as a fix, we add padding between save-reg area and locals to
+// make this scenario work against JB.
+//
+// Note: If this quirk gets broken due to other JIT optimizations, we should consider
+// a more tolerant fix. One such fix is to pad the struct.
+void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call)
+{
+ assert(comp->opts.IsJit64Compat());
+
+#ifdef _TARGET_AMD64_
+ // Confine this to IL stub calls which aren't marked as unmanaged.
+ if (call->IsPInvoke() && !call->IsUnmanaged())
+ {
+ bool paddingNeeded = false;
+ GenTreePtr firstPutArgReg = nullptr;
+ for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest())
+ {
+ GenTreePtr tmp = args->Current();
+ if (tmp->OperGet() == GT_PUTARG_REG)
+ {
+ if (firstPutArgReg == nullptr)
+ {
+ firstPutArgReg = tmp;
+ GenTreePtr op1 = firstPutArgReg->gtOp.gtOp1;
+
+ if (op1->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
+ // TODO-1stClassStructs: This is here to duplicate previous behavior,
+ // but is not needed because the scenario being quirked did not involve
+ // a SIMD or enregisterable struct.
+ // if(comp->lvaTable[lclNum].TypeGet() == TYP_STRUCT)
+ if (varTypeIsStruct(comp->lvaTable[lclNum].TypeGet()))
+ {
+ // First arg is addr of a struct local.
+ paddingNeeded = true;
+ }
+ else
+ {
+ // Not a struct local.
+ assert(paddingNeeded == false);
+ break;
+ }
+ }
+ else
+ {
+ // First arg is not a local var addr.
+ assert(paddingNeeded == false);
+ break;
+ }
+ }
+ else
+ {
+ // Has more than one arg.
+ paddingNeeded = false;
+ break;
+ }
+ }
+ }
+
+ if (paddingNeeded)
+ {
+ comp->compVSQuirkStackPaddingNeeded = VSQUIRK_STACK_PAD;
+ }
+ }
+#endif // _TARGET_AMD64_
+}
+
+// Inserts profiler hook, GT_PROF_HOOK for a tail call node.
+//
+// We need to insert this after all nested calls, but before all the arguments to this call have been set up.
+// To do this, we look for the first GT_PUTARG_STK or GT_PUTARG_REG, and insert the hook immediately before
+// that. If there are no args, then it should be inserted before the call node.
+//
+// For example:
+// * stmtExpr void (top level) (IL 0x000...0x010)
+// arg0 SETUP | /--* argPlace ref REG NA $c5
+// this in rcx | | /--* argPlace ref REG NA $c1
+// | | | /--* call ref System.Globalization.CultureInfo.get_InvariantCulture $c2
+// arg1 SETUP | | +--* st.lclVar ref V02 tmp1 REG NA $c2
+// | | | /--* lclVar ref V02 tmp1 u : 2 (last use) REG NA $c2
+// arg1 in rdx | | +--* putarg_reg ref REG NA
+// | | | /--* lclVar ref V00 arg0 u : 2 (last use) REG NA $80
+// this in rcx | | +--* putarg_reg ref REG NA
+// | | /--* call nullcheck ref System.String.ToLower $c5
+// | | { * stmtExpr void (embedded)(IL 0x000... ? ? ? )
+// | | { \--* prof_hook void REG NA
+// arg0 in rcx | +--* putarg_reg ref REG NA
+// control expr | +--* const(h) long 0x7ffe8e910e98 ftn REG NA
+// \--* call void System.Runtime.Remoting.Identity.RemoveAppNameOrAppGuidIfNecessary $VN.Void
+//
+// In this case, the GT_PUTARG_REG src is a nested call. We need to put the instructions after that call
+// (as shown). We assume that of all the GT_PUTARG_*, only the first one can have a nested call.
+//
+// Params:
+// call - tail call node
+// insertionPoint - insertion point provided by the caller; if null, the
+// profiler hook is inserted before the args are set up
+// but after all arg side effects are computed.
+void Lowering::InsertProfTailCallHook(GenTreeCall* call, GenTree* insertionPoint)
+{
+ assert(call->IsTailCall());
+ assert(comp->compIsProfilerHookNeeded());
+
+ if (insertionPoint == nullptr)
+ {
+ GenTreePtr tmp = nullptr;
+ for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
+ {
+ tmp = args->Current();
+ assert(tmp->OperGet() != GT_PUTARG_REG); // We don't expect to see these in gtCallArgs
+ if (tmp->OperGet() == GT_PUTARG_STK)
+ {
+ // found it
+ insertionPoint = tmp;
+ break;
+ }
+ }
+
+ if (insertionPoint == nullptr)
+ {
+ for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest())
+ {
+ tmp = args->Current();
+ if ((tmp->OperGet() == GT_PUTARG_REG) || (tmp->OperGet() == GT_PUTARG_STK))
+ {
+ // found it
+ insertionPoint = tmp;
+ break;
+ }
+ }
+
+ // If there are no args, insert before the call node
+ if (insertionPoint == nullptr)
+ {
+ insertionPoint = call;
+ }
+ }
+ }
+
+ assert(insertionPoint != nullptr);
+ GenTreePtr profHookNode = new (comp, GT_PROF_HOOK) GenTree(GT_PROF_HOOK, TYP_VOID);
+ BlockRange().InsertBefore(insertionPoint, profHookNode);
+}
+
+// Lower fast tail call implemented as epilog+jmp.
+// Also inserts PInvoke method epilog if required.
+void Lowering::LowerFastTailCall(GenTreeCall* call)
+{
+#if FEATURE_FASTTAILCALL
+ // Tail call restrictions i.e. conditions under which tail prefix is ignored.
+ // Most of these checks are already done by importer or fgMorphTailCall().
+ // This serves as a double sanity check.
+ assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods
+ assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check
+ assert(!call->IsUnmanaged()); // tail calls to unmanaged methods
+ assert(!comp->compLocallocUsed); // tail call from methods that also do localloc
+ assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check
+
+ // We expect to see a call that meets the following conditions
+ assert(call->IsFastTailCall());
+
+ // VM cannot use return address hijacking when A() and B() tail call each
+ // other in mutual recursion. Therefore, this block is reachable through
+ // a GC-safe point or the whole method is marked as fully interruptible.
+ //
+ // TODO-Cleanup:
+ // optReachWithoutCall() depends on the fact that loop header blocks
+ // will have a block number > fgLastBB. These loop headers get added
+ // after dominator computation and get skipped by optReachWithoutCall().
+ // The below condition cannot be asserted in lower because fgSimpleLowering()
+ // can add a new basic block for range check failure which becomes
+ // fgLastBB with block number > loop header block number.
+ // assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
+ // !comp->optReachWithoutCall(comp->fgFirstBB, comp->compCurBB) || comp->genInterruptible);
+
+ // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
+ // a method returns. This is a case of caller method has both PInvokes and tail calls.
+ if (comp->info.compCallUnmanaged)
+ {
+ InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call));
+ }
+
+ // Args for a tail call are set up in the incoming arg area. The gc-ness of the args of the
+ // caller and the callee (which is being tail called) may not match. Therefore, everything
+ // from arg setup until the epilog needs to be non-interruptible by GC. This is
+ // achieved by inserting GT_START_NONGC before the very first GT_PUTARG_STK node of the
+ // call. Note that once a stack arg has been set up, no nested calls may follow it in
+ // execution order to set up other args, because a nested call could overwrite a stack
+ // arg that was set up earlier.
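+ //
+ // As a rough sketch, the resulting ordering within the block is:
+ //   ... <arg side effects> ... [GT_PROF_HOOK] GT_START_NONGC <PUTARG_STK...> <PUTARG_REG...> <tail call>
+ // (the profiler hook, if needed, is inserted before GT_START_NONGC further below); nothing between
+ // GT_START_NONGC and the call may trigger a GC.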
+ GenTreePtr firstPutArgStk = nullptr;
+ GenTreeArgList* args;
+ ArrayStack<GenTree*> putargs(comp);
+
+ for (args = call->gtCallArgs; args; args = args->Rest())
+ {
+ GenTreePtr tmp = args->Current();
+ if (tmp->OperGet() == GT_PUTARG_STK)
+ {
+ putargs.Push(tmp);
+ }
+ }
+
+ for (args = call->gtCallLateArgs; args; args = args->Rest())
+ {
+ GenTreePtr tmp = args->Current();
+ if (tmp->OperGet() == GT_PUTARG_STK)
+ {
+ putargs.Push(tmp);
+ }
+ }
+
+ if (putargs.Height() > 0)
+ {
+ firstPutArgStk = putargs.Bottom();
+ }
+
+ // If we have a putarg_stk node, also count the number of non-standard args the
+ // call node has. Note that while determining whether a tail call can be fast
+ // tail called, we don't count non-standard args (passed in R10 or R11) since they
+ // don't contribute to outgoing arg space. These non-standard args are not
+ // accounted in caller's arg count but accounted in callee's arg count after
+ // fgMorphArgs(). Therefore, exclude callee's non-standard args while mapping
+ // callee's stack arg num to corresponding caller's stack arg num.
+ unsigned calleeNonStandardArgCount = call->GetNonStandardAddedArgCount(comp);
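+ // For example (illustrative): if the callee has one non-standard added arg (say, one passed in R11),
+ // a PUTARG_STK whose argTabEntry->argNum is 3 maps to caller stack arg number 3 - 1 == 2, and it is
+ // that caller parameter's stack slot which the putarg will overwrite.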
+
+ // Say Caller(a, b, c, d, e) fast tail calls Callee(e, d, c, b, a)
+ // i.e. passes its arguments in reverse to Callee. During call site
+ // setup, after computing argument side effects, stack args are setup
+ // first and reg args next. In the above example, both the Caller's and the
+ // Callee's stack args (e and a respectively) share the same stack slot
+ // and are alive at the same time. Setting up the Callee's
+ // stack arg will overwrite the Caller's stack arg; if there are
+ // further uses of the Caller's stack arg, we have to move
+ // it to a temp before overwriting its slot, and use the temp in place of
+ // the corresponding Caller stack arg.
+ //
+ // For the above example, conceptually this is what is done
+ // tmp = e;
+ // Stack slot of e = a
+ // R9 = b, R8 = c, RDx = d
+ // RCX = tmp
+ //
+ // The below logic is meant to detect cases like this and introduce
+ // temps to set up args correctly for Callee.
+
+ for (int i = 0; i < putargs.Height(); i++)
+ {
+ GenTreePtr putArgStkNode = putargs.Bottom(i);
+
+ assert(putArgStkNode->OperGet() == GT_PUTARG_STK);
+
+ // Get the caller arg num corresponding to this callee arg.
+ // Note that these two args share the same stack slot. Therefore,
+ // if there are further uses of corresponding caller arg, we need
+ // to move it to a temp and use the temp in this call tree.
+ //
+ // Note that Caller is guaranteed to have a param corresponding to
+ // this Callee's arg, since the fast tail call mechanism counts the
+ // stack slots required for both Caller and Callee for passing params
+ // and allows a fast tail call only if the stack slots required by the Caller >=
+ // those required by the Callee.
+ fgArgTabEntryPtr argTabEntry = comp->gtArgEntryByNode(call, putArgStkNode);
+ assert(argTabEntry);
+ unsigned callerArgNum = argTabEntry->argNum - calleeNonStandardArgCount;
+ noway_assert(callerArgNum < comp->info.compArgsCount);
+
+ unsigned callerArgLclNum = callerArgNum;
+ LclVarDsc* callerArgDsc = comp->lvaTable + callerArgLclNum;
+ if (callerArgDsc->lvPromoted)
+ {
+ callerArgLclNum =
+ callerArgDsc->lvFieldLclStart; // update the callerArgNum to the promoted struct field's lclNum
+ callerArgDsc = comp->lvaTable + callerArgLclNum;
+ }
+ noway_assert(callerArgDsc->lvIsParam);
+
+ // Search the execution-order list until we encounter the call node.
+ unsigned tmpLclNum = BAD_VAR_NUM;
+ var_types tmpType = TYP_UNDEF;
+ for (GenTreePtr treeNode = putArgStkNode->gtNext; treeNode != call; treeNode = treeNode->gtNext)
+ {
+ if (treeNode->OperIsLocal() || treeNode->OperIsLocalAddr())
+ {
+ // This should neither be a GT_REG_VAR nor GT_PHI_ARG.
+ assert((treeNode->OperGet() != GT_REG_VAR) && (treeNode->OperGet() != GT_PHI_ARG));
+
+ GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
+ LclVarDsc* lclVar = &comp->lvaTable[lcl->gtLclNum];
+
+ // The fast tail calling criteria permit passing structs of size 1, 2, 4 and 8 as args.
+ // It is possible that callerArgLclNum corresponds to such a struct whose stack slot
+ // is being overwritten by the setup of a stack arg, while there are still further uses of
+ // its fields if the struct is dependently promoted. In this case too
+ // we need to introduce a temp.
+ if ((lcl->gtLclNum == callerArgNum) || (lcl->gtLclNum == callerArgLclNum))
+ {
+ // Create tmp and use it in place of callerArgDsc
+ if (tmpLclNum == BAD_VAR_NUM)
+ {
+ tmpLclNum = comp->lvaGrabTemp(
+ true DEBUGARG("Fast tail call lowering is creating a new local variable"));
+ comp->lvaSortAgain = true;
+ tmpType = genActualType(callerArgDsc->lvaArgType());
+ comp->lvaTable[tmpLclNum].lvType = tmpType;
+ comp->lvaTable[tmpLclNum].lvRefCnt = 1;
+ }
+
+ lcl->SetLclNum(tmpLclNum);
+ lcl->SetOper(GT_LCL_VAR);
+ }
+ }
+ }
+
+ // If we have created a temp, insert an assignment of the caller arg to the temp before
+ // the first putargStkNode, i.e.
+ // tmpLcl = CallerArg
+ if (tmpLclNum != BAD_VAR_NUM)
+ {
+ assert(tmpType != TYP_UNDEF);
+ GenTreeLclVar* local =
+ new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, tmpType, callerArgLclNum, BAD_IL_OFFSET);
+ GenTree* assignExpr = comp->gtNewTempAssign(tmpLclNum, local);
+ BlockRange().InsertBefore(firstPutArgStk, LIR::SeqTree(comp, assignExpr));
+ }
+ }
+
+ // Insert GT_START_NONGC node before the first GT_PUTARG_STK node.
+ // Note that if there are no args to be setup on stack, no need to
+ // insert GT_START_NONGC node.
+ GenTreePtr startNonGCNode = nullptr;
+ if (firstPutArgStk != nullptr)
+ {
+ startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID);
+ BlockRange().InsertBefore(firstPutArgStk, startNonGCNode);
+
+ // Gc-interruptability in the following case:
+ // foo(a, b, c, d, e) { bar(a, b, c, d, e); }
+ // bar(a, b, c, d, e) { foo(a, b, d, d, e); }
+ //
+ // Since the instruction group starting from the instruction that sets up first
+ // stack arg to the end of the tail call is marked as non-gc interruptible,
+ // this will form a non-interruptible tight loop causing gc-starvation. To fix
+ // this we insert a GT_NO_OP node before GT_START_NONGC, if the method
+ // has a single basic block and is not a GC-safe point. The presence of a single
+ // nop outside non-gc interruptible region will prevent gc starvation.
+ if ((comp->fgBBcount == 1) && !(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT))
+ {
+ assert(comp->fgFirstBB == comp->compCurBB);
+ GenTreePtr noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
+ BlockRange().InsertBefore(startNonGCNode, noOp);
+ }
+ }
+
+ // Insert GT_PROF_HOOK node to emit profiler tail call hook. This should be
+ // inserted before the args are setup but after the side effects of args are
+ // computed. That is, GT_PROF_HOOK node needs to be inserted before GT_START_NONGC
+ // node if one exists.
+ if (comp->compIsProfilerHookNeeded())
+ {
+ InsertProfTailCallHook(call, startNonGCNode);
+ }
+
+#else // !FEATURE_FASTTAILCALL
+
+ // The platform chose not to implement the fast tail call mechanism.
+ // In that case we should never reach this method, as
+ // the expectation is that IsTailCallViaHelper() will always
+ // be true on such a platform.
+ unreached();
+#endif
+}
+
+//------------------------------------------------------------------------
+// LowerTailCallViaHelper: lower a call via the tailcall helper. Morph
+// has already inserted tailcall helper special arguments. This function
+// inserts actual data for some placeholders.
+//
+// For AMD64, lower
+// tail.call(void* copyRoutine, void* dummyArg, ...)
+// as
+// Jit_TailCall(void* copyRoutine, void* callTarget, ...)
+//
+// For x86, lower
+// tail.call(<function args>, int numberOfOldStackArgs, int dummyNumberOfNewStackArgs, int flags, void* dummyArg)
+// as
+// JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
+// callTarget)
+// Note that the special arguments are on the stack, whereas the function arguments follow the normal convention.
+//
+// Also inserts PInvoke method epilog if required.
+//
+// Arguments:
+// call - The call node
+// callTarget - The real call target. This is used to replace the dummyArg during lowering.
+//
+// Return Value:
+// Returns control expression tree for making a call to helper Jit_TailCall.
+//
+GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget)
+{
+ // Tail call restrictions i.e. conditions under which tail prefix is ignored.
+ // Most of these checks are already done by importer or fgMorphTailCall().
+ // This serves as a double sanity check.
+ assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods
+ assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check
+ assert(!call->IsUnmanaged()); // tail calls to unmanaged methods
+ assert(!comp->compLocallocUsed); // tail call from methods that also do localloc
+ assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check
+
+ // We expect to see a call that meets the following conditions
+ assert(call->IsTailCallViaHelper());
+ assert(callTarget != nullptr);
+
+ // The TailCall helper call never returns to the caller and is not GC interruptible.
+ // Therefore the block containing the tail call should be a GC safe point to avoid
+ // GC starvation.
+ assert(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
+
+ // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
+ // a method returns. This is a case of caller method has both PInvokes and tail calls.
+ if (comp->info.compCallUnmanaged)
+ {
+ InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call));
+ }
+
+ // Remove gtCallAddr from execution order if present.
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ assert(call->gtCallAddr != nullptr);
+
+ bool isClosed;
+ LIR::ReadOnlyRange callAddrRange = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed);
+ assert(isClosed);
+
+ BlockRange().Remove(std::move(callAddrRange));
+ }
+
+ // The callTarget tree needs to be sequenced.
+ LIR::Range callTargetRange = LIR::SeqTree(comp, callTarget);
+
+ fgArgTabEntry* argEntry;
+
+#if defined(_TARGET_AMD64_)
+
+// For AMD64, the first argument is the CopyRoutine and the second argument is a placeholder node.
+
+#ifdef DEBUG
+ argEntry = comp->gtArgEntryByArgNum(call, 0);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_REG);
+ GenTree* firstArg = argEntry->node->gtOp.gtOp1;
+ assert(firstArg->gtOper == GT_CNS_INT);
+#endif
+
+ // Replace second arg by callTarget.
+ argEntry = comp->gtArgEntryByArgNum(call, 1);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_REG);
+ GenTree* secondArg = argEntry->node->gtOp.gtOp1;
+
+ BlockRange().InsertAfter(secondArg, std::move(callTargetRange));
+
+ bool isClosed;
+ LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(secondArg, &isClosed);
+ assert(isClosed);
+
+ BlockRange().Remove(std::move(secondArgRange));
+
+ argEntry->node->gtOp.gtOp1 = callTarget;
+
+#elif defined(_TARGET_X86_)
+
+ // Verify the special args are what we expect, and replace the dummy args with real values.
+ // We need to figure out the size of the outgoing stack arguments, not including the special args.
+ // The number of 4-byte words is passed to the helper for the incoming and outgoing argument sizes.
+ // This number is exactly the next slot number in the call's argument info struct.
+ unsigned nNewStkArgsWords = call->fgArgInfo->GetNextSlotNum();
+ assert(nNewStkArgsWords >= 4); // There must be at least the four special stack args.
+ nNewStkArgsWords -= 4;
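+ // For example (illustrative): if GetNextSlotNum() returns 7, the call sets up 7 outgoing stack
+ // slots in total; 4 of them are the special helper args, so nNewStkArgsWords == 3, i.e. 12 bytes
+ // of "real" outgoing stack arguments are reported to the helper.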
+
+ unsigned numArgs = call->fgArgInfo->ArgCount();
+
+ // arg 0 == callTarget.
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 1);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg0 = argEntry->node->gtOp.gtOp1;
+
+ BlockRange().InsertAfter(arg0, std::move(callTargetRange));
+
+ bool isClosed;
+ LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(arg0, &isClosed);
+ assert(isClosed);
+
+ argEntry->node->gtOp.gtOp1 = callTarget;
+
+ // arg 1 == flags
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 2);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg1 = argEntry->node->gtOp.gtOp1;
+ assert(arg1->gtOper == GT_CNS_INT);
+
+ ssize_t tailCallHelperFlags = 1 | // always restore EDI,ESI,EBX
+ (call->IsVirtualStub() ? 0x2 : 0x0); // Stub dispatch flag
+ arg1->gtIntCon.gtIconVal = tailCallHelperFlags;
+
+ // arg 2 == numberOfNewStackArgsWords
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 3);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg2 = argEntry->node->gtOp.gtOp1;
+ assert(arg2->gtOper == GT_CNS_INT);
+
+ arg2->gtIntCon.gtIconVal = nNewStkArgsWords;
+
+#ifdef DEBUG
+ // arg 3 == numberOfOldStackArgsWords
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 4);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg3 = argEntry->node->gtOp.gtOp1;
+ assert(arg3->gtOper == GT_CNS_INT);
+#endif // DEBUG
+
+#else
+ NYI("LowerTailCallViaHelper");
+#endif // _TARGET_*
+
+ // Transform this call node into a call to Jit tail call helper.
+ call->gtCallType = CT_HELPER;
+ call->gtCallMethHnd = comp->eeFindHelper(CORINFO_HELP_TAILCALL);
+ call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
+
+ // Lower this as if it were a pure helper call.
+ call->gtCallMoreFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER);
+ GenTree* result = LowerDirectCall(call);
+
+ // Now add back tail call flags for identifying this node as tail call dispatched via helper.
+ call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
+
+ // Insert profiler tail call hook if needed.
+ // Since we don't know the insertion point, pass null for second param.
+ if (comp->compIsProfilerHookNeeded())
+ {
+ InsertProfTailCallHook(call, nullptr);
+ }
+
+ return result;
+}
+
+// Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
+void Lowering::LowerJmpMethod(GenTree* jmp)
+{
+ assert(jmp->OperGet() == GT_JMP);
+
+ JITDUMP("lowering GT_JMP\n");
+ DISPNODE(jmp);
+ JITDUMP("============");
+
+ // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
+ // a method returns.
+ if (comp->info.compCallUnmanaged)
+ {
+ InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(jmp));
+ }
+}
+
+// Lower GT_RETURN node to insert PInvoke method epilog if required.
+void Lowering::LowerRet(GenTree* ret)
+{
+ assert(ret->OperGet() == GT_RETURN);
+
+ JITDUMP("lowering GT_RETURN\n");
+ DISPNODE(ret);
+ JITDUMP("============");
+
+ // Method doing PInvokes has exactly one return block unless it has tail calls.
+ if (comp->info.compCallUnmanaged && (comp->compCurBB == comp->genReturnBB))
+ {
+ InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(ret));
+ }
+}
+
+GenTree* Lowering::LowerDirectCall(GenTreeCall* call)
+{
+ noway_assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_HELPER);
+
+ // We don't support tail calling helper methods,
+ // but we might encounter tail calls dispatched via the JIT tail call helper, which appear as tail calls to a helper.
+ noway_assert(!call->IsTailCall() || call->IsTailCallViaHelper() || call->gtCallType == CT_USER_FUNC);
+
+ // Non-virtual direct/indirect calls: Work out if the address of the
+ // call is known at JIT time. If not, it is either an indirect call
+ // or the address must be accessed via a single or double indirection.
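+ //
+ // Roughly (illustrative):
+ //   IAT_VALUE   -> call addr     (direct; or load the address into a register if it is out of range)
+ //   IAT_PVALUE  -> call [addr]   (one indirection through a cell)
+ //   IAT_PPVALUE -> call [[addr]] (double indirection)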
+
+ void* addr;
+ InfoAccessType accessType;
+ CorInfoHelpFunc helperNum = comp->eeGetHelperNum(call->gtCallMethHnd);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (call->gtEntryPoint.addr != nullptr)
+ {
+ accessType = call->gtEntryPoint.accessType;
+ addr = call->gtEntryPoint.addr;
+ }
+ else
+#endif
+ if (call->gtCallType == CT_HELPER)
+ {
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ // the convention on getHelperFtn seems to be (it's not documented)
+ // that it returns an address or if it returns null, pAddr is set to
+ // another address, which requires an indirection
+ void* pAddr;
+ addr = comp->info.compCompHnd->getHelperFtn(helperNum, (void**)&pAddr);
+
+ if (addr != nullptr)
+ {
+ accessType = IAT_VALUE;
+ }
+ else
+ {
+ accessType = IAT_PVALUE;
+ addr = pAddr;
+ }
+ }
+ else
+ {
+ noway_assert(helperNum == CORINFO_HELP_UNDEF);
+
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
+
+ if (call->IsSameThis())
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
+ }
+
+ if (!call->NeedsNullCheck())
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
+ }
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ comp->info.compCompHnd->getFunctionEntryPoint(call->gtCallMethHnd, &addrInfo, aflags);
+
+ accessType = addrInfo.accessType;
+ addr = addrInfo.addr;
+ }
+
+ GenTree* result = nullptr;
+ switch (accessType)
+ {
+ case IAT_VALUE:
+ // Non-virtual direct call to known address
+ if (!IsCallTargetInRange(addr) || call->IsTailCall())
+ {
+ result = AddrGen(addr);
+ }
+ else
+ {
+ // a direct call within range of hardware relative call instruction
+ // stash the address for codegen
+ call->gtDirectCallAddress = addr;
+ }
+ break;
+
+ case IAT_PVALUE:
+ {
+ // Non-virtual direct calls to addresses accessed by
+ // a single indirection.
+ GenTree* cellAddr = AddrGen(addr);
+ GenTree* indir = Ind(cellAddr);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+#ifdef _TARGET_ARM64_
+ // For arm64, we dispatch code the same as VSD, using X11 for the indirection cell address,
+ // which ZapIndirectHelperThunk expects.
+ if (call->IsR2RRelativeIndir())
+ {
+ cellAddr->gtRegNum = REG_R2R_INDIRECT_PARAM;
+ indir->gtRegNum = REG_JUMP_THUNK_PARAM;
+ }
+#endif
+#endif
+ result = indir;
+ break;
+ }
+
+ case IAT_PPVALUE:
+ // Non-virtual direct calls to addresses accessed by
+ // a double indirection.
+ //
+ // Double-indirection. Load the address into a register
+ // and call indirectly through the register
+ noway_assert(helperNum == CORINFO_HELP_UNDEF);
+ result = AddrGen(addr);
+ result = Ind(Ind(result));
+ break;
+
+ default:
+ noway_assert(!"Bad accessType");
+ break;
+ }
+
+ return result;
+}
+
+GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call)
+{
+ noway_assert(call->gtCallType == CT_USER_FUNC);
+
+ assert((comp->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) &
+ (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) == (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
+
+ GenTree* thisArgNode;
+ if (call->IsTailCallViaHelper())
+ {
+#ifdef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
+ const unsigned argNum = 0;
+#else // !_TARGET_X86_
+ // In case of helper dispatched tail calls, "thisptr" will be the third arg.
+ // The first two args are: real call target and addr of args copy routine.
+ const unsigned argNum = 2;
+#endif // !_TARGET_X86_
+
+ fgArgTabEntryPtr thisArgTabEntry = comp->gtArgEntryByArgNum(call, argNum);
+ thisArgNode = thisArgTabEntry->node;
+ }
+ else
+ {
+ thisArgNode = comp->gtGetThisArg(call);
+ }
+
+ assert(thisArgNode->gtOper == GT_PUTARG_REG);
+ GenTree* originalThisExpr = thisArgNode->gtOp.gtOp1;
+
+ // We're going to use the 'this' expression multiple times, so make a local to copy it.
+
+ unsigned lclNum;
+
+#ifdef _TARGET_X86_
+ if (call->IsTailCallViaHelper() && originalThisExpr->IsLocal())
+ {
+ // For ordering purposes for the special tailcall arguments on x86, we forced the
+ // 'this' pointer in this case to a local in Compiler::fgMorphTailCall().
+ // We could possibly use this case to remove copies for all architectures and non-tailcall
+ // calls by creating a new lcl var or lcl field reference, as is done in the
+ // LowerVirtualVtableCall() code.
+ assert(originalThisExpr->OperGet() == GT_LCL_VAR);
+ lclNum = originalThisExpr->AsLclVarCommon()->GetLclNum();
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ unsigned delegateInvokeTmp = comp->lvaGrabTemp(true DEBUGARG("delegate invoke call"));
+
+ LIR::Use thisExprUse(BlockRange(), &thisArgNode->gtOp.gtOp1, thisArgNode);
+ thisExprUse.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), delegateInvokeTmp);
+
+ originalThisExpr = thisExprUse.Def(); // it's changed; reload it.
+ lclNum = delegateInvokeTmp;
+ }
+
+ // replace original expression feeding into thisPtr with
+ // [originalThis + offsetOfDelegateInstance]
+
+ GenTree* newThisAddr = new (comp, GT_LEA)
+ GenTreeAddrMode(TYP_REF, originalThisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance);
+
+ GenTree* newThis = comp->gtNewOperNode(GT_IND, TYP_REF, newThisAddr);
+
+ BlockRange().InsertAfter(originalThisExpr, newThisAddr, newThis);
+
+ thisArgNode->gtOp.gtOp1 = newThis;
+
+ // the control target is
+ // [originalThis + firstTgtOffs]
+
+ GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(originalThisExpr->TypeGet(), lclNum, BAD_IL_OFFSET);
+
+ unsigned targetOffs = comp->eeGetEEInfo()->offsetOfDelegateFirstTarget;
+ GenTree* result = new (comp, GT_LEA) GenTreeAddrMode(TYP_REF, base, nullptr, 0, targetOffs);
+ GenTree* callTarget = Ind(result);
+
+ // don't need to sequence and insert this tree, caller will do it
+
+ return callTarget;
+}
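+
+// Illustrative sketch (rough pseudo-C, not definitive): the rewritten delegate Invoke call
+// looks like the following, where 'd' is the original 'this' (the delegate object) and the
+// offsets come from the EE info queried above:
+//
+//   newThis = *(ref*)((byte*)d + offsetOfDelegateInstance);      // becomes the 'this' arg
+//   target  = *(void**)((byte*)d + offsetOfDelegateFirstTarget); // returned as the call target
+//   target(newThis, ...original args...);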
+
+GenTree* Lowering::LowerIndirectNonvirtCall(GenTreeCall* call)
+{
+#ifdef _TARGET_X86_
+ if (call->gtCallCookie != nullptr)
+ {
+ NYI_X86("Morphing indirect non-virtual call with non-standard args");
+ }
+#endif
+
+ // Indirect cookie calls get transformed by fgMorphArgs into indirect calls with non-standard args.
+ // Hence we should never see this type of call in lowering.
+
+ noway_assert(call->gtCallCookie == nullptr);
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// CreateReturnTrapSeq: Create a tree to perform a "return trap", used in PInvoke
+// epilogs to invoke a GC under a condition. The return trap checks some global
+// location (the runtime tells us where that is and how many indirections to make),
+// then, based on the result, conditionally calls a GC helper. We use a special node
+// for this because at this time (late in the compilation phases), introducing flow
+// is tedious/difficult.
+//
+// This is used for PInvoke inlining.
+//
+// Return Value:
+// Code tree to perform the action.
+//
+GenTree* Lowering::CreateReturnTrapSeq()
+{
+ // The GT_RETURNTRAP node expands to this:
+ // if (g_TrapReturningThreads)
+ // {
+ // RareDisablePreemptiveGC();
+ // }
+
+ // The only thing to do here is build up the expression that evaluates 'g_TrapReturningThreads'.
+
+ void* pAddrOfCaptureThreadGlobal = nullptr;
+ LONG* addrOfCaptureThreadGlobal = comp->info.compCompHnd->getAddrOfCaptureThreadGlobal(&pAddrOfCaptureThreadGlobal);
+
+ GenTree* testTree;
+ if (addrOfCaptureThreadGlobal != nullptr)
+ {
+ testTree = Ind(AddrGen(addrOfCaptureThreadGlobal));
+ }
+ else
+ {
+ testTree = Ind(Ind(AddrGen(pAddrOfCaptureThreadGlobal)));
+ }
+ return comp->gtNewOperNode(GT_RETURNTRAP, TYP_INT, testTree);
+}
+
+//------------------------------------------------------------------------
+// SetGCState: Create a tree that stores the given constant (0 or 1) into the
+// thread's GC state field.
+//
+// This is used for PInvoke inlining.
+//
+// Arguments:
+// state - constant (0 or 1) to store into the thread's GC state field.
+//
+// Return Value:
+// Code tree to perform the action.
+//
+GenTree* Lowering::SetGCState(int state)
+{
+ // Thread.offsetOfGcState = 0/1
+
+ assert(state == 0 || state == 1);
+
+ const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
+
+ GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->info.compLvFrameListRoot, -1);
+
+ GenTree* storeGcState = new (comp, GT_STOREIND)
+ GenTreeStoreInd(TYP_BYTE,
+ new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, base, nullptr, 1, pInfo->offsetOfGCState),
+ new (comp, GT_CNS_INT) GenTreeIntCon(TYP_BYTE, state));
+
+ return storeGcState;
+}
+
+//------------------------------------------------------------------------
+// CreateFrameLinkUpdate: Create a tree that either links or unlinks the
+// locally-allocated InlinedCallFrame from the Frame list.
+//
+// This is used for PInvoke inlining.
+//
+// Arguments:
+// action - whether to link (push) or unlink (pop) the Frame
+//
+// Return Value:
+// Code tree to perform the action.
+//
+GenTree* Lowering::CreateFrameLinkUpdate(FrameLinkAction action)
+{
+ const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
+ const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo;
+
+ GenTree* TCB = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot,
+ (IL_OFFSET)-1); // cast to resolve ambiguity.
+
+ // Thread->m_pFrame
+ GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, TCB, nullptr, 1, pInfo->offsetOfThreadFrame);
+
+ GenTree* data = nullptr;
+
+ if (action == PushFrame)
+ {
+ // Thread->m_pFrame = &inlinedCallFrame;
+ data = new (comp, GT_LCL_FLD_ADDR)
+ GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr);
+ }
+ else
+ {
+ assert(action == PopFrame);
+ // Thread->m_pFrame = inlinedCallFrame.m_pNext;
+
+ data = new (comp, GT_LCL_FLD) GenTreeLclFld(GT_LCL_FLD, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
+ }
+ GenTree* storeInd = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_I_IMPL, addr, data);
+ return storeInd;
+}
+
+//------------------------------------------------------------------------
+// InsertPInvokeMethodProlog: Create the code that runs at the start of
+// every method that has PInvoke calls.
+//
+// Initialize the TCB local and the InlinedCallFrame object. Then link ("push")
+// the InlinedCallFrame object on the Frame chain. The layout of InlinedCallFrame
+// is defined in vm/frames.h. See also vm/jitinterface.cpp for more information.
+// The offsets of these fields is returned by the VM in a call to ICorStaticInfo::getEEInfo().
+//
+// The (current) layout is as follows:
+//
+// 64-bit 32-bit CORINFO_EE_INFO
+// offset offset field name offset when set
+// -----------------------------------------------------------------------------------------
+// +00h +00h GS cookie offsetOfGSCookie
+// +08h +04h vptr for class InlinedCallFrame offsetOfFrameVptr method prolog
+// +10h +08h m_Next offsetOfFrameLink method prolog
+// +18h +0Ch m_Datum offsetOfCallTarget call site
+// +20h n/a m_StubSecretArg not set by JIT
+// +28h +10h m_pCallSiteSP offsetOfCallSiteSP x86: call site, and zeroed in method
+// prolog;
+// non-x86: method prolog (SP remains
+// constant in function, after prolog: no
+// localloc and PInvoke in same function)
+// +30h +14h m_pCallerReturnAddress offsetOfReturnAddress call site
+// +38h +18h m_pCalleeSavedFP offsetOfCalleeSavedFP not set by JIT
+// +1Ch JIT retval spill area (int) before call_gc ???
+// +20h JIT retval spill area (long) before call_gc ???
+// +24h Saved value of EBP method prolog ???
+//
+// Note that in the VM, InlinedCallFrame is a C++ class whose objects have a 'this' pointer that points
+// to the InlinedCallFrame vptr (the 2nd field listed above), and the GS cookie is stored *before*
+// the object. When we link the InlinedCallFrame onto the Frame chain, we must point at this location,
+// and not at the beginning of the InlinedCallFrame local, which is actually the GS cookie.
+//
+// Return Value:
+// none
+//
+void Lowering::InsertPInvokeMethodProlog()
+{
+ noway_assert(comp->info.compCallUnmanaged);
+ noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ if (comp->opts.ShouldUsePInvokeHelpers())
+ {
+ return;
+ }
+
+ JITDUMP("======= Inserting PInvoke method prolog\n");
+
+ LIR::Range& firstBlockRange = LIR::AsRange(comp->fgFirstBB);
+
+ const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
+ const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo;
+
+ // First arg: &compiler->lvaInlinedPInvokeFrameVar + callFrameInfo.offsetOfFrameVptr
+
+ GenTree* frameAddr = new (comp, GT_LCL_FLD_ADDR)
+ GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr);
+
+ // Call runtime helper to fill in our InlinedCallFrame and push it on the Frame list:
+ // TCB = CORINFO_HELP_INIT_PINVOKE_FRAME(&symFrameStart, secretArg);
+ // for x86, don't pass the secretArg.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ GenTreeArgList* argList = comp->gtNewArgList(frameAddr);
+#else // !_TARGET_X86_
+ GenTreeArgList* argList = comp->gtNewArgList(frameAddr, PhysReg(REG_SECRET_STUB_PARAM));
+#endif // !_TARGET_X86_
+
+ GenTree* call = comp->gtNewHelperCallNode(CORINFO_HELP_INIT_PINVOKE_FRAME, TYP_I_IMPL, 0, argList);
+
+ // some sanity checks on the frame list root vardsc
+ LclVarDsc* varDsc = &comp->lvaTable[comp->info.compLvFrameListRoot];
+ noway_assert(!varDsc->lvIsParam);
+ noway_assert(varDsc->lvType == TYP_I_IMPL);
+
+ GenTree* store =
+ new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot,
+ (IL_OFFSET)-1); // cast to resolve ambiguity.
+ store->gtOp.gtOp1 = call;
+ store->gtFlags |= GTF_VAR_DEF;
+
+ GenTree* insertionPoint = firstBlockRange.FirstNonPhiOrCatchArgNode();
+
+ comp->fgMorphTree(store);
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, store));
+ DISPTREERANGE(firstBlockRange, store);
+
+#ifndef _TARGET_X86_ // For x86, this step is done at the call site (due to stack pointer not being static in the
+ // function).
+
+ // --------------------------------------------------------
+ // InlinedCallFrame.m_pCallSiteSP = @RSP;
+
+ GenTreeLclFld* storeSP = new (comp, GT_STORE_LCL_FLD)
+ GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
+ storeSP->gtOp1 = PhysReg(REG_SPBASE);
+
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeSP));
+ DISPTREERANGE(firstBlockRange, storeSP);
+
+#endif // !_TARGET_X86_
+
+ // --------------------------------------------------------
+ // InlinedCallFrame.m_pCalleeSavedEBP = @RBP;
+
+ GenTreeLclFld* storeFP =
+ new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
+ callFrameInfo.offsetOfCalleeSavedFP);
+ storeFP->gtOp1 = PhysReg(REG_FPBASE);
+
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeFP));
+ DISPTREERANGE(firstBlockRange, storeFP);
+
+ // --------------------------------------------------------
+
+ if (comp->opts.eeFlags & CORJIT_FLG_IL_STUB)
+ {
+ // Push a frame. If we are NOT in an IL stub, this is done right before the call instead.
+ // The init routine sets InlinedCallFrame's m_pNext, so we just set the thread's top-of-stack.
+ GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
+ DISPTREERANGE(firstBlockRange, frameUpd);
+ }
+}
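+
+// Illustrative sketch (not definitive; register names are illustrative): the prolog built
+// above corresponds roughly to the following pseudo-C:
+//
+//   frameListRoot = CORINFO_HELP_INIT_PINVOKE_FRAME(&inlinedCallFrame.vptr /*, secretArg on non-x86 */);
+//   inlinedCallFrame.m_pCallSiteSP    = SP;   // non-x86 only; x86 does this at each call site
+//   inlinedCallFrame.m_pCalleeSavedFP = FP;
+//   Thread->m_pFrame = &inlinedCallFrame;     // IL stubs only; otherwise done per call site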
+
+//------------------------------------------------------------------------
+// InsertPInvokeMethodEpilog: Code that needs to be run when exiting any method
+// that has PInvoke inlines. This needs to be inserted any place you can exit the
+// function: returns, tailcalls and jmps.
+//
+// Arguments:
+// returnBB - basic block from which a method can return
+ // lastExpr - GenTree of the last top-level statement of returnBB (debug-only arg)
+//
+ // Return Value:
+ // None.
+//
+void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTreePtr lastExpr))
+{
+ assert(returnBB != nullptr);
+ assert(comp->info.compCallUnmanaged);
+
+ if (comp->opts.ShouldUsePInvokeHelpers())
+ {
+ return;
+ }
+
+ JITDUMP("======= Inserting PInvoke method epilog\n");
+
+ // A method doing PInvoke calls has exactly one return block unless it has "jmp" or tail calls.
+ assert(((returnBB == comp->genReturnBB) && (returnBB->bbJumpKind == BBJ_RETURN)) ||
+ returnBB->endsWithTailCallOrJmp(comp));
+
+ LIR::Range& returnBlockRange = LIR::AsRange(returnBB);
+
+ GenTree* insertionPoint = returnBlockRange.LastNode();
+ assert(insertionPoint == lastExpr);
+
+ // Note: PInvoke Method Epilog (PME) needs to be inserted just before GT_RETURN, GT_JMP or GT_CALL node in execution
+ // order so that it is guaranteed that there will be no further PInvokes after that point in the method.
+ //
+ // Example1: GT_RETURN(op1) - say execution order is: Op1, GT_RETURN. After inserting PME, execution order would be
+ // Op1, PME, GT_RETURN
+ //
+ // Example2: GT_CALL(arg side effect computing nodes, Stk Args Setup, Reg Args setup). The execution order would be
+ // arg side effect computing nodes, Stk Args setup, Reg Args setup, GT_CALL
+ // After inserting PME execution order would be:
+ // arg side effect computing nodes, Stk Args setup, Reg Args setup, PME, GT_CALL
+ //
+ // Example3: GT_JMP. After inserting PME execution order would be: PME, GT_JMP
+ // That is after PME, args for GT_JMP call will be setup.
+
+ // TODO-Cleanup: setting GCState to 1 seems to be redundant, as InsertPInvokeCallProlog will set it to zero before a
+ // PInvoke call and InsertPInvokeCallEpilog() will set it back to 1 after the PInvoke. Though this is redundant,
+ // it is harmless.
+ // Note that liveness artificially extends the life of the compLvFrameListRoot var if the method being compiled has
+ // PInvokes. Deleting the statement below would cause an assert in lsra.cpp::SetLastUses() since compLvFrameListRoot
+ // would be live-in to a BBJ_RETURN block without any uses. Long term we need to fix liveness for the x64 case to
+ // properly extend the life of the compLvFrameListRoot var.
+ //
+ // Thread.offsetOfGcState = 0/1
+ // That is [tcb + offsetOfGcState] = 1
+ GenTree* storeGCState = SetGCState(1);
+ returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeGCState));
+
+ if (comp->opts.eeFlags & CORJIT_FLG_IL_STUB)
+ {
+ // Pop the frame, in non-stubs we do this around each PInvoke call
+ GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame);
+ returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
+ }
+}
+
+//------------------------------------------------------------------------
+// InsertPInvokeCallProlog: Emit the call-site prolog for direct calls to unmanaged code.
+// It does all the necessary call-site setup of the InlinedCallFrame.
+//
+// Arguments:
+// call - the call for which we are inserting the PInvoke prolog.
+//
+// Return Value:
+// None.
+//
+void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
+{
+ JITDUMP("======= Inserting PInvoke call prolog\n");
+
+ GenTree* insertBefore = call;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ bool isClosed;
+ insertBefore = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
+ assert(isClosed);
+ }
+
+ const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo;
+
+ gtCallTypes callType = (gtCallTypes)call->gtCallType;
+
+ noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+#if COR_JIT_EE_VERSION > 460
+ if (comp->opts.ShouldUsePInvokeHelpers())
+ {
+ // First argument is the address of the frame variable.
+ GenTree* frameAddr = new (comp, GT_LCL_VAR_ADDR)
+ GenTreeLclVar(GT_LCL_VAR_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET);
+
+ // Insert call to CORINFO_HELP_JIT_PINVOKE_BEGIN
+ GenTree* helperCall =
+ comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_BEGIN, TYP_VOID, 0, comp->gtNewArgList(frameAddr));
+
+ comp->fgMorphTree(helperCall);
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, helperCall));
+ return;
+ }
+#endif
+
+ // Emit the following sequence:
+ //
+ // InlinedCallFrame.callTarget = methodHandle // stored in m_Datum
+ // InlinedCallFrame.m_pCallSiteSP = SP // x86 only
+ // InlinedCallFrame.m_pCallerReturnAddress = return address
+ // Thread.gcState = 0
+ // (non-stub) - update top Frame on TCB
+
+ // ----------------------------------------------------------------------------------
+ // Set up InlinedCallFrame.callSiteTarget (which is how the JIT refers to it).
+ // The actual field is InlinedCallFrame.m_Datum which has many different uses and meanings.
+
+ GenTree* src = nullptr;
+
+ if (callType == CT_INDIRECT)
+ {
+ if (comp->info.compPublishStubParam)
+ {
+ src = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->lvaStubArgumentVar, BAD_IL_OFFSET);
+ }
+ // else { If we don't have secret parameter, m_Datum will be initialized by VM code }
+ }
+ else
+ {
+ assert(callType == CT_USER_FUNC);
+
+ void* pEmbedMethodHandle = nullptr;
+ CORINFO_METHOD_HANDLE embedMethodHandle =
+ comp->info.compCompHnd->embedMethodHandle(call->gtCallMethHnd, &pEmbedMethodHandle);
+
+ noway_assert((!embedMethodHandle) != (!pEmbedMethodHandle));
+
+ if (embedMethodHandle != nullptr)
+ {
+ // InlinedCallFrame.callSiteTarget = methodHandle
+ src = AddrGen(embedMethodHandle);
+ }
+ else
+ {
+ // InlinedCallFrame.callSiteTarget = *pEmbedMethodHandle
+ src = Ind(AddrGen(pEmbedMethodHandle));
+ }
+ }
+
+ if (src != nullptr)
+ {
+ // Store into InlinedCallFrame.m_Datum, the offset of which is given by offsetOfCallTarget.
+ GenTreeLclFld* store =
+ new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
+ callFrameInfo.offsetOfCallTarget);
+ store->gtOp1 = src;
+
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, store));
+ }
+
+#ifdef _TARGET_X86_
+
+ // ----------------------------------------------------------------------------------
+ // InlinedCallFrame.m_pCallSiteSP = SP
+
+ GenTreeLclFld* storeCallSiteSP = new (comp, GT_STORE_LCL_FLD)
+ GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
+
+ storeCallSiteSP->gtOp1 = PhysReg(REG_SPBASE);
+
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeCallSiteSP));
+
+#endif
+
+ // ----------------------------------------------------------------------------------
+ // InlinedCallFrame.m_pCallerReturnAddress = &label (the address of the instruction immediately following the call)
+
+ GenTreeLclFld* storeLab =
+ new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
+ callFrameInfo.offsetOfReturnAddress);
+
+ // We don't have a real label, and inserting one is hard (even if we made a special node),
+ // so for now we will just 'know' what this means in codegen.
+ GenTreeLabel* labelRef = new (comp, GT_LABEL) GenTreeLabel(nullptr);
+ labelRef->gtType = TYP_I_IMPL;
+ storeLab->gtOp1 = labelRef;
+
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeLab));
+
+ if (!(comp->opts.eeFlags & CORJIT_FLG_IL_STUB))
+ {
+ // Set the TCB's frame to be the one we just created.
+ // Note the init routine for the InlinedCallFrame (CORINFO_HELP_INIT_PINVOKE_FRAME)
+ // has prepended it to the linked list to maintain the stack of Frames.
+ //
+ // Stubs do this once per stub, not once per call.
+ GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd));
+ }
+
+ // IMPORTANT **** This instruction must come last!!! ****
+ // It changes the thread's state to Preemptive mode
+ // ----------------------------------------------------------------------------------
+ // [tcb + offsetOfGcState] = 0
+
+ GenTree* storeGCState = SetGCState(0);
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeGCState));
+}
+
+//------------------------------------------------------------------------
+// InsertPInvokeCallEpilog: Insert the code that goes after every inlined pinvoke call.
+//
+// Arguments:
+// call - the call for which we are inserting the PInvoke epilog.
+//
+// Return Value:
+// None.
+//
+void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
+{
+ JITDUMP("======= Inserting PInvoke call epilog\n");
+
+#if COR_JIT_EE_VERSION > 460
+ if (comp->opts.ShouldUsePInvokeHelpers())
+ {
+ noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ // First argument is the address of the frame variable.
+ GenTree* frameAddr =
+ new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET);
+ frameAddr->gtOper = GT_LCL_VAR_ADDR;
+
+ // Insert call to CORINFO_HELP_JIT_PINVOKE_END
+ GenTree* helperCall =
+ comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_END, TYP_VOID, 0, comp->gtNewArgList(frameAddr));
+
+ comp->fgMorphTree(helperCall);
+ BlockRange().InsertAfter(call, LIR::SeqTree(comp, helperCall));
+ return;
+ }
+#endif
+
+ // gcstate = 1
+ GenTree* insertionPoint = call->gtNext;
+
+ GenTree* tree = SetGCState(1);
+ BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
+
+ tree = CreateReturnTrapSeq();
+ BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
+
+ // Pop the frame if necessary
+ if (!(comp->opts.eeFlags & CORJIT_FLG_IL_STUB))
+ {
+ tree = CreateFrameLinkUpdate(PopFrame);
+ BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
+ }
+}
+
+//------------------------------------------------------------------------
+// LowerNonvirtPinvokeCall: Lower a non-virtual / indirect PInvoke call
+//
+// Arguments:
+// call - The call to lower.
+//
+// Return Value:
+// The lowered call tree.
+//
+GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call)
+{
+ // PInvoke lowering varies depending on the flags passed in by the EE. By default,
+ // GC transitions are generated inline; if CORJIT_FLG2_USE_PINVOKE_HELPERS is specified,
+ // GC transitions are instead performed using helper calls. Examples of each case are given
+ // below. Note that the data structure that is used to store information about a call frame
+ // containing any P/Invoke calls is initialized in the method prolog (see
+ // InsertPInvokeMethod{Prolog,Epilog} for details).
+ //
+ // Inline transitions:
+ // InlinedCallFrame inlinedCallFrame;
+ //
+ // ...
+ //
+ // // Set up frame information
+ // inlinedCallFrame.callTarget = methodHandle; // stored in m_Datum
+ // inlinedCallFrame.m_pCallSiteSP = SP; // x86 only
+ // inlinedCallFrame.m_pCallerReturnAddress = &label; (the address of the instruction immediately following the
+ // call)
+ // Thread.m_pFrame = &inlinedCallFrame; (non-IL-stub only)
+ //
+ // // Switch the thread's GC mode to preemptive mode
+ // thread->m_fPreemptiveGCDisabled = 0;
+ //
+ // // Call the unmanaged method
+ // target();
+ //
+ // // Switch the thread's GC mode back to cooperative mode
+ // thread->m_fPreemptiveGCDisabled = 1;
+ //
+ // // Rendezvous with a running collection if necessary
+ // if (g_TrapReturningThreads)
+ // RareDisablePreemptiveGC();
+ //
+ // Transitions using helpers:
+ //
+ // OpaqueFrame opaqueFrame;
+ //
+ // ...
+ //
+ // // Call the JIT_PINVOKE_BEGIN helper
+ // JIT_PINVOKE_BEGIN(&opaqueFrame);
+ //
+ // // Call the unmanaged method
+ // target();
+ //
+ // // Call the JIT_PINVOKE_END helper
+ // JIT_PINVOKE_END(&opaqueFrame);
+ //
+ // Note that the JIT_PINVOKE_{BEGIN,END} helpers currently use the default calling convention for the target
+ // platform. They may be changed in the future such that they preserve all register values.
+
+ GenTree* result = nullptr;
+ void* addr = nullptr;
+
+ // assert we have seen one of these
+ noway_assert(comp->info.compCallUnmanaged != 0);
+
+ // All code generated by this function must not contain the randomly-inserted NOPs
+ // that we insert to inhibit JIT spraying in partial trust scenarios.
+ // The PINVOKE_PROLOG op signals this to the code generator/emitter.
+
+ GenTree* prolog = new (comp, GT_NOP) GenTree(GT_PINVOKE_PROLOG, TYP_VOID);
+ BlockRange().InsertBefore(call, prolog);
+
+ InsertPInvokeCallProlog(call);
+
+ if (call->gtCallType != CT_INDIRECT)
+ {
+ noway_assert(call->gtCallType == CT_USER_FUNC);
+ CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
+
+ CORINFO_CONST_LOOKUP lookup;
+#if COR_JIT_EE_VERSION > 460
+ comp->info.compCompHnd->getAddressOfPInvokeTarget(methHnd, &lookup);
+#else
+ void* pIndirection;
+ lookup.accessType = IAT_PVALUE;
+ lookup.addr = comp->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, &pIndirection);
+ if (lookup.addr == nullptr)
+ {
+ lookup.accessType = IAT_PPVALUE;
+ lookup.addr = pIndirection;
+ }
+#endif
+
+ void* addr = lookup.addr;
+ switch (lookup.accessType)
+ {
+ case IAT_VALUE:
+ if (!IsCallTargetInRange(addr))
+ {
+ result = AddrGen(addr);
+ }
+ else
+ {
+ // a direct call within range of the hardware relative call instruction;
+ // stash the address for codegen
+ call->gtDirectCallAddress = addr;
+#ifdef FEATURE_READYTORUN_COMPILER
+ call->gtEntryPoint.addr = nullptr;
+#endif
+ }
+ break;
+
+ case IAT_PVALUE:
+ result = Ind(AddrGen(addr));
+ break;
+
+ case IAT_PPVALUE:
+ result = Ind(Ind(AddrGen(addr)));
+ break;
+ }
+ }
+
+ InsertPInvokeCallEpilog(call);
+
+ return result;
+}
+
+// Expand the code necessary to calculate the control target.
+// Returns: the expression needed to calculate the control target
+// May insert embedded statements
+GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call)
+{
+ noway_assert(call->gtCallType == CT_USER_FUNC);
+
+ // If this is a tail call via helper, thisPtr will be the third argument.
+ int thisPtrArgNum;
+ regNumber thisPtrArgReg;
+
+#ifndef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
+ if (call->IsTailCallViaHelper())
+ {
+ thisPtrArgNum = 2;
+ thisPtrArgReg = REG_ARG_2;
+ }
+ else
+#endif // !_TARGET_X86_
+ {
+ thisPtrArgNum = 0;
+ thisPtrArgReg = comp->codeGen->genGetThisArgReg(call);
+ }
+
+ // get a reference to the thisPtr being passed
+ fgArgTabEntry* argEntry = comp->gtArgEntryByArgNum(call, thisPtrArgNum);
+ assert(argEntry->regNum == thisPtrArgReg);
+ assert(argEntry->node->gtOper == GT_PUTARG_REG);
+ GenTree* thisPtr = argEntry->node->gtOp.gtOp1;
+
+ // If what we are passing as the thisptr is not already a local, make a new local to place it in
+ // because we will be creating expressions based on it.
+ unsigned lclNum;
+ if (thisPtr->IsLocal())
+ {
+ lclNum = thisPtr->gtLclVarCommon.gtLclNum;
+ }
+ else
+ {
+ // Split off the thisPtr and store to a temporary variable.
+ if (vtableCallTemp == BAD_VAR_NUM)
+ {
+ vtableCallTemp = comp->lvaGrabTemp(true DEBUGARG("virtual vtable call"));
+ }
+
+ LIR::Use thisPtrUse(BlockRange(), &(argEntry->node->gtOp.gtOp1), argEntry->node);
+ thisPtrUse.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), vtableCallTemp);
+
+ lclNum = vtableCallTemp;
+ }
+
+ // We'll introduce another use of this local so increase its ref count.
+ comp->lvaTable[lclNum].incRefCnts(comp->compCurBB->getBBWeight(comp), comp);
+
+ // If the thisPtr is a local field, then construct a local field type node
+ GenTree* local;
+ if (thisPtr->isLclField())
+ {
+ local = new (comp, GT_LCL_FLD)
+ GenTreeLclFld(GT_LCL_FLD, thisPtr->TypeGet(), lclNum, thisPtr->AsLclFld()->gtLclOffs);
+ }
+ else
+ {
+ local = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, thisPtr->TypeGet(), lclNum, BAD_IL_OFFSET);
+ }
+
+ // pointer to virtual table = [REG_CALL_THIS + offs]
+ GenTree* result = Ind(Offset(local, VPTR_OFFS));
+
+ // Get hold of the vtable offset (note: this might be expensive)
+ unsigned vtabOffsOfIndirection;
+ unsigned vtabOffsAfterIndirection;
+ comp->info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection,
+ &vtabOffsAfterIndirection);
+
+ // Get the appropriate vtable chunk
+ // result = [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
+ result = Ind(Offset(result, vtabOffsOfIndirection));
+
+ // Load the function address
+ // result = [reg+vtabOffs]
+ result = Ind(Offset(result, vtabOffsAfterIndirection));
+
+ return result;
+}
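+
+// Illustrative sketch (not definitive): the chain of indirections built above is roughly
+// equivalent to:
+//
+//   vtable = *(void**)(thisPtr + VPTR_OFFS);
+//   chunk  = *(void**)(vtable + vtabOffsOfIndirection);
+//   target = *(void**)(chunk + vtabOffsAfterIndirection);   // returned as the call target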
+
+// Lower stub dispatched virtual calls.
+GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call)
+{
+ assert((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_STUB);
+
+ // An x86 JIT which uses full stub dispatch must generate only
+ // the following stub dispatch calls:
+ //
+ // (1) isCallRelativeIndirect:
+ // call dword ptr [rel32] ; FF 15 ---rel32----
+ // (2) isCallRelative:
+ // call abc ; E8 ---rel32----
+ // (3) isCallRegisterIndirect:
+ // 3-byte nop ;
+ // call dword ptr [eax] ; FF 10
+ //
+ // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
+ // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
+
+ GenTree* result = nullptr;
+
+#ifdef _TARGET_64BIT_
+ // Non-tail calls: Jump stubs are not taken into account by the VM for mapping an AV into a NullRef
+ // exception. Therefore, the JIT needs to emit an explicit null check. Note that Jit64 also generates
+ // an explicit null check.
+ //
+ // Tail calls: fgMorphTailCall() materializes the null check explicitly, so there is no need to emit
+ // another one here.
+
+ // Non-64-bit: No need to null check the this pointer - the dispatch code will deal with this.
+ // The VM considers exceptions that occur in stubs on 64-bit to be unmanaged exceptions, and it
+ // would be difficult to change this in a way that affects only the right stubs.
+
+ if (!call->IsTailCallViaHelper())
+ {
+ call->gtFlags |= GTF_CALL_NULLCHECK;
+ }
+#endif
+
+ // TODO-Cleanup: Disable emitting random NOPs
+
+ // This is code to set up an indirect call to a stub address computed
+ // via dictionary lookup.
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ NYI_X86("Virtual Stub dispatched call lowering via dictionary lookup");
+
+ // The importer decided we needed a stub call via a computed
+ // stub dispatch address, i.e. an address which came from a dictionary lookup.
+ // - The dictionary lookup produces an indirected address, suitable for call
+ // via "call [REG_VIRTUAL_STUB_PARAM]"
+ //
+ // This combination will only be generated for shared generic code and when
+ // stub dispatch is active.
+
+ // fgMorphArgs will have created trees to pass the address in REG_VIRTUAL_STUB_PARAM.
+ // All we have to do here is add an indirection to generate the actual call target.
+
+ GenTree* ind = Ind(call->gtCallAddr);
+ BlockRange().InsertAfter(call->gtCallAddr, ind);
+ call->gtCallAddr = ind;
+ }
+ else
+ {
+ // Direct stub call.
+ // Get stub addr. This will return NULL if virtual call stubs are not active
+ void* stubAddr = call->gtStubCallStubAddr;
+ noway_assert(stubAddr != nullptr);
+
+ // If not CT_INDIRECT, then it should always be a relative indirect call.
+ // This is ensured by the VM.
+ noway_assert(call->IsVirtualStubRelativeIndir());
+
+ // This is a direct stub call, though the stubAddr itself may still need to be
+ // accessed via an indirection.
+ GenTree* addr = AddrGen(stubAddr);
+
+#ifdef _TARGET_X86_
+ // On x86, for tailcall via helper, the JIT_TailCall helper takes the stubAddr as
+ // the target address, and we set a flag that it's a VSD call. The helper then
+ // handles any necessary indirection.
+ if (call->IsTailCallViaHelper())
+ {
+ result = addr;
+ }
+#endif // _TARGET_X86_
+
+ if (result == nullptr)
+ {
+ GenTree* indir = Ind(addr);
+
+// On x86 we generate this:
+// call dword ptr [rel32] ; FF 15 ---rel32----
+// So we don't use a register.
+#ifndef _TARGET_X86_
+ // on x64 we must materialize the target using specific registers.
+ addr->gtRegNum = REG_VIRTUAL_STUB_PARAM;
+ indir->gtRegNum = REG_JUMP_THUNK_PARAM;
+#endif
+ result = indir;
+ }
+ }
+
+ // TODO-Cleanup: start emitting random NOPS
+ return result;
+}
+
+//------------------------------------------------------------------------
+// AddrModeCleanupHelper: Remove the nodes that are no longer used after an
+// addressing mode is constructed
+//
+// Arguments:
+// addrMode - A pointer to a new GenTreeAddrMode
+// node - The node currently being considered for removal
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// 'addrMode' and 'node' must be contained in the current block
+//
+void Lowering::AddrModeCleanupHelper(GenTreeAddrMode* addrMode, GenTree* node)
+{
+ if (node == addrMode->Base() || node == addrMode->Index())
+ {
+ return;
+ }
+
+ // TODO-LIR: change this to use the LIR mark bit and iterate instead of recursing
+ for (GenTree* operand : node->Operands())
+ {
+ AddrModeCleanupHelper(addrMode, operand);
+ }
+
+ BlockRange().Remove(node);
+}
+
+//------------------------------------------------------------------------
+ // Lowering::AreSourcesPossiblyModifiedLocals:
+// Given two nodes which will be used in an addressing mode (base,
+// index), check to see if they are lclVar reads, and if so, walk
+// backwards from the use until both reads have been visited to
+// determine if they are potentially modified in that range.
+//
+// Arguments:
+// addr - the node that uses the base and index nodes
+// base - the base node
+// index - the index node
+//
+ // Returns: true if either the base or the index may be modified between the
+ // point where it is read and addr.
+//
+bool Lowering::AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, GenTree* index)
+{
+ assert(addr != nullptr);
+
+ unsigned markCount = 0;
+
+ SideEffectSet baseSideEffects;
+ if (base != nullptr)
+ {
+ if (base->OperIsLocalRead())
+ {
+ baseSideEffects.AddNode(comp, base);
+ }
+ else
+ {
+ base = nullptr;
+ }
+ }
+
+ SideEffectSet indexSideEffects;
+ if (index != nullptr)
+ {
+ if (index->OperIsLocalRead())
+ {
+ indexSideEffects.AddNode(comp, index);
+ }
+ else
+ {
+ index = nullptr;
+ }
+ }
+
+ for (GenTree* cursor = addr;; cursor = cursor->gtPrev)
+ {
+ assert(cursor != nullptr);
+
+ if (cursor == base)
+ {
+ base = nullptr;
+ }
+
+ if (cursor == index)
+ {
+ index = nullptr;
+ }
+
+ if ((base == nullptr) && (index == nullptr))
+ {
+ return false;
+ }
+
+ m_scratchSideEffects.Clear();
+ m_scratchSideEffects.AddNode(comp, cursor);
+ if ((base != nullptr) && m_scratchSideEffects.InterferesWith(baseSideEffects, false))
+ {
+ return true;
+ }
+
+ if ((index != nullptr) && m_scratchSideEffects.InterferesWith(indexSideEffects, false))
+ {
+ return true;
+ }
+ }
+
+ unreached();
+}
+
+//------------------------------------------------------------------------
+// TryCreateAddrMode: recognize trees which can be implemented using an
+// addressing mode and transform them to a GT_LEA
+//
+// Arguments:
+// use: the use of the address we want to transform
+// isIndir: true if this addressing mode is the child of an indir
+//
+// Returns:
+// The created LEA node or the original address node if an LEA could
+// not be formed.
+//
+GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir)
+{
+ GenTree* addr = use.Def();
+ GenTreePtr base = nullptr;
+ GenTreePtr index = nullptr;
+ unsigned scale = 0;
+ unsigned offset = 0;
+ bool rev = false;
+
+ // TODO-1stClassStructs: This logic is here to preserve prior behavior. Note that previously
+ // block ops were not considered for addressing modes, but an add under it may have been.
+ // This should be replaced with logic that more carefully determines when an addressing mode
+ // would be beneficial for a block op.
+ if (isIndir)
+ {
+ GenTree* indir = use.User();
+ if (indir->TypeGet() == TYP_STRUCT)
+ {
+ isIndir = false;
+ }
+ else if (varTypeIsStruct(indir))
+ {
+ // We can have an indirection on the rhs of a block copy (it is the source
+ // object). This is not a "regular" indirection.
+ // (Note that the parent check could be costly.)
+ GenTree* parent = indir->gtGetParent(nullptr);
+ if ((parent != nullptr) && parent->OperIsIndir())
+ {
+ isIndir = false;
+ }
+ else
+ {
+ isIndir = !indir->OperIsBlk();
+ }
+ }
+ }
+
+ // Find out if an addressing mode can be constructed
+ bool doAddrMode =
+ comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &scale, &offset, true /*nogen*/);
+
+ if (scale == 0)
+ {
+ scale = 1;
+ }
+
+ if (!isIndir)
+ {
+ // this is just a reg-const add
+ if (index == nullptr)
+ {
+ return addr;
+ }
+
+ // this is just a reg-reg add
+ if (scale == 1 && offset == 0)
+ {
+ return addr;
+ }
+ }
+
+ // make sure there are no side effects between the defs of the leaves and the use
+ if (!doAddrMode || AreSourcesPossiblyModifiedLocals(addr, base, index))
+ {
+ JITDUMP(" No addressing mode\n");
+ return addr;
+ }
+
+ GenTreePtr arrLength = nullptr;
+
+ JITDUMP("Addressing mode:\n");
+ JITDUMP(" Base\n");
+ DISPNODE(base);
+ if (index != nullptr)
+ {
+ JITDUMP(" + Index * %u + %u\n", scale, offset);
+ DISPNODE(index);
+ }
+ else
+ {
+ JITDUMP(" + %u\n", offset);
+ }
+
+ var_types addrModeType = addr->TypeGet();
+ if (addrModeType == TYP_REF)
+ {
+ addrModeType = TYP_BYREF;
+ }
+
+ GenTreeAddrMode* addrMode = new (comp, GT_LEA) GenTreeAddrMode(addrModeType, base, index, scale, offset);
+
+ addrMode->gtRsvdRegs = addr->gtRsvdRegs;
+ addrMode->gtFlags |= (addr->gtFlags & GTF_IND_FLAGS);
+ addrMode->gtFlags &= ~GTF_ALL_EFFECT; // LEAs are side-effect-free.
+
+ JITDUMP("New addressing mode node:\n");
+ DISPNODE(addrMode);
+ JITDUMP("\n");
+
+ // Required to prevent assert failure:
+ // Assertion failed 'op1 && op2' in flowgraph.cpp, Line: 34431
+ // when iterating the operands of a GT_LEA
+ // Test Case: self_host_tests_amd64\jit\jit64\opt\cse\VolatileTest_op_mul.exe
+ // Method: TestCSE:.cctor
+ // The method genCreateAddrMode() above probably should be fixed
+ // to not return rev=true, when index is returned as NULL
+ //
+ if (rev && index == nullptr)
+ {
+ rev = false;
+ }
+
+ if (rev)
+ {
+ addrMode->gtFlags |= GTF_REVERSE_OPS;
+ }
+ else
+ {
+ addrMode->gtFlags &= ~(GTF_REVERSE_OPS);
+ }
+
+ BlockRange().InsertAfter(addr, addrMode);
+
+ // Now we need to remove all the nodes subsumed by the addrMode
+ AddrModeCleanupHelper(addrMode, addr);
+
+ // Replace the original address node with the addrMode.
+ use.ReplaceWith(comp, addrMode);
+
+ return addrMode;
+}
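+
+// For illustration (editorial sketch; the local names are purely illustrative): given an
+// indirection whose address is computed as lcl0 + lcl1 * 4 + 16, the folding above yields
+//
+//   base = lcl0, index = lcl1, scale = 4, offset = 16
+//
+// and the whole address expression is replaced by a single GT_LEA node, with the subsumed
+// ADD/MUL/constant nodes removed from the LIR range by AddrModeCleanupHelper.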
+
+//------------------------------------------------------------------------
+// LowerAdd: turn this add into a GT_LEA if that would be profitable
+//
+// Arguments:
+// node - the node we care about
+//
+// Returns:
+// The next node to lower.
+//
+GenTree* Lowering::LowerAdd(GenTree* node)
+{
+ GenTree* next = node->gtNext;
+
+#ifdef _TARGET_ARMARCH_
+ // For ARM architectures we don't have the LEA instruction
+ // therefore we won't get much benefit from doing this.
+ return next;
+#else // _TARGET_ARMARCH_
+ if (!varTypeIsIntegralOrI(node))
+ {
+ return next;
+ }
+
+ LIR::Use use;
+ if (!BlockRange().TryGetUse(node, &use))
+ {
+ return next;
+ }
+
+ // if this is a child of an indir, let the parent handle it.
+ GenTree* parent = use.User();
+ if (parent->OperIsIndir())
+ {
+ return next;
+ }
+
+ // if there is a chain of adds, only look at the topmost one
+ if (parent->gtOper == GT_ADD)
+ {
+ return next;
+ }
+
+ GenTree* addr = TryCreateAddrMode(std::move(use), false);
+ return addr->gtNext;
+#endif // !_TARGET_ARMARCH_
+}
+
+//------------------------------------------------------------------------
+// LowerUnsignedDivOrMod: transform GT_UDIV/GT_UMOD nodes with a const power of 2
+// divisor into GT_RSZ/GT_AND nodes.
+//
+// Arguments:
+// node - pointer to the GT_UDIV/GT_UMOD node to be lowered
+//
+void Lowering::LowerUnsignedDivOrMod(GenTree* node)
+{
+ assert((node->OperGet() == GT_UDIV) || (node->OperGet() == GT_UMOD));
+
+ GenTree* divisor = node->gtGetOp2();
+
+ if (divisor->IsCnsIntOrI())
+ {
+ size_t divisorValue = static_cast<size_t>(divisor->gtIntCon.IconValue());
+
+ if (isPow2(divisorValue))
+ {
+ genTreeOps newOper;
+
+ if (node->OperGet() == GT_UDIV)
+ {
+ newOper = GT_RSZ;
+ divisorValue = genLog2(divisorValue);
+ }
+ else
+ {
+ newOper = GT_AND;
+ divisorValue -= 1;
+ }
+
+ node->SetOper(newOper);
+ divisor->gtIntCon.SetIconValue(divisorValue);
+ }
+ }
+}
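+
+// For illustration (editorial sketch): assuming an unsigned dividend 'x' and a constant
+// divisor of 8, the transform above is equivalent to:
+//
+//   x / 8  ==>  x >> 3   // GT_UDIV becomes GT_RSZ with shift count genLog2(8) == 3
+//   x % 8  ==>  x & 7    // GT_UMOD becomes GT_AND with mask 8 - 1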
+
+//------------------------------------------------------------------------
+// LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2
+// const divisor into equivalent but faster sequences.
+//
+// Arguments:
+// node - pointer to node we care about
+//
+// Returns:
+// The next node to lower.
+//
+GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
+{
+ assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD));
+
+ GenTree* next = node->gtNext;
+ GenTree* divMod = node;
+ GenTree* divisor = divMod->gtGetOp2();
+
+ if (!divisor->IsCnsIntOrI())
+ {
+ return next; // no transformations to make
+ }
+
+ const var_types type = divMod->TypeGet();
+ assert((type == TYP_INT) || (type == TYP_LONG));
+
+ GenTree* dividend = divMod->gtGetOp1();
+
+ if (dividend->IsCnsIntOrI())
+ {
+ // We shouldn't see a divmod with constant operands here but if we do then it's likely
+ // because optimizations are disabled or it's a case that's supposed to throw an exception.
+ // Don't optimize this.
+ return next;
+ }
+
+ ssize_t divisorValue = divisor->gtIntCon.IconValue();
+
+ if (divisorValue == -1)
+ {
+ // x / -1 can't be optimized because INT_MIN / -1 is required to throw an exception.
+
+ // x % -1 is always 0 and the IL spec says that the rem instruction "can" throw an exception if x is
+ // the minimum representable integer. However, the C# spec says that an exception "is" thrown in this
+ // case so optimizing this case would break C# code.
+
+ // A runtime check could be used to handle this case but it's probably too rare to matter.
+ return next;
+ }
+
+ bool isDiv = divMod->OperGet() == GT_DIV;
+
+ if (isDiv)
+ {
+ if ((type == TYP_INT && divisorValue == INT_MIN) || (type == TYP_LONG && divisorValue == INT64_MIN))
+ {
+ // If the divisor is the minimum representable integer value then we can use a compare,
+ // the result is 1 iff the dividend equals divisor.
+ divMod->SetOper(GT_EQ);
+ return next;
+ }
+ }
+
+ size_t absDivisorValue =
+ (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) : static_cast<size_t>(abs(divisorValue));
+
+ if (!isPow2(absDivisorValue))
+ {
+ return next;
+ }
+
+ // We're committed to the conversion now. Go find the use.
+ LIR::Use use;
+ if (!BlockRange().TryGetUse(node, &use))
+ {
+ assert(!"signed DIV/MOD node is unused");
+ return next;
+ }
+
+ // We need to use the dividend node multiple times so its value needs to be
+ // computed once and stored in a temp variable.
+
+ unsigned curBBWeight = comp->compCurBB->getBBWeight(comp);
+
+ LIR::Use opDividend(BlockRange(), &divMod->gtOp.gtOp1, divMod);
+ opDividend.ReplaceWithLclVar(comp, curBBWeight);
+
+ dividend = divMod->gtGetOp1();
+ assert(dividend->OperGet() == GT_LCL_VAR);
+
+ unsigned dividendLclNum = dividend->gtLclVar.gtLclNum;
+
+ GenTree* adjustment = comp->gtNewOperNode(GT_RSH, type, dividend, comp->gtNewIconNode(type == TYP_INT ? 31 : 63));
+
+ if (absDivisorValue == 2)
+ {
+ // If the divisor is +/-2 then we'd end up with a bitwise and between 0/-1 and 1.
+ // We can get the same result by using GT_RSZ instead of GT_RSH.
+ adjustment->SetOper(GT_RSZ);
+ }
+ else
+ {
+ adjustment = comp->gtNewOperNode(GT_AND, type, adjustment, comp->gtNewIconNode(absDivisorValue - 1, type));
+ }
+
+ GenTree* adjustedDividend =
+ comp->gtNewOperNode(GT_ADD, type, adjustment, comp->gtNewLclvNode(dividendLclNum, type));
+
+ comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
+
+ GenTree* newDivMod;
+
+ if (isDiv)
+ {
+ // perform the division by right shifting the adjusted dividend
+ divisor->gtIntCon.SetIconValue(genLog2(absDivisorValue));
+
+ newDivMod = comp->gtNewOperNode(GT_RSH, type, adjustedDividend, divisor);
+
+ if (divisorValue < 0)
+ {
+ // negate the result if the divisor is negative
+ newDivMod = comp->gtNewOperNode(GT_NEG, type, newDivMod);
+ }
+ }
+ else
+ {
+ // dividend % divisor = dividend - divisor * (dividend / divisor)
+ // divisor * (dividend / divisor) translates to (dividend >> log2(divisor)) << log2(divisor),
+ // which simply discards the low log2(divisor) bits; that's just dividend & ~(divisor - 1)
+ // (the mask is applied to the adjusted dividend so that negative dividends are handled correctly)
+ divisor->gtIntCon.SetIconValue(~(absDivisorValue - 1));
+
+ newDivMod = comp->gtNewOperNode(GT_SUB, type, comp->gtNewLclvNode(dividendLclNum, type),
+ comp->gtNewOperNode(GT_AND, type, adjustedDividend, divisor));
+
+ comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
+ }
+
+ // Remove the divisor and dividend nodes from the linear order,
+ // since we have reused them and will resequence the tree
+ BlockRange().Remove(divisor);
+ BlockRange().Remove(dividend);
+
+ // linearize and insert the new tree before the original divMod node
+ BlockRange().InsertBefore(divMod, LIR::SeqTree(comp, newDivMod));
+ BlockRange().Remove(divMod);
+
+ // replace the original divmod node with the new divmod tree
+ use.ReplaceWith(comp, newDivMod);
+
+ return newDivMod->gtNext;
+}
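+
+// For illustration (editorial sketch): assuming a TYP_INT dividend 'x' and a constant
+// divisor of +8, the tree built above computes the equivalent of:
+//
+//   int adj = (x >> 31) & 7;           // 0 when x >= 0, 7 when x < 0
+//   int q   = (x + adj) >> 3;          // x / 8, truncated toward zero
+//   int r   = x - ((x + adj) & ~7);    // x % 8
+//
+// For a negative divisor (e.g. -8) the quotient is additionally negated via GT_NEG.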
+
+//------------------------------------------------------------------------
+// LowerStoreInd: attempt to transform an indirect store to use an
+// addressing mode
+//
+// Arguments:
+// node - the node we care about
+//
+void Lowering::LowerStoreInd(GenTree* node)
+{
+ assert(node != nullptr);
+ assert(node->OperGet() == GT_STOREIND);
+
+ TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
+
+ // Mark all GT_STOREIND nodes to indicate that it is not known
+ // whether it represents a RMW memory op.
+ node->AsStoreInd()->SetRMWStatusDefault();
+}
+
+void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
+{
+ GenTree* src = blkNode->Data();
+ // TODO-1stClassStructs: Don't require this.
+ assert(blkNode->OperIsInitBlkOp() || !src->OperIsLocal());
+ TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
+}
+
+//------------------------------------------------------------------------
+// LowerArrElem: Lower a GT_ARR_ELEM node
+//
+// Arguments:
+// node - the GT_ARR_ELEM node to lower.
+//
+// Return Value:
+// The next node to lower.
+//
+// Assumptions:
+ // 'node' must be a GT_ARR_ELEM node.
+//
+// Notes:
+// This performs the following lowering. We start with a node of the form:
+// /--* <arrObj>
+// +--* <index0>
+// +--* <index1>
+// /--* arrMD&[,]
+//
+// First, we create temps for arrObj if it is not already a lclVar, and for any of the index
+// expressions that have side-effects.
+// We then transform the tree into:
+// <offset is null - no accumulated offset for the first index>
+// /--* <arrObj>
+// +--* <index0>
+// /--* ArrIndex[i, ]
+// +--* <arrObj>
+// /--| arrOffs[i, ]
+// | +--* <arrObj>
+// | +--* <index1>
+// +--* ArrIndex[*,j]
+// +--* <arrObj>
+// /--| arrOffs[*,j]
+// +--* lclVar NewTemp
+// /--* lea (scale = element size, offset = offset of first element)
+//
+// The new stmtExpr may be omitted if the <arrObj> is a lclVar.
+// The new stmtExpr may be embedded if the <arrObj> is not the first tree in linear order for
+// the statement containing the original arrMD.
+// Note that the arrMDOffs is the INDEX of the lea, but is evaluated before the BASE (which is the second
+// reference to NewTemp), because that provides more accurate lifetimes.
+// There may be 1, 2 or 3 dimensions, with 1, 2 or 3 arrMDIdx nodes, respectively.
+//
+GenTree* Lowering::LowerArrElem(GenTree* node)
+{
+ // This will assert if we don't have an ArrElem node
+ GenTreeArrElem* arrElem = node->AsArrElem();
+ const unsigned char rank = arrElem->gtArrElem.gtArrRank;
+ const unsigned blockWeight = m_block->getBBWeight(comp);
+
+ JITDUMP("Lowering ArrElem\n");
+ JITDUMP("============\n");
+ DISPTREERANGE(BlockRange(), arrElem);
+ JITDUMP("\n");
+
+ assert(arrElem->gtArrObj->TypeGet() == TYP_REF);
+
+ // We need to have the array object in a lclVar.
+ if (!arrElem->gtArrObj->IsLocal())
+ {
+ LIR::Use arrObjUse(BlockRange(), &arrElem->gtArrObj, arrElem);
+ arrObjUse.ReplaceWithLclVar(comp, blockWeight);
+ }
+
+ GenTree* arrObjNode = arrElem->gtArrObj;
+ assert(arrObjNode->IsLocal());
+
+ GenTree* insertionPoint = arrElem;
+
+ // The first ArrOffs node will have 0 for the offset of the previous dimension.
+ GenTree* prevArrOffs = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0);
+ BlockRange().InsertBefore(insertionPoint, prevArrOffs);
+
+ for (unsigned char dim = 0; dim < rank; dim++)
+ {
+ GenTree* indexNode = arrElem->gtArrElem.gtArrInds[dim];
+
+ // Use the original arrObjNode on the 0th ArrIndex node, and clone it for subsequent ones.
+ GenTreePtr idxArrObjNode;
+ if (dim == 0)
+ {
+ idxArrObjNode = arrObjNode;
+ }
+ else
+ {
+ idxArrObjNode = comp->gtClone(arrObjNode);
+ BlockRange().InsertBefore(insertionPoint, idxArrObjNode);
+ }
+
+ // Next comes the GT_ARR_INDEX node.
+ GenTreeArrIndex* arrMDIdx = new (comp, GT_ARR_INDEX)
+ GenTreeArrIndex(TYP_INT, idxArrObjNode, indexNode, dim, rank, arrElem->gtArrElem.gtArrElemType);
+ arrMDIdx->gtFlags |= ((idxArrObjNode->gtFlags | indexNode->gtFlags) & GTF_ALL_EFFECT);
+ BlockRange().InsertBefore(insertionPoint, arrMDIdx);
+
+ GenTree* offsArrObjNode = comp->gtClone(arrObjNode);
+ BlockRange().InsertBefore(insertionPoint, offsArrObjNode);
+
+ GenTreeArrOffs* arrOffs =
+ new (comp, GT_ARR_OFFSET) GenTreeArrOffs(TYP_I_IMPL, prevArrOffs, arrMDIdx, offsArrObjNode, dim, rank,
+ arrElem->gtArrElem.gtArrElemType);
+ arrOffs->gtFlags |= ((prevArrOffs->gtFlags | arrMDIdx->gtFlags | offsArrObjNode->gtFlags) & GTF_ALL_EFFECT);
+ BlockRange().InsertBefore(insertionPoint, arrOffs);
+
+ prevArrOffs = arrOffs;
+ }
+
+ // Generate the LEA and make it reverse evaluation, because we want to evaluate the index expression before the
+ // base.
+ unsigned scale = arrElem->gtArrElem.gtArrElemSize;
+ unsigned offset = comp->eeGetMDArrayDataOffset(arrElem->gtArrElem.gtArrElemType, arrElem->gtArrElem.gtArrRank);
+
+ GenTreePtr leaIndexNode = prevArrOffs;
+ if (!jitIsScaleIndexMul(scale))
+ {
+ // We do the address arithmetic in TYP_I_IMPL, though note that the lower bounds and lengths in memory are
+ // TYP_INT
+ GenTreePtr scaleNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, scale);
+ GenTreePtr mulNode = new (comp, GT_MUL) GenTreeOp(GT_MUL, TYP_I_IMPL, leaIndexNode, scaleNode);
+ BlockRange().InsertBefore(insertionPoint, scaleNode, mulNode);
+ leaIndexNode = mulNode;
+ scale = 1;
+ }
+
+ GenTreePtr leaBase = comp->gtClone(arrObjNode);
+ BlockRange().InsertBefore(insertionPoint, leaBase);
+
+ GenTreePtr leaNode = new (comp, GT_LEA) GenTreeAddrMode(arrElem->TypeGet(), leaBase, leaIndexNode, scale, offset);
+ leaNode->gtFlags |= GTF_REVERSE_OPS;
+
+ BlockRange().InsertBefore(insertionPoint, leaNode);
+
+ LIR::Use arrElemUse;
+ if (BlockRange().TryGetUse(arrElem, &arrElemUse))
+ {
+ arrElemUse.ReplaceWith(comp, leaNode);
+ }
+
+ BlockRange().Remove(arrElem);
+
+ JITDUMP("Results of lowering ArrElem:\n");
+ DISPTREERANGE(BlockRange(), leaNode);
+ JITDUMP("\n\n");
+
+ return leaNode;
+}
+
+void Lowering::DoPhase()
+{
+#if 0
+ // The code in this #if can be used to debug lowering issues according to
+ // method hash. To use, simply set environment variables lowerhashlo and lowerhashhi
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("lowerhashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ }
+ char* histr = getenv("lowerhashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ return;
+ else
+ {
+ printf("Lowering for method %s, hash = 0x%x.\n",
+ info.compFullName, info.compMethodHash());
+ printf(""); // in our logic this causes a flush
+ }
+#endif
+#endif
+
+#if !defined(_TARGET_64BIT_)
+ DecomposeLongs decomp(comp); // Initialize the long decomposition class.
+ decomp.PrepareForDecomposition();
+#endif // !defined(_TARGET_64BIT_)
+
+ for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext)
+ {
+ /* Make the block publicly available */
+ comp->compCurBB = block;
+
+#if !defined(_TARGET_64BIT_)
+ decomp.DecomposeBlock(block);
+#endif //!_TARGET_64BIT_
+
+ LowerBlock(block);
+ }
+
+ // If we have any PInvoke calls, insert the one-time prolog code. We've already inserted the epilog code in the
+ // appropriate spots. NOTE: there is a minor optimization opportunity here, as we still create p/invoke data
+ // structures and setup/teardown even if we've eliminated all p/invoke calls due to dead code elimination.
+ if (comp->info.compCallUnmanaged)
+ {
+ InsertPInvokeMethodProlog();
+ }
+
+#ifdef DEBUG
+ JITDUMP("Lower has completed modifying nodes, proceeding to initialize LSRA TreeNodeInfo structs...\n");
+ if (VERBOSE)
+ {
+ comp->fgDispBasicBlocks(true);
+ }
+#endif
+
+ // TODO-Throughput: We re-sort local variables to get the goodness of enregistering recently
+ // introduced local variables both by Rationalize and Lower; the downside is that we need to
+ // recompute standard local variable liveness in order to get Linear CodeGen working.
+ // For now we'll take the throughput hit of recomputing local liveness but in the long term
+ // we're striving to use the unified liveness computation (fgLocalVarLiveness) and stop
+ // computing it separately in LSRA.
+ if (comp->lvaCount != 0)
+ {
+ comp->lvaSortAgain = true;
+ }
+ comp->EndPhase(PHASE_LOWERING_DECOMP);
+
+ comp->fgLocalVarLiveness();
+ // local var liveness can delete code, which may create empty blocks
+ if (!comp->opts.MinOpts() && !comp->opts.compDbgCode)
+ {
+ comp->optLoopsMarked = false;
+ bool modified = comp->fgUpdateFlowGraph();
+ if (modified || comp->lvaSortAgain)
+ {
+ JITDUMP("had to run another liveness pass:\n");
+ comp->fgLocalVarLiveness();
+ }
+ }
+#ifdef DEBUG
+ JITDUMP("Liveness pass finished after lowering, IR:\n");
+ JITDUMP("lvasortagain = %d\n", comp->lvaSortAgain);
+ if (VERBOSE)
+ {
+ comp->fgDispBasicBlocks(true);
+ }
+#endif
+
+ // The initialization code for the TreeNodeInfo map was initially part of a single full IR
+ // traversal. It has been split out because the traversal order of fgWalkTreePost does not
+ // necessarily visit nodes in execution order, and also because the Lowering pass can add new
+ // BasicBlocks on the fly, so such a traversal would not be complete.
+ //
+ // Doing a new traversal guarantees we 'see' all newly introduced trees and basic blocks, allowing us
+ // to correctly initialize all the data structures LSRA requires later on.
+ // This code still has issues with the initialization of locals recently introduced by lowering:
+ // any temporary local variable introduced by lowering won't be enregistered, yielding suboptimal CQ.
+ // The reason is that we cannot re-sort the local variables by ref-count and bump the number of
+ // tracked variables right here, because then LSRA would work with mismatching BitSets (i.e. BitSets
+ // with different 'epochs', created before and after variable resorting, that would result in different
+ // numbers of tracked local variables).
+ //
+ // The fix for this is to refactor this code to run JUST BEFORE LSRA and not as part of lowering.
+ // It's also desirable to avoid initializing this code using a non-execution-order traversal.
+ //
+ LsraLocation currentLoc = 1;
+ for (BasicBlock* block = m_lsra->startBlockSequence(); block != nullptr; block = m_lsra->moveToNextBlock())
+ {
+ GenTreePtr stmt;
+
+ // Increment the LsraLocation (currentLoc) at each BasicBlock.
+ // This ensures that the block boundary (RefTypeBB, RefTypeExpUse and RefTypeDummyDef) RefPositions
+ // are in increasing location order.
+ currentLoc += 2;
+
+ m_block = block;
+ for (GenTree* node : BlockRange().NonPhiNodes())
+ {
+ // We increment the number position of each tree node by 2 to simplify the logic
+ // when there's the case of a tree that implicitly does a dual-definition of temps
+ // (the long case). In that case it is easier to already have an idle spot to handle
+ // a dual-def instead of making some messy adjustments if we only incremented the
+ // number position by one.
+#ifdef DEBUG
+ node->gtSeqNum = currentLoc;
+#endif
+
+ node->gtLsraInfo.Initialize(m_lsra, node, currentLoc);
+ node->gtClearReg(comp);
+
+ // Mark the node's operands as used
+ for (GenTree* operand : node->Operands())
+ {
+ operand->gtLIRFlags &= ~LIR::Flags::IsUnusedValue;
+ }
+
+ // If the node produces a value, mark it as unused.
+ if (node->IsValue())
+ {
+ node->gtLIRFlags |= LIR::Flags::IsUnusedValue;
+ }
+
+ currentLoc += 2;
+ }
+
+ for (GenTree* node : BlockRange().NonPhiNodes())
+ {
+ TreeNodeInfoInit(node);
+
+ // Only nodes that produce values should have a non-zero dstCount.
+ assert((node->gtLsraInfo.dstCount == 0) || node->IsValue());
+
+ // If the node produces an unused value, mark it as a local def-use
+ if ((node->gtLIRFlags & LIR::Flags::IsUnusedValue) != 0)
+ {
+ node->gtLsraInfo.isLocalDefUse = true;
+ node->gtLsraInfo.dstCount = 0;
+ }
+
+#if 0
+ // TODO-CQ: Enable this code after fixing the isContained() logic to not abort for these
+ // top-level nodes that throw away their result.
+ // If this is an interlocked operation that has a non-last-use lclVar as its op2,
+ // make sure we allocate a target register for the interlocked operation.; otherwise we need
+ // not allocate a register
+ else if ((tree->OperGet() == GT_LOCKADD || tree->OperGet() == GT_XCHG || tree->OperGet() == GT_XADD))
+ {
+ tree->gtLsraInfo.dstCount = 0;
+ if (tree->gtGetOp2()->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH) == 0)
+ tree->gtLsraInfo.isLocalDefUse = true;
+ }
+#endif
+ }
+
+ assert(BlockRange().CheckLIR(comp, true));
+ }
+ DBEXEC(VERBOSE, DumpNodeInfoMap());
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// Lowering::CheckCallArg: check that a call argument is in an expected
+// form after lowering.
+//
+// Arguments:
+// arg - the argument to check.
+//
+void Lowering::CheckCallArg(GenTree* arg)
+{
+ if (arg->OperIsStore() || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || arg->OperIsCopyBlkOp())
+ {
+ return;
+ }
+
+ switch (arg->OperGet())
+ {
+#if !defined(_TARGET_64BIT_)
+ case GT_LONG:
+ assert(arg->gtGetOp1()->OperIsPutArg());
+ assert(arg->gtGetOp2()->OperIsPutArg());
+ break;
+#endif
+
+ case GT_LIST:
+ {
+ GenTreeArgList* list = arg->AsArgList();
+ assert(list->IsAggregate());
+
+ for (; list != nullptr; list = list->Rest())
+ {
+ assert(list->Current()->OperIsPutArg());
+ }
+ }
+ break;
+
+ default:
+ assert(arg->OperIsPutArg());
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// Lowering::CheckCall: check that a call is in an expected form after
+// lowering. Currently this amounts to checking its
+// arguments, but could be expanded to verify more
+// properties in the future.
+//
+// Arguments:
+// call - the call to check.
+//
+void Lowering::CheckCall(GenTreeCall* call)
+{
+ if (call->gtCallObjp != nullptr)
+ {
+ CheckCallArg(call->gtCallObjp);
+ }
+
+ for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
+ {
+ CheckCallArg(args->Current());
+ }
+
+ for (GenTreeArgList* args = call->gtCallLateArgs; args != nullptr; args = args->Rest())
+ {
+ CheckCallArg(args->Current());
+ }
+}
+
+//------------------------------------------------------------------------
+// Lowering::CheckNode: check that an LIR node is in an expected form
+// after lowering.
+//
+// Arguments:
+// node - the node to check.
+//
+void Lowering::CheckNode(GenTree* node)
+{
+ switch (node->OperGet())
+ {
+ case GT_CALL:
+ CheckCall(node->AsCall());
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+#ifdef _TARGET_64BIT_
+ case GT_LCL_VAR:
+ case GT_STORE_LCL_VAR:
+#endif // _TARGET_64BIT_
+ assert(node->TypeGet() != TYP_SIMD12);
+ break;
+#endif
+
+ default:
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// Lowering::CheckBlock: check that the contents of an LIR block are in an
+// expected form after lowering.
+//
+// Arguments:
+// compiler - the compiler context.
+// block - the block to check.
+//
+bool Lowering::CheckBlock(Compiler* compiler, BasicBlock* block)
+{
+ assert(block->isEmpty() || block->IsLIR());
+
+ LIR::Range& blockRange = LIR::AsRange(block);
+ for (GenTree* node : blockRange)
+ {
+ CheckNode(node);
+ }
+
+ assert(blockRange.CheckLIR(compiler));
+ return true;
+}
+#endif
+
+void Lowering::LowerBlock(BasicBlock* block)
+{
+ assert(block == comp->compCurBB); // compCurBB must already be set.
+ assert(block->isEmpty() || block->IsLIR());
+
+ m_block = block;
+
+ // NOTE: some of the lowering methods insert calls before the node being
+ // lowered (See e.g. InsertPInvoke{Method,Call}{Prolog,Epilog}). In
+ // general, any code that is inserted before the current node should be
+ // "pre-lowered" as they won't be subject to further processing.
+ // Lowering::CheckBlock() runs some extra checks on call arguments in
+ // order to help catch unlowered nodes.
+
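+ // LowerNode processes the current node and returns the next node to visit
+ // (nullptr once the end of the block range is reached).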
+ GenTree* node = BlockRange().FirstNode();
+ while (node != nullptr)
+ {
+ node = LowerNode(node);
+ }
+
+ assert(CheckBlock(comp, block));
+}
+
+/** Verifies whether both of these trees represent the same indirection.
+ * Used by Lower to annotate whether CodeGen can generate an instruction of the
+ * form *addrMode BinOp= expr
+ *
+ * Preconditions: both trees are children of GT_INDs and their underlying children
+ * have the same gtOper.
+ *
+ * This is a first iteration to actually recognize trees that can be code-generated
+ * as a single read-modify-write instruction on AMD64/x86. For now
+ * this method only supports the recognition of simple addressing modes (through GT_LEA)
+ * or local var indirections. Local fields, array access and other more complex nodes are
+ * not yet supported.
+ *
+ * TODO-CQ: Perform tree recognition by using the Value Numbering Package, that way we can recognize
+ * arbitrary complex trees and support much more addressing patterns.
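+ *
+ * Illustrative example (not part of this change): on xarch, recognizing that the
+ * load and the store of "*(p + 8) += x" refer to the same indirection is what lets
+ * codegen emit a single read-modify-write form such as "add [rax+8], ecx".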
+ */
+bool Lowering::IndirsAreEquivalent(GenTreePtr candidate, GenTreePtr storeInd)
+{
+ assert(candidate->OperGet() == GT_IND);
+ assert(storeInd->OperGet() == GT_STOREIND);
+
+ // We should check the size of the indirections. If they are
+ // different, say because of a cast, then we can't call them equivalent. Doing so could cause us
+ // to drop a cast.
+ // Signed-ness difference is okay and expected since a store indirection must always
+ // be signed based on the CIL spec, but a load could be unsigned.
+ if (genTypeSize(candidate->gtType) != genTypeSize(storeInd->gtType))
+ {
+ return false;
+ }
+
+ GenTreePtr pTreeA = candidate->gtGetOp1();
+ GenTreePtr pTreeB = storeInd->gtGetOp1();
+
+ // This method will be called by codegen (as well as during lowering).
+ // After register allocation, the sources may have been spilled and reloaded
+ // to a different register, indicated by an inserted GT_RELOAD node.
+ pTreeA = pTreeA->gtSkipReloadOrCopy();
+ pTreeB = pTreeB->gtSkipReloadOrCopy();
+
+ genTreeOps oper;
+ unsigned kind;
+
+ if (pTreeA->OperGet() != pTreeB->OperGet())
+ {
+ return false;
+ }
+
+ oper = pTreeA->OperGet();
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ case GT_CLS_VAR_ADDR:
+ case GT_CNS_INT:
+ return NodesAreEquivalentLeaves(pTreeA, pTreeB);
+
+ case GT_LEA:
+ {
+ GenTreeAddrMode* gtAddr1 = pTreeA->AsAddrMode();
+ GenTreeAddrMode* gtAddr2 = pTreeB->AsAddrMode();
+ return NodesAreEquivalentLeaves(gtAddr1->Base(), gtAddr2->Base()) &&
+ NodesAreEquivalentLeaves(gtAddr1->Index(), gtAddr2->Index()) &&
+ gtAddr1->gtScale == gtAddr2->gtScale && gtAddr1->gtOffset == gtAddr2->gtOffset;
+ }
+ default:
+ // We don't handle anything that is not either a constant,
+ // a local var or LEA.
+ return false;
+ }
+}
+
+/** Test whether the two given nodes are the same leaves.
+ * Right now, only constant integers and local variables are supported
+ */
+bool Lowering::NodesAreEquivalentLeaves(GenTreePtr tree1, GenTreePtr tree2)
+{
+ // If both are null they are equivalent; if only one is null they are not.
+ if (tree1 == nullptr && tree2 == nullptr)
+ {
+ return true;
+ }
+
+ if (tree1 == nullptr || tree2 == nullptr)
+ {
+ return false;
+ }
+
+ tree1 = tree1->gtSkipReloadOrCopy();
+ tree2 = tree2->gtSkipReloadOrCopy();
+
+ if (tree1->TypeGet() != tree2->TypeGet())
+ {
+ return false;
+ }
+
+ if (tree1->OperGet() != tree2->OperGet())
+ {
+ return false;
+ }
+
+ if (!tree1->OperIsLeaf() || !tree2->OperIsLeaf())
+ {
+ return false;
+ }
+
+ switch (tree1->OperGet())
+ {
+ case GT_CNS_INT:
+ return tree1->gtIntCon.gtIconVal == tree2->gtIntCon.gtIconVal &&
+ tree1->IsIconHandle() == tree2->IsIconHandle();
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ return tree1->gtLclVarCommon.gtLclNum == tree2->gtLclVarCommon.gtLclNum;
+ case GT_CLS_VAR_ADDR:
+ return tree1->gtClsVar.gtClsVarHnd == tree2->gtClsVar.gtClsVarHnd;
+ default:
+ return false;
+ }
+}
+
+#ifdef _TARGET_64BIT_
+/**
+ * Get common information required to handle a cast instruction
+ *
+ * Right now only supports 64 bit targets. In order to support 32 bit targets the
+ * switch statement needs work.
+ *
+ */
+void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
+{
+ // Initialize castInfo
+ memset(castInfo, 0, sizeof(*castInfo));
+
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = castOp->TypeGet();
+
+ castInfo->unsignedDest = varTypeIsUnsigned(dstType);
+ castInfo->unsignedSource = varTypeIsUnsigned(srcType);
+
+ // If necessary, force the srcType to unsigned when the GTF_UNSIGNED flag is set.
+ if (!castInfo->unsignedSource && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
+ {
+ srcType = genUnsignedType(srcType);
+ castInfo->unsignedSource = true;
+ }
+
+ if (treeNode->gtOverflow() &&
+ (genTypeSize(srcType) >= genTypeSize(dstType) || (srcType == TYP_INT && dstType == TYP_ULONG)))
+ {
+ castInfo->requiresOverflowCheck = true;
+ }
+
+ if (castInfo->requiresOverflowCheck)
+ {
+ ssize_t typeMin = 0;
+ ssize_t typeMax = 0;
+ ssize_t typeMask = 0;
+ bool signCheckOnly = false;
+
+ // Do we need to compare the value, or just check the mask?
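+ // Illustrative note: for small unsigned destinations (e.g. TYP_UBYTE below) only
+ // typeMask is set: any source bit overlapping the mask (0xFFFFFF00 for TYP_UBYTE)
+ // means the value cannot fit. For signed destinations typeMin/typeMax are also set
+ // so the consumer can range-check the value.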
+
+ switch (dstType)
+ {
+ default:
+ assert(!"unreachable: getCastDescription");
+ break;
+
+ case TYP_BYTE:
+ typeMask = ssize_t((int)0xFFFFFF80);
+ typeMin = SCHAR_MIN;
+ typeMax = SCHAR_MAX;
+ break;
+
+ case TYP_UBYTE:
+ typeMask = ssize_t((int)0xFFFFFF00L);
+ break;
+
+ case TYP_SHORT:
+ typeMask = ssize_t((int)0xFFFF8000);
+ typeMin = SHRT_MIN;
+ typeMax = SHRT_MAX;
+ break;
+
+ case TYP_CHAR:
+ typeMask = ssize_t((int)0xFFFF0000L);
+ break;
+
+ case TYP_INT:
+ if (srcType == TYP_UINT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ typeMask = 0xFFFFFFFF80000000LL;
+ typeMin = INT_MIN;
+ typeMax = INT_MAX;
+ }
+ break;
+
+ case TYP_UINT:
+ if (srcType == TYP_INT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ typeMask = 0xFFFFFFFF00000000LL;
+ }
+ break;
+
+ case TYP_LONG:
+ signCheckOnly = true;
+ break;
+
+ case TYP_ULONG:
+ signCheckOnly = true;
+ break;
+ }
+
+ if (signCheckOnly)
+ {
+ castInfo->signCheckOnly = true;
+ }
+
+ castInfo->typeMax = typeMax;
+ castInfo->typeMin = typeMin;
+ castInfo->typeMask = typeMask;
+ }
+}
+
+#endif // _TARGET_64BIT_
+
+#ifdef DEBUG
+void Lowering::DumpNodeInfoMap()
+{
+ printf("-----------------------------\n");
+ printf("TREE NODE INFO DUMP\n");
+ printf("-----------------------------\n");
+
+ for (BasicBlock* block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+ comp->gtDispTree(node, nullptr, nullptr, true);
+ printf(" +");
+ node->gtLsraInfo.dump(m_lsra);
+ }
+ }
+}
+#endif // DEBUG
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lower.h b/src/jit/lower.h
new file mode 100644
index 0000000000..620636d8bd
--- /dev/null
+++ b/src/jit/lower.h
@@ -0,0 +1,280 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Lower XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifndef _LOWER_H_
+#define _LOWER_H_
+
+#include "compiler.h"
+#include "phase.h"
+#include "lsra.h"
+#include "sideeffects.h"
+
+class Lowering : public Phase
+{
+public:
+ inline Lowering(Compiler* compiler, LinearScanInterface* lsra)
+ : Phase(compiler, "Lowering", PHASE_LOWERING), vtableCallTemp(BAD_VAR_NUM)
+ {
+ m_lsra = (LinearScan*)lsra;
+ assert(m_lsra);
+ }
+ virtual void DoPhase();
+
+ // If requiresOverflowCheck is false, all other values will be unset
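+ // (For example, an overflow-checked cast from TYP_LONG to TYP_INT fills in
+ // requiresOverflowCheck, typeMin/typeMax = INT32_MIN/INT32_MAX and
+ // typeMask = 0xFFFFFFFF80000000; see getCastDescription in lower.cpp.)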
+ struct CastInfo
+ {
+ bool requiresOverflowCheck; // Will the cast require an overflow check
+ bool unsignedSource; // Is the source unsigned
+ bool unsignedDest; // is the dest unsigned
+
+ // All other fields are only meaningful if requiresOverflowCheck is set.
+
+ ssize_t typeMin; // Lowest storable value of the dest type
+ ssize_t typeMax; // Highest storable value of the dest type
+ ssize_t typeMask; // For converting from/to unsigned
+ bool signCheckOnly; // For converting between unsigned/signed int
+ };
+
+#ifdef _TARGET_64BIT_
+ static void getCastDescription(GenTreePtr treeNode, CastInfo* castInfo);
+#endif // _TARGET_64BIT_
+
+private:
+#ifdef DEBUG
+ static void CheckCallArg(GenTree* arg);
+ static void CheckCall(GenTreeCall* call);
+ static void CheckNode(GenTree* node);
+ static bool CheckBlock(Compiler* compiler, BasicBlock* block);
+#endif // DEBUG
+
+ void LowerBlock(BasicBlock* block);
+ GenTree* LowerNode(GenTree* node);
+ void CheckVSQuirkStackPaddingNeeded(GenTreeCall* call);
+
+ // ------------------------------
+ // Call Lowering
+ // ------------------------------
+ void LowerCall(GenTree* call);
+ void LowerJmpMethod(GenTree* jmp);
+ void LowerRet(GenTree* ret);
+ GenTree* LowerDelegateInvoke(GenTreeCall* call);
+ GenTree* LowerIndirectNonvirtCall(GenTreeCall* call);
+ GenTree* LowerDirectCall(GenTreeCall* call);
+ GenTree* LowerNonvirtPinvokeCall(GenTreeCall* call);
+ GenTree* LowerTailCallViaHelper(GenTreeCall* callNode, GenTree* callTarget);
+ void LowerFastTailCall(GenTreeCall* callNode);
+ void InsertProfTailCallHook(GenTreeCall* callNode, GenTree* insertionPoint);
+ GenTree* LowerVirtualVtableCall(GenTreeCall* call);
+ GenTree* LowerVirtualStubCall(GenTreeCall* call);
+ void LowerArgsForCall(GenTreeCall* call);
+ void ReplaceArgWithPutArgOrCopy(GenTreePtr* ppChild, GenTreePtr newNode);
+ GenTree* NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryPtr info, var_types type);
+ void LowerArg(GenTreeCall* call, GenTreePtr* ppTree);
+ void InsertPInvokeCallProlog(GenTreeCall* call);
+ void InsertPInvokeCallEpilog(GenTreeCall* call);
+ void InsertPInvokeMethodProlog();
+ void InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTreePtr lastExpr));
+ GenTree* SetGCState(int cns);
+ GenTree* CreateReturnTrapSeq();
+ enum FrameLinkAction
+ {
+ PushFrame,
+ PopFrame
+ };
+ GenTree* CreateFrameLinkUpdate(FrameLinkAction);
+ GenTree* AddrGen(ssize_t addr, regNumber reg = REG_NA);
+ GenTree* AddrGen(void* addr, regNumber reg = REG_NA);
+
+ GenTree* Ind(GenTree* tree)
+ {
+ return comp->gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
+ }
+
+ GenTree* PhysReg(regNumber reg, var_types type = TYP_I_IMPL)
+ {
+ return comp->gtNewPhysRegNode(reg, type);
+ }
+
+ GenTree* PhysRegDst(regNumber reg, GenTree* src)
+ {
+ return comp->gtNewPhysRegNode(reg, src);
+ }
+
+ GenTree* ThisReg(GenTreeCall* call)
+ {
+ return PhysReg(comp->codeGen->genGetThisArgReg(call), TYP_REF);
+ }
+
+ GenTree* Offset(GenTree* base, unsigned offset)
+ {
+ var_types resultType = (base->TypeGet() == TYP_REF) ? TYP_BYREF : base->TypeGet();
+ return new (comp, GT_LEA) GenTreeAddrMode(resultType, base, nullptr, 0, offset);
+ }
+
+ // returns true if the tree can use the read-modify-write memory instruction form
+ bool isRMWRegOper(GenTreePtr tree);
+
+ // return true if this call target is within range of a pc-rel call on the machine
+ bool IsCallTargetInRange(void* addr);
+
+ void TreeNodeInfoInit(GenTree* stmt);
+
+#if defined(_TARGET_XARCH_)
+ void TreeNodeInfoInitSimple(GenTree* tree);
+
+ //----------------------------------------------------------------------
+ // SetRegOptional - sets a bit to indicate to LSRA that the register
+ // for a given tree node is optional for codegen purposes. If no
+ // register is allocated to such a tree node, its parent node treats
+ // it as a contained memory operand during codegen.
+ //
+ // Arguments:
+ // tree - GenTree node
+ //
+ // Returns
+ // None
+ void SetRegOptional(GenTree* tree)
+ {
+ tree->gtLsraInfo.regOptional = true;
+ }
+
+ GenTree* PreferredRegOptionalOperand(GenTree* tree);
+
+ // ------------------------------------------------------------------
+ // SetRegOptionalForBinOp - Indicates which operand of a bin-op has an
+ // optional register requirement. The xarch instruction set allows
+ // either op1 or op2 of a binary operation (e.g. add, mul, etc.) to be
+ // a memory operand. This routine tells the register allocator which
+ // of the operands only optionally requires a register. LSRA might not
+ // allocate a register to the RefTypeUse positions of such operands if
+ // that is beneficial, in which case codegen will treat them as memory
+ // operands.
+ //
+ // Arguments:
+ // tree - GenTree of a binary operation.
+ //
+ // Returns
+ // None.
+ //
+ // Note: On xarch at most one of the operands will be marked as
+ // reg optional, even when both operands could be considered register
+ // optional.
+ void SetRegOptionalForBinOp(GenTree* tree)
+ {
+ assert(GenTree::OperIsBinary(tree->OperGet()));
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ if (tree->OperIsCommutative() && tree->TypeGet() == op1->TypeGet())
+ {
+ GenTree* preferredOp = PreferredRegOptionalOperand(tree);
+ SetRegOptional(preferredOp);
+ }
+ else if (tree->TypeGet() == op2->TypeGet())
+ {
+ SetRegOptional(op2);
+ }
+ }
+#endif // defined(_TARGET_XARCH_)
+ void TreeNodeInfoInitReturn(GenTree* tree);
+ void TreeNodeInfoInitShiftRotate(GenTree* tree);
+ void TreeNodeInfoInitCall(GenTreeCall* call);
+ void TreeNodeInfoInitStructArg(GenTreePtr structArg);
+ void TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode);
+ void TreeNodeInfoInitLogicalOp(GenTree* tree);
+ void TreeNodeInfoInitModDiv(GenTree* tree);
+ void TreeNodeInfoInitIntrinsic(GenTree* tree);
+#ifdef FEATURE_SIMD
+ void TreeNodeInfoInitSIMD(GenTree* tree);
+#endif // FEATURE_SIMD
+ void TreeNodeInfoInitCast(GenTree* tree);
+#ifdef _TARGET_ARM64_
+ void TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info);
+#endif // _TARGET_ARM64_
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ void TreeNodeInfoInitPutArgStk(GenTree* tree);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ void TreeNodeInfoInitLclHeap(GenTree* tree);
+
+ void DumpNodeInfoMap();
+
+ // Per tree node member functions
+ void LowerStoreInd(GenTree* node);
+ GenTree* LowerAdd(GenTree* node);
+ void LowerUnsignedDivOrMod(GenTree* node);
+ GenTree* LowerSignedDivOrMod(GenTree* node);
+ void LowerBlockStore(GenTreeBlk* blkNode);
+
+ GenTree* TryCreateAddrMode(LIR::Use&& use, bool isIndir);
+ void AddrModeCleanupHelper(GenTreeAddrMode* addrMode, GenTree* node);
+
+ GenTree* LowerSwitch(GenTree* node);
+ void LowerCast(GenTree* node);
+
+#if defined(_TARGET_XARCH_)
+ void SetMulOpCounts(GenTreePtr tree);
+#endif // defined(_TARGET_XARCH_)
+
+ void LowerCmp(GenTreePtr tree);
+
+#if !CPU_LOAD_STORE_ARCH
+ bool IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd);
+ bool IsBinOpInRMWStoreInd(GenTreePtr tree);
+ bool IsRMWMemOpRootedAtStoreInd(GenTreePtr storeIndTree, GenTreePtr* indirCandidate, GenTreePtr* indirOpSource);
+ bool SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd);
+#endif
+ void LowerStoreLoc(GenTreeLclVarCommon* tree);
+ void SetIndirAddrOpCounts(GenTree* indirTree);
+ void LowerGCWriteBarrier(GenTree* tree);
+ GenTree* LowerArrElem(GenTree* node);
+ void LowerRotate(GenTree* tree);
+
+ // Utility functions
+ void MorphBlkIntoHelperCall(GenTreePtr pTree, GenTreePtr treeStmt);
+
+public:
+ static bool IndirsAreEquivalent(GenTreePtr pTreeA, GenTreePtr pTreeB);
+
+private:
+ static bool NodesAreEquivalentLeaves(GenTreePtr candidate, GenTreePtr storeInd);
+
+ bool AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, GenTree* index);
+
+ // return true if 'childNode' is an immediate that can be contained
+ // by the 'parentNode' (i.e. folded into an instruction)
+ // for example small enough and non-relocatable
+ bool IsContainableImmed(GenTree* parentNode, GenTree* childNode);
+
+ // Makes 'childNode' contained in the 'parentNode'
+ void MakeSrcContained(GenTreePtr parentNode, GenTreePtr childNode);
+
+ // Checks and makes 'childNode' contained in the 'parentNode'
+ bool CheckImmedAndMakeContained(GenTree* parentNode, GenTree* childNode);
+
+ // Checks for memory conflicts in the instructions between childNode and parentNode, and returns true if childNode
+ // can be contained.
+ bool IsSafeToContainMem(GenTree* parentNode, GenTree* childNode);
+
+ inline LIR::Range& BlockRange() const
+ {
+ return LIR::AsRange(m_block);
+ }
+
+ LinearScan* m_lsra;
+ unsigned vtableCallTemp; // local variable we use as a temp for vtable calls
+ SideEffectSet m_scratchSideEffects; // SideEffectSet used for IsSafeToContainMem and isRMWIndirCandidate
+ BasicBlock* m_block;
+};
+
+#endif // _LOWER_H_
diff --git a/src/jit/lowerarm.cpp b/src/jit/lowerarm.cpp
new file mode 100644
index 0000000000..67cea2ff4e
--- /dev/null
+++ b/src/jit/lowerarm.cpp
@@ -0,0 +1,71 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Lowering for ARM XX
+XX XX
+XX This encapsulates all the logic for lowering trees for the ARM XX
+XX architecture. For a more detailed view of what is lowering, please XX
+XX take a look at Lower.cpp XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+// The ARM backend is not yet implemented, so the methods here are all NYI.
+// TODO-ARM-NYI: Lowering for ARM.
+#ifdef _TARGET_ARM_
+
+#include "jit.h"
+#include "sideeffects.h"
+#include "lower.h"
+#include "lsra.h"
+
+/* Lowering of GT_CAST nodes */
+void Lowering::LowerCast(GenTree* tree)
+{
+ NYI_ARM("ARM Lowering for cast");
+}
+
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+ NYI_ARM("ARM Lowering for ROL and ROR");
+}
+
+void Lowering::TreeNodeInfoInit(GenTree* stmt)
+{
+ NYI("ARM TreeNodInfoInit");
+}
+
+// returns true if the tree can use the read-modify-write memory instruction form
+bool Lowering::isRMWRegOper(GenTreePtr tree)
+{
+ return false;
+}
+
+bool Lowering::IsCallTargetInRange(void* addr)
+{
+ return comp->codeGen->validImmForBL((ssize_t)addr);
+}
+
+// return true if the immediate can be folded into an instruction, for example small enough and non-relocatable
+bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
+{
+ NYI_ARM("ARM IsContainableImmed");
+ return false;
+}
+
+#endif // _TARGET_ARM_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp
new file mode 100644
index 0000000000..1720c62acb
--- /dev/null
+++ b/src/jit/lowerarm64.cpp
@@ -0,0 +1,2063 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Lowering for ARM64 XX
+XX XX
+XX This encapsulates all the logic for lowering trees for the ARM64 XX
+XX architecture. For a more detailed view of what is lowering, please XX
+XX take a look at Lower.cpp XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARM64_
+
+#include "jit.h"
+#include "sideeffects.h"
+#include "lower.h"
+
+// There is not much lowering to do when storing a local, but we do some handling
+// of contained immediates and widening of small constant stores.
+void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+ TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
+
+ // Is this the case of var = call where call is returning
+ // a value in multiple return registers?
+ GenTree* op1 = storeLoc->gtGetOp1();
+ if (op1->IsMultiRegCall())
+ {
+ // backend expects to see this case only for store lclvar.
+ assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+ // srcCount = number of registers in which the value is returned by call
+ GenTreeCall* call = op1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+
+ // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+ regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
+ op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
+ return;
+ }
+
+ CheckImmedAndMakeContained(storeLoc, op1);
+
+ // Try to widen the ops if they are going into a local var.
+ if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT))
+ {
+ GenTreeIntCon* con = op1->AsIntCon();
+ ssize_t ival = con->gtIconVal;
+ unsigned varNum = storeLoc->gtLclNum;
+ LclVarDsc* varDsc = comp->lvaTable + varNum;
+
+ if (varDsc->lvIsSIMDType())
+ {
+ noway_assert(storeLoc->gtType != TYP_STRUCT);
+ }
+ unsigned size = genTypeSize(storeLoc);
+ // If we are storing a constant into a local variable
+ // we extend the size of the store here
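+ // (For example: storing the constant 0xFF into a signed TYP_BYTE local fails the
+ // (ival & 0x7f) == ival check below, so ival becomes 0xFF | 0xffffff00, i.e. -1
+ // sign-extended for the widened TYP_INT store.)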
+ if ((size < 4) && !varTypeIsStruct(varDsc))
+ {
+ if (!varTypeIsUnsigned(varDsc))
+ {
+ if (genTypeSize(storeLoc) == 1)
+ {
+ if ((ival & 0x7f) != ival)
+ {
+ ival = ival | 0xffffff00;
+ }
+ }
+ else
+ {
+ assert(genTypeSize(storeLoc) == 2);
+ if ((ival & 0x7fff) != ival)
+ {
+ ival = ival | 0xffff0000;
+ }
+ }
+ }
+
+ // A local stack slot is at least 4 bytes in size, regardless of
+ // what the local var is typed as, so auto-promote it here
+ // unless it is a field of a promoted struct
+ // TODO-ARM64-CQ: if the field is promoted shouldn't we also be able to do this?
+ if (!varDsc->lvIsStructField)
+ {
+ storeLoc->gtType = TYP_INT;
+ con->SetIconValue(ival);
+ }
+ }
+ }
+}
+
+/**
+ * Takes care of annotating the register requirements
+ * for every TreeNodeInfo struct that maps to each tree node.
+ * Preconditions:
+ * LSRA has been initialized and there is a TreeNodeInfo node
+ * already allocated and initialized for every tree in the IR.
+ * Postconditions:
+ * Every TreeNodeInfo instance has the right annotations on register
+ * requirements needed by LSRA to build the Interval Table (source,
+ * destination and internal [temp] register counts).
+ * This code was originally factored out of LSRA.
+ */
+void Lowering::TreeNodeInfoInit(GenTree* tree)
+{
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ unsigned kind = tree->OperKind();
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ RegisterType registerType = TypeGet(tree);
+
+ switch (tree->OperGet())
+ {
+ GenTree* op1;
+ GenTree* op2;
+
+ default:
+ info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ info->srcCount = 0;
+ }
+ else if (kind & (GTK_SMPOP))
+ {
+ if (tree->gtGetOp2() != nullptr)
+ {
+ info->srcCount = 2;
+ }
+ else
+ {
+ info->srcCount = 1;
+ }
+ }
+ else
+ {
+ unreached();
+ }
+ break;
+
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ LowerStoreLoc(tree->AsLclVarCommon());
+ break;
+
+ case GT_BOX:
+ noway_assert(!"box should not exist here");
+ // The result of 'op1' is also the final result
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_PHYSREGDST:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_COMMA:
+ {
+ GenTreePtr firstOperand;
+ GenTreePtr secondOperand;
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ firstOperand = tree->gtOp.gtOp2;
+ secondOperand = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ firstOperand = tree->gtOp.gtOp1;
+ secondOperand = tree->gtOp.gtOp2;
+ }
+ if (firstOperand->TypeGet() != TYP_VOID)
+ {
+ firstOperand->gtLsraInfo.isLocalDefUse = true;
+ firstOperand->gtLsraInfo.dstCount = 0;
+ }
+ if (tree->TypeGet() == TYP_VOID && secondOperand->TypeGet() != TYP_VOID)
+ {
+ secondOperand->gtLsraInfo.isLocalDefUse = true;
+ secondOperand->gtLsraInfo.dstCount = 0;
+ }
+ }
+
+ __fallthrough;
+
+ case GT_LIST:
+ case GT_ARGPLACE:
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_CNS_DBL:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ {
+ GenTreeDblCon* dblConst = tree->AsDblCon();
+ double constValue = dblConst->gtDblCon.gtDconVal;
+
+ if (emitter::emitIns_valid_imm_for_fmov(constValue))
+ {
+ // Directly encode constant to instructions.
+ }
+ else
+ {
+ // Reserve an int register to load the constant from memory (IF_LARGELDC)
+ info->internalIntCount = 1;
+ }
+ }
+ break;
+
+ case GT_QMARK:
+ case GT_COLON:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ unreached();
+ break;
+
+ case GT_RETURN:
+ TreeNodeInfoInitReturn(tree);
+ break;
+
+ case GT_RETFILT:
+ if (tree->TypeGet() == TYP_VOID)
+ {
+ info->srcCount = 0;
+ info->dstCount = 0;
+ }
+ else
+ {
+ assert(tree->TypeGet() == TYP_INT);
+
+ info->srcCount = 1;
+ info->dstCount = 0;
+
+ info->setSrcCandidates(l, RBM_INTRET);
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
+ }
+ break;
+
+ case GT_NOP:
+ // A GT_NOP is a passthrough if it is void or has a child, but it
+ // must be considered to produce a dummy value if it has a type and
+ // no child.
+ info->srcCount = 0;
+ if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
+ {
+ info->dstCount = 1;
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+ break;
+
+ case GT_JTRUE:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ l->clearDstCount(tree->gtOp.gtOp1);
+ break;
+
+ case GT_JMP:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_SWITCH:
+ // This should never occur since switch nodes must not be visible at this
+ // point in the JIT.
+ info->srcCount = 0;
+ info->dstCount = 0; // To avoid getting uninit errors.
+ noway_assert(!"Switch must be lowered at this point");
+ break;
+
+ case GT_JMPTABLE:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ break;
+
+ case GT_SWITCH_TABLE:
+ info->srcCount = 2;
+ info->internalIntCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_ASG:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ noway_assert(!"We should never hit any assignment operator in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_ADD:
+ case GT_SUB:
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ // overflow operations aren't supported on float/double types.
+ assert(!tree->gtOverflow());
+
+ // No implicit conversions at this stage as the expectation is that
+ // everything is made explicit by adding casts.
+ assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ info->srcCount = 2;
+ info->dstCount = 1;
+ // Check and make op2 contained (if it is a containable immediate)
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+ break;
+
+ case GT_RETURNTRAP:
+ // this just turns into a compare of its child with an int
+ // + a conditional call
+ info->srcCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_MOD:
+ case GT_UMOD:
+ NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
+ assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
+ break;
+
+ case GT_MUL:
+ if (tree->gtOverflow())
+ {
+ // Need a register different from target reg to check for overflow.
+ info->internalIntCount = 2;
+ }
+ __fallthrough;
+
+ case GT_DIV:
+ case GT_MULHI:
+ case GT_UDIV:
+ {
+ info->srcCount = 2;
+ info->dstCount = 1;
+ }
+ break;
+
+ case GT_INTRINSIC:
+ {
+ // TODO-ARM64-NYI
+ // Right now only Abs/Round/Sqrt are treated as math intrinsics
+ noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) ||
+ (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) ||
+ (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt));
+
+ // Both operand and its result must be of the same floating point type.
+ op1 = tree->gtOp.gtOp1;
+ assert(varTypeIsFloating(op1));
+ assert(op1->TypeGet() == tree->TypeGet());
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+ }
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ TreeNodeInfoInitSIMD(tree);
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_CAST:
+ {
+ // TODO-ARM64-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned
+ // register.
+ // see CodeGen::genIntToIntCast()
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Non-overflow casts to/from float/double are done using SSE2 instructions
+ // and that allow the source operand to be either a reg or memop. Given the
+ // fact that casts from small int to float/double are done as two-level casts,
+ // the source operand is always guaranteed to be of size 4 or 8 bytes.
+ var_types castToType = tree->CastToType();
+ GenTreePtr castOp = tree->gtCast.CastOp();
+ var_types castOpType = castOp->TypeGet();
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ castOpType = genUnsignedType(castOpType);
+ }
+#ifdef DEBUG
+ if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType)))
+ {
+ // If converting to float/double, the operand must be 4 or 8 byte in size.
+ if (varTypeIsFloating(castToType))
+ {
+ unsigned opSize = genTypeSize(castOpType);
+ assert(opSize == 4 || opSize == 8);
+ }
+ }
+#endif // DEBUG
+ // Some overflow checks need a temp reg
+
+ CastInfo castInfo;
+
+ // Get information about the cast.
+ getCastDescription(tree, &castInfo);
+
+ if (castInfo.requiresOverflowCheck)
+ {
+ var_types srcType = castOp->TypeGet();
+ emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
+
+ // If we cannot encode either the max or the min comparison value as
+ // an immediate, then we will need to reserve a temporary register to
+ // hold it.
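+ // (Illustrative note: for an overflow-checked cast to TYP_INT the upper bound
+ // is INT32_MAX = 0x7FFFFFFF, which typically cannot be encoded as an ARM64
+ // compare immediate, so a temp register is reserved to hold it.)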
+
+ bool canStoreMaxValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize);
+ bool canStoreMinValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize);
+
+ if (!canStoreMaxValue || !canStoreMinValue)
+ {
+ info->internalIntCount = 1;
+ }
+ }
+ }
+ break;
+
+ case GT_NEG:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+
+ case GT_NOT:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROR:
+ {
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ GenTreePtr shiftBy = tree->gtOp.gtOp2;
+ GenTreePtr source = tree->gtOp.gtOp1;
+ if (shiftBy->IsCnsIntOrI())
+ {
+ l->clearDstCount(shiftBy);
+ info->srcCount--;
+ }
+ }
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ LowerCmp(tree);
+ break;
+
+ case GT_CKFINITE:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+ break;
+
+ case GT_CMPXCHG:
+ info->srcCount = 3;
+ info->dstCount = 1;
+
+ // TODO-ARM64-NYI
+ NYI("CMPXCHG");
+ break;
+
+ case GT_LOCKADD:
+ info->srcCount = 2;
+ info->dstCount = 0;
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+ break;
+
+ case GT_CALL:
+ TreeNodeInfoInitCall(tree->AsCall());
+ break;
+
+ case GT_ADDR:
+ {
+ // For a GT_ADDR, the child node should not be evaluated into a register
+ GenTreePtr child = tree->gtOp.gtOp1;
+ assert(!l->isCandidateLocalRef(child));
+ l->clearDstCount(child);
+ info->srcCount = 0;
+ info->dstCount = 1;
+ }
+ break;
+
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_DYN_BLK:
+ // These should all be eliminated prior to Lowering.
+ assert(!"Non-store block node in Lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ TreeNodeInfoInitBlockStore(tree->AsBlk());
+ break;
+
+ case GT_LCLHEAP:
+ {
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
+ // Here '-' means don't care.
+ //
+ // Size? Init Memory? # temp regs
+ // 0 - 0
+ // const and <=6 ptr words - 0
+ // const and <PageSize No 0
+ // >6 ptr words Yes hasPspSym ? 1 : 0
+ // Non-const Yes hasPspSym ? 1 : 0
+ // Non-const No 2
+ //
+ // PSPSym - If the method has PSPSym increment internalIntCount by 1.
+ //
+ bool hasPspSym;
+#if FEATURE_EH_FUNCLETS
+ hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
+#else
+ hasPspSym = false;
+#endif
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ if (size->IsCnsIntOrI())
+ {
+ MakeSrcContained(tree, size);
+
+ size_t sizeVal = size->gtIntCon.gtIconVal;
+
+ if (sizeVal == 0)
+ {
+ info->internalIntCount = 0;
+ }
+ else
+ {
+ // Compute the amount of memory to properly STACK_ALIGN.
+ // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
+ // This should also help in debugging as we can examine the original size specified with
+ // localloc.
+ sizeVal = AlignUp(sizeVal, STACK_ALIGN);
+ size_t cntStackAlignedWidthItems = (sizeVal >> STACK_ALIGN_SHIFT);
+
+ // For small allocations of up to 4 'stp' instructions (i.e. 64 bytes of localloc)
+ //
+ if (cntStackAlignedWidthItems <= 4)
+ {
+ info->internalIntCount = 0;
+ }
+ else if (!compiler->info.compInitMem)
+ {
+ // No need to initialize allocated stack space.
+ if (sizeVal < compiler->eeGetPageSize())
+ {
+ info->internalIntCount = 0;
+ }
+ else
+ {
+ // We need two registers: regCnt and RegTmp
+ info->internalIntCount = 2;
+ }
+ }
+ else
+ {
+ // Greater than 4 slots and we need to zero-initialize the allocated stack space.
+ // If the method has a PSPSym, we need an internal register to hold regCnt
+ // since the targetReg allocated to the GT_LCLHEAP node could be the same as one
+ // of the internal registers.
+ info->internalIntCount = hasPspSym ? 1 : 0;
+ }
+ }
+ }
+ else
+ {
+ if (!compiler->info.compInitMem)
+ {
+ info->internalIntCount = 2;
+ }
+ else
+ {
+ // If the method has a PSPSym, we need an internal register to hold regCnt
+ // since the targetReg allocated to the GT_LCLHEAP node could be the same as one
+ // of the internal registers.
+ info->internalIntCount = hasPspSym ? 1 : 0;
+ }
+ }
+
+ // If the method has a PSPSym, we would need an additional register to relocate it on the stack.
+ if (hasPspSym)
+ {
+ // Exclude const size 0
+ if (!size->IsCnsIntOrI() || (size->gtIntCon.gtIconVal > 0))
+ info->internalIntCount++;
+ }
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ GenTreeBoundsChk* node = tree->AsBoundsChk();
+ // Consumes arrLen & index - has no result
+ info->srcCount = 2;
+ info->dstCount = 0;
+
+ GenTree* intCns = nullptr;
+ GenTree* other = nullptr;
+ if (CheckImmedAndMakeContained(tree, node->gtIndex))
+ {
+ intCns = node->gtIndex;
+ other = node->gtArrLen;
+ }
+ else if (CheckImmedAndMakeContained(tree, node->gtArrLen))
+ {
+ intCns = node->gtArrLen;
+ other = node->gtIndex;
+ }
+ else
+ {
+ other = node->gtIndex;
+ }
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ // These must have been lowered to GT_ARR_INDEX
+ noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_ARR_INDEX:
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ // We need one internal register when generating code for GT_ARR_INDEX; however, the
+ // register allocator may give us the same register it gives us for the 'dst', so as
+ // a workaround we just ask for two internal registers.
+ //
+ info->internalIntCount = 2;
+
+ // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
+ // times while the result is being computed.
+ tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
+ info->hasDelayFreeSrc = true;
+ break;
+
+ case GT_ARR_OFFSET:
+ // This consumes the offset, if any, the arrObj and the effective index,
+ // and produces the flattened offset for this dimension.
+ info->srcCount = 3;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+
+ // we don't want to generate code for this
+ if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
+ {
+ MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
+ }
+ break;
+
+ case GT_LEA:
+ {
+ GenTreeAddrMode* lea = tree->AsAddrMode();
+
+ GenTree* base = lea->Base();
+ GenTree* index = lea->Index();
+ unsigned cns = lea->gtOffset;
+
+ // This LEA is instantiating an address,
+ // so we set up the srcCount and dstCount here.
+ info->srcCount = 0;
+ if (base != nullptr)
+ {
+ info->srcCount++;
+ }
+ if (index != nullptr)
+ {
+ info->srcCount++;
+ }
+ info->dstCount = 1;
+
+ // On ARM64 we may need a single internal register
+ // (when both conditions are true then we still only need a single internal register)
+ if ((index != nullptr) && (cns != 0))
+ {
+ // ARM64 does not support both Index and offset so we need an internal register
+ info->internalIntCount = 1;
+ }
+ else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
+ {
+ // This offset can't be contained in the add instruction, so we need an internal register
+ info->internalIntCount = 1;
+ }
+ }
+ break;
+
+ case GT_STOREIND:
+ {
+ info->srcCount = 2;
+ info->dstCount = 0;
+ GenTree* src = tree->gtOp.gtOp2;
+
+ if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
+ {
+ LowerGCWriteBarrier(tree);
+ break;
+ }
+ if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0))
+ {
+ // an integer zero for 'src' can be contained.
+ MakeSrcContained(tree, src);
+ }
+
+ SetIndirAddrOpCounts(tree);
+ }
+ break;
+
+ case GT_NULLCHECK:
+ info->dstCount = 0;
+ info->srcCount = 1;
+ info->isLocalDefUse = true;
+ // null check is an indirection on an addr
+ SetIndirAddrOpCounts(tree);
+ break;
+
+ case GT_IND:
+ info->dstCount = 1;
+ info->srcCount = 1;
+ SetIndirAddrOpCounts(tree);
+ break;
+
+ case GT_CATCH_ARG:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
+ break;
+
+ case GT_CLS_VAR:
+ info->srcCount = 0;
+ // GT_CLS_VAR, by the time we reach the backend, must always
+ // be a pure use.
+ // It will produce a result of the type of the
+ // node, and use an internal register for the address.
+
+ info->dstCount = 1;
+ assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0);
+ info->internalIntCount = 1;
+ break;
+ } // end switch (tree->OperGet())
+
+ // We need to be sure that we've set info->srcCount and info->dstCount appropriately
+ assert((info->dstCount < 2) || tree->IsMultiRegCall());
+}
+//------------------------------------------------------------------------
+// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ GenTree* op1 = tree->gtGetOp1();
+ regMaskTP useCandidates = RBM_NONE;
+
+ info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ info->dstCount = 0;
+
+ if (varTypeIsStruct(tree))
+ {
+ // op1 has to be either an lclvar or a multi-reg returning call
+ if ((op1->OperGet() == GT_LCL_VAR) || (op1->OperGet() == GT_LCL_FLD))
+ {
+ GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]);
+ assert(varDsc->lvIsMultiRegRet);
+
+ // Mark var as contained if not enregistrable.
+ if (!varTypeIsEnregisterableStruct(op1))
+ {
+ MakeSrcContained(tree, op1);
+ }
+ }
+ else
+ {
+ noway_assert(op1->IsMultiRegCall());
+
+ ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+ useCandidates = retTypeDesc->GetABIReturnRegs();
+ }
+ }
+ else
+ {
+ // Non-struct type return - determine useCandidates
+ switch (tree->TypeGet())
+ {
+ case TYP_VOID:
+ useCandidates = RBM_NONE;
+ break;
+ case TYP_FLOAT:
+ useCandidates = RBM_FLOATRET;
+ break;
+ case TYP_DOUBLE:
+ useCandidates = RBM_DOUBLERET;
+ break;
+ case TYP_LONG:
+ useCandidates = RBM_LNGRET;
+ break;
+ default:
+ useCandidates = RBM_INTRET;
+ break;
+ }
+ }
+
+ if (useCandidates != RBM_NONE)
+ {
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, useCandidates);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCall: Set the NodeInfo for a call.
+//
+// Arguments:
+// call - The call node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
+{
+ TreeNodeInfo* info = &(call->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+ bool hasMultiRegRetVal = false;
+ ReturnTypeDesc* retTypeDesc = nullptr;
+
+ info->srcCount = 0;
+ if (call->TypeGet() != TYP_VOID)
+ {
+ hasMultiRegRetVal = call->HasMultiRegRetVal();
+ if (hasMultiRegRetVal)
+ {
+ // dst count = number of registers in which the value is returned by call
+ retTypeDesc = call->GetReturnTypeDesc();
+ info->dstCount = retTypeDesc->GetReturnRegCount();
+ }
+ else
+ {
+ info->dstCount = 1;
+ }
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+
+ GenTree* ctrlExpr = call->gtControlExpr;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ // either gtControlExpr != null or gtCallAddr != null.
+ // Both cannot be non-null at the same time.
+ assert(ctrlExpr == nullptr);
+ assert(call->gtCallAddr != nullptr);
+ ctrlExpr = call->gtCallAddr;
+ }
+
+ // set reg requirements on call target represented as control sequence.
+ if (ctrlExpr != nullptr)
+ {
+ // we should never see a gtControlExpr whose type is void.
+ assert(ctrlExpr->TypeGet() != TYP_VOID);
+
+ info->srcCount++;
+
+ // In the case of a fast tail call implemented as a jmp, make sure that gtControlExpr
+ // is computed into a register.
+ if (call->IsFastTailCall())
+ {
+ // Fast tail call - make sure that call target is always computed in IP0
+ // so that epilog sequence can generate "br xip0" to achieve fast tail call.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0));
+ }
+ }
+
+ RegisterType registerType = call->TypeGet();
+
+ // Set destination candidates for return value of the call.
+ if (hasMultiRegRetVal)
+ {
+ assert(retTypeDesc != nullptr);
+ info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
+ }
+ else if (varTypeIsFloating(registerType))
+ {
+ info->setDstCandidates(l, RBM_FLOATRET);
+ }
+ else if (registerType == TYP_LONG)
+ {
+ info->setDstCandidates(l, RBM_LNGRET);
+ }
+ else
+ {
+ info->setDstCandidates(l, RBM_INTRET);
+ }
+
+ // If there is an explicit this pointer, we don't want that node to produce anything
+ // as it is redundant
+ if (call->gtCallObjp != nullptr)
+ {
+ GenTreePtr thisPtrNode = call->gtCallObjp;
+
+ if (thisPtrNode->gtOper == GT_PUTARG_REG)
+ {
+ l->clearOperandCounts(thisPtrNode);
+ l->clearDstCount(thisPtrNode->gtOp.gtOp1);
+ }
+ else
+ {
+ l->clearDstCount(thisPtrNode);
+ }
+ }
+
+ // First, count reg args
+ bool callHasFloatRegArgs = false;
+
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ // late arg that is not passed in a register
+ assert(argNode->gtOper == GT_PUTARG_STK);
+
+ TreeNodeInfoInitPutArgStk(argNode, curArgTabEntry);
+ continue;
+ }
+
+ var_types argType = argNode->TypeGet();
+ bool argIsFloat = varTypeIsFloating(argType);
+ callHasFloatRegArgs |= argIsFloat;
+
+ regNumber argReg = curArgTabEntry->regNum;
+ // We will setup argMask to the set of all registers that compose this argument
+ regMaskTP argMask = 0;
+
+ argNode = argNode->gtEffectiveVal();
+
+ // A GT_LIST has a TYP_VOID, but is used to represent a multireg struct
+ if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_LIST))
+ {
+ GenTreePtr actualArgNode = argNode;
+ unsigned originalSize = 0;
+
+ if (argNode->gtOper == GT_LIST)
+ {
+ // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+
+ // Initailize the first register and the first regmask in our list
+ regNumber targetReg = argReg;
+ regMaskTP targetMask = genRegMask(targetReg);
+ unsigned iterationNum = 0;
+ originalSize = 0;
+
+ for (; argListPtr; argListPtr = argListPtr->Rest())
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+ GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1;
+
+ originalSize += REGSIZE_BYTES; // 8 bytes
+
+ // Record the register requirements for the GT_PUTARG_REG node
+ putArgRegNode->gtLsraInfo.setDstCandidates(l, targetMask);
+ putArgRegNode->gtLsraInfo.setSrcCandidates(l, targetMask);
+
+ // To avoid redundant moves, request that the argument child tree be
+ // computed in the register in which the argument is passed to the call.
+ putArgChild->gtLsraInfo.setSrcCandidates(l, targetMask);
+
+ // We consume one source for each item in this list
+ info->srcCount++;
+ iterationNum++;
+
+ // Update targetReg and targetMask for the next putarg_reg (if any)
+ targetReg = genRegArgNext(targetReg);
+ targetMask = genRegMask(targetReg);
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ compiler->gtDispTreeRange(BlockRange(), argNode);
+#endif
+ noway_assert(!"Unsupported TYP_STRUCT arg kind");
+ }
+
+ unsigned slots = ((unsigned)(roundUp(originalSize, REGSIZE_BYTES))) / REGSIZE_BYTES;
+ regNumber curReg = argReg;
+ regNumber lastReg = argIsFloat ? REG_ARG_FP_LAST : REG_ARG_LAST;
+ unsigned remainingSlots = slots;
+
+ while (remainingSlots > 0)
+ {
+ argMask |= genRegMask(curReg);
+ remainingSlots--;
+
+ if (curReg == lastReg)
+ break;
+
+ curReg = genRegArgNext(curReg);
+ }
+
+ // Struct typed arguments must be fully passed in registers (Reg/Stk split not allowed)
+ noway_assert(remainingSlots == 0);
+ argNode->gtLsraInfo.internalIntCount = 0;
+ }
+ else // A scalar argument (not a struct)
+ {
+ // We consume one source
+ info->srcCount++;
+
+ argMask |= genRegMask(argReg);
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+ // To avoid redundant moves, request that the argument child tree be
+ // computed in the register in which the argument is passed to the call.
+ putArgChild->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+ }
+ }
+
+ // Now, count stack args
+ // Note that these need to be computed into a register, but then
+ // they're just stored to the stack - so the reg doesn't
+ // need to remain live until the call. In fact, it must not
+ // because the code generator doesn't actually consider it live,
+ // so it can't be spilled.
+
+ GenTreePtr args = call->gtCallArgs;
+ while (args)
+ {
+ GenTreePtr arg = args->gtOp.gtOp1;
+
+ // Skip arguments that have been moved to the Late Arg list
+ if (!(args->gtFlags & GTF_LATE_ARG))
+ {
+ if (arg->gtOper == GT_PUTARG_STK)
+ {
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
+ assert(curArgTabEntry);
+
+ assert(curArgTabEntry->regNum == REG_STK);
+
+ TreeNodeInfoInitPutArgStk(arg, curArgTabEntry);
+ }
+ else
+ {
+ TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
+ if (argInfo->dstCount != 0)
+ {
+ argInfo->isLocalDefUse = true;
+ }
+
+ argInfo->dstCount = 0;
+ }
+ }
+ args = args->gtOp.gtOp2;
+ }
+
+ // If it is a fast tail call, it is already preferenced to use IP0.
+ // Therefore, there is no need to set src candidates on the call target again.
+ if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
+ {
+ // Don't assign the call target to any of the argument registers because
+ // we will use them to also pass floating point arguments as required
+ // by Arm64 ABI.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
+//
+// Arguments:
+// argNode - a GT_PUTARG_STK node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Set the child node(s) to be contained when we have a multireg arg
+//
+void Lowering::TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info)
+{
+ assert(argNode->gtOper == GT_PUTARG_STK);
+
+ GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+ // Initialize 'argNode' as not contained, as this is both the default case
+ // and how MakeSrcContained expects to find things setup.
+ //
+ argNode->gtLsraInfo.srcCount = 1;
+ argNode->gtLsraInfo.dstCount = 0;
+
+ // Do we have a TYP_STRUCT argument (or a GT_LIST), if so it must be a multireg pass-by-value struct
+ if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_LIST))
+ {
+ // We will use store instructions that each write a register sized value
+
+ if (putArgChild->OperGet() == GT_LIST)
+ {
+ // We consume all of the items in the GT_LIST
+ argNode->gtLsraInfo.srcCount = info->numSlots;
+ }
+ else
+ {
+ // We could use a ldp/stp sequence so we need two internal registers
+ argNode->gtLsraInfo.internalIntCount = 2;
+
+ if (putArgChild->OperGet() == GT_OBJ)
+ {
+ GenTreePtr objChild = putArgChild->gtOp.gtOp1;
+ if (objChild->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
+ // as one contained operation
+ //
+ MakeSrcContained(putArgChild, objChild);
+ }
+ }
+
+ // We will generate all of the code for the GT_PUTARG_STK and it's child node
+ // as one contained operation
+ //
+ MakeSrcContained(argNode, putArgChild);
+ }
+ }
+ else
+ {
+ // We must not have a multi-reg struct
+ assert(info->numSlots == 1);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+//
+// Arguments:
+// blkNode - The block store node of interest
+//
+// Return Value:
+// None.
+//
+// Notes:
+
+void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+{
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size;
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ // Sources are dest address and initVal or source.
+ // We may require an additional source or temp register for the size.
+ blkNode->gtLsraInfo.srcCount = 2;
+ blkNode->gtLsraInfo.dstCount = 0;
+
+ if ((blkNode->OperGet() == GT_STORE_OBJ) && (blkNode->AsObj()->gtGcPtrCount == 0))
+ {
+ blkNode->SetOper(GT_STORE_BLK);
+ }
+
+ if (blkNode->OperIsInitBlkOp())
+ {
+ unsigned size = blkNode->gtBlkSize;
+ GenTreePtr initVal = blkNode->Data();
+
+#if 0
+ // TODO-ARM64-CQ: Currently we generate a helper call for every
+ // initblk we encounter. Later on we should implement loop unrolling
+ // code sequences to improve CQ.
+ // For reference see the code in LowerXArch.cpp.
+ if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT) && initVal->IsCnsIntOrI())
+ {
+ // The fill value of an initblk is interpreted to hold a
+ // value of (unsigned int8) however a constant of any size
+ // may practically reside on the evaluation stack. So extract
+ // the lower byte out of the initVal constant and replicate
+ // it to a larger constant whose size is sufficient to support
+ // the largest width store of the desired inline expansion.
+
+ ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
+ if (size < REGSIZE_BYTES)
+ {
+ initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
+ }
+ else
+ {
+ initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill;
+ initVal->gtType = TYP_LONG;
+ }
+
+ MakeSrcContained(tree, blockSize);
+
+ // In case we have a buffer >= 16 bytes
+ // we can use SSE2 to do a 128-bit store in a single
+ // instruction.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // Reserve an XMM register to fill it with
+ // a pack of 16 init value constants.
+ blkNode->gtLsraInfo.internalFloatCount = 1;
+ blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+ initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll;
+ }
+ }
+ else
+#endif // 0
+ {
+ // The helper follows the regular AMD64 ABI.
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
+ initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2);
+ blkNode->gtLsraInfo.internalIntCount = 1;
+ }
+ else
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ blkNode->gtLsraInfo.setSrcCount(3);
+ GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize;
+ sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
+ }
+ }
+ }
+ else
+ {
+ // CopyObj or CopyBlk
+ // Sources are src and dest and size if not constant.
+ unsigned size = blkNode->gtBlkSize;
+ GenTreePtr source = blkNode->Data();
+ GenTree* srcAddr = nullptr;
+
+ if (source->gtOper == GT_IND)
+ {
+ srcAddr = blkNode->Data()->gtGetOp1();
+ // We're effectively setting source as contained, but can't call MakeSrcContained, because the
+ // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
+ // If srcAddr is already non-contained, we don't need to change it.
+ if (srcAddr->gtLsraInfo.getDstCount() == 0)
+ {
+ srcAddr->gtLsraInfo.setDstCount(1);
+ srcAddr->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
+ }
+ m_lsra->clearOperandCounts(source);
+ }
+ else
+ {
+ assert(source->IsLocal());
+ MakeSrcContained(blkNode, source);
+ }
+ if (blkNode->OperGet() == GT_STORE_OBJ)
+ {
+ // CopyObj
+
+ GenTreeObj* objNode = blkNode->AsObj();
+ GenTreePtr source = objNode->Data();
+
+ unsigned slots = objNode->gtSlots;
+
+#ifdef DEBUG
+ // CpObj must always have at least one GC-Pointer as a member.
+ assert(objNode->gtGcPtrCount > 0);
+
+ assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL);
+
+ CORINFO_CLASS_HANDLE clsHnd = objNode->gtClass;
+ size_t classSize = compiler->info.compCompHnd->getClassSize(clsHnd);
+ size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE);
+
+ // Currently, the EE always rounds up a class data structure so
+ // we are not handling the case of a struct whose size is not a multiple of
+ // the pointer size. This behavior may change in the future, so in order to keep
+ // things correct let's assert it just to be safe. Going forward we should simply
+ // handle this case.
+ assert(classSize == blkSize);
+ assert((blkSize / TARGET_POINTER_SIZE) == slots);
+ assert(objNode->HasGCPtr());
+#endif
+
+ // We don't need to materialize the struct size but we still need
+ // a temporary register to perform the sequence of loads and stores.
+ blkNode->gtLsraInfo.internalIntCount = 1;
+
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);
+ srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF);
+ }
+ else
+ {
+ // CopyBlk
+ unsigned size = blkNode->gtBlkSize;
+ GenTreePtr dstAddr = blkNode->Addr();
+ GenTreePtr srcAddr = blkNode->Data();
+ short internalIntCount = 0;
+ regMaskTP internalIntCandidates = RBM_NONE;
+
+#if 0
+ // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
+ // we should unroll the loop to improve CQ.
+
+ // TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented.
+
+ if (blockSize->IsCnsIntOrI() && blockSize->gtIntCon.gtIconVal <= CPBLK_UNROLL_LIMIT)
+ {
+ assert(!blockSize->IsIconHandle());
+ ssize_t size = blockSize->gtIntCon.gtIconVal;
+
+ // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
+ // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of
+ // our framework assemblies, so this is the main code generation scheme we'll use.
+ if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
+ {
+ info->internalIntCount++;
+ info->addInternalCandidates(l, l->allRegs(TYP_INT));
+ }
+
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // If we have a buffer larger than XMM_REGSIZE_BYTES,
+ // reserve an XMM register to use it for a
+ // series of 16-byte loads and stores.
+ blkNode->gtLsraInfo.internalFloatCount = 1;
+ blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+
+ // If src or dst are on stack, we don't have to generate the address into a register
+ // because it's just some constant+SP
+ if (srcAddr->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, srcAddr);
+ }
+
+ if (dstAddr->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, dstAddr);
+ }
+
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ }
+ else
+#endif // 0
+ {
+ // If we reach here with a constant size, it means we went beyond
+ // CPBLK_UNROLL_LIMIT bytes; in any case, we should never have
+ // any GC-Pointers in the src struct at this point.
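+ // The copy helper takes its arguments in memcpy-style (dst, src, size) order, so the
+ // operands are pinned to the first three argument registers below.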
+
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
+ // The srcAddr goes in arg1.
+ if (srcAddr != nullptr)
+ {
+ srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
+ }
+ else
+ {
+ // This is a local; we'll use a temp register for its address.
+ internalIntCandidates |= RBM_ARG_1;
+ internalIntCount++;
+ }
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ internalIntCandidates |= RBM_ARG_2;
+ internalIntCount++;
+ }
+ else
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ blkNode->gtLsraInfo.setSrcCount(3);
+ GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
+ assert(!blockSize->IsIconHandle());
+ blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
+ }
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+ }
+ if (internalIntCount != 0)
+ {
+ blkNode->gtLsraInfo.internalIntCount = internalIntCount;
+ blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates);
+ }
+ }
+ }
+}
+
+#ifdef FEATURE_SIMD
+//------------------------------------------------------------------------
+// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
+//
+// Arguments:
+// tree - The GT_SIMD node of interest
+//
+// Return Value:
+// None.
+
+void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
+{
+ NYI("TreeNodeInfoInitSIMD");
+ GenTreeSIMD* simdTree = tree->AsSIMD();
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* lsra = m_lsra;
+ info->dstCount = 1;
+ switch (simdTree->gtSIMDIntrinsicID)
+ {
+ GenTree* op2;
+
+ case SIMDIntrinsicInit:
+ {
+ // This sets all fields of a SIMD struct to the given value.
+ // Mark op1 as contained if it is either zero or int constant of all 1's.
+ info->srcCount = 1;
+ GenTree* op1 = tree->gtOp.gtOp1;
+ if (op1->IsIntegralConst(0) || (simdTree->gtSIMDBaseType == TYP_INT && op1->IsCnsIntOrI() &&
+ op1->AsIntConCommon()->IconValue() == 0xffffffff) ||
+ (simdTree->gtSIMDBaseType == TYP_LONG && op1->IsCnsIntOrI() &&
+ op1->AsIntConCommon()->IconValue() == 0xffffffffffffffffLL))
+ {
+ MakeSrcContained(tree, tree->gtOp.gtOp1);
+ info->srcCount = 0;
+ }
+ }
+ break;
+
+ case SIMDIntrinsicInitN:
+ info->srcCount = (int)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
+ // Need an internal register to stitch together all the values into a single vector in an XMM reg.
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicInitArray:
+ // We have an array and an index, which may be contained.
+ info->srcCount = 2;
+ CheckImmedAndMakeContained(tree, tree->gtGetOp2());
+ break;
+
+ case SIMDIntrinsicDiv:
+ // SSE2 has no instruction support for division on integer vectors
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicAbs:
+ // This gets implemented as bitwise-And operation with a mask
+ // and hence we should never see it here.
+ unreached();
+ break;
+
+ case SIMDIntrinsicSqrt:
+ // SSE2 has no instruction support for sqrt on integer vectors.
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicSub:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseAndNot:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMax:
+ info->srcCount = 2;
+
+ // SSE2 32-bit integer multiplication requires two temp regs
+ if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT)
+ {
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicEqual:
+ info->srcCount = 2;
+ break;
+
+ // SSE2 doesn't support < and <= directly on int vectors.
+ // Instead we need to use > and >= with swapped operands.
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ info->srcCount = 2;
+ noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
+ break;
+
+ // SIMDIntrinsicGreaterThan is supported only on non-floating point base type vectors.
+ // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors.
+ // Instead we need to use < and <= with swapped operands.
+ case SIMDIntrinsicGreaterThan:
+ noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicGreaterThanOrEqual:
+ noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+
+ // a >= b = (a==b) | (a>b)
+ // To hold intermediate result of a==b and a>b we need two distinct
+ // registers. We can use targetReg and one internal reg provided
+ // they are distinct which is not guaranteed. Therefore, we request
+ // two internal registers so that one of the internal registers has
+ // to be different from targetReg.
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ // Need two SIMD registers as scratch.
+ // See genSIMDIntrinsicRelOp() for details on code sequence generated and
+ // the need for two scratch registers.
+ info->srcCount = 2;
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicDotProduct:
+ // Also need an internal register as scratch. Further, targetReg and the internal reg
+ // must be two distinct regs. This is achieved by requesting two internal registers, so
+ // at least one of them is guaranteed to be different from targetReg.
+ //
+ // See genSIMDIntrinsicDotProduct() for details on code sequence generated and
+ // the need for scratch registers.
+ info->srcCount = 2;
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicGetItem:
+ // This implements get_Item method. The sources are:
+ // - the source SIMD struct
+ // - index (which element to get)
+ // The result is baseType of SIMD struct.
+ info->srcCount = 2;
+
+ op2 = tree->gtGetOp2();
+ // If the index is a constant, mark it as contained.
+ if (CheckImmedAndMakeContained(tree, op2))
+ {
+ info->srcCount = 1;
+ }
+
+ // If the index is not a constant, we will use the SIMD temp location to store the vector.
+ // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
+ // can use that in the process of extracting the element.
+ // In all other cases with constant index, we need a temp xmm register to extract the
+ // element if index is other than zero.
+ if (!op2->IsCnsIntOrI())
+ {
+ (void)comp->getSIMDInitTempVarNum();
+ }
+ else if (!varTypeIsFloating(simdTree->gtSIMDBaseType) && !op2->IsIntegralConst(0))
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicCast:
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicGetX:
+ case SIMDIntrinsicGetY:
+ case SIMDIntrinsicGetZ:
+ case SIMDIntrinsicGetW:
+ case SIMDIntrinsicGetOne:
+ case SIMDIntrinsicGetZero:
+ case SIMDIntrinsicGetLength:
+ case SIMDIntrinsicGetAllOnes:
+ assert(!"Get intrinsics should not be seen during Lowering.");
+ unreached();
+
+ default:
+ noway_assert(!"Unimplemented SIMD node type.");
+ unreached();
+ }
+}
+#endif // FEATURE_SIMD
+
+void Lowering::LowerGCWriteBarrier(GenTree* tree)
+{
+ GenTreePtr dst = tree;
+ GenTreePtr addr = tree->gtOp.gtOp1;
+ GenTreePtr src = tree->gtOp.gtOp2;
+
+ if (addr->OperGet() == GT_LEA)
+ {
+ // In the case where we are doing a helper assignment, if the dst
+ // is an indir through an lea, we need to actually instantiate the
+ // lea in a register
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+
+ short leaSrcCount = 0;
+ if (lea->Base() != nullptr)
+ {
+ leaSrcCount++;
+ }
+ if (lea->Index() != nullptr)
+ {
+ leaSrcCount++;
+ }
+ lea->gtLsraInfo.srcCount = leaSrcCount;
+ lea->gtLsraInfo.dstCount = 1;
+ }
+
+#if NOGC_WRITE_BARRIERS
+ // For the NOGC JIT Helper calls
+ //
+ // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
+ // the 'src' goes into x15 (REG_WRITE_BARRIER)
+ //
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_DST_BYREF);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
+#else
+ // For the standard JIT Helper calls
+ // op1 goes into REG_ARG_0 and
+ // op2 goes into REG_ARG_1
+ //
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
+#endif // NOGC_WRITE_BARRIERS
+
+ // Both src and dst must reside in a register, which they should since we haven't set
+ // either of them as contained.
+ assert(addr->gtLsraInfo.dstCount == 1);
+ assert(src->gtLsraInfo.dstCount == 1);
+}
+
+//-----------------------------------------------------------------------------------------
+// Specify register requirements for address expression of an indirection operation.
+//
+// Arguments:
+// indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node
+//
+void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
+{
+ assert(indirTree->OperIsIndir());
+ // If this is the rhs of a block copy (i.e. non-enregisterable struct),
+ // it has no register requirements.
+ if (indirTree->TypeGet() == TYP_STRUCT)
+ {
+ return;
+ }
+
+ GenTreePtr addr = indirTree->gtGetOp1();
+ TreeNodeInfo* info = &(indirTree->gtLsraInfo);
+
+ GenTreePtr base = nullptr;
+ GenTreePtr index = nullptr;
+ unsigned cns = 0;
+ unsigned mul;
+ bool rev;
+ bool modifiedSources = false;
+
+ if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
+ {
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ base = lea->Base();
+ index = lea->Index();
+ cns = lea->gtOffset;
+
+ m_lsra->clearOperandCounts(addr);
+ // The srcCount is decremented because addr is now "contained",
+ // then we account for the base and index below, if they are non-null.
+ info->srcCount--;
+ }
+ else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
+ !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
+ {
+ // An addressing mode will be constructed that may cause some
+ // nodes to not need a register, and cause others' lifetimes to be extended
+ // to the GT_IND or even its parent if it's an assignment
+
+ assert(base != addr);
+ m_lsra->clearOperandCounts(addr);
+
+ GenTreePtr arrLength = nullptr;
+
+ // Traverse the computation below GT_IND to find the operands
+ // for the addressing mode, marking the various constants and
+ // intermediate results as not consuming/producing.
+ // If the traversal were more complex, we might consider using
+ // a traversal function, but the addressing mode is only made
+ // up of simple arithmetic operators, and the code generator
+ // only traverses one leg of each node.
+
+ bool foundBase = (base == nullptr);
+ bool foundIndex = (index == nullptr);
+ GenTreePtr nextChild = nullptr;
+ for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
+ {
+ nextChild = nullptr;
+ GenTreePtr op1 = child->gtOp.gtOp1;
+ GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
+
+ if (op1 == base)
+ {
+ foundBase = true;
+ }
+ else if (op1 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op1);
+ if (!op1->OperIsLeaf())
+ {
+ nextChild = op1;
+ }
+ }
+
+ if (op2 != nullptr)
+ {
+ if (op2 == base)
+ {
+ foundBase = true;
+ }
+ else if (op2 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op2);
+ if (!op2->OperIsLeaf())
+ {
+ assert(nextChild == nullptr);
+ nextChild = op2;
+ }
+ }
+ }
+ }
+ assert(foundBase && foundIndex);
+ info->srcCount--; // it gets incremented below.
+ }
+ else if (addr->gtOper == GT_ARR_ELEM)
+ {
+ // The GT_ARR_ELEM consumes all the indices and produces the offset.
+ // The array object lives until the mem access.
+ // We also consume the target register to which the address is
+ // computed
+
+ info->srcCount++;
+ assert(addr->gtLsraInfo.srcCount >= 2);
+ addr->gtLsraInfo.srcCount -= 1;
+ }
+ else
+ {
+ // it is nothing but a plain indir
+ info->srcCount--; // base gets added in below
+ base = addr;
+ }
+
+ if (base != nullptr)
+ {
+ info->srcCount++;
+ }
+
+ if (index != nullptr && !modifiedSources)
+ {
+ info->srcCount++;
+ }
+
+ // On ARM64 we may need a single internal register
+ // (even when both conditions below are true, we still need only one internal register)
+ if ((index != nullptr) && (cns != 0))
+ {
+ // ARM64 does not support both Index and offset so we need an internal register
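+ // (e.g. an access of the form [base, index] with a non-zero immediate offset must first
+ // fold the offset into a temp register, since a load can encode one or the other but not both)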
+ info->internalIntCount = 1;
+ }
+ else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
+ {
+ // This offset can't be contained in the ldr/str instruction, so we need an internal register
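+ // (the encodable range is roughly a scaled unsigned 12-bit or an unscaled signed 9-bit offset)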
+ info->internalIntCount = 1;
+ }
+}
+
+void Lowering::LowerCmp(GenTreePtr tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+}
+
+/* Lower GT_CAST(srcType, DstType) nodes.
+ *
+ * Casts from small int type to float/double are transformed as follows:
+ * GT_CAST(byte, float/double) = GT_CAST(GT_CAST(byte, int32), float/double)
+ * GT_CAST(sbyte, float/double) = GT_CAST(GT_CAST(sbyte, int32), float/double)
+ * GT_CAST(int16, float/double) = GT_CAST(GT_CAST(int16, int32), float/double)
+ * GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double)
+ *
+ * SSE2 conversion instructions operate on signed integers. Casts from Uint32/Uint64
+ * are morphed as follows by front-end and hence should not be seen here.
+ * GT_CAST(uint32, float/double) = GT_CAST(GT_CAST(uint32, long), float/double)
+ * GT_CAST(uint64, float) = GT_CAST(GT_CAST(uint64, double), float)
+ *
+ *
+ * Similarly casts from float/double to a smaller int type are transformed as follows:
+ * GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte)
+ * GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte)
+ * GT_CAST(float/double, int16) = GT_CAST(GT_CAST(float/double, int32), int16)
+ * GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(float/double, int32), uint16)
+ *
+ * SSE2 has instructions to convert a float/double value into a signed 32/64-bit
+ * integer. The above transformations help us to leverage those instructions.
+ *
+ * Note that for the overflow conversions we still depend on helper calls and
+ * don't expect to see them here.
+ * i) GT_CAST(float/double, int type with overflow detection)
+ *
+ */
+void Lowering::LowerCast(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_CAST);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types dstType = tree->CastToType();
+ var_types srcType = op1->TypeGet();
+ var_types tmpType = TYP_UNDEF;
+
+ // We should never see the following casts as they are expected to be lowered
+ // appropriately or converted into helper calls by front-end.
+ // srcType = float/double dstType = * and overflow detecting cast
+ // Reason: must be converted to a helper call
+ //
+ if (varTypeIsFloating(srcType))
+ {
+ noway_assert(!tree->gtOverflow());
+ }
+
+ // Case of src is a small type and dst is a floating point type.
+ if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType))
+ {
+ // These conversions can never be overflow detecting ones.
+ noway_assert(!tree->gtOverflow());
+ tmpType = TYP_INT;
+ }
+ // case of src is a floating point type and dst is a small type.
+ else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType))
+ {
+ tmpType = TYP_INT;
+ }
+
+ if (tmpType != TYP_UNDEF)
+ {
+ GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType);
+ tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
+
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ tree->gtOp.gtOp1 = tmp;
+ BlockRange().InsertAfter(op1, tmp);
+ }
+}
+
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+ if (tree->OperGet() == GT_ROL)
+ {
+ // There is no ROL instruction on ARM. Convert ROL into ROR.
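+ // For example, a 32-bit ROL by 5 becomes a ROR by 27 (32 - 5); for a non-constant amount
+ // we negate it instead, relying on the rotate count being taken modulo the bit size.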
+ GenTreePtr rotatedValue = tree->gtOp.gtOp1;
+ unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
+ GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2;
+
+ if (rotateLeftIndexNode->IsCnsIntOrI())
+ {
+ ssize_t rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal;
+ ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex;
+ rotateLeftIndexNode->gtIntCon.gtIconVal = rotateRightIndex;
+ }
+ else
+ {
+ GenTreePtr tmp =
+ comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
+ BlockRange().InsertAfter(rotateLeftIndexNode, tmp);
+ tree->gtOp.gtOp2 = tmp;
+ }
+ tree->ChangeOper(GT_ROR);
+ }
+}
+
+// returns true if the tree can use the read-modify-write memory instruction form
+bool Lowering::isRMWRegOper(GenTreePtr tree)
+{
+ return false;
+}
+
+bool Lowering::IsCallTargetInRange(void* addr)
+{
+ // TODO-ARM64-CQ: This is a workaround to unblock the JIT from getting calls working.
+ // Currently, we'll be generating calls using blr and manually loading an absolute
+ // call target in a register using a sequence of load immediate instructions.
+ //
+ // As you can expect, this is inefficient and not the recommended way per the
+ // ARM64 ABI manual, but it gets things working for now.
+ // The work to get this right is to implement PC-relative calls; the bl instruction
+ // can only reach targets within +/-128MB, so this will also require some additional
+ // code to get jump thunks working.
+ return true;
+}
+
+// return true if the immediate can be folded into an instruction, for example small enough and non-relocatable
+bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
+{
+ if (varTypeIsFloating(parentNode->TypeGet()))
+ {
+ // We can contain a floating point 0.0 constant in a compare instruction
+ switch (parentNode->OperGet())
+ {
+ default:
+ return false;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ if (childNode->IsIntegralConst(0))
+ return true;
+ break;
+ }
+ }
+ else
+ {
+ // Make sure we have an actual immediate
+ if (!childNode->IsCnsIntOrI())
+ return false;
+ if (childNode->IsIconHandle() && comp->opts.compReloc)
+ return false;
+
+ ssize_t immVal = childNode->gtIntCon.gtIconVal;
+ emitAttr attr = emitActualTypeSize(childNode->TypeGet());
+ emitAttr size = EA_SIZE(attr);
+
+ switch (parentNode->OperGet())
+ {
+ default:
+ return false;
+
+ case GT_ADD:
+ case GT_SUB:
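+ // (ARM64 add/sub immediates are 12-bit values, optionally shifted left by 12)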
+ if (emitter::emitIns_valid_imm_for_add(immVal, size))
+ return true;
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ if (emitter::emitIns_valid_imm_for_cmp(immVal, size))
+ return true;
+ break;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ if (emitter::emitIns_valid_imm_for_alu(immVal, size))
+ return true;
+ break;
+
+ case GT_STORE_LCL_VAR:
+ if (immVal == 0)
+ return true;
+ break;
+ }
+ }
+
+ return false;
+}
+
+#endif // _TARGET_ARM64_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
new file mode 100644
index 0000000000..6f98eb6661
--- /dev/null
+++ b/src/jit/lowerxarch.cpp
@@ -0,0 +1,4192 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Lowering for AMD64 XX
+XX XX
+XX This encapsulates all the logic for lowering trees for the AMD64 XX
+XX architecture. For a more detailed view of what is lowering, please XX
+XX take a look at Lower.cpp XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_XARCH_
+
+#include "jit.h"
+#include "sideeffects.h"
+#include "lower.h"
+
+// xarch supports both ROL and ROR instructions so no lowering is required.
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+}
+
+//------------------------------------------------------------------------
+// LowerStoreLoc: Lower a store of a lclVar
+//
+// Arguments:
+// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+// This involves:
+// - Setting the appropriate candidates for a store of a multi-reg call return value.
+// - Requesting an internal register for SIMD12 stores.
+// - Handling of contained immediates and widening operations of unsigneds.
+
+void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+ TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
+
+ // Is this the case of var = call where call is returning
+ // a value in multiple return registers?
+ GenTree* op1 = storeLoc->gtGetOp1();
+ if (op1->IsMultiRegCall())
+ {
+ // backend expects to see this case only for store lclvar.
+ assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+ // srcCount = number of registers in which the value is returned by call
+ GenTreeCall* call = op1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+
+ // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+ regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
+ op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
+ return;
+ }
+
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(storeLoc))
+ {
+ if (op1->IsCnsIntOrI())
+ {
+ // InitBlk
+ MakeSrcContained(storeLoc, op1);
+ }
+ else if (storeLoc->TypeGet() == TYP_SIMD12)
+ {
+ // Need an additional register to extract upper 4 bytes of Vector3.
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
+
+ // In this case don't mark the operand as contained as we want it to
+ // be evaluated into an xmm register
+ }
+ return;
+ }
+#endif // FEATURE_SIMD
+
+ // If the source is a containable immediate, make it contained, unless it is
+ // an int-size or larger store of zero to memory, because we can generate smaller code
+ // by zeroing a register and then storing it.
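+ // (e.g. 'xor reg, reg' followed by 'mov [mem], reg' avoids encoding a 4-byte immediate
+ // in the store instruction)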
+ if (IsContainableImmed(storeLoc, op1) && (!op1->IsIntegralConst(0) || varTypeIsSmall(storeLoc)))
+ {
+ MakeSrcContained(storeLoc, op1);
+ }
+
+ // Try to widen the ops if they are going into a local var.
+ if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (storeLoc->gtOp1->gtOper == GT_CNS_INT))
+ {
+ GenTreeIntCon* con = storeLoc->gtOp1->AsIntCon();
+ ssize_t ival = con->gtIconVal;
+
+ unsigned varNum = storeLoc->gtLclNum;
+ LclVarDsc* varDsc = comp->lvaTable + varNum;
+
+ if (varDsc->lvIsSIMDType())
+ {
+ noway_assert(storeLoc->gtType != TYP_STRUCT);
+ }
+ unsigned size = genTypeSize(storeLoc);
+ // If we are storing a constant into a local variable
+ // we extend the size of the store here
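+ // (e.g. storing (sbyte)-1 becomes a 4-byte TYP_INT store of 0xFFFFFFFF once the
+ // constant is sign-extended below)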
+ if ((size < 4) && !varTypeIsStruct(varDsc))
+ {
+ if (!varTypeIsUnsigned(varDsc))
+ {
+ if (genTypeSize(storeLoc) == 1)
+ {
+ if ((ival & 0x7f) != ival)
+ {
+ ival = ival | 0xffffff00;
+ }
+ }
+ else
+ {
+ assert(genTypeSize(storeLoc) == 2);
+ if ((ival & 0x7fff) != ival)
+ {
+ ival = ival | 0xffff0000;
+ }
+ }
+ }
+
+ // A local stack slot is at least 4 bytes in size, regardless of
+ // what the local var is typed as, so auto-promote it here
+ // unless it is a field of a promoted struct
+ // TODO-XArch-CQ: if the field is promoted shouldn't we also be able to do this?
+ if (!varDsc->lvIsStructField)
+ {
+ storeLoc->gtType = TYP_INT;
+ con->SetIconValue(ival);
+ }
+ }
+ }
+}
+
+/**
+ * Takes care of annotating the register requirements
+ * for every TreeNodeInfo struct that maps to each tree node.
+ * Preconditions:
+ * LSRA has been initialized and there is a TreeNodeInfo node
+ * already allocated and initialized for every tree in the IR.
+ * Postconditions:
+ * Every TreeNodeInfo instance has the right annotations on register
+ * requirements needed by LSRA to build the Interval Table (source,
+ * destination and internal [temp] register counts).
+ * This code is refactored originally from LSRA.
+ */
+void Lowering::TreeNodeInfoInit(GenTree* tree)
+{
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ switch (tree->OperGet())
+ {
+ GenTree* op1;
+ GenTree* op2;
+
+ default:
+ TreeNodeInfoInitSimple(tree);
+ break;
+
+ case GT_LCL_FLD:
+ info->srcCount = 0;
+ info->dstCount = 1;
+
+#ifdef FEATURE_SIMD
+ // Need an additional register to read upper 4 bytes of Vector3.
+ if (tree->TypeGet() == TYP_SIMD12)
+ {
+ // We need an internal register different from targetReg in which 'tree' produces its result
+ // because both targetReg and internal reg will be in use at the same time. This is achieved
+ // by asking for two internal registers.
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
+ }
+#endif
+ break;
+
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ LowerStoreLoc(tree->AsLclVarCommon());
+ break;
+
+ case GT_BOX:
+ noway_assert(!"box should not exist here");
+ // The result of 'op1' is also the final result
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_PHYSREGDST:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_COMMA:
+ {
+ GenTreePtr firstOperand;
+ GenTreePtr secondOperand;
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ firstOperand = tree->gtOp.gtOp2;
+ secondOperand = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ firstOperand = tree->gtOp.gtOp1;
+ secondOperand = tree->gtOp.gtOp2;
+ }
+ if (firstOperand->TypeGet() != TYP_VOID)
+ {
+ firstOperand->gtLsraInfo.isLocalDefUse = true;
+ firstOperand->gtLsraInfo.dstCount = 0;
+ }
+ if (tree->TypeGet() == TYP_VOID && secondOperand->TypeGet() != TYP_VOID)
+ {
+ secondOperand->gtLsraInfo.isLocalDefUse = true;
+ secondOperand->gtLsraInfo.dstCount = 0;
+ }
+ }
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_LIST:
+ case GT_ARGPLACE:
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_CNS_DBL:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ break;
+
+#if !defined(_TARGET_64BIT_)
+
+ case GT_LONG:
+ if ((tree->gtLIRFlags & LIR::Flags::IsUnusedValue) != 0)
+ {
+ // An unused GT_LONG node needs to consume its sources.
+ info->srcCount = 2;
+ }
+ else
+ {
+ // Passthrough
+ info->srcCount = 0;
+ }
+
+ info->dstCount = 0;
+ break;
+
+#endif // !defined(_TARGET_64BIT_)
+
+ case GT_QMARK:
+ case GT_COLON:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ unreached();
+ break;
+
+ case GT_RETURN:
+ TreeNodeInfoInitReturn(tree);
+ break;
+
+ case GT_RETFILT:
+ if (tree->TypeGet() == TYP_VOID)
+ {
+ info->srcCount = 0;
+ info->dstCount = 0;
+ }
+ else
+ {
+ assert(tree->TypeGet() == TYP_INT);
+
+ info->srcCount = 1;
+ info->dstCount = 0;
+
+ info->setSrcCandidates(l, RBM_INTRET);
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
+ }
+ break;
+
+ // A GT_NOP is a passthrough if it is void or if it has
+ // a child, but must be considered to produce a dummy value if it
+ // has a type but no child
+ case GT_NOP:
+ info->srcCount = 0;
+ if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
+ {
+ info->dstCount = 1;
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+ break;
+
+ case GT_JTRUE:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ l->clearDstCount(tree->gtOp.gtOp1);
+ break;
+
+ case GT_JMP:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_SWITCH:
+ // This should never occur since switch nodes must not be visible at this
+ // point in the JIT.
+ info->srcCount = 0;
+ info->dstCount = 0; // To avoid getting uninit errors.
+ noway_assert(!"Switch must be lowered at this point");
+ break;
+
+ case GT_JMPTABLE:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ break;
+
+ case GT_SWITCH_TABLE:
+ info->srcCount = 2;
+ info->internalIntCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_ASG:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ noway_assert(!"We should never hit any assignment operator in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+#if !defined(_TARGET_64BIT_)
+ case GT_ADD_LO:
+ case GT_ADD_HI:
+ case GT_SUB_LO:
+ case GT_SUB_HI:
+#endif
+ case GT_ADD:
+ case GT_SUB:
+ // SSE2 arithmetic instructions don't support the form "op mem, xmm".
+ // Rather they only support "op xmm, mem/xmm" form.
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ // overflow operations aren't supported on float/double types.
+ assert(!tree->gtOverflow());
+
+ op1 = tree->gtGetOp1();
+ op2 = tree->gtGetOp2();
+
+ // No implicit conversions at this stage as the expectation is that
+ // everything is made explicit by adding casts.
+ assert(op1->TypeGet() == op2->TypeGet());
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else if (tree->OperIsCommutative() &&
+ (op1->IsCnsNonZeroFltOrDbl() || (op1->isMemoryOp() && IsSafeToContainMem(tree, op1))))
+ {
+ // Though we have GT_ADD(op1=memOp, op2=non-memOp), we try to reorder the operands
+ // as long as it is safe so that the following efficient code sequence is generated:
+ // addss/sd targetReg, memOp (if op1Reg == targetReg) OR
+ // movaps targetReg, op2Reg; addss/sd targetReg, [memOp]
+ //
+ // Instead of
+ // movss op1Reg, [memOp]; addss/sd targetReg, Op2Reg (if op1Reg == targetReg) OR
+ // movss op1Reg, [memOp]; movaps targetReg, op1Reg, addss/sd targetReg, Op2Reg
+ MakeSrcContained(tree, op1);
+ }
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ TreeNodeInfoInitLogicalOp(tree);
+ break;
+
+ case GT_RETURNTRAP:
+ // this just turns into a compare of its child with an int
+ // + a conditional call
+ info->srcCount = 1;
+ info->dstCount = 0;
+ if (tree->gtOp.gtOp1->isIndir())
+ {
+ MakeSrcContained(tree, tree->gtOp.gtOp1);
+ }
+ info->internalIntCount = 1;
+ info->setInternalCandidates(l, l->allRegs(TYP_INT));
+ break;
+
+ case GT_MOD:
+ case GT_DIV:
+ case GT_UMOD:
+ case GT_UDIV:
+ TreeNodeInfoInitModDiv(tree);
+ break;
+
+ case GT_MUL:
+ case GT_MULHI:
+ SetMulOpCounts(tree);
+ break;
+
+ case GT_INTRINSIC:
+ TreeNodeInfoInitIntrinsic(tree);
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ TreeNodeInfoInitSIMD(tree);
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_CAST:
+ TreeNodeInfoInitCast(tree);
+ break;
+
+ case GT_NEG:
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // TODO-XArch-CQ:
+ // SSE instruction set doesn't have an instruction to negate a number.
+ // The recommended way is to xor the float/double number with a bitmask.
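+ // (e.g. negating a double amounts to xoring it with the sign-bit mask 0x8000000000000000)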
+ // The only way to xor is using xorps or xorpd both of which operate on
+ // 128-bit operands. To hold the bit-mask we would need another xmm
+ // register or a 16-byte aligned 128-bit data constant. Right now emitter
+ // lacks the support for emitting such constants or instruction with mem
+ // addressing mode referring to a 128-bit operand. For now we use an
+ // internal xmm register to load 32/64-bit bitmask from data section.
+ // Note that by trading additional data section memory (128-bit) we can
+ // save on the need for an internal register and also a memory-to-reg
+ // move.
+ //
+ // Note: another option to avoid internal register requirement is by
+ // lowering as GT_SUB(0, src). This will generate code different from
+ // Jit64 and could possibly result in compat issues (?).
+ if (varTypeIsFloating(tree))
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+ break;
+
+ case GT_NOT:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ TreeNodeInfoInitShiftRotate(tree);
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ LowerCmp(tree);
+ break;
+
+ case GT_CKFINITE:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+ break;
+
+ case GT_CMPXCHG:
+ info->srcCount = 3;
+ info->dstCount = 1;
+
+ // comparand is preferenced to RAX.
+ // Remaining two operands can be in any reg other than RAX.
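+ // (the cmpxchg instruction implicitly compares against, and writes back to, EAX/RAX)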
+ tree->gtCmpXchg.gtOpComparand->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
+ tree->gtCmpXchg.gtOpLocation->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
+ tree->gtCmpXchg.gtOpValue->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
+ tree->gtLsraInfo.setDstCandidates(l, RBM_RAX);
+ break;
+
+ case GT_LOCKADD:
+ info->srcCount = 2;
+ info->dstCount = 0;
+
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+ break;
+
+ case GT_CALL:
+ TreeNodeInfoInitCall(tree->AsCall());
+ break;
+
+ case GT_ADDR:
+ {
+ // For a GT_ADDR, the child node should not be evaluated into a register
+ GenTreePtr child = tree->gtOp.gtOp1;
+ assert(!l->isCandidateLocalRef(child));
+ l->clearDstCount(child);
+ info->srcCount = 0;
+ info->dstCount = 1;
+ }
+ break;
+
+#ifdef _TARGET_X86_
+ case GT_OBJ:
+ NYI_X86("GT_OBJ");
+#elif !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ case GT_OBJ:
+#endif
+ case GT_BLK:
+ case GT_DYN_BLK:
+ // These should all be eliminated prior to Lowering.
+ assert(!"Non-store block node in Lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ case GT_PUTARG_STK:
+ TreeNodeInfoInitPutArgStk(tree);
+ break;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ TreeNodeInfoInitBlockStore(tree->AsBlk());
+ break;
+
+ case GT_LCLHEAP:
+ TreeNodeInfoInitLclHeap(tree);
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ GenTreeBoundsChk* node = tree->AsBoundsChk();
+ // Consumes arrLen & index - has no result
+ info->srcCount = 2;
+ info->dstCount = 0;
+
+ GenTreePtr other;
+ if (CheckImmedAndMakeContained(tree, node->gtIndex))
+ {
+ other = node->gtArrLen;
+ }
+ else if (CheckImmedAndMakeContained(tree, node->gtArrLen))
+ {
+ other = node->gtIndex;
+ }
+ else if (node->gtIndex->isMemoryOp())
+ {
+ other = node->gtIndex;
+ }
+ else
+ {
+ other = node->gtArrLen;
+ }
+
+ if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet())
+ {
+ if (other->isMemoryOp())
+ {
+ MakeSrcContained(tree, other);
+ }
+ else
+ {
+ // We can mark 'other' as reg optional, since it is not contained.
+ SetRegOptional(other);
+ }
+ }
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ // These must have been lowered to GT_ARR_INDEX
+ noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_ARR_INDEX:
+ info->srcCount = 2;
+ info->dstCount = 1;
+ // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
+ // times while the result is being computed.
+ tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
+ info->hasDelayFreeSrc = true;
+ break;
+
+ case GT_ARR_OFFSET:
+ // This consumes the offset, if any, the arrObj and the effective index,
+ // and produces the flattened offset for this dimension.
+ info->srcCount = 3;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+ // we don't want to generate code for this
+ if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
+ {
+ MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
+ }
+ break;
+
+ case GT_LEA:
+ // The LEA usually passes its operands through to the GT_IND, in which case we'll
+ // clear the info->srcCount and info->dstCount later, but we may be instantiating an address,
+ // so we set them here.
+ info->srcCount = 0;
+ if (tree->AsAddrMode()->HasBase())
+ {
+ info->srcCount++;
+ }
+ if (tree->AsAddrMode()->HasIndex())
+ {
+ info->srcCount++;
+ }
+ info->dstCount = 1;
+ break;
+
+ case GT_STOREIND:
+ {
+ info->srcCount = 2;
+ info->dstCount = 0;
+ GenTree* src = tree->gtOp.gtOp2;
+
+ if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
+ {
+ LowerGCWriteBarrier(tree);
+ break;
+ }
+
+ // If the source is a containable immediate, make it contained, unless it is
+ // an int-size or larger store of zero to memory, because we can generate smaller code
+ // by zeroing a register and then storing it.
+ if (IsContainableImmed(tree, src) &&
+ (!src->IsIntegralConst(0) || varTypeIsSmall(tree) || tree->gtGetOp1()->OperGet() == GT_CLS_VAR_ADDR))
+ {
+ MakeSrcContained(tree, src);
+ }
+ else if (!varTypeIsFloating(tree))
+ {
+ // Perform recognition of trees with the following structure:
+ // StoreInd(addr, BinOp(expr, GT_IND(addr)))
+ // to be able to fold this into an instruction of the form
+ // BINOP [addr], register
+ // where register is the actual place where 'expr' is computed.
+ //
+ // SSE2 doesn't support RMW form of instructions.
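+ // For example, *addr = *addr + x can then be emitted as a single 'add [addr], reg'
+ // once x has been evaluated into a register.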
+ if (SetStoreIndOpCountsIfRMWMemOp(tree))
+ {
+ break;
+ }
+ }
+
+ SetIndirAddrOpCounts(tree);
+ }
+ break;
+
+ case GT_NULLCHECK:
+ info->dstCount = 0;
+ info->srcCount = 1;
+ info->isLocalDefUse = true;
+ break;
+
+ case GT_IND:
+ info->dstCount = 1;
+ info->srcCount = 1;
+ SetIndirAddrOpCounts(tree);
+ break;
+
+ case GT_CATCH_ARG:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
+ break;
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+#endif
+
+ case GT_CLS_VAR:
+ info->srcCount = 0;
+ // GT_CLS_VAR, by the time we reach the backend, must always
+ // be a pure use.
+ // It will produce a result of the type of the
+ // node, and use an internal register for the address.
+
+ info->dstCount = 1;
+ assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0);
+ info->internalIntCount = 1;
+ break;
+ } // end switch (tree->OperGet())
+
+ // If op2 of a binary-op gets marked as contained, then binary-op srcCount will be 1.
+ // Even then we would like to set isTgtPref on Op1.
+ if (tree->OperIsBinary() && info->srcCount >= 1)
+ {
+ if (isRMWRegOper(tree))
+ {
+ GenTree* op1 = tree->gtOp.gtOp1;
+ GenTree* op2 = tree->gtOp.gtOp2;
+
+ // Commutative opers like add/mul/and/or/xor could reverse the order of
+ // operands if it is safe to do so. In such a case we would like op2 to be
+ // target preferenced instead of op1.
+ if (tree->OperIsCommutative() && op1->gtLsraInfo.dstCount == 0 && op2 != nullptr)
+ {
+ op1 = op2;
+ op2 = tree->gtOp.gtOp1;
+ }
+
+ // If we have a read-modify-write operation, we want to preference op1 to the target.
+ // If op1 is contained, we don't want to preference it, but it won't
+ // show up as a source in that case, so it will be ignored.
+ op1->gtLsraInfo.isTgtPref = true;
+
+ // Is this a non-commutative operator, or is op2 a contained memory op?
+ // (Note that we can't call IsContained() at this point because it uses exactly the
+ // same information we're currently computing.)
+ // In either case, we need to make op2 remain live until the op is complete, by marking
+ // the source(s) associated with op2 as "delayFree".
+ // Note that if op2 of a binary RMW operator is a memory op, even if the operator
+ // is commutative, codegen cannot reverse them.
+ // TODO-XArch-CQ: This is not actually the case for all RMW binary operators, but there's
+ // more work to be done to correctly reverse the operands if they involve memory
+ // operands. Also, we may need to handle more cases than GT_IND, especially once
+ // we've modified the register allocator to not require all nodes to be assigned
+ // a register (e.g. a spilled lclVar can often be referenced directly from memory).
+ // Note that we may have a null op2, even with 2 sources, if op1 is a base/index memory op.
+
+ GenTree* delayUseSrc = nullptr;
+ // TODO-XArch-Cleanup: We should make the indirection explicit on these nodes so that we don't have
+ // to special case them.
+ if (tree->OperGet() == GT_XADD || tree->OperGet() == GT_XCHG || tree->OperGet() == GT_LOCKADD)
+ {
+ delayUseSrc = op1;
+ }
+ else if ((op2 != nullptr) &&
+ (!tree->OperIsCommutative() || (op2->isMemoryOp() && (op2->gtLsraInfo.srcCount == 0))))
+ {
+ delayUseSrc = op2;
+ }
+ if (delayUseSrc != nullptr)
+ {
+ // If delayUseSrc is an indirection and it doesn't produce a result, then we need to set "delayFree"
+ // on the base & index, if any.
+ // Otherwise, we set it on delayUseSrc itself.
+ if (delayUseSrc->isIndir() && (delayUseSrc->gtLsraInfo.dstCount == 0))
+ {
+ GenTree* base = delayUseSrc->AsIndir()->Base();
+ GenTree* index = delayUseSrc->AsIndir()->Index();
+ if (base != nullptr)
+ {
+ base->gtLsraInfo.isDelayFree = true;
+ }
+ if (index != nullptr)
+ {
+ index->gtLsraInfo.isDelayFree = true;
+ }
+ }
+ else
+ {
+ delayUseSrc->gtLsraInfo.isDelayFree = true;
+ }
+ info->hasDelayFreeSrc = true;
+ }
+ }
+ }
+
+#ifdef _TARGET_X86_
+ // Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands
+ // if the tree node is a byte type.
+ //
+ // Example1: GT_STOREIND(byte, addr, op2) - storeind of byte sized value from op2 into mem 'addr'
+ // Storeind itself will not produce any value and hence dstCount=0. But op2 could be TYP_INT
+ // value. In this case we need to exclude esi/edi from the src candidates of op2.
+ //
+ // Example2: GT_CAST(int <- bool <- int) - here type of GT_CAST node is int and castToType is bool.
+ //
+ // Example3: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses
+ // ubyte as the result of comparison and if the result needs to be materialized into a reg
+ // simply zero extend it to TYP_INT size. Here is an example of generated code:
+ // cmp dl, byte ptr[addr mode]
+ // movzx edx, dl
+ //
+ // Though this looks conservative in theory, in practice we could not think of a case where
+ // the below logic leads to conservative register specification. In future when or if we find
+ // one such case, this logic needs to be fine tuned for that case(s).
+ if (varTypeIsByte(tree) || ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType())) ||
+ (tree->OperIsCompare() && varTypeIsByte(tree->gtGetOp1()) && varTypeIsByte(tree->gtGetOp2())))
+ {
+ regMaskTP regMask;
+ if (info->dstCount > 0)
+ {
+ regMask = info->getDstCandidates(l);
+ assert(regMask != RBM_NONE);
+ info->setDstCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ }
+
+ if (tree->OperIsSimple() && (info->srcCount > 0))
+ {
+ // No need to set src candidates on a contained child operand.
+ GenTree* op = tree->gtOp.gtOp1;
+ assert(op != nullptr);
+ bool containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
+ if (!containedNode)
+ {
+ regMask = op->gtLsraInfo.getSrcCandidates(l);
+ assert(regMask != RBM_NONE);
+ op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ }
+
+ if (tree->OperIsBinary() && (tree->gtOp.gtOp2 != nullptr))
+ {
+ op = tree->gtOp.gtOp2;
+ containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
+ if (!containedNode)
+ {
+ regMask = op->gtLsraInfo.getSrcCandidates(l);
+ assert(regMask != RBM_NONE);
+ op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ }
+ }
+ }
+ }
+#endif //_TARGET_X86_
+
+ // We need to be sure that we've set info->srcCount and info->dstCount appropriately
+ assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitSimple: Sets the srcCount and dstCount for all the trees
+// without special handling based on the tree node type.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitSimple(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ unsigned kind = tree->OperKind();
+ info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ info->srcCount = 0;
+ }
+ else if (kind & (GTK_SMPOP))
+ {
+ if (tree->gtGetOp2() != nullptr)
+ {
+ info->srcCount = 2;
+ }
+ else
+ {
+ info->srcCount = 1;
+ }
+ }
+ else
+ {
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+#if !defined(_TARGET_64BIT_)
+ if (tree->TypeGet() == TYP_LONG)
+ {
+ GenTree* op1 = tree->gtGetOp1();
+ noway_assert(op1->OperGet() == GT_LONG);
+ GenTree* loVal = op1->gtGetOp1();
+ GenTree* hiVal = op1->gtGetOp2();
+ info->srcCount = 2;
+ loVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_LO);
+ hiVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_HI);
+ info->dstCount = 0;
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+ GenTree* op1 = tree->gtGetOp1();
+ regMaskTP useCandidates = RBM_NONE;
+
+ info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ info->dstCount = 0;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (varTypeIsStruct(tree))
+ {
+ // op1 has to be either an lclvar or a multi-reg returning call
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]);
+ assert(varDsc->lvIsMultiRegRet);
+
+ // Mark var as contained if not enregistrable.
+ if (!varTypeIsEnregisterableStruct(op1))
+ {
+ MakeSrcContained(tree, op1);
+ }
+ }
+ else
+ {
+ noway_assert(op1->IsMultiRegCall());
+
+ ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+ useCandidates = retTypeDesc->GetABIReturnRegs();
+ }
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ // Non-struct type return - determine useCandidates
+ switch (tree->TypeGet())
+ {
+ case TYP_VOID:
+ useCandidates = RBM_NONE;
+ break;
+ case TYP_FLOAT:
+ useCandidates = RBM_FLOATRET;
+ break;
+ case TYP_DOUBLE:
+ useCandidates = RBM_DOUBLERET;
+ break;
+#if defined(_TARGET_64BIT_)
+ case TYP_LONG:
+ useCandidates = RBM_LNGRET;
+ break;
+#endif // defined(_TARGET_64BIT_)
+ default:
+ useCandidates = RBM_INTRET;
+ break;
+ }
+ }
+
+ if (useCandidates != RBM_NONE)
+ {
+ op1->gtLsraInfo.setSrcCandidates(l, useCandidates);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ // For shift operations, the shift amount must be
+ // loaded into CL whenever the number of bits
+ // to shift is not a constant.
+ GenTreePtr shiftBy = tree->gtOp.gtOp2;
+ GenTreePtr source = tree->gtOp.gtOp1;
+
+ // x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off)
+ // We will allow whatever can be encoded - hope you know what you are doing.
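+ // (e.g. a 32-bit shift by 33 executes as a shift by 1, since the hardware masks the
+ // count down to the low 5 bits)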
+ if (!IsContainableImmed(tree, shiftBy) || (shiftBy->gtIntConCommon.IconValue() > 255) ||
+ (shiftBy->gtIntConCommon.IconValue() < 0))
+ {
+ source->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
+ shiftBy->gtLsraInfo.setSrcCandidates(l, RBM_RCX);
+ info->setDstCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
+ }
+ else
+ {
+ MakeSrcContained(tree, shiftBy);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCall: Set the NodeInfo for a call.
+//
+// Arguments:
+// call - The call node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
+{
+ TreeNodeInfo* info = &(call->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+ bool hasMultiRegRetVal = false;
+ ReturnTypeDesc* retTypeDesc = nullptr;
+
+ info->srcCount = 0;
+ if (call->TypeGet() != TYP_VOID)
+ {
+ hasMultiRegRetVal = call->HasMultiRegRetVal();
+ if (hasMultiRegRetVal)
+ {
+ // dst count = number of registers in which the value is returned by call
+ retTypeDesc = call->GetReturnTypeDesc();
+ info->dstCount = retTypeDesc->GetReturnRegCount();
+ }
+ else
+ {
+ info->dstCount = 1;
+ }
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+
+ GenTree* ctrlExpr = call->gtControlExpr;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ // either gtControlExpr != null or gtCallAddr != null.
+ // Both cannot be non-null at the same time.
+ assert(ctrlExpr == nullptr);
+ assert(call->gtCallAddr != nullptr);
+ ctrlExpr = call->gtCallAddr;
+ }
+
+ // set reg requirements on call target represented as control sequence.
+ if (ctrlExpr != nullptr)
+ {
+ // we should never see a gtControlExpr whose type is void.
+ assert(ctrlExpr->TypeGet() != TYP_VOID);
+
+ // call can take a Rm op on x64
+ info->srcCount++;
+
+ // In case of fast tail implemented as jmp, make sure that gtControlExpr is
+ // computed into a register.
+ if (!call->IsFastTailCall())
+ {
+ if (ctrlExpr->isIndir())
+ {
+ MakeSrcContained(call, ctrlExpr);
+ }
+ }
+ else
+ {
+ // Fast tail call - make sure that call target is always computed in RAX
+ // so that epilog sequence can generate "jmp rax" to achieve fast tail call.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
+ }
+ }
+
+ // If this is a varargs call, we will clear the internal candidates in case we need
+ // to reserve some integer registers for copying float args.
+ // We have to do this because otherwise the default candidates are allRegs, and adding
+ // the individual specific registers will have no effect.
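+ // (the Windows x64 varargs convention requires floating-point register arguments to be
+ // shadowed in the corresponding integer argument registers)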
+ if (call->IsVarargs())
+ {
+ info->setInternalCandidates(l, RBM_NONE);
+ }
+
+ RegisterType registerType = call->TypeGet();
+
+ // Set destination candidates for return value of the call.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
+ {
+ // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
+ // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
+ // correct argument registers.
+ info->setDstCandidates(l, RBM_PINVOKE_TCB);
+ }
+ else
+#endif // _TARGET_X86_
+ if (hasMultiRegRetVal)
+ {
+ assert(retTypeDesc != nullptr);
+ info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
+ }
+ else if (varTypeIsFloating(registerType))
+ {
+#ifdef _TARGET_X86_
+ // The return value will be on the X87 stack, and we will need to move it.
+ info->setDstCandidates(l, l->allRegs(registerType));
+#else // !_TARGET_X86_
+ info->setDstCandidates(l, RBM_FLOATRET);
+#endif // !_TARGET_X86_
+ }
+ else if (registerType == TYP_LONG)
+ {
+ info->setDstCandidates(l, RBM_LNGRET);
+ }
+ else
+ {
+ info->setDstCandidates(l, RBM_INTRET);
+ }
+
+ // number of args to a call =
+ // callRegArgs + (callargs - placeholders, setup, etc)
+ // there is an explicit thisPtr but it is redundant
+
+ // If there is an explicit this pointer, we don't want that node to produce anything
+ // as it is redundant
+ if (call->gtCallObjp != nullptr)
+ {
+ GenTreePtr thisPtrNode = call->gtCallObjp;
+
+ if (thisPtrNode->gtOper == GT_PUTARG_REG)
+ {
+ l->clearOperandCounts(thisPtrNode);
+ l->clearDstCount(thisPtrNode->gtOp.gtOp1);
+ }
+ else
+ {
+ l->clearDstCount(thisPtrNode);
+ }
+ }
+
+#if FEATURE_VARARG
+ bool callHasFloatRegArgs = false;
+#endif // FEATURE_VARARG
+
+ // First, count reg args
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ // late arg that is not passed in a register
+ DISPNODE(argNode);
+ assert(argNode->gtOper == GT_PUTARG_STK);
+ argNode->gtLsraInfo.srcCount = 1;
+ argNode->gtLsraInfo.dstCount = 0;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If the node is TYP_STRUCT and it is put on stack with
+ // putarg_stk operation, we consume and produce no registers.
+ // In this case the embedded Obj node should not produce
+ // registers either, since it is contained.
+ // Note that if it is a SIMD type the argument will be in a register.
+ if (argNode->TypeGet() == TYP_STRUCT)
+ {
+ assert(argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_OBJ);
+ argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0;
+ argNode->gtLsraInfo.srcCount = 0;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ continue;
+ }
+
+ regNumber argReg = REG_NA;
+ regMaskTP argMask = RBM_NONE;
+ short regCount = 0;
+ bool isOnStack = true;
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ isOnStack = false;
+ var_types argType = argNode->TypeGet();
+
+#if FEATURE_VARARG
+ callHasFloatRegArgs |= varTypeIsFloating(argType);
+#endif // FEATURE_VARARG
+
+ argReg = curArgTabEntry->regNum;
+ regCount = 1;
+
+ // Default case is that we consume one source; modify this later (e.g. for
+ // promoted structs)
+ info->srcCount++;
+
+ argMask = genRegMask(argReg);
+ argNode = argNode->gtEffectiveVal();
+ }
+
+ // If the struct arg is wrapped in CPYBLK the type of the param will be TYP_VOID.
+ // Use the curArgTabEntry's isStruct to determine whether the param is a struct.
+ if (varTypeIsStruct(argNode) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
+ {
+ unsigned originalSize = 0;
+ LclVarDsc* varDsc = nullptr;
+ if (argNode->gtOper == GT_LCL_VAR)
+ {
+ varDsc = compiler->lvaTable + argNode->gtLclVarCommon.gtLclNum;
+ originalSize = varDsc->lvSize();
+ }
+ else if (argNode->gtOper == GT_MKREFANY)
+ {
+ originalSize = 2 * TARGET_POINTER_SIZE;
+ }
+ else if (argNode->gtOper == GT_OBJ)
+ {
+ noway_assert(!"GT_OBJ not supported for amd64");
+ }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ originalSize = genTypeSize(argNode->gtType);
+ }
+ else if (argNode->gtOper == GT_LIST)
+ {
+ originalSize = 0;
+
+ // There could be up to 2 PUTARG_REGs in the list
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ for (; argListPtr; argListPtr = argListPtr->Rest())
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+ if (iterationNum == 0)
+ {
+ varDsc = compiler->lvaTable + putArgRegNode->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ originalSize = varDsc->lvSize();
+ assert(originalSize != 0);
+ }
+ else
+ {
+ // Need an extra source for every node except the first in the list.
+ info->srcCount++;
+
+ // Get the mask for the second putarg_reg
+ argMask = genRegMask(curArgTabEntry->otherRegNum);
+ }
+
+ putArgRegNode->gtLsraInfo.setDstCandidates(l, argMask);
+ putArgRegNode->gtLsraInfo.setSrcCandidates(l, argMask);
+
+ // To avoid redundant moves, have the argument child tree computed in the
+ // register in which the argument is passed to the call.
+ putArgRegNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(putArgRegNode));
+ iterationNum++;
+ }
+
+ assert(iterationNum <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else
+ {
+ noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
+ }
+
+ unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
+ unsigned remainingSlots = slots;
+
+ if (!isOnStack)
+ {
+ remainingSlots = slots - 1;
+
+ regNumber reg = (regNumber)(argReg + 1);
+ while (remainingSlots > 0 && reg <= REG_ARG_LAST)
+ {
+ argMask |= genRegMask(reg);
+ reg = (regNumber)(reg + 1);
+ remainingSlots--;
+ regCount++;
+ }
+ }
+
+ short internalIntCount = 0;
+ if (remainingSlots > 0)
+ {
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // This TYP_STRUCT argument is also passed in the outgoing argument area
+ // We need a register to address the TYP_STRUCT
+ internalIntCount = 1;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // And we may need 2
+ internalIntCount = 2;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ }
+ argNode->gtLsraInfo.internalIntCount = internalIntCount;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ }
+ else
+ {
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+
+ // To avoid redundant moves, have the argument child tree computed in the
+ // register in which the argument is passed to the call.
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ argNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(argNode));
+ }
+
+#if FEATURE_VARARG
+ // In the case of a varargs call, the ABI dictates that if we have floating point args,
+ // we must pass the enregistered arguments in both the integer and floating point registers.
+ // Since the integer register is not associated with this arg node, we will reserve it as
+ // an internal register so that it is not used during the evaluation of the call node
+ // (e.g. for the target).
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ regNumber targetReg = compiler->getCallArgIntRegister(argReg);
+ info->setInternalIntCount(info->internalIntCount + 1);
+ info->addInternalCandidates(l, genRegMask(targetReg));
+ }
+#endif // FEATURE_VARARG
+ }
+
+ // Now, count stack args
+ // Note that these need to be computed into a register, but then
+ // they're just stored to the stack - so the reg doesn't
+ // need to remain live until the call. In fact, it must not
+ // because the code generator doesn't actually consider it live,
+ // so it can't be spilled.
+
+ GenTreePtr args = call->gtCallArgs;
+ while (args)
+ {
+ GenTreePtr arg = args->gtOp.gtOp1;
+ if (!(args->gtFlags & GTF_LATE_ARG))
+ {
+ TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
+#if !defined(_TARGET_64BIT_)
+ if (arg->TypeGet() == TYP_LONG)
+ {
+ assert(arg->OperGet() == GT_LONG);
+ GenTreePtr loArg = arg->gtGetOp1();
+ GenTreePtr hiArg = arg->gtGetOp2();
+ assert((loArg->OperGet() == GT_PUTARG_STK) && (hiArg->OperGet() == GT_PUTARG_STK));
+ assert((loArg->gtLsraInfo.dstCount == 1) && (hiArg->gtLsraInfo.dstCount == 1));
+ loArg->gtLsraInfo.isLocalDefUse = true;
+ hiArg->gtLsraInfo.isLocalDefUse = true;
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+ if (argInfo->dstCount != 0)
+ {
+ argInfo->isLocalDefUse = true;
+ }
+
+ // If the child of GT_PUTARG_STK is a constant, we don't need a register to
+ // move it to memory (stack location).
+ //
+ // On AMD64, we don't want to make 0 contained, because we can generate smaller code
+ // by zeroing a register and then storing it. E.g.:
+ // xor rdx, rdx
+ // mov gword ptr [rsp+28H], rdx
+ // is 2 bytes smaller than:
+ // mov gword ptr [rsp+28H], 0
+ //
+ // On x86, we push stack arguments; we don't use 'mov'. So:
+ // push 0
+ // is 1 byte smaller than:
+ // xor rdx, rdx
+ // push rdx
+
+ argInfo->dstCount = 0;
+ if (arg->gtOper == GT_PUTARG_STK)
+ {
+ GenTree* op1 = arg->gtOp.gtOp1;
+ if (IsContainableImmed(arg, op1)
+#if defined(_TARGET_AMD64_)
+ && !op1->IsIntegralConst(0)
+#endif // _TARGET_AMD64_
+ )
+ {
+ MakeSrcContained(arg, op1);
+ }
+ }
+ }
+ }
+ args = args->gtOp.gtOp2;
+ }
+
+#if FEATURE_VARARG
+ // If it is a fast tail call, it is already preferenced to use RAX.
+ // Therefore, there is no need to set src candidates on the call target again.
+ if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
+ {
+ // Don't assign the call target to any of the argument registers because
+ // we will use them to also pass floating point arguments as required
+ // by Amd64 ABI.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ }
+#endif // FEATURE_VARARG
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+//
+// Arguments:
+// blkNode - The block store node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+{
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ // Sources are dest address, initVal or source.
+ // We may require an additional source or temp register for the size.
+ blkNode->gtLsraInfo.srcCount = 2;
+ blkNode->gtLsraInfo.dstCount = 0;
+ blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
+ GenTreePtr srcAddrOrFill = nullptr;
+ bool isInitBlk = blkNode->OperIsInitBlkOp();
+
+ regMaskTP dstAddrRegMask = RBM_NONE;
+ regMaskTP sourceRegMask = RBM_NONE;
+ regMaskTP blkSizeRegMask = RBM_NONE;
+ if (!isInitBlk)
+ {
+ // CopyObj or CopyBlk
+ if ((blkNode->OperGet() == GT_STORE_OBJ) && ((blkNode->AsObj()->gtGcPtrCount == 0) || blkNode->gtBlkOpGcUnsafe))
+ {
+ blkNode->SetOper(GT_STORE_BLK);
+ }
+ if (source->gtOper == GT_IND)
+ {
+ srcAddrOrFill = blkNode->Data()->gtGetOp1();
+ // We're effectively setting source as contained, but can't call MakeSrcContained, because the
+ // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
+ // If srcAddr is already non-contained, we don't need to change it.
+ if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
+ {
+ srcAddrOrFill->gtLsraInfo.setDstCount(1);
+ srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
+ }
+ m_lsra->clearOperandCounts(source);
+ }
+ else if (!source->OperIsSIMD())
+ {
+ assert(source->IsLocal());
+ MakeSrcContained(blkNode, source);
+ }
+ }
+
+ if (isInitBlk)
+ {
+ GenTree* initVal = source;
+ srcAddrOrFill = source;
+ // If we have an InitBlk with constant block size we can optimize several ways:
+ // a) If the size is smaller than a small memory page but larger than INITBLK_UNROLL_LIMIT bytes
+ // we use rep stosb since this reduces the register pressure in LSRA and we have
+ // roughly the same performance as calling the helper.
+ // b) If the size is <= INITBLK_UNROLL_LIMIT bytes and the fill byte is a constant,
+ // we can speed this up by unrolling the loop using SSE2 stores. The reason for
+ // this threshold is that, per our last investigation (Fall 2013), more than 95% of initblks
+ // in our framework assemblies are actually <= INITBLK_UNROLL_LIMIT bytes in size, so this is the
+ // preferred code sequence for the vast majority of cases.
+
+ // This threshold decides between using the helper and letting the JIT inline
+ // a code sequence of its choice.
+ unsigned helperThreshold = max(INITBLK_STOS_LIMIT, INITBLK_UNROLL_LIMIT);
+
+ // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86
+ if (size != 0 && size <= helperThreshold)
+ {
+ // Always favor unrolling vs rep stos.
+ if (size <= INITBLK_UNROLL_LIMIT && initVal->IsCnsIntOrI())
+ {
+ // The fill value of an initblk is interpreted to hold a
+ // value of (unsigned int8); however, a constant of any size
+ // may practically reside on the evaluation stack. So we extract
+ // the lower byte of the initVal constant and replicate
+ // it into a larger constant whose size is sufficient to support
+ // the largest-width store of the desired inline expansion.
+
+ ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
+#ifdef _TARGET_AMD64_
+ if (size < REGSIZE_BYTES)
+ {
+ initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
+ }
+ else
+ {
+ initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill;
+ initVal->gtType = TYP_LONG;
+ }
+#else // !_TARGET_AMD64_
+ initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
+#endif // !_TARGET_AMD64_
+
+ // In case we have a buffer >= 16 bytes
+ // we can use SSE2 to do a 128-bit store in a single
+ // instruction.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // Reserve an XMM register to fill it with
+ // a pack of 16 init value constants.
+ blkNode->gtLsraInfo.internalFloatCount = 1;
+ blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ }
+ else
+ {
+ // rep stos has the following register requirements:
+ // a) The memory address has to be in RDI.
+ // b) The fill value has to be in RAX.
+ // c) The buffer size will go in RCX.
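+ //
+ // A rough sketch of the resulting sequence (exact instruction selection is decided in codegen):
+ //   mov rdi, <dstAddr>     ; destination address
+ //   mov rax, <fillValue>   ; fill value
+ //   mov rcx, <byteCount>   ; count in bytes
+ //   rep stosb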
+ dstAddrRegMask = RBM_RDI;
+ srcAddrOrFill = initVal;
+ sourceRegMask = RBM_RAX;
+ blkSizeRegMask = RBM_RCX;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+ }
+ }
+ else
+ {
+#ifdef _TARGET_AMD64_
+ // The helper follows the regular AMD64 ABI.
+ dstAddrRegMask = RBM_ARG_0;
+ sourceRegMask = RBM_ARG_1;
+ blkSizeRegMask = RBM_ARG_2;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+#else // !_TARGET_AMD64_
+ dstAddrRegMask = RBM_RDI;
+ sourceRegMask = RBM_RAX;
+ blkSizeRegMask = RBM_RCX;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+#endif // !_TARGET_AMD64_
+ }
+ }
+ else if (blkNode->gtOper == GT_STORE_OBJ)
+ {
+ // CopyObj
+
+ GenTreeObj* cpObjNode = blkNode->AsObj();
+
+ unsigned slots = cpObjNode->gtSlots;
+
+#ifdef DEBUG
+ // CpObj must always have at least one GC-Pointer as a member.
+ assert(cpObjNode->gtGcPtrCount > 0);
+
+ assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL);
+
+ CORINFO_CLASS_HANDLE clsHnd = cpObjNode->gtClass;
+ size_t classSize = comp->info.compCompHnd->getClassSize(clsHnd);
+ size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE);
+
+ // Currently, the EE always rounds up a class data structure, so
+ // we are not handling the case of a struct whose size is not a multiple of the pointer size.
+ // This behavior may change in the future, so in order to keep things correct
+ // let's assert it just to be safe. Going forward we should simply
+ // handle this case.
+ assert(classSize == blkSize);
+ assert((blkSize / TARGET_POINTER_SIZE) == slots);
+ assert(cpObjNode->HasGCPtr());
+#endif
+
+ bool IsRepMovsProfitable = false;
+
+ // If the destination is not on the stack, let's find out if we
+ // can improve code size by using rep movsq instead of generating
+ // sequences of movsq instructions.
+ if (!dstAddr->OperIsLocalAddr())
+ {
+ // Let's inspect the struct/class layout and determine if it's profitable
+ // to use rep movsq for copying non-gc memory instead of using single movsq
+ // instructions for each memory slot.
+ unsigned i = 0;
+ BYTE* gcPtrs = cpObjNode->gtGcPtrs;
+
+ do
+ {
+ unsigned nonGCSlots = 0;
+ // Measure a contiguous non-gc area inside the struct and note the maximum.
+ while (i < slots && gcPtrs[i] == TYPE_GC_NONE)
+ {
+ nonGCSlots++;
+ i++;
+ }
+
+ while (i < slots && gcPtrs[i] != TYPE_GC_NONE)
+ {
+ i++;
+ }
+
+ if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ IsRepMovsProfitable = true;
+ break;
+ }
+ } while (i < slots);
+ }
+ else if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ IsRepMovsProfitable = true;
+ }
+
+ // There are two cases in which we need to materialize the
+ // struct size:
+ // a) When the destination is on the stack we don't need to use the
+ // write barrier; we can simply use rep movsq and get a win in code size.
+ // b) If we determine we have contiguous non-gc regions in the struct where it's profitable
+ // to use rep movsq instead of a sequence of single movsq instructions. According to the
+ // Intel Manual, the sweet spot for small structs is between 4 and 12 slots in size, where
+ // the entire operation takes 20 cycles and encodes in 5 bytes (moving RCX, and calling rep movsq).
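+ //
+ // Illustrative shape for a contiguous run of N non-GC slots (assuming RSI/RDI already
+ // point at the source/destination):
+ //   mov rcx, N
+ //   rep movsq            ; instead of N back-to-back movsq instructions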
+ if (IsRepMovsProfitable)
+ {
+ // We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq.
+ blkSizeRegMask = RBM_RCX;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+ }
+ else
+ {
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ }
+
+ dstAddrRegMask = RBM_RDI;
+
+ // The srcAddr must be in a register. If it was under a GT_IND, we need to subsume all of its
+ // sources.
+ sourceRegMask = RBM_RSI;
+ }
+ else
+ {
+ assert((blkNode->OperGet() == GT_STORE_BLK) || (blkNode->OperGet() == GT_STORE_DYN_BLK));
+ // CopyBlk
+ // In case of a CpBlk with a constant size smaller than CPBLK_MOVS_LIMIT
+ // we can use rep movs to generate code instead of the helper call.
+
+ // This threshold decides between using the helper and letting the JIT inline
+ // a code sequence of its choice.
+ unsigned helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT);
+
+ // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86
+ if ((size != 0) && (size <= helperThreshold))
+ {
+ // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
+ // Structs and buffers with sizes <= CPBLK_UNROLL_LIMIT bytes occur in more than 95% of
+ // our framework assemblies, so this is the main code generation scheme we'll use.
+ if (size <= CPBLK_UNROLL_LIMIT)
+ {
+ // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
+ //
+ // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
+ // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
+ // RBM_NON_BYTE_REGS from internal candidates.
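+ // For example (illustrative), the final 1-byte copy on x86 looks roughly like:
+ //   mov al, byte ptr [src + N]
+ //   mov byte ptr [dst + N], al   ; AL/BL/CL/DL are the only byte-addressable candidates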
+ if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
+ {
+ blkNode->gtLsraInfo.internalIntCount++;
+ regMaskTP regMask = l->allRegs(TYP_INT);
+
+#ifdef _TARGET_X86_
+ if ((size % 2) != 0)
+ {
+ regMask &= ~RBM_NON_BYTE_REGS;
+ }
+#endif
+ blkNode->gtLsraInfo.setInternalCandidates(l, regMask);
+ }
+
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // If we have a buffer larger than XMM_REGSIZE_BYTES,
+ // reserve an XMM register to use it for a
+ // series of 16-byte loads and stores.
+ blkNode->gtLsraInfo.internalFloatCount = 1;
+ blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+
+ // If src or dst are on stack, we don't have to generate the address into a register
+ // because it's just some constant+SP
+ if (srcAddrOrFill != nullptr && srcAddrOrFill->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, srcAddrOrFill);
+ }
+
+ if (dstAddr->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, dstAddr);
+ }
+
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ }
+ else
+ {
+ blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
+ dstAddrRegMask = RBM_RDI;
+ sourceRegMask = RBM_RSI;
+ blkSizeRegMask = RBM_RCX;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+ }
+ }
+#ifdef _TARGET_AMD64_
+ else
+ {
+ // If we have a constant size here, it means we went beyond
+ // CPBLK_MOVS_LIMIT bytes; even so, we should never have any
+ // GC pointers in the src struct in this case.
+ blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
+ dstAddrRegMask = RBM_ARG_0;
+ sourceRegMask = RBM_ARG_1;
+ blkSizeRegMask = RBM_ARG_2;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+ }
+#elif defined(_TARGET_X86_)
+ else
+ {
+ dstAddrRegMask = RBM_RDI;
+ sourceRegMask = RBM_RSI;
+ blkSizeRegMask = RBM_RCX;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+ }
+#endif // _TARGET_X86_
+ assert(blkNode->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid);
+ }
+ if (dstAddrRegMask != RBM_NONE)
+ {
+ dstAddr->gtLsraInfo.setSrcCandidates(l, dstAddrRegMask);
+ }
+ if (sourceRegMask != RBM_NONE)
+ {
+ if (srcAddrOrFill != nullptr)
+ {
+ srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, sourceRegMask);
+ }
+ else
+ {
+ // This is a local source; we'll use a temp register for its address.
+ blkNode->gtLsraInfo.addInternalCandidates(l, sourceRegMask);
+ blkNode->gtLsraInfo.internalIntCount++;
+ }
+ }
+ if (blkSizeRegMask != RBM_NONE)
+ {
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ blkNode->gtLsraInfo.addInternalCandidates(l, blkSizeRegMask);
+ blkNode->gtLsraInfo.internalIntCount++;
+ }
+ else
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ blkNode->gtLsraInfo.setSrcCount(3);
+ GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
+ blockSize->gtLsraInfo.setSrcCandidates(l, blkSizeRegMask);
+ }
+ }
+}
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitPutArgStk(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ if (tree->TypeGet() != TYP_STRUCT)
+ {
+ TreeNodeInfoInitSimple(tree);
+ return;
+ }
+
+ GenTreePutArgStk* putArgStkTree = tree->AsPutArgStk();
+
+ GenTreePtr dst = tree;
+ GenTreePtr src = tree->gtOp.gtOp1;
+ GenTreePtr srcAddr = nullptr;
+
+ if ((src->OperGet() == GT_OBJ) || (src->OperGet() == GT_IND))
+ {
+ srcAddr = src->gtOp.gtOp1;
+ }
+ else
+ {
+ assert(varTypeIsSIMD(tree));
+ }
+ info->srcCount = src->gtLsraInfo.dstCount;
+
+ // If this is a stack variable address,
+ // make op1 contained so that there is no unnecessary
+ // copying between registers.
+ // To avoid an assertion, increment the parent's source count;
+ // it is balanced back out below.
+ bool haveLocalAddr = ((srcAddr != nullptr) && (srcAddr->OperIsLocalAddr()));
+ if (haveLocalAddr)
+ {
+ info->srcCount += 1;
+ }
+
+ info->dstCount = 0;
+
+ // In case of a CpBlk we could use a helper call. In case of putarg_stk we
+ // can't do that since the helper call could kill some already set up outgoing args.
+ // TODO-Amd64-Unix: converge the code for putarg_stk with cpyblk/cpyobj.
+ // The cpyXXXX code is rather complex and this could cause it to be more complex, but
+ // it might be the right thing to do.
+
+ // This threshold decides between using the helper and letting the JIT inline
+ // a code sequence of its choice.
+ ssize_t helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT);
+ ssize_t size = putArgStkTree->gtNumSlots * TARGET_POINTER_SIZE;
+
+ // TODO-X86-CQ: The helper call either is not supported on x86 or requires more work
+ // (it is not clear which).
+
+ // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
+ // Structs and buffers with sizes <= CPBLK_UNROLL_LIMIT bytes occur in more than 95% of
+ // our framework assemblies, so this is the main code generation scheme we'll use.
+ if (size <= CPBLK_UNROLL_LIMIT && putArgStkTree->gtNumberReferenceSlots == 0)
+ {
+ // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
+ //
+ // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
+ // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
+ // RBM_NON_BYTE_REGS from internal candidates.
+ if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
+ {
+ info->internalIntCount++;
+ regMaskTP regMask = l->allRegs(TYP_INT);
+
+#ifdef _TARGET_X86_
+ if ((size % 2) != 0)
+ {
+ regMask &= ~RBM_NON_BYTE_REGS;
+ }
+#endif
+ info->setInternalCandidates(l, regMask);
+ }
+
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // If we have a buffer larger than XMM_REGSIZE_BYTES,
+ // reserve an XMM register to use it for a
+ // series of 16-byte loads and stores.
+ info->internalFloatCount = 1;
+ info->addInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+
+ if (haveLocalAddr)
+ {
+ MakeSrcContained(putArgStkTree, srcAddr);
+ }
+
+ // If src or dst are on stack, we don't have to generate the address into a register
+ // because it's just some constant+SP
+ putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindUnroll;
+ }
+ else
+ {
+ info->internalIntCount += 3;
+ info->setInternalCandidates(l, (RBM_RDI | RBM_RCX | RBM_RSI));
+ if (haveLocalAddr)
+ {
+ MakeSrcContained(putArgStkTree, srcAddr);
+ }
+
+ putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindRepInstr;
+ }
+
+ // Always mark the OBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree.
+ MakeSrcContained(putArgStkTree, src);
+
+ // Balance up the inc above.
+ if (haveLocalAddr)
+ {
+ info->srcCount -= 1;
+ }
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitLclHeap: Set the NodeInfo for a GT_LCLHEAP.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
+ // Here '-' means don't care.
+ //
+ // Size?                    Init Memory?   # temp regs
+ // 0                        -              0
+ // const and <=6 reg words  -              0
+ // const and >6 reg words   Yes            0
+ // const and <PageSize      No             0 (amd64); 1 (x86)
+ // const and >=PageSize     No             2
+ // Non-const                Yes            0
+ // Non-const                No             2
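+ //
+ // For example (per the table above; an assumption about a typical 0x1000-byte page):
+ // a localloc of a constant 0x3000 bytes with compInitMem == false falls in the
+ // "const and >=PageSize / No" row, so two internal registers are reserved for the
+ // page-probing loop in genLclHeap().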
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ if (size->IsCnsIntOrI())
+ {
+ MakeSrcContained(tree, size);
+
+ size_t sizeVal = size->gtIntCon.gtIconVal;
+
+ if (sizeVal == 0)
+ {
+ info->internalIntCount = 0;
+ }
+ else
+ {
+ // Compute the amount of memory to properly STACK_ALIGN.
+ // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
+ // This should also help in debugging as we can examine the original size specified with localloc.
+ sizeVal = AlignUp(sizeVal, STACK_ALIGN);
+
+ // For small allocations up to 6 pointer sized words (i.e. 48 bytes of localloc)
+ // we will generate 'push 0'.
+ assert((sizeVal % REGSIZE_BYTES) == 0);
+ size_t cntRegSizedWords = sizeVal / REGSIZE_BYTES;
+ if (cntRegSizedWords <= 6)
+ {
+ info->internalIntCount = 0;
+ }
+ else if (!compiler->info.compInitMem)
+ {
+ // No need to initialize allocated stack space.
+ if (sizeVal < compiler->eeGetPageSize())
+ {
+#ifdef _TARGET_X86_
+ info->internalIntCount = 1; // x86 needs a register here to avoid generating "sub" on ESP.
+#else // !_TARGET_X86_
+ info->internalIntCount = 0;
+#endif // !_TARGET_X86_
+ }
+ else
+ {
+ // We need two registers: regCnt and RegTmp
+ info->internalIntCount = 2;
+ }
+ }
+ else
+ {
+ // >6 and need to zero initialize allocated stack space.
+ info->internalIntCount = 0;
+ }
+ }
+ }
+ else
+ {
+ if (!compiler->info.compInitMem)
+ {
+ info->internalIntCount = 2;
+ }
+ else
+ {
+ info->internalIntCount = 0;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitLogicalOp: Set the NodeInfo for GT_AND/GT_OR/GT_XOR,
+// as well as GT_ADD/GT_SUB.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitLogicalOp(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ // We're not marking a constant hanging on the left of the add
+ // as containable, so we assign it to a register; this has a CQ impact.
+ // TODO-XArch-CQ: Detect this case and generate a single instruction
+ // for GT_ADD(Constant, SomeTree).
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ // We can directly encode the second operand if it is either a containable constant or a memory-op.
+ // In case of memory-op, we can encode it directly provided its type matches with 'tree' type.
+ // This is because during codegen, type of 'tree' is used to determine emit Type size. If the types
+ // do not match, they get normalized (i.e. sign/zero extended) on load into a register.
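+ //
+ // For example (illustrative), containing a matching-type memory op2 lets codegen emit
+ //   add eax, dword ptr [mem]
+ // instead of loading [mem] into a temporary register and then adding it.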
+ bool directlyEncodable = false;
+ bool binOpInRMW = false;
+ GenTreePtr operand = nullptr;
+
+ if (IsContainableImmed(tree, op2))
+ {
+ directlyEncodable = true;
+ operand = op2;
+ }
+ else
+ {
+ binOpInRMW = IsBinOpInRMWStoreInd(tree);
+ if (!binOpInRMW)
+ {
+ if (op2->isMemoryOp() && tree->TypeGet() == op2->TypeGet())
+ {
+ directlyEncodable = true;
+ operand = op2;
+ }
+ else if (tree->OperIsCommutative())
+ {
+ if (IsContainableImmed(tree, op1) ||
+ (op1->isMemoryOp() && tree->TypeGet() == op1->TypeGet() && IsSafeToContainMem(tree, op1)))
+ {
+ // If it is safe, we can reverse the order of operands of commutative operations for efficient
+ // codegen
+ directlyEncodable = true;
+ operand = op1;
+ }
+ }
+ }
+ }
+
+ if (directlyEncodable)
+ {
+ assert(operand != nullptr);
+ MakeSrcContained(tree, operand);
+ }
+ else if (!binOpInRMW)
+ {
+ // If this binary op neither has contained operands, nor is a
+ // Read-Modify-Write (RMW) operation, we can mark its operands
+ // as reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitModDiv: Set the NodeInfo for GT_MOD/GT_DIV/GT_UMOD/GT_UDIV.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ switch (tree->OperGet())
+ {
+ case GT_MOD:
+ case GT_DIV:
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ // No implicit conversions at this stage as the expectation is that
+ // everything is made explicit by adding casts.
+ assert(op1->TypeGet() == op2->TypeGet());
+
+ if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ // SSE2 allows only op2 to be a memory-op.
+ SetRegOptional(op2);
+ }
+
+ return;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ // Amd64 Div/Idiv instruction:
+ // Takes the dividend in RDX:RAX and computes
+ // the quotient in RAX and the remainder in RDX.
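+ //
+ // For example, a 32-bit signed divide is roughly:
+ //   mov eax, <dividend>
+ //   cdq                  ; sign-extend EAX into EDX:EAX
+ //   idiv <divisor>       ; quotient -> EAX, remainder -> EDX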
+
+ if (tree->OperGet() == GT_MOD || tree->OperGet() == GT_UMOD)
+ {
+ // We are interested in just the remainder.
+ // RAX is used as a trashable register during computation of remainder.
+ info->setDstCandidates(l, RBM_RDX);
+ }
+ else
+ {
+ // We are interested in just the quotient.
+ // RDX gets used as trashable register during computation of quotient
+ info->setDstCandidates(l, RBM_RAX);
+ }
+
+ // If possible, we would like to have op1 in RAX to avoid a register move.
+ op1->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
+
+ // divisor can be an r/m, but the memory indirection must be of the same size as the divide
+ if (op2->isMemoryOp() && (op2->TypeGet() == tree->TypeGet()))
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else
+ {
+ op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
+
+ // If there are no containable operands, we can make an operand reg optional.
+ // Div instruction allows only op2 to be a memory op.
+ SetRegOptional(op2);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitIntrinsic: Set the NodeInfo for a GT_INTRINSIC.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ // Both operand and its result must be of floating point type.
+ GenTree* op1 = tree->gtGetOp1();
+ assert(varTypeIsFloating(op1));
+ assert(op1->TypeGet() == tree->TypeGet());
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sqrt:
+ if (op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, op1);
+ }
+ else
+ {
+ // Mark the operand as reg optional since codegen can still
+ // generate code if op1 is on stack.
+ SetRegOptional(op1);
+ }
+ break;
+
+ case CORINFO_INTRINSIC_Abs:
+ // Abs(float x) = x & 0x7fffffff
+ // Abs(double x) = x & 0x7fffffffffffffff
+
+ // In case of Abs we need an internal register to hold mask.
+
+ // TODO-XArch-CQ: avoid using an internal register for the mask.
+ // Andps or andpd both will operate on 128-bit operands.
+ // The data section constant that holds the mask is 64 bits in size.
+ // Therefore, we need both the operand and the mask to be in
+ // an xmm register. When we add support in the emitter to emit 128-bit
+ // data constants and instructions that operate on 128-bit
+ // memory operands, we can avoid the need for an internal register.
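+ //
+ // Illustrative sequence for Abs(double), assuming the mask constant is loaded
+ // into the internal xmm register:
+ //   movsd xmm1, qword ptr [maskConstant]   ; 0x7fffffffffffffff
+ //   andps xmm0, xmm1                       ; clears the sign bit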
+ if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs)
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+ break;
+
+#ifdef _TARGET_X86_
+ case CORINFO_INTRINSIC_Cos:
+ case CORINFO_INTRINSIC_Sin:
+ case CORINFO_INTRINSIC_Round:
+ NYI_X86("Math intrinsics Cos, Sin and Round");
+ break;
+#endif // _TARGET_X86_
+
+ default:
+ // Right now only Sqrt/Abs are treated as math intrinsics
+ noway_assert(!"Unsupported math intrinsic");
+ unreached();
+ break;
+ }
+}
+
+#ifdef FEATURE_SIMD
+//------------------------------------------------------------------------
+// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
+//
+// Arguments:
+// tree - The GT_SIMD node of interest
+//
+// Return Value:
+// None.
+
+void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
+{
+ GenTreeSIMD* simdTree = tree->AsSIMD();
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* lsra = m_lsra;
+ info->dstCount = 1;
+ switch (simdTree->gtSIMDIntrinsicID)
+ {
+ GenTree* op2;
+
+ case SIMDIntrinsicInit:
+ {
+ info->srcCount = 1;
+ GenTree* op1 = tree->gtOp.gtOp1;
+
+ // This sets all fields of a SIMD struct to the given value.
+ // Mark op1 as contained if it is either zero or an int constant of all 1's,
+ // or a float constant with a 16- or 32-byte simdType (AVX case).
+ //
+ // Should never see small int base type vectors except for zero initialization.
+ assert(!varTypeIsSmallInt(simdTree->gtSIMDBaseType) || op1->IsIntegralConst(0));
+
+ if (op1->IsFPZero() || op1->IsIntegralConst(0) ||
+ (varTypeIsIntegral(simdTree->gtSIMDBaseType) && op1->IsIntegralConst(-1)))
+ {
+ MakeSrcContained(tree, tree->gtOp.gtOp1);
+ info->srcCount = 0;
+ }
+ else if ((comp->getSIMDInstructionSet() == InstructionSet_AVX) &&
+ ((simdTree->gtSIMDSize == 16) || (simdTree->gtSIMDSize == 32)))
+ {
+ // Either op1 is a float or dbl constant or an addr
+ if (op1->IsCnsFltOrDbl() || op1->OperIsLocalAddr())
+ {
+ MakeSrcContained(tree, tree->gtOp.gtOp1);
+ info->srcCount = 0;
+ }
+ }
+ }
+ break;
+
+ case SIMDIntrinsicInitN:
+ {
+ info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
+
+ // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicInitArray:
+ // We have an array and an index, which may be contained.
+ info->srcCount = 2;
+ CheckImmedAndMakeContained(tree, tree->gtGetOp2());
+ break;
+
+ case SIMDIntrinsicDiv:
+ // SSE2 has no instruction support for division on integer vectors
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicAbs:
+ // This gets implemented as bitwise-And operation with a mask
+ // and hence should never see it here.
+ unreached();
+ break;
+
+ case SIMDIntrinsicSqrt:
+ // SSE2 has no instruction support for sqrt on integer vectors.
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicSub:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseAndNot:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMax:
+ info->srcCount = 2;
+
+ // SSE2 32-bit integer multiplication requires two temp regs
+ if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT)
+ {
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicEqual:
+ info->srcCount = 2;
+ break;
+
+ // SSE2 doesn't support < and <= directly on int vectors.
+ // Instead we need to use > and >= with swapped operands.
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ info->srcCount = 2;
+ noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
+ break;
+
+ // SIMDIntrinsicGreaterThan is supported here only on non-floating point base type vectors.
+ // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors.
+ // Instead we need to use < and <= with swapped operands.
+ case SIMDIntrinsicGreaterThan:
+ noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ // Need two SIMD registers as scratch.
+ // See genSIMDIntrinsicRelOp() for details on the code sequence generated and
+ // the need for two scratch registers.
+ info->srcCount = 2;
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicDotProduct:
+ if ((comp->getSIMDInstructionSet() == InstructionSet_SSE2) ||
+ (simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32))
+ {
+ // For SSE, or AVX with 32-byte vectors, we also need an internal register as scratch.
+ // Further we need the targetReg and internal reg to be distinct registers.
+ // This is achieved by requesting two internal registers; thus one of them
+ // will be different from targetReg.
+ // Note that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg.
+ //
+ // See genSIMDIntrinsicDotProduct() for details on code sequence generated and
+ // the need for scratch registers.
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicGetItem:
+ // This implements get_Item method. The sources are:
+ // - the source SIMD struct
+ // - index (which element to get)
+ // The result is baseType of SIMD struct.
+ info->srcCount = 2;
+ op2 = tree->gtOp.gtOp2;
+
+ // If the index is a constant, mark it as contained.
+ if (CheckImmedAndMakeContained(tree, op2))
+ {
+ info->srcCount = 1;
+ }
+
+ // If the index is not a constant, we will use the SIMD temp location to store the vector.
+ // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
+ // can use that in the process of extracting the element.
+ //
+ // If the index is a constant and base type is a small int we can use pextrw, but on AVX
+ // we will need a temp if we are indexing into the upper half of the AVX register.
+ // In all other cases with constant index, we need a temp xmm register to extract the
+ // element if index is other than zero.
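+ //
+ // For example (illustrative), a constant-index extract of a 16-bit element can be:
+ //   pextrw eax, xmm0, 2    ; pull element 2 into a general purpose register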
+
+ if (!op2->IsCnsIntOrI())
+ {
+ (void)comp->getSIMDInitTempVarNum();
+ }
+ else if (!varTypeIsFloating(simdTree->gtSIMDBaseType))
+ {
+ bool needFloatTemp;
+ if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) &&
+ (comp->getSIMDInstructionSet() == InstructionSet_AVX))
+ {
+ int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType);
+ needFloatTemp = (byteShiftCnt >= 16);
+ }
+ else
+ {
+ needFloatTemp = !op2->IsIntegralConst(0);
+ }
+ if (needFloatTemp)
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ }
+ break;
+
+ case SIMDIntrinsicSetX:
+ case SIMDIntrinsicSetY:
+ case SIMDIntrinsicSetZ:
+ case SIMDIntrinsicSetW:
+ // We need an internal integer register
+ info->srcCount = 2;
+ info->internalIntCount = 1;
+ info->setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
+ break;
+
+ case SIMDIntrinsicCast:
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicShuffleSSE2:
+ info->srcCount = 2;
+ // Second operand is an integer constant and marked as contained.
+ op2 = tree->gtOp.gtOp2;
+ noway_assert(op2->IsCnsIntOrI());
+ MakeSrcContained(tree, op2);
+ break;
+
+ case SIMDIntrinsicGetX:
+ case SIMDIntrinsicGetY:
+ case SIMDIntrinsicGetZ:
+ case SIMDIntrinsicGetW:
+ case SIMDIntrinsicGetOne:
+ case SIMDIntrinsicGetZero:
+ case SIMDIntrinsicGetCount:
+ case SIMDIntrinsicGetAllOnes:
+ assert(!"Get intrinsics should not be seen during Lowering.");
+ unreached();
+
+ default:
+ noway_assert(!"Unimplemented SIMD node type.");
+ unreached();
+ }
+}
+#endif // FEATURE_SIMD
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCast: Set the NodeInfo for a GT_CAST.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCast(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ // TODO-XArch-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned register.
+ // see CodeGen::genIntToIntCast()
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Non-overflow casts to/from float/double are done using SSE2 instructions,
+ // which allow the source operand to be either a reg or a memop. Given the
+ // fact that casts from small int to float/double are done as two-level casts,
+ // the source operand is always guaranteed to be of size 4 or 8 bytes.
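+ //
+ // For example (illustrative), a contained int-typed memory source folds into the convert:
+ //   cvtsi2sd xmm0, dword ptr [mem]   ; int32 -> double, r/m source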
+ var_types castToType = tree->CastToType();
+ GenTreePtr castOp = tree->gtCast.CastOp();
+ var_types castOpType = castOp->TypeGet();
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ castOpType = genUnsignedType(castOpType);
+ }
+
+ if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType)))
+ {
+#ifdef DEBUG
+ // If converting to float/double, the operand must be 4 or 8 byte in size.
+ if (varTypeIsFloating(castToType))
+ {
+ unsigned opSize = genTypeSize(castOpType);
+ assert(opSize == 4 || opSize == 8);
+ }
+#endif // DEBUG
+
+ // U8 -> R8 conversion requires that the operand be in a register.
+ if (castOpType != TYP_ULONG)
+ {
+ if (castOp->isMemoryOp() || castOp->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, castOp);
+ }
+ else
+ {
+ // Mark castOp as reg optional to indicate codegen
+ // can still generate code if it is on stack.
+ SetRegOptional(castOp);
+ }
+ }
+ }
+
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(castOpType))
+ {
+ noway_assert(castOp->OperGet() == GT_LONG);
+ info->srcCount = 2;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ // some overflow checks need a temp reg:
+ // - GT_CAST from INT64/UINT64 to UINT32
+ if (tree->gtOverflow() && (castToType == TYP_UINT))
+ {
+ if (genTypeSize(castOpType) == 8)
+ {
+ info->internalIntCount = 1;
+ }
+ }
+}
+
+void Lowering::LowerGCWriteBarrier(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_STOREIND);
+
+ GenTreeStoreInd* dst = tree->AsStoreInd();
+ GenTreePtr addr = dst->Addr();
+ GenTreePtr src = dst->Data();
+
+ if (addr->OperGet() == GT_LEA)
+ {
+ // In the case where we are doing a helper assignment, if the dst
+ // is an indir through an lea, we need to actually instantiate the
+ // lea in a register
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+
+ int leaSrcCount = 0;
+ if (lea->HasBase())
+ {
+ leaSrcCount++;
+ }
+ if (lea->HasIndex())
+ {
+ leaSrcCount++;
+ }
+ lea->gtLsraInfo.srcCount = leaSrcCount;
+ lea->gtLsraInfo.dstCount = 1;
+ }
+
+ bool useOptimizedWriteBarrierHelper = false; // By default, assume no optimized write barriers.
+
+#if NOGC_WRITE_BARRIERS
+
+#if defined(_TARGET_X86_)
+
+ useOptimizedWriteBarrierHelper = true; // On x86, use the optimized write barriers by default.
+#ifdef DEBUG
+ GCInfo::WriteBarrierForm wbf = comp->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, src);
+ if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
+ {
+ useOptimizedWriteBarrierHelper = false;
+ }
+#endif
+
+ if (useOptimizedWriteBarrierHelper)
+ {
+ // Special write barrier:
+ // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and
+ // op2 (src) goes into any int register.
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_SRC);
+ }
+
+#else // !defined(_TARGET_X86_)
+#error "NOGC_WRITE_BARRIERS is not supported"
+#endif // !defined(_TARGET_X86_)
+
+#endif // NOGC_WRITE_BARRIERS
+
+ if (!useOptimizedWriteBarrierHelper)
+ {
+ // For the standard JIT Helper calls:
+ // op1 (addr) goes into REG_ARG_0 and
+ // op2 (src) goes into REG_ARG_1
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
+ }
+
+ // Both src and dst must reside in a register, which they should since we haven't set
+ // either of them as contained.
+ assert(addr->gtLsraInfo.dstCount == 1);
+ assert(src->gtLsraInfo.dstCount == 1);
+}
+
+//-----------------------------------------------------------------------------------------
+// Specify register requirements for address expression of an indirection operation.
+//
+// Arguments:
+// indirTree - GT_IND or GT_STOREIND gentree node
+//
+void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
+{
+ assert(indirTree->isIndir());
+ // If this is the rhs of a block copy (i.e. non-enregisterable struct),
+ // it has no register requirements.
+ if (indirTree->TypeGet() == TYP_STRUCT)
+ {
+ return;
+ }
+
+ GenTreePtr addr = indirTree->gtGetOp1();
+ TreeNodeInfo* info = &(indirTree->gtLsraInfo);
+
+ GenTreePtr base = nullptr;
+ GenTreePtr index = nullptr;
+ unsigned mul, cns;
+ bool rev;
+ bool modifiedSources = false;
+
+#ifdef FEATURE_SIMD
+ // If indirTree is of TYP_SIMD12, don't mark addr as contained
+ // so that it always get computed to a register. This would
+ // mean codegen side logic doesn't need to handle all possible
+ // addr expressions that could be contained.
+ //
+ // TODO-XArch-CQ: handle other addr mode expressions that could be marked
+ // as contained.
+ if (indirTree->TypeGet() == TYP_SIMD12)
+ {
+ // Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
+ // To assemble the vector properly we would need an additional
+ // XMM register.
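+ //
+ // For example (illustrative), a TYP_SIMD12 load is performed roughly as:
+ //   movsd xmm0, qword ptr [addr]       ; lower 8 bytes
+ //   movss xmm1, dword ptr [addr + 8]   ; upper 4 bytes
+ // and the two halves are then combined into a single XMM register.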
+ info->internalFloatCount = 1;
+
+ // In case of GT_IND we need an internal register different from targetReg, and
+ // both of the registers are used at the same time. This is achieved by reserving
+ // two internal registers.
+ if (indirTree->OperGet() == GT_IND)
+ {
+ (info->internalFloatCount)++;
+ }
+
+ info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
+
+ return;
+ }
+#endif // FEATURE_SIMD
+
+ // These nodes go into an addr mode:
+ // - GT_CLS_VAR_ADDR turns into a constant.
+ // - GT_LCL_VAR_ADDR is a stack addr mode.
+ if ((addr->OperGet() == GT_CLS_VAR_ADDR) || (addr->OperGet() == GT_LCL_VAR_ADDR))
+ {
+ // make this contained, it turns into a constant that goes into an addr mode
+ MakeSrcContained(indirTree, addr);
+ }
+ else if (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp) &&
+ addr->gtLsraInfo.getDstCandidates(m_lsra) != RBM_VIRTUAL_STUB_PARAM)
+ {
+ // Amd64:
+ // We can mark any pc-relative 32-bit addr as containable, except for a direct VSD call address.
+ // (i.e. those VSD calls for which stub addr is known during JIT compilation time). In this case,
+ // VM requires us to pass stub addr in REG_VIRTUAL_STUB_PARAM - see LowerVirtualStubCall(). For
+ // that reason we cannot mark such an addr as contained. Note that this is not an issue for
+ // indirect VSD calls since morphArgs() is explicitly materializing hidden param as a non-standard
+ // argument.
+ //
+ // Workaround:
+ // Note that LowerVirtualStubCall() sets addr->gtRegNum to REG_VIRTUAL_STUB_PARAM and Lowering::doPhase()
+ // sets destination candidates on such nodes and resets addr->gtRegNum to REG_NA before calling
+ // TreeNodeInfoInit(). Ideally we should set a flag on addr nodes that shouldn't be marked as contained
+ // (in LowerVirtualStubCall()), but we don't have any GTF_* flags left for that purpose. As a workaround
+ // an explicit check is made here.
+ //
+ // On x86, direct VSD is done via a relative branch, and in fact it MUST be contained.
+ MakeSrcContained(indirTree, addr);
+ }
+ else if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
+ {
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ base = lea->Base();
+ index = lea->Index();
+
+ m_lsra->clearOperandCounts(addr);
+ // The srcCount is decremented because addr is now "contained",
+ // then we account for the base and index below, if they are non-null.
+ info->srcCount--;
+ }
+ else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
+ !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
+ {
+ // An addressing mode will be constructed that may cause some
+ // nodes to not need a register, and cause others' lifetimes to be extended
+ // to the GT_IND or even its parent if it's an assignment
+
+ assert(base != addr);
+ m_lsra->clearOperandCounts(addr);
+
+ GenTreePtr arrLength = nullptr;
+
+ // Traverse the computation below GT_IND to find the operands
+ // for the addressing mode, marking the various constants and
+ // intermediate results as not consuming/producing.
+ // If the traversal were more complex, we might consider using
+ // a traversal function, but the addressing mode is only made
+ // up of simple arithmetic operators, and the code generator
+ // only traverses one leg of each node.
+
+ bool foundBase = (base == nullptr);
+ bool foundIndex = (index == nullptr);
+ GenTreePtr nextChild = nullptr;
+ for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
+ {
+ nextChild = nullptr;
+ GenTreePtr op1 = child->gtOp.gtOp1;
+ GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
+
+ if (op1 == base)
+ {
+ foundBase = true;
+ }
+ else if (op1 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op1);
+ if (!op1->OperIsLeaf())
+ {
+ nextChild = op1;
+ }
+ }
+
+ if (op2 != nullptr)
+ {
+ if (op2 == base)
+ {
+ foundBase = true;
+ }
+ else if (op2 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op2);
+ if (!op2->OperIsLeaf())
+ {
+ assert(nextChild == nullptr);
+ nextChild = op2;
+ }
+ }
+ }
+ }
+ assert(foundBase && foundIndex);
+ info->srcCount--; // it gets incremented below.
+ }
+ else if (addr->gtOper == GT_ARR_ELEM)
+ {
+ // The GT_ARR_ELEM consumes all the indices and produces the offset.
+ // The array object lives until the mem access.
+ // We also consume the target register to which the address is
+ // computed
+
+ info->srcCount++;
+ assert(addr->gtLsraInfo.srcCount >= 2);
+ addr->gtLsraInfo.srcCount -= 1;
+ }
+ else
+ {
+ // it is nothing but a plain indir
+ info->srcCount--; // base gets added in below
+ base = addr;
+ }
+
+ if (base != nullptr)
+ {
+ info->srcCount++;
+ }
+
+ if (index != nullptr && !modifiedSources)
+ {
+ info->srcCount++;
+ }
+}
+
+void Lowering::LowerCmp(GenTreePtr tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+#ifdef _TARGET_X86_
+ info->setDstCandidates(m_lsra, RBM_BYTE_REGS);
+#endif // _TARGET_X86_
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+#if !defined(_TARGET_64BIT_)
+ // Long compares will consume GT_LONG nodes, each of which produces two results.
+ // Thus for each long operand there will be an additional source.
+ // TODO-X86-CQ: Mark hiOp2 and loOp2 as contained if it is a constant or a memory op.
+ if (varTypeIsLong(op1Type))
+ {
+ info->srcCount++;
+ }
+ if (varTypeIsLong(op2Type))
+ {
+ info->srcCount++;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ // If either op1 or op2 is a floating point value, then we need to use
+ // ucomiss or ucomisd to compare, both of which support the following form:
+ // ucomis[s|d] xmm, xmm/mem. That is, only the second operand can be a memory
+ // op.
+ //
+ // Second operand is a memory Op: Note that depending on comparison operator,
+ // the operands of ucomis[s|d] need to be reversed. Therefore, either op1 or
+ // op2 can be a memory op depending on the comparison operator.
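+ //
+ // For example (illustrative), an ordered 'a < b' is emitted with swapped operands,
+ //   ucomisd xmm(b), a    ; 'a' is the r/m operand here
+ // whereas 'a > b' keeps the original order,
+ //   ucomisd xmm(a), b    ; 'b' is the r/m operand here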
+ if (varTypeIsFloating(op1Type))
+ {
+ // The type of the operands has to be the same and no implicit conversions at this stage.
+ assert(op1Type == op2Type);
+
+ bool reverseOps;
+ if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
+ {
+ // Unordered comparison case
+ reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
+ }
+ else
+ {
+ reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
+ }
+
+ GenTreePtr otherOp;
+ if (reverseOps)
+ {
+ otherOp = op1;
+ }
+ else
+ {
+ otherOp = op2;
+ }
+
+ assert(otherOp != nullptr);
+ if (otherOp->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, otherOp);
+ }
+ else if (otherOp->isMemoryOp() && ((otherOp == op2) || IsSafeToContainMem(tree, otherOp)))
+ {
+ MakeSrcContained(tree, otherOp);
+ }
+ else
+ {
+ // SSE2 allows only otherOp to be a memory-op. Since otherOp is not
+ // contained, we can mark it reg-optional.
+ SetRegOptional(otherOp);
+ }
+
+ return;
+ }
+
+ // TODO-XArch-CQ: factor out cmp optimization in 'genCondSetFlags' to be used here
+ // or in other backends.
+
+ bool hasShortCast = false;
+ if (CheckImmedAndMakeContained(tree, op2))
+ {
+ bool op1CanBeContained = (op1Type == op2Type);
+ if (!op1CanBeContained)
+ {
+ if (genTypeSize(op1Type) == genTypeSize(op2Type))
+ {
+ // The constant is of the correct size, but we don't have an exact type match,
+ // so we can still treat the memory op as "contained".
+ op1CanBeContained = true;
+ }
+ }
+
+ // Do we have a short compare against a constant in op2
+ //
+ if (varTypeIsSmall(op1Type))
+ {
+ GenTreeIntCon* con = op2->AsIntCon();
+ ssize_t ival = con->gtIconVal;
+
+ bool isEqualityCompare = (tree->gtOper == GT_EQ || tree->gtOper == GT_NE);
+ bool useTest = isEqualityCompare && (ival == 0);
+
+ if (!useTest)
+ {
+ ssize_t lo = 0; // minimum imm value allowed for cmp reg,imm
+ ssize_t hi = 0; // maximum imm value allowed for cmp reg,imm
+ bool isUnsigned = false;
+
+ switch (op1Type)
+ {
+ case TYP_BOOL:
+ op1Type = TYP_UBYTE;
+ __fallthrough;
+ case TYP_UBYTE:
+ lo = 0;
+ hi = 0x7f;
+ isUnsigned = true;
+ break;
+ case TYP_BYTE:
+ lo = -0x80;
+ hi = 0x7f;
+ break;
+ case TYP_CHAR:
+ lo = 0;
+ hi = 0x7fff;
+ isUnsigned = true;
+ break;
+ case TYP_SHORT:
+ lo = -0x8000;
+ hi = 0x7fff;
+ break;
+ default:
+ unreached();
+ }
+
+ if ((ival >= lo) && (ival <= hi))
+ {
+ // We can perform a small compare with the immediate 'ival'
+ tree->gtFlags |= GTF_RELOP_SMALL;
+ if (isUnsigned && !isEqualityCompare)
+ {
+ tree->gtFlags |= GTF_UNSIGNED;
+ }
+ // We can treat the isMemoryOp as "contained"
+ op1CanBeContained = true;
+ }
+ }
+ }
+
+ if (op1CanBeContained)
+ {
+ if (op1->isMemoryOp())
+ {
+ MakeSrcContained(tree, op1);
+ }
+ else
+ {
+ bool op1IsMadeContained = false;
+
+ // When op1 is a GT_AND we can often generate a single "test" instruction
+ // instead of two instructions (an "and" instruction followed by a "cmp"/"test")
+ //
+ // This instruction can only be used for equality or inequality comparisons,
+ // and we must have a compare against zero.
+ //
+ // If we have a positive test for a single bit, we can reverse the condition and
+ // make the compare be against zero.
+ //
+ // Example:
+ //          GT_EQ                            GT_NE
+ //          /  \                             /  \
+ //     GT_AND   GT_CNS (0x100)   ==>>   GT_AND   GT_CNS (0)
+ //     /  \                             /  \
+ // andOp1   GT_CNS (0x100)          andOp1   GT_CNS (0x100)
+ //
+ // We will mark the GT_AND node as contained if the tree is an equality compare with zero.
+ // Additionally, when we do this we also allow for a contained memory operand for "andOp1".
+ //
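+ // For example (illustrative), after the transformation codegen can emit:
+ //   test dword ptr [mem], 0x100
+ //   jne <target>
+ // instead of an 'and' into a register followed by a separate 'cmp'/'test'.
+ //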
+ bool isEqualityCompare = (tree->gtOper == GT_EQ || tree->gtOper == GT_NE);
+
+ if (isEqualityCompare && (op1->OperGet() == GT_AND))
+ {
+ GenTreePtr andOp2 = op1->gtOp.gtOp2;
+ if (IsContainableImmed(op1, andOp2))
+ {
+ ssize_t andOp2CnsVal = andOp2->AsIntConCommon()->IconValue();
+ ssize_t relOp2CnsVal = op2->AsIntConCommon()->IconValue();
+
+ if ((relOp2CnsVal == andOp2CnsVal) && isPow2(andOp2CnsVal))
+ {
+ // We have a single bit test, so now we can change the
+ // tree into the alternative form,
+ // so that we can generate a test instruction.
+
+ // Reverse the equality comparison
+ tree->gtOper = (tree->gtOper == GT_EQ) ? GT_NE : GT_EQ;
+
+ // Change the relOp2CnsVal to zero
+ relOp2CnsVal = 0;
+ op2->AsIntConCommon()->SetIconValue(0);
+ }
+
+ // Now, do we have an equality compare with zero?
+ //
+ if (relOp2CnsVal == 0)
+ {
+ // Note that child nodes must be made contained before parent nodes
+
+ // Check for a memory operand for op1 with the test instruction
+ //
+ GenTreePtr andOp1 = op1->gtOp.gtOp1;
+ if (andOp1->isMemoryOp())
+ {
+ // If the type of the memory op (andOp1) is not the same as the type of the constant
+ // (andOp2), check whether it is safe to mark andOp1 as contained. For example, in
+ // the following case it is not safe to mark andOp1 as contained:
+ // andOp1 is a signed byte and andOp2 is an int constant of value 512.
+ //
+ // If it is safe, we update the type and value of andOp2 to match with andOp1.
+ bool containable = (andOp1->TypeGet() == op1->TypeGet());
+ if (!containable)
+ {
+ ssize_t newIconVal = 0;
+
+ switch (andOp1->TypeGet())
+ {
+ default:
+ break;
+ case TYP_BYTE:
+ newIconVal = (signed char)andOp2CnsVal;
+ containable = FitsIn<signed char>(andOp2CnsVal);
+ break;
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ newIconVal = andOp2CnsVal & 0xFF;
+ containable = true;
+ break;
+ case TYP_SHORT:
+ newIconVal = (signed short)andOp2CnsVal;
+ containable = FitsIn<signed short>(andOp2CnsVal);
+ break;
+ case TYP_CHAR:
+ newIconVal = andOp2CnsVal & 0xFFFF;
+ containable = true;
+ break;
+ case TYP_INT:
+ newIconVal = (INT32)andOp2CnsVal;
+ containable = FitsIn<INT32>(andOp2CnsVal);
+ break;
+ case TYP_UINT:
+ newIconVal = andOp2CnsVal & 0xFFFFFFFF;
+ containable = true;
+ break;
+
+#ifdef _TARGET_64BIT_
+ case TYP_LONG:
+ newIconVal = (INT64)andOp2CnsVal;
+ containable = true;
+ break;
+ case TYP_ULONG:
+ newIconVal = (UINT64)andOp2CnsVal;
+ containable = true;
+ break;
+#endif //_TARGET_64BIT_
+ }
+
+ if (containable)
+ {
+ andOp2->gtType = andOp1->TypeGet();
+ andOp2->AsIntConCommon()->SetIconValue(newIconVal);
+ }
+ }
+
+ // Mark the 'andOp1' memory operand as contained
+ // Note that for equality comparisons we don't need
+ // to deal with any signed or unsigned issues.
+ if (containable)
+ {
+ MakeSrcContained(op1, andOp1);
+ }
+ }
+ // Mark the 'op1' (the GT_AND) operand as contained
+ MakeSrcContained(tree, op1);
+ op1IsMadeContained = true;
+
+ // During Codegen we will now generate "test andOp1, andOp2CnsVal"
+ }
+ }
+ }
+ else if (op1->OperGet() == GT_CAST)
+ {
+ // If op1 is a cast operation and the cast type is a one-byte-sized unsigned type,
+ // we can use the value in the register directly, instead of doing an extra cast step.
+ var_types dstType = op1->CastToType();
+ bool isUnsignedDst = varTypeIsUnsigned(dstType);
+ emitAttr castSize = EA_ATTR(genTypeSize(dstType));
+ GenTreePtr castOp1 = op1->gtOp.gtOp1;
+ genTreeOps castOp1Oper = castOp1->OperGet();
+ bool safeOper = false;
+
+ // It is not always safe to change the gtType of 'castOp1' to TYP_UBYTE.
+ // For example, when 'castOp1Oper' is a GT_RSZ or GT_RSH we are shifting
+ // bits from the left into the lower bits. If we change the type to TYP_UBYTE
+ // we will instead generate a byte-sized shift operation: shr al, 24
+ // For the following ALU operations it is safe to change the gtType to the
+ // smaller type:
+ //
+ if ((castOp1Oper == GT_CNS_INT) || (castOp1Oper == GT_CALL) || // the return value from a Call
+ (castOp1Oper == GT_LCL_VAR) || castOp1->OperIsLogical() || // GT_AND, GT_OR, GT_XOR
+ castOp1->isMemoryOp()) // isIndir() || isLclField();
+ {
+ safeOper = true;
+ }
+
+ if ((castSize == EA_1BYTE) && isUnsignedDst && // Unsigned cast to TYP_UBYTE
+ safeOper && // Must be a safe operation
+ !op1->gtOverflow()) // Must not be an overflow checking cast
+ {
+ // Currently all of the Oper accepted as 'safeOper' are
+ // non-overflow checking operations. If we were to add
+ // an overflow checking operation then this assert needs
+ // to be moved above to guard entry to this block.
+ //
+ assert(!castOp1->gtOverflowEx()); // Must not be an overflow checking operation
+
+ GenTreePtr removeTreeNode = op1;
+ tree->gtOp.gtOp1 = castOp1;
+ op1 = castOp1;
+ castOp1->gtType = TYP_UBYTE;
+
+ // trim down the value if castOp1 is an int constant since its type changed to UBYTE.
+ if (castOp1Oper == GT_CNS_INT)
+ {
+ castOp1->gtIntCon.gtIconVal = (UINT8)castOp1->gtIntCon.gtIconVal;
+ }
+
+ if (op2->isContainedIntOrIImmed())
+ {
+ ssize_t val = (ssize_t)op2->AsIntConCommon()->IconValue();
+ if (val >= 0 && val <= 255)
+ {
+ op2->gtType = TYP_UBYTE;
+ tree->gtFlags |= GTF_UNSIGNED;
+
+ // Right now op1's type is the same as op2's type.
+ // If op1 is a memory op, we should mark it as contained.
+ if (castOp1->isMemoryOp())
+ {
+ MakeSrcContained(tree, op1);
+ op1IsMadeContained = true;
+ }
+ }
+ }
+
+ BlockRange().Remove(removeTreeNode);
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ printf("LowerCmp: Removing a GT_CAST to TYP_UBYTE and changing castOp1->gtType to "
+ "TYP_UBYTE\n");
+ comp->gtDispTreeRange(BlockRange(), tree);
+ }
+#endif
+ }
+ }
+
+ // If not made contained, op1 can be marked as reg-optional.
+ if (!op1IsMadeContained)
+ {
+ SetRegOptional(op1);
+ }
+ }
+ }
+ }
+ else if (op1Type == op2Type)
+ {
+ if (op2->isMemoryOp())
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else if (op1->isMemoryOp() && IsSafeToContainMem(tree, op1))
+ {
+ MakeSrcContained(tree, op1);
+ }
+ else
+ {
+ // One of op1 or op2 could be marked as reg optional
+ // to indicate that codegen can still generate code
+ // if one of them is on stack.
+ SetRegOptional(PreferredRegOptionalOperand(tree));
+ }
+
+ if (varTypeIsSmall(op1Type) && varTypeIsUnsigned(op1Type))
+ {
+ // Mark the tree as doing unsigned comparison if
+ // both the operands are small and unsigned types.
+ // Otherwise we will end up performing a signed comparison
+ // of two small unsigned values without zero extending them to
+ // TYP_INT size, which is incorrect.
+ tree->gtFlags |= GTF_UNSIGNED;
+ }
+ }
+}
+
+/* Lower GT_CAST(srcType, DstType) nodes.
+ *
+ * Casts from small int type to float/double are transformed as follows:
+ * GT_CAST(byte, float/double) = GT_CAST(GT_CAST(byte, int32), float/double)
+ * GT_CAST(sbyte, float/double) = GT_CAST(GT_CAST(sbyte, int32), float/double)
+ * GT_CAST(int16, float/double) = GT_CAST(GT_CAST(int16, int32), float/double)
+ * GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double)
+ *
+ * SSE2 conversion instructions operate on signed integers. Casts from uint32/uint64
+ * are morphed as follows by the front end and hence should not be seen here.
+ * GT_CAST(uint32, float/double) = GT_CAST(GT_CAST(uint32, long), float/double)
+ * GT_CAST(uint64, float) = GT_CAST(GT_CAST(uint64, double), float)
+ *
+ *
+ * Similarly casts from float/double to a smaller int type are transformed as follows:
+ * GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte)
+ * GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte)
+ * GT_CAST(float/double, int16) = GT_CAST(GT_CAST(float/double, int32), int16)
+ * GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(float/double, int32), uint16)
+ *
+ * SSE2 has instructions to convert a float/double value into a signed 32/64-bit
+ * integer. The above transformations help us to leverage those instructions.
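+ * (For illustration, these transformations let us use cvtsi2ss/cvtsi2sd for the
+ * int -> float/double direction and the truncating cvttss2si/cvttsd2si for the
+ * float/double -> int direction.)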
+ *
+ * Note that for the following conversions we still depend on helper calls and
+ * don't expect to see them here.
+ * i) GT_CAST(float/double, uint64)
+ * ii) GT_CAST(float/double, int type with overflow detection)
+ *
+ * TODO-XArch-CQ: (Low-pri): Jit64 generates in-line code of 8 instructions for (i) above.
+ * There are hardly any occurrences of this conversion operation in platform
+ * assemblies or in CQ perf benchmarks (1 occurrence in mscorlib, microsoft.jscript,
+ * 1 occurrence in Roslyn and no occurrences in system, system.core, system.numerics,
+ * system.windows.forms, scimark, fractals, bio mums). If we ever find evidence that
+ * doing this optimization is a win, we should consider generating in-lined code.
+ */
+void Lowering::LowerCast(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_CAST);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types dstType = tree->CastToType();
+ var_types srcType = op1->TypeGet();
+ var_types tmpType = TYP_UNDEF;
+ bool srcUns = false;
+
+ // force the srcType to unsigned if GT_UNSIGNED flag is set
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+ // We should never see the following casts as they are expected to be lowered
+ // appropriately or converted into helper calls by the front end.
+ // srcType = float/double dstType = * and overflow detecting cast
+ // Reason: must be converted to a helper call
+ // srcType = float/double, dstType = ulong
+ // Reason: must be converted to a helper call
+ // srcType = uint dstType = float/double
+ // Reason: uint -> float/double = uint -> long -> float/double
+ // srcType = ulong dstType = float
+ // Reason: ulong -> float = ulong -> double -> float
+ if (varTypeIsFloating(srcType))
+ {
+ noway_assert(!tree->gtOverflow());
+ noway_assert(dstType != TYP_ULONG);
+ }
+ else if (srcType == TYP_UINT)
+ {
+ noway_assert(!varTypeIsFloating(dstType));
+ }
+ else if (srcType == TYP_ULONG)
+ {
+ noway_assert(dstType != TYP_FLOAT);
+ }
+
+ // Case where src is a small type and dst is a floating point type.
+ if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType))
+ {
+ // These conversions can never be overflow detecting ones.
+ noway_assert(!tree->gtOverflow());
+ tmpType = TYP_INT;
+ }
+ // Case where src is a floating point type and dst is a small type.
+ else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType))
+ {
+ tmpType = TYP_INT;
+ }
+
+ if (tmpType != TYP_UNDEF)
+ {
+ GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType);
+ tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
+
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ tree->gtOp.gtOp1 = tmp;
+ BlockRange().InsertAfter(op1, tmp);
+ }
+}
+
+//----------------------------------------------------------------------------------------------
+// Lowering::IsRMWIndirCandidate:
+// Returns true if the given operand is a candidate indirection for a read-modify-write
+// operator.
+//
+// Arguments:
+// operand - The operand to consider.
+// storeInd - The indirect store that roots the possible RMW operator.
+//
+bool Lowering::IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd)
+{
+ // If the operand isn't an indirection, it's trivially not a candidate.
+ if (operand->OperGet() != GT_IND)
+ {
+ return false;
+ }
+
+ // If the indirection's source address isn't equivalent to the destination address of the storeIndir, then the
+ // indirection is not a candidate.
+ GenTree* srcAddr = operand->gtGetOp1();
+ GenTree* dstAddr = storeInd->gtGetOp1();
+ if ((srcAddr->OperGet() != dstAddr->OperGet()) || !IndirsAreEquivalent(operand, storeInd))
+ {
+ return false;
+ }
+
+ // If it is not safe to contain the entire tree rooted at the indirection, then the indirection is not a
+ // candidate. Crawl the IR from the node immediately preceding the storeIndir until the last node in the
+ // indirection's tree is visited and check the side effects at each point.
+
+ m_scratchSideEffects.Clear();
+
+ assert((operand->gtLIRFlags & LIR::Flags::Mark) == 0);
+ operand->gtLIRFlags |= LIR::Flags::Mark;
+
+ unsigned markCount = 1;
+ GenTree* node;
+ for (node = storeInd->gtPrev; markCount > 0; node = node->gtPrev)
+ {
+ assert(node != nullptr);
+
+ if ((node->gtLIRFlags & LIR::Flags::Mark) == 0)
+ {
+ m_scratchSideEffects.AddNode(comp, node);
+ }
+ else
+ {
+ node->gtLIRFlags &= ~LIR::Flags::Mark;
+ markCount--;
+
+ if (m_scratchSideEffects.InterferesWith(comp, node, false))
+ {
+ // The indirection's tree contains some node that can't be moved to the storeIndir. The indirection is
+ // not a candidate. Clear any leftover mark bits and return.
+ for (; markCount > 0; node = node->gtPrev)
+ {
+ if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
+ {
+ node->gtLIRFlags &= ~LIR::Flags::Mark;
+ markCount--;
+ }
+ }
+ return false;
+ }
+
+ for (GenTree* nodeOperand : node->Operands())
+ {
+ assert((nodeOperand->gtLIRFlags & LIR::Flags::Mark) == 0);
+ nodeOperand->gtLIRFlags |= LIR::Flags::Mark;
+ markCount++;
+ }
+ }
+ }
+
+ // At this point we've verified that the operand is an indirection, its address is equivalent to the storeIndir's
+ // destination address, and that it and the transitive closure of its operand can be safely contained by the
+ // storeIndir. This indirection is therefore a candidate for an RMW op.
+ return true;
+}
+
+//----------------------------------------------------------------------------------------------
+// Returns true if this tree is the bin-op of a GT_STOREIND of the following form:
+// storeInd(subTreeA, binOp(gtInd(subTreeA), subtreeB)) or
+// storeInd(subTreeA, binOp(subtreeB, gtInd(subTreeA))) in the case of commutative bin-ops
+//
+// The above form for storeInd represents a read-modify-write memory binary operation.
+//
+// Parameters
+// tree - GenTreePtr of binOp
+//
+// Return Value
+// True if 'tree' is part of a RMW memory operation pattern
+//
+bool Lowering::IsBinOpInRMWStoreInd(GenTreePtr tree)
+{
+ // Must be a non floating-point type binary operator since SSE2 doesn't support RMW memory ops
+ assert(!varTypeIsFloating(tree));
+ assert(GenTree::OperIsBinary(tree->OperGet()));
+
+ // Cheap bail out check before more expensive checks are performed.
+ // RMW memory op pattern requires that one of the operands of binOp to be GT_IND.
+ if (tree->gtGetOp1()->OperGet() != GT_IND && tree->gtGetOp2()->OperGet() != GT_IND)
+ {
+ return false;
+ }
+
+ LIR::Use use;
+ if (!BlockRange().TryGetUse(tree, &use) || use.User()->OperGet() != GT_STOREIND || use.User()->gtGetOp2() != tree)
+ {
+ return false;
+ }
+
+ // Since recognizing the RMW memory op pattern is relatively expensive, we
+ // cache the result in the GT_STOREIND node so that we can use it while
+ // lowering GT_STOREIND.
+ GenTreePtr indirCandidate = nullptr;
+ GenTreePtr indirOpSource = nullptr;
+ return IsRMWMemOpRootedAtStoreInd(use.User(), &indirCandidate, &indirOpSource);
+}
+
+//----------------------------------------------------------------------------------------------
+// This method recognizes the case where we have a treeNode with the following structure:
+// storeInd(IndirDst, binOp(gtInd(IndirDst), indirOpSource)) OR
+// storeInd(IndirDst, binOp(indirOpSource, gtInd(IndirDst))) in the case of commutative operations OR
+// storeInd(IndirDst, unaryOp(gtInd(IndirDst))) in the case of unary operations
+//
+// Terminology:
+// indirDst = memory write of an addr mode (i.e. storeind destination)
+// indirSrc = value being written to memory (i.e. storeind source which could either be a binary or unary op)
+// indirCandidate = memory read i.e. a gtInd of an addr mode
+// indirOpSource = source operand used in binary/unary op (i.e. source operand of indirSrc node)
+//
+// In x86/x64 this storeInd pattern can be effectively encoded in a single instruction of the
+// following form in case of integer operations:
+// binOp [addressing mode], RegIndirOpSource
+// binOp [addressing mode], immediateVal
+// where RegIndirOpSource is the register where indirOpSource was computed.
+//
+// Right now, we recognize a few cases:
+// a) The gtInd child is a lea/lclVar/lclVarAddr/clsVarAddr/constant
+// b) BinOp is either add, sub, xor, or, and, shl, rsh, rsz.
+// c) unaryOp is either not/neg
+//
+// Implementation Note: The following routines need to be in sync for RMW memory op optimization
+// to be correct and functional.
+// IndirsAreEquivalent()
+// NodesAreEquivalentLeaves()
+// Codegen of GT_STOREIND and genCodeForShiftRMW()
+// emitInsRMW()
+//
+// TODO-CQ: Enable support for more complex indirections (if needed) or use the value numbering
+// package to perform more complex tree recognition.
+//
+// TODO-XArch-CQ: Add support for RMW of lcl fields (e.g. lclfield binop= source)
+//
+// Parameters:
+// tree - GT_STOREIND node
+// outIndirCandidate - out param set to indirCandidate as described above
+// outIndirOpSource - out param set to indirOpSource as described above
+//
+// Return value
+// True if there is a RMW memory operation rooted at a GT_STOREIND tree
+// and out params indirCandidate and indirOpSource are set to non-null values.
+// Otherwise, returns false with indirCandidate and indirOpSource set to null.
+// Also updates flags of GT_STOREIND tree with its RMW status.
+//
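+// Illustrative example (hypothetical source "*p |= mask"): the IR pattern
+// t1 = GT_IND(p); t2 = GT_OR(t1, mask); GT_STOREIND(p, t2)
+// is recognized here so that codegen can emit a single "or [p], mask" instruction.
+//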
+bool Lowering::IsRMWMemOpRootedAtStoreInd(GenTreePtr tree, GenTreePtr* outIndirCandidate, GenTreePtr* outIndirOpSource)
+{
+ assert(!varTypeIsFloating(tree));
+ assert(outIndirCandidate != nullptr);
+ assert(outIndirOpSource != nullptr);
+
+ *outIndirCandidate = nullptr;
+ *outIndirOpSource = nullptr;
+
+ // Early out if storeInd is already known to be a non-RMW memory op
+ GenTreeStoreInd* storeInd = tree->AsStoreInd();
+ if (storeInd->IsNonRMWMemoryOp())
+ {
+ return false;
+ }
+
+ GenTreePtr indirDst = storeInd->gtGetOp1();
+ GenTreePtr indirSrc = storeInd->gtGetOp2();
+ genTreeOps oper = indirSrc->OperGet();
+
+ // Early out if it is already known to be a RMW memory op
+ if (storeInd->IsRMWMemoryOp())
+ {
+ if (GenTree::OperIsBinary(oper))
+ {
+ if (storeInd->IsRMWDstOp1())
+ {
+ *outIndirCandidate = indirSrc->gtGetOp1();
+ *outIndirOpSource = indirSrc->gtGetOp2();
+ }
+ else
+ {
+ assert(storeInd->IsRMWDstOp2());
+ *outIndirCandidate = indirSrc->gtGetOp2();
+ *outIndirOpSource = indirSrc->gtGetOp1();
+ }
+ assert(IndirsAreEquivalent(*outIndirCandidate, storeInd));
+ }
+ else
+ {
+ assert(GenTree::OperIsUnary(oper));
+ assert(IndirsAreEquivalent(indirSrc->gtGetOp1(), storeInd));
+ *outIndirCandidate = indirSrc->gtGetOp1();
+ *outIndirOpSource = indirSrc->gtGetOp1();
+ }
+
+ return true;
+ }
+
+ // If we reach here, we do not know the RMW status of the tree rooted at storeInd
+ assert(storeInd->IsRMWStatusUnknown());
+
+ // Early out if indirDst is not one of the supported memory operands.
+ if (indirDst->OperGet() != GT_LEA && indirDst->OperGet() != GT_LCL_VAR && indirDst->OperGet() != GT_LCL_VAR_ADDR &&
+ indirDst->OperGet() != GT_CLS_VAR_ADDR && indirDst->OperGet() != GT_CNS_INT)
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR);
+ return false;
+ }
+
+ // We cannot use Read-Modify-Write instruction forms with overflow checking instructions
+ // because we are not allowed to modify the target until after the overflow check.
+ if (indirSrc->gtOverflowEx())
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER);
+ return false;
+ }
+
+ // At this point we can match one of two patterns:
+ //
+ // t_ind = indir t_addr_0
+ // ...
+ // t_value = binop t_ind, t_other
+ // ...
+ // storeIndir t_addr_1, t_value
+ //
+ // or
+ //
+ // t_ind = indir t_addr_0
+ // ...
+ // t_value = unop t_ind
+ // ...
+ // storeIndir t_addr_1, t_value
+ //
+ // In all cases, we will eventually make the binop that produces t_value and the entire dataflow tree rooted at
+ // t_ind contained by t_value.
+
+ GenTree* indirCandidate = nullptr;
+ GenTree* indirOpSource = nullptr;
+ RMWStatus status = STOREIND_RMW_STATUS_UNKNOWN;
+ if (GenTree::OperIsBinary(oper))
+ {
+ // Return if binary op is not one of the supported operations for RMW of memory.
+ if (oper != GT_ADD && oper != GT_SUB && oper != GT_AND && oper != GT_OR && oper != GT_XOR &&
+ !GenTree::OperIsShiftOrRotate(oper))
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER);
+ return false;
+ }
+
+ if (GenTree::OperIsShiftOrRotate(oper) && varTypeIsSmall(storeInd))
+ {
+ // For ldind, integer values smaller than 4 bytes, a boolean, or a character are converted
+ // to 4 bytes by sign or zero extension as appropriate. If we directly shift the short-typed
+ // data using sar, we will lose the sign or zero extension bits.
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_TYPE);
+ return false;
+ }
+
+ // In the common case, the second operand to the binop will be the indir candidate.
+ GenTreeOp* binOp = indirSrc->AsOp();
+ if (GenTree::OperIsCommutative(oper) && IsRMWIndirCandidate(binOp->gtOp2, storeInd))
+ {
+ indirCandidate = binOp->gtOp2;
+ indirOpSource = binOp->gtOp1;
+ status = STOREIND_RMW_DST_IS_OP2;
+ }
+ else if (IsRMWIndirCandidate(binOp->gtOp1, storeInd))
+ {
+ indirCandidate = binOp->gtOp1;
+ indirOpSource = binOp->gtOp2;
+ status = STOREIND_RMW_DST_IS_OP1;
+ }
+ else
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR);
+ return false;
+ }
+ }
+ else if (GenTree::OperIsUnary(oper))
+ {
+ // Nodes other than GT_NOT and GT_NEG are not yet supported.
+ if (oper != GT_NOT && oper != GT_NEG)
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER);
+ return false;
+ }
+
+ if (indirSrc->gtGetOp1()->OperGet() != GT_IND)
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR);
+ return false;
+ }
+
+ GenTreeUnOp* unOp = indirSrc->AsUnOp();
+ if (IsRMWIndirCandidate(unOp->gtOp1, storeInd))
+ {
+ // src and dest are the same in case of unary ops
+ indirCandidate = unOp->gtOp1;
+ indirOpSource = unOp->gtOp1;
+ status = STOREIND_RMW_DST_IS_OP1;
+ }
+ else
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR);
+ return false;
+ }
+ }
+ else
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER);
+ return false;
+ }
+
+ // By this point we've verified that we have a supported operand with a supported address. Now we need to ensure
+ // that we're able to move the destination address for the source indirection forwards.
+ if (!IsSafeToContainMem(storeInd, indirDst))
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR);
+ return false;
+ }
+
+ assert(indirCandidate != nullptr);
+ assert(indirOpSource != nullptr);
+ assert(status != STOREIND_RMW_STATUS_UNKNOWN);
+
+ *outIndirCandidate = indirCandidate;
+ *outIndirOpSource = indirOpSource;
+ storeInd->SetRMWStatus(status);
+ return true;
+}
+
+//--------------------------------------------------------------------------------------------
+// SetStoreIndOpCountsIfRMWMemOp checks to see if there is a RMW memory operation rooted at
+// GT_STOREIND node and if so will mark register requirements for nodes under storeInd so
+// that CodeGen will generate a single instruction of the form:
+//
+// binOp [addressing mode], reg
+//
+// Parameters
+// storeInd - GT_STOREIND node
+//
+// Return value
+// True, if RMW memory op tree pattern is recognized and op counts are set.
+// False otherwise.
+//
+bool Lowering::SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd)
+{
+ assert(storeInd->OperGet() == GT_STOREIND);
+
+ // SSE2 doesn't support RMW on float values
+ assert(!varTypeIsFloating(storeInd));
+
+ // Terminology:
+ // indirDst = memory write of an addr mode (i.e. storeind destination)
+ // indirSrc = value being written to memory (i.e. storeind source which could be a binary/unary op)
+ // indirCandidate = memory read i.e. a gtInd of an addr mode
+ // indirOpSource = source operand used in binary/unary op (i.e. source operand of indirSrc node)
+
+ GenTreePtr indirCandidate = nullptr;
+ GenTreePtr indirOpSource = nullptr;
+
+ if (!IsRMWMemOpRootedAtStoreInd(storeInd, &indirCandidate, &indirOpSource))
+ {
+ JITDUMP("Lower of StoreInd didn't mark the node as self contained for reason: %d\n",
+ storeInd->AsStoreInd()->GetRMWStatus());
+ DISPTREERANGE(BlockRange(), storeInd);
+ return false;
+ }
+
+ GenTreePtr indirDst = storeInd->gtGetOp1();
+ GenTreePtr indirSrc = storeInd->gtGetOp2();
+ genTreeOps oper = indirSrc->OperGet();
+
+ // At this point we have successfully detected a RMW memory op of one of the following forms
+ // storeInd(indirDst, indirSrc(indirCandidate, indirOpSource)) OR
+ // storeInd(indirDst, indirSrc(indirOpSource, indirCandidate)) in the case of commutative operations OR
+ // storeInd(indirDst, indirSrc(indirCandidate)) in the case of unary operations
+ //
+ // Here indirSrc = one of the supported binary or unary operation for RMW of memory
+ // indirCandidate = a GT_IND node
+ // indirCandidateChild = operand of GT_IND indirCandidate
+ //
+ // The logic below essentially does the following
+ // set storeInd src count to that of the dst count of indirOpSource
+ // clear operand counts on indirSrc (i.e. marked as contained and storeInd will generate code for it)
+ // clear operand counts on indirCandidate
+ // clear operand counts on indirDst except when it is a GT_LCL_VAR or GT_CNS_INT that doesn't fit within addr
+ // base
+ // Increment src count of storeInd to account for the registers required to form indirDst addr mode
+ // clear operand counts on indirCandidateChild
+
+ TreeNodeInfo* info = &(storeInd->gtLsraInfo);
+ info->dstCount = 0;
+
+ if (GenTree::OperIsBinary(oper))
+ {
+ // On Xarch RMW operations require that the source memory-op be in a register.
+ assert(!indirOpSource->isMemoryOp() || indirOpSource->gtLsraInfo.dstCount == 1);
+ JITDUMP("Lower successfully detected an assignment of the form: *addrMode BinOp= source\n");
+ info->srcCount = indirOpSource->gtLsraInfo.dstCount;
+ }
+ else
+ {
+ assert(GenTree::OperIsUnary(oper));
+ JITDUMP("Lower successfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n");
+ info->srcCount = 0;
+ }
+ DISPTREERANGE(BlockRange(), storeInd);
+
+ m_lsra->clearOperandCounts(indirSrc);
+ m_lsra->clearOperandCounts(indirCandidate);
+
+ GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1();
+ if (indirCandidateChild->OperGet() == GT_LEA)
+ {
+ GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode();
+
+ if (addrMode->HasBase())
+ {
+ assert(addrMode->Base()->OperIsLeaf());
+ m_lsra->clearOperandCounts(addrMode->Base());
+ info->srcCount++;
+ }
+
+ if (addrMode->HasIndex())
+ {
+ assert(addrMode->Index()->OperIsLeaf());
+ m_lsra->clearOperandCounts(addrMode->Index());
+ info->srcCount++;
+ }
+
+ m_lsra->clearOperandCounts(indirDst);
+ }
+ else
+ {
+ assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR ||
+ indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || indirCandidateChild->OperGet() == GT_CNS_INT);
+
+ // If it is a GT_LCL_VAR, it still needs the reg to hold the address.
+ // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base.
+ // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because the field
+ // address value is known at jit time.
+ if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ m_lsra->clearOperandCounts(indirDst);
+ }
+ else if (indirCandidateChild->IsCnsIntOrI() && indirCandidateChild->AsIntConCommon()->FitsInAddrBase(comp))
+ {
+ m_lsra->clearOperandCounts(indirDst);
+ }
+ else
+ {
+ // Need a reg and hence increment src count of storeind
+ info->srcCount += indirCandidateChild->gtLsraInfo.dstCount;
+ }
+ }
+ m_lsra->clearOperandCounts(indirCandidateChild);
+
+ return true;
+}
+
+/**
+ * Takes care of annotating the src and dst register
+ * requirements for a GT_MUL treenode.
+ */
+void Lowering::SetMulOpCounts(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI);
+
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ // Case of float/double mul.
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ assert(tree->OperGet() == GT_MUL);
+
+ if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else if (op1->IsCnsNonZeroFltOrDbl() || (op1->isMemoryOp() && IsSafeToContainMem(tree, op1)))
+ {
+ // Since GT_MUL is commutative, we will try to re-order operands if it is safe to
+ // generate more efficient code sequence for the case of GT_MUL(op1=memOp, op2=non-memOp)
+ MakeSrcContained(tree, op1);
+ }
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
+ return;
+ }
+
+ bool isUnsignedMultiply = ((tree->gtFlags & GTF_UNSIGNED) != 0);
+ bool requiresOverflowCheck = tree->gtOverflowEx();
+ bool useLeaEncoding = false;
+ GenTreePtr memOp = nullptr;
+
+ bool hasImpliedFirstOperand = false;
+ GenTreeIntConCommon* imm = nullptr;
+ GenTreePtr other = nullptr;
+
+ // There are three forms of x86 multiply:
+ // one-op form: RDX:RAX = RAX * r/m
+ // two-op form: reg *= r/m
+ // three-op form: reg = r/m * imm
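+ // For illustration, these correspond to instructions such as "mul rcx" (one-op form),
+ // "imul rbx, rcx" (two-op form) and "imul rbx, rcx, 9" (three-op form).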
+
+ // This special widening 32x32->64 MUL is not used on x64
+ assert((tree->gtFlags & GTF_MUL_64RSLT) == 0);
+
+ // Multiply should never be using small types
+ assert(!varTypeIsSmall(tree->TypeGet()));
+
+ // We do use the widening multiply to implement
+ // the overflow checking for unsigned multiply
+ //
+ if (isUnsignedMultiply && requiresOverflowCheck)
+ {
+ // The only encoding provided is RDX:RAX = RAX * rm
+ //
+ // Here we set RAX as the only destination candidate
+ // In LSRA we set the kill set for this operation to RBM_RAX|RBM_RDX
+ //
+ info->setDstCandidates(m_lsra, RBM_RAX);
+ hasImpliedFirstOperand = true;
+ }
+ else if (tree->gtOper == GT_MULHI)
+ {
+ // have to use the encoding:RDX:RAX = RAX * rm
+ info->setDstCandidates(m_lsra, RBM_RAX);
+ hasImpliedFirstOperand = true;
+ }
+ else if (IsContainableImmed(tree, op2) || IsContainableImmed(tree, op1))
+ {
+ if (IsContainableImmed(tree, op2))
+ {
+ imm = op2->AsIntConCommon();
+ other = op1;
+ }
+ else
+ {
+ imm = op1->AsIntConCommon();
+ other = op2;
+ }
+
+ // CQ: We want to rewrite this into a LEA
+ ssize_t immVal = imm->AsIntConCommon()->IconValue();
+ if (!requiresOverflowCheck && (immVal == 3 || immVal == 5 || immVal == 9))
+ {
+ useLeaEncoding = true;
+ }
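+ // For illustration: a multiply by these immediates can be expressed with a single LEA,
+ // e.g. x * 3 -> lea reg, [reg + reg*2], x * 5 -> lea reg, [reg + reg*4],
+ // x * 9 -> lea reg, [reg + reg*8].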
+
+ MakeSrcContained(tree, imm); // The imm is always contained
+ if (other->isMemoryOp())
+ {
+ memOp = other; // memOp may be contained below
+ }
+ }
+
+ // We allow one operand to be a contained memory operand.
+ // The memory op type must match with the 'tree' type.
+ // This is because during codegen we use 'tree' type to derive EmitTypeSize.
+ // E.g. op1 type = byte, op2 type = byte but GT_MUL tree type is int.
+ //
+ if (memOp == nullptr && op2->isMemoryOp())
+ {
+ memOp = op2;
+ }
+
+ // To generate an LEA we need to force memOp into a register
+ // so don't allow memOp to be 'contained'
+ //
+ if (!useLeaEncoding)
+ {
+ if ((memOp != nullptr) && (memOp->TypeGet() == tree->TypeGet()) && IsSafeToContainMem(tree, memOp))
+ {
+ MakeSrcContained(tree, memOp);
+ }
+ else if (imm != nullptr)
+ {
+ // Has a contained immediate operand.
+ // Only 'other' operand can be marked as reg optional.
+ assert(other != nullptr);
+ SetRegOptional(other);
+ }
+ else if (hasImpliedFirstOperand)
+ {
+ // Only op2 can be marked as reg optional.
+ SetRegOptional(op2);
+ }
+ else
+ {
+ // If there are no containable operands, we can make either of op1 or op2
+ // as reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
+ }
+}
+
+//------------------------------------------------------------------------------
+// isRMWRegOper: Can this binary tree node be used in a Read-Modify-Write format
+//
+// Arguments:
+// tree - a binary tree node
+//
+// Return Value:
+// Returns true if we can use the read-modify-write instruction form
+//
+// Notes:
+// This is used to determine whether to preference the source to the destination register.
+//
+bool Lowering::isRMWRegOper(GenTreePtr tree)
+{
+ // TODO-XArch-CQ: Make this more accurate.
+ // For now, we assume that most binary operators are of the RMW form.
+ assert(tree->OperIsBinary());
+
+ if (tree->OperIsCompare())
+ {
+ return false;
+ }
+
+ switch (tree->OperGet())
+ {
+ // These Opers either support a three op form (i.e. GT_LEA), or do not read/write their first operand
+ case GT_LEA:
+ case GT_STOREIND:
+ case GT_ARR_INDEX:
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ return false;
+
+ // x86/x64 does support a three op multiply when op2|op1 is a contained immediate
+ case GT_MUL:
+ return (!IsContainableImmed(tree, tree->gtOp.gtOp2) && !IsContainableImmed(tree, tree->gtOp.gtOp1));
+
+ default:
+ return true;
+ }
+}
+
+// anything is in range for AMD64
+bool Lowering::IsCallTargetInRange(void* addr)
+{
+ return true;
+}
+
+// return true if the immediate can be folded into an instruction, for example small enough and non-relocatable
+bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
+{
+ if (!childNode->IsIntCnsFitsInI32())
+ {
+ return false;
+ }
+
+ // At this point we know that it is an int const that fits within 4 bytes and hence can safely be cast to IntConCommon.
+ // Icons that need relocation should never be marked as contained immed
+ if (childNode->AsIntConCommon()->ImmedValNeedsReloc(comp))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+//-----------------------------------------------------------------------
+// PreferredRegOptionalOperand: returns one of the operands of given
+// binary oper that is to be preferred for marking as reg optional.
+//
+// Since only one of op1 or op2 can be a memory operand on xarch, only
+// one of them has to be marked as reg optional. Since Lower doesn't
+// know a priori which of op1 or op2 is unlikely to get a register, it
+// has to make a guess. This routine encapsulates heuristics that
+// guess whether it is likely to be beneficial to mark op1 or op2 as
+// reg optional.
+//
+//
+// Arguments:
+// tree - a binary-op tree node that is either commutative
+// or a compare oper.
+//
+// Returns:
+// Returns op1 or op2 of tree node that is preferred for
+// marking as reg optional.
+//
+// Note: if the tree oper is neither commutative nor a compare oper
+// then only op2 can be reg optional on xarch and hence no need to
+// call this routine.
+GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree)
+{
+ assert(GenTree::OperIsBinary(tree->OperGet()));
+ assert(tree->OperIsCommutative() || tree->OperIsCompare());
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+ GenTree* preferredOp = nullptr;
+
+ // This routine uses the following heuristics:
+ //
+ // a) If both are tracked locals, marking the one with lower weighted
+ // ref count as reg-optional would likely be beneficial as it has
+ // higher probability of not getting a register.
+ //
+ // b) op1 = tracked local and op2 = untracked local: LSRA creates two
+ // ref positions for op2: a def and use position. op2's def position
+ // requires a reg and it is allocated a reg by spilling another
+// interval (if required), which could even be op1. For this reason
+ // it is beneficial to mark op1 as reg optional.
+ //
+ // TODO: It is not always mandatory for a def position of an untracked
+ // local to be allocated a register if it is on rhs of an assignment
+ // and its use position is reg-optional and has not been assigned a
+// register. Reg-optional def positions are currently not supported.
+ //
+ // c) op1 = untracked local and op2 = tracked local: marking op1 as
+ // reg optional is beneficial, since its use position is less likely
+ // to get a register.
+ //
+ // d) If both are untracked locals (i.e. treated like tree temps by
+ // LSRA): though either of them could be marked as reg optional,
+ // marking op1 as reg optional is likely to be beneficial because
+ // while allocating op2's def position, there is a possibility of
+// spilling op1's def, in which case op1 is treated as a contained
+// memory operand rather than requiring a reload.
+ //
+// e) If only one of them is a local var, prefer to mark it as
+// reg-optional. This heuristic is based on results obtained
+// from CQ perf benchmarks.
+ //
+ // f) If neither of them are local vars (i.e. tree temps), prefer to
+ // mark op1 as reg optional for the same reason as mentioned in (d) above.
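+// As a concrete (hypothetical) illustration of (e): for GT_ADD(t1, V02) where t1 is a
+// tree temp and V02 is a lclVar, V02 is returned as the preferred reg-optional operand.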
+ if (op1->OperGet() == GT_LCL_VAR && op2->OperGet() == GT_LCL_VAR)
+ {
+ LclVarDsc* v1 = comp->lvaTable + op1->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* v2 = comp->lvaTable + op2->AsLclVarCommon()->GetLclNum();
+
+ if (v1->lvTracked && v2->lvTracked)
+ {
+ // Both are tracked locals. The one with lower weight is less likely
+ // to get a register, and hence it is beneficial to mark it
+ // as reg optional.
+ if (v1->lvRefCntWtd < v2->lvRefCntWtd)
+ {
+ preferredOp = op1;
+ }
+ else
+ {
+ preferredOp = op2;
+ }
+ }
+ else if (v2->lvTracked)
+ {
+ // v1 is an untracked lcl and its use position is less likely to
+ // get a register.
+ preferredOp = op1;
+ }
+ else if (v1->lvTracked)
+ {
+ // v2 is an untracked lcl and its def position always
+ // needs a reg. Hence it is better to mark v1 as
+ // reg optional.
+ preferredOp = op1;
+ }
+ else
+ {
+ preferredOp = op1;
+ }
+ }
+ else if (op1->OperGet() == GT_LCL_VAR)
+ {
+ preferredOp = op1;
+ }
+ else if (op2->OperGet() == GT_LCL_VAR)
+ {
+ preferredOp = op2;
+ }
+ else
+ {
+ // Neither of the operands is a local, so prefer marking the
+ // operand that is evaluated first as reg optional,
+ // since its use position is less likely to get a register.
+ bool reverseOps = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
+ preferredOp = reverseOps ? op2 : op1;
+ }
+
+ return preferredOp;
+}
+
+#endif // _TARGET_XARCH_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
new file mode 100644
index 0000000000..317b976e42
--- /dev/null
+++ b/src/jit/lsra.cpp
@@ -0,0 +1,11578 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+ Linear Scan Register Allocation
+
+ a.k.a. LSRA
+
+ Preconditions
+ - All register requirements are expressed in the code stream, either as destination
+ registers of tree nodes, or as internal registers. These requirements are
+ expressed in the TreeNodeInfo (gtLsraInfo) on each node, which includes:
+ - The number of register sources and destinations.
+ - The register restrictions (candidates) of the target register, both from itself,
+ as producer of the value (dstCandidates), and from its consuming node (srcCandidates).
+ Note that the srcCandidates field of TreeNodeInfo refers to the destination register
+ (not any of its sources).
+ - The number (internalCount) of registers required, and their register restrictions (internalCandidates).
+ These are neither inputs nor outputs of the node, but used in the sequence of code generated for the tree.
+ "Internal registers" are registers used during the code sequence generated for the node.
+ The register lifetimes must obey the following lifetime model:
+ - First, any internal registers are defined.
+ - Next, any source registers are used (and are then freed if they are last use and are not identified as
+ "delayRegFree").
+ - Next, the internal registers are used (and are then freed).
+ - Next, any registers in the kill set for the instruction are killed.
+ - Next, the destination register(s) are defined (multiple destination registers are only supported on ARM)
+ - Finally, any "delayRegFree" source registers are freed.
+ There are several things to note about this order:
+ - The internal registers will never overlap any use, but they may overlap a destination register.
+ - Internal registers are never live beyond the node.
+ - The "delayRegFree" annotation is used for instructions that are only available in a Read-Modify-Write form.
+ That is, the destination register is one of the sources. In this case, we must not use the same register for
+ the non-RMW operand as for the destination.
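+ (For illustration: in the two-operand x86 form "sub reg1, reg2", reg1 is both a source and
+ the destination, so the non-RMW source in reg2 must not share the destination register.)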
+
+ Overview (doLinearScan):
+ - Walk all blocks, building intervals and RefPositions (buildIntervals)
+ - Traverse the RefPositions, marking last uses (setLastUses)
+ - Note that this is necessary because the execution order doesn't accurately reflect use order.
+ There is a "TODO-Throughput" to eliminate this.
+ - Allocate registers (allocateRegisters)
+ - Annotate nodes with register assignments (resolveRegisters)
+ - Add move nodes as needed to resolve conflicting register
+ assignments across non-adjacent edges. (resolveEdges, called from resolveRegisters)
+
+ Postconditions:
+
+ Tree nodes (GenTree):
+ - GenTree::gtRegNum (and gtRegPair for ARM) is annotated with the register
+ assignment for a node. If the node does not require a register, it is
+ annotated as such (for single registers, gtRegNum = REG_NA; for register
+ pair type, gtRegPair = REG_PAIR_NONE). For a variable definition or interior
+ tree node (an "implicit" definition), this is the register to put the result.
+ For an expression use, this is the place to find the value that has previously
+ been computed.
+ - In most cases, this register must satisfy the constraints specified by the TreeNodeInfo.
+ - In some cases, this is difficult:
+ - If a lclVar node currently lives in some register, it may not be desirable to move it
+ (i.e. its current location may be desirable for future uses, e.g. if it's a callee save register,
+ but needs to be in a specific arg register for a call).
+ - In other cases there may be conflicts on the restrictions placed by the defining node and the node which
+ consumes it
+ - If such a node is constrained to a single fixed register (e.g. an arg register, or a return from a call),
+ then LSRA is free to annotate the node with a different register. The code generator must issue the appropriate
+ move.
+ - However, if such a node is constrained to a set of registers, and its current location does not satisfy that
+ requirement, LSRA must insert a GT_COPY node between the node and its parent. The gtRegNum on the GT_COPY node
+ must satisfy the register requirement of the parent.
+ - GenTree::gtRsvdRegs has a set of registers used for internal temps.
+ - A tree node is marked GTF_SPILL if the tree node must be spilled by the code generator after it has been
+ evaluated.
+ - LSRA currently does not set GTF_SPILLED on such nodes, because it caused problems in the old code generator.
+ In the new backend perhaps this should change (see also the note below under CodeGen).
+ - A tree node is marked GTF_SPILLED if it is a lclVar that must be reloaded prior to use.
+ - The register (gtRegNum) on the node indicates the register to which it must be reloaded.
+ - For lclVar nodes, since the uses and defs are distinct tree nodes, it is always possible to annotate the node
+ with the register to which the variable must be reloaded.
+ - For other nodes, since they represent both the def and use, if the value must be reloaded to a different
+ register, LSRA must insert a GT_RELOAD node in order to specify the register to which it should be reloaded.
+
+ Local variable table (LclVarDsc):
+ - LclVarDsc::lvRegister is set to true if a local variable has the
+ same register assignment for its entire lifetime.
+ - LclVarDsc::lvRegNum / lvOtherReg: these are initialized to their
+ first value at the end of LSRA (it looks like lvOtherReg isn't?
+ This is probably a bug (ARM)). Codegen will set them to their current value
+ as it processes the trees, since a variable can (now) be assigned different
+ registers over its lifetimes.
+
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#include "lsra.h"
+
+#ifdef DEBUG
+const char* LinearScan::resolveTypeName[] = {"Split", "Join", "Critical", "SharedCritical"};
+#endif // DEBUG
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Small Helper functions XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+//--------------------------------------------------------------
+// lsraAssignRegToTree: Assign the given reg to tree node.
+//
+// Arguments:
+// tree - Gentree node
+// reg - register to be assigned
+// regIdx - register idx, if tree is a multi-reg call node.
+// regIdx will be zero for single-reg result producing tree nodes.
+//
+// Return Value:
+// None
+//
+void lsraAssignRegToTree(GenTreePtr tree, regNumber reg, unsigned regIdx)
+{
+ if (regIdx == 0)
+ {
+ tree->gtRegNum = reg;
+ }
+ else
+ {
+ assert(tree->IsMultiRegCall());
+ GenTreeCall* call = tree->AsCall();
+ call->SetRegNumByIdx(reg, regIdx);
+ }
+}
+
+//-------------------------------------------------------------
+// getWeight: Returns the weight of the RefPosition.
+//
+// Arguments:
+// refPos - ref position
+//
+// Returns:
+// Weight of ref position.
+unsigned LinearScan::getWeight(RefPosition* refPos)
+{
+ unsigned weight;
+ GenTreePtr treeNode = refPos->treeNode;
+
+ if (treeNode != nullptr)
+ {
+ if (isCandidateLocalRef(treeNode))
+ {
+ // Tracked locals: use weighted ref cnt as the weight of the
+ // ref position.
+ GenTreeLclVarCommon* lclCommon = treeNode->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclCommon->gtLclNum]);
+ weight = varDsc->lvRefCntWtd;
+ }
+ else
+ {
+ // Non-candidate local ref or non-lcl tree node.
+ // These are considered to have two references in the basic block:
+ // These are considered to have two references in the basic block:
+ // a def and a use, and hence their weighted ref count is 2 times
+ // the weight of the basic block in which they appear.
+ }
+ }
+ else
+ {
+ // Non-tree node ref positions. These will have a single
+ // reference in the basic block and hence their weighted
+ // refcount is equal to the block weight in which they
+ // appear.
+ weight = this->blockInfo[refPos->bbNum].weight;
+ }
+
+ return weight;
+}
+
+// allRegs represents a set of registers that can
+// be used to allocate the specified type at any point
+// in time (more of a 'bank' of registers).
+regMaskTP LinearScan::allRegs(RegisterType rt)
+{
+ if (rt == TYP_FLOAT)
+ {
+ return availableFloatRegs;
+ }
+ else if (rt == TYP_DOUBLE)
+ {
+ return availableDoubleRegs;
+#ifdef FEATURE_SIMD
+ // TODO-Cleanup: Add an RBM_ALLSIMD
+ }
+ else if (varTypeIsSIMD(rt))
+ {
+ return availableDoubleRegs;
+#endif // FEATURE_SIMD
+ }
+ else
+ {
+ return availableIntRegs;
+ }
+}
+
+//--------------------------------------------------------------------------
+// allMultiRegCallNodeRegs: represents a set of registers that can be used
+// to allocate a multi-reg call node.
+//
+// Arguments:
+// call - Multi-reg call node
+//
+// Return Value:
+// Mask representing the set of available registers for multi-reg call
+// node.
+//
+// Note:
+// Multi-reg call node available regs = Bitwise-OR(allregs(GetReturnRegType(i)))
+// for all i=0..RetRegCount-1.
+regMaskTP LinearScan::allMultiRegCallNodeRegs(GenTreeCall* call)
+{
+ assert(call->HasMultiRegRetVal());
+
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ regMaskTP resultMask = allRegs(retTypeDesc->GetReturnRegType(0));
+
+ unsigned count = retTypeDesc->GetReturnRegCount();
+ for (unsigned i = 1; i < count; ++i)
+ {
+ resultMask |= allRegs(retTypeDesc->GetReturnRegType(i));
+ }
+
+ return resultMask;
+}
+
+//--------------------------------------------------------------------------
+// allRegs: returns the set of registers that can accommodate the type of
+// the given node.
+//
+// Arguments:
+// tree - GenTree node
+//
+// Return Value:
+// Mask representing the set of available registers for given tree
+//
+// Note: In case of multi-reg call node, the full set of registers must be
+// determined by looking at types of individual return register types.
+// In this case, the registers may include registers from different register
+// sets and will not be limited to the actual ABI return registers.
+regMaskTP LinearScan::allRegs(GenTree* tree)
+{
+ regMaskTP resultMask;
+
+ // In case of multi-reg calls, allRegs is defined as
+ // Bitwise-Or(allRegs(GetReturnRegType(i)) for i=0..ReturnRegCount-1
+ if (tree->IsMultiRegCall())
+ {
+ resultMask = allMultiRegCallNodeRegs(tree->AsCall());
+ }
+ else
+ {
+ resultMask = allRegs(tree->TypeGet());
+ }
+
+ return resultMask;
+}
+
+regMaskTP LinearScan::allSIMDRegs()
+{
+ return availableFloatRegs;
+}
+
+//------------------------------------------------------------------------
+// internalFloatRegCandidates: Return the set of registers that are appropriate
+// for use as internal float registers.
+//
+// Return Value:
+// The set of registers (as a regMaskTP).
+//
+// Notes:
+// compFloatingPointUsed is only required to be set if it is possible that we
+// will use floating point callee-save registers.
+// It is unlikely, if an internal register is the only use of floating point,
+// that it will select a callee-save register. But to be safe, we restrict
+// the set of candidates if compFloatingPointUsed is not already set.
+
+regMaskTP LinearScan::internalFloatRegCandidates()
+{
+ if (compiler->compFloatingPointUsed)
+ {
+ return allRegs(TYP_FLOAT);
+ }
+ else
+ {
+ return RBM_FLT_CALLEE_TRASH;
+ }
+}
+
+/*****************************************************************************
+ * Register types
+ *****************************************************************************/
+template <class T>
+RegisterType regType(T type)
+{
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(type))
+ {
+ return FloatRegisterType;
+ }
+#endif // FEATURE_SIMD
+ return varTypeIsFloating(TypeGet(type)) ? FloatRegisterType : IntRegisterType;
+}
+
+bool useFloatReg(var_types type)
+{
+ return (regType(type) == FloatRegisterType);
+}
+
+bool registerTypesEquivalent(RegisterType a, RegisterType b)
+{
+ return varTypeIsIntegralOrI(a) == varTypeIsIntegralOrI(b);
+}
+
+bool isSingleRegister(regMaskTP regMask)
+{
+ return (regMask != RBM_NONE && genMaxOneBit(regMask));
+}
+
+/*****************************************************************************
+ * Inline functions for RegRecord
+ *****************************************************************************/
+
+bool RegRecord::isFree()
+{
+ return ((assignedInterval == nullptr || !assignedInterval->isActive) && !isBusyUntilNextKill);
+}
+
+/*****************************************************************************
+ * Inline functions for LinearScan
+ *****************************************************************************/
+RegRecord* LinearScan::getRegisterRecord(regNumber regNum)
+{
+ return &physRegs[regNum];
+}
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// stressLimitRegs: Given a set of registers, expressed as a register mask, reduce
+// them based on the current stress options.
+//
+// Arguments:
+// mask - The current mask of register candidates for a node
+//
+// Return Value:
+// A possibly-modified mask, based on the value of COMPlus_JitStressRegs.
+//
+// Notes:
+// This is the method used to implement the stress options that limit
+// the set of registers considered for allocation.
+
+regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
+{
+ if (getStressLimitRegs() != LSRA_LIMIT_NONE)
+ {
+ switch (getStressLimitRegs())
+ {
+ case LSRA_LIMIT_CALLEE:
+ if (!compiler->opts.compDbgEnC && (mask & RBM_CALLEE_SAVED) != RBM_NONE)
+ {
+ mask &= RBM_CALLEE_SAVED;
+ }
+ break;
+ case LSRA_LIMIT_CALLER:
+ if ((mask & RBM_CALLEE_TRASH) != RBM_NONE)
+ {
+ mask &= RBM_CALLEE_TRASH;
+ }
+ break;
+ case LSRA_LIMIT_SMALL_SET:
+ if ((mask & LsraLimitSmallIntSet) != RBM_NONE)
+ {
+ mask &= LsraLimitSmallIntSet;
+ }
+ else if ((mask & LsraLimitSmallFPSet) != RBM_NONE)
+ {
+ mask &= LsraLimitSmallFPSet;
+ }
+ break;
+ default:
+ unreached();
+ }
+ if (refPosition != nullptr && refPosition->isFixedRegRef)
+ {
+ mask |= refPosition->registerAssignment;
+ }
+ }
+ return mask;
+}
+#endif // DEBUG
+
+// TODO-Cleanup: Consider adding an overload that takes a varDsc, and can appropriately
+// set such fields as isStructField
+
+Interval* LinearScan::newInterval(RegisterType theRegisterType)
+{
+ intervals.emplace_back(theRegisterType, allRegs(theRegisterType));
+ Interval* newInt = &intervals.back();
+
+#ifdef DEBUG
+ newInt->intervalIndex = static_cast<unsigned>(intervals.size() - 1);
+#endif // DEBUG
+
+ DBEXEC(VERBOSE, newInt->dump());
+ return newInt;
+}
+
+RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
+{
+ refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType);
+ RefPosition* newRP = &refPositions.back();
+#ifdef DEBUG
+ newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1);
+#endif // DEBUG
+ return newRP;
+}
+
+//------------------------------------------------------------------------
+// resolveConflictingDefAndUse: Resolve the situation where we have conflicting def and use
+// register requirements on a single-def, single-use interval.
+//
+// Arguments:
+// defRefPosition - The interval definition
+// useRefPosition - The (sole) interval use
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The two RefPositions are for the same interval, which is a tree-temp.
+//
+// Notes:
+// We require some special handling for the case where the use is a "delayRegFree" case of a fixedReg.
+// In that case, if we change the registerAssignment on the useRefPosition, we will lose the fact that,
+// even if we assign a different register (and rely on codegen to do the copy), that fixedReg also needs
+// to remain busy until the Def register has been allocated. In that case, we don't allow Case 1 or Case 4
+// below.
+// Here are the cases we consider (in this order):
+// 1. If The defRefPosition specifies a single register, and there are no conflicting
+// FixedReg uses of it between the def and use, we use that register, and the code generator
+// will insert the copy. Note that it cannot be in use because there is a FixedRegRef for the def.
+// 2. If the useRefPosition specifies a single register, and it is not in use, and there are no
+// conflicting FixedReg uses of it between the def and use, we use that register, and the code generator
+// will insert the copy.
+// 3. If the defRefPosition specifies a single register (but there are conflicts, as determined
+// in 1.), and there are no conflicts with the useRefPosition register (if it's a single register),
+// we set the register requirements on the defRefPosition to the use registers, and the
+// code generator will insert a copy on the def. We can't rely on the code generator to put a copy
+// on the use if it has multiple possible candidates, as it won't know which one has been allocated.
+// 4. If the useRefPosition specifies a single register, and there are no conflicts with the register
+// on the defRefPosition, we leave the register requirements on the defRefPosition as-is, and set
+// the useRefPosition to the def registers, for similar reasons to case #3.
+// 5. If both the defRefPosition and the useRefPosition specify single registers, but both have conflicts,
+// We set the candidates on defRefPosition to be all regs of the appropriate type, and since they are
+// single registers, codegen can insert the copy.
+// 6. Finally, if the RefPositions specify disjoint subsets of the registers (or the use is fixed but
+// has a conflict), we must insert a copy. The copy will be inserted before the use if the
+// use is not fixed (in the fixed case, the code generator will insert the use).
+//
+// TODO-CQ: We get bad register allocation in case #3 in the situation where no register is
+// available for the lifetime. We end up allocating a register that must be spilled, and it probably
+// won't be the register that is actually defined by the target instruction. So, we have to copy it
+// and THEN spill it. In this case, we should be using the def requirement. But we need to change
+// the interface to this method a bit to make that work (e.g. returning a candidate set to use, but
+// leaving the registerAssignment as-is on the def, so that if we find that we need to spill anyway
+// we can use the fixed-reg on the def.)
+//
+
+void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition)
+{
+ assert(!interval->isLocalVar);
+
+ RefPosition* useRefPosition = defRefPosition->nextRefPosition;
+ regMaskTP defRegAssignment = defRefPosition->registerAssignment;
+ regMaskTP useRegAssignment = useRefPosition->registerAssignment;
+ RegRecord* defRegRecord = nullptr;
+ RegRecord* useRegRecord = nullptr;
+ regNumber defReg = REG_NA;
+ regNumber useReg = REG_NA;
+ bool defRegConflict = false;
+ bool useRegConflict = false;
+
+ // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment
+ // on it, or we will fail to ensure that the fixedReg is busy at the time the target
+ // (of the node that uses this interval) is allocated.
+ bool canChangeUseAssignment = !useRefPosition->isFixedRegRef || !useRefPosition->delayRegFree;
+
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CONFLICT));
+ if (!canChangeUseAssignment)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_FIXED_DELAY_USE));
+ }
+ if (defRefPosition->isFixedRegRef)
+ {
+ defReg = defRefPosition->assignedReg();
+ defRegRecord = getRegisterRecord(defReg);
+ if (canChangeUseAssignment)
+ {
+ RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition;
+ assert(currFixedRegRefPosition != nullptr &&
+ currFixedRegRefPosition->nodeLocation == defRefPosition->nodeLocation);
+
+ if (currFixedRegRefPosition->nextRefPosition == nullptr ||
+ currFixedRegRefPosition->nextRefPosition->nodeLocation > useRefPosition->getRefEndLocation())
+ {
+ // This is case #1. Use the defRegAssignment
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE1));
+ useRefPosition->registerAssignment = defRegAssignment;
+ return;
+ }
+ else
+ {
+ defRegConflict = true;
+ }
+ }
+ }
+ if (useRefPosition->isFixedRegRef)
+ {
+ useReg = useRefPosition->assignedReg();
+ useRegRecord = getRegisterRecord(useReg);
+ RefPosition* currFixedRegRefPosition = useRegRecord->recentRefPosition;
+
+ // We know that useRefPosition is a fixed use, so the nextRefPosition must not be null.
+ RefPosition* nextFixedRegRefPosition = useRegRecord->getNextRefPosition();
+ assert(nextFixedRegRefPosition != nullptr &&
+ nextFixedRegRefPosition->nodeLocation <= useRefPosition->nodeLocation);
+
+ // First, check to see if there are any conflicting FixedReg references between the def and use.
+ if (nextFixedRegRefPosition->nodeLocation == useRefPosition->nodeLocation)
+ {
+ // OK, no conflicting FixedReg references.
+ // Now, check to see whether it is currently in use.
+ if (useRegRecord->assignedInterval != nullptr)
+ {
+ RefPosition* possiblyConflictingRef = useRegRecord->assignedInterval->recentRefPosition;
+ LsraLocation possiblyConflictingRefLocation = possiblyConflictingRef->getRefEndLocation();
+ if (possiblyConflictingRefLocation >= defRefPosition->nodeLocation)
+ {
+ useRegConflict = true;
+ }
+ }
+ if (!useRegConflict)
+ {
+ // This is case #2. Use the useRegAssignment
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2));
+ defRefPosition->registerAssignment = useRegAssignment;
+ return;
+ }
+ }
+ else
+ {
+ useRegConflict = true;
+ }
+ }
+ if (defRegRecord != nullptr && !useRegConflict)
+ {
+ // This is case #3.
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3));
+ defRefPosition->registerAssignment = useRegAssignment;
+ return;
+ }
+ if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment)
+ {
+ // This is case #4.
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4));
+ useRefPosition->registerAssignment = defRegAssignment;
+ return;
+ }
+ if (defRegRecord != nullptr && useRegRecord != nullptr)
+ {
+ // This is case #5.
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE5));
+ RegisterType regType = interval->registerType;
+ assert((getRegisterType(interval, defRefPosition) == regType) &&
+ (getRegisterType(interval, useRefPosition) == regType));
+ regMaskTP candidates = allRegs(regType);
+ defRefPosition->registerAssignment = candidates;
+ return;
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6));
+ return;
+}
+
+//------------------------------------------------------------------------
+// conflictingFixedRegReference: Determine whether the current RegRecord has a
+// fixed register use that conflicts with 'refPosition'
+//
+// Arguments:
+// refPosition - The RefPosition of interest
+//
+// Return Value:
+// Returns true iff the given RefPosition is NOT a fixed use of this register,
+// AND either:
+// - there is a RefPosition on this RegRecord at the nodeLocation of the given RefPosition, or
+// - the given RefPosition has a delayRegFree, and there is a RefPosition on this RegRecord at
+// the nodeLocation just past the given RefPosition.
+//
+// Assumptions:
+// 'refPosition' is non-null.
+
+bool RegRecord::conflictingFixedRegReference(RefPosition* refPosition)
+{
+ // Is this a fixed reference of this register? If so, there is no conflict.
+ if (refPosition->isFixedRefOfRegMask(genRegMask(regNum)))
+ {
+ return false;
+ }
+ // Otherwise, check for conflicts.
+ // There is a conflict if:
+ // 1. There is a recent RefPosition on this RegRecord that is at this location,
+ // except in the case where it is a special "putarg" that is associated with this interval, OR
+ // 2. There is an upcoming RefPosition at this location, or at the next location
+ // if refPosition is a delayed use (i.e. must be kept live through the next/def location).
+
+ LsraLocation refLocation = refPosition->nodeLocation;
+ if (recentRefPosition != nullptr && recentRefPosition->refType != RefTypeKill &&
+ recentRefPosition->nodeLocation == refLocation &&
+ (!isBusyUntilNextKill || assignedInterval != refPosition->getInterval()))
+ {
+ return true;
+ }
+ LsraLocation nextPhysRefLocation = getNextRefLocation();
+ if (nextPhysRefLocation == refLocation || (refPosition->delayRegFree && nextPhysRefLocation == (refLocation + 1)))
+ {
+ return true;
+ }
+ return false;
+}
+
+void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp)
+{
+#ifdef _TARGET_AMD64_
+ if (compiler->opts.compDbgEnC)
+ {
+ // We only use RSI and RDI for EnC code, so we don't want to favor callee-save regs.
+ return;
+ }
+#endif // _TARGET_AMD64_
+
+ Interval* theInterval = rp->getInterval();
+#ifdef DEBUG
+ regMaskTP calleeSaveMask = calleeSaveRegs(getRegisterType(theInterval, rp));
+ if (doReverseCallerCallee())
+ {
+ regMaskTP newAssignment = rp->registerAssignment;
+ newAssignment &= calleeSaveMask;
+ if (newAssignment != RBM_NONE)
+ {
+ rp->registerAssignment = newAssignment;
+ }
+ }
+ else
+#endif // DEBUG
+ {
+ // Set preferences so that this register set will be preferred for earlier refs
+ theInterval->updateRegisterPreferences(rp->registerAssignment);
+ }
+}
+
+void LinearScan::associateRefPosWithInterval(RefPosition* rp)
+{
+ Referenceable* theReferent = rp->referent;
+
+ if (theReferent != nullptr)
+ {
+ // All RefPositions except the dummy ones at the beginning of blocks
+
+ if (rp->isIntervalRef())
+ {
+ Interval* theInterval = rp->getInterval();
+
+ applyCalleeSaveHeuristics(rp);
+
+ // Ensure that we have consistent def/use on SDSU temps.
+ // However, in the case of a non-commutative rmw def, we must avoid over-constraining
+ // the def, so don't propagate a single-register restriction from the consumer to the producer
+
+ if (RefTypeIsUse(rp->refType) && !theInterval->isLocalVar)
+ {
+ RefPosition* prevRefPosition = theInterval->recentRefPosition;
+ assert(prevRefPosition != nullptr && theInterval->firstRefPosition == prevRefPosition);
+ regMaskTP prevAssignment = prevRefPosition->registerAssignment;
+ regMaskTP newAssignment = (prevAssignment & rp->registerAssignment);
+ if (newAssignment != RBM_NONE)
+ {
+ if (!theInterval->hasNonCommutativeRMWDef || !isSingleRegister(newAssignment))
+ {
+ prevRefPosition->registerAssignment = newAssignment;
+ }
+ }
+ else
+ {
+ theInterval->hasConflictingDefUse = true;
+ }
+ }
+ }
+
+ RefPosition* prevRP = theReferent->recentRefPosition;
+ if (prevRP != nullptr)
+ {
+ prevRP->nextRefPosition = rp;
+ }
+ else
+ {
+ theReferent->firstRefPosition = rp;
+ }
+ theReferent->recentRefPosition = rp;
+ theReferent->lastRefPosition = rp;
+ }
+ else
+ {
+ assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs));
+ }
+}
+
+//---------------------------------------------------------------------------
+// newRefPosition: allocate and initialize a new RefPosition.
+//
+// Arguments:
+// reg - reg number that identifies RegRecord to be associated
+// with this RefPosition
+// theLocation - LSRA location of RefPosition
+// theRefType - RefPosition type
+// theTreeNode - GenTree node for which this RefPosition is created
+// mask - Set of valid registers for this RefPosition
+//
+// Return Value:
+// a new RefPosition
+//
+RefPosition* LinearScan::newRefPosition(
+ regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask)
+{
+ RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
+
+ newRP->setReg(getRegisterRecord(reg));
+ newRP->registerAssignment = mask;
+
+ newRP->setMultiRegIdx(0);
+ newRP->setAllocateIfProfitable(0);
+
+ associateRefPosWithInterval(newRP);
+
+ DBEXEC(VERBOSE, newRP->dump());
+ return newRP;
+}
+
+//---------------------------------------------------------------------------
+// newRefPosition: allocate and initialize a new RefPosition.
+//
+// Arguments:
+// theInterval - interval to which RefPosition is associated with.
+// theLocation - LSRA location of RefPosition
+// theRefType - RefPosition type
+// theTreeNode - GenTree node for which this RefPosition is created
+// mask - Set of valid registers for this RefPosition
+// multiRegIdx - register position if this RefPosition corresponds to a
+// multi-reg call node.
+//
+// Return Value:
+// a new RefPosition
+//
+RefPosition* LinearScan::newRefPosition(Interval* theInterval,
+ LsraLocation theLocation,
+ RefType theRefType,
+ GenTree* theTreeNode,
+ regMaskTP mask,
+ unsigned multiRegIdx /* = 0 */)
+{
+#ifdef DEBUG
+ if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType)
+ {
+        // If we are using floating point registers, we must make sure this flag was set
+        // previously in the compiler, since it determines whether LSRA takes FP register
+        // kill sets into consideration.
+ assert(compiler->compFloatingPointUsed || ((mask & RBM_FLT_CALLEE_SAVED) == 0));
+ }
+#endif // DEBUG
+
+ // If this reference is constrained to a single register (and it's not a dummy
+ // or Kill reftype already), add a RefTypeFixedReg at this location so that its
+ // availability can be more accurately determined
+
+ bool isFixedRegister = isSingleRegister(mask);
+ bool insertFixedRef = false;
+ if (isFixedRegister)
+ {
+ // Insert a RefTypeFixedReg for any normal def or use (not ParamDef or BB)
+ if (theRefType == RefTypeUse || theRefType == RefTypeDef)
+ {
+ insertFixedRef = true;
+ }
+ }
+
+ if (insertFixedRef)
+ {
+ regNumber physicalReg = genRegNumFromMask(mask);
+ RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask);
+ assert(theInterval != nullptr);
+ assert((allRegs(theInterval->registerType) & mask) != 0);
+ }
+
+ RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
+
+ newRP->setInterval(theInterval);
+
+ // Spill info
+ newRP->isFixedRegRef = isFixedRegister;
+
+#ifndef _TARGET_AMD64_
+    // We don't need this for AMD64 because the PInvoke method epilog code is explicit
+ // at register allocation time.
+ if (theInterval != nullptr && theInterval->isLocalVar && compiler->info.compCallUnmanaged &&
+ theInterval->varNum == compiler->genReturnLocal)
+ {
+ mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
+ noway_assert(mask != RBM_NONE);
+ }
+#endif // !_TARGET_AMD64_
+ newRP->registerAssignment = mask;
+
+ newRP->setMultiRegIdx(multiRegIdx);
+ newRP->setAllocateIfProfitable(0);
+
+ associateRefPosWithInterval(newRP);
+
+ DBEXEC(VERBOSE, newRP->dump());
+ return newRP;
+}
+
+/*****************************************************************************
+ * Inline functions for Interval
+ *****************************************************************************/
+RefPosition* Referenceable::getNextRefPosition()
+{
+ if (recentRefPosition == nullptr)
+ {
+ return firstRefPosition;
+ }
+ else
+ {
+ return recentRefPosition->nextRefPosition;
+ }
+}
+
+LsraLocation Referenceable::getNextRefLocation()
+{
+ RefPosition* nextRefPosition = getNextRefPosition();
+ if (nextRefPosition == nullptr)
+ {
+ return MaxLocation;
+ }
+ else
+ {
+ return nextRefPosition->nodeLocation;
+ }
+}
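+
+// Illustrative sketch (not part of the allocator proper): the RefPositions attached to a
+// Referenceable form a singly-linked chain built by associateRefPosWithInterval, so the full set
+// of references for a hypothetical 'interval' could be walked as:
+//
+//     for (RefPosition* rp = interval->firstRefPosition; rp != nullptr; rp = rp->nextRefPosition)
+//     {
+//         // rp->nodeLocation is the LSRA location of this reference.
+//     }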
+
+// Iterate through all the registers of the given type
+class RegisterIterator
+{
+ friend class Registers;
+
+public:
+ RegisterIterator(RegisterType type) : regType(type)
+ {
+ if (useFloatReg(regType))
+ {
+ currentRegNum = REG_FP_FIRST;
+ }
+ else
+ {
+ currentRegNum = REG_INT_FIRST;
+ }
+ }
+
+protected:
+ static RegisterIterator Begin(RegisterType regType)
+ {
+ return RegisterIterator(regType);
+ }
+ static RegisterIterator End(RegisterType regType)
+ {
+ RegisterIterator endIter = RegisterIterator(regType);
+        // This assumes only integer and floating point register types;
+        // if we target a processor with additional register types,
+        // this would have to change.
+ if (useFloatReg(regType))
+ {
+ // This just happens to work for both double & float
+ endIter.currentRegNum = REG_NEXT(REG_FP_LAST);
+ }
+ else
+ {
+ endIter.currentRegNum = REG_NEXT(REG_INT_LAST);
+ }
+ return endIter;
+ }
+
+public:
+    void operator++(int dummy) // int dummy is C++ for "this is postfix ++"
+ {
+ currentRegNum = REG_NEXT(currentRegNum);
+#ifdef _TARGET_ARM_
+ if (regType == TYP_DOUBLE)
+ currentRegNum = REG_NEXT(currentRegNum);
+#endif
+ }
+ void operator++() // prefix operator++
+ {
+ currentRegNum = REG_NEXT(currentRegNum);
+#ifdef _TARGET_ARM_
+ if (regType == TYP_DOUBLE)
+ currentRegNum = REG_NEXT(currentRegNum);
+#endif
+ }
+ regNumber operator*()
+ {
+ return currentRegNum;
+ }
+ bool operator!=(const RegisterIterator& other)
+ {
+ return other.currentRegNum != currentRegNum;
+ }
+
+private:
+ regNumber currentRegNum;
+ RegisterType regType;
+};
+
+class Registers
+{
+public:
+ friend class RegisterIterator;
+ RegisterType type;
+ Registers(RegisterType t)
+ {
+ type = t;
+ }
+ RegisterIterator begin()
+ {
+ return RegisterIterator::Begin(type);
+ }
+ RegisterIterator end()
+ {
+ return RegisterIterator::End(type);
+ }
+};
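+
+// Usage sketch (illustrative only; 'regType' stands in for any RegisterType value): since Registers
+// exposes begin()/end() returning RegisterIterator, the registers of a given type can be enumerated
+// with an explicit iterator loop:
+//
+//     Registers registers(regType);
+//     for (RegisterIterator iter = registers.begin(); iter != registers.end(); ++iter)
+//     {
+//         regNumber reg = *iter;
+//         // ... use 'reg' ...
+//     }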
+
+#ifdef DEBUG
+void LinearScan::dumpVarToRegMap(VarToRegMap map)
+{
+ bool anyPrinted = false;
+ for (unsigned varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ if (map[varIndex] != REG_STK)
+ {
+ printf("V%02u=%s ", varNum, getRegName(map[varIndex]));
+ anyPrinted = true;
+ }
+ }
+ if (!anyPrinted)
+ {
+ printf("none");
+ }
+ printf("\n");
+}
+
+void LinearScan::dumpInVarToRegMap(BasicBlock* block)
+{
+ printf("Var=Reg beg of BB%02u: ", block->bbNum);
+ VarToRegMap map = getInVarToRegMap(block->bbNum);
+ dumpVarToRegMap(map);
+}
+
+void LinearScan::dumpOutVarToRegMap(BasicBlock* block)
+{
+ printf("Var=Reg end of BB%02u: ", block->bbNum);
+ VarToRegMap map = getOutVarToRegMap(block->bbNum);
+ dumpVarToRegMap(map);
+}
+
+#endif // DEBUG
+
+LinearScanInterface* getLinearScanAllocator(Compiler* comp)
+{
+ return new (comp, CMK_LSRA) LinearScan(comp);
+}
+
+//------------------------------------------------------------------------
+// LSRA constructor
+//
+// Arguments:
+// theCompiler
+//
+// Notes:
+// The constructor takes care of initializing the data structures that are used
+// during Lowering, including (in DEBUG) getting the stress environment variables,
+// as they may affect the block ordering.
+
+LinearScan::LinearScan(Compiler* theCompiler)
+ : compiler(theCompiler)
+#if MEASURE_MEM_ALLOC
+ , lsraIAllocator(nullptr)
+#endif // MEASURE_MEM_ALLOC
+ , intervals(LinearScanMemoryAllocatorInterval(theCompiler))
+ , refPositions(LinearScanMemoryAllocatorRefPosition(theCompiler))
+{
+#ifdef DEBUG
+ maxNodeLocation = 0;
+ activeRefPosition = nullptr;
+
+ // Get the value of the environment variable that controls stress for register allocation
+ lsraStressMask = JitConfig.JitStressRegs();
+#if 0
+#ifdef DEBUG
+ if (lsraStressMask != 0)
+ {
+ // The code in this #if can be used to debug JitStressRegs issues according to
+ // method hash. To use, simply set environment variables JitStressRegsHashLo and JitStressRegsHashHi
+ unsigned methHash = compiler->info.compMethodHash();
+ char* lostr = getenv("JitStressRegsHashLo");
+ unsigned methHashLo = 0;
+ bool dump = false;
+ if (lostr != nullptr)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ dump = true;
+ }
+ char* histr = getenv("JitStressRegsHashHi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != nullptr)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ dump = true;
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ {
+ lsraStressMask = 0;
+ }
+ else if (dump == true)
+ {
+ printf("JitStressRegs = %x for method %s, hash = 0x%x.\n",
+ lsraStressMask, compiler->info.compFullName, compiler->info.compMethodHash());
+ printf(""); // in our logic this causes a flush
+ }
+ }
+#endif // DEBUG
+#endif
+
+ dumpTerse = (JitConfig.JitDumpTerseLsra() != 0);
+
+#endif // DEBUG
+ availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd);
+#if ETW_EBP_FRAMED
+ availableIntRegs &= ~RBM_FPBASE;
+#endif // ETW_EBP_FRAMED
+ availableFloatRegs = RBM_ALLFLOAT;
+ availableDoubleRegs = RBM_ALLDOUBLE;
+
+#ifdef _TARGET_AMD64_
+ if (compiler->opts.compDbgEnC)
+ {
+ // On x64 when the EnC option is set, we always save exactly RBP, RSI and RDI.
+ // RBP is not available to the register allocator, so RSI and RDI are the only
+ // callee-save registers available.
+ availableIntRegs &= ~RBM_CALLEE_SAVED | RBM_RSI | RBM_RDI;
+ availableFloatRegs &= ~RBM_CALLEE_SAVED;
+ availableDoubleRegs &= ~RBM_CALLEE_SAVED;
+ }
+#endif // _TARGET_AMD64_
+ compiler->rpFrameType = FT_NOT_SET;
+ compiler->rpMustCreateEBPCalled = false;
+
+ compiler->codeGen->intRegState.rsIsFloat = false;
+ compiler->codeGen->floatRegState.rsIsFloat = true;
+
+ // Block sequencing (the order in which we schedule).
+ // Note that we don't initialize the bbVisitedSet until we do the first traversal
+ // (currently during Lowering's second phase, where it sets the TreeNodeInfo).
+ // This is so that any blocks that are added during the first phase of Lowering
+ // are accounted for (and we don't have BasicBlockEpoch issues).
+ blockSequencingDone = false;
+ blockSequence = nullptr;
+ blockSequenceWorkList = nullptr;
+ curBBSeqNum = 0;
+ bbSeqCount = 0;
+
+ // Information about each block, including predecessor blocks used for variable locations at block entry.
+ blockInfo = nullptr;
+
+ // Populate the register mask table.
+ // The first two masks in the table are allint/allfloat
+ // The next N are the masks for each single register.
+ // After that are the dynamically added ones.
+ regMaskTable = new (compiler, CMK_LSRA) regMaskTP[numMasks];
+ regMaskTable[ALLINT_IDX] = allRegs(TYP_INT);
+ regMaskTable[ALLFLOAT_IDX] = allRegs(TYP_DOUBLE);
+
+ regNumber reg;
+ for (reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ regMaskTable[FIRST_SINGLE_REG_IDX + reg - REG_FIRST] = (reg == REG_STK) ? RBM_NONE : genRegMask(reg);
+ }
+ nextFreeMask = FIRST_SINGLE_REG_IDX + REG_COUNT;
+ noway_assert(nextFreeMask <= numMasks);
+}
+
+// Return the reg mask corresponding to the given index.
+regMaskTP LinearScan::GetRegMaskForIndex(RegMaskIndex index)
+{
+ assert(index < numMasks);
+ assert(index < nextFreeMask);
+ return regMaskTable[index];
+}
+
+// Given a reg mask, return the index it corresponds to. If it is not a 'well known' reg mask,
+// add it at the end. This method has linear behavior in the worst case, but that is fairly rare.
+// Most methods never use any but the well-known masks, and when they do use more
+// it is only one or two more.
+LinearScan::RegMaskIndex LinearScan::GetIndexForRegMask(regMaskTP mask)
+{
+ RegMaskIndex result;
+ if (isSingleRegister(mask))
+ {
+ result = genRegNumFromMask(mask) + FIRST_SINGLE_REG_IDX;
+ }
+ else if (mask == allRegs(TYP_INT))
+ {
+ result = ALLINT_IDX;
+ }
+ else if (mask == allRegs(TYP_DOUBLE))
+ {
+ result = ALLFLOAT_IDX;
+ }
+ else
+ {
+ for (int i = FIRST_SINGLE_REG_IDX + REG_COUNT; i < nextFreeMask; i++)
+ {
+ if (regMaskTable[i] == mask)
+ {
+ return i;
+ }
+ }
+
+ // We only allocate a fixed number of masks. Since we don't reallocate, we will throw a
+ // noway_assert if we exceed this limit.
+ noway_assert(nextFreeMask < numMasks);
+
+ regMaskTable[nextFreeMask] = mask;
+ result = nextFreeMask;
+ nextFreeMask++;
+ }
+ assert(mask == regMaskTable[result]);
+ return result;
+}
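+
+// Illustrative round-trip, from within LinearScan (an assumption for exposition, not code used by
+// the allocator); on xarch, for example:
+//
+//     RegMaskIndex idx  = GetIndexForRegMask(RBM_RAX | RBM_RCX); // added to the table if not well-known
+//     regMaskTP    mask = GetRegMaskForIndex(idx);               // recovers the same mask
+//
+// Single-register masks and the allint/allfloat masks map to the pre-populated slots, so only
+// genuinely new combinations consume entries in regMaskTable.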
+
+// We've decided that we can't use a register during register allocation (probably FPBASE),
+// but we've already added it to the register masks. Go through the masks and remove it.
+void LinearScan::RemoveRegisterFromMasks(regNumber reg)
+{
+ JITDUMP("Removing register %s from LSRA register masks\n", getRegName(reg));
+
+ regMaskTP mask = ~genRegMask(reg);
+ for (int i = 0; i < nextFreeMask; i++)
+ {
+ regMaskTable[i] &= mask;
+ }
+
+ JITDUMP("After removing register:\n");
+ DBEXEC(VERBOSE, dspRegisterMaskTable());
+}
+
+#ifdef DEBUG
+void LinearScan::dspRegisterMaskTable()
+{
+ printf("LSRA register masks. Total allocated: %d, total used: %d\n", numMasks, nextFreeMask);
+ for (int i = 0; i < nextFreeMask; i++)
+ {
+ printf("%2u: ", i);
+ dspRegMask(regMaskTable[i]);
+ printf("\n");
+ }
+}
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// getNextCandidateFromWorkList: Get the next candidate for block sequencing
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// The next block to be placed in the sequence.
+//
+// Notes:
+// This method currently always returns the next block in the list, and relies on having
+// blocks added to the list only when they are "ready", and on the
+// addToBlockSequenceWorkList() method to insert them in the proper order.
+// However, a block may be in the list and already selected, if it was subsequently
+// encountered as both a flow and layout successor of the most recently selected
+// block.
+
+BasicBlock* LinearScan::getNextCandidateFromWorkList()
+{
+ BasicBlockList* nextWorkList = nullptr;
+ for (BasicBlockList* workList = blockSequenceWorkList; workList != nullptr; workList = nextWorkList)
+ {
+ nextWorkList = workList->next;
+ BasicBlock* candBlock = workList->block;
+ removeFromBlockSequenceWorkList(workList, nullptr);
+ if (!isBlockVisited(candBlock))
+ {
+ return candBlock;
+ }
+ }
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// setBlockSequence: Determine the block order for register allocation.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// None
+//
+// Notes:
+// On return, the blockSequence array contains the blocks, in the order in which they
+// will be allocated.
+// This method clears the bbVisitedSet on LinearScan, and when it returns the set
+//    contains all the bbNums for the blocks.
+// This requires a traversal of the BasicBlocks, and could potentially be
+// combined with the first traversal (currently the one in Lowering that sets the
+// TreeNodeInfo).
+
+void LinearScan::setBlockSequence()
+{
+ // Reset the "visited" flag on each block.
+ compiler->EnsureBasicBlockEpoch();
+ bbVisitedSet = BlockSetOps::MakeEmpty(compiler);
+ BlockSet BLOCKSET_INIT_NOCOPY(readySet, BlockSetOps::MakeEmpty(compiler));
+ assert(blockSequence == nullptr && bbSeqCount == 0);
+ blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount];
+ bbNumMaxBeforeResolution = compiler->fgBBNumMax;
+ blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1];
+
+ assert(blockSequenceWorkList == nullptr);
+
+ bool addedInternalBlocks = false;
+ verifiedAllBBs = false;
+ BasicBlock* nextBlock;
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = nextBlock)
+ {
+ blockSequence[bbSeqCount] = block;
+ markBlockVisited(block);
+ bbSeqCount++;
+ nextBlock = nullptr;
+
+ // Initialize the blockInfo.
+ // predBBNum will be set later. 0 is never used as a bbNum.
+ blockInfo[block->bbNum].predBBNum = 0;
+ // We check for critical edges below, but initialize to false.
+ blockInfo[block->bbNum].hasCriticalInEdge = false;
+ blockInfo[block->bbNum].hasCriticalOutEdge = false;
+ blockInfo[block->bbNum].weight = block->bbWeight;
+
+ if (block->GetUniquePred(compiler) == nullptr)
+ {
+ for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ if (predBlock->NumSucc(compiler) > 1)
+ {
+ blockInfo[block->bbNum].hasCriticalInEdge = true;
+ break;
+ }
+ else if (predBlock->bbJumpKind == BBJ_SWITCH)
+ {
+ assert(!"Switch with single successor");
+ }
+ }
+ }
+
+ // Determine which block to schedule next.
+
+ // First, update the NORMAL successors of the current block, adding them to the worklist
+ // according to the desired order. We will handle the EH successors below.
+ bool checkForCriticalOutEdge = (block->NumSucc(compiler) > 1);
+ if (!checkForCriticalOutEdge && block->bbJumpKind == BBJ_SWITCH)
+ {
+ assert(!"Switch with single successor");
+ }
+
+ for (unsigned succIndex = 0; succIndex < block->NumSucc(compiler); succIndex++)
+ {
+ BasicBlock* succ = block->GetSucc(succIndex, compiler);
+ if (checkForCriticalOutEdge && succ->GetUniquePred(compiler) == nullptr)
+ {
+ blockInfo[block->bbNum].hasCriticalOutEdge = true;
+ // We can stop checking now.
+ checkForCriticalOutEdge = false;
+ }
+
+ if (isTraversalLayoutOrder() || isBlockVisited(succ))
+ {
+ continue;
+ }
+
+ // We've now seen a predecessor, so add it to the work list and the "readySet".
+ // It will be inserted in the worklist according to the specified traversal order
+ // (i.e. pred-first or random, since layout order is handled above).
+ if (!BlockSetOps::IsMember(compiler, readySet, succ->bbNum))
+ {
+ addToBlockSequenceWorkList(readySet, succ);
+ BlockSetOps::AddElemD(compiler, readySet, succ->bbNum);
+ }
+ }
+
+ // For layout order, simply use bbNext
+ if (isTraversalLayoutOrder())
+ {
+ nextBlock = block->bbNext;
+ continue;
+ }
+
+ while (nextBlock == nullptr)
+ {
+ nextBlock = getNextCandidateFromWorkList();
+
+ // TODO-Throughput: We would like to bypass this traversal if we know we've handled all
+ // the blocks - but fgBBcount does not appear to be updated when blocks are removed.
+ if (nextBlock == nullptr /* && bbSeqCount != compiler->fgBBcount*/ && !verifiedAllBBs)
+ {
+            // If we don't encounter all blocks by traversing the regular successor links, do a full
+ // traversal of all the blocks, and add them in layout order.
+ // This may include:
+ // - internal-only blocks (in the fgAddCodeList) which may not be in the flow graph
+ // (these are not even in the bbNext links).
+ // - blocks that have become unreachable due to optimizations, but that are strongly
+ // connected (these are not removed)
+ // - EH blocks
+
+ for (Compiler::AddCodeDsc* desc = compiler->fgAddCodeList; desc != nullptr; desc = desc->acdNext)
+ {
+ if (!isBlockVisited(block))
+ {
+ addToBlockSequenceWorkList(readySet, block);
+ BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
+ }
+ }
+
+ for (BasicBlock* block = compiler->fgFirstBB; block; block = block->bbNext)
+ {
+ if (!isBlockVisited(block))
+ {
+ addToBlockSequenceWorkList(readySet, block);
+ BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
+ }
+ }
+ verifiedAllBBs = true;
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+ blockSequencingDone = true;
+
+#ifdef DEBUG
+ // Make sure that we've visited all the blocks.
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ assert(isBlockVisited(block));
+ }
+
+ JITDUMP("LSRA Block Sequence: ");
+ int i = 1;
+ for (BasicBlock *block = startBlockSequence(); block != nullptr; ++i, block = moveToNextBlock())
+ {
+ JITDUMP("BB%02u", block->bbNum);
+
+ if (block->isMaxBBWeight())
+ {
+ JITDUMP("(MAX) ");
+ }
+ else
+ {
+ JITDUMP("(%6s) ", refCntWtd2str(block->getBBWeight(compiler)));
+ }
+
+ if (i % 10 == 0)
+ {
+ JITDUMP("\n ");
+ }
+ }
+ JITDUMP("\n\n");
+#endif
+}
+
+//------------------------------------------------------------------------
+// compareBlocksForSequencing: Compare two basic blocks for sequencing order.
+//
+// Arguments:
+// block1 - the first block for comparison
+// block2 - the second block for comparison
+// useBlockWeights - whether to use block weights for comparison
+//
+// Return Value:
+// -1 if block1 is preferred.
+// 0 if the blocks are equivalent.
+// 1 if block2 is preferred.
+//
+// Notes:
+// See addToBlockSequenceWorkList.
+int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights)
+{
+ if (useBlockWeights)
+ {
+ unsigned weight1 = block1->getBBWeight(compiler);
+ unsigned weight2 = block2->getBBWeight(compiler);
+
+ if (weight1 > weight2)
+ {
+ return -1;
+ }
+ else if (weight1 < weight2)
+ {
+ return 1;
+ }
+ }
+
+ // If weights are the same prefer LOWER bbnum
+ if (block1->bbNum < block2->bbNum)
+ {
+ return -1;
+ }
+ else if (block1->bbNum == block2->bbNum)
+ {
+ return 0;
+ }
+ else
+ {
+ return 1;
+ }
+}
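+
+// For example (illustrative): with useBlockWeights, a block with bbWeight 100 sorts ahead of one
+// with bbWeight 2 regardless of bbNum; if the weights are equal (or weights are not used), the block
+// with the lower bbNum is preferred, and 0 is returned only when a block is compared with itself.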
+
+//------------------------------------------------------------------------
+// addToBlockSequenceWorkList: Add a BasicBlock to the work list for sequencing.
+//
+// Arguments:
+// sequencedBlockSet - the set of blocks that are already sequenced
+// block - the new block to be added
+//
+// Return Value:
+// None.
+//
+// Notes:
+// The first block in the list will be the next one to be sequenced, as soon
+// as we encounter a block whose successors have all been sequenced, in pred-first
+// order, or the very next block if we are traversing in random order (once implemented).
+// This method uses a comparison method to determine the order in which to place
+// the blocks in the list. This method queries whether all predecessors of the
+// block are sequenced at the time it is added to the list and if so uses block weights
+// for inserting the block. A block is never inserted ahead of its predecessors.
+// A block at the time of insertion may not have all its predecessors sequenced, in
+// which case it will be sequenced based on its block number. Once a block is inserted,
+// its priority\order will not be changed later once its remaining predecessors are
+// sequenced. This would mean that work list may not be sorted entirely based on
+// block weights alone.
+//
+// Note also that, when random traversal order is implemented, this method
+// should insert the blocks into the list in random order, so that we can always
+// simply select the first block in the list.
+void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block)
+{
+ // The block that is being added is not already sequenced
+ assert(!BlockSetOps::IsMember(compiler, sequencedBlockSet, block->bbNum));
+
+ // Get predSet of block
+ BlockSet BLOCKSET_INIT_NOCOPY(predSet, BlockSetOps::MakeEmpty(compiler));
+ flowList* pred;
+ for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BlockSetOps::AddElemD(compiler, predSet, pred->flBlock->bbNum);
+ }
+
+    // If the block is rarely run, or all of its preds are already sequenced, use the block's weight for sequencing.
+ bool useBlockWeight = block->isRunRarely() || BlockSetOps::IsSubset(compiler, sequencedBlockSet, predSet);
+
+ BasicBlockList* prevNode = nullptr;
+ BasicBlockList* nextNode = blockSequenceWorkList;
+
+ while (nextNode != nullptr)
+ {
+ int seqResult;
+
+ if (nextNode->block->isRunRarely())
+ {
+ // If the block that is yet to be sequenced is a rarely run block, always use block weights for sequencing
+ seqResult = compareBlocksForSequencing(nextNode->block, block, true);
+ }
+ else if (BlockSetOps::IsMember(compiler, predSet, nextNode->block->bbNum))
+ {
+ // always prefer unsequenced pred blocks
+ seqResult = -1;
+ }
+ else
+ {
+ seqResult = compareBlocksForSequencing(nextNode->block, block, useBlockWeight);
+ }
+
+ if (seqResult > 0)
+ {
+ break;
+ }
+
+ prevNode = nextNode;
+ nextNode = nextNode->next;
+ }
+
+ BasicBlockList* newListNode = new (compiler, CMK_LSRA) BasicBlockList(block, nextNode);
+ if (prevNode == nullptr)
+ {
+ blockSequenceWorkList = newListNode;
+ }
+ else
+ {
+ prevNode->next = newListNode;
+ }
+}
+
+void LinearScan::removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode)
+{
+ if (listNode == blockSequenceWorkList)
+ {
+ assert(prevNode == nullptr);
+ blockSequenceWorkList = listNode->next;
+ }
+ else
+ {
+ assert(prevNode != nullptr && prevNode->next == listNode);
+ prevNode->next = listNode->next;
+ }
+ // TODO-Cleanup: consider merging Compiler::BlockListNode and BasicBlockList
+ // compiler->FreeBlockListNode(listNode);
+}
+
+// Initialize the block order for allocation (called each time a new traversal begins).
+BasicBlock* LinearScan::startBlockSequence()
+{
+ if (!blockSequencingDone)
+ {
+ setBlockSequence();
+ }
+ BasicBlock* curBB = compiler->fgFirstBB;
+ curBBSeqNum = 0;
+ curBBNum = curBB->bbNum;
+ clearVisitedBlocks();
+ assert(blockSequence[0] == compiler->fgFirstBB);
+ markBlockVisited(curBB);
+ return curBB;
+}
+
+//------------------------------------------------------------------------
+// moveToNextBlock: Move to the next block in order for allocation or resolution.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// The next block.
+//
+// Notes:
+// This method is used when the next block is actually going to be handled.
+// It changes curBBNum.
+
+BasicBlock* LinearScan::moveToNextBlock()
+{
+ BasicBlock* nextBlock = getNextBlock();
+ curBBSeqNum++;
+ if (nextBlock != nullptr)
+ {
+ curBBNum = nextBlock->bbNum;
+ }
+ return nextBlock;
+}
+
+//------------------------------------------------------------------------
+// getNextBlock: Get the next block in order for allocation or resolution.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// The next block.
+//
+// Notes:
+// This method does not actually change the current block - it is used simply
+// to determine which block will be next.
+
+BasicBlock* LinearScan::getNextBlock()
+{
+ assert(blockSequencingDone);
+ unsigned int nextBBSeqNum = curBBSeqNum + 1;
+ if (nextBBSeqNum < bbSeqCount)
+ {
+ return blockSequence[nextBBSeqNum];
+ }
+ return nullptr;
+}
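+
+// Typical traversal sketch (mirrors the DEBUG dump loop in setBlockSequence; shown here only as an
+// illustration, not as new allocator logic):
+//
+//     for (BasicBlock* block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
+//     {
+//         // allocate or resolve registers for 'block' in the chosen sequence order
+//     }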
+
+//------------------------------------------------------------------------
+// doLinearScan: The main method for register allocation.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Lowering must have set the NodeInfo (gtLsraInfo) on each node to communicate
+// the register requirements.
+
+void LinearScan::doLinearScan()
+{
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("*************** In doLinearScan\n");
+ printf("Trees before linear scan register allocator (LSRA)\n");
+ compiler->fgDispBasicBlocks(true);
+ }
+#endif // DEBUG
+
+ splitBBNumToTargetBBNumMap = nullptr;
+
+ // This is complicated by the fact that physical registers have refs associated
+ // with locations where they are killed (e.g. calls), but we don't want to
+ // count these as being touched.
+
+ compiler->codeGen->regSet.rsClearRegsModified();
+
+ // Figure out if we're going to use an RSP frame or an RBP frame. We need to do this
+ // before building the intervals and ref positions, because those objects will embed
+ // RBP in various register masks (like preferences) if RBP is allowed to be allocated.
+ setFrameType();
+
+ initMaxSpill();
+ buildIntervals();
+ DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_REFPOS));
+ compiler->EndPhase(PHASE_LINEAR_SCAN_BUILD);
+
+ DBEXEC(VERBOSE, lsraDumpIntervals("after buildIntervals"));
+
+ BlockSetOps::ClearD(compiler, bbVisitedSet);
+ initVarRegMaps();
+ allocateRegisters();
+ compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC);
+ resolveRegisters();
+ compiler->EndPhase(PHASE_LINEAR_SCAN_RESOLVE);
+
+ DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST));
+
+ compiler->compLSRADone = true;
+}
+
+//------------------------------------------------------------------------
+// recordVarLocationsAtStartOfBB: Update live-in LclVarDscs with the appropriate
+// register location at the start of a block, during codegen.
+//
+// Arguments:
+// bb - the block for which code is about to be generated.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// CodeGen will take care of updating the reg masks and the current var liveness,
+// after calling this method.
+// This is because we need to kill off the dead registers before setting the newly live ones.
+
+void LinearScan::recordVarLocationsAtStartOfBB(BasicBlock* bb)
+{
+ JITDUMP("Recording Var Locations at start of BB%02u\n", bb->bbNum);
+ VarToRegMap map = getInVarToRegMap(bb->bbNum);
+ unsigned count = 0;
+
+ VARSET_ITER_INIT(compiler, iter, bb->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ regNumber regNum = getVarReg(map, varNum);
+
+ regNumber oldRegNum = varDsc->lvRegNum;
+ regNumber newRegNum = regNum;
+
+ if (oldRegNum != newRegNum)
+ {
+ JITDUMP(" V%02u(%s->%s)", varNum, compiler->compRegVarName(oldRegNum),
+ compiler->compRegVarName(newRegNum));
+ varDsc->lvRegNum = newRegNum;
+ count++;
+ }
+ else if (newRegNum != REG_STK)
+ {
+ JITDUMP(" V%02u(%s)", varNum, compiler->compRegVarName(newRegNum));
+ count++;
+ }
+ }
+
+ if (count == 0)
+ {
+ JITDUMP(" <none>\n");
+ }
+
+ JITDUMP("\n");
+}
+
+void Interval::setLocalNumber(unsigned lclNum, LinearScan* linScan)
+{
+ linScan->localVarIntervals[lclNum] = this;
+
+ assert(linScan->getIntervalForLocalVar(lclNum) == this);
+ this->isLocalVar = true;
+ this->varNum = lclNum;
+}
+
+// identify the candidates which we are not going to enregister due to
+// being used in EH in a way we don't want to deal with
+// this logic cloned from fgInterBlockLocalVarLiveness
+void LinearScan::identifyCandidatesExceptionDataflow()
+{
+ VARSET_TP VARSET_INIT_NOCOPY(exceptVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(filterVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(finallyVars, VarSetOps::MakeEmpty(compiler));
+ BasicBlock* block;
+
+ foreach_block(compiler, block)
+ {
+ if (block->bbCatchTyp != BBCT_NONE)
+ {
+ // live on entry to handler
+ VarSetOps::UnionD(compiler, exceptVars, block->bbLiveIn);
+ }
+
+ if (block->bbJumpKind == BBJ_EHFILTERRET)
+ {
+ // live on exit from filter
+ VarSetOps::UnionD(compiler, filterVars, block->bbLiveOut);
+ }
+ else if (block->bbJumpKind == BBJ_EHFINALLYRET)
+ {
+ // live on exit from finally
+ VarSetOps::UnionD(compiler, finallyVars, block->bbLiveOut);
+ }
+#if FEATURE_EH_FUNCLETS
+ // Funclets are called and returned from, as such we can only count on the frame
+ // pointer being restored, and thus everything live in or live out must be on the
+ // stack
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ VarSetOps::UnionD(compiler, exceptVars, block->bbLiveIn);
+ }
+ if ((block->bbJumpKind == BBJ_EHFINALLYRET) || (block->bbJumpKind == BBJ_EHFILTERRET) ||
+ (block->bbJumpKind == BBJ_EHCATCHRET))
+ {
+ VarSetOps::UnionD(compiler, exceptVars, block->bbLiveOut);
+ }
+#endif // FEATURE_EH_FUNCLETS
+ }
+
+ // slam them all together (there was really no need to use more than 2 bitvectors here)
+ VarSetOps::UnionD(compiler, exceptVars, filterVars);
+ VarSetOps::UnionD(compiler, exceptVars, finallyVars);
+
+ /* Mark all pointer variables live on exit from a 'finally'
+ block as either volatile for non-GC ref types or as
+ 'explicitly initialized' (volatile and must-init) for GC-ref types */
+
+ VARSET_ITER_INIT(compiler, iter, exceptVars, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+ compiler->lvaSetVarDoNotEnregister(varNum DEBUGARG(Compiler::DNER_LiveInOutOfHandler));
+
+ if (varTypeIsGC(varDsc))
+ {
+ if (VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam)
+ {
+ varDsc->lvMustInit = true;
+ }
+ }
+ }
+}
+
+bool LinearScan::isRegCandidate(LclVarDsc* varDsc)
+{
+ // Check to see if opt settings permit register variables
+ if ((compiler->opts.compFlags & CLFLG_REGVAR) == 0)
+ {
+ return false;
+ }
+
+ // If we have JMP, reg args must be put on the stack
+
+ if (compiler->compJmpOpUsed && varDsc->lvIsRegArg)
+ {
+ return false;
+ }
+
+ if (!varDsc->lvTracked)
+ {
+ return false;
+ }
+
+ // Don't allocate registers for dependently promoted struct fields
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ return false;
+ }
+ return true;
+}
+
+// Identify locals & compiler temps that are register candidates
+// TODO-Cleanup: This was cloned from Compiler::lvaSortByRefCount() in lclvars.cpp in order
+// to avoid perturbation, but should be merged.
+
+void LinearScan::identifyCandidates()
+{
+ if (compiler->lvaCount == 0)
+ {
+ return;
+ }
+
+ if (compiler->compHndBBtabCount > 0)
+ {
+ identifyCandidatesExceptionDataflow();
+ }
+
+ // initialize mapping from local to interval
+ localVarIntervals = new (compiler, CMK_LSRA) Interval*[compiler->lvaCount];
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ // While we build intervals for the candidate lclVars, we will determine the floating point
+ // lclVars, if any, to consider for callee-save register preferencing.
+ // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
+ // and those that meet the second.
+ // The first threshold is used for methods that are heuristically deemed either to have light
+ // fp usage, or other factors that encourage conservative use of callee-save registers, such
+    // as multiple exits (where there might be an early exit that would be excessively penalized by
+ // lots of prolog/epilog saves & restores).
+    // The second threshold is used where there are factors deemed to make it more likely that
+    // fp callee-save registers will be needed, such as loops or many fp vars.
+ // We keep two sets of vars, since we collect some of the information to determine which set to
+ // use as we iterate over the vars.
+ // When we are generating AVX code on non-Unix (FEATURE_PARTIAL_SIMD_CALLEE_SAVE), we maintain an
+ // additional set of LargeVectorType vars, and there is a separate threshold defined for those.
+ // It is assumed that if we encounter these, that we should consider this a "high use" scenario,
+ // so we don't maintain two sets of these vars.
+ // This is defined as thresholdLargeVectorRefCntWtd, as we are likely to use the same mechanism
+ // for vectors on Arm64, though the actual value may differ.
+
+ VarSetOps::AssignNoCopy(compiler, fpCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(fpMaybeCandidateVars, VarSetOps::MakeEmpty(compiler));
+ unsigned int floatVarCount = 0;
+ unsigned int thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT;
+ unsigned int maybeFPRefCntWtd = 2 * BB_UNITY_WEIGHT;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ VarSetOps::AssignNoCopy(compiler, largeVectorVars, VarSetOps::MakeEmpty(compiler));
+ VarSetOps::AssignNoCopy(compiler, largeVectorCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
+ unsigned int largeVectorVarCount = 0;
+ unsigned int thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT;
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+ for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
+ {
+ // Assign intervals to all the variables - this makes it easier to map
+ // them back
+ var_types intervalType = (var_types)varDsc->lvType;
+ Interval* newInt = newInterval(intervalType);
+
+ newInt->setLocalNumber(lclNum, this);
+ if (varDsc->lvIsStructField)
+ {
+ newInt->isStructField = true;
+ }
+
+ // Initialize all variables to REG_STK
+ varDsc->lvRegNum = REG_STK;
+#ifndef _TARGET_64BIT_
+ varDsc->lvOtherReg = REG_STK;
+#endif // _TARGET_64BIT_
+
+#if !defined(_TARGET_64BIT_)
+ if (intervalType == TYP_LONG)
+ {
+ // Long variables should not be register candidates.
+ // Lowering will have split any candidate lclVars into lo/hi vars.
+ varDsc->lvLRACandidate = 0;
+ continue;
+ }
+#endif // !defined(_TARGET_64BIT)
+
+ /* Track all locals that can be enregistered */
+
+ varDsc->lvLRACandidate = 1;
+
+ if (!isRegCandidate(varDsc))
+ {
+ varDsc->lvLRACandidate = 0;
+ continue;
+ }
+
+ // Start with lvRegister as false - set it true only if the variable gets
+ // the same register assignment throughout
+ varDsc->lvRegister = false;
+
+ /* If the ref count is zero */
+ if (varDsc->lvRefCnt == 0)
+ {
+ /* Zero ref count, make this untracked */
+ varDsc->lvRefCntWtd = 0;
+ varDsc->lvLRACandidate = 0;
+ }
+
+ // Variables that are address-exposed are never enregistered, or tracked.
+ // A struct may be promoted, and a struct that fits in a register may be fully enregistered.
+ // Pinned variables may not be tracked (a condition of the GCInfo representation)
+ // or enregistered, on x86 -- it is believed that we can enregister pinned (more properly, "pinning")
+ // references when using the general GC encoding.
+
+ if (varDsc->lvAddrExposed || !varTypeIsEnregisterableStruct(varDsc))
+ {
+ varDsc->lvLRACandidate = 0;
+#ifdef DEBUG
+ Compiler::DoNotEnregisterReason dner = Compiler::DNER_AddrExposed;
+ if (!varDsc->lvAddrExposed)
+ {
+ dner = Compiler::DNER_IsStruct;
+ }
+#endif // DEBUG
+ compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(dner));
+ }
+ else if (varDsc->lvPinned)
+ {
+ varDsc->lvTracked = 0;
+#ifdef JIT32_GCENCODER
+ compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_PinningRef));
+#endif // JIT32_GCENCODER
+ }
+
+ // Are we not optimizing and we have exception handlers?
+ // if so mark all args and locals as volatile, so that they
+ // won't ever get enregistered.
+ //
+ if (compiler->opts.MinOpts() && compiler->compHndBBtabCount > 0)
+ {
+ compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_LiveInOutOfHandler));
+ varDsc->lvLRACandidate = 0;
+ continue;
+ }
+
+ if (varDsc->lvDoNotEnregister)
+ {
+ varDsc->lvLRACandidate = 0;
+ continue;
+ }
+
+ var_types type = genActualType(varDsc->TypeGet());
+
+ switch (type)
+ {
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ if (compiler->opts.compDbgCode)
+ {
+ varDsc->lvLRACandidate = 0;
+ }
+ break;
+#endif // CPU_HAS_FP_SUPPORT
+
+ case TYP_INT:
+ case TYP_LONG:
+ case TYP_REF:
+ case TYP_BYREF:
+ break;
+
+#ifdef FEATURE_SIMD
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+ if (varDsc->lvPromoted)
+ {
+ varDsc->lvLRACandidate = 0;
+ }
+ break;
+ // TODO-1stClassStructs: Move TYP_SIMD8 up with the other SIMD types, after handling the param issue
+ // (passing & returning as TYP_LONG).
+ case TYP_SIMD8:
+#endif // FEATURE_SIMD
+
+ case TYP_STRUCT:
+ {
+ varDsc->lvLRACandidate = 0;
+ }
+ break;
+
+ case TYP_UNDEF:
+ case TYP_UNKNOWN:
+ noway_assert(!"lvType not set correctly");
+ varDsc->lvType = TYP_INT;
+
+ __fallthrough;
+
+ default:
+ varDsc->lvLRACandidate = 0;
+ }
+
+ // we will set this later when we have determined liveness
+ if (varDsc->lvLRACandidate)
+ {
+ varDsc->lvMustInit = false;
+ }
+
+ // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
+ // and those that meet the second (see the definitions of thresholdFPRefCntWtd and maybeFPRefCntWtd
+ // above).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ // Additionally, when we are generating AVX on non-UNIX amd64, we keep a separate set of the LargeVectorType
+ // vars.
+ if (varDsc->lvType == LargeVectorType)
+ {
+ largeVectorVarCount++;
+ VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex);
+ unsigned refCntWtd = varDsc->lvRefCntWtd;
+ if (refCntWtd >= thresholdLargeVectorRefCntWtd)
+ {
+ VarSetOps::AddElemD(compiler, largeVectorCalleeSaveCandidateVars, varDsc->lvVarIndex);
+ }
+ }
+ else
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (regType(newInt->registerType) == FloatRegisterType)
+ {
+ floatVarCount++;
+ unsigned refCntWtd = varDsc->lvRefCntWtd;
+ if (varDsc->lvIsRegArg)
+ {
+ // Don't count the initial reference for register params. In those cases,
+ // using a callee-save causes an extra copy.
+ refCntWtd -= BB_UNITY_WEIGHT;
+ }
+ if (refCntWtd >= thresholdFPRefCntWtd)
+ {
+ VarSetOps::AddElemD(compiler, fpCalleeSaveCandidateVars, varDsc->lvVarIndex);
+ }
+ else if (refCntWtd >= maybeFPRefCntWtd)
+ {
+ VarSetOps::AddElemD(compiler, fpMaybeCandidateVars, varDsc->lvVarIndex);
+ }
+ }
+ }
+
+ // The factors we consider to determine which set of fp vars to use as candidates for callee save
+    // registers currently include the number of fp vars, whether there are loops, and whether there are
+ // multiple exits. These have been selected somewhat empirically, but there is probably room for
+ // more tuning.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("\nFP callee save candidate vars: ");
+ if (!VarSetOps::IsEmpty(compiler, fpCalleeSaveCandidateVars))
+ {
+ dumpConvertedVarSet(compiler, fpCalleeSaveCandidateVars);
+ printf("\n");
+ }
+ else
+ {
+ printf("None\n\n");
+ }
+ }
+#endif
+
+ JITDUMP("floatVarCount = %d; hasLoops = %d, singleExit = %d\n", floatVarCount, compiler->fgHasLoops,
+ (compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr));
+
+ // Determine whether to use the 2nd, more aggressive, threshold for fp callee saves.
+ if (floatVarCount > 6 && compiler->fgHasLoops &&
+ (compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr))
+ {
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("Adding additional fp callee save candidates: \n");
+ if (!VarSetOps::IsEmpty(compiler, fpMaybeCandidateVars))
+ {
+ dumpConvertedVarSet(compiler, fpMaybeCandidateVars);
+ printf("\n");
+ }
+ else
+ {
+ printf("None\n\n");
+ }
+ }
+#endif
+ VarSetOps::UnionD(compiler, fpCalleeSaveCandidateVars, fpMaybeCandidateVars);
+ }
+
+#ifdef _TARGET_ARM_
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ // Frame layout is only pre-computed for ARM
+ printf("\nlvaTable after IdentifyCandidates\n");
+ compiler->lvaTableDump();
+ }
+#endif // DEBUG
+#endif // _TARGET_ARM_
+}
+
+// TODO-Throughput: This mapping can surely be more efficiently done
+void LinearScan::initVarRegMaps()
+{
+ assert(compiler->lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked
+ // variables.
+
+ // The compiler memory allocator requires that the allocation be an
+ // even multiple of int-sized objects
+ unsigned int varCount = compiler->lvaTrackedCount;
+ regMapCount = (unsigned int)roundUp(varCount, sizeof(int));
+
+ // Not sure why blocks aren't numbered from zero, but they don't appear to be.
+ // So, if we want to index by bbNum we have to know the maximum value.
+ unsigned int bbCount = compiler->fgBBNumMax + 1;
+
+ inVarToRegMaps = new (compiler, CMK_LSRA) regNumber*[bbCount];
+ outVarToRegMaps = new (compiler, CMK_LSRA) regNumber*[bbCount];
+
+ if (varCount > 0)
+ {
+ // This VarToRegMap is used during the resolution of critical edges.
+ sharedCriticalVarToRegMap = new (compiler, CMK_LSRA) regNumber[regMapCount];
+
+ for (unsigned int i = 0; i < bbCount; i++)
+ {
+ regNumber* inVarToRegMap = new (compiler, CMK_LSRA) regNumber[regMapCount];
+ regNumber* outVarToRegMap = new (compiler, CMK_LSRA) regNumber[regMapCount];
+
+ for (unsigned int j = 0; j < regMapCount; j++)
+ {
+ inVarToRegMap[j] = REG_STK;
+ outVarToRegMap[j] = REG_STK;
+ }
+ inVarToRegMaps[i] = inVarToRegMap;
+ outVarToRegMaps[i] = outVarToRegMap;
+ }
+ }
+ else
+ {
+ sharedCriticalVarToRegMap = nullptr;
+ for (unsigned int i = 0; i < bbCount; i++)
+ {
+ inVarToRegMaps[i] = nullptr;
+ outVarToRegMaps[i] = nullptr;
+ }
+ }
+}
+
+void LinearScan::setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg)
+{
+ assert(reg < UCHAR_MAX && varNum < compiler->lvaCount);
+ inVarToRegMaps[bbNum][compiler->lvaTable[varNum].lvVarIndex] = reg;
+}
+
+void LinearScan::setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg)
+{
+ assert(reg < UCHAR_MAX && varNum < compiler->lvaCount);
+ outVarToRegMaps[bbNum][compiler->lvaTable[varNum].lvVarIndex] = reg;
+}
+
+LinearScan::SplitEdgeInfo LinearScan::getSplitEdgeInfo(unsigned int bbNum)
+{
+ SplitEdgeInfo splitEdgeInfo;
+ assert(bbNum <= compiler->fgBBNumMax);
+ assert(bbNum > bbNumMaxBeforeResolution);
+ assert(splitBBNumToTargetBBNumMap != nullptr);
+ splitBBNumToTargetBBNumMap->Lookup(bbNum, &splitEdgeInfo);
+ assert(splitEdgeInfo.toBBNum <= bbNumMaxBeforeResolution);
+ assert(splitEdgeInfo.fromBBNum <= bbNumMaxBeforeResolution);
+ return splitEdgeInfo;
+}
+
+VarToRegMap LinearScan::getInVarToRegMap(unsigned int bbNum)
+{
+ assert(bbNum <= compiler->fgBBNumMax);
+ // For the blocks inserted to split critical edges, the inVarToRegMap is
+ // equal to the outVarToRegMap at the "from" block.
+ if (bbNum > bbNumMaxBeforeResolution)
+ {
+ SplitEdgeInfo splitEdgeInfo = getSplitEdgeInfo(bbNum);
+ unsigned fromBBNum = splitEdgeInfo.fromBBNum;
+ if (fromBBNum == 0)
+ {
+ assert(splitEdgeInfo.toBBNum != 0);
+ return inVarToRegMaps[splitEdgeInfo.toBBNum];
+ }
+ else
+ {
+ return outVarToRegMaps[fromBBNum];
+ }
+ }
+
+ return inVarToRegMaps[bbNum];
+}
+
+VarToRegMap LinearScan::getOutVarToRegMap(unsigned int bbNum)
+{
+ assert(bbNum <= compiler->fgBBNumMax);
+ // For the blocks inserted to split critical edges, the outVarToRegMap is
+ // equal to the inVarToRegMap at the target.
+ if (bbNum > bbNumMaxBeforeResolution)
+ {
+ // If this is an empty block, its in and out maps are both the same.
+ // We identify this case by setting fromBBNum or toBBNum to 0, and using only the other.
+ SplitEdgeInfo splitEdgeInfo = getSplitEdgeInfo(bbNum);
+ unsigned toBBNum = splitEdgeInfo.toBBNum;
+ if (toBBNum == 0)
+ {
+ assert(splitEdgeInfo.fromBBNum != 0);
+ return outVarToRegMaps[splitEdgeInfo.fromBBNum];
+ }
+ else
+ {
+ return inVarToRegMaps[toBBNum];
+ }
+ }
+ return outVarToRegMaps[bbNum];
+}
+
+regNumber LinearScan::getVarReg(VarToRegMap bbVarToRegMap, unsigned int varNum)
+{
+ assert(compiler->lvaTable[varNum].lvTracked);
+ return bbVarToRegMap[compiler->lvaTable[varNum].lvVarIndex];
+}
+
+// Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
+// the block)
+VarToRegMap LinearScan::setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap)
+{
+ VarToRegMap inVarToRegMap = inVarToRegMaps[bbNum];
+ memcpy(inVarToRegMap, srcVarToRegMap, (regMapCount * sizeof(regNumber)));
+ return inVarToRegMap;
+}
+
+// find the last node in the tree in execution order
+// TODO-Throughput: this is inefficient!
+GenTree* lastNodeInTree(GenTree* tree)
+{
+    // There is no gtPrev on the top-level tree node, so
+ // apparently the way to walk a tree backwards is to walk
+ // it forward, find the last node, and walk back from there.
+
+ GenTree* last = nullptr;
+ if (tree->OperGet() == GT_STMT)
+ {
+ GenTree* statement = tree;
+
+ foreach_treenode_execution_order(tree, statement)
+ {
+ last = tree;
+ }
+ return last;
+ }
+ else
+ {
+ while (tree)
+ {
+ last = tree;
+ tree = tree->gtNext;
+ }
+ return last;
+ }
+}
+
+// Given a local variable tree node, return the RefType (RefTypeDef or RefTypeUse) for the reference.
+RefType refTypeForLocalRefNode(GenTree* node)
+{
+ assert(node->IsLocal());
+
+ // We don't support updates
+ assert((node->gtFlags & GTF_VAR_USEASG) == 0);
+
+ if (node->gtFlags & GTF_VAR_DEF)
+ {
+ return RefTypeDef;
+ }
+ else
+ {
+ return RefTypeUse;
+ }
+}
+
+// This function sets RefPosition last uses by walking the RefPositions, instead of walking the
+// tree nodes in execution order (as was done in a previous version).
+// This is because the execution order isn't strictly correct, specifically for
+// references to local variables that occur in arg lists.
+//
+// TODO-Throughput: This function should eventually be eliminated, as we should be able to rely on last uses
+// being set by dataflow analysis. It is necessary to do it this way only because the execution
+// order wasn't strictly correct.
+
+void LinearScan::setLastUses(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ JITDUMP("\n\nCALCULATING LAST USES for block %u, liveout=", block->bbNum);
+ dumpConvertedVarSet(compiler, block->bbLiveOut);
+ JITDUMP("\n==============================\n");
+ }
+#endif // DEBUG
+
+ unsigned keepAliveVarNum = BAD_VAR_NUM;
+ if (compiler->lvaKeepAliveAndReportThis())
+ {
+ keepAliveVarNum = compiler->info.compThisArg;
+ assert(compiler->info.compIsStatic == false);
+ }
+
+ // find which uses are lastUses
+
+ // Work backwards starting with live out.
+ // 'temp' is updated to include any exposed use (including those in this
+ // block that we've already seen). When we encounter a use, if it's
+ // not in that set, then it's a last use.
+
+ VARSET_TP VARSET_INIT(compiler, temp, block->bbLiveOut);
+
+ auto currentRefPosition = refPositions.rbegin();
+
+ while (currentRefPosition->refType != RefTypeBB)
+ {
+ // We should never see ParamDefs or ZeroInits within a basic block.
+ assert(currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit);
+ if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isLocalVar)
+ {
+ unsigned varNum = currentRefPosition->getInterval()->varNum;
+ unsigned varIndex = currentRefPosition->getInterval()->getVarIndex(compiler);
+ // We should always have a tree node for a localVar, except for the "special" RefPositions.
+ GenTreePtr tree = currentRefPosition->treeNode;
+ assert(tree != nullptr || currentRefPosition->refType == RefTypeExpUse ||
+ currentRefPosition->refType == RefTypeDummyDef);
+ if (!VarSetOps::IsMember(compiler, temp, varIndex) && varNum != keepAliveVarNum)
+ {
+ // There was no exposed use, so this is a
+ // "last use" (and we mark it thus even if it's a def)
+
+ if (tree != nullptr)
+ {
+ tree->gtFlags |= GTF_VAR_DEATH;
+ }
+ LsraLocation loc = currentRefPosition->nodeLocation;
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ JITDUMP("last use of V%02u @%u (not marked as last use for LSRA due to extendLifetimes stress "
+ "option)\n",
+ compiler->lvaTrackedToVarNum[varIndex], loc);
+ }
+ else
+#endif // DEBUG
+ {
+ JITDUMP("last use of V%02u @%u\n", compiler->lvaTrackedToVarNum[varIndex], loc);
+ currentRefPosition->lastUse = true;
+ }
+ VarSetOps::AddElemD(compiler, temp, varIndex);
+ }
+ else
+ {
+ currentRefPosition->lastUse = false;
+ if (tree != nullptr)
+ {
+ tree->gtFlags &= ~GTF_VAR_DEATH;
+ }
+ }
+
+ if (currentRefPosition->refType == RefTypeDef || currentRefPosition->refType == RefTypeDummyDef)
+ {
+ VarSetOps::RemoveElemD(compiler, temp, varIndex);
+ }
+ }
+ assert(currentRefPosition != refPositions.rend());
+ ++currentRefPosition;
+ }
+
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT(compiler, temp2, block->bbLiveIn);
+ VarSetOps::DiffD(compiler, temp2, temp);
+ VarSetOps::DiffD(compiler, temp, block->bbLiveIn);
+ bool foundDiff = false;
+
+ {
+ VARSET_ITER_INIT(compiler, iter, temp, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ if (compiler->lvaTable[varNum].lvLRACandidate)
+ {
+ JITDUMP("BB%02u: V%02u is computed live, but not in LiveIn set.\n", block->bbNum, varNum);
+ foundDiff = true;
+ }
+ }
+ }
+
+ {
+ VARSET_ITER_INIT(compiler, iter, temp2, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ if (compiler->lvaTable[varNum].lvLRACandidate)
+ {
+ JITDUMP("BB%02u: V%02u is in LiveIn set, but not computed live.\n", block->bbNum, varNum);
+ foundDiff = true;
+ }
+ }
+ }
+
+ assert(!foundDiff);
+#endif // DEBUG
+}
+
+void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse)
+{
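+ // Walk 'mask' one register at a time, creating a RefPosition for each register that is set.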
+ for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1)
+ {
+ if (mask & 1)
+ {
+ // This assumes that these are all "special" RefTypes that
+ // don't need to be recorded on the tree (hence treeNode is nullptr)
+ RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr,
+ genRegMask(reg)); // This MUST occupy the physical register (obviously)
+
+ if (isLastUse)
+ {
+ pos->lastUse = true;
+ }
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// getKillSetForNode: Return the registers killed by the given tree node.
+//
+// Arguments:
+// tree - the tree for which the kill set is needed.
+//
+// Return Value: a register mask of the registers killed
+//
+regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
+{
+ regMaskTP killMask = RBM_NONE;
+ switch (tree->OperGet())
+ {
+#ifdef _TARGET_XARCH_
+ case GT_MUL:
+ // We use the 128-bit multiply when performing an overflow checking unsigned multiply
+ //
+ if (((tree->gtFlags & GTF_UNSIGNED) != 0) && tree->gtOverflowEx())
+ {
+ // Both RAX and RDX are killed by the operation
+ killMask = RBM_RAX | RBM_RDX;
+ }
+ break;
+
+ case GT_MULHI:
+ killMask = RBM_RAX | RBM_RDX;
+ break;
+
+ case GT_MOD:
+ case GT_DIV:
+ case GT_UMOD:
+ case GT_UDIV:
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ // RDX needs to be killed early, because it must not be used as a source register
+ // (unlike most cases, where the kill happens AFTER the uses). So for this kill,
+ // we add the RefPosition at the tree loc (where the uses are located) instead of the
+ // usual kill location which is the same as the defs at tree loc+1.
+ // Note that we don't have to add interference for the live vars, because that
+ // will be done below, and is not sensitive to the precise location.
+ LsraLocation currentLoc = tree->gtLsraInfo.loc;
+ assert(currentLoc != 0);
+ addRefsForPhysRegMask(RBM_RDX, currentLoc, RefTypeKill, true);
+ // Both RAX and RDX are killed by the operation
+ killMask = RBM_RAX | RBM_RDX;
+ }
+ break;
+#endif // _TARGET_XARCH_
+
+ case GT_STORE_OBJ:
+ if (tree->OperIsCopyBlkOp())
+ {
+ assert(tree->AsObj()->gtGcPtrCount != 0);
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
+ break;
+ }
+ __fallthrough;
+
+ case GT_STORE_BLK:
+ case GT_STORE_DYN_BLK:
+ {
+ GenTreeBlk* blkNode = tree->AsBlk();
+ bool isCopyBlk = varTypeIsStruct(blkNode->Data());
+ switch (blkNode->gtBlkOpKind)
+ {
+ case GenTreeBlk::BlkOpKindHelper:
+ if (isCopyBlk)
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
+ }
+ else
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
+ }
+ break;
+
+#ifdef _TARGET_XARCH_
+ case GenTreeBlk::BlkOpKindRepInstr:
+ if (isCopyBlk)
+ {
+ // rep movs kills RCX, RDI and RSI
+ killMask = RBM_RCX | RBM_RDI | RBM_RSI;
+ }
+ else
+ {
+ // rep stos kills RCX and RDI.
+ // (Note that the Data() node, if not constant, will be assigned to
+ // RCX, but it's fine that this kills it, as the value is not available
+ // after this node in any case.)
+ killMask = RBM_RDI | RBM_RCX;
+ }
+ break;
+#else
+ case GenTreeBlk::BlkOpKindRepInstr:
+#endif
+ case GenTreeBlk::BlkOpKindUnroll:
+ case GenTreeBlk::BlkOpKindInvalid:
+ // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE
+ break;
+ }
+ }
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ if (tree->gtLsraInfo.isHelperCallWithKills)
+ {
+ killMask = RBM_CALLEE_TRASH;
+ }
+ break;
+ case GT_RETURNTRAP:
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
+ break;
+ case GT_CALL:
+#ifdef _TARGET_X86_
+ if (compiler->compFloatingPointUsed)
+ {
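+ // Record that a temp of the corresponding floating-point type will be needed for this call.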
+ if (tree->TypeGet() == TYP_DOUBLE)
+ {
+ needDoubleTmpForFPCall = true;
+ }
+ else if (tree->TypeGet() == TYP_FLOAT)
+ {
+ needFloatTmpForFPCall = true;
+ }
+ }
+ if (tree->IsHelperCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd);
+ killMask = compiler->compHelperCallKillSet(helpFunc);
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ // if there is no FP used, we can ignore the FP kills
+ if (compiler->compFloatingPointUsed)
+ {
+ killMask = RBM_CALLEE_TRASH;
+ }
+ else
+ {
+ killMask = RBM_INT_CALLEE_TRASH;
+ }
+ }
+ break;
+ case GT_STOREIND:
+ if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
+ {
+ killMask = RBM_CALLEE_TRASH_NOGC;
+#if !NOGC_WRITE_BARRIERS && (defined(_TARGET_ARM_) || defined(_TARGET_AMD64_))
+ killMask |= (RBM_ARG_0 | RBM_ARG_1);
+#endif // !NOGC_WRITE_BARRIERS && (defined(_TARGET_ARM_) || defined(_TARGET_AMD64_))
+ }
+ break;
+
+#if defined(PROFILING_SUPPORTED) && defined(_TARGET_AMD64_)
+ // If this method requires a profiler ELT hook, then mark these nodes as killing
+ // callee trash registers (excluding RAX and XMM0). The reason for this is that the
+ // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for
+ // more details.
+ case GT_RETURN:
+ if (compiler->compIsProfilerHookNeeded())
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+ break;
+
+ case GT_PROF_HOOK:
+ if (compiler->compIsProfilerHookNeeded())
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL);
+ }
+ break;
+#endif // PROFILING_SUPPORTED && _TARGET_AMD64_
+
+ default:
+ // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE
+ break;
+ }
+ return killMask;
+}
+
+//------------------------------------------------------------------------
+// buildKillPositionsForNode:
+// Given a tree node, add RefPositions for all the registers that node kills.
+//
+// Arguments:
+// tree - the tree for which kill positions should be generated
+// currentLoc - the location at which the kills should be added
+//
+// Return Value:
+// true - kills were inserted
+// false - no kills were inserted
+//
+// Notes:
+// The return value is needed because if we have any kills, we need to make sure that
+// all defs are located AFTER the kills. On the other hand, if there aren't kills,
+// the multiple defs for a regPair are in different locations.
+// If we generate any kills, we will mark all currentLiveVars as being preferenced
+// to avoid the killed registers. This is somewhat conservative.
+
+bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc)
+{
+ regMaskTP killMask = getKillSetForNode(tree);
+ bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH));
+ if (killMask != RBM_NONE)
+ {
+ // The killMask identifies a set of registers that will be used during codegen.
+ // Mark these as modified here, so when we do final frame layout, we'll know about
+ // all these registers. This is especially important if killMask contains
+ // callee-saved registers, which affect the frame size since we need to save/restore them.
+ // In the case of a copyBlk with GC pointers, we may need to call the
+ // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI. If
+ // LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen,
+ // which is too late.
+ compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(dumpTerse));
+
+ addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true);
+
+ // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee
+ // save regs on infrequently executed paths. However, it results in a large number of asmDiffs,
+ // many of which appear to be regressions (because there is more spill on the infrequently executed path),
+ // but are not really, because the frequent path becomes smaller. Validating these diffs will need
+ // to be done before making this change.
+ // if (!blockSequence[curBBSeqNum]->isRunRarely())
+ {
+
+ VARSET_ITER_INIT(compiler, iter, currentLiveVars, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (varDsc->lvType == LargeVectorType)
+ {
+ if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
+ {
+ continue;
+ }
+ }
+ else
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (varTypeIsFloating(varDsc) &&
+ !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex))
+ {
+ continue;
+ }
+ Interval* interval = getIntervalForLocalVar(varNum);
+ if (isCallKill)
+ {
+ interval->preferCalleeSave = true;
+ }
+ regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask);
+
+ if (newPreferences != RBM_NONE)
+ {
+ interval->updateRegisterPreferences(newPreferences);
+ }
+ else
+ {
+ // If there are no callee-saved registers, the call could kill all the registers.
+ // This is a valid state, so in that case assert should not trigger. The RA will spill in order to
+ // free a register later.
+ assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE);
+ }
+ }
+ }
+
+ if (tree->IsCall() && (tree->gtFlags & GTF_CALL_UNMANAGED) != 0)
+ {
+ RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree,
+ (allRegs(TYP_REF) & ~RBM_ARG_REGS));
+ }
+ return true;
+ }
+
+ return false;
+}
+
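+// defineNewInternalTemp: Create a new internal (temporary) interval of the given register type and
+// add a def RefPosition for it at 'currentLoc' with the given register mask.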
+RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree,
+ RegisterType regType,
+ LsraLocation currentLoc,
+ regMaskTP regMask)
+{
+ Interval* current = newInterval(regType);
+ current->isInternal = true;
+ return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask);
+}
+
+int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree,
+ LsraLocation currentLoc,
+ RefPosition* temps[]) // populates
+{
+ int count;
+ int internalIntCount = tree->gtLsraInfo.internalIntCount;
+ regMaskTP internalCands = tree->gtLsraInfo.getInternalCandidates(this);
+
+ // If the number of internal integer registers required is the same as the number of candidate integer registers in
+ // the candidate set, then they must be handled as fixed registers.
+ // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.)
+ bool fixedRegs = false;
+ regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT));
+ if (((int)genCountBits(internalIntCandidates)) == internalIntCount)
+ {
+ fixedRegs = true;
+ }
+
+ for (count = 0; count < internalIntCount; count++)
+ {
+ regMaskTP internalIntCands = (internalCands & allRegs(TYP_INT));
+ if (fixedRegs)
+ {
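+ // Carve off the lowest remaining candidate register and remove it from the candidate set,
+ // so that each internal def gets a distinct fixed register.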
+ internalIntCands = genFindLowestBit(internalIntCands);
+ internalCands &= ~internalIntCands;
+ }
+ temps[count] = defineNewInternalTemp(tree, IntRegisterType, currentLoc, internalIntCands);
+ }
+
+ int internalFloatCount = tree->gtLsraInfo.internalFloatCount;
+ for (int i = 0; i < internalFloatCount; i++)
+ {
+ regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates());
+ temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, currentLoc, internalFPCands);
+ }
+
+ noway_assert(count < MaxInternalRegisters);
+ assert(count == (internalIntCount + internalFloatCount));
+ return count;
+}
+
+void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree,
+ LsraLocation currentLoc,
+ RefPosition* defs[],
+ int total)
+{
+ assert(total < MaxInternalRegisters);
+
+ // defs[] has been populated by buildInternalRegisterDefsForNode
+ // now just add uses to the defs previously added.
+ for (int i = 0; i < total; i++)
+ {
+ RefPosition* prevRefPosition = defs[i];
+ assert(prevRefPosition != nullptr);
+ regMaskTP mask = prevRefPosition->registerAssignment;
+ if (prevRefPosition->isPhysRegRef)
+ {
+ newRefPosition(defs[i]->getReg()->regNum, currentLoc, RefTypeUse, tree, mask);
+ }
+ else
+ {
+ RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask);
+ newest->lastUse = true;
+ }
+ }
+}
+
+regMaskTP LinearScan::getUseCandidates(GenTree* useNode)
+{
+ TreeNodeInfo info = useNode->gtLsraInfo;
+ return info.getSrcCandidates(this);
+}
+
+regMaskTP LinearScan::getDefCandidates(GenTree* tree)
+{
+ TreeNodeInfo info = tree->gtLsraInfo;
+ return info.getDstCandidates(this);
+}
+
+RegisterType LinearScan::getDefType(GenTree* tree)
+{
+ return tree->TypeGet();
+}
+
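+// fixedCandidateMask: Return 'candidates' if it contains at most one register (i.e. it is a fixed
+// register requirement); otherwise return RBM_NONE.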
+regMaskTP fixedCandidateMask(var_types type, regMaskTP candidates)
+{
+ if (genMaxOneBit(candidates))
+ {
+ return candidates;
+ }
+ return RBM_NONE;
+}
+
+//------------------------------------------------------------------------
+// LocationInfoListNode: used to store a single `LocationInfo` value for a
+// node during `buildIntervals`.
+//
+// This is the node type for `LocationInfoList` below.
+//
+class LocationInfoListNode final : public LocationInfo
+{
+ friend class LocationInfoList;
+ friend class LocationInfoListNodePool;
+
+ LocationInfoListNode* m_next; // The next node in the list
+
+public:
+ LocationInfoListNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) : LocationInfo(l, i, t, regIdx)
+ {
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoListNode::Next: Returns the next node in the list.
+ LocationInfoListNode* Next() const
+ {
+ return m_next;
+ }
+};
+
+//------------------------------------------------------------------------
+// LocationInfoList: used to store a list of `LocationInfo` values for a
+// node during `buildIntervals`.
+//
+// Given an IR node that either directly defines N registers or that is a
+// contained node with uses that define a total of N registers, that node
+// will map to N `LocationInfo` values. These values are stored as a
+// linked list of `LocationInfoListNode` values.
+//
+class LocationInfoList final
+{
+ friend class LocationInfoListNodePool;
+
+ LocationInfoListNode* m_head; // The head of the list
+ LocationInfoListNode* m_tail; // The tail of the list
+
+public:
+ LocationInfoList() : m_head(nullptr), m_tail(nullptr)
+ {
+ }
+
+ LocationInfoList(LocationInfoListNode* node) : m_head(node), m_tail(node)
+ {
+ assert(m_head->m_next == nullptr);
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoList::IsEmpty: Returns true if the list is empty.
+ //
+ bool IsEmpty() const
+ {
+ return m_head == nullptr;
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoList::Begin: Returns the first node in the list.
+ //
+ LocationInfoListNode* Begin() const
+ {
+ return m_head;
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoList::End: Returns the position after the last node in the
+ // list. The returned value is suitable for use as
+ // a sentinel for iteration.
+ //
+ LocationInfoListNode* End() const
+ {
+ return nullptr;
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoList::Append: Appends a node to the list.
+ //
+ // Arguments:
+ // node - The node to append. Must not be part of an existing list.
+ //
+ void Append(LocationInfoListNode* node)
+ {
+ assert(node->m_next == nullptr);
+
+ if (m_tail == nullptr)
+ {
+ assert(m_head == nullptr);
+ m_head = node;
+ }
+ else
+ {
+ m_tail->m_next = node;
+ }
+
+ m_tail = node;
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoList::Append: Appends another list to this list.
+ //
+ // Arguments:
+ // other - The list to append.
+ //
+ void Append(LocationInfoList other)
+ {
+ if (m_tail == nullptr)
+ {
+ assert(m_head == nullptr);
+ m_head = other.m_head;
+ }
+ else
+ {
+ m_tail->m_next = other.m_head;
+ }
+
+ m_tail = other.m_tail;
+ }
+};
+
+//------------------------------------------------------------------------
+// LocationInfoListNodePool: manages a pool of `LocationInfoListNode`
+// values to decrease overall memory usage
+// during `buildIntervals`.
+//
+// `buildIntervals` involves creating a list of location info values per
+// node that either directly produces a set of registers or that is a
+// contained node with register-producing sources. However, these lists
+// are short-lived: they are destroyed once the use of the corresponding
+// node is processed. As such, there is typically only a small number of
+// `LocationInfoListNode` values in use at any given time. Pooling these
+// values avoids otherwise frequent allocations.
+class LocationInfoListNodePool final
+{
+ LocationInfoListNode* m_freeList;
+ Compiler* m_compiler;
+
+public:
+ //------------------------------------------------------------------------
+ // LocationInfoListNodePool::LocationInfoListNodePool:
+ // Creates a pool of `LocationInfoListNode` values.
+ //
+ // Arguments:
+ // compiler - The compiler context.
+ // preallocate - The number of nodes to preallocate.
+ //
+ LocationInfoListNodePool(Compiler* compiler, unsigned preallocate = 0) : m_compiler(compiler)
+ {
+ if (preallocate > 0)
+ {
+ size_t preallocateSize = sizeof(LocationInfoListNode) * preallocate;
+ auto* preallocatedNodes = reinterpret_cast<LocationInfoListNode*>(compiler->compGetMem(preallocateSize));
+
+ LocationInfoListNode* head = preallocatedNodes;
+ head->m_next = nullptr;
+
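+ // Push each remaining preallocated node onto the front of the free list.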
+ for (unsigned i = 1; i < preallocate; i++)
+ {
+ LocationInfoListNode* node = &preallocatedNodes[i];
+ node->m_next = head;
+ head = node;
+ }
+
+ m_freeList = head;
+ }
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoListNodePool::GetNode: Fetches an unused node from the
+ // pool.
+ //
+ // Arguments:
+ // l - The `LsraLocation` for the `LocationInfo` value.
+ // i - The interval for the `LocationInfo` value.
+ // t - The IR node for the `LocationInfo` value.
+ // regIdx - The register index for the `LocationInfo` value.
+ //
+ // Returns:
+ // A pooled or newly-allocated `LocationInfoListNode`, depending on the
+ // contents of the pool.
+ LocationInfoListNode* GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0)
+ {
+ LocationInfoListNode* head = m_freeList;
+ if (head == nullptr)
+ {
+ head = reinterpret_cast<LocationInfoListNode*>(m_compiler->compGetMem(sizeof(LocationInfoListNode)));
+ }
+ else
+ {
+ m_freeList = head->m_next;
+ }
+
+ head->loc = l;
+ head->interval = i;
+ head->treeNode = t;
+ head->multiRegIdx = regIdx;
+ head->m_next = nullptr;
+
+ return head;
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoListNodePool::ReturnNodes: Returns a list of nodes to the
+ // pool.
+ //
+ // Arguments:
+ // list - The list to return.
+ //
+ void ReturnNodes(LocationInfoList& list)
+ {
+ assert(list.m_head != nullptr);
+ assert(list.m_tail != nullptr);
+
+ LocationInfoListNode* head = m_freeList;
+ list.m_tail->m_next = head;
+ m_freeList = list.m_head;
+ }
+};
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+VARSET_VALRET_TP
+LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc)
+{
+ VARSET_TP VARSET_INIT_NOCOPY(liveLargeVectors, VarSetOps::MakeEmpty(compiler));
+ regMaskTP fpCalleeKillSet = RBM_NONE;
+ if (!VarSetOps::IsEmpty(compiler, largeVectorVars))
+ {
+ // We actually need to find any calls that kill the upper-half of the callee-save vector registers.
+ // But we will use as a proxy any node that kills floating point registers.
+ // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.)
+ fpCalleeKillSet = getKillSetForNode(tree);
+ if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE)
+ {
+ VarSetOps::AssignNoCopy(compiler, liveLargeVectors,
+ VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars));
+ VARSET_ITER_INIT(compiler, iter, liveLargeVectors, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ Interval* varInterval = getIntervalForLocalVar(varNum);
+ Interval* tempInterval = newInterval(LargeVectorType);
+ tempInterval->isInternal = true;
+ RefPosition* pos =
+ newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED);
+ // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set
+ // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding
+ // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval,
+ // and set varInterval as the relatedInterval of tempInterval.
+ tempInterval->relatedInterval = varInterval->relatedInterval;
+ varInterval->relatedInterval = tempInterval;
+ }
+ }
+ }
+ return liveLargeVectors;
+}
+
+void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree,
+ LsraLocation currentLoc,
+ VARSET_VALARG_TP liveLargeVectors)
+{
+ if (!VarSetOps::IsEmpty(compiler, liveLargeVectors))
+ {
+ VARSET_ITER_INIT(compiler, iter, liveLargeVectors, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ Interval* varInterval = getIntervalForLocalVar(varNum);
+ Interval* tempInterval = varInterval->relatedInterval;
+ assert(tempInterval->isInternal == true);
+ RefPosition* pos =
+ newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED);
+ // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval
+ // of tempInterval.
+ varInterval->relatedInterval = tempInterval->relatedInterval;
+ tempInterval->relatedInterval = varInterval;
+ }
+ }
+}
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// ComputeOperandDstCount: computes the number of registers defined by a
+// node.
+//
+// For most nodes, this is simple:
+// - Nodes that do not produce values (e.g. stores and other void-typed
+// nodes) and nodes that immediately use the registers they define
+// produce no registers
+// - Nodes that are marked as defining N registers define N registers.
+//
+// For contained nodes, however, things are more complicated: for purposes
+// of bookkeeping, a contained node is treated as producing the transitive
+// closure of the registers produced by its sources.
+//
+// Arguments:
+// operand - The operand for which to compute a register count.
+//
+// Returns:
+// The number of registers defined by `operand`.
+//
+static int ComputeOperandDstCount(GenTree* operand)
+{
+ TreeNodeInfo& operandInfo = operand->gtLsraInfo;
+
+ if (operandInfo.isLocalDefUse)
+ {
+ // Operands that define an unused value do not produce any registers.
+ return 0;
+ }
+ else if (operandInfo.dstCount != 0)
+ {
+ // Operands that have a specified number of destination registers consume all of their operands
+ // and therefore produce exactly that number of registers.
+ return operandInfo.dstCount;
+ }
+ else if (operandInfo.srcCount != 0)
+ {
+ // If an operand has no destination registers but does have source registers, it must be a store
+ // or a compare.
+ assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() ||
+ operand->OperIsCompare());
+ return 0;
+ }
+ else if (!operand->OperIsAggregate() && (operand->OperIsStore() || operand->TypeGet() == TYP_VOID))
+ {
+ // Stores and void-typed operands may be encountered when processing call nodes, which contain
+ // pointers to argument setup stores.
+ return 0;
+ }
+ else
+ {
+ // If an aggregate or non-void-typed operand is not an unused value and does not have source registers,
+ // that argument is contained within its parent and produces `sum(operand_dst_count)` registers.
+ int dstCount = 0;
+ for (GenTree* op : operand->Operands())
+ {
+ dstCount += ComputeOperandDstCount(op);
+ }
+
+ return dstCount;
+ }
+}
+
+//------------------------------------------------------------------------
+// ComputeAvailableSrcCount: computes the number of registers available as
+// sources for a node.
+//
+// This is simply the sum of the number of registers produced by each
+// operand to the node.
+//
+// Arguments:
+// node - The node for which to compute a source count.
+//
+// Returns:
+// The number of registers available as sources for `node`.
+//
+static int ComputeAvailableSrcCount(GenTree* node)
+{
+ int numSources = 0;
+ for (GenTree* operand : node->Operands())
+ {
+ numSources += ComputeOperandDstCount(operand);
+ }
+
+ return numSources;
+}
+#endif
+
+void LinearScan::buildRefPositionsForNode(GenTree* tree,
+ BasicBlock* block,
+ LocationInfoListNodePool& listNodePool,
+ HashTableBase<GenTree*, LocationInfoList>& operandToLocationInfoMap,
+ LsraLocation currentLoc)
+{
+#ifdef _TARGET_ARM_
+ assert(!isRegPairType(tree->TypeGet()));
+#endif // _TARGET_ARM_
+
+ // The LIR traversal doesn't visit non-aggregate GT_LIST or GT_ARGPLACE nodes
+ assert(tree->OperGet() != GT_ARGPLACE);
+ assert((tree->OperGet() != GT_LIST) || tree->AsArgList()->IsAggregate());
+
+ // These nodes are eliminated by the Rationalizer.
+ if (tree->OperGet() == GT_CLS_VAR)
+ {
+ JITDUMP("Unexpected node %s in LSRA.\n", GenTree::NodeName(tree->OperGet()));
+ assert(!"Unexpected node in LSRA.");
+ }
+
+ // The set of internal temporary registers used by this node are stored in the
+ // gtRsvdRegs register mask. Clear it out.
+ tree->gtRsvdRegs = RBM_NONE;
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ JITDUMP("at start of tree, map contains: { ");
+ bool first = true;
+ for (auto kvp : operandToLocationInfoMap)
+ {
+ GenTree* node = kvp.Key();
+ LocationInfoList defList = kvp.Value();
+
+ JITDUMP("%sN%03u. %s -> (", first ? "" : "; ", node->gtSeqNum, GenTree::NodeName(node->OperGet()));
+ for (LocationInfoListNode *def = defList.Begin(), *end = defList.End(); def != end; def = def->Next())
+ {
+ JITDUMP("%s%d.N%03u", def == defList.Begin() ? "" : ", ", def->loc, def->treeNode->gtSeqNum);
+ }
+ JITDUMP(")");
+
+ first = false;
+ }
+ JITDUMP(" }\n");
+ }
+#endif // DEBUG
+
+ TreeNodeInfo info = tree->gtLsraInfo;
+ assert(info.IsValid(this));
+ int consume = info.srcCount;
+ int produce = info.dstCount;
+
+ assert(((consume == 0) && (produce == 0)) || (ComputeAvailableSrcCount(tree) == consume));
+
+ if (isCandidateLocalRef(tree) && !tree->OperIsLocalStore())
+ {
+ assert(consume == 0);
+
+ // We handle tracked variables differently from non-tracked ones. If it is tracked,
+ // we simply add a use or def of the tracked variable. Otherwise, for a use we need
+ // to actually add the appropriate references for loading or storing the variable.
+ //
+ // It won't actually get used or defined until the appropriate ancestor tree node
+ // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
+ // to a call
+
+ Interval* interval = getIntervalForLocalVar(tree->gtLclVarCommon.gtLclNum);
+ regMaskTP candidates = getUseCandidates(tree);
+ regMaskTP fixedAssignment = fixedCandidateMask(tree->TypeGet(), candidates);
+
+ // We have only approximate last-use information at this point. This is because the
+ // execution order doesn't actually reflect the true order in which the localVars
+ // are referenced - but the order of the RefPositions will, so we recompute it after
+ // RefPositions are built.
+ // Use the old value for setting currentLiveVars - note that we do this with the
+ // not-quite-correct setting of lastUse. However, this is OK because
+ // 1) this is only for preferencing, which doesn't require strict correctness, and
+ // 2) the cases where these out-of-order uses occur should not overlap a kill.
+ // TODO-Throughput: clean this up once we have the execution order correct. At that point
+ // we can update currentLiveVars at the same place that we create the RefPosition.
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ VarSetOps::RemoveElemD(compiler, currentLiveVars,
+ compiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvVarIndex);
+ }
+
+ JITDUMP("t%u (i:%u)\n", currentLoc, interval->intervalIndex);
+
+ if (!info.isLocalDefUse)
+ {
+ if (produce != 0)
+ {
+ LocationInfoList list(listNodePool.GetNode(currentLoc, interval, tree));
+ bool added = operandToLocationInfoMap.AddOrUpdate(tree, list);
+ assert(added);
+
+ tree->gtLsraInfo.definesAnyRegisters = true;
+ }
+
+ return;
+ }
+ else
+ {
+ JITDUMP(" Not added to map\n");
+ regMaskTP candidates = getUseCandidates(tree);
+
+ if (fixedAssignment != RBM_NONE)
+ {
+ candidates = fixedAssignment;
+ }
+ RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeUse, tree, candidates);
+ pos->isLocalDefUse = true;
+ bool isLastUse = ((tree->gtFlags & GTF_VAR_DEATH) != 0);
+ pos->lastUse = isLastUse;
+ pos->setAllocateIfProfitable(tree->IsRegOptional());
+ DBEXEC(VERBOSE, pos->dump());
+ return;
+ }
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ lsraDispNode(tree, LSRA_DUMP_REFPOS, (produce != 0));
+ JITDUMP("\n");
+ JITDUMP(" consume=%d produce=%d\n", consume, produce);
+ }
+#endif // DEBUG
+
+ // Handle the case of local variable assignment
+ Interval* varDefInterval = nullptr;
+ RefType defRefType = RefTypeDef;
+
+ GenTree* defNode = tree;
+
+ // noAdd means the node creates a def, but for purposes of map
+ // management we do not add it, because data is not flowing up the
+ // tree but over (as in ASG nodes).
+
+ bool noAdd = info.isLocalDefUse;
+ RefPosition* prevPos = nullptr;
+
+ bool isSpecialPutArg = false;
+
+ assert(!tree->OperIsAssignment());
+ if (tree->OperIsLocalStore())
+ {
+ if (isCandidateLocalRef(tree))
+ {
+ // We always push the tracked lclVar intervals
+ varDefInterval = getIntervalForLocalVar(tree->gtLclVarCommon.gtLclNum);
+ defRefType = refTypeForLocalRefNode(tree);
+ defNode = tree;
+ if (produce == 0)
+ {
+ produce = 1;
+ noAdd = true;
+ }
+
+ assert(consume <= MAX_RET_REG_COUNT);
+ if (consume == 1)
+ {
+ // Get the location info for the register defined by the first operand.
+ LocationInfoList operandDefs;
+ bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin()), &operandDefs);
+ assert(found);
+
+ // Since we only expect to consume one register, we should only have a single register to
+ // consume.
+ assert(operandDefs.Begin()->Next() == operandDefs.End());
+
+ LocationInfo& operandInfo = *static_cast<LocationInfo*>(operandDefs.Begin());
+
+ Interval* srcInterval = operandInfo.interval;
+ if (srcInterval->relatedInterval == nullptr)
+ {
+ // Preference the source to the dest, unless this is a non-last-use localVar.
+ // Note that the last-use info is not correct, but it is a better approximation than preferencing
+ // the source to the dest, if the source's lifetime extends beyond the dest.
+ if (!srcInterval->isLocalVar || (operandInfo.treeNode->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ srcInterval->assignRelatedInterval(varDefInterval);
+ }
+ }
+ else if (!srcInterval->isLocalVar)
+ {
+ // Preference the source to dest, if src is not a local var.
+ srcInterval->assignRelatedInterval(varDefInterval);
+ }
+
+ // We can have a case where the source of the store has a different register type,
+ // e.g. when the store is of a return value temp, and op1 is a Vector2
+ // (TYP_SIMD8). We will need to set the
+ // src candidates accordingly on op1 so that LSRA will generate a copy.
+ // We could do this during Lowering, but at that point we don't know whether
+ // this lclVar will be a register candidate, and if not, we would prefer to leave
+ // the type alone.
+ if (regType(tree->gtGetOp1()->TypeGet()) != regType(tree->TypeGet()))
+ {
+ tree->gtGetOp1()->gtLsraInfo.setSrcCandidates(this, allRegs(tree->TypeGet()));
+ }
+ }
+
+ if ((tree->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ VarSetOps::AddElemD(compiler, currentLiveVars,
+ compiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvVarIndex);
+ }
+ }
+ }
+ else if (noAdd && produce == 0)
+ {
+ // This is the case for dead nodes that occur after
+ // tree rationalization
+ // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation.
+ if (tree->IsMultiRegCall())
+ {
+ // In case of multi-reg call node, produce = number of return registers
+ produce = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
+ }
+ else
+ {
+ produce = 1;
+ }
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ if (produce)
+ {
+ if (varDefInterval != nullptr)
+ {
+ printf("t%u (i:%u) = op ", currentLoc, varDefInterval->intervalIndex);
+ }
+ else
+ {
+ for (int i = 0; i < produce; i++)
+ {
+ printf("t%u ", currentLoc);
+ }
+ printf("= op ");
+ }
+ }
+ else
+ {
+ printf(" op ");
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+
+ Interval* prefSrcInterval = nullptr;
+
+ // If this is a binary operator that will be encoded with 2 operand fields
+ // (i.e. the target is read-modify-write), preference the dst to op1.
+
+ bool hasDelayFreeSrc = tree->gtLsraInfo.hasDelayFreeSrc;
+ if (tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) &&
+ (tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use,
+ // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register
+ // (which is actually what we hope will happen).
+ JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n");
+
+ // Get the register information for the first operand of the node.
+ LocationInfoList operandDefs;
+ bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin()), &operandDefs);
+ assert(found);
+
+ // Preference the destination to the interval of the first register defined by the first operand.
+ Interval* srcInterval = operandDefs.Begin()->interval;
+ assert(srcInterval->isLocalVar);
+ prefSrcInterval = srcInterval;
+ isSpecialPutArg = true;
+ }
+
+ RefPosition* internalRefs[MaxInternalRegisters];
+
+ // make intervals for all the 'internal' register requirements for this node
+ // where internal means additional registers required temporarily
+ int internalCount = buildInternalRegisterDefsForNode(tree, currentLoc, internalRefs);
+
+ // pop all ref'd tree temps
+ GenTreeOperandIterator iterator = tree->OperandsBegin();
+
+ // `operandDefs` holds the list of `LocationInfo` values for the registers defined by the current
+ // operand. `operandDefsIterator` points to the current `LocationInfo` value in `operandDefs`.
+ LocationInfoList operandDefs;
+ LocationInfoListNode* operandDefsIterator = operandDefs.End();
+ for (int useIndex = 0; useIndex < consume; useIndex++)
+ {
+ // If we've consumed all of the registers defined by the current operand, advance to the next
+ // operand that defines any registers.
+ if (operandDefsIterator == operandDefs.End())
+ {
+ // Skip operands that do not define any registers, whether directly or indirectly.
+ GenTree* operand;
+ do
+ {
+ assert(iterator != tree->OperandsEnd());
+ operand = *iterator;
+
+ ++iterator;
+ } while (!operand->gtLsraInfo.definesAnyRegisters);
+
+ // If we have already processed a previous operand, return its `LocationInfo` list to the
+ // pool.
+ if (useIndex > 0)
+ {
+ assert(!operandDefs.IsEmpty());
+ listNodePool.ReturnNodes(operandDefs);
+ }
+
+ // Remove the list of registers defined by the current operand from the map. Note that this
+ // is only correct because tree nodes are singly-used: if this property ever changes (e.g.
+ // if tree nodes are eventually allowed to be multiply-used), then the removal is only
+ // correct at the last use.
+ bool removed = operandToLocationInfoMap.TryRemove(operand, &operandDefs);
+ assert(removed);
+
+ // Move the operand def iterator to the `LocationInfo` for the first register defined by the
+ // current operand.
+ operandDefsIterator = operandDefs.Begin();
+ assert(operandDefsIterator != operandDefs.End());
+ }
+
+ LocationInfo& locInfo = *static_cast<LocationInfo*>(operandDefsIterator);
+ operandDefsIterator = operandDefsIterator->Next();
+
+ JITDUMP("t%u ", locInfo.loc);
+
+ // For interstitial tree temps, a use is always a last use and ends the interval;
+ // this is set by default in newRefPosition.
+ GenTree* useNode = locInfo.treeNode;
+ assert(useNode != nullptr);
+ var_types type = useNode->TypeGet();
+ regMaskTP candidates = getUseCandidates(useNode);
+ Interval* i = locInfo.interval;
+ unsigned multiRegIdx = locInfo.multiRegIdx;
+
+#ifdef FEATURE_SIMD
+ // In case of multi-reg call store to a local, there won't be any mismatch of
+ // use candidates with the type of the tree node.
+ if (tree->OperIsLocalStore() && varDefInterval == nullptr && !useNode->IsMultiRegCall())
+ {
+ // This is a non-candidate store. If this is a SIMD type, the use candidates
+ // may not match the type of the tree node. If that is the case, change the
+ // type of the tree node to match, so that we do the right kind of store.
+ if ((candidates & allRegs(tree->gtType)) == RBM_NONE)
+ {
+ noway_assert((candidates & allRegs(useNode->gtType)) != RBM_NONE);
+ // Currently, the only case where this should happen is for a TYP_LONG
+ // source and a TYP_SIMD8 target.
+ assert((useNode->gtType == TYP_LONG && tree->gtType == TYP_SIMD8) ||
+ (useNode->gtType == TYP_SIMD8 && tree->gtType == TYP_LONG));
+ tree->gtType = useNode->gtType;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ bool delayRegFree = (hasDelayFreeSrc && useNode->gtLsraInfo.isDelayFree);
+ if (useNode->gtLsraInfo.isTgtPref)
+ {
+ prefSrcInterval = i;
+ }
+
+ bool regOptionalAtUse = useNode->IsRegOptional();
+ bool isLastUse = true;
+ if (isCandidateLocalRef(useNode))
+ {
+ isLastUse = ((useNode->gtFlags & GTF_VAR_DEATH) != 0);
+ }
+ else
+ {
+ // For non-localVar uses we record nothing,
+ // as nothing needs to be written back to the tree.
+ useNode = nullptr;
+ }
+
+ regMaskTP fixedAssignment = fixedCandidateMask(type, candidates);
+ if (fixedAssignment != RBM_NONE)
+ {
+ candidates = fixedAssignment;
+ }
+
+ RefPosition* pos;
+ if ((candidates & allRegs(i->registerType)) == 0)
+ {
+ // This should only occur where we've got a type mismatch due to SIMD
+ // pointer-size types that are passed & returned as longs.
+ i->hasConflictingDefUse = true;
+ if (fixedAssignment != RBM_NONE)
+ {
+ // Explicitly insert a FixedRefPosition and fake the candidates, because otherwise newRefPosition
+ // will complain about the types not matching.
+ regNumber physicalReg = genRegNumFromMask(fixedAssignment);
+ RefPosition* pos = newRefPosition(physicalReg, currentLoc, RefTypeFixedReg, nullptr, fixedAssignment);
+ }
+ pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType), multiRegIdx);
+ pos->registerAssignment = candidates;
+ }
+ else
+ {
+ pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, candidates, multiRegIdx);
+ }
+ if (delayRegFree)
+ {
+ hasDelayFreeSrc = true;
+ pos->delayRegFree = true;
+ }
+
+ if (isLastUse)
+ {
+ pos->lastUse = true;
+ }
+
+ if (regOptionalAtUse)
+ {
+ pos->setAllocateIfProfitable(1);
+ }
+ }
+ JITDUMP("\n");
+
+ if (!operandDefs.IsEmpty())
+ {
+ listNodePool.ReturnNodes(operandDefs);
+ }
+
+ buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount);
+
+ RegisterType registerType = getDefType(tree);
+ regMaskTP candidates = getDefCandidates(tree);
+ regMaskTP useCandidates = getUseCandidates(tree);
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("Def candidates ");
+ dumpRegMask(candidates);
+ printf(", Use candidates ");
+ dumpRegMask(useCandidates);
+ printf("\n");
+ }
+#endif // DEBUG
+
+#if defined(_TARGET_AMD64_)
+ // A multi-reg call node is the only node that can produce a multi-reg value.
+ assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT));
+#elif defined(_TARGET_ARM_)
+ assert(!varTypeIsMultiReg(tree->TypeGet()));
+#endif // _TARGET_xxx_
+
+ // Add kill positions before adding def positions
+ buildKillPositionsForNode(tree, currentLoc + 1);
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ VARSET_TP VARSET_INIT_NOCOPY(liveLargeVectors, VarSetOps::UninitVal());
+ if (RBM_FLT_CALLEE_SAVED != RBM_NONE)
+ {
+ // Build RefPositions for saving any live large vectors.
+ // This must be done after the kills, so that we know which large vectors are still live.
+ VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc));
+ }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+ ReturnTypeDesc* retTypeDesc = nullptr;
+ bool isMultiRegCall = tree->IsMultiRegCall();
+ if (isMultiRegCall)
+ {
+ retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
+ assert((int)genCountBits(candidates) == produce);
+ assert(candidates == retTypeDesc->GetABIReturnRegs());
+ }
+
+ // push defs
+ LocationInfoList locationInfoList;
+ LsraLocation defLocation = currentLoc + 1;
+ for (int i = 0; i < produce; i++)
+ {
+ regMaskTP currCandidates = candidates;
+ Interval* interval = varDefInterval;
+
+ // In case of multi-reg call node, registerType is given by
+ // the type of ith position return register.
+ if (isMultiRegCall)
+ {
+ registerType = retTypeDesc->GetReturnRegType((unsigned)i);
+ currCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i));
+ useCandidates = allRegs(registerType);
+ }
+
+ if (interval == nullptr)
+ {
+ // Make a new interval
+ interval = newInterval(registerType);
+ if (hasDelayFreeSrc)
+ {
+ interval->hasNonCommutativeRMWDef = true;
+ }
+ else if (tree->OperIsConst())
+ {
+ assert(!tree->IsReuseRegVal());
+ interval->isConstant = true;
+ }
+
+ if ((currCandidates & useCandidates) != RBM_NONE)
+ {
+ interval->updateRegisterPreferences(currCandidates & useCandidates);
+ }
+
+ if (isSpecialPutArg)
+ {
+ interval->isSpecialPutArg = true;
+ }
+ }
+ else
+ {
+ assert(registerTypesEquivalent(interval->registerType, registerType));
+ }
+
+ if (prefSrcInterval != nullptr)
+ {
+ interval->assignRelatedIntervalIfUnassigned(prefSrcInterval);
+ }
+
+ // for assignments, we want to create a refposition for the def
+ // but not push it
+ if (!noAdd)
+ {
+ locationInfoList.Append(listNodePool.GetNode(defLocation, interval, tree, (unsigned)i));
+ }
+
+ RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, (unsigned)i);
+ if (info.isLocalDefUse)
+ {
+ pos->isLocalDefUse = true;
+ pos->lastUse = true;
+ }
+ DBEXEC(VERBOSE, pos->dump());
+ interval->updateRegisterPreferences(currCandidates);
+ interval->updateRegisterPreferences(useCandidates);
+ }
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors);
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+ bool isContainedNode =
+ !noAdd && consume == 0 && produce == 0 && (tree->OperIsAggregate() || (tree->TypeGet() != TYP_VOID && !tree->OperIsStore()));
+ if (isContainedNode)
+ {
+ // Contained nodes map to the concatenated lists of their operands.
+ for (GenTree* op : tree->Operands())
+ {
+ if (!op->gtLsraInfo.definesAnyRegisters)
+ {
+ assert(ComputeOperandDstCount(op) == 0);
+ continue;
+ }
+
+ LocationInfoList operandList;
+ bool removed = operandToLocationInfoMap.TryRemove(op, &operandList);
+ assert(removed);
+
+ locationInfoList.Append(operandList);
+ }
+ }
+
+ if (!locationInfoList.IsEmpty())
+ {
+ bool added = operandToLocationInfoMap.AddOrUpdate(tree, locationInfoList);
+ assert(added);
+ tree->gtLsraInfo.definesAnyRegisters = true;
+ }
+}
+
+// make an interval for each physical register
+void LinearScan::buildPhysRegRecords()
+{
+ RegisterType regType = IntRegisterType;
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ RegRecord* curr = &physRegs[reg];
+ curr->init(reg);
+ }
+}
+
+BasicBlock* getNonEmptyBlock(BasicBlock* block)
+{
+ while (block != nullptr && block->bbTreeList == nullptr)
+ {
+ BasicBlock* nextBlock = block->bbNext;
+ // Note that here we use the version of NumSucc that does not take a compiler.
+ // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan.
+ // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which
+ // case we don't need the version that takes a compiler.
+ assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE)));
+ // sometimes the first block is empty and ends with an uncond branch
+ // assert( block->GetSucc(0) == nextBlock);
+ block = nextBlock;
+ }
+ assert(block != nullptr && block->bbTreeList != nullptr);
+ return block;
+}
+
+void LinearScan::insertZeroInitRefPositions()
+{
+ // insert defs for this, then a block boundary
+
+ VARSET_ITER_INIT(compiler, iter, compiler->fgFirstBB->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
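+ // A candidate, non-parameter local that is live into the first block needs a zero-init
+ // RefPosition if the method requires initialization (compInitMem) or the local is GC-typed.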
+ if (!varDsc->lvIsParam && isCandidateVar(varDsc) &&
+ (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet())))
+ {
+ GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode();
+ JITDUMP("V%02u was live in\n", varNum);
+ Interval* interval = getIntervalForLocalVar(varNum);
+ RefPosition* pos =
+ newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType));
+ varDsc->lvMustInit = true;
+ }
+ }
+}
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+// -----------------------------------------------------------------------
+// Sets the register state for an argument of type STRUCT for System V systems.
+// See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp
+// for how state for argument is updated for unix non-structs and Windows AMD64 structs.
+void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc)
+{
+ assert(varTypeIsStruct(argDsc));
+ RegState* intRegState = &compiler->codeGen->intRegState;
+ RegState* floatRegState = &compiler->codeGen->floatRegState;
+
+ if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA))
+ {
+ if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT))
+ {
+ assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS));
+ floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
+ }
+ else
+ {
+ assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS));
+ intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
+ }
+ }
+
+ if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA))
+ {
+ if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT))
+ {
+ assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS));
+ floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
+ }
+ else
+ {
+ assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS));
+ intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
+ }
+ }
+}
+
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+//------------------------------------------------------------------------
+// updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate
+// regState (either compiler->intRegState or compiler->floatRegState),
+// with the lvArgReg on "argDsc"
+//
+// Arguments:
+// argDsc - the argument for which the state is to be updated.
+//
+// Return Value: None
+//
+// Assumptions:
+// The argument is live on entry to the function
+// (or is untracked and therefore assumed live)
+//
+// Notes:
+// This relies on a method in regAlloc.cpp that is shared between LSRA
+// and regAlloc. It is further abstracted here because regState is updated
+// separately for tracked and untracked variables in LSRA.
+//
+void LinearScan::updateRegStateForArg(LclVarDsc* argDsc)
+{
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // For System V AMD64 calls the argDsc can have 2 registers (for structs.)
+ // Handle them here.
+ if (varTypeIsStruct(argDsc))
+ {
+ unixAmd64UpdateRegStateForArg(argDsc);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ RegState* intRegState = &compiler->codeGen->intRegState;
+ RegState* floatRegState = &compiler->codeGen->floatRegState;
+ // In the case of AMD64 we'll still use the floating point registers
+ // to model the register usage for arguments on vararg calls, so
+ // we will ignore the varargs condition when determining whether we use
+ // XMM registers or not for setting up the call.
+ bool isFloat = (isFloatRegType(argDsc->lvType)
+#ifndef _TARGET_AMD64_
+ && !compiler->info.compIsVarArgs
+#endif
+ );
+
+ if (argDsc->lvIsHfaRegArg())
+ {
+ isFloat = true;
+ }
+
+ if (isFloat)
+ {
+ JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
+ compiler->raUpdateRegStateForArg(floatRegState, argDsc);
+ }
+ else
+ {
+ JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
+#if FEATURE_MULTIREG_ARGS
+ if (argDsc->lvOtherArgReg != REG_NA)
+ {
+ JITDUMP("(second half) in reg %s\n", getRegName(argDsc->lvOtherArgReg));
+ }
+#endif // FEATURE_MULTIREG_ARGS
+ compiler->raUpdateRegStateForArg(intRegState, argDsc);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// findPredBlockForLiveIn: Determine which block should be used for the register locations of the live-in variables.
+//
+// Arguments:
+// block - The block for which we're selecting a predecessor.
+// prevBlock - The previous block in allocation order.
+// pPredBlockIsAllocated - A debug-only argument that indicates whether any of the predecessors have been seen
+// in allocation order.
+//
+// Return Value:
+// The selected predecessor.
+//
+// Assumptions:
+// in DEBUG, caller initializes *pPredBlockIsAllocated to false, and it will be set to true if the block
+// returned is in fact a predecessor.
+//
+// Notes:
+// This will select a predecessor based on the heuristics obtained by getLsraBlockBoundaryLocations(), which can be
+// one of:
+// LSRA_BLOCK_BOUNDARY_PRED - Use the register locations of a predecessor block (default)
+// LSRA_BLOCK_BOUNDARY_LAYOUT - Use the register locations of the previous block in layout order.
+// This is the only case where this actually returns a different block.
+// LSRA_BLOCK_BOUNDARY_ROTATE - Rotate the register locations from a predecessor.
+// For this case, the block returned is the same as for LSRA_BLOCK_BOUNDARY_PRED, but
+// the register locations will be "rotated" to stress the resolution and allocation
+// code.
+
+BasicBlock* LinearScan::findPredBlockForLiveIn(BasicBlock* block,
+ BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated))
+{
+ BasicBlock* predBlock = nullptr;
+#ifdef DEBUG
+ assert(*pPredBlockIsAllocated == false);
+ if (getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_LAYOUT)
+ {
+ if (prevBlock != nullptr)
+ {
+ predBlock = prevBlock;
+ }
+ }
+ else
+#endif // DEBUG
+ if (block != compiler->fgFirstBB)
+ {
+ predBlock = block->GetUniquePred(compiler);
+ if (predBlock != nullptr)
+ {
+ if (isBlockVisited(predBlock))
+ {
+ if (predBlock->bbJumpKind == BBJ_COND)
+ {
+ // Special handling to improve matching on backedges.
+ BasicBlock* otherBlock = (block == predBlock->bbNext) ? predBlock->bbJumpDest : predBlock->bbNext;
+ noway_assert(otherBlock != nullptr);
+ if (isBlockVisited(otherBlock))
+ {
+ // This is the case when we have a conditional branch where one target has already
+ // been visited. It would be best to use the same incoming regs as that block,
+ // so that we have less likelihood of having to move registers.
+ // For example, in determining the block to use for the starting register locations for
+ // "block" in the following example, we'd like to use the same predecessor for "block"
+ // as for "otherBlock", so that both successors of predBlock have the same locations, reducing
+ // the likelihood of needing a split block on a backedge:
+ //
+ // otherPred
+ // |
+ // otherBlock <-+
+ // . . . |
+ // |
+ // predBlock----+
+ // |
+ // block
+ //
+ for (flowList* pred = otherBlock->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* otherPred = pred->flBlock;
+ if (otherPred->bbNum == blockInfo[otherBlock->bbNum].predBBNum)
+ {
+ predBlock = otherPred;
+ break;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ predBlock = nullptr;
+ }
+ }
+ else
+ {
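+ // There is no unique predecessor; choose the visited predecessor with the highest weight.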
+ for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* candidatePredBlock = pred->flBlock;
+ if (isBlockVisited(candidatePredBlock))
+ {
+ if (predBlock == nullptr || predBlock->bbWeight < candidatePredBlock->bbWeight)
+ {
+ predBlock = candidatePredBlock;
+ INDEBUG(*pPredBlockIsAllocated = true;)
+ }
+ }
+ }
+ }
+ if (predBlock == nullptr)
+ {
+ predBlock = prevBlock;
+ assert(predBlock != nullptr);
+ JITDUMP("\n\nNo allocated predecessor; ");
+ }
+ }
+ return predBlock;
+}
+
+void LinearScan::buildIntervals()
+{
+ BasicBlock* block;
+
+ // start numbering at 1; 0 is the entry
+ LsraLocation currentLoc = 1;
+
+ JITDUMP("\nbuildIntervals ========\n");
+
+ // Now build (empty) records for all of the physical registers
+ buildPhysRegRecords();
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("\n-----------------\n");
+ printf("LIVENESS:\n");
+ printf("-----------------\n");
+ foreach_block(compiler, block)
+ {
+ printf("BB%02u use def in out\n", block->bbNum);
+ dumpConvertedVarSet(compiler, block->bbVarUse);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbVarDef);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbLiveIn);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbLiveOut);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ identifyCandidates();
+
+ DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE));
+
+ // second part:
+ JITDUMP("\nbuildIntervals second part ========\n");
+ currentLoc = 0;
+
+ // Next, create ParamDef RefPositions for all the tracked parameters,
+ // in order of their varIndex
+
+ LclVarDsc* argDsc;
+ unsigned int lclNum;
+
+ RegState* intRegState = &compiler->codeGen->intRegState;
+ RegState* floatRegState = &compiler->codeGen->floatRegState;
+ intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
+ floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
+
+ for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
+ {
+ lclNum = compiler->lvaTrackedToVarNum[varIndex];
+ argDsc = &(compiler->lvaTable[lclNum]);
+
+ if (!argDsc->lvIsParam)
+ {
+ continue;
+ }
+
+ // Only reserve a register if the argument is actually used.
+ // Is it dead on entry? If compJmpOpUsed is true, then the arguments
+ // have to be kept alive, so we have to consider it as live on entry.
+ // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we
+ // won't have done dataflow on it, but it needs to be marked as live-in so
+ // it will get saved in the prolog.
+ if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode)
+ {
+ continue;
+ }
+
+ if (argDsc->lvIsRegArg)
+ {
+ updateRegStateForArg(argDsc);
+ }
+
+ if (isCandidateVar(argDsc))
+ {
+ Interval* interval = getIntervalForLocalVar(lclNum);
+ regMaskTP mask = allRegs(TypeGet(argDsc));
+ if (argDsc->lvIsRegArg)
+ {
+ // Set this interval as currently assigned to that register
+ regNumber inArgReg = argDsc->lvArgReg;
+ assert(inArgReg < REG_COUNT);
+ mask = genRegMask(inArgReg);
+ assignPhysReg(inArgReg, interval);
+ }
+ RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask);
+ }
+ else if (varTypeIsStruct(argDsc->lvType))
+ {
+ for (unsigned fieldVarNum = argDsc->lvFieldLclStart;
+ fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum)
+ {
+ LclVarDsc* fieldVarDsc = &(compiler->lvaTable[fieldVarNum]);
+ if (fieldVarDsc->lvLRACandidate)
+ {
+ Interval* interval = getIntervalForLocalVar(fieldVarNum);
+ RefPosition* pos =
+ newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, allRegs(TypeGet(fieldVarDsc)));
+ }
+ }
+ }
+ else
+ {
+ // We can overwrite the register (i.e. codegen saves it on entry)
+ assert(argDsc->lvRefCnt == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister ||
+ !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode));
+ }
+ }
+
+ // Now set up the reg state for the non-tracked args
+ // (We do this here because we want to generate the ParamDef RefPositions in tracked
+ // order, so that loop doesn't hit the non-tracked args)
+
+ for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++, argDsc++)
+ {
+ argDsc = &(compiler->lvaTable[argNum]);
+
+ if (argDsc->lvPromotedStruct())
+ {
+ noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = argDsc->lvFieldLclStart;
+ argDsc = &(compiler->lvaTable[fieldVarNum]);
+ }
+ noway_assert(argDsc->lvIsParam);
+ if (!argDsc->lvTracked && argDsc->lvIsRegArg)
+ {
+ updateRegStateForArg(argDsc);
+ }
+ }
+
+ // If there is a secret stub param, it is also live in
+ if (compiler->info.compPublishStubParam)
+ {
+ intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
+ }
+
+ LocationInfoListNodePool listNodePool(compiler, 8);
+ SmallHashTable<GenTree*, LocationInfoList, 32> operandToLocationInfoMap(compiler);
+
+ BasicBlock* predBlock = nullptr;
+ BasicBlock* prevBlock = nullptr;
+
+ // Initialize currentLiveVars to the empty set. We will set it to the current
+ // live-in at the entry to each block (this will include the incoming args on
+ // the first block).
+ VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler));
+
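+    // Walk the blocks in the LSRA block sequence, building RefPositions for the nodes in each block.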
+ for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
+ {
+ JITDUMP("\nNEW BLOCK BB%02u\n", block->bbNum);
+
+ bool predBlockIsAllocated = false;
+ predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated));
+
+ if (block == compiler->fgFirstBB)
+ {
+ insertZeroInitRefPositions();
+ }
+
+ // Determine if we need any DummyDefs.
+ // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
+        // Note that it's possible to have uses of uninitialized variables, in which case even the first
+ // block may require DummyDefs, which we are not currently adding - this means that these variables
+ // will always be considered to be in memory on entry (and reloaded when the use is encountered).
+ // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
+ // variables (which may actually be initialized along the dynamically executed paths, but not
+ // on all static paths), we wind up with excessive liveranges for some of these variables.
+ VARSET_TP VARSET_INIT(compiler, newLiveIn, block->bbLiveIn);
+ if (predBlock)
+ {
+ JITDUMP("\n\nSetting incoming variable registers of BB%02u to outVarToRegMap of BB%02u\n", block->bbNum,
+ predBlock->bbNum);
+ assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
+ blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
+ // Compute set difference: newLiveIn = block->bbLiveIn - predBlock->bbLiveOut
+ VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
+ }
+ bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB);
+
+ // Create dummy def RefPositions
+
+ if (needsDummyDefs)
+ {
+ // If we are using locations from a predecessor, we should never require DummyDefs.
+ assert(!predBlockIsAllocated);
+
+ JITDUMP("Creating dummy definitions\n");
+ VARSET_ITER_INIT(compiler, iter, newLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
+ // If this is the entry block, don't add any incoming parameters (they're handled with ParamDefs).
+ if (isCandidateVar(varDsc) && (predBlock != nullptr || !varDsc->lvIsParam))
+ {
+ Interval* interval = getIntervalForLocalVar(varNum);
+ RefPosition* pos =
+ newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr, allRegs(interval->registerType));
+ }
+ }
+ JITDUMP("Finished creating dummy definitions\n\n");
+ }
+
+ // Add a dummy RefPosition to mark the block boundary.
+ // Note that we do this AFTER adding the exposed uses above, because the
+ // register positions for those exposed uses need to be recorded at
+ // this point.
+
+ RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
+
+ VarSetOps::Assign(compiler, currentLiveVars, block->bbLiveIn);
+
+ LIR::Range& blockRange = LIR::AsRange(block);
+ for (GenTree* node : blockRange.NonPhiNodes())
+ {
+ assert(node->gtLsraInfo.loc >= currentLoc);
+ assert(((node->gtLIRFlags & LIR::Flags::IsUnusedValue) == 0) || node->gtLsraInfo.isLocalDefUse);
+
+ currentLoc = node->gtLsraInfo.loc;
+ buildRefPositionsForNode(node, block, listNodePool, operandToLocationInfoMap, currentLoc);
+
+#ifdef DEBUG
+ if (currentLoc > maxNodeLocation)
+ {
+ maxNodeLocation = currentLoc;
+ }
+#endif // DEBUG
+ }
+
+ // Increment the LsraLocation at this point, so that the dummy RefPositions
+ // will not have the same LsraLocation as any "real" RefPosition.
+ currentLoc += 2;
+
+ // Note: the visited set is cleared in LinearScan::doLinearScan()
+ markBlockVisited(block);
+
+ // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the
+ // next block, or any unvisited successors.
+ // This will address lclVars that are live on a backedge, as well as those that are kept
+ // live at a GT_JMP.
+ //
+        // Blocks ending with "jmp method" are marked with BBF_HAS_JMP,
+        // and the jmp call is represented by a GT_JMP node, which is a leaf node.
+        // The liveness phase keeps all the arguments of the method live until the end
+        // of the block by adding them to the live-out set of the block containing the GT_JMP.
+ //
+ // The target of a GT_JMP implicitly uses all the current method arguments, however
+ // there are no actual references to them. This can cause LSRA to assert, because
+ // the variables are live but it sees no references. In order to correctly model the
+ // liveness of these arguments, we add dummy exposed uses, in the same manner as for
+ // backward branches. This will happen automatically via expUseSet.
+ //
+ // Note that a block ending with GT_JMP has no successors and hence the variables
+ // for which dummy use ref positions are added are arguments of the method.
+
+ VARSET_TP VARSET_INIT(compiler, expUseSet, block->bbLiveOut);
+ BasicBlock* nextBlock = getNextBlock();
+ if (nextBlock != nullptr)
+ {
+ VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn);
+ }
+ AllSuccessorIter succsEnd = block->GetAllSuccs(compiler).end();
+ for (AllSuccessorIter succs = block->GetAllSuccs(compiler).begin();
+ succs != succsEnd && !VarSetOps::IsEmpty(compiler, expUseSet); ++succs)
+ {
+ BasicBlock* succ = (*succs);
+ if (isBlockVisited(succ))
+ {
+ continue;
+ }
+ VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn);
+ }
+
+ if (!VarSetOps::IsEmpty(compiler, expUseSet))
+ {
+ JITDUMP("Exposed uses:");
+ VARSET_ITER_INIT(compiler, iter, expUseSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ if (isCandidateVar(varDsc))
+ {
+ Interval* interval = getIntervalForLocalVar(varNum);
+ RefPosition* pos =
+ newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
+ JITDUMP(" V%02u", varNum);
+ }
+ }
+ JITDUMP("\n");
+ }
+
+ // Identify the last uses of each variable, except in the case of MinOpts, where all vars
+ // are kept live everywhere.
+
+ if (!compiler->opts.MinOpts())
+ {
+ setLastUses(block);
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("use: ");
+ dumpConvertedVarSet(compiler, block->bbVarUse);
+ printf("\ndef: ");
+ dumpConvertedVarSet(compiler, block->bbVarDef);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ prevBlock = block;
+ }
+
+ // If we need to KeepAliveAndReportThis, add a dummy exposed use of it at the end
+ if (compiler->lvaKeepAliveAndReportThis())
+ {
+ unsigned keepAliveVarNum = compiler->info.compThisArg;
+ assert(compiler->info.compIsStatic == false);
+ if (isCandidateVar(&compiler->lvaTable[keepAliveVarNum]))
+ {
+ JITDUMP("Adding exposed use of this, for lvaKeepAliveAndReportThis\n");
+ Interval* interval = getIntervalForLocalVar(keepAliveVarNum);
+ RefPosition* pos =
+ newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
+ }
+ }
+
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ LclVarDsc* varDsc;
+ for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvLRACandidate)
+ {
+ JITDUMP("Adding exposed use of V%02u for LsraExtendLifetimes\n", lclNum);
+ Interval* interval = getIntervalForLocalVar(lclNum);
+ RefPosition* pos =
+ newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
+ }
+ }
+ }
+#endif // DEBUG
+
+ // If the last block has successors, create a RefTypeBB to record
+ // what's live
+
+ if (prevBlock->NumSucc(compiler) > 0)
+ {
+ RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
+ }
+
+#ifdef DEBUG
+ // Make sure we don't have any blocks that were not visited
+ foreach_block(compiler, block)
+ {
+ assert(isBlockVisited(block));
+ }
+
+ if (VERBOSE)
+ {
+ lsraDumpIntervals("BEFORE VALIDATING INTERVALS");
+ dumpRefPositions("BEFORE VALIDATING INTERVALS");
+ validateIntervals();
+ }
+#endif // DEBUG
+}
+
+#ifdef DEBUG
+void LinearScan::dumpVarRefPositions(const char* title)
+{
+ printf("\nVAR REFPOSITIONS %s\n", title);
+
+ for (unsigned i = 0; i < compiler->lvaCount; i++)
+ {
+ Interval* interval = getIntervalForLocalVar(i);
+ printf("--- V%02u\n", i);
+
+ for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
+ {
+ ref->dump();
+ }
+ }
+
+ printf("\n");
+}
+
+void LinearScan::validateIntervals()
+{
+ for (unsigned i = 0; i < compiler->lvaCount; i++)
+ {
+ Interval* interval = getIntervalForLocalVar(i);
+
+ bool defined = false;
+ printf("-----------------\n");
+ for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
+ {
+ ref->dump();
+ RefType refType = ref->refType;
+ if (!defined && RefTypeIsUse(refType))
+ {
+ if (compiler->info.compMethodName != nullptr)
+ {
+ printf("%s: ", compiler->info.compMethodName);
+ }
+ printf("LocalVar V%02u: undefined use at %u\n", i, ref->nodeLocation);
+ }
+ // Note that there can be multiple last uses if they are on disjoint paths,
+ // so we can't really check the lastUse flag
+ if (ref->lastUse)
+ {
+ defined = false;
+ }
+ if (RefTypeIsDef(refType))
+ {
+ defined = true;
+ }
+ }
+ }
+}
+#endif // DEBUG
+
+// Set the default rpFrameType based upon codeGen->isFramePointerRequired()
+// This was lifted from the register predictor
+//
+void LinearScan::setFrameType()
+{
+ FrameType frameType = FT_NOT_SET;
+ if (compiler->codeGen->isFramePointerRequired())
+ {
+ frameType = FT_EBP_FRAME;
+ }
+ else
+ {
+ if (compiler->rpMustCreateEBPCalled == false)
+ {
+#ifdef DEBUG
+ const char* reason;
+#endif // DEBUG
+ compiler->rpMustCreateEBPCalled = true;
+ if (compiler->rpMustCreateEBPFrame(INDEBUG(&reason)))
+ {
+ JITDUMP("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
+ compiler->codeGen->setFrameRequired(true);
+ }
+ }
+
+ if (compiler->codeGen->isFrameRequired())
+ {
+ frameType = FT_EBP_FRAME;
+ }
+ else
+ {
+ frameType = FT_ESP_FRAME;
+ }
+ }
+
+#if DOUBLE_ALIGN
+ // The DOUBLE_ALIGN feature indicates whether the JIT will attempt to double-align the
+ // frame if needed. Note that this feature isn't on for amd64, because the stack is
+ // always double-aligned by default.
+ compiler->codeGen->setDoubleAlign(false);
+
+ // TODO-CQ: Tune this (see regalloc.cpp, in which raCntWtdStkDblStackFP is used to
+ // determine whether to double-align). Note, though that there is at least one test
+ // (jit\opt\Perf\DoubleAlign\Locals.exe) that depends on double-alignment being set
+ // in certain situations.
+ if (!compiler->opts.MinOpts() && !compiler->codeGen->isFramePointerRequired() && compiler->compFloatingPointUsed)
+ {
+ frameType = FT_DOUBLE_ALIGN_FRAME;
+ }
+#endif // DOUBLE_ALIGN
+
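+    // Propagate the chosen frame type to the codegen frame-pointer and double-align settings.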
+ switch (frameType)
+ {
+ case FT_ESP_FRAME:
+ noway_assert(!compiler->codeGen->isFramePointerRequired());
+ noway_assert(!compiler->codeGen->isFrameRequired());
+ compiler->codeGen->setFramePointerUsed(false);
+ break;
+ case FT_EBP_FRAME:
+ compiler->codeGen->setFramePointerUsed(true);
+ break;
+#if DOUBLE_ALIGN
+ case FT_DOUBLE_ALIGN_FRAME:
+ noway_assert(!compiler->codeGen->isFramePointerRequired());
+ compiler->codeGen->setFramePointerUsed(false);
+ compiler->codeGen->setDoubleAlign(true);
+ break;
+#endif // DOUBLE_ALIGN
+ default:
+ noway_assert(!"rpFrameType not set correctly!");
+ break;
+ }
+
+ // If we are using FPBASE as the frame register, we cannot also use it for
+ // a local var. Note that we may have already added it to the register masks,
+    // which are computed in the LinearScan constructor and
+ // used during lowering. Luckily, the TreeNodeInfo only stores an index to
+ // the masks stored in the LinearScan class, so we only need to walk the
+ // unique masks and remove FPBASE.
+ if (frameType == FT_EBP_FRAME)
+ {
+ if ((availableIntRegs & RBM_FPBASE) != 0)
+ {
+ RemoveRegisterFromMasks(REG_FPBASE);
+
+ // We know that we're already in "read mode" for availableIntRegs. However,
+ // we need to remove the FPBASE register, so subsequent users (like callers
+ // to allRegs()) get the right thing. The RemoveRegisterFromMasks() code
+ // fixes up everything that already took a dependency on the value that was
+ // previously read, so this completes the picture.
+ availableIntRegs.OverrideAssign(availableIntRegs & ~RBM_FPBASE);
+ }
+ }
+
+ compiler->rpFrameType = frameType;
+}
+
+// Is the copyReg given by this RefPosition still busy at the
+// given location?
+bool copyRegInUse(RefPosition* ref, LsraLocation loc)
+{
+ assert(ref->copyReg);
+ if (ref->getRefEndLocation() >= loc)
+ {
+ return true;
+ }
+ Interval* interval = ref->getInterval();
+ RefPosition* nextRef = interval->getNextRefPosition();
+ if (nextRef != nullptr && nextRef->treeNode == ref->treeNode && nextRef->getRefEndLocation() >= loc)
+ {
+ return true;
+ }
+ return false;
+}
+
+// Determine whether the register represented by "physRegRecord" is available at least
+// at the "currentLoc", and if so, return the next location at which it is in use in
+// "nextRefLocationPtr"
+//
+bool LinearScan::registerIsAvailable(RegRecord* physRegRecord,
+ LsraLocation currentLoc,
+ LsraLocation* nextRefLocationPtr,
+ RegisterType regType)
+{
+ *nextRefLocationPtr = MaxLocation;
+ LsraLocation nextRefLocation = MaxLocation;
+ regMaskTP regMask = genRegMask(physRegRecord->regNum);
+ if (physRegRecord->isBusyUntilNextKill)
+ {
+ return false;
+ }
+
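+    // Find the location of the next reference to this physical register, if any.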
+ RefPosition* nextPhysReference = physRegRecord->getNextRefPosition();
+ if (nextPhysReference != nullptr)
+ {
+ nextRefLocation = nextPhysReference->nodeLocation;
+ // if (nextPhysReference->refType == RefTypeFixedReg) nextRefLocation--;
+ }
+ else if (!physRegRecord->isCalleeSave)
+ {
+ nextRefLocation = MaxLocation - 1;
+ }
+
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+
+ if (assignedInterval != nullptr)
+ {
+ RefPosition* recentReference = assignedInterval->recentRefPosition;
+
+ // The only case where we have an assignedInterval, but recentReference is null
+ // is where this interval is live at procedure entry (i.e. an arg register), in which
+ // case it's still live and its assigned register is not available
+ // (Note that the ParamDef will be recorded as a recentReference when we encounter
+ // it, but we will be allocating registers, potentially to other incoming parameters,
+ // as we process the ParamDefs.)
+
+ if (recentReference == nullptr)
+ {
+ return false;
+ }
+
+ // Is this a copyReg? It is if the register assignment doesn't match.
+ // (the recentReference may not be a copyReg, because we could have seen another
+ // reference since the copyReg)
+
+ if (!assignedInterval->isAssignedTo(physRegRecord->regNum))
+ {
+ // Don't reassign it if it's still in use
+ if (recentReference->copyReg && copyRegInUse(recentReference, currentLoc))
+ {
+ return false;
+ }
+ }
+ else if (!assignedInterval->isActive && assignedInterval->isConstant)
+ {
+ // Treat this as unassigned, i.e. do nothing.
+ // TODO-CQ: Consider adjusting the heuristics (probably in the caller of this method)
+ // to avoid reusing these registers.
+ }
+ // If this interval isn't active, it's available if it isn't referenced
+ // at this location (or the previous location, if the recent RefPosition
+ // is a delayRegFree).
+ else if (!assignedInterval->isActive &&
+ (recentReference->refType == RefTypeExpUse || recentReference->getRefEndLocation() < currentLoc))
+ {
+ // This interval must have a next reference (otherwise it wouldn't be assigned to this register)
+ RefPosition* nextReference = recentReference->nextRefPosition;
+ if (nextReference != nullptr)
+ {
+ if (nextReference->nodeLocation < nextRefLocation)
+ {
+ nextRefLocation = nextReference->nodeLocation;
+ }
+ }
+ else
+ {
+ assert(recentReference->copyReg && recentReference->registerAssignment != regMask);
+ }
+ }
+ else
+ {
+ return false;
+ }
+ }
+ if (nextRefLocation < *nextRefLocationPtr)
+ {
+ *nextRefLocationPtr = nextRefLocation;
+ }
+
+#ifdef _TARGET_ARM_
+ if (regType == TYP_DOUBLE)
+ {
+ // Recurse, but check the other half this time (TYP_FLOAT)
+ if (!registerIsAvailable(getRegisterRecord(REG_NEXT(physRegRecord->regNum)), currentLoc, nextRefLocationPtr,
+ TYP_FLOAT))
+ return false;
+ nextRefLocation = *nextRefLocationPtr;
+ }
+#endif // _TARGET_ARM_
+
+ return (nextRefLocation >= currentLoc);
+}
+
+//------------------------------------------------------------------------
+// getRegisterType: Get the RegisterType to use for the given RefPosition
+//
+// Arguments:
+// currentInterval: The interval for the current allocation
+// refPosition: The RefPosition of the current Interval for which a register is being allocated
+//
+// Return Value:
+// The RegisterType that should be allocated for this RefPosition
+//
+// Notes:
+// This will nearly always be identical to the registerType of the interval, except in the case
+// of SIMD types of 8 bytes (currently only Vector2) when they are passed and returned in integer
+// registers, or copied to a return temp.
+// This method need only be called in situations where we may be dealing with the register requirements
+// of a RefTypeUse RefPosition (i.e. not when we are only looking at the type of an interval, nor when
+// we are interested in the "defining" type of the interval). This is because the situation of interest
+// only happens at the use (where it must be copied to an integer register).
+
+RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* refPosition)
+{
+ assert(refPosition->getInterval() == currentInterval);
+ RegisterType regType = currentInterval->registerType;
+ regMaskTP candidates = refPosition->registerAssignment;
+#if defined(FEATURE_SIMD) && defined(_TARGET_AMD64_)
+ if ((candidates & allRegs(regType)) == RBM_NONE)
+ {
+ assert((regType == TYP_SIMD8) && (refPosition->refType == RefTypeUse) &&
+ ((candidates & allRegs(TYP_INT)) != RBM_NONE));
+ regType = TYP_INT;
+ }
+#else // !(defined(FEATURE_SIMD) && defined(_TARGET_AMD64_))
+ assert((candidates & allRegs(regType)) != RBM_NONE);
+#endif // !(defined(FEATURE_SIMD) && defined(_TARGET_AMD64_))
+ return regType;
+}
+
+//------------------------------------------------------------------------
+// tryAllocateFreeReg: Find a free register that satisfies the requirements for refPosition,
+// and takes into account the preferences for the given Interval
+//
+// Arguments:
+// currentInterval: The interval for the current allocation
+// refPosition: The RefPosition of the current Interval for which a register is being allocated
+//
+// Return Value:
+//    The regNumber, if any, allocated to the RefPosition. Returns REG_NA if no free register is found.
+//
+// Notes:
+// TODO-CQ: Consider whether we need to use a different order for tree temps than for vars, as
+// reg predict does
+
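+// The register allocation order used by tryAllocateFreeReg, for integer and floating-point registers respectively.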
+static const regNumber lsraRegOrder[] = {REG_VAR_ORDER};
+const unsigned lsraRegOrderSize = ArrLen(lsraRegOrder);
+static const regNumber lsraRegOrderFlt[] = {REG_VAR_ORDER_FLT};
+const unsigned lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt);
+
+regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* refPosition)
+{
+ regNumber foundReg = REG_NA;
+
+ RegisterType regType = getRegisterType(currentInterval, refPosition);
+ const regNumber* regOrder;
+ unsigned regOrderSize;
+ if (useFloatReg(regType))
+ {
+ regOrder = lsraRegOrderFlt;
+ regOrderSize = lsraRegOrderFltSize;
+ }
+ else
+ {
+ regOrder = lsraRegOrder;
+ regOrderSize = lsraRegOrderSize;
+ }
+
+ LsraLocation currentLocation = refPosition->nodeLocation;
+ RefPosition* nextRefPos = refPosition->nextRefPosition;
+ LsraLocation nextLocation = (nextRefPos == nullptr) ? currentLocation : nextRefPos->nodeLocation;
+ regMaskTP candidates = refPosition->registerAssignment;
+ regMaskTP preferences = currentInterval->registerPreferences;
+
+ if (RefTypeIsDef(refPosition->refType))
+ {
+ if (currentInterval->hasConflictingDefUse)
+ {
+ resolveConflictingDefAndUse(currentInterval, refPosition);
+ candidates = refPosition->registerAssignment;
+ }
+ // Otherwise, check for the case of a fixed-reg def of a reg that will be killed before the
+ // use, or interferes at the point of use (which shouldn't happen, but Lower doesn't mark
+ // the contained nodes as interfering).
+ // Note that we may have a ParamDef RefPosition that is marked isFixedRegRef, but which
+ // has had its registerAssignment changed to no longer be a single register.
+ else if (refPosition->isFixedRegRef && nextRefPos != nullptr && RefTypeIsUse(nextRefPos->refType) &&
+ !nextRefPos->isFixedRegRef && genMaxOneBit(refPosition->registerAssignment))
+ {
+ regNumber defReg = refPosition->assignedReg();
+ RegRecord* defRegRecord = getRegisterRecord(defReg);
+
+ RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition;
+ assert(currFixedRegRefPosition != nullptr &&
+ currFixedRegRefPosition->nodeLocation == refPosition->nodeLocation);
+
+ // If there is another fixed reference to this register before the use, change the candidates
+ // on this RefPosition to include that of nextRefPos.
+ if (currFixedRegRefPosition->nextRefPosition != nullptr &&
+ currFixedRegRefPosition->nextRefPosition->nodeLocation <= nextRefPos->getRefEndLocation())
+ {
+ candidates |= nextRefPos->registerAssignment;
+ if (preferences == refPosition->registerAssignment)
+ {
+ preferences = candidates;
+ }
+ }
+ }
+ }
+
+ preferences &= candidates;
+ if (preferences == RBM_NONE)
+ {
+ preferences = candidates;
+ }
+ regMaskTP relatedPreferences = RBM_NONE;
+
+#ifdef DEBUG
+ candidates = stressLimitRegs(refPosition, candidates);
+#endif
+ bool mustAssignARegister = true;
+ assert(candidates != RBM_NONE);
+
+ // If the related interval has no further references, it is possible that it is a source of the
+ // node that produces this interval. However, we don't want to use the relatedInterval for preferencing
+ // if its next reference is not a new definition (as it either is or will become live).
+ Interval* relatedInterval = currentInterval->relatedInterval;
+ if (relatedInterval != nullptr)
+ {
+ RefPosition* nextRelatedRefPosition = relatedInterval->getNextRefPosition();
+ if (nextRelatedRefPosition != nullptr)
+ {
+ // Don't use the relatedInterval for preferencing if its next reference is not a new definition.
+ if (!RefTypeIsDef(nextRelatedRefPosition->refType))
+ {
+ relatedInterval = nullptr;
+ }
+ // Is the relatedInterval simply a copy to another relatedInterval?
+ else if ((relatedInterval->relatedInterval != nullptr) &&
+ (nextRelatedRefPosition->nextRefPosition != nullptr) &&
+ (nextRelatedRefPosition->nextRefPosition->nextRefPosition == nullptr) &&
+ (nextRelatedRefPosition->nextRefPosition->nodeLocation <
+ relatedInterval->relatedInterval->getNextRefLocation()))
+ {
+ // The current relatedInterval has only two remaining RefPositions, both of which
+ // occur prior to the next RefPosition for its relatedInterval.
+ // It is likely a copy.
+ relatedInterval = relatedInterval->relatedInterval;
+ }
+ }
+ }
+
+ if (relatedInterval != nullptr)
+ {
+ // If the related interval already has an assigned register, then use that
+ // as the related preference. We'll take the related
+ // interval preferences into account in the loop over all the registers.
+
+ if (relatedInterval->assignedReg != nullptr)
+ {
+ relatedPreferences = genRegMask(relatedInterval->assignedReg->regNum);
+ }
+ else
+ {
+ relatedPreferences = relatedInterval->registerPreferences;
+ }
+ }
+
+ bool preferCalleeSave = currentInterval->preferCalleeSave;
+
+ // For floating point, we want to be less aggressive about using callee-save registers.
+ // So in that case, we just need to ensure that the current RefPosition is covered.
+ RefPosition* rangeEndRefPosition;
+ RefPosition* lastRefPosition = currentInterval->lastRefPosition;
+ if (useFloatReg(currentInterval->registerType))
+ {
+ rangeEndRefPosition = refPosition;
+ }
+ else
+ {
+ rangeEndRefPosition = currentInterval->lastRefPosition;
+ // If we have a relatedInterval that is not currently occupying a register,
+ // and whose lifetime begins after this one ends,
+ // we want to try to select a register that will cover its lifetime.
+ if ((relatedInterval != nullptr) && (relatedInterval->assignedReg == nullptr) &&
+ (relatedInterval->getNextRefLocation() >= rangeEndRefPosition->nodeLocation))
+ {
+ lastRefPosition = relatedInterval->lastRefPosition;
+ preferCalleeSave = relatedInterval->preferCalleeSave;
+ }
+ }
+
+ // If this has a delayed use (due to being used in a rmw position of a
+ // non-commutative operator), its endLocation is delayed until the "def"
+ // position, which is one location past the use (getRefEndLocation() takes care of this).
+ LsraLocation rangeEndLocation = rangeEndRefPosition->getRefEndLocation();
+ LsraLocation lastLocation = lastRefPosition->getRefEndLocation();
+ regNumber prevReg = REG_NA;
+
+ if (currentInterval->assignedReg)
+ {
+ bool useAssignedReg = false;
+ // This was an interval that was previously allocated to the given
+ // physical register, and we should try to allocate it to that register
+ // again, if possible and reasonable.
+ // Use it preemptively (i.e. before checking other available regs)
+ // only if it is preferred and available.
+
+ RegRecord* regRec = currentInterval->assignedReg;
+ prevReg = regRec->regNum;
+ regMaskTP prevRegBit = genRegMask(prevReg);
+
+ // Is it in the preferred set of regs?
+ if ((prevRegBit & preferences) != RBM_NONE)
+ {
+ // Is it currently available?
+ LsraLocation nextPhysRefLoc;
+ if (registerIsAvailable(regRec, currentLocation, &nextPhysRefLoc, currentInterval->registerType))
+ {
+ // If the register is next referenced at this location, only use it if
+ // this has a fixed reg requirement (i.e. this is the reference that caused
+ // the FixedReg ref to be created)
+
+ if (!regRec->conflictingFixedRegReference(refPosition))
+ {
+ useAssignedReg = true;
+ }
+ }
+ }
+ if (useAssignedReg)
+ {
+ regNumber foundReg = prevReg;
+ assignPhysReg(regRec, currentInterval);
+ refPosition->registerAssignment = genRegMask(foundReg);
+ return foundReg;
+ }
+ else
+ {
+ // Don't keep trying to allocate to this register
+ currentInterval->assignedReg = nullptr;
+ }
+ }
+
+ RegRecord* availablePhysRegInterval = nullptr;
+ Interval* intervalToUnassign = nullptr;
+
+ // Each register will receive a score which is the sum of the scoring criteria below.
+ // These were selected on the assumption that they will have an impact on the "goodness"
+ // of a register selection, and have been tuned to a certain extent by observing the impact
+ // of the ordering on asmDiffs. However, there is probably much more room for tuning,
+ // and perhaps additional criteria.
+ //
+ // These are FLAGS (bits) so that we can easily order them and add them together.
+ // If the scores are equal, but one covers more of the current interval's range,
+ // then it wins. Otherwise, the one encountered earlier in the regOrder wins.
+
+ enum RegisterScore
+ {
+ VALUE_AVAILABLE = 0x40, // It is a constant value that is already in an acceptable register.
+ COVERS = 0x20, // It is in the interval's preference set and it covers the entire lifetime.
+ OWN_PREFERENCE = 0x10, // It is in the preference set of this interval.
+ COVERS_RELATED = 0x08, // It is in the preference set of the related interval and covers the entire lifetime.
+ RELATED_PREFERENCE = 0x04, // It is in the preference set of the related interval.
+ CALLER_CALLEE = 0x02, // It is in the right "set" for the interval (caller or callee-save).
+ UNASSIGNED = 0x01, // It is not currently assigned to an inactive interval.
+ };
+
+ int bestScore = 0;
+
+ // Compute the best possible score so we can stop looping early if we find it.
+ // TODO-Throughput: At some point we may want to short-circuit the computation of each score, but
+ // probably not until we've tuned the order of these criteria. At that point,
+ // we'll need to avoid the short-circuit if we've got a stress option to reverse
+ // the selection.
+ int bestPossibleScore = COVERS + UNASSIGNED + OWN_PREFERENCE + CALLER_CALLEE;
+ if (relatedPreferences != RBM_NONE)
+ {
+ bestPossibleScore |= RELATED_PREFERENCE + COVERS_RELATED;
+ }
+
+ LsraLocation bestLocation = MinLocation;
+
+ // In non-debug builds, this will simply get optimized away
+ bool reverseSelect = false;
+#ifdef DEBUG
+ reverseSelect = doReverseSelect();
+#endif // DEBUG
+
+ // An optimization for the common case where there is only one candidate -
+ // avoid looping over all the other registers
+
+ regNumber singleReg = REG_NA;
+
+ if (genMaxOneBit(candidates))
+ {
+ regOrderSize = 1;
+ singleReg = genRegNumFromMask(candidates);
+ regOrder = &singleReg;
+ }
+
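+    // Iterate over the registers in the allocation order, scoring each available candidate.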
+ for (unsigned i = 0; i < regOrderSize && (candidates != RBM_NONE); i++)
+ {
+ regNumber regNum = regOrder[i];
+ regMaskTP candidateBit = genRegMask(regNum);
+
+ if (!(candidates & candidateBit))
+ {
+ continue;
+ }
+
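+        // Remove this register from the candidate set; the loop can then end early once no candidates remain.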
+ candidates &= ~candidateBit;
+
+ RegRecord* physRegRecord = getRegisterRecord(regNum);
+
+ int score = 0;
+ LsraLocation nextPhysRefLocation = MaxLocation;
+
+ // By chance, is this register already holding this interval, as a copyReg or having
+ // been restored as inactive after a kill?
+ if (physRegRecord->assignedInterval == currentInterval)
+ {
+ availablePhysRegInterval = physRegRecord;
+ intervalToUnassign = nullptr;
+ break;
+ }
+
+ // Find the next RefPosition of the physical register
+ if (!registerIsAvailable(physRegRecord, currentLocation, &nextPhysRefLocation, regType))
+ {
+ continue;
+ }
+
+ // If the register is next referenced at this location, only use it if
+ // this has a fixed reg requirement (i.e. this is the reference that caused
+ // the FixedReg ref to be created)
+
+ if (physRegRecord->conflictingFixedRegReference(refPosition))
+ {
+ continue;
+ }
+
+ // If this is a definition of a constant interval, check to see if its value is already in this register.
+ if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType) &&
+ (physRegRecord->assignedInterval != nullptr) && physRegRecord->assignedInterval->isConstant)
+ {
+ noway_assert(refPosition->treeNode != nullptr);
+ GenTree* otherTreeNode = physRegRecord->assignedInterval->firstRefPosition->treeNode;
+ noway_assert(otherTreeNode != nullptr);
+
+ if (refPosition->treeNode->OperGet() == otherTreeNode->OperGet())
+ {
+ switch (otherTreeNode->OperGet())
+ {
+ case GT_CNS_INT:
+ if ((refPosition->treeNode->AsIntCon()->IconValue() ==
+ otherTreeNode->AsIntCon()->IconValue()) &&
+ (varTypeGCtype(refPosition->treeNode) == varTypeGCtype(otherTreeNode)))
+ {
+#ifdef _TARGET_64BIT_
+ // If the constant is negative, only reuse registers of the same type.
+ // This is because, on a 64-bit system, we do not sign-extend immediates in registers to
+ // 64-bits unless they are actually longs, as this requires a longer instruction.
+ // This doesn't apply to a 32-bit system, on which long values occupy multiple registers.
+ // (We could sign-extend, but we would have to always sign-extend, because if we reuse more
+ // than once, we won't have access to the instruction that originally defines the constant).
+ if ((refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()) ||
+ (refPosition->treeNode->AsIntCon()->IconValue() >= 0))
+#endif // _TARGET_64BIT_
+ {
+ score |= VALUE_AVAILABLE;
+ }
+ }
+ break;
+ case GT_CNS_DBL:
+ {
+ // For floating point constants, the values must be identical, not simply compare
+ // equal. So we compare the bits.
+ if (refPosition->treeNode->AsDblCon()->isBitwiseEqual(otherTreeNode->AsDblCon()) &&
+ (refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()))
+ {
+ score |= VALUE_AVAILABLE;
+ }
+ break;
+ }
+ default:
+ // for all other 'otherTreeNode->OperGet()' kinds, we leave 'score' unchanged
+ break;
+ }
+ }
+ }
+
+ // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that
+        // we don't mistakenly conclude that it isn't covering the live range.
+ // This doesn't handle the case where earlier RefPositions for this Interval are also
+ // FixedRefs of this regNum, but at least those are only interesting in the case where those
+ // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg.
+ if (nextPhysRefLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(regNum))
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval, regNum));
+ nextPhysRefLocation++;
+ }
+
+ if ((candidateBit & preferences) != RBM_NONE)
+ {
+ score |= OWN_PREFERENCE;
+ if (nextPhysRefLocation > rangeEndLocation)
+ {
+ score |= COVERS;
+ }
+ }
+ if (relatedInterval != nullptr && (candidateBit & relatedPreferences) != RBM_NONE)
+ {
+ score |= RELATED_PREFERENCE;
+ if (nextPhysRefLocation > relatedInterval->lastRefPosition->nodeLocation)
+ {
+ score |= COVERS_RELATED;
+ }
+ }
+
+ // If we had a fixed-reg def of a reg that will be killed before the use, prefer it to any other registers
+ // with the same score. (Note that we haven't changed the original registerAssignment on the RefPosition).
+ // Overload the RELATED_PREFERENCE value.
+ else if (candidateBit == refPosition->registerAssignment)
+ {
+ score |= RELATED_PREFERENCE;
+ }
+
+ if ((preferCalleeSave && physRegRecord->isCalleeSave) || (!preferCalleeSave && !physRegRecord->isCalleeSave))
+ {
+ score |= CALLER_CALLEE;
+ }
+
+ // The register is considered unassigned if it has no assignedInterval, OR
+ // if its next reference is beyond the range of this interval.
+ if (physRegRecord->assignedInterval == nullptr ||
+ physRegRecord->assignedInterval->getNextRefLocation() > lastLocation)
+ {
+ score |= UNASSIGNED;
+ }
+
+ bool foundBetterCandidate = false;
+
+ if (score > bestScore)
+ {
+ foundBetterCandidate = true;
+ }
+ else if (score == bestScore)
+ {
+ // Prefer a register that covers the range.
+ if (bestLocation <= lastLocation)
+ {
+ if (nextPhysRefLocation > bestLocation)
+ {
+ foundBetterCandidate = true;
+ }
+ }
+ // If both cover the range, prefer a register that is killed sooner (leaving the longer range register
+ // available). If both cover the range and also getting killed at the same location, prefer the one which
+ // is same as previous assignment.
+ else if (nextPhysRefLocation > lastLocation)
+ {
+ if (nextPhysRefLocation < bestLocation)
+ {
+ foundBetterCandidate = true;
+ }
+ else if (nextPhysRefLocation == bestLocation && prevReg == regNum)
+ {
+ foundBetterCandidate = true;
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (doReverseSelect() && bestScore != 0)
+ {
+ foundBetterCandidate = !foundBetterCandidate;
+ }
+#endif // DEBUG
+
+ if (foundBetterCandidate)
+ {
+ bestLocation = nextPhysRefLocation;
+ availablePhysRegInterval = physRegRecord;
+ intervalToUnassign = physRegRecord->assignedInterval;
+ bestScore = score;
+ }
+
+ // there is no way we can get a better score so break out
+ if (!reverseSelect && score == bestPossibleScore && bestLocation == rangeEndLocation + 1)
+ {
+ break;
+ }
+ }
+
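+    // If we found a register, unassign the interval currently occupying it, if any, and assign it to the current interval.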
+ if (availablePhysRegInterval != nullptr)
+ {
+ if (intervalToUnassign != nullptr)
+ {
+ unassignPhysReg(availablePhysRegInterval, intervalToUnassign->recentRefPosition);
+ if (bestScore & VALUE_AVAILABLE)
+ {
+ assert(intervalToUnassign->isConstant);
+ refPosition->treeNode->SetReuseRegVal();
+ refPosition->treeNode->SetInReg();
+ }
+ // If we considered this "unassigned" because this interval's lifetime ends before
+ // the next ref, remember it.
+ else if ((bestScore & UNASSIGNED) != 0 && intervalToUnassign != nullptr)
+ {
+ availablePhysRegInterval->previousInterval = intervalToUnassign;
+ }
+ }
+ else
+ {
+ assert((bestScore & VALUE_AVAILABLE) == 0);
+ }
+ assignPhysReg(availablePhysRegInterval, currentInterval);
+ foundReg = availablePhysRegInterval->regNum;
+ regMaskTP foundRegMask = genRegMask(foundReg);
+ refPosition->registerAssignment = foundRegMask;
+ if (relatedInterval != nullptr)
+ {
+ relatedInterval->updateRegisterPreferences(foundRegMask);
+ }
+ }
+
+ return foundReg;
+}
+
+//------------------------------------------------------------------------
+// allocateBusyReg: Find a busy register that satisfies the requirements for refPosition,
+// and that can be spilled.
+//
+// Arguments:
+// current The interval for the current allocation
+// refPosition The RefPosition of the current Interval for which a register is being allocated
+// allocateIfProfitable If true, a reg may not be allocated if all other ref positions currently
+// occupying registers are more important than the 'refPosition'.
+//
+// Return Value:
+//    The regNumber allocated to the RefPosition.  Returns REG_NA if no free register is found.
+//
+// Note: Currently this routine uses weight and farthest distance of next reference
+// to select a ref position for spilling.
+// a) if allocateIfProfitable = false
+//        The ref position chosen for spilling will be the one with the lowest weight
+//        of all, and if there is more than one ref position with the
+//        same lowest weight, among them it chooses the one with the farthest
+//        distance to its next reference.
+//
+// b) if allocateIfProfitable = true
+//        The ref position chosen for spilling will not only have the lowest weight
+//        of all, but will also have a weight lower than 'refPosition'.  If there is
+//        no such ref position, a reg will not be allocated.
+regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable)
+{
+ regNumber foundReg = REG_NA;
+
+ RegisterType regType = getRegisterType(current, refPosition);
+ regMaskTP candidates = refPosition->registerAssignment;
+ regMaskTP preferences = (current->registerPreferences & candidates);
+ if (preferences == RBM_NONE)
+ {
+ preferences = candidates;
+ }
+ if (candidates == RBM_NONE)
+ {
+ // This assumes only integer and floating point register types
+ // if we target a processor with additional register types,
+ // this would have to change
+ candidates = allRegs(regType);
+ }
+
+#ifdef DEBUG
+ candidates = stressLimitRegs(refPosition, candidates);
+#endif // DEBUG
+
+ // TODO-CQ: Determine whether/how to take preferences into account in addition to
+    //                 preferring the one with the furthest ref position when considering
+ // a candidate to spill
+ RegRecord* farthestRefPhysRegRecord = nullptr;
+ LsraLocation farthestLocation = MinLocation;
+ LsraLocation refLocation = refPosition->nodeLocation;
+ unsigned farthestRefPosWeight;
+ if (allocateIfProfitable)
+ {
+ // If allocating a reg is optional, we will consider those ref positions
+ // whose weight is less than 'refPosition' for spilling.
+ farthestRefPosWeight = getWeight(refPosition);
+ }
+ else
+ {
+ // If allocating a reg is a must, we start off with max weight so
+ // that the first spill candidate will be selected based on
+ // farthest distance alone. Since we start off with farthestLocation
+ // initialized to MinLocation, the first available ref position
+ // will be selected as spill candidate and its weight as the
+ // fathestRefPosWeight.
+ farthestRefPosWeight = BB_MAX_WEIGHT;
+ }
+
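+    // Examine each register of the required type to find the best spill candidate.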
+ for (regNumber regNum : Registers(regType))
+ {
+ regMaskTP candidateBit = genRegMask(regNum);
+ if (!(candidates & candidateBit))
+ {
+ continue;
+ }
+ RegRecord* physRegRecord = getRegisterRecord(regNum);
+
+ if (physRegRecord->isBusyUntilNextKill)
+ {
+ continue;
+ }
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+
+ // If there is a fixed reference at the same location (and it's not due to this reference),
+ // don't use it.
+
+ if (physRegRecord->conflictingFixedRegReference(refPosition))
+ {
+ assert(candidates != candidateBit);
+ continue;
+ }
+
+ LsraLocation physRegNextLocation = MaxLocation;
+ if (refPosition->isFixedRefOfRegMask(candidateBit))
+ {
+ // Either there is a fixed reference due to this node, or one associated with a
+ // fixed use fed by a def at this node.
+ // In either case, we must use this register as it's the only candidate
+ // TODO-CQ: At the time we allocate a register to a fixed-reg def, if it's not going
+ // to remain live until the use, we should set the candidates to allRegs(regType)
+ // to avoid a spill - codegen can then insert the copy.
+ assert(candidates == candidateBit);
+ physRegNextLocation = MaxLocation;
+ farthestRefPosWeight = BB_MAX_WEIGHT;
+ }
+ else
+ {
+ physRegNextLocation = physRegRecord->getNextRefLocation();
+
+ // If refPosition requires a fixed register, we should reject all others.
+            // Otherwise, we will still evaluate all physRegs even though their next location is
+            // not better than the farthestLocation found so far.
+ //
+ // TODO: this method should be using an approach similar to tryAllocateFreeReg()
+ // where it uses a regOrder array to avoid iterating over any but the single
+ // fixed candidate.
+ if (refPosition->isFixedRegRef && physRegNextLocation < farthestLocation)
+ {
+ continue;
+ }
+ }
+
+ // If this register is not assigned to an interval, either
+ // - it has a FixedReg reference at the current location that is not this reference, OR
+ // - this is the special case of a fixed loReg, where this interval has a use at the same location
+ // In either case, we cannot use it
+
+ if (assignedInterval == nullptr)
+ {
+ RefPosition* nextPhysRegPosition = physRegRecord->getNextRefPosition();
+
+#ifndef _TARGET_ARM64_
+ // TODO-Cleanup: Revisit this after Issue #3524 is complete
+ // On ARM64 the nodeLocation is not always == refLocation, Disabling this assert for now.
+ assert(nextPhysRegPosition->nodeLocation == refLocation && candidateBit != candidates);
+#endif
+ continue;
+ }
+
+ RefPosition* recentAssignedRef = assignedInterval->recentRefPosition;
+
+ if (!assignedInterval->isActive)
+ {
+ // The assigned interval has a reference at this location - otherwise, we would have found
+ // this in tryAllocateFreeReg().
+ // Note that we may or may not have actually handled the reference yet, so it could either
+            // be recentAssignedRef, or the next reference.
+ assert(recentAssignedRef != nullptr);
+ if (recentAssignedRef->nodeLocation != refLocation)
+ {
+ if (recentAssignedRef->nodeLocation + 1 == refLocation)
+ {
+ assert(recentAssignedRef->delayRegFree);
+ }
+ else
+ {
+ RefPosition* nextAssignedRef = recentAssignedRef->nextRefPosition;
+ assert(nextAssignedRef != nullptr);
+ assert(nextAssignedRef->nodeLocation == refLocation ||
+ (nextAssignedRef->nodeLocation + 1 == refLocation && nextAssignedRef->delayRegFree));
+ }
+ }
+ continue;
+ }
+
+ // If we have a recentAssignedRef, check that it is going to be OK to spill it
+ //
+        // TODO-Review: Under what conditions would recentAssignedRef be null?
+ unsigned recentAssignedRefWeight = BB_ZERO_WEIGHT;
+ if (recentAssignedRef != nullptr)
+ {
+ if (recentAssignedRef->nodeLocation == refLocation)
+ {
+ // We can't spill a register that's being used at the current location
+ RefPosition* physRegRef = physRegRecord->recentRefPosition;
+ continue;
+ }
+
+ // If the current position has the candidate register marked to be delayed,
+ // check if the previous location is using this register, if that's the case we have to skip
+ // since we can't spill this register.
+ if (recentAssignedRef->delayRegFree && (refLocation == recentAssignedRef->nodeLocation + 1))
+ {
+ continue;
+ }
+
+ // We don't prefer to spill a register if the weight of recentAssignedRef > weight
+ // of the spill candidate found so far. We would consider spilling a greater weight
+            // ref position only if the refPosition being allocated requires a register.
+ recentAssignedRefWeight = getWeight(recentAssignedRef);
+ if (recentAssignedRefWeight > farthestRefPosWeight)
+ {
+ continue;
+ }
+ }
+
+ LsraLocation nextLocation = assignedInterval->getNextRefLocation();
+
+ // We should never spill a register that's occupied by an Interval with its next use at the current location.
+ // Normally this won't occur (unless we actually had more uses in a single node than there are registers),
+ // because we'll always find something with a later nextLocation, but it can happen in stress when
+ // we have LSRA_SELECT_NEAREST.
+ if ((nextLocation == refLocation) && !refPosition->isFixedRegRef)
+ {
+ continue;
+ }
+
+ if (nextLocation > physRegNextLocation)
+ {
+ nextLocation = physRegNextLocation;
+ }
+
+ bool isBetterLocation;
+
+#ifdef DEBUG
+ if (doSelectNearest() && farthestRefPhysRegRecord != nullptr)
+ {
+ isBetterLocation = (nextLocation <= farthestLocation);
+ }
+ else
+#endif
+ // This if-stmt is associated with the above else
+ if (recentAssignedRefWeight < farthestRefPosWeight)
+ {
+ isBetterLocation = true;
+ }
+ else
+ {
+            // This would mean the weight of the spill ref position we found so far is equal
+            // to the weight of the ref position that is being evaluated. In this case
+            // we prefer to spill the ref position whose distance to its next reference is
+ // the farthest.
+ assert(recentAssignedRefWeight == farthestRefPosWeight);
+
+ // If allocateIfProfitable=true, the first spill candidate selected
+ // will be based on weight alone. After we have found a spill
+ // candidate whose weight is less than the 'refPosition', we will
+ // consider farthest distance when there is a tie in weights.
+ // This is to ensure that we don't spill a ref position whose
+ // weight is equal to weight of 'refPosition'.
+ if (allocateIfProfitable && farthestRefPhysRegRecord == nullptr)
+ {
+ isBetterLocation = false;
+ }
+ else
+ {
+ if (nextLocation > farthestLocation)
+ {
+ isBetterLocation = true;
+ }
+ else if (nextLocation == farthestLocation)
+ {
+ // Both weight and distance are equal.
+ // Prefer that ref position which is marked both reload and
+                    // allocate if profitable. These ref positions don't need
+                    // to be spilled as they are already in memory and
+ // codegen considers them as contained memory operands.
+ isBetterLocation = (recentAssignedRef != nullptr) && recentAssignedRef->reload &&
+ recentAssignedRef->AllocateIfProfitable();
+ }
+ else
+ {
+ isBetterLocation = false;
+ }
+ }
+ }
+
+ if (isBetterLocation)
+ {
+ farthestLocation = nextLocation;
+ farthestRefPhysRegRecord = physRegRecord;
+ farthestRefPosWeight = recentAssignedRefWeight;
+ }
+ }
+
+#if DEBUG
+ if (allocateIfProfitable)
+ {
+        // There may not be a spill candidate; if one is found,
+        // its weight must be less than the weight of 'refPosition'.
+ assert((farthestRefPhysRegRecord == nullptr) || (farthestRefPosWeight < getWeight(refPosition)));
+ }
+ else
+ {
+ // Must have found a spill candidate.
+ assert((farthestRefPhysRegRecord != nullptr) && (farthestLocation > refLocation || refPosition->isFixedRegRef));
+ }
+#endif
+
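+    // If we found a spill candidate, unassign (spill) its current interval and assign the register to 'current'.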
+ if (farthestRefPhysRegRecord != nullptr)
+ {
+ foundReg = farthestRefPhysRegRecord->regNum;
+ unassignPhysReg(farthestRefPhysRegRecord, farthestRefPhysRegRecord->assignedInterval->recentRefPosition);
+ assignPhysReg(farthestRefPhysRegRecord, current);
+ refPosition->registerAssignment = genRegMask(foundReg);
+ }
+ else
+ {
+ foundReg = REG_NA;
+ refPosition->registerAssignment = RBM_NONE;
+ }
+
+ return foundReg;
+}
+
+// Grab a register to use to copy and then immediately use.
+// This is called only for localVar intervals that already have a register
+// assignment that is not compatible with the current RefPosition.
+// This is not like regular assignment, because we don't want to change
+// any preferences or existing register assignments.
+// Prefer a free register that's got the earliest next use.
+// Otherwise, spill something with the farthest next use
+//
+regNumber LinearScan::assignCopyReg(RefPosition* refPosition)
+{
+ Interval* currentInterval = refPosition->getInterval();
+ assert(currentInterval != nullptr);
+ assert(currentInterval->isActive);
+
+ bool foundFreeReg = false;
+ RegRecord* bestPhysReg = nullptr;
+ LsraLocation bestLocation = MinLocation;
+ regMaskTP candidates = refPosition->registerAssignment;
+
+ // Save the relatedInterval, if any, so that it doesn't get modified during allocation.
+ Interval* savedRelatedInterval = currentInterval->relatedInterval;
+ currentInterval->relatedInterval = nullptr;
+
+    // We don't really want to change the default assignment,
+ // so 1) pretend this isn't active, and 2) remember the old reg
+ regNumber oldPhysReg = currentInterval->physReg;
+ RegRecord* oldRegRecord = currentInterval->assignedReg;
+ assert(oldRegRecord->regNum == oldPhysReg);
+ currentInterval->isActive = false;
+
+ regNumber allocatedReg = tryAllocateFreeReg(currentInterval, refPosition);
+ if (allocatedReg == REG_NA)
+ {
+ allocatedReg = allocateBusyReg(currentInterval, refPosition, false);
+ }
+
+ // Now restore the old info
+ currentInterval->relatedInterval = savedRelatedInterval;
+ currentInterval->physReg = oldPhysReg;
+ currentInterval->assignedReg = oldRegRecord;
+ currentInterval->isActive = true;
+
+ refPosition->copyReg = true;
+ return allocatedReg;
+}
+
+// Check if the given RegRecord is already assigned to another interval; if so, unassign it,
+// then set its assignedInterval to 'interval'.
+//
+void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval)
+{
+ if (regRec->assignedInterval != nullptr && regRec->assignedInterval != interval)
+ {
+ // This is allocated to another interval. Either it is inactive, or it was allocated as a
+ // copyReg and is therefore not the "assignedReg" of the other interval. In the latter case,
+ // we simply unassign it - in the former case we need to set the physReg on the interval to
+ // REG_NA to indicate that it is no longer in that register.
+ // The lack of checking for this case resulted in an assert in the retail version of System.dll,
+ // in method SerialStream.GetDcbFlag.
+ // Note that we can't check for the copyReg case, because we may have seen a more recent
+ // RefPosition for the Interval that was NOT a copyReg.
+ if (regRec->assignedInterval->assignedReg == regRec)
+ {
+ assert(regRec->assignedInterval->isActive == false);
+ regRec->assignedInterval->physReg = REG_NA;
+ }
+ unassignPhysReg(regRec->regNum);
+ }
+
+ regRec->assignedInterval = interval;
+}
+
+// Assign the given physical register record to the given interval
+void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval)
+{
+ regMaskTP assignedRegMask = genRegMask(regRec->regNum);
+ compiler->codeGen->regSet.rsSetRegsModified(assignedRegMask DEBUGARG(dumpTerse));
+
+ checkAndAssignInterval(regRec, interval);
+ interval->assignedReg = regRec;
+
+#ifdef _TARGET_ARM_
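+    // A TYP_DOUBLE interval occupies a pair of float registers on ARM, so also assign the adjacent register.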
+ if ((interval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType))
+ {
+ regNumber nextRegNum = REG_NEXT(regRec->regNum);
+ RegRecord* nextRegRec = getRegisterRecord(nextRegNum);
+
+ checkAndAssignInterval(nextRegRec, interval);
+ }
+#endif // _TARGET_ARM_
+
+ interval->physReg = regRec->regNum;
+ interval->isActive = true;
+ if (interval->isLocalVar)
+ {
+ // Prefer this register for future references
+ interval->updateRegisterPreferences(assignedRegMask);
+ }
+}
+
+//------------------------------------------------------------------------
+// spillInterval: Spill the given Interval between "fromRefPosition" and "toRefPosition"
+//
+// Arguments:
+//    interval        - The Interval to be spilled
+//    fromRefPosition - The RefPosition at which the Interval is to be spilled
+//    toRefPosition   - The RefPosition at which it must be reloaded
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// fromRefPosition and toRefPosition must not be null
+//
+void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition)
+{
+ assert(fromRefPosition != nullptr && toRefPosition != nullptr);
+ assert(fromRefPosition->getInterval() == interval && toRefPosition->getInterval() == interval);
+ assert(fromRefPosition->nextRefPosition == toRefPosition);
+
+ if (!fromRefPosition->lastUse)
+ {
+        // Lcl var def/use ref positions, even if reg-optional, should be marked as
+        // spillAfter if they were not allocated a register.
+ if (!fromRefPosition->RequiresRegister() && !(interval->isLocalVar && fromRefPosition->IsActualRef()))
+ {
+ fromRefPosition->registerAssignment = RBM_NONE;
+ }
+ else
+ {
+ fromRefPosition->spillAfter = true;
+ }
+ }
+ assert(toRefPosition != nullptr);
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_SPILL, interval);
+ }
+#endif // DEBUG
+
+ interval->isActive = false;
+ interval->isSpilled = true;
+
+ // If fromRefPosition occurs before the beginning of this block, mark this as living in the stack
+ // on entry to this block.
+ if (fromRefPosition->nodeLocation <= curBBStartLocation)
+ {
+ // This must be a lclVar interval
+ assert(interval->isLocalVar);
+ setInVarRegForBB(curBBNum, interval->varNum, REG_STK);
+ }
+}
+
+//------------------------------------------------------------------------
+// unassignPhysRegNoSpill: Unassign the given physical register record from
+// an active interval, without spilling.
+//
+// Arguments:
+//    regRec - the RegRecord to be unassigned
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The assignedInterval must not be null, and must be active.
+//
+// Notes:
+// This method is used to unassign a register when an interval needs to be moved to a
+// different register, but not (yet) spilled.
+
+void LinearScan::unassignPhysRegNoSpill(RegRecord* regRec)
+{
+ Interval* assignedInterval = regRec->assignedInterval;
+ assert(assignedInterval != nullptr && assignedInterval->isActive);
+ assignedInterval->isActive = false;
+ unassignPhysReg(regRec, nullptr);
+ assignedInterval->isActive = true;
+}
+
+//------------------------------------------------------------------------
+// checkAndClearInterval: Clear the assignedInterval for the given
+// physical register record
+//
+// Arguments:
+//    regRec - the physical RegRecord to be unassigned
+// spillRefPosition - The RefPosition at which the assignedInterval is to be spilled
+// or nullptr if we aren't spilling
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// see unassignPhysReg
+//
+void LinearScan::checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition)
+{
+ Interval* assignedInterval = regRec->assignedInterval;
+ assert(assignedInterval != nullptr);
+ regNumber thisRegNum = regRec->regNum;
+
+ if (spillRefPosition == nullptr)
+ {
+ // Note that we can't assert for the copyReg case
+ //
+ if (assignedInterval->physReg == thisRegNum)
+ {
+ assert(assignedInterval->isActive == false);
+ }
+ }
+ else
+ {
+ assert(spillRefPosition->getInterval() == assignedInterval);
+ }
+
+ regRec->assignedInterval = nullptr;
+}
+
+//------------------------------------------------------------------------
+// unassignPhysReg: Unassign the given physical register record, and spill the
+// assignedInterval at the given spillRefPosition, if any.
+//
+// Arguments:
+//    regRec - the RegRecord to be unassigned
+// spillRefPosition - The RefPosition at which the assignedInterval is to be spilled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The assignedInterval must not be null.
+// If spillRefPosition is null, the assignedInterval must be inactive, or not currently
+// assigned to this register (e.g. this is a copyReg for that Interval).
+// Otherwise, spillRefPosition must be associated with the assignedInterval.
+//
+void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition)
+{
+ Interval* assignedInterval = regRec->assignedInterval;
+ assert(assignedInterval != nullptr);
+ checkAndClearInterval(regRec, spillRefPosition);
+ regNumber thisRegNum = regRec->regNum;
+
+#ifdef _TARGET_ARM_
+ if ((assignedInterval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType))
+ {
+ regNumber nextRegNum = REG_NEXT(regRec->regNum);
+ RegRecord* nextRegRec = getRegisterRecord(nextRegNum);
+ checkAndClearInterval(nextRegRec, spillRefPosition);
+ }
+#endif // _TARGET_ARM_
+
+#ifdef DEBUG
+ if (VERBOSE && !dumpTerse)
+ {
+ printf("unassigning %s: ", getRegName(regRec->regNum));
+ assignedInterval->dump();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ RefPosition* nextRefPosition = nullptr;
+ if (spillRefPosition != nullptr)
+ {
+ nextRefPosition = spillRefPosition->nextRefPosition;
+ }
+
+ if (assignedInterval->physReg != REG_NA && assignedInterval->physReg != thisRegNum)
+ {
+ // This must have been a temporary copy reg, but we can't assert that because there
+ // may have been intervening RefPositions that were not copyRegs.
+ regRec->assignedInterval = nullptr;
+ return;
+ }
+
+ regNumber victimAssignedReg = assignedInterval->physReg;
+ assignedInterval->physReg = REG_NA;
+
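+ // Spill only if the interval is still active and has further references; otherwise it can
+ // simply be unassigned.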
+ bool spill = assignedInterval->isActive && nextRefPosition != nullptr;
+ if (spill)
+ {
+ // If this is an active interval, it must have a recentRefPosition,
+ // otherwise it would not be active
+ assert(spillRefPosition != nullptr);
+
+#if 0
+ // TODO-CQ: Enable this and insert an explicit GT_COPY (otherwise there's no way to communicate
+ // to codegen that we want the copyReg to be the new home location).
+ // If the last reference was a copyReg, and we're spilling the register
+ // it was copied from, then make the copyReg the new primary location
+ // if possible
+ if (spillRefPosition->copyReg)
+ {
+ regNumber copyFromRegNum = victimAssignedReg;
+ regNumber copyRegNum = genRegNumFromMask(spillRefPosition->registerAssignment);
+ if (copyFromRegNum == thisRegNum &&
+ getRegisterRecord(copyRegNum)->assignedInterval == assignedInterval)
+ {
+ assert(copyRegNum != thisRegNum);
+ assignedInterval->physReg = copyRegNum;
+ assignedInterval->assignedReg = this->getRegisterRecord(copyRegNum);
+ return;
+ }
+ }
+#endif // 0
+#ifdef DEBUG
+ // With JitStressRegs == 0x80 (LSRA_EXTEND_LIFETIMES), we may have a RefPosition
+ // that is not marked lastUse even though the treeNode is a lastUse. In that case
+ // we must not mark it for spill because the register will have been immediately freed
+ // after use. While we could conceivably add special handling for this case in codegen,
+ // it would be messy and undesirably cause the "bleeding" of LSRA stress modes outside
+ // of LSRA.
+ if (extendLifetimes() && assignedInterval->isLocalVar && RefTypeIsUse(spillRefPosition->refType) &&
+ spillRefPosition->treeNode != nullptr && (spillRefPosition->treeNode->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_SPILL_EXTENDED_LIFETIME, assignedInterval);
+ assignedInterval->isActive = false;
+ spill = false;
+ // If the spillRefPosition occurs before the beginning of this block, it will have
+ // been marked as living in this register on entry to this block, but we now need
+ // to mark this as living on the stack.
+ if (spillRefPosition->nodeLocation <= curBBStartLocation)
+ {
+ setInVarRegForBB(curBBNum, assignedInterval->varNum, REG_STK);
+ if (spillRefPosition->nextRefPosition != nullptr)
+ {
+ assignedInterval->isSpilled = true;
+ }
+ }
+ else
+ {
+ // Otherwise, we need to mark spillRefPosition as lastUse, or the interval
+ // will remain active beyond its allocated range during the resolution phase.
+ spillRefPosition->lastUse = true;
+ }
+ }
+ else
+#endif // DEBUG
+ {
+ spillInterval(assignedInterval, spillRefPosition, nextRefPosition);
+ }
+ }
+ // Maintain the association with the interval, if it has more references.
+ // Or, if we "remembered" an interval assigned to this register, restore it.
+ if (nextRefPosition != nullptr)
+ {
+ assignedInterval->assignedReg = regRec;
+ }
+ else if (regRec->previousInterval != nullptr && regRec->previousInterval->assignedReg == regRec &&
+ regRec->previousInterval->getNextRefPosition() != nullptr)
+ {
+ regRec->assignedInterval = regRec->previousInterval;
+ regRec->previousInterval = nullptr;
+#ifdef DEBUG
+ if (spill)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, regRec->assignedInterval,
+ thisRegNum);
+ }
+ else
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL, regRec->assignedInterval, thisRegNum);
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ regRec->assignedInterval = nullptr;
+ regRec->previousInterval = nullptr;
+ }
+}
+
+//------------------------------------------------------------------------
+// spillGCRefs: Spill any GC-type intervals that are currently in registers.
+//
+// Arguments:
+// killRefPosition - The RefPosition for the kill
+//
+// Return Value:
+// None.
+//
+void LinearScan::spillGCRefs(RefPosition* killRefPosition)
+{
+ // For each physical register that can hold a GC type,
+ // if it is occupied by an interval of a GC type, spill that interval.
+ regMaskTP candidateRegs = killRefPosition->registerAssignment;
+ while (candidateRegs != RBM_NONE)
+ {
+ regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
+ candidateRegs &= ~nextRegBit;
+ regNumber nextReg = genRegNumFromMask(nextRegBit);
+ RegRecord* regRecord = getRegisterRecord(nextReg);
+ Interval* assignedInterval = regRecord->assignedInterval;
+ if (assignedInterval == nullptr || (assignedInterval->isActive == false) ||
+ !varTypeIsGC(assignedInterval->registerType))
+ {
+ continue;
+ }
+ unassignPhysReg(regRecord, assignedInterval->recentRefPosition);
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DONE_KILL_GC_REFS, nullptr, REG_NA, nullptr));
+}
+
+//------------------------------------------------------------------------
+// processBlockEndAllocation: Update var locations after 'currentBlock' has been allocated
+//
+// Arguments:
+// currentBlock - the BasicBlock we have just finished allocating registers for
+//
+// Return Value:
+// None
+//
+// Notes:
+// Calls processBlockEndLocations() to set the outVarToRegMap, then gets the next block,
+// and sets the inVarToRegMap appropriately.
+
+void LinearScan::processBlockEndAllocation(BasicBlock* currentBlock)
+{
+ assert(currentBlock != nullptr);
+ processBlockEndLocations(currentBlock);
+ markBlockVisited(currentBlock);
+
+ // Get the next block to allocate.
+ // When the last block in the method has successors, there will be a final "RefTypeBB" to
+ // ensure that we get the varToRegMap set appropriately, but in that case we don't need
+ // to worry about "nextBlock".
+ BasicBlock* nextBlock = getNextBlock();
+ if (nextBlock != nullptr)
+ {
+ processBlockStartLocations(nextBlock, true);
+ }
+}
+
+//------------------------------------------------------------------------
+// rotateBlockStartLocation: When in the LSRA_BLOCK_BOUNDARY_ROTATE stress mode, attempt to
+// "rotate" the register assignment for a localVar to the next higher
+// register that is available.
+//
+// Arguments:
+// interval - the Interval for the variable whose register is getting rotated
+// targetReg - its register assignment from the predecessor block being used for live-in
+// availableRegs - registers available for use
+//
+// Return Value:
+// The new register to use.
+
+#ifdef DEBUG
+regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs)
+{
+ if (targetReg != REG_STK && getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE)
+ {
+ // If we're rotating the register locations at block boundaries, try to use
+ // the next higher register number of the appropriate register type.
+ regMaskTP candidateRegs = allRegs(interval->registerType) & availableRegs;
+ regNumber firstReg = REG_NA;
+ regNumber newReg = REG_NA;
+ while (candidateRegs != RBM_NONE)
+ {
+ regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
+ candidateRegs &= ~nextRegBit;
+ regNumber nextReg = genRegNumFromMask(nextRegBit);
+ if (nextReg > targetReg)
+ {
+ newReg = nextReg;
+ break;
+ }
+ else if (firstReg == REG_NA)
+ {
+ firstReg = nextReg;
+ }
+ }
+ if (newReg == REG_NA)
+ {
+ assert(firstReg != REG_NA);
+ newReg = firstReg;
+ }
+ targetReg = newReg;
+ }
+ return targetReg;
+}
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// processBlockStartLocations: Update var locations on entry to 'currentBlock'
+//
+// Arguments:
+// currentBlock - the BasicBlock whose variable locations on entry are being updated
+// allocationPass - true if we are currently allocating registers (versus writing them back)
+//
+// Return Value:
+// None
+//
+// Notes:
+// During the allocation pass, we use the outVarToRegMap of the selected predecessor to
+// determine the lclVar locations for the inVarToRegMap.
+// During the resolution (write-back) pass, we only modify the inVarToRegMap in cases where
+// a lclVar was spilled after the block had been completed.
+void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool allocationPass)
+{
+ unsigned predBBNum = blockInfo[currentBlock->bbNum].predBBNum;
+ VarToRegMap predVarToRegMap = getOutVarToRegMap(predBBNum);
+ VarToRegMap inVarToRegMap = getInVarToRegMap(currentBlock->bbNum);
+ bool hasCriticalInEdge = blockInfo[currentBlock->bbNum].hasCriticalInEdge;
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveIn, currentBlock->bbLiveIn);
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ VarSetOps::AssignNoCopy(compiler, liveIn, compiler->lvaTrackedVars);
+ }
+ // If we are rotating register assignments at block boundaries, we want to make the
+ // inactive registers available for the rotation.
+ regMaskTP inactiveRegs = RBM_NONE;
+#endif // DEBUG
+ regMaskTP liveRegs = RBM_NONE;
+ VARSET_ITER_INIT(compiler, iter, liveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ if (!compiler->lvaTable[varNum].lvLRACandidate)
+ {
+ continue;
+ }
+ regNumber targetReg;
+ Interval* interval = getIntervalForLocalVar(varNum);
+ RefPosition* nextRefPosition = interval->getNextRefPosition();
+ assert(nextRefPosition != nullptr);
+
+ if (allocationPass)
+ {
+ targetReg = predVarToRegMap[varIndex];
+ INDEBUG(targetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs)));
+ inVarToRegMap[varIndex] = targetReg;
+ }
+ else // !allocationPass (i.e. resolution/write-back pass)
+ {
+ targetReg = inVarToRegMap[varIndex];
+ // There are four cases that we need to consider during the resolution pass:
+ // 1. This variable had a register allocated initially, and it was not spilled in the RefPosition
+ // that feeds this block. In this case, both targetReg and predVarToRegMap[varIndex] will be targetReg.
+ // 2. This variable had not been spilled prior to the end of predBB, but was later spilled, so
+ // predVarToRegMap[varIndex] will be REG_STK, but targetReg is its former allocated value.
+ // In this case, we will normally change it to REG_STK. We will update its "spilled" status when we
+ // encounter it in resolveLocalRef().
+ // 2a. If the next RefPosition is marked as a copyReg, we need to retain the allocated register. This is
+ // because the copyReg RefPosition will not have recorded the "home" register, yet downstream
+ // RefPositions rely on the correct "home" register.
+ // 3. This variable was spilled before we reached the end of predBB. In this case, both targetReg and
+ // predVarToRegMap[varIndex] will be REG_STK, and the next RefPosition will have been marked
+ // as reload during allocation time if necessary (note that by the time we actually reach the next
+ // RefPosition, we may be using a different predecessor, in which it may still be in a register).
+ // 4. This variable was spilled during the allocation of this block, so targetReg is REG_STK
+ // (because we set inVarToRegMap at the time we spilled it), but predVarToRegMap[varIndex]
+ // is not REG_STK. We retain the REG_STK value in the inVarToRegMap.
+ if (targetReg != REG_STK)
+ {
+ if (predVarToRegMap[varIndex] != REG_STK)
+ {
+ // Case #1 above.
+ assert(predVarToRegMap[varIndex] == targetReg ||
+ getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE);
+ }
+ else if (!nextRefPosition->copyReg)
+ {
+ // case #2 above.
+ inVarToRegMap[varIndex] = REG_STK;
+ targetReg = REG_STK;
+ }
+ // Else case 2a. - retain targetReg.
+ }
+ // Else case #3 or #4, we retain targetReg and nothing further to do or assert.
+ }
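+ // At this point, targetReg is the location (a register or REG_STK) where the variable should be
+ // live on entry to this block; reconcile the Interval and RegRecord state with that location.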
+ if (interval->physReg == targetReg)
+ {
+ if (interval->isActive)
+ {
+ assert(targetReg != REG_STK);
+ assert(interval->assignedReg != nullptr && interval->assignedReg->regNum == targetReg &&
+ interval->assignedReg->assignedInterval == interval);
+ liveRegs |= genRegMask(targetReg);
+ continue;
+ }
+ }
+ else if (interval->physReg != REG_NA)
+ {
+ // This can happen if we are using the locations from a basic block other than the
+ // immediately preceding one - where the variable was in a different location.
+ if (targetReg != REG_STK)
+ {
+ // Unassign it from the register (it will get a new register below).
+ if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
+ {
+ interval->isActive = false;
+ unassignPhysReg(getRegisterRecord(interval->physReg), nullptr);
+ }
+ else
+ {
+ // This interval was live in this register the last time we saw a reference to it,
+ // but has since been displaced.
+ interval->physReg = REG_NA;
+ }
+ }
+ else if (allocationPass)
+ {
+ // Keep the register assignment - if another var has it, it will get unassigned.
+ // Otherwise, resolution will fix it up later, and it will be more
+ // likely to match other assignments this way.
+ interval->isActive = true;
+ liveRegs |= genRegMask(interval->physReg);
+ INDEBUG(inactiveRegs |= genRegMask(interval->physReg));
+ inVarToRegMap[varIndex] = interval->physReg;
+ }
+ else
+ {
+ interval->physReg = REG_NA;
+ }
+ }
+ if (targetReg != REG_STK)
+ {
+ RegRecord* targetRegRecord = getRegisterRecord(targetReg);
+ liveRegs |= genRegMask(targetReg);
+ if (!interval->isActive)
+ {
+ interval->isActive = true;
+ interval->physReg = targetReg;
+ interval->assignedReg = targetRegRecord;
+ }
+ Interval* assignedInterval = targetRegRecord->assignedInterval;
+ if (assignedInterval != interval)
+ {
+ // Is there another interval currently assigned to this register? If so unassign it.
+ if (assignedInterval != nullptr)
+ {
+ if (assignedInterval->assignedReg == targetRegRecord)
+ {
+ // If the interval is active, it will be set to active when we reach its new
+ // register assignment (which we must not yet have done, or it wouldn't still be
+ // assigned to this register).
+ assignedInterval->isActive = false;
+ unassignPhysReg(targetRegRecord, nullptr);
+ if (allocationPass && assignedInterval->isLocalVar &&
+ inVarToRegMap[assignedInterval->getVarIndex(compiler)] == targetReg)
+ {
+ inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
+ }
+ }
+ else
+ {
+ // This interval is no longer assigned to this register.
+ targetRegRecord->assignedInterval = nullptr;
+ }
+ }
+ assignPhysReg(targetRegRecord, interval);
+ }
+ if (interval->recentRefPosition != nullptr && !interval->recentRefPosition->copyReg &&
+ interval->recentRefPosition->registerAssignment != genRegMask(targetReg))
+ {
+ interval->getNextRefPosition()->outOfOrder = true;
+ }
+ }
+ }
+
+ // Unassign any registers that are no longer live.
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if ((liveRegs & genRegMask(reg)) == 0)
+ {
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+
+ if (assignedInterval != nullptr)
+ {
+ assert(assignedInterval->isLocalVar || assignedInterval->isConstant);
+ if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord)
+ {
+ assignedInterval->isActive = false;
+ if (assignedInterval->getNextRefPosition() == nullptr)
+ {
+ unassignPhysReg(physRegRecord, nullptr);
+ }
+ inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
+ }
+ else
+ {
+ // This interval may still be active, but was in another register in an
+ // intervening block.
+ physRegRecord->assignedInterval = nullptr;
+ }
+ }
+ }
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock));
+}
+
+//------------------------------------------------------------------------
+// processBlockEndLocations: Record the variables occupying registers after completing the current block.
+//
+// Arguments:
+// currentBlock - the block we have just completed.
+//
+// Return Value:
+// None
+//
+// Notes:
+// This must be called both during the allocation and resolution (write-back) phases.
+// This is because we need to have the outVarToRegMap locations in order to set the locations
+// at successor blocks during allocation time, but if lclVars are spilled after a block has been
+// completed, we need to record the REG_STK location for those variables at resolution time.
+
+void LinearScan::processBlockEndLocations(BasicBlock* currentBlock)
+{
+ assert(currentBlock != nullptr && currentBlock->bbNum == curBBNum);
+ VarToRegMap outVarToRegMap = getOutVarToRegMap(curBBNum);
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveOut, currentBlock->bbLiveOut);
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ VarSetOps::AssignNoCopy(compiler, liveOut, compiler->lvaTrackedVars);
+ }
+#endif // DEBUG
+ regMaskTP liveRegs = RBM_NONE;
+ VARSET_ITER_INIT(compiler, iter, liveOut, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ Interval* interval = getIntervalForLocalVar(varNum);
+ if (interval->isActive)
+ {
+ assert(interval->physReg != REG_NA && interval->physReg != REG_STK);
+ outVarToRegMap[varIndex] = interval->physReg;
+ }
+ else
+ {
+ outVarToRegMap[varIndex] = REG_STK;
+ }
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB));
+}
+
+#ifdef DEBUG
+void LinearScan::dumpRefPositions(const char* str)
+{
+ printf("------------\n");
+ printf("REFPOSITIONS %s: \n", str);
+ printf("------------\n");
+ for (auto& refPos : refPositions)
+ {
+ refPos.dump();
+ }
+}
+#endif // DEBUG
+
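+//------------------------------------------------------------------------
+// registerIsFree: Determine whether the given physical register is currently free.
+//
+// Arguments:
+// regNum - the register of interest
+// regType - the type for which the register would be used; on ARM, TYP_DOUBLE
+// also requires the second half of the double register pair to be free
+//
+// Return Value:
+// True if the register (and, for ARM doubles, its pair) is free.
+//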
+bool LinearScan::registerIsFree(regNumber regNum, RegisterType regType)
+{
+ RegRecord* physRegRecord = getRegisterRecord(regNum);
+
+ bool isFree = physRegRecord->isFree();
+
+#ifdef _TARGET_ARM_
+ if (isFree && regType == TYP_DOUBLE)
+ {
+ isFree = getRegisterRecord(REG_NEXT(regNum))->isFree();
+ }
+#endif // _TARGET_ARM_
+
+ return isFree;
+}
+
+//------------------------------------------------------------------------
+// LinearScan::freeRegister: Make a register available for use
+//
+// Arguments:
+// physRegRecord - the RegRecord for the register to be freed.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// None.
+// It may be that the RegRecord has already been freed, e.g. due to a kill,
+// in which case this method has no effect.
+//
+// Notes:
+// If there is currently an Interval assigned to this register, and it has
+// more references (i.e. this is a local last-use, but more uses and/or
+// defs remain), it will remain assigned to the physRegRecord. However, since
+// it is marked inactive, the register will be available, albeit less desirable
+// to allocate.
+void LinearScan::freeRegister(RegRecord* physRegRecord)
+{
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+ // It may have already been freed by a "Kill"
+ if (assignedInterval != nullptr)
+ {
+ assignedInterval->isActive = false;
+ // If this is a constant interval that we may encounter again (i.e. its value could be reused),
+ // don't unassign it until we need the register.
+ if (!assignedInterval->isConstant)
+ {
+ RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
+ // Unassign the register only if there are no more RefPositions, or the next
+ // one is a def. Note that the latter condition doesn't actually ensure that
+ // there aren't subsequent uses that could be reached by a def in the assigned
+ // register, but is merely a heuristic to avoid tying up the register (or using
+ // it when it's non-optimal). A better alternative would be to use SSA, so that
+ // we wouldn't unnecessarily link separate live ranges to the same register.
+ if (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType))
+ {
+ unassignPhysReg(physRegRecord, nullptr);
+ }
+ }
+ }
+}
+
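+//------------------------------------------------------------------------
+// freeRegisters: Free any registers in 'regsToFree'.
+//
+// Arguments:
+// regsToFree - the mask of registers to free (may be RBM_NONE)
+//
+// Return Value:
+// None.
+//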
+void LinearScan::freeRegisters(regMaskTP regsToFree)
+{
+ if (regsToFree == RBM_NONE)
+ {
+ return;
+ }
+
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS));
+ while (regsToFree != RBM_NONE)
+ {
+ regMaskTP nextRegBit = genFindLowestBit(regsToFree);
+ regsToFree &= ~nextRegBit;
+ regNumber nextReg = genRegNumFromMask(nextRegBit);
+ freeRegister(getRegisterRecord(nextReg));
+ }
+}
+
+// Actual register allocation, accomplished by iterating over all of the previously
+// constructed RefPositions, in order, and assigning registers to their Intervals.
+// Loosely based on raAssignVars()
+//
+void LinearScan::allocateRegisters()
+{
+ JITDUMP("*************** In LinearScan::allocateRegisters()\n");
+ DBEXEC(VERBOSE, lsraDumpIntervals("before allocateRegisters"));
+
+ // at start, nothing is active except for register args
+ for (auto& interval : intervals)
+ {
+ Interval* currentInterval = &interval;
+ currentInterval->recentRefPosition = nullptr;
+ currentInterval->isActive = false;
+ if (currentInterval->isLocalVar)
+ {
+ LclVarDsc* varDsc = currentInterval->getLocalVar(compiler);
+ if (varDsc->lvIsRegArg && currentInterval->firstRefPosition != nullptr)
+ {
+ currentInterval->isActive = true;
+ }
+ }
+ }
+
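+ // Similarly, reset the state of the physical RegRecords.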
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ getRegisterRecord(reg)->recentRefPosition = nullptr;
+ getRegisterRecord(reg)->isActive = false;
+ }
+
+#ifdef DEBUG
+ regNumber lastAllocatedReg = REG_NA;
+ if (VERBOSE)
+ {
+ dumpRefPositions("BEFORE ALLOCATION");
+ dumpVarRefPositions("BEFORE ALLOCATION");
+
+ printf("\n\nAllocating Registers\n"
+ "--------------------\n");
+ if (dumpTerse)
+ {
+ dumpRegRecordHeader();
+ // Now print an empty indent
+ printf(indentFormat, "");
+ }
+ }
+#endif // DEBUG
+
+ BasicBlock* currentBlock = nullptr;
+
+ LsraLocation prevLocation = MinLocation;
+ regMaskTP regsToFree = RBM_NONE;
+ regMaskTP delayRegsToFree = RBM_NONE;
+
+ // This is the most recent RefPosition for which a register was allocated
+ // - currently only used for DEBUG but maintained in non-debug, for clarity of code
+ // (and will be optimized away because in non-debug spillAlways() unconditionally returns false)
+ RefPosition* lastAllocatedRefPosition = nullptr;
+
+ bool handledBlockEnd = false;
+
+ for (auto& refPosition : refPositions)
+ {
+ RefPosition* currentRefPosition = &refPosition;
+
+#ifdef DEBUG
+ // Set the activeRefPosition to null until we're done with any boundary handling.
+ activeRefPosition = nullptr;
+ if (VERBOSE)
+ {
+ if (dumpTerse)
+ {
+ // We're really dumping the RegRecords "after" the previous RefPosition, but it's more convenient
+ // to do this here, since there are a number of "continue"s in this loop.
+ dumpRegRecords();
+ }
+ else
+ {
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ // This is the previousRefPosition of the current Referent, if any
+ RefPosition* previousRefPosition = nullptr;
+
+ Interval* currentInterval = nullptr;
+ Referenceable* currentReferent = nullptr;
+ bool isInternalRef = false;
+ RefType refType = currentRefPosition->refType;
+
+ currentReferent = currentRefPosition->referent;
+
+ if (spillAlways() && lastAllocatedRefPosition != nullptr && !lastAllocatedRefPosition->isPhysRegRef &&
+ !lastAllocatedRefPosition->getInterval()->isInternal &&
+ (RefTypeIsDef(lastAllocatedRefPosition->refType) || lastAllocatedRefPosition->getInterval()->isLocalVar))
+ {
+ assert(lastAllocatedRefPosition->registerAssignment != RBM_NONE);
+ RegRecord* regRecord = lastAllocatedRefPosition->getInterval()->assignedReg;
+ unassignPhysReg(regRecord, lastAllocatedRefPosition);
+ // Now set lastAllocatedRefPosition to null, so that we don't try to spill it again
+ lastAllocatedRefPosition = nullptr;
+ }
+
+ // We wait to free any registers until we've completed all the
+ // uses for the current node.
+ // This avoids reusing registers too soon.
+ // We free before the last true def (after all the uses & internal
+ // registers), and then again at the beginning of the next node.
+ // This is made easier by assigning two LsraLocations per node - one
+ // for all the uses, internal registers & all but the last def, and
+ // another for the final def (if any).
+
+ LsraLocation currentLocation = currentRefPosition->nodeLocation;
+
+ if ((regsToFree | delayRegsToFree) != RBM_NONE)
+ {
+ bool doFreeRegs = false;
+ // Free at a new location, or at a basic block boundary
+ if (currentLocation > prevLocation || refType == RefTypeBB)
+ {
+ doFreeRegs = true;
+ }
+
+ if (doFreeRegs)
+ {
+ freeRegisters(regsToFree);
+ regsToFree = delayRegsToFree;
+ delayRegsToFree = RBM_NONE;
+ }
+ }
+ prevLocation = currentLocation;
+
+ // get previous refposition, then current refpos is the new previous
+ if (currentReferent != nullptr)
+ {
+ previousRefPosition = currentReferent->recentRefPosition;
+ currentReferent->recentRefPosition = currentRefPosition;
+ }
+ else
+ {
+ assert((refType == RefTypeBB) || (refType == RefTypeKillGCRefs));
+ }
+
+ // For the purposes of register resolution, we handle the DummyDefs before
+ // the block boundary - so the RefTypeBB is after all the DummyDefs.
+ // However, for the purposes of allocation, we want to handle the block
+ // boundary first, so that we can free any registers occupied by lclVars
+ // that aren't live in the next block and make them available for the
+ // DummyDefs.
+
+ if (!handledBlockEnd && (refType == RefTypeBB || refType == RefTypeDummyDef))
+ {
+ // Free any delayed regs (now in regsToFree) before processing the block boundary
+ freeRegisters(regsToFree);
+ regsToFree = RBM_NONE;
+ handledBlockEnd = true;
+ curBBStartLocation = currentRefPosition->nodeLocation;
+ if (currentBlock == nullptr)
+ {
+ currentBlock = startBlockSequence();
+ }
+ else
+ {
+ processBlockEndAllocation(currentBlock);
+ currentBlock = moveToNextBlock();
+ }
+#ifdef DEBUG
+ if (VERBOSE && currentBlock != nullptr && !dumpTerse)
+ {
+ currentBlock->dspBlockHeader(compiler);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+
+#ifdef DEBUG
+ activeRefPosition = currentRefPosition;
+ if (VERBOSE)
+ {
+ if (dumpTerse)
+ {
+ dumpRefPositionShort(currentRefPosition, currentBlock);
+ }
+ else
+ {
+ currentRefPosition->dump();
+ }
+ }
+#endif // DEBUG
+
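+ // The RefTypeBB position itself requires no allocation; the block boundary was handled
+ // above, so just reset the flag for the next block boundary.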
+ if (refType == RefTypeBB)
+ {
+ handledBlockEnd = false;
+ continue;
+ }
+
+ if (refType == RefTypeKillGCRefs)
+ {
+ spillGCRefs(currentRefPosition);
+ continue;
+ }
+
+ // If this is a FixedReg, disassociate any inactive constant interval from this register.
+ // Otherwise, do nothing.
+ if (refType == RefTypeFixedReg)
+ {
+ RegRecord* regRecord = currentRefPosition->getReg();
+ if (regRecord->assignedInterval != nullptr && !regRecord->assignedInterval->isActive &&
+ regRecord->assignedInterval->isConstant)
+ {
+ regRecord->assignedInterval = nullptr;
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg()));
+ continue;
+ }
+
+ // If this is an exposed use, do nothing - this is merely a placeholder to attempt to
+ // ensure that a register is allocated for the full lifetime. The resolution logic
+ // will take care of moving to the appropriate register if needed.
+
+ if (refType == RefTypeExpUse)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_EXP_USE));
+ continue;
+ }
+
+ regNumber assignedRegister = REG_NA;
+
+ if (currentRefPosition->isIntervalRef())
+ {
+ currentInterval = currentRefPosition->getInterval();
+ assignedRegister = currentInterval->physReg;
+#ifdef DEBUG
+ if (VERBOSE && !dumpTerse)
+ {
+ currentInterval->dump();
+ }
+#endif // DEBUG
+
+ // Identify the special cases where we decide up-front not to allocate
+ bool allocate = true;
+ bool didDump = false;
+
+ if (refType == RefTypeParamDef || refType == RefTypeZeroInit)
+ {
+ // For a ParamDef with a weighted refCount less than unity, don't enregister it at entry.
+ // TODO-CQ: Consider doing this only for stack parameters, since otherwise we may be needlessly
+ // inserting a store.
+ LclVarDsc* varDsc = currentInterval->getLocalVar(compiler);
+ assert(varDsc != nullptr);
+ if (refType == RefTypeParamDef && varDsc->lvRefCntWtd <= BB_UNITY_WEIGHT)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval));
+ didDump = true;
+ allocate = false;
+ }
+ // If it has no actual references, mark it as "lastUse"; since it is not actually part
+ // of any flow it won't have been marked during dataflow. Otherwise, if we allocate a
+ // register we won't unassign it.
+ else if (currentRefPosition->nextRefPosition == nullptr)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ZERO_REF, currentInterval));
+ currentRefPosition->lastUse = true;
+ }
+ }
+#ifdef FEATURE_SIMD
+ else if (refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse)
+ {
+ Interval* lclVarInterval = currentInterval->relatedInterval;
+ if (lclVarInterval->physReg == REG_NA)
+ {
+ allocate = false;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ if (allocate == false)
+ {
+ if (assignedRegister != REG_NA)
+ {
+ unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition);
+ }
+ else if (!didDump)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
+ didDump = true;
+ }
+ currentRefPosition->registerAssignment = RBM_NONE;
+ continue;
+ }
+
+ if (currentInterval->isSpecialPutArg)
+ {
+ assert(!currentInterval->isLocalVar);
+ Interval* srcInterval = currentInterval->relatedInterval;
+ assert(srcInterval->isLocalVar);
+ if (refType == RefTypeDef)
+ {
+ assert(srcInterval->recentRefPosition->nodeLocation == currentLocation - 1);
+ RegRecord* physRegRecord = srcInterval->assignedReg;
+
+ // For a putarg_reg to be special, its next use location has to be the same
+ // as fixed reg's next kill location. Otherwise, if source lcl var's next use
+ // is after the kill of fixed reg but before putarg_reg's next use, fixed reg's
+ // kill would lead to spill of source but not the putarg_reg if it were treated
+ // as special.
+ if (srcInterval->isActive &&
+ genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment &&
+ currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation())
+ {
+ assert(physRegRecord->regNum == srcInterval->physReg);
+
+ // Special putarg_reg acts as a pass-thru since both source lcl var
+ // and putarg_reg have the same register allocated. Physical reg
+ // record of reg continue to point to source lcl var's interval
+ // instead of to putarg_reg's interval. So if a spill of reg
+ // allocated to source lcl var happens, to reallocate to another
+ // tree node, before its use at call node it will lead to spill of
+ // lcl var instead of putarg_reg since physical reg record is pointing
+ // to lcl var's interval. As a result, arg reg would get trashed leading
+ // to bad codegen. The assumption here is that source lcl var of a
+ // special putarg_reg doesn't get spilled and re-allocated prior to
+ // its use at the call node. This is ensured by marking physical reg
+ // record as busy until next kill.
+ physRegRecord->isBusyUntilNextKill = true;
+ }
+ else
+ {
+ currentInterval->isSpecialPutArg = false;
+ }
+ }
+ // If this is still a SpecialPutArg, continue;
+ if (currentInterval->isSpecialPutArg)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, currentInterval,
+ currentRefPosition->assignedReg()));
+ continue;
+ }
+ }
+
+ if (assignedRegister == REG_NA && RefTypeIsUse(refType))
+ {
+ currentRefPosition->reload = true;
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister));
+ }
+ }
+
+ regMaskTP assignedRegBit = RBM_NONE;
+ bool isInRegister = false;
+ if (assignedRegister != REG_NA)
+ {
+ isInRegister = true;
+ assignedRegBit = genRegMask(assignedRegister);
+ if (!currentInterval->isActive)
+ {
+ // If this is a use, it must have started the block on the stack, but the register
+ // was available for use so we kept the association.
+ if (RefTypeIsUse(refType))
+ {
+ assert(inVarToRegMaps[curBBNum][currentInterval->getVarIndex(compiler)] == REG_STK &&
+ previousRefPosition->nodeLocation <= curBBStartLocation);
+ isInRegister = false;
+ }
+ else
+ {
+ currentInterval->isActive = true;
+ }
+ }
+ assert(currentInterval->assignedReg != nullptr &&
+ currentInterval->assignedReg->regNum == assignedRegister &&
+ currentInterval->assignedReg->assignedInterval == currentInterval);
+ }
+
+ // If this is a physical register, we unconditionally assign it to itself!
+ if (currentRefPosition->isPhysRegRef)
+ {
+ RegRecord* currentReg = currentRefPosition->getReg();
+ Interval* assignedInterval = currentReg->assignedInterval;
+
+ if (assignedInterval != nullptr)
+ {
+ unassignPhysReg(currentReg, assignedInterval->recentRefPosition);
+ }
+ currentReg->isActive = true;
+ assignedRegister = currentReg->regNum;
+ assignedRegBit = genRegMask(assignedRegister);
+ if (refType == RefTypeKill)
+ {
+ currentReg->isBusyUntilNextKill = false;
+ }
+ }
+ else if (previousRefPosition != nullptr)
+ {
+ assert(previousRefPosition->nextRefPosition == currentRefPosition);
+ assert(assignedRegister == REG_NA || assignedRegBit == previousRefPosition->registerAssignment ||
+ currentRefPosition->outOfOrder || previousRefPosition->copyReg ||
+ previousRefPosition->refType == RefTypeExpUse || currentRefPosition->refType == RefTypeDummyDef);
+ }
+ else if (assignedRegister != REG_NA)
+ {
+ // Handle the case where this is a preassigned register (i.e. parameter).
+ // We don't want to actually use the preassigned register if it's not
+ // going to cover the lifetime - but we had to preallocate it to ensure
+ // that it remained live.
+ // TODO-CQ: At some point we may want to refine the analysis here, in case
+ // it might be beneficial to keep it in this reg for PART of the lifetime
+ if (currentInterval->isLocalVar)
+ {
+ regMaskTP preferences = currentInterval->registerPreferences;
+ bool keepAssignment = true;
+ bool matchesPreferences = (preferences & genRegMask(assignedRegister)) != RBM_NONE;
+
+ // Will the assigned register cover the lifetime? If not, does it at least
+ // meet the preferences for the next RefPosition?
+ RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg);
+ RefPosition* nextPhysRegRefPos = physRegRecord->getNextRefPosition();
+ if (nextPhysRegRefPos != nullptr &&
+ nextPhysRegRefPos->nodeLocation <= currentInterval->lastRefPosition->nodeLocation)
+ {
+ // Check to see if the existing assignment matches the preferences (e.g. callee save registers)
+ // and ensure that the next use of this localVar does not occur after the nextPhysRegRefPos
+ // There must be a next RefPosition, because we know that the Interval extends beyond the
+ // nextPhysRegRefPos.
+ RefPosition* nextLclVarRefPos = currentRefPosition->nextRefPosition;
+ assert(nextLclVarRefPos != nullptr);
+ if (!matchesPreferences || nextPhysRegRefPos->nodeLocation < nextLclVarRefPos->nodeLocation ||
+ physRegRecord->conflictingFixedRegReference(nextLclVarRefPos))
+ {
+ keepAssignment = false;
+ }
+ }
+ else if (refType == RefTypeParamDef && !matchesPreferences)
+ {
+ // Don't use the register, even if available, if it doesn't match the preferences.
+ // Note that this case is only for ParamDefs, for which we haven't yet taken preferences
+ // into account (we've just automatically got the initial location). In other cases,
+ // we would already have put it in a preferenced register, if it was available.
+ // TODO-CQ: Consider expanding this to check availability - that would duplicate
+ // code here, but otherwise we may wind up in this register anyway.
+ keepAssignment = false;
+ }
+
+ if (keepAssignment == false)
+ {
+ currentRefPosition->registerAssignment = allRegs(currentInterval->registerType);
+ unassignPhysRegNoSpill(physRegRecord);
+
+ // If the preferences are currently set to just this register, reset them to allRegs
+ // of the appropriate type (just as we reset the registerAssignment for this
+ // RefPosition above).
+ // Otherwise, simply remove this register from the preferences, if it's there.
+
+ if (currentInterval->registerPreferences == assignedRegBit)
+ {
+ currentInterval->registerPreferences = currentRefPosition->registerAssignment;
+ }
+ else
+ {
+ currentInterval->registerPreferences &= ~assignedRegBit;
+ }
+
+ assignedRegister = REG_NA;
+ assignedRegBit = RBM_NONE;
+ }
+ }
+ }
+
+ if (assignedRegister != REG_NA)
+ {
+ // If there is a conflicting fixed reference, insert a copy.
+ RegRecord* physRegRecord = getRegisterRecord(assignedRegister);
+ if (physRegRecord->conflictingFixedRegReference(currentRefPosition))
+ {
+ // We may have already reassigned the register to the conflicting reference.
+ // If not, we need to unassign this interval.
+ if (physRegRecord->assignedInterval == currentInterval)
+ {
+ unassignPhysRegNoSpill(physRegRecord);
+ }
+ currentRefPosition->moveReg = true;
+ assignedRegister = REG_NA;
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister));
+ }
+ else if ((genRegMask(assignedRegister) & currentRefPosition->registerAssignment) != 0)
+ {
+ currentRefPosition->registerAssignment = assignedRegBit;
+ if (!currentReferent->isActive)
+ {
+ // If we've got an exposed use at the top of a block, the
+ // interval might not have been active. Otherwise if it's a use,
+ // the interval must be active.
+ if (refType == RefTypeDummyDef)
+ {
+ currentReferent->isActive = true;
+ assert(getRegisterRecord(assignedRegister)->assignedInterval == currentInterval);
+ }
+ else
+ {
+ currentRefPosition->reload = true;
+ }
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister));
+ }
+ else
+ {
+ // This must be a localVar or a single-reg fixed use or a tree temp with conflicting def & use.
+
+ assert(currentInterval && (currentInterval->isLocalVar || currentRefPosition->isFixedRegRef ||
+ currentInterval->hasConflictingDefUse));
+
+ // It's already in a register, but not one we need.
+ // If it is a fixed use that is not marked "delayRegFree", there is already a FixedReg to ensure that
+ // the needed reg is not otherwise in use, so we can simply ignore it and codegen will do the copy.
+ // The reason we need special handling for the "delayRegFree" case is that we need to mark the
+ // fixed-reg as in-use and delayed (the FixedReg RefPosition doesn't handle the delay requirement).
+ // Otherwise, if this is a pure use localVar or tree temp, we assign a copyReg, but must free both regs
+ // if it is a last use.
+ if (!currentRefPosition->isFixedRegRef || currentRefPosition->delayRegFree)
+ {
+ if (!RefTypeIsDef(currentRefPosition->refType))
+ {
+ regNumber copyReg = assignCopyReg(currentRefPosition);
+ assert(copyReg != REG_NA);
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg));
+ lastAllocatedRefPosition = currentRefPosition;
+ if (currentRefPosition->lastUse)
+ {
+ if (currentRefPosition->delayRegFree)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval,
+ assignedRegister));
+ delayRegsToFree |=
+ (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
+ }
+ else
+ {
+ INDEBUG(
+ dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister));
+ regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
+ }
+ }
+ // If this is a tree temp (non-localVar) interval, we will need an explicit move.
+ if (!currentInterval->isLocalVar)
+ {
+ currentRefPosition->moveReg = true;
+ currentRefPosition->copyReg = false;
+ }
+ continue;
+ }
+ else
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister));
+ regsToFree |= genRegMask(assignedRegister);
+ // We want a new register, but we don't want this to be considered a spill.
+ assignedRegister = REG_NA;
+ if (physRegRecord->assignedInterval == currentInterval)
+ {
+ unassignPhysRegNoSpill(physRegRecord);
+ }
+ }
+ }
+ else
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, assignedRegister));
+ }
+ }
+ }
+
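+ // If we don't have a register at this point (or relinquished it above), try to allocate one now.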
+ if (assignedRegister == REG_NA)
+ {
+ bool allocateReg = true;
+
+ if (currentRefPosition->AllocateIfProfitable())
+ {
+ // We can avoid allocating a register if it is the last use requiring a reload.
+ if (currentRefPosition->lastUse && currentRefPosition->reload)
+ {
+ allocateReg = false;
+ }
+
+#ifdef DEBUG
+ // Under stress mode, don't attempt to allocate a reg to
+ // reg optional ref position.
+ if (allocateReg && regOptionalNoAlloc())
+ {
+ allocateReg = false;
+ }
+#endif
+ }
+
+ if (allocateReg)
+ {
+ // Try to allocate a register
+ assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
+ }
+
+ // If no register was found, and if the currentRefPosition must have a register,
+ // then find a register to spill
+ if (assignedRegister == REG_NA)
+ {
+#ifdef FEATURE_SIMD
+ if (refType == RefTypeUpperVectorSaveDef)
+ {
+ // TODO-CQ: Determine whether copying to two integer callee-save registers would be profitable.
+ currentRefPosition->registerAssignment = (allRegs(TYP_FLOAT) & RBM_FLT_CALLEE_TRASH);
+ assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
+ // There MUST be caller-save registers available, because they have all just been killed.
+ assert(assignedRegister != REG_NA);
+ // Now, spill it.
+ // (These will look a bit backward in the dump, but it's a pain to dump the alloc before the spill).
+ unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition);
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, currentInterval, assignedRegister));
+ // Now set assignedRegister to REG_NA again so that we don't re-activate it.
+ assignedRegister = REG_NA;
+ }
+ else
+#endif // FEATURE_SIMD
+ if (currentRefPosition->RequiresRegister() || currentRefPosition->AllocateIfProfitable())
+ {
+ if (allocateReg)
+ {
+ assignedRegister = allocateBusyReg(currentInterval, currentRefPosition,
+ currentRefPosition->AllocateIfProfitable());
+ }
+
+ if (assignedRegister != REG_NA)
+ {
+ INDEBUG(
+ dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_SPILLED_REG, currentInterval, assignedRegister));
+ }
+ else
+ {
+ // This can happen only for those ref positions that are to be allocated
+ // only if profitable.
+ noway_assert(currentRefPosition->AllocateIfProfitable());
+
+ currentRefPosition->registerAssignment = RBM_NONE;
+ currentRefPosition->reload = false;
+
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
+ }
+ }
+ else
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
+ currentRefPosition->registerAssignment = RBM_NONE;
+ currentInterval->isActive = false;
+ }
+ }
+#ifdef DEBUG
+ else
+ {
+ if (VERBOSE)
+ {
+ if (currentInterval->isConstant && (currentRefPosition->treeNode != nullptr) &&
+ currentRefPosition->treeNode->IsReuseRegVal())
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_REUSE_REG, nullptr, assignedRegister, currentBlock);
+ }
+ else
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, nullptr, assignedRegister, currentBlock);
+ }
+ }
+ }
+#endif // DEBUG
+
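+ // A DummyDef that was given a register establishes the live-in location of this lclVar for the block.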
+ if (refType == RefTypeDummyDef && assignedRegister != REG_NA)
+ {
+ setInVarRegForBB(curBBNum, currentInterval->varNum, assignedRegister);
+ }
+
+ // If we allocated a register, and this is a use of a spilled value,
+ // it should have been marked for reload above.
+ if (assignedRegister != REG_NA && RefTypeIsUse(refType) && !isInRegister)
+ {
+ assert(currentRefPosition->reload);
+ }
+ }
+
+ // If we allocated a register, record it
+ if (currentInterval != nullptr && assignedRegister != REG_NA)
+ {
+ assignedRegBit = genRegMask(assignedRegister);
+ currentRefPosition->registerAssignment = assignedRegBit;
+ currentInterval->physReg = assignedRegister;
+ regsToFree &= ~assignedRegBit; // we'll set it again later if it's dead
+
+ // If this interval is dead, free the register.
+ // The interval could be dead if this is a user variable, or if the
+ // node is being evaluated for side effects, or a call whose result
+ // is not used, etc.
+ if (currentRefPosition->lastUse || currentRefPosition->nextRefPosition == nullptr)
+ {
+ assert(currentRefPosition->isIntervalRef());
+
+ if (refType != RefTypeExpUse && currentRefPosition->nextRefPosition == nullptr)
+ {
+ if (currentRefPosition->delayRegFree)
+ {
+ delayRegsToFree |= assignedRegBit;
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED));
+ }
+ else
+ {
+ regsToFree |= assignedRegBit;
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE));
+ }
+ }
+ else
+ {
+ currentInterval->isActive = false;
+ }
+ }
+
+ lastAllocatedRefPosition = currentRefPosition;
+ }
+ }
+
+ // Free registers to clear associated intervals for resolution phase
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ // If we have extended lifetimes, we need to make sure all the registers are freed.
+ for (int regNumIndex = 0; regNumIndex <= REG_FP_LAST; regNumIndex++)
+ {
+ RegRecord& regRecord = physRegs[regNumIndex];
+ Interval* interval = regRecord.assignedInterval;
+ if (interval != nullptr)
+ {
+ interval->isActive = false;
+ unassignPhysReg(&regRecord, nullptr);
+ }
+ }
+ }
+ else
+#endif // DEBUG
+ {
+ freeRegisters(regsToFree | delayRegsToFree);
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ if (dumpTerse)
+ {
+ // Dump the RegRecords after the last RefPosition is handled.
+ dumpRegRecords();
+ printf("\n");
+ }
+
+ dumpRefPositions("AFTER ALLOCATION");
+ dumpVarRefPositions("AFTER ALLOCATION");
+
+ // Dump the intervals that remain active
+ printf("Active intervals at end of allocation:\n");
+
+ // We COULD just reuse the intervalIter from above, but ArrayListIterator doesn't
+ // provide a Reset function (!) - we'll probably replace this so don't bother
+ // adding it
+
+ for (auto& interval : intervals)
+ {
+ if (interval.isActive)
+ {
+ printf("Active ");
+ interval.dump();
+ }
+ }
+
+ printf("\n");
+ }
+#endif // DEBUG
+}
+
+// LinearScan::resolveLocalRef
+// Description:
+// Update the graph for a local reference.
+// Also, track the register (if any) that is currently occupied.
+// Arguments:
+// treeNode: The lclVar that's being resolved
+// currentRefPosition: the RefPosition associated with the treeNode
+//
+// Details:
+// This method is called for each local reference, during the resolveRegisters
+// phase of LSRA. It is responsible for keeping the following in sync:
+// - varDsc->lvRegNum (and lvOtherReg) contain the unique register location.
+// If it is not in the same register through its lifetime, it is set to REG_STK.
+// - interval->physReg is set to the assigned register
+// (i.e. at the code location which is currently being handled by resolveRegisters())
+// - interval->isActive is true iff the interval is live and occupying a register
+// - interval->isSpilled is set to true if the interval is EVER spilled
+// - interval->isSplit is set to true if the interval does not occupy the same
+// register throughout the method
+// - RegRecord->assignedInterval points to the interval which currently occupies
+// the register
+// - For each lclVar node:
+// - gtRegNum/gtRegPair is set to the currently allocated register(s)
+// - GTF_REG_VAL is set if it is a use, and is in a register
+// - GTF_SPILLED is set on a use if it must be reloaded prior to use (GTF_REG_VAL
+// must not be set)
+// - GTF_SPILL is set if it must be spilled after use (GTF_REG_VAL may or may not
+// be set)
+//
+// A copyReg is an ugly case where the variable must be in a specific (fixed) register,
+// but it currently resides elsewhere. The register allocator must track the use of the
+// fixed register, but it marks the lclVar node with the register it currently lives in
+// and the code generator does the necessary move.
+//
+// Before beginning, the varDsc for each parameter must be set to its initial location.
+//
+// NICE: Consider tracking whether an Interval is always in the same location (register/stack)
+// in which case it will require no resolution.
+//
+void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosition* currentRefPosition)
+{
+ assert((block == nullptr) == (treeNode == nullptr));
+
+ // Is this a tracked local? Or just a register allocated for loading
+ // a non-tracked one?
+ Interval* interval = currentRefPosition->getInterval();
+ if (!interval->isLocalVar)
+ {
+ return;
+ }
+ interval->recentRefPosition = currentRefPosition;
+ LclVarDsc* varDsc = interval->getLocalVar(compiler);
+
+ if (currentRefPosition->registerAssignment == RBM_NONE)
+ {
+ assert(!currentRefPosition->RequiresRegister());
+
+ interval->isSpilled = true;
+ varDsc->lvRegNum = REG_STK;
+ if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
+ {
+ interval->assignedReg->assignedInterval = nullptr;
+ }
+ interval->assignedReg = nullptr;
+ interval->physReg = REG_NA;
+
+ return;
+ }
+
+ // In most cases, the assigned and home registers will be the same.
+ // The exception is the copyReg case, where we've assigned a register
+ // for a specific purpose, but the lclVar will keep its original (home) register assignment.
+ regNumber assignedReg = currentRefPosition->assignedReg();
+ regNumber homeReg = assignedReg;
+
+ // Undo any previous association with a physical register, UNLESS this
+ // is a copyReg
+ if (!currentRefPosition->copyReg)
+ {
+ regNumber oldAssignedReg = interval->physReg;
+ if (oldAssignedReg != REG_NA && assignedReg != oldAssignedReg)
+ {
+ RegRecord* oldRegRecord = getRegisterRecord(oldAssignedReg);
+ if (oldRegRecord->assignedInterval == interval)
+ {
+ oldRegRecord->assignedInterval = nullptr;
+ }
+ }
+ }
+
+ if (currentRefPosition->refType == RefTypeUse && !currentRefPosition->reload)
+ {
+ // Was this spilled after our predecessor was scheduled?
+ if (interval->physReg == REG_NA)
+ {
+ assert(inVarToRegMaps[curBBNum][varDsc->lvVarIndex] == REG_STK);
+ currentRefPosition->reload = true;
+ }
+ }
+
+ bool reload = currentRefPosition->reload;
+ bool spillAfter = currentRefPosition->spillAfter;
+
+ // In the reload case we simply do not set GTF_REG_VAL, and it gets
+ // referenced from the variable's home location.
+ // This is also true for a pure def which is spilled.
+ if (reload && currentRefPosition->refType != RefTypeDef)
+ {
+ varDsc->lvRegNum = REG_STK;
+ if (!spillAfter)
+ {
+ interval->physReg = assignedReg;
+ }
+
+ // If there is no treeNode, this must be a RefTypeExpUse, in
+ // which case we did the reload already
+ if (treeNode != nullptr)
+ {
+ treeNode->gtFlags |= GTF_SPILLED;
+ if (spillAfter)
+ {
+ if (currentRefPosition->AllocateIfProfitable())
+ {
+ // This is a use of lclVar that is flagged as reg-optional
+ // by lower/codegen and marked for both reload and spillAfter.
+ // In this case we can avoid unnecessary reload and spill
+ // by setting reg on lclVar to REG_STK and reg on tree node
+ // to REG_NA. Codegen will generate the code by considering
+ // it as a contained memory operand.
+ //
+ // Note that varDsc->lvRegNum was already set to REG_STK above.
+ interval->physReg = REG_NA;
+ treeNode->gtRegNum = REG_NA;
+ treeNode->gtFlags &= ~GTF_SPILLED;
+ }
+ else
+ {
+ treeNode->gtFlags |= GTF_SPILL;
+ }
+ }
+ }
+ else
+ {
+ assert(currentRefPosition->refType == RefTypeExpUse);
+ }
+
+ // If we have an undefined use set it as non-reg
+ if (!interval->isSpilled)
+ {
+ if (varDsc->lvIsParam && !varDsc->lvIsRegArg && currentRefPosition == interval->firstRefPosition)
+ {
+ // Parameters are the only thing that can be used before defined
+ }
+ else
+ {
+ // if we see a use before def of something else, the zero init flag better not be set.
+ noway_assert(!compiler->info.compInitMem);
+ // if it is not set, then the behavior is undefined but we don't want to crash or assert
+ interval->isSpilled = true;
+ }
+ }
+ }
+ else if (spillAfter && !RefTypeIsUse(currentRefPosition->refType))
+ {
+ // In the case of a pure def, don't bother spilling - just assign it to the
+ // stack. However, we need to remember that it was spilled.
+
+ interval->isSpilled = true;
+ varDsc->lvRegNum = REG_STK;
+ interval->physReg = REG_NA;
+ if (treeNode != nullptr)
+ {
+ treeNode->gtRegNum = REG_NA;
+ }
+ }
+ else
+ {
+ // Not reload and Not pure-def that's spillAfter
+
+ if (currentRefPosition->copyReg || currentRefPosition->moveReg)
+ {
+ // For a copyReg or moveReg, we have two cases:
+ // - In the first case, we have a fixedReg - i.e. a register which the code
+ // generator is constrained to use.
+ // The code generator will generate the appropriate move to meet the requirement.
+ // - In the second case, we were forced to use a different register because of
+ // interference (or JitStressRegs).
+ // In this case, we generate a GT_COPY.
+ // In either case, we annotate the treeNode with the register in which the value
+ // currently lives. For moveReg, the homeReg is the new register (as assigned above).
+ // But for copyReg, the homeReg remains unchanged.
+
+ assert(treeNode != nullptr);
+ treeNode->gtRegNum = interval->physReg;
+
+ if (currentRefPosition->copyReg)
+ {
+ homeReg = interval->physReg;
+ }
+ else
+ {
+ interval->physReg = assignedReg;
+ }
+
+ if (!currentRefPosition->isFixedRegRef || currentRefPosition->moveReg)
+ {
+ // This is the second case, where we need to generate a copy
+ insertCopyOrReload(block, treeNode, currentRefPosition->getMultiRegIdx(), currentRefPosition);
+ }
+ }
+ else
+ {
+ interval->physReg = assignedReg;
+
+ if (!interval->isSpilled && !interval->isSplit)
+ {
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ // If the register assignments don't match, then this interval is split,
+ // but not spilled (yet).
+ // However, we don't have a single register assignment now
+ if (varDsc->lvRegNum != assignedReg)
+ {
+ interval->isSplit = true;
+ varDsc->lvRegNum = REG_STK;
+ }
+ }
+ else
+ {
+ varDsc->lvRegNum = assignedReg;
+ }
+ }
+ }
+ if (spillAfter)
+ {
+ if (treeNode != nullptr)
+ {
+ treeNode->gtFlags |= GTF_SPILL;
+ }
+ interval->isSpilled = true;
+ interval->physReg = REG_NA;
+ varDsc->lvRegNum = REG_STK;
+ }
+
+ // This value is in a register, UNLESS we already saw this treeNode
+ // and marked it for reload
+ if (treeNode != nullptr && !(treeNode->gtFlags & GTF_SPILLED))
+ {
+ treeNode->gtFlags |= GTF_REG_VAL;
+ }
+ }
+
+ // Update the physRegRecord for the register, so that we know what vars are in
+ // regs at the block boundaries
+ RegRecord* physRegRecord = getRegisterRecord(homeReg);
+ if (spillAfter || currentRefPosition->lastUse)
+ {
+ physRegRecord->assignedInterval = nullptr;
+ interval->assignedReg = nullptr;
+ interval->physReg = REG_NA;
+ interval->isActive = false;
+ }
+ else
+ {
+ interval->isActive = true;
+ physRegRecord->assignedInterval = interval;
+ interval->assignedReg = physRegRecord;
+ }
+}
+
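+//------------------------------------------------------------------------
+// writeRegisters: Set the register(s) on the tree node from the given RefPosition's assignment.
+//
+// Arguments:
+// currentRefPosition - the RefPosition that carries the assigned register and multi-reg index
+// tree - the node whose register(s) are being set
+//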
+void LinearScan::writeRegisters(RefPosition* currentRefPosition, GenTree* tree)
+{
+ lsraAssignRegToTree(tree, currentRefPosition->assignedReg(), currentRefPosition->getMultiRegIdx());
+}
+
+//------------------------------------------------------------------------
+// insertCopyOrReload: Insert a copy in the case where a tree node value must be moved
+// to a different register at the point of use (GT_COPY), or it is reloaded to a different register
+// than the one it was spilled from (GT_RELOAD).
+//
+// Arguments:
+// tree - This is the node to copy or reload.
+// Insert copy or reload node between this node and its parent.
+// multiRegIdx - register position of tree node for which copy or reload is needed.
+// refPosition - The RefPosition at which copy or reload will take place.
+//
+// Notes:
+// The GT_COPY or GT_RELOAD will be inserted in the proper spot in execution order where the reload is to occur.
+//
+// For example, for this tree (numbers are execution order, lower is earlier and higher is later):
+//
+// +---------+----------+
+// | GT_ADD (3) |
+// +---------+----------+
+// |
+// / \
+// / \
+// / \
+// +-------------------+ +----------------------+
+// | x (1) | "tree" | y (2) |
+// +-------------------+ +----------------------+
+//
+// generate this tree:
+//
+// +---------+----------+
+// | GT_ADD (4) |
+// +---------+----------+
+// |
+// / \
+// / \
+// / \
+// +-------------------+ +----------------------+
+// | GT_RELOAD (3) | | y (2) |
+// +-------------------+ +----------------------+
+// |
+// +-------------------+
+// | x (1) | "tree"
+// +-------------------+
+//
+// Note in particular that the GT_RELOAD node gets inserted in execution order immediately before the parent of "tree",
+// which seems a bit weird since normally a node's parent (in this case, the parent of "x", GT_RELOAD in the "after"
+// picture) immediately follows all of its children (that is, normally the execution ordering is postorder).
+// The ordering must be this weird "out of normal order" way because the "x" node is being spilled, probably
+// because the expression in the tree represented above by "y" has high register requirements. We don't want
+// to reload immediately, of course. So we put GT_RELOAD where the reload should actually happen.
+//
+// Note that GT_RELOAD is required when we reload to a different register than the one we spilled to. It can also be
+// used if we reload to the same register. Normally, though, in that case we just mark the node with GTF_SPILLED,
+// and the unspilling code automatically reuses the same register, and does the reload when it notices that flag
+// when considering a node's operands.
+//
+void LinearScan::insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned multiRegIdx, RefPosition* refPosition)
+{
+ LIR::Range& blockRange = LIR::AsRange(block);
+
+ LIR::Use treeUse;
+ bool foundUse = blockRange.TryGetUse(tree, &treeUse);
+ assert(foundUse);
+
+ GenTree* parent = treeUse.User();
+
+ genTreeOps oper;
+ if (refPosition->reload)
+ {
+ oper = GT_RELOAD;
+ }
+ else
+ {
+ oper = GT_COPY;
+ }
+
+ // If the parent is a reload/copy node, then tree must be a multi-reg call node
+ // that has already had one of its registers spilled. This is because a multi-reg
+ // call node is the only node whose RefTypeDef positions get independently
+ // spilled or reloaded. It is possible that one of its RefTypeDef positions got
+ // spilled and the next use of it requires it to be in a different register.
+ //
+ // In this case, set the i'th position register of the reload/copy node to the register
+ // allocated for the copy/reload refPosition. Essentially, a copy/reload node will have
+ // a register for each multi-reg position of its child. If there is a valid register in
+ // the i'th position of a GT_COPY or GT_RELOAD node, then the corresponding result of its
+ // child needs to be copied or reloaded to that register.
+ if (parent->IsCopyOrReload())
+ {
+ noway_assert(parent->OperGet() == oper);
+ noway_assert(tree->IsMultiRegCall());
+ GenTreeCall* call = tree->AsCall();
+ GenTreeCopyOrReload* copyOrReload = parent->AsCopyOrReload();
+ noway_assert(copyOrReload->GetRegNumByIdx(multiRegIdx) == REG_NA);
+ copyOrReload->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
+ }
+ else
+ {
+ // Create the new node, with "tree" as its only child.
+ var_types treeType = tree->TypeGet();
+
+#ifdef FEATURE_SIMD
+ // Check to see whether we need to move to a different register set.
+ // This currently only happens in the case of SIMD vector types that are small enough (pointer size)
+ // that they must be passed & returned in integer registers.
+ // 'treeType' is the type of the register we are moving FROM,
+ // and refPosition->registerAssignment is the mask for the register we are moving TO.
+ // If they don't match, we need to reverse the type for the "move" node.
+
+ if ((allRegs(treeType) & refPosition->registerAssignment) == 0)
+ {
+ treeType = (useFloatReg(treeType)) ? TYP_I_IMPL : TYP_SIMD8;
+ }
+#endif // FEATURE_SIMD
+
+ GenTreeCopyOrReload* newNode = new (compiler, oper) GenTreeCopyOrReload(oper, treeType, tree);
+ assert(refPosition->registerAssignment != RBM_NONE);
+ newNode->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
+ newNode->gtLsraInfo.isLsraAdded = true;
+ newNode->gtLsraInfo.isLocalDefUse = false;
+ if (refPosition->copyReg)
+ {
+ // This is a TEMPORARY copy
+ assert(isCandidateLocalRef(tree));
+ newNode->gtFlags |= GTF_VAR_DEATH;
+ }
+
+ // Insert the copy/reload after the spilled node and replace the use of the original node with a use
+ // of the copy/reload.
+ blockRange.InsertAfter(tree, newNode);
+ treeUse.ReplaceWith(compiler, newNode);
+ }
+}
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+//------------------------------------------------------------------------
+// insertUpperVectorSaveAndReload: Insert code to save and restore the upper half of a vector that lives
+// in a callee-save register at the point of a kill (the upper half is
+// not preserved).
+//
+// Arguments:
+// tree - This is the node around which we will insert the Save & Reload.
+// It will be a call or some node that turns into a call.
+// refPosition - The RefTypeUpperVectorSaveDef RefPosition.
+// block - The BasicBlock containing 'tree'; the save is inserted before 'tree' and the restore after it.
+//
+void LinearScan::insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* refPosition, BasicBlock* block)
+{
+ Interval* lclVarInterval = refPosition->getInterval()->relatedInterval;
+ assert(lclVarInterval->isLocalVar == true);
+ LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
+ assert(varDsc->lvType == LargeVectorType);
+ regNumber lclVarReg = lclVarInterval->physReg;
+ if (lclVarReg == REG_NA)
+ {
+ return;
+ }
+
+ assert((genRegMask(lclVarReg) & RBM_FLT_CALLEE_SAVED) != RBM_NONE);
+
+ regNumber spillReg = refPosition->assignedReg();
+ bool spillToMem = refPosition->spillAfter;
+
+ LIR::Range& blockRange = LIR::AsRange(block);
+
+ // First, insert the save as an embedded statement before the call.
+
+ GenTreePtr saveLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, LargeVectorType);
+ saveLcl->gtLsraInfo.isLsraAdded = true;
+ saveLcl->gtRegNum = lclVarReg;
+ saveLcl->gtFlags |= GTF_REG_VAL;
+ saveLcl->gtLsraInfo.isLocalDefUse = false;
+
+ GenTreeSIMD* simdNode =
+ new (compiler, GT_SIMD) GenTreeSIMD(LargeVectorSaveType, saveLcl, nullptr, SIMDIntrinsicUpperSave,
+ varDsc->lvBaseType, genTypeSize(LargeVectorType));
+ simdNode->gtLsraInfo.isLsraAdded = true;
+ simdNode->gtRegNum = spillReg;
+ if (spillToMem)
+ {
+ simdNode->gtFlags |= GTF_SPILL;
+ }
+
+ blockRange.InsertBefore(tree, LIR::SeqTree(compiler, simdNode));
+
+ // Now insert the restore after the call.
+
+ GenTreePtr restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, LargeVectorType);
+ restoreLcl->gtLsraInfo.isLsraAdded = true;
+ restoreLcl->gtRegNum = lclVarReg;
+ restoreLcl->gtFlags |= GTF_REG_VAL;
+ restoreLcl->gtLsraInfo.isLocalDefUse = false;
+
+ simdNode = new (compiler, GT_SIMD)
+ GenTreeSIMD(LargeVectorType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore, varDsc->lvBaseType, 32);
+ simdNode->gtLsraInfo.isLsraAdded = true;
+ simdNode->gtRegNum = spillReg;
+ if (spillToMem)
+ {
+ simdNode->gtFlags |= GTF_SPILLED;
+ }
+
+ blockRange.InsertAfter(tree, LIR::SeqTree(compiler, simdNode));
+}
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+//------------------------------------------------------------------------
+// initMaxSpill: Initializes the LinearScan members used to track the max number
+// of concurrent spills. This is needed so that we can set the
+// fields in Compiler, so that the code generator, in turn can
+// allocate the right number of spill locations.
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// This is called before any calls to updateMaxSpill().
+
+void LinearScan::initMaxSpill()
+{
+ needDoubleTmpForFPCall = false;
+ needFloatTmpForFPCall = false;
+ for (int i = 0; i < TYP_COUNT; i++)
+ {
+ maxSpill[i] = 0;
+ currentSpill[i] = 0;
+ }
+}
+
+//------------------------------------------------------------------------
+// recordMaxSpill: Sets the fields in Compiler for the max number of concurrent spills.
+// (See the comment on initMaxSpill.)
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// This is called after updateMaxSpill() has been called for all "real"
+// RefPositions.
+
+void LinearScan::recordMaxSpill()
+{
+ // Note: due to the temp normalization process (see tmpNormalizeType)
+ // only a few types should actually be seen here.
+ JITDUMP("Recording the maximum number of concurrent spills:\n");
+#ifdef _TARGET_X86_
+ var_types returnType = compiler->tmpNormalizeType(compiler->info.compRetType);
+ if (needDoubleTmpForFPCall || (returnType == TYP_DOUBLE))
+ {
+ JITDUMP("Adding a spill temp for moving a double call/return value between xmm reg and x87 stack.\n");
+ maxSpill[TYP_DOUBLE] += 1;
+ }
+ if (needFloatTmpForFPCall || (returnType == TYP_FLOAT))
+ {
+ JITDUMP("Adding a spill temp for moving a float call/return value between xmm reg and x87 stack.\n");
+ maxSpill[TYP_FLOAT] += 1;
+ }
+#endif // _TARGET_X86_
+ for (int i = 0; i < TYP_COUNT; i++)
+ {
+ if (var_types(i) != compiler->tmpNormalizeType(var_types(i)))
+ {
+ // Only normalized types should have anything in the maxSpill array.
+ // We assume here that if type 'i' does not normalize to itself, then
+ // nothing else normalizes to 'i', either.
+ assert(maxSpill[i] == 0);
+ }
+ JITDUMP(" %s: %d\n", varTypeName(var_types(i)), maxSpill[i]);
+ if (maxSpill[i] != 0)
+ {
+ compiler->tmpPreAllocateTemps(var_types(i), maxSpill[i]);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// updateMaxSpill: Update the maximum number of concurrent spills
+//
+// Arguments:
+// refPosition - the current RefPosition being handled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The RefPosition has an associated interval (getInterval() will
+// otherwise assert).
+//
+// Notes:
+// This is called for each "real" RefPosition during the writeback
+// phase of LSRA. It keeps track of how many concurrently-live
+// spills there are, and the largest number seen so far.
+
+void LinearScan::updateMaxSpill(RefPosition* refPosition)
+{
+ RefType refType = refPosition->refType;
+
+ if (refPosition->spillAfter || refPosition->reload ||
+ (refPosition->AllocateIfProfitable() && refPosition->assignedReg() == REG_NA))
+ {
+ Interval* interval = refPosition->getInterval();
+ if (!interval->isLocalVar)
+ {
+ // The tmp allocation logic 'normalizes' types to a small number of
+ // types that need distinct stack locations from each other.
+ // Those types are currently gc refs, byrefs, <= 4 byte non-GC items,
+ // 8-byte non-GC items, and 16-byte or 32-byte SIMD vectors.
+ // LSRA is agnostic to those choices but needs
+ // to know what they are here.
+ var_types typ;
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if ((refType == RefTypeUpperVectorSaveDef) || (refType == RefTypeUpperVectorSaveUse))
+ {
+ typ = LargeVectorSaveType;
+ }
+ else
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ {
+ GenTreePtr treeNode = refPosition->treeNode;
+ if (treeNode == nullptr)
+ {
+ assert(RefTypeIsUse(refType));
+ treeNode = interval->firstRefPosition->treeNode;
+ }
+ assert(treeNode != nullptr);
+
+ // In case of multi-reg call nodes, we need to use the type
+ // of the return register given by multiRegIdx of the refposition.
+ if (treeNode->IsMultiRegCall())
+ {
+ ReturnTypeDesc* retTypeDesc = treeNode->AsCall()->GetReturnTypeDesc();
+ typ = retTypeDesc->GetReturnRegType(refPosition->getMultiRegIdx());
+ }
+ else
+ {
+ typ = treeNode->TypeGet();
+ }
+ typ = compiler->tmpNormalizeType(typ);
+ }
+
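+ // A def that is spilled (and not reloaded at this position) increases the number of
+ // concurrently-live spill temps of this type; a reload of a previously spilled value decreases it.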
+ if (refPosition->spillAfter && !refPosition->reload)
+ {
+ currentSpill[typ]++;
+ if (currentSpill[typ] > maxSpill[typ])
+ {
+ maxSpill[typ] = currentSpill[typ];
+ }
+ }
+ else if (refPosition->reload)
+ {
+ assert(currentSpill[typ] > 0);
+ currentSpill[typ]--;
+ }
+ else if (refPosition->AllocateIfProfitable() && refPosition->assignedReg() == REG_NA)
+ {
+ // This is a spill temp that is not reloaded into a register because it is
+ // marked allocate-if-profitable and is used directly from its
+ // memory location. To properly account for the max spill of 'typ',
+ // we decrement the spill count.
+ assert(RefTypeIsUse(refType));
+ assert(currentSpill[typ] > 0);
+ currentSpill[typ]--;
+ }
+ JITDUMP(" Max spill for %s is %d\n", varTypeName(typ), maxSpill[typ]);
+ }
+ }
+}
+
+// This is the final phase of register allocation. It writes the register assignments to
+// the tree, and performs resolution across joins and backedges.
+//
+void LinearScan::resolveRegisters()
+{
+ // Iterate over the tree and the RefPositions in lockstep
+ // - annotate the tree with register assignments by setting gtRegNum or gtRegPair (for longs)
+ // on the tree node
+ // - track globally-live var locations
+ // - add resolution points at split/merge/critical points as needed
+
+ // Need to use the same traversal order as the one that assigns the location numbers.
+
+ // Dummy RefPositions have been added at any split, join or critical edge, at the
+ // point where resolution may be required. These are located:
+ // - for a split, at the top of the non-adjacent block
+ // - for a join, at the bottom of the non-adjacent joining block
+ // - for a critical edge, at the top of the target block of each critical
+ // edge.
+ // Note that a target block may have multiple incoming critical or split edges
+ //
+ // These RefPositions record the expected location of the Interval at that point.
+ // At each branch, we identify the location of each liveOut interval, and check
+ // against the RefPositions at the target.
+
+ BasicBlock* block;
+ LsraLocation currentLocation = MinLocation;
+
+ // Clear register assignments - these will be reestablished as lclVar defs (including RefTypeParamDefs)
+ // are encountered.
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+ if (assignedInterval != nullptr)
+ {
+ assignedInterval->assignedReg = nullptr;
+ assignedInterval->physReg = REG_NA;
+ }
+ physRegRecord->assignedInterval = nullptr;
+ physRegRecord->recentRefPosition = nullptr;
+ }
+
+ // Clear "recentRefPosition" for lclVar intervals
+ for (unsigned lclNum = 0; lclNum < compiler->lvaCount; lclNum++)
+ {
+ localVarIntervals[lclNum]->recentRefPosition = nullptr;
+ localVarIntervals[lclNum]->isActive = false;
+ }
+
+ // handle incoming arguments and special temps
+ auto currentRefPosition = refPositions.begin();
+
+ VarToRegMap entryVarToRegMap = inVarToRegMaps[compiler->fgFirstBB->bbNum];
+ while (currentRefPosition != refPositions.end() &&
+ (currentRefPosition->refType == RefTypeParamDef || currentRefPosition->refType == RefTypeZeroInit))
+ {
+ Interval* interval = currentRefPosition->getInterval();
+ assert(interval != nullptr && interval->isLocalVar);
+ resolveLocalRef(nullptr, nullptr, currentRefPosition);
+ regNumber reg = REG_STK;
+ int varIndex = interval->getVarIndex(compiler);
+
+ if (!currentRefPosition->spillAfter && currentRefPosition->registerAssignment != RBM_NONE)
+ {
+ reg = currentRefPosition->assignedReg();
+ }
+ else
+ {
+ reg = REG_STK;
+ interval->isActive = false;
+ }
+ entryVarToRegMap[varIndex] = reg;
+ ++currentRefPosition;
+ }
+
+ JITDUMP("------------------------\n");
+ JITDUMP("WRITING BACK ASSIGNMENTS\n");
+ JITDUMP("------------------------\n");
+
+ BasicBlock* insertionBlock = compiler->fgFirstBB;
+ GenTreePtr insertionPoint = LIR::AsRange(insertionBlock).FirstNonPhiNode();
+
+ // write back assignments
+ for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
+ {
+ assert(curBBNum == block->bbNum);
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ block->dspBlockHeader(compiler);
+ currentRefPosition->dump();
+ }
+#endif // DEBUG
+
+ // Record the var locations at the start of this block.
+ // (If it's fgFirstBB, we've already done that above, see entryVarToRegMap)
+
+ curBBStartLocation = currentRefPosition->nodeLocation;
+ if (block != compiler->fgFirstBB)
+ {
+ processBlockStartLocations(block, false);
+ }
+
+ // Handle the DummyDefs, updating the incoming var location.
+ for (; currentRefPosition != refPositions.end() && currentRefPosition->refType == RefTypeDummyDef;
+ ++currentRefPosition)
+ {
+ assert(currentRefPosition->isIntervalRef());
+ // Don't mark dummy defs as reload
+ currentRefPosition->reload = false;
+ resolveLocalRef(nullptr, nullptr, currentRefPosition);
+ regNumber reg;
+ if (currentRefPosition->registerAssignment != RBM_NONE)
+ {
+ reg = currentRefPosition->assignedReg();
+ }
+ else
+ {
+ reg = REG_STK;
+ currentRefPosition->getInterval()->isActive = false;
+ }
+ setInVarRegForBB(curBBNum, currentRefPosition->getInterval()->varNum, reg);
+ }
+
+ // The next RefPosition should be for the block. Move past it.
+ assert(currentRefPosition != refPositions.end());
+ assert(currentRefPosition->refType == RefTypeBB);
+ ++currentRefPosition;
+
+ // Handle the RefPositions for the block
+ for (; currentRefPosition != refPositions.end() && currentRefPosition->refType != RefTypeBB &&
+ currentRefPosition->refType != RefTypeDummyDef;
+ ++currentRefPosition)
+ {
+ currentLocation = currentRefPosition->nodeLocation;
+ JITDUMP("current : ");
+ DBEXEC(VERBOSE, currentRefPosition->dump());
+
+ // Ensure that the spill & copy info is valid.
+ // First, if it's reload, it must not be copyReg or moveReg
+ assert(!currentRefPosition->reload || (!currentRefPosition->copyReg && !currentRefPosition->moveReg));
+ // If it's copyReg it must not be moveReg, and vice-versa
+ assert(!currentRefPosition->copyReg || !currentRefPosition->moveReg);
+
+ switch (currentRefPosition->refType)
+ {
+#ifdef FEATURE_SIMD
+ case RefTypeUpperVectorSaveUse:
+ case RefTypeUpperVectorSaveDef:
+#endif // FEATURE_SIMD
+ case RefTypeUse:
+ case RefTypeDef:
+ // These are the ones we're interested in
+ break;
+ case RefTypeKill:
+ case RefTypeFixedReg:
+ // These require no handling at resolution time
+ assert(currentRefPosition->referent != nullptr);
+ currentRefPosition->referent->recentRefPosition = currentRefPosition;
+ continue;
+ case RefTypeExpUse:
+ // Ignore the ExpUse cases - a RefTypeExpUse would only exist if the
+ // variable is dead at the entry to the next block. So we'll mark
+ // it as in its current location and resolution will take care of any
+ // mismatch.
+ assert(getNextBlock() == nullptr ||
+ !VarSetOps::IsMember(compiler, getNextBlock()->bbLiveIn,
+ currentRefPosition->getInterval()->getVarIndex(compiler)));
+ currentRefPosition->referent->recentRefPosition = currentRefPosition;
+ continue;
+ case RefTypeKillGCRefs:
+ // No action to take at resolution time, and no interval to update recentRefPosition for.
+ continue;
+ case RefTypeDummyDef:
+ case RefTypeParamDef:
+ case RefTypeZeroInit:
+ // Should have handled all of these already
+ default:
+ unreached();
+ break;
+ }
+ updateMaxSpill(currentRefPosition);
+ GenTree* treeNode = currentRefPosition->treeNode;
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (currentRefPosition->refType == RefTypeUpperVectorSaveDef)
+ {
+ // The treeNode must be a call, and this must be a RefPosition for a LargeVectorType LocalVar.
+ // If the LocalVar is in a callee-save register, we are going to spill its upper half around the call.
+ // If we have allocated a register to spill it to, we will use that; otherwise, we will spill it
+ // to the stack. We can use as a temp register any non-arg caller-save register.
+ noway_assert(treeNode != nullptr);
+ currentRefPosition->referent->recentRefPosition = currentRefPosition;
+ insertUpperVectorSaveAndReload(treeNode, currentRefPosition, block);
+ }
+ else if (currentRefPosition->refType == RefTypeUpperVectorSaveUse)
+ {
+ continue;
+ }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+ // Most uses won't actually need to be recorded (they're on the def).
+ // In those cases, treeNode will be nullptr.
+ if (treeNode == nullptr)
+ {
+ // This is either a use, a dead def, or a field of a struct
+ Interval* interval = currentRefPosition->getInterval();
+ assert(currentRefPosition->refType == RefTypeUse ||
+ currentRefPosition->registerAssignment == RBM_NONE || interval->isStructField);
+
+ // TODO-Review: Need to handle the case where any of the struct fields
+ // are reloaded/spilled at this use
+ assert(!interval->isStructField ||
+ (currentRefPosition->reload == false && currentRefPosition->spillAfter == false));
+
+ if (interval->isLocalVar && !interval->isStructField)
+ {
+ LclVarDsc* varDsc = interval->getLocalVar(compiler);
+
+ // This must be a dead definition. We need to mark the lclVar
+ // so that it's not considered a candidate for lvRegister, as
+ // this dead def will have to go to the stack.
+ assert(currentRefPosition->refType == RefTypeDef);
+ varDsc->lvRegNum = REG_STK;
+ }
+
+ JITDUMP("No tree node to write back to\n");
+ continue;
+ }
+
+ DBEXEC(VERBOSE, lsraDispNode(treeNode, LSRA_DUMP_REFPOS, true));
+ JITDUMP("\n");
+
+ LsraLocation loc = treeNode->gtLsraInfo.loc;
+ JITDUMP("curr = %u mapped = %u", currentLocation, loc);
+ assert(treeNode->IsLocal() || currentLocation == loc || currentLocation == loc + 1);
+
+ if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isInternal)
+ {
+ JITDUMP(" internal");
+ GenTreePtr indNode = nullptr;
+ if (treeNode->OperIsIndir())
+ {
+ indNode = treeNode;
+ JITDUMP(" allocated at GT_IND");
+ }
+ if (indNode != nullptr)
+ {
+ GenTreePtr addrNode = indNode->gtOp.gtOp1->gtEffectiveVal();
+ if (addrNode->OperGet() != GT_ARR_ELEM)
+ {
+ addrNode->gtRsvdRegs |= currentRefPosition->registerAssignment;
+ JITDUMP(", recorded on addr");
+ }
+ }
+ if (treeNode->OperGet() == GT_ARR_ELEM)
+ {
+ // TODO-Review: See WORKAROUND ALERT in buildRefPositionsForNode()
+ GenTreePtr firstIndexTree = treeNode->gtArrElem.gtArrInds[0]->gtEffectiveVal();
+ assert(firstIndexTree != nullptr);
+ if (firstIndexTree->IsLocal() && (firstIndexTree->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ // Record the LAST internal interval
+ // (Yes, this naively just records each one, but the next will replace it;
+ // I'd fix this if it wasn't just a temporary fix)
+ if (currentRefPosition->refType == RefTypeDef)
+ {
+ JITDUMP(" allocated at GT_ARR_ELEM, recorded on firstIndex V%02u");
+ firstIndexTree->gtRsvdRegs = (regMaskSmall)currentRefPosition->registerAssignment;
+ }
+ }
+ }
+ treeNode->gtRsvdRegs |= currentRefPosition->registerAssignment;
+ }
+ else
+ {
+ writeRegisters(currentRefPosition, treeNode);
+
+ if (treeNode->IsLocal() && currentRefPosition->getInterval()->isLocalVar)
+ {
+ resolveLocalRef(block, treeNode, currentRefPosition);
+ }
+
+ // Mark spill locations on temps
+ // (local vars are handled in resolveLocalRef, above)
+ // Note that the tree node will be changed from GTF_SPILL to GTF_SPILLED
+ // in codegen, taking care of the "reload" case for temps
+ else if (currentRefPosition->spillAfter || (currentRefPosition->nextRefPosition != nullptr &&
+ currentRefPosition->nextRefPosition->moveReg))
+ {
+ if (treeNode != nullptr && currentRefPosition->isIntervalRef())
+ {
+ if (currentRefPosition->spillAfter)
+ {
+ treeNode->gtFlags |= GTF_SPILL;
+
+ // If this is a constant interval that is reusing a pre-existing value, we actually need
+ // to generate the value at this point in order to spill it.
+ if (treeNode->IsReuseRegVal())
+ {
+ treeNode->ResetReuseRegVal();
+ }
+
+ // In case of multi-reg call node, also set spill flag on the
+ // register specified by multi-reg index of current RefPosition.
+ // Note that the spill flag on treeNode indicates that one or
+ // more of its allocated registers are in that state.
+ if (treeNode->IsMultiRegCall())
+ {
+ GenTreeCall* call = treeNode->AsCall();
+ call->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
+ }
+ }
+
+ // If the value is reloaded or moved to a different register, we need to insert
+ // a node to hold the register to which it should be reloaded
+ RefPosition* nextRefPosition = currentRefPosition->nextRefPosition;
+ assert(nextRefPosition != nullptr);
+ if (INDEBUG(alwaysInsertReload() ||)
+ nextRefPosition->assignedReg() != currentRefPosition->assignedReg())
+ {
+ if (nextRefPosition->assignedReg() != REG_NA)
+ {
+ insertCopyOrReload(block, treeNode, currentRefPosition->getMultiRegIdx(),
+ nextRefPosition);
+ }
+ else
+ {
+ assert(nextRefPosition->AllocateIfProfitable());
+
+ // In case of tree temps, if def is spilled and use didn't
+ // get a register, set a flag on tree node to be treated as
+ // contained at the point of its use.
+ if (currentRefPosition->spillAfter && currentRefPosition->refType == RefTypeDef &&
+ nextRefPosition->refType == RefTypeUse)
+ {
+ assert(nextRefPosition->treeNode == nullptr);
+ treeNode->gtFlags |= GTF_NOREG_AT_USE;
+ }
+ }
+ }
+ }
+
+ // We should never have to "spill after" a temp use, since
+ // they're single use
+ else
+ {
+ unreached();
+ }
+ }
+ }
+ JITDUMP("\n");
+ }
+
+ processBlockEndLocations(block);
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("-----------------------\n");
+ printf("RESOLVING BB BOUNDARIES\n");
+ printf("-----------------------\n");
+
+ printf("Prior to Resolution\n");
+ foreach_block(compiler, block)
+ {
+ printf("\nBB%02u use def in out\n", block->bbNum);
+ dumpConvertedVarSet(compiler, block->bbVarUse);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbVarDef);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbLiveIn);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbLiveOut);
+ printf("\n");
+
+ dumpInVarToRegMap(block);
+ dumpOutVarToRegMap(block);
+ }
+
+ printf("\n\n");
+ }
+#endif // DEBUG
+
+ resolveEdges();
+
+ // Verify register assignments on variables
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+ for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
+ {
+ if (!isCandidateVar(varDsc))
+ {
+ varDsc->lvRegNum = REG_STK;
+ }
+ else
+ {
+ Interval* interval = getIntervalForLocalVar(lclNum);
+
+ // Determine initial position for parameters
+
+ if (varDsc->lvIsParam)
+ {
+ regMaskTP initialRegMask = interval->firstRefPosition->registerAssignment;
+ regNumber initialReg = (initialRegMask == RBM_NONE || interval->firstRefPosition->spillAfter)
+ ? REG_STK
+ : genRegNumFromMask(initialRegMask);
+ regNumber sourceReg = (varDsc->lvIsRegArg) ? varDsc->lvArgReg : REG_STK;
+
+#ifdef _TARGET_ARM_
+ if (varTypeIsMultiReg(varDsc))
+ {
+ // TODO-ARM-NYI: Map the hi/lo intervals back to lvRegNum and lvOtherReg (these should NYI before
+ // this)
+ assert(!"Multi-reg types not yet supported");
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+ varDsc->lvArgInitReg = initialReg;
+ JITDUMP(" Set V%02u argument initial register to %s\n", lclNum, getRegName(initialReg));
+ }
+ if (!varDsc->lvIsRegArg)
+ {
+ // stack arg
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ if (sourceReg != initialReg)
+ {
+ // The code generator won't initialize struct
+ // fields, so we have to do that if it's not already
+ // where it belongs.
+ assert(interval->isStructField);
+ JITDUMP(" Move struct field param V%02u from %s to %s\n", lclNum, getRegName(sourceReg),
+ getRegName(initialReg));
+ insertMove(insertionBlock, insertionPoint, lclNum, sourceReg, initialReg);
+ }
+ }
+ }
+ }
+
+ // If lvRegNum is REG_STK, that means that either no register
+ // was assigned, or (more likely) that the same register was not
+ // used for all references. In that case, codegen gets the register
+ // from the tree node.
+ if (varDsc->lvRegNum == REG_STK || interval->isSpilled || interval->isSplit)
+ {
+ // For codegen purposes, we'll set lvRegNum to whatever register
+ // it's currently in as we go.
+ // However, we never mark an interval as lvRegister if it has either been spilled
+ // or split.
+ varDsc->lvRegister = false;
+
+ // Skip any dead defs or exposed uses
+ // (first use exposed will only occur when there is no explicit initialization)
+ RefPosition* firstRefPosition = interval->firstRefPosition;
+ while ((firstRefPosition != nullptr) && (firstRefPosition->refType == RefTypeExpUse))
+ {
+ firstRefPosition = firstRefPosition->nextRefPosition;
+ }
+ if (firstRefPosition == nullptr)
+ {
+ // Dead interval
+ varDsc->lvLRACandidate = false;
+ if (varDsc->lvRefCnt == 0)
+ {
+ varDsc->lvOnFrame = false;
+ }
+ else
+ {
+ // We may encounter cases where a lclVar actually has no references, but
+ // a non-zero refCnt. For safety (in case this is some "hidden" lclVar that we're
+ // not correctly recognizing), we'll mark those as needing a stack location.
+ // TODO-Cleanup: Make this an assert if/when we correct the refCnt
+ // updating.
+ varDsc->lvOnFrame = true;
+ }
+ }
+ else
+ {
+ // If the interval was not spilled, it doesn't need a stack location.
+ if (!interval->isSpilled)
+ {
+ varDsc->lvOnFrame = false;
+ }
+ if (firstRefPosition->registerAssignment == RBM_NONE || firstRefPosition->spillAfter)
+ {
+ // Either this RefPosition is spilled, or it is not a "real" def or use
+ assert(firstRefPosition->spillAfter ||
+ (firstRefPosition->refType != RefTypeDef && firstRefPosition->refType != RefTypeUse));
+ varDsc->lvRegNum = REG_STK;
+ }
+ else
+ {
+ varDsc->lvRegNum = firstRefPosition->assignedReg();
+ }
+ }
+ }
+ else
+ {
+ varDsc->lvRegister = true;
+ varDsc->lvOnFrame = false;
+#ifdef DEBUG
+ regMaskTP registerAssignment = genRegMask(varDsc->lvRegNum);
+ assert(!interval->isSpilled && !interval->isSplit);
+ RefPosition* refPosition = interval->firstRefPosition;
+ assert(refPosition != nullptr);
+
+ while (refPosition != nullptr)
+ {
+ // All RefPositions must match, except for dead definitions,
+ // copyReg/moveReg and RefTypeExpUse positions
+ if (refPosition->registerAssignment != RBM_NONE && !refPosition->copyReg && !refPosition->moveReg &&
+ refPosition->refType != RefTypeExpUse)
+ {
+ assert(refPosition->registerAssignment == registerAssignment);
+ }
+ refPosition = refPosition->nextRefPosition;
+ }
+#endif // DEBUG
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("Trees after linear scan register allocator (LSRA)\n");
+ compiler->fgDispBasicBlocks(true);
+ }
+
+ verifyFinalAllocation();
+#endif // DEBUG
+
+ compiler->raMarkStkVars();
+ recordMaxSpill();
+
+ // TODO-CQ: Review this comment and address as needed.
+ // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
+ // so that the gc tracking logic and lvMustInit logic will ignore them.
+ // Extract the code that does this from raAssignVars, and call it here.
+ // PRECONDITIONS: Ensure that lvPromoted is set on promoted structs, if and
+ // only if it is promoted on all paths.
+ // Call might be something like:
+ // compiler->BashUnusedStructLocals();
+}
+
+//------------------------------------------------------------------------
+// insertMove: Insert a move of a lclVar with the given lclNum into the given block.
+//
+// Arguments:
+// block - the BasicBlock into which the move will be inserted.
+// insertionPoint - the instruction before which to insert the move
+// lclNum - the lclNum of the var to be moved
+// fromReg - the register from which the var is moving
+// toReg - the register to which the var is moving
+//
+// Return Value:
+// None.
+//
+// Notes:
+// If insertionPoint is non-NULL, insert before that instruction;
+// otherwise, insert "near" the end (prior to the branch, if any).
+// If fromReg or toReg is REG_STK, then move from/to memory, respectively.
+
+void LinearScan::insertMove(
+ BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum, regNumber fromReg, regNumber toReg)
+{
+ LclVarDsc* varDsc = compiler->lvaTable + lclNum;
+ // One or both MUST be a register
+ assert(fromReg != REG_STK || toReg != REG_STK);
+ // They must not be the same register.
+ assert(fromReg != toReg);
+
+ // This var can't be marked lvRegister now
+ varDsc->lvRegNum = REG_STK;
+
+ var_types lclTyp = varDsc->TypeGet();
+ if (varDsc->lvNormalizeOnStore())
+ {
+ lclTyp = genActualType(lclTyp);
+ }
+ GenTreePtr src = compiler->gtNewLclvNode(lclNum, lclTyp);
+ src->gtLsraInfo.isLsraAdded = true;
+ GenTreePtr top;
+
+ // If we are moving from STK to reg, mark the lclVar nodes with GTF_SPILLED
+ // Otherwise, if we are moving from reg to stack, mark it as GTF_SPILL
+ // Finally, for a reg-to-reg move, generate a GT_COPY
+
+ top = src;
+ if (fromReg == REG_STK)
+ {
+ src->gtFlags |= GTF_SPILLED;
+ src->gtRegNum = toReg;
+ }
+ else if (toReg == REG_STK)
+ {
+ src->gtFlags |= GTF_SPILL;
+ src->SetInReg();
+ src->gtRegNum = fromReg;
+ }
+ else
+ {
+ top = new (compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, varDsc->TypeGet(), src);
+ // This is the new home of the lclVar - indicate that by clearing the GTF_VAR_DEATH flag.
+ // Note that if src is itself a lastUse, this will have no effect.
+ top->gtFlags &= ~(GTF_VAR_DEATH);
+ src->gtRegNum = fromReg;
+ src->SetInReg();
+ top->gtRegNum = toReg;
+ src->gtNext = top;
+ top->gtPrev = src;
+ src->gtLsraInfo.isLocalDefUse = false;
+ top->gtLsraInfo.isLsraAdded = true;
+ }
+ top->gtLsraInfo.isLocalDefUse = true;
+
+ LIR::Range treeRange = LIR::SeqTree(compiler, top);
+ LIR::Range& blockRange = LIR::AsRange(block);
+
+ if (insertionPoint != nullptr)
+ {
+ blockRange.InsertBefore(insertionPoint, std::move(treeRange));
+ }
+ else
+ {
+ // Put the copy at the bottom
+ // If there's a branch, make an embedded statement that executes just prior to the branch
+ if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
+ {
+ noway_assert(!blockRange.IsEmpty());
+
+ GenTree* branch = blockRange.LastNode();
+ assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE ||
+ branch->OperGet() == GT_SWITCH);
+
+ blockRange.InsertBefore(branch, std::move(treeRange));
+ }
+ else
+ {
+ assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
+ blockRange.InsertAtEnd(std::move(treeRange));
+ }
+ }
+}
+
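+//------------------------------------------------------------------------
+// insertSwap: Insert a GT_SWAP node that exchanges the registers of two lclVars into the given block.
+//
+// Arguments:
+// block - the BasicBlock into which the swap will be inserted.
+// insertionPoint - the instruction before which to insert the swap
+// lclNum1 - the lclNum of the var currently in reg1
+// reg1 - the register holding lclNum1
+// lclNum2 - the lclNum of the var currently in reg2
+// reg2 - the register holding lclNum2
+//
+// Notes:
+// If insertionPoint is non-NULL, insert before that instruction;
+// otherwise, insert "near" the end of the block (prior to the branch, if any).
+//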
+void LinearScan::insertSwap(
+ BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum1, regNumber reg1, unsigned lclNum2, regNumber reg2)
+{
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ const char* insertionPointString = "top";
+ if (insertionPoint == nullptr)
+ {
+ insertionPointString = "bottom";
+ }
+ printf(" BB%02u %s: swap V%02u in %s with V%02u in %s\n", block->bbNum, insertionPointString, lclNum1,
+ getRegName(reg1), lclNum2, getRegName(reg2));
+ }
+#endif // DEBUG
+
+ LclVarDsc* varDsc1 = compiler->lvaTable + lclNum1;
+ LclVarDsc* varDsc2 = compiler->lvaTable + lclNum2;
+ assert(reg1 != REG_STK && reg1 != REG_NA && reg2 != REG_STK && reg2 != REG_NA);
+
+ GenTreePtr lcl1 = compiler->gtNewLclvNode(lclNum1, varDsc1->TypeGet());
+ lcl1->gtLsraInfo.isLsraAdded = true;
+ lcl1->gtLsraInfo.isLocalDefUse = false;
+ lcl1->SetInReg();
+ lcl1->gtRegNum = reg1;
+
+ GenTreePtr lcl2 = compiler->gtNewLclvNode(lclNum2, varDsc2->TypeGet());
+ lcl2->gtLsraInfo.isLsraAdded = true;
+ lcl2->gtLsraInfo.isLocalDefUse = false;
+ lcl2->SetInReg();
+ lcl2->gtRegNum = reg2;
+
+ GenTreePtr swap = compiler->gtNewOperNode(GT_SWAP, TYP_VOID, lcl1, lcl2);
+ swap->gtLsraInfo.isLsraAdded = true;
+ swap->gtLsraInfo.isLocalDefUse = false;
+ swap->gtRegNum = REG_NA;
+
+ lcl1->gtNext = lcl2;
+ lcl2->gtPrev = lcl1;
+ lcl2->gtNext = swap;
+ swap->gtPrev = lcl2;
+
+ LIR::Range swapRange = LIR::SeqTree(compiler, swap);
+ LIR::Range& blockRange = LIR::AsRange(block);
+
+ if (insertionPoint != nullptr)
+ {
+ blockRange.InsertBefore(insertionPoint, std::move(swapRange));
+ }
+ else
+ {
+ // Put the swap at the bottom
+ // If there's a branch, make an embedded statement that executes just prior to the branch
+ if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
+ {
+ noway_assert(!blockRange.IsEmpty());
+
+ GenTree* branch = blockRange.LastNode();
+ assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE ||
+ branch->OperGet() == GT_SWITCH);
+
+ blockRange.InsertBefore(branch, std::move(swapRange));
+ }
+ else
+ {
+ assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
+ blockRange.InsertAtEnd(std::move(swapRange));
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// getTempRegForResolution: Get a free register to use for resolution code.
+//
+// Arguments:
+// fromBlock - The "from" block on the edge being resolved.
+// toBlock - The "to" block on the edge.
+// type - the type of register required
+//
+// Return Value:
+// Returns a register that is free on the given edge, or REG_NA if none is available.
+//
+// Notes:
+// It is up to the caller to check the return value to determine whether a register is
+// available (i.e. the result is not REG_NA), and to handle the no-register case appropriately.
+// It is also up to the caller to cache the return value, as this is not cheap to compute.
+
+regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type)
+{
+ // TODO-Throughput: This would be much more efficient if we add RegToVarMaps instead of VarToRegMaps
+ // and they would be more space-efficient as well.
+ VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum);
+ VarToRegMap toVarToRegMap = getInVarToRegMap(toBlock->bbNum);
+
+ regMaskTP freeRegs = allRegs(type);
+#ifdef DEBUG
+ if (getStressLimitRegs() == LSRA_LIMIT_SMALL_SET)
+ {
+ return REG_NA;
+ }
+#endif // DEBUG
+ INDEBUG(freeRegs = stressLimitRegs(nullptr, freeRegs));
+
+ // We are only interested in the variables that are live-in to the "to" block.
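+ // Remove from the candidate set any register that is the current location of such a variable
+ // on either side of the edge; the temp must not clobber a source or a target register.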
+ VARSET_ITER_INIT(compiler, iter, toBlock->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex) && freeRegs != RBM_NONE)
+ {
+ regNumber fromReg = fromVarToRegMap[varIndex];
+ regNumber toReg = toVarToRegMap[varIndex];
+ assert(fromReg != REG_NA && toReg != REG_NA);
+ if (fromReg != REG_STK)
+ {
+ freeRegs &= ~genRegMask(fromReg);
+ }
+ if (toReg != REG_STK)
+ {
+ freeRegs &= ~genRegMask(toReg);
+ }
+ }
+ if (freeRegs == RBM_NONE)
+ {
+ return REG_NA;
+ }
+ else
+ {
+ regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs));
+ return tempReg;
+ }
+}
+
+//------------------------------------------------------------------------
+// addResolution: Add a resolution move of the given interval
+//
+// Arguments:
+// block - the BasicBlock into which the move will be inserted.
+// insertionPoint - the instruction before which to insert the move
+// interval - the interval of the var to be moved
+// toReg - the register to which the var is moving
+// fromReg - the register from which the var is moving
+//
+// Return Value:
+// None.
+//
+// Notes:
+// For joins, we insert at the bottom (indicated by an insertionPoint
+// of nullptr), while for splits we insert at the top.
+// This is because for joins 'block' is a pred of the join, while for splits it is a succ.
+// For critical edges, this function may be called twice - once to move from
+// the source (fromReg), if any, to the stack, in which case toReg will be
+// REG_STK, and we insert at the bottom (leave insertionPoint as nullptr).
+// The next time, we want to move from the stack to the destination (toReg),
+// in which case fromReg will be REG_STK, and we insert at the top.
+
+void LinearScan::addResolution(
+ BasicBlock* block, GenTreePtr insertionPoint, Interval* interval, regNumber toReg, regNumber fromReg)
+{
+#ifdef DEBUG
+ const char* insertionPointString = "top";
+#endif // DEBUG
+ if (insertionPoint == nullptr)
+ {
+#ifdef DEBUG
+ insertionPointString = "bottom";
+#endif // DEBUG
+ }
+
+ JITDUMP(" BB%02u %s: move V%02u from ", block->bbNum, insertionPointString, interval->varNum);
+ JITDUMP("%s to %s", getRegName(fromReg), getRegName(toReg));
+
+ insertMove(block, insertionPoint, interval->varNum, fromReg, toReg);
+ if (fromReg == REG_STK || toReg == REG_STK)
+ {
+ interval->isSpilled = true;
+ }
+ else
+ {
+ interval->isSplit = true;
+ }
+}
+
+//------------------------------------------------------------------------
+// handleOutgoingCriticalEdges: Performs the necessary resolution on all critical edges that feed out of 'block'
+//
+// Arguments:
+// block - the block with outgoing critical edges.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// For all outgoing critical edges (i.e. any successor of this block which is
+// a join edge), if there are any conflicts, split the edge by adding a new block,
+// and generate the resolution code into that block.
+
+void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
+{
+ VARSET_TP VARSET_INIT_NOCOPY(sameResolutionSet, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(sameLivePathsSet, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(singleTargetSet, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(diffResolutionSet, VarSetOps::MakeEmpty(compiler));
+
+ // Get the outVarToRegMap for this block
+ VarToRegMap outVarToRegMap = getOutVarToRegMap(block->bbNum);
+ unsigned succCount = block->NumSucc(compiler);
+ assert(succCount > 1);
+ VarToRegMap firstSuccInVarToRegMap = nullptr;
+ BasicBlock* firstSucc = nullptr;
+
+ // First, determine the live regs at the end of this block so that we know what regs are
+ // available to copy into.
+ regMaskTP liveOutRegs = RBM_NONE;
+ VARSET_ITER_INIT(compiler, iter1, block->bbLiveOut, varIndex1);
+ while (iter1.NextElem(compiler, &varIndex1))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex1];
+ regNumber fromReg = getVarReg(outVarToRegMap, varNum);
+ if (fromReg != REG_STK)
+ {
+ liveOutRegs |= genRegMask(fromReg);
+ }
+ }
+
+ // Next, if this block ends with a switch table, we have to make sure not to copy
+ // into the registers that it uses.
+ regMaskTP switchRegs = RBM_NONE;
+ if (block->bbJumpKind == BBJ_SWITCH)
+ {
+ // At this point, Lowering has transformed any non-switch-table blocks into
+ // cascading ifs.
+ GenTree* switchTable = LIR::AsRange(block).LastNode();
+ assert(switchTable != nullptr && switchTable->OperGet() == GT_SWITCH_TABLE);
+
+ switchRegs = switchTable->gtRsvdRegs;
+ GenTree* op1 = switchTable->gtGetOp1();
+ GenTree* op2 = switchTable->gtGetOp2();
+ noway_assert(op1 != nullptr && op2 != nullptr);
+ assert(op1->gtRegNum != REG_NA && op2->gtRegNum != REG_NA);
+ switchRegs |= genRegMask(op1->gtRegNum);
+ switchRegs |= genRegMask(op2->gtRegNum);
+ }
+
+ VarToRegMap sameVarToRegMap = sharedCriticalVarToRegMap;
+ regMaskTP sameWriteRegs = RBM_NONE;
+ regMaskTP diffReadRegs = RBM_NONE;
+
+ // For each var, classify them as:
+ // - in the same register at the end of this block and at each target (no resolution needed)
+ // - in different registers at different targets (resolve separately):
+ // diffResolutionSet
+ // - in the same register at each target at which it's live, but different from the end of
+ // this block. We may be able to resolve these as if it were a "join", but only if they do not
+ // write to any registers that are read by those in the diffResolutionSet:
+ // sameResolutionSet
+
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveOut, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ regNumber fromReg = getVarReg(outVarToRegMap, varNum);
+ bool isMatch = true;
+ bool isSame = false;
+ bool maybeSingleTarget = false;
+ bool maybeSameLivePaths = false;
+ bool liveOnlyAtSplitEdge = true;
+ regNumber sameToReg = REG_NA;
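+ // Walk the successors: sameToReg ends up as the common target register if this var has the
+ // same home in every successor where it is live, and REG_NA otherwise.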
+ for (unsigned succIndex = 0; succIndex < succCount; succIndex++)
+ {
+ BasicBlock* succBlock = block->GetSucc(succIndex, compiler);
+ if (!VarSetOps::IsMember(compiler, succBlock->bbLiveIn, varIndex))
+ {
+ maybeSameLivePaths = true;
+ continue;
+ }
+ else if (liveOnlyAtSplitEdge)
+ {
+ // Is the var live only at those target blocks which are connected by a split edge to this block
+ liveOnlyAtSplitEdge = ((succBlock->bbPreds->flNext == nullptr) && (succBlock != compiler->fgFirstBB));
+ }
+
+ regNumber toReg = getVarReg(getInVarToRegMap(succBlock->bbNum), varNum);
+ if (sameToReg == REG_NA)
+ {
+ sameToReg = toReg;
+ continue;
+ }
+ if (toReg == sameToReg)
+ {
+ continue;
+ }
+ sameToReg = REG_NA;
+ break;
+ }
+
+ // Check for the cases where we can't write to a register.
+ // We only need to check for these cases if sameToReg is an actual register (not REG_STK).
+ if (sameToReg != REG_NA && sameToReg != REG_STK)
+ {
+ // If there's a path on which this var isn't live, it may use the original value in sameToReg.
+ // In this case, sameToReg will be in the liveOutRegs of this block.
+ // Similarly, if sameToReg is in sameWriteRegs, it has already been used (i.e. for a lclVar that's
+ // live only at another target), and we can't copy another lclVar into that reg in this block.
+ regMaskTP sameToRegMask = genRegMask(sameToReg);
+ if (maybeSameLivePaths &&
+ (((sameToRegMask & liveOutRegs) != RBM_NONE) || ((sameToRegMask & sameWriteRegs) != RBM_NONE)))
+ {
+ sameToReg = REG_NA;
+ }
+ // If this register is used by a switch table at the end of the block, we can't do the copy
+ // in this block (since we can't insert it after the switch).
+ if ((sameToRegMask & switchRegs) != RBM_NONE)
+ {
+ sameToReg = REG_NA;
+ }
+
+ // If the var is live only at those blocks connected by a split edge and not live-in at some of the
+ // target blocks, we will resolve it the same way as if it were in diffResolutionSet and resolution
+ // will be deferred to the handling of split edges, which means the copy will only be at those target(s).
+ //
+ // Another way to achieve similar resolution for vars live only at split edges is by removing them
+ // from consideration up-front, but it requires that we traverse those edges anyway to account for
+ // the registers that must not be overwritten.
+ if (liveOnlyAtSplitEdge && maybeSameLivePaths)
+ {
+ sameToReg = REG_NA;
+ }
+ }
+
+ if (sameToReg == REG_NA)
+ {
+ VarSetOps::AddElemD(compiler, diffResolutionSet, varIndex);
+ if (fromReg != REG_STK)
+ {
+ diffReadRegs |= genRegMask(fromReg);
+ }
+ }
+ else if (sameToReg != fromReg)
+ {
+ VarSetOps::AddElemD(compiler, sameResolutionSet, varIndex);
+ sameVarToRegMap[varIndex] = sameToReg;
+ if (sameToReg != REG_STK)
+ {
+ sameWriteRegs |= genRegMask(sameToReg);
+ }
+ }
+ }
+
+ if (!VarSetOps::IsEmpty(compiler, sameResolutionSet))
+ {
+ if ((sameWriteRegs & diffReadRegs) != RBM_NONE)
+ {
+ // We cannot split the "same" and "diff" regs if the "same" set writes registers
+ // that must be read by the "diff" set. (Note that when these are done as a "batch"
+ // we carefully order them to ensure all the input regs are read before they are
+ // overwritten.)
+ VarSetOps::UnionD(compiler, diffResolutionSet, sameResolutionSet);
+ VarSetOps::ClearD(compiler, sameResolutionSet);
+ }
+ else
+ {
+ // For any vars in the sameResolutionSet, we can simply add the move at the end of "block".
+ resolveEdge(block, nullptr, ResolveSharedCritical, sameResolutionSet);
+ }
+ }
+ if (!VarSetOps::IsEmpty(compiler, diffResolutionSet))
+ {
+ for (unsigned succIndex = 0; succIndex < succCount; succIndex++)
+ {
+ BasicBlock* succBlock = block->GetSucc(succIndex, compiler);
+
+ // Any "diffResolutionSet" resolution for a block with no other predecessors will be handled later
+ // as split resolution.
+ if ((succBlock->bbPreds->flNext == nullptr) && (succBlock != compiler->fgFirstBB))
+ {
+ continue;
+ }
+
+ // Now collect the resolution set for just this edge, if any.
+ // Check only the vars in diffResolutionSet that are live-in to this successor.
+ bool needsResolution = false;
+ VarToRegMap succInVarToRegMap = getInVarToRegMap(succBlock->bbNum);
+ VARSET_TP VARSET_INIT_NOCOPY(edgeResolutionSet,
+ VarSetOps::Intersection(compiler, diffResolutionSet, succBlock->bbLiveIn));
+ VARSET_ITER_INIT(compiler, iter, edgeResolutionSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ Interval* interval = getIntervalForLocalVar(varNum);
+ regNumber fromReg = getVarReg(outVarToRegMap, varNum);
+ regNumber toReg = getVarReg(succInVarToRegMap, varNum);
+
+ if (fromReg == toReg)
+ {
+ VarSetOps::RemoveElemD(compiler, edgeResolutionSet, varIndex);
+ }
+ }
+ if (!VarSetOps::IsEmpty(compiler, edgeResolutionSet))
+ {
+ resolveEdge(block, succBlock, ResolveCritical, edgeResolutionSet);
+ }
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// resolveEdges: Perform resolution across basic block edges
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Traverse the basic blocks.
+// - If this block has a single predecessor that is not the immediately
+// preceding block, perform any needed 'split' resolution at the beginning of this block
+// - Otherwise if this block has critical incoming edges, handle them.
+// - If this block has a single successor that has multiple predecessors, perform any needed
+// 'join' resolution at the end of this block.
+// Note that a block may have both 'split' or 'critical' incoming edge(s) and 'join' outgoing
+// edges.
+
+void LinearScan::resolveEdges()
+{
+ JITDUMP("RESOLVING EDGES\n");
+
+ BasicBlock *block, *prevBlock = nullptr;
+
+ // Handle all the critical edges first.
+ // We will try to avoid resolution across critical edges in cases where all the critical-edge
+ // targets of a block have the same home. We will then split the edges only for the
+ // remaining mismatches. We visit the out-edges, as that allows us to share the moves that are
+ // common among all the targets.
+
+ foreach_block(compiler, block)
+ {
+ if (block->bbNum > bbNumMaxBeforeResolution)
+ {
+ // This is a new block added during resolution - we don't need to visit these now.
+ continue;
+ }
+ if (blockInfo[block->bbNum].hasCriticalOutEdge)
+ {
+ handleOutgoingCriticalEdges(block);
+ }
+ prevBlock = block;
+ }
+
+ prevBlock = nullptr;
+ foreach_block(compiler, block)
+ {
+ if (block->bbNum > bbNumMaxBeforeResolution)
+ {
+ // This is a new block added during resolution - we don't need to visit these now.
+ continue;
+ }
+
+ unsigned succCount = block->NumSucc(compiler);
+ flowList* preds = block->bbPreds;
+ BasicBlock* uniquePredBlock = block->GetUniquePred(compiler);
+
+ // First, if this block has a single predecessor,
+ // we may need resolution at the beginning of this block.
+ // This may be true even if it's the block we used for starting locations,
+ // if a variable was spilled.
+ if (!VarSetOps::IsEmpty(compiler, block->bbLiveIn))
+ {
+ if (uniquePredBlock != nullptr)
+ {
+ // We may have split edges during critical edge resolution, and in the process split
+ // a non-critical edge as well.
+ // It is unlikely that we would ever have more than one of these in sequence (indeed,
+ // I don't think it's possible), but there's no need to assume that it can't.
+ while (uniquePredBlock->bbNum > bbNumMaxBeforeResolution)
+ {
+ uniquePredBlock = uniquePredBlock->GetUniquePred(compiler);
+ noway_assert(uniquePredBlock != nullptr);
+ }
+ resolveEdge(uniquePredBlock, block, ResolveSplit, block->bbLiveIn);
+ }
+ }
+
+ // Finally, if this block has a single successor:
+ // - and that has at least one other predecessor (otherwise we will do the resolution at the
+ // top of the successor),
+ // - and that is not the target of a critical edge (otherwise we've already handled it)
+ // we may need resolution at the end of this block.
+
+ if (succCount == 1)
+ {
+ BasicBlock* succBlock = block->GetSucc(0, compiler);
+ if (succBlock->GetUniquePred(compiler) == nullptr)
+ {
+ resolveEdge(block, succBlock, ResolveJoin, succBlock->bbLiveIn);
+ }
+ }
+ }
+
+ // Now, fix up the mapping for any blocks that were added for edge splitting.
+ // See the comment prior to the call to fgSplitEdge() in resolveEdge().
+ // Note that we could fold this loop in with the checking code below, but that
+ // would only improve the debug case, and would clutter up the code somewhat.
+ if (compiler->fgBBNumMax > bbNumMaxBeforeResolution)
+ {
+ foreach_block(compiler, block)
+ {
+ if (block->bbNum > bbNumMaxBeforeResolution)
+ {
+ // There may be multiple blocks inserted when we split. But we must always have exactly
+ // one path (i.e. all blocks must be single-successor and single-predecessor),
+ // and only one block along the path may be non-empty.
+ // Note that we may have a newly-inserted block that is empty, but which connects
+ // two non-resolution blocks. This happens when an edge is split that requires it.
+
+ BasicBlock* succBlock = block;
+ do
+ {
+ succBlock = succBlock->GetUniqueSucc();
+ noway_assert(succBlock != nullptr);
+ } while ((succBlock->bbNum > bbNumMaxBeforeResolution) && succBlock->isEmpty());
+
+ BasicBlock* predBlock = block;
+ do
+ {
+ predBlock = predBlock->GetUniquePred(compiler);
+ noway_assert(predBlock != nullptr);
+ } while ((predBlock->bbNum > bbNumMaxBeforeResolution) && predBlock->isEmpty());
+
+ unsigned succBBNum = succBlock->bbNum;
+ unsigned predBBNum = predBlock->bbNum;
+ if (block->isEmpty())
+ {
+ // For the case of the empty block, find the non-resolution block (succ or pred).
+ if (predBBNum > bbNumMaxBeforeResolution)
+ {
+ assert(succBBNum <= bbNumMaxBeforeResolution);
+ predBBNum = 0;
+ }
+ else
+ {
+ succBBNum = 0;
+ }
+ }
+ else
+ {
+ assert((succBBNum <= bbNumMaxBeforeResolution) && (predBBNum <= bbNumMaxBeforeResolution));
+ }
+ SplitEdgeInfo info = {predBBNum, succBBNum};
+ getSplitBBNumToTargetBBNumMap()->Set(block->bbNum, info);
+ }
+ }
+ }
+
+#ifdef DEBUG
+ // Make sure the varToRegMaps match up on all edges.
+ bool foundMismatch = false;
+ foreach_block(compiler, block)
+ {
+ if (block->isEmpty() && block->bbNum > bbNumMaxBeforeResolution)
+ {
+ continue;
+ }
+ VarToRegMap toVarToRegMap = getInVarToRegMap(block->bbNum);
+ for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ VarToRegMap fromVarToRegMap = getOutVarToRegMap(predBlock->bbNum);
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ regNumber fromReg = getVarReg(fromVarToRegMap, varNum);
+ regNumber toReg = getVarReg(toVarToRegMap, varNum);
+ if (fromReg != toReg)
+ {
+ Interval* interval = getIntervalForLocalVar(varNum);
+ if (!foundMismatch)
+ {
+ foundMismatch = true;
+ printf("Found mismatched var locations after resolution!\n");
+ }
+ printf(" V%02u: BB%02u to BB%02u: ", varNum, predBlock->bbNum, block->bbNum);
+ printf("%s to %s\n", getRegName(fromReg), getRegName(toReg));
+ }
+ }
+ }
+ }
+ assert(!foundMismatch);
+#endif
+ JITDUMP("\n");
+}
+
+//------------------------------------------------------------------------
+// resolveEdge: Perform the specified type of resolution between two blocks.
+//
+// Arguments:
+// fromBlock - the block from which the edge originates
+// toBlock - the block at which the edge terminates
+// resolveType - the type of resolution to be performed
+// liveSet - the set of tracked lclVar indices which may require resolution
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The caller must have performed the analysis to determine the type of the edge.
+//
+// Notes:
+// This method emits the correctly ordered moves necessary to place variables in the
+// correct registers across a Split, Join or Critical edge.
+// In order to avoid overwriting register values before they have been moved to their
+// new home (register/stack), it first does the register-to-stack moves (to free those
+// registers), then the register to register moves, ensuring that the target register
+// is free before the move, and then finally the stack to register moves.
+
+void LinearScan::resolveEdge(BasicBlock* fromBlock,
+ BasicBlock* toBlock,
+ ResolveType resolveType,
+ VARSET_VALARG_TP liveSet)
+{
+ VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum);
+ VarToRegMap toVarToRegMap;
+ if (resolveType == ResolveSharedCritical)
+ {
+ toVarToRegMap = sharedCriticalVarToRegMap;
+ }
+ else
+ {
+ toVarToRegMap = getInVarToRegMap(toBlock->bbNum);
+ }
+
+ // The block to which we add the resolution moves depends on the resolveType
+ BasicBlock* block;
+ switch (resolveType)
+ {
+ case ResolveJoin:
+ case ResolveSharedCritical:
+ block = fromBlock;
+ break;
+ case ResolveSplit:
+ block = toBlock;
+ break;
+ case ResolveCritical:
+ // fgSplitEdge may add one or two BasicBlocks. It returns the block that splits
+ // the edge from 'fromBlock' to 'toBlock', but if it inserts that block right after
+ // a block with a fall-through it will have to create another block to handle that edge.
+ // These new blocks can be mapped to existing blocks in order to correctly handle
+ // the calls to recordVarLocationsAtStartOfBB() from codegen. That mapping is handled
+ // in resolveEdges(), after all the edge resolution has been done (by calling this
+ // method for each edge).
+ block = compiler->fgSplitEdge(fromBlock, toBlock);
+ break;
+ default:
+ unreached();
+ break;
+ }
+
+#ifndef _TARGET_XARCH_
+ // We record tempregs for beginning and end of each block.
+ // For amd64/x86 we only need a tempReg for float - we'll use xchg for int.
+ // TODO-Throughput: It would be better to determine the tempRegs on demand, but the code below
+ // modifies the varToRegMaps so we don't have all the correct registers at the time
+ // we need to get the tempReg.
+ regNumber tempRegInt =
+ (resolveType == ResolveSharedCritical) ? REG_NA : getTempRegForResolution(fromBlock, toBlock, TYP_INT);
+#endif // !_TARGET_XARCH_
+ regNumber tempRegFlt = REG_NA;
+ if ((compiler->compFloatingPointUsed) && (resolveType != ResolveSharedCritical))
+ {
+ tempRegFlt = getTempRegForResolution(fromBlock, toBlock, TYP_FLOAT);
+ }
+
+ regMaskTP targetRegsToDo = RBM_NONE;
+ regMaskTP targetRegsReady = RBM_NONE;
+ regMaskTP targetRegsFromStack = RBM_NONE;
+
+ // The following arrays capture the location of the registers as they are moved:
+ // - location[reg] gives the current location of the var that was originally in 'reg'.
+ // (Note that a var may be moved more than once.)
+ // - source[reg] gives the original location of the var that needs to be moved to 'reg'.
+ // For example, if a var is in rax and needs to be moved to rsi, then we would start with:
+ // location[rax] == rax
+ // source[rsi] == rax -- this doesn't change
+ // Then, if for some reason we need to move it temporarily to rbx, we would have:
+ // location[rax] == rbx
+ // Once we have completed the move, we will have:
+ // location[rax] == REG_NA
+ // This indicates that the var originally in rax is now in its target register.
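+ //
+ // A cycle, e.g. exchanging rax and rsi (illustrative registers), starts out with
+ // location[rax] == rax, location[rsi] == rsi, source[rax] == rsi and source[rsi] == rax.
+ // Neither target register starts out free, so the cycle is broken below either with a
+ // swap (on xarch) or by moving one of the values through a temp register or the stack,
+ // updating location[] at each step.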
+
+ regNumberSmall location[REG_COUNT];
+ C_ASSERT(sizeof(char) == sizeof(regNumberSmall)); // for memset to work
+ memset(location, REG_NA, REG_COUNT);
+ regNumberSmall source[REG_COUNT];
+ memset(source, REG_NA, REG_COUNT);
+
+ // sourceIntervals[reg] is the interval whose value starts out in register 'reg'
+ // (i.e. it is keyed by the incoming reg).
+ Interval* sourceIntervals[REG_COUNT] = {nullptr};
+
+ // Intervals for vars that need to be loaded from the stack
+ Interval* stackToRegIntervals[REG_COUNT] = {nullptr};
+
+ // Get the starting insertion point for the "to" resolution
+ GenTreePtr insertionPoint = nullptr;
+ if (resolveType == ResolveSplit || resolveType == ResolveCritical)
+ {
+ insertionPoint = LIR::AsRange(block).FirstNonPhiNode();
+ }
+
+ // First:
+ // - Perform all moves from reg to stack (no ordering needed on these)
+ // - For reg to reg moves, record the current location, associating their
+ // source location with the target register they need to go into
+ // - For stack to reg moves (done last, no ordering needed between them)
+ // record the interval associated with the target reg
+ // TODO-Throughput: We should be looping over the liveIn and liveOut registers, since
+ // that will scale better than iterating over all of the live variables.
+
+ VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ bool isSpilled = false;
+ Interval* interval = getIntervalForLocalVar(varNum);
+ regNumber fromReg = getVarReg(fromVarToRegMap, varNum);
+ regNumber toReg = getVarReg(toVarToRegMap, varNum);
+ if (fromReg == toReg)
+ {
+ continue;
+ }
+
+ // For Critical edges, the location will not change on either side of the edge,
+ // since we'll add a new block to do the move.
+ if (resolveType == ResolveSplit)
+ {
+ toVarToRegMap[varIndex] = fromReg;
+ }
+ else if (resolveType == ResolveJoin || resolveType == ResolveSharedCritical)
+ {
+ fromVarToRegMap[varIndex] = toReg;
+ }
+
+ assert(fromReg < UCHAR_MAX && toReg < UCHAR_MAX);
+
+ bool done = false;
+
+ if (fromReg != toReg)
+ {
+ if (fromReg == REG_STK)
+ {
+ stackToRegIntervals[toReg] = interval;
+ targetRegsFromStack |= genRegMask(toReg);
+ }
+ else if (toReg == REG_STK)
+ {
+ // Do the reg to stack moves now
+ addResolution(block, insertionPoint, interval, REG_STK, fromReg);
+ JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+ }
+ else
+ {
+ location[fromReg] = (regNumberSmall)fromReg;
+ source[toReg] = (regNumberSmall)fromReg;
+ sourceIntervals[fromReg] = interval;
+ targetRegsToDo |= genRegMask(toReg);
+ }
+ }
+ }
+
+ // REGISTER to REGISTER MOVES
+
+ // First, find all the ones that are ready to move now
+ regMaskTP targetCandidates = targetRegsToDo;
+ while (targetCandidates != RBM_NONE)
+ {
+ regMaskTP targetRegMask = genFindLowestBit(targetCandidates);
+ targetCandidates &= ~targetRegMask;
+ regNumber targetReg = genRegNumFromMask(targetRegMask);
+ if (location[targetReg] == REG_NA)
+ {
+ targetRegsReady |= targetRegMask;
+ }
+ }
+
+ // Perform reg to reg moves
+ while (targetRegsToDo != RBM_NONE)
+ {
+ while (targetRegsReady != RBM_NONE)
+ {
+ regMaskTP targetRegMask = genFindLowestBit(targetRegsReady);
+ targetRegsToDo &= ~targetRegMask;
+ targetRegsReady &= ~targetRegMask;
+ regNumber targetReg = genRegNumFromMask(targetRegMask);
+ assert(location[targetReg] != targetReg);
+ regNumber sourceReg = (regNumber)source[targetReg];
+ regNumber fromReg = (regNumber)location[sourceReg];
+ assert(fromReg < UCHAR_MAX && sourceReg < UCHAR_MAX);
+ Interval* interval = sourceIntervals[sourceReg];
+ assert(interval != nullptr);
+ addResolution(block, insertionPoint, interval, targetReg, fromReg);
+ JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+ sourceIntervals[sourceReg] = nullptr;
+ location[sourceReg] = REG_NA;
+
+ // Did this move free up a register that is itself the target of another pending move?
+ if (fromReg == sourceReg && source[fromReg] != REG_NA)
+ {
+ regMaskTP fromRegMask = genRegMask(fromReg);
+ targetRegsReady |= fromRegMask;
+ }
+ }
+ if (targetRegsToDo != RBM_NONE)
+ {
+ regMaskTP targetRegMask = genFindLowestBit(targetRegsToDo);
+ regNumber targetReg = genRegNumFromMask(targetRegMask);
+
+ // Is it already there due to other moves?
+ // If not, move it to the temp reg, OR swap it with another register
+ regNumber sourceReg = (regNumber)source[targetReg];
+ regNumber fromReg = (regNumber)location[sourceReg];
+ if (targetReg == fromReg)
+ {
+ targetRegsToDo &= ~targetRegMask;
+ }
+ else
+ {
+ regNumber tempReg = REG_NA;
+ bool useSwap = false;
+ if (emitter::isFloatReg(targetReg))
+ {
+ tempReg = tempRegFlt;
+ }
+#ifdef _TARGET_XARCH_
+ else
+ {
+ useSwap = true;
+ }
+#else // !_TARGET_XARCH_
+ else
+ {
+ tempReg = tempRegInt;
+ }
+#endif // !_TARGET_XARCH_
+ if (useSwap || tempReg == REG_NA)
+ {
+ // First, we have to figure out the destination register for what's currently in fromReg,
+ // so that we can find its sourceInterval.
+ regNumber otherTargetReg = REG_NA;
+
+ // By chance, is fromReg going where it belongs?
+ if (location[source[fromReg]] == targetReg)
+ {
+ otherTargetReg = fromReg;
+ // If we can swap, we will be done with otherTargetReg as well.
+ // Otherwise, we'll spill it to the stack and reload it later.
+ if (useSwap)
+ {
+ regMaskTP fromRegMask = genRegMask(fromReg);
+ targetRegsToDo &= ~fromRegMask;
+ }
+ }
+ else
+ {
+ // Look at the remaining registers from targetRegsToDo (which we expect to be relatively
+ // small at this point) to find out what's currently in targetReg.
+ regMaskTP mask = targetRegsToDo;
+ while (mask != RBM_NONE && otherTargetReg == REG_NA)
+ {
+ regMaskTP nextRegMask = genFindLowestBit(mask);
+ regNumber nextReg = genRegNumFromMask(nextRegMask);
+ mask &= ~nextRegMask;
+ if (location[source[nextReg]] == targetReg)
+ {
+ otherTargetReg = nextReg;
+ }
+ }
+ }
+ assert(otherTargetReg != REG_NA);
+
+ if (useSwap)
+ {
+ // Generate a "swap" of fromReg and targetReg
+ insertSwap(block, insertionPoint, sourceIntervals[source[otherTargetReg]]->varNum, targetReg,
+ sourceIntervals[sourceReg]->varNum, fromReg);
+ location[sourceReg] = REG_NA;
+ location[source[otherTargetReg]] = (regNumberSmall)fromReg;
+ }
+ else
+ {
+ // Spill "targetReg" to the stack and add its eventual target (otherTargetReg)
+ // to "targetRegsFromStack", which will be handled below.
+ // NOTE: This condition is very rare. Setting COMPlus_JitStressRegs=0x203
+ // has been known to trigger it in JIT SH.
+
+ // First, spill "otherInterval" from targetReg to the stack.
+ Interval* otherInterval = sourceIntervals[source[otherTargetReg]];
+ addResolution(block, insertionPoint, otherInterval, REG_STK, targetReg);
+ JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+ location[source[otherTargetReg]] = REG_STK;
+
+ // Now, move the interval that is going to targetReg, and add its "fromReg" to
+ // "targetRegsReady".
+ addResolution(block, insertionPoint, sourceIntervals[sourceReg], targetReg, fromReg);
+ JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+ location[sourceReg] = REG_NA;
+ targetRegsReady |= genRegMask(fromReg);
+ }
+ targetRegsToDo &= ~targetRegMask;
+ }
+ else
+ {
+ compiler->codeGen->regSet.rsSetRegsModified(genRegMask(tempReg) DEBUGARG(dumpTerse));
+ assert(sourceIntervals[targetReg] != nullptr);
+ addResolution(block, insertionPoint, sourceIntervals[targetReg], tempReg, targetReg);
+ JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+ location[targetReg] = (regNumberSmall)tempReg;
+ targetRegsReady |= targetRegMask;
+ }
+ }
+ }
+ }
+
+ // Finally, perform stack to reg moves
+ // All the target regs will be empty at this point
+ while (targetRegsFromStack != RBM_NONE)
+ {
+ regMaskTP targetRegMask = genFindLowestBit(targetRegsFromStack);
+ targetRegsFromStack &= ~targetRegMask;
+ regNumber targetReg = genRegNumFromMask(targetRegMask);
+
+ Interval* interval = stackToRegIntervals[targetReg];
+ assert(interval != nullptr);
+
+ addResolution(block, insertionPoint, interval, targetReg, REG_STK);
+ JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+ }
+}
+
+void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node, LsraLocation location)
+{
+ regMaskTP dstCandidates;
+
+ // If there is a reg indicated on the tree node, use that for dstCandidates.
+ // The exception is a NOP, which sometimes shows up around late args.
+ // TODO-Cleanup: get rid of those NOPs.
+ if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP)
+ {
+ dstCandidates = lsra->allRegs(node->TypeGet());
+ }
+ else
+ {
+ dstCandidates = genRegMask(node->gtRegNum);
+ }
+
+ internalIntCount = 0;
+ internalFloatCount = 0;
+ isLocalDefUse = false;
+ isHelperCallWithKills = false;
+ isLsraAdded = false;
+ definesAnyRegisters = false;
+
+ setDstCandidates(lsra, dstCandidates);
+ srcCandsIndex = dstCandsIndex;
+
+ setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
+
+ loc = location;
+#ifdef DEBUG
+ isInitialized = true;
+#endif
+
+ assert(IsValid(lsra));
+}
+
+regMaskTP TreeNodeInfo::getSrcCandidates(LinearScan* lsra)
+{
+ return lsra->GetRegMaskForIndex(srcCandsIndex);
+}
+
+void TreeNodeInfo::setSrcCandidates(LinearScan* lsra, regMaskTP mask)
+{
+ LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+ assert(FitsIn<unsigned char>(i));
+ srcCandsIndex = (unsigned char)i;
+}
+
+regMaskTP TreeNodeInfo::getDstCandidates(LinearScan* lsra)
+{
+ return lsra->GetRegMaskForIndex(dstCandsIndex);
+}
+
+void TreeNodeInfo::setDstCandidates(LinearScan* lsra, regMaskTP mask)
+{
+ LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+ assert(FitsIn<unsigned char>(i));
+ dstCandsIndex = (unsigned char)i;
+}
+
+regMaskTP TreeNodeInfo::getInternalCandidates(LinearScan* lsra)
+{
+ return lsra->GetRegMaskForIndex(internalCandsIndex);
+}
+
+void TreeNodeInfo::setInternalCandidates(LinearScan* lsra, regMaskTP mask)
+{
+ LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+ assert(FitsIn<unsigned char>(i));
+ internalCandsIndex = (unsigned char)i;
+}
+
+void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask)
+{
+ LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(lsra->GetRegMaskForIndex(internalCandsIndex) | mask);
+ assert(FitsIn<unsigned char>(i));
+ internalCandsIndex = (unsigned char)i;
+}
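+
+// Illustrative usage of the candidate-set accessors above, e.g. from a node's TreeNodeInfo
+// setup (the particular masks are just an example, not taken from any specific node):
+// info.setInternalCandidates(lsra, lsra->allRegs(TYP_INT)); // start from all int regs
+// info.addInternalCandidates(lsra, RBM_BYTE_REGS); // then OR in an extra constraint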
+
+#ifdef DEBUG
+void dumpRegMask(regMaskTP regs)
+{
+ if (regs == RBM_ALLINT)
+ {
+ printf("[allInt]");
+ }
+ else if (regs == (RBM_ALLINT & ~RBM_FPBASE))
+ {
+ printf("[allIntButFP]");
+ }
+ else if (regs == RBM_ALLFLOAT)
+ {
+ printf("[allFloat]");
+ }
+ else if (regs == RBM_ALLDOUBLE)
+ {
+ printf("[allDouble]");
+ }
+ else
+ {
+ dspRegMask(regs);
+ }
+}
+
+static const char* getRefTypeName(RefType refType)
+{
+ switch (refType)
+ {
+#define DEF_REFTYPE(memberName, memberValue, shortName) \
+ case memberName: \
+ return #memberName;
+#include "lsra_reftypes.h"
+#undef DEF_REFTYPE
+ default:
+ return nullptr;
+ }
+}
+
+static const char* getRefTypeShortName(RefType refType)
+{
+ switch (refType)
+ {
+#define DEF_REFTYPE(memberName, memberValue, shortName) \
+ case memberName: \
+ return shortName;
+#include "lsra_reftypes.h"
+#undef DEF_REFTYPE
+ default:
+ return nullptr;
+ }
+}
+
+void RefPosition::dump()
+{
+ printf("<RefPosition #%-3u @%-3u", rpNum, nodeLocation);
+
+ if (nextRefPosition)
+ {
+ printf(" ->#%-3u", nextRefPosition->rpNum);
+ }
+
+ printf(" %s ", getRefTypeName(refType));
+
+ if (this->isPhysRegRef)
+ {
+ this->getReg()->tinyDump();
+ }
+ else if (getInterval())
+ {
+ this->getInterval()->tinyDump();
+ }
+
+ if (this->treeNode)
+ {
+ printf("%s ", treeNode->OpName(treeNode->OperGet()));
+ }
+ printf("BB%02u ", this->bbNum);
+
+ printf("regmask=");
+ dumpRegMask(registerAssignment);
+
+ if (this->lastUse)
+ {
+ printf(" last");
+ }
+ if (this->reload)
+ {
+ printf(" reload");
+ }
+ if (this->spillAfter)
+ {
+ printf(" spillAfter");
+ }
+ if (this->moveReg)
+ {
+ printf(" move");
+ }
+ if (this->copyReg)
+ {
+ printf(" copy");
+ }
+ if (this->isFixedRegRef)
+ {
+ printf(" fixed");
+ }
+ if (this->isLocalDefUse)
+ {
+ printf(" local");
+ }
+ if (this->delayRegFree)
+ {
+ printf(" delay");
+ }
+ if (this->outOfOrder)
+ {
+ printf(" outOfOrder");
+ }
+ printf(">\n");
+}
+
+void RegRecord::dump()
+{
+ tinyDump();
+}
+
+void Interval::dump()
+{
+ printf("Interval %2u:", intervalIndex);
+
+ if (isLocalVar)
+ {
+ printf(" (V%02u)", varNum);
+ }
+ if (isInternal)
+ {
+ printf(" (INTERNAL)");
+ }
+ if (isSpilled)
+ {
+ printf(" (SPILLED)");
+ }
+ if (isSplit)
+ {
+ printf(" (SPLIT)");
+ }
+ if (isStructField)
+ {
+ printf(" (struct)");
+ }
+ if (isSpecialPutArg)
+ {
+ printf(" (specialPutArg)");
+ }
+ if (isConstant)
+ {
+ printf(" (constant)");
+ }
+
+ printf(" RefPositions {");
+ for (RefPosition* refPosition = this->firstRefPosition; refPosition != nullptr;
+ refPosition = refPosition->nextRefPosition)
+ {
+ printf("#%u@%u", refPosition->rpNum, refPosition->nodeLocation);
+ if (refPosition->nextRefPosition)
+ {
+ printf(" ");
+ }
+ }
+ printf("}");
+
+ // this is not used (yet?)
+ // printf(" SpillOffset %d", this->spillOffset);
+
+ printf(" physReg:%s", getRegName(physReg));
+
+ printf(" Preferences=");
+ dumpRegMask(this->registerPreferences);
+
+ if (relatedInterval)
+ {
+ printf(" RelatedInterval ");
+ relatedInterval->microDump();
+ printf("[%p]", dspPtr(relatedInterval));
+ }
+
+ printf("\n");
+}
+
+// print out very concise representation
+void Interval::tinyDump()
+{
+ printf("<Ivl:%u", intervalIndex);
+ if (isLocalVar)
+ {
+ printf(" V%02u", varNum);
+ }
+ if (isInternal)
+ {
+ printf(" internal");
+ }
+ printf("> ");
+}
+
+// print out extremely concise representation
+void Interval::microDump()
+{
+ char intervalTypeChar = 'I';
+ if (isInternal)
+ {
+ intervalTypeChar = 'T';
+ }
+ else if (isLocalVar)
+ {
+ intervalTypeChar = 'L';
+ }
+
+ printf("<%c%u>", intervalTypeChar, intervalIndex);
+}
+
+void RegRecord::tinyDump()
+{
+ printf("<Reg:%-3s> ", getRegName(regNum));
+}
+
+void TreeNodeInfo::dump(LinearScan* lsra)
+{
+ printf("<TreeNodeInfo @ %2u %d=%d %di %df", loc, dstCount, srcCount, internalIntCount, internalFloatCount);
+ printf(" src=");
+ dumpRegMask(getSrcCandidates(lsra));
+ printf(" int=");
+ dumpRegMask(getInternalCandidates(lsra));
+ printf(" dst=");
+ dumpRegMask(getDstCandidates(lsra));
+ if (isLocalDefUse)
+ {
+ printf(" L");
+ }
+ if (isInitialized)
+ {
+ printf(" I");
+ }
+ if (isHelperCallWithKills)
+ {
+ printf(" H");
+ }
+ if (isLsraAdded)
+ {
+ printf(" A");
+ }
+ if (isDelayFree)
+ {
+ printf(" D");
+ }
+ if (isTgtPref)
+ {
+ printf(" P");
+ }
+ printf(">\n");
+}
+
+void LinearScan::lsraDumpIntervals(const char* msg)
+{
+
+ printf("\nLinear scan intervals %s:\n", msg);
+ for (auto& interval : intervals)
+ {
+ // only dump something if it has references
+ // if (interval->firstRefPosition)
+ interval.dump();
+ }
+
+ printf("\n");
+}
+
+// Dumps a tree node as a destination or source operand, with the style
+// of dump dependent on the mode
+void LinearScan::lsraGetOperandString(GenTreePtr tree,
+ LsraTupleDumpMode mode,
+ char* operandString,
+ unsigned operandStringLength)
+{
+ const char* lastUseChar = "";
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ lastUseChar = "*";
+ }
+ switch (mode)
+ {
+ case LinearScan::LSRA_DUMP_PRE:
+ _snprintf_s(operandString, operandStringLength, operandStringLength, "t%d%s", tree->gtSeqNum, lastUseChar);
+ break;
+ case LinearScan::LSRA_DUMP_REFPOS:
+ _snprintf_s(operandString, operandStringLength, operandStringLength, "t%d%s", tree->gtSeqNum, lastUseChar);
+ break;
+ case LinearScan::LSRA_DUMP_POST:
+ {
+ Compiler* compiler = JitTls::GetCompiler();
+
+ if (!tree->gtHasReg())
+ {
+ _snprintf_s(operandString, operandStringLength, operandStringLength, "STK%s", lastUseChar);
+ }
+ else
+ {
+ _snprintf_s(operandString, operandStringLength, operandStringLength, "%s%s",
+ getRegName(tree->gtRegNum, useFloatReg(tree->TypeGet())), lastUseChar);
+ }
+ }
+ break;
+ default:
+ printf("ERROR: INVALID TUPLE DUMP MODE\n");
+ break;
+ }
+}
+void LinearScan::lsraDispNode(GenTreePtr tree, LsraTupleDumpMode mode, bool hasDest)
+{
+ Compiler* compiler = JitTls::GetCompiler();
+ const unsigned operandStringLength = 16;
+ char operandString[operandStringLength];
+ const char* emptyDestOperand = " ";
+ char spillChar = ' ';
+
+ if (mode == LinearScan::LSRA_DUMP_POST)
+ {
+ if ((tree->gtFlags & GTF_SPILL) != 0)
+ {
+ spillChar = 'S';
+ }
+ if (!hasDest && tree->gtHasReg())
+ {
+ // This can be true for the "localDefUse" case - defining a reg, but
+ // pushing it on the stack
+ assert(spillChar == ' ');
+ spillChar = '*';
+ hasDest = true;
+ }
+ }
+ printf("%c N%03u. ", spillChar, tree->gtSeqNum);
+
+ LclVarDsc* varDsc = nullptr;
+ unsigned varNum = UINT_MAX;
+ if (tree->IsLocal())
+ {
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ varDsc = &(compiler->lvaTable[varNum]);
+ if (varDsc->lvLRACandidate)
+ {
+ hasDest = false;
+ }
+ }
+ if (hasDest)
+ {
+ if ((mode == LinearScan::LSRA_DUMP_POST) && ((tree->gtFlags & GTF_SPILLED) != 0))
+ {
+ assert(tree->gtHasReg());
+ }
+ lsraGetOperandString(tree, mode, operandString, operandStringLength);
+ printf("%-15s =", operandString);
+ }
+ else
+ {
+ printf("%-15s ", emptyDestOperand);
+ }
+ if (varDsc != nullptr)
+ {
+ if (varDsc->lvLRACandidate)
+ {
+ if (mode == LSRA_DUMP_REFPOS)
+ {
+ printf(" V%02u(L%d)", varNum, getIntervalForLocalVar(varNum)->intervalIndex);
+ }
+ else
+ {
+ lsraGetOperandString(tree, mode, operandString, operandStringLength);
+ printf(" V%02u(%s)", varNum, operandString);
+ if ((mode == LinearScan::LSRA_DUMP_POST) && ((tree->gtFlags & GTF_SPILLED) != 0))
+ {
+ printf("R");
+ }
+ }
+ }
+ else
+ {
+ printf(" V%02u MEM", varNum);
+ }
+ }
+ else if (tree->OperIsAssignment())
+ {
+ assert(!tree->gtHasReg());
+ const char* isRev = "";
+ if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
+ {
+ isRev = "(Rev)";
+ }
+ printf(" asg%s%s ", GenTree::NodeName(tree->OperGet()), isRev);
+ }
+ else
+ {
+ compiler->gtDispNodeName(tree);
+ if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
+ {
+ printf("(Rev)");
+ }
+ if (tree->OperKind() & GTK_LEAF)
+ {
+ compiler->gtDispLeaf(tree, nullptr);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// DumpOperandDefs: dumps the registers defined by an operand, recursing
+// into contained operands.
+//
+// For most operands this is simple:
+// - Operands that do not produce values (e.g. stores and other void-typed
+// nodes) and operands that immediately use the registers they define
+// contribute nothing to the dump.
+// - Operands that are marked as defining N registers have each of those
+// defs printed.
+//
+// For contained operands, however, things are more complicated: for purposes
+// of bookkeeping, a contained node is treated as producing the transitive
+// closure of the registers produced by its sources, so the defs of its own
+// operands are dumped in its place. (ComputeOperandDstCount, used below,
+// computes that count.)
+//
+// Arguments:
+// operand - The operand whose defs are to be dumped.
+// first - [in, out] true until the first operand for this node has been
+// printed; used to suppress the leading comma.
+// mode - The tuple dump mode.
+// operandString - The buffer used to format each operand.
+// operandStringLength - The size of 'operandString'.
+//
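+// For example (illustrative): if 'operand' is a contained address mode such as a contained
+// GT_LEA, the GT_LEA itself has dstCount == 0, so the defs of its base and index operands
+// are printed in its place.
+//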
+void LinearScan::DumpOperandDefs(
+ GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength)
+{
+ assert(operand != nullptr);
+ assert(operandString != nullptr);
+
+ if (ComputeOperandDstCount(operand) == 0)
+ {
+ return;
+ }
+
+ if (operand->gtLsraInfo.dstCount != 0)
+ {
+ // This operand directly produces registers; print it.
+ for (int i = 0; i < operand->gtLsraInfo.dstCount; i++)
+ {
+ if (!first)
+ {
+ printf(",");
+ }
+
+ lsraGetOperandString(operand, mode, operandString, operandStringLength);
+ printf("%s", operandString);
+
+ first = false;
+ }
+ }
+ else
+ {
+ // This is a contained node. Dump the defs produced by its operands.
+ for (GenTree* op : operand->Operands())
+ {
+ DumpOperandDefs(op, first, mode, operandString, operandStringLength);
+ }
+ }
+}
+
+void LinearScan::TupleStyleDump(LsraTupleDumpMode mode)
+{
+ BasicBlock* block;
+ LsraLocation currentLoc = 1; // 0 is the entry
+ const unsigned operandStringLength = 16;
+ char operandString[operandStringLength];
+
+ // currentRefPosition is not used for LSRA_DUMP_PRE
+ // We keep separate iterators for defs, so that we can print them
+ // on the lhs of the dump
+ auto currentRefPosition = refPositions.begin();
+
+ switch (mode)
+ {
+ case LSRA_DUMP_PRE:
+ printf("TUPLE STYLE DUMP BEFORE LSRA\n");
+ break;
+ case LSRA_DUMP_REFPOS:
+ printf("TUPLE STYLE DUMP WITH REF POSITIONS\n");
+ break;
+ case LSRA_DUMP_POST:
+ printf("TUPLE STYLE DUMP WITH REGISTER ASSIGNMENTS\n");
+ break;
+ default:
+ printf("ERROR: INVALID TUPLE DUMP MODE\n");
+ return;
+ }
+
+ if (mode != LSRA_DUMP_PRE)
+ {
+ printf("Incoming Parameters: ");
+ for (; currentRefPosition != refPositions.end() && currentRefPosition->refType != RefTypeBB;
+ ++currentRefPosition)
+ {
+ Interval* interval = currentRefPosition->getInterval();
+ assert(interval != nullptr && interval->isLocalVar);
+ printf(" V%02d", interval->varNum);
+ if (mode == LSRA_DUMP_POST)
+ {
+ regNumber reg;
+ if (currentRefPosition->registerAssignment == RBM_NONE)
+ {
+ reg = REG_STK;
+ }
+ else
+ {
+ reg = currentRefPosition->assignedReg();
+ }
+ LclVarDsc* varDsc = &(compiler->lvaTable[interval->varNum]);
+ printf("(");
+ regNumber assignedReg = varDsc->lvRegNum;
+ regNumber argReg = (varDsc->lvIsRegArg) ? varDsc->lvArgReg : REG_STK;
+
+ assert(reg == assignedReg || varDsc->lvRegister == false);
+ if (reg != argReg)
+ {
+ printf("%s=>", getRegName(argReg, isFloatRegType(interval->registerType)));
+ }
+ printf("%s)", getRegName(reg, isFloatRegType(interval->registerType)));
+ }
+ }
+ printf("\n");
+ }
+
+ for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
+ {
+ currentLoc += 2;
+
+ if (mode == LSRA_DUMP_REFPOS)
+ {
+ bool printedBlockHeader = false;
+ // We should find the boundary RefPositions in the order of exposed uses, dummy defs, and the blocks
+ for (; currentRefPosition != refPositions.end() &&
+ (currentRefPosition->refType == RefTypeExpUse || currentRefPosition->refType == RefTypeDummyDef ||
+ (currentRefPosition->refType == RefTypeBB && !printedBlockHeader));
+ ++currentRefPosition)
+ {
+ Interval* interval = nullptr;
+ if (currentRefPosition->isIntervalRef())
+ {
+ interval = currentRefPosition->getInterval();
+ }
+ switch (currentRefPosition->refType)
+ {
+ case RefTypeExpUse:
+ assert(interval != nullptr);
+ assert(interval->isLocalVar);
+ printf(" Exposed use of V%02u at #%d\n", interval->varNum, currentRefPosition->rpNum);
+ break;
+ case RefTypeDummyDef:
+ assert(interval != nullptr);
+ assert(interval->isLocalVar);
+ printf(" Dummy def of V%02u at #%d\n", interval->varNum, currentRefPosition->rpNum);
+ break;
+ case RefTypeBB:
+ block->dspBlockHeader(compiler);
+ printedBlockHeader = true;
+ printf("=====\n");
+ break;
+ default:
+ printf("Unexpected RefPosition type at #%d\n", currentRefPosition->rpNum);
+ break;
+ }
+ }
+ }
+ else
+ {
+ block->dspBlockHeader(compiler);
+ printf("=====\n");
+ }
+ if (mode == LSRA_DUMP_POST && block != compiler->fgFirstBB && block->bbNum <= bbNumMaxBeforeResolution)
+ {
+ printf("Predecessor for variable locations: BB%02u\n", blockInfo[block->bbNum].predBBNum);
+ dumpInVarToRegMap(block);
+ }
+ if (block->bbNum > bbNumMaxBeforeResolution)
+ {
+ SplitEdgeInfo splitEdgeInfo;
+ splitBBNumToTargetBBNumMap->Lookup(block->bbNum, &splitEdgeInfo);
+ assert(splitEdgeInfo.toBBNum <= bbNumMaxBeforeResolution);
+ assert(splitEdgeInfo.fromBBNum <= bbNumMaxBeforeResolution);
+ printf("New block introduced for resolution from BB%02u to BB%02u\n", splitEdgeInfo.fromBBNum,
+ splitEdgeInfo.toBBNum);
+ }
+
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+ GenTree* tree = node;
+
+ genTreeOps oper = tree->OperGet();
+ TreeNodeInfo& info = tree->gtLsraInfo;
+ if (tree->gtLsraInfo.isLsraAdded)
+ {
+ // This must be one of the nodes that we add during LSRA
+
+ if (oper == GT_LCL_VAR)
+ {
+ info.srcCount = 0;
+ info.dstCount = 1;
+ }
+ else if (oper == GT_RELOAD || oper == GT_COPY)
+ {
+ info.srcCount = 1;
+ info.dstCount = 1;
+ }
+#ifdef FEATURE_SIMD
+ else if (oper == GT_SIMD)
+ {
+ if (tree->gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicUpperSave)
+ {
+ info.srcCount = 1;
+ info.dstCount = 1;
+ }
+ else
+ {
+ assert(tree->gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore);
+ info.srcCount = 2;
+ info.dstCount = 0;
+ }
+ }
+#endif // FEATURE_SIMD
+ else
+ {
+ assert(oper == GT_SWAP);
+ info.srcCount = 2;
+ info.dstCount = 0;
+ }
+ info.internalIntCount = 0;
+ info.internalFloatCount = 0;
+ }
+
+ int consume = info.srcCount;
+ int produce = info.dstCount;
+ regMaskTP killMask = RBM_NONE;
+ regMaskTP fixedMask = RBM_NONE;
+
+ lsraDispNode(tree, mode, produce != 0 && mode != LSRA_DUMP_REFPOS);
+
+ if (mode != LSRA_DUMP_REFPOS)
+ {
+ if (consume > 0)
+ {
+ printf("; ");
+
+ bool first = true;
+ for (GenTree* operand : tree->Operands())
+ {
+ DumpOperandDefs(operand, first, mode, operandString, operandStringLength);
+ }
+ }
+ }
+ else
+ {
+ // Print each RefPosition on a new line, but print all the kills for each node
+ // on a single line, and combine the fixed regs with their associated def or use.
+ bool killPrinted = false;
+ RefPosition* lastFixedRegRefPos = nullptr;
+ for (; currentRefPosition != refPositions.end() &&
+ (currentRefPosition->refType == RefTypeUse || currentRefPosition->refType == RefTypeFixedReg ||
+ currentRefPosition->refType == RefTypeKill || currentRefPosition->refType == RefTypeDef) &&
+ (currentRefPosition->nodeLocation == tree->gtSeqNum ||
+ currentRefPosition->nodeLocation == tree->gtSeqNum + 1);
+ ++currentRefPosition)
+ {
+ Interval* interval = nullptr;
+ if (currentRefPosition->isIntervalRef())
+ {
+ interval = currentRefPosition->getInterval();
+ }
+ switch (currentRefPosition->refType)
+ {
+ case RefTypeUse:
+ if (currentRefPosition->isPhysRegRef)
+ {
+ printf("\n Use:R%d(#%d)",
+ currentRefPosition->getReg()->regNum, currentRefPosition->rpNum);
+ }
+ else
+ {
+ assert(interval != nullptr);
+ printf("\n Use:");
+ interval->microDump();
+ printf("(#%d)", currentRefPosition->rpNum);
+ if (currentRefPosition->isFixedRegRef)
+ {
+ assert(genMaxOneBit(currentRefPosition->registerAssignment));
+ assert(lastFixedRegRefPos != nullptr);
+ printf(" Fixed:%s(#%d)", getRegName(currentRefPosition->assignedReg(),
+ isFloatRegType(interval->registerType)),
+ lastFixedRegRefPos->rpNum);
+ lastFixedRegRefPos = nullptr;
+ }
+ if (currentRefPosition->isLocalDefUse)
+ {
+ printf(" LocalDefUse");
+ }
+ if (currentRefPosition->lastUse)
+ {
+ printf(" *");
+ }
+ }
+ break;
+ case RefTypeDef:
+ {
+ // Print each def on a new line
+ assert(interval != nullptr);
+ printf("\n Def:");
+ interval->microDump();
+ printf("(#%d)", currentRefPosition->rpNum);
+ if (currentRefPosition->isFixedRegRef)
+ {
+ assert(genMaxOneBit(currentRefPosition->registerAssignment));
+ printf(" %s", getRegName(currentRefPosition->assignedReg(),
+ isFloatRegType(interval->registerType)));
+ }
+ if (currentRefPosition->isLocalDefUse)
+ {
+ printf(" LocalDefUse");
+ }
+ if (currentRefPosition->lastUse)
+ {
+ printf(" *");
+ }
+ if (interval->relatedInterval != nullptr)
+ {
+ printf(" Pref:");
+ interval->relatedInterval->microDump();
+ }
+ }
+ break;
+ case RefTypeKill:
+ if (!killPrinted)
+ {
+ printf("\n Kill: ");
+ killPrinted = true;
+ }
+ printf("%s ", getRegName(currentRefPosition->assignedReg(),
+ isFloatRegType(currentRefPosition->getReg()->registerType)));
+ break;
+ case RefTypeFixedReg:
+ lastFixedRegRefPos = currentRefPosition;
+ break;
+ default:
+ printf("Unexpected RefPosition type at #%d\n", currentRefPosition->rpNum);
+ break;
+ }
+ }
+ }
+ printf("\n");
+ if (info.internalIntCount != 0 && mode != LSRA_DUMP_REFPOS)
+ {
+ printf("\tinternal (%d):\t", info.internalIntCount);
+ if (mode == LSRA_DUMP_POST)
+ {
+ dumpRegMask(tree->gtRsvdRegs);
+ }
+ else if ((info.getInternalCandidates(this) & allRegs(TYP_INT)) != allRegs(TYP_INT))
+ {
+ dumpRegMask(info.getInternalCandidates(this) & allRegs(TYP_INT));
+ }
+ printf("\n");
+ }
+ if (info.internalFloatCount != 0 && mode != LSRA_DUMP_REFPOS)
+ {
+ printf("\tinternal (%d):\t", info.internalFloatCount);
+ if (mode == LSRA_DUMP_POST)
+ {
+ dumpRegMask(tree->gtRsvdRegs);
+ }
+ else if ((info.getInternalCandidates(this) & allRegs(TYP_FLOAT)) != allRegs(TYP_FLOAT))
+ {
+ dumpRegMask(info.getInternalCandidates(this) & allRegs(TYP_FLOAT));
+ }
+ printf("\n");
+ }
+ }
+ if (mode == LSRA_DUMP_POST)
+ {
+ dumpOutVarToRegMap(block);
+ }
+ printf("\n");
+ }
+ printf("\n\n");
+}
+
+void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event,
+ Interval* interval,
+ regNumber reg,
+ BasicBlock* currentBlock)
+{
+ if (!(VERBOSE))
+ {
+ return;
+ }
+ switch (event)
+ {
+ // Conflicting def/use
+ case LSRA_EVENT_DEFUSE_CONFLICT:
+ if (!dumpTerse)
+ {
+ printf(" Def and Use have conflicting register requirements:");
+ }
+ else
+ {
+ printf("DUconflict ");
+ dumpRegRecords();
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_FIXED_DELAY_USE:
+ if (!dumpTerse)
+ {
+ printf(" Can't change useAssignment ");
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_CASE1:
+ if (!dumpTerse)
+ {
+ printf(" case #1, use the defRegAssignment\n");
+ }
+ else
+ {
+ printf(indentFormat, " case #1 use defRegAssignment");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_CASE2:
+ if (!dumpTerse)
+ {
+ printf(" case #2, use the useRegAssignment\n");
+ }
+ else
+ {
+ printf(indentFormat, " case #2 use useRegAssignment");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_CASE3:
+ if (!dumpTerse)
+ {
+ printf(" case #3, change the defRegAssignment to the use regs\n");
+ }
+ else
+ {
+ printf(indentFormat, " case #3 use useRegAssignment");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_CASE4:
+ if (!dumpTerse)
+ {
+ printf(" case #4, change the useRegAssignment to the def regs\n");
+ }
+ else
+ {
+ printf(indentFormat, " case #4 use defRegAssignment");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_CASE5:
+ if (!dumpTerse)
+ {
+ printf(" case #5, Conflicting Def and Use single-register requirements require copies - set def to all "
+ "regs of the appropriate type\n");
+ }
+ else
+ {
+ printf(indentFormat, " case #5 set def to all regs");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_CASE6:
+ if (!dumpTerse)
+ {
+ printf(" case #6, Conflicting Def and Use register requirements require a copy\n");
+ }
+ else
+ {
+ printf(indentFormat, " case #6 need a copy");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+
+ case LSRA_EVENT_SPILL:
+ if (!dumpTerse)
+ {
+ printf("Spilled:\n");
+ interval->dump();
+ }
+ else
+ {
+ assert(interval != nullptr && interval->assignedReg != nullptr);
+ printf("Spill %-4s ", getRegName(interval->assignedReg->regNum));
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_SPILL_EXTENDED_LIFETIME:
+ if (!dumpTerse)
+ {
+ printf(" Spilled extended lifetime var V%02u at last use; not marked for actual spill.",
+ interval->intervalIndex);
+ }
+ break;
+
+ // Restoring the previous register
+ case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL:
+ assert(interval != nullptr);
+ if (!dumpTerse)
+ {
+ printf(" Assign register %s to previous interval Ivl:%d after spill\n", getRegName(reg),
+ interval->intervalIndex);
+ }
+ else
+ {
+ // If we spilled, then the dump is already pre-indented, but we need to pre-indent
+ // for the subsequent allocation with a dumpEmptyRefPosition().
+ printf("SRstr %-4s ", getRegName(reg));
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL:
+ assert(interval != nullptr);
+ if (!dumpTerse)
+ {
+ printf(" Assign register %s to previous interval Ivl:%d\n", getRegName(reg), interval->intervalIndex);
+ }
+ else
+ {
+ if (activeRefPosition == nullptr)
+ {
+ printf(emptyRefPositionFormat, "");
+ }
+ printf("Restr %-4s ", getRegName(reg));
+ dumpRegRecords();
+ if (activeRefPosition != nullptr)
+ {
+ printf(emptyRefPositionFormat, "");
+ }
+ }
+ break;
+
+ // Done with GC Kills
+ case LSRA_EVENT_DONE_KILL_GC_REFS:
+ printf("DoneKillGC ");
+ break;
+
+ // Block boundaries
+ case LSRA_EVENT_START_BB:
+ assert(currentBlock != nullptr);
+ if (!dumpTerse)
+ {
+ printf("\n\n Live Vars(Regs) at start of BB%02u (from pred BB%02u):", currentBlock->bbNum,
+ blockInfo[currentBlock->bbNum].predBBNum);
+ dumpVarToRegMap(inVarToRegMaps[currentBlock->bbNum]);
+ }
+ break;
+ case LSRA_EVENT_END_BB:
+ if (!dumpTerse)
+ {
+ printf("\n\n Live Vars(Regs) after BB%02u:", currentBlock->bbNum);
+ dumpVarToRegMap(outVarToRegMaps[currentBlock->bbNum]);
+ }
+ break;
+
+ case LSRA_EVENT_FREE_REGS:
+ if (!dumpTerse)
+ {
+ printf("Freeing registers:\n");
+ }
+ break;
+
+ // Characteristics of the current RefPosition
+ case LSRA_EVENT_INCREMENT_RANGE_END:
+ if (!dumpTerse)
+ {
+ printf(" Incrementing nextPhysRegLocation for %s\n", getRegName(reg));
+ }
+ // else ???
+ break;
+ case LSRA_EVENT_LAST_USE:
+ if (!dumpTerse)
+ {
+ printf(" Last use, marked to be freed\n");
+ }
+ break;
+ case LSRA_EVENT_LAST_USE_DELAYED:
+ if (!dumpTerse)
+ {
+ printf(" Last use, marked to be freed (delayed)\n");
+ }
+ break;
+ case LSRA_EVENT_NEEDS_NEW_REG:
+ if (!dumpTerse)
+ {
+ printf(" Needs new register; mark %s to be freed\n", getRegName(reg));
+ }
+ else
+ {
+ printf("Free %-4s ", getRegName(reg));
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+
+ // Allocation decisions
+ case LSRA_EVENT_FIXED_REG:
+ case LSRA_EVENT_EXP_USE:
+ if (!dumpTerse)
+ {
+ printf("No allocation\n");
+ }
+ else
+ {
+ printf("Keep %-4s ", getRegName(reg));
+ }
+ break;
+ case LSRA_EVENT_ZERO_REF:
+ assert(interval != nullptr && interval->isLocalVar);
+ if (!dumpTerse)
+ {
+ printf("Marking V%02u as last use there are no actual references\n", interval->varNum);
+ }
+ else
+ {
+ printf("NoRef ");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_KEPT_ALLOCATION:
+ if (!dumpTerse)
+ {
+ printf("already allocated %4s\n", getRegName(reg));
+ }
+ else
+ {
+ printf("Keep %-4s ", getRegName(reg));
+ }
+ break;
+ case LSRA_EVENT_COPY_REG:
+ assert(interval != nullptr && interval->recentRefPosition != nullptr);
+ if (!dumpTerse)
+ {
+ printf("allocated %s as copyReg\n\n", getRegName(reg));
+ }
+ else
+ {
+ printf("Copy %-4s ", getRegName(reg));
+ }
+ break;
+ case LSRA_EVENT_MOVE_REG:
+ assert(interval != nullptr && interval->recentRefPosition != nullptr);
+ if (!dumpTerse)
+ {
+ printf(" needs a new register; marked as moveReg\n");
+ }
+ else
+ {
+ printf("Move %-4s ", getRegName(reg));
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_ALLOC_REG:
+ if (!dumpTerse)
+ {
+ printf("allocated %s\n", getRegName(reg));
+ }
+ else
+ {
+ printf("Alloc %-4s ", getRegName(reg));
+ }
+ break;
+ case LSRA_EVENT_REUSE_REG:
+ if (!dumpTerse)
+ {
+ printf("reused constant in %s\n", getRegName(reg));
+ }
+ else
+ {
+ printf("Reuse %-4s ", getRegName(reg));
+ }
+ break;
+ case LSRA_EVENT_ALLOC_SPILLED_REG:
+ if (!dumpTerse)
+ {
+ printf("allocated spilled register %s\n", getRegName(reg));
+ }
+ else
+ {
+ printf("Steal %-4s ", getRegName(reg));
+ }
+ break;
+ case LSRA_EVENT_NO_ENTRY_REG_ALLOCATED:
+ assert(interval != nullptr && interval->isLocalVar);
+ if (!dumpTerse)
+ {
+ printf("Not allocating an entry register for V%02u due to low ref count\n", interval->varNum);
+ }
+ else
+ {
+ printf("LoRef ");
+ }
+ break;
+ case LSRA_EVENT_NO_REG_ALLOCATED:
+ if (!dumpTerse)
+ {
+ printf("no register allocated\n");
+ }
+ else
+ {
+ printf("NoReg ");
+ }
+ break;
+ case LSRA_EVENT_RELOAD:
+ if (!dumpTerse)
+ {
+ printf(" Marked for reload\n");
+ }
+ else
+ {
+ printf("ReLod %-4s ", getRegName(reg));
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_SPECIAL_PUTARG:
+ if (!dumpTerse)
+ {
+ printf(" Special case of putArg - using lclVar that's in the expected reg\n");
+ }
+ else
+ {
+ printf("PtArg %-4s ", getRegName(reg));
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// dumpRegRecordHeader: Dump the header for a column-based dump of the register state.
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Reg names fit in 4 characters (minimum width of the columns)
+//
+// Notes:
+// In order to make the table as dense as possible (for ease of reading the dumps),
+// we determine the minimum regColumnWidth width required to represent:
+// regs, by name (e.g. eax or xmm0) - this is fixed at 4 characters.
+// intervals, as Vnn for lclVar intervals, or as I<num> for other intervals.
+// The table is indented by the amount needed for dumpRefPositionShort, which is
+// captured in shortRefPositionDumpWidth.
+//
+void LinearScan::dumpRegRecordHeader()
+{
+ printf("The following table has one or more rows for each RefPosition that is handled during allocation.\n"
+ "The first column provides the basic information about the RefPosition, with its type (e.g. Def,\n"
+ "Use, Fixd) followed by a '*' if it is a last use, and a 'D' if it is delayRegFree, and then the\n"
+ "action taken during allocation (e.g. Alloc a new register, or Keep an existing one).\n"
+ "The subsequent columns show the Interval occupying each register, if any, followed by 'a' if it is\n"
+ "active, and 'i'if it is inactive. Columns are only printed up to the last modifed register, which\n"
+ "may increase during allocation, in which case additional columns will appear. Registers which are\n"
+ "not marked modified have ---- in their column.\n\n");
+
+ // First, determine the width of each register column (which holds a reg name in the
+ // header, and an interval name in each subsequent row).
+ int intervalNumberWidth = (int)log10((double)intervals.size()) + 1;
+ // The regColumnWidth includes the identifying character (I or V) and an 'i' or 'a' (inactive or active)
+ regColumnWidth = intervalNumberWidth + 2;
+ if (regColumnWidth < 4)
+ {
+ regColumnWidth = 4;
+ }
+ sprintf_s(intervalNameFormat, MAX_FORMAT_CHARS, "%%c%%-%dd", regColumnWidth - 2);
+ sprintf_s(regNameFormat, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
+
+ // Next, determine the width of the short RefPosition (see dumpRefPositionShort()).
+ // This is in the form:
+ // nnn.#mmm NAME TYPEld
+ // Where:
+ // nnn is the Location, right-justified to the width needed for the highest location.
+ // mmm is the RefPosition rpNum, left-justified to the width needed for the highest rpNum.
+ // NAME is dumped by dumpReferentName(), and is "regColumnWidth".
+ // TYPE is RefTypeNameShort, and is 4 characters
+ // l is either '*' (if a last use) or ' ' (otherwise)
+ // d is either 'D' (if a delayed use) or ' ' (otherwise)
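+ //
+ // For instance (illustrative values only), a last use of local-var interval 5 at
+ // location 12 might be dumped as "12.#34 V5 Use* ", followed by the allocation
+ // action and the per-register columns (e.g. "Alloc rax ...").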
+
+ maxNodeLocation = (maxNodeLocation == 0)
+ ? 1
+ : maxNodeLocation; // corner case of a method with an infinite loop without any gentree nodes
+ assert(maxNodeLocation >= 1);
+ assert(refPositions.size() >= 1);
+ int nodeLocationWidth = (int)log10((double)maxNodeLocation) + 1;
+ int refPositionWidth = (int)log10((double)refPositions.size()) + 1;
+ int refTypeInfoWidth = 4 /*TYPE*/ + 2 /* last-use and delayed */ + 1 /* space */;
+ int locationAndRPNumWidth = nodeLocationWidth + 2 /* .# */ + refPositionWidth + 1 /* space */;
+ int shortRefPositionDumpWidth = locationAndRPNumWidth + regColumnWidth + 1 /* space */ + refTypeInfoWidth;
+ sprintf_s(shortRefPositionFormat, MAX_FORMAT_CHARS, "%%%dd.#%%-%dd ", nodeLocationWidth, refPositionWidth);
+ sprintf_s(emptyRefPositionFormat, MAX_FORMAT_CHARS, "%%-%ds", shortRefPositionDumpWidth);
+
+ // The width of the "allocation info"
+ // - a 5-character allocation decision
+ // - a space
+ // - a 4-character register
+ // - a space
+ int allocationInfoWidth = 5 + 1 + 4 + 1;
+
+ // Next, determine the width of the legend for each row. This includes:
+ // - a short RefPosition dump (shortRefPositionDumpWidth), which includes a space
+ // - the allocation info (allocationInfoWidth), which also includes a space
+
+ regTableIndent = shortRefPositionDumpWidth + allocationInfoWidth;
+
+ // BBnn printed left-justified in the NAME Typeld and allocationInfo space.
+ int bbDumpWidth = regColumnWidth + 1 + refTypeInfoWidth + allocationInfoWidth;
+ int bbNumWidth = (int)log10((double)compiler->fgBBNumMax) + 1;
+ // In the unlikely event that BB numbers overflow the space, we'll simply omit the predBB
+ int predBBNumDumpSpace = regTableIndent - locationAndRPNumWidth - bbNumWidth - 9; // 'BB' + ' PredBB'
+ if (predBBNumDumpSpace < bbNumWidth)
+ {
+ sprintf_s(bbRefPosFormat, MAX_LEGEND_FORMAT_CHARS, "BB%%-%dd", shortRefPositionDumpWidth - 2);
+ }
+ else
+ {
+ sprintf_s(bbRefPosFormat, MAX_LEGEND_FORMAT_CHARS, "BB%%-%dd PredBB%%-%dd", bbNumWidth, predBBNumDumpSpace);
+ }
+
+ if (compiler->shouldDumpASCIITrees())
+ {
+ columnSeparator = "|";
+ line = "-";
+ leftBox = "+";
+ middleBox = "+";
+ rightBox = "+";
+ }
+ else
+ {
+ columnSeparator = "\xe2\x94\x82";
+ line = "\xe2\x94\x80";
+ leftBox = "\xe2\x94\x9c";
+ middleBox = "\xe2\x94\xbc";
+ rightBox = "\xe2\x94\xa4";
+ }
+ sprintf_s(indentFormat, MAX_FORMAT_CHARS, "%%-%ds", regTableIndent);
+
+ // Now, set up the legend format for the RefPosition info
+ sprintf_s(legendFormat, MAX_LEGEND_FORMAT_CHARS, "%%-%d.%ds%%-%d.%ds%%-%ds%%s", nodeLocationWidth + 1,
+ nodeLocationWidth + 1, refPositionWidth + 2, refPositionWidth + 2, regColumnWidth + 1);
+
+ // Finally, print a "title row" including the legend and the reg names
+ dumpRegRecordTitle();
+}
+
+int LinearScan::getLastUsedRegNumIndex()
+{
+ int lastUsedRegNumIndex = 0;
+ regMaskTP usedRegsMask = compiler->codeGen->regSet.rsGetModifiedRegsMask();
+ int lastRegNumIndex = compiler->compFloatingPointUsed ? REG_FP_LAST : REG_INT_LAST;
+ for (int regNumIndex = 0; regNumIndex <= lastRegNumIndex; regNumIndex++)
+ {
+ if ((usedRegsMask & genRegMask((regNumber)regNumIndex)) != 0)
+ {
+ lastUsedRegNumIndex = regNumIndex;
+ }
+ }
+ return lastUsedRegNumIndex;
+}
+
+void LinearScan::dumpRegRecordTitleLines()
+{
+ for (int i = 0; i < regTableIndent; i++)
+ {
+ printf("%s", line);
+ }
+ int lastUsedRegNumIndex = getLastUsedRegNumIndex();
+ for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
+ {
+ printf("%s", middleBox);
+ for (int i = 0; i < regColumnWidth; i++)
+ {
+ printf("%s", line);
+ }
+ }
+ printf("%s\n", rightBox);
+}
+void LinearScan::dumpRegRecordTitle()
+{
+ dumpRegRecordTitleLines();
+
+ // Print out the legend for the RefPosition info
+ printf(legendFormat, "Loc ", "RP# ", "Name ", "Type Action Reg ");
+
+ // Print out the register name column headers
+ char columnFormatArray[MAX_FORMAT_CHARS];
+ sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%s%%-%d.%ds", columnSeparator, regColumnWidth, regColumnWidth);
+ int lastUsedRegNumIndex = getLastUsedRegNumIndex();
+ for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
+ {
+ regNumber regNum = (regNumber)regNumIndex;
+ const char* regName = getRegName(regNum);
+ printf(columnFormatArray, regName);
+ }
+ printf("%s\n", columnSeparator);
+
+ rowCountSinceLastTitle = 0;
+
+ dumpRegRecordTitleLines();
+}
+
+void LinearScan::dumpRegRecords()
+{
+ static char columnFormatArray[18];
+ int lastUsedRegNumIndex = getLastUsedRegNumIndex();
+ regMaskTP usedRegsMask = compiler->codeGen->regSet.rsGetModifiedRegsMask();
+
+ for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
+ {
+ printf("%s", columnSeparator);
+ RegRecord& regRecord = physRegs[regNumIndex];
+ Interval* interval = regRecord.assignedInterval;
+ if (interval != nullptr)
+ {
+ dumpIntervalName(interval);
+ char activeChar = interval->isActive ? 'a' : 'i';
+ printf("%c", activeChar);
+ }
+ else if (regRecord.isBusyUntilNextKill)
+ {
+ sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
+ printf(columnFormatArray, "Busy");
+ }
+ else if ((usedRegsMask & genRegMask((regNumber)regNumIndex)) == 0)
+ {
+ sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
+ printf(columnFormatArray, "----");
+ }
+ else
+ {
+ sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
+ printf(columnFormatArray, "");
+ }
+ }
+ printf("%s\n", columnSeparator);
+
+ if (rowCountSinceLastTitle > MAX_ROWS_BETWEEN_TITLES)
+ {
+ dumpRegRecordTitle();
+ }
+ rowCountSinceLastTitle++;
+}
+
+void LinearScan::dumpIntervalName(Interval* interval)
+{
+ char intervalChar;
+ if (interval->isLocalVar)
+ {
+ intervalChar = 'V';
+ }
+ else if (interval->isConstant)
+ {
+ intervalChar = 'C';
+ }
+ else
+ {
+ intervalChar = 'I';
+ }
+ printf(intervalNameFormat, intervalChar, interval->intervalIndex);
+}
+
+void LinearScan::dumpEmptyRefPosition()
+{
+ printf(emptyRefPositionFormat, "");
+}
+
+// Note that the size of this dump is computed in dumpRegRecordHeader().
+//
+void LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock)
+{
+ BasicBlock* block = currentBlock;
+ if (refPosition->refType == RefTypeBB)
+ {
+ // Always print a title row before a RefTypeBB (except for the first, because we
+ // will already have printed it before the parameters)
+ if ((block != compiler->fgFirstBB) && (block != nullptr))
+ {
+ dumpRegRecordTitle();
+ }
+ }
+ printf(shortRefPositionFormat, refPosition->nodeLocation, refPosition->rpNum);
+ if (refPosition->refType == RefTypeBB)
+ {
+ if (block == nullptr)
+ {
+ printf(regNameFormat, "END");
+ printf(" ");
+ printf(regNameFormat, "");
+ }
+ else
+ {
+ printf(bbRefPosFormat, block->bbNum, block == compiler->fgFirstBB ? 0 : blockInfo[block->bbNum].predBBNum);
+ }
+ }
+ else if (refPosition->isIntervalRef())
+ {
+ Interval* interval = refPosition->getInterval();
+ dumpIntervalName(interval);
+ char lastUseChar = ' ';
+ char delayChar = ' ';
+ if (refPosition->lastUse)
+ {
+ lastUseChar = '*';
+ if (refPosition->delayRegFree)
+ {
+ delayChar = 'D';
+ }
+ }
+ printf(" %s%c%c ", getRefTypeShortName(refPosition->refType), lastUseChar, delayChar);
+ }
+ else if (refPosition->isPhysRegRef)
+ {
+ RegRecord* regRecord = refPosition->getReg();
+ printf(regNameFormat, getRegName(regRecord->regNum));
+ printf(" %s ", getRefTypeShortName(refPosition->refType));
+ }
+ else
+ {
+ assert(refPosition->refType == RefTypeKillGCRefs);
+ // There's no interval or reg name associated with this.
+ printf(regNameFormat, " ");
+ printf(" %s ", getRefTypeShortName(refPosition->refType));
+ }
+}
+
+//------------------------------------------------------------------------
+// LinearScan::IsResolutionMove:
+// Returns true if the given node is a move inserted by LSRA
+// resolution.
+//
+// Arguments:
+// node - the node to check.
+//
+bool LinearScan::IsResolutionMove(GenTree* node)
+{
+ if (!node->gtLsraInfo.isLsraAdded)
+ {
+ return false;
+ }
+
+ switch (node->OperGet())
+ {
+ case GT_LCL_VAR:
+ case GT_COPY:
+ return node->gtLsraInfo.isLocalDefUse;
+
+ case GT_SWAP:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+//------------------------------------------------------------------------
+// LinearScan::IsResolutionNode:
+// Returns true if the given node is either a move inserted by LSRA
+// resolution or an operand to such a move.
+//
+// Arguments:
+// containingRange - the range that contains the node to check.
+// node - the node to check.
+//
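+// For example (illustrative): an lsra-added GT_LCL_VAR that is consumed by a GT_SWAP is not
+// itself a resolution move, but walking up to its user (the GT_SWAP) identifies it as a
+// resolution node.
+//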
+bool LinearScan::IsResolutionNode(LIR::Range& containingRange, GenTree* node)
+{
+ for (;;)
+ {
+ if (IsResolutionMove(node))
+ {
+ return true;
+ }
+
+ if (!node->gtLsraInfo.isLsraAdded || (node->OperGet() != GT_LCL_VAR))
+ {
+ return false;
+ }
+
+ LIR::Use use;
+ bool foundUse = containingRange.TryGetUse(node, &use);
+ assert(foundUse);
+
+ node = use.User();
+ }
+}
+
+//------------------------------------------------------------------------
+// verifyFinalAllocation: Traverse the RefPositions and verify various invariants.
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// If verbose is set, this will also dump a table of the final allocations.
+void LinearScan::verifyFinalAllocation()
+{
+ if (VERBOSE)
+ {
+ printf("\nFinal allocation\n");
+ }
+
+ // Clear register assignments.
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+ physRegRecord->assignedInterval = nullptr;
+ }
+
+ for (auto& interval : intervals)
+ {
+ interval.assignedReg = nullptr;
+ interval.physReg = REG_NA;
+ }
+
+ DBEXEC(VERBOSE, dumpRegRecordTitle());
+
+ BasicBlock* currentBlock = nullptr;
+ GenTree* firstBlockEndResolutionNode = nullptr;
+ regMaskTP regsToFree = RBM_NONE;
+ regMaskTP delayRegsToFree = RBM_NONE;
+ LsraLocation currentLocation = MinLocation;
+ for (auto& refPosition : refPositions)
+ {
+ RefPosition* currentRefPosition = &refPosition;
+ Interval* interval = nullptr;
+ RegRecord* regRecord = nullptr;
+ regNumber regNum = REG_NA;
+ if (currentRefPosition->refType == RefTypeBB)
+ {
+ regsToFree |= delayRegsToFree;
+ delayRegsToFree = RBM_NONE;
+ // For BB RefPositions, wait until we dump the "end of block" info before dumping the basic RefPosition
+ // info.
+ }
+ else
+ {
+ // For other RefPosition types, we can dump the basic RefPosition info now.
+ DBEXEC(VERBOSE, dumpRefPositionShort(currentRefPosition, currentBlock));
+
+ if (currentRefPosition->isPhysRegRef)
+ {
+ regRecord = currentRefPosition->getReg();
+ regRecord->recentRefPosition = currentRefPosition;
+ regNum = regRecord->regNum;
+ }
+ else if (currentRefPosition->isIntervalRef())
+ {
+ interval = currentRefPosition->getInterval();
+ interval->recentRefPosition = currentRefPosition;
+ if (currentRefPosition->registerAssignment != RBM_NONE)
+ {
+ if (!genMaxOneBit(currentRefPosition->registerAssignment))
+ {
+ assert(currentRefPosition->refType == RefTypeExpUse ||
+ currentRefPosition->refType == RefTypeDummyDef);
+ }
+ else
+ {
+ regNum = currentRefPosition->assignedReg();
+ regRecord = getRegisterRecord(regNum);
+ }
+ }
+ }
+ }
+
+ LsraLocation newLocation = currentRefPosition->nodeLocation;
+
+ if (newLocation > currentLocation)
+ {
+ // Free Registers.
+ // We could use the freeRegisters() method, but we'd have to carefully manage the active intervals.
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ regMaskTP regMask = genRegMask(reg);
+ if ((regsToFree & regMask) != RBM_NONE)
+ {
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+ physRegRecord->assignedInterval = nullptr;
+ }
+ }
+ regsToFree = delayRegsToFree;
+ delayRegsToFree = RBM_NONE;
+ }
+ currentLocation = newLocation;
+
+ switch (currentRefPosition->refType)
+ {
+ case RefTypeBB:
+ {
+ if (currentBlock == nullptr)
+ {
+ currentBlock = startBlockSequence();
+ }
+ else
+ {
+ // Verify the resolution moves at the end of the previous block.
+ for (GenTree* node = firstBlockEndResolutionNode; node != nullptr; node = node->gtNext)
+ {
+ // Only verify nodes that are actually moves; don't bother with the nodes that are
+ // operands to moves.
+ if (IsResolutionMove(node))
+ {
+ verifyResolutionMove(node, currentLocation);
+ }
+ }
+
+ // Validate the locations at the end of the previous block.
+ VarToRegMap outVarToRegMap = outVarToRegMaps[currentBlock->bbNum];
+ VARSET_ITER_INIT(compiler, iter, currentBlock->bbLiveOut, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ regNumber regNum = getVarReg(outVarToRegMap, varNum);
+ interval = getIntervalForLocalVar(varNum);
+ assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK));
+ interval->physReg = REG_NA;
+ interval->assignedReg = nullptr;
+ interval->isActive = false;
+ }
+
+ // Clear register assignments.
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+ physRegRecord->assignedInterval = nullptr;
+ }
+
+ // Now, record the locations at the beginning of this block.
+ currentBlock = moveToNextBlock();
+ }
+
+ if (currentBlock != nullptr)
+ {
+ VarToRegMap inVarToRegMap = inVarToRegMaps[currentBlock->bbNum];
+ VARSET_ITER_INIT(compiler, iter, currentBlock->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ regNumber regNum = getVarReg(inVarToRegMap, varNum);
+ interval = getIntervalForLocalVar(varNum);
+ interval->physReg = regNum;
+ interval->assignedReg = &(physRegs[regNum]);
+ interval->isActive = true;
+ physRegs[regNum].assignedInterval = interval;
+ }
+
+ if (VERBOSE)
+ {
+ dumpRefPositionShort(currentRefPosition, currentBlock);
+ dumpRegRecords();
+ }
+
+ // Finally, handle the resolution moves, if any, at the beginning of the next block.
+ firstBlockEndResolutionNode = nullptr;
+ bool foundNonResolutionNode = false;
+
+ LIR::Range& currentBlockRange = LIR::AsRange(currentBlock);
+ for (GenTree* node : currentBlockRange.NonPhiNodes())
+ {
+ if (IsResolutionNode(currentBlockRange, node))
+ {
+ if (foundNonResolutionNode)
+ {
+ firstBlockEndResolutionNode = node;
+ break;
+ }
+ else if (IsResolutionMove(node))
+ {
+ // Only verify nodes that are actually moves; don't bother with the nodes that are
+ // operands to moves.
+ verifyResolutionMove(node, currentLocation);
+ }
+ }
+ else
+ {
+ foundNonResolutionNode = true;
+ }
+ }
+ }
+ }
+
+ break;
+
+ case RefTypeKill:
+ assert(regRecord != nullptr);
+ assert(regRecord->assignedInterval == nullptr);
+ dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
+ break;
+ case RefTypeFixedReg:
+ assert(regRecord != nullptr);
+ dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
+ break;
+
+ case RefTypeUpperVectorSaveDef:
+ case RefTypeUpperVectorSaveUse:
+ case RefTypeDef:
+ case RefTypeUse:
+ case RefTypeParamDef:
+ case RefTypeZeroInit:
+ assert(interval != nullptr);
+
+ if (interval->isSpecialPutArg)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, interval, regNum);
+ break;
+ }
+ if (currentRefPosition->reload)
+ {
+ interval->isActive = true;
+ assert(regNum != REG_NA);
+ interval->physReg = regNum;
+ interval->assignedReg = regRecord;
+ regRecord->assignedInterval = interval;
+ dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, nullptr, regRecord->regNum, currentBlock);
+ }
+ if (regNum == REG_NA)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, interval);
+ }
+ else if (RefTypeIsDef(currentRefPosition->refType))
+ {
+ interval->isActive = true;
+ if (VERBOSE)
+ {
+ if (interval->isConstant && (currentRefPosition->treeNode != nullptr) &&
+ currentRefPosition->treeNode->IsReuseRegVal())
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_REUSE_REG, nullptr, regRecord->regNum, currentBlock);
+ }
+ else
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, nullptr, regRecord->regNum, currentBlock);
+ }
+ }
+ }
+ else if (currentRefPosition->copyReg)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, interval, regRecord->regNum, currentBlock);
+ }
+ else if (currentRefPosition->moveReg)
+ {
+ assert(interval->assignedReg != nullptr);
+ interval->assignedReg->assignedInterval = nullptr;
+ interval->physReg = regNum;
+ interval->assignedReg = regRecord;
+ regRecord->assignedInterval = interval;
+ if (VERBOSE)
+ {
+ printf("Move %-4s ", getRegName(regRecord->regNum));
+ }
+ }
+ else
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
+ }
+ if (currentRefPosition->lastUse || currentRefPosition->spillAfter)
+ {
+ interval->isActive = false;
+ }
+ if (regNum != REG_NA)
+ {
+ if (currentRefPosition->spillAfter)
+ {
+ if (VERBOSE)
+ {
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ printf("Spill %-4s ", getRegName(regNum));
+ }
+ }
+ else if (currentRefPosition->copyReg)
+ {
+ regRecord->assignedInterval = interval;
+ }
+ else
+ {
+ interval->physReg = regNum;
+ interval->assignedReg = regRecord;
+ regRecord->assignedInterval = interval;
+ }
+ }
+ break;
+ case RefTypeKillGCRefs:
+ // No action to take.
+ // However, we will assert that, at resolution time, no registers contain GC refs.
+ {
+ DBEXEC(VERBOSE, printf(" "));
+ regMaskTP candidateRegs = currentRefPosition->registerAssignment;
+ while (candidateRegs != RBM_NONE)
+ {
+ regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
+ candidateRegs &= ~nextRegBit;
+ regNumber nextReg = genRegNumFromMask(nextRegBit);
+ RegRecord* regRecord = getRegisterRecord(nextReg);
+ Interval* assignedInterval = regRecord->assignedInterval;
+ assert(assignedInterval == nullptr || !varTypeIsGC(assignedInterval->registerType));
+ }
+ }
+ break;
+
+ case RefTypeExpUse:
+ case RefTypeDummyDef:
+ // Do nothing; these will be handled by the RefTypeBB.
+ DBEXEC(VERBOSE, printf(" "));
+ break;
+
+ case RefTypeInvalid:
+                // For these 'currentRefPosition->refType' values, there is no action to take.
+ break;
+ }
+
+ if (currentRefPosition->refType != RefTypeBB)
+ {
+ DBEXEC(VERBOSE, dumpRegRecords());
+ if (interval != nullptr)
+ {
+ if (currentRefPosition->copyReg)
+ {
+ assert(interval->physReg != regNum);
+ regRecord->assignedInterval = nullptr;
+ assert(interval->assignedReg != nullptr);
+ regRecord = interval->assignedReg;
+ }
+ if (currentRefPosition->spillAfter || currentRefPosition->lastUse)
+ {
+ interval->physReg = REG_NA;
+ interval->assignedReg = nullptr;
+
+                    // regRecord could be null if the RefPosition is to be allocated a
+                    // reg only if profitable.
+ if (regRecord != nullptr)
+ {
+ regRecord->assignedInterval = nullptr;
+ }
+ else
+ {
+ assert(currentRefPosition->AllocateIfProfitable());
+ }
+ }
+ }
+ }
+ }
+
+ // Now, verify the resolution blocks.
+    // Currently these are nearly always at the end of the method, but that may not always be the case.
+ // So, we'll go through all the BBs looking for blocks whose bbNum is greater than bbNumMaxBeforeResolution.
+ for (BasicBlock* currentBlock = compiler->fgFirstBB; currentBlock != nullptr; currentBlock = currentBlock->bbNext)
+ {
+ if (currentBlock->bbNum > bbNumMaxBeforeResolution)
+ {
+ if (VERBOSE)
+ {
+ dumpRegRecordTitle();
+ printf(shortRefPositionFormat, 0, 0);
+ assert(currentBlock->bbPreds != nullptr && currentBlock->bbPreds->flBlock != nullptr);
+ printf(bbRefPosFormat, currentBlock->bbNum, currentBlock->bbPreds->flBlock->bbNum);
+ dumpRegRecords();
+ }
+
+ // Clear register assignments.
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+ physRegRecord->assignedInterval = nullptr;
+ }
+
+ // Set the incoming register assignments
+ VarToRegMap inVarToRegMap = getInVarToRegMap(currentBlock->bbNum);
+ VARSET_ITER_INIT(compiler, iter, currentBlock->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ regNumber regNum = getVarReg(inVarToRegMap, varNum);
+ Interval* interval = getIntervalForLocalVar(varNum);
+ interval->physReg = regNum;
+ interval->assignedReg = &(physRegs[regNum]);
+ interval->isActive = true;
+ physRegs[regNum].assignedInterval = interval;
+ }
+
+ // Verify the moves in this block
+ LIR::Range& currentBlockRange = LIR::AsRange(currentBlock);
+ for (GenTree* node : currentBlockRange.NonPhiNodes())
+ {
+ assert(IsResolutionNode(currentBlockRange, node));
+ if (IsResolutionMove(node))
+ {
+ // Only verify nodes that are actually moves; don't bother with the nodes that are
+ // operands to moves.
+ verifyResolutionMove(node, currentLocation);
+ }
+ }
+
+ // Verify the outgoing register assignments
+ {
+ VarToRegMap outVarToRegMap = getOutVarToRegMap(currentBlock->bbNum);
+ VARSET_ITER_INIT(compiler, iter, currentBlock->bbLiveOut, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ regNumber regNum = getVarReg(outVarToRegMap, varNum);
+ Interval* interval = getIntervalForLocalVar(varNum);
+ assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK));
+ interval->physReg = REG_NA;
+ interval->assignedReg = nullptr;
+ interval->isActive = false;
+ }
+ }
+ }
+ }
+
+ DBEXEC(VERBOSE, printf("\n"));
+}
+
+//------------------------------------------------------------------------
+// verifyResolutionMove: Verify a resolution move. Called by verifyFinalAllocation()
+//
+// Arguments:
+// resolutionMove - A GenTree* that must be a resolution move.
+// currentLocation - The LsraLocation of the most recent RefPosition that has been verified.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// If verbose is set, this will also dump the moves into the table of final allocations.
+void LinearScan::verifyResolutionMove(GenTree* resolutionMove, LsraLocation currentLocation)
+{
+ GenTree* dst = resolutionMove;
+ assert(IsResolutionMove(dst));
+
+ if (dst->OperGet() == GT_SWAP)
+ {
+ GenTreeLclVarCommon* left = dst->gtGetOp1()->AsLclVarCommon();
+ GenTreeLclVarCommon* right = dst->gtGetOp2()->AsLclVarCommon();
+ regNumber leftRegNum = left->gtRegNum;
+ regNumber rightRegNum = right->gtRegNum;
+ Interval* leftInterval = getIntervalForLocalVar(left->gtLclNum);
+ Interval* rightInterval = getIntervalForLocalVar(right->gtLclNum);
+ assert(leftInterval->physReg == leftRegNum && rightInterval->physReg == rightRegNum);
+ leftInterval->physReg = rightRegNum;
+ rightInterval->physReg = leftRegNum;
+ physRegs[rightRegNum].assignedInterval = leftInterval;
+ physRegs[leftRegNum].assignedInterval = rightInterval;
+ if (VERBOSE)
+ {
+ printf(shortRefPositionFormat, currentLocation, 0);
+ dumpIntervalName(leftInterval);
+ printf(" Swap ");
+ printf(" %-4s ", getRegName(rightRegNum));
+ dumpRegRecords();
+ printf(shortRefPositionFormat, currentLocation, 0);
+ dumpIntervalName(rightInterval);
+ printf(" \" ");
+ printf(" %-4s ", getRegName(leftRegNum));
+ dumpRegRecords();
+ }
+ return;
+ }
+ regNumber dstRegNum = dst->gtRegNum;
+ regNumber srcRegNum;
+ GenTreeLclVarCommon* lcl;
+ if (dst->OperGet() == GT_COPY)
+ {
+ lcl = dst->gtGetOp1()->AsLclVarCommon();
+ srcRegNum = lcl->gtRegNum;
+ }
+ else
+ {
+ lcl = dst->AsLclVarCommon();
+ if ((lcl->gtFlags & GTF_SPILLED) != 0)
+ {
+ srcRegNum = REG_STK;
+ }
+ else
+ {
+ assert((lcl->gtFlags & GTF_SPILL) != 0);
+ srcRegNum = dstRegNum;
+ dstRegNum = REG_STK;
+ }
+ }
+ Interval* interval = getIntervalForLocalVar(lcl->gtLclNum);
+ assert(interval->physReg == srcRegNum || (srcRegNum == REG_STK && interval->physReg == REG_NA));
+ if (srcRegNum != REG_STK)
+ {
+ physRegs[srcRegNum].assignedInterval = nullptr;
+ }
+ if (dstRegNum != REG_STK)
+ {
+ interval->physReg = dstRegNum;
+ interval->assignedReg = &(physRegs[dstRegNum]);
+ physRegs[dstRegNum].assignedInterval = interval;
+ interval->isActive = true;
+ }
+ else
+ {
+ interval->physReg = REG_NA;
+ interval->assignedReg = nullptr;
+ interval->isActive = false;
+ }
+ if (VERBOSE)
+ {
+ printf(shortRefPositionFormat, currentLocation, 0);
+ dumpIntervalName(interval);
+ printf(" Move ");
+ printf(" %-4s ", getRegName(dstRegNum));
+ dumpRegRecords();
+ }
+}
+#endif // DEBUG
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
new file mode 100644
index 0000000000..a3c41fe1e3
--- /dev/null
+++ b/src/jit/lsra.h
@@ -0,0 +1,1608 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#ifndef _LSRA_H_
+#define _LSRA_H_
+
+#include "arraylist.h"
+#include "smallhash.h"
+#include "nodeinfo.h"
+
+// Minor and forward-reference types
+class Interval;
+class RefPosition;
+class LinearScan;
+class RegRecord;
+
+template <class T>
+class ArrayStack;
+
+// LsraLocation tracks the linearized order of the nodes.
+// Each node is assigned two LsraLocations - one for all the uses and all but the last
+// def, and a second location for the last def (if any)
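+// For example (an illustrative sketch, not numbering taken from this change): a node given
+// the location pair (10, 11) would place its uses and all but its last def at 10, and its
+// last def at 11.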
+
+typedef unsigned int LsraLocation;
+const unsigned int MinLocation = 0;
+const unsigned int MaxLocation = UINT_MAX;
+// max number of registers an operation could require internally (in addition to uses and defs)
+const unsigned int MaxInternalRegisters = 8;
+const unsigned int RegisterTypeCount = 2;
+
+typedef var_types RegisterType;
+#define IntRegisterType TYP_INT
+#define FloatRegisterType TYP_FLOAT
+
+inline regMaskTP calleeSaveRegs(RegisterType rt)
+{
+ return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED;
+}
+
+struct LocationInfo
+{
+ LsraLocation loc;
+
+ // Reg Index in case of multi-reg result producing call node.
+ // Indicates the position of the register that this location refers to.
+    // The max bits needed are based on the max value of MAX_RET_REG_COUNT
+    // across all targets, which happens to be 4 on ARM. Hence the index value
+    // would be 0..MAX_RET_REG_COUNT-1.
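+    // (With MAX_RET_REG_COUNT at most 4, as noted above, the 2-bit field below is wide
+    // enough to hold indices 0..3.)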
+ unsigned multiRegIdx : 2;
+
+ Interval* interval;
+ GenTree* treeNode;
+
+ LocationInfo(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0)
+ : loc(l), multiRegIdx(regIdx), interval(i), treeNode(t)
+ {
+ assert(multiRegIdx == regIdx);
+ }
+
+ // default constructor for data structures
+ LocationInfo()
+ {
+ }
+};
+
+struct LsraBlockInfo
+{
+    BasicBlock::weight_t weight;
+    // bbNum of the predecessor to use for the register location of live-in variables.
+    // 0 for fgFirstBB.
+    unsigned int predBBNum;
+ bool hasCriticalInEdge;
+ bool hasCriticalOutEdge;
+};
+
+// This is sort of a bit mask
+// The low order 2 bits will be 1 for defs, and 2 for uses
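+// (Illustrative reading of that encoding: a member value with bit 0x1 set satisfies
+// RefTypeIsDef() below, and one with bit 0x2 set satisfies RefTypeIsUse(); the actual
+// member values come from lsra_reftypes.h.)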
+enum RefType : unsigned char
+{
+#define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue,
+#include "lsra_reftypes.h"
+#undef DEF_REFTYPE
+};
+
+// position in a block (for resolution)
+enum BlockStartOrEnd
+{
+ BlockPositionStart = 0,
+ BlockPositionEnd = 1,
+ PositionCount = 2
+};
+
+inline bool RefTypeIsUse(RefType refType)
+{
+ return ((refType & RefTypeUse) == RefTypeUse);
+}
+
+inline bool RefTypeIsDef(RefType refType)
+{
+ return ((refType & RefTypeDef) == RefTypeDef);
+}
+
+typedef regNumber* VarToRegMap;
+
+template <typename ElementType, CompMemKind MemKind>
+class ListElementAllocator
+{
+private:
+ template <typename U, CompMemKind CMK>
+ friend class ListElementAllocator;
+
+ Compiler* m_compiler;
+
+public:
+ ListElementAllocator(Compiler* compiler) : m_compiler(compiler)
+ {
+ }
+
+ template <typename U>
+ ListElementAllocator(const ListElementAllocator<U, MemKind>& other) : m_compiler(other.m_compiler)
+ {
+ }
+
+ ElementType* allocate(size_t count)
+ {
+ return reinterpret_cast<ElementType*>(m_compiler->compGetMem(sizeof(ElementType) * count, MemKind));
+ }
+
+ void deallocate(ElementType* pointer, size_t count)
+ {
+ }
+
+ template <typename U>
+ struct rebind
+ {
+ typedef ListElementAllocator<U, MemKind> allocator;
+ };
+};
+
+typedef ListElementAllocator<Interval, CMK_LSRA_Interval> LinearScanMemoryAllocatorInterval;
+typedef ListElementAllocator<RefPosition, CMK_LSRA_RefPosition> LinearScanMemoryAllocatorRefPosition;
+
+typedef jitstd::list<Interval, LinearScanMemoryAllocatorInterval> IntervalList;
+typedef jitstd::list<RefPosition, LinearScanMemoryAllocatorRefPosition> RefPositionList;
+
+class Referenceable
+{
+public:
+ Referenceable()
+ {
+ firstRefPosition = nullptr;
+ recentRefPosition = nullptr;
+ lastRefPosition = nullptr;
+ isActive = false;
+ }
+
+ // A linked list of RefPositions. These are only traversed in the forward
+ // direction, and are not moved, so they don't need to be doubly linked
+ // (see RefPosition).
+
+ RefPosition* firstRefPosition;
+ RefPosition* recentRefPosition;
+ RefPosition* lastRefPosition;
+
+ bool isActive;
+
+ // Get the position of the next reference which is at or greater than
+    // the current location (relies upon recentRefPosition being updated
+ // during traversal).
+ RefPosition* getNextRefPosition();
+ LsraLocation getNextRefLocation();
+};
+
+class RegRecord : public Referenceable
+{
+public:
+ RegRecord()
+ {
+ assignedInterval = nullptr;
+ previousInterval = nullptr;
+ regNum = REG_NA;
+ isCalleeSave = false;
+ registerType = IntRegisterType;
+ isBusyUntilNextKill = false;
+ }
+
+ void init(regNumber reg)
+ {
+#ifdef _TARGET_ARM64_
+ // The Zero register, or the SP
+ if ((reg == REG_ZR) || (reg == REG_SP))
+ {
+ // IsGeneralRegister returns false for REG_ZR and REG_SP
+ regNum = reg;
+ registerType = IntRegisterType;
+ }
+ else
+#endif
+ if (emitter::isFloatReg(reg))
+ {
+ registerType = FloatRegisterType;
+ }
+ else
+ {
+ // The constructor defaults to IntRegisterType
+ assert(emitter::isGeneralRegister(reg) && registerType == IntRegisterType);
+ }
+ regNum = reg;
+ isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0);
+ }
+
+#ifdef DEBUG
+ // print out representation
+ void dump();
+ // concise representation for embedding
+ void tinyDump();
+#endif // DEBUG
+
+ bool isFree();
+
+ // RefPosition * getNextRefPosition();
+ // LsraLocation getNextRefLocation();
+
+ // DATA
+
+ // interval to which this register is currently allocated.
+ // If the interval is inactive (isActive == false) then it is not currently live,
+    // and the register can be unassigned (i.e. setting assignedInterval to nullptr)
+ // without spilling the register.
+ Interval* assignedInterval;
+ // Interval to which this register was previously allocated, and which was unassigned
+ // because it was inactive. This register will be reassigned to this Interval when
+ // assignedInterval becomes inactive.
+ Interval* previousInterval;
+
+ regNumber regNum;
+ bool isCalleeSave;
+ RegisterType registerType;
+ // This register must be considered busy until the next time it is explicitly killed.
+ // This is used so that putarg_reg can avoid killing its lclVar source, while avoiding
+ // the problem with the reg becoming free if the last-use is encountered before the call.
+ bool isBusyUntilNextKill;
+
+ bool conflictingFixedRegReference(RefPosition* refPosition);
+};
+
+inline bool leafInRange(GenTree* leaf, int lower, int upper)
+{
+ if (!leaf->IsIntCnsFitsInI32())
+ {
+ return false;
+ }
+ if (leaf->gtIntCon.gtIconVal < lower)
+ {
+ return false;
+ }
+ if (leaf->gtIntCon.gtIconVal > upper)
+ {
+ return false;
+ }
+
+ return true;
+}
+
+inline bool leafInRange(GenTree* leaf, int lower, int upper, int multiple)
+{
+ if (!leafInRange(leaf, lower, upper))
+ {
+ return false;
+ }
+ if (leaf->gtIntCon.gtIconVal % multiple)
+ {
+ return false;
+ }
+
+ return true;
+}
+
+inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1)
+{
+ if (leaf->OperGet() != GT_ADD)
+ {
+ return false;
+ }
+ return leafInRange(leaf->gtOp.gtOp2, lower, upper, multiple);
+}
+
+inline bool isCandidateVar(LclVarDsc* varDsc)
+{
+ return varDsc->lvLRACandidate;
+}
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX LinearScan XX
+XX XX
+XX This is the container for the Linear Scan data structures and methods. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+// OPTION 1: The algorithm as described in "Optimized Interval Splitting in a
+// Linear Scan Register Allocator". It is driven by iterating over the Interval
+// lists. In this case, we need multiple IntervalLists, and Intervals will be
+// moved between them so they must be easily updated.
+
+// OPTION 2: The algorithm is driven by iterating over the RefPositions. In this
+// case, we only need a single IntervalList, and it won't be updated.
+// The RefPosition must refer to its Interval, and we need to be able to traverse
+// to the next RefPosition in code order
+// THIS IS THE OPTION CURRENTLY BEING PURSUED
+
+class LocationInfoList;
+class LocationInfoListNodePool;
+
+class LinearScan : public LinearScanInterface
+{
+ friend class RefPosition;
+ friend class Interval;
+ friend class Lowering;
+ friend class TreeNodeInfo;
+
+public:
+ // This could use further abstraction. From Compiler we need the tree,
+ // the flowgraph and the allocator.
+ LinearScan(Compiler* theCompiler);
+
+ // This is the main driver
+ virtual void doLinearScan();
+
+    // TreeNodeInfo contains three register masks: src candidates, dst candidates, and internal candidates.
+ // Instead of storing actual register masks, however, which are large, we store a small index into a table
+ // of register masks, stored in this class. We create only as many distinct register masks as are needed.
+ // All identical register masks get the same index. The register mask table contains:
+ // 1. A mask containing all eligible integer registers.
+    // 2. A mask containing all eligible floating-point registers.
+    // 3. A mask for each single register.
+ // 4. A mask for each combination of registers, created dynamically as required.
+ //
+ // Currently, the maximum number of masks allowed is a constant defined by 'numMasks'. The register mask
+ // table is never resized. It is also limited by the size of the index, currently an unsigned char.
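+    //
+    // Illustrative sketch of the sharing this provides (not code from this change): two nodes
+    // whose dst candidates are both (RBM_EAX | RBM_ECX) map to the same RegMaskIndex via
+    // GetIndexForRegMask, and GetRegMaskForIndex on that index returns the combined mask.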
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM64_)
+ static const int numMasks = 128;
+#else
+ static const int numMasks = 64;
+#endif
+
+ regMaskTP* regMaskTable;
+ int nextFreeMask;
+
+ typedef int RegMaskIndex;
+
+ // allint is 0, allfloat is 1, all the single-bit masks start at 2
+ enum KnownRegIndex
+ {
+ ALLINT_IDX = 0,
+ ALLFLOAT_IDX = 1,
+ FIRST_SINGLE_REG_IDX = 2
+ };
+
+ RegMaskIndex GetIndexForRegMask(regMaskTP mask);
+ regMaskTP GetRegMaskForIndex(RegMaskIndex index);
+ void RemoveRegisterFromMasks(regNumber reg);
+
+#ifdef DEBUG
+ void dspRegisterMaskTable();
+#endif // DEBUG
+
+ // Initialize the block traversal for LSRA.
+ // This resets the bbVisitedSet, and on the first invocation sets the blockSequence array,
+ // which determines the order in which blocks will be allocated (currently called during Lowering).
+ BasicBlock* startBlockSequence();
+ // Move to the next block in sequence, updating the current block information.
+ BasicBlock* moveToNextBlock();
+ // Get the next block to be scheduled without changing the current block,
+ // but updating the blockSequence during the first iteration if it is not fully computed.
+ BasicBlock* getNextBlock();
+
+ // This is called during code generation to update the location of variables
+ virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb);
+
+ // This does the dataflow analysis and builds the intervals
+ void buildIntervals();
+
+ // This is where the actual assignment is done
+ void allocateRegisters();
+
+ // This is the resolution phase, where cross-block mismatches are fixed up
+ void resolveRegisters();
+
+ void writeRegisters(RefPosition* currentRefPosition, GenTree* tree);
+
+ // Insert a copy in the case where a tree node value must be moved to a different
+ // register at the point of use, or it is reloaded to a different register
+ // than the one it was spilled from
+ void insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned multiRegIdx, RefPosition* refPosition);
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ // Insert code to save and restore the upper half of a vector that lives
+ // in a callee-save register at the point of a call (the upper half is
+ // not preserved).
+ void insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* refPosition, BasicBlock* block);
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+ // resolve along one block-block edge
+ enum ResolveType
+ {
+ ResolveSplit,
+ ResolveJoin,
+ ResolveCritical,
+ ResolveSharedCritical,
+ ResolveTypeCount
+ };
+#ifdef DEBUG
+ static const char* resolveTypeName[ResolveTypeCount];
+#endif
+
+ enum WhereToInsert
+ {
+ InsertAtTop,
+ InsertAtBottom
+ };
+
+ void addResolution(
+ BasicBlock* block, GenTreePtr insertionPoint, Interval* interval, regNumber outReg, regNumber inReg);
+
+ void handleOutgoingCriticalEdges(BasicBlock* block);
+
+ void resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet);
+
+ void resolveEdges();
+
+ // Finally, the register assignments are written back to the tree nodes.
+ void recordRegisterAssignments();
+
+ // Keep track of how many temp locations we'll need for spill
+ void initMaxSpill();
+ void updateMaxSpill(RefPosition* refPosition);
+ void recordMaxSpill();
+
+    // maximum number of simultaneous spill locations used, per type
+ unsigned int maxSpill[TYP_COUNT];
+ unsigned int currentSpill[TYP_COUNT];
+ bool needFloatTmpForFPCall;
+ bool needDoubleTmpForFPCall;
+
+#ifdef DEBUG
+private:
+ //------------------------------------------------------------------------
+ // Should we stress lsra?
+ // This uses the same COMPLUS variable as rsStressRegs (COMPlus_JitStressRegs)
+ // However, the possible values and their interpretation are entirely different.
+ //
+ // The mask bits are currently divided into fields in which each non-zero value
+ // is a distinct stress option (e.g. 0x3 is not a combination of 0x1 and 0x2).
+ // However, subject to possible constraints (to be determined), the different
+ // fields can be combined (e.g. 0x7 is a combination of 0x3 and 0x4).
+ // Note that the field values are declared in a public enum, but the actual bits are
+ // only accessed via accessors.
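+    //
+    // For example (illustrative, using the field values defined below): COMPlus_JitStressRegs=0x3
+    // selects LSRA_LIMIT_SMALL_SET, while 0x83 combines it with LSRA_EXTEND_LIFETIMES, since those
+    // options live in different fields of the mask.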
+
+ unsigned lsraStressMask;
+
+ // This controls the registers available for allocation
+ enum LsraStressLimitRegs{LSRA_LIMIT_NONE = 0, LSRA_LIMIT_CALLEE = 0x1, LSRA_LIMIT_CALLER = 0x2,
+ LSRA_LIMIT_SMALL_SET = 0x3, LSRA_LIMIT_MASK = 0x3};
+
+ // When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
+ // registers, so as to get different coverage than limiting to callee or caller.
+    // At least for x86 and AMD64, and potentially other architectures that will support SIMD,
+ // we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
+ // Hence the "SmallFPSet" has 5 elements.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_AMD64_)
+#ifdef UNIX_AMD64_ABI
+    // On System V, RDI and RSI are not callee-saved. Use R12 and R13 as callee-saved registers.
+ static const regMaskTP LsraLimitSmallIntSet =
+ (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13);
+#else // !UNIX_AMD64_ABI
+    // On Windows AMD64, use RDI and RSI as the callee-saved registers.
+ static const regMaskTP LsraLimitSmallIntSet =
+ (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI);
+#endif // !UNIX_AMD64_ABI
+ static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
+#elif defined(_TARGET_ARM_)
+ static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4);
+ static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
+#elif defined(_TARGET_ARM64_)
+ static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
+ static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
+#elif defined(_TARGET_X86_)
+ static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
+ static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
+#else
+#error Unsupported or unset target architecture
+#endif // target
+
+ LsraStressLimitRegs getStressLimitRegs()
+ {
+ return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
+ }
+ regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);
+
+ // This controls the heuristics used to select registers
+ // These can be combined.
+ enum LsraSelect{LSRA_SELECT_DEFAULT = 0, LSRA_SELECT_REVERSE_HEURISTICS = 0x04,
+ LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08, LSRA_SELECT_NEAREST = 0x10, LSRA_SELECT_MASK = 0x1c};
+ LsraSelect getSelectionHeuristics()
+ {
+ return (LsraSelect)(lsraStressMask & LSRA_SELECT_MASK);
+ }
+ bool doReverseSelect()
+ {
+ return ((lsraStressMask & LSRA_SELECT_REVERSE_HEURISTICS) != 0);
+ }
+ bool doReverseCallerCallee()
+ {
+ return ((lsraStressMask & LSRA_SELECT_REVERSE_CALLER_CALLEE) != 0);
+ }
+ bool doSelectNearest()
+ {
+ return ((lsraStressMask & LSRA_SELECT_NEAREST) != 0);
+ }
+
+ // This controls the order in which basic blocks are visited during allocation
+ enum LsraTraversalOrder{LSRA_TRAVERSE_LAYOUT = 0x20, LSRA_TRAVERSE_PRED_FIRST = 0x40,
+ LSRA_TRAVERSE_RANDOM = 0x60, // NYI
+ LSRA_TRAVERSE_DEFAULT = LSRA_TRAVERSE_PRED_FIRST, LSRA_TRAVERSE_MASK = 0x60};
+ LsraTraversalOrder getLsraTraversalOrder()
+ {
+ if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0)
+ {
+ return LSRA_TRAVERSE_DEFAULT;
+ }
+ return (LsraTraversalOrder)(lsraStressMask & LSRA_TRAVERSE_MASK);
+ }
+ bool isTraversalLayoutOrder()
+ {
+ return getLsraTraversalOrder() == LSRA_TRAVERSE_LAYOUT;
+ }
+ bool isTraversalPredFirstOrder()
+ {
+ return getLsraTraversalOrder() == LSRA_TRAVERSE_PRED_FIRST;
+ }
+
+ // This controls whether lifetimes should be extended to the entire method.
+ // Note that this has no effect under MinOpts
+ enum LsraExtendLifetimes{LSRA_DONT_EXTEND = 0, LSRA_EXTEND_LIFETIMES = 0x80, LSRA_EXTEND_LIFETIMES_MASK = 0x80};
+ LsraExtendLifetimes getLsraExtendLifeTimes()
+ {
+ return (LsraExtendLifetimes)(lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK);
+ }
+ bool extendLifetimes()
+ {
+ return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES;
+ }
+
+    // This controls whether variable locations should be set to those of the previous block in layout order
+ // (LSRA_BLOCK_BOUNDARY_LAYOUT), or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED -
+ // the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
+ enum LsraBlockBoundaryLocations{LSRA_BLOCK_BOUNDARY_PRED = 0, LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
+ LSRA_BLOCK_BOUNDARY_ROTATE = 0x200, LSRA_BLOCK_BOUNDARY_MASK = 0x300};
+ LsraBlockBoundaryLocations getLsraBlockBoundaryLocations()
+ {
+ return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK);
+ }
+ regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs);
+
+ // This controls whether we always insert a GT_RELOAD instruction after a spill
+ // Note that this can be combined with LsraSpillAlways (or not)
+ enum LsraReload{LSRA_NO_RELOAD_IF_SAME = 0, LSRA_ALWAYS_INSERT_RELOAD = 0x400, LSRA_RELOAD_MASK = 0x400};
+ LsraReload getLsraReload()
+ {
+ return (LsraReload)(lsraStressMask & LSRA_RELOAD_MASK);
+ }
+ bool alwaysInsertReload()
+ {
+ return getLsraReload() == LSRA_ALWAYS_INSERT_RELOAD;
+ }
+
+ // This controls whether we spill everywhere
+ enum LsraSpill{LSRA_DONT_SPILL_ALWAYS = 0, LSRA_SPILL_ALWAYS = 0x800, LSRA_SPILL_MASK = 0x800};
+ LsraSpill getLsraSpill()
+ {
+ return (LsraSpill)(lsraStressMask & LSRA_SPILL_MASK);
+ }
+ bool spillAlways()
+ {
+ return getLsraSpill() == LSRA_SPILL_ALWAYS;
+ }
+
+    // This controls whether RefPositions that lower/codegen indicated as reg-optional are
+    // allocated a reg at all.
+ enum LsraRegOptionalControl{LSRA_REG_OPTIONAL_DEFAULT = 0, LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000,
+ LSRA_REG_OPTIONAL_MASK = 0x1000};
+
+ LsraRegOptionalControl getLsraRegOptionalControl()
+ {
+ return (LsraRegOptionalControl)(lsraStressMask & LSRA_REG_OPTIONAL_MASK);
+ }
+
+ bool regOptionalNoAlloc()
+ {
+ return getLsraRegOptionalControl() == LSRA_REG_OPTIONAL_NO_ALLOC;
+ }
+
+ // Dump support
+ void lsraDumpIntervals(const char* msg);
+ void dumpRefPositions(const char* msg);
+ void dumpVarRefPositions(const char* msg);
+
+ static bool IsResolutionMove(GenTree* node);
+ static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node);
+
+ void verifyFinalAllocation();
+ void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation);
+#else // !DEBUG
+ bool doSelectNearest()
+ {
+ return false;
+ }
+ bool extendLifetimes()
+ {
+ return false;
+ }
+ bool spillAlways()
+ {
+ return false;
+ }
+ // In a retail build we support only the default traversal order
+ bool isTraversalLayoutOrder()
+ {
+ return false;
+ }
+ bool isTraversalPredFirstOrder()
+ {
+ return true;
+ }
+ bool getLsraExtendLifeTimes()
+ {
+ return false;
+ }
+#endif // !DEBUG
+
+public:
+ // Used by Lowering when considering whether to split Longs, as well as by identifyCandidates().
+ bool isRegCandidate(LclVarDsc* varDsc);
+
+private:
+ // Determine which locals are candidates for allocation
+ void identifyCandidates();
+
+ // determine which locals are used in EH constructs we don't want to deal with
+ void identifyCandidatesExceptionDataflow();
+
+ void buildPhysRegRecords();
+
+ void setLastUses(BasicBlock* block);
+
+ void setFrameType();
+
+ // Update allocations at start/end of block
+ void processBlockEndAllocation(BasicBlock* current);
+
+ // Record variable locations at start/end of block
+ void processBlockStartLocations(BasicBlock* current, bool allocationPass);
+ void processBlockEndLocations(BasicBlock* current);
+
+ RefType CheckBlockType(BasicBlock* block, BasicBlock* prevBlock);
+
+ // insert refpositions representing prolog zero-inits which will be added later
+ void insertZeroInitRefPositions();
+
+ void AddMapping(GenTree* node, LsraLocation loc);
+
+ // add physreg refpositions for a tree node, based on calling convention and instruction selection predictions
+ void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse);
+
+ void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);
+
+ void buildRefPositionsForNode(GenTree* tree,
+ BasicBlock* block,
+ LocationInfoListNodePool& listNodePool,
+ HashTableBase<GenTree*, LocationInfoList>& operandToLocationInfoMap,
+ LsraLocation loc);
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc);
+ void buildUpperVectorRestoreRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+    // For AMD64 on System V machines. This method
+    // is called as a replacement for raUpdateRegStateForArg,
+    // which is used on Windows. On System V systems a struct can be
+    // passed partially using registers from the two register files.
+ void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc);
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ // Update reg state for an incoming register argument
+ void updateRegStateForArg(LclVarDsc* argDsc);
+
+ inline void setTreeNodeInfo(GenTree* tree, TreeNodeInfo info)
+ {
+ tree->gtLsraInfo = info;
+ tree->gtClearReg(compiler);
+
+ DBEXEC(VERBOSE, info.dump(this));
+ }
+
+ inline void clearDstCount(GenTree* tree)
+ {
+ tree->gtLsraInfo.dstCount = 0;
+ }
+
+ inline void clearOperandCounts(GenTree* tree)
+ {
+ TreeNodeInfo& info = tree->gtLsraInfo;
+ info.srcCount = 0;
+ info.dstCount = 0;
+ }
+
+ inline bool isLocalDefUse(GenTree* tree)
+ {
+ return tree->gtLsraInfo.isLocalDefUse;
+ }
+
+ inline bool isCandidateLocalRef(GenTree* tree)
+ {
+ if (tree->IsLocal())
+ {
+ unsigned int lclNum = tree->gtLclVarCommon.gtLclNum;
+ assert(lclNum < compiler->lvaCount);
+ LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
+
+ return isCandidateVar(varDsc);
+ }
+ return false;
+ }
+
+ static Compiler::fgWalkResult markAddrModeOperandsHelperMD(GenTreePtr tree, void* p);
+
+ // Return the registers killed by the given tree node.
+ regMaskTP getKillSetForNode(GenTree* tree);
+
+ // Given some tree node add refpositions for all the registers this node kills
+ bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc);
+
+ regMaskTP allRegs(RegisterType rt);
+ regMaskTP allRegs(GenTree* tree);
+ regMaskTP allMultiRegCallNodeRegs(GenTreeCall* tree);
+ regMaskTP allSIMDRegs();
+ regMaskTP internalFloatRegCandidates();
+
+ bool registerIsFree(regNumber regNum, RegisterType regType);
+ bool registerIsAvailable(RegRecord* physRegRecord,
+ LsraLocation currentLoc,
+ LsraLocation* nextRefLocationPtr,
+ RegisterType regType);
+ void freeRegister(RegRecord* physRegRecord);
+ void freeRegisters(regMaskTP regsToFree);
+
+ regMaskTP getUseCandidates(GenTree* useNode);
+ regMaskTP getDefCandidates(GenTree* tree);
+ var_types getDefType(GenTree* tree);
+
+ RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, LsraLocation currentLoc, regMaskTP regMask);
+
+ int buildInternalRegisterDefsForNode(GenTree* tree, LsraLocation currentLoc, RefPosition* defs[]);
+
+ void buildInternalRegisterUsesForNode(GenTree* tree, LsraLocation currentLoc, RefPosition* defs[], int total);
+
+ void resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosition* currentRefPosition);
+
+ void insertMove(BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum, regNumber inReg, regNumber outReg);
+
+ void insertSwap(BasicBlock* block,
+ GenTreePtr insertionPoint,
+ unsigned lclNum1,
+ regNumber reg1,
+ unsigned lclNum2,
+ regNumber reg2);
+
+public:
+ // TODO-Cleanup: unused?
+ class PhysRegIntervalIterator
+ {
+ public:
+ PhysRegIntervalIterator(LinearScan* theLinearScan)
+ {
+ nextRegNumber = (regNumber)0;
+ linearScan = theLinearScan;
+ }
+ RegRecord* GetNext()
+ {
+ return &linearScan->physRegs[nextRegNumber];
+ }
+
+ private:
+ // This assumes that the physical registers are contiguous, starting
+ // with a register number of 0
+ regNumber nextRegNumber;
+ LinearScan* linearScan;
+ };
+
+private:
+ Interval* newInterval(RegisterType regType);
+
+ Interval* getIntervalForLocalVar(unsigned varNum)
+ {
+ return localVarIntervals[varNum];
+ }
+ RegRecord* getRegisterRecord(regNumber regNum);
+
+ RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType);
+
+ RefPosition* newRefPosition(Interval* theInterval,
+ LsraLocation theLocation,
+ RefType theRefType,
+ GenTree* theTreeNode,
+ regMaskTP mask,
+ unsigned multiRegIdx = 0);
+
+ RefPosition* newRefPosition(
+ regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask);
+
+ void applyCalleeSaveHeuristics(RefPosition* rp);
+
+ void associateRefPosWithInterval(RefPosition* rp);
+
+ void associateRefPosWithRegister(RefPosition* rp);
+
+ unsigned getWeight(RefPosition* refPos);
+
+ /*****************************************************************************
+ * Register management
+ ****************************************************************************/
+ RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition);
+ regNumber tryAllocateFreeReg(Interval* current, RefPosition* refPosition);
+ RegRecord* findBestPhysicalReg(RegisterType regType,
+ LsraLocation endLocation,
+ regMaskTP candidates,
+ regMaskTP preferences);
+ regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable);
+ regNumber assignCopyReg(RefPosition* refPosition);
+
+ void checkAndAssignInterval(RegRecord* regRec, Interval* interval);
+ void assignPhysReg(RegRecord* regRec, Interval* interval);
+ void assignPhysReg(regNumber reg, Interval* interval)
+ {
+ assignPhysReg(getRegisterRecord(reg), interval);
+ }
+
+ void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition);
+ void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition);
+ void unassignPhysRegNoSpill(RegRecord* reg);
+ void unassignPhysReg(regNumber reg)
+ {
+ unassignPhysReg(getRegisterRecord(reg), nullptr);
+ }
+
+ void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition);
+
+ void spillGCRefs(RefPosition* killRefPosition);
+
+ /*****************************************************************************
+ * For Resolution phase
+ ****************************************************************************/
+ // TODO-Throughput: Consider refactoring this so that we keep a map from regs to vars for better scaling
+ unsigned int regMapCount;
+
+ // When we split edges, we create new blocks, and instead of expanding the VarToRegMaps, we
+ // rely on the property that the "in" map is the same as the "from" block of the edge, and the
+ // "out" map is the same as the "to" block of the edge (by construction).
+ // So, for any block whose bbNum is greater than bbNumMaxBeforeResolution, we use the
+ // splitBBNumToTargetBBNumMap.
+ // TODO-Throughput: We may want to look into the cost/benefit tradeoff of doing this vs. expanding
+ // the arrays.
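+    //
+    // Illustrative example (block numbers are hypothetical): if the edge BB05 -> BB09 is split by
+    // a new BB12 whose bbNum is greater than bbNumMaxBeforeResolution, getSplitEdgeInfo(12) would
+    // return { fromBBNum = 5, toBBNum = 9 }, and BB12's "in" map comes from BB05 (the "from" block)
+    // while its "out" map comes from BB09 (the "to" block).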
+
+ unsigned bbNumMaxBeforeResolution;
+ struct SplitEdgeInfo
+ {
+ unsigned fromBBNum;
+ unsigned toBBNum;
+ };
+ typedef SimplerHashTable<unsigned, SmallPrimitiveKeyFuncs<unsigned>, SplitEdgeInfo, JitSimplerHashBehavior>
+ SplitBBNumToTargetBBNumMap;
+ SplitBBNumToTargetBBNumMap* splitBBNumToTargetBBNumMap;
+ SplitBBNumToTargetBBNumMap* getSplitBBNumToTargetBBNumMap()
+ {
+ if (splitBBNumToTargetBBNumMap == nullptr)
+ {
+ splitBBNumToTargetBBNumMap =
+ new (getAllocator(compiler)) SplitBBNumToTargetBBNumMap(getAllocator(compiler));
+ }
+ return splitBBNumToTargetBBNumMap;
+ }
+ SplitEdgeInfo getSplitEdgeInfo(unsigned int bbNum);
+
+ void initVarRegMaps();
+ void setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
+ void setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
+ VarToRegMap getInVarToRegMap(unsigned int bbNum);
+ VarToRegMap getOutVarToRegMap(unsigned int bbNum);
+ regNumber getVarReg(VarToRegMap map, unsigned int varNum);
+    // Initialize the incoming VarToRegMap to the given map values (generally those of a
+    // predecessor of the block)
+ VarToRegMap setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap);
+
+ regNumber getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type);
+
+#ifdef DEBUG
+ void dumpVarToRegMap(VarToRegMap map);
+ void dumpInVarToRegMap(BasicBlock* block);
+ void dumpOutVarToRegMap(BasicBlock* block);
+
+ // There are three points at which a tuple-style dump is produced, and each
+ // differs slightly:
+ // - In LSRA_DUMP_PRE, it does a simple dump of each node, with indications of what
+ // tree nodes are consumed.
+ // - In LSRA_DUMP_REFPOS, which is after the intervals are built, but before
+    //   register allocation, each node is dumped, along with all of the RefPositions.
+    //   The Intervals are identified as Lnnn for lclVar intervals, Innn for other
+ // intervals, and Tnnn for internal temps.
+ // - In LSRA_DUMP_POST, which is after register allocation, the registers are
+ // shown.
+
+ enum LsraTupleDumpMode{LSRA_DUMP_PRE, LSRA_DUMP_REFPOS, LSRA_DUMP_POST};
+ void lsraGetOperandString(GenTreePtr tree,
+ LsraTupleDumpMode mode,
+ char* operandString,
+ unsigned operandStringLength);
+ void lsraDispNode(GenTreePtr tree, LsraTupleDumpMode mode, bool hasDest);
+ void DumpOperandDefs(GenTree* operand,
+ bool& first,
+ LsraTupleDumpMode mode,
+ char* operandString,
+ const unsigned operandStringLength);
+ void TupleStyleDump(LsraTupleDumpMode mode);
+
+ bool dumpTerse;
+ LsraLocation maxNodeLocation;
+
+ // Width of various fields - used to create a streamlined dump during allocation that shows the
+ // state of all the registers in columns.
+ int regColumnWidth;
+ int regTableIndent;
+
+ const char* columnSeparator;
+ const char* line;
+ const char* leftBox;
+ const char* middleBox;
+ const char* rightBox;
+
+ static const int MAX_FORMAT_CHARS = 12;
+ char intervalNameFormat[MAX_FORMAT_CHARS];
+ char regNameFormat[MAX_FORMAT_CHARS];
+ char shortRefPositionFormat[MAX_FORMAT_CHARS];
+ char emptyRefPositionFormat[MAX_FORMAT_CHARS];
+ char indentFormat[MAX_FORMAT_CHARS];
+ static const int MAX_LEGEND_FORMAT_CHARS = 25;
+ char bbRefPosFormat[MAX_LEGEND_FORMAT_CHARS];
+ char legendFormat[MAX_LEGEND_FORMAT_CHARS];
+
+ // How many rows have we printed since last printing a "title row"?
+ static const int MAX_ROWS_BETWEEN_TITLES = 50;
+ int rowCountSinceLastTitle;
+
+ void dumpRegRecordHeader();
+ void dumpRegRecordTitle();
+ void dumpRegRecordTitleLines();
+ int getLastUsedRegNumIndex();
+ void dumpRegRecords();
+ // An abbreviated RefPosition dump for printing with column-based register state
+ void dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock);
+ // Print the number of spaces occupied by a dumpRefPositionShort()
+ void dumpEmptyRefPosition();
+ // A dump of Referent, in exactly regColumnWidth characters
+ void dumpIntervalName(Interval* interval);
+
+ // Events during the allocation phase that cause some dump output, which differs depending
+ // upon whether dumpTerse is set:
+ enum LsraDumpEvent{
+ // Conflicting def/use
+ LSRA_EVENT_DEFUSE_CONFLICT, LSRA_EVENT_DEFUSE_FIXED_DELAY_USE, LSRA_EVENT_DEFUSE_CASE1, LSRA_EVENT_DEFUSE_CASE2,
+ LSRA_EVENT_DEFUSE_CASE3, LSRA_EVENT_DEFUSE_CASE4, LSRA_EVENT_DEFUSE_CASE5, LSRA_EVENT_DEFUSE_CASE6,
+
+ // Spilling
+ LSRA_EVENT_SPILL, LSRA_EVENT_SPILL_EXTENDED_LIFETIME, LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL,
+ LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, LSRA_EVENT_DONE_KILL_GC_REFS,
+
+ // Block boundaries
+ LSRA_EVENT_START_BB, LSRA_EVENT_END_BB,
+
+ // Miscellaneous
+ LSRA_EVENT_FREE_REGS,
+
+ // Characteristics of the current RefPosition
+ LSRA_EVENT_INCREMENT_RANGE_END, // ???
+ LSRA_EVENT_LAST_USE, LSRA_EVENT_LAST_USE_DELAYED, LSRA_EVENT_NEEDS_NEW_REG,
+
+ // Allocation decisions
+ LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED,
+ LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG,
+ LSRA_EVENT_ALLOC_SPILLED_REG, LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG,
+ LSRA_EVENT_REUSE_REG,
+ };
+ void dumpLsraAllocationEvent(LsraDumpEvent event,
+ Interval* interval = nullptr,
+ regNumber reg = REG_NA,
+ BasicBlock* currentBlock = nullptr);
+
+ void dumpBlockHeader(BasicBlock* block);
+
+ void validateIntervals();
+#endif // DEBUG
+
+ Compiler* compiler;
+
+private:
+#if MEASURE_MEM_ALLOC
+ IAllocator* lsraIAllocator;
+#endif
+
+ IAllocator* getAllocator(Compiler* comp)
+ {
+#if MEASURE_MEM_ALLOC
+ if (lsraIAllocator == nullptr)
+ {
+ lsraIAllocator = new (comp, CMK_LSRA) CompAllocator(comp, CMK_LSRA);
+ }
+ return lsraIAllocator;
+#else
+ return comp->getAllocator();
+#endif
+ }
+
+#ifdef DEBUG
+ // This is used for dumping
+ RefPosition* activeRefPosition;
+#endif // DEBUG
+
+ IntervalList intervals;
+
+ RegRecord physRegs[REG_COUNT];
+
+ Interval** localVarIntervals;
+
+ // Set of blocks that have been visited.
+ BlockSet bbVisitedSet;
+ void markBlockVisited(BasicBlock* block)
+ {
+ BlockSetOps::AddElemD(compiler, bbVisitedSet, block->bbNum);
+ }
+ void clearVisitedBlocks()
+ {
+ BlockSetOps::ClearD(compiler, bbVisitedSet);
+ }
+ bool isBlockVisited(BasicBlock* block)
+ {
+ return BlockSetOps::IsMember(compiler, bbVisitedSet, block->bbNum);
+ }
+
+ // A map from bbNum to the block information used during register allocation.
+ LsraBlockInfo* blockInfo;
+ BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated));
+
+ // The order in which the blocks will be allocated.
+    // This is an array of BasicBlock*, in the order in which they should be traversed.
+ BasicBlock** blockSequence;
+ // The verifiedAllBBs flag indicates whether we have verified that all BBs have been
+    // included in the blockSequence above, during setBlockSequence().
+ bool verifiedAllBBs;
+ void setBlockSequence();
+ int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);
+ BasicBlockList* blockSequenceWorkList;
+ bool blockSequencingDone;
+ void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block);
+ void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
+ BasicBlock* getNextCandidateFromWorkList();
+
+ // The bbNum of the block being currently allocated or resolved.
+ unsigned int curBBNum;
+ // The ordinal of the block we're on (i.e. this is the curBBSeqNum-th block we've allocated).
+ unsigned int curBBSeqNum;
+ // The number of blocks that we've sequenced.
+ unsigned int bbSeqCount;
+ // The Location of the start of the current block.
+ LsraLocation curBBStartLocation;
+
+ // Ordered list of RefPositions
+ RefPositionList refPositions;
+
+ // Per-block variable location mappings: an array indexed by block number that yields a
+ // pointer to an array of regNumber, one per variable.
+ VarToRegMap* inVarToRegMaps;
+ VarToRegMap* outVarToRegMaps;
+
+ // A temporary VarToRegMap used during the resolution of critical edges.
+ VarToRegMap sharedCriticalVarToRegMap;
+
+ PhasedVar<regMaskTP> availableIntRegs;
+ PhasedVar<regMaskTP> availableFloatRegs;
+ PhasedVar<regMaskTP> availableDoubleRegs;
+
+    // Current set of live tracked vars, used during building of RefPositions to determine whether
+    // to preference them to callee-save registers.
+ VARSET_TP currentLiveVars;
+ // Set of floating point variables to consider for callee-save registers.
+ VARSET_TP fpCalleeSaveCandidateVars;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+#if defined(_TARGET_AMD64_)
+ static const var_types LargeVectorType = TYP_SIMD32;
+ static const var_types LargeVectorSaveType = TYP_SIMD16;
+#elif defined(_TARGET_ARM64_)
+ static const var_types LargeVectorType = TYP_SIMD16;
+ static const var_types LargeVectorSaveType = TYP_DOUBLE;
+#else // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
+#error("Unknown target architecture for FEATURE_SIMD")
+#endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
+
+ // Set of large vector (TYP_SIMD32 on AVX) variables.
+ VARSET_TP largeVectorVars;
+ // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
+ VARSET_TP largeVectorCalleeSaveCandidateVars;
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+};
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Interval XX
+XX XX
+XX This is the fundamental data structure for linear scan register XX
+XX allocation. It represents the live range(s) for a variable or temp. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+class Interval : public Referenceable
+{
+public:
+ Interval(RegisterType registerType, regMaskTP registerPreferences)
+ : registerPreferences(registerPreferences)
+ , relatedInterval(nullptr)
+ , assignedReg(nullptr)
+ , registerType(registerType)
+ , isLocalVar(false)
+ , isSplit(false)
+ , isSpilled(false)
+ , isInternal(false)
+ , isStructField(false)
+ , isPromotedStruct(false)
+ , hasConflictingDefUse(false)
+ , hasNonCommutativeRMWDef(false)
+ , isSpecialPutArg(false)
+ , preferCalleeSave(false)
+ , isConstant(false)
+ , physReg(REG_COUNT)
+#ifdef DEBUG
+ , intervalIndex(0)
+#endif
+ , varNum(0)
+ {
+ }
+
+#ifdef DEBUG
+ // print out representation
+ void dump();
+ // concise representation for embedding
+ void tinyDump();
+ // extremely concise representation
+ void microDump();
+#endif // DEBUG
+
+ void setLocalNumber(unsigned localNum, LinearScan* l);
+
+ // Fixed registers for which this Interval has a preference
+ regMaskTP registerPreferences;
+
+    // The relatedInterval is the interval to which this interval is currently
+    // preferenced (e.g. because they are related by a copy).
+ Interval* relatedInterval;
+
+    // The assignedReg is the RegRecord for the register to which this interval
+ // has been assigned at some point - if the interval is active, this is the
+ // register it currently occupies.
+ RegRecord* assignedReg;
+
+ // DECIDE : put this in a union or do something w/ inheritance?
+    // this is an interval for a physical register, not an allocatable entity
+
+ RegisterType registerType;
+ bool isLocalVar : 1;
+ // Indicates whether this interval has been assigned to different registers
+ bool isSplit : 1;
+ // Indicates whether this interval is ever spilled
+ bool isSpilled : 1;
+ // indicates an interval representing the internal requirements for
+ // generating code for a node (temp registers internal to the node)
+ // Note that this interval may live beyond a node in the GT_ARR_LENREF/GT_IND
+ // case (though never lives beyond a stmt)
+ bool isInternal : 1;
+ // true if this is a LocalVar for a struct field
+ bool isStructField : 1;
+ // true iff this is a GT_LDOBJ for a fully promoted (PROMOTION_TYPE_INDEPENDENT) struct
+ bool isPromotedStruct : 1;
+ // true if this is an SDSU interval for which the def and use have conflicting register
+ // requirements
+ bool hasConflictingDefUse : 1;
+ // true if this interval is defined by a non-commutative 2-operand instruction
+ bool hasNonCommutativeRMWDef : 1;
+
+ // True if this interval is defined by a putArg, whose source is a non-last-use lclVar.
+ // During allocation, this flag will be cleared if the source is not already in the required register.
+    // Otherwise, we will leave the register allocated to the lclVar, but mark the RegRecord as
+ // isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call.
+ bool isSpecialPutArg : 1;
+
+ // True if this interval interferes with a call.
+ bool preferCalleeSave : 1;
+
+ // True if this interval is defined by a constant node that may be reused and/or may be
+ // able to reuse a constant that's already in a register.
+ bool isConstant : 1;
+
+ // The register to which it is currently assigned.
+ regNumber physReg;
+
+#ifdef DEBUG
+ unsigned int intervalIndex;
+#endif // DEBUG
+
+ unsigned int varNum; // This is the "variable number": the index into the lvaTable array
+
+ LclVarDsc* getLocalVar(Compiler* comp)
+ {
+ assert(isLocalVar);
+ return &(comp->lvaTable[this->varNum]);
+ }
+
+ // Get the local tracked variable "index" (lvVarIndex), used in bitmasks.
+ unsigned getVarIndex(Compiler* comp)
+ {
+ LclVarDsc* varDsc = getLocalVar(comp);
+ assert(varDsc->lvTracked); // If this isn't true, we shouldn't be calling this function!
+ return varDsc->lvVarIndex;
+ }
+
+ bool isAssignedTo(regNumber regNum)
+ {
+ // This uses regMasks to handle the case where a double actually occupies two registers
+ // TODO-Throughput: This could/should be done more cheaply.
+ return (physReg != REG_NA && (genRegMask(physReg, registerType) & genRegMask(regNum)) != RBM_NONE);
+ }
+
+ // Assign the related interval.
+ void assignRelatedInterval(Interval* newRelatedInterval)
+ {
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("Assigning related ");
+ newRelatedInterval->microDump();
+ printf(" to ");
+ this->microDump();
+ printf("\n");
+ }
+#endif // DEBUG
+ relatedInterval = newRelatedInterval;
+ }
+
+ // Assign the related interval, but only if it isn't already assigned.
+ void assignRelatedIntervalIfUnassigned(Interval* newRelatedInterval)
+ {
+ if (relatedInterval == nullptr)
+ {
+ assignRelatedInterval(newRelatedInterval);
+ }
+ else
+ {
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("Interval ");
+ this->microDump();
+ printf(" already has a related interval\n");
+ }
+#endif // DEBUG
+ }
+ }
+
+ // Update the registerPreferences on the interval.
+ // If there are conflicting requirements on this interval, set the preferences to
+ // the union of them. That way maybe we'll get at least one of them.
+    // An exception is made in the case where either the existing or the new
+    // preferences are all callee-save, in which case we "prefer" the callee-save.
+
+ void updateRegisterPreferences(regMaskTP preferences)
+ {
+ // We require registerPreferences to have been initialized.
+ assert(registerPreferences != RBM_NONE);
+ // It is invalid to update with empty preferences
+ assert(preferences != RBM_NONE);
+
+ regMaskTP commonPreferences = (registerPreferences & preferences);
+ if (commonPreferences != RBM_NONE)
+ {
+ registerPreferences = commonPreferences;
+ return;
+ }
+
+ // There are no preferences in common.
+ // Preferences need to reflect both cases where a var must occupy a specific register,
+ // as well as cases where a var is live when a register is killed.
+ // In the former case, we would like to record all such registers, however we don't
+ // really want to use any registers that will interfere.
+ // To approximate this, we never "or" together multi-reg sets, which are generally kill sets.
+
+ if (!genMaxOneBit(preferences))
+ {
+ // The new preference value is a multi-reg set, so it's probably a kill.
+ // Keep the new value.
+ registerPreferences = preferences;
+ return;
+ }
+
+ if (!genMaxOneBit(registerPreferences))
+ {
+ // The old preference value is a multi-reg set.
+ // Keep the existing preference set, as it probably reflects one or more kills.
+ // It may have been a union of multiple individual registers, but we can't
+ // distinguish that case without extra cost.
+ return;
+ }
+
+ // If we reach here, we have two disjoint single-reg sets.
+ // Keep only the callee-save preferences, if not empty.
+ // Otherwise, take the union of the preferences.
+
+ regMaskTP newPreferences = registerPreferences | preferences;
+
+ if (preferCalleeSave)
+ {
+ regMaskTP calleeSaveMask = (calleeSaveRegs(this->registerType) & (newPreferences));
+ if (calleeSaveMask != RBM_NONE)
+ {
+ newPreferences = calleeSaveMask;
+ }
+ }
+ registerPreferences = newPreferences;
+ }
+};
+
+class RefPosition
+{
+public:
+ RefPosition(unsigned int bbNum, LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
+ : referent(nullptr)
+ , nextRefPosition(nullptr)
+ , treeNode(treeNode)
+ , bbNum(bbNum)
+ , nodeLocation(nodeLocation)
+ , registerAssignment(RBM_NONE)
+ , refType(refType)
+ , multiRegIdx(0)
+ , lastUse(false)
+ , reload(false)
+ , spillAfter(false)
+ , copyReg(false)
+ , moveReg(false)
+ , isPhysRegRef(false)
+ , isFixedRegRef(false)
+ , isLocalDefUse(false)
+ , delayRegFree(false)
+ , outOfOrder(false)
+#ifdef DEBUG
+ , rpNum(0)
+#endif
+ {
+ }
+
+ // A RefPosition refers to either an Interval or a RegRecord. 'referent' points to one
+ // of these types. If it refers to a RegRecord, then 'isPhysRegRef' is true. If it
+ // refers to an Interval, then 'isPhysRegRef' is false.
+ //
+ // Q: can 'referent' be NULL?
+
+ Referenceable* referent;
+
+ Interval* getInterval()
+ {
+ assert(!isPhysRegRef);
+ return (Interval*)referent;
+ }
+ void setInterval(Interval* i)
+ {
+ referent = i;
+ isPhysRegRef = false;
+ }
+
+ RegRecord* getReg()
+ {
+ assert(isPhysRegRef);
+ return (RegRecord*)referent;
+ }
+ void setReg(RegRecord* r)
+ {
+ referent = r;
+ isPhysRegRef = true;
+ registerAssignment = genRegMask(r->regNum);
+ }
+
+ // nextRefPosition is the next in code order.
+ // Note that in either case there is no need for these to be doubly linked, as they
+ // are only traversed in the forward direction, and are not moved.
+ RefPosition* nextRefPosition;
+
+ // The remaining fields are common to both options
+ GenTree* treeNode;
+ unsigned int bbNum;
+
+ // Prior to the allocation pass, registerAssignment captures the valid registers
+ // for this RefPosition. An empty set means that any register is valid. A non-empty
+ // set means that it must be one of the given registers (may be the full set if the
+ // only constraint is that it must reside in SOME register)
+ // After the allocation pass, this contains the actual assignment
+ LsraLocation nodeLocation;
+ regMaskTP registerAssignment;
+
+ regNumber assignedReg()
+ {
+ if (registerAssignment == RBM_NONE)
+ {
+ return REG_NA;
+ }
+
+ return genRegNumFromMask(registerAssignment);
+ }
+
+ RefType refType;
+
+ // Returns true if it is a reference on a gentree node.
+ bool IsActualRef()
+ {
+ return (refType == RefTypeDef || refType == RefTypeUse);
+ }
+
+ bool RequiresRegister()
+ {
+ return (IsActualRef()
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ ) &&
+ !AllocateIfProfitable();
+ }
+
+    // Indicates whether this ref position is to be allocated
+    // a reg only if profitable. Currently these are the
+    // ref positions that lower/codegen has indicated as reg
+    // optional; they are treated as contained memory operands if
+    // no reg is allocated.
+ unsigned allocRegIfProfitable : 1;
+
+ void setAllocateIfProfitable(unsigned val)
+ {
+ allocRegIfProfitable = val;
+ }
+
+    // Returns true if this ref position is to be allocated
+    // a reg only if it is profitable.
+ bool AllocateIfProfitable()
+ {
+ // TODO-CQ: Right now if a ref position is marked as
+ // copyreg or movereg, then it is not treated as
+ // 'allocate if profitable'. This is an implementation
+ // limitation that needs to be addressed.
+ return allocRegIfProfitable && !copyReg && !moveReg;
+ }
+
+ // Used by RefTypeDef/Use positions of a multi-reg call node.
+ // Indicates the position of the register that this ref position refers to.
+    // The max bits needed are based on the max value of MAX_RET_REG_COUNT
+    // across all targets, which happens to be 4 on ARM. Hence the index value
+    // would be 0..MAX_RET_REG_COUNT-1.
+ unsigned multiRegIdx : 2;
+
+ void setMultiRegIdx(unsigned idx)
+ {
+ multiRegIdx = idx;
+ assert(multiRegIdx == idx);
+ }
+
+ unsigned getMultiRegIdx()
+ {
+ return multiRegIdx;
+ }
+
+ // Last Use - this may be true for multiple RefPositions in the same Interval
+ bool lastUse : 1;
+
+ // Spill and Copy info
+ // reload indicates that the value was spilled, and must be reloaded here.
+ // spillAfter indicates that the value is spilled here, so a spill must be added.
+ // copyReg indicates that the value needs to be copied to a specific register,
+ // but that it will also retain its current assigned register.
+ // moveReg indicates that the value needs to be moved to a different register,
+ // and that this will be its new assigned register.
+ // A RefPosition may have any flag individually or the following combinations:
+ // - reload and spillAfter (i.e. it remains in memory), but not in combination with copyReg or moveReg
+ // (reload cannot exist with copyReg or moveReg; it should be reloaded into the appropriate reg)
+ // - spillAfter and copyReg (i.e. it must be copied to a new reg for use, but is then spilled)
+ // - spillAfter and moveReg (i.e. it must be both spilled and moved)
+ // NOTE: a moveReg involves an explicit move, and would usually not be needed for a fixed Reg if it is going
+ // to be spilled, because the code generator will do the move to the fixed register, and doesn't need to
+ // record the new register location as the new "home" location of the lclVar. However, if there is a conflicting
+ // use at the same location (e.g. lclVar V1 is in rdx and needs to be in rcx, but V2 needs to be in rdx), then
+ // we need an explicit move.
+ // - copyReg and moveReg must not exist with each other.
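+ //
+ // For example (illustrative): a use that must be in a particular register while the
+ // Interval remains in its currently assigned register gets copyReg; if instead the
+ // Interval's assigned register itself changes at this point, moveReg is used (never both).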
+
+ bool reload : 1;
+ bool spillAfter : 1;
+ bool copyReg : 1;
+ bool moveReg : 1; // true if this var is moved to a new register
+
+ bool isPhysRegRef : 1; // true if 'referent' points to a RegRecord, false if it points to an Interval
+ bool isFixedRegRef : 1;
+ bool isLocalDefUse : 1;
+
+ // delayRegFree indicates that the register should not be freed right away, but instead wait
+ // until the next Location after it would normally be freed. This is used for the case of
+ // non-commutative binary operators, where op2 must not be assigned the same register as
+ // the target. We do this by not freeing it until after the target has been defined.
+ // Another option would be to actually change the Location of the op2 use until the same
+ // Location as the def, but then it could potentially reuse a register that has been freed
+ // from the other source(s), e.g. if it's a lastUse or spilled.
+ bool delayRegFree : 1;
+
+ // outOfOrder is marked on a (non-def) RefPosition that doesn't follow a definition of the
+ // register currently assigned to the Interval. This happens when we use the assigned
+ // register from a predecessor that is not the most recently allocated BasicBlock.
+ bool outOfOrder : 1;
+
+ LsraLocation getRefEndLocation()
+ {
+ return delayRegFree ? nodeLocation + 1 : nodeLocation;
+ }
+
+#ifdef DEBUG
+ unsigned rpNum; // The unique RefPosition number, equal to its index in the refPositions list. Only used for
+ // debugging dumps.
+#endif // DEBUG
+
+ bool isIntervalRef()
+ {
+ return (!isPhysRegRef && (referent != nullptr));
+ }
+
+ // isTrueDef indicates that the RefPosition is a non-update def of a non-internal
+ // interval
+ bool isTrueDef()
+ {
+ return (refType == RefTypeDef && isIntervalRef() && !getInterval()->isInternal);
+ }
+
+ // isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register
+ // specified by the given mask
+ bool isFixedRefOfRegMask(regMaskTP regMask)
+ {
+ assert(genMaxOneBit(regMask));
+ return (registerAssignment == regMask);
+ }
+
+ // isFixedRefOfReg indicates that the RefPosition has a fixed assignment to the given register
+ bool isFixedRefOfReg(regNumber regNum)
+ {
+ return (isFixedRefOfRegMask(genRegMask(regNum)));
+ }
+
+#ifdef DEBUG
+ // operator= copies everything except 'rpNum', which must remain unique
+ RefPosition& operator=(const RefPosition& rp)
+ {
+ unsigned rpNumSave = rpNum;
+ memcpy(this, &rp, sizeof(rp));
+ rpNum = rpNumSave;
+ return *this;
+ }
+
+ void dump();
+#endif // DEBUG
+};
+
+#ifdef DEBUG
+void dumpRegMask(regMaskTP regs);
+#endif // DEBUG
+
+/*****************************************************************************/
+#endif //_LSRA_H_
+/*****************************************************************************/
diff --git a/src/jit/lsra_reftypes.h b/src/jit/lsra_reftypes.h
new file mode 100644
index 0000000000..841b78c881
--- /dev/null
+++ b/src/jit/lsra_reftypes.h
@@ -0,0 +1,23 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+// memberName - enum member name
+// memberValue - enum member value
+// shortName - short name string
+// DEF_REFTYPE(memberName , memberValue , shortName )
+ DEF_REFTYPE(RefTypeInvalid , 0x00 , "Invl" )
+ DEF_REFTYPE(RefTypeDef , 0x01 , "Def " )
+ DEF_REFTYPE(RefTypeUse , 0x02 , "Use " )
+ DEF_REFTYPE(RefTypeKill , 0x04 , "Kill" )
+ DEF_REFTYPE(RefTypeBB , 0x08 , "BB " )
+ DEF_REFTYPE(RefTypeFixedReg , 0x10 , "Fixd" )
+ DEF_REFTYPE(RefTypeExpUse , (0x20 | RefTypeUse), "ExpU" )
+ DEF_REFTYPE(RefTypeParamDef , (0x10 | RefTypeDef), "Parm" )
+ DEF_REFTYPE(RefTypeDummyDef , (0x20 | RefTypeDef), "DDef" )
+ DEF_REFTYPE(RefTypeZeroInit , (0x30 | RefTypeDef), "Zero" )
+ DEF_REFTYPE(RefTypeUpperVectorSaveDef, (0x40 | RefTypeDef), "UVSv" )
+ DEF_REFTYPE(RefTypeUpperVectorSaveUse, (0x40 | RefTypeUse), "UVRs" )
+ DEF_REFTYPE(RefTypeKillGCRefs , 0x80 , "KlGC" )
+// clang-format on
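+//
+// Illustrative sketch (an assumption about typical usage, not part of the original header):
+// a consumer defines DEF_REFTYPE before including this file, for example to build an enum:
+//
+//   #define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue,
+//   enum RefType : unsigned char
+//   {
+//   #include "lsra_reftypes.h"
+//   };
+//   #undef DEF_REFTYPE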
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
new file mode 100644
index 0000000000..00df17baa0
--- /dev/null
+++ b/src/jit/morph.cpp
@@ -0,0 +1,18245 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Morph XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "allocacheck.h" // for alloca
+
+// Convert the given node into a call to the specified helper passing
+// the given argument list.
+//
+// Tries to fold constants and also adds an edge for the overflow exception.
+// Returns the morphed tree.
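+//
+// For example, later in this file a double->int cast that cannot be expanded inline is
+// rewritten via fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper).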
+GenTreePtr Compiler::fgMorphCastIntoHelper(GenTreePtr tree, int helper, GenTreePtr oper)
+{
+ GenTree* result;
+
+ /* If the operand is a constant, we'll try to fold it */
+ if (oper->OperIsConst())
+ {
+ GenTreePtr oldTree = tree;
+
+ tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
+
+ if (tree != oldTree)
+ {
+ return fgMorphTree(tree);
+ }
+ else if (tree->OperKind() & GTK_CONST)
+ {
+ return fgMorphConst(tree);
+ }
+
+ // assert that oper is unchanged and that it is still a GT_CAST node
+ noway_assert(tree->gtCast.CastOp() == oper);
+ noway_assert(tree->gtOper == GT_CAST);
+ }
+ result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
+ assert(result == tree);
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Convert the given node into a call to the specified helper passing
+ * the given argument list.
+ */
+
+GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeArgList* args)
+{
+ tree->ChangeOper(GT_CALL);
+
+ tree->gtFlags |= GTF_CALL;
+ if (args)
+ {
+ tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
+ }
+ tree->gtCall.gtCallType = CT_HELPER;
+ tree->gtCall.gtCallMethHnd = eeFindHelper(helper);
+ tree->gtCall.gtCallArgs = args;
+ tree->gtCall.gtCallObjp = nullptr;
+ tree->gtCall.gtCallLateArgs = nullptr;
+ tree->gtCall.fgArgInfo = nullptr;
+ tree->gtCall.gtRetClsHnd = nullptr;
+ tree->gtCall.gtCallMoreFlags = 0;
+ tree->gtCall.gtInlineCandidateInfo = nullptr;
+ tree->gtCall.gtControlExpr = nullptr;
+
+#ifdef LEGACY_BACKEND
+ tree->gtCall.gtCallRegUsedMask = RBM_NONE;
+#endif // LEGACY_BACKEND
+
+#if DEBUG
+ // Helper calls are never candidates.
+
+ tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
+#endif // DEBUG
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ tree->gtCall.gtEntryPoint.addr = nullptr;
+#endif
+
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ if (varTypeIsLong(tree))
+ {
+ GenTreeCall* callNode = tree->AsCall();
+ ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
+ retTypeDesc->Reset();
+ retTypeDesc->InitializeLongReturnType(this);
+ callNode->ClearOtherRegs();
+ }
+#endif
+
+ /* Perform the morphing */
+
+ tree = fgMorphArgs(tree->AsCall());
+
+ return tree;
+}
+
+/*****************************************************************************
+ *
+ * Determine if a relop must be morphed to a qmark to manifest a boolean value.
+ * This is done when code generation can't create straight-line code to do it.
+ */
+bool Compiler::fgMorphRelopToQmark(GenTreePtr tree)
+{
+#ifndef LEGACY_BACKEND
+ return false;
+#else // LEGACY_BACKEND
+ return (genActualType(tree->TypeGet()) == TYP_LONG) || varTypeIsFloating(tree->TypeGet());
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Morph a cast node (we perform some very simple transformations here).
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_CAST);
+ noway_assert(genTypeSize(TYP_I_IMPL) == sizeof(void*));
+
+ /* The first sub-operand is the thing being cast */
+
+ GenTreePtr oper = tree->gtCast.CastOp();
+ var_types srcType = genActualType(oper->TypeGet());
+ unsigned srcSize;
+
+ var_types dstType = tree->CastToType();
+ unsigned dstSize = genTypeSize(dstType);
+
+ // See if the cast has to be done in two steps. R -> I
+ if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
+ {
+ // Only x86 must go through TYP_DOUBLE to get to all
+ // integral types; everybody else can get straight there,
+ // except when using helpers.
+ if (srcType == TYP_FLOAT
+#if !FEATURE_STACK_FP_X87
+
+#if defined(_TARGET_ARM64_)
+ // Arm64: src = float, dst is overflow conversion.
+ // This goes through helper and hence src needs to be converted to double.
+ && tree->gtOverflow()
+#elif defined(_TARGET_AMD64_)
+ // Amd64: src = float, dst = uint64 or overflow conversion.
+ // This goes through helper and hence src needs to be converted to double.
+ && (tree->gtOverflow() || (dstType == TYP_ULONG))
+#elif defined(_TARGET_ARM_)
+ // Arm: src = float, dst = int64/uint64 or overflow conversion.
+ && (tree->gtOverflow() || varTypeIsLong(dstType))
+#endif
+
+#endif // FEATURE_STACK_FP_X87
+ )
+ {
+ oper = gtNewCastNode(TYP_DOUBLE, oper, TYP_DOUBLE);
+ }
+
+ // do we need to do it in two steps R -> I, I -> smallType?
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
+ if (dstSize < genTypeSize(TYP_INT))
+ {
+ oper = gtNewCastNodeL(TYP_INT, oper, TYP_INT);
+ oper->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+#else
+ if (dstSize < sizeof(void*))
+ {
+ oper = gtNewCastNodeL(TYP_I_IMPL, oper, TYP_I_IMPL);
+ oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
+ }
+#endif
+ else
+ {
+ /* Note that if we need to use a helper call then we can not morph oper */
+ if (!tree->gtOverflow())
+ {
+#ifdef _TARGET_ARM64_ // On ARM64 All non-overflow checking conversions can be optimized
+ goto OPTIMIZECAST;
+#else
+ switch (dstType)
+ {
+ case TYP_INT:
+#ifdef _TARGET_X86_ // there is no rounding convert to integer instruction on ARM or x64 so skip this
+ if ((oper->gtOper == GT_INTRINSIC) &&
+ (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
+ {
+ /* optimization: conv.i4(round.d(d)) -> round.i(d) */
+ oper->gtType = dstType;
+ return fgMorphTree(oper);
+ }
+ // if SSE2 is not enabled, we need the helper
+ else if (!opts.compCanUseSSE2)
+ {
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ goto OPTIMIZECAST;
+ }
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+ case TYP_UINT:
+ goto OPTIMIZECAST;
+#else // _TARGET_ARM_
+ case TYP_UINT:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_AMD64_
+ // SSE2 has instructions to convert a float/double directly to a long
+ case TYP_LONG:
+ goto OPTIMIZECAST;
+#else
+ case TYP_LONG:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
+#endif //_TARGET_AMD64_
+ case TYP_ULONG:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
+ default:
+ break;
+ }
+#endif // _TARGET_ARM64_
+ }
+ else
+ {
+ switch (dstType)
+ {
+ case TYP_INT:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
+ case TYP_UINT:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
+ case TYP_LONG:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
+ case TYP_ULONG:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
+ default:
+ break;
+ }
+ }
+ noway_assert(!"Unexpected dstType");
+ }
+ }
+#ifndef _TARGET_64BIT_
+ // The code generation phase (for x86 & ARM32) does not handle casts
+ // directly from [u]long to anything other than [u]int. Insert an
+ // intermediate cast to native int.
+ else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
+ {
+ oper = gtNewCastNode(TYP_I_IMPL, oper, TYP_I_IMPL);
+ oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+#endif //!_TARGET_64BIT_
+
+#ifdef _TARGET_ARM_
+ else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
+ !varTypeIsLong(oper->gtCast.CastOp()))
+ {
+ // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
+ // except when the ultimate source is a long because there is no long-to-float helper, so it must be 2 step.
+ // This happens semi-frequently because there is no IL 'conv.r4.un'
+ oper->gtType = TYP_FLOAT;
+ oper->CastToType() = TYP_FLOAT;
+ return fgMorphTree(oper);
+ }
+ // converts long/ulong --> float/double casts into helper calls.
+ else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
+ {
+ if (dstType == TYP_FLOAT)
+ {
+ // there is only a double helper, so we
+ // - change the dsttype to double
+ // - insert a cast from double to float
+ // - recurse into the resulting tree
+ tree->CastToType() = TYP_DOUBLE;
+ tree->gtType = TYP_DOUBLE;
+
+ tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
+
+ return fgMorphTree(tree);
+ }
+ if (tree->gtFlags & GTF_UNSIGNED)
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
+ }
+#endif //_TARGET_ARM_
+
+#ifdef _TARGET_AMD64_
+ // Do we have to do two step U4/8 -> R4/8 ?
+ // Codegen supports the following conversion as one-step operation
+ // a) Long -> R4/R8
+ // b) U8 -> R8
+ //
+ // The following conversions are performed as two-step operations using above.
+ // U4 -> R4/8 = U4-> Long -> R4/8
+ // U8 -> R4 = U8 -> R8 -> R4
+ else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
+ {
+ srcType = genUnsignedType(srcType);
+
+ if (srcType == TYP_ULONG)
+ {
+ if (dstType == TYP_FLOAT)
+ {
+ // Codegen can handle U8 -> R8 conversion.
+ // U8 -> R4 = U8 -> R8 -> R4
+ // - change the dsttype to double
+ // - insert a cast from double to float
+ // - recurse into the resulting tree
+ tree->CastToType() = TYP_DOUBLE;
+ tree->gtType = TYP_DOUBLE;
+ tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
+ return fgMorphTree(tree);
+ }
+ }
+ else if (srcType == TYP_UINT)
+ {
+ oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
+ oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+ }
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_X86_
+ // Do we have to do two step U4/8 -> R4/8 ?
+ else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
+ {
+ srcType = genUnsignedType(srcType);
+
+ if (srcType == TYP_ULONG)
+ {
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
+ }
+ else if (srcType == TYP_UINT)
+ {
+ oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
+ oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+ }
+#endif //_TARGET_X86_
+ else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
+ {
+ // We are casting away GC information. We would like to just
+ // change the type to int; however, this gives the emitter fits because
+ // it believes the variable is a GC variable at the beginning of the
+ // instruction group, but it is not turned non-gc by the code generator.
+ // We fix this by copying the GC pointer to a non-gc pointer temp.
+ noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");
+
+ // We generate an assignment to an int and then do the cast from an int. With this we avoid
+ // the gc problem and we allow casts to bytes, longs, etc...
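+ //
+ // The resulting shape is roughly GT_COMMA(tmpN = oper, CAST<dstType>(tmpN)), where tmpN
+ // is a TYP_I_IMPL temp (see the gtNewTempAssign and gtNewCastNode calls below).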
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
+ oper->gtType = TYP_I_IMPL;
+ GenTreePtr asg = gtNewTempAssign(lclNum, oper);
+ oper->gtType = srcType;
+
+ // do the real cast
+ GenTreePtr cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), dstType);
+
+ // Generate the comma tree
+ oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);
+
+ return fgMorphTree(oper);
+ }
+
+ // Look for narrowing casts ([u]long -> [u]int) and try to push them
+ // down into the operand before morphing it.
+ //
+ // It doesn't matter if this cast is from ulong or long (i.e. if
+ // GTF_UNSIGNED is set) because the transformation is only applied to
+ // overflow-insensitive narrowing casts, which always silently truncate.
+ //
+ // Note that casts from [u]long to small integer types are handled above.
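+ //
+ // For example, (int)(x + y) with 64-bit x and y can be computed as a 32-bit add of
+ // (int)x and (int)y, since the low 32 bits of the result do not depend on the upper
+ // bits of the operands (see the GT_ADD/GT_MUL/GT_AND/GT_OR/GT_XOR handling below).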
+ if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
+ {
+ // As a special case, look for overflow-sensitive casts of an AND
+ // expression, and see if the second operand is a small constant. Since
+ // the result of an AND is bound by its smaller operand, it may be
+ // possible to prove that the cast won't overflow, which will in turn
+ // allow the cast's operand to be transformed.
+ if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
+ {
+ GenTreePtr andOp2 = oper->gtOp.gtOp2;
+
+ // Special case to the special case: AND with a casted int.
+ if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
+ {
+ // gtFoldExprConst will deal with whether the cast is signed or
+ // unsigned, or overflow-sensitive.
+ andOp2 = oper->gtOp.gtOp2 = gtFoldExprConst(andOp2);
+ }
+
+ // Look for a constant less than 2^{32} for a cast to uint, or less
+ // than 2^{31} for a cast to int.
+ int maxWidth = (dstType == TYP_UINT) ? 32 : 31;
+
+ if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
+ {
+ // This cast can't overflow.
+ tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
+ }
+ }
+
+ // Only apply this transformation during global morph,
+ // when neither the cast node nor the oper node may throw an exception
+ // based on the upper 32 bits.
+ //
+ if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
+ {
+ // For these operations the lower 32 bits of the result only depends
+ // upon the lower 32 bits of the operands
+ //
+ if ((oper->OperGet() == GT_ADD) || (oper->OperGet() == GT_MUL) || (oper->OperGet() == GT_AND) ||
+ (oper->OperGet() == GT_OR) || (oper->OperGet() == GT_XOR))
+ {
+ DEBUG_DESTROY_NODE(tree);
+
+ // Insert narrowing casts for op1 and op2
+ oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, dstType);
+ oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, dstType);
+
+ // Clear the GT_MUL_64RSLT if it is set
+ if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
+ {
+ oper->gtFlags &= ~GTF_MUL_64RSLT;
+ }
+
+ // The operation now produces a 32-bit result.
+ oper->gtType = TYP_INT;
+
+ // Remorph the new tree as the casts that we added may be folded away.
+ return fgMorphTree(oper);
+ }
+ }
+ }
+
+OPTIMIZECAST:
+ noway_assert(tree->gtOper == GT_CAST);
+
+ /* Morph the operand */
+ tree->gtCast.CastOp() = oper = fgMorphTree(oper);
+
+ /* Reset the call flag */
+ tree->gtFlags &= ~GTF_CALL;
+
+ /* unless we have an overflow cast, reset the except flag */
+ if (!tree->gtOverflow())
+ {
+ tree->gtFlags &= ~GTF_EXCEPT;
+ }
+
+ /* Just in case new side effects were introduced */
+ tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);
+
+ srcType = oper->TypeGet();
+
+ /* if GTF_UNSIGNED is set then force srcType to an unsigned type */
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+ srcSize = genTypeSize(srcType);
+
+ if (!gtIsActiveCSE_Candidate(tree)) // tree cannot be a CSE candidate
+ {
+ /* See if we can discard the cast */
+ if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
+ {
+ if (srcType == dstType)
+ { // Certainly if they are identical it is pointless
+ goto REMOVE_CAST;
+ }
+
+ if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
+ {
+ unsigned varNum = oper->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
+ {
+ goto REMOVE_CAST;
+ }
+ }
+
+ bool unsignedSrc = varTypeIsUnsigned(srcType);
+ bool unsignedDst = varTypeIsUnsigned(dstType);
+ bool signsDiffer = (unsignedSrc != unsignedDst);
+
+ // For same sized casts with
+ // the same signs or non-overflow cast we discard them as well
+ if (srcSize == dstSize)
+ {
+ /* This should have been handled above */
+ noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));
+
+ if (!signsDiffer)
+ {
+ goto REMOVE_CAST;
+ }
+
+ if (!tree->gtOverflow())
+ {
+ /* For small type casts, when necessary we force
+ the src operand to the dstType and allow the
+ implied load from memory to perform the casting */
+ if (varTypeIsSmall(srcType))
+ {
+ switch (oper->gtOper)
+ {
+ case GT_IND:
+ case GT_CLS_VAR:
+ case GT_LCL_FLD:
+ case GT_ARR_ELEM:
+ oper->gtType = dstType;
+ goto REMOVE_CAST;
+ default:
+ break;
+ }
+ }
+ else
+ {
+ goto REMOVE_CAST;
+ }
+ }
+ }
+
+ if (srcSize < dstSize) // widening cast
+ {
+ // Keep any long casts
+ if (dstSize == sizeof(int))
+ {
+ // Only keep signed to unsigned widening cast with overflow check
+ if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
+ {
+ goto REMOVE_CAST;
+ }
+ }
+
+ // Casts from signed->unsigned can never overflow while widening
+
+ if (unsignedSrc || !unsignedDst)
+ {
+ tree->gtFlags &= ~GTF_OVERFLOW;
+ }
+ }
+ else
+ {
+ // Try to narrow the operand of the cast and discard the cast
+ // Note: Do not narrow a cast that is marked as a CSE
+ // And do not narrow if the oper is marked as a CSE either
+ //
+ if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
+ optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
+ {
+ optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);
+
+ /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
+ if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
+ {
+ oper = oper->gtCast.CastOp();
+ }
+ goto REMOVE_CAST;
+ }
+ }
+ }
+
+ switch (oper->gtOper)
+ {
+ /* If the operand is a constant, we'll fold it */
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
+ case GT_CNS_DBL:
+ case GT_CNS_STR:
+ {
+ GenTreePtr oldTree = tree;
+
+ tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
+
+ // Did we get a comma throw as a result of gtFoldExprConst?
+ if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
+ {
+ noway_assert(fgIsCommaThrow(tree));
+ tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
+ fgMorphTreeDone(tree);
+ return tree;
+ }
+ else if (tree->gtOper != GT_CAST)
+ {
+ return tree;
+ }
+
+ noway_assert(tree->gtCast.CastOp() == oper); // unchanged
+ }
+ break;
+
+ case GT_CAST:
+ /* Check for two consecutive casts into the same dstType */
+ if (!tree->gtOverflow())
+ {
+ var_types dstType2 = oper->CastToType();
+ if (dstType == dstType2)
+ {
+ goto REMOVE_CAST;
+ }
+ }
+ break;
+
+ /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
+ so that the code generator will know not to convert the result
+ of the idiv to a regpair */
+ case GT_MOD:
+ if (dstType == TYP_INT)
+ {
+ tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
+ }
+
+ break;
+ case GT_UMOD:
+ if (dstType == TYP_UINT)
+ {
+ tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
+ }
+ break;
+
+ case GT_COMMA:
+ // Check for cast of a GT_COMMA with a throw overflow
+ // Bug 110829: Since this optimization will bash the types
+ // neither oper nor commaOp2 can be CSE candidates
+ if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate
+ {
+ GenTreePtr commaOp2 = oper->gtOp.gtOp2;
+
+ if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate
+ {
+ // need type of oper to be same as tree
+ if (tree->gtType == TYP_LONG)
+ {
+ commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
+ commaOp2->gtIntConCommon.SetLngValue(0);
+ /* Change the types of oper and commaOp2 to TYP_LONG */
+ oper->gtType = commaOp2->gtType = TYP_LONG;
+ }
+ else if (varTypeIsFloating(tree->gtType))
+ {
+ commaOp2->ChangeOperConst(GT_CNS_DBL);
+ commaOp2->gtDblCon.gtDconVal = 0.0;
+ // Change the types of oper and commaOp2
+ // X87 promotes everything to TYP_DOUBLE
+ // But others are a little more precise
+ const var_types newTyp
+#if FEATURE_X87_DOUBLES
+ = TYP_DOUBLE;
+#else // FEATURE_X87_DOUBLES
+ = tree->gtType;
+#endif // FEATURE_X87_DOUBLES
+ oper->gtType = commaOp2->gtType = newTyp;
+ }
+ else
+ {
+ commaOp2->ChangeOperConst(GT_CNS_INT);
+ commaOp2->gtIntCon.gtIconVal = 0;
+ /* Change the types of oper and commaOp2 to TYP_INT */
+ oper->gtType = commaOp2->gtType = TYP_INT;
+ }
+ }
+
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(commaOp2);
+ }
+
+ /* Return the GT_COMMA node as the new tree */
+ return oper;
+ }
+ break;
+
+ default:
+ break;
+ } /* end switch (oper->gtOper) */
+ }
+
+ if (tree->gtOverflow())
+ {
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
+ }
+
+ return tree;
+
+REMOVE_CAST:
+
+ /* Here we've eliminated the cast, so just return its operand */
+ assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate
+
+ DEBUG_DESTROY_NODE(tree);
+ return oper;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Perform an unwrap operation on a Proxy object
+ */
+
+GenTreePtr Compiler::fgUnwrapProxy(GenTreePtr objRef)
+{
+ assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));
+
+ CORINFO_EE_INFO* pInfo = eeGetEEInfo();
+ GenTreePtr addTree;
+
+ // Perform the unwrap:
+ //
+ // This requires two extra indirections.
+ // We mark these indirections as 'invariant' and
+ // the CSE logic will hoist them when appropriate.
+ //
+ // Note that each dereference is a GC pointer
+
+ addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));
+
+ objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
+ objRef->gtFlags |= GTF_IND_INVARIANT;
+
+ addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));
+
+ objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
+ objRef->gtFlags |= GTF_IND_INVARIANT;
+
+ // objRef now hold the 'real this' reference (i.e. the unwrapped proxy)
+ return objRef;
+}
+
+/*****************************************************************************
+ *
+ * Morph an argument list; compute the pointer argument count in the process.
+ *
+ * NOTE: This function can be called from any place in the JIT to perform re-morphing
+ * due to graph altering modifications such as copy / constant propagation
+ */
+
+unsigned UpdateGT_LISTFlags(GenTreePtr tree)
+{
+ assert(tree->gtOper == GT_LIST);
+
+ unsigned flags = 0;
+ if (tree->gtOp.gtOp2)
+ {
+ flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
+ }
+
+ flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+ tree->gtFlags |= flags;
+
+ return tree->gtFlags;
+}
+
+#ifdef DEBUG
+void fgArgTabEntry::Dump()
+{
+ printf("fgArgTabEntry[arg %u", argNum);
+ if (regNum != REG_STK)
+ {
+ printf(", %s, regs=%u", getRegName(regNum), numRegs);
+ }
+ if (numSlots > 0)
+ {
+ printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
+ }
+ printf(", align=%u", alignment);
+ if (lateArgInx != (unsigned)-1)
+ {
+ printf(", lateArgInx=%u", lateArgInx);
+ }
+ if (isSplit)
+ {
+ printf(", isSplit");
+ }
+ if (needTmp)
+ {
+ printf(", tmpNum=V%02u", tmpNum);
+ }
+ if (needPlace)
+ {
+ printf(", needPlace");
+ }
+ if (isTmp)
+ {
+ printf(", isTmp");
+ }
+ if (processed)
+ {
+ printf(", processed");
+ }
+ if (isHfaRegArg)
+ {
+ printf(", isHfa");
+ }
+ if (isBackFilled)
+ {
+ printf(", isBackFilled");
+ }
+ if (isNonStandard)
+ {
+ printf(", isNonStandard");
+ }
+ printf("]\n");
+}
+#endif
+
+fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
+{
+ compiler = comp;
+ callTree = call;
+ assert(call->IsCall());
+ argCount = 0; // filled in arg count, starts at zero
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+ stkLevel = 0;
+ argTableSize = numArgs; // the allocated table size
+
+ hasRegArgs = false;
+ hasStackArgs = false;
+ argsComplete = false;
+ argsSorted = false;
+
+ if (argTableSize == 0)
+ {
+ argTable = nullptr;
+ }
+ else
+ {
+ argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
+ }
+}
+
+/*****************************************************************************
+ *
+ * fgArgInfo Copy Constructor
+ *
+ * This method needs to act like a copy constructor for fgArgInfo.
+ * The newCall needs to have its fgArgInfo initialized such that
+ * The newCall needs to have its fgArgInfo initialized such that
+ * it is an exact copy of the oldCall's argument info.
+ * in the argTable contains pointers that must point to the
+ * new arguments and not the old arguments.
+ */
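+// Overview of the approach below: the first loop matches entries by their 'parent' list node
+// and block-copies each fgArgTabEntry; a second pass over gtCallLateArgs then fixes up any
+// 'node' pointers that referred into the old call's late-argument list.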
+fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
+{
+ assert(oldCall->IsCall());
+ assert(newCall->IsCall());
+
+ fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;
+
+ compiler = oldArgInfo->compiler;
+ callTree = newCall;
+ assert(newCall->IsCall());
+ argCount = 0; // filled in arg count, starts at zero
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+ stkLevel = oldArgInfo->stkLevel;
+ argTableSize = oldArgInfo->argTableSize;
+ argsComplete = false;
+ argTable = nullptr;
+ if (argTableSize > 0)
+ {
+ argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
+ for (unsigned inx = 0; inx < argTableSize; inx++)
+ {
+ argTable[inx] = nullptr;
+ }
+ }
+
+ assert(oldArgInfo->argsComplete);
+
+ // We create local, artificial GenTreeArgLists that include the gtCallObjp, if that exists, as the first argument,
+ // so we can iterate over these argument lists more uniformly.
+ // Need to provide a temporary non-null first argument to these constructors: if we use them, we'll replace them.
+ GenTreeArgList* newArgs;
+ GenTreeArgList newArgObjp(newCall, newCall->gtCall.gtCallArgs);
+ GenTreeArgList* oldArgs;
+ GenTreeArgList oldArgObjp(oldCall, oldCall->gtCall.gtCallArgs);
+
+ if (newCall->gtCall.gtCallObjp == nullptr)
+ {
+ assert(oldCall->gtCall.gtCallObjp == nullptr);
+ newArgs = newCall->gtCall.gtCallArgs;
+ oldArgs = oldCall->gtCall.gtCallArgs;
+ }
+ else
+ {
+ assert(oldCall->gtCall.gtCallObjp != nullptr);
+ newArgObjp.Current() = newCall->gtCall.gtCallArgs;
+ newArgs = &newArgObjp;
+ oldArgObjp.Current() = oldCall->gtCall.gtCallObjp;
+ oldArgs = &oldArgObjp;
+ }
+
+ GenTreePtr newCurr;
+ GenTreePtr oldCurr;
+ GenTreeArgList* newParent = nullptr;
+ GenTreeArgList* oldParent = nullptr;
+ fgArgTabEntryPtr* oldArgTable = oldArgInfo->argTable;
+ bool scanRegArgs = false;
+
+ while (newArgs)
+ {
+ /* Get hold of the next argument values for the oldCall and newCall */
+
+ newCurr = newArgs->Current();
+ oldCurr = oldArgs->Current();
+ if (newArgs != &newArgObjp)
+ {
+ newParent = newArgs;
+ oldParent = oldArgs;
+ }
+ else
+ {
+ assert(newParent == nullptr && oldParent == nullptr);
+ }
+ newArgs = newArgs->Rest();
+ oldArgs = oldArgs->Rest();
+
+ fgArgTabEntryPtr oldArgTabEntry = nullptr;
+ fgArgTabEntryPtr newArgTabEntry = nullptr;
+
+ for (unsigned inx = 0; inx < argTableSize; inx++)
+ {
+ oldArgTabEntry = oldArgTable[inx];
+
+ if (oldArgTabEntry->parent == oldParent)
+ {
+ assert((oldParent == nullptr) == (newParent == nullptr));
+
+ // We have found the matching "parent" field in oldArgTabEntry
+
+ newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
+
+ // First block copy all fields
+ //
+ *newArgTabEntry = *oldArgTabEntry;
+
+ // Then update all GenTreePtr fields in the newArgTabEntry
+ //
+ newArgTabEntry->parent = newParent;
+
+ // The node field is likely to have been updated
+ // to point at a node in the gtCallLateArgs list
+ //
+ if (oldArgTabEntry->node == oldCurr)
+ {
+ // node is not pointing into the gtCallLateArgs list
+ newArgTabEntry->node = newCurr;
+ }
+ else
+ {
+ // node must be pointing into the gtCallLateArgs list
+ //
+ // We will fix this pointer up in the next loop
+ //
+ newArgTabEntry->node = nullptr; // For now we assign a NULL to this field
+
+ scanRegArgs = true;
+ }
+
+ // Now initialize the proper element in the argTable array
+ //
+ argTable[inx] = newArgTabEntry;
+ break;
+ }
+ }
+ // We should have found the matching oldArgTabEntry and created the newArgTabEntry
+ //
+ assert(newArgTabEntry != nullptr);
+ }
+
+ if (scanRegArgs)
+ {
+ newArgs = newCall->gtCall.gtCallLateArgs;
+ oldArgs = oldCall->gtCall.gtCallLateArgs;
+
+ while (newArgs)
+ {
+ /* Get hold of the next argument values for the oldCall and newCall */
+
+ assert(newArgs->IsList());
+
+ newCurr = newArgs->Current();
+ newArgs = newArgs->Rest();
+
+ assert(oldArgs->IsList());
+
+ oldCurr = oldArgs->Current();
+ oldArgs = oldArgs->Rest();
+
+ fgArgTabEntryPtr oldArgTabEntry = nullptr;
+ fgArgTabEntryPtr newArgTabEntry = nullptr;
+
+ for (unsigned inx = 0; inx < argTableSize; inx++)
+ {
+ oldArgTabEntry = oldArgTable[inx];
+
+ if (oldArgTabEntry->node == oldCurr)
+ {
+ // We have found the matching "node" field in oldArgTabEntry
+
+ newArgTabEntry = argTable[inx];
+ assert(newArgTabEntry != nullptr);
+
+ // update the "node" GenTreePtr fields in the newArgTabEntry
+ //
+ assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field
+
+ newArgTabEntry->node = newCurr;
+ break;
+ }
+ }
+ }
+ }
+
+ argCount = oldArgInfo->argCount;
+ nextSlotNum = oldArgInfo->nextSlotNum;
+ argsComplete = true;
+ argsSorted = true;
+}
+
+void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
+{
+ assert(argCount < argTableSize);
+ argTable[argCount] = curArgTabEntry;
+ argCount++;
+}
+
+fgArgTabEntryPtr fgArgInfo::AddRegArg(
+ unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
+{
+ fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
+
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = regNum;
+ curArgTabEntry->slotNum = 0;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = 0;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->isSplit = false;
+ curArgTabEntry->isTmp = false;
+ curArgTabEntry->needTmp = false;
+ curArgTabEntry->needPlace = false;
+ curArgTabEntry->processed = false;
+ curArgTabEntry->isHfaRegArg = false;
+ curArgTabEntry->isBackFilled = false;
+ curArgTabEntry->isNonStandard = false;
+
+ hasRegArgs = true;
+ AddArg(curArgTabEntry);
+ return curArgTabEntry;
+}
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ regNumber regNum,
+ unsigned numRegs,
+ unsigned alignment,
+ const bool isStruct,
+ const regNumber otherRegNum,
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
+{
+ fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
+ assert(curArgTabEntry != nullptr);
+
+ // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
+ // PlaceHolder node (in case of a needed late argument, for example).
+ // This requires the use of an extra flag. At creation time the state is right,
+ // and this assert enforces that.
+ assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
+ curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
+ curArgTabEntry->isStruct = isStruct; // is this a struct arg
+
+ if (isStruct && structDescPtr != nullptr)
+ {
+ curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
+ }
+
+ return curArgTabEntry;
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ unsigned numSlots,
+ unsigned alignment
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
+{
+ fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
+
+ nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
+ // PlaceHolder node (in case of a needed late argument, for example).
+ // This requires the use of an extra flag. At creation time the state is right,
+ // and this assert enforces that.
+ assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
+ curArgTabEntry->isStruct = isStruct; // is this a struct arg
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = REG_STK;
+ curArgTabEntry->slotNum = nextSlotNum;
+ curArgTabEntry->numRegs = 0;
+ curArgTabEntry->numSlots = numSlots;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->isSplit = false;
+ curArgTabEntry->isTmp = false;
+ curArgTabEntry->needTmp = false;
+ curArgTabEntry->needPlace = false;
+ curArgTabEntry->processed = false;
+ curArgTabEntry->isHfaRegArg = false;
+ curArgTabEntry->isBackFilled = false;
+ curArgTabEntry->isNonStandard = false;
+
+ hasStackArgs = true;
+ AddArg(curArgTabEntry);
+
+ nextSlotNum += numSlots;
+ return curArgTabEntry;
+}
+
+void fgArgInfo::RemorphReset()
+{
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+}
+
+fgArgTabEntry* fgArgInfo::RemorphRegArg(
+ unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
+{
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+ unsigned regArgInx = 0;
+ unsigned inx;
+
+ for (inx = 0; inx < argCount; inx++)
+ {
+ curArgTabEntry = argTable[inx];
+ if (curArgTabEntry->argNum == argNum)
+ {
+ break;
+ }
+
+ bool isRegArg;
+ GenTreePtr argx;
+ if (curArgTabEntry->parent != nullptr)
+ {
+ assert(curArgTabEntry->parent->IsList());
+ argx = curArgTabEntry->parent->Current();
+ isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
+ }
+ else
+ {
+ argx = curArgTabEntry->node;
+ isRegArg = true;
+ }
+
+ if (isRegArg)
+ {
+ regArgInx++;
+ }
+ }
+ // if this was a nonstandard arg the table is definitive
+ if (curArgTabEntry->isNonStandard)
+ {
+ regNum = curArgTabEntry->regNum;
+ }
+
+ assert(curArgTabEntry->argNum == argNum);
+ assert(curArgTabEntry->regNum == regNum);
+ assert(curArgTabEntry->alignment == alignment);
+ assert(curArgTabEntry->parent == parent);
+
+ if (curArgTabEntry->node != node)
+ {
+ GenTreePtr argx = nullptr;
+ unsigned regIndex = 0;
+
+ /* process the register argument list */
+ for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
+ {
+ argx = list->Current();
+ assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
+ if (regIndex == regArgInx)
+ {
+ break;
+ }
+ }
+ assert(regIndex == regArgInx);
+ assert(regArgInx == curArgTabEntry->lateArgInx);
+
+ if (curArgTabEntry->node != argx)
+ {
+ curArgTabEntry->node = argx;
+ }
+ }
+ return curArgTabEntry;
+}
+
+void fgArgInfo::RemorphStkArg(
+ unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, unsigned alignment)
+{
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+ bool isRegArg = false;
+ unsigned regArgInx = 0;
+ GenTreePtr argx;
+ unsigned inx;
+
+ for (inx = 0; inx < argCount; inx++)
+ {
+ curArgTabEntry = argTable[inx];
+
+ if (curArgTabEntry->parent != nullptr)
+ {
+ assert(curArgTabEntry->parent->IsList());
+ argx = curArgTabEntry->parent->Current();
+ isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
+ }
+ else
+ {
+ argx = curArgTabEntry->node;
+ isRegArg = true;
+ }
+
+ if (curArgTabEntry->argNum == argNum)
+ {
+ break;
+ }
+
+ if (isRegArg)
+ {
+ regArgInx++;
+ }
+ }
+
+ nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
+
+ assert(curArgTabEntry->argNum == argNum);
+ assert(curArgTabEntry->slotNum == nextSlotNum);
+ assert(curArgTabEntry->numSlots == numSlots);
+ assert(curArgTabEntry->alignment == alignment);
+ assert(curArgTabEntry->parent == parent);
+ assert(parent->IsList());
+
+#if FEATURE_FIXED_OUT_ARGS
+ if (curArgTabEntry->node != node)
+ {
+ if (isRegArg)
+ {
+ GenTreePtr argx = nullptr;
+ unsigned regIndex = 0;
+
+ /* process the register argument list */
+ for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
+ {
+ argx = list->Current();
+ assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
+ if (regIndex == regArgInx)
+ {
+ break;
+ }
+ }
+ assert(regIndex == regArgInx);
+ assert(regArgInx == curArgTabEntry->lateArgInx);
+
+ if (curArgTabEntry->node != argx)
+ {
+ curArgTabEntry->node = argx;
+ }
+ }
+ else
+ {
+ assert(parent->Current() == node);
+ curArgTabEntry->node = node;
+ }
+ }
+#else
+ curArgTabEntry->node = node;
+#endif
+
+ nextSlotNum += numSlots;
+}
+
+void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
+{
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+ assert(argNum < argCount);
+ for (unsigned inx = 0; inx < argCount; inx++)
+ {
+ curArgTabEntry = argTable[inx];
+ if (curArgTabEntry->argNum == argNum)
+ {
+ break;
+ }
+ }
+
+ assert(numRegs > 0);
+ assert(numSlots > 0);
+
+ curArgTabEntry->isSplit = true;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = numSlots;
+
+ nextSlotNum += numSlots;
+}
+
+void fgArgInfo::EvalToTmp(unsigned argNum, unsigned tmpNum, GenTreePtr newNode)
+{
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+ assert(argNum < argCount);
+ for (unsigned inx = 0; inx < argCount; inx++)
+ {
+ curArgTabEntry = argTable[inx];
+ if (curArgTabEntry->argNum == argNum)
+ {
+ break;
+ }
+ }
+ assert(curArgTabEntry->parent->Current() == newNode);
+
+ curArgTabEntry->node = newNode;
+ curArgTabEntry->tmpNum = tmpNum;
+ curArgTabEntry->isTmp = true;
+}
+
+void fgArgInfo::ArgsComplete()
+{
+ bool hasStackArgs = false;
+ bool hasStructRegArg = false;
+
+ for (unsigned curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+ assert(curArgTabEntry != nullptr);
+ GenTreePtr argx = curArgTabEntry->node;
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ hasStackArgs = true;
+#if !FEATURE_FIXED_OUT_ARGS
+ // On x86 we use push instructions to pass arguments:
+ // The non-register arguments are evaluated and pushed in order
+ // and they are never evaluated into temps
+ //
+ continue;
+#endif
+ }
+ else // we have a register argument, next we look for a struct type.
+ {
+ if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
+ {
+ hasStructRegArg = true;
+ }
+ }
+
+ /* If the argument tree contains an assignment (GTF_ASG) then the argument
+ and every earlier argument (except constants) must be evaluated into temps
+ since there may be other arguments that follow and they may use the value being assigned.
+
+ EXAMPLE: ArgTab is "a, a=5, a"
+ -> when we see the second arg "a=5"
+ we know the first two arguments "a, a=5" have to be evaluated into temps
+
+ For the case of an assignment, we only know that there exist some assignment someplace
+ in the tree. We don't know what is being assigned so we are very conservative here
+ and assume that any local variable could have been assigned.
+ */
+
+ if (argx->gtFlags & GTF_ASG)
+ {
+ // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
+ // a tmp, then we need a temp in the late arg list.
+ if ((argCount > 1) || argx->OperIsCopyBlkOp()
+#ifdef FEATURE_FIXED_OUT_ARGS
+ || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
+ // that we only have late non-register args when that feature is on.
+#endif // FEATURE_FIXED_OUT_ARGS
+ )
+ {
+ curArgTabEntry->needTmp = true;
+ }
+
+ // For all previous arguments, unless they are a simple constant
+ // we require that they be evaluated into temps
+ for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
+ {
+ fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
+ assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
+
+ assert(prevArgTabEntry->node);
+ if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
+ {
+ prevArgTabEntry->needTmp = true;
+ }
+ }
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+ // Like calls, if this argument has a tree that will do an inline throw
+ // (a call to a jit helper), then we need to treat it like a call (but only
+ // if there are/were any stack args).
+ // This means unnesting, sorting, etc. Technically this is overly
+ // conservative, but I want to avoid as much special-case debug-only code
+ // as possible, so leveraging the GTF_CALL flag is the easiest.
+ if (!(argx->gtFlags & GTF_CALL) && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) &&
+ compiler->opts.compDbgCode &&
+ (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
+ {
+ for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
+ {
+ if (otherInx == curInx)
+ {
+ continue;
+ }
+
+ if (argTable[otherInx]->regNum == REG_STK)
+ {
+ argx->gtFlags |= GTF_CALL;
+ break;
+ }
+ }
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ /* If it contains a call (GTF_CALL) then itself and everything before the call
+ with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
+ has to be kept in the right order since we will move the call to the first position)
+
+ For calls we don't have to be quite as conservative as we are with an assignment
+ since the call won't be modifying any non-address taken LclVars.
+ */
+
+ if (argx->gtFlags & GTF_CALL)
+ {
+ if (argCount > 1) // If this is not the only argument
+ {
+ curArgTabEntry->needTmp = true;
+ }
+ else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
+ {
+ // Spill all arguments that are floating point calls
+ curArgTabEntry->needTmp = true;
+ }
+
+ // All previous arguments may need to be evaluated into temps
+ for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
+ {
+ fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
+ assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
+ assert(prevArgTabEntry->node);
+
+ // For all previous arguments, if they have any GTF_ALL_EFFECT
+ // we require that they be evaluated into a temp
+ if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
+ {
+ prevArgTabEntry->needTmp = true;
+ }
+#if FEATURE_FIXED_OUT_ARGS
+ // Or, if they are stored into the FIXED_OUT_ARG area
+ // we require that they be moved to the gtCallLateArgs
+ // and replaced with a placeholder node
+ else if (prevArgTabEntry->regNum == REG_STK)
+ {
+ prevArgTabEntry->needPlace = true;
+ }
+#endif
+ }
+ }
+
+#ifndef LEGACY_BACKEND
+#if FEATURE_MULTIREG_ARGS
+ // For RyuJIT backend we will expand a Multireg arg into a GT_LIST
+ // with multiple indirections, so here we consider spilling it into a tmp LclVar.
+ //
+ // Note that Arm32 uses the LEGACY_BACKEND and defines FEATURE_MULTIREG_ARGS,
+ // so we skip this for ARM32 until it is ported to use the RyuJIT backend.
+ //
+
+ bool isMultiRegArg = (curArgTabEntry->numRegs > 1);
+
+ if ((argx->TypeGet() == TYP_STRUCT) && (curArgTabEntry->needTmp == false))
+ {
+ if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
+ {
+ // Spill multireg struct arguments that have Assignments or Calls embedded in them
+ curArgTabEntry->needTmp = true;
+ }
+ else
+ {
+ // We call gtPrepareCost to measure the cost of evaluating this tree
+ compiler->gtPrepareCost(argx);
+
+ if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
+ {
+ // Spill multireg struct arguments that are expensive to evaluate twice
+ curArgTabEntry->needTmp = true;
+ }
+ else if (argx->OperGet() == GT_OBJ)
+ {
+ GenTreeObj* argObj = argx->AsObj();
+ CORINFO_CLASS_HANDLE objClass = argObj->gtClass;
+ unsigned structSize = compiler->info.compCompHnd->getClassSize(objClass);
+ switch (structSize)
+ {
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
+ //
+ if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
+ {
+ // If we don't have a LclVar we need to read exactly 3, 5, 6 or 7 bytes.
+ // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
+ //
+ curArgTabEntry->needTmp = true;
+ }
+ break;
+
+ case 11:
+ case 13:
+ case 14:
+ case 15:
+ // Spill any GT_OBJ multireg structs that are difficult to extract
+ //
+ // When we have a GT_OBJ of a struct with the above sizes we would need
+ // to use 3 or 4 load instructions to load the exact size of this struct.
+ // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
+ // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
+ // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
+ // the argument.
+ //
+ curArgTabEntry->needTmp = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+ }
+#endif // FEATURE_MULTIREG_ARGS
+#endif // LEGACY_BACKEND
+ }
+
+ // We only care because we can't spill structs and qmarks involve a lot of spilling, but
+ // if we don't have qmarks, then it doesn't matter.
+ // So check for Qmarks globally once here, instead of inside the loop.
+ //
+ const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ // For Arm/x64 we only care because we can't reorder a register
+ // argument that uses GT_LCLHEAP. This is an optimization to
+ // save a check inside the below loop.
+ //
+ const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);
+
+#else
+
+ const bool hasStackArgsWeCareAbout = hasStackArgs;
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ // If we have any stack args we have to force the evaluation
+ // of any arguments passed in registers that might throw an exception
+ //
+ // Technically we are only required to handle the following two cases:
+ // a GT_IND with GTF_IND_RNGCHK (only on x86) or
+ // a GT_LCLHEAP node that allocates stuff on the stack
+ //
+ if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
+ {
+ for (unsigned curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+ assert(curArgTabEntry != nullptr);
+ GenTreePtr argx = curArgTabEntry->node;
+
+ // Examine the register args that are currently not marked needTmp
+ //
+ if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
+ {
+ if (hasStackArgsWeCareAbout)
+ {
+#if !FEATURE_FIXED_OUT_ARGS
+ // On x86 we previously recorded a stack depth of zero when
+ // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag
+ // Thus we cannot reorder the argument after any stack-based argument.
+ // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
+ // check for it explicitly.)
+ //
+ if (argx->gtFlags & GTF_EXCEPT)
+ {
+ curArgTabEntry->needTmp = true;
+ continue;
+ }
+#else
+ // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
+ //
+ if (argx->gtFlags & GTF_EXCEPT)
+ {
+ assert(compiler->compLocallocUsed);
+
+ // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
+ //
+ if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
+ {
+ curArgTabEntry->needTmp = true;
+ continue;
+ }
+ }
+#endif
+ }
+ if (hasStructRegArgWeCareAbout)
+ {
+ // Returns true if a GT_QMARK node is encountered in the argx tree
+ //
+ if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
+ {
+ curArgTabEntry->needTmp = true;
+ continue;
+ }
+ }
+ }
+ }
+ }
+
+ argsComplete = true;
+}
+
+void fgArgInfo::SortArgs()
+{
+ assert(argsComplete == true);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nSorting the arguments:\n");
+ }
+#endif
+
+ /* Shuffle the arguments around before we build the gtCallLateArgs list.
+ The idea is to move all "simple" arguments like constants and local vars
+ to the end of the table, and move the complex arguments towards the beginning
+ of the table. This will help prevent registers from being spilled by
+ allowing us to evaluate the more complex arguments before the simpler arguments.
+ The argTable ends up looking like:
+ +------------------------------------+ <--- argTable[argCount - 1]
+ | constants |
+ +------------------------------------+
+ | local var / local field |
+ +------------------------------------+
+ | remaining arguments sorted by cost |
+ +------------------------------------+
+ | temps (argTable[].needTmp = true) |
+ +------------------------------------+
+ | args with calls (GTF_CALL) |
+ +------------------------------------+ <--- argTable[0]
+ */
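+
+ // Illustrative example: for a call such as f(g(), x + y, lcl, 42), the sorted table
+ // evaluates the GTF_CALL argument g() first, then x + y (ordered by cost), and leaves
+ // the local var lcl and the constant 42 to be evaluated last.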
+
+ /* Set the beginning and end for the new argument table */
+ unsigned curInx;
+ int regCount = 0;
+ unsigned begTab = 0;
+ unsigned endTab = argCount - 1;
+ unsigned argsRemaining = argCount;
+
+ // First take care of arguments that are constants.
+ // [We use a backward iterator pattern]
+ //
+ curInx = argCount;
+ do
+ {
+ curInx--;
+
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ regCount++;
+ }
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ GenTreePtr argx = curArgTabEntry->node;
+
+ // put constants at the end of the table
+ //
+ if (argx->gtOper == GT_CNS_INT)
+ {
+ noway_assert(curInx <= endTab);
+
+ curArgTabEntry->processed = true;
+
+ // place curArgTabEntry at the endTab position by performing a swap
+ //
+ if (curInx != endTab)
+ {
+ argTable[curInx] = argTable[endTab];
+ argTable[endTab] = curArgTabEntry;
+ }
+
+ endTab--;
+ argsRemaining--;
+ }
+ }
+ } while (curInx > 0);
+
+ if (argsRemaining > 0)
+ {
+ // Next take care of arguments that are calls.
+ // [We use a forward iterator pattern]
+ //
+ for (curInx = begTab; curInx <= endTab; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ GenTreePtr argx = curArgTabEntry->node;
+
+ // put calls at the beginning of the table
+ //
+ if (argx->gtFlags & GTF_CALL)
+ {
+ curArgTabEntry->processed = true;
+
+ // place curArgTabEntry at the begTab position by performing a swap
+ //
+ if (curInx != begTab)
+ {
+ argTable[curInx] = argTable[begTab];
+ argTable[begTab] = curArgTabEntry;
+ }
+
+ begTab++;
+ argsRemaining--;
+ }
+ }
+ }
+ }
+
+ if (argsRemaining > 0)
+ {
+ // Next take care of arguments that are temps.
+ // These temps come before the arguments that are
+ // ordinary local vars or local fields
+ // since this will give them a better chance to become
+ // enregistered into their actual argument register.
+ // [We use a forward iterator pattern]
+ //
+ for (curInx = begTab; curInx <= endTab; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ if (curArgTabEntry->needTmp)
+ {
+ curArgTabEntry->processed = true;
+
+ // place curArgTabEntry at the begTab position by performing a swap
+ //
+ if (curInx != begTab)
+ {
+ argTable[curInx] = argTable[begTab];
+ argTable[begTab] = curArgTabEntry;
+ }
+
+ begTab++;
+ argsRemaining--;
+ }
+ }
+ }
+ }
+
+ if (argsRemaining > 0)
+ {
+ // Next take care of local var and local field arguments.
+ // These are moved towards the end of the argument evaluation.
+ // [We use a backward iterator pattern]
+ //
+ curInx = endTab + 1;
+ do
+ {
+ curInx--;
+
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ GenTreePtr argx = curArgTabEntry->node;
+
+ if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
+ {
+ noway_assert(curInx <= endTab);
+
+ curArgTabEntry->processed = true;
+
+ // place curArgTabEntry at the endTab position by performing a swap
+ //
+ if (curInx != endTab)
+ {
+ argTable[curInx] = argTable[endTab];
+ argTable[endTab] = curArgTabEntry;
+ }
+
+ endTab--;
+ argsRemaining--;
+ }
+ }
+ } while (curInx > begTab);
+ }
+
+ // Finally, take care of all the remaining arguments.
+ // Note that we fill in one arg at a time using a while loop.
+ bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
+ while (argsRemaining > 0)
+ {
+ /* Find the most expensive arg remaining and evaluate it next */
+
+ fgArgTabEntryPtr expensiveArgTabEntry = nullptr;
+ unsigned expensiveArg = UINT_MAX;
+ unsigned expensiveArgCost = 0;
+
+ // [We use a forward iterator pattern]
+ //
+ for (curInx = begTab; curInx <= endTab; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ GenTreePtr argx = curArgTabEntry->node;
+
+ // We should have already handled these kinds of args
+ assert(argx->gtOper != GT_LCL_VAR);
+ assert(argx->gtOper != GT_LCL_FLD);
+ assert(argx->gtOper != GT_CNS_INT);
+
+ // This arg should either have no persistent side effects or be the last one in our table
+ // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));
+
+ if (argsRemaining == 1)
+ {
+ // This is the last arg to place
+ expensiveArg = curInx;
+ expensiveArgTabEntry = curArgTabEntry;
+ assert(begTab == endTab);
+ break;
+ }
+ else
+ {
+ if (!costsPrepared)
+ {
+ /* We call gtPrepareCost to measure the cost of evaluating this tree */
+ compiler->gtPrepareCost(argx);
+ }
+
+ if (argx->gtCostEx > expensiveArgCost)
+ {
+ // Remember this arg as the most expensive one that we have yet seen
+ expensiveArgCost = argx->gtCostEx;
+ expensiveArg = curInx;
+ expensiveArgTabEntry = curArgTabEntry;
+ }
+ }
+ }
+ }
+
+ noway_assert(expensiveArg != UINT_MAX);
+
+ // put the most expensive arg towards the beginning of the table
+
+ expensiveArgTabEntry->processed = true;
+
+ // place expensiveArgTabEntry at the begTab position by performing a swap
+ //
+ if (expensiveArg != begTab)
+ {
+ argTable[expensiveArg] = argTable[begTab];
+ argTable[begTab] = expensiveArgTabEntry;
+ }
+
+ begTab++;
+ argsRemaining--;
+
+ costsPrepared = true; // Don't re-evaluate the tree costs on subsequent iterations of this loop
+ }
+
+ // The table should now be completely filled and thus begTab should now be adjacent to endTab
+ // and argsRemaining should be zero
+ assert(begTab == (endTab + 1));
+ assert(argsRemaining == 0);
+
+#if !FEATURE_FIXED_OUT_ARGS
+ // Finally build the regArgList
+ //
+ callTree->gtCall.regArgList = NULL;
+ callTree->gtCall.regArgListCount = regCount;
+
+ unsigned regInx = 0;
+ for (curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ // Record the argument register in regArgList
+ //
+ callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
+ regInx++;
+ }
+ }
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+ argsSorted = true;
+}
+
+//------------------------------------------------------------------------------
+// fgMakeTmpArgNode : Create the node that references the temp var 'tmpVarNum' and
+//                    that will go into the late argument list. This is needed to
+//                    enforce the ordering of the evaluation of arguments.
+//
+// Arguments:
+//    tmpVarNum         - the var num of the temp that the new node will reference.
+//    passedInRegisters - (FEATURE_UNIX_AMD64_STRUCT_PASSING only) true if the struct
+//                        argument is passed in registers, in which case the temp is
+//                        referenced as a GT_LCL_FLD rather than through its address.
+//
+// Return Value:
+//    the newly created node that references the temp var.
+
+GenTreePtr Compiler::fgMakeTmpArgNode(
+ unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
+{
+ LclVarDsc* varDsc = &lvaTable[tmpVarNum];
+ assert(varDsc->lvIsTemp);
+ var_types type = varDsc->TypeGet();
+
+ // Create a copy of the temp to go into the late argument list
+ GenTreePtr arg = gtNewLclvNode(tmpVarNum, type);
+ GenTreePtr addrNode = nullptr;
+
+ if (varTypeIsStruct(type))
+ {
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ arg->gtFlags |= GTF_DONT_CSE;
+
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Can this type be passed in a single register?
+ // If so, the following call will return the corresponding primitive type.
+ // Otherwise, it will return TYP_UNKNOWN and we will pass by reference.
+
+ bool passedInRegisters = false;
+ structPassingKind kind;
+ CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ var_types structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);
+
+ if (structBaseType != TYP_UNKNOWN)
+ {
+ passedInRegisters = true;
+ type = structBaseType;
+ }
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // If it is passed in registers, don't get the address of the var. Make it a
+ // field instead. It will be loaded into registers with a putarg_reg tree in Lowering.
+ if (passedInRegisters)
+ {
+ arg->ChangeOper(GT_LCL_FLD);
+ arg->gtType = type;
+ }
+ else
+ {
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
+ var_types addrType = type;
+#else
+ var_types addrType = TYP_BYREF;
+#endif
+ arg = gtNewOperNode(GT_ADDR, addrType, arg);
+ addrNode = arg;
+
+#if FEATURE_MULTIREG_ARGS
+#ifdef _TARGET_ARM64_
+ assert(varTypeIsStruct(type));
+ if (lvaIsMultiregStruct(varDsc))
+ {
+ // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD);
+ // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
+ // We will create a GT_OBJ for the argument below.
+ // This will be passed by value in two registers.
+ assert(addrNode != nullptr);
+
+ // Create an Obj of the temp to use as a call argument.
+ arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
+
+ // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
+ // this is only to preserve former behavior (though some CSE'ing of struct
+ // values can be pessimizing, so enabling this may require some additional tuning).
+ arg->gtFlags |= GTF_DONT_CSE;
+ }
+#endif // _TARGET_ARM64_
+#endif // FEATURE_MULTIREG_ARGS
+ }
+
+#else // not (_TARGET_AMD64_ or _TARGET_ARM64_)
+
+ // other targets, we pass the struct by value
+ assert(varTypeIsStruct(type));
+
+ addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);
+
+ // Create a new Obj node of the temp to use as a call argument.
+ // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
+ arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);
+
+#endif // not (_TARGET_AMD64_ or _TARGET_ARM64_)
+
+ } // (varTypeIsStruct(type))
+
+ if (addrNode != nullptr)
+ {
+ assert(addrNode->gtOper == GT_ADDR);
+
+ // This will prevent this LclVar from being optimized away
+ lvaSetVarAddrExposed(tmpVarNum);
+
+ // the child of a GT_ADDR is required to have this flag set
+ addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
+ }
+
+ return arg;
+}
+
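+//------------------------------------------------------------------------------
+// EvalArgsToTemps : Walk the sorted argument table and, for each argument that
+//                   requires it, create the setup node that evaluates the argument
+//                   into a temp (or a placeholder node), leaving that setup node in
+//                   the early argument list and appending the deferred argument to
+//                   gtCallLateArgs.
+//
+// Assumptions:
+//    SortArgs() must have been called first, so that argsSorted is true.
+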
+void fgArgInfo::EvalArgsToTemps()
+{
+ assert(argsSorted == true);
+
+ unsigned regArgInx = 0;
+ // Now go through the argument table and perform the necessary evaluation into temps
+ GenTreeArgList* tmpRegArgNext = nullptr;
+ for (unsigned curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ GenTreePtr argx = curArgTabEntry->node;
+ GenTreePtr setupArg = nullptr;
+ GenTreePtr defArg;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ // Only ever set for FEATURE_FIXED_OUT_ARGS
+ assert(curArgTabEntry->needPlace == false);
+
+ // On x86 and other archs that use push instructions to pass arguments:
+ // Only the register arguments need to be replaced with placeholder nodes.
+ // Stacked arguments are evaluated and pushed (or stored into the stack) in order.
+ //
+ if (curArgTabEntry->regNum == REG_STK)
+ continue;
+#endif
+
+ if (curArgTabEntry->needTmp)
+ {
+ unsigned tmpVarNum;
+
+ if (curArgTabEntry->isTmp == true)
+ {
+ // Create a copy of the temp to go into the late argument list
+ tmpVarNum = curArgTabEntry->tmpNum;
+ defArg = compiler->fgMakeTmpArgNode(tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
+ argTable[curInx]->structDesc.passedInRegisters));
+
+ // mark the original node as a late argument
+ argx->gtFlags |= GTF_LATE_ARG;
+ }
+ else
+ {
+ // Create a temp assignment for the argument
+ // Put the temp in the gtCallLateArgs list
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("Argument with 'side effect'...\n");
+ compiler->gtDispTree(argx);
+ }
+#endif
+
+#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ noway_assert(argx->gtType != TYP_STRUCT);
+#endif
+
+ tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
+ if (argx->gtOper == GT_MKREFANY)
+ {
+ // For GT_MKREFANY, typically the actual struct copying does
+ // not have any side-effects and can be delayed. So instead
+ // of using a temp for the whole struct, we can just use a temp
+ // for the operand that has a side-effect
+ GenTreePtr operand;
+ if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ operand = argx->gtOp.gtOp1;
+
+ // In the early argument evaluation, place an assignment to the temp
+ // from the source operand of the mkrefany
+ setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
+
+ // Replace the operand for the mkrefany with the new temp.
+ argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
+ }
+ else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ operand = argx->gtOp.gtOp2;
+
+ // In the early argument evaluation, place an assignment to the temp
+ // from the source operand of the mkrefany
+ setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
+
+ // Replace the operand for the mkrefany with the new temp.
+ argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
+ }
+ }
+
+ if (setupArg != nullptr)
+ {
+ // Now keep the mkrefany for the late argument list
+ defArg = argx;
+
+ // Clear the side-effect flags because now both op1 and op2 have no side-effects
+ defArg->gtFlags &= ~GTF_ALL_EFFECT;
+ }
+ else
+ {
+ setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
+
+ LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
+
+#ifndef LEGACY_BACKEND
+ if (compiler->fgOrder == Compiler::FGOrderLinear)
+ {
+ // We'll reference this temporary variable just once
+ // when we perform the function call after
+ // setting up this argument.
+ varDsc->lvRefCnt = 1;
+ }
+#endif // !LEGACY_BACKEND
+
+ var_types lclVarType = genActualType(argx->gtType);
+ var_types scalarType = TYP_UNKNOWN;
+
+ if (setupArg->OperIsCopyBlkOp())
+ {
+ setupArg = compiler->fgMorphCopyBlock(setupArg);
+#ifdef _TARGET_ARM64_
+ // This scalar LclVar widening step is only performed for ARM64
+ //
+ CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum);
+ unsigned structSize = varDsc->lvExactSize;
+
+ scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd);
+#endif // _TARGET_ARM64_
+ }
+
+ // scalarType can be set to a wider type for ARM64: (3 => 4) or (5,6,7 => 8)
+ if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
+ {
+ // Create a GT_LCL_FLD using the wider type to go to the late argument list
+ defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
+ }
+ else
+ {
+ // Create a copy of the temp to go to the late argument list
+ defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
+ }
+
+ curArgTabEntry->isTmp = true;
+ curArgTabEntry->tmpNum = tmpVarNum;
+
+#ifdef _TARGET_ARM_
+ // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
+ // might have left holes in the used registers (see
+ // fgAddSkippedRegsInPromotedStructArg).
+ // Too bad we're not that smart for these intermediate temps...
+ if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
+ for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
+ {
+ argReg = genRegArgNext(argReg);
+ allUsedRegs |= genRegMask(argReg);
+ }
+#ifdef LEGACY_BACKEND
+ callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
+#endif // LEGACY_BACKEND
+ }
+#endif // _TARGET_ARM_
+ }
+
+ /* mark the assignment as a late argument */
+ setupArg->gtFlags |= GTF_LATE_ARG;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n Evaluate to a temp:\n");
+ compiler->gtDispTree(setupArg);
+ }
+#endif
+ }
+ }
+ else // curArgTabEntry->needTmp == false
+ {
+ // On x86 -
+ // Only register args are replaced with placeholder nodes
+ // and the stack based arguments are evaluated and pushed in order.
+ //
+ // On Arm/x64 - When needTmp is false and needPlace is false,
+ // the non-register arguments are evaluated and stored in order.
+ // When needPlace is true we have a nested call that comes after
+ // this argument so we have to replace it in the gtCallArgs list
+ // (the initial argument evaluation list) with a placeholder.
+ //
+ if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
+ {
+ continue;
+ }
+
+ /* No temp needed - move the whole node to the gtCallLateArgs list */
+
+ /* The argument is deferred and put in the late argument list */
+
+ defArg = argx;
+
+ // Create a placeholder node to put in its place in gtCallLateArgs.
+
+ // For a struct type we also need to record the class handle of the arg.
+ CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
+
+#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ // All structs are either passed (and retyped) as integral types, OR they
+ // are passed by reference.
+ noway_assert(argx->gtType != TYP_STRUCT);
+
+#else // !defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ if (varTypeIsStruct(defArg))
+ {
+ // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
+ GenTreePtr defArgTmp = defArg;
+
+ // The GT_OBJ may be a child of a GT_COMMA.
+ while (defArgTmp->gtOper == GT_COMMA)
+ {
+ defArgTmp = defArgTmp->gtOp.gtOp2;
+ }
+ assert(varTypeIsStruct(defArgTmp));
+
+ // We handle two opcodes: GT_MKREFANY and GT_OBJ.
+ if (defArgTmp->gtOper == GT_MKREFANY)
+ {
+ clsHnd = compiler->impGetRefAnyClass();
+ }
+ else if (defArgTmp->gtOper == GT_OBJ)
+ {
+ clsHnd = defArgTmp->AsObj()->gtClass;
+ }
+ else
+ {
+ BADCODE("Unhandled struct argument tree in fgMorphArgs");
+ }
+ }
+
+#endif // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
+
+ setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
+
+ /* mark the placeholder node as a late argument */
+ setupArg->gtFlags |= GTF_LATE_ARG;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ printf("Deferred stack argument :\n");
+ }
+ else
+ {
+ printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
+ }
+
+ compiler->gtDispTree(argx);
+ printf("Replaced with placeholder node:\n");
+ compiler->gtDispTree(setupArg);
+ }
+#endif
+ }
+
+ if (setupArg != nullptr)
+ {
+ if (curArgTabEntry->parent)
+ {
+ GenTreePtr parent = curArgTabEntry->parent;
+ /* a normal argument from the list */
+ noway_assert(parent->IsList());
+ noway_assert(parent->gtOp.gtOp1 == argx);
+
+ parent->gtOp.gtOp1 = setupArg;
+ }
+ else
+ {
+ /* must be the gtCallObjp */
+ noway_assert(callTree->gtCall.gtCallObjp == argx);
+
+ callTree->gtCall.gtCallObjp = setupArg;
+ }
+ }
+
+ /* deferred arg goes into the late argument list */
+
+ if (tmpRegArgNext == nullptr)
+ {
+ tmpRegArgNext = compiler->gtNewArgList(defArg);
+ callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
+ }
+ else
+ {
+ noway_assert(tmpRegArgNext->IsList());
+ noway_assert(tmpRegArgNext->Current());
+ tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
+ tmpRegArgNext = tmpRegArgNext->Rest();
+ }
+
+ curArgTabEntry->node = defArg;
+ curArgTabEntry->lateArgInx = regArgInx++;
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nShuffled argument table: ");
+ for (unsigned curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ printf("%s ", getRegName(curArgTabEntry->regNum));
+ }
+ }
+ printf("\n");
+ }
+#endif
+}
+
+void fgArgInfo::RecordStkLevel(unsigned stkLvl)
+{
+ assert(!IsUninitialized(stkLvl));
+ this->stkLevel = stkLvl;
+}
+
+unsigned fgArgInfo::RetrieveStkLevel()
+{
+ assert(!IsUninitialized(stkLevel));
+ return stkLevel;
+}
+
+// Return a conservative estimate of the stack size in bytes.
+// It will be used only on the intercepted-for-host code path to copy the arguments.
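+// Illustrative example (MAX_REG_ARG == 4 is just an assumed value here): a call with
+// 6 arguments would be estimated at 2 * REGSIZE_BYTES, since only the arguments
+// beyond the first MAX_REG_ARG are counted.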
+int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
+{
+
+ int numArgs = 0;
+ for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
+ {
+ numArgs++;
+ }
+
+ int numStkArgs;
+ if (numArgs > MAX_REG_ARG)
+ {
+ numStkArgs = numArgs - MAX_REG_ARG;
+ }
+ else
+ {
+ numStkArgs = 0;
+ }
+
+ return numStkArgs * REGSIZE_BYTES;
+}
+
+//------------------------------------------------------------------------------
+// fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
+//                  otherwise insert a comma form temp
+//
+// Arguments:
+//    pOp  - a pointer to the operand that will be used multiple times; if it is not
+//           a local, *pOp is replaced with a comma expression that evaluates it into
+//           a temp
+//
+// Return Value:
+// A fresh GT_LCL_VAR node referencing the temp which has not been used
+//
+// Assumption:
+// The result tree MUST be added to the tree structure since the ref counts are
+// already incremented.
+
+GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
+{
+ GenTree* tree = *pOp;
+ if (tree->IsLocal())
+ {
+ auto result = gtClone(tree);
+ if (lvaLocalVarRefCounted)
+ {
+ lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ }
+ return result;
+ }
+ else
+ {
+ GenTree* result = fgInsertCommaFormTemp(pOp);
+
+ // At this point, *pOp is GT_COMMA(GT_ASG(V01, *pOp), V01) and result = V01
+ // Therefore, the ref count has to be incremented 3 times for *pOp and result, if result will
+ // be added by the caller.
+ if (lvaLocalVarRefCounted)
+ {
+ lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ }
+
+ return result;
+ }
+}
+
+//------------------------------------------------------------------------------
+// fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
+// and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
+//
+// Arguments:
+// ppTree - a pointer to the child node we will be replacing with the comma expression that
+// evaluates ppTree to a temp and returns the result
+//
+// structType - value type handle if the temp created is of TYP_STRUCT.
+//
+// Return Value:
+// A fresh GT_LCL_VAR node referencing the temp which has not been used
+//
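+// Illustrative sketch (V01 is a hypothetical temp number): if *ppTree is 'expr', then
+// after this call *ppTree is GT_COMMA(GT_ASG(V01, expr), V01) and the returned node
+// is a fresh GT_LCL_VAR that references V01.
+//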
+
+GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
+{
+ GenTree* subTree = *ppTree;
+
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
+
+ if (varTypeIsStruct(subTree))
+ {
+ assert(structType != nullptr);
+ lvaSetStruct(lclNum, structType, false);
+ }
+
+ // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
+ // The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() for
+ // setting the type of the lcl var nodes created below.
+ GenTree* asg = gtNewTempAssign(lclNum, subTree);
+
+ GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
+
+ GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
+
+ *ppTree = comma;
+
+ return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
+}
+
+//------------------------------------------------------------------------
+// fgMorphArgs: Walk and transform (morph) the arguments of a call
+//
+// Arguments:
+// callNode - the call for which we are doing the argument morphing
+//
+// Return Value:
+// Like most morph methods, this method returns the morphed node,
+// though in this case there are currently no scenarios where the
+// node itself is re-created.
+//
+// Notes:
+// This method is even less idempotent than most morph methods.
+// That is, it makes changes that should not be redone. It uses the existence
+// of gtCallLateArgs (the late arguments list) to determine if it has
+// already done that work.
+//
+// The first time it is called (i.e. during global morphing), this method
+// computes the "late arguments". This is when it determines which arguments
+// need to be evaluated to temps prior to the main argument setup, and which
+// can be directly evaluated into the argument location. It also creates a
+// second argument list (gtCallLateArgs) that does the final placement of the
+// arguments, e.g. into registers or onto the stack.
+//
+// The "non-late arguments", aka the gtCallArgs, are doing the in-order
+// evaluation of the arguments that might have side-effects, such as embedded
+// assignments, calls or possible throws. In these cases, it and earlier
+// arguments must be evaluated to temps.
+//
+// On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
+// if we have any nested calls, we need to defer the copying of the argument
+// into the fixed argument area until after the call. If the argument did not
+// otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
+// replaced in the "early" arg list (gtCallArgs) with a placeholder node.
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
+{
+ GenTreeCall* call = callNode->AsCall();
+
+ GenTreePtr args;
+ GenTreePtr argx;
+
+ unsigned flagsSummary = 0;
+ unsigned genPtrArgCntSav = fgPtrArgCntCur;
+
+ unsigned argIndex = 0;
+
+ unsigned intArgRegNum = 0;
+ unsigned fltArgRegNum = 0;
+
+#ifdef _TARGET_ARM_
+ regMaskTP argSkippedRegMask = RBM_NONE;
+ regMaskTP fltArgSkippedRegMask = RBM_NONE;
+#endif // _TARGET_ARM_
+
+#if defined(_TARGET_X86_)
+ unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
+#else
+ const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
+#endif
+
+ unsigned argSlots = 0;
+ unsigned nonRegPassedStructSlots = 0;
+ bool lateArgsComputed = (call->gtCallLateArgs != nullptr);
+ bool callHasRetBuffArg = call->HasRetBufArg();
+
+#ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
+ bool callIsVararg = call->IsVarargs();
+#endif
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
+ // to make sure to call EvalArgsToTemps. fgMakeOutgoingStructArgCopy just marks the argument
+ // as needing a temp variable, and EvalArgsToTemps actually creates the temp variable node.
+ bool hasStackArgCopy = false;
+#endif
+
+#ifndef LEGACY_BACKEND
+ // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
+ // following the normal calling convention or in the normal argument registers. We either mark existing
+ // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
+ // non-standard arguments into the argument list, below.
+ class NonStandardArgs
+ {
+ struct NonStandardArg
+ {
+ regNumber reg; // The register to be assigned to this non-standard argument.
+ GenTree* node; // The tree node representing this non-standard argument.
+ // Note that this must be updated if the tree node changes due to morphing!
+ };
+
+ ArrayStack<NonStandardArg> args;
+
+ public:
+ NonStandardArgs(Compiler* compiler) : args(compiler, 3) // We will have at most 3 non-standard arguments
+ {
+ }
+
+ //-----------------------------------------------------------------------------
+ // Add: add a non-standard argument to the table of non-standard arguments
+ //
+ // Arguments:
+ // node - a GenTree node that has a non-standard argument.
+ // reg - the register to assign to this node.
+ //
+ // Return Value:
+ // None.
+ //
+ void Add(GenTree* node, regNumber reg)
+ {
+ NonStandardArg nsa = {reg, node};
+ args.Push(nsa);
+ }
+
+ //-----------------------------------------------------------------------------
+ // Find: Look for a GenTree* in the set of non-standard args.
+ //
+ // Arguments:
+ // node - a GenTree node to look for
+ //
+ // Return Value:
+ // The index of the non-standard argument (a non-negative, unique, stable number).
+ // If the node is not a non-standard argument, return -1.
+ //
+ int Find(GenTree* node)
+ {
+ for (int i = 0; i < args.Height(); i++)
+ {
+ if (node == args.Index(i).node)
+ {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ //-----------------------------------------------------------------------------
+ // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
+ // set the register to use for the node.
+ //
+ // Arguments:
+ // node - a GenTree node to look for
+ // pReg - an OUT argument. *pReg is set to the non-standard register to use if
+ // 'node' is found in the non-standard argument set.
+ //
+ // Return Value:
+ // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
+ // register to use.
+ // 'false' otherwise (in this case, *pReg is unmodified).
+ //
+ bool FindReg(GenTree* node, regNumber* pReg)
+ {
+ for (int i = 0; i < args.Height(); i++)
+ {
+ NonStandardArg& nsa = args.IndexRef(i);
+ if (node == nsa.node)
+ {
+ *pReg = nsa.reg;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ //-----------------------------------------------------------------------------
+ // Replace: Replace the non-standard argument node at a given index. This is done when
+ // the original node was replaced via morphing, but we need to continue to assign a
+ // particular non-standard arg to it.
+ //
+ // Arguments:
+ // index - the index of the non-standard arg. It must exist.
+ // node - the new GenTree node.
+ //
+ // Return Value:
+ // None.
+ //
+ void Replace(int index, GenTree* node)
+ {
+ args.IndexRef(index).node = node;
+ }
+
+ } nonStandardArgs(this);
+#endif // !LEGACY_BACKEND
+
+ // Count of args. On first morph, this is counted before we've filled in the arg table.
+ // On remorph, we grab it from the arg table.
+ unsigned numArgs = 0;
+
+ // Process the late arguments (which were determined by a previous caller).
+ // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
+ // may need to refer to it.
+ if (lateArgsComputed)
+ {
+ // We need to reMorph the gtCallLateArgs early since that is what triggers
+ // the expression folding and we need to have the final folded gtCallLateArgs
+ // available when we call RemorphRegArg so that we correctly update the fgArgInfo
+ // with the folded tree that represents the final optimized argument nodes.
+ //
+ // However if a range-check needs to be generated for any of these late
+ // arguments we also need to "know" what the stack depth will be when we generate
+ // code to branch to the throw range check failure block as that is part of the
+ // GC information contract for that block.
+ //
+ // Since the late arguments are evaluated last we have pushed all of the
+ // other arguments on the stack before we evaluate these late arguments,
+ // so we record the stack depth on the first morph call when lateArgsComputed
+ // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
+ //
+ unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
+ fgPtrArgCntCur += callStkLevel;
+ call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
+ flagsSummary |= call->gtCallLateArgs->gtFlags;
+ fgPtrArgCntCur -= callStkLevel;
+ assert(call->fgArgInfo != nullptr);
+ call->fgArgInfo->RemorphReset();
+
+ numArgs = call->fgArgInfo->ArgCount();
+ }
+ else
+ {
+ // First we need to count the args
+ if (call->gtCallObjp)
+ {
+ numArgs++;
+ }
+ for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
+ {
+ numArgs++;
+ }
+
+ // Insert or mark non-standard args. These are either outside the normal calling convention, or
+ // arguments registers that don't follow the normal progression of argument registers in the calling
+ // convention (such as for the ARM64 fixed return buffer argument x8).
+ //
+ // *********** NOTE *************
+ // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
+ // in the implementation of fast tail call.
+ // *********** END NOTE *********
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+ // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention. Set the argument registers
+ // correctly here.
+ if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
+ {
+ GenTreeArgList* args = call->gtCallArgs;
+ GenTree* arg1 = args->Current();
+ assert(arg1 != nullptr);
+ nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
+ }
+ // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
+ // hi part to be in EDX. This sets the argument registers up correctly.
+ else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
+          call->IsHelperCall(this, CORINFO_HELP_LRSZ))
+ {
+ GenTreeArgList* args = call->gtCallArgs;
+ GenTree* arg1 = args->Current();
+ assert(arg1 != nullptr);
+ nonStandardArgs.Add(arg1, REG_LNGARG_LO);
+
+ args = args->Rest();
+ GenTree* arg2 = args->Current();
+ assert(arg2 != nullptr);
+ nonStandardArgs.Add(arg2, REG_LNGARG_HI);
+ }
+#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+
+#if !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
+ // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
+ // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
+ // convention for x86/SSE.
+
+ // If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it
+ //
+ if (hasFixedRetBuffReg() && call->HasRetBufArg())
+ {
+ args = call->gtCallArgs;
+ assert(args != nullptr);
+ assert(args->IsList());
+
+ argx = call->gtCallArgs->Current();
+
+ // We don't increment numArgs here, since we already counted this argument above.
+
+ nonStandardArgs.Add(argx, theFixedRetBuffReg());
+ }
+
+ // We are allowed to have a Fixed Return Buffer argument combined
+ // with any of the remaining non-standard arguments
+ //
+ if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
+ {
+ assert(!call->gtCallCookie);
+ // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
+ // It will be used only on the intercepted-for-host code path to copy the arguments.
+
+ GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
+ call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
+ numArgs++;
+
+ nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
+ }
+ else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT) && !call->IsTailCallViaHelper())
+ {
+ // indirect VSD stubs need the base of the indirection cell to be
+ // passed in addition. At this point that is the value in gtCallAddr.
+ // The actual call target will be derived from gtCallAddr in call
+ // lowering.
+
+ // If it is a VSD call getting dispatched via tail call helper,
+ // fgMorphTailCall() would materialize stub addr as an additional
+ // parameter added to the original arg list and hence no need to
+ // add as a non-standard arg.
+
+ GenTree* arg = call->gtCallAddr;
+ if (arg->OperIsLocal())
+ {
+ arg = gtClone(arg, true);
+ }
+ else
+ {
+ call->gtCallAddr = fgInsertCommaFormTemp(&arg);
+ call->gtFlags |= GTF_ASG;
+ }
+ noway_assert(arg != nullptr);
+
+ // And push the stub address onto the list of arguments
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+ numArgs++;
+
+ nonStandardArgs.Add(arg, REG_VIRTUAL_STUB_PARAM);
+ }
+ else if (call->gtCallType == CT_INDIRECT && call->gtCallCookie)
+ {
+ assert(!call->IsUnmanaged());
+
+ // put cookie into R11
+ GenTree* arg = call->gtCallCookie;
+ noway_assert(arg != nullptr);
+ call->gtCallCookie = nullptr;
+
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+ numArgs++;
+
+ nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
+
+ // put destination into R10
+ arg = gtClone(call->gtCallAddr, true);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+ numArgs++;
+
+ nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
+
+ // finally change this call to a helper call
+ call->gtCallType = CT_HELPER;
+ call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
+ }
+#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
+
+ // Allocate the fgArgInfo for the call node;
+ //
+ call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
+ }
+
+ if (varTypeIsStruct(call))
+ {
+ fgFixupStructReturn(call);
+ }
+
+ /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
+ * During the first call to fgMorphArgs we also record the
+ * information about late arguments we have in 'fgArgInfo'.
+ * This information is used later to construct the gtCallLateArgs */
+
+ /* Process the 'this' argument value, if present */
+
+ argx = call->gtCallObjp;
+
+ if (argx)
+ {
+ argx = fgMorphTree(argx);
+ call->gtCallObjp = argx;
+ flagsSummary |= argx->gtFlags;
+
+ assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
+
+ assert(argIndex == 0);
+
+ /* We must fill in or update the argInfo table */
+
+ if (lateArgsComputed)
+ {
+ /* this is a register argument - possibly update it in the table */
+ call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
+ }
+ else
+ {
+ assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
+
+ /* this is a register argument - put it in the table */
+ call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ ,
+ false, REG_STK, nullptr
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ );
+ }
+ // this can't be a struct.
+ assert(argx->gtType != TYP_STRUCT);
+
+ /* Increment the argument register count and argument index */
+ if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
+ {
+ intArgRegNum++;
+#ifdef WINDOWS_AMD64_ABI
+ // Whenever we pass an integer register argument
+ // we skip the corresponding floating point register argument
+ fltArgRegNum++;
+#endif // WINDOWS_AMD64_ABI
+ }
+ else
+ {
+ noway_assert(!"the 'this' pointer can not be a floating point type");
+ }
+ argIndex++;
+ argSlots++;
+ }
+
+#ifdef _TARGET_X86_
+ // Compute the maximum number of arguments that can be passed in registers.
+ // For X86 we handle the varargs and unmanaged calling conventions
+
+ if (call->gtFlags & GTF_CALL_POP_ARGS)
+ {
+ noway_assert(intArgRegNum < MAX_REG_ARG);
+ // No more register arguments for varargs (CALL_POP_ARGS)
+ maxRegArgs = intArgRegNum;
+
+ // Add in the ret buff arg
+ if (callHasRetBuffArg)
+ maxRegArgs++;
+ }
+
+ if (call->IsUnmanaged())
+ {
+ noway_assert(intArgRegNum == 0);
+
+ if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
+ {
+ noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
+ call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
+ call->gtCallArgs->gtOp.gtOp1->gtOper ==
+ GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
+ maxRegArgs = 1;
+ }
+ else
+ {
+ maxRegArgs = 0;
+ }
+
+ // Add in the ret buff arg
+ if (callHasRetBuffArg)
+ maxRegArgs++;
+ }
+#endif // _TARGET_X86_
+
+ /* Morph the user arguments */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM_)
+
+ // The ARM ABI has a concept of back-filling of floating-point argument registers, according
+ // to the "Procedure Call Standard for the ARM Architecture" document, especially
+ // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
+ // appear in a lower-numbered register than floating point argument N. That is, argument
+ // register allocation is not strictly increasing. To support this, we need to keep track of unused
+ // floating-point argument registers that we can back-fill. We only support 4-byte float and
+ // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
+ // only back-fill single registers, since there is no way with these types to create
+ // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
+ // available (with 16 FP argument registers). Consider this code:
+ //
+ // struct HFA { float x, y, z; }; // a three element HFA
+ // void bar(float a1, // passed in f0
+ // double a2, // passed in f2/f3; skip f1 for alignment
+ // HFA a3, // passed in f4/f5/f6
+ // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
+ // HFA a5, // passed in f10/f11/f12
+ // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill
+ // // slots
+ // float a7, // passed in f1 (back-filled)
+ // float a8, // passed in f7 (back-filled)
+ // float a9, // passed in f13 (back-filled)
+ // float a10) // passed on the stack in [OutArg+0]
+ //
+ // Note that if we ever support FP types with larger alignment requirements, then there could
+ // be more than single register back-fills.
+ //
+ // Once we assign a floating-point argument to the stack, all subsequent FP arguments must go on the stack.
+ // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
+ // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
+ // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
+ // and prevent any additional floating-point arguments from going in registers.
+
+ bool anyFloatStackArgs = false;
+
+#endif // _TARGET_ARM_
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ bool hasStructArgument = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
+ bool hasMultiregStructArgs = false;
+ for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
+ {
+ GenTreePtr* parentArgx = &args->gtOp.gtOp1;
+
+#if FEATURE_MULTIREG_ARGS
+ if (!hasStructArgument)
+ {
+ hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
+ }
+#endif // FEATURE_MULTIREG_ARGS
+
+#ifndef LEGACY_BACKEND
+ // Record the index of any nonStandard arg that we may be processing here, as we are
+ // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
+ GenTreePtr orig_argx = *parentArgx;
+ int nonStandard_index = nonStandardArgs.Find(orig_argx);
+#endif // !LEGACY_BACKEND
+
+ argx = fgMorphTree(*parentArgx);
+ *parentArgx = argx;
+ flagsSummary |= argx->gtFlags;
+
+ assert(args->IsList());
+ assert(argx == args->Current());
+
+#ifndef LEGACY_BACKEND
+ if ((nonStandard_index != -1) && (argx != orig_argx))
+ {
+ // We need to update the node field for this nonStandard arg here
+ // as it was changed by the call to fgMorphTree
+ nonStandardArgs.Replace(nonStandard_index, argx);
+ }
+#endif // !LEGACY_BACKEND
+
+ /* Change the node to TYP_I_IMPL so we don't report GC info
+ * NOTE: We deferred this from the importer because of the inliner */
+
+ if (argx->IsVarAddr())
+ {
+ argx->gtType = TYP_I_IMPL;
+ }
+
+ bool passUsingFloatRegs;
+ unsigned argAlign = 1;
+ // Setup any HFA information about 'argx'
+ var_types hfaType = GetHfaType(argx);
+ bool isHfaArg = varTypeIsFloating(hfaType);
+ unsigned hfaSlots = 0;
+
+ if (isHfaArg)
+ {
+ hfaSlots = GetHfaCount(argx);
+
+ // If we have an HFA struct, it's possible that a method which originally used
+ // only integer types now starts using FP types. We have to communicate this
+ // through this flag since LSRA later on will use this flag to determine whether
+ // or not to track the FP register set.
+ //
+ compFloatingPointUsed = true;
+ }
+
+ unsigned size = 0;
+ CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
+ bool isRegArg = false;
+
+ fgArgTabEntryPtr argEntry = nullptr;
+
+ if (lateArgsComputed)
+ {
+ argEntry = gtArgEntryByArgNum(call, argIndex);
+ }
+
+#ifdef _TARGET_ARM_
+
+ bool passUsingIntRegs;
+ if (lateArgsComputed)
+ {
+ passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
+ passUsingIntRegs = isValidIntArgReg(argEntry->regNum);
+ }
+ else
+ {
+ passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
+ passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
+ }
+
+ GenTreePtr curArg = argx;
+ // If late args have already been computed, use the node in the argument table.
+ if (argEntry != NULL && argEntry->isTmp)
+ {
+ curArg = argEntry->node;
+ }
+
+ // We don't use the "size" return value from InferOpSizeAlign().
+ codeGen->InferOpSizeAlign(curArg, &argAlign);
+
+ argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
+ argAlign /= TARGET_POINTER_SIZE;
+
+ if (argAlign == 2)
+ {
+ if (passUsingFloatRegs)
+ {
+ if (fltArgRegNum % 2 == 1)
+ {
+ fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
+ fltArgRegNum++;
+ }
+ }
+ else if (passUsingIntRegs)
+ {
+ if (intArgRegNum % 2 == 1)
+ {
+ argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
+ intArgRegNum++;
+ }
+ }
+
+ if (argSlots % 2 == 1)
+ {
+ argSlots++;
+ }
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ if (lateArgsComputed)
+ {
+ passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
+ }
+ else
+ {
+ passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
+ }
+
+#elif defined(_TARGET_AMD64_)
+#if defined(UNIX_AMD64_ABI)
+ if (lateArgsComputed)
+ {
+ passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
+ }
+ else
+ {
+ passUsingFloatRegs = varTypeIsFloating(argx);
+ }
+#else // WINDOWS_AMD64_ABI
+ passUsingFloatRegs = varTypeIsFloating(argx);
+#endif // !UNIX_AMD64_ABI
+#elif defined(_TARGET_X86_)
+
+ passUsingFloatRegs = false;
+
+#else
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+ bool isBackFilled = false;
+ unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
+ var_types structBaseType = TYP_STRUCT;
+ unsigned structSize = 0;
+
+ bool isStructArg = varTypeIsStruct(argx);
+
+ if (lateArgsComputed)
+ {
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Get the struct description for the already completed struct argument.
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, argx);
+ assert(fgEntryPtr != nullptr);
+
+ // As described in a few other places, this can happen when the argx was morphed
+ // into an arg setup node - COPYBLK. The COPYBLK always has a type of void.
+ // In such case the fgArgTabEntry keeps track of whether the original node (before morphing)
+ // was a struct and the struct classification.
+ isStructArg = fgEntryPtr->isStruct;
+
+ if (isStructArg)
+ {
+ structDesc.CopyFrom(fgEntryPtr->structDesc);
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ assert(argEntry != nullptr);
+ if (argEntry->IsBackFilled())
+ {
+ isRegArg = true;
+ size = argEntry->numRegs;
+ nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
+ assert(size == 1);
+ isBackFilled = true;
+ }
+ else if (argEntry->regNum == REG_STK)
+ {
+ isRegArg = false;
+ assert(argEntry->numRegs == 0);
+ size = argEntry->numSlots;
+ }
+ else
+ {
+ isRegArg = true;
+ assert(argEntry->numRegs > 0);
+ size = argEntry->numRegs + argEntry->numSlots;
+ }
+
+ // This size has now been computed
+ assert(size != 0);
+ }
+ else // !lateArgsComputed
+ {
+ //
+ // Figure out the size of the argument. This is either in number of registers, or number of
+ // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
+ // the stack.
+ //
+ if (argx->IsArgPlaceHolderNode() || (!isStructArg))
+ {
+#if defined(_TARGET_AMD64_)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!isStructArg)
+ {
+ size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
+ }
+ else
+ {
+ size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
+ TARGET_POINTER_SIZE)) /
+ TARGET_POINTER_SIZE;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
+ if (size > 1)
+ {
+ hasMultiregStructArgs = true;
+ }
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#elif defined(_TARGET_ARM64_)
+ if (isStructArg)
+ {
+ if (isHfaArg)
+ {
+ size = GetHfaCount(argx);
+ // HFA structs are passed by value in multiple registers
+ hasMultiregStructArgs = true;
+ }
+ else
+ {
+ // Structs are either passed in 1 or 2 (64-bit) slots
+ size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
+ TARGET_POINTER_SIZE)) /
+ TARGET_POINTER_SIZE;
+
+ if (size == 2)
+ {
+ // Structs that are the size of 2 pointers are passed by value in multiple registers
+ hasMultiregStructArgs = true;
+ }
+ else if (size > 2)
+ {
+ size = 1; // Structs that are larger than 2 pointers (except for HFAs) are passed by
+ // reference (to a copy)
+ }
+ }
+ // Note that there are some additional rules for multireg structs.
+ // (i.e. they cannot be split between registers and the stack)
+ }
+ else
+ {
+ size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
+ }
+#elif defined(_TARGET_ARM_)
+ if (isStructArg)
+ {
+ size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
+ TARGET_POINTER_SIZE)) /
+ TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ // The typical case
+ size = genTypeStSz(argx->gtType);
+ }
+#elif defined(_TARGET_X86_)
+ size = genTypeStSz(argx->gtType);
+#else
+#error Unsupported or unset target architecture
+#endif // _TARGET_XXX_
+ }
+#ifdef _TARGET_ARM_
+ else if (isHfaArg)
+ {
+ size = GetHfaCount(argx);
+ }
+#endif // _TARGET_ARM_
+ else // struct type
+ {
+ // We handle two opcodes: GT_MKREFANY and GT_OBJ
+ if (argx->gtOper == GT_MKREFANY)
+ {
+ if (varTypeIsStruct(argx))
+ {
+ isStructArg = true;
+ }
+#ifdef _TARGET_AMD64_
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varTypeIsStruct(argx))
+ {
+ size = info.compCompHnd->getClassSize(impGetRefAnyClass());
+ unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
+ size = roundupSize / TARGET_POINTER_SIZE;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ size = 1;
+ }
+#else
+ size = 2;
+#endif
+ }
+ else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
+ {
+ GenTreePtr argObj = argx;
+ GenTreePtr* parentOfArgObj = parentArgx;
+
+ assert(args->IsList());
+ assert(argx == args->Current());
+
+ /* The GT_OBJ may be a child of a GT_COMMA */
+ while (argObj->gtOper == GT_COMMA)
+ {
+ parentOfArgObj = &argObj->gtOp.gtOp2;
+ argObj = argObj->gtOp.gtOp2;
+ }
+
+ // TODO-1stClassStructs: An OBJ node should not be required for lclVars.
+ if (argObj->gtOper != GT_OBJ)
+ {
+ BADCODE("illegal argument tree in fgMorphArgs");
+ }
+
+ CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ unsigned originalSize = info.compCompHnd->getClassSize(objClass);
+ originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
+ unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
+
+ structSize = originalSize;
+
+ structPassingKind howToPassStruct;
+ structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);
+
+#ifdef _TARGET_ARM64_
+ if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
+ !isPow2(originalSize)) // size is 3,5,6 or 7 bytes
+ {
+ if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
+ {
+ // For ARM64, when we pass structs that are 3, 5, 6 or 7 bytes in size,
+ // we can read 4 or 8 bytes from the LclVar to pass this arg
+ originalSize = genTypeSize(structBaseType);
+ }
+ }
+#endif // _TARGET_ARM64_
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On System V OS-es a struct is never passed by reference.
+ // It is either passed by value on the stack or in registers.
+ bool passStructInRegisters = false;
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ bool passStructByRef = false;
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // The following if-then-else needs to be carefully refactored.
+ // Basically the else portion wants to turn a struct load (a GT_OBJ)
+ // into a GT_IND of the appropriate size.
+ // It can do this with struct sizes that are 1, 2, 4, or 8 bytes.
+ // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined (Why?)
+ // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
+ // It also can't do this if we have a HFA arg,
+ // unless we have a 1-elem HFA in which case we want to do the optimization.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_X86_
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Check for struct argument with size 1, 2, 4 or 8 bytes
+ // As we can optimize these by turning them into a GT_IND of the correct type
+ //
+ // Check for cases that we cannot optimize:
+ //
+ if ((originalSize > TARGET_POINTER_SIZE) || // it is a struct that is larger than a pointer
+ !isPow2(originalSize) || // it is not a power of two (1, 2, 4 or 8)
+ (isHfaArg && (hfaSlots != 1))) // it is an HFA struct that is not a one element HFA
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ // Normalize 'size' to the number of pointer sized items
+ // 'size' is the number of register slots that we will use to pass the argument
+ size = roundupSize / TARGET_POINTER_SIZE;
+#if defined(_TARGET_AMD64_)
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ size = 1; // This must be copied to a temp and passed by address
+ passStructByRef = true;
+ copyBlkClass = objClass;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!structDesc.passedInRegisters)
+ {
+ GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
+ bool needCpyBlk = false;
+ if (lclVar != nullptr)
+ {
+ // If the struct is promoted to registers, it has to be materialized
+ // on the stack. We may want to support promoted structures when
+ // generating code for putarg_stk instead of creating a copy here.
+ LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
+ needCpyBlk = varDsc->lvPromoted;
+ }
+ else
+ {
+ // If simd16 comes from vector<t>, eeGetSystemVAmd64PassStructInRegisterDescriptor
+ // sets structDesc.passedInRegisters to be false.
+ //
+ // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
+ // by the rationalizer. For now we will let the SIMD struct arg be copied to
+ // a local. As part of the cpblk rewrite, the rationalizer will handle GT_ADDR(GT_SIMD).
+ //
+ // +--* obj simd16
+ // | \--* addr byref
+ // | | /--* lclVar simd16 V05 loc4
+ // | \--* simd simd16 int -
+ // | \--* lclVar simd16 V08 tmp1
+ //
+ // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
+ // so that we don't need to generate a copy here.
+ GenTree* addr = argObj->gtOp.gtOp1;
+ if (addr->OperGet() == GT_ADDR)
+ {
+ GenTree* addrChild = addr->gtOp.gtOp1;
+ if (addrChild->OperGet() == GT_SIMD)
+ {
+ needCpyBlk = true;
+ }
+ }
+ }
+ passStructInRegisters = false;
+ if (needCpyBlk)
+ {
+ copyBlkClass = objClass;
+ }
+ else
+ {
+ copyBlkClass = NO_CLASS_HANDLE;
+ }
+ }
+ else
+ {
+ // The objClass is used to materialize the struct on the stack.
+ // For SystemV, the code below generates copies for struct arguments classified
+ // as register arguments.
+ // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
+ // can be passed in registers or can be copied directly to the outgoing area.
+ passStructInRegisters = true;
+ copyBlkClass = objClass;
+ }
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#elif defined(_TARGET_ARM64_)
+ if ((size > 2) && !isHfaArg)
+ {
+ size = 1; // This must be copied to a temp and passed by address
+ passStructByRef = true;
+ copyBlkClass = objClass;
+ }
+#endif
+
+#ifdef _TARGET_ARM_
+ // If we're passing a promoted struct local var,
+ // we may need to skip some registers due to alignment; record those.
+ GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
+ if (lclVar != NULL)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvPromoted)
+ {
+ assert(argObj->OperGet() == GT_OBJ);
+ if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
+ {
+ fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
+ }
+ }
+ }
+#endif // _TARGET_ARM_
+ }
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
+ // generated for structs of size 1, 2, 4, or 8 bytes.
+ else // We have a struct argument with size 1, 2, 4 or 8 bytes
+ {
+ // change our GT_OBJ into a GT_IND of the correct type.
+ // We've already ensured above that size is a power of 2, and less than or equal to pointer
+ // size.
+
+ assert(howToPassStruct == SPK_PrimitiveType);
+
+ // ToDo: remove this block as getArgTypeForStruct properly handles turning one element HFAs into
+ // primitives
+ if (isHfaArg)
+ {
+ // If we reach here with an HFA arg it has to be a one element HFA
+ assert(hfaSlots == 1);
+ structBaseType = hfaType; // change the indirection type to a floating point type
+ }
+
+ noway_assert(structBaseType != TYP_UNKNOWN);
+
+ argObj->ChangeOper(GT_IND);
+
+ // Now see if we can fold *(&X) into X
+ if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
+ {
+ GenTreePtr temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
+
+ // Keep the DONT_CSE flag in sync
+ // (as the addr always marks it for its op1)
+ temp->gtFlags &= ~GTF_DONT_CSE;
+ temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
+ DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
+ DEBUG_DESTROY_NODE(argObj); // GT_IND
+
+ argObj = temp;
+ *parentOfArgObj = temp;
+
+ // If the OBJ had been the top level node, we've now changed argx.
+ if (parentOfArgObj == parentArgx)
+ {
+ argx = temp;
+ }
+ }
+ if (argObj->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = argObj->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvPromoted)
+ {
+ if (varDsc->lvFieldCnt == 1)
+ {
+ // get the first and only promoted field
+ LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
+ if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
+ {
+ // we will use the first and only promoted field
+ argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
+
+ if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
+ (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
+ {
+ // Just use the existing field's type
+ argObj->gtType = fieldVarDsc->TypeGet();
+ }
+ else
+ {
+ // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
+ // to a new type
+ argObj->ChangeOper(GT_LCL_FLD);
+ argObj->gtType = structBaseType;
+ }
+ assert(varTypeCanReg(argObj->TypeGet()));
+ assert(copyBlkClass == NO_CLASS_HANDLE);
+ }
+ else
+ {
+ // use GT_LCL_FLD to swizzle the single field struct to a new type
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
+ argObj->ChangeOper(GT_LCL_FLD);
+ argObj->gtType = structBaseType;
+ }
+ }
+ else
+ {
+ // The struct fits into a single register, but it has been promoted into its
+ // constituent fields, and so we have to re-assemble it
+ copyBlkClass = objClass;
+#ifdef _TARGET_ARM_
+ // Alignment constraints may cause us not to use (to "skip") some argument
+ // registers. Add those, if any, to the skipped (int) arg reg mask.
+ fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
+#endif // _TARGET_ARM_
+ }
+ }
+ else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
+ {
+ // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
+ argObj->ChangeOper(GT_LCL_FLD);
+ argObj->gtType = structBaseType;
+ }
+ }
+ else
+ {
+ // Not a GT_LCL_VAR, so we can just change the type on the node
+ argObj->gtType = structBaseType;
+ }
+ assert(varTypeCanReg(argObj->TypeGet()) ||
+ ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
+
+ size = 1;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#endif // not _TARGET_X86_
+ // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
+ if ((structBaseType == TYP_STRUCT) &&
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ !passStructInRegisters
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ !passStructByRef
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ )
+ {
+ if (isHfaArg && passUsingFloatRegs)
+ {
+ size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA
+ }
+ else
+ {
+ // If the valuetype size is not a multiple of sizeof(void*),
+ // we must copyblk to a temp before doing the obj to avoid
+ // the obj reading memory past the end of the valuetype
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ // TODO-X86-CQ: [1091733] Revisit for small structs, we should use push instruction
+ copyBlkClass = objClass;
+ size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
+#else // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
+ if (roundupSize > originalSize)
+ {
+ copyBlkClass = objClass;
+
+ // There are a few special cases where we can omit using a CopyBlk
+ // where we normally would need to use one.
+
+ if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
+ {
+ copyBlkClass = NO_CLASS_HANDLE;
+ }
+ }
+
+ size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
+#endif // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
+ }
+ }
+ }
+
+#ifndef _TARGET_X86_
+ // TODO-Arm: Does this apply for _TARGET_ARM_, where structs passed by value can be split between
+ // registers and stack?
+ if (size > 1)
+ {
+ hasMultiregStructArgs = true;
+ }
+#endif // !_TARGET_X86_
+ }
+
+ // The 'size' value must now have been set. (The original value of zero is an invalid value.)
+ assert(size != 0);
+
+ //
+ // Figure out if the argument will be passed in a register.
+ //
+
+ if (isRegParamType(genActualType(argx->TypeGet()))
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ && (!isStructArg || structDesc.passedInRegisters)
+#endif
+ )
+ {
+#ifdef _TARGET_ARM_
+ if (passUsingFloatRegs)
+ {
+ // First, see if it can be back-filled
+ if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
+ (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
+ (size == 1)) // The size to back-fill is one float register
+ {
+ // Back-fill the register.
+ isBackFilled = true;
+ regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
+ fltArgSkippedRegMask &=
+ ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
+ nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
+ assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
+ }
+
+ // Does the entire float, double, or HFA fit in the FP arg registers?
+ // Check if the last register needed is still in the argument register range.
+ isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
+
+ if (!isRegArg)
+ {
+ anyFloatStackArgs = true;
+ }
+ }
+ else
+ {
+ isRegArg = intArgRegNum < MAX_REG_ARG;
+ }
+#elif defined(_TARGET_ARM64_)
+ if (passUsingFloatRegs)
+ {
+ // Check if the last register needed is still in the fp argument register range.
+ isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
+
+ // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers?
+ if (isHfaArg && !isRegArg)
+ {
+ // recompute the 'size' so that it represents the number of stack slots rather than the number of
+ // registers
+ //
+ unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
+ size = roundupSize / TARGET_POINTER_SIZE;
+
+ // We also must update fltArgRegNum so that we no longer try to
+ // allocate any new floating point registers for args
+ // This prevents us from backfilling a subsequent arg into d7
+ //
+ fltArgRegNum = MAX_FLOAT_REG_ARG;
+ }
+ }
+ else
+ {
+ // Check if the last register needed is still in the int argument register range.
+ isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
+
+ // Did we run out of registers when we had a 16-byte struct (size == 2)?
+ // (i.e we only have one register remaining but we needed two registers to pass this arg)
+ // This prevents us from backfilling a subsequent arg into x7
+ //
+ if (!isRegArg && (size > 1))
+ {
+ // We also must update intArgRegNum so that we no longer try to
+ // allocate any new general purpose registers for args
+ //
+ intArgRegNum = maxRegArgs;
+ }
+ }
+#else // not _TARGET_ARM_ or _TARGET_ARM64_
+
+#if defined(UNIX_AMD64_ABI)
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Here a struct can be passed in registers, following the classification of its members and size.
+ // Now make sure there are actually enough registers to do so.
+ if (isStructArg)
+ {
+ unsigned int structFloatRegs = 0;
+ unsigned int structIntRegs = 0;
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ if (structDesc.IsIntegralSlot(i))
+ {
+ structIntRegs++;
+ }
+ else if (structDesc.IsSseSlot(i))
+ {
+ structFloatRegs++;
+ }
+ }
+
+ isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
+ ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ if (passUsingFloatRegs)
+ {
+ isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
+ }
+ else
+ {
+ isRegArg = intArgRegNum < MAX_REG_ARG;
+ }
+ }
+#else // !defined(UNIX_AMD64_ABI)
+ isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
+#endif // !defined(UNIX_AMD64_ABI)
+#endif // _TARGET_ARM_
+ }
+ else
+ {
+ isRegArg = false;
+ }
+
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ if (call->IsTailCallViaHelper())
+ {
+ // We have already (before calling fgMorphArgs()) appended the 4 special args
+ // required by the x86 tailcall helper. These args are required to go on the
+ // stack. Force them to the stack here.
+ assert(numArgs >= 4);
+ if (argIndex >= numArgs - 4)
+ {
+ isRegArg = false;
+ }
+ }
+#endif // defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+
+ } // end !lateArgsComputed
+
+ //
+ // Now we know if the argument goes in registers or not and how big it is,
+ // whether we had to just compute it or this is a re-morph call and we looked it up.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ // If we ever allocate a floating point argument to the stack, then all
+ // subsequent HFA/float/double arguments go on the stack.
+ if (!isRegArg && passUsingFloatRegs)
+ {
+ for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
+ {
+ fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
+ }
+ }
+
+ // If we think we're going to split a struct between integer registers and the stack, check to
+ // see if we've already assigned a floating-point arg to the stack.
+ if (isRegArg && // We decided above to use a register for the argument
+ !passUsingFloatRegs && // We're using integer registers
+ (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
+ anyFloatStackArgs) // We've already used the stack for a floating-point argument
+ {
+ isRegArg = false; // Change our mind; don't pass this struct partially in registers
+
+ // Skip the rest of the integer argument registers
+ for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
+ {
+ argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
+ }
+ }
+
+#endif // _TARGET_ARM_
+
+ if (isRegArg)
+ {
+ regNumber nextRegNum = REG_STK;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ regNumber nextOtherRegNum = REG_STK;
+ unsigned int structFloatRegs = 0;
+ unsigned int structIntRegs = 0;
+
+ if (isStructArg && structDesc.passedInRegisters)
+ {
+ // It is a struct passed in registers. Assign the next available register.
+ assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
+ regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ if (structDesc.IsIntegralSlot(i))
+ {
+ *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
+ structIntRegs++;
+ }
+ else if (structDesc.IsSseSlot(i))
+ {
+ *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
+ structFloatRegs++;
+ }
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // fill in or update the argInfo table
+ nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
+ : genMapIntRegArgNumToRegNum(intArgRegNum);
+ }
+
+#ifdef _TARGET_AMD64_
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(size == 1);
+#endif
+#endif
+
+ fgArgTabEntryPtr newArgEntry;
+ if (lateArgsComputed)
+ {
+ // This is a register argument - possibly update it in the table
+ newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
+ }
+ else
+ {
+ bool isNonStandard = false;
+
+#ifndef LEGACY_BACKEND
+ // If there are nonstandard args (outside the calling convention), they were inserted above
+ // and noted in a table, so we can recognize them here and build their argInfo.
+ //
+ // They should not affect the placement of any other args or stack space required.
+ // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
+ isNonStandard = nonStandardArgs.FindReg(argx, &nextRegNum);
+#endif // !LEGACY_BACKEND
+
+ // This is a register argument - put it in the table
+ newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ ,
+ isStructArg, nextOtherRegNum, &structDesc
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ );
+
+ newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
+ isHfaArg); // Note: on Arm32 an HFA is passed in int regs for varargs
+ newArgEntry->SetIsBackFilled(isBackFilled);
+ newArgEntry->isNonStandard = isNonStandard;
+ }
+
+ if (newArgEntry->isNonStandard)
+ {
+ continue;
+ }
+
+ // Set up the next intArgRegNum and fltArgRegNum values.
+ if (!isBackFilled)
+ {
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (isStructArg)
+ {
+ intArgRegNum += structIntRegs;
+ fltArgRegNum += structFloatRegs;
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ if (passUsingFloatRegs)
+ {
+ fltArgRegNum += size;
+
+#ifdef WINDOWS_AMD64_ABI
+ // Whenever we pass a floating point register argument
+ // we skip the corresponding integer register argument
+ intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
+#endif // WINDOWS_AMD64_ABI
+#ifdef _TARGET_ARM_
+ if (fltArgRegNum > MAX_FLOAT_REG_ARG)
+ {
+ // This indicates a partial enregistration of a struct type
+ assert(varTypeIsStruct(argx));
+ unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
+ assert((unsigned char)numRegsPartial == numRegsPartial);
+ call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
+ fltArgRegNum = MAX_FLOAT_REG_ARG;
+ }
+#endif // _TARGET_ARM_
+ }
+ else
+ {
+ if (hasFixedRetBuffReg() && (nextRegNum == theFixedRetBuffReg()))
+ {
+ // we are setting up the fixed return buffer register argument
+ // so don't increment intArgRegNum
+ assert(size == 1);
+ }
+ else
+ {
+ // Increment intArgRegNum by 'size' registers
+ intArgRegNum += size;
+ }
+
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
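+ // Mirror of the float case above: on Windows x64, passing an integer
+ // register argument likewise skips the corresponding floating point register.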
+ fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
+#endif // _TARGET_AMD64_
+#ifdef _TARGET_ARM_
+ if (intArgRegNum > MAX_REG_ARG)
+ {
+ // This indicates a partial enregistration of a struct type
+ assert((isStructArg) || argx->OperIsCopyBlkOp() ||
+ (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
+ unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
+ assert((unsigned char)numRegsPartial == numRegsPartial);
+ call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
+ intArgRegNum = MAX_REG_ARG;
+ fgPtrArgCntCur += size - numRegsPartial;
+ }
+#endif // _TARGET_ARM_
+ }
+ }
+ }
+ }
+ else // We have an argument that is not passed in a register
+ {
+ fgPtrArgCntCur += size;
+
+ // If the register arguments have not been determined then we must fill in the argInfo
+
+ if (lateArgsComputed)
+ {
+ // This is a stack argument - possibly update it in the table
+ call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
+ }
+ else
+ {
+ // This is a stack argument - put it in the table
+ call->fgArgInfo->AddStkArg(argIndex, argx, args, size,
+ argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
+ }
+ }
+
+ if (copyBlkClass != NO_CLASS_HANDLE)
+ {
+ noway_assert(!lateArgsComputed);
+ fgMakeOutgoingStructArgCopy(call, args, argIndex,
+ copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
+
+ // This can cause a GTF_EXCEPT flag to be set.
+ // TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags.
+ // This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and
+ // there are no register arguments. Then lateArgsComputed is never true, so we keep re-copying
+ // any struct arguments.
+ // i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0)
+ flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ hasStackArgCopy = true;
+#endif
+ }
+
+#ifndef LEGACY_BACKEND
+ if (argx->gtOper == GT_MKREFANY)
+ {
+ NYI_X86("MKREFANY");
+
+ // 'Lower' the MKREFANY tree and insert it.
+ noway_assert(!lateArgsComputed);
+
+ // Get a new temp
+ // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
+ lvaSetStruct(tmp, impGetRefAnyClass(), false);
+
+ // Build the mkrefany as a comma node:
+ // (tmp.ptr=argx),(tmp.type=handle)
+ GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
+ GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
+ destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
+ destPtrSlot->gtFlags |= GTF_VAR_DEF;
+ destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
+ destTypeSlot->gtFlags |= GTF_VAR_DEF;
+
+ GenTreePtr asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
+ GenTreePtr asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
+ GenTreePtr asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
+
+ // Change the expression to "(tmp=val)"
+ args->gtOp.gtOp1 = asg;
+
+ // EvalArgsToTemps will cause tmp to actually get loaded as the argument
+ call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
+ lvaSetVarAddrExposed(tmp);
+ }
+#endif // !LEGACY_BACKEND
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (isStructArg && !isRegArg)
+ {
+ nonRegPassedStructSlots += size;
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ argSlots += size;
+ }
+ } // end foreach argument loop
+
+ if (!lateArgsComputed)
+ {
+ call->fgArgInfo->ArgsComplete();
+#ifdef LEGACY_BACKEND
+ call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
+#if defined(_TARGET_ARM_)
+ call->gtCallRegUsedMask &= ~argSkippedRegMask;
+#endif
+ if (fltArgRegNum > 0)
+ {
+#if defined(_TARGET_ARM_)
+ call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
+#endif
+ }
+#endif // LEGACY_BACKEND
+ }
+
+ if (call->gtCallArgs)
+ {
+ UpdateGT_LISTFlags(call->gtCallArgs);
+ }
+
+ /* Process the function address, if indirect call */
+
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ call->gtCallAddr = fgMorphTree(call->gtCallAddr);
+ }
+
+ call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);
+
+ if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
+ {
+ fgPtrArgCntCur++;
+ }
+
+ /* Remember the maximum value we ever see */
+
+ if (fgPtrArgCntMax < fgPtrArgCntCur)
+ {
+ fgPtrArgCntMax = fgPtrArgCntCur;
+ }
+
+ /* The call will pop all the arguments we pushed */
+
+ fgPtrArgCntCur = genPtrArgCntSav;
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ // Update the outgoing argument size.
+ // If the call is a fast tail call, it will setup its arguments in incoming arg
+ // area instead of the out-going arg area. Therefore, don't consider fast tail
+ // calls to update lvaOutgoingArgSpaceSize.
+ if (!call->IsFastTailCall())
+ {
+ unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
+
+#if defined(UNIX_AMD64_ABI)
+ opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly
+
+ // ToDo: Remove this re-calculation of preallocatedArgCount and use the value assigned above.
+
+ // First slots go in registers only, no stack needed.
+ // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
+ // and ignores floating point args (it is overly conservative in that case).
+ preallocatedArgCount = nonRegPassedStructSlots;
+ if (argSlots > MAX_REG_ARG)
+ {
+ preallocatedArgCount += argSlots - MAX_REG_ARG;
+ }
+#endif // UNIX_AMD64_ABI
+
+ // Check if we need to increase the size of our Outgoing Arg Space
+ if (preallocatedArgCount * REGSIZE_BYTES > lvaOutgoingArgSpaceSize)
+ {
+ lvaOutgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
+
+ // If a function has localloc, we will need to move the outgoing arg space when the
+ // localloc happens. When we do this, we need to maintain stack alignment. To avoid
+ // leaving alignment-related holes when doing this move, make sure the outgoing
+ // argument space size is a multiple of the stack alignment by aligning up to the next
+ // stack alignment boundary.
+ if (compLocallocUsed)
+ {
+ lvaOutgoingArgSpaceSize = (unsigned)roundUp(lvaOutgoingArgSpaceSize, STACK_ALIGN);
+ }
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, lvaOutgoingArgSpaceSize=%d\n", argSlots,
+ preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), lvaOutgoingArgSpaceSize);
+ }
+#endif
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ /* Update the 'side effect' flags value for the call */
+
+ call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
+
+ // If the register arguments have already been determined
+ // or we have no register arguments then we don't need to
+ // call SortArgs() and EvalArgsToTemps()
+ //
+ // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
+ // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
+ // is added to make sure that EvalArgsToTemps() is called in those cases too.
+ if (!lateArgsComputed && (call->fgArgInfo->HasRegArgs()
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ || hasStackArgCopy
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ ))
+ {
+ // This is the first time that we morph this call AND it has register arguments.
+ // Follow into the code below and do the 'defer or eval to temp' analysis.
+
+ call->fgArgInfo->SortArgs();
+
+ call->fgArgInfo->EvalArgsToTemps();
+
+ // We may have updated the arguments
+ if (call->gtCallArgs)
+ {
+ UpdateGT_LISTFlags(call->gtCallArgs);
+ }
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // Rewrite the struct args to be passed by value on stack or in registers.
+ fgMorphSystemVStructArgs(call, hasStructArgument);
+
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifndef LEGACY_BACKEND
+ // In the future we can migrate UNIX_AMD64 to use this
+ // method instead of fgMorphSystemVStructArgs
+
+ // We only build GT_LISTs for MultiReg structs for the RyuJIT backend
+ if (hasMultiregStructArgs)
+ {
+ fgMorphMultiregStructArgs(call);
+ }
+#endif // LEGACY_BACKEND
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ fgArgInfoPtr argInfo = call->fgArgInfo;
+ for (unsigned curInx = 0; curInx < argInfo->ArgCount(); curInx++)
+ {
+ fgArgTabEntryPtr curArgEntry = argInfo->ArgTable()[curInx];
+ curArgEntry->Dump();
+ }
+ }
+#endif
+
+ return call;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// fgMorphSystemVStructArgs:
+// Rewrite the struct args to be passed by value on stack or in registers.
+//
+// args:
+// call: The call whose arguments need to be morphed.
+// hasStructArgument: Whether this call has struct arguments.
+//
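+// Illustrative sketch only (the local number, field types and offsets below are
+// examples, not taken from a particular test case): for a 16-byte struct local V02
+// classified as one INTEGER and one SSE eightbyte, the argument
+//     LCL_VAR    V02 (TYP_STRUCT)
+// is rewritten into
+//     GT_LIST(LCL_FLD long V02 [+0], GT_LIST(LCL_FLD double V02 [+8], nullptr))
+// so that each eightbyte maps to one argument register. Structs that are not
+// passed in registers keep (or are given) the GT_OBJ/address form instead.
+//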
+void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
+{
+ unsigned flagsSummary = 0;
+ GenTreePtr args;
+ GenTreePtr argx;
+
+ if (hasStructArgument)
+ {
+ fgArgInfoPtr allArgInfo = call->fgArgInfo;
+
+ for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
+ {
+ // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
+ // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
+ // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
+ // between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to itself;
+ // otherwise it points to the corresponding node in the late args list.
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+ GenTreePtr arg = argx;
+ bool argListCreated = false;
+
+ var_types type = arg->TypeGet();
+
+ if (varTypeIsStruct(type))
+ {
+ var_types originalType = type;
+ // If we have already processed the arg...
+ if (arg->OperGet() == GT_LIST && varTypeIsStruct(arg))
+ {
+ continue;
+ }
+
+ // If already OBJ it is set properly already.
+ if (arg->OperGet() == GT_OBJ)
+ {
+ assert(!fgEntryPtr->structDesc.passedInRegisters);
+ continue;
+ }
+
+ assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
+ (arg->OperGet() == GT_ADDR &&
+ (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
+
+ GenTreeLclVarCommon* lclCommon =
+ arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
+ if (fgEntryPtr->structDesc.passedInRegisters)
+ {
+ if (fgEntryPtr->structDesc.eightByteCount == 1)
+ {
+ // Change the type; the code below will change the LclVar to a LCL_FLD
+ type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
+ fgEntryPtr->structDesc.eightByteSizes[0]);
+ }
+ else if (fgEntryPtr->structDesc.eightByteCount == 2)
+ {
+ // Create LCL_FLD for each eightbyte.
+ argListCreated = true;
+
+ // Second eightbyte.
+ GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
+ GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
+ .eightByteClassifications[1],
+ fgEntryPtr->structDesc.eightByteSizes[1]),
+ lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
+
+ GenTreeArgList* aggregate = gtNewAggregate(newLclField);
+ aggregate->gtType = originalType; // Preserve the type. It is a special case.
+ newLclField->gtFieldSeq = FieldSeqStore::NotAField();
+
+ // First field
+ arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+ arg->gtType =
+ GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
+ fgEntryPtr->structDesc.eightByteSizes[0]);
+ arg = aggregate->Prepend(this, arg);
+ arg->gtType = type; // Preserve the type. It is a special case.
+ }
+ else
+ {
+ assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
+ // for the CLR.
+ }
+ }
+
+ // If we didn't change the type of the struct, it means
+ // its classification doesn't support being passed directly in a
+ // register, so we need to pass a pointer to the destination
+ // where we copied the struct to.
+ if (!argListCreated)
+ {
+ if (fgEntryPtr->structDesc.passedInRegisters)
+ {
+ arg->gtType = type;
+ }
+ else
+ {
+ // Make sure this is an addr node.
+ if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
+ {
+ arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
+ }
+
+ assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
+
+ // Create an Obj of the temp to use it as a call argument.
+ arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
+ }
+ }
+ }
+
+ if (argx != arg)
+ {
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+
+ fgEntryPtr->node = arg;
+ if (isLateArg)
+ {
+ lateList->gtOp.gtOp1 = arg;
+ }
+ else
+ {
+ args->gtOp.gtOp1 = arg;
+ }
+ }
+ }
+ }
+
+ // Update the flags
+ call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+//-----------------------------------------------------------------------------
+// fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
+// call fgMorphMultiregStructArg on each of them.
+//
+// Arguments:
+// call: a GenTreeCall node that has one or more TYP_STRUCT arguments
+//
+// Notes:
+// We only call fgMorphMultiregStructArg for the register-passed TYP_STRUCT arguments.
+// The call to fgMorphMultiregStructArg will mutate the argument into the GT_LIST form
+// which is only used for register arguments.
+// If this method fails to find any TYP_STRUCT arguments it will assert.
+//
+void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
+{
+ GenTreePtr args;
+ GenTreePtr argx;
+ bool foundStructArg = false;
+ unsigned initialFlags = call->gtFlags;
+ unsigned flagsSummary = 0;
+ fgArgInfoPtr allArgInfo = call->fgArgInfo;
+
+ // Currently only ARM64 uses this method to morph the MultiReg struct args;
+ // in the future AMD64_UNIX (and ARM32 for HFAs) will also use this method
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ NYI_ARM("fgMorphMultiregStructArgs");
+#endif
+#ifdef _TARGET_X86_
+ assert(!"Logic error: no MultiregStructArgs for X86");
+#endif
+#ifdef _TARGET_AMD64_
+#if defined(UNIX_AMD64_ABI)
+ NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
+#else // WINDOWS_AMD64_ABI
+ assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
+#endif // !UNIX_AMD64_ABI
+#endif
+
+ for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
+ {
+ // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
+ // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
+ // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
+ // between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to itself;
+ // otherwise it points to the corresponding node in the late args list.
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+
+ GenTreePtr arg = argx;
+
+ if (arg->TypeGet() == TYP_STRUCT)
+ {
+ foundStructArg = true;
+
+ arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
+
+ // Did we replace 'argx' with a new tree?
+ if (arg != argx)
+ {
+ fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
+
+ // link the new arg node into either the late arg list or the gtCallArgs list
+ if (isLateArg)
+ {
+ lateList->gtOp.gtOp1 = arg;
+ }
+ else
+ {
+ args->gtOp.gtOp1 = arg;
+ }
+ }
+ }
+ }
+
+ // We should only call this method when we actually have one or more multireg struct args
+ assert(foundStructArg);
+
+ // Update the flags
+ call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
+}
+
+//-----------------------------------------------------------------------------
+// fgMorphMultiregStructArg: Given a multireg TYP_STRUCT arg from a call argument list
+// Morph the argument into a set of GT_LIST nodes.
+//
+// Arguments:
+// arg - A GenTree node containing a TYP_STRUCT arg that
+// is to be passed in multiple registers
+// fgEntryPtr - the fgArgTabEntry information for the current 'arg'
+//
+// Notes:
+// arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable
+// for passing in multiple registers.
+// If arg is a LclVar we check whether it is struct promoted with the right number of fields,
+// and if the fields are at the appropriate offsets we will use the struct promoted fields
+// in the GT_LIST nodes that we create.
+// If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
+// we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
+// this also forces the struct to be stack allocated into the local frame.
+// For the GT_OBJ case we will clone the address expression and generate two (or more)
+// indirections.
+// Currently the implementation only handles ARM64 and will NYI for other architectures.
+//
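+// Illustrative sketch only (a hypothetical 16-byte non-HFA struct local V03 on ARM64):
+//     OBJ(struct<16>, ADDR(LCL_VAR V03))
+// becomes
+//     GT_LIST(LCL_FLD long V03 [+0], GT_LIST(LCL_FLD long V03 [+8], nullptr))
+// while an HFA of four floats would instead become a four-element list of
+// TYP_FLOAT GT_LCL_FLD (or promoted field LCL_VAR) nodes.
+//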
+GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr)
+{
+ assert(arg->TypeGet() == TYP_STRUCT);
+
+#ifndef _TARGET_ARM64_
+ NYI("fgMorphMultiregStructArg requires implementation for this target");
+#endif
+
+#if FEATURE_MULTIREG_ARGS
+ // Examine 'arg' and setup argValue objClass and structSize
+ //
+ CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
+ GenTreePtr argValue = arg; // normally argValue will be arg, but see right below
+ unsigned structSize = 0;
+
+ if (arg->OperGet() == GT_OBJ)
+ {
+ GenTreeObj* argObj = arg->AsObj();
+ objClass = argObj->gtClass;
+ structSize = info.compCompHnd->getClassSize(objClass);
+
+ // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR
+ //
+ if (argObj->gtOp1->OperGet() == GT_ADDR)
+ {
+ argValue = argObj->gtOp1->gtOp.gtOp1;
+ }
+ }
+ else if (arg->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ objClass = lvaGetStruct(varNum);
+ structSize = varDsc->lvExactSize;
+ }
+ noway_assert(objClass != nullptr);
+
+ var_types hfaType = TYP_UNDEF;
+ var_types elemType = TYP_UNDEF;
+ unsigned elemCount = 0;
+ unsigned elemSize = 0;
+ var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
+
+ hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
+ if (varTypeIsFloating(hfaType))
+ {
+ elemType = hfaType;
+ elemSize = genTypeSize(elemType);
+ elemCount = structSize / elemSize;
+ assert(elemSize * elemCount == structSize);
+ for (unsigned inx = 0; inx < elemCount; inx++)
+ {
+ type[inx] = elemType;
+ }
+ }
+ else
+ {
+ assert(structSize <= 2 * TARGET_POINTER_SIZE);
+ BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
+ info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
+ elemCount = 2;
+ type[0] = getJitGCType(gcPtrs[0]);
+ type[1] = getJitGCType(gcPtrs[1]);
+
+ if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
+ {
+ // We can safely widen this to 16 bytes since we are loading from
+ // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and
+ // lives in the stack frame or will be a promoted field.
+ //
+ elemSize = TARGET_POINTER_SIZE;
+ structSize = 2 * TARGET_POINTER_SIZE;
+ }
+ else // we must have a GT_OBJ
+ {
+ assert(argValue->OperGet() == GT_OBJ);
+
+ // We need to load the struct from an arbitrary address
+ // and we can't read past the end of the structSize
+ // We adjust the second load type here
+ //
+ if (structSize < 2 * TARGET_POINTER_SIZE)
+ {
+ switch (structSize - TARGET_POINTER_SIZE)
+ {
+ case 1:
+ type[1] = TYP_BYTE;
+ break;
+ case 2:
+ type[1] = TYP_SHORT;
+ break;
+ case 4:
+ type[1] = TYP_INT;
+ break;
+ default:
+ noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
+ break;
+ }
+ }
+ }
+ }
+ // We should still have a TYP_STRUCT
+ assert(argValue->TypeGet() == TYP_STRUCT);
+
+ GenTreeArgList* newArg = nullptr;
+
+ // Are we passing a struct LclVar?
+ //
+ if (argValue->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ // At this point any TYP_STRUCT LclVar must be a 16-byte struct
+ // or an HFA struct, both which are passed by value.
+ //
+ assert((varDsc->lvSize() == 2 * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
+
+ varDsc->lvIsMultiRegArg = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("Multireg struct argument V%02u : ");
+ fgEntryPtr->Dump();
+ }
+#endif // DEBUG
+
+ // This local variable must match the layout of the 'objClass' type exactly
+ if (varDsc->lvIsHfa())
+ {
+ // We have a HFA struct
+ noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
+ noway_assert(elemSize == genTypeSize(elemType));
+ noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
+ noway_assert(elemSize * elemCount == varDsc->lvExactSize);
+
+ for (unsigned inx = 0; (inx < elemCount); inx++)
+ {
+ noway_assert(type[inx] == elemType);
+ }
+ }
+ else
+ {
+ // We must have a 16-byte struct (non-HFA)
+ noway_assert(elemCount == 2);
+
+ for (unsigned inx = 0; inx < elemCount; inx++)
+ {
+ CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
+
+ // We setup the type[inx] value above using the GC info from 'objClass'
+ // This GT_LCL_VAR must have the same GC layout info
+ //
+ if (currentGcLayoutType != TYPE_GC_NONE)
+ {
+ noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
+ }
+ else
+ {
+ // We may have used a small type when we set up the type[inx] values above
+ // We can safely widen this to TYP_I_IMPL
+ type[inx] = TYP_I_IMPL;
+ }
+ }
+ }
+
+ // Is this LclVar a promoted struct with exactly 2 fields?
+ // TODO-ARM64-CQ: Support struct promoted HFA types here
+ if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
+ {
+ // Do we have two promoted fields that start at offsets 0 and 8?
+ unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
+ unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
+
+ // Did we find the promoted fields at the necessary offsets?
+ if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
+ {
+ LclVarDsc* loVarDsc = &lvaTable[loVarNum];
+ LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
+
+ var_types loType = loVarDsc->lvType;
+ var_types hiType = hiVarDsc->lvType;
+
+ if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
+ {
+ // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
+ // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
+ //
+ JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
+ varNum);
+ //
+ // we call lvaSetVarDoNotEnregister and do the proper transformation below.
+ //
+ }
+ else
+ {
+ // We can use the struct promoted field as the two arguments
+
+ GenTreePtr loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
+ GenTreePtr hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
+
+ // Create a new tree for 'arg'
+ // replace the existing LDOBJ(ADDR(LCLVAR))
+ // with a LIST(LCLVAR-LO, LIST(LCLVAR-HI, nullptr))
+ //
+ newArg = gtNewAggregate(hiLclVar)->Prepend(this, loLclVar);
+ }
+ }
+ }
+ else
+ {
+ //
+ // We will create a list of GT_LCL_FLDs nodes to pass this struct
+ //
+ lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
+ }
+ }
+
+ // If we didn't set newArg to a new List Node tree
+ //
+ if (newArg == nullptr)
+ {
+ if (fgEntryPtr->regNum == REG_STK)
+ {
+ // We leave this stack passed argument alone
+ return arg;
+ }
+
+ // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
+ // A GT_LCL_FLD could also contain a 16-byte struct or an HFA struct inside it.
+ //
+ if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
+ {
+ GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
+ unsigned lastOffset = baseOffset + (elemCount * elemSize);
+
+ // The allocated size of our LocalVar must be at least as big as lastOffset
+ assert(varDsc->lvSize() >= lastOffset);
+
+ if (varDsc->lvStructGcCount > 0)
+ {
+ // alignment of the baseOffset is required
+ noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
+ noway_assert(elemSize == TARGET_POINTER_SIZE);
+ unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
+ const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable
+ for (unsigned inx = 0; (inx < elemCount); inx++)
+ {
+ // The GC information must match what we setup using 'objClass'
+ noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
+ }
+ }
+ else // this varDsc contains no GC pointers
+ {
+ for (unsigned inx = 0; inx < elemCount; inx++)
+ {
+ // The GC information must match what we setup using 'objClass'
+ noway_assert(!varTypeIsGC(type[inx]));
+ }
+ }
+
+ //
+ // We create a list of GT_LCL_FLDs nodes to pass this struct
+ //
+ lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
+
+ // Start building our list from the last element
+ unsigned offset = lastOffset;
+ unsigned inx = elemCount;
+
+ // Create a new tree for 'arg'
+ // replace the existing LDOBJ(ADDR(LCLVAR))
+ // with a LIST(LCLFLD-LO, LIST(LCLFLD-HI, nullptr) ...)
+ //
+ while (inx > 0)
+ {
+ inx--;
+ offset -= elemSize;
+ GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
+ if (newArg == nullptr)
+ {
+ newArg = gtNewAggregate(nextLclFld);
+ }
+ else
+ {
+ newArg = newArg->Prepend(this, nextLclFld);
+ }
+ }
+ }
+ // Are we passing a GT_OBJ struct?
+ //
+ else if (argValue->OperGet() == GT_OBJ)
+ {
+ GenTreeObj* argObj = argValue->AsObj();
+ GenTreePtr baseAddr = argObj->gtOp1;
+ var_types addrType = baseAddr->TypeGet();
+
+ // Create a new tree for 'arg'
+ // replace the existing LDOBJ(EXPR)
+ // with a LIST(IND(EXPR), LIST(IND(EXPR+8), nullptr) ...)
+ //
+
+ // Start building our list from the last element
+ unsigned offset = structSize;
+ unsigned inx = elemCount;
+ while (inx > 0)
+ {
+ inx--;
+ elemSize = genTypeSize(type[inx]);
+ offset -= elemSize;
+ GenTreePtr curAddr = baseAddr;
+ if (offset != 0)
+ {
+ GenTreePtr baseAddrDup = gtCloneExpr(baseAddr);
+ noway_assert(baseAddrDup != nullptr);
+ curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
+ }
+ else
+ {
+ curAddr = baseAddr;
+ }
+ GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr);
+ if (newArg == nullptr)
+ {
+ newArg = gtNewAggregate(curItem);
+ }
+ else
+ {
+ newArg = newArg->Prepend(this, curItem);
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ // If we reach here we should have set newArg to something
+ if (newArg == nullptr)
+ {
+ gtDispTree(argValue);
+ assert(!"Missing case in fgMorphMultiregStructArg");
+ }
+
+ if (verbose)
+ {
+ printf("fgMorphMultiregStructArg created tree:\n");
+ gtDispTree(newArg);
+ }
+#endif
+
+ arg = newArg; // consider calling fgMorphTree(newArg);
+
+#endif // FEATURE_MULTIREG_ARGS
+
+ return arg;
+}
+
+// Make a copy of a struct variable if necessary, to pass to a callee.
+// The resulting argument tree (the copy, or the original local when no copy is
+// needed) is stored back into the arg list and recorded in the fgArgInfo table.
+void Compiler::fgMakeOutgoingStructArgCopy(
+ GenTreeCall* call,
+ GenTree* args,
+ unsigned argIndex,
+ CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
+{
+ GenTree* argx = args->Current();
+ noway_assert(argx->gtOper != GT_MKREFANY);
+ // See if we need to insert a copy at all
+ // Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time
+ // but if there is only one use and no loops, the use must be last.
+ GenTreeLclVarCommon* lcl = nullptr;
+ if (argx->OperIsLocal())
+ {
+ lcl = argx->AsLclVarCommon();
+ }
+ else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
+ {
+ lcl = argx->AsObj()->Addr()->AsLclVarCommon();
+ }
+ if (lcl != nullptr)
+ {
+ unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
+ if (lvaIsImplicitByRefLocal(varNum))
+ {
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ // JIT_TailCall helper has an implicit assumption that all tail call arguments live
+ // on the caller's frame. If an argument lives on the caller's caller's frame, it may get
+ // overwritten if that frame is reused for the tail call. Therefore, we should always copy
+ // struct parameters if they are passed as arguments to a tail call.
+ if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
+ {
+ varDsc->lvRefCnt = 0;
+ args->gtOp.gtOp1 = lcl;
+ fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
+ fp->node = lcl;
+
+ JITDUMP("did not have to make outgoing copy for V%2d", varNum);
+ return;
+ }
+ }
+ }
+
+ if (fgOutgoingArgTemps == nullptr)
+ {
+ fgOutgoingArgTemps = hashBv::Create(this);
+ }
+
+ unsigned tmp = 0;
+ bool found = false;
+
+ // Attempt to find a local we have already used for an outgoing struct and reuse it.
+ // We do not reuse within a statement.
+ if (!opts.MinOpts())
+ {
+ indexType lclNum;
+ FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
+ !fgCurrentlyInUseArgTemps->testBit(lclNum))
+ {
+ tmp = (unsigned)lclNum;
+ found = true;
+ JITDUMP("reusing outgoing struct arg");
+ break;
+ }
+ }
+ NEXT_HBV_BIT_SET;
+ }
+
+ // Create the CopyBlk tree and insert it.
+ if (!found)
+ {
+ // Get a new temp
+ // Here we don't need an unsafe value cls check, since the addr of this temp is used only in the copyblk.
+ tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
+ lvaSetStruct(tmp, copyBlkClass, false);
+ fgOutgoingArgTemps->setBit(tmp);
+ }
+
+ fgCurrentlyInUseArgTemps->setBit(tmp);
+
+ // TYP_SIMD structs should not be enregistered, since the ABI requires them to be
+ // allocated on the stack and their address to be passed.
+ if (lclVarIsSIMDType(tmp))
+ {
+ lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
+ }
+
+ // Create a reference to the temp
+ GenTreePtr dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
+ dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
+
+ // TODO-Cleanup: This probably shouldn't be done here because arg morphing is done prior
+ // to ref counting of the lclVars.
+ lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this);
+
+ GenTreePtr src;
+ if (argx->gtOper == GT_OBJ)
+ {
+ argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
+ }
+ else
+ {
+ argx->gtFlags |= GTF_DONT_CSE;
+ }
+
+ // Copy the valuetype to the temp
+ unsigned size = info.compCompHnd->getClassSize(copyBlkClass);
+ GenTreePtr copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
+ copyBlk = fgMorphCopyBlock(copyBlk);
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ // Do the copy early, and evaluate the temp later (see EvalArgsToTemps)
+ // On Unix, create LCL_FLDs for structs passed in more than one register. See fgMakeTmpArgNode
+ GenTreePtr arg = copyBlk;
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+ // Structs are always on the stack, and thus never need temps
+ // so we have to put the copy and temp all into one expression
+ GenTreePtr arg = fgMakeTmpArgNode(tmp FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
+
+ // Change the expression to "(tmp=val),tmp"
+ arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ args->gtOp.gtOp1 = arg;
+ call->fgArgInfo->EvalToTmp(argIndex, tmp, arg);
+
+ return;
+}
+
+#ifdef _TARGET_ARM_
+// See declaration for specification comment.
+void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
+ unsigned firstArgRegNum,
+ regMaskTP* pArgSkippedRegMask)
+{
+ assert(varDsc->lvPromoted);
+ // There's no way to do these calculations without breaking abstraction and assuming that
+ // integer register arguments are consecutive ints. They are on ARM.
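+ // Worked example (illustrative): for a promoted struct { int i; double d; }
+ // passed starting in r0, field 'i' ends in r0 and field 'd' starts in r2,
+ // so r1 (the padding register) is added to *pArgSkippedRegMask.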
+
+ // To start, figure out what register contains the last byte of the first argument.
+ LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
+ unsigned lastFldRegOfLastByte =
+ (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
+
+ // Now we're keeping track of the register that the last field ended in; see what registers
+ // subsequent fields start in, and whether any are skipped.
+ // (We assume here the invariant that the fields are sorted in offset order.)
+ for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
+ {
+ unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset;
+ LclVarDsc* fldVarDsc = &lvaTable[fldVarNum];
+ unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
+ assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
+ // This loop should enumerate the offsets of any registers skipped:
+ // start at the first register after the one containing the last byte of the previous
+ // field, and stop just before the first register of the current field.
+ for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset;
+ skippedRegOffsets++)
+ {
+ // If the register number would not be an arg reg, we're done.
+ if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG)
+ return;
+ *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
+ }
+ lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
+ }
+}
+
+#endif // _TARGET_ARM_
+
+//****************************************************************************
+// fgFixupStructReturn:
+// The companion to impFixupCallStructReturn. Now that the importer is done,
+// change the gtType to the precomputed native return type.
+// Requires that callNode currently has a struct type.
+//
+void Compiler::fgFixupStructReturn(GenTreePtr callNode)
+{
+ assert(varTypeIsStruct(callNode));
+
+ GenTreeCall* call = callNode->AsCall();
+ bool callHasRetBuffArg = call->HasRetBufArg();
+ bool isHelperCall = call->IsHelperCall();
+
+ // Decide on the proper return type for this call that currently returns a struct
+ //
+ CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd;
+ Compiler::structPassingKind howToReturnStruct;
+ var_types returnType;
+
+ // There are a couple of Helper Calls that say they return a TYP_STRUCT but they
+ // expect this method to re-type this to a TYP_REF (what is in call->gtReturnType)
+ //
+ // CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
+ // CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
+ // CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
+ //
+ if (isHelperCall)
+ {
+ assert(!callHasRetBuffArg);
+ assert(retClsHnd == NO_CLASS_HANDLE);
+
+ // Now that we are past the importer, re-type this node
+ howToReturnStruct = SPK_PrimitiveType;
+ returnType = (var_types)call->gtReturnType;
+ }
+ else
+ {
+ returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
+ }
+
+ if (howToReturnStruct == SPK_ByReference)
+ {
+ assert(returnType == TYP_UNKNOWN);
+ assert(callHasRetBuffArg);
+ }
+ else
+ {
+ assert(returnType != TYP_UNKNOWN);
+
+ if (returnType != TYP_STRUCT)
+ {
+ // Widen the primitive type if necessary
+ returnType = genActualType(returnType);
+ }
+ call->gtType = returnType;
+ }
+
+#if FEATURE_MULTIREG_RET
+ // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer.
+ assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
+#else // !FEATURE_MULTIREG_RET
+ // No more struct returns
+ assert(call->TypeGet() != TYP_STRUCT);
+#endif
+
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // If it was a struct return, it has been transformed into a call
+ // with a return buffer (that returns TYP_VOID) or into a return
+ // of a primitive/enregisterable type
+ assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
+#endif
+}
+
+/*****************************************************************************
+ *
+ * A little helper used to rearrange nested commutative operations. The
+ * effect is that nested associative, commutative operations are transformed
+ * into a 'left-deep' tree, i.e. into something like this:
+ *
+ * (((a op b) op c) op d) op...
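+ *
+ * For example (illustrative), with oper == GT_ADD the tree for
+ * "a + (b + (c + d))" is re-shaped in place into "((a + b) + c) + d".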
+ */
+
+#if REARRANGE_ADDS
+
+void Compiler::fgMoveOpsLeft(GenTreePtr tree)
+{
+ GenTreePtr op1;
+ GenTreePtr op2;
+ genTreeOps oper;
+
+ do
+ {
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ oper = tree->OperGet();
+
+ noway_assert(GenTree::OperIsCommutative(oper));
+ noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
+ noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
+ noway_assert(oper == op2->gtOper);
+
+ // Commutativity doesn't hold if overflow checks are needed
+
+ if (tree->gtOverflowEx() || op2->gtOverflowEx())
+ {
+ return;
+ }
+
+ if (gtIsActiveCSE_Candidate(op2))
+ {
+ // If we have marked op2 as a CSE candidate,
+ // we can't perform a commutative reordering
+ // because any value numbers that we computed for op2
+ // will be incorrect after performing a commutative reordering
+ //
+ return;
+ }
+
+ if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
+ {
+ return;
+ }
+
+ // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
+ if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
+ {
+ return;
+ }
+
+ if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
+ {
+ // We could deal with this, but we were always broken and just hit the assert
+ // below regarding flags, which means it's not frequent, so we will just bail out.
+ // See #195514
+ return;
+ }
+
+ noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
+
+ GenTreePtr ad1 = op2->gtOp.gtOp1;
+ GenTreePtr ad2 = op2->gtOp.gtOp2;
+
+ // Compiler::optOptimizeBools() can create a GT_OR of two GC pointers yielding a GT_INT
+ // We cannot reorder such GT_OR trees
+ //
+ if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
+ {
+ break;
+ }
+
+ /* Change "(x op (y op z))" to "(x op y) op z" */
+ /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
+
+ GenTreePtr new_op1 = op2;
+
+ new_op1->gtOp.gtOp1 = op1;
+ new_op1->gtOp.gtOp2 = ad1;
+
+ /* Change the flags. */
+
+ // Make sure we aren't throwing away any flags
+ noway_assert((new_op1->gtFlags &
+ ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
+ GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated
+ GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
+
+ new_op1->gtFlags =
+ (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
+ (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
+
+ /* Retype new_op1 if it has become (or is no longer) a GC ptr. */
+
+ if (varTypeIsGC(op1->TypeGet()))
+ {
+ noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
+ oper == GT_ADD) || // byref(ref + (int+int))
+ (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
+ oper == GT_OR)); // int(gcref | int(gcref|intval))
+
+ new_op1->gtType = tree->gtType;
+ }
+ else if (varTypeIsGC(ad2->TypeGet()))
+ {
+ // Neither ad1 nor op1 is GC, so new_op1 isn't either
+ noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
+ new_op1->gtType = TYP_I_IMPL;
+ }
+
+ // If new_op1 is a new expression, assign it a new unique value number.
+ // vnStore is null before the ValueNumber phase has run
+ if (vnStore != nullptr)
+ {
+ // We can only keep the old value number on new_op1 if both op1 and ad2
+ // have the same non-NoVN value numbers. Since op is commutative, comparing
+ // only ad2 and op1 is enough.
+ if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
+ (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
+ (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
+ {
+ new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
+ }
+ }
+
+ tree->gtOp.gtOp1 = new_op1;
+ tree->gtOp.gtOp2 = ad2;
+
+ /* If 'new_op1' is now the same nested op, process it recursively */
+
+ if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
+ {
+ fgMoveOpsLeft(new_op1);
+ }
+
+ /* If 'ad2' is now the same nested op, process it
+ * Instead of recursion, we set up op1 and op2 for the next loop.
+ */
+
+ op1 = new_op1;
+ op2 = ad2;
+ } while ((op2->gtOper == oper) && !op2->gtOverflowEx());
+
+ return;
+}
+
+#endif
+
+/*****************************************************************************/
+
+void Compiler::fgSetRngChkTarget(GenTreePtr tree, bool delay)
+{
+ GenTreeBoundsChk* bndsChk = nullptr;
+ SpecialCodeKind kind = SCK_RNGCHK_FAIL;
+
+#ifdef FEATURE_SIMD
+ if ((tree->gtOper == GT_ARR_BOUNDS_CHECK) || (tree->gtOper == GT_SIMD_CHK))
+#else // FEATURE_SIMD
+ if (tree->gtOper == GT_ARR_BOUNDS_CHECK)
+#endif // FEATURE_SIMD
+ {
+ bndsChk = tree->AsBoundsChk();
+ kind = tree->gtBoundsChk.gtThrowKind;
+ }
+ else
+ {
+ noway_assert((tree->gtOper == GT_ARR_ELEM) || (tree->gtOper == GT_ARR_INDEX));
+ }
+
+#ifdef _TARGET_X86_
+ unsigned callStkDepth = fgPtrArgCntCur;
+#else
+ // only x86 pushes args
+ const unsigned callStkDepth = 0;
+#endif
+
+ if (opts.MinOpts())
+ {
+ delay = false;
+
+ // we need to initialize this field
+ if (fgGlobalMorph && bndsChk != nullptr)
+ {
+ bndsChk->gtStkDepth = callStkDepth;
+ }
+ }
+
+ if (!opts.compDbgCode)
+ {
+ if (delay || compIsForInlining())
+ {
+ /* We delay this until after loop-oriented range check
+ analysis. For now we merely store the current stack
+ level in the tree node.
+ */
+ if (bndsChk != nullptr)
+ {
+ noway_assert(!bndsChk->gtIndRngFailBB || previousCompletedPhase >= PHASE_OPTIMIZE_LOOPS);
+ bndsChk->gtStkDepth = callStkDepth;
+ }
+ }
+ else
+ {
+ /* Create/find the appropriate "range-fail" label */
+
+ // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
+ noway_assert((bndsChk != nullptr) || fgGlobalMorph);
+
+ unsigned stkDepth = (bndsChk != nullptr) ? bndsChk->gtStkDepth : callStkDepth;
+
+ BasicBlock* rngErrBlk = fgRngChkTarget(compCurBB, stkDepth, kind);
+
+ /* Add the label to the indirection node */
+
+ if (bndsChk != nullptr)
+ {
+ bndsChk->gtIndRngFailBB = gtNewCodeRef(rngErrBlk);
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Expand a GT_INDEX node and fully morph the child operands
+ *
+ * The original GT_INDEX node is bashed into the GT_IND node that accesses
+ * the array element. We expand the GT_INDEX node into a larger tree that
+ * evaluates the array base and index. The simplest expansion is a GT_COMMA
+ * with a GT_ARR_BOUNDS_CHECK and a GT_IND with a GTF_INX_RNGCHK flag.
+ * For complex array or index expressions one or more GT_COMMA assignments
+ * are inserted so that we only evaluate the array or index expressions once.
+ *
+ * The fully expanded tree is then morphed. This causes gtFoldExpr to
+ * perform local constant prop and reorder the constants in the tree and
+ * fold them.
+ *
+ * We then parse the resulting array element expression in order to locate
+ * and label the constants and variables that occur in the tree.
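+ *
+ * Illustrative shape only (exact trees vary with array kind, CSE temps and
+ * constant folding):
+ *
+ *    COMMA
+ *     +-- ARR_BOUNDS_CHECK comparing 'index' against the array length
+ *     \-- IND(elemTyp)
+ *          \-- ADD(arrRef, ADD(MUL(index, elemSize), elemOffs))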
+ */
+
+const int MAX_ARR_COMPLEXITY = 4;
+const int MAX_INDEX_COMPLEXITY = 4;
+
+GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_INDEX);
+ GenTreeIndex* asIndex = tree->AsIndex();
+
+ var_types elemTyp = tree->TypeGet();
+ unsigned elemSize = tree->gtIndex.gtIndElemSize;
+ CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
+
+ noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
+
+#ifdef FEATURE_SIMD
+ if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= getSIMDVectorRegisterByteLength())
+ {
+ // If this is a SIMD type, this is the point at which we lose the type information,
+ // so we need to set the correct type on the GT_IND.
+ // (We don't care about the base type here, so we only check, but don't retain, the return value).
+ unsigned simdElemSize = 0;
+ if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
+ {
+ assert(simdElemSize == elemSize);
+ elemTyp = getSIMDTypeForSize(elemSize);
+ // This is the new type of the node.
+ tree->gtType = elemTyp;
+ // Now set elemStructType to null so that we don't confuse value numbering.
+ elemStructType = nullptr;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ GenTreePtr arrRef = asIndex->Arr();
+ GenTreePtr index = asIndex->Index();
+
+ // Set up the array length's offset into lenOffs
+ // and the first element's offset into elemOffs
+ ssize_t lenOffs;
+ ssize_t elemOffs;
+ if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
+ {
+ lenOffs = offsetof(CORINFO_String, stringLen);
+ elemOffs = offsetof(CORINFO_String, chars);
+ tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
+ }
+ else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
+ {
+ lenOffs = offsetof(CORINFO_RefArray, length);
+ elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
+ }
+ else // We have a standard array
+ {
+ lenOffs = offsetof(CORINFO_Array, length);
+ elemOffs = offsetof(CORINFO_Array, u1Elems);
+ }
+
+ bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
+ bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
+
+ GenTreePtr arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
+ GenTreePtr indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
+ GenTreePtr bndsChk = nullptr;
+
+ // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
+ if (chkd)
+ {
+ GenTreePtr arrRef2 = nullptr; // The second copy will be used in array address expression
+ GenTreePtr index2 = nullptr;
+
+ // If the arrRef expression involves an assignment, a call or reads from global memory,
+ // then we *must* allocate a temporary in which to "localize" those values,
+ // to ensure that the same values are used in the bounds check and the actual
+ // dereference.
+ // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
+ //
+ if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY))
+ {
+ unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
+ arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
+ arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
+ arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
+ }
+ else
+ {
+ arrRef2 = gtCloneExpr(arrRef);
+ noway_assert(arrRef2 != nullptr);
+ }
+
+ // If the index expression involves an assignment, a call or reads from global memory,
+ // we *must* allocate a temporary in which to "localize" those values,
+ // to ensure that the same values are used in the bounds check and the actual
+ // dereference.
+ // Also we allocate the temporary when the index is sufficiently complex/expensive.
+ //
+ if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY))
+ {
+ unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
+ indexDefn = gtNewTempAssign(indexTmpNum, index);
+ index = gtNewLclvNode(indexTmpNum, index->TypeGet());
+ index2 = gtNewLclvNode(indexTmpNum, index->TypeGet());
+ }
+ else
+ {
+ index2 = gtCloneExpr(index);
+ noway_assert(index2 != nullptr);
+ }
+
+ // Next introduce a GT_ARR_BOUNDS_CHECK node
+ var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
+
+#ifdef _TARGET_64BIT_
+ // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case
+ // of a 64-bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
+ // the comparison will have to be widened to 64 bits.
+ if (index->TypeGet() == TYP_I_IMPL)
+ {
+ bndsChkType = TYP_I_IMPL;
+ }
+#endif // _TARGET_64BIT_
+
+ GenTree* arrLen = new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrRef, (int)lenOffs);
+
+ if (bndsChkType != TYP_INT)
+ {
+ arrLen = gtNewCastNode(bndsChkType, arrLen, bndsChkType);
+ }
+
+ GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, index, SCK_RNGCHK_FAIL);
+
+ bndsChk = arrBndsChk;
+
+ // Make sure to increment ref-counts if already ref-counted.
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveIncRefCounts(index);
+ lvaRecursiveIncRefCounts(arrRef);
+ }
+
+ // Now we'll switch to using the second copies for arrRef and index
+ // to compute the address expression
+
+ arrRef = arrRef2;
+ index = index2;
+ }
+
+ // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
+
+ GenTreePtr addr;
+
+#ifdef _TARGET_64BIT_
+ // Widen 'index' on 64-bit targets
+ if (index->TypeGet() != TYP_I_IMPL)
+ {
+ if (index->OperGet() == GT_CNS_INT)
+ {
+ index->gtType = TYP_I_IMPL;
+ }
+ else
+ {
+ index = gtNewCastNode(TYP_I_IMPL, index, TYP_I_IMPL);
+ }
+ }
+#endif // _TARGET_64BIT_
+
+ /* Scale the index value if necessary */
+ if (elemSize > 1)
+ {
+ GenTreePtr size = gtNewIconNode(elemSize, TYP_I_IMPL);
+
+ // Fix 392756 WP7 Crossgen
+ //
+ // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
+ // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
+ // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
+ //
+ size->gtFlags |= GTF_DONT_CSE;
+
+ /* Multiply by the array element size */
+ addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
+ }
+ else
+ {
+ addr = index;
+ }
+
+ /* Add the object ref to the element's offset */
+
+ addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
+
+ /* Add the first element's offset */
+
+ GenTreePtr cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
+
+ addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);
+
+#if SMALL_TREE_NODES
+ assert(tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE);
+#endif
+
+ // Change the original GT_INDEX node into a GT_IND node
+ tree->SetOper(GT_IND);
+
+ // If the resulting indirection is a floating-point type, notify the compiler
+ // that we'll potentially use floating-point registers at the time of codegen.
+ if (varTypeIsFloating(tree->gtType))
+ {
+ this->compFloatingPointUsed = true;
+ }
+
+ // We've now consumed the GTF_INX_RNGCHK, and the node
+ // is no longer a GT_INDEX node.
+ tree->gtFlags &= ~GTF_INX_RNGCHK;
+
+ tree->gtOp.gtOp1 = addr;
+
+ // This is an array index expression.
+ tree->gtFlags |= GTF_IND_ARR_INDEX;
+
+ /* An indirection will cause a GPF if the address is null */
+ tree->gtFlags |= GTF_EXCEPT;
+
+ if (nCSE)
+ {
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+
+ // Store information about it.
+ GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));
+
+ // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
+
+ GenTreePtr indTree = tree;
+
+ // Did we create a bndsChk tree?
+ if (bndsChk)
+ {
+ // Use a GT_COMMA node to prepend the array bound check
+ //
+ tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
+
+ /* Mark the indirection node as needing a range check */
+ fgSetRngChkTarget(bndsChk);
+ }
+
+ if (indexDefn != nullptr)
+ {
+ // Use a GT_COMMA node to prepend the index assignment
+ //
+ tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
+ }
+ if (arrRefDefn != nullptr)
+ {
+ // Use a GT_COMMA node to prepend the arrRef assignment
+ //
+ tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
+ }
+
+ // Currently we morph the tree to perform some folding operations prior
+ // to attaching fieldSeq info and labeling constant array index contributions
+ //
+ fgMorphTree(tree);
+
+ // Ideally we just want to proceed to attaching fieldSeq info and labeling the
+ // constant array index contributions, but the morphing operation may have changed
+ // the 'tree' into something that now unconditionally throws an exception.
+ //
+ // In such a case the gtEffectiveVal could be a new tree, or its gtOper could be modified,
+ // or it could be left unchanged. If it is unchanged then we should not return;
+ // instead we should proceed to attaching fieldSeq info, etc...
+ //
+ GenTreePtr arrElem = tree->gtEffectiveVal();
+
+ if (fgIsCommaThrow(tree))
+ {
+ if ((arrElem != indTree) || // A new tree node may have been created
+ (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
+ {
+ return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
+ }
+ }
+
+ assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
+
+ addr = arrElem->gtOp.gtOp1;
+
+ assert(addr->TypeGet() == TYP_BYREF);
+
+ GenTreePtr cnsOff = nullptr;
+ if (addr->OperGet() == GT_ADD)
+ {
+ if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ cnsOff = addr->gtOp.gtOp2;
+ addr = addr->gtOp.gtOp1;
+ }
+
+ while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
+ {
+ assert(addr->TypeGet() == TYP_BYREF);
+ GenTreePtr index = addr->gtOp.gtOp2;
+
+ // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
+ index->LabelIndex(this);
+
+ addr = addr->gtOp.gtOp1;
+ }
+ assert(addr->TypeGet() == TYP_REF);
+ }
+ else if (addr->OperGet() == GT_CNS_INT)
+ {
+ cnsOff = addr;
+ }
+
+ FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
+
+ if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
+ {
+ // Assign it the [#FirstElem] field sequence
+ //
+ cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
+ }
+ else // We have folded the first element's offset with the index expression
+ {
+ // Build the [#ConstantIndex, #FirstElem] field sequence
+ //
+ FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
+ FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
+
+ if (cnsOff == nullptr) // It must have folded into a zero offset
+ {
+ // Record in the general zero-offset map.
+ GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
+ }
+ else
+ {
+ cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
+ }
+ }
+
+ return tree;
+}
+
+#ifdef _TARGET_X86_
+/*****************************************************************************
+ *
+ * Wrap fixed stack arguments of varargs functions so that they are accessed
+ * through the varargs cookie, except for the cookie itself.
+ *
+ * Non-x86 platforms are allowed to access all arguments directly
+ * so we don't need this code.
+ *
+ */
+GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
+{
+ /* For the fixed stack arguments of a varargs function, we need to go
+ through the varargs cookie to access them, except for the
+ cookie itself */
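+
+ // Illustrative sketch (not exact JIT dump output): such an argument access is
+ // rewritten roughly as
+ // IND(varType, SUB(LCL_VAR lvaVarargsBaseOfStkArgs, <offset of the arg>))
+ // i.e. the fixed stack argument is addressed relative to the varargs base local
+ // rather than directly off the frame.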
+
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg)
+ {
+ // Create a node representing the local pointing to the base of the args
+ GenTreePtr ptrArg =
+ gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
+ gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*) +
+ lclOffs));
+
+ // Access the argument through the local
+ GenTreePtr tree = gtNewOperNode(GT_IND, varType, ptrArg);
+ tree->gtFlags |= GTF_IND_TGTANYWHERE;
+
+ if (varDsc->lvAddrExposed)
+ {
+ tree->gtFlags |= GTF_GLOB_REF;
+ }
+
+ return fgMorphTree(tree);
+ }
+
+ return NULL;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Transform the given GT_LCL_VAR tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_LCL_VAR);
+
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ var_types varType = lvaGetRealType(lclNum);
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvAddrExposed)
+ {
+ tree->gtFlags |= GTF_GLOB_REF;
+ }
+
+#ifdef _TARGET_X86_
+ if (info.compIsVarArgs)
+ {
+ GenTreePtr newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
+ if (newTree != NULL)
+ return newTree;
+ }
+#endif // _TARGET_X86_
+
+ /* If not during the global morphing phase bail */
+
+ if (!fgGlobalMorph)
+ {
+ return tree;
+ }
+
+ bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;
+
+ noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr
+
+ if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad())
+ {
+#if LOCAL_ASSERTION_PROP
+ /* Assertion prop can tell us to omit adding a cast here */
+ if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)
+ {
+ return tree;
+ }
+#endif
+ /* Small-typed arguments and aliased locals are normalized on load.
+ Other small-typed locals are normalized on store.
+ Under the debugger, small-typed locals are also normalized on load,
+ since the debugger could write to the variable.
+ If this is one of the former, insert a narrowing cast on the load,
+ i.e. convert: var-short --> cast-short(var-int) */
+
+ tree->gtType = TYP_INT;
+ fgMorphTreeDone(tree);
+ tree = gtNewCastNode(TYP_INT, tree, varType);
+ fgMorphTreeDone(tree);
+ return tree;
+ }
+
+ return tree;
+}
+
+/*****************************************************************************
+ Grab a temp for big offset morphing.
+ This method will grab a new temp if no temp of this "type" has been created;
+ otherwise it will return the cached one.
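+ For example, two big-offset field morphs whose object references are both TYP_REF
+ will share the single cached TYP_REF temp.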
+*/
+unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
+{
+ unsigned lclNum = fgBigOffsetMorphingTemps[type];
+
+ if (lclNum == BAD_VAR_NUM)
+ {
+ // We haven't created a temp for this kind of type. Create one now.
+ lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
+ fgBigOffsetMorphingTemps[type] = lclNum;
+ }
+ else
+ {
+ // We better get the right type.
+ noway_assert(lvaTable[lclNum].TypeGet() == type);
+ }
+
+ noway_assert(lclNum != BAD_VAR_NUM);
+ return lclNum;
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GT_FIELD tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
+{
+ assert(tree->gtOper == GT_FIELD);
+
+ noway_assert(tree->gtFlags & GTF_GLOB_REF);
+
+ CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
+ unsigned fldOffset = tree->gtField.gtFldOffset;
+ GenTreePtr objRef = tree->gtField.gtFldObj;
+ bool fieldMayOverlap = false;
+ bool objIsLocal = false;
+
+ if (tree->gtField.gtFldMayOverlap)
+ {
+ fieldMayOverlap = true;
+ // Reset the flag because we may reuse the node.
+ tree->gtField.gtFldMayOverlap = false;
+ }
+
+#ifdef FEATURE_SIMD
+ // If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
+ if (mac == nullptr || mac->m_kind != MACK_Addr)
+ {
+ GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
+ if (newTree != tree)
+ {
+ newTree = fgMorphSmpOp(newTree);
+ return newTree;
+ }
+ }
+ else if (objRef != nullptr && objRef->OperGet() == GT_ADDR && objRef->OperIsSIMD())
+ {
+ // We have a field of a SIMD intrinsic in an address-taken context.
+ // We need to copy the SIMD result to a temp, and take the field of that.
+ GenTree* copy = fgCopySIMDNode(objRef->gtOp.gtOp1->AsSIMD());
+ objRef->gtOp.gtOp1 = copy;
+ }
+#endif
+
+ /* Is this an instance data member? */
+
+ if (objRef)
+ {
+ GenTreePtr addr;
+ objIsLocal = objRef->IsLocal();
+
+ if (tree->gtFlags & GTF_IND_TLS_REF)
+ {
+ NO_WAY("instance field can not be a TLS ref.");
+ }
+
+ /* We'll create the expression "*(objRef + mem_offs)" */
+
+ noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);
+
+ // An optimization for Contextful classes:
+ // we unwrap the proxy when we have a 'this reference'
+ if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
+ {
+ objRef = fgUnwrapProxy(objRef);
+ }
+
+ /*
+ Now we have a tree like this:
+
+ +--------------------+
+ | GT_FIELD | tree
+ +----------+---------+
+ |
+ +--------------+-------------+
+ | tree->gtField.gtFldObj |
+ +--------------+-------------+
+
+
+ We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
+
+ +--------------------+
+ | GT_IND/GT_OBJ | tree
+ +---------+----------+
+ |
+ |
+ +---------+----------+
+ | GT_ADD | addr
+ +---------+----------+
+ |
+ / \
+ / \
+ / \
+ +-------------------+ +----------------------+
+ | objRef | | fldOffset |
+ | | | (when fldOffset !=0) |
+ +-------------------+ +----------------------+
+
+
+ or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
+
+
+ +--------------------+
+ | GT_IND/GT_OBJ | tree
+ +----------+---------+
+ |
+ +----------+---------+
+ | GT_COMMA | comma2
+ +----------+---------+
+ |
+ / \
+ / \
+ / \
+ / \
+ +---------+----------+ +---------+----------+
+ comma | GT_COMMA | | "+" (i.e. GT_ADD) | addr
+ +---------+----------+ +---------+----------+
+ | |
+ / \ / \
+ / \ / \
+ / \ / \
+ +-----+-----+ +-----+-----+ +---------+ +-----------+
+ asg | GT_ASG | ind | GT_IND | | tmpLcl | | fldOffset |
+ +-----+-----+ +-----+-----+ +---------+ +-----------+
+ | |
+ / \ |
+ / \ |
+ / \ |
+ +-----+-----+ +-----+-----+ +-----------+
+ | tmpLcl | | objRef | | tmpLcl |
+ +-----------+ +-----------+ +-----------+
+
+
+ */
+
+ var_types objRefType = objRef->TypeGet();
+
+ GenTreePtr comma = nullptr;
+
+ bool addedExplicitNullCheck = false;
+
+ // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field,
+ // and thus is equivalent to a MACK_Ind with zero offset.
+ MorphAddrContext defMAC(MACK_Ind);
+ if (mac == nullptr)
+ {
+ mac = &defMAC;
+ }
+
+ // This flag is set to enable the "conservative" style of explicit null-check insertion.
+ // This means that we insert an explicit null check whenever we create byref by adding a
+ // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
+ // dereferenced). The alternative is "aggressive", which would not insert such checks (for
+ // small offsets); in this plan, we would transfer some null-checking responsibility to
+ // callees of methods taking byref parameters. They would have to add explicit null checks
+ // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
+ // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
+ // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
+ // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
+ // This is left here to point out how to implement it.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
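+
+ // Illustrative example: under the conservative scheme, creating a byref to a field
+ // at a non-zero offset in a MACK_Addr context (e.g. passing a byref to obj.f as an
+ // argument) gets an explicit null check on the object before the "obj + fldOffset"
+ // byref is formed, because that byref is not immediately dereferenced.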
+
+ // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
+ // whose address is being taken is either a local or static variable, whose address is necessarily
+ // non-null, or else it is a field dereference, which will do its own bounds checking if necessary.
+ if (objRef->gtOper != GT_ADDR && ((mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind) &&
+ (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)
+#if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
+ || (mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0))
+#else
+ || (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
+ (mac->m_totalOffset + fldOffset > 0))
+#endif
+ )))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Before explicit null check morphing:\n");
+ gtDispTree(tree);
+ }
+#endif
+
+ //
+ // Create the "comma" subtree
+ //
+ GenTreePtr asg = nullptr;
+ GenTreePtr nullchk;
+
+ unsigned lclNum;
+
+ if (objRef->gtOper != GT_LCL_VAR)
+ {
+ lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
+
+ // Create the "asg" node
+ asg = gtNewTempAssign(lclNum, objRef);
+ }
+ else
+ {
+ lclNum = objRef->gtLclVarCommon.gtLclNum;
+ }
+
+ // Create the "nullchk" node.
+ // Make it TYP_BYTE so we only dereference it for 1 byte.
+ GenTreePtr lclVar = gtNewLclvNode(lclNum, objRefType);
+ nullchk = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);
+
+ nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections
+
+ // An indirection will cause a GPF if the address is null.
+ nullchk->gtFlags |= GTF_EXCEPT;
+
+ compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
+ optMethodFlags |= OMF_HAS_NULLCHECK;
+
+ if (asg)
+ {
+ // Create the "comma" node.
+ comma = gtNewOperNode(GT_COMMA,
+ TYP_VOID, // We don't want to return anything from this "comma" node.
+ // Set the type to TYP_VOID, so we can select "cmp" instruction
+ // instead of "mov" instruction later on.
+ asg, nullchk);
+ }
+ else
+ {
+ comma = nullchk;
+ }
+
+ addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
+
+ addedExplicitNullCheck = true;
+ }
+ else if (fldOffset == 0)
+ {
+ // Generate the "addr" node.
+ addr = objRef;
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
+ }
+ else
+ {
+ addr = objRef;
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (tree->gtField.gtFieldLookup.addr != nullptr)
+ {
+ GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL);
+
+ if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
+ baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset);
+
+ addr =
+ gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, baseOffset);
+ }
+#endif
+ if (fldOffset != 0)
+ {
+ // Generate the "addr" node.
+ /* Add the member offset to the object's address */
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
+ gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));
+ }
+
+ // Now let's set the "tree" as a GT_IND tree.
+
+ tree->SetOper(GT_IND);
+ tree->gtOp.gtOp1 = addr;
+
+ if (fgAddrCouldBeNull(addr))
+ {
+ // This indirection can cause a GPF if the address could be null.
+ tree->gtFlags |= GTF_EXCEPT;
+ }
+
+ if (addedExplicitNullCheck)
+ {
+ //
+ // Create "comma2" node and link it to "tree".
+ //
+ GenTreePtr comma2;
+ comma2 = gtNewOperNode(GT_COMMA,
+ addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
+ comma, addr);
+ tree->gtOp.gtOp1 = comma2;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (addedExplicitNullCheck)
+ {
+ printf("After adding explicit null check:\n");
+ gtDispTree(tree);
+ }
+ }
+#endif
+ }
+ else /* This is a static data member */
+ {
+ if (tree->gtFlags & GTF_IND_TLS_REF)
+ {
+ // Thread Local Storage static field reference
+ //
+ // Field ref is a TLS 'Thread-Local-Storage' reference
+ //
+ // Build this tree: IND(*) #
+ // |
+ // ADD(I_IMPL)
+ // / \
+ // / CNS(fldOffset)
+ // /
+ // /
+ // /
+ // IND(I_IMPL) == [Base of this DLL's TLS]
+ // |
+ // ADD(I_IMPL)
+ // / \
+ // / CNS(IdValue*4) or MUL
+ // / / \
+ // IND(I_IMPL) / CNS(4)
+ // | /
+ // CNS(TLS_HDL,0x2C) IND
+ // |
+ // CNS(pIdAddr)
+ //
+ // # Denotes the original node
+ //
+ void** pIdAddr = nullptr;
+ unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);
+
+ //
+ // If we can access the TLS DLL index ID value directly
+ // then pIdAddr will be NULL and
+ // IdValue will be the actual TLS DLL index ID
+ //
+ GenTreePtr dllRef = nullptr;
+ if (pIdAddr == nullptr)
+ {
+ if (IdValue != 0)
+ {
+ dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
+ }
+ }
+ else
+ {
+ dllRef = gtNewIconHandleNode((size_t)pIdAddr, GTF_ICON_STATIC_HDL);
+ dllRef = gtNewOperNode(GT_IND, TYP_I_IMPL, dllRef);
+ dllRef->gtFlags |= GTF_IND_INVARIANT;
+
+ /* Multiply by 4 */
+
+ dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
+ }
+
+#define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
+
+ // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
+
+ GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
+
+ tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
+
+ if (dllRef != nullptr)
+ {
+ /* Add the dllRef */
+ tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
+ }
+
+ /* indirect to have tlsRef point at the base of the DLL's Thread Local Storage */
+ tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
+
+ if (fldOffset != 0)
+ {
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ GenTreePtr fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);
+
+ /* Add the TLS static field offset to the address */
+
+ tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
+ }
+
+ // Final indirect to get to actual value of TLS static field
+
+ tree->SetOper(GT_IND);
+ tree->gtOp.gtOp1 = tlsRef;
+
+ noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
+ }
+ else
+ {
+ // Normal static field reference
+
+ //
+ // If we can access the static's address directly
+ // then pFldAddr will be NULL and
+ // fldAddr will be the actual address of the static field
+ //
+ void** pFldAddr = nullptr;
+ void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);
+
+ if (pFldAddr == nullptr)
+ {
+#ifdef _TARGET_64BIT_
+ if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
+ {
+ // The address is not directly addressable, so force it into a
+ // constant so that we handle it properly
+
+ GenTreePtr addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
+ addr->gtType = TYP_I_IMPL;
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ addr->gtIntCon.gtFieldSeq = fieldSeq;
+
+ tree->SetOper(GT_IND);
+ tree->gtOp.gtOp1 = addr;
+
+ return fgMorphSmpOp(tree);
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+ // Only volatile could be set, and it maps over
+ noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_COMMON_MASK)) == 0);
+ noway_assert(GTF_FLD_VOLATILE == GTF_IND_VOLATILE);
+ tree->SetOper(GT_CLS_VAR);
+ tree->gtClsVar.gtClsVarHnd = symHnd;
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ tree->gtClsVar.gtFieldSeq = fieldSeq;
+ }
+
+ return tree;
+ }
+ else
+ {
+ GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
+
+ // There are two cases here: either the static is RVA-based,
+ // in which case the type of the FIELD node is not a GC type
+ // and the handle to the RVA is a TYP_I_IMPL; or the FIELD node is
+ // a GC type and the handle to it is a TYP_BYREF into the GC heap,
+ // because handles to statics now go into the large object heap
+
+ var_types handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
+ GenTreePtr op1 = gtNewOperNode(GT_IND, handleTyp, addr);
+ op1->gtFlags |= GTF_IND_INVARIANT;
+
+ tree->SetOper(GT_IND);
+ tree->gtOp.gtOp1 = op1;
+ }
+ }
+ }
+ noway_assert(tree->gtOper == GT_IND);
+
+ GenTreePtr res = fgMorphSmpOp(tree);
+
+ // If we have a struct type, this node would previously have been under a GT_ADDR,
+ // and therefore would have been marked GTF_DONT_CSE.
+ // TODO-1stClassStructs: revisit this.
+ if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
+ {
+ res->gtFlags |= GTF_DONT_CSE;
+ }
+
+ if (fldOffset == 0 && res->OperGet() == GT_IND)
+ {
+ GenTreePtr addr = res->gtOp.gtOp1;
+ // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ fgAddFieldSeqForZeroOffset(addr, fieldSeq);
+ }
+
+ return res;
+}
+
+//------------------------------------------------------------------------------
+// fgMorphCallInline: attempt to inline a call
+//
+// Arguments:
+// call - call expression to inline, inline candidate
+// inlineResult - result tracking and reporting
+//
+// Notes:
+// Attempts to inline the call.
+//
+// If successful, callee's IR is inserted in place of the call, and
+// is marked with an InlineContext.
+//
+// If unsuccessful, the transformations done in anticipation of a
+// possible inline are undone, and the candidate flag on the call
+// is cleared.
+
+void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
+{
+ // The call must be a candidate for inlining.
+ assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
+
+ // Attempt the inline
+ fgMorphCallInlineHelper(call, inlineResult);
+
+ // We should have made up our minds one way or another....
+ assert(inlineResult->IsDecided());
+
+ // If we failed to inline, we have a bit of work to do to cleanup
+ if (inlineResult->IsFailure())
+ {
+
+#ifdef DEBUG
+
+ // Before we do any cleanup, create a failing InlineContext to
+ // capture details of the inlining attempt.
+ m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
+
+#endif
+
+ // It was an inline candidate, but we haven't expanded it.
+ if (call->gtCall.gtReturnType != TYP_VOID)
+ {
+ // Detach the GT_CALL tree from the original statement by
+ // replacing it with a "nothing" node. Later the "nothing" node will be removed
+ // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
+
+ noway_assert(fgMorphStmt->gtStmt.gtStmtExpr == call);
+ fgMorphStmt->gtStmt.gtStmtExpr = gtNewNothingNode();
+ }
+
+ // Clear the Inline Candidate flag so we can ensure later we tried
+ // inlining all candidates.
+ //
+ call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
+ }
+}
+
+/*****************************************************************************
+ * Helper to attempt to inline a call
+ * Sets success/failure in inline result
+ * If success, modifies current method's IR with inlinee's IR
+ * If failed, undoes any speculative modifications to current method
+ */
+
+void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
+{
+ // Don't expect any surprises here.
+ assert(result->IsCandidate());
+
+ if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
+ {
+ // For now, attributing this to call site, though it's really
+ // more of a budget issue (lvaCount currently includes all
+ // caller and prospective callee locals). We still might be
+ // able to inline other callees into this caller, or inline
+ // this callee in other callers.
+ result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
+ return;
+ }
+
+ if (call->IsVirtual())
+ {
+ result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
+ return;
+ }
+
+ // impMarkInlineCandidate() is expected not to mark tail prefixed calls
+ // and recursive tail calls as inline candidates.
+ noway_assert(!call->IsTailPrefixedCall());
+ noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
+
+ /* If the caller's stack frame is marked, then we can't do any inlining. Period.
+ Although we have checked this in impCanInline, it is possible that later IL instructions
+ might cause compNeedSecurityCheck to be set. Therefore we need to check it here again.
+ */
+
+ if (opts.compNeedSecurityCheck)
+ {
+ result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
+ return;
+ }
+
+ //
+ // Calling inlinee's compiler to inline the method.
+ //
+
+ unsigned startVars = lvaCount;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Expanding INLINE_CANDIDATE in statement ");
+ printTreeID(fgMorphStmt);
+ printf(" in BB%02u:\n", compCurBB->bbNum);
+ gtDispTree(fgMorphStmt);
+
+ // printf("startVars=%d.\n", startVars);
+ }
+#endif
+
+ impInlineRoot()->m_inlineStrategy->NoteAttempt(result);
+
+ //
+ // Invoke the compiler to inline the call.
+ //
+
+ fgInvokeInlineeCompiler(call, result);
+
+ if (result->IsFailure())
+ {
+ // Undo some changes made in anticipation of inlining...
+
+ // Zero out the used locals
+ memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
+ for (unsigned i = startVars; i < lvaCount; i++)
+ {
+ new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
+ }
+
+ lvaCount = startVars;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
+ }
+#endif
+
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ // printf("After inlining lvaCount=%d.\n", lvaCount);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Performs checks to see if this tail call can be optimized as epilog+jmp.
+ */
+bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
+{
+#if FEATURE_FASTTAILCALL
+ // Reached here means that return types of caller and callee are tail call compatible.
+ // In case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
+ //
+ // In an implicit tail call case callSig may not be available but it is guaranteed to be available
+ // for explicit tail call cases. The reason callSig may not be available in the implicit tail call case is that
+ // a call node might be marked as an inline candidate and could fail to be inlined, in which case
+ // fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which
+ // currently does not copy/set callSig.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (callee->IsTailPrefixedCall())
+ {
+ assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
+ (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
+ }
+#endif
+
+ // Note on vararg methods:
+ // If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
+ // But we can be sure that the incoming arg area of the vararg caller would be sufficient to hold its
+ // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as the
+ // out-going area required for the callee is bounded by the caller's fixed argument space.
+ //
+ // Note that the callee being a vararg method is not a problem since we can account for the params being passed.
+
+ // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie)
+ unsigned nCallerArgs = info.compArgsCount;
+
+ // Count the callee args including implicit and hidden.
+ // Note that GenericContext and VarargCookie are added by importer while
+ // importing the call to gtCallArgs list along with explicit user args.
+ unsigned nCalleeArgs = 0;
+ if (callee->gtCallObjp) // thisPtr
+ {
+ nCalleeArgs++;
+ }
+
+ if (callee->HasRetBufArg()) // RetBuf
+ {
+ nCalleeArgs++;
+
+ // If callee has RetBuf param, caller too must have it.
+ // Otherwise go the slow route.
+ if (info.compRetBuffArg == BAD_VAR_NUM)
+ {
+ return false;
+ }
+ }
+
+ // Count user args while tracking whether any of them is a multi-byte param
+ // that cannot be passed in a register. Note that we don't need to count
+ // non-standard and secret params passed in registers (e.g. R10, R11) since
+ // these won't contribute to out-going arg size.
+ bool hasMultiByteArgs = false;
+ for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2)
+ {
+ nCalleeArgs++;
+
+ assert(args->IsList());
+ GenTreePtr argx = args->gtOp.gtOp1;
+
+ if (varTypeIsStruct(argx))
+ {
+ // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
+ while (argx->gtOper == GT_COMMA)
+ {
+ argx = argx->gtOp.gtOp2;
+ }
+
+ // Get the size of the struct and see if it is register passable.
+ CORINFO_CLASS_HANDLE objClass = nullptr;
+
+ if (argx->OperGet() == GT_OBJ)
+ {
+ objClass = argx->AsObj()->gtClass;
+ }
+ else if (argx->IsLocal())
+ {
+ objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
+ }
+ if (objClass != nullptr)
+ {
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+
+ unsigned typeSize = 0;
+ hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
+ // On System V/arm64 the arg could be a two-eightbyte struct that is passed in two registers.
+ // Account for the second eightbyte in the nCalleeArgs.
+ // https://github.com/dotnet/coreclr/issues/2666
+ // TODO-CQ-Amd64-Unix/arm64: Structs of size between 9 and 16 bytes are conservatively estimated
+ // as two args, since they need two registers whereas nCallerArgs is
+ // counting such an arg as one. This would mean we will not be optimizing
+ // certain calls though technically possible.
+
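+ // Illustrative example: a 12-byte struct on a 64-bit target gives
+ // typeSize / TARGET_POINTER_SIZE == 1, so one extra slot is added below and the
+ // struct is counted as two callee args in total (together with the nCalleeArgs++ above).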
+ if (typeSize > TARGET_POINTER_SIZE)
+ {
+ unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE);
+ nCalleeArgs += extraArgRegsToAdd;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
+
+#else
+ assert(!"Target platform ABI rules regarding passing struct type args in registers");
+ unreached();
+#endif //_TARGET_AMD64_ || _TARGET_ARM64_
+ }
+ else
+ {
+ hasMultiByteArgs = true;
+ }
+ }
+ }
+
+ // Go the slow route, if it has multi-byte params
+ if (hasMultiByteArgs)
+ {
+ return false;
+ }
+
+ // Reaching here means that the callee has only argument types that can be passed in
+ // a register and that, if passed on the stack, occupy exactly one stack slot in the out-going arg area.
+ // If we are passing args on the stack for the callee and it has more stack args than the
+ // caller, then a fast tail call cannot be performed.
+ //
+ // Note that the GC'ness of on stack args need not match since the arg setup area is marked
+ // as non-interruptible for fast tail calls.
+ if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs))
+ {
+ return false;
+ }
+
+ return true;
+#else
+ return false;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GT_CALL tree for tail call code generation.
+ */
+void Compiler::fgMorphTailCall(GenTreeCall* call)
+{
+ JITDUMP("fgMorphTailCall (before):\n");
+ DISPTREE(call);
+
+#if defined(_TARGET_ARM_)
+ // For the helper-assisted tail calls, we need to push all the arguments
+ // into a single list, and then add a few extra at the beginning
+
+ // Check for PInvoke call types that we don't handle in codegen yet.
+ assert(!call->IsUnmanaged());
+ assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));
+
+ // First move the this pointer (if any) onto the regular arg list
+ GenTreePtr thisPtr = NULL;
+ if (call->gtCallObjp)
+ {
+ GenTreePtr objp = call->gtCallObjp;
+ call->gtCallObjp = NULL;
+
+ if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
+ {
+ thisPtr = gtClone(objp, true);
+ var_types vt = objp->TypeGet();
+ if (thisPtr == NULL)
+ {
+ // Too complex, so use a temp
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
+ GenTreePtr asg = gtNewTempAssign(lclNum, objp);
+ if (!call->IsVirtualVtable())
+ {
+ // Add an indirection to get the nullcheck
+ GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
+ GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
+ asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
+ }
+ objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
+ thisPtr = gtNewLclvNode(lclNum, vt);
+ }
+ else if (!call->IsVirtualVtable())
+ {
+ GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
+ objp = gtNewOperNode(GT_COMMA, vt, ind, objp);
+ thisPtr = gtClone(thisPtr, true);
+ }
+
+ call->gtFlags &= ~GTF_CALL_NULLCHECK;
+ }
+
+ call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
+ }
+
+ // Add the extra VSD parameter if needed
+ CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
+ if (call->IsVirtualStub())
+ {
+ flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
+
+ GenTreePtr arg;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ arg = gtClone(call->gtCallAddr, true);
+ noway_assert(arg != NULL);
+ }
+ else
+ {
+ noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
+ ssize_t addr = ssize_t(call->gtStubCallStubAddr);
+ arg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
+
+ // Change the call type, so we can add the extra indirection here, rather than in codegen
+ call->gtCallAddr = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
+ call->gtStubCallStubAddr = NULL;
+ call->gtCallType = CT_INDIRECT;
+ }
+ // Add the extra indirection to generate the real target
+ call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr);
+ call->gtFlags |= GTF_EXCEPT;
+
+ // And push the stub address onto the list of arguments
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+ }
+ else if (call->IsVirtualVtable())
+ {
+ // TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER
+
+ noway_assert(thisPtr != NULL);
+
+ GenTreePtr add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
+ GenTreePtr vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
+ vtbl->gtFlags |= GTF_EXCEPT;
+
+ unsigned vtabOffsOfIndirection;
+ unsigned vtabOffsAfterIndirection;
+ info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection);
+
+ /* Get the appropriate vtable chunk */
+
+ add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
+ vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
+
+ /* Now the appropriate vtable slot */
+
+ add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
+ vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
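+
+ // The computed call target is therefore (illustrative sketch):
+ // IND(IND(IND(thisPtr + VPTR_OFFS) + vtabOffsOfIndirection) + vtabOffsAfterIndirection)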
+
+ // Switch this to a plain indirect call
+ call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
+ assert(!call->IsVirtual());
+ call->gtCallType = CT_INDIRECT;
+
+ call->gtCallAddr = vtbl;
+ call->gtCallCookie = NULL;
+ call->gtFlags |= GTF_EXCEPT;
+ }
+
+ // Now inject a placeholder for the real call target that codegen
+ // will generate
+ GenTreePtr arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL);
+ codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+
+ // Lastly inject the pointer for the copy routine
+ noway_assert(call->callSig != NULL);
+ void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
+ arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+
+ // It is now a varargs tail call
+ call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
+ call->gtFlags &= ~GTF_CALL_POP_ARGS;
+
+#elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+
+ // x86 classic codegen doesn't require any morphing
+
+ // For the helper-assisted tail calls, we need to push all the arguments
+ // into a single list, and then add a few extra at the beginning or end.
+ //
+ // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
+ //
+ // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
+ //
+ // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
+ // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
+ // for callTarget here which will be replaced later with callTarget in tail call lowering.
+ //
+ // For x86, the tailcall helper is defined as:
+ //
+ // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
+ // callTarget)
+ //
+ // Note that the special arguments are on the stack, whereas the function arguments follow
+ // the normal convention: there might be register arguments in ECX and EDX. The stack will
+ // look like (highest address at the top):
+ // first normal stack argument
+ // ...
+ // last normal stack argument
+ // numberOfOldStackArgs
+ // numberOfNewStackArgs
+ // flags
+ // callTarget
+ //
+ // Each special arg is 4 bytes.
+ //
+ // 'flags' is a bitmask where:
+ // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
+ // callee-saved registers for tailcall functions. Note that the helper assumes
+ // that the callee-saved registers live immediately below EBP, and must have been
+ // pushed in this order: EDI, ESI, EBX.
+ // 2 == call target is a virtual stub dispatch.
+ //
+ // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
+ // on the custom calling convention.
+
+ // Check for PInvoke call types that we don't handle in codegen yet.
+ assert(!call->IsUnmanaged());
+ assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
+
+ // Don't support tail calling helper methods
+ assert(call->gtCallType != CT_HELPER);
+
+ // We come down this route only for tail prefixed calls that cannot be dispatched as
+ // fast tail calls
+ assert(!call->IsImplicitTailCall());
+ assert(!fgCanFastTailCall(call));
+
+ // First move the 'this' pointer (if any) onto the regular arg list. We do this because
+ // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
+ // and thus shift where the 'this' pointer will be passed to a later argument slot. In
+ // addition, for all platforms, we are going to change the call into a helper call. Our code
+ // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
+ // do this transformation, we must explicitly create a null 'this' pointer check, if required,
+ // since special 'this' pointer handling will no longer kick in.
+ //
+ // Some call types, such as virtual vtable calls, require creating a call address expression
+ // that involves the "this" pointer. Lowering will sometimes create an embedded statement
+ // to create a temporary that is assigned to the "this" pointer expression, and then use
+ // that temp to create the call address expression. This temp creation embedded statement
+ // will occur immediately before the "this" pointer argument, and then will be used for both
+ // the "this" pointer argument as well as the call address expression. In the normal ordering,
+ // the embedded statement establishing the "this" pointer temp will execute before both uses
+ // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
+ // normal call argument list, and insert a placeholder which will hold the call address
+ // expression. For non-x86, things are ok, because the order of execution of these is not
+ // altered. However, for x86, the call address expression is inserted as the *last* argument
+ // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
+ // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
+ // for those cases where call lowering creates an embedded form temp of "this", we will
+ // create a temp here, early, that will later get morphed correctly.
+
+ if (call->gtCallObjp)
+ {
+ GenTreePtr thisPtr = nullptr;
+ GenTreePtr objp = call->gtCallObjp;
+ call->gtCallObjp = nullptr;
+
+#ifdef _TARGET_X86_
+ if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
+ {
+ // tmp = "this"
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
+ GenTreePtr asg = gtNewTempAssign(lclNum, objp);
+
+ // COMMA(tmp = "this", tmp)
+ var_types vt = objp->TypeGet();
+ GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
+ thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp);
+
+ objp = thisPtr;
+ }
+#endif // _TARGET_X86_
+
+ if (call->NeedsNullCheck())
+ {
+ // clone "this" if "this" has no side effects.
+ if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
+ {
+ thisPtr = gtClone(objp, true);
+ }
+
+ var_types vt = objp->TypeGet();
+ if (thisPtr == nullptr)
+ {
+ // create a temp if either "this" has side effects or "this" is too complex to clone.
+
+ // tmp = "this"
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
+ GenTreePtr asg = gtNewTempAssign(lclNum, objp);
+
+ // COMMA(tmp = "this", deref(tmp))
+ GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
+ GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
+ asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
+
+ // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
+ thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
+ }
+ else
+ {
+ // thisPtr = COMMA(deref("this"), "this")
+ GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
+ thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
+ }
+
+ call->gtFlags &= ~GTF_CALL_NULLCHECK;
+ }
+ else
+ {
+ thisPtr = objp;
+ }
+
+ // During rationalization tmp="this" and null check will
+ // materialize as embedded stmts in the right execution order.
+ assert(thisPtr != nullptr);
+ call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
+ }
+
+#if defined(_TARGET_AMD64_)
+
+ // Add the extra VSD parameter to arg list in case of VSD calls.
+ // Tail call arg copying thunk will move this extra VSD parameter
+ // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
+ // in Stublinkerx86.cpp for more details.
+ CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
+ if (call->IsVirtualStub())
+ {
+ GenTreePtr stubAddrArg;
+
+ flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
+
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ stubAddrArg = gtClone(call->gtCallAddr, true);
+ noway_assert(stubAddrArg != nullptr);
+ }
+ else
+ {
+ noway_assert((call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0);
+
+ ssize_t addr = ssize_t(call->gtStubCallStubAddr);
+ stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
+ }
+
+ // Push the stub address onto the list of arguments
+ call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
+ }
+
+ // Now inject a placeholder for the real call target that Lower phase will generate.
+ GenTreePtr arg = gtNewIconNode(0, TYP_I_IMPL);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+
+ // Inject the pointer for the copy routine to be used for struct copying
+ noway_assert(call->callSig != nullptr);
+ void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
+ arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+
+#else // !_TARGET_AMD64_
+
+ // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
+ // append to the list.
+ GenTreeArgList** ppArg = &call->gtCallArgs;
+ for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
+ {
+ ppArg = (GenTreeArgList**)&args->gtOp2;
+ }
+ assert(ppArg != nullptr);
+ assert(*ppArg == nullptr);
+
+ unsigned nOldStkArgsWords =
+ (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
+ GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
+ ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
+
+ // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
+ // The constant will be replaced.
+ GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
+ ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
+
+ // Inject a placeholder for the flags.
+ // The constant will be replaced.
+ GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg1, nullptr);
+ ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
+
+ // Inject a placeholder for the real call target that the Lowering phase will generate.
+ // The constant will be replaced.
+ GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg0, nullptr);
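+
+ // At this point the x86 argument list ends with (illustrative):
+ // ..., numberOfOldStackArgs, <numberOfNewStackArgs placeholder>,
+ // <flags placeholder>, <callTarget placeholder>
+ // matching the JIT_TailCall layout described above; the Lowering phase replaces
+ // the placeholder constants with the real values.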
+
+#endif // !_TARGET_AMD64_
+
+ // It is now a varargs tail call dispatched via helper.
+ call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
+ call->gtFlags &= ~GTF_CALL_POP_ARGS;
+
+#endif // _TARGET_*
+
+ JITDUMP("fgMorphTailCall (after):\n");
+ DISPTREE(call);
+}
+
+//------------------------------------------------------------------------------
+// fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
+//
+//
+// Arguments:
+// block - basic block ending with a recursive fast tail call
+// recursiveTailCall - recursive tail call to transform
+//
+// Notes:
+// The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
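+//
+// Illustrative sketch: a recursive tail call such as "return Foo(x - 1, 1)" is replaced
+// by assignments of the new argument values (via temps where needed) to the parameter
+// locals, followed by an unconditional jump back to the start of the method; see the
+// commented example inside the method body.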
+
+void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
+{
+ assert(recursiveTailCall->IsTailCallConvertibleToLoop());
+ GenTreePtr last = block->lastStmt();
+ assert(recursiveTailCall == last->gtStmt.gtStmtExpr);
+
+ // Transform recursive tail call into a loop.
+
+ GenTreePtr earlyArgInsertionPoint = last;
+ IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx;
+
+ // Hoist arg setup statement for the 'this' argument.
+ GenTreePtr thisArg = recursiveTailCall->gtCallObjp;
+ if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
+ {
+ GenTreePtr thisArgStmt = gtNewStmt(thisArg, callILOffset);
+ fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
+ }
+
+ // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
+ // then the temps need to be assigned to the method parameters. This is done so that the caller
+ // parameters are not re-assigned before call arguments depending on them are evaluated.
+ // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
+ // where the next temp or parameter assignment should be inserted.
+
+ // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
+ // while the second call argument (const 1) doesn't.
+ // Basic block before tail recursion elimination:
+ // ***** BB04, stmt 1 (top level)
+ // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013)
+ // [000033] --C - G------ - \--* call void RecursiveMethod
+ // [000030] ------------ | / --* const int - 1
+ // [000031] ------------arg0 in rcx + --* +int
+ // [000029] ------------ | \--* lclVar int V00 arg1
+ // [000032] ------------arg1 in rdx \--* const int 1
+ //
+ //
+ // Basic block after tail recursion elimination :
+ // ***** BB04, stmt 1 (top level)
+ // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
+ // [000030] ------------ | / --* const int - 1
+ // [000031] ------------ | / --* +int
+ // [000029] ------------ | | \--* lclVar int V00 arg1
+ // [000050] - A---------- \--* = int
+ // [000049] D------N---- \--* lclVar int V02 tmp0
+ //
+ // ***** BB04, stmt 2 (top level)
+ // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
+ // [000052] ------------ | / --* lclVar int V02 tmp0
+ // [000054] - A---------- \--* = int
+ // [000053] D------N---- \--* lclVar int V00 arg0
+
+ // ***** BB04, stmt 3 (top level)
+ // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
+ // [000032] ------------ | / --* const int 1
+ // [000057] - A---------- \--* = int
+ // [000056] D------N---- \--* lclVar int V01 arg1
+
+ GenTreePtr tmpAssignmentInsertionPoint = last;
+ GenTreePtr paramAssignmentInsertionPoint = last;
+
+ // Process early args. They may contain both setup statements for late args and actual args.
+ // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
+ // below has the correct second argument.
+ int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
+ for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr;
+ (earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
+ {
+ GenTreePtr earlyArg = earlyArgs->Current();
+ if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
+ {
+ if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
+ {
+ // This is a setup node so we need to hoist it.
+ GenTreePtr earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
+ fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
+ }
+ else
+ {
+ // This is an actual argument that needs to be assigned to the corresponding caller parameter.
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
+ GenTreePtr paramAssignStmt =
+ fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
+ tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
+ if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
+ {
+ // All temp assignments will happen before the first param assignment.
+ tmpAssignmentInsertionPoint = paramAssignStmt;
+ }
+ }
+ }
+ }
+
+ // Process late args.
+ int lateArgIndex = 0;
+ for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr;
+ (lateArgIndex++, lateArgs = lateArgs->Rest()))
+ {
+ // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
+ GenTreePtr lateArg = lateArgs->Current();
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
+ GenTreePtr paramAssignStmt =
+ fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
+ tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
+
+ if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
+ {
+ // All temp assignments will happen before the first param assignment.
+ tmpAssignmentInsertionPoint = paramAssignStmt;
+ }
+ }
+
+ // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that
+ // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that
+ // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
+ if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
+ {
+ var_types thisType = lvaTable[info.compThisArg].TypeGet();
+ GenTreePtr arg0 = gtNewLclvNode(lvaArg0Var, thisType);
+ GenTreePtr arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
+ GenTreePtr arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
+ fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
+ }
+
+ // Remove the call
+ fgRemoveStmt(block, last);
+
+ // Set the loop edge.
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = fgFirstBBisScratch() ? fgFirstBB->bbNext : fgFirstBB;
+ fgAddRefPred(block->bbJumpDest, block);
+ block->bbFlags &= ~BBF_HAS_JMP;
+}
+
+//------------------------------------------------------------------------------
+// fgAssignRecursiveCallArgToCallerParam : Assign an argument of a recursive call to the corresponding caller parameter.
+//
+// Arguments:
+// arg - argument to assign
+// argTabEntry - argument table entry corresponding to arg
+// block - basic block the call is in
+// callILOffset - IL offset of the call
+// tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary)
+// paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted
+//
+// Return Value:
+// parameter assignment statement if one was inserted; nullptr otherwise.
+
+GenTreePtr Compiler::fgAssignRecursiveCallArgToCallerParam(GenTreePtr arg,
+ fgArgTabEntryPtr argTabEntry,
+ BasicBlock* block,
+ IL_OFFSETX callILOffset,
+ GenTreePtr tmpAssignmentInsertionPoint,
+ GenTreePtr paramAssignmentInsertionPoint)
+{
+ // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
+ // some argument trees may reference parameters directly.
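+ // For illustration (hypothetical example): for a recursive tail call 'f(arg1, arg0)', assigning
+ // the parameters directly would clobber arg0 before it is read for the second argument:
+ //     arg0 = arg1; arg1 = arg0;                   // wrong: arg1 gets the new value of arg0
+ // Going through temps first is always safe:
+ //     tmp0 = arg1; tmp1 = arg0; arg0 = tmp0; arg1 = tmp1;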
+
+ GenTreePtr argInTemp = nullptr;
+ unsigned originalArgNum = argTabEntry->argNum;
+ bool needToAssignParameter = true;
+
+ // TODO-CQ: enable calls with struct arguments passed in registers.
+ noway_assert(!varTypeIsStruct(arg->TypeGet()));
+
+ if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
+ {
+ // The argument is already assigned to a temp or is a const.
+ argInTemp = arg;
+ }
+ else if (arg->OperGet() == GT_LCL_VAR)
+ {
+ unsigned lclNum = arg->AsLclVar()->gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ if (!varDsc->lvIsParam)
+ {
+ // The argument is a non-parameter local so it doesn't need to be assigned to a temp.
+ argInTemp = arg;
+ }
+ else if (lclNum == originalArgNum)
+ {
+ // The argument is the same parameter local that we were about to assign so
+ // we can skip the assignment.
+ needToAssignParameter = false;
+ }
+ }
+
+ // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
+ // any caller parameters. Some common cases are handled above but we may be able to eliminate
+ // more temp assignments.
+
+ GenTreePtr paramAssignStmt = nullptr;
+ if (needToAssignParameter)
+ {
+ if (argInTemp == nullptr)
+ {
+ // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
+ // TODO: we can avoid a temp assignment if we can prove that the argument tree
+ // doesn't involve any caller parameters.
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
+ GenTreePtr tempSrc = arg;
+ GenTreePtr tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType);
+ GenTreePtr tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
+ GenTreePtr tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
+ fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
+ argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
+ }
+
+ // Now assign the temp to the parameter.
+ LclVarDsc* paramDsc = lvaTable + originalArgNum;
+ assert(paramDsc->lvIsParam);
+ GenTreePtr paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType);
+ GenTreePtr paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
+ paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset);
+
+ fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
+ }
+ return paramAssignStmt;
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GT_CALL tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
+{
+ if (call->CanTailCall())
+ {
+ // It should either be an explicit (i.e. tail prefixed) or an implicit tail call
+ assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
+
+ // It cannot be an inline candidate
+ assert(!call->IsInlineCandidate());
+
+ const char* szFailReason = nullptr;
+ bool hasStructParam = false;
+ if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
+ {
+ szFailReason = "Might turn into an intrinsic";
+ }
+
+ if (opts.compNeedSecurityCheck)
+ {
+ szFailReason = "Needs security check";
+ }
+ else if (compLocallocUsed)
+ {
+ szFailReason = "Localloc used";
+ }
+#ifdef _TARGET_AMD64_
+ // Needed for Jit64 compat.
+ // In future, enabling tail calls from methods that need GS cookie check
+ // would require codegen side work to emit GS cookie check before a tail
+ // call.
+ else if (getNeedsGSSecurityCookie())
+ {
+ szFailReason = "GS Security cookie check";
+ }
+#endif
+#ifdef DEBUG
+ // DDB 99324: Just disable tailcall under compGcChecks stress mode.
+ else if (opts.compGcChecks)
+ {
+ szFailReason = "GcChecks";
+ }
+#endif
+#if FEATURE_TAILCALL_OPT
+ else
+ {
+ // We are still not sure whether it can be a tail call. When converting
+ // a call to an implicit tail call, we must check that there are no locals with
+ // their address taken. If that is the case, we have to assume that the address
+ // has been leaked and the current stack frame must live until after the final
+ // call.
+
+ // Verify that none of the vars has the lvHasLdAddrOp or lvAddrExposed bit set. Note
+ // that lvHasLdAddrOp is much more conservative. We cannot just base this on
+ // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
+ // during the morph stage. The reason for also checking lvAddrExposed is that in the case
+ // of vararg methods user args are marked as address exposed but not lvHasLdAddrOp.
+ // The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, guarantees
+ // that we are never incorrect.
+ //
+ // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
+ // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed
+ // is set. This avoids the need for iterating through all lcl vars of the current
+ // method. Right now throughout the code base we are not consistently using 'set'
+ // method to set lvHasLdAddrOp and lvAddrExposed flags.
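+ //
+ // Illustrative (hypothetical) example of a rejection: in
+ //     int x = ...; Use(ref x); return Callee();
+ // 'x' is address-exposed, so an implicit tail call to Callee() must be rejected because the
+ // caller's frame (and therefore the address of x) may still be observed while Callee() runs.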
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ bool hasAddrExposedVars = false;
+ bool hasStructPromotedParam = false;
+ bool hasPinnedVars = false;
+
+ for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
+ {
+ // If the method is marked as an explicit tail call we will skip the
+ // following three hazard checks.
+ // We still must check for any struct parameters and set 'hasStructParam'
+ // so that we won't transform the recursive tail call into a loop.
+ //
+ if (call->IsImplicitTailCall())
+ {
+ if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed)
+ {
+ hasAddrExposedVars = true;
+ break;
+ }
+ if (varDsc->lvPromoted && varDsc->lvIsParam)
+ {
+ hasStructPromotedParam = true;
+ break;
+ }
+ if (varDsc->lvPinned)
+ {
+ // A tail call removes the method from the stack, which means the pinning
+ // goes away for the callee. We can't allow that.
+ hasPinnedVars = true;
+ break;
+ }
+ }
+ if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
+ {
+ hasStructParam = true;
+ // This prevents transforming a recursive tail call into a loop
+ // but doesn't prevent tail call optimization so we need to
+ // look at the rest of parameters.
+ continue;
+ }
+ }
+
+ if (hasAddrExposedVars)
+ {
+ szFailReason = "Local address taken";
+ }
+ if (hasStructPromotedParam)
+ {
+ szFailReason = "Has Struct Promoted Param";
+ }
+ if (hasPinnedVars)
+ {
+ szFailReason = "Has Pinned Vars";
+ }
+ }
+#endif // FEATURE_TAILCALL_OPT
+
+ if (varTypeIsStruct(call))
+ {
+ fgFixupStructReturn(call);
+ }
+
+ var_types callType = call->TypeGet();
+
+ // We have to ensure to pass the incoming retValBuf as the
+ // outgoing one. Using a temp will not do as this function will
+ // not regain control to do the copy.
+
+ if (info.compRetBuffArg != BAD_VAR_NUM)
+ {
+ noway_assert(callType == TYP_VOID);
+ GenTreePtr retValBuf = call->gtCallArgs->gtOp.gtOp1;
+ if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
+ {
+ szFailReason = "Need to copy return buffer";
+ }
+ }
+
+ // If this is an opportunistic tail call and cannot be dispatched as
+ // fast tail call, go the non-tail call route. This is done for perf
+ // reason.
+ //
+ // Avoid the cost of determining whether can be dispatched as fast tail
+ // call if we already know that tail call cannot be honored for other
+ // reasons.
+ bool canFastTailCall = false;
+ if (szFailReason == nullptr)
+ {
+ canFastTailCall = fgCanFastTailCall(call);
+ if (!canFastTailCall)
+ {
+ // Implicit or opportunistic tail calls are always dispatched via fast tail call
+ // mechanism and never via tail call helper for perf.
+ if (call->IsImplicitTailCall())
+ {
+ szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
+ }
+#ifndef LEGACY_BACKEND
+ else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
+ {
+ // If we are here, it means that the call is explicitly ".tail" prefixed and cannot be
+ // dispatched as a fast tail call.
+
+ // Methods with non-standard args will have indirection cell or cookie param passed
+ // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before
+ // tail calling the target method and hence ".tail" prefix on such calls needs to be
+ // ignored.
+ //
+ // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require
+ // extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper.
+ // This is done by adding stubAddr as an additional arg before the original list of
+ // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
+ // in Stublinkerx86.cpp.
+ szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
+ "called via helper";
+ }
+#ifdef _TARGET_ARM64_
+ else
+ {
+ // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
+ // So, bail out if we can't make fast tail call.
+ szFailReason = "Non-qualified fast tail call";
+ }
+#endif
+#endif // LEGACY_BACKEND
+ }
+ }
+
+ // Clear these flags before calling fgMorphCall() to avoid recursion.
+ bool isTailPrefixed = call->IsTailPrefixedCall();
+ call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
+
+#if FEATURE_TAILCALL_OPT
+ call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
+#endif
+
+#ifdef FEATURE_PAL
+ if (!canFastTailCall && szFailReason == nullptr)
+ {
+ szFailReason = "Non fast tail calls disabled for PAL based systems.";
+ }
+#endif // FEATURE_PAL
+
+ if (szFailReason != nullptr)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRejecting tail call late for call ");
+ printTreeID(call);
+ printf(": %s\n", szFailReason);
+ }
+#endif
+
+ // for non user funcs, we have no handles to report
+ info.compCompHnd->reportTailCallDecision(nullptr,
+ (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
+ isTailPrefixed, TAILCALL_FAIL, szFailReason);
+
+ goto NO_TAIL_CALL;
+ }
+
+#if !FEATURE_TAILCALL_OPT_SHARED_RETURN
+ // We enable shared-ret tail call optimization for recursive calls even if
+ // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
+ if (gtIsRecursiveCall(call))
+#endif
+ {
+ // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
+ // but if the call falls through to a ret, and we are doing a tailcall, change it here.
+ if (compCurBB->bbJumpKind != BBJ_RETURN)
+ {
+ compCurBB->bbJumpKind = BBJ_RETURN;
+ }
+ }
+
+ // Set this flag before calling fgMorphCall() to prevent inlining this call.
+ call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
+
+ bool fastTailCallToLoop = false;
+#if FEATURE_TAILCALL_OPT
+ // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
+ // or return type is a struct that can be passed in a register.
+ //
+ // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
+ // hidden generic context param or through keep alive thisptr), then while transforming a recursive
+ // call to such a method requires that the generic context stored on stack slot be updated. Right now,
+ // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
+ // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
+ // generic type parameters of both caller and callee generic method are the same.
+ if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
+ !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet()))
+ {
+ call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
+ fastTailCallToLoop = true;
+ }
+#endif
+
+ // Do some target-specific transformations (before we process the args, etc.)
+ // This is needed only for tail prefixed calls that cannot be dispatched as
+ // fast calls.
+ if (!canFastTailCall)
+ {
+ fgMorphTailCall(call);
+ }
+
+ // Implementation note: If we optimize the tailcall to do a direct jump
+ // to the target function (after stomping on the return address, etc),
+ // without using CORINFO_HELP_TAILCALL, we have to make certain that
+ // we don't starve the hijacking logic (by stomping on the hijacked
+ // return address etc).
+
+ // At this point, we are committed to do the tailcall.
+ compTailCallUsed = true;
+
+ CorInfoTailCall tailCallResult;
+
+ if (fastTailCallToLoop)
+ {
+ tailCallResult = TAILCALL_RECURSIVE;
+ }
+ else if (canFastTailCall)
+ {
+ tailCallResult = TAILCALL_OPTIMIZED;
+ }
+ else
+ {
+ tailCallResult = TAILCALL_HELPER;
+ }
+
+ // for non user funcs, we have no handles to report
+ info.compCompHnd->reportTailCallDecision(nullptr,
+ (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
+ isTailPrefixed, tailCallResult, nullptr);
+
+ // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
+ // to avoid doing any extra work for the return value.
+ call->gtType = TYP_VOID;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nGTF_CALL_M_TAILCALL bit set for call ");
+ printTreeID(call);
+ printf("\n");
+ if (fastTailCallToLoop)
+ {
+ printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
+ printTreeID(call);
+ printf("\n");
+ }
+ }
+#endif
+
+ GenTreePtr stmtExpr = fgMorphStmt->gtStmt.gtStmtExpr;
+
+#ifdef DEBUG
+ // Tail call needs to be in one of the following IR forms
+ // Either a call stmt or
+ // GT_RETURN(GT_CALL(..)) or
+ // var = call
+ noway_assert((stmtExpr->gtOper == GT_CALL && stmtExpr == call) ||
+ (stmtExpr->gtOper == GT_RETURN &&
+ (stmtExpr->gtOp.gtOp1 == call || stmtExpr->gtOp.gtOp1->gtOp.gtOp1 == call)) ||
+ (stmtExpr->gtOper == GT_ASG && stmtExpr->gtOp.gtOp2 == call));
+#endif
+
+ // For void calls, we would have created a GT_CALL in the stmt list.
+ // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
+ // For calls returning structs, we would have a void call, followed by a void return.
+ // For debuggable code, it would be an assignment of the call to a temp.
+ // We want to get rid of any of these extra trees, and just leave
+ // the call.
+ GenTreePtr nextMorphStmt = fgMorphStmt->gtNext;
+
+#ifdef _TARGET_AMD64_
+ // Legacy Jit64 Compat:
+ // There could be any number of GT_NOPs between tail call and GT_RETURN.
+ // That is tail call pattern could be one of the following:
+ // 1) tail.call, nop*, ret
+ // 2) tail.call, nop*, pop, nop*, ret
+ // 3) var=tail.call, nop*, ret(var)
+ // 4) var=tail.call, nop*, pop, ret
+ //
+ // See impIsTailCallILPattern() for details on tail call IL patterns
+ // that are supported.
+ if ((stmtExpr->gtOper == GT_CALL) || (stmtExpr->gtOper == GT_ASG))
+ {
+ // First delete all GT_NOPs after the call
+ GenTreePtr morphStmtToRemove = nullptr;
+ while (nextMorphStmt != nullptr)
+ {
+ GenTreePtr nextStmtExpr = nextMorphStmt->gtStmt.gtStmtExpr;
+ if (!nextStmtExpr->IsNothingNode())
+ {
+ break;
+ }
+
+ morphStmtToRemove = nextMorphStmt;
+ nextMorphStmt = morphStmtToRemove->gtNext;
+ fgRemoveStmt(compCurBB, morphStmtToRemove);
+ }
+
+ // Check to see if there is a pop.
+ // Since tail call is honored, we can get rid of the stmt corresponding to pop.
+ if (nextMorphStmt != nullptr && nextMorphStmt->gtStmt.gtStmtExpr->gtOper != GT_RETURN)
+ {
+ // Note that pop opcode may or may not result in a new stmt (for details see
+ // impImportBlockCode()). Hence, it is not possible to assert about the IR
+ // form generated by pop but pop tree must be side-effect free so that we can
+ // delete it safely.
+ GenTreePtr popStmt = nextMorphStmt;
+ nextMorphStmt = nextMorphStmt->gtNext;
+
+ noway_assert((popStmt->gtStmt.gtStmtExpr->gtFlags & GTF_ALL_EFFECT) == 0);
+ fgRemoveStmt(compCurBB, popStmt);
+ }
+
+ // Next delete any GT_NOP nodes after pop
+ while (nextMorphStmt != nullptr)
+ {
+ GenTreePtr nextStmtExpr = nextMorphStmt->gtStmt.gtStmtExpr;
+ if (!nextStmtExpr->IsNothingNode())
+ {
+ break;
+ }
+
+ morphStmtToRemove = nextMorphStmt;
+ nextMorphStmt = morphStmtToRemove->gtNext;
+ fgRemoveStmt(compCurBB, morphStmtToRemove);
+ }
+ }
+#endif // _TARGET_AMD64_
+
+ // Delete GT_RETURN if any
+ if (nextMorphStmt != nullptr)
+ {
+ GenTreePtr retExpr = nextMorphStmt->gtStmt.gtStmtExpr;
+ noway_assert(retExpr->gtOper == GT_RETURN);
+
+ // If var=call, then the next stmt must be a GT_RETURN(TYP_VOID) or GT_RETURN(var).
+ // This can occur if impSpillStackEnsure() has introduced an assignment to a temp.
+ if (stmtExpr->gtOper == GT_ASG && info.compRetType != TYP_VOID)
+ {
+ noway_assert(stmtExpr->gtGetOp1()->OperIsLocal());
+ noway_assert(stmtExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum ==
+ retExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum);
+ }
+
+ fgRemoveStmt(compCurBB, nextMorphStmt);
+ }
+
+ fgMorphStmt->gtStmt.gtStmtExpr = call;
+
+ // Tail call via helper: The VM can't use return address hijacking if we're
+ // not going to return and the helper doesn't have enough info to safely poll,
+ // so we poll before the tail call, if the block isn't already safe. Since
+ // tail call via helper is a slow mechanism it doesn't matter whether we emit a
+ // GC poll. This is done to be in parity with Jit64. Also this avoids GC info
+ // size increase if almost all methods are expected to be tail calls (e.g. F#).
+ //
+ // Note that we can avoid emitting a GC poll if we know that the current BB is
+ // dominated by a GC-SafePoint block. But we don't have dominator info at this
+ // point. One option is to just add a placeholder node for the GC poll (e.g. GT_GCPOLL)
+ // here and remove it in lowering if the block is dominated by a GC-SafePoint. For
+ // now it is not clear whether optimizing slow tail calls is worth the effort. As a
+ // low-cost check, we check whether the first and current basic blocks are
+ // GC-SafePoints.
+ //
+ // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
+ // is going to mark the method as fully interruptible if the block containing this tail
+ // call is reachable without executing any call.
+ if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
+ !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
+ {
+ // We didn't insert a poll block, so we need to morph the call now
+ // (Normally it will get morphed when we get to the split poll block)
+ GenTreePtr temp = fgMorphCall(call);
+ noway_assert(temp == call);
+ }
+
+ // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
+ // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
+ //
+ // Fast tail call: in case of fast tail calls, we need a jmp epilog and
+ // hence mark it as BBJ_RETURN with BBF_JMP flag set.
+ noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
+
+ if (canFastTailCall)
+ {
+ compCurBB->bbFlags |= BBF_HAS_JMP;
+ }
+ else
+ {
+ compCurBB->bbJumpKind = BBJ_THROW;
+ }
+
+ // For non-void calls, we return a place holder which will be
+ // used by the parent GT_RETURN node of this call.
+
+ GenTree* result = call;
+ if (callType != TYP_VOID && info.compRetType != TYP_VOID)
+ {
+#ifdef FEATURE_HFA
+ // Return a dummy node, as the return is already removed.
+ if (callType == TYP_STRUCT)
+ {
+ // This is a HFA, use float 0.
+ callType = TYP_FLOAT;
+ }
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Return a dummy node, as the return is already removed.
+ if (varTypeIsStruct(callType))
+ {
+ // This is a register-returned struct. Return a 0.
+ // The actual return registers are hacked in lower and the register allocator.
+ callType = TYP_INT;
+ }
+#endif
+#ifdef FEATURE_SIMD
+ // Return a dummy node, as the return is already removed.
+ if (varTypeIsSIMD(callType))
+ {
+ callType = TYP_DOUBLE;
+ }
+#endif
+ result = gtNewZeroConNode(genActualType(callType));
+ result = fgMorphTree(result);
+ }
+
+ return result;
+ }
+
+NO_TAIL_CALL:
+
+ if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
+ (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
+#ifdef FEATURE_READYTORUN_COMPILER
+ || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
+#endif
+ ) &&
+ (call == fgMorphStmt->gtStmt.gtStmtExpr))
+ {
+ // This is call to CORINFO_HELP_VIRTUAL_FUNC_PTR with ignored result.
+ // Transform it into a null check.
+
+ GenTreePtr thisPtr = call->gtCallArgs->gtOp.gtOp1;
+
+ GenTreePtr nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
+ nullCheck->gtFlags |= GTF_EXCEPT;
+
+ return fgMorphTree(nullCheck);
+ }
+
+ noway_assert(call->gtOper == GT_CALL);
+
+ //
+ // Only count calls once (only in the global morph phase)
+ //
+ if (fgGlobalMorph)
+ {
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ optCallCount++;
+ optIndirectCallCount++;
+ }
+ else if (call->gtCallType == CT_USER_FUNC)
+ {
+ optCallCount++;
+ if (call->IsVirtual())
+ {
+ optIndirectCallCount++;
+ }
+ }
+ }
+
+ // Couldn't inline - remember that this BB contains method calls
+
+ // If this is a 'regular' call, mark the basic block as
+ // having a call (for computing full interruptibility).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_AMD64_
+ // Amd64 note: If this is a fast tail call then don't count it as a call
+ // since we don't insert GC-polls but instead make the method fully GC
+ // interruptible.
+ if (!call->IsFastTailCall())
+#endif
+ {
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
+ }
+ else if (call->gtCallType == CT_USER_FUNC)
+ {
+ if ((call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK) == 0)
+ {
+ compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
+ }
+ }
+ // otherwise we have a CT_HELPER
+ }
+
+ // Morph Type.op_Equality and Type.op_Inequality
+ // We need to do this before the arguments are morphed
+ if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
+ {
+ CorInfoIntrinsics methodID = info.compCompHnd->getIntrinsicID(call->gtCallMethHnd);
+
+ genTreeOps simpleOp = GT_CALL;
+ if (methodID == CORINFO_INTRINSIC_TypeEQ)
+ {
+ simpleOp = GT_EQ;
+ }
+ else if (methodID == CORINFO_INTRINSIC_TypeNEQ)
+ {
+ simpleOp = GT_NE;
+ }
+
+ if (simpleOp == GT_EQ || simpleOp == GT_NE)
+ {
+ noway_assert(call->TypeGet() == TYP_INT);
+
+ // Check for GetClassFromHandle(handle) and obj.GetType(), both of which will only return RuntimeType
+ // objects. Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to
+ // GT_EQ/GT_NE: One important invariant that should never change is that type equivalency is always
+ // equivalent to object identity equality for runtime type objects in reflection. This is also reflected
+ // in RuntimeTypeHandle::TypeEquals. If this invariant were ever broken, we would need to remove the
+ // optimization below.
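+ //
+ // For illustration: a managed expression such as 'obj.GetType() == typeof(string)' reaches here
+ // as a call to Type.op_Equality; the code below replaces that call with a GT_EQ of the two
+ // RuntimeType object references, which fgMorphSmpOp can then optimize further (see the two
+ // patterns listed below).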
+
+ GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1;
+ GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
+
+ if (gtCanOptimizeTypeEquality(op1) || gtCanOptimizeTypeEquality(op2))
+ {
+ GenTreePtr compare = gtNewOperNode(simpleOp, TYP_INT, op1, op2);
+
+ // fgMorphSmpOp will further optimize the following patterns:
+ // 1. typeof(...) == typeof(...)
+ // 2. typeof(...) == obj.GetType()
+ return fgMorphTree(compare);
+ }
+ }
+ }
+
+ // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack.
+ GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require
+ // copy-back).
+ unsigned retValTmpNum = BAD_VAR_NUM;
+ CORINFO_CLASS_HANDLE structHnd = nullptr;
+ if (call->HasRetBufArg() &&
+ call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
+ {
+ // We're enforcing the invariant that return buffer pointers (at least for
+ // struct return types containing GC pointers) are never pointers into the heap.
+ // The large majority of cases are address of local variables, which are OK.
+ // Otherwise, allocate a local of the given struct type, pass its address,
+ // then assign from that into the proper destination. (We don't need to do this
+ // if we're passing the caller's ret buff arg to the callee, since the caller's caller
+ // will maintain the same invariant.)
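+ //
+ // Sketch of the transformation (illustrative): for a call whose return buffer may point into the
+ // GC heap, we effectively rewrite
+ //     Callee(dest, ...)                    // dest is the original ret buff arg
+ // as
+ //     Callee(&retValTmp, ...)              // call writes into a new stack-allocated temp
+ //     copyBlk(dest, &retValTmp)            // copy-back inserted below via gtNewCpObjNode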
+
+ GenTreePtr dest = call->gtCallArgs->gtOp.gtOp1;
+ assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
+ if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
+ {
+ // We'll exempt helper calls from this, assuming that the helper implementation
+ // follows the old convention, and does whatever barrier is required.
+ if (call->gtCallType != CT_HELPER)
+ {
+ structHnd = call->gtRetClsHnd;
+ if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
+ !((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR) &&
+ dest->gtLclVar.gtLclNum == info.compRetBuffArg))
+ {
+ origDest = dest;
+
+ retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
+ lvaSetStruct(retValTmpNum, structHnd, true);
+ dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
+ }
+ }
+ }
+
+ call->gtCallArgs->gtOp.gtOp1 = dest;
+ }
+
+ /* Process the "normal" argument list */
+ call = fgMorphArgs(call);
+ noway_assert(call->gtOper == GT_CALL);
+
+ // Morph stelem.ref helper call to store a null value, into a store into an array without the helper.
+ // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
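+ // For illustration: 'array[i] = null' is imported as a call to the CORINFO_HELP_ARRADDR_ST helper;
+ // since a null store can never fail the array covariance check, the helper call can be replaced
+ // by an ordinary array store (which still keeps the range check).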
+ if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
+ {
+ GenTreePtr value = gtArgEntryByArgNum(call, 2)->node;
+
+ if (value->IsIntegralConst(0))
+ {
+ assert(value->OperGet() == GT_CNS_INT);
+ GenTreePtr arr = gtArgEntryByArgNum(call, 0)->node;
+ GenTreePtr index = gtArgEntryByArgNum(call, 1)->node;
+
+ arr = gtClone(arr, true);
+ if (arr != nullptr)
+ {
+ index = gtClone(index, true);
+ if (index != nullptr)
+ {
+ value = gtClone(value);
+ noway_assert(value != nullptr);
+
+ GenTreePtr nullCheckedArr = impCheckForNullPointer(arr);
+ GenTreePtr arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
+ GenTreePtr arrStore = gtNewAssignNode(arrIndexNode, value);
+ arrStore->gtFlags |= GTF_ASG;
+
+ return fgMorphTree(arrStore);
+ }
+ }
+ }
+ }
+
+ // Optimize get_ManagedThreadId(get_CurrentThread)
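+ // i.e. fold 'Thread.CurrentThread.ManagedThreadId' into a single call to
+ // CORINFO_HELP_GETCURRENTMANAGEDTHREADID instead of two intrinsic calls.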
+ if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
+ info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
+ {
+ noway_assert(origDest == nullptr);
+ noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);
+
+ GenTreePtr innerCall = call->gtCallLateArgs->gtOp.gtOp1;
+
+ if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
+ info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
+ CORINFO_INTRINSIC_GetCurrentManagedThread)
+ {
+ // substitute expression with call to helper
+ GenTreePtr newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT, 0);
+ JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
+ return fgMorphTree(newCall);
+ }
+ }
+
+ if (origDest != nullptr)
+ {
+ GenTreePtr retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
+ // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
+ // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the
+ // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to
+ // be correct.
+ if (origDest->OperGet() == GT_ASG)
+ {
+ if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreePtr var = origDest->gtOp.gtOp1;
+ origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
+ gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
+ }
+ }
+ GenTreePtr copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
+ copyBlk = fgMorphTree(copyBlk);
+ GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
+#ifdef DEBUG
+ result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ return result;
+ }
+
+ if (call->IsNoReturn())
+ {
+ //
+ // If we know that the call does not return then we can set fgRemoveRestOfBlock
+ // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
+ // As a result the compiler won't need to preserve live registers across the call.
+ //
+ // This isn't needed for tail calls as there shouldn't be any code after the call anyway.
+ // Besides, the tail call code is part of the epilog and converting the block to
+ // BBJ_THROW would result in the tail call being dropped as the epilog is generated
+ // only for BBJ_RETURN blocks.
+ //
+ // Currently this doesn't work for non-void callees. Some of the code that handles
+ // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
+ // do not have this flag by default. We could add the flag here but the proper solution
+ // would be to replace the return expression with a local var node during inlining
+ // so the rest of the call tree stays in a separate statement. That statement can then
+ // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
+ //
+
+ if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
+ {
+ fgRemoveRestOfBlock = true;
+ }
+ }
+
+ return call;
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GTK_CONST tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphConst(GenTreePtr tree)
+{
+ noway_assert(tree->OperKind() & GTK_CONST);
+
+ /* Clear any exception flags or other unnecessary flags
+ * that may have been set before folding this node to a constant */
+
+ tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
+
+ if (tree->OperGet() != GT_CNS_STR)
+ {
+ return tree;
+ }
+
+ // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
+ // guarantee slow performance for that block. Instead cache the return value
+ // of CORINFO_HELP_STRCNS and go to cache first giving reasonable perf.
+
+ if (compCurBB->bbJumpKind == BBJ_THROW)
+ {
+ CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
+ if (helper != CORINFO_HELP_UNDEF)
+ {
+ // For un-important blocks, we want to construct the string lazily
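+ // (For illustration: a string literal that is only used inside a BBJ_THROW block, e.g. the
+ // message of a 'throw new ArgumentException("...")', is resolved at run time from its token
+ // via the lazy helper instead of being constructed eagerly at jit time.)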
+
+ GenTreeArgList* args;
+ if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
+ {
+ args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
+ }
+ else
+ {
+ args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
+ gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
+ }
+
+ tree = gtNewHelperCallNode(helper, TYP_REF, 0, args);
+ return fgMorphTree(tree);
+ }
+ }
+
+ assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
+
+ LPVOID pValue;
+ InfoAccessType iat =
+ info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue);
+
+ tree = gtNewStringLiteralNode(iat, pValue);
+
+ return fgMorphTree(tree);
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GTK_LEAF tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
+{
+ noway_assert(tree->OperKind() & GTK_LEAF);
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ return fgMorphLocalVar(tree);
+ }
+#ifdef _TARGET_X86_
+ else if (tree->gtOper == GT_LCL_FLD)
+ {
+ if (info.compIsVarArgs)
+ {
+ GenTreePtr newTree =
+ fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
+ if (newTree != NULL)
+ return newTree;
+ }
+ }
+#endif // _TARGET_X86_
+ else if (tree->gtOper == GT_FTN_ADDR)
+ {
+ CORINFO_CONST_LOOKUP addrInfo;
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
+ {
+ addrInfo = tree->gtFptrVal.gtEntryPoint;
+ }
+ else
+#endif
+ {
+ info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
+ }
+
+ // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
+ //
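+ // For illustration: IAT_VALUE keeps the address as an immediate (wrapped in a GT_NOP below to
+ // block constant folding), IAT_PVALUE adds one indirection (IND(cns)), and IAT_PPVALUE adds
+ // two (IND(IND(cns))), matching the switch below.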
+ tree->SetOper(GT_CNS_INT);
+ tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
+ tree->gtFlags |= GTF_ICON_FTN_ADDR;
+
+ switch (addrInfo.accessType)
+ {
+ case IAT_PPVALUE:
+ tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
+ tree->gtFlags |= GTF_IND_INVARIANT;
+
+ __fallthrough;
+
+ case IAT_PVALUE:
+ tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
+ break;
+
+ case IAT_VALUE:
+ tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
+ break;
+
+ default:
+ noway_assert(!"Unknown addrInfo.accessType");
+ }
+
+ return fgMorphTree(tree);
+ }
+
+ return tree;
+}
+
+void Compiler::fgAssignSetVarDef(GenTreePtr tree)
+{
+ GenTreeLclVarCommon* lclVarCmnTree;
+ bool isEntire = false;
+ if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
+ {
+ if (isEntire)
+ {
+ lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
+ }
+ else
+ {
+ // We consider partial definitions to be modeled as uses followed by definitions.
+ // This captures the idea that preceding defs are not necessarily made redundant
+ // by this definition.
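+ // For example (illustrative): a store to one 4-byte field of an 8-byte local overwrites
+ // only part of it, so an earlier store to the other field is still live.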
+ lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
+//
+// Arguments:
+// tree - The block assignment to be possibly morphed
+//
+// Return Value:
+// The modified tree if successful, nullptr otherwise.
+//
+// Assumptions:
+// 'tree' must be a block assignment.
+//
+// Notes:
+// If successful, this method always returns the incoming tree, modifying only
+// its arguments.
+
+GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
+{
+ // This must be a block assignment.
+ noway_assert(tree->OperIsBlkOp());
+ var_types asgType = tree->TypeGet();
+
+ GenTreePtr asg = tree;
+ GenTreePtr dest = asg->gtGetOp1();
+ GenTreePtr src = asg->gtGetOp2();
+ unsigned destVarNum = BAD_VAR_NUM;
+ LclVarDsc* destVarDsc = nullptr;
+ GenTreePtr lclVarTree = nullptr;
+ bool isCopyBlock = asg->OperIsCopyBlkOp();
+ bool isInitBlock = !isCopyBlock;
+
+ unsigned size;
+ CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
+#ifdef FEATURE_SIMD
+ // The importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD).
+ // The SIMD type in question could be Vector2f, which is 8 bytes in size.
+ // The check below is to make sure that we don't turn that copyblk
+ // into an assignment, since the rationalizer logic will transform the
+ // copyblk appropriately. Otherwise, the transformation made in this
+ // routine would bypass the rationalizer logic and we might end up with a
+ // GT_ADDR(GT_SIMD) node post-rationalization, leading to a noway assert
+ // in codegen.
+ // TODO-1stClassStructs: This is here to preserve old behavior.
+ // It should be eliminated.
+ if (src->OperGet() == GT_SIMD)
+ {
+ return nullptr;
+ }
+#endif
+
+ if (dest->gtEffectiveVal()->OperIsBlk())
+ {
+ GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
+ size = lhsBlk->Size();
+ if (impIsAddressInLocal(lhsBlk->Addr(), &lclVarTree))
+ {
+ destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
+ destVarDsc = &(lvaTable[destVarNum]);
+ }
+ if (lhsBlk->OperGet() == GT_OBJ)
+ {
+ clsHnd = lhsBlk->AsObj()->gtClass;
+ }
+ }
+ else
+ {
+ noway_assert(dest->OperIsLocal());
+ lclVarTree = dest;
+ destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
+ destVarDsc = &(lvaTable[destVarNum]);
+ if (isCopyBlock)
+ {
+ clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
+ size = info.compCompHnd->getClassSize(clsHnd);
+ }
+ else
+ {
+ size = destVarDsc->lvExactSize;
+ }
+ }
+
+ //
+ // See if we can do a simple transformation:
+ //
+ // GT_ASG <TYP_size>
+ // / \
+ // GT_IND GT_IND or CNS_INT
+ // | |
+ // [dest] [src]
+ //
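+ //
+ // For illustration (assuming a 64-bit target): copying an 8-byte struct whose single field is an
+ // object reference becomes one TYP_REF assignment (the GC layout query below picks the type),
+ // while an 8-byte struct of two ints becomes one TYP_I_IMPL assignment.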
+
+ if (size == REGSIZE_BYTES)
+ {
+ if (clsHnd == NO_CLASS_HANDLE)
+ {
+ // A register-sized cpblk can be treated as an integer assignment.
+ asgType = TYP_I_IMPL;
+ }
+ else
+ {
+ BYTE gcPtr;
+ info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
+ asgType = getJitGCType(gcPtr);
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 1:
+ asgType = TYP_BYTE;
+ break;
+ case 2:
+ asgType = TYP_SHORT;
+ break;
+
+#ifdef _TARGET_64BIT_
+ case 4:
+ asgType = TYP_INT;
+ break;
+#endif // _TARGET_64BIT_
+ }
+ }
+
+ // TODO-1stClassStructs: Change this to asgType != TYP_STRUCT.
+ if (!varTypeIsStruct(asgType))
+ {
+ // For initBlk, a non-constant source is not going to allow us to fiddle
+ // with the bits to create a single assignment.
+ noway_assert(size <= REGSIZE_BYTES);
+
+ if (isInitBlock && (src->gtOper != GT_CNS_INT))
+ {
+ return nullptr;
+ }
+
+ if (destVarDsc != nullptr)
+ {
+#if LOCAL_ASSERTION_PROP
+ // Kill everything about dest
+ if (optLocalAssertionProp)
+ {
+ if (optAssertionCount > 0)
+ {
+ fgKillDependentAssertions(destVarNum DEBUGARG(tree));
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ // A previous incarnation of this code also required the local not to be
+ // address-exposed (i.e. address-taken). That seems orthogonal to the decision of whether
+ // to do field-wise assignments: being address-exposed will cause it to be
+ // "dependently" promoted, so it will be in the right memory location. One possible
+ // further reason for avoiding field-wise stores is that the struct might have alignment-induced
+ // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
+ // concern, then we could compromise, and say that being address-exposed plus having fields that
+ // do not completely cover the memory of the struct prevents field-wise assignments. The same
+ // situation exists for the "src" decision.
+ if (varTypeIsStruct(lclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
+ {
+ // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
+ return nullptr;
+ }
+ else if (!varTypeIsFloating(lclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
+ {
+ // Use the dest local var directly, as well as its type.
+ dest = lclVarTree;
+ asgType = destVarDsc->lvType;
+
+ // If the block operation had been a write to a local var of a small int type,
+ // of the exact size of the small int type, and the var is NormalizeOnStore,
+ // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
+ // have done that normalization. If we're now making it into an assignment,
+ // the NormalizeOnStore will work, and it can be a full def.
+ if (destVarDsc->lvNormalizeOnStore())
+ {
+ dest->gtFlags &= (~GTF_VAR_USEASG);
+ }
+ }
+ else
+ {
+ // Could be a non-promoted struct, or a floating point type local, or
+ // an int subject to a partial write. Don't enregister.
+ lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));
+
+ // Mark the local var tree as a definition point of the local.
+ lclVarTree->gtFlags |= GTF_VAR_DEF;
+ if (size < destVarDsc->lvExactSize)
+ { // If it's not a full-width assignment....
+ lclVarTree->gtFlags |= GTF_VAR_USEASG;
+ }
+
+ if (dest == lclVarTree)
+ {
+ dest = gtNewOperNode(GT_IND, asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
+ }
+ }
+ }
+
+ // Check to ensure we don't have a reducible *(& ... )
+ if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
+ {
+ GenTreePtr addrOp = dest->AsIndir()->Addr()->gtGetOp1();
+ // Ignore reinterpret casts between int/gc
+ if ((addrOp->TypeGet() == asgType) || (varTypeIsIntegralOrI(addrOp) && (genTypeSize(asgType) == size)))
+ {
+ dest = addrOp;
+ asgType = addrOp->TypeGet();
+ }
+ }
+
+ if (dest->gtEffectiveVal()->OperIsIndir())
+ {
+ // If we have no information about the destination, we have to assume it could
+ // live anywhere (not just in the GC heap).
+ // Mark the GT_IND node so that we use the correct write barrier helper in case
+ // the field is a GC ref.
+
+ if (!fgIsIndirOfAddrOfLocal(dest))
+ {
+ dest->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+ tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+ }
+ }
+
+ LclVarDsc* srcVarDsc = nullptr;
+ if (isCopyBlock)
+ {
+ if (src->OperGet() == GT_LCL_VAR)
+ {
+ lclVarTree = src;
+ srcVarDsc = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
+ }
+ else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &lclVarTree))
+ {
+ srcVarDsc = &(lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum]);
+ }
+ if (srcVarDsc != nullptr)
+ {
+ if (varTypeIsStruct(lclVarTree) && (srcVarDsc->lvPromoted || srcVarDsc->lvIsSIMDType()))
+ {
+ // Let fgMorphCopyBlock handle it.
+ return nullptr;
+ }
+ else if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
+ size == genTypeSize(genActualType(lclVarTree->TypeGet())))
+ {
+ // Use the src local var directly.
+ src = lclVarTree;
+ }
+ else
+ {
+#ifndef LEGACY_BACKEND
+
+ // The source argument of the copyblk can potentially
+ // be accessed only through indir(addr(lclVar))
+ // or indir(lclVarAddr) in rational form and liveness
+ // won't account for these uses. Therefore,
+ // we have to mark this local as address exposed so
+ // we don't delete it as a dead store later on.
+ unsigned lclVarNum = lclVarTree->gtLclVarCommon.gtLclNum;
+ lvaTable[lclVarNum].lvAddrExposed = true;
+ lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
+
+#else // LEGACY_BACKEND
+ lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
+#endif // LEGACY_BACKEND
+ GenTree* srcAddr;
+ if (src == lclVarTree)
+ {
+ srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
+ src = gtNewOperNode(GT_IND, asgType, srcAddr);
+ }
+ else
+ {
+ assert(src->OperIsIndir());
+ }
+ }
+ }
+ // If we have no information about the src, we have to assume it could
+ // live anywhere (not just in the GC heap).
+ // Mark the GT_IND node so that we use the correct write barrier helper in case
+ // the field is a GC ref.
+
+ if (!fgIsIndirOfAddrOfLocal(src))
+ {
+ src->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+ }
+ }
+ else
+ {
+// InitBlk
+#if FEATURE_SIMD
+ if (varTypeIsSIMD(asgType))
+ {
+ assert(!isCopyBlock); // Else we would have returned the tree above.
+ noway_assert(src->IsIntegralConst(0));
+ noway_assert(destVarDsc != nullptr);
+
+ src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
+ tree->gtOp.gtOp2 = src;
+ return tree;
+ }
+ else
+#endif
+ if (src->IsCnsIntOrI())
+ {
+ // This will mutate the integer constant, in place, to be the correct
+ // value for the type we are using in the assignment.
+ src->AsIntCon()->FixupInitBlkValue(asgType);
+ }
+ }
+
+ // Ensure that the dest is setup appropriately.
+ if (dest->gtEffectiveVal()->OperIsIndir())
+ {
+ dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/);
+ }
+
+ // Ensure that the rhs is setup appropriately.
+ if (isCopyBlock)
+ {
+ src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/);
+ }
+
+ // Set the lhs and rhs on the assignment.
+ if (dest != tree->gtOp.gtOp1)
+ {
+ asg->gtOp.gtOp1 = dest;
+ }
+ if (src != asg->gtOp.gtOp2)
+ {
+ asg->gtOp.gtOp2 = src;
+ }
+
+ asg->ChangeType(asgType);
+ dest->gtFlags |= GTF_DONT_CSE;
+ asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
+ // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
+ asg->gtFlags &= ~GTF_REVERSE_OPS;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgMorphOneAsgBlock (after):\n");
+ gtDispTree(tree);
+ }
+#endif
+ return tree;
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node
+//
+// Arguments:
+// tree - a tree node with a gtOper of GT_INITBLK
+// the child nodes for tree have already been Morphed
+//
+// Return Value:
+// We can return the original GT_INITBLK unmodified (least desirable, but always correct).
+// We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
+// If we have performed struct promotion of the Dest() then we will try to
+// perform a field by field assignment for each of the promoted struct fields.
+//
+// Notes:
+// If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp.
+// If the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
+// cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
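+//
+// For example (illustrative): initializing a promoted local 'struct S { int a; int b; }' with
+// zero is turned into the field assignments 's.a = 0; s.b = 0', whereas a struct with a custom
+// layout that contains holes keeps the block form and the local is marked DoNotEnregister
+// (DNER_BlockOp).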
+
+GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_ASG && varTypeIsStruct(tree));
+#ifdef DEBUG
+ bool morphed = false;
+#endif // DEBUG
+
+ GenTree* asg = tree;
+ GenTree* src = tree->gtGetOp2();
+ GenTree* origDest = tree->gtGetOp1();
+
+ GenTree* dest = fgMorphBlkNode(origDest, true);
+ if (dest != origDest)
+ {
+ tree->gtOp.gtOp1 = dest;
+ }
+ tree->gtType = dest->TypeGet();
+ JITDUMP("\nfgMorphInitBlock:");
+
+ GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
+ if (oneAsgTree)
+ {
+ JITDUMP(" using oneAsgTree.\n");
+ tree = oneAsgTree;
+ }
+ else
+ {
+ GenTree* destAddr = nullptr;
+ GenTree* initVal = src;
+ GenTree* blockSize = nullptr;
+ unsigned blockWidth = 0;
+ FieldSeqNode* destFldSeq = nullptr;
+ LclVarDsc* destLclVar = nullptr;
+ bool destDoFldAsg = false;
+ unsigned destLclNum = BAD_VAR_NUM;
+ bool blockWidthIsConst = false;
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ if (dest->IsLocal())
+ {
+ lclVarTree = dest->AsLclVarCommon();
+ }
+ else
+ {
+ if (dest->OperIsBlk())
+ {
+ destAddr = dest->AsBlk()->Addr();
+ blockWidth = dest->AsBlk()->gtBlkSize;
+ }
+ else
+ {
+ assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT));
+ destAddr = dest->gtGetOp1();
+ blockWidth = genTypeSize(dest->TypeGet());
+ }
+ }
+ if (lclVarTree != nullptr)
+ {
+ destLclNum = lclVarTree->gtLclNum;
+ destLclVar = &lvaTable[destLclNum];
+ blockWidth = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar);
+ blockWidthIsConst = true;
+ }
+ else
+ {
+ if (dest->gtOper == GT_DYN_BLK)
+ {
+ // The size must be an integer type
+ blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize;
+ assert(varTypeIsIntegral(blockSize->gtType));
+ }
+ else
+ {
+ assert(blockWidth != 0);
+ blockWidthIsConst = true;
+ }
+
+ if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
+ {
+ destLclNum = lclVarTree->gtLclNum;
+ destLclVar = &lvaTable[destLclNum];
+ }
+ }
+ if (destLclNum != BAD_VAR_NUM)
+ {
+#if LOCAL_ASSERTION_PROP
+ // Kill everything about destLclNum (and its field locals)
+ if (optLocalAssertionProp)
+ {
+ if (optAssertionCount > 0)
+ {
+ fgKillDependentAssertions(destLclNum DEBUGARG(tree));
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ if (destLclVar->lvPromoted && blockWidthIsConst)
+ {
+ noway_assert(varTypeIsStruct(destLclVar));
+ noway_assert(!opts.MinOpts());
+ if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
+ {
+ JITDUMP(" dest is address exposed");
+ }
+ else
+ {
+ if (blockWidth == destLclVar->lvExactSize)
+ {
+ JITDUMP(" (destDoFldAsg=true)");
+ // We may decide later that a copyblk is required when this struct has holes
+ destDoFldAsg = true;
+ }
+ else
+ {
+ JITDUMP(" with mismatched size");
+ }
+ }
+ }
+ }
+
+ // Can we use field by field assignment for the dest?
+ if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
+ {
+ JITDUMP(" dest contains holes");
+ destDoFldAsg = false;
+ }
+
+ JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");
+
+ // If we're doing an InitBlock and we've transformed the dest to a non-Blk
+ // we need to change it back.
+ if (!destDoFldAsg && !dest->OperIsBlk())
+ {
+ noway_assert(blockWidth != 0);
+ tree->gtOp.gtOp1 = origDest;
+ tree->gtType = origDest->gtType;
+ }
+
+ if (!destDoFldAsg && (destLclVar != nullptr))
+ {
+ // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
+ if (!destLclVar->lvRegStruct)
+ {
+ // Mark it as DoNotEnregister.
+ lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
+ }
+ }
+
+ // Mark the dest struct as DoNotEnreg
+ // when they are LclVar structs and we are using a CopyBlock
+ // or the struct is not promoted
+ //
+ if (!destDoFldAsg)
+ {
+#if CPU_USES_BLOCK_MOVE
+ compBlkOpUsed = true;
+#endif
+ if (!dest->OperIsBlk())
+ {
+ GenTree* destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(dest);
+ if (clsHnd == NO_CLASS_HANDLE)
+ {
+ dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, dest->TypeGet(), destAddr, blockWidth);
+ }
+ else
+ {
+ GenTree* newDest = gtNewObjNode(clsHnd, destAddr);
+ if (newDest->OperGet() == GT_OBJ)
+ {
+ gtSetObjGcInfo(newDest->AsObj());
+ }
+ dest = newDest;
+ }
+ tree->gtOp.gtOp1 = dest;
+ }
+ }
+ else
+ {
+ // The initVal must be a constant of TYP_INT
+ noway_assert(initVal->OperGet() == GT_CNS_INT);
+ noway_assert(genActualType(initVal->gtType) == TYP_INT);
+
+ // The dest must be of a struct type.
+ noway_assert(varTypeIsStruct(destLclVar));
+
+ //
+ // Now, convert InitBlock to individual assignments
+ //
+
+ tree = nullptr;
+ INDEBUG(morphed = true);
+
+ GenTreePtr dest;
+ GenTreePtr srcCopy;
+ unsigned fieldLclNum;
+ unsigned fieldCnt = destLclVar->lvFieldCnt;
+
+ for (unsigned i = 0; i < fieldCnt; ++i)
+ {
+ fieldLclNum = destLclVar->lvFieldLclStart + i;
+ dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
+
+ noway_assert(lclVarTree->gtOper == GT_LCL_VAR);
+ // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
+ dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));
+
+ srcCopy = gtCloneExpr(initVal);
+ noway_assert(srcCopy != nullptr);
+
+ // need type of oper to be same as tree
+ if (dest->gtType == TYP_LONG)
+ {
+ srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
+ // copy and extend the value
+ srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
+ /* Change the types of srcCopy to TYP_LONG */
+ srcCopy->gtType = TYP_LONG;
+ }
+ else if (varTypeIsFloating(dest->gtType))
+ {
+ srcCopy->ChangeOperConst(GT_CNS_DBL);
+ // setup the bit pattern
+ memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal,
+ sizeof(srcCopy->gtDblCon.gtDconVal));
+ /* Change the types of srcCopy to TYP_DOUBLE */
+ srcCopy->gtType = TYP_DOUBLE;
+ }
+ else
+ {
+ noway_assert(srcCopy->gtOper == GT_CNS_INT);
+ noway_assert(srcCopy->TypeGet() == TYP_INT);
+ // setup the bit pattern
+ memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal,
+ sizeof(srcCopy->gtIntCon.gtIconVal));
+ }
+
+ srcCopy->gtType = dest->TypeGet();
+
+ asg = gtNewAssignNode(dest, srcCopy);
+
+#if LOCAL_ASSERTION_PROP
+ if (optLocalAssertionProp)
+ {
+ optAssertionGen(asg);
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ if (tree)
+ {
+ tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
+ }
+ else
+ {
+ tree = asg;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (morphed)
+ {
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+
+ if (verbose)
+ {
+ printf("fgMorphInitBlock (after):\n");
+ gtDispTree(tree);
+ }
+ }
+#endif
+
+ return tree;
+}
+
+//------------------------------------------------------------------------
+// fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type
+//
+// Arguments:
+// tree - the node to be modified.
+// type - the type of indirection to change it to.
+//
+// Return Value:
+// Returns the node, modified in place.
+//
+// Notes:
+// This doesn't really warrant a separate method, but is here to abstract
+// the fact that these nodes can be modified in-place.
+
+GenTreePtr Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
+{
+ tree->SetOper(GT_IND);
+ tree->gtType = type;
+ return tree;
+}
+
+//------------------------------------------------------------------------
+// fgMorphGetStructAddr: Gets the address of a struct object
+//
+// Arguments:
+// pTree - the parent's pointer to the struct object node
+// clsHnd - the class handle for the struct type
+// isRValue - true if this is a source (not dest)
+//
+// Return Value:
+// Returns the address of the struct value, possibly modifying the existing tree to
+// sink the address below any comma nodes (this is to canonicalize for value numbering).
+// If this is a source, it will morph it to an GT_IND before taking its address,
+// since it may not be remorphed (and we don't want blk nodes as rvalues).
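+//
+// For example (illustrative): given 'COMMA(sideEffect, OBJ(addr))' as a source, this returns the
+// comma retyped to TYP_BYREF with the OBJ replaced by 'addr', i.e. 'COMMA(sideEffect, addr)', so
+// that value numbering sees a consistent comma-shaped address expression.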
+
+GenTreePtr Compiler::fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
+{
+ GenTree* addr;
+ GenTree* tree = *pTree;
+ // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
+ // need to hang onto that for the purposes of value numbering.
+ if (tree->OperIsIndir())
+ {
+ if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
+ {
+ addr = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ if (isRValue && tree->OperIsBlk())
+ {
+ tree->ChangeOper(GT_IND);
+ }
+ addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
+ }
+ }
+ else if (tree->gtOper == GT_COMMA)
+ {
+ // If this is a comma, we're going to "sink" the GT_ADDR below it.
+ (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue);
+ tree->gtType = TYP_BYREF;
+ addr = tree;
+ }
+ else
+ {
+ switch (tree->gtOper)
+ {
+ case GT_LCL_FLD:
+ case GT_LCL_VAR:
+ case GT_INDEX:
+ case GT_FIELD:
+ case GT_ARR_ELEM:
+ addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
+ break;
+ default:
+ {
+ // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
+ // not going to use "temp"
+ GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd);
+ addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
+ break;
+ }
+ }
+ }
+ *pTree = addr;
+ return addr;
+}
+
+//------------------------------------------------------------------------
+// fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
+//
+// Arguments:
+// tree - The struct type node
+// isDest - True if this is the destination of the assignment
+//
+// Return Value:
+// Returns the possibly-morphed node. The caller is responsible for updating
+// the parent of this node.
+
+GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
+{
+ if (tree->gtOper == GT_COMMA)
+ {
+ GenTree* effectiveVal = tree->gtEffectiveVal();
+ GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+#ifdef DEBUG
+ addr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ // In order to CSE and value number array index expressions and bounds checks,
+ // the commas in which they are contained need to match.
+ // The pattern is that the COMMA should be the address expression.
+ // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
+ // TODO-1stClassStructs: Consider whether this can be improved.
+ // Also consider whether some of this can be included in gtNewBlockVal (though note
+ // that doing so may cause us to query the type system before we otherwise would).
+ GenTree* lastComma = nullptr;
+ for (GenTree* next = tree; next != nullptr && next->gtOper == GT_COMMA; next = next->gtGetOp2())
+ {
+ next->gtType = TYP_BYREF;
+ lastComma = next;
+ }
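+ // Splice the new ADDR in as the last comma's op2 so that the comma chain now yields the address.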
+ if (lastComma != nullptr)
+ {
+ noway_assert(lastComma->gtGetOp2() == effectiveVal);
+ lastComma->gtOp.gtOp2 = addr;
+ addr = tree;
+ }
+ var_types structType = effectiveVal->TypeGet();
+ if (structType == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(effectiveVal);
+ if (structHnd == NO_CLASS_HANDLE)
+ {
+ tree = gtNewOperNode(GT_IND, effectiveVal->TypeGet(), addr);
+ }
+ else
+ {
+ tree = gtNewObjNode(structHnd, addr);
+ if (tree->OperGet() == GT_OBJ)
+ {
+ gtSetObjGcInfo(tree->AsObj());
+ }
+ }
+ }
+ else
+ {
+ tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
+ }
+#ifdef DEBUG
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ }
+
+ if (!tree->OperIsBlk())
+ {
+ return tree;
+ }
+ GenTreeBlk* blkNode = tree->AsBlk();
+ if (blkNode->OperGet() == GT_DYN_BLK)
+ {
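+ // A GT_DYN_BLK whose size operand is a constant can be converted into an ordinary GT_BLK of that size.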
+ if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
+ {
+ unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
+ blkNode->AsDynBlk()->gtDynamicSize = nullptr;
+ blkNode->ChangeOper(GT_BLK);
+ blkNode->gtBlkSize = size;
+ }
+ else
+ {
+ return tree;
+ }
+ }
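+ // If a non-struct block accesses a local of a different size, or reads a non-struct local as its
+ // source, the local must remain addressable on the stack, so don't enregister it.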
+ if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
+ (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
+ {
+ GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
+ if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
+ {
+ lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUG_ARG(DNER_VMNeedsStackAddr));
+ }
+ }
+
+ return tree;
+}
+
+//------------------------------------------------------------------------
+// fgMorphBlockOperand: Canonicalize an operand of a block assignment
+//
+// Arguments:
+// tree - The block operand
+// asgType - The type of the assignment
+// blockWidth - The size of the block
+// isDest - true iff this is the destination of the assignment
+//
+// Return Value:
+// Returns the morphed block operand
+//
+// Notes:
+// This does the following:
+// - Ensures that a struct operand is a block node.
+// - Ensures that any COMMAs are above ADDR nodes.
+// Although 'tree' WAS an operand of a block assignment, the assignment
+// may have been retyped to be a scalar assignment.
+
+GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest)
+{
+ GenTree* effectiveVal = tree->gtEffectiveVal();
+
+ // TODO-1stClassStructs: We would like to transform non-TYP_STRUCT nodes to
+ // either plain lclVars or GT_INDs. However, for now we want to preserve most
+ // of the block nodes until the Rationalizer.
+
+ if (!varTypeIsStruct(asgType))
+ {
+ if (effectiveVal->OperIsIndir())
+ {
+ GenTree* addr = effectiveVal->AsIndir()->Addr();
+ if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType))
+ {
+ effectiveVal = addr->gtGetOp1();
+ }
+ else if (effectiveVal->OperIsBlk())
+ {
+ effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
+ }
+ else
+ {
+ effectiveVal->gtType = asgType;
+ }
+ }
+ else if (effectiveVal->TypeGet() != asgType)
+ {
+ GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+ effectiveVal = gtNewOperNode(GT_IND, asgType, addr);
+ }
+ }
+ else
+ {
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(asgType))
+ {
+ if (effectiveVal->OperIsIndir())
+ {
+ GenTree* addr = effectiveVal->AsIndir()->Addr();
+ if (!isDest && (addr->OperGet() == GT_ADDR))
+ {
+ if ((addr->gtGetOp1()->gtOper == GT_SIMD) || (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
+ {
+ effectiveVal = addr->gtGetOp1();
+ }
+ }
+ else if (isDest && !effectiveVal->OperIsBlk())
+ {
+ effectiveVal = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, addr, blockWidth);
+ }
+ }
+ else if (!effectiveVal->OperIsSIMD() && (!effectiveVal->IsLocal() || isDest) && !effectiveVal->OperIsBlk())
+ {
+ GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+ effectiveVal = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, addr, blockWidth);
+ }
+ }
+ else
+#endif // FEATURE_SIMD
+ if (!effectiveVal->OperIsBlk())
+ {
+ GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
+ GenTree* newTree;
+ if (clsHnd == NO_CLASS_HANDLE)
+ {
+ newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
+ }
+ else
+ {
+ newTree = gtNewObjNode(clsHnd, addr);
+ if (isDest && (newTree->OperGet() == GT_OBJ))
+ {
+ gtSetObjGcInfo(newTree->AsObj());
+ }
+ if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
+ {
+ // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
+ // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
+ // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
+ // separately now to avoid excess diffs.
+ newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
+ }
+ }
+ effectiveVal = newTree;
+ }
+ }
+ if (!isDest && effectiveVal->OperIsBlk())
+ {
+ (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
+ }
+ tree = effectiveVal;
+ return tree;
+}
+
+//------------------------------------------------------------------------
+// fgMorphCopyBlock: Perform the Morphing of block copy
+//
+// Arguments:
+// tree - a block copy (i.e. an assignment with a block op on the lhs).
+//
+// Return Value:
+// We can return the original block copy unmodified (least desirable, but always correct).
+// We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
+// If we have performed struct promotion of the Source() or the Dest() then we will try to
+// perform a field by field assignment for each of the promoted struct fields.
+//
+// Assumptions:
+// The child nodes for tree have already been Morphed.
+//
+// Notes:
+// If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
+// When performing a field by field assignment we can have one of Source() or Dest() treated as a blob of bytes
+// and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
+// If the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
+// cannot use a field by field assignment and must leave the original block copy unmodified.
+
+GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
+{
+ noway_assert(tree->OperIsCopyBlkOp());
+
+ JITDUMP("\nfgMorphCopyBlock:");
+
+ bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;
+
+ GenTree* asg = tree;
+ GenTree* rhs = asg->gtGetOp2();
+ GenTree* dest = asg->gtGetOp1();
+
+#if FEATURE_MULTIREG_RET
+ // If this is a multi-reg return, we will not do any morphing of this node.
+ if (rhs->IsMultiRegCall())
+ {
+ assert(dest->OperGet() == GT_LCL_VAR);
+ JITDUMP(" not morphing a multireg call return\n");
+ return tree;
+ }
+#endif // FEATURE_MULTIREG_RET
+
+ // If we have an array index on the lhs, we need to create an obj node.
+
+ dest = fgMorphBlkNode(dest, true);
+ if (dest != asg->gtGetOp1())
+ {
+ asg->gtOp.gtOp1 = dest;
+ if (dest->IsLocal())
+ {
+ dest->gtFlags |= GTF_VAR_DEF;
+ }
+ }
+ asg->gtType = dest->TypeGet();
+ rhs = fgMorphBlkNode(rhs, false);
+
+ asg->gtOp.gtOp2 = rhs;
+
+ GenTreePtr oldTree = tree;
+ GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
+
+ if (oneAsgTree)
+ {
+ JITDUMP(" using oneAsgTree.\n");
+ tree = oneAsgTree;
+ }
+ else
+ {
+ unsigned blockWidth;
+ bool blockWidthIsConst = false;
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ GenTreeLclVarCommon* srcLclVarTree = nullptr;
+ unsigned destLclNum = BAD_VAR_NUM;
+ LclVarDsc* destLclVar = nullptr;
+ FieldSeqNode* destFldSeq = nullptr;
+ bool destDoFldAsg = false;
+ GenTreePtr destAddr = nullptr;
+ GenTreePtr srcAddr = nullptr;
+ bool destOnStack = false;
+ bool hasGCPtrs = false;
+
+ JITDUMP("block assignment to morph:\n");
+ DISPTREE(asg);
+
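+ // Characterize the destination: determine its size, the local variable (if any) it refers to,
+ // and whether it is known to be on the stack.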
+ if (dest->IsLocal())
+ {
+ blockWidthIsConst = true;
+ destOnStack = true;
+ if (dest->gtOper == GT_LCL_VAR)
+ {
+ lclVarTree = dest->AsLclVarCommon();
+ destLclNum = lclVarTree->gtLclNum;
+ destLclVar = &lvaTable[destLclNum];
+ if (destLclVar->lvType == TYP_STRUCT)
+ {
+ // It would be nice if lvExactSize always corresponded to the size of the struct,
+ // but it doesn't always for the temps that the importer creates when it spills side
+ // effects.
+ // TODO-Cleanup: Determine when this happens, and whether it can be changed.
+ blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
+ }
+ else
+ {
+ blockWidth = genTypeSize(destLclVar->lvType);
+ }
+ hasGCPtrs = destLclVar->lvStructGcCount != 0;
+ }
+ else
+ {
+ assert(dest->TypeGet() != TYP_STRUCT);
+ assert(dest->gtOper == GT_LCL_FLD);
+ blockWidth = genTypeSize(dest->TypeGet());
+ destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ }
+ }
+ else
+ {
+ GenTree* effectiveDest = dest->gtEffectiveVal();
+ if (effectiveDest->OperGet() == GT_IND)
+ {
+ assert(dest->TypeGet() != TYP_STRUCT);
+ blockWidth = genTypeSize(effectiveDest->TypeGet());
+ blockWidthIsConst = true;
+ if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
+ {
+ destAddr = dest->gtGetOp1();
+ }
+ }
+ else
+ {
+ assert(effectiveDest->OperIsBlk());
+ GenTreeBlk* blk = effectiveDest->AsBlk();
+
+ blockWidth = blk->gtBlkSize;
+ blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
+ if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
+ {
+ destAddr = blk->Addr();
+ }
+ }
+ if (destAddr != nullptr)
+ {
+ noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
+ if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
+ {
+ destOnStack = true;
+ destLclNum = lclVarTree->gtLclNum;
+ destLclVar = &lvaTable[destLclNum];
+ }
+ }
+ }
+
+ if (destLclVar != nullptr)
+ {
+#if LOCAL_ASSERTION_PROP
+ // Kill everything about destLclNum (and its field locals)
+ if (optLocalAssertionProp)
+ {
+ if (optAssertionCount > 0)
+ {
+ fgKillDependentAssertions(destLclNum DEBUGARG(tree));
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ if (destLclVar->lvPromoted && blockWidthIsConst)
+ {
+ noway_assert(varTypeIsStruct(destLclVar));
+ noway_assert(!opts.MinOpts());
+
+ if (blockWidth == destLclVar->lvExactSize)
+ {
+ JITDUMP(" (destDoFldAsg=true)");
+ // We may decide later that a copyblk is required when this struct has holes
+ destDoFldAsg = true;
+ }
+ else
+ {
+ JITDUMP(" with mismatched dest size");
+ }
+ }
+ }
+
+ FieldSeqNode* srcFldSeq = nullptr;
+ unsigned srcLclNum = BAD_VAR_NUM;
+ LclVarDsc* srcLclVar = nullptr;
+ bool srcDoFldAsg = false;
+
+ if (rhs->IsLocal())
+ {
+ srcLclVarTree = rhs->AsLclVarCommon();
+ srcLclNum = srcLclVarTree->gtLclNum;
+ if (rhs->OperGet() == GT_LCL_FLD)
+ {
+ srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
+ }
+ }
+ else if (rhs->OperIsIndir())
+ {
+ if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
+ {
+ srcLclNum = srcLclVarTree->gtLclNum;
+ }
+ else
+ {
+ srcAddr = rhs->gtOp.gtOp1;
+ }
+ }
+
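+ // If the source is a promoted struct local whose size matches the block exactly, it is a candidate
+ // for a field by field assignment.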
+ if (srcLclNum != BAD_VAR_NUM)
+ {
+ srcLclVar = &lvaTable[srcLclNum];
+
+ if (srcLclVar->lvPromoted && blockWidthIsConst)
+ {
+ noway_assert(varTypeIsStruct(srcLclVar));
+ noway_assert(!opts.MinOpts());
+
+ if (blockWidth == srcLclVar->lvExactSize)
+ {
+ JITDUMP(" (srcDoFldAsg=true)");
+ // We may decide later that a copyblk is required when this struct has holes
+ srcDoFldAsg = true;
+ }
+ else
+ {
+ JITDUMP(" with mismatched src size");
+ }
+ }
+ }
+
+ // Check to see if we are required to do a copy block because the struct contains holes
+ // and either the src or dest is externally visible
+ //
+ bool requiresCopyBlock = false;
+ bool srcSingleLclVarAsg = false;
+
+ // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
+ if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
+ {
+ requiresCopyBlock = true;
+ }
+
+ // Can we use field by field assignment for the dest?
+ if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
+ {
+ JITDUMP(" dest contains custom layout and contains holes");
+ // C++ style CopyBlock with holes
+ requiresCopyBlock = true;
+ }
+
+ // Can we use field by field assignment for the src?
+ if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
+ {
+ JITDUMP(" src contains custom layout and contains holes");
+ // C++ style CopyBlock with holes
+ requiresCopyBlock = true;
+ }
+
+ if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
+ {
+ requiresCopyBlock = true;
+ }
+
+ // Can't use field by field assignment if the src is a call.
+ if (rhs->OperGet() == GT_CALL)
+ {
+ JITDUMP(" src is a call");
+ // C++ style CopyBlock with holes
+ requiresCopyBlock = true;
+ }
+
+ // If we passed the above checks, then we will check these two
+ if (!requiresCopyBlock)
+ {
+ // Are both dest and src promoted structs?
+ if (destDoFldAsg && srcDoFldAsg)
+ {
+ // Both structs should be of the same type, if not we will use a copy block
+ if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
+ lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
+ {
+ requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
+ JITDUMP(" with mismatched types");
+ }
+ }
+ // Are neither dest or src promoted structs?
+ else if (!destDoFldAsg && !srcDoFldAsg)
+ {
+ requiresCopyBlock = true; // Leave as a CopyBlock
+ JITDUMP(" with no promoted structs");
+ }
+ else if (destDoFldAsg)
+ {
+ // Match the following kinds of trees:
+ // fgMorphTree BB01, stmt 9 (before)
+ // [000052] ------------ const int 8
+ // [000053] -A--G------- copyBlk void
+ // [000051] ------------ addr byref
+ // [000050] ------------ lclVar long V07 loc5
+ // [000054] --------R--- <list> void
+ // [000049] ------------ addr byref
+ // [000048] ------------ lclVar struct(P) V06 loc4
+ // long V06.h (offs=0x00) -> V17 tmp9
+ // Yields this transformation
+ // fgMorphCopyBlock (after):
+ // [000050] ------------ lclVar long V07 loc5
+ // [000085] -A---------- = long
+ // [000083] D------N---- lclVar long V17 tmp9
+ //
+ if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
+ (blockWidth == genTypeSize(srcLclVar->TypeGet())))
+ {
+ // Reject the following tree:
+ // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe
+ //
+ // fgMorphTree BB01, stmt 6 (before)
+ // [000038] ------------- const int 4
+ // [000039] -A--G-------- copyBlk void
+ // [000037] ------------- addr byref
+ // [000036] ------------- lclVar int V05 loc3
+ // [000040] --------R---- <list> void
+ // [000035] ------------- addr byref
+ // [000034] ------------- lclVar struct(P) V04 loc2
+ // float V04.f1 (offs=0x00) -> V13 tmp6
+ // As this would transform into
+ // float V13 = int V05
+ //
+ unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
+ var_types destType = lvaTable[fieldLclNum].TypeGet();
+ if (srcLclVar->TypeGet() == destType)
+ {
+ srcSingleLclVarAsg = true;
+ }
+ }
+ }
+ }
+
+ // If we require a copy block then set both of the field assign bools to false
+ if (requiresCopyBlock)
+ {
+ // If a copy block is required then we won't do field by field assignments
+ destDoFldAsg = false;
+ srcDoFldAsg = false;
+ }
+
+ JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
+
+ // Mark the dest/src structs as DoNotEnreg
+ // when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
+ // or the struct is not promoted
+ //
+ if (!destDoFldAsg && (destLclVar != nullptr))
+ {
+ if (!destLclVar->lvRegStruct)
+ {
+ // Mark it as DoNotEnregister.
+ lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
+ }
+ }
+
+ if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
+ {
+ if (!srcLclVar->lvRegStruct)
+ {
+ lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
+ }
+ }
+
+ if (requiresCopyBlock)
+ {
+#if CPU_USES_BLOCK_MOVE
+ compBlkOpUsed = true;
+#endif
+ var_types asgType = dest->TypeGet();
+ dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
+ asg->gtOp.gtOp1 = dest;
+ hasGCPtrs = ((dest->OperGet() == GT_OBJ) && (dest->AsObj()->gtGcPtrCount != 0));
+
+#ifdef CPBLK_UNROLL_LIMIT
+ // Note that the unrolling of CopyBlk is only implemented on some platforms.
+ // Currently that includes x64 and Arm64 but not x86 or Arm32.
+
+ // If we have a CopyObj with a dest on the stack
+ // we will convert it into a GC Unsafe CopyBlk that is non-interruptible
+ // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes)
+ //
+ if (hasGCPtrs && destOnStack && blockWidthIsConst && (blockWidth >= (2 * TARGET_POINTER_SIZE)) &&
+ (blockWidth <= CPBLK_UNROLL_LIMIT))
+ {
+ if (dest->OperGet() == GT_OBJ)
+ {
+ dest->SetOper(GT_BLK);
+ dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
+ }
+ else
+ {
+ assert(dest->OperIsLocal());
+ GenTree* destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, dest->TypeGet(), destAddr, blockWidth);
+ dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
+ tree->gtOp.gtOp1 = dest;
+ }
+ }
+#endif
+ // Eliminate the "OBJ or BLK" node on the rhs.
+ rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
+ asg->gtOp.gtOp2 = rhs;
+
+#ifdef LEGACY_BACKEND
+ if (!rhs->OperIsIndir())
+ {
+ noway_assert(rhs->gtOper == GT_LCL_VAR);
+ GenTree* rhsAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, rhs);
+ rhs = gtNewOperNode(GT_IND, TYP_STRUCT, rhsAddr);
+ }
+#endif // LEGACY_BACKEND
+ // Formerly, liveness did not consider copyblk arguments of simple types as being
+ // a use or def, so these variables were marked as address-exposed.
+ // TODO-1stClassStructs: This should no longer be needed.
+ if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
+ {
+ JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
+ lvaTable[srcLclNum].lvAddrExposed = true;
+ }
+
+ if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
+ {
+ JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
+ lvaTable[destLclNum].lvAddrExposed = true;
+ }
+
+ goto _Done;
+ }
+
+ //
+ // Otherwise we convert this CopyBlock into individual field by field assignments
+ //
+ tree = nullptr;
+
+ GenTreePtr src;
+ GenTreePtr addrSpill = nullptr;
+ unsigned addrSpillTemp = BAD_VAR_NUM;
+ bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame
+
+ unsigned fieldCnt = DUMMY_INIT(0);
+
+ if (destDoFldAsg && srcDoFldAsg)
+ {
+ // To do fieldwise assignments for both sides, they'd better be the same struct type!
+ // All of these conditions were checked above...
+ assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
+ assert(lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() ==
+ lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle());
+ assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);
+
+ fieldCnt = destLclVar->lvFieldCnt;
+ goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field
+ // assignments.
+ }
+ else if (destDoFldAsg)
+ {
+ fieldCnt = destLclVar->lvFieldCnt;
+ rhs = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
+ if (srcAddr == nullptr)
+ {
+ srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
+ }
+ }
+ else
+ {
+ assert(srcDoFldAsg);
+ fieldCnt = srcLclVar->lvFieldCnt;
+ dest = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
+ if (dest->OperIsBlk())
+ {
+ (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
+ }
+ destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ }
+
+ if (destDoFldAsg)
+ {
+ noway_assert(!srcDoFldAsg);
+ if (gtClone(srcAddr))
+ {
+ // srcAddr is simple expression. No need to spill.
+ noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
+ }
+ else
+ {
+ // srcAddr is complex expression. Clone and spill it (unless the destination is
+ // a struct local that only has one field, in which case we'd only use the
+ // address value once...)
+ if (destLclVar->lvFieldCnt > 1)
+ {
+ addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
+ noway_assert(addrSpill != nullptr);
+ }
+ }
+ }
+
+ if (srcDoFldAsg)
+ {
+ noway_assert(!destDoFldAsg);
+
+ // If we're doing field-wise stores, to an address within a local, and we copy
+ // the address into "addrSpill", do *not* declare the original local var node in the
+ // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
+ // field-wise assignments as an "indirect" assignment to the local.
+ // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
+ // we clone it.)
+ if (lclVarTree != nullptr)
+ {
+ lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
+ }
+
+ if (gtClone(destAddr))
+ {
+ // destAddr is simple expression. No need to spill
+ noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
+ }
+ else
+ {
+ // destAddr is complex expression. Clone and spill it (unless
+ // the source is a struct local that only has one field, in which case we'd only
+ // use the address value once...)
+ if (srcLclVar->lvFieldCnt > 1)
+ {
+ addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
+ noway_assert(addrSpill != nullptr);
+ }
+
+ // TODO-CQ: this should be based on a more general
+ // "BaseAddress" method, that handles fields of structs, before or after
+ // morphing.
+ if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
+ {
+ if (addrSpill->gtOp.gtOp1->IsLocal())
+ {
+ // We will *not* consider this to define the local, but rather have each individual field assign
+ // be a definition.
+ addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
+ assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
+ PROMOTION_TYPE_INDEPENDENT);
+ addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
+ // local stack frame
+ }
+ }
+ }
+ }
+
+ if (addrSpill != nullptr)
+ {
+ // Spill the (complex) address to a BYREF temp.
+ // Note, at most one address may need to be spilled.
+ addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
+
+ lvaTable[addrSpillTemp].lvType = TYP_BYREF;
+
+ if (addrSpillIsStackDest)
+ {
+ lvaTable[addrSpillTemp].lvStackByref = true;
+ }
+
+ tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);
+
+#ifndef LEGACY_BACKEND
+ // If we are assigning the address of a LclVar here
+ // liveness does not account for this kind of address taken use.
+ //
+ // We have to mark this local as address exposed so
+ // that we don't delete the definition for this LclVar
+ // as a dead store later on.
+ //
+ if (addrSpill->OperGet() == GT_ADDR)
+ {
+ GenTreePtr addrOp = addrSpill->gtOp.gtOp1;
+ if (addrOp->IsLocal())
+ {
+ unsigned lclVarNum = addrOp->gtLclVarCommon.gtLclNum;
+ lvaTable[lclVarNum].lvAddrExposed = true;
+ lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
+ }
+ }
+#endif // !LEGACY_BACKEND
+ }
+
+ _AssignFields:
+
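+ // Build one scalar assignment per promoted field; the individual assignments are chained together
+ // with GT_COMMA nodes.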
+ for (unsigned i = 0; i < fieldCnt; ++i)
+ {
+ FieldSeqNode* curFieldSeq = nullptr;
+ if (destDoFldAsg)
+ {
+ noway_assert(destLclNum != BAD_VAR_NUM);
+ unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
+ dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
+ // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
+ if (destAddr != nullptr)
+ {
+ noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
+ dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
+ }
+ else
+ {
+ noway_assert(lclVarTree != nullptr);
+ dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
+ }
+ // Don't CSE the lhs of an assignment.
+ dest->gtFlags |= GTF_DONT_CSE;
+ }
+ else
+ {
+ noway_assert(srcDoFldAsg);
+ noway_assert(srcLclNum != BAD_VAR_NUM);
+ unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
+
+ if (addrSpill)
+ {
+ assert(addrSpillTemp != BAD_VAR_NUM);
+ dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
+ }
+ else
+ {
+ dest = gtCloneExpr(destAddr);
+ noway_assert(dest != nullptr);
+
+ // Is the address of a local?
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ bool isEntire = false;
+ bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
+ if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
+ {
+ lclVarTree->gtFlags |= GTF_VAR_DEF;
+ if (!isEntire)
+ {
+ lclVarTree->gtFlags |= GTF_VAR_USEASG;
+ }
+ }
+ }
+
+ GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
+ // Have to set the field sequence -- which means we need the field handle.
+ CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
+ CORINFO_FIELD_HANDLE fieldHnd =
+ info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
+ curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
+ fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
+
+ dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
+
+ dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
+
+ // !!! The destination could be on stack. !!!
+ // This flag will let us choose the correct write barrier.
+ dest->gtFlags |= GTF_IND_TGTANYWHERE;
+ }
+
+ if (srcDoFldAsg)
+ {
+ noway_assert(srcLclNum != BAD_VAR_NUM);
+ unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
+ src = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
+
+ noway_assert(srcLclVarTree != nullptr);
+ src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
+ // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
+ // but they are when they are under a GT_ADDR.
+ src->gtFlags |= GTF_DONT_CSE;
+ }
+ else
+ {
+ noway_assert(destDoFldAsg);
+ noway_assert(destLclNum != BAD_VAR_NUM);
+ unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
+
+ if (srcSingleLclVarAsg)
+ {
+ noway_assert(fieldCnt == 1);
+ noway_assert(srcLclVar != nullptr);
+ noway_assert(addrSpill == nullptr);
+
+ src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
+ }
+ else
+ {
+ if (addrSpill)
+ {
+ assert(addrSpillTemp != BAD_VAR_NUM);
+ src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
+ }
+ else
+ {
+ src = gtCloneExpr(srcAddr);
+ noway_assert(src != nullptr);
+ }
+
+ CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
+ CORINFO_FIELD_HANDLE fieldHnd =
+ info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
+ curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
+
+ src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
+ new (this, GT_CNS_INT)
+ GenTreeIntCon(TYP_I_IMPL, lvaTable[fieldLclNum].lvFldOffset, curFieldSeq));
+
+ src = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), src);
+ }
+ }
+
+ noway_assert(dest->TypeGet() == src->TypeGet());
+
+ asg = gtNewAssignNode(dest, src);
+
+ // If we spilled the address, and we didn't do individual field assignments to promoted fields,
+ // and it was of a local, record the assignment as an indirect update of a local.
+ if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
+ {
+ curFieldSeq = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
+ bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType)) == genTypeSize(dest->TypeGet()));
+ IndirectAssignmentAnnotation* pIndirAnnot =
+ new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
+ GetIndirAssignMap()->Set(asg, pIndirAnnot);
+ }
+
+#if LOCAL_ASSERTION_PROP
+ if (optLocalAssertionProp)
+ {
+ optAssertionGen(asg);
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ if (tree)
+ {
+ tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
+ }
+ else
+ {
+ tree = asg;
+ }
+ }
+ }
+
+ if (isLateArg)
+ {
+ tree->gtFlags |= GTF_LATE_ARG;
+ }
+
+#ifdef DEBUG
+ if (tree != oldTree)
+ {
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+ }
+
+ if (verbose)
+ {
+ printf("\nfgMorphCopyBlock (after):\n");
+ gtDispTree(tree);
+ }
+#endif
+
+_Done:
+ return tree;
+}
+
+// Insert conversions and normalize the tree to make it amenable to register-based
+// FP architectures.
+GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
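+ // For floating-point arithmetic nodes, cast both operands to the node's type; for floating-point
+ // comparisons with mixed operand types, widen the TYP_FLOAT operand to TYP_DOUBLE.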
+ if (tree->OperIsArithmetic() && varTypeIsFloating(tree))
+ {
+ if (op1->TypeGet() != tree->TypeGet())
+ {
+ tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp1, tree->TypeGet());
+ }
+ if (op2->TypeGet() != tree->TypeGet())
+ {
+ tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp2, tree->TypeGet());
+ }
+ }
+ else if (tree->OperIsCompare() && varTypeIsFloating(op1) && op1->TypeGet() != op2->TypeGet())
+ {
+ // both had better be floating, just one bigger than other
+ assert(varTypeIsFloating(op2));
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp1, TYP_DOUBLE);
+ }
+ else if (op2->TypeGet() == TYP_FLOAT)
+ {
+ tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp2, TYP_DOUBLE);
+ }
+ }
+
+ return tree;
+}
+
+GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
+{
+ GenTree* op1 = compare->gtOp.gtOp1;
+ GenTree* op2 = compare->gtOp.gtOp2;
+ GenTree* opCns;
+ GenTreeCall* opCall;
+
+ // recognize this pattern:
+ //
+ // stmtExpr void (IL 0x000... ???)
+ // return int
+ // const ref null
+ // == int
+ // call help ref HELPER.CORINFO_HELP_BOX_NULLABLE
+ // const(h) long 0x7fed96836c8 class
+ // addr byref
+ // ld.lclVar struct V00 arg0
+ //
+ //
+ // which comes from this code (reported by customer as being slow) :
+ //
+ // private static bool IsNull<T>(T arg)
+ // {
+ // return arg==null;
+ // }
+ //
+
+ if (op1->IsCnsIntOrI() && op2->IsHelperCall())
+ {
+ opCns = op1;
+ opCall = op2->AsCall();
+ }
+ else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
+ {
+ opCns = op2;
+ opCall = op1->AsCall();
+ }
+ else
+ {
+ return compare;
+ }
+
+ if (!opCns->IsIntegralConst(0))
+ {
+ return compare;
+ }
+
+ if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
+ {
+ return compare;
+ }
+
+ // replace the box with an access of the nullable 'hasValue' field which is at the zero offset
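+ // The address of the Nullable<T> value is the second argument to the helper call, so a TYP_BOOL
+ // indirection off that address reads 'hasValue' directly.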
+ GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1);
+
+ if (opCall == op1)
+ {
+ compare->gtOp.gtOp1 = newOp;
+ }
+ else
+ {
+ compare->gtOp.gtOp2 = newOp;
+ }
+
+ return compare;
+}
+
+#ifdef FEATURE_SIMD
+
+//--------------------------------------------------------------------------------------
+// fgCopySIMDNode: make a copy of a SIMD intrinsic node, e.g. so that a field can be accessed.
+//
+// Arguments:
+// simdNode - The GenTreeSIMD node to be copied
+//
+// Return Value:
+// A comma node where op1 is the assignment of the simd node to a temp, and op2 is the temp lclVar.
+//
+GenTree* Compiler::fgCopySIMDNode(GenTreeSIMD* simdNode)
+{
+ // Copy the result of the SIMD intrinsic into a temp.
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("Copy of SIMD intrinsic with field access"));
+
+ CORINFO_CLASS_HANDLE simdHandle = NO_CLASS_HANDLE;
+ // We only have fields of the fixed float vectors.
+ noway_assert(simdNode->gtSIMDBaseType == TYP_FLOAT);
+ switch (simdNode->gtSIMDSize)
+ {
+ case 8:
+ simdHandle = SIMDVector2Handle;
+ break;
+ case 12:
+ simdHandle = SIMDVector3Handle;
+ break;
+ case 16:
+ simdHandle = SIMDVector4Handle;
+ break;
+ default:
+ noway_assert(!"field of unexpected SIMD type");
+ break;
+ }
+ assert(simdHandle != NO_CLASS_HANDLE);
+
+ lvaSetStruct(lclNum, simdHandle, false, true);
+ lvaTable[lclNum].lvFieldAccessed = true;
+
+ GenTree* asg = gtNewTempAssign(lclNum, simdNode);
+ GenTree* newLclVarNode = new (this, GT_LCL_VAR) GenTreeLclVar(simdNode->TypeGet(), lclNum, BAD_IL_OFFSET);
+
+ GenTree* comma = gtNewOperNode(GT_COMMA, simdNode->TypeGet(), asg, newLclVarNode);
+ return comma;
+}
+
+//--------------------------------------------------------------------------------------------------------------
+// getSIMDStructFromField:
+// Checks whether the field belongs to a SIMD struct. If it does, return the GenTreePtr for
+// the struct node, along with its base type, field index and SIMD size; if it does not, return nullptr.
+// Normally, if the tree node comes from a SIMD lclVar that is not used in any SIMD intrinsic, we
+// return nullptr, since in that case the SIMD struct should be treated as a regular struct.
+// However, if the caller wants the SIMD struct node regardless, it can set ignoreUsedInSIMDIntrinsic
+// to true; the lvIsUsedInSIMDIntrinsic check is then skipped, and the SIMD struct node is returned
+// whenever the struct is a SIMD struct.
+//
+// Arguments:
+// tree - GenTreePtr. This node will be checked to see whether it is a field that belongs to a SIMD
+// struct used for a SIMD intrinsic.
+// pBaseTypeOut - var_types pointer; if the tree node is the one we want, *pBaseTypeOut is set
+// to the SIMD lclVar's base type.
+// indexOut - unsigned pointer; if the tree is used for a SIMD intrinsic, *indexOut is set
+// to the index number of this field.
+// simdSizeOut - unsigned pointer; if the tree is used for a SIMD intrinsic, *simdSizeOut is set
+// to the size of the SIMD struct that this tree belongs to.
+// ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore
+// the UsedInSIMDIntrinsic check.
+//
+// return value:
+// The GenTreePtr of the SIMD lclVar that the tree belongs to. If the tree is not a field of a SIMD
+// struct used in a SIMD intrinsic, nullptr is returned.
+//
+
+GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree,
+ var_types* pBaseTypeOut,
+ unsigned* indexOut,
+ unsigned* simdSizeOut,
+ bool ignoreUsedInSIMDIntrinsic /*false*/)
+{
+ GenTreePtr ret = nullptr;
+ if (tree->OperGet() == GT_FIELD)
+ {
+ GenTreePtr objRef = tree->gtField.gtFldObj;
+ if (objRef != nullptr)
+ {
+ GenTreePtr obj = nullptr;
+ if (objRef->gtOper == GT_ADDR)
+ {
+ obj = objRef->gtOp.gtOp1;
+ }
+ else if (ignoreUsedInSIMDIntrinsic)
+ {
+ obj = objRef;
+ }
+ else
+ {
+ return nullptr;
+ }
+
+ if (isSIMDTypeLocal(obj))
+ {
+ unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
+ {
+ *simdSizeOut = varDsc->lvExactSize;
+ *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
+ ret = obj;
+ }
+ }
+ else if (obj->OperGet() == GT_SIMD)
+ {
+ ret = obj;
+ GenTreeSIMD* simdNode = obj->AsSIMD();
+ *simdSizeOut = simdNode->gtSIMDSize;
+ *pBaseTypeOut = simdNode->gtSIMDBaseType;
+ }
+ }
+ }
+ if (ret != nullptr)
+ {
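+ // The field's element index is its byte offset divided by the size of the SIMD base (element) type.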
+ unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
+ *indexOut = tree->gtField.gtFldOffset / BaseTypeSize;
+ }
+ return ret;
+}
+
+/*****************************************************************************
+* If a read operation tries to access a SIMD struct field, then transform this
+* operation into the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
+* Otherwise, return the old tree.
+* Argument:
+* tree - GenTreePtr. If this pointer points to a SIMD struct field that is used for a SIMD
+* intrinsic, we will morph it into the SIMD intrinsic SIMDIntrinsicGetItem.
+* Return:
+* A GenTreePtr which points to the new tree. If the tree is not related to a SIMD
+* intrinsic, the original tree is returned.
+*/
+
+GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
+{
+ unsigned index = 0;
+ var_types baseType = TYP_UNKNOWN;
+ unsigned simdSize = 0;
+ GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
+ if (simdStructNode != nullptr)
+ {
+
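+ // Replace the field read with a SIMDIntrinsicGetItem of the containing SIMD struct at the field's index.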
+ assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
+ GenTree* op2 = gtNewIconNode(index);
+ tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
+#ifdef DEBUG
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ }
+ return tree;
+}
+
+/*****************************************************************************
+* Transform an assignment to a SIMD struct field into the corresponding SIMD set intrinsic
+* (SIMDIntrinsicSetX/Y/Z/W), and return a new tree. If it is not such an assignment,
+* then return the old tree.
+* Argument:
+* tree - GenTreePtr. If this pointer points to an assignment to a SIMD struct field that is
+* used for a SIMD intrinsic, we will morph it into the corresponding SIMD set intrinsic.
+* Return:
+* A GenTreePtr which points to the new tree. If the tree is not related to a SIMD
+* intrinsic, the original tree is returned.
+*/
+
+GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_ASG);
+ GenTreePtr op1 = tree->gtGetOp1();
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ unsigned index = 0;
+ var_types baseType = TYP_UNKNOWN;
+ unsigned simdSize = 0;
+ GenTreePtr simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
+ if (simdOp1Struct != nullptr)
+ {
+ // Generate the simd set intrinsic
+ assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
+
+ SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
+ switch (index)
+ {
+ case 0:
+ simdIntrinsicID = SIMDIntrinsicSetX;
+ break;
+ case 1:
+ simdIntrinsicID = SIMDIntrinsicSetY;
+ break;
+ case 2:
+ simdIntrinsicID = SIMDIntrinsicSetZ;
+ break;
+ case 3:
+ simdIntrinsicID = SIMDIntrinsicSetW;
+ break;
+ default:
+ noway_assert(!"There is no set intrinsic for index bigger than 3");
+ }
+
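+ // Rewrite 'simd.field = value' as 'simd = SIMDIntrinsicSetN(simd, value)': clone the struct node to
+ // serve as the assignment destination, and make the set intrinsic the new source (op2).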
+ GenTreePtr target = gtClone(simdOp1Struct);
+ assert(target != nullptr);
+ GenTreePtr simdTree = gtNewSIMDNode(target->gtType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
+ tree->gtOp.gtOp1 = target;
+ tree->gtOp.gtOp2 = simdTree;
+#ifdef DEBUG
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ }
+
+ return tree;
+}
+
+#endif
+/*****************************************************************************
+ *
+ * Transform the given GTK_SMPOP tree for code generation.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
+{
+ // This extra scope is a workaround for a gcc bug: the inline destructor for ALLOCA_CHECK
+ // confuses the control flow, and gcc thinks that the function never returns.
+ {
+ ALLOCA_CHECK();
+ noway_assert(tree->OperKind() & GTK_SMPOP);
+
+ /* The steps in this function are :
+ o Perform required preorder processing
+ o Process the first, then second operand, if any
+ o Perform required postorder morphing
+ o Perform optional postorder morphing if optimizing
+ */
+
+ bool isQmarkColon = false;
+
+#if LOCAL_ASSERTION_PROP
+ AssertionIndex origAssertionCount = DUMMY_INIT(0);
+ AssertionDsc* origAssertionTab = DUMMY_INIT(NULL);
+
+ AssertionIndex thenAssertionCount = DUMMY_INIT(0);
+ AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL);
+#endif
+
+ if (fgGlobalMorph)
+ {
+#if !FEATURE_STACK_FP_X87
+ tree = fgMorphForRegisterFP(tree);
+#endif
+ }
+
+ genTreeOps oper = tree->OperGet();
+ var_types typ = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ /*-------------------------------------------------------------------------
+ * First do any PRE-ORDER processing
+ */
+
+ switch (oper)
+ {
+ // Some arithmetic operators need to use a helper call to the EE
+ int helper;
+
+ case GT_ASG:
+ tree = fgDoNormalizeOnStore(tree);
+ /* fgDoNormalizeOnStore can change op2 */
+ noway_assert(op1 == tree->gtOp.gtOp1);
+ op2 = tree->gtOp.gtOp2;
+ // TODO-1stClassStructs: this is here to match previous behavior, but results in some
+ // unnecessary pessimization in the handling of addresses in fgMorphCopyBlock().
+ if (tree->OperIsBlkOp())
+ {
+ op1->gtFlags |= GTF_DONT_CSE;
+ if (tree->OperIsCopyBlkOp() &&
+ (op2->IsLocal() || (op2->OperIsIndir() && (op2->AsIndir()->Addr()->OperGet() == GT_ADDR))))
+ {
+ op2->gtFlags |= GTF_DONT_CSE;
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ {
+ // We should check whether op2 is being assigned to a SIMD struct field; if it is,
+ // we should translate the tree into a SIMD set intrinsic.
+ assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0);
+ GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
+ typ = tree->TypeGet();
+ op1 = tree->gtGetOp1();
+ op2 = tree->gtGetOp2();
+#ifdef DEBUG
+ assert((tree == newTree) && (tree->OperGet() == oper));
+ if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
+ {
+ tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
+ }
+#endif // DEBUG
+ }
+#endif
+
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ case GT_ASG_MOD:
+ case GT_ASG_UDIV:
+ case GT_ASG_UMOD:
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_AND:
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ case GT_CHS:
+
+ // We can't CSE the LHS of an assignment. Only r-values can be CSEed.
+ // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former
+ // behavior, allow CSE'ing if it is a struct type (or a TYP_REF transformed from a struct type)
+ // TODO-1stClassStructs: improve this.
+ if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
+ {
+ op1->gtFlags |= GTF_DONT_CSE;
+ }
+ break;
+
+ case GT_ADDR:
+
+ /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
+ op1->gtFlags |= GTF_DONT_CSE;
+ break;
+
+ case GT_QMARK:
+ case GT_JTRUE:
+
+ noway_assert(op1);
+
+ if (op1->OperKind() & GTK_RELOP)
+ {
+ noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
+ /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
+ not need to materialize the result as a 0 or 1. */
+
+ /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
+ op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
+
+ // Request that the codegen for op1 sets the condition flags
+ // when it generates the code for op1.
+ //
+ // Codegen for op1 must set the condition flags if
+ // this method returns true.
+ //
+ op1->gtRequestSetFlags();
+ }
+ else
+ {
+ GenTreePtr effOp1 = op1->gtEffectiveVal();
+ noway_assert((effOp1->gtOper == GT_CNS_INT) &&
+ (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
+ }
+ break;
+
+ case GT_COLON:
+#if LOCAL_ASSERTION_PROP
+ if (optLocalAssertionProp)
+ {
+#endif
+ isQmarkColon = true;
+ }
+ break;
+
+ case GT_INDEX:
+ return fgMorphArrayIndex(tree);
+
+ case GT_CAST:
+ return fgMorphCast(tree);
+
+ case GT_MUL:
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ /* For (long)int1 * (long)int2, we don't actually do the
+ casts, and just multiply the 32 bit values, which will
+ give us the 64 bit result in edx:eax */
+
+ noway_assert(op2);
+ if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
+ genActualType(op1->CastFromType()) == TYP_INT &&
+ genActualType(op2->CastFromType()) == TYP_INT) &&
+ !op1->gtOverflow() && !op2->gtOverflow())
+ {
+ // The casts have to be of the same signedness.
+ if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
+ {
+ // We see if we can force an int constant to change its signedness
+ GenTreePtr constOp;
+ if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
+ constOp = op1;
+ else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
+ constOp = op2;
+ else
+ goto NO_MUL_64RSLT;
+
+ if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
+ constOp->gtFlags ^= GTF_UNSIGNED;
+ else
+ goto NO_MUL_64RSLT;
+ }
+
+ // The only combination that can overflow
+ if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
+ goto NO_MUL_64RSLT;
+
+ /* Remaining combinations can never overflow during long mul. */
+
+ tree->gtFlags &= ~GTF_OVERFLOW;
+
+ /* Do unsigned mul only if the casts were unsigned */
+
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;
+
+ /* Since we are committing to GTF_MUL_64RSLT, we don't want
+ the casts to be folded away. So morph the castees directly */
+
+ op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
+ op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);
+
+ // Propagate side effect flags up the tree
+ op1->gtFlags &= ~GTF_ALL_EFFECT;
+ op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+ op2->gtFlags &= ~GTF_ALL_EFFECT;
+ op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+
+ // If the GT_MUL can be altogether folded away, we should do that.
+
+ if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
+ opts.OptEnabled(CLFLG_CONSTANTFOLD))
+ {
+ tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
+ tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
+ noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
+ tree = gtFoldExprConst(tree);
+ noway_assert(tree->OperIsConst());
+ return tree;
+ }
+
+ tree->gtFlags |= GTF_MUL_64RSLT;
+
+ // If op1 and op2 are unsigned casts, we need to do an unsigned mult
+ tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);
+
+ // Insert GT_NOP nodes for the cast operands so that they do not get folded
+ // And propagate the new flags. We don't want to CSE the casts because
+ // codegen expects GTF_MUL_64RSLT muls to have a certain layout.
+
+ if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
+ {
+ op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
+ op1->gtFlags &= ~GTF_ALL_EFFECT;
+ op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
+ op1->gtFlags |= GTF_DONT_CSE;
+ }
+
+ if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
+ {
+ op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
+ op2->gtFlags &= ~GTF_ALL_EFFECT;
+ op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
+ op2->gtFlags |= GTF_DONT_CSE;
+ }
+
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+ tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
+
+ goto DONE_MORPHING_CHILDREN;
+ }
+ else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
+ {
+ NO_MUL_64RSLT:
+ if (tree->gtOverflow())
+ helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
+ else
+ helper = CORINFO_HELP_LMUL;
+
+ goto USE_HELPER_FOR_ARITH;
+ }
+ else
+ {
+ /* We are seeing this node again. We have decided to use
+ GTF_MUL_64RSLT, so leave it alone. */
+
+ assert(tree->gtIsValid64RsltMul());
+ }
+ }
+#endif // !_TARGET_64BIT_
+ break;
+
+ case GT_DIV:
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ helper = CORINFO_HELP_LDIV;
+ goto USE_HELPER_FOR_ARITH;
+ }
+
+#if USE_HELPERS_FOR_INT_DIV
+ if (typ == TYP_INT && !fgIsSignedDivOptimizable(op2))
+ {
+ helper = CORINFO_HELP_DIV;
+ goto USE_HELPER_FOR_ARITH;
+ }
+#endif
+#endif // !_TARGET_64BIT_
+
+#ifndef LEGACY_BACKEND
+ if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
+ {
+ op2 = gtFoldExprConst(op2);
+ }
+
+ if (fgShouldUseMagicNumberDivide(tree->AsOp()))
+ {
+ tree = fgMorphDivByConst(tree->AsOp());
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ }
+#endif // !LEGACY_BACKEND
+ break;
+
+ case GT_UDIV:
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ helper = CORINFO_HELP_ULDIV;
+ goto USE_HELPER_FOR_ARITH;
+ }
+#if USE_HELPERS_FOR_INT_DIV
+ if (typ == TYP_INT && !fgIsUnsignedDivOptimizable(op2))
+ {
+ helper = CORINFO_HELP_UDIV;
+ goto USE_HELPER_FOR_ARITH;
+ }
+#endif
+#endif // !_TARGET_64BIT_
+ break;
+
+ case GT_MOD:
+
+ if (varTypeIsFloating(typ))
+ {
+ helper = CORINFO_HELP_DBLREM;
+ noway_assert(op2);
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ if (op2->TypeGet() == TYP_FLOAT)
+ {
+ helper = CORINFO_HELP_FLTREM;
+ }
+ else
+ {
+ tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
+ }
+ }
+ else if (op2->TypeGet() == TYP_FLOAT)
+ {
+ tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
+ }
+ goto USE_HELPER_FOR_ARITH;
+ }
+
+ // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod.
+ // A similar optimization for signed mod will not work for a negative perfectly divisible
+ // HI-word. To make it correct, we would need to divide without the sign and then flip the
+ // result sign after mod. This requires 18 opcodes + flow, making it not worth inlining.
+ goto ASSIGN_HELPER_FOR_MOD;
+
+ case GT_UMOD:
+
+#ifdef _TARGET_ARMARCH_
+//
+// Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
+//
+#else // _TARGET_XARCH
+ /* If this is an unsigned long mod with op2 which is a cast to long from a
+ constant int, then don't morph to a call to the helper. This can be done
+ faster inline using idiv.
+ */
+
+ noway_assert(op2);
+ if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
+ ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
+ ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
+ {
+ if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
+ op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
+ op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
+ (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
+ {
+ tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
+ noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
+ }
+
+ if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
+ op2->gtIntConCommon.LngValue() <= 0x3fffffff)
+ {
+ tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
+ noway_assert(op1->TypeGet() == TYP_LONG);
+
+ // Update flags for op1 morph
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+
+ tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
+
+ // If op1 is a constant, then do constant folding of the division operator
+ if (op1->gtOper == GT_CNS_NATIVELONG)
+ {
+ tree = gtFoldExpr(tree);
+ }
+ return tree;
+ }
+ }
+#endif // _TARGET_XARCH
+
+ ASSIGN_HELPER_FOR_MOD:
+
+ // For "val % 1", return 0 if op1 doesn't have any side effects
+ // and we are not in the CSE phase; in the CSE phase we cannot discard 'tree'
+ // because it may contain CSE expressions that we haven't yet examined.
+ //
+ if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
+ {
+ if (op2->IsIntegralConst(1))
+ {
+ GenTreePtr zeroNode = gtNewZeroConNode(typ);
+#ifdef DEBUG
+ zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ DEBUG_DESTROY_NODE(tree);
+ return zeroNode;
+ }
+ }
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
+ goto USE_HELPER_FOR_ARITH;
+ }
+
+#if USE_HELPERS_FOR_INT_DIV
+ if (typ == TYP_INT)
+ {
+ if (oper == GT_UMOD && !fgIsUnsignedModOptimizable(op2))
+ {
+ helper = CORINFO_HELP_UMOD;
+ goto USE_HELPER_FOR_ARITH;
+ }
+ else if (oper == GT_MOD && !fgIsSignedModOptimizable(op2))
+ {
+ helper = CORINFO_HELP_MOD;
+ goto USE_HELPER_FOR_ARITH;
+ }
+ }
+#endif
+#endif // !_TARGET_64BIT_
+
+#ifndef LEGACY_BACKEND
+ if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
+ {
+ op2 = gtFoldExprConst(op2);
+ }
+
+#ifdef _TARGET_ARM64_
+
+ // For ARM64 we don't have a remainder instruction,
+ // The architecture manual suggests the following transformation to
+ // generate code for such operator:
+ //
+ // a % b = a - (a / b) * b;
+ //
+ tree = fgMorphModToSubMulDiv(tree->AsOp());
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+
+#else // !_TARGET_ARM64_
+
+ if (oper != GT_UMOD && fgShouldUseMagicNumberDivide(tree->AsOp()))
+ {
+ tree = fgMorphModByConst(tree->AsOp());
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ }
+
+#endif //_TARGET_ARM64_
+#endif // !LEGACY_BACKEND
+ break;
+
+ USE_HELPER_FOR_ARITH:
+ {
+ /* We have to morph these arithmetic operations into helper calls
+ before morphing the arguments (preorder), else the arguments
+ won't get correct values of fgPtrArgCntCur.
+ However, try to fold the tree first in case we end up with a
+ simple node which won't need a helper call at all */
+
+ noway_assert(tree->OperIsBinary());
+
+ GenTreePtr oldTree = tree;
+
+ tree = gtFoldExpr(tree);
+
+ // Were we able to fold it ?
+ // Note that gtFoldExpr may return a non-leaf even if successful
+ // e.g. for something like "expr / 1" - see also bug #290853
+ if (tree->OperIsLeaf() || (oldTree != tree))
+ {
+ return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
+ }
+
+ // Did we fold it into a comma node with throw?
+ if (tree->gtOper == GT_COMMA)
+ {
+ noway_assert(fgIsCommaThrow(tree));
+ return fgMorphTree(tree);
+ }
+ }
+ return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));
+
+ case GT_RETURN:
+ // normalize small integer return values
+ if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) &&
+ (op1->TypeGet() != TYP_VOID) && fgCastNeeded(op1, info.compRetType))
+ {
+ // Small-typed return values are normalized by the callee
+ op1 = gtNewCastNode(TYP_INT, op1, info.compRetType);
+
+ // Propagate GTF_COLON_COND
+ op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
+
+ tree->gtOp.gtOp1 = fgMorphCast(op1);
+
+ // Propagate side effect flags
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+ tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+
+ return tree;
+ }
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+
+ // Check for typeof(...) == obj.GetType()
+ // Also check for typeof(...) == typeof(...)
+ // IMPORTANT NOTE: this optimization relies on a one-to-one mapping between
+ // type handles and instances of System.Type
+ // If this invariant is ever broken, the optimization will need updating
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef LEGACY_BACKEND
+ if (op1->gtOper == GT_CALL && op2->gtOper == GT_CALL &&
+ ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
+ (op1->gtCall.gtCallType == CT_HELPER)) &&
+ ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
+ (op2->gtCall.gtCallType == CT_HELPER)))
+#else
+ if ((((op1->gtOper == GT_INTRINSIC) &&
+ (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
+ ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) &&
+ (((op2->gtOper == GT_INTRINSIC) &&
+ (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
+ ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER))))
+#endif
+ {
+ GenTreePtr pGetClassFromHandle;
+ GenTreePtr pGetType;
+
+#ifdef LEGACY_BACKEND
+ bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1);
+ bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2);
+#else
+ bool bOp1ClassFromHandle = op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1) : false;
+ bool bOp2ClassFromHandle = op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2) : false;
+#endif
+
+ // Optimize typeof(...) == typeof(...)
+ // Typically this occurs in generic code that attempts a type switch
+ // e.g. typeof(T) == typeof(int)
+
+ if (bOp1ClassFromHandle && bOp2ClassFromHandle)
+ {
+ GenTreePtr classFromHandleArg1 = tree->gtOp.gtOp1->gtCall.gtCallArgs->gtOp.gtOp1;
+ GenTreePtr classFromHandleArg2 = tree->gtOp.gtOp2->gtCall.gtCallArgs->gtOp.gtOp1;
+
+ GenTreePtr compare = gtNewOperNode(oper, TYP_INT, classFromHandleArg1, classFromHandleArg2);
+
+ compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
+
+ // Morph and return
+ return fgMorphTree(compare);
+ }
+ else if (bOp1ClassFromHandle || bOp2ClassFromHandle)
+ {
+ //
+ // Now check for GetClassFromHandle(handle) == obj.GetType()
+ //
+
+ if (bOp1ClassFromHandle)
+ {
+ pGetClassFromHandle = tree->gtOp.gtOp1;
+ pGetType = op2;
+ }
+ else
+ {
+ pGetClassFromHandle = tree->gtOp.gtOp2;
+ pGetType = op1;
+ }
+
+ GenTreePtr pGetClassFromHandleArgument = pGetClassFromHandle->gtCall.gtCallArgs->gtOp.gtOp1;
+ GenTreePtr pConstLiteral = pGetClassFromHandleArgument;
+
+ // Unwrap GT_NOP node used to prevent constant folding
+ if (pConstLiteral->gtOper == GT_NOP && pConstLiteral->gtType == TYP_I_IMPL)
+ {
+ pConstLiteral = pConstLiteral->gtOp.gtOp1;
+ }
+
+ // In the ngen case, we have to go through an indirection to get the right handle.
+ if (pConstLiteral->gtOper == GT_IND)
+ {
+ pConstLiteral = pConstLiteral->gtOp.gtOp1;
+ }
+#ifdef LEGACY_BACKEND
+
+ if (pGetType->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC &&
+ info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) ==
+ CORINFO_INTRINSIC_Object_GetType &&
+#else
+ if ((pGetType->gtOper == GT_INTRINSIC) &&
+ (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) &&
+#endif
+ pConstLiteral->gtOper == GT_CNS_INT && pConstLiteral->gtType == TYP_I_IMPL)
+ {
+ CORINFO_CLASS_HANDLE clsHnd =
+ CORINFO_CLASS_HANDLE(pConstLiteral->gtIntCon.gtCompileTimeHandle);
+
+ if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd))
+ {
+ // Method Table tree
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef LEGACY_BACKEND
+ GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
+#else
+ GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1);
+#endif
+ objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null
+ compCurBB->bbFlags |= BBF_HAS_VTABREF;
+ optMethodFlags |= OMF_HAS_VTABLEREF;
+
+ // Method table constant
+ GenTreePtr cnsMT = pGetClassFromHandleArgument;
+
+ GenTreePtr compare = gtNewOperNode(oper, TYP_INT, objMT, cnsMT);
+
+ compare->gtFlags |=
+ tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
+
+ // Morph and return
+ return fgMorphTree(compare);
+ }
+ }
+ }
+ }
+ fgMorphRecognizeBoxNullable(tree);
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtGetOp2();
+
+ break;
+
+#ifdef _TARGET_ARM_
+ case GT_INTRINSIC:
+ if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
+ {
+ switch (tree->TypeGet())
+ {
+ case TYP_DOUBLE:
+ return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
+ case TYP_FLOAT:
+ return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
+ default:
+ unreached();
+ }
+ }
+ break;
+#endif
+
+ default:
+ break;
+ }
+
+#if !CPU_HAS_FP_SUPPORT
+ tree = fgMorphToEmulatedFP(tree);
+#endif
+
+ /* Could this operator throw an exception? */
+ if (fgGlobalMorph && tree->OperMayThrow())
+ {
+ if (((tree->OperGet() != GT_IND) && !tree->OperIsBlk()) || fgAddrCouldBeNull(tree->gtOp.gtOp1))
+ {
+ /* Mark the tree node as potentially throwing an exception */
+ tree->gtFlags |= GTF_EXCEPT;
+ }
+ }
+
+ /*-------------------------------------------------------------------------
+ * Process the first operand, if any
+ */
+
+ if (op1)
+ {
+
+#if LOCAL_ASSERTION_PROP
+ // If we are entering the "then" part of a Qmark-Colon we must
+ // save the state of the current copy assignment table
+ // so that we can restore this state when entering the "else" part
+ if (isQmarkColon)
+ {
+ noway_assert(optLocalAssertionProp);
+ if (optAssertionCount)
+ {
+ noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
+ unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
+ origAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
+ origAssertionCount = optAssertionCount;
+ memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
+ }
+ else
+ {
+ origAssertionCount = 0;
+ origAssertionTab = nullptr;
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ // We might need a new MorphAddressContext context. (These are used to convey
+ // parent context about how addresses being calculated will be used; see the
+ // specification comment for MorphAddrContext for full details.)
+ // Assume it's an Ind context to start.
+ MorphAddrContext subIndMac1(MACK_Ind);
+ MorphAddrContext* subMac1 = mac;
+ if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_CopyBlock)
+ {
+ switch (tree->gtOper)
+ {
+ case GT_ADDR:
+ if (subMac1 == nullptr)
+ {
+ subMac1 = &subIndMac1;
+ subMac1->m_kind = MACK_Addr;
+ }
+ break;
+ case GT_COMMA:
+ // In a comma, the incoming context only applies to the rightmost arg of the
+ // comma list. The left arg (op1) gets a fresh context.
+ subMac1 = nullptr;
+ break;
+ case GT_ASG:
+ if (tree->OperIsBlkOp())
+ {
+ subMac1 = &subIndMac1;
+ }
+ break;
+ case GT_OBJ:
+ case GT_BLK:
+ case GT_DYN_BLK:
+ case GT_IND:
+ subMac1 = &subIndMac1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // For additions, if we're in an IND context keep track of whether
+ // all offsets added to the address are constant, and their sum.
+ if (tree->gtOper == GT_ADD && subMac1 != nullptr)
+ {
+ assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
+ GenTreePtr otherOp = tree->gtOp.gtOp2;
+ // Is the other operand a constant?
+ if (otherOp->IsCnsIntOrI())
+ {
+ ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
+ totalOffset += otherOp->gtIntConCommon.IconValue();
+ if (totalOffset.IsOverflow())
+ {
+ // We will consider an offset so large as to overflow as "not a constant" --
+ // we will do a null check.
+ subMac1->m_allConstantOffsets = false;
+ }
+ else
+ {
+ subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
+ }
+ }
+ else
+ {
+ subMac1->m_allConstantOffsets = false;
+ }
+ }
+
+ tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);
+
+#if LOCAL_ASSERTION_PROP
+ // If we are exiting the "then" part of a Qmark-Colon we must
+ // save the state of the current copy assignment table
+ // so that we can merge this state with the "else" part exit
+ if (isQmarkColon)
+ {
+ noway_assert(optLocalAssertionProp);
+ if (optAssertionCount)
+ {
+ noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
+ unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
+ thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
+ thenAssertionCount = optAssertionCount;
+ memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
+ }
+ else
+ {
+ thenAssertionCount = 0;
+ thenAssertionTab = nullptr;
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ /* Morphing along with folding and inlining may have changed the
+ * side effect flags, so we have to reset them
+ *
+ * NOTE: Don't reset the exception flags on nodes that may throw */
+
+ noway_assert(tree->gtOper != GT_CALL);
+
+ if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId))
+ {
+ tree->gtFlags &= ~GTF_CALL;
+ }
+
+ if (!tree->OperMayThrow())
+ {
+ tree->gtFlags &= ~GTF_EXCEPT;
+ }
+
+ /* Propagate the new flags */
+ tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
+
+ // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does
+ // Similarly for clsVar
+ if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
+ {
+ tree->gtFlags &= ~GTF_GLOB_REF;
+ }
+ } // if (op1)
+
+ /*-------------------------------------------------------------------------
+ * Process the second operand, if any
+ */
+
+ if (op2)
+ {
+
+#if LOCAL_ASSERTION_PROP
+ // If we are entering the "else" part of a Qmark-Colon we must
+ // reset the state of the current copy assignment table
+ if (isQmarkColon)
+ {
+ noway_assert(optLocalAssertionProp);
+ optAssertionReset(0);
+ if (origAssertionCount)
+ {
+ size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
+ memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
+ optAssertionReset(origAssertionCount);
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ // We might need a new MorphAddressContext context to use in evaluating op2.
+ // (These are used to convey parent context about how addresses being calculated
+ // will be used; see the specification comment for MorphAddrContext for full details.)
+ // Assume it's an Ind context to start.
+ MorphAddrContext subIndMac2(MACK_Ind);
+ switch (tree->gtOper)
+ {
+ case GT_ADD:
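+ // Mirror the op1 handling: if we're in an IND context, note whether the offset
+ // contributed by the other operand is a constant.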
+ if (mac != nullptr && mac->m_kind == MACK_Ind)
+ {
+ GenTreePtr otherOp = tree->gtOp.gtOp1;
+ // Is the other operand a constant?
+ if (otherOp->IsCnsIntOrI())
+ {
+ mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
+ }
+ else
+ {
+ mac->m_allConstantOffsets = false;
+ }
+ }
+ break;
+ case GT_ASG:
+ if (tree->OperIsBlkOp())
+ {
+ mac = &subIndMac2;
+ }
+ break;
+ default:
+ break;
+ }
+ tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
+
+ /* Propagate the side effect flags from op2 */
+
+ tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
+
+#if LOCAL_ASSERTION_PROP
+ // If we are exiting the "else" part of a Qmark-Colon we must
+ // merge the state of the current copy assignment table with
+ // that of the exit of the "then" part.
+ if (isQmarkColon)
+ {
+ noway_assert(optLocalAssertionProp);
+ // If either exit table has zero entries then
+ // the merged table also has zero entries
+ if (optAssertionCount == 0 || thenAssertionCount == 0)
+ {
+ optAssertionReset(0);
+ }
+ else
+ {
+ size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
+ if ((optAssertionCount != thenAssertionCount) ||
+ (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
+ {
+ // Yes they are different so we have to find the merged set
+ // Iterate over the copy asgn table removing any entries
+ // that do not have an exact match in the thenAssertionTab
+ AssertionIndex index = 1;
+ while (index <= optAssertionCount)
+ {
+ AssertionDsc* curAssertion = optGetAssertion(index);
+
+ for (unsigned j = 0; j < thenAssertionCount; j++)
+ {
+ AssertionDsc* thenAssertion = &thenAssertionTab[j];
+
+ // Do the left sides match?
+ if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
+ (curAssertion->assertionKind == thenAssertion->assertionKind))
+ {
+ // Do the right sides match?
+ if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
+ (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
+ {
+ goto KEEP;
+ }
+ else
+ {
+ goto REMOVE;
+ }
+ }
+ }
+ //
+ // If we fall out of the loop above, we didn't find a matching entry
+ // in the thenAssertionTab, so the assertion must have been killed on
+ // that path and we remove it here.
+ //
+ REMOVE:
+ // The assertion at 'index' is to be removed
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("The QMARK-COLON ");
+ printTreeID(tree);
+ printf(" removes assertion candidate #%d\n", index);
+ }
+#endif
+ optAssertionRemove(index);
+ continue;
+ KEEP:
+ // The assertion at 'index' is to be kept
+ index++;
+ }
+ }
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+ } // if (op2)
+
+ DONE_MORPHING_CHILDREN:
+
+/*-------------------------------------------------------------------------
+ * Now do POST-ORDER processing
+ */
+
+#if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_)
+ // Variable shifts of a long end up being helper calls, so mark the tree as such. This
+ // is potentially too conservative, since they'll get treated as having side effects.
+ // It is important to mark them as calls so if they are part of an argument list,
+ // they will get sorted and processed properly (for example, it is important to handle
+ // all nested calls before putting struct arguments in the argument registers). We
+ // could mark the trees just before argument processing, but it would require a full
+ // tree walk of the argument tree, so we just do it here, instead, even though we'll
+ // mark non-argument trees (that will still get converted to calls, anyway).
+ if (GenTree::OperIsShift(oper) && (tree->TypeGet() == TYP_LONG) && (op2->OperGet() != GT_CNS_INT))
+ {
+ tree->gtFlags |= GTF_CALL;
+ }
+#endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_
+
+ if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) &&
+ (op2 && !varTypeIsGC(op2->TypeGet())))
+ {
+ // The tree is really not GC but was marked as such. Now that the
+ // children have been unmarked, unmark the tree too.
+
+ // Remember that GT_COMMA inherits its type only from op2
+ if (tree->gtOper == GT_COMMA)
+ {
+ tree->gtType = genActualType(op2->TypeGet());
+ }
+ else
+ {
+ tree->gtType = genActualType(op1->TypeGet());
+ }
+ }
+
+ GenTreePtr oldTree = tree;
+
+ GenTreePtr qmarkOp1 = nullptr;
+ GenTreePtr qmarkOp2 = nullptr;
+
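+ // If this is a QMARK with a COLON, remember the colon's operands so we can tell
+ // below whether gtFoldExpr returned one of them.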
+ if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
+ {
+ qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
+ qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
+ }
+
+ // Try to fold it; maybe we get lucky.
+ tree = gtFoldExpr(tree);
+
+ if (oldTree != tree)
+ {
+ /* if gtFoldExpr returned op1 or op2 then we are done */
+ if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
+ {
+ return tree;
+ }
+
+ /* If we created a comma-throw tree then we need to morph op1 */
+ if (fgIsCommaThrow(tree))
+ {
+ tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
+ fgMorphTreeDone(tree);
+ return tree;
+ }
+
+ return tree;
+ }
+ else if (tree->OperKind() & GTK_CONST)
+ {
+ return tree;
+ }
+
+ /* gtFoldExpr could have used setOper to change the oper */
+ oper = tree->OperGet();
+ typ = tree->TypeGet();
+
+ /* gtFoldExpr could have changed op1 and op2 */
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtGetOp2();
+
+ // Do we have an integer compare operation?
+ //
+ if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
+ {
+ // Are we comparing against zero?
+ //
+ if (op2->IsIntegralConst(0))
+ {
+ // Request that the codegen for op1 sets the condition flags
+ // when it generates the code for op1.
+ //
+ // Codegen for op1 must set the condition flags if
+ // this method returns true.
+ //
+ op1->gtRequestSetFlags();
+ }
+ }
+ /*-------------------------------------------------------------------------
+ * Perform the required oper-specific postorder morphing
+ */
+
+ GenTreePtr temp;
+ GenTreePtr cns1, cns2;
+ GenTreePtr thenNode;
+ GenTreePtr elseNode;
+ size_t ival1, ival2;
+ GenTreePtr lclVarTree;
+ GenTreeLclVarCommon* lclVarCmnTree;
+ FieldSeqNode* fieldSeq = nullptr;
+
+ switch (oper)
+ {
+ case GT_ASG:
+
+ lclVarTree = fgIsIndirOfAddrOfLocal(op1);
+ if (lclVarTree != nullptr)
+ {
+ lclVarTree->gtFlags |= GTF_VAR_DEF;
+ }
+
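+ // If the target of the assignment is a constant (e.g. a constant address),
+ // wrap it in a GT_IND so the store is performed through an indirection.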
+ if (op1->gtEffectiveVal()->OperIsConst())
+ {
+ op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
+ tree->gtOp.gtOp1 = op1;
+ }
+
+ /* If we are storing a small type, we might be able to omit a cast */
+ if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet()))
+ {
+ if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
+ {
+ var_types castType = op2->CastToType();
+
+ // If we are performing a narrowing cast and
+ // castType is larger or the same as op1's type
+ // then we can discard the cast.
+
+ if (varTypeIsSmall(castType) && (castType >= op1->TypeGet()))
+ {
+ tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
+ }
+ }
+ else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet()))
+ {
+ /* We don't need to zero extend the setcc instruction */
+ op2->gtType = TYP_BYTE;
+ }
+ }
+ // If we introduced a CSE we may need to undo the optimization above
+ // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
+ // When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
+ else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ /* We again need to zero extend the setcc instruction */
+ op2->gtType = varDsc->TypeGet();
+ }
+ fgAssignSetVarDef(tree);
+
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ case GT_ASG_MOD:
+ case GT_ASG_UDIV:
+ case GT_ASG_UMOD:
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_AND:
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+
+ /* We can't CSE the LHS of an assignment */
+ /* We also must set this in the pre-morphing phase, otherwise assertionProp doesn't see it */
+ if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
+ {
+ op1->gtFlags |= GTF_DONT_CSE;
+ }
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+
+ /* Make sure we're allowed to do this */
+
+ if (optValnumCSE_phase)
+ {
+ // It is not safe to reorder/delete CSE's
+ break;
+ }
+
+ cns2 = op2;
+
+ /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
+
+ if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
+ {
+ op1 = tree->gtOp.gtOp1;
+
+ /* Since this can occur repeatedly we use a while loop */
+
+ while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) &&
+ (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) && (op1->gtType == TYP_INT) &&
+ (op1->gtOverflow() == false))
+ {
+ /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
+
+ ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
+ ival2 = cns2->gtIntCon.gtIconVal;
+
+ if (op1->gtOper == GT_ADD)
+ {
+ ival2 -= ival1;
+ }
+ else
+ {
+ ival2 += ival1;
+ }
+ cns2->gtIntCon.gtIconVal = ival2;
+
+#ifdef _TARGET_64BIT_
+ // we need to properly re-sign-extend or truncate as needed.
+ cns2->AsIntCon()->TruncateOrSignExtend32();
+#endif // _TARGET_64BIT_
+
+ op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
+ }
+ }
+
+ //
+ // Here we look for the following tree
+ //
+ //       EQ/NE
+ //       /   \
+ //     op1   CNS 0/1
+ //
+ ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
+
+ // cast to unsigned allows test for both 0 and 1
+ if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
+ {
+ ival2 = (size_t)cns2->gtIntConCommon.IconValue();
+ }
+ else // cast to UINT64 allows test for both 0 and 1
+ if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
+ {
+ ival2 = (size_t)cns2->gtIntConCommon.LngValue();
+ }
+
+ if (ival2 != INT_MAX)
+ {
+ // If we don't have a comma and relop, we can't do this optimization
+ //
+ if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
+ {
+ // Here we look for the following transformation
+ //
+ //           EQ/NE                    Possible REVERSE(RELOP)
+ //           /   \                          /      \
+ //       COMMA   CNS 0/1     ->         COMMA     relop_op2
+ //       /   \                          /    \
+ //      x   RELOP                      x    relop_op1
+ //          /   \
+ //   relop_op1  relop_op2
+ //
+ //
+ //
+ GenTreePtr comma = op1;
+ GenTreePtr relop = comma->gtOp.gtOp2;
+
+ GenTreePtr relop_op1 = relop->gtOp.gtOp1;
+
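+ // Reverse the relop when the EQ/NE tests for it being false, i.e. (relop == 0) or (relop != 1).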
+ bool reverse = ((ival2 == 0) == (oper == GT_EQ));
+
+ if (reverse)
+ {
+ gtReverseCond(relop);
+ }
+
+ relop->gtOp.gtOp1 = comma;
+ comma->gtOp.gtOp2 = relop_op1;
+
+ // Comma now has fewer nodes underneath it, so we need to regenerate its flags
+ comma->gtFlags &= ~GTF_ALL_EFFECT;
+ comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
+ comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
+
+ noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
+ noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
+ relop->gtFlags |=
+ tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);
+
+ return relop;
+ }
+
+ if (op1->gtOper == GT_COMMA)
+ {
+ // Here we look for the following tree
+ // and when the LCL_VAR is a temp we can fold the tree:
+ //
+ //           EQ/NE                       EQ/NE
+ //           /   \                       /   \
+ //       COMMA   CNS 0/1     ->      RELOP   CNS 0/1
+ //       /   \                       /   \
+ //     ASG   LCL_VAR
+ //     /  \
+ //  LCL_VAR  RELOP
+ //           /   \
+ //
+
+ GenTreePtr asg = op1->gtOp.gtOp1;
+ GenTreePtr lcl = op1->gtOp.gtOp2;
+
+ /* Make sure that the left side of the comma is the assignment of the LCL_VAR */
+ if (asg->gtOper != GT_ASG)
+ {
+ goto SKIP;
+ }
+
+ /* The right side of the comma must be a LCL_VAR temp */
+ if (lcl->gtOper != GT_LCL_VAR)
+ {
+ goto SKIP;
+ }
+
+ unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+
+ /* If the LCL_VAR is not a temp then bail, a temp has a single def */
+ if (!lvaTable[lclNum].lvIsTemp)
+ {
+ goto SKIP;
+ }
+
+#if FEATURE_ANYCSE
+ /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
+ // Fix 383856 X86/ARM ILGEN
+ if (lclNumIsCSE(lclNum))
+ {
+ goto SKIP;
+ }
+#endif
+
+ /* We also must be assigning the result of a RELOP */
+ if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
+ {
+ goto SKIP;
+ }
+
+ /* Both of the LCL_VARs must match */
+ if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
+ {
+ goto SKIP;
+ }
+
+ /* If right side of asg is not a RELOP then skip */
+ if (!asg->gtOp.gtOp2->OperIsCompare())
+ {
+ goto SKIP;
+ }
+
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ /* Set op1 to the right side of asg, (i.e. the RELOP) */
+ op1 = asg->gtOp.gtOp2;
+
+ DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
+ DEBUG_DESTROY_NODE(lcl);
+
+ /* This local variable should never be used again */
+ // <BUGNUM>
+ // VSW 184221: Set RefCnt to zero to indicate that this local var
+ // is not used any more. (Keep the lvType as is.)
+ // Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars
+ // And then emitter::emitEndCodeGen will assert in the following line:
+ // noway_assert( dsc->lvTracked);
+ // </BUGNUM>
+ noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet.
+ varDsc->lvRefCnt == 2 // Or, we assume this tmp should only be used here,
+ // and it only shows up twice.
+ );
+ lvaTable[lclNum].lvRefCnt = 0;
+ lvaTable[lclNum].lvaResetSortAgainFlag(this);
+ }
+
+ if (op1->OperIsCompare())
+ {
+ // Here we look for the following tree
+ //
+ //           EQ/NE            ->        RELOP/!RELOP
+ //           /   \                        /    \
+ //       RELOP   CNS 0/1
+ //       /   \
+ //
+ // Note that we will remove/destroy the EQ/NE node and move
+ // the RELOP up into its location.
+
+ /* Here we reverse the RELOP if necessary */
+
+ bool reverse = ((ival2 == 0) == (oper == GT_EQ));
+
+ if (reverse)
+ {
+ gtReverseCond(op1);
+ }
+
+ /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
+ op1->gtType = tree->gtType;
+
+ noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
+ op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
+
+ DEBUG_DESTROY_NODE(tree);
+ return op1;
+ }
+
+ //
+ // Now we check for a compare with the result of an '&' operator
+ //
+ // Here we look for the following transformation:
+ //
+ //            EQ/NE                      EQ/NE
+ //            /   \                      /   \
+ //          AND   CNS 0/1    ->        AND   CNS 0
+ //         /   \                      /   \
+ //    RSZ/RSH   CNS 1                x    CNS (1 << y)
+ //      /  \
+ //     x   CNS_INT +y
+
+ if (op1->gtOper == GT_AND)
+ {
+ GenTreePtr andOp = op1;
+ GenTreePtr rshiftOp = andOp->gtOp.gtOp1;
+
+ if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
+ {
+ goto SKIP;
+ }
+
+ if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
+ {
+ goto SKIP;
+ }
+
+ ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
+
+ if (shiftAmount < 0)
+ {
+ goto SKIP;
+ }
+
+ if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
+ {
+ goto SKIP;
+ }
+
+ if (andOp->gtType == TYP_INT)
+ {
+ if (shiftAmount > 31)
+ {
+ goto SKIP;
+ }
+
+ UINT32 newAndOperand = ((UINT32)1) << shiftAmount;
+
+ andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
+
+ // Reverse the cond if necessary
+ if (ival2 == 1)
+ {
+ gtReverseCond(tree);
+ cns2->gtIntCon.gtIconVal = 0;
+ oper = tree->gtOper;
+ }
+ }
+ else if (andOp->gtType == TYP_LONG)
+ {
+ if (shiftAmount > 63)
+ {
+ goto SKIP;
+ }
+
+ UINT64 newAndOperand = ((UINT64)1) << shiftAmount;
+
+ andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
+
+ // Reverse the cond if necessary
+ if (ival2 == 1)
+ {
+ gtReverseCond(tree);
+ cns2->gtIntConCommon.SetLngValue(0);
+ oper = tree->gtOper;
+ }
+ }
+
+ andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
+
+ DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
+ DEBUG_DESTROY_NODE(rshiftOp);
+ }
+ } // END if (ival2 != INT_MAX)
+
+ SKIP:
+ /* Now check for compares with small constant longs that can be cast to int */
+
+ if (!cns2->OperIsConst())
+ {
+ goto COMPARE;
+ }
+
+ if (cns2->TypeGet() != TYP_LONG)
+ {
+ goto COMPARE;
+ }
+
+ /* Is the constant 31 bits or smaller? */
+
+ if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
+ {
+ goto COMPARE;
+ }
+
+ /* Is the first comparand mask operation of type long ? */
+
+ if (op1->gtOper != GT_AND)
+ {
+ /* Another interesting case: cast from int */
+
+ if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
+ !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
+ !op1->gtOverflow()) // cannot be an overflow checking cast
+ {
+ /* Simply make this into an integer comparison */
+
+ tree->gtOp.gtOp1 = op1->gtCast.CastOp();
+ tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
+ }
+
+ goto COMPARE;
+ }
+
+ noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
+
+ /* Is the result of the mask effectively an INT ? */
+
+ GenTreePtr andMask;
+ andMask = op1->gtOp.gtOp2;
+ if (andMask->gtOper != GT_CNS_NATIVELONG)
+ {
+ goto COMPARE;
+ }
+ if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
+ {
+ goto COMPARE;
+ }
+
+ /* Now we know that we can cast gtOp.gtOp1 of AND to int */
+
+ op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, TYP_INT);
+
+ /* now replace the mask node (gtOp.gtOp2 of AND node) */
+
+ noway_assert(andMask == op1->gtOp.gtOp2);
+
+ ival1 = (int)andMask->gtIntConCommon.LngValue();
+ andMask->SetOper(GT_CNS_INT);
+ andMask->gtType = TYP_INT;
+ andMask->gtIntCon.gtIconVal = ival1;
+
+ /* now change the type of the AND node */
+
+ op1->gtType = TYP_INT;
+
+ /* finally we replace the comparand */
+
+ ival2 = (int)cns2->gtIntConCommon.LngValue();
+ cns2->SetOper(GT_CNS_INT);
+ cns2->gtType = TYP_INT;
+
+ noway_assert(cns2 == op2);
+ cns2->gtIntCon.gtIconVal = ival2;
+
+ goto COMPARE;
+
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+
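+ // For signed compares against the constants 1 or -1 we can tighten the relop and
+ // compare against 0 instead (e.g. "expr >= 1" becomes "expr > 0").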
+ if ((tree->gtFlags & GTF_UNSIGNED) == 0)
+ {
+ if (op2->gtOper == GT_CNS_INT)
+ {
+ cns2 = op2;
+ /* Check for "expr relop 1" */
+ if (cns2->IsIntegralConst(1))
+ {
+ /* Check for "expr >= 1" */
+ if (oper == GT_GE)
+ {
+ /* Change to "expr > 0" */
+ oper = GT_GT;
+ goto SET_OPER;
+ }
+ /* Check for "expr < 1" */
+ else if (oper == GT_LT)
+ {
+ /* Change to "expr <= 0" */
+ oper = GT_LE;
+ goto SET_OPER;
+ }
+ }
+ /* Check for "expr relop -1" */
+ else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
+ {
+ /* Check for "expr <= -1" */
+ if (oper == GT_LE)
+ {
+ /* Change to "expr < 0" */
+ oper = GT_LT;
+ goto SET_OPER;
+ }
+ /* Check for "expr > -1" */
+ else if (oper == GT_GT)
+ {
+ /* Change to "expr >= 0" */
+ oper = GT_GE;
+
+ SET_OPER:
+ // If we get here we should be changing 'oper'
+ assert(tree->OperGet() != oper);
+
+ // Keep the old ValueNumber for 'tree' as the new expr
+ // will still compute the same value as before
+ tree->SetOper(oper, GenTree::PRESERVE_VN);
+ cns2->gtIntCon.gtIconVal = 0;
+
+ // vnStore is null before the ValueNumber phase has run
+ if (vnStore != nullptr)
+ {
+ // Update the ValueNumber for 'cns2', as we just changed it to 0
+ fgValueNumberTreeConst(cns2);
+ }
+
+ op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
+ }
+ }
+ }
+ }
+
+ COMPARE:
+
+ noway_assert(tree->OperKind() & GTK_RELOP);
+
+ /* Check if the result of the comparison is used for a jump.
+ * If not then only the int (i.e. 32 bit) case is handled in
+ * the code generator through the (x86) "set" instructions.
+ * For the rest of the cases, the simplest way is to
+ * "simulate" the comparison with ?:
+ *
+ * On ARM, we previously used the IT instruction, but the IT instructions
+ * have mostly been declared obsolete and off-limits, so all cases on ARM
+ * get converted to ?: */
+
+ if (!(tree->gtFlags & GTF_RELOP_JMP_USED) && fgMorphRelopToQmark(op1))
+ {
+ /* We convert it to "(CMP_TRUE) ? (1):(0)" */
+
+ op1 = tree;
+ op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
+ op1->gtRequestSetFlags();
+
+ op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0));
+ op2 = fgMorphTree(op2);
+
+ tree = gtNewQmarkNode(TYP_INT, op1, op2);
+
+ fgMorphTreeDone(tree);
+
+ return tree;
+ }
+ break;
+
+ case GT_QMARK:
+
+ /* If op1 is a comma throw node then we won't be keeping op2 */
+ if (fgIsCommaThrow(op1))
+ {
+ break;
+ }
+
+ /* Get hold of the two branches */
+
+ noway_assert(op2->OperGet() == GT_COLON);
+ elseNode = op2->AsColon()->ElseNode();
+ thenNode = op2->AsColon()->ThenNode();
+
+ /* Try to hoist assignments out of qmark colon constructs.
+ i.e. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). */
+
+ if (tree->TypeGet() == TYP_VOID && thenNode->OperGet() == GT_ASG && elseNode->OperGet() == GT_ASG &&
+ thenNode->TypeGet() != TYP_LONG && GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) &&
+ thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet())
+ {
+ noway_assert(thenNode->TypeGet() == elseNode->TypeGet());
+
+ GenTreePtr asg = thenNode;
+ GenTreePtr colon = op2;
+ colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2;
+ colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2;
+ tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType;
+ asg->gtOp.gtOp2 = tree;
+
+ // Asg will have all the flags that the QMARK had
+ asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT);
+
+ // The colon node won't have the flags that x had.
+ colon->gtFlags &= ~GTF_ALL_EFFECT;
+ colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags | colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
+
+ DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1);
+ DEBUG_DESTROY_NODE(elseNode);
+
+ return asg;
+ }
+
+ /* If the 'else' branch is empty swap the two branches and reverse the condition */
+
+ if (elseNode->IsNothingNode())
+ {
+ /* This can only happen for VOID ?: */
+ noway_assert(op2->gtType == TYP_VOID);
+
+ /* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */
+ if (thenNode->IsNothingNode())
+ {
+ // We may be able to throw away op1 (unless it has side-effects)
+
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
+ {
+ /* Just return a Nop node */
+ return thenNode;
+ }
+ else
+ {
+ /* Just return the relop, but clear the special flags. Note
+ that we can't do that for longs and floats (see code under
+ COMPARE label above) */
+
+ if (!fgMorphRelopToQmark(op1->gtOp.gtOp1))
+ {
+ op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
+ return op1;
+ }
+ }
+ }
+ else
+ {
+ GenTreePtr tmp = elseNode;
+
+ op2->AsColon()->ElseNode() = elseNode = thenNode;
+ op2->AsColon()->ThenNode() = thenNode = tmp;
+ gtReverseCond(op1);
+ }
+ }
+
+#if !defined(_TARGET_ARM_)
+ // If we have (cond)?0:1, then we just return "cond" for TYP_INTs
+ //
+ // Don't do this optimization for ARM: we always require assignment
+ // to boolean to remain ?:, since we don't have any way to generate
+ // this with straight-line code, like x86 does using setcc (at least
+ // after the IT instruction is deprecated).
+
+ if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT && genActualType(typ) == TYP_INT &&
+ thenNode->gtOper == GT_CNS_INT && elseNode->gtOper == GT_CNS_INT)
+ {
+ ival1 = thenNode->gtIntCon.gtIconVal;
+ ival2 = elseNode->gtIntCon.gtIconVal;
+
+ // Is one constant 0 and the other 1?
+ if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0)
+ {
+ // If the constants are {1, 0}, reverse the condition
+ if (ival1 == 1)
+ {
+ gtReverseCond(op1);
+ }
+
+ // Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it
+ // needs to materialize the result as a 0 or 1.
+ noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED));
+ op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
+
+ DEBUG_DESTROY_NODE(tree);
+ DEBUG_DESTROY_NODE(op2);
+
+ return op1;
+ }
+ }
+#endif // !_TARGET_ARM_
+
+ break; // end case GT_QMARK
+
+ case GT_MUL:
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ // This must be GTF_MUL_64RSLT
+ assert(tree->gtIsValid64RsltMul());
+ return tree;
+ }
+#endif // _TARGET_64BIT_
+ goto CM_OVF_OP;
+
+ case GT_SUB:
+
+ if (tree->gtOverflow())
+ {
+ goto CM_OVF_OP;
+ }
+
+ /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */
+
+ noway_assert(op2);
+ if (op2->IsCnsIntOrI())
+ {
+ /* Negate the constant and change the node to be "+" */
+
+ op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
+ oper = GT_ADD;
+ tree->ChangeOper(oper);
+ goto CM_ADD_OP;
+ }
+
+ /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */
+
+ noway_assert(op1);
+ if (op1->IsCnsIntOrI())
+ {
+ noway_assert(varTypeIsIntOrI(tree));
+
+ tree->gtOp.gtOp2 = op2 =
+ gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG node should be the same
+ // as the type of the tree, i.e. tree->gtType.
+ fgMorphTreeDone(op2);
+
+ oper = GT_ADD;
+ tree->ChangeOper(oper);
+ goto CM_ADD_OP;
+ }
+
+ /* No match - exit */
+
+ break;
+
+#ifdef _TARGET_ARM64_
+ case GT_DIV:
+ if (!varTypeIsFloating(tree->gtType))
+ {
+ // Codegen for this instruction needs to be able to throw two exceptions:
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
+ }
+ break;
+ case GT_UDIV:
+ // Codegen for this instruction needs to be able to throw one exception:
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
+ break;
+#endif
+
+ case GT_ADD:
+
+ CM_OVF_OP:
+ if (tree->gtOverflow())
+ {
+ tree->gtRequestSetFlags();
+
+ // Add the exception-throwing basic block to jump to on overflow
+
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
+
+ // We can't do any commutative morphing for overflow instructions
+
+ break;
+ }
+
+ CM_ADD_OP:
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+
+ /* Commute any non-REF constants to the right */
+
+ noway_assert(op1);
+ if (op1->OperIsConst() && (op1->gtType != TYP_REF))
+ {
+ // TODO-Review: We used to assert here that
+ // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
+ // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
+ // and would sometimes hit this assertion. This may indicate a missed "remorph".
+ // Task is to re-enable this assertion and investigate.
+
+ /* Swap the operands */
+ tree->gtOp.gtOp1 = op2;
+ tree->gtOp.gtOp2 = op1;
+
+ op1 = op2;
+ op2 = tree->gtOp.gtOp2;
+ }
+
+ /* See if we can fold GT_ADD nodes. */
+
+ if (oper == GT_ADD)
+ {
+ /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */
+
+ if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
+ op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
+ !op1->gtOverflow() && !op2->gtOverflow())
+ {
+ cns1 = op1->gtOp.gtOp2;
+ cns2 = op2->gtOp.gtOp2;
+ cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
+#ifdef _TARGET_64BIT_
+ if (cns1->TypeGet() == TYP_INT)
+ {
+ // we need to properly re-sign-extend or truncate after adding two int constants above
+ cns1->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif //_TARGET_64BIT_
+
+ tree->gtOp.gtOp2 = cns1;
+ DEBUG_DESTROY_NODE(cns2);
+
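+ // Move y (op2's non-constant operand) under op1 so that op1 becomes (x+y);
+ // the combined constant is already installed as the tree's new op2.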
+ op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
+ op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
+ DEBUG_DESTROY_NODE(op2);
+ op2 = tree->gtOp.gtOp2;
+ }
+
+ if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
+ {
+ /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */
+
+ if (op1->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op1) && op1->gtOp.gtOp2->IsCnsIntOrI() &&
+ !op1->gtOverflow() && op1->gtOp.gtOp2->OperGet() == op2->OperGet())
+ {
+ cns1 = op1->gtOp.gtOp2;
+ op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
+ op2->gtIntConCommon.IconValue());
+#ifdef _TARGET_64BIT_
+ if (op2->TypeGet() == TYP_INT)
+ {
+ // we need to properly re-sign-extend or truncate after adding two int constants above
+ op2->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif //_TARGET_64BIT_
+
+ if (cns1->OperGet() == GT_CNS_INT)
+ {
+ op2->gtIntCon.gtFieldSeq =
+ GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
+ }
+ DEBUG_DESTROY_NODE(cns1);
+
+ tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
+ DEBUG_DESTROY_NODE(op1);
+ op1 = tree->gtOp.gtOp1;
+ }
+
+ // Fold (x + 0).
+
+ if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
+ {
+
+ // If this addition is adding an offset to a null pointer,
+ // avoid the work and yield the null pointer immediately.
+ // Dereferencing the pointer in either case will have the
+ // same effect.
+
+ if (!gtIsActiveCSE_Candidate(op1) && varTypeIsGC(op2->TypeGet()))
+ {
+ op2->gtType = tree->gtType;
+ DEBUG_DESTROY_NODE(op1);
+ DEBUG_DESTROY_NODE(tree);
+ return op2;
+ }
+
+ // Remove the addition iff it won't change the tree type
+ // to TYP_REF.
+
+ if (!gtIsActiveCSE_Candidate(op2) &&
+ ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
+ {
+ if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
+ (op2->gtIntCon.gtFieldSeq != nullptr) &&
+ (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
+ {
+ fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
+ }
+
+ DEBUG_DESTROY_NODE(op2);
+ DEBUG_DESTROY_NODE(tree);
+
+ return op1;
+ }
+ }
+ }
+ }
+ /* See if we can fold GT_MUL by const nodes */
+ else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
+ {
+#ifndef _TARGET_64BIT_
+ noway_assert(typ <= TYP_UINT);
+#endif // _TARGET_64BIT_
+ noway_assert(!tree->gtOverflow());
+
+ ssize_t mult = op2->gtIntConCommon.IconValue();
+ bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
+ op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();
+
+ assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);
+
+ if (mult == 0)
+ {
+ // We may be able to throw away op1 (unless it has side-effects)
+
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
+ {
+ DEBUG_DESTROY_NODE(op1);
+ DEBUG_DESTROY_NODE(tree);
+ return op2; // Just return the "0" node
+ }
+
+ // We need to keep op1 for the side-effects. Hang it off
+ // a GT_COMMA node
+
+ tree->ChangeOper(GT_COMMA);
+ return tree;
+ }
+
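+ // Take the absolute value of the multiplier and isolate its lowest set bit; if the
+ // multiplier has only a single bit set it is a power of two and the multiply can become a shift.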
+ size_t abs_mult = (mult >= 0) ? mult : -mult;
+ size_t lowestBit = genFindLowestBit(abs_mult);
+ bool changeToShift = false;
+
+ // is it a power of two? (positive or negative)
+ if (abs_mult == lowestBit)
+ {
+ // if negative, negate (min-int does not need negation)
+ if (mult < 0 && mult != SSIZE_T_MIN)
+ {
+ tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
+ fgMorphTreeDone(op1);
+ }
+
+ // If "op2" is a constant array index, the other multiplicand must be a constant.
+ // Transfer the annotation to the other one.
+ if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
+ op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
+ {
+ assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
+ GenTreePtr otherOp = op1;
+ if (otherOp->OperGet() == GT_NEG)
+ {
+ otherOp = otherOp->gtOp.gtOp1;
+ }
+ assert(otherOp->OperGet() == GT_CNS_INT);
+ assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
+ otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
+ }
+
+ if (abs_mult == 1)
+ {
+ DEBUG_DESTROY_NODE(op2);
+ DEBUG_DESTROY_NODE(tree);
+ return op1;
+ }
+
+ /* Change the multiplication into a shift by log2(val) bits */
+ op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
+ changeToShift = true;
+ }
+#if LEA_AVAILABLE
+ else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
+ {
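+ // Split abs_mult into factor * 2^shift; when factor is 3, 5 or 9 the multiply below
+ // is rewritten as a smaller multiply by that factor followed by a left shift.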
+ int shift = genLog2(lowestBit);
+ ssize_t factor = abs_mult >> shift;
+
+ if (factor == 3 || factor == 5 || factor == 9)
+ {
+ // if negative, negate (min-int does not need negation)
+ if (mult < 0 && mult != SSIZE_T_MIN)
+ {
+ tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
+ fgMorphTreeDone(op1);
+ }
+
+ GenTreePtr factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
+ if (op2IsConstIndex)
+ {
+ factorIcon->AsIntCon()->gtFieldSeq =
+ GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
+ }
+
+ // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
+ tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
+ fgMorphTreeDone(op1);
+
+ op2->gtIntConCommon.SetIconValue(shift);
+ changeToShift = true;
+ }
+ }
+#endif // LEA_AVAILABLE
+ if (changeToShift)
+ {
+ // vnStore is null before the ValueNumber phase has run
+ if (vnStore != nullptr)
+ {
+ // Update the ValueNumber for 'op2', as we just changed the constant
+ fgValueNumberTreeConst(op2);
+ }
+ oper = GT_LSH;
+ // Keep the old ValueNumber for 'tree' as the new expr
+ // will still compute the same value as before
+ tree->ChangeOper(oper, GenTree::PRESERVE_VN);
+
+ goto DONE_MORPHING_CHILDREN;
+ }
+ }
+ else if (fgOperIsBitwiseRotationRoot(oper))
+ {
+ tree = fgRecognizeAndMorphBitwiseRotation(tree);
+
+ // fgRecognizeAndMorphBitwiseRotation may return a new tree
+ oper = tree->OperGet();
+ typ = tree->TypeGet();
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ }
+
+ break;
+
+ case GT_CHS:
+ case GT_NOT:
+ case GT_NEG:
+
+ /* Any constant cases should have been folded earlier */
+ noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
+ break;
+
+ case GT_CKFINITE:
+
+ noway_assert(varTypeIsFloating(op1->TypeGet()));
+
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN, fgPtrArgCntCur);
+ break;
+
+ case GT_OBJ:
+ // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
+ // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
+ // is a local or clsVar, even if it has been address-exposed.
+ if (op1->OperGet() == GT_ADDR)
+ {
+ tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
+ }
+ break;
+
+ case GT_IND:
+
+ // Can not remove a GT_IND if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(tree))
+ {
+ break;
+ }
+
+ bool foldAndReturnTemp;
+ foldAndReturnTemp = false;
+ temp = nullptr;
+ ival1 = 0;
+
+ /* Try to Fold *(&X) into X */
+ if (op1->gtOper == GT_ADDR)
+ {
+ // Can not remove a GT_ADDR if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(op1))
+ {
+ break;
+ }
+
+ temp = op1->gtOp.gtOp1; // X
+
+ // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
+ // they are the *same* struct type. In fact, they almost certainly aren't. If the
+ // address has an associated field sequence, that identifies this case; go through
+ // the "lcl_fld" path rather than this one.
+ FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
+ if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
+ {
+ foldAndReturnTemp = true;
+ }
+ else if (temp->OperIsLocal())
+ {
+ unsigned lclNum = temp->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ // We will try to optimize when we have a promoted struct with a zero lvFldOffset
+ if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
+ {
+ noway_assert(varTypeIsStruct(varDsc));
+
+ // We will try to optimize when we have a single field struct that is being struct promoted
+ if (varDsc->lvFieldCnt == 1)
+ {
+ unsigned lclNumFld = varDsc->lvFieldLclStart;
+ // just grab the promoted field
+ LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];
+
+ // Also make sure that the tree type matches the fieldVarType and that its lvFldOffset
+ // is zero
+ if (fieldVarDsc->TypeGet() == tree->TypeGet() && (fieldVarDsc->lvFldOffset == 0))
+ {
+ // We can just use the existing promoted field LclNum
+ temp->gtLclVarCommon.SetLclNum(lclNumFld);
+ temp->gtType = fieldVarDsc->TypeGet();
+
+ foldAndReturnTemp = true;
+ }
+ }
+ }
+ // If the type of the IND (typ) is a "small int", and the type of the local has the
+ // same width, then we can reduce to just the local variable -- it will be
+ // correctly normalized, and signed/unsigned differences won't matter.
+ //
+ // The below transformation cannot be applied if the local var needs to be normalized on load.
+ else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
+ !lvaTable[lclNum].lvNormalizeOnLoad())
+ {
+ tree->gtType = temp->gtType;
+ foldAndReturnTemp = true;
+ }
+ else
+ {
+ // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e.
+ // nullptr)
+ assert(fieldSeq == nullptr);
+ bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
+ assert(b || fieldSeq == nullptr);
+
+ if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
+ {
+ // Append the field sequence, change the type.
+ temp->AsLclFld()->gtFieldSeq =
+ GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
+ temp->gtType = tree->TypeGet();
+
+ foldAndReturnTemp = true;
+ }
+ }
+ // Otherwise we will fold this into a GT_LCL_FLD below
+ // where we check (temp != nullptr)
+ }
+ else // !temp->OperIsLocal()
+ {
+ // We don't try to fold away the GT_IND/GT_ADDR for this case
+ temp = nullptr;
+ }
+ }
+ else if (op1->OperGet() == GT_ADD)
+ {
+ /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */
+
+ if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
+ (!(opts.MinOpts() || opts.compDbgCode)))
+ {
+ // No overflow arithmetic with pointers
+ noway_assert(!op1->gtOverflow());
+
+ temp = op1->gtOp.gtOp1->gtOp.gtOp1;
+ if (!temp->OperIsLocal())
+ {
+ temp = nullptr;
+ break;
+ }
+
+ // Can not remove the GT_ADDR if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
+ {
+ break;
+ }
+
+ ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
+ fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
+
+ // Does the address have an associated zero-offset field sequence?
+ FieldSeqNode* addrFieldSeq = nullptr;
+ if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
+ {
+ fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
+ }
+
+ if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
+ {
+ noway_assert(!varTypeIsGC(temp->TypeGet()));
+ foldAndReturnTemp = true;
+ }
+ else
+ {
+ // The emitter can't handle large offsets
+ if (ival1 != (unsigned short)ival1)
+ {
+ break;
+ }
+
+ // The emitter can get confused by invalid offsets
+ if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
+ {
+ break;
+ }
+
+#ifdef _TARGET_ARM_
+ // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
+ //
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ if ((ival1 % emitTypeSize(tree->TypeGet())) != 0)
+ {
+ tree->gtFlags |= GTF_IND_UNALIGNED;
+ break;
+ }
+ }
+#endif
+ }
+ // Now we can fold this into a GT_LCL_FLD below
+ // where we check (temp != nullptr)
+ }
+ }
+
+#ifdef DEBUG
+ // If we have decided to fold, then temp cannot be nullptr
+ if (foldAndReturnTemp)
+ {
+ assert(temp != nullptr);
+ }
+#endif
+
+ if (temp != nullptr)
+ {
+ noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR);
+
+ // If we haven't already decided to fold this expression
+ //
+ if (!foldAndReturnTemp)
+ {
+ noway_assert(temp->OperIsLocal());
+ LclVarDsc* varDsc = &(lvaTable[temp->AsLclVarCommon()->gtLclNum]);
+ // Make sure we don't separately promote the fields of this struct.
+ if (varDsc->lvRegStruct)
+ {
+ // We can enregister, but can't promote.
+ varDsc->lvPromoted = false;
+ }
+ else
+ {
+ lvaSetVarDoNotEnregister(temp->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
+ }
+
+ // We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival1',
+ // or if we already have a GT_LCL_FLD we will adjust its gtLclOffs by adding 'ival1'.
+ // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
+ //
+ if (temp->OperGet() == GT_LCL_FLD)
+ {
+ temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
+ temp->AsLclFld()->gtFieldSeq =
+ GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
+ }
+ else
+ {
+ temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
+ temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
+ if (fieldSeq != nullptr)
+ { // If it does represent a field, note that.
+ temp->AsLclFld()->gtFieldSeq = fieldSeq;
+ }
+ }
+ temp->gtType = tree->gtType;
+ foldAndReturnTemp = true;
+ }
+
+ assert(foldAndReturnTemp == true);
+
+ // Keep the DONT_CSE flag in sync
+ // (i.e. keep the original value of this flag from tree)
+ // as it can be set for 'temp' because a GT_ADDR always marks it for its op1
+ //
+ temp->gtFlags &= ~GTF_DONT_CSE;
+ temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
+
+ noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR);
+ noway_assert(temp->gtType == tree->gtType);
+
+ if (op1->OperGet() == GT_ADD)
+ {
+ DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
+ DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
+ }
+ DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR
+ DEBUG_DESTROY_NODE(tree); // GT_IND
+
+ return temp;
+ }
+
+ // Only do this optimization when we are in the global optimizer. Doing this after value numbering
+ // could result in an invalid value number for the newly generated GT_IND node.
+ if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
+ {
+ // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
+ // TBD: this transformation is currently necessary for correctness -- it might
+ // be good to analyze the failures that result if we don't do this, and fix them
+ // in other ways. Ideally, this should be optional.
+ GenTreePtr commaNode = op1;
+ unsigned treeFlags = tree->gtFlags;
+ commaNode->gtType = typ;
+ commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
+ // dangerous, clear the GTF_REVERSE_OPS at
+ // least.
+#ifdef DEBUG
+ commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
+ {
+ commaNode = commaNode->gtOp.gtOp2;
+ commaNode->gtType = typ;
+ commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
+ // dangerous, clear the GTF_REVERSE_OPS at
+ // least.
+#ifdef DEBUG
+ commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ }
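+ // If the original GT_IND carried an array-index annotation, move that annotation
+ // to the new GT_IND created below.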
+ bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
+ ArrayInfo arrInfo;
+ if (wasArrIndex)
+ {
+ bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
+ assert(b);
+ GetArrayInfoMap()->Remove(tree);
+ }
+ tree = op1;
+ op1 = gtNewOperNode(GT_IND, typ, commaNode->gtOp.gtOp2);
+ op1->gtFlags = treeFlags;
+ if (wasArrIndex)
+ {
+ GetArrayInfoMap()->Set(op1, arrInfo);
+ }
+#ifdef DEBUG
+ op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ commaNode->gtOp.gtOp2 = op1;
+ return tree;
+ }
+
+ break;
+
+ case GT_ADDR:
+
+ // Can not remove op1 if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(op1))
+ {
+ break;
+ }
+
+ if (op1->OperGet() == GT_IND)
+ {
+ if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
+ {
+ // Can not remove a GT_ADDR if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(tree))
+ {
+ break;
+ }
+
+ // Perform the transform ADDR(IND(...)) == (...).
+ GenTreePtr addr = op1->gtOp.gtOp1;
+
+ noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
+
+ DEBUG_DESTROY_NODE(op1);
+ DEBUG_DESTROY_NODE(tree);
+
+ return addr;
+ }
+ }
+ else if (op1->OperGet() == GT_OBJ)
+ {
+ // Can not remove a GT_ADDR if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(tree))
+ {
+ break;
+ }
+
+ // Perform the transform ADDR(OBJ(...)) == (...).
+ GenTreePtr addr = op1->AsObj()->Addr();
+
+ noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
+
+ DEBUG_DESTROY_NODE(op1);
+ DEBUG_DESTROY_NODE(tree);
+
+ return addr;
+ }
+ else if (op1->gtOper == GT_CAST)
+ {
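+ // ADDR(CAST(lclVar or clsVar)): the cast does not change the address, so take the
+ // address of the variable directly.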
+ GenTreePtr casting = op1->gtCast.CastOp();
+ if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
+ {
+ DEBUG_DESTROY_NODE(op1);
+ tree->gtOp.gtOp1 = op1 = casting;
+ }
+ }
+ else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
+ {
+ // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
+ // (Be sure to mark "z" as an l-value...)
+ GenTreePtr commaNode = op1;
+ while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
+ {
+ commaNode = commaNode->gtOp.gtOp2;
+ }
+ // The top-level addr might be annotated with a zeroOffset field.
+ FieldSeqNode* zeroFieldSeq = nullptr;
+ bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
+ tree = op1;
+ commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;
+
+ // If the node we're about to put under a GT_ADDR is an indirection, it
+ // doesn't need to be materialized, since we only want the addressing mode. Because
+ // of this, this GT_IND is not a faulting indirection and we don't have to extract it
+ // as a side effect.
+ GenTree* commaOp2 = commaNode->gtOp.gtOp2;
+ if (commaOp2->OperIsBlk())
+ {
+ commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
+ }
+ if (commaOp2->gtOper == GT_IND)
+ {
+ commaOp2->gtFlags |= GTF_IND_NONFAULTING;
+ }
+
+ op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);
+
+ if (isZeroOffset)
+ {
+ // Transfer the annotation to the new GT_ADDR node.
+ GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
+ }
+ commaNode->gtOp.gtOp2 = op1;
+ // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform
+ // might give op1 a type different from byref (like, say, native int). So now go back and give
+ // all the comma nodes the type of op1.
+ // TODO: the comma flag update below is conservative and can be improved.
+ // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
+ // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
+ commaNode = tree;
+ while (commaNode->gtOper == GT_COMMA)
+ {
+ commaNode->gtType = op1->gtType;
+ commaNode->gtFlags |= op1->gtFlags;
+#ifdef DEBUG
+ commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ commaNode = commaNode->gtOp.gtOp2;
+ }
+
+ return tree;
+ }
+
+ /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
+ op1->gtFlags |= GTF_DONT_CSE;
+ break;
+
+ case GT_COLON:
+ if (fgGlobalMorph)
+ {
+ /* Mark the nodes that are conditionally executed */
+ fgWalkTreePre(&tree, gtMarkColonCond);
+ }
+ /* Since we're doing this postorder we clear this if it got set by a child */
+ fgRemoveRestOfBlock = false;
+ break;
+
+ case GT_COMMA:
+
+ /* Special case: trees that don't produce a value */
+ if ((op2->OperKind() & GTK_ASGOP) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) ||
+ fgIsThrow(op2))
+ {
+ typ = tree->gtType = TYP_VOID;
+ }
+
+ // If we are in the Valuenum CSE phase then don't morph away anything as these
+ // nodes may have CSE defs/uses in them.
+ //
+ if (!optValnumCSE_phase)
+ {
+ // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this
+ // is all we need.
+
+ GenTreePtr op1SideEffects = nullptr;
+ // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
+ // hoisted expressions in loops.
+ gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
+ if (op1SideEffects)
+ {
+ // Replace the left hand side with the side effect list.
+ tree->gtOp.gtOp1 = op1SideEffects;
+ tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT);
+ }
+ else
+ {
+ /* The left operand is worthless, throw it away */
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(op1);
+ }
+ op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
+ DEBUG_DESTROY_NODE(tree);
+ DEBUG_DESTROY_NODE(op1);
+ return op2;
+ }
+
+ /* If the right operand is just a void nop node, throw it away */
+ if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
+ {
+ op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
+ DEBUG_DESTROY_NODE(tree);
+ DEBUG_DESTROY_NODE(op2);
+ return op1;
+ }
+ }
+
+ break;
+
+ case GT_JTRUE:
+
+ /* Special case if fgRemoveRestOfBlock is set to true */
+ if (fgRemoveRestOfBlock)
+ {
+ if (fgIsCommaThrow(op1, true))
+ {
+ GenTreePtr throwNode = op1->gtOp.gtOp1;
+ noway_assert(throwNode->gtType == TYP_VOID);
+
+ return throwNode;
+ }
+
+ noway_assert(op1->OperKind() & GTK_RELOP);
+ noway_assert(op1->gtFlags & GTF_EXCEPT);
+
+ // We need to keep op1 for the side-effects. Hang it off
+ // a GT_COMMA node
+
+ tree->ChangeOper(GT_COMMA);
+ tree->gtOp.gtOp2 = op2 = gtNewNothingNode();
+
+ // Additionally, since we're eliminating the JTRUE,
+ // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
+ // So we change it into a GT_COMMA as well.
+ op1->ChangeOper(GT_COMMA);
+ op1->gtType = op1->gtOp.gtOp1->gtType;
+
+ return tree;
+ }
+
+ default:
+ break;
+ }
+
+ noway_assert(oper == tree->gtOper);
+
+ // If we are in the Valuenum CSE phase then don't morph away anything as these
+ // nodes may have CSE defs/uses in them.
+ //
+ if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->IsList())
+ {
+ /* Check for op1 as a GT_COMMA with an unconditional throw node */
+ if (op1 && fgIsCommaThrow(op1, true))
+ {
+ if ((op1->gtFlags & GTF_COLON_COND) == 0)
+ {
+ /* We can safely throw out the rest of the statements */
+ fgRemoveRestOfBlock = true;
+ }
+
+ GenTreePtr throwNode = op1->gtOp.gtOp1;
+ noway_assert(throwNode->gtType == TYP_VOID);
+
+ if (oper == GT_COMMA)
+ {
+ /* Both tree and op1 are GT_COMMA nodes */
+ /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
+ tree->gtOp.gtOp1 = throwNode;
+ return tree;
+ }
+ else if (oper != GT_NOP)
+ {
+ if (genActualType(typ) == genActualType(op1->gtType))
+ {
+ /* The types match so, return the comma throw node as the new tree */
+ return op1;
+ }
+ else
+ {
+ if (typ == TYP_VOID)
+ {
+ // Return the throw node
+ return throwNode;
+ }
+ else
+ {
+ GenTreePtr commaOp2 = op1->gtOp.gtOp2;
+
+ // need type of oper to be same as tree
+ if (typ == TYP_LONG)
+ {
+ commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
+ commaOp2->gtIntConCommon.SetLngValue(0);
+ /* Change the types of oper and commaOp2 to TYP_LONG */
+ op1->gtType = commaOp2->gtType = TYP_LONG;
+ }
+ else if (varTypeIsFloating(typ))
+ {
+ commaOp2->ChangeOperConst(GT_CNS_DBL);
+ commaOp2->gtDblCon.gtDconVal = 0.0;
+ /* Change the types of oper and commaOp2 to TYP_DOUBLE */
+ op1->gtType = commaOp2->gtType = TYP_DOUBLE;
+ }
+ else
+ {
+ commaOp2->ChangeOperConst(GT_CNS_INT);
+ commaOp2->gtIntConCommon.SetIconValue(0);
+ /* Change the types of oper and commaOp2 to TYP_INT */
+ op1->gtType = commaOp2->gtType = TYP_INT;
+ }
+
+ /* Return the GT_COMMA node as the new tree */
+ return op1;
+ }
+ }
+ }
+ }
+
+ /* Check for op2 as a GT_COMMA with an unconditional throw */
+
+ if (op2 && fgIsCommaThrow(op2, true))
+ {
+ if ((op2->gtFlags & GTF_COLON_COND) == 0)
+ {
+ /* We can safely throw out the rest of the statements */
+ fgRemoveRestOfBlock = true;
+ }
+
+ // If op1 has no side-effects
+ if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ // If tree is an asg node
+ if (tree->OperIsAssignment())
+ {
+ /* Return the throw node as the new tree */
+ return op2->gtOp.gtOp1;
+ }
+
+ if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
+ {
+ /* Return the throw node as the new tree */
+ return op2->gtOp.gtOp1;
+ }
+
+ // If tree is a comma node
+ if (tree->OperGet() == GT_COMMA)
+ {
+ /* Return the throw node as the new tree */
+ return op2->gtOp.gtOp1;
+ }
+
+ /* for the shift nodes the type of op2 can differ from the tree type */
+ if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
+ {
+ noway_assert(GenTree::OperIsShiftOrRotate(oper));
+
+ GenTreePtr commaOp2 = op2->gtOp.gtOp2;
+
+ commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
+ commaOp2->gtIntConCommon.SetLngValue(0);
+
+ /* Change the types of oper and commaOp2 to TYP_LONG */
+ op2->gtType = commaOp2->gtType = TYP_LONG;
+ }
+
+ if ((genActualType(typ) == TYP_INT) &&
+ (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
+ {
+ // An example case is comparison (say GT_GT) of two longs or floating point values.
+
+ GenTreePtr commaOp2 = op2->gtOp.gtOp2;
+
+ commaOp2->ChangeOperConst(GT_CNS_INT);
+ commaOp2->gtIntCon.gtIconVal = 0;
+ /* Change the types of oper and commaOp2 to TYP_INT */
+ op2->gtType = commaOp2->gtType = TYP_INT;
+ }
+
+ if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
+ {
+ noway_assert(tree->OperGet() == GT_ADD);
+
+ GenTreePtr commaOp2 = op2->gtOp.gtOp2;
+
+ commaOp2->ChangeOperConst(GT_CNS_INT);
+ commaOp2->gtIntCon.gtIconVal = 0;
+ /* Change the types of oper and commaOp2 to TYP_BYREF */
+ op2->gtType = commaOp2->gtType = TYP_BYREF;
+ }
+
+ /* types should now match */
+ noway_assert((genActualType(typ) == genActualType(op2->gtType)));
+
+ /* Return the GT_COMMA node as the new tree */
+ return op2;
+ }
+ }
+ }
+
+ /*-------------------------------------------------------------------------
+ * Optional morphing is done if tree transformations are permitted
+ */
+
+ if ((opts.compFlags & CLFLG_TREETRANS) == 0)
+ {
+ return tree;
+ }
+
+ tree = fgMorphSmpOpOptional(tree->AsOp());
+
+ } // extra scope for gcc workaround
+ return tree;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
+{
+ genTreeOps oper = tree->gtOper;
+ GenTree* op1 = tree->gtOp1;
+ GenTree* op2 = tree->gtOp2;
+ var_types typ = tree->TypeGet();
+
+ if (GenTree::OperIsCommutative(oper))
+ {
+ /* Swap the operands so that the more expensive one is 'op1' */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tree->gtOp1 = op2;
+ tree->gtOp2 = op1;
+
+ op2 = op1;
+ op1 = tree->gtOp1;
+
+ tree->gtFlags &= ~GTF_REVERSE_OPS;
+ }
+
+ if (oper == op2->gtOper)
+ {
+ /* Reorder nested operators at the same precedence level to be
+ left-recursive. For example, change "(a+(b+c))" to the
+ equivalent expression "((a+b)+c)".
+ */
+
+ /* Things are handled differently for floating-point operators */
+
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ fgMoveOpsLeft(tree);
+ op1 = tree->gtOp1;
+ op2 = tree->gtOp2;
+ }
+ }
+ }
+
+#if REARRANGE_ADDS
+
+ /* Change "((x+icon)+y)" to "((x+y)+icon)"
+ Don't reorder floating-point operations */
+
+ if ((oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
+ varTypeIsIntegralOrI(typ))
+ {
+ GenTreePtr ad2 = op1->gtOp.gtOp2;
+
+ if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0)
+ {
+ // This takes
+ // + (tree)
+ // / \
+ // / \
+ // / \
+ // + (op1) op2
+ // / \
+ // \
+ // ad2
+ //
+ // And it swaps ad2 and op2. If (op2) is varTypeIsGC, then this implies that (tree) is
+ // varTypeIsGC. If (op1) is not, then when we swap (ad2) and (op2), then we have a TYP_INT node
+ // (op1) with a child that is varTypeIsGC. If we encounter that situation, make (op1) the same
+ // type as (tree).
+ //
+ // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is
+ // necessary
+
+ if (varTypeIsGC(op2->TypeGet()))
+ {
+ noway_assert(varTypeIsGC(typ));
+ op1->gtType = typ;
+ }
+ tree->gtOp2 = ad2;
+
+ op1->gtOp.gtOp2 = op2;
+ op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
+
+ op2 = tree->gtOp2;
+ }
+ }
+
+#endif
+
+ /*-------------------------------------------------------------------------
+ * Perform optional oper-specific postorder morphing
+ */
+
+ switch (oper)
+ {
+ genTreeOps cmop;
+ bool dstIsSafeLclVar;
+
+ case GT_ASG:
+ /* We'll convert "a = a <op> x" into "a <op>= x" */
+ /* and also "a = x <op> a" into "a <op>= x" for communative ops */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !LONG_ASG_OPS
+ if (typ == TYP_LONG)
+ {
+ break;
+ }
+#endif
+
+ if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
+ {
+ if (tree->OperIsCopyBlkOp())
+ {
+ return fgMorphCopyBlock(tree);
+ }
+ else
+ {
+ return fgMorphInitBlock(tree);
+ }
+ }
+
+ /* Make sure we're allowed to do this */
+
+ if (optValnumCSE_phase)
+ {
+ // It is not safe to reorder/delete CSE's
+ break;
+ }
+
+ /* Are we assigning to a GT_LCL_VAR ? */
+
+ dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);
+
+ /* If we have a GT_LCL_VAR, then is the address taken? */
+ if (dstIsSafeLclVar)
+ {
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ noway_assert(lclNum < lvaCount);
+
+ /* Is the address taken? */
+ if (varDsc->lvAddrExposed)
+ {
+ dstIsSafeLclVar = false;
+ }
+ else if (op2->gtFlags & GTF_ASG)
+ {
+ break;
+ }
+ }
+
+ if (!dstIsSafeLclVar)
+ {
+ if (op2->gtFlags & GTF_ASG)
+ {
+ break;
+ }
+
+ if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
+ {
+ break;
+ }
+ }
+
+ /* Special case: a cast that can be thrown away */
+
+ if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
+ {
+ var_types srct;
+ var_types cast;
+ var_types dstt;
+
+ srct = op2->gtCast.CastOp()->TypeGet();
+ cast = (var_types)op2->CastToType();
+ dstt = op1->TypeGet();
+
+ /* Make sure these are all ints and precision is not lost */
+
+ if (cast >= dstt && dstt <= TYP_INT && srct <= TYP_INT)
+ {
+ op2 = tree->gtOp2 = op2->gtCast.CastOp();
+ }
+ }
+
+ /* Make sure we have the operator range right */
+
+ noway_assert(GT_SUB == GT_ADD + 1);
+ noway_assert(GT_MUL == GT_ADD + 2);
+ noway_assert(GT_DIV == GT_ADD + 3);
+ noway_assert(GT_MOD == GT_ADD + 4);
+ noway_assert(GT_UDIV == GT_ADD + 5);
+ noway_assert(GT_UMOD == GT_ADD + 6);
+
+ noway_assert(GT_OR == GT_ADD + 7);
+ noway_assert(GT_XOR == GT_ADD + 8);
+ noway_assert(GT_AND == GT_ADD + 9);
+
+ noway_assert(GT_LSH == GT_ADD + 10);
+ noway_assert(GT_RSH == GT_ADD + 11);
+ noway_assert(GT_RSZ == GT_ADD + 12);
+
+ /* Check for a suitable operator on the RHS */
+
+ cmop = op2->OperGet();
+
+ switch (cmop)
+ {
+ case GT_NEG:
+ // GT_CHS only supported for integer types
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ break;
+ }
+
+ goto ASG_OP;
+
+ case GT_MUL:
+ // GT_ASG_MUL only supported for floating point types
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_ADD:
+ case GT_SUB:
+ if (op2->gtOverflow())
+ {
+ /* Disable folding into "<op>=" if the result can be
+ visible to anyone, as <op> may throw an exception and
+ the assignment should not proceed.
+ We are safe with an assignment to a local variable.
+ */
+ if (ehBlockHasExnFlowDsc(compCurBB))
+ {
+ break;
+ }
+ if (!dstIsSafeLclVar)
+ {
+ break;
+ }
+ }
+#ifndef _TARGET_AMD64_
+ // This is hard for byte-operations as we need to make
+ // sure both operands are in RBM_BYTE_REGS.
+ if (varTypeIsByte(op2->TypeGet()))
+ break;
+#endif // _TARGET_AMD64_
+ goto ASG_OP;
+
+ case GT_DIV:
+ case GT_UDIV:
+ // GT_ASG_DIV only supported for floating point types
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ break;
+ }
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+
+#if LONG_ASG_OPS
+
+ if (typ == TYP_LONG)
+ break;
+#endif
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+
+#if LONG_ASG_OPS
+
+ /* TODO: allow non-const long assignment operators */
+
+ if (typ == TYP_LONG && op2->gtOp.gtOp2->gtOper != GT_CNS_LNG)
+ break;
+#endif
+
+ ASG_OP:
+ {
+ bool bReverse = false;
+ bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
+ if (bAsgOpFoldable)
+ {
+ if (bReverse)
+ {
+ // We will transform this from "a = x <op> a" to "a <op>= x"
+ // so we can now destroy the duplicate "a"
+ DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
+ op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
+ }
+
+ /* Special case: "x |= -1" and "x &= 0" */
+ if (((cmop == GT_AND) && op2->gtOp.gtOp2->IsIntegralConst(0)) ||
+ ((cmop == GT_OR) && op2->gtOp.gtOp2->IsIntegralConst(-1)))
+ {
+ /* Simply change to an assignment */
+ tree->gtOp2 = op2->gtOp.gtOp2;
+ break;
+ }
+
+ if (cmop == GT_NEG)
+ {
+ /* This is "x = -x;", use the flipsign operator */
+
+ tree->ChangeOper(GT_CHS);
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ op1->gtFlags |= GTF_VAR_USEASG;
+ }
+
+ tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());
+
+ break;
+ }
+
+ if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
+ {
+ // Changing from x = x op y to x op= y when x is a small integer type
+ // makes the op size smaller (originally the op size was 32 bits, after
+ // sign or zero extension of x, and there is an implicit truncation in the
+ // assignment).
+ // This is ok in most cases because the upper bits were
+ // lost when assigning the op result to a small type var,
+ // but it may not be ok for the right shift operation where the higher bits
+ // could be shifted into the lower bits and preserved.
+ // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
+ // (sbyte)x >>signed y), as does unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) ==
+ // (ubyte)x >>unsigned y), but signed right shift of an unsigned small type may give the
+ // wrong result:
+ // e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
+ // but (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
+ // The result becomes correct if we use >>unsigned instead of >>signed.
+ noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
+ cmop = GT_RSZ;
+ }
+
+ /* Replace with an assignment operator */
+ noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
+ noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
+ noway_assert(GT_OR - GT_ADD == GT_ASG_OR - GT_ASG_ADD);
+ noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
+ noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
+ noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
+ noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
+ noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);
+
+ tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
+ tree->gtOp2 = op2->gtOp.gtOp2;
+
+ /* Propagate GTF_OVERFLOW */
+
+ if (op2->gtOverflowEx())
+ {
+ tree->gtType = op2->gtType;
+ tree->gtFlags |= (op2->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
+ }
+
+#if FEATURE_SET_FLAGS
+
+ /* Propagate GTF_SET_FLAGS */
+ if (op2->gtSetFlags())
+ {
+ tree->gtRequestSetFlags();
+ }
+
+#endif // FEATURE_SET_FLAGS
+
+ DEBUG_DESTROY_NODE(op2);
+ op2 = tree->gtOp2;
+
+ /* The target is used as well as being defined */
+ if (op1->OperIsLocal())
+ {
+ op1->gtFlags |= GTF_VAR_USEASG;
+ }
+
+#if CPU_HAS_FP_SUPPORT
+ /* Check for the special case "x += y * x;" */
+
+ // The transformation below produces a GT_ASG_MUL, which is only supported
+ // for floating point types, and it only applies to += and -=
+ if (cmop != GT_ADD && cmop != GT_SUB)
+ {
+ break;
+ }
+
+ if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
+ {
+ if (GenTree::Compare(op1, op2->gtOp.gtOp1))
+ {
+ /* Change "x += x * y" into "x *= (y + 1)" */
+
+ op2 = op2->gtOp.gtOp2;
+ }
+ else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
+ {
+ /* Change "x += y * x" into "x *= (y + 1)" */
+
+ op2 = op2->gtOp.gtOp1;
+ }
+ else
+ {
+ break;
+ }
+
+ op1 = gtNewDconNode(1.0);
+
+ /* Now make the "*=" node */
+
+ if (cmop == GT_ADD)
+ {
+ /* Change "x += x * y" into "x *= (y + 1)" */
+
+ tree->gtOp2 = op2 = gtNewOperNode(GT_ADD, tree->TypeGet(), op2, op1);
+ }
+ else
+ {
+ /* Change "x -= x * y" into "x *= (1 - y)" */
+
+ noway_assert(cmop == GT_SUB);
+ tree->gtOp2 = op2 = gtNewOperNode(GT_SUB, tree->TypeGet(), op1, op2);
+ }
+ tree->ChangeOper(GT_ASG_MUL);
+ }
+#endif // CPU_HAS_FP_SUPPORT
+ }
+ }
+
+ break;
+
+ case GT_NOT:
+
+ /* Is the destination identical to the first RHS sub-operand? */
+
+ if (GenTree::Compare(op1, op2->gtOp.gtOp1))
+ {
+ /* This is "x = ~x" which is the same as "x ^= -1"
+ * Transform the node into a GT_ASG_XOR */
+
+ noway_assert(genActualType(typ) == TYP_INT || genActualType(typ) == TYP_LONG);
+
+ op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT) ? gtNewIconNode(-1) : gtNewLconNode(-1);
+
+ cmop = GT_XOR;
+ goto ASG_OP;
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ break;
+
+ case GT_MUL:
+
+ /* Check for the case "(val + icon) * icon" */
+
+ if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
+ {
+ GenTreePtr add = op1->gtOp.gtOp2;
+
+ if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
+ {
+ if (tree->gtOverflow() || op1->gtOverflow())
+ {
+ break;
+ }
+
+ ssize_t imul = op2->gtIntCon.gtIconVal;
+ ssize_t iadd = add->gtIntCon.gtIconVal;
+
+ /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
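+ /* Illustrative instance of this rewrite: (x + 3) * 5 becomes (x * 5) + 15 */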
+
+ oper = GT_ADD;
+ tree->ChangeOper(oper);
+
+ op2->gtIntCon.gtIconVal = iadd * imul;
+
+ op1->ChangeOper(GT_MUL);
+
+ add->gtIntCon.gtIconVal = imul;
+#ifdef _TARGET_64BIT_
+ if (add->gtType == TYP_INT)
+ {
+ // we need to properly re-sign-extend or truncate after multiplying two int constants above
+ add->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif //_TARGET_64BIT_
+ }
+ }
+
+ break;
+
+ case GT_DIV:
+
+ /* For "val / 1", just return "val" */
+
+ if (op2->IsIntegralConst(1))
+ {
+ DEBUG_DESTROY_NODE(tree);
+ return op1;
+ }
+
+ break;
+
+ case GT_LSH:
+
+ /* Check for the case "(val + icon) << icon" */
+
+ if (op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
+ {
+ GenTreePtr cns = op1->gtOp.gtOp2;
+
+ if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
+ {
+ ssize_t ishf = op2->gtIntConCommon.IconValue();
+ ssize_t iadd = cns->gtIntConCommon.IconValue();
+
+ // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
+
+ /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
+
+ tree->ChangeOper(GT_ADD);
+ ssize_t result = iadd << ishf;
+ op2->gtIntConCommon.SetIconValue(result);
+#ifdef _TARGET_64BIT_
+ if (op1->gtType == TYP_INT)
+ {
+ op2->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif // _TARGET_64BIT_
+
+ // we are reusing the shift amount node here, but the type we want is that of the shift result
+ op2->gtType = op1->gtType;
+
+ if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
+ cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
+ {
+ assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
+ op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
+ }
+
+ op1->ChangeOper(GT_LSH);
+
+ cns->gtIntConCommon.SetIconValue(ishf);
+ }
+ }
+
+ break;
+
+ case GT_XOR:
+
+ if (!optValnumCSE_phase)
+ {
+ /* "x ^ -1" is "~x" */
+
+ if (op2->IsIntegralConst(-1))
+ {
+ tree->ChangeOper(GT_NOT);
+ tree->gtOp2 = nullptr;
+ DEBUG_DESTROY_NODE(op2);
+ }
+ else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
+ {
+ /* "binaryVal ^ 1" is "!binaryVal" */
+ gtReverseCond(op1);
+ DEBUG_DESTROY_NODE(op2);
+ DEBUG_DESTROY_NODE(tree);
+ return op1;
+ }
+ }
+
+ break;
+
+ default:
+ break;
+ }
+ return tree;
+}
+
+ // Code to generate a magic number and shift amount for the magic number division
+ // optimization. This code comes from UTC, which notes that it was taken from
+ // _The_PowerPC_Compiler_Writer's_Guide_, pages 57-58.
+ // The paper it is based on is "Division by invariant integers using multiplication"
+ // by Torbjorn Granlund and Peter L. Montgomery, PLDI '94.
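+ //
+ // Illustrative sketch (not part of the algorithm itself): for a signed 32-bit divide by 3
+ // this routine yields magic == 0x55555556 and shift == 0, so the caller can compute
+ // q = MULHI(n, magic) and then add q's sign bit, giving n / 3 for both positive and
+ // negative n (e.g. n == 7 -> 2, n == -7 -> -2).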
+
+template <typename T>
+T GetSignedMagicNumberForDivide(T denom, int* shift /*out*/)
+{
+ // static SMAG smag;
+ const int bits = sizeof(T) * 8;
+ const int bits_minus_1 = bits - 1;
+
+ typedef typename jitstd::make_unsigned<T>::type UT;
+
+ const UT two_nminus1 = UT(1) << bits_minus_1;
+
+ int p;
+ UT absDenom;
+ UT absNc;
+ UT delta;
+ UT q1;
+ UT r1;
+ UT r2;
+ UT q2;
+ UT t;
+ T result_magic;
+ int result_shift;
+ int iters = 0;
+
+ absDenom = abs(denom);
+ t = two_nminus1 + ((unsigned int)denom >> 31);
+ absNc = t - 1 - (t % absDenom); // absolute value of nc
+ p = bits_minus_1; // initialize p
+ q1 = two_nminus1 / absNc; // initialize q1 = 2^p / abs(nc)
+ r1 = two_nminus1 - (q1 * absNc); // initialize r1 = rem(2^p, abs(nc))
+ q2 = two_nminus1 / absDenom; // initialize q2 = 2^p / abs(denom)
+ r2 = two_nminus1 - (q2 * absDenom); // initialize r2 = rem(2^p, abs(denom))
+
+ do
+ {
+ iters++;
+ p++;
+ q1 *= 2; // update q1 = 2^p / abs(nc)
+ r1 *= 2; // update r1 = rem(2^p, abs(nc))
+
+ if (r1 >= absNc)
+ { // must be unsigned comparison
+ q1++;
+ r1 -= absNc;
+ }
+
+ q2 *= 2; // update q2 = 2^p / abs(denom)
+ r2 *= 2; // update r2 = rem(2^p, abs(denom))
+
+ if (r2 >= absDenom)
+ { // must be unsigned comparison
+ q2++;
+ r2 -= absDenom;
+ }
+
+ delta = absDenom - r2;
+ } while (q1 < delta || (q1 == delta && r1 == 0));
+
+ result_magic = q2 + 1; // resulting magic number
+ if (denom < 0)
+ {
+ result_magic = -result_magic;
+ }
+ *shift = p - bits; // resulting shift
+
+ return result_magic;
+}
+
+bool Compiler::fgShouldUseMagicNumberDivide(GenTreeOp* tree)
+{
+#ifdef _TARGET_ARM64_
+ // TODO-ARM64-NYI: We don't have a 'mulHi' implementation yet for ARM64
+ return false;
+#else
+
+ // During the optOptimizeValnumCSEs phase we can call fgMorph and when we do,
+ // if this method returns true we will introduce a new LclVar and
+ // a couple of new GenTree nodes, including an assignment to the new LclVar.
+ // None of these new GenTree nodes will have valid ValueNumbers.
+ // That is an invalid state for a GenTree node during the optOptimizeValnumCSEs phase.
+ //
+ // Also during optAssertionProp when extracting side effects we can assert
+ // during gtBuildCommaList if we have one tree that has Value Numbers
+ // and another one that does not.
+ //
+ if (!fgGlobalMorph)
+ {
+ // We only perform the Magic Number Divide optimization during
+ // the initial global morph phase
+ return false;
+ }
+
+ if (tree->gtFlags & GTF_OVERFLOW)
+ {
+ return false;
+ }
+
+ if (tree->gtOp2->gtOper != GT_CNS_INT && tree->gtOp2->gtOper != GT_CNS_LNG)
+ {
+ return false;
+ }
+
+ ssize_t cons = tree->gtOp2->gtIntConCommon.IconValue();
+
+ if (cons == 0 || cons == -1 || cons == 1)
+ {
+ return false;
+ }
+
+ // codegen will expand these
+ if (cons == SSIZE_T_MIN || isPow2(abs(cons)))
+ {
+ return false;
+ }
+
+ // someone else will fold this away, so don't make it complicated for them
+ if (tree->gtOp1->IsCnsIntOrI())
+ {
+ return false;
+ }
+
+ // There is no technical barrier to handling unsigned division; however, it is
+ // quite rare and would require more work to support and test
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ return false;
+ }
+
+ return true;
+#endif
+}
+
+// transform x%c -> x-((x/c)*c)
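+ // Illustrative arithmetic (assuming truncating division): 17 % 5 becomes
+ // 17 - ((17 / 5) * 5) == 17 - 15 == 2; the resulting division can then be
+ // strength-reduced separately.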
+
+GenTree* Compiler::fgMorphModByConst(GenTreeOp* tree)
+{
+ assert(fgShouldUseMagicNumberDivide(tree));
+
+ var_types type = tree->gtType;
+
+ GenTree* cns = tree->gtOp2;
+
+ GenTree* numerator = fgMakeMultiUse(&tree->gtOp1);
+
+ tree->SetOper(GT_DIV);
+
+ GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(cns));
+
+ GenTree* sub = gtNewOperNode(GT_SUB, type, numerator, mul);
+
+#ifdef DEBUG
+ sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ return sub;
+}
+
+ // For ARM64 we don't have a remainder instruction; the architecture manual suggests
+ // the following transformation to generate code for such an operator:
+ //
+ // a % b = a - (a / b) * b;
+ //
+ // This method will produce the above expression if 'a' and 'b' are leaf nodes;
+ // otherwise, if either of them is not a leaf, it will spill its value into a
+ // temporary variable. An example:
+ // (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
+//
+GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
+{
+#ifndef _TARGET_ARM64_
+ assert(!"This should only be called for ARM64");
+#endif
+
+ if (tree->OperGet() == GT_MOD)
+ {
+ tree->SetOper(GT_DIV);
+ }
+ else if (tree->OperGet() == GT_UMOD)
+ {
+ tree->SetOper(GT_UDIV);
+ }
+ else
+ {
+ noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
+ }
+
+ var_types type = tree->gtType;
+ GenTree* denominator = tree->gtOp2;
+ GenTree* numerator = tree->gtOp1;
+
+ if (!numerator->OperIsLeaf())
+ {
+ numerator = fgMakeMultiUse(&tree->gtOp1);
+ }
+
+ if (!denominator->OperIsLeaf())
+ {
+ denominator = fgMakeMultiUse(&tree->gtOp2);
+ }
+
+ GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
+ GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
+
+#ifdef DEBUG
+ sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ return sub;
+}
+
+ // Turn a division by a constant into a multiplication by a constant plus some adjustments;
+ // see the comments on GetSignedMagicNumberForDivide for the source of this algorithm.
+ // Returns the transformed tree.
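+ //
+ // Illustrative shape of the result (a sketch, assuming signed x / d with magic M and shift s):
+ //   q  = MULHI(x, M)                 (plus or minus x when sign(M) != sign(d))
+ //   q  = q >> s                      (arithmetic shift, omitted when s == 0)
+ //   q += (unsigned)q >> (bits - 1)   (add one for negative quotients)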
+
+GenTree* Compiler::fgMorphDivByConst(GenTreeOp* tree)
+{
+ assert(fgShouldUseMagicNumberDivide(tree));
+
+ JITDUMP("doing magic number divide optimization\n");
+
+ int64_t denominator = tree->gtOp2->gtIntConCommon.IconValue();
+ int64_t magic;
+ int shift;
+ var_types type = tree->gtType;
+
+ if (tree->gtType == TYP_INT)
+ {
+ magic = GetSignedMagicNumberForDivide<int32_t>((int32_t)denominator, &shift);
+ }
+ else
+ {
+ magic = GetSignedMagicNumberForDivide<int64_t>((int64_t)denominator, &shift);
+ }
+
+ GenTree* numerator = nullptr;
+
+ // If signs of the denominator and magic number don't match,
+ // we will need to use the numerator again.
+ if (signum(denominator) != signum(magic))
+ {
+ numerator = fgMakeMultiUse(&tree->gtOp1);
+ tree->gtFlags |= GTF_ASG;
+ }
+
+ if (type == TYP_LONG)
+ {
+ tree->gtOp2->gtIntConCommon.SetLngValue(magic);
+ }
+ else
+ {
+ tree->gtOp2->gtIntConCommon.SetIconValue((ssize_t)magic);
+ }
+
+ tree->SetOper(GT_MULHI);
+
+ GenTree* t = tree;
+ GenTree* mulresult = tree;
+
+ JITDUMP("Multiply Result:\n");
+ DISPTREE(mulresult);
+
+ GenTree* adjusted = mulresult;
+
+ if (denominator > 0 && magic < 0)
+ {
+ // add the numerator back in
+ adjusted = gtNewOperNode(GT_ADD, type, mulresult, numerator);
+ }
+ else if (denominator < 0 && magic > 0)
+ {
+ // subtract the numerator off
+ adjusted = gtNewOperNode(GT_SUB, type, mulresult, numerator);
+ }
+ else
+ {
+ adjusted = mulresult;
+ }
+
+ GenTree* result1 = adjusted;
+ if (shift != 0)
+ {
+ result1 = gtNewOperNode(GT_RSH, type, adjusted, gtNewIconNode(shift, TYP_INT));
+ }
+
+ GenTree* secondClone = fgMakeMultiUse(&result1);
+
+ GenTree* result2 = gtNewOperNode(GT_RSZ, type, secondClone, gtNewIconNode(genTypeSize(type) * 8 - 1, type));
+
+ GenTree* result = gtNewOperNode(GT_ADD, type, result1, result2);
+ JITDUMP("Final Magic Number divide:\n");
+ DISPTREE(result);
+
+#ifdef DEBUG
+ result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ return result;
+}
+
+//------------------------------------------------------------------------------
+// fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
+//
+// Arguments:
+// oper - Operation to check
+//
+// Return Value:
+// True if the operation can be a root of a bitwise rotation tree; false otherwise.
+
+bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
+{
+ return (oper == GT_OR) || (oper == GT_XOR);
+}
+
+//------------------------------------------------------------------------------
+// fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
+// an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
+//
+// Arguments:
+// tree - tree to check for a rotation pattern
+//
+// Return Value:
+// An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
+//
+// Assumption:
+// The input is a GT_OR or a GT_XOR tree.
+
+GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
+{
+#ifndef LEGACY_BACKEND
+ //
+ // Check for a rotation pattern, e.g.,
+ //
+ // OR ROL
+ // / \ / \
+ // LSH RSZ -> x y
+ // / \ / \
+ // x AND x AND
+ // / \ / \
+ // y 31 ADD 31
+ // / \
+ // NEG 32
+ // |
+ // y
+ // The patterns recognized:
+ // (x << (y & M)) op (x >>> ((-y + N) & M))
+ // (x >>> ((-y + N) & M)) op (x << (y & M))
+ //
+ // (x << y) op (x >>> (-y + N))
+ // (x >>> (-y + N)) op (x << y)
+ //
+ // (x >>> (y & M)) op (x << ((-y + N) & M))
+ // (x << ((-y + N) & M)) op (x >>> (y & M))
+ //
+ // (x >>> y) op (x << (-y + N))
+ // (x << (-y + N)) op (x >>> y)
+ //
+ // (x << c1) op (x >>> c2)
+ // (x >>> c1) op (x << c2)
+ //
+ // where
+ // c1 and c2 are const
+ // c1 + c2 == bitsize(x)
+ // N == bitsize(x)
+ // M is const
+ // M & (N - 1) == N - 1
+ // op is either | or ^
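+ //
+ // Illustrative instance: with a 32-bit x, (x << (y & 31)) | (x >>> ((32 - y) & 31))
+ // matches the first pattern with N == 32 and M == 31, and is rewritten into a single
+ // rotate-left of x by y.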
+
+ if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
+ {
+ // We can't do anything if the tree has assignments, calls, or volatile
+ // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
+ // thrown by the original tree will be thrown by the transformed tree as well.
+ return tree;
+ }
+
+ genTreeOps oper = tree->OperGet();
+ assert(fgOperIsBitwiseRotationRoot(oper));
+
+ // Check if we have an LSH on one side of the OR and an RSZ on the other side.
+ GenTreePtr op1 = tree->gtGetOp1();
+ GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr leftShiftTree = nullptr;
+ GenTreePtr rightShiftTree = nullptr;
+ if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
+ {
+ leftShiftTree = op1;
+ rightShiftTree = op2;
+ }
+ else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
+ {
+ leftShiftTree = op2;
+ rightShiftTree = op1;
+ }
+ else
+ {
+ return tree;
+ }
+
+ // Check if the trees representing the value to shift are identical.
+ // We already checked that there are no side effects above.
+ if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
+ {
+ GenTreePtr rotatedValue = leftShiftTree->gtGetOp1();
+ var_types rotatedValueActualType = genActualType(rotatedValue->gtType);
+ ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8;
+ noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
+ GenTreePtr leftShiftIndex = leftShiftTree->gtGetOp2();
+ GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();
+
+ // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
+ // shouldn't be masked for the transformation to be valid. If additional
+ // higher bits are not masked, the transformation is still valid since the result
+ // of MSIL shift instructions is unspecified if the shift amount is greater or equal
+ // than the width of the value being shifted.
+ ssize_t minimalMask = rotatedValueBitSize - 1;
+ ssize_t leftShiftMask = -1;
+ ssize_t rightShiftMask = -1;
+
+ if ((leftShiftIndex->OperGet() == GT_AND))
+ {
+ if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
+ {
+ leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
+ leftShiftIndex = leftShiftIndex->gtGetOp1();
+ }
+ else
+ {
+ return tree;
+ }
+ }
+
+ if ((rightShiftIndex->OperGet() == GT_AND))
+ {
+ if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
+ {
+ rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
+ rightShiftIndex = rightShiftIndex->gtGetOp1();
+ }
+ else
+ {
+ return tree;
+ }
+ }
+
+ if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
+ {
+ // The shift index is overmasked, e.g., we have
+ // something like (x << (y & 15)) or
+ // (x >> ((32 - y) & 15)) with 32-bit x.
+ // The transformation is not valid.
+ return tree;
+ }
+
+ GenTreePtr shiftIndexWithAdd = nullptr;
+ GenTreePtr shiftIndexWithoutAdd = nullptr;
+ genTreeOps rotateOp = GT_NONE;
+ GenTreePtr rotateIndex = nullptr;
+
+ if (leftShiftIndex->OperGet() == GT_ADD)
+ {
+ shiftIndexWithAdd = leftShiftIndex;
+ shiftIndexWithoutAdd = rightShiftIndex;
+ rotateOp = GT_ROR;
+ }
+ else if (rightShiftIndex->OperGet() == GT_ADD)
+ {
+ shiftIndexWithAdd = rightShiftIndex;
+ shiftIndexWithoutAdd = leftShiftIndex;
+ rotateOp = GT_ROL;
+ }
+
+ if (shiftIndexWithAdd != nullptr)
+ {
+ if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
+ {
+ if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
+ {
+ if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
+ {
+ if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
+ {
+ // We found one of these patterns:
+ // (x << (y & M)) | (x >>> ((-y + N) & M))
+ // (x << y) | (x >>> (-y + N))
+ // (x >>> (y & M)) | (x << ((-y + N) & M))
+ // (x >>> y) | (x << (-y + N))
+ // where N == bitsize(x), M is const, and
+ // M & (N - 1) == N - 1
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
+ {
+ // TODO: we need to handle variable-sized long shifts specially on x86.
+ // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
+ // to add helpers for GT_ROL and GT_ROR.
+ NYI("Rotation of a long value by variable amount");
+ }
+#endif
+
+ rotateIndex = shiftIndexWithoutAdd;
+ }
+ }
+ }
+ }
+ }
+ else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI()))
+ {
+ if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
+ {
+ // We found this pattern:
+ // (x << c1) | (x >>> c2)
+ // where c1 and c2 are const and c1 + c2 == bitsize(x)
+ rotateOp = GT_ROL;
+ rotateIndex = leftShiftIndex;
+ }
+ }
+
+ if (rotateIndex != nullptr)
+ {
+ noway_assert(GenTree::OperIsRotate(rotateOp));
+
+ unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
+
+ // We can use the same tree only during global morph; reusing the tree in a later morph
+ // may invalidate value numbers.
+ if (fgGlobalMorph)
+ {
+ tree->gtOp.gtOp1 = rotatedValue;
+ tree->gtOp.gtOp2 = rotateIndex;
+ tree->ChangeOper(rotateOp);
+ noway_assert(inputTreeEffects == ((rotatedValue->gtFlags | rotateIndex->gtFlags) & GTF_ALL_EFFECT));
+ }
+ else
+ {
+ tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
+ noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
+ }
+
+ return tree;
+ }
+ }
+#endif // LEGACY_BACKEND
+ return tree;
+}
+
+#if !CPU_HAS_FP_SUPPORT
+GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
+{
+
+ genTreeOps oper = tree->OperGet();
+ var_types typ = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ /*
+ We have to use helper calls for all FP operations:
+
+ FP operators that operate on FP values
+ casts to and from FP
+ comparisons of FP values
+ */
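+
+ /* For example (illustrative): a TYP_FLOAT GT_ADD is rewritten below into a call to the
+ R4 add helper with both operands passed as arguments; the TYP_DOUBLE case simply bumps
+ the helper index, relying on each R8 helper immediately following its R4 counterpart. */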
+
+ if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
+ {
+ int helper;
+ GenTreePtr args;
+ size_t argc = genTypeStSz(typ);
+
+ /* Not all FP operations need helper calls */
+
+ switch (oper)
+ {
+ case GT_ASG:
+ case GT_IND:
+ case GT_LIST:
+ case GT_ADDR:
+ case GT_COMMA:
+ return tree;
+ }
+
+#ifdef DEBUG
+
+ /* If the result isn't FP, it better be a compare or cast */
+
+ if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
+ gtDispTree(tree);
+
+ noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
+#endif
+
+ /* Keep track of how many arguments we're passing */
+
+ fgPtrArgCntCur += argc;
+
+ /* Is this a binary operator? */
+
+ if (op2)
+ {
+ /* Add the second operand to the argument count */
+
+ fgPtrArgCntCur += argc;
+ argc *= 2;
+
+ /* What kind of an operator do we have? */
+
+ switch (oper)
+ {
+ case GT_ADD:
+ helper = CPX_R4_ADD;
+ break;
+ case GT_SUB:
+ helper = CPX_R4_SUB;
+ break;
+ case GT_MUL:
+ helper = CPX_R4_MUL;
+ break;
+ case GT_DIV:
+ helper = CPX_R4_DIV;
+ break;
+ // case GT_MOD: helper = CPX_R4_REM; break;
+
+ case GT_EQ:
+ helper = CPX_R4_EQ;
+ break;
+ case GT_NE:
+ helper = CPX_R4_NE;
+ break;
+ case GT_LT:
+ helper = CPX_R4_LT;
+ break;
+ case GT_LE:
+ helper = CPX_R4_LE;
+ break;
+ case GT_GE:
+ helper = CPX_R4_GE;
+ break;
+ case GT_GT:
+ helper = CPX_R4_GT;
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected FP binary op");
+ break;
+ }
+
+ args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
+ }
+ else
+ {
+ switch (oper)
+ {
+ case GT_RETURN:
+ return tree;
+
+ case GT_CAST:
+ noway_assert(!"FP cast");
+
+ case GT_NEG:
+ helper = CPX_R4_NEG;
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected FP unary op");
+ break;
+ }
+
+ args = gtNewArgList(tree->gtOp.gtOp1);
+ }
+
+ /* If we have double result/operands, modify the helper */
+
+ if (typ == TYP_DOUBLE)
+ {
+ noway_assert(CPX_R4_NEG + 1 == CPX_R8_NEG);
+ noway_assert(CPX_R4_ADD + 1 == CPX_R8_ADD);
+ noway_assert(CPX_R4_SUB + 1 == CPX_R8_SUB);
+ noway_assert(CPX_R4_MUL + 1 == CPX_R8_MUL);
+ noway_assert(CPX_R4_DIV + 1 == CPX_R8_DIV);
+
+ helper++;
+ }
+ else
+ {
+ noway_assert(tree->OperIsCompare());
+
+ noway_assert(CPX_R4_EQ + 1 == CPX_R8_EQ);
+ noway_assert(CPX_R4_NE + 1 == CPX_R8_NE);
+ noway_assert(CPX_R4_LT + 1 == CPX_R8_LT);
+ noway_assert(CPX_R4_LE + 1 == CPX_R8_LE);
+ noway_assert(CPX_R4_GE + 1 == CPX_R8_GE);
+ noway_assert(CPX_R4_GT + 1 == CPX_R8_GT);
+ }
+
+ tree = fgMorphIntoHelperCall(tree, helper, args);
+
+ if (fgPtrArgCntMax < fgPtrArgCntCur)
+ fgPtrArgCntMax = fgPtrArgCntCur;
+
+ fgPtrArgCntCur -= argc;
+ return tree;
+
+ case GT_RETURN:
+
+ if (op1)
+ {
+
+ if (compCurBB == genReturnBB)
+ {
+ /* This is the 'exitCrit' call at the exit label */
+
+ noway_assert(op1->gtType == TYP_VOID);
+ noway_assert(op2 == 0);
+
+ tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
+
+ return tree;
+ }
+
+ /* This is a (real) return value -- check its type */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
+ {
+ bool allowMismatch = false;
+
+ // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
+ if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
+ (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL))
+ allowMismatch = true;
+
+ if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
+ allowMismatch = true;
+
+ if (!allowMismatch)
+ NO_WAY("Return type mismatch");
+ }
+#endif
+ }
+ break;
+ }
+ return tree;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Transform the given tree for code generation and return an equivalent tree.
+ */
+
+GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
+{
+ noway_assert(tree);
+ noway_assert(tree->gtOper != GT_STMT);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
+ {
+ noway_assert(!"JitBreakMorphTree hit");
+ }
+ }
+#endif
+
+#ifdef DEBUG
+ int thisMorphNum = 0;
+ if (verbose && treesBeforeAfterMorph)
+ {
+ thisMorphNum = morphNum++;
+ printf("\nfgMorphTree (before %d):\n", thisMorphNum);
+ gtDispTree(tree);
+ }
+#endif
+
+/*-------------------------------------------------------------------------
+ * fgMorphTree() can potentially replace a tree with another, and the
+ * caller has to store the return value correctly.
+ * Turn this on to always make a copy of "tree" here to shake out
+ * hidden/unupdated references.
+ */
+
+#ifdef DEBUG
+
+ if (compStressCompile(STRESS_GENERIC_CHECK, 0))
+ {
+ GenTreePtr copy;
+
+#ifdef SMALL_TREE_NODES
+ if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
+ {
+ copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
+ }
+ else
+#endif
+ {
+ copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
+ }
+
+ copy->CopyFrom(tree, this);
+
+#if defined(LATE_DISASM)
+ // GT_CNS_INT is considered small, so CopyFrom() won't copy all fields
+ if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
+ {
+ copy->gtIntCon.gtIconHdl.gtIconHdl1 = tree->gtIntCon.gtIconHdl.gtIconHdl1;
+ copy->gtIntCon.gtIconHdl.gtIconHdl2 = tree->gtIntCon.gtIconHdl.gtIconHdl2;
+ }
+#endif
+
+ DEBUG_DESTROY_NODE(tree);
+ tree = copy;
+ }
+#endif // DEBUG
+
+ if (fgGlobalMorph)
+ {
+ /* Ensure that we haven't morphed this node already */
+ assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
+
+#if LOCAL_ASSERTION_PROP
+ /* Before morphing the tree, we try to propagate any active assertions */
+ if (optLocalAssertionProp)
+ {
+ /* Do we have any active assertions? */
+
+ if (optAssertionCount > 0)
+ {
+ GenTreePtr newTree = tree;
+ while (newTree != nullptr)
+ {
+ tree = newTree;
+ /* newTree is non-Null if we propagated an assertion */
+ newTree = optAssertionProp(apFull, tree, nullptr);
+ }
+ noway_assert(tree != nullptr);
+ }
+ }
+ PREFAST_ASSUME(tree != nullptr);
+#endif
+ }
+
+ /* Save the original un-morphed tree for fgMorphTreeDone */
+
+ GenTreePtr oldTree = tree;
+
+ /* Figure out what kind of a node we have */
+
+ unsigned kind = tree->OperKind();
+
+ /* Is this a constant node? */
+
+ if (kind & GTK_CONST)
+ {
+ tree = fgMorphConst(tree);
+ goto DONE;
+ }
+
+ /* Is this a leaf node? */
+
+ if (kind & GTK_LEAF)
+ {
+ tree = fgMorphLeaf(tree);
+ goto DONE;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ tree = fgMorphSmpOp(tree, mac);
+ goto DONE;
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (tree->OperGet())
+ {
+ case GT_FIELD:
+ tree = fgMorphField(tree, mac);
+ break;
+
+ case GT_CALL:
+ tree = fgMorphCall(tree->AsCall());
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ fgSetRngChkTarget(tree);
+
+ GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
+ bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
+ bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
+ // If the index is a comma(throw, x), just return that.
+ if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
+ {
+ tree = bndsChk->gtIndex;
+ }
+
+ // Propagate effects flags upwards
+ bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
+ bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
+
+ // Otherwise, we don't change the tree.
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
+ tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
+ tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
+ }
+ if (fgGlobalMorph)
+ {
+ fgSetRngChkTarget(tree, false);
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
+ tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
+ tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
+ tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
+ tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
+ tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
+ if (fgGlobalMorph)
+ {
+ fgSetRngChkTarget(tree, false);
+ }
+ break;
+
+ case GT_CMPXCHG:
+ tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
+ tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue);
+ tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
+ break;
+
+ case GT_STORE_DYN_BLK:
+ tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
+ __fallthrough;
+ case GT_DYN_BLK:
+ tree->gtDynBlk.Addr() = fgMorphTree(tree->gtDynBlk.Addr());
+ tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected operator");
+ }
+DONE:
+
+ fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));
+
+ return tree;
+}
+
+#if LOCAL_ASSERTION_PROP
+/*****************************************************************************
+ *
+ * Kill all dependent assertions with regard to lclNum.
+ *
+ */
+
+void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree))
+{
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varTypeIsStruct(varDsc));
+
+ // Kill the field locals.
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ fgKillDependentAssertions(i DEBUGARG(tree));
+ }
+
+ // Fall through to kill the struct local itself.
+ }
+
+ /* All dependent assertions are killed here */
+
+ ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
+
+ if (killed)
+ {
+ AssertionIndex index = optAssertionCount;
+ while (killed && (index > 0))
+ {
+ if (BitVecOps::IsMember(apTraits, killed, index - 1))
+ {
+#ifdef DEBUG
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
+ ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
+ if (verbose)
+ {
+ printf("\nThe assignment ");
+ printTreeID(tree);
+ printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
+ optPrintAssertion(curAssertion);
+ }
+#endif
+ // Remove this bit from the killed mask
+ BitVecOps::RemoveElemD(apTraits, killed, index - 1);
+
+ optAssertionRemove(index);
+ }
+
+ index--;
+ }
+
+ // killed mask should now be zero
+ noway_assert(BitVecOps::IsEmpty(apTraits, killed));
+ }
+}
+#endif // LOCAL_ASSERTION_PROP
+
+/*****************************************************************************
+ *
+ * This function is called to complete the morphing of a tree node.
+ * It should only be called once for each node.
+ * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
+ * to enforce the invariant that each node is only morphed once.
+ * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
+ * by an equivalent tree.
+ *
+ */
+
+void Compiler::fgMorphTreeDone(GenTreePtr tree,
+ GenTreePtr oldTree /* == NULL */
+ DEBUGARG(int morphNum))
+{
+#ifdef DEBUG
+ if (verbose && treesBeforeAfterMorph)
+ {
+ printf("\nfgMorphTree (after %d):\n", morphNum);
+ gtDispTree(tree);
+ printf(""); // in our logic this causes a flush
+ }
+#endif
+
+ if (!fgGlobalMorph)
+ {
+ return;
+ }
+
+ if ((oldTree != nullptr) && (oldTree != tree))
+ {
+ /* Ensure that we have morphed this node */
+ assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
+
+#ifdef DEBUG
+ TransferTestDataToNode(oldTree, tree);
+#endif
+ }
+ else
+ {
+ // Ensure that we haven't morphed this node already
+ assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
+ }
+
+ if (tree->OperKind() & GTK_CONST)
+ {
+ goto DONE;
+ }
+
+#if LOCAL_ASSERTION_PROP
+
+ if (!optLocalAssertionProp)
+ {
+ goto DONE;
+ }
+
+ /* Do we have any active assertions? */
+
+ if (optAssertionCount > 0)
+ {
+ /* Is this an assignment to a local variable */
+
+ if ((tree->OperKind() & GTK_ASGOP) &&
+ (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR || tree->gtOp.gtOp1->gtOper == GT_LCL_FLD))
+ {
+ unsigned op1LclNum = tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ noway_assert(op1LclNum < lvaCount);
+ fgKillDependentAssertions(op1LclNum DEBUGARG(tree));
+ }
+ }
+
+ /* If this tree makes a new assertion - make it available */
+ optAssertionGen(tree);
+
+#endif // LOCAL_ASSERTION_PROP
+
+DONE:;
+
+#ifdef DEBUG
+ /* Mark this node as being morphed */
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
+ * Returns true if we modified the flow graph
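+ *
+ * For example (illustrative): a BBJ_COND block ending in JTRUE(1) becomes a BBJ_ALWAYS
+ * to its jump target, and the not-taken edge to bbNext is removed.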
+ */
+
+bool Compiler::fgFoldConditional(BasicBlock* block)
+{
+ bool result = false;
+
+ // We don't want to make any code unreachable
+ if (opts.compDbgCode || opts.MinOpts())
+ {
+ return false;
+ }
+
+ if (block->bbJumpKind == BBJ_COND)
+ {
+ noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
+
+ GenTreePtr stmt = block->bbTreeList->gtPrev;
+
+ noway_assert(stmt->gtNext == nullptr);
+
+ if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
+ {
+ noway_assert(fgRemoveRestOfBlock);
+
+ /* Unconditional throw - transform the basic block into a BBJ_THROW */
+ fgConvertBBToThrowBB(block);
+
+ /* Remove 'block' from the predecessor list of 'block->bbNext' */
+ fgRemoveRefPred(block->bbNext, block);
+
+ /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
+ fgRemoveRefPred(block->bbJumpDest, block);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional folded at BB%02u\n", block->bbNum);
+ printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
+ }
+#endif
+ goto DONE_COND;
+ }
+
+ noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+
+ /* Did we fold the conditional */
+
+ noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
+ GenTreePtr cond;
+ cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
+
+ if (cond->OperKind() & GTK_CONST)
+ {
+ /* Yupee - we folded the conditional!
+ * Remove the conditional statement */
+
+ noway_assert(cond->gtOper == GT_CNS_INT);
+ noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));
+
+ /* remove the statement from bbTreelist - No need to update
+ * the reference counts since there are no lcl vars */
+ fgRemoveStmt(block, stmt);
+
+ // block is a BBJ_COND that we are folding the conditional for
+ // bTaken is the path that will always be taken from block
+ // bNotTaken is the path that will never be taken from block
+ //
+ BasicBlock* bTaken;
+ BasicBlock* bNotTaken;
+
+ if (cond->gtIntCon.gtIconVal != 0)
+ {
+ /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
+ block->bbJumpKind = BBJ_ALWAYS;
+ bTaken = block->bbJumpDest;
+ bNotTaken = block->bbNext;
+ }
+ else
+ {
+ /* Unmark the loop if we are removing a backwards branch. */
+ /* The dest block must also be marked as a loop head, and */
+ /* we must be able to reach the backedge block */
+ if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
+ fgReachable(block->bbJumpDest, block))
+ {
+ optUnmarkLoopBlocks(block->bbJumpDest, block);
+ }
+
+ /* JTRUE 0 - transform the basic block into a BBJ_NONE */
+ block->bbJumpKind = BBJ_NONE;
+ noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
+ bTaken = block->bbNext;
+ bNotTaken = block->bbJumpDest;
+ }
+
+ if (fgHaveValidEdgeWeights)
+ {
+ // We are removing an edge from block to bNotTaken
+ // and we have already computed the edge weights, so
+ // we will try to adjust some of the weights
+ //
+ flowList* edgeTaken = fgGetPredForBlock(bTaken, block);
+ BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block
+
+ // We examine the taken edge (block -> bTaken)
+ // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
+ // else if bTaken has valid profile weight and block does not we try to adjust block's weight
+ // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
+ //
+ if (block->bbFlags & BBF_PROF_WEIGHT)
+ {
+ // The edge weights for (block -> bTaken) are 100% of block's weight
+ edgeTaken->flEdgeWeightMin = block->bbWeight;
+ edgeTaken->flEdgeWeightMax = block->bbWeight;
+
+ if ((bTaken->bbFlags & BBF_PROF_WEIGHT) == 0)
+ {
+ if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
+ {
+ // Update the weight of bTaken
+ bTaken->inheritWeight(block);
+ bUpdated = bTaken;
+ }
+ }
+ }
+ else if (bTaken->bbFlags & BBF_PROF_WEIGHT)
+ {
+ if (bTaken->countOfInEdges() == 1)
+ {
+ // There is only one in edge to bTaken
+ edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
+ edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
+
+ // Update the weight of block
+ block->inheritWeight(bTaken);
+ bUpdated = block;
+ }
+ }
+
+ if (bUpdated != nullptr)
+ {
+ flowList* edge;
+ // Now fix the weights of the edges out of 'bUpdated'
+ switch (bUpdated->bbJumpKind)
+ {
+ case BBJ_NONE:
+ edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
+ edge->flEdgeWeightMax = bUpdated->bbWeight;
+ break;
+ case BBJ_COND:
+ edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
+ edge->flEdgeWeightMax = bUpdated->bbWeight;
+ __fallthrough;
+ case BBJ_ALWAYS:
+ edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
+ edge->flEdgeWeightMax = bUpdated->bbWeight;
+ break;
+ default:
+ // We don't handle BBJ_SWITCH
+ break;
+ }
+ }
+ }
+
+ /* modify the flow graph */
+
+ /* Remove 'block' from the predecessor list of 'bNotTaken' */
+ fgRemoveRefPred(bNotTaken, block);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional folded at BB%02u\n", block->bbNum);
+ printf("BB%02u becomes a %s", block->bbNum,
+ block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ printf(" to BB%02u", block->bbJumpDest->bbNum);
+ }
+ printf("\n");
+ }
+#endif
+
+ /* if the block was a loop condition we may have to modify
+ * the loop table */
+
+ for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
+ {
+ /* Some loops may have been already removed by
+ * loop unrolling or conditional folding */
+
+ if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ /* We are only interested in the loop bottom */
+
+ if (optLoopTable[loopNum].lpBottom == block)
+ {
+ if (cond->gtIntCon.gtIconVal == 0)
+ {
+ /* This was a bogus loop (condition always false)
+ * Remove the loop from the table */
+
+ optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n", loopNum,
+ optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
+ }
+#endif
+ }
+ }
+ }
+ DONE_COND:
+ result = true;
+ }
+ }
+ else if (block->bbJumpKind == BBJ_SWITCH)
+ {
+ noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
+
+ GenTreePtr stmt = block->bbTreeList->gtPrev;
+
+ noway_assert(stmt->gtNext == nullptr);
+
+ if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
+ {
+ noway_assert(fgRemoveRestOfBlock);
+
+ /* Unconditional throw - transform the basic block into a BBJ_THROW */
+ fgConvertBBToThrowBB(block);
+
+ /* update the flow graph */
+
+ unsigned jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
+ {
+ BasicBlock* curJump = *jumpTab;
+
+ /* Remove 'block' from the predecessor list of 'curJump' */
+ fgRemoveRefPred(curJump, block);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional folded at BB%02u\n", block->bbNum);
+ printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
+ }
+#endif
+ goto DONE_SWITCH;
+ }
+
+ noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
+
+ /* Did we fold the conditional */
+
+ noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
+ GenTreePtr cond;
+ cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
+
+ if (cond->OperKind() & GTK_CONST)
+ {
+ /* Yupee - we folded the conditional!
+ * Remove the conditional statement */
+
+ noway_assert(cond->gtOper == GT_CNS_INT);
+
+ /* remove the statement from bbTreelist - No need to update
+ * the reference counts since there are no lcl vars */
+ fgRemoveStmt(block, stmt);
+
+ /* modify the flow graph */
+
+ /* Find the actual jump target */
+ unsigned switchVal;
+ switchVal = (unsigned)cond->gtIntCon.gtIconVal;
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+ bool foundVal;
+ foundVal = false;
+
+ for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
+ {
+ BasicBlock* curJump = *jumpTab;
+
+ assert(curJump->countOfInEdges() > 0);
+
+ // If val matches switchVal or we are at the last entry and
+ // we never found the switch value then set the new jump dest
+
+ if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
+ {
+ if (curJump != block->bbNext)
+ {
+ /* transform the basic block into a BBJ_ALWAYS */
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = curJump;
+
+ // if we are jumping backwards, make sure we have a GC Poll.
+ if (curJump->bbNum > block->bbNum)
+ {
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ }
+ }
+ else
+ {
+ /* transform the basic block into a BBJ_NONE */
+ block->bbJumpKind = BBJ_NONE;
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ }
+ foundVal = true;
+ }
+ else
+ {
+ /* Remove 'block' from the predecessor list of 'curJump' */
+ fgRemoveRefPred(curJump, block);
+ }
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional folded at BB%02u\n", block->bbNum);
+ printf("BB%02u becomes a %s", block->bbNum,
+ block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ printf(" to BB%02u", block->bbJumpDest->bbNum);
+ }
+ printf("\n");
+ }
+#endif
+ DONE_SWITCH:
+ result = true;
+ }
+ }
+ return result;
+}
+
+//*****************************************************************************
+//
+// Morphs a single statement in a block.
+// Can be called anytime, unlike fgMorphStmts() which should only be called once.
+//
+// Returns true if 'stmt' was removed from the block.
+// Returns false if 'stmt' is still in the block (even if other statements were removed).
+//
+
+bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(const char* msg))
+{
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ compCurBB = block;
+ compCurStmt = stmt;
+
+ GenTreePtr morph = fgMorphTree(stmt->gtStmt.gtStmtExpr);
+
+ // Bug 1106830 - During the CSE phase we can't just remove
+ // morph->gtOp.gtOp2 as it could contain CSE expressions.
+ // This leads to a noway_assert in OptCSE.cpp when
+ // searching for the removed CSE ref. (using gtFindLink)
+ //
+ if (!optValnumCSE_phase)
+ {
+ /* Check for morph as a GT_COMMA with an unconditional throw */
+ if (fgIsCommaThrow(morph, true))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Folding a top-level fgIsCommaThrow stmt\n");
+ printf("Removing op2 as unreachable:\n");
+ gtDispTree(morph->gtOp.gtOp2);
+ printf("\n");
+ }
+#endif
+ /* Use the call as the new stmt */
+ morph = morph->gtOp.gtOp1;
+ noway_assert(morph->gtOper == GT_CALL);
+ }
+
+ /* We can get a throw as a statement root */
+ if (fgIsThrow(morph))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("We have a top-level fgIsThrow stmt\n");
+ printf("Removing the rest of block as unreachable:\n");
+ }
+#endif
+ noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
+ fgRemoveRestOfBlock = true;
+ }
+ }
+
+ stmt->gtStmt.gtStmtExpr = morph;
+
+ /* Can the entire tree be removed ? */
+
+ bool removedStmt = fgCheckRemoveStmt(block, stmt);
+
+ /* Or this is the last statement of a conditional branch that was just folded */
+
+ if ((!removedStmt) && (stmt->gtNext == nullptr) && !fgRemoveRestOfBlock)
+ {
+ if (fgFoldConditional(block))
+ {
+ if (block->bbJumpKind != BBJ_THROW)
+ {
+ removedStmt = true;
+ }
+ }
+ }
+
+ if (!removedStmt)
+ {
+ /* Have to re-do the evaluation order since for example
+ * some later code does not expect constants as op1 */
+ gtSetStmtInfo(stmt);
+
+ /* Have to re-link the nodes for this statement */
+ fgSetStmtSeq(stmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
+ gtDispTree(morph);
+ printf("\n");
+ }
+#endif
+
+ if (fgRemoveRestOfBlock)
+ {
+ /* Remove the rest of the stmts in the block */
+
+ while (stmt->gtNext)
+ {
+ stmt = stmt->gtNext;
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ fgRemoveStmt(block, stmt);
+ }
+
+ // The rest of the block has been removed
+ // and we will always throw an exception
+
+ // Update successors of block
+ fgRemoveBlockAsPred(block);
+
+ // For compDbgCode, we prepend an empty BB as the firstBB; it is BBJ_NONE.
+ // We should not convert it to a ThrowBB.
+ if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0))
+ {
+ // Convert block to a throw bb
+ fgConvertBBToThrowBB(block);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum);
+ }
+#endif
+ fgRemoveRestOfBlock = false;
+ }
+
+ return removedStmt;
+}
+
+/*****************************************************************************
+ *
+ * Morph the statements of the given block.
+ * This function should be called just once for a block. Use fgMorphBlockStmt()
+ * for reentrant calls.
+ */
+
+void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw)
+{
+ fgRemoveRestOfBlock = false;
+
+ noway_assert(fgExpandInline == false);
+
+ /* Make the current basic block address available globally */
+
+ compCurBB = block;
+
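+ // Out-parameters describing patterns seen in this block: 'mult' is set when two or
+ // more +=/-= statements appear in a row, 'loadw' when an "x = a[i] & icon" assignment
+ // is followed by "x |= a[i] << 8"; 'lnot' is only initialized here.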
+ *mult = *lnot = *loadw = false;
+
+ fgCurrentlyInUseArgTemps = hashBv::Create(this);
+
+ GenTreePtr stmt, prev;
+ for (stmt = block->bbTreeList, prev = nullptr; stmt; prev = stmt->gtStmt.gtStmtExpr, stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ if (fgRemoveRestOfBlock)
+ {
+ fgRemoveStmt(block, stmt);
+ continue;
+ }
+#ifdef FEATURE_SIMD
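+ // Try to combine contiguous assignments of SIMD vector fields into a single
+ // SIMD assignment (see fgMorphCombineSIMDFieldAssignments).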
+ if (!opts.MinOpts() && stmt->gtStmt.gtStmtExpr->TypeGet() == TYP_FLOAT &&
+ stmt->gtStmt.gtStmtExpr->OperGet() == GT_ASG)
+ {
+ fgMorphCombineSIMDFieldAssignments(block, stmt);
+ }
+#endif
+
+ fgMorphStmt = stmt;
+ compCurStmt = stmt;
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+#ifdef DEBUG
+ compCurStmtNum++;
+ if (stmt == block->bbTreeList)
+ {
+ block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
+ }
+
+ unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
+
+ if (verbose)
+ {
+ printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
+ gtDispTree(tree);
+ }
+#endif
+
+ /* Morph this statement tree */
+
+ GenTreePtr morph = fgMorphTree(tree);
+
+ // mark any outgoing arg temps as free so we can reuse them in the next statement.
+
+ fgCurrentlyInUseArgTemps->ZeroAll();
+
+ // Has fgMorphStmt been sneakily changed?
+
+ if (stmt->gtStmt.gtStmtExpr != tree)
+ {
+ /* This must be a tail call. Ignore 'morph' and carry on with
+ the tail-call node */
+
+ morph = stmt->gtStmt.gtStmtExpr;
+ noway_assert(compTailCallUsed);
+ noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
+ noway_assert(stmt->gtNext == nullptr);
+
+ GenTreeCall* call = morph->AsCall();
+ // Could either be
+ // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
+ // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
+ // a jmp.
+ noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
+ (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
+ (compCurBB->bbFlags & BBF_HAS_JMP)));
+ }
+ else if (block != compCurBB)
+ {
+ /* This must be a tail call that caused a GCPoll to get
+ injected. We haven't actually morphed the call yet
+ but the flag still got set; clear it here... */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ noway_assert(compTailCallUsed);
+ noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
+ noway_assert(stmt->gtNext == nullptr);
+
+ GenTreeCall* call = morph->AsCall();
+
+ // Could either be
+ // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
+ // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
+ // a jmp.
+ noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
+ (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
+ (compCurBB->bbFlags & BBF_HAS_JMP)));
+ }
+
+#ifdef DEBUG
+ if (compStressCompile(STRESS_CLONE_EXPR, 30))
+ {
+ // Clone all the trees to stress gtCloneExpr()
+
+ if (verbose)
+ {
+ printf("\nfgMorphTree (stressClone from):\n");
+ gtDispTree(morph);
+ }
+
+ morph = gtCloneExpr(morph);
+ noway_assert(morph);
+
+ if (verbose)
+ {
+ printf("\nfgMorphTree (stressClone to):\n");
+ gtDispTree(morph);
+ }
+ }
+
+ /* If the hash value changed, we modified the tree during morphing */
+ if (verbose)
+ {
+ unsigned newHash = gtHashValue(morph);
+ if (newHash != oldHash)
+ {
+ printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
+ gtDispTree(morph);
+ }
+ }
+#endif
+
+ /* Check for morph as a GT_COMMA with an unconditional throw */
+ if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
+ {
+ /* Use the call as the new stmt */
+ morph = morph->gtOp.gtOp1;
+ noway_assert(morph->gtOper == GT_CALL);
+ noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
+
+ fgRemoveRestOfBlock = true;
+ }
+
+ stmt->gtStmt.gtStmtExpr = tree = morph;
+
+ noway_assert(fgPtrArgCntCur == 0);
+
+ if (fgRemoveRestOfBlock)
+ {
+ continue;
+ }
+
+ /* Has the statement been optimized away */
+
+ if (fgCheckRemoveStmt(block, stmt))
+ {
+ continue;
+ }
+
+ /* Check if this block ends with a conditional branch that can be folded */
+
+ if (fgFoldConditional(block))
+ {
+ continue;
+ }
+
+ if (ehBlockHasExnFlowDsc(block))
+ {
+ continue;
+ }
+
+#if OPT_MULT_ADDSUB
+
+ /* Note whether we have two or more +=/-= operators in a row */
+
+ if (tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB)
+ {
+ if (prev && prev->gtOper == tree->gtOper)
+ {
+ *mult = true;
+ }
+ }
+
+#endif
+
+ /* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */
+
+ if (tree->gtOper == GT_ASG_OR && prev && prev->gtOper == GT_ASG)
+ {
+ *loadw = true;
+ }
+ }
+
+ if (fgRemoveRestOfBlock)
+ {
+ if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
+ {
+ GenTreePtr first = block->bbTreeList;
+ noway_assert(first);
+ GenTreePtr last = first->gtPrev;
+ noway_assert(last && last->gtNext == nullptr);
+ GenTreePtr lastStmt = last->gtStmt.gtStmtExpr;
+
+ if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) ||
+ ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)))
+ {
+ GenTreePtr op1 = lastStmt->gtOp.gtOp1;
+
+ if (op1->OperKind() & GTK_RELOP)
+ {
+ /* Unmark the comparison node with GTF_RELOP_JMP_USED */
+ op1->gtFlags &= ~GTF_RELOP_JMP_USED;
+ }
+
+ last->gtStmt.gtStmtExpr = fgMorphTree(op1);
+ }
+ }
+
+ /* Mark block as a BBJ_THROW block */
+ fgConvertBBToThrowBB(block);
+ }
+
+ noway_assert(fgExpandInline == false);
+
+#if FEATURE_FASTTAILCALL
+ GenTreePtr recursiveTailCall = nullptr;
+ if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
+ {
+ fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
+ }
+#endif
+
+#ifdef DEBUG
+ compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
+#endif
+
+ // Reset this back so that it doesn't leak out impacting other blocks
+ fgRemoveRestOfBlock = false;
+}
+
+/*****************************************************************************
+ *
+ * Morph the blocks of the method.
+ * Returns true if the basic block list is modified.
+ * This function should be called just once.
+ */
+
+void Compiler::fgMorphBlocks()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgMorphBlocks()\n");
+ }
+#endif
+
+ /* Since fgMorphTree can be called after various optimizations to re-arrange
+ * the nodes, we need a global flag to signal whether we are in the one-pass
+ * global morphing */
+
+ fgGlobalMorph = true;
+
+#if LOCAL_ASSERTION_PROP
+ //
+ // Local assertion prop is enabled if we are optimizing
+ //
+ optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts());
+
+ if (optLocalAssertionProp)
+ {
+ //
+ // Initialize for local assertion prop
+ //
+ optAssertionInit(true);
+ }
+#elif ASSERTION_PROP
+ //
+ // If LOCAL_ASSERTION_PROP is not set
+ // and we have global assertion prop
+ // then local assertion prop is always off
+ //
+ optLocalAssertionProp = false;
+
+#endif
+
+ /*-------------------------------------------------------------------------
+ * Process all basic blocks in the function
+ */
+
+ BasicBlock* block = fgFirstBB;
+ noway_assert(block);
+
+#ifdef DEBUG
+ compCurStmtNum = 0;
+#endif
+
+ do
+ {
+#if OPT_MULT_ADDSUB
+ bool mult = false;
+#endif
+
+#if OPT_BOOL_OPS
+ bool lnot = false;
+#endif
+
+ bool loadw = false;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName);
+ }
+#endif
+
+#if LOCAL_ASSERTION_PROP
+ if (optLocalAssertionProp)
+ {
+ //
+ // Clear out any currently recorded assertion candidates
+ // before processing each basic block;
+ // also, we must handle QMARK-COLON specially.
+ //
+ optAssertionReset(0);
+ }
+#endif
+
+ /* Process all statement trees in the basic block */
+
+ GenTreePtr tree;
+
+ fgMorphStmts(block, &mult, &lnot, &loadw);
+
+#if OPT_MULT_ADDSUB
+
+ if (mult && (opts.compFlags & CLFLG_TREETRANS) && !opts.compDbgCode && !opts.MinOpts())
+ {
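+ // Look for consecutive "lcl += icon" / "lcl -= icon" statements on the same
+ // local and fold each run into a single increment or decrement.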
+ for (tree = block->bbTreeList; tree; tree = tree->gtNext)
+ {
+ noway_assert(tree->gtOper == GT_STMT);
+ GenTreePtr last = tree->gtStmt.gtStmtExpr;
+
+ if (last->gtOper == GT_ASG_ADD || last->gtOper == GT_ASG_SUB)
+ {
+ GenTreePtr temp;
+ GenTreePtr next;
+
+ GenTreePtr dst1 = last->gtOp.gtOp1;
+ GenTreePtr src1 = last->gtOp.gtOp2;
+
+ if (!last->IsCnsIntOrI())
+ {
+ goto NOT_CAFFE;
+ }
+
+ if (dst1->gtOper != GT_LCL_VAR)
+ {
+ goto NOT_CAFFE;
+ }
+ if (!src1->IsCnsIntOrI())
+ {
+ goto NOT_CAFFE;
+ }
+
+ for (;;)
+ {
+ GenTreePtr dst2;
+ GenTreePtr src2;
+
+ /* Look at the next statement */
+
+ temp = tree->gtNext;
+ if (!temp)
+ {
+ goto NOT_CAFFE;
+ }
+
+ noway_assert(temp->gtOper == GT_STMT);
+ next = temp->gtStmt.gtStmtExpr;
+
+ if (next->gtOper != last->gtOper)
+ {
+ goto NOT_CAFFE;
+ }
+ if (next->gtType != last->gtType)
+ {
+ goto NOT_CAFFE;
+ }
+
+ dst2 = next->gtOp.gtOp1;
+ src2 = next->gtOp.gtOp2;
+
+ if (dst2->gtOper != GT_LCL_VAR)
+ {
+ goto NOT_CAFFE;
+ }
+ if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum)
+ {
+ goto NOT_CAFFE;
+ }
+
+ if (!src2->IsCnsIntOrI())
+ {
+ goto NOT_CAFFE;
+ }
+
+ if (last->gtOverflow() != next->gtOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+
+ const ssize_t i1 = src1->gtIntCon.gtIconVal;
+ const ssize_t i2 = src2->gtIntCon.gtIconVal;
+ const ssize_t itemp = i1 + i2;
+
+ /* if the operators are checking for overflow, check for overflow of the operands */
+
+ if (next->gtOverflow())
+ {
+ if (next->TypeGet() == TYP_LONG)
+ {
+ if (next->gtFlags & GTF_UNSIGNED)
+ {
+ ClrSafeInt<UINT64> si1(i1);
+ if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+ }
+ else
+ {
+ ClrSafeInt<INT64> si1(i1);
+ if ((si1 + ClrSafeInt<INT64>(i2)).IsOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+ }
+ }
+ else if (next->gtFlags & GTF_UNSIGNED)
+ {
+ ClrSafeInt<UINT32> si1(i1);
+ if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+ }
+ else
+ {
+ ClrSafeInt<INT32> si1(i1);
+ if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+ }
+ }
+
+ /* Fold the two increments/decrements into one */
+
+ src1->gtIntCon.gtIconVal = itemp;
+#ifdef _TARGET_64BIT_
+ if (src1->gtType == TYP_INT)
+ {
+ src1->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif //_TARGET_64BIT_
+
+ /* Remove the second statement completely */
+
+ noway_assert(tree->gtNext == temp);
+ noway_assert(temp->gtPrev == tree);
+
+ if (temp->gtNext)
+ {
+ noway_assert(temp->gtNext->gtPrev == temp);
+
+ temp->gtNext->gtPrev = tree;
+ tree->gtNext = temp->gtNext;
+ }
+ else
+ {
+ tree->gtNext = nullptr;
+
+ noway_assert(block->bbTreeList->gtPrev == temp);
+
+ block->bbTreeList->gtPrev = tree;
+ }
+ }
+ }
+
+ NOT_CAFFE:;
+ }
+ }
+
+#endif
+
+ /* Are we using a single return block? */
+
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
+ {
+ /* We'll jump to the genReturnBB */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !defined(_TARGET_X86_)
+ if (info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ fgConvertSyncReturnToLeave(block);
+ }
+ else
+#endif // !_TARGET_X86_
+ {
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = genReturnBB;
+ fgReturnCount--;
+ }
+
+ // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
+ // For example, a method returning void could have an empty block with jump kind BBJ_RETURN.
+ // Such blocks do materialize as part of inlining.
+ //
+ // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
+ // It could end with a tail call, a rejected tail call, a monitor.exit, or a GT_INTRINSIC.
+ // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
+ // is BAD_VAR_NUM.
+ //
+ // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
+
+ GenTreePtr last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
+ GenTreePtr ret = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;
+
+ // replace the GT_RETURN node to be a GT_ASG that stores the return value into genReturnLocal.
+ if (genReturnLocal != BAD_VAR_NUM)
+ {
+ // Method must be returning a value other than TYP_VOID.
+ noway_assert(compMethodHasRetVal());
+
+ // This block must be ending with a GT_RETURN
+ noway_assert(last != nullptr);
+ noway_assert(last->gtOper == GT_STMT);
+ noway_assert(last->gtNext == nullptr);
+ noway_assert(ret != nullptr);
+
+ // GT_RETURN must have non-null operand as the method is returning the value assigned to
+ // genReturnLocal
+ noway_assert(ret->OperGet() == GT_RETURN);
+ noway_assert(ret->gtGetOp1() != nullptr);
+ noway_assert(ret->gtGetOp2() == nullptr);
+
+ GenTreePtr tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
+
+ last->gtStmt.gtStmtExpr = (tree->OperIsCopyBlkOp()) ? fgMorphCopyBlock(tree) : tree;
+
+ // make sure that copy-prop ignores this assignment.
+ last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
+ }
+ else if (ret != nullptr && ret->OperGet() == GT_RETURN)
+ {
+ // This block ends with a GT_RETURN
+ noway_assert(last != nullptr);
+ noway_assert(last->gtOper == GT_STMT);
+ noway_assert(last->gtNext == nullptr);
+
+ // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn block
+ noway_assert(ret->TypeGet() == TYP_VOID);
+ noway_assert(ret->gtGetOp1() == nullptr);
+ noway_assert(ret->gtGetOp2() == nullptr);
+
+ fgRemoveStmt(block, last);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("morph BB%02u to point at onereturn. New block is\n", block->bbNum);
+ fgTableDispBasicBlock(block);
+ }
+#endif
+ }
+ }
+
+ block = block->bbNext;
+ } while (block);
+
+ /* We are done with the global morphing phase */
+
+ fgGlobalMorph = false;
+
+#ifdef DEBUG
+ if (verboseTrees)
+ {
+ fgDispBasicBlocks(true);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Make some decisions about the kind of code to generate.
+ */
+
+void Compiler::fgSetOptions()
+{
+#ifdef DEBUG
+ /* Should we force fully interruptible code ? */
+ if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
+ {
+ noway_assert(!codeGen->isGCTypeFixed());
+ genInterruptible = true;
+ }
+#endif
+
+#ifdef DEBUGGING_SUPPORT
+ if (opts.compDbgCode)
+ {
+ assert(!codeGen->isGCTypeFixed());
+ genInterruptible = true; // debugging is easier this way ...
+ }
+#endif
+
+ /* Assume we won't need an explicit stack frame if this is allowed */
+
+ // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
+ // the callee-saved registers.
+ noway_assert(!compTailCallUsed || !compLocallocUsed);
+
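+ // localloc adjusts the stack pointer at run time, so the frame must be
+ // addressed through a frame pointer.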
+ if (compLocallocUsed)
+ {
+ codeGen->setFramePointerRequired(true);
+ }
+
+#ifdef _TARGET_X86_
+
+ if (compTailCallUsed)
+ codeGen->setFramePointerRequired(true);
+
+#endif // _TARGET_X86_
+
+ if (!opts.genFPopt)
+ {
+ codeGen->setFramePointerRequired(true);
+ }
+
+ // Assert that the EH table has been initialized by now. Note that
+ // compHndBBtabAllocCount never decreases; it is a high-water mark
+ // of table allocation. In contrast, compHndBBtabCount does shrink
+ // if we delete a dead EH region, and if it shrinks to zero, the
+ // table pointer compHndBBtab is unreliable.
+ assert(compHndBBtabAllocCount >= info.compXcptnsCount);
+
+#ifdef _TARGET_X86_
+
+ // Note: this case, and the !X86 case below, should both use the
+ // !X86 path. This would require a few more changes for X86 to use
+ // compHndBBtabCount (the current number of EH clauses) instead of
+ // info.compXcptnsCount (the number of EH clauses in IL), such as
+ // in ehNeedsShadowSPslots(). This is because sometimes the IL has
+ // an EH clause that we delete as statically dead code before we
+ // get here, leaving no EH clauses, and thus no requirement
+ // to use a frame pointer because of EH. But until all the code uses
+ // the same test, leave info.compXcptnsCount here.
+ if (info.compXcptnsCount > 0)
+ codeGen->setFramePointerRequiredEH(true);
+
+#else // !_TARGET_X86_
+
+ if (compHndBBtabCount > 0)
+ {
+ codeGen->setFramePointerRequiredEH(true);
+ }
+
+#endif // _TARGET_X86_
+
+ // fgPtrArgCntMax records the maximum number of pushed arguments.
+ // Depending on this value, we may need to use an EBP frame or
+ // be only partially interruptible.
+ //
+
+ if (!compCanEncodePtrArgCntMax())
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
+ "interruptible\n");
+ }
+#endif
+ genInterruptible = false;
+ }
+ if (fgPtrArgCntMax >= sizeof(unsigned))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
+ }
+#endif
+ codeGen->setFramePointerRequiredGCInfo(true);
+ }
+
+ if (info.compCallUnmanaged)
+ {
+ codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
+ }
+
+ if (info.compPublishStubParam)
+ {
+ codeGen->setFramePointerRequiredGCInfo(true);
+ }
+
+ if (opts.compNeedSecurityCheck)
+ {
+ codeGen->setFramePointerRequiredGCInfo(true);
+
+#ifndef JIT32_GCENCODER
+
+ // The decoder only reports objects in frames with exceptions if the frame
+ // is fully interruptible.
+ // Even if there is no catch or other way to resume execution in this frame,
+ // the VM requires the security object to remain alive until later, so
+ // frames with security objects must be fully interruptible.
+ genInterruptible = true;
+
+#endif // JIT32_GCENCODER
+ }
+
+ if (compIsProfilerHookNeeded())
+ {
+ codeGen->setFramePointerRequired(true);
+ }
+
+ if (info.compIsVarArgs)
+ {
+ // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
+ codeGen->setFramePointerRequiredGCInfo(true);
+ }
+
+ if (lvaReportParamTypeArg())
+ {
+ codeGen->setFramePointerRequiredGCInfo(true);
+ }
+
+ // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
+}
+
+/*****************************************************************************/
+
+GenTreePtr Compiler::fgInitThisClass()
+{
+ noway_assert(!compIsForInlining());
+
+ CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
+
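+ // If no runtime lookup is needed, the exact class is known statically and we can
+ // use the shared class-constructor helper directly; otherwise the class must be
+ // resolved from the generic context (the 'this' object, a class param, or a method param).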
+ if (!kind.needsRuntimeLookup)
+ {
+ return fgGetSharedCCtor(info.compClassHnd);
+ }
+ else
+ {
+ // Collectible types require that, for shared generic code, if we use the generic context parameter,
+ // we report it. (This is a conservative approach; we could detect some cases, particularly when the
+ // context parameter is 'this', where we don't need the eager reporting logic.)
+ lvaGenericsContextUsed = true;
+
+ switch (kind.runtimeLookupKind)
+ {
+ case CORINFO_LOOKUP_THISOBJ:
+ // This code takes a this pointer, but we need to pass the static method desc to get the right point in
+ // the hierarchy.
+ {
+ GenTreePtr vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
+ // Vtable pointer of this object
+ vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
+ vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
+ GenTreePtr methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
+
+ return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
+ gtNewArgList(vtTree, methodHnd));
+ }
+
+ case CORINFO_LOOKUP_CLASSPARAM:
+ {
+ GenTreePtr vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
+ return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, 0, gtNewArgList(vtTree));
+ }
+
+ case CORINFO_LOOKUP_METHODPARAM:
+ {
+ GenTreePtr methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
+ return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
+ gtNewArgList(gtNewIconNode(0), methHndTree));
+ }
+ }
+ }
+
+ noway_assert(!"Unknown LOOKUP_KIND");
+ UNREACHABLE();
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Tree walk callback to make sure no GT_QMARK nodes are present in the tree,
+ * except for the allowed ? 1 : 0; pattern.
+ */
+Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTreePtr* tree, fgWalkData* data)
+{
+ if ((*tree)->OperGet() == GT_QMARK)
+ {
+ fgCheckQmarkAllowedForm(*tree);
+ }
+ return WALK_CONTINUE;
+}
+
+void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_QMARK);
+#ifndef LEGACY_BACKEND
+ assert(!"Qmarks beyond morph disallowed.");
+#else // LEGACY_BACKEND
+ GenTreePtr colon = tree->gtOp.gtOp2;
+
+ assert(colon->gtOp.gtOp1->IsIntegralConst(0));
+ assert(colon->gtOp.gtOp2->IsIntegralConst(1));
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Verify that the importer has created GT_QMARK nodes in a way we can
+ * process them. The following is allowed:
+ *
+ * 1. A top level qmark. Top level qmark is of the form:
+ * a) (bool) ? (void) : (void) OR
+ * b) V0N = (bool) ? (type) : (type)
+ *
+ * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
+ * of either op1 of colon or op2 of colon but not a child of any other
+ * operator.
+ */
+void Compiler::fgPreExpandQmarkChecks(GenTreePtr expr)
+{
+ GenTreePtr topQmark = fgGetTopLevelQmark(expr);
+
+ // If the top level Qmark is null, then scan the tree to make sure
+ // there are no qmarks within it.
+ if (topQmark == nullptr)
+ {
+ fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
+ }
+ else
+ {
+ // We could probably expand the cond node also, but we don't think the extra effort is necessary,
+ // so just assert that the cond node of a top level qmark doesn't contain further top level qmarks.
+ fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);
+
+ fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
+ fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
+ }
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Get the top level GT_QMARK node in a given "expr"; return NULL if no such
+ * node is present. If the top level GT_QMARK node is assigned to a
+ * GT_LCL_VAR, then return the lcl node in ppDst.
+ *
+ */
+GenTreePtr Compiler::fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst /* = NULL */)
+{
+ if (ppDst != nullptr)
+ {
+ *ppDst = nullptr;
+ }
+
+ GenTreePtr topQmark = nullptr;
+ if (expr->gtOper == GT_QMARK)
+ {
+ topQmark = expr;
+ }
+ else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ topQmark = expr->gtOp.gtOp2;
+ if (ppDst != nullptr)
+ {
+ *ppDst = expr->gtOp.gtOp1;
+ }
+ }
+ return topQmark;
+}
+
+/*********************************************************************************
+ *
+ * For a castclass helper call,
+ * the importer creates the following tree:
+ * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
+ *
+ * This method splits the qmark expression created by the importer into the
+ * following blocks: (block, asg, cond1, cond2, helper, remainder)
+ * Notice that op1 is the result for both the conditions. So we coalesce these
+ * assignments into a single block instead of two blocks, resulting in a nested diamond.
+ *
+ * +---------->-----------+
+ * | | |
+ * ^ ^ v
+ * | | |
+ * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
+ *
+ * We expect to achieve the following codegen:
+ * mov rsi, rdx tmp = op1 // asgBlock
+ * test rsi, rsi goto skip if tmp == null ? // cond1Block
+ * je SKIP
+ * mov rcx, 0x76543210 cns = op2 // cond2Block
+ * cmp qword ptr [rsi], rcx goto skip if *tmp == op2
+ * je SKIP
+ * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock
+ * mov rsi, rax
+ * SKIP: // remainderBlock
+ * tmp has the result.
+ *
+ */
+void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
+ fgDispBasicBlocks(block, block, true);
+ }
+#endif // DEBUG
+
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+
+ GenTreePtr dst = nullptr;
+ GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
+ noway_assert(dst != nullptr);
+
+ assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
+
+ // Get cond, true, false exprs for the qmark.
+ GenTreePtr condExpr = qmark->gtGetOp1();
+ GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
+ GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
+
+ // Get cond, true, false exprs for the nested qmark.
+ GenTreePtr nestedQmark = falseExpr;
+ GenTreePtr cond2Expr;
+ GenTreePtr true2Expr;
+ GenTreePtr false2Expr;
+
+ if (nestedQmark->gtOper == GT_QMARK)
+ {
+ cond2Expr = nestedQmark->gtGetOp1();
+ true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
+ false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
+
+ assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
+ cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
+ }
+ else
+ {
+ // This is a rare case that arises when we are doing minopts and encounter isinst of null.
+ // gtFoldExpr was still able to optimize away part of the tree (but not all of it).
+ // That means it does not match our pattern.
+
+ // Rather than write code to handle this case, just fake up some nodes to make it match the common
+ // case. Synthesize a comparison that is always true, and for the result-on-true, use the
+ // entire subtree we expected to be the nested question op.
+
+ cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
+ true2Expr = nestedQmark;
+ false2Expr = gtNewIconNode(0, TYP_I_IMPL);
+ }
+ assert(false2Expr->OperGet() == trueExpr->OperGet());
+
+ // Clear flags as they are now going to be part of JTRUE.
+ assert(condExpr->gtFlags & GTF_RELOP_QMARK);
+ condExpr->gtFlags &= ~GTF_RELOP_QMARK;
+
+ // Create the chain of blocks. See method header comment.
+ // The order of blocks after this is the following:
+ // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
+ //
+ // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
+ // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
+ // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
+ // remainderBlock will still be GC safe.
+ unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
+ BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
+ fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
+
+ BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
+ BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true);
+ BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true);
+ BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true);
+
+ remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
+
+ // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
+ // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
+ if ((block->bbFlags & BBF_INTERNAL) == 0)
+ {
+ helperBlock->bbFlags &= ~BBF_INTERNAL;
+ cond2Block->bbFlags &= ~BBF_INTERNAL;
+ cond1Block->bbFlags &= ~BBF_INTERNAL;
+ asgBlock->bbFlags &= ~BBF_INTERNAL;
+ helperBlock->bbFlags |= BBF_IMPORTED;
+ cond2Block->bbFlags |= BBF_IMPORTED;
+ cond1Block->bbFlags |= BBF_IMPORTED;
+ asgBlock->bbFlags |= BBF_IMPORTED;
+ }
+
+ // Chain the flow correctly.
+ fgAddRefPred(asgBlock, block);
+ fgAddRefPred(cond1Block, asgBlock);
+ fgAddRefPred(cond2Block, cond1Block);
+ fgAddRefPred(helperBlock, cond2Block);
+ fgAddRefPred(remainderBlock, helperBlock);
+ fgAddRefPred(remainderBlock, cond1Block);
+ fgAddRefPred(remainderBlock, cond2Block);
+
+ cond1Block->bbJumpDest = remainderBlock;
+ cond2Block->bbJumpDest = remainderBlock;
+
+ // Set the weights; some are guesses.
+ asgBlock->inheritWeight(block);
+ cond1Block->inheritWeight(block);
+ cond2Block->inheritWeightPercentage(cond1Block, 50);
+ helperBlock->inheritWeightPercentage(cond2Block, 50);
+
+ // Append cond1 as JTRUE to cond1Block
+ GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
+ GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(cond1Block, jmpStmt);
+
+ // Append cond2 as JTRUE to cond2Block
+ jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
+ jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(cond2Block, jmpStmt);
+
+ // AsgBlock should get tmp = op1 assignment.
+ trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
+ GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(asgBlock, trueStmt);
+
+ // Since we are adding the helper call on the JTRUE false path, reverse cond2 and add the helper.
+ gtReverseCond(cond2Expr);
+ GenTreePtr helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
+ GenTreePtr helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(helperBlock, helperStmt);
+
+ // Finally, remove the original statement that contained the nested qmark.
+ fgRemoveStmt(block, stmt);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
+ fgDispBasicBlocks(block, remainderBlock, true);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Expand a statement with a top level qmark node. There are three cases, based
+ * on whether the qmark has both "true" and "false" arms, or just one of them.
+ *
+ * S0;
+ * C ? T : F;
+ * S1;
+ *
+ * Generates ===>
+ *
+ * bbj_always
+ * +---->------+
+ * false | |
+ * S0 -->-- ~C -->-- T F -->-- S1
+ * | |
+ * +--->--------+
+ * bbj_cond(true)
+ *
+ * -----------------------------------------
+ *
+ * S0;
+ * C ? T : NOP;
+ * S1;
+ *
+ * Generates ===>
+ *
+ * false
+ * S0 -->-- ~C -->-- T -->-- S1
+ * | |
+ * +-->-------------+
+ * bbj_cond(true)
+ *
+ * -----------------------------------------
+ *
+ * S0;
+ * C ? NOP : F;
+ * S1;
+ *
+ * Generates ===>
+ *
+ * false
+ * S0 -->-- C -->-- F -->-- S1
+ * | |
+ * +-->------------+
+ * bbj_cond(true)
+ *
+ * If the qmark assigns to a variable, then create tmps for "then"
+ * and "else" results and assign the temp to the variable as a writeback step.
+ */
+void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTreePtr stmt)
+{
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+
+ // Retrieve the Qmark node to be expanded.
+ GenTreePtr dst = nullptr;
+ GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
+ if (qmark == nullptr)
+ {
+ return;
+ }
+
+ if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
+ {
+ fgExpandQmarkForCastInstOf(block, stmt);
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
+ fgDispBasicBlocks(block, block, true);
+ }
+#endif // DEBUG
+
+ // Retrieve the operands.
+ GenTreePtr condExpr = qmark->gtGetOp1();
+ GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
+ GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
+
+ assert(condExpr->gtFlags & GTF_RELOP_QMARK);
+ condExpr->gtFlags &= ~GTF_RELOP_QMARK;
+
+ assert(!varTypeIsFloating(condExpr->TypeGet()));
+
+ bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP);
+ bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
+ assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
+
+ // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
+ // block ... condBlock ... elseBlock ... remainderBlock
+ //
+ // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
+ // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
+ // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
+ // remainderBlock will still be GC safe.
+ unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
+ BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
+ fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
+
+ BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
+ BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);
+
+ // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
+ // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
+ if ((block->bbFlags & BBF_INTERNAL) == 0)
+ {
+ condBlock->bbFlags &= ~BBF_INTERNAL;
+ elseBlock->bbFlags &= ~BBF_INTERNAL;
+ condBlock->bbFlags |= BBF_IMPORTED;
+ elseBlock->bbFlags |= BBF_IMPORTED;
+ }
+
+ remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
+
+ condBlock->inheritWeight(block);
+
+ fgAddRefPred(condBlock, block);
+ fgAddRefPred(elseBlock, condBlock);
+ fgAddRefPred(remainderBlock, elseBlock);
+
+ BasicBlock* thenBlock = nullptr;
+ if (hasTrueExpr && hasFalseExpr)
+ {
+ // bbj_always
+ // +---->------+
+ // false | |
+ // S0 -->-- ~C -->-- T F -->-- S1
+ // | |
+ // +--->--------+
+ // bbj_cond(true)
+ //
+ gtReverseCond(condExpr);
+ condBlock->bbJumpDest = elseBlock;
+
+ thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
+ thenBlock->bbJumpDest = remainderBlock;
+ if ((block->bbFlags & BBF_INTERNAL) == 0)
+ {
+ thenBlock->bbFlags &= ~BBF_INTERNAL;
+ thenBlock->bbFlags |= BBF_IMPORTED;
+ }
+
+ elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+
+ fgAddRefPred(thenBlock, condBlock);
+ fgAddRefPred(remainderBlock, thenBlock);
+
+ thenBlock->inheritWeightPercentage(condBlock, 50);
+ elseBlock->inheritWeightPercentage(condBlock, 50);
+ }
+ else if (hasTrueExpr)
+ {
+ // false
+ // S0 -->-- ~C -->-- T -->-- S1
+ // | |
+ // +-->-------------+
+ // bbj_cond(true)
+ //
+ gtReverseCond(condExpr);
+ condBlock->bbJumpDest = remainderBlock;
+ fgAddRefPred(remainderBlock, condBlock);
+ // Since we have no false expr, use the one we'd already created.
+ thenBlock = elseBlock;
+ elseBlock = nullptr;
+
+ thenBlock->inheritWeightPercentage(condBlock, 50);
+ }
+ else if (hasFalseExpr)
+ {
+ // false
+ // S0 -->-- C -->-- F -->-- S1
+ // | |
+ // +-->------------+
+ // bbj_cond(true)
+ //
+ condBlock->bbJumpDest = remainderBlock;
+ fgAddRefPred(remainderBlock, condBlock);
+
+ elseBlock->inheritWeightPercentage(condBlock, 50);
+ }
+
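+ // Append the (possibly reversed) condition as a JTRUE at the end of condBlock.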
+ GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
+ GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(condBlock, jmpStmt);
+
+ // Remove the original qmark statement.
+ fgRemoveStmt(block, stmt);
+
+ // Since this is a top level qmark, either it has a dst, in which case we assign
+ // the true and false expressions into that local, or it has no dst and we don't
+ // bother assigning.
+ unsigned lclNum = BAD_VAR_NUM;
+ if (dst != nullptr)
+ {
+ assert(dst->gtOper == GT_LCL_VAR);
+ lclNum = dst->gtLclVar.gtLclNum;
+ }
+ else
+ {
+ assert(qmark->TypeGet() == TYP_VOID);
+ }
+
+ if (hasTrueExpr)
+ {
+ if (dst != nullptr)
+ {
+ trueExpr = gtNewTempAssign(lclNum, trueExpr);
+ }
+ GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(thenBlock, trueStmt);
+ }
+
+ // Assign the falseExpr into the dst or tmp, insert in elseBlock
+ if (hasFalseExpr)
+ {
+ if (dst != nullptr)
+ {
+ falseExpr = gtNewTempAssign(lclNum, falseExpr);
+ }
+ GenTreePtr falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(elseBlock, falseStmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
+ fgDispBasicBlocks(block, remainderBlock, true);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Expand GT_QMARK nodes from the flow graph into basic blocks.
+ *
+ */
+
+void Compiler::fgExpandQmarkNodes()
+{
+ if (compQmarkUsed)
+ {
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+#ifdef DEBUG
+ fgPreExpandQmarkChecks(expr);
+#endif
+ fgExpandQmarkStmt(block, stmt);
+ }
+ }
+#ifdef DEBUG
+ fgPostExpandQmarkChecks();
+#endif
+ }
+ compQmarkRationalized = true;
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Make sure we don't have any more GT_QMARK nodes.
+ *
+ */
+void Compiler::fgPostExpandQmarkChecks()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+ fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
+ }
+ }
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Transform all basic blocks for codegen.
+ */
+
+void Compiler::fgMorph()
+{
+ noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.
+
+ fgOutgoingArgTemps = nullptr;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgMorph()\n");
+ }
+ if (verboseTrees)
+ {
+ fgDispBasicBlocks(true);
+ }
+#endif // DEBUG
+
+ // Insert call to class constructor as the first basic block if
+ // we were asked to do so.
+ if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
+ impTokenLookupContextHandle /* context */) &
+ CORINFO_INITCLASS_USE_HELPER)
+ {
+ fgEnsureFirstBBisScratch();
+ fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
+ }
+
+#ifdef DEBUG
+ if (opts.compGcChecks)
+ {
+ for (unsigned i = 0; i < info.compArgsCount; i++)
+ {
+ if (lvaTable[i].TypeGet() == TYP_REF)
+ {
+ // confirm that the argument is a GC pointer (for debugging (GC stress))
+ GenTreePtr op = gtNewLclvNode(i, TYP_REF);
+ GenTreeArgList* args = gtNewArgList(op);
+ op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, 0, args);
+
+ fgEnsureFirstBBisScratch();
+ fgInsertStmtAtEnd(fgFirstBB, op);
+ }
+ }
+ }
+
+ if (opts.compStackCheckOnRet)
+ {
+ lvaReturnEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
+ lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
+ }
+
+ if (opts.compStackCheckOnCall)
+ {
+ lvaCallEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
+ lvaTable[lvaCallEspCheck].lvType = TYP_INT;
+ }
+#endif // DEBUG
+
+ /* Filter out unimported BBs */
+
+ fgRemoveEmptyBlocks();
+
+ /* Add any internal blocks/trees we may need */
+
+ fgAddInternal();
+
+#if OPT_BOOL_OPS
+ fgMultipleNots = false;
+#endif
+
+#ifdef DEBUG
+ /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
+ fgDebugCheckBBlist(false, false);
+#endif // DEBUG
+
+ /* Inline */
+ fgInline();
+#if 0
+ JITDUMP("trees after inlining\n");
+ DBEXEC(VERBOSE, fgDispBasicBlocks(true));
+#endif
+
+ RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
+
+#ifdef DEBUG
+ /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
+ fgDebugCheckBBlist(false, false);
+#endif // DEBUG
+
+ /* For x64 and ARM64 we need to mark irregular parameters early so that they don't get promoted */
+ fgMarkImplicitByRefArgs();
+
+ /* Promote struct locals if necessary */
+ fgPromoteStructs();
+
+ /* Now it is time to figure out which locals are address-taken. */
+ fgMarkAddressExposedLocals();
+
+#ifdef DEBUG
+ /* Now that address-taken locals are marked, we can safely apply stress. */
+ lvaStressLclFld();
+ fgStress64RsltMul();
+#endif // DEBUG
+
+ /* Morph the trees in all the blocks of the method */
+
+ fgMorphBlocks();
+
+#if 0
+ JITDUMP("trees after fgMorphBlocks\n");
+ DBEXEC(VERBOSE, fgDispBasicBlocks(true));
+#endif
+
+ /* Decide the kind of code we want to generate */
+
+ fgSetOptions();
+
+ fgExpandQmarkNodes();
+
+#ifdef DEBUG
+ compCurBB = nullptr;
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Promoting struct locals
+ */
+void Compiler::fgPromoteStructs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgPromoteStructs()\n");
+ }
+#endif // DEBUG
+
+ if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
+ {
+ return;
+ }
+
+ if (fgNoStructPromotion)
+ {
+ return;
+ }
+
+#if 0
+ // The code in this #if has been useful in debugging struct promotion issues, by
+ // allowing selective enablement of the struct promotion optimization according to
+ // method hash.
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("structpromohashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ }
+ char* histr = getenv("structpromohashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ {
+ return;
+ }
+ else
+ {
+ printf("Promoting structs for method %s, hash = 0x%x.\n",
+ info.compFullName, info.compMethodHash());
+ printf(""); // in our logic this causes a flush
+ }
+#endif // DEBUG
+#endif // 0
+
+ if (info.compIsVarArgs)
+ {
+ return;
+ }
+
+ if (getNeedsGSSecurityCookie())
+ {
+ return;
+ }
+
+ // The lvaTable might grow as we grab temps. Make a local copy here.
+
+ unsigned startLvaCount = lvaCount;
+
+ //
+ // Loop through the original lvaTable. Looking for struct locals to be promoted.
+ //
+
+ lvaStructPromotionInfo structPromotionInfo;
+ bool tooManyLocals = false;
+
+ for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
+ {
+ // Whether this var got promoted
+ bool promotedVar = false;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+#ifdef FEATURE_SIMD
+ if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic)
+ {
+ // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
+ // its fields. Instead, we will attempt to enregister the entire struct.
+ varDsc->lvRegStruct = true;
+ }
+ else
+#endif // FEATURE_SIMD
+ // Don't promote if we have reached the tracking limit.
+ if (lvaHaveManyLocals())
+ {
+ // Print the message first time when we detected this condition
+ if (!tooManyLocals)
+ {
+ JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
+ }
+ tooManyLocals = true;
+ }
+#if !FEATURE_MULTIREG_STRUCT_PROMOTE
+ else if (varDsc->lvIsMultiRegArg)
+ {
+ JITDUMP("Skipping V%02u: marked lvIsMultiRegArg.\n", lclNum);
+ }
+#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
+ else if (varDsc->lvIsMultiRegRet)
+ {
+ JITDUMP("Skipping V%02u: marked lvIsMultiRegRet.\n", lclNum);
+ }
+ else if (varTypeIsStruct(varDsc))
+ {
+ lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
+ bool canPromote = structPromotionInfo.canPromote;
+
+ // We start off with shouldPromote same as canPromote.
+ // Based on further profitability checks done below, shouldPromote
+ // could be set to false.
+ bool shouldPromote = canPromote;
+
+ if (canPromote)
+ {
+
+ // We *can* promote; *should* we promote?
+ // We should only do so if promotion has potential savings. One source of savings
+ // is if a field of the struct is accessed, since this access will be turned into
+ // an access of the corresponding promoted field variable. Even if there are no
+ // field accesses, but only block-level operations on the whole struct, if the struct
+ // has only one or two fields, then doing those block operations field-wise is probably faster
+ // than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86).
+ // So if no fields are accessed independently, and there are three or more fields,
+ // then do not promote.
+ if (structPromotionInfo.fieldCnt > 2 && !varDsc->lvFieldAccessed)
+ {
+ JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n", lclNum,
+ structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed);
+ shouldPromote = false;
+ }
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // TODO-PERF - Only do this when the LclVar is used in an argument context
+ // TODO-ARM64 - HFA support should also eliminate the need for this.
+ // TODO-LSRA - Currently doesn't support the passing of floating point LCL_VARS in the integer registers
+ //
+ // For now we don't promote structs with a single float field.
+ // Promoting one can cause us to shuffle it back and forth between the int and
+ // the float regs when it is used as an argument, which is very expensive for XARCH.
+ //
+ else if ((structPromotionInfo.fieldCnt == 1) &&
+ varTypeIsFloating(structPromotionInfo.fields[0].fldType))
+ {
+ JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with "
+ "single float field.\n",
+ lclNum, structPromotionInfo.fieldCnt);
+ shouldPromote = false;
+ }
+#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+
+#if !FEATURE_MULTIREG_STRUCT_PROMOTE
+#if defined(_TARGET_ARM64_)
+ //
+ // For now we don't promote structs that are passed in registers.
+ //
+ else if (lvaIsMultiregStruct(varDsc))
+ {
+ JITDUMP("Not promoting promotable multireg struct local V%02u (size==%d): ", lclNum,
+ lvaLclExactSize(lclNum));
+ shouldPromote = false;
+ }
+#endif // _TARGET_ARM64_
+#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
+ else if (varDsc->lvIsParam)
+ {
+#if FEATURE_MULTIREG_STRUCT_PROMOTE
+ if (lvaIsMultiregStruct(
+ varDsc) && // Is this a variable holding a value that is passed in multiple registers?
+ (structPromotionInfo.fieldCnt != 2)) // and does it have other than exactly two fields?
+ {
+ JITDUMP(
+ "Not promoting multireg struct local V%02u, because lvIsParam is true and #fields != 2\n",
+ lclNum);
+ shouldPromote = false;
+ }
+ else
+#endif // FEATURE_MULTIREG_STRUCT_PROMOTE
+
+ // TODO-PERF - Implement struct promotion for incoming multireg structs
+ // Currently it hits assert(lvFieldCnt==1) in lclvar.cpp line 4417
+
+ if (structPromotionInfo.fieldCnt != 1)
+ {
+ JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = "
+ "%d.\n",
+ lclNum, structPromotionInfo.fieldCnt);
+ shouldPromote = false;
+ }
+ }
+
+ //
+ // If the lvRefCnt is zero and we have a promoted struct parameter, we can end up with an extra store of
+ // the incoming register into the stack frame slot.
+ // In that case, we would like to avoid promotion.
+ // However, we haven't yet computed the lvRefCnt values, so we can't do that.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if 0
+ // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
+ // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
+ static int structPromoVarNum = 0;
+ structPromoVarNum++;
+ if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
+#endif // 0
+
+ if (shouldPromote)
+ {
+ assert(canPromote);
+
+ // Promote this struct local var.
+ lvaPromoteStructVar(lclNum, &structPromotionInfo);
+ promotedVar = true;
+
+#ifdef _TARGET_ARM_
+ if (structPromotionInfo.requiresScratchVar)
+ {
+ // Ensure that the scratch variable is allocated, in case we
+ // pass a promoted struct as an argument.
+ if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
+ {
+ lvaPromotedStructAssemblyScratchVar =
+ lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
+ lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
+ }
+ }
+#endif // _TARGET_ARM_
+ }
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ if (!promotedVar && varDsc->lvSIMDType && !varDsc->lvFieldAccessed)
+ {
+ // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
+ // we will treat it as a reg struct.
+ varDsc->lvRegStruct = true;
+ }
+#endif // FEATURE_SIMD
+ }
+}
+
+Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
+{
+ noway_assert(tree->OperGet() == GT_FIELD);
+ noway_assert(tree->gtFlags & GTF_GLOB_REF);
+
+ GenTreePtr objRef = tree->gtField.gtFldObj;
+
+ /* Is this an instance data member? */
+
+ if (objRef)
+ {
+ if (objRef->gtOper == GT_ADDR)
+ {
+ GenTreePtr obj = objRef->gtOp.gtOp1;
+
+ if (obj->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varTypeIsStruct(obj))
+ {
+ if (varDsc->lvPromoted)
+ {
+ // Promoted struct
+ unsigned fldOffset = tree->gtField.gtFldOffset;
+ unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
+ noway_assert(fieldLclIndex != BAD_VAR_NUM);
+
+ tree->SetOper(GT_LCL_VAR);
+ tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
+ tree->gtType = lvaTable[fieldLclIndex].TypeGet();
+ tree->gtFlags &= GTF_NODE_MASK;
+ tree->gtFlags &= ~GTF_GLOB_REF;
+
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing the field in promoted struct with a local var:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+ else
+ {
+ // Normed struct
+ // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
+ // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
+ // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
+ // there is one extremely rare case where that won't be true. An enum type is a special value type
+ // that contains exactly one element of a primitive integer type (which, for CLS programs, is named
+ // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
+ // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
+ // ldfld. For example:
+ //
+ // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
+ // {
+ // .field public specialname rtspecialname int16 value__
+ // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
+ // }
+ // .method public hidebysig static void Main() cil managed
+ // {
+ // .locals init (valuetype mynamespace.e_t V_0)
+ // ...
+ // ldloca.s V_0
+ // ldflda int16 mynamespace.e_t::value__
+ // ...
+ // }
+ //
+ // Normally, compilers will not generate the ldflda, since it is superfluous.
+ //
+ // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
+ // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
+ // mismatch like this, don't do this morphing. The local var may end up getting marked as
+ // address taken, and the appropriate SHORT load will be done from memory in that case.
+
+ if (tree->TypeGet() == obj->TypeGet())
+ {
+ tree->ChangeOper(GT_LCL_VAR);
+ tree->gtLclVarCommon.SetLclNum(lclNum);
+ tree->gtFlags &= GTF_NODE_MASK;
+
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing the field in normed struct with the local var:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+ }
+ }
+ }
+
+ return WALK_CONTINUE;
+}
+
+Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre)
+{
+ noway_assert(tree->OperGet() == GT_LCL_FLD);
+
+ unsigned lclNum = tree->gtLclFld.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
+ {
+ // Promoted struct
+ unsigned fldOffset = tree->gtLclFld.gtLclOffs;
+ unsigned fieldLclIndex = 0;
+ LclVarDsc* fldVarDsc = nullptr;
+
+ if (fldOffset != BAD_VAR_NUM)
+ {
+ fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
+ noway_assert(fieldLclIndex != BAD_VAR_NUM);
+ fldVarDsc = &lvaTable[fieldLclIndex];
+ }
+
+ if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
+#ifdef _TARGET_X86_
+ && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
+#endif
+ )
+ {
+ // There is an existing sub-field we can use
+ tree->gtLclFld.SetLclNum(fieldLclIndex);
+
+ // We need to keep the types 'compatible'; if we can, we switch back to a GT_LCL_VAR.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ assert(varTypeIsIntegralOrI(tree->TypeGet()) || varTypeIsFloating(tree->TypeGet()));
+#else
+ assert(varTypeIsIntegralOrI(tree->TypeGet()));
+#endif
+ if (varTypeCanReg(fldVarDsc->TypeGet()))
+ {
+ // If the type is integer-ish, then we can use it as-is
+ tree->ChangeOper(GT_LCL_VAR);
+ assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
+ tree->gtType = fldVarDsc->TypeGet();
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ }
+
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+ }
+ else
+ {
+ // There is no existing field that has all the parts that we need
+ // So we must ensure that the struct lives in memory.
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
+
+#ifdef DEBUG
+ // We can't convert this local to a float because its address
+ // really is taken.
+ varDsc->lvKeepType = 1;
+#endif // DEBUG
+ }
+
+ return WALK_SKIP_SUBTREES;
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Mark irregular parameters. For x64 this is 3, 5, 6, 7, >8 byte structs that are passed by reference.
+ * For ARM64, this is structs larger than 16 bytes that are not HFAs; these are passed by reference.
+ */
+void Compiler::fgMarkImplicitByRefArgs()
+{
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgMarkImplicitByRefs()\n");
+ }
+#endif // DEBUG
+
+ for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ assert(!varDsc->lvPromoted); // Called in the wrong order?
+
+ if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
+ {
+ size_t size;
+
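+ // lvSize() rounds small structs up to a full stack slot, so for structs no larger
+ // than a register ask the VM for the exact size; the by-reference test below
+ // depends on the exact struct size.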
+ if (varDsc->lvSize() > REGSIZE_BYTES)
+ {
+ size = varDsc->lvSize();
+ }
+ else
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ size = info.compCompHnd->getClassSize(typeHnd);
+ }
+
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if defined(_TARGET_AMD64_)
+ if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
+#elif defined(_TARGET_ARM64_)
+ if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc))
+#endif
+ {
+ // Previously, lvIsParam and lvIsTemp were never both set on the same local,
+ // so lvIsTemp is now used to indicate that this is one of the weird implicit
+ // by-ref locals.
+ // The address taken cleanup will look for references to locals marked like
+ // this, and transform them appropriately.
+ varDsc->lvIsTemp = 1;
+
+ // Also marking them as BYREF will hide them from struct promotion.
+ varDsc->lvType = TYP_BYREF;
+ varDsc->lvRefCnt = 0;
+
+ // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF
+ // make sure that the following flag is not set as these will force SSA to
+ // exclude tracking/enregistering these LclVars. (see fgExcludeFromSsa)
+ //
+ varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.
+
+#ifdef DEBUG
+ // This should not be converted to a double in stress mode,
+ // because it is really a pointer
+ varDsc->lvKeepType = 1;
+
+ if (verbose)
+ {
+ printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
+ }
+#endif // DEBUG
+ }
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ }
+ }
+
+#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+}
+
+/*****************************************************************************
+ *
+ * Morph irregular parameters: for x64 and ARM64 this means turning them into byrefs
+ * and adding extra indirections.
+ */
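+//
+// For example (illustrative), for an implicit by-ref struct parameter 'x' the code below rewrites:
+//     GT_ADDR(GT_LCL_VAR<struct> x)   into   GT_LCL_VAR<byref> x
+//     GT_LCL_VAR<struct> x            into   GT_OBJ<struct>(GT_LCL_VAR<byref> x)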
+bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr* pTree, fgWalkData* fgWalkPre)
+{
+#if !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
+
+ return false;
+
+#else // _TARGET_AMD64_ || _TARGET_ARM64_
+
+ GenTree* tree = *pTree;
+ assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
+
+ bool isAddr = (tree->gtOper == GT_ADDR);
+ GenTreePtr lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
+ unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+
+ if (!lvaIsImplicitByRefLocal(lclNum))
+ {
+ // We only need to transform the 'marked' implicit by-ref parameters
+ return false;
+ }
+
+ // The SIMD transformation to coalesce contiguous references to SIMD vector fields will
+ // re-invoke the traversal to mark address-taken locals.
+ // So, we may encounter a tree that has already been transformed to TYP_BYREF.
+ // If we do, leave it as-is.
+ if (!varTypeIsStruct(lclVarTree))
+ {
+ assert(lclVarTree->TypeGet() == TYP_BYREF);
+ return false;
+ }
+
+ // We are overloading the lvRefCnt field here because real ref counts have not been set.
+ lclVarDsc->lvRefCnt++;
+
+ // This is no longer a def of the lclVar, even if it WAS a def of the struct.
+ lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK);
+
+ if (isAddr)
+ {
+ // change &X into just plain X
+ tree->CopyFrom(lclVarTree, this);
+ tree->gtType = TYP_BYREF;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing address of implicit by ref struct parameter with byref:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ // Change X into OBJ(X)
+ var_types structType = tree->gtType;
+ tree->gtType = TYP_BYREF;
+ tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree);
+ if (structType == TYP_STRUCT)
+ {
+ gtSetObjGcInfo(tree->AsObj());
+ }
+
+ // TODO-CQ: If the VM ever stops violating the ABI and passing heap references
+ // we could remove TGTANYWHERE
+ tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
+ gtDispTree(tree, nullptr, nullptr, true);
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ }
+
+ *pTree = tree;
+ return true;
+
+#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+}
+
+// An "AddrExposedContext" expresses the calling context in which an address expression occurs.
+enum AddrExposedContext
+{
+ AXC_None, // None of the below seen yet.
+ AXC_Ind, // The address being computed is to be dereferenced.
+ AXC_Addr, // We're computing a raw address (not dereferenced, at least not immediately).
+ AXC_IndWide, // A block operation dereferenced the address, covering more bytes than the storage
+ // location the address refers to -- if the address refers to a field of a struct local,
+ // we need to consider the entire local address taken (not just the field).
+ AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
+ // on more bytes than the width of the storage location addressed. If this is a
+ // field of a promoted struct local, declare the entire struct local address-taken.
+ AXC_InitBlk, // A GT_INITBLK is the immediate parent. The first argument is in an IND context.
+ AXC_CopyBlk, // A GT_COPYBLK is the immediate parent. The first argument is in a GT_LIST, whose
+ // args should be evaluated in an IND context.
+ AXC_IndAdd, // A GT_ADD is the immediate parent, and it was evaluated in an IND context.
+ // If one arg is a constant int, evaluate the other in an IND context. Otherwise, none.
+};
+
+typedef ArrayStack<AddrExposedContext> AXCStack;
+
+// We use the pre- and post-callbacks to simulate passing an argument down the recursion, via a stack.
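+//
+// For example (illustrative): while walking ASG(IND(ADDR(LCL_VAR)), ...) the pre-callback pushes exactly
+// one context per node visited and the post-callback pops it, so Top() always holds the context that the
+// node's parent established for its children.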
+Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
+{
+ AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
+ (void)axcStack->Pop();
+ return WALK_CONTINUE;
+}
+
+Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* comp = fgWalkPre->compiler;
+ AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
+ AddrExposedContext axc = axcStack->Top();
+
+ // In some situations, we have to figure out what the effective context is in which to
+ // evaluate the current tree, depending on which argument position it is in its parent.
+
+ switch (axc)
+ {
+
+ case AXC_IndAdd:
+ {
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ assert(parent->OperGet() == GT_ADD);
+ // Is one of the args a constant representing a field offset,
+ // and is this the other? If so, Ind context.
+ if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree)
+ {
+ axc = AXC_Ind;
+ }
+ else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree)
+ {
+ axc = AXC_Ind;
+ }
+ else
+ {
+ axc = AXC_None;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ // Now recurse properly for the tree.
+ switch (tree->gtOper)
+ {
+ case GT_IND:
+ if (axc != AXC_Addr)
+ {
+ axcStack->Push(AXC_Ind);
+ }
+ else
+ {
+ axcStack->Push(AXC_None);
+ }
+ return WALK_CONTINUE;
+
+ case GT_BLK:
+ case GT_OBJ:
+ if (axc == AXC_Addr)
+ {
+ axcStack->Push(AXC_None);
+ }
+ else if (tree->TypeGet() == TYP_STRUCT)
+ {
+ // The block operation will dereference its argument(s) -- usually. If the size of the initblk
+ // or copyblk exceeds the size of a storage location whose address is used as one of the
+ // arguments, then we have to consider that storage location (indeed, its underlying containing
+ // location) to be address taken. So get the width of the initblk or copyblk.
+
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ GenTreeBlk* blk = tree->AsBlk();
+ unsigned width = blk->gtBlkSize;
+ noway_assert(width != 0);
+ axc = AXC_Ind;
+ GenTree* addr = blk->Addr();
+ if (addr->OperGet() == GT_ADDR)
+ {
+ if (parent->gtOper == GT_ASG)
+ {
+ if ((tree == parent->gtOp.gtOp1) &&
+ ((width == 0) || !comp->fgFitsInOrNotLoc(addr->gtGetOp1(), width)))
+ {
+ axc = AXC_IndWide;
+ }
+ }
+ else
+ {
+ assert(parent->gtOper == GT_CALL);
+ }
+ }
+ axcStack->Push(axc);
+ }
+ else
+ {
+ // This is like a regular GT_IND.
+ axcStack->Push(AXC_Ind);
+ }
+ return WALK_CONTINUE;
+
+ case GT_DYN_BLK:
+ // Assume maximal width.
+ axcStack->Push(AXC_IndWide);
+ return WALK_CONTINUE;
+
+ case GT_LIST:
+ if (axc == AXC_InitBlk || axc == AXC_CopyBlk)
+ {
+ axcStack->Push(axc);
+ }
+ else
+ {
+ axcStack->Push(AXC_None);
+ }
+ return WALK_CONTINUE;
+
+ case GT_INDEX:
+ // Taking the address of an array element never takes the address of a local.
+ axcStack->Push(AXC_None);
+ return WALK_CONTINUE;
+
+ case GT_ADDR:
+ // If we have ADDR(lcl), and "lcl" is an implicit byref parameter, fgMorphImplicitByRefArgs will
+ // convert to just "lcl". This is never an address-context use, since the local is already a
+ // byref after this transformation.
+ if (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR && comp->fgMorphImplicitByRefArgs(pTree, fgWalkPre))
+ {
+ // Push something to keep the PostCB, which will pop it, happy.
+ axcStack->Push(AXC_None);
+ // In the first case, tree may no longer be a leaf, but we're done with it; it is a leaf in the
+ // second case.
+ return WALK_SKIP_SUBTREES;
+ }
+#ifdef FEATURE_SIMD
+ if (tree->gtOp.gtOp1->OperGet() == GT_SIMD)
+ {
+ axcStack->Push(AXC_None);
+ }
+ else
+#endif // FEATURE_SIMD
+ if (axc == AXC_Ind)
+ {
+ axcStack->Push(AXC_None);
+ }
+ else if (axc == AXC_IndWide)
+ {
+ axcStack->Push(AXC_AddrWide);
+ }
+ else
+ {
+ assert(axc == AXC_None);
+ axcStack->Push(AXC_Addr);
+ }
+ return WALK_CONTINUE;
+
+ case GT_FIELD:
+ // First, handle a couple of special cases: field of promoted struct local, field
+ // of "normed" struct.
+ if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES)
+ {
+ // It (may have) replaced the field with a local var or local field. If we're in an addr context,
+ // label it addr-taken.
+ if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide))
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ comp->lvaSetVarAddrExposed(lclNum);
+ if (axc == AXC_AddrWide)
+ {
+ LclVarDsc* varDsc = &comp->lvaTable[lclNum];
+ if (varDsc->lvIsStructField)
+ {
+ comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
+ }
+ }
+ }
+ // Push something to keep the PostCB, which will pop it, happy.
+ axcStack->Push(AXC_None);
+ return WALK_SKIP_SUBTREES;
+ }
+ else
+ {
+ // GT_FIELD is an implicit deref.
+ if (axc == AXC_Addr)
+ {
+ axcStack->Push(AXC_None);
+ }
+ else if (axc == AXC_AddrWide)
+ {
+ axcStack->Push(AXC_IndWide);
+ }
+ else
+ {
+ axcStack->Push(AXC_Ind);
+ }
+ return WALK_CONTINUE;
+ }
+
+ case GT_LCL_FLD:
+ {
+ assert(axc != AXC_Addr);
+ // fgMorphLocalField recognizes certain forms and does all the work; in that case it returns
+ // WALK_SKIP_SUBTREES, otherwise WALK_CONTINUE. We return the same result here.
+ fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre);
+ if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide))
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ comp->lvaSetVarAddrExposed(lclNum);
+ if (axc == AXC_AddrWide)
+ {
+ LclVarDsc* varDsc = &comp->lvaTable[lclNum];
+ if (varDsc->lvIsStructField)
+ {
+ comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
+ }
+ }
+ }
+ // We must push something; if res is WALK_SKIP_SUBTREES it doesn't matter
+ // what, as long as the post callback has something to pop. If we're going
+ // to analyze children, the LCL_FLD creates an Ind context, so use that.
+ axcStack->Push(AXC_Ind);
+ return res;
+ }
+
+ case GT_LCL_VAR:
+ // On some architectures, some arguments are passed implicitly by reference.
+ // Modify the trees to reflect that, if this local is one of those.
+ if (comp->fgMorphImplicitByRefArgs(pTree, fgWalkPre))
+ {
+ // We can't be in an address context; the ADDR(lcl), where lcl is an implicit byref param, was
+ // handled earlier. (And we can't have added anything to this address, since it was implicit.)
+ assert(axc != AXC_Addr);
+ }
+ else
+ {
+ if (axc == AXC_Addr || axc == AXC_AddrWide)
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ comp->lvaSetVarAddrExposed(lclNum);
+ if (axc == AXC_AddrWide)
+ {
+ LclVarDsc* varDsc = &comp->lvaTable[lclNum];
+ if (varDsc->lvIsStructField)
+ {
+ comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
+ }
+ }
+
+ // We may need to quirk the storage size for this LCL_VAR:
+ // some PInvoke signatures incorrectly specify a ByRef to an INT32
+ // when they actually write a SIZE_T or INT64.
+ if (axc == AXC_Addr)
+ {
+ comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack);
+ }
+ }
+ }
+ // Push something to keep the PostCB, which will pop it, happy.
+ axcStack->Push(AXC_None);
+ // In the first case, tree may no longer be a leaf, but we're done with it; it is a leaf in the second case.
+ return WALK_SKIP_SUBTREES;
+
+ case GT_ADD:
+ assert(axc != AXC_Addr);
+ // See below about treating pointer operations as wider indirection.
+ if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
+ {
+ axcStack->Push(AXC_IndWide);
+ }
+ else if (axc == AXC_Ind)
+ {
+ // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
+ // If it's an add of a constant and an address, and the constant represents a field,
+ // then we'll evaluate the address argument in an Ind context; otherwise, the None context.
+ axcStack->Push(AXC_IndAdd);
+ }
+ else
+ {
+ axcStack->Push(axc);
+ }
+ return WALK_CONTINUE;
+
+ // !!! Treat Pointer Operations as Wider Indirection
+ //
+ // If we are performing pointer operations, make sure we treat that as equivalent to a wider
+ // indirection. This is because the pointers could be pointing to the address of struct fields
+ // and could be used to perform operations on the whole struct or passed to another method.
+ //
+ // When visiting a node in this pre-order walk, we do not know if we would in the future
+ // encounter a GT_ADDR of a GT_FIELD below.
+ //
+ // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
+ // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
+ // wider indirection context down the expr tree.
+ //
+ // For example, in unsafe code:
+ //
+ // IL_000e 12 00 ldloca.s 0x0
+ // IL_0010 7c 02 00 00 04 ldflda 0x4000002
+ // IL_0015 12 00 ldloca.s 0x0
+ // IL_0017 7c 01 00 00 04 ldflda 0x4000001
+ // IL_001c 59 sub
+ //
+ // When visiting the GT_SUB node, if the type of either of GT_SUB's operands is BYREF, then
+ // consider GT_SUB to be equivalent to an AXC_IndWide.
+ //
+ // Similarly for pointer comparisons and pointer escaping as integers through conversions, treat
+ // them as AXC_IndWide.
+ //
+
+ // BINOP
+ case GT_SUB:
+ case GT_MUL:
+ case GT_DIV:
+ case GT_UDIV:
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GT:
+ case GT_GE:
+ // UNOP
+ case GT_CAST:
+ if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) ||
+ (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
+ {
+ axcStack->Push(AXC_IndWide);
+ return WALK_CONTINUE;
+ }
+ __fallthrough;
+
+ default:
+ // To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None". We must
+ // handle the "Ind" propagation explicitly above.
+ if (axc == AXC_Addr || axc == AXC_AddrWide)
+ {
+ axcStack->Push(axc);
+ }
+ else
+ {
+ axcStack->Push(AXC_None);
+ }
+ return WALK_CONTINUE;
+ }
+}
+
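+// fgFitsInOrNotLoc: Returns true if an access of 'width' bytes is known to fit within the storage
+// location denoted by 'tree' (a non-struct value, a struct local, a field, or an array element);
+// returns false otherwise, including when we cannot tell.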
+bool Compiler::fgFitsInOrNotLoc(GenTreePtr tree, unsigned width)
+{
+ if (tree->TypeGet() != TYP_STRUCT)
+ {
+ return width <= genTypeSize(tree->TypeGet());
+ }
+ else if (tree->OperGet() == GT_LCL_VAR)
+ {
+ assert(tree->TypeGet() == TYP_STRUCT);
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ return width <= lvaTable[lclNum].lvExactSize;
+ }
+ else if (tree->OperGet() == GT_FIELD)
+ {
+ CORINFO_CLASS_HANDLE fldClass = info.compCompHnd->getFieldClass(tree->gtField.gtFldHnd);
+ return width <= info.compCompHnd->getClassSize(fldClass);
+ }
+ else if (tree->OperGet() == GT_INDEX)
+ {
+ return width <= tree->gtIndex.gtIndElemSize;
+ }
+ else
+ {
+ return false;
+ }
+}
+
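+// fgAddFieldSeqForZeroOffset: Record that 'fieldSeq' applies at offset zero from the address computed
+// by 'op1'. Where the address already carries a field sequence (a GT_LCL_FLD under a GT_ADDR, a constant
+// operand of a GT_ADD, or a bare GT_CNS_INT), the new sequence is appended to it; otherwise the pair is
+// recorded in the general zero-offset field map.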
+void Compiler::fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq)
+{
+ assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);
+
+ switch (op1->OperGet())
+ {
+ case GT_ADDR:
+ if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
+ {
+ GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld();
+ lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
+ }
+ break;
+
+ case GT_ADD:
+ if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
+ {
+ FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq;
+ if (op1Fs != nullptr)
+ {
+ op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
+ op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs;
+ }
+ }
+ else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
+ if (op2Fs != nullptr)
+ {
+ op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq);
+ op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs;
+ }
+ }
+ break;
+
+ case GT_CNS_INT:
+ {
+ FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq;
+ if (op1Fs != nullptr)
+ {
+ op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
+ op1->gtIntCon.gtFieldSeq = op1Fs;
+ }
+ }
+ break;
+
+ default:
+ // Record in the general zero-offset map.
+ GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
+ break;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Mark address-taken locals.
+ */
+
+void Compiler::fgMarkAddressExposedLocals()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgMarkAddressExposedLocals()\n");
+ }
+#endif // DEBUG
+
+ BasicBlock* block = fgFirstBB;
+ noway_assert(block);
+
+ do
+ {
+ /* Make the current basic block address available globally */
+
+ compCurBB = block;
+
+ GenTreePtr stmt;
+
+ for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ // Walk each node of the statement, marking address-taken locals via fgMarkAddrTakenLocalsPreCB/PostCB.
+ AXCStack stk(this);
+ stk.Push(AXC_None); // We start in neither an addr or ind context.
+ fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
+ }
+
+ block = block->bbNext;
+
+ } while (block);
+}
+
+// fgNodesMayInterfere:
+// return true if moving the nodes relative to each other can change the result of a computation
+//
+// args:
+// write: a node which writes (a store to a local or an indirection)
+// read: a node which reads
+//
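+// For example (illustrative): an indirect store may interfere with another indirection or with a read of
+// an address-exposed local, and a store to local V01 interferes with a read of V01, but a store to V01
+// does not interfere with a read of a different, non-address-exposed local V02.
+//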
+
+bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
+{
+ LclVarDsc* srcVar = nullptr;
+
+ bool readIsIndir = read->OperIsIndir() || read->OperIsImplicitIndir();
+ bool writeIsIndir = write->OperIsIndir() || write->OperIsImplicitIndir();
+
+ if (read->OperIsLocal())
+ {
+ srcVar = &lvaTable[read->gtLclVarCommon.gtLclNum];
+ }
+
+ if (writeIsIndir)
+ {
+ if (srcVar && srcVar->lvAddrExposed)
+ {
+ return true;
+ }
+ else if (readIsIndir)
+ {
+ return true;
+ }
+ return false;
+ }
+ else if (write->OperIsLocal())
+ {
+ LclVarDsc* dstVar = &lvaTable[write->gtLclVarCommon.gtLclNum];
+ if (readIsIndir)
+ {
+ return dstVar->lvAddrExposed;
+ }
+ else if (read->OperIsLocal())
+ {
+ if (read->gtLclVarCommon.gtLclNum == write->gtLclVarCommon.gtLclNum)
+ {
+ return true;
+ }
+ return false;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/** This predicate decides whether we will fold a tree with the structure:
+ * x = x <op> y where x could be any arbitrary expression into
+ * x <op>= y.
+ *
+ * This modification is only performed when the target architecture supports
+ * complex addressing modes. In the case of ARM for example, this transformation
+ * yields no benefit.
+ *
+ * If this function decides we can fold into an assignment operator, it also reports (via *bReverse)
+ * whether the operands must be reversed first: when the operator is commutative and we saw
+ * x = y <op> x, we still fold to x <op>= y, but fgMorph needs to swap the operands to do so.
+ */
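+// For example (an illustrative sketch, legacy backend only): "x = x + y" returns true with
+// *bReverse == false, while "x = y + x" with a commutative operator returns true with *bReverse == true;
+// either way the caller can then fold the assignment into "x += y".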
+bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse)
+{
+#if CPU_LOAD_STORE_ARCH
+ /* In the case of a load/store architecture, there's no gain by doing any of this, we bail. */
+ return false;
+#elif !defined(LEGACY_BACKEND)
+ return false;
+#else // defined(LEGACY_BACKEND)
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ genTreeOps cmop = op2->OperGet();
+
+ /* Is the destination identical to the first RHS sub-operand? */
+ if (GenTree::Compare(op1, op2->gtOp.gtOp1))
+ {
+ /*
+ Do not transform the following tree
+
+ [0024CFA4] ----------- const int 1
+ [0024CFDC] ----G------ | int
+ [0024CF5C] ----------- lclVar ubyte V01 tmp0
+ [0024D05C] -A--G------ = ubyte
+ [0024D014] D------N--- lclVar ubyte V01 tmp0
+
+ to
+
+ [0024CFA4] ----------- const int 1
+ [0024D05C] -A--G------ |= ubyte
+ [0024D014] U------N--- lclVar ubyte V01 tmp0
+
+ , when V01 is a struct field local.
+ */
+
+ if (op1->gtOper == GT_LCL_VAR && varTypeIsSmall(op1->TypeGet()) && op1->TypeGet() != op2->gtOp.gtOp2->TypeGet())
+ {
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvIsStructField)
+ {
+ return false;
+ }
+ }
+
+ *bReverse = false;
+ return true;
+ }
+ else if (GenTree::OperIsCommutative(cmop))
+ {
+ /* For commutative ops only, check for "a = x <op> a" */
+
+ /* Should we be doing this at all? */
+ if ((opts.compFlags & CLFLG_TREETRANS) == 0)
+ {
+ return false;
+ }
+
+ /* Can we swap the operands to cmop ... */
+ if ((op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) && (op2->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT))
+ {
+ // Both operands have side effects -- we cannot swap them, so bail.
+ return false;
+ }
+
+ /* Is the destination identical to the second RHS sub-operand? */
+ if (GenTree::Compare(op1, op2->gtOp.gtOp2))
+ {
+ *bReverse = true;
+ return true;
+ }
+ }
+ return false;
+#endif // defined(LEGACY_BACKEND)
+}
+
+// Static variables.
+Compiler::MorphAddrContext Compiler::s_CopyBlockMAC(Compiler::MACK_CopyBlock);
+
+#ifdef FEATURE_SIMD
+
+//-----------------------------------------------------------------------------------
+// fgMorphCombineSIMDFieldAssignments:
+// If the RHS of the input stmt is a read of field X of a SIMD vector, then this function
+// keeps reading the next few stmts, based on the vector size (2, 3, or 4 elements).
+// If the LHSs of those stmts are contiguous and the RHSs are also contiguous,
+// then we replace those statements with a single copyblk.
+//
+// Arguments:
+// block - BasicBlock*. block which stmt belongs to
+// stmt - GenTreeStmt*. the stmt node we want to check
+//
+// Return Value:
+// true if this function successfully combined the stmts; otherwise false.
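+//
+// For example (illustrative), for a Vector3 'v' (12 bytes of float) the sequence
+// d.x = v.X; d.y = v.Y; d.z = v.Z;
+// can be replaced by a single 12-byte block copy from 'v' to 'd'.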
+
+bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt)
+{
+
+ noway_assert(stmt->gtOper == GT_STMT);
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ assert(tree->OperGet() == GT_ASG);
+
+ GenTreePtr originalLHS = tree->gtOp.gtOp1;
+ GenTreePtr prevLHS = tree->gtOp.gtOp1;
+ GenTreePtr prevRHS = tree->gtOp.gtOp2;
+ unsigned index = 0;
+ var_types baseType = TYP_UNKNOWN;
+ unsigned simdSize = 0;
+ GenTreePtr simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);
+
+ if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
+ {
+ // if the RHS is not from a SIMD vector field X, then there is no need to check further.
+ return false;
+ }
+
+ var_types simdType = getSIMDTypeForSize(simdSize);
+ int assignmentsCount = simdSize / genTypeSize(baseType) - 1;
+ int remainingAssignments = assignmentsCount;
+ GenTreePtr curStmt = stmt->gtNext;
+ GenTreePtr lastStmt = stmt;
+
+ while (curStmt != nullptr && remainingAssignments > 0)
+ {
+ GenTreePtr exp = curStmt->gtStmt.gtStmtExpr;
+ if (exp->OperGet() != GT_ASG)
+ {
+ break;
+ }
+ GenTreePtr curLHS = exp->gtGetOp1();
+ GenTreePtr curRHS = exp->gtGetOp2();
+
+ if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
+ {
+ break;
+ }
+
+ remainingAssignments--;
+ prevLHS = curLHS;
+ prevRHS = curRHS;
+
+ lastStmt = curStmt;
+ curStmt = curStmt->gtNext;
+ }
+
+ if (remainingAssignments > 0)
+ {
+ // If there are assignments remaining, then the statements are not assigning to
+ // contiguous memory locations from the same vector.
+ return false;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
+ printf("From BB%02u, stmt", block->bbNum);
+ printTreeID(stmt);
+ printf(" to stmt");
+ printTreeID(lastStmt);
+ printf("\n");
+ }
+#endif
+
+ for (int i = 0; i < assignmentsCount; i++)
+ {
+ fgRemoveStmt(block, stmt->gtNext);
+ }
+
+ GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
+ if (simdStructNode->OperIsLocal())
+ {
+ setLclRelatedToSIMDIntrinsic(simdStructNode);
+ }
+ GenTree* copyBlkAddr = copyBlkDst;
+ if (copyBlkAddr->gtOper == GT_LEA)
+ {
+ copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
+ }
+ GenTreeLclVarCommon* localDst = nullptr;
+ if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
+ {
+ setLclRelatedToSIMDIntrinsic(localDst);
+ }
+
+ GenTree* simdStructAddr;
+ if (simdStructNode->TypeGet() == TYP_BYREF)
+ {
+ assert(simdStructNode->OperIsLocal());
+ assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
+ simdStructNode = gtNewOperNode(GT_IND, simdType, simdStructNode);
+ }
+ else
+ {
+ assert(varTypeIsSIMD(simdStructNode));
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nBB%02u stmt", block->bbNum);
+ printTreeID(stmt);
+ printf("(before)\n");
+ gtDispTree(stmt);
+ }
+#endif
+
+ // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
+ GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
+ blkNode->gtType = simdType;
+ tree = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
+ false, // not volatile
+ true); // copyBlock
+
+ stmt->gtStmt.gtStmtExpr = tree;
+
+ // Since we generated a new address node which didn't exist before,
+ // we should expose this address manually here.
+ AXCStack stk(this);
+ stk.Push(AXC_None);
+ fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nReplaced BB%02u stmt", block->bbNum);
+ printTreeID(stmt);
+ printf("(after)\n");
+ gtDispTree(stmt);
+ }
+#endif
+ return true;
+}
+
+#endif // FEATURE_SIMD
diff --git a/src/jit/nodeinfo.h b/src/jit/nodeinfo.h
new file mode 100644
index 0000000000..a73033a91f
--- /dev/null
+++ b/src/jit/nodeinfo.h
@@ -0,0 +1,161 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _NODEINFO_H_
+#define _NODEINFO_H_
+
+struct GenTree;
+
+class LinearScan;
+typedef unsigned int LsraLocation;
+
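+// TreeNodeInfo records, for a single GenTree node, the register requirements that LSRA consumes when
+// building RefPositions: how many source and destination registers the node uses and defines, how many
+// internal int/float temporaries it needs, and (via the candidate accessors below) which register sets
+// each of those may come from.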
+class TreeNodeInfo
+{
+public:
+ TreeNodeInfo()
+ {
+ loc = 0;
+ _dstCount = 0;
+ _srcCount = 0;
+ _internalIntCount = 0;
+ _internalFloatCount = 0;
+
+ srcCandsIndex = 0;
+ dstCandsIndex = 0;
+ internalCandsIndex = 0;
+ isLocalDefUse = false;
+ isHelperCallWithKills = false;
+ isLsraAdded = false;
+ isDelayFree = false;
+ hasDelayFreeSrc = false;
+ isTgtPref = false;
+ regOptional = false;
+ definesAnyRegisters = false;
+#ifdef DEBUG
+ isInitialized = false;
+#endif
+ }
+
+ // dst
+ __declspec(property(put = setDstCount, get = getDstCount)) int dstCount;
+ void setDstCount(int count)
+ {
+ assert(count <= MAX_RET_REG_COUNT);
+ _dstCount = (char)count;
+ }
+ int getDstCount()
+ {
+ return _dstCount;
+ }
+
+ // src
+ __declspec(property(put = setSrcCount, get = getSrcCount)) int srcCount;
+ void setSrcCount(int count)
+ {
+ _srcCount = (char)count;
+ assert(_srcCount == count);
+ }
+ int getSrcCount()
+ {
+ return _srcCount;
+ }
+
+ // internalInt
+ __declspec(property(put = setInternalIntCount, get = getInternalIntCount)) int internalIntCount;
+ void setInternalIntCount(int count)
+ {
+ _internalIntCount = (char)count;
+ assert(_internalIntCount == count);
+ }
+ int getInternalIntCount()
+ {
+ return _internalIntCount;
+ }
+
+ // internalFloat
+ __declspec(property(put = setInternalFloatCount, get = getInternalFloatCount)) int internalFloatCount;
+ void setInternalFloatCount(int count)
+ {
+ _internalFloatCount = (char)count;
+ assert(_internalFloatCount == count);
+ }
+ int getInternalFloatCount()
+ {
+ return _internalFloatCount;
+ }
+
+ // SrcCandidates are constraints of the consuming (parent) operation applied to this node
+ // (i.e. what registers it is constrained to consume).
+ regMaskTP getSrcCandidates(LinearScan* lsra);
+ void setSrcCandidates(LinearScan* lsra, regMaskTP mask);
+ // DstCandidates are constraints of this node (i.e. what registers it is constrained to produce).
+ regMaskTP getDstCandidates(LinearScan* lsra);
+ void setDstCandidates(LinearScan* lsra, regMaskTP mask);
+ // InternalCandidates are constraints of the registers used as temps in the evaluation of this node.
+ regMaskTP getInternalCandidates(LinearScan* lsra);
+ void setInternalCandidates(LinearScan* lsra, regMaskTP mask);
+ void addInternalCandidates(LinearScan* lsra, regMaskTP mask);
+
+ LsraLocation loc;
+
+private:
+ unsigned char _dstCount;
+ unsigned char _srcCount;
+ unsigned char _internalIntCount;
+ unsigned char _internalFloatCount;
+
+public:
+ unsigned char srcCandsIndex;
+ unsigned char dstCandsIndex;
+ unsigned char internalCandsIndex;
+
+ // isLocalDefUse identifies trees that produce a value that is not consumed elsewhere.
+ // Examples include stack arguments to a call (they are immediately stored), lhs of comma
+ // nodes, or top-level nodes that are non-void.
+ unsigned char isLocalDefUse : 1;
+ // isHelperCallWithKills is set when this is a helper call that kills more than just its in/out regs.
+ unsigned char isHelperCallWithKills : 1;
+ // Is this node added by LSRA, e.g. as a resolution or copy/reload move.
+ unsigned char isLsraAdded : 1;
+ // isDelayFree is set when the register defined by this node will interfere with the destination
+ // of the consuming node, and therefore it must not be freed immediately after use.
+ unsigned char isDelayFree : 1;
+ // hasDelayFreeSrc is set when this node has sources that are marked "isDelayFree". This is because
+ // we may eventually "contain" this node, in which case we don't want its children (which have
+ // already been marked "isDelayFree") to be handled that way when allocating.
+ unsigned char hasDelayFreeSrc : 1;
+ // isTgtPref is set to true when we have an RMW op, where we would like the result to be allocated
+ // in the same register as op1.
+ unsigned char isTgtPref : 1;
+ // Whether a spilled second src can be treated as a contained operand
+ unsigned char regOptional : 1;
+ // Whether or not a node defines any registers, whether directly (for nodes where dstCount is non-zero)
+ // or indirectly (for contained nodes, which propagate the transitive closure of the registers
+ // defined by their inputs). Used during buildRefPositionsForNode in order to avoid unnecessary work.
+ unsigned char definesAnyRegisters : 1;
+
+#ifdef DEBUG
+ // isInitialized is set when the tree node is handled.
+ unsigned char isInitialized : 1;
+#endif
+
+public:
+ // Initializes the TreeNodeInfo value with the given values.
+ void Initialize(LinearScan* lsra, GenTree* node, LsraLocation location);
+
+#ifdef DEBUG
+ void dump(LinearScan* lsra);
+
+ // This method checks to see whether the information has been initialized,
+ // and is in a consistent state
+ bool IsValid(LinearScan* lsra)
+ {
+ return (isInitialized &&
+ ((getSrcCandidates(lsra) | getInternalCandidates(lsra) | getDstCandidates(lsra)) &
+ ~(RBM_ALLFLOAT | RBM_ALLINT)) == 0);
+ }
+#endif // DEBUG
+};
+
+#endif // _NODEINFO_H_
diff --git a/src/jit/objectalloc.cpp b/src/jit/objectalloc.cpp
new file mode 100644
index 0000000000..2e19f4378d
--- /dev/null
+++ b/src/jit/objectalloc.cpp
@@ -0,0 +1,207 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX ObjectAllocator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+//===============================================================================
+
+//------------------------------------------------------------------------
+// DoPhase: Run analysis (if object stack allocation is enabled) and then
+// morph each GT_ALLOCOBJ node either into an allocation helper
+// call or stack allocation.
+// Notes:
+// Runs only if Compiler::optMethodFlags has flag OMF_HAS_NEWOBJ set.
+void ObjectAllocator::DoPhase()
+{
+ if ((comp->optMethodFlags & OMF_HAS_NEWOBJ) == 0)
+ {
+ return;
+ }
+
+ if (IsObjectStackAllocationEnabled())
+ {
+ DoAnalysis();
+ }
+
+ MorphAllocObjNodes();
+}
+
+//------------------------------------------------------------------------
+// DoAnalysis: Walk over basic blocks of the method and detect all local
+// variables that can be allocated on the stack.
+//
+// Assumptions:
+// Must be run after the dominators have been computed (we need this
+// information to detect loops).
+void ObjectAllocator::DoAnalysis()
+{
+ assert(m_IsObjectStackAllocationEnabled);
+ assert(comp->fgDomsComputed);
+ // TODO-ObjectStackAllocation
+ NYI("DoAnalysis");
+}
+
+//------------------------------------------------------------------------
+// MorphAllocObjNodes: Morph each GT_ALLOCOBJ node either into an
+// allocation helper call or stack allocation.
+//
+// Notes:
+// Runs only over the blocks having bbFlags BBF_HAS_NEWOBJ set.
+void ObjectAllocator::MorphAllocObjNodes()
+{
+ BasicBlock* block;
+
+ foreach_block(comp, block)
+ {
+ const bool basicBlockHasNewObj = (block->bbFlags & BBF_HAS_NEWOBJ) == BBF_HAS_NEWOBJ;
+#ifndef DEBUG
+ if (!basicBlockHasNewObj)
+ {
+ continue;
+ }
+#endif // DEBUG
+
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ GenTreePtr stmtExpr = stmt->gtStmtExpr;
+ GenTreePtr op2 = nullptr;
+
+ bool canonicalAllocObjFound = false;
+
+ if (stmtExpr->OperGet() == GT_ASG && stmtExpr->TypeGet() == TYP_REF)
+ {
+ op2 = stmtExpr->gtGetOp2();
+
+ if (op2->OperGet() == GT_ALLOCOBJ)
+ {
+ canonicalAllocObjFound = true;
+ }
+ }
+
+ if (canonicalAllocObjFound)
+ {
+ assert(basicBlockHasNewObj);
+ //------------------------------------------------------------------------
+ // We expect the following expression tree at this point
+ // * GT_STMT void (top level)
+ // | /--* GT_ALLOCOBJ ref
+ // \--* GT_ASG ref
+ // \--* GT_LCL_VAR ref
+ //------------------------------------------------------------------------
+
+ GenTreePtr op1 = stmtExpr->gtGetOp1();
+
+ assert(op1->OperGet() == GT_LCL_VAR);
+ assert(op1->TypeGet() == TYP_REF);
+ assert(op2 != nullptr);
+ assert(op2->OperGet() == GT_ALLOCOBJ);
+
+ GenTreeAllocObj* asAllocObj = op2->AsAllocObj();
+ unsigned int lclNum = op1->AsLclVar()->GetLclNum();
+
+ if (IsObjectStackAllocationEnabled() && CanAllocateLclVarOnStack(lclNum))
+ {
+ op2 = MorphAllocObjNodeIntoStackAlloc(asAllocObj, block, stmt);
+ }
+ else
+ {
+ op2 = MorphAllocObjNodeIntoHelperCall(asAllocObj);
+ }
+
+ // Propagate flags of op2 to its parent.
+ stmtExpr->gtOp.gtOp2 = op2;
+ stmtExpr->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
+ }
+#ifdef DEBUG
+ else
+ {
+ // We assume that GT_ALLOCOBJ nodes are always present in the
+ // canonical form.
+ comp->fgWalkTreePre(&stmt->gtStmtExpr, AssertWhenAllocObjFoundVisitor);
+ }
+#endif // DEBUG
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// MorphAllocObjNodeIntoHelperCall: Morph a GT_ALLOCOBJ node into an
+// allocation helper call.
+//
+// Arguments:
+// allocObj - GT_ALLOCOBJ that will be replaced by helper call.
+//
+// Return Value:
+// Address of helper call node (can be the same as allocObj).
+//
+// Notes:
+// Must update parents flags after this.
+GenTreePtr ObjectAllocator::MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj)
+{
+ assert(allocObj != nullptr);
+
+ GenTreePtr op1 = allocObj->gtGetOp1();
+
+ GenTreePtr helperCall = comp->fgMorphIntoHelperCall(allocObj, allocObj->gtNewHelper, comp->gtNewArgList(op1));
+
+ return helperCall;
+}
+
+//------------------------------------------------------------------------
+// MorphAllocObjNodeIntoStackAlloc: Morph a GT_ALLOCOBJ node into stack
+// allocation.
+// Arguments:
+// allocObj - GT_ALLOCOBJ that will be replaced by helper call.
+// block - a basic block where allocObj is
+// stmt - a statement where allocObj is
+//
+// Return Value:
+// Address of tree doing stack allocation (can be the same as allocObj).
+//
+// Notes:
+// Must update parents flags after this.
+// This function can insert additional statements before stmt.
+GenTreePtr ObjectAllocator::MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* allocObj,
+ BasicBlock* block,
+ GenTreeStmt* stmt)
+{
+ assert(allocObj != nullptr);
+ assert(m_AnalysisDone);
+
+ // TODO-StackAllocation
+ NYI("MorphAllocObjIntoStackAlloc");
+
+ return allocObj;
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// AssertWhenAllocObjFoundVisitor: Look for a GT_ALLOCOBJ node and assert
+// if one is found.
+Compiler::fgWalkResult ObjectAllocator::AssertWhenAllocObjFoundVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ assert(tree != nullptr);
+ assert(tree->OperGet() != GT_ALLOCOBJ);
+
+ return Compiler::fgWalkResult::WALK_CONTINUE;
+}
+
+#endif // DEBUG
+
+//===============================================================================
diff --git a/src/jit/objectalloc.h b/src/jit/objectalloc.h
new file mode 100644
index 0000000000..bea6744024
--- /dev/null
+++ b/src/jit/objectalloc.h
@@ -0,0 +1,82 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX ObjectAllocator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef OBJECTALLOC_H
+#define OBJECTALLOC_H
+/*****************************************************************************/
+
+//===============================================================================
+#include "phase.h"
+
+class ObjectAllocator final : public Phase
+{
+ //===============================================================================
+ // Data members
+ bool m_IsObjectStackAllocationEnabled;
+ bool m_AnalysisDone;
+ //===============================================================================
+ // Methods
+public:
+ ObjectAllocator(Compiler* comp);
+ bool IsObjectStackAllocationEnabled() const;
+ void EnableObjectStackAllocation();
+
+protected:
+ virtual void DoPhase() override;
+
+private:
+ bool CanAllocateLclVarOnStack(unsigned int lclNum) const;
+ void DoAnalysis();
+ void MorphAllocObjNodes();
+ GenTreePtr MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj);
+ GenTreePtr MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* allocObj, BasicBlock* block, GenTreeStmt* stmt);
+#ifdef DEBUG
+ static Compiler::fgWalkResult AssertWhenAllocObjFoundVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data);
+#endif // DEBUG
+};
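+
+// Typical usage (an illustrative sketch, assuming the standard Phase::Run driver; the exact call site
+// lives in the compiler's phase driver):
+//
+//     ObjectAllocator objectAllocator(compiler);
+//     if (enableObjectStackAllocation)
+//     {
+//         objectAllocator.EnableObjectStackAllocation();
+//     }
+//     objectAllocator.Run(); // runs the phase, which invokes DoPhase()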
+
+//===============================================================================
+
+inline ObjectAllocator::ObjectAllocator(Compiler* comp)
+ : Phase(comp, "Allocate Objects", PHASE_ALLOCATE_OBJECTS)
+ , m_IsObjectStackAllocationEnabled(false)
+ , m_AnalysisDone(false)
+{
+}
+
+inline bool ObjectAllocator::IsObjectStackAllocationEnabled() const
+{
+ return m_IsObjectStackAllocationEnabled;
+}
+
+inline void ObjectAllocator::EnableObjectStackAllocation()
+{
+ m_IsObjectStackAllocationEnabled = true;
+}
+
+//------------------------------------------------------------------------
+// CanAllocateLclVarOnStack: Returns true iff the local variable cannot
+// escape from the method and can therefore
+// be allocated on the stack.
+inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum) const
+{
+ assert(m_AnalysisDone);
+ // TODO-ObjectStackAllocation
+ NYI("CanAllocateLclVarOnStack");
+ return false;
+}
+
+//===============================================================================
+
+#endif // OBJECTALLOC_H
diff --git a/src/jit/opcode.h b/src/jit/opcode.h
new file mode 100644
index 0000000000..87741e97d9
--- /dev/null
+++ b/src/jit/opcode.h
@@ -0,0 +1,29 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX opcodes.h XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef _OPCODE_H_
+#define _OPCODE_H_
+
+#include "openum.h"
+
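+// Per-IL-opcode tables (see openum.h for the opcode enumeration).
+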
+extern const signed char opcodeSizes[];
+
+#if defined(DEBUG)
+extern const char* const opcodeNames[];
+extern const BYTE opcodeArgKinds[];
+#endif
+
+/*****************************************************************************/
+#endif // _OPCODE_H_
+/*****************************************************************************/
diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp
new file mode 100644
index 0000000000..d23b4cd198
--- /dev/null
+++ b/src/jit/optcse.cpp
@@ -0,0 +1,2582 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX OptCSE XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/*****************************************************************************/
+#if FEATURE_ANYCSE
+/*****************************************************************************/
+
+/* static */
+const size_t Compiler::s_optCSEhashSize = EXPSET_SZ * 2;
+
+/*****************************************************************************
+ *
+ * We've found all the candidates, build the index for easy access.
+ */
+
+void Compiler::optCSEstop()
+{
+ if (optCSECandidateCount == 0)
+ {
+ return;
+ }
+
+ CSEdsc* dsc;
+ CSEdsc** ptr;
+ unsigned cnt;
+
+ optCSEtab = new (this, CMK_CSE) CSEdsc*[optCSECandidateCount]();
+
+ for (cnt = s_optCSEhashSize, ptr = optCSEhash; cnt; cnt--, ptr++)
+ {
+ for (dsc = *ptr; dsc; dsc = dsc->csdNextInBucket)
+ {
+ if (dsc->csdIndex)
+ {
+ noway_assert((unsigned)dsc->csdIndex <= optCSECandidateCount);
+ if (optCSEtab[dsc->csdIndex - 1] == nullptr)
+ {
+ optCSEtab[dsc->csdIndex - 1] = dsc;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ for (cnt = 0; cnt < optCSECandidateCount; cnt++)
+ {
+ noway_assert(optCSEtab[cnt] != nullptr);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Return the descriptor for the CSE with the given index.
+ */
+
+inline Compiler::CSEdsc* Compiler::optCSEfindDsc(unsigned index)
+{
+ noway_assert(index);
+ noway_assert(index <= optCSECandidateCount);
+ noway_assert(optCSEtab[index - 1]);
+
+ return optCSEtab[index - 1];
+}
+
+/*****************************************************************************
+ *
+ * For a previously marked CSE, decrement the use counts and unmark it
+ */
+
+void Compiler::optUnmarkCSE(GenTreePtr tree)
+{
+ if (!IS_CSE_INDEX(tree->gtCSEnum))
+ {
+ // This tree is not a CSE candidate, so there is nothing
+ // to do.
+ return;
+ }
+
+ unsigned CSEnum = GET_CSE_INDEX(tree->gtCSEnum);
+ CSEdsc* desc;
+
+ // make sure it's been initialized
+ noway_assert(optCSEweight <= BB_MAX_WEIGHT);
+
+ /* Is this a CSE use? */
+ if (IS_CSE_USE(tree->gtCSEnum))
+ {
+ desc = optCSEfindDsc(CSEnum);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Unmark CSE use #%02d at ", CSEnum);
+ printTreeID(tree);
+ printf(": %3d -> %3d\n", desc->csdUseCount, desc->csdUseCount - 1);
+ }
+#endif
+
+ /* Reduce the nested CSE's 'use' count */
+
+ noway_assert(desc->csdUseCount > 0);
+
+ if (desc->csdUseCount > 0)
+ {
+ desc->csdUseCount -= 1;
+
+ if (desc->csdUseWtCnt < optCSEweight)
+ {
+ desc->csdUseWtCnt = 0;
+ }
+ else
+ {
+ desc->csdUseWtCnt -= optCSEweight;
+ }
+ }
+ }
+ else
+ {
+ desc = optCSEfindDsc(CSEnum);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Unmark CSE def #%02d at ", CSEnum);
+ printTreeID(tree);
+ printf(": %3d -> %3d\n", desc->csdDefCount, desc->csdDefCount - 1);
+ }
+#endif
+
+ /* Reduce the nested CSE's 'def' count */
+
+ noway_assert(desc->csdDefCount > 0);
+
+ if (desc->csdDefCount > 0)
+ {
+ desc->csdDefCount -= 1;
+
+ if (desc->csdDefWtCnt < optCSEweight)
+ {
+ desc->csdDefWtCnt = 0;
+ }
+ else
+ {
+ desc->csdDefWtCnt -= optCSEweight;
+ }
+ }
+ }
+
+ tree->gtCSEnum = NO_CSE;
+}
+
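+// optHasNonCSEChild: Tree-walk helper used by optPropagateNonCSE. Returns WALK_ABORT as soon as it finds
+// a direct child of the root (the root itself is passed in pCallbackData and skipped) that is marked
+// GTF_DONT_CSE, so the caller knows the root should be marked GTF_DONT_CSE as well. GT_CNS_INT children
+// are exempt (see the comment below).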
+Compiler::fgWalkResult Compiler::optHasNonCSEChild(GenTreePtr* pTree, fgWalkData* data)
+{
+ if (*pTree == data->pCallbackData)
+ {
+ return WALK_CONTINUE;
+ }
+
+ if ((*pTree)->gtFlags & GTF_DONT_CSE)
+ {
+
+ // Fix 392756 WP7 Crossgen
+ // Don't propagate the GTF_DONT_CSE flag up from a GT_CNS_INT
+ //
+ // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
+ // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
+ // Hence to prevent the constant from becoming a CSE we have marked it as NO_CSE, but this
+ // should not prevent trees above the constant from becoming CSEs.
+ //
+ if ((*pTree)->gtOper == GT_CNS_INT)
+ {
+ return WALK_SKIP_SUBTREES;
+ }
+
+ return WALK_ABORT;
+ }
+
+ return WALK_SKIP_SUBTREES;
+}
+
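+// optPropagateNonCSE: Tree-walk callback that marks calls (other than shared static helpers) as
+// GTF_DONT_CSE and propagates GTF_DONT_CSE from a node's children up to the node itself.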
+Compiler::fgWalkResult Compiler::optPropagateNonCSE(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTree* tree = *pTree;
+ Compiler* comp = data->compiler;
+
+ /* Calls get DONT_CSE implicitly */
+ if (tree->OperGet() == GT_CALL)
+ {
+ if (!IsSharedStaticHelper(tree))
+ {
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+ }
+
+ if ((tree->gtFlags & GTF_DONT_CSE) == 0)
+ {
+ /* Propagate the DONT_CSE flag from child to parent */
+ if (comp->fgWalkTreePre(&tree, optHasNonCSEChild, tree) == WALK_ABORT)
+ {
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Helper passed to Compiler::fgWalkAllTreesPre() to unmark nested CSE's.
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::optUnmarkCSEs(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* comp = data->compiler;
+ GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
+
+ // We may have a non-NULL side effect list that is being kept
+ //
+ if (keepList)
+ {
+ GenTreePtr keptTree = keepList;
+ while (keptTree->OperGet() == GT_COMMA)
+ {
+ assert(keptTree->OperKind() & GTK_SMPOP);
+ GenTreePtr op1 = keptTree->gtOp.gtOp1;
+ GenTreePtr op2 = keptTree->gtGetOp2();
+
+ // For the GT_COMMA case, op1 is part of the original CSE tree
+ // that is being kept because it contains some side-effect
+ //
+ if (tree == op1)
+ {
+ // This tree and all of its sub trees are being kept
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // For the GT_COMMA case, op2 is the remaining side-effects of the original CSE tree,
+ // which can again be another GT_COMMA or the final side-effect part
+ //
+ keptTree = op2;
+ }
+ if (tree == keptTree)
+ {
+ // This tree and all of its sub trees are being kept
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+
+ // This node is being removed from the graph of GenTreePtr
+ // Call optUnmarkCSE and decrement the LclVar ref counts.
+ comp->optUnmarkCSE(tree);
+ assert(!IS_CSE_INDEX(tree->gtCSEnum));
+
+ /* Look for any local variable references */
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ /* This variable ref is going away, decrease its ref counts */
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+ assert(lclNum < comp->lvaCount);
+ varDsc = comp->lvaTable + lclNum;
+
+ // make sure it's been initialized
+ assert(comp->optCSEweight <= BB_MAX_WEIGHT);
+
+ /* Decrement its lvRefCnt and lvRefCntWtd */
+
+ varDsc->decRefCnts(comp->optCSEweight, comp);
+ }
+
+ return WALK_CONTINUE;
+}
+
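+// optCSE_MaskHelper: Helper passed to fgWalkTreePre by optCSE_GetMaskData. For each node that is a CSE
+// def or use, it sets the corresponding bit in the CSE_defMask or CSE_useMask of the optCSE_MaskData
+// passed as callback data.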
+Compiler::fgWalkResult Compiler::optCSE_MaskHelper(GenTreePtr* pTree, fgWalkData* walkData)
+{
+ GenTree* tree = *pTree;
+ Compiler* comp = walkData->compiler;
+ optCSE_MaskData* pUserData = (optCSE_MaskData*)(walkData->pCallbackData);
+
+ if (IS_CSE_INDEX(tree->gtCSEnum))
+ {
+ unsigned cseIndex = GET_CSE_INDEX(tree->gtCSEnum);
+ EXPSET_TP cseBit = genCSEnum2bit(cseIndex);
+ if (IS_CSE_DEF(tree->gtCSEnum))
+ {
+ pUserData->CSE_defMask |= cseBit;
+ }
+ else
+ {
+ pUserData->CSE_useMask |= cseBit;
+ }
+ }
+
+ return WALK_CONTINUE;
+}
+
+// This function walks all the nodes of a given tree
+// and returns the mask of CSE defs and uses for the tree
+//
+void Compiler::optCSE_GetMaskData(GenTreePtr tree, optCSE_MaskData* pMaskData)
+{
+ pMaskData->CSE_defMask = 0;
+ pMaskData->CSE_useMask = 0;
+ fgWalkTreePre(&tree, optCSE_MaskHelper, (void*)pMaskData);
+}
+
+//------------------------------------------------------------------------
+// optCSE_canSwap: Determine if the execution order of two nodes can be swapped.
+//
+// Arguments:
+// op1 - The first node
+// op2 - The second node
+//
+// Return Value:
+// Return true iff it safe to swap the execution order of 'op1' and 'op2',
+// considering only the locations of the CSE defs and uses.
+//
+// Assumptions:
+// 'op1' currently occurs before 'op2' in the execution order.
+//
+bool Compiler::optCSE_canSwap(GenTree* op1, GenTree* op2)
+{
+ // op1 and op2 must be non-null.
+ assert(op1 != nullptr);
+ assert(op2 != nullptr);
+
+ bool canSwap = true; // the default result unless proven otherwise.
+
+ optCSE_MaskData op1MaskData;
+ optCSE_MaskData op2MaskData;
+
+ optCSE_GetMaskData(op1, &op1MaskData);
+ optCSE_GetMaskData(op2, &op2MaskData);
+
+ // We cannot swap if op1 contains a CSE def that is used by op2
+ if ((op1MaskData.CSE_defMask & op2MaskData.CSE_useMask) != 0)
+ {
+ canSwap = false;
+ }
+ else
+ {
+ // We also cannot swap if op2 contains a CSE def that is used by op1.
+ if ((op2MaskData.CSE_defMask & op1MaskData.CSE_useMask) != 0)
+ {
+ canSwap = false;
+ }
+ }
+
+ return canSwap;
+}
+
+//------------------------------------------------------------------------
+// optCSE_canSwap: Determine if the execution order of a node's operands can be swapped.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// Return true iff it safe to swap the execution order of the operands of 'tree',
+// considering only the locations of the CSE defs and uses.
+//
+bool Compiler::optCSE_canSwap(GenTreePtr tree)
+{
+ // We must have a binary treenode with non-null op1 and op2
+ assert((tree->OperKind() & GTK_SMPOP) != 0);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ return optCSE_canSwap(op1, op2);
+}
+
+/*****************************************************************************
+ *
+ * Compare function passed to qsort() by CSE_Heuristic::SortCandidates
+ * when (CodeOptKind() != Compiler::SMALL_CODE)
+ */
+
+/* static */
+int __cdecl Compiler::optCSEcostCmpEx(const void* op1, const void* op2)
+{
+ CSEdsc* dsc1 = *(CSEdsc**)op1;
+ CSEdsc* dsc2 = *(CSEdsc**)op2;
+
+ GenTreePtr exp1 = dsc1->csdTree;
+ GenTreePtr exp2 = dsc2->csdTree;
+
+ int diff;
+
+ diff = (int)(exp2->gtCostEx - exp1->gtCostEx);
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ // Sort the higher Use Counts toward the top
+ diff = (int)(dsc2->csdUseWtCnt - dsc1->csdUseWtCnt);
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ // With the same use count, Sort the lower Def Counts toward the top
+ diff = (int)(dsc1->csdDefWtCnt - dsc2->csdDefWtCnt);
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ // In order to ensure that we have a stable sort, we break ties using the csdIndex
+ return (int)(dsc1->csdIndex - dsc2->csdIndex);
+}
+
+/*****************************************************************************
+ *
+ * Compare function passed to qsort() by CSE_Heuristic::SortCandidates
+ * when (CodeOptKind() == Compiler::SMALL_CODE)
+ */
+
+/* static */
+int __cdecl Compiler::optCSEcostCmpSz(const void* op1, const void* op2)
+{
+ CSEdsc* dsc1 = *(CSEdsc**)op1;
+ CSEdsc* dsc2 = *(CSEdsc**)op2;
+
+ GenTreePtr exp1 = dsc1->csdTree;
+ GenTreePtr exp2 = dsc2->csdTree;
+
+ int diff;
+
+ diff = (int)(exp2->gtCostSz - exp1->gtCostSz);
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ // Sort the higher Use Counts toward the top
+ diff = (int)(dsc2->csdUseCount - dsc1->csdUseCount);
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ // With the same use count, Sort the lower Def Counts toward the top
+ diff = (int)(dsc1->csdDefCount - dsc2->csdDefCount);
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ // In order to ensure that we have a stable sort, we break ties using the csdIndex
+ return (int)(dsc1->csdIndex - dsc2->csdIndex);
+}
+
+/*****************************************************************************/
+#if FEATURE_VALNUM_CSE
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Initialize the Value Number CSE tracking logic.
+ */
+
+void Compiler::optValnumCSE_Init()
+{
+#ifdef DEBUG
+ optCSEtab = nullptr;
+#endif
+
+ /* Allocate and clear the hash bucket table */
+
+ optCSEhash = new (this, CMK_CSE) CSEdsc*[s_optCSEhashSize]();
+
+ optCSECandidateCount = 0;
+ optDoCSE = false; // Stays false until we find duplicate CSE tree
+}
+
+/*****************************************************************************
+ *
+ * Assign an index to the given expression (adding it to the lookup table,
+ * if necessary). Returns the index or 0 if the expression can not be a CSE.
+ */
+
+unsigned Compiler::optValnumCSE_Index(GenTreePtr tree, GenTreePtr stmt)
+{
+ unsigned key;
+ unsigned hash;
+ unsigned hval;
+ CSEdsc* hashDsc;
+
+ ValueNum vnlib = tree->GetVN(VNK_Liberal);
+
+ /* Compute the hash value for the expression */
+
+ key = (unsigned)vnlib;
+
+ hash = key;
+ hash *= (unsigned)(s_optCSEhashSize + 1);
+ hash >>= 7;
+
+ hval = hash % s_optCSEhashSize;
+
+ /* Look for a matching index in the hash table */
+
+ bool newCSE = false;
+
+ for (hashDsc = optCSEhash[hval]; hashDsc; hashDsc = hashDsc->csdNextInBucket)
+ {
+ if (hashDsc->csdHashValue == key)
+ {
+ treeStmtLstPtr newElem;
+
+ /* Have we started the list of matching nodes? */
+
+ if (hashDsc->csdTreeList == nullptr)
+ {
+ // Create the new element based upon the matching hashDsc element.
+
+ newElem = new (this, CMK_TreeStatementList) treeStmtLst;
+
+ newElem->tslTree = hashDsc->csdTree;
+ newElem->tslStmt = hashDsc->csdStmt;
+ newElem->tslBlock = hashDsc->csdBlock;
+ newElem->tslNext = nullptr;
+
+ /* Start the list with the first CSE candidate recorded */
+
+ hashDsc->csdTreeList = newElem;
+ hashDsc->csdTreeLast = newElem;
+ }
+
+ noway_assert(hashDsc->csdTreeList);
+
+ /* Append this expression to the end of the list */
+
+ newElem = new (this, CMK_TreeStatementList) treeStmtLst;
+
+ newElem->tslTree = tree;
+ newElem->tslStmt = stmt;
+ newElem->tslBlock = compCurBB;
+ newElem->tslNext = nullptr;
+
+ hashDsc->csdTreeLast->tslNext = newElem;
+ hashDsc->csdTreeLast = newElem;
+
+ optDoCSE = true; // Found a duplicate CSE tree
+
+ /* Have we assigned a CSE index? */
+ if (hashDsc->csdIndex == 0)
+ {
+ newCSE = true;
+ break;
+ }
+#if 0
+ // Use this to see if this Value Number base CSE is also a lexical CSE
+ bool treeMatch = GenTree::Compare(hashDsc->csdTree, tree, true);
+#endif
+
+ assert(FitsIn<signed char>(hashDsc->csdIndex));
+ tree->gtCSEnum = ((signed char)hashDsc->csdIndex);
+ return hashDsc->csdIndex;
+ }
+ }
+
+ if (!newCSE)
+ {
+ /* Not found, create a new entry (unless we have too many already) */
+
+ if (optCSECandidateCount < MAX_CSE_CNT)
+ {
+ hashDsc = new (this, CMK_CSE) CSEdsc;
+
+ hashDsc->csdHashValue = key;
+ hashDsc->csdIndex = 0;
+ hashDsc->csdLiveAcrossCall = 0;
+ hashDsc->csdDefCount = 0;
+ hashDsc->csdUseCount = 0;
+ hashDsc->csdDefWtCnt = 0;
+ hashDsc->csdUseWtCnt = 0;
+
+ hashDsc->csdTree = tree;
+ hashDsc->csdStmt = stmt;
+ hashDsc->csdBlock = compCurBB;
+ hashDsc->csdTreeList = nullptr;
+
+ /* Append the entry to the hash bucket */
+
+ hashDsc->csdNextInBucket = optCSEhash[hval];
+ optCSEhash[hval] = hashDsc;
+ }
+ return 0;
+ }
+ else // newCSE is true
+ {
+ /* We get here only after finding a matching CSE */
+
+ /* Create a new CSE (unless we have the maximum already) */
+
+ if (optCSECandidateCount == MAX_CSE_CNT)
+ {
+ return 0;
+ }
+
+ C_ASSERT((signed char)MAX_CSE_CNT == MAX_CSE_CNT);
+
+ unsigned CSEindex = ++optCSECandidateCount;
+ EXPSET_TP CSEmask = genCSEnum2bit(CSEindex);
+
+ /* Record the new CSE index in the hashDsc */
+ hashDsc->csdIndex = CSEindex;
+
+ /* Update the gtCSEnum field in the original tree */
+ noway_assert(hashDsc->csdTreeList->tslTree->gtCSEnum == 0);
+ assert(FitsIn<signed char>(CSEindex));
+
+ hashDsc->csdTreeList->tslTree->gtCSEnum = ((signed char)CSEindex);
+ noway_assert(((unsigned)hashDsc->csdTreeList->tslTree->gtCSEnum) == CSEindex);
+
+ tree->gtCSEnum = ((signed char)CSEindex);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCSE candidate #%02u, vn=", CSEindex);
+ vnPrint(vnlib, 0);
+ printf(" cseMask=%s in BB%02u, [cost=%2u, size=%2u]: \n", genES2str(genCSEnum2bit(CSEindex)),
+ compCurBB->bbNum, tree->gtCostEx, tree->gtCostSz);
+ gtDispTree(tree);
+ }
+#endif // DEBUG
+
+ return CSEindex;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Locate CSE candidates and assign indices to them.
+ * Returns 0 if no CSE candidates were found.
+ * Also initializes the bbCseIn, bbCseOut and bbCseGen sets for all blocks.
+ */
+
+unsigned Compiler::optValnumCSE_Locate()
+{
+ // Locate CSE candidates and assign them indices
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ GenTreePtr tree;
+
+ /* Make the block publicly available */
+
+ compCurBB = block;
+
+ /* Ensure that the BBF_VISITED and BBF_MARKED flags are clear */
+ /* Everyone who uses these flags is required to clear them afterwards */
+ noway_assert((block->bbFlags & (BBF_VISITED | BBF_MARKED)) == 0);
+
+ /* Walk the statement trees in this basic block */
+ for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ /* We walk the tree in the forwards direction (bottom up) */
+ for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (!optIsCSEcandidate(tree))
+ {
+ continue;
+ }
+
+ ValueNum vnlib = tree->GetVN(VNK_Liberal);
+
+ if (ValueNumStore::isReservedVN(vnlib))
+ {
+ continue;
+ }
+
+ // Don't CSE constant values, instead let the Value Number
+ // based Assertion Prop phase handle them.
+ //
+ if (vnStore->IsVNConstant(vnlib))
+ {
+ continue;
+ }
+
+ /* Assign an index to this expression */
+
+ unsigned CSEindex = optValnumCSE_Index(tree, stmt);
+
+ if (CSEindex != 0)
+ {
+ noway_assert(((unsigned)tree->gtCSEnum) == CSEindex);
+ }
+ }
+ }
+ }
+
+ /* We're done if there were no interesting expressions */
+
+ if (!optDoCSE)
+ {
+ return 0;
+ }
+
+ /* We're finished building the expression lookup table */
+
+ optCSEstop();
+
+ return 1;
+}
+
+/*****************************************************************************
+ *
+ * Compute each block's bbCseGen.
+ * This is the bitset that represents the CSEs that are generated within the block.
+ */
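+
+/* A small example (hypothetical CSE numbering, for illustration only): if the
+ * candidates numbered #1 and #3 each have an occurrence in BB02, then after this
+ * phase BB02's bbCseGen has at least the bits genCSEnum2bit(1) and
+ * genCSEnum2bit(3) set, regardless of where the other occurrences of those
+ * CSEs live.
+ */
+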
+void Compiler::optValnumCSE_InitDataFlow()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ GenTreePtr tree;
+
+ /* Initialize the block's bbCseIn set */
+
+ bool init_to_zero = false;
+
+ if (block == fgFirstBB)
+ {
+ /* Clear bbCseIn for the entry block */
+ init_to_zero = true;
+ }
+#if !CSE_INTO_HANDLERS
+ else
+ {
+ if (bbIsHandlerBeg(block))
+ {
+ /* Clear everything on entry to filters or handlers */
+ init_to_zero = true;
+ }
+ }
+#endif
+ if (init_to_zero)
+ {
+ /* Initialize to {ZERO} prior to dataflow */
+
+ block->bbCseIn = 0;
+ }
+ else
+ {
+ /* Initialize to {ALL} prior to dataflow */
+
+ block->bbCseIn = EXPSET_ALL;
+ }
+ block->bbCseOut = EXPSET_ALL;
+
+ /* Initialize to {ZERO} prior to locating the CSE candidates */
+ block->bbCseGen = 0;
+ }
+
+ // We walk the set of CSE candidates and set the bit corresponding to the CSEindex
+ // in the block's bbCseGen bitset
+ //
+ for (unsigned cnt = 0; cnt < optCSECandidateCount; cnt++)
+ {
+ CSEdsc* dsc = optCSEtab[cnt];
+ unsigned CSEindex = dsc->csdIndex;
+ treeStmtLstPtr lst = dsc->csdTreeList;
+ noway_assert(lst);
+
+ while (lst != nullptr)
+ {
+ BasicBlock* block = lst->tslBlock;
+ block->bbCseGen |= genCSEnum2bit(CSEindex);
+ lst = lst->tslNext;
+ }
+ }
+
+#ifdef DEBUG
+ // Dump out the bbCseGen information that we just created
+ //
+ if (verbose)
+ {
+ bool headerPrinted = false;
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbCseGen != 0)
+ {
+ if (!headerPrinted)
+ {
+ printf("\nBlocks that generate CSE def/uses\n");
+ headerPrinted = true;
+ }
+ printf("BB%02u", block->bbNum);
+ printf(" cseGen = %s\n", genES2str(block->bbCseGen));
+ }
+ }
+ }
+
+ fgDebugCheckLinks();
+
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * CSE Dataflow, so that all helper methods for dataflow are in a single place
+ *
+ */
+class CSE_DataFlow
+{
+private:
+ EXPSET_TP m_preMergeOut;
+
+ Compiler* m_pCompiler;
+
+public:
+ CSE_DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
+ {
+ }
+
+ Compiler* getCompiler()
+ {
+ return m_pCompiler;
+ }
+
+ // At the start of the merge function of the dataflow equations, initialize the premerge state (to detect changes).
+ void StartMerge(BasicBlock* block)
+ {
+ m_preMergeOut = block->bbCseOut;
+ }
+
+ // During merge, perform the actual merging of the predecessor's (since this is a forward analysis) dataflow flags.
+ void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds)
+ {
+ block->bbCseIn &= predBlock->bbCseOut;
+ }
+
+ // At the end of the merge, store the results of the dataflow equations in a postmerge state.
+ bool EndMerge(BasicBlock* block)
+ {
+ EXPSET_TP mergeOut = block->bbCseOut & (block->bbCseIn | block->bbCseGen);
+ block->bbCseOut = mergeOut;
+ return (mergeOut != m_preMergeOut);
+ }
+};
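+
+/* For reference, the StartMerge/Merge/EndMerge callbacks above implement the
+ * usual available-expressions equations, specialized to the no-kill-set case
+ * (a sketch of the fixed point being computed, not additional source):
+ *
+ *    bbCseIn(B)  =  intersection over all predecessors P of B of bbCseOut(P)
+ *    bbCseOut(B) =  bbCseOut(B) & (bbCseIn(B) | bbCseGen(B))
+ *
+ * iterated until EndMerge reports no change in any block's bbCseOut.
+ */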
+
+/*****************************************************************************
+ *
+ * Perform a DataFlow forward analysis using the block CSE bitsets:
+ * Inputs:
+ * bbCseGen - Exact CSEs that become available within the block
+ * bbCseIn - Maximal estimate of CSEs that are/could be available at input to the block
+ * bbCseOut - Maximal estimate of CSEs that are/could be available at exit to the block
+ *
+ * Outputs:
+ * bbCseIn - Computed CSEs that are available at input to the block
+ * bbCseOut - Computed CSEs that are available at exit to the block
+ */
+
+void Compiler::optValnumCSE_DataFlow()
+{
+ CSE_DataFlow cse(this);
+
+ // Modified dataflow algorithm for available expressions.
+ DataFlow cse_flow(this);
+
+ cse_flow.ForwardAnalysis(cse);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter performing DataFlow for ValnumCSE's\n");
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ printf("BB%02u", block->bbNum);
+ printf(" cseIn = %s", genES2str(block->bbCseIn));
+ printf(" cseOut = %s", genES2str(block->bbCseOut));
+ printf("\n");
+ }
+
+ printf("\n");
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Using the information computed by CSE_DataFlow determine for each
+ * CSE whether the CSE is a definition (if the CSE was not available)
+ * or if the CSE is a use (if the CSE was previously made available)
+ * The implementation iterates over all blocks, setting 'available_cses'
+ * to the CSEs that are available at input to the block.
+ * When a CSE expression is encountered it is classified either
+ * as a definition (if the CSE is not in the 'available_cses' set) or
+ * as a use (if the CSE is in the 'available_cses' set). If the CSE
+ * is a definition then it is added to the 'available_cses' set.
+ * With Value Number based CSEs we do not need kill sets.
+ */
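+
+/* Worked example (hypothetical block contents, for illustration only): suppose
+ * CSE #2 is not in bbCseIn for a block and the block contains two occurrences
+ * of CSE #2. The first occurrence is classified as a def (and #2 is added to
+ * 'available_cses'), so the second occurrence is classified as a use. If #2
+ * had been in bbCseIn, both occurrences would have been uses.
+ */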
+
+void Compiler::optValnumCSE_Availablity()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Labeling the CSEs with Use/Def information\n");
+ }
+#endif
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ GenTreePtr tree;
+
+ /* Make the block publicly available */
+
+ compCurBB = block;
+
+ EXPSET_TP available_cses = block->bbCseIn;
+
+ optCSEweight = block->getBBWeight(this);
+
+ /* Walk the statement trees in this basic block */
+
+ for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ /* We walk the tree in the forwards direction (bottom up) */
+ for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (IS_CSE_INDEX(tree->gtCSEnum))
+ {
+ EXPSET_TP mask = genCSEnum2bit(tree->gtCSEnum);
+ CSEdsc* desc = optCSEfindDsc(tree->gtCSEnum);
+ unsigned stmw = block->getBBWeight(this);
+
+ /* Is this expression available here? */
+
+ if (available_cses & mask)
+ {
+ /* This is a CSE use */
+
+ desc->csdUseCount += 1;
+ desc->csdUseWtCnt += stmw;
+ }
+ else
+ {
+ if (tree->gtFlags & GTF_COLON_COND)
+ {
+ // We can't create CSE definitions inside QMARK-COLON trees
+ tree->gtCSEnum = NO_CSE;
+ continue;
+ }
+
+ /* This is a CSE def */
+
+ desc->csdDefCount += 1;
+ desc->csdDefWtCnt += stmw;
+
+ /* Mark the node as a CSE definition */
+
+ tree->gtCSEnum = TO_CSE_DEF(tree->gtCSEnum);
+
+ /* This CSE will be available after this def */
+
+ available_cses |= mask;
+ }
+#ifdef DEBUG
+ if (verbose && IS_CSE_INDEX(tree->gtCSEnum))
+ {
+ printf("BB%02u ", block->bbNum);
+ printTreeID(tree);
+ printf(" %s of CSE #%02u [weight=%s]\n", IS_CSE_USE(tree->gtCSEnum) ? "Use" : "Def",
+ GET_CSE_INDEX(tree->gtCSEnum), refCntWtd2str(stmw));
+ }
+#endif
+ }
+ }
+ }
+ }
+}
+
+// The following class handles the CSE heuristics.
+// We use a complex set of heuristic rules
+// to determine whether it is likely to be profitable to perform a given CSE.
+//
+class CSE_Heuristic
+{
+ Compiler* m_pCompiler;
+ unsigned m_addCSEcount;
+
+ unsigned aggressiveRefCnt;
+ unsigned moderateRefCnt;
+ unsigned enregCount; // count of the number of enregisterable variables
+ bool largeFrame;
+ bool hugeFrame;
+ Compiler::codeOptimize codeOptKind;
+ Compiler::CSEdsc** sortTab;
+ size_t sortSiz;
+#ifdef DEBUG
+ CLRRandom m_cseRNG;
+ unsigned m_bias;
+#endif
+
+public:
+ CSE_Heuristic(Compiler* pCompiler) : m_pCompiler(pCompiler)
+ {
+ codeOptKind = m_pCompiler->compCodeOpt();
+ }
+
+ Compiler::codeOptimize CodeOptKind()
+ {
+ return codeOptKind;
+ }
+
+ // Perform the initialization step for our CSE heuristics:
+ // determine the various cutoff values to use for
+ // the aggressive, moderate and conservative CSE promotions,
+ // count the number of enregisterable variables, and
+ // determine if the method has a large or huge stack frame.
+ //
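+ // Worked example with made-up register counts (illustration only): on a
+ // hypothetical target with CNT_CALLEE_ENREG=8 and CNT_CALLEE_TRASH=7,
+ // regAvailEstimate starts at (8*3)+(7*2)+1 = 39, the aggressive cutoff is
+ // taken from the first sorted LclVar once enregCount exceeds 8*3/2 = 12,
+ // and the moderate cutoff once enregCount exceeds (8*3)+(7*2) = 38.
+ //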
+ void Initialize()
+ {
+ m_addCSEcount = 0; /* Count of the number of LclVars for CSEs that we added */
+
+ // Record the weighted ref count of the last "for sure" callee saved LclVar
+ aggressiveRefCnt = 0;
+ moderateRefCnt = 0;
+ enregCount = 0;
+ largeFrame = false;
+ hugeFrame = false;
+ sortTab = nullptr;
+ sortSiz = 0;
+
+#ifdef _TARGET_XARCH_
+ if (m_pCompiler->compLongUsed)
+ {
+ enregCount++;
+ }
+#endif
+
+ unsigned frameSize = 0;
+ unsigned regAvailEstimate = ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2) + 1);
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = m_pCompiler->lvaTable; lclNum < m_pCompiler->lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvRefCnt == 0)
+ {
+ continue;
+ }
+
+ bool onStack = (regAvailEstimate == 0); // true when it is likely that this LclVar will have a stack home
+
+ // Some LclVars always have stack homes
+ if ((varDsc->lvDoNotEnregister) || (varDsc->lvType == TYP_LCLBLK))
+ {
+ onStack = true;
+ }
+
+#ifdef _TARGET_X86_
+ // Treat floating point and 64 bit integers as always on the stack
+ if (varTypeIsFloating(varDsc->TypeGet()) || varTypeIsLong(varDsc->TypeGet()))
+ onStack = true;
+#endif
+
+ if (onStack)
+ {
+ frameSize += m_pCompiler->lvaLclSize(lclNum);
+ }
+ else
+ {
+ // For the purposes of estimating the frameSize we
+ // will consider this LclVar as being enregistered.
+ // Now we reduce the remaining regAvailEstimate by
+ // an appropriate amount.
+ if (varDsc->lvRefCnt <= 2)
+ {
+ // a single use single def LclVar only uses 1
+ regAvailEstimate -= 1;
+ }
+ else
+ {
+ // a LclVar with multiple uses and defs uses 2
+ if (regAvailEstimate >= 2)
+ {
+ regAvailEstimate -= 2;
+ }
+ else
+ {
+ // Don't try to subtract when regAvailEstimate is 1
+ regAvailEstimate = 0;
+ }
+ }
+ }
+#ifdef _TARGET_XARCH_
+ if (frameSize > 0x080)
+ {
+ // We likely have a large stack frame.
+ // Thus we might need to use large displacements when loading or storing
+ // to CSE LclVars that are not enregistered
+ largeFrame = true;
+ break; // early out, we don't need to keep increasing frameSize
+ }
+#else // _TARGET_ARM_
+ if (frameSize > 0x0400)
+ {
+ largeFrame = true;
+ }
+ if (frameSize > 0x10000)
+ {
+ hugeFrame = true;
+ break;
+ }
+#endif
+ }
+
+ unsigned sortNum = 0;
+ while (sortNum < m_pCompiler->lvaTrackedCount)
+ {
+ LclVarDsc* varDsc = m_pCompiler->lvaRefSorted[sortNum++];
+ var_types varTyp = varDsc->TypeGet();
+
+ if (varDsc->lvDoNotEnregister)
+ {
+ continue;
+ }
+
+ if (!varTypeIsFloating(varTyp))
+ {
+ // TODO-1stClassStructs: Remove this; it is here to duplicate previous behavior.
+ // Note that this makes genTypeStSz return 1.
+ if (varTypeIsStruct(varTyp))
+ {
+ varTyp = TYP_STRUCT;
+ }
+ enregCount += genTypeStSz(varTyp);
+ }
+
+ if ((aggressiveRefCnt == 0) && (enregCount > (CNT_CALLEE_ENREG * 3 / 2)))
+ {
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ aggressiveRefCnt = varDsc->lvRefCnt + BB_UNITY_WEIGHT;
+ }
+ else
+ {
+ aggressiveRefCnt = varDsc->lvRefCntWtd + BB_UNITY_WEIGHT;
+ }
+ }
+ if ((moderateRefCnt == 0) && (enregCount > ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2))))
+ {
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ moderateRefCnt = varDsc->lvRefCnt;
+ }
+ else
+ {
+ moderateRefCnt = varDsc->lvRefCntWtd;
+ }
+ }
+ }
+ unsigned mult = 3;
+ // use smaller value for mult when enregCount is in [0..4]
+ if (enregCount <= 4)
+ {
+ mult = (enregCount <= 2) ? 1 : 2;
+ }
+
+ aggressiveRefCnt = max(BB_UNITY_WEIGHT * mult, aggressiveRefCnt);
+ moderateRefCnt = max((BB_UNITY_WEIGHT * mult) / 2, moderateRefCnt);
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\n");
+ printf("Aggressive CSE Promotion cutoff is %u\n", aggressiveRefCnt);
+ printf("Moderate CSE Promotion cutoff is %u\n", moderateRefCnt);
+ printf("Framesize estimate is 0x%04X\n", frameSize);
+ printf("We have a %s frame\n", hugeFrame ? "huge" : (largeFrame ? "large" : "small"));
+ }
+#endif
+ }
+
+ void SortCandidates()
+ {
+ /* Create an expression table sorted by decreasing cost */
+ sortTab = new (m_pCompiler, CMK_CSE) Compiler::CSEdsc*[m_pCompiler->optCSECandidateCount];
+
+ sortSiz = m_pCompiler->optCSECandidateCount * sizeof(*sortTab);
+ memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz);
+
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpSz);
+ }
+ else
+ {
+ qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpEx);
+ }
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nSorted CSE candidates:\n");
+ /* Print out the CSE candidates */
+ for (unsigned cnt = 0; cnt < m_pCompiler->optCSECandidateCount; cnt++)
+ {
+ Compiler::CSEdsc* dsc = sortTab[cnt];
+ GenTreePtr expr = dsc->csdTree;
+
+ unsigned def;
+ unsigned use;
+
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ def = dsc->csdDefCount; // def count
+ use = dsc->csdUseCount; // use count (excluding the implicit uses at defs)
+ }
+ else
+ {
+ def = dsc->csdDefWtCnt; // weighted def count
+ use = dsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
+ }
+
+ printf("CSE #%02u,cseMask=%s,useCnt=%d: [def=%3u, use=%3u", dsc->csdIndex,
+ genES2str(genCSEnum2bit(dsc->csdIndex)), dsc->csdUseCount, def, use);
+ printf("] :: ");
+ m_pCompiler->gtDispTree(expr, nullptr, nullptr, true);
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+
+ // The following class nested within CSE_Heuristic encapsulates the information
+ // about the current CSE candidate that is under consideration
+ //
+ // TODO-Cleanup: This is still very much based upon the old Lexical CSE implementation
+ // and needs to be reworked for the Value Number based implementation
+ //
+ class CSE_Candidate
+ {
+ CSE_Heuristic* m_context;
+ Compiler::CSEdsc* m_CseDsc;
+
+ unsigned m_cseIndex;
+
+ unsigned m_defCount;
+ unsigned m_useCount;
+
+ unsigned m_Cost;
+ unsigned m_Size;
+
+ public:
+ CSE_Candidate(CSE_Heuristic* context, Compiler::CSEdsc* cseDsc) : m_context(context), m_CseDsc(cseDsc)
+ {
+ m_cseIndex = m_CseDsc->csdIndex;
+ }
+
+ Compiler::CSEdsc* CseDsc()
+ {
+ return m_CseDsc;
+ }
+ unsigned CseIndex()
+ {
+ return m_cseIndex;
+ }
+ unsigned DefCount()
+ {
+ return m_defCount;
+ }
+ unsigned UseCount()
+ {
+ return m_useCount;
+ }
+ // TODO-CQ: With ValNum CSE's the Expr and its cost can vary.
+ GenTreePtr Expr()
+ {
+ return m_CseDsc->csdTree;
+ }
+ unsigned Cost()
+ {
+ return m_Cost;
+ }
+ unsigned Size()
+ {
+ return m_Size;
+ }
+
+ bool LiveAcrossCall()
+ {
+ return (m_CseDsc->csdLiveAcrossCall != 0);
+ }
+
+ void InitializeCounts()
+ {
+ if (m_context->CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ m_Cost = Expr()->gtCostSz; // the estimated code size
+ m_Size = Expr()->gtCostSz; // always the gtCostSz
+ m_defCount = m_CseDsc->csdDefCount; // def count
+ m_useCount = m_CseDsc->csdUseCount; // use count (excluding the implicit uses at defs)
+ }
+ else
+ {
+ m_Cost = Expr()->gtCostEx; // the estimated execution cost
+ m_Size = Expr()->gtCostSz; // always the gtCostSz
+ m_defCount = m_CseDsc->csdDefWtCnt; // weighted def count
+ m_useCount = m_CseDsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
+ }
+ }
+ };
+
+#ifdef DEBUG
+ //------------------------------------------------------------------------
+ // optConfigBiasedCSE:
+ // Stress mode to shuffle the decision to CSE or not using environment
+ // variable COMPlus_JitStressBiasedCSE (= 0 to 100%). When the bias value
+ // is not specified but COMPlus_JitStress is ON, generate a random bias.
+ //
+ // Return Value:
+ // 0 -- This method is indifferent about this CSE (no bias specified and no stress)
+ // 1 -- This CSE must be performed to maintain specified/generated bias.
+ // -1 -- This CSE mustn't be performed to maintain specified/generated bias.
+ //
+ // Operation:
+ // A debug stress only method that returns "1" with probability (P)
+ // defined by:
+ //
+ // P = (COMPlus_JitStressBiasedCSE / 100) (or)
+ // P = (random(100) / 100) when COMPlus_JitStress is specified and
+ // COMPlus_JitStressBiasedCSE is unspecified.
+ //
+ // When specified, the bias is reinterpreted as a decimal number between 0
+ // and 100.
+ // When bias is not specified, a bias is randomly generated if COMPlus_JitStress
+ // is non-zero.
+ //
+ // Callers are supposed to call this method for each CSE promotion decision,
+ // ignore the result if the return value is 0, and honor a 1 by performing
+ // the CSE and a -1 by skipping it, to maintain the specified/generated bias.
+ //
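+ // Example (hypothetical setting, for illustration only): with an effective
+ // bias of 30, 'gen' falls below the bias roughly 30% of the time, so about
+ // 30% of candidates are forced to be CSEs (+1) and the rest are forced to be
+ // skipped (-1); a bias of 0 never forces a CSE and a bias of 100 always does.
+ //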
+ int optConfigBiasedCSE()
+ {
+ // Seed the PRNG, if never done before.
+ if (!m_cseRNG.IsInitialized())
+ {
+ m_cseRNG.Init(m_pCompiler->info.compMethodHash());
+ m_bias = m_cseRNG.Next(100);
+ }
+
+ // Obtain the bias value and reinterpret as decimal.
+ unsigned bias = ReinterpretHexAsDecimal(JitConfig.JitStressBiasedCSE());
+
+ // Invalid value, check if JitStress is ON.
+ if (bias > 100)
+ {
+ if (!m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, MAX_STRESS_WEIGHT))
+ {
+ // JitStress is OFF for CSE, nothing to do.
+ return 0;
+ }
+ bias = m_bias;
+ JITDUMP("JitStressBiasedCSE is OFF, but JitStress is ON: generated bias=%d.\n", bias);
+ }
+
+ // Generate a number in the range [0, 99] and if the generated
+ // number is smaller than the bias, then perform the CSE.
+ unsigned gen = m_cseRNG.Next(100);
+ int ret = (gen < bias) ? 1 : -1;
+
+ if (m_pCompiler->verbose)
+ {
+ if (ret < 0)
+ {
+ printf("No CSE because gen=%d >= bias=%d\n", gen, bias);
+ }
+ else
+ {
+ printf("Promoting CSE because gen=%d < bias=%d\n", gen, bias);
+ }
+ }
+
+ // Indicate whether to perform CSE or not.
+ return ret;
+ }
+#endif
+
+ // Given a CSE candidate, decide whether it passes or fails the profitability heuristic.
+ // Returns true if we believe that it is profitable to promote this candidate to a CSE.
+ //
+ bool PromotionCheck(CSE_Candidate* candidate)
+ {
+ bool result = false;
+
+#ifdef DEBUG
+ int stressResult = optConfigBiasedCSE();
+ if (stressResult != 0)
+ {
+ // Stress is enabled. Check whether to perform CSE or not.
+ return (stressResult > 0);
+ }
+
+ if (m_pCompiler->optConfigDisableCSE2())
+ {
+ return false; // skip this CSE
+ }
+#endif
+
+ /*
+ Our calculation is based on the following cost estimate formula
+
+ Existing costs are:
+
+ (def + use) * cost
+
+ If we introduce a CSE temp at each definition and
+ replace each use with the CSE temp then our cost is:
+
+ (def * (cost + cse-def-cost)) + (use * cse-use-cost)
+
+ We must estimate the values to use for cse-def-cost and cse-use-cost
+
+ If we are able to enregister the CSE then the cse-use-cost is one
+ and cse-def-cost is either zero or one. Zero in the case where
+ we needed to evaluate the def into a register and we can use that
+ register as the CSE temp as well.
+
+ If we are unable to enregister the CSE then the cse-use-cost is IND_COST
+ and the cse-def-cost is also IND_COST.
+
+ If we want to be conservative we use IND_COST as the value
+ for both cse-def-cost and cse-use-cost and then we never introduce
+ a CSE that could pessimize the execution time of the method.
+
+ If we want to be more moderate we use (IND_COST_EX + 1) / 2 as the
+ values for both cse-def-cost and cse-use-cost.
+
+ If we want to be aggressive we use 1 as the values for both
+ cse-def-cost and cse-use-cost.
+
+ If we believe that the CSE is very valuable in terms of weighted ref counts
+ such that it would always be enregistered by the register allocator we choose
+ the aggressive use def costs.
+
+ If we believe that the CSE is somewhat valuable in terms of weighted ref counts
+ such that it could likely be enregistered by the register allocator we choose
+ the moderate use def costs.
+
+ Otherwise we choose the conservative use def costs.
+
+ */
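+
+ /*
+ Worked example with made-up numbers (illustration only): a candidate with
+ DefCount()=2, UseCount()=6 and Cost()=4, evaluated with the moderate
+ "never live at call" costs below (cse_def_cost=2, cse_use_cost=1), gives
+
+ no_cse_cost = 6 * 4 = 24
+ yes_cse_cost = (2 * 2) + (6 * 1) = 10
+
+ so, ignoring the extra_yes_cost/extra_no_cost adjustments, 10 <= 24 and the
+ candidate passes the promotion check.
+ */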
+
+ unsigned cse_def_cost;
+ unsigned cse_use_cost;
+
+ unsigned no_cse_cost = 0;
+ unsigned yes_cse_cost = 0;
+ unsigned extra_yes_cost = 0;
+ unsigned extra_no_cost = 0;
+
+ // The 'cseRefCnt' is the RefCnt that we will have if we promote this CSE into a new LclVar
+ // Each CSE Def will contain two Refs and each CSE Use will have one Ref of this new LclVar
+ unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount();
+
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ if (cseRefCnt >= aggressiveRefCnt)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
+ }
+#endif
+ cse_def_cost = 1;
+ cse_use_cost = 1;
+
+ if (candidate->LiveAcrossCall() != 0)
+ {
+ if (largeFrame)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ if (hugeFrame)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ }
+ }
+ else if (largeFrame)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Codesize CSE Promotion (large frame)\n");
+ }
+#endif
+#ifdef _TARGET_XARCH_
+ /* The following formula is good choice when optimizing CSE for SMALL_CODE */
+ cse_def_cost = 6; // mov [EBP-0x00001FC],reg
+ cse_use_cost = 5; // [EBP-0x00001FC]
+#else // _TARGET_ARM_
+ if (hugeFrame)
+ {
+ cse_def_cost = 12; // movw/movt r10 and str reg,[sp+r10]
+ cse_use_cost = 12;
+ }
+ else
+ {
+ cse_def_cost = 8; // movw r10 and str reg,[sp+r10]
+ cse_use_cost = 8;
+ }
+#endif
+ }
+ else // small frame
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Codesize CSE Promotion (small frame)\n");
+ }
+#endif
+#ifdef _TARGET_XARCH_
+ /* The following formula is good choice when optimizing CSE for SMALL_CODE */
+ cse_def_cost = 3; // mov [EBP-1C],reg
+ cse_use_cost = 2; // [EBP-1C]
+#else // _TARGET_ARM_
+ cse_def_cost = 2; // str reg,[sp+0x9c]
+ cse_use_cost = 2; // ldr reg,[sp+0x9c]
+#endif
+ }
+ }
+ else // not SMALL_CODE ...
+ {
+ if (cseRefCnt >= aggressiveRefCnt)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
+ }
+#endif
+ cse_def_cost = 1;
+ cse_use_cost = 1;
+ }
+ else if (cseRefCnt >= moderateRefCnt)
+ {
+
+ if (candidate->LiveAcrossCall() == 0)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Moderate CSE Promotion (CSE never live at call) (%u >= %u)\n", cseRefCnt,
+ moderateRefCnt);
+ }
+#endif
+ cse_def_cost = 2;
+ cse_use_cost = 1;
+ }
+ else // candidate is live across call
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Moderate CSE Promotion (%u >= %u)\n", cseRefCnt, moderateRefCnt);
+ }
+#endif
+ cse_def_cost = 2;
+ cse_use_cost = 2;
+ extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller
+ // saved register
+ }
+ }
+ else // Conservative CSE promotion
+ {
+ if (candidate->LiveAcrossCall() == 0)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Conservative CSE Promotion (CSE never live at call) (%u < %u)\n", cseRefCnt,
+ moderateRefCnt);
+ }
+#endif
+ cse_def_cost = 2;
+ cse_use_cost = 2;
+ }
+ else // candidate is live across call
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt);
+ }
+#endif
+ cse_def_cost = 3;
+ cse_use_cost = 3;
+ extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller
+ // saved register
+ }
+
+ // If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable
+ if (m_pCompiler->lvaTrackedCount == lclMAX_TRACKED)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ }
+
+ if (largeFrame)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ if (hugeFrame)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ }
+
+ // estimate the cost from lost codesize reduction if we do not perform the CSE
+ if (candidate->Size() > cse_use_cost)
+ {
+ Compiler::CSEdsc* dsc = candidate->CseDsc(); // We need to retrieve the actual use count, not the
+ // weighted count
+ extra_no_cost = candidate->Size() - cse_use_cost;
+ extra_no_cost = extra_no_cost * dsc->csdUseCount * 2;
+ }
+
+ /* no_cse_cost is the cost estimate when we decide not to make a CSE */
+ /* yes_cse_cost is the cost estimate when we decide to make a CSE */
+
+ no_cse_cost = candidate->UseCount() * candidate->Cost();
+ yes_cse_cost = (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost);
+
+#if CPU_LONG_USES_REGPAIR
+ if (candidate->Expr()->TypeGet() == TYP_LONG)
+ {
+ yes_cse_cost *= 2;
+ }
+#endif
+ no_cse_cost += extra_no_cost;
+ yes_cse_cost += extra_yes_cost;
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("cseRefCnt=%d, aggressiveRefCnt=%d, moderateRefCnt=%d\n", cseRefCnt, aggressiveRefCnt,
+ moderateRefCnt);
+ printf("defCnt=%d, useCnt=%d, cost=%d, size=%d\n", candidate->DefCount(), candidate->UseCount(),
+ candidate->Cost(), candidate->Size());
+ printf("def_cost=%d, use_cost=%d, extra_no_cost=%d, extra_yes_cost=%d\n", cse_def_cost, cse_use_cost,
+ extra_no_cost, extra_yes_cost);
+
+ printf("CSE cost savings check (%u >= %u) %s\n", no_cse_cost, yes_cse_cost,
+ (no_cse_cost >= yes_cse_cost) ? "passes" : "fails");
+ }
+#endif
+
+ // Should we make this candidate into a CSE?
+ // Is the yes cost less than or equal to the no cost?
+ //
+ if (yes_cse_cost <= no_cse_cost)
+ {
+ result = true; // Yes make this a CSE
+ }
+ else
+ {
+ /* In stress mode we will make some extra CSEs */
+ if (no_cse_cost > 0)
+ {
+ int percentage = (no_cse_cost * 100) / yes_cse_cost;
+
+ if (m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, percentage))
+ {
+ result = true; // Yes make this a CSE
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // PerformCSE() takes a successful candidate and performs the appropriate replacements:
+ //
+ // It will replace all of the CSE defs with assignments to a new "cse0" LclVar
+ // and will replace all of the CSE uses with reads of the "cse0" LclVar
+ //
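+ // An illustrative before/after sketch (hypothetical IR, not from these
+ // sources): for a candidate "a + b" with one def and one use,
+ //
+ // before: x = (a + b) * c; y = (a + b) - d;
+ // after: x = (cse0 = a + b) * c; y = cse0 - d;
+ //
+ // where the def site becomes GT_COMMA(GT_ASG(cse0, a + b), cse0) and each use
+ // site becomes a plain GT_LCL_VAR read of the new "cse0" temp.
+ //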
+ void PerformCSE(CSE_Candidate* successfulCandidate)
+ {
+ unsigned cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount();
+
+ if (successfulCandidate->LiveAcrossCall() != 0)
+ {
+ // As we introduce new LclVars for these CSEs we slightly
+ // increase the cutoffs for aggressive and moderate CSE's
+ //
+ int incr = BB_UNITY_WEIGHT;
+
+#if CPU_LONG_USES_REGPAIR
+ if (successfulCandidate->Expr()->TypeGet() == TYP_LONG)
+ incr *= 2;
+#endif
+
+ if (cseRefCnt > aggressiveRefCnt)
+ {
+ aggressiveRefCnt += incr;
+ }
+
+ if (cseRefCnt > moderateRefCnt)
+ {
+ moderateRefCnt += (incr / 2);
+ }
+ }
+
+ /* Introduce a new temp for the CSE */
+
+ // we will create a long lifetime temp for the new cse LclVar
+ unsigned cseLclVarNum = m_pCompiler->lvaGrabTemp(false DEBUGARG("ValNumCSE"));
+ var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet());
+ if (varTypeIsStruct(cseLclVarTyp))
+ {
+ m_pCompiler->lvaSetStruct(cseLclVarNum, m_pCompiler->gtGetStructHandle(successfulCandidate->Expr()), false);
+ }
+ m_pCompiler->lvaTable[cseLclVarNum].lvType = cseLclVarTyp;
+ m_pCompiler->lvaTable[cseLclVarNum].lvIsCSE = true;
+
+ m_addCSEcount++; // Record that we created a new LclVar for use as a CSE temp
+ m_pCompiler->optCSEcount++;
+
+ /* Walk all references to this CSE, adding an assignment
+ to the CSE temp to all defs and changing all refs to
+ a simple use of the CSE temp.
+
+ We also unmark nested CSE's for all uses.
+ */
+
+ Compiler::treeStmtLstPtr lst;
+ lst = successfulCandidate->CseDsc()->csdTreeList;
+ noway_assert(lst);
+
+#define QQQ_CHECK_CSE_VNS 0
+#if QQQ_CHECK_CSE_VNS
+ assert(lst != NULL);
+ ValueNum firstVN = lst->tslTree->gtVN;
+ lst = lst->tslNext;
+ bool allSame = true;
+ while (lst != NULL)
+ {
+ if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
+ {
+ if (lst->tslTree->gtVN != firstVN)
+ {
+ allSame = false;
+ break;
+ }
+ }
+ lst = lst->tslNext;
+ }
+ if (!allSame)
+ {
+ lst = dsc->csdTreeList;
+ GenTreePtr firstTree = lst->tslTree;
+ printf("In %s, CSE (oper = %s, type = %s) has differing VNs: ", info.compFullName,
+ GenTree::NodeName(firstTree->OperGet()), varTypeName(firstTree->TypeGet()));
+ while (lst != NULL)
+ {
+ if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
+ {
+ printf("0x%x(%s,%d) ", lst->tslTree, IS_CSE_USE(lst->tslTree->gtCSEnum) ? "u" : "d",
+ lst->tslTree->gtVN);
+ }
+ lst = lst->tslNext;
+ }
+ printf("\n");
+ }
+ lst = dsc->csdTreeList;
+#endif
+
+ do
+ {
+ /* Process the next node in the list */
+ GenTreePtr exp = lst->tslTree;
+ GenTreePtr stm = lst->tslStmt;
+ noway_assert(stm->gtOper == GT_STMT);
+ BasicBlock* blk = lst->tslBlock;
+
+ /* Advance to the next node in the list */
+ lst = lst->tslNext;
+
+ // Assert if we used DEBUG_DESTROY_NODE on this CSE exp
+ assert(exp->gtOper != GT_COUNT);
+
+ /* Ignore the node if it's not been marked as a CSE */
+ if (!IS_CSE_INDEX(exp->gtCSEnum))
+ {
+ continue;
+ }
+
+ /* Make sure we update the weighted ref count correctly */
+ m_pCompiler->optCSEweight = blk->getBBWeight(m_pCompiler);
+
+ /* Figure out the actual type of the value */
+ var_types expTyp = genActualType(exp->TypeGet());
+ noway_assert(expTyp == cseLclVarTyp);
+
+ // This will contain the replacement tree for exp
+ // It will either be the CSE def or CSE ref
+ //
+ GenTreePtr cse = nullptr;
+ bool isDef;
+ FieldSeqNode* fldSeq = nullptr;
+ bool hasZeroMapAnnotation = m_pCompiler->GetZeroOffsetFieldMap()->Lookup(exp, &fldSeq);
+
+ if (IS_CSE_USE(exp->gtCSEnum))
+ {
+ /* This is a use of the CSE */
+ isDef = false;
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nCSE #%02u use at ", exp->gtCSEnum);
+ Compiler::printTreeID(exp);
+ printf(" replaced in BB%02u with temp use.\n", blk->bbNum);
+ }
+#endif // DEBUG
+
+ /* check for and collect any SIDE_EFFECTS */
+ GenTreePtr sideEffList = nullptr;
+
+ if (exp->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS)
+ {
+ // Extract any side effects from exp
+ //
+ m_pCompiler->gtExtractSideEffList(exp, &sideEffList, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE);
+ }
+
+ // We will replace the CSE ref with a new tree
+ // this is typically just a simple use of the new CSE LclVar
+ //
+ cse = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
+ cse->gtVNPair = exp->gtVNPair; // assign the proper Value Numbers
+#ifdef DEBUG
+ cse->gtDebugFlags |= GTF_DEBUG_VAR_CSE_REF;
+#endif // DEBUG
+
+ // If we have side effects then we need to create a GT_COMMA tree instead
+ //
+ if (sideEffList)
+ {
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nThe CSE has side effects! Extracting side effects...\n");
+ m_pCompiler->gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif
+
+ GenTreePtr cseVal = cse;
+ GenTreePtr curSideEff = sideEffList;
+ ValueNumStore* vnStore = m_pCompiler->vnStore;
+ ValueNumPair exceptions_vnp = ValueNumStore::VNPForEmptyExcSet();
+
+ while ((curSideEff->OperGet() == GT_COMMA) || (curSideEff->OperGet() == GT_ASG))
+ {
+ GenTreePtr op1 = curSideEff->gtOp.gtOp1;
+ GenTreePtr op2 = curSideEff->gtOp.gtOp2;
+
+ ValueNumPair op1vnp;
+ ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(op1->gtVNPair, &op1vnp, &op1Xvnp);
+
+ exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op1Xvnp);
+ curSideEff = op2;
+ }
+
+ // We may have inserted a narrowing cast during a previous remorph
+ // and it will not have a value number.
+ if ((curSideEff->OperGet() == GT_CAST) && !curSideEff->gtVNPair.BothDefined())
+ {
+ // The inserted cast will have no exceptional effects
+ assert(curSideEff->gtOverflow() == false);
+ // Process the exception effects from the cast's operand.
+ curSideEff = curSideEff->gtOp.gtOp1;
+ }
+
+ ValueNumPair op2vnp;
+ ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(curSideEff->gtVNPair, &op2vnp, &op2Xvnp);
+ exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
+
+ op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(cseVal->gtVNPair, &op2vnp, &op2Xvnp);
+ exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
+
+ /* Create a comma node with the sideEffList as op1 */
+ cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, sideEffList, cseVal);
+ cse->gtVNPair = vnStore->VNPWithExc(op2vnp, exceptions_vnp);
+ }
+
+ exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field
+
+ /* Unmark any nested CSE's in the sub-operands */
+
+ // But we do need to communicate the side effect list to optUnmarkCSEs
+ // as any part of the 'exp' tree that is in the sideEffList is preserved
+ // and is not deleted and does not have its ref counts decremented
+ //
+ m_pCompiler->optValnumCSE_UnmarkCSEs(exp, sideEffList);
+ }
+ else
+ {
+ /* This is a def of the CSE */
+ isDef = true;
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nCSE #%02u def at ", GET_CSE_INDEX(exp->gtCSEnum));
+ Compiler::printTreeID(exp);
+ printf(" replaced in BB%02u with def of V%02u\n", blk->bbNum, cseLclVarNum);
+ }
+#endif // DEBUG
+
+ exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field
+
+ GenTreePtr val = exp;
+
+ /* Create an assignment of the value to the temp */
+ GenTreePtr asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val);
+
+ // assign the proper Value Numbers
+ asg->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); // The GT_ASG node itself is $VN.Void
+ asg->gtOp.gtOp1->gtVNPair = val->gtVNPair; // The dest op is the same as 'val'
+
+ noway_assert(asg->gtOp.gtOp1->gtOper == GT_LCL_VAR);
+ noway_assert(asg->gtOp.gtOp2 == val);
+
+ /* Create a reference to the CSE temp */
+ GenTreePtr ref = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
+ ref->gtVNPair = val->gtVNPair; // The new 'ref' is the same as 'val'
+
+ // If it has a zero-offset field seq, copy annotation to the ref
+ if (hasZeroMapAnnotation)
+ {
+ m_pCompiler->GetZeroOffsetFieldMap()->Set(ref, fldSeq);
+ }
+
+ /* Create a comma node for the CSE assignment */
+ cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, asg, ref);
+ cse->gtVNPair = ref->gtVNPair; // The comma's value is the same as 'val'
+ // as the assignment to the CSE LclVar
+ // cannot add any new exceptions
+ }
+
+ // Increment ref count for the CSE ref
+ m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(blk->getBBWeight(m_pCompiler), m_pCompiler);
+
+ if (isDef)
+ {
+ // Also increment ref count for the CSE assignment
+ m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(blk->getBBWeight(m_pCompiler), m_pCompiler);
+ }
+
+ // Walk the statement 'stm' and find the pointer
+ // in the tree that points to 'exp'
+ //
+ GenTreePtr* link = m_pCompiler->gtFindLink(stm, exp);
+
+#ifdef DEBUG
+ if (link == nullptr)
+ {
+ printf("\ngtFindLink failed: stm=");
+ Compiler::printTreeID(stm);
+ printf(", exp=");
+ Compiler::printTreeID(exp);
+ printf("\n");
+ printf("stm =");
+ m_pCompiler->gtDispTree(stm);
+ printf("\n");
+ printf("exp =");
+ m_pCompiler->gtDispTree(exp);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ noway_assert(link);
+
+ // Mutate this link, thus replacing the old exp with the new cse representation
+ //
+ *link = cse;
+
+ // If it has a zero-offset field seq, copy annotation.
+ if (hasZeroMapAnnotation)
+ {
+ m_pCompiler->GetZeroOffsetFieldMap()->Set(cse, fldSeq);
+ }
+
+ assert(m_pCompiler->fgRemoveRestOfBlock == false);
+
+ /* re-morph the statement */
+ m_pCompiler->fgMorphBlockStmt(blk, stm DEBUGARG("optValnumCSE"));
+
+ } while (lst != nullptr);
+ }
+
+ // Consider each of the CSE candidates and if the CSE passes
+ // the PromotionCheck then transform the CSE by calling PerformCSE
+ //
+ void ConsiderCandidates()
+ {
+ /* Consider each CSE candidate, in order of decreasing cost */
+ unsigned cnt = m_pCompiler->optCSECandidateCount;
+ Compiler::CSEdsc** ptr = sortTab;
+ for (; (cnt > 0); cnt--, ptr++)
+ {
+ Compiler::CSEdsc* dsc = *ptr;
+ CSE_Candidate candidate(this, dsc);
+
+ candidate.InitializeCounts();
+
+ if (candidate.UseCount() == 0)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Skipped CSE #%02u because use count is 0\n", candidate.CseIndex());
+ }
+#endif
+ continue;
+ }
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nConsidering CSE #%02u [def=%2u, use=%2u, cost=%2u] CSE Expression:\n", candidate.CseIndex(),
+ candidate.DefCount(), candidate.UseCount(), candidate.Cost());
+ m_pCompiler->gtDispTree(candidate.Expr());
+ printf("\n");
+ }
+#endif
+
+ if ((dsc->csdDefCount <= 0) || (dsc->csdUseCount == 0))
+ {
+ // If we reach this point, then the CSE def was incorrectly marked or the
+ // block with this use is unreachable. So skip and go to the next CSE.
+ // Without the "continue", we'd generate bad code in retail.
+ // Commented out a noway_assert(false) here due to bug: 3290124.
+ // The problem is that if there is a sub-graph that is not reachable from the
+ // entry point, the propagated CSE flags would be incorrect for it.
+ continue;
+ }
+
+ bool doCSE = PromotionCheck(&candidate);
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ if (doCSE)
+ {
+ printf("\nPromoting CSE:\n");
+ }
+ else
+ {
+ printf("Did Not promote this CSE\n");
+ }
+ }
+#endif // DEBUG
+
+ if (doCSE)
+ {
+ PerformCSE(&candidate);
+ }
+ }
+ }
+
+ // Perform the necessary cleanup after our CSE heuristics have run
+ //
+ void Cleanup()
+ {
+ if (m_addCSEcount > 0)
+ {
+ /* We've added new local variables to the lvaTable so note that we need to recreate the sorted table */
+ m_pCompiler->lvaSortAgain = true;
+ }
+ }
+};
+
+/*****************************************************************************
+ *
+ * Routine for performing the Value Number based CSE using our heuristics
+ */
+
+void Compiler::optValnumCSE_Heuristic()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n************ Trees at start of optValnumCSE_Heuristic()\n");
+ fgDumpTrees(fgFirstBB, nullptr);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ CSE_Heuristic cse_heuristic(this);
+
+ cse_heuristic.Initialize();
+ cse_heuristic.SortCandidates();
+ cse_heuristic.ConsiderCandidates();
+ cse_heuristic.Cleanup();
+}
+
+/*****************************************************************************
+ *
+ * Routine to unmark any CSEs contained within a tree
+ * - optionally a 'keepList' can be provided to specify a list of trees that will be kept
+ *
+ */
+
+void Compiler::optValnumCSE_UnmarkCSEs(GenTreePtr deadTree, GenTreePtr keepList)
+{
+ assert(optValnumCSE_phase);
+
+ // We need to communicate the 'keepList' to optUnmarkCSEs
+ // as any part of the 'deadTree' tree that is in the keepList is preserved
+ // and is not deleted and does not have its ref counts decremented
+ // We communicate this value using the walkData.pCallbackData field
+ //
+
+ fgWalkTreePre(&deadTree, optUnmarkCSEs, (void*)keepList);
+}
+
+/*****************************************************************************
+ *
+ * Perform common sub-expression elimination.
+ */
+
+void Compiler::optOptimizeValnumCSEs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In optOptimizeValnumCSEs()\n");
+ }
+
+ if (optConfigDisableCSE())
+ {
+ return; // Disabled by JitNoCSE
+ }
+#endif
+
+ optValnumCSE_phase = true;
+
+ /* Initialize the expression tracking logic */
+
+ optValnumCSE_Init();
+
+ /* Locate interesting expressions and assign indices to them */
+
+ if (optValnumCSE_Locate() > 0)
+ {
+ optCSECandidateTotal += optCSECandidateCount;
+
+ optValnumCSE_InitDataFlow();
+
+ optValnumCSE_DataFlow();
+
+ optValnumCSE_Availablity();
+
+ optValnumCSE_Heuristic();
+ }
+
+ optValnumCSE_phase = false;
+}
+
+#endif // FEATURE_VALNUM_CSE
+
+/*****************************************************************************
+ *
+ * The following determines whether the given expression is a worthy CSE
+ * candidate.
+ */
+bool Compiler::optIsCSEcandidate(GenTreePtr tree)
+{
+ /* No good if the expression contains side effects or if it was marked as DONT CSE */
+
+ if (tree->gtFlags & (GTF_ASG | GTF_DONT_CSE))
+ {
+ return false;
+ }
+
+ /* The only reason a TYP_STRUCT tree might occur is as an argument to
+ GT_ADDR. It will never actually be materialized, so ignore them.
+ Also ignore TYP_VOIDs. */
+
+ var_types type = tree->TypeGet();
+ genTreeOps oper = tree->OperGet();
+
+ // TODO-1stClassStructs: Enable CSE for struct types (depends on either transforming
+ // to use regular assignments, or handling copyObj).
+ if (varTypeIsStruct(type) || type == TYP_VOID)
+ {
+ return false;
+ }
+
+#ifdef _TARGET_X86_
+ if (type == TYP_FLOAT)
+ {
+ // TODO-X86-CQ: Revisit this
+ // Don't CSE a TYP_FLOAT on x86 as we currently can only enregister doubles
+ return false;
+ }
+#else
+ if (oper == GT_CNS_DBL)
+ {
+ // TODO-CQ: Revisit this
+ // Don't try to CSE a GT_CNS_DBL as they can represent both float and doubles
+ return false;
+ }
+#endif
+
+ unsigned cost;
+ if (compCodeOpt() == SMALL_CODE)
+ {
+ cost = tree->gtCostSz;
+ }
+ else
+ {
+ cost = tree->gtCostEx;
+ }
+
+ /* Don't bother if the potential savings are very low */
+ if (cost < MIN_CSE_COST)
+ {
+ return false;
+ }
+
+#if !CSE_CONSTS
+ /* Don't bother with constants */
+ if (tree->OperKind() & GTK_CONST)
+ return false;
+#endif
+
+ /* Check for some special cases */
+
+ switch (oper)
+ {
+ case GT_CALL:
+ // If we have a simple helper call with no other persistent side-effects
+ // then we allow this tree to be a CSE candidate
+ //
+ if (gtTreeHasSideEffects(tree, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE) == false)
+ {
+ return true;
+ }
+ else
+ {
+ // Calls generally cannot be CSE-ed
+ return false;
+ }
+
+ case GT_IND:
+ // TODO-CQ: Review this...
+ /* We try to cse GT_ARR_ELEM nodes instead of GT_IND(GT_ARR_ELEM).
+ Doing the first allows cse to also kick in for code like
+ "GT_IND(GT_ARR_ELEM) = GT_IND(GT_ARR_ELEM) + xyz", whereas doing
+ the second would not allow it */
+
+ return (tree->gtOp.gtOp1->gtOper != GT_ARR_ELEM);
+
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
+ case GT_CNS_DBL:
+ case GT_CNS_STR:
+ return true; // We reach here only when CSE_CONSTS is enabled
+
+ case GT_ARR_ELEM:
+ case GT_ARR_LENGTH:
+ case GT_CLS_VAR:
+ case GT_LCL_FLD:
+ return true;
+
+ case GT_LCL_VAR:
+ return false; // Can't CSE a volatile LCL_VAR
+
+ case GT_NEG:
+ case GT_NOT:
+ case GT_CAST:
+ return true; // CSE these Unary Operators
+
+ case GT_SUB:
+ case GT_DIV:
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ case GT_OR:
+ case GT_AND:
+ case GT_XOR:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ return true; // CSE these Binary Operators
+
+ case GT_ADD: // Check for ADDRMODE flag on these Binary Operators
+ case GT_MUL:
+ case GT_LSH:
+ if ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)
+ {
+ return false;
+ }
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ return true; // Also CSE these Comparison Operators
+
+ case GT_INTRINSIC:
+ return true; // Intrinsics
+
+ case GT_COMMA:
+ return true; // Allow GT_COMMA nodes to be CSE-ed.
+
+ case GT_COLON:
+ case GT_QMARK:
+ case GT_NOP:
+ case GT_RETURN:
+ return false; // Currently the only special nodes that we hit
+ // that we know that we don't want to CSE
+
+ default:
+ break; // Any new nodes that we might add later...
+ }
+
+ return false;
+}
+
+#ifdef DEBUG
+//
+// A Debug only method that allows you to control whether the CSE logic is enabled for this method.
+//
+// If this method returns false then the CSE phase should be performed.
+// If the method returns true then the CSE phase should be skipped.
+//
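+// Worked example (hypothetical JitNoCSE value, for illustration only): with
+// COMPlus_JitNoCSE=0xF002001 the 0xF000000 marker selects the mask mode,
+// bitsOne is 0x001 and bitsZero is 0x002, so CSE is disabled exactly for
+// methods whose (methodCount & 0xFFF) has bit 0 set and bit 1 clear,
+// e.g. method numbers 1, 5, 9, ...
+//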
+bool Compiler::optConfigDisableCSE()
+{
+ // Next check if COMPlus_JitNoCSE is set and applies to this method
+ //
+ unsigned jitNoCSE = JitConfig.JitNoCSE();
+
+ if (jitNoCSE > 0)
+ {
+ unsigned methodCount = Compiler::jitTotalMethodCompiled;
+ if ((jitNoCSE & 0xF000000) == 0xF000000)
+ {
+ unsigned methodCountMask = methodCount & 0xFFF;
+ unsigned bitsZero = (jitNoCSE >> 12) & 0xFFF;
+ unsigned bitsOne = (jitNoCSE >> 0) & 0xFFF;
+
+ if (((methodCountMask & bitsOne) == bitsOne) && ((~methodCountMask & bitsZero) == bitsZero))
+ {
+ if (verbose)
+ {
+ printf(" Disabled by JitNoCSE methodCountMask\n");
+ }
+
+ return true; // The CSE phase for this method is disabled
+ }
+ }
+ else if (jitNoCSE <= (methodCount + 1))
+ {
+ if (verbose)
+ {
+ printf(" Disabled by JitNoCSE > methodCount\n");
+ }
+
+ return true; // The CSE phase for this method is disabled
+ }
+ }
+
+ return false;
+}
+
+//
+// A Debug only method that allows you to control whether the CSE logic is enabled for
+// a particular CSE in a method
+//
+// If this method returns false then the CSE should be performed.
+// If the method returns true then the CSE should be skipped.
+//
+bool Compiler::optConfigDisableCSE2()
+{
+ static unsigned totalCSEcount = 0;
+
+ unsigned jitNoCSE2 = JitConfig.JitNoCSE2();
+
+ totalCSEcount++;
+
+ if (jitNoCSE2 > 0)
+ {
+ if ((jitNoCSE2 & 0xF000000) == 0xF000000)
+ {
+ unsigned totalCSEMask = totalCSEcount & 0xFFF;
+ unsigned bitsZero = (jitNoCSE2 >> 12) & 0xFFF;
+ unsigned bitsOne = (jitNoCSE2 >> 0) & 0xFFF;
+
+ if (((totalCSEMask & bitsOne) == bitsOne) && ((~totalCSEMask & bitsZero) == bitsZero))
+ {
+ if (verbose)
+ {
+ printf(" Disabled by jitNoCSE2 Ones/Zeros mask\n");
+ }
+ return true;
+ }
+ }
+ else if ((jitNoCSE2 & 0xF000000) == 0xE000000)
+ {
+ unsigned totalCSEMask = totalCSEcount & 0xFFF;
+ unsigned disableMask = jitNoCSE2 & 0xFFF;
+
+ disableMask >>= (totalCSEMask % 12);
+
+ if (disableMask & 1)
+ {
+ if (verbose)
+ {
+ printf(" Disabled by jitNoCSE2 rotating disable mask\n");
+ }
+ return true;
+ }
+ }
+ else if (jitNoCSE2 <= totalCSEcount)
+ {
+ if (verbose)
+ {
+ printf(" Disabled by jitNoCSE2 > totalCSEcount\n");
+ }
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
+void Compiler::optOptimizeCSEs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In optOptimizeCSEs()\n");
+ printf("Blocks/Trees at start of optOptimizeCSE phase\n");
+ fgDispBasicBlocks(true);
+ }
+#endif // DEBUG
+
+ optCSECandidateCount = 0;
+ optCSEstart = lvaCount;
+
+#if FEATURE_VALNUM_CSE
+ INDEBUG(optEnsureClearCSEInfo());
+ optOptimizeValnumCSEs();
+ EndPhase(PHASE_OPTIMIZE_VALNUM_CSES);
+#endif // FEATURE_VALNUM_CSE
+}
+
+/*****************************************************************************
+ *
+ * Cleanup after CSE to allow us to run more than once.
+ */
+
+void Compiler::optCleanupCSEs()
+{
+ // We must clear the BBF_VISITED and BBF_MARKED flags
+ //
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ unsigned blkFlags = block->bbFlags;
+
+ // And clear all the "visited" bits on the block
+ //
+ block->bbFlags &= ~(BBF_VISITED | BBF_MARKED);
+
+ /* Walk the statement trees in this basic block */
+
+ GenTreePtr stmt;
+
+ // Initialize 'stmt' to the first non-Phi statement
+ stmt = block->FirstNonPhiDef();
+
+ for (; stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ /* We must clear the gtCSEnum field */
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev)
+ {
+ tree->gtCSEnum = NO_CSE;
+ }
+ }
+ }
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Ensure that all the CSE information in the IR is initialized the way we expect it,
+ * before running a CSE phase. This is basically an assert that optCleanupCSEs() is not needed.
+ */
+
+void Compiler::optEnsureClearCSEInfo()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ assert((block->bbFlags & (BBF_VISITED | BBF_MARKED)) == 0);
+
+ /* Walk the statement trees in this basic block */
+
+ GenTreePtr stmt;
+
+ // Initialize 'stmt' to the first non-Phi statement
+ stmt = block->FirstNonPhiDef();
+
+ for (; stmt; stmt = stmt->gtNext)
+ {
+ assert(stmt->gtOper == GT_STMT);
+
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev)
+ {
+ assert(tree->gtCSEnum == NO_CSE);
+ }
+ }
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************/
+#endif // FEATURE_ANYCSE
+/*****************************************************************************/
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp
new file mode 100644
index 0000000000..0fbdb27770
--- /dev/null
+++ b/src/jit/optimizer.cpp
@@ -0,0 +1,8540 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Optimizer XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#pragma warning(disable : 4701)
+#endif
+
+/*****************************************************************************/
+
+#if COUNT_RANGECHECKS
+/* static */
+unsigned Compiler::optRangeChkRmv = 0;
+/* static */
+unsigned Compiler::optRangeChkAll = 0;
+#endif
+
+/*****************************************************************************/
+
+void Compiler::optInit()
+{
+ optLoopsMarked = false;
+ fgHasLoops = false;
+
+ /* Initialize the # of tracked loops to 0 */
+ optLoopCount = 0;
+ /* Keep track of the number of calls and indirect calls made by this method */
+ optCallCount = 0;
+ optIndirectCallCount = 0;
+ optNativeCallCount = 0;
+ optAssertionCount = 0;
+ optAssertionDep = nullptr;
+#if FEATURE_ANYCSE
+ optCSECandidateTotal = 0;
+ optCSEstart = UINT_MAX;
+ optCSEcount = 0;
+#endif // FEATURE_ANYCSE
+}
+
+DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
+{
+}
+
+/*****************************************************************************
+ *
+ */
+
+void Compiler::optSetBlockWeights()
+{
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+ assert(fgDomsComputed);
+
+#ifdef DEBUG
+ bool changed = false;
+#endif
+
+ bool firstBBdomsRets = true;
+
+ BasicBlock* block;
+
+ for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
+ {
+ /* Blocks that can't be reached via the first block are rarely executed */
+ if (!fgReachable(fgFirstBB, block))
+ {
+ block->bbSetRunRarely();
+ }
+
+ if (block->bbWeight != BB_ZERO_WEIGHT)
+ {
+ // Calculate our bbWeight:
+ //
+ // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
+ // o otherwise BB_UNITY_WEIGHT / 2
+ //
+ bool domsRets = true; // Assume that we will dominate
+
+ for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
+ {
+ if (!fgDominate(block, retBlocks->block))
+ {
+ domsRets = false;
+ break;
+ }
+ }
+
+ if (block == fgFirstBB)
+ {
+ firstBBdomsRets = domsRets;
+ }
+
+ // If we are not using profile weight then we lower the weight
+ // of blocks that do not dominate a return block
+ //
+ if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
+ {
+#if DEBUG
+ changed = true;
+#endif
+ block->modifyBBWeight(block->bbWeight / 2);
+ noway_assert(block->bbWeight);
+ }
+ }
+ }
+
+#if DEBUG
+ if (changed && verbose)
+ {
+ printf("\nAfter optSetBlockWeights:\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+
+ /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
+ fgDebugCheckBBlist();
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
+ */
+
+void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
+{
+ /* Calculate the 'loopWeight':
+ this is the amount by which to increase each block in the loop.
+ Our heuristic is that loops are weighted eight times more
+ than straight line code.
+ Thus we increase each block by 7 times the weight of
+ the loop header block;
+ if the loops are all properly formed this gives us:
+ (assuming that BB_LOOP_WEIGHT is 8)
+
+ 1 -- non loop basic block
+ 8 -- single loop nesting
+ 64 -- double loop nesting
+ 512 -- triple loop nesting
+
+ */
+
+ noway_assert(begBlk->bbNum <= endBlk->bbNum);
+ noway_assert(begBlk->isLoopHead());
+ noway_assert(fgReachable(begBlk, endBlk));
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nMarking loop L%02u", begBlk->bbLoopNum);
+ }
+#endif
+
+ noway_assert(!opts.MinOpts());
+
+ /* Build list of backedges for block begBlk */
+ flowList* backedgeList = nullptr;
+
+ for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ /* Is this a backedge? */
+ if (pred->flBlock->bbNum >= begBlk->bbNum)
+ {
+ flowList* flow = new (this, CMK_FlowList) flowList();
+
+#if MEASURE_BLOCK_SIZE
+ genFlowNodeCnt += 1;
+ genFlowNodeSize += sizeof(flowList);
+#endif // MEASURE_BLOCK_SIZE
+
+ flow->flNext = backedgeList;
+ flow->flBlock = pred->flBlock;
+ backedgeList = flow;
+ }
+ }
+
+ /* At least one backedge must have been found (the one from endBlk) */
+ noway_assert(backedgeList);
+
+ BasicBlock* curBlk = begBlk;
+
+ while (true)
+ {
+ noway_assert(curBlk);
+
+ // For curBlk to be part of a loop that starts at begBlk
+ // curBlk must be reachable from begBlk and (since this is a loop)
+ // likewise begBlk must be reachable from curBlk.
+ //
+
+ if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
+ {
+ /* If this block reaches any of the backedge blocks we set reachable */
+ /* If this block dominates any of the backedge blocks we set dominates */
+ bool reachable = false;
+ bool dominates = false;
+
+ for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
+ {
+ BasicBlock* backedge = tmp->flBlock;
+
+ if (!curBlk->isRunRarely())
+ {
+ reachable |= fgReachable(curBlk, backedge);
+ dominates |= fgDominate(curBlk, backedge);
+
+ if (dominates && reachable)
+ {
+ break;
+ }
+ }
+ }
+
+ if (reachable)
+ {
+ noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
+
+ unsigned weight;
+
+ if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
+ {
+ // We have real profile weights, so we aren't going to change this block's weight
+ weight = curBlk->bbWeight;
+ }
+ else
+ {
+ if (dominates)
+ {
+ weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
+ }
+ else
+ {
+ weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
+ }
+
+ //
+ // The multiplication may have caused us to overflow
+ //
+ if (weight < curBlk->bbWeight)
+ {
+ // The multiplication caused us to overflow
+ weight = BB_MAX_WEIGHT;
+ }
+ //
+ // Set the new weight
+ //
+ curBlk->modifyBBWeight(weight);
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
+ }
+#endif
+ }
+ }
+
+ /* Stop if we've reached the last block in the loop */
+
+ if (curBlk == endBlk)
+ {
+ break;
+ }
+
+ curBlk = curBlk->bbNext;
+
+ /* If we are excluding the endBlk then stop if we've reached endBlk */
+
+ if (excludeEndBlk && (curBlk == endBlk))
+ {
+ break;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
+ */
+
+void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
+{
+ /* A set of blocks that were previously marked as a loop are now
+ to be unmarked, since we have decided that for some reason this
+ loop no longer exists.
+ Basically we are just resetting the blocks' bbWeight to their
+ previous values.
+ */
+
+ noway_assert(begBlk->bbNum <= endBlk->bbNum);
+ noway_assert(begBlk->isLoopHead());
+
+ noway_assert(!opts.MinOpts());
+
+ BasicBlock* curBlk;
+ unsigned backEdgeCount = 0;
+
+ for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ curBlk = pred->flBlock;
+
+ /* is this a backward edge? (from curBlk to begBlk) */
+
+ if (begBlk->bbNum > curBlk->bbNum)
+ {
+ continue;
+ }
+
+ /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
+
+ if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
+ {
+ continue;
+ }
+
+ backEdgeCount++;
+ }
+
+ /* Only unmark the loop blocks if we have exactly one loop back edge */
+ if (backEdgeCount != 1)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (backEdgeCount > 0)
+ {
+ printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
+ }
+ else if (backEdgeCount == 0)
+ {
+ printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
+ }
+ }
+#endif
+ return;
+ }
+ noway_assert(backEdgeCount == 1);
+ noway_assert(fgReachable(begBlk, endBlk));
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
+ }
+#endif
+
+ curBlk = begBlk;
+ while (true)
+ {
+ noway_assert(curBlk);
+
+ // For curBlk to be part of a loop that starts at begBlk
+ // curBlk must be reachable from begBlk and (since this is a loop)
+ // likewise begBlk must be reachable from curBlk.
+ //
+ if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
+ {
+ unsigned weight = curBlk->bbWeight;
+
+ // Don't unmark blocks that are set to BB_MAX_WEIGHT
+ // Don't unmark blocks when we are using profile weights
+ //
+ if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
+ {
+ if (!fgDominate(curBlk, endBlk))
+ {
+ weight *= 2;
+ }
+ else
+ {
+ /* Merging of blocks can disturb the Dominates
+ information (see RAID #46649) */
+ if (weight < BB_LOOP_WEIGHT)
+ {
+ weight *= 2;
+ }
+ }
+
+ // We can overflow here so check for it
+ if (weight < curBlk->bbWeight)
+ {
+ weight = BB_MAX_WEIGHT;
+ }
+
+ assert(weight >= BB_LOOP_WEIGHT);
+
+ curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
+ }
+#endif
+ }
+ /* Stop if we've reached the last block in the loop */
+
+ if (curBlk == endBlk)
+ {
+ break;
+ }
+
+ curBlk = curBlk->bbNext;
+
+ /* Stop if we go past the last block in the loop, as it may have been deleted */
+ if (curBlk->bbNum > endBlk->bbNum)
+ {
+ break;
+ }
+ }
+}
+
+/*****************************************************************************************************
+ *
+ * Function called to update the loop table and bbWeight before removing a block
+ */
+
+void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
+{
+ if (!optLoopsMarked)
+ {
+ return;
+ }
+
+ noway_assert(!opts.MinOpts());
+
+ bool removeLoop = false;
+
+ /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
+ /* Special case: the block was the head of a loop - or pointing to a loop entry */
+
+ for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
+ {
+ /* Some loops may have been already removed by
+ * loop unrolling or conditional folding */
+
+ if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
+ {
+ optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
+ continue;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
+ optPrintLoopInfo(loopNum);
+ }
+#endif
+
+ /* If the loop is still in the table
+ * any block in the loop must be reachable !!! */
+
+ noway_assert(optLoopTable[loopNum].lpEntry != block);
+ noway_assert(optLoopTable[loopNum].lpBottom != block);
+
+ if (optLoopTable[loopNum].lpExit == block)
+ {
+ optLoopTable[loopNum].lpExit = nullptr;
+ optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
+ }
+
+ /* If this points to the actual entry in the loop
+ * then the whole loop may become unreachable */
+
+ switch (block->bbJumpKind)
+ {
+ unsigned jumpCnt;
+ BasicBlock** jumpTab;
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ if (block->bbNext == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = true;
+ break;
+ }
+ if (block->bbJumpKind == BBJ_NONE)
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case BBJ_ALWAYS:
+ noway_assert(block->bbJumpDest);
+ if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = true;
+ }
+ break;
+
+ case BBJ_SWITCH:
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ noway_assert(*jumpTab);
+ if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = true;
+ }
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ break;
+ }
+
+ if (removeLoop)
+ {
+ /* Check if the entry has other predecessors outside the loop
+ * TODO: Replace this when predecessors are available */
+
+ BasicBlock* auxBlock;
+ for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
+ {
+ /* Ignore blocks in the loop */
+
+ if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
+ auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
+ {
+ continue;
+ }
+
+ switch (auxBlock->bbJumpKind)
+ {
+ unsigned jumpCnt;
+ BasicBlock** jumpTab;
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = false;
+ break;
+ }
+ if (auxBlock->bbJumpKind == BBJ_NONE)
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case BBJ_ALWAYS:
+ noway_assert(auxBlock->bbJumpDest);
+ if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = false;
+ }
+ break;
+
+ case BBJ_SWITCH:
+ jumpCnt = auxBlock->bbJumpSwt->bbsCount;
+ jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ noway_assert(*jumpTab);
+ if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = false;
+ }
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (removeLoop)
+ {
+ optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
+ }
+ }
+ else if (optLoopTable[loopNum].lpHead == block)
+ {
+ /* The loop has a new head - Just update the loop table */
+ optLoopTable[loopNum].lpHead = block->bbPrev;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nUpdateLoopsBeforeRemoveBlock After: ");
+ optPrintLoopInfo(loopNum);
+ }
+#endif
+ }
+
+ if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
+ (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
+ (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
+ {
+ optUnmarkLoopBlocks(block->bbJumpDest, block);
+ }
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Given the beginBlock of the loop, return the index of this loop
+ * to the loop table.
+ */
+
+unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
+{
+ unsigned lnum = 0;
+
+ for (lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ if (optLoopTable[lnum].lpHead->bbNext == begBlk)
+ {
+ // Found the loop.
+ return lnum;
+ }
+ }
+
+ noway_assert(!"Loop number not found.");
+
+ return optLoopCount;
+}
+
+/*****************************************************************************
+ *
+ * Print loop info in a uniform way.
+ */
+
+void Compiler::optPrintLoopInfo(unsigned loopInd,
+ BasicBlock* lpHead,
+ BasicBlock* lpFirst,
+ BasicBlock* lpTop,
+ BasicBlock* lpEntry,
+ BasicBlock* lpBottom,
+ unsigned char lpExitCnt,
+ BasicBlock* lpExit,
+ unsigned parentLoop)
+{
+ noway_assert(lpHead);
+
+ //
+ // NOTE: we take "loopInd" as an argument instead of using the one
+ // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
+ // has not been set correctly, for example in optRecordLoop().
+ // However, in most cases, loops should have been recorded.
+ // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
+ // version of this method.
+ //
+ printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
+ if (lpTop != lpFirst)
+ {
+ printf(" (loop top is BB%02u)", lpTop->bbNum);
+ }
+
+ printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
+ lpExitCnt);
+
+ if (lpExitCnt == 1)
+ {
+ printf(" at BB%02u", lpExit->bbNum);
+ }
+
+ if (parentLoop != BasicBlock::NOT_IN_LOOP)
+ {
+ printf(", parent loop = L%02u", parentLoop);
+ }
+ printf(")");
+}
+
+/*****************************************************************************
+ *
+ * Print loop information given the index of the loop in the loop table.
+ */
+
+void Compiler::optPrintLoopInfo(unsigned lnum)
+{
+ noway_assert(lnum < optLoopCount);
+
+ LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
+
+ optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
+ ldsc->lpExit, ldsc->lpParent);
+}
+
+#endif
+
+//------------------------------------------------------------------------
+// optPopulateInitInfo: Populate loop init info in the loop table.
+//
+// Arguments:
+// init - the tree that is supposed to initialize the loop iterator.
+// iterVar - loop iteration variable.
+//
+// Return Value:
+// "false" if the loop table could not be populated with the loop iterVar init info.
+//
+// Operation:
+// The 'init' tree is checked to see whether its lhs is a local (matching "iterVar")
+// and its rhs is either a constant or a local.
+//
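+// Example (illustrative):
+// For a loop written as "for (int i = 0; ...)" the init tree is ASG(lclVar 'i', const 0),
+// so LPFLG_CONST_INIT is set and lpConstInit records 0. For "for (int i = n; ...)" the
+// rhs is a local, so LPFLG_VAR_INIT is set and lpVarInit records the local number of 'n'.
+//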
+bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
+{
+ // Operator should be =
+ if (init->gtOper != GT_ASG)
+ {
+ return false;
+ }
+
+ GenTreePtr lhs = init->gtOp.gtOp1;
+ GenTreePtr rhs = init->gtOp.gtOp2;
+ // LHS has to be local and should equal iterVar.
+ if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
+ {
+ return false;
+ }
+
+ // RHS can be constant or local var.
+ // TODO-CQ: CLONE: Add arr length for descending loops.
+ if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
+ optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
+ }
+ else if (rhs->gtOper == GT_LCL_VAR)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
+ optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
+ }
+ else
+ {
+ return false;
+ }
+ return true;
+}
+
+//----------------------------------------------------------------------------------
+// optCheckIterInLoopTest: Check if iter var is used in loop test.
+//
+// Arguments:
+// test "jtrue" tree or an asg of the loop iter termination condition
+// from/to blocks (beg, end) which are part of the loop.
+// iterVar loop iteration variable.
+// loopInd loop index.
+//
+// Operation:
+// The test tree is parsed to check if "iterVar" matches the lhs of the condition
+// and the rhs limit is extracted from the "test" tree. The limit information is
+// added to the loop table.
+//
+// Return Value:
+// "false" if the loop table could not be populated with the loop test info or
+// if the test condition doesn't involve iterVar.
+//
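+//
+// Example (illustrative):
+// For a test such as "i < 10" the limit is a constant, so LPFLG_CONST_LIMIT is set;
+// for "i < n", where 'n' is a local not assigned in the loop, LPFLG_VAR_LIMIT is set;
+// and for "i < a.Length" LPFLG_ARRLEN_LIMIT is set. In all cases lpTestTree records
+// the comparison node.
+//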
+bool Compiler::optCheckIterInLoopTest(
+ unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
+{
+ // Obtain the relop from the "test" tree.
+ GenTreePtr relop;
+ if (test->gtOper == GT_JTRUE)
+ {
+ relop = test->gtGetOp1();
+ }
+ else
+ {
+ assert(test->gtOper == GT_ASG);
+ relop = test->gtGetOp2();
+ }
+
+ noway_assert(relop->OperKind() & GTK_RELOP);
+
+ GenTreePtr opr1 = relop->gtOp.gtOp1;
+ GenTreePtr opr2 = relop->gtOp.gtOp2;
+
+ GenTreePtr iterOp;
+ GenTreePtr limitOp;
+
+ // Make sure op1 or op2 is the iterVar.
+ if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
+ {
+ iterOp = opr1;
+ limitOp = opr2;
+ }
+ else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
+ {
+ iterOp = opr2;
+ limitOp = opr1;
+ }
+ else
+ {
+ return false;
+ }
+
+ if (iterOp->gtType != TYP_INT)
+ {
+ return false;
+ }
+
+ // Mark the iterator node.
+ iterOp->gtFlags |= GTF_VAR_ITERATOR;
+
+ // Check what type of limit we have - constant, variable or arr-len.
+ if (limitOp->gtOper == GT_CNS_INT)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
+ }
+ else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
+ }
+ else if (limitOp->gtOper == GT_ARR_LENGTH)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
+ }
+ else
+ {
+ return false;
+ }
+ // Save the type of the comparison between the iterator and the limit.
+ optLoopTable[loopInd].lpTestTree = relop;
+ return true;
+}
+
+//----------------------------------------------------------------------------------
+// optIsLoopIncrTree: Check if the incr tree is of the form "v += 1" or "v = v + 1"
+//
+// Arguments:
+// incr The incr tree to be checked; it may be an oper-equal (+=, -=, ...)
+// node or a "v = v + 1" type ASG node.
+//
+// Operation:
+// The incr tree is parsed to check that it updates a local variable using one of
+// the supported operators (add, sub, mul, lsh, rsh) and that the increment amount
+// is a constant int.
+//
+// Return Value:
+// iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
+//
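+// Example (illustrative):
+// "i += 2" or "i = i + 2" returns the local number of 'i', since the update operator
+// (GT_ADD) is supported and the increment is a constant int; "i += j" returns
+// BAD_VAR_NUM because the increment is not a constant.
+//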
+unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
+{
+ GenTree* incrVal;
+ genTreeOps updateOper;
+ unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
+ if (iterVar != BAD_VAR_NUM)
+ {
+ // We have v = v op y type asg node.
+ switch (updateOper)
+ {
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ case GT_RSH:
+ case GT_LSH:
+ break;
+ default:
+ return BAD_VAR_NUM;
+ }
+
+ // Increment should be by a const int.
+ // TODO-CQ: CLONE: allow variable increments.
+ if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
+ {
+ return BAD_VAR_NUM;
+ }
+ }
+
+ return iterVar;
+}
+
+//----------------------------------------------------------------------------------
+// optComputeIterInfo: Check that the tree is a loop increment of a lcl that is not otherwise assigned in the loop.
+//
+// Arguments:
+// from, to - are blocks (beg, end) which are part of the loop.
+// incr - tree that increments the loop iterator. v+=1 or v=v+1.
+// pIterVar - see return value.
+//
+// Return Value:
+// Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
+// false.
+//
+// Operation:
+// Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
+// assigned in the loop.
+//
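+// Example (illustrative):
+// For "for (i = 0; i < n; i++) { ... }" the incr tree "i++" yields 'i' as the iterator;
+// if the loop body also assigned 'i' (say "i = x"), optIsVarAssigned would detect the
+// extra assignment and we would return false.
+//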
+bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
+{
+
+ unsigned iterVar = optIsLoopIncrTree(incr);
+ if (iterVar == BAD_VAR_NUM)
+ {
+ return false;
+ }
+ if (optIsVarAssigned(from, to, incr, iterVar))
+ {
+ JITDUMP("iterVar is assigned in loop\n");
+ return false;
+ }
+
+ *pIterVar = iterVar;
+ return true;
+}
+
+//----------------------------------------------------------------------------------
+// optIsLoopTestEvalIntoTemp:
+// Pattern match whether the test tree is computed into a tmp
+// and that "tmp" is used as the jump condition for loop termination.
+//
+// Arguments:
+// testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
+// where Vtmp contains the actual loop test result.
+// newStmt - contains the statement that is the actual test stmt involving
+// the loop iterator.
+//
+// Return Value:
+// Returns true if a new test tree can be obtained.
+//
+// Operation:
+// Check whether the current stmt is a jtrue with (Vtmp != 0) as its condition.
+// If so, return the statement defining Vtmp (whose rhs is the actual compare) as the "test" node.
+//
+// Note:
+// This method just retrieves what it thinks is the "test" node;
+// the callers are expected to verify that "iterVar" is used in the test.
+//
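+// Example (illustrative):
+// tmp = (i < n); // prev stmt: the compare evaluated into Vtmp
+// jmpTrue(tmp != 0); // testStmt: the loop termination jump
+// Here the statement defining 'tmp' is returned as the new "test" node.
+//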
+bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
+{
+ GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
+
+ if (test->gtOper != GT_JTRUE)
+ {
+ return false;
+ }
+
+ GenTreePtr relop = test->gtGetOp1();
+ noway_assert(relop->OperIsCompare());
+
+ GenTreePtr opr1 = relop->gtOp.gtOp1;
+ GenTreePtr opr2 = relop->gtOp.gtOp2;
+
+ // Make sure we have jtrue (vtmp != 0)
+ if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
+ opr2->IsIntegralConst(0))
+ {
+ // Get the previous statement to get the def (rhs) of Vtmp to see
+ // if the "test" is evaluated into Vtmp.
+ GenTreePtr prevStmt = testStmt->gtPrev;
+ if (prevStmt == nullptr)
+ {
+ return false;
+ }
+
+ GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
+ if (tree->OperGet() == GT_ASG)
+ {
+ GenTreePtr lhs = tree->gtOp.gtOp1;
+ GenTreePtr rhs = tree->gtOp.gtOp2;
+
+ // Return as the new test node.
+ if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
+ {
+ if (rhs->OperIsCompare())
+ {
+ *newTest = prevStmt;
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+//----------------------------------------------------------------------------------
+// optExtractInitTestIncr:
+// Extract the "init", "test" and "incr" nodes of the loop.
+//
+// Arguments:
+// head - Loop head block
+// bottom - Loop bottom block
+// top - Loop top block
+// ppInit - The init stmt of the loop if found.
+// ppTest - The test stmt of the loop if found.
+// ppIncr - The incr stmt of the loop if found.
+//
+// Return Value:
+// The results are put in "ppInit", "ppTest" and "ppIncr" if the method
+// returns true. Returns false if the information can't be extracted.
+//
+// Operation:
+// Check if the last stmt in the loop "bottom" is the loop test; if so, the
+// "test" stmt has been found. Then try to find the "incr" stmt by checking the
+// stmt previous to "test". If it is not found there, the loop could be of the
+// form shown below.
+//
+// +-------<-----------------<-----------+
+// | |
+// v |
+// BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
+//
+// Check if the "incr" tree is present in the loop "top" node as the last stmt.
+// Also check if the "test" tree is assigned to a tmp node and the tmp is used
+// in the jtrue condition.
+//
+// Note:
+// This method just retrieves what it thinks is the "test" node;
+// the callers are expected to verify that "iterVar" is used in the test.
+//
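+// Example (illustrative):
+// head: ...; i = 0; // "init" is the last stmt of the pre-header
+// bottom: ...; i = i + 1; jmpTrue(i < n); // "incr" and "test" are the last two stmts
+//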
+bool Compiler::optExtractInitTestIncr(
+ BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
+{
+ assert(ppInit != nullptr);
+ assert(ppTest != nullptr);
+ assert(ppIncr != nullptr);
+
+ // Check if last two statements in the loop body are the increment of the iterator
+ // and the loop termination test.
+ noway_assert(bottom->bbTreeList != nullptr);
+ GenTreePtr test = bottom->bbTreeList->gtPrev;
+ noway_assert(test != nullptr && test->gtNext == nullptr);
+
+ GenTreePtr newTest;
+ if (optIsLoopTestEvalIntoTemp(test, &newTest))
+ {
+ test = newTest;
+ }
+
+ // Check if we have the incr tree before the test tree, if we don't,
+ // check if incr is part of the loop "top".
+ GenTreePtr incr = test->gtPrev;
+ if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
+ {
+ if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
+ {
+ return false;
+ }
+
+ // Check if the last stmt of the loop "top" block is the incr tree.
+ GenTreePtr topLast = top->bbTreeList->gtPrev;
+ if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
+ {
+ incr = topLast;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ assert(test != incr);
+
+ // Find the last statement in the loop pre-header which we expect to be the initialization of
+ // the loop iterator.
+ GenTreePtr phdr = head->bbTreeList;
+ if (phdr == nullptr)
+ {
+ return false;
+ }
+
+ GenTreePtr init = phdr->gtPrev;
+ noway_assert(init != nullptr && (init->gtNext == nullptr));
+
+ // If it is a duplicated loop condition, skip it.
+ if (init->gtFlags & GTF_STMT_CMPADD)
+ {
+ // Must be a duplicated loop condition.
+ noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+ init = init->gtPrev;
+ noway_assert(init != nullptr);
+ }
+
+ noway_assert(init->gtOper == GT_STMT);
+ noway_assert(test->gtOper == GT_STMT);
+ noway_assert(incr->gtOper == GT_STMT);
+
+ *ppInit = init->gtStmt.gtStmtExpr;
+ *ppTest = test->gtStmt.gtStmtExpr;
+ *ppIncr = incr->gtStmt.gtStmtExpr;
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Record the loop in the loop table.
+ */
+
+void Compiler::optRecordLoop(BasicBlock* head,
+ BasicBlock* first,
+ BasicBlock* top,
+ BasicBlock* entry,
+ BasicBlock* bottom,
+ BasicBlock* exit,
+ unsigned char exitCnt)
+{
+ // Record this loop in the table, if there's room.
+
+ assert(optLoopCount <= MAX_LOOP_NUM);
+ if (optLoopCount == MAX_LOOP_NUM)
+ {
+#if COUNT_LOOPS
+ loopOverflowThisMethod = true;
+#endif
+ return;
+ }
+
+ // Assumed preconditions on the loop we're adding.
+ assert(first->bbNum <= top->bbNum);
+ assert(top->bbNum <= entry->bbNum);
+ assert(entry->bbNum <= bottom->bbNum);
+ assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
+
+ // If the new loop contains any existing ones, add it in the right place.
+ unsigned char loopInd = optLoopCount;
+ for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
+ {
+ unsigned char prev = prevPlus1 - 1;
+ if (optLoopTable[prev].lpContainedBy(first, bottom))
+ {
+ loopInd = prev;
+ }
+ }
+ // Move up any loops if necessary.
+ for (unsigned j = optLoopCount; j > loopInd; j--)
+ {
+ optLoopTable[j] = optLoopTable[j - 1];
+ }
+
+#ifdef DEBUG
+ for (unsigned i = loopInd + 1; i < optLoopCount; i++)
+ {
+ // The loop is well-formed.
+ assert(optLoopTable[i].lpWellFormed());
+ // Check for disjoint.
+ if (optLoopTable[i].lpDisjoint(first, bottom))
+ {
+ continue;
+ }
+ // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
+ assert(optLoopTable[i].lpContainedBy(first, bottom));
+ }
+#endif // DEBUG
+
+ optLoopTable[loopInd].lpHead = head;
+ optLoopTable[loopInd].lpFirst = first;
+ optLoopTable[loopInd].lpTop = top;
+ optLoopTable[loopInd].lpBottom = bottom;
+ optLoopTable[loopInd].lpEntry = entry;
+ optLoopTable[loopInd].lpExit = exit;
+ optLoopTable[loopInd].lpExitCnt = exitCnt;
+
+ optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
+ optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
+ optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
+
+ optLoopTable[loopInd].lpFlags = 0;
+
+ // We haven't yet recorded any side effects.
+ optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
+ optLoopTable[loopInd].lpFieldsModified = nullptr;
+ optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
+
+ // If DO-WHILE loop mark it as such.
+ if (head->bbNext == entry)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
+ }
+
+ // If single exit loop mark it as such.
+ if (exitCnt == 1)
+ {
+ noway_assert(exit);
+ optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
+ }
+
+ //
+ // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
+ // We have the following restrictions:
+ // 1. The loop condition must be a simple one i.e. only one JTRUE node
+ // 2. There must be a loop iterator (a local var) that is
+ // incremented (decremented or lsh, rsh, mul) with a constant value
+ // 3. The iterator is incremented exactly once
+ // 4. The loop condition must use the iterator.
+ //
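+ // For example (illustrative), a loop such as "for (int i = 0; i < 10; i++) { ... }"
+ // satisfies these restrictions: 'i' is the iterator, it is incremented exactly once
+ // by the constant 1, and the single JTRUE condition "i < 10" uses it.
+ //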
+ if (bottom->bbJumpKind == BBJ_COND)
+ {
+ GenTreePtr init;
+ GenTreePtr test;
+ GenTreePtr incr;
+ if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
+ {
+ goto DONE_LOOP;
+ }
+
+ unsigned iterVar = BAD_VAR_NUM;
+ if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
+ {
+ goto DONE_LOOP;
+ }
+
+ // Make sure the "iterVar" initialization is never skipped,
+ // i.e. HEAD dominates the ENTRY.
+ if (!fgDominate(head, entry))
+ {
+ goto DONE_LOOP;
+ }
+
+ if (!optPopulateInitInfo(loopInd, init, iterVar))
+ {
+ goto DONE_LOOP;
+ }
+
+ // Check that the iterator is used in the loop condition.
+ if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
+ {
+ goto DONE_LOOP;
+ }
+
+ // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
+ // Record the iterator, the pointer to the test node
+ // and the initial value of the iterator (constant or local var)
+ optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
+
+ // Record iterator.
+ optLoopTable[loopInd].lpIterTree = incr;
+
+#if COUNT_LOOPS
+ // Save the initial value of the iterator - can be lclVar or constant
+ // Flag the loop accordingly.
+
+ iterLoopCount++;
+#endif
+
+#if COUNT_LOOPS
+ simpleTestLoopCount++;
+#endif
+
+ // Check if a constant iteration loop.
+ if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
+ {
+ // This is a constant loop.
+ optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
+#if COUNT_LOOPS
+ constIterLoopCount++;
+#endif
+ }
+
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ printf("\nConstant loop initializer:\n");
+ gtDispTree(init);
+
+ printf("\nConstant loop body:\n");
+
+ BasicBlock* block = head;
+ do
+ {
+ block = block->bbNext;
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ if (stmt->gtStmt.gtStmtExpr == incr)
+ {
+ break;
+ }
+ printf("\n");
+ gtDispTree(stmt->gtStmt.gtStmtExpr);
+ }
+ } while (block != bottom);
+ }
+#endif // DEBUG
+ }
+
+DONE_LOOP:
+ DBEXEC(verbose, optPrintLoopRecording(loopInd));
+ optLoopCount++;
+}
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// optPrintLoopRecording: Print a recording of the loop.
+//
+// Arguments:
+// loopInd - loop index.
+//
+void Compiler::optPrintLoopRecording(unsigned loopInd)
+{
+ printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
+ optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
+ optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
+ optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
+ optLoopTable[loopInd].lpExit);
+
+ // If an iterator loop print the iterator and the initialization.
+ if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
+ {
+ printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
+ printf(" (");
+ printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
+ printf(" ");
+ printf("%d )", optLoopTable[loopInd].lpIterConst());
+
+ if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
+ {
+ printf(" from %d", optLoopTable[loopInd].lpConstInit);
+ }
+ if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
+ {
+ printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
+ }
+
+ // If it is a simple test condition, print the operator and the limits
+ printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
+
+ if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
+ {
+ printf("%d ", optLoopTable[loopInd].lpConstLimit());
+ }
+
+ if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
+ {
+ printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
+ }
+
+ printf("]");
+ }
+
+ printf("\n");
+}
+
+void Compiler::optCheckPreds()
+{
+ BasicBlock* block;
+ BasicBlock* blockPred;
+ flowList* pred;
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ for (pred = block->bbPreds; pred; pred = pred->flNext)
+ {
+ // make sure this pred is part of the BB list
+ for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
+ {
+ if (blockPred == pred->flBlock)
+ {
+ break;
+ }
+ }
+ noway_assert(blockPred);
+ switch (blockPred->bbJumpKind)
+ {
+ case BBJ_COND:
+ if (blockPred->bbJumpDest == block)
+ {
+ break;
+ }
+ __fallthrough;
+ case BBJ_NONE:
+ noway_assert(blockPred->bbNext == block);
+ break;
+ case BBJ_EHFILTERRET:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ noway_assert(blockPred->bbJumpDest == block);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ * Find the natural loops, using dominators. Note that the test for
+ * a loop is slightly different from the standard one, because we have
+ * not done a depth first reordering of the basic blocks.
+ */
+
+void Compiler::optFindNaturalLoops()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optFindNaturalLoops()\n");
+ }
+#endif // DEBUG
+
+ flowList* pred;
+ flowList* predTop;
+ flowList* predEntry;
+
+ noway_assert(fgDomsComputed);
+ assert(fgHasLoops);
+
+#if COUNT_LOOPS
+ hasMethodLoops = false;
+ loopsThisMethod = 0;
+ loopOverflowThisMethod = false;
+#endif
+
+ /* We will use the following terminology:
+ * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
+ Not part of the loop itself.
+ * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
+ * but not the outer loop. ???)
+ * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
+ * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
+ * EXIT - the loop exit or the block right after the bottom
+ * ENTRY - the entry in the loop (not necessarily the TOP), but there must be only one entry
+ *
+ * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
+
+ |
+ v
+ head
+ |
+ | top/beg <--+
+ | | |
+ | ... |
+ | | |
+ | v |
+ +---> entry |
+ | |
+ ... |
+ | |
+ v |
+ +-- exit/tail |
+ | | |
+ | ... |
+ | | |
+ | v |
+ | bottom ---+
+ |
+ +------+
+ |
+ v
+
+ */
+
+ BasicBlock* head;
+ BasicBlock* top;
+ BasicBlock* bottom;
+ BasicBlock* entry;
+ BasicBlock* exit;
+ unsigned char exitCount;
+
+ for (head = fgFirstBB; head->bbNext; head = head->bbNext)
+ {
+ top = head->bbNext;
+ exit = nullptr;
+ exitCount = 0;
+
+ // Blocks that are rarely run have a zero bbWeight and should
+ // never be optimized here
+
+ if (top->bbWeight == BB_ZERO_WEIGHT)
+ {
+ continue;
+ }
+
+ for (pred = top->bbPreds; pred; pred = pred->flNext)
+ {
+ /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
+ * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
+ * as the definition says, but merely an indication that we have a loop there).
+ * Thus, we have to be very careful and after entry discovery check that it is indeed
+ * the only place we enter the loop (especially for non-reducible flow graphs).
+ */
+
+ bottom = pred->flBlock;
+ exitCount = 0;
+
+ if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
+ {
+ if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
+ (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
+ (bottom->bbJumpKind == BBJ_SWITCH))
+ {
+ /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
+ * BBJ_SWITCH that has a backward jump appears only for labeled break. */
+ goto NO_LOOP;
+ }
+
+ BasicBlock* loopBlock;
+
+ /* The presence of a "back edge" is an indication that a loop might be present here
+ *
+ * LOOP:
+ * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
+ * node in the loop to any other node in the loop (wholly within the loop)
+ * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
+ * in the loop from outside the loop, and that is through the ENTRY
+ */
+
+ /* Let's find the loop ENTRY */
+
+ if (head->bbJumpKind == BBJ_ALWAYS)
+ {
+ if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
+ {
+ /* OK - we enter somewhere within the loop */
+ entry = head->bbJumpDest;
+
+ /* some useful asserts
+ * Cannot enter at the top - should have been caught by redundant jumps */
+
+ assert((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
+ }
+ else
+ {
+ /* special case - don't consider now */
+ // assert (!"Loop entered in weird way!");
+ goto NO_LOOP;
+ }
+ }
+ // Can we fall through into the loop?
+ else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
+ {
+ /* The ENTRY is at the TOP (a do-while loop) */
+ entry = top;
+ }
+ else
+ {
+ goto NO_LOOP; // head does not flow into the loop; bail for now
+ }
+
+ // Now we find the "first" block -- the earliest block reachable within the loop.
+ // This is usually the same as "top", but can differ in rare cases where "top" is
+ // the entry block of a nested loop, and that nested loop branches backwards to
+ // a block before "top". We find this by searching for such backwards branches
+ // in the loop known so far.
+ BasicBlock* first = top;
+ BasicBlock* newFirst;
+ bool blocksToSearch = true;
+ BasicBlock* validatedAfter = bottom->bbNext;
+ while (blocksToSearch)
+ {
+ blocksToSearch = false;
+ newFirst = nullptr;
+ for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
+ {
+ unsigned nSucc = loopBlock->NumSucc();
+ for (unsigned j = 0; j < nSucc; j++)
+ {
+ BasicBlock* succ = loopBlock->GetSucc(j);
+ if ((newFirst == nullptr && succ->bbNum < first->bbNum) ||
+ (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
+ {
+ newFirst = succ;
+ }
+ }
+ }
+ if (newFirst != nullptr)
+ {
+ validatedAfter = first;
+ first = newFirst;
+ blocksToSearch = true;
+ }
+ }
+
+ // Is "head" still before "first"? If not, we don't have a valid loop...
+ if (head->bbNum >= first->bbNum)
+ {
+ JITDUMP(
+ "Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
+ top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
+ goto NO_LOOP;
+ }
+
+ /* Make sure ENTRY dominates all blocks in the loop
+ * This is necessary to ensure condition 2. above
+ * At the same time check if the loop has a single exit
+ * point - those loops are easier to optimize */
+
+ for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
+ {
+ if (!fgDominate(entry, loopBlock))
+ {
+ goto NO_LOOP;
+ }
+
+ if (loopBlock == bottom)
+ {
+ if (bottom->bbJumpKind != BBJ_ALWAYS)
+ {
+ /* there is an exit at the bottom */
+
+ noway_assert(bottom->bbJumpDest == top);
+ exit = bottom;
+ exitCount++;
+ continue;
+ }
+ }
+
+ BasicBlock* exitPoint;
+
+ switch (loopBlock->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ assert(loopBlock->bbJumpDest);
+ exitPoint = loopBlock->bbJumpDest;
+
+ if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
+ {
+ /* exit from a block other than BOTTOM */
+ exit = loopBlock;
+ exitCount++;
+ }
+ break;
+
+ case BBJ_NONE:
+ break;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ /* The "try" associated with this "finally" must be in the
+ * same loop, so the finally block will return control inside the loop */
+ break;
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ /* those are exits from the loop */
+ exit = loopBlock;
+ exitCount++;
+ break;
+
+ case BBJ_SWITCH:
+
+ unsigned jumpCnt;
+ jumpCnt = loopBlock->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ noway_assert(*jumpTab);
+ exitPoint = *jumpTab;
+
+ if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
+ {
+ exit = loopBlock;
+ exitCount++;
+ }
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+
+ /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
+ * This is to ensure condition 1. above which prevents marking fake loops
+ *
+ * Below is an example:
+ * for (....)
+ * {
+ * ...
+ * computations
+ * ...
+ * break;
+ * }
+ * The example above is not a loop since we bail after the first iteration
+ *
+ * The condition we have to check for is
+ * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
+ * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
+ *
+ * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
+ * loop bottom then we cannot iterate
+ *
+ * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
+ * part of the loop nodes (as per definition they are loop exits executed only once),
+ * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
+
+ for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
+ {
+ switch (loopBlock->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ if (fgDominate(loopBlock, bottom))
+ {
+ goto NO_LOOP;
+ }
+ default:
+ break;
+ }
+ }
+
+ bool canIterateLoop = false;
+
+ for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
+ {
+ if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum)
+ {
+ canIterateLoop = true;
+ break;
+ }
+ else if (predEntry->flBlock != head)
+ {
+ // The entry block has multiple predecessors outside the loop; the 'head'
+ // block isn't the only one. We only support a single 'head', so bail.
+ goto NO_LOOP;
+ }
+ }
+
+ if (!canIterateLoop)
+ {
+ goto NO_LOOP;
+ }
+
+ /* Double check - make sure that all loop blocks except ENTRY
+ * have no predecessors outside the loop - this ensures only one loop entry and prevents
+ * us from considering non-loops due to incorrectly assuming that we had a back edge
+ *
+ * OBSERVATION:
+ * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
+ */
+
+ for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
+ {
+ if (loopBlock == entry)
+ {
+ continue;
+ }
+
+ for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext)
+ {
+ if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum)
+ {
+ // noway_assert(!"Found loop with multiple entries");
+ goto NO_LOOP;
+ }
+ }
+ }
+
+ // Disqualify loops where the first block of the loop is less nested in EH than
+ // the bottom block. That is, we don't want to handle loops where the back edge
+ // goes from within an EH region to a first block that is outside that same EH
+ // region. Note that we *do* handle loops where the first block is the *first*
+ // block of a more nested EH region (since it is legal to branch to the first
+ // block of an immediately more nested EH region). So, for example, disqualify
+ // this:
+ //
+ // BB02
+ // ...
+ // try {
+ // ...
+ // BB10 BBJ_COND => BB02
+ // ...
+ // }
+ //
+ // Here, BB10 is more nested than BB02.
+
+ if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first))
+ {
+ JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
+ "loop.\n",
+ first->bbNum, bottom->bbNum);
+ goto NO_LOOP;
+ }
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // Disqualify loops where the first block of the loop is a finally target.
+ // The main problem is when multiple loops share a 'first' block that is a finally
+ // target and we canonicalize the loops by adding a new loop head. In that case, we
+ // need to update the blocks so the finally target bit is moved to the newly created
+ // block, and removed from the old 'first' block. This is 'hard', so at this point
+ // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
+ // long-term), it's easier to disallow the loop than to update the flow graph to
+ // support this case.
+
+ if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
+ {
+ JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
+ goto NO_LOOP;
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ /* At this point we have a loop - record it in the loop table
+ * If we found only one exit, record it in the table too
+ * (otherwise an exit = 0 in the loop table means multiple exits) */
+
+ assert(pred);
+ if (exitCount != 1)
+ {
+ exit = nullptr;
+ }
+ optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
+
+#if COUNT_LOOPS
+ if (!hasMethodLoops)
+ {
+ /* mark the method as containing natural loops */
+ totalLoopMethods++;
+ hasMethodLoops = true;
+ }
+
+ /* increment total number of loops found */
+ totalLoopCount++;
+ loopsThisMethod++;
+
+ /* keep track of the number of exits */
+ loopExitCountTable.record(static_cast<unsigned>(exitCount));
+#endif // COUNT_LOOPS
+ }
+
+ /* current predecessor not good for a loop - continue with another one, if any */
+ NO_LOOP:;
+ }
+ }
+
+#if COUNT_LOOPS
+ loopCountTable.record(loopsThisMethod);
+ if (maxLoopsPerMethod < loopsThisMethod)
+ {
+ maxLoopsPerMethod = loopsThisMethod;
+ }
+ if (loopOverflowThisMethod)
+ {
+ totalLoopOverflows++;
+ }
+#endif // COUNT_LOOPS
+
+ // Now the loop indices are stable. We can figure out parent/child relationships
+ // (using table indices to name loops), and label blocks.
+ for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
+ {
+ for (unsigned char possibleParent = loopInd; possibleParent > 0;)
+ {
+ possibleParent--;
+ if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
+ {
+ optLoopTable[loopInd].lpParent = possibleParent;
+ optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
+ optLoopTable[possibleParent].lpChild = loopInd;
+ break;
+ }
+ }
+ }
+
+ // Now label the blocks with the innermost loop to which they belong. Since parents
+ // precede children in the table, doing the labeling for each loop in order will achieve
+ // this -- the innermost loop labeling will be done last.
+ for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
+ {
+ BasicBlock* first = optLoopTable[loopInd].lpFirst;
+ BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
+ for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
+ {
+ blk->bbNatLoopNum = loopInd;
+ if (blk == bottom)
+ {
+ break;
+ }
+ assert(blk->bbNext != nullptr); // We should never reach nullptr.
+ }
+ }
+
+ // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
+ // one, if necessary, for loops containing others that share a "top."
+ bool mod = false;
+ for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
+ {
+ // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
+ if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
+ {
+ continue;
+ }
+
+ // Otherwise...
+ if (optCanonicalizeLoopNest(loopInd))
+ {
+ mod = true;
+ }
+ }
+ if (mod)
+ {
+ fgUpdateChangedFlowGraph();
+ }
+
+#ifdef DEBUG
+ if (verbose && optLoopCount > 0)
+ {
+ printf("\nFinal natural loop table:\n");
+ for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
+ {
+ optPrintLoopInfo(loopInd);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+}
+
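+//------------------------------------------------------------------------
+// optRedirectBlock: Replace the jump target(s) of "blk" using "redirectMap".
+//
+// Any jump successor of "blk" that appears as a key in "redirectMap" is replaced by the
+// corresponding mapped block. For BBJ_SWITCH blocks, the cached switch descriptor map
+// entry is invalidated if any target changed. Note that this does not update the
+// predecessor lists.
+//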
+void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
+{
+ BasicBlock* newJumpDest = nullptr;
+ switch (blk->bbJumpKind)
+ {
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ case BBJ_NONE:
+ case BBJ_EHFILTERRET:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHCATCHRET:
+ // These have no jump destination to update.
+ break;
+
+ case BBJ_ALWAYS:
+ case BBJ_LEAVE:
+ case BBJ_CALLFINALLY:
+ case BBJ_COND:
+ // All of these have a single jump destination to update.
+ if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
+ {
+ blk->bbJumpDest = newJumpDest;
+ }
+ break;
+
+ case BBJ_SWITCH:
+ {
+ bool redirected = false;
+ for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
+ {
+ if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
+ {
+ blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
+ redirected = true;
+ }
+ }
+ // If any redirections happened, invalidate the switch table map for the switch.
+ if (redirected)
+ {
+ GetSwitchDescMap()->Remove(blk);
+ }
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+// TODO-Cleanup: This should be a static member of the BasicBlock class.
+void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
+{
+ assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
+
+ // copy the jump destination(s) from "from" to "to".
+ switch (to->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_LEAVE:
+ case BBJ_CALLFINALLY:
+ case BBJ_COND:
+ // All of these have a single jump destination to update.
+ to->bbJumpDest = from->bbJumpDest;
+ break;
+
+ case BBJ_SWITCH:
+ {
+ to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
+ to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
+ to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
+
+ for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
+ {
+ to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+// Canonicalize the loop nest rooted at parent loop 'loopInd'.
+// Returns 'true' if the flow graph is modified.
+bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
+{
+ bool modified = false;
+
+ // Is the top of the current loop not in any nested loop?
+ if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
+ {
+ if (optCanonicalizeLoop(loopInd))
+ {
+ modified = true;
+ }
+ }
+
+ for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
+ child = optLoopTable[child].lpSibling)
+ {
+ if (optCanonicalizeLoopNest(child))
+ {
+ modified = true;
+ }
+ }
+
+ return modified;
+}
+
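+// optCanonicalizeLoop: Ensure that loop "loopInd" has a unique "top" block, i.e. one whose
+// innermost natural loop number is "loopInd" itself. If the current top is shared with a
+// nested loop, a new empty block is inserted before the loop's lpFirst block and becomes
+// the new lpTop/lpFirst. Returns true if the flow graph was modified.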
+bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
+{
+ // Is the top uniquely part of the current loop?
+ BasicBlock* t = optLoopTable[loopInd].lpTop;
+
+ if (t->bbNatLoopNum == loopInd)
+ {
+ return false;
+ }
+
+ JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
+ "canonicalize\n",
+ loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
+
+ // Otherwise, the top of this loop is also part of a nested loop.
+ //
+ // Insert a new unique top for this loop. We must be careful to put this new
+ // block in the correct EH region. Note that f->bbPrev might be in a different
+ // EH region. For example:
+ //
+ // try {
+ // ...
+ // BB07
+ // }
+ // BB08 // "first"
+ //
+ // In this case, first->bbPrev is BB07, which is in a different 'try' region.
+ // On the other hand, the first block of multiple loops might be the first
+ // block of a 'try' region that is completely contained in the multiple loops.
+ // for example:
+ //
+ // BB08 try { }
+ // ...
+ // BB10 BBJ_ALWAYS => BB08
+ // ...
+ // BB12 BBJ_ALWAYS => BB08
+ //
+ // Here, we have two loops, both with BB08 as the "first" block. Block BB08
+ // is a single-block "try" region. Neither loop "bottom" block is in the same
+ // "try" region as BB08. This is legal because you can jump to the first block
+ // of a try region. With EH normalization, no two "try" regions will share
+ // this block. In this case, we need to insert a new block for the outer loop
+ // in the same EH region as the branch from the "bottom":
+ //
+ // BB30 BBJ_NONE
+ // BB08 try { }
+ // ...
+ // BB10 BBJ_ALWAYS => BB08
+ // ...
+ // BB12 BBJ_ALWAYS => BB30
+ //
+ // Another possibility is that the "first" block of the loop nest can be the first block
+ // of a "try" region that also has other predecessors than those in the loop, or even in
+ // the "try" region (since blocks can target the first block of a "try" region). For example:
+ //
+ // BB08 try {
+ // ...
+ // BB10 BBJ_ALWAYS => BB08
+ // ...
+ // BB12 BBJ_ALWAYS => BB08
+ // BB13 }
+ // ...
+ // BB20 BBJ_ALWAYS => BB08
+ // ...
+ // BB25 BBJ_ALWAYS => BB08
+ //
+ // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
+ // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
+ // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
+ // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
+ // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
+ // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
+ // situation of branching to a non-first block of a 'try' region.
+ //
+ // We can also have a loop nest where the "first" block is outside of a "try" region
+ // and the back edges are inside a "try" region, for example:
+ //
+ // BB02 // "first"
+ // ...
+ // BB09 try { BBJ_COND => BB02
+ // ...
+ // BB15 BBJ_COND => BB02
+ // ...
+ // BB21 } // end of "try"
+ //
+ // In this case, both loop back edges were formed by "leave" instructions that were
+ // imported into branches that were later made conditional. In this case, we don't
+ // want to copy the EH region of the back edge, since that would create a block
+ // outside of and disjoint with the "try" region of the back edge. However, to
+ // simplify things, we disqualify this type of loop, so we should never see this here.
+
+ BasicBlock* h = optLoopTable[loopInd].lpHead;
+ BasicBlock* f = optLoopTable[loopInd].lpFirst;
+ BasicBlock* b = optLoopTable[loopInd].lpBottom;
+
+ // The loop must be entirely contained within a single handler region.
+ assert(BasicBlock::sameHndRegion(f, b));
+
+ // If the bottom block is in the same "try" region, then we extend the EH
+ // region. Otherwise, we add the new block outside the "try" region.
+ bool extendRegion = BasicBlock::sameTryRegion(f, b);
+ BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
+ if (!extendRegion)
+ {
+ // We need to set the EH region manually. Set it to be the same
+ // as the bottom block.
+ newT->copyEHRegion(b);
+ }
+
+ BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
+
+ // Redirect the "bottom" of the current loop to "newT".
+ BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
+ blockMap->Set(t, newT);
+ optRedirectBlock(b, blockMap);
+
+ // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
+ // to do so. However, there may be other predecessors from outside the loop nest that need to be updated
+ // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
+ // they might have been prevented from participating in the loop nest due to different EH nesting, or some
+ // other reason.
+ //
+ // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
+ // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
+ // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
+ // edge of the blockMap, so nothing will happen.
+ for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
+ {
+ BasicBlock* topPredBlock = topPred->flBlock;
+
+ // Skip if topPredBlock is in the loop.
+ // Note that this uses block number to detect membership in the loop. We are adding blocks during
+ // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
+ // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
+ if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
+ {
+ JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
+ "redirecting its bottom edge\n",
+ topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
+ continue;
+ }
+
+ JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
+ newT->bbNum);
+ optRedirectBlock(topPredBlock, blockMap);
+ }
+
+ assert(newT->bbNext == f);
+ if (f != t)
+ {
+ newT->bbJumpKind = BBJ_ALWAYS;
+ newT->bbJumpDest = t;
+ newT->bbTreeList = nullptr;
+ fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
+ }
+
+ // If it had been a do-while loop (top == entry), update entry, as well.
+ BasicBlock* origE = optLoopTable[loopInd].lpEntry;
+ if (optLoopTable[loopInd].lpTop == origE)
+ {
+ optLoopTable[loopInd].lpEntry = newT;
+ }
+ optLoopTable[loopInd].lpTop = newT;
+ optLoopTable[loopInd].lpFirst = newT;
+
+ newT->bbNatLoopNum = loopInd;
+
+ JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
+ dspPtr(newT), loopInd);
+
+ // Make sure the head block still goes to the entry...
+ if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
+ {
+ h->bbJumpKind = BBJ_ALWAYS;
+ h->bbJumpDest = optLoopTable[loopInd].lpEntry;
+ }
+ else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
+ {
+ BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
+ optLoopTable[loopInd].lpHead = h2;
+ h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
+ h2->bbTreeList = nullptr;
+ fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
+ }
+
+ // If any loops nested in "loopInd" have the same head and entry as "loopInd",
+ // it must be the case that they were do-while's (since "h" fell through to the entry).
+ // The new node "newT" becomes the head of such loops.
+ for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
+ childLoop = optLoopTable[childLoop].lpSibling)
+ {
+ if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
+ newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
+ {
+ optUpdateLoopHead(childLoop, h, newT);
+ }
+ }
+ return true;
+}
+
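+// optLoopContains: Returns true if loop "l2" is the same as, or is (transitively) nested
+// within, loop "l1". Returns false if "l2" is BasicBlock::NOT_IN_LOOP.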
+bool Compiler::optLoopContains(unsigned l1, unsigned l2)
+{
+ assert(l1 != BasicBlock::NOT_IN_LOOP);
+ if (l1 == l2)
+ {
+ return true;
+ }
+ else if (l2 == BasicBlock::NOT_IN_LOOP)
+ {
+ return false;
+ }
+ else
+ {
+ return optLoopContains(l1, optLoopTable[l2].lpParent);
+ }
+}
+
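+// optUpdateLoopHead: Change the recorded head of loop "loopInd" from "from" to "to", and
+// do the same for any child loops that shared the old head.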
+void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
+{
+ assert(optLoopTable[loopInd].lpHead == from);
+ optLoopTable[loopInd].lpHead = to;
+ for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
+ childLoop = optLoopTable[childLoop].lpSibling)
+ {
+ if (optLoopTable[childLoop].lpHead == from)
+ {
+ optUpdateLoopHead(childLoop, from, to);
+ }
+ }
+}
+
+/*****************************************************************************
+ * If the "i += const" will cause an overflow exception for the small types.
+ */
+
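+// For example (illustrative): a TYP_BYTE iterator that exits the loop at 130 exceeds
+// SCHAR_MAX (127), so the increment would have wrapped in the small type and we report
+// an overflow.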
+bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
+{
+ int type_MAX;
+
+ switch (incrType)
+ {
+ case TYP_BYTE:
+ type_MAX = SCHAR_MAX;
+ break;
+ case TYP_UBYTE:
+ type_MAX = UCHAR_MAX;
+ break;
+ case TYP_SHORT:
+ type_MAX = SHRT_MAX;
+ break;
+ case TYP_CHAR:
+ type_MAX = USHRT_MAX;
+ break;
+
+ case TYP_UINT: // Detected by checking for 32bit ....
+ case TYP_INT:
+ return false; // ... overflow same as done for TYP_INT
+
+ default:
+ NO_WAY("Bad type");
+ }
+
+ if (iterAtExit > type_MAX)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************
+ * If the "i -= const" will cause an underflow exception for the small types
+ */
+
+bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
+{
+ int type_MIN;
+
+ switch (decrType)
+ {
+ case TYP_BYTE:
+ type_MIN = SCHAR_MIN;
+ break;
+ case TYP_SHORT:
+ type_MIN = SHRT_MIN;
+ break;
+ case TYP_UBYTE:
+ type_MIN = 0;
+ break;
+ case TYP_CHAR:
+ type_MIN = 0;
+ break;
+
+ case TYP_UINT: // Detected by checking for 32bit ....
+ case TYP_INT:
+ return false; // ... underflow same as done for TYP_INT
+
+ default:
+ NO_WAY("Bad type");
+ }
+
+ if (iterAtExit < type_MIN)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Helper for unroll loops - Computes the number of repetitions
+ * in a constant loop. If it cannot prove the number is constant returns false
+ */
+
+bool Compiler::optComputeLoopRep(int constInit,
+ int constLimit,
+ int iterInc,
+ genTreeOps iterOper,
+ var_types iterOperType,
+ genTreeOps testOper,
+ bool unsTest,
+ bool dupCond,
+ unsigned* iterCount)
+{
+ noway_assert(genActualType(iterOperType) == TYP_INT);
+
+ __int64 constInitX;
+ __int64 constLimitX;
+
+ unsigned loopCount;
+ int iterSign;
+
+ // Using this, we can just do a signed comparison with other 32 bit values.
+ if (unsTest)
+ {
+ constLimitX = (unsigned int)constLimit;
+ }
+ else
+ {
+ constLimitX = (signed int)constLimit;
+ }
+
+ switch (iterOperType)
+ {
+// For small types, the iteration operator narrows these values, so narrow the initial value and increment here too
+
+#define INIT_ITER_BY_TYPE(type) \
+ constInitX = (type)constInit; \
+ iterInc = (type)iterInc;
+
+ case TYP_BYTE:
+ INIT_ITER_BY_TYPE(signed char);
+ break;
+ case TYP_UBYTE:
+ INIT_ITER_BY_TYPE(unsigned char);
+ break;
+ case TYP_SHORT:
+ INIT_ITER_BY_TYPE(signed short);
+ break;
+ case TYP_CHAR:
+ INIT_ITER_BY_TYPE(unsigned short);
+ break;
+
+ // For the big types, 32 bit arithmetic is performed
+
+ case TYP_INT:
+ case TYP_UINT:
+ if (unsTest)
+ {
+ constInitX = (unsigned int)constInit;
+ }
+ else
+ {
+ constInitX = (signed int)constInit;
+ }
+ break;
+
+ default:
+ noway_assert(!"Bad type");
+ NO_WAY("Bad type");
+ }
+
+ /* If iterInc is zero we have an infinite loop */
+ if (iterInc == 0)
+ {
+ return false;
+ }
+
+ /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
+ iterSign = (iterInc > 0) ? +1 : -1;
+
+ /* Initialize loopCount to zero */
+ loopCount = 0;
+
+ // If dupCond is true then the loop head contains a test which skips
+ // this loop if constInit does not pass the loop test.
+ // Such a loop can execute zero times.
+ // If dupCond is false then we have a true do-while loop, which
+ // always executes the loop body once before performing the loop test.
+ if (!dupCond)
+ {
+ loopCount += 1;
+ constInitX += iterInc;
+ }
+
+ // bail if count is based on wrap-around math
+ if (iterInc > 0)
+ {
+ if (constLimitX < constInitX)
+ {
+ return false;
+ }
+ }
+ else if (constLimitX > constInitX)
+ {
+ return false;
+ }
+
+ /* Compute the number of repetitions */
+
+ switch (testOper)
+ {
+ __int64 iterAtExitX;
+
+ case GT_EQ:
+ /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
+ return false;
+
+ case GT_NE:
+ /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
+ * have a constant number of iterations or loop forever.
+ * We have to compute (lim-init) mod iterInc to see if it is zero.
+ * If the mod is not zero then the limit test will be missed and a wrap will occur,
+ * which is probably not what the end user wanted, but it is legal.
+ */
+
+ if (iterInc > 0)
+ {
+ /* Stepping by one, i.e. Mod with 1 is always zero */
+ if (iterInc != 1)
+ {
+ if (((constLimitX - constInitX) % iterInc) != 0)
+ {
+ return false;
+ }
+ }
+ }
+ else
+ {
+ noway_assert(iterInc < 0);
+ /* Stepping by -1, i.e. Mod with 1 is always zero */
+ if (iterInc != -1)
+ {
+ if (((constInitX - constLimitX) % (-iterInc)) != 0)
+ {
+ return false;
+ }
+ }
+ }
+
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX != constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if iteration incr will cause overflow for small types
+ if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit overflow. Bad for TYP_(U)INT
+ if (iterAtExitX < constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ case GT_LT:
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX < constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if iteration incr will cause overflow for small types
+ if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit overflow. Bad for TYP_(U)INT
+ if (iterAtExitX < constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ case GT_LE:
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX <= constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if iteration incr will cause overflow for small types
+ if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit overflow. Bad for TYP_(U)INT
+ if (iterAtExitX <= constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ case GT_GT:
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX > constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if small types will underflow
+ if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
+ if (iterAtExitX > constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ case GT_GE:
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX >= constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if small types will underflow
+ if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
+ if (iterAtExitX >= constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ default:
+ noway_assert(!"Unknown operator for loop condition");
+ }
+
+ return false;
+}
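+
+// As a worked example with a hypothetical loop "for (i = 0; i < 10; i += 3)" and a duplicated
+// zero-trip test (dupCond == true): the GT_LT / GT_ASG_ADD case above computes
+// loopCount = (10 - 0 - 1) / 3 + 1 = 4 and iterAtExitX = 0 + 3 * 4 = 12; since 12 does not
+// overflow TYP_INT and is not below the limit, *iterCount is set to 4 (i takes 0, 3, 6, 9).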
+
+/*****************************************************************************
+ *
+ * Look for loop unrolling candidates and unroll them
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void Compiler::optUnrollLoops()
+{
+ if (compCodeOpt() == SMALL_CODE)
+ {
+ return;
+ }
+
+ if (optLoopCount == 0)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (JitConfig.JitNoUnroll())
+ {
+ return;
+ }
+#endif
+
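+ // Loop unrolling is currently skipped whenever loop cloning is enabled
+ // (see optCanCloneLoops); the two optimizations are not performed together.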
+ if (optCanCloneLoops())
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optUnrollLoops()\n");
+ }
+#endif
+ /* Look for loop unrolling candidates */
+
+ /* Double loop: after unrolling an inner loop we set 'change' to true and
+ * then go back over all of the loop candidates to try to unroll the next
+ * outer loop. Once a pass unrolls no loops, 'change' stays false and we
+ * are done.
+ */
+ for (;;)
+ {
+ bool change = false;
+
+ for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ BasicBlock* block;
+ BasicBlock* head;
+ BasicBlock* bottom;
+
+ GenTree* loop;
+ GenTree* test;
+ GenTree* incr;
+ GenTree* phdr;
+ GenTree* init;
+
+ bool dupCond;
+ int lval;
+ int lbeg; // initial value for iterator
+ int llim; // limit value for iterator
+ unsigned lvar; // iterator lclVar #
+ int iterInc; // value to increment the iterator
+ genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
+ var_types iterOperType; // type result of the oper (for overflow instrs)
+ genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
+ bool unsTest; // Is the comparison u/int
+
+ unsigned totalIter; // total number of iterations in the constant loop
+ unsigned loopCostSz; // Cost is size of one iteration
+ unsigned loopFlags; // actual lpFlags
+ unsigned requiredFlags; // required lpFlags
+
+ GenTree* loopList; // new stmt list of the unrolled loop
+ GenTree* loopLast;
+
+ static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
+ 10, // BLENDED_CODE
+ 0, // SMALL_CODE
+ 20, // FAST_CODE
+ 0 // COUNT_OPT_CODE
+ };
+
+ noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
+ noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
+
+ unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
+
+#ifdef DEBUG
+ if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
+ {
+ iterLimit *= 10;
+ }
+#endif
+
+ static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
+ 30, // BLENDED_CODE
+ 0, // SMALL_CODE
+ 60, // FAST_CODE
+ 0 // COUNT_OPT_CODE
+ };
+
+ noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
+ noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
+
+ int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
+
+#ifdef DEBUG
+ if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
+ {
+ unrollLimitSz *= 10;
+ }
+#endif
+
+ loopFlags = optLoopTable[lnum].lpFlags;
+ requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
+
+ /* Ignore the loop if we don't have a do-while with a single exit
+ that has a constant number of iterations */
+
+ if ((loopFlags & requiredFlags) != requiredFlags)
+ {
+ continue;
+ }
+
+ /* ignore if removed or marked as not unrollable */
+
+ if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
+ {
+ continue;
+ }
+
+ head = optLoopTable[lnum].lpHead;
+ noway_assert(head);
+ bottom = optLoopTable[lnum].lpBottom;
+ noway_assert(bottom);
+
+ /* The single exit must be at the bottom of the loop */
+ noway_assert(optLoopTable[lnum].lpExit);
+ if (optLoopTable[lnum].lpExit != bottom)
+ {
+ continue;
+ }
+
+ /* Unrolling loops with jumps in them is not worth the headache
+ * Later we might consider unrolling loops after un-switching */
+
+ block = head;
+ do
+ {
+ block = block->bbNext;
+ noway_assert(block);
+
+ if (block->bbJumpKind != BBJ_NONE)
+ {
+ if (block != bottom)
+ {
+ goto DONE_LOOP;
+ }
+ }
+ } while (block != bottom);
+
+ /* Get the loop data:
+ - initial constant
+ - limit constant
+ - iterator
+ - iterator increment
+ - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
+ - loop test type (i.e. GT_GE, GT_LT, etc...)
+ */
+
+ lbeg = optLoopTable[lnum].lpConstInit;
+ llim = optLoopTable[lnum].lpConstLimit();
+ testOper = optLoopTable[lnum].lpTestOper();
+
+ lvar = optLoopTable[lnum].lpIterVar();
+ iterInc = optLoopTable[lnum].lpIterConst();
+ iterOper = optLoopTable[lnum].lpIterOper();
+
+ iterOperType = optLoopTable[lnum].lpIterOperType();
+ unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
+
+ if (lvaTable[lvar].lvAddrExposed)
+ { // If the loop iteration variable is address-exposed then bail
+ continue;
+ }
+ if (lvaTable[lvar].lvIsStructField)
+ { // If the loop iteration variable is a promoted field from a struct then
+ // bail
+ continue;
+ }
+
+ /* Locate the pre-header and initialization and increment/test statements */
+
+ phdr = head->bbTreeList;
+ noway_assert(phdr);
+ loop = bottom->bbTreeList;
+ noway_assert(loop);
+
+ init = head->lastStmt();
+ noway_assert(init && (init->gtNext == nullptr));
+ test = bottom->lastStmt();
+ noway_assert(test && (test->gtNext == nullptr));
+ incr = test->gtPrev;
+ noway_assert(incr);
+
+ if (init->gtFlags & GTF_STMT_CMPADD)
+ {
+ /* Must be a duplicated loop condition */
+ noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+
+ dupCond = true;
+ init = init->gtPrev;
+ noway_assert(init);
+ }
+ else
+ {
+ dupCond = false;
+ }
+
+ /* Find the number of iterations - the function returns false if not a constant number */
+
+ if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
+ {
+ continue;
+ }
+
+ /* Forget it if there are too many repetitions or not a constant loop */
+
+ if (totalIter > iterLimit)
+ {
+ continue;
+ }
+
+ noway_assert(init->gtOper == GT_STMT);
+ init = init->gtStmt.gtStmtExpr;
+ noway_assert(test->gtOper == GT_STMT);
+ test = test->gtStmt.gtStmtExpr;
+ noway_assert(incr->gtOper == GT_STMT);
+ incr = incr->gtStmt.gtStmtExpr;
+
+ // Don't unroll loops we don't understand.
+ if (incr->gtOper == GT_ASG)
+ {
+ continue;
+ }
+
+ /* Make sure everything looks ok */
+ if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
+ (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
+ (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
+
+ !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
+ (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
+ (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
+
+ (test->gtOper != GT_JTRUE))
+ {
+ noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
+ continue;
+ }
+
+ /* heuristic - Estimated cost in code size of the unrolled loop */
+
+ loopCostSz = 0;
+
+ block = head;
+
+ do
+ {
+ block = block->bbNext;
+
+ /* Visit all the statements in the block */
+
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ /* Get the expression and stop if end reached */
+
+ GenTreePtr expr = stmt->gtStmtExpr;
+ if (expr == incr)
+ {
+ break;
+ }
+
+ /* Calculate gtCostSz */
+ gtSetStmtInfo(stmt);
+
+ /* Update loopCostSz */
+ loopCostSz += stmt->gtCostSz;
+ }
+ } while (block != bottom);
+
+ /* Compute the estimated increase in code size for the unrolled loop */
+
+ unsigned int fixedLoopCostSz;
+ fixedLoopCostSz = 8;
+
+ int unrollCostSz;
+ unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
+
+ /* Don't unroll if too much code duplication would result. */
+
+ if (unrollCostSz > unrollLimitSz)
+ {
+ /* prevent this loop from being revisited */
+ optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
+ goto DONE_LOOP;
+ }
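+
+ // As a rough worked example with hypothetical numbers (absent stress modes): a 4-iteration loop
+ // whose body costs 10 gives unrollCostSz = (10 * 4) - (10 + 8) = 22, within the BLENDED_CODE
+ // limit of 30, so unrolling proceeds; a body cost of 15 gives (15 * 4) - (15 + 8) = 37, which
+ // exceeds 30, and the loop is flagged LPFLG_DONT_UNROLL instead.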
+
+ /* Looks like a good idea to unroll this loop, let's do it! */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
+ if (head->bbNext->bbNum != bottom->bbNum)
+ {
+ printf("..BB%02u", bottom->bbNum);
+ }
+ printf(" over V%02u from %u to %u", lvar, lbeg, llim);
+ printf(" unrollCostSz = %d\n", unrollCostSz);
+ printf("\n");
+ }
+#endif
+
+ /* Create the unrolled loop statement list */
+
+ loopList = loopLast = nullptr;
+
+ for (lval = lbeg; totalIter; totalIter--)
+ {
+ block = head;
+
+ do
+ {
+ GenTreeStmt* stmt;
+ GenTree* expr;
+
+ block = block->bbNext;
+ noway_assert(block);
+
+ /* Visit all the statements in the block */
+
+ for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ /* Stop if we've reached the end of the loop */
+
+ if (stmt->gtStmtExpr == incr)
+ {
+ break;
+ }
+
+ /* Clone/substitute the expression */
+
+ expr = gtCloneExpr(stmt, 0, lvar, lval);
+
+ // gtCloneExpr doesn't handle everything; bail out if cloning failed
+
+ if (!expr)
+ {
+ optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
+ goto DONE_LOOP;
+ }
+
+ /* Append the expression to our list */
+
+ if (loopList)
+ {
+ loopLast->gtNext = expr;
+ }
+ else
+ {
+ loopList = expr;
+ }
+
+ expr->gtPrev = loopLast;
+ loopLast = expr;
+ }
+ } while (block != bottom);
+
+ /* update the new value for the unrolled iterator */
+
+ switch (iterOper)
+ {
+ case GT_ASG_ADD:
+ lval += iterInc;
+ break;
+
+ case GT_ASG_SUB:
+ lval -= iterInc;
+ break;
+
+ case GT_ASG_RSH:
+ case GT_ASG_LSH:
+ noway_assert(!"Unrolling not implemented for this loop iterator");
+ goto DONE_LOOP;
+
+ default:
+ noway_assert(!"Unknown operator for constant loop iterator");
+ goto DONE_LOOP;
+ }
+ }
+
+ /* Finish the linked list */
+
+ if (loopList)
+ {
+ loopList->gtPrev = loopLast;
+ loopLast->gtNext = nullptr;
+ }
+
+ /* Replace the body with the unrolled one */
+
+ block = head;
+
+ do
+ {
+ block = block->bbNext;
+ noway_assert(block);
+ block->bbTreeList = nullptr;
+ block->bbJumpKind = BBJ_NONE;
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ } while (block != bottom);
+
+ bottom->bbJumpKind = BBJ_NONE;
+ bottom->bbTreeList = loopList;
+ bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
+
+ bool dummy;
+
+ fgMorphStmts(bottom, &dummy, &dummy, &dummy);
+
+ /* Update bbRefs and bbPreds */
+ /* Here head->bbNext is bottom !!! - Replace it */
+
+ fgRemoveRefPred(head->bbNext, bottom);
+
+ /* Now change the initialization statement in the HEAD to "lvar = lval;"
+ * (the last value of the iterator in the loop)
+ * and drop the jump condition since the unrolled loop will always execute */
+
+ init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
+
+ /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
+
+ if (head->bbJumpKind == BBJ_COND)
+ {
+ phdr = head->bbTreeList;
+ noway_assert(phdr);
+ test = phdr->gtPrev;
+
+ noway_assert(test && (test->gtNext == nullptr));
+ noway_assert(test->gtOper == GT_STMT);
+ noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+
+ init = test->gtPrev;
+ noway_assert(init && (init->gtNext == test));
+ noway_assert(init->gtOper == GT_STMT);
+
+ init->gtNext = nullptr;
+ phdr->gtPrev = init;
+ head->bbJumpKind = BBJ_NONE;
+ head->bbFlags &= ~BBF_NEEDS_GCPOLL;
+
+ /* Update bbRefs and bbPreds */
+
+ fgRemoveRefPred(head->bbJumpDest, head);
+ }
+ else
+ {
+ /* the loop must execute */
+ noway_assert(head->bbJumpKind == BBJ_NONE);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Whole unrolled loop:\n");
+
+ GenTreePtr s = loopList;
+
+ while (s)
+ {
+ noway_assert(s->gtOper == GT_STMT);
+ gtDispTree(s);
+ s = s->gtNext;
+ }
+ printf("\n");
+
+ gtDispTree(init);
+ printf("\n");
+ }
+#endif
+
+ /* Remember that something has changed */
+
+ change = true;
+
+ /* Make sure to update loop table */
+
+ /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
+ * (also make head and bottom NULL - to hit an assert or GPF) */
+
+ optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
+ optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
+
+ DONE_LOOP:;
+ }
+
+ if (!change)
+ {
+ break;
+ }
+ }
+
+#ifdef DEBUG
+ fgDebugCheckBBlist();
+#endif
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Return true if there is a code path from 'topBB' to 'botBB' that will
+ * not execute a method call.
+ */
+
+bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
+{
+ // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
+ // as some helper calls are neither interruptible nor hijackable.
+ // When we can determine this, then we can set BBF_GC_SAFE_POINT for
+ // those helpers too.
+
+ noway_assert(topBB->bbNum <= botBB->bbNum);
+
+ // We can always check topBB and botBB for any gc safe points and early out
+
+ if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
+ {
+ return false;
+ }
+
+ // Otherwise we will need to rely upon the dominator sets
+
+ if (!fgDomsComputed)
+ {
+ // return a conservative answer of true when we don't have the dominator sets
+ return true;
+ }
+
+ BasicBlock* curBB = topBB;
+ for (;;)
+ {
+ noway_assert(curBB);
+
+ // If we added a loop pre-header block then we will
+ // have a bbNum greater than fgLastBB, and we won't have
+ // any dominator information about this block, so skip it.
+ //
+ if (curBB->bbNum <= fgLastBB->bbNum)
+ {
+ noway_assert(curBB->bbNum <= botBB->bbNum);
+
+ // Does this block contain a gc safe point?
+
+ if (curBB->bbFlags & BBF_GC_SAFE_POINT)
+ {
+ // Will this block always execute on the way to botBB ?
+ //
+ // Since we are checking every block in [topBB .. botBB] and we are using
+ // a lexical definition of a loop
+ // (all that we know is that botBB has a back-edge to topBB),
+ // while walking blocks in this range we may encounter some blocks
+ // that are not really part of the loop, and so we need to perform
+ // some additional checks:
+ //
+ // We will check that the current 'curBB' is reachable from 'topBB'
+ // and that it dominates the block containing the back-edge 'botBB'
+ // When both of these are true then we know that the gcsafe point in 'curBB'
+ // will be encountered in the loop and we can return false
+ //
+ if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
+ {
+ return false;
+ }
+ }
+ else
+ {
+ // If we've reached the destination block, then we're done
+
+ if (curBB == botBB)
+ {
+ break;
+ }
+ }
+ }
+
+ curBB = curBB->bbNext;
+ }
+
+ // If we didn't find any blocks that contained a gc safe point and
+ // also met the fgDominate and fgReachable criteria then we must return true
+ //
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Find the loop termination test at the bottom of the loop
+ */
+
+static GenTreePtr optFindLoopTermTest(BasicBlock* bottom)
+{
+ GenTreePtr testt = bottom->bbTreeList;
+
+ assert(testt && testt->gtOper == GT_STMT);
+
+ GenTreePtr result = testt->gtPrev;
+
+#ifdef DEBUG
+ while (testt->gtNext)
+ {
+ testt = testt->gtNext;
+ }
+
+ assert(testt == result);
+#endif
+
+ return result;
+}
+
+/*****************************************************************************
+ * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond) { do {} while (cond); }"
+ */
+
+void Compiler::fgOptWhileLoop(BasicBlock* block)
+{
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+ noway_assert(compCodeOpt() != SMALL_CODE);
+
+ /*
+ Optimize while loops into do { } while loop
+ Our loop hoisting logic requires do { } while loops.
+ Specifically, we're looking for the following case:
+
+ ...
+ jmp test
+ loop:
+ ...
+ ...
+ test:
+ cond
+ jtrue loop
+
+ If we find this, and the condition is simple enough, we change
+ the loop to the following:
+
+ ...
+ cond
+ jfalse done
+ // else fall-through
+ loop:
+ ...
+ ...
+ test:
+ cond
+ jtrue loop
+ done:
+
+ */
+
+ /* Does the BB end with an unconditional jump? */
+
+ if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
+ { // It can't be one of the ones we use for our exception magic
+ return;
+ }
+
+ // It has to be a forward jump
+ // TODO-CQ: Check if we can also optimize the backwards jump as well.
+ //
+ if (fgIsForwardBranch(block) == false)
+ {
+ return;
+ }
+
+ // Get hold of the jump target
+ BasicBlock* bTest = block->bbJumpDest;
+
+ // Does the block consist of 'jtrue(cond) block' ?
+ if (bTest->bbJumpKind != BBJ_COND)
+ {
+ return;
+ }
+
+ // bTest must be a backwards jump to block->bbNext
+ if (bTest->bbJumpDest != block->bbNext)
+ {
+ return;
+ }
+
+ // Since test is a BBJ_COND it will have a bbNext
+ noway_assert(bTest->bbNext);
+
+ // 'block' must be in the same try region as the condition, since we're going to insert
+ // a duplicated condition in 'block', and the condition might include exception throwing code.
+ if (!BasicBlock::sameTryRegion(block, bTest))
+ {
+ return;
+ }
+
+ // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
+ // same try region (or no try region) to avoid generating illegal flow.
+ BasicBlock* bTestNext = bTest->bbNext;
+ if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
+ {
+ return;
+ }
+
+ GenTreePtr condStmt = optFindLoopTermTest(bTest);
+
+ // bTest must contain only a jtrue, with no other stmts; we will only clone
+ // the conditional, so any other statements would not get cloned.
+ // TODO-CQ: consider cloning the whole bTest block and inserting it after block.
+ //
+ if (bTest->bbTreeList != condStmt)
+ {
+ return;
+ }
+
+ /* Get to the condition node from the statement tree */
+
+ noway_assert(condStmt->gtOper == GT_STMT);
+
+ GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
+ noway_assert(condTree->gtOper == GT_JTRUE);
+
+ condTree = condTree->gtOp.gtOp1;
+
+ // The condTree has to be a RelOp comparison
+ // TODO-CQ: Check if we can also optimize the backwards jump as well.
+ //
+ if (condTree->OperIsCompare() == false)
+ {
+ return;
+ }
+
+ /* We call gtPrepareCost to measure the cost of duplicating this tree */
+
+ gtPrepareCost(condTree);
+ unsigned estDupCostSz = condTree->gtCostSz;
+
+ double loopIterations = (double)BB_LOOP_WEIGHT;
+
+ bool allProfileWeightsAreValid = false;
+ BasicBlock::weight_t weightBlock = block->bbWeight;
+ BasicBlock::weight_t weightTest = bTest->bbWeight;
+ BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
+
+ // If we have profile data then we calculate the number of times
+ // the loop will iterate into loopIterations
+ if (fgIsUsingProfileWeights())
+ {
+ // Only rely upon the profile weight when all three of these blocks
+ // have good profile weights
+ if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
+ (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
+ {
+ allProfileWeightsAreValid = true;
+
+ // If this while loop never iterates then don't bother transforming
+ if (weightNext == 0)
+ {
+ return;
+ }
+
+ // with (weightNext > 0) we should also have (weightTest >= weightBlock)
+ // if the profile weights are all valid.
+ //
+ // weightNext is the number of times this loop iterates
+ // weightBlock is the number of times that we enter the while loop
+ // loopIterations is the average number of times that this loop iterates
+ //
+ if (weightTest >= weightBlock)
+ {
+ loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
+ }
+ }
+ }
+
+ unsigned maxDupCostSz = 32;
+
+ // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
+ // set loop weights yet
+ if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
+ {
+ maxDupCostSz *= 4;
+ }
+
+ // If this loop iterates a lot then raise the maxDupCost
+ if (loopIterations >= 12.0)
+ {
+ maxDupCostSz *= 2;
+ }
+ if (loopIterations >= 96.0)
+ {
+ maxDupCostSz *= 2;
+ }
+
+ // If the loop condition has a shared static helper, we really want this loop converted
+ // as not converting the loop will disable loop hoisting, meaning the shared helper will
+ // be executed on every loop iteration.
+ int countOfHelpers = 0;
+ fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
+
+ if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
+ {
+ maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
+ }
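+
+ // For illustration with hypothetical numbers: under BLENDED_CODE, with no stress modes, a
+ // profiled loopIterations of 16 and no shared static helpers, maxDupCostSz becomes 32 * 2 = 64,
+ // so a condition whose gtCostSz is 40 is duplicated while one costing 70 is not.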
+
+ // If the compare has too high cost then we don't want to dup
+
+ bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
+ "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
+ condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
+ costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
+ allProfileWeightsAreValid ? "true" : "false");
+ }
+#endif
+
+ if (costIsTooHigh)
+ {
+ return;
+ }
+
+ /* Looks good - duplicate the condition test */
+
+ condTree->gtFlags |= GTF_RELOP_ZTT;
+
+ condTree = gtCloneExpr(condTree);
+ gtReverseCond(condTree);
+
+ // Make sure clone expr copied the flag
+ assert(condTree->gtFlags & GTF_RELOP_ZTT);
+
+ condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
+
+ /* Create a statement entry out of the condition and
+ append the condition test at the end of 'block' */
+
+ GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
+
+ copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
+
+#ifdef DEBUGGING_SUPPORT
+ if (opts.compDbgInfo)
+ {
+ copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
+ }
+#endif
+
+ // Flag the block that received the copy as potentially having an array/vtable
+ // reference if the block copied from did; this is a conservative guess.
+ if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
+ {
+ block->bbFlags |= copyFlags;
+ }
+
+ // If we have profile data for all blocks and we know that we are cloning the
+ // bTest block into block and thus changing the control flow from block so
+ // that it no longer goes directly to bTest anymore, we have to adjust the
+ // weight of bTest by subtracting out the weight of block.
+ //
+ if (allProfileWeightsAreValid)
+ {
+ //
+ // Some additional sanity checks before adjusting the weight of bTest
+ //
+ if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
+ {
+ // Get the two edge that flow out of bTest
+ flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
+ flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
+
+ // Calculate the new weight for block bTest
+
+ BasicBlock::weight_t newWeightTest =
+ (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
+ bTest->bbWeight = newWeightTest;
+
+ if (newWeightTest == BB_ZERO_WEIGHT)
+ {
+ bTest->bbFlags |= BBF_RUN_RARELY;
+ // All out edge weights are set to zero
+ edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
+ edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
+ }
+ else
+ {
+ // Update our edge weights
+ edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
+ edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
+ }
+ }
+ }
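+
+ // Numerically, with hypothetical weights: if 'block' enters the loop 100 times (weightBlock = 100)
+ // and bTest originally ran 500 times (weightTest = 500), bTest's weight is lowered to 400, the
+ // minimum weights of its two outgoing edges are zeroed, and their maximum weights are capped at 400.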
+
+ /* Change the block to end with a conditional jump */
+
+ block->bbJumpKind = BBJ_COND;
+ block->bbJumpDest = bTest->bbNext;
+
+ /* Mark the jump dest block as being a jump target */
+ block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
+
+ fgAddRefPred(block->bbNext, block);
+
+ fgRemoveRefPred(bTest, block);
+ fgAddRefPred(bTest->bbNext, block);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
+ bTest->bbNum);
+ printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
+
+ gtDispTree(copyOfCondStmt);
+ }
+
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Optimize the BasicBlock layout of the method
+ */
+
+void Compiler::optOptimizeLayout()
+{
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optOptimizeLayout()\n");
+ fgDispHandlerTab();
+ }
+
+ /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
+ fgDebugCheckBBlist();
+#endif
+
+ noway_assert(fgModified == false);
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ /* Make sure the appropriate fields are initialized */
+
+ if (block->bbWeight == BB_ZERO_WEIGHT)
+ {
+ /* Zero weighted block can't have a LOOP_HEAD flag */
+ noway_assert(block->isLoopHead() == false);
+ continue;
+ }
+
+ assert(block->bbLoopNum == 0);
+
+ if (compCodeOpt() != SMALL_CODE)
+ {
+ /* Optimize "while(cond){}" loops to "if (cond) { do {} while (cond); }" */
+
+ fgOptWhileLoop(block);
+ }
+ }
+
+ if (fgModified)
+ {
+ // Recompute the edge weights if we have modified the flow graph in fgOptWhileLoop
+ fgComputeEdgeWeights();
+ }
+
+ fgUpdateFlowGraph(true);
+ fgReorderBlocks();
+ fgUpdateFlowGraph();
+}
+
+/*****************************************************************************
+ *
+ * Perform loop inversion, find and classify natural loops
+ */
+
+void Compiler::optOptimizeLoops()
+{
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optOptimizeLoops()\n");
+ }
+#endif
+
+ optSetBlockWeights();
+
+ /* Were there any loops in the flow graph? */
+
+ if (fgHasLoops)
+ {
+ /* now that we have dominator information we can find loops */
+
+ optFindNaturalLoops();
+
+ unsigned loopNum = 0;
+
+ /* Iterate over the flow graph, marking all loops */
+
+ /* We will use the following terminology:
+ * top - the first basic block in the loop (i.e. the head of the backward edge)
+ * bottom - the last block in the loop (i.e. the block from which we jump to the top)
+ * lastBottom - used when we have multiple back-edges to the same top
+ */
+
+ flowList* pred;
+
+ BasicBlock* top;
+
+ for (top = fgFirstBB; top; top = top->bbNext)
+ {
+ BasicBlock* foundBottom = nullptr;
+
+ for (pred = top->bbPreds; pred; pred = pred->flNext)
+ {
+ /* Is this a loop candidate? - We look for "back edges" */
+
+ BasicBlock* bottom = pred->flBlock;
+
+ /* is this a backward edge? (from BOTTOM to TOP) */
+
+ if (top->bbNum > bottom->bbNum)
+ {
+ continue;
+ }
+
+ /* 'top' also must have the BBF_LOOP_HEAD flag set */
+
+ if (top->isLoopHead() == false)
+ {
+ continue;
+ }
+
+ /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
+
+ if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
+ {
+ continue;
+ }
+
+ /* the top block must be able to reach the bottom block */
+ if (!fgReachable(top, bottom))
+ {
+ continue;
+ }
+
+ /* Found a new loop, record the longest backedge in foundBottom */
+
+ if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
+ {
+ foundBottom = bottom;
+ }
+ }
+
+ if (foundBottom)
+ {
+ loopNum++;
+#ifdef DEBUG
+ /* Mark the loop header as such */
+ assert(FitsIn<unsigned char>(loopNum));
+ top->bbLoopNum = (unsigned char)loopNum;
+#endif
+
+ /* Mark all blocks between 'top' and 'bottom' */
+
+ optMarkLoopBlocks(top, foundBottom, false);
+ }
+
+ // We track at most 255 loops
+ if (loopNum == 255)
+ {
+#if COUNT_LOOPS
+ totalUnnatLoopOverflows++;
+#endif
+ break;
+ }
+ }
+
+#if COUNT_LOOPS
+ totalUnnatLoopCount += loopNum;
+#endif
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (loopNum > 0)
+ {
+ printf("\nFound a total of %d loops.", loopNum);
+ printf("\nAfter loop weight marking:\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+ }
+#endif
+ optLoopsMarked = true;
+ }
+}
+
+//------------------------------------------------------------------------
+// optDeriveLoopCloningConditions: Derive loop cloning conditions.
+//
+// Arguments:
+// loopNum - the current loop index for which conditions are derived.
+// context - data structure where all loop cloning info is kept.
+//
+// Return Value:
+// "false" if conditions cannot be obtained. "true" otherwise.
+// The cloning conditions are updated in the "conditions"[loopNum] field
+// of the "context" parameter.
+//
+// Operation:
+// Inspect the loop cloning optimization candidates and populate the conditions necessary
+// for each optimization candidate. Checks that the loop stride is "> 0" when the loop
+// condition is "less than". If the initializer is a "var" init, then the condition
+// "var >= 0" is added; if the limit is a variable, then "var >= 0" and "var <= a.len"
+// are added to "context". These conditions are checked in the pre-header block
+// and the cloning choice is made.
+//
+// Assumption:
+// Callers should assume AND operation is used i.e., if all conditions are
+// true, then take the fast path.
+//
+bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
+{
+ JITDUMP("------------------------------------------------------------\n");
+ JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
+
+ LoopDsc* loop = &optLoopTable[loopNum];
+ ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
+
+ if (loop->lpTestOper() == GT_LT)
+ {
+ // Stride conditions
+ if (loop->lpIterConst() <= 0)
+ {
+ JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
+ return false;
+ }
+
+ // Init conditions
+ if (loop->lpFlags & LPFLG_CONST_INIT)
+ {
+ // Only allowing const init at this time.
+ if (loop->lpConstInit < 0)
+ {
+ JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
+ return false;
+ }
+ }
+ else if (loop->lpFlags & LPFLG_VAR_INIT)
+ {
+ // limitVar >= 0
+ LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
+ LC_Expr(LC_Ident(0, LC_Ident::Const)));
+ context->EnsureConditions(loopNum)->Push(geZero);
+ }
+ else
+ {
+ JITDUMP("> Not variable init\n");
+ return false;
+ }
+
+ // Limit Conditions
+ LC_Ident ident;
+ if (loop->lpFlags & LPFLG_CONST_LIMIT)
+ {
+ int limit = loop->lpConstLimit();
+ if (limit < 0)
+ {
+ JITDUMP("> limit %d is invalid\n", limit);
+ return false;
+ }
+ ident = LC_Ident(limit, LC_Ident::Const);
+ }
+ else if (loop->lpFlags & LPFLG_VAR_LIMIT)
+ {
+ unsigned limitLcl = loop->lpVarLimit();
+ ident = LC_Ident(limitLcl, LC_Ident::Var);
+
+ LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
+
+ context->EnsureConditions(loopNum)->Push(geZero);
+ }
+ else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
+ {
+ ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
+ if (!loop->lpArrLenLimit(this, index))
+ {
+ JITDUMP("> ArrLen not matching\n");
+ return false;
+ }
+ ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
+
+ // Ensure that this array must be dereference-able, before executing the actual condition.
+ LC_Array array(LC_Array::Jagged, index, LC_Array::None);
+ context->EnsureDerefs(loopNum)->Push(array);
+ }
+ else
+ {
+ JITDUMP("> Undetected limit\n");
+ return false;
+ }
+
+ for (unsigned i = 0; i < optInfos->Size(); ++i)
+ {
+ LcOptInfo* optInfo = optInfos->GetRef(i);
+ switch (optInfo->GetOptType())
+ {
+ case LcOptInfo::LcJaggedArray:
+ {
+ // limit <= arrLen
+ LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
+ LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
+ LC_Ident arrLenIdent = LC_Ident(arrLen);
+
+ LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
+ context->EnsureConditions(loopNum)->Push(cond);
+
+ // Ensure that this array must be dereference-able, before executing the actual condition.
+ LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
+ context->EnsureDerefs(loopNum)->Push(array);
+ }
+ break;
+ case LcOptInfo::LcMdArray:
+ {
+ // limit <= mdArrLen
+ LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
+ LC_Condition cond(GT_LE, LC_Expr(ident),
+ LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
+ mdArrInfo->GetArrIndexForDim(getAllocator()),
+ mdArrInfo->dim, LC_Array::None))));
+ context->EnsureConditions(loopNum)->Push(cond);
+ }
+ break;
+
+ default:
+ JITDUMP("Unknown opt\n");
+ return false;
+ }
+ }
+ JITDUMP("Conditions: (");
+ DBEXEC(verbose, context->PrintConditions(loopNum));
+ JITDUMP(")\n");
+ return true;
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------------------
+// optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
+//
+// Arguments:
+// loopNum - the current loop index for which conditions are derived.
+// context - data structure where all loop cloning info is kept.
+//
+// Return Value:
+// "false" if conditions cannot be obtained. "true" otherwise.
+// The deref conditions are updated in the "derefConditions"[loopNum] field
+// of the "context" parameter.
+//
+// Definition of Deref Conditions:
+// To be able to check for the loop cloning condition that (limitVar <= a.len)
+// we should first be able to dereference "a". i.e., "a" is non-null.
+//
+// Example:
+//
+// for (i in 0..n)
+// for (j in 0..n)
+// for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
+// // (n <= a[i][j].len) and other safety conditions to take the fast path
+// a[i][j][k] = 0;
+//
+// Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
+// This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
+// be true to do the deref.
+//
+// (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
+//
+// Note the short circuiting AND. Implication: these conditions should be performed in separate
+// blocks each of which will branch to slow path if the condition evaluates to false.
+//
+// Now, imagine a situation where we have
+// a[x][y][k] = 20 and a[i][j][k] = 0
+// also in the inner most loop where x, y are parameters, then our conditions will have
+// to include
+// (x < a.len) &&
+// (y < a[x].len)
+// in addition to the above conditions (1) to get rid of bounds check on index 'k'
+//
+// But these conditions can be checked together with conditions
+// (i < a.len) without a need for a separate block. In summary, the conditions will be:
+//
+// (a != null) &&
+// ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
+// (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
+// (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
+// (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
+//
+// This naturally yields a tree style pattern, where the nodes of the tree are
+// the array and indices respectively.
+//
+// Example:
+// a => {
+// i => {
+// j => {
+// k => {}
+// }
+// },
+// x => {
+// y => {
+// k => {}
+// }
+// }
+// }
+//
+// Notice that the variables in the same levels can have their conditions combined in the
+// same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
+// combined with a short-circuiting AND (i.e., different basic blocks).
+//
+// Operation:
+// Construct a tree of array indices and the array which will generate the optimal
+// conditions for loop cloning.
+//
+// a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
+//
+// a => {
+// i => {
+// j => {
+// k => {}
+// },
+// y => {
+// k => {}
+// },
+// }
+// },
+// b => {
+// i => {}
+// }
+// In this method, we will construct such a tree by descending depth first into the array
+// index operation and forming a tree structure as we encounter the array or the index variables.
+//
+// This tree structure will then be used to generate conditions like below:
+// (a != null) & (b != null) && // from the first level of the tree.
+//
+// (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
+// (a[i] != null) & (b[i] != null) && // from the second level of the tree.
+//
+// (j < a[i].len) & (y < a[i].len) && // from the third level.
+// (a[i][j] != null) & (a[i][y] != null) && // from the third level.
+//
+// and so on.
+//
+//
+bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
+{
+ ExpandArrayStack<LC_Deref*> nodes(getAllocator());
+ int maxRank = -1;
+
+ // Get the dereference-able arrays.
+ ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
+
+ // For each array in the dereference list, construct a tree,
+ // where the nodes are array and index variables and an edge 'u-v'
+ // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
+ // 'u-v-w' transitively if u[v][w] occurs.
+ for (unsigned i = 0; i < deref->Size(); ++i)
+ {
+ LC_Array& array = (*deref)[i];
+
+ // First populate the array base variable.
+ LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
+ if (node == nullptr)
+ {
+ node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
+ nodes.Push(node);
+ }
+
+ // For each dimension (level) for the array, populate the tree with the variable
+ // from that dimension.
+ unsigned rank = (unsigned)array.GetDimRank();
+ for (unsigned i = 0; i < rank; ++i)
+ {
+ node->EnsureChildren(getAllocator());
+ LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
+ if (tmp == nullptr)
+ {
+ tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
+ node->children->Push(tmp);
+ }
+
+ // Descend one level down.
+ node = tmp;
+ }
+
+ // Keep the maxRank of all array dereferences.
+ maxRank = max((int)rank, maxRank);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ for (unsigned i = 0; i < nodes.Size(); ++i)
+ {
+ if (i != 0)
+ {
+ printf(",");
+ }
+ nodes[i]->Print();
+ printf("\n");
+ }
+ }
+#endif
+
+ if (maxRank == -1)
+ {
+ return false;
+ }
+
+ // First level will always yield the null-check, since it is made of the array base variables.
+ // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
+ // So add 1 after rank * 2.
+ unsigned condBlocks = (unsigned)maxRank * 2 + 1;
+
+ // Heuristic to avoid creating too many blocks.
+ if (condBlocks > 4)
+ {
+ return false;
+ }
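+
+ // For example, if maxRank is 2 the formula above gives condBlocks = 2 * 2 + 1 = 5, which this
+ // heuristic rejects; a maxRank of 1 gives 3 condition blocks and is allowed.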
+
+ // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
+ for (unsigned i = 0; i < nodes.Size(); ++i)
+ {
+ nodes[i]->DeriveLevelConditions(levelCond);
+ }
+
+ DBEXEC(verbose, context->PrintBlockConditions(loopNum));
+ return true;
+}
+
+#ifdef DEBUG
+//----------------------------------------------------------------------------
+// optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
+//
+// Arguments:
+// block - the block in which the helper call needs to be inserted.
+// insertBefore - the tree before which the helper call will be inserted.
+//
+void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
+{
+ if (JitConfig.JitDebugLogLoopCloning() == 0)
+ {
+ return;
+ }
+ GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
+ GenTreePtr stmt = fgNewStmtFromTree(logCall);
+ fgInsertStmtBefore(block, insertBefore, stmt);
+ fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
+}
+#endif
+
+//------------------------------------------------------------------------
+// optPerformStaticOptimizations: Perform the optimizations for the optimization
+// candidates gathered during the cloning phase.
+//
+// Arguments:
+// loopNum - the current loop index for which the optimizations are performed.
+// context - data structure where all loop cloning info is kept.
+// dynamicPath - If true, the optimization is performed in the fast path among the
+// cloned loops. If false, it means this is the only path (i.e.,
+// there is no slow path.)
+//
+// Operation:
+// Perform the optimizations on the fast path i.e., the path in which the
+// optimization candidates were collected at the time of identifying them.
+// The candidates store all the information necessary (the tree/stmt/block
+// they are from) to perform the optimization.
+//
+// Assumption:
+// The unoptimized path is either already cloned when this method is called or
+// there is no unoptimized path (got eliminated statically.) So this method
+// performs the optimizations assuming that the path in which the candidates
+// were collected is the fast path in which the optimizations will be performed.
+//
+void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
+{
+ ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
+ for (unsigned i = 0; i < optInfos->Size(); ++i)
+ {
+ LcOptInfo* optInfo = optInfos->GetRef(i);
+ switch (optInfo->GetOptType())
+ {
+ case LcOptInfo::LcJaggedArray:
+ {
+ LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
+ compCurBB = arrIndexInfo->arrIndex.useBlock;
+ optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true,
+ GTF_ASG, true);
+ DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
+ }
+ break;
+ case LcOptInfo::LcMdArray:
+ // TODO-CQ: CLONE: Implement.
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+//----------------------------------------------------------------------------
+// optCanCloneLoops: Use the environment flag to determine whether loop
+// cloning is allowed to be performed.
+//
+// Return Value:
+// Returns true in retail builds, and in debug builds unless the
+// COMPlus_JitCloneLoops flag is set to 0.
+//
+bool Compiler::optCanCloneLoops()
+{
+ // Enabled for retail builds now.
+ unsigned cloneLoopsFlag = 1;
+#ifdef DEBUG
+ cloneLoopsFlag = JitConfig.JitCloneLoops();
+#endif
+ return (cloneLoopsFlag != 0);
+}
+
+//----------------------------------------------------------------------------
+// optIsLoopClonable: Determine whether this loop can be cloned.
+//
+// Arguments:
+// loopInd - loop index which needs to be checked if it can be cloned.
+//
+// Return Value:
+// Returns true if the loop can be cloned. If it returns false,
+// it prints a message in debug builds explaining why the loop can't be cloned.
+//
+bool Compiler::optIsLoopClonable(unsigned loopInd)
+{
+ // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
+ // inserting new EH regions in the exception table yet.
+ BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
+ unsigned loopRetCount = 0;
+ for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
+ {
+ if (blk->bbJumpKind == BBJ_RETURN)
+ {
+ loopRetCount++;
+ }
+ if (bbIsTryBeg(blk))
+ {
+ JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
+ return false;
+ }
+ }
+
+ // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
+ // into the middle of a handler (to go to the cloned copy.) Reject.
+ if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
+ {
+ JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
+ return false;
+ }
+
+ // If the head and entry are in different EH regions, reject.
+ if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
+ {
+ JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
+ return false;
+ }
+
+ // Is the first block after the last block of the loop a handler or filter start?
+ // Usually, we create a dummy block after the original loop, to skip over the loop clone
+ // and go to where the original loop did. That raises problems when we don't actually go to
+ // that block; this is one of those cases. This could be fixed fairly easily; for example,
+ // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
+ // loop. This is just a corner to cut to get this working faster.
+ BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
+ if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
+ {
+ JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
+ return false;
+ }
+
+ // We've previously made a decision whether to have separate return epilogs, or branch to one.
+ // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
+ // (I thought this was x86-specific, but it's not #ifdef'd for x86 only. On other architectures, the decision
+ // should be made as a heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
+ if (fgReturnCount + loopRetCount > 4)
+ {
+ JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
+ "would exceed the limit of 4.\n",
+ loopRetCount, fgReturnCount);
+ return false;
+ }
+
+ // Otherwise, we're going to add those return blocks.
+ fgReturnCount += loopRetCount;
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Identify loop cloning opportunities, derive loop cloning conditions,
+ * perform loop cloning, use the derived conditions to choose which
+ * path to take.
+ */
+void Compiler::optCloneLoops()
+{
+ JITDUMP("\n*************** In optCloneLoops()\n");
+ if (optLoopCount == 0 || !optCanCloneLoops())
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Blocks/Trees at start of phase\n");
+ fgDispBasicBlocks(true);
+ }
+#endif
+
+ LoopCloneContext context(optLoopCount, getAllocator());
+
+ // Obtain array optimization candidates in the context.
+ optObtainLoopCloningOpts(&context);
+
+ // For each loop, derive cloning conditions for the optimization candidates.
+ for (unsigned i = 0; i < optLoopCount; ++i)
+ {
+ ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
+ if (optInfos == nullptr)
+ {
+ continue;
+ }
+
+ if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
+ {
+ JITDUMP("> Conditions could not be obtained\n");
+ context.CancelLoopOptInfo(i);
+ }
+ else
+ {
+ bool allTrue = false;
+ bool anyFalse = false;
+ context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
+ if (anyFalse)
+ {
+ context.CancelLoopOptInfo(i);
+ }
+ if (allTrue)
+ {
+ // Perform static optimizations on the fast path since we always
+ // have to take the cloned path.
+ optPerformStaticOptimizations(i, &context DEBUGARG(false));
+
+ // No need to clone.
+ context.CancelLoopOptInfo(i);
+ }
+ }
+ }
+
+#if 0
+ // The code in this #if has been useful in debugging loop cloning issues, by
+ // enabling selective enablement of the loop cloning optimization according to
+ // method hash.
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("loopclonehashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
+ }
+ char* histr = getenv("loopclonehashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ return;
+#endif
+#endif
+
+ for (unsigned i = 0; i < optLoopCount; ++i)
+ {
+ if (context.GetLoopOptInfo(i) != nullptr)
+ {
+ optLoopsCloned++;
+ context.OptimizeConditions(i DEBUGARG(verbose));
+ context.OptimizeBlockConditions(i DEBUGARG(verbose));
+ optCloneLoop(i, &context);
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter loop cloning:\n");
+ fgDispBasicBlocks(/*dumpTrees*/ true);
+ }
+#endif
+}
+
+void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
+{
+ assert(loopInd < optLoopCount);
+
+ JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
+ optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
+ optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
+
+ // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
+ unsigned depth = optLoopDepth(loopInd);
+ unsigned ambientWeight = 1;
+ for (unsigned j = 0; j < depth; j++)
+ {
+ unsigned lastWeight = ambientWeight;
+ ambientWeight *= BB_LOOP_WEIGHT;
+ // If the multiplication overflowed, stick at max.
+ // (Strictly speaking, a multiplication could overflow and still have a result
+ // that is >= lastWeight...but if so, the original weight must be pretty large,
+ // and it got bigger, so that's OK.)
+ if (ambientWeight < lastWeight)
+ {
+ ambientWeight = BB_MAX_WEIGHT;
+ break;
+ }
+ }
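+
+ // For instance, a loop at nesting depth 2, assuming BB_LOOP_WEIGHT is 8, yields
+ // ambientWeight = 1 * 8 * 8 = 64 (provided the overflow clamp above is not hit).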
+
+ // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
+ // Be safe by taking the max with the head block's weight.
+ ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
+
+ // This is the containing loop, if any -- to label any blocks we create that are outside
+ // the loop being cloned.
+ unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
+
+ // First, make sure that the loop has a unique header block, creating an empty one if necessary.
+ optEnsureUniqueHead(loopInd, ambientWeight);
+
+ // We're going to make
+
+ // H --> E
+ // F
+ // T
+ // E
+ // B ?-> T
+ // X
+ //
+ // become
+ //
+ // H ?-> E2
+ // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
+ // F
+ // T
+ // E
+ // B ?-> T
+ // X2--> X
+ // F2
+ // T2
+ // E2
+ // B2 ?-> T2
+ // X
+
+ BasicBlock* h = optLoopTable[loopInd].lpHead;
+ if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
+ {
+ // Make a new block to be the unique entry to the loop.
+ assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
+ BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
+ /*extendRegion*/ true);
+ newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
+ BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
+ // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
+ newH->bbNatLoopNum = ambientLoop;
+ h = newH;
+ optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
+ }
+
+ // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
+ // "newPred" will be the predecessor of the blocks of the cloned loop.
+ BasicBlock* b = optLoopTable[loopInd].lpBottom;
+ BasicBlock* newPred = b;
+ if (b->bbJumpKind != BBJ_ALWAYS)
+ {
+ BasicBlock* x = b->bbNext;
+ if (x != nullptr)
+ {
+ BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
+ x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
+
+ // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
+ x2->bbNatLoopNum = ambientLoop;
+
+ x2->bbJumpDest = x;
+ BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
+ newPred = x2;
+ }
+ }
+
+    // Now we'll make "h2", after "h", to go to "e" -- unless the loop is a do-while,
+    // in which case "h" already falls through to "e" (e == t == f).
+ BasicBlock* h2 = nullptr;
+ if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
+ {
+        h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
+                          /*extendRegion*/ true);
+ h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
+
+ // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
+ h2->bbNatLoopNum = ambientLoop;
+
+ h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
+ optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
+ }
+
+ // Now we'll clone the blocks of the loop body.
+ BasicBlock* newFirst = nullptr;
+ BasicBlock* newBot = nullptr;
+
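+    // Map each original loop block to its clone, so that intra-loop branches can be redirected below.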
+ BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
+ for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
+ blk = blk->bbNext)
+ {
+ BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
+ /*extendRegion*/ true);
+
+ BasicBlock::CloneBlockState(this, newBlk, blk);
+ // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
+ // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
+ // loop, if one exists -- the parent of the loop we're cloning.
+ newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
+
+ if (newFirst == nullptr)
+ {
+ newFirst = newBlk;
+ }
+ newBot = newBlk; // Continually overwrite to make sure we get the last one.
+ newPred = newBlk;
+ blockMap->Set(blk, newBlk);
+ }
+
+ // Perform the static optimizations on the fast path.
+ optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
+
+ // Now go through the new blocks, remapping their jump targets within the loop.
+ for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
+ blk = blk->bbNext)
+ {
+
+ BasicBlock* newblk = nullptr;
+ bool b = blockMap->Lookup(blk, &newblk);
+ assert(b && newblk != nullptr);
+
+ assert(blk->bbJumpKind == newblk->bbJumpKind);
+
+ // First copy the jump destination(s) from "blk".
+ optCopyBlkDest(blk, newblk);
+
+ // Now redirect the new block according to "blockMap".
+ optRedirectBlock(newblk, blockMap);
+ }
+
+ assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
+ (h->bbJumpKind == BBJ_ALWAYS));
+
+ // If all the conditions are true, go to E2.
+ BasicBlock* e2 = nullptr;
+ bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
+
+ h->bbJumpKind = BBJ_COND;
+
+ // We will create the following structure
+ //
+ // cond0 (in h) -?> cond1
+ // slow --> e2 (slow) always
+ // !cond1 -?> slow
+ // !cond2 -?> slow
+ // ...
+ // !condn -?> slow
+ // h2/entry (fast)
+ //
+    // We should always have block conditions; at a minimum, the array should be deref-able
+ assert(context->HasBlockConditions(loopInd));
+
+ // Create a unique header for the slow path.
+ BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
+ slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
+ slowHead->bbNatLoopNum = ambientLoop;
+ slowHead->bbJumpDest = e2;
+
+ BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
+ condLast->bbJumpDest = slowHead;
+
+    // If h2 is present it is already the head; otherwise, make 'condLast' the new loop head in place of 'h'.
+ if (h2 == nullptr)
+ {
+ optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
+ }
+ assert(foundIt && e2 != nullptr);
+
+ fgUpdateChangedFlowGraph();
+}
+
+//--------------------------------------------------------------------------------------------------
+// optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
+//
+// Arguments:
+// context loop cloning context variable
+// loopNum the loop index
+// head loop head for "loopNum"
+// slowHead the slow path loop head
+//
+// Return Values:
+// None.
+//
+// Operation:
+// Create the following structure.
+//
+//  Note below that cond0 is inverted in the head block, i.e., if it is true we jump to cond1. This is
+//  because condn cannot jtrue to the loop head h2; the jump to h2 has to come from a direct pred block.
+//
+// cond0 (in h) -?> cond1
+// slowHead --> e2 (slowHead) always
+// !cond1 -?> slowHead
+// !cond2 -?> slowHead
+// ...
+// !condn -?> slowHead
+// h2/entry (fast)
+//
+// Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
+//
+BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
+ unsigned loopNum,
+ BasicBlock* head,
+ BasicBlock* slowHead)
+{
+ JITDUMP("Inserting loop cloning conditions\n");
+ assert(context->HasBlockConditions(loopNum));
+
+ BasicBlock* curCond = head;
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
+ for (unsigned i = 0; i < levelCond->Size(); ++i)
+ {
+ bool isHeaderBlock = (curCond == head);
+
+ // Flip the condition if header block.
+ context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
+
+ // Create each condition block ensuring wiring between them.
+ BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
+ curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
+ curCond = tmp;
+
+ curCond->inheritWeight(head);
+ curCond->bbNatLoopNum = head->bbNatLoopNum;
+ JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
+ }
+
+ // Finally insert cloning conditions after all deref conditions have been inserted.
+ context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
+ return curCond;
+}
+
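+// optEnsureUniqueHead: Ensure that loop "loopInd" has a unique head block that dominates its entry.
+// If it does not, create an empty header block with weight "ambientWeight" and redirect the entry's
+// out-of-loop predecessors to it.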
+void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
+{
+ BasicBlock* h = optLoopTable[loopInd].lpHead;
+ BasicBlock* t = optLoopTable[loopInd].lpTop;
+ BasicBlock* e = optLoopTable[loopInd].lpEntry;
+ BasicBlock* b = optLoopTable[loopInd].lpBottom;
+
+ // If "h" dominates the entry block, then it is the unique header.
+ if (fgDominate(h, e))
+ {
+ return;
+ }
+
+ // Otherwise, create a new empty header block, make it the pred of the entry block,
+ // and redirect the preds of the entry block to go to this.
+
+ BasicBlock* beforeTop = t->bbPrev;
+ // Make sure that the new block is in the same region as the loop.
+ // (We will only create loops that are entirely within a region.)
+ BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
+ // This is in the containing loop.
+ h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
+ h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
+
+ // We don't care where it was put; splice it between beforeTop and top.
+ if (beforeTop->bbNext != h2)
+ {
+ h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
+ beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
+ h2->setNext(t);
+ }
+
+ if (h2->bbNext != e)
+ {
+ h2->bbJumpKind = BBJ_ALWAYS;
+ h2->bbJumpDest = e;
+ }
+ BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
+
+ // Redirect paths from preds of "e" to go to "h2" instead of "e".
+ BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
+ blockMap->Set(e, h2);
+
+ for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
+ {
+ BasicBlock* predBlock = predEntry->flBlock;
+
+ // Skip if predBlock is in the loop.
+ if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
+ {
+ continue;
+ }
+ optRedirectBlock(predBlock, blockMap);
+ }
+
+ optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
+}
+
+/*****************************************************************************
+ *
+ * Determine the kind of interference for the call.
+ */
+
+/* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
+{
+ assert(call->gtOper == GT_CALL);
+
+ // if not a helper, kills everything
+ if (call->gtCall.gtCallType != CT_HELPER)
+ {
+ return CALLINT_ALL;
+ }
+
+ // setfield and array address store kill all indirections
+ switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
+ {
+ case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_SETFIELDOBJ:
+ case CORINFO_HELP_ARRADDR_ST:
+
+ return CALLINT_REF_INDIRS;
+
+ case CORINFO_HELP_SETFIELDFLOAT:
+ case CORINFO_HELP_SETFIELDDOUBLE:
+ case CORINFO_HELP_SETFIELD8:
+ case CORINFO_HELP_SETFIELD16:
+ case CORINFO_HELP_SETFIELD32:
+ case CORINFO_HELP_SETFIELD64:
+
+ return CALLINT_SCL_INDIRS;
+
+ case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32
+ case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_SETFIELDSTRUCT:
+
+ return CALLINT_ALL_INDIRS;
+
+ default:
+ break;
+ }
+
+ // other helpers kill nothing
+ return CALLINT_NONE;
+}
+
+/*****************************************************************************
+ *
+ * See if the given tree can be computed in the given precision (which must
+ * be smaller than the type of the tree for this to make sense). If 'doit'
+ * is false, we merely check to see whether narrowing is possible; if we
+ * get called with 'doit' being true, we actually perform the narrowing.
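+ * For example, a TYP_LONG AND whose constant mask fits in the target type can typically be
+ * recomputed as a TYP_INT operation.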
+ */
+
+bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+ noway_assert(tree);
+ noway_assert(genActualType(tree->gtType) == genActualType(srct));
+
+ /* Assume we're only handling integer types */
+ noway_assert(varTypeIsIntegral(srct));
+ noway_assert(varTypeIsIntegral(dstt));
+
+ unsigned srcSize = genTypeSize(srct);
+ unsigned dstSize = genTypeSize(dstt);
+
+ /* dstt must be smaller than srct to narrow */
+ if (dstSize >= srcSize)
+ {
+ return false;
+ }
+
+ /* Figure out what kind of a node we have */
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ if (kind & GTK_ASGOP)
+ {
+ noway_assert(doit == false);
+ return false;
+ }
+
+ ValueNumPair NoVNPair = ValueNumPair();
+
+ if (kind & GTK_LEAF)
+ {
+ switch (oper)
+ {
+ /* Constants can usually be narrowed by changing their value */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ __int64 lval;
+ __int64 lmask;
+
+ case GT_CNS_LNG:
+ lval = tree->gtIntConCommon.LngValue();
+ lmask = 0;
+
+ switch (dstt)
+ {
+ case TYP_BYTE:
+ lmask = 0x0000007F;
+ break;
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ lmask = 0x000000FF;
+ break;
+ case TYP_SHORT:
+ lmask = 0x00007FFF;
+ break;
+ case TYP_CHAR:
+ lmask = 0x0000FFFF;
+ break;
+ case TYP_INT:
+ lmask = 0x7FFFFFFF;
+ break;
+ case TYP_UINT:
+ lmask = 0xFFFFFFFF;
+ break;
+
+ default:
+ return false;
+ }
+
+ if ((lval & lmask) != lval)
+ return false;
+
+ if (doit)
+ {
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtType = TYP_INT;
+ tree->gtIntCon.gtIconVal = (int)lval;
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+ }
+
+ return true;
+#endif
+
+ case GT_CNS_INT:
+
+ ssize_t ival;
+ ival = tree->gtIntCon.gtIconVal;
+ ssize_t imask;
+ imask = 0;
+
+ switch (dstt)
+ {
+ case TYP_BYTE:
+ imask = 0x0000007F;
+ break;
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ imask = 0x000000FF;
+ break;
+ case TYP_SHORT:
+ imask = 0x00007FFF;
+ break;
+ case TYP_CHAR:
+ imask = 0x0000FFFF;
+ break;
+#ifdef _TARGET_64BIT_
+ case TYP_INT:
+ imask = 0x7FFFFFFF;
+ break;
+ case TYP_UINT:
+ imask = 0xFFFFFFFF;
+ break;
+#endif // _TARGET_64BIT_
+ default:
+ return false;
+ }
+
+ if ((ival & imask) != ival)
+ {
+ return false;
+ }
+
+#ifdef _TARGET_64BIT_
+ if (doit)
+ {
+ tree->gtType = TYP_INT;
+ tree->gtIntCon.gtIconVal = (int)ival;
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+ }
+#endif // _TARGET_64BIT_
+
+ return true;
+
+ /* Operands that are in memory can usually be narrowed
+ simply by changing their gtType */
+
+ case GT_LCL_VAR:
+ /* We only allow narrowing long -> int for a GT_LCL_VAR */
+ if (dstSize == sizeof(int))
+ {
+ goto NARROW_IND;
+ }
+ break;
+
+ case GT_CLS_VAR:
+ case GT_LCL_FLD:
+ goto NARROW_IND;
+ default:
+ break;
+ }
+
+ noway_assert(doit == false);
+ return false;
+ }
+
+ if (kind & (GTK_BINOP | GTK_UNOP))
+ {
+ GenTreePtr op1;
+ op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2;
+ op2 = tree->gtOp.gtOp2;
+
+ switch (tree->gtOper)
+ {
+ case GT_AND:
+ noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
+
+                // Is op2 a small constant that can be narrowed into dstt?
+ // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
+ if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
+ {
+ // We will change the type of the tree and narrow op2
+ //
+ if (doit)
+ {
+ tree->gtType = genActualType(dstt);
+ tree->SetVNs(vnpNarrow);
+
+ optNarrowTree(op2, srct, dstt, NoVNPair, true);
+ // We may also need to cast away the upper bits of op1
+ if (srcSize == 8)
+ {
+ assert(tree->gtType == TYP_INT);
+ op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
+#ifdef DEBUG
+ op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ tree->gtOp.gtOp1 = op1;
+ }
+ }
+ return true;
+ }
+
+ goto COMMON_BINOP;
+
+ case GT_ADD:
+ case GT_MUL:
+
+ if (tree->gtOverflow() || varTypeIsSmall(dstt))
+ {
+ noway_assert(doit == false);
+ return false;
+ }
+ __fallthrough;
+
+ case GT_OR:
+ case GT_XOR:
+ COMMON_BINOP:
+ noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
+ noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
+
+ if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
+ !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
+ {
+ noway_assert(doit == false);
+ return false;
+ }
+
+ /* Simply change the type of the tree */
+
+ if (doit)
+ {
+ if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
+ {
+ tree->gtFlags &= ~GTF_MUL_64RSLT;
+ }
+
+ tree->gtType = genActualType(dstt);
+ tree->SetVNs(vnpNarrow);
+ }
+
+ return true;
+
+ case GT_IND:
+
+ NARROW_IND:
+ /* Simply change the type of the tree */
+
+ if (doit && (dstSize <= genTypeSize(tree->gtType)))
+ {
+ tree->gtType = genSignedType(dstt);
+ tree->SetVNs(vnpNarrow);
+
+ /* Make sure we don't mess up the variable type */
+ if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
+ {
+ tree->gtFlags |= GTF_VAR_CAST;
+ }
+ }
+
+ return true;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GT:
+ case GT_GE:
+
+ /* These can always be narrowed since they only represent 0 or 1 */
+ return true;
+
+ case GT_CAST:
+ {
+ var_types cast = tree->CastToType();
+ var_types oprt = op1->TypeGet();
+ unsigned oprSize = genTypeSize(oprt);
+
+ if (cast != srct)
+ {
+ return false;
+ }
+
+ if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
+ {
+ return false;
+ }
+
+ if (tree->gtOverflow())
+ {
+ return false;
+ }
+
+ /* Is this a cast from the type we're narrowing to or a smaller one? */
+
+ if (oprSize <= dstSize)
+ {
+ /* Bash the target type of the cast */
+
+ if (doit)
+ {
+ dstt = genSignedType(dstt);
+
+ if (oprSize == dstSize)
+ {
+ // Same size: change the CAST into a NOP
+ tree->ChangeOper(GT_NOP);
+ tree->gtType = dstt;
+ tree->gtOp.gtOp2 = nullptr;
+ tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
+ }
+ else
+ {
+ // oprSize is smaller
+ assert(oprSize < dstSize);
+
+ // Change the CastToType in the GT_CAST node
+ tree->CastToType() = dstt;
+
+ // The result type of a GT_CAST is never a small type.
+                        // Use genActualType to widen dstt when it is a small type.
+ tree->gtType = genActualType(dstt);
+ tree->SetVNs(vnpNarrow);
+ }
+ }
+
+ return true;
+ }
+ }
+ return false;
+
+ case GT_COMMA:
+ if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
+ {
+ /* Simply change the type of the tree */
+
+ if (doit)
+ {
+ tree->gtType = genActualType(dstt);
+ tree->SetVNs(vnpNarrow);
+ }
+ return true;
+ }
+ return false;
+
+ default:
+ noway_assert(doit == false);
+ return false;
+ }
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * The following logic figures out whether the given variable is assigned
+ * somewhere in a list of basic blocks (or in an entire loop).
+ */
+
+Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ if (tree->OperKind() & GTK_ASGOP)
+ {
+ GenTreePtr dest = tree->gtOp.gtOp1;
+ genTreeOps destOper = dest->OperGet();
+
+ isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
+ assert(desc && desc->ivaSelf == desc);
+
+ if (destOper == GT_LCL_VAR)
+ {
+ unsigned tvar = dest->gtLclVarCommon.gtLclNum;
+ if (tvar < lclMAX_ALLSET_TRACKED)
+ {
+ AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
+ }
+ else
+ {
+ desc->ivaMaskIncomplete = true;
+ }
+
+ if (tvar == desc->ivaVar)
+ {
+ if (tree != desc->ivaSkip)
+ {
+ return WALK_ABORT;
+ }
+ }
+ }
+ else if (destOper == GT_LCL_FLD)
+ {
+ /* We can't track every field of every var. Moreover, indirections
+ may access different parts of the var as different (but
+ overlapping) fields. So just treat them as indirect accesses */
+
+ // unsigned lclNum = dest->gtLclFld.gtLclNum;
+ // noway_assert(lvaTable[lclNum].lvAddrTaken);
+
+ varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
+ desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
+ }
+ else if (destOper == GT_CLS_VAR)
+ {
+ desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
+ }
+ else if (destOper == GT_IND)
+ {
+ /* Set the proper indirection bits */
+
+ varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
+ desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
+ }
+ }
+ else if (tree->gtOper == GT_CALL)
+ {
+ isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
+ assert(desc && desc->ivaSelf == desc);
+
+ desc->ivaMaskCall = optCallInterf(tree);
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************/
+
+bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var)
+{
+ bool result;
+ isVarAssgDsc desc;
+
+ desc.ivaSkip = skip;
+#ifdef DEBUG
+ desc.ivaSelf = &desc;
+#endif
+ desc.ivaVar = var;
+ desc.ivaMaskCall = CALLINT_NONE;
+ AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
+
+ for (;;)
+ {
+ noway_assert(beg);
+
+ for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+ if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
+ {
+ result = true;
+ goto DONE;
+ }
+ }
+
+ if (beg == end)
+ {
+ break;
+ }
+
+ beg = beg->bbNext;
+ }
+
+ result = false;
+
+DONE:
+
+ return result;
+}
+
+/*****************************************************************************/
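+// optIsSetAssgLoop: Return 1 if loop "lnum" may assign any of the variables in "vars", or may perform
+// stores of the indirection kinds in "inds" (directly or via calls); return 0 otherwise.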
+int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
+{
+ LoopDsc* loop;
+
+ /* Get hold of the loop descriptor */
+
+ noway_assert(lnum < optLoopCount);
+ loop = optLoopTable + lnum;
+
+ /* Do we already know what variables are assigned within this loop? */
+
+ if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
+ {
+ isVarAssgDsc desc;
+
+ BasicBlock* beg;
+ BasicBlock* end;
+
+ /* Prepare the descriptor used by the tree walker call-back */
+
+ desc.ivaVar = (unsigned)-1;
+ desc.ivaSkip = nullptr;
+#ifdef DEBUG
+ desc.ivaSelf = &desc;
+#endif
+ AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
+ desc.ivaMaskInd = VR_NONE;
+ desc.ivaMaskCall = CALLINT_NONE;
+ desc.ivaMaskIncomplete = false;
+
+ /* Now walk all the statements of the loop */
+
+ beg = loop->lpHead->bbNext;
+ end = loop->lpBottom;
+
+ for (/**/; /**/; beg = beg->bbNext)
+ {
+ noway_assert(beg);
+
+ for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+ fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
+
+ if (desc.ivaMaskIncomplete)
+ {
+ loop->lpFlags |= LPFLG_ASGVARS_INC;
+ }
+ }
+
+ if (beg == end)
+ {
+ break;
+ }
+ }
+
+ AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
+ loop->lpAsgInds = desc.ivaMaskInd;
+ loop->lpAsgCall = desc.ivaMaskCall;
+
+ /* Now we know what variables are assigned in the loop */
+
+ loop->lpFlags |= LPFLG_ASGVARS_YES;
+ }
+
+ /* Now we can finally test the caller's mask against the loop's */
+ if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
+ {
+ return 1;
+ }
+
+ switch (loop->lpAsgCall)
+ {
+ case CALLINT_ALL:
+
+            /* Can't hoist if the call might have a side effect on an indirection. */
+
+ if (loop->lpAsgInds != VR_NONE)
+ {
+ return 1;
+ }
+
+ break;
+
+ case CALLINT_REF_INDIRS:
+
+            /* Can't hoist if the call might have a side effect on a ref indirection. */
+
+ if (loop->lpAsgInds & VR_IND_REF)
+ {
+ return 1;
+ }
+
+ break;
+
+ case CALLINT_SCL_INDIRS:
+
+            /* Can't hoist if the call might have a side effect on a non-ref indirection. */
+
+ if (loop->lpAsgInds & VR_IND_SCL)
+ {
+ return 1;
+ }
+
+ break;
+
+ case CALLINT_ALL_INDIRS:
+
+            /* Can't hoist if the call might have a side effect on any indirection. */
+
+ if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
+ {
+ return 1;
+ }
+
+ break;
+
+ case CALLINT_NONE:
+
+ /* Other helpers kill nothing */
+
+ break;
+
+ default:
+ noway_assert(!"Unexpected lpAsgCall value");
+ }
+
+ return 0;
+}
+
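+// optPerformHoistExpr: Hoist a copy of "origExpr", marked as a CSE candidate, into the pre-header
+// of loop "lnum", creating the pre-header if it does not already exist.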
+void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nHoisting a copy of ");
+ printTreeID(origExpr);
+ printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
+ optLoopTable[lnum].lpBottom->bbNum);
+ gtDispTree(origExpr);
+ printf("\n");
+ }
+#endif
+
+ // This loop has to be in a form that is approved for hoisting.
+ assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
+
+ // Create a copy of the expression and mark it for CSE's.
+ GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
+
+ // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
+ assert(hoistExpr != origExpr);
+ assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
+
+ GenTreePtr hoist = hoistExpr;
+ // The value of the expression isn't used (unless it's an assignment).
+ if (hoistExpr->OperGet() != GT_ASG)
+ {
+ hoist = gtUnusedValNode(hoistExpr);
+ }
+
+ /* Put the statement in the preheader */
+
+ fgCreateLoopPreHeader(lnum);
+
+ BasicBlock* preHead = optLoopTable[lnum].lpHead;
+ assert(preHead->bbJumpKind == BBJ_NONE);
+
+    // fgMorphTree and lvaRecursiveIncRefCounts require that compCurBB be the block that contains
+ // (or in this case, will contain) the expression.
+ compCurBB = preHead;
+
+ // Increment the ref counts of any local vars appearing in "hoist".
+ // Note that we need to do this before fgMorphTree() as fgMorph() could constant
+ // fold away some of the lcl vars referenced by "hoist".
+ lvaRecursiveIncRefCounts(hoist);
+
+ hoist = fgMorphTree(hoist);
+
+ GenTreePtr hoistStmt = gtNewStmt(hoist);
+ hoistStmt->gtFlags |= GTF_STMT_CMPADD;
+
+ /* simply append the statement at the end of the preHead's list */
+
+ GenTreePtr treeList = preHead->bbTreeList;
+
+ if (treeList)
+ {
+ /* append after last statement */
+
+ GenTreePtr last = treeList->gtPrev;
+ assert(last->gtNext == nullptr);
+
+ last->gtNext = hoistStmt;
+ hoistStmt->gtPrev = last;
+ treeList->gtPrev = hoistStmt;
+ }
+ else
+ {
+ /* Empty pre-header - store the single statement in the block */
+
+ preHead->bbTreeList = hoistStmt;
+ hoistStmt->gtPrev = hoistStmt;
+ }
+
+ hoistStmt->gtNext = nullptr;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
+ gtDispTree(hoist);
+ }
+#endif
+
+ if (fgStmtListThreaded)
+ {
+ gtSetStmtInfo(hoistStmt);
+ fgSetStmtSeq(hoistStmt);
+ }
+
+#ifdef DEBUG
+ if (m_nodeTestData != nullptr)
+ {
+
+ // What is the depth of the loop "lnum"?
+ ssize_t depth = 0;
+ unsigned lnumIter = lnum;
+ while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
+ {
+ depth++;
+ lnumIter = optLoopTable[lnumIter].lpParent;
+ }
+
+ NodeToTestDataMap* testData = GetNodeTestData();
+
+ TestLabelAndNum tlAndN;
+ if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
+ {
+ if (tlAndN.m_num == -1)
+ {
+ printf("Node ");
+ printTreeID(origExpr);
+ printf(" was declared 'do not hoist', but is being hoisted.\n");
+ assert(false);
+ }
+ else if (tlAndN.m_num != depth)
+ {
+ printf("Node ");
+ printTreeID(origExpr);
+ printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
+ "%d.\n",
+ tlAndN.m_num, depth);
+ assert(false);
+ }
+ else
+ {
+ // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
+ // hoist" annotations.
+ testData->Remove(origExpr);
+ // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
+ tlAndN.m_tl = TL_CSE_Def;
+ tlAndN.m_num = m_loopHoistCSEClass++;
+ testData->Set(hoistExpr, tlAndN);
+ }
+ }
+ }
+#endif
+
+#if LOOP_HOIST_STATS
+ if (!m_curLoopHasHoistedExpression)
+ {
+ m_loopsWithHoistedExpressions++;
+ m_curLoopHasHoistedExpression = true;
+ }
+ m_totalHoistedExpressions++;
+#endif // LOOP_HOIST_STATS
+}
+
+void Compiler::optHoistLoopCode()
+{
+ // If we don't have any loops in the method then take an early out now.
+ if (optLoopCount == 0)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ unsigned jitNoHoist = JitConfig.JitNoHoist();
+ if (jitNoHoist > 0)
+ {
+ return;
+ }
+#endif
+
+#if 0
+    // The code in this #if has been useful in debugging loop hoisting issues, by
+    // allowing selective enablement of the loop hoisting optimization according to
+    // method hash.
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("loophoisthashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
+ }
+ char* histr = getenv("loophoisthashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ return;
+ printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
+#endif // DEBUG
+#endif // 0 -- debugging loop cloning issues
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In optHoistLoopCode()\n");
+ printf("Blocks/Trees before phase\n");
+ fgDispBasicBlocks(true);
+ printf("");
+ }
+#endif
+
+ // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
+ // they are invariant.)
+ LoopHoistContext hoistCtxt(this);
+ for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
+ {
+ optHoistLoopNest(lnum, &hoistCtxt);
+ }
+ }
+
+#if DEBUG
+ if (fgModified)
+ {
+ if (verbose)
+ {
+ printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
+ fgDispBasicBlocks(true);
+ printf("");
+ }
+
+ // Make sure that the predecessor lists are accurate
+ fgDebugCheckBBlist();
+ }
+#endif
+
+#ifdef DEBUG
+ // Test Data stuff..
+ // If we have no test data, early out.
+ if (m_nodeTestData == nullptr)
+ {
+ return;
+ }
+ NodeToTestDataMap* testData = GetNodeTestData();
+ for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
+ {
+ TestLabelAndNum tlAndN;
+ GenTreePtr node = ki.Get();
+ bool b = testData->Lookup(node, &tlAndN);
+ assert(b);
+ if (tlAndN.m_tl != TL_LoopHoist)
+ {
+ continue;
+ }
+ // Otherwise, it is a loop hoist annotation.
+ assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
+ if (tlAndN.m_num >= 0)
+ {
+ printf("Node ");
+ printTreeID(node);
+ printf(" was declared 'must hoist', but has not been hoisted.\n");
+ assert(false);
+ }
+ }
+#endif // DEBUG
+}
+
+void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
+{
+ // Do this loop, then recursively do all nested loops.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if LOOP_HOIST_STATS
+ // Record stats
+ m_curLoopHasHoistedExpression = false;
+ m_loopsConsidered++;
+#endif // LOOP_HOIST_STATS
+
+ optHoistThisLoop(lnum, hoistCtxt);
+
+ VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
+
+ if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
+ {
+ // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
+ // TODO-Cleanup: we should have a set abstraction for loops.
+ if (hoistedInCurLoop != nullptr)
+ {
+ for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
+ {
+#ifdef DEBUG
+ bool b;
+ assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
+#endif
+ hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
+ }
+ }
+
+ for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
+ child = optLoopTable[child].lpSibling)
+ {
+ optHoistLoopNest(child, hoistCtxt);
+ }
+
+ // Now remove them.
+ // TODO-Cleanup: we should have a set abstraction for loops.
+ if (hoistedInCurLoop != nullptr)
+ {
+ for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
+ {
+ // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
+ hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
+ }
+ }
+ }
+}
+
+void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
+{
+ LoopDsc* pLoopDsc = &optLoopTable[lnum];
+
+ /* If loop was removed continue */
+
+ if (pLoopDsc->lpFlags & LPFLG_REMOVED)
+ {
+ return;
+ }
+
+ /* Get the head and tail of the loop */
+
+ BasicBlock* head = pLoopDsc->lpHead;
+ BasicBlock* tail = pLoopDsc->lpBottom;
+ BasicBlock* lbeg = pLoopDsc->lpEntry;
+ BasicBlock* block;
+
+ // We must have a do-while loop
+ if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
+ {
+ return;
+ }
+
+ // The loop-head must dominate the loop-entry.
+ // TODO-CQ: Couldn't we make this true if it's not?
+ if (!fgDominate(head, lbeg))
+ {
+ return;
+ }
+
+ // if lbeg is the start of a new try block then we won't be able to hoist
+ if (!BasicBlock::sameTryRegion(head, lbeg))
+ {
+ return;
+ }
+
+ // We don't bother hoisting when inside of a catch block
+ if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
+ {
+ return;
+ }
+
+ pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
+
+ unsigned begn = lbeg->bbNum;
+ unsigned endn = tail->bbNum;
+
+ // Ensure the per-loop sets/tables are empty.
+ hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
+ printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
+ }
+#endif
+
+ VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
+
+ pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
+ pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
+ pLoopDsc->lpHoistedExprCount = 0;
+
+#ifndef _TARGET_64BIT_
+ unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
+
+ if (longVarsCount > 0)
+ {
+ // Since 64-bit variables take up two registers on 32-bit targets, we increase
+ // the Counts such that each TYP_LONG variable counts twice.
+ //
+ VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
+ VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
+ lvaDispVarSet(lvaLongVars);
+ }
+#endif
+ pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
+ pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
+ }
+#endif // !_TARGET_64BIT_
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
+ lvaDispVarSet(pLoopDsc->lpVarUseDef);
+
+ printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
+ lvaDispVarSet(pLoopDsc->lpVarInOut);
+
+ printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
+ lvaDispVarSet(loopVars);
+ printf("\n");
+ }
+#endif
+
+ unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
+
+ if (floatVarsCount > 0)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
+ VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
+
+ pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
+ pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
+ pLoopDsc->lpHoistedFPExprCount = 0;
+
+ pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
+ pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
+ lvaDispVarSet(inOutFPVars);
+
+ printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
+ lvaDispVarSet(loopFPVars);
+ }
+#endif
+ }
+ else // (floatVarsCount == 0)
+ {
+ pLoopDsc->lpLoopVarFPCount = 0;
+ pLoopDsc->lpVarInOutFPCount = 0;
+ pLoopDsc->lpHoistedFPExprCount = 0;
+ }
+
+ // Find the set of definitely-executed blocks.
+ // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
+ // Until we have post-dominators, we'll special-case for single-exit blocks.
+ ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
+ if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
+ {
+ assert(pLoopDsc->lpExit != nullptr);
+ BasicBlock* cur = pLoopDsc->lpExit;
+ // Push dominators, until we reach "entry" or exit the loop.
+ while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
+ {
+ defExec.Push(cur);
+ cur = cur->bbIDom;
+ }
+ // If we didn't reach the entry block, give up and *just* push the entry block.
+ if (cur != pLoopDsc->lpEntry)
+ {
+ defExec.Reset();
+ }
+ defExec.Push(pLoopDsc->lpEntry);
+ }
+ else // More than one exit
+ {
+ // We'll assume that only the entry block is definitely executed.
+ // We could in the future do better.
+ defExec.Push(pLoopDsc->lpEntry);
+ }
+
+ while (defExec.Size() > 0)
+ {
+ // Consider in reverse order: dominator before dominatee.
+ BasicBlock* blk = defExec.Pop();
+ optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
+ }
+}
+
+// Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
+void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
+{
+ LoopDsc* pLoopDsc = &optLoopTable[lnum];
+ bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
+ unsigned blkWeight = blk->getBBWeight(this);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
+ blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
+ firstBlockAndBeforeSideEffect ? "true" : "false");
+ if (blkWeight < (BB_UNITY_WEIGHT / 10))
+ {
+ printf(" block weight is too small to perform hoisting.\n");
+ }
+ }
+#endif
+
+ if (blkWeight < (BB_UNITY_WEIGHT / 10))
+ {
+ // Block weight is too small to perform hoisting.
+ return;
+ }
+
+ for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
+ {
+ GenTreePtr stmtTree = stmt->gtStmtExpr;
+ bool hoistable;
+ (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
+ if (hoistable)
+ {
+ // we will try to hoist the top-level stmtTree
+ optHoistCandidate(stmtTree, lnum, hoistCtxt);
+ }
+ }
+}
+
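+// optIsProfitableToHoistableTree: Estimate whether hoisting "tree" out of loop "lnum" is likely to
+// be profitable, based on the registers expected to be available across the loop and the tree's
+// evaluation cost.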
+bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
+{
+ LoopDsc* pLoopDsc = &optLoopTable[lnum];
+
+ bool loopContainsCall = pLoopDsc->lpContainsCall;
+
+ int availRegCount;
+ int hoistedExprCount;
+ int loopVarCount;
+ int varInOutCount;
+
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
+ loopVarCount = pLoopDsc->lpLoopVarFPCount;
+ varInOutCount = pLoopDsc->lpVarInOutFPCount;
+
+ availRegCount = CNT_CALLEE_SAVED_FLOAT;
+ if (!loopContainsCall)
+ {
+ availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
+ }
+#ifdef _TARGET_ARM_
+ // For ARM each double takes two FP registers
+ // For now on ARM we won't track singles/doubles
+ // and instead just assume that we always have doubles.
+ //
+ availRegCount /= 2;
+#endif
+ }
+ else
+ {
+ hoistedExprCount = pLoopDsc->lpHoistedExprCount;
+ loopVarCount = pLoopDsc->lpLoopVarCount;
+ varInOutCount = pLoopDsc->lpVarInOutCount;
+
+ availRegCount = CNT_CALLEE_SAVED - 1;
+ if (!loopContainsCall)
+ {
+ availRegCount += CNT_CALLEE_TRASH - 1;
+ }
+#ifndef _TARGET_64BIT_
+ // For our 32-bit targets Long types take two registers.
+ if (varTypeIsLong(tree->TypeGet()))
+ {
+ availRegCount = (availRegCount + 1) / 2;
+ }
+#endif
+ }
+
+ // decrement the availRegCount by the count of expression that we have already hoisted.
+ availRegCount -= hoistedExprCount;
+
+ // the variables that are read/written inside the loop should
+ // always be a subset of the InOut variables for the loop
+ assert(loopVarCount <= varInOutCount);
+
+ // When loopVarCount >= availRegCount we believe that all of the
+ // available registers will get used to hold LclVars inside the loop.
+ // This pessimistically assumes that each loopVar has a conflicting
+ // lifetime with every other loopVar.
+    // For this case we will hoist the expression only if it is profitable
+ // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
+ // as we believe it will be placed in the stack or one of the other
+ // loopVars will be spilled into the stack
+ //
+ if (loopVarCount >= availRegCount)
+ {
+ // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
+ if (tree->gtCostEx < (2 * IND_COST_EX))
+ {
+ return false;
+ }
+ }
+
+    // When varInOutCount < availRegCount we know that there are
+ // some available register(s) when we enter the loop body.
+ // When varInOutCount == availRegCount there often will be a register
+ // available when we enter the loop body, since a loop often defines a
+ // LclVar on exit or there is often at least one LclVar that is worth
+ // spilling to the stack to make way for this hoisted expression.
+    // So we are willing to hoist an expression with gtCostEx == MIN_CSE_COST
+ //
+ if (varInOutCount > availRegCount)
+ {
+ // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
+ if (tree->gtCostEx <= MIN_CSE_COST + 1)
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+//
+// This function returns true if 'tree' is a loop invariant expression.
+// It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
+//
+bool Compiler::optHoistLoopExprsForTree(
+ GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
+{
+ // First do the children.
+ // We must keep track of whether each child node was hoistable or not
+ //
+ unsigned nChildren = tree->NumChildren();
+ bool childrenHoistable[GenTree::MAX_CHILDREN];
+
+ // Initialize the array elements for childrenHoistable[] to false
+ for (unsigned i = 0; i < nChildren; i++)
+ {
+ childrenHoistable[i] = false;
+ }
+
+ bool treeIsInvariant = true;
+ for (unsigned childNum = 0; childNum < nChildren; childNum++)
+ {
+ if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
+ &childrenHoistable[childNum]))
+ {
+ treeIsInvariant = false;
+ }
+ }
+
+ // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
+ //
+ bool treeIsHoistable = treeIsInvariant;
+
+ // But we must see if anything else prevents "tree" from being hoisted.
+ //
+ if (treeIsInvariant)
+ {
+ // Tree must be a suitable CSE candidate for us to be able to hoist it.
+ treeIsHoistable = optIsCSEcandidate(tree);
+
+ // If it's a call, it must be a helper call, and be pure.
+ // Further, if it may run a cctor, it must be labeled as "Hoistable"
+ // (meaning it won't run a cctor because the class is not precise-init).
+ if (treeIsHoistable && tree->OperGet() == GT_CALL)
+ {
+ GenTreeCall* call = tree->AsCall();
+ if (call->gtCallType != CT_HELPER)
+ {
+ treeIsHoistable = false;
+ }
+ else
+ {
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+ if (!s_helperCallProperties.IsPure(helpFunc))
+ {
+ treeIsHoistable = false;
+ }
+ else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
+ {
+ treeIsHoistable = false;
+ }
+ }
+ }
+
+ if (treeIsHoistable)
+ {
+ if (!(*pFirstBlockAndBeforeSideEffect))
+ {
+ // For now, we give up on an expression that might raise an exception if it is after the
+ // first possible global side effect (and we assume we're after that if we're not in the first block).
+ // TODO-CQ: this is when we might do loop cloning.
+ //
+ if ((tree->gtFlags & GTF_EXCEPT) != 0)
+ {
+ treeIsHoistable = false;
+ }
+ }
+ // Currently we must give up on reads from static variables (even if we are in the first block).
+ //
+ if (tree->OperGet() == GT_CLS_VAR)
+ {
+ // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
+ // method Main
+ treeIsHoistable = false;
+ }
+ }
+
+ // Is the value of the whole tree loop invariant?
+ treeIsInvariant =
+ optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
+
+        // If the whole tree is not invariant, it cannot be hoisted.
+ if (!treeIsInvariant)
+ {
+ treeIsHoistable = false;
+ }
+ }
+
+ // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
+ // If we encounter a tree with a call in it
+ // or if we see an assignment to global we set it to false.
+ //
+ // If we are already set to false then we can skip these checks
+ //
+ if (*pFirstBlockAndBeforeSideEffect)
+ {
+ // For this purpose, we only care about memory side effects. We assume that expressions will
+ // be hoisted so that they are evaluated in the same order as they would have been in the loop,
+ // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
+ // here, since that includes exceptions.)
+ if (tree->gtFlags & GTF_CALL)
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ else if (tree->OperIsAssignment())
+ {
+ // If the LHS of the assignment has a global reference, then assume it's a global side effect.
+ GenTreePtr lhs = tree->gtOp.gtOp1;
+ if (lhs->gtFlags & GTF_GLOB_REF)
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ }
+ else if (tree->OperIsCopyBlkOp())
+ {
+ GenTreePtr args = tree->gtOp.gtOp1;
+ assert(args->OperGet() == GT_LIST);
+ if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ }
+ }
+
+ // If this 'tree' is hoistable then we return and the caller will
+ // decide to hoist it as part of larger hoistable expression.
+ //
+ if (!treeIsHoistable)
+ {
+ // We are not hoistable so we will now hoist any hoistable children.
+ //
+ for (unsigned childNum = 0; childNum < nChildren; childNum++)
+ {
+ if (childrenHoistable[childNum])
+ {
+                // We can't hoist the LHS of an assignment; it isn't a real use.
+ if (childNum == 0 && (tree->OperIsAssignment()))
+ {
+ continue;
+ }
+
+ GenTreePtr child = tree->GetChild(childNum);
+
+ // We try to hoist this 'child' tree
+ optHoistCandidate(child, lnum, hoistCtxt);
+ }
+ }
+ }
+
+ *pHoistable = treeIsHoistable;
+ return treeIsInvariant;
+}
+
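+// optHoistCandidate: Hoist "tree" out of loop "lnum" if the loop is hoistable, the tree is valid at
+// the loop head and profitable to hoist, and its value number has not already been hoisted in this
+// loop or an enclosing one; update the hoist counts and the set of hoisted value numbers.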
+void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
+{
+ if (lnum == BasicBlock::NOT_IN_LOOP)
+ {
+ // The hoisted expression isn't valid at any loop head so don't hoist this expression.
+ return;
+ }
+
+ // The outer loop also must be suitable for hoisting...
+ if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
+ {
+ return;
+ }
+
+    // If the hoisted expression isn't valid at this loop head then don't hoist it
+ if (!optTreeIsValidAtLoopHead(tree, lnum))
+ {
+ return;
+ }
+
+    // It must pass the hoisting profitability tests for this loop level
+ if (!optIsProfitableToHoistableTree(tree, lnum))
+ {
+ return;
+ }
+
+ bool b;
+ if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
+ {
+ // already hoisted in a parent loop, so don't hoist this expression.
+ return;
+ }
+
+ if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
+ {
+ // already hoisted this expression in the current loop, so don't hoist this expression.
+ return;
+ }
+
+ // Expression can be hoisted
+ optPerformHoistExpr(tree, lnum);
+
+ // Increment lpHoistedExprCount or lpHoistedFPExprCount
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ optLoopTable[lnum].lpHoistedExprCount++;
+#ifndef _TARGET_64BIT_
+ // For our 32-bit targets Long types take two registers.
+ if (varTypeIsLong(tree->TypeGet()))
+ {
+ optLoopTable[lnum].lpHoistedExprCount++;
+ }
+#endif
+ }
+ else // Floating point expr hoisted
+ {
+ optLoopTable[lnum].lpHoistedFPExprCount++;
+ }
+
+ // Record the hoisted expression in hoistCtxt
+ hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
+}
+
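+// optVNIsLoopInvariant: Return true if the value number "vn" is invariant with respect to loop
+// "lnum". Results are memoized in "loopVnInvariantCache".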
+bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
+{
+    // If it is not a valid VN, it is not loop-invariant.
+ if (vn == ValueNumStore::NoVN)
+ {
+ return false;
+ }
+
+ // We'll always short-circuit constants.
+ if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
+ {
+ return true;
+ }
+
+ // If we've done this query previously, don't repeat.
+ bool previousRes = false;
+ if (loopVnInvariantCache->Lookup(vn, &previousRes))
+ {
+ return previousRes;
+ }
+
+ bool res = true;
+ VNFuncApp funcApp;
+ if (vnStore->GetVNFunc(vn, &funcApp))
+ {
+ if (funcApp.m_func == VNF_PhiDef)
+ {
+ // First, make sure it's a "proper" phi -- the definition is a Phi application.
+ VNFuncApp phiDefValFuncApp;
+ if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
+ {
+ // It's not *really* a definition, rather a pass-through of some other VN.
+ // (This could occur, say if both sides of an if-then-else diamond made the
+ // same assignment to a variable.)
+ res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
+ }
+ else
+ {
+                // Is the definition within the loop? If so, it is not loop-invariant.
+ unsigned lclNum = funcApp.m_args[0];
+ unsigned ssaNum = funcApp.m_args[1];
+ LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
+ res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
+ }
+ }
+ else if (funcApp.m_func == VNF_PhiHeapDef)
+ {
+ BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
+ res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
+ }
+ else
+ {
+ for (unsigned i = 0; i < funcApp.m_arity; i++)
+ {
+ // TODO-CQ: We need to either make sure that *all* VN functions
+ // always take VN args, or else have a list of arg positions to exempt, as implicitly
+ // constant.
+ if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
+ {
+ res = false;
+ break;
+ }
+ }
+ }
+ }
+ else
+ {
+ // Non-function "new, unique" VN's may be annotated with the loop nest where
+ // their definition occurs.
+ BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
+
+ if (vnLoopNum == MAX_LOOP_NUM)
+ {
+ res = false;
+ }
+ else
+ {
+ res = !optLoopContains(lnum, vnLoopNum);
+ }
+ }
+
+ loopVnInvariantCache->Set(vn, res);
+ return res;
+}
+
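+// optTreeIsValidAtLoopHead: Return true if "tree" could be evaluated at the head of loop "lnum":
+// it is a constant, a local with an SSA-tracked lifetime whose definition is outside the loop, or
+// a non-leaf node all of whose children are themselves valid at the loop head.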
+bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
+{
+ if (tree->OperIsLocal())
+ {
+ GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
+ unsigned lclNum = lclVar->gtLclNum;
+
+        // The lclVar must have an SSA-tracked lifetime
+ if (fgExcludeFromSsa(lclNum))
+ {
+ return false;
+ }
+
+        // If the loop does not contain the SSA def we can hoist it.
+ if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
+ {
+ return true;
+ }
+ }
+ else if (tree->OperIsConst())
+ {
+ return true;
+ }
+    else // Otherwise, the tree is valid only if every one of its child nodes is valid at this loop's head.
+ {
+ unsigned nChildren = tree->NumChildren();
+ for (unsigned childNum = 0; childNum < nChildren; childNum++)
+ {
+ if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
+ * header. The pre-header will replace the current lpHead in the loop table.
+ * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
+ * will also be dominated by the loop-top, lpHead->bbNext.
+ *
+ */
+
+void Compiler::fgCreateLoopPreHeader(unsigned lnum)
+{
+ LoopDsc* pLoopDsc = &optLoopTable[lnum];
+
+ /* This loop has to be a "do-while" loop */
+
+ assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
+
+ /* Have we already created a loop-preheader block? */
+
+ if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
+ {
+ return;
+ }
+
+ BasicBlock* head = pLoopDsc->lpHead;
+ BasicBlock* top = pLoopDsc->lpTop;
+ BasicBlock* entry = pLoopDsc->lpEntry;
+
+ // if 'entry' and 'head' are in different try regions then we won't be able to hoist
+ if (!BasicBlock::sameTryRegion(head, entry))
+ {
+ return;
+ }
+
+ // Ensure that lpHead always dominates lpEntry
+
+ noway_assert(fgDominate(head, entry));
+
+ /* Get hold of the first block of the loop body */
+
+ assert(top == entry);
+
+ /* Allocate a new basic block */
+
+ BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
+ preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
+
+ // Must set IL code offset
+ preHead->bbCodeOffs = top->bbCodeOffs;
+
+    // Set the default value of the preHead weight in case we don't have
+    // valid profile data. Since this block's weight is just an estimate,
+    // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
+ //
+ preHead->inheritWeight(head);
+ preHead->bbFlags &= ~BBF_PROF_WEIGHT;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
+ lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
+ }
+#endif
+
+ // The preheader block is part of the containing loop (if any).
+ preHead->bbNatLoopNum = pLoopDsc->lpParent;
+
+ if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
+ {
+ if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
+ {
+ preHead->bbWeight = 0;
+ preHead->bbFlags |= BBF_RUN_RARELY;
+ }
+ else
+ {
+ bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
+ ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
+ ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
+
+ if (allValidProfileWeights)
+ {
+ double loopEnteredCount;
+ double loopSkippedCount;
+
+ if (fgHaveValidEdgeWeights)
+ {
+ flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
+ flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
+ noway_assert(edgeToNext != nullptr);
+ noway_assert(edgeToJump != nullptr);
+
+ loopEnteredCount =
+ ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
+ loopSkippedCount =
+ ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
+ }
+ else
+ {
+ loopEnteredCount = (double)head->bbNext->bbWeight;
+ loopSkippedCount = (double)head->bbJumpDest->bbWeight;
+ }
+
+ double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
+
+ // Calculate a good approximation of the preHead's block weight
+ unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
+ preHead->setBBWeight(max(preHeadWeight, 1));
+ noway_assert(!preHead->isRunRarely());
+ }
+ }
+ }
+
+ // Link in the preHead block.
+ fgInsertBBbefore(top, preHead);
+
+ // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
+ // However, that is too expensive at this point. Instead, we update the phi
+ // node block references, if we created pre-header block due to hoisting.
+ // This is sufficient because any definition participating in SSA that flowed
+ // into the phi via the loop header block will now flow through the preheader
+ // block from the header block.
+
+ for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ if (tree->OperGet() != GT_ASG)
+ {
+ break;
+ }
+ GenTreePtr op2 = tree->gtGetOp2();
+ if (op2->OperGet() != GT_PHI)
+ {
+ break;
+ }
+ GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
+ while (args != nullptr)
+ {
+ GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
+ if (phiArg->gtPredBB == head)
+ {
+ phiArg->gtPredBB = preHead;
+ }
+ args = args->Rest();
+ }
+ }
+
+ // The handler can't begin at the top of the loop. If it did, it would be incorrect
+ // to set the handler index on the pre header without updating the exception table.
+ noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
+
+ // Update the EH table to make the hoisted block part of the loop's EH block.
+ fgExtendEHRegionBefore(top);
+
+ // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
+ // (e.g: hoisting expression in a loop with the same 'head' as this one)
+
+ /* Update the loop entry */
+
+ pLoopDsc->lpHead = preHead;
+ pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
+
+    /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds.
+       All predecessors of 'top' (which is the entry to the loop)
+       now have to jump to 'preHead', unless they are dominated by 'head' */
+
+ preHead->bbRefs = 0;
+ fgAddRefPred(preHead, head);
+ bool checkNestedLoops = false;
+
+ for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+
+ if (fgDominate(top, predBlock))
+ {
+ // note: if 'top' dominates predBlock, 'head' dominates predBlock too
+ // (we know that 'head' dominates 'top'), but using 'top' instead of
+ // 'head' in the test allows us to not enter here if 'predBlock == head'
+
+ if (predBlock != pLoopDsc->lpBottom)
+ {
+ noway_assert(predBlock != head);
+ checkNestedLoops = true;
+ }
+ continue;
+ }
+
+ switch (predBlock->bbJumpKind)
+ {
+ case BBJ_NONE:
+ noway_assert(predBlock == head);
+ break;
+
+ case BBJ_COND:
+ if (predBlock == head)
+ {
+ noway_assert(predBlock->bbJumpDest != top);
+ break;
+ }
+ __fallthrough;
+
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ noway_assert(predBlock->bbJumpDest == top);
+ predBlock->bbJumpDest = preHead;
+ preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ if (predBlock == head)
+ {
+ // This is essentially the same as the case where predBlock is a BBJ_NONE. We may not be
+ // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
+ // Just break; the pred list is fixed up after the switch.
+ }
+ else
+ {
+ fgRemoveRefPred(top, predBlock);
+ fgAddRefPred(preHead, predBlock);
+ }
+ break;
+
+ case BBJ_SWITCH:
+ unsigned jumpCnt;
+ jumpCnt = predBlock->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = predBlock->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ assert(*jumpTab);
+ if ((*jumpTab) == top)
+ {
+ (*jumpTab) = preHead;
+
+ fgRemoveRefPred(top, predBlock);
+ fgAddRefPred(preHead, predBlock);
+ preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+
+ noway_assert(!fgGetPredForBlock(top, preHead));
+ fgRemoveRefPred(top, head);
+ fgAddRefPred(top, preHead);
+
+ /*
+ If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
+ (other than the back-edge of the loop we are considering) then we likely have nested
+ do-while loops with the same entry block and inserting the preheader block changes the head
+ of all the nested loops. Now we will update this piece of information in the loop table, and
+ mark all nested loops as having a preheader (the preheader block can be shared among all nested
+ do-while loops with the same entry block).
+ */
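+ // For example, if two nested do-while loops both enter at 'top' and both recorded 'head' as their
+ // lpHead, both loop table entries end up pointing at 'preHead' and are marked LPFLG_HAS_PREHEAD.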
+ if (checkNestedLoops)
+ {
+ for (unsigned l = 0; l < optLoopCount; l++)
+ {
+ if (optLoopTable[l].lpHead == head)
+ {
+ noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
+ noway_assert(optLoopTable[l].lpEntry == top);
+ optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
+ optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
+ l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
+ }
+#endif
+ }
+ }
+ }
+}
+
+bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
+{
+ unsigned lnum = blk->bbNatLoopNum;
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ if (optLoopTable[lnum].lpEntry == blk)
+ {
+ *pLnum = lnum;
+ return true;
+ }
+ lnum = optLoopTable[lnum].lpParent;
+ }
+ return false;
+}
+
+void Compiler::optComputeLoopSideEffects()
+{
+ unsigned lnum;
+ for (lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
+ optLoopTable[lnum].lpContainsCall = false;
+ }
+
+ for (lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
+ { // Is outermost...
+ optComputeLoopNestSideEffects(lnum);
+ }
+ }
+
+ VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
+#ifndef _TARGET_64BIT_
+ VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
+#endif
+
+ for (unsigned i = 0; i < lvaCount; i++)
+ {
+ LclVarDsc* varDsc = &lvaTable[i];
+ if (varDsc->lvTracked)
+ {
+ if (varTypeIsFloating(varDsc->lvType))
+ {
+ VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
+ }
+#ifndef _TARGET_64BIT_
+ else if (varTypeIsLong(varDsc->lvType))
+ {
+ VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
+ }
+#endif
+ }
+ }
+}
+
+void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
+{
+ assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
+ BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
+ for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
+ {
+ optComputeLoopSideEffectsOfBlock(bbInLoop);
+ }
+}
+
+void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
+{
+ unsigned mostNestedLoop = blk->bbNatLoopNum;
+ assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
+
+ AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
+
+ bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
+
+ // Now iterate over the remaining statements, and their trees.
+ for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
+ {
+ for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
+ {
+ genTreeOps oper = tree->OperGet();
+
+ // Even after we set heapHavoc we still may want to know if a loop contains calls
+ if (heapHavoc)
+ {
+ if (oper == GT_CALL)
+ {
+ // Record that this loop contains a call
+ AddContainsCallAllContainingLoops(mostNestedLoop);
+ }
+
+ // If we just set lpContainsCall or it was previously set
+ if (optLoopTable[mostNestedLoop].lpContainsCall)
+ {
+ // We can exit early once both heapHavoc and lpContainsCall are set to true.
+ break;
+ }
+
+ // We are just looking for GT_CALL nodes after heapHavoc was set.
+ continue;
+ }
+
+ // otherwise heapHavoc is not set
+ assert(!heapHavoc);
+
+ // This body is a distillation of the heap-side effect code of value numbering.
+ // We also do a very limited analysis of byref PtrTo values, to cover some cases
+ // that the compiler creates.
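+ // For example, a store through a byref local whose value number is VNF_PtrToArrElem(clsHnd, ...)
+ // is recorded only as a modification of that array element type, rather than as arbitrary heap havoc.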
+
+ if (GenTree::OperIsAssignment(oper))
+ {
+ GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+
+ if (lhs->OperGet() == GT_IND)
+ {
+ GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+ FieldSeqNode* fldSeqArrElem = nullptr;
+
+ if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+ {
+ heapHavoc = true;
+ continue;
+ }
+
+ ArrayInfo arrInfo;
+
+ if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
+ {
+ // If it's a local byref for which we recorded a value number, use that...
+ GenTreeLclVar* argLcl = arg->AsLclVar();
+ if (!fgExcludeFromSsa(argLcl->GetLclNum()))
+ {
+ ValueNum argVN =
+ lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
+ VNFuncApp funcApp;
+ if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
+ funcApp.m_func == VNF_PtrToArrElem)
+ {
+ assert(vnStore->IsVNHandle(funcApp.m_args[0]));
+ CORINFO_CLASS_HANDLE elemType =
+ CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
+ AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
+ // Don't set heapHavoc below.
+ continue;
+ }
+ }
+ // Otherwise...
+ heapHavoc = true;
+ }
+ // Is the LHS an array index expression?
+ else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
+ {
+ // We actually ignore "fldSeq" -- any modification to an S[], at any
+ // field of "S", will lose all information about the array type.
+ CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+ AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
+ }
+ else
+ {
+ // We are only interested in IsFieldAddr()'s fldSeq out parameter.
+ //
+ GenTreePtr obj = nullptr; // unused
+ GenTreePtr staticOffset = nullptr; // unused
+ FieldSeqNode* fldSeq = nullptr;
+
+ if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
+ (fldSeq != FieldSeqStore::NotAField()))
+ {
+ // Get the first (object) field from field seq. Heap[field] will yield the "field map".
+ assert(fldSeq != nullptr);
+ if (fldSeq->IsFirstElemFieldSeq())
+ {
+ fldSeq = fldSeq->m_next;
+ assert(fldSeq != nullptr);
+ }
+
+ AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
+ }
+ else
+ {
+ heapHavoc = true;
+ }
+ }
+ }
+ else if (lhs->OperIsBlk())
+ {
+ GenTreeLclVarCommon* lclVarTree;
+ bool isEntire;
+ if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
+ {
+ // For now, assume arbitrary side effects on the heap...
+ heapHavoc = true;
+ }
+ }
+ else if (lhs->OperGet() == GT_CLS_VAR)
+ {
+ AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
+ }
+ // Otherwise, must be local lhs form. I should assert that.
+ else if (lhs->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVar* lhsLcl = lhs->AsLclVar();
+ GenTreePtr rhs = tree->gtOp.gtOp2;
+ ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
+ // If we gave the RHS a value number, propagate it.
+ if (rhsVN != ValueNumStore::NoVN)
+ {
+ rhsVN = vnStore->VNNormVal(rhsVN);
+ if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
+ {
+ lvaTable[lhsLcl->GetLclNum()]
+ .GetPerSsaData(lhsLcl->GetSsaNum())
+ ->m_vnPair.SetLiberal(rhsVN);
+ }
+ }
+ }
+ }
+ else // not GenTree::OperIsAssignment(oper)
+ {
+ switch (oper)
+ {
+ case GT_COMMA:
+ tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
+ break;
+
+ case GT_ADDR:
+ // Is it an addr of an array index expression?
+ {
+ GenTreePtr addrArg = tree->gtOp.gtOp1;
+ if (addrArg->OperGet() == GT_IND)
+ {
+ // Is the addressed node an array index expression?
+ if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
+ {
+ ArrayInfo arrInfo;
+ bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
+ assert(b);
+ CORINFO_CLASS_HANDLE elemType =
+ EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+ tree->gtVNPair.SetBoth(
+ vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
+ vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
+ // The rest are dummy arguments.
+ vnStore->VNForNull(), vnStore->VNForNull(),
+ vnStore->VNForNull()));
+ }
+ }
+ }
+ break;
+
+ case GT_LOCKADD: // Binop
+ case GT_XADD: // Binop
+ case GT_XCHG: // Binop
+ case GT_CMPXCHG: // Specialop
+ {
+ heapHavoc = true;
+ }
+ break;
+
+ case GT_CALL:
+ {
+ GenTreeCall* call = tree->AsCall();
+
+ // Record that this loop contains a call
+ AddContainsCallAllContainingLoops(mostNestedLoop);
+
+ if (call->gtCallType == CT_HELPER)
+ {
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+ if (s_helperCallProperties.MutatesHeap(helpFunc))
+ {
+ heapHavoc = true;
+ }
+ else if (s_helperCallProperties.MayRunCctor(helpFunc))
+ {
+ // If the call is labeled as "Hoistable", then we've checked the
+ // class that would be constructed, and it is not precise-init, so
+ // the cctor will not be run by this call. Otherwise, it might be,
+ // and might have arbitrary side effects.
+ if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
+ {
+ heapHavoc = true;
+ }
+ }
+ }
+ else
+ {
+ heapHavoc = true;
+ }
+ break;
+ }
+
+ default:
+ // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
+ break;
+ }
+ }
+ }
+ }
+
+ if (heapHavoc)
+ {
+ // Record that all loops containing this block have heap havoc effects.
+ unsigned lnum = mostNestedLoop;
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].lpLoopHasHeapHavoc = true;
+ lnum = optLoopTable[lnum].lpParent;
+ }
+ }
+}
+
+// Marks the lpContainsCall information for "lnum" and any parent loops.
+void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
+{
+ assert(0 <= lnum && lnum < optLoopCount);
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].lpContainsCall = true;
+ lnum = optLoopTable[lnum].lpParent;
+ }
+}
+
+// Adds the variable liveness information for 'blk' to 'this' LoopDsc
+void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
+{
+ VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
+ VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
+
+ VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
+ VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
+}
+
+// Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
+void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
+{
+ assert(0 <= lnum && lnum < optLoopCount);
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].AddVariableLiveness(this, blk);
+ lnum = optLoopTable[lnum].lpParent;
+ }
+}
+
+// Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
+void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
+{
+ assert(0 <= lnum && lnum < optLoopCount);
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].AddModifiedField(this, fldHnd);
+ lnum = optLoopTable[lnum].lpParent;
+ }
+}
+
+// Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
+void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
+{
+ assert(0 <= lnum && lnum < optLoopCount);
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
+ lnum = optLoopTable[lnum].lpParent;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Helper passed to fgWalkTreePre() (via optRemoveTree) to decrement the LclVar usage counts.
+ * The 'keepList' is either a single tree or a list of trees that are formed by
+ * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
+ * gtExtractSideEffList method.
+ */
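+// A keepList holding three side effects has the shape COMMA(eff1, COMMA(eff2, eff3)); any visited
+// tree that matches eff1, eff2, or eff3 is treated as kept, and its subtree is skipped by the walk below.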
+
+/* static */
+Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* comp = data->compiler;
+ GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
+
+ // We may have a non-NULL side effect list that is being kept
+ //
+ if (keepList)
+ {
+ GenTreePtr keptTree = keepList;
+ while (keptTree->OperGet() == GT_COMMA)
+ {
+ assert(keptTree->OperKind() & GTK_SMPOP);
+ GenTreePtr op1 = keptTree->gtOp.gtOp1;
+ GenTreePtr op2 = keptTree->gtGetOp2();
+
+ // For the GT_COMMA case the op1 is part of the original CSE tree
+ // that is being kept because it contains some side-effect
+ //
+ if (tree == op1)
+ {
+ // This tree and all of its sub trees are being kept.
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // For the GT_COMMA case the op2 holds the remaining side-effects of the original CSE tree
+ // which can again be another GT_COMMA or the final side-effect part
+ //
+ keptTree = op2;
+ }
+ if (tree == keptTree)
+ {
+ // This tree and all of its sub trees are being kept.
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+
+ // This node is being removed from the graph of GenTreePtr
+
+ // Look for any local variable references
+
+ if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ /* This variable ref is going away, decrease its ref counts */
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+ assert(lclNum < comp->lvaCount);
+ varDsc = comp->lvaTable + lclNum;
+
+ // make sure it's been initialized
+ assert(comp->compCurBB != nullptr);
+ assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
+
+ /* Decrement its lvRefCnt and lvRefCntWtd */
+
+ // Use getBBWeight to determine the proper block weight.
+ // This impacts the block weights when we have IBC data.
+ varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Routine called to decrement the LclVar ref counts when removing a tree
+ * during range check removal.
+ * This method will decrement the ref counts for any LclVars used below 'deadTree',
+ * unless the node is found in the 'keepList' (which holds the saved side effects).
+ * The keepList is communicated using the walkData.pCallbackData field.
+ * Also, compCurBB must be set to the BasicBlock that contains 'deadTree',
+ * as we need to fetch the block weight when decrementing the ref counts.
+ */
+
+void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
+{
+ // We communicate this value using the walkData.pCallbackData field
+ //
+ fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
+}
+
+/*****************************************************************************
+ *
+ * Given an array index node, mark it as not needing a range check.
+ */
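+// The incoming "tree" has the shape COMMA(GT_ARR_BOUNDS_CHECK, <array access>); the bounds check
+// operand is replaced by either the extracted side effects or a NOP, e.g. COMMA(NOP, IND(addr)).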
+
+void Compiler::optRemoveRangeCheck(
+ GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove)
+{
+ GenTreePtr add1;
+ GenTreePtr* addp;
+
+ GenTreePtr nop1;
+ GenTreePtr* nopp;
+
+ GenTreePtr icon;
+ GenTreePtr mult;
+
+ GenTreePtr base;
+
+ ssize_t ival;
+
+#if !REARRANGE_ADDS
+ noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
+#endif
+
+ noway_assert(stmt->gtOper == GT_STMT);
+ noway_assert(tree->gtOper == GT_COMMA);
+ noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK);
+ noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
+
+ GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Before optRemoveRangeCheck:\n");
+ gtDispTree(tree);
+ }
+#endif
+
+ GenTreePtr sideEffList = nullptr;
+ if (sideEffFlags)
+ {
+ gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
+ }
+
+ // Decrement the ref counts for any LclVars that are being deleted
+ //
+ optRemoveTree(tree->gtOp.gtOp1, sideEffList);
+
+ // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
+ tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
+
+ // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
+ tree->gtFlags |= GTF_DONT_CSE;
+
+ /* Recalculate the gtCostSz, etc... */
+ gtSetStmtInfo(stmt);
+
+ /* Re-thread the nodes if necessary */
+ if (fgStmtListThreaded)
+ {
+ fgSetStmtSeq(stmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("After optRemoveRangeCheck:\n");
+ gtDispTree(tree);
+ }
+#endif
+}
+
+/*****************************************************************************
+ * Return the scale in an array reference, given a pointer to the
+ * multiplication node.
+ */
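+// For example, "i << 2" yields scale 4 with index "i", and "(i * 5) << 2" (a "* 20" that morph
+// split into "* 5 * 4") yields scale 20 with index "i".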
+
+ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk))
+{
+ assert(mul);
+ assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
+ assert(mul->gtOp.gtOp2->IsCnsIntOrI());
+
+ ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
+
+ if (mul->gtOper == GT_LSH)
+ {
+ scale = ((ssize_t)1) << scale;
+ }
+
+ GenTreePtr index = mul->gtOp.gtOp1;
+
+ if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
+ {
+ // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
+ // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
+ // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
+ // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
+ scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
+ index = index->gtOp.gtOp1;
+ }
+
+ assert(!bRngChk || index->gtOper != GT_COMMA);
+
+ if (pIndex)
+ {
+ *pIndex = index;
+ }
+
+ return scale;
+}
+
+/*****************************************************************************
+ * Find the last assignment to the local variable in the block. Return
+ * RHS or NULL. If any local variable in the RHS has been killed in
+ * intervening code, return NULL. If the variable being searched for is killed
+ * in the intervening code, return NULL.
+ *
+ */
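+// For example, if the last assignment to V03 in the block is "V03 = V07 + 1", the search returns
+// "V07 + 1", provided V07 is not reassigned later in the block and the RHS contains no indirections.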
+
+GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
+ GenTreePtr local,
+ VARSET_TP* pKilledInOut,
+ bool* pLhsRhsKilledAfterInit)
+{
+ assert(pKilledInOut);
+ assert(pLhsRhsKilledAfterInit);
+
+ *pLhsRhsKilledAfterInit = false;
+
+ unsigned LclNum = local->gtLclVarCommon.gtLclNum;
+
+ GenTreePtr list = block->bbTreeList;
+ if (list == nullptr)
+ {
+ return nullptr;
+ }
+
+ GenTreePtr rhs = nullptr;
+ GenTreePtr stmt = list;
+ do
+ {
+ stmt = stmt->gtPrev;
+ if (stmt == nullptr)
+ {
+ break;
+ }
+
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ // If we encounter an assignment to a local variable,
+ if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ // And the assigned variable equals the input local,
+ if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
+ {
+ // If the assignment is '=' and it is not a conditional, then return rhs.
+ if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
+ {
+ rhs = tree->gtOp.gtOp2;
+ }
+ // If the assignment is 'op=' or a conditional equal, then the search ends here,
+ // as we found a kill to the input local.
+ else
+ {
+ *pLhsRhsKilledAfterInit = true;
+ assert(rhs == nullptr);
+ }
+ break;
+ }
+ else
+ {
+ LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
+ if (varDsc == nullptr)
+ {
+ return nullptr;
+ }
+ VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
+ }
+ }
+ } while (stmt != list);
+
+ if (rhs == nullptr)
+ {
+ return nullptr;
+ }
+
+ // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
+ varRefKinds rhsRefs = VR_NONE;
+ VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
+ bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
+ if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
+ {
+ // If RHS has been indirectly referenced, consider it a write and a kill.
+ *pLhsRhsKilledAfterInit = true;
+ return nullptr;
+ }
+
+ return rhs;
+}
+
+/*****************************************************************************
+ *
+ * Return true if "op1" is guaranteed to be less than or equal to "op2".
+ */
+
+#if FANCY_ARRAY_OPT
+
+bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
+{
+ if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
+ {
+ add1 += op1->gtIntCon.gtIconVal;
+ add2 += op2->gtIntCon.gtIconVal;
+ }
+ else
+ {
+ /* Check for +/- constant on either operand */
+
+ if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op1 = op1->gtOp.gtOp1;
+ }
+
+ if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op2 = op2->gtOp.gtOp1;
+ }
+
+ /* We only allow local variable references */
+
+ if (op1->gtOper != GT_LCL_VAR)
+ return false;
+ if (op2->gtOper != GT_LCL_VAR)
+ return false;
+ if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
+ return false;
+
+ /* NOTE: Caller ensures that this variable has only one def */
+
+ // printf("limit [%d]:\n", add1); gtDispTree(op1);
+ // printf("size [%d]:\n", add2); gtDispTree(op2);
+ // printf("\n");
+ }
+
+ return (bool)(add1 <= add2);
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// optObtainLoopCloningOpts: Identify optimization candidates and update
+// the "context" for array optimizations.
+//
+// Arguments:
+// context - data structure where all loop cloning info is kept. The
+// optInfo fields of the context are updated with the
+// identified optimization candidates.
+//
+void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
+{
+ for (unsigned i = 0; i < optLoopCount; i++)
+ {
+ JITDUMP("Considering loop %d to clone for optimizations.\n", i);
+ if (optIsLoopClonable(i))
+ {
+ if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
+ {
+ optIdentifyLoopOptInfo(i, context);
+ }
+ }
+ JITDUMP("------------------------------------------------------------\n");
+ }
+ JITDUMP("\n");
+}
+
+//------------------------------------------------------------------------
+// optIdentifyLoopOptInfo: Identify loop optimization candidates and also
+// check if the loop is suitable for the optimizations performed.
+//
+// Arguments:
+// loopNum - the current loop index for which conditions are derived.
+// context - data structure where all loop cloning candidates will be
+// updated.
+//
+// Return Value:
+// Returns false if the loop is not suitable for the optimizations; in that case the context
+// must not contain any optimization candidate for the loop.
+// Otherwise returns true.
+//
+// Operation:
+// Check if the loop is well formed for this optimization and identify the
+// optimization candidates and update the "context" parameter with all the
+// contextual information necessary to perform the optimization later.
+//
+bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
+{
+ noway_assert(loopNum < optLoopCount);
+
+ LoopDsc* pLoop = &optLoopTable[loopNum];
+
+ if (!(pLoop->lpFlags & LPFLG_ITER))
+ {
+ JITDUMP("> No iter flag on loop %d.\n", loopNum);
+ return false;
+ }
+
+ unsigned ivLclNum = pLoop->lpIterVar();
+ if (lvaVarAddrExposed(ivLclNum))
+ {
+ JITDUMP("> Rejected V%02u as iter var because it is address-exposed.\n", ivLclNum);
+ return false;
+ }
+
+ BasicBlock* head = pLoop->lpHead;
+ BasicBlock* end = pLoop->lpBottom;
+ BasicBlock* beg = head->bbNext;
+
+ if (end->bbJumpKind != BBJ_COND)
+ {
+ JITDUMP("> Couldn't find termination test.\n");
+ return false;
+ }
+
+ if (end->bbJumpDest != beg)
+ {
+ JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
+ return false;
+ }
+
+ // TODO-CQ: CLONE: Mark increasing or decreasing loops.
+ if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
+ {
+ JITDUMP("> Loop iteration operator not matching\n");
+ return false;
+ }
+
+ if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
+ (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
+ {
+ JITDUMP("> Loop limit is not a constant, a variable, or an array length\n");
+ return false;
+ }
+
+ if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) &&
+ (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
+ ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) &&
+ (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
+ {
+ JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the loop.\n",
+ GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
+ return false;
+ }
+
+ if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
+ {
+ JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
+ pLoop->lpTestTree->gtTreeID);
+ return false;
+ }
+
+#ifdef DEBUG
+ GenTreePtr op1 = pLoop->lpIterator();
+ noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
+#endif
+
+ JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
+ end->bbNext ? end->bbNext->bbNum : 0);
+
+ LoopCloneVisitorInfo info(context, loopNum, nullptr);
+ for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
+ {
+ compCurBB = block;
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ info.stmt = stmt;
+ fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
+ }
+ }
+
+ return true;
+}
+
+//---------------------------------------------------------------------------------------------------------------
+// optExtractArrIndex: Try to extract the array index from "tree".
+//
+// Arguments:
+// tree the tree to be checked if it is the array [] operation.
+// result the extracted GT_INDEX information is updated in result.
+// lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
+//
+// Return Value:
+// Returns true if the array index can be extracted; otherwise, returns false. See the assumption about
+// what will be extracted. The "result" variable's rank parameter is advanced for every
+// dimension of [] encountered.
+//
+// Operation:
+// Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
+// we have converted a GT_INDEX tree into a scaled index base offset expression. We need
+// to reconstruct this to be able to know if this is an array access.
+//
+// Assumption:
+// The method extracts only if the array base and indices are GT_LCL_VAR.
+//
+// TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
+//
+// [000000001AF828D8] ---XG------- indir int
+// [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
+// [000000001AF87340] ------------ + byref
+// [000000001AF87160] -------N---- const long 2
+// [000000001AF871D8] ------------ << long
+// [000000001AF870C0] ------------ cast long <- int
+// [000000001AF86F30] i----------- lclVar int V04 loc0
+// [000000001AF87250] ------------ + byref
+// [000000001AF86EB8] ------------ lclVar ref V01 arg1
+// [000000001AF87468] ---XG------- comma int
+// [000000001AF87020] ---X-------- arrBndsChk void
+// [000000001AF86FA8] ---X-------- arrLen int
+// [000000001AF827E8] ------------ lclVar ref V01 arg1
+// [000000001AF82860] ------------ lclVar int V04 loc0
+// [000000001AF829F0] -A-XG------- = int
+// [000000001AF82978] D------N---- lclVar int V06 tmp0
+//
+bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
+{
+ if (tree->gtOper != GT_COMMA)
+ {
+ return false;
+ }
+ GenTreePtr before = tree->gtGetOp1();
+ if (before->gtOper != GT_ARR_BOUNDS_CHECK)
+ {
+ return false;
+ }
+ GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
+ if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
+ {
+ return false;
+ }
+ if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
+ {
+ return false;
+ }
+ unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
+ if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
+ {
+ return false;
+ }
+
+ unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
+
+ GenTreePtr after = tree->gtGetOp2();
+
+ if (after->gtOper != GT_IND)
+ {
+ return false;
+ }
+ // It used to be the case that arrBndsChks for struct types would fail the previous check because
+ // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
+ // return false if the type of 'after' is a struct type. (This was causing us to clone loops
+ // that we were not previously cloning.)
+ // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
+ // types.
+ if (varTypeIsStruct(after))
+ {
+ return false;
+ }
+
+ GenTreePtr sibo = after->gtGetOp1();
+ if (sibo->gtOper != GT_ADD)
+ {
+ return false;
+ }
+ GenTreePtr sib = sibo->gtGetOp1();
+ GenTreePtr ofs = sibo->gtGetOp2();
+ if (ofs->gtOper != GT_CNS_INT)
+ {
+ return false;
+ }
+ if (sib->gtOper != GT_ADD)
+ {
+ return false;
+ }
+ GenTreePtr si = sib->gtGetOp2();
+ GenTreePtr base = sib->gtGetOp1();
+ if (si->gtOper != GT_LSH)
+ {
+ return false;
+ }
+ if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
+ {
+ return false;
+ }
+ GenTreePtr scale = si->gtGetOp2();
+ GenTreePtr index = si->gtGetOp1();
+ if (scale->gtOper != GT_CNS_INT)
+ {
+ return false;
+ }
+#ifdef _TARGET_AMD64_
+ if (index->gtOper != GT_CAST)
+ {
+ return false;
+ }
+ GenTreePtr indexVar = index->gtGetOp1();
+#else
+ GenTreePtr indexVar = index;
+#endif
+ if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
+ {
+ return false;
+ }
+ if (lhsNum == BAD_VAR_NUM)
+ {
+ result->arrLcl = arrLcl;
+ }
+ result->indLcls.Push(indLcl);
+ result->bndsChks.Push(tree);
+ result->useBlock = compCurBB;
+ result->rank++;
+
+ return true;
+}
+
+//---------------------------------------------------------------------------------------------------------------
+// optReconstructArrIndex: Reconstruct array index.
+//
+// Arguments:
+// tree the tree to be checked if it is an array [][][] operation.
+// result the extracted GT_INDEX information.
+// lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
+//
+// Return Value:
+// Returns true if the array index can be extracted; otherwise, returns false. The "rank" field in
+// "result" contains the array access depth. The "indLcls" fields contain the indices.
+//
+// Operation:
+// Recursively look for a list of array indices. In the example below, we encounter,
+// V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
+// The return value would then be:
+// ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
+//
+// V00[V01][V02] would be morphed as:
+//
+// [000000001B366848] ---XG------- indir int
+// [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
+// [000000001B36C200] ---XG------- comma int
+// [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
+// [000000001B36C278] -A-XG------- comma int
+// [000000001B366730] R--XG------- indir ref
+// [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
+// [000000001B36C818] ---XG------- comma ref
+// [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
+// [000000001B36BB60] -A-XG------- = ref
+// [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
+// [000000001B36A668] -A-XG------- = int
+// [000000001B36A5F0] D------N---- lclVar int V03 tmp0
+//
+// Assumption:
+// The method extracts only if the array base and indices are GT_LCL_VAR.
+//
+bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
+{
+ // If we can extract "tree" (which is a top level comma) return.
+ if (optExtractArrIndex(tree, result, lhsNum))
+ {
+ return true;
+ }
+ // We have a comma (check if array base expr is computed in "before"), descend further.
+ else if (tree->OperGet() == GT_COMMA)
+ {
+ GenTreePtr before = tree->gtGetOp1();
+ // "before" should evaluate an array base for the "after" indexing.
+ if (before->OperGet() != GT_ASG)
+ {
+ return false;
+ }
+ GenTreePtr lhs = before->gtGetOp1();
+ GenTreePtr rhs = before->gtGetOp2();
+
+ // "rhs" should contain a GT_INDEX
+ if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
+ {
+ return false;
+ }
+ unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
+ GenTreePtr after = tree->gtGetOp2();
+ // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
+ return optExtractArrIndex(after, result, lhsNum);
+ }
+ return false;
+}
+
+/* static */
+Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
+{
+ return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
+}
+
+//-------------------------------------------------------------------------
+// optIsStackLocalInvariant: Is stack local invariant in loop.
+//
+// Arguments:
+// loopNum The loop in which the variable is tested for invariance.
+// lclNum The local that is tested for invariance in the loop.
+//
+// Return Value:
+// Returns true if the variable is loop invariant in loopNum.
+//
+bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
+{
+ if (lvaVarAddrExposed(lclNum))
+ {
+ return false;
+ }
+ if (optIsVarAssgLoop(loopNum, lclNum))
+ {
+ return false;
+ }
+ return true;
+}
+
+//----------------------------------------------------------------------------------------------
+// optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
+// identify as potential candidate and update the loop context.
+//
+// Arguments:
+// tree The tree encountered during the tree walk.
+// info Supplies information about the current block or stmt in which the tree is.
+// Also supplies the "context" pointer for updating with loop cloning
+// candidates. Also supplies loopNum.
+//
+// Operation:
+// If array index can be reconstructed, check if the iter var of the loop matches the
+// array index var in some dim. Also ensure other index vars before the identified
+// dim are loop invariant.
+//
+// Return Value:
+// Skip sub trees if the optimization candidate is identified or else continue walking
+//
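+// For example, for "a[j][i]" inside a loop whose iter var is 'i', the access qualifies on
+// dimension 1 provided 'a' and 'j' are loop invariant; the candidate is then recorded in the
+// context as a LcJaggedArrayOptInfo for that statement.
+//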
+Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
+{
+ ArrIndex arrIndex(getAllocator());
+
+ // Check if array index can be optimized.
+ if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
+ {
+ assert(tree->gtOper == GT_COMMA);
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("Found ArrIndex at tree ");
+ printTreeID(tree);
+ printf(" which is equivalent to: ");
+ arrIndex.Print();
+ JITDUMP("\n");
+ }
+#endif
+ if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
+ {
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // Walk the dimensions and see if iterVar of the loop is used as index.
+ for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
+ {
+ // Is index variable also used as the loop iter var.
+ if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
+ {
+ // Check the previous indices are all loop invariant.
+ for (unsigned dim2 = 0; dim2 < dim; ++dim2)
+ {
+ if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
+ {
+ JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
+ arrIndex.Print();
+ JITDUMP(" on dim %d\n", dim);
+ }
+#endif
+ // Update the loop context.
+ info->context->EnsureLoopOptInfo(info->loopNum)
+ ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
+ }
+ else
+ {
+ JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
+ dim);
+ }
+ }
+ return WALK_SKIP_SUBTREES;
+ }
+ else if (tree->gtOper == GT_ARR_ELEM)
+ {
+ // TODO-CQ: CLONE: Implement.
+ return WALK_SKIP_SUBTREES;
+ }
+ return WALK_CONTINUE;
+}
+
+struct optRangeCheckDsc
+{
+ Compiler* pCompiler;
+ bool bValidIndex;
+};
+/*
+ Walk to make sure that only locals and constants are contained in the index
+ for a range check
+*/
+Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
+
+ if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
+ {
+ pData->bValidIndex = false;
+ return WALK_ABORT;
+ }
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
+ {
+ pData->bValidIndex = false;
+ return WALK_ABORT;
+ }
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*
+ Returns true if a range check can legally be removed. For the moment it checks
+ that the array is a local that is not address-exposed (so it is not subject to race
+ conditions) and that the index involves only constants and non-address-exposed locals.
+*/
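+// For example, the check for "arr[i]" can be removed when 'arr' and 'i' are non-address-exposed
+// locals, but not when the array comes from a field (e.g. "this.arr[i]") or the index is loaded
+// from memory.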
+bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
+ GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
+ GenTreePtr pArray = bndsChk->GetArray();
+ if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
+ {
+ return false;
+ }
+ GenTreePtr pIndex = bndsChk->gtIndex;
+
+ // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
+ // Otherwise we can be targeted by malicious race-conditions.
+ if (pArray != nullptr)
+ {
+ if (pArray->gtOper != GT_LCL_VAR)
+ {
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Can't remove range check if the array isn't referenced with a local\n");
+ gtDispTree(pArray);
+ }
+#endif
+ return false;
+ }
+ else
+ {
+ noway_assert(pArray->gtType == TYP_REF);
+ noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
+
+ if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
+ {
+ // If the array address has been taken, don't do the optimization
+ // (this restriction can be lowered a bit, but I don't think it's worth it)
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Can't remove range check if the array has its address taken\n");
+ gtDispTree(pArray);
+ }
+#endif
+ return false;
+ }
+ }
+ }
+
+ optRangeCheckDsc Data;
+ Data.pCompiler = this;
+ Data.bValidIndex = true;
+
+ fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
+
+ if (!Data.bValidIndex)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Can't remove range check with this index");
+ gtDispTree(pIndex);
+ }
+#endif
+
+ return false;
+ }
+
+ return true;
+}
+
+/******************************************************************************
+ *
+ * Replace x==null with (x|x)==0 if x is a GC-type.
+ * This will stress code-gen and the emitter to make sure they support such trees.
+ */
+
+#ifdef DEBUG
+
+void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
+{
+ if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
+ {
+ return;
+ }
+
+ noway_assert(condBlock->bbJumpKind == BBJ_COND);
+ GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
+
+ noway_assert(condStmt->gtOper == GT_JTRUE);
+
+ bool isBool;
+ GenTreePtr relop;
+
+ GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
+
+ if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
+ {
+ return;
+ }
+
+ if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
+ {
+ return;
+ }
+
+ GenTreePtr comparandClone = gtCloneExpr(comparand);
+
+ // Bump up the ref-counts of any variables in 'comparandClone'
+ compCurBB = condBlock;
+ fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void*)this, true);
+
+ noway_assert(relop->gtOp.gtOp1 == comparand);
+ genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
+ relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
+
+ // The comparand type has already been checked, and we have a const int, so there is no harm
+ // in morphing it into a TYP_I_IMPL.
+ noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
+ relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
+}
+
+#endif
+
+/******************************************************************************
+ * Function used by folding of boolean conditionals
+ * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
+ * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ/GT_NE node with "op1"
+ * being a boolean lclVar and "op2" the const 0/1.
+ * On success, the comparand (i.e. boolVal) is returned; otherwise NULL.
+ * compPtr returns the compare node (i.e. the GT_EQ or GT_NE node).
+ * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
+ * When the returned *boolPtr is true and the comparison was against a 1 (i.e. true)
+ * value, we morph the tree by reversing the GT_EQ/GT_NE and changing the 1 to 0.
+ */
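+// For example, given "JTRUE(EQ(V02, 1))" where V02 is a boolean local, the compare is rewritten to
+// "NE(V02, 0)", *compPtr points at that node, *boolPtr is set to true, and V02 is returned.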
+
+GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
+{
+ bool isBool = false;
+
+ noway_assert(condBranch->gtOper == GT_JTRUE);
+ GenTree* cond = condBranch->gtOp.gtOp1;
+
+ /* The condition must be "!= 0" or "== 0" */
+
+ if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
+ {
+ return nullptr;
+ }
+
+ /* Return the compare node to the caller */
+
+ *compPtr = cond;
+
+ /* Get hold of the comparands */
+
+ GenTree* opr1 = cond->gtOp.gtOp1;
+ GenTree* opr2 = cond->gtOp.gtOp2;
+
+ if (opr2->gtOper != GT_CNS_INT)
+ {
+ return nullptr;
+ }
+
+ if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
+ {
+ return nullptr;
+ }
+
+ ssize_t ival2 = opr2->gtIntCon.gtIconVal;
+
+ /* Is the value a boolean?
+ * We can either have a boolean expression (marked GTF_BOOLEAN) or
+ * a local variable that is marked as being boolean (lvIsBoolean) */
+
+ if (opr1->gtFlags & GTF_BOOLEAN)
+ {
+ isBool = true;
+ }
+ else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
+ {
+ isBool = true;
+ }
+ else if (opr1->gtOper == GT_LCL_VAR)
+ {
+ /* is it a boolean local variable */
+
+ unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+
+ if (lvaTable[lclNum].lvIsBoolean)
+ {
+ isBool = true;
+ }
+ }
+
+ /* Was our comparison against the constant 1 (i.e. true) */
+ if (ival2 == 1)
+ {
+ // If this is a boolean expression tree we can reverse the relop
+ // and change the true to false.
+ if (isBool)
+ {
+ gtReverseCond(cond);
+ opr2->gtIntCon.gtIconVal = 0;
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+
+ *boolPtr = isBool;
+ return opr1;
+}
+
+void Compiler::optOptimizeBools()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optOptimizeBools()\n");
+ if (verboseTrees)
+ {
+ printf("Blocks/Trees before phase\n");
+ fgDispBasicBlocks(true);
+ }
+ }
+#endif
+ bool change;
+
+ do
+ {
+ change = false;
+
+ for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
+ {
+ /* We're only interested in conditional jumps here */
+
+ if (b1->bbJumpKind != BBJ_COND)
+ {
+ continue;
+ }
+
+ /* If there is no next block, we're done */
+
+ BasicBlock* b2 = b1->bbNext;
+ if (!b2)
+ {
+ break;
+ }
+
+ /* The next block must not be marked as BBF_DONT_REMOVE */
+ if (b2->bbFlags & BBF_DONT_REMOVE)
+ {
+ continue;
+ }
+
+ /* The next block also needs to be a condition */
+
+ if (b2->bbJumpKind != BBJ_COND)
+ {
+#ifdef DEBUG
+ optOptimizeBoolsGcStress(b1);
+#endif
+ continue;
+ }
+
+ bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
+
+ if (b1->bbJumpDest == b2->bbJumpDest)
+ {
+ /* Given the following sequence of blocks :
+ B1: brtrue(t1, BX)
+ B2: brtrue(t2, BX)
+ B3:
+ we will try to fold it to :
+ B1: brtrue(t1|t2, BX)
+ B3:
+ */
+
+ sameTarget = true;
+ }
+ else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
+ {
+ /* Given the following sequence of blocks :
+ B1: brtrue(t1, B3)
+ B2: brtrue(t2, BX)
+ B3:
+ we will try to fold it to :
+ B1: brtrue((!t1)&&t2, B3)
+ B3:
+ */
+
+ sameTarget = false;
+ }
+ else
+ {
+ continue;
+ }
+
+ /* The second block must contain a single statement */
+
+ GenTreePtr s2 = b2->bbTreeList;
+ if (s2->gtPrev != s2)
+ {
+ continue;
+ }
+
+ noway_assert(s2->gtOper == GT_STMT);
+ GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
+ noway_assert(t2->gtOper == GT_JTRUE);
+
+ /* Find the condition for the first block */
+
+ GenTreePtr s1 = b1->bbTreeList->gtPrev;
+
+ noway_assert(s1->gtOper == GT_STMT);
+ GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
+ noway_assert(t1->gtOper == GT_JTRUE);
+
+ if (b2->countOfInEdges() > 1)
+ {
+ continue;
+ }
+
+ /* Find the branch conditions of b1 and b2 */
+
+ bool bool1, bool2;
+
+ GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
+ if (!c1)
+ {
+ continue;
+ }
+
+ GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
+ if (!c2)
+ {
+ continue;
+ }
+
+ noway_assert((t1->gtOper == GT_EQ || t1->gtOper == GT_NE) && t1->gtOp.gtOp1 == c1);
+ noway_assert((t2->gtOper == GT_EQ || t2->gtOper == GT_NE) && t2->gtOp.gtOp1 == c2);
+
+ // Leave out floats where the bit-representation is more complicated
+ // - there are two representations for 0.
+ //
+ if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
+ {
+ continue;
+ }
+
+ // Make sure the types involved are of the same sizes
+ if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
+ {
+ continue;
+ }
+ if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
+ {
+ continue;
+ }
+#ifdef _TARGET_ARMARCH_
+ // Skip the small operand which we cannot encode.
+ if (varTypeIsSmall(c1->TypeGet()))
+ continue;
+#endif
+ /* The second condition must not contain side effects */
+
+ if (c2->gtFlags & GTF_GLOB_EFFECT)
+ {
+ continue;
+ }
+
+ /* The second condition must not be too expensive */
+
+ gtPrepareCost(c2);
+
+ if (c2->gtCostEx > 12)
+ {
+ continue;
+ }
+
+ genTreeOps foldOp;
+ genTreeOps cmpOp;
+ var_types foldType = c1->TypeGet();
+ if (varTypeIsGC(foldType))
+ {
+ foldType = TYP_I_IMPL;
+ }
+
+ if (sameTarget)
+ {
+ /* Both conditions must be the same */
+
+ if (t1->gtOper != t2->gtOper)
+ {
+ continue;
+ }
+
+ if (t1->gtOper == GT_EQ)
+ {
+ /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
+ So we will branch to BX if (c1&c2)==0 */
+
+ foldOp = GT_AND;
+ cmpOp = GT_EQ;
+ }
+ else
+ {
+ /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
+ So we will branch to BX if (c1|c2)!=0 */
+
+ foldOp = GT_OR;
+ cmpOp = GT_NE;
+ }
+ }
+ else
+ {
+ /* The b1 condition must be the reverse of the b2 condition */
+
+ if (t1->gtOper == t2->gtOper)
+ {
+ continue;
+ }
+
+ if (t1->gtOper == GT_EQ)
+ {
+ /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
+ So we will branch to BX if (c1&c2)!=0 */
+
+ foldOp = GT_AND;
+ cmpOp = GT_NE;
+ }
+ else
+ {
+ /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
+ So we will branch to BX if (c1|c2)==0 */
+
+ foldOp = GT_OR;
+ cmpOp = GT_EQ;
+ }
+ }
+
+ // Anding requires both values to be 0 or 1
+
+ if ((foldOp == GT_AND) && (!bool1 || !bool2))
+ {
+ continue;
+ }
+
+ //
+ // Now update the trees
+ //
+ GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
+ if (bool1 && bool2)
+ {
+ /* When we 'OR'/'AND' two booleans, the result is boolean as well */
+ cmpOp1->gtFlags |= GTF_BOOLEAN;
+ }
+
+ t1->SetOper(cmpOp);
+ t1->gtOp.gtOp1 = cmpOp1;
+ t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
+
+#if FEATURE_SET_FLAGS
+ // For comparisons against zero we will have the GTF_SET_FLAGS set
+ // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
+ // during the CSE phase.
+ //
+ // So make sure to clear any GTF_SET_FLAGS bit on these operations
+ // as they are no longer feeding directly into a comparison against zero.
+
+ // Make sure that the GTF_SET_FLAGS bit is cleared.
+ // Fix 388436 ARM JitStress WP7
+ c1->gtFlags &= ~GTF_SET_FLAGS;
+ c2->gtFlags &= ~GTF_SET_FLAGS;
+
+ // The new top level node that we just created does feed directly into
+ // a comparison against zero, so set the GTF_SET_FLAGS bit so that
+ // we generate an instruction that sets the flags, which allows us
+ // to omit the cmp with zero instruction.
+
+ // Request that the codegen for cmpOp1 sets the condition flags
+ // when it generates the code for cmpOp1.
+ //
+ cmpOp1->gtRequestSetFlags();
+#endif
+
+ flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
+ flowList* edge2;
+
+ /* Modify the target of the conditional jump and update bbRefs and bbPreds */
+
+ if (sameTarget)
+ {
+ edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
+ }
+ else
+ {
+ edge2 = fgGetPredForBlock(b2->bbNext, b2);
+
+ fgRemoveRefPred(b1->bbJumpDest, b1);
+
+ b1->bbJumpDest = b2->bbJumpDest;
+
+ fgAddRefPred(b2->bbJumpDest, b1);
+ }
+
+ noway_assert(edge1 != nullptr);
+ noway_assert(edge2 != nullptr);
+
+ BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
+ BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
+ if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
+ {
+ edge1->flEdgeWeightMin = edgeSumMin;
+ edge1->flEdgeWeightMax = edgeSumMax;
+ }
+ else
+ {
+ edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
+ }
+
+ /* Get rid of the second block (which is a BBJ_COND) */
+
+ noway_assert(b1->bbJumpKind == BBJ_COND);
+ noway_assert(b2->bbJumpKind == BBJ_COND);
+ noway_assert(b1->bbJumpDest == b2->bbJumpDest);
+ noway_assert(b1->bbNext == b2);
+ noway_assert(b2->bbNext);
+
+ fgUnlinkBlock(b2);
+ b2->bbFlags |= BBF_REMOVED;
+
+ // If b2 was the last block of a try or handler, update the EH table.
+
+ ehUpdateForDeletedBlock(b2);
+
+ /* Update bbRefs and bbPreds */
+
+ /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
+ * Remove pred 'b2' for 'b2->bbJumpDest' */
+
+ fgReplacePred(b2->bbNext, b2, b1);
+
+ fgRemoveRefPred(b2->bbJumpDest, b2);
+
+ /* Update the block numbers and try again */
+
+ change = true;
+ /*
+ do
+ {
+ b2->bbNum = ++n1;
+ b2 = b2->bbNext;
+ }
+ while (b2);
+ */
+
+ // Update loop table
+ fgUpdateLoopsAfterCompacting(b1, b2);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
+ b1->bbNum, b2->bbNum);
+ gtDispTree(s1);
+ printf("\n");
+ }
+#endif
+ }
+ } while (change);
+
+#ifdef DEBUG
+ fgDebugCheckBBlist();
+#endif
+}
diff --git a/src/jit/phase.h b/src/jit/phase.h
new file mode 100644
index 0000000000..d8e2940089
--- /dev/null
+++ b/src/jit/phase.h
@@ -0,0 +1,77 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef _PHASE_H_
+#define _PHASE_H_
+
+class Phase
+{
+public:
+ virtual void Run();
+
+protected:
+ Phase(Compiler* _comp, const char* _name, Phases _phase = PHASE_NUMBER_OF) : comp(_comp), name(_name), phase(_phase)
+ {
+ }
+
+ virtual void PrePhase();
+ virtual void DoPhase() = 0;
+ virtual void PostPhase();
+
+ Compiler* comp;
+ const char* name;
+ Phases phase;
+};
+
+inline void Phase::Run()
+{
+ PrePhase();
+ DoPhase();
+ PostPhase();
+}
+
+inline void Phase::PrePhase()
+{
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("*************** In %s\n", name);
+ printf("Trees before %s\n", name);
+ comp->fgDispBasicBlocks(true);
+ }
+
+ if (comp->expensiveDebugCheckLevel >= 2)
+ {
+ // If everyone used the Phase class, this would duplicate the PostPhase() from the previous phase.
+ // But, not everyone does, so go ahead and do the check here, too.
+ comp->fgDebugCheckBBlist();
+ comp->fgDebugCheckLinks();
+ }
+#endif // DEBUG
+}
+
+inline void Phase::PostPhase()
+{
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("*************** Exiting %s\n", name);
+ printf("Trees after %s\n", name);
+ comp->fgDispBasicBlocks(true);
+ }
+#endif // DEBUG
+
+ if (phase != PHASE_NUMBER_OF)
+ {
+ comp->EndPhase(phase);
+ }
+
+#ifdef DEBUG
+ comp->fgDebugCheckBBlist();
+ comp->fgDebugCheckLinks();
+#endif // DEBUG
+}
+
+#endif /* End of _PHASE_H_ */
diff --git a/src/jit/protojit/.gitmirror b/src/jit/protojit/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/protojit/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror. \ No newline at end of file
diff --git a/src/jit/protojit/CMakeLists.txt b/src/jit/protojit/CMakeLists.txt
new file mode 100644
index 0000000000..e3cc769ba0
--- /dev/null
+++ b/src/jit/protojit/CMakeLists.txt
@@ -0,0 +1,51 @@
+project(protojit)
+
+add_definitions(-DALT_JIT)
+add_definitions(-DFEATURE_NO_HOST)
+add_definitions(-DSELF_NO_HOST)
+remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
+
+add_library_clr(protojit
+ SHARED
+ ${SHARED_LIB_SOURCES}
+)
+
+add_dependencies(protojit jit_exports)
+
+set_property(TARGET protojit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION})
+set_property(TARGET protojit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE})
+
+set(RYUJIT_LINK_LIBRARIES
+ utilcodestaticnohost
+ gcinfo
+)
+
+if(CLR_CMAKE_PLATFORM_UNIX)
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ mscorrc_debug
+ coreclrpal
+ palrt
+ )
+else()
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ msvcrt.lib
+ kernel32.lib
+ advapi32.lib
+ ole32.lib
+ oleaut32.lib
+ uuid.lib
+ user32.lib
+ version.lib
+ shlwapi.lib
+ bcrypt.lib
+ crypt32.lib
+ RuntimeObject.lib
+ )
+endif(CLR_CMAKE_PLATFORM_UNIX)
+
+target_link_libraries(protojit
+ ${RYUJIT_LINK_LIBRARIES}
+)
+
+# add the install targets
+install_clr(protojit)
diff --git a/src/jit/protojit/SOURCES b/src/jit/protojit/SOURCES
new file mode 100644
index 0000000000..5f46bf8aad
--- /dev/null
+++ b/src/jit/protojit/SOURCES
@@ -0,0 +1,10 @@
+
+#
+# DO NOT EDIT THIS FILE!!! Modify the project file in this directory
+# This file merely allows the MSBuild project file in this directory to be integrated with Build.Exe
+#
+TARGETTYPE=NOTARGET
+CLR_TARGETTYPE=DLL
+MSBuildProjectFile=protojit.nativeproj
+SOURCES=
+
diff --git a/src/jit/protojit/makefile b/src/jit/protojit/makefile
new file mode 100644
index 0000000000..84abb1cb0d
--- /dev/null
+++ b/src/jit/protojit/makefile
@@ -0,0 +1,7 @@
+
+#
+# DO NOT EDIT THIS FILE!!! Modify the project file in this directory
+# This file merely allows the MSBuild project file in this directory to be integrated with Build.Exe
+#
+!INCLUDE $(NTMAKEENV)\msbuild.def
+ \ No newline at end of file
diff --git a/src/jit/protojit/protojit.def b/src/jit/protojit/protojit.def
new file mode 100644
index 0000000000..1603af74ca
--- /dev/null
+++ b/src/jit/protojit/protojit.def
@@ -0,0 +1,7 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+EXPORTS
+ getJit
+ jitStartup
+ sxsJitStartup
diff --git a/src/jit/protojit/protojit.nativeproj b/src/jit/protojit/protojit.nativeproj
new file mode 100644
index 0000000000..3de0f0aeed
--- /dev/null
+++ b/src/jit/protojit/protojit.nativeproj
@@ -0,0 +1,88 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="dogfood">
+
+ <!--
+ PROTO JIT: The purpose of this module is to provide an isolated environment to develop
+ the RyuJIT backend without interfering with the development of the frontend. The
+ idea is to fork codegen and registerfp; that way we leave the PUCLR backend intact so
+ it can still be consumed by the RyuJIT frontend, separately maintaining the code stability
+ of the PUCLR codegen.cpp logic.
+
+ This module is meant to be a 'development' JIT, i.e. try to use the generated code by this JIT
+ and in case something goes wrong, fallback to the default JIT.
+ -->
+
+ <!-- Import the CLR's settings -->
+
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\clr.props" />
+
+ <PropertyGroup>
+
+ <!-- Set the output -->
+
+ <OutputName>protojit</OutputName>
+ <StaticLinkJit>$(FeatureMergeJitAndEngine)</StaticLinkJit>
+ <FeatureMergeJitAndEngine>false</FeatureMergeJitAndEngine>
+ <TargetType>DYNLINK</TargetType>
+ <FileToMarkForSigning>$(BinariesDirectory)\protojit.dll</FileToMarkForSigning>
+ <BuildCoreBinaries>false</BuildCoreBinaries>
+ <BuildSysBinaries>false</BuildSysBinaries>
+
+ <!-- Motherhood & apple pie here -->
+
+ <DllEntryPoint>_DllMainCRTStartup</DllEntryPoint>
+ <LinkSubsystem>windows</LinkSubsystem>
+ <LibCLib Condition="'$(StaticLinkJit)'!='true'">$(ClrCrtLib)</LibCLib>
+
+ <!-- JIT specific baloney -->
+
+ <LinkModuleDefinitionFile>$(OutputName).def</LinkModuleDefinitionFile>
+
+ <ClDefines>$(ClDefines);ALT_JIT</ClDefines>
+ <ClDefines Condition="'$(BuildArchitecture)' == 'amd64'">$(ClDefines);FEATURE_SIMD;FEATURE_AVX_SUPPORT</ClDefines>
+
+ <Win32DllLibs>$(SdkLibPath)\kernel32.lib;$(SdkLibPath)\user32.lib;$(SdkLibPath)\advapi32.lib;$(SdkLibPath)\oleaut32.lib;$(SdkLibPath)\uuid.lib</Win32DllLibs>
+ <Win32DllLibs>$(Win32DllLibs);$(ClrLibPath)\utilcode.lib</Win32DllLibs>
+
+ <!-- Profile-guided optimization -->
+
+ <PogoOptimize>false</PogoOptimize>
+ <PogoInstrument>false</PogoInstrument>
+ <PogoUpdate>false</PogoUpdate>
+
+    <!-- Do we want to build with msvcdis disassembly capability? This should be enabled for DEBUG and disabled otherwise.
+         However, for debugging purposes such as generating assembly diffs between CHK and RET JITs, it can be useful
+         to enable it temporarily in non-DEBUG builds by forcing the EnableLateDisasm property to 'true'.
+ -->
+ <EnableLateDisasm Condition="'$(DebugBuild)' == 'true' and '$(BuildArchitecture)' != 'arm' and '$(BuildForCoreSystem)' != 'true'">true</EnableLateDisasm>
+ <!--
+ <EnableLateDisasm Condition="'$(BuildArchitecture)' != 'arm' and '$(BuildForCoreSystem)' != 'true'">true</EnableLateDisasm>
+ -->
+ <ClDefines Condition="'$(EnableLateDisasm)' == 'true'">$(ClDefines);LATE_DISASM=1</ClDefines>
+ <LinkDelayLoad Condition="'$(EnableLateDisasm)' == 'true'">$(LinkDelayLoad);msvcdis$(VC_NONCRT_ProdVerX).dll</LinkDelayLoad>
+ <UseDelayimpLib Condition="'$(EnableLateDisasm)' == 'true' and '$(FeatureMergeJitAndEngine)'!='true'">true</UseDelayimpLib>
+
+ </PropertyGroup>
+
+ <!-- Leaf Project Items -->
+
+ <ItemGroup>
+ <ProjectReference Include="$(ClrSrcDirectory)utilcode\dyncrt\dyncrt.nativeproj" />
+ <TargetLib Include="$(SdkLibPath)\mscoree.lib" />
+ <TargetLib Condition="'$(BuildArchitecture)'!='i386'" Include="$(ClrLibPath)\gcinfo.lib">
+ <ProjectReference>$(ClrSrcDirectory)gcinfo\lib\gcinfo.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(UseDelayimpLib)' == 'true'" Include="$(ClrLibPath)\delayimp.lib">
+ <ProjectReference>$(ClrSrcDirectory)delayimp\delayimp.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(DebugBuild)' == 'true'" Include="$(ClrLibPath)\gcdump.lib">
+ <ProjectReference>$(ClrSrcDirectory)gcdump\lib\gcdump.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(DebugBuild)' == 'true'" Include="$(SdkLibPath)\ole32.lib" />
+ <TargetLib Condition="'$(EnableLateDisasm)' == 'true'" Include="$(VCToolsLibPath)\msvcdis.lib" />
+ <RCResourceFile Include="..\native.rc" />
+ </ItemGroup>
+
+ <Import Project="..\jit.settings.targets" />
+
+</Project>
diff --git a/src/jit/rangecheck.cpp b/src/jit/rangecheck.cpp
new file mode 100644
index 0000000000..ae0c792f11
--- /dev/null
+++ b/src/jit/rangecheck.cpp
@@ -0,0 +1,1388 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+
+#include "jitpch.h"
+#include "rangecheck.h"
+
+// Max stack depth (path length) in walking the UD chain.
+static const int MAX_SEARCH_DEPTH = 100;
+
+// Max nodes to visit in the UD chain for the current method being compiled.
+static const int MAX_VISIT_BUDGET = 8192;
+
+// RangeCheck constructor.
+RangeCheck::RangeCheck(Compiler* pCompiler)
+ : m_pOverflowMap(nullptr)
+ , m_pRangeMap(nullptr)
+ , m_fMappedDefs(false)
+ , m_pDefTable(nullptr)
+ , m_pCompiler(pCompiler)
+ , m_nVisitBudget(MAX_VISIT_BUDGET)
+{
+}
+
+bool RangeCheck::IsOverBudget()
+{
+ return (m_nVisitBudget <= 0);
+}
+
+// Get the range map in which computed ranges are cached.
+RangeCheck::RangeMap* RangeCheck::GetRangeMap()
+{
+ if (m_pRangeMap == nullptr)
+ {
+ m_pRangeMap = new (m_pCompiler->getAllocator()) RangeMap(m_pCompiler->getAllocator());
+ }
+ return m_pRangeMap;
+}
+
+// Get the overflow map in which computed overflows are cached.
+RangeCheck::OverflowMap* RangeCheck::GetOverflowMap()
+{
+ if (m_pOverflowMap == nullptr)
+ {
+ m_pOverflowMap = new (m_pCompiler->getAllocator()) OverflowMap(m_pCompiler->getAllocator());
+ }
+ return m_pOverflowMap;
+}
+
+// Given the VN of an array length, return the array's size if the array was allocated by
+// a "new" with a known constant size (otherwise a non-positive value).
+int RangeCheck::GetArrLength(ValueNum vn)
+{
+ ValueNum arrRefVN = m_pCompiler->vnStore->GetArrForLenVn(vn);
+ return m_pCompiler->vnStore->GetNewArrSize(arrRefVN);
+}
+
+// Check if the computed range is within bounds.
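+// For illustration, a small worked example of the checks below (values hypothetical):
+// for a range <0, a.len - 1> checked against an upper bound tree whose VN is a.len, the
+// upper limit is a.len + (-1) on the same array (ucns = -1 < 0) and the lower limit is the
+// constant 0 >= 0, so BetweenBounds returns true and the bounds check can be removed.
+// A range <0, a.len> (ucns = 0) is rejected, since the index a.len itself is out of bounds.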
+bool RangeCheck::BetweenBounds(Range& range, int lower, GenTreePtr upper)
+{
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("%s BetweenBounds <%d, ", range.ToString(m_pCompiler->getAllocatorDebugOnly()), lower);
+ Compiler::printTreeID(upper);
+ printf(">\n");
+ }
+#endif // DEBUG
+
+ // Get the VN for the upper limit.
+ ValueNum uLimitVN = upper->gtVNPair.GetConservative();
+
+#ifdef DEBUG
+ JITDUMP("VN%04X upper bound is: ", uLimitVN);
+ if (m_pCompiler->verbose)
+ {
+ m_pCompiler->vnStore->vnDump(m_pCompiler, uLimitVN);
+ }
+ JITDUMP("\n");
+#endif
+
+ ValueNum arrRefVN = ValueNumStore::NoVN;
+ int arrSize = 0;
+
+ if (m_pCompiler->vnStore->IsVNConstant(uLimitVN))
+ {
+ ssize_t constVal = -1;
+ unsigned iconFlags = 0;
+
+ if (m_pCompiler->optIsTreeKnownIntValue(true, upper, &constVal, &iconFlags))
+ {
+ arrSize = (int)constVal;
+ }
+ }
+ else if (m_pCompiler->vnStore->IsVNArrLen(uLimitVN))
+ {
+ // Get the array reference from the length.
+ arrRefVN = m_pCompiler->vnStore->GetArrForLenVn(uLimitVN);
+ // Check if array size can be obtained.
+ arrSize = m_pCompiler->vnStore->GetNewArrSize(arrRefVN);
+ }
+ else
+ {
+ // If the upper limit is not length, then bail.
+ return false;
+ }
+
+#ifdef DEBUG
+ JITDUMP("Array ref VN");
+ if (m_pCompiler->verbose)
+ {
+ m_pCompiler->vnStore->vnDump(m_pCompiler, arrRefVN);
+ }
+ JITDUMP("\n");
+#endif
+
+ JITDUMP("Array size is: %d\n", arrSize);
+
+ // Upper limit: a.len + ucns (upper limit constant).
+ if (range.UpperLimit().IsBinOpArray())
+ {
+ if (range.UpperLimit().vn != arrRefVN)
+ {
+ return false;
+ }
+
+ int ucns = range.UpperLimit().GetConstant();
+
+ // Upper limit: a.Len + [0..n]
+ if (ucns >= 0)
+ {
+ return false;
+ }
+
+        // If the lower limit is a.len, return false.
+ if (range.LowerLimit().IsArray())
+ {
+ return false;
+ }
+
+ // Since upper limit is bounded by the array, return true if lower bound is good.
+ if (range.LowerLimit().IsConstant() && range.LowerLimit().GetConstant() >= 0)
+ {
+ return true;
+ }
+
+ // Check if we have the array size allocated by new.
+ if (arrSize <= 0)
+ {
+ return false;
+ }
+
+ // At this point,
+ // upper limit = a.len + ucns. ucns < 0
+ // lower limit = a.len + lcns.
+ if (range.LowerLimit().IsBinOpArray())
+ {
+ int lcns = range.LowerLimit().GetConstant();
+ if (lcns >= 0 || -lcns > arrSize)
+ {
+ return false;
+ }
+ return (range.LowerLimit().vn == arrRefVN && lcns <= ucns);
+ }
+ }
+ // If upper limit is constant
+ else if (range.UpperLimit().IsConstant())
+ {
+ if (arrSize <= 0)
+ {
+ return false;
+ }
+ int ucns = range.UpperLimit().GetConstant();
+ if (ucns >= arrSize)
+ {
+ return false;
+ }
+ if (range.LowerLimit().IsConstant())
+ {
+ int lcns = range.LowerLimit().GetConstant();
+ // Make sure lcns < ucns which is already less than arrSize.
+ return (lcns >= 0 && lcns <= ucns);
+ }
+ if (range.LowerLimit().IsBinOpArray())
+ {
+ int lcns = range.LowerLimit().GetConstant();
+ // a.len + lcns, make sure we don't subtract too much from a.len.
+ if (lcns >= 0 || -lcns > arrSize)
+ {
+ return false;
+ }
+ // Make sure a.len + lcns <= ucns.
+ return (range.LowerLimit().vn == arrRefVN && (arrSize + lcns) <= ucns);
+ }
+ }
+
+ return false;
+}
+
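+// Try to eliminate the bounds check rooted at "treeParent" (a GT_COMMA whose op1 is a
+// GT_ARR_BOUNDS_CHECK): first handle a constant index against a known array size, then
+// compute the index's range symbolically, prove the computation does not overflow, widen
+// the range if the induction is monotonic, and finally remove the check if the range is
+// proven to be within bounds.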
+void RangeCheck::OptimizeRangeCheck(BasicBlock* block, GenTreePtr stmt, GenTreePtr treeParent)
+{
+ // Check if we are dealing with a bounds check node.
+ if (treeParent->OperGet() != GT_COMMA)
+ {
+ return;
+ }
+
+ // If we are not looking at array bounds check, bail.
+ GenTreePtr tree = treeParent->gtOp.gtOp1;
+ if (tree->gtOper != GT_ARR_BOUNDS_CHECK)
+ {
+ return;
+ }
+
+ GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
+ m_pCurBndsChk = bndsChk;
+ GenTreePtr treeIndex = bndsChk->gtIndex;
+
+    // Take care of a constant index first, e.g., a[2].
+ ValueNum idxVn = treeIndex->gtVNPair.GetConservative();
+ ValueNum arrLenVn = bndsChk->gtArrLen->gtVNPair.GetConservative();
+ int arrSize = 0;
+
+ if (m_pCompiler->vnStore->IsVNConstant(arrLenVn))
+ {
+ ssize_t constVal = -1;
+ unsigned iconFlags = 0;
+
+ if (m_pCompiler->optIsTreeKnownIntValue(true, bndsChk->gtArrLen, &constVal, &iconFlags))
+ {
+ arrSize = (int)constVal;
+ }
+ }
+ else
+ {
+ arrSize = GetArrLength(arrLenVn);
+ }
+
+ JITDUMP("ArrSize for lengthVN:%03X = %d\n", arrLenVn, arrSize);
+ if (m_pCompiler->vnStore->IsVNConstant(idxVn) && arrSize > 0)
+ {
+ ssize_t idxVal = -1;
+ unsigned iconFlags = 0;
+ if (!m_pCompiler->optIsTreeKnownIntValue(true, treeIndex, &idxVal, &iconFlags))
+ {
+ return;
+ }
+
+ JITDUMP("[RangeCheck::OptimizeRangeCheck] Is index %d in <0, arrLenVn VN%X sz:%d>.\n", idxVal, arrLenVn,
+ arrSize);
+ if (arrSize > 0 && idxVal < arrSize && idxVal >= 0)
+ {
+ JITDUMP("Removing range check\n");
+ m_pCompiler->optRemoveRangeCheck(treeParent, stmt, true, GTF_ASG, true /* force remove */);
+ return;
+ }
+ }
+
+ GetRangeMap()->RemoveAll();
+ GetOverflowMap()->RemoveAll();
+
+ // Get the range for this index.
+ SearchPath* path = new (m_pCompiler->getAllocator()) SearchPath(m_pCompiler->getAllocator());
+
+ Range range = GetRange(block, stmt, treeIndex, path, false DEBUGARG(0));
+
+    // If the upper or lower limit is found to be unknown (top), or it was found to be
+    // unknown because we were over budget or the search was too deep, then return early.
+ if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown())
+ {
+        // Note: if the stack went too deep in the GetRange call, it would also go too
+        // deep in the DoesOverflow call, so return early.
+ return;
+ }
+
+ if (DoesOverflow(block, stmt, treeIndex, path))
+ {
+ JITDUMP("Method determined to overflow.\n");
+ return;
+ }
+
+ JITDUMP("Range value %s\n", range.ToString(m_pCompiler->getAllocatorDebugOnly()));
+ path->RemoveAll();
+ Widen(block, stmt, treeIndex, path, &range);
+
+ // If upper or lower limit is unknown, then return.
+ if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown())
+ {
+ return;
+ }
+
+    // Check whether the range lies between the lower and upper bound values.
+ if (BetweenBounds(range, 0, bndsChk->gtArrLen))
+ {
+ JITDUMP("[RangeCheck::OptimizeRangeCheck] Between bounds\n");
+ m_pCompiler->optRemoveRangeCheck(treeParent, stmt, true, GTF_ASG, true /* force remove */);
+ }
+ return;
+}
+
+void RangeCheck::Widen(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree, SearchPath* path, Range* pRange)
+{
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("[RangeCheck::Widen] BB%02d, \n", block->bbNum);
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ Range& range = *pRange;
+
+ // Try to deduce the lower bound, if it is not known already.
+ if (range.LowerLimit().IsDependent() || range.LowerLimit().IsUnknown())
+ {
+ // To determine the lower bound, ask if the loop increases monotonically.
+ bool increasing = IsMonotonicallyIncreasing(tree, path);
+ JITDUMP("IsMonotonicallyIncreasing %d", increasing);
+ if (increasing)
+ {
+ GetRangeMap()->RemoveAll();
+ *pRange = GetRange(block, stmt, tree, path, true DEBUGARG(0));
+ }
+ }
+}
+
+bool RangeCheck::IsBinOpMonotonicallyIncreasing(GenTreePtr op1, GenTreePtr op2, genTreeOps oper, SearchPath* path)
+{
+ JITDUMP("[RangeCheck::IsBinOpMonotonicallyIncreasing] %p, %p\n", dspPtr(op1), dspPtr(op2));
+ // Check if we have a var + const.
+ if (op2->OperGet() == GT_LCL_VAR)
+ {
+ jitstd::swap(op1, op2);
+ }
+ if (op1->OperGet() != GT_LCL_VAR)
+ {
+ JITDUMP("Not monotonic because op1 is not lclVar.\n");
+ return false;
+ }
+ switch (op2->OperGet())
+ {
+ case GT_LCL_VAR:
+ return IsMonotonicallyIncreasing(op1, path) && IsMonotonicallyIncreasing(op2, path);
+
+ case GT_CNS_INT:
+ return oper == GT_ADD && op2->AsIntConCommon()->IconValue() >= 0 && IsMonotonicallyIncreasing(op1, path);
+
+ default:
+ JITDUMP("Not monotonic because expression is not recognized.\n");
+ return false;
+ }
+}
+
+bool RangeCheck::IsMonotonicallyIncreasing(GenTreePtr expr, SearchPath* path)
+{
+ JITDUMP("[RangeCheck::IsMonotonicallyIncreasing] %p\n", dspPtr(expr));
+ if (path->Lookup(expr))
+ {
+ return true;
+ }
+
+ // Add hashtable entry for expr.
+ path->Set(expr, nullptr);
+
+ // Remove hashtable entry for expr when we exit the present scope.
+ auto code = [&] { path->Remove(expr); };
+ jitstd::utility::scoped_code<decltype(code)> finally(code);
+
+ // If the rhs expr is constant, then it is not part of the dependency
+ // loop which has to increase monotonically.
+ ValueNum vn = expr->gtVNPair.GetConservative();
+ if (path->GetCount() > MAX_SEARCH_DEPTH)
+ {
+ return false;
+ }
+ else if (m_pCompiler->vnStore->IsVNConstant(vn))
+ {
+ return true;
+ }
+ // If the rhs expr is local, then try to find the def of the local.
+ else if (expr->IsLocal())
+ {
+ Location* loc = GetDef(expr);
+ if (loc == nullptr)
+ {
+ return false;
+ }
+ GenTreePtr asg = loc->parent;
+ assert(asg->OperKind() & GTK_ASGOP);
+ switch (asg->OperGet())
+ {
+ case GT_ASG:
+ return IsMonotonicallyIncreasing(asg->gtGetOp2(), path);
+
+ case GT_ASG_ADD:
+ return IsBinOpMonotonicallyIncreasing(asg->gtGetOp1(), asg->gtGetOp2(), GT_ADD, path);
+
+ default:
+ // All other 'asg->OperGet()' kinds, return false
+ break;
+ }
+ JITDUMP("Unknown local definition type\n");
+ return false;
+ }
+ else if (expr->OperGet() == GT_ADD)
+ {
+ return IsBinOpMonotonicallyIncreasing(expr->gtGetOp1(), expr->gtGetOp2(), GT_ADD, path);
+ }
+ else if (expr->OperGet() == GT_PHI)
+ {
+ for (GenTreeArgList* args = expr->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ // If the arg is already in the path, skip.
+ if (path->Lookup(args->Current()))
+ {
+ continue;
+ }
+ if (!IsMonotonicallyIncreasing(args->Current(), path))
+ {
+ JITDUMP("Phi argument not monotonic\n");
+ return false;
+ }
+ }
+ return true;
+ }
+ JITDUMP("Unknown tree type\n");
+ return false;
+}
+
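+// Produce a key that uniquely identifies an SSA definition by packing the local number
+// into the upper 32 bits and the SSA number into the lower 32 bits.
+// For illustration: lclNum = 3, ssaNum = 2 yields 0x0000000300000002.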
+UINT64 RangeCheck::HashCode(unsigned lclNum, unsigned ssaNum)
+{
+ assert(ssaNum != SsaConfig::RESERVED_SSA_NUM);
+ return UINT64(lclNum) << 32 | ssaNum;
+}
+
+// Get the def location of a given variable.
+RangeCheck::Location* RangeCheck::GetDef(unsigned lclNum, unsigned ssaNum)
+{
+ Location* loc = nullptr;
+ if (ssaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ return nullptr;
+ }
+ if (!m_fMappedDefs)
+ {
+ MapMethodDefs();
+ }
+ // No defs.
+ if (m_pDefTable == nullptr)
+ {
+ return nullptr;
+ }
+ m_pDefTable->Lookup(HashCode(lclNum, ssaNum), &loc);
+ return loc;
+}
+
+RangeCheck::Location* RangeCheck::GetDef(GenTreePtr tree)
+{
+ assert(tree->IsLocal());
+ unsigned lclNum = tree->AsLclVarCommon()->GetLclNum();
+ unsigned ssaNum = tree->AsLclVarCommon()->GetSsaNum();
+ return GetDef(lclNum, ssaNum);
+}
+
+// Add the def location to the hash table.
+void RangeCheck::SetDef(UINT64 hash, Location* loc)
+{
+ if (m_pDefTable == nullptr)
+ {
+ m_pDefTable = new (m_pCompiler->getAllocator()) VarToLocMap(m_pCompiler->getAllocator());
+ }
+#ifdef DEBUG
+ Location* loc2;
+ if (m_pDefTable->Lookup(hash, &loc2))
+ {
+ JITDUMP("Already have BB%02d, %08X, %08X for hash => %0I64X", loc2->block->bbNum, dspPtr(loc2->stmt),
+ dspPtr(loc2->tree), hash);
+ assert(false);
+ }
+#endif
+ m_pDefTable->Set(hash, loc);
+}
+
+// Merge assertions on the edge flowing into the block about a variable.
+void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, const ASSERT_VALARG_TP assertions, Range* pRange)
+{
+ if (BitVecOps::IsEmpty(m_pCompiler->apTraits, assertions))
+ {
+ return;
+ }
+
+ GenTreeLclVarCommon* lcl = (GenTreeLclVarCommon*)tree;
+ if (lcl->gtSsaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ return;
+ }
+    // Walk through the "assertions" to check if they apply.
+ BitVecOps::Iter iter(m_pCompiler->apTraits, assertions);
+ unsigned index = 0;
+ while (iter.NextElem(m_pCompiler->apTraits, &index))
+ {
+ index++;
+
+ Compiler::AssertionDsc* curAssertion = m_pCompiler->optGetAssertion((Compiler::AssertionIndex)index);
+
+        // Skip if the current assertion is not an array length bound or a constant bound.
+ if (!curAssertion->IsArrLenArithBound() && !curAssertion->IsArrLenBound() && !curAssertion->IsConstantBound())
+ {
+ continue;
+ }
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ m_pCompiler->optPrintAssertion(curAssertion, (Compiler::AssertionIndex)index);
+ }
+#endif
+
+ assert(m_pCompiler->vnStore->IsVNArrLenArithBound(curAssertion->op1.vn) ||
+ m_pCompiler->vnStore->IsVNArrLenBound(curAssertion->op1.vn) ||
+ m_pCompiler->vnStore->IsVNConstantBound(curAssertion->op1.vn));
+
+ Limit limit(Limit::keUndef);
+ genTreeOps cmpOper = GT_NONE;
+
+ // Current assertion is of the form (i < a.len - cns) != 0
+ if (curAssertion->IsArrLenArithBound())
+ {
+ ValueNumStore::ArrLenArithBoundInfo info;
+
+ // Get i, a.len, cns and < as "info."
+ m_pCompiler->vnStore->GetArrLenArithBoundInfo(curAssertion->op1.vn, &info);
+
+ if (m_pCompiler->lvaTable[lcl->gtLclNum].GetPerSsaData(lcl->gtSsaNum)->m_vnPair.GetConservative() !=
+ info.cmpOp)
+ {
+ continue;
+ }
+
+ switch (info.arrOper)
+ {
+ case GT_SUB:
+ case GT_ADD:
+ {
+ // If the operand that operates on the array is not constant, then done.
+ if (!m_pCompiler->vnStore->IsVNConstant(info.arrOp) ||
+ m_pCompiler->vnStore->TypeOfVN(info.arrOp) != TYP_INT)
+ {
+ break;
+ }
+ int cons = m_pCompiler->vnStore->ConstantValue<int>(info.arrOp);
+ limit = Limit(Limit::keBinOpArray, info.vnArray, info.arrOper == GT_SUB ? -cons : cons);
+ }
+ }
+
+ cmpOper = (genTreeOps)info.cmpOper;
+ }
+ // Current assertion is of the form (i < a.len) != 0
+ else if (curAssertion->IsArrLenBound())
+ {
+ ValueNumStore::ArrLenArithBoundInfo info;
+
+ // Get the info as "i", "<" and "a.len"
+ m_pCompiler->vnStore->GetArrLenBoundInfo(curAssertion->op1.vn, &info);
+
+ ValueNum lclVn =
+ m_pCompiler->lvaTable[lcl->gtLclNum].GetPerSsaData(lcl->gtSsaNum)->m_vnPair.GetConservative();
+ // If we don't have the same variable we are comparing against, bail.
+ if (lclVn != info.cmpOp)
+ {
+ continue;
+ }
+ limit.type = Limit::keArray;
+ limit.vn = info.vnArray;
+ cmpOper = (genTreeOps)info.cmpOper;
+ }
+ // Current assertion is of the form (i < 100) != 0
+ else if (curAssertion->IsConstantBound())
+ {
+ ValueNumStore::ConstantBoundInfo info;
+
+ // Get the info as "i", "<" and "100"
+ m_pCompiler->vnStore->GetConstantBoundInfo(curAssertion->op1.vn, &info);
+
+ ValueNum lclVn =
+ m_pCompiler->lvaTable[lcl->gtLclNum].GetPerSsaData(lcl->gtSsaNum)->m_vnPair.GetConservative();
+
+ // If we don't have the same variable we are comparing against, bail.
+ if (lclVn != info.cmpOpVN)
+ {
+ continue;
+ }
+
+ limit = Limit(Limit::keConstant, ValueNumStore::NoVN, info.constVal);
+ cmpOper = (genTreeOps)info.cmpOper;
+ }
+ else
+ {
+ noway_assert(false);
+ }
+
+ if (limit.IsUndef())
+ {
+ continue;
+ }
+
+ // Make sure the assertion is of the form != 0 or == 0.
+ if (curAssertion->op2.vn != m_pCompiler->vnStore->VNZeroForType(TYP_INT))
+ {
+ continue;
+ }
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ m_pCompiler->optPrintAssertion(curAssertion, (Compiler::AssertionIndex)index);
+ }
+#endif
+
+ noway_assert(limit.IsBinOpArray() || limit.IsArray() || limit.IsConstant());
+
+ ValueNum arrLenVN = m_pCurBndsChk->gtArrLen->gtVNPair.GetConservative();
+ ValueNum arrRefVN = ValueNumStore::NoVN;
+
+ if (m_pCompiler->vnStore->IsVNArrLen(arrLenVN))
+ {
+ // Get the array reference from the length.
+ arrRefVN = m_pCompiler->vnStore->GetArrForLenVn(arrLenVN);
+ }
+
+ // During assertion prop we add assertions of the form:
+ //
+ // (i < a.Length) == 0
+ // (i < a.Length) != 0
+ // (i < 100) == 0
+ // (i < 100) != 0
+ //
+ // At this point, we have detected that op1.vn is (i < a.Length) or (i < a.Length + cns) or
+ // (i < 100) and the op2.vn is 0.
+ //
+        // Now, let us check if we are == 0 (i.e., the op1 assertion is false) or != 0 (the
+        // op1 assertion is true).
+ //
+ // If we have an assertion of the form == 0 (i.e., equals false), then reverse relop.
+ // The relop has to be reversed because we have: (i < a.Length) is false which is the same
+ // as (i >= a.Length).
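+        //
+        // Worked example (illustrative): from the assertion "(i < a.Length) != 0" on this
+        // edge we get cmpOper = GT_LT and limit = a.Length; the inclusive-bound adjustment
+        // below turns that into an upper limit of a.Length - 1 for i. From "(i > 5) != 0"
+        // we get cmpOper = GT_GT and the adjustment yields a lower limit of 6 for i.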
+ if (curAssertion->assertionKind == Compiler::OAK_EQUAL)
+ {
+ cmpOper = GenTree::ReverseRelop(cmpOper);
+ }
+
+ // Bounds are inclusive, so add -1 for upper bound when "<". But make sure we won't overflow.
+ if (cmpOper == GT_LT && !limit.AddConstant(-1))
+ {
+ continue;
+ }
+ // Bounds are inclusive, so add +1 for lower bound when ">". But make sure we won't overflow.
+ if (cmpOper == GT_GT && !limit.AddConstant(1))
+ {
+ continue;
+ }
+
+        // If the current upper limit is a constant and the incoming limit is not about our
+        // array's length, it cannot tighten the current bound, so skip.
+ if (pRange->uLimit.IsConstant() && limit.vn != arrRefVN)
+ {
+ continue;
+ }
+
+ // Check if the incoming limit from assertions tightens the existing upper limit.
+ if ((pRange->uLimit.IsArray() || pRange->uLimit.IsBinOpArray()) && pRange->uLimit.vn == arrRefVN)
+ {
+ // We have checked the current range's (pRange's) upper limit is either of the form:
+ // a.Length
+ // a.Length + cns
+ // and a == the bndsChkCandidate's arrRef
+ //
+ // We want to check if the incoming limit tightens the bound, and for that the
+ // we need to make sure that incoming limit is also on a.Length or a.Length + cns
+ // and not b.Length or some c.Length.
+
+ if (limit.vn != arrRefVN)
+ {
+ JITDUMP("Array ref did not match cur=$%x, assert=$%x\n", arrRefVN, limit.vn);
+ continue;
+ }
+
+ int curCns = (pRange->uLimit.IsBinOpArray()) ? pRange->uLimit.cns : 0;
+ int limCns = (limit.IsBinOpArray()) ? limit.cns : 0;
+
+ // Incoming limit doesn't tighten the existing upper limit.
+ if (limCns >= curCns)
+ {
+ JITDUMP("Bound limit %d doesn't tighten current bound %d\n", limCns, curCns);
+ continue;
+ }
+ }
+ else
+ {
+ // Current range's upper bound is not "a.Length or a.Length + cns" and the
+ // incoming limit is not on the same arrRef as the bounds check candidate.
+            // So we could skip this assertion. But when the upper limit is Dependent or
+            // Unknown, the incoming assertion can still tighten the upper bound to a saner
+            // value, so do not skip the assertion.
+ }
+
+ // cmpOp (loop index i) cmpOper a.len +/- cns
+ switch (cmpOper)
+ {
+ case GT_LT:
+ pRange->uLimit = limit;
+ break;
+
+ case GT_GT:
+ pRange->lLimit = limit;
+ break;
+
+ case GT_GE:
+ pRange->lLimit = limit;
+ break;
+
+ case GT_LE:
+ pRange->uLimit = limit;
+ break;
+
+ default:
+ // All other 'cmpOper' kinds leave lLimit/uLimit unchanged
+ break;
+ }
+ JITDUMP("The range after edge merging:");
+ JITDUMP(pRange->ToString(m_pCompiler->getAllocatorDebugOnly()));
+ JITDUMP("\n");
+ }
+}
+
+// Merge assertions from the pred edges of the block, i.e., check for any assertions about "op's" value numbers for phi
+// arguments. If "op" is not a phi argument, check if we have assertions about local variables.
+void RangeCheck::MergeAssertion(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr op, SearchPath* path, Range* pRange DEBUGARG(int indent))
+{
+ JITDUMP("Merging assertions from pred edges of BB%02d for op(%p) $%03x\n", block->bbNum, dspPtr(op),
+ op->gtVNPair.GetConservative());
+ ASSERT_TP assertions = BitVecOps::UninitVal();
+
+ // If we have a phi arg, we can get to the block from it and use its assertion out.
+ if (op->gtOper == GT_PHI_ARG)
+ {
+ GenTreePhiArg* arg = (GenTreePhiArg*)op;
+ BasicBlock* pred = arg->gtPredBB;
+ if (pred->bbFallsThrough() && pred->bbNext == block)
+ {
+ assertions = pred->bbAssertionOut;
+ JITDUMP("Merge assertions from pred BB%02d edge: %s\n", pred->bbNum,
+ BitVecOps::ToString(m_pCompiler->apTraits, assertions));
+ }
+ else if ((pred->bbJumpKind == BBJ_COND || pred->bbJumpKind == BBJ_ALWAYS) && pred->bbJumpDest == block)
+ {
+ if (m_pCompiler->bbJtrueAssertionOut != nullptr)
+ {
+ assertions = m_pCompiler->bbJtrueAssertionOut[pred->bbNum];
+ JITDUMP("Merge assertions from pred BB%02d JTrue edge: %s\n", pred->bbNum,
+ BitVecOps::ToString(m_pCompiler->apTraits, assertions));
+ }
+ }
+ }
+ // Get assertions from bbAssertionIn.
+ else if (op->IsLocal())
+ {
+ assertions = block->bbAssertionIn;
+ }
+
+ if (!BitVecOps::MayBeUninit(assertions))
+ {
+ // Perform the merge step to fine tune the range value.
+ MergeEdgeAssertions(op, assertions, pRange);
+ }
+}
+
+// Compute the range for a binary operation.
+Range RangeCheck::ComputeRangeForBinOp(BasicBlock* block,
+ GenTreePtr stmt,
+ GenTreePtr op1,
+ GenTreePtr op2,
+ genTreeOps oper,
+ SearchPath* path,
+ bool monotonic DEBUGARG(int indent))
+{
+ Range* op1RangeCached = nullptr;
+ Range op1Range = Limit(Limit::keUndef);
+ bool inPath1 = path->Lookup(op1);
+ // Check if the range value is already cached.
+ if (!GetRangeMap()->Lookup(op1, &op1RangeCached))
+ {
+ // If we already have the op in the path, then, just rely on assertions, else
+ // find the range.
+ if (!inPath1)
+ {
+ op1Range = GetRange(block, stmt, op1, path, monotonic DEBUGARG(indent));
+ }
+ else
+ {
+ op1Range = Range(Limit(Limit::keDependent));
+ }
+ MergeAssertion(block, stmt, op1, path, &op1Range DEBUGARG(indent + 1));
+ }
+ else
+ {
+ op1Range = *op1RangeCached;
+ }
+
+ Range* op2RangeCached;
+ Range op2Range = Limit(Limit::keUndef);
+ bool inPath2 = path->Lookup(op2);
+ // Check if the range value is already cached.
+ if (!GetRangeMap()->Lookup(op2, &op2RangeCached))
+ {
+ // If we already have the op in the path, then, just rely on assertions, else
+ // find the range.
+ if (!inPath2)
+ {
+ op2Range = GetRange(block, stmt, op2, path, monotonic DEBUGARG(indent));
+ }
+ else
+ {
+ op2Range = Range(Limit(Limit::keDependent));
+ }
+ MergeAssertion(block, stmt, op2, path, &op2Range DEBUGARG(indent + 1));
+ }
+ else
+ {
+ op2Range = *op2RangeCached;
+ }
+
+ assert(oper == GT_ADD); // For now just GT_ADD.
+ Range r = RangeOps::Add(op1Range, op2Range);
+ JITDUMP("BinOp add ranges %s %s = %s\n", op1Range.ToString(m_pCompiler->getAllocatorDebugOnly()),
+ op2Range.ToString(m_pCompiler->getAllocatorDebugOnly()), r.ToString(m_pCompiler->getAllocatorDebugOnly()));
+ return r;
+}
+
+// Compute the range for a local var definition.
+Range RangeCheck::ComputeRangeForLocalDef(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent))
+{
+ // Get the program location of the def.
+ Location* loc = GetDef(expr);
+
+ // If we can't reach the def, then return unknown range.
+ if (loc == nullptr)
+ {
+ return Range(Limit(Limit::keUnknown));
+ }
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ JITDUMP("----------------------------------------------------\n");
+ m_pCompiler->gtDispTree(loc->stmt);
+ JITDUMP("----------------------------------------------------\n");
+ }
+#endif
+ GenTreePtr asg = loc->parent;
+ assert(asg->OperKind() & GTK_ASGOP);
+ switch (asg->OperGet())
+ {
+ // If the operator of the definition is assignment, then compute the range of the rhs.
+ case GT_ASG:
+ {
+ Range range = GetRange(loc->block, loc->stmt, asg->gtGetOp2(), path, monotonic DEBUGARG(indent));
+ JITDUMP("Merge assertions from BB%02d:%s for assignment about %p\n", block->bbNum,
+ BitVecOps::ToString(m_pCompiler->apTraits, block->bbAssertionIn), dspPtr(asg->gtGetOp1()));
+ MergeEdgeAssertions(asg->gtGetOp1(), block->bbAssertionIn, &range);
+ JITDUMP("done merging\n");
+ return range;
+ }
+
+ case GT_ASG_ADD:
+ // If the operator of the definition is +=, then compute the range of the operands of +.
+ // Note that gtGetOp1 will return op1 to be the lhs; in the formulation of ssa, we have
+ // a side table for defs and the lhs of a += is considered to be a use for SSA numbering.
+ return ComputeRangeForBinOp(loc->block, loc->stmt, asg->gtGetOp1(), asg->gtGetOp2(), GT_ADD, path,
+ monotonic DEBUGARG(indent));
+
+ default:
+ // All other 'asg->OperGet()' kinds, return Limit::keUnknown
+ break;
+ }
+ return Range(Limit(Limit::keUnknown));
+}
+
+// https://msdn.microsoft.com/en-us/windows/apps/hh285054.aspx
+// CLR throws IDS_EE_ARRAY_DIMENSIONS_EXCEEDED if array length is > INT_MAX.
+// new byte[INT_MAX]; still throws OutOfMemoryException on my system with 32 GB RAM.
+// I believe practical limits are still smaller than this number.
+#define ARRLEN_MAX (0x7FFFFFFF)
+
+// Get the limit's maximum possible value, treating array length to be ARRLEN_MAX.
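+// For illustration: a limit of "a.len - 3" with an unknown allocation size maxes out at
+// ARRLEN_MAX - 3, while a limit of "a.len + 3" for an array known to be new int[10] maxes
+// out at 13. A constant limit is its own maximum.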
+bool RangeCheck::GetLimitMax(Limit& limit, int* pMax)
+{
+ int& max1 = *pMax;
+ switch (limit.type)
+ {
+ case Limit::keConstant:
+ max1 = limit.GetConstant();
+ break;
+
+ case Limit::keBinOpArray:
+ {
+ int tmp = GetArrLength(limit.vn);
+ if (tmp <= 0)
+ {
+ tmp = ARRLEN_MAX;
+ }
+ if (IntAddOverflows(tmp, limit.GetConstant()))
+ {
+ return false;
+ }
+ max1 = tmp + limit.GetConstant();
+ }
+ break;
+
+ case Limit::keArray:
+ {
+ int tmp = GetArrLength(limit.vn);
+ if (tmp <= 0)
+ {
+ tmp = ARRLEN_MAX;
+ }
+ max1 = tmp;
+ }
+ break;
+
+ case Limit::keSsaVar:
+ case Limit::keBinOp:
+ if (m_pCompiler->vnStore->IsVNConstant(limit.vn) && m_pCompiler->vnStore->TypeOfVN(limit.vn) == TYP_INT)
+ {
+ max1 = m_pCompiler->vnStore->ConstantValue<int>(limit.vn);
+ }
+ else
+ {
+ return false;
+ }
+ if (limit.type == Limit::keBinOp)
+ {
+ if (IntAddOverflows(max1, limit.GetConstant()))
+ {
+ return false;
+ }
+ max1 += limit.GetConstant();
+ }
+ break;
+
+ default:
+ return false;
+ }
+ return true;
+}
+
+// Check if the arithmetic overflows.
+bool RangeCheck::AddOverflows(Limit& limit1, Limit& limit2)
+{
+ int max1;
+ if (!GetLimitMax(limit1, &max1))
+ {
+ return true;
+ }
+
+ int max2;
+ if (!GetLimitMax(limit2, &max2))
+ {
+ return true;
+ }
+
+ return IntAddOverflows(max1, max2);
+}
+
+// Does the bin operation overflow.
+bool RangeCheck::DoesBinOpOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr op1, GenTreePtr op2, SearchPath* path)
+{
+ if (!path->Lookup(op1) && DoesOverflow(block, stmt, op1, path))
+ {
+ return true;
+ }
+
+ if (!path->Lookup(op2) && DoesOverflow(block, stmt, op2, path))
+ {
+ return true;
+ }
+
+ // Get the cached ranges of op1
+ Range* op1Range = nullptr;
+ if (!GetRangeMap()->Lookup(op1, &op1Range))
+ {
+ return true;
+ }
+ // Get the cached ranges of op2
+ Range* op2Range = nullptr;
+ if (!GetRangeMap()->Lookup(op2, &op2Range))
+ {
+ return true;
+ }
+
+ // If dependent, check if we can use some assertions.
+ if (op1Range->UpperLimit().IsDependent())
+ {
+ MergeAssertion(block, stmt, op1, path, op1Range DEBUGARG(0));
+ }
+
+ // If dependent, check if we can use some assertions.
+ if (op2Range->UpperLimit().IsDependent())
+ {
+ MergeAssertion(block, stmt, op2, path, op2Range DEBUGARG(0));
+ }
+
+ JITDUMP("Checking bin op overflow %s %s\n", op1Range->ToString(m_pCompiler->getAllocatorDebugOnly()),
+ op2Range->ToString(m_pCompiler->getAllocatorDebugOnly()));
+
+ if (!AddOverflows(op1Range->UpperLimit(), op2Range->UpperLimit()))
+ {
+ return false;
+ }
+ return true;
+}
+
+// Check if the rhs of the variable's definition involves arithmetic that overflows.
+bool RangeCheck::DoesVarDefOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path)
+{
+ // Get the definition.
+ Location* loc = GetDef(expr);
+ if (loc == nullptr)
+ {
+ return true;
+ }
+ // Get the parent node which is an asg.
+ GenTreePtr asg = loc->parent;
+ assert(asg->OperKind() & GTK_ASGOP);
+ switch (asg->OperGet())
+ {
+ case GT_ASG:
+ return DoesOverflow(loc->block, loc->stmt, asg->gtGetOp2(), path);
+
+ case GT_ASG_ADD:
+        // For GT_ASG_ADD, op2 is a use; op1 is also a use since we keep a side table for defs in the useasg case.
+ return DoesBinOpOverflow(loc->block, loc->stmt, asg->gtGetOp1(), asg->gtGetOp2(), path);
+
+ default:
+ // All other 'asg->OperGet()' kinds, conservatively return true
+ break;
+ }
+ return true;
+}
+
+bool RangeCheck::DoesPhiOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path)
+{
+ for (GenTreeArgList* args = expr->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ GenTreePtr arg = args->Current();
+ if (path->Lookup(arg))
+ {
+ continue;
+ }
+ if (DoesOverflow(block, stmt, args->Current(), path))
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool RangeCheck::DoesOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path)
+{
+ bool overflows = false;
+ if (!GetOverflowMap()->Lookup(expr, &overflows))
+ {
+ overflows = ComputeDoesOverflow(block, stmt, expr, path);
+ }
+ return overflows;
+}
+
+bool RangeCheck::ComputeDoesOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path)
+{
+ JITDUMP("Does overflow %p?\n", dspPtr(expr));
+ path->Set(expr, block);
+
+ bool overflows = true;
+
+ // Remove hashtable entry for expr when we exit the present scope.
+ Range range = Limit(Limit::keUndef);
+ ValueNum vn = expr->gtVNPair.GetConservative();
+ if (path->GetCount() > MAX_SEARCH_DEPTH)
+ {
+ overflows = true;
+ }
+ // If the definition chain resolves to a constant, it doesn't overflow.
+ else if (m_pCompiler->vnStore->IsVNConstant(vn))
+ {
+ overflows = false;
+ }
+ // Check if the var def has rhs involving arithmetic that overflows.
+ else if (expr->IsLocal())
+ {
+ overflows = DoesVarDefOverflow(block, stmt, expr, path);
+ }
+ // Check if add overflows.
+ else if (expr->OperGet() == GT_ADD)
+ {
+ overflows = DoesBinOpOverflow(block, stmt, expr->gtGetOp1(), expr->gtGetOp2(), path);
+ }
+ // Walk through phi arguments to check if phi arguments involve arithmetic that overflows.
+ else if (expr->OperGet() == GT_PHI)
+ {
+ overflows = DoesPhiOverflow(block, stmt, expr, path);
+ }
+ GetOverflowMap()->Set(expr, overflows);
+ path->Remove(expr);
+ return overflows;
+}
+
+struct Node
+{
+ Range range;
+ Node* next;
+ Node() : range(Limit(Limit::keUndef)), next(nullptr)
+ {
+ }
+};
+
+// Compute the range recursively by asking for the range of each variable in the dependency chain.
+// eg.: c = a + b; ask range of "a" and "b" and add the results.
+// If the result cannot be determined, i.e., the dependency chain does not terminate in a value
+// but continues to loop (which happens with phi nodes), we end the looping by calling the
+// value "dependent" (dep).
+// If the loop is proven to be "monotonic", then make liberal decisions while merging phi nodes.
+// eg.: merge((0, dep), (dep, dep)) = (0, dep)
+Range RangeCheck::ComputeRange(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent))
+{
+ bool newlyAdded = !path->Set(expr, block);
+ Range range = Limit(Limit::keUndef);
+
+ ValueNum vn = expr->gtVNPair.GetConservative();
+ // If newly added in the current search path, then reduce the budget.
+ if (newlyAdded)
+ {
+ // Assert that we are not re-entrant for a node which has been
+ // visited and resolved before and not currently on the search path.
+ noway_assert(!GetRangeMap()->Lookup(expr));
+ m_nVisitBudget--;
+ }
+ // Prevent quadratic behavior.
+ if (IsOverBudget())
+ {
+ // Set to unknown, since an Unknown range resolution, will stop further
+ // searches. This is because anything that merges with Unknown will
+ // yield Unknown. Unknown is lattice top.
+ range = Range(Limit(Limit::keUnknown));
+ JITDUMP("GetRange not tractable within max node visit budget.\n");
+ }
+ // Prevent unbounded recursion.
+ else if (path->GetCount() > MAX_SEARCH_DEPTH)
+ {
+ // Unknown is lattice top, anything that merges with Unknown will yield Unknown.
+ range = Range(Limit(Limit::keUnknown));
+ JITDUMP("GetRange not tractable within max stack depth.\n");
+ }
+ // TODO-CQ: The current implementation is reliant on integer storage types
+ // for constants. It could use INT64. Still, representing ULONG constants
+ // might require preserving the var_type whether it is a un/signed 64-bit.
+ // JIT64 doesn't do anything for "long" either. No asm diffs.
+ else if (expr->TypeGet() == TYP_LONG || expr->TypeGet() == TYP_ULONG)
+ {
+ range = Range(Limit(Limit::keUnknown));
+ JITDUMP("GetRange long or ulong, setting to unknown value.\n");
+ }
+ // If VN is constant return range as constant.
+ else if (m_pCompiler->vnStore->IsVNConstant(vn))
+ {
+ range = (m_pCompiler->vnStore->TypeOfVN(vn) == TYP_INT)
+ ? Range(Limit(Limit::keConstant, m_pCompiler->vnStore->ConstantValue<int>(vn)))
+ : Limit(Limit::keUnknown);
+ }
+ // If local, find the definition from the def map and evaluate the range for rhs.
+ else if (expr->IsLocal())
+ {
+ range = ComputeRangeForLocalDef(block, stmt, expr, path, monotonic DEBUGARG(indent + 1));
+ MergeAssertion(block, stmt, expr, path, &range DEBUGARG(indent + 1));
+ }
+ // If add, then compute the range for the operands and add them.
+ else if (expr->OperGet() == GT_ADD)
+ {
+ range = ComputeRangeForBinOp(block, stmt, expr->gtGetOp1(), expr->gtGetOp2(), GT_ADD, path,
+ monotonic DEBUGARG(indent + 1));
+ }
+ // If phi, then compute the range for arguments, calling the result "dependent" when looping begins.
+ else if (expr->OperGet() == GT_PHI)
+ {
+ Node* cur = nullptr;
+ Node* head = nullptr;
+ for (GenTreeArgList* args = expr->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ // Collect the range for each phi argument in a linked list.
+ Node* node = new (m_pCompiler->getAllocator()) Node();
+ if (cur != nullptr)
+ {
+ cur->next = node;
+ cur = cur->next;
+ }
+ else
+ {
+ head = node;
+ cur = head;
+ }
+ if (path->Lookup(args->Current()))
+ {
+ JITDUMP("PhiArg %p is already being computed\n", dspPtr(args->Current()));
+ cur->range = Range(Limit(Limit::keDependent));
+ MergeAssertion(block, stmt, args->Current(), path, &cur->range DEBUGARG(indent + 1));
+ continue;
+ }
+ cur->range = GetRange(block, stmt, args->Current(), path, monotonic DEBUGARG(indent + 1));
+ MergeAssertion(block, stmt, args->Current(), path, &cur->range DEBUGARG(indent + 1));
+ }
+ // Walk the linked list and merge the ranges.
+ for (cur = head; cur; cur = cur->next)
+ {
+ assert(!cur->range.LowerLimit().IsUndef());
+ assert(!cur->range.UpperLimit().IsUndef());
+ JITDUMP("Merging ranges %s %s:", range.ToString(m_pCompiler->getAllocatorDebugOnly()),
+ cur->range.ToString(m_pCompiler->getAllocatorDebugOnly()));
+ range = RangeOps::Merge(range, cur->range, monotonic);
+ JITDUMP("%s\n", range.ToString(m_pCompiler->getAllocatorDebugOnly()));
+ }
+ }
+ else
+ {
+ // The expression is not recognized, so the result is unknown.
+ range = Range(Limit(Limit::keUnknown));
+ }
+
+ GetRangeMap()->Set(expr, new (m_pCompiler->getAllocator()) Range(range));
+ path->Remove(expr);
+ return range;
+}
+
+#ifdef DEBUG
+void Indent(int indent)
+{
+ for (int i = 0; i < indent; ++i)
+ {
+ JITDUMP(" ");
+ }
+}
+#endif
+
+// Get the range, if it is already computed, use the cached range value, else compute it.
+Range RangeCheck::GetRange(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent))
+{
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ Indent(indent);
+ JITDUMP("[RangeCheck::GetRange] BB%02d", block->bbNum);
+ m_pCompiler->gtDispTree(expr);
+ Indent(indent);
+        JITDUMP("{\n");
+ }
+#endif
+
+ Range* pRange = nullptr;
+ Range range = GetRangeMap()->Lookup(expr, &pRange) ? *pRange : ComputeRange(block, stmt, expr, path,
+ monotonic DEBUGARG(indent));
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ Indent(indent);
+ JITDUMP(" %s Range (%08X) => %s\n", (pRange == nullptr) ? "Computed" : "Cached", dspPtr(expr),
+ range.ToString(m_pCompiler->getAllocatorDebugOnly()));
+ Indent(indent);
+        JITDUMP("}\n");
+ }
+#endif
+ return range;
+}
+
+// If this is a tree local definition add its location to the def map.
+void RangeCheck::MapStmtDefs(const Location& loc)
+{
+ GenTreePtr tree = loc.tree;
+ if (!tree->IsLocal())
+ {
+ return;
+ }
+
+ unsigned lclNum = tree->AsLclVarCommon()->GetLclNum();
+ unsigned ssaNum = tree->AsLclVarCommon()->GetSsaNum();
+ if (ssaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ return;
+ }
+
+ // If useasg then get the correct ssaNum to add to the map.
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+ unsigned ssaNum = m_pCompiler->GetSsaNumForLocalVarDef(tree);
+ if (ssaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+            // Only record the def when the parent is an assignment; this avoids ind(addr) use-asgs.
+ if (loc.parent->OperKind() & GTK_ASGOP)
+ {
+ SetDef(HashCode(lclNum, ssaNum), new (m_pCompiler->getAllocator()) Location(loc));
+ }
+ }
+ }
+ // If def get the location and store it against the variable's ssaNum.
+ else if (tree->gtFlags & GTF_VAR_DEF)
+ {
+ if (loc.parent->OperGet() == GT_ASG)
+ {
+ SetDef(HashCode(lclNum, ssaNum), new (m_pCompiler->getAllocator()) Location(loc));
+ }
+ }
+}
+
+struct MapMethodDefsData
+{
+ RangeCheck* rc;
+ BasicBlock* block;
+ GenTreePtr stmt;
+
+ MapMethodDefsData(RangeCheck* rc, BasicBlock* block, GenTreePtr stmt) : rc(rc), block(block), stmt(stmt)
+ {
+ }
+};
+
+Compiler::fgWalkResult MapMethodDefsVisitor(GenTreePtr* ptr, Compiler::fgWalkData* data)
+{
+ MapMethodDefsData* rcd = ((MapMethodDefsData*)data->pCallbackData);
+ rcd->rc->MapStmtDefs(RangeCheck::Location(rcd->block, rcd->stmt, *ptr, data->parent));
+ return Compiler::WALK_CONTINUE;
+}
+
+void RangeCheck::MapMethodDefs()
+{
+ // First, gather where all definitions occur in the program and store it in a map.
+ for (BasicBlock* block = m_pCompiler->fgFirstBB; block; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ MapMethodDefsData data(this, block, stmt);
+ m_pCompiler->fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, MapMethodDefsVisitor, &data, false, true);
+ }
+ }
+ m_fMappedDefs = true;
+}
+
+// Entry point to range check optimizations.
+void RangeCheck::OptimizeRangeChecks()
+{
+ if (m_pCompiler->fgSsaPassesCompleted == 0)
+ {
+ return;
+ }
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ JITDUMP("*************** In OptimizeRangeChecks()\n");
+ JITDUMP("Blocks/trees before phase\n");
+ m_pCompiler->fgDispBasicBlocks(true);
+ }
+#endif
+
+ // Walk through trees looking for arrBndsChk node and check if it can be optimized.
+ for (BasicBlock* block = m_pCompiler->fgFirstBB; block; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (IsOverBudget())
+ {
+ return;
+ }
+ OptimizeRangeCheck(block, stmt, tree);
+ }
+ }
+ }
+}
diff --git a/src/jit/rangecheck.h b/src/jit/rangecheck.h
new file mode 100644
index 0000000000..b00bfb8a67
--- /dev/null
+++ b/src/jit/rangecheck.h
@@ -0,0 +1,603 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+//
+// We take the following approach to range check analysis:
+//
+// Consider the following loop:
+// for (int i = 0; i < a.len; ++i) {
+// a[i] = 0;
+// }
+//
+// This would be represented as:
+// i_0 = 0; BB0
+// / ______ a[i_1] = 0; BB2
+// / / i_2 = i_1 + 1;
+// / / ^
+// i_1 = phi(i_0, i_2); BB1 |
+// i_1 < a.len -------------------+
+//
+// BB0 -> BB1
+// BB1 -> (i_1 < a.len) ? BB2 : BB3
+// BB2 -> BB1
+// BB3 -> return
+//
+// **Step 1. Walk the statements in the method checking if there is a bounds check.
+// If there is a bounds check, ask the range of the index variable.
+// In the above example i_1's range.
+//
+// **Step 2. Follow the defs and the dependency chain:
+// i_1 is a local, so go to its definition which is i_1 = phi(i_0, i_2).
+//
+// Since rhs is a phi, we ask the range for i_0 and i_2 in the hopes of merging
+// the resulting ranges for i_1.
+//
+// The range of i_0 follows immediately when going to its definition.
+// Ask for the range of i_2, which leads to i_1 + 1.
+// Ask for the range of i_1 and notice that we are looping; call the range of i_1
+// "dependent" and stop following the loop. The range of "1" is just <1, 1>.
+//
+// Now we have exhausted all the variables for which the range can be determined.
+// The others are either "unknown" or "dependent."
+//
+// For a phi argument we also merge assertions from its pred block's edges; otherwise we use
+// the block's assertionIn. This gives us a.len as an upper bound for i_1.
+//
+// **Step 3. Check if an overflow occurs in the dependency chain (loop.)
+// In the above case, we want to make sure there is no overflow in the definitions
+// involving i_1 and i_2. Merge assertions from the block's edges whenever possible.
+//
+// **Step 4. Check if the dependency chain is monotonic.
+//
+// **Step 5. If monotonic is true, then perform a widening step, where we assume that the
+// SSA variables that are "dependent" get their values from the definitions in the
+// dependency loop, and that their initial values come from the definitions that are not in
+// the dependency loop; in this case i_0's value, which is 0.
+//
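+//
+// For the loop above, this plays out roughly as follows (an illustration): i_1's range first
+// computes as <dep, a.len - 1>, the lower bound being "dependent" because it comes back
+// through the i_2 = i_1 + 1 cycle. Since the cycle only adds a non-negative constant, the
+// chain is monotonically increasing, and widening replaces the dependent lower bound with
+// the initial value i_0 = 0, giving <0, a.len - 1>, which BetweenBounds can prove in bounds.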
+
+#pragma once
+#include "compiler.h"
+#include "expandarray.h"
+
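+// Conservative signed-add overflow check used by the limit arithmetic below.
+// For illustration: IntAddOverflows(INT_MAX, 1) and IntAddOverflows(INT_MIN, -1) are true,
+// while a mixed-sign addition such as IntAddOverflows(INT_MAX, -1) is false.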
+static bool IntAddOverflows(int max1, int max2)
+{
+ if (max1 > 0 && max2 > 0 && INT_MAX - max1 < max2)
+ {
+ return true;
+ }
+ if (max1 < 0 && max2 < 0 && max1 < INT_MIN - max2)
+ {
+ return true;
+ }
+ return false;
+}
+
+// BNF for range and limit structures
+// Range -> Limit, Limit | Dependent | None | Unknown
+// Limit -> Symbol | BinOp | int
+// BinOp -> Symbol + int
+// SsaVar -> lclNum, ssaNum
+// Symbol -> SsaVar | ArrLen
+// ArrLen -> SsaVar
+// SsaVar -> vn
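+//
+// For illustration, a few encodings under this scheme (values hypothetical):
+//   the constant 5           -> keConstant,   cns = 5
+//   a.len                    -> keArray,      vn  = VN of the array reference
+//   a.len - 3                -> keBinOpArray, vn  = VN of the array reference, cns = -3
+//   an SSA variable's value  -> keSsaVar,     vn  = that variable's VN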
+struct Limit
+{
+ enum LimitType
+ {
+ keUndef, // The limit is yet to be computed.
+ keBinOp,
+ keBinOpArray,
+ keSsaVar,
+ keArray,
+ keConstant,
+ keDependent, // The limit is dependent on some other value.
+ keUnknown, // The limit could not be determined.
+ };
+
+ Limit() : type(keUndef)
+ {
+ }
+
+ Limit(LimitType type) : type(type)
+ {
+ }
+
+ Limit(LimitType type, int cns) : cns(cns), type(type)
+ {
+ assert(type == keConstant);
+ }
+
+ Limit(LimitType type, ValueNum vn, int cns) : cns(cns), vn(vn), type(type)
+ {
+        assert(type == keBinOpArray || type == keBinOp);
+ }
+
+ bool IsUndef()
+ {
+ return type == keUndef;
+ }
+ bool IsDependent()
+ {
+ return type == keDependent;
+ }
+ bool IsUnknown()
+ {
+ return type == keUnknown;
+ }
+ bool IsConstant()
+ {
+ return type == keConstant;
+ }
+ int GetConstant()
+ {
+ return cns;
+ }
+ bool IsArray()
+ {
+ return type == keArray;
+ }
+ bool IsSsaVar()
+ {
+ return type == keSsaVar;
+ }
+ bool IsBinOpArray()
+ {
+ return type == keBinOpArray;
+ }
+ bool IsBinOp()
+ {
+ return type == keBinOp;
+ }
+ bool AddConstant(int i)
+ {
+ switch (type)
+ {
+ case keDependent:
+ return true;
+ case keBinOp:
+ case keBinOpArray:
+ if (IntAddOverflows(cns, i))
+ {
+ return false;
+ }
+ cns += i;
+ return true;
+
+ case keSsaVar:
+ type = keBinOp;
+ cns = i;
+ return true;
+
+ case keArray:
+ type = keBinOpArray;
+ cns = i;
+ return true;
+
+ case keConstant:
+ if (IntAddOverflows(cns, i))
+ {
+ return false;
+ }
+ cns += i;
+ return true;
+
+ case keUndef:
+ case keUnknown:
+ // For these values of 'type', conservatively return false
+ break;
+ }
+
+ return false;
+ }
+
+ bool Equals(Limit& l)
+ {
+ switch (type)
+ {
+ case keUndef:
+ case keUnknown:
+ case keDependent:
+ return l.type == type;
+
+ case keBinOp:
+ case keBinOpArray:
+ return l.type == type && l.vn == vn && l.cns == cns;
+
+ case keSsaVar:
+ case keArray:
+ return l.type == type && l.vn == vn;
+
+ case keConstant:
+ return l.type == type && l.cns == cns;
+ }
+ return false;
+ }
+#ifdef DEBUG
+ const char* ToString(IAllocator* alloc)
+ {
+ unsigned size = 64;
+ char* buf = (char*)alloc->Alloc(size);
+ switch (type)
+ {
+ case keUndef:
+ return "Undef";
+
+ case keUnknown:
+ return "Unknown";
+
+ case keDependent:
+ return "Dependent";
+
+ case keBinOp:
+ case keBinOpArray:
+ sprintf_s(buf, size, "VN%04X + %d", vn, cns);
+ return buf;
+
+ case keSsaVar:
+ sprintf_s(buf, size, "VN%04X", vn);
+ return buf;
+
+ case keArray:
+ sprintf_s(buf, size, "VN%04X", vn);
+ return buf;
+
+ case keConstant:
+ sprintf_s(buf, size, "%d", cns);
+ return buf;
+ }
+ unreached();
+ }
+#endif
+ int cns;
+ ValueNum vn;
+ LimitType type;
+};
+
+// Range struct contains upper and lower limit.
+struct Range
+{
+ Limit uLimit;
+ Limit lLimit;
+
+ Range(const Limit& limit) : uLimit(limit), lLimit(limit)
+ {
+ }
+
+ Range(const Limit& lLimit, const Limit& uLimit) : uLimit(uLimit), lLimit(lLimit)
+ {
+ }
+
+ Limit& UpperLimit()
+ {
+ return uLimit;
+ }
+
+ Limit& LowerLimit()
+ {
+ return lLimit;
+ }
+
+#ifdef DEBUG
+ char* ToString(IAllocator* alloc)
+ {
+ size_t size = 64;
+ char* buf = (char*)alloc->Alloc(size);
+ sprintf_s(buf, size, "<%s, %s>", lLimit.ToString(alloc), uLimit.ToString(alloc));
+ return buf;
+ }
+#endif
+};
+
+// Helpers for operations performed on ranges
+struct RangeOps
+{
+    // Given a constant limit "l1", return a copy of "l2" with l1's constant added to it,
+    // or an Unknown limit if the addition would overflow.
+ static Limit AddConstantLimit(Limit& l1, Limit& l2)
+ {
+ assert(l1.IsConstant());
+ Limit l = l2;
+ if (l.AddConstant(l1.GetConstant()))
+ {
+ return l;
+ }
+ else
+ {
+ return Limit(Limit::keUnknown);
+ }
+ }
+
+ // Given two ranges "r1" and "r2", perform an add operation on the
+ // ranges.
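+    // For illustration: Add(<0, a.len - 1>, <1, 1>) yields <1, a.len>, and adding a
+    // constant to a Dependent limit leaves that side Dependent.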
+ static Range Add(Range& r1, Range& r2)
+ {
+ Limit& r1lo = r1.LowerLimit();
+ Limit& r1hi = r1.UpperLimit();
+ Limit& r2lo = r2.LowerLimit();
+ Limit& r2hi = r2.UpperLimit();
+
+ Range result = Limit(Limit::keUnknown);
+
+ // Check lo ranges if they are dependent and not unknown.
+ if ((r1lo.IsDependent() && !r1lo.IsUnknown()) || (r2lo.IsDependent() && !r2lo.IsUnknown()))
+ {
+ result.lLimit = Limit(Limit::keDependent);
+ }
+ // Check hi ranges if they are dependent and not unknown.
+ if ((r1hi.IsDependent() && !r1hi.IsUnknown()) || (r2hi.IsDependent() && !r2hi.IsUnknown()))
+ {
+ result.uLimit = Limit(Limit::keDependent);
+ }
+
+ if (r1lo.IsConstant())
+ {
+ result.lLimit = AddConstantLimit(r1lo, r2lo);
+ }
+ if (r2lo.IsConstant())
+ {
+ result.lLimit = AddConstantLimit(r2lo, r1lo);
+ }
+ if (r1hi.IsConstant())
+ {
+ result.uLimit = AddConstantLimit(r1hi, r2hi);
+ }
+ if (r2hi.IsConstant())
+ {
+ result.uLimit = AddConstantLimit(r2hi, r1hi);
+ }
+ return result;
+ }
+
+ // Given two ranges "r1" and "r2", do a Phi merge. If "monotonic" is true,
+ // then ignore the dependent variables.
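+    // For illustration: Merge(<0, dep>, <dep, dep>) is <dep, dep> normally, but with
+    // "monotonic" the dependent lower limit is ignored and the result is <0, dep>.
+    // Merge(<0, 5>, <2, a.len + 1>) keeps the smaller constant lower bound (0) and ends up
+    // with an Unknown upper bound, since the widening rule (a constant k merged with
+    // a.len + n when n >= k >= 0) does not apply here.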
+ static Range Merge(Range& r1, Range& r2, bool monotonic)
+ {
+ Limit& r1lo = r1.LowerLimit();
+ Limit& r1hi = r1.UpperLimit();
+ Limit& r2lo = r2.LowerLimit();
+ Limit& r2hi = r2.UpperLimit();
+
+ // Take care of lo part.
+ Range result = Limit(Limit::keUnknown);
+ if (r1lo.IsUnknown() || r2lo.IsUnknown())
+ {
+ result.lLimit = Limit(Limit::keUnknown);
+ }
+ // Uninitialized, just copy.
+ else if (r1lo.IsUndef())
+ {
+ result.lLimit = r2lo;
+ }
+ else if (r1lo.IsDependent() || r2lo.IsDependent())
+ {
+ if (monotonic)
+ {
+ result.lLimit = r1lo.IsDependent() ? r2lo : r1lo;
+ }
+ else
+ {
+ result.lLimit = Limit(Limit::keDependent);
+ }
+ }
+
+ // Take care of hi part.
+ if (r1hi.IsUnknown() || r2hi.IsUnknown())
+ {
+ result.uLimit = Limit(Limit::keUnknown);
+ }
+ else if (r1hi.IsUndef())
+ {
+ result.uLimit = r2hi;
+ }
+ else if (r1hi.IsDependent() || r2hi.IsDependent())
+ {
+ if (monotonic)
+ {
+ result.uLimit = r1hi.IsDependent() ? r2hi : r1hi;
+ }
+ else
+ {
+ result.uLimit = Limit(Limit::keDependent);
+ }
+ }
+
+ if (r1lo.IsConstant() && r2lo.IsConstant())
+ {
+ result.lLimit = Limit(Limit::keConstant, min(r1lo.GetConstant(), r2lo.GetConstant()));
+ }
+ if (r1hi.IsConstant() && r2hi.IsConstant())
+ {
+ result.uLimit = Limit(Limit::keConstant, max(r1hi.GetConstant(), r2hi.GetConstant()));
+ }
+ if (r2hi.Equals(r1hi))
+ {
+ result.uLimit = r2hi;
+ }
+ if (r2lo.Equals(r1lo))
+ {
+ result.lLimit = r1lo;
+ }
+ // Widen Upper Limit => Max(k, (a.len + n)) yields (a.len + n),
+ // This is correct if k >= 0 and n >= k, since a.len always >= 0
+ // (a.len + n) could overflow, but the result (a.len + n) also
+ // preserves the overflow.
+ if (r1hi.IsConstant() && r1hi.GetConstant() >= 0 && r2hi.IsBinOpArray() &&
+ r2hi.GetConstant() >= r1hi.GetConstant())
+ {
+ result.uLimit = r2hi;
+ }
+ if (r2hi.IsConstant() && r2hi.GetConstant() >= 0 && r1hi.IsBinOpArray() &&
+ r1hi.GetConstant() >= r2hi.GetConstant())
+ {
+ result.uLimit = r1hi;
+ }
+ if (r1hi.IsBinOpArray() && r2hi.IsBinOpArray() && r1hi.vn == r2hi.vn)
+ {
+ result.uLimit = r1hi;
+ // Widen the upper bound if the other constant is greater.
+ if (r2hi.GetConstant() > r1hi.GetConstant())
+ {
+ result.uLimit = r2hi;
+ }
+ }
+ return result;
+ }
+};
+
+class RangeCheck
+{
+public:
+ // Constructor
+ RangeCheck(Compiler* pCompiler);
+
+ // Location information is used to map where the defs occur in the method.
+ struct Location
+ {
+ BasicBlock* block;
+ GenTreePtr stmt;
+ GenTreePtr tree;
+ GenTreePtr parent;
+ Location(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree, GenTreePtr parent)
+ : block(block), stmt(stmt), tree(tree), parent(parent)
+ {
+ }
+
+ private:
+ Location();
+ };
+
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, bool, JitSimplerHashBehavior> OverflowMap;
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, Range*, JitSimplerHashBehavior> RangeMap;
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, BasicBlock*, JitSimplerHashBehavior> SearchPath;
+ typedef SimplerHashTable<INT64, LargePrimitiveKeyFuncs<INT64>, Location*, JitSimplerHashBehavior> VarToLocMap;
+ typedef SimplerHashTable<INT64, LargePrimitiveKeyFuncs<INT64>, ExpandArrayStack<Location*>*, JitSimplerHashBehavior>
+ VarToLocArrayMap;
+
+ // Generate a hashcode unique for this ssa var.
+ UINT64 HashCode(unsigned lclNum, unsigned ssaNum);
+
+ // Add a location of the definition of ssa var to the location map.
+ // Requires "hash" to be computed using HashCode.
+ // Requires "location" to be the local definition.
+ void SetDef(UINT64 hash, Location* loc);
+
+ // Given a tree node that is a local, return the Location defining the local.
+ Location* GetDef(GenTreePtr tree);
+ Location* GetDef(unsigned lclNum, unsigned ssaNum);
+
+ int GetArrLength(ValueNum vn);
+
+ // Check whether the computed range is within lower and upper bounds. This function
+ // assumes that the lower range is resolved and upper range is symbolic as in an
+ // increasing loop.
+ // TODO-CQ: This is not general enough.
+ bool BetweenBounds(Range& range, int lower, GenTreePtr upper);
+
+    // Given a tree location, check if the tree is a local def and add its location to the map.
+ void MapStmtDefs(const Location& loc);
+
+    // Given the CFG, find the defs and add their locations to the map.
+ void MapMethodDefs();
+
+    // Entry point to optimize range checks in the method. Assumes value numbering
+ // and assertion prop phases are completed.
+ void OptimizeRangeChecks();
+
+ // Given a "tree" node, check if it contains array bounds check node and
+ // optimize to remove it, if possible. Requires "stmt" and "block" that
+ // contain the tree.
+ void OptimizeRangeCheck(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree);
+
+ // Given the index expression try to find its range.
+ // The range of a variable depends on its rhs which in turn depends on its constituent variables.
+ // The "path" is the path taken in the search for the rhs' range and its constituents' range.
+ // If "monotonic" is true, the calculations are made more liberally assuming initial values
+ // at phi definitions.
+ Range GetRange(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent));
+
+ // Given the local variable, first find the definition of the local and find the range of the rhs.
+ // Helper for GetRange.
+ Range ComputeRangeForLocalDef(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent));
+
+ // Compute the range, rather than retrieve a cached value. Helper for GetRange.
+ Range ComputeRange(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent));
+
+ // Compute the range for the op1 and op2 for the given binary operator.
+ Range ComputeRangeForBinOp(BasicBlock* block,
+ GenTreePtr stmt,
+ GenTreePtr op1,
+ GenTreePtr op2,
+ genTreeOps oper,
+ SearchPath* path,
+ bool monotonic DEBUGARG(int indent));
+
+ // Merge assertions from AssertionProp's flags, for the corresponding "phiArg."
+ // Requires "pRange" to contain range that is computed partially.
+ void MergeAssertion(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr phiArg, SearchPath* path, Range* pRange DEBUGARG(int indent));
+
+ // Inspect the "assertions" and extract assertions about the given "phiArg" and
+ // refine the "pRange" value.
+ void MergeEdgeAssertions(GenTreePtr phiArg, const ASSERT_VALARG_TP assertions, Range* pRange);
+
+ // Compute the maximum possible value of the given "limit" (for example, ARRLEN_MAX for an
+ // array length). Return "false" if such a value cannot be determined.
+ bool GetLimitMax(Limit& limit, int* pMax);
+
+ // Does the addition of the two limits overflow?
+ bool AddOverflows(Limit& limit1, Limit& limit2);
+
+ // Does the binary operation between the operands overflow? Check recursively.
+ bool DoesBinOpOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr op1, GenTreePtr op2, SearchPath* path);
+
+ // Do the phi operands involve an assignment that could overflow?
+ bool DoesPhiOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path);
+
+ // Find the def of the "expr" local and recurse on its arguments to check whether any of them
+ // involve a calculation that overflows.
+ bool DoesVarDefOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path);
+
+ bool ComputeDoesOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path);
+
+ // Does the current "expr", which is a use, involve a definition that overflows?
+ bool DoesOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree, SearchPath* path);
+
+ // Widen the range by first checking if the induction variable is monotonic. Requires "pRange"
+ // to be partially computed.
+ void Widen(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree, SearchPath* path, Range* pRange);
+
+ // Is the binary operation monotonically increasing the value?
+ bool IsBinOpMonotonicallyIncreasing(GenTreePtr op1, GenTreePtr op2, genTreeOps oper, SearchPath* path);
+
+ // Given an "expr" trace its rhs and their definitions to check if all the assignments
+ // are monotonically increasing.
+ bool IsMonotonicallyIncreasing(GenTreePtr tree, SearchPath* path);
+
+ // We allocate a budget to avoid walking long UD chains. When traversing each link in the UD
+ // chain, we decrement the budget. When the budget hits 0, then no more range check optimization
+ // will be applied for the currently compiled method.
+ bool IsOverBudget();
+
+private:
+ GenTreeBoundsChk* m_pCurBndsChk;
+
+ // Get the cached overflow values.
+ OverflowMap* GetOverflowMap();
+ OverflowMap* m_pOverflowMap;
+
+ // Get the cached range values.
+ RangeMap* GetRangeMap();
+ RangeMap* m_pRangeMap;
+
+ bool m_fMappedDefs;
+ VarToLocMap* m_pDefTable;
+ Compiler* m_pCompiler;
+
+ // The number of nodes for which range is computed throughout the current method.
+ // When this limit is zero, we have exhausted all the budget to walk the ud-chain.
+ int m_nVisitBudget;
+};
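The definition table above (VarToLocMap) is keyed by a single INT64 that HashCode derives from an SSA variable's (lclNum, ssaNum) pair. The diff does not show the HashCode implementation, so the packing below is only an assumption; it is a minimal, self-contained sketch of one way such a key could be formed and decoded.

#include <cstdint>
#include <cstdio>

// Hypothetical packing of (lclNum, ssaNum) into one 64-bit key; the real
// HashCode in RangeCheck may use a different scheme.
static uint64_t PackSsaKey(unsigned lclNum, unsigned ssaNum)
{
    return (static_cast<uint64_t>(lclNum) << 32) | ssaNum;
}

static void UnpackSsaKey(uint64_t key, unsigned* lclNum, unsigned* ssaNum)
{
    *lclNum = static_cast<unsigned>(key >> 32);
    *ssaNum = static_cast<unsigned>(key & 0xFFFFFFFFu);
}

int main()
{
    uint64_t key = PackSsaKey(42, 7);
    unsigned lcl;
    unsigned ssa;
    UnpackSsaKey(key, &lcl, &ssa);
    std::printf("key=0x%llx lcl=%u ssa=%u\n", static_cast<unsigned long long>(key), lcl, ssa);
    return 0;
}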
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
new file mode 100644
index 0000000000..03e0c9a27e
--- /dev/null
+++ b/src/jit/rationalize.cpp
@@ -0,0 +1,1056 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+// state carried over the tree walk, to be used in making
+// a splitting decision.
+struct SplitData
+{
+ GenTree* root; // root stmt of tree being processed
+ BasicBlock* block;
+ Rationalizer* thisPhase;
+};
+
+//------------------------------------------------------------------------------
+// isNodeCallArg - given a context (stack of parent nodes), determine if the TOS is an arg to a call
+//------------------------------------------------------------------------------
+
+GenTree* isNodeCallArg(ArrayStack<GenTree*>* parentStack)
+{
+ for (int i = 1; // 0 is current node, so start at 1
+ i < parentStack->Height(); i++)
+ {
+ GenTree* node = parentStack->Index(i);
+ switch (node->OperGet())
+ {
+ case GT_LIST:
+ case GT_ARGPLACE:
+ break;
+ case GT_NOP:
+ // Currently there's an issue when the rationalizer performs
+ // the fixup of a call argument: when we remove a NOP that
+ // fgMorph inserted as the parent of a call, the tree stack in
+ // the walk is no longer consistent with the node that was just
+ // deleted, so the workaround is to go one level deeper.
+ // TODO-Cleanup: This has to be fixed in a proper way: make the rationalizer
+ // correctly modify the evaluation stack when removing treenodes.
+ if (node->gtOp.gtOp1->gtOper == GT_CALL)
+ {
+ return node->gtOp.gtOp1;
+ }
+ break;
+ case GT_CALL:
+ return node;
+ default:
+ return nullptr;
+ }
+ }
+ return nullptr;
+}
+
+// return op that is the store equivalent of the given load opcode
+genTreeOps storeForm(genTreeOps loadForm)
+{
+ switch (loadForm)
+ {
+ case GT_LCL_VAR:
+ return GT_STORE_LCL_VAR;
+ case GT_LCL_FLD:
+ return GT_STORE_LCL_FLD;
+ case GT_REG_VAR:
+ noway_assert(!"reg vars only supported in classic backend\n");
+ unreached();
+ default:
+ noway_assert(!"not a data load opcode\n");
+ unreached();
+ }
+}
+
+// return op that is the addr equivalent of the given load opcode
+genTreeOps addrForm(genTreeOps loadForm)
+{
+ switch (loadForm)
+ {
+ case GT_LCL_VAR:
+ return GT_LCL_VAR_ADDR;
+ case GT_LCL_FLD:
+ return GT_LCL_FLD_ADDR;
+ default:
+ noway_assert(!"not a data load opcode\n");
+ unreached();
+ }
+}
+
+// return op that is the load equivalent of the given addr opcode
+genTreeOps loadForm(genTreeOps addrForm)
+{
+ switch (addrForm)
+ {
+ case GT_LCL_VAR_ADDR:
+ return GT_LCL_VAR;
+ case GT_LCL_FLD_ADDR:
+ return GT_LCL_FLD;
+ default:
+ noway_assert(!"not a local address opcode\n");
+ unreached();
+ }
+}
+
+// copy the flags determined by mask from src to dst
+void copyFlags(GenTree* dst, GenTree* src, unsigned mask)
+{
+ dst->gtFlags &= ~mask;
+ dst->gtFlags |= (src->gtFlags & mask);
+}
+
+// call args have other pointers to them which must be fixed up if
+// they are replaced
+void Compiler::fgFixupIfCallArg(ArrayStack<GenTree*>* parentStack, GenTree* oldChild, GenTree* newChild)
+{
+ GenTree* parentCall = isNodeCallArg(parentStack);
+ if (!parentCall)
+ {
+ return;
+ }
+
+ // we have replaced an arg, so update pointers in argtable
+ fgFixupArgTabEntryPtr(parentCall, oldChild, newChild);
+}
+
+//------------------------------------------------------------------------
+// fgFixupArgTabEntryPtr: Fixup the fgArgTabEntryPtr of parentCall after
+// replacing oldArg with newArg
+//
+// Arguments:
+// parentCall - a pointer to the parent call node
+// oldArg - the original argument node
+// newArg - the replacement argument node
+//
+
+void Compiler::fgFixupArgTabEntryPtr(GenTreePtr parentCall, GenTreePtr oldArg, GenTreePtr newArg)
+{
+ assert(parentCall != nullptr);
+ assert(oldArg != nullptr);
+ assert(newArg != nullptr);
+
+ JITDUMP("parent call was :\n");
+ DISPNODE(parentCall);
+
+ JITDUMP("old child was :\n");
+ DISPNODE(oldArg);
+
+ if (oldArg->gtFlags & GTF_LATE_ARG)
+ {
+ newArg->gtFlags |= GTF_LATE_ARG;
+ }
+ else
+ {
+ fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(parentCall, oldArg);
+ assert(fp->node == oldArg);
+ fp->node = newArg;
+ }
+}
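fgFixupArgTabEntryPtr keeps the call's argument table consistent when an argument node is replaced: the entry that pointed at the old node is retargeted to the new one. Below is a stripped-down sketch of the same pattern; Node, ArgEntry, and CallInfo are toy stand-ins, not CoreCLR types.

#include <cassert>
#include <vector>

struct Node { int id; };         // toy stand-in for GenTree
struct ArgEntry { Node* node; }; // toy stand-in for fgArgTabEntry

struct CallInfo
{
    std::vector<ArgEntry> argTable;

    // Retarget the table entry that points at oldArg so that it points at
    // newArg, mirroring what fgFixupArgTabEntryPtr does for a call's args.
    void FixupArg(Node* oldArg, Node* newArg)
    {
        for (ArgEntry& entry : argTable)
        {
            if (entry.node == oldArg)
            {
                entry.node = newArg;
                return;
            }
        }
        assert(!"old argument not found in the arg table");
    }
};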
+
+// Rewrite a SIMD indirection as GT_IND(GT_LEA(obj.op1)), or as a simple
+// lclVar if possible.
+//
+// Arguments:
+// use - A use reference for a block node
+// keepBlk - True if this should remain a block node if it is not a lclVar
+//
+// Return Value:
+// None.
+//
+// TODO-1stClassStructs: These should be eliminated earlier, once we can handle
+// lclVars in all the places that used to have GT_OBJ.
+//
+void Rationalizer::RewriteSIMDOperand(LIR::Use& use, bool keepBlk)
+{
+#ifdef FEATURE_SIMD
+ // No lowering is needed for non-SIMD nodes, so early out if featureSIMD is not enabled.
+ if (!comp->featureSIMD)
+ {
+ return;
+ }
+
+ GenTree* tree = use.Def();
+ if (!tree->OperIsIndir())
+ {
+ return;
+ }
+ var_types simdType = tree->TypeGet();
+
+ if (!varTypeIsSIMD(simdType))
+ {
+ return;
+ }
+
+ // If the operand is a GT_ADDR(GT_LCL_VAR) and the LclVar is known to be of simdType,
+ // replace the obj with a GT_LCL_VAR.
+ GenTree* addr = tree->AsIndir()->Addr();
+ if (addr->OperIsLocalAddr() && comp->isAddrOfSIMDType(addr))
+ {
+ BlockRange().Remove(tree);
+
+ addr->SetOper(loadForm(addr->OperGet()));
+ addr->gtType = simdType;
+ use.ReplaceWith(comp, addr);
+ }
+ else if (!keepBlk)
+ {
+ tree->SetOper(GT_IND);
+ tree->gtType = simdType;
+ }
+#endif // FEATURE_SIMD
+}
+
+// RewriteNodeAsCall : Replace the given tree node by a GT_CALL.
+//
+// Arguments:
+ // use - A pointer to the use edge of the tree node to be replaced
+ // data - A pointer to tree walk data providing the context
+// callHnd - The method handle of the call to be generated
+// entryPoint - The method entrypoint of the call to be generated
+// args - The argument list of the call to be generated
+//
+// Return Value:
+// None.
+//
+
+void Rationalizer::RewriteNodeAsCall(GenTree** use,
+ Compiler::fgWalkData* data,
+ CORINFO_METHOD_HANDLE callHnd,
+#ifdef FEATURE_READYTORUN_COMPILER
+ CORINFO_CONST_LOOKUP entryPoint,
+#endif
+ GenTreeArgList* args)
+{
+ GenTreePtr tree = *use;
+ Compiler* comp = data->compiler;
+ SplitData* tmpState = (SplitData*)data->pCallbackData;
+ GenTreePtr root = tmpState->root;
+ GenTreePtr treeFirstNode = comp->fgGetFirstNode(tree);
+ GenTreePtr treeLastNode = tree;
+ GenTreePtr treePrevNode = treeFirstNode->gtPrev;
+ GenTreePtr treeNextNode = treeLastNode->gtNext;
+
+ // Create the call node
+ GenTreeCall* call = comp->gtNewCallNode(CT_USER_FUNC, callHnd, tree->gtType, args);
+ call = comp->fgMorphArgs(call);
+#ifdef FEATURE_READYTORUN_COMPILER
+ call->gtCall.setEntryPoint(entryPoint);
+#endif
+
+ // Replace "tree" with "call"
+ *use = call;
+
+ // Rebuild the evaluation order.
+ comp->gtSetStmtInfo(root);
+
+ // Rebuild the execution order.
+ comp->fgSetTreeSeq(call, treePrevNode);
+
+ // Restore linear-order Prev and Next for "call".
+ if (treePrevNode)
+ {
+ treeFirstNode = comp->fgGetFirstNode(call);
+ treeFirstNode->gtPrev = treePrevNode;
+ treePrevNode->gtNext = treeFirstNode;
+ }
+ else
+ {
+ // Update the linear order start of "root" if treeFirstNode
+ // appears to have replaced the original first node.
+ assert(treeFirstNode == root->gtStmt.gtStmtList);
+ root->gtStmt.gtStmtList = comp->fgGetFirstNode(call);
+ }
+
+ if (treeNextNode)
+ {
+ treeLastNode = call;
+ treeLastNode->gtNext = treeNextNode;
+ treeNextNode->gtPrev = treeLastNode;
+ }
+
+ comp->fgFixupIfCallArg(data->parentStack, tree, call);
+
+ // Propagate flags of "call" to its parents.
+ // 0 is current node, so start at 1
+ for (int i = 1; i < data->parentStack->Height(); i++)
+ {
+ GenTree* node = data->parentStack->Index(i);
+ node->gtFlags |= GTF_CALL;
+ node->gtFlags |= call->gtFlags & GTF_ALL_EFFECT;
+ }
+
+ // Since "tree" is replaced with "call", pop "tree" node (i.e the current node)
+ // and replace it with "call" on parent stack.
+ assert(data->parentStack->Top() == tree);
+ (void)data->parentStack->Pop();
+ data->parentStack->Push(call);
+}
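Much of RewriteNodeAsCall is bookkeeping to splice the new call's node sequence into the linear execution order, patching gtPrev/gtNext at both seams. The sketch below shows that splice on a toy doubly linked list; Node and SpliceSegment are illustrative names, not JIT APIs.

struct Node
{
    Node* prev = nullptr;
    Node* next = nullptr;
};

// Replace the contiguous segment [oldFirst, oldLast] with [newFirst, newLast],
// patching the links on both sides, much like RewriteNodeAsCall re-links the
// call's nodes into the statement's execution order.
void SpliceSegment(Node* oldFirst, Node* oldLast, Node* newFirst, Node* newLast)
{
    Node* before = oldFirst->prev;
    Node* after  = oldLast->next;

    newFirst->prev = before;
    if (before != nullptr)
    {
        before->next = newFirst;
    }

    newLast->next = after;
    if (after != nullptr)
    {
        after->prev = newLast;
    }
}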
+
+// RewriteIntrinsicAsUserCall : Rewrite an intrinsic operator as a GT_CALL to the original method.
+//
+// Arguments:
+ // use - A pointer to the use edge of the intrinsic node
+ // data - A pointer to tree walk data providing the context
+//
+// Return Value:
+// None.
+//
+ // Some intrinsics, such as Sqrt, are rewritten back to calls here, and some are not.
+ // The ones that are not rewritten here must be handled in Codegen.
+ // Conceptually, Lowering is the right place to do this rewrite. Keeping it in rationalization is
+ // mainly for throughput reasons.
+
+void Rationalizer::RewriteIntrinsicAsUserCall(GenTree** use, Compiler::fgWalkData* data)
+{
+ GenTreeIntrinsic* intrinsic = (*use)->AsIntrinsic();
+ Compiler* comp = data->compiler;
+
+ GenTreeArgList* args;
+ if (intrinsic->gtOp.gtOp2 == nullptr)
+ {
+ args = comp->gtNewArgList(intrinsic->gtGetOp1());
+ }
+ else
+ {
+ args = comp->gtNewArgList(intrinsic->gtGetOp1(), intrinsic->gtGetOp2());
+ }
+
+ RewriteNodeAsCall(use, data, intrinsic->gtMethodHandle,
+#ifdef FEATURE_READYTORUN_COMPILER
+ intrinsic->gtEntryPoint,
+#endif
+ args);
+}
+
+// FixupIfSIMDLocal: Fixup the type of a lclVar tree, as needed, if it is a SIMD type vector.
+//
+// Arguments:
+ // node - the GenTreeLclVarCommon tree to be fixed up.
+//
+// Return Value:
+// None.
+//
+// TODO-1stClassStructs: This is now only here to preserve existing behavior. It is actually not
+// desirable to change the lclFld nodes back to TYP_SIMD (it will cause them to be loaded
+// into a vector register, and then moved to an int register).
+
+void Rationalizer::FixupIfSIMDLocal(GenTreeLclVarCommon* node)
+{
+#ifdef FEATURE_SIMD
+ if (!comp->featureSIMD)
+ {
+ return;
+ }
+
+ LclVarDsc* varDsc = &(comp->lvaTable[node->gtLclNum]);
+
+ // Don't mark a byref of a SIMD vector as a SIMD type.
+ // Note that even though a struct arg may be marked as lvIsSIMD=true,
+ // the tree node representing such an arg should not be
+ // marked as a SIMD type, since it is a byref of a SIMD type.
+ if (!varTypeIsSIMD(varDsc))
+ {
+ return;
+ }
+ switch (node->OperGet())
+ {
+ default:
+ // Nothing to do for most tree nodes.
+ break;
+
+ case GT_LCL_FLD:
+ // We may see a lclFld used for pointer-sized structs that have been morphed, in which
+ // case we can change it to GT_LCL_VAR.
+ // However, we may also see a lclFld with FieldSeqStore::NotAField() for structs that can't
+ // be analyzed, e.g. those with overlapping fields such as the IL implementation of Vector<T>.
+ if ((node->AsLclFld()->gtFieldSeq == FieldSeqStore::NotAField()) && (node->AsLclFld()->gtLclOffs == 0) &&
+ (node->gtType == TYP_I_IMPL) && (varDsc->lvExactSize == TARGET_POINTER_SIZE))
+ {
+ node->SetOper(GT_LCL_VAR);
+ node->gtFlags &= ~(GTF_VAR_USEASG);
+ }
+ else
+ {
+ // If we access a field of a SIMD lclVar via GT_LCL_FLD, it cannot have been
+ // independently promoted.
+ assert(comp->lvaGetPromotionType(varDsc) != Compiler::PROMOTION_TYPE_INDEPENDENT);
+ return;
+ }
+ break;
+ case GT_STORE_LCL_FLD:
+ assert(node->gtType == TYP_I_IMPL);
+ node->SetOper(GT_STORE_LCL_VAR);
+ node->gtFlags &= ~(GTF_VAR_USEASG);
+ break;
+ }
+ unsigned simdSize = (unsigned int)roundUp(varDsc->lvExactSize, TARGET_POINTER_SIZE);
+ node->gtType = comp->getSIMDTypeForSize(simdSize);
+#endif // FEATURE_SIMD
+}
+
+#ifdef DEBUG
+
+void Rationalizer::ValidateStatement(GenTree* tree, BasicBlock* block)
+{
+ assert(tree->gtOper == GT_STMT);
+ DBEXEC(TRUE, JitTls::GetCompiler()->fgDebugCheckNodeLinks(block, tree));
+}
+
+// sanity checks that apply to all kinds of IR
+void Rationalizer::SanityCheck()
+{
+ // TODO: assert(!IsLIR());
+ BasicBlock* block;
+ foreach_block(comp, block)
+ {
+ for (GenTree* statement = block->bbTreeList; statement != nullptr; statement = statement->gtNext)
+ {
+ ValidateStatement(statement, block);
+
+ for (GenTree* tree = statement->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ // QMARK nodes should have been removed before this phase.
+ assert(tree->OperGet() != GT_QMARK);
+
+ if (tree->OperGet() == GT_ASG)
+ {
+ if (tree->gtGetOp1()->OperGet() == GT_LCL_VAR)
+ {
+ assert(tree->gtGetOp1()->gtFlags & GTF_VAR_DEF);
+ }
+ else if (tree->gtGetOp2()->OperGet() == GT_LCL_VAR)
+ {
+ assert(!(tree->gtGetOp2()->gtFlags & GTF_VAR_DEF));
+ }
+ }
+ }
+ }
+ }
+}
+
+void Rationalizer::SanityCheckRational()
+{
+ // TODO-Cleanup : check that the tree is rational here
+ // then do normal checks
+ SanityCheck();
+}
+
+#endif // DEBUG
+
+static void RewriteAssignmentIntoStoreLclCore(GenTreeOp* assignment,
+ GenTree* location,
+ GenTree* value,
+ genTreeOps locationOp)
+{
+ assert(assignment != nullptr);
+ assert(assignment->OperGet() == GT_ASG);
+ assert(location != nullptr);
+ assert(value != nullptr);
+
+ genTreeOps storeOp = storeForm(locationOp);
+
+#ifdef DEBUG
+ JITDUMP("rewriting asg(%s, X) to %s(X)\n", GenTree::NodeName(locationOp), GenTree::NodeName(storeOp));
+#endif // DEBUG
+
+ assignment->SetOper(storeOp);
+ GenTreeLclVarCommon* store = assignment->AsLclVarCommon();
+
+ GenTreeLclVarCommon* var = location->AsLclVarCommon();
+ store->SetLclNum(var->gtLclNum);
+ store->SetSsaNum(var->gtSsaNum);
+
+ if (locationOp == GT_LCL_FLD)
+ {
+ store->gtLclFld.gtLclOffs = var->gtLclFld.gtLclOffs;
+ store->gtLclFld.gtFieldSeq = var->gtLclFld.gtFieldSeq;
+ }
+
+ copyFlags(store, var, GTF_LIVENESS_MASK);
+ store->gtFlags &= ~GTF_REVERSE_OPS;
+
+ store->gtType = var->TypeGet();
+ store->gtOp1 = value;
+
+ DISPNODE(store);
+ JITDUMP("\n");
+}
+
+void Rationalizer::RewriteAssignmentIntoStoreLcl(GenTreeOp* assignment)
+{
+ assert(assignment != nullptr);
+ assert(assignment->OperGet() == GT_ASG);
+
+ GenTree* location = assignment->gtGetOp1();
+ GenTree* value = assignment->gtGetOp2();
+
+ RewriteAssignmentIntoStoreLclCore(assignment, location, value, location->OperGet());
+}
+
+void Rationalizer::RewriteAssignment(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+
+ GenTreeOp* assignment = use.Def()->AsOp();
+ assert(assignment->OperGet() == GT_ASG);
+
+ GenTree* location = assignment->gtGetOp1();
+ GenTree* value = assignment->gtGetOp2();
+
+ genTreeOps locationOp = location->OperGet();
+
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(location) && assignment->OperIsInitBlkOp())
+ {
+ if (location->OperGet() == GT_LCL_VAR)
+ {
+ var_types simdType = location->TypeGet();
+ GenTree* initVal = assignment->gtOp.gtOp2;
+ var_types baseType = comp->getBaseTypeOfSIMDLocal(location);
+ if (baseType != TYP_UNKNOWN)
+ {
+ GenTreeSIMD* simdTree = new (comp, GT_SIMD)
+ GenTreeSIMD(simdType, initVal, SIMDIntrinsicInit, baseType, genTypeSize(simdType));
+ assignment->gtOp.gtOp2 = simdTree;
+ value = simdTree;
+ initVal->gtNext = simdTree;
+ simdTree->gtPrev = initVal;
+
+ simdTree->gtNext = location;
+ location->gtPrev = simdTree;
+ }
+ }
+ else
+ {
+ assert(location->OperIsBlk());
+ }
+ }
+#endif // FEATURE_SIMD
+
+ switch (locationOp)
+ {
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_REG_VAR:
+ case GT_PHI_ARG:
+ RewriteAssignmentIntoStoreLclCore(assignment, location, value, locationOp);
+ BlockRange().Remove(location);
+ break;
+
+ case GT_IND:
+ {
+ GenTreeStoreInd* store =
+ new (comp, GT_STOREIND) GenTreeStoreInd(location->TypeGet(), location->gtGetOp1(), value);
+
+ copyFlags(store, assignment, GTF_ALL_EFFECT);
+ copyFlags(store, location, GTF_IND_FLAGS);
+
+ if (assignment->IsReverseOp())
+ {
+ store->gtFlags |= GTF_REVERSE_OPS;
+ }
+
+ // TODO: JIT dump
+
+ // Remove the GT_IND node and replace the assignment node with the store
+ BlockRange().Remove(location);
+ BlockRange().InsertBefore(assignment, store);
+ use.ReplaceWith(comp, store);
+ BlockRange().Remove(assignment);
+ }
+ break;
+
+ case GT_CLS_VAR:
+ {
+ location->SetOper(GT_CLS_VAR_ADDR);
+ location->gtType = TYP_BYREF;
+
+ assignment->SetOper(GT_STOREIND);
+
+ // TODO: JIT dump
+ }
+ break;
+
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_DYN_BLK:
+ {
+ assert(varTypeIsStruct(location));
+ GenTreeBlk* storeBlk = location->AsBlk();
+ genTreeOps storeOper;
+ switch (location->gtOper)
+ {
+ case GT_BLK:
+ storeOper = GT_STORE_BLK;
+ break;
+ case GT_OBJ:
+ storeOper = GT_STORE_OBJ;
+ break;
+ case GT_DYN_BLK:
+ storeOper = GT_STORE_DYN_BLK;
+ break;
+ default:
+ unreached();
+ }
+ JITDUMP("Rewriting GT_ASG(%s(X), Y) to %s(X,Y):\n", GenTree::NodeName(location->gtOper),
+ GenTree::NodeName(storeOper));
+ storeBlk->gtOper = storeOper;
+ storeBlk->gtFlags &= ~GTF_DONT_CSE;
+ storeBlk->gtFlags |= (assignment->gtFlags & (GTF_ALL_EFFECT | GTF_REVERSE_OPS | GTF_BLK_VOLATILE |
+ GTF_BLK_UNALIGNED | GTF_BLK_INIT | GTF_DONT_CSE));
+ storeBlk->gtBlk.Data() = value;
+
+ // Replace the assignment node with the store
+ use.ReplaceWith(comp, storeBlk);
+ BlockRange().Remove(assignment);
+ DISPTREERANGE(BlockRange(), use.Def());
+ JITDUMP("\n");
+ }
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+}
+
+void Rationalizer::RewriteAddress(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+
+ GenTreeUnOp* address = use.Def()->AsUnOp();
+ assert(address->OperGet() == GT_ADDR);
+
+ GenTree* location = address->gtGetOp1();
+ genTreeOps locationOp = location->OperGet();
+
+ if (location->IsLocal())
+ {
+// We are changing the child from GT_LCL_VAR TO GT_LCL_VAR_ADDR.
+// Therefore gtType of the child needs to be changed to a TYP_BYREF
+#ifdef DEBUG
+ if (locationOp == GT_LCL_VAR)
+ {
+ JITDUMP("Rewriting GT_ADDR(GT_LCL_VAR) to GT_LCL_VAR_ADDR:\n");
+ }
+ else
+ {
+ assert(locationOp == GT_LCL_FLD);
+ JITDUMP("Rewriting GT_ADDR(GT_LCL_FLD) to GT_LCL_FLD_ADDR:\n");
+ }
+#endif // DEBUG
+
+ location->SetOper(addrForm(locationOp));
+ location->gtType = TYP_BYREF;
+ copyFlags(location, address, GTF_ALL_EFFECT);
+
+ use.ReplaceWith(comp, location);
+ BlockRange().Remove(address);
+ }
+ else if (locationOp == GT_CLS_VAR)
+ {
+ location->SetOper(GT_CLS_VAR_ADDR);
+ location->gtType = TYP_BYREF;
+ copyFlags(location, address, GTF_ALL_EFFECT);
+
+ use.ReplaceWith(comp, location);
+ BlockRange().Remove(address);
+
+ JITDUMP("Rewriting GT_ADDR(GT_CLS_VAR) to GT_CLS_VAR_ADDR:\n");
+ }
+ else if (location->OperIsIndir())
+ {
+ use.ReplaceWith(comp, location->gtGetOp1());
+ BlockRange().Remove(location);
+ BlockRange().Remove(address);
+
+ JITDUMP("Rewriting GT_ADDR(GT_IND(X)) to X:\n");
+ }
+
+ DISPTREERANGE(BlockRange(), use.Def());
+ JITDUMP("\n");
+}
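RewriteAddress collapses three address shapes: GT_ADDR of a local becomes the corresponding *_ADDR form, GT_ADDR(GT_CLS_VAR) becomes GT_CLS_VAR_ADDR, and GT_ADDR(GT_IND(x)) cancels to x. The following is a compact sketch of the same rewrite rules over a toy operator enum; every name in it is illustrative, not a JIT type.

#include <cassert>

enum ToyOper
{
    LclVar,
    LclFld,
    ClsVar,
    LclVarAddr,
    LclFldAddr,
    ClsVarAddr,
    Ind
};

struct ToyNode
{
    ToyOper  oper;
    ToyNode* op1; // child, when there is one
};

// Given the child of an ADDR node, return the node that replaces the whole
// ADDR(child) expression, mirroring the cases in Rationalizer::RewriteAddress.
ToyNode* RewriteAddr(ToyNode* child)
{
    switch (child->oper)
    {
        case LclVar: child->oper = LclVarAddr; return child; // ADDR(LCL_VAR) -> LCL_VAR_ADDR
        case LclFld: child->oper = LclFldAddr; return child; // ADDR(LCL_FLD) -> LCL_FLD_ADDR
        case ClsVar: child->oper = ClsVarAddr; return child; // ADDR(CLS_VAR) -> CLS_VAR_ADDR
        case Ind:    return child->op1;                      // ADDR(IND(x))  -> x
        default:     assert(!"unexpected ADDR operand");     return child;
    }
}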
+
+Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<GenTree*>& parentStack)
+{
+ assert(useEdge != nullptr);
+
+ GenTree* node = *useEdge;
+ assert(node != nullptr);
+
+#ifdef DEBUG
+ const bool isLateArg = (node->gtFlags & GTF_LATE_ARG) != 0;
+#endif
+
+ // First, remove any preceding GT_LIST nodes, which are not otherwise visited by the tree walk.
+ //
+ // NOTE: GT_LIST nodes that are used as aggregates, by block ops, and by phi nodes will in fact be visited.
+ for (GenTree* prev = node->gtPrev;
+ prev != nullptr && prev->OperGet() == GT_LIST && !(prev->AsArgList()->IsAggregate());
+ prev = node->gtPrev)
+ {
+ BlockRange().Remove(prev);
+ }
+
+ // In addition, remove the current node if it is a GT_LIST node that is not an aggregate.
+ if (node->OperGet() == GT_LIST)
+ {
+ GenTreeArgList* list = node->AsArgList();
+ if (!list->IsAggregate())
+ {
+ BlockRange().Remove(list);
+ }
+ return Compiler::WALK_CONTINUE;
+ }
+
+ LIR::Use use;
+ if (parentStack.Height() < 2)
+ {
+ use = LIR::Use::GetDummyUse(BlockRange(), *useEdge);
+ }
+ else
+ {
+ use = LIR::Use(BlockRange(), useEdge, parentStack.Index(1));
+ }
+
+ assert(node == use.Def());
+ switch (node->OperGet())
+ {
+ case GT_ASG:
+ RewriteAssignment(use);
+ break;
+
+ case GT_BOX:
+ // GT_BOX at this level just passes through so get rid of it
+ use.ReplaceWith(comp, node->gtGetOp1());
+ BlockRange().Remove(node);
+ break;
+
+ case GT_ADDR:
+ RewriteAddress(use);
+ break;
+
+ case GT_NOP:
+ // fgMorph sometimes inserts NOP nodes between defs and uses
+ // supposedly 'to prevent constant folding'. In this case, remove the
+ // NOP.
+ if (node->gtGetOp1() != nullptr)
+ {
+ use.ReplaceWith(comp, node->gtGetOp1());
+ BlockRange().Remove(node);
+ }
+ break;
+
+ case GT_COMMA:
+ {
+ GenTree* op1 = node->gtGetOp1();
+ if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ // The LHS has no side effects. Remove it.
+ bool isClosed = false;
+ unsigned sideEffects = 0;
+ LIR::ReadOnlyRange lhsRange = BlockRange().GetTreeRange(op1, &isClosed, &sideEffects);
+
+ // None of the transforms performed herein violate tree order, so these
+ // should always be true.
+ assert(isClosed);
+ assert((sideEffects & GTF_ALL_EFFECT) == 0);
+
+ BlockRange().Delete(comp, m_block, std::move(lhsRange));
+ }
+
+ GenTree* replacement = node->gtGetOp2();
+ if (!use.IsDummyUse())
+ {
+ use.ReplaceWith(comp, replacement);
+ }
+ else
+ {
+ // This is a top-level comma. If the RHS has no side effects we can remove
+ // it as well.
+ if ((replacement->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ bool isClosed = false;
+ unsigned sideEffects = 0;
+ LIR::ReadOnlyRange rhsRange = BlockRange().GetTreeRange(replacement, &isClosed, &sideEffects);
+
+ // None of the transforms performed herein violate tree order, so these
+ // should always be true.
+ assert(isClosed);
+ assert((sideEffects & GTF_ALL_EFFECT) == 0);
+
+ BlockRange().Delete(comp, m_block, std::move(rhsRange));
+ }
+ }
+
+ BlockRange().Remove(node);
+ }
+ break;
+
+ case GT_ARGPLACE:
+ // Remove argplace and list nodes from the execution order.
+ //
+ // TODO: remove phi args and phi nodes as well?
+ BlockRange().Remove(node);
+ break;
+
+#ifdef _TARGET_XARCH_
+ case GT_CLS_VAR:
+ {
+ // Class vars that are the target of an assignment will get rewritten into
+ // GT_STOREIND(GT_CLS_VAR_ADDR, val) by RewriteAssignment. This check is
+ // not strictly necessary--the GT_IND(GT_CLS_VAR_ADDR) pattern that would
+ // otherwise be generated would also be picked up by RewriteAssignment--but
+ // skipping the rewrite here saves an allocation and a bit of extra work.
+ const bool isLHSOfAssignment = (use.User()->OperGet() == GT_ASG) && (use.User()->gtGetOp1() == node);
+ if (!isLHSOfAssignment)
+ {
+ GenTree* ind = comp->gtNewOperNode(GT_IND, node->TypeGet(), node);
+
+ node->SetOper(GT_CLS_VAR_ADDR);
+ node->gtType = TYP_BYREF;
+
+ BlockRange().InsertAfter(node, ind);
+ use.ReplaceWith(comp, ind);
+
+ // TODO: JIT dump
+ }
+ }
+ break;
+#endif // _TARGET_XARCH_
+
+ case GT_INTRINSIC:
+ // Non-target intrinsics should have already been rewritten back into user calls.
+ assert(Compiler::IsTargetIntrinsic(node->gtIntrinsic.gtIntrinsicId));
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_BLK:
+ case GT_OBJ:
+ {
+ // TODO-1stClassStructs: These should have been transformed to GT_INDs, but in order
+ // to preserve existing behavior, we will keep this as a block node if this is the
+ // lhs of a block assignment, and either:
+ // - It is a "generic" TYP_STRUCT assignment, OR
+ // - It is an initblk, OR
+ // - Neither the lhs or rhs are known to be of SIMD type.
+
+ GenTree* parent = use.User();
+ bool keepBlk = false;
+ if ((parent->OperGet() == GT_ASG) && (node == parent->gtGetOp1()))
+ {
+ if ((node->TypeGet() == TYP_STRUCT) || parent->OperIsInitBlkOp())
+ {
+ keepBlk = true;
+ }
+ else if (!comp->isAddrOfSIMDType(node->AsBlk()->Addr()))
+ {
+ GenTree* dataSrc = parent->gtGetOp2();
+ if (!dataSrc->IsLocal() && (dataSrc->OperGet() != GT_SIMD))
+ {
+ noway_assert(dataSrc->OperIsIndir());
+ keepBlk = !comp->isAddrOfSIMDType(dataSrc->AsIndir()->Addr());
+ }
+ }
+ }
+ RewriteSIMDOperand(use, keepBlk);
+ }
+ break;
+
+ case GT_LCL_FLD:
+ case GT_STORE_LCL_FLD:
+ // TODO-1stClassStructs: Eliminate this.
+ FixupIfSIMDLocal(node->AsLclVarCommon());
+ break;
+
+ case GT_SIMD:
+ {
+ noway_assert(comp->featureSIMD);
+ GenTreeSIMD* simdNode = node->AsSIMD();
+ unsigned simdSize = simdNode->gtSIMDSize;
+ var_types simdType = comp->getSIMDTypeForSize(simdSize);
+
+ // TODO-1stClassStructs: This should be handled more generally for enregistered or promoted
+ // structs that are passed or returned in a different register type than their enregistered
+ // type(s).
+ if (simdNode->gtType == TYP_I_IMPL && simdNode->gtSIMDSize == TARGET_POINTER_SIZE)
+ {
+ // This happens when it is consumed by a GT_RET_EXPR.
+ // It can only be a Vector2f or Vector2i.
+ assert(genTypeSize(simdNode->gtSIMDBaseType) == 4);
+ simdNode->gtType = TYP_SIMD8;
+ }
+ // Certain SIMD trees require rationalizing.
+ if (simdNode->gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicInitArray)
+ {
+ // Rewrite this as an explicit load.
+ JITDUMP("Rewriting GT_SIMD array init as an explicit load:\n");
+ unsigned int baseTypeSize = genTypeSize(simdNode->gtSIMDBaseType);
+ GenTree* address = new (comp, GT_LEA) GenTreeAddrMode(TYP_BYREF, simdNode->gtOp1, simdNode->gtOp2,
+ baseTypeSize, offsetof(CORINFO_Array, u1Elems));
+ GenTree* ind = comp->gtNewOperNode(GT_IND, simdType, address);
+
+ BlockRange().InsertBefore(simdNode, address, ind);
+ use.ReplaceWith(comp, ind);
+ BlockRange().Remove(simdNode);
+
+ DISPTREERANGE(BlockRange(), use.Def());
+ JITDUMP("\n");
+ }
+ else
+ {
+ // This code depends on the fact that NONE of the SIMD intrinsics take vector operands
+ // of a different width. If that assumption changes, we will EITHER have to make these type
+ // transformations during importation, and plumb the types all the way through the JIT,
+ // OR add a lot of special handling here.
+ GenTree* op1 = simdNode->gtGetOp1();
+ if (op1 != nullptr && op1->gtType == TYP_STRUCT)
+ {
+ op1->gtType = simdType;
+ }
+
+ GenTree* op2 = simdNode->gtGetOp2();
+ if (op2 != nullptr && op2->gtType == TYP_STRUCT)
+ {
+ op2->gtType = simdType;
+ }
+ }
+ }
+ break;
+#endif // FEATURE_SIMD
+
+ default:
+ break;
+ }
+
+ // Do some extra processing on top-level nodes to remove unused local reads.
+ if (use.IsDummyUse() && node->OperIsLocalRead())
+ {
+ assert((node->gtFlags & GTF_ALL_EFFECT) == 0);
+
+ comp->lvaDecRefCnts(node);
+ BlockRange().Remove(node);
+ }
+
+ assert(isLateArg == ((node->gtFlags & GTF_LATE_ARG) != 0));
+
+ return Compiler::WALK_CONTINUE;
+}
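Both the dead GT_COMMA operand removal and the unused local read removal above are guarded by the same test: the candidate subtree must carry no effect flags before its range is deleted from the LIR. Below is a minimal sketch of that guard and the unlink step, using toy flag and node types rather than GTF_* and GenTree.

#include <cstdint>

struct Node
{
    uint32_t flags = 0;    // toy stand-in for gtFlags
    Node*    prev  = nullptr;
    Node*    next  = nullptr;
};

const uint32_t FLAG_ALL_EFFECT = 0x7; // toy stand-in for GTF_ALL_EFFECT

// Unlink the contiguous range [first, last] from the linear order, but only
// when no node in it carries an effect flag -- the same guard RewriteNode
// applies before deleting a dead GT_COMMA operand or an unused local read.
bool TryRemoveRange(Node* first, Node* last)
{
    for (Node* n = first;; n = n->next)
    {
        if ((n->flags & FLAG_ALL_EFFECT) != 0)
        {
            return false; // the range has side effects; keep it
        }
        if (n == last)
        {
            break;
        }
    }

    Node* before = first->prev;
    Node* after  = last->next;
    if (before != nullptr)
    {
        before->next = after;
    }
    if (after != nullptr)
    {
        after->prev = before;
    }
    return true;
}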
+
+void Rationalizer::DoPhase()
+{
+ DBEXEC(TRUE, SanityCheck());
+
+ comp->compCurBB = nullptr;
+ comp->fgOrder = Compiler::FGOrderLinear;
+
+ BasicBlock* firstBlock = comp->fgFirstBB;
+
+ for (BasicBlock* block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ comp->compCurBB = block;
+ m_block = block;
+
+ // Establish the first and last nodes for the block. This is necessary in order for the LIR
+ // utilities that hang off the BasicBlock type to work correctly.
+ GenTreeStmt* firstStatement = block->firstStmt();
+ if (firstStatement == nullptr)
+ {
+ // No statements in this block; skip it.
+ block->MakeLIR(nullptr, nullptr);
+ continue;
+ }
+
+ GenTreeStmt* lastStatement = block->lastStmt();
+
+ // Rewrite intrinsics that are not supported by the target back into user calls.
+ // This needs to be done before the transition to LIR because it relies on the use
+ // of fgMorphArgs, which is designed to operate on HIR. Once this is done for a
+ // particular statement, link that statement's nodes into the current basic block.
+ //
+ // This walk also clears the GTF_VAR_USEDEF bit on locals, which is not necessary
+ // in the backend.
+ GenTree* lastNodeInPreviousStatement = nullptr;
+ for (GenTreeStmt* statement = firstStatement; statement != nullptr; statement = statement->getNextStmt())
+ {
+ assert(statement->gtStmtList != nullptr);
+ assert(statement->gtStmtList->gtPrev == nullptr);
+ assert(statement->gtStmtExpr != nullptr);
+ assert(statement->gtStmtExpr->gtNext == nullptr);
+
+ SplitData splitData;
+ splitData.root = statement;
+ splitData.block = block;
+ splitData.thisPhase = this;
+
+ comp->fgWalkTreePost(&statement->gtStmtExpr,
+ [](GenTree** use, Compiler::fgWalkData* walkData) -> Compiler::fgWalkResult {
+ GenTree* node = *use;
+ if (node->OperGet() == GT_INTRINSIC &&
+ Compiler::IsIntrinsicImplementedByUserCall(node->gtIntrinsic.gtIntrinsicId))
+ {
+ RewriteIntrinsicAsUserCall(use, walkData);
+ }
+ else if (node->OperIsLocal())
+ {
+ node->gtFlags &= ~GTF_VAR_USEDEF;
+ }
+
+ return Compiler::WALK_CONTINUE;
+ },
+ &splitData, true);
+
+ GenTree* firstNodeInStatement = statement->gtStmtList;
+ if (lastNodeInPreviousStatement != nullptr)
+ {
+ lastNodeInPreviousStatement->gtNext = firstNodeInStatement;
+ }
+
+ firstNodeInStatement->gtPrev = lastNodeInPreviousStatement;
+ lastNodeInPreviousStatement = statement->gtStmtExpr;
+ }
+
+ block->MakeLIR(firstStatement->gtStmtList, lastStatement->gtStmtExpr);
+
+ // Rewrite HIR nodes into LIR nodes.
+ for (GenTreeStmt *statement = firstStatement, *nextStatement; statement != nullptr; statement = nextStatement)
+ {
+ nextStatement = statement->getNextStmt();
+
+ // If this statement has correct offset information, change it into an IL offset
+ // node and insert it into the LIR.
+ if (statement->gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ assert(!statement->IsPhiDefnStmt());
+ statement->SetOper(GT_IL_OFFSET);
+ statement->gtNext = nullptr;
+ statement->gtPrev = nullptr;
+
+ BlockRange().InsertBefore(statement->gtStmtList, statement);
+ }
+
+ m_statement = statement;
+ comp->fgWalkTreePost(&statement->gtStmtExpr,
+ [](GenTree** use, Compiler::fgWalkData* walkData) -> Compiler::fgWalkResult {
+ return reinterpret_cast<Rationalizer*>(walkData->pCallbackData)
+ ->RewriteNode(use, *walkData->parentStack);
+ },
+ this, true);
+ }
+
+ assert(BlockRange().CheckLIR(comp));
+ }
+
+ comp->compRationalIRForm = true;
+}
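Before the per-node rewrites run, DoPhase threads the per-statement node lists into a single block-wide sequence by connecting each statement's last node to the next statement's first node. The sketch below mirrors just that threading step; Stmt and ThreadStatements are toy stand-ins, and the code assumes a non-empty statement list.

#include <utility>
#include <vector>

struct Node
{
    Node* prev = nullptr;
    Node* next = nullptr;
};

struct Stmt
{
    Node* firstNode; // like gtStmtList
    Node* lastNode;  // like gtStmtExpr
};

// Thread each statement's node list into one block-wide list and return its
// first and last nodes, mirroring the linking loop in DoPhase.
std::pair<Node*, Node*> ThreadStatements(const std::vector<Stmt>& stmts)
{
    Node* prevLast = nullptr;
    for (const Stmt& stmt : stmts)
    {
        stmt.firstNode->prev = prevLast;
        if (prevLast != nullptr)
        {
            prevLast->next = stmt.firstNode;
        }
        prevLast = stmt.lastNode;
    }
    return std::make_pair(stmts.front().firstNode, stmts.back().lastNode);
}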
diff --git a/src/jit/rationalize.h b/src/jit/rationalize.h
new file mode 100644
index 0000000000..9b15fe4871
--- /dev/null
+++ b/src/jit/rationalize.h
@@ -0,0 +1,67 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//===============================================================================
+#include "phase.h"
+
+class Rationalizer : public Phase
+{
+private:
+ BasicBlock* m_block;
+ GenTreeStmt* m_statement;
+
+public:
+ Rationalizer(Compiler* comp);
+
+#ifdef DEBUG
+ static void ValidateStatement(GenTree* tree, BasicBlock* block);
+
+ // general purpose sanity checking of de facto standard GenTree
+ void SanityCheck();
+
+ // sanity checking of rationalized IR
+ void SanityCheckRational();
+
+#endif // DEBUG
+
+ virtual void DoPhase() override;
+
+ static void RewriteAssignmentIntoStoreLcl(GenTreeOp* assignment);
+ static void MorphAsgIntoStoreObj(Compiler::fgWalkData* data, GenTreeStmt* stmt, GenTree** ppTree);
+
+private:
+ inline LIR::Range& BlockRange() const
+ {
+ return LIR::AsRange(m_block);
+ }
+
+ // SIMD related
+ void RewriteSIMDOperand(LIR::Use& use, bool keepBlk);
+ void FixupIfSIMDLocal(GenTreeLclVarCommon* node);
+
+ // Intrinsic related transformations
+ static void RewriteNodeAsCall(GenTreePtr* ppTree,
+ Compiler::fgWalkData* data,
+ CORINFO_METHOD_HANDLE callHnd,
+#ifdef FEATURE_READYTORUN_COMPILER
+ CORINFO_CONST_LOOKUP entryPoint,
+#endif
+ GenTreeArgList* args);
+
+ static void RewriteIntrinsicAsUserCall(GenTreePtr* ppTree, Compiler::fgWalkData* data);
+
+ // Other transformations
+ void RewriteAssignment(LIR::Use& use);
+ void RewriteAddress(LIR::Use& use);
+
+ // Root visitor
+ Compiler::fgWalkResult RewriteNode(GenTree** useEdge, ArrayStack<GenTree*>& parents);
+};
+
+inline Rationalizer::Rationalizer(Compiler* _comp) : Phase(_comp, "IR Rationalize", PHASE_RATIONALIZE)
+{
+#ifdef DEBUG
+ comp->compNumStatementLinksTraversed = 0;
+#endif
+}
diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp
new file mode 100644
index 0000000000..9dd7299906
--- /dev/null
+++ b/src/jit/regalloc.cpp
@@ -0,0 +1,6841 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX RegAlloc XX
+XX XX
+XX Does the register allocation and puts the remaining lclVars on the stack XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "regalloc.h"
+
+#if FEATURE_FP_REGALLOC
+Compiler::enumConfigRegisterFP Compiler::raConfigRegisterFP()
+{
+ DWORD val = JitConfig.JitRegisterFP();
+
+ return (enumConfigRegisterFP)(val & 0x3);
+}
+#endif // FEATURE_FP_REGALLOC
+
+regMaskTP Compiler::raConfigRestrictMaskFP()
+{
+ regMaskTP result = RBM_NONE;
+
+#if FEATURE_FP_REGALLOC
+ switch (raConfigRegisterFP())
+ {
+ case CONFIG_REGISTER_FP_NONE:
+ result = RBM_NONE;
+ break;
+ case CONFIG_REGISTER_FP_CALLEE_TRASH:
+ result = RBM_FLT_CALLEE_TRASH;
+ break;
+ case CONFIG_REGISTER_FP_CALLEE_SAVED:
+ result = RBM_FLT_CALLEE_SAVED;
+ break;
+ case CONFIG_REGISTER_FP_FULL:
+ result = RBM_ALLFLOAT;
+ break;
+ }
+#endif
+
+ return result;
+}
+
+#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
+
+#if DOUBLE_ALIGN
+DWORD Compiler::getCanDoubleAlign()
+{
+#ifdef DEBUG
+ if (compStressCompile(STRESS_DBL_ALN, 20))
+ return MUST_DOUBLE_ALIGN;
+
+ return JitConfig.JitDoubleAlign();
+#else
+ return DEFAULT_DOUBLE_ALIGN;
+#endif
+}
+#endif // DOUBLE_ALIGN
+
+void Compiler::raInit()
+{
+#if FEATURE_STACK_FP_X87
+ /* We have not assigned any FP variables to registers yet */
+
+ VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::UninitVal());
+#endif
+ codeGen->intRegState.rsIsFloat = false;
+ codeGen->floatRegState.rsIsFloat = true;
+
+ rpReverseEBPenreg = false;
+ rpAsgVarNum = -1;
+ rpPassesMax = 6;
+ rpPassesPessimize = rpPassesMax - 3;
+ if (opts.compDbgCode)
+ {
+ rpPassesMax++;
+ }
+ rpStkPredict = (unsigned)-1;
+ rpFrameType = FT_NOT_SET;
+ rpLostEnreg = false;
+ rpMustCreateEBPCalled = false;
+ rpRegAllocDone = false;
+ rpMaskPInvokeEpilogIntf = RBM_NONE;
+
+ rpPredictMap[PREDICT_NONE] = RBM_NONE;
+ rpPredictMap[PREDICT_ADDR] = RBM_NONE;
+
+#if FEATURE_FP_REGALLOC
+ rpPredictMap[PREDICT_REG] = RBM_ALLINT | RBM_ALLFLOAT;
+ rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT | RBM_ALLFLOAT;
+#else
+ rpPredictMap[PREDICT_REG] = RBM_ALLINT;
+ rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT;
+#endif
+
+#define REGDEF(name, rnum, mask, sname) rpPredictMap[PREDICT_REG_##name] = RBM_##name;
+#include "register.h"
+
+#if defined(_TARGET_ARM_)
+
+ rpPredictMap[PREDICT_PAIR_R0R1] = RBM_R0 | RBM_R1;
+ rpPredictMap[PREDICT_PAIR_R2R3] = RBM_R2 | RBM_R3;
+ rpPredictMap[PREDICT_REG_SP] = RBM_ILLEGAL;
+
+#elif defined(_TARGET_AMD64_)
+
+ rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
+ rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
+ rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL;
+
+#elif defined(_TARGET_X86_)
+
+ rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
+ rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
+ rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL;
+ rpPredictMap[PREDICT_PAIR_EAXEDX] = RBM_EAX | RBM_EDX;
+ rpPredictMap[PREDICT_PAIR_ECXEBX] = RBM_ECX | RBM_EBX;
+
+#endif
+
+ rpBestRecordedPrediction = NULL;
+}
+
+/*****************************************************************************
+ *
+ * The following table(s) determines the order in which registers are considered
+ * for variables to live in
+ */
+
+const regNumber* Compiler::raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize)
+{
+#if FEATURE_FP_REGALLOC
+ if (varTypeIsFloating(regType))
+ {
+ static const regNumber raRegVarOrderFlt[] = {REG_VAR_ORDER_FLT};
+ const unsigned raRegVarOrderFltSize = sizeof(raRegVarOrderFlt) / sizeof(raRegVarOrderFlt[0]);
+
+ if (wbVarOrderSize != NULL)
+ *wbVarOrderSize = raRegVarOrderFltSize;
+
+ return &raRegVarOrderFlt[0];
+ }
+ else
+#endif
+ {
+ static const regNumber raRegVarOrder[] = {REG_VAR_ORDER};
+ const unsigned raRegVarOrderSize = sizeof(raRegVarOrder) / sizeof(raRegVarOrder[0]);
+
+ if (wbVarOrderSize != NULL)
+ *wbVarOrderSize = raRegVarOrderSize;
+
+ return &raRegVarOrder[0];
+ }
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Dump out the variable interference graph
+ *
+ */
+
+void Compiler::raDumpVarIntf()
+{
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ printf("Var. interference graph for %s\n", info.compFullName);
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ continue;
+
+ /* Get hold of the index and the interference mask for the variable */
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ printf(" V%02u,T%02u and ", lclNum, varIndex);
+
+ unsigned refIndex;
+
+ for (refIndex = 0; refIndex < lvaTrackedCount; refIndex++)
+ {
+ if (VarSetOps::IsMember(this, lvaVarIntf[varIndex], refIndex))
+ printf("T%02u ", refIndex);
+ else
+ printf(" ");
+ }
+
+ printf("\n");
+ }
+
+ printf("\n");
+}
+
+/*****************************************************************************
+ *
+ * Dump out the register interference graph
+ *
+ */
+void Compiler::raDumpRegIntf()
+{
+ printf("Reg. interference graph for %s\n", info.compFullName);
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ unsigned varNum;
+
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ continue;
+
+ /* Get hold of the index and the interference mask for the variable */
+
+ varNum = varDsc->lvVarIndex;
+
+ printf(" V%02u,T%02u and ", lclNum, varNum);
+
+ if (varDsc->IsFloatRegType())
+ {
+#if !FEATURE_STACK_FP_X87
+ for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
+ {
+ if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
+ printf("%3s ", getRegName(regNum, true));
+ else
+ printf(" ");
+ }
+#endif
+ }
+ else
+ {
+ for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
+ {
+ if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
+ printf("%3s ", getRegName(regNum));
+ else
+ printf(" ");
+ }
+ }
+
+ printf("\n");
+ }
+
+ printf("\n");
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * We'll adjust the ref counts based on interference
+ *
+ */
+
+void Compiler::raAdjustVarIntf()
+{
+ // This method was not correct and has been disabled.
+ return;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+/* Determine register mask for a call/return from type.
+ */
+
+inline regMaskTP Compiler::genReturnRegForTree(GenTreePtr tree)
+{
+ var_types type = tree->TypeGet();
+
+ if (type == TYP_STRUCT && IsHfa(tree))
+ {
+ int retSlots = GetHfaCount(tree);
+ return ((1 << retSlots) - 1) << REG_FLOATRET;
+ }
+
+ const static regMaskTP returnMap[TYP_COUNT] = {
+ RBM_ILLEGAL, // TYP_UNDEF,
+ RBM_NONE, // TYP_VOID,
+ RBM_INTRET, // TYP_BOOL,
+ RBM_INTRET, // TYP_CHAR,
+ RBM_INTRET, // TYP_BYTE,
+ RBM_INTRET, // TYP_UBYTE,
+ RBM_INTRET, // TYP_SHORT,
+ RBM_INTRET, // TYP_USHORT,
+ RBM_INTRET, // TYP_INT,
+ RBM_INTRET, // TYP_UINT,
+ RBM_LNGRET, // TYP_LONG,
+ RBM_LNGRET, // TYP_ULONG,
+ RBM_FLOATRET, // TYP_FLOAT,
+ RBM_DOUBLERET, // TYP_DOUBLE,
+ RBM_INTRET, // TYP_REF,
+ RBM_INTRET, // TYP_BYREF,
+ RBM_INTRET, // TYP_ARRAY,
+ RBM_ILLEGAL, // TYP_STRUCT,
+ RBM_ILLEGAL, // TYP_BLK,
+ RBM_ILLEGAL, // TYP_LCLBLK,
+ RBM_ILLEGAL, // TYP_PTR,
+ RBM_ILLEGAL, // TYP_FNC,
+ RBM_ILLEGAL, // TYP_UNKNOWN,
+ };
+
+ assert((unsigned)type < sizeof(returnMap) / sizeof(returnMap[0]));
+ assert(returnMap[TYP_LONG] == RBM_LNGRET);
+ assert(returnMap[TYP_DOUBLE] == RBM_DOUBLERET);
+ assert(returnMap[TYP_REF] == RBM_INTRET);
+ assert(returnMap[TYP_STRUCT] == RBM_ILLEGAL);
+
+ regMaskTP result = returnMap[type];
+ assert(result != RBM_ILLEGAL);
+ return result;
+}
+
+/*****************************************************************************/
+
+/****************************************************************************/
+
+#ifdef DEBUG
+
+static void dispLifeSet(Compiler* comp, VARSET_VALARG_TP mask, VARSET_VALARG_TP life)
+{
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = comp->lvaTable; lclNum < comp->lvaCount; lclNum++, varDsc++)
+ {
+ if (!varDsc->lvTracked)
+ continue;
+
+ if (!VarSetOps::IsMember(comp, mask, varDsc->lvVarIndex))
+ continue;
+
+ if (VarSetOps::IsMember(comp, life, varDsc->lvVarIndex))
+ printf("V%02u ", lclNum);
+ }
+}
+
+#endif
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Debugging helpers - display variables liveness info.
+ */
+
+void dispFPvarsInBBlist(BasicBlock* beg, BasicBlock* end, VARSET_TP mask, Compiler* comp)
+{
+ do
+ {
+ printf("BB%02u: ", beg->bbNum);
+
+ printf(" in = [ ");
+ dispLifeSet(comp, mask, beg->bbLiveIn);
+ printf("] ,");
+
+ printf(" out = [ ");
+ dispLifeSet(comp, mask, beg->bbLiveOut);
+ printf("]");
+
+ if (beg->bbFlags & BBF_VISITED)
+ printf(" inner=%u", beg->bbFPinVars);
+
+ printf("\n");
+
+ beg = beg->bbNext;
+ if (!beg)
+ return;
+ } while (beg != end);
+}
+
+#if FEATURE_STACK_FP_X87
+void Compiler::raDispFPlifeInfo()
+{
+ BasicBlock* block;
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+
+ printf("BB%02u: in = [ ", block->bbNum);
+ dispLifeSet(this, optAllFloatVars, block->bbLiveIn);
+ printf("]\n\n");
+
+ VARSET_TP VARSET_INIT(this, life, block->bbLiveIn);
+ for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr tree;
+
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ VarSetOps::AssignNoCopy(this, life, fgUpdateLiveSet(life, tree));
+
+ dispLifeSet(this, optAllFloatVars, life);
+ printf(" ");
+ gtDispTree(tree, 0, NULL, true);
+ }
+
+ printf("\n");
+ }
+
+ printf("BB%02u: out = [ ", block->bbNum);
+ dispLifeSet(this, optAllFloatVars, block->bbLiveOut);
+ printf("]\n\n");
+ }
+}
+#endif // FEATURE_STACK_FP_X87
+/*****************************************************************************/
+#endif // DEBUG
+/*****************************************************************************/
+
+/*****************************************************************************/
+
+void Compiler::raSetRegVarOrder(
+ var_types regType, regNumber* customVarOrder, unsigned* customVarOrderSize, regMaskTP prefReg, regMaskTP avoidReg)
+{
+ unsigned normalVarOrderSize;
+ const regNumber* normalVarOrder = raGetRegVarOrder(regType, &normalVarOrderSize);
+ unsigned index;
+ unsigned listIndex = 0;
+ regMaskTP usedReg = avoidReg;
+
+ noway_assert(*customVarOrderSize >= normalVarOrderSize);
+
+ if (prefReg)
+ {
+ /* First place the preferred registers at the start of customVarOrder */
+
+ regMaskTP regBit;
+ regNumber regNum;
+
+ for (index = 0; index < normalVarOrderSize; index++)
+ {
+ regNum = normalVarOrder[index];
+ regBit = genRegMask(regNum);
+
+ if (usedReg & regBit)
+ continue;
+
+ if (prefReg & regBit)
+ {
+ usedReg |= regBit;
+ noway_assert(listIndex < normalVarOrderSize);
+ customVarOrder[listIndex++] = regNum;
+ prefReg -= regBit;
+ if (prefReg == 0)
+ break;
+ }
+ }
+
+#if CPU_HAS_BYTE_REGS
+ /* Then if byteable registers are preferred place them */
+
+ if (prefReg & RBM_BYTE_REG_FLAG)
+ {
+ for (index = 0; index < normalVarOrderSize; index++)
+ {
+ regNum = normalVarOrder[index];
+ regBit = genRegMask(regNum);
+
+ if (usedReg & regBit)
+ continue;
+
+ if (RBM_BYTE_REGS & regBit)
+ {
+ usedReg |= regBit;
+ noway_assert(listIndex < normalVarOrderSize);
+ customVarOrder[listIndex++] = regNum;
+ }
+ }
+ }
+
+#endif // CPU_HAS_BYTE_REGS
+ }
+
+ /* Now place all the non-preferred registers */
+
+ for (index = 0; index < normalVarOrderSize; index++)
+ {
+ regNumber regNum = normalVarOrder[index];
+ regMaskTP regBit = genRegMask(regNum);
+
+ if (usedReg & regBit)
+ continue;
+
+ usedReg |= regBit;
+ noway_assert(listIndex < normalVarOrderSize);
+ customVarOrder[listIndex++] = regNum;
+ }
+
+ if (avoidReg)
+ {
+ /* Now place the "avoid" registers */
+
+ for (index = 0; index < normalVarOrderSize; index++)
+ {
+ regNumber regNum = normalVarOrder[index];
+ regMaskTP regBit = genRegMask(regNum);
+
+ if (avoidReg & regBit)
+ {
+ noway_assert(listIndex < normalVarOrderSize);
+ customVarOrder[listIndex++] = regNum;
+ avoidReg -= regBit;
+ if (avoidReg == 0)
+ break;
+ }
+ }
+ }
+
+ *customVarOrderSize = listIndex;
+ noway_assert(listIndex == normalVarOrderSize);
+}
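raSetRegVarOrder emits every register exactly once, in three tiers: the preferred registers first, the remaining non-avoided registers in the default order, and the avoided registers last. Below is a simplified sketch of that ordering over plain 64-bit masks; it is a toy helper, not the JIT's regMaskTP machinery, and it assumes register indices are below 64.

#include <cstdint>
#include <vector>

// Build a custom allocation order from a default register order: preferred
// registers first, then the rest, then the avoided registers, each register
// appearing exactly once -- the same shape as raSetRegVarOrder.
std::vector<int> BuildVarOrder(const std::vector<int>& defaultOrder, uint64_t prefMask, uint64_t avoidMask)
{
    std::vector<int> order;
    uint64_t         used = avoidMask; // hold the avoided registers back until the end

    auto take = [&](uint64_t wantMask) {
        for (int reg : defaultOrder)
        {
            uint64_t bit = 1ull << reg;
            if (((used & bit) != 0) || ((wantMask & bit) == 0))
            {
                continue;
            }
            used |= bit;
            order.push_back(reg);
        }
    };

    take(prefMask);              // 1. preferred registers
    take(~avoidMask);            // 2. everything else that is not avoided
    for (int reg : defaultOrder) // 3. finally the avoided registers
    {
        if ((avoidMask & (1ull << reg)) != 0)
        {
            order.push_back(reg);
        }
    }
    return order;
}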
+
+/*****************************************************************************
+ *
+ * Setup the raAvoidArgRegMask and rsCalleeRegArgMaskLiveIn
+ */
+
+void Compiler::raSetupArgMasks(RegState* regState)
+{
+ /* Determine the registers holding incoming register arguments */
+ /* and set up raAvoidArgRegMask to the set of registers that we */
+ /* may want to avoid when enregistering the locals. */
+
+ regState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
+ raAvoidArgRegMask = RBM_NONE;
+
+ LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
+
+ for (LclVarDsc* argDsc = lvaTable; argDsc < argsEnd; argDsc++)
+ {
+ noway_assert(argDsc->lvIsParam);
+
+ // Is it a register argument ?
+ if (!argDsc->lvIsRegArg)
+ continue;
+
+ // only process args that apply to the current register file
+ if ((argDsc->IsFloatRegType() && !info.compIsVarArgs && !opts.compUseSoftFP) != regState->rsIsFloat)
+ {
+ continue;
+ }
+
+ // Is it dead on entry?
+ // In certain cases such as when compJmpOpUsed is true,
+ // or when we have a generic type context arg that we must report
+ // then the arguments have to be kept alive throughout the prolog.
+ // So we have to consider it as live on entry.
+ //
+ bool keepArgAlive = compJmpOpUsed;
+ if ((unsigned(info.compTypeCtxtArg) != BAD_VAR_NUM) && lvaReportParamTypeArg() &&
+ ((lvaTable + info.compTypeCtxtArg) == argDsc))
+ {
+ keepArgAlive = true;
+ }
+
+ if (!keepArgAlive && argDsc->lvTracked && !VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, argDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ // The code to set the regState for each arg is outlined for shared use
+ // by linear scan
+ regNumber inArgReg = raUpdateRegStateForArg(regState, argDsc);
+
+ // Do we need to try to avoid this incoming arg register?
+
+ // If it's not tracked, don't do the stuff below.
+ if (!argDsc->lvTracked)
+ continue;
+
+ // If the incoming arg is used after a call it is live across
+ // a call and will have to be allocated to a callee saved
+ // register anyway (a very common case).
+ //
+ // In this case it is pointless to ask the higher ref count
+ // locals to avoid using the incoming arg register.
+
+ unsigned argVarIndex = argDsc->lvVarIndex;
+
+ /* Do the incoming register and the arg variable interfere? */
+
+ if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg], argVarIndex))
+ {
+ // No they do not interfere,
+ // so we add inArgReg to raAvoidArgRegMask
+
+ raAvoidArgRegMask |= genRegMask(inArgReg);
+ }
+#ifdef _TARGET_ARM_
+ if (argDsc->lvType == TYP_DOUBLE)
+ {
+ // Avoid the double register argument pair for register allocation.
+ if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg + 1], argVarIndex))
+ {
+ raAvoidArgRegMask |= genRegMask(static_cast<regNumber>(inArgReg + 1));
+ }
+ }
+#endif
+ }
+}
+
+#endif // LEGACY_BACKEND
+
+// The code to set the regState for each arg is outlined for shared use
+ // by linear scan. (It is not shared for the System V AMD64 platform.)
+regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc)
+{
+ regNumber inArgReg = argDsc->lvArgReg;
+ regMaskTP inArgMask = genRegMask(inArgReg);
+
+ if (regState->rsIsFloat)
+ {
+ noway_assert(inArgMask & RBM_FLTARG_REGS);
+ }
+ else // regState is for the integer registers
+ {
+ // This might be the fixed return buffer register argument (on ARM64)
+ // We check and allow inArgReg to be theFixedRetBuffReg
+ if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg()))
+ {
+ // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg
+ noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL);
+ // We should have recorded the variable number for the return buffer arg
+ noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
+ }
+ else // we have a regular arg
+ {
+ noway_assert(inArgMask & RBM_ARG_REGS);
+ }
+ }
+
+ regState->rsCalleeRegArgMaskLiveIn |= inArgMask;
+
+#ifdef _TARGET_ARM_
+ if (argDsc->lvType == TYP_DOUBLE)
+ {
+ if (info.compIsVarArgs || opts.compUseSoftFP)
+ {
+ assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
+ assert(!regState->rsIsFloat);
+ }
+ else
+ {
+ assert(regState->rsIsFloat);
+ assert(emitter::isDoubleReg(inArgReg));
+ }
+ regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
+ }
+ else if (argDsc->lvType == TYP_LONG)
+ {
+ assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
+ assert(!regState->rsIsFloat);
+ regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
+ }
+#endif // _TARGET_ARM_
+
+#if FEATURE_MULTIREG_ARGS
+ if (argDsc->lvType == TYP_STRUCT)
+ {
+ if (argDsc->lvIsHfaRegArg())
+ {
+ assert(regState->rsIsFloat);
+ unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass());
+ for (unsigned i = 1; i < cSlots; i++)
+ {
+ assert(inArgReg + i <= LAST_FP_ARGREG);
+ regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i));
+ }
+ }
+ else
+ {
+ unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE;
+ for (unsigned i = 1; i < cSlots; i++)
+ {
+ regNumber nextArgReg = (regNumber)(inArgReg + i);
+ if (nextArgReg > REG_ARG_LAST)
+ {
+ break;
+ }
+ assert(regState->rsIsFloat == false);
+ regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg);
+ }
+ }
+ }
+#endif // FEATURE_MULTIREG_ARGS
+
+ return inArgReg;
+}
+
+#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
+
+/*****************************************************************************
+ *
+ * Assign variables to live in registers, etc.
+ */
+
+void Compiler::raAssignVars()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In raAssignVars()\n");
+#endif
+ /* We need to keep track of which registers we ever touch */
+
+ codeGen->regSet.rsClearRegsModified();
+
+#if FEATURE_STACK_FP_X87
+ // FP register allocation
+ raEnregisterVarsStackFP();
+ raGenerateFPRefCounts();
+#endif
+
+ /* Predict registers used by code generation */
+ rpPredictRegUse(); // New reg predictor/allocator
+
+ // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
+ // so that the gc tracking logic and lvMustInit logic will ignore them.
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvType != TYP_STRUCT)
+ continue;
+
+ if (!varDsc->lvPromoted)
+ continue;
+
+ if (varDsc->lvIsParam)
+ continue;
+
+ if (varDsc->lvRefCnt > 0)
+ continue;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Mark unused struct local V%02u\n", lclNum);
+ }
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ // This should only happen when all its field locals are unused as well.
+
+ for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
+ varNum++)
+ {
+ noway_assert(lvaTable[varNum].lvRefCnt == 0);
+ }
+ }
+ else
+ {
+ noway_assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
+ }
+
+ varDsc->lvUnusedStruct = 1;
+#endif
+
+ // Change such struct locals to ints
+
+ varDsc->lvType = TYP_INT; // Bash to a non-gc type.
+ noway_assert(!varDsc->lvTracked);
+ noway_assert(!varDsc->lvRegister);
+ varDsc->lvOnFrame = false; // Force it not to be onstack.
+ varDsc->lvMustInit = false; // Force not to init it.
+ varDsc->lvStkOffs = 0; // Set it to anything other than BAD_STK_OFFS to make genSetScopeInfo() happy
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Given a regNumber return the correct predictReg enum value
+ */
+
+inline static rpPredictReg rpGetPredictForReg(regNumber reg)
+{
+ return (rpPredictReg)(((int)reg) + ((int)PREDICT_REG_FIRST));
+}
+
+/*****************************************************************************
+ *
+ * Given a varIndex return the correct predictReg enum value
+ */
+
+inline static rpPredictReg rpGetPredictForVarIndex(unsigned varIndex)
+{
+ return (rpPredictReg)(varIndex + ((int)PREDICT_REG_VAR_T00));
+}
+
+/*****************************************************************************
+ *
+ * Given a rpPredictReg return the correct varNumber value
+ */
+
+inline static unsigned rpGetVarIndexForPredict(rpPredictReg predict)
+{
+ return (unsigned)predict - (unsigned)PREDICT_REG_VAR_T00;
+}
+
+/*****************************************************************************
+ *
+ * Given a rpPredictReg return true if it specifies a Txx register
+ */
+
+inline static bool rpHasVarIndexForPredict(rpPredictReg predict)
+{
+ if ((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX))
+ return true;
+ else
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Given a regmask return the correct predictReg enum value
+ */
+
+static rpPredictReg rpGetPredictForMask(regMaskTP regmask)
+{
+ rpPredictReg result = PREDICT_NONE;
+ if (regmask != 0) /* Proceed only if regmask has at least one bit set */
+ {
+ if (((regmask - 1) & regmask) == 0) /* Check if regmask has one bit set */
+ {
+ DWORD reg = 0;
+ assert(FitsIn<DWORD>(regmask));
+ BitScanForward(&reg, (DWORD)regmask);
+ return rpGetPredictForReg((regNumber)reg);
+ }
+
+#if defined(_TARGET_ARM_)
+ /* It has multiple bits set */
+ else if (regmask == (RBM_R0 | RBM_R1))
+ {
+ result = PREDICT_PAIR_R0R1;
+ }
+ else if (regmask == (RBM_R2 | RBM_R3))
+ {
+ result = PREDICT_PAIR_R2R3;
+ }
+#elif defined(_TARGET_X86_)
+ /* It has multiple bits set */
+ else if (regmask == (RBM_EAX | RBM_EDX))
+ {
+ result = PREDICT_PAIR_EAXEDX;
+ }
+ else if (regmask == (RBM_ECX | RBM_EBX))
+ {
+ result = PREDICT_PAIR_ECXEBX;
+ }
+#endif
+ else /* It doesn't match anything */
+ {
+ result = PREDICT_NONE;
+ assert(!"unreachable");
+ NO_WAY("bad regpair");
+ }
+ }
+ return result;
+}
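+// Illustrative mappings (x86):
+//    rpGetPredictForMask(RBM_ECX)           returns PREDICT_REG_ECX      (single bit set)
+//    rpGetPredictForMask(RBM_EAX | RBM_EDX) returns PREDICT_PAIR_EAXEDX  (long register pair)
+// Any other multi-bit mask trips the "bad regpair" assert above.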
+
+/*****************************************************************************
+ *
+ * Record a variable to register(s) interference
+ */
+
+bool Compiler::rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg))
+
+{
+ bool addedIntf = false;
+
+ if (regMask != 0)
+ {
+ for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum))
+ {
+ regMaskTP regBit = genRegMask(regNum);
+
+ if (regMask & regBit)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(newIntf, VarSetOps::Diff(this, life, raLclRegIntf[regNum]));
+ if (!VarSetOps::IsEmpty(this, newIntf))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ VARSET_ITER_INIT(this, newIntfIter, newIntf, varNum);
+ while (newIntfIter.NextElem(this, &varNum))
+ {
+ unsigned lclNum = lvaTrackedToVarNum[varNum];
+ LclVarDsc* varDsc = &lvaTable[varNum];
+#if FEATURE_FP_REGALLOC
+ // Only print the useful interferences
+ // i.e. floating point LclVar interference with floating point registers
+ // or integer LclVar interference with general purpose registers
+ if (varTypeIsFloating(varDsc->TypeGet()) == genIsValidFloatReg(regNum))
+#endif
+ {
+ printf("Record interference between V%02u,T%02u and %s -- %s\n", lclNum, varNum,
+ getRegName(regNum), msg);
+ }
+ }
+ }
+#endif
+ addedIntf = true;
+ VarSetOps::UnionD(this, raLclRegIntf[regNum], newIntf);
+ }
+
+ regMask -= regBit;
+ if (regMask == 0)
+ break;
+ }
+ }
+ }
+ return addedIntf;
+}
+
+/*****************************************************************************
+ *
+ * Record a new variable to variable(s) interference
+ */
+
+bool Compiler::rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg))
+{
+ noway_assert((varNum >= 0) && (varNum < lvaTrackedCount));
+ noway_assert(!VarSetOps::IsEmpty(this, intfVar));
+
+ VARSET_TP VARSET_INIT_NOCOPY(oneVar, VarSetOps::MakeEmpty(this));
+ VarSetOps::AddElemD(this, oneVar, varNum);
+
+ bool newIntf = fgMarkIntf(intfVar, oneVar);
+
+ if (newIntf)
+ rpAddedVarIntf = true;
+
+#ifdef DEBUG
+ if (verbose && newIntf)
+ {
+ for (unsigned oneNum = 0; oneNum < lvaTrackedCount; oneNum++)
+ {
+ if (VarSetOps::IsMember(this, intfVar, oneNum))
+ {
+ unsigned lclNum = lvaTrackedToVarNum[varNum];
+ unsigned lclOne = lvaTrackedToVarNum[oneNum];
+ printf("Record interference between V%02u,T%02u and V%02u,T%02u -- %s\n", lclNum, varNum, lclOne,
+ oneNum, msg);
+ }
+ }
+ }
+#endif
+
+ return newIntf;
+}
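+// rpRecordRegIntf and rpRecordVarIntf are the two halves of interference tracking:
+// register-to-variable edges land in raLclRegIntf[], while variable-to-variable edges go
+// through fgMarkIntf. Both return true when something new was added (rpRecordVarIntf also
+// sets rpAddedVarIntf), so callers can tell when the interference graph has grown.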
+
+/*****************************************************************************
+ *
+ * Determine preferred register mask for a given predictReg value
+ */
+
+inline regMaskTP Compiler::rpPredictRegMask(rpPredictReg predictReg, var_types type)
+{
+ if (rpHasVarIndexForPredict(predictReg))
+ predictReg = PREDICT_REG;
+
+ noway_assert((unsigned)predictReg < sizeof(rpPredictMap) / sizeof(rpPredictMap[0]));
+ noway_assert(rpPredictMap[predictReg] != RBM_ILLEGAL);
+
+ regMaskTP regAvailForType = rpPredictMap[predictReg];
+ if (varTypeIsFloating(type))
+ {
+ regAvailForType &= RBM_ALLFLOAT;
+ }
+ else
+ {
+ regAvailForType &= RBM_ALLINT;
+ }
+#ifdef _TARGET_ARM_
+ if (type == TYP_DOUBLE)
+ {
+ if ((predictReg >= PREDICT_REG_F0) && (predictReg <= PREDICT_REG_F31))
+ {
+ // Fix 388433 ARM JitStress WP7
+ if ((regAvailForType & RBM_DBL_REGS) != 0)
+ {
+ regAvailForType |= (regAvailForType << 1);
+ }
+ else
+ {
+ regAvailForType = RBM_NONE;
+ }
+ }
+ }
+#endif
+ return regAvailForType;
+}
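+// Note: in the ARM TYP_DOUBLE case above, the candidate mask is widened so that each set
+// bit in RBM_DBL_REGS also gets the next register's bit set, since a TYP_DOUBLE occupies
+// two consecutive float registers.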
+
+/*****************************************************************************
+ *
+ * Predict register choice for a type.
+ *
+ * Adds the predicted registers to rsModifiedRegsMask.
+ */
+regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs)
+{
+ regMaskTP preferReg = rpPredictRegMask(predictReg, type);
+ regNumber regNum;
+ regMaskTP regBits;
+
+ // Add any reserved register to the lockedRegs
+ lockedRegs |= codeGen->regSet.rsMaskResvd;
+
+ /* Clear out the lockedRegs from preferReg */
+ preferReg &= ~lockedRegs;
+
+ if (rpAsgVarNum != -1)
+ {
+ noway_assert((rpAsgVarNum >= 0) && (rpAsgVarNum < (int)lclMAX_TRACKED));
+
+ /* Don't pick the register used by rpAsgVarNum either */
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[rpAsgVarNum];
+ noway_assert(tgtVar->lvRegNum != REG_STK);
+
+ preferReg &= ~genRegMask(tgtVar->lvRegNum);
+ }
+
+ switch (type)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_SHORT:
+ case TYP_CHAR:
+ case TYP_INT:
+ case TYP_UINT:
+ case TYP_REF:
+ case TYP_BYREF:
+#ifdef _TARGET_AMD64_
+ case TYP_LONG:
+#endif // _TARGET_AMD64_
+
+ // expand preferReg to all non-locked registers if no bits set
+ preferReg = codeGen->regSet.rsUseIfZero(preferReg & RBM_ALLINT, RBM_ALLINT & ~lockedRegs);
+
+ if (preferReg == 0) // no bits set?
+ {
+ // Add one predefined spill choice register if no bits set.
+ // (The jit will introduce one spill temp)
+ preferReg |= RBM_SPILL_CHOICE;
+ rpPredictSpillCnt++;
+
+#ifdef DEBUG
+ if (verbose)
+ printf("Predict one spill temp\n");
+#endif
+ }
+
+ if (preferReg != 0)
+ {
+ /* Iterate the registers in the order specified by rpRegTmpOrder */
+
+ for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
+ {
+ regNum = rpRegTmpOrder[index];
+ regBits = genRegMask(regNum);
+
+ if ((preferReg & regBits) == regBits)
+ {
+ goto RET;
+ }
+ }
+ }
+ /* Otherwise we have allocated all registers, so do nothing */
+ break;
+
+#ifndef _TARGET_AMD64_
+ case TYP_LONG:
+
+ if ((preferReg == 0) || // no bits set?
+ ((preferReg & (preferReg - 1)) == 0)) // or only one bit set?
+ {
+ // expand preferReg to all non-locked registers
+ preferReg = RBM_ALLINT & ~lockedRegs;
+ }
+
+ if (preferReg == 0) // no bits set?
+ {
+ // Add EAX:EDX to the registers
+ // (The jit will introduce two spill temps)
+ preferReg = RBM_PAIR_TMP;
+ rpPredictSpillCnt += 2;
+#ifdef DEBUG
+ if (verbose)
+ printf("Predict two spill temps\n");
+#endif
+ }
+ else if ((preferReg & (preferReg - 1)) == 0) // only one bit set?
+ {
+ if ((preferReg & RBM_PAIR_TMP_LO) == 0)
+ {
+ // Add EAX to the registers
+ // (The jit will introduce one spill temp)
+ preferReg |= RBM_PAIR_TMP_LO;
+ }
+ else
+ {
+ // Add EDX to the registers
+ // (The jit will introduce one spill temp)
+ preferReg |= RBM_PAIR_TMP_HI;
+ }
+ rpPredictSpillCnt++;
+#ifdef DEBUG
+ if (verbose)
+ printf("Predict one spill temp\n");
+#endif
+ }
+
+ regPairNo regPair;
+ regPair = codeGen->regSet.rsFindRegPairNo(preferReg);
+ if (regPair != REG_PAIR_NONE)
+ {
+ regBits = genRegPairMask(regPair);
+ goto RET;
+ }
+
+ /* Otherwise we have allocated all registers, so do nothing */
+ break;
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_ARM_
+ case TYP_STRUCT:
+#endif
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+#if FEATURE_FP_REGALLOC
+ regMaskTP restrictMask;
+ restrictMask = (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
+ assert((restrictMask & RBM_SPILL_CHOICE_FLT) == RBM_SPILL_CHOICE_FLT);
+
+ // expand preferReg to all available non-locked registers if no bits set
+ preferReg = codeGen->regSet.rsUseIfZero(preferReg & restrictMask, restrictMask & ~lockedRegs);
+ regMaskTP preferDouble;
+ preferDouble = preferReg & (preferReg >> 1);
+
+ if ((preferReg == 0) // no bits set?
+#ifdef _TARGET_ARM_
+ || ((type == TYP_DOUBLE) &&
+ ((preferReg & (preferReg >> 1)) == 0)) // or two consecutive bits set for TYP_DOUBLE
+#endif
+ )
+ {
+ // Add one predefined spill choice register if no bits set.
+ // (The jit will introduce one spill temp)
+ preferReg |= RBM_SPILL_CHOICE_FLT;
+ rpPredictSpillCnt++;
+
+#ifdef DEBUG
+ if (verbose)
+ printf("Predict one spill temp (float)\n");
+#endif
+ }
+
+ assert(preferReg != 0);
+
+ /* Iterate the registers in the order specified by raRegFltTmpOrder */
+
+ for (unsigned index = 0; index < REG_FLT_TMP_ORDER_COUNT; index++)
+ {
+ regNum = raRegFltTmpOrder[index];
+ regBits = genRegMask(regNum);
+
+ if (varTypeIsFloating(type))
+ {
+#ifdef _TARGET_ARM_
+ if (type == TYP_DOUBLE)
+ {
+ if ((regBits & RBM_DBL_REGS) == 0)
+ {
+ continue; // We must restrict the set to the double registers
+ }
+ else
+ {
+ // TYP_DOUBLE uses two consecutive registers
+ regBits |= genRegMask(REG_NEXT(regNum));
+ }
+ }
+#endif
+ // See if COMPlus_JitRegisterFP is restricting this FP register
+ //
+ if ((restrictMask & regBits) != regBits)
+ continue;
+ }
+
+ if ((preferReg & regBits) == regBits)
+ {
+ goto RET;
+ }
+ }
+ /* Otherwise we have allocated all registers, so do nothing */
+ break;
+
+#else // !FEATURE_FP_REGALLOC
+
+ return RBM_NONE;
+
+#endif
+
+ default:
+ noway_assert(!"unexpected type in reg use prediction");
+ }
+
+ /* Abnormal return */
+ noway_assert(!"Ran out of registers in rpPredictRegPick");
+ return RBM_NONE;
+
+RET:
+ /*
+ * If during the first prediction we need to allocate
+ * one of the registers that we used for coloring locals
+ * then flag this by setting rpPredictAssignAgain.
+ * We will have to go back and repredict the registers
+ */
+ if ((rpPasses == 0) && ((rpPredictAssignMask & regBits) == regBits))
+ rpPredictAssignAgain = true;
+
+ // Add a register interference to each of the last use variables
+ if (!VarSetOps::IsEmpty(this, rpLastUseVars) || !VarSetOps::IsEmpty(this, rpUseInPlace))
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(lastUse, VarSetOps::MakeEmpty(this));
+ VarSetOps::Assign(this, lastUse, rpLastUseVars);
+ VARSET_TP VARSET_INIT_NOCOPY(inPlaceUse, VarSetOps::MakeEmpty(this));
+ VarSetOps::Assign(this, inPlaceUse, rpUseInPlace);
+ // While we still have any lastUse or inPlaceUse bits
+ VARSET_TP VARSET_INIT_NOCOPY(useUnion, VarSetOps::Union(this, lastUse, inPlaceUse));
+
+ VARSET_TP VARSET_INIT_NOCOPY(varAsSet, VarSetOps::MakeEmpty(this));
+ VARSET_ITER_INIT(this, iter, useUnion, varNum);
+ while (iter.NextElem(this, &varNum))
+ {
+ // We'll need this for one of the calls...
+ VarSetOps::ClearD(this, varAsSet);
+ VarSetOps::AddElemD(this, varAsSet, varNum);
+
+ // Is this variable in the lastUse set?
+ if (VarSetOps::IsMember(this, lastUse, varNum))
+ {
+ // Record a register to variable interference
+ rpRecordRegIntf(regBits, varAsSet DEBUGARG("last use RegPick"));
+ }
+
+ // Is this variable in the inPlaceUse set?
+ if (VarSetOps::IsMember(this, inPlaceUse, varNum))
+ {
+ // Record a register to variable interference
+ rpRecordRegIntf(regBits, varAsSet DEBUGARG("used in place RegPick"));
+ }
+ }
+ }
+ codeGen->regSet.rsSetRegsModified(regBits);
+
+ return regBits;
+}
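+// In short: rpPredictRegPick hands back a mask for one register (a register pair for
+// TYP_LONG on 32-bit targets, or two consecutive float registers for an ARM TYP_DOUBLE),
+// and bumps rpPredictSpillCnt, predicting a spill temp, whenever every candidate register
+// is locked and it must fall back to the predefined spill-choice register.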
+
+/*****************************************************************************
+ *
+ * Predict integer register use for generating an address mode for a tree,
+ * by setting tree->gtUsedRegs to all registers used by this tree and its
+ * children.
+ * tree - is the child of a GT_IND node
+ * type - the type of the GT_IND node (floating point/integer)
+ * lockedRegs - are the registers which are currently held by
+ * a previously evaluated node.
+ * rsvdRegs - registers which should not be allocated because they will
+ * be needed to evaluate a node in the future
+ * - Also if rsvdRegs has the RBM_LASTUSE bit set then
+ * the rpLastUseVars set should be saved and restored
+ * so that we don't add any new variables to rpLastUseVars
+ * lenCSE - is non-NULL only when we have a lenCSE expression
+ *
+ * Return the scratch registers to be held by this tree. (one or two registers
+ * to form an address expression)
+ */
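+// The address forms recognized here look roughly like (target permitting):
+//      [op1 + cns]    [op1 + op2 + cns]    [op1 + op2<<sh + cns]
+// where the shift 'sh' is limited to 1..3 on x86/x64 and 1..30 on ARM.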
+
+regMaskTP Compiler::rpPredictAddressMode(
+ GenTreePtr tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTreePtr lenCSE)
+{
+ GenTreePtr op1;
+ GenTreePtr op2;
+ GenTreePtr opTemp;
+ genTreeOps oper = tree->OperGet();
+ regMaskTP op1Mask;
+ regMaskTP op2Mask;
+ regMaskTP regMask;
+ ssize_t sh;
+ ssize_t cns = 0;
+ bool rev;
+ bool hasTwoAddConst = false;
+ bool restoreLastUseVars = false;
+ VARSET_TP VARSET_INIT_NOCOPY(oldLastUseVars, VarSetOps::MakeEmpty(this));
+
+ /* do we need to save and restore the rpLastUseVars set ? */
+ if ((rsvdRegs & RBM_LASTUSE) && (lenCSE == NULL))
+ {
+ restoreLastUseVars = true;
+ VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
+ }
+ rsvdRegs &= ~RBM_LASTUSE;
+
+ /* if not an add, then just force it to a register */
+
+ if (oper != GT_ADD)
+ {
+ if (oper == GT_ARR_ELEM)
+ {
+ regMask = rpPredictTreeRegUse(tree, PREDICT_NONE, lockedRegs, rsvdRegs);
+ goto DONE;
+ }
+ else
+ {
+ goto NO_ADDR_EXPR;
+ }
+ }
+
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ rev = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
+
+ /* look for (x + y) + icon address mode */
+
+ if (op2->OperGet() == GT_CNS_INT)
+ {
+ cns = op2->gtIntCon.gtIconVal;
+
+ /* if not an add, then just force op1 into a register */
+ if (op1->OperGet() != GT_ADD)
+ goto ONE_ADDR_EXPR;
+
+ hasTwoAddConst = true;
+
+ /* Record the 'rev' flag, reverse evaluation order */
+ rev = ((op1->gtFlags & GTF_REVERSE_OPS) != 0);
+
+ op2 = op1->gtOp.gtOp2;
+ op1 = op1->gtOp.gtOp1; // Overwrite op1 last!!
+ }
+
+ /* Check for CNS_INT or LSH of CNS_INT in op2 slot */
+
+ sh = 0;
+ if (op2->OperGet() == GT_LSH)
+ {
+ if (op2->gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ sh = op2->gtOp.gtOp2->gtIntCon.gtIconVal;
+ opTemp = op2->gtOp.gtOp1;
+ }
+ else
+ {
+ opTemp = NULL;
+ }
+ }
+ else
+ {
+ opTemp = op2;
+ }
+
+ if (opTemp != NULL)
+ {
+ if (opTemp->OperGet() == GT_NOP)
+ {
+ opTemp = opTemp->gtOp.gtOp1;
+ }
+
+ // Is this a const operand?
+ if (opTemp->OperGet() == GT_CNS_INT)
+ {
+ // Compute the new cns value that Codegen will end up using
+ cns += (opTemp->gtIntCon.gtIconVal << sh);
+
+ goto ONE_ADDR_EXPR;
+ }
+ }
+
+ /* Check for LSH in op1 slot */
+
+ if (op1->OperGet() != GT_LSH)
+ goto TWO_ADDR_EXPR;
+
+ opTemp = op1->gtOp.gtOp2;
+
+ if (opTemp->OperGet() != GT_CNS_INT)
+ goto TWO_ADDR_EXPR;
+
+ sh = opTemp->gtIntCon.gtIconVal;
+
+ /* Check for LSH of 0, special case */
+ if (sh == 0)
+ goto TWO_ADDR_EXPR;
+
+#if defined(_TARGET_XARCH_)
+
+ /* Check for LSH of 1 2 or 3 */
+ if (sh > 3)
+ goto TWO_ADDR_EXPR;
+
+#elif defined(_TARGET_ARM_)
+
+ /* Check for LSH of 1 to 30 */
+ if (sh > 30)
+ goto TWO_ADDR_EXPR;
+
+#else
+
+ goto TWO_ADDR_EXPR;
+
+#endif
+
+ /* Matched a leftShift by 'sh' subtree, move op1 down */
+ op1 = op1->gtOp.gtOp1;
+
+TWO_ADDR_EXPR:
+
+ /* Now we have to evaluate op1 and op2 into registers */
+
+ /* Evaluate op1 and op2 in the correct order */
+ if (rev)
+ {
+ op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | op2Mask, rsvdRegs);
+ }
+ else
+ {
+ op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | op1Mask, rsvdRegs);
+ }
+
+ /* If op1 and op2 must be spilled and reloaded then
+ * op1 and op2 might be reloaded into the same register
+ * This can only happen when all the registers are lockedRegs
+ */
+ if ((op1Mask == op2Mask) && (op1Mask != 0))
+ {
+ /* We'll need to grab a different register for op2 */
+ op2Mask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1Mask);
+ }
+
+#ifdef _TARGET_ARM_
+ // On the ARM we need a scratch register to evaluate the shifted operand for trees that have this form
+ // [op2 + op1<<sh + cns]
+ // when op1 is an enregistered variable, thus the op1Mask is RBM_NONE
+ //
+ if (hasTwoAddConst && (sh != 0) && (op1Mask == RBM_NONE))
+ {
+ op1Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
+ }
+
+ //
+ // On the ARM we will need at least one scratch register for trees that have this form:
+ // [op1 + op2 + cns] or [op1 + op2<<sh + cns]
+ // or for a float/double or long when we have both op1 and op2
+ // or when we have a 'cns' that is too large for the ld/st instruction
+ //
+ if (hasTwoAddConst || varTypeIsFloating(type) || (type == TYP_LONG) || !codeGen->validDispForLdSt(cns, type))
+ {
+ op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
+ }
+
+ //
+ // If we create a CSE that immediately dies then we may need to add an additional register interference
+ // so we don't color the CSE into R3
+ //
+ if (!rev && (op1Mask != RBM_NONE) && (op2->OperGet() == GT_COMMA))
+ {
+ opTemp = op2->gtOp.gtOp2;
+ if (opTemp->OperGet() == GT_LCL_VAR)
+ {
+ unsigned varNum = opTemp->gtLclVar.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ if (varDsc->lvTracked && !VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
+ {
+ rpRecordRegIntf(RBM_TMP_0,
+ VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("dead CSE (gt_ind)"));
+ }
+ }
+ }
+#endif
+
+ regMask = (op1Mask | op2Mask);
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto DONE;
+
+ONE_ADDR_EXPR:
+
+ /* now we have to evaluate op1 into a register */
+
+ op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
+ op2Mask = RBM_NONE;
+
+#ifdef _TARGET_ARM_
+ //
+ // On the ARM we will need another scratch register when we have a 'cns' that is too large for the ld/st
+ // instruction
+ //
+ if (!codeGen->validDispForLdSt(cns, type))
+ {
+ op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
+ }
+#endif
+
+ regMask = (op1Mask | op2Mask);
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto DONE;
+
+NO_ADDR_EXPR:
+
+#if !CPU_LOAD_STORE_ARCH
+ if (oper == GT_CNS_INT)
+ {
+ /* Indirect of a constant does not require a register */
+ regMask = RBM_NONE;
+ }
+ else
+#endif
+ {
+ /* now we have to evaluate tree into a register */
+ regMask = rpPredictTreeRegUse(tree, PREDICT_REG, lockedRegs, rsvdRegs);
+ }
+
+DONE:
+ regMaskTP regUse = tree->gtUsedRegs;
+
+ if (!VarSetOps::IsEmpty(this, compCurLife))
+ {
+ // Add interference between the current set of live variables and
+ // the set of temporary registers needed to evaluate the subtree
+ if (regUse)
+ {
+ rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use (gt_ind)"));
+ }
+ }
+
+ /* Do we need to restore the oldLastUseVars value? */
+ if (restoreLastUseVars)
+ {
+ /*
+ * If we used a GT_ASG targeted register then we need to add
+ * a variable interference between any new last use variables
+ * and the GT_ASG targeted register
+ */
+ if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
+ {
+ rpRecordVarIntf(rpAsgVarNum,
+ VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn conflict (gt_ind)"));
+ }
+ VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
+ }
+
+ return regMask;
+}
+
+/*****************************************************************************
+ *
+ * Mark the given local as the target of a GC ref assignment; with NOGC write
+ * barriers, also record an interference so the local never lands in the write
+ * barrier register (EDX).
+ */
+
+void Compiler::rpPredictRefAssign(unsigned lclNum)
+{
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ varDsc->lvRefAssign = 1;
+
+#if NOGC_WRITE_BARRIERS
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
+ printf("Record interference between V%02u,T%02u and REG WRITE BARRIER -- ref assign\n", lclNum,
+ varDsc->lvVarIndex);
+ }
+#endif
+
+ /* Make sure that write barrier pointer variables never land in EDX */
+ VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
+#endif // NOGC_WRITE_BARRIERS
+}
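+// rpPredictRefAssign is invoked from the write-barrier assignment handling further below,
+// once for each enregisterable local feeding the target address (and for the stored value
+// when it is a local), so that with NOGC write barriers those locals are kept out of the
+// write barrier register.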
+
+/*****************************************************************************
+ *
+ * Predict the internal temp physical register usage for a block assignment tree,
+ * by setting tree->gtUsedRegs.
+ * Records the internal temp physical register usage for this tree.
+ * Returns a mask of interfering registers for this tree.
+ *
+ * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
+ * to the set of scratch registers needed when evaluating the tree.
+ * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
+ * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
+ * predict additional internal temp physical registers to spill into.
+ *
+ * tree - is the GT_ASG block assignment node
+ * predictReg - what type of register does the tree need
+ * lockedRegs - are the registers which are currently held by a previously evaluated node.
+ * Don't modify lockedRegs as it is used at the end to compute a spill mask.
+ * rsvdRegs - registers which should not be allocated because they will
+ * be needed to evaluate a node in the future
+ * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
+ * the rpLastUseVars set should be saved and restored
+ * so that we don't add any new variables to rpLastUseVars.
+ */
+regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTreePtr tree,
+ rpPredictReg predictReg,
+ regMaskTP lockedRegs,
+ regMaskTP rsvdRegs)
+{
+ regMaskTP regMask = RBM_NONE;
+ regMaskTP interferingRegs = RBM_NONE;
+
+ bool hasGCpointer = false;
+ bool dstIsOnStack = false;
+ bool useMemHelper = false;
+ bool useBarriers = false;
+ GenTreeBlk* dst = tree->gtGetOp1()->AsBlk();
+ GenTreePtr dstAddr = dst->Addr();
+ GenTreePtr srcAddrOrFill = tree->gtGetOp2();
+
+ size_t blkSize = dst->gtBlkSize;
+
+ hasGCpointer = (dst->HasGCPtr());
+
+ bool isCopyBlk = tree->OperIsCopyBlkOp();
+ bool isCopyObj = isCopyBlk && hasGCpointer;
+ bool isInitBlk = tree->OperIsInitBlkOp();
+
+ if (isCopyBlk)
+ {
+ assert(srcAddrOrFill->OperIsIndir());
+ srcAddrOrFill = srcAddrOrFill->AsIndir()->Addr();
+ }
+ else
+ {
+ // For initBlk, we don't need to worry about the GC pointers.
+ hasGCpointer = false;
+ }
+
+ if (blkSize != 0)
+ {
+ if (isCopyObj)
+ {
+ dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK));
+ }
+
+ if (isInitBlk)
+ {
+ if (srcAddrOrFill->OperGet() != GT_CNS_INT)
+ {
+ useMemHelper = true;
+ }
+ }
+ }
+ else
+ {
+ useMemHelper = true;
+ }
+
+ if (hasGCpointer && !dstIsOnStack)
+ {
+ useBarriers = true;
+ }
+
+#ifdef _TARGET_ARM_
+ //
+ // On ARM, for COPYBLK & INITBLK we have special treatment for constant lengths.
+ //
+ if (!useMemHelper && !useBarriers)
+ {
+ bool useLoop = false;
+ unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE;
+
+ // A mask to use to force the predictor to choose low registers (to reduce code size)
+ regMaskTP avoidReg = (RBM_R12 | RBM_LR);
+
+ // Allow the src and dst to be used in place, unless we use a loop, in which
+ // case we will need scratch registers as we will be writing to them.
+ rpPredictReg srcAndDstPredict = PREDICT_REG;
+
+ // Will we be using a loop to implement this INITBLK/COPYBLK?
+ if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
+ {
+ useLoop = true;
+ avoidReg = RBM_NONE;
+ srcAndDstPredict = PREDICT_SCRATCH_REG;
+ }
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs,
+ dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE);
+ regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg);
+ }
+ else
+ {
+ regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs,
+ srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE);
+ regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg);
+ }
+
+ // We need at least one scratch register for a copyBlk
+ if (isCopyBlk)
+ {
+ // Pick a low register to reduce the code size
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
+ }
+
+ if (useLoop)
+ {
+ if (isCopyBlk)
+ {
+ // We need a second temp register for a copyBlk (our code gen is load two/store two)
+ // Pick another low register to reduce the code size
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
+ }
+
+ // We need a loop index register
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
+ }
+
+ tree->gtUsedRegs = dstAddr->gtUsedRegs | srcAddrOrFill->gtUsedRegs | (regMaskSmall)regMask;
+
+ return interferingRegs;
+ }
+#endif
+ // What order should the Dest, Val/Src, and Size be calculated
+ GenTreePtr opsPtr[3];
+ regMaskTP regsPtr[3];
+
+#if defined(_TARGET_XARCH_)
+ fgOrderBlockOps(tree, RBM_EDI, (isInitBlk) ? RBM_EAX : RBM_ESI, RBM_ECX, opsPtr, regsPtr);
+
+ // We're going to use these, might as well make them available now
+
+ codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX);
+ if (isCopyBlk)
+ codeGen->regSet.rsSetRegsModified(RBM_ESI);
+
+#elif defined(_TARGET_ARM_)
+
+ if (useMemHelper)
+ {
+ // For all other cases that involve non-constants, we just call memcpy/memset
+ // JIT helpers
+ fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr);
+ interferingRegs |= RBM_CALLEE_TRASH;
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n");
+#endif
+ }
+ else // useBarriers
+ {
+ assert(useBarriers);
+ assert(isCopyBlk);
+
+ fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr);
+
+ // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper
+ interferingRegs |= RBM_CALLEE_TRASH_NOGC;
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n");
+#endif
+ }
+#else // !_TARGET_X86_ && !_TARGET_ARM_
+#error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK"
+#endif // !_TARGET_X86_ && !_TARGET_ARM_
+ regMaskTP opsPtr2RsvdRegs = opsPtr[2] == nullptr ? RBM_NONE : opsPtr[2]->gtRsvdRegs;
+ regMask |= rpPredictTreeRegUse(opsPtr[0], rpGetPredictForMask(regsPtr[0]), lockedRegs,
+ opsPtr[1]->gtRsvdRegs | opsPtr2RsvdRegs | RBM_LASTUSE);
+ regMask |= regsPtr[0];
+ opsPtr[0]->gtUsedRegs |= regsPtr[0];
+ rpRecordRegIntf(regsPtr[0], compCurLife DEBUGARG("movsd dest"));
+
+ regMask |= rpPredictTreeRegUse(opsPtr[1], rpGetPredictForMask(regsPtr[1]), lockedRegs | regMask,
+ opsPtr2RsvdRegs | RBM_LASTUSE);
+ regMask |= regsPtr[1];
+ opsPtr[1]->gtUsedRegs |= regsPtr[1];
+ rpRecordRegIntf(regsPtr[1], compCurLife DEBUGARG("movsd src"));
+
+ regMaskSmall opsPtr2UsedRegs = (regMaskSmall)regsPtr[2];
+ if (opsPtr[2] == nullptr)
+ {
+ // If we have no "size" node, we will predict that regsPtr[2] will be used for the size.
+ // Note that it is quite possible that no register is required, but this preserves
+ // former behavior.
+ regMask |= rpPredictRegPick(TYP_INT, rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask);
+ rpRecordRegIntf(regsPtr[2], compCurLife DEBUGARG("tmp use"));
+ }
+ else
+ {
+ regMask |= rpPredictTreeRegUse(opsPtr[2], rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask, RBM_NONE);
+ opsPtr[2]->gtUsedRegs |= opsPtr2UsedRegs;
+ }
+ regMask |= opsPtr2UsedRegs;
+
+ tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | opsPtr[1]->gtUsedRegs | opsPtr2UsedRegs | (regMaskSmall)regMask;
+ return interferingRegs;
+}
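+// To summarize the block-op prediction: small constant-size ARM copies/inits are predicted
+// as inline load/store sequences (with an optional loop and scratch registers), while the
+// remaining cases target the fixed registers chosen by fgOrderBlockOps: EDI/ESI/ECX style
+// string instructions on x86, or the argument registers of the memcpy/memset/byref-assign
+// helpers on ARM.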
+
+/*****************************************************************************
+ *
+ * Predict the internal temp physical register usage for a tree by setting tree->gtUsedRegs.
+ * Returns a regMask with the internal temp physical register usage for this tree.
+ *
+ * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
+ * to the set of scratch registers needed when evaluating the tree.
+ * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
+ * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
+ * predict additional internal temp physical registers to spill into.
+ *
+ * tree - is the tree node whose register usage is being predicted
+ * predictReg - what type of register does the tree need
+ * lockedRegs - are the registers which are currently held by a previously evaluated node.
+ * Don't modify lockedRegs as it is used at the end to compute a spill mask.
+ * rsvdRegs - registers which should not be allocated because they will
+ * be needed to evaluate a node in the future
+ * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
+ * the rpLastUseVars set should be saved and restored
+ * so that we don't add any new variables to rpLastUseVars.
+ */
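+//
+// The general pattern repeated throughout this routine: decide on a predict class for the
+// node, recurse on its operands with the appropriate lockedRegs/rsvdRegs masks, record any
+// register or variable interferences, then set tree->gtUsedRegs before returning.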
+
+#pragma warning(disable : 4701)
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree,
+ rpPredictReg predictReg,
+ regMaskTP lockedRegs,
+ regMaskTP rsvdRegs)
+{
+ regMaskTP regMask = DUMMY_INIT(RBM_ILLEGAL);
+ regMaskTP op2Mask;
+ regMaskTP tmpMask;
+ rpPredictReg op1PredictReg;
+ rpPredictReg op2PredictReg;
+ LclVarDsc* varDsc = NULL;
+ VARSET_TP VARSET_INIT_NOCOPY(oldLastUseVars, VarSetOps::UninitVal());
+
+ VARSET_TP VARSET_INIT_NOCOPY(varBits, VarSetOps::UninitVal());
+ VARSET_TP VARSET_INIT_NOCOPY(lastUseVarBits, VarSetOps::MakeEmpty(this));
+
+ bool restoreLastUseVars = false;
+ regMaskTP interferingRegs = RBM_NONE;
+
+#ifdef DEBUG
+ // if (verbose) printf("rpPredictTreeRegUse() [%08x]\n", tree);
+ noway_assert(tree);
+ noway_assert(((RBM_ILLEGAL & RBM_ALLINT) == 0));
+ noway_assert(RBM_ILLEGAL);
+ noway_assert((lockedRegs & RBM_ILLEGAL) == 0);
+ /* impossible values, to make sure that we set them */
+ tree->gtUsedRegs = RBM_ILLEGAL;
+#endif
+
+ /* Figure out what kind of a node we have */
+
+ genTreeOps oper = tree->OperGet();
+ var_types type = tree->TypeGet();
+ unsigned kind = tree->OperKind();
+
+ // In the comma case, we care about whether this is "effectively" ADDR(IND(...))
+ genTreeOps effectiveOper = tree->gtEffectiveVal()->OperGet();
+ if ((predictReg == PREDICT_ADDR) && (effectiveOper != GT_IND))
+ predictReg = PREDICT_NONE;
+ else if (rpHasVarIndexForPredict(predictReg))
+ {
+ // The only place where predictReg is set to a var is in the PURE
+ // assignment case where varIndex is the var being assigned to.
+ // We need to check whether the variable is used between here and
+ // its redefinition.
+ unsigned varIndex = rpGetVarIndexForPredict(predictReg);
+ unsigned lclNum = lvaTrackedToVarNum[varIndex];
+ bool found = false;
+ for (GenTreePtr nextTree = tree->gtNext; nextTree != NULL && !found; nextTree = nextTree->gtNext)
+ {
+ if (nextTree->gtOper == GT_LCL_VAR && nextTree->gtLclVarCommon.gtLclNum == lclNum)
+ {
+ // Is this the pure assignment?
+ if ((nextTree->gtFlags & GTF_VAR_DEF) == 0)
+ {
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ found = true;
+ break;
+ }
+ }
+ assert(found);
+ }
+
+ if (rsvdRegs & RBM_LASTUSE)
+ {
+ restoreLastUseVars = true;
+ VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
+ rsvdRegs &= ~RBM_LASTUSE;
+ }
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ bool lastUse = false;
+ regMaskTP enregMask = RBM_NONE;
+
+ switch (oper)
+ {
+#ifdef _TARGET_ARM_
+ case GT_CNS_DBL:
+ // Codegen for floating point constants on the ARM is currently
+ // movw/movt rT1, <lo32 bits>
+ // movw/movt rT2, <hi32 bits>
+ // vmov.i2d dT0, rT1,rT2
+ //
+ // For TYP_FLOAT one integer register is required
+ //
+ // These integer register(s) immediately die
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
+ if (type == TYP_DOUBLE)
+ {
+ // For TYP_DOUBLE a second integer register is required
+ //
+ tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
+ }
+
+ // We also need a floating point register that we keep
+ //
+ if (predictReg == PREDICT_NONE)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs);
+ tree->gtUsedRegs = regMask | tmpMask;
+ goto RETURN_CHECK;
+#endif
+
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
+
+ if (rpHasVarIndexForPredict(predictReg))
+ {
+ unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
+ rpAsgVarNum = tgtIndex;
+
+ // We don't need any register as we plan on writing to the rpAsgVarNum register
+ predictReg = PREDICT_NONE;
+
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
+ tgtVar->lvDependReg = true;
+
+ if (type == TYP_LONG)
+ {
+ assert(oper == GT_CNS_LNG);
+
+ if (tgtVar->lvOtherReg == REG_STK)
+ {
+ // We do need one register for a partially enregistered long
+ type = TYP_INT;
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ }
+ }
+ else
+ {
+#if !CPU_LOAD_STORE_ARCH
+ /* If the constant is a handle then it will need to have a relocation
+ applied to it. It will need to be loaded into a register.
+ But never throw away an existing hint.
+ */
+ if (opts.compReloc && tree->IsCnsIntOrI() && tree->IsIconHandle())
+#endif
+ {
+ if (predictReg == PREDICT_NONE)
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ }
+ break;
+
+ case GT_NO_OP:
+ break;
+
+ case GT_CLS_VAR:
+ if ((predictReg == PREDICT_NONE) && (genActualType(type) == TYP_INT) &&
+ (genTypeSize(type) < sizeof(int)))
+ {
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+#ifdef _TARGET_ARM_
+ // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
+ //
+ if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
+ {
+ // These integer register(s) immediately die
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
+ // Two integer registers are required for a TYP_DOUBLE
+ if (type == TYP_DOUBLE)
+ tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
+ }
+ // We need a temp register in some cases of loads/stores to a class var
+ if (predictReg == PREDICT_NONE)
+ {
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+#endif
+ if (rpHasVarIndexForPredict(predictReg))
+ {
+ unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
+ rpAsgVarNum = tgtIndex;
+
+ // We don't need any register as we plan on writing to the rpAsgVarNum register
+ predictReg = PREDICT_NONE;
+
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
+ tgtVar->lvDependReg = true;
+
+ if (type == TYP_LONG)
+ {
+ if (tgtVar->lvOtherReg == REG_STK)
+ {
+ // We do need one register for a partially enregistered long
+ type = TYP_INT;
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ }
+ }
+ break;
+
+ case GT_LCL_FLD:
+#ifdef _TARGET_ARM_
+ // Check for a misalignment on a Floating Point field
+ //
+ if (varTypeIsFloating(type))
+ {
+ if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
+ {
+ // These integer register(s) immediately die
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
+ // Two integer registers are required for a TYP_DOUBLE
+ if (type == TYP_DOUBLE)
+ tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
+ }
+ }
+#endif
+ __fallthrough;
+
+ case GT_LCL_VAR:
+ case GT_REG_VAR:
+
+ varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
+
+ VarSetOps::Assign(this, varBits, fgGetVarBits(tree));
+ compUpdateLifeVar</*ForCodeGen*/ false>(tree, &lastUseVarBits);
+ lastUse = !VarSetOps::IsEmpty(this, lastUseVarBits);
+
+#if FEATURE_STACK_FP_X87
+ // If it's a floating point var, there's nothing to do
+ if (varTypeIsFloating(type))
+ {
+ tree->gtUsedRegs = RBM_NONE;
+ regMask = RBM_NONE;
+ goto RETURN_CHECK;
+ }
+#endif
+
+ // If the variable is already a register variable, no need to go further.
+ if (oper == GT_REG_VAR)
+ break;
+
+ /* Apply the type of predictReg to the LCL_VAR */
+
+ if (predictReg == PREDICT_REG)
+ {
+ PREDICT_REG_COMMON:
+ if (varDsc->lvRegNum == REG_STK)
+ break;
+
+ goto GRAB_COUNT;
+ }
+ else if (predictReg == PREDICT_SCRATCH_REG)
+ {
+ noway_assert(predictReg == PREDICT_SCRATCH_REG);
+
+ /* Is this the last use of a local var? */
+ if (lastUse)
+ {
+ if (VarSetOps::IsEmptyIntersection(this, rpUseInPlace, lastUseVarBits))
+ goto PREDICT_REG_COMMON;
+ }
+ }
+ else if (rpHasVarIndexForPredict(predictReg))
+ {
+ /* Get the tracked local variable that has an lvVarIndex of tgtIndex1 */
+ {
+ unsigned tgtIndex1 = rpGetVarIndexForPredict(predictReg);
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex1];
+ VarSetOps::MakeSingleton(this, tgtIndex1);
+
+ noway_assert(tgtVar->lvVarIndex == tgtIndex1);
+ noway_assert(tgtVar->lvRegNum != REG_STK); /* Must have been enregistered */
+#ifndef _TARGET_AMD64_
+ // On amd64 we have the occasional spec-allowed implicit conversion from TYP_I_IMPL to TYP_INT
+ // so this assert is meaningless
+ noway_assert((type != TYP_LONG) || (tgtVar->TypeGet() == TYP_LONG));
+#endif // !_TARGET_AMD64_
+
+ if (varDsc->lvTracked)
+ {
+ unsigned srcIndex;
+ srcIndex = varDsc->lvVarIndex;
+
+ // If this variable has its last use here then we will prefer
+ // to color it to the same register as tgtVar.
+ if (lastUse)
+ {
+ /*
+ * Add an entry in the lvaVarPref graph to indicate
+ * that it would be worthwhile to color these two variables
+ * into the same physical register.
+ * This will help us avoid having an extra copy instruction
+ */
+ VarSetOps::AddElemD(this, lvaVarPref[srcIndex], tgtIndex1);
+ VarSetOps::AddElemD(this, lvaVarPref[tgtIndex1], srcIndex);
+ }
+
+ // Add a variable interference from srcIndex to each of the last use variables
+ if (!VarSetOps::IsEmpty(this, rpLastUseVars))
+ {
+ rpRecordVarIntf(srcIndex, rpLastUseVars DEBUGARG("src reg conflict"));
+ }
+ }
+ rpAsgVarNum = tgtIndex1;
+
+ /* We will rely on the target enregistered variable from the GT_ASG */
+ varDsc = tgtVar;
+ }
+ GRAB_COUNT:
+ unsigned grabCount;
+ grabCount = 0;
+
+ if (genIsValidFloatReg(varDsc->lvRegNum))
+ {
+ enregMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ enregMask = genRegMask(varDsc->lvRegNum);
+ }
+
+#ifdef _TARGET_ARM_
+ if ((type == TYP_DOUBLE) && (varDsc->TypeGet() == TYP_FLOAT))
+ {
+ // We need to compute the intermediate value using a TYP_DOUBLE
+ // but we are storing the result in a TYP_FLOAT enregistered variable
+ //
+ grabCount++;
+ }
+ else
+#endif
+ {
+ /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
+ if (enregMask & (rsvdRegs | lockedRegs))
+ {
+ grabCount++;
+ }
+#ifndef _TARGET_64BIT_
+ if (type == TYP_LONG)
+ {
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ tmpMask = genRegMask(varDsc->lvOtherReg);
+ enregMask |= tmpMask;
+
+ /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
+ if (tmpMask & (rsvdRegs | lockedRegs))
+ grabCount++;
+ }
+ else // lvOtherReg == REG_STK
+ {
+ grabCount++;
+ }
+ }
+#endif // _TARGET_64BIT_
+ }
+
+ varDsc->lvDependReg = true;
+
+ if (grabCount == 0)
+ {
+ /* Does not need a register */
+ predictReg = PREDICT_NONE;
+ // noway_assert(!VarSetOps::IsEmpty(this, varBits));
+ VarSetOps::UnionD(this, rpUseInPlace, varBits);
+ }
+ else // (grabCount > 0)
+ {
+#ifndef _TARGET_64BIT_
+ /* If the type is TYP_LONG and we only need one register, change the type to TYP_INT */
+ if ((type == TYP_LONG) && (grabCount == 1))
+ {
+ /* We will need to pick one register */
+ type = TYP_INT;
+ // noway_assert(!VarSetOps::IsEmpty(this, varBits));
+ VarSetOps::UnionD(this, rpUseInPlace, varBits);
+ }
+ noway_assert((type == TYP_DOUBLE) ||
+ (grabCount == (genTypeSize(genActualType(type)) / REGSIZE_BYTES)));
+#else // !_TARGET_64BIT_
+ noway_assert(grabCount == 1);
+#endif // !_TARGET_64BIT_
+ }
+ }
+ else if (type == TYP_STRUCT)
+ {
+#ifdef _TARGET_ARM_
+ // TODO-ARM-Bug?: Passing structs in registers on ARM hits an assert here when
+ // predictReg is PREDICT_REG_R0 to PREDICT_REG_R3
+ // As a workaround we just bash it to PREDICT_NONE here
+ //
+ if (predictReg != PREDICT_NONE)
+ predictReg = PREDICT_NONE;
+#endif
+ // Currently predictReg is saying that we will not need any scratch registers
+ noway_assert(predictReg == PREDICT_NONE);
+
+ /* We may need to sign or zero extend a small type when pushing a struct */
+ if (varDsc->lvPromoted && !varDsc->lvAddrExposed)
+ {
+ for (unsigned varNum = varDsc->lvFieldLclStart;
+ varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
+ {
+ LclVarDsc* fldVar = lvaTable + varNum;
+
+ if (fldVar->lvStackAligned())
+ {
+ // When we are stack aligned, Codegen will just use
+ // a push instruction and thus doesn't need any register,
+ // since we can push either a register or a stack frame location
+ continue;
+ }
+
+ if (varTypeIsByte(fldVar->TypeGet()))
+ {
+ // We will need to reserve one byteable register,
+ //
+ type = TYP_BYTE;
+ predictReg = PREDICT_SCRATCH_REG;
+#if CPU_HAS_BYTE_REGS
+ // It is best to enregister this fldVar in a byteable register
+ //
+ fldVar->addPrefReg(RBM_BYTE_REG_FLAG, this);
+#endif
+ }
+ else if (varTypeIsShort(fldVar->TypeGet()))
+ {
+ bool isEnregistered = fldVar->lvTracked && (fldVar->lvRegNum != REG_STK);
+ // If fldVar is not enregistered then we will need a scratch register
+ //
+ if (!isEnregistered)
+ {
+ // We will need either an int register or a byte register
+ // If we are not requesting a byte register we will request an int register
+ //
+ if (type != TYP_BYTE)
+ type = TYP_INT;
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ regMaskTP preferReg = rpPredictRegMask(predictReg, type);
+ if (preferReg != 0)
+ {
+ if ((genTypeStSz(type) == 1) || (genCountBits(preferReg) <= genTypeStSz(type)))
+ {
+ varDsc->addPrefReg(preferReg, this);
+ }
+ }
+ }
+ break; /* end of case GT_LCL_VAR */
+
+ case GT_JMP:
+ tree->gtUsedRegs = RBM_NONE;
+ regMask = RBM_NONE;
+
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ // Mark the registers required to emit a tailcall profiler callback
+ if (compIsProfilerHookNeeded())
+ {
+ tree->gtUsedRegs |= RBM_PROFILER_JMP_USED;
+ }
+#endif
+ goto RETURN_CHECK;
+
+ default:
+ break;
+ } /* end of switch (oper) */
+
+ /* If we don't need to evaluate to register, regmask is the empty set */
+ /* Otherwise we grab a temp for the local variable */
+
+ if (predictReg == PREDICT_NONE)
+ regMask = RBM_NONE;
+ else
+ {
+ regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs | enregMask);
+
+ if ((oper == GT_LCL_VAR) && (tree->TypeGet() == TYP_STRUCT))
+ {
+ /* We need to sign or zero extend a small type when pushing a struct */
+ noway_assert((type == TYP_INT) || (type == TYP_BYTE));
+
+ varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
+ noway_assert(varDsc->lvPromoted && !varDsc->lvAddrExposed);
+
+ for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
+ varNum++)
+ {
+ LclVarDsc* fldVar = lvaTable + varNum;
+ if (fldVar->lvTracked)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(fldBit, VarSetOps::MakeSingleton(this, fldVar->lvVarIndex));
+ rpRecordRegIntf(regMask, fldBit DEBUGARG(
+ "need scratch register when pushing a small field of a struct"));
+ }
+ }
+ }
+ }
+
+ /* Update the set of lastUse variables that we encountered so far */
+ if (lastUse)
+ {
+ VarSetOps::UnionD(this, rpLastUseVars, lastUseVarBits);
+ VARSET_TP VARSET_INIT(this, varAsSet, lastUseVarBits);
+
+ /*
+ * Add interference from any previously locked temps into this last use variable.
+ */
+ if (lockedRegs)
+ {
+ rpRecordRegIntf(lockedRegs, varAsSet DEBUGARG("last use Predict lockedRegs"));
+ }
+ /*
+ * Add interference from any reserved temps into this last use variable.
+ */
+ if (rsvdRegs)
+ {
+ rpRecordRegIntf(rsvdRegs, varAsSet DEBUGARG("last use Predict rsvdRegs"));
+ }
+ /*
+ * For partially enregistered longs add an interference with the
+ * register returned by rpPredictRegPick
+ */
+ if ((type == TYP_INT) && (tree->TypeGet() == TYP_LONG))
+ {
+ rpRecordRegIntf(regMask, varAsSet DEBUGARG("last use with partial enreg"));
+ }
+ }
+
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ GenTreePtr opsPtr[3];
+ regMaskTP regsPtr[3];
+
+ VARSET_TP VARSET_INIT_NOCOPY(startAsgUseInPlaceVars, VarSetOps::UninitVal());
+
+ switch (oper)
+ {
+ case GT_ASG:
+
+ if (tree->OperIsBlkOp())
+ {
+ interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
+ regMask = 0;
+ goto RETURN_CHECK;
+ }
+ /* Is the value being assigned into a LCL_VAR? */
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
+
+ /* Are we assigning a LCL_VAR the result of a call? */
+ if (op2->gtOper == GT_CALL)
+ {
+ /* Set a preferred register for the LCL_VAR */
+ if (isRegPairType(varDsc->TypeGet()))
+ varDsc->addPrefReg(RBM_LNGRET, this);
+ else if (!varTypeIsFloating(varDsc->TypeGet()))
+ varDsc->addPrefReg(RBM_INTRET, this);
+#ifdef _TARGET_AMD64_
+ else
+ varDsc->addPrefReg(RBM_FLOATRET, this);
+#endif
+ /*
+ * When assigning the result of a call we don't
+ * bother trying to target the right side of the
+ * assignment, since we have a fixed calling convention.
+ */
+ }
+ else if (varDsc->lvTracked)
+ {
+ // We interfere with uses in place
+ if (!VarSetOps::IsEmpty(this, rpUseInPlace))
+ {
+ rpRecordVarIntf(varDsc->lvVarIndex, rpUseInPlace DEBUGARG("Assign UseInPlace conflict"));
+ }
+
+ // Did we predict that this local will be fully enregistered?
+ // and the assignment type is the same as the expression type?
+ // and it is dead on the right side of the assignment?
+ // and we currently have no other rpAsgVarNum active?
+ //
+ if ((varDsc->lvRegNum != REG_STK) && ((type != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)) &&
+ (type == op2->TypeGet()) && (op1->gtFlags & GTF_VAR_DEF) && (rpAsgVarNum == -1))
+ {
+ //
+ // Yes, we should try to target the right side (op2) of this
+ // assignment into the (enregistered) tracked variable.
+ //
+
+ op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
+ op2PredictReg = rpGetPredictForVarIndex(varDsc->lvVarIndex);
+
+ // Remember that this is a new use in place
+
+ // We've added "new UseInPlace"; remove from the global set.
+ VarSetOps::RemoveElemD(this, rpUseInPlace, varDsc->lvVarIndex);
+
+ // Note that later when we walk down to the leaf node for op2
+ // if we decide to actually use the register for the 'varDsc'
+ // to enregister the operand, then we will set rpAsgVarNum to
+ // varDsc->lvVarIndex, by extracting this value using
+ // rpGetVarIndexForPredict()
+ //
+ // Also we reset rpAsgVarNum back to -1 after we have finished
+ // predicting the current GT_ASG node
+ //
+ goto ASG_COMMON;
+ }
+ }
+ }
+ __fallthrough;
+
+ case GT_CHS:
+
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_AND:
+ case GT_ASG_SUB:
+ case GT_ASG_ADD:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ case GT_ASG_UDIV:
+
+ /* We can't use "reg <op>= addr" for TYP_LONG or if op2 is a short type */
+ if ((type != TYP_LONG) && !varTypeIsSmall(op2->gtType))
+ {
+ /* Is the value being assigned into an enregistered LCL_VAR? */
+ /* For debug code we only allow a simple op2 to be assigned */
+ if ((op1->gtOper == GT_LCL_VAR) && (!opts.compDbgCode || rpCanAsgOperWithoutReg(op2, false)))
+ {
+ varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
+ /* Did we predict that this local will be enregistered? */
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ /* Yes, we can use "reg <op>= addr" */
+
+ op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
+ op2PredictReg = PREDICT_NONE;
+
+ goto ASG_COMMON;
+ }
+ }
+ }
+
+#if CPU_LOAD_STORE_ARCH
+ if (oper != GT_ASG)
+ {
+ op1PredictReg = PREDICT_REG;
+ op2PredictReg = PREDICT_REG;
+ }
+ else
+#endif
+ {
+ /*
+ * Otherwise, initialize the normal forcing of operands:
+ * "addr <op>= reg"
+ */
+ op1PredictReg = PREDICT_ADDR;
+ op2PredictReg = PREDICT_REG;
+ }
+
+ ASG_COMMON:
+
+#if !CPU_LOAD_STORE_ARCH
+ if (op2PredictReg != PREDICT_NONE)
+ {
+ /* Is the value being assigned a simple one? */
+ if (rpCanAsgOperWithoutReg(op2, false))
+ op2PredictReg = PREDICT_NONE;
+ }
+#endif
+
+ bool simpleAssignment;
+ simpleAssignment = false;
+
+ if ((oper == GT_ASG) && (op1->gtOper == GT_LCL_VAR))
+ {
+ // Add a variable interference from the assign target
+ // to each of the last use variables
+ if (!VarSetOps::IsEmpty(this, rpLastUseVars))
+ {
+ varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
+
+ if (varDsc->lvTracked)
+ {
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ rpRecordVarIntf(varIndex, rpLastUseVars DEBUGARG("Assign conflict"));
+ }
+ }
+
+ /* Record whether this tree is a simple assignment to a local */
+
+ simpleAssignment = ((type != TYP_LONG) || !opts.compDbgCode);
+ }
+
+ bool requireByteReg;
+ requireByteReg = false;
+
+#if CPU_HAS_BYTE_REGS
+ /* Byte-assignments need the byte registers, unless op1 is an enregistered local */
+
+ if (varTypeIsByte(type) &&
+ ((op1->gtOper != GT_LCL_VAR) || (lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum == REG_STK)))
+
+ {
+ // Byte-assignments typically need a byte register
+ requireByteReg = true;
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + op1->gtLclVar.gtLclNum;
+
+ // Did we predict that this local will be enregistered?
+ if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK) && (oper != GT_CHS))
+ {
+ // We don't require a byte register when op1 is an enregistered local
+ requireByteReg = false;
+ }
+
+ // Is op1 part of an Assign-Op or is the RHS a simple memory indirection?
+ if ((oper != GT_ASG) || (op2->gtOper == GT_IND) || (op2->gtOper == GT_CLS_VAR))
+ {
+ // We should try to put op1 in a byte register
+ varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
+ }
+ }
+ }
+#endif
+
+ VarSetOps::Assign(this, startAsgUseInPlaceVars, rpUseInPlace);
+
+ bool isWriteBarrierAsgNode;
+ isWriteBarrierAsgNode = codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree);
+#ifdef DEBUG
+ GCInfo::WriteBarrierForm wbf;
+ if (isWriteBarrierAsgNode)
+ wbf = codeGen->gcInfo.gcIsWriteBarrierCandidate(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
+ else
+ wbf = GCInfo::WBF_NoBarrier;
+#endif // DEBUG
+
+ regMaskTP wbaLockedRegs;
+ wbaLockedRegs = lockedRegs;
+ if (isWriteBarrierAsgNode)
+ {
+#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+#ifdef DEBUG
+ if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
+ {
+#endif // DEBUG
+ wbaLockedRegs |= RBM_WRITE_BARRIER;
+ op1->gtRsvdRegs |= RBM_WRITE_BARRIER; // This will steer op2 away from REG_WRITE_BARRIER
+ assert(REG_WRITE_BARRIER == REG_EDX);
+ op1PredictReg = PREDICT_REG_EDX;
+#ifdef DEBUG
+ }
+ else
+#endif // DEBUG
+#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+
+#if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
+ {
+#ifdef _TARGET_X86_
+ op1PredictReg = PREDICT_REG_ECX;
+ op2PredictReg = PREDICT_REG_EDX;
+#elif defined(_TARGET_ARM_)
+ op1PredictReg = PREDICT_REG_R0;
+ op2PredictReg = PREDICT_REG_R1;
+
+ // This is my best guess as to what the previous code meant by checking "gtRngChkLen() == NULL".
+ if ((op1->OperGet() == GT_IND) && (op1->gtOp.gtOp1->OperGet() != GT_ARR_BOUNDS_CHECK))
+ {
+ op1 = op1->gtOp.gtOp1;
+ }
+#else // !_TARGET_X86_ && !_TARGET_ARM_
+#error "Non-ARM or x86 _TARGET_ in RegPredict for WriteBarrierAsg"
+#endif
+ }
+#endif
+ }
+
+ /* Are we supposed to evaluate RHS first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+
+#if CPU_HAS_BYTE_REGS
+ // Should we ensure that op2 gets evaluated into a byte register?
+ if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
+ {
+ // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
+ // and we can't select one that is already reserved (i.e. lockedRegs)
+ //
+ op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | RBM_NON_BYTE_REGS));
+ op2->gtUsedRegs |= op2Mask;
+
+ // No longer a simple assignment because we're using extra registers and might
+ // have interference between op1 and op2. See DevDiv #136681
+ simpleAssignment = false;
+ }
+#endif
+ /*
+ * For a simple assignment we don't want the op2Mask to be
+ * marked as interfering with the LCL_VAR, since it is likely
+ * that we will want to enregister the LCL_VAR in exactly
+ * the register that is used to compute op2
+ */
+ tmpMask = lockedRegs;
+
+ if (!simpleAssignment)
+ tmpMask |= op2Mask;
+
+ regMask = rpPredictTreeRegUse(op1, op1PredictReg, tmpMask, RBM_NONE);
+
+ // Did we relax the register prediction for op1 and op2 above ?
+ // - because we are depending upon op1 being enregistered
+ //
+ if ((op1PredictReg == PREDICT_NONE) &&
+ ((op2PredictReg == PREDICT_NONE) || rpHasVarIndexForPredict(op2PredictReg)))
+ {
+ /* We must be assigning into an enregistered LCL_VAR */
+ noway_assert(op1->gtOper == GT_LCL_VAR);
+ varDsc = lvaTable + op1->gtLclVar.gtLclNum;
+ noway_assert(varDsc->lvRegNum != REG_STK);
+
+ /* We need to set lvDependReg, in case we lose the enregistration of op1 */
+ varDsc->lvDependReg = true;
+ }
+ }
+ else
+ {
+ // For the case of simpleAssignments op2 should always be evaluated first
+ noway_assert(!simpleAssignment);
+
+ regMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ if (isWriteBarrierAsgNode)
+ {
+ wbaLockedRegs |= op1->gtUsedRegs;
+ }
+ op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, wbaLockedRegs | regMask, RBM_NONE);
+
+#if CPU_HAS_BYTE_REGS
+ // Should we ensure that op2 gets evaluated into a byte register?
+ if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
+ {
+ // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
+ // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
+ //
+ op2Mask |=
+ rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
+ op2->gtUsedRegs |= op2Mask;
+ }
+#endif
+ }
+
+ if (rpHasVarIndexForPredict(op2PredictReg))
+ {
+ rpAsgVarNum = -1;
+ }
+
+ if (isWriteBarrierAsgNode)
+ {
+#if NOGC_WRITE_BARRIERS
+#ifdef DEBUG
+ if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
+ {
+#endif // DEBUG
+
+ /* Steer computation away from REG_WRITE_BARRIER as the pointer is
+ passed to the write-barrier call in REG_WRITE_BARRIER */
+
+ regMask = op2Mask;
+
+ if (op1->gtOper == GT_IND)
+ {
+ GenTreePtr rv1, rv2;
+ unsigned mul, cns;
+ bool rev;
+
+ /* Special handling of indirect assigns for write barrier */
+
+ bool yes = codeGen->genCreateAddrMode(op1->gtOp.gtOp1, -1, true, RBM_NONE, &rev, &rv1, &rv2,
+ &mul, &cns);
+
+ /* Check address mode for enregisterable locals */
+
+ if (yes)
+ {
+ if (rv1 != NULL && rv1->gtOper == GT_LCL_VAR)
+ {
+ rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum);
+ }
+ if (rv2 != NULL && rv2->gtOper == GT_LCL_VAR)
+ {
+ rpPredictRefAssign(rv2->gtLclVarCommon.gtLclNum);
+ }
+ }
+ }
+
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ rpPredictRefAssign(op2->gtLclVarCommon.gtLclNum);
+ }
+
+ // Add a register interference for REG_WRITE_BARRIER to each of the last use variables
+ if (!VarSetOps::IsEmpty(this, rpLastUseVars))
+ {
+ rpRecordRegIntf(RBM_WRITE_BARRIER,
+ rpLastUseVars DEBUGARG("WriteBarrier and rpLastUseVars conflict"));
+ }
+ tree->gtUsedRegs |= RBM_WRITE_BARRIER;
+#ifdef DEBUG
+ }
+ else
+#endif // DEBUG
+#endif // NOGC_WRITE_BARRIERS
+
+#if defined(DEBUG) || !NOGC_WRITE_BARRIERS
+ {
+#ifdef _TARGET_ARM_
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
+#endif
+ //
+ // For the ARM target we have an optimized JIT Helper
+ // that only trashes a subset of the callee saved registers
+ //
+
+ // NOTE: Adding it to the gtUsedRegs will cause the interference to
+ // be added appropriately
+
+ // the RBM_CALLEE_TRASH_NOGC set is killed. We will record this in interferingRegs
+ // instead of gtUsedRegs, because the latter will be modified later, but we need
+ // to remember to add the interference.
+
+ interferingRegs |= RBM_CALLEE_TRASH_NOGC;
+
+ op1->gtUsedRegs |= RBM_R0;
+ op2->gtUsedRegs |= RBM_R1;
+#else // _TARGET_ARM_
+
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
+#endif
+ // We have to call a normal JIT helper to perform the Write Barrier Assignment
+ // It will trash the callee saved registers
+
+ tree->gtUsedRegs |= RBM_CALLEE_TRASH;
+#endif // _TARGET_ARM_
+ }
+#endif // defined(DEBUG) || !NOGC_WRITE_BARRIERS
+ }
+
+ if (simpleAssignment)
+ {
+ /*
+ * Consider a simple assignment to a local:
+ *
+ * lcl = expr;
+ *
+ * Since the "=" node is visited after the variable
+ * is marked live (assuming it's live after the
+ * assignment), we don't want to use the register
+ * use mask of the "=" node but rather that of the
+ * variable itself.
+ */
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ }
+ else
+ {
+ tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
+ }
+ VarSetOps::Assign(this, rpUseInPlace, startAsgUseInPlaceVars);
+ goto RETURN_CHECK;
+
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ /* assigning shift operators */
+
+ noway_assert(type != TYP_LONG);
+
+#if CPU_LOAD_STORE_ARCH
+ predictReg = PREDICT_ADDR;
+#else
+ predictReg = PREDICT_NONE;
+#endif
+
+ /* shift count is handled same as ordinary shift */
+ goto HANDLE_SHIFT_COUNT;
+
+ case GT_ADDR:
+ regMask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, RBM_LASTUSE);
+
+ if ((regMask == RBM_NONE) && (predictReg >= PREDICT_REG))
+ {
+ // We need a scratch register for the LEA instruction
+ regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
+ }
+
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+
+ case GT_CAST:
+
+ /* Cannot cast to VOID */
+ noway_assert(type != TYP_VOID);
+
+ /* cast to long is special */
+ if (type == TYP_LONG && op1->gtType <= TYP_INT)
+ {
+ noway_assert(tree->gtCast.gtCastType == TYP_LONG || tree->gtCast.gtCastType == TYP_ULONG);
+#if CPU_LONG_USES_REGPAIR
+ rpPredictReg predictRegHi = PREDICT_SCRATCH_REG;
+
+ if (rpHasVarIndexForPredict(predictReg))
+ {
+ unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
+ rpAsgVarNum = tgtIndex;
+
+ // We don't need any register as we plan on writing to the rpAsgVarNum register
+ predictReg = PREDICT_NONE;
+
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
+ tgtVar->lvDependReg = true;
+
+ if (tgtVar->lvOtherReg != REG_STK)
+ {
+ predictRegHi = PREDICT_NONE;
+ }
+ }
+ else
+#endif
+ if (predictReg == PREDICT_NONE)
+ {
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+#ifdef _TARGET_ARM_
+ // If we are widening an int into a long using a targeted register pair we
+ // should retarget so that the low part gets loaded into the appropriate register
+ else if (predictReg == PREDICT_PAIR_R0R1)
+ {
+ predictReg = PREDICT_REG_R0;
+ predictRegHi = PREDICT_REG_R1;
+ }
+ else if (predictReg == PREDICT_PAIR_R2R3)
+ {
+ predictReg = PREDICT_REG_R2;
+ predictRegHi = PREDICT_REG_R3;
+ }
+#endif
+#ifdef _TARGET_X86_
+ // If we are widening an int into a long using a targeted register pair we
+ // should retarget so that the low part gets loaded into the appropriate register
+ else if (predictReg == PREDICT_PAIR_EAXEDX)
+ {
+ predictReg = PREDICT_REG_EAX;
+ predictRegHi = PREDICT_REG_EDX;
+ }
+ else if (predictReg == PREDICT_PAIR_ECXEBX)
+ {
+ predictReg = PREDICT_REG_ECX;
+ predictRegHi = PREDICT_REG_EBX;
+ }
+#endif
+
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+#if CPU_LONG_USES_REGPAIR
+ if (predictRegHi != PREDICT_NONE)
+ {
+ // Now get one more reg for the upper part
+ regMask |= rpPredictRegPick(TYP_INT, predictRegHi, lockedRegs | rsvdRegs | regMask);
+ }
+#endif
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+
+ /* cast from long is special - it frees a register */
+ if (type <= TYP_INT // nice. this presumably is intended to mean "signed int and shorter types"
+ && op1->gtType == TYP_LONG)
+ {
+ if ((predictReg == PREDICT_NONE) || rpHasVarIndexForPredict(predictReg))
+ predictReg = PREDICT_REG;
+
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ // If we have 2 or more regs, free one of them
+ if (!genMaxOneBit(regMask))
+ {
+ /* Clear the 2nd lowest bit in regMask */
+ /* First set tmpMask to the lowest bit in regMask */
+ tmpMask = genFindLowestBit(regMask);
+ /* Next find the second lowest bit in regMask */
+ tmpMask = genFindLowestBit(regMask & ~tmpMask);
+ /* Clear this bit from regmask */
+ regMask &= ~tmpMask;
+ }
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ goto RETURN_CHECK;
+ }
+
+#if CPU_HAS_BYTE_REGS
+ /* cast from signed-byte is special - it uses byteable registers */
+ if (type == TYP_INT)
+ {
+ var_types smallType;
+
+ if (genTypeSize(tree->gtCast.CastOp()->TypeGet()) < genTypeSize(tree->gtCast.gtCastType))
+ smallType = tree->gtCast.CastOp()->TypeGet();
+ else
+ smallType = tree->gtCast.gtCastType;
+
+ if (smallType == TYP_BYTE)
+ {
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ if ((regMask & RBM_BYTE_REGS) == 0)
+ regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, RBM_NON_BYTE_REGS);
+
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+ }
+#endif
+
+#if FEATURE_STACK_FP_X87
+ /* cast to float/double is special */
+ if (varTypeIsFloating(type))
+ {
+ switch (op1->TypeGet())
+ {
+ /* uses fild, so the operand doesn't need to be loaded into a reg */
+ case TYP_INT:
+ case TYP_LONG:
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ regMask = 0;
+ goto RETURN_CHECK;
+ default:
+ break;
+ }
+ }
+
+ /* Casting from floating type to integral type is special */
+ if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet()))
+ {
+ if (opts.compCanUseSSE2)
+ {
+ // predict for SSE2 based casting
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ // Get one more int reg to hold cast result
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask);
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+ }
+#endif
+
+#if FEATURE_FP_REGALLOC
+ // Are we casting from int to float or from float to int
+ // Fix 388428 ARM JitStress WP7
+ if (varTypeIsFloating(type) != varTypeIsFloating(op1->TypeGet()))
+ {
+ // op1 needs to go into a register
+ regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
+
+#ifdef _TARGET_ARM_
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ // We also need a fp scratch register for the convert operation
+ regMask |= rpPredictRegPick((genTypeStSz(type) == 1) ? TYP_FLOAT : TYP_DOUBLE,
+ PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
+ }
+#endif
+ // We also need a register to hold the result
+ regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+#endif
+
+ /* otherwise must load op1 into a register */
+ goto GENERIC_UNARY;
+
+ case GT_INTRINSIC:
+
+#ifdef _TARGET_XARCH_
+ if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet() == TYP_INT)
+ {
+ // This is a special case to handle the following
+ // optimization: conv.i4(round.d(d)) -> round.i(d)
+ // if flowgraph 3186
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
+
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+#endif
+ __fallthrough;
+
+ case GT_NEG:
+#ifdef _TARGET_ARM_
+ if (tree->TypeGet() == TYP_LONG)
+ {
+ // On ARM this consumes an extra register for the '0' value
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ regMaskTP op1Mask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | op1Mask | rsvdRegs);
+
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+#endif // _TARGET_ARM_
+
+ __fallthrough;
+
+ case GT_NOT:
+ // these unary operators will write new values
+ // and thus will need a scratch register
+ GENERIC_UNARY:
+ /* generic unary operators */
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ __fallthrough;
+
+ case GT_NOP:
+ // these unary operators do not write new values
+ // and thus won't need a scratch register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if OPT_BOOL_OPS
+ if (!op1)
+ {
+ tree->gtUsedRegs = 0;
+ regMask = 0;
+ goto RETURN_CHECK;
+ }
+#endif
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ goto RETURN_CHECK;
+
+ case GT_IND:
+ case GT_NULLCHECK: // At this point, nullcheck is just like an IND...
+ {
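+ // intoReg tracks whether the indirection's value will actually be loaded into a register,
+ // or whether we only need to form its address (e.g. for PREDICT_ADDR).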
+ bool intoReg = true;
+ VARSET_TP VARSET_INIT(this, startIndUseInPlaceVars, rpUseInPlace);
+
+ if (fgIsIndirOfAddrOfLocal(tree) != NULL)
+ {
+ compUpdateLifeVar</*ForCodeGen*/ false>(tree);
+ }
+
+ if (predictReg == PREDICT_ADDR)
+ {
+ intoReg = false;
+ }
+ else if (predictReg == PREDICT_NONE)
+ {
+ if (type != TYP_LONG)
+ {
+ intoReg = false;
+ }
+ else
+ {
+ predictReg = PREDICT_REG;
+ }
+ }
+
+ /* forcing to register? */
+ if (intoReg && (type != TYP_LONG))
+ {
+ rsvdRegs |= RBM_LASTUSE;
+ }
+
+ GenTreePtr lenCSE;
+ lenCSE = NULL;
+
+ /* check for address mode */
+ regMask = rpPredictAddressMode(op1, type, lockedRegs, rsvdRegs, lenCSE);
+ tmpMask = RBM_NONE;
+
+#if CPU_LOAD_STORE_ARCH
+ // We may need a scratch register for loading a long
+ if (type == TYP_LONG)
+ {
+ /* This scratch register immediately dies */
+ tmpMask = rpPredictRegPick(TYP_BYREF, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
+ }
+#endif // CPU_LOAD_STORE_ARCH
+
+#ifdef _TARGET_ARM_
+ // For unaligned loads/stores of floating point values, the data must first be loaded into integer register(s)
+ //
+ if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
+ {
+ /* These integer register(s) immediately die */
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
+ // Two integer registers are required for a TYP_DOUBLE
+ if (type == TYP_DOUBLE)
+ tmpMask |=
+ rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs | tmpMask);
+ }
+#endif
+
+ /* forcing to register? */
+ if (intoReg)
+ {
+ regMaskTP lockedMask = lockedRegs | rsvdRegs;
+ tmpMask |= regMask;
+
+ // We will compute a new regMask that holds the register(s)
+ // that we will load the indirection into.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ if (type == TYP_LONG)
+ {
+ // We need to use multiple load instructions here:
+ // For the first register we can not choose
+ // any registers that are being used in place or
+ // any register in the current regMask
+ //
+ regMask = rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
+
+ // For the second register we can choose a register that was
+ // used in place or any register in the old now overwritten regMask
+ // but not the same register that we picked above in 'regMask'
+ //
+ VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
+ regMask |= rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
+ }
+ else
+#endif
+ {
+ // We will use one load instruction here:
+ // The load target register can be a register that was used in place
+ // or one of the registers from the original regMask.
+ //
+ VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
+ regMask = rpPredictRegPick(type, predictReg, lockedMask);
+ }
+ }
+ else if (predictReg != PREDICT_ADDR)
+ {
+ /* Unless the caller specified PREDICT_ADDR */
+ /* we don't return the temp registers used */
+ /* to form the address */
+ regMask = RBM_NONE;
+ }
+ }
+
+ tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
+
+ goto RETURN_CHECK;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+
+#ifdef _TARGET_X86_
+ /* Floating point comparison uses EAX for flags */
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ regMask = RBM_EAX;
+ }
+ else
+#endif
+ if (!(tree->gtFlags & GTF_RELOP_JMP_USED))
+ {
+ // Some comparisons are converted to ?:
+ noway_assert(!fgMorphRelopToQmark(op1));
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ // The set instructions need a byte register
+ regMask = rpPredictRegPick(TYP_BYTE, predictReg, lockedRegs | rsvdRegs);
+ }
+ else
+ {
+ regMask = RBM_NONE;
+#ifdef _TARGET_XARCH_
+ tmpMask = RBM_NONE;
+ // Optimize the compare with a constant cases for xarch
+ if (op1->gtOper == GT_CNS_INT)
+ {
+ if (op2->gtOper == GT_CNS_INT)
+ tmpMask =
+ rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, PREDICT_NONE, lockedRegs | tmpMask, RBM_LASTUSE);
+ tree->gtUsedRegs = op2->gtUsedRegs;
+ goto RETURN_CHECK;
+ }
+ else if (op2->gtOper == GT_CNS_INT)
+ {
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ goto RETURN_CHECK;
+ }
+ else if (op2->gtOper == GT_CNS_LNG)
+ {
+ regMaskTP op1Mask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, rsvdRegs);
+#ifdef _TARGET_X86_
+ // We also need one extra register to read values from
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | op1Mask | rsvdRegs);
+#endif // _TARGET_X86_
+ tree->gtUsedRegs = (regMaskSmall)tmpMask | op1->gtUsedRegs;
+ goto RETURN_CHECK;
+ }
+#endif // _TARGET_XARCH_
+ }
+
+ unsigned op1TypeSize;
+ unsigned op2TypeSize;
+
+ op1TypeSize = genTypeSize(op1->TypeGet());
+ op2TypeSize = genTypeSize(op2->TypeGet());
+
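+ // By default both compare operands are evaluated into registers; the target-specific
+ // code below relaxes this where a memory operand can be used directly.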
+ op1PredictReg = PREDICT_REG;
+ op2PredictReg = PREDICT_REG;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+#ifdef _TARGET_XARCH_
+ if (op1TypeSize == sizeof(int))
+ op1PredictReg = PREDICT_NONE;
+#endif
+
+ tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
+ }
+ else
+ {
+#ifdef _TARGET_XARCH_
+ // For full DWORD compares we can have
+ //
+ // op1 is an address mode and op2 is a register
+ // or
+ // op1 is a register and op2 is an address mode
+ //
+ if ((op2TypeSize == sizeof(int)) && (op1TypeSize == op2TypeSize))
+ {
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = op2->gtLclVar.gtLclNum;
+ varDsc = lvaTable + lclNum;
+ /* Did we predict that this local will be enregistered? */
+ if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK))
+ {
+ op1PredictReg = PREDICT_ADDR;
+ }
+ }
+ }
+ // Codegen will generate cmp reg,[mem] for 4 or 8-byte types, but not for 1 or 2 byte types
+ if ((op1PredictReg != PREDICT_ADDR) && (op2TypeSize >= sizeof(int)))
+ op2PredictReg = PREDICT_ADDR;
+#endif // _TARGET_XARCH_
+
+ tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+#ifdef _TARGET_ARM_
+ if ((op2->gtOper != GT_CNS_INT) || !codeGen->validImmForAlu(op2->gtIntCon.gtIconVal))
+#endif
+ {
+ rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
+ }
+ }
+
+#ifdef _TARGET_XARCH_
+ // In some cases in genCondSetFlags(), we need to use a temporary register (via rsPickReg())
+ // to generate a sign/zero extension before doing a compare. Save a register for this purpose
+ // if one of the registers is small and the types aren't equal.
+
+ if (regMask == RBM_NONE)
+ {
+ rpPredictReg op1xPredictReg, op2xPredictReg;
+ GenTreePtr op1x, op2x;
+ if (tree->gtFlags & GTF_REVERSE_OPS) // TODO: do we really need to handle this case?
+ {
+ op1xPredictReg = op2PredictReg;
+ op2xPredictReg = op1PredictReg;
+ op1x = op2;
+ op2x = op1;
+ }
+ else
+ {
+ op1xPredictReg = op1PredictReg;
+ op2xPredictReg = op2PredictReg;
+ op1x = op1;
+ op2x = op2;
+ }
+ if ((op1xPredictReg < PREDICT_REG) && // op1 doesn't get a register (probably an indir)
+ (op2xPredictReg >= PREDICT_REG) && // op2 gets a register
+ varTypeIsSmall(op1x->TypeGet())) // op1 is smaller than an int
+ {
+ bool needTmp = false;
+
+ // If op1x is a byte, and op2x is not a byteable register, we'll need a temp.
+ // We could predict a byteable register for op2x, but what if we don't get it?
+ // So, be conservative and always ask for a temp. There are a couple small CQ losses as a
+ // result.
+ if (varTypeIsByte(op1x->TypeGet()))
+ {
+ needTmp = true;
+ }
+ else
+ {
+ if (op2x->gtOper == GT_LCL_VAR) // this will be a GT_REG_VAR during code generation
+ {
+ if (genActualType(op1x->TypeGet()) != lvaGetActualType(op2x->gtLclVar.gtLclNum))
+ needTmp = true;
+ }
+ else
+ {
+ if (op1x->TypeGet() != op2x->TypeGet())
+ needTmp = true;
+ }
+ }
+ if (needTmp)
+ {
+ regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
+ }
+ }
+ }
+#endif // _TARGET_XARCH_
+
+ tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
+ goto RETURN_CHECK;
+
+ case GT_MUL:
+
+#ifndef _TARGET_AMD64_
+ if (type == TYP_LONG)
+ {
+ assert(tree->gtIsValid64RsltMul());
+
+ /* Strip out the cast nodes */
+
+ noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
+ op1 = op1->gtCast.CastOp();
+ op2 = op2->gtCast.CastOp();
+#else
+ if (false)
+ {
+#endif // !_TARGET_AMD64_
+ USE_MULT_EAX:
+
+#if defined(_TARGET_X86_)
+ // This will be done by a 64-bit imul "imul eax, reg"
+ // (i.e. EDX:EAX = EAX * reg)
+
+ /* Are we supposed to evaluate op2 first? */
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ rpPredictTreeRegUse(op2, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
+ }
+ else
+ {
+ rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
+ }
+
+ /* set gtUsedRegs to EAX, EDX and the registers needed by op1 and op2 */
+
+ tree->gtUsedRegs = RBM_PAIR_TMP | op1->gtUsedRegs | op2->gtUsedRegs;
+
+ /* set regMask to the set of held registers */
+
+ regMask = RBM_PAIR_TMP_LO;
+
+ if (type == TYP_LONG)
+ regMask |= RBM_PAIR_TMP_HI;
+
+#elif defined(_TARGET_ARM_)
+ // This will be done by a 4 operand multiply
+
+ // Are we supposed to evaluate op2 first?
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_LASTUSE);
+ }
+ else
+ {
+ rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, RBM_LASTUSE);
+ }
+
+ // set regMask to the set of held registers,
+ // the two scratch registers we need to compute the mul result
+
+ regMask = rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
+
+ // set gtUsedRegs to regMask and the registers needed by op1 and op2
+
+ tree->gtUsedRegs = regMask | op1->gtUsedRegs | op2->gtUsedRegs;
+
+#else // !_TARGET_X86_ && !_TARGET_ARM_
+#error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit imul"
+#endif
+
+ goto RETURN_CHECK;
+ }
+ else
+ {
+ /* We use imulEAX for most unsigned multiply operations */
+ if (tree->gtOverflow())
+ {
+ if ((tree->gtFlags & GTF_UNSIGNED) || varTypeIsSmall(tree->TypeGet()))
+ {
+ goto USE_MULT_EAX;
+ }
+ }
+ }
+
+ __fallthrough;
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+
+ case GT_SUB:
+ case GT_ADD:
+ tree->gtUsedRegs = 0;
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ GENERIC_BINARY:
+
+ noway_assert(op2);
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ op1PredictReg = PREDICT_REG;
+#if !CPU_LOAD_STORE_ARCH
+ if (genTypeSize(op1->gtType) >= sizeof(int))
+ op1PredictReg = PREDICT_NONE;
+#endif
+ regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | regMask, RBM_LASTUSE);
+ }
+ else
+ {
+ op2PredictReg = PREDICT_REG;
+#if !CPU_LOAD_STORE_ARCH
+ if (genTypeSize(op2->gtType) >= sizeof(int))
+ op2PredictReg = PREDICT_NONE;
+#endif
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+#ifdef _TARGET_ARM_
+ // For most ALU operations we can generate a single instruction that encodes
+ // a small immediate integer constant value. (except for multiply)
+ //
+ if ((op2->gtOper == GT_CNS_INT) && (oper != GT_MUL))
+ {
+ ssize_t ival = op2->gtIntCon.gtIconVal;
+ if (codeGen->validImmForAlu(ival))
+ {
+ op2PredictReg = PREDICT_NONE;
+ }
+ else if (codeGen->validImmForAdd(ival, INS_FLAGS_DONT_CARE) &&
+ ((oper == GT_ADD) || (oper == GT_SUB)))
+ {
+ op2PredictReg = PREDICT_NONE;
+ }
+ }
+ if (op2PredictReg == PREDICT_NONE)
+ {
+ op2->gtUsedRegs = RBM_NONE;
+ }
+ else
+#endif
+ {
+ rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, RBM_LASTUSE);
+ }
+ }
+ tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
+
+#if CPU_HAS_BYTE_REGS
+ /* We have special register requirements for byte operations */
+
+ if (varTypeIsByte(tree->TypeGet()))
+ {
+ /* For 8 bit arithmetic, one operand has to be in a
+ byte-addressable register, and the other has to be
+ in a byte-addressable reg or in memory. Assume it's in a reg */
+
+ regMaskTP regByteMask = 0;
+ regMaskTP op1ByteMask = op1->gtUsedRegs;
+
+ if (!(op1->gtUsedRegs & RBM_BYTE_REGS))
+ {
+ // Pick a Byte register to use for op1
+ regByteMask = rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs);
+ op1ByteMask = regByteMask;
+ }
+
+ if (!(op2->gtUsedRegs & RBM_BYTE_REGS))
+ {
+ // Pick a Byte register to use for op2, avoiding the one used by op1
+ regByteMask |= rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs | op1ByteMask);
+ }
+
+ if (regByteMask)
+ {
+ tree->gtUsedRegs |= regByteMask;
+ regMask = regByteMask;
+ }
+ }
+#endif
+ goto RETURN_CHECK;
+
+ case GT_DIV:
+ case GT_MOD:
+
+ case GT_UDIV:
+ case GT_UMOD:
+
+ /* non-integer division handled in generic way */
+ if (!varTypeIsIntegral(type))
+ {
+ tree->gtUsedRegs = 0;
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+ goto GENERIC_BINARY;
+ }
+
+#ifndef _TARGET_64BIT_
+
+ if (type == TYP_LONG && (oper == GT_MOD || oper == GT_UMOD))
+ {
+ /* Special case: a mod with an int op2 is done inline using idiv or div
+ to avoid a costly call to the helper */
+
+ noway_assert((op2->gtOper == GT_CNS_LNG) &&
+ (op2->gtLngCon.gtLconVal == int(op2->gtLngCon.gtLconVal)));
+
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tmpMask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP,
+ rsvdRegs | op1->gtRsvdRegs);
+ tmpMask |= rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs | tmpMask, RBM_LASTUSE);
+ }
+ else
+ {
+ tmpMask = rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ tmpMask |=
+ rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | tmpMask | RBM_PAIR_TMP, RBM_LASTUSE);
+ }
+ regMask = RBM_PAIR_TMP;
+#else // !_TARGET_X86_ && !_TARGET_ARM_
+#error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit MOD"
+#endif // !_TARGET_X86_ && !_TARGET_ARM_
+
+ tree->gtUsedRegs =
+ (regMaskSmall)(regMask | op1->gtUsedRegs | op2->gtUsedRegs |
+ rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, regMask | tmpMask));
+
+ goto RETURN_CHECK;
+ }
+#endif // _TARGET_64BIT_
+
+ /* no divide immediate, so force an integer constant that is not
+ * a power of two into a register
+ */
+
+ if (op2->OperKind() & GTK_CONST)
+ {
+ ssize_t ival = op2->gtIntConCommon.IconValue();
+
+ /* Is the divisor a power of 2 ? */
+
+ if (ival > 0 && genMaxOneBit(size_t(ival)))
+ {
+ goto GENERIC_UNARY;
+ }
+ else
+ op2PredictReg = PREDICT_SCRATCH_REG;
+ }
+ else
+ {
+ /* A divisor that is not an integer constant must also be enregistered */
+ op2PredictReg = PREDICT_REG;
+ }
+
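+ // trashedMask is the set of registers trashed by the divide itself, while op1ExcludeMask and
+ // op2ExcludeMask are the registers each operand must avoid; all three are set per target below.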
+ regMaskTP trashedMask;
+ trashedMask = DUMMY_INIT(RBM_ILLEGAL);
+ regMaskTP op1ExcludeMask;
+ op1ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
+ regMaskTP op2ExcludeMask;
+ op2ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
+
+#ifdef _TARGET_XARCH_
+ /* Consider the case "a / b" - we'll need to trash EDX (via "CDQ") before
+ * we can safely allow the "b" value to die. Unfortunately, if we simply
+ * mark the node "b" as using EDX, this will not work if "b" is a register
+ * variable that dies with this particular reference. Thus, if we want to
+ * avoid this situation (where we would have to spill the variable from
+ * EDX to someplace else), we need to explicitly mark the interference
+ * of the variable at this point.
+ */
+
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
+ varDsc = lvaTable + lclNum;
+ if (varDsc->lvTracked)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex))
+ printf("Record interference between V%02u,T%02u and EAX -- int divide\n", lclNum,
+ varDsc->lvVarIndex);
+ if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
+ printf("Record interference between V%02u,T%02u and EDX -- int divide\n", lclNum,
+ varDsc->lvVarIndex);
+ }
+#endif
+ VarSetOps::AddElemD(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex);
+ VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
+ }
+ }
+
+ /* set the held register based on opcode */
+ if (oper == GT_DIV || oper == GT_UDIV)
+ regMask = RBM_EAX;
+ else
+ regMask = RBM_EDX;
+ trashedMask = (RBM_EAX | RBM_EDX);
+ op1ExcludeMask = 0;
+ op2ExcludeMask = (RBM_EAX | RBM_EDX);
+
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
+ trashedMask = RBM_NONE;
+ op1ExcludeMask = RBM_NONE;
+ op2ExcludeMask = RBM_NONE;
+#endif
+
+ /* set the lvPref reg if possible */
+ GenTreePtr dest;
+ /*
+ * Walking the gtNext link twice from here should get us back
+ * to our parent node, if this is a simple assignment tree.
+ */
+ dest = tree->gtNext;
+ if (dest && (dest->gtOper == GT_LCL_VAR) && dest->gtNext && (dest->gtNext->OperKind() & GTK_ASGOP) &&
+ dest->gtNext->gtOp.gtOp2 == tree)
+ {
+ varDsc = lvaTable + dest->gtLclVarCommon.gtLclNum;
+ varDsc->addPrefReg(regMask, this);
+ }
+#ifdef _TARGET_XARCH_
+ op1PredictReg = PREDICT_REG_EDX; /* Normally target op1 into EDX */
+#else
+ op1PredictReg = PREDICT_SCRATCH_REG;
+#endif
+
+ /* are we supposed to evaluate op2 first? */
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | op2ExcludeMask,
+ rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask | op1ExcludeMask, RBM_LASTUSE);
+ }
+ else
+ {
+ tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | op1ExcludeMask,
+ rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, op2PredictReg, tmpMask | lockedRegs | op2ExcludeMask, RBM_LASTUSE);
+ }
+#ifdef _TARGET_ARM_
+ regMask = tmpMask;
+#endif
+ /* grab EAX, EDX for this tree node */
+ tree->gtUsedRegs = (regMaskSmall)trashedMask | op1->gtUsedRegs | op2->gtUsedRegs;
+
+ goto RETURN_CHECK;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+#ifndef _TARGET_64BIT_
+ if (type == TYP_LONG)
+ {
+ if (op2->IsCnsIntOrI())
+ {
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+ // no register used by op2
+ op2->gtUsedRegs = 0;
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ }
+ else
+ {
+ // since RBM_LNGARG_0 and RBM_SHIFT_LNG are hardwired we can't have them in the locked registers
+ tmpMask = lockedRegs;
+ tmpMask &= ~RBM_LNGARG_0;
+ tmpMask &= ~RBM_SHIFT_LNG;
+
+ // op2 goes to RBM_SHIFT, op1 to the RBM_LNGARG_0 pair
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_NONE);
+ tmpMask |= RBM_SHIFT_LNG;
+ // Ensure that the RBM_SHIFT_LNG register interferes with op2's compCurLife
+ // Fix 383843 X86/ARM ILGEN
+ rpRecordRegIntf(RBM_SHIFT_LNG, compCurLife DEBUGARG("SHIFT_LNG arg setup"));
+ rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_LASTUSE);
+ }
+ else
+ {
+ rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_NONE);
+ tmpMask |= RBM_LNGARG_0;
+ // Ensure that the RBM_LNGARG_0 registers interfere with op1's compCurLife
+ // Fix 383839 ARM ILGEN
+ rpRecordRegIntf(RBM_LNGARG_0, compCurLife DEBUGARG("LNGARG_0 arg setup"));
+ rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_LASTUSE);
+ }
+ regMask = RBM_LNGRET; // function return registers
+ op1->gtUsedRegs |= RBM_LNGARG_0;
+ op2->gtUsedRegs |= RBM_SHIFT_LNG;
+
+ tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
+
+ // We are using a helper function to do shift:
+ //
+ tree->gtUsedRegs |= RBM_CALLEE_TRASH;
+ }
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+#ifdef _TARGET_XARCH_
+ if (!op2->IsCnsIntOrI())
+ predictReg = PREDICT_NOT_REG_ECX;
+#endif
+
+ HANDLE_SHIFT_COUNT:
+ // Note that this code is also used by assigning shift operators (i.e. GT_ASG_LSH)
+
+ regMaskTP tmpRsvdRegs;
+
+ if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
+ {
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rsvdRegs = RBM_LASTUSE;
+ tmpRsvdRegs = RBM_NONE;
+ }
+ else
+ {
+ regMask = RBM_NONE;
+ // Special case op1 of a constant
+ if (op1->IsCnsIntOrI())
+ tmpRsvdRegs = RBM_LASTUSE; // Allow a last use to occur in op2; See
+ // System.Xml.Schema.BitSet:Get(int):bool
+ else
+ tmpRsvdRegs = op1->gtRsvdRegs;
+ }
+
+ op2Mask = RBM_NONE;
+ if (!op2->IsCnsIntOrI())
+ {
+ if ((REG_SHIFT != REG_NA) && ((RBM_SHIFT & tmpRsvdRegs) == 0))
+ {
+ op2PredictReg = PREDICT_REG_SHIFT;
+ }
+ else
+ {
+ op2PredictReg = PREDICT_REG;
+ }
+
+ /* evaluate shift count into a register, likely the PREDICT_REG_SHIFT register */
+ op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, tmpRsvdRegs);
+
+ // If our target arch has a REG_SHIFT register then
+ // we set the PrefReg when we have a LclVar for op2, and
+ // we add an interference with REG_SHIFT for any other LclVars alive at op2
+ if (REG_SHIFT != REG_NA)
+ {
+ VARSET_TP VARSET_INIT(this, liveSet, compCurLife);
+
+ while (op2->gtOper == GT_COMMA)
+ {
+ op2 = op2->gtOp.gtOp2;
+ }
+
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
+ varDsc->setPrefReg(REG_SHIFT, this);
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::RemoveElemD(this, liveSet, varDsc->lvVarIndex);
+ }
+ }
+
+ // Ensure that we have a register interference with the LclVar in tree's LiveSet,
+ // excluding the LclVar that was used for the shift amount as it is read-only
+ // and can be kept alive through the shift operation
+ //
+ rpRecordRegIntf(RBM_SHIFT, liveSet DEBUGARG("Variable Shift Register"));
+ // In case op2Mask doesn't contain the required shift register,
+ // we will or it in now.
+ op2Mask |= RBM_SHIFT;
+ }
+ }
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ assert(regMask == RBM_NONE);
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs | op2Mask, rsvdRegs | RBM_LASTUSE);
+ }
+
+#if CPU_HAS_BYTE_REGS
+ if (varTypeIsByte(type))
+ {
+ // Fix 383789 X86 ILGEN
+ // Fix 383813 X86 ILGEN
+ // Fix 383828 X86 ILGEN
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + op1->gtLclVar.gtLclNum;
+ if (varDsc->lvTracked)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(op1VarBit,
+ VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
+
+ // Ensure that we don't assign a Non-Byteable register for op1's LCL_VAR
+ rpRecordRegIntf(RBM_NON_BYTE_REGS, op1VarBit DEBUGARG("Non Byte Register"));
+ }
+ }
+ if ((regMask & RBM_BYTE_REGS) == 0)
+ {
+ // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
+ // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
+ //
+ regMask |=
+ rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
+ }
+ }
+#endif
+ tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
+ }
+
+ goto RETURN_CHECK;
+
+ case GT_COMMA:
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ if (predictReg == PREDICT_NONE)
+ {
+ predictReg = PREDICT_REG;
+ }
+ else if (rpHasVarIndexForPredict(predictReg))
+ {
+ /* Don't propagate the tgt reg use in a GT_COMMA */
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+
+ regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs | regMask, RBM_LASTUSE);
+ }
+ else
+ {
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+
+ /* CodeGen will enregister the op2 side of a GT_COMMA */
+ if (predictReg == PREDICT_NONE)
+ {
+ predictReg = PREDICT_REG;
+ }
+ else if (rpHasVarIndexForPredict(predictReg))
+ {
+ /* Don't propagate the tgt reg use in a GT_COMMA */
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+
+ regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
+ }
+ // tree should only accumulate the used registers from the op2 side of the GT_COMMA
+ //
+ tree->gtUsedRegs = op2->gtUsedRegs;
+ if ((op2->gtOper == GT_LCL_VAR) && (rsvdRegs != 0))
+ {
+ LclVarDsc* op2VarDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
+
+ if (op2VarDsc->lvTracked)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(op2VarBit, VarSetOps::MakeSingleton(this, op2VarDsc->lvVarIndex));
+ rpRecordRegIntf(rsvdRegs, op2VarBit DEBUGARG("comma use"));
+ }
+ }
+ goto RETURN_CHECK;
+
+ case GT_QMARK:
+ {
+ noway_assert(op1 != NULL && op2 != NULL);
+
+ /*
+ * If the gtUsedRegs conflicts with lockedRegs
+ * then we are going to have to spill some registers
+ * into the non-trashed register set to keep them alive
+ */
+ unsigned spillCnt;
+ spillCnt = 0;
+ regMaskTP spillRegs;
+ spillRegs = lockedRegs & tree->gtUsedRegs;
+
+ while (spillRegs)
+ {
+ /* Find the next register that needs to be spilled */
+ tmpMask = genFindLowestBit(spillRegs);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
+ gtDispTree(tree, 0, NULL, true);
+ }
+#endif
+ /* In Codegen it will typically introduce a spill temp here */
+ /* rather than relocating the register to a non trashed reg */
+ rpPredictSpillCnt++;
+ spillCnt++;
+
+ /* Remove it from the spillRegs and lockedRegs*/
+ spillRegs &= ~tmpMask;
+ lockedRegs &= ~tmpMask;
+ }
+ {
+ VARSET_TP VARSET_INIT(this, startQmarkCondUseInPlaceVars, rpUseInPlace);
+
+ /* Evaluate the <cond> subtree */
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+
+ noway_assert(op2->gtOper == GT_COLON);
+ if (rpHasVarIndexForPredict(predictReg) && ((op2->gtFlags & (GTF_ASG | GTF_CALL)) != 0))
+ {
+ // Don't try to target the register specified in predictReg when we have complex subtrees
+ //
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ GenTreePtr elseTree = op2->AsColon()->ElseNode();
+ GenTreePtr thenTree = op2->AsColon()->ThenNode();
+
+ noway_assert(thenTree != NULL && elseTree != NULL);
+
+ // Update compCurLife to only those vars live on the <then> subtree
+
+ VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtThenLiveSet);
+
+ if (type == TYP_VOID)
+ {
+ /* Evaluate the <then> subtree */
+ rpPredictTreeRegUse(thenTree, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ regMask = RBM_NONE;
+ predictReg = PREDICT_NONE;
+ }
+ else
+ {
+ // A mask to use to force the predictor to choose low registers (to reduce code size)
+ regMaskTP avoidRegs = RBM_NONE;
+#ifdef _TARGET_ARM_
+ avoidRegs = (RBM_R12 | RBM_LR);
+#endif
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ /* Evaluate the <then> subtree */
+ regMask =
+ rpPredictTreeRegUse(thenTree, predictReg, lockedRegs, rsvdRegs | avoidRegs | RBM_LASTUSE);
+
+ if (regMask)
+ {
+ rpPredictReg op1PredictReg = rpGetPredictForMask(regMask);
+ if (op1PredictReg != PREDICT_NONE)
+ predictReg = op1PredictReg;
+ }
+ }
+
+ VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
+
+ /* Evaluate the <else> subtree */
+ // First record the post-then liveness, and reset the current liveness to the else
+ // branch liveness.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT(this, postThenLive, compCurLife);
+#endif
+
+ VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
+
+ rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
+ tree->gtUsedRegs |= thenTree->gtUsedRegs | elseTree->gtUsedRegs;
+
+ // The then and the else are "virtual basic blocks" that form a control-flow diamond.
+ // They each have only one successor, which they share. Their live-out sets must equal the
+ // live-in set of this virtual successor block, and thus must be the same. We can assert
+ // that equality here.
+ assert(VarSetOps::Equal(this, compCurLife, postThenLive));
+
+ if (spillCnt > 0)
+ {
+ regMaskTP reloadMask = RBM_NONE;
+
+ while (spillCnt)
+ {
+ regMaskTP reloadReg;
+
+ /* Get an extra register to hold it */
+ reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
+ gtDispTree(tree, 0, NULL, true);
+ }
+#endif
+ reloadMask |= reloadReg;
+
+ spillCnt--;
+ }
+
+ /* update the gtUsedRegs mask */
+ tree->gtUsedRegs |= reloadMask;
+ }
+ }
+
+ goto RETURN_CHECK;
+ }
+ case GT_RETURN:
+ tree->gtUsedRegs = RBM_NONE;
+ regMask = RBM_NONE;
+
+ /* Is there a return value? */
+ if (op1 != NULL)
+ {
+#if FEATURE_FP_REGALLOC
+ if (varTypeIsFloating(type))
+ {
+ predictReg = PREDICT_FLTRET;
+ if (type == TYP_FLOAT)
+ regMask = RBM_FLOATRET;
+ else
+ regMask = RBM_DOUBLERET;
+ }
+ else
+#endif
+ if (isRegPairType(type))
+ {
+ predictReg = PREDICT_LNGRET;
+ regMask = RBM_LNGRET;
+ }
+ else
+ {
+ predictReg = PREDICT_INTRET;
+ regMask = RBM_INTRET;
+ }
+ if (info.compCallUnmanaged)
+ {
+ lockedRegs |= (RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
+ }
+ rpPredictTreeRegUse(op1, predictReg, lockedRegs, RBM_LASTUSE);
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ }
+
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ // On ARM, when running under the profiler, emitting the Leave callback requires RBM_PROFILER_RETURN_USED.
+ // We could optimize the registers based on int/long or no return value, but to
+ // keep it simple we mark the entire RBM_PROFILER_RETURN_USED set as used regs here.
+ if (compIsProfilerHookNeeded())
+ {
+ tree->gtUsedRegs |= RBM_PROFILER_RET_USED;
+ }
+
+#endif
+ goto RETURN_CHECK;
+
+ case GT_RETFILT:
+ if (op1 != NULL)
+ {
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ regMask = genReturnRegForTree(tree);
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+ tree->gtUsedRegs = 0;
+ regMask = 0;
+
+ goto RETURN_CHECK;
+
+ case GT_JTRUE:
+ /* This must be a test of a relational operator */
+
+ noway_assert(op1->OperIsCompare());
+
+ /* Only condition code set by this operation */
+
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_NONE);
+
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ regMask = 0;
+
+ goto RETURN_CHECK;
+
+ case GT_SWITCH:
+ noway_assert(type <= TYP_INT);
+ noway_assert(compCurBB->bbJumpKind == BBJ_SWITCH);
+#ifdef _TARGET_ARM_
+ {
+ regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
+ unsigned jumpCnt = compCurBB->bbJumpSwt->bbsCount;
+ if (jumpCnt > 2)
+ {
+ // Table based switch requires an extra register for the table base
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
+ }
+ tree->gtUsedRegs = op1->gtUsedRegs | regMask;
+ }
+#else // !_TARGET_ARM_
+ rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+#endif // _TARGET_ARM_
+ regMask = 0;
+ goto RETURN_CHECK;
+
+ case GT_CKFINITE:
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+ // Need a reg to load exponent into
+ regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
+ tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs;
+ goto RETURN_CHECK;
+
+ case GT_LCLHEAP:
+ regMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs);
+ op2Mask = 0;
+
+#ifdef _TARGET_ARM_
+ if (info.compInitMem)
+ {
+ // We zero out two registers in the ARM codegen path
+ op2Mask |=
+ rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask | op2Mask);
+ }
+#endif
+
+ op1->gtUsedRegs |= (regMaskSmall)regMask;
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)op2Mask;
+
+ // The result will be put in the reg we picked for the size
+ // regMask = <already set as we want it to be>
+
+ goto RETURN_CHECK;
+
+ case GT_OBJ:
+ {
+#ifdef _TARGET_ARM_
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ regMaskTP avoidRegs = (RBM_R12 | RBM_LR); // A mask to use to force the predictor to choose low
+ // registers (to reduce code size)
+ regMask = RBM_NONE;
+ tmpMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | avoidRegs);
+#endif
+
+ if (fgIsIndirOfAddrOfLocal(tree) != NULL)
+ {
+ compUpdateLifeVar</*ForCodeGen*/ false>(tree);
+ }
+
+#ifdef _TARGET_ARM_
+ unsigned objSize = info.compCompHnd->getClassSize(tree->gtObj.gtClass);
+ regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
+ // If it has one bit set, and that's an arg reg...
+ if (preferReg != RBM_NONE && genMaxOneBit(preferReg) && ((preferReg & RBM_ARG_REGS) != 0))
+ {
+ // We are passing the 'obj' in the argument registers
+ //
+ regNumber rn = genRegNumFromMask(preferReg);
+
+ // Add the registers used to pass the 'obj' to regMask.
+ for (unsigned i = 0; i < objSize / 4; i++)
+ {
+ if (rn == MAX_REG_ARG)
+ break;
+ // Otherwise...
+ regMask |= genRegMask(rn);
+ rn = genRegArgNext(rn);
+ }
+ }
+ else
+ {
+ // We are passing the 'obj' in the outgoing arg space
+ // We will need one register to load it into unless the 'obj' size is 4 or less.
+ //
+ if (objSize > 4)
+ {
+ regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | tmpMask | avoidRegs);
+ }
+ }
+ tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
+ goto RETURN_CHECK;
+#else // !_TARGET_ARM
+ goto GENERIC_UNARY;
+#endif // _TARGET_ARM_
+ }
+
+ case GT_MKREFANY:
+ {
+#ifdef _TARGET_ARM_
+ regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
+ regMask = RBM_NONE;
+ if ((((preferReg - 1) & preferReg) == 0) && ((preferReg & RBM_ARG_REGS) != 0))
+ {
+ // A MKREFANY takes up two registers.
+ regNumber rn = genRegNumFromMask(preferReg);
+ regMask = RBM_NONE;
+ if (rn < MAX_REG_ARG)
+ {
+ regMask |= genRegMask(rn);
+ rn = genRegArgNext(rn);
+ if (rn < MAX_REG_ARG)
+ regMask |= genRegMask(rn);
+ }
+ }
+ if (regMask != RBM_NONE)
+ {
+ // Condensation of GENERIC_BINARY path.
+ assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+ op2PredictReg = PREDICT_REG;
+ regMaskTP regMaskOp1 = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMaskOp1, RBM_LASTUSE);
+ regMask |= op1->gtUsedRegs | op2->gtUsedRegs;
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+ tree->gtUsedRegs = op1->gtUsedRegs;
+#endif // _TARGET_ARM_
+ goto GENERIC_BINARY;
+ }
+
+ case GT_BOX:
+ goto GENERIC_UNARY;
+
+ case GT_LOCKADD:
+ goto GENERIC_BINARY;
+
+ case GT_XADD:
+ case GT_XCHG:
+ // Ensure we can write to op2. op2 will hold the output.
+ if (predictReg < PREDICT_SCRATCH_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ op2Mask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
+ regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2Mask);
+ }
+ else
+ {
+ regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
+ op2Mask = rpPredictTreeRegUse(op2, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | regMask);
+ }
+ tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
+ goto RETURN_CHECK;
+
+ case GT_ARR_LENGTH:
+ goto GENERIC_UNARY;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected simple operator in reg use prediction");
+ break;
+ }
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
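+ // Locals shared by the special-operator cases below (primarily GT_CALL).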
+ GenTreePtr args;
+ GenTreeArgList* list;
+ regMaskTP keepMask;
+ unsigned regArgsNum;
+ int regIndex;
+ regMaskTP regArgMask;
+ regMaskTP curArgMask;
+
+ case GT_CALL:
+
+ {
+
+ /* initialize so we can just or in various bits */
+ tree->gtUsedRegs = RBM_NONE;
+
+#if GTF_CALL_REG_SAVE
+ /*
+ * Unless the GTF_CALL_REG_SAVE flag is set,
+ * we can't preserve the RBM_CALLEE_TRASH registers.
+ * (likewise we can't preserve the return registers)
+ * So we remove them from the lockedRegs set and
+ * record any of them in the keepMask
+ */
+
+ if (tree->gtFlags & GTF_CALL_REG_SAVE)
+ {
+ regMaskTP trashMask = genReturnRegForTree(tree);
+
+ keepMask = lockedRegs & trashMask;
+ lockedRegs &= ~trashMask;
+ }
+ else
+#endif
+ {
+ keepMask = lockedRegs & RBM_CALLEE_TRASH;
+ lockedRegs &= ~RBM_CALLEE_TRASH;
+ }
+
+ regArgsNum = 0;
+ regIndex = 0;
+
+ /* Is there an object pointer? */
+ if (tree->gtCall.gtCallObjp)
+ {
+ /* Evaluate the instance pointer first */
+
+ args = tree->gtCall.gtCallObjp;
+
+ /* the objPtr always goes to an integer register (through temp or directly) */
+ noway_assert(regArgsNum == 0);
+ regArgsNum++;
+
+ /* Must be passed in a register */
+
+ noway_assert(args->gtFlags & GTF_LATE_ARG);
+
+ /* Must be either a deferred reg arg node or a GT_ASG node */
+
+ noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
+ args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
+
+ if (!args->IsArgPlaceHolderNode())
+ {
+ rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ }
+ }
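+ // Remember the current rpUseInPlace set so it can be restored after each argument is predicted.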
+ VARSET_TP VARSET_INIT_NOCOPY(startArgUseInPlaceVars, VarSetOps::UninitVal());
+ VarSetOps::Assign(this, startArgUseInPlaceVars, rpUseInPlace);
+
+ /* process argument list */
+ for (list = tree->gtCall.gtCallArgs; list; list = list->Rest())
+ {
+ args = list->Current();
+
+ if (args->gtFlags & GTF_LATE_ARG)
+ {
+ /* Must be either a Placeholder/NOP node or a GT_ASG node */
+
+ noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
+ args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
+
+ if (!args->IsArgPlaceHolderNode())
+ {
+ rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ }
+
+ regArgsNum++;
+ }
+ else
+ {
+#ifdef FEATURE_FIXED_OUT_ARGS
+ // We'll store this argument into the outgoing argument area
+ // It needs to be in a register to be stored.
+ //
+ predictReg = PREDICT_REG;
+
+#else // !FEATURE_FIXED_OUT_ARGS
+ // We'll generate a push for this argument
+ //
+ predictReg = PREDICT_NONE;
+ if (varTypeIsSmall(args->TypeGet()))
+ {
+ /* We may need to sign or zero extend a small type using a register */
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+#endif
+
+ rpPredictTreeRegUse(args, predictReg, lockedRegs, RBM_LASTUSE);
+ }
+ VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
+ tree->gtUsedRegs |= args->gtUsedRegs;
+ }
+
+ /* Is there a late argument list */
+
+ regIndex = 0;
+ regArgMask = RBM_NONE; // Set of argument registers that have already been setup.
+ args = NULL;
+
+ /* process the late argument list */
+ for (list = tree->gtCall.gtCallLateArgs; list; regIndex++)
+ {
+ // If the current argument being copied is a promoted struct local, set this pointer to its description.
+ LclVarDsc* promotedStructLocal = NULL;
+
+ curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg
+ tmpMask = RBM_NONE; // Set of additional temp registers that are need only to setup the current arg
+
+ assert(list->IsList());
+
+ args = list->Current();
+ list = list->Rest();
+
+ assert(!args->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs.
+
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(tree, args);
+ assert(curArgTabEntry);
+
+ regNumber regNum = curArgTabEntry->regNum; // first register used to pass this argument
+ unsigned numSlots =
+ curArgTabEntry->numSlots; // number of outgoing arg stack slots used by this argument
+
+ rpPredictReg argPredictReg;
+ regMaskTP avoidReg = RBM_NONE;
+
+ if (regNum != REG_STK)
+ {
+ argPredictReg = rpGetPredictForReg(regNum);
+ curArgMask |= genRegMask(regNum);
+ }
+ else
+ {
+ assert(numSlots > 0);
+ argPredictReg = PREDICT_NONE;
+#ifdef _TARGET_ARM_
+ // Force the predictor to choose a low register when regNum is REG_STK to reduce code bloat
+ avoidReg = (RBM_R12 | RBM_LR);
+#endif
+ }
+
+#ifdef _TARGET_ARM_
+ // For TYP_LONG or TYP_DOUBLE register arguments we need to add the second argument register
+ //
+ if ((regNum != REG_STK) && ((args->TypeGet() == TYP_LONG) || (args->TypeGet() == TYP_DOUBLE)))
+ {
+ // 64-bit longs and doubles require 2 consecutive argument registers
+ curArgMask |= genRegMask(REG_NEXT(regNum));
+ }
+ else if (args->TypeGet() == TYP_STRUCT)
+ {
+ GenTreePtr argx = args;
+ GenTreePtr lclVarTree = NULL;
+
+ /* The GT_OBJ may be a child of a GT_COMMA */
+ while (argx->gtOper == GT_COMMA)
+ {
+ argx = argx->gtOp.gtOp2;
+ }
+ unsigned originalSize = 0;
+
+ if (argx->gtOper == GT_OBJ)
+ {
+ originalSize = info.compCompHnd->getClassSize(argx->gtObj.gtClass);
+
+ // Is it the address of a promoted struct local?
+ if (argx->gtObj.gtOp1->gtOper == GT_ADDR && argx->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ lclVarTree = argx->gtObj.gtOp1->gtOp.gtOp1;
+ LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvPromoted)
+ promotedStructLocal = varDsc;
+ }
+ }
+ else if (argx->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + argx->gtLclVarCommon.gtLclNum;
+ originalSize = varDsc->lvSize();
+
+ // Is it a promoted struct local?
+ if (varDsc->lvPromoted)
+ promotedStructLocal = varDsc;
+ }
+ else if (argx->gtOper == GT_MKREFANY)
+ {
+ originalSize = 2 * TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
+ }
+
+ // We only pass arguments differently if it is a struct local that is "independently" promoted, which
+ // allows the field locals to be independently enregistered.
+ if (promotedStructLocal != NULL)
+ {
+ if (lvaGetPromotionType(promotedStructLocal) != PROMOTION_TYPE_INDEPENDENT)
+ promotedStructLocal = NULL;
+ }
+
+ unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
+
+ // Are we passing a TYP_STRUCT in multiple integer registers?
+ // If so, set up curArgMask to reflect this.
+ // Also slots is updated to reflect the number of outgoing arg slots that we will write
+ if (regNum != REG_STK)
+ {
+ regNumber regLast = (curArgTabEntry->isHfaRegArg) ? LAST_FP_ARGREG : REG_ARG_LAST;
+ assert(genIsValidReg(regNum));
+ regNumber nextReg = REG_NEXT(regNum);
+ slots--;
+ while (slots > 0 && nextReg <= regLast)
+ {
+ curArgMask |= genRegMask(nextReg);
+ nextReg = REG_NEXT(nextReg);
+ slots--;
+ }
+ }
+
+ if ((promotedStructLocal != NULL) && (curArgMask != RBM_NONE))
+ {
+ // All or a portion of this struct will be placed in the argument registers indicated by
+ // "curArgMask". We build in knowledge of the order in which the code is generated here, so
+ // that the second arg to be evaluated interferes with the reg for the first, the third with
+ // the regs for the first and second, etc. But since we always place the stack slots before
+ // placing the register slots we do not add interferences for any part of the struct that gets
+ // passed on the stack.
+
+ argPredictReg =
+ PREDICT_NONE; // We will target the individual fields into registers but not the whole struct
+ regMaskTP prevArgMask = RBM_NONE;
+ for (unsigned i = 0; i < promotedStructLocal->lvFieldCnt; i++)
+ {
+ LclVarDsc* fieldVarDsc = &lvaTable[promotedStructLocal->lvFieldLclStart + i];
+ if (fieldVarDsc->lvTracked)
+ {
+ assert(lclVarTree != NULL);
+ if (prevArgMask != RBM_NONE)
+ {
+ rpRecordRegIntf(prevArgMask, VarSetOps::MakeSingleton(this, fieldVarDsc->lvVarIndex)
+ DEBUGARG("fieldVar/argReg"));
+ }
+ }
+ // Now see how many registers this uses up.
+ unsigned firstRegOffset = fieldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
+ unsigned nextAfterLastRegOffset =
+ (fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize + TARGET_POINTER_SIZE - 1) /
+ TARGET_POINTER_SIZE;
+ unsigned nextAfterLastArgRegOffset =
+ min(nextAfterLastRegOffset,
+ genIsValidIntReg(regNum) ? REG_NEXT(REG_ARG_LAST) : REG_NEXT(LAST_FP_ARGREG));
+
+ for (unsigned regOffset = firstRegOffset; regOffset < nextAfterLastArgRegOffset;
+ regOffset++)
+ {
+ prevArgMask |= genRegMask(regNumber(regNum + regOffset));
+ }
+
+ if (nextAfterLastRegOffset > nextAfterLastArgRegOffset)
+ {
+ break;
+ }
+
+ if ((fieldVarDsc->lvFldOffset % TARGET_POINTER_SIZE) == 0)
+ {
+ // Add the argument register used here as a preferred register for this fieldVarDsc
+ //
+ regNumber firstRegUsed = regNumber(regNum + firstRegOffset);
+ fieldVarDsc->setPrefReg(firstRegUsed, this);
+ }
+ }
+ compUpdateLifeVar</*ForCodeGen*/ false>(argx);
+ }
+
+ // If slots is greater than zero then part or all of this TYP_STRUCT
+ // argument is passed in the outgoing argument area. (except HFA arg)
+ //
+ if ((slots > 0) && !curArgTabEntry->isHfaRegArg)
+ {
+ // We will need a register to address the TYP_STRUCT
+ // Note that we can use an argument register in curArgMask as in
+ // codegen we pass the stack portion of the argument before we
+ // setup the register part.
+ //
+
+ // Force the predictor to choose a LOW_REG here to reduce code bloat
+ avoidReg = (RBM_R12 | RBM_LR);
+
+ assert(tmpMask == RBM_NONE);
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | avoidReg);
+
+ // If slots > 1 then we will need a second register to perform the load/store into the outgoing
+ // arg area
+ if (slots > 1)
+ {
+ tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG,
+ lockedRegs | regArgMask | tmpMask | avoidReg);
+ }
+ }
+ } // (args->TypeGet() == TYP_STRUCT)
+#endif // _TARGET_ARM_
+
+ // If we have a promotedStructLocal we don't need to call rpPredictTreeRegUse(args, ...
+ // as we have already calculated the correct tmpMask and curArgMask values and
+ // by calling rpPredictTreeRegUse we would just add unnecessary register interferences.
+ //
+ if (promotedStructLocal == NULL)
+ {
+ /* Target the appropriate argument register */
+ tmpMask |= rpPredictTreeRegUse(args, argPredictReg, lockedRegs | regArgMask, RBM_LASTUSE);
+ }
+
+ // We mark OBJ(ADDR(LOCAL)) with GTF_VAR_DEATH since the local is required to live
+ // for the duration of the OBJ.
+ if (args->OperGet() == GT_OBJ && (args->gtFlags & GTF_VAR_DEATH))
+ {
+ GenTreePtr lclVarTree = fgIsIndirOfAddrOfLocal(args);
+ assert(lclVarTree != NULL); // Or else would not be marked with GTF_VAR_DEATH.
+ compUpdateLifeVar</*ForCodeGen*/ false>(lclVarTree);
+ }
+
+ regArgMask |= curArgMask;
+ args->gtUsedRegs |= (tmpMask | regArgMask);
+ tree->gtUsedRegs |= args->gtUsedRegs;
+ tree->gtCall.gtCallLateArgs->gtUsedRegs |= args->gtUsedRegs;
+
+ if (args->gtUsedRegs != RBM_NONE)
+ {
+ // Add register interference with the set of registers used or in use when we evaluated
+ // the current arg, with whatever is alive after the current arg
+ //
+ rpRecordRegIntf(args->gtUsedRegs, compCurLife DEBUGARG("register arg setup"));
+ }
+ VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
+ }
+ assert(list == NULL);
+
+ regMaskTP callAddrMask;
+ callAddrMask = RBM_NONE;
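+ // On a load/store architecture the call target must be loaded into a register;
+ // otherwise it can remain a memory operand.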
+#if CPU_LOAD_STORE_ARCH
+ predictReg = PREDICT_SCRATCH_REG;
+#else
+ predictReg = PREDICT_NONE;
+#endif
+
+ switch (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK)
+ {
+ case GTF_CALL_VIRT_STUB:
+
+ // We only want to record an interference between the virtual stub
+ // param reg and anything that's live AFTER the call, but we've not
+ // yet processed the indirect target. So add RBM_VIRTUAL_STUB_PARAM
+ // to interferingRegs.
+ interferingRegs |= RBM_VIRTUAL_STUB_PARAM;
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with Virtual Stub Param\n");
+#endif
+ codeGen->regSet.rsSetRegsModified(RBM_VIRTUAL_STUB_PARAM);
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ predictReg = PREDICT_REG_VIRTUAL_STUB_PARAM;
+ }
+ break;
+
+ case GTF_CALL_VIRT_VTABLE:
+ predictReg = PREDICT_SCRATCH_REG;
+ break;
+
+ case GTF_CALL_NONVIRT:
+ predictReg = PREDICT_SCRATCH_REG;
+ break;
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+ if (tree->gtCall.gtCallCookie)
+ {
+ codeGen->regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
+
+ callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallCookie, PREDICT_REG_PINVOKE_COOKIE_PARAM,
+ lockedRegs | regArgMask, RBM_LASTUSE);
+
+ // Just in case we predict some other registers, force interference with our two special
+ // parameters: PINVOKE_COOKIE_PARAM & PINVOKE_TARGET_PARAM
+ callAddrMask |= (RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
+
+ predictReg = PREDICT_REG_PINVOKE_TARGET_PARAM;
+ }
+#endif
+ callAddrMask |=
+ rpPredictTreeRegUse(tree->gtCall.gtCallAddr, predictReg, lockedRegs | regArgMask, RBM_LASTUSE);
+ }
+ else if (predictReg != PREDICT_NONE)
+ {
+ callAddrMask |= rpPredictRegPick(TYP_I_IMPL, predictReg, lockedRegs | regArgMask);
+ }
+
+ if (tree->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ // Need a register for tcbReg
+ callAddrMask |=
+ rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
+#if CPU_LOAD_STORE_ARCH
+ // Need an extra register for tmpReg
+ callAddrMask |=
+ rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
+#endif
+ }
+
+ tree->gtUsedRegs |= callAddrMask;
+
+ /* After the call, restore the original value of lockedRegs */
+ lockedRegs |= keepMask;
+
+ /* set the return register */
+ regMask = genReturnRegForTree(tree);
+
+ if (regMask & rsvdRegs)
+ {
+ // We will need to relocate the return register value
+ regMaskTP intRegMask = (regMask & RBM_ALLINT);
+#if FEATURE_FP_REGALLOC
+ regMaskTP floatRegMask = (regMask & RBM_ALLFLOAT);
+#endif
+ regMask = RBM_NONE;
+
+ if (intRegMask)
+ {
+ if (intRegMask == RBM_INTRET)
+ {
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ else if (intRegMask == RBM_LNGRET)
+ {
+ regMask |= rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ else
+ {
+ noway_assert(!"unexpected return regMask");
+ }
+ }
+
+#if FEATURE_FP_REGALLOC
+ if (floatRegMask)
+ {
+ if (floatRegMask == RBM_FLOATRET)
+ {
+ regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ else if (floatRegMask == RBM_DOUBLERET)
+ {
+ regMask |= rpPredictRegPick(TYP_DOUBLE, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ else // HFA return case
+ {
+ for (unsigned f = 0; f < genCountBits(floatRegMask); f++)
+ {
+ regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ }
+ }
+#endif
+ }
+
+ /* the return registers (if any) are killed */
+ tree->gtUsedRegs |= regMask;
+
+#if GTF_CALL_REG_SAVE
+ if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
+#endif
+ {
+ /* the RBM_CALLEE_TRASH set are killed (i.e. EAX,ECX,EDX) */
+ tree->gtUsedRegs |= RBM_CALLEE_TRASH;
+ }
+ }
+
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ // Mark required registers for emitting tailcall profiler callback as used
+ if (compIsProfilerHookNeeded() && tree->gtCall.IsTailCall() && (tree->gtCall.gtCallType == CT_USER_FUNC))
+ {
+ tree->gtUsedRegs |= RBM_PROFILER_TAIL_USED;
+ }
+#endif
+ break;
+
+ case GT_ARR_ELEM:
+
+ // Figure out which registers can't be touched
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ rsvdRegs |= tree->gtArrElem.gtArrInds[dim]->gtRsvdRegs;
+
+ regMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrObj, PREDICT_REG, lockedRegs, rsvdRegs);
+
+ regMaskTP dimsMask;
+ dimsMask = 0;
+
+#if CPU_LOAD_STORE_ARCH
+ // We need a register to load the bounds of the MD array
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
+#endif
+
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ /* We need scratch registers to compute index-lower_bound.
+ Also, gtArrInds[0]'s register will be used as the second
+ addressability register (besides gtArrObj's) */
+
+ regMaskTP dimMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrInds[dim], PREDICT_SCRATCH_REG,
+ lockedRegs | regMask | dimsMask, rsvdRegs);
+ if (dim == 0)
+ regMask |= dimMask;
+
+ dimsMask |= dimMask;
+ }
+#ifdef _TARGET_XARCH_
+            // INS_imul doesn't have an immediate constant.
+ if (!jitIsScaleIndexMul(tree->gtArrElem.gtArrElemSize))
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask);
+#endif
+ tree->gtUsedRegs = (regMaskSmall)(regMask | dimsMask);
+ break;
+
+ case GT_CMPXCHG:
+ {
+#ifdef _TARGET_XARCH_
+ rsvdRegs |= RBM_EAX;
+#endif
+ if (tree->gtCmpXchg.gtOpLocation->OperGet() == GT_LCL_VAR)
+ {
+ regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_REG, lockedRegs, rsvdRegs);
+ }
+ else
+ {
+ regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_ADDR, lockedRegs, rsvdRegs);
+ }
+ op2Mask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpValue, PREDICT_REG, lockedRegs, rsvdRegs | regMask);
+
+#ifdef _TARGET_XARCH_
+ rsvdRegs &= ~RBM_EAX;
+ tmpMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpComparand, PREDICT_REG_EAX, lockedRegs,
+ rsvdRegs | regMask | op2Mask);
+ tree->gtUsedRegs = (regMaskSmall)(RBM_EAX | regMask | op2Mask | tmpMask);
+ predictReg = PREDICT_REG_EAX; // When this is done the result is always in EAX.
+#else
+ tmpMask = 0;
+ tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask | tmpMask);
+#endif
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ {
+ regMaskTP opArrLenRsvd = rsvdRegs | tree->gtBoundsChk.gtIndex->gtRsvdRegs;
+ regMask = rpPredictTreeRegUse(tree->gtBoundsChk.gtArrLen, PREDICT_REG, lockedRegs, opArrLenRsvd);
+ rpPredictTreeRegUse(tree->gtBoundsChk.gtIndex, PREDICT_REG, lockedRegs | regMask, RBM_LASTUSE);
+
+ tree->gtUsedRegs =
+ (regMaskSmall)regMask | tree->gtBoundsChk.gtArrLen->gtUsedRegs | tree->gtBoundsChk.gtIndex->gtUsedRegs;
+ }
+ break;
+
+ default:
+ NO_WAY("unexpected special operator in reg use prediction");
+ break;
+ }
+
+RETURN_CHECK:
+
+#ifdef DEBUG
+ /* make sure we set them to something reasonable */
+ if (tree->gtUsedRegs & RBM_ILLEGAL)
+ noway_assert(!"used regs not set properly in reg use prediction");
+
+ if (regMask & RBM_ILLEGAL)
+        noway_assert(!"return value not set properly in reg use prediction");
+
+#endif
+
+    /*
+     * If gtUsedRegs conflicts with lockedRegs
+     * then we are going to have to spill some registers
+     * into the non-trashed register set to keep them alive
+     */
+ regMaskTP spillMask;
+ spillMask = tree->gtUsedRegs & lockedRegs;
+
+ if (spillMask)
+ {
+ while (spillMask)
+ {
+ /* Find the next register that needs to be spilled */
+ tmpMask = genFindLowestBit(spillMask);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
+ gtDispTree(tree, 0, NULL, true);
+ if ((tmpMask & regMask) == 0)
+ {
+ printf("Predict reload of %s after : ", getRegName(genRegNumFromMask(tmpMask)));
+ gtDispTree(tree, 0, NULL, true);
+ }
+ }
+#endif
+ /* In Codegen it will typically introduce a spill temp here */
+ /* rather than relocating the register to a non trashed reg */
+ rpPredictSpillCnt++;
+
+ /* Remove it from the spillMask */
+ spillMask &= ~tmpMask;
+ }
+ }
+
+    /*
+     * If the return registers in regMask conflict with the lockedRegs,
+     * then we allocate extra registers for the reload of the conflicting
+     * registers.
+     *
+     * Set spillMask to the set of locked registers that have to be reloaded here.
+     * reloadMask is set to the extra registers that are used to reload
+     * the spilled lockedRegs.
+     */
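+    // A small illustrative example (register names are hypothetical): if the call returns
+    // its value in a register that is also locked, say regMask = {EDX} and lockedRegs
+    // contains EDX, then spillMask = {EDX}. We remove EDX from regMask, pick one extra
+    // register via rpPredictRegPick, say ECX, add it to reloadMask, and regMask becomes
+    // {ECX}, so the reloaded value no longer conflicts with the locked register.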
+
+ noway_assert(regMask != DUMMY_INIT(RBM_ILLEGAL));
+ spillMask = lockedRegs & regMask;
+
+ if (spillMask)
+ {
+ /* Remove the spillMask from regMask */
+ regMask &= ~spillMask;
+
+ regMaskTP reloadMask = RBM_NONE;
+ while (spillMask)
+ {
+ /* Get an extra register to hold it */
+ regMaskTP reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
+ gtDispTree(tree, 0, NULL, true);
+ }
+#endif
+ reloadMask |= reloadReg;
+
+ /* Remove it from the spillMask */
+ spillMask &= ~genFindLowestBit(spillMask);
+ }
+
+ /* Update regMask to use the reloadMask */
+ regMask |= reloadMask;
+
+ /* update the gtUsedRegs mask */
+ tree->gtUsedRegs |= (regMaskSmall)regMask;
+ }
+
+ regMaskTP regUse = tree->gtUsedRegs;
+ regUse |= interferingRegs;
+
+ if (!VarSetOps::IsEmpty(this, compCurLife))
+ {
+        // Add interference between the current set of live variables and
+        // the set of temporary registers needed to evaluate the subtree
+ if (regUse)
+ {
+ rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use"));
+ }
+ }
+
+ if (rpAsgVarNum != -1)
+ {
+ // Add interference between the registers used (if any)
+ // and the assignment target variable
+ if (regUse)
+ {
+ rpRecordRegIntf(regUse, VarSetOps::MakeSingleton(this, rpAsgVarNum) DEBUGARG("tgt var tmp use"));
+ }
+
+        // Add a variable interference from rpAsgVarNum (i.e. the enregistered left hand
+        // side of the assignment passed here using PREDICT_REG_VAR_Txx)
+        // to the set of currently live variables. This new interference will prevent us
+        // from using the register value used here to enregister a different live variable.
+        //
+ if (!VarSetOps::IsEmpty(this, compCurLife))
+ {
+ rpRecordVarIntf(rpAsgVarNum, compCurLife DEBUGARG("asg tgt live conflict"));
+ }
+ }
+
+    /* Do we need to restore the oldLastUseVars value? */
+ if (restoreLastUseVars)
+ {
+ /* If we used a GT_ASG targeted register then we need to add
+ * a variable interference between any new last use variables
+ * and the GT_ASG targeted register
+ */
+ if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
+ {
+ rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars)
+ DEBUGARG("asgn tgt last use conflict"));
+ }
+ VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
+ }
+
+ return regMask;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+#endif // LEGACY_BACKEND
+
+/****************************************************************************/
+/* Returns true when we must create an EBP frame.
+   This is used to force most managed methods to have EBP-based frames,
+   which allows the ETW kernel stackwalker to walk the stacks of managed code;
+   this allows the kernel to perform lightweight profiling.
+ */
+bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
+{
+ bool result = false;
+#ifdef DEBUG
+ const char* reason = nullptr;
+#endif
+
+#if ETW_EBP_FRAMED
+ if (!result && (opts.MinOpts() || opts.compDbgCode))
+ {
+ INDEBUG(reason = "Debug Code");
+ result = true;
+ }
+ if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE))
+ {
+ INDEBUG(reason = "IL Code Size");
+ result = true;
+ }
+ if (!result && (fgBBcount > 3))
+ {
+ INDEBUG(reason = "BasicBlock Count");
+ result = true;
+ }
+ if (!result && fgHasLoops)
+ {
+ INDEBUG(reason = "Method has Loops");
+ result = true;
+ }
+ if (!result && (optCallCount >= 2))
+ {
+ INDEBUG(reason = "Call Count");
+ result = true;
+ }
+ if (!result && (optIndirectCallCount >= 1))
+ {
+ INDEBUG(reason = "Indirect Call");
+ result = true;
+ }
+#endif // ETW_EBP_FRAMED
+
+    // The VM always wants to identify the containing frame of an InlinedCallFrame
+    // via the frame register, never the stack register, so we need a frame.
+ if (!result && (optNativeCallCount != 0))
+ {
+ INDEBUG(reason = "Uses PInvoke");
+ result = true;
+ }
+
+#ifdef _TARGET_ARM64_
+ // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame
+ // pointer frames.
+ if (!result)
+ {
+ INDEBUG(reason = "Temporary ARM64 force frame pointer");
+ result = true;
+ }
+#endif // _TARGET_ARM64_
+
+#ifdef DEBUG
+ if ((result == true) && (wbReason != nullptr))
+ {
+ *wbReason = reason;
+ }
+#endif
+
+ return result;
+}
+
+#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
+
+/*****************************************************************************
+ *
+ *  Predict which variables will be assigned to registers.
+ *  This is x86-specific and only predicts the integer registers; it
+ *  must be conservative: any register that is predicted to be enregistered
+ *  must end up being enregistered.
+ *
+ *  rpPredictTreeRegUse takes advantage of the LCL_VARs that are
+ *  predicted to be enregistered to minimize calls to rpPredictRegPick.
+ *
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail)
+{
+ unsigned regInx;
+
+ if (rpPasses <= rpPassesPessimize)
+ {
+ // Assume that we won't have to reverse EBP enregistration
+ rpReverseEBPenreg = false;
+
+ // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
+ if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
+ rpFrameType = FT_EBP_FRAME;
+ else
+ rpFrameType = FT_ESP_FRAME;
+ }
+
+#if !ETW_EBP_FRAMED
+ // If we are using FPBASE as the frame register, we cannot also use it for
+ // a local var
+ if (rpFrameType == FT_EBP_FRAME)
+ {
+ regAvail &= ~RBM_FPBASE;
+ }
+#endif // !ETW_EBP_FRAMED
+
+ rpStkPredict = 0;
+ rpPredictAssignMask = regAvail;
+
+ raSetupArgMasks(&codeGen->intRegState);
+#if !FEATURE_STACK_FP_X87
+ raSetupArgMasks(&codeGen->floatRegState);
+#endif
+
+ // If there is a secret stub param, it is also live in
+ if (info.compPublishStubParam)
+ {
+ codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
+ }
+
+ if (regAvail == RBM_NONE)
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+#if FEATURE_STACK_FP_X87
+ if (!varDsc->IsFloatRegType())
+#endif
+ {
+ varDsc->lvRegNum = REG_STK;
+ if (isRegPairType(varDsc->lvType))
+ varDsc->lvOtherReg = REG_STK;
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCompiler::rpPredictAssignRegVars pass #%d", rpPasses);
+ printf("\n Available registers = ");
+ dspRegMask(regAvail);
+ printf("\n");
+ }
+#endif
+
+ if (regAvail == RBM_NONE)
+ {
+ return RBM_NONE;
+ }
+
+ /* We cannot change the lvVarIndexes at this point, so we */
+    /* can only re-order the existing set of tracked variables, */
+    /* which will change the order in which we select the */
+ /* locals for enregistering. */
+
+ assert(lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
+
+ // Should not be set unless optimizing
+ noway_assert((lvaSortAgain == false) || (opts.MinOpts() == false));
+
+ if (lvaSortAgain)
+ lvaSortOnly();
+
+#ifdef DEBUG
+ fgDebugCheckBBlist();
+#endif
+
+ /* Initialize the weighted count of variables that could have */
+ /* been enregistered but weren't */
+ unsigned refCntStk = 0; // sum of ref counts for all stack based variables
+ unsigned refCntEBP = 0; // sum of ref counts for EBP enregistered variables
+ unsigned refCntWtdEBP = 0; // sum of wtd ref counts for EBP enregistered variables
+#if DOUBLE_ALIGN
+ unsigned refCntStkParam; // sum of ref counts for all stack based parameters
+ unsigned refCntWtdStkDbl; // sum of wtd ref counts for stack based doubles
+
+#if FEATURE_STACK_FP_X87
+ refCntStkParam = raCntStkParamDblStackFP;
+ refCntWtdStkDbl = raCntWtdStkDblStackFP;
+ refCntStk = raCntStkStackFP;
+#else
+ refCntStkParam = 0;
+ refCntWtdStkDbl = 0;
+ refCntStk = 0;
+#endif // FEATURE_STACK_FP_X87
+
+#endif // DOUBLE_ALIGN
+
+    /* Set of registers used to enregister variables in the prediction */
+ regMaskTP regUsed = RBM_NONE;
+
+ /*-------------------------------------------------------------------------
+ *
+ * Predict/Assign the enregistered locals in ref-count order
+ *
+ */
+
+ VARSET_TP VARSET_INIT_NOCOPY(unprocessedVars, VarSetOps::MakeFull(this));
+
+ unsigned FPRegVarLiveInCnt;
+ FPRegVarLiveInCnt = 0; // How many enregistered doubles are live on entry to the method
+
+ LclVarDsc* varDsc;
+ for (unsigned sortNum = 0; sortNum < lvaCount; sortNum++)
+ {
+ bool notWorthy = false;
+
+ unsigned varIndex;
+ bool isDouble;
+ regMaskTP regAvailForType;
+ var_types regType;
+ regMaskTP avoidReg;
+ unsigned customVarOrderSize;
+ regNumber customVarOrder[MAX_VAR_ORDER_SIZE];
+ bool firstHalf;
+ regNumber saveOtherReg;
+
+ varDsc = lvaRefSorted[sortNum];
+
+#if FEATURE_STACK_FP_X87
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+#ifdef DEBUG
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ // Field local of a PROMOTION_TYPE_DEPENDENT struct should not
+ // be en-registered.
+ noway_assert(!varDsc->lvRegister);
+ }
+#endif
+ continue;
+ }
+#endif
+
+ /* Check the set of invariant things that would prevent enregistration */
+
+ /* Ignore the variable if it's not tracked */
+ if (!varDsc->lvTracked)
+ goto CANT_REG;
+
+ /* Get hold of the index and the interference mask for the variable */
+ varIndex = varDsc->lvVarIndex;
+
+ // Remove 'varIndex' from unprocessedVars
+ VarSetOps::RemoveElemD(this, unprocessedVars, varIndex);
+
+ // Skip the variable if it's marked as DoNotEnregister.
+
+ if (varDsc->lvDoNotEnregister)
+ goto CANT_REG;
+
+ /* TODO: For now if we have JMP all register args go to stack
+ * TODO: Later consider extending the life of the argument or make a copy of it */
+
+ if (compJmpOpUsed && varDsc->lvIsRegArg)
+ goto CANT_REG;
+
+ /* Skip the variable if the ref count is zero */
+
+ if (varDsc->lvRefCnt == 0)
+ goto CANT_REG;
+
+ /* Ignore field of PROMOTION_TYPE_DEPENDENT type of promoted struct */
+
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ goto CANT_REG;
+ }
+
+ /* Is the unweighted ref count too low to be interesting? */
+
+ if (!varDsc->lvIsStructField && // We do encourage enregistering field locals.
+ (varDsc->lvRefCnt <= 1))
+ {
+ /* Sometimes it's useful to enregister a variable with only one use */
+ /* arguments referenced in loops are one example */
+
+ if (varDsc->lvIsParam && varDsc->lvRefCntWtd > BB_UNITY_WEIGHT)
+ goto OK_TO_ENREGISTER;
+
+ /* If the variable has a preferred register set it may be useful to put it there */
+ if (varDsc->lvPrefReg && varDsc->lvIsRegArg)
+ goto OK_TO_ENREGISTER;
+
+ /* Keep going; the table is sorted by "weighted" ref count */
+ goto CANT_REG;
+ }
+
+ OK_TO_ENREGISTER:
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regType = varDsc->TypeGet();
+ regAvailForType = regAvail & RBM_ALLFLOAT;
+ }
+ else
+ {
+ regType = TYP_INT;
+ regAvailForType = regAvail & RBM_ALLINT;
+ }
+
+#ifdef _TARGET_ARM_
+ isDouble = (varDsc->TypeGet() == TYP_DOUBLE);
+
+ if (isDouble)
+ {
+ regAvailForType &= RBM_DBL_REGS; // We must restrict the set to the double registers
+ }
+#endif
+
+ /* If we don't have any registers available then skip the enregistration attempt */
+ if (regAvailForType == RBM_NONE)
+ goto NO_REG;
+
+ // On the pessimize passes don't even try to enregister LONGS
+ if (isRegPairType(varDsc->lvType))
+ {
+ if (rpPasses > rpPassesPessimize)
+ goto NO_REG;
+ else if (rpLostEnreg && (rpPasses == rpPassesPessimize))
+ goto NO_REG;
+ }
+
+ // Set of registers to avoid when performing register allocation
+ avoidReg = RBM_NONE;
+
+ if (!varDsc->lvIsRegArg)
+ {
+            /* For local variables,
+             *  avoid the incoming argument registers,
+             *  but only those that this variable conflicts with */
+
+ if (raAvoidArgRegMask != 0)
+ {
+ LclVarDsc* argDsc;
+ LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
+
+ for (argDsc = lvaTable; argDsc < argsEnd; argDsc++)
+ {
+ if (!argDsc->lvIsRegArg)
+ continue;
+
+ bool isFloat = argDsc->IsFloatRegType();
+ regNumber inArgReg = argDsc->lvArgReg;
+ regMaskTP inArgBit = genRegMask(inArgReg);
+
+ // Is this inArgReg in the raAvoidArgRegMask set?
+
+ if (!(raAvoidArgRegMask & inArgBit))
+ continue;
+
+ noway_assert(argDsc->lvIsParam);
+ noway_assert(inArgBit & (isFloat ? RBM_FLTARG_REGS : RBM_ARG_REGS));
+
+ unsigned locVarIndex = varDsc->lvVarIndex;
+ unsigned argVarIndex = argDsc->lvVarIndex;
+
+ /* Does this variable interfere with the arg variable ? */
+ if (VarSetOps::IsMember(this, lvaVarIntf[locVarIndex], argVarIndex))
+ {
+ noway_assert(VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
+ /* Yes, so try to avoid the incoming arg reg */
+ avoidReg |= inArgBit;
+ }
+ else
+ {
+ noway_assert(!VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
+ }
+ }
+ }
+ }
+
+ // Now we will try to predict which register the variable
+ // could be enregistered in
+
+ customVarOrderSize = MAX_VAR_ORDER_SIZE;
+
+ raSetRegVarOrder(regType, customVarOrder, &customVarOrderSize, varDsc->lvPrefReg, avoidReg);
+
+ firstHalf = false;
+ saveOtherReg = DUMMY_INIT(REG_NA);
+
+ for (regInx = 0; regInx < customVarOrderSize; regInx++)
+ {
+ regNumber regNum = customVarOrder[regInx];
+ regMaskTP regBits = genRegMask(regNum);
+
+ /* Skip this register if it isn't available */
+ if ((regAvailForType & regBits) == 0)
+ continue;
+
+ /* Skip this register if it interferes with the variable */
+
+ if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varIndex))
+ continue;
+
+ if (varTypeIsFloating(regType))
+ {
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ regNumber regNext = REG_NEXT(regNum);
+ regBits |= genRegMask(regNext);
+
+ /* Skip if regNext interferes with the variable */
+ if (VarSetOps::IsMember(this, raLclRegIntf[regNext], varIndex))
+ continue;
+ }
+#endif
+ }
+
+ bool firstUseOfReg = ((regBits & (regUsed | codeGen->regSet.rsGetModifiedRegsMask())) == 0);
+ bool lessThanTwoRefWtd = (varDsc->lvRefCntWtd < (2 * BB_UNITY_WEIGHT));
+ bool calleeSavedReg = ((regBits & RBM_CALLEE_SAVED) != 0);
+
+            /* Skip this register if the weighted ref count is less than two
+               and we are considering an unused callee-saved register */
+
+ if (lessThanTwoRefWtd && // less than two references (weighted)
+ firstUseOfReg && // first use of this register
+ calleeSavedReg) // callee saved register
+ {
+ unsigned int totalRefCntWtd = varDsc->lvRefCntWtd;
+
+                // psc is an abbreviation for possibleSameColor
+ VARSET_TP VARSET_INIT_NOCOPY(pscVarSet, VarSetOps::Diff(this, unprocessedVars, lvaVarIntf[varIndex]));
+
+ VARSET_ITER_INIT(this, pscIndexIter, pscVarSet, pscIndex);
+ while (pscIndexIter.NextElem(this, &pscIndex))
+ {
+ LclVarDsc* pscVar = lvaTable + lvaTrackedToVarNum[pscIndex];
+ totalRefCntWtd += pscVar->lvRefCntWtd;
+ if (totalRefCntWtd > (2 * BB_UNITY_WEIGHT))
+ break;
+ }
+
+ if (totalRefCntWtd <= (2 * BB_UNITY_WEIGHT))
+ {
+ notWorthy = true;
+ continue; // not worth spilling a callee saved register
+ }
+                // Otherwise we will spill this callee-saved register,
+                // because its uses, when combined with the uses of
+                // other yet-to-be-processed candidates, exceed our threshold.
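+                // Hypothetical example: if this candidate's lvRefCntWtd is 1.5 * BB_UNITY_WEIGHT
+                // and the unprocessed candidates that could share this register contribute another
+                // BB_UNITY_WEIGHT, totalRefCntWtd reaches 2.5 * BB_UNITY_WEIGHT. That exceeds the
+                // 2 * BB_UNITY_WEIGHT threshold, so dirtying the callee-saved register is
+                // considered worthwhile.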
+ }
+
+ /* Looks good - mark the variable as living in the register */
+
+ if (isRegPairType(varDsc->lvType))
+ {
+ if (firstHalf == false)
+ {
+ /* Enregister the first half of the long */
+ varDsc->lvRegNum = regNum;
+ saveOtherReg = varDsc->lvOtherReg;
+ varDsc->lvOtherReg = REG_STK;
+ firstHalf = true;
+ }
+ else
+ {
+ /* Ensure 'well-formed' register pairs */
+ /* (those returned by gen[Pick|Grab]RegPair) */
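+                        /* For example (hypothetical picks): if the first half was placed in EDX
+                           and this iteration picks ECX, then since ECX < EDX we store ECX in
+                           lvRegNum and move EDX to lvOtherReg, so lvRegNum always holds the
+                           lower-numbered register of the pair. */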
+
+ if (regNum < varDsc->lvRegNum)
+ {
+ varDsc->lvOtherReg = varDsc->lvRegNum;
+ varDsc->lvRegNum = regNum;
+ }
+ else
+ {
+ varDsc->lvOtherReg = regNum;
+ }
+ firstHalf = false;
+ }
+ }
+ else
+ {
+ varDsc->lvRegNum = regNum;
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ varDsc->lvOtherReg = REG_NEXT(regNum);
+ }
+#endif
+ }
+
+ if (regNum == REG_FPBASE)
+ {
+ refCntEBP += varDsc->lvRefCnt;
+ refCntWtdEBP += varDsc->lvRefCntWtd;
+#if DOUBLE_ALIGN
+ if (varDsc->lvIsParam)
+ {
+ refCntStkParam += varDsc->lvRefCnt;
+ }
+#endif
+ }
+
+ /* Record this register in the regUsed set */
+ regUsed |= regBits;
+
+ /* The register is now ineligible for all interfering variables */
+
+ VarSetOps::UnionD(this, raLclRegIntf[regNum], lvaVarIntf[varIndex]);
+
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ regNumber secondHalf = REG_NEXT(regNum);
+ VARSET_ITER_INIT(this, iter, lvaVarIntf[varIndex], intfIndex);
+ while (iter.NextElem(this, &intfIndex))
+ {
+ VarSetOps::AddElemD(this, raLclRegIntf[secondHalf], intfIndex);
+ }
+ }
+#endif
+
+ /* If a register argument, remove its incoming register
+ * from the "avoid" list */
+
+ if (varDsc->lvIsRegArg)
+ {
+ raAvoidArgRegMask &= ~genRegMask(varDsc->lvArgReg);
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ raAvoidArgRegMask &= ~genRegMask(REG_NEXT(varDsc->lvArgReg));
+ }
+#endif
+ }
+
+ /* A variable of TYP_LONG can take two registers */
+ if (firstHalf)
+ continue;
+
+ // Since we have successfully enregistered this variable it is
+ // now time to move on and consider the next variable
+ goto ENREG_VAR;
+ }
+
+ if (firstHalf)
+ {
+ noway_assert(isRegPairType(varDsc->lvType));
+
+ /* This TYP_LONG is partially enregistered */
+
+ noway_assert(saveOtherReg != DUMMY_INIT(REG_NA));
+
+ if (varDsc->lvDependReg && (saveOtherReg != REG_STK))
+ {
+ rpLostEnreg = true;
+ }
+
+ raAddToStkPredict(varDsc->lvRefCntWtd);
+ goto ENREG_VAR;
+ }
+
+ NO_REG:;
+ if (varDsc->lvDependReg)
+ {
+ rpLostEnreg = true;
+ }
+
+ if (!notWorthy)
+ {
+ /* Weighted count of variables that could have been enregistered but weren't */
+ raAddToStkPredict(varDsc->lvRefCntWtd);
+
+ if (isRegPairType(varDsc->lvType) && (varDsc->lvOtherReg == REG_STK))
+ raAddToStkPredict(varDsc->lvRefCntWtd);
+ }
+
+ CANT_REG:;
+ varDsc->lvRegister = false;
+
+ varDsc->lvRegNum = REG_STK;
+ if (isRegPairType(varDsc->lvType))
+ varDsc->lvOtherReg = REG_STK;
+
+ /* unweighted count of variables that were not enregistered */
+
+ refCntStk += varDsc->lvRefCnt;
+
+#if DOUBLE_ALIGN
+ if (varDsc->lvIsParam)
+ {
+ refCntStkParam += varDsc->lvRefCnt;
+ }
+ else
+ {
+            /* Is it a stack-based double? */
+            /* Note that double params are excluded since they cannot be double-aligned */
+ if (varDsc->lvType == TYP_DOUBLE)
+ {
+ refCntWtdStkDbl += varDsc->lvRefCntWtd;
+ }
+ }
+#endif
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("; ");
+ gtDispLclVar((unsigned)(varDsc - lvaTable));
+ if (varDsc->lvTracked)
+ printf("T%02u", varDsc->lvVarIndex);
+ else
+ printf(" ");
+ printf(" (refcnt=%2u,refwtd=%s) not enregistered", varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd));
+ if (varDsc->lvDoNotEnregister)
+ printf(", do-not-enregister");
+ printf("\n");
+ }
+#endif
+ continue;
+
+ ENREG_VAR:;
+
+ varDsc->lvRegister = true;
+
+ // Record the fact that we enregistered a stack arg when tail call is used.
+ if (compJmpOpUsed && !varDsc->lvIsRegArg)
+ {
+ rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvRegNum);
+ if (isRegPairType(varDsc->lvType))
+ {
+ rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvOtherReg);
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("; ");
+ gtDispLclVar((unsigned)(varDsc - lvaTable));
+ printf("T%02u (refcnt=%2u,refwtd=%s) predicted to be assigned to ", varIndex, varDsc->lvRefCnt,
+ refCntWtd2str(varDsc->lvRefCntWtd));
+ varDsc->PrintVarReg();
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ printf(":%s", getRegName(varDsc->lvOtherReg));
+ }
+#endif
+ printf("\n");
+ }
+#endif
+ }
+
+#if ETW_EBP_FRAMED
+ noway_assert(refCntEBP == 0);
+#endif
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (refCntStk > 0)
+ printf("; refCntStk = %u\n", refCntStk);
+ if (refCntEBP > 0)
+ printf("; refCntEBP = %u\n", refCntEBP);
+ if (refCntWtdEBP > 0)
+ printf("; refCntWtdEBP = %u\n", refCntWtdEBP);
+#if DOUBLE_ALIGN
+ if (refCntStkParam > 0)
+ printf("; refCntStkParam = %u\n", refCntStkParam);
+ if (refCntWtdStkDbl > 0)
+ printf("; refCntWtdStkDbl = %u\n", refCntWtdStkDbl);
+#endif
+ }
+#endif
+
+ /* Determine how the EBP register should be used */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if DOUBLE_ALIGN
+
+ if (!codeGen->isFramePointerRequired())
+ {
+ noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
+
+ /*
+ First let us decide if we should use EBP to create a
+ double-aligned frame, instead of enregistering variables
+ */
+
+ if (getCanDoubleAlign() == MUST_DOUBLE_ALIGN)
+ {
+ rpFrameType = FT_DOUBLE_ALIGN_FRAME;
+ goto REVERSE_EBP_ENREG;
+ }
+
+ if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0))
+ {
+ /* OK, there may be some benefit to double-aligning the frame */
+ /* But let us compare the benefits vs. the costs of this */
+
+ /*
+               One cost to consider is the benefit of smaller code
+               when using EBP as a frame pointer register.
+
+               Each stack variable reference is an extra byte of code
+               if we use a double-aligned frame. Parameters are
+               accessed via EBP for a double-aligned frame, so they
+               don't use an extra byte of code.
+
+               We pay one byte of code for each refCntStk and we pay
+               one byte or more for each refCntEBP, but we save one
+               byte for each refCntStkParam.
+
+               Our savings are the elimination of a possible misaligned
+               access and a possible DCU split when an access crosses
+               a cache-line boundary.
+
+               We use the loop-weighted value of
+               refCntWtdStkDbl * misaligned_weight (0, 4, 16)
+               to represent this savings.
+ */
+
+ // We also pay 7 extra bytes for the MOV EBP,ESP,
+ // LEA ESP,[EBP-0x10] and the AND ESP,-8 to double align ESP
+ const unsigned DBL_ALIGN_SETUP_SIZE = 7;
+
+ unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
+ unsigned misaligned_weight = 4;
+
+ if (compCodeOpt() == SMALL_CODE)
+ misaligned_weight = 0;
+
+ if (compCodeOpt() == FAST_CODE)
+ misaligned_weight *= 4;
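+
+            // Worked example with hypothetical counts: refCntStk = 20, refCntEBP = 2 and
+            // refCntStkParam = 5 give bytesUsed = 20 + 2 - 5 + 7 = 24. With blended code
+            // (misaligned_weight = 4) and refCntWtdStkDbl = 4 * BB_UNITY_WEIGHT, the savings
+            // term is (4 * BB_UNITY_WEIGHT * 4) / BB_UNITY_WEIGHT = 16, so 24 > 16 and we
+            // would predict not to double-align the frame.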
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("; Double alignment:\n");
+                printf("; Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
+ printf("; Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
+ printf("; Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl);
+ }
+#endif
+
+ if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
+ {
+ /* It's probably better to use EBP as a frame pointer */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ printf("; Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
+#endif
+ goto NO_DOUBLE_ALIGN;
+ }
+
+ /*
+               Another cost to consider is the benefit of using EBP to enregister
+               one or more integer variables.
+
+               We pay one extra memory reference for each refCntWtdEBP.
+
+               Our savings are the elimination of a possible misaligned
+               access and a possible DCU split when an access crosses
+               a cache-line boundary.
+
+ */
+
+ // <BUGNUM>
+ // VSW 346717: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
+ // not double aligned.
+ // Here are the numbers that make this not double-aligned.
+ // refCntWtdStkDbl = 0x164
+ // refCntWtdEBP = 0x1a4
+ // We think we do need to change the heuristic to be in favor of double-align.
+ // </BUGNUM>
+
+ if (refCntWtdEBP > refCntWtdStkDbl * 2)
+ {
+ /* It's probably better to use EBP to enregister integer variables */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ printf("; Predicting not to double-align ESP to allow EBP to be used to enregister variables\n");
+#endif
+ goto NO_DOUBLE_ALIGN;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ printf("; Predicting to create a double-aligned frame\n");
+#endif
+ /*
+ OK we passed all of the benefit tests
+ so we'll predict a double aligned frame
+ */
+
+ rpFrameType = FT_DOUBLE_ALIGN_FRAME;
+ goto REVERSE_EBP_ENREG;
+ }
+ }
+
+NO_DOUBLE_ALIGN:
+#endif // DOUBLE_ALIGN
+
+ if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
+ {
+#ifdef _TARGET_XARCH_
+// clang-format off
+ /* If we are using EBP to enregister variables then
+ will we actually save bytes by setting up an EBP frame?
+
+ Each stack reference is an extra byte of code if we use
+ an ESP frame.
+
+ Here we measure the savings that we get by using EBP to
+ enregister variables vs. the cost in code size that we
+ pay when using an ESP based frame.
+
+ We pay one byte of code for each refCntStk
+ but we save one byte (or more) for each refCntEBP.
+
+ Our savings are the elimination of a stack memory read/write.
+ We use the loop weighted value of
+ refCntWtdEBP * mem_access_weight (0, 3, 6)
+ to represent this savings.
+ */
+
+ // We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
+ // to set up an EBP frame in the prolog and epilog
+ #define EBP_FRAME_SETUP_SIZE 5
+ // clang-format on
+
+ if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
+ {
+ unsigned bytesSaved = refCntStk - (refCntEBP + EBP_FRAME_SETUP_SIZE);
+ unsigned mem_access_weight = 3;
+
+ if (compCodeOpt() == SMALL_CODE)
+ mem_access_weight = 0;
+ else if (compCodeOpt() == FAST_CODE)
+ mem_access_weight *= 2;
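+
+            // Worked example with hypothetical counts: refCntStk = 30 and refCntEBP = 5 give
+            // bytesSaved = 30 - (5 + 5) = 20. With blended code (mem_access_weight = 3) and
+            // refCntWtdEBP = 4 * BB_UNITY_WEIGHT, the savings term is 12, so 20 > 12 and we
+            // would predict an EBP frame rather than using EBP to enregister variables.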
+
+ if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
+ {
+                /* It's probably not a good idea to use EBP in our predictions */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (verbose && (refCntEBP > 0))
+ printf("; Predicting that it's not worth using EBP to enregister variables\n");
+#endif
+ rpFrameType = FT_EBP_FRAME;
+ goto REVERSE_EBP_ENREG;
+ }
+ }
+#endif // _TARGET_XARCH_
+
+ if ((rpFrameType == FT_NOT_SET) || (rpFrameType == FT_ESP_FRAME))
+ {
+#ifdef DEBUG
+ const char* reason;
+#endif
+ if (rpMustCreateEBPCalled == false)
+ {
+ rpMustCreateEBPCalled = true;
+ if (rpMustCreateEBPFrame(INDEBUG(&reason)))
+ {
+#ifdef DEBUG
+ if (verbose)
+ printf("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
+#endif
+ codeGen->setFrameRequired(true);
+
+ rpFrameType = FT_EBP_FRAME;
+ goto REVERSE_EBP_ENREG;
+ }
+ }
+ }
+ }
+
+ goto EXIT;
+
+REVERSE_EBP_ENREG:
+
+ noway_assert(rpFrameType != FT_ESP_FRAME);
+
+ rpReverseEBPenreg = true;
+
+#if !ETW_EBP_FRAMED
+ if (refCntEBP > 0)
+ {
+ noway_assert(regUsed & RBM_FPBASE);
+
+ regUsed &= ~RBM_FPBASE;
+
+ /* variables that were enregistered in EBP become stack based variables */
+ raAddToStkPredict(refCntWtdEBP);
+
+ unsigned lclNum;
+
+ /* We're going to have to undo some predicted enregistered variables */
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Is this a register variable? */
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ if (isRegPairType(varDsc->lvType))
+ {
+ /* Only one can be EBP */
+ if (varDsc->lvRegNum == REG_FPBASE || varDsc->lvOtherReg == REG_FPBASE)
+ {
+ if (varDsc->lvRegNum == REG_FPBASE)
+ varDsc->lvRegNum = varDsc->lvOtherReg;
+
+ varDsc->lvOtherReg = REG_STK;
+
+ if (varDsc->lvRegNum == REG_STK)
+ varDsc->lvRegister = false;
+
+ if (varDsc->lvDependReg)
+ rpLostEnreg = true;
+#ifdef DEBUG
+ if (verbose)
+ goto DUMP_MSG;
+#endif
+ }
+ }
+ else
+ {
+ if ((varDsc->lvRegNum == REG_FPBASE) && (!varDsc->IsFloatRegType()))
+ {
+ varDsc->lvRegNum = REG_STK;
+
+ varDsc->lvRegister = false;
+
+ if (varDsc->lvDependReg)
+ rpLostEnreg = true;
+#ifdef DEBUG
+ if (verbose)
+ {
+ DUMP_MSG:
+                            printf("; reversing enregistration of V%02u,T%02u (refcnt=%2u,refwtd=%4u%s)\n", lclNum,
+ varDsc->lvVarIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2,
+ (varDsc->lvRefCntWtd & 1) ? ".5" : "");
+ }
+#endif
+ }
+ }
+ }
+ }
+ }
+#endif // ETW_EBP_FRAMED
+
+EXIT:;
+
+ unsigned lclNum;
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+        /* Clear the lvDependReg flag for the next iteration of the predictor */
+ varDsc->lvDependReg = false;
+
+ // If we set rpLostEnreg and this is the first pessimize pass
+ // then reverse the enreg of all TYP_LONG
+ if (rpLostEnreg && isRegPairType(varDsc->lvType) && (rpPasses == rpPassesPessimize))
+ {
+ varDsc->lvRegNum = REG_STK;
+ varDsc->lvOtherReg = REG_STK;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose && raNewBlocks)
+ {
+ printf("\nAdded FP register killing blocks:\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif
+ noway_assert(rpFrameType != FT_NOT_SET);
+
+ /* return the set of registers used to enregister variables */
+ return regUsed;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Predict register use for every tree in the function. Note that we do this
+ * at different times (not to mention in a totally different way) for x86 vs
+ * RISC targets.
+ */
+void Compiler::rpPredictRegUse()
+{
+#ifdef DEBUG
+ if (verbose)
+ raDumpVarIntf();
+#endif
+
+ // We might want to adjust the ref counts based on interference
+ raAdjustVarIntf();
+
+ regMaskTP allAcceptableRegs = RBM_ALLINT;
+
+#if FEATURE_FP_REGALLOC
+ allAcceptableRegs |= raConfigRestrictMaskFP();
+#endif
+
+ allAcceptableRegs &= ~codeGen->regSet.rsMaskResvd; // Remove any register reserved for special purposes
+
+ /* For debuggable code, genJumpToThrowHlpBlk() generates an inline call
+ to acdHelper(). This is done implicitly, without creating a GT_CALL
+       node. Hence, this interference is handled implicitly by
+ restricting the registers used for enregistering variables */
+
+ if (opts.compDbgCode)
+ {
+ allAcceptableRegs &= RBM_CALLEE_SAVED;
+ }
+
+ /* Compute the initial regmask to use for the first pass */
+ regMaskTP regAvail = RBM_CALLEE_SAVED & allAcceptableRegs;
+ regMaskTP regUsed;
+
+#if CPU_USES_BLOCK_MOVE
+ /* If we might need to generate a rep mov instruction */
+ /* remove ESI and EDI */
+ if (compBlkOpUsed)
+ regAvail &= ~(RBM_ESI | RBM_EDI);
+#endif
+
+#ifdef _TARGET_X86_
+    /* If we are using longs then we remove ESI to allow */
+    /* ESI:EBX to be saved across a call */
+ if (compLongUsed)
+ regAvail &= ~(RBM_ESI);
+#endif
+
+#ifdef _TARGET_ARM_
+ // For the first register allocation pass we don't want to color using r4
+ // as we want to allow it to be used to color the internal temps instead
+ // when r0,r1,r2,r3 are all in use.
+ //
+ regAvail &= ~(RBM_R4);
+#endif
+
+#if ETW_EBP_FRAMED
+    // We never have EBP available when ETW_EBP_FRAMED is defined
+ regAvail &= ~RBM_FPBASE;
+#else
+ /* If a frame pointer is required then we remove EBP */
+ if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
+ regAvail &= ~RBM_FPBASE;
+#endif
+
+#ifdef DEBUG
+ BOOL fJitNoRegLoc = JitConfig.JitNoRegLoc();
+ if (fJitNoRegLoc)
+ regAvail = RBM_NONE;
+#endif
+
+ if ((opts.compFlags & CLFLG_REGVAR) == 0)
+ regAvail = RBM_NONE;
+
+#if FEATURE_STACK_FP_X87
+ VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
+
+ // Calculate the set of all tracked FP/non-FP variables
+ // into optAllFloatVars and optAllNonFPvars
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ continue;
+
+ /* Get hold of the index and the interference mask for the variable */
+
+ unsigned varNum = varDsc->lvVarIndex;
+
+ /* add to the set of all tracked FP/non-FP variables */
+
+ if (varDsc->IsFloatRegType())
+ VarSetOps::AddElemD(this, optAllFloatVars, varNum);
+ else
+ VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
+ }
+#endif
+
+ for (unsigned i = 0; i < REG_COUNT; i++)
+ {
+ VarSetOps::AssignNoCopy(this, raLclRegIntf[i], VarSetOps::MakeEmpty(this));
+ }
+ for (unsigned i = 0; i < lvaTrackedCount; i++)
+ {
+ VarSetOps::AssignNoCopy(this, lvaVarPref[i], VarSetOps::MakeEmpty(this));
+ }
+
+ raNewBlocks = false;
+ rpPredictAssignAgain = false;
+ rpPasses = 0;
+
+ bool mustPredict = true;
+ unsigned stmtNum = 0;
+ unsigned oldStkPredict = DUMMY_INIT(~0);
+ VARSET_TP oldLclRegIntf[REG_COUNT];
+
+ for (unsigned i = 0; i < REG_COUNT; i++)
+ {
+ VarSetOps::AssignNoCopy(this, oldLclRegIntf[i], VarSetOps::MakeEmpty(this));
+ }
+
+ while (true)
+ {
+ /* Assign registers to variables using the variable/register interference
+ graph (raLclRegIntf[]) calculated in the previous pass */
+ regUsed = rpPredictAssignRegVars(regAvail);
+
+ mustPredict |= rpLostEnreg;
+
+#ifdef _TARGET_ARM_
+
+ // See if we previously reserved REG_R10 and try to make it available if we have a small frame now
+ //
+ if ((rpPasses == 0) && (codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD))
+ {
+ if (compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
+ {
+ // We must keep reserving R10 in this case
+ codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
+ }
+ else
+ {
+ // We can release our reservation on R10 and use it to color registers
+ //
+ codeGen->regSet.rsMaskResvd &= ~RBM_OPT_RSVD;
+ allAcceptableRegs |= RBM_OPT_RSVD;
+ }
+ }
+#endif
+
+ /* Is our new prediction good enough?? */
+ if (!mustPredict)
+ {
+            /* For small methods (12 stmts or fewer), we add an     */
+            /* extra pass if we are predicting the use of some      */
+ /* of the caller saved registers. */
+ /* This fixes RAID perf bug 43440 VB Ackerman function */
+
+ if ((rpPasses == 1) && (stmtNum <= 12) && (regUsed & RBM_CALLEE_SAVED))
+ {
+ goto EXTRA_PASS;
+ }
+
+ /* If every variable was fully enregistered then we're done */
+ if (rpStkPredict == 0)
+ goto ALL_DONE;
+
+ // This was a successful prediction. Record it, in case it turns out to be the best one.
+ rpRecordPrediction();
+
+ if (rpPasses > 1)
+ {
+ noway_assert(oldStkPredict != (unsigned)DUMMY_INIT(~0));
+
+ // Be careful about overflow
+ unsigned highStkPredict = (rpStkPredict * 2 < rpStkPredict) ? ULONG_MAX : rpStkPredict * 2;
+ if (oldStkPredict < highStkPredict)
+ goto ALL_DONE;
+
+ if (rpStkPredict < rpPasses * 8)
+ goto ALL_DONE;
+
+ if (rpPasses >= (rpPassesMax - 1))
+ goto ALL_DONE;
+ }
+
+ EXTRA_PASS:
+ /* We will do another pass */;
+ }
+
+#ifdef DEBUG
+ if (JitConfig.JitAssertOnMaxRAPasses())
+ {
+ noway_assert(rpPasses < rpPassesMax &&
+                         "This may not be a bug, but the dev team should look and see what is happening");
+ }
+#endif
+
+        // The "64" here had been "VARSET_SZ". It is unclear why this number was connected with
+        // the (max) size of a VARSET. That constant has been eliminated, so the value is left here
+        // as a literal. We hope that we're phasing out this code anyway, and this leaves the
+        // behavior the way that it was.
+ if (rpPasses > (rpPassesMax - rpPassesPessimize) + 64)
+ {
+ NO_WAY("we seem to be stuck in an infinite loop. breaking out");
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (rpPasses > 0)
+ {
+ if (rpLostEnreg)
+ printf("\n; Another pass due to rpLostEnreg");
+ if (rpAddedVarIntf)
+ printf("\n; Another pass due to rpAddedVarIntf");
+ if ((rpPasses == 1) && rpPredictAssignAgain)
+ printf("\n; Another pass due to rpPredictAssignAgain");
+ }
+ printf("\n; Register predicting pass# %d\n", rpPasses + 1);
+ }
+#endif
+
+ /* Zero the variable/register interference graph */
+ for (unsigned i = 0; i < REG_COUNT; i++)
+ {
+ VarSetOps::ClearD(this, raLclRegIntf[i]);
+ }
+
+ // if there are PInvoke calls and compLvFrameListRoot is enregistered,
+ // it must not be in a register trashed by the callee
+ if (info.compCallUnmanaged != 0)
+ {
+ assert(!opts.ShouldUsePInvokeHelpers());
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* pinvokeVarDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (pinvokeVarDsc->lvTracked)
+ {
+ rpRecordRegIntf(RBM_CALLEE_TRASH, VarSetOps::MakeSingleton(this, pinvokeVarDsc->lvVarIndex)
+ DEBUGARG("compLvFrameListRoot"));
+
+                // We would prefer to have this enregistered in the PINVOKE_TCB register
+ pinvokeVarDsc->addPrefReg(RBM_PINVOKE_TCB, this);
+ }
+
+ // If we're using a single return block, the p/invoke epilog code trashes ESI and EDI (in the
+ // worst case). Make sure that the return value compiler temp that we create for the single
+ // return block knows about this interference.
+ if (genReturnLocal != BAD_VAR_NUM)
+ {
+ noway_assert(genReturnBB);
+ LclVarDsc* localTmp = &lvaTable[genReturnLocal];
+ if (localTmp->lvTracked)
+ {
+ rpRecordRegIntf(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME,
+ VarSetOps::MakeSingleton(this, localTmp->lvVarIndex) DEBUGARG("genReturnLocal"));
+ }
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ if (compFloatingPointUsed)
+ {
+ bool hasMustInitFloat = false;
+
+            // If we have any must-init floating-point LclVars then we will add register interferences
+            // for the arguments with RBM_SCRATCH.
+            // This is so that if we need to reset the initReg to REG_SCRATCH in Compiler::genFnProlog()
+            // we won't home the arguments into REG_SCRATCH.
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvMustInit && varTypeIsFloating(varDsc->TypeGet()))
+ {
+ hasMustInitFloat = true;
+ break;
+ }
+ }
+
+ if (hasMustInitFloat)
+ {
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+                    // If it is an incoming argument that is tracked and not floating-point
+ if (varDsc->lvIsParam && varDsc->lvTracked && !varTypeIsFloating(varDsc->TypeGet()))
+ {
+ rpRecordRegIntf(RBM_SCRATCH, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex)
+ DEBUGARG("arg home with must-init fp"));
+ }
+ }
+ }
+ }
+#endif
+
+ stmtNum = 0;
+ rpAddedVarIntf = false;
+ rpLostEnreg = false;
+
+ /* Walk the basic blocks and predict reg use for each tree */
+
+ for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ compCurBB = block;
+ compCurLifeTree = NULL;
+ VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
+
+ compCurBB = block;
+
+ for (stmt = block->FirstNonPhiDef(); stmt != NULL; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ rpPredictSpillCnt = 0;
+ VarSetOps::AssignNoCopy(this, rpLastUseVars, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, rpUseInPlace, VarSetOps::MakeEmpty(this));
+
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ stmtNum++;
+#ifdef DEBUG
+                if (verbose)
+ {
+ printf("\nRegister predicting BB%02u, stmt %d\n", block->bbNum, stmtNum);
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+ rpPredictTreeRegUse(tree, PREDICT_NONE, RBM_NONE, RBM_NONE);
+
+ noway_assert(rpAsgVarNum == -1);
+
+ if (rpPredictSpillCnt > tmpIntSpillMax)
+ tmpIntSpillMax = rpPredictSpillCnt;
+ }
+ }
+ rpPasses++;
+
+ /* Decide whether we need to set mustPredict */
+ mustPredict = false;
+
+ if (rpAddedVarIntf)
+ {
+ mustPredict = true;
+#ifdef DEBUG
+ if (verbose)
+ raDumpVarIntf();
+#endif
+ }
+
+ if (rpPasses == 1)
+ {
+ if ((opts.compFlags & CLFLG_REGVAR) == 0)
+ goto ALL_DONE;
+
+ if (rpPredictAssignAgain)
+ mustPredict = true;
+#ifdef DEBUG
+ if (fJitNoRegLoc)
+ goto ALL_DONE;
+#endif
+ }
+
+ /* Calculate the new value to use for regAvail */
+
+ regAvail = allAcceptableRegs;
+
+ /* If a frame pointer is required then we remove EBP */
+ if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
+ regAvail &= ~RBM_FPBASE;
+
+#if ETW_EBP_FRAMED
+        // We never have EBP available when ETW_EBP_FRAMED is defined
+ regAvail &= ~RBM_FPBASE;
+#endif
+
+ // If we have done n-passes then we must continue to pessimize the
+ // interference graph by or-ing the interferences from the previous pass
+
+ if (rpPasses > rpPassesPessimize)
+ {
+ for (unsigned regInx = 0; regInx < REG_COUNT; regInx++)
+ VarSetOps::UnionD(this, raLclRegIntf[regInx], oldLclRegIntf[regInx]);
+
+ /* If we reverse an EBP enregistration then keep it that way */
+ if (rpReverseEBPenreg)
+ regAvail &= ~RBM_FPBASE;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ raDumpRegIntf();
+#endif
+
+ /* Save the old variable/register interference graph */
+ for (unsigned i = 0; i < REG_COUNT; i++)
+ {
+ VarSetOps::Assign(this, oldLclRegIntf[i], raLclRegIntf[i]);
+ }
+ oldStkPredict = rpStkPredict;
+ } // end of while (true)
+
+ALL_DONE:;
+
+ // If we recorded a better feasible allocation than we ended up with, go back to using it.
+ rpUseRecordedPredictionIfBetter();
+
+#if DOUBLE_ALIGN
+ codeGen->setDoubleAlign(false);
+#endif
+
+ switch (rpFrameType)
+ {
+ default:
+ noway_assert(!"rpFrameType not set correctly!");
+ break;
+ case FT_ESP_FRAME:
+ noway_assert(!codeGen->isFramePointerRequired());
+ noway_assert(!codeGen->isFrameRequired());
+ codeGen->setFramePointerUsed(false);
+ break;
+ case FT_EBP_FRAME:
+ noway_assert((regUsed & RBM_FPBASE) == 0);
+ codeGen->setFramePointerUsed(true);
+ break;
+#if DOUBLE_ALIGN
+ case FT_DOUBLE_ALIGN_FRAME:
+ noway_assert((regUsed & RBM_FPBASE) == 0);
+ noway_assert(!codeGen->isFramePointerRequired());
+ codeGen->setFramePointerUsed(false);
+ codeGen->setDoubleAlign(true);
+ break;
+#endif
+ }
+
+ /* Record the set of registers that we need */
+ codeGen->regSet.rsClearRegsModified();
+ if (regUsed != RBM_NONE)
+ {
+ codeGen->regSet.rsSetRegsModified(regUsed);
+ }
+
+ /* We need genFullPtrRegMap if :
+ * The method is fully interruptible, or
+ * We are generating an EBP-less frame (for stack-pointer deltas)
+ */
+
+ genFullPtrRegMap = (genInterruptible || !codeGen->isFramePointerUsed());
+
+ raMarkStkVars();
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("# rpPasses was %u for %s\n", rpPasses, info.compFullName);
+ printf(" rpStkPredict was %u\n", rpStkPredict);
+ }
+#endif
+ rpRegAllocDone = true;
+}
+
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Mark all variables as to whether they live on the stack frame
+ * (part or whole), and if so what the base is (FP or SP).
+ */
+
+void Compiler::raMarkStkVars()
+{
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+        // For RyuJIT, lvOnFrame is set by LSRA, except in the zero-ref case, which is handled below.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef LEGACY_BACKEND
+ varDsc->lvOnFrame = false;
+#endif // LEGACY_BACKEND
+
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ noway_assert(!varDsc->lvRegister);
+ goto ON_STK;
+ }
+
+ /* Fully enregistered variables don't need any frame space */
+
+ if (varDsc->lvRegister)
+ {
+ if (!isRegPairType(varDsc->TypeGet()))
+ {
+ goto NOT_STK;
+ }
+
+ /* For "large" variables make sure both halves are enregistered */
+
+ if (varDsc->lvRegNum != REG_STK && varDsc->lvOtherReg != REG_STK)
+ {
+ goto NOT_STK;
+ }
+ }
+ /* Unused variables typically don't get any frame space */
+ else if (varDsc->lvRefCnt == 0)
+ {
+ bool needSlot = false;
+
+ bool stkFixedArgInVarArgs =
+ info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg;
+
+ // If its address has been exposed, ignore lvRefCnt. However, exclude
+ // fixed arguments in varargs method as lvOnFrame shouldn't be set
+ // for them as we don't want to explicitly report them to GC.
+
+ if (!stkFixedArgInVarArgs)
+ {
+ needSlot |= varDsc->lvAddrExposed;
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ /* Is this the dummy variable representing GT_LCLBLK ? */
+ needSlot |= (lclNum == lvaOutgoingArgSpaceVar);
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+#ifdef DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ /* For debugging, note that we have to reserve space even for
+ unused variables if they are ever in scope. However, this is not
+ an issue as fgExtendDbgLifetimes() adds an initialization and
+ variables in scope will not have a zero ref-cnt.
+ */
+ if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
+ {
+ for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
+ {
+ noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum);
+ }
+ }
+#endif
+ /*
+ For Debug Code, we have to reserve space even if the variable is never
+ in scope. We will also need to initialize it if it is a GC var.
+              So we set lvMustInit and artificially bump up the ref-cnt.
+ */
+
+ if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount)
+ {
+ needSlot |= true;
+
+ if (lvaTypeIsGC(lclNum))
+ {
+ varDsc->lvRefCnt = 1;
+ }
+
+ if (!varDsc->lvIsParam)
+ {
+ varDsc->lvMustInit = true;
+ }
+ }
+#endif // DEBUGGING_SUPPORT
+
+#ifndef LEGACY_BACKEND
+ varDsc->lvOnFrame = needSlot;
+#endif // !LEGACY_BACKEND
+ if (!needSlot)
+ {
+ /* Clear the lvMustInit flag in case it is set */
+ varDsc->lvMustInit = false;
+
+ goto NOT_STK;
+ }
+ }
+
+#ifndef LEGACY_BACKEND
+ if (!varDsc->lvOnFrame)
+ {
+ goto NOT_STK;
+ }
+#endif // !LEGACY_BACKEND
+
+ ON_STK:
+ /* The variable (or part of it) lives on the stack frame */
+
+ noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
+#if FEATURE_FIXED_OUT_ARGS
+ noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0);
+#else // FEATURE_FIXED_OUT_ARGS
+ noway_assert(lvaLclSize(lclNum) != 0);
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the
+ // stack frame
+
+ NOT_STK:;
+ varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();
+
+#if DOUBLE_ALIGN
+
+ if (codeGen->doDoubleAlign())
+ {
+ noway_assert(codeGen->isFramePointerUsed() == false);
+
+ /* All arguments are off of EBP with double-aligned frames */
+
+ if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ {
+ varDsc->lvFramePointerBased = true;
+ }
+ }
+
+#endif
+
+ /* Some basic checks */
+
+ // It must be in a register, on frame, or have zero references.
+
+ noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt == 0);
+
+#ifndef LEGACY_BACKEND
+ // We can't have both lvRegister and lvOnFrame for RyuJIT
+ noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame);
+#else // LEGACY_BACKEND
+
+ /* If both lvRegister and lvOnFrame are set, it must be partially enregistered */
+ noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame ||
+ (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK));
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUG
+
+ // For varargs functions, there should be no direct references to
+ // parameter variables except for 'this' (because these were morphed
+            // in the importer), the 'arglist' parameter (which is not a GC
+            // pointer), and the return buffer argument (if we are returning a
+            // struct).
+            // This is important because we don't want to try to report them
+            // to the GC, as the frame offsets in these local variables would
+ // not be correct.
+
+ if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum))
+ {
+ if (!varDsc->lvPromoted && !varDsc->lvIsStructField)
+ {
+ noway_assert(varDsc->lvRefCnt == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame);
+ }
+ }
+#endif
+ }
+}
+
+#ifdef LEGACY_BACKEND
+void Compiler::rpRecordPrediction()
+{
+ if (rpBestRecordedPrediction == NULL || rpStkPredict < rpBestRecordedStkPredict)
+ {
+ if (rpBestRecordedPrediction == NULL)
+ {
+ rpBestRecordedPrediction =
+ reinterpret_cast<VarRegPrediction*>(compGetMemArrayA(lvaCount, sizeof(VarRegPrediction)));
+ }
+ for (unsigned k = 0; k < lvaCount; k++)
+ {
+ rpBestRecordedPrediction[k].m_isEnregistered = lvaTable[k].lvRegister;
+ rpBestRecordedPrediction[k].m_regNum = (regNumberSmall)lvaTable[k].GetRegNum();
+ rpBestRecordedPrediction[k].m_otherReg = (regNumberSmall)lvaTable[k].GetOtherReg();
+ }
+ rpBestRecordedStkPredict = rpStkPredict;
+ JITDUMP("Recorded a feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict);
+ }
+}
+
+void Compiler::rpUseRecordedPredictionIfBetter()
+{
+ JITDUMP("rpStkPredict is %d; previous feasible reg prediction is %d.\n", rpStkPredict,
+ rpBestRecordedPrediction != NULL ? rpBestRecordedStkPredict : 0);
+ if (rpBestRecordedPrediction != NULL && rpStkPredict > rpBestRecordedStkPredict)
+ {
+ JITDUMP("Reverting to a previously-recorded feasible reg prediction with weighted stack use count %d.\n",
+ rpBestRecordedStkPredict);
+
+ for (unsigned k = 0; k < lvaCount; k++)
+ {
+ lvaTable[k].lvRegister = rpBestRecordedPrediction[k].m_isEnregistered;
+ lvaTable[k].SetRegNum(static_cast<regNumber>(rpBestRecordedPrediction[k].m_regNum));
+ lvaTable[k].SetOtherReg(static_cast<regNumber>(rpBestRecordedPrediction[k].m_otherReg));
+ }
+ }
+}
+#endif // LEGACY_BACKEND
diff --git a/src/jit/regalloc.h b/src/jit/regalloc.h
new file mode 100644
index 0000000000..7e2d7c7eb1
--- /dev/null
+++ b/src/jit/regalloc.h
@@ -0,0 +1,111 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef REGALLOC_H_
+#define REGALLOC_H_
+
+// Some things that are used by both LSRA and regpredict allocators.
+
+enum FrameType
+{
+ FT_NOT_SET,
+ FT_ESP_FRAME,
+ FT_EBP_FRAME,
+#if DOUBLE_ALIGN
+ FT_DOUBLE_ALIGN_FRAME,
+#endif
+};
+
+#ifdef LEGACY_BACKEND
+
+#include "varset.h"
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+// This enumeration specifies register restrictions for the predictor
+enum rpPredictReg
+{
+ PREDICT_NONE, // any subtree
+ PREDICT_ADDR, // subtree is left side of an assignment
+ PREDICT_REG, // subtree must be any register
+ PREDICT_SCRATCH_REG, // subtree must be any writable register
+
+#if defined(_TARGET_ARM_)
+ PREDICT_PAIR_R0R1, // subtree will write R0 and R1
+ PREDICT_PAIR_R2R3, // subtree will write R2 and R3
+
+#elif defined(_TARGET_AMD64_)
+
+ PREDICT_NOT_REG_EAX, // subtree must be any writable register, except EAX
+ PREDICT_NOT_REG_ECX, // subtree must be any writable register, except ECX
+
+#elif defined(_TARGET_X86_)
+
+ PREDICT_NOT_REG_EAX, // subtree must be any writable register, except EAX
+ PREDICT_NOT_REG_ECX, // subtree must be any writable register, except ECX
+
+ PREDICT_PAIR_EAXEDX, // subtree will write EAX and EDX
+ PREDICT_PAIR_ECXEBX, // subtree will write ECX and EBX
+
+#else
+#error "Unknown Target!"
+#endif // _TARGET_
+
+#define REGDEF(name, rnum, mask, sname) PREDICT_REG_##name,
+#include "register.h"
+
+    // The following are used whenever we have an ASG node into a LCL_VAR that
+    // we predict to be enregistered. These values indicate that we can expect
+    // to use the register that is being assigned into as the temporary to
+    // compute the right side of the ASG node.
+
+    PREDICT_REG_VAR_T00, // write the register used by tracked variable 00
+ PREDICT_REG_VAR_MAX = PREDICT_REG_VAR_T00 + lclMAX_TRACKED - 1,
+
+ PREDICT_COUNT = PREDICT_REG_VAR_T00,
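+
+    // For illustration only (a sketch, not an enumerator): the predictor maps a tracked
+    // variable index to one of these values with arithmetic along the lines of
+    //     rpPredictReg predict = (rpPredictReg)(PREDICT_REG_VAR_T00 + varIndex);
+    // and recovers the index again with (predict - PREDICT_REG_VAR_T00).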
+
+#define REGDEF(name, rnum, mask, sname)
+#define REGALIAS(alias, realname) PREDICT_REG_##alias = PREDICT_REG_##realname,
+#include "register.h"
+
+#if defined(_TARGET_ARM_)
+
+ PREDICT_REG_FIRST = PREDICT_REG_R0,
+ PREDICT_INTRET = PREDICT_REG_R0,
+ PREDICT_LNGRET = PREDICT_PAIR_R0R1,
+ PREDICT_FLTRET = PREDICT_REG_F0,
+
+#elif defined(_TARGET_AMD64_)
+
+ PREDICT_REG_FIRST = PREDICT_REG_RAX,
+ PREDICT_INTRET = PREDICT_REG_EAX,
+ PREDICT_LNGRET = PREDICT_REG_RAX,
+
+#elif defined(_TARGET_X86_)
+
+ PREDICT_REG_FIRST = PREDICT_REG_EAX,
+ PREDICT_INTRET = PREDICT_REG_EAX,
+ PREDICT_LNGRET = PREDICT_PAIR_EAXEDX,
+
+#else
+#error "Unknown _TARGET_"
+#endif // _TARGET_
+
+};
+#if DOUBLE_ALIGN
+enum CanDoubleAlign
+{
+ CANT_DOUBLE_ALIGN,
+ CAN_DOUBLE_ALIGN,
+ MUST_DOUBLE_ALIGN,
+ COUNT_DOUBLE_ALIGN,
+
+ DEFAULT_DOUBLE_ALIGN = CAN_DOUBLE_ALIGN
+};
+#endif
+
+#endif // LEGACY_BACKEND
+
+#endif // REGALLOC_H_
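A note on the PREDICT_REG_VAR_T00 .. PREDICT_REG_VAR_MAX block above: it encodes a tracked local's variable index directly into the prediction value. A hedged sketch of the two directions of that mapping, with helper names invented for illustration (the real predictor may spell them differently):

#ifdef LEGACY_BACKEND
// Sketch only: the PREDICT_REG_VAR_* values are PREDICT_REG_VAR_T00 plus a
// tracked-variable index, so the mapping in both directions is simple arithmetic.
inline rpPredictReg predictRegForTrackedVar(unsigned varIndex)
{
    assert(varIndex < lclMAX_TRACKED);
    return static_cast<rpPredictReg>(PREDICT_REG_VAR_T00 + varIndex);
}

inline unsigned trackedVarForPredictReg(rpPredictReg predict)
{
    assert((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX));
    return static_cast<unsigned>(predict - PREDICT_REG_VAR_T00);
}
#endif // LEGACY_BACKEND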
diff --git a/src/jit/register.h b/src/jit/register.h
new file mode 100644
index 0000000000..9e351037fd
--- /dev/null
+++ b/src/jit/register.h
@@ -0,0 +1,124 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+
+/*****************************************************************************/
+/*****************************************************************************/
+#ifndef REGDEF
+#error Must define REGDEF macro before including this file
+#endif
+#ifndef REGALIAS
+#define REGALIAS(alias, realname)
+#endif
+
+#if defined(_TARGET_XARCH_)
+
+#if defined(_TARGET_X86_)
+/*
+REGDEF(name, rnum, mask, sname) */
+REGDEF(EAX, 0, 0x01, "eax" )
+REGDEF(ECX, 1, 0x02, "ecx" )
+REGDEF(EDX, 2, 0x04, "edx" )
+REGDEF(EBX, 3, 0x08, "ebx" )
+REGDEF(ESP, 4, 0x10, "esp" )
+REGDEF(EBP, 5, 0x20, "ebp" )
+REGDEF(ESI, 6, 0x40, "esi" )
+REGDEF(EDI, 7, 0x80, "edi" )
+REGALIAS(RAX, EAX)
+REGALIAS(RCX, ECX)
+REGALIAS(RDX, EDX)
+REGALIAS(RBX, EBX)
+REGALIAS(RSP, ESP)
+REGALIAS(RBP, EBP)
+REGALIAS(RSI, ESI)
+REGALIAS(RDI, EDI)
+
+#else // !defined(_TARGET_X86_)
+
+/*
+REGDEF(name, rnum, mask, sname) */
+REGDEF(RAX, 0, 0x0001, "rax" )
+REGDEF(RCX, 1, 0x0002, "rcx" )
+REGDEF(RDX, 2, 0x0004, "rdx" )
+REGDEF(RBX, 3, 0x0008, "rbx" )
+REGDEF(RSP, 4, 0x0010, "rsp" )
+REGDEF(RBP, 5, 0x0020, "rbp" )
+REGDEF(RSI, 6, 0x0040, "rsi" )
+REGDEF(RDI, 7, 0x0080, "rdi" )
+REGDEF(R8, 8, 0x0100, "r8" )
+REGDEF(R9, 9, 0x0200, "r9" )
+REGDEF(R10, 10, 0x0400, "r10" )
+REGDEF(R11, 11, 0x0800, "r11" )
+REGDEF(R12, 12, 0x1000, "r12" )
+REGDEF(R13, 13, 0x2000, "r13" )
+REGDEF(R14, 14, 0x4000, "r14" )
+REGDEF(R15, 15, 0x8000, "r15" )
+
+REGALIAS(EAX, RAX)
+REGALIAS(ECX, RCX)
+REGALIAS(EDX, RDX)
+REGALIAS(EBX, RBX)
+REGALIAS(ESP, RSP)
+REGALIAS(EBP, RBP)
+REGALIAS(ESI, RSI)
+REGALIAS(EDI, RDI)
+
+#endif // !defined(_TARGET_X86_)
+
+#ifdef LEGACY_BACKEND
+
+REGDEF(STK, 8, 0x00, "STK" )
+
+#else // !LEGACY_BACKEND
+
+#ifdef _TARGET_AMD64_
+#define XMMBASE 16
+#define XMMMASK(x) (__int64(1) << (x+XMMBASE))
+#else // !_TARGET_AMD64_
+#define XMMBASE 8
+#define XMMMASK(x) (__int32(1) << (x+XMMBASE))
+#endif // !_TARGET_AMD64_
+
+REGDEF(XMM0, 0+XMMBASE, XMMMASK(0), "mm0" )
+REGDEF(XMM1, 1+XMMBASE, XMMMASK(1), "mm1" )
+REGDEF(XMM2, 2+XMMBASE, XMMMASK(2), "mm2" )
+REGDEF(XMM3, 3+XMMBASE, XMMMASK(3), "mm3" )
+REGDEF(XMM4, 4+XMMBASE, XMMMASK(4), "mm4" )
+REGDEF(XMM5, 5+XMMBASE, XMMMASK(5), "mm5" )
+REGDEF(XMM6, 6+XMMBASE, XMMMASK(6), "mm6" )
+REGDEF(XMM7, 7+XMMBASE, XMMMASK(7), "mm7" )
+
+#ifdef _TARGET_X86_
+REGDEF(STK, 8+XMMBASE, 0x0000, "STK" )
+#else // !_TARGET_X86_
+REGDEF(XMM8, 8+XMMBASE, XMMMASK(8), "mm8" )
+REGDEF(XMM9, 9+XMMBASE, XMMMASK(9), "mm9" )
+REGDEF(XMM10, 10+XMMBASE, XMMMASK(10), "mm10" )
+REGDEF(XMM11, 11+XMMBASE, XMMMASK(11), "mm11" )
+REGDEF(XMM12, 12+XMMBASE, XMMMASK(12), "mm12" )
+REGDEF(XMM13, 13+XMMBASE, XMMMASK(13), "mm13" )
+REGDEF(XMM14, 14+XMMBASE, XMMMASK(14), "mm14" )
+REGDEF(XMM15, 15+XMMBASE, XMMMASK(15), "mm15" )
+REGDEF(STK, 16+XMMBASE, 0x0000, "STK" )
+#endif // !_TARGET_X86_
+
+#endif // !LEGACY_BACKEND
+
+#elif defined(_TARGET_ARM_)
+ #include "registerarm.h"
+
+#elif defined(_TARGET_ARM64_)
+ #include "registerarm64.h"
+
+#else
+ #error Unsupported or unset target architecture
+#endif // target type
+/*****************************************************************************/
+#undef REGDEF
+#undef REGALIAS
+#undef XMMMASK
+/*****************************************************************************/
+
+// clang-format on
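register.h above is a classic X-macro header: it refuses to compile unless the includer has defined REGDEF, expands one entry per register (plus optional REGALIAS entries), and #undef's the macros on its way out. A hedged sketch of a consumer follows; the enum name and prefix are invented, and it assumes the target macros (_TARGET_X86_ and friends) are already set by the build. The JIT uses this same pattern to build its register enum and register-name tables, each consumer supplying a different REGDEF body.

// Illustrative consumer of register.h (not the JIT's actual definitions).
enum IllustrativeRegNum
{
#define REGDEF(name, rnum, mask, ...) ILLUS_REG_##name = (rnum),
#define REGALIAS(alias, realname) ILLUS_REG_##alias = ILLUS_REG_##realname,
#include "register.h" // the header #undef's REGDEF and REGALIAS itself
    ILLUS_REG_COUNT
};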
diff --git a/src/jit/register_arg_convention.cpp b/src/jit/register_arg_convention.cpp
new file mode 100644
index 0000000000..4678cdec41
--- /dev/null
+++ b/src/jit/register_arg_convention.cpp
@@ -0,0 +1,123 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "register_arg_convention.h"
+
+unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */)
+{
+ assert(numRegs > 0);
+
+ unsigned resultArgNum = regArgNum(type);
+ bool isBackFilled = false;
+
+#ifdef _TARGET_ARM_
+ // Check for back-filling
+ if (varTypeIsFloating(type) && // We only back-fill the float registers
+ !anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
+ (numRegs == 1) && // Is there a possibility we could back-fill?
+ (fltArgSkippedRegMask != RBM_NONE)) // Is there an available back-fill slot?
+ {
+ // We will never back-fill something greater than a single register
+ // (TYP_FLOAT, or TYP_STRUCT HFA with a single float). This is because
+ // we don't have any types that require > 2 register alignment, so we
+ // can't create a > 1 register alignment hole to back-fill.
+
+ // Back-fill the register
+ regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
+ fltArgSkippedRegMask &= ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
+ resultArgNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
+ assert(resultArgNum < MAX_FLOAT_REG_ARG);
+ isBackFilled = true;
+ }
+#endif // _TARGET_ARM_
+
+ if (!isBackFilled)
+ {
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
+ // For System V the reg type counters should be independent.
+ nextReg(TYP_INT, numRegs);
+ nextReg(TYP_FLOAT, numRegs);
+#else
+ // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated.
+ nextReg(type, numRegs);
+#endif
+ }
+
+ return resultArgNum;
+}
+
+bool InitVarDscInfo::enoughAvailRegs(var_types type, unsigned numRegs /* = 1 */)
+{
+ assert(numRegs > 0);
+
+ unsigned backFillCount = 0;
+
+#ifdef _TARGET_ARM_
+ // Check for back-filling
+ if (varTypeIsFloating(type) && // We only back-fill the float registers
+ !anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
+ (numRegs == 1) && // Is there a possibility we could back-fill?
+ (fltArgSkippedRegMask != RBM_NONE)) // Is there an available back-fill slot?
+ {
+ backFillCount = 1;
+ }
+#endif // _TARGET_ARM_
+
+ return regArgNum(type) + numRegs - backFillCount <= maxRegArgNum(type);
+}
+
+unsigned InitVarDscInfo::alignReg(var_types type, unsigned requiredRegAlignment)
+{
+ NYI_ARM64("alignReg");
+
+ assert(requiredRegAlignment > 0);
+ if (requiredRegAlignment == 1)
+ {
+ return 0; // Everything is always "1" aligned
+ }
+
+ assert(requiredRegAlignment == 2); // we don't expect anything else right now
+
+ int alignMask = regArgNum(type) & (requiredRegAlignment - 1);
+ if (alignMask == 0)
+ {
+ return 0; // We're already aligned
+ }
+
+ unsigned cAlignSkipped = requiredRegAlignment - alignMask;
+ assert(cAlignSkipped == 1); // Alignment is currently only 1 or 2, so misalignment can only be 1.
+
+#ifdef _TARGET_ARM_
+ if (varTypeIsFloating(type))
+ {
+ fltArgSkippedRegMask |= genMapFloatRegArgNumToRegMask(floatRegArgNum);
+ }
+#endif // _TARGET_ARM_
+
+ assert(regArgNum(type) + cAlignSkipped <= maxRegArgNum(type)); // if equal, then we aligned the last slot, and the
+ // arg can't be enregistered
+ regArgNum(type) += cAlignSkipped;
+
+ return cAlignSkipped;
+}
+
+bool InitVarDscInfo::canEnreg(var_types type, unsigned numRegs /* = 1 */)
+{
+ if (!isRegParamType(type))
+ {
+ return false;
+ }
+
+ if (!enoughAvailRegs(type, numRegs))
+ {
+ return false;
+ }
+
+ return true;
+}
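The ARM back-filling in allocRegArg above leans on genFindLowestBit to claim the oldest alignment hole recorded in fltArgSkippedRegMask. The same bookkeeping as a self-contained sketch with plain integers; lowestBit stands in for genFindLowestBit, and the JIT's regMaskTP type and float-register mapping helpers are deliberately left out.

#include <cassert>
#include <cstdint>

// Isolate the least significant set bit (the trick genFindLowestBit is built on).
static uint64_t lowestBit(uint64_t mask)
{
    return mask & (0 - mask);
}

// Claim one previously skipped single-float slot and return its argument index,
// mirroring how allocRegArg back-fills instead of advancing the float counter.
static unsigned backFillOneFloatSlot(uint64_t& skippedMask)
{
    assert(skippedMask != 0);
    uint64_t slot = lowestBit(skippedMask);
    skippedMask &= ~slot; // the hole is no longer available, like fltArgSkippedRegMask above

    unsigned argNum = 0;
    while ((slot >>= 1) != 0)
    {
        argNum++;
    }
    return argNum;
}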
diff --git a/src/jit/register_arg_convention.h b/src/jit/register_arg_convention.h
new file mode 100644
index 0000000000..5073732a3e
--- /dev/null
+++ b/src/jit/register_arg_convention.h
@@ -0,0 +1,111 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef __register_arg_convention__
+#define __register_arg_convention__
+
+class LclVarDsc;
+
+struct InitVarDscInfo
+{
+ LclVarDsc* varDsc;
+ unsigned varNum;
+
+ unsigned intRegArgNum;
+ unsigned floatRegArgNum;
+ unsigned maxIntRegArgNum;
+ unsigned maxFloatRegArgNum;
+
+ bool hasRetBufArg;
+
+#ifdef _TARGET_ARM_
+ // Support back-filling of FP parameters. This is similar to code in gtMorphArgs() that
+ // handles arguments.
+ regMaskTP fltArgSkippedRegMask;
+ bool anyFloatStackArgs;
+#endif // _TARGET_ARM_
+
+public:
+ // set to initial values
+ void Init(LclVarDsc* lvaTable, bool _hasRetBufArg)
+ {
+ hasRetBufArg = _hasRetBufArg;
+ varDsc = &lvaTable[0]; // the first argument LclVar 0
+ varNum = 0; // the first argument varNum 0
+ intRegArgNum = 0;
+ floatRegArgNum = 0;
+ maxIntRegArgNum = MAX_REG_ARG;
+ maxFloatRegArgNum = MAX_FLOAT_REG_ARG;
+
+#ifdef _TARGET_ARM_
+ fltArgSkippedRegMask = RBM_NONE;
+ anyFloatStackArgs = false;
+#endif // _TARGET_ARM_
+ }
+
+ // return ref to current register arg for this type
+ unsigned& regArgNum(var_types type)
+ {
+ return varTypeIsFloating(type) ? floatRegArgNum : intRegArgNum;
+ }
+
+ // Allocate a set of contiguous argument registers. "type" is either an integer
+ // type, indicating to use the integer registers, or a floating-point type, indicating
+ // to use the floating-point registers. The actual type (TYP_FLOAT vs. TYP_DOUBLE) is
+ // ignored. "numRegs" is the number of registers to allocate. Thus, on ARM, to allocate
+ // a double-precision floating-point register, you need to pass numRegs=2. For an HFA,
+ // pass the number of slots/registers needed.
+ // This routine handles floating-point register back-filling on ARM.
+ // Returns the first argument register of the allocated set.
+ unsigned allocRegArg(var_types type, unsigned numRegs = 1);
+
+ // We are aligning the register to an ABI-required boundary, such as putting
+ // double-precision floats in even-numbered registers, by skipping one register.
+ // "requiredRegAlignment" is the amount to align to: 1 for no alignment (everything
+ // is 1-aligned), 2 for "double" alignment.
+ // Returns the number of registers skipped.
+ unsigned alignReg(var_types type, unsigned requiredRegAlignment);
+
+ // Return true if it is an enregisterable type and there is room.
+ // Note that for "type", we only care if it is float or not. In particular,
+ // "numRegs" must be "2" to allocate an ARM double-precision floating-point register.
+ bool canEnreg(var_types type, unsigned numRegs = 1);
+
+ // Set the fact that we have used up all remaining registers of 'type'
+ //
+ void setAllRegArgUsed(var_types type)
+ {
+ regArgNum(type) = maxRegArgNum(type);
+ }
+
+#ifdef _TARGET_ARM_
+
+ void setAnyFloatStackArgs()
+ {
+ anyFloatStackArgs = true;
+ }
+
+ bool existAnyFloatStackArgs()
+ {
+ return anyFloatStackArgs;
+ }
+
+#endif // _TARGET_ARM_
+
+private:
+ // return max register arg for this type
+ unsigned maxRegArgNum(var_types type)
+ {
+ return varTypeIsFloating(type) ? maxFloatRegArgNum : maxIntRegArgNum;
+ }
+
+ bool enoughAvailRegs(var_types type, unsigned numRegs = 1);
+
+ void nextReg(var_types type, unsigned numRegs = 1)
+ {
+ regArgNum(type) = min(regArgNum(type) + numRegs, maxRegArgNum(type));
+ }
+};
+
+#endif // __register_arg_convention__
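The comments on canEnreg, alignReg, and allocRegArg above describe the intended calling pattern. Here is a hedged usage sketch for a single double parameter on ARM; the function, its caller, and the bookkeeping around it are invented for the example (in the JIT, the real driver is the local-variable/parameter setup code).

// Sketch only: laying out one TYP_DOUBLE parameter on ARM with InitVarDscInfo.
void exampleLayoutDoubleArg(InitVarDscInfo& varDscInfo)
{
    const var_types type    = TYP_DOUBLE;
    const unsigned  numRegs = 2; // a double occupies two float argument registers

    if (varDscInfo.canEnreg(type, numRegs))
    {
        // Doubles must start at an even float register; any slot skipped here is
        // remembered (fltArgSkippedRegMask) so a later single float can back-fill it.
        varDscInfo.alignReg(type, 2);

        unsigned firstRegArgNum = varDscInfo.allocRegArg(type, numRegs);
        // ... record firstRegArgNum in this parameter's LclVarDsc ...
        (void)firstRegArgNum;
    }
    else
    {
        // Out of float registers: the parameter goes to the stack, and noting that
        // fact disables back-filling for any later float arguments.
#ifdef _TARGET_ARM_
        varDscInfo.setAnyFloatStackArgs();
#endif
    }
}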
diff --git a/src/jit/registerarm.h b/src/jit/registerarm.h
new file mode 100644
index 0000000000..38b82c26f2
--- /dev/null
+++ b/src/jit/registerarm.h
@@ -0,0 +1,86 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+
+/*****************************************************************************/
+/*****************************************************************************/
+#ifndef REGDEF
+#error Must define REGDEF macro before including this file
+#endif
+#ifndef REGALIAS
+#define REGALIAS(alias, realname)
+#endif
+
+/*
+REGDEF(name, rnum, mask, sname) */
+REGDEF(R0, 0, 0x0001, "r0" )
+REGDEF(R1, 1, 0x0002, "r1" )
+REGDEF(R2, 2, 0x0004, "r2" )
+REGDEF(R3, 3, 0x0008, "r3" )
+REGDEF(R4, 4, 0x0010, "r4" )
+REGDEF(R5, 5, 0x0020, "r5" )
+REGDEF(R6, 6, 0x0040, "r6" )
+REGDEF(R7, 7, 0x0080, "r7" )
+REGDEF(R8, 8, 0x0100, "r8" )
+REGDEF(R9, 9, 0x0200, "r9" )
+REGDEF(R10, 10, 0x0400, "r10" )
+REGDEF(R11, 11, 0x0800, "r11" )
+REGDEF(R12, 12, 0x1000, "r12" )
+REGDEF(SP, 13, 0x2000, "sp" )
+REGDEF(LR, 14, 0x4000, "lr" )
+REGDEF(PC, 15, 0x8000, "pc" )
+
+#define FPBASE 16
+#define VFPMASK(x) (((__int64)1) << (x+FPBASE))
+
+REGDEF(F0, 0+FPBASE, VFPMASK(0), "f0")
+REGDEF(F1, 1+FPBASE, VFPMASK(1), "f1")
+REGDEF(F2, 2+FPBASE, VFPMASK(2), "f2")
+REGDEF(F3, 3+FPBASE, VFPMASK(3), "f3")
+REGDEF(F4, 4+FPBASE, VFPMASK(4), "f4")
+REGDEF(F5, 5+FPBASE, VFPMASK(5), "f5")
+REGDEF(F6, 6+FPBASE, VFPMASK(6), "f6")
+REGDEF(F7, 7+FPBASE, VFPMASK(7), "f7")
+REGDEF(F8, 8+FPBASE, VFPMASK(8), "f8")
+REGDEF(F9, 9+FPBASE, VFPMASK(9), "f9")
+REGDEF(F10, 10+FPBASE, VFPMASK(10), "f10")
+REGDEF(F11, 11+FPBASE, VFPMASK(11), "f11")
+REGDEF(F12, 12+FPBASE, VFPMASK(12), "f12")
+REGDEF(F13, 13+FPBASE, VFPMASK(13), "f13")
+REGDEF(F14, 14+FPBASE, VFPMASK(14), "f14")
+REGDEF(F15, 15+FPBASE, VFPMASK(15), "f15")
+REGDEF(F16, 16+FPBASE, VFPMASK(16), "f16")
+REGDEF(F17, 17+FPBASE, VFPMASK(17), "f17")
+REGDEF(F18, 18+FPBASE, VFPMASK(18), "f18")
+REGDEF(F19, 19+FPBASE, VFPMASK(19), "f19")
+REGDEF(F20, 20+FPBASE, VFPMASK(20), "f20")
+REGDEF(F21, 21+FPBASE, VFPMASK(21), "f21")
+REGDEF(F22, 22+FPBASE, VFPMASK(22), "f22")
+REGDEF(F23, 23+FPBASE, VFPMASK(23), "f23")
+REGDEF(F24, 24+FPBASE, VFPMASK(24), "f24")
+REGDEF(F25, 25+FPBASE, VFPMASK(25), "f25")
+REGDEF(F26, 26+FPBASE, VFPMASK(26), "f26")
+REGDEF(F27, 27+FPBASE, VFPMASK(27), "f27")
+REGDEF(F28, 28+FPBASE, VFPMASK(28), "f28")
+REGDEF(F29, 29+FPBASE, VFPMASK(29), "f29")
+REGDEF(F30, 30+FPBASE, VFPMASK(30), "f30")
+REGDEF(F31, 31+FPBASE, VFPMASK(31), "f31")
+
+
+// Allow us to call R11/FP, SP, LR and PC by their register number names
+REGALIAS(FP, R11)
+REGALIAS(R13, SP)
+REGALIAS(R14, LR)
+REGALIAS(R15, PC)
+
+// This must be last!
+REGDEF(STK, 32+FPBASE, 0x0000, "STK")
+
+/*****************************************************************************/
+#undef REGDEF
+#undef REGALIAS
+/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/registerarm64.h b/src/jit/registerarm64.h
new file mode 100644
index 0000000000..f53197259c
--- /dev/null
+++ b/src/jit/registerarm64.h
@@ -0,0 +1,114 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+
+/*****************************************************************************/
+/*****************************************************************************/
+#ifndef REGDEF
+#error Must define REGDEF macro before including this file
+#endif
+#ifndef REGALIAS
+#define REGALIAS(alias, realname)
+#endif
+
+#define RMASK(x) (1ULL << (x))
+
+/*
+REGDEF(name, rnum, mask, xname, wname) */
+REGDEF(R0, 0, 0x0001, "x0" , "w0" )
+REGDEF(R1, 1, 0x0002, "x1" , "w1" )
+REGDEF(R2, 2, 0x0004, "x2" , "w2" )
+REGDEF(R3, 3, 0x0008, "x3" , "w3" )
+REGDEF(R4, 4, 0x0010, "x4" , "w4" )
+REGDEF(R5, 5, 0x0020, "x5" , "w5" )
+REGDEF(R6, 6, 0x0040, "x6" , "w6" )
+REGDEF(R7, 7, 0x0080, "x7" , "w7" )
+REGDEF(R8, 8, 0x0100, "x8" , "w8" )
+REGDEF(R9, 9, 0x0200, "x9" , "w9" )
+REGDEF(R10, 10, 0x0400, "x10", "w10" )
+REGDEF(R11, 11, 0x0800, "x11", "w11" )
+REGDEF(R12, 12, 0x1000, "x12", "w12" )
+REGDEF(R13, 13, 0x2000, "x13", "w13" )
+REGDEF(R14, 14, 0x4000, "x14", "w14" )
+REGDEF(R15, 15, 0x8000, "x15", "w15" )
+REGDEF(IP0, 16, 0x10000, "xip0","wip0" )
+REGDEF(IP1, 17, 0x20000, "xip1","wip1" )
+REGDEF(PR, 18, 0x40000, "xpr", "wpr" )
+REGDEF(R19, 19, 0x80000, "x19", "w19" )
+REGDEF(R20, 20, 0x100000, "x20", "w20" )
+REGDEF(R21, 21, 0x200000, "x21", "w21" )
+REGDEF(R22, 22, 0x400000, "x22", "w22" )
+REGDEF(R23, 23, 0x800000, "x23", "w23" )
+REGDEF(R24, 24, 0x1000000, "x24", "w24" )
+REGDEF(R25, 25, 0x2000000, "x25", "w25" )
+REGDEF(R26, 26, 0x4000000, "x26", "w26" )
+REGDEF(R27, 27, 0x8000000, "x27", "w27" )
+REGDEF(R28, 28, 0x10000000, "x28", "w28" )
+REGDEF(FP, 29, 0x20000000, "fp" , "w29" )
+REGDEF(LR, 30, 0x40000000, "lr" , "w30" )
+REGDEF(ZR, 31, 0x80000000, "xzr", "wzr" )
+
+// Allow us to call IP0,IP1,PR,FP,LR by their register number names
+REGALIAS(R16, IP0)
+REGALIAS(R17, IP1)
+REGALIAS(R18, PR)
+REGALIAS(R29, FP)
+REGALIAS(R30, LR)
+
+#define VBASE 32
+#define VMASK(x) (1ULL << (VBASE+(x)))
+
+/*
+REGDEF(name, rnum, mask, xname, wname) */
+REGDEF(V0, 0+VBASE, VMASK(0), "d0", "s0")
+REGDEF(V1, 1+VBASE, VMASK(1), "d1", "s1")
+REGDEF(V2, 2+VBASE, VMASK(2), "d2", "s2")
+REGDEF(V3, 3+VBASE, VMASK(3), "d3", "s3")
+REGDEF(V4, 4+VBASE, VMASK(4), "d4", "s4")
+REGDEF(V5, 5+VBASE, VMASK(5), "d5", "s5")
+REGDEF(V6, 6+VBASE, VMASK(6), "d6", "s6")
+REGDEF(V7, 7+VBASE, VMASK(7), "d7", "s7")
+REGDEF(V8, 8+VBASE, VMASK(8), "d8", "s8")
+REGDEF(V9, 9+VBASE, VMASK(9), "d9", "s9")
+REGDEF(V10, 10+VBASE, VMASK(10), "d10", "s10")
+REGDEF(V11, 11+VBASE, VMASK(11), "d11", "s11")
+REGDEF(V12, 12+VBASE, VMASK(12), "d12", "s12")
+REGDEF(V13, 13+VBASE, VMASK(13), "d13", "s13")
+REGDEF(V14, 14+VBASE, VMASK(14), "d14", "s14")
+REGDEF(V15, 15+VBASE, VMASK(15), "d15", "s15")
+REGDEF(V16, 16+VBASE, VMASK(16), "d16", "s16")
+REGDEF(V17, 17+VBASE, VMASK(17), "d17", "s17")
+REGDEF(V18, 18+VBASE, VMASK(18), "d18", "s18")
+REGDEF(V19, 19+VBASE, VMASK(19), "d19", "s19")
+REGDEF(V20, 20+VBASE, VMASK(20), "d20", "s20")
+REGDEF(V21, 21+VBASE, VMASK(21), "d21", "s21")
+REGDEF(V22, 22+VBASE, VMASK(22), "d22", "s22")
+REGDEF(V23, 23+VBASE, VMASK(23), "d23", "s23")
+REGDEF(V24, 24+VBASE, VMASK(24), "d24", "s24")
+REGDEF(V25, 25+VBASE, VMASK(25), "d25", "s25")
+REGDEF(V26, 26+VBASE, VMASK(26), "d26", "s26")
+REGDEF(V27, 27+VBASE, VMASK(27), "d27", "s27")
+REGDEF(V28, 28+VBASE, VMASK(28), "d28", "s28")
+REGDEF(V29, 29+VBASE, VMASK(29), "d29", "s29")
+REGDEF(V30, 30+VBASE, VMASK(30), "d30", "s30")
+REGDEF(V31, 31+VBASE, VMASK(31), "d31", "s31")
+
+// The registers with values 64 (NBASE) and above are not real register numbers
+#define NBASE 64
+
+REGDEF(SP, 0+NBASE, 0x0000, "sp", "wsp?")
+// This must be last!
+REGDEF(STK, 1+NBASE, 0x0000, "STK", "STK")
+
+/*****************************************************************************/
+#undef RMASK
+#undef VMASK
+#undef VBASE
+#undef NBASE
+#undef REGDEF
+#undef REGALIAS
+/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/registerfp.cpp b/src/jit/registerfp.cpp
new file mode 100644
index 0000000000..997c223ed4
--- /dev/null
+++ b/src/jit/registerfp.cpp
@@ -0,0 +1,1522 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifdef LEGACY_BACKEND // This file is NOT used for the RyuJIT backend that uses the linear scan register allocator.
+
+#include "compiler.h"
+#include "emit.h"
+#include "codegen.h"
+
+#ifndef _TARGET_ARM_
+#error "Non-ARM target for registerfp.cpp"
+#endif // !_TARGET_ARM_
+
+// get the next argument register which is aligned to 'alignment' # of bytes
+regNumber alignFloatArgReg(regNumber argReg, int alignment)
+{
+ assert(isValidFloatArgReg(argReg));
+
+ int regsize_alignment = alignment /= REGSIZE_BYTES;
+ if (genMapFloatRegNumToRegArgNum(argReg) % regsize_alignment)
+ argReg = genRegArgNext(argReg);
+
+ // Technically the above should be a 'while' loop, so assert that
+ // we never needed to increment more than once.
+ assert(!(genMapFloatRegNumToRegArgNum(argReg) % regsize_alignment));
+
+ return argReg;
+}
+
+// Instruction list
+// N=normal, R=reverse, P=pop
+
+void CodeGen::genFloatConst(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ assert(tree->gtOper == GT_CNS_DBL);
+ var_types type = tree->gtType;
+ double constValue = tree->gtDblCon.gtDconVal;
+ size_t* cv = (size_t*)&constValue;
+
+ regNumber dst = regSet.PickRegFloat(type, pref);
+
+ if (type == TYP_FLOAT)
+ {
+ regNumber reg = regSet.rsPickReg();
+
+ float f = forceCastToFloat(constValue);
+ genSetRegToIcon(reg, *((int*)(&f)));
+ getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, dst, reg);
+ }
+ else
+ {
+ assert(type == TYP_DOUBLE);
+ regNumber reg1 = regSet.rsPickReg();
+ regNumber reg2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg1));
+
+ genSetRegToIcon(reg1, cv[0]);
+ regSet.rsLockReg(genRegMask(reg1));
+ genSetRegToIcon(reg2, cv[1]);
+ regSet.rsUnlockReg(genRegMask(reg1));
+
+ getEmitter()->emitIns_R_R_R(INS_vmov_i2d, EA_8BYTE, dst, reg1, reg2);
+ }
+ genMarkTreeInReg(tree, dst);
+
+ return;
+}
+
+void CodeGen::genFloatMath(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ assert(tree->OperGet() == GT_INTRINSIC);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // get tree into a register
+ genCodeForTreeFloat(op1, pref);
+
+ instruction ins;
+
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ ins = INS_invalid;
+ break;
+ case CORINFO_INTRINSIC_Cos:
+ ins = INS_invalid;
+ break;
+ case CORINFO_INTRINSIC_Sqrt:
+ ins = INS_vsqrt;
+ break;
+ case CORINFO_INTRINSIC_Abs:
+ ins = INS_vabs;
+ break;
+ case CORINFO_INTRINSIC_Round:
+ {
+ regNumber reg = regSet.PickRegFloat(tree->TypeGet(), pref);
+ genMarkTreeInReg(tree, reg);
+ // convert it to a long and back
+ inst_RV_RV(ins_FloatConv(TYP_LONG, tree->TypeGet()), reg, op1->gtRegNum, tree->TypeGet());
+ inst_RV_RV(ins_FloatConv(tree->TypeGet(), TYP_LONG), reg, reg);
+ genCodeForTreeFloat_DONE(tree, op1->gtRegNum);
+ return;
+ }
+ break;
+ default:
+ unreached();
+ }
+
+ if (ins != INS_invalid)
+ {
+ regNumber reg = regSet.PickRegFloat(tree->TypeGet(), pref);
+ genMarkTreeInReg(tree, reg);
+ inst_RV_RV(ins, reg, op1->gtRegNum, tree->TypeGet());
+ // mark register that holds tree
+ genCodeForTreeFloat_DONE(tree, reg);
+ }
+ else
+ {
+ unreached();
+ // If unreached is removed, mark register that holds tree
+ // genCodeForTreeFloat_DONE(tree, op1->gtRegNum);
+ }
+
+ return;
+}
+
+void CodeGen::genFloatSimple(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ assert(tree->OperKind() & GTK_SMPOP);
+ var_types type = tree->TypeGet();
+
+ RegSet::RegisterPreference defaultPref(RBM_ALLFLOAT, RBM_NONE);
+ if (pref == NULL)
+ {
+ pref = &defaultPref;
+ }
+
+ switch (tree->OperGet())
+ {
+ // Assignment
+ case GT_ASG:
+ {
+ genFloatAssign(tree);
+ break;
+ }
+
+ // Arithmetic binops
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ case GT_DIV:
+ {
+ genFloatArith(tree, pref);
+ break;
+ }
+
+ case GT_NEG:
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // get the tree into a register
+ genCodeForTreeFloat(op1, pref);
+
+ // change the sign
+ regNumber reg = regSet.PickRegFloat(type, pref);
+ genMarkTreeInReg(tree, reg);
+ inst_RV_RV(ins_MathOp(tree->OperGet(), type), reg, op1->gtRegNum, type);
+
+ // mark register that holds tree
+ genCodeForTreeFloat_DONE(tree, reg);
+ return;
+ }
+
+ case GT_IND:
+ {
+ regMaskTP addrReg;
+
+ // Make sure the address value is 'addressable'
+ addrReg = genMakeAddressable(tree, 0, RegSet::FREE_REG);
+
+ // Load the value onto the FP stack
+ regNumber reg = regSet.PickRegFloat(type, pref);
+ genLoadFloat(tree, reg);
+
+ genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
+
+ genCodeForTreeFloat_DONE(tree, reg);
+
+ break;
+ }
+ case GT_CAST:
+ {
+ genCodeForTreeCastFloat(tree, pref);
+ break;
+ }
+
+ // Asg-Arithmetic ops
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ {
+ genFloatAsgArith(tree);
+ break;
+ }
+ case GT_INTRINSIC:
+ genFloatMath(tree, pref);
+ break;
+
+ case GT_RETURN:
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ assert(op1);
+
+ pref->best = (type == TYP_DOUBLE) ? RBM_DOUBLERET : RBM_FLOATRET;
+
+ // Compute the result
+ genCodeForTreeFloat(op1, pref);
+
+ inst_RV_TT(ins_FloatConv(tree->TypeGet(), op1->TypeGet()), REG_FLOATRET, op1);
+ if (compiler->info.compIsVarArgs || compiler->opts.compUseSoftFP)
+ {
+ if (tree->TypeGet() == TYP_FLOAT)
+ {
+ inst_RV_RV(INS_vmov_f2i, REG_INTRET, REG_FLOATRET, TYP_FLOAT, EA_4BYTE);
+ }
+ else
+ {
+ assert(tree->TypeGet() == TYP_DOUBLE);
+ inst_RV_RV_RV(INS_vmov_d2i, REG_INTRET, REG_NEXT(REG_INTRET), REG_FLOATRET, EA_8BYTE);
+ }
+ }
+ break;
+ }
+ case GT_ARGPLACE:
+ break;
+
+ case GT_COMMA:
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ genCodeForTreeFloat(op2, pref);
+
+ regSet.SetUsedRegFloat(op2, true);
+ genEvalSideEffects(op1);
+ regSet.SetUsedRegFloat(op2, false);
+ }
+ else
+ {
+ genEvalSideEffects(op1);
+ genCodeForTreeFloat(op2, pref);
+ }
+
+ genCodeForTreeFloat_DONE(tree, op2->gtRegNum);
+ break;
+ }
+
+ case GT_CKFINITE:
+ genFloatCheckFinite(tree, pref);
+ break;
+
+ default:
+ NYI("Unhandled register FP codegen");
+ }
+}
+
+// generate code for ckfinite tree/instruction
+void CodeGen::genFloatCheckFinite(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ TempDsc* temp;
+ int offs;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // Offset of the DWord containing the exponent
+ offs = (op1->gtType == TYP_FLOAT) ? 0 : sizeof(int);
+
+ // get tree into a register
+ genCodeForTreeFloat(op1, pref);
+
+ regNumber reg = regSet.rsPickReg();
+
+ int expMask;
+ if (op1->gtType == TYP_FLOAT)
+ {
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, reg, op1->gtRegNum);
+ expMask = 0x7F800000;
+ }
+ else // double
+ {
+ assert(op1->gtType == TYP_DOUBLE);
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, reg,
+ REG_NEXT(op1->gtRegNum)); // the high 32 bits of the double register
+ expMask = 0x7FF00000;
+ }
+ regTracker.rsTrackRegTrash(reg);
+
+ // Check if the exponent is all ones
+ inst_RV_IV(INS_and, reg, expMask, EA_4BYTE);
+ inst_RV_IV(INS_cmp, reg, expMask, EA_4BYTE);
+
+ // If exponent was all 1's, we need to throw ArithExcep
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpEqual, SCK_ARITH_EXCPN);
+
+ genCodeForTreeFloat_DONE(tree, op1->gtRegNum);
+}
+
+void CodeGen::genFloatAssign(GenTree* tree)
+{
+ var_types type = tree->TypeGet();
+ GenTreePtr op1 = tree->gtGetOp1();
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ regMaskTP needRegOp1 = RBM_ALLINT;
+ regMaskTP addrReg = RBM_NONE;
+ bool volat = false; // Is this a volatile store
+ bool unaligned = false; // Is this an unaligned store
+ regNumber op2reg = REG_NA;
+
+#ifdef DEBUGGING_SUPPORT
+ unsigned lclVarNum = compiler->lvaCount;
+ unsigned lclILoffs = DUMMY_INIT(0);
+#endif
+
+ noway_assert(tree->OperGet() == GT_ASG);
+
+ // Is the target a floating-point local variable?
+ // possibly even an enregistered floating-point local variable?
+ //
+ switch (op1->gtOper)
+ {
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ case GT_LCL_FLD:
+ // Check for a misalignment on a Floating Point field
+ //
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ if ((op1->gtLclFld.gtLclOffs % emitTypeSize(op1->TypeGet())) != 0)
+ {
+ unaligned = true;
+ }
+ }
+ break;
+
+ case GT_LCL_VAR:
+ varNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+#ifdef DEBUGGING_SUPPORT
+ // For non-debuggable code, every definition of a lcl-var has
+ // to be checked to see if we need to open a new scope for it.
+ // Remember the local var info to call siCheckVarScope
+ // AFTER code generation of the assignment.
+ //
+ if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
+ {
+ lclVarNum = varNum;
+ lclILoffs = op1->gtLclVar.gtLclILoffs;
+ }
+#endif
+
+ // Dead Store assert (with min opts we may have dead stores)
+ //
+ noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
+
+ // Does this variable live in a register?
+ //
+ if (genMarkLclVar(op1))
+ {
+ noway_assert(!compiler->opts.compDbgCode); // We don't enregister any floats with debug codegen
+
+ // Get hold of the target register
+ //
+ regNumber op1Reg = op1->gtRegVar.gtRegNum;
+
+ // the variable being assigned should be dead in op2
+ assert(!varDsc->lvTracked ||
+ !VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex));
+
+ // Setup register preferencing, so that we try to target the op1 enregistered variable
+ //
+ regMaskTP bestMask = genRegMask(op1Reg);
+ if (type == TYP_DOUBLE)
+ {
+ assert((bestMask & RBM_DBL_REGS) != 0);
+ bestMask |= genRegMask(REG_NEXT(op1Reg));
+ }
+ RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestMask);
+
+ // Evaluate op2 into a floating point register
+ //
+ genCodeForTreeFloat(op2, &pref);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ // Make sure the value ends up in the right place ...
+ // For example if op2 is a call that returns a result
+ // in REG_F0, we will need to do a move instruction here
+ //
+ if ((op2->gtRegNum != op1Reg) || (op2->TypeGet() != type))
+ {
+ regMaskTP spillRegs = regSet.rsMaskUsed & genRegMaskFloat(op1Reg, op1->TypeGet());
+ if (spillRegs != 0)
+ regSet.rsSpillRegs(spillRegs);
+
+ assert(type == op1->TypeGet());
+
+ inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op1Reg, op2->gtRegNum, type);
+ }
+ genUpdateLife(op1);
+ goto DONE_ASG;
+ }
+ break;
+
+ case GT_CLS_VAR:
+ case GT_IND:
+ // Check for a volatile/unaligned store
+ //
+ assert((op1->OperGet() == GT_CLS_VAR) ||
+ (op1->OperGet() == GT_IND)); // Required for GTF_IND_VOLATILE flag to be valid
+ if (op1->gtFlags & GTF_IND_VOLATILE)
+ volat = true;
+ if (op1->gtFlags & GTF_IND_UNALIGNED)
+ unaligned = true;
+ break;
+
+ default:
+ break;
+ }
+
+ // Is the value being assigned an enregistered floating-point local variable?
+ //
+ switch (op2->gtOper)
+ {
+ case GT_LCL_VAR:
+
+ if (!genMarkLclVar(op2))
+ break;
+
+ __fallthrough;
+
+ case GT_REG_VAR:
+
+ // We must honor the order of evaluation in case op1 reassigns our op2 register
+ //
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ break;
+
+ // Is there an implicit conversion that we have to insert?
+ // Handle this case with the normal cases below.
+ //
+ if (type != op2->TypeGet())
+ break;
+
+ // Make the target addressable
+ //
+ addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ noway_assert(op2->IsRegVar());
+
+ op2reg = op2->gtRegVar.gtRegNum;
+ genUpdateLife(op2);
+
+ goto CHK_VOLAT_UNALIGN;
+ default:
+ break;
+ }
+
+ // Is the op2 (RHS) more complex than op1 (LHS)?
+ //
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ regMaskTP bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op1->gtRsvdRegs);
+ RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);
+
+ // Generate op2 (RHS) into a floating point register
+ //
+ genCodeForTreeFloat(op2, &pref);
+ regSet.SetUsedRegFloat(op2, true);
+
+ // Make the target addressable
+ //
+ addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
+
+ genRecoverReg(op2, RBM_ALLFLOAT, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regSet.SetUsedRegFloat(op2, false);
+ }
+ else
+ {
+ needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);
+
+ // Make the target addressable
+ //
+ addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
+
+ // Generate the RHS into any floating point register
+ genCodeForTreeFloat(op2);
+ }
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ op2reg = op2->gtRegNum;
+
+ // Is there an implicit conversion that we have to insert?
+ //
+ if (type != op2->TypeGet())
+ {
+ regMaskTP bestMask = genRegMask(op2reg);
+ if (type == TYP_DOUBLE)
+ {
+ if (bestMask & RBM_DBL_REGS)
+ {
+ bestMask |= genRegMask(REG_NEXT(op2reg));
+ }
+ else
+ {
+ bestMask |= genRegMask(REG_PREV(op2reg));
+ }
+ }
+ RegSet::RegisterPreference op2Pref(RBM_ALLFLOAT, bestMask);
+ op2reg = regSet.PickRegFloat(type, &op2Pref);
+
+ inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op2reg, op2->gtRegNum, type);
+ }
+
+ // Make sure the LHS is still addressable
+ //
+ addrReg = genKeepAddressable(op1, addrReg);
+
+CHK_VOLAT_UNALIGN:
+
+ regSet.rsLockUsedReg(addrReg); // Must prevent unaligned regSet.rsGrabReg from choosing an addrReg
+
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+ if (unaligned)
+ {
+ var_types storeType = op1->TypeGet();
+ assert(storeType == TYP_DOUBLE || storeType == TYP_FLOAT);
+
+ // Unaligned Floating-Point Stores must be done using the integer register(s)
+ regNumber intRegLo = regSet.rsGrabReg(RBM_ALLINT);
+ regNumber intRegHi = REG_NA;
+ regMaskTP tmpLockMask = genRegMask(intRegLo);
+
+ if (storeType == TYP_DOUBLE)
+ {
+ intRegHi = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(intRegLo));
+ tmpLockMask |= genRegMask(intRegHi);
+ }
+
+ // move the FP register over to the integer register(s)
+ //
+ if (storeType == TYP_DOUBLE)
+ {
+ getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegLo, intRegHi, op2reg);
+ regTracker.rsTrackRegTrash(intRegHi);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, intRegLo, op2reg);
+ }
+ regTracker.rsTrackRegTrash(intRegLo);
+
+ regSet.rsLockReg(tmpLockMask); // Temporarily lock the intRegs
+ op1->gtType = TYP_INT; // Temporarily change the type to TYP_INT
+
+ inst_TT_RV(ins_Store(TYP_INT), op1, intRegLo);
+
+ if (storeType == TYP_DOUBLE)
+ {
+ inst_TT_RV(ins_Store(TYP_INT), op1, intRegHi, 4);
+ }
+
+ op1->gtType = storeType; // Change the type back to the floating point type
+ regSet.rsUnlockReg(tmpLockMask); // Unlock the intRegs
+ }
+ else
+ {
+ // Move the value into the target
+ //
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2reg);
+ }
+
+ // Free up anything that was tied up by the LHS
+ //
+ regSet.rsUnlockUsedReg(addrReg);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+DONE_ASG:
+
+ genUpdateLife(tree);
+
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ */
+ if (lclVarNum < compiler->lvaCount)
+ siCheckVarScope(lclVarNum, lclILoffs);
+#endif
+}
+
+void CodeGen::genCodeForTreeFloat(GenTreePtr tree, RegSet::RegisterPreference* pref)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+
+ // What kind of node do we have?
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ if (kind & GTK_CONST)
+ {
+ genFloatConst(tree, pref);
+ }
+ else if (kind & GTK_LEAF)
+ {
+ genFloatLeaf(tree, pref);
+ }
+ else if (kind & GTK_SMPOP)
+ {
+ genFloatSimple(tree, pref);
+ }
+ else
+ {
+ assert(oper == GT_CALL);
+ genCodeForCall(tree, true);
+ }
+}
+
+void CodeGen::genFloatLeaf(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ regNumber reg = REG_NA;
+
+ switch (tree->OperGet())
+ {
+ case GT_LCL_VAR:
+ // Does the variable live in a register?
+ //
+ if (!genMarkLclVar(tree))
+ goto MEM_LEAF;
+ __fallthrough;
+
+ case GT_REG_VAR:
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+ reg = tree->gtRegVar.gtRegNum;
+ break;
+
+ case GT_LCL_FLD:
+ // We only use GT_LCL_FLD for lvAddrTaken vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
+ __fallthrough;
+
+ case GT_CLS_VAR:
+
+ MEM_LEAF:
+ reg = regSet.PickRegFloat(tree->TypeGet(), pref);
+ genLoadFloat(tree, reg);
+ break;
+
+ default:
+ DISPTREE(tree);
+ assert(!"unexpected leaf");
+ }
+
+ genCodeForTreeFloat_DONE(tree, reg);
+ return;
+}
+
+void CodeGen::genLoadFloat(GenTreePtr tree, regNumber reg)
+{
+ if (tree->IsRegVar())
+ {
+ // If it has been spilled, unspill it.
+ LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvSpilled)
+ {
+ UnspillFloat(varDsc);
+ }
+
+ inst_RV_RV(ins_FloatCopy(tree->TypeGet()), reg, tree->gtRegNum, tree->TypeGet());
+ }
+ else
+ {
+ bool unalignedLoad = false;
+ switch (tree->OperGet())
+ {
+ case GT_IND:
+ case GT_CLS_VAR:
+ if (tree->gtFlags & GTF_IND_UNALIGNED)
+ unalignedLoad = true;
+ break;
+ case GT_LCL_FLD:
+ // Check for a misalignment on a Floating Point field
+ //
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
+ {
+ unalignedLoad = true;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (unalignedLoad)
+ {
+ // Make the target addressable
+ //
+ regMaskTP addrReg = genMakeAddressable(tree, 0, RegSet::KEEP_REG, true);
+ regSet.rsLockUsedReg(addrReg); // Must prevent regSet.rsGrabReg from choosing an addrReg
+
+ var_types loadType = tree->TypeGet();
+ assert(loadType == TYP_DOUBLE || loadType == TYP_FLOAT);
+
+ // Unaligned Floating-Point Loads must be loaded into integer register(s)
+ // and then moved over to the Floating-Point register
+ regNumber intRegLo = regSet.rsGrabReg(RBM_ALLINT);
+ regNumber intRegHi = REG_NA;
+ regMaskTP tmpLockMask = genRegMask(intRegLo);
+
+ if (loadType == TYP_DOUBLE)
+ {
+ intRegHi = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(intRegLo));
+ tmpLockMask |= genRegMask(intRegHi);
+ }
+
+ regSet.rsLockReg(tmpLockMask); // Temporarily lock the intRegs
+ tree->gtType = TYP_INT; // Temporarily change the type to TYP_INT
+
+ inst_RV_TT(ins_Load(TYP_INT), intRegLo, tree);
+ regTracker.rsTrackRegTrash(intRegLo);
+
+ if (loadType == TYP_DOUBLE)
+ {
+ inst_RV_TT(ins_Load(TYP_INT), intRegHi, tree, 4);
+ regTracker.rsTrackRegTrash(intRegHi);
+ }
+
+ tree->gtType = loadType; // Change the type back to the floating point type
+ regSet.rsUnlockReg(tmpLockMask); // Unlock the intRegs
+
+ // move the integer register(s) over to the FP register
+ //
+ if (loadType == TYP_DOUBLE)
+ getEmitter()->emitIns_R_R_R(INS_vmov_i2d, EA_8BYTE, reg, intRegLo, intRegHi);
+ else
+ getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, reg, intRegLo);
+
+ // Free up anything that was tied up by genMakeAddressable
+ //
+ regSet.rsUnlockUsedReg(addrReg);
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ }
+ else
+ {
+ inst_RV_TT(ins_FloatLoad(tree->TypeGet()), reg, tree);
+ }
+ if (((tree->OperGet() == GT_CLS_VAR) || (tree->OperGet() == GT_IND)) && (tree->gtFlags & GTF_IND_VOLATILE))
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+ }
+}
+
+void CodeGen::genCodeForTreeFloat_DONE(GenTreePtr tree, regNumber reg)
+{
+ return genCodeForTree_DONE(tree, reg);
+}
+
+void CodeGen::genFloatAsgArith(GenTreePtr tree)
+{
+ // See Flowgraph.cpp, line 13750.
+ // ARM VFP has plenty of registers, 3-operand instructions, and no addressing modes,
+ // so the assignment-arithmetic ops are pointless here.
+ noway_assert(!"Not Reachable for _TARGET_ARM_");
+}
+
+regNumber CodeGen::genAssignArithFloat(
+ genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg)
+{
+ regNumber result;
+
+ // dst should be a regvar or memory
+
+ if (dst->IsRegVar())
+ {
+ regNumber reg = dst->gtRegNum;
+
+ if (src->IsRegVar())
+ {
+ inst_RV_RV(ins_MathOp(oper, dst->gtType), reg, src->gtRegNum, dst->gtType);
+ }
+ else
+ {
+ inst_RV_TT(ins_MathOp(oper, dst->gtType), reg, src, 0, EmitSize(dst));
+ }
+ result = reg;
+ }
+ else // dst in memory
+ {
+ // since this is an asgop the ACTUAL destination is memory
+ // but it is also one of the sources and SSE ops do not allow mem dests
+ // so we have loaded it into a reg, and that is what dstreg represents
+ assert(dstreg != REG_NA);
+
+ if ((src->InReg()))
+ {
+ inst_RV_RV(ins_MathOp(oper, dst->gtType), dstreg, src->gtRegNum, dst->gtType);
+ }
+ else
+ {
+ // mem mem operation
+ inst_RV_TT(ins_MathOp(oper, dst->gtType), dstreg, src, 0, EmitSize(dst));
+ }
+
+ dst->gtFlags &= ~GTF_REG_VAL; // ???
+
+ inst_TT_RV(ins_FloatStore(dst->gtType), dst, dstreg, 0, EmitSize(dst));
+
+ result = REG_NA;
+ }
+
+ return result;
+}
+
+void CodeGen::genFloatArith(GenTreePtr tree, RegSet::RegisterPreference* tgtPref)
+{
+ var_types type = tree->TypeGet();
+ genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtGetOp1();
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ regNumber tgtReg;
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ VARSET_TP varBit;
+
+ assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV);
+
+ RegSet::RegisterPreference defaultPref(RBM_ALLFLOAT, RBM_NONE);
+ if (tgtPref == NULL)
+ {
+ tgtPref = &defaultPref;
+ }
+
+ // Is the op2 (RHS) more complex than op1 (LHS)?
+ //
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ regMaskTP bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op1->gtRsvdRegs);
+ RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);
+
+ // Evaluate op2 into a floating point register
+ //
+ genCodeForTreeFloat(op2, &pref);
+ regSet.SetUsedRegFloat(op2, true);
+
+ // Evaluate op1 into any floating point register
+ //
+ genCodeForTreeFloat(op1);
+ regSet.SetUsedRegFloat(op1, true);
+
+ regNumber op1Reg = op1->gtRegNum;
+ regMaskTP op1Mask = genRegMaskFloat(op1Reg, type);
+
+ // Fix 388445 ARM JitStress WP7
+ regSet.rsLockUsedReg(op1Mask);
+ genRecoverReg(op2, RBM_ALLFLOAT, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regSet.rsUnlockUsedReg(op1Mask);
+
+ regSet.SetUsedRegFloat(op1, false);
+ regSet.SetUsedRegFloat(op2, false);
+ }
+ else
+ {
+ regMaskTP bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op2->gtRsvdRegs);
+ RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);
+
+ // Evaluate op1 into a floating point register
+ //
+ genCodeForTreeFloat(op1, &pref);
+ regSet.SetUsedRegFloat(op1, true);
+
+ // Evaluate op2 into any floating point register
+ //
+ genCodeForTreeFloat(op2);
+ regSet.SetUsedRegFloat(op2, true);
+
+ regNumber op2Reg = op2->gtRegNum;
+ regMaskTP op2Mask = genRegMaskFloat(op2Reg, type);
+
+ // Fix 388445 ARM JitStress WP7
+ regSet.rsLockUsedReg(op2Mask);
+ genRecoverReg(op1, RBM_ALLFLOAT, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regSet.rsUnlockUsedReg(op2Mask);
+
+ regSet.SetUsedRegFloat(op2, false);
+ regSet.SetUsedRegFloat(op1, false);
+ }
+
+ tgtReg = regSet.PickRegFloat(type, tgtPref, true);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ inst_RV_RV_RV(ins_MathOp(oper, type), tgtReg, op1->gtRegNum, op2->gtRegNum, emitActualTypeSize(type));
+
+ genCodeForTreeFloat_DONE(tree, tgtReg);
+}
+
+regNumber CodeGen::genArithmFloat(
+ genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg, bool bReverse)
+{
+ regNumber result = REG_NA;
+
+ assert(dstreg != REG_NA);
+
+ if (bReverse)
+ {
+ GenTree* temp = src;
+ regNumber tempreg = srcreg;
+ src = dst;
+ srcreg = dstreg;
+ dst = temp;
+ dstreg = tempreg;
+ }
+
+ if (srcreg == REG_NA)
+ {
+ if (src->IsRegVar())
+ {
+ inst_RV_RV(ins_MathOp(oper, dst->gtType), dst->gtRegNum, src->gtRegNum, dst->gtType);
+ }
+ else
+ {
+ inst_RV_TT(ins_MathOp(oper, dst->gtType), dst->gtRegNum, src);
+ }
+ }
+ else
+ {
+ inst_RV_RV(ins_MathOp(oper, dst->gtType), dstreg, srcreg, dst->gtType);
+ }
+
+ result = dstreg;
+
+ assert(result != REG_NA);
+ return result;
+}
+
+void CodeGen::genKeepAddressableFloat(GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr)
+{
+ regMaskTP regMaskInt, regMaskFlt;
+
+ regMaskInt = *regMaskIntPtr;
+ regMaskFlt = *regMaskFltPtr;
+
+ *regMaskIntPtr = *regMaskFltPtr = 0;
+
+ switch (tree->OperGet())
+ {
+ case GT_REG_VAR:
+ // If register has been spilled, unspill it
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(&compiler->lvaTable[tree->gtLclVarCommon.gtLclNum]);
+ }
+ break;
+
+ case GT_CNS_DBL:
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(tree);
+ }
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum, tree->TypeGet());
+ break;
+
+ case GT_LCL_FLD:
+ case GT_LCL_VAR:
+ case GT_CLS_VAR:
+ break;
+
+ case GT_IND:
+ if (regMaskFlt == RBM_NONE)
+ {
+ *regMaskIntPtr = genKeepAddressable(tree, regMaskInt, 0);
+ *regMaskFltPtr = 0;
+ return;
+ }
+ __fallthrough;
+
+ default:
+ *regMaskIntPtr = 0;
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(tree);
+ }
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum, tree->TypeGet());
+ break;
+ }
+}
+
+void CodeGen::genComputeAddressableFloat(GenTreePtr tree,
+ regMaskTP addrRegInt,
+ regMaskTP addrRegFlt,
+ RegSet::KeepReg keptReg,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool freeOnly /* = false */)
+{
+ noway_assert(genStillAddressable(tree));
+ noway_assert(varTypeIsFloating(tree->TypeGet()));
+
+ genDoneAddressableFloat(tree, addrRegInt, addrRegFlt, keptReg);
+
+ regNumber reg;
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ reg = tree->gtRegNum;
+ if (freeOnly && !(genRegMaskFloat(reg, tree->TypeGet()) & regSet.RegFreeFloat()))
+ {
+ goto LOAD_REG;
+ }
+ }
+ else
+ {
+ LOAD_REG:
+ RegSet::RegisterPreference pref(needReg, RBM_NONE);
+ reg = regSet.PickRegFloat(tree->TypeGet(), &pref);
+ genLoadFloat(tree, reg);
+ }
+
+ genMarkTreeInReg(tree, reg);
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ regSet.SetUsedRegFloat(tree, true);
+ }
+}
+
+void CodeGen::genDoneAddressableFloat(GenTreePtr tree,
+ regMaskTP addrRegInt,
+ regMaskTP addrRegFlt,
+ RegSet::KeepReg keptReg)
+{
+ assert(!(addrRegInt && addrRegFlt));
+
+ if (addrRegInt)
+ {
+ return genDoneAddressable(tree, addrRegInt, keptReg);
+ }
+ else if (addrRegFlt)
+ {
+ if (keptReg == RegSet::KEEP_REG)
+ {
+ for (regNumber r = REG_FP_FIRST; r != REG_NA; r = regNextOfType(r, tree->TypeGet()))
+ {
+ regMaskTP mask = genRegMaskFloat(r, tree->TypeGet());
+ // some masks take up more than one bit
+ if ((mask & addrRegFlt) == mask)
+ {
+ regSet.SetUsedRegFloat(tree, false);
+ }
+ }
+ }
+ }
+}
+
+GenTreePtr CodeGen::genMakeAddressableFloat(GenTreePtr tree,
+ regMaskTP* regMaskIntPtr,
+ regMaskTP* regMaskFltPtr,
+ bool bCollapseConstantDoubles)
+{
+ *regMaskIntPtr = *regMaskFltPtr = 0;
+
+ switch (tree->OperGet())
+ {
+
+ case GT_LCL_VAR:
+ genMarkLclVar(tree);
+ __fallthrough;
+
+ case GT_REG_VAR:
+ case GT_LCL_FLD:
+ case GT_CLS_VAR:
+ return tree;
+
+ case GT_IND:
+ // Try to make the address directly addressable
+
+ if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, RBM_ALLFLOAT, RegSet::KEEP_REG, regMaskIntPtr, false))
+ {
+ genUpdateLife(tree);
+ return tree;
+ }
+ else
+ {
+ GenTreePtr addr = tree;
+ tree = tree->gtOp.gtOp1;
+ genCodeForTree(tree, 0);
+ regSet.rsMarkRegUsed(tree, addr);
+
+ *regMaskIntPtr = genRegMask(tree->gtRegNum);
+ return addr;
+ }
+
+ // fall through
+
+ default:
+ genCodeForTreeFloat(tree);
+ regSet.SetUsedRegFloat(tree, true);
+
+ // update mask
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum, tree->TypeGet());
+
+ return tree;
+ break;
+ }
+}
+
+void CodeGen::genCodeForTreeCastFloat(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types from = op1->gtType;
+ var_types to = tree->gtType;
+
+ if (varTypeIsFloating(from))
+ genCodeForTreeCastFromFloat(tree, pref);
+ else
+ genCodeForTreeCastToFloat(tree, pref);
+}
+
+void CodeGen::genCodeForTreeCastFromFloat(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types from = op1->gtType;
+ var_types final = tree->gtType;
+ var_types intermediate = tree->CastToType();
+
+ regNumber srcReg;
+ regNumber dstReg;
+
+ assert(varTypeIsFloating(from));
+
+ // Evaluate op1 into a floating point register
+ //
+ if (varTypeIsFloating(final))
+ {
+ genCodeForTreeFloat(op1, pref);
+ }
+ else
+ {
+ RegSet::RegisterPreference defaultPref(RBM_ALLFLOAT, RBM_NONE);
+ genCodeForTreeFloat(op1, &defaultPref);
+ }
+
+ srcReg = op1->gtRegNum;
+
+ if (varTypeIsFloating(final))
+ {
+ // float => double or
+ // double => float
+
+ dstReg = regSet.PickRegFloat(final, pref);
+
+ instruction ins = ins_FloatConv(final, from);
+ if (!isMoveIns(ins) || (srcReg != dstReg))
+ {
+ inst_RV_RV(ins, dstReg, srcReg, from);
+ }
+ }
+ else
+ {
+ // float => int or
+ // double => int
+
+ dstReg = regSet.rsPickReg(pref->ok, pref->best);
+
+ RegSet::RegisterPreference defaultPref(RBM_ALLFLOAT, genRegMask(srcReg));
+ regNumber intermediateReg = regSet.PickRegFloat(TYP_FLOAT, &defaultPref);
+
+ if ((intermediate == TYP_UINT) && (final == TYP_INT))
+ {
+ // Perform the conversion using the FP unit
+ inst_RV_RV(ins_FloatConv(TYP_UINT, from), intermediateReg, srcReg, from);
+
+ // Prevent the call to genIntegerCast
+ final = TYP_UINT;
+ }
+ else
+ {
+ // Perform the conversion using the FP unit
+ inst_RV_RV(ins_FloatConv(TYP_INT, from), intermediateReg, srcReg, from);
+ }
+
+ // the integer result is now in the FP register, move it to the integer ones
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, dstReg, intermediateReg);
+
+ regTracker.rsTrackRegTrash(dstReg);
+
+ // handle things like int <- short <- double
+ if (final != intermediate)
+ {
+ // lie about the register so integer cast logic will finish the job
+ op1->gtRegNum = dstReg;
+ genIntegerCast(tree, pref->ok, pref->best);
+ }
+ }
+
+ genUpdateLife(op1);
+ genCodeForTree_DONE(tree, dstReg);
+}
+
+void CodeGen::genCodeForTreeCastToFloat(GenTreePtr tree, RegSet::RegisterPreference* pref)
+{
+ regNumber srcReg;
+ regNumber dstReg;
+ regNumber vmovReg;
+
+ regMaskTP addrReg;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ op1 = genCodeForCommaTree(op1); // Trim off any comma expressions.
+ var_types from = op1->gtType;
+ var_types to = tree->gtType;
+
+ switch (from)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ // load it into a register
+ genCodeForTree(op1, 0);
+
+ __fallthrough;
+
+ case TYP_BYREF:
+ from = TYP_INT;
+
+ __fallthrough;
+
+ case TYP_INT:
+ {
+ if (op1->gtOper == GT_LCL_FLD)
+ {
+ genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
+ addrReg = 0;
+ }
+ else
+ {
+ addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG);
+ }
+
+ // Grab register for the cast
+ dstReg = regSet.PickRegFloat(to, pref);
+
+ // A float type that is the same size as the int we are coming from
+ var_types vmovType = TYP_FLOAT;
+ regNumber vmovReg = regSet.PickRegFloat(vmovType);
+
+ if (tree->gtFlags & GTF_UNSIGNED)
+ from = TYP_UINT;
+
+ // Is the value a constant, or now sitting in a register?
+ if (op1->InReg() || op1->IsCnsIntOrI())
+ {
+ if (op1->IsCnsIntOrI())
+ {
+ srcReg = genGetRegSetToIcon(op1->AsIntConCommon()->IconValue(), RBM_NONE, op1->TypeGet());
+ }
+ else
+ {
+ srcReg = op1->gtRegNum;
+ }
+
+ // move the integer register value over to the FP register
+ getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, vmovReg, srcReg);
+ // now perform the conversion to the proper floating point representation
+ inst_RV_RV(ins_FloatConv(to, from), dstReg, vmovReg, to);
+ }
+ else
+ {
+ // Load the value from its address
+ inst_RV_TT(ins_FloatLoad(vmovType), vmovReg, op1);
+ inst_RV_RV(ins_FloatConv(to, from), dstReg, vmovReg, to);
+ }
+
+ if (addrReg)
+ {
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+ }
+ genMarkTreeInReg(tree, dstReg);
+
+ break;
+ }
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ {
+ // This is a cast from float to double or double to float
+
+ genCodeForTreeFloat(op1, pref);
+
+ // Grab register for the cast
+ dstReg = regSet.PickRegFloat(to, pref);
+
+ if ((from != to) || (dstReg != op1->gtRegNum))
+ {
+ inst_RV_RV(ins_FloatConv(to, from), dstReg, op1->gtRegNum, to);
+ }
+
+ // Assign reg to tree
+ genMarkTreeInReg(tree, dstReg);
+
+ break;
+ }
+ default:
+ {
+ assert(!"unsupported cast");
+ break;
+ }
+ }
+}
+
+void CodeGen::genRoundFloatExpression(GenTreePtr op, var_types type)
+{
+ // Do nothing with memory resident opcodes - these are the right precision
+ if (type == TYP_UNDEF)
+ type = op->TypeGet();
+
+ switch (op->gtOper)
+ {
+ case GT_LCL_VAR:
+ genMarkLclVar(op);
+ __fallthrough;
+
+ case GT_LCL_FLD:
+ case GT_CLS_VAR:
+ case GT_CNS_DBL:
+ case GT_IND:
+ if (type == op->TypeGet())
+ return;
+
+ default:
+ break;
+ }
+}
+
+#ifdef DEBUG
+
+regMaskTP CodeGenInterface::genStressLockedMaskFloat()
+{
+ return 0;
+}
+
+#endif // DEBUG
+
+/*********************************************************************
+ * Preserve used callee trashed registers across calls.
+ *
+ */
+void CodeGen::SpillForCallRegisterFP(regMaskTP noSpillMask)
+{
+ regMaskTP regBit = 1;
+ for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (!(regBit & noSpillMask) && (regBit & RBM_FLT_CALLEE_TRASH) && regSet.rsUsedTree[regNum])
+ {
+ SpillFloat(regNum, true);
+ }
+ }
+}
+
+/*********************************************************************
+ *
+ * Spill the used floating point register or the enregistered var.
+ * If spilling for a call, then record so, so we can unspill the
+ * ones that were spilled for the call.
+ *
+ */
+void CodeGenInterface::SpillFloat(regNumber reg, bool bIsCall /* = false */)
+{
+ regSet.rsSpillReg(reg);
+}
+
+void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc)
+{
+ // Do actual unspill
+ regNumber reg;
+ if (spillDsc->bEnregisteredVariable)
+ {
+ NYI("unspill enreg var");
+ reg = regSet.PickRegFloat();
+ }
+ else
+ {
+ UnspillFloatMachineDep(spillDsc, false);
+ }
+}
+
+void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc, bool useSameReg)
+{
+ assert(!spillDsc->bEnregisteredVariable);
+
+ assert(spillDsc->spillTree->gtFlags & GTF_SPILLED);
+
+ spillDsc->spillTree->gtFlags &= ~GTF_SPILLED;
+
+ var_types type = spillDsc->spillTree->TypeGet();
+ regNumber reg;
+ if (useSameReg)
+ {
+ // Give register preference as the same register that the tree was originally using.
+ reg = spillDsc->spillTree->gtRegNum;
+
+ regMaskTP maskPref = genRegMask(reg);
+ if (type == TYP_DOUBLE)
+ {
+ assert((maskPref & RBM_DBL_REGS) != 0);
+ maskPref |= genRegMask(REG_NEXT(reg));
+ }
+
+ RegSet::RegisterPreference pref(RBM_ALLFLOAT, maskPref);
+ reg = regSet.PickRegFloat(type, &pref);
+ }
+ else
+ {
+ reg = regSet.PickRegFloat();
+ }
+
+ // load from spilled spot
+ compiler->codeGen->reloadFloatReg(type, spillDsc->spillTemp, reg);
+
+ compiler->codeGen->genMarkTreeInReg(spillDsc->spillTree, reg);
+ regSet.SetUsedRegFloat(spillDsc->spillTree, true);
+}
+
+//
+instruction genFloatJumpInstr(genTreeOps cmp, bool isUnordered)
+{
+ switch (cmp)
+ {
+ case GT_EQ:
+ return INS_beq;
+ case GT_NE:
+ return INS_bne;
+ case GT_LT:
+ return isUnordered ? INS_blt : INS_blo;
+ case GT_LE:
+ return isUnordered ? INS_ble : INS_bls;
+ case GT_GE:
+ return isUnordered ? INS_bpl : INS_bge;
+ case GT_GT:
+ return isUnordered ? INS_bhi : INS_bgt;
+ default:
+ unreached();
+ }
+}
+
+void CodeGen::genCondJumpFloat(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
+{
+ assert(jumpTrue && jumpFalse);
+ assert(!(cond->gtFlags & GTF_REVERSE_OPS)); // Done in genCondJump()
+ assert(varTypeIsFloating(cond->gtOp.gtOp1->gtType));
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+ bool isUnordered = cond->gtFlags & GTF_RELOP_NAN_UN ? true : false;
+
+ regMaskTP bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op2->gtRsvdRegs);
+ RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);
+
+ // Prepare operands.
+ genCodeForTreeFloat(op1, &pref);
+ regSet.SetUsedRegFloat(op1, true);
+
+ genCodeForTreeFloat(op2);
+ regSet.SetUsedRegFloat(op2, true);
+
+ genRecoverReg(op1, RBM_ALLFLOAT, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ // cmp here
+ getEmitter()->emitIns_R_R(INS_vcmp, EmitSize(op1), op1->gtRegNum, op2->gtRegNum);
+
+ // vmrs with register 0xf (APSR_nzcv) transfers the FP status flags to the ARM condition flags
+ getEmitter()->emitIns_R(INS_vmrs, EA_4BYTE, REG_R15);
+
+ regSet.SetUsedRegFloat(op2, false);
+ regSet.SetUsedRegFloat(op1, false);
+
+ getEmitter()->emitIns_J(genFloatJumpInstr(cmp, isUnordered), jumpTrue);
+}
+
+#endif // LEGACY_BACKEND
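genFloatAssign and genLoadFloat above handle unaligned floating-point accesses by bouncing the value through integer registers and word-sized integer loads/stores, which tolerate the unaligned address. The same idea as a standalone sketch, assuming a little-endian target; memcpy stands in for the vmov and integer store instructions.

#include <cstdint>
#include <cstring>

// Store a double to a possibly unaligned address with two 32-bit word stores,
// mirroring the vmov_d2i + two ins_Store(TYP_INT) sequence in genFloatAssign.
static void storeDoubleUnaligned(unsigned char* dst, double value)
{
    uint32_t lo;
    uint32_t hi;
    std::memcpy(&lo, reinterpret_cast<const unsigned char*>(&value), 4);     // plays the role of intRegLo
    std::memcpy(&hi, reinterpret_cast<const unsigned char*>(&value) + 4, 4); // plays the role of intRegHi
    std::memcpy(dst, &lo, 4);     // first word store
    std::memcpy(dst + 4, &hi, 4); // second word store at offset 4
}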
diff --git a/src/jit/registerfp.h b/src/jit/registerfp.h
new file mode 100644
index 0000000000..4c3ecb6050
--- /dev/null
+++ b/src/jit/registerfp.h
@@ -0,0 +1,26 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+/*****************************************************************************/
+#ifndef REGDEF
+#error Must define REGDEF macro before including this file
+#endif
+/*****************************************************************************/
+/* The following is x86 specific */
+/*****************************************************************************/
+/*
+REGDEF(name, rnum, mask, sname) */
+REGDEF(FPV0, 0, 0x01, "FPV0")
+REGDEF(FPV1, 1, 0x02, "FPV1")
+REGDEF(FPV2, 2, 0x04, "FPV2")
+REGDEF(FPV3, 3, 0x08, "FPV3")
+REGDEF(FPV4, 4, 0x10, "FPV4")
+REGDEF(FPV5, 5, 0x20, "FPV5")
+REGDEF(FPV6, 6, 0x40, "FPV6")
+REGDEF(FPV7, 7, 0x80, "FPV7")
+
+/*****************************************************************************/
+#undef REGDEF
+/*****************************************************************************/
diff --git a/src/jit/registerxmm.h b/src/jit/registerxmm.h
new file mode 100644
index 0000000000..4c34261ba8
--- /dev/null
+++ b/src/jit/registerxmm.h
@@ -0,0 +1,48 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+/*****************************************************************************/
+/*****************************************************************************/
+#ifndef REGDEF
+#error Must define REGDEF macro before including this file
+#endif
+
+#ifndef LEGACY_BACKEND
+#error This file is only used for the LEGACY_BACKEND build.
+#endif
+
+#if defined(_TARGET_XARCH_)
+
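+// XMMMASK(i) yields the mask bit for register XMM(i-1): XMMMASK(1) is bit 0 for xmm0, so XMMn maps to bit n.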
+#define XMMMASK(x) (unsigned(1) << (x-1))
+
+/*
+REGDEF(name, rnum, mask, sname) */
+REGDEF(XMM0, 0, XMMMASK(1), "xmm0" )
+REGDEF(XMM1, 1, XMMMASK(2), "xmm1" )
+REGDEF(XMM2, 2, XMMMASK(3), "xmm2" )
+REGDEF(XMM3, 3, XMMMASK(4), "xmm3" )
+REGDEF(XMM4, 4, XMMMASK(5), "xmm4" )
+REGDEF(XMM5, 5, XMMMASK(6), "xmm5" )
+REGDEF(XMM6, 6, XMMMASK(7), "xmm6" )
+REGDEF(XMM7, 7, XMMMASK(8), "xmm7" )
+
+#ifdef _TARGET_AMD64_
+REGDEF(XMM8, 8, XMMMASK(9), "xmm8" )
+REGDEF(XMM9, 9, XMMMASK(10), "xmm9" )
+REGDEF(XMM10, 10, XMMMASK(11), "xmm10" )
+REGDEF(XMM11, 11, XMMMASK(12), "xmm11" )
+REGDEF(XMM12, 12, XMMMASK(13), "xmm12" )
+REGDEF(XMM13, 13, XMMMASK(14), "xmm13" )
+REGDEF(XMM14, 14, XMMMASK(15), "xmm14" )
+REGDEF(XMM15, 15, XMMMASK(16), "xmm15" )
+#endif
+
+#endif // _TARGET_*
+
+/*****************************************************************************/
+#undef REGDEF
+/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/reglist.h b/src/jit/reglist.h
new file mode 100644
index 0000000000..7b706110a8
--- /dev/null
+++ b/src/jit/reglist.h
@@ -0,0 +1,18 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef REGLIST_H
+#define REGLIST_H
+
+#include "target.h"
+#include "tinyarray.h"
+
+// The "regList" type is a small set of registerse
+#ifdef _TARGET_X86_
+typedef TinyArray<unsigned short, regNumber, REGNUM_BITS> regList;
+#else
+// The regList is unused for all other targets.
+#endif // _TARGET_*
+
+#endif // REGLIST_H
diff --git a/src/jit/regpair.h b/src/jit/regpair.h
new file mode 100644
index 0000000000..cfc109b882
--- /dev/null
+++ b/src/jit/regpair.h
@@ -0,0 +1,357 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#ifndef PAIRBEG
+#define PAIRBEG(reg)
+#endif
+
+#ifndef PAIRDEF
+#define PAIRDEF(r1, r2)
+#endif
+
+#ifndef PAIRSTK
+#define PAIRSTK(r1, r2) PAIRDEF(r1, r2)
+#endif
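+// PAIRBEG(r)      starts the group of pairs whose low half is register 'r'.
+// PAIRDEF(r1, r2) defines the pair with low half 'r1' and high half 'r2'.
+// PAIRSTK(r1, r2) defines a pair with one half on the stack (STK); it defaults to PAIRDEF.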
+
+#if defined(_TARGET_X86_)
+/*****************************************************************************/
+/* The following is for x86 */
+/*****************************************************************************/
+
+// rlo rhi
+
+PAIRBEG(EAX)
+PAIRDEF(EAX, ECX)
+PAIRDEF(EAX, EDX)
+PAIRDEF(EAX, EBX)
+PAIRDEF(EAX, EBP)
+PAIRDEF(EAX, ESI)
+PAIRDEF(EAX, EDI)
+PAIRSTK(EAX, STK)
+
+PAIRBEG(ECX)
+PAIRDEF(ECX, EAX)
+PAIRDEF(ECX, EDX)
+PAIRDEF(ECX, EBX)
+PAIRDEF(ECX, EBP)
+PAIRDEF(ECX, ESI)
+PAIRDEF(ECX, EDI)
+PAIRSTK(ECX, STK)
+
+PAIRBEG(EDX)
+PAIRDEF(EDX, EAX)
+PAIRDEF(EDX, ECX)
+PAIRDEF(EDX, EBX)
+PAIRDEF(EDX, EBP)
+PAIRDEF(EDX, ESI)
+PAIRDEF(EDX, EDI)
+PAIRSTK(EDX, STK)
+
+PAIRBEG(EBX)
+PAIRDEF(EBX, EAX)
+PAIRDEF(EBX, EDX)
+PAIRDEF(EBX, ECX)
+PAIRDEF(EBX, EBP)
+PAIRDEF(EBX, ESI)
+PAIRDEF(EBX, EDI)
+PAIRSTK(EBX, STK)
+
+PAIRBEG(EBP)
+PAIRDEF(EBP, EAX)
+PAIRDEF(EBP, EDX)
+PAIRDEF(EBP, ECX)
+PAIRDEF(EBP, EBX)
+PAIRDEF(EBP, ESI)
+PAIRDEF(EBP, EDI)
+PAIRSTK(EBP, STK)
+
+PAIRBEG(ESI)
+PAIRDEF(ESI, EAX)
+PAIRDEF(ESI, EDX)
+PAIRDEF(ESI, ECX)
+PAIRDEF(ESI, EBX)
+PAIRDEF(ESI, EBP)
+PAIRDEF(ESI, EDI)
+PAIRSTK(ESI, STK)
+
+PAIRBEG(EDI)
+PAIRDEF(EDI, EAX)
+PAIRDEF(EDI, EDX)
+PAIRDEF(EDI, ECX)
+PAIRDEF(EDI, EBX)
+PAIRDEF(EDI, EBP)
+PAIRDEF(EDI, ESI)
+PAIRSTK(EDI, STK)
+
+PAIRBEG(STK)
+PAIRSTK(STK, EAX)
+PAIRSTK(STK, EDX)
+PAIRSTK(STK, ECX)
+PAIRSTK(STK, EBX)
+PAIRSTK(STK, EBP)
+PAIRSTK(STK, ESI)
+PAIRSTK(STK, EDI)
+
+#endif
+
+/*****************************************************************************/
+
+#ifdef _TARGET_ARM_
+/*****************************************************************************/
+/* The following is for ARM */
+/*****************************************************************************/
+
+// rlo rhi
+
+PAIRBEG(R0)
+PAIRDEF(R0, R1)
+PAIRDEF(R0, R2)
+PAIRDEF(R0, R3)
+PAIRDEF(R0, R4)
+PAIRDEF(R0, R5)
+PAIRDEF(R0, R6)
+PAIRDEF(R0, R7)
+PAIRDEF(R0, R8)
+PAIRDEF(R0, R9)
+PAIRDEF(R0, R10)
+PAIRDEF(R0, R11)
+PAIRDEF(R0, R12)
+PAIRDEF(R0, LR)
+PAIRSTK(R0, STK)
+
+PAIRBEG(R1)
+PAIRDEF(R1, R0)
+PAIRDEF(R1, R2)
+PAIRDEF(R1, R3)
+PAIRDEF(R1, R4)
+PAIRDEF(R1, R5)
+PAIRDEF(R1, R6)
+PAIRDEF(R1, R7)
+PAIRDEF(R1, R8)
+PAIRDEF(R1, R9)
+PAIRDEF(R1, R10)
+PAIRDEF(R1, R11)
+PAIRDEF(R1, R12)
+PAIRDEF(R1, LR)
+PAIRSTK(R1, STK)
+
+PAIRBEG(R2)
+PAIRDEF(R2, R0)
+PAIRDEF(R2, R1)
+PAIRDEF(R2, R3)
+PAIRDEF(R2, R4)
+PAIRDEF(R2, R5)
+PAIRDEF(R2, R6)
+PAIRDEF(R2, R7)
+PAIRDEF(R2, R8)
+PAIRDEF(R2, R9)
+PAIRDEF(R2, R10)
+PAIRDEF(R2, R11)
+PAIRDEF(R2, R12)
+PAIRDEF(R2, LR)
+PAIRSTK(R2, STK)
+
+PAIRBEG(R3)
+PAIRDEF(R3, R0)
+PAIRDEF(R3, R1)
+PAIRDEF(R3, R2)
+PAIRDEF(R3, R4)
+PAIRDEF(R3, R5)
+PAIRDEF(R3, R6)
+PAIRDEF(R3, R7)
+PAIRDEF(R3, R8)
+PAIRDEF(R3, R9)
+PAIRDEF(R3, R10)
+PAIRDEF(R3, R11)
+PAIRDEF(R3, R12)
+PAIRDEF(R3, LR)
+PAIRSTK(R3, STK)
+
+PAIRBEG(R4)
+PAIRDEF(R4, R0)
+PAIRDEF(R4, R1)
+PAIRDEF(R4, R2)
+PAIRDEF(R4, R3)
+PAIRDEF(R4, R5)
+PAIRDEF(R4, R6)
+PAIRDEF(R4, R7)
+PAIRDEF(R4, R8)
+PAIRDEF(R4, R9)
+PAIRDEF(R4, R10)
+PAIRDEF(R4, R11)
+PAIRDEF(R4, R12)
+PAIRDEF(R4, LR)
+PAIRSTK(R4, STK)
+
+PAIRBEG(R5)
+PAIRDEF(R5, R0)
+PAIRDEF(R5, R1)
+PAIRDEF(R5, R2)
+PAIRDEF(R5, R3)
+PAIRDEF(R5, R4)
+PAIRDEF(R5, R6)
+PAIRDEF(R5, R7)
+PAIRDEF(R5, R8)
+PAIRDEF(R5, R9)
+PAIRDEF(R5, R10)
+PAIRDEF(R5, R11)
+PAIRDEF(R5, R12)
+PAIRDEF(R5, LR)
+PAIRSTK(R5, STK)
+
+PAIRBEG(R6)
+PAIRDEF(R6, R0)
+PAIRDEF(R6, R1)
+PAIRDEF(R6, R2)
+PAIRDEF(R6, R3)
+PAIRDEF(R6, R4)
+PAIRDEF(R6, R5)
+PAIRDEF(R6, R7)
+PAIRDEF(R6, R8)
+PAIRDEF(R6, R9)
+PAIRDEF(R6, R10)
+PAIRDEF(R6, R11)
+PAIRDEF(R6, R12)
+PAIRDEF(R6, LR)
+PAIRSTK(R6, STK)
+
+PAIRBEG(R7)
+PAIRDEF(R7, R0)
+PAIRDEF(R7, R1)
+PAIRDEF(R7, R2)
+PAIRDEF(R7, R3)
+PAIRDEF(R7, R4)
+PAIRDEF(R7, R5)
+PAIRDEF(R7, R6)
+PAIRDEF(R7, R8)
+PAIRDEF(R7, R9)
+PAIRDEF(R7, R10)
+PAIRDEF(R7, R11)
+PAIRDEF(R7, R12)
+PAIRDEF(R7, LR)
+PAIRSTK(R7, STK)
+
+PAIRBEG(R8)
+PAIRDEF(R8, R0)
+PAIRDEF(R8, R1)
+PAIRDEF(R8, R2)
+PAIRDEF(R8, R3)
+PAIRDEF(R8, R4)
+PAIRDEF(R8, R5)
+PAIRDEF(R8, R6)
+PAIRDEF(R8, R7)
+PAIRDEF(R8, R9)
+PAIRDEF(R8, R10)
+PAIRDEF(R8, R11)
+PAIRDEF(R8, R12)
+PAIRDEF(R8, LR)
+PAIRSTK(R8, STK)
+
+PAIRBEG(R9)
+PAIRDEF(R9, R0)
+PAIRDEF(R9, R1)
+PAIRDEF(R9, R2)
+PAIRDEF(R9, R3)
+PAIRDEF(R9, R4)
+PAIRDEF(R9, R5)
+PAIRDEF(R9, R6)
+PAIRDEF(R9, R7)
+PAIRDEF(R9, R8)
+PAIRDEF(R9, R10)
+PAIRDEF(R9, R11)
+PAIRDEF(R9, R12)
+PAIRDEF(R9, LR)
+PAIRSTK(R9, STK)
+
+PAIRBEG(R10)
+PAIRDEF(R10, R0)
+PAIRDEF(R10, R1)
+PAIRDEF(R10, R2)
+PAIRDEF(R10, R3)
+PAIRDEF(R10, R4)
+PAIRDEF(R10, R5)
+PAIRDEF(R10, R6)
+PAIRDEF(R10, R7)
+PAIRDEF(R10, R8)
+PAIRDEF(R10, R9)
+PAIRDEF(R10, R11)
+PAIRDEF(R10, R12)
+PAIRDEF(R10, LR)
+PAIRSTK(R10, STK)
+
+PAIRBEG(R11)
+PAIRDEF(R11, R0)
+PAIRDEF(R11, R1)
+PAIRDEF(R11, R2)
+PAIRDEF(R11, R3)
+PAIRDEF(R11, R4)
+PAIRDEF(R11, R5)
+PAIRDEF(R11, R6)
+PAIRDEF(R11, R7)
+PAIRDEF(R11, R8)
+PAIRDEF(R11, R9)
+PAIRDEF(R11, R10)
+PAIRDEF(R11, R12)
+PAIRDEF(R11, LR)
+PAIRSTK(R11, STK)
+
+PAIRBEG(R12)
+PAIRDEF(R12, R0)
+PAIRDEF(R12, R1)
+PAIRDEF(R12, R2)
+PAIRDEF(R12, R3)
+PAIRDEF(R12, R4)
+PAIRDEF(R12, R5)
+PAIRDEF(R12, R6)
+PAIRDEF(R12, R7)
+PAIRDEF(R12, R8)
+PAIRDEF(R12, R9)
+PAIRDEF(R12, R10)
+PAIRDEF(R12, R11)
+PAIRDEF(R12, LR)
+PAIRSTK(R12, STK)
+
+PAIRBEG(LR)
+PAIRDEF(LR, R0)
+PAIRDEF(LR, R1)
+PAIRDEF(LR, R2)
+PAIRDEF(LR, R3)
+PAIRDEF(LR, R4)
+PAIRDEF(LR, R5)
+PAIRDEF(LR, R6)
+PAIRDEF(LR, R7)
+PAIRDEF(LR, R8)
+PAIRDEF(LR, R9)
+PAIRDEF(LR, R10)
+PAIRDEF(LR, R11)
+PAIRDEF(LR, R12)
+PAIRSTK(LR, STK)
+
+PAIRBEG(STK)
+PAIRSTK(STK, R0)
+PAIRSTK(STK, R1)
+PAIRSTK(STK, R2)
+PAIRSTK(STK, R3)
+PAIRSTK(STK, R4)
+PAIRSTK(STK, R5)
+PAIRSTK(STK, R6)
+PAIRSTK(STK, R7)
+PAIRSTK(STK, R8)
+PAIRSTK(STK, R9)
+PAIRSTK(STK, R10)
+PAIRSTK(STK, R11)
+PAIRSTK(STK, R12)
+PAIRSTK(STK, LR)
+
+#endif
+
+/*****************************************************************************/
+
+#undef PAIRBEG
+#undef PAIRDEF
+#undef PAIRSTK
+
+/*****************************************************************************/
diff --git a/src/jit/regset.cpp b/src/jit/regset.cpp
new file mode 100644
index 0000000000..2980f96813
--- /dev/null
+++ b/src/jit/regset.cpp
@@ -0,0 +1,3777 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX RegSet XX
+XX XX
+XX Represents the register set, and their states during code generation XX
+XX Can select an unused register, keeps track of the contents of the XX
+XX registers, and can spill registers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "emit.h"
+
+/*****************************************************************************/
+
+#ifdef _TARGET_ARM64_
+const regMaskSmall regMasks[] = {
+#define REGDEF(name, rnum, mask, xname, wname) mask,
+#include "register.h"
+};
+#else // !_TARGET_ARM64_
+const regMaskSmall regMasks[] = {
+#define REGDEF(name, rnum, mask, sname) mask,
+#include "register.h"
+};
+#endif
+
+#ifdef _TARGET_X86_
+const regMaskSmall regFPMasks[] = {
+#define REGDEF(name, rnum, mask, sname) mask,
+#include "registerfp.h"
+};
+#endif // _TARGET_X86_
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX RegSet XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+void RegSet::rsClearRegsModified()
+{
+#ifndef LEGACY_BACKEND
+ assert(m_rsCompiler->lvaDoneFrameLayout < Compiler::FINAL_FRAME_LAYOUT);
+#endif // !LEGACY_BACKEND
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("Clearing modified regs.\n");
+ }
+ rsModifiedRegsMaskInitialized = true;
+#endif // DEBUG
+
+ rsModifiedRegsMask = RBM_NONE;
+}
+
+void RegSet::rsSetRegsModified(regMaskTP mask DEBUGARG(bool suppressDump))
+{
+ assert(mask != RBM_NONE);
+ assert(rsModifiedRegsMaskInitialized);
+
+#ifndef LEGACY_BACKEND
+ // We can't update the modified registers set after final frame layout (that is, during code
+ // generation and after). Ignore prolog and epilog generation: they call register tracking to
+ // modify rbp, for example, even in functions that use rbp as a frame pointer. Make sure normal
+ // code generation isn't actually adding to set of modified registers.
+ // Frame layout is only affected by callee-saved registers, so only ensure that callee-saved
+ // registers aren't modified after final frame layout.
+ assert((m_rsCompiler->lvaDoneFrameLayout < Compiler::FINAL_FRAME_LAYOUT) || m_rsCompiler->compGeneratingProlog ||
+ m_rsCompiler->compGeneratingEpilog ||
+ (((rsModifiedRegsMask | mask) & RBM_CALLEE_SAVED) == (rsModifiedRegsMask & RBM_CALLEE_SAVED)));
+#endif // !LEGACY_BACKEND
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose && !suppressDump)
+ {
+ if (rsModifiedRegsMask != (rsModifiedRegsMask | mask))
+ {
+ printf("Marking regs modified: ");
+ dspRegMask(mask);
+ printf(" (");
+ dspRegMask(rsModifiedRegsMask);
+ printf(" => ");
+ dspRegMask(rsModifiedRegsMask | mask);
+ printf(")\n");
+ }
+ }
+#endif // DEBUG
+
+ rsModifiedRegsMask |= mask;
+}
+
+void RegSet::rsRemoveRegsModified(regMaskTP mask)
+{
+ assert(mask != RBM_NONE);
+ assert(rsModifiedRegsMaskInitialized);
+
+#ifndef LEGACY_BACKEND
+ // See comment in rsSetRegsModified().
+ assert((m_rsCompiler->lvaDoneFrameLayout < Compiler::FINAL_FRAME_LAYOUT) || m_rsCompiler->compGeneratingProlog ||
+ m_rsCompiler->compGeneratingEpilog ||
+ (((rsModifiedRegsMask & ~mask) & RBM_CALLEE_SAVED) == (rsModifiedRegsMask & RBM_CALLEE_SAVED)));
+#endif // !LEGACY_BACKEND
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("Removing modified regs: ");
+ dspRegMask(mask);
+ if (rsModifiedRegsMask == (rsModifiedRegsMask & ~mask))
+ {
+ printf(" (unchanged)");
+ }
+ else
+ {
+ printf(" (");
+ dspRegMask(rsModifiedRegsMask);
+ printf(" => ");
+ dspRegMask(rsModifiedRegsMask & ~mask);
+ printf(")");
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+
+ rsModifiedRegsMask &= ~mask;
+}
+
+void RegSet::SetMaskVars(regMaskTP newMaskVars)
+{
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tLive regs: ");
+ if (_rsMaskVars == newMaskVars)
+ {
+ printf("(unchanged) ");
+ }
+ else
+ {
+ printRegMaskInt(_rsMaskVars);
+ m_rsCompiler->getEmitter()->emitDispRegSet(_rsMaskVars);
+ printf(" => ");
+ }
+ printRegMaskInt(newMaskVars);
+ m_rsCompiler->getEmitter()->emitDispRegSet(newMaskVars);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ _rsMaskVars = newMaskVars;
+}
+
+#ifdef DEBUG
+
+RegSet::rsStressRegsType RegSet::rsStressRegs()
+{
+#ifndef LEGACY_BACKEND
+ return RS_STRESS_NONE;
+#else // LEGACY_BACKEND
+ rsStressRegsType val = (rsStressRegsType)JitConfig.JitStressRegs();
+ if (val == RS_STRESS_NONE && m_rsCompiler->compStressCompile(Compiler::STRESS_REGS, 15))
+ val = RS_PICK_BAD_REG;
+ return val;
+#endif // LEGACY_BACKEND
+}
+#endif // DEBUG
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ * Includes 'includeHint' if 'regs' is empty
+ */
+
+regMaskTP RegSet::rsUseIfZero(regMaskTP regs, regMaskTP includeHint)
+{
+ return regs ? regs : includeHint;
+}
+
+/*****************************************************************************
+ * Excludes 'excludeHint' if it results in a non-empty mask
+ */
+
+regMaskTP RegSet::rsExcludeHint(regMaskTP regs, regMaskTP excludeHint)
+{
+ regMaskTP OKmask = regs & ~excludeHint;
+ return OKmask ? OKmask : regs;
+}
+
+/*****************************************************************************
+ * Narrows choice by 'narrowHint' if it results in a non-empty mask
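+ * For example, rsNarrowHint(rsRegMaskFree(), hint) prefers the hinted registers but
+ * falls back to the full free set when none of the hinted registers are available.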
+ */
+
+regMaskTP RegSet::rsNarrowHint(regMaskTP regs, regMaskTP narrowHint)
+{
+ regMaskTP narrowed = regs & narrowHint;
+ return narrowed ? narrowed : regs;
+}
+
+/*****************************************************************************
+ * Excludes 'exclude' from regs if non-zero, or from RBM_ALLINT
+ */
+
+regMaskTP RegSet::rsMustExclude(regMaskTP regs, regMaskTP exclude)
+{
+ // Try to exclude from current set
+ regMaskTP OKmask = regs & ~exclude;
+
+ // If the current set won't work, exclude from RBM_ALLINT
+ if (OKmask == RBM_NONE)
+ OKmask = (RBM_ALLINT & ~exclude);
+
+ assert(OKmask);
+
+ return OKmask;
+}
+
+/*****************************************************************************
+ *
+ * The following returns a mask that yields all free registers.
+ */
+
+// inline
+regMaskTP RegSet::rsRegMaskFree()
+{
+ /* Any register that is locked must also be marked as 'used' */
+
+ assert((rsMaskUsed & rsMaskLock) == rsMaskLock);
+
+ /* Any register that isn't used and doesn't hold a variable is free */
+
+ return RBM_ALLINT & ~(rsMaskUsed | rsMaskVars | rsMaskResvd);
+}
+
+/*****************************************************************************
+ *
+ * The following returns a mask of registers that may be grabbed.
+ */
+
+// inline
+regMaskTP RegSet::rsRegMaskCanGrab()
+{
+ /* Any register that is locked must also be marked as 'used' */
+
+ assert((rsMaskUsed & rsMaskLock) == rsMaskLock);
+
+ /* Any register that isn't locked and doesn't hold a var can be grabbed */
+
+ regMaskTP result = (RBM_ALLINT & ~(rsMaskLock | rsMaskVars));
+
+#ifdef _TARGET_ARM_
+
+ // On the ARM when we pass structs in registers we set the rsUsedTree[]
+ // to be the full TYP_STRUCT tree, which doesn't allow us to spill/unspill
+ // these argument registers. To fix JitStress issues that can occur
+ // when rsPickReg tries to spill one of these registers we just remove them
+ // from the set of registers that we can grab
+ //
+ regMaskTP structArgMask = RBM_NONE;
+ // Load all the variable arguments in registers back to their registers.
+ for (regNumber reg = REG_ARG_FIRST; reg <= REG_ARG_LAST; reg = REG_NEXT(reg))
+ {
+ GenTreePtr regHolds = rsUsedTree[reg];
+ if ((regHolds != NULL) && (regHolds->TypeGet() == TYP_STRUCT))
+ {
+ structArgMask |= genRegMask(reg);
+ }
+ }
+ result &= ~structArgMask;
+#endif
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Pick a free register. It is guaranteed that a register is available.
+ * Note that rsPickReg() can spill a register, whereas rsPickFreeReg() will not.
+ */
+
+// inline
+regNumber RegSet::rsPickFreeReg(regMaskTP regMaskHint)
+{
+ regMaskTP freeRegs = rsRegMaskFree();
+ assert(freeRegs != RBM_NONE);
+
+ regMaskTP regs = rsNarrowHint(freeRegs, regMaskHint);
+
+ return rsGrabReg(regs);
+}
+
+/*****************************************************************************
+ *
+ * Mark the given set of registers as used and locked.
+ */
+
+// inline
+void RegSet::rsLockReg(regMaskTP regMask)
+{
+ /* Must not be already marked as either used or locked */
+
+ assert((rsMaskUsed & regMask) == 0);
+ rsMaskUsed |= regMask;
+ assert((rsMaskLock & regMask) == 0);
+ rsMaskLock |= regMask;
+}
+
+/*****************************************************************************
+ *
+ * Mark an already used set of registers as locked.
+ */
+
+// inline
+void RegSet::rsLockUsedReg(regMaskTP regMask)
+{
+ /* Must not be already marked as locked. Must be already marked as used. */
+
+ assert((rsMaskLock & regMask) == 0);
+ assert((rsMaskUsed & regMask) == regMask);
+
+ rsMaskLock |= regMask;
+}
+
+/*****************************************************************************
+ *
+ * Mark the given set of registers as no longer used/locked.
+ */
+
+// inline
+void RegSet::rsUnlockReg(regMaskTP regMask)
+{
+ /* Must be currently marked as both used and locked */
+
+ assert((rsMaskUsed & regMask) == regMask);
+ rsMaskUsed -= regMask;
+ assert((rsMaskLock & regMask) == regMask);
+ rsMaskLock -= regMask;
+}
+
+/*****************************************************************************
+ *
+ * Mark the given set of registers as no longer locked.
+ */
+
+// inline
+void RegSet::rsUnlockUsedReg(regMaskTP regMask)
+{
+ /* Must be currently marked as both used and locked */
+
+ assert((rsMaskUsed & regMask) == regMask);
+ assert((rsMaskLock & regMask) == regMask);
+ rsMaskLock -= regMask;
+}
+
+/*****************************************************************************
+ *
+ * Mark the given set of registers as used and locked. It may already have
+ * been marked as used.
+ */
+
+// inline
+void RegSet::rsLockReg(regMaskTP regMask, regMaskTP* usedMask)
+{
+ /* Is it already marked as used? */
+
+ regMaskTP used = (rsMaskUsed & regMask);
+ regMaskTP unused = (regMask & ~used);
+
+ if (used)
+ rsLockUsedReg(used);
+
+ if (unused)
+ rsLockReg(unused);
+
+ *usedMask = used;
+}
+
+/*****************************************************************************
+ *
+ * Mark the given set of registers as no longer
+ */
+
+// inline
+void RegSet::rsUnlockReg(regMaskTP regMask, regMaskTP usedMask)
+{
+ regMaskTP unused = (regMask & ~usedMask);
+
+ if (usedMask)
+ rsUnlockUsedReg(usedMask);
+
+ if (unused)
+ rsUnlockReg(unused);
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Assume all registers contain garbage (called at start of codegen and when
+ * we encounter a code label).
+ */
+
+// inline
+void RegTracker::rsTrackRegClr()
+{
+ assert(RV_TRASH == 0);
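+ // RV_TRASH is zero, so clearing the array marks every register's value as unknown.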
+ memset(rsRegValues, 0, sizeof(rsRegValues));
+}
+
+/*****************************************************************************
+ *
+ * Trash the rsRegValues associated with a register
+ */
+
+// inline
+void RegTracker::rsTrackRegTrash(regNumber reg)
+{
+ /* Keep track of which registers we ever touch */
+
+ regSet->rsSetRegsModified(genRegMask(reg));
+
+ /* Record the new value for the register */
+
+ rsRegValues[reg].rvdKind = RV_TRASH;
+}
+
+/*****************************************************************************
+ *
+ * calls rsTrackRegTrash on the set of registers in regmask
+ */
+
+// inline
+void RegTracker::rsTrackRegMaskTrash(regMaskTP regMask)
+{
+ regMaskTP regBit = 1;
+
+ for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (regBit > regMask)
+ {
+ break;
+ }
+
+ if (regBit & regMask)
+ {
+ rsTrackRegTrash(regNum);
+ }
+ }
+}
+
+/*****************************************************************************/
+
+// inline
+void RegTracker::rsTrackRegIntCns(regNumber reg, ssize_t val)
+{
+ assert(genIsValidIntReg(reg));
+
+ /* Keep track of which registers we ever touch */
+
+ regSet->rsSetRegsModified(genRegMask(reg));
+
+ /* Record the new value for the register */
+
+ rsRegValues[reg].rvdKind = RV_INT_CNS;
+ rsRegValues[reg].rvdIntCnsVal = val;
+}
+
+/*****************************************************************************/
+
+// inline
+void RegTracker::rsTrackRegLclVarLng(regNumber reg, unsigned var, bool low)
+{
+ assert(genIsValidIntReg(reg));
+
+ if (compiler->lvaTable[var].lvAddrExposed)
+ {
+ return;
+ }
+
+ /* Keep track of which registers we ever touch */
+
+ regSet->rsSetRegsModified(genRegMask(reg));
+
+ /* Record the new value for the register */
+
+ rsRegValues[reg].rvdKind = (low ? RV_LCL_VAR_LNG_LO : RV_LCL_VAR_LNG_HI);
+ rsRegValues[reg].rvdLclVarNum = var;
+}
+
+/*****************************************************************************/
+
+// inline
+bool RegTracker::rsTrackIsLclVarLng(regValKind rvKind)
+{
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return false;
+ }
+
+ if (rvKind == RV_LCL_VAR_LNG_LO || rvKind == RV_LCL_VAR_LNG_HI)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************/
+
+// inline
+void RegTracker::rsTrackRegClsVar(regNumber reg, GenTreePtr clsVar)
+{
+ rsTrackRegTrash(reg);
+}
+
+/*****************************************************************************/
+
+// inline
+void RegTracker::rsTrackRegAssign(GenTree* op1, GenTree* op2)
+{
+ /* Constant/bitvalue has precedence over local */
+ switch (rsRegValues[op2->gtRegNum].rvdKind)
+ {
+ case RV_INT_CNS:
+ break;
+
+ default:
+
+ /* Mark RHS register as containing the value */
+
+ switch (op1->gtOper)
+ {
+ case GT_LCL_VAR:
+ rsTrackRegLclVar(op2->gtRegNum, op1->gtLclVarCommon.gtLclNum);
+ break;
+ case GT_CLS_VAR:
+ rsTrackRegClsVar(op2->gtRegNum, op1);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+#ifdef LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Given a regmask, find the best regPairNo that can be formed
+ * or return REG_PAIR_NONE if no register pair can be formed
+ */
+
+regPairNo RegSet::rsFindRegPairNo(regMaskTP regAllowedMask)
+{
+ regPairNo regPair;
+
+ // Remove any special purpose registers such as SP, EBP, etc...
+ regMaskTP specialUseMask = (rsMaskResvd | RBM_SPBASE);
+#if ETW_EBP_FRAMED
+ specialUseMask |= RBM_FPBASE;
+#else
+ if (m_rsCompiler->codeGen->isFramePointerUsed())
+ specialUseMask |= RBM_FPBASE;
+#endif
+
+ regAllowedMask &= ~specialUseMask;
+
+ /* Check if regAllowedMask has zero or one bits set */
+ if ((regAllowedMask & (regAllowedMask - 1)) == 0)
+ {
+ /* If so we won't be able to find a reg pair */
+ return REG_PAIR_NONE;
+ }
+
+#ifdef _TARGET_X86_
+ if (regAllowedMask & RBM_EAX)
+ {
+ /* EAX is available, see if we can pair it with another reg */
+
+ if (regAllowedMask & RBM_EDX)
+ {
+ regPair = REG_PAIR_EAXEDX;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_ECX)
+ {
+ regPair = REG_PAIR_EAXECX;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBX)
+ {
+ regPair = REG_PAIR_EAXEBX;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_ESI)
+ {
+ regPair = REG_PAIR_EAXESI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EDI)
+ {
+ regPair = REG_PAIR_EAXEDI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBP)
+ {
+ regPair = REG_PAIR_EAXEBP;
+ goto RET;
+ }
+ }
+
+ if (regAllowedMask & RBM_ECX)
+ {
+ /* ECX is available, see if we can pair it with another reg */
+
+ if (regAllowedMask & RBM_EDX)
+ {
+ regPair = REG_PAIR_ECXEDX;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBX)
+ {
+ regPair = REG_PAIR_ECXEBX;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_ESI)
+ {
+ regPair = REG_PAIR_ECXESI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EDI)
+ {
+ regPair = REG_PAIR_ECXEDI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBP)
+ {
+ regPair = REG_PAIR_ECXEBP;
+ goto RET;
+ }
+ }
+
+ if (regAllowedMask & RBM_EDX)
+ {
+ /* EDX is available, see if we can pair it with another reg */
+
+ if (regAllowedMask & RBM_EBX)
+ {
+ regPair = REG_PAIR_EDXEBX;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_ESI)
+ {
+ regPair = REG_PAIR_EDXESI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EDI)
+ {
+ regPair = REG_PAIR_EDXEDI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBP)
+ {
+ regPair = REG_PAIR_EDXEBP;
+ goto RET;
+ }
+ }
+
+ if (regAllowedMask & RBM_EBX)
+ {
+ /* EBX is available, see if we can pair it with another reg */
+
+ if (regAllowedMask & RBM_ESI)
+ {
+ regPair = REG_PAIR_EBXESI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EDI)
+ {
+ regPair = REG_PAIR_EBXEDI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBP)
+ {
+ regPair = REG_PAIR_EBXEBP;
+ goto RET;
+ }
+ }
+
+ if (regAllowedMask & RBM_ESI)
+ {
+ /* ESI is available, see if we can pair it with another reg */
+
+ if (regAllowedMask & RBM_EDI)
+ {
+ regPair = REG_PAIR_ESIEDI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBP)
+ {
+ regPair = REG_PAIR_EBPESI;
+ goto RET;
+ }
+ }
+
+ if (regAllowedMask & RBM_EDI)
+ {
+ /* EDI is available, see if we can pair it with another reg */
+
+ if (regAllowedMask & RBM_EBP)
+ {
+ regPair = REG_PAIR_EBPEDI;
+ goto RET;
+ }
+ }
+#endif
+
+#ifdef _TARGET_ARM_
+ // ARM is symmetric, so don't bother to prefer some pairs to others
+ //
+ // Iterate the registers in the order specified by rpRegTmpOrder/raRegTmpOrder
+
+ for (unsigned index1 = 0; index1 < REG_TMP_ORDER_COUNT; index1++)
+ {
+ regNumber reg1;
+ if (m_rsCompiler->rpRegAllocDone)
+ reg1 = raRegTmpOrder[index1];
+ else
+ reg1 = rpRegTmpOrder[index1];
+
+ regMaskTP reg1Mask = genRegMask(reg1);
+
+ if ((regAllowedMask & reg1Mask) == 0)
+ continue;
+
+ for (unsigned index2 = index1 + 1; index2 < REG_TMP_ORDER_COUNT; index2++)
+ {
+ regNumber reg2;
+ if (m_rsCompiler->rpRegAllocDone)
+ reg2 = raRegTmpOrder[index2];
+ else
+ reg2 = rpRegTmpOrder[index2];
+
+ regMaskTP reg2Mask = genRegMask(reg2);
+
+ if ((regAllowedMask & reg2Mask) == 0)
+ continue;
+
+ regMaskTP pairMask = genRegMask(reg1) | genRegMask(reg2);
+
+ // if reg1 is larger than reg2 then swap the registers
+ if (reg1 > reg2)
+ {
+ regNumber regT = reg1;
+ reg1 = reg2;
+ reg2 = regT;
+ }
+
+ regPair = gen2regs2pair(reg1, reg2);
+ return regPair;
+ }
+ }
+#endif
+
+ assert(!"Unreachable code");
+ regPair = REG_PAIR_NONE;
+
+#ifdef _TARGET_X86_
+RET:
+#endif
+
+ return regPair;
+}
+
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************/
+
+RegSet::RegSet(Compiler* compiler, GCInfo& gcInfo) : m_rsCompiler(compiler), m_rsGCInfo(gcInfo)
+{
+ /* Initialize the spill logic */
+
+ rsSpillInit();
+
+ /* Initialize the argument register count */
+ // TODO-Cleanup: Consider moving intRegState and floatRegState to RegSet. They used
+ // to be initialized here, but are now initialized in the CodeGen constructor.
+ // intRegState.rsCurRegArgNum = 0;
+ // floatRegState.rsCurRegArgNum = 0;
+
+ rsMaskResvd = RBM_NONE;
+
+#ifdef LEGACY_BACKEND
+ rsMaskMult = RBM_NONE;
+ rsMaskUsed = RBM_NONE;
+ rsMaskLock = RBM_NONE;
+#endif // LEGACY_BACKEND
+
+#ifdef _TARGET_ARMARCH_
+ rsMaskCalleeSaved = RBM_NONE;
+#endif // _TARGET_ARMARCH_
+
+#ifdef _TARGET_ARM_
+ rsMaskPreSpillRegArg = RBM_NONE;
+ rsMaskPreSpillAlign = RBM_NONE;
+#endif
+
+#ifdef DEBUG
+ rsModifiedRegsMaskInitialized = false;
+#endif // DEBUG
+}
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ *
+ * Marks the register that holds the given operand value as 'used'. If 'addr'
+ * is non-zero, the register is part of a complex address mode that needs to
+ * be marked if the register is ever spilled.
+ */
+
+void RegSet::rsMarkRegUsed(GenTreePtr tree, GenTreePtr addr)
+{
+ var_types type;
+ regNumber regNum;
+ regMaskTP regMask;
+
+ /* The value must be sitting in a register */
+
+ assert(tree);
+ assert(tree->gtFlags & GTF_REG_VAL);
+
+ type = tree->TypeGet();
+ regNum = tree->gtRegNum;
+
+ if (isFloatRegType(type))
+ regMask = genRegMaskFloat(regNum, type);
+ else
+ regMask = genRegMask(regNum);
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s currently holds ", m_rsCompiler->compRegVarName(regNum));
+ Compiler::printTreeID(tree);
+ if (addr != NULL)
+ {
+ printf("/");
+ Compiler::printTreeID(addr);
+ }
+ else if (tree->gtOper == GT_CNS_INT)
+ {
+ if (tree->IsIconHandle())
+ printf(" / Handle(0x%08p)", dspPtr(tree->gtIntCon.gtIconVal));
+ else
+ printf(" / Constant(0x%X)", tree->gtIntCon.gtIconVal);
+ }
+ printf("]\n");
+ }
+#endif // DEBUG
+
+ /* Remember whether the register holds a pointer */
+
+ m_rsGCInfo.gcMarkRegPtrVal(regNum, type);
+
+ /* No locked register may ever be marked as free */
+
+ assert((rsMaskLock & rsRegMaskFree()) == 0);
+
+ /* Is the register used by two different values simultaneously? */
+
+ if (regMask & rsMaskUsed)
+ {
+ /* Save the preceding use information */
+
+ rsRecMultiReg(regNum, type);
+ }
+
+ /* Set the register's bit in the 'used' bitset */
+
+ rsMaskUsed |= regMask;
+
+ /* Remember what values are in what registers, in case we have to spill */
+ assert(regNum != REG_SPBASE);
+ assert(rsUsedTree[regNum] == NULL);
+ rsUsedTree[regNum] = tree;
+ assert(rsUsedAddr[regNum] == NULL);
+ rsUsedAddr[regNum] = addr;
+}
+
+void RegSet::rsMarkArgRegUsedByPromotedFieldArg(GenTreePtr promotedStructArg, regNumber regNum, bool isGCRef)
+{
+ regMaskTP regMask;
+
+ /* The value must be sitting in a register */
+
+ assert(promotedStructArg);
+ assert(promotedStructArg->TypeGet() == TYP_STRUCT);
+
+ assert(regNum < MAX_REG_ARG);
+ regMask = genRegMask(regNum);
+ assert((regMask & RBM_ARG_REGS) != RBM_NONE);
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s currently holds ", m_rsCompiler->compRegVarName(regNum));
+ Compiler::printTreeID(promotedStructArg);
+ if (promotedStructArg->gtOper == GT_CNS_INT)
+ {
+ if (promotedStructArg->IsIconHandle())
+ printf(" / Handle(0x%08p)", dspPtr(promotedStructArg->gtIntCon.gtIconVal));
+ else
+ printf(" / Constant(0x%X)", promotedStructArg->gtIntCon.gtIconVal);
+ }
+ printf("]\n");
+ }
+#endif
+
+ /* Remember whether the register holds a pointer */
+
+ m_rsGCInfo.gcMarkRegPtrVal(regNum, (isGCRef ? TYP_REF : TYP_INT));
+
+ /* No locked register may ever be marked as free */
+
+ assert((rsMaskLock & rsRegMaskFree()) == 0);
+
+ /* Is the register used by two different values simultaneously? */
+
+ if (regMask & rsMaskUsed)
+ {
+ /* Save the preceding use information */
+
+ assert(isValidIntArgReg(regNum)); // We are expecting only integer argument registers here
+ rsRecMultiReg(regNum, TYP_I_IMPL);
+ }
+
+ /* Set the register's bit in the 'used' bitset */
+
+ rsMaskUsed |= regMask;
+
+ /* Remember what values are in what registers, in case we have to spill */
+ assert(regNum != REG_SPBASE);
+ assert(rsUsedTree[regNum] == 0);
+ rsUsedTree[regNum] = promotedStructArg;
+}
+
+/*****************************************************************************
+ *
+ * Marks the register pair that holds the given operand value as 'used'.
+ */
+
+void RegSet::rsMarkRegPairUsed(GenTreePtr tree)
+{
+ regNumber regLo;
+ regNumber regHi;
+ regPairNo regPair;
+ regMaskTP regMask;
+
+ /* The value must be sitting in a register */
+
+ assert(tree);
+#if CPU_HAS_FP_SUPPORT
+ assert(tree->gtType == TYP_LONG);
+#else
+ assert(tree->gtType == TYP_LONG || tree->gtType == TYP_DOUBLE);
+#endif
+ assert(tree->gtFlags & GTF_REG_VAL);
+
+ regPair = tree->gtRegPair;
+ regMask = genRegPairMask(regPair);
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s currently holds \n", m_rsCompiler->compRegVarName(regLo));
+ Compiler::printTreeID(tree);
+ printf("/lo32\n");
+ printf("\t\t\t\t\t\t\tThe register %s currently holds \n", m_rsCompiler->compRegVarName(regHi));
+ Compiler::printTreeID(tree);
+ printf("/hi32\n");
+ }
+#endif
+
+ /* Neither register obviously holds a pointer value */
+
+ m_rsGCInfo.gcMarkRegSetNpt(regMask);
+
+ /* No locked register may ever be marked as free */
+
+ assert((rsMaskLock & rsRegMaskFree()) == 0);
+
+ /* Are the registers used by two different values simultaneously? */
+
+ if (rsMaskUsed & genRegMask(regLo))
+ {
+ /* Save the preceding use information */
+
+ rsRecMultiReg(regLo, TYP_INT);
+ }
+
+ if (rsMaskUsed & genRegMask(regHi))
+ {
+ /* Save the preceding use information */
+
+ rsRecMultiReg(regHi, TYP_INT);
+ }
+
+ /* Can't mark a register pair more than once as used */
+
+ // assert((regMask & rsMaskUsed) == 0);
+
+ /* Mark the registers as 'used' */
+
+ rsMaskUsed |= regMask;
+
+ /* Remember what values are in what registers, in case we have to spill */
+
+ if (regLo != REG_STK)
+ {
+ assert(rsUsedTree[regLo] == 0);
+ assert(regLo != REG_SPBASE);
+ rsUsedTree[regLo] = tree;
+ }
+
+ if (regHi != REG_STK)
+ {
+ assert(rsUsedTree[regHi] == 0);
+ assert(regHi != REG_SPBASE);
+ rsUsedTree[regHi] = tree;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the given tree is currently held in reg.
+ * Note that reg may be used by multiple trees, in which case we have
+ * to search rsMultiDesc[reg].
+ */
+
+bool RegSet::rsIsTreeInReg(regNumber reg, GenTreePtr tree)
+{
+ /* First do the trivial check */
+
+ if (rsUsedTree[reg] == tree)
+ return true;
+
+ /* If the register is used by multiple trees, we have to search the list
+ in rsMultiDesc[reg] */
+
+ if (genRegMask(reg) & rsMaskMult)
+ {
+ SpillDsc* multiDesc = rsMultiDesc[reg];
+ assert(multiDesc);
+
+ for (/**/; multiDesc; multiDesc = multiDesc->spillNext)
+ {
+ if (multiDesc->spillTree == tree)
+ return true;
+
+ assert((!multiDesc->spillNext) == (!multiDesc->spillMoreMultis));
+ }
+ }
+
+ /* Not found. It must be spilled */
+
+ return false;
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Finds the SpillDsc corresponding to 'tree' assuming it was spilled from 'reg'.
+ */
+
+RegSet::SpillDsc* RegSet::rsGetSpillInfo(GenTreePtr tree,
+ regNumber reg,
+ SpillDsc** pPrevDsc
+#ifdef LEGACY_BACKEND
+ ,
+ SpillDsc** pMultiDsc
+#endif // LEGACY_BACKEND
+ )
+{
+ /* Normally, trees are unspilled in the order of being spilled due to
+ the post-order walking of trees during code-gen. However, this will
+ not be true for something like a GT_ARR_ELEM node */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef LEGACY_BACKEND
+ SpillDsc* multi = rsSpillDesc[reg];
+#endif // LEGACY_BACKEND
+
+ SpillDsc* prev;
+ SpillDsc* dsc;
+ for (prev = nullptr, dsc = rsSpillDesc[reg]; dsc != nullptr; prev = dsc, dsc = dsc->spillNext)
+ {
+#ifdef LEGACY_BACKEND
+ if (prev && !prev->spillMoreMultis)
+ multi = dsc;
+#endif // LEGACY_BACKEND
+
+ if (dsc->spillTree == tree)
+ {
+ break;
+ }
+ }
+
+ if (pPrevDsc)
+ {
+ *pPrevDsc = prev;
+ }
+#ifdef LEGACY_BACKEND
+ if (pMultiDsc)
+ *pMultiDsc = multi;
+#endif // LEGACY_BACKEND
+
+ return dsc;
+}
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ *
+ * Mark the register set given by the register mask as not used.
+ */
+
+void RegSet::rsMarkRegFree(regMaskTP regMask)
+{
+ /* Are we freeing any multi-use registers? */
+
+ if (regMask & rsMaskMult)
+ {
+ rsMultRegFree(regMask);
+ return;
+ }
+
+ m_rsGCInfo.gcMarkRegSetNpt(regMask);
+
+ regMaskTP regBit = 1;
+
+ for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (regBit > regMask)
+ break;
+
+ if (regBit & regMask)
+ {
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s no longer holds ", m_rsCompiler->compRegVarName(regNum));
+ Compiler::printTreeID(rsUsedTree[regNum]);
+ Compiler::printTreeID(rsUsedAddr[regNum]);
+ printf("\n");
+ }
+#endif
+ GenTreePtr usedTree = rsUsedTree[regNum];
+ assert(usedTree != NULL);
+ rsUsedTree[regNum] = NULL;
+ rsUsedAddr[regNum] = NULL;
+#ifdef _TARGET_ARM_
+ if (usedTree->TypeGet() == TYP_DOUBLE)
+ {
+ regNum = REG_NEXT(regNum);
+ regBit <<= 1;
+
+ assert(regBit & regMask);
+ assert(rsUsedTree[regNum] == NULL);
+ assert(rsUsedAddr[regNum] == NULL);
+ }
+#endif
+ }
+ }
+
+ /* Remove the register set from the 'used' set */
+
+ assert((regMask & rsMaskUsed) == regMask);
+ rsMaskUsed -= regMask;
+
+ /* No locked register may ever be marked as free */
+
+ assert((rsMaskLock & rsRegMaskFree()) == 0);
+}
+
+/*****************************************************************************
+ *
+ * Free the register from the given tree. If the register holds other tree,
+ * it will still be marked as used, else it will be completely free.
+ */
+
+void RegSet::rsMarkRegFree(regNumber reg, GenTreePtr tree)
+{
+ assert(rsIsTreeInReg(reg, tree));
+ regMaskTP regMask = genRegMask(reg);
+
+ /* If the register is not multi-used, it's easy. Just do the default work */
+
+ if (!(regMask & rsMaskMult))
+ {
+ rsMarkRegFree(regMask);
+ return;
+ }
+
+ /* The register is multi-used. We just have to free it from the given tree but
+ leave the other trees that use the register as they are. The register may
+ no longer be multi-used once it is freed from the given tree */
+
+ /* Is the tree in rsUsedTree[] or in rsMultiDesc[]?
+ If it is in rsUsedTree[], update rsUsedTree[] */
+
+ if (rsUsedTree[reg] == tree)
+ {
+ rsRmvMultiReg(reg);
+ return;
+ }
+
+ /* The tree is in rsMultiDesc[] instead of in rsUsedTree[]. Find the desc
+ corresponding to the tree and just remove it from there */
+
+ for (SpillDsc *multiDesc = rsMultiDesc[reg], *prevDesc = NULL; multiDesc;
+ prevDesc = multiDesc, multiDesc = multiDesc->spillNext)
+ {
+ /* If we find the descriptor with the tree we are looking for,
+ discard it */
+
+ if (multiDesc->spillTree != tree)
+ continue;
+
+ if (prevDesc == NULL)
+ {
+ /* The very first desc in rsMultiDesc[] matched. If there are
+ no further descs, then the register is no longer multi-used */
+
+ if (!multiDesc->spillMoreMultis)
+ rsMaskMult -= regMask;
+
+ rsMultiDesc[reg] = multiDesc->spillNext;
+ }
+ else
+ {
+ /* There is at least one other desc before the match, so the
+ register is still multi-used. However, we may have to
+ update spillMoreMultis for the previous desc. */
+
+ if (!multiDesc->spillMoreMultis)
+ prevDesc->spillMoreMultis = false;
+
+ prevDesc->spillNext = multiDesc->spillNext;
+ }
+
+ SpillDsc::freeDsc(this, multiDesc);
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tRegister %s multi-use dec for ", m_rsCompiler->compRegVarName(reg));
+ Compiler::printTreeID(tree);
+ printf(" - now ");
+ Compiler::printTreeID(rsUsedTree[reg]);
+ printf(" multMask=" REG_MASK_ALL_FMT "\n", rsMaskMult);
+ }
+#endif
+
+ return;
+ }
+
+ assert(!"Didn't find the spilled tree in rsMultiDesc[]");
+}
+
+/*****************************************************************************
+ *
+ * Mark the register set given by the register mask as not used; there may
+ * be some 'multiple-use' registers in the set.
+ */
+
+void RegSet::rsMultRegFree(regMaskTP regMask)
+{
+ /* Free any multiple-use registers first */
+ regMaskTP nonMultMask = regMask & ~rsMaskMult;
+ regMaskTP myMultMask = regMask & rsMaskMult;
+
+ if (myMultMask)
+ {
+ regNumber regNum;
+ regMaskTP regBit;
+
+ for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (regBit > myMultMask)
+ break;
+
+ if (regBit & myMultMask)
+ {
+ /* Free the multi-use register 'regNum' */
+ var_types type = rsRmvMultiReg(regNum);
+#ifdef _TARGET_ARM_
+ if (genIsValidFloatReg(regNum) && (type == TYP_DOUBLE))
+ {
+ // On ARM32, We skip the second register for a TYP_DOUBLE
+ regNum = REG_NEXT(regNum);
+ regBit <<= 1;
+ }
+#endif // _TARGET_ARM_
+ }
+ }
+ }
+
+ /* If there are any single-use registers, free them */
+
+ if (nonMultMask)
+ rsMarkRegFree(nonMultMask);
+}
+
+/*****************************************************************************
+ *
+ * Returns the number of registers that are currently free which appear in needReg.
+ */
+
+unsigned RegSet::rsFreeNeededRegCount(regMaskTP needReg)
+{
+ regMaskTP regNeededFree = rsRegMaskFree() & needReg;
+ unsigned cntFree = 0;
+
+ /* While some registers are free ... */
+
+ while (regNeededFree)
+ {
+ /* Remove the next register bit and bump the count */
+
+ regNeededFree -= genFindLowestBit(regNeededFree);
+ cntFree += 1;
+ }
+
+ return cntFree;
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Record the fact that the given register now contains the given local
+ * variable. Pointers are handled specially since reusing the register
+ * will extend the lifetime of a pointer register which is not a register
+ * variable.
+ */
+
+void RegTracker::rsTrackRegLclVar(regNumber reg, unsigned var)
+{
+ LclVarDsc* varDsc = &compiler->lvaTable[var];
+ assert(reg != REG_STK);
+#if CPU_HAS_FP_SUPPORT
+ assert(varTypeIsFloating(varDsc->TypeGet()) == false);
+#endif
+ // Kill the register before doing anything in case we take a
+ // shortcut out of here
+ rsRegValues[reg].rvdKind = RV_TRASH;
+
+ if (compiler->lvaTable[var].lvAddrExposed)
+ {
+ return;
+ }
+
+ /* Keep track of which registers we ever touch */
+
+ regSet->rsSetRegsModified(genRegMask(reg));
+
+#if REDUNDANT_LOAD
+
+ /* Is the variable a pointer? */
+
+ if (varTypeIsGC(varDsc->TypeGet()))
+ {
+ /* Don't track pointer register vars */
+
+ if (varDsc->lvRegister)
+ {
+ return;
+ }
+
+ /* Don't track when fully interruptible */
+
+ if (compiler->genInterruptible)
+ {
+ return;
+ }
+ }
+ else if (varDsc->lvNormalizeOnLoad())
+ {
+ return;
+ }
+
+#endif
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s now holds V%02u\n", compiler->compRegVarName(reg), var);
+ }
+#endif
+
+ /* Record the new value for the register. ptr var needed for
+ * lifetime extension
+ */
+
+ rsRegValues[reg].rvdKind = RV_LCL_VAR;
+
+ // If this is a cast of a 64 bit int, then we must have the low 32 bits.
+ if (genActualType(varDsc->TypeGet()) == TYP_LONG)
+ {
+ rsRegValues[reg].rvdKind = RV_LCL_VAR_LNG_LO;
+ }
+
+ rsRegValues[reg].rvdLclVarNum = var;
+}
+
+/*****************************************************************************/
+
+void RegTracker::rsTrackRegSwap(regNumber reg1, regNumber reg2)
+{
+ RegValDsc tmp;
+
+ tmp = rsRegValues[reg1];
+ rsRegValues[reg1] = rsRegValues[reg2];
+ rsRegValues[reg2] = tmp;
+}
+
+void RegTracker::rsTrackRegCopy(regNumber reg1, regNumber reg2)
+{
+ /* Keep track of which registers we ever touch */
+
+ assert(reg1 < REG_COUNT);
+ assert(reg2 < REG_COUNT);
+
+ regSet->rsSetRegsModified(genRegMask(reg1));
+
+ rsRegValues[reg1] = rsRegValues[reg2];
+}
+
+#ifdef LEGACY_BACKEND
+
+/*****************************************************************************
+ * One of the operands of this complex address mode has been spilled
+ */
+
+void rsAddrSpillOper(GenTreePtr addr)
+{
+ if (addr)
+ {
+ assert(addr->gtOper == GT_IND || addr->gtOper == GT_ARR_ELEM || addr->gtOper == GT_LEA ||
+ addr->gtOper == GT_CMPXCHG);
+
+ // GTF_SPILLED_OP2 says "both operands have been spilled"
+ assert((addr->gtFlags & GTF_SPILLED_OP2) == 0);
+
+ if ((addr->gtFlags & GTF_SPILLED_OPER) == 0)
+ addr->gtFlags |= GTF_SPILLED_OPER;
+ else
+ addr->gtFlags |= GTF_SPILLED_OP2;
+ }
+}
+
+void rsAddrUnspillOper(GenTreePtr addr)
+{
+ if (addr)
+ {
+ assert(addr->gtOper == GT_IND || addr->gtOper == GT_ARR_ELEM || addr->gtOper == GT_LEA ||
+ addr->gtOper == GT_CMPXCHG);
+
+ assert((addr->gtFlags & GTF_SPILLED_OPER) != 0);
+
+ // Both operands spilled?
+ if ((addr->gtFlags & GTF_SPILLED_OP2) != 0)
+ addr->gtFlags &= ~GTF_SPILLED_OP2;
+ else
+ addr->gtFlags &= ~GTF_SPILLED_OPER;
+ }
+}
+
+void RegSet::rsSpillRegIfUsed(regNumber reg)
+{
+ if (rsMaskUsed & genRegMask(reg))
+ {
+ rsSpillReg(reg);
+ }
+}
+
+#endif // LEGACY_BACKEND
+
+//------------------------------------------------------------
+// rsSpillTree: Spill the tree held in 'reg'.
+//
+// Arguments:
+// reg - Register of tree node that is to be spilled
+// tree - GenTree node that is being spilled
+// regIdx - Register index identifying the specific result
+// register of a multi-reg call node. For single-reg
+// producing tree nodes its value is zero.
+//
+// Return Value:
+// None.
+//
+// Assumption:
+// RyuJIT backend specific: in case of multi-reg call nodes, GTF_SPILL
+// flag associated with the reg that is being spilled is cleared. The
+// caller of this method is expected to clear GTF_SPILL flag on call
+// node after all of its registers marked for spilling are spilled.
+//
+void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx /* =0 */)
+{
+ assert(tree != nullptr);
+
+ GenTreeCall* call = nullptr;
+ var_types treeType;
+
+#ifndef LEGACY_BACKEND
+ if (tree->IsMultiRegCall())
+ {
+ call = tree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ treeType = retTypeDesc->GetReturnRegType(regIdx);
+ }
+ else
+#endif
+ {
+ treeType = tree->TypeGet();
+ }
+
+ var_types tempType = Compiler::tmpNormalizeType(treeType);
+ regMaskTP mask;
+ bool floatSpill = false;
+
+ if (isFloatRegType(treeType))
+ {
+ floatSpill = true;
+ mask = genRegMaskFloat(reg, treeType);
+ }
+ else
+ {
+ mask = genRegMask(reg);
+ }
+
+ rsNeededSpillReg = true;
+
+#ifdef LEGACY_BACKEND
+ // The register we're spilling must be used but not locked
+ // or an enregistered variable.
+
+ assert((mask & rsMaskUsed) == mask);
+ assert((mask & rsMaskLock) == 0);
+ assert((mask & rsMaskVars) == 0);
+#endif // LEGACY_BACKEND
+
+#ifndef LEGACY_BACKEND
+ // We should only be spilling nodes marked for spill,
+ // vars should be handled elsewhere, and to prevent
+ // spilling twice clear GTF_SPILL flag on tree node.
+ //
+ // In case of multi-reg call nodes only the spill flag
+ // associated with the reg is cleared. Spill flag on
+ // call node should be cleared by the caller of this method.
+ assert(tree->gtOper != GT_REG_VAR);
+ assert((tree->gtFlags & GTF_SPILL) != 0);
+
+ unsigned regFlags = 0;
+ if (call != nullptr)
+ {
+ regFlags = call->GetRegSpillFlagByIdx(regIdx);
+ assert((regFlags & GTF_SPILL) != 0);
+ regFlags &= ~GTF_SPILL;
+ }
+ else
+ {
+ assert(!varTypeIsMultiReg(tree));
+ tree->gtFlags &= ~GTF_SPILL;
+ }
+#endif // !LEGACY_BACKEND
+
+#if CPU_LONG_USES_REGPAIR
+ // Are we spilling a part of a register pair?
+ if (treeType == TYP_LONG)
+ {
+ tempType = TYP_I_IMPL;
+ assert(genRegPairLo(tree->gtRegPair) == reg || genRegPairHi(tree->gtRegPair) == reg);
+ }
+ else
+ {
+ assert(tree->gtFlags & GTF_REG_VAL);
+ assert(tree->gtRegNum == reg);
+ }
+#else
+ assert(tree->InReg());
+ assert(tree->gtRegNum == reg || (call != nullptr && call->GetRegNumByIdx(regIdx) == reg));
+#endif // CPU_LONG_USES_REGPAIR
+
+ // Allocate a spill descriptor to record this spill
+ SpillDsc* spill = SpillDsc::alloc(m_rsCompiler, this, tempType);
+
+ // Grab a temp to store the spilled value
+ TempDsc* temp = m_rsCompiler->tmpGetTemp(tempType);
+ spill->spillTemp = temp;
+ tempType = temp->tdTempType();
+
+ // Remember what it is we have spilled
+ spill->spillTree = tree;
+#ifdef LEGACY_BACKEND
+ spill->spillAddr = rsUsedAddr[reg];
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s spilled with ", m_rsCompiler->compRegVarName(reg));
+ Compiler::printTreeID(spill->spillTree);
+#ifdef LEGACY_BACKEND
+ printf("/");
+ Compiler::printTreeID(spill->spillAddr);
+#endif // LEGACY_BACKEND
+ }
+#endif
+
+#ifdef LEGACY_BACKEND
+ // Is the register part of a complex address mode?
+ rsAddrSpillOper(rsUsedAddr[reg]);
+#endif // LEGACY_BACKEND
+
+ // 'lastDsc' is 'spill' for simple cases, and will point to the last
+ // multi-use descriptor if 'reg' is being multi-used
+ SpillDsc* lastDsc = spill;
+
+#ifdef LEGACY_BACKEND
+ if ((rsMaskMult & mask) == 0)
+ {
+ spill->spillMoreMultis = false;
+ }
+ else
+ {
+ // The register is being multi-used and will have entries in
+ // rsMultiDesc[reg]. Spill all of them (ie. move them to
+ // rsSpillDesc[reg]).
+ // When we unspill the reg, they will all be moved back to
+ // rsMultiDesc[].
+
+ spill->spillMoreMultis = true;
+
+ SpillDsc* nextDsc = rsMultiDesc[reg];
+
+ do
+ {
+ assert(nextDsc != nullptr);
+
+ // Is this multi-use part of a complex address mode?
+ rsAddrSpillOper(nextDsc->spillAddr);
+
+ // Mark the tree node as having been spilled
+ rsMarkSpill(nextDsc->spillTree, reg);
+
+ // lastDsc points to the last of the multi-spill descrs for 'reg'
+ nextDsc->spillTemp = temp;
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf(", ");
+ Compiler::printTreeID(nextDsc->spillTree);
+ printf("/");
+ Compiler::printTreeID(nextDsc->spillAddr);
+ }
+#endif
+
+ lastDsc->spillNext = nextDsc;
+ lastDsc = nextDsc;
+
+ nextDsc = nextDsc->spillNext;
+ } while (lastDsc->spillMoreMultis);
+
+ rsMultiDesc[reg] = nextDsc;
+
+ // 'reg' is no longer considered to be multi-used. We will set this
+ // mask again when this value gets unspilled
+ rsMaskMult &= ~mask;
+ }
+#endif // LEGACY_BACKEND
+
+ // Insert the spill descriptor(s) in the list
+ lastDsc->spillNext = rsSpillDesc[reg];
+ rsSpillDesc[reg] = spill;
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\n");
+ }
+#endif
+
+ // Generate the code to spill the register
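+ // Float/double values are stored using the tree's own type; other values use the normalized temp type.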
+ var_types storeType = floatSpill ? treeType : tempType;
+
+ m_rsCompiler->codeGen->spillReg(storeType, temp, reg);
+
+ // Mark the tree node as having been spilled
+ rsMarkSpill(tree, reg);
+
+#ifdef LEGACY_BACKEND
+ // The register is now free
+ rsMarkRegFree(mask);
+#else
+ // In case of multi-reg call node also mark the specific
+ // result reg as spilled.
+ if (call != nullptr)
+ {
+ regFlags |= GTF_SPILLED;
+ call->SetRegSpillFlagByIdx(regFlags, regIdx);
+ }
+#endif //! LEGACY_BACKEND
+}
+
+#if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
+/*****************************************************************************
+*
+* Spill the top of the FP x87 stack.
+*/
+void RegSet::rsSpillFPStack(GenTreePtr tree)
+{
+ SpillDsc* spill;
+ TempDsc* temp;
+ var_types treeType = tree->TypeGet();
+
+ assert(tree->OperGet() == GT_CALL);
+ spill = SpillDsc::alloc(m_rsCompiler, this, treeType);
+
+ /* Grab a temp to store the spilled value */
+
+ spill->spillTemp = temp = m_rsCompiler->tmpGetTemp(treeType);
+
+ /* Remember what it is we have spilled */
+
+ spill->spillTree = tree;
+ SpillDsc* lastDsc = spill;
+
+ regNumber reg = tree->gtRegNum;
+ lastDsc->spillNext = rsSpillDesc[reg];
+ rsSpillDesc[reg] = spill;
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ printf("\n");
+#endif
+ // m_rsCompiler->codeGen->inst_FS_ST(INS_fstp, emitActualTypeSize(treeType), temp, 0);
+ m_rsCompiler->codeGen->getEmitter()->emitIns_S(INS_fstp, emitActualTypeSize(treeType), temp->tdTempNum(), 0);
+
+ /* Mark the tree node as having been spilled */
+
+ rsMarkSpill(tree, reg);
+}
+#endif // defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
+
+#ifdef LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Spill the given register (which we assume to be currently marked as used).
+ */
+
+void RegSet::rsSpillReg(regNumber reg)
+{
+ /* We must know the value in the register that we are spilling */
+ GenTreePtr tree = rsUsedTree[reg];
+
+#ifdef _TARGET_ARM_
+ if (tree == NULL && genIsValidFloatReg(reg) && !genIsValidDoubleReg(reg))
+ {
+ reg = REG_PREV(reg);
+ assert(rsUsedTree[reg]);
+ assert(rsUsedTree[reg]->TypeGet() == TYP_DOUBLE);
+ tree = rsUsedTree[reg];
+ }
+#endif
+
+ rsSpillTree(reg, tree);
+
+ /* The register no longer holds its original value */
+
+ rsUsedTree[reg] = NULL;
+}
+
+/*****************************************************************************
+ *
+ * Spill all registers in 'regMask' that are currently marked as used.
+ */
+
+void RegSet::rsSpillRegs(regMaskTP regMask)
+{
+ /* The registers we're spilling must not be locked,
+ or enregistered variables */
+
+ assert((regMask & rsMaskLock) == 0);
+ assert((regMask & rsMaskVars) == 0);
+
+ /* Only spill what's currently marked as used */
+
+ regMask &= rsMaskUsed;
+ assert(regMask);
+
+ regNumber regNum;
+ regMaskTP regBit;
+
+ for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (regMask & regBit)
+ {
+ rsSpillReg(regNum);
+
+ regMask &= rsMaskUsed;
+
+ if (!regMask)
+ break;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * The following table determines the order in which registers are considered
+ * for internal tree temps to live in
+ */
+
+extern const regNumber raRegTmpOrder[] = {REG_TMP_ORDER};
+extern const regNumber rpRegTmpOrder[] = {REG_PREDICT_ORDER};
+#if FEATURE_FP_REGALLOC
+extern const regNumber raRegFltTmpOrder[] = {REG_FLT_TMP_ORDER};
+#endif
+
+/*****************************************************************************
+ *
+ * Choose a register from the given set in the preferred order (see above);
+ * if no registers are in the set return REG_STK.
+ */
+
+regNumber RegSet::rsPickRegInTmpOrder(regMaskTP regMask)
+{
+ if (regMask == RBM_NONE)
+ return REG_STK;
+
+ bool firstPass = true;
+ regMaskTP avoidMask =
+ ~rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; // We want to avoid using any new callee saved register
+
+ while (true)
+ {
+ /* Iterate the registers in the order specified by raRegTmpOrder */
+
+ for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
+ {
+ regNumber candidateReg = raRegTmpOrder[index];
+ regMaskTP candidateMask = genRegMask(candidateReg);
+
+ // For a FP base frame, don't use FP register.
+ if (m_rsCompiler->codeGen->isFramePointerUsed() && (candidateMask == RBM_FPBASE))
+ continue;
+
+ // For the first pass avoid selecting a never used register when there are other registers available
+ if (firstPass && ((candidateMask & avoidMask) != 0))
+ continue;
+
+ if (regMask & candidateMask)
+ return candidateReg;
+ }
+
+ if (firstPass == true)
+ firstPass = false; // OK, now we are willing to select a never used register
+ else
+ break;
+ }
+
+ return REG_STK;
+}
+
+/*****************************************************************************
+ * Choose a register from the 'regMask' set and return it. If no registers in
+ * the set are currently free, one of them will be spilled (even if other
+ * registers - not in the set - are currently free).
+ *
+ * If you don't require a register from a particular set, you should use rsPickReg() instead.
+ *
+ * rsModifiedRegsMask is modified to include the returned register.
+ */
+
+regNumber RegSet::rsGrabReg(regMaskTP regMask)
+{
+ regMaskTP OKmask;
+ regNumber regNum;
+ regMaskTP regBit;
+
+ assert(regMask);
+ regMask &= ~rsMaskLock;
+ assert(regMask);
+
+ /* See if one of the desired registers happens to be free */
+
+ OKmask = regMask & rsRegMaskFree();
+
+ regNum = rsPickRegInTmpOrder(OKmask);
+ if (REG_STK != regNum)
+ {
+ goto RET;
+ }
+
+ /* We'll have to spill one of the registers in 'regMask' */
+
+ OKmask = regMask & rsRegMaskCanGrab();
+ assert(OKmask);
+
+ for (regNum = REG_FIRST, regBit = 1; (regBit & OKmask) == 0; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (regNum >= REG_COUNT)
+ {
+ assert(!"no register to grab!");
+ NO_WAY("Could not grab a register, Predictor should have prevented this!");
+ }
+ }
+
+ /* This will be the victim -- spill it */
+ rsSpillReg(regNum);
+
+ /* Make sure we did find a register to spill */
+ assert(genIsValidReg(regNum));
+
+RET:
+ /* Keep track of which registers we ever touch */
+ rsSetRegsModified(genRegMask(regNum));
+ return regNum;
+}
+
+/*****************************************************************************
+ * Find a register to use and return it, spilling if necessary.
+ *
+ * Look for a register in the following order: First, try and find a free register
+ * in 'regBest' (if 'regBest' is RBM_NONE, skip this step). Second, try to find a
+ * free register in 'regMask' (if 'regMask' is RBM_NONE, skip this step). Note that
+ * 'regBest' doesn't need to be a subset of 'regMask'. Third, find any free
+ * register. Fourth, spill a register. The register to spill will be in 'regMask',
+ * if 'regMask' is not RBM_NONE.
+ *
+ * Note that 'regMask' and 'regBest' are purely recommendations, and can be ignored;
+ * the caller can't expect that the returned register will be in those sets. In
+ * particular, under register stress, we specifically will pick registers not in
+ * these sets to ensure that callers don't require a register from those sets
+ * (and to ensure callers can handle the spilling that might ensue).
+ *
+ * Calling rsPickReg() with the default arguments (which sets 'regMask' and 'regBest' to RBM_NONE)
+ * is equivalent to calling rsGrabReg(rsRegMaskFree()).
+ *
+ * rsModifiedRegsMask is modified to include the returned register.
+ */
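+// Illustrative call (hypothetical caller, masks assumed):
+//     regNumber reg = rsPickReg(RBM_ALLINT, RBM_EAX);
+// prefers EAX, falls back to any free integer register, and may still return a
+// register outside both masks (after a spill) under register stress.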
+
+regNumber RegSet::rsPickReg(regMaskTP regMask, regMaskTP regBest)
+{
+ regNumber regNum;
+ regMaskTP spillMask;
+ regMaskTP canGrabMask;
+
+#ifdef DEBUG
+ if (rsStressRegs() >= 1)
+ {
+ /* 'regMask' is purely a recommendation, and callers should be
+ able to handle the case where it is not satisfied.
+ The logic here tries to return ~regMask to check that all callers
+ are prepared to handle such a case */
+
+ regMaskTP badRegs = rsMaskMult & rsRegMaskCanGrab();
+
+ badRegs = rsUseIfZero(badRegs, rsMaskUsed & rsRegMaskCanGrab());
+ badRegs = rsUseIfZero(badRegs, rsRegMaskCanGrab());
+ badRegs = rsExcludeHint(badRegs, regMask);
+
+ assert(badRegs != RBM_NONE);
+
+ return rsGrabReg(badRegs);
+ }
+
+#endif
+
+ regMaskTP freeMask = rsRegMaskFree();
+
+AGAIN:
+
+ /* By default we'd prefer to accept all available registers */
+
+ regMaskTP OKmask = freeMask;
+
+ // OKmask = rsNarrowHint(OKmask, rsUselessRegs());
+
+ /* Is there a 'best' register set? */
+
+ if (regBest)
+ {
+ OKmask &= regBest;
+ if (OKmask)
+ goto TRY_REG;
+ else
+ goto TRY_ALL;
+ }
+
+ /* Was a register set recommended by the caller? */
+
+ if (regMask)
+ {
+ OKmask &= regMask;
+ if (!OKmask)
+ goto TRY_ALL;
+ }
+
+TRY_REG:
+
+ /* Iterate the registers in the order specified by raRegTmpOrder */
+
+ regNum = rsPickRegInTmpOrder(OKmask);
+ if (REG_STK != regNum)
+ {
+ goto RET;
+ }
+
+TRY_ALL:
+
+ /* Were we considering 'regBest' ? */
+
+ if (regBest)
+ {
+ /* 'regBest' is no good -- ignore it and try 'regMask' instead */
+
+ regBest = RBM_NONE;
+ goto AGAIN;
+ }
+
+ /* Now let's consider all available registers */
+
+ /* Were we limited in our consideration? */
+
+ if (!regMask)
+ {
+ /* We need to spill one of the free registers */
+
+ spillMask = freeMask;
+ }
+ else
+ {
+ /* Did we not consider all free registers? */
+
+ if ((regMask & freeMask) != freeMask)
+ {
+ /* The recommended regset didn't work, so try all available regs */
+
+ regNum = rsPickRegInTmpOrder(freeMask);
+ if (REG_STK != regNum)
+ goto RET;
+ }
+
+ /* If we're going to spill, might as well go for the right one */
+
+ spillMask = regMask;
+ }
+
+ /* Make sure we can spill some register. */
+
+ canGrabMask = rsRegMaskCanGrab();
+ if ((spillMask & canGrabMask) == 0)
+ spillMask = canGrabMask;
+
+ assert(spillMask);
+
+ /* We have no choice but to spill one of the regs */
+
+ return rsGrabReg(spillMask);
+
+RET:
+
+ rsSetRegsModified(genRegMask(regNum));
+ return regNum;
+}
+
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Get the temp that was spilled from the given register (and free its
+ * spill descriptor while we're at it). Returns the temp (i.e. local var)
+ */
+
+TempDsc* RegSet::rsGetSpillTempWord(regNumber reg, SpillDsc* dsc, SpillDsc* prevDsc)
+{
+ assert((prevDsc == nullptr) || (prevDsc->spillNext == dsc));
+
+#ifdef LEGACY_BACKEND
+ /* Is dsc the last of a set of multi-used values */
+
+ if (prevDsc && prevDsc->spillMoreMultis && !dsc->spillMoreMultis)
+ prevDsc->spillMoreMultis = false;
+#endif // LEGACY_BACKEND
+
+ /* Remove this spill entry from the register's list */
+
+ (prevDsc ? prevDsc->spillNext : rsSpillDesc[reg]) = dsc->spillNext;
+
+ /* Remember which temp the value is in */
+
+ TempDsc* temp = dsc->spillTemp;
+
+ SpillDsc::freeDsc(this, dsc);
+
+ /* return the temp variable */
+
+ return temp;
+}
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ *
+ * Reload the value that was spilled from the given register (and free its
+ * spill descriptor while we're at it). Returns the new register (which will
+ * be a member of 'needReg' if that value is non-zero).
+ *
+ * 'willKeepNewReg' indicates if the caller intends to mark newReg as used.
+ * If not, then we can't unspill the other multi-use descriptors (if any).
+ * Instead, we will just hold on to the temp and unspill them
+ * again as needed.
+ */
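+// Overall flow: look up the spill descriptor for (tree, oldReg), free it and recover
+// the spill temp, pick a new home (preferring 'oldReg' when it is allowed and free),
+// reload the value from the temp, and, for a kept multi-use value, move the remaining
+// multi-use descriptors from 'oldReg' over to the new register.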
+
+regNumber RegSet::rsUnspillOneReg(GenTreePtr tree, regNumber oldReg, KeepReg willKeepNewReg, regMaskTP needReg)
+{
+ /* Was oldReg multi-used when it was spilled? */
+
+ SpillDsc *prevDsc, *multiDsc;
+ SpillDsc* spillDsc = rsGetSpillInfo(tree, oldReg, &prevDsc, &multiDsc);
+ noway_assert((spillDsc != NULL) && (multiDsc != NULL));
+
+ bool multiUsed = multiDsc->spillMoreMultis;
+
+    /* We will use multiDsc to walk the rest of the spill list (if it's
+       multiUsed). Since the rsGetSpillTempWord() call below removes spillDsc
+       from that list, we have to take care of the case where multiDsc == spillDsc
+       by advancing multiDsc to spillDsc->spillNext */
+ if (multiUsed && multiDsc == spillDsc)
+ {
+ assert(spillDsc->spillNext);
+ multiDsc = spillDsc->spillNext;
+ }
+
+ /* Get the temp and free the spill-descriptor */
+
+ TempDsc* temp = rsGetSpillTempWord(oldReg, spillDsc, prevDsc);
+
+ // Pick a new home for the value:
+ // This must be a register matching the 'needReg' mask, if it is non-zero.
+    // Additionally, if 'oldReg' is in 'needReg' and it is free we will select oldReg.
+ // Also note that the rsGrabReg() call below may cause the chosen register to be spilled.
+ //
+ regMaskTP prefMask;
+ regMaskTP freeMask;
+ regNumber newReg;
+ var_types regType;
+ var_types loadType;
+
+ bool floatUnspill = false;
+
+#if FEATURE_FP_REGALLOC
+ floatUnspill = genIsValidFloatReg(oldReg);
+#endif
+
+ if (floatUnspill)
+ {
+ if (temp->tdTempType() == TYP_DOUBLE)
+ regType = TYP_DOUBLE;
+ else
+ regType = TYP_FLOAT;
+ loadType = regType;
+ prefMask = genRegMaskFloat(oldReg, regType);
+ freeMask = RegFreeFloat();
+ }
+ else
+ {
+ regType = TYP_I_IMPL;
+ loadType = temp->tdTempType();
+ prefMask = genRegMask(oldReg);
+ freeMask = rsRegMaskFree();
+ }
+
+ if ((((prefMask & needReg) != 0) || (needReg == 0)) && ((prefMask & freeMask) != 0))
+ {
+ needReg = prefMask;
+ }
+
+ if (floatUnspill)
+ {
+ RegisterPreference pref(RBM_ALLFLOAT, needReg);
+ newReg = PickRegFloat(regType, &pref, true);
+ }
+ else
+ {
+ newReg = rsGrabReg(rsUseIfZero(needReg, RBM_ALLINT));
+ }
+
+ m_rsCompiler->codeGen->trashReg(newReg);
+
+ /* Reload the value from the saved location into the new register */
+
+ m_rsCompiler->codeGen->reloadReg(loadType, temp, newReg);
+
+ if (multiUsed && (willKeepNewReg == KEEP_REG))
+ {
+ /* We will unspill all the other multi-use trees if the register
+ is going to be marked as used. If it is not going to be marked
+ as used, we will have a problem if the new register gets spilled
+ again.
+ */
+
+ /* We don't do the extra unspilling for complex address modes,
+ since someone up the call chain may have a different idea about
+ what registers are used to form the complex address mode (the
+ addrReg return value from genMakeAddressable).
+
+ Also, it is not safe to unspill all the multi-uses with a TYP_LONG.
+
+ Finally, it is not safe to unspill into a different register, because
+ the caller of genMakeAddressable caches the addrReg return value
+ (register mask), but when unspilling into a different register it's
+ not possible to inform the caller that addrReg is now different.
+ See bug #89946 for an example of this. There is an assert for this
+ in rsMarkRegFree via genDoneAddressable.
+ */
+
+ for (SpillDsc* dsc = multiDsc; /**/; dsc = dsc->spillNext)
+ {
+ if ((oldReg != newReg) || (dsc->spillAddr != NULL) || (dsc->spillTree->gtType == TYP_LONG))
+ {
+ return newReg;
+ }
+
+ if (!dsc->spillMoreMultis)
+ {
+ /* All the remaining multi-uses are fine. We will now
+ unspill them all */
+ break;
+ }
+ }
+
+ bool bFound = false;
+ SpillDsc* pDsc;
+ SpillDsc** ppPrev;
+
+ for (pDsc = rsSpillDesc[oldReg], ppPrev = &rsSpillDesc[oldReg];; pDsc = pDsc->spillNext)
+ {
+ if (pDsc == multiDsc)
+ {
+ // We've found the sequence we were searching for
+ bFound = true;
+ }
+
+ if (bFound)
+ {
+ rsAddrUnspillOper(pDsc->spillAddr);
+
+ // Mark the tree node as having been unspilled into newReg
+ rsMarkUnspill(pDsc->spillTree, newReg);
+ }
+
+ if (!pDsc->spillMoreMultis)
+ {
+ if (bFound)
+ {
+ // End of sequence
+
+                // Splice the unspilled sequence out of the old register's list
+ *ppPrev = pDsc->spillNext;
+
+ // Exit walk
+ break;
+ }
+ else
+ {
+ ppPrev = &(pDsc->spillNext);
+ }
+ }
+ }
+
+ /* pDsc points to the last multi-used descriptor from the spill-list
+ for the current value (pDsc->spillMoreMultis == false) */
+
+ pDsc->spillNext = rsMultiDesc[newReg];
+ rsMultiDesc[newReg] = multiDsc;
+
+ if (floatUnspill)
+ rsMaskMult |= genRegMaskFloat(newReg, regType);
+ else
+ rsMaskMult |= genRegMask(newReg);
+ }
+
+ /* Free the temp, it's no longer used */
+
+ m_rsCompiler->tmpRlsTemp(temp);
+
+ return newReg;
+}
+#endif // LEGACY_BACKEND
+
+//---------------------------------------------------------------------
+// rsUnspillInPlace: The given tree operand has been spilled; just mark
+// it as unspilled so that we can use it as a "normal" local.
+//
+// Arguments:
+// tree - GenTree that needs to be marked as unspilled.
+// oldReg - reg of tree that was spilled.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// 1. It is the responsibility of the caller to free the spill temp.
+// 2. RyuJIT backend specific: In case of multi-reg call node
+// GTF_SPILLED flag associated with reg is cleared. It is the
+// responsibility of caller to clear GTF_SPILLED flag on call node
+// itself after ensuring there are no outstanding regs in GTF_SPILLED
+// state.
+//
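+// Typical use (illustrative): the caller reloads the value from the returned TempDsc
+// and then releases the temp via Compiler::tmpRlsTemp(), per assumption 1 above.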
+TempDsc* RegSet::rsUnspillInPlace(GenTreePtr tree, regNumber oldReg, unsigned regIdx /* =0 */)
+{
+ assert(!isRegPairType(tree->gtType));
+
+ // Get the tree's SpillDsc
+ SpillDsc* prevDsc;
+ SpillDsc* spillDsc = rsGetSpillInfo(tree, oldReg, &prevDsc);
+ PREFIX_ASSUME(spillDsc != nullptr);
+
+ // Get the temp
+ TempDsc* temp = rsGetSpillTempWord(oldReg, spillDsc, prevDsc);
+
+ // The value is now unspilled
+ if (tree->IsMultiRegCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ unsigned flags = call->GetRegSpillFlagByIdx(regIdx);
+ flags &= ~GTF_SPILLED;
+ call->SetRegSpillFlagByIdx(flags, regIdx);
+ }
+ else
+ {
+ tree->gtFlags &= ~GTF_SPILLED;
+ }
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tTree-Node marked unspilled from ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif
+
+ return temp;
+}
+
+#ifdef LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * The given tree operand has been spilled; reload it into a register that
+ * is in 'needReg' (if 'needReg' is RBM_NONE, any register will do). If 'keepReg'
+ * is set to KEEP_REG, we'll mark the new register as used.
+ */
+
+void RegSet::rsUnspillReg(GenTreePtr tree, regMaskTP needReg, KeepReg keepReg)
+{
+ assert(!isRegPairType(tree->gtType)); // use rsUnspillRegPair()
+ regNumber oldReg = tree->gtRegNum;
+
+ /* Get the SpillDsc for the tree */
+
+ SpillDsc* spillDsc = rsGetSpillInfo(tree, oldReg);
+ PREFIX_ASSUME(spillDsc != NULL);
+
+ /* Before spillDsc is stomped on by rsUnspillOneReg(), note whether
+ * the reg was part of an address mode
+ */
+
+ GenTreePtr unspillAddr = spillDsc->spillAddr;
+
+ /* Pick a new home for the value */
+
+ regNumber newReg = rsUnspillOneReg(tree, oldReg, keepReg, needReg);
+
+ /* Mark the tree node as having been unspilled into newReg */
+
+ rsMarkUnspill(tree, newReg);
+
+ // If this reg was part of a complex address mode, need to clear this flag which
+ // tells address mode building that a component has been spilled
+
+ rsAddrUnspillOper(unspillAddr);
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s unspilled from ", m_rsCompiler->compRegVarName(newReg));
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif
+
+ /* Mark the new value as used, if the caller desires so */
+
+ if (keepReg == KEEP_REG)
+ rsMarkRegUsed(tree, unspillAddr);
+}
+#endif // LEGACY_BACKEND
+
+void RegSet::rsMarkSpill(GenTreePtr tree, regNumber reg)
+{
+ tree->gtFlags &= ~GTF_REG_VAL;
+ tree->gtFlags |= GTF_SPILLED;
+}
+
+#ifdef LEGACY_BACKEND
+
+void RegSet::rsMarkUnspill(GenTreePtr tree, regNumber reg)
+{
+#ifndef _TARGET_AMD64_
+ assert(tree->gtType != TYP_LONG);
+#endif // _TARGET_AMD64_
+
+ tree->gtFlags |= GTF_REG_VAL;
+ tree->gtFlags &= ~GTF_SPILLED;
+ tree->gtRegNum = reg;
+}
+
+/*****************************************************************************
+ *
+ * Choose a register pair from the given set (note: only registers in the
+ * given set will be considered).
+ */
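+// Strategy: prefer a pair of free acceptable registers; if exactly one acceptable
+// register is free, grab it and spill a second one (locking the first so it cannot be
+// picked as the victim); if none are free, spill two. The lower-numbered register is
+// passed as the low half of the resulting pair.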
+
+regPairNo RegSet::rsGrabRegPair(regMaskTP regMask)
+{
+ regPairNo regPair;
+ regMaskTP OKmask;
+ regNumber reg1;
+ regNumber reg2;
+
+ assert(regMask);
+ regMask &= ~rsMaskLock;
+ assert(regMask);
+
+ /* We'd prefer to choose a free register pair if possible */
+
+ OKmask = regMask & rsRegMaskFree();
+
+ /* Any takers in the recommended/free set? */
+
+ regPair = rsFindRegPairNo(OKmask);
+
+ if (regPair != REG_PAIR_NONE)
+ {
+ // The normal early exit
+
+ /* Keep track of which registers we ever touch */
+ rsSetRegsModified(genRegPairMask(regPair));
+
+ return regPair;
+ }
+
+ /* We have no choice but to spill one or two used regs */
+
+ if (OKmask)
+ {
+ /* One (and only one) register is free and acceptable - grab it */
+
+ assert(genMaxOneBit(OKmask));
+
+ for (reg1 = REG_INT_FIRST; reg1 <= REG_INT_LAST; reg1 = REG_NEXT(reg1))
+ {
+ if (OKmask & genRegMask(reg1))
+ break;
+ }
+ assert(OKmask & genRegMask(reg1));
+ }
+ else
+ {
+ /* No register is free and acceptable - we'll have to spill two */
+
+ reg1 = rsGrabReg(regMask);
+ }
+
+ /* Temporarily lock the first register so it doesn't go away */
+
+ rsLockReg(genRegMask(reg1));
+
+ /* Now grab another register */
+
+ reg2 = rsGrabReg(regMask);
+
+ /* We can unlock the first register now */
+
+ rsUnlockReg(genRegMask(reg1));
+
+ /* Convert the two register numbers into a pair */
+
+ if (reg1 < reg2)
+ regPair = gen2regs2pair(reg1, reg2);
+ else
+ regPair = gen2regs2pair(reg2, reg1);
+
+ return regPair;
+}
+
+/*****************************************************************************
+ *
+ * Choose a register pair from the given set (if non-zero) or from the set of
+ * currently available registers (if 'regMask' is zero).
+ */
+
+regPairNo RegSet::rsPickRegPair(regMaskTP regMask)
+{
+ regMaskTP OKmask;
+ regPairNo regPair;
+
+ int repeat = 0;
+
+ /* By default we'd prefer to accept all available registers */
+
+ OKmask = rsRegMaskFree();
+
+ if (regMask)
+ {
+ /* A register set was recommended by the caller */
+
+ OKmask &= regMask;
+ }
+
+AGAIN:
+
+ regPair = rsFindRegPairNo(OKmask);
+
+ if (regPair != REG_PAIR_NONE)
+ {
+ return regPair; // Normal early exit
+ }
+
+ regMaskTP freeMask;
+ regMaskTP spillMask;
+
+ /* Now let's consider all available registers */
+
+ freeMask = rsRegMaskFree();
+
+ /* Were we limited in our consideration? */
+
+ if (!regMask)
+ {
+ /* We need to spill two of the free registers */
+
+ spillMask = freeMask;
+ }
+ else
+ {
+ /* Did we not consider all free registers? */
+
+ if ((regMask & freeMask) != freeMask && repeat == 0)
+ {
+ /* The recommended regset didn't work, so try all available regs */
+
+ OKmask = freeMask;
+ repeat++;
+ goto AGAIN;
+ }
+
+ /* If we're going to spill, might as well go for the right one */
+
+ spillMask = regMask;
+ }
+
+ /* Make sure that we have at least two bits set */
+
+ if (genMaxOneBit(spillMask & rsRegMaskCanGrab()))
+ spillMask = rsRegMaskCanGrab();
+
+ assert(!genMaxOneBit(spillMask));
+
+    /* We have no choice but to spill one or two of the regs */
+
+ return rsGrabRegPair(spillMask);
+}
+
+/*****************************************************************************
+ *
+ * The given tree operand has been spilled; reload it into a register pair
+ * that is in 'needReg' (if 'needReg' is RBM_NONE, any register pair will do). If
+ * 'keepReg' is KEEP_REG, we'll mark the new register pair as used. It is
+ * assumed that the current register pair has been marked as used (modulo
+ * any spillage, of course).
+ */
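+// Each half is handled independently: a half that is still in its register is simply
+// freed, while a spilled half is reloaded via rsUnspillOneReg(), with the other half
+// temporarily locked so it cannot be chosen as the spill victim.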
+
+void RegSet::rsUnspillRegPair(GenTreePtr tree, regMaskTP needReg, KeepReg keepReg)
+{
+ assert(isRegPairType(tree->gtType));
+
+ regPairNo regPair = tree->gtRegPair;
+ regNumber regLo = genRegPairLo(regPair);
+ regNumber regHi = genRegPairHi(regPair);
+
+ /* Has the register holding the lower half been spilled? */
+
+ if (!rsIsTreeInReg(regLo, tree))
+ {
+ /* Is the upper half already in the right place? */
+
+ if (rsIsTreeInReg(regHi, tree))
+ {
+ /* Temporarily lock the high part */
+
+ rsLockUsedReg(genRegMask(regHi));
+
+ /* Pick a new home for the lower half */
+
+ regLo = rsUnspillOneReg(tree, regLo, keepReg, needReg);
+
+ /* We can unlock the high part now */
+
+ rsUnlockUsedReg(genRegMask(regHi));
+ }
+ else
+ {
+ /* Pick a new home for the lower half */
+
+ regLo = rsUnspillOneReg(tree, regLo, keepReg, needReg);
+ }
+ }
+ else
+ {
+ /* Free the register holding the lower half */
+
+ rsMarkRegFree(genRegMask(regLo));
+ }
+
+ if (regHi != REG_STK)
+ {
+ /* Has the register holding the upper half been spilled? */
+
+ if (!rsIsTreeInReg(regHi, tree))
+ {
+ regMaskTP regLoUsed;
+
+            /* Temporarily lock the low part so it doesn't get spilled */
+
+ rsLockReg(genRegMask(regLo), &regLoUsed);
+
+ /* Pick a new home for the upper half */
+
+ regHi = rsUnspillOneReg(tree, regHi, keepReg, needReg);
+
+ /* We can unlock the low register now */
+
+ rsUnlockReg(genRegMask(regLo), regLoUsed);
+ }
+ else
+ {
+ /* Free the register holding the upper half */
+
+ rsMarkRegFree(genRegMask(regHi));
+ }
+ }
+
+ /* The value is now residing in the new register */
+
+ tree->gtFlags |= GTF_REG_VAL;
+ tree->gtFlags &= ~GTF_SPILLED;
+ tree->gtRegPair = gen2regs2pair(regLo, regHi);
+
+ /* Mark the new value as used, if the caller desires so */
+
+ if (keepReg == KEEP_REG)
+ rsMarkRegPairUsed(tree);
+}
+
+/*****************************************************************************
+ *
+ * The given register is being used by multiple trees (all of which represent
+ * the same logical value). This happens mainly because of REDUNDANT_LOAD;
+ * we don't want to really spill the register, as it actually holds the
+ * value we want, but the multiple trees may be part of different
+ * addressing modes.
+ * Save the previous 'use' info so that, when we return, the register will
+ * appear unused.
+ */
+
+void RegSet::rsRecMultiReg(regNumber reg, var_types type)
+{
+ SpillDsc* spill;
+ regMaskTP regMask;
+
+ if (genIsValidFloatReg(reg) && isFloatRegType(type))
+ regMask = genRegMaskFloat(reg, type);
+ else
+ regMask = genRegMask(reg);
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tRegister %s multi-use inc for ", m_rsCompiler->compRegVarName(reg));
+ Compiler::printTreeID(rsUsedTree[reg]);
+ printf(" multMask=" REG_MASK_ALL_FMT "\n", rsMaskMult | regMask);
+ }
+#endif
+
+ /* The register is supposed to be already used */
+
+ assert(regMask & rsMaskUsed);
+
+ assert(rsUsedTree[reg]);
+
+ /* Allocate/reuse a spill descriptor */
+
+ spill = SpillDsc::alloc(m_rsCompiler, this, rsUsedTree[reg]->TypeGet());
+
+ /* Record the current 'use' info in the spill descriptor */
+
+ spill->spillTree = rsUsedTree[reg];
+ rsUsedTree[reg] = 0;
+ spill->spillAddr = rsUsedAddr[reg];
+ rsUsedAddr[reg] = 0;
+
+ /* Remember whether the register is already 'multi-use' */
+
+ spill->spillMoreMultis = ((rsMaskMult & regMask) != 0);
+
+ /* Insert the new multi-use record in the list for the register */
+
+ spill->spillNext = rsMultiDesc[reg];
+ rsMultiDesc[reg] = spill;
+
+ /* This register is now 'multi-use' */
+
+ rsMaskMult |= regMask;
+}
+
+/*****************************************************************************
+ *
+ * Free the given register, which is known to have multiple uses.
+ */
+
+var_types RegSet::rsRmvMultiReg(regNumber reg)
+{
+ SpillDsc* dsc;
+
+ assert(rsMaskMult & genRegMask(reg));
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tRegister %s multi-use dec for ", m_rsCompiler->compRegVarName(reg));
+ Compiler::printTreeID(rsUsedTree[reg]);
+ printf(" multMask=" REG_MASK_ALL_FMT "\n", rsMaskMult);
+ }
+#endif
+
+ /* Get hold of the spill descriptor for the register */
+
+ dsc = rsMultiDesc[reg];
+ assert(dsc);
+ rsMultiDesc[reg] = dsc->spillNext;
+
+ /* Copy the previous 'use' info from the descriptor */
+
+ assert(reg != REG_SPBASE);
+ rsUsedTree[reg] = dsc->spillTree;
+ rsUsedAddr[reg] = dsc->spillAddr;
+
+ if (!(dsc->spillTree->gtFlags & GTF_SPILLED))
+ m_rsGCInfo.gcMarkRegPtrVal(reg, dsc->spillTree->TypeGet());
+
+ var_types type = dsc->spillTree->TypeGet();
+ regMaskTP regMask;
+
+ if (genIsValidFloatReg(reg) && isFloatRegType(type))
+ regMask = genRegMaskFloat(reg, type);
+ else
+ regMask = genRegMask(reg);
+
+ /* Is only one use of the register left? */
+
+ if (!dsc->spillMoreMultis)
+ {
+ rsMaskMult -= regMask;
+ }
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tRegister %s multi-use dec - now ", m_rsCompiler->compRegVarName(reg));
+ Compiler::printTreeID(rsUsedTree[reg]);
+ printf(" multMask=" REG_MASK_ALL_FMT "\n", rsMaskMult);
+ }
+#endif
+
+ SpillDsc::freeDsc(this, dsc);
+ return type;
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************/
+#if REDUNDANT_LOAD
+/*****************************************************************************
+ *
+ * Search for a register which contains the given constant value.
+ * Returns the register if one is found, or REG_NA otherwise.
+ * If the closeDelta argument is non-NULL then look for a
+ * register that has a close constant value. For ARM, find
+ * the closest register value, independent of constant delta.
+ * For non-ARM, only consider values that are within -128..+127.
+ * If one is found, *closeDelta is set to the difference that needs
+ * to be added to the register returned. On x86/amd64, an lea instruction
+ * is used to set the target register using the register that
+ * contains the close integer constant.
+ */
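+// Example (illustrative values): if val is 0x1005 and some register already holds the
+// constant 0x1000, the non-ARM path reports that register with *closeDelta == 5, and
+// the caller can then form the value with a single lea off that register.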
+
+regNumber RegTracker::rsIconIsInReg(ssize_t val, ssize_t* closeDelta /* = NULL */)
+{
+ regNumber closeReg = REG_NA;
+
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return REG_NA;
+ }
+
+ for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
+ {
+ if (rsRegValues[reg].rvdKind == RV_INT_CNS)
+ {
+ ssize_t regCnsVal = rsRegValues[reg].rvdIntCnsVal;
+ if (regCnsVal == val)
+ {
+ if (closeDelta)
+ {
+ *closeDelta = 0;
+ }
+ return reg;
+ }
+ if (closeDelta)
+ {
+#ifdef _TARGET_ARM_
+ // Find the smallest delta; the caller checks the size
+ // TODO-CQ: find the smallest delta from a low register?
+                // That is, is it better to return a high register with a small
+                // constant delta, or a low register with a larger offset? It's better
+                // to have a low register with an offset within the low-register range,
+                // or a high register otherwise...
+
+ ssize_t regCnsDelta = val - regCnsVal;
+ if ((closeReg == REG_NA) || (unsigned_abs(regCnsDelta) < unsigned_abs(*closeDelta)))
+ {
+ closeReg = reg;
+ *closeDelta = regCnsDelta;
+ }
+#else
+ if (closeReg == REG_NA)
+ {
+ ssize_t regCnsDelta = val - regCnsVal;
+ /* Does delta fit inside a byte [-128..127] */
+ if (regCnsDelta == (signed char)regCnsDelta)
+ {
+ closeReg = reg;
+ *closeDelta = (int)regCnsDelta;
+ }
+ }
+#endif
+ }
+ }
+ }
+
+ /* There was not an exact match */
+
+ return closeReg; /* will always be REG_NA when closeDelta is NULL */
+}
+
+/*****************************************************************************
+ *
+ * Assume all non-integer registers contain garbage (this is called when
+ * we encounter a code label that isn't jumped to by any block; we need to
+ * clear pointer values out of the table lest the GC pointer tables get
+ * out of date).
+ */
+
+void RegTracker::rsTrackRegClrPtr()
+{
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ /* Preserve constant values */
+
+ if (rsRegValues[reg].rvdKind == RV_INT_CNS)
+ {
+ /* Make sure we don't preserve NULL (it's a pointer) */
+
+ if (rsRegValues[reg].rvdIntCnsVal != NULL)
+ {
+ continue;
+ }
+ }
+
+ /* Preserve variables known to not be pointers */
+
+ if (rsRegValues[reg].rvdKind == RV_LCL_VAR)
+ {
+ if (!varTypeIsGC(compiler->lvaTable[rsRegValues[reg].rvdLclVarNum].TypeGet()))
+ {
+ continue;
+ }
+ }
+
+ rsRegValues[reg].rvdKind = RV_TRASH;
+ }
+}
+
+/*****************************************************************************
+ *
+ * This routine trashes the registers that hold stack GCRef/ByRef variables. (VSW: 561129)
+ * It should be called at each gc-safe point.
+ *
+ * It returns a mask of the registers that used to contain tracked stack variables that
+ * were trashed.
+ *
+ */
+
+regMaskTP RegTracker::rsTrashRegsForGCInterruptability()
+{
+ regMaskTP result = RBM_NONE;
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if (rsRegValues[reg].rvdKind == RV_LCL_VAR)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[rsRegValues[reg].rvdLclVarNum];
+
+ if (!varTypeIsGC(varDsc->TypeGet()))
+ {
+ continue;
+ }
+
+            // Only stack locals get tracked.
+ assert(!varDsc->lvRegister);
+
+ rsRegValues[reg].rvdKind = RV_TRASH;
+
+ result |= genRegMask(reg);
+ }
+ }
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Search for a register which contains the given local var.
+ * Returns the register if one is found, or REG_NA otherwise.
+ * Returns REG_NA for register variables, because otherwise their lifetimes
+ * can get bungled with respect to pointer tracking.
+ */
+
+regNumber RegTracker::rsLclIsInReg(unsigned var)
+{
+ assert(var < compiler->lvaCount);
+
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return REG_NA;
+ }
+
+ /* return false if register var so genMarkLclVar can do its job */
+
+ if (compiler->lvaTable[var].lvRegister)
+ {
+ return REG_NA;
+ }
+
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if (rsRegValues[reg].rvdLclVarNum == var && rsRegValues[reg].rvdKind == RV_LCL_VAR)
+ {
+ return reg;
+ }
+ }
+
+ return REG_NA;
+}
+
+/*****************************************************************************/
+
+regPairNo RegTracker::rsLclIsInRegPair(unsigned var)
+{
+ assert(var < compiler->lvaCount);
+
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return REG_PAIR_NONE;
+ }
+
+ regValKind rvKind = RV_TRASH;
+ regNumber regNo = DUMMY_INIT(REG_NA);
+
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if (rvKind != rsRegValues[reg].rvdKind && rsTrackIsLclVarLng(rsRegValues[reg].rvdKind) &&
+ rsRegValues[reg].rvdLclVarNum == var)
+ {
+ /* first occurrence of this variable ? */
+
+ if (rvKind == RV_TRASH)
+ {
+ regNo = reg;
+ rvKind = rsRegValues[reg].rvdKind;
+ }
+ else if (rvKind == RV_LCL_VAR_LNG_HI)
+ {
+ /* We found the lower half of the long */
+
+ return gen2regs2pair(reg, regNo);
+ }
+ else
+ {
+ /* We found the upper half of the long */
+
+ assert(rvKind == RV_LCL_VAR_LNG_LO);
+ return gen2regs2pair(regNo, reg);
+ }
+ }
+ }
+
+ return REG_PAIR_NONE;
+}
+
+/*****************************************************************************/
+
+void RegTracker::rsTrashLclLong(unsigned var)
+{
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return;
+ }
+
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if (rsTrackIsLclVarLng(rsRegValues[reg].rvdKind) && rsRegValues[reg].rvdLclVarNum == var)
+ {
+ rsRegValues[reg].rvdKind = RV_TRASH;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * A local's value has changed; mark all regs that contained it as trash.
+ */
+
+void RegTracker::rsTrashLcl(unsigned var)
+{
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return;
+ }
+
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if (rsRegValues[reg].rvdKind == RV_LCL_VAR && rsRegValues[reg].rvdLclVarNum == var)
+ {
+ rsRegValues[reg].rvdKind = RV_TRASH;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * A little helper to trash the given set of registers.
+ * Usually used after a call has been generated.
+ */
+
+void RegTracker::rsTrashRegSet(regMaskTP regMask)
+{
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return;
+ }
+ regMaskTP regBit = 1;
+ for (regNumber regNum = REG_FIRST; regMask != 0; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (regBit & regMask)
+ {
+ rsTrackRegTrash(regNum);
+ regMask -= regBit;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Return a mask of registers that hold no useful value.
+ */
+
+regMaskTP RegTracker::rsUselessRegs()
+{
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return RBM_ALLINT;
+ }
+
+ regMaskTP mask = RBM_NONE;
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if (rsRegValues[reg].rvdKind == RV_TRASH)
+ {
+ mask |= genRegMask(reg);
+ }
+ }
+
+ return mask;
+}
+
+/*****************************************************************************/
+#endif // REDUNDANT_LOAD
+/*****************************************************************************/
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX TempsInfo XX
+XX XX
+XX The temporary lclVars allocated by the compiler for code generation XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+void Compiler::tmpInit()
+{
+#ifdef LEGACY_BACKEND
+ tmpDoubleSpillMax = 0;
+ tmpIntSpillMax = 0;
+#endif // LEGACY_BACKEND
+
+ tmpCount = 0;
+ tmpSize = 0;
+#ifdef DEBUG
+ tmpGetCount = 0;
+#endif
+
+ memset(tmpFree, 0, sizeof(tmpFree));
+ memset(tmpUsed, 0, sizeof(tmpUsed));
+}
+
+/* static */
+var_types Compiler::tmpNormalizeType(var_types type)
+{
+#ifndef LEGACY_BACKEND
+
+ type = genActualType(type);
+
+#else // LEGACY_BACKEND
+ if (!varTypeIsGC(type))
+ {
+ switch (genTypeStSz(type))
+ {
+ case 1:
+ type = TYP_INT; // Maps all 4-byte non-GC types to TYP_INT temps
+ break;
+ case 2:
+ type = TYP_DOUBLE; // Maps all 8-byte types to TYP_DOUBLE temps
+ break;
+ default:
+ assert(!"unexpected type");
+ }
+ }
+#endif // LEGACY_BACKEND
+
+ return type;
+}
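+// For example: under the legacy backend an 8-byte TYP_LONG temp is normalized to
+// TYP_DOUBLE and a 4-byte non-GC type to TYP_INT, while GC types (TYP_REF/TYP_BYREF)
+// are left alone so the GC can track them; the RyuJIT path just widens small int
+// types via genActualType() (e.g. TYP_SHORT becomes TYP_INT).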
+
+/*****************************************************************************
+ *
+ * Allocate a temp of the given size (and type, if tracking pointers for
+ * the garbage collector).
+ */
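+// Temps live on per-size free lists (tmpFree[tmpSlot(size)]); we first try to reuse a
+// free temp with the exact type. Only the legacy backend creates a brand-new temp
+// here; the RyuJIT path asserts instead, because tmpPreAllocateTemps() must already
+// have provided enough temps of each type.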
+
+TempDsc* Compiler::tmpGetTemp(var_types type)
+{
+ type = tmpNormalizeType(type);
+ unsigned size = genTypeSize(type);
+
+ // If TYP_STRUCT ever gets in here we do bad things (tmpSlot returns -1)
+ noway_assert(size >= sizeof(int));
+
+ /* Find the slot to search for a free temp of the right size */
+
+ unsigned slot = tmpSlot(size);
+
+ /* Look for a temp with a matching type */
+
+ TempDsc** last = &tmpFree[slot];
+ TempDsc* temp;
+
+ for (temp = *last; temp; last = &temp->tdNext, temp = *last)
+ {
+ /* Does the type match? */
+
+ if (temp->tdTempType() == type)
+ {
+ /* We have a match -- remove it from the free list */
+
+ *last = temp->tdNext;
+ break;
+ }
+ }
+
+#ifdef DEBUG
+ /* Do we need to allocate a new temp */
+ bool isNewTemp = false;
+#endif // DEBUG
+
+#ifndef LEGACY_BACKEND
+
+ noway_assert(temp != nullptr);
+
+#else // LEGACY_BACKEND
+
+ if (temp == nullptr)
+ {
+#ifdef DEBUG
+ isNewTemp = true;
+#endif // DEBUG
+ tmpCount++;
+ tmpSize += (unsigned)size;
+
+#ifdef _TARGET_ARM_
+ if (type == TYP_DOUBLE)
+ {
+ // Adjust tmpSize in case it needs alignment
+ tmpSize += TARGET_POINTER_SIZE;
+ }
+#endif // _TARGET_ARM_
+
+ genEmitter->emitTmpSizeChanged(tmpSize);
+
+ temp = new (this, CMK_Unknown) TempDsc(-((int)tmpCount), size, type);
+ }
+
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("%s temp #%u, slot %u, size = %u\n", isNewTemp ? "created" : "reused", -temp->tdTempNum(), slot,
+ temp->tdTempSize());
+ }
+ tmpGetCount++;
+#endif // DEBUG
+
+ temp->tdNext = tmpUsed[slot];
+ tmpUsed[slot] = temp;
+
+ return temp;
+}
+
+#ifndef LEGACY_BACKEND
+
+/*****************************************************************************
+ * Preallocate 'count' temps of type 'type'. This type must be a normalized
+ * type (by the definition of tmpNormalizeType()).
+ *
+ * This is used at the end of LSRA, which knows precisely the maximum concurrent
+ * number of each type of spill temp needed, before code generation. Code generation
+ * then uses these preallocated temps. If code generation ever asks for more than
+ * has been preallocated, it is a fatal error.
+ */
+
+void Compiler::tmpPreAllocateTemps(var_types type, unsigned count)
+{
+ assert(type == tmpNormalizeType(type));
+ unsigned size = genTypeSize(type);
+
+ // If TYP_STRUCT ever gets in here we do bad things (tmpSlot returns -1)
+ noway_assert(size >= sizeof(int));
+
+ // Find the slot to search for a free temp of the right size.
+ // Note that slots are shared by types of the identical size (e.g., TYP_REF and TYP_LONG on AMD64),
+ // so we can't assert that the slot is empty when we get here.
+
+ unsigned slot = tmpSlot(size);
+
+ for (unsigned i = 0; i < count; i++)
+ {
+ tmpCount++;
+ tmpSize += size;
+
+ TempDsc* temp = new (this, CMK_Unknown) TempDsc(-((int)tmpCount), size, type);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("pre-allocated temp #%u, slot %u, size = %u\n", -temp->tdTempNum(), slot, temp->tdTempSize());
+ }
+#endif // DEBUG
+
+ // Add it to the front of the appropriate slot list.
+ temp->tdNext = tmpFree[slot];
+ tmpFree[slot] = temp;
+ }
+}
+
+#endif // !LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Release the given temp.
+ */
+
+void Compiler::tmpRlsTemp(TempDsc* temp)
+{
+ assert(temp != nullptr);
+
+ unsigned slot;
+
+ /* Add the temp to the 'free' list */
+
+ slot = tmpSlot(temp->tdTempSize());
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("release temp #%u, slot %u, size = %u\n", -temp->tdTempNum(), slot, temp->tdTempSize());
+ }
+ assert(tmpGetCount);
+ tmpGetCount--;
+#endif
+
+ // Remove it from the 'used' list.
+
+ TempDsc** last = &tmpUsed[slot];
+ TempDsc* t;
+ for (t = *last; t != nullptr; last = &t->tdNext, t = *last)
+ {
+ if (t == temp)
+ {
+ /* Found it! -- remove it from the 'used' list */
+
+ *last = t->tdNext;
+ break;
+ }
+ }
+ assert(t != nullptr); // We better have found it!
+
+ // Add it to the free list.
+
+ temp->tdNext = tmpFree[slot];
+ tmpFree[slot] = temp;
+}
+
+/*****************************************************************************
+ * Given a temp number, find the corresponding temp.
+ *
+ * When looking for temps on the "free" list, this can only be used after code generation. (This is
+ * simply because we have an assert to that effect in tmpListBeg(); we could relax that, or hoist
+ * the assert to the appropriate callers.)
+ *
+ * When looking for temps on the "used" list, this can be used any time.
+ */
+TempDsc* Compiler::tmpFindNum(int tnum, TEMP_USAGE_TYPE usageType /* = TEMP_USAGE_FREE */) const
+{
+ assert(tnum < 0); // temp numbers are negative
+
+ for (TempDsc* temp = tmpListBeg(usageType); temp != nullptr; temp = tmpListNxt(temp, usageType))
+ {
+ if (temp->tdTempNum() == tnum)
+ {
+ return temp;
+ }
+ }
+
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * A helper function to begin iterating over all the temps (see tmpListNxt()).
+ */
+
+TempDsc* Compiler::tmpListBeg(TEMP_USAGE_TYPE usageType /* = TEMP_USAGE_FREE */) const
+{
+ TempDsc* const* tmpLists;
+ if (usageType == TEMP_USAGE_FREE)
+ {
+ tmpLists = tmpFree;
+ }
+ else
+ {
+ tmpLists = tmpUsed;
+ }
+
+ // Return the first temp in the slot for the smallest size
+ unsigned slot = 0;
+ while (slot < (TEMP_SLOT_COUNT - 1) && tmpLists[slot] == nullptr)
+ {
+ slot++;
+ }
+ TempDsc* temp = tmpLists[slot];
+
+ return temp;
+}
+
+/*****************************************************************************
+ * Used with tmpListBeg() to iterate over the list of temps.
+ */
+
+TempDsc* Compiler::tmpListNxt(TempDsc* curTemp, TEMP_USAGE_TYPE usageType /* = TEMP_USAGE_FREE */) const
+{
+ assert(curTemp != nullptr);
+
+ TempDsc* temp = curTemp->tdNext;
+ if (temp == nullptr)
+ {
+ unsigned size = curTemp->tdTempSize();
+
+ // If there are no more temps in the list, check if there are more
+ // slots (for bigger sized temps) to walk.
+
+ TempDsc* const* tmpLists;
+ if (usageType == TEMP_USAGE_FREE)
+ {
+ tmpLists = tmpFree;
+ }
+ else
+ {
+ tmpLists = tmpUsed;
+ }
+
+ while (size < TEMP_MAX_SIZE && temp == nullptr)
+ {
+ size += sizeof(int);
+ unsigned slot = tmpSlot(size);
+ temp = tmpLists[slot];
+ }
+
+ assert((temp == nullptr) || (temp->tdTempSize() == size));
+ }
+
+ return temp;
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ * Return 'true' if all allocated temps are free (not in use).
+ */
+bool Compiler::tmpAllFree() const
+{
+ // The 'tmpGetCount' should equal the number of things in the 'tmpUsed' lists. This is a convenient place
+ // to assert that.
+ unsigned usedCount = 0;
+ for (TempDsc* temp = tmpListBeg(TEMP_USAGE_USED); temp != nullptr; temp = tmpListNxt(temp, TEMP_USAGE_USED))
+ {
+ ++usedCount;
+ }
+ assert(usedCount == tmpGetCount);
+
+ if (tmpGetCount != 0)
+ {
+ return false;
+ }
+
+ for (unsigned i = 0; i < sizeof(tmpUsed) / sizeof(tmpUsed[0]); i++)
+ {
+ if (tmpUsed[i] != nullptr)
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#endif // DEBUG
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Register-related utility functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+ *
+ * Returns true when regPair is a combination of two distinct, real registers
+ * (i.e. neither half is REG_STK, REG_L_STK, or an out-of-range pseudo register).
+ * In debug builds it also asserts that regPair is within the valid pair range.
+ */
+
+bool genIsProperRegPair(regPairNo regPair)
+{
+ regNumber rlo = genRegPairLo(regPair);
+ regNumber rhi = genRegPairHi(regPair);
+
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+
+ if (rlo == rhi)
+ {
+ return false;
+ }
+
+ if (rlo == REG_L_STK || rhi == REG_L_STK)
+ {
+ return false;
+ }
+
+ if (rlo >= REG_COUNT || rhi >= REG_COUNT)
+ {
+ return false;
+ }
+
+ return (rlo != REG_STK && rhi != REG_STK);
+}
+
+/*****************************************************************************
+ *
+ * Given an argument register, returns the next argument register.
+ *
+ * Note that this method will return a non-argument register
+ * when given REG_ARG_LAST.
+ *
+ */
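+// Illustrative example (Windows x64): genRegArgNext(REG_ARG_0) yields REG_ARG_1 via
+// REG_NEXT, while the special case below maps REG_ARG_1 directly to REG_ARG_2 because
+// the registers between them in the regNumber enumeration are not argument registers.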
+
+regNumber genRegArgNext(regNumber argReg)
+{
+ regNumber result = REG_NA;
+
+ if (isValidFloatArgReg(argReg))
+ {
+ // We can iterate the floating point argument registers by using +1
+ result = REG_NEXT(argReg);
+ }
+ else
+ {
+ assert(isValidIntArgReg(argReg));
+
+#ifdef _TARGET_AMD64_
+#ifdef UNIX_AMD64_ABI
+        // Unix AMD64 ABI:
+ // REG_EDI, REG_ESI, REG_ECX, REG_EDX, REG_R8, REG_R9
+ //
+ if (argReg == REG_ARG_1) // REG_ESI
+ {
+ result = REG_ARG_2; // REG_ECX
+ }
+ else if (argReg == REG_ARG_3) // REG_EDX
+ {
+ result = REG_ARG_4; // REG_R8
+ }
+#else // Windows ABI
+ // Windows X64 ABI:
+ // REG_ECX, REG_EDX, REG_R8, REG_R9
+ //
+ if (argReg == REG_ARG_1) // REG_EDX
+ {
+ result = REG_ARG_2; // REG_R8
+ }
+#endif // UNIX or Windows ABI
+#endif // _TARGET_AMD64_
+
+        // If we didn't set 'result' to a valid register above,
+ // then we will just iterate 'argReg' using REG_NEXT
+ //
+ if (result == REG_NA)
+ {
+ // Otherwise we just iterate the argument registers by using REG_NEXT
+ result = REG_NEXT(argReg);
+ }
+ }
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * The following table determines the order in which callee-saved registers
+ * are encoded in GC information at call sites (perhaps among other things).
+ * In any case, they establish a mapping from ordinal callee-save reg "indices" to
+ * register numbers and corresponding bitmaps.
+ */
+
+const regNumber raRegCalleeSaveOrder[] = {REG_CALLEE_SAVED_ORDER};
+const regMaskTP raRbmCalleeSaveOrder[] = {RBM_CALLEE_SAVED_ORDER};
+
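+// genRegMaskFromCalleeSavedMask converts an index-based mask (bit i set means "the
+// i-th register in the callee-saved order above") into an ordinary register bitmap.
+// For example (illustrative), if raRegCalleeSaveOrder[0] is REG_EBX, a calleeSaveMask
+// of 0x1 yields RBM_EBX.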
+regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask)
+{
+ regMaskSmall res = 0;
+ for (int i = 0; i < CNT_CALLEE_SAVED; i++)
+ {
+ if ((calleeSaveMask & ((regMaskTP)1 << i)) != 0)
+ {
+ res |= raRbmCalleeSaveOrder[i];
+ }
+ }
+ return res;
+}
+
+/*****************************************************************************
+ *
+ * Initializes the spill code. Should be called once per function compiled.
+ */
+
+// inline
+void RegSet::rsSpillInit()
+{
+ /* Clear out the spill and multi-use tables */
+
+ memset(rsSpillDesc, 0, sizeof(rsSpillDesc));
+
+#ifdef LEGACY_BACKEND
+ memset(rsUsedTree, 0, sizeof(rsUsedTree));
+ memset(rsUsedAddr, 0, sizeof(rsUsedAddr));
+ memset(rsMultiDesc, 0, sizeof(rsMultiDesc));
+ rsSpillFloat = nullptr;
+#endif // LEGACY_BACKEND
+
+ rsNeededSpillReg = false;
+
+ /* We don't have any descriptors allocated */
+
+ rsSpillFree = nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Shuts down the spill code. Should be called once per function compiled.
+ */
+
+// inline
+void RegSet::rsSpillDone()
+{
+ rsSpillChk();
+}
+
+/*****************************************************************************
+ *
+ * Begin tracking spills - should be called each time before a pass is made
+ * over a function body.
+ */
+
+// inline
+void RegSet::rsSpillBeg()
+{
+ rsSpillChk();
+}
+
+/*****************************************************************************
+ *
+ * Finish tracking spills - should be called each time after a pass is made
+ * over a function body.
+ */
+
+// inline
+void RegSet::rsSpillEnd()
+{
+ rsSpillChk();
+}
+
+//****************************************************************************
+// Create a new SpillDsc or get one off the free list
+//
+
+// inline
+RegSet::SpillDsc* RegSet::SpillDsc::alloc(Compiler* pComp, RegSet* regSet, var_types type)
+{
+ RegSet::SpillDsc* spill;
+ RegSet::SpillDsc** pSpill;
+
+ pSpill = &(regSet->rsSpillFree);
+
+ // Allocate spill structure
+ if (*pSpill)
+ {
+ spill = *pSpill;
+ *pSpill = spill->spillNext;
+ }
+ else
+ {
+ spill = (RegSet::SpillDsc*)pComp->compGetMem(sizeof(SpillDsc));
+ }
+ return spill;
+}
+
+//****************************************************************************
+// Free a SpillDsc and return it to the rsSpillFree list
+//
+
+// inline
+void RegSet::SpillDsc::freeDsc(RegSet* regSet, RegSet::SpillDsc* spillDsc)
+{
+ spillDsc->spillNext = regSet->rsSpillFree;
+ regSet->rsSpillFree = spillDsc;
+}
+
+/*****************************************************************************
+ *
+ * Make sure no spills are currently active - used for debugging of the code
+ * generator.
+ */
+
+#ifdef DEBUG
+
+// inline
+void RegSet::rsSpillChk()
+{
+ // All grabbed temps should have been released
+ assert(m_rsCompiler->tmpGetCount == 0);
+
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ assert(rsSpillDesc[reg] == nullptr);
+
+#ifdef LEGACY_BACKEND
+ assert(rsUsedTree[reg] == NULL);
+ assert(rsMultiDesc[reg] == NULL);
+#endif // LEGACY_BACKEND
+ }
+}
+
+#else
+
+// inline
+void RegSet::rsSpillChk()
+{
+}
+
+#endif
+
+/*****************************************************************************/
+#if REDUNDANT_LOAD
+
+// inline
+bool RegTracker::rsIconIsInReg(ssize_t val, regNumber reg)
+{
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return false;
+ }
+
+ if (rsRegValues[reg].rvdKind == RV_INT_CNS && rsRegValues[reg].rvdIntCnsVal == val)
+ {
+ return true;
+ }
+ return false;
+}
+
+#endif // REDUNDANT_LOAD
+/*****************************************************************************/
diff --git a/src/jit/regset.h b/src/jit/regset.h
new file mode 100644
index 0000000000..cdfbb1502a
--- /dev/null
+++ b/src/jit/regset.h
@@ -0,0 +1,460 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#ifndef _REGSET_H
+#define _REGSET_H
+#include "vartype.h"
+#include "target.h"
+
+class LclVarDsc;
+class TempDsc;
+typedef struct GenTree* GenTreePtr;
+class Compiler;
+class CodeGen;
+class GCInfo;
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX RegSet XX
+XX XX
+XX Represents the register set, and their states during code generation XX
+XX Can select an unused register, keeps track of the contents of the XX
+XX registers, and can spill registers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+*
+* Keep track of the current state of each register. This is intended to be
+* used for things like register reload suppression, but for now the only
+* thing it does is note which registers we use in each method.
+*/
+
+enum regValKind
+{
+ RV_TRASH, // random unclassified garbage
+ RV_INT_CNS, // integer constant
+ RV_LCL_VAR, // local variable value
+ RV_LCL_VAR_LNG_LO, // lower half of long local variable
+ RV_LCL_VAR_LNG_HI,
+};
+
+/*****************************************************************************/
+
+class RegSet
+{
+ friend class CodeGen;
+ friend class CodeGenInterface;
+
+private:
+ Compiler* m_rsCompiler;
+ GCInfo& m_rsGCInfo;
+
+public:
+ RegSet(Compiler* compiler, GCInfo& gcInfo);
+
+#ifdef _TARGET_ARM_
+ regMaskTP rsMaskPreSpillRegs(bool includeAlignment)
+ {
+ return includeAlignment ? (rsMaskPreSpillRegArg | rsMaskPreSpillAlign) : rsMaskPreSpillRegArg;
+ }
+#endif // _TARGET_ARM_
+
+private:
+ // The same descriptor is also used for 'multi-use' register tracking, BTW.
+ struct SpillDsc
+ {
+ SpillDsc* spillNext; // next spilled value of same reg
+
+ union {
+ GenTreePtr spillTree; // the value that was spilled
+#ifdef LEGACY_BACKEND
+ LclVarDsc* spillVarDsc; // variable if it's an enregistered variable
+#endif // LEGACY_BACKEND
+ };
+
+ TempDsc* spillTemp; // the temp holding the spilled value
+
+#ifdef LEGACY_BACKEND
+ GenTreePtr spillAddr; // owning complex address mode or nullptr
+
+ union {
+ bool spillMoreMultis;
+ bool bEnregisteredVariable; // For FP. Indicates that what was spilled was
+ // an enregistered variable
+ };
+#endif // LEGACY_BACKEND
+
+ static SpillDsc* alloc(Compiler* pComp, RegSet* regSet, var_types type);
+ static void freeDsc(RegSet* regSet, SpillDsc* spillDsc);
+ };
+
+#ifdef LEGACY_BACKEND
+public:
+ regMaskTP rsUseIfZero(regMaskTP regs, regMaskTP includeHint);
+#endif // LEGACY_BACKEND
+
+//-------------------------------------------------------------------------
+//
+// Track the status of the registers
+//
+#ifdef LEGACY_BACKEND
+public: // TODO-Cleanup: Should be private, but Compiler uses it
+ GenTreePtr rsUsedTree[REG_COUNT]; // trees currently sitting in the registers
+private:
+ GenTreePtr rsUsedAddr[REG_COUNT]; // addr for which rsUsedTree[reg] is a part of the addressing mode
+ SpillDsc* rsMultiDesc[REG_COUNT]; // keeps track of 'multiple-use' registers.
+#endif // LEGACY_BACKEND
+
+private:
+ bool rsNeededSpillReg; // true if this method needed to spill any registers
+ regMaskTP rsModifiedRegsMask; // mask of the registers modified by the current function.
+
+#ifdef DEBUG
+ bool rsModifiedRegsMaskInitialized; // Has rsModifiedRegsMask been initialized? Guards against illegal use.
+#endif // DEBUG
+
+public:
+ regMaskTP rsGetModifiedRegsMask() const
+ {
+ assert(rsModifiedRegsMaskInitialized);
+ return rsModifiedRegsMask;
+ }
+
+ void rsClearRegsModified();
+
+ void rsSetRegsModified(regMaskTP mask DEBUGARG(bool suppressDump = false));
+
+ void rsRemoveRegsModified(regMaskTP mask);
+
+ bool rsRegsModified(regMaskTP mask) const
+ {
+ assert(rsModifiedRegsMaskInitialized);
+ return (rsModifiedRegsMask & mask) != 0;
+ }
+
+public: // TODO-Cleanup: Should be private, but GCInfo uses them
+#ifdef LEGACY_BACKEND
+ regMaskTP rsMaskUsed; // currently 'used' registers mask
+#endif // LEGACY_BACKEND
+
+ __declspec(property(get = GetMaskVars, put = SetMaskVars)) regMaskTP rsMaskVars; // mask of registers currently
+ // allocated to variables
+
+ regMaskTP GetMaskVars() const // 'get' property function for rsMaskVars property
+ {
+ return _rsMaskVars;
+ }
+
+ void SetMaskVars(regMaskTP newMaskVars); // 'put' property function for rsMaskVars property
+
+ void AddMaskVars(regMaskTP addMaskVars) // union 'addMaskVars' with the rsMaskVars set
+ {
+ SetMaskVars(_rsMaskVars | addMaskVars);
+ }
+
+ void RemoveMaskVars(regMaskTP removeMaskVars) // remove 'removeMaskVars' from the rsMaskVars set (like bitset DiffD)
+ {
+ SetMaskVars(_rsMaskVars & ~removeMaskVars);
+ }
+
+ void ClearMaskVars() // Like SetMaskVars(RBM_NONE), but without any debug output.
+ {
+ _rsMaskVars = RBM_NONE;
+ }
+
+private:
+ regMaskTP _rsMaskVars; // backing store for rsMaskVars property
+
+#ifdef LEGACY_BACKEND
+ regMaskTP rsMaskLock; // currently 'locked' registers mask
+ regMaskTP rsMaskMult; // currently 'multiply used' registers mask
+#endif // LEGACY_BACKEND
+
+#ifdef _TARGET_ARMARCH_
+ regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog
+#endif // _TARGET_ARMARCH_
+
+public: // TODO-Cleanup: Should be private, but Compiler uses it
+ regMaskTP rsMaskResvd; // mask of the registers that are reserved for special purposes (typically empty)
+
+public: // The PreSpill masks are used in LclVars.cpp
+#ifdef _TARGET_ARM_
+ regMaskTP rsMaskPreSpillAlign; // Mask of alignment padding added to prespill to keep double aligned args
+ // at aligned stack addresses.
+ regMaskTP rsMaskPreSpillRegArg; // mask of incoming registers that are spilled at the start of the prolog
+ // This includes registers used to pass a struct (or part of a struct)
+ // and all enregistered user arguments in a varargs call
+#endif // _TARGET_ARM_
+
+#ifdef LEGACY_BACKEND
+
+private:
+ // These getters/setters are ifdef here so that the accesses to these values in sharedfloat.cpp are redirected
+ // to the appropriate value.
+ // With FEATURE_STACK_FP_X87 (x86 FP codegen) we have separate register mask that just handle FP registers.
+ // For all other platforms (and eventually on x86) we use unified register masks that handle both kinds.
+ //
+ regMaskTP rsGetMaskUsed(); // Getter for rsMaskUsed or rsMaskUsedFloat
+ regMaskTP rsGetMaskVars(); // Getter for rsMaskVars or rsMaskRegVarFloat
+ regMaskTP rsGetMaskLock(); // Getter for rsMaskLock or rsMaskLockedFloat
+ regMaskTP rsGetMaskMult(); // Getter for rsMaskMult or 0
+
+ void rsSetMaskUsed(regMaskTP maskUsed); // Setter for rsMaskUsed or rsMaskUsedFloat
+ void rsSetMaskVars(regMaskTP maskVars); // Setter for rsMaskVars or rsMaskRegVarFloat
+ void rsSetMaskLock(regMaskTP maskLock); // Setter for rsMaskLock or rsMaskLockedFloat
+
+ void rsSetUsedTree(regNumber regNum, GenTreePtr tree); // Setter for rsUsedTree[]/genUsedRegsFloat[]
+ void rsFreeUsedTree(regNumber regNum, GenTreePtr tree); // Free for rsUsedTree[]/genUsedRegsFloat[]
+
+public:
+ regPairNo rsFindRegPairNo(regMaskTP regMask);
+
+private:
+ bool rsIsTreeInReg(regNumber reg, GenTreePtr tree);
+
+ regMaskTP rsExcludeHint(regMaskTP regs, regMaskTP excludeHint);
+ regMaskTP rsNarrowHint(regMaskTP regs, regMaskTP narrowHint);
+ regMaskTP rsMustExclude(regMaskTP regs, regMaskTP exclude);
+ regMaskTP rsRegMaskFree();
+ regMaskTP rsRegMaskCanGrab();
+
+ void rsMarkRegUsed(GenTreePtr tree, GenTreePtr addr = 0);
+ // A special case of "rsMarkRegUsed": the register used is an argument register, used to hold part of
+    // the given argument node "promotedStructArg". (The name suggests that we're likely to use this
+    // for a register holding a promoted struct argument, but the implementation doesn't depend on that.) The
+ // "isGCRef" argument indicates whether the register contains a GC reference.
+ void rsMarkArgRegUsedByPromotedFieldArg(GenTreePtr promotedStructArg, regNumber regNum, bool isGCRef);
+
+ void rsMarkRegPairUsed(GenTreePtr tree);
+
+ void rsMarkRegFree(regMaskTP regMask);
+ void rsMarkRegFree(regNumber reg, GenTreePtr tree);
+ void rsMultRegFree(regMaskTP regMask);
+ unsigned rsFreeNeededRegCount(regMaskTP needReg);
+
+ void rsLockReg(regMaskTP regMask);
+ void rsUnlockReg(regMaskTP regMask);
+ void rsLockUsedReg(regMaskTP regMask);
+ void rsUnlockUsedReg(regMaskTP regMask);
+ void rsLockReg(regMaskTP regMask, regMaskTP* usedMask);
+ void rsUnlockReg(regMaskTP regMask, regMaskTP usedMask);
+
+ regMaskTP rsRegExclMask(regMaskTP regMask, regMaskTP rmvMask);
+
+ regNumber rsPickRegInTmpOrder(regMaskTP regMask);
+
+public: // used by emitter (!)
+ regNumber rsGrabReg(regMaskTP regMask);
+
+private:
+ regNumber rsPickReg(regMaskTP regMask = RBM_NONE, regMaskTP regBest = RBM_NONE);
+
+public: // used by emitter (!)
+ regNumber rsPickFreeReg(regMaskTP regMaskHint = RBM_ALLINT);
+
+private:
+ regPairNo rsGrabRegPair(regMaskTP regMask);
+ regPairNo rsPickRegPair(regMaskTP regMask);
+
+ class RegisterPreference
+ {
+ public:
+ regMaskTP ok;
+ regMaskTP best;
+ RegisterPreference(regMaskTP _ok, regMaskTP _best)
+ {
+ ok = _ok;
+ best = _best;
+ }
+ };
+ regNumber PickRegFloat(GenTreePtr tree,
+ var_types type = TYP_DOUBLE,
+ RegisterPreference* pref = NULL,
+ bool bUsed = true);
+ regNumber PickRegFloat(var_types type = TYP_DOUBLE, RegisterPreference* pref = NULL, bool bUsed = true);
+ regNumber PickRegFloatOtherThan(GenTreePtr tree, var_types type, regNumber reg);
+ regNumber PickRegFloatOtherThan(var_types type, regNumber reg);
+
+ regMaskTP RegFreeFloat();
+
+ void SetUsedRegFloat(GenTreePtr tree, bool bValue);
+ void SetLockedRegFloat(GenTreePtr tree, bool bValue);
+ bool IsLockedRegFloat(GenTreePtr tree);
+
+ var_types rsRmvMultiReg(regNumber reg);
+ void rsRecMultiReg(regNumber reg, var_types type);
+#endif // LEGACY_BACKEND
+
+public:
+#ifdef DEBUG
+ /*****************************************************************************
+     * Should we stress the register tracking logic?
+ * This is set via COMPlus_JitStressRegs.
+ * The following values are ordered, such that any value greater than RS_xx
+ * implies RS_xx.
+ * LSRA defines a different set of values, but uses the same COMPlus_JitStressRegs
+ * value, with the same notion of relative ordering.
+ * 1 = rsPickReg() picks 'bad' registers.
+ * 2 = codegen spills at safe points. This is still flaky
+ */
+ enum rsStressRegsType
+ {
+ RS_STRESS_NONE = 0,
+ RS_PICK_BAD_REG = 01,
+ RS_SPILL_SAFE = 02,
+ };
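+    // For example (illustrative): because the values above are ordered,
+    // COMPlus_JitStressRegs=2 (RS_SPILL_SAFE) also implies RS_PICK_BAD_REG.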
+ rsStressRegsType rsStressRegs();
+#endif // DEBUG
+
+private:
+ //-------------------------------------------------------------------------
+ //
+ // The following tables keep track of spilled register values.
+ //
+
+ // When a register gets spilled, the old information is stored here
+ SpillDsc* rsSpillDesc[REG_COUNT];
+ SpillDsc* rsSpillFree; // list of unused spill descriptors
+
+#ifdef LEGACY_BACKEND
+ SpillDsc* rsSpillFloat;
+#endif // LEGACY_BACKEND
+
+ void rsSpillChk();
+ void rsSpillInit();
+ void rsSpillDone();
+ void rsSpillBeg();
+ void rsSpillEnd();
+
+ void rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx = 0);
+
+#if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
+ void rsSpillFPStack(GenTreePtr tree);
+#endif // defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
+
+#ifdef LEGACY_BACKEND
+ void rsSpillReg(regNumber reg);
+ void rsSpillRegIfUsed(regNumber reg);
+ void rsSpillRegs(regMaskTP regMask);
+#endif // LEGACY_BACKEND
+
+ SpillDsc* rsGetSpillInfo(GenTreePtr tree,
+ regNumber reg,
+ SpillDsc** pPrevDsc = nullptr
+#ifdef LEGACY_BACKEND
+ ,
+ SpillDsc** pMultiDsc = NULL
+#endif // LEGACY_BACKEND
+ );
+
+ TempDsc* rsGetSpillTempWord(regNumber oldReg, SpillDsc* dsc, SpillDsc* prevDsc);
+
+#ifdef LEGACY_BACKEND
+ enum ExactReg
+ {
+ ANY_REG,
+ EXACT_REG
+ };
+ enum KeepReg
+ {
+ FREE_REG,
+ KEEP_REG
+ };
+
+ regNumber rsUnspillOneReg(GenTreePtr tree, regNumber oldReg, KeepReg willKeepNewReg, regMaskTP needReg);
+#endif // LEGACY_BACKEND
+
+ TempDsc* rsUnspillInPlace(GenTreePtr tree, regNumber oldReg, unsigned regIdx = 0);
+
+#ifdef LEGACY_BACKEND
+ void rsUnspillReg(GenTreePtr tree, regMaskTP needReg, KeepReg keepReg);
+
+ void rsUnspillRegPair(GenTreePtr tree, regMaskTP needReg, KeepReg keepReg);
+#endif // LEGACY_BACKEND
+
+ void rsMarkSpill(GenTreePtr tree, regNumber reg);
+
+#ifdef LEGACY_BACKEND
+ void rsMarkUnspill(GenTreePtr tree, regNumber reg);
+#endif // LEGACY_BACKEND
+
+#if FEATURE_STACK_FP_X87
+ regMaskTP rsMaskUsedFloat;
+ regMaskTP rsMaskRegVarFloat;
+ regMaskTP rsMaskLockedFloat;
+ GenTreePtr genUsedRegsFloat[REG_FPCOUNT];
+ LclVarDsc* genRegVarsFloat[REG_FPCOUNT];
+#endif // FEATURE_STACK_FP_X87
+};
+
+//-------------------------------------------------------------------------
+//
+// These are used to track the contents of the registers during
+// code generation.
+//
+// Only integer registers are tracked.
+//
+
+struct RegValDsc
+{
+ regValKind rvdKind;
+ union {
+ ssize_t rvdIntCnsVal; // for rvdKind == RV_INT_CNS
+ unsigned rvdLclVarNum; // for rvdKind == RV_LCL_VAR, RV_LCL_VAR_LNG_LO, RV_LCL_VAR_LNG_HI
+ };
+};
+
+class RegTracker
+{
+ Compiler* compiler;
+ RegSet* regSet;
+ RegValDsc rsRegValues[REG_COUNT];
+
+public:
+ void rsTrackInit(Compiler* comp, RegSet* rs)
+ {
+ compiler = comp;
+ regSet = rs;
+ rsTrackRegClr();
+ }
+
+ void rsTrackRegClr();
+ void rsTrackRegClrPtr();
+ void rsTrackRegTrash(regNumber reg);
+ void rsTrackRegMaskTrash(regMaskTP regMask);
+ regMaskTP rsTrashRegsForGCInterruptability();
+ void rsTrackRegIntCns(regNumber reg, ssize_t val);
+ void rsTrackRegLclVar(regNumber reg, unsigned var);
+ void rsTrackRegLclVarLng(regNumber reg, unsigned var, bool low);
+ bool rsTrackIsLclVarLng(regValKind rvKind);
+ void rsTrackRegClsVar(regNumber reg, GenTreePtr clsVar);
+ void rsTrackRegCopy(regNumber reg1, regNumber reg2);
+ void rsTrackRegSwap(regNumber reg1, regNumber reg2);
+ void rsTrackRegAssign(GenTree* op1, GenTree* op2);
+
+ regNumber rsIconIsInReg(ssize_t val, ssize_t* closeDelta = nullptr);
+ bool rsIconIsInReg(ssize_t val, regNumber reg);
+ regNumber rsLclIsInReg(unsigned var);
+ regPairNo rsLclIsInRegPair(unsigned var);
+
+//---------------------- Load suppression ---------------------------------
+
+#if REDUNDANT_LOAD
+
+ void rsTrashLclLong(unsigned var);
+ void rsTrashLcl(unsigned var);
+ void rsTrashRegSet(regMaskTP regMask);
+
+ regMaskTP rsUselessRegs();
+
+#endif // REDUNDANT_LOAD
+};
+#endif // _REGSET_H
diff --git a/src/jit/scopeinfo.cpp b/src/jit/scopeinfo.cpp
new file mode 100644
index 0000000000..f2a7902317
--- /dev/null
+++ b/src/jit/scopeinfo.cpp
@@ -0,0 +1,1271 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX ScopeInfo XX
+XX XX
+XX Classes to gather the Scope information from the local variable info. XX
+XX Translates the given LocalVarTab from IL instruction offsets into XX
+XX native code offsets. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/******************************************************************************
+ * Debuggable code
+ *
+ * We break up blocks at the start and end IL ranges of the local variables.
+ * This is because IL offsets do not correspond exactly to native offsets
+ * except at block boundaries. No basic-blocks are deleted (not even
+ * unreachable), so there will not be any missing address-ranges, though the
+ * blocks themselves may not be ordered. (Also, internal blocks may be added).
+ * o At the start of each basic block, siBeginBlock() checks if any variables
+ * are coming in scope, and adds an open scope to siOpenScopeList if needed.
+ * o At the end of each basic block, siEndBlock() checks if any variables
+ * are going out of scope and moves the open scope from siOpenScopeLast
+ * to siScopeList.
+ *
+ * Optimized code
+ *
+ * We cannot break up the blocks as this will produce different code under
+ * the debugger. Instead we make a best effort.
+ * o At the start of each basic block, siBeginBlock() adds open scopes
+ * corresponding to block->bbLiveIn to siOpenScopeList. Also siUpdate()
+ * is called to close scopes for variables which are not live anymore.
+ * o siEndBlock() closes scopes for any variables which go out of range
+ * before bbCodeOffsEnd.
+ * o siCloseAllOpenScopes() closes any open scopes after all the blocks.
+ *   This should only be needed if some basic blocks are deleted/out of order,
+ * etc.
+ * Also,
+ * o At every assignment to a variable, siCheckVarScope() adds an open scope
+ * for the variable being assigned to.
+ * o genChangeLife() calls siUpdate() which closes scopes for variables which
+ * are not live anymore.
+ *
+ ******************************************************************************
+ */
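+
+/*****************************************************************************
+ * Rough sketch of how codegen is expected to drive the hooks described above
+ * (illustrative only; the actual call sites live elsewhere in codegen):
+ *
+ *     siInit();
+ *     for each BasicBlock:
+ *         siBeginBlock(block);   // open scopes for variables coming in scope
+ *         ... generate code; siCheckVarScope()/siUpdate() as lives change ...
+ *         siEndBlock(block);     // close scopes that end in this block
+ *     siCloseAllOpenScopes();    // close anything still open at the end
+ */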
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "emit.h"
+#include "codegen.h"
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+bool Compiler::siVarLoc::vlIsInReg(regNumber reg)
+{
+ switch (vlType)
+ {
+ case VLT_REG:
+ return (vlReg.vlrReg == reg);
+ case VLT_REG_REG:
+ return ((vlRegReg.vlrrReg1 == reg) || (vlRegReg.vlrrReg2 == reg));
+ case VLT_REG_STK:
+ return (vlRegStk.vlrsReg == reg);
+ case VLT_STK_REG:
+ return (vlStkReg.vlsrReg == reg);
+
+ case VLT_STK:
+ case VLT_STK2:
+ case VLT_FPSTK:
+ return false;
+
+ default:
+ assert(!"Bad locType");
+ return false;
+ }
+}
+
+bool Compiler::siVarLoc::vlIsOnStk(regNumber reg, signed offset)
+{
+ regNumber actualReg;
+
+ switch (vlType)
+ {
+
+ case VLT_REG_STK:
+ actualReg = vlRegStk.vlrsStk.vlrssBaseReg;
+ if ((int)actualReg == (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ actualReg = REG_SPBASE;
+ }
+ return ((actualReg == reg) && (vlRegStk.vlrsStk.vlrssOffset == offset));
+ case VLT_STK_REG:
+ actualReg = vlStkReg.vlsrStk.vlsrsBaseReg;
+ if ((int)actualReg == (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ actualReg = REG_SPBASE;
+ }
+ return ((actualReg == reg) && (vlStkReg.vlsrStk.vlsrsOffset == offset));
+ case VLT_STK:
+ actualReg = vlStk.vlsBaseReg;
+ if ((int)actualReg == (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ actualReg = REG_SPBASE;
+ }
+ return ((actualReg == reg) && (vlStk.vlsOffset == offset));
+ case VLT_STK2:
+ actualReg = vlStk2.vls2BaseReg;
+ if ((int)actualReg == (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ actualReg = REG_SPBASE;
+ }
+ return ((actualReg == reg) && ((vlStk2.vls2Offset == offset) || (vlStk2.vls2Offset == (offset - 4))));
+
+ case VLT_REG:
+ case VLT_REG_FP:
+ case VLT_REG_REG:
+ case VLT_FPSTK:
+ return false;
+
+ default:
+ assert(!"Bad locType");
+ return false;
+ }
+}
+
+/*============================================================================
+ *
+ * Implementation for ScopeInfo
+ *
+ *
+ * Whenever a variable comes into scope, add it to the list.
+ * When a varDsc goes dead, end its previous scope entry, and make a new one
+ * which is unavailable.
+ * When a varDsc goes live, end its previous un-available entry (if any) and
+ * set its new entry as available.
+ *
+ *============================================================================
+ */
+
+/*****************************************************************************
+ * siNewScope
+ *
+ * Creates a new scope and adds it to the Open scope list.
+ */
+
+CodeGen::siScope* CodeGen::siNewScope(unsigned LVnum, unsigned varNum)
+{
+ bool tracked = compiler->lvaTable[varNum].lvTracked;
+ unsigned varIndex = compiler->lvaTable[varNum].lvVarIndex;
+
+ if (tracked)
+ {
+ siEndTrackedScope(varIndex);
+ }
+
+ siScope* newScope = (siScope*)compiler->compGetMem(sizeof(*newScope), CMK_SiScope);
+
+ newScope->scStartLoc.CaptureLocation(getEmitter());
+ assert(newScope->scStartLoc.Valid());
+
+ newScope->scEndLoc.Init();
+
+ newScope->scLVnum = LVnum;
+ newScope->scVarNum = varNum;
+ newScope->scNext = nullptr;
+ newScope->scStackLevel = genStackLevel; // used only by stack vars
+
+ siOpenScopeLast->scNext = newScope;
+ newScope->scPrev = siOpenScopeLast;
+ siOpenScopeLast = newScope;
+
+ if (tracked)
+ {
+ siLatestTrackedScopes[varIndex] = newScope;
+ }
+
+ return newScope;
+}
+
+/*****************************************************************************
+ * siRemoveFromOpenScopeList
+ *
+ * Removes a scope from the open-scope list and puts it into the done-scope list
+ */
+
+void CodeGen::siRemoveFromOpenScopeList(CodeGen::siScope* scope)
+{
+ assert(scope);
+ assert(scope->scEndLoc.Valid());
+
+ // Remove from open-scope list
+
+ scope->scPrev->scNext = scope->scNext;
+ if (scope->scNext)
+ {
+ scope->scNext->scPrev = scope->scPrev;
+ }
+ else
+ {
+ siOpenScopeLast = scope->scPrev;
+ }
+
+ // Add to the finished scope list. (Try to) filter out scopes of length 0.
+
+ if (scope->scStartLoc != scope->scEndLoc)
+ {
+ siScopeLast->scNext = scope;
+ siScopeLast = scope;
+ siScopeCnt++;
+ }
+}
+
+/*----------------------------------------------------------------------------
+ * These functions end scopes given different types of parameters
+ *----------------------------------------------------------------------------
+ */
+
+/*****************************************************************************
+ * For tracked vars, we don't need to search for the scope in the list as we
+ * have a pointer to the open scopes of all tracked variables.
+ */
+
+void CodeGen::siEndTrackedScope(unsigned varIndex)
+{
+ siScope* scope = siLatestTrackedScopes[varIndex];
+ if (!scope)
+ {
+ return;
+ }
+
+ scope->scEndLoc.CaptureLocation(getEmitter());
+ assert(scope->scEndLoc.Valid());
+
+ siRemoveFromOpenScopeList(scope);
+
+ siLatestTrackedScopes[varIndex] = nullptr;
+}
+
+/*****************************************************************************
+ * If we don't know that the variable is tracked, this function handles both
+ * cases.
+ */
+
+void CodeGen::siEndScope(unsigned varNum)
+{
+ for (siScope* scope = siOpenScopeList.scNext; scope; scope = scope->scNext)
+ {
+ if (scope->scVarNum == varNum)
+ {
+ siEndScope(scope);
+ return;
+ }
+ }
+
+ // At this point, we probably have a bad LocalVarTab
+
+ if (compiler->opts.compDbgCode)
+ {
+        // The LocalVarTab must be inconsistent if we reached this point, so
+        // pretend that we don't have any scope info.
+ assert(!siVerifyLocalVarTab());
+
+ compiler->opts.compScopeInfo = false;
+ }
+}
+
+/*****************************************************************************
+ * If we have a handle to the siScope structure, we handle ending this scope
+ * differently than if we just had a variable number. This saves us searching
+ * the open-scope list again.
+ */
+
+void CodeGen::siEndScope(siScope* scope)
+{
+ scope->scEndLoc.CaptureLocation(getEmitter());
+ assert(scope->scEndLoc.Valid());
+
+ siRemoveFromOpenScopeList(scope);
+
+ LclVarDsc& lclVarDsc1 = compiler->lvaTable[scope->scVarNum];
+ if (lclVarDsc1.lvTracked)
+ {
+ siLatestTrackedScopes[lclVarDsc1.lvVarIndex] = nullptr;
+ }
+}
+
+/*****************************************************************************
+ * siVerifyLocalVarTab
+ *
+ * Checks the LocalVarTab for consistency. The VM may not have properly
+ * verified the LocalVariableTable.
+ */
+
+#ifdef DEBUG
+
+bool CodeGen::siVerifyLocalVarTab()
+{
+ // No entries with overlapping lives should have the same slot.
+
+ for (unsigned i = 0; i < compiler->info.compVarScopesCount; i++)
+ {
+ for (unsigned j = i + 1; j < compiler->info.compVarScopesCount; j++)
+ {
+ unsigned slot1 = compiler->info.compVarScopes[i].vsdVarNum;
+ unsigned beg1 = compiler->info.compVarScopes[i].vsdLifeBeg;
+ unsigned end1 = compiler->info.compVarScopes[i].vsdLifeEnd;
+
+ unsigned slot2 = compiler->info.compVarScopes[j].vsdVarNum;
+ unsigned beg2 = compiler->info.compVarScopes[j].vsdLifeBeg;
+ unsigned end2 = compiler->info.compVarScopes[j].vsdLifeEnd;
+
+ if (slot1 == slot2 && (end1 > beg2 && beg1 < end2))
+ {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
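+
+// For example (hypothetical entries): two entries that share slot 3 with IL
+// lives [002..010) and [006..012) satisfy (end1 > beg2 && beg1 < end2), so
+// siVerifyLocalVarTab() would return false for such a table.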
+
+#endif
+
+/*============================================================================
+ * INTERFACE (public) Functions for ScopeInfo
+ *============================================================================
+ */
+
+void CodeGen::siInit()
+{
+#ifdef _TARGET_X86_
+ assert((unsigned)ICorDebugInfo::REGNUM_EAX == REG_EAX);
+ assert((unsigned)ICorDebugInfo::REGNUM_ECX == REG_ECX);
+ assert((unsigned)ICorDebugInfo::REGNUM_EDX == REG_EDX);
+ assert((unsigned)ICorDebugInfo::REGNUM_EBX == REG_EBX);
+ assert((unsigned)ICorDebugInfo::REGNUM_ESP == REG_ESP);
+ assert((unsigned)ICorDebugInfo::REGNUM_EBP == REG_EBP);
+ assert((unsigned)ICorDebugInfo::REGNUM_ESI == REG_ESI);
+ assert((unsigned)ICorDebugInfo::REGNUM_EDI == REG_EDI);
+#endif
+
+ assert((unsigned)ICorDebugInfo::VLT_REG == Compiler::VLT_REG);
+ assert((unsigned)ICorDebugInfo::VLT_STK == Compiler::VLT_STK);
+ assert((unsigned)ICorDebugInfo::VLT_REG_REG == Compiler::VLT_REG_REG);
+ assert((unsigned)ICorDebugInfo::VLT_REG_STK == Compiler::VLT_REG_STK);
+ assert((unsigned)ICorDebugInfo::VLT_STK_REG == Compiler::VLT_STK_REG);
+ assert((unsigned)ICorDebugInfo::VLT_STK2 == Compiler::VLT_STK2);
+ assert((unsigned)ICorDebugInfo::VLT_FPSTK == Compiler::VLT_FPSTK);
+ assert((unsigned)ICorDebugInfo::VLT_FIXED_VA == Compiler::VLT_FIXED_VA);
+ assert((unsigned)ICorDebugInfo::VLT_COUNT == Compiler::VLT_COUNT);
+ assert((unsigned)ICorDebugInfo::VLT_INVALID == Compiler::VLT_INVALID);
+
+ /* ICorDebugInfo::VarLoc and siVarLoc should overlap exactly as we cast
+ * one to the other in eeSetLVinfo()
+ * Below is a "required but not sufficient" condition
+ */
+
+ assert(sizeof(ICorDebugInfo::VarLoc) == sizeof(Compiler::siVarLoc));
+
+ assert(compiler->opts.compScopeInfo);
+
+ siOpenScopeList.scNext = nullptr;
+ siOpenScopeLast = &siOpenScopeList;
+ siScopeLast = &siScopeList;
+
+ siScopeCnt = 0;
+
+ VarSetOps::AssignNoCopy(compiler, siLastLife, VarSetOps::MakeEmpty(compiler));
+ siLastEndOffs = 0;
+
+ if (compiler->info.compVarScopesCount == 0)
+ {
+ return;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ siInFuncletRegion = false;
+#endif // FEATURE_EH_FUNCLETS
+
+ for (unsigned i = 0; i < lclMAX_TRACKED; i++)
+ {
+ siLatestTrackedScopes[i] = nullptr;
+ }
+
+ compiler->compResetScopeLists();
+}
+
+/*****************************************************************************
+ * siBeginBlock
+ *
+ * Called at the beginning of code-gen for a block. Checks if any scopes
+ * need to be opened.
+ */
+
+void CodeGen::siBeginBlock(BasicBlock* block)
+{
+ assert(block != nullptr);
+
+ if (!compiler->opts.compScopeInfo)
+ {
+ return;
+ }
+
+ if (compiler->info.compVarScopesCount == 0)
+ {
+ return;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (siInFuncletRegion)
+ {
+ return;
+ }
+
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ // For now, don't report any scopes in funclets. JIT64 doesn't.
+ siInFuncletRegion = true;
+
+ JITDUMP("Scope info: found beginning of funclet region at block BB%02u; ignoring following blocks\n",
+ block->bbNum);
+
+ return;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nScope info: begin block BB%02u, IL range ", block->bbNum);
+ block->dspBlockILRange();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ unsigned beginOffs = block->bbCodeOffs;
+
+ if (beginOffs == BAD_IL_OFFSET)
+ {
+ JITDUMP("Scope info: ignoring block beginning\n");
+ return;
+ }
+
+ if (!compiler->opts.compDbgCode)
+ {
+ /* For non-debuggable code */
+
+ // End scope of variables which are not live for this block
+
+ siUpdate();
+
+ // Check that vars which are live on entry have an open scope
+
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, i);
+ while (iter.NextElem(compiler, &i))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[i];
+ // lvRefCnt may go down to 0 after liveness-analysis.
+ // So we need to check if this tracked variable is actually used.
+ if (!compiler->lvaTable[varNum].lvIsInReg() && !compiler->lvaTable[varNum].lvOnFrame)
+ {
+ assert(compiler->lvaTable[varNum].lvRefCnt == 0);
+ continue;
+ }
+
+ siCheckVarScope(varNum, beginOffs);
+ }
+ }
+ else
+ {
+ // For debuggable code, scopes can begin only on block boundaries.
+ // Check if there are any scopes on the current block's start boundary.
+
+ VarScopeDsc* varScope;
+
+#if FEATURE_EH_FUNCLETS
+
+ // If we find a spot where the code offset isn't what we expect, because
+ // there is a gap, it might be because we've moved the funclets out of
+ // line. Catch up with the enter and exit scopes of the current block.
+ // Ignore the enter/exit scope changes of the missing scopes, which for
+ // funclets must be matched.
+
+ if (siLastEndOffs != beginOffs)
+ {
+ assert(beginOffs > 0);
+ assert(siLastEndOffs < beginOffs);
+
+ JITDUMP("Scope info: found offset hole. lastOffs=%u, currOffs=%u\n", siLastEndOffs, beginOffs);
+
+ // Skip enter scopes
+ while ((varScope = compiler->compGetNextEnterScope(beginOffs - 1, true)) != nullptr)
+ {
+ /* do nothing */
+ JITDUMP("Scope info: skipping enter scope, LVnum=%u\n", varScope->vsdLVnum);
+ }
+
+ // Skip exit scopes
+ while ((varScope = compiler->compGetNextExitScope(beginOffs - 1, true)) != nullptr)
+ {
+ /* do nothing */
+ JITDUMP("Scope info: skipping exit scope, LVnum=%u\n", varScope->vsdLVnum);
+ }
+ }
+
+#else // FEATURE_EH_FUNCLETS
+
+ if (siLastEndOffs != beginOffs)
+ {
+ assert(siLastEndOffs < beginOffs);
+ return;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ while ((varScope = compiler->compGetNextEnterScope(beginOffs)) != nullptr)
+ {
+ // brace-matching editor workaround for following line: (
+ JITDUMP("Scope info: opening scope, LVnum=%u [%03X..%03X)\n", varScope->vsdLVnum, varScope->vsdLifeBeg,
+ varScope->vsdLifeEnd);
+
+ siNewScope(varScope->vsdLVnum, varScope->vsdVarNum);
+
+#ifdef DEBUG
+ LclVarDsc* lclVarDsc1 = &compiler->lvaTable[varScope->vsdVarNum];
+ if (VERBOSE)
+ {
+ printf("Scope info: >> new scope, VarNum=%u, tracked? %s, VarIndex=%u, bbLiveIn=%s ",
+ varScope->vsdVarNum, lclVarDsc1->lvTracked ? "yes" : "no", lclVarDsc1->lvVarIndex,
+ VarSetOps::ToString(compiler, block->bbLiveIn));
+ dumpConvertedVarSet(compiler, block->bbLiveIn);
+ printf("\n");
+ }
+ assert(!lclVarDsc1->lvTracked || VarSetOps::IsMember(compiler, block->bbLiveIn, lclVarDsc1->lvVarIndex));
+#endif // DEBUG
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ siDispOpenScopes();
+ }
+#endif
+}
+
+/*****************************************************************************
+ * siEndBlock
+ *
+ * Called at the end of code-gen for a block. Any closing scopes are marked
+ * as such. Note that if we are collecting LocalVar info, scopes can
+ * only begin or end at block boundaries for debuggable code.
+ */
+
+void CodeGen::siEndBlock(BasicBlock* block)
+{
+ assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
+
+#if FEATURE_EH_FUNCLETS
+ if (siInFuncletRegion)
+ {
+ return;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nScope info: end block BB%02u, IL range ", block->bbNum);
+ block->dspBlockILRange();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ unsigned endOffs = block->bbCodeOffsEnd;
+
+ if (endOffs == BAD_IL_OFFSET)
+ {
+ JITDUMP("Scope info: ignoring block end\n");
+ return;
+ }
+
+ // If non-debuggable code, find all scopes which end over this block
+ // and close them. For debuggable code, scopes will only end on block
+ // boundaries.
+
+ VarScopeDsc* varScope;
+ while ((varScope = compiler->compGetNextExitScope(endOffs, !compiler->opts.compDbgCode)) != nullptr)
+ {
+ // brace-matching editor workaround for following line: (
+ JITDUMP("Scope info: ending scope, LVnum=%u [%03X..%03X)\n", varScope->vsdLVnum, varScope->vsdLifeBeg,
+ varScope->vsdLifeEnd);
+
+ unsigned varNum = varScope->vsdVarNum;
+ LclVarDsc* lclVarDsc1 = &compiler->lvaTable[varNum];
+
+ assert(lclVarDsc1);
+
+ if (lclVarDsc1->lvTracked)
+ {
+ siEndTrackedScope(lclVarDsc1->lvVarIndex);
+ }
+ else
+ {
+ siEndScope(varNum);
+ }
+ }
+
+ siLastEndOffs = endOffs;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ siDispOpenScopes();
+ }
+#endif
+}
+
+/*****************************************************************************
+ * siUpdate
+ *
+ * Called at the start of basic blocks, and during code-gen of a block,
+ * for non-debuggable code, whenever the life of any tracked variable changes
+ * and the appropriate code has been generated. For debuggable code, variables are
+ * live over their entire scope, and so they go live or dead only on
+ * block boundaries.
+ */
+void CodeGen::siUpdate()
+{
+ if (!compiler->opts.compScopeInfo)
+ {
+ return;
+ }
+
+ if (compiler->opts.compDbgCode)
+ {
+ return;
+ }
+
+ if (compiler->info.compVarScopesCount == 0)
+ {
+ return;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (siInFuncletRegion)
+ {
+ return;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ VARSET_TP VARSET_INIT_NOCOPY(killed, VarSetOps::Diff(compiler, siLastLife, compiler->compCurLife));
+ assert(VarSetOps::IsSubset(compiler, killed, compiler->lvaTrackedVars));
+
+ VARSET_ITER_INIT(compiler, iter, killed, i);
+ while (iter.NextElem(compiler, &i))
+ {
+#ifdef DEBUG
+ unsigned lclNum = compiler->lvaTrackedToVarNum[i];
+ LclVarDsc* lclVar = &compiler->lvaTable[lclNum];
+ assert(lclVar->lvTracked);
+#endif
+
+ siScope* scope = siLatestTrackedScopes[i];
+ siEndTrackedScope(i);
+ }
+
+ VarSetOps::Assign(compiler, siLastLife, compiler->compCurLife);
+}
+
+/*****************************************************************************
+ * In optimized code, we may not have access to gtLclVar.gtLclILoffs.
+ * So there may be ambiguity as to which entry in compiler->info.compVarScopes
+ * to use. We search the entire table and find the entry whose life
+ * begins closest to the given offset.
+ */
+
+/*****************************************************************************
+ * siCheckVarScope
+ *
+ * For non-debuggable code, whenever we come across a GenTree which is an
+ * assignment to a local variable, this function is called to check if the
+ * variable has an open scope. Also, check if it has the correct LVnum.
+ */
+
+void CodeGen::siCheckVarScope(unsigned varNum, IL_OFFSET offs)
+{
+ assert(compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0));
+
+#if FEATURE_EH_FUNCLETS
+ if (siInFuncletRegion)
+ {
+ return;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ if (offs == BAD_IL_OFFSET)
+ {
+ return;
+ }
+
+ siScope* scope;
+ LclVarDsc* lclVarDsc1 = &compiler->lvaTable[varNum];
+
+ // If there is an open scope corresponding to varNum, find it
+
+ if (lclVarDsc1->lvTracked)
+ {
+ scope = siLatestTrackedScopes[lclVarDsc1->lvVarIndex];
+ }
+ else
+ {
+ for (scope = siOpenScopeList.scNext; scope; scope = scope->scNext)
+ {
+ if (scope->scVarNum == varNum)
+ {
+ break;
+ }
+ }
+ }
+
+ // Look up the compiler->info.compVarScopes[] to find the local var info for (varNum->lvSlotNum, offs)
+ VarScopeDsc* varScope = compiler->compFindLocalVar(varNum, offs);
+ if (varScope == nullptr)
+ {
+ return;
+ }
+
+ // If the currently open scope does not have the correct LVnum, close it
+ // and create a new scope with this new LVnum
+
+ if (scope)
+ {
+ if (scope->scLVnum != varScope->vsdLVnum)
+ {
+ siEndScope(scope);
+ siNewScope(varScope->vsdLVnum, varScope->vsdVarNum);
+ }
+ }
+ else
+ {
+ siNewScope(varScope->vsdLVnum, varScope->vsdVarNum);
+ }
+}
+
+/*****************************************************************************
+ * siCloseAllOpenScopes
+ *
+ * For unreachable code, or optimized code with blocks reordered, there may be
+ * scopes left open at the end. Simply close them.
+ */
+
+void CodeGen::siCloseAllOpenScopes()
+{
+ assert(siOpenScopeList.scNext);
+
+ while (siOpenScopeList.scNext)
+ {
+ siEndScope(siOpenScopeList.scNext);
+ }
+}
+
+/*****************************************************************************
+ * siDispOpenScopes
+ *
+ * Displays all the vars on the open-scope list
+ */
+
+#ifdef DEBUG
+
+void CodeGen::siDispOpenScopes()
+{
+ assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
+
+ printf("Scope info: open scopes =\n");
+
+ if (siOpenScopeList.scNext == nullptr)
+ {
+ printf(" <none>\n");
+ }
+ else
+ {
+ for (siScope* scope = siOpenScopeList.scNext; scope != nullptr; scope = scope->scNext)
+ {
+ VarScopeDsc* localVars = compiler->info.compVarScopes;
+
+ for (unsigned i = 0; i < compiler->info.compVarScopesCount; i++, localVars++)
+ {
+ if (localVars->vsdLVnum == scope->scLVnum)
+ {
+ const char* name = compiler->VarNameToStr(localVars->vsdName);
+ // brace-matching editor workaround for following line: (
+ printf(" %u (%s) [%03X..%03X)\n", localVars->vsdLVnum, name == nullptr ? "UNKNOWN" : name,
+ localVars->vsdLifeBeg, localVars->vsdLifeEnd);
+ break;
+ }
+ }
+ }
+ }
+}
+
+#endif // DEBUG
+
+/*============================================================================
+ *
+ * Implementation for PrologScopeInfo
+ *
+ *============================================================================
+ */
+
+/*****************************************************************************
+ * psiNewPrologScope
+ *
+ * Creates a new scope and adds it to the Open scope list.
+ */
+
+CodeGen::psiScope* CodeGen::psiNewPrologScope(unsigned LVnum, unsigned slotNum)
+{
+ psiScope* newScope = (psiScope*)compiler->compGetMem(sizeof(*newScope), CMK_SiScope);
+
+ newScope->scStartLoc.CaptureLocation(getEmitter());
+ assert(newScope->scStartLoc.Valid());
+
+ newScope->scEndLoc.Init();
+
+ newScope->scLVnum = LVnum;
+ newScope->scSlotNum = slotNum;
+
+ newScope->scNext = nullptr;
+ psiOpenScopeLast->scNext = newScope;
+ newScope->scPrev = psiOpenScopeLast;
+ psiOpenScopeLast = newScope;
+
+ return newScope;
+}
+
+/*****************************************************************************
+ * psiEndPrologScope
+ *
+ * Remove the scope from the Open-scope list and add it to the finished-scopes
+ * list if its length is non-zero
+ */
+
+void CodeGen::psiEndPrologScope(psiScope* scope)
+{
+ scope->scEndLoc.CaptureLocation(getEmitter());
+ assert(scope->scEndLoc.Valid());
+
+ // Remove from open-scope list
+ scope->scPrev->scNext = scope->scNext;
+ if (scope->scNext)
+ {
+ scope->scNext->scPrev = scope->scPrev;
+ }
+ else
+ {
+ psiOpenScopeLast = scope->scPrev;
+ }
+
+ // Add to the finished scope list.
+ // If the length is zero, it means that the prolog is empty. In that case,
+ // CodeGen::genSetScopeInfo will report the liveness of all arguments
+ // as spanning the first instruction in the method, so that they can
+ // at least be inspected on entry to the method.
+ if (scope->scStartLoc != scope->scEndLoc || scope->scStartLoc.IsOffsetZero())
+ {
+ psiScopeLast->scNext = scope;
+ psiScopeLast = scope;
+ psiScopeCnt++;
+ }
+}
+
+/*============================================================================
+ * INTERFACE (protected) Functions for PrologScopeInfo
+ *============================================================================
+ */
+
+//------------------------------------------------------------------------
+// psSetScopeOffset: Set the offset of the newScope to the offset of the lclVar
+//
+// Arguments:
+// 'newScope' the new scope object whose offset is to be set to the lclVarDsc offset.
+//    'lclVarDsc'  the LclVarDsc of the variable whose stack offset is used to compute the scope offset.
+//
+//
+void CodeGen::psSetScopeOffset(psiScope* newScope, LclVarDsc* lclVarDsc)
+{
+ newScope->scRegister = false;
+ newScope->u2.scBaseReg = REG_SPBASE;
+
+#ifdef _TARGET_AMD64_
+ // scOffset = offset from caller SP - REGSIZE_BYTES
+ // TODO-Cleanup - scOffset needs to be understood. For now just matching with the existing definition.
+ newScope->u2.scOffset =
+ compiler->lvaToCallerSPRelativeOffset(lclVarDsc->lvStkOffs, lclVarDsc->lvFramePointerBased) + REGSIZE_BYTES;
+#else // !_TARGET_AMD64_
+ if (doubleAlignOrFramePointerUsed())
+ {
+ // REGSIZE_BYTES - for the pushed value of EBP
+ newScope->u2.scOffset = lclVarDsc->lvStkOffs - REGSIZE_BYTES;
+ }
+ else
+ {
+ newScope->u2.scOffset = lclVarDsc->lvStkOffs - genTotalFrameSize();
+ }
+#endif // !_TARGET_AMD64_
+}
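+
+// For example (illustrative numbers): on x86 with REGSIZE_BYTES == 4 and an
+// EBP frame, a stack parameter at lvStkOffs == 8 would be reported with
+// u2.scOffset == 4 (lvStkOffs - REGSIZE_BYTES, per the pushed-EBP adjustment above).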
+
+/*============================================================================
+* INTERFACE (public) Functions for PrologScopeInfo
+*============================================================================
+*/
+
+/*****************************************************************************
+ * psiBegProlog
+ *
+ * Initializes the PrologScopeInfo, and creates open scopes for all the
+ * parameters of the method.
+ */
+
+void CodeGen::psiBegProlog()
+{
+ assert(compiler->compGeneratingProlog);
+
+ VarScopeDsc* varScope;
+
+ psiOpenScopeList.scNext = nullptr;
+ psiOpenScopeLast = &psiOpenScopeList;
+ psiScopeLast = &psiScopeList;
+ psiScopeCnt = 0;
+
+ compiler->compResetScopeLists();
+
+ while ((varScope = compiler->compGetNextEnterScope(0)) != nullptr)
+ {
+ LclVarDsc* lclVarDsc1 = &compiler->lvaTable[varScope->vsdVarNum];
+
+ if (!lclVarDsc1->lvIsParam)
+ {
+ continue;
+ }
+
+ psiScope* newScope = psiNewPrologScope(varScope->vsdLVnum, varScope->vsdVarNum);
+
+ if (lclVarDsc1->lvIsRegArg)
+ {
+ bool isStructHandled = false;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ if (varTypeIsStruct(lclVarDsc1))
+ {
+ CORINFO_CLASS_HANDLE typeHnd = lclVarDsc1->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ if (structDesc.passedInRegisters)
+ {
+ regNumber regNum = REG_NA;
+ regNumber otherRegNum = REG_NA;
+ for (unsigned nCnt = 0; nCnt < structDesc.eightByteCount; nCnt++)
+ {
+ unsigned len = structDesc.eightByteSizes[nCnt];
+ var_types regType = TYP_UNDEF;
+
+ if (nCnt == 0)
+ {
+ regNum = lclVarDsc1->lvArgReg;
+ }
+ else if (nCnt == 1)
+ {
+ otherRegNum = lclVarDsc1->lvOtherArgReg;
+ }
+ else
+ {
+ assert(false && "Invalid eightbyte number.");
+ }
+
+ regType = compiler->GetEightByteType(structDesc, nCnt);
+#ifdef DEBUG
+ regType = compiler->mangleVarArgsType(regType);
+ assert(genMapRegNumToRegArgNum((nCnt == 0 ? regNum : otherRegNum), regType) != (unsigned)-1);
+#endif // DEBUG
+ }
+
+ newScope->scRegister = true;
+ newScope->u1.scRegNum = (regNumberSmall)regNum;
+ newScope->u1.scOtherReg = (regNumberSmall)otherRegNum;
+ }
+ else
+ {
+ // Stack passed argument. Get the offset from the caller's frame.
+ psSetScopeOffset(newScope, lclVarDsc1);
+ }
+
+ isStructHandled = true;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (!isStructHandled)
+ {
+#ifdef DEBUG
+ var_types regType = compiler->mangleVarArgsType(lclVarDsc1->TypeGet());
+ if (lclVarDsc1->lvIsHfaRegArg())
+ {
+ regType = lclVarDsc1->GetHfaType();
+ }
+ assert(genMapRegNumToRegArgNum(lclVarDsc1->lvArgReg, regType) != (unsigned)-1);
+#endif // DEBUG
+
+ newScope->scRegister = true;
+ newScope->u1.scRegNum = (regNumberSmall)lclVarDsc1->lvArgReg;
+ }
+ }
+ else
+ {
+ psSetScopeOffset(newScope, lclVarDsc1);
+ }
+ }
+}
+
+/*****************************************************************************
+ Enable this macro to get accurate prolog information for every instruction
+ in the prolog. However, this is overkill as nobody steps through the
+ disassembly of the prolog. Even if they do, they will not expect rich debug info.
+
+ We still report all the arguments at the very start of the method so that
+ the user can see the arguments at the very start of the method (offset=0).
+
+ Disabling this decreased the size of the debug maps in mscorlib by 10% (01/2003).
+ */
+
+#if 0
+#define ACCURATE_PROLOG_DEBUG_INFO
+#endif
+
+/*****************************************************************************
+ * psiAdjustStackLevel
+ *
+ * When ESP changes, all scopes relative to ESP have to be updated.
+ */
+
+void CodeGen::psiAdjustStackLevel(unsigned size)
+{
+#ifdef DEBUGGING_SUPPORT
+ if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0))
+ {
+ return;
+ }
+
+ assert(compiler->compGeneratingProlog);
+
+#ifdef ACCURATE_PROLOG_DEBUG_INFO
+
+ psiScope* scope;
+
+ // walk the list backwards
+ // Works as psiEndPrologScope does not change scPrev
+ for (scope = psiOpenScopeLast; scope != &psiOpenScopeList; scope = scope->scPrev)
+ {
+ if (scope->scRegister)
+ {
+ assert(compiler->lvaTable[scope->scSlotNum].lvIsRegArg);
+ continue;
+ }
+ assert(scope->u2.scBaseReg == REG_SPBASE);
+
+ psiScope* newScope = psiNewPrologScope(scope->scLVnum, scope->scSlotNum);
+ newScope->scRegister = false;
+ newScope->u2.scBaseReg = REG_SPBASE;
+ newScope->u2.scOffset = scope->u2.scOffset + size;
+
+ psiEndPrologScope(scope);
+ }
+
+#endif // ACCURATE_PROLOG_DEBUG_INFO
+#endif // DEBUGGING_SUPPORT
+}
+
+/*****************************************************************************
+ * psiMoveESPtoEBP
+ *
+ * For EBP-frames, the parameters are accessed via ESP on entry to the function,
+ * but via EBP right after a "mov ebp,esp" instruction
+ */
+
+void CodeGen::psiMoveESPtoEBP()
+{
+#ifdef DEBUGGING_SUPPORT
+ if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0))
+ {
+ return;
+ }
+
+ assert(compiler->compGeneratingProlog);
+ assert(doubleAlignOrFramePointerUsed());
+
+#ifdef ACCURATE_PROLOG_DEBUG_INFO
+
+ psiScope* scope;
+
+ // walk the list backwards
+ // Works as psiEndPrologScope does not change scPrev
+ for (scope = psiOpenScopeLast; scope != &psiOpenScopeList; scope = scope->scPrev)
+ {
+ if (scope->scRegister)
+ {
+ assert(compiler->lvaTable[scope->scSlotNum].lvIsRegArg);
+ continue;
+ }
+ assert(scope->u2.scBaseReg == REG_SPBASE);
+
+ psiScope* newScope = psiNewPrologScope(scope->scLVnum, scope->scSlotNum);
+ newScope->scRegister = false;
+ newScope->u2.scBaseReg = REG_FPBASE;
+ newScope->u2.scOffset = scope->u2.scOffset;
+
+ psiEndPrologScope(scope);
+ }
+
+#endif // ACCURATE_PROLOG_DEBUG_INFO
+#endif // DEBUGGING_SUPPORT
+}
+
+/*****************************************************************************
+ * psiMoveToReg
+ *
+ * Called when a parameter is loaded into its assigned register from the stack,
+ * or when parameters are moved around due to a circular dependency.
+ * If reg != REG_NA, the parameter is being moved through the given temp
+ * register, else it is being moved into its assigned register.
+ */
+
+void CodeGen::psiMoveToReg(unsigned varNum, regNumber reg, regNumber otherReg)
+{
+#ifdef DEBUGGING_SUPPORT
+ assert(compiler->compGeneratingProlog);
+
+ if (!compiler->opts.compScopeInfo)
+ {
+ return;
+ }
+
+ if (compiler->info.compVarScopesCount == 0)
+ {
+ return;
+ }
+
+ assert((int)varNum >= 0); // It's not a spill temp number.
+ assert(compiler->lvaTable[varNum].lvIsInReg());
+
+#ifdef ACCURATE_PROLOG_DEBUG_INFO
+
+    /* If reg != REG_NA, the parameter is part of a circular dependency, and is
+ * being moved through temp register "reg".
+ * If reg==REG_NA, it is being moved to its assigned register.
+ */
+ if (reg == REG_NA)
+ {
+ // Grab the assigned registers.
+
+ reg = compiler->lvaTable[varNum].lvRegNum;
+ otherReg = compiler->lvaTable[varNum].lvOtherReg;
+ }
+
+ psiScope* scope;
+
+ // walk the list backwards
+ // Works as psiEndPrologScope does not change scPrev
+ for (scope = psiOpenScopeLast; scope != &psiOpenScopeList; scope = scope->scPrev)
+ {
+ if (scope->scSlotNum != compiler->lvaTable[varNum].lvSlotNum)
+ continue;
+
+ psiScope* newScope = psiNewPrologScope(scope->scLVnum, scope->scSlotNum);
+ newScope->scRegister = true;
+ newScope->u1.scRegNum = reg;
+ newScope->u1.scOtherReg = otherReg;
+
+ psiEndPrologScope(scope);
+ return;
+ }
+
+    // This may happen if a parameter does not have an entry in the LocalVarTab,
+    // but assert() just in case it is because of something else.
+    assert(varNum == compiler->info.compRetBuffArg ||
+           !"Parameter scope not found (Assert doesn't always indicate an error)");
+
+#endif // ACCURATE_PROLOG_DEBUG_INFO
+#endif // DEBUGGING_SUPPORT
+}
+
+/*****************************************************************************
+ * CodeGen::psiMoveToStack
+ *
+ * An incoming register argument is being moved to its final home on the stack
+ * (i.e. all adjustments to {F/S}PBASE have been made).
+ */
+
+void CodeGen::psiMoveToStack(unsigned varNum)
+{
+#ifdef DEBUGGING_SUPPORT
+ if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0))
+ {
+ return;
+ }
+
+ assert(compiler->compGeneratingProlog);
+ assert(compiler->lvaTable[varNum].lvIsRegArg);
+ assert(!compiler->lvaTable[varNum].lvRegister);
+
+#ifdef ACCURATE_PROLOG_DEBUG_INFO
+
+ psiScope* scope;
+
+ // walk the list backwards
+ // Works as psiEndPrologScope does not change scPrev
+ for (scope = psiOpenScopeLast; scope != &psiOpenScopeList; scope = scope->scPrev)
+ {
+ if (scope->scSlotNum != compiler->lvaTable[varNum].lvSlotNum)
+ continue;
+
+        /* The param must currently be sitting in the register in which it
+           was passed */
+ assert(scope->scRegister);
+ assert(scope->u1.scRegNum == compiler->lvaTable[varNum].lvArgReg);
+
+ psiScope* newScope = psiNewPrologScope(scope->scLVnum, scope->scSlotNum);
+ newScope->scRegister = false;
+ newScope->u2.scBaseReg = (compiler->lvaTable[varNum].lvFramePointerBased) ? REG_FPBASE : REG_SPBASE;
+ newScope->u2.scOffset = compiler->lvaTable[varNum].lvStkOffs;
+
+ psiEndPrologScope(scope);
+ return;
+ }
+
+    // This may happen if a parameter does not have an entry in the LocalVarTab,
+    // but assert() just in case it is because of something else.
+    assert(varNum == compiler->info.compRetBuffArg ||
+           !"Parameter scope not found (Assert doesn't always indicate an error)");
+
+#endif // ACCURATE_PROLOG_DEBUG_INFO
+#endif // DEBUGGING_SUPPORT
+}
+
+/*****************************************************************************
+ * psiEndProlog
+ */
+
+void CodeGen::psiEndProlog()
+{
+ assert(compiler->compGeneratingProlog);
+ psiScope* scope;
+
+ for (scope = psiOpenScopeList.scNext; scope; scope = psiOpenScopeList.scNext)
+ {
+ psiEndPrologScope(scope);
+ }
+}
+
+/*****************************************************************************/
+#endif // DEBUGGING_SUPPORT
+/*****************************************************************************/
diff --git a/src/jit/sharedfloat.cpp b/src/jit/sharedfloat.cpp
new file mode 100644
index 0000000000..0dbbac4862
--- /dev/null
+++ b/src/jit/sharedfloat.cpp
@@ -0,0 +1,498 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// NOTE: The code in this file is only used for LEGACY_BACKEND compiles.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "compiler.h"
+#include "emit.h"
+#include "codegen.h"
+
+#ifdef LEGACY_BACKEND
+
+#if FEATURE_STACK_FP_X87
+regMaskTP RegSet::rsGetMaskUsed()
+{
+ return rsMaskUsedFloat;
+}
+regMaskTP RegSet::rsGetMaskVars()
+{
+ return rsMaskRegVarFloat;
+}
+regMaskTP RegSet::rsGetMaskLock()
+{
+ return rsMaskLockedFloat;
+}
+regMaskTP RegSet::rsGetMaskMult()
+{
+ return 0;
+}
+
+void RegSet::rsSetMaskUsed(regMaskTP maskUsed)
+{
+ rsMaskUsedFloat = maskUsed;
+}
+void RegSet::rsSetMaskVars(regMaskTP maskVars)
+{
+ rsMaskRegVarFloat = maskVars;
+}
+void RegSet::rsSetMaskLock(regMaskTP maskLock)
+{
+ rsMaskLockedFloat = maskLock;
+}
+
+void RegSet::rsSetUsedTree(regNumber regNum, GenTreePtr tree)
+{
+ assert(genUsedRegsFloat[regNum] == 0);
+ genUsedRegsFloat[regNum] = tree;
+}
+void RegSet::rsFreeUsedTree(regNumber regNum, GenTreePtr tree)
+{
+ assert(genUsedRegsFloat[regNum] == tree);
+ genUsedRegsFloat[regNum] = 0;
+}
+
+#else // !FEATURE_STACK_FP_X87
+regMaskTP RegSet::rsGetMaskUsed()
+{
+ return rsMaskUsed;
+}
+regMaskTP RegSet::rsGetMaskVars()
+{
+ return rsMaskVars;
+}
+regMaskTP RegSet::rsGetMaskLock()
+{
+ return rsMaskLock;
+}
+regMaskTP RegSet::rsGetMaskMult()
+{
+ return rsMaskMult;
+}
+
+void RegSet::rsSetMaskUsed(regMaskTP maskUsed)
+{
+ rsMaskUsed = maskUsed;
+}
+void RegSet::rsSetMaskVars(regMaskTP maskVars)
+{
+ rsMaskVars = maskVars;
+}
+void RegSet::rsSetMaskLock(regMaskTP maskLock)
+{
+ rsMaskLock = maskLock;
+}
+
+void RegSet::rsSetUsedTree(regNumber regNum, GenTreePtr tree)
+{
+ assert(rsUsedTree[regNum] == 0);
+ rsUsedTree[regNum] = tree;
+}
+void RegSet::rsFreeUsedTree(regNumber regNum, GenTreePtr tree)
+{
+ assert(rsUsedTree[regNum] == tree);
+ rsUsedTree[regNum] = 0;
+}
+#endif // !FEATURE_STACK_FP_X87
+
+// Float stress mode: locks out registers to create high register pressure.
+// This implies setting interferences in the register allocator and pushing regs in
+// the prolog and popping them before a ret.
+#ifdef DEBUG
+int CodeGenInterface::genStressFloat()
+{
+ return compiler->compStressCompile(Compiler::STRESS_FLATFP, 40) ? 1 : JitConfig.JitStressFP();
+}
+#endif
+
+regMaskTP RegSet::RegFreeFloat()
+{
+ regMaskTP mask = RBM_ALLFLOAT;
+#if FEATURE_FP_REGALLOC
+ mask &= m_rsCompiler->raConfigRestrictMaskFP();
+#endif
+
+ mask &= ~rsGetMaskUsed();
+ mask &= ~rsGetMaskLock();
+ mask &= ~rsGetMaskVars();
+
+#ifdef DEBUG
+ if (m_rsCompiler->codeGen->genStressFloat())
+ {
+ mask &= ~(m_rsCompiler->codeGen->genStressLockedMaskFloat());
+ }
+#endif
+ return mask;
+}
+
+#ifdef _TARGET_ARM_
+// The order in which registers are picked.
+// Go in reverse order to minimize the chance of spilling across calls.
+static const regNumber pickOrder[] = {REG_F15, REG_F14, REG_F13, REG_F12, REG_F11, REG_F10, REG_F9, REG_F8,
+ REG_F7, REG_F6, REG_F5, REG_F4, REG_F3, REG_F2, REG_F1, REG_F0,
+
+ REG_F16, REG_F17, REG_F18, REG_F19, REG_F20, REG_F21, REG_F22, REG_F23,
+ REG_F24, REG_F25, REG_F26, REG_F27, REG_F28, REG_F29, REG_F30, REG_F31};
+
+#elif _TARGET_AMD64_
+// The order in which registers are picked.
+static const regNumber pickOrder[] = {REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5,
+ REG_XMM6, REG_XMM7, REG_XMM8, REG_XMM9, REG_XMM10, REG_XMM11,
+ REG_XMM12, REG_XMM13, REG_XMM14, REG_XMM15};
+
+#elif _TARGET_X86_
+// The order in which registers are picked.
+static const regNumber pickOrder[] = {REG_FPV0, REG_FPV1, REG_FPV2, REG_FPV3, REG_FPV4, REG_FPV5, REG_FPV6, REG_FPV7};
+#endif
+
+// picks a reg other than the one specified
+regNumber RegSet::PickRegFloatOtherThan(GenTreePtr tree, var_types type, regNumber reg)
+{
+ return PickRegFloatOtherThan(type, reg);
+}
+
+regNumber RegSet::PickRegFloatOtherThan(var_types type, regNumber reg)
+{
+ RegisterPreference pref(RBM_ALLFLOAT ^ genRegMask(reg), 0);
+ return PickRegFloat(type, &pref);
+}
+
+regNumber RegSet::PickRegFloat(GenTreePtr tree, var_types type, RegisterPreference* pref, bool bUsed)
+{
+ return PickRegFloat(type, pref, bUsed);
+}
+
+regNumber RegSet::PickRegFloat(var_types type, RegisterPreference* pref, bool bUsed)
+{
+ regMaskTP wantedMask;
+ bool tryBest = true;
+ bool tryOk = true;
+ bool bSpill = false;
+ regNumber reg = REG_NA;
+
+ while (tryOk)
+ {
+ if (pref)
+ {
+ if (tryBest)
+ {
+ wantedMask = pref->best;
+ tryBest = false;
+ }
+ else
+ {
+ assert(tryOk);
+ wantedMask = pref->ok;
+ tryOk = false;
+ }
+ }
+ else // pref is NULL
+ {
+ wantedMask = RBM_ALLFLOAT;
+ tryBest = false;
+ tryOk = false;
+ }
+
+ // better not have asked for a non-fp register
+ assert((wantedMask & ~RBM_ALLFLOAT) == 0);
+
+ regMaskTP availMask = RegFreeFloat();
+ regMaskTP OKmask = availMask & wantedMask;
+
+ if (OKmask == 0)
+ {
+ if (tryOk)
+ {
+ // the pref->best mask doesn't work so try the pref->ok mask next
+ continue;
+ }
+
+ if (bUsed)
+ {
+ // Allow used registers to be picked
+ OKmask |= rsGetMaskUsed() & ~rsGetMaskLock();
+ bSpill = true;
+ }
+ }
+#if FEATURE_FP_REGALLOC
+ regMaskTP restrictMask = (m_rsCompiler->raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
+#endif
+
+ for (unsigned i = 0; i < ArrLen(pickOrder); i++)
+ {
+ regNumber r = pickOrder[i];
+ if (!floatRegCanHoldType(r, type))
+ continue;
+
+ regMaskTP mask = genRegMaskFloat(r, type);
+
+#if FEATURE_FP_REGALLOC
+ if ((mask & restrictMask) != mask)
+ continue;
+#endif
+ if ((OKmask & mask) == mask)
+ {
+ reg = r;
+ goto RET;
+ }
+ }
+
+ if (tryOk)
+ {
+ // We couldn't find a register using tryBest
+ continue;
+ }
+
+ assert(!"Unable to find a free FP virtual register");
+ NO_WAY("FP register allocator was too optimistic!");
+ }
+RET:
+ if (bSpill)
+ {
+ m_rsCompiler->codeGen->SpillFloat(reg);
+ }
+
+#if FEATURE_FP_REGALLOC
+ rsSetRegsModified(genRegMaskFloat(reg, type));
+#endif
+
+ return reg;
+}
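+
+// Usage sketch (illustrative; assumes this RegSet is reachable as `regSet`,
+// as it is from CodeGen):
+//
+//     // Prefer a callee-trash float register, but accept any free one.
+//     RegSet::RegisterPreference pref(RBM_ALLFLOAT, RBM_FLT_CALLEE_TRASH);
+//     regNumber reg = regSet.PickRegFloat(TYP_DOUBLE, &pref);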
+
+void RegSet::SetUsedRegFloat(GenTreePtr tree, bool bValue)
+{
+ /* The value must be sitting in a register */
+ assert(tree);
+ assert(tree->gtFlags & GTF_REG_VAL);
+
+ var_types type = tree->TypeGet();
+#ifdef _TARGET_ARM_
+ if (type == TYP_STRUCT)
+ {
+ assert(m_rsCompiler->IsHfa(tree));
+ type = TYP_FLOAT;
+ }
+#endif
+ regNumber regNum = tree->gtRegNum;
+ regMaskTP regMask = genRegMaskFloat(regNum, type);
+
+ if (bValue)
+ {
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s currently holds ", getRegNameFloat(regNum, type));
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif
+
+ // Mark as used
+ assert((rsGetMaskLock() & regMask) == 0);
+
+#if FEATURE_STACK_FP_X87
+ assert((rsGetMaskUsed() & regMask) == 0);
+#else
+ /* Is the register used by two different values simultaneously? */
+
+ if (regMask & rsGetMaskUsed())
+ {
+ /* Save the preceding use information */
+
+ rsRecMultiReg(regNum, type);
+ }
+#endif
+ /* Set the register's bit in the 'used' bitset */
+
+ rsSetMaskUsed((rsGetMaskUsed() | regMask));
+
+ // Assign slot
+ rsSetUsedTree(regNum, tree);
+ }
+ else
+ {
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s no longer holds ", getRegNameFloat(regNum, type));
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif
+
+ // Mark as free
+ assert((rsGetMaskUsed() & regMask) == regMask);
+
+        // Are we freeing a multi-use register?
+
+ if (regMask & rsGetMaskMult())
+ {
+ // Free any multi-use registers
+ rsMultRegFree(regMask);
+ return;
+ }
+
+ rsSetMaskUsed((rsGetMaskUsed() & ~regMask));
+
+ // Free slot
+ rsFreeUsedTree(regNum, tree);
+ }
+}
+
+void RegSet::SetLockedRegFloat(GenTree* tree, bool bValue)
+{
+ regNumber reg = tree->gtRegNum;
+ var_types type = tree->TypeGet();
+ assert(varTypeIsFloating(type));
+ regMaskTP regMask = genRegMaskFloat(reg, tree->TypeGet());
+
+ if (bValue)
+ {
+ JITDUMP("locking register %s\n", getRegNameFloat(reg, type));
+
+ assert((rsGetMaskUsed() & regMask) == regMask);
+ assert((rsGetMaskLock() & regMask) == 0);
+
+ rsSetMaskLock((rsGetMaskLock() | regMask));
+ }
+ else
+ {
+ JITDUMP("unlocking register %s\n", getRegNameFloat(reg, type));
+
+ assert((rsGetMaskUsed() & regMask) == regMask);
+ assert((rsGetMaskLock() & regMask) == regMask);
+
+ rsSetMaskLock((rsGetMaskLock() & ~regMask));
+ }
+}
+
+bool RegSet::IsLockedRegFloat(GenTreePtr tree)
+{
+ /* The value must be sitting in a register */
+ assert(tree);
+ assert(tree->gtFlags & GTF_REG_VAL);
+ assert(varTypeIsFloating(tree->TypeGet()));
+
+ regMaskTP regMask = genRegMaskFloat(tree->gtRegNum, tree->TypeGet());
+ return (rsGetMaskLock() & regMask) == regMask;
+}
+
+void CodeGen::UnspillFloat(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("UnspillFloat() for tree ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ RegSet::SpillDsc* cur = regSet.rsSpillFloat;
+ assert(cur);
+
+ while (cur->spillTree != tree)
+ cur = cur->spillNext;
+
+ UnspillFloat(cur);
+}
+
+void CodeGen::UnspillFloat(LclVarDsc* varDsc)
+{
+ JITDUMP("UnspillFloat() for var [%08p]\n", dspPtr(varDsc));
+
+ RegSet::SpillDsc* cur = regSet.rsSpillFloat;
+ assert(cur);
+
+ while (cur->spillVarDsc != varDsc)
+ cur = cur->spillNext;
+
+ UnspillFloat(cur);
+}
+
+void CodeGen::RemoveSpillDsc(RegSet::SpillDsc* spillDsc)
+{
+ RegSet::SpillDsc* cur;
+ RegSet::SpillDsc** prev;
+
+ for (cur = regSet.rsSpillFloat, prev = &regSet.rsSpillFloat; cur != spillDsc;
+ prev = &cur->spillNext, cur = cur->spillNext)
+ ; // EMPTY LOOP
+
+ assert(cur);
+
+ // Remove node from list
+ *prev = cur->spillNext;
+}
+
+void CodeGen::UnspillFloat(RegSet::SpillDsc* spillDsc)
+{
+ JITDUMP("UnspillFloat() for SpillDsc [%08p]\n", dspPtr(spillDsc));
+
+ RemoveSpillDsc(spillDsc);
+ UnspillFloatMachineDep(spillDsc);
+
+    // Release the spill temp before recycling the descriptor so that we don't
+    // read spillDsc->spillTemp after freeDsc() has returned it to the free list.
+    compiler->tmpRlsTemp(spillDsc->spillTemp);
+    RegSet::SpillDsc::freeDsc(&regSet, spillDsc);
+}
+
+#if FEATURE_STACK_FP_X87
+
+Compiler::fgWalkResult CodeGen::genRegVarDiesInSubTreeWorker(GenTreePtr* pTree, Compiler::fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ genRegVarDiesInSubTreeData* pData = (genRegVarDiesInSubTreeData*)data->pCallbackData;
+
+    // If this node is a death of the register variable we are looking for, record it and stop the walk.
+ if (tree->IsRegVar() && tree->IsRegVarDeath() && tree->gtRegVar.gtRegNum == pData->reg)
+ {
+ pData->result = true;
+ return Compiler::WALK_ABORT;
+ }
+
+ return Compiler::WALK_CONTINUE;
+}
+
+bool CodeGen::genRegVarDiesInSubTree(GenTreePtr tree, regNumber reg)
+{
+ genRegVarDiesInSubTreeData Data;
+ Data.reg = reg;
+ Data.result = false;
+
+ compiler->fgWalkTreePre(&tree, genRegVarDiesInSubTreeWorker, (void*)&Data);
+
+ return Data.result;
+}
+
+#endif // FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * Force floating point expression results to memory, to get rid of the extra
+ * 80-bit "temp-real" precision.
+ * Assumes the tree operand has been computed to the top of the stack.
+ * If type != TYP_UNDEF, that is the desired precision, else it is op->gtType.
+ */
+
+void CodeGen::genRoundFpExpression(GenTreePtr op, var_types type)
+{
+#if FEATURE_STACK_FP_X87
+ return genRoundFpExpressionStackFP(op, type);
+#else
+ return genRoundFloatExpression(op, type);
+#endif
+}
+
+void CodeGen::genCodeForTreeFloat(GenTreePtr tree, regMaskTP needReg, regMaskTP bestReg)
+{
+ RegSet::RegisterPreference pref(needReg, bestReg);
+ genCodeForTreeFloat(tree, &pref);
+}
+
+#endif // LEGACY_BACKEND
diff --git a/src/jit/sideeffects.cpp b/src/jit/sideeffects.cpp
new file mode 100644
index 0000000000..dbfa27cfae
--- /dev/null
+++ b/src/jit/sideeffects.cpp
@@ -0,0 +1,549 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "sideeffects.h"
+
+LclVarSet::LclVarSet() : m_bitVector(nullptr), m_hasAnyLcl(false), m_hasBitVector(false)
+{
+}
+
+//------------------------------------------------------------------------
+// LclVarSet::Add:
+// Adds the given lclNum to the LclVarSet.
+//
+// Arguments:
+// compiler - The compiler context
+// lclNum - The lclNum to add.
+//
+void LclVarSet::Add(Compiler* compiler, unsigned lclNum)
+{
+ if (!m_hasAnyLcl)
+ {
+ m_lclNum = lclNum;
+ m_hasAnyLcl = true;
+ }
+ else
+ {
+ if (!m_hasBitVector)
+ {
+ unsigned singleLclNum = m_lclNum;
+ m_bitVector = hashBv::Create(compiler);
+ m_bitVector->setBit(singleLclNum);
+ m_hasBitVector = true;
+ }
+
+ m_bitVector->setBit(lclNum);
+ }
+}
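+
+// Minimal usage sketch (hypothetical lclNums; assumes a valid Compiler* `comp`):
+//
+//     LclVarSet set;
+//     set.Add(comp, 2);   // stored as a single lclNum
+//     set.Add(comp, 7);   // a second Add switches to the hashBv bit vector
+//     assert(set.Contains(7) && !set.Contains(3));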
+
+//------------------------------------------------------------------------
+// LclVarSet::Intersects:
+// Returns true if this LclVarSet intersects with the given LclVarSet.
+//
+// Arguments:
+// other - The other lclVarSet.
+//
+bool LclVarSet::Intersects(const LclVarSet& other) const
+{
+ // If neither set has ever contained anything, the sets do not intersect.
+ if (!m_hasAnyLcl || !other.m_hasAnyLcl)
+ {
+ return false;
+ }
+
+ // If this set is not represented by a bit vector, see if the single lclNum is contained in the other set.
+ if (!m_hasBitVector)
+ {
+ if (!other.m_hasBitVector)
+ {
+ return m_lclNum == other.m_lclNum;
+ }
+
+ return other.m_bitVector->testBit(m_lclNum);
+ }
+
+ // If this set is represented by a bit vector but the other set is not, see if the single lclNum in the other
+ // set is contained in this set.
+ if (!other.m_hasBitVector)
+ {
+ return m_bitVector->testBit(other.m_lclNum);
+ }
+
+ // Both sets are represented by bit vectors. Check to see if they intersect.
+ return m_bitVector->Intersects(other.m_bitVector);
+}
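+
+// For example (hypothetical sets): a set holding only lclNum 2 intersects a
+// bit-vector set containing {2, 7}; the single lclNum is simply tested against
+// the other set's bit vector via testBit().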
+
+//------------------------------------------------------------------------
+// LclVarSet::Contains:
+// Returns true if this LclVarSet contains the given lclNum.
+//
+// Arguments:
+// lclNum - The lclNum in question.
+//
+bool LclVarSet::Contains(unsigned lclNum) const
+{
+ // If this set has never contained anything, it does not contain the lclNum.
+ if (!m_hasAnyLcl)
+ {
+ return false;
+ }
+
+ // If this set is not represented by a bit vector, see if its single lclNum is the same as the given lclNum.
+ if (!m_hasBitVector)
+ {
+ return m_lclNum == lclNum;
+ }
+
+ // This set is represented by a bit vector. See if the bit vector contains the given lclNum.
+ return m_bitVector->testBit(lclNum);
+}
+
+//------------------------------------------------------------------------
+// LclVarSet::Clear:
+// Clears the contents of this LclVarSet.
+//
+void LclVarSet::Clear()
+{
+ if (m_hasBitVector)
+ {
+ assert(m_hasAnyLcl);
+ m_bitVector->ZeroAll();
+ }
+ else if (m_hasAnyLcl)
+ {
+ m_hasAnyLcl = false;
+ }
+}
+
+AliasSet::AliasSet()
+ : m_lclVarReads(), m_lclVarWrites(), m_readsAddressableLocation(false), m_writesAddressableLocation(false)
+{
+}
+
+//------------------------------------------------------------------------
+// AliasSet::NodeInfo::NodeInfo:
+// Computes the alias info for a given node. Note that this does not
+// include the set of lclVar accesses for a node unless the node is
+// itself a lclVar access (e.g. a GT_LCL_VAR, GT_STORE_LCL_VAR, etc.).
+//
+// Arguments:
+// compiler - The compiler context.
+// node - The node in question.
+//
+AliasSet::NodeInfo::NodeInfo(Compiler* compiler, GenTree* node)
+ : m_compiler(compiler), m_node(node), m_flags(0), m_lclNum(0)
+{
+ if (node->IsCall())
+ {
+ // Calls are treated as reads and writes of addressable locations unless they are known to be pure.
+ if (node->AsCall()->IsPure(compiler))
+ {
+ m_flags = ALIAS_NONE;
+ return;
+ }
+
+ m_flags = ALIAS_READS_ADDRESSABLE_LOCATION | ALIAS_WRITES_ADDRESSABLE_LOCATION;
+ return;
+ }
+ else if (node->OperIsAtomicOp())
+ {
+ // Atomic operations both read and write addressable locations.
+ m_flags = ALIAS_READS_ADDRESSABLE_LOCATION | ALIAS_WRITES_ADDRESSABLE_LOCATION;
+ return;
+ }
+
+ // Is the operation a write? If so, set `node` to the location that is being written to.
+ bool isWrite = false;
+ if (node->OperIsAssignment())
+ {
+ isWrite = true;
+ node = node->gtGetOp1();
+ }
+ else if (node->OperIsStore() || node->OperIsAtomicOp())
+ {
+ isWrite = true;
+ }
+
+ // `node` is the location being accessed. Determine whether or not it is a memory or local variable access, and if
+ // it is the latter, get the number of the lclVar.
+ bool isMemoryAccess = false;
+ bool isLclVarAccess = false;
+ unsigned lclNum = 0;
+ if (node->OperIsIndir())
+ {
+ // If the indirection targets a lclVar, we can be more precise with regards to aliasing by treating the
+ // indirection as a lclVar access.
+ GenTree* address = node->AsIndir()->Addr();
+ if (address->OperIsLocalAddr())
+ {
+ isLclVarAccess = true;
+ lclNum = address->AsLclVarCommon()->GetLclNum();
+ }
+ else
+ {
+ isMemoryAccess = true;
+ }
+ }
+ else if (node->OperIsImplicitIndir())
+ {
+ isMemoryAccess = true;
+ }
+ else if (node->OperIsLocal())
+ {
+ isLclVarAccess = true;
+ lclNum = node->AsLclVarCommon()->GetLclNum();
+ }
+ else
+ {
+ // This is neither a memory nor a local var access.
+ m_flags = ALIAS_NONE;
+ return;
+ }
+
+ assert(isMemoryAccess || isLclVarAccess);
+
+ // Now that we've determined whether or not this access is a read or a write and whether the accessed location is
+ // memory or a lclVar, determine whether or not the location is addressable and update the alias set.
+ const bool isAddressableLocation = isMemoryAccess || compiler->lvaTable[lclNum].lvAddrExposed;
+
+ if (!isWrite)
+ {
+ if (isAddressableLocation)
+ {
+ m_flags |= ALIAS_READS_ADDRESSABLE_LOCATION;
+ }
+
+ if (isLclVarAccess)
+ {
+ m_flags |= ALIAS_READS_LCL_VAR;
+ m_lclNum = lclNum;
+ }
+ }
+ else
+ {
+ if (isAddressableLocation)
+ {
+ m_flags |= ALIAS_WRITES_ADDRESSABLE_LOCATION;
+ }
+
+ if (isLclVarAccess)
+ {
+ m_flags |= ALIAS_WRITES_LCL_VAR;
+ m_lclNum = lclNum;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// AliasSet::AddNode:
+// Adds the given node's accesses to this AliasSet.
+//
+// Arguments:
+// compiler - The compiler context.
+// node - The node to add to the set.
+//
+void AliasSet::AddNode(Compiler* compiler, GenTree* node)
+{
+ // First, add all lclVar uses associated with the node to the set. This is necessary because the lclVar reads occur
+ // at the position of the user, not at the position of the GenTreeLclVar node.
+ for (GenTree* operand : node->Operands())
+ {
+ if (operand->OperIsLocalRead())
+ {
+ const unsigned lclNum = operand->AsLclVarCommon()->GetLclNum();
+ if (compiler->lvaTable[lclNum].lvAddrExposed)
+ {
+ m_readsAddressableLocation = true;
+ }
+
+ m_lclVarReads.Add(compiler, lclNum);
+ }
+ }
+
+ NodeInfo nodeInfo(compiler, node);
+ if (nodeInfo.ReadsAddressableLocation())
+ {
+ m_readsAddressableLocation = true;
+ }
+ if (nodeInfo.WritesAddressableLocation())
+ {
+ m_writesAddressableLocation = true;
+ }
+ if (nodeInfo.IsLclVarRead())
+ {
+ m_lclVarReads.Add(compiler, nodeInfo.LclNum());
+ }
+ if (nodeInfo.IsLclVarWrite())
+ {
+ m_lclVarWrites.Add(compiler, nodeInfo.LclNum());
+ }
+}
+
+//------------------------------------------------------------------------
+// AliasSet::InterferesWith:
+// Returns true if the reads and writes in this alias set interfere
+// with the given alias set.
+//
+// Two alias sets interfere under any of the following conditions:
+// - Both sets write to any addressable location (e.g. the heap,
+// address-exposed locals)
+// - One set reads any addressable location and the other set writes
+// any addressable location
+// - Both sets write to the same lclVar
+// - One set writes to a lclVar that is read by the other set
+//
+// Arguments:
+// other - The other alias set.
+//
+bool AliasSet::InterferesWith(const AliasSet& other) const
+{
+ // If both sets write any addressable location, the sets interfere.
+ if (m_writesAddressableLocation && other.m_writesAddressableLocation)
+ {
+ return true;
+ }
+
+ // If one set writes any addressable location and the other reads any addressable location, the sets interfere.
+ if ((m_readsAddressableLocation && other.m_writesAddressableLocation) ||
+ (m_writesAddressableLocation && other.m_readsAddressableLocation))
+ {
+ return true;
+ }
+
+ // If the set of lclVars written by this alias set intersects with the set of lclVars accessed by the other alias
+ // set, the alias sets interfere.
+ if (m_lclVarWrites.Intersects(other.m_lclVarReads) || m_lclVarWrites.Intersects(other.m_lclVarWrites))
+ {
+ return true;
+ }
+
+ // If the set of lclVars read by this alias set intersects with the set of lclVars written by the other alias set,
+ // the alias sets interfere. Otherwise, the alias sets do not interfere.
+ return m_lclVarReads.Intersects(other.m_lclVarWrites);
+}
+
+//------------------------------------------------------------------------
+// AliasSet::InterferesWith:
+// Returns true if the reads and writes in this alias set interfere
+// with those for the given node.
+//
+// An alias set interferes with a given node iff it interferes with the
+// alias set for that node.
+//
+// Arguments:
+// other - The info for the node in question.
+//
+bool AliasSet::InterferesWith(const NodeInfo& other) const
+{
+ // First check whether or not this set interferes with the lclVar uses associated with the given node.
+ if (m_writesAddressableLocation || !m_lclVarWrites.IsEmpty())
+ {
+ Compiler* compiler = other.TheCompiler();
+ for (GenTree* operand : other.Node()->Operands())
+ {
+ if (operand->OperIsLocalRead())
+ {
+ // If this set writes any addressable location and the node uses an address-exposed lclVar,
+ // the set interferes with the node.
+ const unsigned lclNum = operand->AsLclVarCommon()->GetLclNum();
+ if (compiler->lvaTable[lclNum].lvAddrExposed && m_writesAddressableLocation)
+ {
+ return true;
+ }
+
+ // If this set writes to a lclVar used by the node, the set interferes with the node.
+ if (m_lclVarWrites.Contains(lclNum))
+ {
+ return true;
+ }
+ }
+ }
+ }
+
+ // If the node and the set both write to any addressable location, they interfere.
+ if (m_writesAddressableLocation && other.WritesAddressableLocation())
+ {
+ return true;
+ }
+
+ // If the node or the set writes any addressable location and the other reads any addressable location,
+ // they interfere.
+ if ((m_readsAddressableLocation && other.WritesAddressableLocation()) ||
+ (m_writesAddressableLocation && other.ReadsAddressableLocation()))
+ {
+ return true;
+ }
+
+ // If the set writes a local var accessed by the node, they interfere.
+ if ((other.IsLclVarRead() || other.IsLclVarWrite()) && m_lclVarWrites.Contains(other.LclNum()))
+ {
+ return true;
+ }
+
+ // If the set reads a local var written by the node, they interfere.
+ return other.IsLclVarWrite() && m_lclVarReads.Contains(other.LclNum());
+}
+
+//------------------------------------------------------------------------
+// AliasSet::Clear:
+// Clears the current alias set.
+//
+void AliasSet::Clear()
+{
+ m_readsAddressableLocation = false;
+ m_writesAddressableLocation = false;
+
+ m_lclVarReads.Clear();
+ m_lclVarWrites.Clear();
+}
+
+SideEffectSet::SideEffectSet() : m_sideEffectFlags(0), m_aliasSet()
+{
+}
+
+//------------------------------------------------------------------------
+// SideEffectSet::SideEffectSet:
+// Constructs a side effect set initialized using the given node.
+// Equivalent to the following:
+//
+// SideEffectSet sideEffectSet;
+// sideEffectSet.AddNode(compiler, node);
+//
+// Arguments:
+// compiler - The compiler context.
+// node - The node to use for initialization.
+//
+SideEffectSet::SideEffectSet(Compiler* compiler, GenTree* node) : m_sideEffectFlags(0), m_aliasSet()
+{
+ AddNode(compiler, node);
+}
+
+//------------------------------------------------------------------------
+// SideEffectSet::AddNode:
+// Adds the given node's accesses to this SideEffectSet.
+//
+// Arguments:
+// compiler - The compiler context.
+// node - The node to add to the set.
+//
+void SideEffectSet::AddNode(Compiler* compiler, GenTree* node)
+{
+ m_sideEffectFlags |= (node->gtFlags & GTF_ALL_EFFECT);
+ m_aliasSet.AddNode(compiler, node);
+}
+
+//------------------------------------------------------------------------
+// SideEffectSet::InterferesWith:
+// Returns true if the side effects in this set interfere with the
+// given side effect flags and alias information.
+//
+// Two side effect sets interfere under any of the following
+// conditions:
+// - If the analysis is strict, and:
+// - Either set contains a compiler barrier, or
+// - Both sets produce an exception
+// - Whether or not the analysis is strict:
+// - One set produces an exception and the other set contains a
+// write
+// - One set's reads and writes interfere with the other set's
+// reads and writes
+//
+// Arguments:
+// otherSideEffectFlags - The side effect flags for the other side
+// effect set.
+// otherAliasInfo - The alias information for the other side effect
+// set.
+// strict - True if the analysis should be strict as described above.
+//
+template <typename TOtherAliasInfo>
+bool SideEffectSet::InterferesWith(unsigned otherSideEffectFlags,
+ const TOtherAliasInfo& otherAliasInfo,
+ bool strict) const
+{
+ const bool thisProducesException = (m_sideEffectFlags & GTF_EXCEPT) != 0;
+ const bool otherProducesException = (otherSideEffectFlags & GTF_EXCEPT) != 0;
+
+ if (strict)
+ {
+ // If either set contains a compiler barrier, the sets interfere.
+ if (((m_sideEffectFlags | otherSideEffectFlags) & GTF_ORDER_SIDEEFF) != 0)
+ {
+ return true;
+ }
+
+ // If both sets produce an exception, the sets interfere.
+ if (thisProducesException && otherProducesException)
+ {
+ return true;
+ }
+ }
+
+ // If one set produces an exception and the other set writes to any location, the sets interfere.
+ if ((thisProducesException && otherAliasInfo.WritesAnyLocation()) ||
+ (otherProducesException && m_aliasSet.WritesAnyLocation()))
+ {
+ return true;
+ }
+
+ // At this point, the only interference between the sets will arise from their alias sets.
+ return m_aliasSet.InterferesWith(otherAliasInfo);
+}
+
+//------------------------------------------------------------------------
+// SideEffectSet::InterferesWith:
+// Returns true if the side effects in this set interfere with the side
+// effects in the given side effect set.
+//
+// Two side effect sets interfere under any of the following
+// conditions:
+// - If the analysis is strict, and:
+// - Either set contains a compiler barrier, or
+// - Both sets produce an exception
+// - Whether or not the analysis is strict:
+// - One set produces an exception and the other set contains a
+// write
+// - One set's reads and writes interfere with the other set's
+// reads and writes
+//
+// Arguments:
+// other - The other side effect set.
+// strict - True if the analysis should be strict as described above.
+//
+bool SideEffectSet::InterferesWith(const SideEffectSet& other, bool strict) const
+{
+ return InterferesWith(other.m_sideEffectFlags, other.m_aliasSet, strict);
+}
+
+//------------------------------------------------------------------------
+// SideEffectSet::InterferesWith:
+// Returns true if the side effects in this set interfere with the side
+// effects for the given node.
+//
+// A side effect set interferes with a given node iff it interferes
+// with the side effect set of the node.
+//
+// Arguments:
+// compiler - The compiler context.
+// node - The node in question.
+// strict - True if the analysis should be strict as described above.
+//
+bool SideEffectSet::InterferesWith(Compiler* compiler, GenTree* node, bool strict) const
+{
+ return InterferesWith((node->gtFlags & GTF_ALL_EFFECT), AliasSet::NodeInfo(compiler, node), strict);
+}
+
+//------------------------------------------------------------------------
+// SideEffectSet::Clear:
+// Clears the current side effect set.
+//
+void SideEffectSet::Clear()
+{
+ m_sideEffectFlags = 0;
+ m_aliasSet.Clear();
+}
diff --git a/src/jit/sideeffects.h b/src/jit/sideeffects.h
new file mode 100644
index 0000000000..33fac16f05
--- /dev/null
+++ b/src/jit/sideeffects.h
@@ -0,0 +1,158 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _SIDEEFFECTS_H_
+#define _SIDEEFFECTS_H_
+
+//------------------------------------------------------------------------
+// LclVarSet:
+// Represents a set of lclVars. Optimized for the case that the set
+// never holds more than a single element. This type is used internally
+// by `AliasSet` to track the sets of lclVars that are read and
+// written for a given alias set.
+//
+class LclVarSet final
+{
+ union {
+ hashBv* m_bitVector;
+ unsigned m_lclNum;
+ };
+
+ bool m_hasAnyLcl;
+ bool m_hasBitVector;
+
+public:
+ LclVarSet();
+
+ inline bool IsEmpty() const
+ {
+ return !m_hasAnyLcl || (m_hasBitVector && !m_bitVector->anySet());
+ }
+
+ void Add(Compiler* compiler, unsigned lclNum);
+ bool Intersects(const LclVarSet& other) const;
+ bool Contains(unsigned lclNum) const;
+ void Clear();
+};
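+
+// For illustration (informal, not part of the interface contract): a LclVarSet that has only ever
+// seen `Add(compiler, 2)` tracks the single lclNum inline in `m_lclNum`; a later
+// `Add(compiler, 3)` switches the representation to a bit vector containing both lclNums, and all
+// subsequent queries go through that bit vector.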
+
+//------------------------------------------------------------------------
+// AliasSet:
+// Represents a set of reads and writes for the purposes of alias
+// analysis. This type partitions storage into two categories:
+// lclVars and addressable locations. The definition of the former is
+// intuitive. The latter is the union of the set of address-exposed
+// lclVars with the set of all other memory locations. Any memory
+// access is assumed to alias any other memory access.
+//
+class AliasSet final
+{
+ LclVarSet m_lclVarReads;
+ LclVarSet m_lclVarWrites;
+
+ bool m_readsAddressableLocation;
+ bool m_writesAddressableLocation;
+
+public:
+ //------------------------------------------------------------------------
+ // AliasSet::NodeInfo:
+ // Represents basic alias information for a single IR node.
+ //
+ class NodeInfo final
+ {
+ enum : unsigned
+ {
+ ALIAS_NONE = 0x0,
+ ALIAS_READS_ADDRESSABLE_LOCATION = 0x1,
+ ALIAS_WRITES_ADDRESSABLE_LOCATION = 0x2,
+ ALIAS_READS_LCL_VAR = 0x4,
+ ALIAS_WRITES_LCL_VAR = 0x8
+ };
+
+ Compiler* m_compiler;
+ GenTree* m_node;
+ unsigned m_flags;
+ unsigned m_lclNum;
+
+ public:
+ NodeInfo(Compiler* compiler, GenTree* node);
+
+ inline Compiler* TheCompiler() const
+ {
+ return m_compiler;
+ }
+
+ inline GenTree* Node() const
+ {
+ return m_node;
+ }
+
+ inline bool ReadsAddressableLocation() const
+ {
+ return (m_flags & ALIAS_READS_ADDRESSABLE_LOCATION) != 0;
+ }
+
+ inline bool WritesAddressableLocation() const
+ {
+ return (m_flags & ALIAS_WRITES_ADDRESSABLE_LOCATION) != 0;
+ }
+
+ inline bool IsLclVarRead() const
+ {
+ return (m_flags & ALIAS_READS_LCL_VAR) != 0;
+ }
+
+ inline bool IsLclVarWrite() const
+ {
+ return (m_flags & ALIAS_WRITES_LCL_VAR) != 0;
+ }
+
+ inline unsigned LclNum() const
+ {
+ assert(IsLclVarRead() || IsLclVarWrite());
+ return m_lclNum;
+ }
+
+ inline bool WritesAnyLocation() const
+ {
+ return (m_flags & (ALIAS_WRITES_ADDRESSABLE_LOCATION | ALIAS_WRITES_LCL_VAR)) != 0;
+ }
+ };
+
+ AliasSet();
+
+ inline bool WritesAnyLocation() const
+ {
+ return m_writesAddressableLocation || !m_lclVarWrites.IsEmpty();
+ }
+
+ void AddNode(Compiler* compiler, GenTree* node);
+ bool InterferesWith(const AliasSet& other) const;
+ bool InterferesWith(const NodeInfo& node) const;
+ void Clear();
+};
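+
+// For illustration (informal): a store through an indirection whose address is not a local
+// address (a write to an addressable location) interferes with a read of an address-exposed
+// lclVar, since both touch addressable storage; by itself it does not interfere with a read of a
+// non-address-exposed lclVar that it does not also write.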
+
+//------------------------------------------------------------------------
+// SideEffectSet:
+// Represents a set of side effects for the purposes of analyzing code
+// motion.
+//
+class SideEffectSet final
+{
+ unsigned m_sideEffectFlags; // A mask of GTF_* flags that represents exceptional and barrier side effects.
+ AliasSet m_aliasSet; // An AliasSet that represents read and write side effects.
+
+ template <typename TOtherAliasInfo>
+ bool InterferesWith(unsigned otherSideEffectFlags, const TOtherAliasInfo& otherAliasInfo, bool strict) const;
+
+public:
+ SideEffectSet();
+ SideEffectSet(Compiler* compiler, GenTree* node);
+
+ void AddNode(Compiler* compiler, GenTree* node);
+ bool InterferesWith(const SideEffectSet& other, bool strict) const;
+ bool InterferesWith(Compiler* compiler, GenTree* node, bool strict) const;
+ void Clear();
+};
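+
+// A hypothetical usage sketch (illustrative only, not part of this change): a code-motion pass
+// that wants to reorder `node` with the nodes in the range [rangeStart, node) could accumulate
+// the side effects of the intervening nodes and query for interference:
+//
+//     SideEffectSet blockedEffects;
+//     for (GenTree* cur = rangeStart; cur != node; cur = cur->gtNext)
+//     {
+//         blockedEffects.AddNode(compiler, cur);
+//     }
+//
+//     if (!blockedEffects.InterferesWith(compiler, node, true))
+//     {
+//         // Under strict analysis it is safe to reorder `node` with the accumulated nodes.
+//     }
+//
+// Here `rangeStart` and the use of `gtNext` to walk nodes in linear order are assumptions made
+// for the sake of the example.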
+
+#endif // _SIDEEFFECTS_H_
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
new file mode 100644
index 0000000000..1f0c867b55
--- /dev/null
+++ b/src/jit/simd.cpp
@@ -0,0 +1,2556 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// SIMD Support
+//
+// IMPORTANT NOTES AND CAVEATS:
+//
+// This implementation is preliminary, and may change dramatically.
+//
+// New JIT types, TYP_SIMDxx, are introduced, and the SIMD intrinsics are created as GT_SIMD nodes.
+// Nodes of SIMD types will be typed as TYP_SIMD* (e.g. TYP_SIMD8, TYP_SIMD16, etc.).
+//
+// Note that currently the "reference implementation" is the same as the runtime dll. As such, it is currently
+// providing implementations for those methods not currently supported by the JIT as intrinsics.
+//
+// These are currently recognized using string compares, in order to provide an implementation in the JIT
+// without taking a dependency on the VM.
+// Furthermore, in the CTP, in order to limit the impact of doing these string compares
+// against assembly names, we only look for the SIMDVector assembly if we are compiling a class constructor. This
+// makes it somewhat more "pay for play" but is a significant usability compromise.
+// This has been addressed for RTM by doing the assembly recognition in the VM.
+// --------------------------------------------------------------------------------------
+
+#include "jitpch.h"
+#include "simd.h"
+
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.
+
+#ifdef FEATURE_SIMD
+
+// Intrinsic Id to intrinsic info map
+const SIMDIntrinsicInfo simdIntrinsicInfoArray[] = {
+#define SIMD_INTRINSIC(mname, inst, id, name, retType, argCount, arg1, arg2, arg3, t1, t2, t3, t4, t5, t6, t7, t8, t9, \
+ t10) \
+ {SIMDIntrinsic##id, mname, inst, retType, argCount, arg1, arg2, arg3, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10},
+#include "simdintrinsiclist.h"
+};
+
+//------------------------------------------------------------------------
+// getSIMDVectorLength: Get the length (number of elements of base type) of
+// SIMD Vector given its size and base (element) type.
+//
+// Arguments:
+// simdSize - size of the SIMD vector
+// baseType - type of the elements of the SIMD vector
+//
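+// For example (illustrative): a 16-byte vector with TYP_FLOAT elements has 16 / 4 = 4
+// elements, while a 16-byte vector with TYP_DOUBLE elements has 16 / 8 = 2.
+//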
+// static
+int Compiler::getSIMDVectorLength(unsigned simdSize, var_types baseType)
+{
+ return simdSize / genTypeSize(baseType);
+}
+
+//------------------------------------------------------------------------
+// Get the length (number of elements of base type) of SIMD Vector given by typeHnd.
+//
+// Arguments:
+// typeHnd - type handle of the SIMD vector
+//
+int Compiler::getSIMDVectorLength(CORINFO_CLASS_HANDLE typeHnd)
+{
+ unsigned sizeBytes = 0;
+ var_types baseType = getBaseTypeAndSizeOfSIMDType(typeHnd, &sizeBytes);
+ return getSIMDVectorLength(sizeBytes, baseType);
+}
+
+//------------------------------------------------------------------------
+// Get the preferred alignment of SIMD vector type for better performance.
+//
+// Arguments:
+// simdType - The SIMD vector type whose preferred alignment is requested
+//
+int Compiler::getSIMDTypeAlignment(var_types simdType)
+{
+#ifdef _TARGET_AMD64_
+ // Fixed length vectors have the following alignment preference
+ // Vector2/3 = 8 byte alignment
+ // Vector4 = 16-byte alignment
+ unsigned size = genTypeSize(simdType);
+
+ // preferred alignment for SSE2 128-bit vectors is 16-bytes
+ if (size == 8)
+ {
+ return 8;
+ }
+
+ // As per Intel manual, AVX vectors preferred alignment is 32-bytes but on Amd64
+ // RSP/EBP is aligned at 16-bytes, therefore to align SIMD types at 32-bytes we need even
+ // RSP/EBP to be 32-byte aligned. It is not clear whether additional stack space used in
+ // aligning stack is worth the benefit and for now will use 16-byte alignment for AVX
+ // 256-bit vectors with unaligned load/stores to/from memory.
+ return 16;
+#else
+ assert(!"getSIMDTypeAlignment() unimplemented on target arch");
+ unreached();
+#endif
+}
+
+//----------------------------------------------------------------------------------
+// Return the base type and size of SIMD vector type given its type handle.
+//
+// Arguments:
+// typeHnd - The handle of the type we're interested in.
+// sizeBytes - out param
+//
+// Return Value:
+// base type of SIMD vector.
+// sizeBytes if non-null is set to size in bytes.
+//
+// TODO-Throughput: current implementation parses class name to find base type. Change
+// this when we implement SIMD intrinsic identification for the final
+// product.
+//
+var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, unsigned* sizeBytes /*= nullptr */)
+{
+ assert(featureSIMD);
+ if (typeHnd == nullptr)
+ {
+ return TYP_UNKNOWN;
+ }
+
+ // fast path search using cached type handles of important types
+ var_types simdBaseType = TYP_UNKNOWN;
+ unsigned size = 0;
+
+ // Early return if it is not a SIMD module.
+ if (!isSIMDClass(typeHnd))
+ {
+ return TYP_UNKNOWN;
+ }
+
+ // The most likely to be used type handles are looked up first followed by
+ // less likely to be used type handles
+ if (typeHnd == SIMDFloatHandle)
+ {
+ simdBaseType = TYP_FLOAT;
+ JITDUMP(" Known type SIMD Vector<Float>\n");
+ }
+ else if (typeHnd == SIMDIntHandle)
+ {
+ simdBaseType = TYP_INT;
+ JITDUMP(" Known type SIMD Vector<Int>\n");
+ }
+ else if (typeHnd == SIMDVector2Handle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = 2 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Known type Vector2\n");
+ }
+ else if (typeHnd == SIMDVector3Handle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = 3 * genTypeSize(TYP_FLOAT);
+ assert(size == info.compCompHnd->getClassSize(typeHnd));
+ JITDUMP(" Known type Vector3\n");
+ }
+ else if (typeHnd == SIMDVector4Handle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = 4 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Known type Vector4\n");
+ }
+ else if (typeHnd == SIMDVectorHandle)
+ {
+ JITDUMP(" Known type Vector\n");
+ }
+ else if (typeHnd == SIMDUShortHandle)
+ {
+ simdBaseType = TYP_CHAR;
+ JITDUMP(" Known type SIMD Vector<ushort>\n");
+ }
+ else if (typeHnd == SIMDUByteHandle)
+ {
+ simdBaseType = TYP_UBYTE;
+ JITDUMP(" Known type SIMD Vector<ubyte>\n");
+ }
+ else if (typeHnd == SIMDDoubleHandle)
+ {
+ simdBaseType = TYP_DOUBLE;
+ JITDUMP(" Known type SIMD Vector<Double>\n");
+ }
+ else if (typeHnd == SIMDLongHandle)
+ {
+ simdBaseType = TYP_LONG;
+ JITDUMP(" Known type SIMD Vector<Long>\n");
+ }
+ else if (typeHnd == SIMDShortHandle)
+ {
+ simdBaseType = TYP_SHORT;
+ JITDUMP(" Known type SIMD Vector<short>\n");
+ }
+ else if (typeHnd == SIMDByteHandle)
+ {
+ simdBaseType = TYP_BYTE;
+ JITDUMP(" Known type SIMD Vector<byte>\n");
+ }
+ else if (typeHnd == SIMDUIntHandle)
+ {
+ simdBaseType = TYP_UINT;
+ JITDUMP(" Known type SIMD Vector<uint>\n");
+ }
+ else if (typeHnd == SIMDULongHandle)
+ {
+ simdBaseType = TYP_ULONG;
+ JITDUMP(" Known type SIMD Vector<ulong>\n");
+ }
+
+ // slow path search
+ if (simdBaseType == TYP_UNKNOWN)
+ {
+ // Doesn't match with any of the cached type handles.
+ // Obtain base type by parsing fully qualified class name.
+ //
+ // TODO-Throughput: implement product shipping solution to query base type.
+ WCHAR className[256] = {0};
+ WCHAR* pbuf = &className[0];
+ int len = sizeof(className) / sizeof(className[0]);
+ info.compCompHnd->appendClassName(&pbuf, &len, typeHnd, TRUE, FALSE, FALSE);
+ noway_assert(pbuf < &className[256]);
+ JITDUMP("SIMD Candidate Type %S\n", className);
+
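+ // The offsets used below follow from the prefix lengths: "System.Numerics." is 16 characters
+ // and "Vector`1[" adds another 9, so for Vector<T> the element type name starts at index 25.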
+ if (wcsncmp(className, W("System.Numerics."), 16) == 0)
+ {
+ if (wcsncmp(&(className[16]), W("Vector`1["), 9) == 0)
+ {
+ if (wcsncmp(&(className[25]), W("System.Single"), 13) == 0)
+ {
+ SIMDFloatHandle = typeHnd;
+ simdBaseType = TYP_FLOAT;
+ JITDUMP(" Found type SIMD Vector<Float>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Int32"), 12) == 0)
+ {
+ SIMDIntHandle = typeHnd;
+ simdBaseType = TYP_INT;
+ JITDUMP(" Found type SIMD Vector<Int>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.UInt16"), 13) == 0)
+ {
+ SIMDUShortHandle = typeHnd;
+ simdBaseType = TYP_CHAR;
+ JITDUMP(" Found type SIMD Vector<ushort>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Byte"), 11) == 0)
+ {
+ SIMDUByteHandle = typeHnd;
+ simdBaseType = TYP_UBYTE;
+ JITDUMP(" Found type SIMD Vector<ubyte>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Double"), 13) == 0)
+ {
+ SIMDDoubleHandle = typeHnd;
+ simdBaseType = TYP_DOUBLE;
+ JITDUMP(" Found type SIMD Vector<Double>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Int64"), 12) == 0)
+ {
+ SIMDLongHandle = typeHnd;
+ simdBaseType = TYP_LONG;
+ JITDUMP(" Found type SIMD Vector<Long>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Int16"), 12) == 0)
+ {
+ SIMDShortHandle = typeHnd;
+ simdBaseType = TYP_SHORT;
+ JITDUMP(" Found type SIMD Vector<short>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.SByte"), 12) == 0)
+ {
+ SIMDByteHandle = typeHnd;
+ simdBaseType = TYP_BYTE;
+ JITDUMP(" Found type SIMD Vector<byte>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.UInt32"), 13) == 0)
+ {
+ SIMDUIntHandle = typeHnd;
+ simdBaseType = TYP_UINT;
+ JITDUMP(" Found type SIMD Vector<uint>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.UInt64"), 13) == 0)
+ {
+ SIMDULongHandle = typeHnd;
+ simdBaseType = TYP_ULONG;
+ JITDUMP(" Found type SIMD Vector<ulong>\n");
+ }
+ else
+ {
+ JITDUMP(" Unknown SIMD Vector<T>\n");
+ }
+ }
+ else if (wcsncmp(&(className[16]), W("Vector2"), 8) == 0)
+ {
+ SIMDVector2Handle = typeHnd;
+
+ simdBaseType = TYP_FLOAT;
+ size = 2 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Found Vector2\n");
+ }
+ else if (wcsncmp(&(className[16]), W("Vector3"), 8) == 0)
+ {
+ SIMDVector3Handle = typeHnd;
+
+ simdBaseType = TYP_FLOAT;
+ size = 3 * genTypeSize(TYP_FLOAT);
+ assert(size == info.compCompHnd->getClassSize(typeHnd));
+ JITDUMP(" Found Vector3\n");
+ }
+ else if (wcsncmp(&(className[16]), W("Vector4"), 8) == 0)
+ {
+ SIMDVector4Handle = typeHnd;
+
+ simdBaseType = TYP_FLOAT;
+ size = 4 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Found Vector4\n");
+ }
+ else if (wcsncmp(&(className[16]), W("Vector"), 6) == 0)
+ {
+ SIMDVectorHandle = typeHnd;
+ JITDUMP(" Found type Vector\n");
+ }
+ else
+ {
+ JITDUMP(" Unknown SIMD Type\n");
+ }
+ }
+ }
+
+ if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr)
+ {
+ // If not a fixed size vector then its size is same as SIMD vector
+ // register length in bytes
+ if (size == 0)
+ {
+ size = getSIMDVectorRegisterByteLength();
+ }
+
+ *sizeBytes = size;
+ }
+
+ return simdBaseType;
+}
+
+//--------------------------------------------------------------------------------------
+// getSIMDIntrinsicInfo: get SIMD intrinsic info given the method handle.
+//
+// Arguments:
+// inOutTypeHnd - The handle of the type on which the method is invoked. This is an in-out param.
+// methodHnd - The handle of the method we're interested in.
+// sig - method signature info
+// isNewObj - whether this call represents a newobj constructor call
+// argCount - argument count - out param
+// baseType - base type of the intrinsic - out param
+// sizeBytes - size of SIMD vector type on which the method is invoked - out param
+//
+// Return Value:
+// SIMDIntrinsicInfo struct initialized corresponding to methodHnd.
+// Sets SIMDIntrinsicInfo.id to SIMDIntrinsicInvalid if methodHnd doesn't correspond
+// to any SIMD intrinsic. Also, sets the out params inOutTypeHnd, argCount, baseType and
+// sizeBytes.
+//
+// Note that the VectorMath class doesn't have a base type and the first argument of the method
+// determines the SIMD vector type on which the intrinsic is invoked. In such a case, inOutTypeHnd
+// is modified by this routine.
+//
+// TODO-Throughput: The current implementation is based on method name string parsing.
+// Although we now have type identification from the VM, the parsing of intrinsic names
+// could be made more efficient.
+//
+const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* inOutTypeHnd,
+ CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_SIG_INFO* sig,
+ bool isNewObj,
+ unsigned* argCount,
+ var_types* baseType,
+ unsigned* sizeBytes)
+{
+ assert(featureSIMD);
+ assert(baseType != nullptr);
+ assert(sizeBytes != nullptr);
+
+ // get baseType and size of the type
+ CORINFO_CLASS_HANDLE typeHnd = *inOutTypeHnd;
+ *baseType = getBaseTypeAndSizeOfSIMDType(typeHnd, sizeBytes);
+
+ bool isHWAcceleratedIntrinsic = false;
+ if (typeHnd == SIMDVectorHandle)
+ {
+ // All of the supported intrinsics on this static class take a first argument that's a vector,
+ // which determines the baseType.
+ // The exception is the IsHardwareAccelerated property, which is handled as a special case.
+ assert(*baseType == TYP_UNKNOWN);
+ if (sig->numArgs == 0)
+ {
+ const SIMDIntrinsicInfo* hwAccelIntrinsicInfo = &(simdIntrinsicInfoArray[SIMDIntrinsicHWAccel]);
+ if ((strcmp(eeGetMethodName(methodHnd, nullptr), hwAccelIntrinsicInfo->methodName) == 0) &&
+ JITtype2varType(sig->retType) == hwAccelIntrinsicInfo->retType)
+ {
+ // Sanity check
+ assert(hwAccelIntrinsicInfo->argCount == 0 && hwAccelIntrinsicInfo->isInstMethod == false);
+ return hwAccelIntrinsicInfo;
+ }
+ return nullptr;
+ }
+ else
+ {
+ typeHnd = info.compCompHnd->getArgClass(sig, sig->args);
+ *inOutTypeHnd = typeHnd;
+ *baseType = getBaseTypeAndSizeOfSIMDType(typeHnd, sizeBytes);
+ }
+ }
+
+ if (*baseType == TYP_UNKNOWN)
+ {
+ JITDUMP("NOT a SIMD Intrinsic: unsupported baseType\n");
+ return nullptr;
+ }
+
+ // account for implicit "this" arg
+ *argCount = sig->numArgs;
+ if (sig->hasThis())
+ {
+ *argCount += 1;
+ }
+
+ // Get the Intrinsic Id by parsing method name.
+ //
+ // TODO-Throughput: replace sequential search by binary search by arranging entries
+ // sorted by method name.
+ SIMDIntrinsicID intrinsicId = SIMDIntrinsicInvalid;
+ const char* methodName = eeGetMethodName(methodHnd, nullptr);
+ for (int i = SIMDIntrinsicNone + 1; i < SIMDIntrinsicInvalid; ++i)
+ {
+ if (strcmp(methodName, simdIntrinsicInfoArray[i].methodName) == 0)
+ {
+ // Found an entry for the method; further check whether it is one of
+ // the supported base types.
+ bool found = false;
+ for (int j = 0; j < SIMD_INTRINSIC_MAX_BASETYPE_COUNT; ++j)
+ {
+ // Convention: if there are fewer base types supported than MAX_BASETYPE_COUNT,
+ // the end of the list is marked by TYP_UNDEF.
+ if (simdIntrinsicInfoArray[i].supportedBaseTypes[j] == TYP_UNDEF)
+ {
+ break;
+ }
+
+ if (simdIntrinsicInfoArray[i].supportedBaseTypes[j] == *baseType)
+ {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ {
+ continue;
+ }
+
+ // Now, check the arguments.
+ unsigned int fixedArgCnt = simdIntrinsicInfoArray[i].argCount;
+ unsigned int expectedArgCnt = fixedArgCnt;
+
+ // First handle SIMDIntrinsicInitN, where the arg count depends on the type.
+ // The listed arg types include the vector and the first two init values, which is the expected number
+ // for Vector2. For other cases, we'll check their types here.
+ if (*argCount > expectedArgCnt)
+ {
+ if (i == SIMDIntrinsicInitN)
+ {
+ if (*argCount == 3 && typeHnd == SIMDVector2Handle)
+ {
+ expectedArgCnt = 3;
+ }
+ else if (*argCount == 4 && typeHnd == SIMDVector3Handle)
+ {
+ expectedArgCnt = 4;
+ }
+ else if (*argCount == 5 && typeHnd == SIMDVector4Handle)
+ {
+ expectedArgCnt = 5;
+ }
+ }
+ else if (i == SIMDIntrinsicInitFixed)
+ {
+ if (*argCount == 4 && typeHnd == SIMDVector4Handle)
+ {
+ expectedArgCnt = 4;
+ }
+ }
+ }
+ if (*argCount != expectedArgCnt)
+ {
+ continue;
+ }
+
+ // Validate that the types of the individual args are what is expected.
+ // If any of the types don't match, don't treat the method as an intrinsic.
+ // This will make an older JIT with SIMD capabilities resilient to
+ // breaking changes to the SIMD managed API.
+ //
+ // Note that args are popped off the IL type stack in right-to-left order,
+ // whereas they are listed in method signatures in left-to-right order.
+
+ int stackIndex = (expectedArgCnt - 1);
+
+ // Track the arguments from the signature - we currently only use this to distinguish
+ // integral and pointer types, both of which will be TYP_I_IMPL on the importer stack.
+ CORINFO_ARG_LIST_HANDLE argLst = sig->args;
+
+ CORINFO_CLASS_HANDLE argClass;
+ for (unsigned int argIndex = 0; found == true && argIndex < expectedArgCnt; argIndex++)
+ {
+ bool isThisPtr = ((argIndex == 0) && sig->hasThis());
+
+ // In case of "newobj SIMDVector<T>(T val)", thisPtr won't be present on type stack.
+ // We don't check anything in that case.
+ if (!isThisPtr || !isNewObj)
+ {
+ GenTreePtr arg = impStackTop(stackIndex).val;
+
+ var_types expectedArgType;
+ if (argIndex < fixedArgCnt)
+ {
+ // Convention:
+ // - intrinsicInfo.argType[i] == TYP_UNDEF - intrinsic doesn't have a valid arg at position i
+ // - intrinsicInfo.argType[i] == TYP_UNKNOWN - arg type should be same as basetype
+ // Note that we pop the args off in reverse order.
+ expectedArgType = simdIntrinsicInfoArray[i].argType[argIndex];
+ assert(expectedArgType != TYP_UNDEF);
+ if (expectedArgType == TYP_UNKNOWN)
+ {
+ // The type of the argument will be genActualType(*baseType).
+ expectedArgType = genActualType(*baseType);
+ }
+ }
+ else
+ {
+ expectedArgType = *baseType;
+ }
+
+ var_types argType = arg->TypeGet();
+ if (!isThisPtr && argType == TYP_I_IMPL)
+ {
+ // The reference implementation has a constructor that takes a pointer.
+ // We don't want to recognize that one. This requires us to look at the CorInfoType
+ // in order to distinguish a signature with a pointer argument from one with an
+ // integer argument of pointer size, both of which will be TYP_I_IMPL on the stack.
+ // TODO-Review: This seems quite fragile. We should consider beefing up the checking
+ // here.
+ CorInfoType corType = strip(info.compCompHnd->getArgType(sig, argLst, &argClass));
+ if (corType == CORINFO_TYPE_PTR)
+ {
+ found = false;
+ }
+ }
+
+ if (varTypeIsSIMD(argType))
+ {
+ argType = TYP_STRUCT;
+ }
+ if (argType != expectedArgType)
+ {
+ found = false;
+ }
+ }
+ if (argIndex != 0 || !sig->hasThis())
+ {
+ argLst = info.compCompHnd->getArgNext(argLst);
+ }
+ stackIndex--;
+ }
+
+ // Cross check return type and static vs. instance is what we are expecting.
+ // If not, don't consider it as an intrinsic.
+ // Note that a ret type of TYP_UNKNOWN means that it is not known a priori and must be the same as baseType
+ if (found)
+ {
+ var_types expectedRetType = simdIntrinsicInfoArray[i].retType;
+ if (expectedRetType == TYP_UNKNOWN)
+ {
+ // JIT maps uint/ulong type vars to TYP_INT/TYP_LONG.
+ expectedRetType =
+ (*baseType == TYP_UINT || *baseType == TYP_ULONG) ? genActualType(*baseType) : *baseType;
+ }
+
+ if (JITtype2varType(sig->retType) != expectedRetType ||
+ sig->hasThis() != simdIntrinsicInfoArray[i].isInstMethod)
+ {
+ found = false;
+ }
+ }
+
+ if (found)
+ {
+ intrinsicId = (SIMDIntrinsicID)i;
+ break;
+ }
+ }
+ }
+
+ if (intrinsicId != SIMDIntrinsicInvalid)
+ {
+ JITDUMP("Method %s maps to SIMD intrinsic %s\n", methodName, simdIntrinsicNames[intrinsicId]);
+ return &simdIntrinsicInfoArray[intrinsicId];
+ }
+ else
+ {
+ JITDUMP("Method %s is NOT a SIMD intrinsic\n", methodName);
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// impSIMDPopStack: Pops and returns a GenTree node from the importer's type stack.
+// Normalizes TYP_STRUCT values in the case of GT_CALL, GT_RET_EXPR and arg nodes.
+//
+// Arguments:
+// type - the type of value that the caller expects to be popped off the stack.
+// expectAddr - if true indicates we are expecting type stack entry to be a TYP_BYREF.
+//
+// Notes:
+// If the popped value is a struct, and the expected type is a simd type, it will be set
+// to that type, otherwise it will assert if the type being popped is not the expected type.
+
+GenTreePtr Compiler::impSIMDPopStack(var_types type, bool expectAddr)
+{
+ StackEntry se = impPopStack();
+ typeInfo ti = se.seTypeInfo;
+ GenTreePtr tree = se.val;
+
+ // If expectAddr is true, then what we have on the stack is an address, and we need
+ // the SIMD type struct that it points to.
+ if (expectAddr)
+ {
+ assert(tree->TypeGet() == TYP_BYREF);
+ if (tree->OperGet() == GT_ADDR)
+ {
+ tree = tree->gtGetOp1();
+ }
+ else
+ {
+ tree = gtNewOperNode(GT_IND, type, tree);
+ }
+ }
+
+ bool isParam = false;
+
+ // If we have a ldobj of a SIMD local we need to transform it.
+ if (tree->OperGet() == GT_OBJ)
+ {
+ GenTree* addr = tree->gtOp.gtOp1;
+ if ((addr->OperGet() == GT_ADDR) && isSIMDTypeLocal(addr->gtOp.gtOp1))
+ {
+ tree = addr->gtOp.gtOp1;
+ }
+ }
+
+ if (tree->OperGet() == GT_LCL_VAR)
+ {
+ unsigned lclNum = tree->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+ isParam = lclVarDsc->lvIsParam;
+ }
+
+ // normalize TYP_STRUCT value
+ if (varTypeIsStruct(tree) && ((tree->OperGet() == GT_RET_EXPR) || (tree->OperGet() == GT_CALL) || isParam))
+ {
+ assert(ti.IsType(TI_STRUCT));
+ CORINFO_CLASS_HANDLE structType = ti.GetClassHandleForValueClass();
+ tree = impNormStructVal(tree, structType, (unsigned)CHECK_SPILL_ALL);
+ }
+
+ // Now set the type of the tree to the specialized SIMD struct type, if applicable.
+ if (genActualType(tree->gtType) != genActualType(type))
+ {
+ assert(tree->gtType == TYP_STRUCT);
+ tree->gtType = type;
+ }
+ else if (tree->gtType == TYP_BYREF)
+ {
+ assert(tree->IsLocal() || ((tree->gtOper == GT_ADDR) && varTypeIsSIMD(tree->gtGetOp1())));
+ }
+
+ return tree;
+}
+
+// impSIMDGetFixed: Create a GT_SIMD tree for a Get property of SIMD vector with a fixed index.
+//
+// Arguments:
+// simdType - The specific SIMD type of the vector (e.g. TYP_SIMD8, TYP_SIMD16).
+// baseType - The base (element) type of the SIMD vector.
+// simdSize - The total size in bytes of the SIMD vector.
+// index - The index of the field to get.
+//
+// Return Value:
+// Returns a GT_SIMD node with the SIMDIntrinsicGetItem intrinsic id.
+//
+GenTreeSIMD* Compiler::impSIMDGetFixed(var_types simdType, var_types baseType, unsigned simdSize, int index)
+{
+ assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
+
+ // op1 is a SIMD source.
+ GenTree* op1 = impSIMDPopStack(simdType, true);
+
+ GenTree* op2 = gtNewIconNode(index);
+ GenTreeSIMD* simdTree = gtNewSIMDNode(baseType, op1, op2, SIMDIntrinsicGetItem, baseType, simdSize);
+ return simdTree;
+}
+
+#ifdef _TARGET_AMD64_
+// impSIMDLongRelOpEqual: transforms operands and returns the SIMD intrinsic to be applied on
+// transformed operands to obtain == comparison result.
+//
+// Arguments:
+// typeHnd - type handle of SIMD vector
+// size - SIMD vector size
+// pOp1 - in-out parameter; first operand
+// pOp2 - in-out parameter; second operand
+//
+// Return Value:
+// Modifies the in-out params pOp1 and pOp2, and returns the intrinsic ID to be applied to the modified operands
+//
+SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd,
+ unsigned size,
+ GenTree** pOp1,
+ GenTree** pOp2)
+{
+ var_types simdType = (*pOp1)->TypeGet();
+ assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType));
+
+ // There is no direct SSE2 support for comparing TYP_LONG vectors.
+ // These have to be implemented in terms of TYP_INT vector comparison operations.
+ //
+ // Equality(v1, v2):
+ // tmp = (v1 == v2) i.e. compare for equality as if v1 and v2 are vector<int>
+ // result = BitwiseAnd(tmp, Shuffle(tmp, (2, 3, 1, 0)))
+ // Shuffle is meant to swap the comparison results of low-32-bits and high 32-bits of respective long elements.
+
+ // Compare vector<long> as if they were vector<int> and assign the result to a temp
+ GenTree* compResult = gtNewSIMDNode(simdType, *pOp1, *pOp2, SIMDIntrinsicEqual, TYP_INT, size);
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("SIMD Long =="));
+ lvaSetStruct(lclNum, typeHnd, false);
+ GenTree* tmp = gtNewLclvNode(lclNum, simdType);
+ GenTree* asg = gtNewTempAssign(lclNum, compResult);
+
+ // op1 = GT_COMMA(tmp=compResult, tmp)
+ // op2 = Shuffle(tmp, 0xB1)
+ // IntrinsicId = BitwiseAnd
+ *pOp1 = gtNewOperNode(GT_COMMA, simdType, asg, tmp);
+ *pOp2 = gtNewSIMDNode(simdType, gtNewLclvNode(lclNum, simdType), gtNewIconNode(SHUFFLE_ZWYX, TYP_INT),
+ SIMDIntrinsicShuffleSSE2, TYP_INT, size);
+ return SIMDIntrinsicBitwiseAnd;
+}
+
+// impSIMDLongRelOpGreaterThan: transforms operands and returns the SIMD intrinsic to be applied on
+// transformed operands to obtain > comparison result.
+//
+// Arguments:
+// typeHnd - type handle of SIMD vector
+// size - SIMD vector size
+// pOp1 - in-out parameter; first operand
+// pOp2 - in-out parameter; second operand
+//
+// Return Value:
+// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands
+//
+SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThan(CORINFO_CLASS_HANDLE typeHnd,
+ unsigned size,
+ GenTree** pOp1,
+ GenTree** pOp2)
+{
+ var_types simdType = (*pOp1)->TypeGet();
+ assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType));
+
+ // GreaterThan(v1, v2) where v1 and v2 are vector long.
+ // Let us consider the case of single long element comparison.
+ // say L1 = (x1, y1) and L2 = (x2, y2) where x1, y1, x2, and y2 are 32-bit integers that comprise the longs L1 and
+ // L2.
+ //
+ // GreaterThan(L1, L2) can be expressed in terms of > relationship between 32-bit integers that comprise L1 and L2
+ // as
+ // = (x1, y1) > (x2, y2)
+ // = (x1 > x2) || [(x1 == x2) && (y1 > y2)] - eq (1)
+ //
+ // t = (v1 > v2) 32-bit signed comparison
+ // u = (v1 == v2) 32-bit sized element equality
+ // v = (v1 > v2) 32-bit unsigned comparison
+ //
+ // z = shuffle(t, (3, 3, 1, 1)) - This corresponds to (x1 > x2) in eq(1) above
+ // t1 = Shuffle(v, (2, 2, 0, 0)) - This corresponds to (y1 > y2) in eq(1) above
+ // u1 = Shuffle(u, (3, 3, 1, 1)) - This corresponds to (x1 == x2) in eq(1) above
+ // w = And(t1, u1) - This corresponds to [(x1 == x2) && (y1 > y2)] in eq(1) above
+ // Result = BitwiseOr(z, w)
+
+ // Since op1 and op2 gets used multiple times, make sure side effects are computed.
+ GenTree* dupOp1 = nullptr;
+ GenTree* dupOp2 = nullptr;
+ GenTree* dupDupOp1 = nullptr;
+ GenTree* dupDupOp2 = nullptr;
+
+ if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd);
+ dupDupOp1 = gtNewLclvNode(dupOp1->AsLclVarCommon()->GetLclNum(), simdType);
+ }
+ else
+ {
+ dupOp1 = gtCloneExpr(*pOp1);
+ dupDupOp1 = gtCloneExpr(*pOp1);
+ }
+
+ if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd);
+ dupDupOp2 = gtNewLclvNode(dupOp2->AsLclVarCommon()->GetLclNum(), simdType);
+ }
+ else
+ {
+ dupOp2 = gtCloneExpr(*pOp2);
+ dupDupOp2 = gtCloneExpr(*pOp2);
+ }
+
+ assert(dupDupOp1 != nullptr && dupDupOp2 != nullptr);
+ assert(dupOp1 != nullptr && dupOp2 != nullptr);
+ assert(*pOp1 != nullptr && *pOp2 != nullptr);
+
+ // v1GreaterThanv2Signed - signed 32-bit comparison
+ GenTree* v1GreaterThanv2Signed = gtNewSIMDNode(simdType, *pOp1, *pOp2, SIMDIntrinsicGreaterThan, TYP_INT, size);
+
+ // v1Equalsv2 - 32-bit equality
+ GenTree* v1Equalsv2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, SIMDIntrinsicEqual, TYP_INT, size);
+
+ // v1GreaterThanv2Unsigned - unsigned 32-bit comparison
+ var_types tempBaseType = TYP_UINT;
+ SIMDIntrinsicID sid = impSIMDRelOp(SIMDIntrinsicGreaterThan, typeHnd, size, &tempBaseType, &dupDupOp1, &dupDupOp2);
+ GenTree* v1GreaterThanv2Unsigned = gtNewSIMDNode(simdType, dupDupOp1, dupDupOp2, sid, tempBaseType, size);
+
+ GenTree* z = gtNewSIMDNode(simdType, v1GreaterThanv2Signed, gtNewIconNode(SHUFFLE_WWYY, TYP_INT),
+ SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size);
+ GenTree* t1 = gtNewSIMDNode(simdType, v1GreaterThanv2Unsigned, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT),
+ SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size);
+ GenTree* u1 = gtNewSIMDNode(simdType, v1Equalsv2, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), SIMDIntrinsicShuffleSSE2,
+ TYP_FLOAT, size);
+ GenTree* w = gtNewSIMDNode(simdType, u1, t1, SIMDIntrinsicBitwiseAnd, TYP_INT, size);
+
+ *pOp1 = z;
+ *pOp2 = w;
+ return SIMDIntrinsicBitwiseOr;
+}
+
+// impSIMDLongRelOpGreaterThanOrEqual: transforms operands and returns the SIMD intrinsic to be applied on
+// transformed operands to obtain >= comparison result.
+//
+// Arguments:
+// typeHnd - type handle of SIMD vector
+// size - SIMD vector size
+// pOp1 - in-out parameter; first operand
+// pOp2 - in-out parameter; second operand
+//
+// Return Value:
+// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands
+//
+SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThanOrEqual(CORINFO_CLASS_HANDLE typeHnd,
+ unsigned size,
+ GenTree** pOp1,
+ GenTree** pOp2)
+{
+ var_types simdType = (*pOp1)->TypeGet();
+ assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType));
+
+ // expand this to (a == b) | (a > b)
+ GenTree* dupOp1 = nullptr;
+ GenTree* dupOp2 = nullptr;
+
+ if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd);
+ }
+ else
+ {
+ dupOp1 = gtCloneExpr(*pOp1);
+ }
+
+ if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd);
+ }
+ else
+ {
+ dupOp2 = gtCloneExpr(*pOp2);
+ }
+
+ assert(dupOp1 != nullptr && dupOp2 != nullptr);
+ assert(*pOp1 != nullptr && *pOp2 != nullptr);
+
+ // (a==b)
+ SIMDIntrinsicID id = impSIMDLongRelOpEqual(typeHnd, size, pOp1, pOp2);
+ *pOp1 = gtNewSIMDNode(simdType, *pOp1, *pOp2, id, TYP_LONG, size);
+
+ // (a > b)
+ id = impSIMDLongRelOpGreaterThan(typeHnd, size, &dupOp1, &dupOp2);
+ *pOp2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, id, TYP_LONG, size);
+
+ return SIMDIntrinsicBitwiseOr;
+}
+
+// impSIMDIntegralRelOpGreaterThanOrEqual: transforms operands and returns the SIMD intrinsic to be applied on
+// transformed operands to obtain >= comparison result in case of integer base type vectors
+//
+// Arguments:
+// typeHnd - type handle of SIMD vector
+// size - SIMD vector size
+// baseType - base type of SIMD vector
+// pOp1 - in-out parameter; first operand
+// pOp2 - in-out parameter; second operand
+//
+// Return Value:
+// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands
+//
+SIMDIntrinsicID Compiler::impSIMDIntegralRelOpGreaterThanOrEqual(
+ CORINFO_CLASS_HANDLE typeHnd, unsigned size, var_types baseType, GenTree** pOp1, GenTree** pOp2)
+{
+ var_types simdType = (*pOp1)->TypeGet();
+ assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType));
+
+ // This routine should be used only for integer base type vectors
+ assert(varTypeIsIntegral(baseType));
+ if ((getSIMDInstructionSet() == InstructionSet_SSE2) && ((baseType == TYP_LONG) || baseType == TYP_UBYTE))
+ {
+ return impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, pOp1, pOp2);
+ }
+
+ // expand this to (a == b) | (a > b)
+ GenTree* dupOp1 = nullptr;
+ GenTree* dupOp2 = nullptr;
+
+ if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd);
+ }
+ else
+ {
+ dupOp1 = gtCloneExpr(*pOp1);
+ }
+
+ if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd);
+ }
+ else
+ {
+ dupOp2 = gtCloneExpr(*pOp2);
+ }
+
+ assert(dupOp1 != nullptr && dupOp2 != nullptr);
+ assert(*pOp1 != nullptr && *pOp2 != nullptr);
+
+ // (a==b)
+ *pOp1 = gtNewSIMDNode(simdType, *pOp1, *pOp2, SIMDIntrinsicEqual, baseType, size);
+
+ // (a > b)
+ *pOp2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, SIMDIntrinsicGreaterThan, baseType, size);
+
+ return SIMDIntrinsicBitwiseOr;
+}
+#endif //_TARGET_AMD64_
+
+// impSIMDRelOp: Transforms operands and returns the SIMD intrinsic to be applied on
+// transformed operands to obtain the given relop result.
+//
+// Arguments:
+// relOpIntrinsicId - Relational operator SIMD intrinsic
+// typeHnd - type handle of SIMD vector
+// size - SIMD vector size
+// inOutBaseType - base type of SIMD vector
+// pOp1 - in-out parameter; first operand
+// pOp2 - in-out parameter; second operand
+//
+// Return Value:
+// Modifies in-out params pOp1, pOp2, inOutBaseType and returns intrinsic ID to be applied to modified operands
+//
+SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
+ CORINFO_CLASS_HANDLE typeHnd,
+ unsigned size,
+ var_types* inOutBaseType,
+ GenTree** pOp1,
+ GenTree** pOp2)
+{
+ var_types simdType = (*pOp1)->TypeGet();
+ assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType));
+
+ assert(isRelOpSIMDIntrinsic(relOpIntrinsicId));
+
+#ifdef _TARGET_AMD64_
+ SIMDIntrinsicID intrinsicID = relOpIntrinsicId;
+ var_types baseType = *inOutBaseType;
+
+ if (varTypeIsFloating(baseType))
+ {
+ // SSE2/AVX doesn't support > and >= on vector float/double.
+ // Therefore, we need to use < and <= with swapped operands
+ if (relOpIntrinsicId == SIMDIntrinsicGreaterThan || relOpIntrinsicId == SIMDIntrinsicGreaterThanOrEqual)
+ {
+ GenTree* tmp = *pOp1;
+ *pOp1 = *pOp2;
+ *pOp2 = tmp;
+
+ intrinsicID =
+ (relOpIntrinsicId == SIMDIntrinsicGreaterThan) ? SIMDIntrinsicLessThan : SIMDIntrinsicLessThanOrEqual;
+ }
+ }
+ else if (varTypeIsIntegral(baseType))
+ {
+ // SSE/AVX doesn't support < and <= on integer base type vectors.
+ // Therefore, we need to use > and >= with swapped operands.
+ if (intrinsicID == SIMDIntrinsicLessThan || intrinsicID == SIMDIntrinsicLessThanOrEqual)
+ {
+ GenTree* tmp = *pOp1;
+ *pOp1 = *pOp2;
+ *pOp2 = tmp;
+
+ intrinsicID = (relOpIntrinsicId == SIMDIntrinsicLessThan) ? SIMDIntrinsicGreaterThan
+ : SIMDIntrinsicGreaterThanOrEqual;
+ }
+
+ if ((getSIMDInstructionSet() == InstructionSet_SSE2) && baseType == TYP_LONG)
+ {
+ // There is no direct SSE2 support for comparing TYP_LONG vectors.
+ // These have to be implemented in terms of TYP_INT vector comparison operations.
+ if (intrinsicID == SIMDIntrinsicEqual)
+ {
+ intrinsicID = impSIMDLongRelOpEqual(typeHnd, size, pOp1, pOp2);
+ }
+ else if (intrinsicID == SIMDIntrinsicGreaterThan)
+ {
+ intrinsicID = impSIMDLongRelOpGreaterThan(typeHnd, size, pOp1, pOp2);
+ }
+ else if (intrinsicID == SIMDIntrinsicGreaterThanOrEqual)
+ {
+ intrinsicID = impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, pOp1, pOp2);
+ }
+ else
+ {
+ unreached();
+ }
+ }
+ // SSE2 and AVX have direct support for signed comparisons of int32, int16 and int8 types
+ else if (!varTypeIsUnsigned(baseType))
+ {
+ if (intrinsicID == SIMDIntrinsicGreaterThanOrEqual)
+ {
+ intrinsicID = impSIMDIntegralRelOpGreaterThanOrEqual(typeHnd, size, baseType, pOp1, pOp2);
+ }
+ }
+ else // unsigned
+ {
+ // Vector<byte>, Vector<ushort>, Vector<uint> and Vector<ulong>:
+ // SSE2 supports > for signed comparison. Therefore, to use it for
+ // comparing unsigned numbers, we subtract a constant from both the
+ // operands such that the result fits within the corresponding signed
+ // type. The resulting signed numbers are compared using SSE2 signed
+ // comparison.
+ //
+ // Vector<byte>: constant to be subtracted is 2^7
+ // Vector<ushort>: constant to be subtracted is 2^15
+ // Vector<uint>: constant to be subtracted is 2^31
+ // Vector<ulong>: constant to be subtracted is 2^63
+ //
+ // We need to treat op1 and op2 as signed for comparison purposes after
+ // the transformation.
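+ //
+ // For example (unsigned bytes, illustrative): the unsigned comparison 200 > 100 becomes the
+ // signed comparison (200 - 128) > (100 - 128), i.e. 72 > -28, which preserves the ordering.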
+ ssize_t constVal = 0;
+ switch (baseType)
+ {
+ case TYP_UBYTE:
+ constVal = 0x80808080;
+ *inOutBaseType = TYP_BYTE;
+ break;
+ case TYP_CHAR:
+ constVal = 0x80008000;
+ *inOutBaseType = TYP_SHORT;
+ break;
+ case TYP_UINT:
+ constVal = 0x80000000;
+ *inOutBaseType = TYP_INT;
+ break;
+ case TYP_ULONG:
+ constVal = 0x8000000000000000LL;
+ *inOutBaseType = TYP_LONG;
+ break;
+ default:
+ unreached();
+ break;
+ }
+ assert(constVal != 0);
+
+ // This transformation is not required for equality.
+ if (intrinsicID != SIMDIntrinsicEqual)
+ {
+ // For constructing const vector use either long or int base type.
+ var_types tempBaseType = (baseType == TYP_ULONG) ? TYP_LONG : TYP_INT;
+ GenTree* initVal = gtNewIconNode(constVal);
+ initVal->gtType = tempBaseType;
+ GenTree* constVector = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, tempBaseType, size);
+
+ // Assign constVector to a temp, since we intend to use it more than once
+ // TODO-CQ: We have quite a few such constant vectors constructed during
+ // the importation of SIMD intrinsics. Make sure that we have a single
+ // temp per distinct constant per method.
+ GenTree* tmp = fgInsertCommaFormTemp(&constVector, typeHnd);
+
+ // op1 = op1 - constVector
+ // op2 = op2 - constVector
+ *pOp1 = gtNewSIMDNode(simdType, *pOp1, constVector, SIMDIntrinsicSub, baseType, size);
+ *pOp2 = gtNewSIMDNode(simdType, *pOp2, tmp, SIMDIntrinsicSub, baseType, size);
+ }
+
+ return impSIMDRelOp(intrinsicID, typeHnd, size, inOutBaseType, pOp1, pOp2);
+ }
+ }
+
+ return intrinsicID;
+#else
+ assert(!"impSIMDRelOp() unimplemented on target arch");
+ unreached();
+#endif //_TARGET_AMD64_
+}
+
+// impSIMDSelect: Creates a GT_SIMD tree for a Select operation.
+//
+// Arguments:
+// typeHnd - type handle of SIMD vector
+// baseType - base type of SIMD vector
+// size - SIMD vector size
+// op1 - first operand = Condition vector vc
+// op2 - second operand = va
+// op3 - third operand = vb
+//
+// Return Value:
+// Returns GT_SIMD tree that computes Select(vc, va, vb)
+//
+GenTreePtr Compiler::impSIMDSelect(
+ CORINFO_CLASS_HANDLE typeHnd, var_types baseType, unsigned size, GenTree* op1, GenTree* op2, GenTree* op3)
+{
+ assert(varTypeIsSIMD(op1));
+ var_types simdType = op1->TypeGet();
+ assert(op2->TypeGet() == simdType);
+ assert(op3->TypeGet() == simdType);
+
+ // Select(BitVector vc, va, vb) = (va & vc) | (vb & !vc)
+ // Select(op1, op2, op3) = (op2 & op1) | (op3 & !op1)
+ // = SIMDIntrinsicBitwiseOr(SIMDIntrinsicBitwiseAnd(op2, op1),
+ // SIMDIntrinsicBitwiseAndNot(op3, op1))
+ //
+ // If Op1 has side effect, create an assignment to a temp
+ GenTree* tmp = op1;
+ GenTree* asg = nullptr;
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("SIMD Select"));
+ lvaSetStruct(lclNum, typeHnd, false);
+ tmp = gtNewLclvNode(lclNum, op1->TypeGet());
+ asg = gtNewTempAssign(lclNum, op1);
+ }
+
+ GenTree* andExpr = gtNewSIMDNode(simdType, op2, tmp, SIMDIntrinsicBitwiseAnd, baseType, size);
+ GenTree* dupOp1 = gtCloneExpr(tmp);
+ assert(dupOp1 != nullptr);
+ GenTree* andNotExpr = gtNewSIMDNode(simdType, dupOp1, op3, SIMDIntrinsicBitwiseAndNot, baseType, size);
+ GenTree* simdTree = gtNewSIMDNode(simdType, andExpr, andNotExpr, SIMDIntrinsicBitwiseOr, baseType, size);
+
+ // If asg not null, create a GT_COMMA tree.
+ if (asg != nullptr)
+ {
+ simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), asg, simdTree);
+ }
+
+ return simdTree;
+}
+
+// impSIMDMinMax: Creates a GT_SIMD tree for a Min/Max operation.
+//
+// Arguments:
+// intrinsicId - SIMD intrinsic Id, either Min or Max
+// typeHnd - type handle of SIMD vector
+// baseType - base type of SIMD vector
+// size - SIMD vector size
+// op1 - first operand = va
+// op2 - second operand = vb
+//
+// Return Value:
+// Returns a GT_SIMD tree that computes Min/Max(va, vb)
+//
+GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId,
+ CORINFO_CLASS_HANDLE typeHnd,
+ var_types baseType,
+ unsigned size,
+ GenTree* op1,
+ GenTree* op2)
+{
+ assert(intrinsicId == SIMDIntrinsicMin || intrinsicId == SIMDIntrinsicMax);
+ assert(varTypeIsSIMD(op1));
+ var_types simdType = op1->TypeGet();
+ assert(op2->TypeGet() == simdType);
+
+#ifdef _TARGET_AMD64_
+ // SSE2 has direct support for float/double/signed word/unsigned byte.
+ // For other integer types we compute min/max as follows
+ //
+ // int32/uint32/int64/uint64:
+ // compResult = (op1 < op2) in case of Min
+ // (op1 > op2) in case of Max
+ // Min/Max(op1, op2) = Select(compResult, op1, op2)
+ //
+ // unsigned word:
+ // op1 = op1 - 2^15 ; to make it fit within a signed word
+ // op2 = op2 - 2^15 ; to make it fit within a signed word
+ // result = SSE2 signed word Min/Max(op1, op2)
+ // result = result + 2^15 ; readjust it back
+ //
+ // signed byte:
+ // op1 = op1 + 2^7 ; to make it unsigned
+ // op2 = op2 + 2^7 ; to make it unsigned
+ // result = SSE2 unsigned byte Min/Max(op1, op2)
+ // result = result - 2^7 ; readjust it back
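+ //
+ // For example (unsigned word Min, illustrative): Min(40000, 10) becomes
+ // Min(40000 - 2^15, 10 - 2^15) = Min(7232, -32758) = -32758 as signed words,
+ // and -32758 + 2^15 = 10, which is the expected unsigned result.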
+
+ GenTree* simdTree = nullptr;
+
+ if (varTypeIsFloating(baseType) || baseType == TYP_SHORT || baseType == TYP_UBYTE)
+ {
+ // SSE2 has direct support
+ simdTree = gtNewSIMDNode(simdType, op1, op2, intrinsicId, baseType, size);
+ }
+ else if (baseType == TYP_CHAR || baseType == TYP_BYTE)
+ {
+ int constVal;
+ SIMDIntrinsicID operIntrinsic;
+ SIMDIntrinsicID adjustIntrinsic;
+ var_types minMaxOperBaseType;
+ if (baseType == TYP_CHAR)
+ {
+ constVal = 0x80008000;
+ operIntrinsic = SIMDIntrinsicSub;
+ adjustIntrinsic = SIMDIntrinsicAdd;
+ minMaxOperBaseType = TYP_SHORT;
+ }
+ else
+ {
+ assert(baseType == TYP_BYTE);
+ constVal = 0x80808080;
+ operIntrinsic = SIMDIntrinsicAdd;
+ adjustIntrinsic = SIMDIntrinsicSub;
+ minMaxOperBaseType = TYP_UBYTE;
+ }
+
+ GenTree* initVal = gtNewIconNode(constVal);
+ GenTree* constVector = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, TYP_INT, size);
+
+ // Assign constVector to a temp, since we intend to use it more than once
+ // TODO-CQ: We have quite a few such constant vectors constructed during
+ // the importation of SIMD intrinsics. Make sure that we have a single
+ // temp per distinct constant per method.
+ GenTree* tmp = fgInsertCommaFormTemp(&constVector, typeHnd);
+
+        // Adjust op1 and op2 by constVector (subtract for unsigned short, add for signed byte):
+        // op1 = op1 (-/+) constVector
+        // op2 = op2 (-/+) constVector
+ op1 = gtNewSIMDNode(simdType, op1, constVector, operIntrinsic, baseType, size);
+ op2 = gtNewSIMDNode(simdType, op2, tmp, operIntrinsic, baseType, size);
+
+ // compute min/max of op1 and op2 considering them as if minMaxOperBaseType
+ simdTree = gtNewSIMDNode(simdType, op1, op2, intrinsicId, minMaxOperBaseType, size);
+
+ // re-adjust the value by adding or subtracting constVector
+ tmp = gtNewLclvNode(tmp->AsLclVarCommon()->GetLclNum(), tmp->TypeGet());
+ simdTree = gtNewSIMDNode(simdType, simdTree, tmp, adjustIntrinsic, baseType, size);
+ }
+ else
+ {
+ GenTree* dupOp1 = nullptr;
+ GenTree* dupOp2 = nullptr;
+ GenTree* op1Assign = nullptr;
+ GenTree* op2Assign = nullptr;
+ unsigned op1LclNum;
+ unsigned op2LclNum;
+
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ op1LclNum = lvaGrabTemp(true DEBUGARG("SIMD Min/Max"));
+ dupOp1 = gtNewLclvNode(op1LclNum, op1->TypeGet());
+ lvaSetStruct(op1LclNum, typeHnd, false);
+ op1Assign = gtNewTempAssign(op1LclNum, op1);
+ op1 = gtNewLclvNode(op1LclNum, op1->TypeGet());
+ }
+ else
+ {
+ dupOp1 = gtCloneExpr(op1);
+ }
+
+ if ((op2->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ op2LclNum = lvaGrabTemp(true DEBUGARG("SIMD Min/Max"));
+ dupOp2 = gtNewLclvNode(op2LclNum, op2->TypeGet());
+ lvaSetStruct(op2LclNum, typeHnd, false);
+ op2Assign = gtNewTempAssign(op2LclNum, op2);
+ op2 = gtNewLclvNode(op2LclNum, op2->TypeGet());
+ }
+ else
+ {
+ dupOp2 = gtCloneExpr(op2);
+ }
+
+ SIMDIntrinsicID relOpIntrinsic =
+ (intrinsicId == SIMDIntrinsicMin) ? SIMDIntrinsicLessThan : SIMDIntrinsicGreaterThan;
+ var_types relOpBaseType = baseType;
+
+ // compResult = op1 relOp op2
+ // simdTree = Select(compResult, op1, op2);
+ assert(dupOp1 != nullptr);
+ assert(dupOp2 != nullptr);
+ relOpIntrinsic = impSIMDRelOp(relOpIntrinsic, typeHnd, size, &relOpBaseType, &dupOp1, &dupOp2);
+ GenTree* compResult = gtNewSIMDNode(simdType, dupOp1, dupOp2, relOpIntrinsic, relOpBaseType, size);
+ unsigned compResultLclNum = lvaGrabTemp(true DEBUGARG("SIMD Min/Max"));
+ lvaSetStruct(compResultLclNum, typeHnd, false);
+ GenTree* compResultAssign = gtNewTempAssign(compResultLclNum, compResult);
+ compResult = gtNewLclvNode(compResultLclNum, compResult->TypeGet());
+ simdTree = impSIMDSelect(typeHnd, baseType, size, compResult, op1, op2);
+ simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), compResultAssign, simdTree);
+
+ // Now create comma trees if we have created assignments of op1/op2 to temps
+ if (op2Assign != nullptr)
+ {
+ simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), op2Assign, simdTree);
+ }
+
+ if (op1Assign != nullptr)
+ {
+ simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), op1Assign, simdTree);
+ }
+ }
+
+ assert(simdTree != nullptr);
+ return simdTree;
+#else
+ assert(!"impSIMDMinMax() unimplemented on target arch");
+ unreached();
+#endif //_TARGET_AMD64_
+}
+
+//------------------------------------------------------------------------
+// getOp1ForConstructor: Get the op1 for a constructor call.
+//
+// Arguments:
+// opcode - the opcode being handled (needed to identify the CEE_NEWOBJ case)
+//    newobjThis - For CEE_NEWOBJ, this is the temp grabbed for the allocated uninitialized object.
+// clsHnd - The handle of the class of the method.
+//
+// Return Value:
+// The tree node representing the object to be initialized with the constructor.
+//
+// Notes:
+// This method handles the differences between the CEE_NEWOBJ and constructor cases.
+//
+GenTreePtr Compiler::getOp1ForConstructor(OPCODE opcode, GenTreePtr newobjThis, CORINFO_CLASS_HANDLE clsHnd)
+{
+ GenTree* op1;
+ if (opcode == CEE_NEWOBJ)
+ {
+ op1 = newobjThis;
+ assert(newobjThis->gtOper == GT_ADDR && newobjThis->gtOp.gtOp1->gtOper == GT_LCL_VAR);
+
+ // push newobj result on type stack
+ unsigned tmp = op1->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ impPushOnStack(gtNewLclvNode(tmp, lvaGetRealType(tmp)), verMakeTypeInfo(clsHnd).NormaliseForStack());
+ }
+ else
+ {
+ op1 = impSIMDPopStack(TYP_BYREF);
+ }
+ assert(op1->TypeGet() == TYP_BYREF);
+ return op1;
+}
+
+//-------------------------------------------------------------------
+// Set the flag that indicates that the lclVar referenced by this tree
+// is used in a SIMD intrinsic.
+// Arguments:
+// tree - GenTreePtr
+
+void Compiler::setLclRelatedToSIMDIntrinsic(GenTreePtr tree)
+{
+ assert(tree->OperIsLocal());
+ unsigned lclNum = tree->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+ lclVarDsc->lvUsedInSIMDIntrinsic = true;
+}
+
+//-------------------------------------------------------------
+// Check whether two field nodes reference the same memory location.
+// Notice that this check is just based on pattern matching.
+// Arguments:
+// op1 - GenTreePtr.
+// op2 - GenTreePtr.
+// Return Value:
+//    Return true if op1's parent node and op2's parent node refer to the same location; otherwise, return false.
+
+bool areFieldsParentsLocatedSame(GenTreePtr op1, GenTreePtr op2)
+{
+ assert(op1->OperGet() == GT_FIELD);
+ assert(op2->OperGet() == GT_FIELD);
+
+ GenTreePtr op1ObjRef = op1->gtField.gtFldObj;
+ GenTreePtr op2ObjRef = op2->gtField.gtFldObj;
+ while (op1ObjRef != nullptr && op2ObjRef != nullptr)
+ {
+
+ if (op1ObjRef->OperGet() != op2ObjRef->OperGet())
+ {
+ break;
+ }
+ else if (op1ObjRef->OperGet() == GT_ADDR)
+ {
+ op1ObjRef = op1ObjRef->gtOp.gtOp1;
+ op2ObjRef = op2ObjRef->gtOp.gtOp1;
+ }
+
+ if (op1ObjRef->OperIsLocal() && op2ObjRef->OperIsLocal() &&
+ op1ObjRef->AsLclVarCommon()->GetLclNum() == op2ObjRef->AsLclVarCommon()->GetLclNum())
+ {
+ return true;
+ }
+ else if (op1ObjRef->OperGet() == GT_FIELD && op2ObjRef->OperGet() == GT_FIELD &&
+ op1ObjRef->gtField.gtFldHnd == op2ObjRef->gtField.gtFldHnd)
+ {
+ op1ObjRef = op1ObjRef->gtField.gtFldObj;
+ op2ObjRef = op2ObjRef->gtField.gtFldObj;
+ continue;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ return false;
+}
+
+//----------------------------------------------------------------------
+// Check whether two fields are contiguous.
+// Arguments:
+// first - GenTreePtr. The Type of the node should be TYP_FLOAT
+// second - GenTreePtr. The Type of the node should be TYP_FLOAT
+// Return Value:
+//    If the first field is located before the second field, and they are located contiguously,
+//    then return true. Otherwise, return false.
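+//    e.g. (illustrative): for struct S { float x; float y; }, s.x followed by s.y is contiguous,
+//    while s.y followed by s.x is not.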
+
+bool Compiler::areFieldsContiguous(GenTreePtr first, GenTreePtr second)
+{
+ assert(first->OperGet() == GT_FIELD);
+ assert(second->OperGet() == GT_FIELD);
+ assert(first->gtType == TYP_FLOAT);
+ assert(second->gtType == TYP_FLOAT);
+
+ var_types firstFieldType = first->gtType;
+ var_types secondFieldType = second->gtType;
+
+ unsigned firstFieldEndOffset = first->gtField.gtFldOffset + genTypeSize(firstFieldType);
+ unsigned secondFieldOffset = second->gtField.gtFldOffset;
+ if (firstFieldEndOffset == secondFieldOffset && firstFieldType == secondFieldType &&
+ areFieldsParentsLocatedSame(first, second))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+//-------------------------------------------------------------------------------
+// Check whether two array element nodes are located contiguously or not.
+// Arguments:
+// op1 - GenTreePtr.
+// op2 - GenTreePtr.
+// Return Value:
+// if the array element op1 is located before array element op2, and they are contiguous,
+// then return true. Otherwise, return false.
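+//      e.g. (illustrative): arr[3] and arr[4] (constant indices into the same array) are contiguous;
+//      arr[3] and arr[5] are not.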
+// TODO-CQ:
+//      Right now this can only check array elements with a constant index. In the future,
+//      we should consider allowing this function to check the index using an expression.
+
+bool Compiler::areArrayElementsContiguous(GenTreePtr op1, GenTreePtr op2)
+{
+ noway_assert(op1->gtOper == GT_INDEX);
+ noway_assert(op2->gtOper == GT_INDEX);
+ GenTreeIndex* op1Index = op1->AsIndex();
+ GenTreeIndex* op2Index = op2->AsIndex();
+
+ GenTreePtr op1ArrayRef = op1Index->Arr();
+ GenTreePtr op2ArrayRef = op2Index->Arr();
+ assert(op1ArrayRef->TypeGet() == TYP_REF);
+ assert(op2ArrayRef->TypeGet() == TYP_REF);
+
+ GenTreePtr op1IndexNode = op1Index->Index();
+ GenTreePtr op2IndexNode = op2Index->Index();
+ if ((op1IndexNode->OperGet() == GT_CNS_INT && op2IndexNode->OperGet() == GT_CNS_INT) &&
+ op1IndexNode->gtIntCon.gtIconVal + 1 == op2IndexNode->gtIntCon.gtIconVal)
+ {
+ if (op1ArrayRef->OperGet() == GT_FIELD && op2ArrayRef->OperGet() == GT_FIELD &&
+ areFieldsParentsLocatedSame(op1ArrayRef, op2ArrayRef))
+ {
+ return true;
+ }
+ else if (op1ArrayRef->OperIsLocal() && op2ArrayRef->OperIsLocal() &&
+ op1ArrayRef->AsLclVarCommon()->GetLclNum() == op2ArrayRef->AsLclVarCommon()->GetLclNum())
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+//-------------------------------------------------------------------------------
+// Check whether two argument nodes are contiguous or not.
+// Arguments:
+// op1 - GenTreePtr.
+// op2 - GenTreePtr.
+// Return Value:
+// if the argument node op1 is located before argument node op2, and they are located contiguously,
+// then return true. Otherwise, return false.
+// TODO-CQ:
+//      Right now this can only handle fields and array elements. In the future we should add more cases.
+//
+
+bool Compiler::areArgumentsContiguous(GenTreePtr op1, GenTreePtr op2)
+{
+ if (op1->OperGet() == GT_INDEX && op2->OperGet() == GT_INDEX)
+ {
+ return areArrayElementsContiguous(op1, op2);
+ }
+ else if (op1->OperGet() == GT_FIELD && op2->OperGet() == GT_FIELD)
+ {
+ return areFieldsContiguous(op1, op2);
+ }
+ return false;
+}
+
+//--------------------------------------------------------------------------------------------------------
+// createAddressNodeForSIMDInit: Generate the address node (GT_LEA) used to initialize a Vector2, Vector3 or
+// Vector4 from the first argument's address.
+//
+// Arguments:
+//      tree - GenTreePtr. The tree node whose address is used for the indirection.
+//      simdSize - unsigned. The SIMD vector size, used to derive the element count for the array bounds check.
+//
+// Return value:
+// return the address node.
+//
+// TODO-CQ:
+//      1. Currently only GT_FIELD and GT_INDEX are supported, because those are the only nodes for which we can
+//      verify whether they are located contiguously. In the future we should support more cases.
+//      2. Though it happens to just work, front-end phases are not aware of the GT_LEA node. Therefore, convert
+//      these to use GT_ADDR.
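+//
+// e.g. (illustrative): for Vector3 constructor arguments a[c], a[c+1], a[c+2] with constant c, the tree for a[c]
+// produces a GT_LEA of &a[c], with a bounds check on index c+2 attached to the array reference via a GT_COMMA.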
+GenTreePtr Compiler::createAddressNodeForSIMDInit(GenTreePtr tree, unsigned simdSize)
+{
+ assert(tree->OperGet() == GT_FIELD || tree->OperGet() == GT_INDEX);
+ GenTreePtr byrefNode = nullptr;
+ GenTreePtr startIndex = nullptr;
+ unsigned offset = 0;
+ var_types baseType = tree->gtType;
+
+ if (tree->OperGet() == GT_FIELD)
+ {
+ GenTreePtr objRef = tree->gtField.gtFldObj;
+ if (objRef != nullptr && objRef->gtOper == GT_ADDR)
+ {
+ GenTreePtr obj = objRef->gtOp.gtOp1;
+
+            // If the field comes directly from a struct local, we should set that struct's
+            // lvUsedInSIMDIntrinsic to true, so that the struct won't be promoted.
+            // e.g. for s.x, where x is a field and s is a struct, we set s's lvUsedInSIMDIntrinsic to true
+            // so that s won't be promoted.
+            // Notice that for a case like s1.s2.x, where s1 and s2 are structs and x is a field, it is still
+            // possible for s1 to be promoted, and then for s2 to be promoted. The reason is that if we didn't
+            // allow s1 to be promoted, we would hurt the other optimizations which depend on s1's struct promotion.
+            // TODO-CQ:
+            //  In the future, we should optimize this case so that if there is a nested field like s1.s2.x and
+            //  s1.s2.x's address is used for initializing the vector, then s1 can be promoted but s2 can't.
+ if (varTypeIsSIMD(obj) && obj->OperIsLocal())
+ {
+ setLclRelatedToSIMDIntrinsic(obj);
+ }
+ }
+
+ byrefNode = gtCloneExpr(tree->gtField.gtFldObj);
+ assert(byrefNode != nullptr);
+ offset = tree->gtField.gtFldOffset;
+ }
+ else if (tree->OperGet() == GT_INDEX)
+ {
+
+ GenTreePtr index = tree->AsIndex()->Index();
+ assert(index->OperGet() == GT_CNS_INT);
+
+ GenTreePtr checkIndexExpr = nullptr;
+ unsigned indexVal = (unsigned)(index->gtIntCon.gtIconVal);
+ offset = indexVal * genTypeSize(tree->TypeGet());
+ GenTreePtr arrayRef = tree->AsIndex()->Arr();
+
+        // Generate the bounds check.
+        // The index used for the bounds check should be the maximum index we will access, which is
+        // (first argument's index) + (number of array arguments we have) - 1
+ // = indexVal + arrayElementsCount - 1
+ unsigned arrayElementsCount = simdSize / genTypeSize(baseType);
+ checkIndexExpr = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, indexVal + arrayElementsCount - 1);
+ GenTreeArrLen* arrLen =
+ new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrayRef, (int)offsetof(CORINFO_Array, length));
+ GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, checkIndexExpr, SCK_RNGCHK_FAIL);
+
+ offset += offsetof(CORINFO_Array, u1Elems);
+ byrefNode = gtNewOperNode(GT_COMMA, arrayRef->TypeGet(), arrBndsChk, gtCloneExpr(arrayRef));
+ }
+ else
+ {
+ unreached();
+ }
+ GenTreePtr address =
+ new (this, GT_LEA) GenTreeAddrMode(TYP_BYREF, byrefNode, startIndex, genTypeSize(tree->TypeGet()), offset);
+ return address;
+}
+
+//-------------------------------------------------------------------------------
+// impMarkContiguousSIMDFieldAssignments: Try to identify if there are contiguous
+// assignments from SIMD field to memory. If there are, then mark the related
+// lclvar so that it won't be promoted.
+//
+// Arguments:
+// stmt - GenTreePtr. Input statement node.
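+//
+// e.g. (illustrative): the pattern being matched is a sequence of float assignments such as
+//      d.x = v.X; d.y = v.Y; d.z = v.Z; d.w = v.W;
+//      where v is a SIMD-typed local; once the full sequence is seen, v is marked as lvUsedInSIMDIntrinsic.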
+
+void Compiler::impMarkContiguousSIMDFieldAssignments(GenTreePtr stmt)
+{
+ if (!featureSIMD || opts.MinOpts())
+ {
+ return;
+ }
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+ if (expr->OperGet() == GT_ASG && expr->TypeGet() == TYP_FLOAT)
+ {
+ GenTreePtr curDst = expr->gtOp.gtOp1;
+ GenTreePtr curSrc = expr->gtOp.gtOp2;
+ unsigned index = 0;
+ var_types baseType = TYP_UNKNOWN;
+ unsigned simdSize = 0;
+ GenTreePtr srcSimdStructNode = getSIMDStructFromField(curSrc, &baseType, &index, &simdSize, true);
+ if (srcSimdStructNode == nullptr || baseType != TYP_FLOAT)
+ {
+ fgPreviousCandidateSIMDFieldAsgStmt = nullptr;
+ }
+ else if (index == 0 && isSIMDTypeLocal(srcSimdStructNode))
+ {
+ fgPreviousCandidateSIMDFieldAsgStmt = stmt;
+ }
+ else if (fgPreviousCandidateSIMDFieldAsgStmt != nullptr)
+ {
+ assert(index > 0);
+ GenTreePtr prevAsgExpr = fgPreviousCandidateSIMDFieldAsgStmt->gtStmt.gtStmtExpr;
+ GenTreePtr prevDst = prevAsgExpr->gtOp.gtOp1;
+ GenTreePtr prevSrc = prevAsgExpr->gtOp.gtOp2;
+ if (!areArgumentsContiguous(prevDst, curDst) || !areArgumentsContiguous(prevSrc, curSrc))
+ {
+ fgPreviousCandidateSIMDFieldAsgStmt = nullptr;
+ }
+ else
+ {
+ if (index == (simdSize / genTypeSize(baseType) - 1))
+ {
+ // Successfully found the pattern, mark the lclvar as UsedInSIMDIntrinsic
+ if (srcSimdStructNode->OperIsLocal())
+ {
+ setLclRelatedToSIMDIntrinsic(srcSimdStructNode);
+ }
+
+ if (curDst->OperGet() == GT_FIELD)
+ {
+ GenTreePtr objRef = curDst->gtField.gtFldObj;
+ if (objRef != nullptr && objRef->gtOper == GT_ADDR)
+ {
+ GenTreePtr obj = objRef->gtOp.gtOp1;
+ if (varTypeIsStruct(obj) && obj->OperIsLocal())
+ {
+ setLclRelatedToSIMDIntrinsic(obj);
+ }
+ }
+ }
+ }
+ else
+ {
+ fgPreviousCandidateSIMDFieldAsgStmt = stmt;
+ }
+ }
+ }
+ }
+ else
+ {
+ fgPreviousCandidateSIMDFieldAsgStmt = nullptr;
+ }
+}
+
+//------------------------------------------------------------------------
+// impSIMDIntrinsic: Check method to see if it is a SIMD method
+//
+// Arguments:
+// opcode - the opcode being handled (needed to identify the CEE_NEWOBJ case)
+//    newobjThis - For CEE_NEWOBJ, this is the temp grabbed for the allocated uninitialized object.
+// clsHnd - The handle of the class of the method.
+// method - The handle of the method.
+// sig - The call signature for the method.
+// memberRef - The memberRef token for the method reference.
+//
+// Return Value:
+// If clsHnd is a known SIMD type, and 'method' is one of the methods that are
+// implemented as an intrinsic in the JIT, then return the tree that implements
+// it.
+//
+GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
+ GenTreePtr newobjThis,
+ CORINFO_CLASS_HANDLE clsHnd,
+ CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_SIG_INFO* sig,
+ int memberRef)
+{
+ assert(featureSIMD);
+
+ if (!isSIMDClass(clsHnd))
+ {
+ return nullptr;
+ }
+
+ // Get base type and intrinsic Id
+ var_types baseType = TYP_UNKNOWN;
+ unsigned size = 0;
+ unsigned argCount = 0;
+ const SIMDIntrinsicInfo* intrinsicInfo =
+ getSIMDIntrinsicInfo(&clsHnd, methodHnd, sig, (opcode == CEE_NEWOBJ), &argCount, &baseType, &size);
+ if (intrinsicInfo == nullptr || intrinsicInfo->id == SIMDIntrinsicInvalid)
+ {
+ return nullptr;
+ }
+
+ SIMDIntrinsicID simdIntrinsicID = intrinsicInfo->id;
+ var_types simdType;
+ if (baseType != TYP_UNKNOWN)
+ {
+ simdType = getSIMDTypeForSize(size);
+ }
+ else
+ {
+ assert(simdIntrinsicID == SIMDIntrinsicHWAccel);
+ simdType = TYP_UNKNOWN;
+ }
+ bool instMethod = intrinsicInfo->isInstMethod;
+ var_types callType = JITtype2varType(sig->retType);
+ if (callType == TYP_STRUCT)
+ {
+        // Note that here we are assuming that, if the call returns a struct, it is the same size as the
+ // struct on which the method is declared. This is currently true for all methods on Vector types,
+ // but if this ever changes, we will need to determine the callType from the signature.
+ assert(info.compCompHnd->getClassSize(sig->retTypeClass) == genTypeSize(simdType));
+ callType = simdType;
+ }
+
+ GenTree* simdTree = nullptr;
+ GenTree* op1 = nullptr;
+ GenTree* op2 = nullptr;
+ GenTree* op3 = nullptr;
+ GenTree* retVal = nullptr;
+ GenTree* copyBlkDst = nullptr;
+ bool doCopyBlk = false;
+
+ switch (simdIntrinsicID)
+ {
+ case SIMDIntrinsicGetCount:
+ {
+ int length = getSIMDVectorLength(clsHnd);
+ GenTreeIntCon* intConstTree = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, length);
+ retVal = intConstTree;
+ }
+ break;
+
+ case SIMDIntrinsicGetZero:
+ {
+ baseType = genActualType(baseType);
+ GenTree* initVal = gtNewZeroConNode(baseType);
+ initVal->gtType = baseType;
+ simdTree = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicGetOne:
+ {
+ GenTree* initVal;
+ if (varTypeIsSmallInt(baseType))
+ {
+ unsigned baseSize = genTypeSize(baseType);
+ int val;
+ if (baseSize == 1)
+ {
+ val = 0x01010101;
+ }
+ else
+ {
+ val = 0x00010001;
+ }
+ initVal = gtNewIconNode(val);
+ }
+ else
+ {
+ initVal = gtNewOneConNode(baseType);
+ }
+
+ baseType = genActualType(baseType);
+ initVal->gtType = baseType;
+ simdTree = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicGetAllOnes:
+ {
+ // Equivalent to (Vector<T>) new Vector<int>(0xffffffff);
+ GenTree* initVal = gtNewIconNode(0xffffffff, TYP_INT);
+ simdTree = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, TYP_INT, size);
+ if (baseType != TYP_INT)
+ {
+ // cast it to required baseType if different from TYP_INT
+ simdTree = gtNewSIMDNode(simdType, simdTree, nullptr, SIMDIntrinsicCast, baseType, size);
+ }
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicInit:
+ case SIMDIntrinsicInitN:
+ {
+ // SIMDIntrinsicInit:
+ // op2 - the initializer value
+ // op1 - byref of vector
+ //
+ // SIMDIntrinsicInitN
+ // op2 - list of initializer values stitched into a list
+ // op1 - byref of vector
+ bool initFromFirstArgIndir = false;
+ if (simdIntrinsicID == SIMDIntrinsicInit)
+ {
+ op2 = impSIMDPopStack(baseType);
+ }
+ else
+ {
+ assert(simdIntrinsicID == SIMDIntrinsicInitN);
+ assert(baseType == TYP_FLOAT);
+
+ unsigned initCount = argCount - 1;
+ unsigned elementCount = getSIMDVectorLength(size, baseType);
+ noway_assert(initCount == elementCount);
+ GenTree* nextArg = op2;
+
+ // Build a GT_LIST with the N values.
+ // We must maintain left-to-right order of the args, but we will pop
+ // them off in reverse order (the Nth arg was pushed onto the stack last).
+
+ GenTree* list = nullptr;
+ GenTreePtr firstArg = nullptr;
+ GenTreePtr prevArg = nullptr;
+ int offset = 0;
+ bool areArgsContiguous = true;
+ for (unsigned i = 0; i < initCount; i++)
+ {
+ GenTree* nextArg = impSIMDPopStack(baseType);
+ if (areArgsContiguous)
+ {
+ GenTreePtr curArg = nextArg;
+ firstArg = curArg;
+
+ if (prevArg != nullptr)
+ {
+ // Recall that we are popping the args off the stack in reverse order.
+ areArgsContiguous = areArgumentsContiguous(curArg, prevArg);
+ }
+ prevArg = curArg;
+ }
+
+ list = new (this, GT_LIST) GenTreeOp(GT_LIST, baseType, nextArg, list);
+ }
+
+ if (areArgsContiguous && baseType == TYP_FLOAT)
+ {
+                    // Since Vector2, Vector3 and Vector4's arguments are all of type float,
+                    // we initialize the vector from the first argument's address, but only when
+                    // the baseType is TYP_FLOAT and the arguments are located contiguously in memory.
+ initFromFirstArgIndir = true;
+ GenTreePtr op2Address = createAddressNodeForSIMDInit(firstArg, size);
+ var_types simdType = getSIMDTypeForSize(size);
+ op2 = gtNewOperNode(GT_IND, simdType, op2Address);
+ }
+ else
+ {
+ op2 = list;
+ }
+ }
+
+ op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd);
+
+ assert(op1->TypeGet() == TYP_BYREF);
+ assert(genActualType(op2->TypeGet()) == genActualType(baseType) || initFromFirstArgIndir);
+
+#if AVX_WITHOUT_AVX2
+ // NOTE: This #define, AVX_WITHOUT_AVX2, is never defined. This code is kept here
+ // in case we decide to implement AVX support (32 byte vectors) with AVX only.
+ // On AVX (as opposed to AVX2), broadcast is supported only for float and double,
+ // and requires taking a mem address of the value.
+ // If not a constant, take the addr of op2.
+ if (simdIntrinsicID == SIMDIntrinsicInit && canUseAVX())
+ {
+ if (!op2->OperIsConst())
+ {
+ // It is better to assign op2 to a temp and take the addr of temp
+ // rather than taking address of op2 since the latter would make op2
+ // address-taken and ineligible for register allocation.
+ //
+ // op2 = GT_COMMA(tmp=op2, GT_ADDR(tmp))
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Val addr for vector Init"));
+ GenTreePtr asg = gtNewTempAssign(tmpNum, op2);
+ GenTreePtr tmp = gtNewLclvNode(tmpNum, op2->TypeGet());
+ tmp = gtNewOperNode(GT_ADDR, TYP_BYREF, tmp);
+ op2 = gtNewOperNode(GT_COMMA, TYP_BYREF, asg, tmp);
+ }
+ }
+#endif
+ // For integral base types of size less than TYP_INT, expand the initializer
+ // to fill size of TYP_INT bytes.
+ if (varTypeIsSmallInt(baseType))
+ {
+ // This case should occur only for Init intrinsic.
+ assert(simdIntrinsicID == SIMDIntrinsicInit);
+
+ unsigned baseSize = genTypeSize(baseType);
+ int multiplier;
+ if (baseSize == 1)
+ {
+ multiplier = 0x01010101;
+ }
+ else
+ {
+ assert(baseSize == 2);
+ multiplier = 0x00010001;
+ }
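+
+            // For example (illustrative): a byte initializer 0x05 becomes
+            // (0x05 & 0xFF) * 0x01010101 = 0x05050505, i.e. the value replicated into every byte of an int.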
+
+ GenTree* t1 = nullptr;
+ if (baseType == TYP_BYTE)
+ {
+ // What we have is a signed byte initializer,
+ // which when loaded to a reg will get sign extended to TYP_INT.
+                    // But what we need is the initializer zero-extended (rather than sign-extended) to 32 bits.
+ t1 = gtNewOperNode(GT_AND, TYP_INT, op2, gtNewIconNode(0xff, TYP_INT));
+ }
+ else if (baseType == TYP_SHORT)
+ {
+ // What we have is a signed short initializer,
+ // which when loaded to a reg will get sign extended to TYP_INT.
+                    // But what we need is the initializer zero-extended (rather than sign-extended) to 32 bits.
+ t1 = gtNewOperNode(GT_AND, TYP_INT, op2, gtNewIconNode(0xffff, TYP_INT));
+ }
+ else
+ {
+ assert(baseType == TYP_UBYTE || baseType == TYP_CHAR);
+ t1 = gtNewCastNode(TYP_INT, op2, TYP_INT);
+ }
+
+ assert(t1 != nullptr);
+ GenTree* t2 = gtNewIconNode(multiplier, TYP_INT);
+ op2 = gtNewOperNode(GT_MUL, TYP_INT, t1, t2);
+
+ // Construct a vector of TYP_INT with the new initializer and cast it back to vector of baseType
+ simdTree = gtNewSIMDNode(simdType, op2, nullptr, simdIntrinsicID, TYP_INT, size);
+ simdTree = gtNewSIMDNode(simdType, simdTree, nullptr, SIMDIntrinsicCast, baseType, size);
+ }
+ else
+ {
+
+ if (initFromFirstArgIndir)
+ {
+ simdTree = op2;
+ if (op1->gtOp.gtOp1->OperIsLocal())
+ {
+                        // Mark the dst struct's lclvar as used in a SIMD intrinsic,
+                        // so that this dst struct won't be promoted.
+ setLclRelatedToSIMDIntrinsic(op1->gtOp.gtOp1);
+ }
+ }
+ else
+ {
+ simdTree = gtNewSIMDNode(simdType, op2, nullptr, simdIntrinsicID, baseType, size);
+ }
+ }
+
+ copyBlkDst = op1;
+ doCopyBlk = true;
+ }
+ break;
+
+ case SIMDIntrinsicInitArray:
+ case SIMDIntrinsicInitArrayX:
+ case SIMDIntrinsicCopyToArray:
+ case SIMDIntrinsicCopyToArrayX:
+ {
+ // op3 - index into array in case of SIMDIntrinsicCopyToArrayX and SIMDIntrinsicInitArrayX
+ // op2 - array itself
+ // op1 - byref to vector struct
+
+ unsigned int vectorLength = getSIMDVectorLength(size, baseType);
+            // (This constructor takes only zero-based arrays.)
+ // We will add one or two bounds checks:
+ // 1. If we have an index, we must do a check on that first.
+ // We can't combine it with the index + vectorLength check because
+ // a. It might be negative, and b. It may need to raise a different exception
+ // (captured as SCK_ARG_RNG_EXCPN for CopyTo and SCK_RNGCHK_FAIL for Init).
+ // 2. We need to generate a check (SCK_ARG_EXCPN for CopyTo and SCK_RNGCHK_FAIL for Init)
+ // for the last array element we will access.
+ // We'll either check against (vectorLength - 1) or (index + vectorLength - 1).
+
+ GenTree* checkIndexExpr = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, vectorLength - 1);
+
+ // Get the index into the array. If it has been provided, it will be on the
+ // top of the stack. Otherwise, it is null.
+ if (argCount == 3)
+ {
+ op3 = impSIMDPopStack(TYP_INT);
+ if (op3->IsIntegralConst(0))
+ {
+ op3 = nullptr;
+ }
+ }
+ else
+ {
+ // TODO-CQ: Here, or elsewhere, check for the pattern where op2 is a newly constructed array, and
+ // change this to the InitN form.
+ // op3 = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, 0);
+ op3 = nullptr;
+ }
+
+ // Clone the array for use in the bounds check.
+ op2 = impSIMDPopStack(TYP_REF);
+ assert(op2->TypeGet() == TYP_REF);
+ GenTree* arrayRefForArgChk = op2;
+ GenTree* argRngChk = nullptr;
+ GenTree* asg = nullptr;
+ if ((arrayRefForArgChk->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ op2 = fgInsertCommaFormTemp(&arrayRefForArgChk);
+ }
+ else
+ {
+ op2 = gtCloneExpr(arrayRefForArgChk);
+ }
+ assert(op2 != nullptr);
+
+ if (op3 != nullptr)
+ {
+ SpecialCodeKind op3CheckKind;
+ if (simdIntrinsicID == SIMDIntrinsicInitArrayX)
+ {
+ op3CheckKind = SCK_RNGCHK_FAIL;
+ }
+ else
+ {
+ assert(simdIntrinsicID == SIMDIntrinsicCopyToArrayX);
+ op3CheckKind = SCK_ARG_RNG_EXCPN;
+ }
+ // We need to use the original expression on this, which is the first check.
+ GenTree* arrayRefForArgRngChk = arrayRefForArgChk;
+ // Then we clone the clone we just made for the next check.
+ arrayRefForArgChk = gtCloneExpr(op2);
+ // We know we MUST have had a cloneable expression.
+ assert(arrayRefForArgChk != nullptr);
+ GenTree* index = op3;
+ if ((index->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ op3 = fgInsertCommaFormTemp(&index);
+ }
+ else
+ {
+ op3 = gtCloneExpr(index);
+ }
+
+ GenTreeArrLen* arrLen = new (this, GT_ARR_LENGTH)
+ GenTreeArrLen(TYP_INT, arrayRefForArgRngChk, (int)offsetof(CORINFO_Array, length));
+ argRngChk = new (this, GT_ARR_BOUNDS_CHECK)
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, index, op3CheckKind);
+ // Now, clone op3 to create another node for the argChk
+ GenTree* index2 = gtCloneExpr(op3);
+                assert(index2 != nullptr);
+ checkIndexExpr = gtNewOperNode(GT_ADD, TYP_INT, index2, checkIndexExpr);
+ }
+
+ // Insert a bounds check for index + offset - 1.
+ // This must be a "normal" array.
+ SpecialCodeKind op2CheckKind;
+ if (simdIntrinsicID == SIMDIntrinsicInitArray || simdIntrinsicID == SIMDIntrinsicInitArrayX)
+ {
+ op2CheckKind = SCK_RNGCHK_FAIL;
+ }
+ else
+ {
+ op2CheckKind = SCK_ARG_EXCPN;
+ }
+ GenTreeArrLen* arrLen = new (this, GT_ARR_LENGTH)
+ GenTreeArrLen(TYP_INT, arrayRefForArgChk, (int)offsetof(CORINFO_Array, length));
+ GenTreeBoundsChk* argChk = new (this, GT_ARR_BOUNDS_CHECK)
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, checkIndexExpr, op2CheckKind);
+
+ // Create a GT_COMMA tree for the bounds check(s).
+ op2 = gtNewOperNode(GT_COMMA, op2->TypeGet(), argChk, op2);
+ if (argRngChk != nullptr)
+ {
+ op2 = gtNewOperNode(GT_COMMA, op2->TypeGet(), argRngChk, op2);
+ }
+
+ if (simdIntrinsicID == SIMDIntrinsicInitArray || simdIntrinsicID == SIMDIntrinsicInitArrayX)
+ {
+ op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd);
+ simdTree = gtNewSIMDNode(simdType, op2, op3, SIMDIntrinsicInitArray, baseType, size);
+ copyBlkDst = op1;
+ doCopyBlk = true;
+ }
+ else
+ {
+ assert(simdIntrinsicID == SIMDIntrinsicCopyToArray || simdIntrinsicID == SIMDIntrinsicCopyToArrayX);
+ op1 = impSIMDPopStack(simdType, instMethod);
+ assert(op1->TypeGet() == simdType);
+
+ // copy vector (op1) to array (op2) starting at index (op3)
+ simdTree = op1;
+
+ // TODO-Cleanup: Though it happens to just work fine front-end phases are not aware of GT_LEA node.
+ // Therefore, convert these to use GT_ADDR .
+ copyBlkDst = new (this, GT_LEA)
+ GenTreeAddrMode(TYP_BYREF, op2, op3, genTypeSize(baseType), offsetof(CORINFO_Array, u1Elems));
+ doCopyBlk = true;
+ }
+ }
+ break;
+
+ case SIMDIntrinsicInitFixed:
+ {
+ // We are initializing a fixed-length vector VLarge with a smaller fixed-length vector VSmall, plus 1 or 2
+ // additional floats.
+ // op4 (optional) - float value for VLarge.W, if VLarge is Vector4, and VSmall is Vector2
+ // op3 - float value for VLarge.Z or VLarge.W
+ // op2 - VSmall
+ // op1 - byref of VLarge
+ assert(baseType == TYP_FLOAT);
+ unsigned elementByteCount = 4;
+
+ GenTree* op4 = nullptr;
+ if (argCount == 4)
+ {
+ op4 = impSIMDPopStack(TYP_FLOAT);
+ assert(op4->TypeGet() == TYP_FLOAT);
+ }
+ op3 = impSIMDPopStack(TYP_FLOAT);
+ assert(op3->TypeGet() == TYP_FLOAT);
+ // The input vector will either be TYP_SIMD8 or TYP_SIMD12.
+ var_types smallSIMDType = TYP_SIMD8;
+ if ((op4 == nullptr) && (simdType == TYP_SIMD16))
+ {
+ smallSIMDType = TYP_SIMD12;
+ }
+ op2 = impSIMDPopStack(smallSIMDType);
+ op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd);
+
+ // We are going to redefine the operands so that:
+ // - op3 is the value that's going into the Z position, or null if it's a Vector4 constructor with a single
+ // operand, and
+ // - op4 is the W position value, or null if this is a Vector3 constructor.
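+            // e.g. (illustrative): for new Vector4(Vector3 v, float w), argCount == 3 and size == 16,
+            // so the single float argument ends up in op4 (the W position).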
+ if (size == 16 && argCount == 3)
+ {
+ op4 = op3;
+ op3 = nullptr;
+ }
+
+ simdTree = op2;
+ if (op3 != nullptr)
+ {
+ simdTree = gtNewSIMDNode(simdType, simdTree, op3, SIMDIntrinsicSetZ, baseType, size);
+ }
+ if (op4 != nullptr)
+ {
+ simdTree = gtNewSIMDNode(simdType, simdTree, op4, SIMDIntrinsicSetW, baseType, size);
+ }
+
+ copyBlkDst = op1;
+ doCopyBlk = true;
+ }
+ break;
+
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicInstEquals:
+ {
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ assert(op1->TypeGet() == simdType);
+ assert(op2->TypeGet() == simdType);
+
+ simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpEquality, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicOpInEquality:
+ {
+ // op1 is the first operand
+ // op2 is the second operand
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+ simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpInEquality, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicEqual:
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ case SIMDIntrinsicGreaterThan:
+ case SIMDIntrinsicGreaterThanOrEqual:
+ {
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ SIMDIntrinsicID intrinsicID = impSIMDRelOp(simdIntrinsicID, clsHnd, size, &baseType, &op1, &op2);
+ simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, intrinsicID, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicSub:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicDiv:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseAndNot:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ {
+#if defined(_TARGET_AMD64_) && defined(DEBUG)
+            // Check for the cases where we don't support the intrinsic.
+            // This check should be done before we make modifications to the type stack.
+            // Note that this is more of a double safety check for robustness since
+            // we expect getSIMDIntrinsicInfo() to have filtered out intrinsics on
+            // unsupported base types. If getSIMDIntrinsicInfo() doesn't filter due
+            // to some bug, the assert in chk/dbg builds will fire.
+ if (!varTypeIsFloating(baseType))
+ {
+ if (simdIntrinsicID == SIMDIntrinsicMul)
+ {
+ if ((baseType != TYP_INT) && (baseType != TYP_SHORT))
+ {
+ // TODO-CQ: implement mul on these integer vectors.
+ // Note that SSE2 has no direct support for these vectors.
+ assert(!"Mul not supported on long/ulong/uint/small int vectors\n");
+ return nullptr;
+ }
+ }
+
+ // common to all integer type vectors
+ if (simdIntrinsicID == SIMDIntrinsicDiv)
+ {
+ // SSE2 doesn't support div on non-floating point vectors.
+ assert(!"Div not supported on integer type vectors\n");
+ return nullptr;
+ }
+ }
+#endif //_TARGET_AMD64_ && DEBUG
+
+ // op1 is the first operand; if instance method, op1 is "this" arg
+ // op2 is the second operand
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ simdTree = gtNewSIMDNode(simdType, op1, op2, simdIntrinsicID, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicSelect:
+ {
+ // op3 is a SIMD variable that is the second source
+ // op2 is a SIMD variable that is the first source
+ // op1 is a SIMD variable which is the bit mask.
+ op3 = impSIMDPopStack(simdType);
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType);
+
+ retVal = impSIMDSelect(clsHnd, baseType, size, op1, op2, op3);
+ }
+ break;
+
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMax:
+ {
+ // op1 is the first operand; if instance method, op1 is "this" arg
+ // op2 is the second operand
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ retVal = impSIMDMinMax(simdIntrinsicID, clsHnd, baseType, size, op1, op2);
+ }
+ break;
+
+ case SIMDIntrinsicGetItem:
+ {
+ // op1 is a SIMD variable that is "this" arg
+ // op2 is an index of TYP_INT
+ op2 = impSIMDPopStack(TYP_INT);
+ op1 = impSIMDPopStack(simdType, instMethod);
+ unsigned int vectorLength = getSIMDVectorLength(size, baseType);
+ if (!op2->IsCnsIntOrI() || op2->AsIntCon()->gtIconVal >= vectorLength)
+ {
+ // We need to bounds-check the length of the vector.
+ // For that purpose, we need to clone the index expression.
+ GenTree* index = op2;
+ if ((index->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ op2 = fgInsertCommaFormTemp(&index);
+ }
+ else
+ {
+ op2 = gtCloneExpr(index);
+ }
+
+ GenTree* lengthNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, vectorLength);
+ GenTreeBoundsChk* simdChk =
+ new (this, GT_SIMD_CHK) GenTreeBoundsChk(GT_SIMD_CHK, TYP_VOID, lengthNode, index, SCK_RNGCHK_FAIL);
+
+ // Create a GT_COMMA tree for the bounds check.
+ op2 = gtNewOperNode(GT_COMMA, op2->TypeGet(), simdChk, op2);
+ }
+
+ assert(op1->TypeGet() == simdType);
+ assert(op2->TypeGet() == TYP_INT);
+
+ simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, simdIntrinsicID, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicDotProduct:
+ {
+#if defined(_TARGET_AMD64_) && defined(DEBUG)
+ // Right now dot product is supported only for float vectors.
+ // See SIMDIntrinsicList.h for supported base types for this intrinsic.
+ if (!varTypeIsFloating(baseType))
+ {
+ assert(!"Dot product on integer type vectors not supported");
+ return nullptr;
+ }
+#endif //_TARGET_AMD64_ && DEBUG
+
+ // op1 is a SIMD variable that is the first source and also "this" arg.
+ // op2 is a SIMD variable which is the second source.
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ simdTree = gtNewSIMDNode(baseType, op1, op2, simdIntrinsicID, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicSqrt:
+ {
+#if defined(_TARGET_AMD64_) && defined(DEBUG)
+ // SSE/AVX doesn't support sqrt on integer type vectors and hence
+ // should never be seen as an intrinsic here. See SIMDIntrinsicList.h
+ // for supported base types for this intrinsic.
+ if (!varTypeIsFloating(baseType))
+ {
+ assert(!"Sqrt not supported on integer vectors\n");
+ return nullptr;
+ }
+#endif // _TARGET_AMD64_ && DEBUG
+
+ op1 = impSIMDPopStack(simdType);
+
+ retVal = gtNewSIMDNode(genActualType(callType), op1, nullptr, simdIntrinsicID, baseType, size);
+ }
+ break;
+
+ case SIMDIntrinsicAbs:
+ {
+ op1 = impSIMDPopStack(simdType);
+
+#ifdef _TARGET_AMD64_
+ if (varTypeIsFloating(baseType))
+ {
+ // Abs(vf) = vf & new SIMDVector<float>(0x7fffffff);
+                // Abs(vd) = vd & new SIMDVector<double>(0x7fffffffffffffff);
+ GenTree* bitMask = nullptr;
+ if (baseType == TYP_FLOAT)
+ {
+ float f;
+ static_assert_no_msg(sizeof(float) == sizeof(int));
+ *((int*)&f) = 0x7fffffff;
+ bitMask = gtNewDconNode(f);
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ double d;
+ static_assert_no_msg(sizeof(double) == sizeof(__int64));
+ *((__int64*)&d) = 0x7fffffffffffffffLL;
+ bitMask = gtNewDconNode(d);
+ }
+
+ assert(bitMask != nullptr);
+ bitMask->gtType = baseType;
+ GenTree* bitMaskVector = gtNewSIMDNode(simdType, bitMask, SIMDIntrinsicInit, baseType, size);
+ retVal = gtNewSIMDNode(simdType, op1, bitMaskVector, SIMDIntrinsicBitwiseAnd, baseType, size);
+ }
+ else if (baseType == TYP_CHAR || baseType == TYP_UBYTE || baseType == TYP_UINT || baseType == TYP_ULONG)
+ {
+ // Abs is a no-op on unsigned integer type vectors
+ retVal = op1;
+ }
+ else
+ {
+ // SSE/AVX doesn't support abs on signed integer vectors and hence
+ // should never be seen as an intrinsic here. See SIMDIntrinsicList.h
+ // for supported base types for this intrinsic.
+ unreached();
+ }
+
+#else //!_TARGET_AMD64_
+ assert(!"Abs intrinsic on non-Amd64 target not implemented");
+ unreached();
+#endif //!_TARGET_AMD64_
+ }
+ break;
+
+ case SIMDIntrinsicGetW:
+ retVal = impSIMDGetFixed(simdType, baseType, size, 3);
+ break;
+
+ case SIMDIntrinsicGetZ:
+ retVal = impSIMDGetFixed(simdType, baseType, size, 2);
+ break;
+
+ case SIMDIntrinsicGetY:
+ retVal = impSIMDGetFixed(simdType, baseType, size, 1);
+ break;
+
+ case SIMDIntrinsicGetX:
+ retVal = impSIMDGetFixed(simdType, baseType, size, 0);
+ break;
+
+ case SIMDIntrinsicSetW:
+ case SIMDIntrinsicSetZ:
+ case SIMDIntrinsicSetY:
+ case SIMDIntrinsicSetX:
+ {
+ // op2 is the value to be set at indexTemp position
+ // op1 is SIMD vector that is going to be modified, which is a byref
+
+ // If op1 has a side-effect, then don't make it an intrinsic.
+            // It would be inefficient to read the entire vector into an xmm reg,
+            // modify it, and write the entire xmm reg back.
+ //
+ // TODO-CQ: revisit this later.
+ op1 = impStackTop(1).val;
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ return nullptr;
+ }
+
+ op2 = impSIMDPopStack(baseType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ GenTree* src = gtCloneExpr(op1);
+ assert(src != nullptr);
+ simdTree = gtNewSIMDNode(simdType, src, op2, simdIntrinsicID, baseType, size);
+
+ copyBlkDst = gtNewOperNode(GT_ADDR, TYP_BYREF, op1);
+ doCopyBlk = true;
+ }
+ break;
+
+ // Unary operators that take and return a Vector.
+ case SIMDIntrinsicCast:
+ {
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ simdTree = gtNewSIMDNode(simdType, op1, nullptr, simdIntrinsicID, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicHWAccel:
+ {
+ GenTreeIntCon* intConstTree = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, 1);
+ retVal = intConstTree;
+ }
+ break;
+
+ default:
+ assert(!"Unimplemented SIMD Intrinsic");
+ return nullptr;
+ }
+
+#ifdef _TARGET_AMD64_
+ // Amd64: also indicate that we use floating point registers.
+ // The need for setting this here is that a method may not have SIMD
+ // type lclvars, but might be exercising SIMD intrinsics on fields of
+ // SIMD type.
+ //
+ // e.g. public Vector<float> ComplexVecFloat::sqabs() { return this.r * this.r + this.i * this.i; }
+ compFloatingPointUsed = true;
+#endif
+
+ // At this point, we have a tree that we are going to store into a destination.
+ // TODO-1stClassStructs: This should be a simple store or assignment, and should not require
+ // GTF_ALL_EFFECT for the dest. This is currently emulating the previous behavior of
+ // block ops.
+ if (doCopyBlk)
+ {
+ GenTree* dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, simdType, copyBlkDst, getSIMDTypeSizeInBytes(clsHnd));
+ dest->gtFlags |= GTF_GLOB_REF;
+ retVal = gtNewBlkOpNode(dest, simdTree, getSIMDTypeSizeInBytes(clsHnd),
+ false, // not volatile
+ true); // copyBlock
+ retVal->gtFlags |= ((simdTree->gtFlags | copyBlkDst->gtFlags) & GTF_ALL_EFFECT);
+ }
+
+ return retVal;
+}
+
+#endif // FEATURE_SIMD
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/simd.h b/src/jit/simd.h
new file mode 100644
index 0000000000..c68899e412
--- /dev/null
+++ b/src/jit/simd.h
@@ -0,0 +1,43 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _SIMD_H_
+#define _SIMD_H_
+
+#ifdef FEATURE_SIMD
+
+#ifdef DEBUG
+extern const char* const simdIntrinsicNames[];
+#endif
+
+enum SIMDIntrinsicID
+{
+#define SIMD_INTRINSIC(m, i, id, n, r, ac, arg1, arg2, arg3, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) SIMDIntrinsic##id,
+#include "simdintrinsiclist.h"
+};
+
+// Static info about a SIMD intrinsic
+struct SIMDIntrinsicInfo
+{
+ SIMDIntrinsicID id;
+ const char* methodName;
+ bool isInstMethod;
+ var_types retType;
+ unsigned char argCount;
+ var_types argType[SIMD_INTRINSIC_MAX_MODELED_PARAM_COUNT];
+ var_types supportedBaseTypes[SIMD_INTRINSIC_MAX_BASETYPE_COUNT];
+};
+
+#ifdef _TARGET_AMD64_
+// SSE2 Shuffle control byte to shuffle vector <W, Z, Y, X>
+// These correspond to shuffle immediate byte in shufps SSE2 instruction.
+#define SHUFFLE_XXXX 0x00
+#define SHUFFLE_ZWYX 0xB1
+#define SHUFFLE_WWYY 0xF5
+#define SHUFFLE_ZZXX 0xA0
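+// For reference (illustrative reading of the shufps immediate): destination lane i takes the source element
+// selected by immediate bits [2i+1:2i]; e.g. SHUFFLE_ZWYX (0xB1 = 10 11 00 01) picks source elements
+// 2, 3, 0, 1 for destination lanes 3, 2, 1, 0 respectively.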
+#endif
+
+#endif // FEATURE_SIMD
+
+#endif //_SIMD_H_
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
new file mode 100644
index 0000000000..702f967aad
--- /dev/null
+++ b/src/jit/simdcodegenxarch.cpp
@@ -0,0 +1,2143 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Amd64 SIMD Code Generator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.
+
+#ifdef _TARGET_AMD64_
+#include "emit.h"
+#include "codegen.h"
+#include "sideeffects.h"
+#include "lower.h"
+#include "gcinfo.h"
+#include "gcinfoencoder.h"
+
+#ifdef FEATURE_SIMD
+
+// Instruction immediates
+
+// Insertps:
+// - bits 6 and 7 of the immediate indicate which source item to select (0..3)
+// - bits 4 and 5 of the immediate indicate which target item to insert into (0..3)
+// - bits 0 to 3 of the immediate form a mask indicating which target items to zero
+#define INSERTPS_SOURCE_SELECT(i) (i << 6)
+#define INSERTPS_TARGET_SELECT(i) (i << 4)
+#define INSERTPS_ZERO(i) (1 << i)
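+// For example (illustrative): INSERTPS_TARGET_SELECT(0) | INSERTPS_ZERO(1) | INSERTPS_ZERO(2) | INSERTPS_ZERO(3)
+// yields 0x0E, i.e. copy the selected source element into target slot 0 and zero target slots 1 through 3
+// (this is the immediate built in genSIMDScalarMove for the SMT_ZeroInitUpper case below).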
+
+// getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic
+//
+// Arguments:
+// intrinsicId - SIMD intrinsic Id
+// baseType - Base type of the SIMD vector
+// immed - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode
+//
+//
+// Return Value:
+// Instruction (op) to be used, and immed is set if instruction requires an immediate operand.
+//
+instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/)
+{
+ // Minimal required instruction set is SSE2.
+ assert(compiler->canUseSSE2());
+
+ instruction result = INS_invalid;
+ switch (intrinsicId)
+ {
+ case SIMDIntrinsicInit:
+ if (compiler->canUseAVX())
+ {
+ // AVX supports broadcast instructions to populate YMM reg with a single float/double value from memory.
+                // AVX2 supports broadcast instructions to populate a YMM reg with a single value from memory or an xmm reg.
+ // If we decide to use AVX2 only, we can remove this assert.
+ if ((compiler->opts.eeFlags & CORJIT_FLG_USE_AVX2) == 0)
+ {
+ assert(baseType == TYP_FLOAT || baseType == TYP_DOUBLE);
+ }
+ switch (baseType)
+ {
+ case TYP_FLOAT:
+ result = INS_vbroadcastss;
+ break;
+ case TYP_DOUBLE:
+ result = INS_vbroadcastsd;
+ break;
+ case TYP_ULONG:
+ __fallthrough;
+ case TYP_LONG:
+ result = INS_vpbroadcastq;
+ break;
+ case TYP_UINT:
+ __fallthrough;
+ case TYP_INT:
+ result = INS_vpbroadcastd;
+ break;
+ case TYP_CHAR:
+ __fallthrough;
+ case TYP_SHORT:
+ result = INS_vpbroadcastw;
+ break;
+ case TYP_UBYTE:
+ __fallthrough;
+ case TYP_BYTE:
+ result = INS_vpbroadcastb;
+ break;
+ default:
+ unreached();
+ }
+ break;
+ }
+ // For SSE, SIMDIntrinsicInit uses the same instruction as the SIMDIntrinsicShuffleSSE2 intrinsic.
+ __fallthrough;
+ case SIMDIntrinsicShuffleSSE2:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_shufps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_shufpd;
+ }
+ else if (baseType == TYP_INT || baseType == TYP_UINT)
+ {
+ result = INS_pshufd;
+ }
+ else if (baseType == TYP_LONG || baseType == TYP_ULONG)
+ {
+                // We don't have a separate SSE2 instruction and will
+ // use the instruction meant for doubles since it is
+ // of the same size as a long.
+ result = INS_shufpd;
+ }
+ break;
+
+ case SIMDIntrinsicSqrt:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_sqrtps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_sqrtpd;
+ }
+ else
+ {
+ unreached();
+ }
+ break;
+
+ case SIMDIntrinsicAdd:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_addps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_addpd;
+ }
+ else if (baseType == TYP_INT || baseType == TYP_UINT)
+ {
+ result = INS_paddd;
+ }
+ else if (baseType == TYP_CHAR || baseType == TYP_SHORT)
+ {
+ result = INS_paddw;
+ }
+ else if (baseType == TYP_UBYTE || baseType == TYP_BYTE)
+ {
+ result = INS_paddb;
+ }
+ else if (baseType == TYP_LONG || baseType == TYP_ULONG)
+ {
+ result = INS_paddq;
+ }
+ break;
+
+ case SIMDIntrinsicSub:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_subps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_subpd;
+ }
+ else if (baseType == TYP_INT || baseType == TYP_UINT)
+ {
+ result = INS_psubd;
+ }
+ else if (baseType == TYP_CHAR || baseType == TYP_SHORT)
+ {
+ result = INS_psubw;
+ }
+ else if (baseType == TYP_UBYTE || baseType == TYP_BYTE)
+ {
+ result = INS_psubb;
+ }
+ else if (baseType == TYP_LONG || baseType == TYP_ULONG)
+ {
+ result = INS_psubq;
+ }
+ break;
+
+ case SIMDIntrinsicMul:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_mulps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_mulpd;
+ }
+ else if (baseType == TYP_SHORT)
+ {
+ result = INS_pmullw;
+ }
+ else if (compiler->canUseAVX())
+ {
+ if (baseType == TYP_INT)
+ {
+ result = INS_pmulld;
+ }
+ }
+ break;
+
+ case SIMDIntrinsicDiv:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_divps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_divpd;
+ }
+ else
+ {
+ unreached();
+ }
+ break;
+
+ case SIMDIntrinsicMin:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_minps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_minpd;
+ }
+ else if (baseType == TYP_UBYTE)
+ {
+ result = INS_pminub;
+ }
+ else if (baseType == TYP_SHORT)
+ {
+ result = INS_pminsw;
+ }
+ else
+ {
+ unreached();
+ }
+ break;
+
+ case SIMDIntrinsicMax:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_maxps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_maxpd;
+ }
+ else if (baseType == TYP_UBYTE)
+ {
+ result = INS_pmaxub;
+ }
+ else if (baseType == TYP_SHORT)
+ {
+ result = INS_pmaxsw;
+ }
+ else
+ {
+ unreached();
+ }
+ break;
+
+ case SIMDIntrinsicEqual:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_cmpps;
+ assert(ival != nullptr);
+ *ival = 0;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_cmppd;
+ assert(ival != nullptr);
+ *ival = 0;
+ }
+ else if (baseType == TYP_INT || baseType == TYP_UINT)
+ {
+ result = INS_pcmpeqd;
+ }
+ else if (baseType == TYP_CHAR || baseType == TYP_SHORT)
+ {
+ result = INS_pcmpeqw;
+ }
+ else if (baseType == TYP_UBYTE || baseType == TYP_BYTE)
+ {
+ result = INS_pcmpeqb;
+ }
+ else if (compiler->canUseAVX() && (baseType == TYP_ULONG || baseType == TYP_LONG))
+ {
+ result = INS_pcmpeqq;
+ }
+ break;
+
+ case SIMDIntrinsicLessThan:
+ // Packed integers use > with swapped operands
+ assert(baseType != TYP_INT);
+
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_cmpps;
+ assert(ival != nullptr);
+ *ival = 1;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_cmppd;
+ assert(ival != nullptr);
+ *ival = 1;
+ }
+ break;
+
+ case SIMDIntrinsicLessThanOrEqual:
+            // Packed integers use (a == b) || (b > a) in place of a <= b.
+ assert(baseType != TYP_INT);
+
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_cmpps;
+ assert(ival != nullptr);
+ *ival = 2;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_cmppd;
+ assert(ival != nullptr);
+ *ival = 2;
+ }
+ break;
+
+ case SIMDIntrinsicGreaterThan:
+ // Packed float/double use < with swapped operands
+ assert(!varTypeIsFloating(baseType));
+
+ // SSE2 supports only signed >
+ if (baseType == TYP_INT)
+ {
+ result = INS_pcmpgtd;
+ }
+ else if (baseType == TYP_SHORT)
+ {
+ result = INS_pcmpgtw;
+ }
+ else if (baseType == TYP_BYTE)
+ {
+ result = INS_pcmpgtb;
+ }
+ else if (compiler->canUseAVX() && (baseType == TYP_LONG))
+ {
+ result = INS_pcmpgtq;
+ }
+ break;
+
+ case SIMDIntrinsicBitwiseAnd:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_andps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_andpd;
+ }
+ else if (varTypeIsIntegral(baseType))
+ {
+ result = INS_pand;
+ }
+ break;
+
+ case SIMDIntrinsicBitwiseAndNot:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_andnps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_andnpd;
+ }
+ else if (baseType == TYP_INT)
+ {
+ result = INS_pandn;
+ }
+ else if (varTypeIsIntegral(baseType))
+ {
+ result = INS_pandn;
+ }
+ break;
+
+ case SIMDIntrinsicBitwiseOr:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_orps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_orpd;
+ }
+ else if (varTypeIsIntegral(baseType))
+ {
+ result = INS_por;
+ }
+ break;
+
+ case SIMDIntrinsicBitwiseXor:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_xorps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_xorpd;
+ }
+ else if (varTypeIsIntegral(baseType))
+ {
+ result = INS_pxor;
+ }
+ break;
+
+ case SIMDIntrinsicCast:
+ result = INS_movaps;
+ break;
+
+ case SIMDIntrinsicShiftLeftInternal:
+ // base type doesn't matter since the entire vector is shifted left
+ result = INS_pslldq;
+ break;
+
+ case SIMDIntrinsicShiftRightInternal:
+ // base type doesn't matter since the entire vector is shifted right
+ result = INS_psrldq;
+ break;
+
+ case SIMDIntrinsicUpperSave:
+ result = INS_vextractf128;
+ break;
+
+ case SIMDIntrinsicUpperRestore:
+ result = INS_insertps;
+ break;
+
+ default:
+ assert(!"Unsupported SIMD intrinsic");
+ unreached();
+ }
+
+ noway_assert(result != INS_invalid);
+ return result;
+}
+
+// genSIMDScalarMove: Generate code to move a value of type "type" from src mm reg
+// to target mm reg, zeroing out the upper bits if and only if specified.
+//
+// Arguments:
+// type the type of value to be moved
+// targetReg the target reg
+// srcReg the src reg
+// moveType action to be performed on target upper bits
+//
+// Return Value:
+// None
+//
+// Notes:
+// This is currently only supported for floating point types.
+//
+void CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber srcReg, SIMDScalarMoveType moveType)
+{
+ var_types targetType = compiler->getSIMDVectorType();
+ assert(varTypeIsFloating(type));
+#ifdef FEATURE_AVX_SUPPORT
+ if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+ {
+ switch (moveType)
+ {
+ case SMT_PreserveUpper:
+ if (srcReg != targetReg)
+ {
+ instruction ins = ins_Store(type);
+ if (getEmitter()->IsThreeOperandMoveAVXInstruction(ins))
+ {
+                        // In general, when we use a three-operand move instruction, we want to merge the src with
+ // itself. This is an exception in that we actually want the "merge" behavior, so we must
+ // specify it with all 3 operands.
+ inst_RV_RV_RV(ins, targetReg, targetReg, srcReg, emitTypeSize(targetType));
+ }
+ else
+ {
+ inst_RV_RV(ins, targetReg, srcReg, targetType, emitTypeSize(targetType));
+ }
+ }
+ break;
+
+ case SMT_ZeroInitUpper:
+ {
+ // insertps is a 128-bit only instruction, and clears the upper 128 bits, which is what we want.
+                // The insertpsImm selects which fields of the lower 128 bits are copied and which are zeroed,
+                // so we choose to zero all but the lowest element.
+ unsigned int insertpsImm =
+ (INSERTPS_TARGET_SELECT(0) | INSERTPS_ZERO(1) | INSERTPS_ZERO(2) | INSERTPS_ZERO(3));
+ inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, srcReg, insertpsImm);
+ break;
+ }
+
+ case SMT_ZeroInitUpper_SrcHasUpperZeros:
+ if (srcReg != targetReg)
+ {
+ instruction ins = ins_Copy(type);
+ assert(!getEmitter()->IsThreeOperandMoveAVXInstruction(ins));
+ inst_RV_RV(ins, targetReg, srcReg, targetType, emitTypeSize(targetType));
+ }
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ else
+#endif // FEATURE_AVX_SUPPORT
+ {
+ // SSE
+
+ switch (moveType)
+ {
+ case SMT_PreserveUpper:
+ if (srcReg != targetReg)
+ {
+ inst_RV_RV(ins_Store(type), targetReg, srcReg, targetType, emitTypeSize(targetType));
+ }
+ break;
+
+ case SMT_ZeroInitUpper:
+ if (srcReg == targetReg)
+ {
+                    // There is no guarantee that the upper bits of op1Reg are zero.
+                    // We zero them by shifting left logically by 12 bytes and then right logically by 12 bytes.
+ instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, type);
+ getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12);
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, type);
+ getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12);
+ }
+ else
+ {
+ genSIMDZero(targetType, TYP_FLOAT, targetReg);
+ inst_RV_RV(ins_Store(type), targetReg, srcReg);
+ }
+ break;
+
+ case SMT_ZeroInitUpper_SrcHasUpperZeros:
+ if (srcReg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(type), targetReg, srcReg, targetType, emitTypeSize(targetType));
+ }
+ break;
+
+ default:
+ unreached();
+ }
+ }
+}
+
+void CodeGen::genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg)
+{
+ // pxor reg, reg
+ instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicBitwiseXor, baseType);
+ inst_RV_RV(ins, targetReg, targetReg, targetType, emitActualTypeSize(targetType));
+}
+
+//------------------------------------------------------------------------
+// genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInit);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+ var_types targetType = simdNode->TypeGet();
+ InstructionSet iset = compiler->getSIMDInstructionSet();
+ unsigned size = simdNode->gtSIMDSize;
+
+ // Should never see small int base type vectors except for zero initialization.
+ noway_assert(!varTypeIsSmallInt(baseType) || op1->IsIntegralConst(0));
+
+ instruction ins = INS_invalid;
+ if (op1->isContained())
+ {
+ if (op1->IsIntegralConst(0) || op1->IsFPZero())
+ {
+ genSIMDZero(targetType, baseType, targetReg);
+ }
+ else if (varTypeIsIntegral(baseType) && op1->IsIntegralConst(-1))
+ {
+ // case of initializing elements of vector with all 1's
+ // generate pcmpeqd reg, reg
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicEqual, TYP_INT);
+ inst_RV_RV(ins, targetReg, targetReg, targetType, emitActualTypeSize(targetType));
+ }
+#ifdef FEATURE_AVX_SUPPORT
+ else
+ {
+ assert(iset == InstructionSet_AVX);
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicInit, baseType);
+ if (op1->IsCnsFltOrDbl())
+ {
+ getEmitter()->emitInsBinary(ins, emitTypeSize(targetType), simdNode, op1);
+ }
+ else if (op1->OperIsLocalAddr())
+ {
+ unsigned offset = (op1->OperGet() == GT_LCL_FLD_ADDR) ? op1->gtLclFld.gtLclOffs : 0;
+ getEmitter()->emitIns_R_S(ins, emitTypeSize(targetType), targetReg, op1->gtLclVarCommon.gtLclNum,
+ offset);
+ }
+ else
+ {
+ unreached();
+ }
+ }
+#endif // FEATURE_AVX_SUPPORT
+ }
+ else if (iset == InstructionSet_AVX && ((size == 32) || (size == 16)))
+ {
+ regNumber srcReg = genConsumeReg(op1);
+ if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG)
+ {
+ ins = ins_CopyIntToFloat(baseType, TYP_FLOAT);
+ assert(ins != INS_invalid);
+ inst_RV_RV(ins, targetReg, srcReg, baseType, emitTypeSize(baseType));
+ srcReg = targetReg;
+ }
+
+ ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
+ getEmitter()->emitIns_R_R(ins, emitActualTypeSize(targetType), targetReg, srcReg);
+ }
+ else
+ {
+ // If we reach here, op1 is not contained and we are using SSE or it is a SubRegisterSIMDType.
+ // In either case we are going to use the SSE2 shuffle instruction.
+
+ regNumber op1Reg = genConsumeReg(op1);
+ unsigned shuffleControl = 0;
+
+ if (compiler->isSubRegisterSIMDType(simdNode))
+ {
+ assert(baseType == TYP_FLOAT);
+
+ // We cannot assume that the upper bits of op1Reg or targetReg are zero.
+ // Therefore we need to explicitly zero out upper bits. This is
+ // essential for the shuffle operation performed below.
+ //
+ // If op1 is a float/double constant, we would have loaded it from
+ // data section using movss/sd. Similarly if op1 is a memory op we
+ // would have loaded it using movss/sd. Movss/sd when loading a xmm reg
+ // from memory would zero-out upper bits. In these cases we can
+ // avoid explicitly zeroing out targetReg when targetReg and op1Reg are the same, or do it
+ // more efficiently when they are not.
+ SIMDScalarMoveType moveType =
+ op1->IsCnsFltOrDbl() || op1->isMemoryOp() ? SMT_ZeroInitUpper_SrcHasUpperZeros : SMT_ZeroInitUpper;
+
+ genSIMDScalarMove(TYP_FLOAT, targetReg, op1Reg, moveType);
+
+ if (size == 8)
+ {
+ shuffleControl = 0x50;
+ }
+ else if (size == 12)
+ {
+ shuffleControl = 0x40;
+ }
+ else
+ {
+ noway_assert(!"Unexpected size for SIMD type");
+ }
+ }
+ else // Vector<T>
+ {
+ if (op1Reg != targetReg)
+ {
+ if (varTypeIsFloating(baseType))
+ {
+ ins = ins_Copy(targetType);
+ }
+ else if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG)
+ {
+ ins = ins_CopyIntToFloat(baseType, TYP_FLOAT);
+ }
+
+ assert(ins != INS_invalid);
+ inst_RV_RV(ins, targetReg, op1Reg, baseType, emitTypeSize(baseType));
+ }
+ }
+
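+ // For reference (a sketch of the shufps/pshufd control byte, which selects two bits per
+ // destination element from low to high): 0x00 broadcasts element 0 to all four lanes (the
+ // Vector<T> case); with the upper lanes of targetReg already zeroed, 0x50 produces
+ // (x, x, 0, 0) for Vector2 and 0x40 produces (x, x, x, 0) for Vector3.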
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicShuffleSSE2, baseType);
+ getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, targetReg, shuffleControl);
+ }
+
+ genProduceReg(simdNode);
+}
+
+//-------------------------------------------------------------------------------------------
+// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes
+// a number of arguments equal to the length of the Vector.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitN);
+
+ // Right now this intrinsic is supported only on TYP_FLOAT vectors
+ var_types baseType = simdNode->gtSIMDBaseType;
+ noway_assert(baseType == TYP_FLOAT);
+
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+
+ var_types targetType = simdNode->TypeGet();
+
+ // Note that we cannot use targetReg until we have consumed all of the source operands.
+ // Therefore, we need an internal register to stitch the values together into a single
+ // vector in an XMM reg.
+ assert(simdNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(simdNode->gtRsvdRegs) == 1);
+ regNumber vectorReg = genRegNumFromMask(simdNode->gtRsvdRegs);
+
+ // Zero out vectorReg if we are constructing a vector whose size is not equal to targetType vector size.
+ // For example in case of Vector4f we don't need to zero when using SSE2.
+ if (compiler->isSubRegisterSIMDType(simdNode))
+ {
+ genSIMDZero(targetType, baseType, vectorReg);
+ }
+
+ unsigned int baseTypeSize = genTypeSize(baseType);
+ instruction insLeftShift = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, baseType);
+
+ // We will first consume the list items in execution (left to right) order,
+ // and record the registers.
+ regNumber operandRegs[SIMD_INTRINSIC_MAX_PARAM_COUNT];
+ unsigned initCount = 0;
+ for (GenTree* list = simdNode->gtGetOp1(); list != nullptr; list = list->gtGetOp2())
+ {
+ assert(list->OperGet() == GT_LIST);
+ GenTree* listItem = list->gtGetOp1();
+ assert(listItem->TypeGet() == baseType);
+ assert(!listItem->isContained());
+ regNumber operandReg = genConsumeReg(listItem);
+ operandRegs[initCount] = operandReg;
+ initCount++;
+ }
+
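+ // Illustrative walk-through for Vector3(x, y, z), assuming operandRegs = { x, y, z } in
+ // execution order: iteration 0 moves z into element 0 of vectorReg; iteration 1 shifts
+ // vectorReg left by 4 bytes and moves y into element 0; iteration 2 shifts left again and
+ // moves x into element 0, leaving vectorReg = (x, y, z, 0), with the top element zero
+ // because vectorReg was zeroed above for the sub-register case.
+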
+ unsigned int offset = 0;
+ for (unsigned i = 0; i < initCount; i++)
+ {
+ // We will now construct the vector from the list items in reverse order.
+ // This allows us to efficiently stitch together a vector as follows:
+ // vectorReg = (vectorReg << offset)
+ // VectorReg[0] = listItemReg
+ // Use genSIMDScalarMove with SMT_PreserveUpper in order to ensure that the upper
+ // bits of vectorReg are not modified.
+
+ regNumber operandReg = operandRegs[initCount - i - 1];
+ if (offset != 0)
+ {
+ getEmitter()->emitIns_R_I(insLeftShift, EA_16BYTE, vectorReg, baseTypeSize);
+ }
+ genSIMDScalarMove(baseType, vectorReg, operandReg, SMT_PreserveUpper);
+
+ offset += baseTypeSize;
+ }
+
+ noway_assert(offset == simdNode->gtSIMDSize);
+
+ // Load the initialized value.
+ if (targetReg != vectorReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, vectorReg, targetType, emitActualTypeSize(targetType));
+ }
+ genProduceReg(simdNode);
+}
+
+//----------------------------------------------------------------------------------
+// genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+ var_types targetType = simdNode->TypeGet();
+
+ regNumber op1Reg = genConsumeReg(op1);
+ instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
+ if (simdNode->gtSIMDIntrinsicID != SIMDIntrinsicCast || targetReg != op1Reg)
+ {
+ inst_RV_RV(ins, targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ }
+ genProduceReg(simdNode);
+}
+
+//--------------------------------------------------------------------------------
+// genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations
+// add, sub, mul, bit-wise And, AndNot and Or.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMin ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMax);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+ var_types targetType = simdNode->TypeGet();
+ InstructionSet iset = compiler->getSIMDInstructionSet();
+
+ genConsumeOperands(simdNode);
+ regNumber op1Reg = op1->gtRegNum;
+ regNumber op2Reg = op2->gtRegNum;
+ regNumber otherReg = op2Reg;
+
+ // Vector<Int>.Mul:
+ // SSE2 doesn't have an instruction to perform this operation directly
+ // whereas SSE4.1 does (pmulld). This is special cased and computed
+ // as follows.
+ if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul && baseType == TYP_INT && iset == InstructionSet_SSE2)
+ {
+ // We need a temporary register that is NOT the same as the target,
+ // and we MAY need another.
+ assert(simdNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(simdNode->gtRsvdRegs) == 2);
+
+ regMaskTP tmpRegsMask = simdNode->gtRsvdRegs;
+ regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~tmpReg1Mask;
+ regNumber tmpReg = genRegNumFromMask(tmpReg1Mask);
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+ // The register allocator guarantees the following conditions:
+ // - the only registers that may be the same among op1Reg, op2Reg, tmpReg
+ // and tmpReg2 are op1Reg and op2Reg.
+ // Let's be extra-careful and assert that now.
+ assert((op1Reg != tmpReg) && (op1Reg != tmpReg2) && (op2Reg != tmpReg) && (op2Reg != tmpReg2) &&
+ (tmpReg != tmpReg2));
+
+ // We will start by setting things up so that:
+ // - We have op1 in op1Reg and targetReg, and they are different registers.
+ // - We have op2 in op2Reg and tmpReg
+ // - Either we will leave the input registers (the original op1Reg and op2Reg) unmodified,
+ // OR they are the targetReg that will be produced.
+ // (Note that in the code we generate below op1Reg and op2Reg are never written.)
+ // We will copy things as necessary to ensure that this is the case.
+ // Note that we can swap op1 and op2, since multiplication is commutative.
+ // We will not modify the values in op1Reg and op2Reg.
+ // (Though note that if either op1 or op2 is the same as targetReg, we will make
+ // a copy and use that copy as the input register. In that case we WILL modify
+ // the original value in the register, but will wind up with the result in targetReg
+ // in the end, as expected.)
+
+ // First, we need a tmpReg that is NOT the same as targetReg.
+ // Note that if tmpReg is the same as targetReg, we can use tmpReg2 instead; in that case
+ // tmpReg2 will not be needed again below (see the op1Reg == targetReg case).
+ if (tmpReg == targetReg)
+ {
+ tmpReg = tmpReg2;
+ }
+
+ if (op2Reg == targetReg)
+ {
+ // We will swap the operands.
+ // Since the code below only deals with registers, this now becomes the case where
+ // op1Reg == targetReg.
+ op2Reg = op1Reg;
+ op1Reg = targetReg;
+ }
+ if (op1Reg == targetReg)
+ {
+ // Copy op1, and make tmpReg2 the new op1Reg.
+ // Note that those regs can't be the same, as we asserted above.
+ // Also, we know that tmpReg2 hasn't been used, because we couldn't have hit
+ // the "tmpReg == targetReg" case.
+ inst_RV_RV(INS_movaps, tmpReg2, op1Reg, targetType, emitActualTypeSize(targetType));
+ op1Reg = tmpReg2;
+ inst_RV_RV(INS_movaps, tmpReg, op2Reg, targetType, emitActualTypeSize(targetType));
+ // However, we have one more case to worry about: what if op2Reg is also targetReg
+ // (i.e. we have the same operand as op1 and op2)?
+ // In that case we will set op2Reg to the same register as op1Reg.
+ if (op2Reg == targetReg)
+ {
+ op2Reg = tmpReg2;
+ }
+ }
+ else
+ {
+ // Copy op1 to targetReg and op2 to tmpReg.
+ inst_RV_RV(INS_movaps, targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ inst_RV_RV(INS_movaps, tmpReg, op2Reg, targetType, emitActualTypeSize(targetType));
+ }
+ // Let's assert that things are as we expect.
+ // - We have op1 in op1Reg and targetReg, and they are different registers.
+ assert(op1Reg != targetReg);
+ // - We have op2 in op2Reg and tmpReg, and they are different registers.
+ assert(op2Reg != tmpReg);
+ // - Either we are going to leave op1's reg unmodified, or it is the targetReg.
+ assert((op1->gtRegNum == op1Reg) || (op1->gtRegNum == op2Reg) || (op1->gtRegNum == targetReg));
+ // - Similarly, we are going to leave op2's reg unmodified, or it is the targetReg.
+ assert((op2->gtRegNum == op1Reg) || (op2->gtRegNum == op2Reg) || (op2->gtRegNum == targetReg));
+
+ // Now we can generate the code.
+
+ // targetReg = op1 >> 4-bytes (op1 is already in targetReg)
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(targetType), targetReg, 4);
+
+ // tmpReg = op2 >> 4-bytes (op2 is already in tmpReg)
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(targetType), tmpReg, 4);
+
+ // tmp = unsigned double word multiply of targetReg and tmpReg. Essentially
+ // tmpReg[63:0] = op1[1] * op2[1]
+ // tmpReg[127:64] = op1[3] * op2[3]
+ inst_RV_RV(INS_pmuludq, tmpReg, targetReg, targetType, emitActualTypeSize(targetType));
+
+ // Extract first and third double word results from tmpReg
+ // tmpReg = shuffle(0,0,2,0) of tmpReg
+ getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(targetType), tmpReg, tmpReg, 0x08);
+
+ // targetReg[63:0] = op1[0] * op2[0]
+ // targetReg[127:64] = op1[2] * op2[2]
+ inst_RV_RV(INS_movaps, targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ inst_RV_RV(INS_pmuludq, targetReg, op2Reg, targetType, emitActualTypeSize(targetType));
+
+ // Extract first and third double word results from targetReg
+ // targetReg = shuffle(0,0,2,0) of targetReg
+ getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(targetType), targetReg, targetReg, 0x08);
+
+ // pack the results into a single vector
+ inst_RV_RV(INS_punpckldq, targetReg, tmpReg, targetType, emitActualTypeSize(targetType));
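+ // Net effect: targetReg now holds, element by element, the low 32 bits of op1[i] * op2[i]
+ // (listed from lowest to highest element), which is the expected wrap-around Vector<int> multiply.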
+ }
+ else
+ {
+ instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
+
+ // Currently AVX doesn't support integer.
+ // if the ins is INS_cvtsi2ss or INS_cvtsi2sd, we won't use AVX.
+ if (op1Reg != targetReg && compiler->canUseAVX() && !(ins == INS_cvtsi2ss || ins == INS_cvtsi2sd) &&
+ getEmitter()->IsThreeOperandAVXInstruction(ins))
+ {
+ inst_RV_RV_RV(ins, targetReg, op1Reg, op2Reg, emitActualTypeSize(targetType));
+ }
+ else
+ {
+ if (op2Reg == targetReg)
+ {
+ otherReg = op1Reg;
+ }
+ else if (op1Reg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ }
+
+ inst_RV_RV(ins, targetReg, otherReg, targetType, emitActualTypeSize(targetType));
+ }
+ }
+
+ // Vector2/3 div: since the top-most elements will be zero, we end up
+ // performing 0/0, which is a NaN. Therefore, post division we need to set the
+ // top-most elements to zero. This is achieved by a left logical shift followed
+ // by a right logical shift of targetReg.
+ if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv && (simdNode->gtSIMDSize < 16))
+ {
+ // These are 16 byte operations, so we subtract from 16 bytes, not the vector register length.
+ unsigned shiftCount = 16 - simdNode->gtSIMDSize;
+ assert(shiftCount != 0);
+ instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, baseType);
+ getEmitter()->emitIns_R_I(ins, EA_16BYTE, targetReg, shiftCount);
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, baseType);
+ getEmitter()->emitIns_R_I(ins, EA_16BYTE, targetReg, shiftCount);
+ }
+
+ genProduceReg(simdNode);
+}
+
+//--------------------------------------------------------------------------------
+// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operator
+// <, <=, >, >= and ==
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
+{
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+ var_types targetType = simdNode->TypeGet();
+ InstructionSet iset = compiler->getSIMDInstructionSet();
+
+ genConsumeOperands(simdNode);
+ regNumber op1Reg = op1->gtRegNum;
+ regNumber op2Reg = op2->gtRegNum;
+ regNumber otherReg = op2Reg;
+
+ switch (simdNode->gtSIMDIntrinsicID)
+ {
+ case SIMDIntrinsicEqual:
+ case SIMDIntrinsicGreaterThan:
+ {
+ // SSE2: vector<(u)long> relation op should be implemented in terms of TYP_INT comparison operations
+ assert(((iset == InstructionSet_AVX) || (baseType != TYP_LONG)) && (baseType != TYP_ULONG));
+
+ // Greater-than: Floating point vectors use "<" with swapped operands
+ if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan)
+ {
+ assert(!varTypeIsFloating(baseType));
+ }
+
+ unsigned ival = 0;
+ instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival);
+
+ // targetReg = op1reg > op2reg
+ // Therefore, we can optimize if op1Reg == targetReg
+ otherReg = op2Reg;
+ if (op1Reg != targetReg)
+ {
+ if (op2Reg == targetReg)
+ {
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual);
+ otherReg = op1Reg;
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ }
+ }
+
+ if (varTypeIsFloating(baseType))
+ {
+ getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, otherReg, ival);
+ }
+ else
+ {
+ inst_RV_RV(ins, targetReg, otherReg, targetType, emitActualTypeSize(targetType));
+ }
+ }
+ break;
+
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ {
+ // Int vectors use ">" and ">=" with swapped operands
+ assert(varTypeIsFloating(baseType));
+
+ // Get the instruction opcode for compare operation
+ unsigned ival;
+ instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival);
+
+ // targetReg = op1reg RelOp op2reg
+ // Therefore, we can optimize if op1Reg == targetReg
+ if (op1Reg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ }
+
+ getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, op2Reg, ival);
+ }
+ break;
+
+ // (In)Equality that produces bool result instead of a bit vector
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ {
+ assert(genIsValidIntReg(targetReg));
+
+ // We need two additional XMM registers as scratch
+ assert(simdNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(simdNode->gtRsvdRegs) == 2);
+
+ regMaskTP tmpRegsMask = simdNode->gtRsvdRegs;
+ regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~tmpReg1Mask;
+ regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+ var_types simdType = op1->TypeGet();
+ // TODO-1stClassStructs: Temporary to minimize asmDiffs
+ if (simdType == TYP_DOUBLE)
+ {
+ simdType = TYP_SIMD8;
+ }
+
+ // Here we should consider TYP_SIMD12 operands as if they were TYP_SIMD16
+ // since both the operands will be in XMM registers.
+ if (simdType == TYP_SIMD12)
+ {
+ simdType = TYP_SIMD16;
+ }
+
+ // tmpReg1 = (op1Reg == op2Reg)
+ // Call this value of tmpReg1 as 'compResult' for further reference below.
+ regNumber otherReg = op2Reg;
+ if (tmpReg1 != op2Reg)
+ {
+ if (tmpReg1 != op1Reg)
+ {
+ inst_RV_RV(ins_Copy(simdType), tmpReg1, op1Reg, simdType, emitActualTypeSize(simdType));
+ }
+ }
+ else
+ {
+ otherReg = op1Reg;
+ }
+
+ // For all integer types we can use TYP_INT comparison.
+ unsigned ival = 0;
+ instruction ins =
+ getOpForSIMDIntrinsic(SIMDIntrinsicEqual, varTypeIsFloating(baseType) ? baseType : TYP_INT, &ival);
+
+ if (varTypeIsFloating(baseType))
+ {
+ getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(simdType), tmpReg1, otherReg, ival);
+ }
+ else
+ {
+ inst_RV_RV(ins, tmpReg1, otherReg, simdType, emitActualTypeSize(simdType));
+ }
+
+ // If we have 32 bytes, start by anding the two 16-byte halves to get a 16-byte result.
+ if (compiler->canUseAVX() && (simdType == TYP_SIMD32))
+ {
+ // Reduce tmpReg1 from 256 bits to 128 bits by bitwise-ANDing the lower and upper 128 bits
+ //
+ // Generated code sequence
+ // - vextractf128 tmpReg2, tmpReg1, 0x01
+ // tmpReg2[128..255] <- 0
+ // tmpReg2[0..127] <- tmpReg1[128..255]
+ // - vandps tmpReg1, tmpReg2
+ // This zeroes out the upper portion of tmpReg1, and the lower portion of tmpReg1 becomes
+ // the AND of the upper and lower 128-bit comparison results.
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg2, tmpReg1, 0x01);
+ inst_RV_RV(INS_andps, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType));
+ }
+ // Next, if we have more than 8 bytes, AND the two 8-byte halves to get an 8-byte result.
+ if (simdType != TYP_SIMD8)
+ {
+ // tmpReg2 = Shuffle(tmpReg1, (1,0,3,2))
+ // Note: vpshufd is a 128-bit only instruction. Therefore, explicitly pass EA_16BYTE
+ getEmitter()->emitIns_R_R_I(INS_pshufd, EA_16BYTE, tmpReg2, tmpReg1, 0x4E);
+
+ // tmpReg1 = BitwiseAnd(tmpReg1, tmpReg2)
+ //
+ // Note that what we have computed is as follows at this point:
+ // tmpReg1[0] = compResult[0] & compResult[2]
+ // tmpReg1[1] = compResult[1] & compResult[3]
+ inst_RV_RV(INS_andps, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType));
+ }
+ // At this point, we have either reduced the result to 8 bytes: tmpReg1[0] and tmpReg1[1],
+ // OR we have a Vector2 (TYP_SIMD8) in tmpReg1, which has only those two fields.
+
+ // tmpReg2 = Shuffle(tmpReg1, (0,0,0,1))
+ // tmpReg2[0] = compResult[1] & compResult[3]
+ getEmitter()->emitIns_R_R_I(INS_pshufd, EA_16BYTE, tmpReg2, tmpReg1, 0x1);
+
+ // tmpReg1 = BitwiseAnd(tmpReg1, tmpReg2)
+ // That is tmpReg1[0] = compResult[0] & compResult[1] & compResult[2] & compResult[3]
+ inst_RV_RV(INS_pand, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType)); // ??? INS_andps??
+
+ // targetReg = lower 32-bits of tmpReg1 = compResult[0] & compResult[1] & compResult[2] & compResult[3]
+ // (Note that for mov_xmm2i, the int register is always in the reg2 position.)
+ inst_RV_RV(INS_mov_xmm2i, tmpReg1, targetReg, TYP_INT);
+
+ // Since we need to compute a bool result, targetReg needs to be set to 1 on true and zero on false.
+ // Equality:
+ // cmp targetReg, 0xFFFFFFFF
+ // sete targetReg
+ // movzx targetReg, targetReg
+ //
+ // InEquality:
+ // cmp targetReg, 0xFFFFFFFF
+ // setne targetReg
+ // movzx targetReg, targetReg
+ //
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, targetReg, 0xFFFFFFFF);
+ inst_RV((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ? INS_sete : INS_setne, targetReg, TYP_INT,
+ EA_1BYTE);
+ assert(simdNode->TypeGet() == TYP_INT);
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ }
+ break;
+
+ default:
+ noway_assert(!"Unimplemented SIMD relational operation.");
+ unreached();
+ }
+
+ genProduceReg(simdNode);
+}
+
+//--------------------------------------------------------------------------------
+// genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDotProduct);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ var_types simdType = op1->TypeGet();
+ // TODO-1stClassStructs: Temporary to minimize asmDiffs
+ if (simdType == TYP_DOUBLE)
+ {
+ simdType = TYP_SIMD8;
+ }
+ var_types simdEvalType = (simdType == TYP_SIMD12) ? TYP_SIMD16 : simdType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+
+ // DotProduct is only supported on floating point types.
+ var_types targetType = simdNode->TypeGet();
+ assert(targetType == baseType);
+ assert(varTypeIsFloating(baseType));
+
+ genConsumeOperands(simdNode);
+ regNumber op1Reg = op1->gtRegNum;
+ regNumber op2Reg = op2->gtRegNum;
+
+ regNumber tmpReg = REG_NA;
+ // For SSE, or AVX with 32-byte vectors, we need an additional Xmm register as scratch.
+ // However, it must be distinct from targetReg, so we request two from the register allocator.
+ // Note that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg.
+ if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) || (simdEvalType == TYP_SIMD32))
+ {
+ assert(simdNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(simdNode->gtRsvdRegs) == 2);
+
+ regMaskTP tmpRegsMask = simdNode->gtRsvdRegs;
+ regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~tmpReg1Mask;
+ regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+
+ // Choose any register different from targetReg as tmpReg
+ if (tmpReg1 != targetReg)
+ {
+ tmpReg = tmpReg1;
+ }
+ else
+ {
+ assert(targetReg != tmpReg2);
+ tmpReg = tmpReg2;
+ }
+ assert(tmpReg != REG_NA);
+ assert(tmpReg != targetReg);
+ }
+
+ if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
+ {
+ // We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg
+ if (op1Reg == targetReg)
+ {
+ // Best case
+ // nothing to do, we have registers in the right place
+ }
+ else if (op2Reg == targetReg)
+ {
+ op2Reg = op1Reg;
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(simdType), targetReg, op1Reg, simdEvalType, emitActualTypeSize(simdType));
+ }
+
+ // DotProduct(v1, v2)
+ // Here v0 = targetReg, v1 = op1Reg, v2 = op2Reg and tmp = tmpReg
+ if (baseType == TYP_FLOAT)
+ {
+ // v0 = v1 * v2
+ // tmp = v0 // v0 = (3, 2, 1, 0) - each element is given by its
+ // // position
+ // tmp = shuffle(tmp, tmp, Shuffle(2,3,0,1)) // tmp = (2, 3, 0, 1)
+ // v0 = v0 + tmp // v0 = (3+2, 2+3, 1+0, 0+1)
+ // tmp = v0
+ // tmp = shuffle(tmp, tmp, Shuffle(0,1,2,3)) // tmp = (0+1, 1+0, 2+3, 3+2)
+ // v0 = v0 + tmp // v0 = (0+1+2+3, 0+1+2+3, 0+1+2+3, 0+1+2+3)
+ // // Essentially a horizontal addition of all elements.
+ // // We could achieve the same using the SSE3 instruction
+ // // HADDPS.
+ //
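+ // Worked example: for v1 = (1, 2, 3, 4) and v2 = (5, 6, 7, 8) the element-wise products are
+ // 5, 12, 21, 32; the first shuffle+add pairs them up to give 17, 17, 53, 53, and the second
+ // gives 70 in every element, which is indeed the dot product.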
+ inst_RV_RV(INS_mulps, targetReg, op2Reg);
+ inst_RV_RV(INS_movaps, tmpReg, targetReg);
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg, tmpReg, 0xb1);
+ inst_RV_RV(INS_addps, targetReg, tmpReg);
+ inst_RV_RV(INS_movaps, tmpReg, targetReg);
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg, tmpReg, 0x1b);
+ inst_RV_RV(INS_addps, targetReg, tmpReg);
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ // v0 = v1 * v2
+ // tmp = v0 // v0 = (1, 0) - each element is given by its position
+ // tmp = shuffle(tmp, tmp, Shuffle(0,1)) // tmp = (0, 1)
+ // v0 = v0 + tmp // v0 = (1+0, 0+1)
+ inst_RV_RV(INS_mulpd, targetReg, op2Reg);
+ inst_RV_RV(INS_movaps, tmpReg, targetReg);
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, tmpReg, tmpReg, 0x01);
+ inst_RV_RV(INS_addpd, targetReg, tmpReg);
+ }
+ else
+ {
+ unreached();
+ }
+ }
+ else
+ {
+ // We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg.
+ // Note that this is a duplicate of the code above for SSE, but in the AVX case we can eventually
+ // use the 3-op form, so that we can avoid these copies.
+ // TODO-CQ: Add inst_RV_RV_RV_IV().
+ if (op1Reg == targetReg)
+ {
+ // Best case
+ // nothing to do, we have registers in the right place
+ }
+ else if (op2Reg == targetReg)
+ {
+ op2Reg = op1Reg;
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(simdType), targetReg, op1Reg, simdEvalType, emitActualTypeSize(simdType));
+ }
+
+ emitAttr emitSize = emitActualTypeSize(simdEvalType);
+ if (baseType == TYP_FLOAT)
+ {
+ // dpps computes the dot product of the upper & lower halves of the 32-byte register.
+ // Notice that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg.
+ inst_RV_RV_IV(INS_dpps, emitSize, targetReg, op2Reg, 0xf1);
+ // If this is TYP_SIMD32, we need to combine the lower & upper results.
+ if (simdEvalType == TYP_SIMD32)
+ {
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg, targetReg, 0x01);
+ inst_RV_RV(INS_addps, targetReg, tmpReg, targetType, emitTypeSize(targetType));
+ }
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ // On AVX, we have no 16-byte vectors of double. Note that, if we did, we could use
+ // dppd directly.
+ assert(simdType == TYP_SIMD32);
+
+ // targetReg = targetReg * op2Reg
+ // targetReg = vhaddpd(targetReg, targetReg) ; horizontal sum of lower & upper halves
+ // tmpReg = vextractf128(targetReg, 1) ; Moves the upper sum into tmpReg
+ // targetReg = targetReg + tmpReg
+ inst_RV_RV(INS_mulpd, targetReg, op2Reg, simdEvalType, emitActualTypeSize(simdType));
+ inst_RV_RV(INS_haddpd, targetReg, targetReg, simdEvalType, emitActualTypeSize(simdType));
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg, targetReg, 0x01);
+ inst_RV_RV(INS_addpd, targetReg, tmpReg, targetType, emitTypeSize(targetType));
+ }
+ else
+ {
+ unreached();
+ }
+ }
+
+ genProduceReg(simdNode);
+}
+
+//------------------------------------------------------------------------------------
+// genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGetItem);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types simdType = op1->TypeGet();
+ assert(varTypeIsSIMD(simdType));
+
+ // op1 of TYP_SIMD12 should be considered as TYP_SIMD16,
+ // since it is in XMM register.
+ if (simdType == TYP_SIMD12)
+ {
+ simdType = TYP_SIMD16;
+ }
+
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+ var_types targetType = simdNode->TypeGet();
+ assert(targetType == genActualType(baseType));
+
+ // GetItem has 2 operands:
+ // - the source of SIMD type (op1)
+ // - the index of the value to be returned.
+ genConsumeOperands(simdNode);
+ regNumber srcReg = op1->gtRegNum;
+
+ // SSE2 doesn't have an instruction to implement this intrinsic if the index is not a constant.
+ // For the non-constant case, we will use the SIMD temp location to store the vector, and
+ // then load the desired element.
+ // The range check will already have been performed, so at this point we know we have an index
+ // within the bounds of the vector.
+ if (!op2->IsCnsIntOrI())
+ {
+ unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
+ noway_assert(simdInitTempVarNum != BAD_VAR_NUM);
+ bool isEBPbased;
+ unsigned offs = compiler->lvaFrameAddress(simdInitTempVarNum, &isEBPbased);
+ regNumber indexReg = op2->gtRegNum;
+
+ // Store the vector to the temp location.
+ getEmitter()->emitIns_S_R(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)),
+ emitTypeSize(simdType), srcReg, simdInitTempVarNum, 0);
+
+ // Now, load the desired element.
+ getEmitter()->emitIns_R_ARX(ins_Move_Extend(baseType, false), // Load
+ emitTypeSize(baseType), // Of the vector baseType
+ targetReg, // To targetReg
+ (isEBPbased) ? REG_EBP : REG_ESP, // Stack-based
+ indexReg, // Indexed
+ genTypeSize(baseType), // by the size of the baseType
+ offs);
+ genProduceReg(simdNode);
+ return;
+ }
+
+ noway_assert(op2->isContained());
+ unsigned int index = (unsigned int)op2->gtIntCon.gtIconVal;
+ unsigned int byteShiftCnt = index * genTypeSize(baseType);
+
+ // In general we shouldn't have an index greater than or equal to the length of the vector.
+ // However, if we have an out-of-range access, under minOpts it will not be optimized
+ // away. The code will throw before we reach this point, but we still need to generate
+ // code. In that case, we will simply mask off the upper bits.
+ if (byteShiftCnt >= compiler->getSIMDVectorRegisterByteLength())
+ {
+ byteShiftCnt &= (compiler->getSIMDVectorRegisterByteLength() - 1);
+ index = byteShiftCnt / genTypeSize(baseType);
+ }
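+ // For example, with a 16-byte vector of floats an (unreachable at runtime) index of 5 gives
+ // byteShiftCnt = 20, which is masked down to 4, i.e. index 1.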
+
+ regNumber tmpReg = REG_NA;
+ if (simdNode->gtRsvdRegs != RBM_NONE)
+ {
+ assert(genCountBits(simdNode->gtRsvdRegs) == 1);
+ tmpReg = genRegNumFromMask(simdNode->gtRsvdRegs);
+ }
+ else
+ {
+ assert((byteShiftCnt == 0) || varTypeIsFloating(baseType) ||
+ (varTypeIsSmallInt(baseType) && (byteShiftCnt < 16)));
+ }
+
+ if (byteShiftCnt >= 16)
+ {
+ assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+ byteShiftCnt -= 16;
+ regNumber newSrcReg;
+ if (varTypeIsFloating(baseType))
+ {
+ newSrcReg = targetReg;
+ }
+ else
+ {
+ // Integer types
+ assert(tmpReg != REG_NA);
+ newSrcReg = tmpReg;
+ }
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, newSrcReg, srcReg, 0x01);
+
+ srcReg = newSrcReg;
+ }
+
+ // Generate the following sequence:
+ // 1) baseType is floating point
+ // movaps targetReg, srcReg
+ // psrldq targetReg, byteShiftCnt <-- not generated if accessing zero'th element
+ //
+ // 2) baseType is not floating point
+ // movaps tmpReg, srcReg <-- not generated if accessing zero'th element
+ // OR if tmpReg == srcReg
+ // psrldq tmpReg, byteShiftCnt <-- not generated if accessing zero'th element
+ // mov_xmm2i targetReg, tmpReg
+ if (varTypeIsFloating(baseType))
+ {
+ if (targetReg != srcReg)
+ {
+ inst_RV_RV(ins_Copy(simdType), targetReg, srcReg, simdType, emitActualTypeSize(simdType));
+ }
+
+ if (byteShiftCnt != 0)
+ {
+ instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, baseType);
+ getEmitter()->emitIns_R_I(ins, emitActualTypeSize(simdType), targetReg, byteShiftCnt);
+ }
+ }
+ else
+ {
+ if (varTypeIsSmallInt(baseType))
+ {
+ // Note that pextrw extracts a 16-bit value by index and zero-extends it to 32 bits.
+ // For Vector<short> we also need to sign-extend the extracted 16-bit value in targetReg.
+ // For Vector<byte>, index/2 gives the index of the 16-bit word to extract; shift right
+ // by 8 bits if the index is odd. For Vector<sbyte> we also sign-extend targetReg.
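+ // Worked example: to read element 5 of a Vector<byte>, pextrw with index 5/2 = 2 extracts
+ // bytes 4..5 into the low 16 bits of targetReg; since 5 is odd, a right shift by 8 then
+ // leaves byte 5 in the low byte.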
+
+ unsigned baseSize = genTypeSize(baseType);
+ if (baseSize == 1)
+ {
+ index /= 2;
+ }
+ // We actually want index % 8 for the AVX case (for SSE it will never be > 8).
+ // Note that this doesn't matter functionally, because the instruction uses just the
+ // low 3 bits of index, but it's better to use the right value.
+ if (index > 8)
+ {
+ assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+ index -= 8;
+ }
+
+ getEmitter()->emitIns_R_R_I(INS_pextrw, emitTypeSize(TYP_INT), targetReg, srcReg, index);
+
+ bool ZeroOrSignExtnReqd = true;
+ if (baseSize == 1)
+ {
+ if ((op2->gtIntCon.gtIconVal % 2) == 1)
+ {
+ // If we are extracting a byte-sized element and the index is odd, right shift the extracted word by 8 bits.
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, targetReg, 8);
+
+ // Since Pextrw zero extends to 32-bits, we need sign extension in case of TYP_BYTE
+ ZeroOrSignExtnReqd = (baseType == TYP_BYTE);
+ }
+ // else - we just need to zero/sign extend the byte since pextrw extracted 16-bits
+ }
+ else
+ {
+ // Since Pextrw zero extends to 32-bits, we need sign extension in case of TYP_SHORT
+ assert(baseSize == 2);
+ ZeroOrSignExtnReqd = (baseType == TYP_SHORT);
+ }
+
+ if (ZeroOrSignExtnReqd)
+ {
+ // Zero/sign extend the byte/short to 32-bits
+ inst_RV_RV(ins_Move_Extend(baseType, false), targetReg, targetReg, baseType, emitTypeSize(baseType));
+ }
+ }
+ else
+ {
+ // We need a temp xmm register if the baseType is not floating point and
+ // accessing non-zero'th element.
+ instruction ins;
+
+ if (byteShiftCnt != 0)
+ {
+ assert(tmpReg != REG_NA);
+
+ if (tmpReg != srcReg)
+ {
+ inst_RV_RV(ins_Copy(simdType), tmpReg, srcReg, simdType, emitActualTypeSize(simdType));
+ }
+
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, baseType);
+ getEmitter()->emitIns_R_I(ins, emitActualTypeSize(simdType), tmpReg, byteShiftCnt);
+ }
+ else
+ {
+ tmpReg = srcReg;
+ }
+
+ assert(tmpReg != REG_NA);
+ ins = ins_CopyFloatToInt(TYP_FLOAT, baseType);
+ // (Note that for mov_xmm2i, the int register is always in the reg2 position.)
+ inst_RV_RV(ins, tmpReg, targetReg, baseType);
+ }
+ }
+
+ genProduceReg(simdNode);
+}
+
+//------------------------------------------------------------------------------------
+// genSIMDIntrinsicSetItem: Generate code for SIMD Intrinsic set element at index i.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+// TODO-CQ: Use SIMDIntrinsicShuffleSSE2 for the SSE2 case.
+//
+void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
+{
+ // Determine index based on intrinsic ID
+ int index = -1;
+ switch (simdNode->gtSIMDIntrinsicID)
+ {
+ case SIMDIntrinsicSetX:
+ index = 0;
+ break;
+ case SIMDIntrinsicSetY:
+ index = 1;
+ break;
+ case SIMDIntrinsicSetZ:
+ index = 2;
+ break;
+ case SIMDIntrinsicSetW:
+ index = 3;
+ break;
+
+ default:
+ unreached();
+ }
+ assert(index != -1);
+
+ // op1 is the SIMD vector
+ // op2 is the value to be set
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+ var_types targetType = simdNode->TypeGet();
+ assert(varTypeIsSIMD(targetType));
+
+ // the following assert must hold.
+ // supported only on vector2f/3f/4f right now
+ noway_assert(baseType == TYP_FLOAT);
+ assert(op2->TypeGet() == baseType);
+ assert(simdNode->gtSIMDSize >= ((index + 1) * genTypeSize(baseType)));
+
+ genConsumeOperands(simdNode);
+ regNumber op1Reg = op1->gtRegNum;
+ regNumber op2Reg = op2->gtRegNum;
+
+ // TODO-CQ: For AVX we don't need to do a copy because it supports 3 operands plus immediate.
+ if (targetReg != op1Reg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ }
+
+ // Right now this intrinsic is supported only for float base type vectors.
+ // If in the future we need to support other base type vectors, the
+ // logic below needs modification.
+ noway_assert(baseType == TYP_FLOAT);
+
+ if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
+ {
+ // We need one additional int register as scratch
+ assert(simdNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(simdNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(simdNode->gtRsvdRegs);
+ assert(genIsValidIntReg(tmpReg));
+
+ // Move the value from xmm reg to an int reg
+ instruction ins = ins_CopyFloatToInt(TYP_FLOAT, TYP_INT);
+ // (Note that for mov_xmm2i, the int register is always in the reg2 position.)
+ inst_RV_RV(ins, op2Reg, tmpReg, baseType);
+
+ // First insert the lower 16-bits of tmpReg in targetReg at 2*index position
+ // since every float has two 16-bit words.
+ getEmitter()->emitIns_R_R_I(INS_pinsrw, emitTypeSize(TYP_INT), targetReg, tmpReg, 2 * index);
+
+ // Logical right shift tmpReg by 16-bits and insert in targetReg at 2*index + 1 position
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, tmpReg, 16);
+ getEmitter()->emitIns_R_R_I(INS_pinsrw, emitTypeSize(TYP_INT), targetReg, tmpReg, 2 * index + 1);
+ }
+ else
+ {
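+ // Assuming the usual INSERTPS_* encodings, this immediate copies source element 0 (the
+ // scalar in op2Reg) into element 'index' of targetReg with no zero mask, so the other
+ // elements of targetReg are preserved.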
+ unsigned int insertpsImm = (INSERTPS_SOURCE_SELECT(0) | INSERTPS_TARGET_SELECT(index));
+ inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, op2Reg, insertpsImm);
+ }
+
+ genProduceReg(simdNode);
+}
+
+//------------------------------------------------------------------------
+// genSIMDIntrinsicShuffleSSE2: Generate code for SIMD Intrinsic shuffle.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicShuffleSSE2);
+ noway_assert(compiler->getSIMDInstructionSet() == InstructionSet_SSE2);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ assert(op2->isContained());
+ assert(op2->IsCnsIntOrI());
+ int shuffleControl = (int)op2->AsIntConCommon()->IconValue();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ var_types targetType = simdNode->TypeGet();
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+
+ regNumber op1Reg = genConsumeReg(op1);
+ if (targetReg != op1Reg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ }
+
+ instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
+ getEmitter()->emitIns_R_R_I(ins, emitTypeSize(baseType), targetReg, targetReg, shuffleControl);
+ genProduceReg(simdNode);
+}
+
+//-----------------------------------------------------------------------------
+// genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory.
+// Since Vector3 is not a hardware supported write size, it is performed
+// as two writes: an 8-byte write followed by a 4-byte write.
+//
+// Arguments:
+// treeNode - tree node that is attempting to store indirect
+//
+//
+// Return Value:
+// None.
+//
+void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_STOREIND);
+
+ GenTree* addr = treeNode->gtOp.gtOp1;
+ GenTree* data = treeNode->gtOp.gtOp2;
+
+ // addr and data should not be contained.
+ assert(!data->isContained());
+ assert(!addr->isContained());
+
+#ifdef DEBUG
+ // Should not require a write barrier
+ GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data);
+ assert(writeBarrierForm == GCInfo::WBF_NoBarrier);
+#endif
+
+ // Need an additional Xmm register to extract the upper 4 bytes from data.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ genConsumeOperands(treeNode->AsOp());
+
+ // 8-byte write
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_DOUBLE), EA_8BYTE, data->gtRegNum, addr->gtRegNum, 0);
+
+ // Extract upper 4-bytes from data
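+ // (pshufd control 0x02 selects element 2 of data - the Vector3 Z component - into element 0
+ // of tmpReg, so a plain 4-byte store of tmpReg writes the upper 4 bytes.)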
+ getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, data->gtRegNum, 0x02);
+
+ // 4-byte write
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, addr->gtRegNum, 8);
+}
+
+//-----------------------------------------------------------------------------
+// genLoadIndTypeSIMD12: load indirect a TYP_SIMD12 (i.e. Vector3) value.
+// Since Vector3 is not a hardware supported read size, it is performed
+// as two loads: an 8-byte load followed by a 4-byte load.
+//
+// Arguments:
+// treeNode - tree node of GT_IND
+//
+//
+// Return Value:
+// None.
+//
+void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_IND);
+
+ regNumber targetReg = treeNode->gtRegNum;
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained());
+ regNumber operandReg = genConsumeReg(op1);
+
+ // Need an additional Xmm register, different from targetReg, to read the upper 4 bytes.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 2);
+
+ regNumber tmpReg = REG_NA;
+ regMaskTP tmpRegsMask = treeNode->gtRsvdRegs;
+ regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~tmpReg1Mask;
+ regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+
+ // Choose any register different from targetReg as tmpReg
+ if (tmpReg1 != targetReg)
+ {
+ tmpReg = tmpReg1;
+ }
+ else
+ {
+ assert(targetReg != tmpReg2);
+ tmpReg = tmpReg2;
+ }
+ assert(tmpReg != REG_NA);
+ assert(tmpReg != targetReg);
+
+ // Load upper 4 bytes in tmpReg
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_FLOAT), EA_4BYTE, tmpReg, operandReg, 8);
+
+ // Load lower 8 bytes in targetReg
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_DOUBLE), EA_8BYTE, targetReg, operandReg, 0);
+
+ // combine upper 4 bytes and lower 8 bytes in targetReg
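+ // (shufps control 0x44 selects targetReg elements 0,1 into the low half and tmpReg elements
+ // 0,1 into the high half; since the movss load zeroed the upper bits of tmpReg, the result
+ // is (x, y, z, 0).)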
+ getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, 0x44);
+
+ genProduceReg(treeNode);
+}
+
+//-----------------------------------------------------------------------------
+// genStoreLclFldTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field.
+// Since Vector3 is not a hardware supported write size, it is performed
+// as two stores: an 8-byte store followed by a 4-byte store.
+//
+// Arguments:
+// treeNode - tree node that is attempting to store TYP_SIMD12 field
+//
+// Return Value:
+// None.
+//
+void CodeGen::genStoreLclFldTypeSIMD12(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_STORE_LCL_FLD);
+
+ unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained());
+ regNumber operandReg = genConsumeReg(op1);
+
+ // Need an additional Xmm register to extract the upper 4 bytes from data.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ // store lower 8 bytes
+ getEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, varNum, offs);
+
+ // Extract upper 4-bytes from operandReg
+ getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, operandReg, 0x02);
+
+ // Store upper 4 bytes
+ getEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, varNum, offs + 8);
+}
+
+//-----------------------------------------------------------------------------
+// genLoadLclFldTypeSIMD12: load a TYP_SIMD12 (i.e. Vector3) type field.
+// Since Vector3 is not a hardware supported read size, it is performed
+// as two reads: an 8-byte read followed by a 4-byte read.
+//
+// Arguments:
+// treeNode - tree node that is attempting to load TYP_SIMD12 field
+//
+// Return Value:
+// None.
+//
+void CodeGen::genLoadLclFldTypeSIMD12(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_LCL_FLD);
+
+ regNumber targetReg = treeNode->gtRegNum;
+ unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ // Need an additional Xmm register to read the upper 4 bytes
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 2);
+
+ regNumber tmpReg = REG_NA;
+ regMaskTP tmpRegsMask = treeNode->gtRsvdRegs;
+ regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~tmpReg1Mask;
+ regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+
+ // Choose any register different from targetReg as tmpReg
+ if (tmpReg1 != targetReg)
+ {
+ tmpReg = tmpReg1;
+ }
+ else
+ {
+ assert(targetReg != tmpReg2);
+ tmpReg = tmpReg2;
+ }
+ assert(tmpReg != REG_NA);
+ assert(tmpReg != targetReg);
+
+ // Read upper 4 bytes to tmpReg
+ getEmitter()->emitIns_R_S(ins_Move_Extend(TYP_FLOAT, false), EA_4BYTE, tmpReg, varNum, offs + 8);
+
+ // Read lower 8 bytes to targetReg
+ getEmitter()->emitIns_R_S(ins_Move_Extend(TYP_DOUBLE, false), EA_8BYTE, targetReg, varNum, offs);
+
+ // combine upper 4 bytes and lower 8 bytes in targetReg
+ getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, 0x44);
+
+ genProduceReg(treeNode);
+}
+
+//-----------------------------------------------------------------------------
+// genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD32 vector to
+// the given register, if any, or to memory.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// The upper half of all AVX registers is volatile, even the callee-save registers.
+// When a 32-byte SIMD value is live across a call, the register allocator will use this intrinsic
+// to cause the upper half to be saved. It will first attempt to find another, unused, callee-save
+// register. If such a register cannot be found, it will save it to an available caller-save register.
+// In that case, this node will be marked GTF_SPILL, which will cause genProduceReg to save the 16 byte
+// value to the stack. (Note that if there are no caller-save registers available, the entire 32 byte
+// value will be spilled to the stack.)
+//
+void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperSave);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ assert(op1->IsLocal() && op1->TypeGet() == TYP_SIMD32);
+ regNumber targetReg = simdNode->gtRegNum;
+ regNumber op1Reg = genConsumeReg(op1);
+ assert(op1Reg != REG_NA);
+ assert(targetReg != REG_NA);
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, targetReg, op1Reg, 0x01);
+
+ genProduceReg(simdNode);
+}
+
+//-----------------------------------------------------------------------------
+// genSIMDIntrinsicUpperRestore: Restore the upper half of a TYP_SIMD32 vector to
+// the given register, if any, or to memory.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// For consistency with genSIMDIntrinsicUpperSave, and to ensure that lclVar nodes always
+// have their home register, this node has its targetReg on the lclVar child, and its source
+// on the simdNode.
+// Regarding spill, please see the note above on genSIMDIntrinsicUpperSave. If we have spilled
+// an upper-half to a caller save register, this node will be marked GTF_SPILLED. However, unlike
+// most spill scenarios, the saved tree will be different from the restored tree, but the spill
+// restore logic, which is triggered by the call to genConsumeReg, requires us to provide the
+// spilled tree (saveNode) in order to perform the reload. We can easily find that tree,
+// as it is in the spill descriptor for the register from which it was saved.
+//
+void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ assert(op1->IsLocal() && op1->TypeGet() == TYP_SIMD32);
+ regNumber srcReg = simdNode->gtRegNum;
+ regNumber lclVarReg = genConsumeReg(op1);
+ unsigned varNum = op1->AsLclVarCommon()->gtLclNum;
+ assert(lclVarReg != REG_NA);
+ assert(srcReg != REG_NA);
+ if (simdNode->gtFlags & GTF_SPILLED)
+ {
+ GenTree* saveNode = regSet.rsSpillDesc[srcReg]->spillTree;
+ noway_assert(saveNode != nullptr && (saveNode->gtRegNum == srcReg));
+ genConsumeReg(saveNode);
+ }
+ getEmitter()->emitIns_R_R_I(INS_vinsertf128, EA_32BYTE, lclVarReg, srcReg, 0x01);
+}
+
+//------------------------------------------------------------------------
+// genSIMDIntrinsic: Generate code for a SIMD Intrinsic. This is the main
+// routine which in turn calls the appropriate genSIMDIntrinsicXXX() routine.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Currently, we only recognize SIMDVector<float> and SIMDVector<int>, and
+// a limited set of methods.
+//
+void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
+{
+ // NYI for unsupported base types
+ if (simdNode->gtSIMDBaseType != TYP_INT && simdNode->gtSIMDBaseType != TYP_LONG &&
+ simdNode->gtSIMDBaseType != TYP_FLOAT && simdNode->gtSIMDBaseType != TYP_DOUBLE &&
+ simdNode->gtSIMDBaseType != TYP_CHAR && simdNode->gtSIMDBaseType != TYP_UBYTE &&
+ simdNode->gtSIMDBaseType != TYP_SHORT && simdNode->gtSIMDBaseType != TYP_BYTE &&
+ simdNode->gtSIMDBaseType != TYP_UINT && simdNode->gtSIMDBaseType != TYP_ULONG)
+ {
+ noway_assert(!"SIMD intrinsic with unsupported base type.");
+ }
+
+ switch (simdNode->gtSIMDIntrinsicID)
+ {
+ case SIMDIntrinsicInit:
+ genSIMDIntrinsicInit(simdNode);
+ break;
+
+ case SIMDIntrinsicInitN:
+ genSIMDIntrinsicInitN(simdNode);
+ break;
+
+ case SIMDIntrinsicSqrt:
+ case SIMDIntrinsicCast:
+ genSIMDIntrinsicUnOp(simdNode);
+ break;
+
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicSub:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicDiv:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseAndNot:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMax:
+ genSIMDIntrinsicBinOp(simdNode);
+ break;
+
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ case SIMDIntrinsicEqual:
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicGreaterThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ case SIMDIntrinsicGreaterThanOrEqual:
+ genSIMDIntrinsicRelOp(simdNode);
+ break;
+
+ case SIMDIntrinsicDotProduct:
+ genSIMDIntrinsicDotProduct(simdNode);
+ break;
+
+ case SIMDIntrinsicGetItem:
+ genSIMDIntrinsicGetItem(simdNode);
+ break;
+
+ case SIMDIntrinsicShuffleSSE2:
+ genSIMDIntrinsicShuffleSSE2(simdNode);
+ break;
+
+ case SIMDIntrinsicSetX:
+ case SIMDIntrinsicSetY:
+ case SIMDIntrinsicSetZ:
+ case SIMDIntrinsicSetW:
+ genSIMDIntrinsicSetItem(simdNode);
+ break;
+
+ case SIMDIntrinsicUpperSave:
+ genSIMDIntrinsicUpperSave(simdNode);
+ break;
+ case SIMDIntrinsicUpperRestore:
+ genSIMDIntrinsicUpperRestore(simdNode);
+ break;
+
+ default:
+ noway_assert(!"Unimplemented SIMD intrinsic.");
+ unreached();
+ }
+}
+
+#endif // FEATURE_SIMD
+#endif //_TARGET_AMD64_
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/simdintrinsiclist.h b/src/jit/simdintrinsiclist.h
new file mode 100644
index 0000000000..a44fb9d0a1
--- /dev/null
+++ b/src/jit/simdintrinsiclist.h
@@ -0,0 +1,145 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef SIMD_INTRINSIC
+#error Define SIMD_INTRINSIC before including this file
+#endif
+/*****************************************************************************/
+
+// clang-format off
+#ifdef FEATURE_SIMD
+
+ /*
+ Notes:
+ a) TYP_UNKNOWN means the 'baseType' of the SIMD vector, which is not known a priori
+ b) Each method maps to a unique intrinsic Id
+ c) To facilitate argument types to be used as an array initializer, args are listed within "{}" braces.
+ d) Since comma is used as the actual param separator in a macro, TYP_UNDEF entries are added to keep the param count constant.
+ e) TODO-Cleanup: when we plumb TYP_SIMD through front-end, replace TYP_STRUCT with TYP_SIMD.
+ */
+
+#ifdef _TARGET_AMD64_
+
+// Max number of parameters that we model in the table for SIMD intrinsic methods.
+#define SIMD_INTRINSIC_MAX_MODELED_PARAM_COUNT 3
+
+// Actual maximum number of parameters for any SIMD intrinsic method.
+// Constructors that take either N values, or a smaller Vector plus additional element values,
+// actually have more arguments than the "modeled" count.
+#define SIMD_INTRINSIC_MAX_PARAM_COUNT 5
+
+// Max number of base types supported by an intrinsic
+#define SIMD_INTRINSIC_MAX_BASETYPE_COUNT 10
+
+/***************************************************************************************************************************************************************************************************************************
+ Method Name, Is Instance Intrinsic Id, Display Name, return type, Arg count, Individual argument types SSE2 supported
+ Method (including implicit "this") base types
+ ***************************************************************************************************************************************************************************************************************************/
+SIMD_INTRINSIC(nullptr, false, None, "None", TYP_UNDEF, 0, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+SIMD_INTRINSIC("get_Count", false, GetCount, "count", TYP_INT, 0, {TYP_VOID, TYP_UNDEF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("get_One", false, GetOne, "one", TYP_STRUCT, 0, {TYP_VOID, TYP_UNDEF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("get_Zero", false, GetZero, "zero", TYP_STRUCT, 0, {TYP_VOID, TYP_UNDEF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("get_AllOnes", false, GetAllOnes, "allOnes", TYP_STRUCT, 0, {TYP_VOID, TYP_UNDEF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// .ctor call or newobj - there are four forms.
+// This form takes the object plus a value of the base (element) type:
+SIMD_INTRINSIC(".ctor", true, Init, "init", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+// This form takes the object plus an array of the base (element) type:
+SIMD_INTRINSIC(".ctor", true, InitArray, "initArray", TYP_VOID, 2, {TYP_BYREF, TYP_REF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+// This form takes the object, an array of the base (element) type, and an index into the array:
+SIMD_INTRINSIC(".ctor", true, InitArrayX, "initArray", TYP_VOID, 3, {TYP_BYREF, TYP_REF, TYP_INT }, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+// This form takes the object and N values of the base (element) type. The actual number of arguments depends upon the Vector size, which must be a fixed-size vector type such as Vector2f/3f/4f.
+// Right now this intrinsic is supported only on fixed float vectors, and hence the supported base type list contains only TYP_FLOAT.
+// This is currently the intrinsic that has the largest maximum number of operands - if we add new fixed vector types
+// with more than 4 elements, the above SIMD_INTRINSIC_MAX_PARAM_COUNT will have to change.
+SIMD_INTRINSIC(".ctor", true, InitN, "initN", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNKNOWN}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+// This form takes the object, a smaller fixed vector, and one or two additional arguments of the base type, e.g. Vector3 V = new Vector3(V2, x); where V2 is a Vector2, and x is a float.
+SIMD_INTRINSIC(".ctor", true, InitFixed, "initFixed", TYP_VOID, 3, {TYP_BYREF, TYP_STRUCT, TYP_UNKNOWN}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Copy vector to an array
+SIMD_INTRINSIC("CopyTo", true, CopyToArray, "CopyToArray", TYP_VOID, 2, {TYP_BYREF, TYP_REF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("CopyTo", true, CopyToArrayX, "CopyToArray", TYP_VOID, 3, {TYP_BYREF, TYP_REF, TYP_INT }, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Get operations
+SIMD_INTRINSIC("get_Item", true, GetItem, "get[i]", TYP_UNKNOWN, 2, {TYP_BYREF, TYP_INT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("get_X", true, GetX, "getX", TYP_UNKNOWN, 1, {TYP_BYREF, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("get_Y", true, GetY, "getY", TYP_UNKNOWN, 1, {TYP_BYREF, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("get_Z", true, GetZ, "getZ", TYP_UNKNOWN, 1, {TYP_BYREF, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("get_W", true, GetW, "getW", TYP_UNKNOWN, 1, {TYP_BYREF, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Set operations
+SIMD_INTRINSIC("set_X", true, SetX, "setX", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("set_Y", true, SetY, "setY", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("set_Z", true, SetZ, "setZ", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("set_W", true, SetW, "setW", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Object.Equals()
+SIMD_INTRINSIC("Equals", true, InstEquals, "equals", TYP_BOOL, 2, {TYP_BYREF, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Operator == and !=
+SIMD_INTRINSIC("op_Equality", false, OpEquality, "==", TYP_BOOL, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("op_Inequality", false, OpInEquality, "!=", TYP_BOOL, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Arithmetic Operations
+SIMD_INTRINSIC("op_Addition", false, Add, "+", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("op_Subtraction", false, Sub, "-", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("op_Multiply", false, Mul, "*", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_SHORT,TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("op_Division", false, Div, "/", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Abs is recognized as an intrinsic for floating-point and unsigned base types; SquareRoot only for float or double vectors
+SIMD_INTRINSIC("Abs", false, Abs, "abs", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_CHAR, TYP_UBYTE, TYP_UINT, TYP_ULONG, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("SquareRoot", false, Sqrt, "sqrt", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Min and Max methods are recognized as intrinsics for the supported base types listed below
+SIMD_INTRINSIC("Min", false, Min, "min", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("Max", false, Max, "max", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Vector Relational operators
+SIMD_INTRINSIC("Equals", false, Equal, "eq", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("LessThan", false, LessThan, "lt", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("LessThanOrEqual", false, LessThanOrEqual, "le", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("GreaterThan", false, GreaterThan, "gt", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("GreaterThanOrEqual", false, GreaterThanOrEqual, "ge", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Bitwise operations
+SIMD_INTRINSIC("op_BitwiseAnd", false, BitwiseAnd, "&", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("BitwiseAndNot", false, BitwiseAndNot, "&~", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("op_BitwiseOr", false, BitwiseOr, "|", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("op_ExclusiveOr", false, BitwiseXor, "^", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Dot Product
+SIMD_INTRINSIC("Dot", false, DotProduct, "Dot", TYP_UNKNOWN, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Select
+SIMD_INTRINSIC("ConditionalSelect", false, Select, "Select", TYP_STRUCT, 3, {TYP_STRUCT, TYP_STRUCT, TYP_STRUCT}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Cast
+SIMD_INTRINSIC("op_Explicit", false, Cast, "Cast", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Miscellaneous
+SIMD_INTRINSIC("get_IsHardwareAccelerated", false, HWAccel, "HWAccel", TYP_BOOL, 0, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Shuffle and Shift operations - these are internal intrinsics as there is no corresponding managed method.
+// To prevent these from being accidentally recognized as intrinsics, all of the arg types and supported base types are made TYP_UNDEF
+SIMD_INTRINSIC("ShuffleSSE2", false, ShuffleSSE2, "ShuffleSSE2", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Internal, logical shift operations that shift the entire vector register instead of individual elements of the vector.
+SIMD_INTRINSIC("ShiftLeftInternal", false, ShiftLeftInternal, "<< Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("ShiftRightInternal", false, ShiftRightInternal, ">> Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Internal intrinsics for saving & restoring the upper half of a vector register
+SIMD_INTRINSIC("UpperSave", false, UpperSave, "UpperSave Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("UpperRestore", false, UpperRestore, "UpperRestore Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+SIMD_INTRINSIC(nullptr, false, Invalid, "Invalid", TYP_UNDEF, 0, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+#undef SIMD_INTRINSIC
+
+#else //_TARGET_AMD64_
+#error SIMD intrinsics not defined for target arch
+#endif //!_TARGET_AMD64_
+
+#endif //FEATURE_SIMD
+// clang-format on
diff --git a/src/jit/sm.cpp b/src/jit/sm.cpp
new file mode 100644
index 0000000000..859b238ec8
--- /dev/null
+++ b/src/jit/sm.cpp
@@ -0,0 +1,190 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX State machine used in the JIT XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "smcommon.cpp"
+
+//
+// The array to map from EE opcodes (i.e. CEE_ ) to state machine opcodes (i.e. SM_ )
+//
+const SM_OPCODE smOpcodeMap[] = {
+#define OPCODEMAP(eename, eestring, smname) smname,
+#include "smopcodemap.def"
+#undef OPCODEMAP
+};
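+
+// For example, an entry in smopcodemap.def of the (assumed) form
+//     OPCODEMAP(CEE_LDARG_0, "ldarg.0", SM_LDARG_0)
+// contributes a single SM_LDARG_0 element to this array, so smOpcodeMap can be
+// indexed directly by the CEE_ opcode value, as MapToSMOpcode does below.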
+
+// TODO: Consider how to make this method inlinable, given that it refers to smOpcodeMap.
+/* static */ SM_OPCODE CodeSeqSM::MapToSMOpcode(OPCODE opcode)
+{
+ assert(opcode < CEE_COUNT);
+
+ SM_OPCODE smOpcode = smOpcodeMap[opcode];
+ assert(smOpcode < SM_COUNT);
+ return smOpcode;
+}
+
+void CodeSeqSM::Start(Compiler* comp)
+{
+ pComp = comp;
+ States = gp_SMStates;
+ JumpTableCells = gp_SMJumpTableCells;
+ StateWeights = gp_StateWeights;
+ NativeSize = 0;
+
+ Reset();
+}
+
+void CodeSeqSM::Reset()
+{
+ curState = SM_STATE_ID_START;
+
+#ifdef DEBUG
+ // Reset the state occurrence counts
+ memset(StateMatchedCounts, 0, sizeof(StateMatchedCounts));
+#endif
+}
+
+void CodeSeqSM::End()
+{
+ if (States[curState].term)
+ {
+ TermStateMatch(curState DEBUGARG(pComp->verbose));
+ }
+}
+
+void CodeSeqSM::Run(SM_OPCODE opcode DEBUGARG(int level))
+{
+ SM_STATE_ID nextState;
+ SM_STATE_ID rollbackState;
+
+ SM_OPCODE opcodesToRevisit[MAX_CODE_SEQUENCE_LENGTH];
+
+ assert(level <= MAX_CODE_SEQUENCE_LENGTH);
+
+_Next:
+ nextState = GetDestState(curState, opcode);
+
+ if (nextState != 0)
+ {
+ // This is easy: just go to the next state.
+ curState = nextState;
+ return;
+ }
+
+ assert(curState != SM_STATE_ID_START);
+
+ if (States[curState].term)
+ {
+ TermStateMatch(curState DEBUGARG(pComp->verbose));
+ curState = SM_STATE_ID_START;
+ goto _Next;
+ }
+
+ // This is hard. We need to roll back to the longest-matched terminal state and restart from there.
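+ //
+ // Worked example (using the state names from the generated tables in smdata.cpp):
+ // if the current state is the non-terminal [ldarg.0 -> ldarg.1] and the incoming
+ // opcode has no outgoing edge from it, the longest terminal prefix is [ldarg.0].
+ // We record a match for [ldarg.0] and then revisit the remaining opcodes - ldarg.1
+ // followed by the incoming opcode - starting over from SM_STATE_ID_START.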
+
+ rollbackState = States[curState].longestTermState;
+ TermStateMatch(rollbackState DEBUGARG(pComp->verbose));
+
+ assert(States[curState].length > States[rollbackState].length);
+
+ unsigned numOfOpcodesToRevisit = States[curState].length - States[rollbackState].length + 1;
+ assert(numOfOpcodesToRevisit > 1 &&
+ numOfOpcodesToRevisit <= MAX_CODE_SEQUENCE_LENGTH); // So it can fit in the local array opcodesToRevisit[]
+
+ SM_OPCODE* p = opcodesToRevisit + (numOfOpcodesToRevisit - 1);
+
+ *p = opcode;
+
+ // Fill in the local array:
+ for (unsigned i = 0; i < numOfOpcodesToRevisit - 1; ++i)
+ {
+ *(--p) = States[curState].opc;
+ curState = States[curState].prevState;
+ }
+
+ assert(curState == rollbackState);
+
+ // Now revisit these opcodes, starting from SM_STATE_ID_START.
+ curState = SM_STATE_ID_START;
+ for (p = opcodesToRevisit; p < opcodesToRevisit + numOfOpcodesToRevisit; ++p)
+ {
+ Run(*p DEBUGARG(level + 1));
+ }
+}
+
+SM_STATE_ID CodeSeqSM::GetDestState(SM_STATE_ID srcState, SM_OPCODE opcode)
+{
+ assert(opcode < SM_COUNT);
+
+ JumpTableCell* pThisJumpTable = (JumpTableCell*)(((PBYTE)JumpTableCells) + States[srcState].jumpTableByteOffset);
+
+ JumpTableCell* cell = pThisJumpTable + opcode;
+
+ if (cell->srcState != srcState)
+ {
+ assert(cell->srcState == 0 ||
+ cell->srcState != srcState); // Either way means there is no outgoing edge from srcState.
+ return 0;
+ }
+ else
+ {
+ return cell->destState;
+ }
+}
+
+#ifdef DEBUG
+
+const char* CodeSeqSM::StateDesc(SM_STATE_ID stateID)
+{
+ static char s_StateDesc[500];
+ static SM_OPCODE s_StateDescOpcodes[MAX_CODE_SEQUENCE_LENGTH];
+
+ if (stateID == 0)
+ {
+ return "invalid";
+ }
+ if (stateID == SM_STATE_ID_START)
+ {
+ return "start";
+ }
+ unsigned i = 0;
+
+ SM_STATE_ID b = stateID;
+
+ while (States[b].prevState != 0)
+ {
+ s_StateDescOpcodes[i] = States[b].opc;
+ b = States[b].prevState;
+ ++i;
+ }
+
+ assert(i == States[stateID].length && i > 0);
+
+ *s_StateDesc = 0;
+
+ while (--i > 0)
+ {
+ strcat(s_StateDesc, smOpcodeNames[s_StateDescOpcodes[i]]);
+ strcat(s_StateDesc, " -> ");
+ }
+
+ strcat(s_StateDesc, smOpcodeNames[s_StateDescOpcodes[0]]);
+
+ return s_StateDesc;
+}
+
+#endif // DEBUG
diff --git a/src/jit/sm.h b/src/jit/sm.h
new file mode 100644
index 0000000000..33d65092bb
--- /dev/null
+++ b/src/jit/sm.h
@@ -0,0 +1,75 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// State machine header used ONLY in the JIT.
+//
+
+#ifndef __sm_h__
+#define __sm_h__
+
+#include "smcommon.h"
+
+extern const SMState* gp_SMStates;
+extern const JumpTableCell* gp_SMJumpTableCells;
+extern const short* gp_StateWeights;
+
+class CodeSeqSM // Represents a particular run of the state machine.
+                // For example, it maintains the array of counts for the terminated states.
+                // These counts must be kept per method for them to be correct in a
+                // multithreaded environment.
+{
+public:
+ Compiler* pComp;
+
+ const SMState* States;
+ const JumpTableCell* JumpTableCells;
+ const short* StateWeights; // Weight for each state. Including non-terminate states.
+
+ SM_STATE_ID curState;
+
+ int NativeSize; // This is a signed integer!
+
+ void Start(Compiler* comp);
+ void Reset();
+ void End();
+ void Run(SM_OPCODE opcode DEBUGARG(int level));
+
+ SM_STATE_ID GetDestState(SM_STATE_ID srcState, SM_OPCODE opcode);
+
+ // Matched a termination state
+ inline void TermStateMatch(SM_STATE_ID stateID DEBUGARG(bool verbose))
+ {
+ assert(States[stateID].term);
+ assert(StateMatchedCounts[stateID] < _UI16_MAX);
+#ifdef DEBUG
+ ++StateMatchedCounts[stateID];
+#ifndef SMGEN_COMPILE
+ if (verbose)
+ {
+ printf("weight=%3d : state %3d [ %s ]\n", StateWeights[stateID], stateID, StateDesc(stateID));
+ }
+#endif // SMGEN_COMPILE
+#endif // DEBUG
+
+ NativeSize += StateWeights[stateID];
+ }
+
+ // Given an SM opcode, retrieve the weight for its single-opcode state.
+ // For example, the ID of the single-opcode state SM_NOSHOW is 2.
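+ // Likewise, SM_LDARG_0 has opcode value 1, so its single-opcode state ID is
+ // 1 + SM_STATE_ID_START + 1 == 3 (state 3 [ldarg.0] in the generated smdata.cpp).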
+ inline short GetWeightForOpcode(SM_OPCODE opcode)
+ {
+ SM_STATE_ID stateID = ((SM_STATE_ID)opcode) + SM_STATE_ID_START + 1;
+ return StateWeights[stateID];
+ }
+
+#ifdef DEBUG
+ WORD StateMatchedCounts[NUM_SM_STATES];
+ const char* StateDesc(SM_STATE_ID stateID);
+#endif
+
+ static SM_OPCODE MapToSMOpcode(OPCODE opcode);
+};
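+
+/* Illustrative usage sketch (hypothetical driver; "comp" and the source of IL opcodes
+   are placeholders - the real caller lives elsewhere in the JIT):
+
+      CodeSeqSM sm;
+      sm.Start(comp);                     // bind to a Compiler instance and reset
+      for (OPCODE opcode : <IL opcodes of the method>)
+      {
+          sm.Run(CodeSeqSM::MapToSMOpcode(opcode) DEBUGARG(0));
+      }
+      sm.End();                           // flush the final (possibly terminal) state
+      int estimate = sm.NativeSize;       // accumulated weight of the matched states
+*/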
+
+#endif /* __sm_h__ */
diff --git a/src/jit/smallhash.h b/src/jit/smallhash.h
new file mode 100644
index 0000000000..71ea4a6269
--- /dev/null
+++ b/src/jit/smallhash.h
@@ -0,0 +1,592 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _SMALLHASHTABLE_H_
+#define _SMALLHASHTABLE_H_
+
+//------------------------------------------------------------------------
+// HashTableInfo: a concept that provides equality and hashing methods for
+// a particular key type. Used by HashTableBase and its
+// subclasses.
+template <typename TKey>
+struct HashTableInfo
+{
+ // static bool Equals(const TKey& x, const TKey& y);
+ // static unsigned GetHashCode(const TKey& key);
+};
+
+//------------------------------------------------------------------------
+// HashTableInfo<TKey*>: specialized version of HashTableInfo for pointer-
+// typed keys.
+template <typename TKey>
+struct HashTableInfo<TKey*>
+{
+ static bool Equals(const TKey* x, const TKey* y)
+ {
+ return x == y;
+ }
+
+ static unsigned GetHashCode(const TKey* key)
+ {
+ // Shift off bits that are not likely to be significant
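+ // (e.g. for a type with 8-byte alignment, the low 3 bits of a well-aligned
+ // pointer are always zero and so carry no information)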
+ size_t keyval = reinterpret_cast<size_t>(key) >> ConstLog2<__alignof(TKey)>::value;
+
+ // Truncate and return the result
+ return static_cast<unsigned>(keyval);
+ }
+};
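+
+//------------------------------------------------------------------------
+// Illustrative sketch (hypothetical; not a type the JIT defines): a conforming
+// info type for plain unsigned keys could look like the following and be passed
+// as the TKeyInfo parameter of the tables below.
+//
+//    struct UnsignedKeyInfo
+//    {
+//        static bool Equals(unsigned x, unsigned y)
+//        {
+//            return x == y;
+//        }
+//
+//        static unsigned GetHashCode(unsigned key)
+//        {
+//            return key * 2654435761u; // simple multiplicative hash to spread the bits
+//        }
+//    };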
+
+//------------------------------------------------------------------------
+// HashTableBase: base type for HashTable and SmallHashTable. This class
+// provides the vast majority of the implementation. The
+// subclasses differ in the storage they use at the time of
+// construction: HashTable allocates the initial bucket
+// array on the heap; SmallHashTable contains a small inline
+// array.
+//
+// This implementation is based on the ideas presented in Herlihy, Shavit,
+// and Tzafrir '08 (http://mcg.cs.tau.ac.il/papers/disc2008-hopscotch.pdf),
+// though it does not currently implement the hopscotch algorithm.
+//
+// The approach taken is intended to perform well in both space and speed.
+// This approach is a hybrid of separate chaining and open addressing with
+// linear probing: collisions are resolved using a bucket chain, but that
+// chain is stored in the bucket array itself.
+//
+// Resolving collisions using a bucket chain avoids the primary clustering
+// issue common in linearly-probed open addressed hash tables, while using
+// buckets as chain nodes avoids the allocation traffic typical of chained
+// tables. Applying the hopscotch algorithm in the aforementioned paper
+// could further improve performance by optimizing access patterns for
+// better cache usage.
+//
+// Template parameters:
+// TKey - The type of the table's keys.
+// TValue - The type of the table's values.
+// TKeyInfo - A type that conforms to the HashTableInfo<TKey> concept.
+template <typename TKey, typename TValue, typename TKeyInfo = HashTableInfo<TKey>>
+class HashTableBase
+{
+ friend class KeyValuePair;
+ friend class Iterator;
+
+ enum : unsigned
+ {
+ InitialNumBuckets = 8
+ };
+
+protected:
+ //------------------------------------------------------------------------
+ // HashTableBase::Bucket: provides storage for the key-value pairs that
+ // make up the contents of the table.
+ //
+ // The "home" bucket for a particular key is the bucket indexed by the
+ // key's hash code modulo the size of the bucket array (the "home index").
+ //
+ // The home bucket is always considered to be part of the chain that it
+ // roots, even if it is also part of the chain rooted at a different
+ // bucket. `m_firstOffset` indicates the offset of the first non-home
+ // bucket in the home bucket's chain. If the `m_firstOffset` of a bucket
+ // is 0, the chain rooted at that bucket is empty.
+ //
+ // The index of the next bucket in a chain is calculated by adding the
+ // value in `m_nextOffset` to the index of the current bucket. If
+ // `m_nextOffset` is 0, the current bucket is the end of its chain. Each
+ // bucket in a chain must be occupied (i.e. `m_isFull` will be true).
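+ //
+ // Hypothetical example (indices chosen for illustration): with 8 buckets, suppose
+ // keys A and B both hash to home index 2. A occupies bucket 2, and B was placed in
+ // bucket 5 (the first free bucket it probed). Bucket 2 then has m_firstOffset == 3
+ // ((5 - 2) & 7) and bucket 5 has m_nextOffset == 0, marking the end of the chain.
+ // A lookup for B probes bucket 2, fails to match there, follows m_firstOffset to
+ // bucket 5, and matches.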
+ struct Bucket
+ {
+ bool m_isFull; // True if the bucket is occupied; false otherwise.
+
+ unsigned m_firstOffset; // The offset to the first node in the chain for this bucket index.
+ unsigned m_nextOffset; // The offset to the next node in the chain for this bucket index.
+
+ unsigned m_hash; // The hash code for the element stored in this bucket.
+ TKey m_key; // The key for the element stored in this bucket.
+ TValue m_value; // The value for the element stored in this bucket.
+ };
+
+private:
+ Compiler* m_compiler; // The compiler context to use for allocations.
+ Bucket* m_buckets; // The bucket array.
+ unsigned m_numBuckets; // The number of buckets in the bucket array.
+ unsigned m_numFullBuckets; // The number of occupied buckets.
+
+ //------------------------------------------------------------------------
+ // HashTableBase::Insert: inserts a key-value pair into a bucket array.
+ //
+ // Arguments:
+ // buckets - The bucket array in which to insert the key-value pair.
+ // numBuckets - The number of buckets in the bucket array.
+ // hash - The hash code of the key to insert.
+ // key - The key to insert.
+ // value - The value to insert.
+ //
+ // Returns:
+ // True if the key-value pair was successfully inserted; false
+ // otherwise.
+ static bool Insert(Bucket* buckets, unsigned numBuckets, unsigned hash, const TKey& key, const TValue& value)
+ {
+ const unsigned mask = numBuckets - 1;
+ unsigned homeIndex = hash & mask;
+
+ Bucket* home = &buckets[homeIndex];
+ if (!home->m_isFull)
+ {
+ // The home bucket is empty; use it.
+ //
+ // Note that `m_firstOffset` does not need to be updated: whether or not it is non-zero,
+ // it is already correct, since we're inserting at the head of the list. `m_nextOffset`
+ // must be 0, however, since this node should not be part of a list.
+ assert(home->m_nextOffset == 0);
+
+ home->m_isFull = true;
+ home->m_hash = hash;
+ home->m_key = key;
+ home->m_value = value;
+ return true;
+ }
+
+ // If the home bucket is full, probe to find the next empty bucket.
+ unsigned precedingIndexInChain = homeIndex;
+ unsigned nextIndexInChain = (homeIndex + home->m_firstOffset) & mask;
+ for (unsigned j = 1; j < numBuckets; j++)
+ {
+ unsigned bucketIndex = (homeIndex + j) & mask;
+ Bucket* bucket = &buckets[bucketIndex];
+ if (bucketIndex == nextIndexInChain)
+ {
+ assert(bucket->m_isFull);
+ precedingIndexInChain = bucketIndex;
+ nextIndexInChain = (bucketIndex + bucket->m_nextOffset) & mask;
+ }
+ else if (!bucket->m_isFull)
+ {
+ bucket->m_isFull = true;
+ if (precedingIndexInChain == nextIndexInChain)
+ {
+ bucket->m_nextOffset = 0;
+ }
+ else
+ {
+ assert(((nextIndexInChain - bucketIndex) & mask) > 0);
+ bucket->m_nextOffset = (nextIndexInChain - bucketIndex) & mask;
+ }
+
+ unsigned offset = (bucketIndex - precedingIndexInChain) & mask;
+ assert(offset != 0);
+
+ if (precedingIndexInChain == homeIndex)
+ {
+ buckets[precedingIndexInChain].m_firstOffset = offset;
+ }
+ else
+ {
+ buckets[precedingIndexInChain].m_nextOffset = offset;
+ }
+
+ bucket->m_hash = hash;
+ bucket->m_key = key;
+ bucket->m_value = value;
+ return true;
+ }
+ }
+
+ // No more free buckets.
+ return false;
+ }
+
+ //------------------------------------------------------------------------
+ // HashTableBase::TryGetBucket: attempts to get the bucket that holds a
+ // particular key.
+ //
+ // Arguments:
+ // hash - The hash code of the key to find.
+ // key - The key to find.
+ // precedingIndex - An output parameter that will hold the index of the
+ // preceding bucket in the chain for the key. May be
+ // equal to `bucketIndex` if the key is stored in its
+ // home bucket.
+ // bucketIndex - An output parameter that will hold the index of the
+ // bucket that stores the key.
+ //
+ // Returns:
+ // True if the key was successfully found; false otherwise.
+ bool TryGetBucket(unsigned hash, const TKey& key, unsigned* precedingIndex, unsigned* bucketIndex) const
+ {
+ if (m_numBuckets == 0)
+ {
+ return false;
+ }
+
+ const unsigned mask = m_numBuckets - 1;
+ unsigned index = hash & mask;
+
+ Bucket* bucket = &m_buckets[index];
+ if (bucket->m_isFull && bucket->m_hash == hash && TKeyInfo::Equals(bucket->m_key, key))
+ {
+ *precedingIndex = index;
+ *bucketIndex = index;
+ return true;
+ }
+
+ for (unsigned offset = bucket->m_firstOffset; offset != 0; offset = bucket->m_nextOffset)
+ {
+ unsigned precedingIndexInChain = index;
+
+ index = (index + offset) & mask;
+ bucket = &m_buckets[index];
+
+ assert(bucket->m_isFull);
+ if (bucket->m_hash == hash && TKeyInfo::Equals(bucket->m_key, key))
+ {
+ *precedingIndex = precedingIndexInChain;
+ *bucketIndex = index;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ //------------------------------------------------------------------------
+ // HashTableBase::Resize: allocates a new bucket array twice the size of
+ // the current array and copies the key-value pairs
+ // from the current bucket array into the new array.
+ void Resize()
+ {
+ Bucket* currentBuckets = m_buckets;
+
+ unsigned newNumBuckets = m_numBuckets == 0 ? InitialNumBuckets : m_numBuckets * 2;
+ size_t allocSize = sizeof(Bucket) * newNumBuckets;
+ assert((sizeof(Bucket) * m_numBuckets) < allocSize);
+
+ auto* newBuckets = reinterpret_cast<Bucket*>(m_compiler->compGetMem(allocSize));
+ memset(newBuckets, 0, allocSize);
+
+ for (unsigned currentIndex = 0; currentIndex < m_numBuckets; currentIndex++)
+ {
+ Bucket* currentBucket = &currentBuckets[currentIndex];
+ if (!currentBucket->m_isFull)
+ {
+ continue;
+ }
+
+ bool inserted =
+ Insert(newBuckets, newNumBuckets, currentBucket->m_hash, currentBucket->m_key, currentBucket->m_value);
+ (assert(inserted), (void)inserted);
+ }
+
+ m_numBuckets = newNumBuckets;
+ m_buckets = newBuckets;
+ }
+
+protected:
+ HashTableBase(Compiler* compiler, Bucket* buckets, unsigned numBuckets)
+ : m_compiler(compiler), m_buckets(buckets), m_numBuckets(numBuckets), m_numFullBuckets(0)
+ {
+ assert(compiler != nullptr);
+
+ if (numBuckets > 0)
+ {
+ assert((numBuckets & (numBuckets - 1)) == 0); // Size must be a power of 2
+ assert(m_buckets != nullptr);
+
+ memset(m_buckets, 0, sizeof(Bucket) * numBuckets);
+ }
+ }
+
+public:
+#ifdef DEBUG
+ class Iterator;
+
+ class KeyValuePair final
+ {
+ friend class HashTableBase<TKey, TValue, TKeyInfo>::Iterator;
+
+ Bucket* m_bucket;
+
+ KeyValuePair(Bucket* bucket) : m_bucket(bucket)
+ {
+ assert(m_bucket != nullptr);
+ }
+
+ public:
+ KeyValuePair() : m_bucket(nullptr)
+ {
+ }
+
+ inline TKey& Key()
+ {
+ return m_bucket->m_key;
+ }
+
+ inline TValue& Value()
+ {
+ return m_bucket->m_value;
+ }
+ };
+
+ // NOTE: HashTableBase only provides iterators in debug builds because the order in which
+ // the iterator type produces values is undefined (e.g. it is not related to the order in
+ // which key-value pairs were inserted).
+ class Iterator final
+ {
+ friend class HashTableBase<TKey, TValue, TKeyInfo>;
+
+ Bucket* m_buckets;
+ unsigned m_numBuckets;
+ unsigned m_index;
+
+ Iterator(Bucket* buckets, unsigned numBuckets, unsigned index)
+ : m_buckets(buckets), m_numBuckets(numBuckets), m_index(index)
+ {
+ assert((buckets != nullptr) || (numBuckets == 0));
+ assert(index <= numBuckets);
+
+ // Advance to the first occupied bucket
+ while (m_index != m_numBuckets && !m_buckets[m_index].m_isFull)
+ {
+ m_index++;
+ }
+ }
+
+ public:
+ Iterator() : m_buckets(nullptr), m_numBuckets(0), m_index(0)
+ {
+ }
+
+ KeyValuePair operator*() const
+ {
+ if (m_index >= m_numBuckets)
+ {
+ return KeyValuePair();
+ }
+
+ Bucket* bucket = &m_buckets[m_index];
+ assert(bucket->m_isFull);
+ return KeyValuePair(bucket);
+ }
+
+ KeyValuePair operator->() const
+ {
+ return this->operator*();
+ }
+
+ bool operator==(const Iterator& other) const
+ {
+ return (m_buckets == other.m_buckets) && (m_index == other.m_index);
+ }
+
+ bool operator!=(const Iterator& other) const
+ {
+ return (m_buckets != other.m_buckets) || (m_index != other.m_index);
+ }
+
+ Iterator& operator++()
+ {
+ do
+ {
+ m_index++;
+ } while (m_index != m_numBuckets && !m_buckets[m_index].m_isFull);
+
+ return *this;
+ }
+ };
+
+ Iterator begin() const
+ {
+ return Iterator(m_buckets, m_numBuckets, 0);
+ }
+
+ Iterator end() const
+ {
+ return Iterator(m_buckets, m_numBuckets, m_numBuckets);
+ }
+#endif // DEBUG
+
+ unsigned Count() const
+ {
+ return m_numFullBuckets;
+ }
+
+ void Clear()
+ {
+ if (m_numBuckets > 0)
+ {
+ memset(m_buckets, 0, sizeof(Bucket) * m_numBuckets);
+ m_numFullBuckets = 0;
+ }
+ }
+
+ //------------------------------------------------------------------------
+ // HashTableBase::AddOrUpdate: adds a key-value pair to the hash table if
+ // the key does not already exist in the
+ // table, or updates the value if the key
+ // already exists.
+ //
+ // Arguments:
+ // key - The key for which to add or update a value.
+ // value - The value.
+ //
+ // Returns:
+ // True if the value was added; false if it was updated.
+ bool AddOrUpdate(const TKey& key, const TValue& value)
+ {
+ unsigned hash = TKeyInfo::GetHashCode(key);
+
+ unsigned unused, index;
+ if (TryGetBucket(hash, key, &unused, &index))
+ {
+ m_buckets[index].m_value = value;
+ return false;
+ }
+
+ // If the load factor is 0.8 or greater, resize the table before inserting.
+ if ((m_numFullBuckets * 5) >= (m_numBuckets * 4))
+ {
+ Resize();
+ }
+
+ bool inserted = Insert(m_buckets, m_numBuckets, hash, key, value);
+ (assert(inserted), (void)inserted);
+
+ m_numFullBuckets++;
+
+ return true;
+ }
+
+ //------------------------------------------------------------------------
+ // HashTableBase::TryRemove: removes a key from the hash table and returns
+ // its value if the key exists in the table.
+ //
+ // Arguments:
+ // key - The key to remove from the table.
+ // value - An output parameter that will hold the value for the removed
+ // key.
+ //
+ // Returns:
+ // True if the key was removed from the table; false otherwise.
+ bool TryRemove(const TKey& key, TValue* value)
+ {
+ unsigned hash = TKeyInfo::GetHashCode(key);
+
+ unsigned precedingIndexInChain, bucketIndex;
+ if (!TryGetBucket(hash, key, &precedingIndexInChain, &bucketIndex))
+ {
+ return false;
+ }
+
+ Bucket* bucket = &m_buckets[bucketIndex];
+
+ if (precedingIndexInChain != bucketIndex)
+ {
+ const unsigned mask = m_numBuckets - 1;
+ unsigned homeIndex = hash & mask;
+
+ unsigned nextOffset;
+ if (bucket->m_nextOffset == 0)
+ {
+ nextOffset = 0;
+ }
+ else
+ {
+ unsigned nextIndexInChain = (bucketIndex + bucket->m_nextOffset) & mask;
+ nextOffset = (nextIndexInChain - precedingIndexInChain) & mask;
+ }
+
+ if (precedingIndexInChain == homeIndex)
+ {
+ m_buckets[precedingIndexInChain].m_firstOffset = nextOffset;
+ }
+ else
+ {
+ m_buckets[precedingIndexInChain].m_nextOffset = nextOffset;
+ }
+ }
+
+ bucket->m_isFull = false;
+ bucket->m_nextOffset = 0;
+
+ m_numFullBuckets--;
+
+ *value = bucket->m_value;
+ return true;
+ }
+
+ //------------------------------------------------------------------------
+ // HashTableBase::TryGetValue: retrieves the value for a key if the key
+ // exists in the table.
+ //
+ // Arguments:
+ // key - The key to find in the table.
+ // value - An output parameter that will hold the value for the key.
+ //
+ // Returns:
+ // True if the key was found in the table; false otherwise.
+ bool TryGetValue(const TKey& key, TValue* value) const
+ {
+ unsigned unused, index;
+ if (!TryGetBucket(TKeyInfo::GetHashCode(key), key, &unused, &index))
+ {
+ return false;
+ }
+
+ *value = m_buckets[index].m_value;
+ return true;
+ }
+};
+
+//------------------------------------------------------------------------
+// HashTable: a simple subclass of `HashTableBase` that always uses heap
+// storage for its bucket array.
+template <typename TKey, typename TValue, typename TKeyInfo = HashTableInfo<TKey>>
+class HashTable final : public HashTableBase<TKey, TValue, TKeyInfo>
+{
+ typedef HashTableBase<TKey, TValue, TKeyInfo> TBase;
+
+ static unsigned RoundUp(unsigned initialSize)
+ {
+ return 1 << genLog2(initialSize);
+ }
+
+public:
+ HashTable(Compiler* compiler) : TBase(compiler, nullptr, 0)
+ {
+ }
+
+ HashTable(Compiler* compiler, unsigned initialSize)
+ : TBase(compiler,
+ reinterpret_cast<typename TBase::Bucket*>(
+ compiler->compGetMem(RoundUp(initialSize) * sizeof(typename TBase::Bucket))),
+ RoundUp(initialSize))
+ {
+ }
+};
+
+//------------------------------------------------------------------------
+// SmallHashTable: an alternative to `HashTable` that stores the initial
+// bucket array inline. Most useful for situations where
+// the number of key-value pairs that will be stored in
+// the map at any given time falls below a certain
+// threshold. Switches to heap storage once the initial
+// inline storage is exhausted.
+template <typename TKey, typename TValue, unsigned NumInlineBuckets = 8, typename TKeyInfo = HashTableInfo<TKey>>
+class SmallHashTable final : public HashTableBase<TKey, TValue, TKeyInfo>
+{
+ typedef HashTableBase<TKey, TValue, TKeyInfo> TBase;
+
+ enum : unsigned
+ {
+ RoundedNumInlineBuckets = 1 << ConstLog2<NumInlineBuckets>::value
+ };
+
+ typename TBase::Bucket m_inlineBuckets[RoundedNumInlineBuckets];
+
+public:
+ SmallHashTable(Compiler* compiler) : TBase(compiler, m_inlineBuckets, RoundedNumInlineBuckets)
+ {
+ }
+};
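+
+// Illustrative usage sketch (hypothetical variable names; any key type with a
+// HashTableInfo specialization, such as a pointer type, works the same way):
+//
+//    SmallHashTable<GenTree*, unsigned, 8> useCounts(compiler);
+//
+//    bool added = useCounts.AddOrUpdate(node, 1);  // true: a new entry was inserted
+//    added      = useCounts.AddOrUpdate(node, 2);  // false: the existing value was updated
+//
+//    unsigned count;
+//    if (useCounts.TryGetValue(node, &count))
+//    {
+//        assert(count == 2);
+//    }
+//
+//    if (useCounts.TryRemove(node, &count))
+//    {
+//        assert(useCounts.Count() == 0);
+//    }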
+
+#endif // _SMALLHASHTABLE_H_
diff --git a/src/jit/smcommon.cpp b/src/jit/smcommon.cpp
new file mode 100644
index 0000000000..d17e21b874
--- /dev/null
+++ b/src/jit/smcommon.cpp
@@ -0,0 +1,166 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if defined(DEBUG) || defined(SMGEN_COMPILE)
+
+//
+// The array of state-machine-opcode names
+//
+const char* const smOpcodeNames[] = {
+#define SMOPDEF(smname, string) string,
+#include "smopcode.def"
+#undef SMOPDEF
+};
+
+//
+// The code sequences the state machine will look for.
+//
+
+const SM_OPCODE s_CodeSeqs[][MAX_CODE_SEQUENCE_LENGTH] = {
+
+#define SMOPDEF(smname, string) {smname, CODE_SEQUENCE_END},
+// ==== Single opcode states ====
+#include "smopcode.def"
+#undef SMOPDEF
+
+ // ==== Legal prefixed opcode sequences ====
+ {SM_CONSTRAINED, SM_CALLVIRT, CODE_SEQUENCE_END},
+
+ // ==== Interesting patterns ====
+
+ // Fetching of object field
+ {SM_LDARG_0, SM_LDFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_1, SM_LDFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_2, SM_LDFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_3, SM_LDFLD, CODE_SEQUENCE_END},
+
+ // Fetching of struct field
+ {SM_LDARGA_S, SM_LDFLD, CODE_SEQUENCE_END},
+ {SM_LDLOCA_S, SM_LDFLD, CODE_SEQUENCE_END},
+
+ // Fetching of struct field from a normed struct
+ {SM_LDARGA_S_NORMED, SM_LDFLD, CODE_SEQUENCE_END},
+ {SM_LDLOCA_S_NORMED, SM_LDFLD, CODE_SEQUENCE_END},
+
+ // stloc/ldloc --> dup
+ {SM_STLOC_0, SM_LDLOC_0, CODE_SEQUENCE_END},
+ {SM_STLOC_1, SM_LDLOC_1, CODE_SEQUENCE_END},
+ {SM_STLOC_2, SM_LDLOC_2, CODE_SEQUENCE_END},
+ {SM_STLOC_3, SM_LDLOC_3, CODE_SEQUENCE_END},
+
+ // FPU operations
+ {SM_LDC_R4, SM_ADD, CODE_SEQUENCE_END},
+ {SM_LDC_R4, SM_SUB, CODE_SEQUENCE_END},
+ {SM_LDC_R4, SM_MUL, CODE_SEQUENCE_END},
+ {SM_LDC_R4, SM_DIV, CODE_SEQUENCE_END},
+
+ {SM_LDC_R8, SM_ADD, CODE_SEQUENCE_END},
+ {SM_LDC_R8, SM_SUB, CODE_SEQUENCE_END},
+ {SM_LDC_R8, SM_MUL, CODE_SEQUENCE_END},
+ {SM_LDC_R8, SM_DIV, CODE_SEQUENCE_END},
+
+ {SM_CONV_R4, SM_ADD, CODE_SEQUENCE_END},
+ {SM_CONV_R4, SM_SUB, CODE_SEQUENCE_END},
+ {SM_CONV_R4, SM_MUL, CODE_SEQUENCE_END},
+ {SM_CONV_R4, SM_DIV, CODE_SEQUENCE_END},
+
+ // {SM_CONV_R8, SM_ADD, CODE_SEQUENCE_END}, // Removed since it collides with ldelem.r8 in
+ // Math.InternalRound
+ // {SM_CONV_R8, SM_SUB, CODE_SEQUENCE_END}, // Just remove the SM_SUB as well.
+ {SM_CONV_R8, SM_MUL, CODE_SEQUENCE_END},
+ {SM_CONV_R8, SM_DIV, CODE_SEQUENCE_END},
+
+ /* Constant init constructor:
+ L_0006: ldarg.0
+ L_0007: ldc.r8 0
+ L_0010: stfld float64 raytracer.Vec::x
+ */
+
+ {SM_LDARG_0, SM_LDC_I4_0, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_LDC_R4, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_LDC_R8, SM_STFLD, CODE_SEQUENCE_END},
+
+ /* Copy constructor:
+ L_0006: ldarg.0
+ L_0007: ldarg.1
+ L_0008: ldfld float64 raytracer.Vec::x
+ L_000d: stfld float64 raytracer.Vec::x
+ */
+
+ {SM_LDARG_0, SM_LDARG_1, SM_LDFLD, SM_STFLD, CODE_SEQUENCE_END},
+
+ /* Field setter:
+
+ [DebuggerNonUserCode]
+ private void CtorClosed(object target, IntPtr methodPtr)
+ {
+ if (target == null)
+ {
+ this.ThrowNullThisInDelegateToInstance();
+ }
+ base._target = target;
+ base._methodPtr = methodPtr;
+ }
+
+
+ .method private hidebysig instance void CtorClosed(object target, native int methodPtr) cil managed
+ {
+ .custom instance void System.Diagnostics.DebuggerNonUserCodeAttribute::.ctor()
+ .maxstack 8
+ L_0000: ldarg.1
+ L_0001: brtrue.s L_0009
+ L_0003: ldarg.0
+ L_0004: call instance void System.MulticastDelegate::ThrowNullThisInDelegateToInstance()
+
+ L_0009: ldarg.0
+ L_000a: ldarg.1
+ L_000b: stfld object System.Delegate::_target
+
+ L_0010: ldarg.0
+ L_0011: ldarg.2
+ L_0012: stfld native int System.Delegate::_methodPtr
+
+ L_0017: ret
+ }
+ */
+
+ {SM_LDARG_0, SM_LDARG_1, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_LDARG_2, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_LDARG_3, SM_STFLD, CODE_SEQUENCE_END},
+
+ /* Scale operator:
+
+ L_0000: ldarg.0
+ L_0001: dup
+ L_0002: ldfld float64 raytracer.Vec::x
+ L_0007: ldarg.1
+ L_0008: mul
+ L_0009: stfld float64 raytracer.Vec::x
+ */
+
+ {SM_LDARG_0, SM_DUP, SM_LDFLD, SM_LDARG_1, SM_ADD, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_DUP, SM_LDFLD, SM_LDARG_1, SM_SUB, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_DUP, SM_LDFLD, SM_LDARG_1, SM_MUL, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_DUP, SM_LDFLD, SM_LDARG_1, SM_DIV, SM_STFLD, CODE_SEQUENCE_END},
+
+ /* Add operator
+ L_0000: ldarg.0
+ L_0001: ldfld float64 raytracer.Vec::x
+ L_0006: ldarg.1
+ L_0007: ldfld float64 raytracer.Vec::x
+ L_000c: add
+ */
+
+ {SM_LDARG_0, SM_LDFLD, SM_LDARG_1, SM_LDFLD, SM_ADD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_LDFLD, SM_LDARG_1, SM_LDFLD, SM_SUB, CODE_SEQUENCE_END},
+ // No need for mul and div since they have no mathematical meaning here.
+
+ {SM_LDARGA_S, SM_LDFLD, SM_LDARGA_S, SM_LDFLD, SM_ADD, CODE_SEQUENCE_END},
+ {SM_LDARGA_S, SM_LDFLD, SM_LDARGA_S, SM_LDFLD, SM_SUB, CODE_SEQUENCE_END},
+ // No need for mul and div since they have no mathematical meaning here.
+
+ // The end:
+ {CODE_SEQUENCE_END}};
+
+#endif // defined(DEBUG) || defined(SMGEN_COMPILE)
diff --git a/src/jit/smcommon.h b/src/jit/smcommon.h
new file mode 100644
index 0000000000..0c33e05a7b
--- /dev/null
+++ b/src/jit/smcommon.h
@@ -0,0 +1,50 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// Common headers used both in smgen.exe and the JIT.
+//
+
+#ifndef __sm_common_h__
+#define __sm_common_h__
+
+#include "smopenum.h"
+
+#define NUM_SM_STATES 250
+
+typedef BYTE SM_STATE_ID;
+
+static_assert_no_msg(sizeof(SM_STATE_ID) == 1); // To conserve memory, we don't want to have more than 256 states.
+
+#define SM_STATE_ID_START 1
+
+static_assert_no_msg(SM_STATE_ID_START == 1); // Make sure nobody changes it. We rely on this to map the SM_OPCODE
+ // to single-opcode states. For example, in GetWeightForOpcode().
+
+struct JumpTableCell
+{
+ SM_STATE_ID srcState;
+ SM_STATE_ID destState;
+};
+
+struct SMState
+{
+ bool term; // does this state terminate a code sequence?
+ BYTE length; // the length of currently matched opcodes
+ SM_STATE_ID longestTermState; // the ID of the longest matched terminate state
+
+ SM_STATE_ID prevState; // previous state
+ SM_OPCODE opc; // opcode that leads from the previous state to current state
+
+ unsigned short jumpTableByteOffset;
+};
+
+//
+// Code sequences
+//
+
+#define MAX_CODE_SEQUENCE_LENGTH 7
+#define CODE_SEQUENCE_END ((SM_OPCODE)(SM_COUNT + 1))
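+
+// For example, the two-opcode sequence "ldarg.0; ldfld" is encoded in the s_CodeSeqs
+// table (see smcommon.cpp) as {SM_LDARG_0, SM_LDFLD, CODE_SEQUENCE_END}.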
+
+#endif /* __sm_common_h__ */
diff --git a/src/jit/smdata.cpp b/src/jit/smdata.cpp
new file mode 100644
index 0000000000..9fe00d4984
--- /dev/null
+++ b/src/jit/smdata.cpp
@@ -0,0 +1,705 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//
+// Automatically generated code. DO NOT MODIFY!
+// To regenerate this file, run "smgen.exe > SMData.cpp"
+//
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+#include "jitpch.h"
+//
+// States in the state machine
+//
+// clang-format off
+const SMState g_SMStates[] =
+{
+ // {term, len, lng, prev, SMOpcode and SMOpcodeName , offsets } // state ID and name
+ { 0, 0, 0, 0, (SM_OPCODE) 0 /* noshow */, 0 }, // state 0 [invalid]
+ { 0, 0, 0, 0, (SM_OPCODE) 0 /* noshow */, 0 }, // state 1 [start]
+ { 1, 1, 0, 1, (SM_OPCODE) 0 /* noshow */, 0 }, // state 2 [noshow]
+ { 1, 1, 0, 1, (SM_OPCODE) 1 /* ldarg.0 */, 372 }, // state 3 [ldarg.0]
+ { 1, 1, 0, 1, (SM_OPCODE) 2 /* ldarg.1 */, 168 }, // state 4 [ldarg.1]
+ { 1, 1, 0, 1, (SM_OPCODE) 3 /* ldarg.2 */, 170 }, // state 5 [ldarg.2]
+ { 1, 1, 0, 1, (SM_OPCODE) 4 /* ldarg.3 */, 172 }, // state 6 [ldarg.3]
+ { 1, 1, 0, 1, (SM_OPCODE) 5 /* ldloc.0 */, 0 }, // state 7 [ldloc.0]
+ { 1, 1, 0, 1, (SM_OPCODE) 6 /* ldloc.1 */, 0 }, // state 8 [ldloc.1]
+ { 1, 1, 0, 1, (SM_OPCODE) 7 /* ldloc.2 */, 0 }, // state 9 [ldloc.2]
+ { 1, 1, 0, 1, (SM_OPCODE) 8 /* ldloc.3 */, 0 }, // state 10 [ldloc.3]
+ { 1, 1, 0, 1, (SM_OPCODE) 9 /* stloc.0 */, 378 }, // state 11 [stloc.0]
+ { 1, 1, 0, 1, (SM_OPCODE) 10 /* stloc.1 */, 378 }, // state 12 [stloc.1]
+ { 1, 1, 0, 1, (SM_OPCODE) 11 /* stloc.2 */, 378 }, // state 13 [stloc.2]
+ { 1, 1, 0, 1, (SM_OPCODE) 12 /* stloc.3 */, 378 }, // state 14 [stloc.3]
+ { 1, 1, 0, 1, (SM_OPCODE) 13 /* ldarg.s */, 0 }, // state 15 [ldarg.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 14 /* ldarga.s */, 182 }, // state 16 [ldarga.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 15 /* starg.s */, 0 }, // state 17 [starg.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 16 /* ldloc.s */, 0 }, // state 18 [ldloc.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 17 /* ldloca.s */, 184 }, // state 19 [ldloca.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 18 /* stloc.s */, 0 }, // state 20 [stloc.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 19 /* ldnull */, 0 }, // state 21 [ldnull]
+ { 1, 1, 0, 1, (SM_OPCODE) 20 /* ldc.i4.m1 */, 0 }, // state 22 [ldc.i4.m1]
+ { 1, 1, 0, 1, (SM_OPCODE) 21 /* ldc.i4.0 */, 0 }, // state 23 [ldc.i4.0]
+ { 1, 1, 0, 1, (SM_OPCODE) 22 /* ldc.i4.1 */, 0 }, // state 24 [ldc.i4.1]
+ { 1, 1, 0, 1, (SM_OPCODE) 23 /* ldc.i4.2 */, 0 }, // state 25 [ldc.i4.2]
+ { 1, 1, 0, 1, (SM_OPCODE) 24 /* ldc.i4.3 */, 0 }, // state 26 [ldc.i4.3]
+ { 1, 1, 0, 1, (SM_OPCODE) 25 /* ldc.i4.4 */, 0 }, // state 27 [ldc.i4.4]
+ { 1, 1, 0, 1, (SM_OPCODE) 26 /* ldc.i4.5 */, 0 }, // state 28 [ldc.i4.5]
+ { 1, 1, 0, 1, (SM_OPCODE) 27 /* ldc.i4.6 */, 0 }, // state 29 [ldc.i4.6]
+ { 1, 1, 0, 1, (SM_OPCODE) 28 /* ldc.i4.7 */, 0 }, // state 30 [ldc.i4.7]
+ { 1, 1, 0, 1, (SM_OPCODE) 29 /* ldc.i4.8 */, 0 }, // state 31 [ldc.i4.8]
+ { 1, 1, 0, 1, (SM_OPCODE) 30 /* ldc.i4.s */, 0 }, // state 32 [ldc.i4.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 31 /* ldc.i4 */, 0 }, // state 33 [ldc.i4]
+ { 1, 1, 0, 1, (SM_OPCODE) 32 /* ldc.i8 */, 0 }, // state 34 [ldc.i8]
+ { 1, 1, 0, 1, (SM_OPCODE) 33 /* ldc.r4 */, 252 }, // state 35 [ldc.r4]
+ { 1, 1, 0, 1, (SM_OPCODE) 34 /* ldc.r8 */, 268 }, // state 36 [ldc.r8]
+ { 1, 1, 0, 1, (SM_OPCODE) 35 /* unused */, 0 }, // state 37 [unused]
+ { 1, 1, 0, 1, (SM_OPCODE) 36 /* dup */, 0 }, // state 38 [dup]
+ { 1, 1, 0, 1, (SM_OPCODE) 37 /* pop */, 0 }, // state 39 [pop]
+ { 1, 1, 0, 1, (SM_OPCODE) 38 /* call */, 0 }, // state 40 [call]
+ { 1, 1, 0, 1, (SM_OPCODE) 39 /* calli */, 0 }, // state 41 [calli]
+ { 1, 1, 0, 1, (SM_OPCODE) 40 /* ret */, 0 }, // state 42 [ret]
+ { 1, 1, 0, 1, (SM_OPCODE) 41 /* br.s */, 0 }, // state 43 [br.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 42 /* brfalse.s */, 0 }, // state 44 [brfalse.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 43 /* brtrue.s */, 0 }, // state 45 [brtrue.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 44 /* beq.s */, 0 }, // state 46 [beq.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 45 /* bge.s */, 0 }, // state 47 [bge.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 46 /* bgt.s */, 0 }, // state 48 [bgt.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 47 /* ble.s */, 0 }, // state 49 [ble.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 48 /* blt.s */, 0 }, // state 50 [blt.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 49 /* bne.un.s */, 0 }, // state 51 [bne.un.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 50 /* bge.un.s */, 0 }, // state 52 [bge.un.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 51 /* bgt.un.s */, 0 }, // state 53 [bgt.un.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 52 /* ble.un.s */, 0 }, // state 54 [ble.un.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 53 /* blt.un.s */, 0 }, // state 55 [blt.un.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 54 /* long.branch */, 0 }, // state 56 [long.branch]
+ { 1, 1, 0, 1, (SM_OPCODE) 55 /* switch */, 0 }, // state 57 [switch]
+ { 1, 1, 0, 1, (SM_OPCODE) 56 /* ldind.i1 */, 0 }, // state 58 [ldind.i1]
+ { 1, 1, 0, 1, (SM_OPCODE) 57 /* ldind.u1 */, 0 }, // state 59 [ldind.u1]
+ { 1, 1, 0, 1, (SM_OPCODE) 58 /* ldind.i2 */, 0 }, // state 60 [ldind.i2]
+ { 1, 1, 0, 1, (SM_OPCODE) 59 /* ldind.u2 */, 0 }, // state 61 [ldind.u2]
+ { 1, 1, 0, 1, (SM_OPCODE) 60 /* ldind.i4 */, 0 }, // state 62 [ldind.i4]
+ { 1, 1, 0, 1, (SM_OPCODE) 61 /* ldind.u4 */, 0 }, // state 63 [ldind.u4]
+ { 1, 1, 0, 1, (SM_OPCODE) 62 /* ldind.i8 */, 0 }, // state 64 [ldind.i8]
+ { 1, 1, 0, 1, (SM_OPCODE) 63 /* ldind.i */, 0 }, // state 65 [ldind.i]
+ { 1, 1, 0, 1, (SM_OPCODE) 64 /* ldind.r4 */, 0 }, // state 66 [ldind.r4]
+ { 1, 1, 0, 1, (SM_OPCODE) 65 /* ldind.r8 */, 0 }, // state 67 [ldind.r8]
+ { 1, 1, 0, 1, (SM_OPCODE) 66 /* ldind.ref */, 0 }, // state 68 [ldind.ref]
+ { 1, 1, 0, 1, (SM_OPCODE) 67 /* stind.ref */, 0 }, // state 69 [stind.ref]
+ { 1, 1, 0, 1, (SM_OPCODE) 68 /* stind.i1 */, 0 }, // state 70 [stind.i1]
+ { 1, 1, 0, 1, (SM_OPCODE) 69 /* stind.i2 */, 0 }, // state 71 [stind.i2]
+ { 1, 1, 0, 1, (SM_OPCODE) 70 /* stind.i4 */, 0 }, // state 72 [stind.i4]
+ { 1, 1, 0, 1, (SM_OPCODE) 71 /* stind.i8 */, 0 }, // state 73 [stind.i8]
+ { 1, 1, 0, 1, (SM_OPCODE) 72 /* stind.r4 */, 0 }, // state 74 [stind.r4]
+ { 1, 1, 0, 1, (SM_OPCODE) 73 /* stind.r8 */, 0 }, // state 75 [stind.r8]
+ { 1, 1, 0, 1, (SM_OPCODE) 74 /* add */, 0 }, // state 76 [add]
+ { 1, 1, 0, 1, (SM_OPCODE) 75 /* sub */, 0 }, // state 77 [sub]
+ { 1, 1, 0, 1, (SM_OPCODE) 76 /* mul */, 0 }, // state 78 [mul]
+ { 1, 1, 0, 1, (SM_OPCODE) 77 /* div */, 0 }, // state 79 [div]
+ { 1, 1, 0, 1, (SM_OPCODE) 78 /* div.un */, 0 }, // state 80 [div.un]
+ { 1, 1, 0, 1, (SM_OPCODE) 79 /* rem */, 0 }, // state 81 [rem]
+ { 1, 1, 0, 1, (SM_OPCODE) 80 /* rem.un */, 0 }, // state 82 [rem.un]
+ { 1, 1, 0, 1, (SM_OPCODE) 81 /* and */, 0 }, // state 83 [and]
+ { 1, 1, 0, 1, (SM_OPCODE) 82 /* or */, 0 }, // state 84 [or]
+ { 1, 1, 0, 1, (SM_OPCODE) 83 /* xor */, 0 }, // state 85 [xor]
+ { 1, 1, 0, 1, (SM_OPCODE) 84 /* shl */, 0 }, // state 86 [shl]
+ { 1, 1, 0, 1, (SM_OPCODE) 85 /* shr */, 0 }, // state 87 [shr]
+ { 1, 1, 0, 1, (SM_OPCODE) 86 /* shr.un */, 0 }, // state 88 [shr.un]
+ { 1, 1, 0, 1, (SM_OPCODE) 87 /* neg */, 0 }, // state 89 [neg]
+ { 1, 1, 0, 1, (SM_OPCODE) 88 /* not */, 0 }, // state 90 [not]
+ { 1, 1, 0, 1, (SM_OPCODE) 89 /* conv.i1 */, 0 }, // state 91 [conv.i1]
+ { 1, 1, 0, 1, (SM_OPCODE) 90 /* conv.i2 */, 0 }, // state 92 [conv.i2]
+ { 1, 1, 0, 1, (SM_OPCODE) 91 /* conv.i4 */, 0 }, // state 93 [conv.i4]
+ { 1, 1, 0, 1, (SM_OPCODE) 92 /* conv.i8 */, 0 }, // state 94 [conv.i8]
+ { 1, 1, 0, 1, (SM_OPCODE) 93 /* conv.r4 */, 276 }, // state 95 [conv.r4]
+ { 1, 1, 0, 1, (SM_OPCODE) 94 /* conv.r8 */, 256 }, // state 96 [conv.r8]
+ { 1, 1, 0, 1, (SM_OPCODE) 95 /* conv.u4 */, 0 }, // state 97 [conv.u4]
+ { 1, 1, 0, 1, (SM_OPCODE) 96 /* conv.u8 */, 0 }, // state 98 [conv.u8]
+ { 1, 1, 0, 1, (SM_OPCODE) 97 /* callvirt */, 0 }, // state 99 [callvirt]
+ { 1, 1, 0, 1, (SM_OPCODE) 98 /* cpobj */, 0 }, // state 100 [cpobj]
+ { 1, 1, 0, 1, (SM_OPCODE) 99 /* ldobj */, 0 }, // state 101 [ldobj]
+ { 1, 1, 0, 1, (SM_OPCODE)100 /* ldstr */, 0 }, // state 102 [ldstr]
+ { 1, 1, 0, 1, (SM_OPCODE)101 /* newobj */, 0 }, // state 103 [newobj]
+ { 1, 1, 0, 1, (SM_OPCODE)102 /* castclass */, 0 }, // state 104 [castclass]
+ { 1, 1, 0, 1, (SM_OPCODE)103 /* isinst */, 0 }, // state 105 [isinst]
+ { 1, 1, 0, 1, (SM_OPCODE)104 /* conv.r.un */, 0 }, // state 106 [conv.r.un]
+ { 1, 1, 0, 1, (SM_OPCODE)105 /* unbox */, 0 }, // state 107 [unbox]
+ { 1, 1, 0, 1, (SM_OPCODE)106 /* throw */, 0 }, // state 108 [throw]
+ { 1, 1, 0, 1, (SM_OPCODE)107 /* ldfld */, 0 }, // state 109 [ldfld]
+ { 1, 1, 0, 1, (SM_OPCODE)108 /* ldflda */, 0 }, // state 110 [ldflda]
+ { 1, 1, 0, 1, (SM_OPCODE)109 /* stfld */, 0 }, // state 111 [stfld]
+ { 1, 1, 0, 1, (SM_OPCODE)110 /* ldsfld */, 0 }, // state 112 [ldsfld]
+ { 1, 1, 0, 1, (SM_OPCODE)111 /* ldsflda */, 0 }, // state 113 [ldsflda]
+ { 1, 1, 0, 1, (SM_OPCODE)112 /* stsfld */, 0 }, // state 114 [stsfld]
+ { 1, 1, 0, 1, (SM_OPCODE)113 /* stobj */, 0 }, // state 115 [stobj]
+ { 1, 1, 0, 1, (SM_OPCODE)114 /* ovf.notype.un */, 0 }, // state 116 [ovf.notype.un]
+ { 1, 1, 0, 1, (SM_OPCODE)115 /* box */, 0 }, // state 117 [box]
+ { 1, 1, 0, 1, (SM_OPCODE)116 /* newarr */, 0 }, // state 118 [newarr]
+ { 1, 1, 0, 1, (SM_OPCODE)117 /* ldlen */, 0 }, // state 119 [ldlen]
+ { 1, 1, 0, 1, (SM_OPCODE)118 /* ldelema */, 0 }, // state 120 [ldelema]
+ { 1, 1, 0, 1, (SM_OPCODE)119 /* ldelem.i1 */, 0 }, // state 121 [ldelem.i1]
+ { 1, 1, 0, 1, (SM_OPCODE)120 /* ldelem.u1 */, 0 }, // state 122 [ldelem.u1]
+ { 1, 1, 0, 1, (SM_OPCODE)121 /* ldelem.i2 */, 0 }, // state 123 [ldelem.i2]
+ { 1, 1, 0, 1, (SM_OPCODE)122 /* ldelem.u2 */, 0 }, // state 124 [ldelem.u2]
+ { 1, 1, 0, 1, (SM_OPCODE)123 /* ldelem.i4 */, 0 }, // state 125 [ldelem.i4]
+ { 1, 1, 0, 1, (SM_OPCODE)124 /* ldelem.u4 */, 0 }, // state 126 [ldelem.u4]
+ { 1, 1, 0, 1, (SM_OPCODE)125 /* ldelem.i8 */, 0 }, // state 127 [ldelem.i8]
+ { 1, 1, 0, 1, (SM_OPCODE)126 /* ldelem.i */, 0 }, // state 128 [ldelem.i]
+ { 1, 1, 0, 1, (SM_OPCODE)127 /* ldelem.r4 */, 0 }, // state 129 [ldelem.r4]
+ { 1, 1, 0, 1, (SM_OPCODE)128 /* ldelem.r8 */, 0 }, // state 130 [ldelem.r8]
+ { 1, 1, 0, 1, (SM_OPCODE)129 /* ldelem.ref */, 0 }, // state 131 [ldelem.ref]
+ { 1, 1, 0, 1, (SM_OPCODE)130 /* stelem.i */, 0 }, // state 132 [stelem.i]
+ { 1, 1, 0, 1, (SM_OPCODE)131 /* stelem.i1 */, 0 }, // state 133 [stelem.i1]
+ { 1, 1, 0, 1, (SM_OPCODE)132 /* stelem.i2 */, 0 }, // state 134 [stelem.i2]
+ { 1, 1, 0, 1, (SM_OPCODE)133 /* stelem.i4 */, 0 }, // state 135 [stelem.i4]
+ { 1, 1, 0, 1, (SM_OPCODE)134 /* stelem.i8 */, 0 }, // state 136 [stelem.i8]
+ { 1, 1, 0, 1, (SM_OPCODE)135 /* stelem.r4 */, 0 }, // state 137 [stelem.r4]
+ { 1, 1, 0, 1, (SM_OPCODE)136 /* stelem.r8 */, 0 }, // state 138 [stelem.r8]
+ { 1, 1, 0, 1, (SM_OPCODE)137 /* stelem.ref */, 0 }, // state 139 [stelem.ref]
+ { 1, 1, 0, 1, (SM_OPCODE)138 /* ldelem */, 0 }, // state 140 [ldelem]
+ { 1, 1, 0, 1, (SM_OPCODE)139 /* stelem */, 0 }, // state 141 [stelem]
+ { 1, 1, 0, 1, (SM_OPCODE)140 /* unbox.any */, 0 }, // state 142 [unbox.any]
+ { 1, 1, 0, 1, (SM_OPCODE)141 /* conv.ovf.i1 */, 0 }, // state 143 [conv.ovf.i1]
+ { 1, 1, 0, 1, (SM_OPCODE)142 /* conv.ovf.u1 */, 0 }, // state 144 [conv.ovf.u1]
+ { 1, 1, 0, 1, (SM_OPCODE)143 /* conv.ovf.i2 */, 0 }, // state 145 [conv.ovf.i2]
+ { 1, 1, 0, 1, (SM_OPCODE)144 /* conv.ovf.u2 */, 0 }, // state 146 [conv.ovf.u2]
+ { 1, 1, 0, 1, (SM_OPCODE)145 /* conv.ovf.i4 */, 0 }, // state 147 [conv.ovf.i4]
+ { 1, 1, 0, 1, (SM_OPCODE)146 /* conv.ovf.u4 */, 0 }, // state 148 [conv.ovf.u4]
+ { 1, 1, 0, 1, (SM_OPCODE)147 /* conv.ovf.i8 */, 0 }, // state 149 [conv.ovf.i8]
+ { 1, 1, 0, 1, (SM_OPCODE)148 /* conv.ovf.u8 */, 0 }, // state 150 [conv.ovf.u8]
+ { 1, 1, 0, 1, (SM_OPCODE)149 /* refanyval */, 0 }, // state 151 [refanyval]
+ { 1, 1, 0, 1, (SM_OPCODE)150 /* ckfinite */, 0 }, // state 152 [ckfinite]
+ { 1, 1, 0, 1, (SM_OPCODE)151 /* mkrefany */, 0 }, // state 153 [mkrefany]
+ { 1, 1, 0, 1, (SM_OPCODE)152 /* ldtoken */, 0 }, // state 154 [ldtoken]
+ { 1, 1, 0, 1, (SM_OPCODE)153 /* conv.u2 */, 0 }, // state 155 [conv.u2]
+ { 1, 1, 0, 1, (SM_OPCODE)154 /* conv.u1 */, 0 }, // state 156 [conv.u1]
+ { 1, 1, 0, 1, (SM_OPCODE)155 /* conv.i */, 0 }, // state 157 [conv.i]
+ { 1, 1, 0, 1, (SM_OPCODE)156 /* conv.ovf.i */, 0 }, // state 158 [conv.ovf.i]
+ { 1, 1, 0, 1, (SM_OPCODE)157 /* conv.ovf.u */, 0 }, // state 159 [conv.ovf.u]
+ { 1, 1, 0, 1, (SM_OPCODE)158 /* add.ovf */, 0 }, // state 160 [add.ovf]
+ { 1, 1, 0, 1, (SM_OPCODE)159 /* mul.ovf */, 0 }, // state 161 [mul.ovf]
+ { 1, 1, 0, 1, (SM_OPCODE)160 /* sub.ovf */, 0 }, // state 162 [sub.ovf]
+ { 1, 1, 0, 1, (SM_OPCODE)161 /* leave.s */, 0 }, // state 163 [leave.s]
+ { 1, 1, 0, 1, (SM_OPCODE)162 /* stind.i */, 0 }, // state 164 [stind.i]
+ { 1, 1, 0, 1, (SM_OPCODE)163 /* conv.u */, 0 }, // state 165 [conv.u]
+ { 1, 1, 0, 1, (SM_OPCODE)164 /* prefix.n */, 0 }, // state 166 [prefix.n]
+ { 1, 1, 0, 1, (SM_OPCODE)165 /* arglist */, 0 }, // state 167 [arglist]
+ { 1, 1, 0, 1, (SM_OPCODE)166 /* ceq */, 0 }, // state 168 [ceq]
+ { 1, 1, 0, 1, (SM_OPCODE)167 /* cgt */, 0 }, // state 169 [cgt]
+ { 1, 1, 0, 1, (SM_OPCODE)168 /* cgt.un */, 0 }, // state 170 [cgt.un]
+ { 1, 1, 0, 1, (SM_OPCODE)169 /* clt */, 0 }, // state 171 [clt]
+ { 1, 1, 0, 1, (SM_OPCODE)170 /* clt.un */, 0 }, // state 172 [clt.un]
+ { 1, 1, 0, 1, (SM_OPCODE)171 /* ldftn */, 0 }, // state 173 [ldftn]
+ { 1, 1, 0, 1, (SM_OPCODE)172 /* ldvirtftn */, 0 }, // state 174 [ldvirtftn]
+ { 1, 1, 0, 1, (SM_OPCODE)173 /* long.loc.arg */, 0 }, // state 175 [long.loc.arg]
+ { 1, 1, 0, 1, (SM_OPCODE)174 /* localloc */, 0 }, // state 176 [localloc]
+ { 1, 1, 0, 1, (SM_OPCODE)175 /* unaligned */, 0 }, // state 177 [unaligned]
+ { 1, 1, 0, 1, (SM_OPCODE)176 /* volatile */, 0 }, // state 178 [volatile]
+ { 1, 1, 0, 1, (SM_OPCODE)177 /* tailcall */, 0 }, // state 179 [tailcall]
+ { 1, 1, 0, 1, (SM_OPCODE)178 /* initobj */, 0 }, // state 180 [initobj]
+ { 1, 1, 0, 1, (SM_OPCODE)179 /* constrained */, 218 }, // state 181 [constrained]
+ { 1, 1, 0, 1, (SM_OPCODE)180 /* cpblk */, 0 }, // state 182 [cpblk]
+ { 1, 1, 0, 1, (SM_OPCODE)181 /* initblk */, 0 }, // state 183 [initblk]
+ { 1, 1, 0, 1, (SM_OPCODE)182 /* rethrow */, 0 }, // state 184 [rethrow]
+ { 1, 1, 0, 1, (SM_OPCODE)183 /* sizeof */, 0 }, // state 185 [sizeof]
+ { 1, 1, 0, 1, (SM_OPCODE)184 /* refanytype */, 0 }, // state 186 [refanytype]
+ { 1, 1, 0, 1, (SM_OPCODE)185 /* readonly */, 0 }, // state 187 [readonly]
+ { 1, 1, 0, 1, (SM_OPCODE)186 /* ldarga.s.normed */, 218 }, // state 188 [ldarga.s.normed]
+ { 1, 1, 0, 1, (SM_OPCODE)187 /* ldloca.s.normed */, 220 }, // state 189 [ldloca.s.normed]
+ { 1, 2, 181, 181, (SM_OPCODE) 97 /* callvirt */, 0 }, // state 190 [constrained -> callvirt]
+ { 1, 2, 3, 3, (SM_OPCODE)107 /* ldfld */, 432 }, // state 191 [ldarg.0 -> ldfld]
+ { 1, 2, 4, 4, (SM_OPCODE)107 /* ldfld */, 0 }, // state 192 [ldarg.1 -> ldfld]
+ { 1, 2, 5, 5, (SM_OPCODE)107 /* ldfld */, 0 }, // state 193 [ldarg.2 -> ldfld]
+ { 1, 2, 6, 6, (SM_OPCODE)107 /* ldfld */, 0 }, // state 194 [ldarg.3 -> ldfld]
+ { 1, 2, 16, 16, (SM_OPCODE)107 /* ldfld */, 414 }, // state 195 [ldarga.s -> ldfld]
+ { 1, 2, 19, 19, (SM_OPCODE)107 /* ldfld */, 0 }, // state 196 [ldloca.s -> ldfld]
+ { 1, 2, 188, 188, (SM_OPCODE)107 /* ldfld */, 0 }, // state 197 [ldarga.s.normed -> ldfld]
+ { 1, 2, 189, 189, (SM_OPCODE)107 /* ldfld */, 0 }, // state 198 [ldloca.s.normed -> ldfld]
+ { 1, 2, 11, 11, (SM_OPCODE) 5 /* ldloc.0 */, 0 }, // state 199 [stloc.0 -> ldloc.0]
+ { 1, 2, 12, 12, (SM_OPCODE) 6 /* ldloc.1 */, 0 }, // state 200 [stloc.1 -> ldloc.1]
+ { 1, 2, 13, 13, (SM_OPCODE) 7 /* ldloc.2 */, 0 }, // state 201 [stloc.2 -> ldloc.2]
+ { 1, 2, 14, 14, (SM_OPCODE) 8 /* ldloc.3 */, 0 }, // state 202 [stloc.3 -> ldloc.3]
+ { 1, 2, 35, 35, (SM_OPCODE) 74 /* add */, 0 }, // state 203 [ldc.r4 -> add]
+ { 1, 2, 35, 35, (SM_OPCODE) 75 /* sub */, 0 }, // state 204 [ldc.r4 -> sub]
+ { 1, 2, 35, 35, (SM_OPCODE) 76 /* mul */, 0 }, // state 205 [ldc.r4 -> mul]
+ { 1, 2, 35, 35, (SM_OPCODE) 77 /* div */, 0 }, // state 206 [ldc.r4 -> div]
+ { 1, 2, 36, 36, (SM_OPCODE) 74 /* add */, 0 }, // state 207 [ldc.r8 -> add]
+ { 1, 2, 36, 36, (SM_OPCODE) 75 /* sub */, 0 }, // state 208 [ldc.r8 -> sub]
+ { 1, 2, 36, 36, (SM_OPCODE) 76 /* mul */, 0 }, // state 209 [ldc.r8 -> mul]
+ { 1, 2, 36, 36, (SM_OPCODE) 77 /* div */, 0 }, // state 210 [ldc.r8 -> div]
+ { 1, 2, 95, 95, (SM_OPCODE) 74 /* add */, 0 }, // state 211 [conv.r4 -> add]
+ { 1, 2, 95, 95, (SM_OPCODE) 75 /* sub */, 0 }, // state 212 [conv.r4 -> sub]
+ { 1, 2, 95, 95, (SM_OPCODE) 76 /* mul */, 0 }, // state 213 [conv.r4 -> mul]
+ { 1, 2, 95, 95, (SM_OPCODE) 77 /* div */, 0 }, // state 214 [conv.r4 -> div]
+ { 1, 2, 96, 96, (SM_OPCODE) 76 /* mul */, 0 }, // state 215 [conv.r8 -> mul]
+ { 1, 2, 96, 96, (SM_OPCODE) 77 /* div */, 0 }, // state 216 [conv.r8 -> div]
+ { 0, 2, 3, 3, (SM_OPCODE) 21 /* ldc.i4.0 */, 228 }, // state 217 [ldarg.0 -> ldc.i4.0]
+ { 1, 3, 3, 217, (SM_OPCODE)109 /* stfld */, 0 }, // state 218 [ldarg.0 -> ldc.i4.0 -> stfld]
+ { 0, 2, 3, 3, (SM_OPCODE) 33 /* ldc.r4 */, 230 }, // state 219 [ldarg.0 -> ldc.r4]
+ { 1, 3, 3, 219, (SM_OPCODE)109 /* stfld */, 0 }, // state 220 [ldarg.0 -> ldc.r4 -> stfld]
+ { 0, 2, 3, 3, (SM_OPCODE) 34 /* ldc.r8 */, 232 }, // state 221 [ldarg.0 -> ldc.r8]
+ { 1, 3, 3, 221, (SM_OPCODE)109 /* stfld */, 0 }, // state 222 [ldarg.0 -> ldc.r8 -> stfld]
+ { 0, 2, 3, 3, (SM_OPCODE) 2 /* ldarg.1 */, 238 }, // state 223 [ldarg.0 -> ldarg.1]
+ { 0, 3, 3, 223, (SM_OPCODE)107 /* ldfld */, 236 }, // state 224 [ldarg.0 -> ldarg.1 -> ldfld]
+ { 1, 4, 3, 224, (SM_OPCODE)109 /* stfld */, 0 }, // state 225 [ldarg.0 -> ldarg.1 -> ldfld -> stfld]
+ { 1, 3, 3, 223, (SM_OPCODE)109 /* stfld */, 0 }, // state 226 [ldarg.0 -> ldarg.1 -> stfld]
+ { 0, 2, 3, 3, (SM_OPCODE) 3 /* ldarg.2 */, 240 }, // state 227 [ldarg.0 -> ldarg.2]
+ { 1, 3, 3, 227, (SM_OPCODE)109 /* stfld */, 0 }, // state 228 [ldarg.0 -> ldarg.2 -> stfld]
+ { 0, 2, 3, 3, (SM_OPCODE) 4 /* ldarg.3 */, 242 }, // state 229 [ldarg.0 -> ldarg.3]
+ { 1, 3, 3, 229, (SM_OPCODE)109 /* stfld */, 0 }, // state 230 [ldarg.0 -> ldarg.3 -> stfld]
+ { 0, 2, 3, 3, (SM_OPCODE) 36 /* dup */, 248 }, // state 231 [ldarg.0 -> dup]
+ { 0, 3, 3, 231, (SM_OPCODE)107 /* ldfld */, 460 }, // state 232 [ldarg.0 -> dup -> ldfld]
+ { 0, 4, 3, 232, (SM_OPCODE) 2 /* ldarg.1 */, 318 }, // state 233 [ldarg.0 -> dup -> ldfld -> ldarg.1]
+ { 0, 5, 3, 233, (SM_OPCODE) 74 /* add */, 256 }, // state 234 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> add]
+ { 1, 6, 3, 234, (SM_OPCODE)109 /* stfld */, 0 }, // state 235 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> add -> stfld]
+ { 0, 5, 3, 233, (SM_OPCODE) 75 /* sub */, 258 }, // state 236 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> sub]
+ { 1, 6, 3, 236, (SM_OPCODE)109 /* stfld */, 0 }, // state 237 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> sub -> stfld]
+ { 0, 5, 3, 233, (SM_OPCODE) 76 /* mul */, 260 }, // state 238 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> mul]
+ { 1, 6, 3, 238, (SM_OPCODE)109 /* stfld */, 0 }, // state 239 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> mul -> stfld]
+ { 0, 5, 3, 233, (SM_OPCODE) 77 /* div */, 262 }, // state 240 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> div]
+ { 1, 6, 3, 240, (SM_OPCODE)109 /* stfld */, 0 }, // state 241 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> div -> stfld]
+ { 0, 3, 191, 191, (SM_OPCODE) 2 /* ldarg.1 */, 268 }, // state 242 [ldarg.0 -> ldfld -> ldarg.1]
+ { 0, 4, 191, 242, (SM_OPCODE)107 /* ldfld */, 336 }, // state 243 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld]
+ { 1, 5, 191, 243, (SM_OPCODE) 74 /* add */, 0 }, // state 244 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld -> add]
+ { 1, 5, 191, 243, (SM_OPCODE) 75 /* sub */, 0 }, // state 245 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld -> sub]
+ { 0, 3, 195, 195, (SM_OPCODE) 14 /* ldarga.s */, 274 }, // state 246 [ldarga.s -> ldfld -> ldarga.s]
+ { 0, 4, 195, 246, (SM_OPCODE)107 /* ldfld */, 342 }, // state 247 [ldarga.s -> ldfld -> ldarga.s -> ldfld]
+ { 1, 5, 195, 247, (SM_OPCODE) 74 /* add */, 0 }, // state 248 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> add]
+ { 1, 5, 195, 247, (SM_OPCODE) 75 /* sub */, 0 }, // state 249 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> sub]
+};
+// clang-format on
+
+static_assert_no_msg(NUM_SM_STATES == sizeof(g_SMStates) / sizeof(g_SMStates[0]));
+
+const SMState* gp_SMStates = g_SMStates;
+
+//
+// JumpTableCells in the state machine
+//
+// clang-format off
+const JumpTableCell g_SMJumpTableCells[] =
+{
+ // {src, dest }
+ { 1, 2 }, // cell# 0 : state 1 [start] --(0 noshow)--> state 2 [noshow]
+ { 1, 3 }, // cell# 1 : state 1 [start] --(1 ldarg.0)--> state 3 [ldarg.0]
+ { 1, 4 }, // cell# 2 : state 1 [start] --(2 ldarg.1)--> state 4 [ldarg.1]
+ { 1, 5 }, // cell# 3 : state 1 [start] --(3 ldarg.2)--> state 5 [ldarg.2]
+ { 1, 6 }, // cell# 4 : state 1 [start] --(4 ldarg.3)--> state 6 [ldarg.3]
+ { 1, 7 }, // cell# 5 : state 1 [start] --(5 ldloc.0)--> state 7 [ldloc.0]
+ { 1, 8 }, // cell# 6 : state 1 [start] --(6 ldloc.1)--> state 8 [ldloc.1]
+ { 1, 9 }, // cell# 7 : state 1 [start] --(7 ldloc.2)--> state 9 [ldloc.2]
+ { 1, 10 }, // cell# 8 : state 1 [start] --(8 ldloc.3)--> state 10 [ldloc.3]
+ { 1, 11 }, // cell# 9 : state 1 [start] --(9 stloc.0)--> state 11 [stloc.0]
+ { 1, 12 }, // cell# 10 : state 1 [start] --(10 stloc.1)--> state 12 [stloc.1]
+ { 1, 13 }, // cell# 11 : state 1 [start] --(11 stloc.2)--> state 13 [stloc.2]
+ { 1, 14 }, // cell# 12 : state 1 [start] --(12 stloc.3)--> state 14 [stloc.3]
+ { 1, 15 }, // cell# 13 : state 1 [start] --(13 ldarg.s)--> state 15 [ldarg.s]
+ { 1, 16 }, // cell# 14 : state 1 [start] --(14 ldarga.s)--> state 16 [ldarga.s]
+ { 1, 17 }, // cell# 15 : state 1 [start] --(15 starg.s)--> state 17 [starg.s]
+ { 1, 18 }, // cell# 16 : state 1 [start] --(16 ldloc.s)--> state 18 [ldloc.s]
+ { 1, 19 }, // cell# 17 : state 1 [start] --(17 ldloca.s)--> state 19 [ldloca.s]
+ { 1, 20 }, // cell# 18 : state 1 [start] --(18 stloc.s)--> state 20 [stloc.s]
+ { 1, 21 }, // cell# 19 : state 1 [start] --(19 ldnull)--> state 21 [ldnull]
+ { 1, 22 }, // cell# 20 : state 1 [start] --(20 ldc.i4.m1)--> state 22 [ldc.i4.m1]
+ { 1, 23 }, // cell# 21 : state 1 [start] --(21 ldc.i4.0)--> state 23 [ldc.i4.0]
+ { 1, 24 }, // cell# 22 : state 1 [start] --(22 ldc.i4.1)--> state 24 [ldc.i4.1]
+ { 1, 25 }, // cell# 23 : state 1 [start] --(23 ldc.i4.2)--> state 25 [ldc.i4.2]
+ { 1, 26 }, // cell# 24 : state 1 [start] --(24 ldc.i4.3)--> state 26 [ldc.i4.3]
+ { 1, 27 }, // cell# 25 : state 1 [start] --(25 ldc.i4.4)--> state 27 [ldc.i4.4]
+ { 1, 28 }, // cell# 26 : state 1 [start] --(26 ldc.i4.5)--> state 28 [ldc.i4.5]
+ { 1, 29 }, // cell# 27 : state 1 [start] --(27 ldc.i4.6)--> state 29 [ldc.i4.6]
+ { 1, 30 }, // cell# 28 : state 1 [start] --(28 ldc.i4.7)--> state 30 [ldc.i4.7]
+ { 1, 31 }, // cell# 29 : state 1 [start] --(29 ldc.i4.8)--> state 31 [ldc.i4.8]
+ { 1, 32 }, // cell# 30 : state 1 [start] --(30 ldc.i4.s)--> state 32 [ldc.i4.s]
+ { 1, 33 }, // cell# 31 : state 1 [start] --(31 ldc.i4)--> state 33 [ldc.i4]
+ { 1, 34 }, // cell# 32 : state 1 [start] --(32 ldc.i8)--> state 34 [ldc.i8]
+ { 1, 35 }, // cell# 33 : state 1 [start] --(33 ldc.r4)--> state 35 [ldc.r4]
+ { 1, 36 }, // cell# 34 : state 1 [start] --(34 ldc.r8)--> state 36 [ldc.r8]
+ { 1, 37 }, // cell# 35 : state 1 [start] --(35 unused)--> state 37 [unused]
+ { 1, 38 }, // cell# 36 : state 1 [start] --(36 dup)--> state 38 [dup]
+ { 1, 39 }, // cell# 37 : state 1 [start] --(37 pop)--> state 39 [pop]
+ { 1, 40 }, // cell# 38 : state 1 [start] --(38 call)--> state 40 [call]
+ { 1, 41 }, // cell# 39 : state 1 [start] --(39 calli)--> state 41 [calli]
+ { 1, 42 }, // cell# 40 : state 1 [start] --(40 ret)--> state 42 [ret]
+ { 1, 43 }, // cell# 41 : state 1 [start] --(41 br.s)--> state 43 [br.s]
+ { 1, 44 }, // cell# 42 : state 1 [start] --(42 brfalse.s)--> state 44 [brfalse.s]
+ { 1, 45 }, // cell# 43 : state 1 [start] --(43 brtrue.s)--> state 45 [brtrue.s]
+ { 1, 46 }, // cell# 44 : state 1 [start] --(44 beq.s)--> state 46 [beq.s]
+ { 1, 47 }, // cell# 45 : state 1 [start] --(45 bge.s)--> state 47 [bge.s]
+ { 1, 48 }, // cell# 46 : state 1 [start] --(46 bgt.s)--> state 48 [bgt.s]
+ { 1, 49 }, // cell# 47 : state 1 [start] --(47 ble.s)--> state 49 [ble.s]
+ { 1, 50 }, // cell# 48 : state 1 [start] --(48 blt.s)--> state 50 [blt.s]
+ { 1, 51 }, // cell# 49 : state 1 [start] --(49 bne.un.s)--> state 51 [bne.un.s]
+ { 1, 52 }, // cell# 50 : state 1 [start] --(50 bge.un.s)--> state 52 [bge.un.s]
+ { 1, 53 }, // cell# 51 : state 1 [start] --(51 bgt.un.s)--> state 53 [bgt.un.s]
+ { 1, 54 }, // cell# 52 : state 1 [start] --(52 ble.un.s)--> state 54 [ble.un.s]
+ { 1, 55 }, // cell# 53 : state 1 [start] --(53 blt.un.s)--> state 55 [blt.un.s]
+ { 1, 56 }, // cell# 54 : state 1 [start] --(54 long.branch)--> state 56 [long.branch]
+ { 1, 57 }, // cell# 55 : state 1 [start] --(55 switch)--> state 57 [switch]
+ { 1, 58 }, // cell# 56 : state 1 [start] --(56 ldind.i1)--> state 58 [ldind.i1]
+ { 1, 59 }, // cell# 57 : state 1 [start] --(57 ldind.u1)--> state 59 [ldind.u1]
+ { 1, 60 }, // cell# 58 : state 1 [start] --(58 ldind.i2)--> state 60 [ldind.i2]
+ { 1, 61 }, // cell# 59 : state 1 [start] --(59 ldind.u2)--> state 61 [ldind.u2]
+ { 1, 62 }, // cell# 60 : state 1 [start] --(60 ldind.i4)--> state 62 [ldind.i4]
+ { 1, 63 }, // cell# 61 : state 1 [start] --(61 ldind.u4)--> state 63 [ldind.u4]
+ { 1, 64 }, // cell# 62 : state 1 [start] --(62 ldind.i8)--> state 64 [ldind.i8]
+ { 1, 65 }, // cell# 63 : state 1 [start] --(63 ldind.i)--> state 65 [ldind.i]
+ { 1, 66 }, // cell# 64 : state 1 [start] --(64 ldind.r4)--> state 66 [ldind.r4]
+ { 1, 67 }, // cell# 65 : state 1 [start] --(65 ldind.r8)--> state 67 [ldind.r8]
+ { 1, 68 }, // cell# 66 : state 1 [start] --(66 ldind.ref)--> state 68 [ldind.ref]
+ { 1, 69 }, // cell# 67 : state 1 [start] --(67 stind.ref)--> state 69 [stind.ref]
+ { 1, 70 }, // cell# 68 : state 1 [start] --(68 stind.i1)--> state 70 [stind.i1]
+ { 1, 71 }, // cell# 69 : state 1 [start] --(69 stind.i2)--> state 71 [stind.i2]
+ { 1, 72 }, // cell# 70 : state 1 [start] --(70 stind.i4)--> state 72 [stind.i4]
+ { 1, 73 }, // cell# 71 : state 1 [start] --(71 stind.i8)--> state 73 [stind.i8]
+ { 1, 74 }, // cell# 72 : state 1 [start] --(72 stind.r4)--> state 74 [stind.r4]
+ { 1, 75 }, // cell# 73 : state 1 [start] --(73 stind.r8)--> state 75 [stind.r8]
+ { 1, 76 }, // cell# 74 : state 1 [start] --(74 add)--> state 76 [add]
+ { 1, 77 }, // cell# 75 : state 1 [start] --(75 sub)--> state 77 [sub]
+ { 1, 78 }, // cell# 76 : state 1 [start] --(76 mul)--> state 78 [mul]
+ { 1, 79 }, // cell# 77 : state 1 [start] --(77 div)--> state 79 [div]
+ { 1, 80 }, // cell# 78 : state 1 [start] --(78 div.un)--> state 80 [div.un]
+ { 1, 81 }, // cell# 79 : state 1 [start] --(79 rem)--> state 81 [rem]
+ { 1, 82 }, // cell# 80 : state 1 [start] --(80 rem.un)--> state 82 [rem.un]
+ { 1, 83 }, // cell# 81 : state 1 [start] --(81 and)--> state 83 [and]
+ { 1, 84 }, // cell# 82 : state 1 [start] --(82 or)--> state 84 [or]
+ { 1, 85 }, // cell# 83 : state 1 [start] --(83 xor)--> state 85 [xor]
+ { 1, 86 }, // cell# 84 : state 1 [start] --(84 shl)--> state 86 [shl]
+ { 1, 87 }, // cell# 85 : state 1 [start] --(85 shr)--> state 87 [shr]
+ { 1, 88 }, // cell# 86 : state 1 [start] --(86 shr.un)--> state 88 [shr.un]
+ { 1, 89 }, // cell# 87 : state 1 [start] --(87 neg)--> state 89 [neg]
+ { 1, 90 }, // cell# 88 : state 1 [start] --(88 not)--> state 90 [not]
+ { 1, 91 }, // cell# 89 : state 1 [start] --(89 conv.i1)--> state 91 [conv.i1]
+ { 1, 92 }, // cell# 90 : state 1 [start] --(90 conv.i2)--> state 92 [conv.i2]
+ { 1, 93 }, // cell# 91 : state 1 [start] --(91 conv.i4)--> state 93 [conv.i4]
+ { 1, 94 }, // cell# 92 : state 1 [start] --(92 conv.i8)--> state 94 [conv.i8]
+ { 1, 95 }, // cell# 93 : state 1 [start] --(93 conv.r4)--> state 95 [conv.r4]
+ { 1, 96 }, // cell# 94 : state 1 [start] --(94 conv.r8)--> state 96 [conv.r8]
+ { 1, 97 }, // cell# 95 : state 1 [start] --(95 conv.u4)--> state 97 [conv.u4]
+ { 1, 98 }, // cell# 96 : state 1 [start] --(96 conv.u8)--> state 98 [conv.u8]
+ { 1, 99 }, // cell# 97 : state 1 [start] --(97 callvirt)--> state 99 [callvirt]
+ { 1, 100 }, // cell# 98 : state 1 [start] --(98 cpobj)--> state 100 [cpobj]
+ { 1, 101 }, // cell# 99 : state 1 [start] --(99 ldobj)--> state 101 [ldobj]
+ { 1, 102 }, // cell# 100 : state 1 [start] --(100 ldstr)--> state 102 [ldstr]
+ { 1, 103 }, // cell# 101 : state 1 [start] --(101 newobj)--> state 103 [newobj]
+ { 1, 104 }, // cell# 102 : state 1 [start] --(102 castclass)--> state 104 [castclass]
+ { 1, 105 }, // cell# 103 : state 1 [start] --(103 isinst)--> state 105 [isinst]
+ { 1, 106 }, // cell# 104 : state 1 [start] --(104 conv.r.un)--> state 106 [conv.r.un]
+ { 1, 107 }, // cell# 105 : state 1 [start] --(105 unbox)--> state 107 [unbox]
+ { 1, 108 }, // cell# 106 : state 1 [start] --(106 throw)--> state 108 [throw]
+ { 1, 109 }, // cell# 107 : state 1 [start] --(107 ldfld)--> state 109 [ldfld]
+ { 1, 110 }, // cell# 108 : state 1 [start] --(108 ldflda)--> state 110 [ldflda]
+ { 1, 111 }, // cell# 109 : state 1 [start] --(109 stfld)--> state 111 [stfld]
+ { 1, 112 }, // cell# 110 : state 1 [start] --(110 ldsfld)--> state 112 [ldsfld]
+ { 1, 113 }, // cell# 111 : state 1 [start] --(111 ldsflda)--> state 113 [ldsflda]
+ { 1, 114 }, // cell# 112 : state 1 [start] --(112 stsfld)--> state 114 [stsfld]
+ { 1, 115 }, // cell# 113 : state 1 [start] --(113 stobj)--> state 115 [stobj]
+ { 1, 116 }, // cell# 114 : state 1 [start] --(114 ovf.notype.un)--> state 116 [ovf.notype.un]
+ { 1, 117 }, // cell# 115 : state 1 [start] --(115 box)--> state 117 [box]
+ { 1, 118 }, // cell# 116 : state 1 [start] --(116 newarr)--> state 118 [newarr]
+ { 1, 119 }, // cell# 117 : state 1 [start] --(117 ldlen)--> state 119 [ldlen]
+ { 1, 120 }, // cell# 118 : state 1 [start] --(118 ldelema)--> state 120 [ldelema]
+ { 1, 121 }, // cell# 119 : state 1 [start] --(119 ldelem.i1)--> state 121 [ldelem.i1]
+ { 1, 122 }, // cell# 120 : state 1 [start] --(120 ldelem.u1)--> state 122 [ldelem.u1]
+ { 1, 123 }, // cell# 121 : state 1 [start] --(121 ldelem.i2)--> state 123 [ldelem.i2]
+ { 1, 124 }, // cell# 122 : state 1 [start] --(122 ldelem.u2)--> state 124 [ldelem.u2]
+ { 1, 125 }, // cell# 123 : state 1 [start] --(123 ldelem.i4)--> state 125 [ldelem.i4]
+ { 1, 126 }, // cell# 124 : state 1 [start] --(124 ldelem.u4)--> state 126 [ldelem.u4]
+ { 1, 127 }, // cell# 125 : state 1 [start] --(125 ldelem.i8)--> state 127 [ldelem.i8]
+ { 1, 128 }, // cell# 126 : state 1 [start] --(126 ldelem.i)--> state 128 [ldelem.i]
+ { 1, 129 }, // cell# 127 : state 1 [start] --(127 ldelem.r4)--> state 129 [ldelem.r4]
+ { 1, 130 }, // cell# 128 : state 1 [start] --(128 ldelem.r8)--> state 130 [ldelem.r8]
+ { 1, 131 }, // cell# 129 : state 1 [start] --(129 ldelem.ref)--> state 131 [ldelem.ref]
+ { 1, 132 }, // cell# 130 : state 1 [start] --(130 stelem.i)--> state 132 [stelem.i]
+ { 1, 133 }, // cell# 131 : state 1 [start] --(131 stelem.i1)--> state 133 [stelem.i1]
+ { 1, 134 }, // cell# 132 : state 1 [start] --(132 stelem.i2)--> state 134 [stelem.i2]
+ { 1, 135 }, // cell# 133 : state 1 [start] --(133 stelem.i4)--> state 135 [stelem.i4]
+ { 1, 136 }, // cell# 134 : state 1 [start] --(134 stelem.i8)--> state 136 [stelem.i8]
+ { 1, 137 }, // cell# 135 : state 1 [start] --(135 stelem.r4)--> state 137 [stelem.r4]
+ { 1, 138 }, // cell# 136 : state 1 [start] --(136 stelem.r8)--> state 138 [stelem.r8]
+ { 1, 139 }, // cell# 137 : state 1 [start] --(137 stelem.ref)--> state 139 [stelem.ref]
+ { 1, 140 }, // cell# 138 : state 1 [start] --(138 ldelem)--> state 140 [ldelem]
+ { 1, 141 }, // cell# 139 : state 1 [start] --(139 stelem)--> state 141 [stelem]
+ { 1, 142 }, // cell# 140 : state 1 [start] --(140 unbox.any)--> state 142 [unbox.any]
+ { 1, 143 }, // cell# 141 : state 1 [start] --(141 conv.ovf.i1)--> state 143 [conv.ovf.i1]
+ { 1, 144 }, // cell# 142 : state 1 [start] --(142 conv.ovf.u1)--> state 144 [conv.ovf.u1]
+ { 1, 145 }, // cell# 143 : state 1 [start] --(143 conv.ovf.i2)--> state 145 [conv.ovf.i2]
+ { 1, 146 }, // cell# 144 : state 1 [start] --(144 conv.ovf.u2)--> state 146 [conv.ovf.u2]
+ { 1, 147 }, // cell# 145 : state 1 [start] --(145 conv.ovf.i4)--> state 147 [conv.ovf.i4]
+ { 1, 148 }, // cell# 146 : state 1 [start] --(146 conv.ovf.u4)--> state 148 [conv.ovf.u4]
+ { 1, 149 }, // cell# 147 : state 1 [start] --(147 conv.ovf.i8)--> state 149 [conv.ovf.i8]
+ { 1, 150 }, // cell# 148 : state 1 [start] --(148 conv.ovf.u8)--> state 150 [conv.ovf.u8]
+ { 1, 151 }, // cell# 149 : state 1 [start] --(149 refanyval)--> state 151 [refanyval]
+ { 1, 152 }, // cell# 150 : state 1 [start] --(150 ckfinite)--> state 152 [ckfinite]
+ { 1, 153 }, // cell# 151 : state 1 [start] --(151 mkrefany)--> state 153 [mkrefany]
+ { 1, 154 }, // cell# 152 : state 1 [start] --(152 ldtoken)--> state 154 [ldtoken]
+ { 1, 155 }, // cell# 153 : state 1 [start] --(153 conv.u2)--> state 155 [conv.u2]
+ { 1, 156 }, // cell# 154 : state 1 [start] --(154 conv.u1)--> state 156 [conv.u1]
+ { 1, 157 }, // cell# 155 : state 1 [start] --(155 conv.i)--> state 157 [conv.i]
+ { 1, 158 }, // cell# 156 : state 1 [start] --(156 conv.ovf.i)--> state 158 [conv.ovf.i]
+ { 1, 159 }, // cell# 157 : state 1 [start] --(157 conv.ovf.u)--> state 159 [conv.ovf.u]
+ { 1, 160 }, // cell# 158 : state 1 [start] --(158 add.ovf)--> state 160 [add.ovf]
+ { 1, 161 }, // cell# 159 : state 1 [start] --(159 mul.ovf)--> state 161 [mul.ovf]
+ { 1, 162 }, // cell# 160 : state 1 [start] --(160 sub.ovf)--> state 162 [sub.ovf]
+ { 1, 163 }, // cell# 161 : state 1 [start] --(161 leave.s)--> state 163 [leave.s]
+ { 1, 164 }, // cell# 162 : state 1 [start] --(162 stind.i)--> state 164 [stind.i]
+ { 1, 165 }, // cell# 163 : state 1 [start] --(163 conv.u)--> state 165 [conv.u]
+ { 1, 166 }, // cell# 164 : state 1 [start] --(164 prefix.n)--> state 166 [prefix.n]
+ { 1, 167 }, // cell# 165 : state 1 [start] --(165 arglist)--> state 167 [arglist]
+ { 1, 168 }, // cell# 166 : state 1 [start] --(166 ceq)--> state 168 [ceq]
+ { 1, 169 }, // cell# 167 : state 1 [start] --(167 cgt)--> state 169 [cgt]
+ { 1, 170 }, // cell# 168 : state 1 [start] --(168 cgt.un)--> state 170 [cgt.un]
+ { 1, 171 }, // cell# 169 : state 1 [start] --(169 clt)--> state 171 [clt]
+ { 1, 172 }, // cell# 170 : state 1 [start] --(170 clt.un)--> state 172 [clt.un]
+ { 1, 173 }, // cell# 171 : state 1 [start] --(171 ldftn)--> state 173 [ldftn]
+ { 1, 174 }, // cell# 172 : state 1 [start] --(172 ldvirtftn)--> state 174 [ldvirtftn]
+ { 1, 175 }, // cell# 173 : state 1 [start] --(173 long.loc.arg)--> state 175 [long.loc.arg]
+ { 1, 176 }, // cell# 174 : state 1 [start] --(174 localloc)--> state 176 [localloc]
+ { 1, 177 }, // cell# 175 : state 1 [start] --(175 unaligned)--> state 177 [unaligned]
+ { 1, 178 }, // cell# 176 : state 1 [start] --(176 volatile)--> state 178 [volatile]
+ { 1, 179 }, // cell# 177 : state 1 [start] --(177 tailcall)--> state 179 [tailcall]
+ { 1, 180 }, // cell# 178 : state 1 [start] --(178 initobj)--> state 180 [initobj]
+ { 1, 181 }, // cell# 179 : state 1 [start] --(179 constrained)--> state 181 [constrained]
+ { 1, 182 }, // cell# 180 : state 1 [start] --(180 cpblk)--> state 182 [cpblk]
+ { 1, 183 }, // cell# 181 : state 1 [start] --(181 initblk)--> state 183 [initblk]
+ { 1, 184 }, // cell# 182 : state 1 [start] --(182 rethrow)--> state 184 [rethrow]
+ { 1, 185 }, // cell# 183 : state 1 [start] --(183 sizeof)--> state 185 [sizeof]
+ { 1, 186 }, // cell# 184 : state 1 [start] --(184 refanytype)--> state 186 [refanytype]
+ { 1, 187 }, // cell# 185 : state 1 [start] --(185 readonly)--> state 187 [readonly]
+ { 1, 188 }, // cell# 186 : state 1 [start] --(186 ldarga.s.normed)--> state 188 [ldarga.s.normed]
+ { 1, 189 }, // cell# 187 : state 1 [start] --(187 ldloca.s.normed)--> state 189 [ldloca.s.normed]
+ { 3, 223 }, // cell# 188 : state 3 [ldarg.0] --(2 ldarg.1)--> state 223 [ldarg.0 -> ldarg.1]
+ { 3, 227 }, // cell# 189 : state 3 [ldarg.0] --(3 ldarg.2)--> state 227 [ldarg.0 -> ldarg.2]
+ { 3, 229 }, // cell# 190 : state 3 [ldarg.0] --(4 ldarg.3)--> state 229 [ldarg.0 -> ldarg.3]
+ { 4, 192 }, // cell# 191 : state 4 [ldarg.1] --(107 ldfld)--> state 192 [ldarg.1 -> ldfld]
+ { 5, 193 }, // cell# 192 : state 5 [ldarg.2] --(107 ldfld)--> state 193 [ldarg.2 -> ldfld]
+ { 6, 194 }, // cell# 193 : state 6 [ldarg.3] --(107 ldfld)--> state 194 [ldarg.3 -> ldfld]
+ { 11, 199 }, // cell# 194 : state 11 [stloc.0] --(5 ldloc.0)--> state 199 [stloc.0 -> ldloc.0]
+ { 12, 200 }, // cell# 195 : state 12 [stloc.1] --(6 ldloc.1)--> state 200 [stloc.1 -> ldloc.1]
+ { 13, 201 }, // cell# 196 : state 13 [stloc.2] --(7 ldloc.2)--> state 201 [stloc.2 -> ldloc.2]
+ { 14, 202 }, // cell# 197 : state 14 [stloc.3] --(8 ldloc.3)--> state 202 [stloc.3 -> ldloc.3]
+ { 16, 195 }, // cell# 198 : state 16 [ldarga.s] --(107 ldfld)--> state 195 [ldarga.s -> ldfld]
+ { 19, 196 }, // cell# 199 : state 19 [ldloca.s] --(107 ldfld)--> state 196 [ldloca.s -> ldfld]
+ { 35, 203 }, // cell# 200 : state 35 [ldc.r4] --(74 add)--> state 203 [ldc.r4 -> add]
+ { 35, 204 }, // cell# 201 : state 35 [ldc.r4] --(75 sub)--> state 204 [ldc.r4 -> sub]
+ { 35, 205 }, // cell# 202 : state 35 [ldc.r4] --(76 mul)--> state 205 [ldc.r4 -> mul]
+ { 35, 206 }, // cell# 203 : state 35 [ldc.r4] --(77 div)--> state 206 [ldc.r4 -> div]
+ { 96, 215 }, // cell# 204 : state 96 [conv.r8] --(76 mul)--> state 215 [conv.r8 -> mul]
+ { 96, 216 }, // cell# 205 : state 96 [conv.r8] --(77 div)--> state 216 [conv.r8 -> div]
+ {181, 190 }, // cell# 206 : state 181 [constrained] --(97 callvirt)--> state 190 [constrained -> callvirt]
+ { 3, 217 }, // cell# 207 : state 3 [ldarg.0] --(21 ldc.i4.0)--> state 217 [ldarg.0 -> ldc.i4.0]
+ { 36, 207 }, // cell# 208 : state 36 [ldc.r8] --(74 add)--> state 207 [ldc.r8 -> add]
+ { 36, 208 }, // cell# 209 : state 36 [ldc.r8] --(75 sub)--> state 208 [ldc.r8 -> sub]
+ { 36, 209 }, // cell# 210 : state 36 [ldc.r8] --(76 mul)--> state 209 [ldc.r8 -> mul]
+ { 36, 210 }, // cell# 211 : state 36 [ldc.r8] --(77 div)--> state 210 [ldc.r8 -> div]
+ { 95, 211 }, // cell# 212 : state 95 [conv.r4] --(74 add)--> state 211 [conv.r4 -> add]
+ { 95, 212 }, // cell# 213 : state 95 [conv.r4] --(75 sub)--> state 212 [conv.r4 -> sub]
+ { 95, 213 }, // cell# 214 : state 95 [conv.r4] --(76 mul)--> state 213 [conv.r4 -> mul]
+ { 95, 214 }, // cell# 215 : state 95 [conv.r4] --(77 div)--> state 214 [conv.r4 -> div]
+ {188, 197 }, // cell# 216 : state 188 [ldarga.s.normed] --(107 ldfld)--> state 197 [ldarga.s.normed -> ldfld]
+ {189, 198 }, // cell# 217 : state 189 [ldloca.s.normed] --(107 ldfld)--> state 198 [ldloca.s.normed -> ldfld]
+ {191, 242 }, // cell# 218 : state 191 [ldarg.0 -> ldfld] --(2 ldarg.1)--> state 242 [ldarg.0 -> ldfld -> ldarg.1]
+ { 3, 219 }, // cell# 219 : state 3 [ldarg.0] --(33 ldc.r4)--> state 219 [ldarg.0 -> ldc.r4]
+ { 3, 221 }, // cell# 220 : state 3 [ldarg.0] --(34 ldc.r8)--> state 221 [ldarg.0 -> ldc.r8]
+ {195, 246 }, // cell# 221 : state 195 [ldarga.s -> ldfld] --(14 ldarga.s)--> state 246 [ldarga.s -> ldfld -> ldarga.s]
+ { 3, 231 }, // cell# 222 : state 3 [ldarg.0] --(36 dup)--> state 231 [ldarg.0 -> dup]
+ {217, 218 }, // cell# 223 : state 217 [ldarg.0 -> ldc.i4.0] --(109 stfld)--> state 218 [ldarg.0 -> ldc.i4.0 -> stfld]
+ {219, 220 }, // cell# 224 : state 219 [ldarg.0 -> ldc.r4] --(109 stfld)--> state 220 [ldarg.0 -> ldc.r4 -> stfld]
+ {221, 222 }, // cell# 225 : state 221 [ldarg.0 -> ldc.r8] --(109 stfld)--> state 222 [ldarg.0 -> ldc.r8 -> stfld]
+ {223, 224 }, // cell# 226 : state 223 [ldarg.0 -> ldarg.1] --(107 ldfld)--> state 224 [ldarg.0 -> ldarg.1 -> ldfld]
+ {224, 225 }, // cell# 227 : state 224 [ldarg.0 -> ldarg.1 -> ldfld] --(109 stfld)--> state 225 [ldarg.0 -> ldarg.1 -> ldfld -> stfld]
+ {223, 226 }, // cell# 228 : state 223 [ldarg.0 -> ldarg.1] --(109 stfld)--> state 226 [ldarg.0 -> ldarg.1 -> stfld]
+ {227, 228 }, // cell# 229 : state 227 [ldarg.0 -> ldarg.2] --(109 stfld)--> state 228 [ldarg.0 -> ldarg.2 -> stfld]
+ {229, 230 }, // cell# 230 : state 229 [ldarg.0 -> ldarg.3] --(109 stfld)--> state 230 [ldarg.0 -> ldarg.3 -> stfld]
+ {231, 232 }, // cell# 231 : state 231 [ldarg.0 -> dup] --(107 ldfld)--> state 232 [ldarg.0 -> dup -> ldfld]
+ {232, 233 }, // cell# 232 : state 232 [ldarg.0 -> dup -> ldfld] --(2 ldarg.1)--> state 233 [ldarg.0 -> dup -> ldfld -> ldarg.1]
+ {233, 234 }, // cell# 233 : state 233 [ldarg.0 -> dup -> ldfld -> ldarg.1] --(74 add)--> state 234 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> add]
+ {233, 236 }, // cell# 234 : state 233 [ldarg.0 -> dup -> ldfld -> ldarg.1] --(75 sub)--> state 236 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> sub]
+ {233, 238 }, // cell# 235 : state 233 [ldarg.0 -> dup -> ldfld -> ldarg.1] --(76 mul)--> state 238 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> mul]
+ {233, 240 }, // cell# 236 : state 233 [ldarg.0 -> dup -> ldfld -> ldarg.1] --(77 div)--> state 240 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> div]
+ {234, 235 }, // cell# 237 : state 234 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> add] --(109 stfld)--> state 235 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> add -> stfld]
+ {236, 237 }, // cell# 238 : state 236 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> sub] --(109 stfld)--> state 237 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> sub -> stfld]
+ {238, 239 }, // cell# 239 : state 238 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> mul] --(109 stfld)--> state 239 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> mul -> stfld]
+ {240, 241 }, // cell# 240 : state 240 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> div] --(109 stfld)--> state 241 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> div -> stfld]
+ {242, 243 }, // cell# 241 : state 242 [ldarg.0 -> ldfld -> ldarg.1] --(107 ldfld)--> state 243 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld]
+ {243, 244 }, // cell# 242 : state 243 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld] --(74 add)--> state 244 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld -> add]
+ {243, 245 }, // cell# 243 : state 243 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld] --(75 sub)--> state 245 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld -> sub]
+ {246, 247 }, // cell# 244 : state 246 [ldarga.s -> ldfld -> ldarga.s] --(107 ldfld)--> state 247 [ldarga.s -> ldfld -> ldarga.s -> ldfld]
+ {247, 248 }, // cell# 245 : state 247 [ldarga.s -> ldfld -> ldarga.s -> ldfld] --(74 add)--> state 248 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> add]
+ {247, 249 }, // cell# 246 : state 247 [ldarga.s -> ldfld -> ldarga.s -> ldfld] --(75 sub)--> state 249 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> sub]
+ { 0, 0 }, // cell# 247
+ { 0, 0 }, // cell# 248
+ { 0, 0 }, // cell# 249
+ { 0, 0 }, // cell# 250
+ { 0, 0 }, // cell# 251
+ { 0, 0 }, // cell# 252
+ { 0, 0 }, // cell# 253
+ { 0, 0 }, // cell# 254
+ { 0, 0 }, // cell# 255
+ { 0, 0 }, // cell# 256
+ { 0, 0 }, // cell# 257
+ { 0, 0 }, // cell# 258
+ { 0, 0 }, // cell# 259
+ { 0, 0 }, // cell# 260
+ { 0, 0 }, // cell# 261
+ { 0, 0 }, // cell# 262
+ { 0, 0 }, // cell# 263
+ { 0, 0 }, // cell# 264
+ { 0, 0 }, // cell# 265
+ { 0, 0 }, // cell# 266
+ { 0, 0 }, // cell# 267
+ { 0, 0 }, // cell# 268
+ { 0, 0 }, // cell# 269
+ { 0, 0 }, // cell# 270
+ { 0, 0 }, // cell# 271
+ { 0, 0 }, // cell# 272
+ { 0, 0 }, // cell# 273
+ { 0, 0 }, // cell# 274
+ { 0, 0 }, // cell# 275
+ { 0, 0 }, // cell# 276
+ { 0, 0 }, // cell# 277
+ { 0, 0 }, // cell# 278
+ { 0, 0 }, // cell# 279
+ { 0, 0 }, // cell# 280
+ { 0, 0 }, // cell# 281
+ { 0, 0 }, // cell# 282
+ { 0, 0 }, // cell# 283
+ { 0, 0 }, // cell# 284
+ { 0, 0 }, // cell# 285
+ { 0, 0 }, // cell# 286
+ { 0, 0 }, // cell# 287
+ { 0, 0 }, // cell# 288
+ { 0, 0 }, // cell# 289
+ { 0, 0 }, // cell# 290
+ { 0, 0 }, // cell# 291
+ { 0, 0 }, // cell# 292
+ { 3, 191 }, // cell# 293 : state 3 [ldarg.0] --(107 ldfld)--> state 191 [ldarg.0 -> ldfld]
+ { 0, 0 }, // cell# 294
+ { 0, 0 }, // cell# 295
+ { 0, 0 }, // cell# 296
+ { 0, 0 }, // cell# 297
+ { 0, 0 }, // cell# 298
+ { 0, 0 }, // cell# 299
+ { 0, 0 }, // cell# 300
+ { 0, 0 }, // cell# 301
+ { 0, 0 }, // cell# 302
+ { 0, 0 }, // cell# 303
+ { 0, 0 }, // cell# 304
+ { 0, 0 }, // cell# 305
+ { 0, 0 }, // cell# 306
+ { 0, 0 }, // cell# 307
+ { 0, 0 }, // cell# 308
+ { 0, 0 }, // cell# 309
+ { 0, 0 }, // cell# 310
+ { 0, 0 }, // cell# 311
+ { 0, 0 }, // cell# 312
+ { 0, 0 }, // cell# 313
+ { 0, 0 }, // cell# 314
+ { 0, 0 }, // cell# 315
+ { 0, 0 }, // cell# 316
+ { 0, 0 }, // cell# 317
+ { 0, 0 }, // cell# 318
+ { 0, 0 }, // cell# 319
+ { 0, 0 }, // cell# 320
+ { 0, 0 }, // cell# 321
+ { 0, 0 }, // cell# 322
+ { 0, 0 }, // cell# 323
+ { 0, 0 }, // cell# 324
+ { 0, 0 }, // cell# 325
+ { 0, 0 }, // cell# 326
+ { 0, 0 }, // cell# 327
+ { 0, 0 }, // cell# 328
+ { 0, 0 }, // cell# 329
+ { 0, 0 }, // cell# 330
+ { 0, 0 }, // cell# 331
+ { 0, 0 }, // cell# 332
+ { 0, 0 }, // cell# 333
+ { 0, 0 }, // cell# 334
+ { 0, 0 }, // cell# 335
+ { 0, 0 }, // cell# 336
+ { 0, 0 }, // cell# 337
+ { 0, 0 }, // cell# 338
+ { 0, 0 }, // cell# 339
+ { 0, 0 }, // cell# 340
+ { 0, 0 }, // cell# 341
+ { 0, 0 }, // cell# 342
+ { 0, 0 }, // cell# 343
+ { 0, 0 }, // cell# 344
+ { 0, 0 }, // cell# 345
+ { 0, 0 }, // cell# 346
+ { 0, 0 }, // cell# 347
+ { 0, 0 }, // cell# 348
+ { 0, 0 }, // cell# 349
+ { 0, 0 }, // cell# 350
+ { 0, 0 }, // cell# 351
+ { 0, 0 }, // cell# 352
+ { 0, 0 }, // cell# 353
+ { 0, 0 }, // cell# 354
+ { 0, 0 }, // cell# 355
+ { 0, 0 }, // cell# 356
+ { 0, 0 }, // cell# 357
+ { 0, 0 }, // cell# 358
+ { 0, 0 }, // cell# 359
+ { 0, 0 }, // cell# 360
+ { 0, 0 }, // cell# 361
+ { 0, 0 }, // cell# 362
+ { 0, 0 }, // cell# 363
+ { 0, 0 }, // cell# 364
+ { 0, 0 }, // cell# 365
+ { 0, 0 }, // cell# 366
+ { 0, 0 }, // cell# 367
+ { 0, 0 }, // cell# 368
+ { 0, 0 }, // cell# 369
+ { 0, 0 }, // cell# 370
+ { 0, 0 }, // cell# 371
+ { 0, 0 }, // cell# 372
+ { 0, 0 }, // cell# 373
+ { 0, 0 }, // cell# 374
+ { 0, 0 }, // cell# 375
+ { 0, 0 }, // cell# 376
+ { 0, 0 }, // cell# 377
+ { 0, 0 }, // cell# 378
+ { 0, 0 }, // cell# 379
+ { 0, 0 }, // cell# 380
+ { 0, 0 }, // cell# 381
+ { 0, 0 }, // cell# 382
+ { 0, 0 }, // cell# 383
+ { 0, 0 }, // cell# 384
+ { 0, 0 }, // cell# 385
+ { 0, 0 }, // cell# 386
+ { 0, 0 }, // cell# 387
+ { 0, 0 }, // cell# 388
+ { 0, 0 }, // cell# 389
+ { 0, 0 }, // cell# 390
+ { 0, 0 }, // cell# 391
+ { 0, 0 }, // cell# 392
+ { 0, 0 }, // cell# 393
+ { 0, 0 }, // cell# 394
+ { 0, 0 }, // cell# 395
+ { 0, 0 }, // cell# 396
+ { 0, 0 }, // cell# 397
+ { 0, 0 }, // cell# 398
+ { 0, 0 }, // cell# 399
+ { 0, 0 }, // cell# 400
+ { 0, 0 }, // cell# 401
+ { 0, 0 }, // cell# 402
+ { 0, 0 }, // cell# 403
+ { 0, 0 }, // cell# 404
+ { 0, 0 }, // cell# 405
+ { 0, 0 }, // cell# 406
+ { 0, 0 }, // cell# 407
+ { 0, 0 }, // cell# 408
+ { 0, 0 }, // cell# 409
+ { 0, 0 }, // cell# 410
+ { 0, 0 }, // cell# 411
+ { 0, 0 }, // cell# 412
+ { 0, 0 }, // cell# 413
+ { 0, 0 }, // cell# 414
+ { 0, 0 }, // cell# 415
+ { 0, 0 }, // cell# 416
+ { 0, 0 }, // cell# 417
+};
+// clang-format on
+
+const JumpTableCell* gp_SMJumpTableCells = g_SMJumpTableCells;
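Reading the two generated tables together: each state row ends with an offset into the jump-table cell array, and a cell encodes a real transition only when its source field matches the state being probed (which is why the unreferenced cells are left as { 0, 0 }). A minimal lookup sketch follows; it assumes the SMState field is named jumpTableByteOffset and the cell fields srcState/destState — the actual struct definitions live in the JIT's sm headers elsewhere in this change, so treat the names as illustrative.

    // Hedged sketch of a state-machine transition lookup; field names are assumed.
    static unsigned GetDestStateSketch(const SMState*       states,   // e.g. gp_SMStates
                                       const JumpTableCell* cells,    // e.g. gp_SMJumpTableCells
                                       unsigned             srcState, // current state id
                                       SM_OPCODE            opcode)   // next SM opcode seen
    {
        // Each state owns a slice of the cell array, addressed by a byte offset.
        const JumpTableCell* jumpTable =
            (const JumpTableCell*)((const unsigned char*)cells + states[srcState].jumpTableByteOffset);
        const JumpTableCell* cell = jumpTable + opcode;

        // The cell is valid only if it was generated for this source state;
        // otherwise there is no longer sequence to match and the caller restarts.
        return (cell->srcState == srcState) ? cell->destState : 0;
    }

For example, state 181 [constrained] carries offset 218, and probing it with opcode 97 (callvirt) lands on cell# 206 {181, 190}, i.e. state 190 [constrained -> callvirt], matching the comments above.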
diff --git a/src/jit/smopcode.def b/src/jit/smopcode.def
new file mode 100644
index 0000000000..aa918601c2
--- /dev/null
+++ b/src/jit/smopcode.def
@@ -0,0 +1,205 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*******************************************************************************************
+ ** **
+ ** Auto-generated file. Do NOT modify! **
+ ** **
+ ** smopcode.def - Opcodes used in the state machine in the JIT.                           **
+ ** **
+ ** To generate this file, run "makeSmOpcodeDef.pl > smopcode.def" **
+ ** **
+ *******************************************************************************************/
+
+//
+// SM opcode name SM opcode string
+// -------------------------------------------------------------------------------------------
+SMOPDEF(SM_NOSHOW, "noshow") // 0
+SMOPDEF(SM_LDARG_0, "ldarg.0") // 1
+SMOPDEF(SM_LDARG_1, "ldarg.1") // 2
+SMOPDEF(SM_LDARG_2, "ldarg.2") // 3
+SMOPDEF(SM_LDARG_3, "ldarg.3") // 4
+SMOPDEF(SM_LDLOC_0, "ldloc.0") // 5
+SMOPDEF(SM_LDLOC_1, "ldloc.1") // 6
+SMOPDEF(SM_LDLOC_2, "ldloc.2") // 7
+SMOPDEF(SM_LDLOC_3, "ldloc.3") // 8
+SMOPDEF(SM_STLOC_0, "stloc.0") // 9
+SMOPDEF(SM_STLOC_1, "stloc.1") // 10
+SMOPDEF(SM_STLOC_2, "stloc.2") // 11
+SMOPDEF(SM_STLOC_3, "stloc.3") // 12
+SMOPDEF(SM_LDARG_S, "ldarg.s") // 13
+SMOPDEF(SM_LDARGA_S, "ldarga.s") // 14
+SMOPDEF(SM_STARG_S, "starg.s") // 15
+SMOPDEF(SM_LDLOC_S, "ldloc.s") // 16
+SMOPDEF(SM_LDLOCA_S, "ldloca.s") // 17
+SMOPDEF(SM_STLOC_S, "stloc.s") // 18
+SMOPDEF(SM_LDNULL, "ldnull") // 19
+SMOPDEF(SM_LDC_I4_M1, "ldc.i4.m1") // 20
+SMOPDEF(SM_LDC_I4_0, "ldc.i4.0") // 21
+SMOPDEF(SM_LDC_I4_1, "ldc.i4.1") // 22
+SMOPDEF(SM_LDC_I4_2, "ldc.i4.2") // 23
+SMOPDEF(SM_LDC_I4_3, "ldc.i4.3") // 24
+SMOPDEF(SM_LDC_I4_4, "ldc.i4.4") // 25
+SMOPDEF(SM_LDC_I4_5, "ldc.i4.5") // 26
+SMOPDEF(SM_LDC_I4_6, "ldc.i4.6") // 27
+SMOPDEF(SM_LDC_I4_7, "ldc.i4.7") // 28
+SMOPDEF(SM_LDC_I4_8, "ldc.i4.8") // 29
+SMOPDEF(SM_LDC_I4_S, "ldc.i4.s") // 30
+SMOPDEF(SM_LDC_I4, "ldc.i4") // 31
+SMOPDEF(SM_LDC_I8, "ldc.i8") // 32
+SMOPDEF(SM_LDC_R4, "ldc.r4") // 33
+SMOPDEF(SM_LDC_R8, "ldc.r8") // 34
+SMOPDEF(SM_UNUSED, "unused") // 35
+SMOPDEF(SM_DUP, "dup") // 36
+SMOPDEF(SM_POP, "pop") // 37
+SMOPDEF(SM_CALL, "call") // 38
+SMOPDEF(SM_CALLI, "calli") // 39
+SMOPDEF(SM_RET, "ret") // 40
+SMOPDEF(SM_BR_S, "br.s") // 41
+SMOPDEF(SM_BRFALSE_S, "brfalse.s") // 42
+SMOPDEF(SM_BRTRUE_S, "brtrue.s") // 43
+SMOPDEF(SM_BEQ_S, "beq.s") // 44
+SMOPDEF(SM_BGE_S, "bge.s") // 45
+SMOPDEF(SM_BGT_S, "bgt.s") // 46
+SMOPDEF(SM_BLE_S, "ble.s") // 47
+SMOPDEF(SM_BLT_S, "blt.s") // 48
+SMOPDEF(SM_BNE_UN_S, "bne.un.s") // 49
+SMOPDEF(SM_BGE_UN_S, "bge.un.s") // 50
+SMOPDEF(SM_BGT_UN_S, "bgt.un.s") // 51
+SMOPDEF(SM_BLE_UN_S, "ble.un.s") // 52
+SMOPDEF(SM_BLT_UN_S, "blt.un.s") // 53
+SMOPDEF(SM_LONG_BRANCH, "long.branch") // 54
+SMOPDEF(SM_SWITCH, "switch") // 55
+SMOPDEF(SM_LDIND_I1, "ldind.i1") // 56
+SMOPDEF(SM_LDIND_U1, "ldind.u1") // 57
+SMOPDEF(SM_LDIND_I2, "ldind.i2") // 58
+SMOPDEF(SM_LDIND_U2, "ldind.u2") // 59
+SMOPDEF(SM_LDIND_I4, "ldind.i4") // 60
+SMOPDEF(SM_LDIND_U4, "ldind.u4") // 61
+SMOPDEF(SM_LDIND_I8, "ldind.i8") // 62
+SMOPDEF(SM_LDIND_I, "ldind.i") // 63
+SMOPDEF(SM_LDIND_R4, "ldind.r4") // 64
+SMOPDEF(SM_LDIND_R8, "ldind.r8") // 65
+SMOPDEF(SM_LDIND_REF, "ldind.ref") // 66
+SMOPDEF(SM_STIND_REF, "stind.ref") // 67
+SMOPDEF(SM_STIND_I1, "stind.i1") // 68
+SMOPDEF(SM_STIND_I2, "stind.i2") // 69
+SMOPDEF(SM_STIND_I4, "stind.i4") // 70
+SMOPDEF(SM_STIND_I8, "stind.i8") // 71
+SMOPDEF(SM_STIND_R4, "stind.r4") // 72
+SMOPDEF(SM_STIND_R8, "stind.r8") // 73
+SMOPDEF(SM_ADD, "add") // 74
+SMOPDEF(SM_SUB, "sub") // 75
+SMOPDEF(SM_MUL, "mul") // 76
+SMOPDEF(SM_DIV, "div") // 77
+SMOPDEF(SM_DIV_UN, "div.un") // 78
+SMOPDEF(SM_REM, "rem") // 79
+SMOPDEF(SM_REM_UN, "rem.un") // 80
+SMOPDEF(SM_AND, "and") // 81
+SMOPDEF(SM_OR, "or") // 82
+SMOPDEF(SM_XOR, "xor") // 83
+SMOPDEF(SM_SHL, "shl") // 84
+SMOPDEF(SM_SHR, "shr") // 85
+SMOPDEF(SM_SHR_UN, "shr.un") // 86
+SMOPDEF(SM_NEG, "neg") // 87
+SMOPDEF(SM_NOT, "not") // 88
+SMOPDEF(SM_CONV_I1, "conv.i1") // 89
+SMOPDEF(SM_CONV_I2, "conv.i2") // 90
+SMOPDEF(SM_CONV_I4, "conv.i4") // 91
+SMOPDEF(SM_CONV_I8, "conv.i8") // 92
+SMOPDEF(SM_CONV_R4, "conv.r4") // 93
+SMOPDEF(SM_CONV_R8, "conv.r8") // 94
+SMOPDEF(SM_CONV_U4, "conv.u4") // 95
+SMOPDEF(SM_CONV_U8, "conv.u8") // 96
+SMOPDEF(SM_CALLVIRT, "callvirt") // 97
+SMOPDEF(SM_CPOBJ, "cpobj") // 98
+SMOPDEF(SM_LDOBJ, "ldobj") // 99
+SMOPDEF(SM_LDSTR, "ldstr") // 100
+SMOPDEF(SM_NEWOBJ, "newobj") // 101
+SMOPDEF(SM_CASTCLASS, "castclass") // 102
+SMOPDEF(SM_ISINST, "isinst") // 103
+SMOPDEF(SM_CONV_R_UN, "conv.r.un") // 104
+SMOPDEF(SM_UNBOX, "unbox") // 105
+SMOPDEF(SM_THROW, "throw") // 106
+SMOPDEF(SM_LDFLD, "ldfld") // 107
+SMOPDEF(SM_LDFLDA, "ldflda") // 108
+SMOPDEF(SM_STFLD, "stfld") // 109
+SMOPDEF(SM_LDSFLD, "ldsfld") // 110
+SMOPDEF(SM_LDSFLDA, "ldsflda") // 111
+SMOPDEF(SM_STSFLD, "stsfld") // 112
+SMOPDEF(SM_STOBJ, "stobj") // 113
+SMOPDEF(SM_OVF_NOTYPE_UN, "ovf.notype.un") // 114
+SMOPDEF(SM_BOX, "box") // 115
+SMOPDEF(SM_NEWARR, "newarr") // 116
+SMOPDEF(SM_LDLEN, "ldlen") // 117
+SMOPDEF(SM_LDELEMA, "ldelema") // 118
+SMOPDEF(SM_LDELEM_I1, "ldelem.i1") // 119
+SMOPDEF(SM_LDELEM_U1, "ldelem.u1") // 120
+SMOPDEF(SM_LDELEM_I2, "ldelem.i2") // 121
+SMOPDEF(SM_LDELEM_U2, "ldelem.u2") // 122
+SMOPDEF(SM_LDELEM_I4, "ldelem.i4") // 123
+SMOPDEF(SM_LDELEM_U4, "ldelem.u4") // 124
+SMOPDEF(SM_LDELEM_I8, "ldelem.i8") // 125
+SMOPDEF(SM_LDELEM_I, "ldelem.i") // 126
+SMOPDEF(SM_LDELEM_R4, "ldelem.r4") // 127
+SMOPDEF(SM_LDELEM_R8, "ldelem.r8") // 128
+SMOPDEF(SM_LDELEM_REF, "ldelem.ref") // 129
+SMOPDEF(SM_STELEM_I, "stelem.i") // 130
+SMOPDEF(SM_STELEM_I1, "stelem.i1") // 131
+SMOPDEF(SM_STELEM_I2, "stelem.i2") // 132
+SMOPDEF(SM_STELEM_I4, "stelem.i4") // 133
+SMOPDEF(SM_STELEM_I8, "stelem.i8") // 134
+SMOPDEF(SM_STELEM_R4, "stelem.r4") // 135
+SMOPDEF(SM_STELEM_R8, "stelem.r8") // 136
+SMOPDEF(SM_STELEM_REF, "stelem.ref") // 137
+SMOPDEF(SM_LDELEM, "ldelem") // 138
+SMOPDEF(SM_STELEM, "stelem") // 139
+SMOPDEF(SM_UNBOX_ANY, "unbox.any") // 140
+SMOPDEF(SM_CONV_OVF_I1, "conv.ovf.i1") // 141
+SMOPDEF(SM_CONV_OVF_U1, "conv.ovf.u1") // 142
+SMOPDEF(SM_CONV_OVF_I2, "conv.ovf.i2") // 143
+SMOPDEF(SM_CONV_OVF_U2, "conv.ovf.u2") // 144
+SMOPDEF(SM_CONV_OVF_I4, "conv.ovf.i4") // 145
+SMOPDEF(SM_CONV_OVF_U4, "conv.ovf.u4") // 146
+SMOPDEF(SM_CONV_OVF_I8, "conv.ovf.i8") // 147
+SMOPDEF(SM_CONV_OVF_U8, "conv.ovf.u8") // 148
+SMOPDEF(SM_REFANYVAL, "refanyval") // 149
+SMOPDEF(SM_CKFINITE, "ckfinite") // 150
+SMOPDEF(SM_MKREFANY, "mkrefany") // 151
+SMOPDEF(SM_LDTOKEN, "ldtoken") // 152
+SMOPDEF(SM_CONV_U2, "conv.u2") // 153
+SMOPDEF(SM_CONV_U1, "conv.u1") // 154
+SMOPDEF(SM_CONV_I, "conv.i") // 155
+SMOPDEF(SM_CONV_OVF_I, "conv.ovf.i") // 156
+SMOPDEF(SM_CONV_OVF_U, "conv.ovf.u") // 157
+SMOPDEF(SM_ADD_OVF, "add.ovf") // 158
+SMOPDEF(SM_MUL_OVF, "mul.ovf") // 159
+SMOPDEF(SM_SUB_OVF, "sub.ovf") // 160
+SMOPDEF(SM_LEAVE_S, "leave.s") // 161
+SMOPDEF(SM_STIND_I, "stind.i") // 162
+SMOPDEF(SM_CONV_U, "conv.u") // 163
+SMOPDEF(SM_PREFIX_N, "prefix.n") // 164
+SMOPDEF(SM_ARGLIST, "arglist") // 165
+SMOPDEF(SM_CEQ, "ceq") // 166
+SMOPDEF(SM_CGT, "cgt") // 167
+SMOPDEF(SM_CGT_UN, "cgt.un") // 168
+SMOPDEF(SM_CLT, "clt") // 169
+SMOPDEF(SM_CLT_UN, "clt.un") // 170
+SMOPDEF(SM_LDFTN, "ldftn") // 171
+SMOPDEF(SM_LDVIRTFTN, "ldvirtftn") // 172
+SMOPDEF(SM_LONG_LOC_ARG, "long.loc.arg") // 173
+SMOPDEF(SM_LOCALLOC, "localloc") // 174
+SMOPDEF(SM_UNALIGNED, "unaligned") // 175
+SMOPDEF(SM_VOLATILE, "volatile") // 176
+SMOPDEF(SM_TAILCALL, "tailcall") // 177
+SMOPDEF(SM_INITOBJ, "initobj") // 178
+SMOPDEF(SM_CONSTRAINED, "constrained") // 179
+SMOPDEF(SM_CPBLK, "cpblk") // 180
+SMOPDEF(SM_INITBLK, "initblk") // 181
+SMOPDEF(SM_RETHROW, "rethrow") // 182
+SMOPDEF(SM_SIZEOF, "sizeof") // 183
+SMOPDEF(SM_REFANYTYPE, "refanytype") // 184
+SMOPDEF(SM_READONLY, "readonly") // 185
+SMOPDEF(SM_LDARGA_S_NORMED, "ldarga.s.normed") // 186
+SMOPDEF(SM_LDLOCA_S_NORMED, "ldloca.s.normed") // 187
diff --git a/src/jit/smopcodemap.def b/src/jit/smopcodemap.def
new file mode 100644
index 0000000000..7b2f71fe6f
--- /dev/null
+++ b/src/jit/smopcodemap.def
@@ -0,0 +1,323 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*******************************************************************************************
+ ** **
+ ** smopcodemap.def - Mapping from opcodes in the EE to opcodes in the JIT state machine.  **
+ ** **
+ *******************************************************************************************/
+
+//
+// EE opcode name EE opcode string State machine opcode name
+// -------------------------------------------------------------------------------------------------------------------------------------------------------
+OPCODEMAP(CEE_NOP, "nop", SM_NOSHOW)
+OPCODEMAP(CEE_BREAK, "break", SM_NOSHOW)
+OPCODEMAP(CEE_LDARG_0, "ldarg.0", SM_LDARG_0)
+OPCODEMAP(CEE_LDARG_1, "ldarg.1", SM_LDARG_1)
+OPCODEMAP(CEE_LDARG_2, "ldarg.2", SM_LDARG_2)
+OPCODEMAP(CEE_LDARG_3, "ldarg.3", SM_LDARG_3)
+OPCODEMAP(CEE_LDLOC_0, "ldloc.0", SM_LDLOC_0)
+OPCODEMAP(CEE_LDLOC_1, "ldloc.1", SM_LDLOC_1)
+OPCODEMAP(CEE_LDLOC_2, "ldloc.2", SM_LDLOC_2)
+OPCODEMAP(CEE_LDLOC_3, "ldloc.3", SM_LDLOC_3)
+OPCODEMAP(CEE_STLOC_0, "stloc.0", SM_STLOC_0)
+OPCODEMAP(CEE_STLOC_1, "stloc.1", SM_STLOC_1)
+OPCODEMAP(CEE_STLOC_2, "stloc.2", SM_STLOC_2)
+OPCODEMAP(CEE_STLOC_3, "stloc.3", SM_STLOC_3)
+OPCODEMAP(CEE_LDARG_S, "ldarg.s", SM_LDARG_S)
+OPCODEMAP(CEE_LDARGA_S, "ldarga.s", SM_LDARGA_S)
+OPCODEMAP(CEE_STARG_S, "starg.s", SM_STARG_S)
+OPCODEMAP(CEE_LDLOC_S, "ldloc.s", SM_LDLOC_S)
+OPCODEMAP(CEE_LDLOCA_S, "ldloca.s", SM_LDLOCA_S)
+OPCODEMAP(CEE_STLOC_S, "stloc.s", SM_STLOC_S)
+OPCODEMAP(CEE_LDNULL, "ldnull", SM_LDNULL)
+OPCODEMAP(CEE_LDC_I4_M1, "ldc.i4.m1", SM_LDC_I4_M1)
+OPCODEMAP(CEE_LDC_I4_0, "ldc.i4.0", SM_LDC_I4_0)
+OPCODEMAP(CEE_LDC_I4_1, "ldc.i4.1", SM_LDC_I4_1)
+OPCODEMAP(CEE_LDC_I4_2, "ldc.i4.2", SM_LDC_I4_2)
+OPCODEMAP(CEE_LDC_I4_3, "ldc.i4.3", SM_LDC_I4_3)
+OPCODEMAP(CEE_LDC_I4_4, "ldc.i4.4", SM_LDC_I4_4)
+OPCODEMAP(CEE_LDC_I4_5, "ldc.i4.5", SM_LDC_I4_5)
+OPCODEMAP(CEE_LDC_I4_6, "ldc.i4.6", SM_LDC_I4_6)
+OPCODEMAP(CEE_LDC_I4_7, "ldc.i4.7", SM_LDC_I4_7)
+OPCODEMAP(CEE_LDC_I4_8, "ldc.i4.8", SM_LDC_I4_8)
+OPCODEMAP(CEE_LDC_I4_S, "ldc.i4.s", SM_LDC_I4_S)
+OPCODEMAP(CEE_LDC_I4, "ldc.i4", SM_LDC_I4)
+OPCODEMAP(CEE_LDC_I8, "ldc.i8", SM_LDC_I8)
+OPCODEMAP(CEE_LDC_R4, "ldc.r4", SM_LDC_R4)
+OPCODEMAP(CEE_LDC_R8, "ldc.r8", SM_LDC_R8)
+OPCODEMAP(CEE_UNUSED49, "unused", SM_UNUSED)
+OPCODEMAP(CEE_DUP, "dup", SM_DUP)
+OPCODEMAP(CEE_POP, "pop", SM_POP)
+OPCODEMAP(CEE_JMP, "jmp", SM_NOSHOW)
+OPCODEMAP(CEE_CALL, "call", SM_CALL)
+OPCODEMAP(CEE_CALLI, "calli", SM_CALLI)
+OPCODEMAP(CEE_RET, "ret", SM_RET)
+OPCODEMAP(CEE_BR_S, "br.s", SM_BR_S)
+OPCODEMAP(CEE_BRFALSE_S, "brfalse.s", SM_BRFALSE_S)
+OPCODEMAP(CEE_BRTRUE_S, "brtrue.s", SM_BRTRUE_S)
+OPCODEMAP(CEE_BEQ_S, "beq.s", SM_BEQ_S)
+OPCODEMAP(CEE_BGE_S, "bge.s", SM_BGE_S)
+OPCODEMAP(CEE_BGT_S, "bgt.s", SM_BGT_S)
+OPCODEMAP(CEE_BLE_S, "ble.s", SM_BLE_S)
+OPCODEMAP(CEE_BLT_S, "blt.s", SM_BLT_S)
+OPCODEMAP(CEE_BNE_UN_S, "bne.un.s", SM_BNE_UN_S)
+OPCODEMAP(CEE_BGE_UN_S, "bge.un.s", SM_BGE_UN_S)
+OPCODEMAP(CEE_BGT_UN_S, "bgt.un.s", SM_BGT_UN_S)
+OPCODEMAP(CEE_BLE_UN_S, "ble.un.s", SM_BLE_UN_S)
+OPCODEMAP(CEE_BLT_UN_S, "blt.un.s", SM_BLT_UN_S)
+OPCODEMAP(CEE_BR, "br", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BRFALSE, "brfalse", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BRTRUE, "brtrue", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BEQ, "beq", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BGE, "bge", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BGT, "bgt", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BLE, "ble", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BLT, "blt", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BNE_UN, "bne.un", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BGE_UN, "bge.un", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BGT_UN, "bgt.un", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BLE_UN, "ble.un", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BLT_UN, "blt.un", SM_LONG_BRANCH)
+OPCODEMAP(CEE_SWITCH, "switch", SM_SWITCH)
+OPCODEMAP(CEE_LDIND_I1, "ldind.i1", SM_LDIND_I1)
+OPCODEMAP(CEE_LDIND_U1, "ldind.u1", SM_LDIND_U1)
+OPCODEMAP(CEE_LDIND_I2, "ldind.i2", SM_LDIND_I2)
+OPCODEMAP(CEE_LDIND_U2, "ldind.u2", SM_LDIND_U2)
+OPCODEMAP(CEE_LDIND_I4, "ldind.i4", SM_LDIND_I4)
+OPCODEMAP(CEE_LDIND_U4, "ldind.u4", SM_LDIND_U4)
+OPCODEMAP(CEE_LDIND_I8, "ldind.i8", SM_LDIND_I8)
+OPCODEMAP(CEE_LDIND_I, "ldind.i", SM_LDIND_I)
+OPCODEMAP(CEE_LDIND_R4, "ldind.r4", SM_LDIND_R4)
+OPCODEMAP(CEE_LDIND_R8, "ldind.r8", SM_LDIND_R8)
+OPCODEMAP(CEE_LDIND_REF, "ldind.ref", SM_LDIND_REF)
+OPCODEMAP(CEE_STIND_REF, "stind.ref", SM_STIND_REF)
+OPCODEMAP(CEE_STIND_I1, "stind.i1", SM_STIND_I1)
+OPCODEMAP(CEE_STIND_I2, "stind.i2", SM_STIND_I2)
+OPCODEMAP(CEE_STIND_I4, "stind.i4", SM_STIND_I4)
+OPCODEMAP(CEE_STIND_I8, "stind.i8", SM_STIND_I8)
+OPCODEMAP(CEE_STIND_R4, "stind.r4", SM_STIND_R4)
+OPCODEMAP(CEE_STIND_R8, "stind.r8", SM_STIND_R8)
+OPCODEMAP(CEE_ADD, "add", SM_ADD)
+OPCODEMAP(CEE_SUB, "sub", SM_SUB)
+OPCODEMAP(CEE_MUL, "mul", SM_MUL)
+OPCODEMAP(CEE_DIV, "div", SM_DIV)
+OPCODEMAP(CEE_DIV_UN, "div.un", SM_DIV_UN)
+OPCODEMAP(CEE_REM, "rem", SM_REM)
+OPCODEMAP(CEE_REM_UN, "rem.un", SM_REM_UN)
+OPCODEMAP(CEE_AND, "and", SM_AND)
+OPCODEMAP(CEE_OR, "or", SM_OR)
+OPCODEMAP(CEE_XOR, "xor", SM_XOR)
+OPCODEMAP(CEE_SHL, "shl", SM_SHL)
+OPCODEMAP(CEE_SHR, "shr", SM_SHR)
+OPCODEMAP(CEE_SHR_UN, "shr.un", SM_SHR_UN)
+OPCODEMAP(CEE_NEG, "neg", SM_NEG)
+OPCODEMAP(CEE_NOT, "not", SM_NOT)
+OPCODEMAP(CEE_CONV_I1, "conv.i1", SM_CONV_I1)
+OPCODEMAP(CEE_CONV_I2, "conv.i2", SM_CONV_I2)
+OPCODEMAP(CEE_CONV_I4, "conv.i4", SM_CONV_I4)
+OPCODEMAP(CEE_CONV_I8, "conv.i8", SM_CONV_I8)
+OPCODEMAP(CEE_CONV_R4, "conv.r4", SM_CONV_R4)
+OPCODEMAP(CEE_CONV_R8, "conv.r8", SM_CONV_R8)
+OPCODEMAP(CEE_CONV_U4, "conv.u4", SM_CONV_U4)
+OPCODEMAP(CEE_CONV_U8, "conv.u8", SM_CONV_U8)
+OPCODEMAP(CEE_CALLVIRT, "callvirt", SM_CALLVIRT)
+OPCODEMAP(CEE_CPOBJ, "cpobj", SM_CPOBJ)
+OPCODEMAP(CEE_LDOBJ, "ldobj", SM_LDOBJ)
+OPCODEMAP(CEE_LDSTR, "ldstr", SM_LDSTR)
+OPCODEMAP(CEE_NEWOBJ, "newobj", SM_NEWOBJ)
+OPCODEMAP(CEE_CASTCLASS, "castclass", SM_CASTCLASS)
+OPCODEMAP(CEE_ISINST, "isinst", SM_ISINST)
+OPCODEMAP(CEE_CONV_R_UN, "conv.r.un", SM_CONV_R_UN)
+OPCODEMAP(CEE_UNUSED58, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED1, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNBOX, "unbox", SM_UNBOX)
+OPCODEMAP(CEE_THROW, "throw", SM_THROW)
+OPCODEMAP(CEE_LDFLD, "ldfld", SM_LDFLD)
+OPCODEMAP(CEE_LDFLDA, "ldflda", SM_LDFLDA)
+OPCODEMAP(CEE_STFLD, "stfld", SM_STFLD)
+OPCODEMAP(CEE_LDSFLD, "ldsfld", SM_LDSFLD)
+OPCODEMAP(CEE_LDSFLDA, "ldsflda", SM_LDSFLDA)
+OPCODEMAP(CEE_STSFLD, "stsfld", SM_STSFLD)
+OPCODEMAP(CEE_STOBJ, "stobj", SM_STOBJ)
+OPCODEMAP(CEE_CONV_OVF_I1_UN, "conv.ovf.i1.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_I2_UN, "conv.ovf.i2.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_I4_UN, "conv.ovf.i4.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_I8_UN, "conv.ovf.i8.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_U1_UN, "conv.ovf.u1.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_U2_UN, "conv.ovf.u2.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_U4_UN, "conv.ovf.u4.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_U8_UN, "conv.ovf.u8.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_I_UN, "conv.ovf.i.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_U_UN, "conv.ovf.u.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_BOX, "box", SM_BOX)
+OPCODEMAP(CEE_NEWARR, "newarr", SM_NEWARR)
+OPCODEMAP(CEE_LDLEN, "ldlen", SM_LDLEN)
+OPCODEMAP(CEE_LDELEMA, "ldelema", SM_LDELEMA)
+OPCODEMAP(CEE_LDELEM_I1, "ldelem.i1", SM_LDELEM_I1)
+OPCODEMAP(CEE_LDELEM_U1, "ldelem.u1", SM_LDELEM_U1)
+OPCODEMAP(CEE_LDELEM_I2, "ldelem.i2", SM_LDELEM_I2)
+OPCODEMAP(CEE_LDELEM_U2, "ldelem.u2", SM_LDELEM_U2)
+OPCODEMAP(CEE_LDELEM_I4, "ldelem.i4", SM_LDELEM_I4)
+OPCODEMAP(CEE_LDELEM_U4, "ldelem.u4", SM_LDELEM_U4)
+OPCODEMAP(CEE_LDELEM_I8, "ldelem.i8", SM_LDELEM_I8)
+OPCODEMAP(CEE_LDELEM_I, "ldelem.i", SM_LDELEM_I)
+OPCODEMAP(CEE_LDELEM_R4, "ldelem.r4", SM_LDELEM_R4)
+OPCODEMAP(CEE_LDELEM_R8, "ldelem.r8", SM_LDELEM_R8)
+OPCODEMAP(CEE_LDELEM_REF, "ldelem.ref", SM_LDELEM_REF)
+OPCODEMAP(CEE_STELEM_I, "stelem.i", SM_STELEM_I)
+OPCODEMAP(CEE_STELEM_I1, "stelem.i1", SM_STELEM_I1)
+OPCODEMAP(CEE_STELEM_I2, "stelem.i2", SM_STELEM_I2)
+OPCODEMAP(CEE_STELEM_I4, "stelem.i4", SM_STELEM_I4)
+OPCODEMAP(CEE_STELEM_I8, "stelem.i8", SM_STELEM_I8)
+OPCODEMAP(CEE_STELEM_R4, "stelem.r4", SM_STELEM_R4)
+OPCODEMAP(CEE_STELEM_R8, "stelem.r8", SM_STELEM_R8)
+OPCODEMAP(CEE_STELEM_REF, "stelem.ref", SM_STELEM_REF)
+OPCODEMAP(CEE_LDELEM, "ldelem", SM_LDELEM)
+OPCODEMAP(CEE_STELEM, "stelem", SM_STELEM)
+OPCODEMAP(CEE_UNBOX_ANY, "unbox.any", SM_UNBOX_ANY)
+OPCODEMAP(CEE_UNUSED5, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED6, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED7, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED8, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED9, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED10, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED11, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED12, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED13, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED14, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED15, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED16, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED17, "unused", SM_UNUSED)
+OPCODEMAP(CEE_CONV_OVF_I1, "conv.ovf.i1", SM_CONV_OVF_I1)
+OPCODEMAP(CEE_CONV_OVF_U1, "conv.ovf.u1", SM_CONV_OVF_U1)
+OPCODEMAP(CEE_CONV_OVF_I2, "conv.ovf.i2", SM_CONV_OVF_I2)
+OPCODEMAP(CEE_CONV_OVF_U2, "conv.ovf.u2", SM_CONV_OVF_U2)
+OPCODEMAP(CEE_CONV_OVF_I4, "conv.ovf.i4", SM_CONV_OVF_I4)
+OPCODEMAP(CEE_CONV_OVF_U4, "conv.ovf.u4", SM_CONV_OVF_U4)
+OPCODEMAP(CEE_CONV_OVF_I8, "conv.ovf.i8", SM_CONV_OVF_I8)
+OPCODEMAP(CEE_CONV_OVF_U8, "conv.ovf.u8", SM_CONV_OVF_U8)
+OPCODEMAP(CEE_UNUSED50, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED18, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED19, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED20, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED21, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED22, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED23, "unused", SM_UNUSED)
+OPCODEMAP(CEE_REFANYVAL, "refanyval", SM_REFANYVAL)
+OPCODEMAP(CEE_CKFINITE, "ckfinite", SM_CKFINITE)
+OPCODEMAP(CEE_UNUSED24, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED25, "unused", SM_UNUSED)
+OPCODEMAP(CEE_MKREFANY, "mkrefany", SM_MKREFANY)
+OPCODEMAP(CEE_UNUSED59, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED60, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED61, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED62, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED63, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED64, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED65, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED66, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED67, "unused", SM_UNUSED)
+OPCODEMAP(CEE_LDTOKEN, "ldtoken", SM_LDTOKEN)
+OPCODEMAP(CEE_CONV_U2, "conv.u2", SM_CONV_U2)
+OPCODEMAP(CEE_CONV_U1, "conv.u1", SM_CONV_U1)
+OPCODEMAP(CEE_CONV_I, "conv.i", SM_CONV_I)
+OPCODEMAP(CEE_CONV_OVF_I, "conv.ovf.i", SM_CONV_OVF_I)
+OPCODEMAP(CEE_CONV_OVF_U, "conv.ovf.u", SM_CONV_OVF_U)
+OPCODEMAP(CEE_ADD_OVF, "add.ovf", SM_ADD_OVF)
+OPCODEMAP(CEE_ADD_OVF_UN, "add.ovf.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_MUL_OVF, "mul.ovf", SM_MUL_OVF)
+OPCODEMAP(CEE_MUL_OVF_UN, "mul.ovf.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_SUB_OVF, "sub.ovf", SM_SUB_OVF)
+OPCODEMAP(CEE_SUB_OVF_UN, "sub.ovf.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_ENDFINALLY, "endfinally", SM_NOSHOW)
+OPCODEMAP(CEE_LEAVE, "leave", SM_NOSHOW)
+OPCODEMAP(CEE_LEAVE_S, "leave.s", SM_LEAVE_S)
+OPCODEMAP(CEE_STIND_I, "stind.i", SM_STIND_I)
+OPCODEMAP(CEE_CONV_U, "conv.u", SM_CONV_U)
+OPCODEMAP(CEE_UNUSED26, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED27, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED28, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED29, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED30, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED31, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED32, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED33, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED34, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED35, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED36, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED37, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED38, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED39, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED40, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED41, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED42, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED43, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED44, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED45, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED46, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED47, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED48, "unused", SM_UNUSED)
+OPCODEMAP(CEE_PREFIX7, "prefix7", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIX6, "prefix6", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIX5, "prefix5", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIX4, "prefix4", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIX3, "prefix3", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIX2, "prefix2", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIX1, "prefix1", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIXREF, "prefixref", SM_PREFIX_N)
+
+OPCODEMAP(CEE_ARGLIST, "arglist", SM_ARGLIST)
+OPCODEMAP(CEE_CEQ, "ceq", SM_CEQ)
+OPCODEMAP(CEE_CGT, "cgt", SM_CGT)
+OPCODEMAP(CEE_CGT_UN, "cgt.un", SM_CGT_UN)
+OPCODEMAP(CEE_CLT, "clt", SM_CLT)
+OPCODEMAP(CEE_CLT_UN, "clt.un", SM_CLT_UN)
+OPCODEMAP(CEE_LDFTN, "ldftn", SM_LDFTN)
+OPCODEMAP(CEE_LDVIRTFTN, "ldvirtftn", SM_LDVIRTFTN)
+OPCODEMAP(CEE_UNUSED56, "unused", SM_UNUSED)
+OPCODEMAP(CEE_LDARG, "ldarg", SM_LONG_LOC_ARG)
+OPCODEMAP(CEE_LDARGA, "ldarga", SM_LONG_LOC_ARG)
+OPCODEMAP(CEE_STARG, "starg", SM_LONG_LOC_ARG)
+OPCODEMAP(CEE_LDLOC, "ldloc", SM_LONG_LOC_ARG)
+OPCODEMAP(CEE_LDLOCA, "ldloca", SM_LONG_LOC_ARG)
+OPCODEMAP(CEE_STLOC, "stloc", SM_LONG_LOC_ARG)
+OPCODEMAP(CEE_LOCALLOC, "localloc", SM_LOCALLOC)
+OPCODEMAP(CEE_UNUSED57, "unused", SM_UNUSED)
+OPCODEMAP(CEE_ENDFILTER, "endfilter", SM_NOSHOW)
+OPCODEMAP(CEE_UNALIGNED, "unaligned.", SM_UNALIGNED)
+OPCODEMAP(CEE_VOLATILE, "volatile.", SM_VOLATILE)
+OPCODEMAP(CEE_TAILCALL, "tail.", SM_TAILCALL)
+OPCODEMAP(CEE_INITOBJ, "initobj", SM_INITOBJ)
+OPCODEMAP(CEE_CONSTRAINED, "constrained.", SM_CONSTRAINED)
+OPCODEMAP(CEE_CPBLK, "cpblk", SM_CPBLK)
+OPCODEMAP(CEE_INITBLK, "initblk", SM_INITBLK)
+OPCODEMAP(CEE_UNUSED69, "unused", SM_UNUSED)
+OPCODEMAP(CEE_RETHROW, "rethrow", SM_RETHROW)
+OPCODEMAP(CEE_UNUSED51, "unused", SM_UNUSED)
+OPCODEMAP(CEE_SIZEOF, "sizeof", SM_SIZEOF)
+OPCODEMAP(CEE_REFANYTYPE, "refanytype", SM_REFANYTYPE)
+OPCODEMAP(CEE_READONLY, "readonly.", SM_READONLY)
+OPCODEMAP(CEE_UNUSED53, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED54, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED55, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED70, "unused", SM_UNUSED)
+
+// These are not real opcodes, but they are handy internally in the EE
+
+OPCODEMAP(CEE_ILLEGAL, "illegal", SM_UNUSED)
+OPCODEMAP(CEE_MACRO_END, "endmac", SM_UNUSED)
+OPCODEMAP(CEE_CODE_LABEL, "codelabel", SM_UNUSED)
+
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//
+// New opcodes added just for the state machine.
+//
+// Do NOT uncomment or delete the following lines.
+// They are there so that makeSmOpcodeDef.pl can automatically generate the
+// smopcode.def that contains these new SM_ opcodes.
+//
+// OPCODEMAP(CEE_DUMMY, "ldarga.s(normed)", SM_LDARGA_S_NORMED)
+// OPCODEMAP(CEE_DUMMY, "ldloca.s(normed)", SM_LDLOCA_S_NORMED)
+
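To make the mapping above concrete, here is one typical way an X-macro .def like this gets consumed. This is a hedged sketch, not the actual sm.cpp code from this change; it assumes the OPCODEMAP entries appear in CEE opcode numeric order (which the interleaved unused placeholders suggest), and the array name s_CeeToSmOpcode is made up for illustration.

    #include "smopenum.h" // for SM_OPCODE (added later in this diff)

    // Expand every OPCODEMAP(eename, eestring, smname) row into just its SM opcode,
    // producing a table indexed by the CEE opcode value.
    static const SM_OPCODE s_CeeToSmOpcode[] = {
    #define OPCODEMAP(eename, eestring, smname) smname,
    #include "smopcodemap.def"
    #undef OPCODEMAP
    };

    // Usage sketch: SM_OPCODE smOp = s_CeeToSmOpcode[CEE_LDARG_0]; // yields SM_LDARG_0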
diff --git a/src/jit/smopenum.h b/src/jit/smopenum.h
new file mode 100644
index 0000000000..978bbc2c3b
--- /dev/null
+++ b/src/jit/smopenum.h
@@ -0,0 +1,17 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef __smopenum_h__
+#define __smopenum_h__
+
+typedef enum smopcode_t {
+#define SMOPDEF(smname, string) smname,
+#include "smopcode.def"
+#undef SMOPDEF
+
+ SM_COUNT, /* number of state machine opcodes */
+
+} SM_OPCODE;
+
+#endif /* __smopenum_h__ */
diff --git a/src/jit/smweights.cpp b/src/jit/smweights.cpp
new file mode 100644
index 0000000000..f93d739b61
--- /dev/null
+++ b/src/jit/smweights.cpp
@@ -0,0 +1,274 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//
+// Automatically generated code. DO NOT MODIFY!
+// To generate this file, do
+// "WeightsArrayGen.pl matrix.txt results.txt > SMWeights.cpp"
+//
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+#include "jitpch.h"
+
+#define DEFAULT_WEIGHT_VALUE 65 // This is the average of all the weights.
+
+#define NA 9999
+
+const short g_StateWeights[] = {
+ NA, // state 0
+ NA, // state 1
+ DEFAULT_WEIGHT_VALUE, // state 2 [noshow]
+ 10, // state 3 [ldarg.0]
+ 16, // state 4 [ldarg.1]
+ 35, // state 5 [ldarg.2]
+ 28, // state 6 [ldarg.3]
+ 12, // state 7 [ldloc.0]
+ 9, // state 8 [ldloc.1]
+ 22, // state 9 [ldloc.2]
+ 24, // state 10 [ldloc.3]
+ 6, // state 11 [stloc.0]
+ 34, // state 12 [stloc.1]
+ 4, // state 13 [stloc.2]
+ 49, // state 14 [stloc.3]
+ 32, // state 15 [ldarg.s]
+ 77, // state 16 [ldarga.s]
+ 21, // state 17 [starg.s]
+ 32, // state 18 [ldloc.s]
+ 61, // state 19 [ldloca.s]
+ -45, // state 20 [stloc.s]
+ 7, // state 21 [ldnull]
+ 22, // state 22 [ldc.i4.m1]
+ 15, // state 23 [ldc.i4.0]
+ 28, // state 24 [ldc.i4.1]
+ 34, // state 25 [ldc.i4.2]
+ -6, // state 26 [ldc.i4.3]
+ 20, // state 27 [ldc.i4.4]
+ 4, // state 28 [ldc.i4.5]
+ 10, // state 29 [ldc.i4.6]
+ 56, // state 30 [ldc.i4.7]
+ 42, // state 31 [ldc.i4.8]
+ 41, // state 32 [ldc.i4.s]
+ 38, // state 33 [ldc.i4]
+ 160, // state 34 [ldc.i8]
+ 33, // state 35 [ldc.r4]
+ 113, // state 36 [ldc.r8]
+ DEFAULT_WEIGHT_VALUE, // state 37 [unused]
+ 11, // state 38 [dup]
+ -24, // state 39 [pop]
+ 79, // state 40 [call]
+ DEFAULT_WEIGHT_VALUE, // state 41 [calli]
+ 19, // state 42 [ret]
+ 44, // state 43 [br.s]
+ 27, // state 44 [brfalse.s]
+ 25, // state 45 [brtrue.s]
+ 6, // state 46 [beq.s]
+ 20, // state 47 [bge.s]
+ 33, // state 48 [bgt.s]
+ 53, // state 49 [ble.s]
+ 28, // state 50 [blt.s]
+ 12, // state 51 [bne.un.s]
+ 85, // state 52 [bge.un.s]
+ -52, // state 53 [bgt.un.s]
+ 147, // state 54 [ble.un.s]
+ -63, // state 55 [blt.un.s]
+ DEFAULT_WEIGHT_VALUE, // state 56 [long.branch]
+ 116, // state 57 [switch]
+ -19, // state 58 [ldind.i1]
+ 17, // state 59 [ldind.u1]
+ -18, // state 60 [ldind.i2]
+ 10, // state 61 [ldind.u2]
+ -11, // state 62 [ldind.i4]
+ -33, // state 63 [ldind.u4]
+ 41, // state 64 [ldind.i8]
+ -110, // state 65 [ldind.i]
+ 31, // state 66 [ldind.r4]
+ 45, // state 67 [ldind.r8]
+ 1, // state 68 [ldind.ref]
+ 60, // state 69 [stind.ref]
+ 36, // state 70 [stind.i1]
+ 40, // state 71 [stind.i2]
+ 11, // state 72 [stind.i4]
+ 84, // state 73 [stind.i8]
+ 50, // state 74 [stind.r4]
+ 73, // state 75 [stind.r8]
+ -12, // state 76 [add]
+ -15, // state 77 [sub]
+ -9, // state 78 [mul]
+ 35, // state 79 [div]
+ 89, // state 80 [div.un]
+ 89, // state 81 [rem]
+ 82, // state 82 [rem.un]
+ -5, // state 83 [and]
+ -7, // state 84 [or]
+ 35, // state 85 [xor]
+ 0, // state 86 [shl]
+ 17, // state 87 [shr]
+ 27, // state 88 [shr.un]
+ 58, // state 89 [neg]
+ 19, // state 90 [not]
+ 78, // state 91 [conv.i1]
+ 54, // state 92 [conv.i2]
+ 2, // state 93 [conv.i4]
+ 99, // state 94 [conv.i8]
+ 273, // state 95 [conv.r4]
+ 197, // state 96 [conv.r8]
+ 45, // state 97 [conv.u4]
+ 55, // state 98 [conv.u8]
+ 83, // state 99 [callvirt]
+ DEFAULT_WEIGHT_VALUE, // state 100 [cpobj]
+ 29, // state 101 [ldobj]
+ 66, // state 102 [ldstr]
+ 227, // state 103 [newobj]
+ 261, // state 104 [castclass]
+ 166, // state 105 [isinst]
+ 209, // state 106 [conv.r.un]
+ DEFAULT_WEIGHT_VALUE, // state 107 [unbox]
+ 210, // state 108 [throw]
+ 18, // state 109 [ldfld]
+ 17, // state 110 [ldflda]
+ 31, // state 111 [stfld]
+ 159, // state 112 [ldsfld]
+ 177, // state 113 [ldsflda]
+ 125, // state 114 [stsfld]
+ 36, // state 115 [stobj]
+ 148, // state 116 [ovf.notype.un]
+ 247, // state 117 [box]
+ 152, // state 118 [newarr]
+ 7, // state 119 [ldlen]
+ 145, // state 120 [ldelema]
+ 103, // state 121 [ldelem.i1]
+ 91, // state 122 [ldelem.u1]
+ 267, // state 123 [ldelem.i2]
+ 148, // state 124 [ldelem.u2]
+ 92, // state 125 [ldelem.i4]
+ 213, // state 126 [ldelem.u4]
+ 223, // state 127 [ldelem.i8]
+ DEFAULT_WEIGHT_VALUE, // state 128 [ldelem.i]
+ DEFAULT_WEIGHT_VALUE, // state 129 [ldelem.r4]
+ 549, // state 130 [ldelem.r8]
+ 81, // state 131 [ldelem.ref]
+ DEFAULT_WEIGHT_VALUE, // state 132 [stelem.i]
+ 14, // state 133 [stelem.i1]
+ 23, // state 134 [stelem.i2]
+ 66, // state 135 [stelem.i4]
+ 254, // state 136 [stelem.i8]
+ DEFAULT_WEIGHT_VALUE, // state 137 [stelem.r4]
+ DEFAULT_WEIGHT_VALUE, // state 138 [stelem.r8]
+ 94, // state 139 [stelem.ref]
+ DEFAULT_WEIGHT_VALUE, // state 140 [ldelem]
+ DEFAULT_WEIGHT_VALUE, // state 141 [stelem]
+ 274, // state 142 [unbox.any]
+ DEFAULT_WEIGHT_VALUE, // state 143 [conv.ovf.i1]
+ DEFAULT_WEIGHT_VALUE, // state 144 [conv.ovf.u1]
+ DEFAULT_WEIGHT_VALUE, // state 145 [conv.ovf.i2]
+ DEFAULT_WEIGHT_VALUE, // state 146 [conv.ovf.u2]
+ 242, // state 147 [conv.ovf.i4]
+ DEFAULT_WEIGHT_VALUE, // state 148 [conv.ovf.u4]
+ 293, // state 149 [conv.ovf.i8]
+ 293, // state 150 [conv.ovf.u8]
+ DEFAULT_WEIGHT_VALUE, // state 151 [refanyval]
+ DEFAULT_WEIGHT_VALUE, // state 152 [ckfinite]
+ -17, // state 153 [mkrefany]
+ 32, // state 154 [ldtoken]
+ 25, // state 155 [conv.u2]
+ 50, // state 156 [conv.u1]
+ -0, // state 157 [conv.i]
+ 178, // state 158 [conv.ovf.i]
+ DEFAULT_WEIGHT_VALUE, // state 159 [conv.ovf.u]
+ DEFAULT_WEIGHT_VALUE, // state 160 [add.ovf]
+ DEFAULT_WEIGHT_VALUE, // state 161 [mul.ovf]
+ DEFAULT_WEIGHT_VALUE, // state 162 [sub.ovf]
+ -17, // state 163 [leave.s]
+ 182, // state 164 [stind.i]
+ -36, // state 165 [conv.u]
+ DEFAULT_WEIGHT_VALUE, // state 166 [prefix.n]
+ 120, // state 167 [arglist]
+ 20, // state 168 [ceq]
+ -1, // state 169 [cgt]
+ 47, // state 170 [cgt.un]
+ 26, // state 171 [clt]
+ 85, // state 172 [clt.un]
+ 102, // state 173 [ldftn]
+ 234, // state 174 [ldvirtftn]
+ DEFAULT_WEIGHT_VALUE, // state 175 [long.loc.arg]
+ 347, // state 176 [localloc]
+ DEFAULT_WEIGHT_VALUE, // state 177 [unaligned]
+ -44, // state 178 [volatile]
+ DEFAULT_WEIGHT_VALUE, // state 179 [tailcall]
+ 55, // state 180 [initobj]
+ DEFAULT_WEIGHT_VALUE, // state 181 [constrained]
+ DEFAULT_WEIGHT_VALUE, // state 182 [cpblk]
+ DEFAULT_WEIGHT_VALUE, // state 183 [initblk]
+ DEFAULT_WEIGHT_VALUE, // state 184 [rethrow]
+ 38, // state 185 [sizeof]
+ -68, // state 186 [refanytype]
+ DEFAULT_WEIGHT_VALUE, // state 187 [readonly]
+ 55, // state 188 [ldarga.s.normed]
+ 35, // state 189 [ldloca.s.normed]
+ 161, // state 190 [constrained -> callvirt]
+ 31, // state 191 [ldarg.0 -> ldfld]
+ 29, // state 192 [ldarg.1 -> ldfld]
+ 22, // state 193 [ldarg.2 -> ldfld]
+ 321, // state 194 [ldarg.3 -> ldfld]
+ 46, // state 195 [ldarga.s -> ldfld]
+ 8, // state 196 [ldloca.s -> ldfld]
+ 19, // state 197 [ldarga.s.normed -> ldfld]
+ -35, // state 198 [ldloca.s.normed -> ldfld]
+ 20, // state 199 [stloc.0 -> ldloc.0]
+ -7, // state 200 [stloc.1 -> ldloc.1]
+ -10, // state 201 [stloc.2 -> ldloc.2]
+ -4, // state 202 [stloc.3 -> ldloc.3]
+ DEFAULT_WEIGHT_VALUE, // state 203 [ldc.r4 -> add]
+ DEFAULT_WEIGHT_VALUE, // state 204 [ldc.r4 -> sub]
+ DEFAULT_WEIGHT_VALUE, // state 205 [ldc.r4 -> mul]
+ DEFAULT_WEIGHT_VALUE, // state 206 [ldc.r4 -> div]
+ 52, // state 207 [ldc.r8 -> add]
+ DEFAULT_WEIGHT_VALUE, // state 208 [ldc.r8 -> sub]
+ -169, // state 209 [ldc.r8 -> mul]
+ -17, // state 210 [ldc.r8 -> div]
+ DEFAULT_WEIGHT_VALUE, // state 211 [conv.r4 -> add]
+ DEFAULT_WEIGHT_VALUE, // state 212 [conv.r4 -> sub]
+ DEFAULT_WEIGHT_VALUE, // state 213 [conv.r4 -> mul]
+ DEFAULT_WEIGHT_VALUE, // state 214 [conv.r4 -> div]
+ 358, // state 215 [conv.r8 -> mul]
+ DEFAULT_WEIGHT_VALUE, // state 216 [conv.r8 -> div]
+ NA, // state 217
+ 32, // state 218 [ldarg.0 -> ldc.i4.0 -> stfld]
+ NA, // state 219
+ DEFAULT_WEIGHT_VALUE, // state 220 [ldarg.0 -> ldc.r4 -> stfld]
+ NA, // state 221
+ 38, // state 222 [ldarg.0 -> ldc.r8 -> stfld]
+ NA, // state 223
+ NA, // state 224
+ 64, // state 225 [ldarg.0 -> ldarg.1 -> ldfld -> stfld]
+ 69, // state 226 [ldarg.0 -> ldarg.1 -> stfld]
+ NA, // state 227
+ 98, // state 228 [ldarg.0 -> ldarg.2 -> stfld]
+ NA, // state 229
+ 97, // state 230 [ldarg.0 -> ldarg.3 -> stfld]
+ NA, // state 231
+ NA, // state 232
+ NA, // state 233
+ NA, // state 234
+ 34, // state 235 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> add -> stfld]
+ NA, // state 236
+ -10, // state 237 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> sub -> stfld]
+ NA, // state 238
+ DEFAULT_WEIGHT_VALUE, // state 239 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> mul -> stfld]
+ NA, // state 240
+ DEFAULT_WEIGHT_VALUE, // state 241 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> div -> stfld]
+ NA, // state 242
+ NA, // state 243
+ DEFAULT_WEIGHT_VALUE, // state 244 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld -> add]
+ DEFAULT_WEIGHT_VALUE, // state 245 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld -> sub]
+ NA, // state 246
+ NA, // state 247
+ DEFAULT_WEIGHT_VALUE, // state 248 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> add]
+ DEFAULT_WEIGHT_VALUE, // state 249 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> sub]
+};
+
+static_assert_no_msg(NUM_SM_STATES == sizeof(g_StateWeights) / sizeof(g_StateWeights[0]));
+
+const short* gp_StateWeights = g_StateWeights;
diff --git a/src/jit/ssabuilder.cpp b/src/jit/ssabuilder.cpp
new file mode 100644
index 0000000000..2da6902464
--- /dev/null
+++ b/src/jit/ssabuilder.cpp
@@ -0,0 +1,1903 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX SSA XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#include "ssaconfig.h"
+#include "ssarenamestate.h"
+#include "ssabuilder.h"
+
+namespace
+{
+/**
+ * Visits basic blocks in depth-first order and arranges them in the order of
+ * their DFS finish time.
+ *
+ * @param block The fgFirstBB or entry block.
+ * @param comp A pointer to the compiler.
+ * @param visited In/out array of size at least fgMaxBBNum, initialized to false.
+ * @param count Out pointer for the count of all nodes reachable by DFS.
+ * @param postOrder Out pointer to an array of size at least fgMaxBBNum that receives the blocks in post order.
+ */
+static void TopologicalSortHelper(BasicBlock* block, Compiler* comp, bool* visited, int* count, BasicBlock** postOrder)
+{
+ visited[block->bbNum] = true;
+
+ ArrayStack<BasicBlock*> blocks(comp);
+ ArrayStack<AllSuccessorIter> iterators(comp);
+ ArrayStack<AllSuccessorIter> ends(comp);
+
+    // Three stacks are used here, and all should have the same height:
+    // the first holds the blocks,
+    // the second holds the iterator that tracks which successor of the block we are looking at,
+    // and the third holds the corresponding end-marker iterator.
+ blocks.Push(block);
+ iterators.Push(block->GetAllSuccs(comp).begin());
+ ends.Push(block->GetAllSuccs(comp).end());
+
+ while (blocks.Height() > 0)
+ {
+ block = blocks.Top();
+
+#ifdef DEBUG
+ if (comp->verboseSsa)
+ {
+ printf("[SsaBuilder::TopologicalSortHelper] Visiting BB%02u: ", block->bbNum);
+ printf("[");
+ unsigned numSucc = block->NumSucc(comp);
+ for (unsigned i = 0; i < numSucc; ++i)
+ {
+ printf("BB%02u, ", block->GetSucc(i, comp)->bbNum);
+ }
+ EHSuccessorIter end = block->GetEHSuccs(comp).end();
+ for (EHSuccessorIter ehsi = block->GetEHSuccs(comp).begin(); ehsi != end; ++ehsi)
+ {
+ printf("[EH]BB%02u, ", (*ehsi)->bbNum);
+ }
+ printf("]\n");
+ }
+#endif
+
+ if (iterators.TopRef() != ends.TopRef())
+ {
+            // If the block on TOS still has unvisited successors, visit them.
+ AllSuccessorIter& iter = iterators.TopRef();
+ BasicBlock* succ = *iter;
+ ++iter;
+ // push the child
+
+ if (!visited[succ->bbNum])
+ {
+ blocks.Push(succ);
+ iterators.Push(succ->GetAllSuccs(comp).begin());
+ ends.Push(succ->GetAllSuccs(comp).end());
+ visited[succ->bbNum] = true;
+ }
+ }
+ else
+ {
+ // all successors have been visited
+ blocks.Pop();
+ iterators.Pop();
+ ends.Pop();
+
+            postOrder[*count] = block;
+            block->bbPostOrderNum = *count;
+
+            DBG_SSA_JITDUMP("postOrder[%d] = [%p] and BB%02u\n", *count, dspPtr(block), block->bbNum);
+
+            *count += 1;
+ }
+ }
+}
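+
+// Illustrative sketch (the range-for over GetAllSuccs is assumed for brevity): the three parallel
+// stacks above simulate the recursive DFS
+//
+//     void Dfs(BasicBlock* b)
+//     {
+//         visited[b->bbNum] = true;
+//         for (BasicBlock* succ : b->GetAllSuccs(comp))
+//         {
+//             if (!visited[succ->bbNum])
+//             {
+//                 Dfs(succ);
+//             }
+//         }
+//         postOrder[(*count)++] = b; // record b at its DFS finish time
+//     }
+//
+// without recursing on the native stack. For the diamond A -> B, A -> C, B -> D, C -> D, one
+// possible result is postOrder = { D, B, C, A }; the entry block always lands last.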
+
+/**
+ * Method that finds a common IDom parent, much like least common ancestor.
+ *
+ * @param finger1 A basic block that might share IDom ancestor with finger2.
+ * @param finger2 A basic block that might share IDom ancestor with finger1.
+ *
+ * @see "A simple, fast dominance algorithm" by Keith D. Cooper, Timothy J. Harvey, Ken Kennedy.
+ *
+ * @return A basic block whose IDom is the dominator for finger1 and finger2,
+ * or else NULL. This may be called while immediate dominators are being
+ * computed, and if the input values are members of the same loop (each reachable from the other),
+ * then one may not yet have its immediate dominator computed when we are attempting
+ *         to find the immediate dominator of the other. So a NULL return value means that
+ *         the two inputs are in a cycle, not that they don't have a common dominator ancestor.
+ */
+static inline BasicBlock* IntersectDom(BasicBlock* finger1, BasicBlock* finger2)
+{
+ while (finger1 != finger2)
+ {
+ if (finger1 == nullptr || finger2 == nullptr)
+ {
+ return nullptr;
+ }
+ while (finger1 != nullptr && finger1->bbPostOrderNum < finger2->bbPostOrderNum)
+ {
+ finger1 = finger1->bbIDom;
+ }
+ if (finger1 == nullptr)
+ {
+ return nullptr;
+ }
+ while (finger2 != nullptr && finger2->bbPostOrderNum < finger1->bbPostOrderNum)
+ {
+ finger2 = finger2->bbIDom;
+ }
+ }
+ return finger1;
+}
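+
+// Illustrative sketch (block names and postorder numbers assumed): take the flow graph
+// A -> B, B -> C, B -> D, whose dominator tree is A -> B -> {C, D}, with bbPostOrderNum values
+// D=0, C=1, B=2, A=3 (the entry block always has the highest number). IntersectDom(C, D)
+// repeatedly walks the finger with the smaller postorder number up its bbIDom chain:
+//
+//     finger1 = C(1), finger2 = D(0)  -->  finger2 = D->bbIDom = B(2)
+//     finger1 = C(1), finger2 = B(2)  -->  finger1 = C->bbIDom = B(2)
+//     finger1 == finger2 == B         -->  return B, the nearest common dominator of C and D
+//
+// A nullptr return only occurs while IDoms are still being computed, when a bbIDom chain runs
+// out before the fingers meet (the cycle case described in the comment above).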
+
+} // end of anonymous namespace.
+
+// =================================================================================
+// SSA
+// =================================================================================
+
+void Compiler::fgSsaBuild()
+{
+ IAllocator* pIAllocator = new (this, CMK_SSA) CompAllocator(this, CMK_SSA);
+
+ // If this is not the first invocation, reset data structures for SSA.
+ if (fgSsaPassesCompleted > 0)
+ {
+ fgResetForSsa();
+ }
+
+ SsaBuilder builder(this, pIAllocator);
+ builder.Build();
+ fgSsaPassesCompleted++;
+#ifdef DEBUG
+ JitTestCheckSSA();
+#endif // DEBUG
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("\nAfter fgSsaBuild:\n");
+ fgDispBasicBlocks(/*dumpTrees*/ true);
+ }
+#endif // DEBUG
+}
+
+void Compiler::fgResetForSsa()
+{
+ for (unsigned i = 0; i < lvaCount; ++i)
+ {
+ lvaTable[i].lvPerSsaData.Reset();
+ }
+ for (BasicBlock* blk = fgFirstBB; blk != nullptr; blk = blk->bbNext)
+ {
+ // Eliminate phis.
+ blk->bbHeapSsaPhiFunc = nullptr;
+ if (blk->bbTreeList != nullptr)
+ {
+ GenTreePtr last = blk->bbTreeList->gtPrev;
+ blk->bbTreeList = blk->FirstNonPhiDef();
+ if (blk->bbTreeList != nullptr)
+ {
+ blk->bbTreeList->gtPrev = last;
+ }
+ }
+ }
+}
+
+/**
+ * Constructor for the SSA builder.
+ *
+ * @param pCompiler Current compiler instance.
+ *
+ * @remarks Initializes the class and member pointers/objects that use constructors.
+ */
+SsaBuilder::SsaBuilder(Compiler* pCompiler, IAllocator* pIAllocator)
+ : m_pCompiler(pCompiler)
+ , m_allocator(pIAllocator)
+
+#ifdef SSA_FEATURE_DOMARR
+ , m_pDomPreOrder(NULL)
+ , m_pDomPostOrder(NULL)
+#endif
+#ifdef SSA_FEATURE_USEDEF
+ , m_uses(jitstd::allocator<void>(pIAllocator))
+ , m_defs(jitstd::allocator<void>(pIAllocator))
+#endif
+{
+}
+
+/**
+ * Topologically sort the graph and return the number of nodes visited.
+ *
+ * @param postOrder The array in which the arranged basic blocks have to be returned.
+ * @param count The size of the postOrder array.
+ *
+ * @return The number of nodes visited while performing DFS on the graph.
+ */
+int SsaBuilder::TopologicalSort(BasicBlock** postOrder, int count)
+{
+ // Allocate and initialize visited flags.
+ bool* visited = (bool*)alloca(count * sizeof(bool));
+ memset(visited, 0, count * sizeof(bool));
+
+ // Display basic blocks.
+ DBEXEC(VERBOSE, m_pCompiler->fgDispBasicBlocks());
+ DBEXEC(VERBOSE, m_pCompiler->fgDispHandlerTab());
+
+ // Call the recursive helper.
+ int postIndex = 0;
+ TopologicalSortHelper(m_pCompiler->fgFirstBB, m_pCompiler, visited, &postIndex, postOrder);
+
+ // In the absence of EH (because catch/finally have no preds), this should be valid.
+ // assert(postIndex == (count - 1));
+
+ return postIndex;
+}
+
+/**
+ * Computes the immediate dominator IDom for each block iteratively.
+ *
+ * @param postOrder The array of basic blocks arranged in postOrder.
+ * @param count The size of valid elements in the postOrder array.
+ *
+ * @see "A simple, fast dominance algorithm" by Cooper, Harvey, and Kennedy.
+ */
+void SsaBuilder::ComputeImmediateDom(BasicBlock** postOrder, int count)
+{
+ JITDUMP("[SsaBuilder::ComputeImmediateDom]\n");
+
+ // TODO-Cleanup: We currently have two dominance computations happening. We should unify them; for
+ // now, at least forget the results of the first.
+ for (BasicBlock* blk = m_pCompiler->fgFirstBB; blk != nullptr; blk = blk->bbNext)
+ {
+ blk->bbIDom = nullptr;
+ }
+
+ // Add entry point to processed as its IDom is NULL.
+ BitVecTraits traits(m_pCompiler->fgBBNumMax + 1, m_pCompiler);
+ BitVec BITVEC_INIT_NOCOPY(processed, BitVecOps::MakeEmpty(&traits));
+
+ BitVecOps::AddElemD(&traits, processed, m_pCompiler->fgFirstBB->bbNum);
+ assert(postOrder[count - 1] == m_pCompiler->fgFirstBB);
+
+ bool changed = true;
+ while (changed)
+ {
+ changed = false;
+
+ // In reverse post order, except for the entry block (count - 1 is entry BB).
+ for (int i = count - 2; i >= 0; --i)
+ {
+ BasicBlock* block = postOrder[i];
+
+ DBG_SSA_JITDUMP("Visiting in reverse post order: BB%02u.\n", block->bbNum);
+
+ // Find the first processed predecessor block.
+ BasicBlock* predBlock = nullptr;
+ for (flowList* pred = m_pCompiler->BlockPredsWithEH(block); pred; pred = pred->flNext)
+ {
+ if (BitVecOps::IsMember(&traits, processed, pred->flBlock->bbNum))
+ {
+ predBlock = pred->flBlock;
+ break;
+ }
+ }
+
+            // There could be just a single basic block, so check whether there were any preds.
+ if (predBlock != nullptr)
+ {
+ DBG_SSA_JITDUMP("Pred block is BB%02u.\n", predBlock->bbNum);
+ }
+
+ // Intersect DOM, if computed, for all predecessors.
+ BasicBlock* bbIDom = predBlock;
+ for (flowList* pred = m_pCompiler->BlockPredsWithEH(block); pred; pred = pred->flNext)
+ {
+ if (predBlock != pred->flBlock)
+ {
+ BasicBlock* domAncestor = IntersectDom(pred->flBlock, bbIDom);
+ // The result may be NULL if "block" and "pred->flBlock" are part of a
+ // cycle -- neither is guaranteed ordered wrt the other in reverse postorder,
+ // so we may be computing the IDom of "block" before the IDom of "pred->flBlock" has
+ // been computed. But that's OK -- if they're in a cycle, they share the same immediate
+ // dominator, so the contribution of "pred->flBlock" is not necessary to compute
+ // the result.
+ if (domAncestor != nullptr)
+ {
+ bbIDom = domAncestor;
+ }
+ }
+ }
+
+ // Did we change the bbIDom value? If so, we go around the outer loop again.
+ if (block->bbIDom != bbIDom)
+ {
+ changed = true;
+
+ // IDom has changed, update it.
+ DBG_SSA_JITDUMP("bbIDom of BB%02u becomes BB%02u.\n", block->bbNum, bbIDom ? bbIDom->bbNum : 0);
+ block->bbIDom = bbIDom;
+ }
+
+ // Mark the current block as processed.
+ BitVecOps::AddElemD(&traits, processed, block->bbNum);
+
+ DBG_SSA_JITDUMP("Marking block BB%02u as processed.\n", block->bbNum);
+ }
+ }
+}
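+
+// Illustrative sketch (block names assumed): for the flow graph
+//
+//     A -> B,  B -> C,  C -> B (back edge),  C -> D
+//
+// the reverse post order is A, B, C, D. On the first pass, B's predecessor C has no bbIDom yet,
+// so IntersectDom(C, A) returns nullptr and B keeps bbIDom = A; C then gets bbIDom = B, and D
+// gets bbIDom = C. The second pass revisits every block with all of its predecessors processed,
+// finds nothing to update, leaves 'changed' false, and the loop terminates. This is the
+// fixed-point iteration described in the Cooper-Harvey-Kennedy paper cited above.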
+
+#ifdef SSA_FEATURE_DOMARR
+/**
+ * Walk the DOM tree and compute pre and post-order arrangement of the tree.
+ *
+ * @param curBlock The current block being operated on at some recursive level.
+ * @param domTree The DOM tree as a map (block -> set of child blocks.)
+ * @param preIndex The initial index given to the first block visited in pre order.
+ * @param postIndex The initial index given to the first block visited in post order.
+ *
+ * @remarks This would help us answer queries such as "a dom b?" in constant time.
+ *          For example, if a dominates b, then Pre[a] < Pre[b] but Post[a] > Post[b].
+ */
+void SsaBuilder::DomTreeWalk(BasicBlock* curBlock, BlkToBlkSetMap* domTree, int* preIndex, int* postIndex)
+{
+ JITDUMP("[SsaBuilder::DomTreeWalk] block [%p], BB%02u:\n", dspPtr(curBlock), curBlock->bbNum);
+
+ // Store the order number at the block number in the pre order list.
+ m_pDomPreOrder[curBlock->bbNum] = *preIndex;
+ ++(*preIndex);
+
+ BlkSet* pBlkSet;
+ if (domTree->Lookup(curBlock, &pBlkSet))
+ {
+ for (BlkSet::KeyIterator ki = pBlkSet->Begin(); !ki.Equal(pBlkSet->End()); ++ki)
+ {
+ if (curBlock != ki.Get())
+ {
+ DomTreeWalk(ki.Get(), domTree, preIndex, postIndex);
+ }
+ }
+ }
+
+ // Store the order number at the block number in the post order list.
+ m_pDomPostOrder[curBlock->bbNum] = *postIndex;
+ ++(*postIndex);
+}
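+
+// Illustrative sketch (the helper name is assumed): with the two arrays filled in by DomTreeWalk,
+// an "a dominates b?" query reduces to two integer comparisons:
+//
+//     bool DominatesViaDomArr(BasicBlock* a, BasicBlock* b)
+//     {
+//         // a is an ancestor of b in the DOM tree iff the walk enters a no later than b
+//         // and leaves a no earlier than b; using <= / >= makes the test reflexive.
+//         return (m_pDomPreOrder[a->bbNum] <= m_pDomPreOrder[b->bbNum]) &&
+//                (m_pDomPostOrder[a->bbNum] >= m_pDomPostOrder[b->bbNum]);
+//     }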
+#endif
+
+/**
+ * Using IDom of each basic block, add a mapping from block->IDom -> block.
+ * @param pCompiler Compiler instance
+ * @param block The basic block that will become the child node of its IDom.
+ * @param domTree The output domTree which will hold the mapping "block->bbIDom" -> "block"
+ *
+ */
+/* static */
+void SsaBuilder::ConstructDomTreeForBlock(Compiler* pCompiler, BasicBlock* block, BlkToBlkSetMap* domTree)
+{
+ BasicBlock* bbIDom = block->bbIDom;
+
+ // bbIDom for (only) fgFirstBB will be NULL.
+ if (bbIDom == nullptr)
+ {
+ return;
+ }
+
+ // If the bbIDom map key doesn't exist, create one.
+ BlkSet* pBlkSet;
+ if (!domTree->Lookup(bbIDom, &pBlkSet))
+ {
+ pBlkSet = new (pCompiler->getAllocator()) BlkSet(pCompiler->getAllocator());
+ domTree->Set(bbIDom, pBlkSet);
+ }
+
+ DBG_SSA_JITDUMP("Inserting BB%02u as dom child of BB%02u.\n", block->bbNum, bbIDom->bbNum);
+ // Insert the block into the block's set.
+ pBlkSet->Set(block, true);
+}
+
+/**
+ * Using IDom of each basic block, compute the whole tree. If a block "b" has IDom "i",
+ * then, block "b" is dominated by "i". The mapping then is i -> { ..., b, ... }, in
+ * other words, "domTree" is a tree represented by nodes mapped to their children.
+ *
+ * @param pCompiler Compiler instance
+ * @param domTree The output domTree which will hold the mapping "block->bbIDom" -> "block"
+ *
+ */
+/* static */
+void SsaBuilder::ComputeDominators(Compiler* pCompiler, BlkToBlkSetMap* domTree)
+{
+ JITDUMP("*************** In SsaBuilder::ComputeDominators(Compiler*, ...)\n");
+
+ // Construct the DOM tree from bbIDom
+ for (BasicBlock* block = pCompiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ ConstructDomTreeForBlock(pCompiler, block, domTree);
+ }
+
+ DBEXEC(pCompiler->verboseSsa, DisplayDominators(domTree));
+}
+
+/**
+ * Compute the DOM tree into a map(block -> set of blocks) adjacency representation.
+ *
+ * Using IDom of each basic block, compute the whole tree. If a block "b" has IDom "i",
+ * then, block "b" is dominated by "i". The mapping then is i -> { ..., b, ... }
+ *
+ * @param postOrder The array of basic blocks arranged in postOrder.
+ * @param count The size of valid elements in the postOrder array.
+ * @param domTree A map of (block -> set of blocks) tree representation that is empty.
+ *
+ */
+void SsaBuilder::ComputeDominators(BasicBlock** postOrder, int count, BlkToBlkSetMap* domTree)
+{
+ JITDUMP("*************** In SsaBuilder::ComputeDominators(BasicBlock** postOrder, int count, ...)\n");
+
+ // Construct the DOM tree from bbIDom
+ for (int i = 0; i < count; ++i)
+ {
+ ConstructDomTreeForBlock(m_pCompiler, postOrder[i], domTree);
+ }
+
+ DBEXEC(m_pCompiler->verboseSsa, DisplayDominators(domTree));
+
+#ifdef SSA_FEATURE_DOMARR
+ // Allocate space for constant time computation of (a DOM b?) query.
+ unsigned bbArrSize = m_pCompiler->fgBBNumMax + 1; // We will use 1-based bbNums as indices into these arrays, so
+ // add 1.
+ m_pDomPreOrder = jitstd::utility::allocate<int>(m_allocator, bbArrSize);
+ m_pDomPostOrder = jitstd::utility::allocate<int>(m_allocator, bbArrSize);
+
+ // Initial counters.
+ int preIndex = 0;
+ int postIndex = 0;
+
+ // Populate the pre and post order of the tree.
+ DomTreeWalk(m_pCompiler->fgFirstBB, domTree, &preIndex, &postIndex);
+#endif
+}
+
+#ifdef DEBUG
+
+/**
+ * Display the DOM tree.
+ *
+ * @param domTree A map of (block -> set of blocks) tree representation.
+ */
+/* static */
+void SsaBuilder::DisplayDominators(BlkToBlkSetMap* domTree)
+{
+ printf("After computing dominator tree: \n");
+ for (BlkToBlkSetMap::KeyIterator nodes = domTree->Begin(); !nodes.Equal(domTree->End()); ++nodes)
+ {
+ printf("BB%02u := {", nodes.Get()->bbNum);
+
+ BlkSet* pBlkSet = nodes.GetValue();
+ for (BlkSet::KeyIterator ki = pBlkSet->Begin(); !ki.Equal(pBlkSet->End()); ++ki)
+ {
+ if (!ki.Equal(pBlkSet->Begin()))
+ {
+ printf(",");
+ }
+ printf("BB%02u", ki.Get()->bbNum);
+ }
+ printf("}\n");
+ }
+}
+
+#endif // DEBUG
+
+// (Spec comment at declaration.)
+// See "A simple, fast dominance algorithm", by Cooper, Harvey, and Kennedy.
+// First we compute the dominance frontier for each block, then we convert these to iterated
+// dominance frontiers by a closure operation.
+BlkToBlkSetMap* SsaBuilder::ComputeIteratedDominanceFrontier(BasicBlock** postOrder, int count)
+{
+ BlkToBlkSetMap* frontier = new (m_pCompiler->getAllocator()) BlkToBlkSetMap(m_pCompiler->getAllocator());
+
+ DBG_SSA_JITDUMP("Computing IDF: First computing DF.\n");
+
+ for (int i = 0; i < count; ++i)
+ {
+ BasicBlock* block = postOrder[i];
+
+ DBG_SSA_JITDUMP("Considering block BB%02u.\n", block->bbNum);
+
+ // Recall that B3 is in the dom frontier of B1 if there exists a B2
+ // such that B1 dom B2, !(B1 dom B3), and B3 is an immediate successor
+ // of B2. (Note that B1 might be the same block as B2.)
+ // In that definition, we're considering "block" to be B3, and trying
+ // to find B1's. To do so, first we consider the predecessors of "block",
+ // searching for candidate B2's -- "block" is obviously an immediate successor
+        // of its immediate predecessors. If there are zero or one preds, then either
+        // there is no pred at all, or the single pred dominates "block", so no suitable B2 exists.
+
+ flowList* blockPreds = m_pCompiler->BlockPredsWithEH(block);
+
+        // If the block has zero or one predecessors, skip it.
+ if (blockPreds == nullptr || blockPreds->flNext == nullptr)
+ {
+ DBG_SSA_JITDUMP(" Has %d preds; skipping.\n", blockPreds == nullptr ? 0 : 1);
+ continue;
+ }
+
+ // Otherwise, there are > 1 preds. Each is a candidate B2 in the definition --
+ // *unless* it dominates "block"/B3.
+
+ for (flowList* pred = blockPreds; pred; pred = pred->flNext)
+ {
+ DBG_SSA_JITDUMP(" Considering predecessor BB%02u.\n", pred->flBlock->bbNum);
+
+ // If we've found a B2, then consider the possible B1's. We start with
+ // B2, since a block dominates itself, then traverse upwards in the dominator
+ // tree, stopping when we reach the root, or the immediate dominator of "block"/B3.
+ // (Note that we are guaranteed to encounter this immediate dominator of "block"/B3:
+ // a predecessor must be dominated by B3's immediate dominator.)
+ // Along this way, make "block"/B3 part of the dom frontier of the B1.
+ // When we reach this immediate dominator, the definition no longer applies, since this
+ // potential B1 *does* dominate "block"/B3, so we stop.
+ for (BasicBlock* b1 = pred->flBlock; (b1 != nullptr) && (b1 != block->bbIDom); // !root && !loop
+ b1 = b1->bbIDom)
+ {
+ DBG_SSA_JITDUMP(" Adding BB%02u to dom frontier of pred dom BB%02u.\n", block->bbNum, b1->bbNum);
+ BlkSet* pBlkSet;
+ if (!frontier->Lookup(b1, &pBlkSet))
+ {
+ pBlkSet = new (m_pCompiler->getAllocator()) BlkSet(m_pCompiler->getAllocator());
+ frontier->Set(b1, pBlkSet);
+ }
+ pBlkSet->Set(block, true);
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (m_pCompiler->verboseSsa)
+ {
+ printf("\nComputed DF:\n");
+ for (int i = 0; i < count; ++i)
+ {
+ BasicBlock* block = postOrder[i];
+ printf("Block BB%02u := {", block->bbNum);
+
+ bool first = true;
+ BlkSet* blkDf;
+ if (frontier->Lookup(block, &blkDf))
+ {
+ for (BlkSet::KeyIterator blkDfIter = blkDf->Begin(); !blkDfIter.Equal(blkDf->End()); blkDfIter++)
+ {
+ if (!first)
+ {
+ printf(",");
+ }
+ printf("BB%02u", blkDfIter.Get()->bbNum);
+ first = false;
+ }
+ }
+ printf("}\n");
+ }
+ }
+#endif
+
+ // Now do the closure operation to make the dominance frontier into an IDF.
+ // There's probably a better way to do this...
+ BlkToBlkSetMap* idf = new (m_pCompiler->getAllocator()) BlkToBlkSetMap(m_pCompiler->getAllocator());
+ for (BlkToBlkSetMap::KeyIterator kiFrontBlks = frontier->Begin(); !kiFrontBlks.Equal(frontier->End());
+ kiFrontBlks++)
+ {
+ // Create IDF(b)
+ BlkSet* blkIdf = new (m_pCompiler->getAllocator()) BlkSet(m_pCompiler->getAllocator());
+ idf->Set(kiFrontBlks.Get(), blkIdf);
+
+ // Keep track of what got newly added to the IDF, so we can go after their DFs.
+ BlkSet* delta = new (m_pCompiler->getAllocator()) BlkSet(m_pCompiler->getAllocator());
+ delta->Set(kiFrontBlks.Get(), true);
+
+ // Now transitively add DF+(delta) to IDF(b), each step gathering new "delta."
+ while (delta->GetCount() > 0)
+ {
+ // Extract a block x to be worked on.
+ BlkSet::KeyIterator ki = delta->Begin();
+ BasicBlock* curBlk = ki.Get();
+ // TODO-Cleanup: Remove(ki) doesn't work correctly in SimplerHash.
+ delta->Remove(curBlk);
+
+ // Get DF(x).
+ BlkSet* blkDf;
+ if (frontier->Lookup(curBlk, &blkDf))
+ {
+ // Add DF(x) to IDF(b) and update "delta" i.e., new additions to IDF(b).
+ for (BlkSet::KeyIterator ki = blkDf->Begin(); !ki.Equal(blkDf->End()); ki++)
+ {
+ if (!blkIdf->Lookup(ki.Get()))
+ {
+ delta->Set(ki.Get(), true);
+ blkIdf->Set(ki.Get(), true);
+ }
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (m_pCompiler->verboseSsa)
+ {
+ printf("\nComputed IDF:\n");
+ for (int i = 0; i < count; ++i)
+ {
+ BasicBlock* block = postOrder[i];
+ printf("Block BB%02u := {", block->bbNum);
+
+ bool first = true;
+ BlkSet* blkIdf;
+ if (idf->Lookup(block, &blkIdf))
+ {
+ for (BlkSet::KeyIterator ki = blkIdf->Begin(); !ki.Equal(blkIdf->End()); ki++)
+ {
+ if (!first)
+ {
+ printf(",");
+ }
+ printf("BB%02u", ki.Get()->bbNum);
+ first = false;
+ }
+ }
+ printf("}\n");
+ }
+ }
+#endif
+
+ return idf;
+}
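+
+// Illustrative sketch (block names assumed): for the diamond
+//
+//     A -> B,  A -> C,  B -> D,  C -> D        (bbIDom(B) = bbIDom(C) = bbIDom(D) = A)
+//
+// only D has more than one predecessor. Walking up from pred B stops at D's IDom (A) after one
+// step, so DF(B) = {D}; likewise DF(C) = {D}, while DF(A) and DF(D) stay empty. The closure loop
+// adds nothing here, so IDF == DF. If a back edge D -> B were added, DF(D) would become {B}, and
+// the "delta" worklist above would pull B transitively into IDF(B), IDF(C), and IDF(D), even
+// though B is not in DF(C) itself.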
+
+/**
+ * Returns the GT_PHI node if the variable already has a phi node in the given block.
+ *
+ * @param block The block for which the existence of a phi node needs to be checked.
+ * @param lclNum The lclNum for which the occurrence of a phi node needs to be checked.
+ *
+ * @return If there is a phi node for the lclNum, returns the GT_PHI tree, else NULL.
+ */
+static GenTree* GetPhiNode(BasicBlock* block, unsigned lclNum)
+{
+ // Walk the statements for phi nodes.
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ // A prefix of the statements of the block are phi definition nodes. If we complete processing
+ // that prefix, exit.
+ if (!stmt->IsPhiDefnStmt())
+ {
+ break;
+ }
+
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+ GenTreePtr phiLhs = tree->gtOp.gtOp1;
+ assert(phiLhs->OperGet() == GT_LCL_VAR);
+ if (phiLhs->gtLclVarCommon.gtLclNum == lclNum)
+ {
+ return tree->gtOp.gtOp2;
+ }
+ }
+ return nullptr;
+}
+
+/**
+ * Inserts phi functions at DF(b) for variables v that are live after the phi
+ * insertion point, i.e., v in live-in(l) for blocks l in DF(b).
+ *
+ * To do so, the function computes liveness, dominance frontier and inserts a phi node,
+ * if we have var v in def(b) and live-in(l) and l is in DF(b).
+ *
+ * @param postOrder The array of basic blocks arranged in postOrder.
+ * @param count The size of valid elements in the postOrder array.
+ */
+void SsaBuilder::InsertPhiFunctions(BasicBlock** postOrder, int count)
+{
+ JITDUMP("*************** In SsaBuilder::InsertPhiFunctions()\n");
+
+ // Compute liveness on the graph.
+ m_pCompiler->fgLocalVarLiveness();
+ EndPhase(PHASE_BUILD_SSA_LIVENESS);
+
+ // Compute dominance frontier.
+ BlkToBlkSetMap* frontier = ComputeIteratedDominanceFrontier(postOrder, count);
+ EndPhase(PHASE_BUILD_SSA_IDF);
+
+ JITDUMP("Inserting phi functions:\n");
+
+ for (int i = 0; i < count; ++i)
+ {
+ BasicBlock* block = postOrder[i];
+ DBG_SSA_JITDUMP("Considering dominance frontier of block BB%02u:\n", block->bbNum);
+
+ // If the block's dominance frontier is empty, go on to the next block.
+ BlkSet* blkIdf;
+ if (!frontier->Lookup(block, &blkIdf))
+ {
+ continue;
+ }
+
+ // For each local var number "lclNum" that "block" assigns to...
+ VARSET_ITER_INIT(m_pCompiler, defVars, block->bbVarDef, varIndex);
+ while (defVars.NextElem(m_pCompiler, &varIndex))
+ {
+ unsigned lclNum = m_pCompiler->lvaTrackedToVarNum[varIndex];
+ DBG_SSA_JITDUMP(" Considering local var V%02u:\n", lclNum);
+
+ if (m_pCompiler->fgExcludeFromSsa(lclNum))
+ {
+ DBG_SSA_JITDUMP(" Skipping because it is excluded.\n");
+ continue;
+ }
+
+ // For each block "bbInDomFront" that is in the dominance frontier of "block"...
+ for (BlkSet::KeyIterator iterBlk = blkIdf->Begin(); !iterBlk.Equal(blkIdf->End()); ++iterBlk)
+ {
+ BasicBlock* bbInDomFront = iterBlk.Get();
+ DBG_SSA_JITDUMP(" Considering BB%02u in dom frontier of BB%02u:\n", bbInDomFront->bbNum,
+ block->bbNum);
+
+ // Check if variable "lclNum" is live in block "*iterBlk".
+ if (!VarSetOps::IsMember(m_pCompiler, bbInDomFront->bbLiveIn, varIndex))
+ {
+ continue;
+ }
+
+ // Check if we've already inserted a phi node.
+ if (GetPhiNode(bbInDomFront, lclNum) == nullptr)
+ {
+ // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of
+ // j. So insert a phi node at l.
+ JITDUMP("Inserting phi definition for V%02u at start of BB%02u.\n", lclNum, bbInDomFront->bbNum);
+
+ GenTreePtr phiLhs = m_pCompiler->gtNewLclvNode(lclNum, m_pCompiler->lvaTable[lclNum].TypeGet());
+
+                    // Create 'phiRhs' as a GT_PHI node for 'lclNum'; it will eventually hold a GT_LIST of
+                    // GT_PHI_ARG nodes. However, we have yet to construct that list, so for now the gtOp1
+                    // of 'phiRhs' is nullptr. It will be replaced with a GT_LIST of GT_PHI_ARG nodes in
+                    // SsaBuilder::AssignPhiNodeRhsVariables() and in SsaBuilder::AddDefToHandlerPhis().
+
+ GenTreePtr phiRhs =
+ m_pCompiler->gtNewOperNode(GT_PHI, m_pCompiler->lvaTable[lclNum].TypeGet(), nullptr);
+
+ GenTreePtr phiAsg = m_pCompiler->gtNewAssignNode(phiLhs, phiRhs);
+
+ GenTreePtr stmt = m_pCompiler->fgInsertStmtAtBeg(bbInDomFront, phiAsg);
+ m_pCompiler->gtSetStmtInfo(stmt);
+ m_pCompiler->fgSetStmtSeq(stmt);
+ }
+ }
+ }
+
+ // Now make a similar phi definition if the block defines Heap.
+ if (block->bbHeapDef)
+ {
+ // For each block "bbInDomFront" that is in the dominance frontier of "block".
+ for (BlkSet::KeyIterator iterBlk = blkIdf->Begin(); !iterBlk.Equal(blkIdf->End()); ++iterBlk)
+ {
+ BasicBlock* bbInDomFront = iterBlk.Get();
+ DBG_SSA_JITDUMP(" Considering BB%02u in dom frontier of BB%02u for Heap phis:\n",
+ bbInDomFront->bbNum, block->bbNum);
+
+ // Check if Heap is live into block "*iterBlk".
+ if (!bbInDomFront->bbHeapLiveIn)
+ {
+ continue;
+ }
+
+ // Check if we've already inserted a phi node.
+ if (bbInDomFront->bbHeapSsaPhiFunc == nullptr)
+ {
+ // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of
+ // j. So insert a phi node at l.
+ JITDUMP("Inserting phi definition for Heap at start of BB%02u.\n", bbInDomFront->bbNum);
+ bbInDomFront->bbHeapSsaPhiFunc = BasicBlock::EmptyHeapPhiDef;
+ }
+ }
+ }
+ }
+ EndPhase(PHASE_BUILD_SSA_INSERT_PHIS);
+}
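+
+// Illustrative sketch (local and block names assumed): continuing the diamond example, if V01 is
+// assigned in B and in C and is live into D, then D is in the iterated dominance frontier of both
+// defining blocks and V01 is in D's bbLiveIn set, so the loop above inserts a single statement
+//
+//     V01 = phi(...)        // GT_ASG(GT_LCL_VAR V01, GT_PHI)
+//
+// at the start of D. The GT_PHI argument list is left empty (gtOp1 == nullptr) here and is filled
+// in later by AssignPhiNodeRhsVariables / AddDefToHandlerPhis, as noted above.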
+
+#ifdef SSA_FEATURE_USEDEF
+/**
+ * Record a use point of a variable.
+ *
+ * The use point is just the tree that is a local variable use.
+ *
+ * @param tree Tree node where an SSA variable is used.
+ *
+ * @remarks The result is in the m_uses map :: [lclNum, ssaNum] -> tree.
+ */
+void SsaBuilder::AddUsePoint(GenTree* tree)
+{
+ assert(tree->IsLocal());
+ SsaVarName key(tree->gtLclVarCommon.gtLclNum, tree->gtLclVarCommon.gtSsaNum);
+ VarToUses::iterator iter = m_uses.find(key);
+ if (iter == m_uses.end())
+ {
+ iter = m_uses.insert(key, VarToUses::mapped_type(m_uses.get_allocator()));
+ }
+ (*iter).second.push_back(tree);
+}
+#endif // !SSA_FEATURE_USEDEF
+
+/**
+ * Record a def point of a variable.
+ *
+ * The def point is just the tree that is a local variable def.
+ *
+ * @param tree Tree node where an SSA variable is def'ed.
+ *
+ * @remarks The result is in the m_defs map :: [lclNum, ssaNum] -> tree.
+ */
+void SsaBuilder::AddDefPoint(GenTree* tree, BasicBlock* blk)
+{
+ Compiler::IndirectAssignmentAnnotation* pIndirAnnot;
+ // In the case of an "indirect assignment", where the LHS is IND of a byref to the local actually being assigned,
+ // we make the ASG tree the def point.
+ assert(tree->IsLocal() || IsIndirectAssign(tree, &pIndirAnnot));
+ unsigned lclNum;
+ unsigned defSsaNum;
+ if (tree->IsLocal())
+ {
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+ defSsaNum = m_pCompiler->GetSsaNumForLocalVarDef(tree);
+ }
+ else
+ {
+ bool b = m_pCompiler->GetIndirAssignMap()->Lookup(tree, &pIndirAnnot);
+ assert(b);
+ lclNum = pIndirAnnot->m_lclNum;
+ defSsaNum = pIndirAnnot->m_defSsaNum;
+ }
+#ifdef DEBUG
+ // Record that there's a new SSA def.
+ m_pCompiler->lvaTable[lclNum].lvNumSsaNames++;
+#endif
+ // Record where the defn happens.
+ LclSsaVarDsc* ssaDef = m_pCompiler->lvaTable[lclNum].GetPerSsaData(defSsaNum);
+ ssaDef->m_defLoc.m_blk = blk;
+ ssaDef->m_defLoc.m_tree = tree;
+
+#ifdef SSA_FEATURE_USEDEF
+ SsaVarName key(lclNum, defSsaNum);
+ VarToDef::iterator iter = m_defs.find(key);
+ if (iter == m_defs.end())
+ {
+ iter = m_defs.insert(key, tree);
+ return;
+ }
+ // There can only be a single definition for an SSA var.
+ unreached();
+#endif
+}
+
+bool SsaBuilder::IsIndirectAssign(GenTreePtr tree, Compiler::IndirectAssignmentAnnotation** ppIndirAssign)
+{
+ return tree->OperGet() == GT_ASG && m_pCompiler->m_indirAssignMap != nullptr &&
+ m_pCompiler->GetIndirAssignMap()->Lookup(tree, ppIndirAssign);
+}
+
+/**
+ * Rename the local variable tree node.
+ *
+ * If the given tree node is a local variable, then for a def give a new count, if use,
+ * then give the count in the top of stack, i.e., current count (used for last def.)
+ *
+ * @param tree Tree node where an SSA variable is used or def'ed.
+ * @param pRenameState The incremental rename information stored during renaming process.
+ *
+ * @remarks This method has to maintain parity with TreePopStacks corresponding to pushes
+ * it makes for defs.
+ */
+void SsaBuilder::TreeRenameVariables(GenTree* tree, BasicBlock* block, SsaRenameState* pRenameState, bool isPhiDefn)
+{
+ // This is perhaps temporary -- maybe should be done elsewhere. Label GT_INDs on LHS of assignments, so we
+ // can skip these during (at least) value numbering.
+ if (tree->OperIsAssignment())
+ {
+ GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+ GenTreePtr trueLhs = lhs->gtEffectiveVal(/*commaOnly*/ true);
+ if (trueLhs->OperIsIndir())
+ {
+ trueLhs->gtFlags |= GTF_IND_ASG_LHS;
+ }
+ else if (trueLhs->OperGet() == GT_CLS_VAR)
+ {
+ trueLhs->gtFlags |= GTF_CLS_VAR_ASG_LHS;
+ }
+ }
+
+ // Figure out if "tree" may make a new heap state (if we care for this block).
+ if (!block->bbHeapHavoc)
+ {
+ if (tree->OperIsAssignment() || tree->OperIsBlkOp())
+ {
+ if (m_pCompiler->ehBlockHasExnFlowDsc(block))
+ {
+ GenTreeLclVarCommon* lclVarNode;
+ if (!tree->DefinesLocal(m_pCompiler, &lclVarNode))
+ {
+ // It *may* define the heap in a non-havoc way. Make a new SSA # -- associate with this node.
+ unsigned count = pRenameState->CountForHeapDef();
+ pRenameState->PushHeap(block, count);
+ m_pCompiler->GetHeapSsaMap()->Set(tree, count);
+#ifdef DEBUG
+ if (JitTls::GetCompiler()->verboseSsa)
+ {
+ printf("Node ");
+ Compiler::printTreeID(tree);
+ printf(" (in try block) may define heap; ssa # = %d.\n", count);
+ }
+#endif // DEBUG
+
+ // Now add this SSA # to all phis of the reachable catch blocks.
+ AddHeapDefToHandlerPhis(block, count);
+ }
+ }
+ }
+ }
+
+ Compiler::IndirectAssignmentAnnotation* pIndirAssign = nullptr;
+ if (!tree->IsLocal() && !IsIndirectAssign(tree, &pIndirAssign))
+ {
+ return;
+ }
+
+ if (pIndirAssign != nullptr)
+ {
+ unsigned lclNum = pIndirAssign->m_lclNum;
+ // Is this a variable we exclude from SSA?
+ if (m_pCompiler->fgExcludeFromSsa(lclNum))
+ {
+ pIndirAssign->m_defSsaNum = SsaConfig::RESERVED_SSA_NUM;
+ return;
+ }
+ // Otherwise...
+ if (!pIndirAssign->m_isEntire)
+ {
+ pIndirAssign->m_useSsaNum = pRenameState->CountForUse(lclNum);
+ }
+ unsigned count = pRenameState->CountForDef(lclNum);
+ pIndirAssign->m_defSsaNum = count;
+ pRenameState->Push(block, lclNum, count);
+ AddDefPoint(tree, block);
+ }
+ else
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ // Is this a variable we exclude from SSA?
+ if (m_pCompiler->fgExcludeFromSsa(lclNum))
+ {
+ tree->gtLclVarCommon.SetSsaNum(SsaConfig::RESERVED_SSA_NUM);
+ return;
+ }
+
+ if (tree->gtFlags & GTF_VAR_DEF)
+ {
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+                // This is the "x" in something like "x op= y"; it is both a use (first), then a def.
+                // The use's SSA name is recorded in "x" itself; the new SSA name created by the def
+                // is recorded in the map reserved for that purpose (GetOpAsgnVarDefSsaNums, below).
+ unsigned count = pRenameState->CountForUse(lclNum);
+ tree->gtLclVarCommon.SetSsaNum(count);
+#ifdef SSA_FEATURE_USEDEF
+ AddUsePoint(tree);
+#endif
+ }
+
+ // Give a count and increment.
+ unsigned count = pRenameState->CountForDef(lclNum);
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+ m_pCompiler->GetOpAsgnVarDefSsaNums()->Set(tree, count);
+ }
+ else
+ {
+ tree->gtLclVarCommon.SetSsaNum(count);
+ }
+ pRenameState->Push(block, lclNum, count);
+ AddDefPoint(tree, block);
+
+ // If necessary, add "lclNum/count" to the arg list of a phi def in any
+ // handlers for try blocks that "block" is within. (But only do this for "real" definitions,
+ // not phi definitions.)
+ if (!isPhiDefn)
+ {
+ AddDefToHandlerPhis(block, lclNum, count);
+ }
+ }
+ else if (!isPhiDefn) // Phi args already have ssa numbers.
+ {
+ // This case is obviated by the short-term "early-out" above...but it's in the right direction.
+ // Is it a promoted struct local?
+ if (m_pCompiler->lvaTable[lclNum].lvPromoted)
+ {
+ assert(tree->TypeGet() == TYP_STRUCT);
+ LclVarDsc* varDsc = &m_pCompiler->lvaTable[lclNum];
+ // If has only a single field var, treat this as a use of that field var.
+ // Otherwise, we don't give SSA names to uses of promoted struct vars.
+ if (varDsc->lvFieldCnt == 1)
+ {
+ lclNum = varDsc->lvFieldLclStart;
+ }
+ else
+ {
+ tree->gtLclVarCommon.SetSsaNum(SsaConfig::RESERVED_SSA_NUM);
+ return;
+ }
+ }
+ // Give the count as top of stack.
+ unsigned count = pRenameState->CountForUse(lclNum);
+ tree->gtLclVarCommon.SetSsaNum(count);
+#ifdef SSA_FEATURE_USEDEF
+ AddUsePoint(tree);
+#endif
+ }
+ }
+}
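+
+// Illustrative sketch (SSA numbers assumed): for a block containing
+//
+//     x = ...;      // def:  CountForDef(x) hands out a fresh number, say 2, and it is pushed
+//     y = x + 1;    // use:  CountForUse(x) reads the top of x's rename stack, i.e. 2
+//     x += y;       // GTF_VAR_DEF | GTF_VAR_USEASG: the node keeps the use number (2), while
+//                   // the new def number (3) is recorded via GetOpAsgnVarDefSsaNums()
+//
+// every def pushes its number onto the per-variable stack through pRenameState->Push, and
+// BlockPopStacks later pops those entries when the dominator-tree walk leaves the block.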
+
+void SsaBuilder::AddDefToHandlerPhis(BasicBlock* block, unsigned lclNum, unsigned count)
+{
+ assert(m_pCompiler->lvaTable[lclNum].lvTracked); // Precondition.
+ unsigned lclIndex = m_pCompiler->lvaTable[lclNum].lvVarIndex;
+
+ EHblkDsc* tryBlk = m_pCompiler->ehGetBlockExnFlowDsc(block);
+ if (tryBlk != nullptr)
+ {
+ DBG_SSA_JITDUMP(
+ "Definition of local V%02u/d:%d in block BB%02u has exn handler; adding as phi arg to handlers.\n", lclNum,
+ count, block->bbNum);
+ while (true)
+ {
+ BasicBlock* handler = tryBlk->ExFlowBlock();
+
+ // Is "lclNum" live on entry to the handler?
+ if (VarSetOps::IsMember(m_pCompiler, handler->bbLiveIn, lclIndex))
+ {
+#ifdef DEBUG
+ bool phiFound = false;
+#endif
+                // A prefix of the block's statements will be SSA definitions. Search those for "lclNum".
+ for (GenTreePtr stmt = handler->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ // If the tree is not an SSA def, break out of the loop: we're done.
+ if (!stmt->IsPhiDefnStmt())
+ {
+ break;
+ }
+
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+ assert(tree->IsPhiDefn());
+
+ if (tree->gtOp.gtOp1->gtLclVar.gtLclNum == lclNum)
+ {
+ // It's the definition for the right local. Add "count" to the RHS.
+ GenTreePtr phi = tree->gtOp.gtOp2;
+ GenTreeArgList* args = nullptr;
+ if (phi->gtOp.gtOp1 != nullptr)
+ {
+ args = phi->gtOp.gtOp1->AsArgList();
+ }
+#ifdef DEBUG
+ // Make sure it isn't already present: we should only add each definition once.
+ for (GenTreeArgList* curArgs = args; curArgs != nullptr; curArgs = curArgs->Rest())
+ {
+ GenTreePhiArg* phiArg = curArgs->Current()->AsPhiArg();
+ assert(phiArg->gtSsaNum != count);
+ }
+#endif
+ var_types typ = m_pCompiler->lvaTable[lclNum].TypeGet();
+ GenTreePhiArg* newPhiArg =
+ new (m_pCompiler, GT_PHI_ARG) GenTreePhiArg(typ, lclNum, count, block);
+
+ phi->gtOp.gtOp1 = new (m_pCompiler, GT_LIST) GenTreeArgList(newPhiArg, args);
+ m_pCompiler->gtSetStmtInfo(stmt);
+ m_pCompiler->fgSetStmtSeq(stmt);
+#ifdef DEBUG
+ phiFound = true;
+#endif
+ DBG_SSA_JITDUMP(" Added phi arg u:%d for V%02u to phi defn in handler block BB%02u.\n", count,
+ lclNum, handler->bbNum);
+ break;
+ }
+ }
+ assert(phiFound);
+ }
+
+ unsigned nextTryIndex = tryBlk->ebdEnclosingTryIndex;
+ if (nextTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ break;
+ }
+
+ tryBlk = m_pCompiler->ehGetDsc(nextTryIndex);
+ }
+ }
+}
+
+void SsaBuilder::AddHeapDefToHandlerPhis(BasicBlock* block, unsigned count)
+{
+ if (m_pCompiler->ehBlockHasExnFlowDsc(block))
+ {
+ // Don't do anything for a compiler-inserted BBJ_ALWAYS that is a "leave helper".
+ if (block->bbJumpKind == BBJ_ALWAYS && (block->bbFlags & BBF_INTERNAL) && (block->bbPrev->isBBCallAlwaysPair()))
+ {
+ return;
+ }
+
+ // Otherwise...
+ DBG_SSA_JITDUMP("Definition of Heap/d:%d in block BB%02u has exn handler; adding as phi arg to handlers.\n",
+ count, block->bbNum);
+ EHblkDsc* tryBlk = m_pCompiler->ehGetBlockExnFlowDsc(block);
+ while (true)
+ {
+ BasicBlock* handler = tryBlk->ExFlowBlock();
+
+ // Is Heap live on entry to the handler?
+ if (handler->bbHeapLiveIn)
+ {
+ assert(handler->bbHeapSsaPhiFunc != nullptr);
+
+ // Add "count" to the phi args of Heap.
+ if (handler->bbHeapSsaPhiFunc == BasicBlock::EmptyHeapPhiDef)
+ {
+ handler->bbHeapSsaPhiFunc = new (m_pCompiler) BasicBlock::HeapPhiArg(count);
+ }
+ else
+ {
+#ifdef DEBUG
+ BasicBlock::HeapPhiArg* curArg = handler->bbHeapSsaPhiFunc;
+ while (curArg != nullptr)
+ {
+ assert(curArg->GetSsaNum() != count);
+ curArg = curArg->m_nextArg;
+ }
+#endif // DEBUG
+ handler->bbHeapSsaPhiFunc =
+ new (m_pCompiler) BasicBlock::HeapPhiArg(count, handler->bbHeapSsaPhiFunc);
+ }
+
+ DBG_SSA_JITDUMP(" Added phi arg u:%d for Heap to phi defn in handler block BB%02u.\n", count,
+ handler->bbNum);
+ }
+ unsigned tryInd = tryBlk->ebdEnclosingTryIndex;
+ if (tryInd == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ break;
+ }
+ tryBlk = m_pCompiler->ehGetDsc(tryInd);
+ }
+ }
+}
+
+/**
+ * Walk the block's tree in the evaluation order and give var definitions and uses their
+ * SSA names.
+ *
+ * @param block Block for which SSA variables have to be renamed.
+ * @param pRenameState The incremental rename information stored during renaming process.
+ *
+ */
+void SsaBuilder::BlockRenameVariables(BasicBlock* block, SsaRenameState* pRenameState)
+{
+ // Walk the statements of the block and rename the tree variables.
+
+ // First handle the incoming Heap state.
+
+    // Is there a phi definition for Heap at the start of this block?
+ if (block->bbHeapSsaPhiFunc != nullptr)
+ {
+ unsigned count = pRenameState->CountForHeapDef();
+ pRenameState->PushHeap(block, count);
+
+ DBG_SSA_JITDUMP("Ssa # for Heap phi on entry to BB%02u is %d.\n", block->bbNum, count);
+ }
+
+ // Record the "in" Ssa # for Heap.
+ block->bbHeapSsaNumIn = pRenameState->CountForHeapUse();
+
+ // We need to iterate over phi definitions, to give them SSA names, but we need
+ // to know which are which, so we don't add phi definitions to handler phi arg lists.
+ // Statements are phi defns until they aren't.
+ bool isPhiDefn = true;
+ GenTreePtr firstNonPhi = block->FirstNonPhiDef();
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ if (stmt == firstNonPhi)
+ {
+ isPhiDefn = false;
+ }
+
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ TreeRenameVariables(tree, block, pRenameState, isPhiDefn);
+ }
+ }
+
+ // Now handle the final heap state.
+
+ // If the block defines Heap, allocate an SSA variable for the final heap state in the block.
+ // (This may be redundant with the last SSA var explicitly created, but there's no harm in that.)
+ if (block->bbHeapDef)
+ {
+ unsigned count = pRenameState->CountForHeapDef();
+ pRenameState->PushHeap(block, count);
+ AddHeapDefToHandlerPhis(block, count);
+ }
+
+    // Record the "out" Ssa # for Heap.
+ block->bbHeapSsaNumOut = pRenameState->CountForHeapUse();
+
+ DBG_SSA_JITDUMP("Ssa # for Heap on entry to BB%02u is %d; on exit is %d.\n", block->bbNum, block->bbHeapSsaNumIn,
+ block->bbHeapSsaNumOut);
+}
+
+/**
+ * Walk through the phi nodes of a given block and assign rhs variables to them.
+ *
+ * Also renumber the rhs variables from top of the stack.
+ *
+ * @param block Block for which phi nodes have to be assigned their rhs arguments.
+ * @param pRenameState The incremental rename information stored during renaming process.
+ *
+ */
+void SsaBuilder::AssignPhiNodeRhsVariables(BasicBlock* block, SsaRenameState* pRenameState)
+{
+ BasicBlock::AllSuccs allSuccs = block->GetAllSuccs(m_pCompiler);
+ AllSuccessorIter allSuccsEnd = allSuccs.end();
+ for (AllSuccessorIter allSuccsIter = allSuccs.begin(); allSuccsIter != allSuccsEnd; ++allSuccsIter)
+ {
+ BasicBlock* succ = (*allSuccsIter);
+ // Walk the statements for phi nodes.
+ for (GenTreePtr stmt = succ->bbTreeList; stmt != nullptr && stmt->IsPhiDefnStmt(); stmt = stmt->gtNext)
+ {
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ assert(tree->IsPhiDefn());
+
+ // Get the phi node from GT_ASG.
+ GenTreePtr phiNode = tree->gtOp.gtOp2;
+ assert(phiNode->gtOp.gtOp1 == nullptr || phiNode->gtOp.gtOp1->OperGet() == GT_LIST);
+
+ unsigned lclNum = tree->gtOp.gtOp1->gtLclVar.gtLclNum;
+ unsigned ssaNum = pRenameState->CountForUse(lclNum);
+ // Search the arglist for an existing definition for ssaNum.
+            // (Can we assert that it's the head of the list? This should only happen when we add
+ // during renaming for a definition that occurs within a try, and then that's the last
+ // value of the var within that basic block.)
+ GenTreeArgList* argList = (phiNode->gtOp.gtOp1 == nullptr ? nullptr : phiNode->gtOp.gtOp1->AsArgList());
+ bool found = false;
+ while (argList != nullptr)
+ {
+ if (argList->Current()->AsLclVarCommon()->GetSsaNum() == ssaNum)
+ {
+ found = true;
+ break;
+ }
+ argList = argList->Rest();
+ }
+ if (!found)
+ {
+ GenTreePtr newPhiArg =
+ new (m_pCompiler, GT_PHI_ARG) GenTreePhiArg(tree->gtOp.gtOp1->TypeGet(), lclNum, ssaNum, block);
+ argList = (phiNode->gtOp.gtOp1 == nullptr ? nullptr : phiNode->gtOp.gtOp1->AsArgList());
+ phiNode->gtOp.gtOp1 = new (m_pCompiler, GT_LIST) GenTreeArgList(newPhiArg, argList);
+ DBG_SSA_JITDUMP(" Added phi arg u:%d for V%02u from BB%02u in BB%02u.\n", ssaNum, lclNum, block->bbNum,
+ succ->bbNum);
+ }
+
+ m_pCompiler->gtSetStmtInfo(stmt);
+ m_pCompiler->fgSetStmtSeq(stmt);
+ }
+
+ // Now handle Heap.
+ if (succ->bbHeapSsaPhiFunc != nullptr)
+ {
+ if (succ->bbHeapSsaPhiFunc == BasicBlock::EmptyHeapPhiDef)
+ {
+ succ->bbHeapSsaPhiFunc = new (m_pCompiler) BasicBlock::HeapPhiArg(block);
+ }
+ else
+ {
+ BasicBlock::HeapPhiArg* curArg = succ->bbHeapSsaPhiFunc;
+ bool found = false;
+                // This is a quadratic algorithm. We might need to consider switching to a hash table
+ // representation for the arguments of a phi node, to make this linear.
+ while (curArg != nullptr)
+ {
+ if (curArg->m_predBB == block)
+ {
+ found = true;
+ break;
+ }
+ curArg = curArg->m_nextArg;
+ }
+ if (!found)
+ {
+ succ->bbHeapSsaPhiFunc = new (m_pCompiler) BasicBlock::HeapPhiArg(block, succ->bbHeapSsaPhiFunc);
+ }
+ }
+ DBG_SSA_JITDUMP(" Added phi arg for Heap from BB%02u in BB%02u.\n", block->bbNum, succ->bbNum);
+ }
+
+ // If "succ" is the first block of a try block (and "block" is not also in that try block)
+ // then we must look at the vars that have phi defs in the corresponding handler;
+ // the current SSA name for such vars must be included as an argument to that phi.
+ if (m_pCompiler->bbIsTryBeg(succ))
+ {
+ assert(succ->hasTryIndex());
+ unsigned tryInd = succ->getTryIndex();
+
+ while (tryInd != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ // Check if the predecessor "block" is within the same try block.
+ if (block->hasTryIndex())
+ {
+ for (unsigned blockTryInd = block->getTryIndex(); blockTryInd != EHblkDsc::NO_ENCLOSING_INDEX;
+ blockTryInd = m_pCompiler->ehGetEnclosingTryIndex(blockTryInd))
+ {
+ if (blockTryInd == tryInd)
+ {
+ // It is; don't execute the loop below.
+ tryInd = EHblkDsc::NO_ENCLOSING_INDEX;
+ break;
+ }
+ }
+
+ // The loop just above found that the predecessor "block" is within the same
+ // try block as "succ." So we don't need to process this try, or any
+ // further outer try blocks here, since they would also contain both "succ"
+ // and "block".
+ if (tryInd == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ break;
+ }
+ }
+
+ EHblkDsc* succTry = m_pCompiler->ehGetDsc(tryInd);
+ // This is necessarily true on the first iteration, but not
+ // necessarily on the second and subsequent.
+ if (succTry->ebdTryBeg != succ)
+ {
+ break;
+ }
+
+ // succ is the first block of this try. Look at phi defs in the handler.
+ // For a filter, we consider the filter to be the "real" handler.
+ BasicBlock* handlerStart = succTry->ExFlowBlock();
+
+ for (GenTreePtr stmt = handlerStart->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+ // Check if the first n of the statements are phi nodes. If not, exit.
+ if (tree->OperGet() != GT_ASG || tree->gtOp.gtOp2 == nullptr ||
+ tree->gtOp.gtOp2->OperGet() != GT_PHI)
+ {
+ break;
+ }
+
+ // Get the phi node from GT_ASG.
+ GenTreePtr lclVar = tree->gtOp.gtOp1;
+ unsigned lclNum = lclVar->gtLclVar.gtLclNum;
+
+                    // If the variable is live-out of "block", and is therefore live on entry to the
+                    // try-block-start "succ", then we make sure the current SSA name for the var is
+                    // one of the args of the phi node. If not, go on.
+ LclVarDsc* lclVarDsc = &m_pCompiler->lvaTable[lclNum];
+ if (!lclVarDsc->lvTracked ||
+ !VarSetOps::IsMember(m_pCompiler, block->bbLiveOut, lclVarDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ GenTreePtr phiNode = tree->gtOp.gtOp2;
+ assert(phiNode->gtOp.gtOp1 == nullptr || phiNode->gtOp.gtOp1->OperGet() == GT_LIST);
+ GenTreeArgList* argList = reinterpret_cast<GenTreeArgList*>(phiNode->gtOp.gtOp1);
+
+ // What is the current SSAName from the predecessor for this local?
+ unsigned ssaNum = pRenameState->CountForUse(lclNum);
+
+ // See if this ssaNum is already an arg to the phi.
+ bool alreadyArg = false;
+ for (GenTreeArgList* curArgs = argList; curArgs != nullptr; curArgs = curArgs->Rest())
+ {
+ if (curArgs->Current()->gtPhiArg.gtSsaNum == ssaNum)
+ {
+ alreadyArg = true;
+ break;
+ }
+ }
+ if (!alreadyArg)
+ {
+ // Add the new argument.
+ GenTreePtr newPhiArg =
+ new (m_pCompiler, GT_PHI_ARG) GenTreePhiArg(lclVar->TypeGet(), lclNum, ssaNum, block);
+ phiNode->gtOp.gtOp1 = new (m_pCompiler, GT_LIST) GenTreeArgList(newPhiArg, argList);
+
+ DBG_SSA_JITDUMP(" Added phi arg u:%d for V%02u from BB%02u in BB%02u.\n", ssaNum, lclNum,
+ block->bbNum, handlerStart->bbNum);
+
+ m_pCompiler->gtSetStmtInfo(stmt);
+ m_pCompiler->fgSetStmtSeq(stmt);
+ }
+ }
+
+ // Now handle Heap.
+ if (handlerStart->bbHeapSsaPhiFunc != nullptr)
+ {
+ if (handlerStart->bbHeapSsaPhiFunc == BasicBlock::EmptyHeapPhiDef)
+ {
+ handlerStart->bbHeapSsaPhiFunc = new (m_pCompiler) BasicBlock::HeapPhiArg(block);
+ }
+ else
+ {
+#ifdef DEBUG
+ BasicBlock::HeapPhiArg* curArg = handlerStart->bbHeapSsaPhiFunc;
+ while (curArg != nullptr)
+ {
+ assert(curArg->m_predBB != block);
+ curArg = curArg->m_nextArg;
+ }
+#endif // DEBUG
+ handlerStart->bbHeapSsaPhiFunc =
+ new (m_pCompiler) BasicBlock::HeapPhiArg(block, handlerStart->bbHeapSsaPhiFunc);
+ }
+ DBG_SSA_JITDUMP(" Added phi arg for Heap from BB%02u in BB%02u.\n", block->bbNum,
+ handlerStart->bbNum);
+ }
+
+ tryInd = succTry->ebdEnclosingTryIndex;
+ }
+ }
+ }
+}
+
+/**
+ * Reclaim (pop) the rename-stack entries that were pushed for definitions in this block.
+ *
+ * @param block Block for which SSA variables have to be renamed.
+ * @param pRenameState The incremental rename information stored during renaming process.
+ *
+ */
+void SsaBuilder::BlockPopStacks(BasicBlock* block, SsaRenameState* pRenameState)
+{
+ // Pop the names given to the non-phi nodes.
+ pRenameState->PopBlockStacks(block);
+
+ // And for Heap.
+ pRenameState->PopBlockHeapStack(block);
+}
+
+/**
+ * Perform variable renaming.
+ *
+ * Walks the blocks and renames all var defs with ssa numbers and all uses with the
+ * current count that is in the top of the stack. Assigns phi node rhs variables
+ * (i.e., the arguments to the phi.) Then, calls the function recursively on child
+ * nodes in the DOM tree to continue the renaming process.
+ *
+ * @param block Block for which SSA variables have to be renamed.
+ * @param pRenameState The incremental rename information stored during renaming process.
+ *
+ * @remarks At the end of the method, m_uses and m_defs should be populated linking the
+ * uses and defs.
+ *
+ * @see Briggs, Cooper, Harvey and Simpson "Practical Improvements to the Construction
+ * and Destruction of Static Single Assignment Form."
+ */
+
+void SsaBuilder::RenameVariables(BlkToBlkSetMap* domTree, SsaRenameState* pRenameState)
+{
+ JITDUMP("*************** In SsaBuilder::RenameVariables()\n");
+
+ // The first thing we do is treat parameters and must-init variables as if they have a
+ // virtual definition before entry -- they start out with the initial SSA name (SsaConfig::FIRST_SSA_NUM).
+ for (unsigned i = 0; i < m_pCompiler->lvaCount; i++)
+ {
+ LclVarDsc* varDsc = &m_pCompiler->lvaTable[i];
+
+#ifdef DEBUG
+ varDsc->lvNumSsaNames = SsaConfig::UNINIT_SSA_NUM; // Start off fresh...
+#endif
+
+ if (varDsc->lvIsParam || m_pCompiler->info.compInitMem || varDsc->lvMustInit ||
+ (varDsc->lvTracked &&
+ VarSetOps::IsMember(m_pCompiler, m_pCompiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)))
+ {
+ unsigned count = pRenameState->CountForDef(i);
+
+ // In ValueNum we'd assume un-inited variables get FIRST_SSA_NUM.
+ assert(count == SsaConfig::FIRST_SSA_NUM);
+#ifdef DEBUG
+ varDsc->lvNumSsaNames++;
+#endif
+ pRenameState->Push(nullptr, i, count);
+ }
+ }
+ // In ValueNum we'd assume un-inited heap gets FIRST_SSA_NUM.
+ // The heap is a parameter. Use FIRST_SSA_NUM as first SSA name.
+ unsigned initHeapCount = pRenameState->CountForHeapDef();
+ assert(initHeapCount == SsaConfig::FIRST_SSA_NUM);
+ pRenameState->PushHeap(m_pCompiler->fgFirstBB, initHeapCount);
+
+ // Initialize the heap ssa numbers for unreachable blocks. ValueNum expects
+ // heap ssa numbers to have some initial value.
+ for (BasicBlock* block = m_pCompiler->fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbIDom == nullptr)
+ {
+ block->bbHeapSsaNumIn = initHeapCount;
+ block->bbHeapSsaNumOut = initHeapCount;
+ }
+ }
+
+ struct BlockWork
+ {
+ BasicBlock* m_blk;
+ bool m_processed; // Whether this block has already been processed: its vars renamed and its
+ // children processed. If so, it awaits only BlockPopStacks.
+ BlockWork(BasicBlock* blk, bool processed = false) : m_blk(blk), m_processed(processed)
+ {
+ }
+ };
+ typedef jitstd::vector<BlockWork> BlockWorkStack;
+ BlockWorkStack* blocksToDo =
+ new (jitstd::utility::allocate<BlockWorkStack>(m_allocator), jitstd::placement_t()) BlockWorkStack(m_allocator);
+
+ blocksToDo->push_back(BlockWork(m_pCompiler->fgFirstBB)); // Probably have to include other roots of dom tree.
+
+ while (blocksToDo->size() != 0)
+ {
+ BlockWork blockWrk = blocksToDo->back();
+ blocksToDo->pop_back();
+ BasicBlock* block = blockWrk.m_blk;
+
+ DBG_SSA_JITDUMP("[SsaBuilder::RenameVariables](BB%02u, processed = %d)\n", block->bbNum, blockWrk.m_processed);
+
+ if (!blockWrk.m_processed)
+ {
+ // Push the block back on the stack with "m_processed" true, to record the fact that when its children have
+ // been (recursively) processed, we still need to call BlockPopStacks on it.
+ blocksToDo->push_back(BlockWork(block, true));
+
+ // Walk the block, giving counts to DEFs and the top-of-stack count to USEs.
+ BlockRenameVariables(block, pRenameState);
+
+ // Assign arguments to the phi node of successors, corresponding to the block's index.
+ AssignPhiNodeRhsVariables(block, pRenameState);
+
+ // Recurse with the block's DOM children.
+ BlkSet* pBlkSet;
+ if (domTree->Lookup(block, &pBlkSet))
+ {
+ for (BlkSet::KeyIterator child = pBlkSet->Begin(); !child.Equal(pBlkSet->End()); ++child)
+ {
+ DBG_SSA_JITDUMP("[SsaBuilder::RenameVariables](pushing dom child BB%02u)\n", child.Get()->bbNum);
+ blocksToDo->push_back(BlockWork(child.Get()));
+ }
+ }
+ }
+ else
+ {
+ // Done, pop all the stack count, if there is one for this block.
+ BlockPopStacks(block, pRenameState);
+ DBG_SSA_JITDUMP("[SsaBuilder::RenameVariables] done with BB%02u\n", block->bbNum);
+ }
+ }
+
+ // Remember the number of Heap SSA names.
+ m_pCompiler->lvHeapNumSsaNames = pRenameState->HeapCount();
+}
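The BlockWork worklist above replaces a recursive pre/post-order walk of the dominator tree with an explicit stack: each block is pushed once as "not processed" (rename its defs and uses, push SSA names) and once as "processed" (pop the names once all of its dominator-tree children are done). A minimal standalone sketch of that two-phase pattern, independent of the JIT's types (the tree shape below is made up for illustration):

#include <cstdio>
#include <vector>

struct Work
{
    int  node;
    bool processed;
};

int main()
{
    // children[n] = dominator-tree children of n; node 0 is the root.
    std::vector<std::vector<int>> children = {{1, 2}, {3}, {}, {}};

    std::vector<Work> stack;
    stack.push_back({0, false});
    while (!stack.empty())
    {
        Work w = stack.back();
        stack.pop_back();
        if (!w.processed)
        {
            printf("pre  %d\n", w.node);     // rename defs/uses and push SSA names here
            stack.push_back({w.node, true}); // revisit after all children are done
            for (int c : children[w.node])
                stack.push_back({c, false});
        }
        else
        {
            printf("post %d\n", w.node);     // pop the SSA names pushed for this node
        }
    }
    return 0;
}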
+
+#ifdef DEBUG
+/**
+ * Print the blocks, the phi nodes get printed as well.
+ * @example:
+ * After SSA BB02:
+ * [0027CC0C] ----------- stmtExpr void (IL 0x019...0x01B)
+ * N001 ( 1, 1) [0027CB70] ----------- const int 23
+ * N003 ( 3, 3) [0027CBD8] -A------R-- = int
+ * N002 ( 1, 1) [0027CBA4] D------N--- lclVar int V01 arg1 d:5
+ *
+ * After SSA BB04:
+ * [0027D530] ----------- stmtExpr void (IL ???... ???)
+ * N002 ( 0, 0) [0027D4C8] ----------- phi int
+ * [0027D8CC] ----------- lclVar int V01 arg1 u:5
+ * [0027D844] ----------- lclVar int V01 arg1 u:4
+ * N004 ( 2, 2) [0027D4FC] -A------R-- = int
+ * N003 ( 1, 1) [0027D460] D------N--- lclVar int V01 arg1 d:3
+ */
+void SsaBuilder::Print(BasicBlock** postOrder, int count)
+{
+ for (int i = count - 1; i >= 0; --i)
+ {
+ printf("After SSA BB%02u:\n", postOrder[i]->bbNum);
+ m_pCompiler->gtDispTreeList(postOrder[i]->bbTreeList);
+ }
+}
+#endif // DEBUG
+
+/**
+ * Build SSA form.
+ *
+ * Sorts the graph topologically.
+ * - Collects the blocks in the postOrder array.
+ *
+ * Identifies each block's immediate dominator.
+ * - Computes this in bbIDom of each BasicBlock.
+ *
+ * Computes DOM tree relation.
+ * - Computes domTree as block -> set of blocks.
+ * - Computes pre/post order traversal of the DOM tree.
+ *
+ * Inserts phi nodes.
+ * - Computes dominance frontier as block -> set of blocks.
+ * - Allocates block use/def/livein/liveout and computes it.
+ * - Inserts phi nodes with only rhs at the beginning of the blocks.
+ *
+ * Renames variables.
+ * - Walks blocks in evaluation order and gives uses and defs names.
+ * - Gives empty phi nodes their rhs arguments as they become known while renaming.
+ *
+ * @see "A simple, fast dominance algorithm" by Keith D. Cooper, Timothy J. Harvey, Ken Kennedy.
+ * @see Briggs, Cooper, Harvey and Simpson "Practical Improvements to the Construction
+ * and Destruction of Static Single Assignment Form."
+ */
+void SsaBuilder::Build()
+{
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("*************** In SsaBuilder::Build()\n");
+ }
+#endif
+
+ // Ensure that there's a first block outside a try, so that the dominator tree has a unique root.
+ SetupBBRoot();
+
+ // Add 1 so that block number and array index stay the same.
+ int blockCount = m_pCompiler->fgBBNumMax + 1;
+
+ JITDUMP("[SsaBuilder] Max block count is %d.\n", blockCount);
+
+ // Allocate the postOrder array for the graph.
+ BasicBlock** postOrder = (BasicBlock**)alloca(blockCount * sizeof(BasicBlock*));
+
+ // Topologically sort the graph.
+ int count = TopologicalSort(postOrder, blockCount);
+ JITDUMP("[SsaBuilder] Topologically sorted the graph.\n");
+ EndPhase(PHASE_BUILD_SSA_TOPOSORT);
+
+ // Compute IDom(b).
+ ComputeImmediateDom(postOrder, count);
+
+ // Compute the dominator tree.
+ BlkToBlkSetMap* domTree = new (m_pCompiler->getAllocator()) BlkToBlkSetMap(m_pCompiler->getAllocator());
+ ComputeDominators(postOrder, count, domTree);
+ EndPhase(PHASE_BUILD_SSA_DOMS);
+
+ // Insert phi functions.
+ InsertPhiFunctions(postOrder, count);
+
+ // Rename local variables and collect UD information for each ssa var.
+ SsaRenameState* pRenameState = new (jitstd::utility::allocate<SsaRenameState>(m_allocator), jitstd::placement_t())
+ SsaRenameState(m_allocator, m_pCompiler->lvaCount);
+ RenameVariables(domTree, pRenameState);
+ EndPhase(PHASE_BUILD_SSA_RENAME);
+
+#ifdef DEBUG
+ // At this point we are in SSA form. Print the SSA form.
+ if (m_pCompiler->verboseSsa)
+ {
+ Print(postOrder, count);
+ }
+#endif
+}
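A minimal sketch of how the builder is meant to be driven; the wrapper name below is hypothetical and not the compiler's actual entry point, it only shows the intended call sequence given a Compiler instance and an allocator:

// Hypothetical driver (illustration only): construct the builder and run it once;
// afterwards each renamed GT_LCL_VAR carries a gtSsaNum.
void BuildSsaSketch(Compiler* comp, IAllocator* alloc)
{
    SsaBuilder builder(comp, alloc); // constructor declared in ssabuilder.h below
    builder.Build();                 // toposort, doms, phi insertion, renaming
}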
+
+void SsaBuilder::SetupBBRoot()
+{
+ // Allocate a bbroot, if necessary.
+ // We need a unique block to be the root of the dominator tree.
+ // This can be violated if the first block is in a try, or if it is the first block of
+ // a loop (which would necessarily be an infinite loop) -- i.e., it has a predecessor.
+
+ // If neither condition holds, no reason to make a new block.
+ if (!m_pCompiler->fgFirstBB->hasTryIndex() && m_pCompiler->fgFirstBB->bbPreds == nullptr)
+ {
+ return;
+ }
+
+ BasicBlock* bbRoot = m_pCompiler->bbNewBasicBlock(BBJ_NONE);
+ bbRoot->bbFlags |= BBF_INTERNAL;
+
+ // May need to fix up preds list, so remember the old first block.
+ BasicBlock* oldFirst = m_pCompiler->fgFirstBB;
+
+ // Copy the liveness information from the first basic block.
+ if (m_pCompiler->fgLocalVarLivenessDone)
+ {
+ VarSetOps::Assign(m_pCompiler, bbRoot->bbLiveIn, oldFirst->bbLiveIn);
+ VarSetOps::Assign(m_pCompiler, bbRoot->bbLiveOut, oldFirst->bbLiveIn);
+ }
+
+ // Copy the bbWeight. (This is technically wrong, if the first block is a loop head, but
+ // it shouldn't matter...)
+ bbRoot->inheritWeight(oldFirst);
+
+ // There's an artificial incoming reference count for the first BB. We're about to make it no longer
+ // the first BB, so decrement that.
+ assert(oldFirst->bbRefs > 0);
+ oldFirst->bbRefs--;
+
+ m_pCompiler->fgInsertBBbefore(m_pCompiler->fgFirstBB, bbRoot);
+
+ assert(m_pCompiler->fgFirstBB == bbRoot);
+ if (m_pCompiler->fgComputePredsDone)
+ {
+ m_pCompiler->fgAddRefPred(oldFirst, bbRoot);
+ }
+}
+
+#ifdef DEBUG
+// This method asserts that SSA name constraints specified are satisfied.
+void Compiler::JitTestCheckSSA()
+{
+ struct SSAName
+ {
+ unsigned m_lvNum;
+ unsigned m_ssaNum;
+
+ static unsigned GetHashCode(SSAName ssaNm)
+ {
+ return ssaNm.m_lvNum << 16 | ssaNm.m_ssaNum;
+ }
+
+ static bool Equals(SSAName ssaNm1, SSAName ssaNm2)
+ {
+ return ssaNm1.m_lvNum == ssaNm2.m_lvNum && ssaNm1.m_ssaNum == ssaNm2.m_ssaNum;
+ }
+ };
+
+ typedef SimplerHashTable<ssize_t, SmallPrimitiveKeyFuncs<ssize_t>, SSAName, JitSimplerHashBehavior>
+ LabelToSSANameMap;
+ typedef SimplerHashTable<SSAName, SSAName, ssize_t, JitSimplerHashBehavior> SSANameToLabelMap;
+
+ // If we have no test data, early out.
+ if (m_nodeTestData == nullptr)
+ {
+ return;
+ }
+
+ NodeToTestDataMap* testData = GetNodeTestData();
+
+ // First we have to know which nodes in the tree are reachable.
+ NodeToIntMap* reachable = FindReachableNodesInNodeTestData();
+
+ LabelToSSANameMap* labelToSSA = new (getAllocatorDebugOnly()) LabelToSSANameMap(getAllocatorDebugOnly());
+ SSANameToLabelMap* ssaToLabel = new (getAllocatorDebugOnly()) SSANameToLabelMap(getAllocatorDebugOnly());
+
+ if (verbose)
+ {
+ printf("\nJit Testing: SSA names.\n");
+ }
+ for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
+ {
+ TestLabelAndNum tlAndN;
+ GenTreePtr node = ki.Get();
+ bool b = testData->Lookup(node, &tlAndN);
+ assert(b);
+ if (tlAndN.m_tl == TL_SsaName)
+ {
+ if (node->OperGet() != GT_LCL_VAR)
+ {
+ printf("SSAName constraint put on non-lcl-var expression ");
+ printTreeID(node);
+ printf(" (of type %s).\n", varTypeName(node->TypeGet()));
+ unreached();
+ }
+ GenTreeLclVarCommon* lcl = node->AsLclVarCommon();
+
+ int dummy;
+ if (!reachable->Lookup(lcl, &dummy))
+ {
+ printf("Node ");
+ printTreeID(lcl);
+ printf(" had a test constraint declared, but has become unreachable at the time the constraint is "
+ "tested.\n"
+ "(This is probably as a result of some optimization -- \n"
+ "you may need to modify the test case to defeat this opt.)\n");
+ unreached();
+ }
+
+ if (verbose)
+ {
+ printf(" Node: ");
+ printTreeID(lcl);
+ printf(", SSA name = <%d, %d> -- SSA name class %d.\n", lcl->gtLclNum, lcl->gtSsaNum, tlAndN.m_num);
+ }
+ SSAName ssaNm;
+ if (labelToSSA->Lookup(tlAndN.m_num, &ssaNm))
+ {
+ if (verbose)
+ {
+ printf(" Already in hash tables.\n");
+ }
+ // The mapping(s) must be one-to-one: if the label has a mapping, then the ssaNm must, as well.
+ ssize_t num2;
+ bool b = ssaToLabel->Lookup(ssaNm, &num2);
+ // And the mappings must be the same.
+ if (tlAndN.m_num != num2)
+ {
+ printf("Node: ");
+ printTreeID(lcl);
+ printf(", SSA name = <%d, %d> was declared in SSA name class %d,\n", lcl->gtLclNum, lcl->gtSsaNum,
+ tlAndN.m_num);
+ printf(
+ "but this SSA name <%d,%d> has already been associated with a different SSA name class: %d.\n",
+ ssaNm.m_lvNum, ssaNm.m_ssaNum, num2);
+ unreached();
+ }
+ // And the current node must be of the specified SSA family.
+ if (!(lcl->gtLclNum == ssaNm.m_lvNum && lcl->gtSsaNum == ssaNm.m_ssaNum))
+ {
+ printf("Node: ");
+ printTreeID(lcl);
+ printf(", SSA name = <%d, %d> was declared in SSA name class %d,\n", lcl->gtLclNum, lcl->gtSsaNum,
+ tlAndN.m_num);
+ printf("but that name class was previously bound to a different SSA name: <%d,%d>.\n",
+ ssaNm.m_lvNum, ssaNm.m_ssaNum);
+ unreached();
+ }
+ }
+ else
+ {
+ ssaNm.m_lvNum = lcl->gtLclNum;
+ ssaNm.m_ssaNum = lcl->gtSsaNum;
+ ssize_t num;
+ // The mapping(s) must be one-to-one: if the label has no mapping, then the ssaNm may not, either.
+ if (ssaToLabel->Lookup(ssaNm, &num))
+ {
+ printf("Node: ");
+ printTreeID(lcl);
+ printf(", SSA name = <%d, %d> was declared in SSA name class %d,\n", lcl->gtLclNum, lcl->gtSsaNum,
+ tlAndN.m_num);
+ printf("but this SSA name has already been associated with a different name class: %d.\n", num);
+ unreached();
+ }
+ // Add to both mappings.
+ labelToSSA->Set(tlAndN.m_num, ssaNm);
+ ssaToLabel->Set(ssaNm, tlAndN.m_num);
+ if (verbose)
+ {
+ printf(" added to hash tables.\n");
+ }
+ }
+ }
+ }
+}
+#endif // DEBUG
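The checker above keeps two hash tables in sync so that test labels and <lclNum, ssaNum> pairs remain in one-to-one correspondence. A standalone sketch of that invariant using standard containers (not JIT code):

#include <cassert>
#include <map>
#include <utility>

typedef std::pair<unsigned, unsigned> SsaName; // <lclNum, ssaNum>

struct Bijection
{
    std::map<int, SsaName> labelToSsa;
    std::map<SsaName, int> ssaToLabel;

    // Returns false if adding (label, name) would break the one-to-one property.
    bool Record(int label, SsaName name)
    {
        auto itL = labelToSsa.find(label);
        auto itS = ssaToLabel.find(name);
        if (itL == labelToSsa.end() && itS == ssaToLabel.end())
        {
            labelToSsa[label] = name; // first time either side is seen: bind them
            ssaToLabel[name]  = label;
            return true;
        }
        // Otherwise both sides must already exist and agree.
        return itL != labelToSsa.end() && itS != ssaToLabel.end() &&
               itL->second == name && itS->second == label;
    }
};

int main()
{
    Bijection b;
    assert(b.Record(3, {1, 5}));  // SSA name class 3 bound to <V01, ssa 5>
    assert(b.Record(3, {1, 5}));  // the same pair again is fine
    assert(!b.Record(3, {1, 4})); // a class may not rebind to a different SSA name
    assert(!b.Record(7, {1, 5})); // nor may an SSA name bind to a second class
    return 0;
}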
diff --git a/src/jit/ssabuilder.h b/src/jit/ssabuilder.h
new file mode 100644
index 0000000000..2fff06573e
--- /dev/null
+++ b/src/jit/ssabuilder.h
@@ -0,0 +1,212 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX SSA XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#pragma once
+#pragma warning(disable : 4503) // 'identifier' : decorated name length exceeded, name was truncated
+
+#undef SSA_FEATURE_USEDEF
+#undef SSA_FEATURE_DOMARR
+
+#include "compiler.h"
+
+struct SsaRenameState;
+
+typedef int LclVarNum;
+
+// Pair of a local var name eg: V01 and Ssa number; eg: V01_01
+typedef jitstd::pair<LclVarNum, int> SsaVarName;
+
+class SsaBuilder
+{
+private:
+ struct SsaVarNameHasher
+ {
+ /**
+ * Hash functor used in maps to hash a given key.
+ *
+ * @params key SsaVarName which is a pair of lclNum and ssaNum which defines a variable.
+ * @return Hash value corresponding to a key.
+ */
+ size_t operator()(const SsaVarName& key) const
+ {
+ return jitstd::hash<__int64>()((((__int64)key.first) << sizeof(int)) | key.second);
+ }
+ };
+
+ // Used to maintain a map of a given SSA numbering to its use or def.
+ typedef jitstd::unordered_map<SsaVarName, jitstd::vector<GenTree*>, SsaVarNameHasher> VarToUses;
+ typedef jitstd::unordered_map<SsaVarName, GenTree*, SsaVarNameHasher> VarToDef;
+
+ inline void EndPhase(Phases phase)
+ {
+ m_pCompiler->EndPhase(phase);
+ }
+
+public:
+ // Constructor
+ SsaBuilder(Compiler* pCompiler, IAllocator* pIAllocator);
+
+ // Requires stmt nodes to be already sequenced in evaluation order. Analyzes the graph
+ // for introduction of phi-nodes as GT_PHI tree nodes at the beginning of each block.
+ // Each GT_LCL_VAR is given its ssa number through its gtSsaNum field in the node.
+ // Each GT_PHI node will have gtOp1 set to lhs of the phi node and the gtOp2 to be a
+ // GT_LIST of GT_PHI_ARG. Each use or def is denoted by the corresponding GT_LCL_VAR
+ // tree. For example, to get all uses of a particular variable fully defined by its
+ // lclNum and ssaNum, one would use m_uses and look up all the uses. Similarly, the single
+ // def of an SSA variable can be looked up using the m_defs member.
+ void Build();
+
+ // Requires "bbIDom" of each block to be computed. Requires "domTree" to be allocated
+ // and can be updated, e.g., by adding a mapping from a block to its dominated children.
+ // Using IDom of each basic block, compute the whole domTree. If a block "b" has IDom "i",
+ // then, block "b" is dominated by "i". The mapping then is i -> { ..., b, ... }, in
+ // other words, "domTree" is a tree represented by nodes mapped to their children.
+ static void ComputeDominators(Compiler* pCompiler, BlkToBlkSetMap* domTree);
+
+private:
+ // Ensures that the basic block graph has a root for the dominator graph, by ensuring
+ // that there is a first block that is not in a try region (adding an empty block for that purpose
+ // if necessary). Eventually should move to Compiler.
+ void SetupBBRoot();
+
+ // Requires "postOrder" to be an array of size "count". Requires "count" to at least
+ // be the size of the flow graph. Sorts the current compiler's flow-graph and places
+ // the blocks in post order (i.e., a node's children first) in the array. Returns the
+ // number of nodes visited while sorting the graph. In other words, valid entries in
+ // the output array.
+ int TopologicalSort(BasicBlock** postOrder, int count);
+
+ // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted
+ // order. Requires count to be the valid entries in the "postOrder" array. Computes
+ // each block's immediate dominator and records it in the BasicBlock in bbIDom.
+ void ComputeImmediateDom(BasicBlock** postOrder, int count);
+
+#ifdef SSA_FEATURE_DOMARR
+ // Requires "curBlock" to be the first basic block at the first step of the recursion.
+ // Requires "domTree" to be a adjacency list (actually, a set of blocks with a set of blocks
+ // as children.) Requires "preIndex" and "postIndex" to be initialized to 0 at entry into recursion.
+ // Computes arrays "m_pDomPreOrder" and "m_pDomPostOrder" of block indices such that the blocks of a
+ // "domTree" are in pre and postorder respectively.
+ void DomTreeWalk(BasicBlock* curBlock, const BlkToBlkSetMap& domTree, int* preIndex, int* postIndex);
+#endif
+
+ // Requires all blocks to have computed "bbIDom." Requires "domTree" to be a preallocated BlkToBlkSetMap.
+ // Helper to compute "domTree" from the pre-computed bbIDom of the basic blocks.
+ static void ConstructDomTreeForBlock(Compiler* pCompiler, BasicBlock* block, BlkToBlkSetMap* domTree);
+
+ // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted order. Requires
+ // count to be the valid entries in the "postOrder" array. Computes "domTree" as a adjacency list
+ // like object, i.e., a set of blocks with a set of blocks as children defining the DOM relation.
+ void ComputeDominators(BasicBlock** postOrder, int count, BlkToBlkSetMap* domTree);
+
+#ifdef DEBUG
+ // Display the dominator tree.
+ static void DisplayDominators(BlkToBlkSetMap* domTree);
+#endif // DEBUG
+
+ // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted order. Requires
+ // count to be the valid entries in the "postOrder" array. Returns a mapping from blocks to their
+ // iterated dominance frontiers. (Recall that the dominance frontier of a block B is the set of blocks
+ // B3 such that there exists some B2 s.t. B3 is a successor of B2, and B dominates B2. Note that this dominance
+ // need not be strict -- B2 and B may be the same node. The iterated dominance frontier is formed by a closure
+ // operation: the IDF of B is the smallest set that includes B's dominance frontier, and also includes the dominance
+ // frontier of all elements of the set.) A worked example on a small diamond CFG follows this class declaration.
+ BlkToBlkSetMap* ComputeIteratedDominanceFrontier(BasicBlock** postOrder, int count);
+
+ // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted order. Requires
+ // count to be the valid entries in the "postOrder" array. Inserts GT_PHI nodes at the beginning
+ // of basic blocks that require them like so:
+ // GT_ASG(GT_LCL_VAR, GT_PHI(GT_PHI_ARG(GT_LCL_VAR, Block*), GT_LIST(GT_PHI_ARG(GT_LCL_VAR, Block*), NULL)));
+ void InsertPhiFunctions(BasicBlock** postOrder, int count);
+
+ // Requires "domTree" to be the dominator tree relation defined by a DOM b.
+ // Requires "pRenameState" to have counts and stacks at their initial state.
+ // Assigns gtSsaNames to all variables.
+ void RenameVariables(BlkToBlkSetMap* domTree, SsaRenameState* pRenameState);
+
+ // Requires "block" to be any basic block participating in variable renaming, and has at least a
+ // definition that pushed a ssa number into the rename stack for a variable. Requires "pRenameState"
+ // to have variable stacks that have counts pushed into them for the block while assigning def
+ // numbers. Pops the stack for any local variable that has an entry for block on top.
+ void BlockPopStacks(BasicBlock* block, SsaRenameState* pRenameState);
+
+ // Requires "block" to be non-NULL; and is searched for defs and uses to assign ssa numbers.
+ // Requires "pRenameState" to be non-NULL and be currently used for variables renaming.
+ void BlockRenameVariables(BasicBlock* block, SsaRenameState* pRenameState);
+
+ // Requires "tree" (assumed to be a statement in "block") to be searched for defs and uses to assign ssa numbers.
+ // Requires "pRenameState" to be non-NULL and be currently used for variables renaming. Assumes that "isPhiDefn"
+ // implies that any definition occurring within "tree" is a phi definition.
+ void TreeRenameVariables(GenTree* tree, BasicBlock* block, SsaRenameState* pRenameState, bool isPhiDefn);
+
+ // Assumes that "block" contains a definition for local var "lclNum", with SSA number "count".
+ // IF "block" is within one or more try blocks,
+ // and the local variable is live at the start of the corresponding handlers,
+ // add this SSA number "count" to the argument list of the phi for the variable in the start
+ // block of those handlers.
+ void AddDefToHandlerPhis(BasicBlock* block, unsigned lclNum, unsigned count);
+
+ // Same as above, for "Heap".
+ void AddHeapDefToHandlerPhis(BasicBlock* block, unsigned count);
+
+ // Requires "block" to be non-NULL. Requires "pRenameState" to be non-NULL and be currently used
+ // for variables renaming. Assigns the rhs arguments to the phi, i.e., block's phi node arguments.
+ void AssignPhiNodeRhsVariables(BasicBlock* block, SsaRenameState* pRenameState);
+
+ // Requires "tree" to be a local variable node. Maintains a map of <lclNum, ssaNum> -> tree
+ // information in m_defs.
+ void AddDefPoint(GenTree* tree, BasicBlock* blk);
+#ifdef SSA_FEATURE_USEDEF
+ // Requires "tree" to be a local variable node. Maintains a map of <lclNum, ssaNum> -> tree
+ // information in m_uses.
+ void AddUsePoint(GenTree* tree);
+#endif
+
+ // Returns true, and sets "*ppIndirAssign", if "tree" has been recorded as an indirect assignment.
+ // (If the tree is an assignment, it's a definition only if it's labeled as an indirect definition, where
+ // we took the address of the local elsewhere in the extended tree.)
+ bool IsIndirectAssign(GenTreePtr tree, Compiler::IndirectAssignmentAnnotation** ppIndirAssign);
+
+#ifdef DEBUG
+ void Print(BasicBlock** postOrder, int count);
+#endif
+
+private:
+#ifdef SSA_FEATURE_USEDEF
+ // Use Def information after SSA. To query the uses and def of a given ssa var,
+ // probe these data structures.
+ // Do not move these outside of this class, use accessors/interface methods.
+ VarToUses m_uses;
+ VarToDef m_defs;
+#endif
+
+#ifdef SSA_FEATURE_DOMARR
+ // To answer queries of type a DOM b.
+ // Do not move these outside of this class, use accessors/interface methods.
+ int* m_pDomPreOrder;
+ int* m_pDomPostOrder;
+#endif
+
+ Compiler* m_pCompiler;
+
+ // Used to allocate space for jitstd data structures.
+ jitstd::allocator<void> m_allocator;
+};
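For the dominance-frontier comment above, a standalone sketch of the textbook computation it refers to (Cooper-Harvey-Kennedy style, not the JIT's implementation), run on a hard-coded diamond CFG B0->B1, B0->B2, B1->B3, B2->B3 with idom(B1) = idom(B2) = idom(B3) = B0. A variable assigned in B1 or B2 therefore needs a phi at the join block B3; the iterated dominance frontier just closes this set under repetition:

#include <cstdio>
#include <set>
#include <vector>

int main()
{
    const int numBlocks = 4;
    std::vector<std::vector<int>> preds = {{}, {0}, {0}, {1, 2}};
    std::vector<int> idom = {0, 0, 0, 0}; // idom[0] is conventionally itself

    std::vector<std::set<int>> df(numBlocks);
    for (int b = 0; b < numBlocks; b++)
    {
        if (preds[b].size() < 2)
            continue; // only join points contribute to dominance frontiers
        for (int p : preds[b])
        {
            // Walk up the dominator tree from each predecessor until we reach idom(b);
            // every block visited on the way has b in its dominance frontier.
            for (int runner = p; runner != idom[b]; runner = idom[runner])
                df[runner].insert(b);
        }
    }

    for (int b = 0; b < numBlocks; b++)
        for (int d : df[b])
            printf("DF(B%d) contains B%d\n", b, d); // prints: DF(B1) and DF(B2) contain B3
    return 0;
}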
diff --git a/src/jit/ssaconfig.h b/src/jit/ssaconfig.h
new file mode 100644
index 0000000000..6e81ad9fd6
--- /dev/null
+++ b/src/jit/ssaconfig.h
@@ -0,0 +1,49 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX SSA XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#pragma once
+
+#ifdef DEBUG
+#define DBG_SSA_JITDUMP(...) \
+ if (JitTls::GetCompiler()->verboseSsa) \
+ JitDump(__VA_ARGS__)
+#else
+#define DBG_SSA_JITDUMP(...)
+#endif
+
+// DBG_SSA_JITDUMP prints only in DEBUG builds, and only when JitTls::GetCompiler()->verboseSsa is set.
+
+namespace SsaConfig
+{
+// FIRST ssa num is given to the first definition of a variable which can either be:
+// 1. A regular definition in the program.
+// 2. Or initialization by compInitMem.
+static const int FIRST_SSA_NUM = 2;
+
+// UNINIT ssa num is given to variables whose definitions were never encountered:
+// 1. Not by SsaBuilder, and
+// 2. Not by initialization using compInitMem.
+static const int UNINIT_SSA_NUM = 1;
+
+// Sentinel value to indicate variable not touched by SSA.
+static const int RESERVED_SSA_NUM = 0;
+
+} // end of namespace SsaConfig
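A small illustration of how these constants partition SSA numbers; the helper names below are hypothetical and not part of the JIT:

// Hypothetical helpers (illustration only).
inline bool WasRenamedBySsa(int ssaNum)
{
    return ssaNum >= SsaConfig::FIRST_SSA_NUM; // a def was seen (or compInitMem applied)
}
inline bool IsUseBeforeAnyDef(int ssaNum)
{
    return ssaNum == SsaConfig::UNINIT_SSA_NUM; // no def was ever encountered
}
inline bool IsUntouchedBySsa(int ssaNum)
{
    return ssaNum == SsaConfig::RESERVED_SSA_NUM; // variable not renamed by SSA at all
}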
diff --git a/src/jit/ssarenamestate.cpp b/src/jit/ssarenamestate.cpp
new file mode 100644
index 0000000000..a1e05f192f
--- /dev/null
+++ b/src/jit/ssarenamestate.cpp
@@ -0,0 +1,244 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX SSA XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#include "ssaconfig.h"
+#include "ssarenamestate.h"
+
+/**
+ * Constructor - initializes the stacks and counters (lclVar -> stack/counter) maps.
+ *
+ * @params alloc The allocator class used to allocate jitstd data.
+ */
+SsaRenameState::SsaRenameState(const jitstd::allocator<int>& alloc, unsigned lvaCount)
+ : counts(nullptr)
+ , stacks(nullptr)
+ , definedLocs(alloc)
+ , heapStack(alloc)
+ , heapCount(0)
+ , lvaCount(lvaCount)
+ , m_alloc(alloc)
+{
+}
+
+/**
+ * Allocates memory to hold SSA variable def counts,
+ * if not allocated already.
+ *
+ */
+void SsaRenameState::EnsureCounts()
+{
+ if (counts == nullptr)
+ {
+ counts = jitstd::utility::allocate<unsigned>(m_alloc, lvaCount);
+ for (unsigned i = 0; i < lvaCount; ++i)
+ {
+ counts[i] = SsaConfig::FIRST_SSA_NUM;
+ }
+ }
+}
+
+/**
+ * Allocates memory for holding pointers to lcl's stacks,
+ * if not allocated already.
+ *
+ */
+void SsaRenameState::EnsureStacks()
+{
+ if (stacks == nullptr)
+ {
+ stacks = jitstd::utility::allocate<Stack*>(m_alloc, lvaCount);
+ for (unsigned i = 0; i < lvaCount; ++i)
+ {
+ stacks[i] = nullptr;
+ }
+ }
+}
+
+/**
+ * Returns an SSA count number for a local variable and post-increments it.
+ *
+ * If there is no counter for the local yet, it is initialized with the default value;
+ * the count is then returned and post-incremented, so the next def gets a new count.
+ *
+ * @params lclNum The local variable def for which a count has to be returned.
+ * @return the variable name for the current definition.
+ *
+ */
+unsigned SsaRenameState::CountForDef(unsigned lclNum)
+{
+ EnsureCounts();
+ unsigned count = counts[lclNum];
+ counts[lclNum]++;
+ DBG_SSA_JITDUMP("Incrementing counter = %d by 1 for V%02u.\n", count, lclNum);
+ return count;
+}
+
+/**
+ * Returns the SSA count number for a local variable from the top of its stack.
+ *
+ * @params lclNum The local variable def for which a count has to be returned.
+ * @return the current variable name for the "use".
+ *
+ * @remarks If the stack is empty, then we have a use before a def. To handle this
+ * special case, we need to initialize the count with 'default+1', so the
+ * next definition will always use 'default+1' but return 'default' for
+ * all uses until a definition.
+ *
+ */
+unsigned SsaRenameState::CountForUse(unsigned lclNum)
+{
+ EnsureStacks();
+ DBG_SSA_JITDUMP("[SsaRenameState::CountForUse] V%02u\n", lclNum);
+
+ Stack* stack = stacks[lclNum];
+ if (stack == nullptr || stack->empty())
+ {
+ return SsaConfig::UNINIT_SSA_NUM;
+ }
+ return stack->back().m_count;
+}
+
+/**
+ * Pushes a count value on the variable stack.
+ *
+ * @params lclNum The local variable def whose stack the count needs to be pushed onto.
+ * @params count The current count value that needs to be pushed on to the stack.
+ *
+ * @remarks Usually called when renaming a "def."
+ * Create stack lazily when needed for the first time.
+ */
+void SsaRenameState::Push(BasicBlock* bb, unsigned lclNum, unsigned count)
+{
+ EnsureStacks();
+
+ // We'll use BB00 here to indicate the "block before any real blocks..."
+ DBG_SSA_JITDUMP("[SsaRenameState::Push] BB%02u, V%02u, count = %d\n", bb != nullptr ? bb->bbNum : 0, lclNum, count);
+
+ Stack* stack = stacks[lclNum];
+
+ if (stack == nullptr)
+ {
+ DBG_SSA_JITDUMP("\tCreating a new stack\n");
+ stack = stacks[lclNum] = new (jitstd::utility::allocate<Stack>(m_alloc), jitstd::placement_t()) Stack(m_alloc);
+ }
+
+ if (stack->empty() || stack->back().m_bb != bb)
+ {
+ stack->push_back(SsaRenameStateForBlock(bb, count));
+ // Remember that we've pushed a def for this loc (so we don't have
+ // to traverse *all* the locs to do the necessary pops later).
+ definedLocs.push_back(SsaRenameStateLocDef(bb, lclNum));
+ }
+ else
+ {
+ stack->back().m_count = count;
+ }
+
+#ifdef DEBUG
+ if (JitTls::GetCompiler()->verboseSsa)
+ {
+ printf("\tContents of the stack: [");
+ for (Stack::iterator iter2 = stack->begin(); iter2 != stack->end(); iter2++)
+ {
+ printf("<BB%02u, %d>", ((*iter2).m_bb != nullptr ? (*iter2).m_bb->bbNum : 0), (*iter2).m_count);
+ }
+ printf("]\n");
+
+ DumpStacks();
+ }
+#endif
+}
+
+void SsaRenameState::PopBlockStacks(BasicBlock* block)
+{
+ DBG_SSA_JITDUMP("[SsaRenameState::PopBlockStacks] BB%02u\n", block->bbNum);
+ // Iterate over the stacks for all the variables, popping those that have an entry
+ // for "block" on top.
+ while (!definedLocs.empty() && definedLocs.back().m_bb == block)
+ {
+ unsigned lclNum = definedLocs.back().m_lclNum;
+ assert(stacks != nullptr); // Cannot be empty because definedLocs is not empty.
+ Stack* stack = stacks[lclNum];
+ assert(stack != nullptr);
+ assert(stack->back().m_bb == block);
+ stack->pop_back();
+ definedLocs.pop_back();
+ }
+#ifdef DEBUG
+ // It should now be the case that no stack in stacks has an entry for "block" on top --
+ // the loop above popped them all.
+ for (unsigned i = 0; i < lvaCount; ++i)
+ {
+ if (stacks != nullptr && stacks[i] != nullptr && !stacks[i]->empty())
+ {
+ assert(stacks[i]->back().m_bb != block);
+ }
+ }
+ if (JitTls::GetCompiler()->verboseSsa)
+ {
+ DumpStacks();
+ }
+#endif // DEBUG
+}
+
+void SsaRenameState::PopBlockHeapStack(BasicBlock* block)
+{
+ while (heapStack.size() > 0 && heapStack.back().m_bb == block)
+ {
+ heapStack.pop_back();
+ }
+}
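A standalone model of the bookkeeping above, written with standard containers instead of the JIT's jitstd types: each local has a stack of <block, ssaNum> entries, and definedLocs records which locals were pushed in a block so that PopBlockStacks visits only those rather than scanning all lvaCount stacks. The constants 2 and 1 stand in for SsaConfig::FIRST_SSA_NUM and UNINIT_SSA_NUM:

#include <cassert>
#include <utility>
#include <vector>

struct MiniRenameState
{
    std::vector<std::vector<std::pair<int, unsigned>>> stacks; // per-local <blockNum, ssaNum>
    std::vector<std::pair<int, unsigned>> definedLocs;         // <blockNum, lclNum>
    std::vector<unsigned> counts;

    explicit MiniRenameState(unsigned lvaCount) : stacks(lvaCount), counts(lvaCount, 2) {}

    unsigned CountForDef(unsigned lcl) { return counts[lcl]++; } // post-increment, starts at 2

    unsigned CountForUse(unsigned lcl)
    {
        return stacks[lcl].empty() ? 1u /* "uninit" */ : stacks[lcl].back().second;
    }

    void Push(int block, unsigned lcl, unsigned ssaNum)
    {
        if (stacks[lcl].empty() || stacks[lcl].back().first != block)
        {
            stacks[lcl].push_back({block, ssaNum});
            definedLocs.push_back({block, lcl}); // only new <block, lcl> pairs are recorded
        }
        else
        {
            stacks[lcl].back().second = ssaNum; // redefinition in the same block
        }
    }

    void PopBlockStacks(int block)
    {
        while (!definedLocs.empty() && definedLocs.back().first == block)
        {
            stacks[definedLocs.back().second].pop_back();
            definedLocs.pop_back();
        }
    }
};

int main()
{
    MiniRenameState rs(2);
    rs.Push(/*block*/ 1, /*lcl*/ 0, rs.CountForDef(0)); // V00 defined in BB01 -> ssa 2
    rs.Push(1, 0, rs.CountForDef(0));                   // redefined in BB01    -> ssa 3
    assert(rs.CountForUse(0) == 3);                     // later uses in BB01 see the latest def
    assert(rs.CountForUse(1) == 1);                     // V01 never defined: "uninit"
    rs.PopBlockStacks(1);                               // leaving BB01 pops only V00's entry
    assert(rs.CountForUse(0) == 1);
    return 0;
}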
+
+#ifdef DEBUG
+/**
+ * Print the stack data for each variable in a loop.
+ */
+void SsaRenameState::DumpStacks()
+{
+ printf("Dumping stacks:\n-------------------------------\n");
+ if (lvaCount == 0)
+ {
+ printf("None\n");
+ }
+ else
+ {
+ EnsureStacks();
+ for (unsigned i = 0; i < lvaCount; ++i)
+ {
+ Stack* stack = stacks[i];
+ printf("V%02u:\t", i);
+ if (stack != nullptr)
+ {
+ for (Stack::iterator iter2 = stack->begin(); iter2 != stack->end(); ++iter2)
+ {
+ if (iter2 != stack->begin())
+ {
+ printf(", ");
+ }
+ printf("<BB%02u, %2d>", ((*iter2).m_bb != nullptr ? (*iter2).m_bb->bbNum : 0), (*iter2).m_count);
+ }
+ }
+ printf("\n");
+ }
+ }
+}
+#endif // DEBUG
diff --git a/src/jit/ssarenamestate.h b/src/jit/ssarenamestate.h
new file mode 100644
index 0000000000..1db36c5b37
--- /dev/null
+++ b/src/jit/ssarenamestate.h
@@ -0,0 +1,129 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX SSA XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#pragma once
+
+#include "jitstd.h"
+
+struct SsaRenameStateForBlock
+{
+ BasicBlock* m_bb;
+ unsigned m_count;
+
+ SsaRenameStateForBlock(BasicBlock* bb, unsigned count) : m_bb(bb), m_count(count)
+ {
+ }
+ SsaRenameStateForBlock() : m_bb(nullptr), m_count(0)
+ {
+ }
+};
+
+// A record indicating that local "m_loc" was defined in block "m_bb".
+struct SsaRenameStateLocDef
+{
+ BasicBlock* m_bb;
+ unsigned m_lclNum;
+
+ SsaRenameStateLocDef(BasicBlock* bb, unsigned lclNum) : m_bb(bb), m_lclNum(lclNum)
+ {
+ }
+};
+
+struct SsaRenameState
+{
+ typedef jitstd::list<SsaRenameStateForBlock> Stack;
+ typedef Stack** Stacks;
+ typedef unsigned* Counts;
+ typedef jitstd::list<SsaRenameStateLocDef> DefStack;
+
+ SsaRenameState(const jitstd::allocator<int>& allocator, unsigned lvaCount);
+
+ void EnsureCounts();
+ void EnsureStacks();
+
+ // Requires "lclNum" to be a variable number for which a new count corresponding to a
+ // definition is desired. The method post-increments the counter for the "lclNum."
+ unsigned CountForDef(unsigned lclNum);
+
+ // Requires "lclNum" to be a variable number for which an ssa number at the top of the
+ // stack is required i.e., for variable "uses."
+ unsigned CountForUse(unsigned lclNum);
+
+ // Requires "lclNum" to be a variable number, and requires "count" to represent
+ // an ssa number that needs to be pushed onto the stack corresponding to the lclNum.
+ void Push(BasicBlock* bb, unsigned lclNum, unsigned count);
+
+ // Pop all stacks that have an entry for "bb" on top.
+ void PopBlockStacks(BasicBlock* bb);
+
+ // Similar functions for the special implicit "Heap" variable.
+ unsigned CountForHeapDef()
+ {
+ if (heapCount == 0)
+ {
+ heapCount = SsaConfig::FIRST_SSA_NUM;
+ }
+ unsigned res = heapCount;
+ heapCount++;
+ return res;
+ }
+ unsigned CountForHeapUse()
+ {
+ return heapStack.back().m_count;
+ }
+
+ void PushHeap(BasicBlock* bb, unsigned count)
+ {
+ heapStack.push_back(SsaRenameStateForBlock(bb, count));
+ }
+
+ void PopBlockHeapStack(BasicBlock* bb);
+
+ unsigned HeapCount()
+ {
+ return heapCount;
+ }
+
+#ifdef DEBUG
+ // Debug interface
+ void DumpStacks();
+#endif
+
+private:
+ // Map of lclNum -> count.
+ Counts counts;
+
+ // Map of lclNum -> SsaRenameStateForBlock.
+ Stacks stacks;
+
+ // This list represents the set of locals defined in the current block.
+ DefStack definedLocs;
+
+ // Same state for the special implicit Heap variable.
+ Stack heapStack;
+ unsigned heapCount;
+
+ // Number of stacks/counts to allocate.
+ unsigned lvaCount;
+
+ // Allocator to allocate stacks.
+ jitstd::allocator<void> m_alloc;
+};
diff --git a/src/jit/stackfp.cpp b/src/jit/stackfp.cpp
new file mode 100644
index 0000000000..f975822740
--- /dev/null
+++ b/src/jit/stackfp.cpp
@@ -0,0 +1,4494 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifdef LEGACY_BACKEND // This file is NOT used for the RyuJIT backend that uses the linear scan register allocator.
+
+#ifdef _TARGET_AMD64_
+#error AMD64 must be !LEGACY_BACKEND
+#endif
+
+#include "compiler.h"
+#include "emit.h"
+#include "codegen.h"
+
+// Instruction list
+// N=normal, R=reverse, P=pop
+#if FEATURE_STACK_FP_X87
+const static instruction FPmathNN[] = {INS_fadd, INS_fsub, INS_fmul, INS_fdiv};
+const static instruction FPmathNP[] = {INS_faddp, INS_fsubp, INS_fmulp, INS_fdivp};
+const static instruction FPmathRN[] = {INS_fadd, INS_fsubr, INS_fmul, INS_fdivr};
+const static instruction FPmathRP[] = {INS_faddp, INS_fsubrp, INS_fmulp, INS_fdivrp};
+
+FlatFPStateX87* CodeGenInterface::FlatFPAllocFPState(FlatFPStateX87* pInitFrom)
+{
+ FlatFPStateX87* pNewState;
+
+ pNewState = new (compiler, CMK_FlatFPStateX87) FlatFPStateX87;
+ pNewState->Init(pInitFrom);
+
+ return pNewState;
+}
+
+bool CodeGen::FlatFPSameRegisters(FlatFPStateX87* pState, regMaskTP mask)
+{
+ int i;
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (pState->Mapped(i))
+ {
+ regMaskTP regmask = genRegMaskFloat((regNumber)i);
+ if ((mask & regmask) == 0)
+ {
+ return false;
+ }
+
+ mask &= ~regmask;
+ }
+ }
+
+ return mask ? false : true;
+}
+
+bool FlatFPStateX87::Mapped(unsigned uEntry)
+{
+ return m_uVirtualMap[uEntry] != (unsigned)FP_VRNOTMAPPED;
+}
+
+void FlatFPStateX87::Unmap(unsigned uEntry)
+{
+ assert(Mapped(uEntry));
+ m_uVirtualMap[uEntry] = (unsigned)FP_VRNOTMAPPED;
+}
+
+bool FlatFPStateX87::AreEqual(FlatFPStateX87* pA, FlatFPStateX87* pB)
+{
+ unsigned i;
+
+ assert(pA->IsConsistent());
+ assert(pB->IsConsistent());
+
+ if (pA->m_uStackSize != pB->m_uStackSize)
+ {
+ return false;
+ }
+
+ for (i = 0; i < pA->m_uStackSize; i++)
+ {
+ if (pA->m_uStack[i] != pB->m_uStack[i])
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#ifdef DEBUG
+bool FlatFPStateX87::IsValidEntry(unsigned uEntry)
+{
+ return (Mapped(uEntry) && (m_uVirtualMap[uEntry] >= 0 && m_uVirtualMap[uEntry] < m_uStackSize)) || !Mapped(uEntry);
+}
+
+bool FlatFPStateX87::IsConsistent()
+{
+ unsigned i;
+
+ for (i = 0; i < FP_VIRTUALREGISTERS; i++)
+ {
+ if (!IsValidEntry(i))
+ {
+ if (m_bIgnoreConsistencyChecks)
+ {
+ return true;
+ }
+ else
+ {
+ assert(!"Virtual register is marked as mapped but out of the stack range");
+ return false;
+ }
+ }
+ }
+
+ for (i = 0; i < m_uStackSize; i++)
+ {
+ if (m_uVirtualMap[m_uStack[i]] != i)
+ {
+ if (m_bIgnoreConsistencyChecks)
+ {
+ return true;
+ }
+ else
+ {
+ assert(!"Register File and stack layout don't match!");
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+void FlatFPStateX87::Dump()
+{
+ unsigned i;
+
+ assert(IsConsistent());
+
+ if (m_uStackSize > 0)
+ {
+ printf("Virtual stack state: ");
+ for (i = 0; i < m_uStackSize; i++)
+ {
+ printf("ST(%i): FPV%i | ", StackToST(i), m_uStack[i]);
+ }
+ printf("\n");
+ }
+}
+
+void FlatFPStateX87::UpdateMappingFromStack()
+{
+ memset(m_uVirtualMap, -1, sizeof(m_uVirtualMap));
+
+ unsigned i;
+
+ for (i = 0; i < m_uStackSize; i++)
+ {
+ m_uVirtualMap[m_uStack[i]] = i;
+ }
+}
+
+#endif
+
+unsigned FlatFPStateX87::StackToST(unsigned uEntry)
+{
+ assert(IsValidEntry(uEntry));
+ return m_uStackSize - 1 - uEntry;
+}
+
+unsigned FlatFPStateX87::VirtualToST(unsigned uEntry)
+{
+ assert(Mapped(uEntry));
+
+ return StackToST(m_uVirtualMap[uEntry]);
+}
+
+unsigned FlatFPStateX87::STToVirtual(unsigned uST)
+{
+ assert(uST < m_uStackSize);
+
+ return m_uStack[m_uStackSize - 1 - uST];
+}
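A tiny standalone check (not JIT code) of the slot numbering used by StackToST and STToVirtual above: slot 0 is the bottom of the flat virtual stack, and the most recently pushed slot corresponds to ST(0):

#include <cassert>

int main()
{
    const unsigned stack[]   = {2, 0, 5}; // FPV2 pushed first, FPV5 pushed last
    const unsigned stackSize = 3;
    auto stackToST   = [&](unsigned slot) { return stackSize - 1 - slot; };
    auto stToVirtual = [&](unsigned st) { return stack[stackSize - 1 - st]; };

    assert(stackToST(2) == 0);   // FPV5, pushed last, is ST(0), the x87 top of stack
    assert(stackToST(0) == 2);   // FPV2, pushed first, is ST(2)
    assert(stToVirtual(0) == 5); // ST(0) currently holds FPV5
    assert(stToVirtual(2) == 2); // ST(2) holds FPV2
    return 0;
}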
+
+void FlatFPStateX87::Init(FlatFPStateX87* pFrom)
+{
+ if (pFrom)
+ {
+ memcpy(this, pFrom, sizeof(*this));
+ }
+ else
+ {
+ memset(m_uVirtualMap, -1, sizeof(m_uVirtualMap));
+
+#ifdef DEBUG
+ memset(m_uStack, -1, sizeof(m_uStack));
+#endif
+ m_uStackSize = 0;
+ }
+
+#ifdef DEBUG
+ m_bIgnoreConsistencyChecks = false;
+#endif
+}
+
+void FlatFPStateX87::Associate(unsigned uEntry, unsigned uStack)
+{
+ assert(uStack < m_uStackSize);
+
+ m_uStack[uStack] = uEntry;
+ m_uVirtualMap[uEntry] = uStack;
+}
+
+unsigned FlatFPStateX87::TopIndex()
+{
+ return m_uStackSize - 1;
+}
+
+unsigned FlatFPStateX87::TopVirtual()
+{
+ assert(m_uStackSize > 0);
+ return m_uStack[m_uStackSize - 1];
+}
+
+void FlatFPStateX87::Rename(unsigned uVirtualTo, unsigned uVirtualFrom)
+{
+ assert(!Mapped(uVirtualTo));
+
+ unsigned uSlot = m_uVirtualMap[uVirtualFrom];
+
+ Unmap(uVirtualFrom);
+ Associate(uVirtualTo, uSlot);
+}
+
+void FlatFPStateX87::Push(unsigned uEntry)
+{
+ assert(m_uStackSize <= FP_PHYSICREGISTERS);
+ assert(!Mapped(uEntry));
+
+ m_uStackSize++;
+ Associate(uEntry, TopIndex());
+
+ assert(IsConsistent());
+}
+
+unsigned FlatFPStateX87::Pop()
+{
+ assert(m_uStackSize != 0);
+
+ unsigned uVirtual = m_uStack[--m_uStackSize];
+
+#ifdef DEBUG
+ m_uStack[m_uStackSize] = (unsigned)-1;
+#endif
+
+ Unmap(uVirtual);
+
+ return uVirtual;
+}
+
+bool FlatFPStateX87::IsEmpty()
+{
+ return m_uStackSize == 0;
+}
+
+void CodeGen::genCodeForTransitionStackFP(FlatFPStateX87* pSrc, FlatFPStateX87* pDst)
+{
+ FlatFPStateX87 fpState;
+ FlatFPStateX87* pTmp;
+ int i;
+
+ // Make a temp copy
+ memcpy(&fpState, pSrc, sizeof(FlatFPStateX87));
+ pTmp = &fpState;
+
+ // Make sure everything seems consistent.
+ assert(pSrc->m_uStackSize >= pDst->m_uStackSize);
+#ifdef DEBUG
+ for (i = 0; i < FP_VIRTUALREGISTERS; i++)
+ {
+ if (!pTmp->Mapped(i) && pDst->Mapped(i))
+ {
+ assert(!"Dst stack state can't have a virtual register live if Src target has it dead");
+ }
+ }
+#endif
+
+ // First we need to get rid of the stuff that's dead in pDst
+ for (i = 0; i < FP_VIRTUALREGISTERS; i++)
+ {
+ if (pTmp->Mapped(i) && !pDst->Mapped(i))
+ {
+ // We have to get rid of this one
+ JITDUMP("Removing virtual register V%i from stack\n", i);
+
+ // Don't need this virtual register any more
+ FlatFPX87_Unload(pTmp, i);
+ }
+ }
+
+ assert(pTmp->m_uStackSize == pDst->m_uStackSize);
+
+ // Extract cycles
+ int iProcessed = 0;
+
+ // We start with the top of the stack so that we can
+ // easily recognize the cycle that contains it
+ for (i = pTmp->m_uStackSize - 1; i >= 0; i--)
+ {
+ // Have we processed this stack element yet?
+ if (((1 << i) & iProcessed) == 0)
+ {
+ // Extract cycle
+ int iCycle[FP_VIRTUALREGISTERS];
+ int iCycleLength = 0;
+ int iCurrent = i;
+ int iTOS = pTmp->m_uStackSize - 1;
+
+ do
+ {
+ // Mark current stack element as processed
+ iProcessed |= (1 << iCurrent);
+
+ // Update cycle
+ iCycle[iCycleLength++] = iCurrent;
+
+ // Next element in cycle
+ iCurrent = pDst->m_uVirtualMap[pTmp->m_uStack[iCurrent]];
+
+ } while ((iProcessed & (1 << iCurrent)) == 0);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Cycle: (");
+ for (int l = 0; l < iCycleLength; l++)
+ {
+ printf("%i", pTmp->StackToST(iCycle[l]));
+ if (l + 1 < iCycleLength)
+ printf(", ");
+ }
+ printf(")\n");
+ }
+#endif
+
+ // Extract cycle
+ if (iCycleLength == 1)
+ {
+ // Stack element in the same place. Nothing to do
+ }
+ else
+ {
+ if (iCycle[0] == iTOS)
+ {
+ // Cycle includes stack element 0
+ int j;
+
+ for (j = 1; j < iCycleLength; j++)
+ {
+ FlatFPX87_SwapStack(pTmp, iCycle[j], iTOS);
+ }
+ }
+ else
+ {
+ // Cycle doesn't include the top-of-stack element (ST(0))
+ int j;
+
+ for (j = 0; j < iCycleLength; j++)
+ {
+ FlatFPX87_SwapStack(pTmp, iCycle[j], iTOS);
+ }
+
+ FlatFPX87_SwapStack(pTmp, iCycle[0], iTOS);
+ }
+ }
+ }
+ }
+
+ assert(FlatFPStateX87::AreEqual(pTmp, pDst));
+}
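The transition above realizes an arbitrary re-ordering of the FP stack by decomposing the required permutation into cycles and resolving each cycle with swaps against the top of the stack, which is the only swap x87's fxch provides. A standalone model of that scheme (virtual register numbers are assumed to be 0..n-1 so they can index a plain array; this is an illustration, not the JIT's code):

#include <cassert>
#include <cstdio>
#include <utility>
#include <vector>

// Swap the value in "slot" with the value in the top slot (models fxch ST(i)).
static void SwapWithTop(std::vector<int>& stack, int slot)
{
    int top = (int)stack.size() - 1;
    std::swap(stack[slot], stack[top]);
    printf("fxch ST(%d)\n", top - slot); // distance from the top, as on x87
}

// "cur" holds virtual register numbers by stack slot (last element is the top).
// "want" is the desired final layout; it must be a permutation of "cur".
static void TransitionByCycles(std::vector<int>& cur, const std::vector<int>& want)
{
    int n   = (int)cur.size();
    int tos = n - 1;
    std::vector<int> wantPos(n); // wantPos[vreg] = slot the vreg must end up in
    for (int slot = 0; slot < n; slot++)
        wantPos[want[slot]] = slot;

    unsigned processed = 0;
    for (int i = tos; i >= 0; i--)
    {
        if (processed & (1u << i))
            continue;

        // Extract the cycle containing slot i.
        std::vector<int> cycle;
        int slot = i;
        do
        {
            processed |= (1u << slot);
            cycle.push_back(slot);
            slot = wantPos[cur[slot]]; // where the value in "slot" needs to go
        } while ((processed & (1u << slot)) == 0);

        if (cycle.size() == 1)
            continue; // already in place

        if (cycle[0] == tos)
        {
            for (size_t j = 1; j < cycle.size(); j++)
                SwapWithTop(cur, cycle[j]);
        }
        else
        {
            for (size_t j = 0; j < cycle.size(); j++)
                SwapWithTop(cur, cycle[j]);
            SwapWithTop(cur, cycle[0]); // restore the (already placed) top value
        }
    }
    assert(cur == want);
}

int main()
{
    std::vector<int> cur  = {0, 1, 2, 3}; // FPV0 at the bottom, FPV3 on top
    std::vector<int> want = {2, 0, 1, 3}; // FPV3 stays put; FPV0..2 rotate
    TransitionByCycles(cur, want);
    return 0;
}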
+
+void CodeGen::genCodeForTransitionFromMask(FlatFPStateX87* pSrc, regMaskTP mask, bool bEmitCode)
+{
+ unsigned i;
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (pSrc->Mapped(i))
+ {
+ if ((mask & genRegMaskFloat((regNumber)i)) == 0)
+ {
+ FlatFPX87_Unload(pSrc, i, bEmitCode);
+ }
+ }
+ else
+ {
+ assert((mask & genRegMaskFloat((regNumber)i)) == 0 &&
+ "A register marked as incoming live in the target block isnt live in the current block");
+ }
+ }
+}
+
+void CodeGen::genCodeForPrologStackFP()
+{
+ assert(compiler->compGeneratingProlog);
+ assert(compiler->fgFirstBB);
+
+ FlatFPStateX87* pState = compiler->fgFirstBB->bbFPStateX87;
+
+ if (pState && pState->m_uStackSize)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(liveEnregIn, VarSetOps::Intersection(compiler, compiler->fgFirstBB->bbLiveIn,
+ compiler->optAllFPregVars));
+ unsigned i;
+
+#ifdef DEBUG
+ unsigned uLoads = 0;
+#endif
+
+ assert(pState->m_uStackSize <= FP_VIRTUALREGISTERS);
+ for (i = 0; i < pState->m_uStackSize; i++)
+ {
+ // Get the virtual register that matches
+ unsigned iVirtual = pState->STToVirtual(pState->m_uStackSize - i - 1);
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->IsFloatRegType() && varDsc->lvRegister && varDsc->lvRegNum == iVirtual)
+ {
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ // Is this variable live on entry?
+ if (VarSetOps::IsMember(compiler, liveEnregIn, varIndex))
+ {
+ if (varDsc->lvIsParam)
+ {
+ getEmitter()->emitIns_S(INS_fld, EmitSize(varDsc->TypeGet()), varNum, 0);
+ }
+ else
+ {
+ // uninitialized regvar
+ getEmitter()->emitIns(INS_fldz);
+ }
+
+#ifdef DEBUG
+ uLoads++;
+#endif
+ break;
+ }
+ }
+ }
+
+ assert(varNum != compiler->lvaCount); // We have to find the matching var!!!!
+ }
+
+ assert(uLoads == VarSetOps::Count(compiler, liveEnregIn));
+ }
+}
+
+void CodeGen::genCodeForEndBlockTransitionStackFP(BasicBlock* block)
+{
+ switch (block->bbJumpKind)
+ {
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_EHCATCHRET:
+ // Nothing to do
+ assert(compCurFPState.m_uStackSize == 0);
+ break;
+ case BBJ_THROW:
+ break;
+ case BBJ_RETURN:
+ // Nothing to do
+ assert((varTypeIsFloating(compiler->info.compRetType) && compCurFPState.m_uStackSize == 1) ||
+ compCurFPState.m_uStackSize == 0);
+ break;
+ case BBJ_COND:
+ case BBJ_NONE:
+ genCodeForBBTransitionStackFP(block->bbNext);
+ break;
+ case BBJ_ALWAYS:
+ genCodeForBBTransitionStackFP(block->bbJumpDest);
+ break;
+ case BBJ_LEAVE:
+ assert(!"BBJ_LEAVE blocks shouldn't get here");
+ break;
+ case BBJ_CALLFINALLY:
+ assert(compCurFPState.IsEmpty() && "we don't enregister variables live on entry to finallys");
+ genCodeForBBTransitionStackFP(block->bbJumpDest);
+ break;
+ case BBJ_SWITCH:
+ // Nothing to do here
+ break;
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+}
+
+regMaskTP CodeGen::genRegMaskFromLivenessStackFP(VARSET_VALARG_TP varset)
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ regMaskTP result = 0;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->IsFloatRegType() && varDsc->lvRegister)
+ {
+
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ /* Is this variable live on entry? */
+
+ if (VarSetOps::IsMember(compiler, varset, varIndex))
+ {
+ // We should only call this function when doing a transition
+ // to a block which doesn't have state yet. All incoming live enregistered variables
+ // should already have been initialized.
+ assert(varDsc->lvRegNum != REG_FPNONE);
+
+ result |= genRegMaskFloat(varDsc->lvRegNum);
+ }
+ }
+ }
+
+ return result;
+}
+
+void CodeGen::genCodeForBBTransitionStackFP(BasicBlock* pDst)
+{
+ assert(compCurFPState.IsConsistent());
+ if (pDst->bbFPStateX87)
+ {
+ // Target block has an associated state. generate transition
+ genCodeForTransitionStackFP(&compCurFPState, pDst->bbFPStateX87);
+ }
+ else
+ {
+ // Target block hasn't got an associated state. As it can only possibly
+ // have a subset of the current state, we'll take advantage of this and
+ // generate the optimal transition
+
+ // Copy current state
+ pDst->bbFPStateX87 = FlatFPAllocFPState(&compCurFPState);
+
+ regMaskTP liveRegIn =
+ genRegMaskFromLivenessStackFP(VarSetOps::Intersection(compiler, pDst->bbLiveIn, compiler->optAllFPregVars));
+
+ // Match to live vars
+ genCodeForTransitionFromMask(pDst->bbFPStateX87, liveRegIn);
+ }
+}
+
+void CodeGen::SpillTempsStackFP(regMaskTP canSpillMask)
+{
+
+ unsigned i;
+ regMaskTP spillMask = 0;
+ regNumber reg;
+
+ // First pass we determine which registers we spill
+ for (i = 0; i < compCurFPState.m_uStackSize; i++)
+ {
+ reg = (regNumber)compCurFPState.m_uStack[i];
+ regMaskTP regMask = genRegMaskFloat(reg);
+ if ((regMask & canSpillMask) && (regMask & regSet.rsMaskRegVarFloat) == 0)
+ {
+ spillMask |= regMask;
+ }
+ }
+
+ // Second pass we do the actual spills
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if ((genRegMaskFloat((regNumber)i) & spillMask))
+ {
+ JITDUMP("spilling temp in register %s\n", regVarNameStackFP((regNumber)i));
+ SpillFloat((regNumber)i, true);
+ }
+ }
+}
+
+// Spills all the fp stack. We need this to spill
+// across calls
+void CodeGen::SpillForCallStackFP()
+{
+ unsigned i;
+ unsigned uSize = compCurFPState.m_uStackSize;
+
+ for (i = 0; i < uSize; i++)
+ {
+ SpillFloat((regNumber)compCurFPState.m_uStack[compCurFPState.TopIndex()], true);
+ }
+}
+
+void CodeGenInterface::SpillFloat(regNumber reg, bool bIsCall)
+{
+#ifdef DEBUG
+ regMaskTP mask = genRegMaskFloat(reg);
+
+ // We can allow spilling regvars, but we don't need it at the moment, and we're
+ // missing code in setupopforflatfp, so assert.
+ assert(bIsCall || (mask & (regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat)) == 0);
+#endif
+
+ JITDUMP("SpillFloat spilling register %s\n", regVarNameStackFP(reg));
+
+ // We take the virtual register to the top of the stack
+ FlatFPX87_MoveToTOS(&compCurFPState, reg);
+
+ // Allocate spill structure
+ RegSet::SpillDsc* spill = RegSet::SpillDsc::alloc(compiler, &regSet, TYP_FLOAT);
+
+ // Fill out spill structure
+ var_types type;
+ if (regSet.genUsedRegsFloat[reg])
+ {
+ JITDUMP("will spill tree [%08p]\n", dspPtr(regSet.genUsedRegsFloat[reg]));
+ // register used for temp stack
+ spill->spillTree = regSet.genUsedRegsFloat[reg];
+ spill->bEnregisteredVariable = false;
+
+ regSet.genUsedRegsFloat[reg]->gtFlags |= GTF_SPILLED;
+
+ type = genActualType(regSet.genUsedRegsFloat[reg]->TypeGet());
+
+ // Clear used flag
+ regSet.SetUsedRegFloat(regSet.genUsedRegsFloat[reg], false);
+ }
+ else
+ {
+ JITDUMP("will spill varDsc [%08p]\n", dspPtr(regSet.genRegVarsFloat[reg]));
+
+ // enregistered variable
+ spill->spillVarDsc = regSet.genRegVarsFloat[reg];
+ assert(spill->spillVarDsc);
+
+ spill->bEnregisteredVariable = true;
+
+ // Mark as spilled
+ spill->spillVarDsc->lvSpilled = true;
+ type = genActualType(regSet.genRegVarsFloat[reg]->TypeGet());
+
+ // Clear register flag
+ SetRegVarFloat(reg, type, 0);
+ }
+
+ // Add to spill list
+ spill->spillNext = regSet.rsSpillFloat;
+ regSet.rsSpillFloat = spill;
+
+ // Obtain space
+ TempDsc* temp = spill->spillTemp = compiler->tmpGetTemp(type);
+ emitAttr size = EmitSize(type);
+
+ getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
+ compCurFPState.Pop();
+}
+
+void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc, bool useSameReg)
+{
+ NYI(!"Need not be implemented for x86.");
+}
+
+void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc)
+{
+ // Do actual unspill
+ if (spillDsc->bEnregisteredVariable)
+ {
+ assert(spillDsc->spillVarDsc->lvSpilled);
+
+ // Do the logic as it was a regvar birth
+ genRegVarBirthStackFP(spillDsc->spillVarDsc);
+
+ // Mark as not spilled any more
+ spillDsc->spillVarDsc->lvSpilled = false;
+
+ // Update stack layout.
+ compCurFPState.Push(spillDsc->spillVarDsc->lvRegNum);
+ }
+ else
+ {
+ assert(spillDsc->spillTree->gtFlags & GTF_SPILLED);
+
+ spillDsc->spillTree->gtFlags &= ~GTF_SPILLED;
+
+ regNumber reg = regSet.PickRegFloat();
+ genMarkTreeInReg(spillDsc->spillTree, reg);
+ regSet.SetUsedRegFloat(spillDsc->spillTree, true);
+
+ compCurFPState.Push(reg);
+ }
+
+ // load from spilled spot
+ emitAttr size = EmitSize(spillDsc->spillTemp->tdTempType());
+ getEmitter()->emitIns_S(INS_fld, size, spillDsc->spillTemp->tdTempNum(), 0);
+}
+
+// unspills any reg var that we have in the spill list. We need this
+// because we can't have any spilled vars across basic blocks
+void CodeGen::UnspillRegVarsStackFp()
+{
+ RegSet::SpillDsc* cur;
+ RegSet::SpillDsc* next;
+
+ for (cur = regSet.rsSpillFloat; cur; cur = next)
+ {
+ next = cur->spillNext;
+
+ if (cur->bEnregisteredVariable)
+ {
+ UnspillFloat(cur);
+ }
+ }
+}
+
+#ifdef DEBUG
+const char* regNamesFP[] = {
+#define REGDEF(name, rnum, mask, sname) sname,
+#include "registerfp.h"
+};
+
+// static
+const char* CodeGenInterface::regVarNameStackFP(regNumber reg)
+{
+ return regNamesFP[reg];
+}
+
+bool CodeGen::ConsistentAfterStatementStackFP()
+{
+ if (!compCurFPState.IsConsistent())
+ {
+ return false;
+ }
+
+ if (regSet.rsMaskUsedFloat != 0)
+ {
+ assert(!"FP register marked as used after statement");
+ return false;
+ }
+ if (regSet.rsMaskLockedFloat != 0)
+ {
+ assert(!"FP register marked as locked after statement");
+ return false;
+ }
+ if (genCountBits(regSet.rsMaskRegVarFloat) > compCurFPState.m_uStackSize)
+ {
+ assert(!"number of FP regvars in regSet.rsMaskRegVarFloat doesnt match current FP state");
+ return false;
+ }
+
+ return true;
+}
+
+#endif
+
+int CodeGen::genNumberTemps()
+{
+ return compCurFPState.m_uStackSize - genCountBits(regSet.rsMaskRegVarFloat);
+}
+
+void CodeGen::genDiscardStackFP(GenTreePtr tree)
+{
+ assert(tree->InReg());
+ assert(varTypeIsFloating(tree));
+
+ FlatFPX87_Unload(&compCurFPState, tree->gtRegNum, true);
+}
+
+void CodeGen::genRegRenameWithMasks(regNumber dstReg, regNumber srcReg)
+{
+ regMaskTP dstregmask = genRegMaskFloat(dstReg);
+ regMaskTP srcregmask = genRegMaskFloat(srcReg);
+
+ // rename use register
+ compCurFPState.Rename(dstReg, srcReg);
+
+ regSet.rsMaskUsedFloat &= ~srcregmask;
+ regSet.rsMaskUsedFloat |= dstregmask;
+
+ if (srcregmask & regSet.rsMaskLockedFloat)
+ {
+ assert((dstregmask & regSet.rsMaskLockedFloat) == 0);
+ // We will set the new one as locked
+ regSet.rsMaskLockedFloat &= ~srcregmask;
+ regSet.rsMaskLockedFloat |= dstregmask;
+ }
+
+ // Update the used tree
+ assert(!regSet.genUsedRegsFloat[dstReg]);
+ regSet.genUsedRegsFloat[dstReg] = regSet.genUsedRegsFloat[srcReg];
+ regSet.genUsedRegsFloat[dstReg]->gtRegNum = dstReg;
+ regSet.genUsedRegsFloat[srcReg] = NULL;
+}
+
+void CodeGen::genRegVarBirthStackFP(LclVarDsc* varDsc)
+{
+ // Mark the virtual register we're assigning to this local;
+ regNumber reg = varDsc->lvRegNum;
+
+#ifdef DEBUG
+ regMaskTP regmask = genRegMaskFloat(reg);
+#endif
+
+ assert(varDsc->lvTracked && varDsc->lvRegister && reg != REG_FPNONE);
+ if (regSet.genUsedRegsFloat[reg])
+ {
+
+ // Register was marked as used... will have to rename it so we can put the
+ // regvar where it belongs.
+ JITDUMP("Renaming used register %s\n", regVarNameStackFP(reg));
+
+ regNumber newreg;
+
+ newreg = regSet.PickRegFloat();
+
+#ifdef DEBUG
+ regMaskTP newregmask = genRegMaskFloat(newreg);
+#endif
+
+ // Update used mask
+ assert((regSet.rsMaskUsedFloat & regmask) && (regSet.rsMaskUsedFloat & newregmask) == 0);
+
+ genRegRenameWithMasks(newreg, reg);
+ }
+
+ // Mark the reg as holding a regvar
+ varDsc->lvSpilled = false;
+ SetRegVarFloat(reg, varDsc->TypeGet(), varDsc);
+}
+
+void CodeGen::genRegVarBirthStackFP(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("variable V%i is going live in ", tree->gtLclVarCommon.gtLclNum);
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Update register in local var
+ LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
+
+ genRegVarBirthStackFP(varDsc);
+ assert(tree->gtRegNum == tree->gtRegVar.gtRegNum && tree->gtRegNum == varDsc->lvRegNum);
+}
+
+void CodeGen::genRegVarDeathStackFP(LclVarDsc* varDsc)
+{
+ regNumber reg = varDsc->lvRegNum;
+
+ assert(varDsc->lvTracked && varDsc->lvRegister && reg != REG_FPNONE);
+ SetRegVarFloat(reg, varDsc->TypeGet(), 0);
+}
+
+void CodeGen::genRegVarDeathStackFP(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("register %s is going dead in ", regVarNameStackFP(tree->gtRegVar.gtRegNum));
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
+ genRegVarDeathStackFP(varDsc);
+}
+
+void CodeGen::genLoadStackFP(GenTreePtr tree, regNumber reg)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genLoadStackFP");
+ Compiler::printTreeID(tree);
+ printf(" %s\n", regVarNameStackFP(reg));
+ }
+#endif // DEBUG
+
+ if (tree->IsRegVar())
+ {
+ // If it has been spilled, unspill it.
+ LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvSpilled)
+ {
+ UnspillFloat(varDsc);
+ }
+
+ // if it's dying, just rename the register, else load it normally
+ if (tree->IsRegVarDeath())
+ {
+ genRegVarDeathStackFP(tree);
+ compCurFPState.Rename(reg, tree->gtRegVar.gtRegNum);
+ }
+ else
+ {
+ assert(tree->gtRegNum == tree->gtRegVar.gtRegNum);
+ inst_FN(INS_fld, compCurFPState.VirtualToST(tree->gtRegVar.gtRegNum));
+ FlatFPX87_PushVirtual(&compCurFPState, reg);
+ }
+ }
+ else
+ {
+ FlatFPX87_PushVirtual(&compCurFPState, reg);
+ inst_FS_TT(INS_fld, tree);
+ }
+}
+
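+// Moves an FP value from src to dst. A memory destination is handled with fst/fstp from
+// the top of the stack; a memory or constant source is loaded with fld; for reg-to-regvar
+// moves we rename the source slot when the source is disposable (a temp or a dying regvar)
+// and load a copy otherwise.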
+void CodeGen::genMovStackFP(GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg)
+{
+ if (dstreg == REG_FPNONE && !dst->IsRegVar())
+ {
+ regNumber reg;
+
+ // reg to mem path
+ if (srcreg == REG_FPNONE)
+ {
+ assert(src->IsRegVar());
+ reg = src->gtRegNum;
+ }
+ else
+ {
+ reg = srcreg;
+ }
+
+ // Move src to the top of the stack
+ FlatFPX87_MoveToTOS(&compCurFPState, reg);
+
+ if (srcreg != REG_FPNONE || (src->IsRegVar() && src->IsRegVarDeath()))
+ {
+ // Emit instruction
+ inst_FS_TT(INS_fstp, dst);
+
+ // Update stack
+ compCurFPState.Pop();
+ }
+ else
+ {
+ inst_FS_TT(INS_fst, dst);
+ }
+ }
+ else
+ {
+ if (dstreg == REG_FPNONE)
+ {
+ assert(dst->IsRegVar());
+ dstreg = dst->gtRegNum;
+ }
+
+ if (srcreg == REG_FPNONE && !src->IsRegVar())
+ {
+ // mem to reg
+ assert(dst->IsRegVar() && dst->IsRegVarBirth());
+
+ FlatFPX87_PushVirtual(&compCurFPState, dstreg);
+ FlatFPX87_MoveToTOS(&compCurFPState, dstreg);
+
+ if (src->gtOper == GT_CNS_DBL)
+ {
+ genConstantLoadStackFP(src);
+ }
+ else
+ {
+ inst_FS_TT(INS_fld, src);
+ }
+ }
+ else
+ {
+ // disposable reg to reg, use renaming
+ assert(dst->IsRegVar() && dst->IsRegVarBirth());
+ assert(src->IsRegVar() || (src->InReg()));
+ assert(src->gtRegNum != REG_FPNONE);
+
+ if ((src->InReg()) || (src->IsRegVar() && src->IsRegVarDeath()))
+ {
+ // src is disposable and dst is a regvar, so we'll rename src to dst
+
+ // SetupOp should have masked out the regvar
+ assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
+ !(genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat));
+
+ // get slot that holds the value
+ unsigned uStack = compCurFPState.m_uVirtualMap[src->gtRegNum];
+
+ // unlink the slot that holds the value
+ compCurFPState.Unmap(src->gtRegNum);
+
+ regNumber tgtreg = dst->gtRegVar.gtRegNum;
+
+ compCurFPState.IgnoreConsistencyChecks(true);
+
+ if (regSet.genUsedRegsFloat[tgtreg])
+ {
+ // tgtreg is in use, so move its contents to srcreg. We do the renaming here
+ // because srcreg won't be marked as used; otherwise srcreg would be a candidate
+ // for moving, which is something we don't want.
+ genRegRenameWithMasks(src->gtRegNum, tgtreg);
+ }
+
+ compCurFPState.IgnoreConsistencyChecks(false);
+
+ // Birth of FP var
+ genRegVarBirthStackFP(dst);
+
+ // Associate target reg with source physical register
+ compCurFPState.Associate(tgtreg, uStack);
+ }
+ else
+ {
+ if (src->IsRegVar())
+ {
+ // A regvar that isn't dying, copied to a regvar
+ assert(!src->IsRegVarDeath());
+
+ // Birth of FP var
+ genRegVarBirthStackFP(dst);
+
+ // Load register
+ inst_FN(INS_fld, compCurFPState.VirtualToST(src->gtRegVar.gtRegNum));
+
+ // update our logic stack
+ FlatFPX87_PushVirtual(&compCurFPState, dst->gtRegVar.gtRegNum);
+ }
+ else
+ {
+ // memory to regvar
+
+ // Birth of FP var
+ genRegVarBirthStackFP(dst);
+
+ // load into stack
+ inst_FS_TT(INS_fld, src);
+
+ // update our logic stack
+ FlatFPX87_PushVirtual(&compCurFPState, dst->gtRegVar.gtRegNum);
+ }
+ }
+ }
+ }
+}
+
+void CodeGen::genCodeForTreeStackFP_DONE(GenTreePtr tree, regNumber reg)
+{
+ return genCodeForTree_DONE(tree, reg);
+}
+
+// Does the setup of the FP stack on entry to block
+void CodeGen::genSetupStateStackFP(BasicBlock* block)
+{
+ bool bGenerate = !block->bbFPStateX87;
+ if (bGenerate)
+ {
+ // Allocate FP state
+ block->bbFPStateX87 = FlatFPAllocFPState();
+ block->bbFPStateX87->Init();
+ }
+
+ // Update liveset and lock enregistered live vars on entry
+ VARSET_TP VARSET_INIT_NOCOPY(liveSet,
+ VarSetOps::Intersection(compiler, block->bbLiveIn, compiler->optAllFPregVars));
+
+ if (!VarSetOps::IsEmpty(compiler, liveSet))
+ {
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->IsFloatRegType() && varDsc->lvRegister)
+ {
+
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ // Is this variable live on entry?
+ if (VarSetOps::IsMember(compiler, liveSet, varIndex))
+ {
+ JITDUMP("genSetupStateStackFP(): enregistered variable V%i is live on entry to block\n", varNum);
+
+ assert(varDsc->lvTracked);
+ assert(varDsc->lvRegNum != REG_FPNONE);
+
+ genRegVarBirthStackFP(varDsc);
+
+ if (bGenerate)
+ {
+ // If we're generating layout, update it.
+ block->bbFPStateX87->Push(varDsc->lvRegNum);
+ }
+ }
+ }
+ }
+ }
+
+ compCurFPState.Init(block->bbFPStateX87);
+
+ assert(block->bbFPStateX87->IsConsistent());
+}
+
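+// Pushes a floating point call argument onto the x86 stack. Float constants are pushed as
+// immediates; doubles and other memory operands go through fld/fstp into space reserved at
+// [ESP] (or are pushed a DWORD at a time); values already on the FP stack are popped
+// directly into the outgoing argument slot.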
+regMaskTP CodeGen::genPushArgumentStackFP(GenTreePtr args)
+{
+ regMaskTP addrReg = 0;
+ unsigned opsz = genTypeSize(genActualType(args->TypeGet()));
+
+ switch (args->gtOper)
+ {
+ GenTreePtr temp;
+ GenTreePtr fval;
+ size_t flopsz;
+
+ case GT_CNS_DBL:
+ {
+ float f = 0.0;
+ int* addr = NULL;
+ if (args->TypeGet() == TYP_FLOAT)
+ {
+ f = (float)args->gtDblCon.gtDconVal;
+ // *(long*) (&f) used instead of *addr because of of strict
+ // pointer aliasing optimization. According to the ISO C/C++
+ // standard, an optimizer can assume two pointers of
+ // non-compatible types do not point to the same memory.
+ inst_IV(INS_push, *((int*)(&f)));
+ genSinglePush();
+ addrReg = 0;
+ }
+ else
+ {
+ addr = (int*)&args->gtDblCon.gtDconVal;
+
+ // Store-forwarding fix for Pentium 4 and Centrino
+ // (even for down-level CPUs, as we don't care about their perf any more)
+ fval = genMakeConst(&args->gtDblCon.gtDconVal, args->gtType, args, true);
+ inst_FS_TT(INS_fld, fval);
+ flopsz = (size_t)8;
+ inst_RV_IV(INS_sub, REG_ESP, flopsz, EA_PTRSIZE);
+ getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(flopsz), REG_NA, REG_ESP, 0);
+ genSinglePush();
+ genSinglePush();
+
+ addrReg = 0;
+ }
+
+ break;
+ }
+
+ case GT_CAST:
+ {
+ // Is the value a cast from double?
+ if ((args->gtOper == GT_CAST) && (args->CastFromType() == TYP_DOUBLE))
+ {
+ /* Load the value onto the FP stack */
+
+ genCodeForTreeFlt(args->gtCast.CastOp(), false);
+
+ /* Go push the value as a float/double */
+ args = args->gtCast.CastOp();
+
+ addrReg = 0;
+ goto PUSH_FLT;
+ }
+ // Fall through to default case....
+ }
+ default:
+ {
+ temp = genMakeAddrOrFPstk(args, &addrReg, false);
+ if (temp)
+ {
+ unsigned offs;
+
+ // We have the address of the float operand, push its bytes
+ offs = opsz;
+ assert(offs % sizeof(int) == 0);
+
+ if (offs == 4)
+ {
+ assert(args->gtType == temp->gtType);
+ do
+ {
+ offs -= sizeof(int);
+ inst_TT(INS_push, temp, offs);
+ genSinglePush();
+ } while (offs);
+ }
+ else
+ {
+ // Store-forwarding fix for Pentium 4 and Centrino
+ inst_FS_TT(INS_fld, temp);
+ flopsz = (size_t)offs;
+ inst_RV_IV(INS_sub, REG_ESP, (size_t)flopsz, EA_PTRSIZE);
+ getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(flopsz), REG_NA, REG_ESP, 0);
+ genSinglePush();
+ genSinglePush();
+ }
+ }
+ else
+ {
+ // The argument is on the FP stack -- pop it into [ESP-4/8]
+
+ PUSH_FLT:
+
+ inst_RV_IV(INS_sub, REG_ESP, opsz, EA_PTRSIZE);
+
+ genSinglePush();
+ if (opsz == 2 * sizeof(unsigned))
+ genSinglePush();
+
+ // Take reg to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, args->gtRegNum);
+
+ // Pop it off to stack
+ compCurFPState.Pop();
+ getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(opsz), REG_NA, REG_ESP, 0);
+ }
+
+ gcInfo.gcMarkRegSetNpt(addrReg);
+ break;
+ }
+ }
+
+ return addrReg;
+}
+
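+// Rounds the FP value held by 'op' to 'type' precision by spilling it to a temp of that type
+// and reloading it. Memory-resident operands already have the right precision and are left
+// untouched.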
+void CodeGen::genRoundFpExpressionStackFP(GenTreePtr op, var_types type)
+{
+ // Do nothing with memory resident opcodes - these are the right precision
+ // (even if genMakeAddrOrFPstk loads them to the FP stack)
+ if (type == TYP_UNDEF)
+ type = op->TypeGet();
+
+ switch (op->gtOper)
+ {
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_CLS_VAR:
+ case GT_CNS_DBL:
+ case GT_IND:
+ case GT_LEA:
+ if (type == op->TypeGet())
+ return;
+ default:
+ break;
+ }
+
+ assert(op->gtRegNum != REG_FPNONE);
+
+ // Take register to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, op->gtRegNum);
+
+ // Allocate a temp for the expression
+ TempDsc* temp = compiler->tmpGetTemp(type);
+
+ // Store the FP value into the temp
+ inst_FS_ST(INS_fstp, EmitSize(type), temp, 0);
+
+ // Load the value back onto the FP stack
+ inst_FS_ST(INS_fld, EmitSize(type), temp, 0);
+
+ // We no longer need the temp
+ compiler->tmpRlsTemp(temp);
+}
+
+void CodeGen::genCodeForTreeStackFP_Const(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_Const() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+#ifdef DEBUG
+ if (tree->OperGet() != GT_CNS_DBL)
+ {
+ compiler->gtDispTree(tree);
+ assert(!"bogus float const");
+ }
+#endif
+ // Pick register
+ regNumber reg = regSet.PickRegFloat();
+
+ // Load constant
+ genConstantLoadStackFP(tree);
+
+ // Push register to virtual stack
+ FlatFPX87_PushVirtual(&compCurFPState, reg);
+
+ // Update tree
+ genCodeForTreeStackFP_DONE(tree, reg);
+}
+
+void CodeGen::genCodeForTreeStackFP_Leaf(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_Leaf() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ switch (tree->OperGet())
+ {
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ {
+ assert(!compiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvRegister);
+
+ // Pick register
+ regNumber reg = regSet.PickRegFloat();
+
+ // Load it
+ genLoadStackFP(tree, reg);
+
+ genCodeForTreeStackFP_DONE(tree, reg);
+
+ break;
+ }
+
+ case GT_REG_VAR:
+ {
+ regNumber reg = regSet.PickRegFloat();
+
+ genLoadStackFP(tree, reg);
+
+ genCodeForTreeStackFP_DONE(tree, reg);
+
+ break;
+ }
+
+ case GT_CLS_VAR:
+ {
+ // Pick register
+ regNumber reg = regSet.PickRegFloat();
+
+ // Load it
+ genLoadStackFP(tree, reg);
+
+ genCodeForTreeStackFP_DONE(tree, reg);
+
+ break;
+ }
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ assert(!"unexpected leaf");
+ }
+
+ genUpdateLife(tree);
+}
+
+void CodeGen::genCodeForTreeStackFP_Asg(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_Asg() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ emitAttr size;
+ unsigned offs;
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ assert(tree->OperGet() == GT_ASG);
+
+ if (!op1->IsRegVar() && (op2->gtOper == GT_CAST) && (op1->gtType == op2->gtType) &&
+ varTypeIsFloating(op2->gtCast.CastOp()))
+ {
+ /* We can discard the cast */
+ op2 = op2->gtCast.CastOp();
+ }
+
+ size = EmitSize(op1);
+ offs = 0;
+
+ // If lhs is a comma expression, evaluate the non-last parts, make op1 be the remainder.
+ // (But can't do this if the assignment is reversed...)
+ if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
+ {
+ op1 = genCodeForCommaTree(op1);
+ }
+
+ GenTreePtr op1NonCom = op1->gtEffectiveVal();
+ if (op1NonCom->gtOper == GT_LCL_VAR)
+ {
+#ifdef DEBUG
+ LclVarDsc* varDsc = &compiler->lvaTable[op1NonCom->gtLclVarCommon.gtLclNum];
+ // No dead stores
+ assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1NonCom->gtFlags & GTF_VAR_DEATH));
+#endif
+
+#ifdef DEBUGGING_SUPPORT
+
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ */
+
+ if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
+ {
+ siCheckVarScope(op1NonCom->gtLclVarCommon.gtLclNum, op1NonCom->gtLclVar.gtLclILoffs);
+ }
+#endif
+ }
+
+ assert(op2);
+ switch (op2->gtOper)
+ {
+ case GT_CNS_DBL:
+
+ assert(compCurFPState.m_uStackSize <= FP_PHYSICREGISTERS);
+
+ regMaskTP addrRegInt;
+ addrRegInt = 0;
+ regMaskTP addrRegFlt;
+ addrRegFlt = 0;
+
+ // op2 is already "evaluated," so it doesn't matter if they're reversed or not...
+ op1 = genCodeForCommaTree(op1);
+ op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
+
+ // We want to 'cast' the constant to op1's type
+ double constantValue;
+ constantValue = op2->gtDblCon.gtDconVal;
+ if (op1->gtType == TYP_FLOAT)
+ {
+ float temp = forceCastToFloat(constantValue);
+ constantValue = (double)temp;
+ }
+
+ GenTreePtr constantTree;
+ constantTree = compiler->gtNewDconNode(constantValue);
+ if (genConstantLoadStackFP(constantTree, true))
+ {
+ if (op1->IsRegVar())
+ {
+ // regvar birth
+ genRegVarBirthStackFP(op1);
+
+ // Update
+ compCurFPState.Push(op1->gtRegNum);
+ }
+ else
+ {
+ // store in target
+ inst_FS_TT(INS_fstp, op1);
+ }
+ }
+ else
+ {
+ // Standard constant
+ if (op1->IsRegVar())
+ {
+ // Load constant to fp stack.
+
+ GenTreePtr cnsaddr;
+
+ // Create slot for constant
+ if (op1->gtType == TYP_FLOAT || StackFPIsSameAsFloat(op2->gtDblCon.gtDconVal))
+ {
+ // We're going to use that double as a float, so recompute addr
+ float f = forceCastToFloat(op2->gtDblCon.gtDconVal);
+ cnsaddr = genMakeConst(&f, TYP_FLOAT, tree, true);
+ }
+ else
+ {
+ cnsaddr = genMakeConst(&op2->gtDblCon.gtDconVal, TYP_DOUBLE, tree, true);
+ }
+
+ // Load into stack
+ inst_FS_TT(INS_fld, cnsaddr);
+
+ // regvar birth
+ genRegVarBirthStackFP(op1);
+
+ // Update
+ compCurFPState.Push(op1->gtRegNum);
+ }
+ else
+ {
+ if (size == 4)
+ {
+
+ float f = forceCastToFloat(op2->gtDblCon.gtDconVal);
+ int* addr = (int*)&f;
+
+ do
+ {
+ inst_TT_IV(INS_mov, op1, *addr++, offs);
+ offs += sizeof(int);
+ } while (offs < size);
+ }
+ else
+ {
+ // Store-forwarding fix for Pentium 4 and Centrino, and also
+ // fld for doubles that can be represented as floats, saving
+ // 4 bytes of load
+ GenTreePtr cnsaddr;
+
+ // Create slot for constant
+ if (op1->gtType == TYP_FLOAT || StackFPIsSameAsFloat(op2->gtDblCon.gtDconVal))
+ {
+ // We're going to use that double as a float, so recompute addr
+ float f = forceCastToFloat(op2->gtDblCon.gtDconVal);
+ cnsaddr = genMakeConst(&f, TYP_FLOAT, tree, true);
+ }
+ else
+ {
+ assert(tree->gtType == TYP_DOUBLE);
+ cnsaddr = genMakeConst(&op2->gtDblCon.gtDconVal, TYP_DOUBLE, tree, true);
+ }
+
+ inst_FS_TT(INS_fld, cnsaddr);
+ inst_FS_TT(INS_fstp, op1);
+ }
+ }
+ }
+
+ genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
+ genUpdateLife(op1);
+ return;
+
+ default:
+ break;
+ }
+
+ // Not one of the easy optimizations. Proceed normally
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Evaluate the RHS onto the FP stack.
+ We don't need to round it as we will be doing a spill for
+ the assignment anyway (unless op1 is a GT_REG_VAR). */
+
+ genSetupForOpStackFP(op1, op2, true, true, false, true);
+
+ // Do the move
+ genMovStackFP(op1, REG_FPNONE, op2, (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);
+ }
+ else
+ {
+ // Have to evaluate left side before
+
+ // This should never happen
+ assert(!op1->IsRegVar());
+
+ genSetupForOpStackFP(op1, op2, false, true, false, true);
+
+ // Do the actual move
+ genMovStackFP(op1, REG_FPNONE, op2, (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);
+ }
+}
+
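+// Prepares the two operands of a binary FP operation: one side is made addressable and the
+// other is evaluated onto the FP stack (locked while the other side is kept addressable),
+// except that read-only regvars that are not dying are left in place to avoid a useless load.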
+void CodeGen::genSetupForOpStackFP(
+ GenTreePtr& op1, GenTreePtr& op2, bool bReverse, bool bMakeOp1Addressable, bool bOp1ReadOnly, bool bOp2ReadOnly)
+{
+ if (bMakeOp1Addressable)
+ {
+ if (bReverse)
+ {
+ genSetupForOpStackFP(op2, op1, false, false, bOp2ReadOnly, bOp1ReadOnly);
+ }
+ else
+ {
+ regMaskTP addrRegInt = 0;
+ regMaskTP addrRegFlt = 0;
+
+ op1 = genCodeForCommaTree(op1);
+
+ // Evaluate RHS on FP stack
+ if (bOp2ReadOnly && op2->IsRegVar() && !op2->IsRegVarDeath())
+ {
+ // read only and not dying, so just make addressable
+ op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
+ genKeepAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
+ genUpdateLife(op2);
+ }
+ else
+ {
+ // Make target addressable
+ op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
+
+ op2 = genCodeForCommaTree(op2);
+
+ genCodeForTreeFloat(op2);
+
+ regSet.SetUsedRegFloat(op2, true);
+ regSet.SetLockedRegFloat(op2, true);
+
+ // Make sure target is still addressable
+ genKeepAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
+
+ regSet.SetLockedRegFloat(op2, false);
+ regSet.SetUsedRegFloat(op2, false);
+ }
+
+ /* Free up anything that was tied up by the target address */
+ genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
+ }
+ }
+ else
+ {
+ assert(!bReverse ||
+ !"Can't do this. if op2 is a reg var and dies in op1, we have a serious problem. For the "
+ "moment, handle this in the caller");
+
+ regMaskTP addrRegInt = 0;
+ regMaskTP addrRegFlt = 0;
+
+ op1 = genCodeForCommaTree(op1);
+
+ if (bOp1ReadOnly && op1->IsRegVar() && !op1->IsRegVarDeath() &&
+ !genRegVarDiesInSubTree(op2, op1->gtRegVar.gtRegNum)) // regvar can't die in op2 either
+ {
+ // First update liveness for op1, since we're "evaluating" it here
+ genUpdateLife(op1);
+
+ op2 = genCodeForCommaTree(op2);
+
+ // Read-only and not dying, so we don't have to do anything.
+ op2 = genMakeAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
+ genKeepAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
+ }
+ else
+ {
+ genCodeForTreeFloat(op1);
+
+ regSet.SetUsedRegFloat(op1, true);
+
+ op2 = genCodeForCommaTree(op2);
+
+ op2 = genMakeAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
+
+ // Restore op1 if necessary
+ if (op1->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(op1);
+ }
+
+ // Lock op1
+ regSet.SetLockedRegFloat(op1, true);
+
+ genKeepAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
+
+ // unlock op1
+ regSet.SetLockedRegFloat(op1, false);
+
+ // mark as free
+ regSet.SetUsedRegFloat(op1, false);
+ }
+
+ genDoneAddressableStackFP(op2, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
+ }
+}
+
+void CodeGen::genCodeForTreeStackFP_Arithm(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_Arithm() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ assert(tree->OperGet() == GT_ADD || tree->OperGet() == GT_SUB || tree->OperGet() == GT_MUL ||
+ tree->OperGet() == GT_DIV);
+
+ // We handle the reverse here instead of leaving setupop to do it. Consider this case:
+ //
+ // + with reverse
+ // op1 regvar
+ //
+ // If the regvar dies in op1, we would need a load of the regvar instead of a noop. So we handle this
+ // here and tell genArithmStackFP to do the reverse operation.
+ bool bReverse;
+
+ GenTreePtr op1, op2;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ bReverse = true;
+ op1 = tree->gtGetOp2();
+ op2 = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ bReverse = false;
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtGetOp2();
+ }
+
+ regNumber result;
+
+ // Fast paths
+ genTreeOps oper = tree->OperGet();
+ if (op1->IsRegVar() && op2->IsRegVar() && !op1->IsRegVarDeath() && op2->IsRegVarDeath())
+ {
+ // In this fastpath, we will save a load by doing the operation directly on the op2
+ // register, as it's dying.
+
+ // Mark op2 as dead
+ genRegVarDeathStackFP(op2);
+
+ // Do operation
+ result = genArithmStackFP(oper, op2, op2->gtRegVar.gtRegNum, op1, REG_FPNONE, !bReverse);
+
+ genUpdateLife(op1);
+ genUpdateLife(op2);
+ }
+ else if (!op1->IsRegVar() && // We don't do this for regvars, as we'll need a scratch reg
+ ((tree->gtFlags & GTF_SIDE_EFFECT) == 0) && // No side effects
+ GenTree::Compare(op1, op2)) // op1 and op2 are the same
+ {
+ // op1 is the same thing as op2. Ideal for CSEs that weren't optimized
+ // due to their low cost.
+
+ // First we need to update lifetimes from op1
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, genUpdateLiveSetForward(op1));
+ compiler->compCurLifeTree = op1;
+
+ genCodeForTreeFloat(op2);
+
+ result = genArithmStackFP(oper, op2, op2->gtRegNum, op2, op2->gtRegNum, bReverse);
+ }
+ else
+ {
+ genSetupForOpStackFP(op1, op2, false, false, false, true);
+
+ result = genArithmStackFP(oper, op1, (op1->InReg()) ? op1->gtRegNum : REG_FPNONE, op2,
+ (op2->InReg()) ? op2->gtRegNum : REG_FPNONE, bReverse);
+ }
+
+ genCodeForTreeStackFP_DONE(tree, result);
+}
+
+regNumber CodeGen::genArithmStackFP(
+ genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg, bool bReverse)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genArithmStackFP() dst: ");
+ Compiler::printTreeID(dst);
+ printf(" src: ");
+ Compiler::printTreeID(src);
+ printf(" dstreg: %s srcreg: %s\n", dstreg == REG_FPNONE ? "NONE" : regVarNameStackFP(dstreg),
+ srcreg == REG_FPNONE ? "NONE" : regVarNameStackFP(srcreg));
+ }
+#endif // DEBUG
+
+ // Select the instructions depending on oper and bReverse
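+ // Naming convention for the instruction variables: the first letter is N for the natural
+ // operand order or R for the reversed one (fsubr/fdivr); the second letter is N for the
+ // non-popping form or P for the popping form (faddp, fsubrp, ...).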
+
+ instruction ins_NN;
+ instruction ins_RN;
+ instruction ins_RP;
+ instruction ins_NP;
+
+ switch (oper)
+ {
+ default:
+ assert(!"Unexpected oper");
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ case GT_DIV:
+
+ /* Make sure the instruction tables look correctly ordered */
+ assert(FPmathNN[GT_ADD - GT_ADD] == INS_fadd);
+ assert(FPmathNN[GT_SUB - GT_ADD] == INS_fsub);
+ assert(FPmathNN[GT_MUL - GT_ADD] == INS_fmul);
+ assert(FPmathNN[GT_DIV - GT_ADD] == INS_fdiv);
+
+ assert(FPmathNP[GT_ADD - GT_ADD] == INS_faddp);
+ assert(FPmathNP[GT_SUB - GT_ADD] == INS_fsubp);
+ assert(FPmathNP[GT_MUL - GT_ADD] == INS_fmulp);
+ assert(FPmathNP[GT_DIV - GT_ADD] == INS_fdivp);
+
+ assert(FPmathRN[GT_ADD - GT_ADD] == INS_fadd);
+ assert(FPmathRN[GT_SUB - GT_ADD] == INS_fsubr);
+ assert(FPmathRN[GT_MUL - GT_ADD] == INS_fmul);
+ assert(FPmathRN[GT_DIV - GT_ADD] == INS_fdivr);
+
+ assert(FPmathRP[GT_ADD - GT_ADD] == INS_faddp);
+ assert(FPmathRP[GT_SUB - GT_ADD] == INS_fsubrp);
+ assert(FPmathRP[GT_MUL - GT_ADD] == INS_fmulp);
+ assert(FPmathRP[GT_DIV - GT_ADD] == INS_fdivrp);
+
+ if (bReverse)
+ {
+ ins_NN = FPmathRN[oper - GT_ADD];
+ ins_NP = FPmathRP[oper - GT_ADD];
+ ins_RN = FPmathNN[oper - GT_ADD];
+ ins_RP = FPmathNP[oper - GT_ADD];
+ }
+ else
+ {
+ ins_NN = FPmathNN[oper - GT_ADD];
+ ins_NP = FPmathNP[oper - GT_ADD];
+ ins_RN = FPmathRN[oper - GT_ADD];
+ ins_RP = FPmathRP[oper - GT_ADD];
+ }
+ }
+
+ regNumber result = REG_FPNONE;
+
+ if (dstreg != REG_FPNONE)
+ {
+ if (srcreg == REG_FPNONE)
+ {
+ if (src->IsRegVar())
+ {
+ if (src->IsRegVarDeath())
+ {
+ if (compCurFPState.TopVirtual() == (unsigned)dst->gtRegNum)
+ {
+ // Do operation and store in srcreg
+ inst_FS(ins_RP, compCurFPState.VirtualToST(src->gtRegNum));
+
+ // kill current dst and rename src as dst.
+ FlatFPX87_Kill(&compCurFPState, dstreg);
+ compCurFPState.Rename(dstreg, src->gtRegNum);
+ }
+ else
+ {
+ // Take src to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);
+
+ // do reverse and pop operation
+ inst_FS(ins_NP, compCurFPState.VirtualToST(dstreg));
+
+ // Kill the register
+ FlatFPX87_Kill(&compCurFPState, src->gtRegNum);
+ }
+
+ assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
+ !(genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat));
+ }
+ else
+ {
+ if (compCurFPState.TopVirtual() == (unsigned)src->gtRegNum)
+ {
+ inst_FS(ins_RN, compCurFPState.VirtualToST(dst->gtRegNum));
+ }
+ else
+ {
+ FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);
+ inst_FN(ins_NN, compCurFPState.VirtualToST(src->gtRegNum));
+ }
+ }
+ }
+ else
+ {
+ // do operation with memory and store in dest
+ FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);
+ inst_FS_TT(ins_NN, src);
+ }
+ }
+ else
+ {
+ if (dstreg == srcreg)
+ {
+ FlatFPX87_MoveToTOS(&compCurFPState, dstreg);
+ inst_FN(ins_NN, compCurFPState.VirtualToST(dstreg));
+ }
+ else
+ {
+ if (compCurFPState.TopVirtual() == (unsigned)dst->gtRegNum)
+ {
+ // Do operation and store in srcreg
+ inst_FS(ins_RP, compCurFPState.VirtualToST(srcreg));
+
+ // kill current dst and rename src as dst.
+ FlatFPX87_Kill(&compCurFPState, dstreg);
+ compCurFPState.Rename(dstreg, srcreg);
+ }
+ else
+ {
+ FlatFPX87_MoveToTOS(&compCurFPState, srcreg);
+
+ // do reverse and pop operation
+ inst_FS(ins_NP, compCurFPState.VirtualToST(dstreg));
+
+ // Kill the register
+ FlatFPX87_Kill(&compCurFPState, srcreg);
+ }
+ }
+ }
+
+ result = dstreg;
+ }
+ else
+ {
+ assert(!"if we get here it means we didnt load op1 into a temp. Investigate why");
+ }
+
+ assert(result != REG_FPNONE);
+ return result;
+}
+
+void CodeGen::genCodeForTreeStackFP_AsgArithm(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_AsgArithm() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ assert(tree->OperGet() == GT_ASG_ADD || tree->OperGet() == GT_ASG_SUB || tree->OperGet() == GT_ASG_MUL ||
+ tree->OperGet() == GT_ASG_DIV);
+
+ GenTreePtr op1, op2;
+
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtGetOp2();
+
+ genSetupForOpStackFP(op1, op2, (tree->gtFlags & GTF_REVERSE_OPS) ? true : false, true, false, true);
+
+ regNumber result = genAsgArithmStackFP(tree->OperGet(), op1, (op1->InReg()) ? op1->gtRegNum : REG_FPNONE, op2,
+ (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);
+
+ genCodeForTreeStackFP_DONE(tree, result);
+}
+
+regNumber CodeGen::genAsgArithmStackFP(
+ genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg)
+{
+ regNumber result = REG_FPNONE;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genAsgArithmStackFP() dst: ");
+ Compiler::printTreeID(dst);
+ printf(" src: ");
+ Compiler::printTreeID(src);
+ printf(" dstreg: %s srcreg: %s\n", dstreg == REG_FPNONE ? "NONE" : regVarNameStackFP(dstreg),
+ srcreg == REG_FPNONE ? "NONE" : regVarNameStackFP(srcreg));
+ }
+#endif // DEBUG
+
+ instruction ins_NN;
+ instruction ins_RN;
+ instruction ins_RP;
+ instruction ins_NP;
+
+ switch (oper)
+ {
+ default:
+ assert(!"Unexpected oper");
+ break;
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+
+ assert(FPmathRN[GT_ASG_ADD - GT_ASG_ADD] == INS_fadd);
+ assert(FPmathRN[GT_ASG_SUB - GT_ASG_ADD] == INS_fsubr);
+ assert(FPmathRN[GT_ASG_MUL - GT_ASG_ADD] == INS_fmul);
+ assert(FPmathRN[GT_ASG_DIV - GT_ASG_ADD] == INS_fdivr);
+
+ assert(FPmathRP[GT_ASG_ADD - GT_ASG_ADD] == INS_faddp);
+ assert(FPmathRP[GT_ASG_SUB - GT_ASG_ADD] == INS_fsubrp);
+ assert(FPmathRP[GT_ASG_MUL - GT_ASG_ADD] == INS_fmulp);
+ assert(FPmathRP[GT_ASG_DIV - GT_ASG_ADD] == INS_fdivrp);
+
+ ins_NN = FPmathNN[oper - GT_ASG_ADD];
+ ins_NP = FPmathNP[oper - GT_ASG_ADD];
+
+ ins_RN = FPmathRN[oper - GT_ASG_ADD];
+ ins_RP = FPmathRP[oper - GT_ASG_ADD];
+
+ if (dstreg != REG_FPNONE)
+ {
+ assert(!"dst should be a regvar or memory");
+ }
+ else
+ {
+ if (dst->IsRegVar())
+ {
+ if (src->IsRegVar())
+ {
+ if (src->IsRegVarDeath())
+ {
+ // Take src to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);
+
+ // Do op
+ inst_FS(ins_NP, compCurFPState.VirtualToST(dst->gtRegNum));
+
+ // Kill the register
+ FlatFPX87_Kill(&compCurFPState, src->gtRegNum);
+
+ // SetupOp should mark the regvar as dead
+ assert((genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat) == 0);
+ }
+ else
+ {
+ assert(src->gtRegNum == src->gtRegVar.gtRegNum &&
+ "We shoudnt be loading regvar src on the stack as src is readonly");
+
+ // Take src to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);
+
+ // Do op
+ inst_FS(ins_RN, compCurFPState.VirtualToST(dst->gtRegNum));
+ }
+ }
+ else
+ {
+ if (srcreg == REG_FPNONE)
+ {
+ // take enregistered variable to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);
+
+ // Do operation with mem
+ inst_FS_TT(ins_NN, src);
+ }
+ else
+ {
+ // take enregistered variable to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);
+
+ // do op
+ inst_FS(ins_NP, compCurFPState.VirtualToST(dst->gtRegNum));
+
+ // Kill the register
+ FlatFPX87_Kill(&compCurFPState, src->gtRegNum);
+ }
+ }
+ }
+ else
+ {
+ // To memory
+ if ((src->IsRegVar()) && !src->IsRegVarDeath())
+ {
+ // We set src as read only, but as dst is in memory, we will need
+ // an extra physical register (which we should have, as we have a
+ // spare one for transitions).
+ //
+ // There used to be an assertion: assert(src->gtRegNum == src->gtRegVar.gtRegNum, ...)
+ // here, but there's actually no reason to assume that. AFAICT, for FP vars under stack FP,
+ // src->gtRegVar.gtRegNum is the allocated stack pseudo-register, but src->gtRegNum is the
+ // FP stack position into which that is loaded to represent a particular use of the variable.
+ inst_FN(INS_fld, compCurFPState.VirtualToST(src->gtRegNum));
+
+ // Do operation with mem
+ inst_FS_TT(ins_RN, dst);
+
+ // store back
+ inst_FS_TT(INS_fstp, dst);
+ }
+ else
+ {
+ // put src in top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, srcreg);
+
+ // Do operation with mem
+ inst_FS_TT(ins_RN, dst);
+
+ // store back
+ inst_FS_TT(INS_fstp, dst);
+
+ // SetupOp should have marked the regvar as dead in that case
+ assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
+ (genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat) == 0);
+
+ FlatFPX87_Kill(&compCurFPState, srcreg);
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+void CodeGen::genCodeForTreeStackFP_SmpOp(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_SmpOp() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ assert(tree->OperKind() & GTK_SMPOP);
+
+ switch (tree->OperGet())
+ {
+ // Assignment
+ case GT_ASG:
+ {
+ genCodeForTreeStackFP_Asg(tree);
+ break;
+ }
+
+ // Arithmetic binops
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ case GT_DIV:
+ {
+ genCodeForTreeStackFP_Arithm(tree);
+ break;
+ }
+
+ // Asg-Arithmetic ops
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ {
+ genCodeForTreeStackFP_AsgArithm(tree);
+ break;
+ }
+
+ case GT_IND:
+ case GT_LEA:
+ {
+ regMaskTP addrReg;
+
+ // Make sure the address value is 'addressable'
+ addrReg = genMakeAddressable(tree, 0, RegSet::FREE_REG);
+
+ // Load the value onto the FP stack
+ regNumber reg = regSet.PickRegFloat();
+ genLoadStackFP(tree, reg);
+
+ genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
+
+ genCodeForTreeStackFP_DONE(tree, reg);
+
+ break;
+ }
+
+ case GT_RETURN:
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ assert(op1);
+
+ // Compute the result onto the FP stack
+ if (op1->gtType == TYP_FLOAT)
+ {
+#if ROUND_FLOAT
+ bool roundOp1 = false;
+
+ switch (getRoundFloatLevel())
+ {
+ case ROUND_NEVER:
+ /* No rounding at all */
+ break;
+
+ case ROUND_CMP_CONST:
+ break;
+
+ case ROUND_CMP:
+ /* Round all comparands and return values*/
+ roundOp1 = true;
+ break;
+
+ case ROUND_ALWAYS:
+ /* Round everything */
+ roundOp1 = true;
+ break;
+
+ default:
+ assert(!"Unsupported Round Level");
+ break;
+ }
+#endif
+ genCodeForTreeFlt(op1);
+ }
+ else
+ {
+ assert(op1->gtType == TYP_DOUBLE);
+ genCodeForTreeFloat(op1);
+
+#if ROUND_FLOAT
+ if ((op1->gtOper == GT_CAST) && (op1->CastFromType() == TYP_LONG))
+ genRoundFpExpressionStackFP(op1);
+#endif
+ }
+
+ // kill enregistered variables
+ compCurFPState.Pop();
+ assert(compCurFPState.m_uStackSize == 0);
+ break;
+ }
+
+ case GT_COMMA:
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ genCodeForTreeFloat(op2);
+
+ regSet.SetUsedRegFloat(op2, true);
+
+ genEvalSideEffects(op1);
+
+ if (op2->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(op2);
+ }
+
+ regSet.SetUsedRegFloat(op2, false);
+ }
+ else
+ {
+ genEvalSideEffects(op1);
+ genCodeForTreeFloat(op2);
+ }
+
+ genCodeForTreeStackFP_DONE(tree, op2->gtRegNum);
+ break;
+ }
+ case GT_CAST:
+ {
+ genCodeForTreeStackFP_Cast(tree);
+ break;
+ }
+
+ case GT_NEG:
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // get the tree into a register
+ genCodeForTreeFloat(op1);
+
+ // Take reg to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
+
+ // change the sign
+ instGen(INS_fchs);
+
+ // mark register that holds tree
+ genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);
+ return;
+ }
+ case GT_INTRINSIC:
+ {
+ assert(Compiler::IsMathIntrinsic(tree));
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // get tree into a register
+ genCodeForTreeFloat(op1);
+
+ // Take reg to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
+
+ static const instruction mathIns[] = {
+ INS_fsin, INS_fcos, INS_fsqrt, INS_fabs, INS_frndint,
+ };
+
+ assert(mathIns[CORINFO_INTRINSIC_Sin] == INS_fsin);
+ assert(mathIns[CORINFO_INTRINSIC_Cos] == INS_fcos);
+ assert(mathIns[CORINFO_INTRINSIC_Sqrt] == INS_fsqrt);
+ assert(mathIns[CORINFO_INTRINSIC_Abs] == INS_fabs);
+ assert(mathIns[CORINFO_INTRINSIC_Round] == INS_frndint);
+ assert((unsigned)(tree->gtIntrinsic.gtIntrinsicId) < sizeof(mathIns) / sizeof(mathIns[0]));
+ instGen(mathIns[tree->gtIntrinsic.gtIntrinsicId]);
+
+ // mark register that holds tree
+ genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);
+
+ return;
+ }
+ case GT_CKFINITE:
+ {
+ TempDsc* temp;
+ int offs;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // Offset of the DWord containing the exponent
+ offs = (op1->gtType == TYP_FLOAT) ? 0 : sizeof(int);
+
+ // get tree into a register
+ genCodeForTreeFloat(op1);
+
+ // Take reg to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
+
+ temp = compiler->tmpGetTemp(op1->TypeGet());
+ emitAttr size = EmitSize(op1);
+
+ // Store the value from the FP stack into the temp
+ getEmitter()->emitIns_S(INS_fst, size, temp->tdTempNum(), 0);
+
+ regNumber reg = regSet.rsPickReg();
+
+ // Load the DWord containing the exponent into a general reg.
+ inst_RV_ST(INS_mov, reg, temp, offs, op1->TypeGet(), EA_4BYTE);
+ compiler->tmpRlsTemp(temp);
+
+ // 'reg' now contains the DWord containing the exponent
+ regTracker.rsTrackRegTrash(reg);
+
+ // Mask of exponent with all 1's - appropriate for given type
+
+ int expMask;
+ expMask = (op1->gtType == TYP_FLOAT) ? 0x7F800000 // TYP_FLOAT
+ : 0x7FF00000; // TYP_DOUBLE
+
+ // Check if the exponent is all 1's
+
+ inst_RV_IV(INS_and, reg, expMask, EA_4BYTE);
+ inst_RV_IV(INS_cmp, reg, expMask, EA_4BYTE);
+
+ // If exponent was all 1's, we need to throw ArithExcep
+ genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
+
+ genUpdateLife(tree);
+
+ genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);
+ break;
+ }
+ default:
+ NYI("opertype");
+ }
+}
+
+void CodeGen::genCodeForTreeStackFP_Cast(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_Cast() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+#if ROUND_FLOAT
+ bool roundResult = true;
+#endif
+
+ regMaskTP addrReg;
+ TempDsc* temp;
+ emitAttr size;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // If op1 is a comma expression, evaluate the non-last parts, make op1 be the rest.
+ op1 = genCodeForCommaTree(op1);
+
+ switch (op1->gtType)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ {
+
+ // Operand too small for 'fild', load it into a register
+ genCodeForTree(op1, 0);
+
+#if ROUND_FLOAT
+ // no need to round, can't overflow float or dbl
+ roundResult = false;
+#endif
+
+ // fall through
+ }
+ case TYP_INT:
+ case TYP_BYREF:
+ case TYP_LONG:
+ {
+ // Can't 'fild' a constant, it has to be loaded from memory
+ switch (op1->gtOper)
+ {
+ case GT_CNS_INT:
+ op1 = genMakeConst(&op1->gtIntCon.gtIconVal, TYP_INT, tree, false);
+ break;
+
+ case GT_CNS_LNG:
+ // Our encoder requires fild on m64int to be 64-bit aligned.
+ op1 = genMakeConst(&op1->gtLngCon.gtLconVal, TYP_LONG, tree, true);
+ break;
+ default:
+ break;
+ }
+
+ addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG);
+
+ // Grab register for the cast
+ regNumber reg = regSet.PickRegFloat();
+ genMarkTreeInReg(tree, reg);
+ compCurFPState.Push(reg);
+
+ // Is the value now sitting in a register?
+ if (op1->InReg())
+ {
+ // We'll have to store the value into the stack
+ size = EA_ATTR(roundUp(genTypeSize(op1->gtType)));
+ temp = compiler->tmpGetTemp(op1->TypeGet());
+
+ // Move the value into the temp
+ if (op1->gtType == TYP_LONG)
+ {
+ regPairNo regPair = op1->gtRegPair;
+
+ // This code is pretty ugly, but straightforward
+
+ if (genRegPairLo(regPair) == REG_STK)
+ {
+ regNumber rg1 = genRegPairHi(regPair);
+
+ assert(rg1 != REG_STK);
+
+ /* Move enregistered half to temp */
+
+ inst_ST_RV(INS_mov, temp, 4, rg1, TYP_LONG);
+
+ /* Move lower half to temp via "high register" */
+
+ inst_RV_TT(INS_mov, rg1, op1, 0);
+ inst_ST_RV(INS_mov, temp, 0, rg1, TYP_LONG);
+
+ /* Reload transfer register */
+
+ inst_RV_ST(INS_mov, rg1, temp, 4, TYP_LONG);
+ }
+ else if (genRegPairHi(regPair) == REG_STK)
+ {
+ regNumber rg1 = genRegPairLo(regPair);
+
+ assert(rg1 != REG_STK);
+
+ /* Move enregistered half to temp */
+
+ inst_ST_RV(INS_mov, temp, 0, rg1, TYP_LONG);
+
+ /* Move high half to temp via "low register" */
+
+ inst_RV_TT(INS_mov, rg1, op1, 4);
+ inst_ST_RV(INS_mov, temp, 4, rg1, TYP_LONG);
+
+ /* Reload transfer register */
+
+ inst_RV_ST(INS_mov, rg1, temp, 0, TYP_LONG);
+ }
+ else
+ {
+ /* Move the value into the temp */
+
+ inst_ST_RV(INS_mov, temp, 0, genRegPairLo(regPair), TYP_LONG);
+ inst_ST_RV(INS_mov, temp, 4, genRegPairHi(regPair), TYP_LONG);
+ }
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ /* Load the long from the temp */
+
+ inst_FS_ST(INS_fildl, size, temp, 0);
+ }
+ else
+ {
+ /* Move the value into the temp */
+
+ inst_ST_RV(INS_mov, temp, 0, op1->gtRegNum, TYP_INT);
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ /* Load the integer from the temp */
+
+ inst_FS_ST(INS_fild, size, temp, 0);
+ }
+
+ // We no longer need the temp
+ compiler->tmpRlsTemp(temp);
+ }
+ else
+ {
+ // Load the value from its address
+ if (op1->gtType == TYP_LONG)
+ inst_TT(INS_fildl, op1);
+ else
+ inst_TT(INS_fild, op1);
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+ }
+
+#if ROUND_FLOAT
+ /* integer to fp conversions can overflow. roundResult
+ * is cleared above in cases where it can't
+ */
+ if (roundResult &&
+ ((tree->gtType == TYP_FLOAT) || ((tree->gtType == TYP_DOUBLE) && (op1->gtType == TYP_LONG))))
+ genRoundFpExpression(tree);
+#endif
+
+ break;
+ }
+ case TYP_FLOAT:
+ {
+ // This is a cast from float to double.
+ // Note that conv.r(r4/r8) and conv.r8(r4/r8) are indistinguishable
+ // as we will generate GT_CAST-TYP_DOUBLE for both. This would
+ // cause us to truncate precision in either case. However,
+ // conv.r was needless in the first place, and should have
+ // been removed.
+ genCodeForTreeFloat(op1); // Truncate its precision
+
+ if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD || op1->gtOper == GT_CLS_VAR ||
+ op1->gtOper == GT_IND || op1->gtOper == GT_LEA)
+ {
+ // We take advantage here of the fact that we know that our
+ // codegen will have just loaded this from memory, and that
+ // therefore, no cast is really needed.
+ // Ideally we wouldn't do this optimization here, but in
+ // morphing. However, we need to do this after regalloc, as
+ // this optimization doesn't apply if what we're loading is a
+ // regvar.
+ }
+ else
+ {
+ genRoundFpExpressionStackFP(op1, tree->TypeGet());
+ }
+
+ // Assign reg to tree
+ genMarkTreeInReg(tree, op1->gtRegNum);
+
+ break;
+ }
+ case TYP_DOUBLE:
+ {
+ // This is a cast from double to float or double
+ // Load the value, store as destType, load back
+ genCodeForTreeFlt(op1);
+
+ if ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD || op1->gtOper == GT_CLS_VAR ||
+ op1->gtOper == GT_IND || op1->gtOper == GT_LEA) &&
+ tree->TypeGet() == TYP_DOUBLE)
+ {
+ // We take advantage here of the fact that we know that our
+ // codegen will have just loaded this from memory, and that
+ // therefore, no cast is really needed.
+ // Ideally we wouldn't do this optimization here, but in
+ // morphing. However, we need to do this after regalloc, as
+ // this optimization doesn't apply if what we're loading is a
+ // regvar.
+ }
+ else
+ {
+ genRoundFpExpressionStackFP(op1, tree->TypeGet());
+ }
+
+ // Assign reg to tree
+ genMarkTreeInReg(tree, op1->gtRegNum);
+
+ break;
+ }
+ default:
+ {
+ assert(!"unsupported cast");
+ break;
+ }
+ }
+}
+
+void CodeGen::genCodeForTreeStackFP_Special(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_Special() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ switch (tree->OperGet())
+ {
+ case GT_CALL:
+ {
+ genCodeForCall(tree, true);
+ break;
+ }
+ default:
+ NYI("genCodeForTreeStackFP_Special");
+ break;
+ }
+}
+
+void CodeGen::genCodeForTreeFloat(GenTreePtr tree, RegSet::RegisterPreference* pref)
+{
+ // TestTransitions();
+ genTreeOps oper;
+ unsigned kind;
+
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+ assert(varTypeIsFloating(tree));
+
+ // What kind of node do we have?
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ if (kind & GTK_CONST)
+ {
+ genCodeForTreeStackFP_Const(tree);
+ }
+ else if (kind & GTK_LEAF)
+ {
+ genCodeForTreeStackFP_Leaf(tree);
+ }
+ else if (kind & GTK_SMPOP)
+ {
+ genCodeForTreeStackFP_SmpOp(tree);
+ }
+ else
+ {
+ genCodeForTreeStackFP_Special(tree);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JitDumpFPState();
+ }
+ assert(compCurFPState.IsConsistent());
+#endif
+}
+
+bool CodeGen::genCompInsStackFP(GenTreePtr tos, GenTreePtr other)
+{
+ // assume gensetupop done
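+ // Two code paths: when fcomi/fcomip are available (genUse_fcomip()) the compare sets EFLAGS
+ // directly; otherwise we use fcom/fcomp and transfer the FPU status word into EFLAGS with the
+ // fnstsw/sahf pair at the end of this function. Returns true if the operands ended up being
+ // compared in reverse order.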
+
+ bool bUseFcomip = genUse_fcomip();
+ bool bReverse = false;
+
+ // Take op1 to top of the stack
+ FlatFPX87_MoveToTOS(&compCurFPState, tos->gtRegNum);
+
+ // We pop top of stack if it's not a live regvar
+ bool bPopTos = !(tos->IsRegVar() && !tos->IsRegVarDeath()) || (tos->InReg());
+ bool bPopOther = !(other->IsRegVar() && !other->IsRegVarDeath()) || (other->InReg());
+
+ assert(tos->IsRegVar() || (tos->InReg()));
+
+ if (!(other->IsRegVar() || (other->InReg())))
+ {
+ // op2 in memory
+ assert(bPopOther);
+
+ if (bUseFcomip)
+ {
+ // We should have space for a load
+ assert(compCurFPState.m_uStackSize < FP_PHYSICREGISTERS);
+
+ // load from mem, now the comparison will be the other way around
+ inst_FS_TT(INS_fld, other);
+ inst_FN(INS_fcomip, 1);
+
+ // pop if we've been asked to do so
+ if (bPopTos)
+ {
+ inst_FS(INS_fstp, 0);
+ FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
+ }
+
+ bReverse = true;
+ }
+ else
+ {
+ // compare directly with memory
+ if (bPopTos)
+ {
+ inst_FS_TT(INS_fcomp, other);
+ FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
+ }
+ else
+ {
+ inst_FS_TT(INS_fcom, other);
+ }
+ }
+ }
+ else
+ {
+ if (bUseFcomip)
+ {
+ if (bPopTos)
+ {
+ inst_FN(INS_fcomip, compCurFPState.VirtualToST(other->gtRegNum));
+ FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
+ }
+ else
+ {
+ inst_FN(INS_fcomi, compCurFPState.VirtualToST(other->gtRegNum));
+ }
+
+ if (bPopOther)
+ {
+ FlatFPX87_Unload(&compCurFPState, other->gtRegNum);
+ }
+ }
+ else
+ {
+ if (bPopTos)
+ {
+ inst_FN(INS_fcomp, compCurFPState.VirtualToST(other->gtRegNum));
+ FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
+ }
+ else
+ {
+ inst_FN(INS_fcom, compCurFPState.VirtualToST(other->gtRegNum));
+ }
+
+ if (bPopOther)
+ {
+ FlatFPX87_Unload(&compCurFPState, other->gtRegNum);
+ }
+ }
+ }
+
+ if (!bUseFcomip)
+ {
+ // Oops, we have to put the result of the compare in EFLAGS
+
+ // Grab EAX for the result of the fnstsw
+ regSet.rsGrabReg(RBM_EAX);
+
+ // Generate the 'fnstsw' and test its result
+ inst_RV(INS_fnstsw, REG_EAX, TYP_INT);
+ regTracker.rsTrackRegTrash(REG_EAX);
+ instGen(INS_sahf);
+ }
+
+ return bReverse;
+}
+
+void CodeGen::genCondJumpFltStackFP(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bDoTransition)
+{
+ assert(jumpTrue && jumpFalse);
+ assert(!(cond->gtFlags & GTF_REVERSE_OPS)); // Done in genCondJump()
+ assert(varTypeIsFloating(cond->gtOp.gtOp1));
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+
+ // Prepare operands.
+ genSetupForOpStackFP(op1, op2, false, false, true, false);
+
+ GenTreePtr tos;
+ GenTreePtr other;
+ bool bReverseCmp = false;
+
+ if ((op2->IsRegVar() || (op2->InReg())) && // op2 is in a reg
+ (compCurFPState.TopVirtual() == (unsigned)op2->gtRegNum && // Is it already at the top of the stack?
+ (!op2->IsRegVar() || op2->IsRegVarDeath()))) // are we going to pop it off?
+ {
+ tos = op2;
+ other = op1;
+ bReverseCmp = true;
+ }
+ else
+ {
+ tos = op1;
+ other = op2;
+ bReverseCmp = false;
+ }
+
+ if (genCompInsStackFP(tos, other))
+ {
+ bReverseCmp = !bReverseCmp;
+ }
+
+ // do .un comparison
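+ // An unordered x87 compare (i.e. a NaN operand) sets the parity flag, so EJ_jpe takes the
+ // NaN branch.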
+ if (cond->gtFlags & GTF_RELOP_NAN_UN)
+ {
+ // Generate the first jump (NaN check)
+ genCondJmpInsStackFP(EJ_jpe, jumpTrue, NULL, bDoTransition);
+ }
+ else
+ {
+ jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ // Generate the first jump (NaN check)
+ genCondJmpInsStackFP(EJ_jpe, jumpFalse, NULL, bDoTransition);
+ }
+
+ /* Generate the second jump (comparison) */
+ const static BYTE dblCmpTstJmp2[] = {
+ EJ_je, // GT_EQ
+ EJ_jne, // GT_NE
+ EJ_jb, // GT_LT
+ EJ_jbe, // GT_LE
+ EJ_jae, // GT_GE
+ EJ_ja, // GT_GT
+ };
+
+ // Swap comp order if necessary
+ if (bReverseCmp)
+ {
+ cmp = GenTree::SwapRelop(cmp);
+ }
+
+ genCondJmpInsStackFP((emitJumpKind)dblCmpTstJmp2[cmp - GT_EQ], jumpTrue, jumpFalse, bDoTransition);
+}
+
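+// Returns the block that a jump from pFrom to pTarget should be redirected through when the
+// FP stack described by pState has to be matched to pTarget's layout. Fast path: pTarget
+// itself when the states already match or the stack is empty; otherwise a new BBJ_ALWAYS
+// block carrying pState is created and placed in the appropriate (hot or cold) region.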
+BasicBlock* CodeGen::genTransitionBlockStackFP(FlatFPStateX87* pState, BasicBlock* pFrom, BasicBlock* pTarget)
+{
+ // Fast paths where a transition block is not necessary
+ if (pTarget->bbFPStateX87 && FlatFPStateX87::AreEqual(pState, pTarget->bbFPStateX87) || pState->IsEmpty())
+ {
+ return pTarget;
+ }
+
+ // We shouldn't have any handlers if we're generating transition blocks, as we don't know
+ // how to recover them
+ assert(compiler->compMayHaveTransitionBlocks);
+ assert(compiler->compHndBBtabCount == 0);
+
+#ifdef DEBUG
+ compiler->fgSafeBasicBlockCreation = true;
+#endif
+
+ // Create a temp block
+ BasicBlock* pBlock = compiler->bbNewBasicBlock(BBJ_ALWAYS);
+
+#ifdef DEBUG
+ compiler->fgSafeBasicBlockCreation = false;
+#endif
+
+ VarSetOps::Assign(compiler, pBlock->bbLiveIn, pFrom->bbLiveOut);
+ VarSetOps::Assign(compiler, pBlock->bbLiveOut, pFrom->bbLiveOut);
+
+ pBlock->bbJumpDest = pTarget;
+ pBlock->bbFlags |= BBF_JMP_TARGET;
+ //
+ // If either pFrom or pTarget are cold blocks then
+ // the transition block also must be cold
+ //
+ pBlock->bbFlags |= (pFrom->bbFlags & BBF_COLD);
+ pBlock->bbFlags |= (pTarget->bbFlags & BBF_COLD);
+
+ // The FP state for the block is the same as the current one
+ pBlock->bbFPStateX87 = FlatFPAllocFPState(pState);
+
+ if ((pBlock->bbFlags & BBF_COLD) || (compiler->fgFirstColdBlock == NULL))
+ {
+ //
+ // If this block is cold or if all blocks are hot
+ // then we just insert it at the end of the method.
+ //
+ compiler->fgMoveBlocksAfter(pBlock, pBlock, compiler->fgLastBBInMainFunction());
+ }
+ else
+ {
+ //
+ // This block is hot so we need to insert it in the hot region
+ // of the method.
+ //
+ BasicBlock* lastHotBlock = compiler->fgFirstColdBlock->bbPrev;
+ noway_assert(lastHotBlock != nullptr);
+
+ if (lastHotBlock->bbFallsThrough())
+ NO_WAY("Bad fgFirstColdBlock in genTransitionBlockStackFP()");
+
+ //
+ // Insert pBlock between lastHotBlock and fgFirstColdBlock
+ //
+ compiler->fgInsertBBafter(lastHotBlock, pBlock);
+ }
+
+ return pBlock;
+}
+
+void CodeGen::genCondJumpLngStackFP(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
+{
+ // For the moment, and so we don't have to deal with the number of special cases
+ // we have, we insert a dummy block for jumpTrue (if necessary) that will do the
+ // transition for us. For the jumpFalse case, we play a trick: for the false case,
+ // a long conditional has a fallthrough (least significant DWORD check is false) and
+ // also has a jump to the fallthrough (bbNext) if the most significant DWORD check
+ // fails. However, we do want to make an FP transition if we're in the latter case,
+ // so what we do is create a label and make jumpFalse go there. This label is defined
+ // before doing the FP transition logic at the end of the block, so now both exit paths
+ // for the false condition will go through the transition and then fall through to bbNext.
+ assert(jumpFalse == compiler->compCurBB->bbNext);
+
+ BasicBlock* pTransition = genCreateTempLabel();
+
+ genCondJumpLng(cond, jumpTrue, pTransition, true);
+
+ genDefineTempLabel(pTransition);
+}
+
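+// Brings the FP regvar state in line with 'liveset' when moving between qmark arms: generates
+// the stack transition for the registers that stay live, kills all currently live FP regvars,
+// and re-births the ones that are live in the new set.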
+void CodeGen::genQMarkRegVarTransition(GenTreePtr nextNode, VARSET_VALARG_TP liveset)
+{
+ // Kill any vars that may die in the transition
+ VARSET_TP VARSET_INIT_NOCOPY(newLiveSet, VarSetOps::Intersection(compiler, liveset, compiler->optAllFPregVars));
+
+ regMaskTP liveRegIn = genRegMaskFromLivenessStackFP(newLiveSet);
+ genCodeForTransitionFromMask(&compCurFPState, liveRegIn);
+
+ unsigned i;
+
+ // Kill all regvars
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if ((genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat))
+ {
+
+ genRegVarDeathStackFP(regSet.genRegVarsFloat[i]);
+ }
+ }
+
+ // Born necessary regvars
+ for (i = 0; i < compiler->lvaTrackedCount; i++)
+ {
+ unsigned lclVar = compiler->lvaTrackedToVarNum[i];
+ LclVarDsc* varDsc = compiler->lvaTable + lclVar;
+
+ assert(varDsc->lvTracked);
+
+ if (varDsc->lvRegister && VarSetOps::IsMember(compiler, newLiveSet, i))
+ {
+ genRegVarBirthStackFP(varDsc);
+ }
+ }
+}
+
+void CodeGen::genQMarkBeforeElseStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTreePtr nextNode)
+{
+ assert(regSet.rsMaskLockedFloat == 0);
+
+ // Save current state at colon
+ pState->stackState.Init(&compCurFPState);
+
+ // Kill any vars that may die in the transition to then
+ genQMarkRegVarTransition(nextNode, varsetCond);
+}
+
+void CodeGen::genQMarkAfterElseBlockStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTreePtr nextNode)
+{
+ assert(regSet.rsMaskLockedFloat == 0);
+
+ FlatFPStateX87 tempSwap;
+
+ // Save the current state. Now tempSwap will store the target state for the else block
+ tempSwap.Init(&compCurFPState);
+
+ compCurFPState.Init(&pState->stackState);
+
+ pState->stackState.Init(&tempSwap);
+
+ // Did any regvars die in the then block that are live on entry to the else block?
+ unsigned i;
+ for (i = 0; i < compiler->lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(compiler, varsetCond, i) && VarSetOps::IsMember(compiler, compiler->optAllFPregVars, i))
+ {
+ // This variable should be live
+ unsigned lclnum = compiler->lvaTrackedToVarNum[i];
+ LclVarDsc* varDsc = compiler->lvaTable + lclnum;
+
+ if (regSet.genRegVarsFloat[varDsc->lvRegNum] != varDsc)
+ {
+ JITDUMP("genQMarkAfterThenBlockStackFP(): Fixing up regvar that was modified in then\n");
+ if (regSet.genRegVarsFloat[varDsc->lvRegNum])
+ {
+ genRegVarDeathStackFP(regSet.genRegVarsFloat[varDsc->lvRegNum]);
+ }
+
+ genRegVarBirthStackFP(varDsc);
+ }
+ }
+ }
+
+ // Kill any vars that may die in the transition
+ genQMarkRegVarTransition(nextNode, varsetCond);
+}
+
+void CodeGen::genQMarkAfterThenBlockStackFP(QmarkStateStackFP* pState)
+{
+ JITDUMP("genQMarkAfterThenBlockStackFP()\n");
+ assert(regSet.rsMaskLockedFloat == 0);
+
+ // Generate transition to the previous one set by the then block
+ genCodeForTransitionStackFP(&compCurFPState, &pState->stackState);
+
+ // Update state
+ compCurFPState.Init(&pState->stackState);
+}
+
+void CodeGenInterface::SetRegVarFloat(regNumber reg, var_types type, LclVarDsc* varDsc)
+{
+ regMaskTP mask = genRegMaskFloat(reg, type);
+
+ if (varDsc)
+ {
+ JITDUMP("marking register %s as a regvar\n", getRegNameFloat(reg, type));
+
+ assert(mask && ((regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat | regSet.rsMaskUsedFloat) & mask) == 0);
+
+ regSet.rsMaskRegVarFloat |= mask;
+ }
+ else
+ {
+ JITDUMP("unmarking register %s as a regvar\n", getRegNameFloat(reg, type));
+
+ assert(mask && (regSet.rsMaskRegVarFloat & mask));
+
+ regSet.rsMaskRegVarFloat &= ~mask;
+ }
+
+ // Update lookup table
+ regSet.genRegVarsFloat[reg] = varDsc;
+}
+
+// Generates a conditional jump. It will do the appropriate stack matching for jumpTrue.
+// We don't use jumpFalse anywhere; the integer codebase assumes that it will be bbNext, and that is
+// taken care of at the end of the bb code generation.
+void CodeGen::genCondJmpInsStackFP(emitJumpKind jumpKind,
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse,
+ bool bDoTransition)
+{
+ // Assert the condition above.
+ assert(!jumpFalse || jumpFalse == compiler->compCurBB->bbNext || !bDoTransition);
+
+ // Do the fp stack matching.
+ if (bDoTransition && !jumpTrue->bbFPStateX87 &&
+ FlatFPSameRegisters(&compCurFPState, genRegMaskFromLivenessStackFP(jumpTrue->bbLiveIn)))
+ {
+ // Target block doesn't have state yet, but has the same registers, so
+ // we allocate the block and generate the normal jump
+ genCodeForBBTransitionStackFP(jumpTrue);
+ inst_JMP(jumpKind, jumpTrue);
+ }
+ else if (!bDoTransition || compCurFPState.IsEmpty() || // If it's empty, target has to be empty too.
+ (jumpTrue->bbFPStateX87 && FlatFPStateX87::AreEqual(&compCurFPState, jumpTrue->bbFPStateX87)))
+ {
+ // Nothing to do here. Proceed normally and generate the jump
+ inst_JMP(jumpKind, jumpTrue);
+
+ if (jumpFalse && jumpFalse != compiler->compCurBB->bbNext)
+ {
+ inst_JMP(EJ_jmp, jumpFalse);
+ }
+ }
+ else
+ {
+ // Temporary workaround for stack matching:
+ // do a forward conditional jump, generate the transition and jump to the target.
+ // The price is an additional jump instruction, but both jumps will be correctly
+ // predicted by the processor in the loop case.
+ BasicBlock* endLabel = NULL;
+
+ endLabel = genCreateTempLabel();
+
+ inst_JMP(emitter::emitReverseJumpKind(jumpKind), endLabel);
+
+ genCodeForBBTransitionStackFP(jumpTrue);
+
+ inst_JMP(EJ_jmp, jumpTrue);
+
+ genDefineTempLabel(endLabel);
+ }
+}
+
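+// Switch codegen when the FP stack is not empty: each target either inherits the current FP
+// state, already matches it, or is redirected through a (possibly shared) transition block,
+// after which the normal genTableSwitch path is used.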
+void CodeGen::genTableSwitchStackFP(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab)
+{
+ // Only come here when we have to do something special for the FPU stack!
+ //
+ assert(!compCurFPState.IsEmpty());
+ VARSET_TP VARSET_INIT_NOCOPY(liveInFP, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(liveOutFP, VarSetOps::MakeEmpty(compiler));
+ for (unsigned i = 0; i < jumpCnt; i++)
+ {
+ VarSetOps::Assign(compiler, liveInFP, jumpTab[i]->bbLiveIn);
+ VarSetOps::IntersectionD(compiler, liveInFP, compiler->optAllFPregVars);
+ VarSetOps::Assign(compiler, liveOutFP, compiler->compCurBB->bbLiveOut);
+ VarSetOps::IntersectionD(compiler, liveOutFP, compiler->optAllFPregVars);
+
+ if (!jumpTab[i]->bbFPStateX87 && VarSetOps::Equal(compiler, liveInFP, liveOutFP))
+ {
+ // Doesn't have state yet and the regvar set is the same, so just copy the state and don't change the jump
+ jumpTab[i]->bbFPStateX87 = FlatFPAllocFPState(&compCurFPState);
+ }
+ else if (jumpTab[i]->bbFPStateX87 && FlatFPStateX87::AreEqual(&compCurFPState, jumpTab[i]->bbFPStateX87))
+ {
+ // Same state, don't change the jump
+ }
+ else
+ {
+ // We have to do a transition. First check if we can reuse another one
+ unsigned j;
+ for (j = 0; j < i; j++)
+ {
+ // Has to be already forwarded. If not, it can't be targeting the same block
+ if (jumpTab[j]->bbFlags & BBF_FORWARD_SWITCH)
+ {
+ if (jumpTab[i] == jumpTab[j]->bbJumpDest)
+ {
+ // yipee, we can reuse this transition block
+ jumpTab[i] = jumpTab[j];
+ break;
+ }
+ }
+ }
+
+ if (j == i)
+ {
+ // We will have to create a new transition block
+ jumpTab[i] = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTab[i]);
+
+ jumpTab[i]->bbFlags |= BBF_FORWARD_SWITCH;
+ }
+ }
+ }
+
+ // Clear flag
+ for (unsigned i = 0; i < jumpCnt; i++)
+ {
+ jumpTab[i]->bbFlags &= ~BBF_FORWARD_SWITCH;
+ }
+
+ // everything's fixed now, so go down the normal path
+ return genTableSwitch(reg, jumpCnt, jumpTab);
+}
+
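+// Loads the double constant in 'tree' onto the FP stack. +0.0 and 1.0 use fldz/fld1; any other
+// value is loaded with fld from a read-only data slot (using a float slot when the value fits
+// in one). With bOnlyNoMemAccess, returns false instead of touching memory.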
+bool CodeGen::genConstantLoadStackFP(GenTreePtr tree, bool bOnlyNoMemAccess)
+{
+ assert(tree->gtOper == GT_CNS_DBL);
+
+ bool bFastConstant = false;
+ instruction ins_ConstantNN = INS_fldz; // keep compiler happy
+
+ // Both positive 0 and 1 are representable in float and double; beware if we add other constants.
+ switch (*((__int64*)&(tree->gtDblCon.gtDconVal)))
+ {
+ case 0:
+ // CAREFUL here! -0 is different from +0; a -0 shouldn't issue a fldz.
+ ins_ConstantNN = INS_fldz;
+ bFastConstant = true;
+ break;
+ case I64(0x3ff0000000000000):
+ ins_ConstantNN = INS_fld1;
+ bFastConstant = true;
+ }
+
+ if (bFastConstant == false && bOnlyNoMemAccess)
+ {
+ // Caller asked only to generate instructions if it didn't involve memory accesses
+ return false;
+ }
+
+ if (bFastConstant)
+ {
+ assert(compCurFPState.m_uStackSize <= FP_PHYSICREGISTERS);
+ instGen(ins_ConstantNN);
+ }
+ else
+ {
+ GenTreePtr addr;
+ if (tree->gtType == TYP_FLOAT || StackFPIsSameAsFloat(tree->gtDblCon.gtDconVal))
+ {
+ float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
+ addr = genMakeConst(&f, TYP_FLOAT, tree, false);
+ }
+ else
+ {
+ addr = genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
+ }
+
+ inst_FS_TT(INS_fld, addr);
+ }
+
+ return true;
+}
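+
+// Note (added for clarity): the fast-constant switch above compares the raw IEEE-754 bit
+// pattern of the double. 0x0000000000000000 is +0.0 (handled by fldz), and
+// 0x3ff0000000000000 is 1.0 (sign 0, biased exponent 0x3ff, zero mantissa), which is what
+// fld1 produces. -0.0 has bit pattern 0x8000000000000000 and so correctly falls through to
+// the memory-load path.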
+
+// Function called at the end of every statement. For the stack-based x87 model, its job is to
+// remove any remaining temps from the stack.
+void CodeGen::genEndOfStatement()
+{
+ unsigned i;
+
+#ifdef DEBUG
+ // Sanity check
+ unsigned uTemps = 0;
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (compCurFPState.Mapped(i) && // register is mapped
+ (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat) == 0) // but not enregistered
+ {
+ uTemps++;
+ }
+ }
+ assert(uTemps <= 1);
+#endif
+
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (compCurFPState.Mapped(i) && // register is mapped
+ (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat) == 0) // but not enregistered
+ {
+ // remove register from stacks
+ FlatFPX87_Unload(&compCurFPState, i);
+ }
+ }
+
+ assert(ConsistentAfterStatementStackFP());
+}
+
+bool CodeGen::StackFPIsSameAsFloat(double d)
+{
+ if (forceCastToFloat(d) == d)
+ {
+ JITDUMP("StackFPIsSameAsFloat is true for value %lf\n", d);
+ return true;
+ }
+ else
+ {
+ JITDUMP("StackFPIsSameAsFloat is false for value %lf\n", d);
+ }
+
+ return false;
+}
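+
+// Example (added for clarity): 0.5 survives the double -> float -> double round trip exactly,
+// so StackFPIsSameAsFloat(0.5) returns true and the constant can be stored as a float.
+// 0.1 does not (the nearest float differs from the double value), so it returns false.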
+
+GenTreePtr CodeGen::genMakeAddressableStackFP(GenTreePtr tree,
+ regMaskTP* regMaskIntPtr,
+ regMaskTP* regMaskFltPtr,
+ bool bCollapseConstantDoubles)
+{
+ *regMaskIntPtr = *regMaskFltPtr = 0;
+
+ switch (tree->OperGet())
+ {
+ case GT_CNS_DBL:
+ if (tree->gtDblCon.gtDconVal == 0.0 || tree->gtDblCon.gtDconVal == 1.0)
+ {
+ // For constants like 0 or 1 don't waste memory
+ genCodeForTree(tree, 0);
+ regSet.SetUsedRegFloat(tree, true);
+
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
+ return tree;
+ }
+ else
+ {
+ GenTreePtr addr;
+ if (tree->gtType == TYP_FLOAT ||
+ (bCollapseConstantDoubles && StackFPIsSameAsFloat(tree->gtDblCon.gtDconVal)))
+ {
+ float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
+ addr = genMakeConst(&f, TYP_FLOAT, tree, true);
+ }
+ else
+ {
+ addr = genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
+ }
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("Generated new constant in tree ");
+ Compiler::printTreeID(addr);
+ printf(" with value %lf\n", tree->gtDblCon.gtDconVal);
+ }
+#endif // DEBUG
+ tree->CopyFrom(addr, compiler);
+ return tree;
+ }
+ break;
+ case GT_REG_VAR:
+ // We take care of this in genKeepAddressableStackFP
+ return tree;
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_CLS_VAR:
+ return tree;
+
+ case GT_LEA:
+ if (!genMakeIndAddrMode(tree, tree, false, 0, RegSet::KEEP_REG, regMaskIntPtr, false))
+ {
+ assert(false);
+ }
+ genUpdateLife(tree);
+ return tree;
+
+ case GT_IND:
+ // Try to make the address directly addressable
+
+ if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, 0, RegSet::KEEP_REG, regMaskIntPtr, false))
+ {
+ genUpdateLife(tree);
+ return tree;
+ }
+ else
+ {
+ GenTreePtr addr = tree;
+ tree = tree->gtOp.gtOp1;
+
+ genCodeForTree(tree, 0);
+ regSet.rsMarkRegUsed(tree, addr);
+
+ *regMaskIntPtr = genRegMask(tree->gtRegNum);
+ return addr;
+ }
+
+ // not reached; both paths above return
+
+ default:
+ genCodeForTreeFloat(tree);
+ regSet.SetUsedRegFloat(tree, true);
+
+ // update mask
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
+
+ return tree;
+ break;
+ }
+}
+
+void CodeGen::genKeepAddressableStackFP(GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr)
+{
+ regMaskTP regMaskInt, regMaskFlt;
+
+ regMaskInt = *regMaskIntPtr;
+ regMaskFlt = *regMaskFltPtr;
+
+ *regMaskIntPtr = *regMaskFltPtr = 0;
+
+ switch (tree->OperGet())
+ {
+ case GT_REG_VAR:
+ // If register has been spilled, unspill it
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(&compiler->lvaTable[tree->gtLclVarCommon.gtLclNum]);
+ }
+
+ // If regvar is dying, take it out of the regvar mask
+ if (tree->IsRegVarDeath())
+ {
+ genRegVarDeathStackFP(tree);
+ }
+ genUpdateLife(tree);
+
+ return;
+ case GT_CNS_DBL:
+ {
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(tree);
+ }
+
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
+
+ return;
+ }
+ case GT_LCL_FLD:
+ case GT_LCL_VAR:
+ case GT_CLS_VAR:
+ genUpdateLife(tree);
+ return;
+ case GT_IND:
+ case GT_LEA:
+ if (regMaskFlt)
+ {
+ // fall through
+ }
+ else
+ {
+ *regMaskIntPtr = genKeepAddressable(tree, regMaskInt, 0);
+ *regMaskFltPtr = 0;
+ return;
+ }
+ default:
+
+ *regMaskIntPtr = 0;
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(tree);
+ }
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
+ return;
+ }
+}
+
+void CodeGen::genDoneAddressableStackFP(GenTreePtr tree,
+ regMaskTP addrRegInt,
+ regMaskTP addrRegFlt,
+ RegSet::KeepReg keptReg)
+{
+ assert(!(addrRegInt && addrRegFlt));
+
+ if (addrRegInt)
+ {
+ return genDoneAddressable(tree, addrRegInt, keptReg);
+ }
+ else if (addrRegFlt)
+ {
+ if (keptReg == RegSet::KEEP_REG)
+ {
+ for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (genRegMaskFloat((regNumber)i) & addrRegFlt)
+ {
+ regSet.SetUsedRegFloat(tree, false);
+ }
+ }
+ }
+ }
+}
+
+void CodeGen::FlatFPX87_Kill(FlatFPStateX87* pState, unsigned uVirtual)
+{
+ JITDUMP("Killing %s\n", regVarNameStackFP((regNumber)uVirtual));
+
+ assert(pState->TopVirtual() == uVirtual);
+ pState->Pop();
+}
+
+void CodeGen::FlatFPX87_PushVirtual(FlatFPStateX87* pState, unsigned uRegister, bool bEmitCode)
+{
+ JITDUMP("Pushing %s to stack\n", regVarNameStackFP((regNumber)uRegister));
+
+ pState->Push(uRegister);
+}
+
+unsigned CodeGen::FlatFPX87_Pop(FlatFPStateX87* pState, bool bEmitCode)
+{
+ assert(pState->m_uStackSize > 0);
+
+ // Update state
+ unsigned uVirtual = pState->Pop();
+
+ // Emit instruction
+ if (bEmitCode)
+ {
+ inst_FS(INS_fstp, 0);
+ }
+
+ return (uVirtual);
+}
+
+unsigned CodeGen::FlatFPX87_Top(FlatFPStateX87* pState, bool bEmitCode)
+{
+ return pState->TopVirtual();
+}
+
+void CodeGen::FlatFPX87_Unload(FlatFPStateX87* pState, unsigned uVirtual, bool bEmitCode)
+{
+ if (uVirtual != pState->TopVirtual())
+ {
+ // We will do an fstp to the right place
+
+ // Update state
+ unsigned uStack = pState->m_uVirtualMap[uVirtual];
+ unsigned uPhysic = pState->StackToST(uStack);
+
+ pState->Unmap(uVirtual);
+ pState->Associate(pState->TopVirtual(), uStack);
+ pState->m_uStackSize--;
+
+#ifdef DEBUG
+
+ pState->m_uStack[pState->m_uStackSize] = (unsigned)-1;
+#endif
+
+ // Emit instruction
+ if (bEmitCode)
+ {
+ inst_FS(INS_fstp, uPhysic);
+ }
+ }
+ else
+ {
+ // Emit fstp
+ FlatFPX87_Pop(pState, bEmitCode);
+ }
+
+ assert(pState->IsConsistent());
+}
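+
+// Note (added for clarity): unloading a non-top virtual register relies on the x87 semantics
+// of fstp ST(i): ST(0) is copied into ST(i) and the stack is popped. The slot that held the
+// victim therefore ends up holding the old top-of-stack value, which is why the code above
+// re-associates TopVirtual() with the victim's stack slot before shrinking the stack size.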
+
+void CodeGenInterface::FlatFPX87_MoveToTOS(FlatFPStateX87* pState, unsigned uVirtual, bool bEmitCode)
+{
+ assert(!IsUninitialized(uVirtual));
+
+ JITDUMP("Moving %s to top of stack\n", regVarNameStackFP((regNumber)uVirtual));
+
+ if (uVirtual != pState->TopVirtual())
+ {
+ FlatFPX87_SwapStack(pState, pState->m_uVirtualMap[uVirtual], pState->TopIndex(), bEmitCode);
+ }
+ else
+ {
+ JITDUMP("%s already on the top of stack\n", regVarNameStackFP((regNumber)uVirtual));
+ }
+
+ assert(pState->IsConsistent());
+}
+
+void CodeGenInterface::FlatFPX87_SwapStack(FlatFPStateX87* pState, unsigned i, unsigned j, bool bEmitCode)
+{
+ assert(i != j);
+ assert(i < pState->m_uStackSize);
+ assert(j < pState->m_uStackSize);
+
+ JITDUMP("Exchanging ST(%i) and ST(%i)\n", pState->StackToST(i), pState->StackToST(j));
+
+ // issue actual swaps
+ int iPhysic = pState->StackToST(i);
+ int jPhysic = pState->StackToST(j);
+
+ if (bEmitCode)
+ {
+ if (iPhysic == 0 || jPhysic == 0)
+ {
+ inst_FN(INS_fxch, iPhysic ? iPhysic : jPhysic);
+ }
+ else
+ {
+ inst_FN(INS_fxch, iPhysic);
+ inst_FN(INS_fxch, jPhysic);
+ inst_FN(INS_fxch, iPhysic);
+ }
+ }
+
+ // Update State
+
+ // Swap Register file
+ pState->m_uVirtualMap[pState->m_uStack[i]] = j;
+ pState->m_uVirtualMap[pState->m_uStack[j]] = i;
+
+ // Swap stack
+ int temp;
+ temp = pState->m_uStack[i];
+ pState->m_uStack[i] = pState->m_uStack[j];
+ pState->m_uStack[j] = temp;
+
+ assert(pState->IsConsistent());
+}
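+
+// Example (added for clarity): fxch can only exchange ST(0) with ST(i), so swapping two
+// entries when neither is at the top takes three exchanges. For instance, to swap ST(2)
+// and ST(3):
+//
+//      fxch  ST(2)     ; ST(0) <-> ST(2)
+//      fxch  ST(3)     ; ST(0) <-> ST(3)
+//      fxch  ST(2)     ; ST(0) <-> ST(2), restoring the original top
+//
+// which is exactly the iPhysic/jPhysic/iPhysic sequence emitted above.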
+
+#ifdef DEBUG
+
+void CodeGen::JitDumpFPState()
+{
+ int i;
+
+ if ((regSet.rsMaskUsedFloat != 0) || (regSet.rsMaskRegVarFloat != 0))
+ {
+ printf("FPSTATE\n");
+ printf("Used virtual registers: ");
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (genRegMaskFloat((regNumber)i) & regSet.rsMaskUsedFloat)
+ {
+ printf("FPV%i ", i);
+ }
+ }
+ printf("\n");
+
+ printf("virtual registers holding reg vars: ");
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat)
+ {
+ printf("FPV%i ", i);
+ }
+ }
+ printf("\n");
+ }
+ compCurFPState.Dump();
+}
+#endif
+
+//
+//
+// Register allocation
+//
+struct ChangeToRegVarCallback
+{
+ unsigned lclnum;
+ regNumber reg;
+};
+
+void Compiler::raInitStackFP()
+{
+ // Reset local/reg interference
+ for (int i = 0; i < REG_FPCOUNT; i++)
+ {
+ VarSetOps::AssignNoCopy(this, raLclRegIntfFloat[i], VarSetOps::MakeEmpty(this));
+ }
+
+ VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
+
+ raCntStkStackFP = 0;
+ raCntWtdStkDblStackFP = 0;
+ raCntStkParamDblStackFP = 0;
+
+ VarSetOps::AssignNoCopy(this, raMaskDontEnregFloat, VarSetOps::MakeEmpty(this));
+
+ // Calculate the set of all tracked FP/non-FP variables
+ // into compiler->optAllFloatVars and compiler->optAllNonFPvars
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ continue;
+
+ /* Get hold of the index and the interference mask for the variable */
+
+ unsigned varNum = varDsc->lvVarIndex;
+
+ /* add to the set of all tracked FP/non-FP variables */
+
+ if (varDsc->IsFloatRegType())
+ VarSetOps::AddElemD(this, optAllFloatVars, varNum);
+ else
+ VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
+ }
+}
+
+#ifdef DEBUG
+void Compiler::raDumpVariableRegIntfFloat()
+{
+ unsigned i;
+ unsigned j;
+
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (!VarSetOps::IsEmpty(this, raLclRegIntfFloat[i]))
+ {
+ JITDUMP("FPV%u interferes with ", i);
+ for (j = 0; j < lvaTrackedCount; j++)
+ {
+ assert(VarSetOps::IsEmpty(this, VarSetOps::Diff(this, raLclRegIntfFloat[i], optAllFloatVars)));
+
+ if (VarSetOps::IsMember(this, raLclRegIntfFloat[i], j))
+ {
+ JITDUMP("T%02u/V%02u, ", j, lvaTrackedToVarNum[j]);
+ }
+ }
+ JITDUMP("\n");
+ }
+ }
+}
+#endif
+
+// Returns the regnum for the variable passed as a parameter, taking into account
+// the fpvar-to-register interference mask. If we can't find anything, we
+// return REG_FPNONE.
+regNumber Compiler::raRegForVarStackFP(unsigned varTrackedIndex)
+{
+ for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (!VarSetOps::IsMember(this, raLclRegIntfFloat[i], varTrackedIndex))
+ {
+ return (regNumber)i;
+ }
+ }
+
+ return REG_FPNONE;
+}
+
+void Compiler::raAddPayloadStackFP(VARSET_VALARG_TP maskArg, unsigned weight)
+{
+ VARSET_TP VARSET_INIT_NOCOPY(mask, VarSetOps::Intersection(this, maskArg, optAllFloatVars));
+ if (VarSetOps::IsEmpty(this, mask))
+ {
+ return;
+ }
+
+ for (unsigned i = 0; i < lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(this, mask, i))
+ {
+ raPayloadStackFP[i] += weight;
+ }
+ }
+}
+
+bool Compiler::raVarIsGreaterValueStackFP(LclVarDsc* lv1, LclVarDsc* lv2)
+{
+ assert(lv1->lvTracked);
+ assert(lv2->lvTracked);
+
+ bool bSmall = (compCodeOpt() == SMALL_CODE);
+
+ double weight1 = double(bSmall ? lv1->lvRefCnt : lv1->lvRefCntWtd) - double(raPayloadStackFP[lv1->lvVarIndex]) -
+ double(raHeightsStackFP[lv1->lvVarIndex][FP_VIRTUALREGISTERS]);
+
+ double weight2 = double(bSmall ? lv2->lvRefCnt : lv2->lvRefCntWtd) - double(raPayloadStackFP[lv2->lvVarIndex]) -
+ double(raHeightsStackFP[lv2->lvVarIndex][FP_VIRTUALREGISTERS]);
+
+ double diff = weight1 - weight2;
+
+ if (diff)
+ {
+ return diff > 0 ? true : false;
+ }
+ else
+ {
+ return int(lv1->lvRefCnt - lv2->lvRefCnt) ? true : false;
+ }
+}
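+
+// Worked example (hypothetical numbers, added for clarity): under FASTCODE, a var with
+// lvRefCntWtd = 20, payload = 4 and overflow height = 6 gets weight 20 - 4 - 6 = 10, while
+// one with lvRefCntWtd = 12 and no payload or overflow gets weight 12, so the second var is
+// considered the more valuable enregistration candidate.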
+
+#ifdef DEBUG
+// Dumps only the interesting vars (the ones that are not enregistered yet).
+void Compiler::raDumpHeightsStackFP()
+{
+ unsigned i;
+ unsigned j;
+
+ JITDUMP("raDumpHeightsStackFP():\n");
+ JITDUMP("--------------------------------------------------------\n");
+ JITDUMP("Weighted Height Table Dump\n ");
+ for (i = 0; i < FP_VIRTUALREGISTERS; i++)
+ {
+ JITDUMP(" %i ", i + 1);
+ }
+
+ JITDUMP("OVF\n");
+
+ for (i = 0; i < lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(this, optAllFloatVars, i) && !VarSetOps::IsMember(this, optAllFPregVars, i))
+ {
+ JITDUMP("V%02u/T%02u: ", lvaTrackedToVarNum[i], i);
+
+ for (j = 0; j <= FP_VIRTUALREGISTERS; j++)
+ {
+ JITDUMP("%5u ", raHeightsStackFP[i][j]);
+ }
+ JITDUMP("\n");
+ }
+ }
+
+ JITDUMP("\nNonweighted Height Table Dump\n ");
+ for (i = 0; i < FP_VIRTUALREGISTERS; i++)
+ {
+ JITDUMP(" %i ", i + 1);
+ }
+
+ JITDUMP("OVF\n");
+
+ for (i = 0; i < lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(this, optAllFloatVars, i) && !VarSetOps::IsMember(this, optAllFPregVars, i))
+ {
+ JITDUMP("V%02u/T%02u: ", lvaTrackedToVarNum[i], i);
+
+ for (j = 0; j <= FP_VIRTUALREGISTERS; j++)
+ {
+ JITDUMP("%5u ", raHeightsNonWeightedStackFP[i][j]);
+ }
+ JITDUMP("\n");
+ }
+ }
+ JITDUMP("--------------------------------------------------------\n");
+}
+#endif
+
+// Increases heights for the tracked variables given in mask. We call this
+// function when we enregister a variable; doing so causes the heights to
+// shift one place to the right.
+void Compiler::raUpdateHeightsForVarsStackFP(VARSET_VALARG_TP mask)
+{
+ assert(VarSetOps::IsSubset(this, mask, optAllFloatVars));
+
+ for (unsigned i = 0; i < lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(this, mask, i))
+ {
+ for (unsigned j = FP_VIRTUALREGISTERS; j > 0; j--)
+ {
+ raHeightsStackFP[i][j] = raHeightsStackFP[i][j - 1];
+
+#ifdef DEBUG
+ raHeightsNonWeightedStackFP[i][j] = raHeightsNonWeightedStackFP[i][j - 1];
+#endif
+ }
+
+ raHeightsStackFP[i][0] = 0;
+#ifdef DEBUG
+ raHeightsNonWeightedStackFP[i][0] = 0;
+#endif
+ }
+ }
+
+#ifdef DEBUG
+ raDumpHeightsStackFP();
+#endif
+}
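+
+// Example (added for clarity): a height row [h0, h1, ..., h(N-1), ovf] (with
+// N == FP_VIRTUALREGISTERS) becomes [0, h0, h1, ..., h(N-2), h(N-1)] after the shift:
+// every height moves one slot to the right because one more slot of the FP stack is now
+// permanently occupied by the new regvar; the old overflow value is discarded and the old
+// deepest height becomes the new overflow entry.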
+
+// This is the prepass we do to adjust refcounts across calls and
+// create the height structure.
+void Compiler::raEnregisterVarsPrePassStackFP()
+{
+ BasicBlock* block;
+
+ assert(!VarSetOps::IsEmpty(this, optAllFloatVars));
+
+ // Initialization of the height table
+ memset(raHeightsStackFP, 0, sizeof(raHeightsStackFP));
+
+ // Initialization of the payload table
+ memset(raPayloadStackFP, 0, sizeof(raPayloadStackFP));
+
+#ifdef DEBUG
+ memset(raHeightsNonWeightedStackFP, 0, sizeof(raHeightsStackFP));
+#endif
+
+ // We will have a quick table with the pointers to the interesting varDscs
+ // so that we don't have to scan for them for each tree.
+ unsigned FPVars[lclMAX_TRACKED];
+ unsigned numFPVars = 0;
+ for (unsigned i = 0; i < lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(this, optAllFloatVars, i))
+ {
+ FPVars[numFPVars++] = i;
+ }
+ }
+
+ assert(numFPVars == VarSetOps::Count(this, optAllFloatVars));
+
+ // Things we check here:
+ //
+ // We subtract 2 for each FP variable that's live across a call, as we will
+ // have 2 memory accesses to spill and unspill around it.
+ //
+ //
+ //
+ VARSET_TP VARSET_INIT_NOCOPY(blockLiveOutFloats, VarSetOps::MakeEmpty(this));
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ compCurBB = block;
+ /*
+ This opt fails in the case of a variable that has its entire lifetime contained in the 'then' of
+ a qmark. The use mask for the whole qmark won't contain that variable, as the variable's value comes
+ from a def in the else, and the def can't be set for the qmark if the else side of
+ the qmark doesn't do a def.
+
+ See VSW# 354454 for more info. Leaving the comment and code here just in case we try to be
+ 'smart' again in the future
+
+
+ if (((block->bbVarUse |
+ block->bbVarDef |
+ block->bbLiveIn ) & optAllFloatVars) == 0)
+ {
+ // Fast way out
+ continue;
+ }
+ */
+ VarSetOps::Assign(this, blockLiveOutFloats, block->bbLiveOut);
+ VarSetOps::IntersectionD(this, blockLiveOutFloats, optAllFloatVars);
+ if (!VarSetOps::IsEmpty(this, blockLiveOutFloats))
+ {
+ // See comment in compiler.h above declaration of compMayHaveTransitionBlocks
+ // to understand the reason for this limitation of FP optimizer.
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ {
+ GenTreePtr stmt;
+ stmt = block->bbTreeList->gtPrev;
+ assert(stmt->gtNext == NULL && stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+
+ assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
+ GenTreePtr cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
+
+ assert(cond->OperIsCompare());
+
+ if (cond->gtOp.gtOp1->TypeGet() == TYP_LONG)
+ {
+ if (compHndBBtabCount > 0)
+ {
+ // If we have any handlers we won't enregister whatever is live out of this block
+ JITDUMP("PERF Warning: Taking out FP candidates due to transition blocks + exception "
+ "handlers.\n");
+ VarSetOps::UnionD(this, raMaskDontEnregFloat,
+ VarSetOps::Intersection(this, block->bbLiveOut, optAllFloatVars));
+ }
+ else
+ {
+ // long conditional jumps can generate transition blocks
+ compMayHaveTransitionBlocks = true;
+ }
+ }
+
+ break;
+ }
+ case BBJ_SWITCH:
+ {
+ if (compHndBBtabCount > 0)
+ {
+ // If we have any handlers we won't enregister whatever is live out of this block
+ JITDUMP(
+ "PERF Warning: Taking out FP candidates due to transition blocks + exception handlers.\n");
+ VarSetOps::UnionD(this, raMaskDontEnregFloat,
+ VarSetOps::Intersection(this, block->bbLiveOut, optAllFloatVars));
+ }
+ else
+ {
+ // fp vars are live out of the switch, so we may have transition blocks
+ compMayHaveTransitionBlocks = true;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ VARSET_TP VARSET_INIT(this, liveSet, block->bbLiveIn);
+ for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ assert(stmt->gtOper == GT_STMT);
+
+ unsigned prevHeight = stmt->gtStmt.gtStmtList->gtFPlvl;
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ VarSetOps::AssignNoCopy(this, liveSet, fgUpdateLiveSet(liveSet, tree));
+ switch (tree->gtOper)
+ {
+ case GT_CALL:
+ raAddPayloadStackFP(liveSet, block->getBBWeight(this) * 2);
+ break;
+ case GT_CAST:
+ // For a cast from a long local var to double, decrement the ref count of the long
+ // to avoid a store-forwarding stall
+ if (tree->gtType == TYP_DOUBLE)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ if (op1->gtOper == GT_LCL_VAR && op1->gtType == TYP_LONG)
+ {
+ unsigned int lclNum = op1->gtLclVarCommon.gtLclNum;
+ assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+ unsigned int weightedRefCnt = varDsc->lvRefCntWtd;
+ unsigned int refCntDecrement = 2 * block->getBBWeight(this);
+ if (refCntDecrement > weightedRefCnt)
+ {
+ varDsc->lvRefCntWtd = 0;
+ }
+ else
+ {
+ varDsc->lvRefCntWtd = weightedRefCnt - refCntDecrement;
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ // Update heights
+ unsigned height = tree->gtFPlvl;
+
+ if (height != prevHeight)
+ {
+ if (height > prevHeight && height < FP_VIRTUALREGISTERS)
+ {
+ for (unsigned i = 0; i < numFPVars; i++)
+ {
+ if (VarSetOps::IsMember(this, liveSet, FPVars[i]))
+ {
+ // The -1 is because we don't care about stack height 0,
+ // and we use offset FP_VIRTUALREGISTERS to hold the count
+ // when we overflow. We multiply by 2 because that
+ // is the number of memory accesses we will do for each spill
+ // (even if we operate directly on the spill)
+ if (compCodeOpt() == SMALL_CODE)
+ {
+ raHeightsStackFP[FPVars[i]][height - 1] += 2;
+ }
+ else
+ {
+ raHeightsStackFP[FPVars[i]][height - 1] += 2 * block->getBBWeight(this);
+ }
+
+#ifdef DEBUG
+ raHeightsNonWeightedStackFP[FPVars[i]][height - 1]++;
+#endif
+ }
+ }
+ }
+
+ prevHeight = height;
+ }
+ }
+ }
+ }
+ compCurBB = NULL;
+
+ if (compJmpOpUsed)
+ {
+ // Disable enregistering of FP vars for methods with a jmp op. We really have no
+ // coverage here.
+ // The problem with FP enreg vars is that the returning block is marked as having
+ // all variables live on exit. This works for integer vars, but for FP vars we must
+ // do the work to unload them. This is fairly straightforward to do, but I'm worried
+ // about the coverage, so I'll take the conservative approach of disabling FP enregistering;
+ // we will fix it if there is demand.
+ JITDUMP("PERF Warning: Disabling FP enregistering due to JMP op!!!!!!!.\n");
+ VarSetOps::UnionD(this, raMaskDontEnregFloat, optAllFloatVars);
+ }
+
+#ifdef DEBUG
+ raDumpHeightsStackFP();
+#endif
+}
+
+void Compiler::raSetRegLclBirthDeath(GenTreePtr tree, VARSET_VALARG_TP lastlife, bool fromLDOBJ)
+{
+ assert(tree->gtOper == GT_LCL_VAR);
+
+ unsigned lclnum = tree->gtLclVarCommon.gtLclNum;
+ assert(lclnum < lvaCount);
+
+ LclVarDsc* varDsc = lvaTable + lclnum;
+
+ if (!varDsc->lvTracked)
+ {
+ // Not tracked, can't be one of the enreg fp vars
+ return;
+ }
+
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ if (!VarSetOps::IsMember(this, optAllFPregVars, varIndex))
+ {
+ // Not one of the enreg fp vars
+ return;
+ }
+
+ assert(varDsc->lvRegNum != REG_FPNONE);
+ assert(!VarSetOps::IsMember(this, raMaskDontEnregFloat, varIndex));
+
+ unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
+ tree->ChangeOper(GT_REG_VAR);
+ tree->gtFlags |= livenessFlags;
+ tree->gtRegNum = varDsc->lvRegNum;
+ tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
+ tree->gtRegVar.SetLclNum(lclnum);
+
+ // A liveset can change in a lclvar even if the lclvar itself is not
+ // changing its life. This can happen for lclvars inside qmarks,
+ // where lclvars die across the colon edge.
+ // SO, either
+ // it is marked GTF_VAR_DEATH (already set by fgComputeLife)
+ // OR it is already live
+ // OR it is becoming live
+ //
+ if ((tree->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ if ((tree->gtFlags & GTF_VAR_DEF) != 0)
+
+ {
+ tree->gtFlags |= GTF_REG_BIRTH;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ gtDispTree(tree);
+#endif
+}
+
+// In this pass we set the regvars and set the birth and death flags. We do it
+// for all enregistered variables at once.
+void Compiler::raEnregisterVarsPostPassStackFP()
+{
+ if (VarSetOps::IsEmpty(this, optAllFPregVars))
+ {
+ // Nothing to fix up, so bail out early.
+ return;
+ }
+
+ BasicBlock* block;
+
+ JITDUMP("raEnregisterVarsPostPassStackFP:\n");
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ compCurBB = block;
+
+ /*
+ This opt fails in the case of a variable that has its entire lifetime contained in the 'then' of
+ a qmark. The use mask for the whole qmark won't contain that variable, as the variable's value comes
+ from a def in the else, and the def can't be set for the qmark if the else side of
+ the qmark doesn't do a def.
+
+ See VSW# 354454 for more info. Leaving the comment and code here just in case we try to be
+ 'smart' again in the future
+
+
+
+ if (((block->bbVarUse |
+ block->bbVarDef |
+ block->bbLiveIn ) & optAllFPregVars) == 0)
+ {
+ // Fast way out
+ continue;
+ }
+ */
+
+ VARSET_TP VARSET_INIT(this, lastlife, block->bbLiveIn);
+ for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ assert(stmt->gtOper == GT_STMT);
+
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree;
+ VarSetOps::AssignNoCopy(this, lastlife, fgUpdateLiveSet(lastlife, tree)), tree = tree->gtNext)
+ {
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ raSetRegLclBirthDeath(tree, lastlife, false);
+ }
+ }
+ }
+ assert(VarSetOps::Equal(this, lastlife, block->bbLiveOut));
+ }
+ compCurBB = NULL;
+}
+
+void Compiler::raGenerateFPRefCounts()
+{
+ // Update ref counts to stack
+ assert(raCntWtdStkDblStackFP == 0);
+ assert(raCntStkParamDblStackFP == 0);
+ assert(raCntStkStackFP == 0);
+
+ LclVarDsc* varDsc;
+ unsigned lclNum;
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvType == TYP_DOUBLE ||
+ varDsc->lvStructDoubleAlign) // Account for structs (a bit overaggressive here; we should
+ // account for field accesses, but this should be a reasonable
+ // heuristic).
+ {
+ if (varDsc->lvRegister)
+ {
+ assert(varDsc->lvTracked);
+ }
+ else
+ {
+ // Increment tmp access
+ raCntStkStackFP += varDsc->lvRefCnt;
+
+ if (varDsc->lvIsParam)
+ {
+ // Why is this not weighted?
+ raCntStkParamDblStackFP += varDsc->lvRefCnt;
+ }
+ else
+ {
+ raCntWtdStkDblStackFP += varDsc->lvRefCntWtd;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if ((raCntWtdStkDblStackFP > 0) || (raCntStkParamDblStackFP > 0))
+ {
+ JITDUMP("StackFP double stack weighted ref count: %u ; param ref count: %u\n", raCntWtdStkDblStackFP,
+ raCntStkParamDblStackFP);
+ }
+#endif
+}
+
+void Compiler::raEnregisterVarsStackFP()
+{
+ const int FPENREGTHRESHOLD = 1;
+ const unsigned int FPENREGTHRESHOLD_WEIGHTED = FPENREGTHRESHOLD;
+
+ // Do init
+ raInitStackFP();
+
+ if (opts.compDbgCode || opts.MinOpts())
+ {
+ // no enregistering for these options.
+ return;
+ }
+
+ if (VarSetOps::IsEmpty(this, optAllFloatVars))
+ {
+ // No floating point vars. bail out
+ return;
+ }
+
+ // Do additional pass updating weights and generating height table
+ raEnregisterVarsPrePassStackFP();
+
+ // Vars are ordered by weight
+ LclVarDsc* varDsc;
+
+ // Set an interference with V0 and V1, which we reserve as temp registers.
+ // We really need only one temp, but we take the easy way of reserving two; otherwise
+ // we would need to teach codegen how to operate with spilled variables.
+ VarSetOps::Assign(this, raLclRegIntfFloat[REG_FPV0], optAllFloatVars);
+ VarSetOps::Assign(this, raLclRegIntfFloat[REG_FPV1], optAllFloatVars);
+
+#ifdef DEBUG
+ if (codeGen->genStressFloat())
+ {
+ // Lock out registers for stress.
+ regMaskTP locked = codeGen->genStressLockedMaskFloat();
+ for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (locked & genRegMaskFloat((regNumber)i))
+ {
+ VarSetOps::Assign(this, raLclRegIntfFloat[i], optAllFloatVars);
+ }
+ }
+ }
+#endif
+
+ // Build the interesting FP var table
+ LclVarDsc* fpLclFPVars[lclMAX_TRACKED];
+ unsigned numFPVars = 0;
+ for (unsigned i = 0; i < lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(this, raMaskDontEnregFloat, i))
+ {
+ JITDUMP("Won't enregister V%02i (T%02i) because it's marked as dont enregister\n", lvaTrackedToVarNum[i],
+ i);
+ continue;
+ }
+
+ if (VarSetOps::IsMember(this, optAllFloatVars, i))
+ {
+ varDsc = lvaTable + lvaTrackedToVarNum[i];
+
+ assert(varDsc->lvTracked);
+
+ if (varDsc->lvDoNotEnregister)
+ {
+ JITDUMP("Won't enregister V%02i (T%02i) because it's marked as DoNotEnregister\n",
+ lvaTrackedToVarNum[i], i);
+ continue;
+ }
+#if !FEATURE_X87_DOUBLES
+ if (varDsc->TypeGet() == TYP_FLOAT)
+ {
+ JITDUMP("Won't enregister V%02i (T%02i) because it's a TYP_FLOAT and we have disabled "
+ "FEATURE_X87_DOUBLES\n",
+ lvaTrackedToVarNum[i], i);
+ continue;
+ }
+#endif
+
+ fpLclFPVars[numFPVars++] = lvaTable + lvaTrackedToVarNum[i];
+ }
+ }
+
+ unsigned maxRegVars = 0; // Max num of regvars at one time
+
+ for (unsigned sortNum = 0; sortNum < numFPVars; sortNum++)
+ {
+#ifdef DEBUG
+ {
+ JITDUMP("\n");
+ JITDUMP("FP regvar candidates:\n");
+
+ for (unsigned i = sortNum; i < numFPVars; i++)
+ {
+ varDsc = fpLclFPVars[i];
+ unsigned lclNum = varDsc - lvaTable;
+ unsigned varIndex;
+ varIndex = varDsc->lvVarIndex;
+
+ JITDUMP("V%02u/T%02u RefCount: %u Weight: %u ; Payload: %u ; Overflow: %u\n", lclNum, varIndex,
+ varDsc->lvRefCnt, varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
+ raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]);
+ }
+ JITDUMP("\n");
+ }
+#endif
+
+ unsigned min = sortNum;
+
+ // Find the one that will save us most
+ for (unsigned i = sortNum + 1; i < numFPVars; i++)
+ {
+ if (raVarIsGreaterValueStackFP(fpLclFPVars[i], fpLclFPVars[sortNum]))
+ {
+ min = i;
+ }
+ }
+
+ // Put it at the top of the array
+ LclVarDsc* temp;
+ temp = fpLclFPVars[min];
+ fpLclFPVars[min] = fpLclFPVars[sortNum];
+ fpLclFPVars[sortNum] = temp;
+
+ varDsc = fpLclFPVars[sortNum];
+
+#ifdef DEBUG
+ unsigned lclNum = varDsc - lvaTable;
+#endif
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ assert(VarSetOps::IsMember(this, optAllFloatVars, varIndex));
+
+ JITDUMP("Candidate for enregistering: V%02u/T%02u RefCount: %u Weight: %u ; Payload: %u ; Overflow: %u\n",
+ lclNum, varIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
+ raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]);
+
+ bool bMeetsThreshold = true;
+
+ if (varDsc->lvRefCnt < FPENREGTHRESHOLD || varDsc->lvRefCntWtd < FPENREGTHRESHOLD_WEIGHTED)
+ {
+ bMeetsThreshold = false;
+ }
+
+ // We don't want to enregister arguments with only one use, as they will be
+ // loaded in the prolog. Just don't enregister them and load them lazily.
+ if (varDsc->lvIsParam &&
+ (varDsc->lvRefCnt <= FPENREGTHRESHOLD || varDsc->lvRefCntWtd <= FPENREGTHRESHOLD_WEIGHTED))
+ {
+ bMeetsThreshold = false;
+ }
+
+ if (!bMeetsThreshold
+#ifdef DEBUG
+ && codeGen->genStressFloat() != 1
+#endif
+ )
+ {
+ // Doesn't meet bar, do next
+ JITDUMP("V%02u/T%02u doesn't meet the threshold. Won't enregister\n", lclNum, varIndex);
+ continue;
+ }
+
+ // We don't want to have problems with overflow (we now have 2 unsigned counters
+ // that can possibly go to their limits), so we just promote to double here.
+ // diff
+ double balance =
+ double(varDsc->lvRefCntWtd) -
+ double(raPayloadStackFP[varIndex]) - // Additional costs of enregistering variable
+ double(raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]) - // Spilling costs of enregistering variable
+ double(FPENREGTHRESHOLD_WEIGHTED);
+
+ JITDUMP("balance = %d - %d - %d - %d\n", varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
+ raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS], FPENREGTHRESHOLD_WEIGHTED);
+
+ if (balance < 0.0
+#ifdef DEBUG
+ && codeGen->genStressFloat() != 1
+#endif
+ )
+ {
+ // Doesn't meet bar, do next
+ JITDUMP("V%02u/T%02u doesn't meet the threshold. Won't enregister\n", lclNum, varIndex);
+ continue;
+ }
+
+ regNumber reg = raRegForVarStackFP(varDsc->lvVarIndex);
+ if (reg == REG_FPNONE)
+ {
+ // Didn't make it (interferes with other regvars), do next
+ JITDUMP("V%02u/T%02u interferes with other enreg vars. Won't enregister\n", lclNum, varIndex);
+
+ continue;
+ }
+
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ // Do not enregister if this is a floating field in a struct local of
+ // promotion type PROMOTION_TYPE_DEPENDENT.
+ continue;
+ }
+
+ // Yipee, we will enregister var.
+ varDsc->lvRegister = true;
+ varDsc->lvRegNum = reg;
+ VarSetOps::AddElemD(this, optAllFPregVars, varIndex);
+
+#ifdef DEBUG
+ raDumpVariableRegIntfFloat();
+
+ if (verbose)
+ {
+ printf("; ");
+ gtDispLclVar(lclNum);
+ printf("V%02u/T%02u (refcnt=%2u,refwtd=%4u%s) enregistered in %s\n", varIndex, varDsc->lvVarIndex,
+ varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2, (varDsc->lvRefCntWtd & 1) ? ".5" : "",
+ CodeGen::regVarNameStackFP(varDsc->lvRegNum));
+ }
+
+ JITDUMP("\n");
+#endif
+
+ // Create interferences with other variables.
+ assert(VarSetOps::IsEmpty(this, VarSetOps::Diff(this, raLclRegIntfFloat[(int)reg], optAllFloatVars)));
+ VARSET_TP VARSET_INIT_NOCOPY(intfFloats, VarSetOps::Intersection(this, lvaVarIntf[varIndex], optAllFloatVars));
+
+ VarSetOps::UnionD(this, raLclRegIntfFloat[reg], intfFloats);
+
+ // Update height tables for variables that interfere with this one.
+ raUpdateHeightsForVarsStackFP(intfFloats);
+
+ // Update max number of reg vars at once.
+ maxRegVars = min(REG_FPCOUNT, max(maxRegVars, VarSetOps::Count(this, intfFloats)));
+ }
+
+ assert(VarSetOps::IsSubset(this, optAllFPregVars, optAllFloatVars));
+ assert(VarSetOps::IsEmpty(this, VarSetOps::Intersection(this, optAllFPregVars, raMaskDontEnregFloat)));
+
+ // This is a bit conservative, as they may not all go through a call.
+ // If we have to, we can fix this.
+ tmpDoubleSpillMax += maxRegVars;
+
+ // Do the pass marking trees as regvars
+ raEnregisterVarsPostPassStackFP();
+
+#ifdef DEBUG
+ {
+ JITDUMP("FP enregistration summary\n");
+
+ unsigned i;
+ for (i = 0; i < numFPVars; i++)
+ {
+ varDsc = fpLclFPVars[i];
+
+ if (varDsc->lvRegister)
+ {
+ unsigned lclNum = varDsc - lvaTable;
+ unsigned varIndex;
+ varIndex = varDsc->lvVarIndex;
+
+ JITDUMP("Enregistered V%02u/T%02u in FPV%i RefCount: %u Weight: %u \n", lclNum, varIndex,
+ varDsc->lvRegNum, varDsc->lvRefCnt, varDsc->lvRefCntWtd);
+ }
+ }
+ JITDUMP("End of FP enregistration summary\n\n");
+ }
+#endif
+}
+
+#ifdef DEBUG
+
+regMaskTP CodeGenInterface::genStressLockedMaskFloat()
+{
+ assert(genStressFloat());
+
+ // Don't use REG_FPV0 or REG_FPV1, they're reserved
+ if (genStressFloat() == 1)
+ {
+ return genRegMaskFloat(REG_FPV4) | genRegMaskFloat(REG_FPV5) | genRegMaskFloat(REG_FPV6) |
+ genRegMaskFloat(REG_FPV7);
+ }
+ else
+ {
+ return genRegMaskFloat(REG_FPV2) | genRegMaskFloat(REG_FPV3) | genRegMaskFloat(REG_FPV4) |
+ genRegMaskFloat(REG_FPV5) | genRegMaskFloat(REG_FPV6) | genRegMaskFloat(REG_FPV7);
+ }
+}
+
+#endif
+
+#endif // FEATURE_STACK_FP_X87
+
+#endif // LEGACY_BACKEND
diff --git a/src/jit/standalone/.gitmirror b/src/jit/standalone/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/standalone/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror. \ No newline at end of file
diff --git a/src/jit/standalone/CMakeLists.txt b/src/jit/standalone/CMakeLists.txt
new file mode 100644
index 0000000000..2e6317098e
--- /dev/null
+++ b/src/jit/standalone/CMakeLists.txt
@@ -0,0 +1,58 @@
+project(ryujit)
+add_definitions(-DFEATURE_NO_HOST)
+add_definitions(-DSELF_NO_HOST)
+add_definitions(-DFEATURE_READYTORUN_COMPILER)
+remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
+
+if(CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM)
+ add_definitions(-DLEGACY_BACKEND)
+endif()
+
+add_library_clr(${JIT_BASE_NAME}
+ SHARED
+ ${SHARED_LIB_SOURCES}
+)
+
+add_dependencies(${JIT_BASE_NAME} jit_exports)
+
+set_property(TARGET ${JIT_BASE_NAME} APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION})
+set_property(TARGET ${JIT_BASE_NAME} APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE})
+
+set(RYUJIT_LINK_LIBRARIES
+ utilcodestaticnohost
+ gcinfo
+)
+
+if(CLR_CMAKE_PLATFORM_UNIX)
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ mscorrc_debug
+ coreclrpal
+ palrt
+ )
+else()
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ ${STATIC_MT_CRT_LIB}
+ ${STATIC_MT_VCRT_LIB}
+ kernel32.lib
+ advapi32.lib
+ ole32.lib
+ oleaut32.lib
+ uuid.lib
+ user32.lib
+ version.lib
+ shlwapi.lib
+ bcrypt.lib
+ crypt32.lib
+ RuntimeObject.lib
+ )
+endif(CLR_CMAKE_PLATFORM_UNIX)
+
+target_link_libraries(${JIT_BASE_NAME}
+ ${RYUJIT_LINK_LIBRARIES}
+)
+
+# add the install targets
+install_clr(${JIT_BASE_NAME})
+
+# Enable profile guided optimization
+add_pgo(${JIT_BASE_NAME})
diff --git a/src/jit/target.h b/src/jit/target.h
new file mode 100644
index 0000000000..fa0b18af3e
--- /dev/null
+++ b/src/jit/target.h
@@ -0,0 +1,2320 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef _TARGET_H_
+#define _TARGET_H_
+
+// Inform includers that we're in a context in which a target has been set.
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM_)
+#define _TARGET_SET_
+#endif
+
+// If the UNIX_AMD64_ABI is defined make sure that _TARGET_AMD64_ is also defined.
+#if defined(UNIX_AMD64_ABI)
+#if !defined(_TARGET_AMD64_)
+#error When UNIX_AMD64_ABI is defined you must define _TARGET_AMD64_ as well.
+#endif
+#endif
+
+#if (defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+#define FEATURE_VARARG 0
+#else // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+#define FEATURE_VARARG 1
+#endif // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+
+/*****************************************************************************/
+// The following are human readable names for the target architectures
+#if defined(_TARGET_X86_)
+#define TARGET_READABLE_NAME "X86"
+#elif defined(_TARGET_AMD64_)
+#define TARGET_READABLE_NAME "AMD64"
+#elif defined(_TARGET_ARM_)
+#define TARGET_READABLE_NAME "ARM"
+#elif defined(_TARGET_ARM64_)
+#define TARGET_READABLE_NAME "ARM64"
+#else
+#error Unsupported or unset target architecture
+#endif
+
+/*****************************************************************************/
+// The following are intended to capture only those #defines that cannot be replaced
+// with static const members of Target
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+#define REGMASK_BITS 8 // number of bits used to represent register mask
+#elif defined(_TARGET_XARCH_)
+#define REGMASK_BITS 32
+
+#elif defined(_TARGET_ARM_)
+#define REGMASK_BITS 64
+
+#elif defined(_TARGET_ARM64_)
+#define REGMASK_BITS 64
+
+#else
+#error Unsupported or unset target architecture
+#endif
+
+/*****************************************************************************/
+
+#if defined(_TARGET_ARM_)
+DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+{
+#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#include "register.h"
+
+ REG_COUNT, REG_NA = REG_COUNT, ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs)
+}
+END_DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+
+DECLARE_TYPED_ENUM(_regMask_enum, unsigned __int64)
+{
+ RBM_NONE = 0,
+#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#include "register.h"
+}
+END_DECLARE_TYPED_ENUM(_regMask_enum, unsigned __int64)
+
+#elif defined(_TARGET_ARM64_)
+
+DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+{
+#define REGDEF(name, rnum, mask, xname, wname) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#include "register.h"
+
+ REG_COUNT, REG_NA = REG_COUNT, ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs)
+}
+END_DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+
+DECLARE_TYPED_ENUM(_regMask_enum, unsigned __int64)
+{
+ RBM_NONE = 0,
+#define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#include "register.h"
+}
+END_DECLARE_TYPED_ENUM(_regMask_enum, unsigned __int64)
+
+#elif defined(_TARGET_AMD64_)
+
+DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+{
+#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#include "register.h"
+
+ REG_COUNT, REG_NA = REG_COUNT, ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs)
+}
+END_DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+
+DECLARE_TYPED_ENUM(_regMask_enum, unsigned)
+{
+ RBM_NONE = 0,
+
+#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#include "register.h"
+}
+END_DECLARE_TYPED_ENUM(_regMask_enum, unsigned)
+
+#elif defined(_TARGET_X86_)
+
+#ifndef LEGACY_BACKEND
+DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+{
+#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#include "register.h"
+
+ REG_COUNT, REG_NA = REG_COUNT, ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs)
+}
+END_DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+
+DECLARE_TYPED_ENUM(_regMask_enum, unsigned)
+{
+ RBM_NONE = 0,
+
+#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#include "register.h"
+}
+END_DECLARE_TYPED_ENUM(_regMask_enum, unsigned)
+#else // LEGACY_BACKEND
+DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+{
+#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#include "register.h"
+
+ REG_COUNT, REG_NA = REG_COUNT,
+ ACTUAL_REG_COUNT = REG_COUNT - 1, // everything but REG_STK (only real regs)
+
+#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
+#include "registerfp.h"
+
+ REG_FPCOUNT, REG_FPNONE = REG_FPCOUNT,
+
+#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
+#include "registerxmm.h"
+
+ REG_XMMCOUNT
+}
+END_DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+
+DECLARE_TYPED_ENUM(_regMask_enum, unsigned)
+{
+ RBM_NONE = 0,
+
+#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#include "register.h"
+
+#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
+#include "registerfp.h"
+
+#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
+#include "registerxmm.h"
+}
+END_DECLARE_TYPED_ENUM(_regMask_enum, unsigned)
+
+#endif // LEGACY_BACKEND
+#else
+#error Unsupported target architecture
+#endif
+
+/* The following are used to hold 'long' (64-bit integer) operands */
+
+/*
+ The following yield the number of bits and the mask of a register
+ number in a register pair.
+ */
+
+#ifdef _TARGET_ARM_
+#define REG_PAIR_NBITS 6
+#else
+#define REG_PAIR_NBITS 4
+#endif
+#define REG_PAIR_NMASK ((1 << REG_PAIR_NBITS) - 1)
+
+#ifdef DEBUG
+// Under DEBUG, we want to make sure that code doesn't accidentally confuse a reg pair value
+// with a simple register number. Thus, we offset the reg pair numbers so they are distinct
+// from all register numbers. Note that this increases the minimum size of a regPairNoSmall
+// type due to the additional bits used for this offset.
+#define REG_PAIR_FIRST (7 << REG_PAIR_NBITS)
+#define REG_PAIR_NBITS_DEBUG \
+ (REG_PAIR_NBITS + \
+ 3) // extra bits needed by the debug shifting (3 instead of 0 because we shift "7", not "1", above).
+C_ASSERT(REG_COUNT < REG_PAIR_FIRST); // make sure the register numbers (including REG_NA, ignoring fp/xmm regs on
+ // x86/x64) are distinct from the pair numbers
+#else
+#define REG_PAIR_FIRST 0
+#endif
+
+DECLARE_TYPED_ENUM(_regPairNo_enum, unsigned)
+{
+#define PAIRDEF(rlo, rhi) REG_PAIR_##rlo##rhi = REG_##rlo + (REG_##rhi << REG_PAIR_NBITS) + REG_PAIR_FIRST,
+#include "regpair.h"
+
+ REG_PAIR_LAST = (REG_COUNT - 1) + ((REG_COUNT - 1) << REG_PAIR_NBITS) + REG_PAIR_FIRST,
+
+ REG_PAIR_NONE = REG_PAIR_LAST + 1
+}
+END_DECLARE_TYPED_ENUM(_regPairNo_enum, unsigned)
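+
+// Worked example (added for clarity; assumes the usual x86 numbering from register.h,
+// i.e. REG_EAX == 0 and REG_EDX == 2): in a non-DEBUG x86 build REG_PAIR_NBITS is 4 and
+// REG_PAIR_FIRST is 0, so REG_PAIR_EAXEDX == REG_EAX + (REG_EDX << 4) == 0x20. The low
+// register is recovered with (pair & REG_PAIR_NMASK) and the high one with
+// (pair >> REG_PAIR_NBITS); in DEBUG builds the REG_PAIR_FIRST offset has to be removed first.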
+
+enum regPairMask
+{
+#define PAIRDEF(rlo, rhi) RBM_PAIR_##rlo##rhi = (RBM_##rlo | RBM_##rhi),
+#include "regpair.h"
+};
+
+/*****************************************************************************/
+
+// TODO-Cleanup: The types defined below are mildly confusing: why are there both?
+// regMaskSmall is large enough to represent the entire set of registers.
+// If regMaskSmall is smaller than a "natural" integer type, regMaskTP is wider, based
+// on a belief by the original authors of the JIT that in some situations it is more
+// efficient to have the wider representation. This belief should be tested, and if it
+// is false, then we should coalesce these two types into one (the Small width, probably).
+// In any case, we believe that it is OK to freely cast between these types; no information will
+// be lost.
+
+#ifdef _TARGET_ARMARCH_
+typedef unsigned __int64 regMaskTP;
+#else
+typedef unsigned regMaskTP;
+#endif
+
+#if REGMASK_BITS == 8
+typedef unsigned char regMaskSmall;
+#define REG_MASK_INT_FMT "%02X"
+#define REG_MASK_ALL_FMT "%02X"
+#elif REGMASK_BITS == 16
+typedef unsigned short regMaskSmall;
+#define REG_MASK_INT_FMT "%04X"
+#define REG_MASK_ALL_FMT "%04X"
+#elif REGMASK_BITS == 32
+typedef unsigned regMaskSmall;
+#define REG_MASK_INT_FMT "%08X"
+#define REG_MASK_ALL_FMT "%08X"
+#else
+typedef unsigned __int64 regMaskSmall;
+#define REG_MASK_INT_FMT "%04llX"
+#define REG_MASK_ALL_FMT "%016llX"
+#endif
+
+typedef _regNumber_enum regNumber;
+typedef _regPairNo_enum regPairNo;
+
+// LSRA currently converts freely between regNumber and regPairNo, so make sure they are the same size.
+C_ASSERT(sizeof(regPairNo) == sizeof(regNumber));
+
+typedef unsigned char regNumberSmall;
+
+#ifdef DEBUG
+
+// Under DEBUG, we shift the reg pair numbers to be independent of the regNumber range,
+// so we need additional bits. See the definition of REG_PAIR_FIRST for details.
+
+#if ((2 * REG_PAIR_NBITS) + REG_PAIR_NBITS_DEBUG) <= 16
+C_ASSERT(((2 * REG_PAIR_NBITS) + REG_PAIR_NBITS_DEBUG) > 8); // assert that nobody fits in 8 bits
+typedef unsigned short regPairNoSmall; // x86/x64: need 15 bits
+#else
+C_ASSERT(((2 * REG_PAIR_NBITS) + REG_PAIR_NBITS_DEBUG) <= 32);
+typedef unsigned regPairNoSmall; // arm: need 21 bits
+#endif
+
+#else // DEBUG
+
+#if (2 * REG_PAIR_NBITS) <= 8
+typedef unsigned char regPairNoSmall; // x86/x64: need 8 bits
+#else
+C_ASSERT((2 * REG_PAIR_NBITS) <= 16); // assert that nobody needs more than 16 bits
+typedef unsigned short regPairNoSmall; // arm: need 12 bits
+#endif
+
+#endif // DEBUG
+
+/*****************************************************************************/
+
+#define LEA_AVAILABLE 1
+#define SCALED_ADDR_MODES 1
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+#define DSP_SRC_OPER_LEFT 0
+#define DSP_SRC_OPER_RIGHT 1
+#define DSP_DST_OPER_LEFT 1
+#define DSP_DST_OPER_RIGHT 0
+#endif
+
+/*****************************************************************************/
+
+// The pseudorandom nop insertion is not necessary for current CoreCLR scenarios
+// #if defined(FEATURE_CORECLR) && !defined(_TARGET_ARM_)
+// #define PSEUDORANDOM_NOP_INSERTION
+// #endif
+
+/*****************************************************************************/
+
+// clang-format off
+#if defined(_TARGET_X86_)
+
+ #define CPU_LOAD_STORE_ARCH 0
+
+#ifdef LEGACY_BACKEND
+ #define CPU_LONG_USES_REGPAIR 1
+#else
+ #define CPU_LONG_USES_REGPAIR 0 // RyuJIT x86 doesn't use the regPairNo field to record register pairs for long
+ // type tree nodes, and instead either decomposes them (for non-atomic operations)
+ // or stores multiple regNumber values for operations such as calls where the
+ // register definitions are effectively "atomic".
+#endif // LEGACY_BACKEND
+
+ #define CPU_HAS_FP_SUPPORT 1
+ #define ROUND_FLOAT 1 // round intermed float expression results
+ #define CPU_HAS_BYTE_REGS 1
+ #define CPU_USES_BLOCK_MOVE 1
+
+#ifndef LEGACY_BACKEND
+ // TODO-CQ: Fine tune the following xxBlk threshold values:
+
+#define CPBLK_MOVS_LIMIT 16 // When generating code for CpBlk, this is the buffer size
+ // threshold to stop generating rep movs and switch to the helper call.
+ // NOTE: Using rep movs is currently disabled since we found it has bad performance
+ // on pre-Ivy Bridge hardware.
+
+ #define CPBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator to loop unroll CpBlk.
+ #define INITBLK_STOS_LIMIT 64 // When generating code for InitBlk, this is the buffer size
+ // threshold to stop generating rep stos and switch to the helper call.
+ // NOTE: Using rep stos is currently disabled since we found it has bad performance
+ // on pre-Ivy Bridge hardware.
+ #define INITBLK_UNROLL_LIMIT 128 // Upper bound to let the code generator to loop unroll InitBlk.
+ #define CPOBJ_NONGC_SLOTS_LIMIT 4 // For CpObj code generation, this is the threshold of the number
+ // of contiguous non-gc slots that triggers generating rep movsq instead of
+ // sequences of movsq instructions
+ // The way we're currently disabling rep movs/stos is by setting a limit less than
+ // their unrolling counterparts. When Lower decides which one to use, it
+ // always asks for the unrolling limit first, so you can say the JIT 'favors' unrolling.
+ // Setting the limit to something lower than that makes Lower never consider it.
+
+#endif // !LEGACY_BACKEND
+
+
+ #define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC
+ #define FEATURE_FIXED_OUT_ARGS 0 // X86 uses push instructions to pass args
+ #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
+ #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers
+ #define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp
+ #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (without ".tail" prefix) made as fast tail calls.
+ #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when
+ // the flags need to be set
+#ifdef LEGACY_BACKEND
+ #define FEATURE_MULTIREG_ARGS_OR_RET 0 // Support for passing and/or returning single values in more than one register
+ #define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register
+ #define FEATURE_MULTIREG_RET 0 // Support for returning a single value in more than one register
+ #define MAX_PASS_MULTIREG_BYTES 0 // No multireg arguments
+ #define MAX_RET_MULTIREG_BYTES 0 // No multireg return values
+#else
+ #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register
+ #define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register
+ #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register
+ #define MAX_PASS_MULTIREG_BYTES 0 // No multireg arguments (note this seems wrong as MAX_ARG_REG_COUNT is 2)
+ #define MAX_RET_MULTIREG_BYTES 8 // Maximum size of a struct that could be returned in more than one register
+#endif
+
+ #define MAX_ARG_REG_COUNT 2 // Maximum registers used to pass an argument.
+ #define MAX_RET_REG_COUNT 2 // Maximum registers used to return a value.
+
+#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
+ #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the
+ // RBM_CALLEE_TRASH registers
+#else
+ #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using
+ // ASM barriers we definitely don't have NOGC barriers).
+#endif
+ #define USER_ARGS_COME_LAST 0
+ #define EMIT_TRACK_STACK_DEPTH 1
+ #define TARGET_POINTER_SIZE 4 // equal to sizeof(void*) and the managed pointer size in bytes for this
+ // target
+ #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter,
+ // filter-handler, fault) and directly execute 'finally' clauses.
+ #define FEATURE_EH_FUNCLETS 0
+ #define FEATURE_EH_CALLFINALLY_THUNKS 0 // Generate call-to-finally code in "thunks" in the enclosing EH region,
+ // protected by "cloned finally" clauses.
+#ifndef LEGACY_BACKEND
+ #define FEATURE_STACK_FP_X87 0
+#else // LEGACY_BACKEND
+ #define FEATURE_STACK_FP_X87 1 // Use flat register file model
+#endif // LEGACY_BACKEND
+ #define FEATURE_X87_DOUBLES 0 // FP tree temps always use x87 doubles (when 1) or can be double or float
+ // (when 0).
+ #define ETW_EBP_FRAMED 1 // if 1 we cannot use EBP as a scratch register and must create EBP based
+ // frames for most methods
+ #define CSE_CONSTS 1 // Enable if we want to CSE constants
+
+#ifndef LEGACY_BACKEND
+ // The following defines are useful for iterating a regNumber
+ #define REG_FIRST REG_EAX
+ #define REG_INT_FIRST REG_EAX
+ #define REG_INT_LAST REG_EDI
+ #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1)
+ #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1))
+ #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1))
+
+ #define REG_FP_FIRST REG_XMM0
+ #define REG_FP_LAST REG_XMM7
+ #define FIRST_FP_ARGREG REG_XMM0
+ #define LAST_FP_ARGREG REG_XMM3
+ #define REG_FLTARG_0 REG_XMM0
+ #define REG_FLTARG_1 REG_XMM1
+ #define REG_FLTARG_2 REG_XMM2
+ #define REG_FLTARG_3 REG_XMM3
+
+ #define RBM_FLTARG_0 RBM_XMM0
+ #define RBM_FLTARG_1 RBM_XMM1
+ #define RBM_FLTARG_2 RBM_XMM2
+ #define RBM_FLTARG_3 RBM_XMM3
+
+ #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3)
+
+ #define RBM_ALLFLOAT (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM3 | RBM_XMM4 | RBM_XMM5 | RBM_XMM6 | RBM_XMM7)
+ #define RBM_ALLDOUBLE RBM_ALLFLOAT
+
+ // TODO-CQ: Currently we are following the x86 ABI for SSE2 registers.
+ // This should be reconsidered.
+ #define RBM_FLT_CALLEE_SAVED RBM_NONE
+ #define RBM_FLT_CALLEE_TRASH RBM_ALLFLOAT
+ #define REG_VAR_ORDER_FLT REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5, REG_XMM6, REG_XMM7
+
+ #define REG_FLT_CALLEE_SAVED_FIRST REG_XMM6
+ #define REG_FLT_CALLEE_SAVED_LAST REG_XMM7
+
+ #define XMM_REGSIZE_BYTES 16 // XMM register size in bytes
+ #define YMM_REGSIZE_BYTES 32 // YMM register size in bytes
+
+ #define REGNUM_BITS 6 // number of bits in a REG_*
+ #define TINY_REGNUM_BITS 6 // number used in a tiny instrdesc (same)
+
+#else // LEGACY_BACKEND
+ #define FEATURE_FP_REGALLOC 0 // Enabled if RegAlloc is used to enregister Floating Point LclVars
+
+ #define FP_STK_SIZE 8
+ #define RBM_ALLFLOAT (RBM_FPV0 | RBM_FPV1 | RBM_FPV2 | RBM_FPV3 | RBM_FPV4 | RBM_FPV5 | RBM_FPV6)
+ #define REG_FP_FIRST REG_FPV0
+ #define REG_FP_LAST REG_FPV7
+ #define FIRST_FP_ARGREG REG_NA
+ #define LAST_FP_ARGREG REG_NA
+
+
+ #define REGNUM_BITS 3 // number of bits in a REG_*
+ #define TINY_REGNUM_BITS 3
+ #define REGMASK_BITS 8 // number of bits in a REGNUM_MASK
+
+ #define RBM_FLTARG_REGS 0
+ #define RBM_FLT_CALLEE_SAVED 0
+ #define RBM_FLT_CALLEE_TRASH 0
+
+#endif // LEGACY_BACKEND
+
+ #define REGSIZE_BYTES 4 // number of bytes in one register
+ #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call.
+
+ #define CODE_ALIGN 1 // code alignment requirement
+ #define STACK_ALIGN 4 // stack alignment requirement
+ #define STACK_ALIGN_SHIFT 2 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
+ #define STACK_ALIGN_SHIFT_ALL 2 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units
+
+ #define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ESI|RBM_EDI)
+ #define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_ECX|RBM_EDX)
+
+ #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED)
+ #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH)
+
+ #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
+
+ #define REG_VAR_ORDER REG_EAX,REG_EDX,REG_ECX,REG_ESI,REG_EDI,REG_EBX
+ #define MAX_VAR_ORDER_SIZE 6
+ #define REG_TMP_ORDER REG_EAX,REG_EDX,REG_ECX,REG_EBX,REG_ESI,REG_EDI
+ #define RBM_TMP_ORDER RBM_EAX,RBM_EDX,RBM_ECX,RBM_EBX,RBM_ESI,RBM_EDI
+ #define REG_TMP_ORDER_COUNT 6
+
+ #define REG_PREDICT_ORDER REG_EAX,REG_EDX,REG_ECX,REG_EBX,REG_ESI,REG_EDI
+
+ // The order here is fixed: it must agree with an order assumed in eetwain...
+ #define REG_CALLEE_SAVED_ORDER REG_EDI,REG_ESI,REG_EBX,REG_EBP
+ #define RBM_CALLEE_SAVED_ORDER RBM_EDI,RBM_ESI,RBM_EBX,RBM_EBP
+
+ #define CNT_CALLEE_SAVED (4)
+ #define CNT_CALLEE_TRASH (3)
+ #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1)
+
+ #define CNT_CALLEE_SAVED_FLOAT (0)
+ #define CNT_CALLEE_TRASH_FLOAT (6)
+
+ #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED*REGSIZE_BYTES) // EBX,ESI,EDI,EBP
+
+ // We reuse the ESP register as an illegal value in the register predictor
+ #define RBM_ILLEGAL RBM_ESP
+ // We reuse the ESP register as a flag for last use handling in the register predictor
+ #define RBM_LASTUSE RBM_ESP
+ // We're using the encoding for ESP to indicate a half-long on the frame
+ #define REG_L_STK REG_ESP
+
+ // This is the first register in REG_TMP_ORDER
+ #define REG_TMP_0 REG_EAX
+ #define RBM_TMP_0 RBM_EAX
+
+ // This is the second register in REG_TMP_ORDER
+ #define REG_TMP_1 REG_EDX
+ #define RBM_TMP_1 RBM_EDX
+
+ #define REG_PAIR_TMP REG_PAIR_EAXEDX
+ #define REG_PAIR_TMP_REVERSE REG_PAIR_EDXEAX
+ #define RBM_PAIR_TMP (RBM_EAX|RBM_EDX)
+ #define REG_PAIR_TMP_LO REG_EAX
+ #define RBM_PAIR_TMP_LO RBM_EAX
+ #define REG_PAIR_TMP_HI REG_EDX
+ #define RBM_PAIR_TMP_HI RBM_EDX
+ #define PREDICT_PAIR_TMP PREDICT_PAIR_EAXEDX
+ #define PREDICT_PAIR_TMP_LO PREDICT_REG_EAX
+
+ // Used when calling the 64-bit Variable shift helper
+ #define REG_LNGARG_0 REG_PAIR_EAXEDX
+ #define RBM_LNGARG_0 (RBM_EAX|RBM_EDX)
+ #define PREDICT_PAIR_LNGARG_0 PREDICT_PAIR_EAXEDX
+
+ #define REG_LNGARG_LO REG_EAX
+ #define RBM_LNGARG_LO RBM_EAX
+ #define REG_LNGARG_HI REG_EDX
+ #define RBM_LNGARG_HI RBM_EDX
+ // register to hold shift amount
+ #define REG_SHIFT REG_ECX
+ #define RBM_SHIFT RBM_ECX
+ #define PREDICT_REG_SHIFT PREDICT_REG_ECX
+
+ // register to hold shift amount when shifting 64-bit values
+ #define REG_SHIFT_LNG REG_ECX
+ #define RBM_SHIFT_LNG RBM_ECX
+ #define PREDICT_REG_SHIFT_LNG PREDICT_REG_ECX
+
+ // This is a general scratch register that does not conflict with the argument registers
+ #define REG_SCRATCH REG_EAX
+ #define RBM_SCRATCH RBM_EAX
+
+ // Where is the exception object on entry to the handler block?
+ #define REG_EXCEPTION_OBJECT REG_EAX
+ #define RBM_EXCEPTION_OBJECT RBM_EAX
+
+ // Only used on ARM for GTF_CALL_M_VIRTSTUB_REL_INDIRECT
+ #define REG_JUMP_THUNK_PARAM REG_EAX
+ #define RBM_JUMP_THUNK_PARAM RBM_EAX
+
+#if NOGC_WRITE_BARRIERS
+ #define REG_WRITE_BARRIER REG_EDX
+ #define RBM_WRITE_BARRIER RBM_EDX
+
+ // We don't allow using ebp as a source register. Maybe we should only prevent this for ETW_EBP_FRAMED (but that is always set right now).
+ #define RBM_WRITE_BARRIER_SRC (RBM_EAX|RBM_ECX|RBM_EBX|RBM_ESI|RBM_EDI)
+
+ #define RBM_CALLEE_TRASH_NOGC RBM_EDX
+#endif // NOGC_WRITE_BARRIERS
+
+ // IL stub's secret parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM)
+ #define REG_SECRET_STUB_PARAM REG_EAX
+ #define RBM_SECRET_STUB_PARAM RBM_EAX
+
+ // VSD extra parameter
+ #define REG_VIRTUAL_STUB_PARAM REG_EAX
+ #define RBM_VIRTUAL_STUB_PARAM RBM_EAX
+ #define PREDICT_REG_VIRTUAL_STUB_PARAM PREDICT_REG_EAX
+
+ // Registers used by PInvoke frame setup
+ #define REG_PINVOKE_FRAME REG_EDI // EDI is p/invoke "Frame" pointer argument to CORINFO_HELP_INIT_PINVOKE_FRAME helper
+ #define RBM_PINVOKE_FRAME RBM_EDI
+ #define REG_PINVOKE_TCB REG_ESI // ESI is set to Thread Control Block (TCB) on return from
+ // CORINFO_HELP_INIT_PINVOKE_FRAME helper
+ #define RBM_PINVOKE_TCB RBM_ESI
+ #define REG_PINVOKE_SCRATCH REG_EAX // EAX is trashed by CORINFO_HELP_INIT_PINVOKE_FRAME helper
+ #define RBM_PINVOKE_SCRATCH RBM_EAX
+
+#ifdef LEGACY_BACKEND
+ #define REG_SPILL_CHOICE REG_EAX
+ #define RBM_SPILL_CHOICE RBM_EAX
+#endif // LEGACY_BACKEND
+
+ // The following defines are useful for iterating a regNumber
+ #define REG_FIRST REG_EAX
+ #define REG_INT_FIRST REG_EAX
+ #define REG_INT_LAST REG_EDI
+ #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1)
+ #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1))
+ #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1))
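+
+ // For example (illustrative only), all integer registers can be enumerated with:
+ //
+ //   for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
+ //   {
+ //       // ... inspect 'reg' ...
+ //   }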
+
+ // genCodeForCall() moves the target address of the tailcall into this register, before pushing it on the stack
+ #define REG_TAILCALL_ADDR REG_EAX
+
+ // Which registers are int and long values returned in?
+ #define REG_INTRET REG_EAX
+ #define RBM_INTRET RBM_EAX
+ #define REG_LNGRET REG_PAIR_EAXEDX
+ #define RBM_LNGRET (RBM_EDX|RBM_EAX)
+ #define REG_LNGRET_LO REG_EAX
+ #define RBM_LNGRET_LO RBM_EAX
+ #define REG_LNGRET_HI REG_EDX
+ #define RBM_LNGRET_HI RBM_EDX
+
+ #define REG_FLOATRET REG_NA
+ #define RBM_FLOATRET RBM_NONE
+ #define RBM_DOUBLERET RBM_NONE
+
+ // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper
+ #define RBM_STOP_FOR_GC_TRASH RBM_CALLEE_TRASH
+
+ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. On x86, this helper has a custom calling
+ // convention that takes EDI as argument (but doesn't trash it), trashes EAX, and returns ESI.
+ #define RBM_INIT_PINVOKE_FRAME_TRASH (RBM_PINVOKE_SCRATCH | RBM_PINVOKE_TCB)
+
+ #define REG_FPBASE REG_EBP
+ #define RBM_FPBASE RBM_EBP
+ #define STR_FPBASE "ebp"
+ #define REG_SPBASE REG_ESP
+ #define RBM_SPBASE RBM_ESP
+ #define STR_SPBASE "esp"
+
+ #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved EBP and return address
+
+ #define MAX_REG_ARG 2
+ #define MAX_FLOAT_REG_ARG 0
+ #define REG_ARG_FIRST REG_ECX
+ #define REG_ARG_LAST REG_EDX
+ #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots
+
+ #define REG_ARG_0 REG_ECX
+ #define REG_ARG_1 REG_EDX
+
+ SELECTANY const regNumber intArgRegs [] = {REG_ECX, REG_EDX};
+ SELECTANY const regMaskTP intArgMasks[] = {RBM_ECX, RBM_EDX};
+#if !FEATURE_STACK_FP_X87
+ SELECTANY const regNumber fltArgRegs [] = {REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3};
+ SELECTANY const regMaskTP fltArgMasks[] = {RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3};
+#endif // FEATURE_STACK_FP_X87
+
+ #define RBM_ARG_0 RBM_ECX
+ #define RBM_ARG_1 RBM_EDX
+
+ #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1)
+
+ // What sort of reloc do we use for [disp32] address mode
+ #define IMAGE_REL_BASED_DISP32 IMAGE_REL_BASED_HIGHLOW
+
+ // What sort of reloc do we use for 'moffset' address mode (for 'mov eax, moffset' or 'mov moffset, eax')
+ #define IMAGE_REL_BASED_MOFFSET IMAGE_REL_BASED_HIGHLOW
+
+ // Pointer-sized string move instructions
+ #define INS_movsp INS_movsd
+ #define INS_r_movsp INS_r_movsd
+ #define INS_stosp INS_stosd
+ #define INS_r_stosp INS_r_stosd
+
+#elif defined(_TARGET_AMD64_)
+ // TODO-AMD64-CQ: Fine tune the following xxBlk threshold values:
+
+ #define CPU_LOAD_STORE_ARCH 0
+ #define CPU_LONG_USES_REGPAIR 0
+ #define CPU_HAS_FP_SUPPORT 1
+ #define ROUND_FLOAT 0 // Do not round intermediate float expression results
+ #define CPU_HAS_BYTE_REGS 0
+ #define CPU_USES_BLOCK_MOVE 1
+
+ #define CPBLK_MOVS_LIMIT 16 // When generating code for CpBlk, this is the buffer size
+ // threshold to stop generating rep movs and switch to the helper call.
+ // NOTE: Using rep movs is currently disabled since we found it has bad performance
+ // on pre-Ivy Bridge hardware.
+
+ #define CPBLK_UNROLL_LIMIT 64 // Upper bound below which the code generator will unroll CpBlk.
+ #define INITBLK_STOS_LIMIT 64 // When generating code for InitBlk, this is the buffer size
+ // threshold to stop generating rep stos and switch to the helper call.
+ // NOTE: Using rep stos is currently disabled since we found it has bad performance
+ // on pre-Ivy Bridge hardware.
+ #define INITBLK_UNROLL_LIMIT 128 // Upper bound below which the code generator will unroll InitBlk.
+ #define CPOBJ_NONGC_SLOTS_LIMIT 4 // For CpObj code generation, this is the threshold of the number
+ // of contiguous non-GC slots that triggers generating rep movsq instead of
+ // a sequence of movsq instructions
+
+ // The way we currently disable rep movs/stos is by setting their limits below the
+ // corresponding unrolling limits. When Lower decides which form to generate, it checks the
+ // unrolling limit first, so the JIT 'favors' unrolling; setting the rep movs/stos limit
+ // below the unrolling limit means Lower never considers it.
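+
+ // A minimal sketch of the size-based choice described above (assumed shape only, not the
+ // actual Lower code), using the CpBlk values defined here:
+ //
+ //   if (size <= CPBLK_UNROLL_LIMIT)        // 64: unroll the copy
+ //       emit an unrolled sequence of moves;
+ //   else if (size <= CPBLK_MOVS_LIMIT)     // 16: never taken while this limit is below
+ //       emit rep movs;                     //     CPBLK_UNROLL_LIMIT, which disables rep movs
+ //   else
+ //       call the CpBlk/memcpy helper;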
+
+
+#ifdef FEATURE_SIMD
+ #define ALIGN_SIMD_TYPES 1 // whether SIMD type locals are to be aligned
+#if defined(UNIX_AMD64_ABI) || !defined(FEATURE_AVX_SUPPORT)
+ #define FEATURE_PARTIAL_SIMD_CALLEE_SAVE 0 // Whether SIMD registers are partially saved at calls
+#else // !UNIX_AMD64_ABI && !FEATURE_AVX_SUPPORT
+ #define FEATURE_PARTIAL_SIMD_CALLEE_SAVE 1 // Whether SIMD registers are partially saved at calls
+#endif // !UNIX_AMD64_ABI
+#endif
+ #define FEATURE_WRITE_BARRIER 1 // Generate the WriteBarrier calls for GC (currently not the x86-style register-customized barriers)
+ #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog
+ #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
+ #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers
+ #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp
+ #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
+ #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
+#ifdef UNIX_AMD64_ABI
+ #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register
+ #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register
+ #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register
+ #define FEATURE_STRUCT_CLASSIFIER 1 // Uses a classifier function to determine if structs are passed/returned in more than one register
+ #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (Max is two SIMD16s)
+ #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register (Max is two SIMD16s)
+ #define MAX_ARG_REG_COUNT 2 // Maximum registers used to pass a single argument in multiple registers.
+ #define MAX_RET_REG_COUNT 2 // Maximum registers used to return a value.
+#else // !UNIX_AMD64_ABI
+ #define WINDOWS_AMD64_ABI // Uses the Windows ABI for AMD64
+ #define FEATURE_MULTIREG_ARGS_OR_RET 0 // Support for passing and/or returning single values in more than one register
+ #define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register
+ #define FEATURE_MULTIREG_RET 0 // Support for returning a single value in more than one register
+ #define MAX_PASS_MULTIREG_BYTES 0 // No multireg arguments
+ #define MAX_RET_MULTIREG_BYTES 0 // No multireg return values
+ #define MAX_ARG_REG_COUNT 1 // Maximum registers used to pass a single argument (no arguments are passed using multiple registers)
+ #define MAX_RET_REG_COUNT 1 // Maximum registers used to return a value.
+#endif // !UNIX_AMD64_ABI
+
+#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
+ #define NOGC_WRITE_BARRIERS 0 // We do NOT have specialized WriteBarrier JIT helpers that avoid trashing the RBM_CALLEE_TRASH registers
+#else
+ #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using ASM barriers we definitely don't have NOGC barriers).
+#endif
+ #define USER_ARGS_COME_LAST 1
+ #define EMIT_TRACK_STACK_DEPTH 1
+ #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target
+ #define FEATURE_EH 1 // Set to 0 (to aid platform bring-up) to eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses.
+ #define FEATURE_EH_FUNCLETS 1
+ #define FEATURE_EH_CALLFINALLY_THUNKS 1 // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
+ #define FEATURE_STACK_FP_X87 0
+#ifdef UNIX_AMD64_ABI
+ #define ETW_EBP_FRAMED 1 // if 1 we cannot use EBP as a scratch register and must create EBP based frames for most methods
+#else // !UNIX_AMD64_ABI
+ #define ETW_EBP_FRAMED 0 // if 1 we cannot use EBP as a scratch register and must create EBP based frames for most methods
+#endif // !UNIX_AMD64_ABI
+ #define FEATURE_FP_REGALLOC 0 // Enabled if RegAlloc is used to enregister Floating Point LclVars
+ #define CSE_CONSTS 1 // Enable if we want to CSE constants
+
+ #define RBM_ALLFLOAT (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM3 | RBM_XMM4 | RBM_XMM5 | RBM_XMM6 | RBM_XMM7 | RBM_XMM8 | RBM_XMM9 | RBM_XMM10 | RBM_XMM11 | RBM_XMM12 | RBM_XMM13 | RBM_XMM14 | RBM_XMM15)
+ #define RBM_ALLDOUBLE RBM_ALLFLOAT
+ #define REG_FP_FIRST REG_XMM0
+ #define REG_FP_LAST REG_XMM15
+ #define FIRST_FP_ARGREG REG_XMM0
+
+#ifdef UNIX_AMD64_ABI
+ #define LAST_FP_ARGREG REG_XMM7
+#else // !UNIX_AMD64_ABI
+ #define LAST_FP_ARGREG REG_XMM3
+#endif // !UNIX_AMD64_ABI
+
+ #define REGNUM_BITS 6 // number of bits in a REG_*
+ #define TINY_REGNUM_BITS 6 // number used in a tiny instrdesc (same)
+ #define REGMASK_BITS 32 // number of bits in a REGNUM_MASK
+ #define REGSIZE_BYTES 8 // number of bytes in one register
+ #define XMM_REGSIZE_BYTES 16 // XMM register size in bytes
+ #define YMM_REGSIZE_BYTES 32 // YMM register size in bytes
+
+ #define CODE_ALIGN 1 // code alignment requirement
+ #define STACK_ALIGN 16 // stack alignment requirement
+ #define STACK_ALIGN_SHIFT 3 // Shift-right amount to convert stack size in bytes to size in pointer sized words
+ #define STACK_ALIGN_SHIFT_ALL 4 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units
+
+#if ETW_EBP_FRAMED
+ #define RBM_ETW_FRAMED_EBP RBM_NONE
+ #define RBM_ETW_FRAMED_EBP_LIST
+ #define REG_ETW_FRAMED_EBP_LIST
+ #define REG_ETW_FRAMED_EBP_COUNT 0
+#else // !ETW_EBP_FRAMED
+ #define RBM_ETW_FRAMED_EBP RBM_EBP
+ #define RBM_ETW_FRAMED_EBP_LIST RBM_EBP,
+ #define REG_ETW_FRAMED_EBP_LIST REG_EBP,
+ #define REG_ETW_FRAMED_EBP_COUNT 1
+#endif // !ETW_EBP_FRAMED
+
+#ifdef UNIX_AMD64_ABI
+ #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call.
+
+ #define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ETW_FRAMED_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15)
+ #define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_RDI|RBM_RSI|RBM_EDX|RBM_ECX|RBM_R8|RBM_R9|RBM_R10|RBM_R11)
+ #define RBM_FLT_CALLEE_SAVED (0)
+ #define RBM_FLT_CALLEE_TRASH (RBM_XMM0|RBM_XMM1|RBM_XMM2|RBM_XMM3|RBM_XMM4|RBM_XMM5|RBM_XMM6|RBM_XMM7| \
+ RBM_XMM8|RBM_XMM9|RBM_XMM10|RBM_XMM11|RBM_XMM12|RBM_XMM13|RBM_XMM14|RBM_XMM15)
+#else // !UNIX_AMD64_ABI
+#define MIN_ARG_AREA_FOR_CALL (4 * REGSIZE_BYTES) // Minimum required outgoing argument space for a call.
+
+ #define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ESI|RBM_EDI|RBM_ETW_FRAMED_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15)
+ #define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_ECX|RBM_EDX|RBM_R8|RBM_R9|RBM_R10|RBM_R11)
+ #define RBM_FLT_CALLEE_SAVED (RBM_XMM6|RBM_XMM7|RBM_XMM8|RBM_XMM9|RBM_XMM10|RBM_XMM11|RBM_XMM12|RBM_XMM13|RBM_XMM14|RBM_XMM15)
+ #define RBM_FLT_CALLEE_TRASH (RBM_XMM0|RBM_XMM1|RBM_XMM2|RBM_XMM3|RBM_XMM4|RBM_XMM5)
+#endif // !UNIX_AMD64_ABI
+
+ #define REG_FLT_CALLEE_SAVED_FIRST REG_XMM6
+ #define REG_FLT_CALLEE_SAVED_LAST REG_XMM15
+
+ #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH)
+ #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED)
+
+ #define RBM_CALLEE_TRASH_NOGC RBM_CALLEE_TRASH
+
+ #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
+
+#if 0
+#define REG_VAR_ORDER REG_EAX,REG_EDX,REG_ECX,REG_ESI,REG_EDI,REG_EBX,REG_ETW_FRAMED_EBP_LIST \
+ REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13
+#else
+ // TEMPORARY ORDER TO AVOID CALLEE-SAVES
+ // TODO-CQ: Review this and set appropriately
+#ifdef UNIX_AMD64_ABI
+ #define REG_VAR_ORDER REG_EAX,REG_EDI,REG_ESI, \
+ REG_EDX,REG_ECX,REG_R8,REG_R9, \
+ REG_R10,REG_R11,REG_EBX,REG_ETW_FRAMED_EBP_LIST \
+ REG_R14,REG_R15,REG_R12,REG_R13
+#else // !UNIX_AMD64_ABI
+ #define REG_VAR_ORDER REG_EAX,REG_EDX,REG_ECX, \
+ REG_R8,REG_R9,REG_R10,REG_R11, \
+ REG_ESI,REG_EDI,REG_EBX,REG_ETW_FRAMED_EBP_LIST \
+ REG_R14,REG_R15,REG_R12,REG_R13
+#endif // !UNIX_AMD64_ABI
+#endif
+
+ #define REG_VAR_ORDER_FLT REG_XMM0,REG_XMM1,REG_XMM2,REG_XMM3,REG_XMM4,REG_XMM5,REG_XMM6,REG_XMM7,REG_XMM8,REG_XMM9,REG_XMM10,REG_XMM11,REG_XMM12,REG_XMM13,REG_XMM14,REG_XMM15
+
+#ifdef UNIX_AMD64_ABI
+ #define REG_TMP_ORDER REG_EAX,REG_EDI,REG_ESI,REG_EDX,REG_ECX,REG_EBX,REG_ETW_FRAMED_EBP_LIST \
+ REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13
+#else // !UNIX_AMD64_ABI
+ #define MAX_VAR_ORDER_SIZE (14 + REG_ETW_FRAMED_EBP_COUNT)
+ #define REG_TMP_ORDER REG_EAX,REG_EDX,REG_ECX,REG_EBX,REG_ESI,REG_EDI,REG_ETW_FRAMED_EBP_LIST \
+ REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13
+#endif // !UNIX_AMD64_ABI
+
+#ifdef UNIX_AMD64_ABI
+ #define REG_PREDICT_ORDER REG_EAX,REG_EDI,REG_ESI,REG_EDX,REG_ECX,REG_EBX,REG_ETW_FRAMED_EBP_LIST \
+ REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13
+ #define CNT_CALLEE_SAVED (5 + REG_ETW_FRAMED_EBP_COUNT)
+ #define CNT_CALLEE_TRASH (9)
+ #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED)
+
+ #define CNT_CALLEE_SAVED_FLOAT (0)
+ #define CNT_CALLEE_TRASH_FLOAT (16)
+
+ #define REG_CALLEE_SAVED_ORDER REG_EBX,REG_ETW_FRAMED_EBP_LIST REG_R12,REG_R13,REG_R14,REG_R15
+ #define RBM_CALLEE_SAVED_ORDER RBM_EBX,RBM_ETW_FRAMED_EBP_LIST RBM_R12,RBM_R13,RBM_R14,RBM_R15
+#else // !UNIX_AMD64_ABI
+ #define REG_TMP_ORDER_COUNT (14 + REG_ETW_FRAMED_EBP_COUNT)
+ #define REG_PREDICT_ORDER REG_EAX,REG_EDX,REG_ECX,REG_EBX,REG_ESI,REG_EDI,REG_ETW_FRAMED_EBP_LIST \
+ REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13
+ #define CNT_CALLEE_SAVED (7 + REG_ETW_FRAMED_EBP_COUNT)
+ #define CNT_CALLEE_TRASH (7)
+ #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED)
+
+ #define CNT_CALLEE_SAVED_FLOAT (10)
+ #define CNT_CALLEE_TRASH_FLOAT (6)
+
+ #define REG_CALLEE_SAVED_ORDER REG_EBX,REG_ESI,REG_EDI,REG_ETW_FRAMED_EBP_LIST REG_R12,REG_R13,REG_R14,REG_R15
+ #define RBM_CALLEE_SAVED_ORDER RBM_EBX,RBM_ESI,RBM_EDI,RBM_ETW_FRAMED_EBP_LIST RBM_R12,RBM_R13,RBM_R14,RBM_R15
+#endif // !UNIX_AMD64_ABI
+
+ #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED*REGSIZE_BYTES)
+ #define CALLEE_SAVED_FLOAT_MAXSZ (CNT_CALLEE_SAVED_FLOAT*16)
+
+ // We reuse the ESP register as an illegal value in the register predictor
+ #define RBM_ILLEGAL RBM_ESP
+ // We reuse the ESP register as a flag for last use handling in the register predictor
+ #define RBM_LASTUSE RBM_ESP
+ // We're using the encoding for ESP to indicate a half-long on the frame
+ #define REG_L_STK REG_ESP
+
+ // This is the first register in REG_TMP_ORDER
+ #define REG_TMP_0 REG_EAX
+ #define RBM_TMP_0 RBM_EAX
+
+ // This is the second register in REG_TMP_ORDER
+#ifdef UNIX_AMD64_ABI
+ #define REG_TMP_1 REG_EDI
+ #define RBM_TMP_1 RBM_EDI
+#else // !UNIX_AMD64_ABI
+ #define REG_TMP_1 REG_EDX
+ #define RBM_TMP_1 RBM_EDX
+#endif // !UNIX_AMD64_ABI
+ #define REG_PAIR_TMP REG_PAIR_EAXEDX
+ #define RBM_PAIR_TMP (RBM_EAX|RBM_EDX)
+ #define REG_PAIR_TMP_LO REG_EAX
+ #define RBM_PAIR_TMP_LO RBM_EAX
+ #define REG_PAIR_TMP_HI REG_EDX
+ #define RBM_PAIR_TMP_HI RBM_EDX
+ #define PREDICT_PAIR_TMP PREDICT_PAIR_RAXRDX
+ #define PREDICT_PAIR_TMP_LO PREDICT_REG_EAX
+
+ // register to hold shift amount
+ #define REG_SHIFT REG_ECX
+ #define RBM_SHIFT RBM_ECX
+ #define PREDICT_REG_SHIFT PREDICT_REG_ECX
+
+ // This is a general scratch register that does not conflict with the argument registers
+ #define REG_SCRATCH REG_EAX
+ #define RBM_SCRATCH RBM_EAX
+
+// Where is the exception object on entry to the handler block?
+#ifdef UNIX_AMD64_ABI
+ #define REG_EXCEPTION_OBJECT REG_ESI
+ #define RBM_EXCEPTION_OBJECT RBM_ESI
+#else // !UNIX_AMD64_ABI
+ #define REG_EXCEPTION_OBJECT REG_EDX
+ #define RBM_EXCEPTION_OBJECT RBM_EDX
+#endif // !UNIX_AMD64_ABI
+
+ #define REG_JUMP_THUNK_PARAM REG_EAX
+ #define RBM_JUMP_THUNK_PARAM RBM_EAX
+
+#if NOGC_WRITE_BARRIERS
+ #define REG_WRITE_BARRIER REG_EDX
+ #define RBM_WRITE_BARRIER RBM_EDX
+#endif
+
+ // Register to be used for emitting helper calls whose call target is an indir of an
+ // absolute memory address in case of Rel32 overflow i.e. a data address could not be
+ // encoded as PC-relative 32-bit offset.
+ //
+ // Notes:
+ // 1) RAX is a callee-trash register that is not used for passing parameters, and
+ //    using it also results in a smaller instruction encoding.
+ // 2) The profiler Leave callback requires the return value to be preserved
+ //    in some form. We can use a custom calling convention for the Leave callback;
+ //    for example, the return value could be preserved in RCX so that it is
+ //    available to the profiler.
+ #define REG_DEFAULT_HELPER_CALL_TARGET REG_RAX
+
+ // GenericPInvokeCalliHelper VASigCookie Parameter
+ #define REG_PINVOKE_COOKIE_PARAM REG_R11
+ #define RBM_PINVOKE_COOKIE_PARAM RBM_R11
+ #define PREDICT_REG_PINVOKE_COOKIE_PARAM PREDICT_REG_R11
+
+ // GenericPInvokeCalliHelper unmanaged target Parameter
+ #define REG_PINVOKE_TARGET_PARAM REG_R10
+ #define RBM_PINVOKE_TARGET_PARAM RBM_R10
+ #define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R10
+
+ // IL stub's secret MethodDesc parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM)
+ #define REG_SECRET_STUB_PARAM REG_R10
+ #define RBM_SECRET_STUB_PARAM RBM_R10
+
+ // VSD extra parameter (slot address)
+ #define REG_VIRTUAL_STUB_PARAM REG_R11
+ #define RBM_VIRTUAL_STUB_PARAM RBM_R11
+ #define PREDICT_REG_VIRTUAL_STUB_PARAM PREDICT_REG_R11
+
+ // Registers used by PInvoke frame setup
+ #define REG_PINVOKE_FRAME REG_EDI
+ #define RBM_PINVOKE_FRAME RBM_EDI
+ #define REG_PINVOKE_TCB REG_EAX
+ #define RBM_PINVOKE_TCB RBM_EAX
+ #define REG_PINVOKE_SCRATCH REG_EAX
+ #define RBM_PINVOKE_SCRATCH RBM_EAX
+
+ // The following defines are useful for iterating a regNumber
+ #define REG_FIRST REG_EAX
+ #define REG_INT_FIRST REG_EAX
+ #define REG_INT_LAST REG_R15
+ #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1)
+ #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1))
+ #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1))
+
+ // genCodeForCall() moves the target address of the tailcall into this register, before pushing it on the stack
+ #define REG_TAILCALL_ADDR REG_RDX
+
+ // Which registers are int and long values returned in?
+ #define REG_INTRET REG_EAX
+ #define RBM_INTRET RBM_EAX
+
+ #define REG_LNGRET REG_EAX
+ #define RBM_LNGRET RBM_EAX
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ #define REG_INTRET_1 REG_RDX
+ #define RBM_INTRET_1 RBM_RDX
+
+ #define REG_LNGRET_1 REG_RDX
+ #define RBM_LNGRET_1 RBM_RDX
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+
+ #define REG_FLOATRET REG_XMM0
+ #define RBM_FLOATRET RBM_XMM0
+ #define REG_DOUBLERET REG_XMM0
+ #define RBM_DOUBLERET RBM_XMM0
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+#define REG_FLOATRET_1 REG_XMM1
+#define RBM_FLOATRET_1 RBM_XMM1
+
+#define REG_DOUBLERET_1 REG_XMM1
+#define RBM_DOUBLERET_1 RBM_XMM1
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ #define REG_FPBASE REG_EBP
+ #define RBM_FPBASE RBM_EBP
+ #define STR_FPBASE "rbp"
+ #define REG_SPBASE REG_ESP
+ #define RBM_SPBASE RBM_ESP
+ #define STR_SPBASE "rsp"
+
+ #define FIRST_ARG_STACK_OFFS (REGSIZE_BYTES) // return address
+
+#ifdef UNIX_AMD64_ABI
+ #define MAX_REG_ARG 6
+ #define MAX_FLOAT_REG_ARG 8
+ #define REG_ARG_FIRST REG_EDI
+ #define REG_ARG_LAST REG_R9
+ #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots
+
+ #define REG_ARG_0 REG_EDI
+ #define REG_ARG_1 REG_ESI
+ #define REG_ARG_2 REG_EDX
+ #define REG_ARG_3 REG_ECX
+ #define REG_ARG_4 REG_R8
+ #define REG_ARG_5 REG_R9
+
+ SELECTANY const regNumber intArgRegs[] = { REG_EDI, REG_ESI, REG_EDX, REG_ECX, REG_R8, REG_R9 };
+ SELECTANY const regMaskTP intArgMasks[] = { RBM_RDI, RBM_RSI, RBM_EDX, RBM_ECX, RBM_R8, RBM_R9 };
+ SELECTANY const regNumber fltArgRegs[] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5, REG_XMM6, REG_XMM7 };
+ SELECTANY const regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3, RBM_XMM4, RBM_XMM5, RBM_XMM6, RBM_XMM7 };
+
+ #define RBM_ARG_0 RBM_RDI
+ #define RBM_ARG_1 RBM_RSI
+ #define RBM_ARG_2 RBM_EDX
+ #define RBM_ARG_3 RBM_ECX
+ #define RBM_ARG_4 RBM_R8
+ #define RBM_ARG_5 RBM_R9
+#else // !UNIX_AMD64_ABI
+ #define MAX_REG_ARG 4
+ #define MAX_FLOAT_REG_ARG 4
+ #define REG_ARG_FIRST REG_ECX
+ #define REG_ARG_LAST REG_R9
+ #define INIT_ARG_STACK_SLOT 4 // 4 outgoing reserved stack slots
+
+ #define REG_ARG_0 REG_ECX
+ #define REG_ARG_1 REG_EDX
+ #define REG_ARG_2 REG_R8
+ #define REG_ARG_3 REG_R9
+
+ SELECTANY const regNumber intArgRegs[] = { REG_ECX, REG_EDX, REG_R8, REG_R9 };
+ SELECTANY const regMaskTP intArgMasks[] = { RBM_ECX, RBM_EDX, RBM_R8, RBM_R9 };
+ SELECTANY const regNumber fltArgRegs[] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3 };
+ SELECTANY const regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3 };
+
+ #define RBM_ARG_0 RBM_ECX
+ #define RBM_ARG_1 RBM_EDX
+ #define RBM_ARG_2 RBM_R8
+ #define RBM_ARG_3 RBM_R9
+#endif // !UNIX_AMD64_ABI
+
+ #define REG_FLTARG_0 REG_XMM0
+ #define REG_FLTARG_1 REG_XMM1
+ #define REG_FLTARG_2 REG_XMM2
+ #define REG_FLTARG_3 REG_XMM3
+
+ #define RBM_FLTARG_0 RBM_XMM0
+ #define RBM_FLTARG_1 RBM_XMM1
+ #define RBM_FLTARG_2 RBM_XMM2
+ #define RBM_FLTARG_3 RBM_XMM3
+
+#ifdef UNIX_AMD64_ABI
+ #define REG_FLTARG_4 REG_XMM4
+ #define REG_FLTARG_5 REG_XMM5
+ #define REG_FLTARG_6 REG_XMM6
+ #define REG_FLTARG_7 REG_XMM7
+
+ #define RBM_FLTARG_4 RBM_XMM4
+ #define RBM_FLTARG_5 RBM_XMM5
+ #define RBM_FLTARG_6 RBM_XMM6
+ #define RBM_FLTARG_7 RBM_XMM7
+
+ #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3|RBM_ARG_4|RBM_ARG_5)
+ #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3|RBM_FLTARG_4|RBM_FLTARG_5|RBM_FLTARG_6|RBM_FLTARG_7)
+#else // !UNIX_AMD64_ABI
+ #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3)
+ #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3)
+#endif // !UNIX_AMD64_ABI
+
+ // The registers trashed by profiler enter/leave/tailcall hook
+ // See vm\amd64\asmhelpers.asm for more details.
+ #define RBM_PROFILER_ENTER_TRASH RBM_CALLEE_TRASH
+ #define RBM_PROFILER_LEAVE_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET))
+
+ // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper.
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // See vm\amd64\unixasmhelpers.S for more details.
+ //
+ // On Unix a struct of size >=9 and <=16 bytes in size is returned in two return registers.
+ // The return registers could be any two from the set { RAX, RDX, XMM0, XMM1 }.
+ // STOP_FOR_GC helper preserves all the 4 possible return registers.
+ #define RBM_STOP_FOR_GC_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET | RBM_FLOATRET_1 | RBM_INTRET_1))
+#else
+ // See vm\amd64\asmhelpers.asm for more details.
+ #define RBM_STOP_FOR_GC_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET))
+#endif
+
+ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
+ #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
+
+ // What sort of reloc do we use for [disp32] address mode
+ #define IMAGE_REL_BASED_DISP32 IMAGE_REL_BASED_REL32
+
+ // What sort of reloc do we use for 'moffset' address mode (for 'mov eax, moffset' or 'mov moffset, eax')
+ #define IMAGE_REL_BASED_MOFFSET IMAGE_REL_BASED_DIR64
+
+ // Pointer-sized string move instructions
+ #define INS_movsp INS_movsq
+ #define INS_r_movsp INS_r_movsq
+ #define INS_stosp INS_stosq
+ #define INS_r_stosp INS_r_stosq
+
+#elif defined(_TARGET_ARM_)
+
+ // TODO-ARM-CQ: Use shift for division by power of 2
+ // TODO-ARM-CQ: Check for sdiv/udiv at runtime and generate it if available
+ #define USE_HELPERS_FOR_INT_DIV 1 // BeagleBoard (ARMv7A) doesn't support SDIV/UDIV
+ #define CPU_LOAD_STORE_ARCH 1
+ #define CPU_LONG_USES_REGPAIR 1
+ #define CPU_HAS_FP_SUPPORT 1
+ #define ROUND_FLOAT 0 // Do not round intermediate float expression results
+ #define CPU_HAS_BYTE_REGS 0
+ #define CPU_USES_BLOCK_MOVE 0
+ #define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC
+ #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog
+ #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
+ #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers
+ #define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp
+ #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
+ #define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
+ #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register (including HFA support)
+ #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register (including passing HFAs)
+ #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register (including HFA returns)
+ #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine if structs are passed/returned in more than one register
+ #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (Max is an HFA of 4 doubles)
+ #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles)
+ #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA)
+ #define MAX_RET_REG_COUNT 4 // Maximum registers used to return a value.
+
+#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
+ #define NOGC_WRITE_BARRIERS 0 // We do NOT have specialized WriteBarrier JIT helpers that avoid trashing the RBM_CALLEE_TRASH registers
+#else
+ #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using ASM barriers we definitely don't have NOGC barriers).
+#endif
+ #define USER_ARGS_COME_LAST 1
+ #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround. For both ARM and AMD64, the frame size is fixed, so we don't really
+ // need to track stack depth, but this is currently necessary to get GC information reported at call sites.
+ #define TARGET_POINTER_SIZE 4 // equal to sizeof(void*) and the managed pointer size in bytes for this target
+ #define FEATURE_EH 1 // Set to 0 (to aid platform bring-up) to eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses.
+ #define FEATURE_EH_FUNCLETS 1
+ #define FEATURE_EH_CALLFINALLY_THUNKS 0 // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
+ #define FEATURE_STACK_FP_X87 0
+ #define ETW_EBP_FRAMED 1 // if 1 we cannot use REG_FP as a scratch register and must setup the frame pointer for most methods
+ #define FEATURE_FP_REGALLOC 1 // Enabled if RegAlloc is used to enregister Floating Point LclVars
+ #define CSE_CONSTS 1 // Enable if we want to CSE constants
+
+ #define REG_FP_FIRST REG_F0
+ #define REG_FP_LAST REG_F31
+ #define FIRST_FP_ARGREG REG_F0
+ #define LAST_FP_ARGREG REG_F15
+
+ #define REGNUM_BITS 6 // number of bits in a REG_*
+ #define TINY_REGNUM_BITS 4 // number of bits we will use for a tiny instr desc (may not use float)
+ #define REGMASK_BITS 64 // number of bits in a REGNUM_MASK
+ #define REGSIZE_BYTES 4 // number of bytes in one register
+ #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call.
+
+ #define CODE_ALIGN 2 // code alignment requirement
+ #define STACK_ALIGN 8 // stack alignment requirement
+ #define STACK_ALIGN_SHIFT 2 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
+
+ #define RBM_INT_CALLEE_SAVED (RBM_R4|RBM_R5|RBM_R6|RBM_R7|RBM_R8|RBM_R9|RBM_R10)
+ #define RBM_INT_CALLEE_TRASH (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R12|RBM_LR)
+ #define RBM_FLT_CALLEE_SAVED (RBM_F16|RBM_F17|RBM_F18|RBM_F19|RBM_F20|RBM_F21|RBM_F22|RBM_F23|RBM_F24|RBM_F25|RBM_F26|RBM_F27|RBM_F28|RBM_F29|RBM_F30|RBM_F31)
+ #define RBM_FLT_CALLEE_TRASH (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7|RBM_F8|RBM_F9|RBM_F10|RBM_F11|RBM_F12|RBM_F13|RBM_F14|RBM_F15)
+
+ #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED)
+ #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH)
+ #define RBM_CALLEE_TRASH_NOGC (RBM_R2|RBM_R3|RBM_LR)
+
+ #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
+ #define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH)
+ #define RBM_ALLDOUBLE (RBM_F0|RBM_F2|RBM_F4|RBM_F6|RBM_F8|RBM_F10|RBM_F12|RBM_F14|RBM_F16|RBM_F18|RBM_F20|RBM_F22|RBM_F24|RBM_F26|RBM_F28|RBM_F30)
+
+ #define REG_VAR_ORDER REG_R3,REG_R2,REG_R1,REG_R0,REG_R4,REG_LR,REG_R12,\
+ REG_R5,REG_R6,REG_R7,REG_R8,REG_R9,REG_R10
+
+ #define REG_VAR_ORDER_FLT REG_F8, REG_F9, REG_F10, REG_F11, \
+ REG_F12, REG_F13, REG_F14, REG_F15, \
+ REG_F6, REG_F7, REG_F4, REG_F5, \
+ REG_F2, REG_F3, REG_F0, REG_F1, \
+ REG_F16, REG_F17, REG_F18, REG_F19, \
+ REG_F20, REG_F21, REG_F22, REG_F23, \
+ REG_F24, REG_F25, REG_F26, REG_F27, \
+ REG_F28, REG_F29, REG_F30, REG_F31,
+
+ #define MAX_VAR_ORDER_SIZE 32
+
+ #define REG_TMP_ORDER REG_R3,REG_R2,REG_R1,REG_R0, REG_R4,REG_R5,REG_R6,REG_R7,\
+ REG_LR,REG_R12, REG_R8,REG_R9,REG_R10
+ #define REG_TMP_ORDER_COUNT 13
+
+ #define REG_FLT_TMP_ORDER REG_F14, REG_F15, REG_F12, REG_F13, \
+ REG_F10, REG_F11, REG_F8, REG_F9, \
+ REG_F6, REG_F7, REG_F4, REG_F5, \
+ REG_F2, REG_F3, REG_F0, REG_F1, \
+ REG_F16, REG_F17, REG_F18, REG_F19, \
+ REG_F20, REG_F21, REG_F22, REG_F23, \
+ REG_F24, REG_F25, REG_F26, REG_F27, \
+ REG_F28, REG_F29, REG_F30, REG_F31,
+
+ #define REG_FLT_TMP_ORDER_COUNT 32
+
+ #define REG_PREDICT_ORDER REG_LR,REG_R12,REG_R3,REG_R2,REG_R1,REG_R0, \
+ REG_R7,REG_R6,REG_R5,REG_R4,REG_R8,REG_R9,REG_R10
+
+ #define RBM_LOW_REGS (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R4|RBM_R5|RBM_R6|RBM_R7)
+ #define RBM_HIGH_REGS (RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_R12|RBM_SP|RBM_LR|RBM_PC)
+
+ #define REG_CALLEE_SAVED_ORDER REG_R4,REG_R5,REG_R6,REG_R7,REG_R8,REG_R9,REG_R10,REG_R11
+ #define RBM_CALLEE_SAVED_ORDER RBM_R4,RBM_R5,RBM_R6,RBM_R7,RBM_R8,RBM_R9,RBM_R10,RBM_R11
+
+ #define CNT_CALLEE_SAVED (8)
+ #define CNT_CALLEE_TRASH (6)
+ #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1)
+
+ #define CNT_CALLEE_SAVED_FLOAT (16)
+ #define CNT_CALLEE_TRASH_FLOAT (16)
+
+ #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED*REGSIZE_BYTES)
+ #define CALLEE_SAVED_FLOAT_MAXSZ (CNT_CALLEE_SAVED_FLOAT*sizeof(float))
+
+ // We reuse the SP register as an illegal value in the register predictor
+ #define RBM_ILLEGAL RBM_SP
+ // We reuse the SP register as a flag for last use handling in the register predictor
+ #define RBM_LASTUSE RBM_SP
+ // We're using the encoding for SP to indicate a half-long on the frame
+ #define REG_L_STK REG_SP
+
+ // This is the first register in REG_TMP_ORDER
+ #define REG_TMP_0 REG_R3
+ #define RBM_TMP_0 RBM_R3
+
+ // This is the second register in REG_TMP_ORDER
+ #define REG_TMP_1 REG_R2
+ #define RBM_TMP_1 RBM_R2
+
+ // This is the first register pair in REG_TMP_ORDER
+ #define REG_PAIR_TMP REG_PAIR_R2R3
+ #define REG_PAIR_TMP_REVERSE REG_PAIR_R3R2
+ #define RBM_PAIR_TMP (RBM_R2|RBM_R3)
+ #define REG_PAIR_TMP_LO REG_R2
+ #define RBM_PAIR_TMP_LO RBM_R2
+ #define REG_PAIR_TMP_HI REG_R3
+ #define RBM_PAIR_TMP_HI RBM_R3
+ #define PREDICT_PAIR_TMP PREDICT_PAIR_R2R3
+ #define PREDICT_PAIR_TMP_LO PREDICT_REG_R2
+
+ // Used when calling the 64-bit Variable shift helper
+ #define REG_LNGARG_0 REG_PAIR_R0R1
+ #define RBM_LNGARG_0 (RBM_R0|RBM_R1)
+ #define PREDICT_PAIR_LNGARG_0 PREDICT_PAIR_R0R1
+
+ // register to hold shift amount; no special register is required on the ARM
+ #define REG_SHIFT REG_NA
+ #define RBM_SHIFT RBM_ALLINT
+ #define PREDICT_REG_SHIFT PREDICT_REG
+
+ // register to hold shift amount when shifting 64-bit values (this uses a helper call)
+ #define REG_SHIFT_LNG REG_R2 // REG_ARG_2
+ #define RBM_SHIFT_LNG RBM_R2 // RBM_ARG_2
+ #define PREDICT_REG_SHIFT_LNG PREDICT_REG_R2
+
+
+ // This is a general scratch register that does not conflict with the argument registers
+ #define REG_SCRATCH REG_LR
+ #define RBM_SCRATCH RBM_LR
+
+ // This is a general register that can be optionally reserved for other purposes during codegen
+ #define REG_OPT_RSVD REG_R10
+ #define RBM_OPT_RSVD RBM_R10
+
+ // We reserve R9 to store SP on entry for stack unwinding when localloc is used
+ #define REG_SAVED_LOCALLOC_SP REG_R9
+ #define RBM_SAVED_LOCALLOC_SP RBM_R9
+
+ // Where is the exception object on entry to the handler block?
+ #define REG_EXCEPTION_OBJECT REG_R0
+ #define RBM_EXCEPTION_OBJECT RBM_R0
+
+ #define REG_JUMP_THUNK_PARAM REG_R12
+ #define RBM_JUMP_THUNK_PARAM RBM_R12
+
+#if NOGC_WRITE_BARRIERS
+ #define REG_WRITE_BARRIER REG_R1
+ #define RBM_WRITE_BARRIER RBM_R1
+#endif
+
+ // GenericPInvokeCalliHelper VASigCookie Parameter
+ #define REG_PINVOKE_COOKIE_PARAM REG_R4
+ #define RBM_PINVOKE_COOKIE_PARAM RBM_R4
+ #define PREDICT_REG_PINVOKE_COOKIE_PARAM PREDICT_REG_R4
+
+ // GenericPInvokeCalliHelper unmanaged target Parameter
+ #define REG_PINVOKE_TARGET_PARAM REG_R12
+ #define RBM_PINVOKE_TARGET_PARAM RBM_R12
+ #define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R12
+
+ // IL stub's secret MethodDesc parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM)
+ #define REG_SECRET_STUB_PARAM REG_R12
+ #define RBM_SECRET_STUB_PARAM RBM_R12
+
+ // VSD extra parameter (slot address)
+ #define REG_VIRTUAL_STUB_PARAM REG_R4
+ #define RBM_VIRTUAL_STUB_PARAM RBM_R4
+ #define PREDICT_REG_VIRTUAL_STUB_PARAM PREDICT_REG_R4
+
+ // Registers used by PInvoke frame setup
+ #define REG_PINVOKE_FRAME REG_R4
+ #define RBM_PINVOKE_FRAME RBM_R4
+ #define REG_PINVOKE_TCB REG_R5
+ #define RBM_PINVOKE_TCB RBM_R5
+ #define REG_PINVOKE_SCRATCH REG_R6
+ #define RBM_PINVOKE_SCRATCH RBM_R6
+
+#ifdef LEGACY_BACKEND
+ #define REG_SPILL_CHOICE REG_LR
+ #define RBM_SPILL_CHOICE RBM_LR
+ #define REG_SPILL_CHOICE_FLT REG_F14
+ #define RBM_SPILL_CHOICE_FLT (RBM_F14|RBM_F15)
+#endif // LEGACY_BACKEND
+
+ // The following defines are useful for iterating a regNumber
+ #define REG_FIRST REG_R0
+ #define REG_INT_FIRST REG_R0
+ #define REG_INT_LAST REG_LR
+ #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1)
+ #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1))
+ #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1))
+
+ // genCodeForCall() moves the target address of the tailcall into this register, before pushing it on the stack
+ #define REG_TAILCALL_ADDR REG_R1
+
+ // The following registers are used in emitting Enter/Leave/Tailcall profiler callbacks
+ #define REG_PROFILER_ENTER_ARG REG_R0
+ #define RBM_PROFILER_ENTER_ARG RBM_R0
+ #define REG_PROFILER_RET_SCRATCH REG_R2
+ #define RBM_PROFILER_RET_SCRATCH RBM_R2
+ #define RBM_PROFILER_RET_USED (RBM_R0 | RBM_R1 | RBM_R2)
+ #define REG_PROFILER_JMP_ARG REG_R0
+ #define RBM_PROFILER_JMP_USED RBM_R0
+ #define RBM_PROFILER_TAIL_USED (RBM_R0 | RBM_R12 | RBM_LR)
+
+
+ // Which registers are int and long values returned in?
+ #define REG_INTRET REG_R0
+ #define RBM_INTRET RBM_R0
+ #define REG_LNGRET REG_PAIR_R0R1
+ #define RBM_LNGRET (RBM_R1|RBM_R0)
+
+ #define REG_FLOATRET REG_F0
+ #define RBM_FLOATRET RBM_F0
+ #define RBM_DOUBLERET (RBM_F0|RBM_F1)
+
+ // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper
+ // See vm\arm\asmhelpers.asm for more details.
+ #define RBM_STOP_FOR_GC_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET))
+
+ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
+ #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
+
+ #define REG_FPBASE REG_R11
+ #define RBM_FPBASE RBM_R11
+ #define STR_FPBASE "r11"
+ #define REG_SPBASE REG_SP
+ #define RBM_SPBASE RBM_SP
+ #define STR_SPBASE "sp"
+
+ #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved FP and return address
+
+ #define MAX_REG_ARG 4
+ #define MAX_FLOAT_REG_ARG 16
+ #define MAX_HFA_RET_SLOTS 8
+
+ #define REG_ARG_FIRST REG_R0
+ #define REG_ARG_LAST REG_R3
+ #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots
+
+ #define REG_ARG_0 REG_R0
+ #define REG_ARG_1 REG_R1
+ #define REG_ARG_2 REG_R2
+ #define REG_ARG_3 REG_R3
+
+ SELECTANY const regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3};
+ SELECTANY const regMaskTP intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3};
+
+ #define RBM_ARG_0 RBM_R0
+ #define RBM_ARG_1 RBM_R1
+ #define RBM_ARG_2 RBM_R2
+ #define RBM_ARG_3 RBM_R3
+
+ #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3)
+ #define RBM_FLTARG_REGS (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7|RBM_F8|RBM_F9|RBM_F10|RBM_F11|RBM_F12|RBM_F13|RBM_F14|RBM_F15)
+ #define RBM_DBL_REGS RBM_ALLDOUBLE
+
+ SELECTANY const regNumber fltArgRegs [] = {REG_F0, REG_F1, REG_F2, REG_F3, REG_F4, REG_F5, REG_F6, REG_F7, REG_F8, REG_F9, REG_F10, REG_F11, REG_F12, REG_F13, REG_F14, REG_F15 };
+ SELECTANY const regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7, RBM_F8, RBM_F9, RBM_F10, RBM_F11, RBM_F12, RBM_F13, RBM_F14, RBM_F15 };
+
+ #define LBL_DIST_SMALL_MAX_NEG (0)
+ #define LBL_DIST_SMALL_MAX_POS (+1020)
+ #define LBL_DIST_MED_MAX_NEG (-4095)
+ #define LBL_DIST_MED_MAX_POS (+4096)
+
+ #define JMP_DIST_SMALL_MAX_NEG (-2048)
+ #define JMP_DIST_SMALL_MAX_POS (+2046)
+
+ #define JCC_DIST_SMALL_MAX_NEG (-256)
+ #define JCC_DIST_SMALL_MAX_POS (+254)
+
+ #define JCC_DIST_MEDIUM_MAX_NEG (-1048576)
+ #define JCC_DIST_MEDIUM_MAX_POS (+1048574)
+
+ #define LBL_SIZE_SMALL (2)
+
+ #define JMP_SIZE_SMALL (2)
+ #define JMP_SIZE_LARGE (4)
+
+ #define JCC_SIZE_SMALL (2)
+ #define JCC_SIZE_MEDIUM (4)
+ #define JCC_SIZE_LARGE (6)
+
+#elif defined(_TARGET_ARM64_)
+
+ #define CPU_LOAD_STORE_ARCH 1
+ #define CPU_LONG_USES_REGPAIR 0
+ #define CPU_HAS_FP_SUPPORT 1
+ #define ROUND_FLOAT 0 // Do not round intermediate float expression results
+ #define CPU_HAS_BYTE_REGS 0
+ #define CPU_USES_BLOCK_MOVE 0
+
+ #define CPBLK_UNROLL_LIMIT 64 // Upper bound below which the code generator will unroll CpBlk.
+ #define INITBLK_UNROLL_LIMIT 64 // Upper bound below which the code generator will unroll InitBlk.
+
+ #define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC
+ #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog
+ #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
+ #define FEATURE_MULTIREG_STRUCT_PROMOTE 1 // True when we want to promote fields of a multireg struct into registers
+ #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp
+ #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
+ #define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
+ #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register
+ #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register
+ #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register
+ #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine if structs are passed/returned in more than one register
+ #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (max is 4 doubles using an HFA)
+ #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles)
+ #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA)
+ #define MAX_RET_REG_COUNT 4 // Maximum registers used to return a value.
+
+#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
+ #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers
+#else
+ #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using ASM barriers we definitely don't have NOGC barriers).
+#endif
+ #define USER_ARGS_COME_LAST 1
+ #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround. For both ARM and AMD64, the frame size is fixed, so we don't really
+ // need to track stack depth, but this is currently necessary to get GC information reported at call sites.
+ #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target
+ #define FEATURE_EH 1 // Set to 0 (to aid platform bring-up) to eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses.
+ #define FEATURE_EH_FUNCLETS 1
+ #define FEATURE_EH_CALLFINALLY_THUNKS 1 // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
+ #define FEATURE_STACK_FP_X87 0
+ #define ETW_EBP_FRAMED 1 // if 1 we cannot use REG_FP as a scratch register and must setup the frame pointer for most methods
+ #define FEATURE_FP_REGALLOC 0 // Enabled if RegAlloc is used to enregister Floating Point LclVars
+ #define CSE_CONSTS 1 // Enable if we want to CSE constants
+
+ #define REG_FP_FIRST REG_V0
+ #define REG_FP_LAST REG_V31
+ #define FIRST_FP_ARGREG REG_V0
+ #define LAST_FP_ARGREG REG_V15
+
+ #define REGNUM_BITS 6 // number of bits in a REG_*
+ #define TINY_REGNUM_BITS 5 // number of bits we will use for a tiny instr desc (may not use float)
+ #define REGMASK_BITS 64 // number of bits in a REGNUM_MASK
+ #define REGSIZE_BYTES 8 // number of bytes in one general purpose register
+ #define FP_REGSIZE_BYTES 16 // number of bytes in one FP/SIMD register
+ #define FPSAVE_REGSIZE_BYTES 8 // number of bytes in one FP/SIMD register that are saved/restored, for callee-saved registers
+
+ #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call.
+
+ #define CODE_ALIGN 4 // code alignment requirement
+ #define STACK_ALIGN 16 // stack alignment requirement
+ #define STACK_ALIGN_SHIFT 3 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
+
+ #define RBM_INT_CALLEE_SAVED (RBM_R19|RBM_R20|RBM_R21|RBM_R22|RBM_R23|RBM_R24|RBM_R25|RBM_R26|RBM_R27|RBM_R28)
+ #define RBM_INT_CALLEE_TRASH (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R4|RBM_R5|RBM_R6|RBM_R7|RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_R12|RBM_R13|RBM_R14|RBM_R15|RBM_IP0|RBM_IP1|RBM_LR)
+ #define RBM_FLT_CALLEE_SAVED (RBM_V8|RBM_V9|RBM_V10|RBM_V11|RBM_V12|RBM_V13|RBM_V14|RBM_V15)
+ #define RBM_FLT_CALLEE_TRASH (RBM_V0|RBM_V1|RBM_V2|RBM_V3|RBM_V4|RBM_V5|RBM_V6|RBM_V7|RBM_V16|RBM_V17|RBM_V18|RBM_V19|RBM_V20|RBM_V21|RBM_V22|RBM_V23|RBM_V24|RBM_V25|RBM_V26|RBM_V27|RBM_V28|RBM_V29|RBM_V30|RBM_V31)
+
+ #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED)
+ #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH)
+ #define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R13|RBM_R14|RBM_R15)
+ #define REG_DEFAULT_HELPER_CALL_TARGET REG_R12
+
+ #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
+ #define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH)
+ #define RBM_ALLDOUBLE RBM_ALLFLOAT
+
+ #define REG_VAR_ORDER REG_R9,REG_R10,REG_R11,REG_R12,REG_R13,REG_R14,REG_R15,\
+ REG_R8,REG_R7,REG_R6,REG_R5,REG_R4,REG_R3,REG_R2,REG_R1,REG_R0,\
+ REG_R19,REG_R20,REG_R21,REG_R22,REG_R23,REG_R24,REG_R25,REG_R26,REG_R27,REG_R28,\
+
+ #define REG_VAR_ORDER_FLT REG_V16, REG_V17, REG_V18, REG_V19, \
+ REG_V20, REG_V21, REG_V22, REG_V23, \
+ REG_V24, REG_V25, REG_V26, REG_V27, \
+ REG_V28, REG_V29, REG_V30, REG_V31, \
+ REG_V7, REG_V6, REG_V5, REG_V4, \
+ REG_V8, REG_V9, REG_V10, REG_V11, \
+ REG_V12, REG_V13, REG_V14, REG_V15, \
+ REG_V3, REG_V2, REG_V1, REG_V0
+
+ #define REG_CALLEE_SAVED_ORDER REG_R19,REG_R20,REG_R21,REG_R22,REG_R23,REG_R24,REG_R25,REG_R26,REG_R27,REG_R28
+ #define RBM_CALLEE_SAVED_ORDER RBM_R19,RBM_R20,RBM_R21,RBM_R22,RBM_R23,RBM_R24,RBM_R25,RBM_R26,RBM_R27,RBM_R28
+
+ #define CNT_CALLEE_SAVED (11)
+ #define CNT_CALLEE_TRASH (17)
+ #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1)
+
+ #define CNT_CALLEE_SAVED_FLOAT (8)
+ #define CNT_CALLEE_TRASH_FLOAT (24)
+
+ #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED * REGSIZE_BYTES)
+ #define CALLEE_SAVED_FLOAT_MAXSZ (CNT_CALLEE_SAVED_FLOAT * FPSAVE_REGSIZE_BYTES)
+
+ // TODO-ARM64-Cleanup: Remove this
+ #define REG_L_STK REG_ZR
+
+ // This is the first register in REG_TMP_ORDER
+ #define REG_TMP_0 REG_R9
+ #define RBM_TMP_0 RBM_R9
+
+ // This is the second register in REG_TMP_ORDER
+ #define REG_TMP_1 REG_R10
+ #define RBM_TMP_1 RBM_R10
+
+ // register to hold shift amount; no special register is required on ARM64.
+ #define REG_SHIFT REG_NA
+ #define RBM_SHIFT RBM_ALLINT
+ #define PREDICT_REG_SHIFT PREDICT_REG
+
+ // This is a general scratch register that does not conflict with the argument registers
+ #define REG_SCRATCH REG_R9
+ #define RBM_SCRATCH RBM_R9
+
+ // This is a general register that can be optionally reserved for other purposes during codegen
+ #define REG_OPT_RSVD REG_IP1
+ #define RBM_OPT_RSVD RBM_IP1
+
+ // Where is the exception object on entry to the handler block?
+ #define REG_EXCEPTION_OBJECT REG_R0
+ #define RBM_EXCEPTION_OBJECT RBM_R0
+
+ #define REG_JUMP_THUNK_PARAM REG_R12
+ #define RBM_JUMP_THUNK_PARAM RBM_R12
+
+#if NOGC_WRITE_BARRIERS
+ #define REG_WRITE_BARRIER_SRC_BYREF REG_R13
+ #define RBM_WRITE_BARRIER_SRC_BYREF RBM_R13
+
+ #define REG_WRITE_BARRIER_DST_BYREF REG_R14
+ #define RBM_WRITE_BARRIER_DST_BYREF RBM_R14
+
+ #define REG_WRITE_BARRIER REG_R15
+ #define RBM_WRITE_BARRIER RBM_R15
+#endif
+
+ // GenericPInvokeCalliHelper VASigCookie Parameter
+ #define REG_PINVOKE_COOKIE_PARAM REG_R15
+ #define RBM_PINVOKE_COOKIE_PARAM RBM_R15
+ #define PREDICT_REG_PINVOKE_COOKIE_PARAM PREDICT_REG_R15
+
+ // GenericPInvokeCalliHelper unmanaged target Parameter
+ #define REG_PINVOKE_TARGET_PARAM REG_R14
+ #define RBM_PINVOKE_TARGET_PARAM RBM_R14
+ #define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R14
+
+ // IL stub's secret MethodDesc parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM)
+ #define REG_SECRET_STUB_PARAM REG_R12
+ #define RBM_SECRET_STUB_PARAM RBM_R12
+
+ // VSD extra parameter (slot address)
+ #define REG_VIRTUAL_STUB_PARAM REG_R11
+ #define RBM_VIRTUAL_STUB_PARAM RBM_R11
+ #define PREDICT_REG_VIRTUAL_STUB_PARAM PREDICT_REG_R11
+
+ // R2R indirect call. Use the same registers as VSD
+ #define REG_R2R_INDIRECT_PARAM REG_R11
+ #define RBM_R2R_INDIRECT_PARAM RBM_R11
+ #define PREDICT_REG_RER_INDIRECT_PARAM PREDICT_REG_R11
+
+ // Registers used by PInvoke frame setup
+ #define REG_PINVOKE_FRAME REG_R9
+ #define RBM_PINVOKE_FRAME RBM_R9
+ #define REG_PINVOKE_TCB REG_R10
+ #define RBM_PINVOKE_TCB RBM_R10
+ #define REG_PINVOKE_SCRATCH REG_R10
+ #define RBM_PINVOKE_SCRATCH RBM_R10
+
+ // The following defines are useful for iterating a regNumber
+ #define REG_FIRST REG_R0
+ #define REG_INT_FIRST REG_R0
+ #define REG_INT_LAST REG_ZR
+ #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1)
+ #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1))
+ #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1))
+
+ // genCodeForCall() moves the target address of the tailcall into this register, before pushing it on the stack
+ #define REG_TAILCALL_ADDR REG_R9
+
+ // The following registers are used in emitting Enter/Leave/Tailcall profiler callbacks
+ #define REG_PROFILER_ENTER_ARG REG_R0
+ #define RBM_PROFILER_ENTER_ARG RBM_R0
+ #define REG_PROFILER_RET_SCRATCH REG_R2
+ #define RBM_PROFILER_RET_SCRATCH RBM_R2
+ #define RBM_PROFILER_RET_USED (RBM_R0 | RBM_R1 | RBM_R2)
+ #define REG_PROFILER_JMP_ARG REG_R0
+ #define RBM_PROFILER_JMP_USED RBM_R0
+ #define RBM_PROFILER_TAIL_USED (RBM_R0 | RBM_R12 | RBM_LR)
+
+
+ // Which registers are int and long values returned in?
+ #define REG_INTRET REG_R0
+ #define RBM_INTRET RBM_R0
+ #define REG_LNGRET REG_R0
+ #define RBM_LNGRET RBM_R0
+ // second return register for 16-byte structs
+ #define REG_INTRET_1 REG_R1
+ #define RBM_INTRET_1 RBM_R1
+
+ #define REG_FLOATRET REG_V0
+ #define RBM_FLOATRET RBM_V0
+ #define RBM_DOUBLERET RBM_V0
+
+ // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper
+ #define RBM_STOP_FOR_GC_TRASH RBM_CALLEE_TRASH
+
+ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
+ #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
+
+ #define REG_FPBASE REG_FP
+ #define RBM_FPBASE RBM_FP
+ #define STR_FPBASE "fp"
+ #define REG_SPBASE REG_SP
+ #define RBM_SPBASE RBM_ZR // reuse the RBM for REG_ZR
+ #define STR_SPBASE "sp"
+
+ #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved FP and return address
+
+ // On ARM64 the calling convention defines REG_R8 (x8) as an additional argument register.
+ // It isn't allocated for the normal user arguments, so it isn't counted by MAX_REG_ARG.
+ // Whether we use this register to pass the RetBuff is controlled by the function hasFixedRetBuffReg();
+ // it is considered to be the next integer argnum, which is 8.
+ //
+ #define REG_ARG_RET_BUFF REG_R8
+ #define RBM_ARG_RET_BUFF RBM_R8
+ #define RET_BUFF_ARGNUM 8
+
+ #define MAX_REG_ARG 8
+ #define MAX_FLOAT_REG_ARG 8
+
+ #define REG_ARG_FIRST REG_R0
+ #define REG_ARG_LAST REG_R7
+ #define REG_ARG_FP_FIRST REG_V0
+ #define REG_ARG_FP_LAST REG_V7
+ #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots
+
+ #define REG_ARG_0 REG_R0
+ #define REG_ARG_1 REG_R1
+ #define REG_ARG_2 REG_R2
+ #define REG_ARG_3 REG_R3
+ #define REG_ARG_4 REG_R4
+ #define REG_ARG_5 REG_R5
+ #define REG_ARG_6 REG_R6
+ #define REG_ARG_7 REG_R7
+
+ SELECTANY const regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, REG_R6, REG_R7};
+ SELECTANY const regMaskTP intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3, RBM_R4, RBM_R5, RBM_R6, RBM_R7};
+
+ #define RBM_ARG_0 RBM_R0
+ #define RBM_ARG_1 RBM_R1
+ #define RBM_ARG_2 RBM_R2
+ #define RBM_ARG_3 RBM_R3
+ #define RBM_ARG_4 RBM_R4
+ #define RBM_ARG_5 RBM_R5
+ #define RBM_ARG_6 RBM_R6
+ #define RBM_ARG_7 RBM_R7
+
+ #define REG_FLTARG_0 REG_V0
+ #define REG_FLTARG_1 REG_V1
+ #define REG_FLTARG_2 REG_V2
+ #define REG_FLTARG_3 REG_V3
+ #define REG_FLTARG_4 REG_V4
+ #define REG_FLTARG_5 REG_V5
+ #define REG_FLTARG_6 REG_V6
+ #define REG_FLTARG_7 REG_V7
+
+ #define RBM_FLTARG_0 RBM_V0
+ #define RBM_FLTARG_1 RBM_V1
+ #define RBM_FLTARG_2 RBM_V2
+ #define RBM_FLTARG_3 RBM_V3
+ #define RBM_FLTARG_4 RBM_V4
+ #define RBM_FLTARG_5 RBM_V5
+ #define RBM_FLTARG_6 RBM_V6
+ #define RBM_FLTARG_7 RBM_V7
+
+ #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3|RBM_ARG_4|RBM_ARG_5|RBM_ARG_6|RBM_ARG_7)
+ #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3|RBM_FLTARG_4|RBM_FLTARG_5|RBM_FLTARG_6|RBM_FLTARG_7)
+
+ SELECTANY const regNumber fltArgRegs [] = {REG_V0, REG_V1, REG_V2, REG_V3, REG_V4, REG_V5, REG_V6, REG_V7 };
+ SELECTANY const regMaskTP fltArgMasks[] = {RBM_V0, RBM_V1, RBM_V2, RBM_V3, RBM_V4, RBM_V5, RBM_V6, RBM_V7 };
+
+ #define LBL_DIST_SMALL_MAX_NEG (-1048576)
+ #define LBL_DIST_SMALL_MAX_POS (+1048575)
+
+ #define LBL_SIZE_SMALL (4)
+ #define LBL_SIZE_LARGE (8)
+
+ #define JCC_DIST_SMALL_MAX_NEG (-1048576)
+ #define JCC_DIST_SMALL_MAX_POS (+1048575)
+
+ #define JCC_SIZE_SMALL (4)
+ #define JCC_SIZE_LARGE (8)
+
+ #define LDC_DIST_SMALL_MAX_NEG (-1048576)
+ #define LDC_DIST_SMALL_MAX_POS (+1048575)
+
+ #define LDC_SIZE_SMALL (4)
+ #define LDC_SIZE_LARGE (8)
+
+ #define JMP_SIZE_SMALL (4)
+
+#else
+ #error Unsupported or unset target architecture
+#endif
+
+#ifdef _TARGET_XARCH_
+
+ #define JMP_DIST_SMALL_MAX_NEG (-128)
+ #define JMP_DIST_SMALL_MAX_POS (+127)
+
+ #define JCC_DIST_SMALL_MAX_NEG (-128)
+ #define JCC_DIST_SMALL_MAX_POS (+127)
+
+ #define JMP_SIZE_SMALL (2)
+ #define JMP_SIZE_LARGE (5)
+
+ #define JCC_SIZE_SMALL (2)
+ #define JCC_SIZE_LARGE (6)
+
+ #define PUSH_INST_SIZE (5)
+ #define CALL_INST_SIZE (5)
+
+#endif // _TARGET_XARCH_
+
+C_ASSERT(REG_FIRST == 0);
+C_ASSERT(REG_INT_FIRST < REG_INT_LAST);
+C_ASSERT(REG_FP_FIRST < REG_FP_LAST);
+
+// Opportunistic tail call feature converts non-tail prefixed calls into
+// tail calls where possible. It requires fast tail calling mechanism for
+// performance. Otherwise, we are better off not converting non-tail prefixed
+// calls into tail calls.
+C_ASSERT((FEATURE_TAILCALL_OPT == 0) || (FEATURE_FASTTAILCALL == 1));
+
+/*****************************************************************************/
+
+#define BITS_PER_BYTE 8
+#define REGNUM_MASK ((1 << REGNUM_BITS) - 1) // an n-bit mask used to encode multiple REGNUMs into an unsigned int
+#define RBM_ALL(type) (varTypeIsFloating(type) ? RBM_ALLFLOAT : RBM_ALLINT)
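+
+// Illustrative example only: REGNUM_MASK allows several regNumbers to be packed into one
+// unsigned value, REGNUM_BITS bits apiece, e.g. (regLo/regHi are hypothetical names):
+//
+//   unsigned  packed = ((unsigned)regHi << REGNUM_BITS) | ((unsigned)regLo & REGNUM_MASK);
+//   regNumber lo     = (regNumber)(packed & REGNUM_MASK);
+//   regNumber hi     = (regNumber)((packed >> REGNUM_BITS) & REGNUM_MASK);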
+
+/*****************************************************************************/
+
+#if CPU_HAS_BYTE_REGS
+ #define RBM_BYTE_REGS (RBM_EAX|RBM_ECX|RBM_EDX|RBM_EBX)
+ #define RBM_NON_BYTE_REGS (RBM_ESI|RBM_EDI)
+ // We reuse the ESP register as a flag for byteable registers in lvPrefReg
+ #define RBM_BYTE_REG_FLAG RBM_ESP
+#else
+ #define RBM_BYTE_REGS RBM_ALLINT
+ #define RBM_NON_BYTE_REGS RBM_NONE
+ #define RBM_BYTE_REG_FLAG RBM_NONE
+#endif
+// clang-format on
+
+/*****************************************************************************/
+class Target
+{
+public:
+ static const char* g_tgtCPUName;
+ static const char* g_tgtPlatformName;
+
+ enum ArgOrder
+ {
+ ARG_ORDER_R2L,
+ ARG_ORDER_L2R
+ };
+ static const enum ArgOrder g_tgtArgOrder;
+
+#if NOGC_WRITE_BARRIERS
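+ // exclude_WriteBarrierReg: returns 'mask' with the fixed NOGC write barrier register
+ // removed; if that would leave no registers, falls back to all integer registers
+ // other than the write barrier register.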
+ static regMaskTP exclude_WriteBarrierReg(regMaskTP mask)
+ {
+ unsigned result = (mask & ~RBM_WRITE_BARRIER);
+ if (result)
+ return result;
+ else
+ return RBM_ALLINT & ~RBM_WRITE_BARRIER;
+ }
+#endif // NOGC_WRITE_BARRIERS
+};
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+const char* getRegName(unsigned reg, bool isFloat = false); // this is for gcencode.cpp and disasm.cpp that don't use
+ // the regNumber type
+const char* getRegName(regNumber reg, bool isFloat = false);
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+#ifdef DEBUG
+const char* getRegNameFloat(regNumber reg, var_types type);
+extern void dspRegMask(regMaskTP regMask, size_t minSiz = 0);
+#endif
+
+#if CPU_HAS_BYTE_REGS
+inline BOOL isByteReg(regNumber reg)
+{
+ return (reg <= REG_EBX);
+}
+#else
+inline BOOL isByteReg(regNumber reg)
+{
+ return true;
+}
+#endif
+
+#ifdef LEGACY_BACKEND
+extern const regNumber raRegTmpOrder[REG_TMP_ORDER_COUNT];
+extern const regNumber rpRegTmpOrder[REG_TMP_ORDER_COUNT];
+#if FEATURE_FP_REGALLOC
+extern const regNumber raRegFltTmpOrder[REG_FLT_TMP_ORDER_COUNT];
+#endif
+#endif // LEGACY_BACKEND
+
+inline regMaskTP genRegMask(regNumber reg);
+inline regMaskTP genRegMaskFloat(regNumber reg, var_types type = TYP_DOUBLE);
+
+/*****************************************************************************
+ * Return true if the register number is valid
+ */
+inline bool genIsValidReg(regNumber reg)
+{
+ /* It's safest to perform an unsigned comparison in case reg is negative */
+ return ((unsigned)reg < (unsigned)REG_COUNT);
+}
+
+/*****************************************************************************
+ * Return true if the register is a valid integer register
+ */
+inline bool genIsValidIntReg(regNumber reg)
+{
+ return reg >= REG_INT_FIRST && reg <= REG_INT_LAST;
+}
+
+/*****************************************************************************
+ * Return true if the register is a valid floating point register
+ */
+inline bool genIsValidFloatReg(regNumber reg)
+{
+ return reg >= REG_FP_FIRST && reg <= REG_FP_LAST;
+}
+
+#if defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+
+/*****************************************************************************
+ * Return true if the register is a valid floating point double register
+ */
+inline bool genIsValidDoubleReg(regNumber reg)
+{
+ return genIsValidFloatReg(reg) && (((reg - REG_FP_FIRST) & 0x1) == 0);
+}
+
+#endif // defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+
+//-------------------------------------------------------------------------------------------
+// hasFixedRetBuffReg:
+// Returns true if our target architecture uses a fixed return buffer register
+//
+inline bool hasFixedRetBuffReg()
+{
+#ifdef _TARGET_ARM64_
+ return true;
+#else
+ return false;
+#endif
+}
+
+//-------------------------------------------------------------------------------------------
+// theFixedRetBuffReg:
+// Returns the regNumber to use for the fixed return buffer
+//
+inline regNumber theFixedRetBuffReg()
+{
+ assert(hasFixedRetBuffReg()); // This predicate should be checked before calling this method
+#ifdef _TARGET_ARM64_
+ return REG_ARG_RET_BUFF;
+#else
+ return REG_NA;
+#endif
+}
+
+//-------------------------------------------------------------------------------------------
+// theFixedRetBuffMask:
+//     Returns the register mask (regMaskTP) to use for the fixed return buffer
+//
+inline regMaskTP theFixedRetBuffMask()
+{
+ assert(hasFixedRetBuffReg()); // This predicate should be checked before calling this method
+#ifdef _TARGET_ARM64_
+ return RBM_ARG_RET_BUFF;
+#else
+ return 0;
+#endif
+}
+
+//-------------------------------------------------------------------------------------------
+// theFixedRetBuffArgNum:
+// Returns the argNum to use for the fixed return buffer
+//
+inline unsigned theFixedRetBuffArgNum()
+{
+ assert(hasFixedRetBuffReg()); // This predicate should be checked before calling this method
+#ifdef _TARGET_ARM64_
+ return RET_BUFF_ARGNUM;
+#else
+ return BAD_VAR_NUM;
+#endif
+}
+
+//-------------------------------------------------------------------------------------------
+// fullIntArgRegMask:
+// Returns the full mask of all possible integer registers
+// Note this includes the fixed return buffer register on Arm64
+//
+inline regMaskTP fullIntArgRegMask()
+{
+ if (hasFixedRetBuffReg())
+ {
+ return RBM_ARG_REGS | theFixedRetBuffMask();
+ }
+ else
+ {
+ return RBM_ARG_REGS;
+ }
+}
+
+//-------------------------------------------------------------------------------------------
+// isValidIntArgReg:
+// Returns true if the register is a valid integer argument register
+// Note this method also returns true on Arm64 when 'reg' is the RetBuff register
+//
+inline bool isValidIntArgReg(regNumber reg)
+{
+ return (genRegMask(reg) & fullIntArgRegMask()) != 0;
+}
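+// Illustrative sketch of how the helpers above interact (assumes an ARM64-like target where
+// REG_ARG_RET_BUFF names the fixed return buffer register, x8):
+//
+//   if (hasFixedRetBuffReg())
+//   {
+//       // x8 is not in RBM_ARG_REGS, but fullIntArgRegMask() folds it in,
+//       // so isValidIntArgReg() accepts it as an integer argument register.
+//       assert(isValidIntArgReg(theFixedRetBuffReg()));
+//   }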
+
+//-------------------------------------------------------------------------------------------
+// genRegArgNext:
+// Given a register that is an integer or floating point argument register
+// returns the next argument register
+//
+regNumber genRegArgNext(regNumber argReg);
+
+//-------------------------------------------------------------------------------------------
+// isValidFloatArgReg:
+// Returns true if the register is a valid floating-point argument register
+//
+inline bool isValidFloatArgReg(regNumber reg)
+{
+ if (reg == REG_NA)
+ {
+ return false;
+ }
+ else
+ {
+ return (reg >= FIRST_FP_ARGREG) && (reg <= LAST_FP_ARGREG);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Can the register hold the argument type?
+ */
+
+#ifdef _TARGET_ARM_
+inline bool floatRegCanHoldType(regNumber reg, var_types type)
+{
+ assert(genIsValidFloatReg(reg));
+ if (type == TYP_DOUBLE)
+ {
+ return ((reg - REG_F0) % 2) == 0;
+ }
+ else
+ {
+ // Can be TYP_STRUCT for HFA. It's not clear that's correct; what about
+ // HFA of double? We wouldn't be asserting the right alignment, and
+ // callers like genRegMaskFloat() wouldn't be generating the right mask.
+
+ assert((type == TYP_FLOAT) || (type == TYP_STRUCT));
+ return true;
+ }
+}
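+// Illustrative expectations for the ARM version above (a sketch; REG_F0/REG_F1 are the usual
+// single-precision register names):
+//
+//   assert(floatRegCanHoldType(REG_F0, TYP_DOUBLE));  // even S register: starts a D register pair
+//   assert(!floatRegCanHoldType(REG_F1, TYP_DOUBLE)); // odd S register: only the upper half of D0
+//   assert(floatRegCanHoldType(REG_F1, TYP_FLOAT));   // any S register can hold a float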
+#else
+// AMD64: xmm registers can hold any float type
+// x86: FP stack can hold any float type
+// ARM64: Floating-point/SIMD registers can hold any type.
+inline bool floatRegCanHoldType(regNumber reg, var_types type)
+{
+ return true;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Map a register number to a register mask.
+ */
+
+extern const regMaskSmall regMasks[REG_COUNT];
+
+inline regMaskTP genRegMask(regNumber reg)
+{
+ assert((unsigned)reg < ArrLen(regMasks));
+#ifdef _TARGET_AMD64_
+    // A shift is faster than an L1 hit on modern x86
+    // (L1 latency on Sandy Bridge is 4 cycles for [base] and 5 for [base + index*c]).
+    // The reason this is AMD64-only is that the x86 backend will try to get reg masks for REG_STK
+    // and the result needs to be zero.
+ regMaskTP result = 1 << reg;
+ assert(result == regMasks[reg]);
+ return result;
+#else
+ return regMasks[reg];
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Map a register number to a floating-point register mask.
+ */
+
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+extern const regMaskSmall regFPMasks[REG_FPCOUNT];
+#endif // defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+
+inline regMaskTP genRegMaskFloat(regNumber reg, var_types type /* = TYP_DOUBLE */)
+{
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+ assert(reg >= REG_FPV0 && reg < REG_FPCOUNT);
+ assert((unsigned)reg < ArrLen(regFPMasks));
+ return regFPMasks[reg];
+#elif defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || defined(_TARGET_X86_)
+ assert(genIsValidFloatReg(reg));
+ assert((unsigned)reg < ArrLen(regMasks));
+ return regMasks[reg];
+#elif defined _TARGET_ARM_
+ assert(floatRegCanHoldType(reg, type));
+ assert(reg >= REG_F0 && reg <= REG_F31);
+
+ if (type == TYP_DOUBLE)
+ {
+ return regMasks[reg] | regMasks[reg + 1];
+ }
+ else
+ {
+ return regMasks[reg];
+ }
+#else
+#error Unsupported or unset target architecture
+#endif
+}
+
+//------------------------------------------------------------------------
+// genRegMask: Given a register, and its type, generate the appropriate regMask
+//
+// Arguments:
+// regNum - the register of interest
+// type - the type of regNum (i.e. the type it is being used as)
+//
+// Return Value:
+// This will usually return the same value as genRegMask(regNum), but
+// on architectures where multiple registers are used for certain types
+// (e.g. TYP_DOUBLE on ARM), it will return a regMask that includes
+// all the registers.
+// Registers that are used in pairs, but separately named (e.g. TYP_LONG
+// on ARM) will return just the regMask for the given register.
+//
+// Assumptions:
+// For registers that are used in pairs, the caller will be handling
+// each member of the pair separately.
+//
+inline regMaskTP genRegMask(regNumber regNum, var_types type)
+{
+#ifndef _TARGET_ARM_
+ return genRegMask(regNum);
+#else
+ regMaskTP regMask = RBM_NONE;
+
+ if (varTypeIsFloating(type))
+ {
+ regMask = genRegMaskFloat(regNum, type);
+ }
+ else
+ {
+ regMask = genRegMask(regNum);
+ }
+ return regMask;
+#endif
+}
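+// Illustrative expectations for the typed genRegMask above (a sketch; ARM register and mask
+// names such as REG_F2/RBM_F2/RBM_F3 and REG_R0/RBM_R0 are assumed):
+//
+//   assert(genRegMask(REG_F2, TYP_DOUBLE) == (RBM_F2 | RBM_F3)); // a double covers two S registers
+//   assert(genRegMask(REG_R0, TYP_LONG) == RBM_R0);              // long pairs are masked per register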
+
+/*****************************************************************************
+ *
+ * These arrays list the callee-saved register numbers (and bitmaps, respectively) for
+ * the current architecture.
+ */
+extern const regNumber raRegCalleeSaveOrder[CNT_CALLEE_SAVED];
+extern const regMaskTP raRbmCalleeSaveOrder[CNT_CALLEE_SAVED];
+
+// This method takes a "compact" bitset of the callee-saved registers, and "expands" it to a full register mask.
+regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short);
+
+/*****************************************************************************
+ *
+ * Returns the register that holds the low 32 bits of the long value given
+ * by the register pair 'regPair'.
+ */
+inline regNumber genRegPairLo(regPairNo regPair)
+{
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+
+ return (regNumber)((regPair - REG_PAIR_FIRST) & REG_PAIR_NMASK);
+}
+
+/*****************************************************************************
+ *
+ * Returns the register that holds the high 32 bits of the long value given
+ * by the register pair 'regPair'.
+ */
+inline regNumber genRegPairHi(regPairNo regPair)
+{
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+
+ return (regNumber)(((regPair - REG_PAIR_FIRST) >> REG_PAIR_NBITS) & REG_PAIR_NMASK);
+}
+
+/*****************************************************************************
+ *
+ * Returns whether regPair is a combination of two "real" registers
+ * or whether it contains a pseudo register.
+ *
+ * In debug it also asserts that reg1 and reg2 are not the same.
+ */
+bool genIsProperRegPair(regPairNo regPair);
+
+/*****************************************************************************
+ *
+ * Returns the register pair number that corresponds to the given two regs.
+ */
+inline regPairNo gen2regs2pair(regNumber regLo, regNumber regHi)
+{
+ assert(regLo != regHi || regLo == REG_STK);
+ assert(genIsValidReg(regLo) && genIsValidReg(regHi));
+ assert(regLo != REG_L_STK && regHi != REG_L_STK);
+
+ regPairNo regPair = (regPairNo)(regLo + (regHi << REG_PAIR_NBITS) + REG_PAIR_FIRST);
+
+ assert(regLo == genRegPairLo(regPair));
+ assert(regHi == genRegPairHi(regPair));
+
+ return regPair;
+}
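+// Illustrative round trip through the pair encoding above (a sketch; legacy x86 register
+// names REG_EAX/REG_EDX are assumed):
+//
+//   regPairNo pair = gen2regs2pair(REG_EAX, REG_EDX); // low half in EAX, high half in EDX
+//   assert(genRegPairLo(pair) == REG_EAX);            // recovered from the low REG_PAIR_NBITS bits
+//   assert(genRegPairHi(pair) == REG_EDX);            // recovered from the next REG_PAIR_NBITS bits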
+
+/*****************************************************************************/
+inline regMaskTP genRegPairMask(regPairNo regPair)
+{
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+
+ return genRegMask(genRegPairLo(regPair)) | genRegMask(genRegPairHi(regPair));
+}
+
+/*****************************************************************************
+ *
+ * Assumes that "reg" is of the given "type". Return the next unused reg number after "reg"
+ * of this type, else REG_NA if there are no more.
+ */
+
+inline regNumber regNextOfType(regNumber reg, var_types type)
+{
+ regNumber regReturn;
+
+#ifdef _TARGET_ARM_
+ if (type == TYP_DOUBLE)
+ {
+ // Skip odd FP registers for double-precision types
+ assert(floatRegCanHoldType(reg, type));
+ regReturn = regNumber(reg + 2);
+ }
+ else
+ {
+ regReturn = REG_NEXT(reg);
+ }
+#else // _TARGET_ARM_
+ regReturn = REG_NEXT(reg);
+#endif
+
+ if (varTypeIsFloating(type))
+ {
+ if (regReturn > REG_FP_LAST)
+ {
+ regReturn = REG_NA;
+ }
+ }
+ else
+ {
+ if (regReturn > REG_INT_LAST)
+ {
+ regReturn = REG_NA;
+ }
+ }
+
+ return regReturn;
+}
+
+/*****************************************************************************
+ *
+ * Type checks
+ */
+
+inline bool isRegPairType(int /* s/b "var_types" */ type)
+{
+#ifdef _TARGET_64BIT_
+ return false;
+#elif CPU_HAS_FP_SUPPORT
+ return type == TYP_LONG;
+#else
+ return type == TYP_LONG || type == TYP_DOUBLE;
+#endif
+}
+
+inline bool isFloatRegType(int /* s/b "var_types" */ type)
+{
+#if CPU_HAS_FP_SUPPORT
+ return type == TYP_DOUBLE || type == TYP_FLOAT;
+#else
+ return false;
+#endif
+}
+
+// If the WINDOWS_AMD64_ABI is defined make sure that _TARGET_AMD64_ is also defined.
+#if defined(WINDOWS_AMD64_ABI)
+#if !defined(_TARGET_AMD64_)
+#error When WINDOWS_AMD64_ABI is defined you must define _TARGET_AMD64_ as well.
+#endif
+#endif
+
+/*****************************************************************************/
+// Some sanity checks on some of the register masks
+// Stack pointer is never part of RBM_ALLINT
+C_ASSERT((RBM_ALLINT & RBM_SPBASE) == RBM_NONE);
+C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_SPBASE) == RBM_NONE);
+
+#if ETW_EBP_FRAMED
+// Frame pointer isn't either if we're supporting ETW frame chaining
+C_ASSERT((RBM_ALLINT & RBM_FPBASE) == RBM_NONE);
+C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RBM_NONE);
+#endif
+/*****************************************************************************/
+
+/*****************************************************************************/
+#endif // _TARGET_H_
+/*****************************************************************************/
diff --git a/src/jit/targetamd64.cpp b/src/jit/targetamd64.cpp
new file mode 100644
index 0000000000..0cb302ae34
--- /dev/null
+++ b/src/jit/targetamd64.cpp
@@ -0,0 +1,19 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_AMD64_)
+
+#include "target.h"
+
+const char* Target::g_tgtCPUName = "x64";
+const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L;
+
+#endif // _TARGET_AMD64_
diff --git a/src/jit/targetarm.cpp b/src/jit/targetarm.cpp
new file mode 100644
index 0000000000..f0ea5ca534
--- /dev/null
+++ b/src/jit/targetarm.cpp
@@ -0,0 +1,19 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_ARM_)
+
+#include "target.h"
+
+const char* Target::g_tgtCPUName = "arm";
+const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L;
+
+#endif // _TARGET_ARM_
diff --git a/src/jit/targetarm64.cpp b/src/jit/targetarm64.cpp
new file mode 100644
index 0000000000..2acbe1a050
--- /dev/null
+++ b/src/jit/targetarm64.cpp
@@ -0,0 +1,19 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_ARM64_)
+
+#include "target.h"
+
+const char* Target::g_tgtCPUName = "arm64";
+const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L;
+
+#endif // _TARGET_ARM64_
diff --git a/src/jit/targetx86.cpp b/src/jit/targetx86.cpp
new file mode 100644
index 0000000000..500f4e0651
--- /dev/null
+++ b/src/jit/targetx86.cpp
@@ -0,0 +1,19 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_X86_)
+
+#include "target.h"
+
+const char* Target::g_tgtCPUName = "x86";
+const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_L2R;
+
+#endif // _TARGET_X86_
diff --git a/src/jit/tinyarray.h b/src/jit/tinyarray.h
new file mode 100644
index 0000000000..17d7e044b2
--- /dev/null
+++ b/src/jit/tinyarray.h
@@ -0,0 +1,79 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef TINYARRAY_H
+#define TINYARRAY_H
+
+/*****************************************************************************/
+
+// This is an array packed into some kind of integral data type.
+// storageType is the integral type into which the array is packed,
+// itemType is the type of the array elements, and
+// bits_per_element is the size of each element in bits.
+// (An illustrative usage sketch follows the class definition.)
+template <class storageType, class itemType, int bits_per_element>
+class TinyArray
+{
+public:
+    // operator[] returns a 'ref' (usually a ref to the element type).
+    // This presents a problem if you want to implement something like a
+    // bit vector via this packed array, because you cannot make a ref to
+    // the element type.
+    // The trick is to define something that acts like a ref (TinyArrayRef in this case),
+    // which for our purposes means you can assign between it and our chosen
+    // element type.
+ class TinyArrayRef
+ {
+ public:
+ // this is really the getter for the array.
+ operator itemType()
+ {
+ storageType mask = ((1 << bits_per_element) - 1);
+ int shift = bits_per_element * index;
+
+ itemType result = (itemType)((*data >> shift) & mask);
+ return result;
+ }
+
+ void operator=(const itemType b)
+ {
+ storageType mask = ((1 << bits_per_element) - 1);
+ assert(itemType(b & mask) == b);
+
+ mask <<= bits_per_element * index;
+
+ *data &= ~mask;
+ *data |= b << (bits_per_element * index);
+ }
+ friend class TinyArray;
+
+ protected:
+ TinyArrayRef(storageType* d, int idx) : data(d), index(idx)
+ {
+ }
+
+ storageType* data;
+ int index;
+ };
+
+ storageType data;
+
+ void clear()
+ {
+ data = 0;
+ }
+
+ TinyArrayRef operator[](unsigned int n)
+ {
+        assert((n + 1) * bits_per_element <= sizeof(storageType) * 8); // the packed elements must fit in 'data'
+ return TinyArrayRef(&data, n);
+ }
+ // only use this for clearing it
+ void operator=(void* rhs)
+ {
+ assert(rhs == NULL);
+ data = 0;
+ }
+};
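+// Illustrative usage of TinyArray (a hypothetical sketch; this instantiation exists only to
+// show how the packing works):
+//
+//   TinyArray<unsigned, unsigned, 4> nibbles; // eight 4-bit elements packed into one 32-bit unsigned
+//   nibbles.clear();
+//   nibbles[0] = 0x3;                         // stored in bits [3:0] of 'data'
+//   nibbles[2] = 0x9;                         // stored in bits [11:8] of 'data'
+//   unsigned v = nibbles[2];                  // reads back 9 via TinyArrayRef's conversion operator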
+
+#endif // TINYARRAY_H
diff --git a/src/jit/titypes.h b/src/jit/titypes.h
new file mode 100644
index 0000000000..a659320709
--- /dev/null
+++ b/src/jit/titypes.h
@@ -0,0 +1,15 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+DEF_TI(TI_ERROR, "<ERROR>")
+DEF_TI(TI_REF, "Ref")
+DEF_TI(TI_STRUCT, "Struct")
+DEF_TI(TI_METHOD, "Method")
+DEF_TI(TI_BYTE, "Byte")
+DEF_TI(TI_SHORT, "Short")
+DEF_TI(TI_INT, "Int")
+DEF_TI(TI_LONG, "Long")
+DEF_TI(TI_FLOAT, "Float")
+DEF_TI(TI_DOUBLE, "Double")
+DEF_TI(TI_NULL, "Null")
diff --git a/src/jit/typeinfo.cpp b/src/jit/typeinfo.cpp
new file mode 100644
index 0000000000..51429cca38
--- /dev/null
+++ b/src/jit/typeinfo.cpp
@@ -0,0 +1,405 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX typeInfo XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "_typeinfo.h"
+
+BOOL Compiler::tiCompatibleWith(const typeInfo& child, const typeInfo& parent, bool normalisedForStack) const
+{
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+ if (VERBOSE && tiVerificationNeeded)
+ {
+ printf("\n");
+ printf(TI_DUMP_PADDING);
+ printf("Verifying compatibility against types: ");
+ child.Dump();
+ printf(" and ");
+ parent.Dump();
+ }
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+ BOOL compatible = typeInfo::tiCompatibleWith(info.compCompHnd, child, parent, normalisedForStack);
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+ if (VERBOSE && tiVerificationNeeded)
+ {
+ printf(compatible ? " [YES]" : " [NO]");
+ }
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+ return compatible;
+}
+
+BOOL Compiler::tiMergeCompatibleWith(const typeInfo& child, const typeInfo& parent, bool normalisedForStack) const
+{
+ return typeInfo::tiMergeCompatibleWith(info.compCompHnd, child, parent, normalisedForStack);
+}
+
+BOOL Compiler::tiMergeToCommonParent(typeInfo* pDest, const typeInfo* pSrc, bool* changed) const
+{
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+ if (VERBOSE && tiVerificationNeeded)
+ {
+ printf("\n");
+ printf(TI_DUMP_PADDING);
+ printf("Attempting to merge types: ");
+ pDest->Dump();
+ printf(" and ");
+ pSrc->Dump();
+ printf("\n");
+ }
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+ BOOL mergeable = typeInfo::tiMergeToCommonParent(info.compCompHnd, pDest, pSrc, changed);
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+ if (VERBOSE && tiVerificationNeeded)
+ {
+ printf(TI_DUMP_PADDING);
+ printf((mergeable == TRUE) ? "Merge successful" : "Couldn't merge types");
+ if (*changed)
+ {
+ assert(mergeable);
+ printf(", destination type changed to: ");
+ pDest->Dump();
+ }
+ printf("\n");
+ }
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+ return mergeable;
+}
+
+static BOOL tiCompatibleWithByRef(COMP_HANDLE CompHnd, const typeInfo& child, const typeInfo& parent)
+{
+ assert(parent.IsByRef());
+
+ if (!child.IsByRef())
+ {
+ return FALSE;
+ }
+
+ if (child.IsReadonlyByRef() && !parent.IsReadonlyByRef())
+ {
+ return FALSE;
+ }
+
+ // Byrefs are compatible if the underlying types are equivalent
+ typeInfo childTarget = ::DereferenceByRef(child);
+ typeInfo parentTarget = ::DereferenceByRef(parent);
+
+ if (typeInfo::AreEquivalent(childTarget, parentTarget))
+ {
+ return TRUE;
+ }
+
+ // Make sure that both types have a valid m_cls
+ if ((childTarget.IsType(TI_REF) || childTarget.IsType(TI_STRUCT)) &&
+ (parentTarget.IsType(TI_REF) || parentTarget.IsType(TI_STRUCT)))
+ {
+ return CompHnd->areTypesEquivalent(childTarget.GetClassHandle(), parentTarget.GetClassHandle());
+ }
+
+ return FALSE;
+}
+
+/*****************************************************************************
+ * Verify child is compatible with the template parent. Basically, that
+ * child is a "subclass" of parent - it can be substituted for parent
+ * anywhere. Note that if parent contains fancy flags, such as "uninitialized",
+ * "is this ptr", or "has byref local/field" info, then child must also
+ * contain those flags, otherwise FALSE will be returned!
+ *
+ * Rules for determining compatibility:
+ *
+ * If parent is a primitive type or value class, then child must be the
+ * same primitive type or value class. The exception is that the built in
+ * value classes System/Boolean etc. are treated as synonyms for
+ * TI_BYTE etc.
+ *
+ * If parent is a byref of a primitive type or value class, then child
+ * must be a byref of the same (rules same as above case).
+ *
+ * Byrefs are compatible only with byrefs.
+ *
+ * If parent is an object, child must be a subclass of it, implement it
+ * (if it is an interface), or be null.
+ *
+ * If parent is an array, child must be the same or subclassed array.
+ *
+ * If parent is a null objref, only null is compatible with it.
+ *
+ * If the "uninitialized", "by ref local/field", "this pointer" or other flags
+ * are different, the items are incompatible.
+ *
+ * parent CANNOT be an undefined (dead) item.
+ *
+ */
+
+BOOL typeInfo::tiCompatibleWith(COMP_HANDLE CompHnd,
+ const typeInfo& child,
+ const typeInfo& parent,
+ bool normalisedForStack)
+{
+ assert(child.IsDead() || !normalisedForStack || typeInfo::AreEquivalent(::NormaliseForStack(child), child));
+ assert(parent.IsDead() || !normalisedForStack || typeInfo::AreEquivalent(::NormaliseForStack(parent), parent));
+
+ if (typeInfo::AreEquivalent(child, parent))
+ {
+ return TRUE;
+ }
+
+ if (parent.IsUnboxedGenericTypeVar() || child.IsUnboxedGenericTypeVar())
+ {
+ return (FALSE); // need to have had child == parent
+ }
+ else if (parent.IsType(TI_REF))
+ {
+ // An uninitialized objRef is not compatible to initialized.
+ if (child.IsUninitialisedObjRef() && !parent.IsUninitialisedObjRef())
+ {
+ return FALSE;
+ }
+
+ if (child.IsNullObjRef())
+ { // NULL can be any reference type
+ return TRUE;
+ }
+ if (!child.IsType(TI_REF))
+ {
+ return FALSE;
+ }
+
+ return CompHnd->canCast(child.m_cls, parent.m_cls);
+ }
+ else if (parent.IsType(TI_METHOD))
+ {
+ if (!child.IsType(TI_METHOD))
+ {
+ return FALSE;
+ }
+
+ // Right now we don't bother merging method handles
+ return FALSE;
+ }
+ else if (parent.IsType(TI_STRUCT))
+ {
+ if (!child.IsType(TI_STRUCT))
+ {
+ return FALSE;
+ }
+
+ // Structures are compatible if they are equivalent
+ return CompHnd->areTypesEquivalent(child.m_cls, parent.m_cls);
+ }
+ else if (parent.IsByRef())
+ {
+ return tiCompatibleWithByRef(CompHnd, child, parent);
+ }
+#ifdef _TARGET_64BIT_
+ // On 64-bit targets we have precise representation for native int, so these rules
+ // represent the fact that the ECMA spec permits the implicit conversion
+ // between an int32 and a native int.
+ else if (parent.IsType(TI_INT) && typeInfo::AreEquivalent(nativeInt(), child))
+ {
+ return TRUE;
+ }
+ else if (typeInfo::AreEquivalent(nativeInt(), parent) && child.IsType(TI_INT))
+ {
+ return TRUE;
+ }
+#endif // _TARGET_64BIT_
+ return FALSE;
+}
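+// Illustrative expectations for the 64-bit native int rule above (a sketch; 'compHnd' stands for
+// any valid COMP_HANDLE, and typeInfo(TI_INT) is assumed to use the ti_types constructor from
+// _typeinfo.h):
+//
+//   assert(typeInfo::tiCompatibleWith(compHnd, typeInfo(TI_INT), typeInfo::nativeInt(), true));
+//   assert(typeInfo::tiCompatibleWith(compHnd, typeInfo::nativeInt(), typeInfo(TI_INT), true));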
+
+BOOL typeInfo::tiMergeCompatibleWith(COMP_HANDLE CompHnd,
+ const typeInfo& child,
+ const typeInfo& parent,
+ bool normalisedForStack)
+{
+ if (!child.IsPermanentHomeByRef() && parent.IsPermanentHomeByRef())
+ {
+ return FALSE;
+ }
+
+ return typeInfo::tiCompatibleWith(CompHnd, child, parent, normalisedForStack);
+}
+
+/*****************************************************************************
+ * Merge pDest and pSrc to find some commonality (e.g. a common parent).
+ * Copy the result to pDest, marking it dead if no commonality can be found.
+ *
+ * null ^ null -> null
+ * Object ^ null -> Object
+ * [I4 ^ null -> [I4
+ * InputStream ^ OutputStream -> Stream
+ * InputStream ^ NULL -> InputStream
+ * [I4 ^ Object -> Object
+ * [I4 ^ [Object -> Array
+ * [I4 ^ [R8 -> Array
+ * [Foo ^ I4 -> DEAD
+ * [Foo ^ [I1 -> Array
+ * [InputStream ^ [OutputStream -> Array
+ * DEAD ^ X -> DEAD
+ * [Intfc ^ [OutputStream -> Array
+ * Intf ^ [OutputStream -> Object
+ * [[InStream ^ [[OutStream -> Array
+ * [[InStream ^ [OutStream -> Array
+ * [[Foo ^ [Object -> Array
+ *
+ * Importantly:
+ * [I1 ^ [U1 -> either [I1 or [U1
+ * etc.
+ *
+ * Also, System/Int32 and I4 merge -> I4, etc.
+ *
+ * Returns FALSE if the merge was completely incompatible (i.e. the item became
+ * dead).
+ *
+ */
+
+BOOL typeInfo::tiMergeToCommonParent(COMP_HANDLE CompHnd, typeInfo* pDest, const typeInfo* pSrc, bool* changed)
+{
+ assert(pSrc->IsDead() || typeInfo::AreEquivalent(::NormaliseForStack(*pSrc), *pSrc));
+ assert(pDest->IsDead() || typeInfo::AreEquivalent(::NormaliseForStack(*pDest), *pDest));
+
+ // Merge the auxiliary information like "this" pointer tracking, etc...
+
+ // Remember the pre-state, so we can tell if it changed.
+ *changed = false;
+ DWORD destFlagsBefore = pDest->m_flags;
+
+ // This bit is only set if both pDest and pSrc have it set
+ pDest->m_flags &= (pSrc->m_flags | ~TI_FLAG_THIS_PTR);
+
+ // This bit is set if either pDest or pSrc have it set
+ pDest->m_flags |= (pSrc->m_flags & TI_FLAG_UNINIT_OBJREF);
+
+ // This bit is set if either pDest or pSrc have it set
+ pDest->m_flags |= (pSrc->m_flags & TI_FLAG_BYREF_READONLY);
+
+ // If the byref wasn't permanent home in both sides, then merge won't have the bit set
+ pDest->m_flags &= (pSrc->m_flags | ~TI_FLAG_BYREF_PERMANENT_HOME);
+
+ if (pDest->m_flags != destFlagsBefore)
+ {
+ *changed = true;
+ }
+
+ // OK the main event. Merge the main types
+ if (typeInfo::AreEquivalent(*pDest, *pSrc))
+ {
+ return (TRUE);
+ }
+
+ if (pDest->IsUnboxedGenericTypeVar() || pSrc->IsUnboxedGenericTypeVar())
+ {
+ // Should have had *pDest == *pSrc
+ goto FAIL;
+ }
+ if (pDest->IsType(TI_REF))
+ {
+ if (pSrc->IsType(TI_NULL))
+ { // NULL can be any reference type
+ return TRUE;
+ }
+ if (!pSrc->IsType(TI_REF))
+ {
+ goto FAIL;
+ }
+
+        // Ask the EE to find the common parent. This always succeeds since System.Object always works.
+ CORINFO_CLASS_HANDLE pDestClsBefore = pDest->m_cls;
+ pDest->m_cls = CompHnd->mergeClasses(pDest->GetClassHandle(), pSrc->GetClassHandle());
+ if (pDestClsBefore != pDest->m_cls)
+ {
+ *changed = true;
+ }
+ return TRUE;
+ }
+ else if (pDest->IsType(TI_NULL))
+ {
+ if (pSrc->IsType(TI_REF)) // NULL can be any reference type
+ {
+ *pDest = *pSrc;
+ *changed = true;
+ return TRUE;
+ }
+ goto FAIL;
+ }
+ else if (pDest->IsType(TI_STRUCT))
+ {
+ if (pSrc->IsType(TI_STRUCT) && CompHnd->areTypesEquivalent(pDest->GetClassHandle(), pSrc->GetClassHandle()))
+ {
+ return TRUE;
+ }
+ goto FAIL;
+ }
+ else if (pDest->IsByRef())
+ {
+ return tiCompatibleWithByRef(CompHnd, *pSrc, *pDest);
+ }
+#ifdef _TARGET_64BIT_
+ // On 64-bit targets we have precise representation for native int, so these rules
+ // represent the fact that the ECMA spec permits the implicit conversion
+ // between an int32 and a native int.
+ else if (typeInfo::AreEquivalent(*pDest, typeInfo::nativeInt()) && pSrc->IsType(TI_INT))
+ {
+ return TRUE;
+ }
+ else if (typeInfo::AreEquivalent(*pSrc, typeInfo::nativeInt()) && pDest->IsType(TI_INT))
+ {
+ *pDest = *pSrc;
+ *changed = true;
+ return TRUE;
+ }
+#endif // _TARGET_64BIT_
+
+FAIL:
+ *pDest = typeInfo();
+ return FALSE;
+}
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+// Utility method to have a detailed dump of a TypeInfo object
+void typeInfo::Dump() const
+{
+ char flagsStr[8];
+
+ flagsStr[0] = ((m_flags & TI_FLAG_UNINIT_OBJREF) != 0) ? 'U' : '-';
+ flagsStr[1] = ((m_flags & TI_FLAG_BYREF) != 0) ? 'B' : '-';
+ flagsStr[2] = ((m_flags & TI_FLAG_BYREF_READONLY) != 0) ? 'R' : '-';
+ flagsStr[3] = ((m_flags & TI_FLAG_NATIVE_INT) != 0) ? 'N' : '-';
+ flagsStr[4] = ((m_flags & TI_FLAG_THIS_PTR) != 0) ? 'T' : '-';
+ flagsStr[5] = ((m_flags & TI_FLAG_BYREF_PERMANENT_HOME) != 0) ? 'P' : '-';
+ flagsStr[6] = ((m_flags & TI_FLAG_GENERIC_TYPE_VAR) != 0) ? 'G' : '-';
+ flagsStr[7] = '\0';
+
+ printf("[%s(%X) {%s}]", tiType2Str(m_bits.type), m_cls, flagsStr);
+}
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
diff --git a/src/jit/typelist.h b/src/jit/typelist.h
new file mode 100644
index 0000000000..ed5884359d
--- /dev/null
+++ b/src/jit/typelist.h
@@ -0,0 +1,81 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#define GCS EA_GCREF
+#define BRS EA_BYREF
+#define PS EA_PTRSIZE
+#define PST (sizeof(void*) / sizeof(int))
+
+#ifdef _TARGET_64BIT_
+#define VTF_I32 0
+#define VTF_I64 VTF_I
+#else
+#define VTF_I32 VTF_I
+#define VTF_I64 0
+#endif
+
+/* tn - TYP_name
+ nm - name string
+ jitType - The jit compresses types that are 'equivalent', this is the jit type genActualType()
+ verType - Used for type checking
+ sz - size in bytes (genTypeSize(t))
+ sze - size in bytes for the emitter (GC types are encoded) (emitTypeSize(t))
+ asze- size in bytes for the emitter (GC types are encoded) (emitActualTypeSize(t))
+ st - stack slots (slots are sizeof(int) bytes) (genTypeStSzs())
+ al - alignment
+ tf - flags
+ howUsed - If a variable is used (referenced) as the type
+
+DEF_TP(tn ,nm , jitType, verType, sz,sze,asze, st,al, tf, howUsed )
+*/
+
+// clang-format off
+DEF_TP(UNDEF ,"<UNDEF>" , TYP_UNDEF, TI_ERROR, 0, 0, 0, 0, 0, VTF_ANY, 0 )
+DEF_TP(VOID ,"void" , TYP_VOID, TI_ERROR, 0, 0, 0, 0, 0, VTF_ANY, 0 )
+
+DEF_TP(BOOL ,"bool" , TYP_INT, TI_BYTE, 1, 1, 4, 1, 1, VTF_INT|VTF_UNS,TYPE_REF_INT)
+DEF_TP(BYTE ,"byte" , TYP_INT, TI_BYTE, 1, 1, 4, 1, 1, VTF_INT, TYPE_REF_INT)
+DEF_TP(UBYTE ,"ubyte" , TYP_INT, TI_BYTE, 1, 1, 4, 1, 1, VTF_INT|VTF_UNS,TYPE_REF_INT)
+
+DEF_TP(CHAR ,"char" , TYP_INT, TI_SHORT, 2, 2, 4, 1, 2, VTF_INT|VTF_UNS,TYPE_REF_INT)
+DEF_TP(SHORT ,"short" , TYP_INT, TI_SHORT, 2, 2, 4, 1, 2, VTF_INT, TYPE_REF_INT)
+DEF_TP(USHORT ,"ushort" , TYP_INT, TI_SHORT, 2, 2, 4, 1, 2, VTF_INT|VTF_UNS,TYPE_REF_INT)
+
+DEF_TP(INT ,"int" , TYP_INT, TI_INT, 4, 4, 4, 1, 4, VTF_INT|VTF_I32, TYPE_REF_INT)
+DEF_TP(UINT ,"uint" , TYP_INT, TI_INT, 4, 4, 4, 1, 4, VTF_INT|VTF_UNS|VTF_I32,TYPE_REF_INT) // Only used in GT_CAST nodes
+
+DEF_TP(LONG ,"long" , TYP_LONG, TI_LONG, 8, PS, PS, 2, 8, VTF_INT|VTF_I64, TYPE_REF_LNG)
+DEF_TP(ULONG ,"ulong" , TYP_LONG, TI_LONG, 8, PS, PS, 2, 8, VTF_INT|VTF_UNS|VTF_I64,TYPE_REF_LNG) // Only used in GT_CAST nodes
+
+DEF_TP(FLOAT ,"float" , TYP_FLOAT, TI_FLOAT, 4, 4, 4, 1, 4, VTF_FLT, TYPE_REF_FLT)
+DEF_TP(DOUBLE ,"double" , TYP_DOUBLE, TI_DOUBLE,8, 8, 8, 2, 8, VTF_FLT, TYPE_REF_DBL)
+
+DEF_TP(REF ,"ref" , TYP_REF, TI_REF, PS,GCS,GCS, PST,PS, VTF_ANY|VTF_GCR|VTF_I,TYPE_REF_PTR)
+DEF_TP(BYREF ,"byref" , TYP_BYREF, TI_ERROR,PS,BRS,BRS, PST,PS, VTF_ANY|VTF_BYR|VTF_I,TYPE_REF_BYR)
+DEF_TP(ARRAY ,"array" , TYP_REF, TI_REF, PS,GCS,GCS, PST,PS, VTF_ANY|VTF_GCR|VTF_I,TYPE_REF_PTR)
+DEF_TP(STRUCT ,"struct" , TYP_STRUCT, TI_STRUCT,0, 0, 0, 1, 4, VTF_S, TYPE_REF_STC)
+
+DEF_TP(BLK ,"blk" , TYP_BLK, TI_ERROR, 0, 0, 0, 1, 4, VTF_ANY, 0 ) // blob of memory
+DEF_TP(LCLBLK ,"lclBlk" , TYP_LCLBLK, TI_ERROR, 0, 0, 0, 1, 4, VTF_ANY, 0 ) // preallocated memory for locspace
+
+DEF_TP(PTR ,"pointer" , TYP_PTR, TI_ERROR,PS, PS, PS, PST,PS, VTF_ANY|VTF_I, TYPE_REF_PTR) // (not currently used)
+DEF_TP(FNC ,"function", TYP_FNC, TI_ERROR, 0, PS, PS, 0, 0, VTF_ANY|VTF_I, 0 )
+
+#ifdef FEATURE_SIMD
+// Amd64: The size and alignment of a SIMD vector vary at JIT time based on whether the target arch supports AVX or SSE2.
+DEF_TP(SIMD8 ,"simd8" , TYP_SIMD8, TI_STRUCT, 8, 8, 8, 2, 8, VTF_S, TYPE_REF_STC)
+DEF_TP(SIMD12 ,"simd12" , TYP_SIMD12, TI_STRUCT,12,16, 16, 4,16, VTF_S, TYPE_REF_STC)
+DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, TI_STRUCT,16,16, 16, 4,16, VTF_S, TYPE_REF_STC)
+DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, TI_STRUCT,32,32, 32, 8,16, VTF_S, TYPE_REF_STC)
+#endif // FEATURE_SIMD
+
+DEF_TP(UNKNOWN ,"unknown" ,TYP_UNKNOWN, TI_ERROR, 0, 0, 0, 0, 0, VTF_ANY, 0 )
+// clang-format on
+
+#undef GCS
+#undef BRS
+#undef PS
+#undef PST
+#undef VTF_I32
+#undef VTF_I64
diff --git a/src/jit/unwind.cpp b/src/jit/unwind.cpp
new file mode 100644
index 0000000000..4568fed75a
--- /dev/null
+++ b/src/jit/unwind.cpp
@@ -0,0 +1,171 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX UnwindInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if FEATURE_EH_FUNCLETS
+
+//------------------------------------------------------------------------
+// Compiler::unwindGetFuncLocations: Get the start/end emitter locations for this
+// function or funclet. If 'getHotSectionData' is true, get the start/end locations
+// for the hot section. Otherwise, get the data for the cold section.
+//
+// Note that we grab these locations before the prolog and epilogs are generated, so the
+// locations must remain correct after the prolog and epilogs are generated.
+//
+// For the prolog, instructions are put in the special, preallocated, prolog instruction group.
+// We don't want to expose the emitPrologIG unnecessarily (locations are actually pointers to
+// emitter instruction groups). Since we know the offset of the start of the function/funclet,
+// where the prolog is, will be zero, we use a nullptr start location to indicate that.
+//
+// There is no instruction group beyond the end of the function, so there is no
+// location to indicate that. Once again, use nullptr for that.
+//
+// Intermediate locations point at the first instruction group of a funclet, which is a
+// placeholder IG. These are converted to real IGs, not deleted and replaced, so the location
+// remains valid.
+//
+// Arguments:
+// func - main function or funclet to get locations for.
+// getHotSectionData - 'true' to get the hot section data, 'false' to get the cold section data.
+// ppStartLoc - OUT parameter. Set to the start emitter location.
+//    ppEndLoc           - OUT parameter. Set to the end emitter location (the location immediately after
+// the range; the 'end' location is not inclusive).
+//
+// Notes:
+// A start location of nullptr means the beginning of the code.
+// An end location of nullptr means the end of the code.
+//
+void Compiler::unwindGetFuncLocations(FuncInfoDsc* func,
+ bool getHotSectionData,
+ /* OUT */ emitLocation** ppStartLoc,
+ /* OUT */ emitLocation** ppEndLoc)
+{
+ if (func->funKind == FUNC_ROOT)
+ {
+ // Since all funclets are pulled out of line, the main code size is everything
+ // up to the first handler. If the function is hot/cold split, we need to get the
+ // appropriate sub-range.
+
+ if (getHotSectionData)
+ {
+ *ppStartLoc = nullptr; // nullptr emit location means the beginning of the code. This is to handle the first
+ // fragment prolog.
+
+ if (fgFirstColdBlock != nullptr)
+ {
+ // The hot section only goes up to the cold section
+ assert(fgFirstFuncletBB == nullptr);
+
+ *ppEndLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(fgFirstColdBlock));
+ }
+ else
+ {
+ if (fgFirstFuncletBB != nullptr)
+ {
+ *ppEndLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(fgFirstFuncletBB));
+ }
+ else
+ {
+ *ppEndLoc = nullptr; // nullptr end location means the end of the code
+ }
+ }
+ }
+ else
+ {
+ assert(fgFirstFuncletBB == nullptr); // TODO-CQ: support hot/cold splitting in functions with EH
+ assert(fgFirstColdBlock != nullptr); // There better be a cold section!
+
+ *ppStartLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(fgFirstColdBlock));
+ *ppEndLoc = nullptr; // nullptr end location means the end of the code
+ }
+ }
+ else
+ {
+ assert(getHotSectionData); // TODO-CQ: support funclets in cold section
+
+ EHblkDsc* HBtab = ehGetDsc(func->funEHIndex);
+
+ if (func->funKind == FUNC_FILTER)
+ {
+ assert(HBtab->HasFilter());
+ *ppStartLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(HBtab->ebdFilter));
+ *ppEndLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(HBtab->ebdHndBeg));
+ }
+ else
+ {
+ assert(func->funKind == FUNC_HANDLER);
+ *ppStartLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(HBtab->ebdHndBeg));
+ *ppEndLoc = (HBtab->ebdHndLast->bbNext == nullptr)
+ ? nullptr
+ : new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(HBtab->ebdHndLast->bbNext));
+ }
+ }
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+#if defined(_TARGET_AMD64_)
+
+// See unwindAmd64.cpp
+
+#elif defined(_TARGET_ARM64_)
+
+// See unwindArm64.cpp
+
+#elif defined(_TARGET_ARM_)
+
+// See unwindArm.cpp
+
+#elif defined(_TARGET_X86_)
+
+// Stub routines that do nothing
+void Compiler::unwindBegProlog()
+{
+}
+void Compiler::unwindEndProlog()
+{
+}
+void Compiler::unwindBegEpilog()
+{
+}
+void Compiler::unwindEndEpilog()
+{
+}
+void Compiler::unwindReserve()
+{
+}
+void Compiler::unwindEmit(void* pHotCode, void* pColdCode)
+{
+}
+void Compiler::unwindPush(regNumber reg)
+{
+}
+void Compiler::unwindAllocStack(unsigned size)
+{
+}
+void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset)
+{
+}
+void Compiler::unwindSaveReg(regNumber reg, unsigned offset)
+{
+}
+
+#else // _TARGET_*
+
+#error Unsupported or unset target architecture
+
+#endif // _TARGET_*
diff --git a/src/jit/unwind.h b/src/jit/unwind.h
new file mode 100644
index 0000000000..27d23b1b54
--- /dev/null
+++ b/src/jit/unwind.h
@@ -0,0 +1,852 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Unwind Info XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifdef _TARGET_ARMARCH_
+
+// Windows no longer imposes a maximum prolog size. However, we still have an
+// assert here just to inform us if we increase the size of the prolog
+// accidentally, as there is still a slight performance advantage in the
+// OS unwinder to having as few unwind codes as possible.
+// You can increase this "max" number if necessary.
+
+#if defined(_TARGET_ARM_)
+const unsigned MAX_PROLOG_SIZE_BYTES = 40;
+const unsigned MAX_EPILOG_SIZE_BYTES = 40;
+#define UWC_END 0xFF // "end" unwind code
+#define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 19)
+#define UW_MAX_CODE_WORDS_COUNT 15 // Max number that can be encoded in the "Code Words" field of the .pdata record
+#define UW_MAX_EPILOG_START_INDEX 0xFFU // Max number that can be encoded in the "Epilog Start Index" field
+ // of the .pdata record
+#elif defined(_TARGET_ARM64_)
+const unsigned MAX_PROLOG_SIZE_BYTES = 100;
+const unsigned MAX_EPILOG_SIZE_BYTES = 100;
+#define UWC_END 0xE4 // "end" unwind code
+#define UWC_END_C 0xE5 // "end_c" unwind code
+#define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20)
+#define UW_MAX_CODE_WORDS_COUNT 31
+#define UW_MAX_EPILOG_START_INDEX 0x3FFU
+#endif // _TARGET_ARM64_
+
+#define UW_MAX_EPILOG_COUNT 31 // Max number that can be encoded in the "Epilog count" field
+ // of the .pdata record
+#define UW_MAX_EXTENDED_CODE_WORDS_COUNT 0xFFU // Max number that can be encoded in the "Extended Code Words"
+ // field of the .pdata record
+#define UW_MAX_EXTENDED_EPILOG_COUNT 0xFFFFU // Max number that can be encoded in the "Extended Epilog Count"
+ // field of the .pdata record
+#define UW_MAX_EPILOG_START_OFFSET 0x3FFFFU // Max number that can be encoded in the "Epilog Start Offset"
+ // field of the .pdata record
+
+//
+// Forward declaration of class defined in emit.h
+//
+
+class emitLocation;
+
+//
+// Forward declarations of classes defined in this file
+//
+
+class UnwindCodesBase;
+class UnwindPrologCodes;
+class UnwindEpilogCodes;
+class UnwindEpilogInfo;
+class UnwindFragmentInfo;
+class UnwindInfo;
+
+// UnwindBase: A base class shared by the unwind classes that require
+// a Compiler* for memory allocation.
+
+class UnwindBase
+{
+protected:
+ UnwindBase(Compiler* comp) : uwiComp(comp)
+ {
+ }
+
+ UnwindBase()
+ {
+ }
+ ~UnwindBase()
+ {
+ }
+
+// TODO: How do we get the ability to access uwiComp without error on Clang?
+#if defined(DEBUG) && !defined(__GNUC__)
+
+ template <typename T>
+ T dspPtr(T p)
+ {
+ return uwiComp->dspPtr(p);
+ }
+
+ template <typename T>
+ T dspOffset(T o)
+ {
+ return uwiComp->dspOffset(o);
+ }
+
+ static char* dspBool(bool b)
+ {
+ return (b) ? "true" : "false";
+ }
+
+#endif // DEBUG
+
+ //
+ // Data
+ //
+
+ Compiler* uwiComp;
+};
+
+// UnwindCodesBase: A base class shared by the classes used to represent the prolog
+// and epilog unwind codes.
+
+class UnwindCodesBase
+{
+public:
+ // Add a single unwind code.
+
+ virtual void AddCode(BYTE b1) = 0;
+ virtual void AddCode(BYTE b1, BYTE b2) = 0;
+ virtual void AddCode(BYTE b1, BYTE b2, BYTE b3) = 0;
+ virtual void AddCode(BYTE b1, BYTE b2, BYTE b3, BYTE b4) = 0;
+
+ // Get access to the unwind codes
+
+ virtual BYTE* GetCodes() = 0;
+
+ bool IsEndCode(BYTE b)
+ {
+#if defined(_TARGET_ARM_)
+ return b >= 0xFD;
+#elif defined(_TARGET_ARM64_)
+ return (b == UWC_END); // TODO-ARM64-Bug?: what about the "end_c" code?
+#endif // _TARGET_ARM64_
+ }
+
+#ifdef DEBUG
+
+ unsigned GetCodeSizeFromUnwindCodes(bool isProlog);
+
+#endif // DEBUG
+};
+
+// UnwindPrologCodes: represents the unwind codes for a prolog sequence.
+// Prolog unwind codes arrive in reverse order from how they will be emitted.
+// Store them as a stack, storing from the end of an array towards the beginning.
+// This class is also re-used as the final location of the consolidated unwind
+// information for a function, including unwind info header, the prolog codes,
+// and any epilog codes.
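+//
+// Illustrative sketch of the stack behavior (hypothetical code bytes 0x01, 0x02, 0x03):
+//
+//   UnwindPrologCodes upc(comp); // the constructor pre-pushes four UWC_END bytes at the end of upcMem
+//   upc.AddCode(0x01);           // arrives first, will be emitted last
+//   upc.AddCode(0x02);
+//   upc.AddCode(0x03);           // arrives last, will be emitted first
+//   // GetCodes() now points at 0x03, 0x02, 0x01, UWC_END, ... and Size() returns 4
+//   // (three codes plus exactly one counted "end" code).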
+
+class UnwindPrologCodes : public UnwindBase, public UnwindCodesBase
+{
+ // UPC_LOCAL_COUNT is the amount of memory local to this class. For ARM mscorlib.dll, the maximum size is 34.
+ // Here is a histogram of other interesting sizes:
+ // <=16 79%
+ // <=24 96%
+ // <=32 99%
+ // From this data, we choose to use 24.
+
+ static const int UPC_LOCAL_COUNT = 24;
+
+public:
+ UnwindPrologCodes(Compiler* comp)
+ : UnwindBase(comp)
+ , upcMem(upcMemLocal)
+ , upcMemSize(UPC_LOCAL_COUNT)
+ , upcCodeSlot(UPC_LOCAL_COUNT)
+ , upcHeaderSlot(-1)
+ , upcEpilogSlot(-1)
+ {
+ // Assume we've got a normal end code.
+ // Push four so we can generate an array that is a multiple of 4 bytes in size with the
+ // end codes (and padding) already in place. One is the end code for the prolog codes,
+ // three are end-of-array alignment padding.
+ PushByte(UWC_END);
+ PushByte(UWC_END);
+ PushByte(UWC_END);
+ PushByte(UWC_END);
+ }
+
+ //
+ // Implementation of UnwindCodesBase
+ //
+
+ virtual void AddCode(BYTE b1)
+ {
+ PushByte(b1);
+ }
+
+ virtual void AddCode(BYTE b1, BYTE b2)
+ {
+ PushByte(b2);
+ PushByte(b1);
+ }
+
+ virtual void AddCode(BYTE b1, BYTE b2, BYTE b3)
+ {
+ PushByte(b3);
+ PushByte(b2);
+ PushByte(b1);
+ }
+
+ virtual void AddCode(BYTE b1, BYTE b2, BYTE b3, BYTE b4)
+ {
+ PushByte(b4);
+ PushByte(b3);
+ PushByte(b2);
+ PushByte(b1);
+ }
+
+ // Return a pointer to the first unwind code byte
+ virtual BYTE* GetCodes()
+ {
+ assert(upcCodeSlot < upcMemSize); // There better be at least one code!
+ return &upcMem[upcCodeSlot];
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+
+ BYTE GetByte(int index)
+ {
+ assert(upcCodeSlot <= index && index < upcMemSize);
+ return upcMem[index];
+ }
+
+ // Push a single byte on the unwind code stack
+ void PushByte(BYTE b)
+ {
+ if (upcCodeSlot == 0)
+ {
+ // We've run out of space! Reallocate, and copy everything to a new array.
+ EnsureSize(upcMemSize + 1);
+ }
+
+ --upcCodeSlot;
+ noway_assert(0 <= upcCodeSlot && upcCodeSlot < upcMemSize);
+
+ upcMem[upcCodeSlot] = b;
+ }
+
+ // Return the size of the unwind codes, in bytes. The size is the exact size, not an aligned size.
+ // The size includes exactly one "end" code.
+ int Size()
+ {
+ // -3 because we put 4 "end" codes at the end in the constructor, and we shouldn't count that here
+        // -3 because the constructor pushes 4 "end" codes, but only one of them should be counted here
+ }
+
+ void SetFinalSize(int headerBytes, int epilogBytes);
+
+ void AddHeaderWord(DWORD d);
+
+ void GetFinalInfo(/* OUT */ BYTE** ppUnwindBlock, /* OUT */ ULONG* pUnwindBlockSize);
+
+ // AppendEpilog: copy the epilog bytes to the next epilog bytes slot
+ void AppendEpilog(UnwindEpilogInfo* pEpi);
+
+ // Match the prolog codes to a set of epilog codes
+ int Match(UnwindEpilogInfo* pEpi);
+
+ // Copy the prolog codes from another prolog
+ void CopyFrom(UnwindPrologCodes* pCopyFrom);
+
+ UnwindPrologCodes()
+ {
+ }
+ ~UnwindPrologCodes()
+ {
+ }
+
+#ifdef DEBUG
+ void Dump(int indent = 0);
+#endif // DEBUG
+
+private:
+ void EnsureSize(int requiredSize);
+
+ // No copy constructor or operator=
+ UnwindPrologCodes(const UnwindPrologCodes& info);
+ UnwindPrologCodes& operator=(const UnwindPrologCodes&);
+
+ //
+ // Data
+ //
+
+ // To store the unwind codes, we first use a local array that should satisfy almost all cases.
+ // If there are more unwind codes, we dynamically allocate memory.
+ BYTE upcMemLocal[UPC_LOCAL_COUNT];
+ BYTE* upcMem;
+
+ // upcMemSize is the number of bytes in upcMem. This is equal to UPC_LOCAL_COUNT unless
+ // we've dynamically allocated memory to store the codes.
+ int upcMemSize;
+
+ // upcCodeSlot points to the last unwind code added to the array. The array is filled in from
+ // the end, so it starts pointing one beyond the array end.
+ int upcCodeSlot;
+
+    // upcHeaderSlot points to the last header byte prepended to the array. Header bytes are
+ // filled in from the beginning, and only after SetFinalSize() is called.
+ int upcHeaderSlot;
+
+ // upcEpilogSlot points to the next epilog location to fill
+ int upcEpilogSlot;
+
+ // upcUnwindBlockSlot is only set after SetFinalSize() is called. It is the index of the first
+ // byte of the final unwind data, namely the first byte of the header.
+ int upcUnwindBlockSlot;
+};
+
+// UnwindEpilogCodes: represents the unwind codes for a single epilog sequence.
+// Epilog unwind codes arrive in the order they will be emitted. Store them as an array,
+// adding new ones to the end of the array.
+
+class UnwindEpilogCodes : public UnwindBase, public UnwindCodesBase
+{
+ // UEC_LOCAL_COUNT is the amount of memory local to this class. For ARM mscorlib.dll, the maximum size is 6,
+ // while 89% of epilogs fit in 4. So, set it to 4 to maintain array alignment and hit most cases.
+ static const int UEC_LOCAL_COUNT = 4;
+
+public:
+ UnwindEpilogCodes(Compiler* comp)
+ : UnwindBase(comp), uecMem(uecMemLocal), uecMemSize(UEC_LOCAL_COUNT), uecCodeSlot(-1), uecFinalized(false)
+ {
+ }
+
+ //
+ // Implementation of UnwindCodesBase
+ //
+
+ virtual void AddCode(BYTE b1)
+ {
+ AppendByte(b1);
+ }
+
+ virtual void AddCode(BYTE b1, BYTE b2)
+ {
+ AppendByte(b1);
+ AppendByte(b2);
+ }
+
+ virtual void AddCode(BYTE b1, BYTE b2, BYTE b3)
+ {
+ AppendByte(b1);
+ AppendByte(b2);
+ AppendByte(b3);
+ }
+
+ virtual void AddCode(BYTE b1, BYTE b2, BYTE b3, BYTE b4)
+ {
+ AppendByte(b1);
+ AppendByte(b2);
+ AppendByte(b3);
+ AppendByte(b4);
+ }
+
+ // Return a pointer to the first unwind code byte
+ virtual BYTE* GetCodes()
+ {
+ assert(uecFinalized);
+
+ // Codes start at the beginning
+ return uecMem;
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+
+ BYTE GetByte(int index)
+ {
+ assert(0 <= index && index <= uecCodeSlot);
+ return uecMem[index];
+ }
+
+ // Add a single byte on the unwind code array
+ void AppendByte(BYTE b)
+ {
+ if (uecCodeSlot == uecMemSize - 1)
+ {
+ // We've run out of space! Reallocate, and copy everything to a new array.
+ EnsureSize(uecMemSize + 1);
+ }
+
+ ++uecCodeSlot;
+ noway_assert(0 <= uecCodeSlot && uecCodeSlot < uecMemSize);
+
+ uecMem[uecCodeSlot] = b;
+ }
+
+ // Return the size of the unwind codes, in bytes. The size is the exact size, not an aligned size.
+ int Size()
+ {
+ if (uecFinalized)
+ {
+ // Add one because uecCodeSlot is 0-based
+ return uecCodeSlot + 1;
+ }
+ else
+ {
+ // Add one because uecCodeSlot is 0-based, and one for an "end" code that isn't stored (yet).
+ return uecCodeSlot + 2;
+ }
+ }
+
+ void FinalizeCodes()
+ {
+ assert(!uecFinalized);
+ noway_assert(0 <= uecCodeSlot && uecCodeSlot < uecMemSize); // There better be at least one code!
+ BYTE lastCode = uecMem[uecCodeSlot];
+ if (!IsEndCode(lastCode)) // If the last code is an end code, we don't need to append one.
+ {
+ AppendByte(UWC_END); // Add a default "end" code to the end of the array of unwind codes
+ }
+ uecFinalized = true; // With the "end" code in place, now we're done
+
+#ifdef DEBUG
+ unsigned codeSize = GetCodeSizeFromUnwindCodes(false);
+ assert(codeSize <= MAX_EPILOG_SIZE_BYTES);
+#endif // DEBUG
+ }
+
+ UnwindEpilogCodes()
+ {
+ }
+ ~UnwindEpilogCodes()
+ {
+ }
+
+#ifdef DEBUG
+ void Dump(int indent = 0);
+#endif // DEBUG
+
+private:
+ void EnsureSize(int requiredSize);
+
+ // No destructor, copy constructor or operator=
+ UnwindEpilogCodes(const UnwindEpilogCodes& info);
+ UnwindEpilogCodes& operator=(const UnwindEpilogCodes&);
+
+ //
+ // Data
+ //
+
+ // To store the unwind codes, we first use a local array that should satisfy almost all cases.
+ // If there are more unwind codes, we dynamically allocate memory.
+ BYTE uecMemLocal[UEC_LOCAL_COUNT];
+ BYTE* uecMem;
+
+ // uecMemSize is the number of bytes/slots in uecMem. This is equal to UEC_LOCAL_COUNT unless
+ // we've dynamically allocated memory to store the codes.
+ int uecMemSize;
+
+ // uecCodeSlot points to the last unwind code added to the array. The array is filled in from
+ // the beginning, so it starts at -1.
+ int uecCodeSlot;
+
+ // Is the unwind information finalized? Finalized info has an end code appended.
+ bool uecFinalized;
+};
+
+// UnwindEpilogInfo: represents the unwind information for a single epilog sequence. Epilogs for a
+// single function/funclet are in a linked list.
+
+class UnwindEpilogInfo : public UnwindBase
+{
+ friend class UnwindFragmentInfo;
+
+ static const unsigned EPI_ILLEGAL_OFFSET = 0xFFFFFFFF;
+
+public:
+ UnwindEpilogInfo(Compiler* comp)
+ : UnwindBase(comp)
+ , epiNext(NULL)
+ , epiEmitLocation(NULL)
+ , epiCodes(comp)
+ , epiStartOffset(EPI_ILLEGAL_OFFSET)
+ , epiMatches(false)
+ , epiStartIndex(-1)
+ {
+ }
+
+ void CaptureEmitLocation();
+
+ void FinalizeOffset();
+
+ void FinalizeCodes()
+ {
+ epiCodes.FinalizeCodes();
+ }
+
+ UNATIVE_OFFSET GetStartOffset()
+ {
+ assert(epiStartOffset != EPI_ILLEGAL_OFFSET);
+ return epiStartOffset;
+ }
+
+ int GetStartIndex()
+ {
+ assert(epiStartIndex != -1);
+ return epiStartIndex; // The final "Epilog Start Index" of this epilog's unwind codes
+ }
+
+ void SetStartIndex(int index)
+ {
+ assert(epiStartIndex == -1);
+ epiStartIndex = (int)index;
+ }
+
+ void SetMatches()
+ {
+ epiMatches = true;
+ }
+
+ bool Matches()
+ {
+ return epiMatches;
+ }
+
+ // Size of epilog unwind codes in bytes
+ int Size()
+ {
+ return epiCodes.Size();
+ }
+
+ // Return a pointer to the first unwind code byte
+ BYTE* GetCodes()
+ {
+ return epiCodes.GetCodes();
+ }
+
+ // Match the codes to a set of epilog codes
+ int Match(UnwindEpilogInfo* pEpi);
+
+ UnwindEpilogInfo()
+ {
+ }
+ ~UnwindEpilogInfo()
+ {
+ }
+
+#ifdef DEBUG
+ void Dump(int indent = 0);
+#endif // DEBUG
+
+private:
+ // No copy constructor or operator=
+ UnwindEpilogInfo(const UnwindEpilogInfo& info);
+ UnwindEpilogInfo& operator=(const UnwindEpilogInfo&);
+
+ //
+ // Data
+ //
+
+ UnwindEpilogInfo* epiNext;
+ emitLocation* epiEmitLocation; // The emitter location of the beginning of the epilog
+ UnwindEpilogCodes epiCodes;
+ UNATIVE_OFFSET epiStartOffset; // Actual offset of the epilog, in bytes, from the start of the function. Set in
+ // FinalizeOffset().
+ bool epiMatches; // Do the epilog unwind codes match some other set of codes? If so, we don't copy these to the
+ // final set; we just point to another set.
+ int epiStartIndex; // The final "Epilog Start Index" of this epilog's unwind codes
+};
+
+// UnwindFragmentInfo: represents all the unwind information for a single fragment of a function or funclet.
+// A fragment is a section with a code size less than the maximum unwind code size: either 512K bytes, or
+// that specified by COMPlus_JitSplitFunctionSize. In most cases, there will be exactly one fragment.
+
+class UnwindFragmentInfo : public UnwindBase
+{
+ friend class UnwindInfo;
+
+ static const unsigned UFI_ILLEGAL_OFFSET = 0xFFFFFFFF;
+
+public:
+ UnwindFragmentInfo(Compiler* comp, emitLocation* emitLoc, bool hasPhantomProlog);
+
+ void FinalizeOffset();
+
+ UNATIVE_OFFSET GetStartOffset()
+ {
+ assert(ufiStartOffset != UFI_ILLEGAL_OFFSET);
+ return ufiStartOffset;
+ }
+
+ // Add an unwind code. It could be for a prolog, or for the current epilog.
+ // A single unwind code can be from 1 to 4 bytes.
+
+ void AddCode(BYTE b1)
+ {
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+ ufiCurCodes->AddCode(b1);
+ }
+
+ void AddCode(BYTE b1, BYTE b2)
+ {
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+ ufiCurCodes->AddCode(b1, b2);
+ }
+
+ void AddCode(BYTE b1, BYTE b2, BYTE b3)
+ {
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+ ufiCurCodes->AddCode(b1, b2, b3);
+ }
+
+ void AddCode(BYTE b1, BYTE b2, BYTE b3, BYTE b4)
+ {
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+ ufiCurCodes->AddCode(b1, b2, b3, b4);
+ }
+
+ unsigned EpilogCount()
+ {
+ unsigned count = 0;
+ for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+ ++count;
+ }
+ return count;
+ }
+
+ void AddEpilog();
+
+ void MergeCodes();
+
+ void CopyPrologCodes(UnwindFragmentInfo* pCopyFrom);
+
+ void SplitEpilogCodes(emitLocation* emitLoc, UnwindFragmentInfo* pSplitFrom);
+
+ bool IsAtFragmentEnd(UnwindEpilogInfo* pEpi);
+
+ // Return the full, final size of unwind block. This will be used to allocate memory for
+ // the unwind block. This is called before the code offsets are finalized.
+ // Size is in bytes.
+ ULONG Size()
+ {
+ assert(ufiSize != 0);
+ return ufiSize;
+ }
+
+ void Finalize(UNATIVE_OFFSET functionLength);
+
+ // GetFinalInfo: return a pointer to the final unwind info to hand to the VM, and the size of this info in bytes
+ void GetFinalInfo(/* OUT */ BYTE** ppUnwindBlock, /* OUT */ ULONG* pUnwindBlockSize)
+ {
+ ufiPrologCodes.GetFinalInfo(ppUnwindBlock, pUnwindBlockSize);
+ }
+
+ void Reserve(BOOL isFunclet, bool isHotCode);
+
+ void Allocate(
+ CorJitFuncKind funKind, void* pHotCode, void* pColdCode, UNATIVE_OFFSET funcEndOffset, bool isHotCode);
+
+ UnwindFragmentInfo()
+ {
+ }
+ ~UnwindFragmentInfo()
+ {
+ }
+
+#ifdef DEBUG
+ void Dump(int indent = 0);
+#endif // DEBUG
+
+private:
+ // No copy constructor or operator=
+ UnwindFragmentInfo(const UnwindFragmentInfo& info);
+ UnwindFragmentInfo& operator=(const UnwindFragmentInfo&);
+
+ //
+ // Data
+ //
+
+ UnwindFragmentInfo* ufiNext; // The next fragment
+ emitLocation* ufiEmitLoc; // Emitter location for start of fragment
+ bool ufiHasPhantomProlog; // Are the prolog codes for a phantom prolog, or a real prolog?
+ // (For a phantom prolog, this code fragment represents a fragment in
+ // the sense of the unwind info spec; something without a real prolog.)
+ UnwindPrologCodes ufiPrologCodes; // The unwind codes for the prolog
+ UnwindEpilogInfo ufiEpilogFirst; // In-line the first epilog to avoid separate memory allocation, since
+ // almost all functions will have at least one epilog. It is pointed
+ // to by ufiEpilogList when the first epilog is added.
+ UnwindEpilogInfo* ufiEpilogList; // The head of the epilog list
+ UnwindEpilogInfo* ufiEpilogLast; // The last entry in the epilog list (the last epilog added)
+ UnwindCodesBase* ufiCurCodes; // Pointer to current unwind codes, either prolog or epilog
+
+ // Some data computed when merging the unwind codes, and used when finalizing the
+ // unwind block for emission.
+ unsigned ufiSize; // The size of the unwind data for this fragment, in bytes
+ bool ufiSetEBit;
+ bool ufiNeedExtendedCodeWordsEpilogCount;
+ unsigned ufiCodeWords;
+ unsigned ufiEpilogScopes;
+ UNATIVE_OFFSET ufiStartOffset;
+
+#ifdef DEBUG
+
+ unsigned ufiNum;
+
+ // Are we processing the prolog? The prolog must come first, followed by a (possibly empty)
+ // set of epilogs, for this function/funclet.
+ bool ufiInProlog;
+
+ static const unsigned UFI_INITIALIZED_PATTERN = 0x0FACADE0; // Something unlikely to be the fill pattern for
+ // uninitialized memory
+ unsigned ufiInitialized;
+
+#endif // DEBUG
+};
+
+// UnwindInfo: represents all the unwind information for a single function or funclet
+
+class UnwindInfo : public UnwindBase
+{
+public:
+ void InitUnwindInfo(Compiler* comp, emitLocation* startLoc, emitLocation* endLoc);
+
+ void HotColdSplitCodes(UnwindInfo* puwi);
+
+ // The following act on all the fragments that make up the unwind info for this function or funclet.
+
+ void Split();
+
+ static void EmitSplitCallback(void* context, emitLocation* emitLoc);
+
+ void Reserve(BOOL isFunclet, bool isHotCode);
+
+ void Allocate(CorJitFuncKind funKind, void* pHotCode, void* pColdCode, bool isHotCode);
+
+ // The following act on the current fragment (the one pointed to by 'uwiFragmentLast').
+
+ // Add an unwind code. It could be for a prolog, or for the current epilog.
+ // A single unwind code can be from 1 to 4 bytes.
+
+ void AddCode(BYTE b1)
+ {
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiFragmentLast != NULL);
+ INDEBUG(CheckOpsize(b1));
+
+ uwiFragmentLast->AddCode(b1);
+ CaptureLocation();
+ }
+
+ void AddCode(BYTE b1, BYTE b2)
+ {
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiFragmentLast != NULL);
+ INDEBUG(CheckOpsize(b1));
+
+ uwiFragmentLast->AddCode(b1, b2);
+ CaptureLocation();
+ }
+
+ void AddCode(BYTE b1, BYTE b2, BYTE b3)
+ {
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiFragmentLast != NULL);
+ INDEBUG(CheckOpsize(b1));
+
+ uwiFragmentLast->AddCode(b1, b2, b3);
+ CaptureLocation();
+ }
+
+ void AddCode(BYTE b1, BYTE b2, BYTE b3, BYTE b4)
+ {
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiFragmentLast != NULL);
+ INDEBUG(CheckOpsize(b1));
+
+ uwiFragmentLast->AddCode(b1, b2, b3, b4);
+ CaptureLocation();
+ }
+
+ void AddEpilog();
+
+ emitLocation* GetCurrentEmitterLocation()
+ {
+ return uwiCurLoc;
+ }
+
+#if defined(_TARGET_ARM_)
+ unsigned GetInstructionSize();
+#endif // defined(_TARGET_ARM_)
+
+ void CaptureLocation();
+
+ UnwindInfo()
+ {
+ }
+ ~UnwindInfo()
+ {
+ }
+
+#ifdef DEBUG
+
+#if defined(_TARGET_ARM_)
+ // Given the first byte of the unwind code, check that its opsize matches
+ // the last instruction added in the emitter.
+ void CheckOpsize(BYTE b1);
+#elif defined(_TARGET_ARM64_)
+ void CheckOpsize(BYTE b1)
+ {
+ } // nothing to do; all instructions are 4 bytes
+#endif // defined(_TARGET_ARM64_)
+
+ void Dump(bool isHotCode, int indent = 0);
+
+ bool uwiAddingNOP;
+
+#endif // DEBUG
+
+private:
+ void AddFragment(emitLocation* emitLoc);
+
+ // No copy constructor or operator=
+ UnwindInfo(const UnwindInfo& info);
+ UnwindInfo& operator=(const UnwindInfo&);
+
+ //
+ // Data
+ //
+
+ UnwindFragmentInfo uwiFragmentFirst; // The first fragment is directly here, so it doesn't need to be separately
+ // allocated.
+ UnwindFragmentInfo* uwiFragmentLast; // The last entry in the fragment list (the last fragment added)
+ emitLocation* uwiEndLoc; // End emitter location of this function/funclet (NULL == end of all code)
+ emitLocation* uwiCurLoc; // The current emitter location (updated after an unwind code is added), used for NOP
+ // padding, and asserts.
+
+#ifdef DEBUG
+
+ static const unsigned UWI_INITIALIZED_PATTERN = 0x0FACADE1; // Something unlikely to be the fill pattern for
+ // uninitialized memory
+ unsigned uwiInitialized;
+
+#endif // DEBUG
+};
+
+#ifdef DEBUG
+
+// Forward declaration
+void DumpUnwindInfo(Compiler* comp,
+ bool isHotCode,
+ UNATIVE_OFFSET startOffset,
+ UNATIVE_OFFSET endOffset,
+ const BYTE* const pHeader,
+ ULONG unwindBlockSize);
+
+#endif // DEBUG
+
+#endif // _TARGET_ARMARCH_
diff --git a/src/jit/unwindamd64.cpp b/src/jit/unwindamd64.cpp
new file mode 100644
index 0000000000..89abdff2b3
--- /dev/null
+++ b/src/jit/unwindamd64.cpp
@@ -0,0 +1,1056 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX UnwindInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_AMD64_)
+#ifdef UNIX_AMD64_ABI
+int Compiler::mapRegNumToDwarfReg(regNumber reg)
+{
+ int dwarfReg = DWARF_REG_ILLEGAL;
+
+ switch (reg)
+ {
+ case REG_RAX:
+ dwarfReg = 0;
+ break;
+ case REG_RCX:
+ dwarfReg = 2;
+ break;
+ case REG_RDX:
+ dwarfReg = 1;
+ break;
+ case REG_RBX:
+ dwarfReg = 3;
+ break;
+ case REG_RSP:
+ dwarfReg = 7;
+ break;
+ case REG_RBP:
+ dwarfReg = 6;
+ break;
+ case REG_RSI:
+ dwarfReg = 4;
+ break;
+ case REG_RDI:
+ dwarfReg = 5;
+ break;
+ case REG_R8:
+ dwarfReg = 8;
+ break;
+ case REG_R9:
+ dwarfReg = 9;
+ break;
+ case REG_R10:
+ dwarfReg = 10;
+ break;
+ case REG_R11:
+ dwarfReg = 11;
+ break;
+ case REG_R12:
+ dwarfReg = 12;
+ break;
+ case REG_R13:
+ dwarfReg = 13;
+ break;
+ case REG_R14:
+ dwarfReg = 14;
+ break;
+ case REG_R15:
+ dwarfReg = 15;
+ break;
+ case REG_XMM0:
+ dwarfReg = 17;
+ break;
+ case REG_XMM1:
+ dwarfReg = 18;
+ break;
+ case REG_XMM2:
+ dwarfReg = 19;
+ break;
+ case REG_XMM3:
+ dwarfReg = 20;
+ break;
+ case REG_XMM4:
+ dwarfReg = 21;
+ break;
+ case REG_XMM5:
+ dwarfReg = 22;
+ break;
+ case REG_XMM6:
+ dwarfReg = 23;
+ break;
+ case REG_XMM7:
+ dwarfReg = 24;
+ break;
+ case REG_XMM8:
+ dwarfReg = 25;
+ break;
+ case REG_XMM9:
+ dwarfReg = 26;
+ break;
+ case REG_XMM10:
+ dwarfReg = 27;
+ break;
+ case REG_XMM11:
+ dwarfReg = 28;
+ break;
+ case REG_XMM12:
+ dwarfReg = 29;
+ break;
+ case REG_XMM13:
+ dwarfReg = 30;
+ break;
+ case REG_XMM14:
+ dwarfReg = 31;
+ break;
+ case REG_XMM15:
+ dwarfReg = 32;
+ break;
+ default:
+ noway_assert(!"unexpected REG_NUM");
+ }
+
+ return dwarfReg;
+}
+
+void Compiler::createCfiCode(FuncInfoDsc* func, UCHAR codeOffset, UCHAR cfiOpcode, USHORT dwarfReg, INT offset)
+{
+ CFI_CODE cfiEntry(codeOffset, cfiOpcode, dwarfReg, offset);
+ func->cfiCodes->push_back(cfiEntry);
+}
+#endif // UNIX_AMD64_ABI
+
+//------------------------------------------------------------------------
+// Compiler::unwindGetCurrentOffset: Calculate the current byte offset of the
+// prolog being generated.
+//
+// Arguments:
+// func - The main function or funclet of interest.
+//
+// Return Value:
+// The byte offset of the prolog currently being generated.
+//
+UNATIVE_OFFSET Compiler::unwindGetCurrentOffset(FuncInfoDsc* func)
+{
+ assert(compGeneratingProlog);
+ UNATIVE_OFFSET offset;
+ if (func->funKind == FUNC_ROOT)
+ {
+ offset = genEmitter->emitGetPrologOffsetEstimate();
+ }
+ else
+ {
+ assert(func->startLoc != nullptr);
+ offset = func->startLoc->GetFuncletPrologOffset(genEmitter);
+ }
+
+ return offset;
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindBegProlog: Initialize the unwind info data structures.
+// Called at the beginning of main function or funclet prolog generation.
+//
+void Compiler::unwindBegProlog()
+{
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ unwindBegPrologCFI();
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ unwindBegPrologWindows();
+ }
+}
+
+void Compiler::unwindBegPrologWindows()
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ // There is only one prolog for a function/funclet, and it comes first. So now is
+ // a good time to initialize all the unwind data structures.
+
+ unwindGetFuncLocations(func, true, &func->startLoc, &func->endLoc);
+
+ if (fgFirstColdBlock != nullptr)
+ {
+ unwindGetFuncLocations(func, false, &func->coldStartLoc, &func->coldEndLoc);
+ }
+
+ func->unwindCodeSlot = sizeof(func->unwindCodes);
+ func->unwindHeader.Version = 1;
+ func->unwindHeader.Flags = 0;
+ func->unwindHeader.CountOfUnwindCodes = 0;
+ func->unwindHeader.FrameRegister = 0;
+ func->unwindHeader.FrameOffset = 0;
+}
+
+#ifdef UNIX_AMD64_ABI
+template <typename T>
+inline static T* allocate_any(jitstd::allocator<void>& alloc, size_t count = 5)
+{
+ return jitstd::allocator<T>(alloc).allocate(count);
+}
+typedef jitstd::vector<CFI_CODE> CFICodeVector;
+
+void Compiler::unwindBegPrologCFI()
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ // There is only one prolog for a function/funclet, and it comes first. So now is
+ // a good time to initialize all the unwind data structures.
+
+ unwindGetFuncLocations(func, true, &func->startLoc, &func->endLoc);
+
+ if (fgFirstColdBlock != nullptr)
+ {
+ unwindGetFuncLocations(func, false, &func->coldStartLoc, &func->coldEndLoc);
+ }
+
+ jitstd::allocator<void> allocator(getAllocator());
+
+ func->cfiCodes = new (allocate_any<CFICodeVector>(allocator), jitstd::placement_t()) CFICodeVector(allocator);
+}
+#endif // UNIX_AMD64_ABI
+
+//------------------------------------------------------------------------
+// Compiler::unwindEndProlog: Called at the end of main function or funclet
+// prolog generation to indicate there is no more unwind information for this prolog.
+//
+void Compiler::unwindEndProlog()
+{
+ assert(compGeneratingProlog);
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindBegEpilog: Called at the beginning of main function or funclet
+// epilog generation.
+//
+void Compiler::unwindBegEpilog()
+{
+ assert(compGeneratingEpilog);
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindEndEpilog: Called at the end of main function or funclet
+// epilog generation.
+//
+void Compiler::unwindEndEpilog()
+{
+ assert(compGeneratingEpilog);
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindPush: Record a push/save of a register.
+//
+// Arguments:
+// reg - The register being pushed/saved.
+//
+void Compiler::unwindPush(regNumber reg)
+{
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ unwindPushCFI(reg);
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ unwindPushWindows(reg);
+ }
+}
+
+void Compiler::unwindPushWindows(regNumber reg)
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ assert(func->unwindHeader.Version == 1); // Can't call this before unwindBegProlog
+ assert(func->unwindHeader.CountOfUnwindCodes == 0); // Can't call this after unwindReserve
+ assert(func->unwindCodeSlot > sizeof(UNWIND_CODE));
+ UNWIND_CODE* code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+ code->CodeOffset = (BYTE)cbProlog;
+
+ if ((RBM_CALLEE_SAVED & genRegMask(reg))
+#if ETW_EBP_FRAMED
+ // When ETW_EBP_FRAMED is defined, REG_FPBASE (RBP) is excluded from the
+ // callee-saved register list. Make sure the register still gets PUSH unwind
+ // info in this case, since it is pushed as a frame register.
+ || (reg == REG_FPBASE)
+#endif // ETW_EBP_FRAMED
+ )
+ {
+ code->UnwindOp = UWOP_PUSH_NONVOL;
+ code->OpInfo = (BYTE)reg;
+ }
+ else
+ {
+ // Push of a volatile register is just a small stack allocation
+ code->UnwindOp = UWOP_ALLOC_SMALL;
+ code->OpInfo = 0;
+ }
+}
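+
+// Illustrative example (hypothetical prolog, values not taken from any particular method):
+//
+//   unwindPush(REG_RBP);   // callee-saved -> UWOP_PUSH_NONVOL, OpInfo = RBP
+//   unwindPush(REG_RAX);   // volatile     -> UWOP_ALLOC_SMALL, OpInfo = 0 (an 8-byte alloc)
+//
+// The unwinder can then undo both operations by adjusting RSP and restoring RBP.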
+
+#ifdef UNIX_AMD64_ABI
+void Compiler::unwindPushCFI(regNumber reg)
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+
+ createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, 8);
+ if ((RBM_CALLEE_SAVED & genRegMask(reg))
+#if ETW_EBP_FRAMED
+ // When ETW_EBP_FRAMED is defined, REG_FPBASE (RBP) is excluded from the
+ // callee-saved register list. Make sure the register still gets PUSH unwind
+ // info in this case, since it is pushed as a frame register.
+ || (reg == REG_FPBASE)
+#endif // ETW_EBP_FRAMED
+ )
+ {
+ createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg));
+ }
+}
+#endif // UNIX_AMD64_ABI
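+
+// For example (illustrative): on the Unix ABI, recording "push rbp" produces two CFI entries at
+// the same code offset: CFI_ADJUST_CFA_OFFSET by 8 (the push grows the frame), followed by
+// CFI_REL_OFFSET for RBP's DWARF register number (6), noting where the saved value lives
+// relative to the CFA.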
+
+//------------------------------------------------------------------------
+// Compiler::unwindAllocStack: Record a stack frame allocation (sub sp, X).
+//
+// Arguments:
+// size - The size of the stack frame allocation (the amount subtracted from the stack pointer).
+//
+void Compiler::unwindAllocStack(unsigned size)
+{
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ unwindAllocStackCFI(size);
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ unwindAllocStackWindows(size);
+ }
+}
+
+void Compiler::unwindAllocStackWindows(unsigned size)
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ assert(func->unwindHeader.Version == 1); // Can't call this before unwindBegProlog
+ assert(func->unwindHeader.CountOfUnwindCodes == 0); // Can't call this after unwindReserve
+ assert(size % 8 == 0); // Stack size is *always* 8 byte aligned
+ UNWIND_CODE* code;
+ if (size <= 128)
+ {
+ assert(func->unwindCodeSlot > sizeof(UNWIND_CODE));
+ code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ code->UnwindOp = UWOP_ALLOC_SMALL;
+ code->OpInfo = (size - 8) / 8;
+ }
+ else if (size <= 0x7FFF8)
+ {
+ assert(func->unwindCodeSlot > (sizeof(UNWIND_CODE) + sizeof(USHORT)));
+ USHORT* codedSize = (USHORT*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(USHORT)];
+ *codedSize = (USHORT)(size / 8);
+ code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ code->UnwindOp = UWOP_ALLOC_LARGE;
+ code->OpInfo = 0;
+ }
+ else
+ {
+ assert(func->unwindCodeSlot > (sizeof(UNWIND_CODE) + sizeof(ULONG)));
+ ULONG* codedSize = (ULONG*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(ULONG)];
+ *codedSize = size;
+ code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ code->UnwindOp = UWOP_ALLOC_LARGE;
+ code->OpInfo = 1;
+ }
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+ code->CodeOffset = (BYTE)cbProlog;
+}
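+
+// Worked examples of the encoding above (illustrative values):
+//   unwindAllocStack(0x28)    -> UWOP_ALLOC_SMALL, OpInfo = (0x28 - 8) / 8 = 4
+//   unwindAllocStack(0x1000)  -> UWOP_ALLOC_LARGE, OpInfo = 0, followed by a USHORT 0x1000 / 8 = 0x200
+//   unwindAllocStack(0x80000) -> UWOP_ALLOC_LARGE, OpInfo = 1, followed by a ULONG 0x80000 (unscaled)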
+
+#ifdef UNIX_AMD64_ABI
+void Compiler::unwindAllocStackCFI(unsigned size)
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+ createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, size);
+}
+#endif // UNIX_AMD64_ABI
+
+//------------------------------------------------------------------------
+// Compiler::unwindSetFrameReg: Record a frame register.
+//
+// Arguments:
+// reg - The register being set as the frame register.
+// offset - The offset from the current stack pointer that the frame pointer will point at.
+//
+void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset)
+{
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ unwindSetFrameRegCFI(reg, offset);
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ unwindSetFrameRegWindows(reg, offset);
+ }
+}
+
+void Compiler::unwindSetFrameRegWindows(regNumber reg, unsigned offset)
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ assert(func->unwindHeader.Version == 1); // Can't call this before unwindBegProlog
+ assert(func->unwindHeader.CountOfUnwindCodes == 0); // Can't call this after unwindReserve
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+
+ func->unwindHeader.FrameRegister = (BYTE)reg;
+
+#ifdef PLATFORM_UNIX
+ if (offset > 240)
+ {
+ // On Unix only, we have a CLR-only extension to the AMD64 unwind codes: UWOP_SET_FPREG_LARGE.
+ // It has a 32-bit offset (scaled). You must set UNWIND_INFO.FrameOffset to 15. The 32-bit
+ // offset follows in 2 UNWIND_CODE fields.
+
+ assert(func->unwindCodeSlot > (sizeof(UNWIND_CODE) + sizeof(ULONG)));
+ ULONG* codedSize = (ULONG*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(ULONG)];
+ assert(offset % 16 == 0);
+ *codedSize = offset / 16;
+
+ UNWIND_CODE* code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ code->CodeOffset = (BYTE)cbProlog;
+ code->OpInfo = 0;
+ code->UnwindOp = UWOP_SET_FPREG_LARGE;
+ func->unwindHeader.FrameOffset = 15;
+ }
+ else
+#endif // PLATFORM_UNIX
+ {
+ assert(func->unwindCodeSlot > sizeof(UNWIND_CODE));
+ UNWIND_CODE* code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ code->CodeOffset = (BYTE)cbProlog;
+ code->OpInfo = 0;
+ code->UnwindOp = UWOP_SET_FPREG;
+ assert(offset <= 240);
+ assert(offset % 16 == 0);
+ func->unwindHeader.FrameOffset = offset / 16;
+ }
+}
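+
+// Worked examples of the encoding above (illustrative values):
+//   unwindSetFrameReg(REG_RBP, 0x20)  -> UWOP_SET_FPREG, FrameRegister = RBP, FrameOffset = 0x20 / 16 = 2
+//   unwindSetFrameReg(REG_RBP, 0x200) -> (Unix only) UWOP_SET_FPREG_LARGE, FrameOffset = 15,
+//                                        followed by a ULONG scaled offset 0x200 / 16 = 0x20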
+
+#ifdef UNIX_AMD64_ABI
+void Compiler::unwindSetFrameRegCFI(regNumber reg, unsigned offset)
+{
+ assert(compGeneratingProlog);
+ FuncInfoDsc* func = funCurrentFunc();
+
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+
+ createCfiCode(func, cbProlog, CFI_DEF_CFA_REGISTER, mapRegNumToDwarfReg(reg));
+ if (offset != 0)
+ {
+ createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, offset);
+ }
+}
+#endif // UNIX_AMD64_ABI
+
+//------------------------------------------------------------------------
+// Compiler::unwindSaveReg: Record a register save.
+//
+// Arguments:
+// reg - The register being saved.
+// offset - The offset from the current stack pointer where the register is being saved.
+//
+void Compiler::unwindSaveReg(regNumber reg, unsigned offset)
+{
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ unwindSaveRegCFI(reg, offset);
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ unwindSaveRegWindows(reg, offset);
+ }
+}
+
+void Compiler::unwindSaveRegWindows(regNumber reg, unsigned offset)
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ assert(func->unwindHeader.Version == 1); // Can't call this before unwindBegProlog
+ assert(func->unwindHeader.CountOfUnwindCodes == 0); // Can't call this after unwindReserve
+ if (RBM_CALLEE_SAVED & genRegMask(reg))
+ {
+ UNWIND_CODE* code;
+ if (offset < 0x80000)
+ {
+ assert(func->unwindCodeSlot > (sizeof(UNWIND_CODE) + sizeof(USHORT)));
+ USHORT* codedSize = (USHORT*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(USHORT)];
+ code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+
+ // Per the AMD64 ABI, if an entire xmm register is saved, the offset needs to be scaled by 16.
+ if (genIsValidFloatReg(reg))
+ {
+ *codedSize = (USHORT)(offset / 16);
+ code->UnwindOp = UWOP_SAVE_XMM128;
+ }
+ else
+ {
+ *codedSize = (USHORT)(offset / 8);
+ code->UnwindOp = UWOP_SAVE_NONVOL;
+ }
+ }
+ else
+ {
+ assert(func->unwindCodeSlot > (sizeof(UNWIND_CODE) + sizeof(ULONG)));
+ ULONG* codedSize = (ULONG*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(ULONG)];
+ *codedSize = offset;
+ code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ code->UnwindOp = (genIsValidFloatReg(reg)) ? UWOP_SAVE_XMM128_FAR : UWOP_SAVE_NONVOL_FAR;
+ }
+ code->OpInfo = (BYTE)reg;
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+ code->CodeOffset = (BYTE)cbProlog;
+ }
+}
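+
+// Worked examples of the encoding above (illustrative values):
+//   unwindSaveReg(REG_RSI, 0x40)    -> UWOP_SAVE_NONVOL,     scaled USHORT offset 0x40 / 8 = 8
+//   unwindSaveReg(REG_XMM6, 0x40)   -> UWOP_SAVE_XMM128,     scaled USHORT offset 0x40 / 16 = 4
+//   unwindSaveReg(REG_RSI, 0x80000) -> UWOP_SAVE_NONVOL_FAR, unscaled ULONG offset 0x80000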
+
+#ifdef UNIX_AMD64_ABI
+void Compiler::unwindSaveRegCFI(regNumber reg, unsigned offset)
+{
+ assert(compGeneratingProlog);
+
+ if (RBM_CALLEE_SAVED & genRegMask(reg))
+ {
+ FuncInfoDsc* func = funCurrentFunc();
+
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+ createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg), offset);
+ }
+}
+#endif // UNIX_AMD64_ABI
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// DumpUnwindInfo: Dump the unwind data.
+//
+// Arguments:
+// isHotCode - true if this unwind data is for the hot section, false otherwise.
+// startOffset - byte offset of the code start that this unwind data represents.
+// endOffset - byte offset of the code end that this unwind data represents.
+// pHeader - pointer to the unwind data blob.
+//
+void DumpUnwindInfo(bool isHotCode,
+ UNATIVE_OFFSET startOffset,
+ UNATIVE_OFFSET endOffset,
+ const UNWIND_INFO* const pHeader)
+{
+ printf("Unwind Info%s:\n", isHotCode ? "" : " COLD");
+ printf(" >> Start offset : 0x%06x (not in unwind data)\n", dspOffset(startOffset));
+ printf(" >> End offset : 0x%06x (not in unwind data)\n", dspOffset(endOffset));
+
+ if (pHeader == nullptr)
+ {
+ // Cold AMD64 code doesn't have unwind info; the VM creates chained unwind info.
+ assert(!isHotCode);
+ return;
+ }
+
+ printf(" Version : %u\n", pHeader->Version);
+ printf(" Flags : 0x%02x", pHeader->Flags);
+ if (pHeader->Flags)
+ {
+ const UCHAR flags = pHeader->Flags;
+ printf(" (");
+ if (flags & UNW_FLAG_EHANDLER)
+ {
+ printf(" UNW_FLAG_EHANDLER");
+ }
+ if (flags & UNW_FLAG_UHANDLER)
+ {
+ printf(" UNW_FLAG_UHANDLER");
+ }
+ if (flags & UNW_FLAG_CHAININFO)
+ {
+ printf(" UNW_FLAG_CHAININFO");
+ }
+ printf(")");
+ }
+ printf("\n");
+ printf(" SizeOfProlog : 0x%02X\n", pHeader->SizeOfProlog);
+ printf(" CountOfUnwindCodes: %u\n", pHeader->CountOfUnwindCodes);
+ printf(" FrameRegister : %s (%u)\n",
+ (pHeader->FrameRegister == 0) ? "none" : getRegName(pHeader->FrameRegister),
+ pHeader->FrameRegister); // RAX (0) is not allowed as a frame register
+ if (pHeader->FrameRegister == 0)
+ {
+ printf(" FrameOffset : N/A (no FrameRegister) (Value=%u)\n", pHeader->FrameOffset);
+ }
+ else
+ {
+ printf(" FrameOffset : %u * 16 = 0x%02X\n", pHeader->FrameOffset, pHeader->FrameOffset * 16);
+ }
+ printf(" UnwindCodes :\n");
+
+ for (unsigned i = 0; i < pHeader->CountOfUnwindCodes; i++)
+ {
+ unsigned offset;
+ const UNWIND_CODE* const pCode = &(pHeader->UnwindCode[i]);
+ switch (pCode->UnwindOp)
+ {
+ case UWOP_PUSH_NONVOL:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_PUSH_NONVOL (%u) OpInfo: %s (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, getRegName(pCode->OpInfo), pCode->OpInfo);
+ break;
+
+ case UWOP_ALLOC_LARGE:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_ALLOC_LARGE (%u) OpInfo: %u - ", pCode->CodeOffset,
+ pCode->UnwindOp, pCode->OpInfo);
+ if (pCode->OpInfo == 0)
+ {
+ i++;
+ printf("Scaled small \n Size: %u * 8 = %u = 0x%05X\n", pHeader->UnwindCode[i].FrameOffset,
+ pHeader->UnwindCode[i].FrameOffset * 8, pHeader->UnwindCode[i].FrameOffset * 8);
+ }
+ else if (pCode->OpInfo == 1)
+ {
+ i++;
+ printf("Unscaled large\n Size: %u = 0x%08X\n\n", *(ULONG*)&(pHeader->UnwindCode[i]),
+ *(ULONG*)&(pHeader->UnwindCode[i]));
+ i++;
+ }
+ else
+ {
+ printf("Unknown\n");
+ }
+ break;
+
+ case UWOP_ALLOC_SMALL:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_ALLOC_SMALL (%u) OpInfo: %u * 8 + 8 = %u = 0x%02X\n",
+ pCode->CodeOffset, pCode->UnwindOp, pCode->OpInfo, pCode->OpInfo * 8 + 8, pCode->OpInfo * 8 + 8);
+ break;
+
+ case UWOP_SET_FPREG:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SET_FPREG (%u) OpInfo: Unused (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, pCode->OpInfo); // This should be zero
+ break;
+
+#ifdef PLATFORM_UNIX
+
+ case UWOP_SET_FPREG_LARGE:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SET_FPREG_LARGE (%u) OpInfo: Unused (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, pCode->OpInfo); // This should be zero
+ i++;
+ offset = *(ULONG*)&(pHeader->UnwindCode[i]);
+ i++;
+ printf(" Scaled Offset: %u * 16 = %u = 0x%08X\n", offset, offset * 16, offset * 16);
+ if ((offset & 0xF0000000) != 0)
+ {
+ printf(" Illegal unscaled offset: too large\n");
+ }
+ break;
+
+#endif // PLATFORM_UNIX
+
+ case UWOP_SAVE_NONVOL:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SAVE_NONVOL (%u) OpInfo: %s (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, getRegName(pCode->OpInfo), pCode->OpInfo);
+ i++;
+ printf(" Scaled Small Offset: %u * 8 = %u = 0x%05X\n", pHeader->UnwindCode[i].FrameOffset,
+ pHeader->UnwindCode[i].FrameOffset * 8, pHeader->UnwindCode[i].FrameOffset * 8);
+ break;
+
+ case UWOP_SAVE_NONVOL_FAR:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SAVE_NONVOL_FAR (%u) OpInfo: %s (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, getRegName(pCode->OpInfo), pCode->OpInfo);
+ i++;
+ printf(" Unscaled Large Offset: 0x%08X\n\n", *(ULONG*)&(pHeader->UnwindCode[i]));
+ i++;
+ break;
+
+ case UWOP_SAVE_XMM128:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SAVE_XMM128 (%u) OpInfo: XMM%u (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, pCode->OpInfo, pCode->OpInfo);
+ i++;
+ printf(" Scaled Small Offset: %u * 16 = %u = 0x%05X\n", pHeader->UnwindCode[i].FrameOffset,
+ pHeader->UnwindCode[i].FrameOffset * 16, pHeader->UnwindCode[i].FrameOffset * 16);
+ break;
+
+ case UWOP_SAVE_XMM128_FAR:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SAVE_XMM128_FAR (%u) OpInfo: XMM%u (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, pCode->OpInfo, pCode->OpInfo);
+ i++;
+ printf(" Unscaled Large Offset: 0x%08X\n\n", *(ULONG*)&(pHeader->UnwindCode[i]));
+ i++;
+ break;
+
+ case UWOP_EPILOG:
+ case UWOP_SPARE_CODE:
+ case UWOP_PUSH_MACHFRAME:
+ default:
+ printf(" Unrecognized UNWIND_CODE: 0x%04X\n", *(USHORT*)pCode);
+ break;
+ }
+ }
+}
+
+#ifdef UNIX_AMD64_ABI
+//------------------------------------------------------------------------
+// DumpCfiInfo: Dump the Cfi data.
+//
+// Arguments:
+// isHotCode - true if this cfi data is for the hot section, false otherwise.
+// startOffset - byte offset of the code start that this cfi data represents.
+// endOffset - byte offset of the code end that this cfi data represents.
+// cfiCodeBytes - size of the cfi data blob, in bytes.
+// pCfiCode - pointer to the cfi data blob.
+//
+void DumpCfiInfo(bool isHotCode,
+ UNATIVE_OFFSET startOffset,
+ UNATIVE_OFFSET endOffset,
+ DWORD cfiCodeBytes,
+ const CFI_CODE* const pCfiCode)
+{
+ printf("Cfi Info%s:\n", isHotCode ? "" : " COLD");
+ printf(" >> Start offset : 0x%06x \n", dspOffset(startOffset));
+ printf(" >> End offset : 0x%06x \n", dspOffset(endOffset));
+
+ for (int i = 0; i < cfiCodeBytes / sizeof(CFI_CODE); i++)
+ {
+ const CFI_CODE* const pCode = &(pCfiCode[i]);
+
+ UCHAR codeOffset = pCode->CodeOffset;
+ SHORT dwarfReg = pCode->DwarfReg;
+ INT offset = pCode->Offset;
+
+ switch (pCode->CfiOpCode)
+ {
+ case CFI_REL_OFFSET:
+ printf(" CodeOffset: 0x%02X Op: RelOffset DwarfReg:0x%x Offset:0x%X\n", codeOffset, dwarfReg,
+ offset);
+ break;
+ case CFI_DEF_CFA_REGISTER:
+ assert(offset == 0);
+ printf(" CodeOffset: 0x%02X Op: DefCfaRegister DwarfReg:0x%X\n", codeOffset, dwarfReg);
+ break;
+ case CFI_ADJUST_CFA_OFFSET:
+ assert(dwarfReg == DWARF_REG_ILLEGAL);
+ printf(" CodeOffset: 0x%02X Op: AdjustCfaOffset Offset:0x%X\n", codeOffset, offset);
+ break;
+ default:
+ printf(" Unrecognized CFI_CODE: 0x%IX\n", *(UINT64*)pCode);
+ break;
+ }
+ }
+}
+#endif // UNIX_AMD64_ABI
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// Compiler::unwindReserve: Ask the VM to reserve space for the unwind information
+// for the function and all its funclets. Called once, just before asking the VM
+// for memory and emitting the generated code. Calls unwindReserveFunc() to handle
+// the main function and each of the funclets, in turn.
+//
+void Compiler::unwindReserve()
+{
+ assert(!compGeneratingProlog);
+ assert(!compGeneratingEpilog);
+
+ assert(compFuncInfoCount > 0);
+ for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+ {
+ unwindReserveFunc(funGetFunc(funcIdx));
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindReserveFunc: Reserve the unwind information from the VM for a
+// given main function or funclet.
+//
+// Arguments:
+// func - The main function or funclet to reserve unwind info for.
+//
+void Compiler::unwindReserveFunc(FuncInfoDsc* func)
+{
+ unwindReserveFuncHelper(func, true);
+
+ if (fgFirstColdBlock != nullptr)
+ {
+ unwindReserveFuncHelper(func, false);
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindReserveFuncHelper: Reserve the unwind information from the VM for a
+// given main function or funclet, for either the hot or the cold section.
+//
+// Arguments:
+// func - The main function or funclet to reserve unwind info for.
+// isHotCode - 'true' to reserve the hot section, 'false' to reserve the cold section.
+//
+void Compiler::unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode)
+{
+ DWORD unwindCodeBytes = 0;
+ if (isHotCode)
+ {
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ unwindCodeBytes = func->cfiCodes->size() * sizeof(CFI_CODE);
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ assert(func->unwindHeader.Version == 1); // Can't call this before unwindBegProlog
+ assert(func->unwindHeader.CountOfUnwindCodes == 0); // Only call this once per prolog
+
+ // Set the size of the prolog to be the last encoded action
+ if (func->unwindCodeSlot < sizeof(func->unwindCodes))
+ {
+ UNWIND_CODE* code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot];
+ func->unwindHeader.SizeOfProlog = code->CodeOffset;
+ }
+ else
+ {
+ func->unwindHeader.SizeOfProlog = 0;
+ }
+ func->unwindHeader.CountOfUnwindCodes =
+ (BYTE)((sizeof(func->unwindCodes) - func->unwindCodeSlot) / sizeof(UNWIND_CODE));
+
+ // Prepend the unwindHeader onto the unwind codes
+ assert(func->unwindCodeSlot >= offsetof(UNWIND_INFO, UnwindCode));
+
+ func->unwindCodeSlot -= offsetof(UNWIND_INFO, UnwindCode);
+ UNWIND_INFO* pHeader = (UNWIND_INFO*)&func->unwindCodes[func->unwindCodeSlot];
+ memcpy(pHeader, &func->unwindHeader, offsetof(UNWIND_INFO, UnwindCode));
+
+ unwindCodeBytes = sizeof(func->unwindCodes) - func->unwindCodeSlot;
+ }
+ }
+
+ BOOL isFunclet = (func->funKind != FUNC_ROOT);
+ BOOL isColdCode = isHotCode ? FALSE : TRUE;
+
+ eeReserveUnwindInfo(isFunclet, isColdCode, unwindCodeBytes);
+}
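+
+// Illustrative sketch of func->unwindCodes after the header is prepended above. The codes were
+// filled in from the end of the array backwards, so the blob handed to the VM is the tail of
+// the array:
+//
+//   [ unused ... | UNWIND_INFO header (up to UnwindCode) | UNWIND_CODEs, last-emitted first ]
+//                ^ func->unwindCodeSlot
+//
+// unwindCodeBytes is the length of that tail.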
+
+//------------------------------------------------------------------------
+// Compiler::unwindEmit: Report all the unwind information to the VM.
+//
+// Arguments:
+// pHotCode - Pointer to the beginning of the memory with the function and funclet hot code.
+// pColdCode - Pointer to the beginning of the memory with the function and funclet cold code.
+//
+void Compiler::unwindEmit(void* pHotCode, void* pColdCode)
+{
+ assert(!compGeneratingProlog);
+ assert(!compGeneratingEpilog);
+
+ assert(compFuncInfoCount > 0);
+ for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+ {
+ unwindEmitFunc(funGetFunc(funcIdx), pHotCode, pColdCode);
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindEmitFuncHelper: Report the unwind information to the VM for a
+// given main function or funclet, for either the hot or cold section.
+//
+// Arguments:
+// func - The main function or funclet to report unwind info for.
+// pHotCode - Pointer to the beginning of the memory with the function and funclet hot code.
+// pColdCode - Pointer to the beginning of the memory with the function and funclet cold code.
+// Ignored if 'isHotCode' is true.
+// isHotCode - 'true' to report the hot section, 'false' to report the cold section.
+//
+void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pColdCode, bool isHotCode)
+{
+ UNATIVE_OFFSET startOffset;
+ UNATIVE_OFFSET endOffset;
+ DWORD unwindCodeBytes = 0;
+ BYTE* pUnwindBlock = nullptr;
+
+ if (isHotCode)
+ {
+ if (func->startLoc == nullptr)
+ {
+ startOffset = 0;
+ }
+ else
+ {
+ startOffset = func->startLoc->CodeOffset(genEmitter);
+ }
+
+ if (func->endLoc == nullptr)
+ {
+ endOffset = info.compNativeCodeSize;
+ }
+ else
+ {
+ endOffset = func->endLoc->CodeOffset(genEmitter);
+ }
+
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ int size = func->cfiCodes->size();
+ if (size > 0)
+ {
+ unwindCodeBytes = size * sizeof(CFI_CODE);
+ pUnwindBlock = (BYTE*)&(*func->cfiCodes)[0];
+ }
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ unwindCodeBytes = sizeof(func->unwindCodes) - func->unwindCodeSlot;
+
+#ifdef DEBUG
+ UNWIND_INFO* pUnwindInfo = (UNWIND_INFO*)(&func->unwindCodes[func->unwindCodeSlot]);
+ DWORD unwindCodeBytesSpecified =
+ offsetof(UNWIND_INFO, UnwindCode) +
+ pUnwindInfo->CountOfUnwindCodes * sizeof(UNWIND_CODE); // This is what the unwind codes themselves say;
+ // it better match what we tell the VM.
+ assert(unwindCodeBytes == unwindCodeBytesSpecified);
+#endif // DEBUG
+
+ pUnwindBlock = &func->unwindCodes[func->unwindCodeSlot];
+ }
+ }
+ else
+ {
+ assert(fgFirstColdBlock != nullptr);
+ assert(func->funKind == FUNC_ROOT); // No splitting of funclets.
+
+ if (func->coldStartLoc == nullptr)
+ {
+ startOffset = 0;
+ }
+ else
+ {
+ startOffset = func->coldStartLoc->CodeOffset(genEmitter);
+ }
+
+ if (func->coldEndLoc == nullptr)
+ {
+ endOffset = info.compNativeCodeSize;
+ }
+ else
+ {
+ endOffset = func->coldEndLoc->CodeOffset(genEmitter);
+ }
+ }
+
+#ifdef DEBUG
+ if (opts.dspUnwind)
+ {
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ DumpCfiInfo(isHotCode, startOffset, endOffset, unwindCodeBytes, (const CFI_CODE* const)pUnwindBlock);
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ DumpUnwindInfo(isHotCode, startOffset, endOffset, (const UNWIND_INFO* const)pUnwindBlock);
+ }
+ }
+#endif // DEBUG
+
+ // Adjust for cold or hot code:
+ // 1. The VM doesn't want the cold code pointer unless this is cold code.
+ // 2. The startOffset and endOffset need to be from the base of the hot section for hot code
+ // and from the base of the cold section for cold code
+
+ if (isHotCode)
+ {
+ assert(endOffset <= info.compTotalHotCodeSize);
+ pColdCode = nullptr;
+ }
+ else
+ {
+ assert(startOffset >= info.compTotalHotCodeSize);
+ startOffset -= info.compTotalHotCodeSize;
+ endOffset -= info.compTotalHotCodeSize;
+ }
+
+ eeAllocUnwindInfo((BYTE*)pHotCode, (BYTE*)pColdCode, startOffset, endOffset, unwindCodeBytes, pUnwindBlock,
+ (CorJitFuncKind)func->funKind);
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindEmitFunc: Report the unwind information to the VM for a
+// given main function or funclet. Reports the hot section, then the cold
+// section if necessary.
+//
+// Arguments:
+// func - The main function or funclet to report unwind info for.
+// pHotCode - Pointer to the beginning of the memory with the function and funclet hot code.
+// pColdCode - Pointer to the beginning of the memory with the function and funclet cold code.
+//
+void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode)
+{
+ // Verify that the JIT enum is in sync with the JIT-EE interface enum
+ static_assert_no_msg(FUNC_ROOT == (FuncKind)CORJIT_FUNC_ROOT);
+ static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER);
+ static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER);
+
+ unwindEmitFuncHelper(func, pHotCode, pColdCode, true);
+
+ if (pColdCode != nullptr)
+ {
+ unwindEmitFuncHelper(func, pHotCode, pColdCode, false);
+ }
+}
+
+#endif // _TARGET_AMD64_
diff --git a/src/jit/unwindarm.cpp b/src/jit/unwindarm.cpp
new file mode 100644
index 0000000000..b537bef4a3
--- /dev/null
+++ b/src/jit/unwindarm.cpp
@@ -0,0 +1,2320 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX UnwindInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifdef _TARGET_ARMARCH_
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Unwind APIs XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+void Compiler::unwindBegProlog()
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ // There is only one prolog for a function/funclet, and it comes first. So now is
+ // a good time to initialize all the unwind data structures.
+
+ emitLocation* startLoc;
+ emitLocation* endLoc;
+ unwindGetFuncLocations(func, true, &startLoc, &endLoc);
+
+ func->uwi.InitUnwindInfo(this, startLoc, endLoc);
+ func->uwi.CaptureLocation();
+
+ func->uwiCold = NULL; // No cold data yet
+}
+
+void Compiler::unwindEndProlog()
+{
+ assert(compGeneratingProlog);
+}
+
+void Compiler::unwindBegEpilog()
+{
+ assert(compGeneratingEpilog);
+ funCurrentFunc()->uwi.AddEpilog();
+}
+
+void Compiler::unwindEndEpilog()
+{
+ assert(compGeneratingEpilog);
+}
+
+#if defined(_TARGET_ARM_)
+
+void Compiler::unwindPushPopMaskInt(regMaskTP maskInt, bool useOpsize16)
+{
+ // floating point registers cannot be specified in 'maskInt'
+ assert((maskInt & RBM_ALLFLOAT) == 0);
+
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ if (useOpsize16)
+ {
+ // The 16-bit opcode can only encode R0-R7 and LR
+ assert((maskInt & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_LR)) == 0);
+
+ bool shortFormat = false;
+ BYTE val = 0;
+
+ if ((maskInt & (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3)) == 0)
+ {
+ regMaskTP matchMask = maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7);
+ regMaskTP valMask = RBM_R4;
+ while (val < 4)
+ {
+ if (matchMask == valMask)
+ {
+ shortFormat = true;
+ break;
+ }
+
+ valMask <<= 1;
+ valMask |= RBM_R4;
+
+ val++;
+ }
+ }
+
+ if (shortFormat)
+ {
+ // D0-D7 : pop {r4-rX,lr} (X=4-7) (opsize 16)
+ pu->AddCode(0xD0 | ((maskInt >> 12) & 0x4) | val);
+ }
+ else
+ {
+ // EC-ED : pop {r0-r7,lr} (opsize 16)
+ pu->AddCode(0xEC | ((maskInt >> 14) & 0x1), (BYTE)maskInt);
+ }
+ }
+ else
+ {
+ assert((maskInt &
+ ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 |
+ RBM_R11 | RBM_R12 | RBM_LR)) == 0);
+
+ bool shortFormat = false;
+ BYTE val = 0;
+
+ if (((maskInt & (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3)) == 0) &&
+ ((maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8)) == (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8)))
+ {
+ regMaskTP matchMask = maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 | RBM_R11);
+ regMaskTP valMask = RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8;
+ while (val < 4)
+ {
+ if (matchMask == valMask)
+ {
+ shortFormat = true;
+ break;
+ }
+
+ valMask <<= 1;
+ valMask |= RBM_R4;
+
+ val++;
+ }
+ }
+
+ if (shortFormat)
+ {
+ // D8-DF : pop {r4-rX,lr} (X=8-11) (opsize 32)
+ pu->AddCode(0xD8 | ((maskInt >> 12) & 0x4) | val);
+ }
+ else
+ {
+ // 80-BF : pop {r0-r12,lr} (opsize 32)
+ pu->AddCode(0x80 | ((maskInt >> 8) & 0x1F) | ((maskInt >> 9) & 0x20), (BYTE)maskInt);
+ }
+ }
+}
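+
+// Worked example of the short format above (illustrative; assumes the usual ARM register mask
+// layout where RBM_LR is bit 14): a "pop {r4-r7, lr}" mask matches valMask after three shifts,
+// so val = 3, LR contributes bit 2, and the single unwind byte is 0xD0 | 0x4 | 3 = 0xD7.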
+
+void Compiler::unwindPushPopMaskFloat(regMaskTP maskFloat)
+{
+ // Only floating point registers can be specified in 'maskFloat'
+ assert((maskFloat & ~RBM_ALLFLOAT) == 0);
+
+ // If the maskFloat is zero there is no unwind code to emit
+ //
+ if (maskFloat == RBM_NONE)
+ {
+ return;
+ }
+
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ BYTE val = 0;
+ regMaskTP valMask = (RBM_F16 | RBM_F17);
+
+ while (maskFloat != valMask)
+ {
+ valMask <<= 2;
+ valMask |= (RBM_F16 | RBM_F17);
+
+ val++;
+
+ if (val == 8)
+ {
+ noway_assert(!"Illegal maskFloat");
+ }
+ }
+
+ // E0-E7 : vpop {d8-dX} (X=8-15) (opsize 32)
+ assert(0 <= val && val <= 7);
+ pu->AddCode(0xE0 | val);
+}
+
+void Compiler::unwindPushMaskInt(regMaskTP maskInt)
+{
+ // Only r0-r12 and lr are supported
+ assert((maskInt &
+ ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 |
+ RBM_R11 | RBM_R12 | RBM_LR)) == 0);
+
+ bool useOpsize16 = ((maskInt & (RBM_LOW_REGS | RBM_LR)) == maskInt); // Can PUSH use the 16-bit encoding?
+ unwindPushPopMaskInt(maskInt, useOpsize16);
+}
+
+void Compiler::unwindPushMaskFloat(regMaskTP maskFloat)
+{
+ // Only floating point registers should be in maskFloat
+ assert((maskFloat & RBM_ALLFLOAT) == maskFloat);
+ unwindPushPopMaskFloat(maskFloat);
+}
+
+void Compiler::unwindPopMaskInt(regMaskTP maskInt)
+{
+ // Only r0-r12 and lr and pc are supported (pc is mapped to lr when encoding)
+ assert((maskInt &
+ ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 |
+ RBM_R11 | RBM_R12 | RBM_LR | RBM_PC)) == 0);
+
+ bool useOpsize16 = ((maskInt & (RBM_LOW_REGS | RBM_PC)) == maskInt); // Can POP use the 16-bit encoding?
+
+ // If we are popping PC, then we'll return from the function. In this case, we assume
+ // the first thing the prolog did was push LR, so give the unwind codes in terms of
+ // the LR that was pushed. Note that the epilog unwind codes are meant to reverse
+ // the effect of the prolog. For "pop {pc}", the prolog had "push {lr}", so we need
+ // an epilog code to model the reverse of that.
+ if (maskInt & RBM_PC)
+ {
+ maskInt = (maskInt & ~RBM_PC) | RBM_LR;
+ }
+ unwindPushPopMaskInt(maskInt, useOpsize16);
+}
+
+void Compiler::unwindPopMaskFloat(regMaskTP maskFloat)
+{
+ // Only floating point registers should be in maskFloat
+ assert((maskFloat & RBM_ALLFLOAT) == maskFloat);
+ unwindPushPopMaskFloat(maskFloat);
+}
+
+void Compiler::unwindAllocStack(unsigned size)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ assert(size % 4 == 0);
+ size /= 4;
+
+ if (size <= 0x7F)
+ {
+ // 00-7F : add sp, sp, #X*4 (opsize 16)
+ pu->AddCode((BYTE)size);
+ }
+ else if (size <= 0x3FF)
+ {
+ // E8-EB : addw sp, sp, #X*4 (opsize 32)
+ pu->AddCode(0xE8 | (BYTE)(size >> 8), (BYTE)size);
+ }
+ else if (size <= 0xFFFF)
+ {
+ // F7 : add sp, sp, #X*4 (opsize 16)
+ // F9 : add sp, sp, #X*4 (opsize 32)
+ //
+ // For large stack size, the most significant bits
+ // are stored first (and next to the opCode (F9)) per the unwind spec.
+ unsigned instrSizeInBytes = pu->GetInstructionSize();
+ BYTE b1 = (instrSizeInBytes == 2) ? 0xF7 : 0xF9;
+ pu->AddCode(b1,
+ (BYTE)(size >> 8), // msb
+ (BYTE)size); // lsb
+ }
+ else
+ {
+ // F8 : add sp, sp, #X*4 (opsize 16)
+ // FA : add sp, sp, #X*4 (opsize 32)
+ //
+ // For large stack size, the most significant bits
+ // are stored first (and next to the opCode (FA)) per the unwind spec.
+ unsigned instrSizeInBytes = pu->GetInstructionSize();
+ BYTE b1 = (instrSizeInBytes == 2) ? 0xF8 : 0xFA;
+ pu->AddCode(b1, (BYTE)(size >> 16), (BYTE)(size >> 8), (BYTE)size);
+ }
+}
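+
+// Worked examples of the encoding above (illustrative values; sizes are encoded in words):
+//   unwindAllocStack(64)     -> 64 / 4 = 0x10              -> single byte 0x10
+//   unwindAllocStack(0x1000) -> 0x1000 / 4 = 0x400 > 0x3FF -> F7/F9 form with size bytes 0x04, 0x00
+// The F7 vs. F9 (and F8 vs. FA) choice records whether the matching "sub sp" instruction was
+// 16-bit or 32-bit, which the unwinder uses when unwinding from within a partially executed
+// prolog or epilog.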
+
+void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // ARM unwind info cannot encode an offset from the frame register
+ assert(offset == 0);
+ assert(0 <= reg && reg <= 15);
+
+ // C0-CF : mov sp, rX (opsize 16)
+ pu->AddCode((BYTE)(0xC0 + reg));
+}
+
+void Compiler::unwindSaveReg(regNumber reg, unsigned offset)
+{
+ unreached();
+}
+
+void Compiler::unwindBranch16()
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // TODO-CQ: we need to handle changing the exit code from 0xFF to 0xFD. Currently, this wastes an
+ // extra, automatically added 0xFF at the end.
+ pu->AddCode(0xFD);
+}
+
+void Compiler::unwindNop(unsigned codeSizeInBytes) // codeSizeInBytes is 2 or 4 bytes for Thumb2 instruction
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("unwindNop: adding NOP for %d byte instruction\n", codeSizeInBytes);
+ }
+#endif
+
+ INDEBUG(pu->uwiAddingNOP = true);
+
+ if (codeSizeInBytes == 2)
+ {
+ // FB : nop (opsize 16)
+ pu->AddCode(0xFB);
+ }
+ else
+ {
+ noway_assert(codeSizeInBytes == 4);
+
+ // FC : nop (opsize 32)
+ pu->AddCode(0xFC);
+ }
+
+ INDEBUG(pu->uwiAddingNOP = false);
+}
+
+#endif // defined(_TARGET_ARM_)
+
+// The instructions between the last captured "current state" and the current instruction
+// are in the prolog but have no effect for unwinding. Emit the appropriate NOP unwind codes
+// for them.
+void Compiler::unwindPadding()
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+ genEmitter->emitUnwindNopPadding(pu->GetCurrentEmitterLocation(), this);
+}
+
+// Ask the VM to reserve space for the unwind information for the function and
+// all its funclets.
+void Compiler::unwindReserve()
+{
+ assert(compFuncInfoCount > 0);
+ for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+ {
+ unwindReserveFunc(funGetFunc(funcIdx));
+ }
+}
+
+void Compiler::unwindReserveFunc(FuncInfoDsc* func)
+{
+ BOOL isFunclet = (func->funKind == FUNC_ROOT) ? FALSE : TRUE;
+ bool funcHasColdSection = false;
+
+ // If there is cold code, split the unwind data between the hot section and the
+ // cold section. This needs to be done before we split into fragments, as each
+ // of the hot and cold sections can have multiple fragments.
+
+ if (fgFirstColdBlock != NULL)
+ {
+ assert(!isFunclet); // TODO-CQ: support hot/cold splitting with EH
+
+ emitLocation* startLoc;
+ emitLocation* endLoc;
+ unwindGetFuncLocations(func, false, &startLoc, &endLoc);
+
+ func->uwiCold = new (this, CMK_UnwindInfo) UnwindInfo();
+ func->uwiCold->InitUnwindInfo(this, startLoc, endLoc);
+ func->uwiCold->HotColdSplitCodes(&func->uwi);
+
+ funcHasColdSection = true;
+ }
+
+ // First we need to split the function or funclet into fragments that are no larger
+ // than 512K, so the fragment size will fit in the unwind data "Function Length" field.
+ // The ARM Exception Data specification "Function Fragments" section describes this.
+ func->uwi.Split();
+
+ func->uwi.Reserve(isFunclet, true);
+
+ // After the hot section, split and reserve the cold section
+
+ if (funcHasColdSection)
+ {
+ assert(func->uwiCold != NULL);
+
+ func->uwiCold->Split();
+ func->uwiCold->Reserve(isFunclet, false);
+ }
+}
+
+// unwindEmit: Report all the unwind information to the VM.
+// Arguments:
+// pHotCode: Pointer to the beginning of the memory with the function and funclet hot code
+// pColdCode: Pointer to the beginning of the memory with the function and funclet cold code.
+
+void Compiler::unwindEmit(void* pHotCode, void* pColdCode)
+{
+ assert(compFuncInfoCount > 0);
+ for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+ {
+ unwindEmitFunc(funGetFunc(funcIdx), pHotCode, pColdCode);
+ }
+}
+
+void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode)
+{
+ // Verify that the JIT enum is in sync with the JIT-EE interface enum
+ static_assert_no_msg(FUNC_ROOT == (FuncKind)CORJIT_FUNC_ROOT);
+ static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER);
+ static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER);
+
+ func->uwi.Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, true);
+
+ if (func->uwiCold != NULL)
+ {
+ func->uwiCold->Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, false);
+ }
+}
+
+#if defined(_TARGET_ARM_)
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Unwind Info Debug helpers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifdef DEBUG
+
+// Return the opcode size of an instruction, in bytes, given the first byte of
+// its corresponding unwind code.
+
+unsigned GetOpcodeSizeFromUnwindHeader(BYTE b1)
+{
+ static BYTE s_UnwindOpsize[256] = {
+ // array of opsizes, in bytes (as specified in the ARM unwind specification)
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 00-0F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 10-1F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 20-2F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 30-3F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 40-4F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 50-5F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 60-6F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 70-7F
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 80-8F
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 90-9F
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // A0-AF
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // B0-BF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0-CF
+ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, // D0-DF
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 4, // E0-EF
+ 0, 0, 0, 0, 0, 4, 4, 2, 2, 4, 4, 2, 4, 2, 4, 0 // F0-FF
+ };
+
+ BYTE opsize = s_UnwindOpsize[b1];
+ assert(opsize == 2 ||
+ opsize == 4); // We shouldn't get a code with no opsize (the 0xFF end code is handled specially)
+ return opsize;
+}
+
+// Return the size of the unwind code (from 1 to 4 bytes), given the first byte of the unwind bytes
+
+unsigned GetUnwindSizeFromUnwindHeader(BYTE b1)
+{
+ static BYTE s_UnwindSize[256] = {
+ // array of unwind sizes, in bytes (as specified in the ARM unwind specification)
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00-0F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10-1F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20-2F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30-3F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40-4F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50-5F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60-6F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70-7F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 80-8F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 90-9F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // A0-AF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // B0-BF
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C0-CF
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D0-DF
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, // E0-EF
+ 1, 1, 1, 1, 1, 2, 2, 3, 4, 3, 4, 1, 1, 1, 1, 1 // F0-FF
+ };
+
+ unsigned size = s_UnwindSize[b1];
+ assert(1 <= size && size <= 4);
+ return size;
+}
+
+#endif // DEBUG
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Unwind Info Support Classes XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindCodesBase
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifdef DEBUG
+
+// Walk the prolog codes and calculate the size of the prolog or epilog, in bytes.
+// The 0xFD and 0xFE "end + NOP" codes need to be handled differently between
+// the prolog and epilog. They count as pure "end" codes in a prolog, but they
+// count as 16 and 32 bit NOPs (respectively), as well as an "end", in an epilog.
+unsigned UnwindCodesBase::GetCodeSizeFromUnwindCodes(bool isProlog)
+{
+ BYTE* pCodesStart = GetCodes();
+ BYTE* pCodes = pCodesStart;
+ unsigned size = 0;
+ for (;;)
+ {
+ BYTE b1 = *pCodes;
+ if (b1 >= 0xFD)
+ {
+ // 0xFD, 0xFE, 0xFF are "end" codes
+
+ if (!isProlog && (b1 == 0xFD || b1 == 0xFE))
+ {
+ // Count the special "end + NOP" code size in the epilog
+ size += GetOpcodeSizeFromUnwindHeader(b1);
+ }
+
+ break; // We hit an "end" code; we're done
+ }
+ size += GetOpcodeSizeFromUnwindHeader(b1);
+ pCodes += GetUnwindSizeFromUnwindHeader(b1);
+ assert(pCodes - pCodesStart < 256); // 255 is the absolute maximum number of code bytes allowed
+ }
+ return size;
+}
+
+#endif // DEBUG
+
+#endif // defined(_TARGET_ARM_)
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindPrologCodes
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// We're going to use the prolog codes memory to store the final unwind data.
+// Ensure we have enough memory to store everything. If 'epilogBytes' > 0, then
+// move the prolog codes so there are 'epilogBytes' bytes after the prolog codes.
+// Set the header pointer for future use, adding the header bytes (this pointer
+// is updated when a header byte is added), and remember the index that points
+// to the beginning of the header.
+
+void UnwindPrologCodes::SetFinalSize(int headerBytes, int epilogBytes)
+{
+#ifdef DEBUG
+ // We're done adding codes. Check that we didn't accidentally create a bigger prolog.
+ unsigned codeSize = GetCodeSizeFromUnwindCodes(true);
+ assert(codeSize <= MAX_PROLOG_SIZE_BYTES);
+#endif // DEBUG
+
+ int prologBytes = Size();
+
+ EnsureSize(headerBytes + prologBytes + epilogBytes + 3); // 3 = padding bytes for alignment
+
+ upcUnwindBlockSlot = upcCodeSlot - headerBytes - epilogBytes; // Index of the first byte of the unwind header
+
+ assert(upcMemSize == upcUnwindBlockSlot + headerBytes + prologBytes + epilogBytes + 3);
+
+ upcHeaderSlot = upcUnwindBlockSlot - 1; // upcHeaderSlot is always incremented before storing
+ assert(upcHeaderSlot >= -1);
+
+ if (epilogBytes > 0)
+ {
+ // The prolog codes that are already at the end of the array need to get moved to the middle,
+ // with space for the non-matching epilog codes to follow.
+
+ memmove_s(&upcMem[upcUnwindBlockSlot + headerBytes], upcMemSize - (upcUnwindBlockSlot + headerBytes),
+ &upcMem[upcCodeSlot], prologBytes);
+
+ // Note that the three UWC_END padding bytes still exist at the end of the array.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ // Zero out the epilog codes memory, to ensure we've copied the right bytes. Don't zero the padding bytes.
+ memset(&upcMem[upcUnwindBlockSlot + headerBytes + prologBytes], 0, epilogBytes);
+#endif // DEBUG
+
+ upcEpilogSlot =
+ upcUnwindBlockSlot + headerBytes + prologBytes; // upcEpilogSlot points to the next epilog location to fill
+
+ // Update upcCodeSlot to point at the new beginning of the prolog codes
+ upcCodeSlot = upcUnwindBlockSlot + headerBytes;
+ }
+}
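+
+// Illustrative sketch of the single upcMem buffer after SetFinalSize():
+//
+//   [ unused | header (headerBytes) | prolog codes (prologBytes) | epilog codes (epilogBytes) | 3 'end' pad bytes ]
+//            ^ upcUnwindBlockSlot   ^ upcCodeSlot                ^ upcEpilogSlot
+//
+// GetFinalInfo() later returns everything starting at upcUnwindBlockSlot, rounded up to a
+// multiple of 4 bytes using the padding at the end.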
+
+// Add a header word. Header words are added starting at the beginning, in order: first to last.
+// This is in contrast to the prolog unwind codes, which are added in reverse order.
+void UnwindPrologCodes::AddHeaderWord(DWORD d)
+{
+ assert(-1 <= upcHeaderSlot);
+ assert(upcHeaderSlot + 4 < upcCodeSlot); // Don't collide with the unwind codes that are already there!
+
+ // Store it byte-by-byte in little-endian format. We've already ensured there is enough space
+ // in SetFinalSize().
+ upcMem[++upcHeaderSlot] = (BYTE)d;
+ upcMem[++upcHeaderSlot] = (BYTE)(d >> 8);
+ upcMem[++upcHeaderSlot] = (BYTE)(d >> 16);
+ upcMem[++upcHeaderSlot] = (BYTE)(d >> 24);
+}
+
+// AppendEpilog: copy the epilog bytes to the next epilog bytes slot
+void UnwindPrologCodes::AppendEpilog(UnwindEpilogInfo* pEpi)
+{
+ assert(upcEpilogSlot != -1);
+
+ int epiSize = pEpi->Size();
+ memcpy_s(&upcMem[upcEpilogSlot], upcMemSize - upcEpilogSlot - 3, pEpi->GetCodes(),
+ epiSize); // -3 to avoid writing to the alignment padding
+ assert(pEpi->GetStartIndex() ==
+ upcEpilogSlot - upcCodeSlot); // Make sure we copied it where we expected to copy it.
+
+ upcEpilogSlot += epiSize;
+ assert(upcEpilogSlot <= upcMemSize - 3);
+}
+
+// GetFinalInfo: return a pointer to the final unwind info to hand to the VM, and the size of this info in bytes
+void UnwindPrologCodes::GetFinalInfo(/* OUT */ BYTE** ppUnwindBlock, /* OUT */ ULONG* pUnwindBlockSize)
+{
+ assert(upcHeaderSlot + 1 == upcCodeSlot); // We better have filled in the header before asking for the final data!
+
+ *ppUnwindBlock = &upcMem[upcUnwindBlockSlot];
+
+ // We put 4 'end' codes at the end for padding, so we can ensure we have an
+ // unwind block that is a multiple of 4 bytes in size. Subtract off three 'end'
+ // codes (leave one), and then align the size up to a multiple of 4.
+ *pUnwindBlockSize = AlignUp((UINT)(upcMemSize - upcUnwindBlockSlot - 3), sizeof(DWORD));
+}
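+
+// Worked example (hypothetical sizes): if the unwind block occupies 8 header bytes, 5 code bytes and
+// 3 padding bytes, then upcMemSize - upcUnwindBlockSlot == 16 and the reported size is
+// AlignUp(16 - 3, 4) == 16; the extra bytes included by the align-up are the 'end' padding codes
+// already present at the end of the array.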
+
+// Do the argument unwind codes match our unwind codes?
+// If they don't match, return -1. If they do, return the offset into
+// our codes at which they match. Note that this means that the
+// argument codes can match a subset of our codes. The subset needs to be at
+// the end, for the "end" code to match.
+//
+// This is similar to UnwindEpilogInfo::Match().
+//
+#if defined(_TARGET_ARM_)
+// Note that if we wanted to handle 0xFD and 0xFE codes, by converting
+// an existing 0xFF code to one of those, we might do that here.
+#endif // defined(_TARGET_ARM_)
+
+int UnwindPrologCodes::Match(UnwindEpilogInfo* pEpi)
+{
+ if (Size() < pEpi->Size())
+ {
+ return -1;
+ }
+
+ int matchIndex = Size() - pEpi->Size();
+
+ if (0 == memcmp(GetCodes() + matchIndex, pEpi->GetCodes(), pEpi->Size()))
+ {
+ return matchIndex;
+ }
+
+ return -1;
+}
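+
+// For example (placeholder byte values): if our codes are { A, B, C, END } and the epilog's codes are
+// { C, END }, then matchIndex == 4 - 2 == 2 and the memcmp over the trailing two bytes succeeds, so
+// the epilog can reuse our codes starting at offset 2.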
+
+// Copy the prolog codes from another prolog. The only time this is legal is
+// if we are at the initial state and no prolog codes have been added.
+// This is used to create the 'phantom' prolog for non-first fragments.
+
+void UnwindPrologCodes::CopyFrom(UnwindPrologCodes* pCopyFrom)
+{
+ assert(uwiComp == pCopyFrom->uwiComp);
+ assert(upcMem == upcMemLocal);
+ assert(upcMemSize == UPC_LOCAL_COUNT);
+ assert(upcHeaderSlot == -1);
+ assert(upcEpilogSlot == -1);
+
+ // Copy the codes
+ EnsureSize(pCopyFrom->upcMemSize);
+ assert(upcMemSize == pCopyFrom->upcMemSize);
+ memcpy_s(upcMem, upcMemSize, pCopyFrom->upcMem, pCopyFrom->upcMemSize);
+
+ // Copy the other data
+ upcCodeSlot = pCopyFrom->upcCodeSlot;
+ upcHeaderSlot = pCopyFrom->upcHeaderSlot;
+ upcEpilogSlot = pCopyFrom->upcEpilogSlot;
+ upcUnwindBlockSlot = pCopyFrom->upcUnwindBlockSlot;
+}
+
+void UnwindPrologCodes::EnsureSize(int requiredSize)
+{
+ if (requiredSize > upcMemSize)
+ {
+ // Reallocate, and copy everything to a new array.
+
+ // Choose the next power of two size. This may or may not be the best choice.
+ noway_assert((requiredSize & 0xC0000000) == 0); // too big!
+ int newSize;
+ for (newSize = upcMemSize << 1; newSize < requiredSize; newSize <<= 1)
+ {
+ // do nothing
+ }
+
+ BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize];
+ memcpy_s(newUnwindCodes + newSize - upcMemSize, upcMemSize, upcMem,
+ upcMemSize); // copy the existing data to the end
+#ifdef DEBUG
+ // Clear the old unwind codes; nobody should be looking at them
+ memset(upcMem, 0xFF, upcMemSize);
+#endif // DEBUG
+ upcMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator
+ upcCodeSlot += newSize - upcMemSize;
+ upcMemSize = newSize;
+ }
+}
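+
+// For example (hypothetical sizes): if upcMemSize is 24 and requiredSize is 100, newSize doubles
+// through 48 and 96 to 192. The existing 24 bytes are copied to the *end* of the new array (the
+// prolog codes are built up backwards from the end), and upcCodeSlot is shifted by 192 - 24 = 168.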
+
+#ifdef DEBUG
+void UnwindPrologCodes::Dump(int indent)
+{
+ printf("%*sUnwindPrologCodes @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this));
+ printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp));
+ printf("%*s &upcMemLocal[0]: 0x%08p\n", indent, "", dspPtr(&upcMemLocal[0]));
+ printf("%*s upcMem: 0x%08p\n", indent, "", dspPtr(upcMem));
+ printf("%*s upcMemSize: %d\n", indent, "", upcMemSize);
+ printf("%*s upcCodeSlot: %d\n", indent, "", upcCodeSlot);
+ printf("%*s upcHeaderSlot: %d\n", indent, "", upcHeaderSlot);
+ printf("%*s upcEpilogSlot: %d\n", indent, "", upcEpilogSlot);
+ printf("%*s upcUnwindBlockSlot: %d\n", indent, "", upcUnwindBlockSlot);
+
+ if (upcMemSize > 0)
+ {
+ printf("%*s codes:", indent, "");
+ for (int i = 0; i < upcMemSize; i++)
+ {
+ printf(" %02x", upcMem[i]);
+ if (i == upcCodeSlot)
+ printf(" <-C");
+ else if (i == upcHeaderSlot)
+ printf(" <-H");
+ else if (i == upcEpilogSlot)
+ printf(" <-E");
+ else if (i == upcUnwindBlockSlot)
+ printf(" <-U");
+ }
+ printf("\n");
+ }
+}
+#endif // DEBUG
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindEpilogCodes
+//
+///////////////////////////////////////////////////////////////////////////////
+
+void UnwindEpilogCodes::EnsureSize(int requiredSize)
+{
+ if (requiredSize > uecMemSize)
+ {
+ // Reallocate, and copy everything to a new array.
+
+ // Choose the next power of two size. This may or may not be the best choice.
+ noway_assert((requiredSize & 0xC0000000) == 0); // too big!
+ int newSize;
+ for (newSize = uecMemSize << 1; newSize < requiredSize; newSize <<= 1)
+ {
+ // do nothing
+ }
+
+ BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize];
+ memcpy_s(newUnwindCodes, newSize, uecMem, uecMemSize);
+#ifdef DEBUG
+ // Clear the old unwind codes; nobody should be looking at them
+ memset(uecMem, 0xFF, uecMemSize);
+#endif // DEBUG
+ uecMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator
+ // uecCodeSlot stays the same
+ uecMemSize = newSize;
+ }
+}
+
+#ifdef DEBUG
+void UnwindEpilogCodes::Dump(int indent)
+{
+ printf("%*sUnwindEpilogCodes @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this));
+ printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp));
+ printf("%*s &uecMemLocal[0]: 0x%08p\n", indent, "", dspPtr(&uecMemLocal[0]));
+ printf("%*s uecMem: 0x%08p\n", indent, "", dspPtr(uecMem));
+ printf("%*s uecMemSize: %d\n", indent, "", uecMemSize);
+ printf("%*s uecCodeSlot: %d\n", indent, "", uecCodeSlot);
+ printf("%*s uecFinalized: %s\n", indent, "", dspBool(uecFinalized));
+
+ if (uecMemSize > 0)
+ {
+ printf("%*s codes:", indent, "");
+ for (int i = 0; i < uecMemSize; i++)
+ {
+ printf(" %02x", uecMem[i]);
+ if (i == uecCodeSlot)
+ printf(" <-C"); // Indicate the current pointer
+ }
+ printf("\n");
+ }
+}
+#endif // DEBUG
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindEpilogInfo
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// Do the current unwind codes match those of the argument epilog?
+// If they don't match, return -1. If they do, return the offset into
+// our codes at which the argument codes match. Note that this means that
+// the argument codes can match a subset of our codes. The subset needs to be at
+// the end, for the "end" code to match.
+//
+// Note that if we wanted to handle 0xFD and 0xFE codes, by converting
+// an existing 0xFF code to one of those, we might do that here.
+
+int UnwindEpilogInfo::Match(UnwindEpilogInfo* pEpi)
+{
+ if (Matches())
+ {
+ // We are already matched to someone else, and won't provide codes to the final layout
+ return -1;
+ }
+
+ if (Size() < pEpi->Size())
+ {
+ return -1;
+ }
+
+ int matchIndex = Size() - pEpi->Size();
+
+ if (0 == memcmp(GetCodes() + matchIndex, pEpi->GetCodes(), pEpi->Size()))
+ {
+ return matchIndex;
+ }
+
+ return -1;
+}
+
+void UnwindEpilogInfo::CaptureEmitLocation()
+{
+ noway_assert(epiEmitLocation == NULL); // This function is only called once per epilog
+ epiEmitLocation = new (uwiComp, CMK_UnwindInfo) emitLocation();
+ epiEmitLocation->CaptureLocation(uwiComp->genEmitter);
+}
+
+void UnwindEpilogInfo::FinalizeOffset()
+{
+ epiStartOffset = epiEmitLocation->CodeOffset(uwiComp->genEmitter);
+}
+
+#ifdef DEBUG
+void UnwindEpilogInfo::Dump(int indent)
+{
+ printf("%*sUnwindEpilogInfo @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this));
+ printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp));
+ printf("%*s epiNext: 0x%08p\n", indent, "", dspPtr(epiNext));
+ printf("%*s epiEmitLocation: 0x%08p\n", indent, "", dspPtr(epiEmitLocation));
+ printf("%*s epiStartOffset: 0x%x\n", indent, "", epiStartOffset);
+ printf("%*s epiMatches: %s\n", indent, "", dspBool(epiMatches));
+ printf("%*s epiStartIndex: %d\n", indent, "", epiStartIndex);
+
+ epiCodes.Dump(indent + 2);
+}
+#endif // DEBUG
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindFragmentInfo
+//
+///////////////////////////////////////////////////////////////////////////////
+
+UnwindFragmentInfo::UnwindFragmentInfo(Compiler* comp, emitLocation* emitLoc, bool hasPhantomProlog)
+ : UnwindBase(comp)
+ , ufiNext(NULL)
+ , ufiEmitLoc(emitLoc)
+ , ufiHasPhantomProlog(hasPhantomProlog)
+ , ufiPrologCodes(comp)
+ , ufiEpilogFirst(comp)
+ , ufiEpilogList(NULL)
+ , ufiEpilogLast(NULL)
+ , ufiCurCodes(&ufiPrologCodes)
+ , ufiSize(0)
+ , ufiStartOffset(UFI_ILLEGAL_OFFSET)
+{
+#ifdef DEBUG
+ ufiNum = 1;
+ ufiInProlog = true;
+ ufiInitialized = UFI_INITIALIZED_PATTERN;
+#endif // DEBUG
+}
+
+void UnwindFragmentInfo::FinalizeOffset()
+{
+ if (ufiEmitLoc == NULL)
+ {
+ // NULL emit location means the beginning of the code. This is to handle the first fragment prolog.
+ ufiStartOffset = 0;
+ }
+ else
+ {
+ ufiStartOffset = ufiEmitLoc->CodeOffset(uwiComp->genEmitter);
+ }
+
+ for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+ pEpi->FinalizeOffset();
+ }
+}
+
+void UnwindFragmentInfo::AddEpilog()
+{
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+
+#ifdef DEBUG
+ if (ufiInProlog)
+ {
+ assert(ufiEpilogList == NULL);
+ ufiInProlog = false;
+ }
+ else
+ {
+ assert(ufiEpilogList != NULL);
+ }
+#endif // DEBUG
+
+ // Either allocate a new epilog object, or, for the first one, use the
+ // preallocated one that is a member of the UnwindFragmentInfo class.
+
+ UnwindEpilogInfo* newepi;
+
+ if (ufiEpilogList == NULL)
+ {
+ // Use the epilog that's in the class already. Be sure to initialize it!
+ newepi = ufiEpilogList = &ufiEpilogFirst;
+ }
+ else
+ {
+ newepi = new (uwiComp, CMK_UnwindInfo) UnwindEpilogInfo(uwiComp);
+ }
+
+ // Put the new epilog at the end of the epilog list
+
+ if (ufiEpilogLast != NULL)
+ {
+ ufiEpilogLast->epiNext = newepi;
+ }
+
+ ufiEpilogLast = newepi;
+
+ // What is the starting code offset of the epilog? Store an emitter location
+ // so we can ask the emitter later, after codegen.
+
+ newepi->CaptureEmitLocation();
+
+ // Put subsequent unwind codes in this new epilog
+
+ ufiCurCodes = &newepi->epiCodes;
+}
+
+// Copy the prolog codes from the 'pCopyFrom' fragment. These prolog codes will
+// become 'phantom' prolog codes in this fragment. Note that this fragment should
+// not have any prolog codes currently; it is at the initial state.
+
+void UnwindFragmentInfo::CopyPrologCodes(UnwindFragmentInfo* pCopyFrom)
+{
+ ufiPrologCodes.CopyFrom(&pCopyFrom->ufiPrologCodes);
+#ifdef _TARGET_ARM64_
+ ufiPrologCodes.AddCode(UWC_END_C);
+#endif
+}
+
+// Split the epilog codes that currently exist in 'pSplitFrom'. The ones that represent
+// epilogs that start at or after the location represented by 'emitLoc' are removed
+// from 'pSplitFrom' and moved to this fragment. Note that this fragment should not have
+// any epilog codes currently; it is at the initial state.
+
+void UnwindFragmentInfo::SplitEpilogCodes(emitLocation* emitLoc, UnwindFragmentInfo* pSplitFrom)
+{
+ UnwindEpilogInfo* pEpiPrev;
+ UnwindEpilogInfo* pEpi;
+
+ UNATIVE_OFFSET splitOffset = emitLoc->CodeOffset(uwiComp->genEmitter);
+
+ for (pEpiPrev = NULL, pEpi = pSplitFrom->ufiEpilogList; pEpi != NULL; pEpiPrev = pEpi, pEpi = pEpi->epiNext)
+ {
+ pEpi->FinalizeOffset(); // Get the offset of the epilog from the emitter so we can compare it
+ if (pEpi->GetStartOffset() >= splitOffset)
+ {
+ // This epilog and all following epilogs, which must be in order of increasing offsets,
+ // get moved to this fragment.
+
+ // Splice in the epilogs to this fragment. Set the head of the epilog
+ // list to this epilog.
+ ufiEpilogList = pEpi; // In this case, don't use 'ufiEpilogFirst'
+ ufiEpilogLast = pSplitFrom->ufiEpilogLast;
+
+ // Splice out the tail of the list from the 'pSplitFrom' epilog list
+ pSplitFrom->ufiEpilogLast = pEpiPrev;
+ if (pSplitFrom->ufiEpilogLast == NULL)
+ {
+ pSplitFrom->ufiEpilogList = NULL;
+ }
+ else
+ {
+ pSplitFrom->ufiEpilogLast->epiNext = NULL;
+ }
+
+ // No more codes should be added once we start splitting
+ pSplitFrom->ufiCurCodes = NULL;
+ ufiCurCodes = NULL;
+
+ break;
+ }
+ }
+}
+
+// Is this epilog at the end of an unwind fragment? Ask the emitter.
+// Note that we need to know this before all code offsets are finalized,
+// so we can determine whether we can omit an epilog scope word for a
+// single matching epilog.
+
+bool UnwindFragmentInfo::IsAtFragmentEnd(UnwindEpilogInfo* pEpi)
+{
+ return uwiComp->genEmitter->emitIsFuncEnd(pEpi->epiEmitLocation, (ufiNext == NULL) ? NULL : ufiNext->ufiEmitLoc);
+}
+
+// Merge the unwind codes as much as possible.
+// This function is called before all offsets are final.
+// Also, compute the size of the final unwind block. Store this
+// and some other data for later, when we actually emit the
+// unwind block.
+
+void UnwindFragmentInfo::MergeCodes()
+{
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+
+ unsigned epilogCount = 0;
+ unsigned epilogCodeBytes = 0; // The total number of unwind code bytes used by epilogs that don't match the
+ // prolog codes
+ unsigned epilogIndex = ufiPrologCodes.Size(); // The "Epilog Start Index" for the next non-matching epilog codes
+ UnwindEpilogInfo* pEpi;
+
+ for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+ ++epilogCount;
+
+ pEpi->FinalizeCodes();
+
+ // Does this epilog match the prolog?
+ // NOTE: for the purpose of matching, we don't handle the 0xFD and 0xFE end codes that allow slightly unequal
+ // prolog and epilog codes.
+
+ int matchIndex;
+
+ matchIndex = ufiPrologCodes.Match(pEpi);
+ if (matchIndex != -1)
+ {
+ pEpi->SetMatches();
+ pEpi->SetStartIndex(matchIndex); // Prolog codes start at zero, so matchIndex is exactly the start index
+ }
+ else
+ {
+ // The epilog codes don't match the prolog codes. Do they match any of the epilogs
+ // we've seen so far?
+
+ bool matched = false;
+ for (UnwindEpilogInfo* pEpi2 = ufiEpilogList; pEpi2 != pEpi; pEpi2 = pEpi2->epiNext)
+ {
+ matchIndex = pEpi2->Match(pEpi);
+ if (matchIndex != -1)
+ {
+ // Use the same epilog index as the one we matched, as it has already been set.
+ pEpi->SetMatches();
+ pEpi->SetStartIndex(pEpi2->GetStartIndex() + matchIndex); // We might match somewhere inside pEpi2's
+ // codes, in which case matchIndex > 0
+ matched = true;
+ break;
+ }
+ }
+
+ if (!matched)
+ {
+ pEpi->SetStartIndex(epilogIndex); // We'll copy these codes to the next available location
+ epilogCodeBytes += pEpi->Size();
+ epilogIndex += pEpi->Size();
+ }
+ }
+ }
+
+ DWORD codeBytes = ufiPrologCodes.Size() + epilogCodeBytes;
+ codeBytes = AlignUp(codeBytes, sizeof(DWORD));
+
+ DWORD codeWords =
+ codeBytes / sizeof(DWORD); // This is how many words we need to store all the unwind codes in the unwind block
+
+ // Do we need the 2nd header word for "Extended Code Words" or "Extended Epilog Count"?
+
+ bool needExtendedCodeWordsEpilogCount =
+ (codeWords > UW_MAX_CODE_WORDS_COUNT) || (epilogCount > UW_MAX_EPILOG_COUNT);
+
+ // How many epilog scope words do we need?
+
+ bool setEBit = false; // do we need to set the E bit?
+ unsigned epilogScopes = epilogCount; // Note that this could be zero if we have no epilogs!
+
+ if (epilogCount == 1)
+ {
+ assert(ufiEpilogList != NULL);
+ assert(ufiEpilogList->epiNext == NULL);
+
+ if (ufiEpilogList->Matches() && (ufiEpilogList->GetStartIndex() == 0) && // The match is with the prolog
+ !needExtendedCodeWordsEpilogCount && IsAtFragmentEnd(ufiEpilogList))
+ {
+ epilogScopes = 0; // Don't need any epilog scope words
+ setEBit = true;
+ }
+ }
+
+ DWORD headerBytes = (1 // Always need first header DWORD
+ + (needExtendedCodeWordsEpilogCount ? 1 : 0) // Do we need the 2nd DWORD for Extended Code
+ // Words or Extended Epilog Count?
+ + epilogScopes // One DWORD per epilog scope, for EBit = 0
+ ) *
+ sizeof(DWORD); // convert it to bytes
+
+ DWORD finalSize = headerBytes + codeBytes; // Size of actual unwind codes, aligned up to 4-byte words,
+ // including end padding if necessary
+
+ // Construct the final unwind information.
+
+ // We re-use the memory for the prolog unwind codes to construct the full unwind data. If all the epilogs
+ // match the prolog, this is easy: we just prepend the header. If there are epilog codes that don't match
+ // the prolog, we still use the prolog codes memory, but it's a little more complicated, since the
+ // unwind info is ordered as: (a) header, (b) prolog codes, (c) non-matching epilog codes. And, the prolog
+ // codes array is filled in from end-to-beginning. So, we compute the size of memory we need, ensure we
+ // have that much memory, and then copy the prolog codes to the right place, appending the non-matching
+ // epilog codes and prepending the header.
+
+ ufiPrologCodes.SetFinalSize(headerBytes, epilogCodeBytes);
+
+ if (epilogCodeBytes != 0)
+ {
+ // We need to copy the epilog code bytes to their final memory location
+
+ for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+ if (!pEpi->Matches())
+ {
+ ufiPrologCodes.AppendEpilog(pEpi);
+ }
+ }
+ }
+
+ // Save some data for later
+
+ ufiSize = finalSize;
+ ufiSetEBit = setEBit;
+ ufiNeedExtendedCodeWordsEpilogCount = needExtendedCodeWordsEpilogCount;
+ ufiCodeWords = codeWords;
+ ufiEpilogScopes = epilogScopes;
+}
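+
+// Worked example (hypothetical sizes, assuming the UW_MAX_* limits are not exceeded): a prolog with
+// 6 code bytes and a single epilog whose 4 code bytes do not match gives epilogCodeBytes = 4,
+// codeBytes = AlignUp(6 + 4, 4) = 12, codeWords = 3, epilogScopes = 1, setEBit = false,
+// headerBytes = (1 + 0 + 1) * 4 = 8, and finalSize = 8 + 12 = 20.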
+
+// Finalize: Prepare the unwind information for the VM. Compute and prepend the unwind header.
+
+void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength)
+{
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+
+#ifdef DEBUG
+ if (0 && uwiComp->verbose)
+ {
+ printf("*************** Before fragment #%d finalize\n", ufiNum);
+ Dump();
+ }
+#endif
+
+// Compute the header
+
+#if defined(_TARGET_ARM_)
+ noway_assert((functionLength & 1) == 0);
+ DWORD headerFunctionLength = functionLength / 2;
+#elif defined(_TARGET_ARM64_)
+ noway_assert((functionLength & 3) == 0);
+ DWORD headerFunctionLength = functionLength / 4;
+#endif // _TARGET_ARM64_
+
+ DWORD headerVers = 0; // Version of the unwind info is zero. No other version number is currently defined.
+ DWORD headerXBit = 0; // We never generate "exception data", but the VM might add some.
+ DWORD headerEBit;
+#if defined(_TARGET_ARM_)
+ DWORD headerFBit = ufiHasPhantomProlog ? 1 : 0; // Is this data a fragment in the sense of the unwind data
+ // specification? That is, do the prolog codes represent a real
+ // prolog or not?
+#endif // defined(_TARGET_ARM_)
+ DWORD headerEpilogCount; // This depends on how we set headerEBit.
+ DWORD headerCodeWords;
+ DWORD headerExtendedEpilogCount = 0; // This depends on how we set headerEBit.
+ DWORD headerExtendedCodeWords = 0;
+
+ if (ufiSetEBit)
+ {
+ headerEBit = 1;
+ headerEpilogCount = ufiEpilogList->GetStartIndex(); // probably zero -- the start of the prolog codes!
+ headerCodeWords = ufiCodeWords;
+ }
+ else
+ {
+ headerEBit = 0;
+
+ if (ufiNeedExtendedCodeWordsEpilogCount)
+ {
+ headerEpilogCount = 0;
+ headerCodeWords = 0;
+ headerExtendedEpilogCount = ufiEpilogScopes;
+ headerExtendedCodeWords = ufiCodeWords;
+ }
+ else
+ {
+ headerEpilogCount = ufiEpilogScopes;
+ headerCodeWords = ufiCodeWords;
+ }
+ }
+
+ // Start writing the header
+
+ noway_assert(headerFunctionLength <=
+ 0x3FFFFU); // We create fragments to prevent this from firing, so if it hits, we have an internal error
+
+ if ((headerEpilogCount > UW_MAX_EPILOG_COUNT) || (headerCodeWords > UW_MAX_CODE_WORDS_COUNT))
+ {
+ IMPL_LIMITATION("unwind data too large");
+ }
+
+#if defined(_TARGET_ARM_)
+ DWORD header = headerFunctionLength | (headerVers << 18) | (headerXBit << 20) | (headerEBit << 21) |
+ (headerFBit << 22) | (headerEpilogCount << 23) | (headerCodeWords << 28);
+#elif defined(_TARGET_ARM64_)
+ DWORD header = headerFunctionLength | (headerVers << 18) | (headerXBit << 20) | (headerEBit << 21) |
+ (headerEpilogCount << 22) | (headerCodeWords << 27);
+#endif // defined(_TARGET_ARM64_)
+
+ ufiPrologCodes.AddHeaderWord(header);
+
+ // Construct the second header word, if needed
+
+ if (ufiNeedExtendedCodeWordsEpilogCount)
+ {
+ noway_assert(headerEBit == 0);
+ noway_assert(headerEpilogCount == 0);
+ noway_assert(headerCodeWords == 0);
+ noway_assert((headerExtendedEpilogCount > UW_MAX_EPILOG_COUNT) ||
+ (headerExtendedCodeWords > UW_MAX_CODE_WORDS_COUNT));
+
+ if ((headerExtendedEpilogCount > UW_MAX_EXTENDED_EPILOG_COUNT) ||
+ (headerExtendedCodeWords > UW_MAX_EXTENDED_CODE_WORDS_COUNT))
+ {
+ IMPL_LIMITATION("unwind data too large");
+ }
+
+ DWORD header2 = headerExtendedEpilogCount | (headerExtendedCodeWords << 16);
+
+ ufiPrologCodes.AddHeaderWord(header2);
+ }
+
+ // Construct the epilog scope words, if needed
+
+ if (!ufiSetEBit)
+ {
+ for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+#if defined(_TARGET_ARM_)
+ DWORD headerCondition = 0xE; // The epilog is unconditional. We don't have epilogs under the IT instruction.
+#endif // defined(_TARGET_ARM_)
+
+ // The epilog must strictly follow the prolog. The prolog is in the first fragment of
+ // the hot section. If this epilog is at the start of a fragment, it can't be the
+ // first fragment in the hot section. We actually don't know if we're processing
+ // the hot or cold section (or a funclet), so we can't distinguish these cases. Thus,
+ // we just assert that the epilog starts within the fragment.
+ assert(pEpi->GetStartOffset() >= GetStartOffset());
+
+ // We report the offset of an epilog as the offset from the beginning of the function/funclet fragment,
+ // NOT the offset from the beginning of the main function.
+ DWORD headerEpilogStartOffset = pEpi->GetStartOffset() - GetStartOffset();
+
+#if defined(_TARGET_ARM_)
+ noway_assert((headerEpilogStartOffset & 1) == 0);
+ headerEpilogStartOffset /= 2; // The unwind data stores the actual offset divided by 2 (since the low bit of
+ // the actual offset is always zero)
+#elif defined(_TARGET_ARM64_)
+ noway_assert((headerEpilogStartOffset & 3) == 0);
+ headerEpilogStartOffset /= 4; // The unwind data stores the actual offset divided by 4 (since the low 2 bits
+ // of the actual offset are always zero)
+#endif // defined(_TARGET_ARM64_)
+
+ DWORD headerEpilogStartIndex = pEpi->GetStartIndex();
+
+ if ((headerEpilogStartOffset > UW_MAX_EPILOG_START_OFFSET) ||
+ (headerEpilogStartIndex > UW_MAX_EPILOG_START_INDEX))
+ {
+ IMPL_LIMITATION("unwind data too large");
+ }
+
+#if defined(_TARGET_ARM_)
+ DWORD epilogScopeWord = headerEpilogStartOffset | (headerCondition << 20) | (headerEpilogStartIndex << 24);
+#elif defined(_TARGET_ARM64_)
+ DWORD epilogScopeWord = headerEpilogStartOffset | (headerEpilogStartIndex << 22);
+#endif // defined(_TARGET_ARM64_)
+
+ ufiPrologCodes.AddHeaderWord(epilogScopeWord);
+ }
+ }
+
+ // The unwind code words are already here, following the header, so we're done!
+}
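+
+// Illustrative header encoding (ARM, hypothetical values): for a 0x200-byte fragment with the E bit
+// set (a single epilog matching the prolog at start index 0) and 2 code words, headerFunctionLength
+// = 0x100 and header = 0x100 | (1 << 21) | (0 << 23) | (2 << 28) = 0x20200100.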
+
+void UnwindFragmentInfo::Reserve(BOOL isFunclet, bool isHotCode)
+{
+ assert(isHotCode || !isFunclet); // TODO-CQ: support hot/cold splitting in functions with EH
+
+ MergeCodes();
+
+ BOOL isColdCode = isHotCode ? FALSE : TRUE;
+
+ ULONG unwindSize = Size();
+
+#ifdef DEBUG
+ if (uwiComp->verbose)
+ {
+ if (ufiNum != 1)
+ printf("reserveUnwindInfo: fragment #%d:\n", ufiNum);
+ }
+#endif
+
+ uwiComp->eeReserveUnwindInfo(isFunclet, isColdCode, unwindSize);
+}
+
+// Allocate the unwind info for a fragment with the VM.
+// Arguments:
+// funKind: funclet kind
+// pHotCode: hot section code buffer
+// pColdCode: cold section code buffer
+// funcEndOffset: offset of the end of this function/funclet. Used if this fragment is the last one for a
+// function/funclet.
+// isHotCode: are we allocating the unwind info for the hot code section?
+
+void UnwindFragmentInfo::Allocate(
+ CorJitFuncKind funKind, void* pHotCode, void* pColdCode, UNATIVE_OFFSET funcEndOffset, bool isHotCode)
+{
+ UNATIVE_OFFSET startOffset;
+ UNATIVE_OFFSET endOffset;
+ UNATIVE_OFFSET codeSize;
+
+ // We don't support hot/cold splitting with EH, so if there is cold code, this
+ // better not be a funclet!
+ // TODO-CQ: support funclets in cold code
+
+ noway_assert(isHotCode || funKind == CORJIT_FUNC_ROOT);
+
+ // Compute the final size, and start and end offsets of the fragment
+
+ startOffset = GetStartOffset();
+
+ if (ufiNext == NULL)
+ {
+ // This is the last fragment, so the fragment extends to the end of the function/funclet.
+ assert(funcEndOffset != 0);
+ endOffset = funcEndOffset;
+ }
+ else
+ {
+ // The fragment length is all the code between the beginning of this fragment
+ // and the beginning of the next fragment. Note that all fragments have had their
+ // offsets computed before any fragment is allocated.
+ endOffset = ufiNext->GetStartOffset();
+ }
+
+ assert(endOffset > startOffset);
+ codeSize = endOffset - startOffset;
+
+ // Finalize the fragment unwind block to hand to the VM
+
+ Finalize(codeSize);
+
+ // Get the final unwind information and hand it to the VM
+
+ ULONG unwindBlockSize;
+ BYTE* pUnwindBlock;
+
+ GetFinalInfo(&pUnwindBlock, &unwindBlockSize);
+
+#ifdef DEBUG
+ if (uwiComp->opts.dspUnwind)
+ {
+ DumpUnwindInfo(uwiComp, isHotCode, startOffset, endOffset, pUnwindBlock, unwindBlockSize);
+ }
+#endif // DEBUG
+
+ // Adjust for cold or hot code:
+ // 1. The VM doesn't want the cold code pointer unless this is cold code.
+ // 2. The startOffset and endOffset need to be from the base of the hot section for hot code
+ // and from the base of the cold section for cold code
+
+ if (isHotCode)
+ {
+ assert(endOffset <= uwiComp->info.compTotalHotCodeSize);
+ pColdCode = NULL;
+ }
+ else
+ {
+ assert(startOffset >= uwiComp->info.compTotalHotCodeSize);
+ startOffset -= uwiComp->info.compTotalHotCodeSize;
+ endOffset -= uwiComp->info.compTotalHotCodeSize;
+ }
+
+#ifdef DEBUG
+ if (uwiComp->verbose)
+ {
+ if (ufiNum != 1)
+ printf("unwindEmit: fragment #%d:\n", ufiNum);
+ }
+#endif // DEBUG
+
+ uwiComp->eeAllocUnwindInfo((BYTE*)pHotCode, (BYTE*)pColdCode, startOffset, endOffset, unwindBlockSize, pUnwindBlock,
+ funKind);
+}
+
+#ifdef DEBUG
+void UnwindFragmentInfo::Dump(int indent)
+{
+ unsigned count;
+ UnwindEpilogInfo* pEpi;
+
+ count = 0;
+ for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+ ++count;
+ }
+
+ printf("%*sUnwindFragmentInfo #%d, @0x%08p, size:%d:\n", indent, "", ufiNum, dspPtr(this), sizeof(*this));
+ printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp));
+ printf("%*s ufiNext: 0x%08p\n", indent, "", dspPtr(ufiNext));
+ printf("%*s ufiEmitLoc: 0x%08p\n", indent, "", dspPtr(ufiEmitLoc));
+ printf("%*s ufiHasPhantomProlog: %s\n", indent, "", dspBool(ufiHasPhantomProlog));
+ printf("%*s %d epilog%s\n", indent, "", count, (count != 1) ? "s" : "");
+ printf("%*s ufiEpilogList: 0x%08p\n", indent, "", dspPtr(ufiEpilogList));
+ printf("%*s ufiEpilogLast: 0x%08p\n", indent, "", dspPtr(ufiEpilogLast));
+ printf("%*s ufiCurCodes: 0x%08p\n", indent, "", dspPtr(ufiCurCodes));
+ printf("%*s ufiSize: %u\n", indent, "", ufiSize);
+ printf("%*s ufiSetEBit: %s\n", indent, "", dspBool(ufiSetEBit));
+ printf("%*s ufiNeedExtendedCodeWordsEpilogCount: %s\n", indent, "", dspBool(ufiNeedExtendedCodeWordsEpilogCount));
+ printf("%*s ufiCodeWords: %u\n", indent, "", ufiCodeWords);
+ printf("%*s ufiEpilogScopes: %u\n", indent, "", ufiEpilogScopes);
+ printf("%*s ufiStartOffset: 0x%x\n", indent, "", ufiStartOffset);
+ printf("%*s ufiInProlog: %s\n", indent, "", dspBool(ufiInProlog));
+ printf("%*s ufiInitialized: 0x%08x\n", indent, "", ufiInitialized);
+
+ ufiPrologCodes.Dump(indent + 2);
+
+ for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+ pEpi->Dump(indent + 2);
+ }
+}
+#endif // DEBUG
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindInfo
+//
+///////////////////////////////////////////////////////////////////////////////
+
+void UnwindInfo::InitUnwindInfo(Compiler* comp, emitLocation* startLoc, emitLocation* endLoc)
+{
+ uwiComp = comp;
+
+ // The first fragment is a member of UnwindInfo, so it doesn't need to be allocated.
+ // However, its constructor needs to be explicitly called, since the constructor for
+ // UnwindInfo is not called.
+
+ uwiFragmentFirst.UnwindFragmentInfo::UnwindFragmentInfo(comp, startLoc, false);
+
+ uwiFragmentLast = &uwiFragmentFirst;
+
+ uwiEndLoc = endLoc;
+
+ // Allocate an emitter location object. It is initialized to something
+ // invalid: it has a null 'ig' that needs to get set before it can be used.
+ // Note that when we create an UnwindInfo for the cold section, this never
+ // gets initialized with anything useful, since we never add unwind codes
+ // to the cold section; we simply distribute the existing (previously added) codes.
+ uwiCurLoc = new (uwiComp, CMK_UnwindInfo) emitLocation();
+
+#ifdef DEBUG
+ uwiInitialized = UWI_INITIALIZED_PATTERN;
+ uwiAddingNOP = false;
+#endif // DEBUG
+}
+
+// Split the unwind codes in 'puwi' into those that are in the hot section (leave them in 'puwi')
+// and those that are in the cold section (move them to 'this'). There is exactly one fragment
+// in each UnwindInfo; the fragments haven't been split for size, yet.
+
+void UnwindInfo::HotColdSplitCodes(UnwindInfo* puwi)
+{
+ // Ensure that there is exactly a single fragment in both the hot and the cold sections
+ assert(&uwiFragmentFirst == uwiFragmentLast);
+ assert(&puwi->uwiFragmentFirst == puwi->uwiFragmentLast);
+ assert(uwiFragmentLast->ufiNext == NULL);
+ assert(puwi->uwiFragmentLast->ufiNext == NULL);
+
+ // The real prolog is in the hot section, so this, cold, section has a phantom prolog
+ uwiFragmentLast->ufiHasPhantomProlog = true;
+ uwiFragmentLast->CopyPrologCodes(puwi->uwiFragmentLast);
+
+ // Now split the epilog codes
+ uwiFragmentLast->SplitEpilogCodes(uwiFragmentLast->ufiEmitLoc, puwi->uwiFragmentLast);
+}
+
+// Split the function or funclet into fragments that are no larger than 512K,
+// so the fragment size will fit in the unwind data "Function Length" field.
+// The ARM Exception Data specification "Function Fragments" section describes this.
+// We split the function so that each fragment is no larger than 512K bytes, or the
+// value of COMPlus_JitSplitFunctionSize, if defined (and smaller). We must determine
+// how to split the function/funclet before we issue the instructions, so we can
+// reserve the unwind space with the VM. The instructions issued may shrink (but not
+// expand!) during issuing (although this is extremely rare in any case, and may not
+// actually occur on ARM), so we don't finalize actual sizes or offsets.
+//
+// ARM64 has very similar limitations, except functions can be up to 1MB. TODO-ARM64-Bug?: make sure this works!
+//
+// We don't split any prolog or epilog. Ideally, we might not split an instruction,
+// although that doesn't matter because the unwind at any point would still be
+// well-defined.
+
+void UnwindInfo::Split()
+{
+ UNATIVE_OFFSET maxFragmentSize; // The maximum size of a code fragment in bytes
+
+ maxFragmentSize = UW_MAX_FRAGMENT_SIZE_BYTES;
+
+#ifdef DEBUG
+ // Consider COMPlus_JitSplitFunctionSize
+ unsigned splitFunctionSize = (unsigned)JitConfig.JitSplitFunctionSize();
+
+ if (splitFunctionSize != 0)
+ if (splitFunctionSize < maxFragmentSize)
+ maxFragmentSize = splitFunctionSize;
+#endif // DEBUG
+
+ // Now, there should be exactly one fragment.
+
+ assert(uwiFragmentLast != NULL);
+ assert(uwiFragmentLast == &uwiFragmentFirst);
+ assert(uwiFragmentLast->ufiNext == NULL);
+
+ // Find the code size of this function/funclet.
+
+ UNATIVE_OFFSET startOffset;
+ UNATIVE_OFFSET endOffset;
+ UNATIVE_OFFSET codeSize;
+
+ if (uwiFragmentLast->ufiEmitLoc == NULL)
+ {
+ // NULL emit location means the beginning of the code. This is to handle the first fragment prolog.
+ startOffset = 0;
+ }
+ else
+ {
+ startOffset = uwiFragmentLast->ufiEmitLoc->CodeOffset(uwiComp->genEmitter);
+ }
+
+ if (uwiEndLoc == NULL)
+ {
+ // Note that compTotalHotCodeSize and compTotalColdCodeSize are computed before issuing instructions
+ // from the emitter instruction group offsets, and will be accurate unless the issued code shrinks.
+ // compNativeCodeSize is precise, but is only set after instructions are issued, which is too late
+ // for us, since we need to decide how many fragments we need before the code memory is allocated
+ // (which is before instruction issuing).
+ UNATIVE_OFFSET estimatedTotalCodeSize =
+ uwiComp->info.compTotalHotCodeSize + uwiComp->info.compTotalColdCodeSize;
+ assert(estimatedTotalCodeSize != 0);
+ endOffset = estimatedTotalCodeSize;
+ }
+ else
+ {
+ endOffset = uwiEndLoc->CodeOffset(uwiComp->genEmitter);
+ }
+
+ assert(endOffset > startOffset); // there better be at least 1 byte of code
+ codeSize = endOffset - startOffset;
+
+ // Now that we know the code size for this section (main function hot or cold, or funclet),
+ // figure out how many fragments we're going to need.
+
+ UNATIVE_OFFSET numberOfFragments = (codeSize + maxFragmentSize - 1) / maxFragmentSize; // round up
+ assert(numberOfFragments > 0);
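+
+ // For illustration: with the default 512K (0x80000) maximum described above, a hypothetical
+ // 1,200,000-byte section yields numberOfFragments = (1200000 + 524287) / 524288 = 3.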
+
+ if (numberOfFragments == 1)
+ {
+ // No need to split; we're done
+ return;
+ }
+
+ // Now, we're going to commit to splitting the function into "numberOfFragments" fragments,
+ // for the purpose of unwind information. We need to do the actual splits so we can figure out
+ // the size of each piece of unwind data for the call to reserveUnwindInfo(). We won't know
+ // the actual offsets of the splits since we haven't issued the instructions yet, so store
+ // an emitter location instead of an offset, and "finalize" the offset in the unwindEmit() phase,
+ // like we do for the function length and epilog offsets.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (uwiComp->verbose)
+ {
+ printf("Split unwind info into %d fragments (function/funclet size: %d, maximum fragment size: %d)\n",
+ numberOfFragments, codeSize, maxFragmentSize);
+ }
+#endif // DEBUG
+
+ // Call the emitter to do the split, and call us back for every split point it chooses.
+ uwiComp->genEmitter->emitSplit(uwiFragmentLast->ufiEmitLoc, uwiEndLoc, maxFragmentSize, (void*)this,
+ EmitSplitCallback);
+
+#ifdef DEBUG
+ // Did the emitter split the function/funclet into as many fragments as we asked for?
+ // It might be fewer if the COMPlus_JitSplitFunctionSize was used, but it better not
+ // be fewer if we're splitting into 512K blocks!
+
+ unsigned fragCount = 0;
+ for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext)
+ {
+ ++fragCount;
+ }
+ if (fragCount < numberOfFragments)
+ {
+ if (uwiComp->verbose)
+ {
+ printf("WARNING: asked the emitter for %d fragments, but only got %d\n", numberOfFragments, fragCount);
+ }
+
+ // If this fires, then we split into fewer fragments than we asked for, and we are using
+ // the default, unwind-data-defined 512K maximum fragment size. We won't be able to fit
+ // this fragment into the unwind data! If you set COMPlus_JitSplitFunctionSize to something
+ // small, we might not be able to split into as many fragments as asked for, because we
+ // can't split prologs or epilogs.
+ assert(maxFragmentSize != UW_MAX_FRAGMENT_SIZE_BYTES);
+ }
+#endif // DEBUG
+}
+
+/*static*/ void UnwindInfo::EmitSplitCallback(void* context, emitLocation* emitLoc)
+{
+ UnwindInfo* puwi = (UnwindInfo*)context;
+ puwi->AddFragment(emitLoc);
+}
+
+// Reserve space for the unwind info for all fragments
+
+void UnwindInfo::Reserve(BOOL isFunclet, bool isHotCode)
+{
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(isHotCode || !isFunclet);
+
+ for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext)
+ {
+ pFrag->Reserve(isFunclet, isHotCode);
+ }
+}
+
+// Allocate and populate VM unwind info for all fragments
+
+void UnwindInfo::Allocate(CorJitFuncKind funKind, void* pHotCode, void* pColdCode, bool isHotCode)
+{
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+
+ UnwindFragmentInfo* pFrag;
+
+ // First, finalize all the offsets (the location of the beginning of fragments, and epilogs),
+ // so a fragment can use the finalized offset of the subsequent fragment to determine its code size.
+
+ UNATIVE_OFFSET endOffset;
+
+ if (uwiEndLoc == NULL)
+ {
+ assert(uwiComp->info.compNativeCodeSize != 0);
+ endOffset = uwiComp->info.compNativeCodeSize;
+ }
+ else
+ {
+ endOffset = uwiEndLoc->CodeOffset(uwiComp->genEmitter);
+ }
+
+ for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext)
+ {
+ pFrag->FinalizeOffset();
+ }
+
+ for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext)
+ {
+ pFrag->Allocate(funKind, pHotCode, pColdCode, endOffset, isHotCode);
+ }
+}
+
+void UnwindInfo::AddEpilog()
+{
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiFragmentLast != NULL);
+ uwiFragmentLast->AddEpilog();
+ CaptureLocation();
+}
+
+#if defined(_TARGET_ARM_)
+
+unsigned UnwindInfo::GetInstructionSize()
+{
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ return uwiComp->genEmitter->emitGetInstructionSize(uwiCurLoc);
+}
+
+#endif // defined(_TARGET_ARM_)
+
+void UnwindInfo::CaptureLocation()
+{
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiCurLoc != NULL);
+ uwiCurLoc->CaptureLocation(uwiComp->genEmitter);
+}
+
+void UnwindInfo::AddFragment(emitLocation* emitLoc)
+{
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiFragmentLast != NULL);
+
+ UnwindFragmentInfo* newFrag = new (uwiComp, CMK_UnwindInfo) UnwindFragmentInfo(uwiComp, emitLoc, true);
+
+#ifdef DEBUG
+ newFrag->ufiNum = uwiFragmentLast->ufiNum + 1;
+#endif // DEBUG
+
+ newFrag->CopyPrologCodes(&uwiFragmentFirst);
+ newFrag->SplitEpilogCodes(emitLoc, uwiFragmentLast);
+
+ // Link the new fragment in at the end of the fragment list
+ uwiFragmentLast->ufiNext = newFrag;
+ uwiFragmentLast = newFrag;
+}
+
+#ifdef DEBUG
+
+#if defined(_TARGET_ARM_)
+
+// Given the first byte of the unwind code, check that its opsize matches
+// the last instruction added in the emitter.
+void UnwindInfo::CheckOpsize(BYTE b1)
+{
+ // Adding NOP padding goes through the same path, but doesn't update the location to indicate
+ // the correct location of the instruction for which we are adding a NOP, so just skip the
+ // assert. Should be ok, because the emitter is telling us the size of the instruction for
+ // which we are adding the NOP.
+ if (uwiAddingNOP)
+ return;
+
+ unsigned opsizeInBytes = GetOpcodeSizeFromUnwindHeader(b1);
+ unsigned instrSizeInBytes = GetInstructionSize();
+ assert(opsizeInBytes == instrSizeInBytes);
+}
+
+#endif // defined(_TARGET_ARM_)
+
+void UnwindInfo::Dump(bool isHotCode, int indent)
+{
+ unsigned count;
+ UnwindFragmentInfo* pFrag;
+
+ count = 0;
+ for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext)
+ {
+ ++count;
+ }
+
+ printf("%*sUnwindInfo %s@0x%08p, size:%d:\n", indent, "", isHotCode ? "" : "COLD ", dspPtr(this), sizeof(*this));
+ printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp));
+ printf("%*s %d fragment%s\n", indent, "", count, (count != 1) ? "s" : "");
+ printf("%*s uwiFragmentLast: 0x%08p\n", indent, "", dspPtr(uwiFragmentLast));
+ printf("%*s uwiEndLoc: 0x%08p\n", indent, "", dspPtr(uwiEndLoc));
+ printf("%*s uwiInitialized: 0x%08x\n", indent, "", uwiInitialized);
+
+ for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext)
+ {
+ pFrag->Dump(indent + 2);
+ }
+}
+
+#endif // DEBUG
+
+#if defined(_TARGET_ARM_)
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Debug dumpers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifdef DEBUG
+
+// start is 0-based index from LSB, length is number of bits
+DWORD ExtractBits(DWORD dw, DWORD start, DWORD length)
+{
+ return (dw >> start) & ((1 << length) - 1);
+}
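+
+// For example, ExtractBits(0x12345678, 8, 8) == 0x56 and ExtractBits(0x12345678, 28, 4) == 0x1.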
+
+// Dump an integer register set. 'x' is an array of bits where bit 0 = r0, bit 1 = r1, etc.
+// The highest register considered is r12.
+// If 'lr' is non-zero, the "lr" register is emitted last.
+// Returns the number of characters printed.
+DWORD DumpIntRegSet(DWORD x, DWORD lr)
+{
+ assert(x != 0 || lr != 0); // we must have one
+ assert((x & 0xE000) == 0); // don't handle r13 (sp), r14 (lr), r15 (pc) in 'x'
+ DWORD printed = 0;
+
+ printf("{");
+ ++printed;
+ bool first = true;
+ DWORD bitMask = 1;
+ for (DWORD bitNum = 0; bitNum < 12; bitNum++)
+ {
+ if (x & bitMask)
+ {
+ if (!first)
+ {
+ printf(",");
+ ++printed;
+ }
+ printf("r%u", bitNum);
+ printed += (bitNum < 10) ? 2 : 3;
+ first = false;
+ }
+ bitMask <<= 1;
+ }
+ if (lr)
+ {
+ if (!first)
+ {
+ printf(",");
+ ++printed;
+ }
+ printf("lr");
+ printed += 2;
+ }
+ printf("}");
+ ++printed;
+
+ return printed;
+}
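+
+// For example, DumpIntRegSet(0x000F, 1) prints "{r0,r1,r2,r3,lr}" and returns 16.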
+
+// Dump a register set range from register 'start' to register 'end'.
+// rtype should be "r" or "d" to indicate register type.
+// If 'lr' is non-zero, the "lr" register is emitted last. (Note that
+// 'lr' should be zero for rtype == "d".)
+// Returns the number of characters printed.
+DWORD DumpRegSetRange(const char* const rtype, DWORD start, DWORD end, DWORD lr)
+{
+ assert(start <= end);
+ DWORD printed = 0;
+ DWORD rtypeLen = (DWORD)strlen(rtype);
+
+ printf("{");
+ ++printed;
+ bool first = true;
+ for (DWORD reg = start; reg <= end; reg++)
+ {
+ if (!first)
+ {
+ printf(",");
+ ++printed;
+ }
+ printf("%s%u", rtype, reg);
+ printed += rtypeLen + ((reg < 10) ? 1 : 2);
+ first = false;
+ }
+ if (lr)
+ {
+ assert(!first); // If 'lr' is set, it can't be first, since we require a non-empty range
+ printf(",lr");
+ printed += 3;
+ }
+ printf("}");
+ ++printed;
+
+ return printed;
+}
+
+// Dump the opsize.
+// Returns the number of characters printed.
+DWORD DumpOpsize(DWORD padding, DWORD opsize)
+{
+ if (padding > 100) // underflow?
+ padding = 4;
+ DWORD printed = padding;
+ for (; padding > 0; padding--)
+ printf(" ");
+ printf("; opsize %d\n", opsize);
+ return printed + 11; // assumes opsize is always 2 digits
+}
+
+// Dump the unwind data.
+// Arguments:
+// isHotCode: true if this unwind data is for the hot section
+// startOffset: byte offset of the code start that this unwind data represents
+// endOffset: byte offset of the code end that this unwind data represents
+// pHeader: pointer to the unwind data blob
+// unwindBlockSize: size in bytes of the unwind data blob
+
+void DumpUnwindInfo(Compiler* comp,
+ bool isHotCode,
+ UNATIVE_OFFSET startOffset,
+ UNATIVE_OFFSET endOffset,
+ const BYTE* const pHeader,
+ ULONG unwindBlockSize)
+{
+ printf("Unwind Info%s:\n", isHotCode ? "" : " COLD");
+
+ // pHeader is not guaranteed to be aligned. We put four 0xFF end codes at the end
+ // to provide padding, and round down to get a multiple of 4 bytes in size.
+ DWORD UNALIGNED* pdw = (DWORD UNALIGNED*)pHeader;
+ DWORD dw;
+
+ dw = *pdw++;
+
+ DWORD codeWords = ExtractBits(dw, 28, 4);
+ DWORD epilogCount = ExtractBits(dw, 23, 5);
+ DWORD FBit = ExtractBits(dw, 22, 1);
+ DWORD EBit = ExtractBits(dw, 21, 1);
+ DWORD XBit = ExtractBits(dw, 20, 1);
+ DWORD Vers = ExtractBits(dw, 18, 2);
+ DWORD functionLength = ExtractBits(dw, 0, 18);
+
+ printf(" >> Start offset : 0x%06x (not in unwind data)\n", comp->dspOffset(startOffset));
+ printf(" >> End offset : 0x%06x (not in unwind data)\n", comp->dspOffset(endOffset));
+ printf(" Code Words : %u\n", codeWords);
+ printf(" Epilog Count : %u\n", epilogCount);
+ printf(" F bit : %u\n", FBit);
+ printf(" E bit : %u\n", EBit);
+ printf(" X bit : %u\n", XBit);
+ printf(" Vers : %u\n", Vers);
+ printf(" Function Length : %u (0x%05x) Actual length = %u (0x%06x)\n", functionLength, functionLength,
+ functionLength * 2, functionLength * 2);
+
+ assert(functionLength * 2 == endOffset - startOffset);
+
+ if (codeWords == 0 && epilogCount == 0)
+ {
+ // We have an extension word specifying a larger number of Code Words or Epilog Counts
+ // than can be specified in the header word.
+
+ dw = *pdw++;
+
+ codeWords = ExtractBits(dw, 16, 8);
+ epilogCount = ExtractBits(dw, 0, 16);
+ assert((dw & 0xF0000000) == 0); // reserved field should be zero
+
+ printf(" ---- Extension word ----\n");
+ printf(" Extended Code Words : %u\n", codeWords);
+ printf(" Extended Epilog Count : %u\n", epilogCount);
+ }
+
+ bool epilogStartAt[256] = {}; // One byte per possible epilog start index; initialized to false
+
+ if (EBit == 0)
+ {
+ // We have an array of epilog scopes
+
+ printf(" ---- Epilog scopes ----\n");
+ if (epilogCount == 0)
+ {
+ printf(" No epilogs\n");
+ }
+ else
+ {
+ for (DWORD scope = 0; scope < epilogCount; scope++)
+ {
+ dw = *pdw++;
+
+ DWORD epilogStartOffset = ExtractBits(dw, 0, 18);
+ DWORD res = ExtractBits(dw, 18, 2);
+ DWORD condition = ExtractBits(dw, 20, 4);
+ DWORD epilogStartIndex = ExtractBits(dw, 24, 8);
+
+ // Note that epilogStartOffset for a funclet is the offset from the beginning
+ // of the current funclet, not the offset from the beginning of the main function.
+ // To help find it when looking through JitDump output, also show the offset from
+ // the beginning of the main function.
+ DWORD epilogStartOffsetFromMainFunctionBegin = epilogStartOffset * 2 + startOffset;
+
+ assert(res == 0);
+
+ printf(" ---- Scope %d\n", scope);
+ printf(" Epilog Start Offset : %u (0x%05x) Actual offset = %u (0x%06x) Offset from main "
+ "function begin = %u (0x%06x)\n",
+ comp->dspOffset(epilogStartOffset), comp->dspOffset(epilogStartOffset),
+ comp->dspOffset(epilogStartOffset * 2), comp->dspOffset(epilogStartOffset * 2),
+ comp->dspOffset(epilogStartOffsetFromMainFunctionBegin),
+ comp->dspOffset(epilogStartOffsetFromMainFunctionBegin));
+ printf(" Condition : %u (0x%x)%s\n", condition, condition,
+ (condition == 0xE) ? " (always)" : "");
+ printf(" Epilog Start Index : %u (0x%02x)\n", epilogStartIndex, epilogStartIndex);
+
+ epilogStartAt[epilogStartIndex] = true; // an epilog starts at this offset in the unwind codes
+ }
+ }
+ }
+ else
+ {
+ printf(" --- One epilog, unwind codes at %u\n", epilogCount);
+ assert(epilogCount < sizeof(epilogStartAt) / sizeof(epilogStartAt[0]));
+ epilogStartAt[epilogCount] = true; // the one and only epilog starts its unwind codes at this offset
+ }
+
+ if (FBit)
+ {
+ printf(" ---- Note: 'F' bit is set. Prolog codes are for a 'phantom' prolog.\n");
+ }
+
+ // Dump the unwind codes
+
+ printf(" ---- Unwind codes ----\n");
+
+ DWORD countOfUnwindCodes = codeWords * 4;
+ PBYTE pUnwindCode = (PBYTE)pdw;
+ BYTE b1, b2, b3, b4;
+ DWORD x, y;
+ DWORD opsize;
+ DWORD opCol = 52;
+ DWORD printed;
+ for (DWORD i = 0; i < countOfUnwindCodes; i++)
+ {
+ // Does this byte start an epilog sequence? If so, note that fact.
+ if (epilogStartAt[i])
+ {
+ printf(" ---- Epilog start at index %u ----\n", i);
+ }
+
+ b1 = *pUnwindCode++;
+
+ if ((b1 & 0x80) == 0)
+ {
+ // 00-7F : add sp, sp, #X*4 (opsize 16)
+ x = b1 & 0x7F;
+ printf(" %02X add sp, sp, #%-8d", b1, x * 4);
+ DumpOpsize(opCol - 37, 16);
+ }
+ else if ((b1 & 0xC0) == 0x80)
+ {
+ // 80-BF : pop {r0-r12,lr} (X = bitmask) (opsize 32)
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ DWORD LBit = ExtractBits(b1, 5, 1);
+ x = ((DWORD)(b1 & 0x1F) << 8) | (DWORD)b2;
+
+ printf(" %02X %02X pop ", b1, b2);
+ printed = 20;
+ printed += DumpIntRegSet(x, LBit);
+ DumpOpsize(opCol - printed, 32);
+ }
+ else if ((b1 & 0xF0) == 0xC0)
+ {
+ // C0-CF : mov sp, rX (X=0-15) (opsize 16)
+ x = b1 & 0xF;
+ printf(" %02X mov sp, r%u", b1, x);
+ printed = 25 + ((x >= 10) ? 2 : 1);
+ DumpOpsize(opCol - printed, 16);
+ }
+ else if ((b1 & 0xF8) == 0xD0)
+ {
+ // D0-D7 : pop {r4-rX,lr} (X=4-7) (opsize 16)
+ x = b1 & 0x3;
+ DWORD LBit = b1 & 0x4;
+ printf(" %02X pop ", b1);
+ printed = 20;
+ printed += DumpRegSetRange("r", 4, x + 4, LBit);
+ DumpOpsize(opCol - printed, 16);
+ }
+ else if ((b1 & 0xF8) == 0xD8)
+ {
+ // D8-DF : pop {r4-rX,lr} (X=8-11) (opsize 32)
+ x = b1 & 0x3;
+ DWORD LBit = b1 & 0x4;
+ printf(" %02X pop ", b1);
+ printed = 20;
+ printed += DumpRegSetRange("r", 4, x + 8, LBit);
+ DumpOpsize(opCol - printed, 32);
+ }
+ else if ((b1 & 0xF8) == 0xE0)
+ {
+ // E0-E7 : vpop {d8-dX} (X=8-15) (opsize 32)
+ x = b1 & 0x7;
+ printf(" %02X vpop ", b1);
+ printed = 21;
+ printed += DumpRegSetRange("d", 8, x + 8, 0);
+ DumpOpsize(opCol - printed, 32);
+ }
+ else if ((b1 & 0xFC) == 0xE8)
+ {
+ // E8-EB : addw sp, sp, #X*4 (opsize 32)
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x3) << 8) | (DWORD)b2;
+
+ printf(" %02X %02X addw sp, sp, #%-8u", b1, b2, x * 4);
+ DumpOpsize(opCol - 38, 32);
+ }
+ else if ((b1 & 0xFE) == 0xEC)
+ {
+ // EC-ED : pop {r0-r7,lr} (X = bitmask) (opsize 16)
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ DWORD LBit = ExtractBits(b1, 0, 1);
+ x = (DWORD)b2;
+
+ printf(" %02X %02X pop ", b1, b2);
+ printed = 20;
+ printed += DumpIntRegSet(x, LBit);
+ DumpOpsize(opCol - printed, 16);
+ }
+ else if (b1 == 0xEE)
+ {
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ if ((b2 & 0xF0) == 0)
+ {
+ // EE/0x (opsize 16)
+ x = b2 & 0xF;
+ printf(" %02X %02X Microsoft-specific (x = %02X)", b1, b2, x);
+ DumpOpsize(4, 16);
+ }
+ else
+ {
+ // EE/xy (opsize 16)
+ x = ExtractBits(b2, 4, 4);
+ y = ExtractBits(b2, 0, 4);
+ printf(" %02X %02X Available (x = %02X, y = %02X)", b1, b2, x, y);
+ DumpOpsize(4, 16);
+ }
+ }
+ else if (b1 == 0xEF)
+ {
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ if ((b2 & 0xF0) == 0)
+ {
+ // EF/0x : ldr lr, [sp], #X*4 (opsize 32)
+ x = b2 & 0xF;
+ printf(" %02X %02X ldr lr, [sp], #%-8u", b1, b2, x * 4);
+ DumpOpsize(opCol - 39, 32);
+ }
+ else
+ {
+ // EF/xy (opsize 32)
+ x = ExtractBits(b2, 4, 4);
+ y = ExtractBits(b2, 0, 4);
+ printf(" %02X %02X Available (x = %02X, y = %02X)", b1, b2, x, y);
+ DumpOpsize(4, 32);
+ }
+ }
+ else if ((0xF0 <= b1) && (b1 <= 0xF4))
+ {
+ // F0-F4
+ x = b1 & 0x7;
+ printf(" %02X Available (x = %02X)\n", b1, x);
+ }
+ else if (b1 == 0xF5)
+ {
+ // F5 : vpop {dS-dE} (opsize 32)
+
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ DWORD s = ExtractBits(b2, 4, 4);
+ DWORD e = ExtractBits(b2, 0, 4);
+
+ printf(" %02X %02X vpop ", b1, b2);
+ printed = 21;
+ printed += DumpRegSetRange("d", s, e, 0);
+ DumpOpsize(opCol - printed, 32);
+ }
+ else if (b1 == 0xF6)
+ {
+ // F6 : vpop {d(S+16)-d(E+16)} (opsize 32)
+
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ DWORD s = ExtractBits(b2, 4, 4);
+ DWORD e = ExtractBits(b2, 0, 4);
+
+ printf(" %02X %02X vpop ", b1, b2);
+ printed = 21;
+ printed += DumpRegSetRange("d", s + 16, e + 16, 0);
+ DumpOpsize(opCol - printed, 32);
+ }
+ else if (b1 == 0xF7 || b1 == 0xF9)
+ {
+ // F7, F9 : add sp, sp, #X*4
+ // 0xF7 has opsize 16, 0xF9 has opsize 32
+
+ assert(i + 2 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ b3 = *pUnwindCode++;
+ i += 2;
+
+ x = ((DWORD)b2 << 8) | (DWORD)b3;
+
+ opsize = (b1 == 0xF7) ? 16 : 32;
+
+ printf(" %02X %02X %02X add sp, sp, #%-8u", b1, b2, b3, x * 4, opsize);
+ DumpOpsize(opCol - 37, opsize);
+ }
+ else if (b1 == 0xF8 || b1 == 0xFA)
+ {
+ // F8, FA : add sp, sp, #X*4
+ // 0xF8 has opsize 16, 0xFA has opsize 32
+
+ assert(i + 3 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ b3 = *pUnwindCode++;
+ b4 = *pUnwindCode++;
+ i += 3;
+
+ x = ((DWORD)b2 << 16) | ((DWORD)b3 << 8) | (DWORD)b4;
+
+ opsize = (b1 == 0xF8) ? 16 : 32;
+
+ printf(" %02X %02X %02X %02X add sp, sp, #%-8u", b1, b2, b3, b4, x * 4, opsize);
+ DumpOpsize(opCol - 37, opsize);
+ }
+ else if (b1 == 0xFB || b1 == 0xFC)
+ {
+ // FB, FC : nop
+ // 0xFB has opsize 16, 0xFC has opsize 32
+
+ opsize = (b1 == 0xFB) ? 16 : 32;
+
+ printf(" %02X nop", b1, opsize);
+ DumpOpsize(opCol - 19, opsize);
+ }
+ else if (b1 == 0xFD || b1 == 0xFE)
+ {
+ // FD, FE : end + nop
+ // 0xFD has opsize 16, 0xFE has opsize 32
+
+ opsize = (b1 == 0xFD) ? 16 : 32;
+
+ printf(" %02X end + nop", b1, opsize);
+ DumpOpsize(opCol - 25, opsize);
+ }
+ else if (b1 == 0xFF)
+ {
+ // FF : end
+
+ printf(" %02X end\n", b1);
+ }
+ else
+ {
+ assert(!"Internal error decoding unwind codes");
+ }
+ }
+
+ pdw += codeWords;
+ assert((PBYTE)pdw == pUnwindCode);
+ assert((PBYTE)pdw == pHeader + unwindBlockSize);
+
+ assert(XBit == 0); // We don't handle the case where exception data is present, such as the Exception Handler RVA
+
+ printf("\n");
+}
+
+#endif // DEBUG
+
+#endif // defined(_TARGET_ARM_)
+
+#endif // _TARGET_ARMARCH_
diff --git a/src/jit/unwindarm64.cpp b/src/jit/unwindarm64.cpp
new file mode 100644
index 0000000000..21e2a36b2a
--- /dev/null
+++ b/src/jit/unwindarm64.cpp
@@ -0,0 +1,802 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX UnwindInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_ARM64_)
+
+void Compiler::unwindPush(regNumber reg)
+{
+ unreached(); // use one of the unwindSaveReg* functions instead.
+}
+
+void Compiler::unwindAllocStack(unsigned size)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ assert(size % 16 == 0);
+ unsigned x = size / 16;
+
+ if (x <= 0x1F)
+ {
+ // alloc_s: 000xxxxx: allocate small stack with size < 512 (2^5 * 16)
+
+ pu->AddCode((BYTE)x);
+ }
+ else if (x <= 0x7FF)
+ {
+ // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 32K (2^11 * 16)
+
+ pu->AddCode(0xC0 | (BYTE)(x >> 8), (BYTE)x);
+ }
+ else
+ {
+ // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | xxxxxxxx : allocate large stack with size < 256M (2^24 * 16)
+ //
+ // For large stack size, the most significant bits
+ // are stored first (and next to the opCode) per the unwind spec.
+
+ pu->AddCode(0xE0, (BYTE)(x >> 16), (BYTE)(x >> 8), (BYTE)x);
+ }
+}
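+
+// Illustrative encodings (derived from the cases above):
+// unwindAllocStack(64) -> x = 4 -> alloc_s byte 0x04
+// unwindAllocStack(1024) -> x = 0x40 -> alloc_m bytes 0xC0 0x40
+// unwindAllocStack(65536) -> x = 0x1000 -> alloc_l bytes 0xE0 0x00 0x10 0x00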
+
+void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ if (offset == 0)
+ {
+ assert(reg == REG_FP);
+
+ // set_fp: 11100001 : set up r29 : with : mov r29, sp
+ pu->AddCode(0xE1);
+ }
+ else
+ {
+ // add_fp: 11100010 | xxxxxxxx : set up r29 with : add r29, sp, #x * 8
+
+ assert(reg == REG_FP);
+ assert((offset % 8) == 0);
+
+ unsigned x = offset / 8;
+ assert(x <= 0xFF);
+
+ pu->AddCode(0xE2, (BYTE)x);
+ }
+}
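+
+// Illustrative encodings: unwindSetFrameReg(REG_FP, 0) emits set_fp (0xE1);
+// unwindSetFrameReg(REG_FP, 16) emits add_fp with x = 2, i.e. bytes 0xE2 0x02.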
+
+void Compiler::unwindSaveReg(regNumber reg, unsigned offset)
+{
+ unreached();
+}
+
+void Compiler::unwindNop()
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("unwindNop: adding NOP\n");
+ }
+#endif
+
+ INDEBUG(pu->uwiAddingNOP = true);
+
+ // nop: 11100011: no unwind operation is required.
+ pu->AddCode(0xE3);
+
+ INDEBUG(pu->uwiAddingNOP = false);
+}
+
+// unwindSaveRegPair: save a register pair to the stack at the specified byte offset (which must be positive,
+// a multiple of 8 from 0 to 504). Note that for ARM64 unwind codes, reg2 must be exactly one register higher than reg1,
+// except for the case of a pair including LR, in which case reg1 must be either FP or R19/R21/R23/R25/R27 (note that it
+// can't be even, such as R20, because that would mean R19 was saved by itself rather than as the <R19,R20> pair,
+// which is what we should do).
+void Compiler::unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // stp reg1, reg2, [sp, #offset]
+
+ // offset for store pair in prolog must be positive and a multiple of 8.
+ assert(0 <= offset && offset <= 504);
+ assert((offset % 8) == 0);
+
+ int z = offset / 8;
+ assert(0 <= z && z <= 0x3F);
+
+ if (reg1 == REG_FP)
+ {
+ // save_fplr: 01zzzzzz: save <r29,lr> pair at [sp+#Z*8], offset <= 504
+
+ assert(reg2 == REG_LR);
+
+ pu->AddCode(0x40 | (BYTE)z);
+ }
+ else if (reg2 == REG_LR)
+ {
+ // save_lrpair: 1101011x | xxzzzzzz: save pair <r19 + 2 * #X, lr> at [sp + #Z * 8], offset <= 504
+
+ assert(REG_R19 <= reg1 && // first legal pair: R19, LR
+ reg1 <= REG_R27); // last legal pair: R27, LR
+
+ BYTE x = (BYTE)(reg1 - REG_R19);
+ assert((x % 2) == 0); // only legal reg1: R19, R21, R23, R25, R27
+ x /= 2;
+ assert(0 <= x && x <= 0x7);
+
+ pu->AddCode(0xD6 | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+ else if (emitter::isGeneralRegister(reg1))
+ {
+ // save_regp: 110010xx | xxzzzzzz: save r(19 + #X) pair at [sp + #Z * 8], offset <= 504
+
+ assert(REG_NEXT(reg1) == reg2);
+ assert(REG_R19 <= reg1 && // first legal pair: R19, R20
+ reg1 <= REG_R27); // last legal pair: R27, R28 (FP is never saved without LR)
+
+ BYTE x = (BYTE)(reg1 - REG_R19);
+ assert(0 <= x && x <= 0xF);
+
+ pu->AddCode(0xC8 | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+ else
+ {
+ // save_fregp: 1101100x | xxzzzzzz : save pair d(8 + #X) at [sp + #Z * 8], offset <= 504
+
+ assert(REG_NEXT(reg1) == reg2);
+ assert(REG_V8 <= reg1 && // first legal pair: V8, V9
+ reg1 <= REG_V14); // last legal pair: V14, V15
+
+ BYTE x = (BYTE)(reg1 - REG_V8);
+ assert(0 <= x && x <= 0x7);
+
+ pu->AddCode(0xD8 | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+}
+
+// unwindSaveRegPairPreindexed: save a register pair to the stack at the specified byte offset (which must be negative,
+// a multiple of 8 from -512 to -8). Note that for ARM64 unwind codes, reg2 must be exactly one register higher than
+// reg1.
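+// For example, unwindSaveRegPairPreindexed(REG_FP, REG_LR, -16) emits the single save_fplr_x byte 0x81 (Z = 1),
+// and unwindSaveRegPairPreindexed(REG_R19, REG_R20, -32) emits the single save_r19r20_x byte 0x24 (Z = 4).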
+void Compiler::unwindSaveRegPairPreindexed(regNumber reg1, regNumber reg2, int offset)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // stp reg1, reg2, [sp, #offset]!
+
+ // pre-indexed offset in prolog must be negative and a multiple of 8.
+ assert(offset < 0);
+ assert((offset % 8) == 0);
+
+ if (reg1 == REG_FP)
+ {
+ // save_fplr_x: 10zzzzzz: save <r29,lr> pair at [sp-(#Z+1)*8]!, pre-indexed offset >= -512
+
+ assert(-512 <= offset);
+ int z = (-offset) / 8 - 1;
+ assert(0 <= z && z <= 0x3F);
+
+ assert(reg2 == REG_LR);
+
+ pu->AddCode(0x80 | (BYTE)z);
+ }
+ else if ((reg1 == REG_R19) &&
+ (-256 <= offset)) // If the offset is between -512 and -256, we use the save_regp_x unwind code.
+ {
+ // save_r19r20_x: 001zzzzz: save <r19,r20> pair at [sp-#Z*8]!, pre-indexed offset >= -248
+ // NOTE: I'm not sure why we allow Z==0 here; seems useless, and the calculation of offset is different from the
+ // other cases.
+
+ int z = (-offset) / 8;
+ assert(0 <= z && z <= 0x1F);
+
+ assert(reg2 == REG_R20);
+
+ pu->AddCode(0x20 | (BYTE)z);
+ }
+ else if (emitter::isGeneralRegister(reg1))
+ {
+ // save_regp_x: 110011xx | xxzzzzzz: save pair r(19 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -512
+
+ assert(-512 <= offset);
+ int z = (-offset) / 8 - 1;
+ assert(0 <= z && z <= 0x3F);
+
+ assert(REG_NEXT(reg1) == reg2);
+ assert(REG_R19 <= reg1 && // first legal pair: R19, R20
+ reg1 <= REG_R27); // last legal pair: R27, R28 (FP is never saved without LR)
+
+ BYTE x = (BYTE)(reg1 - REG_R19);
+ assert(0 <= x && x <= 0xF);
+
+ pu->AddCode(0xCC | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+ else
+ {
+ // save_fregp_x: 1101101x | xxzzzzzz : save pair d(8 + #X), at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -512
+
+ assert(-512 <= offset);
+ int z = (-offset) / 8 - 1;
+ assert(0 <= z && z <= 0x3F);
+
+ assert(REG_NEXT(reg1) == reg2);
+ assert(REG_V8 <= reg1 && // first legal pair: V8, V9
+ reg1 <= REG_V14); // last legal pair: V14, V15
+
+ BYTE x = (BYTE)(reg1 - REG_V8);
+ assert(0 <= x && x <= 0x7);
+
+ pu->AddCode(0xDA | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+}
+
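+// unwindSaveReg: save a single register to the stack at the specified byte offset (non-negative, a multiple of 8,
+// and no larger than 504). For example, unwindSaveReg(REG_R19, 8) emits the two-byte save_reg sequence 0xD0 0x01
+// (X = 0, Z = 1).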
+void Compiler::unwindSaveReg(regNumber reg, int offset)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // str reg, [sp, #offset]
+
+    // offset for store in prolog must be non-negative, a multiple of 8, and no larger than 504.
+ assert(0 <= offset && offset <= 504);
+ assert((offset % 8) == 0);
+
+ int z = offset / 8;
+ assert(0 <= z && z <= 0x3F);
+
+ if (emitter::isGeneralRegister(reg))
+ {
+ // save_reg: 110100xx | xxzzzzzz: save reg r(19 + #X) at [sp + #Z * 8], offset <= 504
+
+ assert(REG_R19 <= reg && // first legal register: R19
+ reg <= REG_LR); // last legal register: LR
+
+ BYTE x = (BYTE)(reg - REG_R19);
+ assert(0 <= x && x <= 0xF);
+
+ pu->AddCode(0xD0 | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+ else
+ {
+ // save_freg: 1101110x | xxzzzzzz : save reg d(8 + #X) at [sp + #Z * 8], offset <= 504
+
+ assert(REG_V8 <= reg && // first legal register: V8
+ reg <= REG_V15); // last legal register: V15
+
+ BYTE x = (BYTE)(reg - REG_V8);
+ assert(0 <= x && x <= 0x7);
+
+ pu->AddCode(0xDC | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+}
+
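+// unwindSaveRegPreindexed: save a single register with a pre-indexed store at the specified byte offset (negative,
+// a multiple of 8, and no smaller than -256). For example, unwindSaveRegPreindexed(REG_R19, -16) emits the two-byte
+// save_reg_x sequence 0xD4 0x01 (X = 0, Z = 1).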
+void Compiler::unwindSaveRegPreindexed(regNumber reg, int offset)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // str reg, [sp, #offset]!
+
+ // pre-indexed offset in prolog must be negative and a multiple of 8.
+ assert(-256 <= offset && offset < 0);
+ assert((offset % 8) == 0);
+
+ int z = (-offset) / 8 - 1;
+ assert(0 <= z && z <= 0x1F);
+
+ if (emitter::isGeneralRegister(reg))
+ {
+ // save_reg_x: 1101010x | xxxzzzzz: save reg r(19 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -256
+
+ assert(REG_R19 <= reg && // first legal register: R19
+ reg <= REG_LR); // last legal register: LR
+
+ BYTE x = (BYTE)(reg - REG_R19);
+ assert(0 <= x && x <= 0xF);
+
+ pu->AddCode(0xD4 | (BYTE)(x >> 3), (BYTE)(x << 5) | (BYTE)z);
+ }
+ else
+ {
+        // save_freg_x: 11011110 | xxxzzzzz : save reg d(8 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -256
+
+ assert(REG_V8 <= reg && // first legal register: V8
+ reg <= REG_V15); // last legal register: V15
+
+ BYTE x = (BYTE)(reg - REG_V8);
+ assert(0 <= x && x <= 0x7);
+
+ pu->AddCode(0xDE, (BYTE)(x << 5) | (BYTE)z);
+ }
+}
+
+void Compiler::unwindSaveNext()
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // We're saving the next register pair. The caller is responsible for ensuring this is correct!
+
+    // save_next: 11100110 : save next non-volatile Int or FP register pair.
+ pu->AddCode(0xE6);
+}
+
+void Compiler::unwindReturn(regNumber reg)
+{
+ // Nothing to do; we will always have at least one trailing "end" opcode in our padding.
+}
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Unwind Info Debug helpers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifdef DEBUG
+
+// Return the size of the unwind code (from 1 to 4 bytes), given the first byte of the unwind bytes
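+// For example, 0xE1 (set_fp) is a 1-byte code, 0xC8 (save_regp) is a 2-byte code, and 0xE0 (alloc_l) is a 4-byte code.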
+
+unsigned GetUnwindSizeFromUnwindHeader(BYTE b1)
+{
+ static BYTE s_UnwindSize[256] = {
+        // array of unwind sizes, in bytes (as specified in the ARM64 unwind specification)
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00-0F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10-1F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20-2F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30-3F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40-4F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50-5F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60-6F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70-7F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80-8F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90-9F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0-AF
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0-BF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0-CF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, // D0-DF
+ 4, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E0-EF
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F0-FF
+ };
+
+ unsigned size = s_UnwindSize[b1];
+ assert(1 <= size && size <= 4);
+ return size;
+}
+
+#endif // DEBUG
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Unwind Info Support Classes XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindCodesBase
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifdef DEBUG
+
+// Walk the prolog codes and calculate the size of the prolog or epilog, in bytes.
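+// For example, the code sequence 0x42 (save_fplr), 0x81 (save_fplr_x), 0xE4 (end) describes two 4-byte prolog
+// instructions, so the size returned for it would be 8 (assuming IsEndCode() treats 0xE4 as an end code).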
+unsigned UnwindCodesBase::GetCodeSizeFromUnwindCodes(bool isProlog)
+{
+ BYTE* pCodesStart = GetCodes();
+ BYTE* pCodes = pCodesStart;
+ unsigned size = 0;
+ for (;;)
+ {
+ BYTE b1 = *pCodes;
+ if (IsEndCode(b1))
+ {
+ break; // We hit an "end" code; we're done
+ }
+ size += 4; // All codes represent 4 byte instructions.
+ pCodes += GetUnwindSizeFromUnwindHeader(b1);
+ assert(pCodes - pCodesStart < 256); // 255 is the absolute maximum number of code bytes allowed
+ }
+ return size;
+}
+
+#endif // DEBUG
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Debug dumpers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifdef DEBUG
+
+// start is 0-based index from LSB, length is number of bits
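+// For example, ExtractBits(0x12345678, 8, 8) returns 0x56, and ExtractBits(dw, 0, 18) extracts an 18-bit field
+// such as the function length in the unwind header.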
+DWORD ExtractBits(DWORD dw, DWORD start, DWORD length)
+{
+ return (dw >> start) & ((1 << length) - 1);
+}
+
+// Dump the unwind data.
+// Arguments:
+// isHotCode: true if this unwind data is for the hot section
+// startOffset: byte offset of the code start that this unwind data represents
+// endOffset: byte offset of the code end that this unwind data represents
+// pHeader: pointer to the unwind data blob
+// unwindBlockSize: size in bytes of the unwind data blob
+
+void DumpUnwindInfo(Compiler* comp,
+ bool isHotCode,
+ UNATIVE_OFFSET startOffset,
+ UNATIVE_OFFSET endOffset,
+ const BYTE* const pHeader,
+ ULONG unwindBlockSize)
+{
+ printf("Unwind Info%s:\n", isHotCode ? "" : " COLD");
+
+ // pHeader is not guaranteed to be aligned. We put four 0xFF end codes at the end
+ // to provide padding, and round down to get a multiple of 4 bytes in size.
+ DWORD UNALIGNED* pdw = (DWORD UNALIGNED*)pHeader;
+ DWORD dw;
+
+ dw = *pdw++;
+
+ DWORD codeWords = ExtractBits(dw, 27, 5);
+ DWORD epilogCount = ExtractBits(dw, 22, 5);
+ DWORD EBit = ExtractBits(dw, 21, 1);
+ DWORD XBit = ExtractBits(dw, 20, 1);
+ DWORD Vers = ExtractBits(dw, 18, 2);
+ DWORD functionLength = ExtractBits(dw, 0, 18);
+
+ printf(" >> Start offset : 0x%06x (not in unwind data)\n", comp->dspOffset(startOffset));
+ printf(" >> End offset : 0x%06x (not in unwind data)\n", comp->dspOffset(endOffset));
+ printf(" Code Words : %u\n", codeWords);
+ printf(" Epilog Count : %u\n", epilogCount);
+ printf(" E bit : %u\n", EBit);
+ printf(" X bit : %u\n", XBit);
+ printf(" Vers : %u\n", Vers);
+ printf(" Function Length : %u (0x%05x) Actual length = %u (0x%06x)\n", functionLength, functionLength,
+ functionLength * 4, functionLength * 4);
+
+ assert(functionLength * 4 == endOffset - startOffset);
+
+ if (codeWords == 0 && epilogCount == 0)
+ {
+ // We have an extension word specifying a larger number of Code Words or Epilog Counts
+ // than can be specified in the header word.
+
+ dw = *pdw++;
+
+ codeWords = ExtractBits(dw, 16, 8);
+ epilogCount = ExtractBits(dw, 0, 16);
+ assert((dw & 0xF0000000) == 0); // reserved field should be zero
+
+ printf(" ---- Extension word ----\n");
+ printf(" Extended Code Words : %u\n", codeWords);
+ printf(" Extended Epilog Count : %u\n", epilogCount);
+ }
+
+ bool epilogStartAt[1024] = {}; // One byte per possible epilog start index; initialized to false
+
+ if (EBit == 0)
+ {
+ // We have an array of epilog scopes
+
+ printf(" ---- Epilog scopes ----\n");
+ if (epilogCount == 0)
+ {
+ printf(" No epilogs\n");
+ }
+ else
+ {
+ for (DWORD scope = 0; scope < epilogCount; scope++)
+ {
+ dw = *pdw++;
+
+ DWORD epilogStartOffset = ExtractBits(dw, 0, 18);
+ DWORD res = ExtractBits(dw, 18, 4);
+ DWORD epilogStartIndex = ExtractBits(dw, 22, 10);
+
+ // Note that epilogStartOffset for a funclet is the offset from the beginning
+ // of the current funclet, not the offset from the beginning of the main function.
+ // To help find it when looking through JitDump output, also show the offset from
+ // the beginning of the main function.
+ DWORD epilogStartOffsetFromMainFunctionBegin = epilogStartOffset * 4 + startOffset;
+
+ assert(res == 0);
+
+ printf(" ---- Scope %d\n", scope);
+ printf(" Epilog Start Offset : %u (0x%05x) Actual offset = %u (0x%06x) Offset from main "
+ "function begin = %u (0x%06x)\n",
+ comp->dspOffset(epilogStartOffset), comp->dspOffset(epilogStartOffset),
+ comp->dspOffset(epilogStartOffset * 4), comp->dspOffset(epilogStartOffset * 4),
+ comp->dspOffset(epilogStartOffsetFromMainFunctionBegin),
+ comp->dspOffset(epilogStartOffsetFromMainFunctionBegin));
+ printf(" Epilog Start Index : %u (0x%02x)\n", epilogStartIndex, epilogStartIndex);
+
+ epilogStartAt[epilogStartIndex] = true; // an epilog starts at this offset in the unwind codes
+ }
+ }
+ }
+ else
+ {
+ printf(" --- One epilog, unwind codes at %u\n", epilogCount);
+ assert(epilogCount < ArrLen(epilogStartAt));
+ epilogStartAt[epilogCount] = true; // the one and only epilog starts its unwind codes at this offset
+ }
+
+ // Dump the unwind codes
+
+ printf(" ---- Unwind codes ----\n");
+
+ DWORD countOfUnwindCodes = codeWords * 4;
+ PBYTE pUnwindCode = (PBYTE)pdw;
+ BYTE b1, b2, b3, b4;
+ DWORD x, z;
+ for (DWORD i = 0; i < countOfUnwindCodes; i++)
+ {
+ // Does this byte start an epilog sequence? If so, note that fact.
+ if (epilogStartAt[i])
+ {
+ printf(" ---- Epilog start at index %u ----\n", i);
+ }
+
+ b1 = *pUnwindCode++;
+
+ if ((b1 & 0xE0) == 0)
+ {
+            // alloc_s: 000xxxxx: allocate small stack with size < 512 (2^5 * 16)
+ x = b1 & 0x1F;
+ printf(" %02X alloc_s #%u (0x%02X); sub sp, sp, #%u (0x%03X)\n", b1, x, x, x * 16, x * 16);
+ }
+ else if ((b1 & 0xE0) == 0x20)
+ {
+ // save_r19r20_x: 001zzzzz: save <r19,r20> pair at [sp-#Z*8]!, pre-indexed offset >= -248
+ z = b1 & 0x1F;
+ printf(" %02X save_r19r20_x #%u (0x%02X); stp %s, %s, [sp, #-%u]!\n", b1, z, z,
+ getRegName(REG_R19), getRegName(REG_R20), z * 8);
+ }
+ else if ((b1 & 0xC0) == 0x40)
+ {
+ // save_fplr: 01zzzzzz: save <r29,lr> pair at [sp+#Z*8], offset <= 504
+ z = b1 & 0x3F;
+ printf(" %02X save_fplr #%u (0x%02X); stp %s, %s, [sp, #%u]\n", b1, z, z, getRegName(REG_FP),
+ getRegName(REG_LR), z * 8);
+ }
+ else if ((b1 & 0xC0) == 0x80)
+ {
+ // save_fplr_x: 10zzzzzz: save <r29,lr> pair at [sp-(#Z+1)*8]!, pre-indexed offset >= -512
+ z = b1 & 0x3F;
+ printf(" %02X save_fplr_x #%u (0x%02X); stp %s, %s, [sp, #-%u]!\n", b1, z, z,
+ getRegName(REG_FP), getRegName(REG_LR), (z + 1) * 8);
+ }
+ else if ((b1 & 0xF8) == 0xC0)
+ {
+            // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 32K (2^11 * 16)
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x7) << 8) | (DWORD)b2;
+
+ printf(" %02X %02X alloc_m #%u (0x%03X); sub sp, sp, #%u (0x%04X)\n", b1, b2, x, x, x * 16,
+ x * 16);
+ }
+ else if ((b1 & 0xFC) == 0xC8)
+ {
+ // save_regp: 110010xx | xxzzzzzz: save r(19 + #X) pair at [sp + #Z * 8], offset <= 504
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x3) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_regp X#%u Z#%u (0x%02X); stp %s, %s, [sp, #%u]\n", b1, b2, x, z, z,
+ getRegName(REG_R19 + x), getRegName(REG_R19 + x + 1), z * 8);
+ }
+ else if ((b1 & 0xFC) == 0xCC)
+ {
+ // save_regp_x: 110011xx | xxzzzzzz: save pair r(19 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >=
+ // -512
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x3) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_regp_x X#%u Z#%u (0x%02X); stp %s, %s, [sp, #-%u]!\n", b1, b2, x, z, z,
+ getRegName(REG_R19 + x), getRegName(REG_R19 + x + 1), (z + 1) * 8);
+ }
+ else if ((b1 & 0xFC) == 0xD0)
+ {
+ // save_reg: 110100xx | xxzzzzzz: save reg r(19 + #X) at [sp + #Z * 8], offset <= 504
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x3) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_reg X#%u Z#%u (0x%02X); str %s, [sp, #%u]\n", b1, b2, x, z, z,
+ getRegName(REG_R19 + x), z * 8);
+ }
+ else if ((b1 & 0xFE) == 0xD4)
+ {
+ // save_reg_x: 1101010x | xxxzzzzz: save reg r(19 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -256
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x1) << 3) | (DWORD)(b2 >> 5);
+ z = (DWORD)(b2 & 0x1F);
+
+ printf(" %02X %02X save_reg_x X#%u Z#%u (0x%02X); str %s, [sp, #-%u]!\n", b1, b2, x, z, z,
+ getRegName(REG_R19 + x), (z + 1) * 8);
+ }
+ else if ((b1 & 0xFE) == 0xD6)
+ {
+ // save_lrpair: 1101011x | xxzzzzzz: save pair <r19 + 2 * #X, lr> at [sp + #Z * 8], offset <= 504
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x1) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_lrpair X#%u Z#%u (0x%02X); stp %s, %s, [sp, #%u]\n", b1, b2, x, z, z,
+ getRegName(REG_R19 + 2 * x), getRegName(REG_LR), z * 8);
+ }
+ else if ((b1 & 0xFE) == 0xD8)
+ {
+ // save_fregp: 1101100x | xxzzzzzz : save pair d(8 + #X) at [sp + #Z * 8], offset <= 504
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x1) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_fregp X#%u Z#%u (0x%02X); stp %s, %s, [sp, #%u]\n", b1, b2, x, z, z,
+ getRegName(REG_V8 + x, true), getRegName(REG_V8 + x + 1, true), z * 8);
+ }
+ else if ((b1 & 0xFE) == 0xDA)
+ {
+ // save_fregp_x: 1101101x | xxzzzzzz : save pair d(8 + #X), at [sp - (#Z + 1) * 8]!, pre-indexed offset >=
+ // -512
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x1) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_fregp_x X#%u Z#%u (0x%02X); stp %s, %s, [sp, #-%u]!\n", b1, b2, x, z, z,
+ getRegName(REG_V8 + x, true), getRegName(REG_V8 + x + 1, true), (z + 1) * 8);
+ }
+ else if ((b1 & 0xFE) == 0xDC)
+ {
+ // save_freg: 1101110x | xxzzzzzz : save reg d(8 + #X) at [sp + #Z * 8], offset <= 504
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x1) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_freg X#%u Z#%u (0x%02X); str %s, [sp, #%u]\n", b1, b2, x, z, z,
+ getRegName(REG_V8 + x, true), z * 8);
+ }
+ else if (b1 == 0xDE)
+ {
+            // save_freg_x: 11011110 | xxxzzzzz : save reg d(8 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >=
+ // -256
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = (DWORD)(b2 >> 5);
+ z = (DWORD)(b2 & 0x1F);
+
+ printf(" %02X %02X save_freg_x X#%u Z#%u (0x%02X); str %s, [sp, #-%u]!\n", b1, b2, x, z, z,
+ getRegName(REG_V8 + x, true), (z + 1) * 8);
+ }
+ else if (b1 == 0xE0)
+ {
+ // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | xxxxxxxx : allocate large stack with size < 256M (2^24 * 16)
+ assert(i + 3 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ b3 = *pUnwindCode++;
+ b4 = *pUnwindCode++;
+ i += 3;
+
+ x = ((DWORD)b2 << 16) | ((DWORD)b3 << 8) | (DWORD)b4;
+
+ printf(" %02X %02X %02X %02X alloc_l %u (0x%06X); sub sp, sp, #%u (%06X)\n", b1, b2, b3, b4, x, x,
+ x * 16, x * 16);
+ }
+ else if (b1 == 0xE1)
+ {
+ // set_fp: 11100001 : set up r29 : with : mov r29, sp
+
+ printf(" %02X set_fp; mov %s, sp\n", b1, getRegName(REG_FP));
+ }
+ else if (b1 == 0xE2)
+ {
+ // add_fp: 11100010 | xxxxxxxx : set up r29 with : add r29, sp, #x * 8
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = (DWORD)b2;
+
+ printf(" %02X %02X add_fp %u (0x%02X); add %s, sp, #%u\n", b1, b2, x, x, getRegName(REG_FP),
+ x * 8);
+ }
+ else if (b1 == 0xE3)
+ {
+ // nop: 11100011: no unwind operation is required.
+
+ printf(" %02X nop\n", b1);
+ }
+ else if (b1 == 0xE4)
+ {
+ // end: 11100100 : end of unwind code
+
+ printf(" %02X end\n", b1);
+ }
+ else if (b1 == 0xE5)
+ {
+ // end_c: 11100101 : end of unwind code in current chained scope.
+
+ printf(" %02X end_c\n", b1);
+ }
+ else if (b1 == 0xE6)
+ {
+            // save_next: 11100110 : save next non-volatile Int or FP register pair.
+
+ printf(" %02X save_next\n", b1);
+ }
+ else
+ {
+ // Unknown / reserved unwind code
+ assert(!"Internal error decoding unwind codes");
+ }
+ }
+
+ pdw += codeWords;
+ assert((PBYTE)pdw == pUnwindCode);
+ assert((PBYTE)pdw == pHeader + unwindBlockSize);
+
+ assert(XBit == 0); // We don't handle the case where exception data is present, such as the Exception Handler RVA
+
+ printf("\n");
+}
+
+#endif // DEBUG
+
+#endif // _TARGET_ARM64_
diff --git a/src/jit/utils.cpp b/src/jit/utils.cpp
new file mode 100644
index 0000000000..9934416412
--- /dev/null
+++ b/src/jit/utils.cpp
@@ -0,0 +1,1767 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Utils.cpp XX
+XX XX
+XX Has miscellaneous utility functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "opcode.h"
+
+/*****************************************************************************/
+// Define the string platform name based on compilation #ifdefs. This is the
+// same code for all platforms, hence it is here instead of in the targetXXX.cpp
+// files.
+
+#ifdef PLATFORM_UNIX
+// Should we distinguish Mac? Can we?
+// Should we distinguish flavors of Unix? Can we?
+const char* Target::g_tgtPlatformName = "Unix";
+#else // !PLATFORM_UNIX
+const char* Target::g_tgtPlatformName = "Windows";
+#endif // !PLATFORM_UNIX
+
+/*****************************************************************************/
+
+#define DECLARE_DATA
+
+// clang-format off
+extern
+const signed char opcodeSizes[] =
+{
+ #define InlineNone_size 0
+ #define ShortInlineVar_size 1
+ #define InlineVar_size 2
+ #define ShortInlineI_size 1
+ #define InlineI_size 4
+ #define InlineI8_size 8
+ #define ShortInlineR_size 4
+ #define InlineR_size 8
+ #define ShortInlineBrTarget_size 1
+ #define InlineBrTarget_size 4
+ #define InlineMethod_size 4
+ #define InlineField_size 4
+ #define InlineType_size 4
+ #define InlineString_size 4
+ #define InlineSig_size 4
+ #define InlineRVA_size 4
+ #define InlineTok_size 4
+ #define InlineSwitch_size 0 // for now
+ #define InlinePhi_size 0 // for now
+ #define InlineVarTok_size 0 // remove
+
+ #define OPDEF(name,string,pop,push,oprType,opcType,l,s1,s2,ctrl) oprType ## _size ,
+ #include "opcode.def"
+ #undef OPDEF
+
+ #undef InlineNone_size
+ #undef ShortInlineVar_size
+ #undef InlineVar_size
+ #undef ShortInlineI_size
+ #undef InlineI_size
+ #undef InlineI8_size
+ #undef ShortInlineR_size
+ #undef InlineR_size
+ #undef ShortInlineBrTarget_size
+ #undef InlineBrTarget_size
+ #undef InlineMethod_size
+ #undef InlineField_size
+ #undef InlineType_size
+ #undef InlineString_size
+ #undef InlineSig_size
+ #undef InlineRVA_size
+ #undef InlineTok_size
+ #undef InlineSwitch_size
+    #undef InlinePhi_size
+    #undef InlineVarTok_size
+};
+// clang-format on
+
+const BYTE varTypeClassification[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) tf,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+/*****************************************************************************/
+/*****************************************************************************/
+#ifdef DEBUG
+extern const char* const opcodeNames[] = {
+#define OPDEF(name, string, pop, push, oprType, opcType, l, s1, s2, ctrl) string,
+#include "opcode.def"
+#undef OPDEF
+};
+
+extern const BYTE opcodeArgKinds[] = {
+#define OPDEF(name, string, pop, push, oprType, opcType, l, s1, s2, ctrl) (BYTE) oprType,
+#include "opcode.def"
+#undef OPDEF
+};
+#endif
+
+/*****************************************************************************/
+
+const char* varTypeName(var_types vt)
+{
+ static const char* const varTypeNames[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) nm,
+#include "typelist.h"
+#undef DEF_TP
+ };
+
+ assert((unsigned)vt < sizeof(varTypeNames) / sizeof(varTypeNames[0]));
+
+ return varTypeNames[vt];
+}
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+/*****************************************************************************
+ *
+ * Return the name of the given register.
+ */
+
+const char* getRegName(regNumber reg, bool isFloat)
+{
+ // Special-case REG_NA; it's not in the regNames array, but we might want to print it.
+ if (reg == REG_NA)
+ {
+ return "NA";
+ }
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+ static const char* const regNames[] = {
+#define REGDEF(name, rnum, mask, sname) sname,
+#include "register.h"
+ };
+
+ static const char* const floatRegNames[] = {
+#define REGDEF(name, rnum, mask, sname) sname,
+#include "registerxmm.h"
+ };
+ if (isFloat)
+ {
+ assert(reg < ArrLen(floatRegNames));
+ return floatRegNames[reg];
+ }
+ else
+ {
+ assert(reg < ArrLen(regNames));
+ return regNames[reg];
+ }
+#elif defined(_TARGET_ARM64_)
+ static const char* const regNames[] = {
+#define REGDEF(name, rnum, mask, xname, wname) xname,
+#include "register.h"
+ };
+ assert(reg < ArrLen(regNames));
+ return regNames[reg];
+#else
+ static const char* const regNames[] = {
+#define REGDEF(name, rnum, mask, sname) sname,
+#include "register.h"
+ };
+ assert(reg < ArrLen(regNames));
+ return regNames[reg];
+#endif
+}
+
+const char* getRegName(unsigned reg,
+                       bool isFloat) // this is for gcencode.cpp and disasm.cpp that don't use the regNumber type
+{
+ return getRegName((regNumber)reg, isFloat);
+}
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+#if defined(DEBUG)
+
+const char* getRegNameFloat(regNumber reg, var_types type)
+{
+#ifdef _TARGET_ARM_
+ assert(genIsValidFloatReg(reg));
+ if (type == TYP_FLOAT)
+ return getRegName(reg);
+ else
+ {
+ const char* regName;
+
+ switch (reg)
+ {
+ default:
+ assert(!"Bad double register");
+ regName = "d??";
+ break;
+ case REG_F0:
+ regName = "d0";
+ break;
+ case REG_F2:
+ regName = "d2";
+ break;
+ case REG_F4:
+ regName = "d4";
+ break;
+ case REG_F6:
+ regName = "d6";
+ break;
+ case REG_F8:
+ regName = "d8";
+ break;
+ case REG_F10:
+ regName = "d10";
+ break;
+ case REG_F12:
+ regName = "d12";
+ break;
+ case REG_F14:
+ regName = "d14";
+ break;
+ case REG_F16:
+ regName = "d16";
+ break;
+ case REG_F18:
+ regName = "d18";
+ break;
+ case REG_F20:
+ regName = "d20";
+ break;
+ case REG_F22:
+ regName = "d22";
+ break;
+ case REG_F24:
+ regName = "d24";
+ break;
+ case REG_F26:
+ regName = "d26";
+ break;
+ case REG_F28:
+ regName = "d28";
+ break;
+ case REG_F30:
+ regName = "d30";
+ break;
+ }
+ return regName;
+ }
+
+#elif defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+
+ static const char* regNamesFloat[] = {
+#define REGDEF(name, rnum, mask, sname) sname,
+#include "registerxmm.h"
+ };
+ assert((unsigned)reg < ArrLen(regNamesFloat));
+
+ return regNamesFloat[reg];
+
+#elif defined(_TARGET_ARM64_)
+
+ static const char* regNamesFloat[] = {
+#define REGDEF(name, rnum, mask, xname, wname) xname,
+#include "register.h"
+ };
+ assert((unsigned)reg < ArrLen(regNamesFloat));
+
+ return regNamesFloat[reg];
+
+#else
+ static const char* regNamesFloat[] = {
+#define REGDEF(name, rnum, mask, sname) "x" sname,
+#include "register.h"
+ };
+#ifdef FEATURE_AVX_SUPPORT
+ static const char* regNamesYMM[] = {
+#define REGDEF(name, rnum, mask, sname) "y" sname,
+#include "register.h"
+ };
+#endif // FEATURE_AVX_SUPPORT
+ assert((unsigned)reg < ArrLen(regNamesFloat));
+
+#ifdef FEATURE_AVX_SUPPORT
+ if (type == TYP_SIMD32)
+ {
+ return regNamesYMM[reg];
+ }
+#endif // FEATURE_AVX_SUPPORT
+
+ return regNamesFloat[reg];
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Displays a register set.
+ * TODO-ARM64-Cleanup: don't allow ip0, ip1 as part of a range.
+ */
+
+void dspRegMask(regMaskTP regMask, size_t minSiz)
+{
+ const char* sep = "";
+
+ printf("[");
+
+ bool inRegRange = false;
+ regNumber regPrev = REG_NA;
+ regNumber regHead = REG_NA; // When we start a range, remember the first register of the range, so we don't use
+ // range notation if the range contains just a single register.
+ for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
+ {
+ regMaskTP regBit = genRegMask(regNum);
+
+ if ((regMask & regBit) != 0)
+ {
+ // We have a register to display. It gets displayed now if:
+ // 1. This is the first register to display of a new range of registers (possibly because
+ // no register has ever been displayed).
+ // 2. This is the last register of an acceptable range (either the last integer register,
+ // or the last of a range that is displayed with range notation).
+ if (!inRegRange)
+ {
+ // It's the first register of a potential range.
+ const char* nam = getRegName(regNum);
+ printf("%s%s", sep, nam);
+ minSiz -= strlen(sep) + strlen(nam);
+
+ // By default, we're not starting a potential register range.
+ sep = " ";
+
+ // What kind of separator should we use for this range (if it is indeed going to be a range)?
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_AMD64_)
+ // For AMD64, create ranges for int registers R8 through R15, but not the "old" registers.
+ if (regNum >= REG_R8)
+ {
+ regHead = regNum;
+ inRegRange = true;
+ sep = "-";
+ }
+#elif defined(_TARGET_ARM64_)
+ // R17 and R28 can't be the start of a range, since the range would include TEB or FP
+ if ((regNum < REG_R17) || ((REG_R19 <= regNum) && (regNum < REG_R28)))
+ {
+ regHead = regNum;
+ inRegRange = true;
+ sep = "-";
+ }
+#elif defined(_TARGET_ARM_)
+ if (regNum < REG_R12)
+ {
+ regHead = regNum;
+ inRegRange = true;
+ sep = "-";
+ }
+#elif defined(_TARGET_X86_)
+// No register ranges
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+ }
+
+#if defined(_TARGET_ARM64_)
+ // We've already printed a register. Is this the end of a range?
+ else if ((regNum == REG_INT_LAST) || (regNum == REG_R17) // last register before TEB
+ || (regNum == REG_R28)) // last register before FP
+#else // _TARGET_ARM64_
+ // We've already printed a register. Is this the end of a range?
+ else if (regNum == REG_INT_LAST)
+#endif // _TARGET_ARM64_
+ {
+ const char* nam = getRegName(regNum);
+ printf("%s%s", sep, nam);
+ minSiz -= strlen(sep) + strlen(nam);
+ inRegRange = false; // No longer in the middle of a register range
+ regHead = REG_NA;
+ sep = " ";
+ }
+ }
+ else // ((regMask & regBit) == 0)
+ {
+ if (inRegRange)
+ {
+ assert(regHead != REG_NA);
+ if (regPrev != regHead)
+ {
+ // Close out the previous range, if it included more than one register.
+ const char* nam = getRegName(regPrev);
+ printf("%s%s", sep, nam);
+ minSiz -= strlen(sep) + strlen(nam);
+ }
+ sep = " ";
+ inRegRange = false;
+ regHead = REG_NA;
+ }
+ }
+
+ if (regBit > regMask)
+ {
+ break;
+ }
+
+ regPrev = regNum;
+ }
+
+#if CPU_HAS_BYTE_REGS
+ if (regMask & RBM_BYTE_REG_FLAG)
+ {
+ const char* nam = "BYTE";
+ printf("%s%s", sep, nam);
+ minSiz -= (strlen(sep) + strlen(nam));
+ }
+#endif
+
+#if !FEATURE_STACK_FP_X87
+ if (strlen(sep) > 0)
+ {
+ // We've already printed something.
+ sep = " ";
+ }
+ inRegRange = false;
+ regPrev = REG_NA;
+ regHead = REG_NA;
+ for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
+ {
+ regMaskTP regBit = genRegMask(regNum);
+
+ if (regMask & regBit)
+ {
+ if (!inRegRange || (regNum == REG_FP_LAST))
+ {
+ const char* nam = getRegName(regNum);
+ printf("%s%s", sep, nam);
+ minSiz -= strlen(sep) + strlen(nam);
+ sep = "-";
+ regHead = regNum;
+ }
+ inRegRange = true;
+ }
+ else
+ {
+ if (inRegRange)
+ {
+ if (regPrev != regHead)
+ {
+ const char* nam = getRegName(regPrev);
+ printf("%s%s", sep, nam);
+ minSiz -= (strlen(sep) + strlen(nam));
+ }
+ sep = " ";
+ }
+ inRegRange = false;
+ }
+
+ if (regBit > regMask)
+ {
+ break;
+ }
+
+ regPrev = regNum;
+ }
+#endif
+
+ printf("]");
+
+ while ((int)minSiz > 0)
+ {
+ printf(" ");
+ minSiz--;
+ }
+}
+
+//------------------------------------------------------------------------
+// dumpILBytes: Helper for dumpSingleInstr() to dump hex bytes of an IL stream,
+// aligning up to a minimum alignment width.
+//
+// Arguments:
+// codeAddr - Pointer to IL byte stream to display.
+// codeSize - Number of bytes of IL byte stream to display.
+// alignSize - Pad out to this many characters, if fewer than this were written.
+//
+void dumpILBytes(const BYTE* const codeAddr,
+ unsigned codeSize,
+ unsigned alignSize) // number of characters to write, for alignment
+{
+ for (IL_OFFSET offs = 0; offs < codeSize; ++offs)
+ {
+ printf(" %02x", *(codeAddr + offs));
+ }
+
+ unsigned charsWritten = 3 * codeSize;
+ for (unsigned i = charsWritten; i < alignSize; i++)
+ {
+ printf(" ");
+ }
+}
+
+//------------------------------------------------------------------------
+// dumpSingleInstr: Display a single IL instruction.
+//
+// Arguments:
+// codeAddr - Base pointer to a stream of IL instructions.
+// offs - Offset from codeAddr of the IL instruction to display.
+// prefix - Optional string to prefix the IL instruction with (if nullptr, no prefix is output).
+//
+// Return Value:
+// Size of the displayed IL instruction in the instruction stream, in bytes. (Add this to 'offs' to
+// get to the next instruction.)
+//
+unsigned dumpSingleInstr(const BYTE* const codeAddr, IL_OFFSET offs, const char* prefix)
+{
+ const BYTE* opcodePtr = codeAddr + offs;
+ const BYTE* startOpcodePtr = opcodePtr;
+ const unsigned ALIGN_WIDTH = 3 * 6; // assume 3 characters * (1 byte opcode + 4 bytes data + 1 prefix byte) for
+ // most things
+
+ if (prefix != nullptr)
+ {
+ printf("%s", prefix);
+ }
+
+ OPCODE opcode = (OPCODE)getU1LittleEndian(opcodePtr);
+ opcodePtr += sizeof(__int8);
+
+DECODE_OPCODE:
+
+ if (opcode >= CEE_COUNT)
+ {
+ printf("\nIllegal opcode: %02X\n", (int)opcode);
+ return (IL_OFFSET)(opcodePtr - startOpcodePtr);
+ }
+
+ /* Get the size of additional parameters */
+
+ size_t sz = opcodeSizes[opcode];
+ unsigned argKind = opcodeArgKinds[opcode];
+
+ /* See what kind of an opcode we have, then */
+
+ switch (opcode)
+ {
+ case CEE_PREFIX1:
+ opcode = OPCODE(getU1LittleEndian(opcodePtr) + 256);
+ opcodePtr += sizeof(__int8);
+ goto DECODE_OPCODE;
+
+ default:
+ {
+ __int64 iOp;
+ double dOp;
+ int jOp;
+ DWORD jOp2;
+
+ switch (argKind)
+ {
+ case InlineNone:
+ dumpILBytes(startOpcodePtr, (unsigned)(opcodePtr - startOpcodePtr), ALIGN_WIDTH);
+ printf(" %-12s", opcodeNames[opcode]);
+ break;
+
+ case ShortInlineVar:
+ iOp = getU1LittleEndian(opcodePtr);
+ goto INT_OP;
+ case ShortInlineI:
+ iOp = getI1LittleEndian(opcodePtr);
+ goto INT_OP;
+ case InlineVar:
+ iOp = getU2LittleEndian(opcodePtr);
+ goto INT_OP;
+ case InlineTok:
+ case InlineMethod:
+ case InlineField:
+ case InlineType:
+ case InlineString:
+ case InlineSig:
+ case InlineI:
+ iOp = getI4LittleEndian(opcodePtr);
+ goto INT_OP;
+ case InlineI8:
+ iOp = getU4LittleEndian(opcodePtr);
+ iOp |= (__int64)getU4LittleEndian(opcodePtr + 4) << 32;
+ goto INT_OP;
+
+ INT_OP:
+ dumpILBytes(startOpcodePtr, (unsigned)((opcodePtr - startOpcodePtr) + sz), ALIGN_WIDTH);
+ printf(" %-12s 0x%X", opcodeNames[opcode], iOp);
+ break;
+
+ case ShortInlineR:
+ dOp = getR4LittleEndian(opcodePtr);
+ goto FLT_OP;
+ case InlineR:
+ dOp = getR8LittleEndian(opcodePtr);
+ goto FLT_OP;
+
+ FLT_OP:
+ dumpILBytes(startOpcodePtr, (unsigned)((opcodePtr - startOpcodePtr) + sz), ALIGN_WIDTH);
+ printf(" %-12s %f", opcodeNames[opcode], dOp);
+ break;
+
+ case ShortInlineBrTarget:
+ jOp = getI1LittleEndian(opcodePtr);
+ goto JMP_OP;
+ case InlineBrTarget:
+ jOp = getI4LittleEndian(opcodePtr);
+ goto JMP_OP;
+
+ JMP_OP:
+ dumpILBytes(startOpcodePtr, (unsigned)((opcodePtr - startOpcodePtr) + sz), ALIGN_WIDTH);
+ printf(" %-12s %d (IL_%04x)", opcodeNames[opcode], jOp, (int)(opcodePtr + sz - codeAddr) + jOp);
+ break;
+
+ case InlineSwitch:
+ jOp2 = getU4LittleEndian(opcodePtr);
+ opcodePtr += 4;
+ opcodePtr += jOp2 * 4; // Jump over the table
+ dumpILBytes(startOpcodePtr, (unsigned)(opcodePtr - startOpcodePtr), ALIGN_WIDTH);
+ printf(" %-12s", opcodeNames[opcode]);
+ break;
+
+ case InlinePhi:
+ jOp2 = getU1LittleEndian(opcodePtr);
+ opcodePtr += 1;
+ opcodePtr += jOp2 * 2; // Jump over the table
+ dumpILBytes(startOpcodePtr, (unsigned)(opcodePtr - startOpcodePtr), ALIGN_WIDTH);
+ printf(" %-12s", opcodeNames[opcode]);
+ break;
+
+ default:
+ assert(!"Bad argKind");
+ }
+
+ opcodePtr += sz;
+ break;
+ }
+ }
+
+ printf("\n");
+ return (IL_OFFSET)(opcodePtr - startOpcodePtr);
+}
+
+//------------------------------------------------------------------------
+// dumpILRange: Display a range of IL instructions from an IL instruction stream.
+//
+// Arguments:
+// codeAddr - Pointer to IL byte stream to display.
+// codeSize - Number of bytes of IL byte stream to display.
+//
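+// For example (illustrative), dumping the two-byte IL stream { 0x16, 0x2A } (ldc.i4.0; ret) produces output along
+// the lines of:
+//    IL_0000   16                ldc.i4.0
+//    IL_0001   2a                ret
+//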
+void dumpILRange(const BYTE* const codeAddr, unsigned codeSize) // in bytes
+{
+ for (IL_OFFSET offs = 0; offs < codeSize;)
+ {
+ char prefix[100];
+ sprintf(prefix, "IL_%04x ", offs);
+ unsigned codeBytesDumped = dumpSingleInstr(codeAddr, offs, prefix);
+ offs += codeBytesDumped;
+ }
+}
+
+/*****************************************************************************
+ *
+ *  Return a string for a variable set (which may be a 32-bit or 64-bit number).
+ *  The result rotates between two static buffers, so at most two results can
+ *  be in use at once.
+ */
+
+const char* genES2str(EXPSET_TP set)
+{
+ const int bufSize = 17;
+ static char num1[bufSize];
+
+ static char num2[bufSize];
+
+ static char* nump = num1;
+
+ char* temp = nump;
+
+ nump = (nump == num1) ? num2 : num1;
+
+#if EXPSET_SZ == 32
+ sprintf_s(temp, bufSize, "%08X", set);
+#else
+ sprintf_s(temp, bufSize, "%08X%08X", (int)(set >> 32), (int)set);
+#endif
+
+ return temp;
+}
+
+const char* refCntWtd2str(unsigned refCntWtd)
+{
+ const int bufSize = 17;
+ static char num1[bufSize];
+
+ static char num2[bufSize];
+
+ static char* nump = num1;
+
+ char* temp = nump;
+
+ nump = (nump == num1) ? num2 : num1;
+
+ unsigned valueInt = refCntWtd / BB_UNITY_WEIGHT;
+ unsigned valueFrac = refCntWtd % BB_UNITY_WEIGHT;
+
+ if (valueFrac == 0)
+ {
+ sprintf_s(temp, bufSize, "%2u ", valueInt);
+ }
+ else
+ {
+ sprintf_s(temp, bufSize, "%2u.%1u", valueInt, (valueFrac * 10 / BB_UNITY_WEIGHT));
+ }
+
+ return temp;
+}
+
+#endif // DEBUG
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------
+// Contains: check if the range includes a particular method
+//
+// Arguments:
+// info -- jit interface pointer
+// method -- method handle for the method of interest
+
+bool ConfigMethodRange::Contains(ICorJitInfo* info, CORINFO_METHOD_HANDLE method)
+{
+ _ASSERT(m_inited == 1);
+
+ // No ranges specified means all methods included.
+ if (m_lastRange == 0)
+ {
+ return true;
+ }
+
+ // Check the hash. Note we can't use the cached hash here since
+ // we may not be asking about the method currently being jitted.
+ const unsigned hash = info->getMethodHash(method);
+
+ for (unsigned i = 0; i < m_lastRange; i++)
+ {
+ if ((m_ranges[i].m_low <= hash) && (hash <= m_ranges[i].m_high))
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+//------------------------------------------------------------------------
+// InitRanges: parse the range string and set up the range info
+//
+// Arguments:
+// rangeStr -- string to parse (may be nullptr)
+//    capacity -- number of ranges to allocate in the range array
+//
+// Notes:
+// Does some internal error checking; clients can use Error()
+// to determine if the range string couldn't be fully parsed
+// because of bad characters or too many entries, or had values
+// that were too large to represent.
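+//
+//    For example (illustrative), the range string L"10-20 35" yields two ranges, [10,20] and [35,35]; a method
+//    whose hash is 15 or 35 is then included by Contains().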
+
+void ConfigMethodRange::InitRanges(const wchar_t* rangeStr, unsigned capacity)
+{
+ // Make sure that the memory was zero initialized
+ assert(m_inited == 0 || m_inited == 1);
+ assert(m_entries == 0);
+ assert(m_ranges == nullptr);
+ assert(m_lastRange == 0);
+
+ // Flag any crazy-looking requests
+ assert(capacity < 100000);
+
+ if (rangeStr == nullptr)
+ {
+ m_inited = 1;
+ return;
+ }
+
+ // Allocate some persistent memory
+ ICorJitHost* jitHost = JitHost::getJitHost();
+ m_ranges = (Range*)jitHost->allocateMemory(capacity * sizeof(Range));
+ m_entries = capacity;
+
+ const wchar_t* p = rangeStr;
+ unsigned lastRange = 0;
+ bool setHighPart = false;
+
+ while ((*p != 0) && (lastRange < m_entries))
+ {
+ while (*p == L' ')
+ {
+ p++;
+ }
+
+ int i = 0;
+
+ while (L'0' <= *p && *p <= L'9')
+ {
+ int j = 10 * i + ((*p++) - L'0');
+
+ // Check for overflow
+            if ((m_badChar == 0) && (j <= i))
+ {
+ m_badChar = (p - rangeStr) + 1;
+ }
+
+ i = j;
+ }
+
+ // Was this the high part of a low-high pair?
+ if (setHighPart)
+ {
+ // Yep, set it and move to the next range
+ m_ranges[lastRange].m_high = i;
+
+ // Sanity check that range is proper
+            if ((m_badChar == 0) && (m_ranges[lastRange].m_high < m_ranges[lastRange].m_low))
+ {
+ m_badChar = (p - rangeStr) + 1;
+ }
+
+ lastRange++;
+ setHighPart = false;
+ continue;
+ }
+
+ // Must have been looking for the low part of a range
+ m_ranges[lastRange].m_low = i;
+
+ while (*p == L' ')
+ {
+ p++;
+ }
+
+ // Was that the low part of a low-high pair?
+ if (*p == L'-')
+ {
+ // Yep, skip the dash and set high part next time around.
+ p++;
+ setHighPart = true;
+ continue;
+ }
+
+ // Else we have a point range, so set high = low
+ m_ranges[lastRange].m_high = i;
+ lastRange++;
+ }
+
+    // If we didn't parse the full range string, note the index of the
+    // first bad char.
+    if ((m_badChar == 0) && (*p != 0))
+ {
+ m_badChar = (p - rangeStr) + 1;
+ }
+
+ // Finish off any remaining open range
+ if (setHighPart)
+ {
+ m_ranges[lastRange].m_high = UINT_MAX;
+ lastRange++;
+ }
+
+ assert(lastRange <= m_entries);
+ m_lastRange = lastRange;
+ m_inited = 1;
+}
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE
+
+/*****************************************************************************
+ * Histogram class.
+ */
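+// For example (illustrative), with a size table of { 10, 100, 1000, 0 }, record(50) increments the "11 .. 100"
+// bucket and record(5000) increments the "> 1000" overflow bucket that dump() prints last.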
+
+Histogram::Histogram(IAllocator* allocator, const unsigned* const sizeTable)
+ : m_allocator(allocator), m_sizeTable(sizeTable), m_counts(nullptr)
+{
+ unsigned sizeCount = 0;
+ do
+ {
+ sizeCount++;
+ } while ((sizeTable[sizeCount] != 0) && (sizeCount < 1000));
+
+ m_sizeCount = sizeCount;
+}
+
+Histogram::~Histogram()
+{
+ m_allocator->Free(m_counts);
+}
+
+// We need to lazy allocate the histogram data so static `Histogram` variables don't try to
+// call the host memory allocator in the loader lock, which doesn't work.
+void Histogram::ensureAllocated()
+{
+ if (m_counts == nullptr)
+ {
+ m_counts = new (m_allocator) unsigned[m_sizeCount + 1];
+ memset(m_counts, 0, (m_sizeCount + 1) * sizeof(*m_counts));
+ }
+}
+
+void Histogram::dump(FILE* output)
+{
+ ensureAllocated();
+
+ unsigned t = 0;
+ for (unsigned i = 0; i < m_sizeCount; i++)
+ {
+ t += m_counts[i];
+ }
+
+ for (unsigned c = 0, i = 0; i <= m_sizeCount; i++)
+ {
+ if (i == m_sizeCount)
+ {
+ if (m_counts[i] == 0)
+ {
+ break;
+ }
+
+ fprintf(output, " > %7u", m_sizeTable[i - 1]);
+ }
+ else
+ {
+ if (i == 0)
+ {
+ fprintf(output, " <= ");
+ }
+ else
+ {
+ fprintf(output, "%7u .. ", m_sizeTable[i - 1] + 1);
+ }
+
+ fprintf(output, "%7u", m_sizeTable[i]);
+ }
+
+ c += m_counts[i];
+
+ fprintf(output, " ===> %7u count (%3u%% of total)\n", m_counts[i], (int)(100.0 * c / t));
+ }
+}
+
+void Histogram::record(unsigned size)
+{
+ ensureAllocated();
+
+ unsigned i;
+ for (i = 0; i < m_sizeCount; i++)
+ {
+ if (m_sizeTable[i] >= size)
+ {
+ break;
+ }
+ }
+
+ m_counts[i]++;
+}
+
+#endif // CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE
+
+/*****************************************************************************
+ * Fixed bit vector class
+ */
+
+// bitChunkSize() - Returns number of bits in a bitVect chunk
+inline UINT FixedBitVect::bitChunkSize()
+{
+ return sizeof(UINT) * 8;
+}
+
+// bitNumToBit() - Returns a bit mask of the given bit number
+inline UINT FixedBitVect::bitNumToBit(UINT bitNum)
+{
+ assert(bitNum < bitChunkSize());
+ assert(bitChunkSize() <= sizeof(int) * 8);
+
+ return 1 << bitNum;
+}
+
+// bitVectInit() - Initializes a bit vector of a given size
+FixedBitVect* FixedBitVect::bitVectInit(UINT size, Compiler* comp)
+{
+ UINT bitVectMemSize, numberOfChunks;
+ FixedBitVect* bv;
+
+ assert(size != 0);
+
+ numberOfChunks = (size - 1) / bitChunkSize() + 1;
+ bitVectMemSize = numberOfChunks * (bitChunkSize() / 8); // size in bytes
+
+ assert(bitVectMemSize * bitChunkSize() >= size);
+
+ bv = (FixedBitVect*)comp->compGetMemA(sizeof(FixedBitVect) + bitVectMemSize, CMK_FixedBitVect);
+ memset(bv->bitVect, 0, bitVectMemSize);
+
+ bv->bitVectSize = size;
+
+ return bv;
+}
+
+// bitVectSet() - Sets the given bit
+void FixedBitVect::bitVectSet(UINT bitNum)
+{
+ UINT index;
+
+ assert(bitNum <= bitVectSize);
+
+ index = bitNum / bitChunkSize();
+ bitNum -= index * bitChunkSize();
+
+ bitVect[index] |= bitNumToBit(bitNum);
+}
+
+// bitVectTest() - Tests the given bit
+bool FixedBitVect::bitVectTest(UINT bitNum)
+{
+ UINT index;
+
+ assert(bitNum <= bitVectSize);
+
+ index = bitNum / bitChunkSize();
+ bitNum -= index * bitChunkSize();
+
+ return (bitVect[index] & bitNumToBit(bitNum)) != 0;
+}
+
+// bitVectOr() - Or in the given bit vector
+void FixedBitVect::bitVectOr(FixedBitVect* bv)
+{
+ UINT bitChunkCnt = (bitVectSize - 1) / bitChunkSize() + 1;
+
+ assert(bitVectSize == bv->bitVectSize);
+
+    // OR each chunk
+ for (UINT i = 0; i < bitChunkCnt; i++)
+ {
+ bitVect[i] |= bv->bitVect[i];
+ }
+}
+
+// bitVectAnd() - And with passed in bit vector
+void FixedBitVect::bitVectAnd(FixedBitVect& bv)
+{
+ UINT bitChunkCnt = (bitVectSize - 1) / bitChunkSize() + 1;
+
+ assert(bitVectSize == bv.bitVectSize);
+
+    // AND each chunk
+ for (UINT i = 0; i < bitChunkCnt; i++)
+ {
+ bitVect[i] &= bv.bitVect[i];
+ }
+}
+
+// bitVectGetFirst() - Find the first bit on and return bit num,
+// Return -1 if no bits found.
+UINT FixedBitVect::bitVectGetFirst()
+{
+ return bitVectGetNext((UINT)-1);
+}
+
+// bitVectGetNext() - Find the next bit on given previous position and return bit num.
+// Return -1 if no bits found.
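+// For example (illustrative), in a 64-bit vector with only bits 3 and 40 set, bitVectGetFirst() returns 3 and
+// bitVectGetNext(3) returns 40.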
+UINT FixedBitVect::bitVectGetNext(UINT bitNumPrev)
+{
+ UINT bitNum = (UINT)-1;
+ UINT index;
+ UINT bitMask;
+ UINT bitChunkCnt = (bitVectSize - 1) / bitChunkSize() + 1;
+ UINT i;
+
+ if (bitNumPrev == (UINT)-1)
+ {
+ index = 0;
+ bitMask = (UINT)-1;
+ }
+ else
+ {
+ UINT bit;
+
+ index = bitNumPrev / bitChunkSize();
+ bitNumPrev -= index * bitChunkSize();
+ bit = bitNumToBit(bitNumPrev);
+ bitMask = ~(bit | (bit - 1));
+ }
+
+ // Find first bit
+ for (i = index; i < bitChunkCnt; i++)
+ {
+ UINT bitChunk = bitVect[i] & bitMask;
+
+ if (bitChunk != 0)
+ {
+ BitScanForward((ULONG*)&bitNum, bitChunk);
+ break;
+ }
+
+ bitMask = 0xFFFFFFFF;
+ }
+
+ // Empty bit vector?
+ if (bitNum == (UINT)-1)
+ {
+ return (UINT)-1;
+ }
+
+ bitNum += i * bitChunkSize();
+
+ assert(bitNum <= bitVectSize);
+
+ return bitNum;
+}
+
+// bitVectGetNextAndClear() - Find the first bit on, clear it and return it.
+// Return -1 if no bits found.
+UINT FixedBitVect::bitVectGetNextAndClear()
+{
+ UINT bitNum = (UINT)-1;
+ UINT bitChunkCnt = (bitVectSize - 1) / bitChunkSize() + 1;
+ UINT i;
+
+ // Find first bit
+ for (i = 0; i < bitChunkCnt; i++)
+ {
+ if (bitVect[i] != 0)
+ {
+ BitScanForward((ULONG*)&bitNum, bitVect[i]);
+ break;
+ }
+ }
+
+ // Empty bit vector?
+ if (bitNum == (UINT)-1)
+ {
+ return (UINT)-1;
+ }
+
+ // Clear the bit in the right chunk
+ bitVect[i] &= ~bitNumToBit(bitNum);
+
+ bitNum += i * bitChunkSize();
+
+ assert(bitNum <= bitVectSize);
+
+ return bitNum;
+}
+
+int SimpleSprintf_s(__in_ecount(cbBufSize - (pWriteStart - pBufStart)) char* pWriteStart,
+ __in_ecount(cbBufSize) char* pBufStart,
+ size_t cbBufSize,
+ __in_z const char* fmt,
+ ...)
+{
+ assert(fmt);
+ assert(pBufStart);
+ assert(pWriteStart);
+ assert((size_t)pBufStart <= (size_t)pWriteStart);
+ int ret;
+
+ // compute the space left in the buffer.
+ if ((pBufStart + cbBufSize) < pWriteStart)
+ {
+ NO_WAY("pWriteStart is past end of buffer");
+ }
+ size_t cbSpaceLeft = (size_t)((pBufStart + cbBufSize) - pWriteStart);
+ va_list args;
+ va_start(args, fmt);
+ ret = vsprintf_s(pWriteStart, cbSpaceLeft, const_cast<char*>(fmt), args);
+ va_end(args);
+ if (ret < 0)
+ {
+ NO_WAY("vsprintf_s failed.");
+ }
+ return ret;
+}
+
+#ifdef DEBUG
+
+void hexDump(FILE* dmpf, const char* name, BYTE* addr, size_t size)
+{
+ if (!size)
+ {
+ return;
+ }
+
+ assert(addr);
+
+ fprintf(dmpf, "Hex dump of %s:\n", name);
+
+ for (unsigned i = 0; i < size; i++)
+ {
+ if ((i % 16) == 0)
+ {
+ fprintf(dmpf, "\n %04X: ", i);
+ }
+
+ fprintf(dmpf, "%02X ", *addr++);
+ }
+
+ fprintf(dmpf, "\n\n");
+}
+
+#endif // DEBUG
+
+void HelperCallProperties::init()
+{
+ for (CorInfoHelpFunc helper = CORINFO_HELP_UNDEF; // initialize helper
+ (helper < CORINFO_HELP_COUNT); // test helper for loop exit
+ helper = CorInfoHelpFunc(int(helper) + 1)) // update helper to next
+ {
+        // Generally you want to initialize these to their most typical/safest result
+ //
+ bool isPure = false; // true if the result only depends upon input args and not any global state
+ bool noThrow = false; // true if the helper will never throw
+ bool nonNullReturn = false; // true if the result will never be null or zero
+ bool isAllocator = false; // true if the result is usually a newly created heap item, or may throw OutOfMemory
+ bool mutatesHeap = false; // true if any previous heap objects [are|can be] modified
+ bool mayRunCctor = false; // true if the helper call may cause a static constructor to be run.
+ bool mayFinalize = false; // true if the helper call allocates an object that may need to run a finalizer
+
+ switch (helper)
+ {
+ // Arithmetic helpers that cannot throw
+ case CORINFO_HELP_LLSH:
+ case CORINFO_HELP_LRSH:
+ case CORINFO_HELP_LRSZ:
+ case CORINFO_HELP_LMUL:
+ case CORINFO_HELP_LNG2DBL:
+ case CORINFO_HELP_ULNG2DBL:
+ case CORINFO_HELP_DBL2INT:
+ case CORINFO_HELP_DBL2LNG:
+ case CORINFO_HELP_DBL2UINT:
+ case CORINFO_HELP_DBL2ULNG:
+ case CORINFO_HELP_FLTREM:
+ case CORINFO_HELP_DBLREM:
+ case CORINFO_HELP_FLTROUND:
+ case CORINFO_HELP_DBLROUND:
+
+ isPure = true;
+ noThrow = true;
+ break;
+
+ // Arithmetic helpers that *can* throw.
+
+ // This (or these) are not pure, in that they have "VM side effects"...but they don't mutate the heap.
+ case CORINFO_HELP_ENDCATCH:
+ break;
+
+ // Arithmetic helpers that may throw
+ case CORINFO_HELP_LMOD: // Mods throw div-by zero, and signed mods have problems with the smallest integer
+ // mod -1,
+ case CORINFO_HELP_MOD: // which is not representable as a positive integer.
+ case CORINFO_HELP_UMOD:
+ case CORINFO_HELP_ULMOD:
+
+ case CORINFO_HELP_UDIV: // Divs throw divide-by-zero.
+ case CORINFO_HELP_DIV:
+ case CORINFO_HELP_LDIV:
+ case CORINFO_HELP_ULDIV:
+
+ case CORINFO_HELP_LMUL_OVF:
+ case CORINFO_HELP_ULMUL_OVF:
+ case CORINFO_HELP_DBL2INT_OVF:
+ case CORINFO_HELP_DBL2LNG_OVF:
+ case CORINFO_HELP_DBL2UINT_OVF:
+ case CORINFO_HELP_DBL2ULNG_OVF:
+
+ isPure = true;
+ break;
+
+ // Heap Allocation helpers, these all never return null
+ case CORINFO_HELP_NEWSFAST:
+ case CORINFO_HELP_NEWSFAST_ALIGN8:
+
+ isAllocator = true;
+ nonNullReturn = true;
+ noThrow = true; // only can throw OutOfMemory
+ break;
+
+ case CORINFO_HELP_NEW_CROSSCONTEXT:
+ case CORINFO_HELP_NEWFAST:
+ case CORINFO_HELP_READYTORUN_NEW:
+
+ mayFinalize = true; // These may run a finalizer
+ isAllocator = true;
+ nonNullReturn = true;
+ noThrow = true; // only can throw OutOfMemory
+ break;
+
+ // These allocation helpers do some checks on the size (and lower bound) inputs,
+ // and can throw exceptions other than OOM.
+ case CORINFO_HELP_NEWARR_1_VC:
+ case CORINFO_HELP_NEWARR_1_ALIGN8:
+
+ isAllocator = true;
+ nonNullReturn = true;
+ break;
+
+ // These allocation helpers do some checks on the size (and lower bound) inputs,
+ // and can throw exceptions other than OOM.
+ case CORINFO_HELP_NEW_MDARR:
+ case CORINFO_HELP_NEWARR_1_DIRECT:
+ case CORINFO_HELP_NEWARR_1_OBJ:
+ case CORINFO_HELP_READYTORUN_NEWARR_1:
+
+ mayFinalize = true; // These may run a finalizer
+ isAllocator = true;
+ nonNullReturn = true;
+ break;
+
+ // Heap Allocation helpers that are also pure
+ case CORINFO_HELP_STRCNS:
+
+ isPure = true;
+ isAllocator = true;
+ nonNullReturn = true;
+ noThrow = true; // only can throw OutOfMemory
+ break;
+
+ case CORINFO_HELP_BOX:
+ nonNullReturn = true;
+ isAllocator = true;
+ noThrow = true; // only can throw OutOfMemory
+ break;
+
+ case CORINFO_HELP_BOX_NULLABLE:
+ // Box Nullable is not a 'pure' function
+ // It has a Byref argument that it reads the contents of.
+ //
+ // So two calls to Box Nullable that pass the same address (with the same Value Number)
+ // will produce different results when the contents of the memory pointed to by the Byref changes
+ //
+ isAllocator = true;
+ noThrow = true; // only can throw OutOfMemory
+ break;
+
+ case CORINFO_HELP_RUNTIMEHANDLE_METHOD:
+ case CORINFO_HELP_RUNTIMEHANDLE_CLASS:
+ case CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG:
+ case CORINFO_HELP_RUNTIMEHANDLE_CLASS_LOG:
+ // logging helpers are not technically pure but can be optimized away
+ isPure = true;
+ noThrow = true;
+ nonNullReturn = true;
+ break;
+
+ // type casting helpers
+ case CORINFO_HELP_ISINSTANCEOFINTERFACE:
+ case CORINFO_HELP_ISINSTANCEOFARRAY:
+ case CORINFO_HELP_ISINSTANCEOFCLASS:
+ case CORINFO_HELP_ISINSTANCEOFANY:
+ case CORINFO_HELP_READYTORUN_ISINSTANCEOF:
+
+ isPure = true;
+ noThrow = true; // These return null for a failing cast
+ break;
+
+ // type casting helpers that throw
+ case CORINFO_HELP_CHKCASTINTERFACE:
+ case CORINFO_HELP_CHKCASTARRAY:
+ case CORINFO_HELP_CHKCASTCLASS:
+ case CORINFO_HELP_CHKCASTANY:
+ case CORINFO_HELP_CHKCASTCLASS_SPECIAL:
+ case CORINFO_HELP_READYTORUN_CHKCAST:
+
+ // These throw for a failing cast
+ // But if given a null input arg will return null
+ isPure = true;
+ break;
+
+ // helpers returning addresses, these can also throw
+ case CORINFO_HELP_UNBOX:
+ case CORINFO_HELP_GETREFANY:
+ case CORINFO_HELP_LDELEMA_REF:
+
+ isPure = true;
+ break;
+
+ // helpers that return internal handle
+ // TODO-ARM64-Bug?: Can these throw or not?
+ case CORINFO_HELP_GETCLASSFROMMETHODPARAM:
+ case CORINFO_HELP_GETSYNCFROMCLASSHANDLE:
+
+ isPure = true;
+ break;
+
+ // Helpers that load the base address for static variables.
+ // We divide these between those that may and may not invoke
+ // static class constructors.
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_GETGENERICS_GCTHREADSTATIC_BASE:
+ case CORINFO_HELP_GETGENERICS_NONGCTHREADSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE:
+ case CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS:
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_GETSTATICFIELDADDR_CONTEXT:
+ case CORINFO_HELP_GETSTATICFIELDADDR_TLS:
+ case CORINFO_HELP_GETGENERICS_GCSTATIC_BASE:
+ case CORINFO_HELP_GETGENERICS_NONGCSTATIC_BASE:
+ case CORINFO_HELP_READYTORUN_STATIC_BASE:
+
+ // These may invoke static class constructors
+ // These can throw InvalidProgram exception if the class can not be constructed
+ //
+ isPure = true;
+ nonNullReturn = true;
+ mayRunCctor = true;
+ break;
+
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR:
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR:
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR:
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR:
+
+ // These do not invoke static class constructors
+ //
+ isPure = true;
+ noThrow = true;
+ nonNullReturn = true;
+ break;
+
+ // GC Write barrier support
+ // TODO-ARM64-Bug?: Can these throw or not?
+ case CORINFO_HELP_ASSIGN_REF:
+ case CORINFO_HELP_CHECKED_ASSIGN_REF:
+ case CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP:
+ case CORINFO_HELP_ASSIGN_BYREF:
+ case CORINFO_HELP_ASSIGN_STRUCT:
+
+ mutatesHeap = true;
+ break;
+
+ // Accessing fields (write)
+ case CORINFO_HELP_SETFIELD32:
+ case CORINFO_HELP_SETFIELD64:
+ case CORINFO_HELP_SETFIELDOBJ:
+ case CORINFO_HELP_SETFIELDSTRUCT:
+ case CORINFO_HELP_SETFIELDFLOAT:
+ case CORINFO_HELP_SETFIELDDOUBLE:
+ case CORINFO_HELP_ARRADDR_ST:
+
+ mutatesHeap = true;
+ break;
+
+ // These helper calls always throw an exception
+ case CORINFO_HELP_OVERFLOW:
+ case CORINFO_HELP_VERIFICATION:
+ case CORINFO_HELP_RNGCHKFAIL:
+ case CORINFO_HELP_THROWDIVZERO:
+#if COR_JIT_EE_VERSION > 460
+ case CORINFO_HELP_THROWNULLREF:
+#endif // COR_JIT_EE_VERSION
+ case CORINFO_HELP_THROW:
+ case CORINFO_HELP_RETHROW:
+
+ break;
+
+ // These helper calls may throw an exception
+ case CORINFO_HELP_METHOD_ACCESS_CHECK:
+ case CORINFO_HELP_FIELD_ACCESS_CHECK:
+ case CORINFO_HELP_CLASS_ACCESS_CHECK:
+ case CORINFO_HELP_DELEGATE_SECURITY_CHECK:
+
+ break;
+
+ // This is a debugging aid; it simply returns a constant address.
+ case CORINFO_HELP_LOOP_CLONE_CHOICE_ADDR:
+ isPure = true;
+ noThrow = true;
+ break;
+
+ // Not sure how to handle optimization involving the rest of these helpers
+ default:
+
+ // The most pessimistic results are returned for these helpers
+ mutatesHeap = true;
+ break;
+ }
+
+ m_isPure[helper] = isPure;
+ m_noThrow[helper] = noThrow;
+ m_nonNullReturn[helper] = nonNullReturn;
+ m_isAllocator[helper] = isAllocator;
+ m_mutatesHeap[helper] = mutatesHeap;
+ m_mayRunCctor[helper] = mayRunCctor;
+ m_mayFinalize[helper] = mayFinalize;
+ }
+}
+
+//=============================================================================
+// AssemblyNamesList2
+//=============================================================================
+// The string should be of the form
+// MyAssembly
+// MyAssembly;mscorlib;System
+// MyAssembly;mscorlib System
+
+AssemblyNamesList2::AssemblyNamesList2(const wchar_t* list, IAllocator* alloc) : m_alloc(alloc)
+{
+ assert(m_alloc != nullptr);
+
+ WCHAR prevChar = '?'; // dummy
+ LPWSTR nameStart = nullptr; // start of the name currently being processed. nullptr if no current name
+ AssemblyName** ppPrevLink = &m_pNames;
+
+ for (LPWSTR listWalk = const_cast<LPWSTR>(list); prevChar != '\0'; prevChar = *listWalk, listWalk++)
+ {
+ WCHAR curChar = *listWalk;
+
+ if (iswspace(curChar) || curChar == W(';') || curChar == W('\0'))
+ {
+ //
+ // Found a separator: white-space, ';', or the terminating null
+ //
+
+ if (nameStart)
+ {
+ // Found the end of the current name; add a new assembly name to the list.
+
+ AssemblyName* newName = new (m_alloc) AssemblyName();
+
+ // Null out the current character so we can do zero-terminated string work; we'll restore it later.
+ *listWalk = W('\0');
+
+ // How much space do we need?
+ int convertedNameLenBytes =
+ WszWideCharToMultiByte(CP_UTF8, 0, nameStart, -1, nullptr, 0, nullptr, nullptr);
+ newName->m_assemblyName = new (m_alloc) char[convertedNameLenBytes]; // convertedNameLenBytes includes
+ // the trailing null character
+ if (WszWideCharToMultiByte(CP_UTF8, 0, nameStart, -1, newName->m_assemblyName, convertedNameLenBytes,
+ nullptr, nullptr) != 0)
+ {
+ *ppPrevLink = newName;
+ ppPrevLink = &newName->m_next;
+ }
+ else
+ {
+ // Failed to convert the string. Ignore this string (and leak the memory).
+ }
+
+ nameStart = nullptr;
+
+ // Restore the current character.
+ *listWalk = curChar;
+ }
+ }
+ else if (!nameStart)
+ {
+ //
+ // Found the start of a new name
+ //
+
+ nameStart = listWalk;
+ }
+ }
+
+ assert(nameStart == nullptr); // cannot be in the middle of a name
+ *ppPrevLink = nullptr; // Terminate the last element of the list.
+}
+
+AssemblyNamesList2::~AssemblyNamesList2()
+{
+ for (AssemblyName* pName = m_pNames; pName != nullptr; /**/)
+ {
+ AssemblyName* cur = pName;
+ pName = pName->m_next;
+
+ m_alloc->Free(cur->m_assemblyName);
+ m_alloc->Free(cur);
+ }
+}
+
+bool AssemblyNamesList2::IsInList(const char* assemblyName)
+{
+ for (AssemblyName* pName = m_pNames; pName != nullptr; pName = pName->m_next)
+ {
+ if (_stricmp(pName->m_assemblyName, assemblyName) == 0)
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
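A minimal usage sketch for the list format described above; the IAllocator instance named 'alloc' is assumed to be supplied by the surrounding JIT code, and W() is the wide-string literal macro already used in this file.

    AssemblyNamesList2 exclusions(W("MyAssembly;mscorlib System"), alloc);
    assert(!exclusions.IsEmpty());
    if (exclusions.IsInList("mscorlib")) // UTF-8 name, compared case-insensitively
    {
        // "mscorlib" was one of the three parsed names
    }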
+
+#ifdef FEATURE_JIT_METHOD_PERF
+CycleCount::CycleCount() : cps(CycleTimer::CyclesPerSecond())
+{
+}
+
+bool CycleCount::GetCycles(unsigned __int64* time)
+{
+ return CycleTimer::GetThreadCyclesS(time);
+}
+
+bool CycleCount::Start()
+{
+ return GetCycles(&beginCycles);
+}
+
+double CycleCount::ElapsedTime()
+{
+ unsigned __int64 nowCycles;
+ (void)GetCycles(&nowCycles);
+ return ((double)(nowCycles - beginCycles) / cps) * 1000.0;
+}
+
+bool PerfCounter::Start()
+{
+ bool result = QueryPerformanceFrequency(&beg) != 0;
+ if (!result)
+ {
+ return result;
+ }
+ freq = (double)beg.QuadPart / 1000.0;
+ (void)QueryPerformanceCounter(&beg);
+ return result;
+}
+
+// Return elapsed time from Start() in millis.
+double PerfCounter::ElapsedTime()
+{
+ LARGE_INTEGER li;
+ (void)QueryPerformanceCounter(&li);
+ return (double)(li.QuadPart - beg.QuadPart) / freq;
+}
+
+#endif
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ * Return the number of digits in a number of the given base (default base 10).
+ * Used when outputting strings.
+ */
+unsigned CountDigits(unsigned num, unsigned base /* = 10 */)
+{
+ assert(2 <= base && base <= 16); // sanity check
+ unsigned count = 1;
+ while (num >= base)
+ {
+ num /= base;
+ ++count;
+ }
+ return count;
+}
+
+#endif // DEBUG
+
+double FloatingPointUtils::convertUInt64ToDouble(unsigned __int64 uIntVal)
+{
+ __int64 s64 = uIntVal;
+ double d;
+ if (s64 < 0)
+ {
+#if defined(_TARGET_XARCH_)
+ // RyuJIT codegen and clang (or gcc) may produce different results for casting uint64 to
+ // double, and the clang result is more accurate. For example,
+ // 1) (double)0x84595161401484A0UL --> 43e08b2a2c280290 (RyuJIT codegen or VC++)
+ // 2) (double)0x84595161401484A0UL --> 43e08b2a2c280291 (clang or gcc)
+ // If the folding optimization below is implemented by simple casting of (double)uint64_val
+ // and it is compiled by clang, casting result can be inconsistent, depending on whether
+ // the folding optimization is triggered or the codegen generates instructions for casting.
+ // The current solution is to force the same math as the codegen does, so that casting
+ // result is always consistent.
+
+ // d = (double)(int64_t)uint64 + 0x1p64
+ uint64_t adjHex = 0x43F0000000000000UL;
+ d = (double)s64 + *(double*)&adjHex;
+#else
+ d = (double)uIntVal;
+#endif
+ }
+ else
+ {
+ d = (double)uIntVal;
+ }
+ return d;
+}
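As a standalone illustration of the adjustment above (not part of the JIT sources): 0x43F0000000000000 is the IEEE-754 bit pattern of 2^64, so adding it to the double conversion of the negative signed reinterpretation recovers the unsigned value.

    #include <cstdint>
    #include <cstring>

    double UInt64ToDoubleViaAdjust(uint64_t u)
    {
        int64_t  s       = static_cast<int64_t>(u); // negative when the top bit of 'u' is set
        uint64_t adjBits = 0x43F0000000000000ULL;   // bit pattern of 2^64 as a double
        double   adj;
        std::memcpy(&adj, &adjBits, sizeof(adj));   // memcpy avoids the type-punning pointer cast
        return (s < 0) ? (static_cast<double>(s) + adj) : static_cast<double>(s);
    }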
+
+float FloatingPointUtils::convertUInt64ToFloat(unsigned __int64 u64)
+{
+ double d = convertUInt64ToDouble(u64);
+ return (float)d;
+}
+
+unsigned __int64 FloatingPointUtils::convertDoubleToUInt64(double d)
+{
+ unsigned __int64 u64;
+ if (d >= 0.0)
+ {
+ // Work around a C++ issue where it doesn't properly convert large positive doubles
+ const double two63 = 2147483648.0 * 4294967296.0;
+ if (d < two63)
+ {
+ u64 = UINT64(d);
+ }
+ else
+ {
+ // subtract 0x8000000000000000, do the convert then add it back again
+ u64 = INT64(d - two63) + I64(0x8000000000000000);
+ }
+ return u64;
+ }
+
+#ifdef _TARGET_XARCH_
+
+ // While the Ecma spec does not specifically call this out,
+ // the case of conversion from negative double to unsigned integer is
+ // effectively an overflow and therefore the result is unspecified.
+ // With MSVC for x86/x64, such a conversion results in the bit-equivalent
+ // unsigned value of the conversion to integer. Other compilers convert
+ // negative doubles to zero when the target is unsigned.
+ // To make the behavior consistent across OSes on TARGET_XARCH,
+ // this double cast is needed to conform to MSVC behavior.
+
+ u64 = UINT64(INT64(d));
+#else
+ u64 = UINT64(d);
+#endif // _TARGET_XARCH_
+
+ return u64;
+}
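A worked example of the large-value path above, with the intermediate values spelled out (the input constant is chosen purely for illustration).

    double   d = 13835058055282163712.0;                       // 1.5 * 2^63, too large for INT64
    uint64_t u = FloatingPointUtils::convertDoubleToUInt64(d); // 0xC000000000000000
    // Taken path: INT64(d - 2^63) == 2^62, then adding back 0x8000000000000000 gives 0xC000000000000000.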
+
+// Rounds a double-precision floating-point value to the nearest integer,
+// and rounds midpoint values to the nearest even number.
+// Note this should align with classlib in floatdouble.cpp
+// Specializing for x86 using an x87 instruction is optional since
+// this outcome is identical across targets.
+double FloatingPointUtils::round(double x)
+{
+ // If the number has no fractional part do nothing
+ // This shortcut is necessary to work around precision loss in borderline cases on some platforms
+ if (x == ((double)((__int64)x)))
+ {
+ return x;
+ }
+
+ // Otherwise, add 0.5 and take the floor. For a midpoint value (one equally close
+ // to two integers) the adjustment below ensures we return the even neighbor.
+
+ double tempVal = (x + 0.5);
+ double flrTempVal = floor(tempVal);
+
+ if ((flrTempVal == tempVal) && (fmod(tempVal, 2.0) != 0))
+ {
+ flrTempVal -= 1.0;
+ }
+
+ return _copysign(flrTempVal, x);
+}
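A few sample inputs and outputs for the routine above, assuming it is linked in as-is; midpoints go to the nearest even integer, everything else rounds normally.

    assert(FloatingPointUtils::round(2.3) == 2.0);
    assert(FloatingPointUtils::round(0.5) == 0.0);   // midpoint -> even
    assert(FloatingPointUtils::round(1.5) == 2.0);   // midpoint -> even
    assert(FloatingPointUtils::round(2.5) == 2.0);   // midpoint -> even
    assert(FloatingPointUtils::round(-2.5) == -2.0); // midpoint -> even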
diff --git a/src/jit/utils.h b/src/jit/utils.h
new file mode 100644
index 0000000000..1cd35903dd
--- /dev/null
+++ b/src/jit/utils.h
@@ -0,0 +1,710 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Utils.h XX
+XX XX
+XX Has miscellaneous utility functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifndef _UTILS_H_
+#define _UTILS_H_
+
+#include "iallocator.h"
+#include "cycletimer.h"
+
+// Needed for unreached()
+#include "error.h"
+
+#ifdef _TARGET_64BIT_
+#define BitScanForwardPtr BitScanForward64
+#else
+#define BitScanForwardPtr BitScanForward
+#endif
+
+template <typename T, int size>
+unsigned ArrLen(T (&)[size])
+{
+ return size;
+}
+
+// return true if arg is a power of 2
+template <typename T>
+inline bool isPow2(T i)
+{
+ return (i > 0 && ((i - 1) & i) == 0);
+}
+
+// Adapter for iterators to a type that is compatible with C++11
+// range-based for loops.
+template <typename TIterator>
+class IteratorPair
+{
+ TIterator m_begin;
+ TIterator m_end;
+
+public:
+ IteratorPair(TIterator begin, TIterator end) : m_begin(begin), m_end(end)
+ {
+ }
+
+ inline TIterator begin()
+ {
+ return m_begin;
+ }
+
+ inline TIterator end()
+ {
+ return m_end;
+ }
+};
+
+template <typename TIterator>
+inline IteratorPair<TIterator> MakeIteratorPair(TIterator begin, TIterator end)
+{
+ return IteratorPair<TIterator>(begin, end);
+}
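A usage sketch for the adapter; a std::vector is assumed here purely for illustration (with <vector> included), since any iterator pair works.

    std::vector<int> nums = {1, 2, 3, 4};
    for (int n : MakeIteratorPair(nums.begin() + 1, nums.end()))
    {
        // visits 2, 3, 4
    }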
+
+// Recursive template definition to calculate the base-2 logarithm
+// of a constant value.
+template <unsigned val, unsigned acc = 0>
+struct ConstLog2
+{
+ enum
+ {
+ value = ConstLog2<val / 2, acc + 1>::value
+ };
+};
+
+template <unsigned acc>
+struct ConstLog2<0, acc>
+{
+ enum
+ {
+ value = acc
+ };
+};
+
+template <unsigned acc>
+struct ConstLog2<1, acc>
+{
+ enum
+ {
+ value = acc
+ };
+};
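A couple of compile-time checks that follow directly from the recursion above.

    static_assert(ConstLog2<1>::value == 0, "log2(1) == 0");
    static_assert(ConstLog2<8>::value == 3, "log2(8) == 3");
    static_assert(ConstLog2<64>::value == 6, "log2(64) == 6");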
+
+inline const char* dspBool(bool b)
+{
+ return (b) ? "true" : "false";
+}
+
+#ifdef FEATURE_CORECLR
+#ifdef _CRT_ABS_DEFINED
+// we don't have the full standard library
+inline int64_t abs(int64_t t)
+{
+ return t > 0 ? t : -t;
+}
+#endif
+#endif // FEATURE_CORECLR
+
+template <typename T>
+int signum(T val)
+{
+ if (val < T(0))
+ {
+ return -1;
+ }
+ else if (val > T(0))
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+class JitSimplerHashBehavior
+{
+public:
+ static const unsigned s_growth_factor_numerator = 3;
+ static const unsigned s_growth_factor_denominator = 2;
+
+ static const unsigned s_density_factor_numerator = 3;
+ static const unsigned s_density_factor_denominator = 4;
+
+ static const unsigned s_minimum_allocation = 7;
+
+ inline static void DECLSPEC_NORETURN NoMemory()
+ {
+ NOMEM();
+ }
+};
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+// ConfigMethodRange describes a set of methods, specified via their
+// hash codes. This can be used for binary search and/or specifying an
+// explicit method set.
+//
+// Note method hash codes are not necessarily unique. For instance
+// many IL stubs may have the same hash.
+//
+// If range string is null or just whitespace, range includes all
+// methods.
+//
+// Parses values as decimal numbers.
+//
+// Examples:
+//
+// [string with just spaces] : all methods
+// 12345678 : a single method
+// 12345678-23456789 : a range of methods
+// 99998888 12345678-23456789 : a range of methods plus a single method
+
+class ConfigMethodRange
+{
+
+public:
+ // Default capacity
+ enum
+ {
+ DEFAULT_CAPACITY = 50
+ };
+
+ // Does the range include this method's hash?
+ bool Contains(class ICorJitInfo* info, CORINFO_METHOD_HANDLE method);
+
+ // Ensure the range string has been parsed.
+ void EnsureInit(const wchar_t* rangeStr, unsigned capacity = DEFAULT_CAPACITY)
+ {
+ // Make sure that the memory was zero initialized
+ assert(m_inited == 0 || m_inited == 1);
+
+ if (!m_inited)
+ {
+ InitRanges(rangeStr, capacity);
+ assert(m_inited == 1);
+ }
+ }
+
+ // Error checks
+ bool Error() const
+ {
+ return m_badChar != 0;
+ }
+ size_t BadCharIndex() const
+ {
+ return m_badChar - 1;
+ }
+
+private:
+ struct Range
+ {
+ unsigned m_low;
+ unsigned m_high;
+ };
+
+ void InitRanges(const wchar_t* rangeStr, unsigned capacity);
+
+ unsigned m_entries; // number of entries in the range array
+ unsigned m_lastRange; // count of low-high pairs
+ unsigned m_inited; // 1 if range string has been parsed
+ size_t m_badChar; // index + 1 of any bad character in range string
+ Range* m_ranges; // ranges of functions to include
+};
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
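A hypothetical usage sketch of ConfigMethodRange; 'jitInfo' and 'methodHnd' are placeholder names for an ICorJitInfo pointer and a method handle available at the call site, and the range string is one of the example forms listed above.

    static ConfigMethodRange s_range; // relies on zero-initialized static storage (see EnsureInit)
    s_range.EnsureInit(W("99998888 12345678-23456789"));
    if (!s_range.Error() && s_range.Contains(jitInfo, methodHnd))
    {
        // this method's hash falls inside the configured range
    }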
+
+class Compiler;
+
+/*****************************************************************************
+ * Fixed bit vector class
+ */
+class FixedBitVect
+{
+private:
+ UINT bitVectSize;
+ UINT bitVect[];
+
+ // bitChunkSize() - Returns number of bits in a bitVect chunk
+ static UINT bitChunkSize();
+
+ // bitNumToBit() - Returns a bit mask of the given bit number
+ static UINT bitNumToBit(UINT bitNum);
+
+public:
+ // bitVectInit() - Initializes a bit vector of a given size
+ static FixedBitVect* bitVectInit(UINT size, Compiler* comp);
+
+ // bitVectSet() - Sets the given bit
+ void bitVectSet(UINT bitNum);
+
+ // bitVectTest() - Tests the given bit
+ bool bitVectTest(UINT bitNum);
+
+ // bitVectOr() - Or in the given bit vector
+ void bitVectOr(FixedBitVect* bv);
+
+ // bitVectAnd() - And with passed in bit vector
+ void bitVectAnd(FixedBitVect& bv);
+
+ // bitVectGetFirst() - Find the first bit on and return the bit num.
+ // Return -1 if no bits found.
+ UINT bitVectGetFirst();
+
+ // bitVectGetNext() - Find the next bit on given previous bit and return bit num.
+ // Return -1 if no bits found.
+ UINT bitVectGetNext(UINT bitNumPrev);
+
+ // bitVectGetNextAndClear() - Find the first bit on, clear it and return it.
+ // Return -1 if no bits found.
+ UINT bitVectGetNextAndClear();
+};
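A usage sketch for the bit-vector interface; the Compiler* named 'comp' is assumed to be available at the call site.

    FixedBitVect* bv = FixedBitVect::bitVectInit(64, comp);
    bv->bitVectSet(3);
    bv->bitVectSet(17);
    for (UINT bit = bv->bitVectGetFirst(); bit != (UINT)-1; bit = bv->bitVectGetNext(bit))
    {
        // visits bit 3, then bit 17
    }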
+
+/******************************************************************************
+ * A specialized version of sprintf_s to simplify conversion to SecureCRT
+ *
+ * pWriteStart -> A pointer to the first byte to which data is written.
+ * pBufStart -> the start of the buffer into which the data is written. If
+ * composing a complex string with multiple calls to sprintf, this
+ * should not change.
+ * cbBufSize -> The size of the overall buffer (i.e. the size of the buffer
+ * pointed to by pBufStart). For subsequent calls, this does not
+ * change.
+ * fmt -> The format string
+ * ... -> Arguments.
+ *
+ * returns -> number of bytes successfully written, not including the null
+ * terminator. Calls NO_WAY on error.
+ */
+int SimpleSprintf_s(__in_ecount(cbBufSize - (pWriteStart - pBufStart)) char* pWriteStart,
+ __in_ecount(cbBufSize) char* pBufStart,
+ size_t cbBufSize,
+ __in_z const char* fmt,
+ ...);
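A sketch of the multi-call composition pattern the parameter comments describe; 'regName' and 'offs' are placeholder values.

    char buf[128];
    int  written = SimpleSprintf_s(buf, buf, sizeof(buf), "reg=%s ", regName);
    written += SimpleSprintf_s(buf + written, buf, sizeof(buf), "offs=%d", offs);
    // 'buf' and sizeof(buf) stay fixed across calls; only the write pointer advances.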
+
+#ifdef DEBUG
+void hexDump(FILE* dmpf, const char* name, BYTE* addr, size_t size);
+#endif // DEBUG
+
+/******************************************************************************
+ * ScopedSetVariable: A simple class to set and restore a variable within a scope.
+ * For example, it can be used to set a 'bool' flag to 'true' at the beginning of a
+ * function and automatically back to 'false' either at the end of the function, or at
+ * any other return location. The variable should not be changed during the scope:
+ * the destructor asserts that the value at destruction time is the same one we set.
+ * Usage: ScopedSetVariable<bool> _unused_name(&variable, true);
+ */
+template <typename T>
+class ScopedSetVariable
+{
+public:
+ ScopedSetVariable(T* pVariable, T value) : m_pVariable(pVariable)
+ {
+ m_oldValue = *m_pVariable;
+ *m_pVariable = value;
+ INDEBUG(m_value = value;)
+ }
+
+ ~ScopedSetVariable()
+ {
+ assert(*m_pVariable == m_value); // Assert that the value didn't change between ctor and dtor
+ *m_pVariable = m_oldValue;
+ }
+
+private:
+#ifdef DEBUG
+ T m_value; // The value we set the variable to (used for assert).
+#endif // DEBUG
+ T m_oldValue; // The old value, to restore the variable to.
+ T* m_pVariable; // Address of the variable to change
+};
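A usage sketch matching the comment above; 'fgInPhase' and 'Phase' are illustrative names, not actual compiler members.

    bool fgInPhase = false;

    void Phase()
    {
        ScopedSetVariable<bool> markPhase(&fgInPhase, true);
        // ... any return path restores the previous value in the destructor ...
    }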
+
+/******************************************************************************
+ * PhasedVar: A class to represent a variable that has phases, in particular,
+ * a write phase where the variable is computed, and a read phase where the
+ * variable is used. Once the variable has been read, it can no longer be changed.
+ * Reading the variable essentially commits everyone to using that value forever,
+ * and it is assumed that subsequent changes to the variable would invalidate
+ * whatever assumptions were made by the previous readers, leading to bad generated code.
+ * These assumptions are asserted in DEBUG builds.
+ * The phase ordering is clean for AMD64, but not for x86/ARM. So don't do the phase
+ * ordering asserts for those platforms.
+ */
+template <typename T>
+class PhasedVar
+{
+public:
+ PhasedVar()
+#ifdef DEBUG
+ : m_initialized(false), m_writePhase(true)
+#endif // DEBUG
+ {
+ }
+
+ PhasedVar(T value)
+ : m_value(value)
+#ifdef DEBUG
+ , m_initialized(true)
+ , m_writePhase(true)
+#endif // DEBUG
+ {
+ }
+
+ ~PhasedVar()
+ {
+#ifdef DEBUG
+ m_initialized = false;
+ m_writePhase = true;
+#endif // DEBUG
+ }
+
+ // Read the value. Change to the read phase.
+ // Marked 'const' because we don't change the encapsulated value, even though
+ // we do change the write phase, which is only for debugging asserts.
+
+ operator T() const
+ {
+#ifdef DEBUG
+ assert(m_initialized);
+ (const_cast<PhasedVar*>(this))->m_writePhase = false;
+#endif // DEBUG
+ return m_value;
+ }
+
+ // Functions/operators to write the value. Must be in the write phase.
+
+ PhasedVar& operator=(const T& value)
+ {
+#ifdef DEBUG
+#ifndef LEGACY_BACKEND
+ assert(m_writePhase);
+#endif // !LEGACY_BACKEND
+ m_initialized = true;
+#endif // DEBUG
+ m_value = value;
+ return *this;
+ }
+
+ PhasedVar& operator&=(const T& value)
+ {
+#ifdef DEBUG
+#ifndef LEGACY_BACKEND
+ assert(m_writePhase);
+#endif // !LEGACY_BACKEND
+ m_initialized = true;
+#endif // DEBUG
+ m_value &= value;
+ return *this;
+ }
+
+ // Note: if you need more <op>= functions, you can define them here, like operator&=
+
+ // Assign a value, but don't assert if we're not in the write phase, and
+ // don't change the phase (if we're actually in the read phase, we'll stay
+ // in the read phase). This is a dangerous function, and overrides the main
+ // benefit of this class. Use it wisely!
+ void OverrideAssign(const T& value)
+ {
+#ifdef DEBUG
+ m_initialized = true;
+#endif // DEBUG
+ m_value = value;
+ }
+
+ // We've decided that this variable can go back to write phase, even if it has been
+ // written. This can be used, for example, for variables set and read during frame
+ // layout calculation, as long as it is before final layout, such that anything
+ // being calculated is just an estimate anyway. Obviously, it must be used carefully,
+ // since it overrides the main benefit of this class.
+ void ResetWritePhase()
+ {
+#ifdef DEBUG
+ m_writePhase = true;
+#endif // DEBUG
+ }
+
+private:
+ // Don't allow a copy constructor. (This could be allowed, but only add it once it is actually needed.)
+
+ PhasedVar(const PhasedVar& o)
+ {
+ unreached();
+ }
+
+ T m_value;
+#ifdef DEBUG
+ bool m_initialized; // true once the variable has been initialized, that is, written once.
+ bool m_writePhase; // true if we are in the (initial) "write" phase. Once the value is read, this changes to false,
+ // and can't be changed back.
+#endif // DEBUG
+};
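A sketch of the intended write-then-read discipline; 'frameSize' is an illustrative variable, not an actual compiler member.

    PhasedVar<int> frameSize;
    frameSize = 64;     // write phase: assignments are allowed
    frameSize &= ~0xF;  // still in the write phase
    int sz = frameSize; // first read flips to the read phase (tracked in DEBUG)
    // A subsequent "frameSize = 128;" would assert in DEBUG on non-legacy backends,
    // unless ResetWritePhase() or OverrideAssign() is used deliberately.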
+
+class HelperCallProperties
+{
+private:
+ bool m_isPure[CORINFO_HELP_COUNT];
+ bool m_noThrow[CORINFO_HELP_COUNT];
+ bool m_nonNullReturn[CORINFO_HELP_COUNT];
+ bool m_isAllocator[CORINFO_HELP_COUNT];
+ bool m_mutatesHeap[CORINFO_HELP_COUNT];
+ bool m_mayRunCctor[CORINFO_HELP_COUNT];
+ bool m_mayFinalize[CORINFO_HELP_COUNT];
+
+ void init();
+
+public:
+ HelperCallProperties()
+ {
+ init();
+ }
+
+ bool IsPure(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_isPure[helperId];
+ }
+
+ bool NoThrow(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_noThrow[helperId];
+ }
+
+ bool NonNullReturn(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_nonNullReturn[helperId];
+ }
+
+ bool IsAllocator(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_isAllocator[helperId];
+ }
+
+ bool MutatesHeap(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_mutatesHeap[helperId];
+ }
+
+ bool MayRunCctor(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_mayRunCctor[helperId];
+ }
+
+ bool MayFinalize(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_mayFinalize[helperId];
+ }
+};
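A sketch of how the tables populated by init() are meant to be consulted, using a helper that appears in the switch above.

    HelperCallProperties helperProps;
    if (helperProps.IsPure(CORINFO_HELP_ISINSTANCEOFCLASS) &&
        helperProps.NoThrow(CORINFO_HELP_ISINSTANCEOFCLASS))
    {
        // the helper call can be treated like an ordinary side-effect-free expression
    }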
+
+//*****************************************************************************
+// AssemblyNamesList2: Parses and stores a list of Assembly names, and provides
+// a function for determining whether a given assembly name is part of the list.
+//
+// This is a clone of the AssemblyNamesList class that exists in the VM's utilcode,
+// modified to use the JIT's memory allocator and throw on out of memory behavior.
+// It is named AssemblyNamesList2 to avoid a name conflict with the VM version.
+// It might be preferable to adapt the VM's code to be more flexible (for example,
+// by using an IAllocator), but the string handling code there is heavily macroized,
+// and for the small usage we have of this class, investing in genericizing the VM
+// implementation didn't seem worth it.
+//*****************************************************************************
+
+class AssemblyNamesList2
+{
+ struct AssemblyName
+ {
+ char* m_assemblyName;
+ AssemblyName* m_next;
+ };
+
+ AssemblyName* m_pNames; // List of names
+ IAllocator* m_alloc; // IAllocator to use in this class
+
+public:
+ // Take a Unicode string list of assembly names, parse it, and store it.
+ AssemblyNamesList2(const wchar_t* list, __in IAllocator* alloc);
+
+ ~AssemblyNamesList2();
+
+ // Return 'true' if 'assemblyName' (in UTF-8 format) is in the stored list of assembly names.
+ bool IsInList(const char* assemblyName);
+
+ // Return 'true' if the assembly name list is empty.
+ bool IsEmpty()
+ {
+ return m_pNames == nullptr;
+ }
+};
+
+#ifdef FEATURE_JIT_METHOD_PERF
+// When Start() is called the current cycle count is noted, and when ElapsedTime()
+// is called we know how much time was spent, in msecs.
+//
+class CycleCount
+{
+private:
+ double cps; // cycles per second
+ unsigned __int64 beginCycles; // cycles at stop watch construction
+public:
+ CycleCount();
+
+ // Kick off the counter; if called again, the latest cycle count becomes the new starting point.
+ // If the method returns false, any other query yields unpredictable results.
+ bool Start();
+
+ // Return time elapsed in msecs, if Start returned true.
+ double ElapsedTime();
+
+private:
+ // Return true if successful.
+ bool GetCycles(unsigned __int64* time);
+};
+
+// Uses win API QueryPerformanceCounter/QueryPerformanceFrequency.
+class PerfCounter
+{
+ LARGE_INTEGER beg;
+ double freq;
+
+public:
+ // If the method returns false, any other query yields unpredictable results.
+ bool Start();
+
+ // Return time elapsed from start in millis, if Start returned true.
+ double ElapsedTime();
+};
+
+#endif // FEATURE_JIT_METHOD_PERF
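Both timers follow the same start/elapsed pattern; a minimal sketch with CycleCount (the PerfCounter usage is identical).

    CycleCount cc;
    if (cc.Start())
    {
        // ... work being measured ...
        double ms = cc.ElapsedTime(); // milliseconds of thread cycle time since Start()
    }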
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ * Return the number of digits in a number of the given base (default base 10).
+ * Used when outputting strings.
+ */
+unsigned CountDigits(unsigned num, unsigned base = 10);
+
+#endif // DEBUG
+
+// Utility class for lists.
+template <typename T>
+struct ListNode
+{
+ T data;
+ ListNode<T>* next;
+
+ // Create the class without using constructors.
+ static ListNode<T>* Create(T value, IAllocator* alloc)
+ {
+ ListNode<T>* node = new (alloc) ListNode<T>;
+ node->data = value;
+ node->next = nullptr;
+ return node;
+ }
+};
+
+/*****************************************************************************
+* Floating point utility class
+*/
+class FloatingPointUtils
+{
+public:
+ static double convertUInt64ToDouble(unsigned __int64 u64);
+
+ static float convertUInt64ToFloat(unsigned __int64 u64);
+
+ static unsigned __int64 convertDoubleToUInt64(double d);
+
+ static double round(double x);
+};
+
+// The CLR requires that critical section locks be initialized via its ClrCreateCriticalSection API...but
+// that can't be called until the CLR is initialized. If we have static data that we'd like to protect by a
+// lock, and we have a statically allocated lock to protect that data, there's an issue in how to initialize
+// that lock. We could insert an initialize call in the startup path, but one might prefer to keep the code
+// more local. For such situations, CritSecObject solves the initialization problem, via a level of
+// indirection. A pointer to the lock is initially null, and when we query for the lock pointer via "Val()".
+// If the lock has not yet been allocated, this allocates one (here a leaf lock), and uses a
+// CompareAndExchange-based lazy-initialization to update the field. If this fails, the allocated lock is
+// destroyed. This will work as long as the first locking attempt occurs after enough CLR initialization has
+// happened to make ClrCreateCriticalSection calls legal.
+
+class CritSecObject
+{
+public:
+ CritSecObject()
+ {
+ m_pCs = nullptr;
+ }
+
+ CRITSEC_COOKIE Val()
+ {
+ if (m_pCs == nullptr)
+ {
+ // CompareExchange-based lazy init.
+ CRITSEC_COOKIE newCs = ClrCreateCriticalSection(CrstLeafLock, CRST_DEFAULT);
+ CRITSEC_COOKIE observed = InterlockedCompareExchangeT(&m_pCs, newCs, NULL);
+ if (observed != nullptr)
+ {
+ ClrDeleteCriticalSection(newCs);
+ }
+ }
+ return m_pCs;
+ }
+
+private:
+ // CRITSEC_COOKIE is an opaque pointer type.
+ CRITSEC_COOKIE m_pCs;
+
+ // No copying or assignment allowed.
+ CritSecObject(const CritSecObject&) = delete;
+ CritSecObject& operator=(const CritSecObject&) = delete;
+};
+
+// Stack-based holder for a critical section lock.
+// Ensures lock is released.
+
+class CritSecHolder
+{
+public:
+ CritSecHolder(CritSecObject& critSec) : m_CritSec(critSec)
+ {
+ ClrEnterCriticalSection(m_CritSec.Val());
+ }
+
+ ~CritSecHolder()
+ {
+ ClrLeaveCriticalSection(m_CritSec.Val());
+ }
+
+private:
+ CritSecObject& m_CritSec;
+
+ // No copying or assignment allowed.
+ CritSecHolder(const CritSecHolder&) = delete;
+ CritSecHolder& operator=(const CritSecHolder&) = delete;
+};
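A sketch of the lazily created lock guarding static data; 's_lock', 's_counter', and 'BumpCounter' are illustrative names.

    static CritSecObject s_lock;
    static int           s_counter;

    void BumpCounter()
    {
        CritSecHolder holder(s_lock); // enters the critical section; leaves in the destructor
        s_counter++;
    }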
+
+#endif // _UTILS_H_
diff --git a/src/jit/valuenum.cpp b/src/jit/valuenum.cpp
new file mode 100644
index 0000000000..5bc96ed4a9
--- /dev/null
+++ b/src/jit/valuenum.cpp
@@ -0,0 +1,7518 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX ValueNum XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "valuenum.h"
+#include "ssaconfig.h"
+
+VNFunc GetVNFuncForOper(genTreeOps oper, bool isUnsigned)
+{
+ if (!isUnsigned || (oper == GT_EQ) || (oper == GT_NE))
+ {
+ return VNFunc(oper);
+ }
+ switch (oper)
+ {
+ case GT_LT:
+ return VNF_LT_UN;
+ case GT_LE:
+ return VNF_LE_UN;
+ case GT_GE:
+ return VNF_GE_UN;
+ case GT_GT:
+ return VNF_GT_UN;
+ case GT_ADD:
+ return VNF_ADD_UN;
+ case GT_SUB:
+ return VNF_SUB_UN;
+ case GT_MUL:
+ return VNF_MUL_UN;
+ case GT_DIV:
+ return VNF_DIV_UN;
+ case GT_MOD:
+ return VNF_MOD_UN;
+
+ case GT_NOP:
+ case GT_COMMA:
+ return VNFunc(oper);
+ default:
+ unreached();
+ }
+}
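A few concrete mappings implied by the function above, for reference.

    // GetVNFuncForOper(GT_LT,  true)  -> VNF_LT_UN
    // GetVNFuncForOper(GT_EQ,  true)  -> VNFunc(GT_EQ)   (equality ignores signedness)
    // GetVNFuncForOper(GT_ADD, false) -> VNFunc(GT_ADD)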
+
+ValueNumStore::ValueNumStore(Compiler* comp, IAllocator* alloc)
+ : m_pComp(comp)
+ , m_alloc(alloc)
+ ,
+#ifdef DEBUG
+ m_numMapSels(0)
+ ,
+#endif
+ m_nextChunkBase(0)
+ , m_fixedPointMapSels(alloc, 8)
+ , m_chunks(alloc, 8)
+ , m_intCnsMap(nullptr)
+ , m_longCnsMap(nullptr)
+ , m_handleMap(nullptr)
+ , m_floatCnsMap(nullptr)
+ , m_doubleCnsMap(nullptr)
+ , m_byrefCnsMap(nullptr)
+ , m_VNFunc0Map(nullptr)
+ , m_VNFunc1Map(nullptr)
+ , m_VNFunc2Map(nullptr)
+ , m_VNFunc3Map(nullptr)
+ , m_VNFunc4Map(nullptr)
+ , m_uPtrToLocNotAFieldCount(1)
+{
+ // We have no current allocation chunks.
+ for (unsigned i = 0; i < TYP_COUNT; i++)
+ {
+ for (unsigned j = CEA_None; j <= CEA_Count + MAX_LOOP_NUM; j++)
+ {
+ m_curAllocChunk[i][j] = NoChunk;
+ }
+ }
+
+ for (unsigned i = 0; i < SmallIntConstNum; i++)
+ {
+ m_VNsForSmallIntConsts[i] = NoVN;
+ }
+ // We will reserve chunk 0 to hold some special constants, like the constant NULL, the "exception" value, and the
+ // "zero map."
+ Chunk* specialConstChunk = new (m_alloc) Chunk(m_alloc, &m_nextChunkBase, TYP_REF, CEA_Const, MAX_LOOP_NUM);
+ specialConstChunk->m_numUsed +=
+ SRC_NumSpecialRefConsts; // Implicitly allocate 0 ==> NULL, and 1 ==> Exception, 2 ==> ZeroMap.
+ ChunkNum cn = m_chunks.Push(specialConstChunk);
+ assert(cn == 0);
+
+ m_mapSelectBudget = JitConfig.JitVNMapSelBudget();
+}
+
+// static.
+template <typename T>
+T ValueNumStore::EvalOp(VNFunc vnf, T v0)
+{
+ genTreeOps oper = genTreeOps(vnf);
+
+ // Here we handle those unary ops that are the same for integral and floating-point types.
+ switch (oper)
+ {
+ case GT_NEG:
+ return -v0;
+ default:
+ // Must be int-specific
+ return EvalOpIntegral(vnf, v0);
+ }
+}
+
+template <typename T>
+T ValueNumStore::EvalOpIntegral(VNFunc vnf, T v0)
+{
+ genTreeOps oper = genTreeOps(vnf);
+
+ // Here we handle unary ops that are the same for all integral types.
+ switch (oper)
+ {
+ case GT_NOT:
+ return ~v0;
+ default:
+ unreached();
+ }
+}
+
+// static
+template <typename T>
+T ValueNumStore::EvalOp(VNFunc vnf, T v0, T v1, ValueNum* pExcSet)
+{
+ if (vnf < VNF_Boundary)
+ {
+ genTreeOps oper = genTreeOps(vnf);
+ // Here we handle those that are the same for integral and floating-point types.
+ switch (oper)
+ {
+ case GT_ADD:
+ return v0 + v1;
+ case GT_SUB:
+ return v0 - v1;
+ case GT_MUL:
+ return v0 * v1;
+ case GT_DIV:
+ if (IsIntZero(v1))
+ {
+ *pExcSet = VNExcSetSingleton(VNForFunc(TYP_REF, VNF_DivideByZeroExc));
+ return (T)0;
+ }
+ if (IsOverflowIntDiv(v0, v1))
+ {
+ *pExcSet = VNExcSetSingleton(VNForFunc(TYP_REF, VNF_ArithmeticExc));
+ return (T)0;
+ }
+ else
+ {
+ return v0 / v1;
+ }
+
+ default:
+ // Must be int-specific
+ return EvalOpIntegral(vnf, v0, v1, pExcSet);
+ }
+ }
+ else // must be a VNF_ function
+ {
+ typedef typename jitstd::make_unsigned<T>::type UT;
+ switch (vnf)
+ {
+ case VNF_GT_UN:
+ return T(UT(v0) > UT(v1));
+ case VNF_GE_UN:
+ return T(UT(v0) >= UT(v1));
+ case VNF_LT_UN:
+ return T(UT(v0) < UT(v1));
+ case VNF_LE_UN:
+ return T(UT(v0) <= UT(v1));
+ case VNF_ADD_UN:
+ return T(UT(v0) + UT(v1));
+ case VNF_SUB_UN:
+ return T(UT(v0) - UT(v1));
+ case VNF_MUL_UN:
+ return T(UT(v0) * UT(v1));
+ case VNF_DIV_UN:
+ if (IsIntZero(v1))
+ {
+ *pExcSet = VNExcSetSingleton(VNForFunc(TYP_REF, VNF_DivideByZeroExc));
+ return (T)0;
+ }
+ else
+ {
+ return T(UT(v0) / UT(v1));
+ }
+ default:
+ // Must be int-specific
+ return EvalOpIntegral(vnf, v0, v1, pExcSet);
+ }
+ }
+}
+
+// Specialization for double for floating-point operations, which don't involve unsigned variants.
+template <>
+double ValueNumStore::EvalOp<double>(VNFunc vnf, double v0, double v1, ValueNum* pExcSet)
+{
+ genTreeOps oper = genTreeOps(vnf);
+ // Here we handle those that are the same for floating-point types.
+ switch (oper)
+ {
+ case GT_ADD:
+ return v0 + v1;
+ case GT_SUB:
+ return v0 - v1;
+ case GT_MUL:
+ return v0 * v1;
+ case GT_DIV:
+ return v0 / v1;
+ case GT_MOD:
+ return fmod(v0, v1);
+
+ default:
+ unreached();
+ }
+}
+
+template <typename T>
+int ValueNumStore::EvalComparison(VNFunc vnf, T v0, T v1)
+{
+ if (vnf < VNF_Boundary)
+ {
+ genTreeOps oper = genTreeOps(vnf);
+ // Here we handle those that are the same for floating-point types.
+ switch (oper)
+ {
+ case GT_EQ:
+ return v0 == v1;
+ case GT_NE:
+ return v0 != v1;
+ case GT_GT:
+ return v0 > v1;
+ case GT_GE:
+ return v0 >= v1;
+ case GT_LT:
+ return v0 < v1;
+ case GT_LE:
+ return v0 <= v1;
+ default:
+ unreached();
+ }
+ }
+ else // must be a VNF_ function
+ {
+ switch (vnf)
+ {
+ case VNF_GT_UN:
+ return unsigned(v0) > unsigned(v1);
+ case VNF_GE_UN:
+ return unsigned(v0) >= unsigned(v1);
+ case VNF_LT_UN:
+ return unsigned(v0) < unsigned(v1);
+ case VNF_LE_UN:
+ return unsigned(v0) <= unsigned(v1);
+ default:
+ unreached();
+ }
+ }
+}
+
+/* static */
+template <typename T>
+int ValueNumStore::EvalOrderedComparisonFloat(VNFunc vnf, T v0, T v1)
+{
+ // !! NOTE !!
+ //
+ // All comparisons below are ordered comparisons.
+ //
+ // We should guard this function from unordered comparisons
+ // identified by the GTF_RELOP_NAN_UN flag. Either the flag
+ // should be bubbled (similar to GTF_UNSIGNED for ints)
+ // to this point or we should bail much earlier if any of
+ // the operands are NaN.
+ //
+ genTreeOps oper = genTreeOps(vnf);
+ // Here we handle those that are the same for floating-point types.
+ switch (oper)
+ {
+ case GT_EQ:
+ return v0 == v1;
+ case GT_NE:
+ return v0 != v1;
+ case GT_GT:
+ return v0 > v1;
+ case GT_GE:
+ return v0 >= v1;
+ case GT_LT:
+ return v0 < v1;
+ case GT_LE:
+ return v0 <= v1;
+ default:
+ unreached();
+ }
+}
+
+template <>
+int ValueNumStore::EvalComparison<double>(VNFunc vnf, double v0, double v1)
+{
+ return EvalOrderedComparisonFloat(vnf, v0, v1);
+}
+
+template <>
+int ValueNumStore::EvalComparison<float>(VNFunc vnf, float v0, float v1)
+{
+ return EvalOrderedComparisonFloat(vnf, v0, v1);
+}
+
+template <typename T>
+T ValueNumStore::EvalOpIntegral(VNFunc vnf, T v0, T v1, ValueNum* pExcSet)
+{
+ genTreeOps oper = genTreeOps(vnf);
+ switch (oper)
+ {
+ case GT_EQ:
+ return v0 == v1;
+ case GT_NE:
+ return v0 != v1;
+ case GT_GT:
+ return v0 > v1;
+ case GT_GE:
+ return v0 >= v1;
+ case GT_LT:
+ return v0 < v1;
+ case GT_LE:
+ return v0 <= v1;
+ case GT_OR:
+ return v0 | v1;
+ case GT_XOR:
+ return v0 ^ v1;
+ case GT_AND:
+ return v0 & v1;
+ case GT_LSH:
+ return v0 << v1;
+ case GT_RSH:
+ return v0 >> v1;
+ case GT_RSZ:
+ if (sizeof(T) == 8)
+ {
+ return UINT64(v0) >> v1;
+ }
+ else
+ {
+ return UINT32(v0) >> v1;
+ }
+ case GT_ROL:
+ if (sizeof(T) == 8)
+ {
+ return (v0 << v1) | (UINT64(v0) >> (64 - v1));
+ }
+ else
+ {
+ return (v0 << v1) | (UINT32(v0) >> (32 - v1));
+ }
+
+ case GT_ROR:
+ if (sizeof(T) == 8)
+ {
+ return (v0 << (64 - v1)) | (UINT64(v0) >> v1);
+ }
+ else
+ {
+ return (v0 << (32 - v1)) | (UINT32(v0) >> v1);
+ }
+
+ case GT_DIV:
+ case GT_MOD:
+ if (v1 == 0)
+ {
+ *pExcSet = VNExcSetSingleton(VNForFunc(TYP_REF, VNF_DivideByZeroExc));
+ }
+ else if (IsOverflowIntDiv(v0, v1))
+ {
+ *pExcSet = VNExcSetSingleton(VNForFunc(TYP_REF, VNF_ArithmeticExc));
+ return 0;
+ }
+ else // We are not dividing by Zero, so we can calculate the exact result.
+ {
+ // Perform the appropriate operation.
+ if (oper == GT_DIV)
+ {
+ return v0 / v1;
+ }
+ else // Must be GT_MOD
+ {
+ return v0 % v1;
+ }
+ }
+
+ case GT_UDIV:
+ case GT_UMOD:
+ if (v1 == 0)
+ {
+ *pExcSet = VNExcSetSingleton(VNForFunc(TYP_REF, VNF_DivideByZeroExc));
+ return 0;
+ }
+ else // We are not dividing by Zero, so we can calculate the exact result.
+ {
+ typedef typename jitstd::make_unsigned<T>::type UT;
+ // We need to force the source operands of the divide or mod operation
+ // to be considered unsigned.
+ //
+ if (oper == GT_UDIV)
+ {
+ // This returns unsigned(v0) / unsigned(v1) for both sizes of integers
+ return T(UT(v0) / UT(v1));
+ }
+ else // Must be GT_UMOD
+ {
+ // This returns unsigned(v0) % unsigned(v1) for both sizes of integers
+ return T(UT(v0) % UT(v1));
+ }
+ }
+ default:
+ unreached(); // NYI?
+ }
+}
+
+ValueNum ValueNumStore::VNExcSetSingleton(ValueNum x)
+{
+ ValueNum res = VNForFunc(TYP_REF, VNF_ExcSetCons, x, VNForEmptyExcSet());
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" " STR_VN "%x = singleton exc set", res);
+ vnDump(m_pComp, x);
+ printf("\n");
+ }
+#endif
+ return res;
+}
+
+ValueNumPair ValueNumStore::VNPExcSetSingleton(ValueNumPair xp)
+{
+ return ValueNumPair(VNExcSetSingleton(xp.GetLiberal()), VNExcSetSingleton(xp.GetConservative()));
+}
+
+ValueNum ValueNumStore::VNExcSetUnion(ValueNum xs0, ValueNum xs1 DEBUGARG(bool topLevel))
+{
+ if (xs0 == VNForEmptyExcSet())
+ {
+ return xs1;
+ }
+ else if (xs1 == VNForEmptyExcSet())
+ {
+ return xs0;
+ }
+ else
+ {
+ VNFuncApp funcXs0;
+ bool b0 = GetVNFunc(xs0, &funcXs0);
+ assert(b0 && funcXs0.m_func == VNF_ExcSetCons); // Precondition: xs0 is an exception set.
+ VNFuncApp funcXs1;
+ bool b1 = GetVNFunc(xs1, &funcXs1);
+ assert(b1 && funcXs1.m_func == VNF_ExcSetCons); // Precondition: xs1 is an exception set.
+ ValueNum res = NoVN;
+ if (funcXs0.m_args[0] < funcXs1.m_args[0])
+ {
+ res = VNForFunc(TYP_REF, VNF_ExcSetCons, funcXs0.m_args[0],
+ VNExcSetUnion(funcXs0.m_args[1], xs1 DEBUGARG(false)));
+ }
+ else if (funcXs0.m_args[0] == funcXs1.m_args[0])
+ {
+ // Equal elements; only add one to the result.
+ res = VNExcSetUnion(funcXs0.m_args[1], xs1);
+ }
+ else
+ {
+ assert(funcXs0.m_args[0] > funcXs1.m_args[0]);
+ res = VNForFunc(TYP_REF, VNF_ExcSetCons, funcXs1.m_args[0],
+ VNExcSetUnion(xs0, funcXs1.m_args[1] DEBUGARG(false)));
+ }
+
+ return res;
+ }
+}
+
+ValueNumPair ValueNumStore::VNPExcSetUnion(ValueNumPair xs0vnp, ValueNumPair xs1vnp)
+{
+ return ValueNumPair(VNExcSetUnion(xs0vnp.GetLiberal(), xs1vnp.GetLiberal()),
+ VNExcSetUnion(xs0vnp.GetConservative(), xs1vnp.GetConservative()));
+}
+
+void ValueNumStore::VNUnpackExc(ValueNum vnWx, ValueNum* pvn, ValueNum* pvnx)
+{
+ assert(vnWx != NoVN);
+ VNFuncApp funcApp;
+ if (GetVNFunc(vnWx, &funcApp) && funcApp.m_func == VNF_ValWithExc)
+ {
+ *pvn = funcApp.m_args[0];
+ *pvnx = funcApp.m_args[1];
+ }
+ else
+ {
+ *pvn = vnWx;
+ }
+}
+
+void ValueNumStore::VNPUnpackExc(ValueNumPair vnWx, ValueNumPair* pvn, ValueNumPair* pvnx)
+{
+ VNUnpackExc(vnWx.GetLiberal(), pvn->GetLiberalAddr(), pvnx->GetLiberalAddr());
+ VNUnpackExc(vnWx.GetConservative(), pvn->GetConservativeAddr(), pvnx->GetConservativeAddr());
+}
+
+ValueNum ValueNumStore::VNNormVal(ValueNum vn)
+{
+ VNFuncApp funcApp;
+ if (GetVNFunc(vn, &funcApp) && funcApp.m_func == VNF_ValWithExc)
+ {
+ return funcApp.m_args[0];
+ }
+ else
+ {
+ return vn;
+ }
+}
+
+ValueNumPair ValueNumStore::VNPNormVal(ValueNumPair vnp)
+{
+ return ValueNumPair(VNNormVal(vnp.GetLiberal()), VNNormVal(vnp.GetConservative()));
+}
+
+ValueNum ValueNumStore::VNExcVal(ValueNum vn)
+{
+ VNFuncApp funcApp;
+ if (GetVNFunc(vn, &funcApp) && funcApp.m_func == VNF_ValWithExc)
+ {
+ return funcApp.m_args[1];
+ }
+ else
+ {
+ return VNForEmptyExcSet();
+ }
+}
+
+ValueNumPair ValueNumStore::VNPExcVal(ValueNumPair vnp)
+{
+ return ValueNumPair(VNExcVal(vnp.GetLiberal()), VNExcVal(vnp.GetConservative()));
+}
+
+// If vn "excSet" is not "VNForEmptyExcSet()", return "VNF_ValWithExc(vn, excSet)". Otherwise,
+// just return "vn".
+ValueNum ValueNumStore::VNWithExc(ValueNum vn, ValueNum excSet)
+{
+ if (excSet == VNForEmptyExcSet())
+ {
+ return vn;
+ }
+ else
+ {
+ ValueNum vnNorm;
+ ValueNum vnX = VNForEmptyExcSet();
+ VNUnpackExc(vn, &vnNorm, &vnX);
+ return VNForFunc(TypeOfVN(vnNorm), VNF_ValWithExc, vnNorm, VNExcSetUnion(vnX, excSet));
+ }
+}
+
+ValueNumPair ValueNumStore::VNPWithExc(ValueNumPair vnp, ValueNumPair excSetVNP)
+{
+ return ValueNumPair(VNWithExc(vnp.GetLiberal(), excSetVNP.GetLiberal()),
+ VNWithExc(vnp.GetConservative(), excSetVNP.GetConservative()));
+}
+
+bool ValueNumStore::IsKnownNonNull(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+ VNFuncApp funcAttr;
+ return GetVNFunc(vn, &funcAttr) && (s_vnfOpAttribs[funcAttr.m_func] & VNFOA_KnownNonNull) != 0;
+}
+
+bool ValueNumStore::IsSharedStatic(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+ VNFuncApp funcAttr;
+ return GetVNFunc(vn, &funcAttr) && (s_vnfOpAttribs[funcAttr.m_func] & VNFOA_SharedStatic) != 0;
+}
+
+ValueNumStore::Chunk::Chunk(
+ IAllocator* alloc, ValueNum* pNextBaseVN, var_types typ, ChunkExtraAttribs attribs, BasicBlock::loopNumber loopNum)
+ : m_defs(nullptr), m_numUsed(0), m_baseVN(*pNextBaseVN), m_typ(typ), m_attribs(attribs), m_loopNum(loopNum)
+{
+ // Allocate "m_defs" here, according to the typ/attribs pair.
+ switch (attribs)
+ {
+ case CEA_None:
+ break; // Nothing to do.
+ case CEA_Const:
+ switch (typ)
+ {
+ case TYP_INT:
+ m_defs = new (alloc) Alloc<TYP_INT>::Type[ChunkSize];
+ break;
+ case TYP_FLOAT:
+ m_defs = new (alloc) Alloc<TYP_FLOAT>::Type[ChunkSize];
+ break;
+ case TYP_LONG:
+ m_defs = new (alloc) Alloc<TYP_LONG>::Type[ChunkSize];
+ break;
+ case TYP_DOUBLE:
+ m_defs = new (alloc) Alloc<TYP_DOUBLE>::Type[ChunkSize];
+ break;
+ case TYP_BYREF:
+ m_defs = new (alloc) Alloc<TYP_BYREF>::Type[ChunkSize];
+ break;
+ case TYP_REF:
+ // We allocate space for a single REF constant, NULL, so we can access these values uniformly.
+ // Since this value is always the same, we represent it as a static.
+ m_defs = &s_specialRefConsts[0];
+ break; // Nothing to do.
+ default:
+ assert(false); // Should not reach here.
+ }
+ break;
+
+ case CEA_Handle:
+ m_defs = new (alloc) VNHandle[ChunkSize];
+ break;
+
+ case CEA_Func0:
+ m_defs = new (alloc) VNFunc[ChunkSize];
+ break;
+
+ case CEA_Func1:
+ m_defs = new (alloc) VNDefFunc1Arg[ChunkSize];
+ break;
+ case CEA_Func2:
+ m_defs = new (alloc) VNDefFunc2Arg[ChunkSize];
+ break;
+ case CEA_Func3:
+ m_defs = new (alloc) VNDefFunc3Arg[ChunkSize];
+ break;
+ case CEA_Func4:
+ m_defs = new (alloc) VNDefFunc4Arg[ChunkSize];
+ break;
+ default:
+ unreached();
+ }
+ *pNextBaseVN += ChunkSize;
+}
+
+ValueNumStore::Chunk* ValueNumStore::GetAllocChunk(var_types typ,
+ ChunkExtraAttribs attribs,
+ BasicBlock::loopNumber loopNum)
+{
+ Chunk* res;
+ unsigned index;
+ if (loopNum == MAX_LOOP_NUM)
+ {
+ // Loop nest is unknown/irrelevant for this VN.
+ index = attribs;
+ }
+ else
+ {
+ // Loop nest is interesting. Since we know this is only true for unique VNs, we know attribs will
+ // be CEA_None and can just index based on loop number.
+ noway_assert(attribs == CEA_None);
+ // Map NOT_IN_LOOP -> MAX_LOOP_NUM to make the index range contiguous [0..MAX_LOOP_NUM]
+ index = CEA_Count + (loopNum == BasicBlock::NOT_IN_LOOP ? MAX_LOOP_NUM : loopNum);
+ }
+ ChunkNum cn = m_curAllocChunk[typ][index];
+ if (cn != NoChunk)
+ {
+ res = m_chunks.Get(cn);
+ if (res->m_numUsed < ChunkSize)
+ {
+ return res;
+ }
+ }
+ // Otherwise, must allocate a new one.
+ res = new (m_alloc) Chunk(m_alloc, &m_nextChunkBase, typ, attribs, loopNum);
+ cn = m_chunks.Push(res);
+ m_curAllocChunk[typ][index] = cn;
+ return res;
+}
+
+ValueNum ValueNumStore::VNForIntCon(INT32 cnsVal)
+{
+ if (IsSmallIntConst(cnsVal))
+ {
+ unsigned ind = cnsVal - SmallIntConstMin;
+ ValueNum vn = m_VNsForSmallIntConsts[ind];
+ if (vn != NoVN)
+ {
+ return vn;
+ }
+ vn = GetVNForIntCon(cnsVal);
+ m_VNsForSmallIntConsts[ind] = vn;
+ return vn;
+ }
+ else
+ {
+ return GetVNForIntCon(cnsVal);
+ }
+}
+
+ValueNum ValueNumStore::VNForLongCon(INT64 cnsVal)
+{
+ ValueNum res;
+ if (GetLongCnsMap()->Lookup(cnsVal, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(TYP_LONG, CEA_Const);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<INT64*>(c->m_defs)[offsetWithinChunk] = cnsVal;
+ GetLongCnsMap()->Set(cnsVal, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForFloatCon(float cnsVal)
+{
+ ValueNum res;
+ if (GetFloatCnsMap()->Lookup(cnsVal, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(TYP_FLOAT, CEA_Const);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<float*>(c->m_defs)[offsetWithinChunk] = cnsVal;
+ GetFloatCnsMap()->Set(cnsVal, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForDoubleCon(double cnsVal)
+{
+ ValueNum res;
+ if (GetDoubleCnsMap()->Lookup(cnsVal, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(TYP_DOUBLE, CEA_Const);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<double*>(c->m_defs)[offsetWithinChunk] = cnsVal;
+ GetDoubleCnsMap()->Set(cnsVal, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForByrefCon(INT64 cnsVal)
+{
+ ValueNum res;
+ if (GetByrefCnsMap()->Lookup(cnsVal, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(TYP_BYREF, CEA_Const);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<INT64*>(c->m_defs)[offsetWithinChunk] = cnsVal;
+ GetByrefCnsMap()->Set(cnsVal, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForCastOper(var_types castToType, bool srcIsUnsigned /*=false*/)
+{
+ assert(castToType != TYP_STRUCT);
+ INT32 cnsVal = INT32(castToType) << INT32(VCA_BitCount);
+ assert((cnsVal & INT32(VCA_ReservedBits)) == 0);
+
+ if (srcIsUnsigned)
+ {
+ // We record the srcIsUnsigned by or-ing a 0x01
+ cnsVal |= INT32(VCA_UnsignedSrc);
+ }
+ ValueNum result = VNForIntCon(cnsVal);
+
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" VNForCastOper(%s%s) is " STR_VN "%x\n", varTypeName(castToType),
+ srcIsUnsigned ? ", unsignedSrc" : "", result);
+ }
+#endif
+
+ return result;
+}
+
+ValueNum ValueNumStore::VNForHandle(ssize_t cnsVal, unsigned handleFlags)
+{
+ assert((handleFlags & ~GTF_ICON_HDL_MASK) == 0);
+
+ ValueNum res;
+ VNHandle handle;
+ VNHandle::Initialize(&handle, cnsVal, handleFlags);
+ if (GetHandleMap()->Lookup(handle, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(TYP_I_IMPL, CEA_Handle);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNHandle*>(c->m_defs)[offsetWithinChunk] = handle;
+ GetHandleMap()->Set(handle, res);
+ return res;
+ }
+}
+
+// Returns the value number for zero of the given "typ".
+// It has an unreached() for a "typ" that has no zero value, such as TYP_BYREF.
+ValueNum ValueNumStore::VNZeroForType(var_types typ)
+{
+ switch (typ)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ case TYP_USHORT:
+ case TYP_INT:
+ case TYP_UINT:
+ return VNForIntCon(0);
+ case TYP_LONG:
+ case TYP_ULONG:
+ return VNForLongCon(0);
+ case TYP_FLOAT:
+#if FEATURE_X87_DOUBLES
+ return VNForDoubleCon(0.0);
+#else
+ return VNForFloatCon(0.0f);
+#endif
+ case TYP_DOUBLE:
+ return VNForDoubleCon(0.0);
+ case TYP_REF:
+ case TYP_ARRAY:
+ return VNForNull();
+ case TYP_STRUCT:
+#ifdef FEATURE_SIMD
+ // TODO-CQ: Improve value numbering for SIMD types.
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+#endif // FEATURE_SIMD
+ return VNForZeroMap(); // Recursion!
+
+ // These should be unreached.
+ default:
+ unreached(); // Should handle all types.
+ }
+}
+
+// Returns the value number for one of the given "typ".
+// It returns NoVN for a "typ" that has no one value, such as TYP_REF.
+ValueNum ValueNumStore::VNOneForType(var_types typ)
+{
+ switch (typ)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ case TYP_USHORT:
+ case TYP_INT:
+ case TYP_UINT:
+ return VNForIntCon(1);
+ case TYP_LONG:
+ case TYP_ULONG:
+ return VNForLongCon(1);
+ case TYP_FLOAT:
+ return VNForFloatCon(1.0f);
+ case TYP_DOUBLE:
+ return VNForDoubleCon(1.0);
+
+ default:
+ return NoVN;
+ }
+}
+
+class Object* ValueNumStore::s_specialRefConsts[] = {nullptr, nullptr, nullptr};
+
+// Nullary operators (i.e., symbolic constants).
+ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func)
+{
+ assert(VNFuncArity(func) == 0);
+
+ ValueNum res;
+
+ if (GetVNFunc0Map()->Lookup(func, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(typ, CEA_Func0);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNFunc*>(c->m_defs)[offsetWithinChunk] = func;
+ GetVNFunc0Map()->Set(func, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN)
+{
+ assert(arg0VN == VNNormVal(arg0VN)); // Arguments don't carry exceptions.
+
+ ValueNum res;
+ VNDefFunc1Arg fstruct(func, arg0VN);
+
+ // Do constant-folding.
+ if (CanEvalForConstantArgs(func) && IsVNConstant(arg0VN))
+ {
+ return EvalFuncForConstantArgs(typ, func, arg0VN);
+ }
+
+ if (GetVNFunc1Map()->Lookup(fstruct, &res))
+ {
+ return res;
+ }
+ else
+ {
+ // Otherwise, create a new VN for this application.
+ Chunk* c = GetAllocChunk(typ, CEA_Func1);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNDefFunc1Arg*>(c->m_defs)[offsetWithinChunk] = fstruct;
+ GetVNFunc1Map()->Set(fstruct, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN)
+{
+ assert(arg0VN != NoVN && arg1VN != NoVN);
+ assert(arg0VN == VNNormVal(arg0VN)); // Arguments carry no exceptions.
+ assert(arg1VN == VNNormVal(arg1VN)); // Arguments carry no exceptions.
+ assert(VNFuncArity(func) == 2);
+ assert(func != VNF_MapSelect); // Precondition: use the special function VNForMapSelect defined for that.
+
+ ValueNum res;
+
+ // Do constant-folding.
+ if (CanEvalForConstantArgs(func) && IsVNConstant(arg0VN) && IsVNConstant(arg1VN))
+ {
+ bool canFold = true; // Normally we will be able to fold this 'func'
+
+ // Special case for VNF_Cast of constant handles
+ // Don't allow eval/fold of a GT_CAST(non-I_IMPL, Handle)
+ //
+ if ((func == VNF_Cast) && (typ != TYP_I_IMPL) && IsVNHandle(arg0VN))
+ {
+ canFold = false;
+ }
+
+ // It is possible for us to have mismatched types (see Bug 750863)
+ // We don't try to fold a binary operation when one of the constant operands
+ // is a floating-point constant and the other is not.
+ //
+ bool arg0IsFloating = varTypeIsFloating(TypeOfVN(arg0VN));
+ bool arg1IsFloating = varTypeIsFloating(TypeOfVN(arg1VN));
+ if (arg0IsFloating != arg1IsFloating)
+ {
+ canFold = false;
+ }
+
+ // NaNs are unordered with respect to other floats. While an ordered
+ // comparison would return false, an unordered comparison
+ // will return true if any operands are a NaN. We only perform
+ // ordered NaN comparison in EvalComparison.
+ if ((arg0IsFloating && _isnan(GetConstantDouble(arg0VN))) ||
+ (arg1IsFloating && _isnan(GetConstantDouble(arg1VN))))
+ {
+ canFold = false;
+ }
+
+ if (canFold)
+ {
+ return EvalFuncForConstantArgs(typ, func, arg0VN, arg1VN);
+ }
+ }
+ // We canonicalize commutative operations.
+ // (Perhaps should eventually handle associative/commutative [AC] ops -- but that gets complicated...)
+ if (VNFuncIsCommutative(func))
+ {
+ // Order arg0 arg1 by numerical VN value.
+ if (arg0VN > arg1VN)
+ {
+ jitstd::swap(arg0VN, arg1VN);
+ }
+ }
+ VNDefFunc2Arg fstruct(func, arg0VN, arg1VN);
+ if (GetVNFunc2Map()->Lookup(fstruct, &res))
+ {
+ return res;
+ }
+ else
+ {
+ // We have ways of evaluating some binary functions.
+ if (func < VNF_Boundary)
+ {
+ if (typ != TYP_BYREF) // We don't want/need to optimize a zero byref
+ {
+ genTreeOps oper = genTreeOps(func);
+ ValueNum ZeroVN, OneVN; // We may need to create one of these in the switch below.
+ switch (oper)
+ {
+ case GT_ADD:
+ // This identity does not apply for floating point (when x == -0.0)
+ if (!varTypeIsFloating(typ))
+ {
+ // (x + 0) == (0 + x) => x
+ ZeroVN = VNZeroForType(typ);
+ if (arg0VN == ZeroVN)
+ {
+ return arg1VN;
+ }
+ else if (arg1VN == ZeroVN)
+ {
+ return arg0VN;
+ }
+ }
+ break;
+
+ case GT_SUB:
+ // (x - 0) => x
+ ZeroVN = VNZeroForType(typ);
+ if (arg1VN == ZeroVN)
+ {
+ return arg0VN;
+ }
+ break;
+
+ case GT_MUL:
+ // (x * 1) == (1 * x) => x
+ OneVN = VNOneForType(typ);
+ if (OneVN != NoVN)
+ {
+ if (arg0VN == OneVN)
+ {
+ return arg1VN;
+ }
+ else if (arg1VN == OneVN)
+ {
+ return arg0VN;
+ }
+ }
+
+ if (!varTypeIsFloating(typ))
+ {
+ // (x * 0) == (0 * x) => 0 (unless x is NaN, which we must assume a fp value may be)
+ ZeroVN = VNZeroForType(typ);
+ if (arg0VN == ZeroVN)
+ {
+ return ZeroVN;
+ }
+ else if (arg1VN == ZeroVN)
+ {
+ return ZeroVN;
+ }
+ }
+ break;
+
+ case GT_DIV:
+ case GT_UDIV:
+ // (x / 1) => x
+ OneVN = VNOneForType(typ);
+ if (OneVN != NoVN)
+ {
+ if (arg1VN == OneVN)
+ {
+ return arg0VN;
+ }
+ }
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ // (x | 0) == (0 | x) => x
+ // (x ^ 0) == (0 ^ x) => x
+ ZeroVN = VNZeroForType(typ);
+ if (arg0VN == ZeroVN)
+ {
+ return arg1VN;
+ }
+ else if (arg1VN == ZeroVN)
+ {
+ return arg0VN;
+ }
+ break;
+
+ case GT_AND:
+ // (x & 0) == (0 & x) => 0
+ ZeroVN = VNZeroForType(typ);
+ if (arg0VN == ZeroVN)
+ {
+ return ZeroVN;
+ }
+ else if (arg1VN == ZeroVN)
+ {
+ return ZeroVN;
+ }
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ // (x << 0) => x
+ // (x >> 0) => x
+ // (x rol 0) => x
+ // (x ror 0) => x
+ ZeroVN = VNZeroForType(typ);
+ if (arg1VN == ZeroVN)
+ {
+ return arg0VN;
+ }
+ break;
+
+ case GT_EQ:
+ // (x == x) => true (unless x is NaN)
+ if (!varTypeIsFloating(TypeOfVN(arg0VN)) && (arg0VN != NoVN) && (arg0VN == arg1VN))
+ {
+ return VNOneForType(typ);
+ }
+ if ((arg0VN == VNForNull() && IsKnownNonNull(arg1VN)) ||
+ (arg1VN == VNForNull() && IsKnownNonNull(arg0VN)))
+ {
+ return VNZeroForType(typ);
+ }
+ break;
+ case GT_NE:
+ // (x != x) => false (unless x is NaN)
+ if (!varTypeIsFloating(TypeOfVN(arg0VN)) && (arg0VN != NoVN) && (arg0VN == arg1VN))
+ {
+ return VNZeroForType(typ);
+ }
+ if ((arg0VN == VNForNull() && IsKnownNonNull(arg1VN)) ||
+ (arg1VN == VNForNull() && IsKnownNonNull(arg0VN)))
+ {
+ return VNOneForType(typ);
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+ else // must be a VNF_ function
+ {
+ if (func == VNF_CastClass)
+ {
+ // In terms of values, a castclass always returns its second argument, the object being cast.
+ // The IL operation may also throw an exception
+ return VNWithExc(arg1VN, VNExcSetSingleton(VNForFunc(TYP_REF, VNF_InvalidCastExc, arg1VN, arg0VN)));
+ }
+ }
+
+ // Otherwise, assign a new VN for the function application.
+ Chunk* c = GetAllocChunk(typ, CEA_Func2);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNDefFunc2Arg*>(c->m_defs)[offsetWithinChunk] = fstruct;
+ GetVNFunc2Map()->Set(fstruct, res);
+ return res;
+ }
+}
+
+//------------------------------------------------------------------------------
+// VNForMapStore : Evaluate VNF_MapStore with the given arguments.
+//
+//
+// Arguments:
+// typ - Value type
+// arg0VN - Map value number
+// arg1VN - Index value number
+// arg2VN - New value for map[index]
+//
+// Return Value:
+// Value number for the result of the evaluation.
+
+ValueNum ValueNumStore::VNForMapStore(var_types typ, ValueNum arg0VN, ValueNum arg1VN, ValueNum arg2VN)
+{
+ ValueNum result = VNForFunc(typ, VNF_MapStore, arg0VN, arg1VN, arg2VN);
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" VNForMapStore(" STR_VN "%x, " STR_VN "%x, " STR_VN "%x):%s returns ", arg0VN, arg1VN, arg2VN,
+ varTypeName(typ));
+ m_pComp->vnPrint(result, 1);
+ printf("\n");
+ }
+#endif
+ return result;
+}
+
+//------------------------------------------------------------------------------
+// VNForMapSelect : Evaluate VNF_MapSelect with the given arguments.
+//
+// Arguments:
+// vnk - Value number kind
+// typ - Value type
+// arg0VN - Map value number
+// arg1VN - Index value number
+//
+// Return Value:
+// Value number for the result of the evaluation.
+//
+// Notes:
+// This requires a "ValueNumKind" because it will attempt, given "select(phi(m1, ..., mk), ind)", to evaluate
+// "select(m1, ind)", ..., "select(mk, ind)" to see if they agree. It needs to know which kind of value number
+// (liberal/conservative) to read from the SSA def referenced in the phi argument.
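+//    For example, "select(store(m, i, v), i)" evaluates to "v", and when "i" and "j" are
+//    distinct constants, "select(store(m, i, v), j)" reduces to "select(m, j)" (axioms
+//    AX1 and AX2 in VNForMapSelectWork below).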
+
+ValueNum ValueNumStore::VNForMapSelect(ValueNumKind vnk, var_types typ, ValueNum arg0VN, ValueNum arg1VN)
+{
+ unsigned budget = m_mapSelectBudget;
+ bool usedRecursiveVN = false;
+ ValueNum result = VNForMapSelectWork(vnk, typ, arg0VN, arg1VN, &budget, &usedRecursiveVN);
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" VNForMapSelect(" STR_VN "%x, " STR_VN "%x):%s returns ", arg0VN, arg1VN, varTypeName(typ));
+ m_pComp->vnPrint(result, 1);
+ printf("\n");
+ }
+#endif
+ return result;
+}
+
+//------------------------------------------------------------------------------
+// VNForMapSelectWork : A method that does the work for VNForMapSelect and may call itself recursively.
+//
+// Arguments:
+// vnk - Value number kind
+// typ - Value type
+// arg0VN - Zeroth argument
+// arg1VN - First argument
+// pBudget - Remaining budget for the outer evaluation
+// pUsedRecursiveVN - Out-parameter that is set to true iff RecursiveVN was returned from this method
+// or from a method called during one of recursive invocations.
+//
+// Return Value:
+// Value number for the result of the evaluation.
+//
+// Notes:
+// This requires a "ValueNumKind" because it will attempt, given "select(phi(m1, ..., mk), ind)", to evaluate
+// "select(m1, ind)", ..., "select(mk, ind)" to see if they agree. It needs to know which kind of value number
+// (liberal/conservative) to read from the SSA def referenced in the phi argument.
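+//    "*pBudget" bounds the total number of MapSelect applications explored across the whole
+//    (possibly recursive) evaluation; once it is exhausted, a new opaque VNForExpr value
+//    number is produced, memoized, and returned instead.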
+
+ValueNum ValueNumStore::VNForMapSelectWork(
+ ValueNumKind vnk, var_types typ, ValueNum arg0VN, ValueNum arg1VN, unsigned* pBudget, bool* pUsedRecursiveVN)
+{
+TailCall:
+ // This label allows us to directly implement a tail call by setting up the arguments, and doing a goto to here.
+ assert(arg0VN != NoVN && arg1VN != NoVN);
+ assert(arg0VN == VNNormVal(arg0VN)); // Arguments carry no exceptions.
+ assert(arg1VN == VNNormVal(arg1VN)); // Arguments carry no exceptions.
+
+ *pUsedRecursiveVN = false;
+
+#ifdef DEBUG
+ // Provide a mechanism for writing tests that ensure we don't call this ridiculously often.
+ m_numMapSels++;
+#if 1
+// This printing is sometimes useful in debugging.
+// if ((m_numMapSels % 1000) == 0) printf("%d VNF_MapSelect applications.\n", m_numMapSels);
+#endif
+ unsigned selLim = JitConfig.JitVNMapSelLimit();
+ assert(selLim == 0 || m_numMapSels < selLim);
+#endif
+ ValueNum res;
+
+ VNDefFunc2Arg fstruct(VNF_MapSelect, arg0VN, arg1VN);
+ if (GetVNFunc2Map()->Lookup(fstruct, &res))
+ {
+ return res;
+ }
+ else
+ {
+
+ // Give up if we've run out of budget.
+ if (--(*pBudget) == 0)
+ {
+ // We have to use 'nullptr' for the basic block here, because subsequent expressions
+ // in different blocks may find this result in the VNFunc2Map -- other expressions in
+ // the IR may "evaluate" to this same VNForExpr, so it is not "unique" in the sense
+ // that permits the BasicBlock attribution.
+ res = VNForExpr(nullptr, typ);
+ GetVNFunc2Map()->Set(fstruct, res);
+ return res;
+ }
+
+ // If it's recursive, stop the recursion.
+ if (SelectIsBeingEvaluatedRecursively(arg0VN, arg1VN))
+ {
+ *pUsedRecursiveVN = true;
+ return RecursiveVN;
+ }
+
+ if (arg0VN == VNForZeroMap())
+ {
+ return VNZeroForType(typ);
+ }
+ else if (IsVNFunc(arg0VN))
+ {
+ VNFuncApp funcApp;
+ GetVNFunc(arg0VN, &funcApp);
+ if (funcApp.m_func == VNF_MapStore)
+ {
+ // select(store(m, i, v), i) == v
+ if (funcApp.m_args[1] == arg1VN)
+ {
+#if FEATURE_VN_TRACE_APPLY_SELECTORS
+ JITDUMP(" AX1: select([" STR_VN "%x]store(" STR_VN "%x, " STR_VN "%x, " STR_VN "%x), " STR_VN
+ "%x) ==> " STR_VN "%x.\n",
+ funcApp.m_args[0], arg0VN, funcApp.m_args[1], funcApp.m_args[2], arg1VN, funcApp.m_args[2]);
+#endif
+ return funcApp.m_args[2];
+ }
+ // i # j ==> select(store(m, i, v), j) == select(m, j)
+ // Currently the only source of distinctions is when both indices are constants.
+ else if (IsVNConstant(arg1VN) && IsVNConstant(funcApp.m_args[1]))
+ {
+ assert(funcApp.m_args[1] != arg1VN); // we already checked this above.
+#if FEATURE_VN_TRACE_APPLY_SELECTORS
+ JITDUMP(" AX2: " STR_VN "%x != " STR_VN "%x ==> select([" STR_VN "%x]store(" STR_VN
+ "%x, " STR_VN "%x, " STR_VN "%x), " STR_VN "%x) ==> select(" STR_VN "%x, " STR_VN "%x).\n",
+ arg1VN, funcApp.m_args[1], arg0VN, funcApp.m_args[0], funcApp.m_args[1], funcApp.m_args[2],
+ arg1VN, funcApp.m_args[0], arg1VN);
+#endif
+ // This is the equivalent of the recursive tail call:
+ // return VNForMapSelect(vnk, typ, funcApp.m_args[0], arg1VN);
+ // Make sure we capture any exceptions from the "i" and "v" of the store...
+ arg0VN = funcApp.m_args[0];
+ goto TailCall;
+ }
+ }
+ else if (funcApp.m_func == VNF_PhiDef || funcApp.m_func == VNF_PhiHeapDef)
+ {
+ unsigned lclNum = BAD_VAR_NUM;
+ bool isHeap = false;
+ VNFuncApp phiFuncApp;
+ bool defArgIsFunc = false;
+ if (funcApp.m_func == VNF_PhiDef)
+ {
+ lclNum = unsigned(funcApp.m_args[0]);
+ defArgIsFunc = GetVNFunc(funcApp.m_args[2], &phiFuncApp);
+ }
+ else
+ {
+ assert(funcApp.m_func == VNF_PhiHeapDef);
+ isHeap = true;
+ defArgIsFunc = GetVNFunc(funcApp.m_args[1], &phiFuncApp);
+ }
+ if (defArgIsFunc && phiFuncApp.m_func == VNF_Phi)
+ {
+ // select(phi(m1, m2), x): if select(m1, x) == select(m2, x), return that, else new fresh.
+ // Get the first argument of the phi.
+
+ // We need to be careful about breaking infinite recursion. Record the outer select.
+ m_fixedPointMapSels.Push(VNDefFunc2Arg(VNF_MapSelect, arg0VN, arg1VN));
+
+ assert(IsVNConstant(phiFuncApp.m_args[0]));
+ unsigned phiArgSsaNum = ConstantValue<unsigned>(phiFuncApp.m_args[0]);
+ ValueNum phiArgVN;
+ if (isHeap)
+ {
+ phiArgVN = m_pComp->GetHeapPerSsaData(phiArgSsaNum)->m_vnPair.Get(vnk);
+ }
+ else
+ {
+ phiArgVN = m_pComp->lvaTable[lclNum].GetPerSsaData(phiArgSsaNum)->m_vnPair.Get(vnk);
+ }
+ if (phiArgVN != ValueNumStore::NoVN)
+ {
+ bool allSame = true;
+ ValueNum argRest = phiFuncApp.m_args[1];
+ ValueNum sameSelResult =
+ VNForMapSelectWork(vnk, typ, phiArgVN, arg1VN, pBudget, pUsedRecursiveVN);
+ while (allSame && argRest != ValueNumStore::NoVN)
+ {
+ ValueNum cur = argRest;
+ VNFuncApp phiArgFuncApp;
+ if (GetVNFunc(argRest, &phiArgFuncApp) && phiArgFuncApp.m_func == VNF_Phi)
+ {
+ cur = phiArgFuncApp.m_args[0];
+ argRest = phiArgFuncApp.m_args[1];
+ }
+ else
+ {
+ argRest = ValueNumStore::NoVN; // Cause the loop to terminate.
+ }
+ assert(IsVNConstant(cur));
+ phiArgSsaNum = ConstantValue<unsigned>(cur);
+ if (isHeap)
+ {
+ phiArgVN = m_pComp->GetHeapPerSsaData(phiArgSsaNum)->m_vnPair.Get(vnk);
+ }
+ else
+ {
+ phiArgVN = m_pComp->lvaTable[lclNum].GetPerSsaData(phiArgSsaNum)->m_vnPair.Get(vnk);
+ }
+ if (phiArgVN == ValueNumStore::NoVN)
+ {
+ allSame = false;
+ }
+ else
+ {
+ bool usedRecursiveVN = false;
+ ValueNum curResult =
+ VNForMapSelectWork(vnk, typ, phiArgVN, arg1VN, pBudget, &usedRecursiveVN);
+ *pUsedRecursiveVN |= usedRecursiveVN;
+ if (sameSelResult == ValueNumStore::RecursiveVN)
+ {
+ sameSelResult = curResult;
+ }
+ if (curResult != ValueNumStore::RecursiveVN && curResult != sameSelResult)
+ {
+ allSame = false;
+ }
+ }
+ }
+ if (allSame && sameSelResult != ValueNumStore::RecursiveVN)
+ {
+ // Make sure we're popping what we pushed.
+ assert(FixedPointMapSelsTopHasValue(arg0VN, arg1VN));
+ m_fixedPointMapSels.Pop();
+
+ // To avoid exponential searches, we make sure that this result is memo-ized.
+ // The result is always valid for memoization if we didn't rely on RecursiveVN to get it.
+ // If RecursiveVN was used, we are processing a loop and we can't memo-ize this intermediate
+ // result if, e.g., this block is in a multi-entry loop.
+ if (!*pUsedRecursiveVN)
+ {
+ GetVNFunc2Map()->Set(fstruct, sameSelResult);
+ }
+
+ return sameSelResult;
+ }
+ // Otherwise, fall through to creating the select(phi(m1, m2), x) function application.
+ }
+ // Make sure we're popping what we pushed.
+ assert(FixedPointMapSelsTopHasValue(arg0VN, arg1VN));
+ m_fixedPointMapSels.Pop();
+ }
+ }
+ }
+
+ // Otherwise, assign a new VN for the function application.
+ Chunk* c = GetAllocChunk(typ, CEA_Func2);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNDefFunc2Arg*>(c->m_defs)[offsetWithinChunk] = fstruct;
+ GetVNFunc2Map()->Set(fstruct, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::EvalFuncForConstantArgs(var_types typ, VNFunc func, ValueNum arg0VN)
+{
+ assert(CanEvalForConstantArgs(func));
+ assert(IsVNConstant(arg0VN));
+ switch (TypeOfVN(arg0VN))
+ {
+ case TYP_INT:
+ {
+ int resVal = EvalOp(func, ConstantValue<int>(arg0VN));
+ // Unary op on a handle results in a handle.
+ return IsVNHandle(arg0VN) ? VNForHandle(ssize_t(resVal), GetHandleFlags(arg0VN)) : VNForIntCon(resVal);
+ }
+ case TYP_LONG:
+ {
+ INT64 resVal = EvalOp(func, ConstantValue<INT64>(arg0VN));
+ // Unary op on a handle results in a handle.
+ return IsVNHandle(arg0VN) ? VNForHandle(ssize_t(resVal), GetHandleFlags(arg0VN)) : VNForLongCon(resVal);
+ }
+ case TYP_FLOAT:
+ return VNForFloatCon(EvalOp(func, ConstantValue<float>(arg0VN)));
+ case TYP_DOUBLE:
+ return VNForDoubleCon(EvalOp(func, ConstantValue<double>(arg0VN)));
+ case TYP_REF:
+ // If arg0 has a possible exception, it wouldn't have been constant.
+ assert(!VNHasExc(arg0VN));
+ // Otherwise...
+ assert(arg0VN == VNForNull()); // Only other REF constant.
+ assert(func == VNFunc(GT_ARR_LENGTH)); // Only function we can apply to a REF constant!
+ return VNWithExc(VNForVoid(), VNExcSetSingleton(VNForFunc(TYP_REF, VNF_NullPtrExc, VNForNull())));
+ default:
+ unreached();
+ }
+}
+
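+// Returns true iff a MapSelect of ("map", "ind") is already on the stack of selects
+// currently being evaluated (m_fixedPointMapSels); VNForMapSelectWork uses this to detect
+// recursion and return RecursiveVN rather than looping forever.
+//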
+bool ValueNumStore::SelectIsBeingEvaluatedRecursively(ValueNum map, ValueNum ind)
+{
+ for (unsigned i = 0; i < m_fixedPointMapSels.Size(); i++)
+ {
+ VNDefFunc2Arg& elem = m_fixedPointMapSels.GetRef(i);
+ assert(elem.m_func == VNF_MapSelect);
+ if (elem.m_arg0 == map && elem.m_arg1 == ind)
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+#ifdef DEBUG
+bool ValueNumStore::FixedPointMapSelsTopHasValue(ValueNum map, ValueNum index)
+{
+ if (m_fixedPointMapSels.Size() == 0)
+ {
+ return false;
+ }
+ VNDefFunc2Arg& top = m_fixedPointMapSels.TopRef();
+ return top.m_func == VNF_MapSelect && top.m_arg0 == map && top.m_arg1 == index;
+}
+#endif
+
+// Given an integer constant value number return its value as an int.
+//
+int ValueNumStore::GetConstantInt32(ValueNum argVN)
+{
+ assert(IsVNConstant(argVN));
+ var_types argVNtyp = TypeOfVN(argVN);
+
+ int result = 0;
+
+ switch (argVNtyp)
+ {
+ case TYP_INT:
+ result = ConstantValue<int>(argVN);
+ break;
+#ifndef _TARGET_64BIT_
+ case TYP_REF:
+ case TYP_BYREF:
+ result = (int)ConstantValue<size_t>(argVN);
+ break;
+#endif
+ default:
+ unreached();
+ }
+ return result;
+}
+
+// Given an integer constant value number return its value as an INT64.
+//
+INT64 ValueNumStore::GetConstantInt64(ValueNum argVN)
+{
+ assert(IsVNConstant(argVN));
+ var_types argVNtyp = TypeOfVN(argVN);
+
+ INT64 result = 0;
+
+ switch (argVNtyp)
+ {
+ case TYP_INT:
+ result = (INT64)ConstantValue<int>(argVN);
+ break;
+ case TYP_LONG:
+ result = ConstantValue<INT64>(argVN);
+ break;
+ case TYP_REF:
+ case TYP_BYREF:
+ result = (INT64)ConstantValue<size_t>(argVN);
+ break;
+ default:
+ unreached();
+ }
+ return result;
+}
+
+// Given a float or a double constant value number return its value as a double.
+//
+double ValueNumStore::GetConstantDouble(ValueNum argVN)
+{
+ assert(IsVNConstant(argVN));
+ var_types argVNtyp = TypeOfVN(argVN);
+
+ double result = 0;
+
+ switch (argVNtyp)
+ {
+ case TYP_FLOAT:
+ result = (double)ConstantValue<float>(argVN);
+ break;
+ case TYP_DOUBLE:
+ result = ConstantValue<double>(argVN);
+ break;
+ default:
+ unreached();
+ }
+ return result;
+}
+
+// Compute the proper value number when the VNFunc has all constant arguments
+// This essentially performs constant folding at value numbering time
+//
+ValueNum ValueNumStore::EvalFuncForConstantArgs(var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN)
+{
+ assert(CanEvalForConstantArgs(func));
+ assert(IsVNConstant(arg0VN) && IsVNConstant(arg1VN));
+ assert(!VNHasExc(arg0VN) && !VNHasExc(arg1VN)); // Otherwise, would not be constant.
+
+ // if our func is the VNF_Cast operation we handle it first
+ if (func == VNF_Cast)
+ {
+ return EvalCastForConstantArgs(typ, func, arg0VN, arg1VN);
+ }
+
+    if (typ == TYP_BYREF)
+    {
+        // We don't want to fold expressions that produce TYP_BYREF.
+        return NoVN;
+    }
+
+ var_types arg0VNtyp = TypeOfVN(arg0VN);
+ var_types arg1VNtyp = TypeOfVN(arg1VN);
+
+ // When both arguments are floating point types
+ // We defer to the EvalFuncForConstantFPArgs()
+ if (varTypeIsFloating(arg0VNtyp) && varTypeIsFloating(arg1VNtyp))
+ {
+ return EvalFuncForConstantFPArgs(typ, func, arg0VN, arg1VN);
+ }
+
+ // after this we shouldn't have to deal with floating point types for arg0VN or arg1VN
+ assert(!varTypeIsFloating(arg0VNtyp));
+ assert(!varTypeIsFloating(arg1VNtyp));
+
+ // Stack-normalize the result type.
+ if (varTypeIsSmall(typ))
+ {
+ typ = TYP_INT;
+ }
+
+ ValueNum result; // left uninitialized, we are required to initialize it on all paths below.
+ ValueNum excSet = VNForEmptyExcSet();
+
+ // Are both args of the same type?
+ if (arg0VNtyp == arg1VNtyp)
+ {
+ if (arg0VNtyp == TYP_INT)
+ {
+ int arg0Val = ConstantValue<int>(arg0VN);
+ int arg1Val = ConstantValue<int>(arg1VN);
+
+ assert(typ == TYP_INT);
+ int resultVal = EvalOp(func, arg0Val, arg1Val, &excSet);
+ // Bin op on a handle results in a handle.
+ ValueNum handleVN = IsVNHandle(arg0VN) ? arg0VN : IsVNHandle(arg1VN) ? arg1VN : NoVN;
+ ValueNum resultVN = (handleVN != NoVN)
+ ? VNForHandle(ssize_t(resultVal), GetHandleFlags(handleVN)) // Use VN for Handle
+ : VNForIntCon(resultVal);
+ result = VNWithExc(resultVN, excSet);
+ }
+ else if (arg0VNtyp == TYP_LONG)
+ {
+ INT64 arg0Val = ConstantValue<INT64>(arg0VN);
+ INT64 arg1Val = ConstantValue<INT64>(arg1VN);
+
+ if (VNFuncIsComparison(func))
+ {
+ assert(typ == TYP_INT);
+ result = VNForIntCon(EvalComparison(func, arg0Val, arg1Val));
+ }
+ else
+ {
+ assert(typ == TYP_LONG);
+ INT64 resultVal = EvalOp(func, arg0Val, arg1Val, &excSet);
+ ValueNum handleVN = IsVNHandle(arg0VN) ? arg0VN : IsVNHandle(arg1VN) ? arg1VN : NoVN;
+ ValueNum resultVN = (handleVN != NoVN)
+ ? VNForHandle(ssize_t(resultVal), GetHandleFlags(handleVN)) // Use VN for Handle
+ : VNForLongCon(resultVal);
+ result = VNWithExc(resultVN, excSet);
+ }
+ }
+ else // both args are TYP_REF or both args are TYP_BYREF
+ {
+ INT64 arg0Val = ConstantValue<size_t>(arg0VN); // We represent ref/byref constants as size_t's.
+ INT64 arg1Val = ConstantValue<size_t>(arg1VN); // Also we consider null to be zero.
+
+ if (VNFuncIsComparison(func))
+ {
+ assert(typ == TYP_INT);
+ result = VNForIntCon(EvalComparison(func, arg0Val, arg1Val));
+ }
+ else if (typ == TYP_INT) // We could see GT_OR of a constant ByRef and Null
+ {
+ int resultVal = (int)EvalOp(func, arg0Val, arg1Val, &excSet);
+ result = VNWithExc(VNForIntCon(resultVal), excSet);
+ }
+ else // We could see GT_OR of a constant ByRef and Null
+ {
+ assert((typ == TYP_BYREF) || (typ == TYP_LONG));
+ INT64 resultVal = EvalOp(func, arg0Val, arg1Val, &excSet);
+ result = VNWithExc(VNForByrefCon(resultVal), excSet);
+ }
+ }
+ }
+ else // We have args of different types
+ {
+ // We represent ref/byref constants as size_t's.
+ // Also we consider null to be zero.
+ //
+ INT64 arg0Val = GetConstantInt64(arg0VN);
+ INT64 arg1Val = GetConstantInt64(arg1VN);
+
+ if (VNFuncIsComparison(func))
+ {
+ assert(typ == TYP_INT);
+ result = VNForIntCon(EvalComparison(func, arg0Val, arg1Val));
+ }
+ else if (typ == TYP_INT) // We could see GT_OR of an int and constant ByRef or Null
+ {
+ int resultVal = (int)EvalOp(func, arg0Val, arg1Val, &excSet);
+ result = VNWithExc(VNForIntCon(resultVal), excSet);
+ }
+ else
+ {
+ assert(typ != TYP_INT);
+ ValueNum resultValx = VNForEmptyExcSet();
+ INT64 resultVal = EvalOp(func, arg0Val, arg1Val, &resultValx);
+
+ // check for the Exception case
+ if (resultValx != VNForEmptyExcSet())
+ {
+ result = VNWithExc(VNForVoid(), resultValx);
+ }
+ else
+ {
+ switch (typ)
+ {
+ case TYP_BYREF:
+ result = VNForByrefCon(resultVal);
+ break;
+ case TYP_LONG:
+ result = VNForLongCon(resultVal);
+ break;
+ case TYP_REF:
+ assert(resultVal == 0); // Only valid REF constant
+ result = VNForNull();
+ break;
+ default:
+ unreached();
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+// Compute the proper value number when the VNFunc has all constant floating-point arguments
+// This essentially must perform constant folding at value numbering time
+//
+ValueNum ValueNumStore::EvalFuncForConstantFPArgs(var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN)
+{
+ assert(CanEvalForConstantArgs(func));
+ assert(IsVNConstant(arg0VN) && IsVNConstant(arg1VN));
+
+ // We expect both argument types to be floating point types
+ var_types arg0VNtyp = TypeOfVN(arg0VN);
+ var_types arg1VNtyp = TypeOfVN(arg1VN);
+
+ assert(varTypeIsFloating(arg0VNtyp));
+ assert(varTypeIsFloating(arg1VNtyp));
+
+ double arg0Val = GetConstantDouble(arg0VN);
+ double arg1Val = GetConstantDouble(arg1VN);
+
+ ValueNum result; // left uninitialized, we are required to initialize it on all paths below.
+
+ if (VNFuncIsComparison(func))
+ {
+ assert(genActualType(typ) == TYP_INT);
+ result = VNForIntCon(EvalComparison(func, arg0Val, arg1Val));
+ }
+ else
+ {
+ assert(varTypeIsFloating(typ)); // We must be computing a floating point result
+
+ // We always compute the result using a double
+ ValueNum exception = VNForEmptyExcSet();
+ double doubleResultVal = EvalOp(func, arg0Val, arg1Val, &exception);
+ assert(exception == VNForEmptyExcSet()); // Floating point ops don't throw.
+
+ if (typ == TYP_FLOAT)
+ {
+ float floatResultVal = float(doubleResultVal);
+ result = VNForFloatCon(floatResultVal);
+ }
+ else
+ {
+ assert(typ == TYP_DOUBLE);
+ result = VNForDoubleCon(doubleResultVal);
+ }
+ }
+
+ return result;
+}
+
+// Compute the proper value number for a VNF_Cast with constant arguments
+// This essentially must perform constant folding at value numbering time
+//
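+// For example, casting the TYP_INT constant 300 to TYP_UBYTE yields the TYP_INT constant 44
+// (the low 8 bits of 300), matching the run-time behavior of GT_CAST.
+//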
+ValueNum ValueNumStore::EvalCastForConstantArgs(var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN)
+{
+ assert(func == VNF_Cast);
+ assert(IsVNConstant(arg0VN) && IsVNConstant(arg1VN));
+
+ // Stack-normalize the result type.
+ if (varTypeIsSmall(typ))
+ {
+ typ = TYP_INT;
+ }
+
+ var_types arg0VNtyp = TypeOfVN(arg0VN);
+ var_types arg1VNtyp = TypeOfVN(arg1VN);
+
+ // arg1VN is really the gtCastType that we are casting to
+ assert(arg1VNtyp == TYP_INT);
+ int arg1Val = ConstantValue<int>(arg1VN);
+ assert(arg1Val >= 0);
+
+ if (IsVNHandle(arg0VN))
+ {
+ // We don't allow handles to be cast to random var_types.
+ assert(typ == TYP_I_IMPL);
+ }
+
+ // We previously encoded the castToType operation using vnForCastOper()
+ //
+ bool srcIsUnsigned = ((arg1Val & INT32(VCA_UnsignedSrc)) != 0);
+ var_types castToType = var_types(arg1Val >> INT32(VCA_BitCount));
+
+ var_types castFromType = arg0VNtyp;
+
+ switch (castFromType) // GT_CAST source type
+ {
+#ifndef _TARGET_64BIT_
+ case TYP_REF:
+#endif
+ case TYP_INT:
+ {
+ int arg0Val = GetConstantInt32(arg0VN);
+
+ switch (castToType)
+ {
+ case TYP_BYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT8(arg0Val));
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT8(arg0Val));
+ case TYP_SHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT16(arg0Val));
+ case TYP_CHAR:
+ case TYP_USHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT16(arg0Val));
+ case TYP_INT:
+ case TYP_UINT:
+ assert(typ == TYP_INT);
+ return arg0VN;
+ case TYP_LONG:
+ case TYP_ULONG:
+ assert(!IsVNHandle(arg0VN));
+#ifdef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ if (srcIsUnsigned)
+ {
+ return VNForLongCon(INT64(unsigned(arg0Val)));
+ }
+ else
+ {
+ return VNForLongCon(INT64(arg0Val));
+ }
+ }
+ else
+ {
+ assert(typ == TYP_BYREF);
+ if (srcIsUnsigned)
+ {
+ return VNForByrefCon(INT64(unsigned(arg0Val)));
+ }
+ else
+ {
+ return VNForByrefCon(INT64(arg0Val));
+ }
+ }
+#else // TARGET_32BIT
+ if (srcIsUnsigned)
+ return VNForLongCon(INT64(unsigned(arg0Val)));
+ else
+ return VNForLongCon(INT64(arg0Val));
+#endif
+ case TYP_FLOAT:
+ assert(typ == TYP_FLOAT);
+ if (srcIsUnsigned)
+ {
+ return VNForFloatCon(float(unsigned(arg0Val)));
+ }
+ else
+ {
+ return VNForFloatCon(float(arg0Val));
+ }
+ case TYP_DOUBLE:
+ assert(typ == TYP_DOUBLE);
+ if (srcIsUnsigned)
+ {
+ return VNForDoubleCon(double(unsigned(arg0Val)));
+ }
+ else
+ {
+ return VNForDoubleCon(double(arg0Val));
+ }
+ default:
+ unreached();
+ }
+ break;
+ }
+ {
+#ifdef _TARGET_64BIT_
+ case TYP_REF:
+#endif
+ case TYP_LONG:
+ INT64 arg0Val = GetConstantInt64(arg0VN);
+
+ switch (castToType)
+ {
+ case TYP_BYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT8(arg0Val));
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT8(arg0Val));
+ case TYP_SHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT16(arg0Val));
+ case TYP_CHAR:
+ case TYP_USHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT16(arg0Val));
+ case TYP_INT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT32(arg0Val));
+ case TYP_UINT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT32(arg0Val));
+ case TYP_LONG:
+ case TYP_ULONG:
+ assert(typ == TYP_LONG);
+ return arg0VN;
+ case TYP_FLOAT:
+ assert(typ == TYP_FLOAT);
+ if (srcIsUnsigned)
+ {
+ return VNForFloatCon(FloatingPointUtils::convertUInt64ToFloat(UINT64(arg0Val)));
+ }
+ else
+ {
+ return VNForFloatCon(float(arg0Val));
+ }
+ case TYP_DOUBLE:
+ assert(typ == TYP_DOUBLE);
+ if (srcIsUnsigned)
+ {
+ return VNForDoubleCon(FloatingPointUtils::convertUInt64ToDouble(UINT64(arg0Val)));
+ }
+ else
+ {
+ return VNForDoubleCon(double(arg0Val));
+ }
+ default:
+ unreached();
+ }
+ }
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ {
+ double arg0Val = GetConstantDouble(arg0VN);
+
+ switch (castToType)
+ {
+ case TYP_BYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT8(arg0Val));
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT8(arg0Val));
+ case TYP_SHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT16(arg0Val));
+ case TYP_CHAR:
+ case TYP_USHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT16(arg0Val));
+ case TYP_INT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT32(arg0Val));
+ case TYP_UINT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT32(arg0Val));
+ case TYP_LONG:
+ assert(typ == TYP_LONG);
+ return VNForLongCon(INT64(arg0Val));
+ case TYP_ULONG:
+ assert(typ == TYP_LONG);
+ return VNForLongCon(UINT64(arg0Val));
+ case TYP_FLOAT:
+ assert(typ == TYP_FLOAT);
+ return VNForFloatCon(float(arg0Val));
+ case TYP_DOUBLE:
+ assert(typ == TYP_DOUBLE);
+ return VNForDoubleCon(arg0Val);
+ default:
+ unreached();
+ }
+ }
+ default:
+ unreached();
+ }
+}
+
+bool ValueNumStore::CanEvalForConstantArgs(VNFunc vnf)
+{
+ if (vnf < VNF_Boundary)
+ {
+ // We'll refine this as we get counterexamples. But to
+ // a first approximation, VNFuncs that are genTreeOps should
+ // be things we can evaluate.
+ genTreeOps oper = genTreeOps(vnf);
+ // Some exceptions...
+ switch (oper)
+ {
+ case GT_MKREFANY: // We can't evaluate these.
+ case GT_RETFILT:
+ case GT_LIST:
+ case GT_ARR_LENGTH:
+ return false;
+ case GT_MULHI:
+ // should be rare, not worth the complexity and risk of getting it wrong
+ return false;
+ default:
+ return true;
+ }
+ }
+ else
+ {
+ // some VNF_ that we can evaluate
+ switch (vnf)
+ {
+ case VNF_Cast: // We can evaluate these.
+ return true;
+ case VNF_ObjGetType:
+ return false;
+ default:
+ return false;
+ }
+ }
+}
+
+unsigned ValueNumStore::VNFuncArity(VNFunc vnf)
+{
+ // Read the bit field out of the table...
+ return (s_vnfOpAttribs[vnf] & VNFOA_ArityMask) >> VNFOA_ArityShift;
+}
+
+template <>
+bool ValueNumStore::IsOverflowIntDiv(int v0, int v1)
+{
+ return (v1 == -1) && (v0 == INT32_MIN);
+}
+template <>
+bool ValueNumStore::IsOverflowIntDiv(INT64 v0, INT64 v1)
+{
+ return (v1 == -1) && (v0 == INT64_MIN);
+}
+template <typename T>
+bool ValueNumStore::IsOverflowIntDiv(T v0, T v1)
+{
+ return false;
+}
+
+template <>
+bool ValueNumStore::IsIntZero(int v)
+{
+ return v == 0;
+}
+template <>
+bool ValueNumStore::IsIntZero(unsigned v)
+{
+ return v == 0;
+}
+template <>
+bool ValueNumStore::IsIntZero(INT64 v)
+{
+ return v == 0;
+}
+template <>
+bool ValueNumStore::IsIntZero(UINT64 v)
+{
+ return v == 0;
+}
+template <typename T>
+bool ValueNumStore::IsIntZero(T v)
+{
+ return false;
+}
+
+template <>
+float ValueNumStore::EvalOpIntegral<float>(VNFunc vnf, float v0)
+{
+ assert(!"EvalOpIntegral<float>");
+ return 0.0f;
+}
+
+template <>
+double ValueNumStore::EvalOpIntegral<double>(VNFunc vnf, double v0)
+{
+ assert(!"EvalOpIntegral<double>");
+ return 0.0;
+}
+
+template <>
+float ValueNumStore::EvalOpIntegral<float>(VNFunc vnf, float v0, float v1, ValueNum* pExcSet)
+{
+ genTreeOps oper = genTreeOps(vnf);
+ switch (oper)
+ {
+ case GT_MOD:
+ return fmodf(v0, v1);
+ default:
+ // For any other values of 'oper', we will assert and return 0.0f
+ break;
+ }
+ assert(!"EvalOpIntegral<float> with pExcSet");
+ return 0.0f;
+}
+
+template <>
+double ValueNumStore::EvalOpIntegral<double>(VNFunc vnf, double v0, double v1, ValueNum* pExcSet)
+{
+ genTreeOps oper = genTreeOps(vnf);
+ switch (oper)
+ {
+ case GT_MOD:
+ return fmod(v0, v1);
+ default:
+ // For any other value of 'oper', we will assert and return 0.0
+ break;
+ }
+ assert(!"EvalOpIntegral<double> with pExcSet");
+ return 0.0;
+}
+
+ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN, ValueNum arg2VN)
+{
+ assert(arg0VN != NoVN);
+ assert(arg1VN != NoVN);
+ assert(arg2VN != NoVN);
+ assert(VNFuncArity(func) == 3);
+
+ // Function arguments carry no exceptions.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (func != VNF_PhiDef)
+ {
+ // For a phi definition first and second argument are "plain" local/ssa numbers.
+ // (I don't know if having such non-VN arguments to a VN function is a good idea -- if we wanted to declare
+ // ValueNum to be "short" it would be a problem, for example. But we'll leave it for now, with these explicit
+ // exceptions.)
+ assert(arg0VN == VNNormVal(arg0VN));
+ assert(arg1VN == VNNormVal(arg1VN));
+ }
+ assert(arg2VN == VNNormVal(arg2VN));
+
+#endif
+ assert(VNFuncArity(func) == 3);
+
+ ValueNum res;
+ VNDefFunc3Arg fstruct(func, arg0VN, arg1VN, arg2VN);
+ if (GetVNFunc3Map()->Lookup(fstruct, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(typ, CEA_Func3);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNDefFunc3Arg*>(c->m_defs)[offsetWithinChunk] = fstruct;
+ GetVNFunc3Map()->Set(fstruct, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForFunc(
+ var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN, ValueNum arg2VN, ValueNum arg3VN)
+{
+ assert(arg0VN != NoVN && arg1VN != NoVN && arg2VN != NoVN && arg3VN != NoVN);
+ // Function arguments carry no exceptions.
+ assert(arg0VN == VNNormVal(arg0VN));
+ assert(arg1VN == VNNormVal(arg1VN));
+ assert(arg2VN == VNNormVal(arg2VN));
+ assert(arg3VN == VNNormVal(arg3VN));
+ assert(VNFuncArity(func) == 4);
+
+ ValueNum res;
+ VNDefFunc4Arg fstruct(func, arg0VN, arg1VN, arg2VN, arg3VN);
+ if (GetVNFunc4Map()->Lookup(fstruct, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(typ, CEA_Func4);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNDefFunc4Arg*>(c->m_defs)[offsetWithinChunk] = fstruct;
+ GetVNFunc4Map()->Set(fstruct, res);
+ return res;
+ }
+}
+
+//------------------------------------------------------------------------
+// VNForExpr: Produce an opaque value number, one that compares equal only to itself
+//    and is distinct from all other value numbers.
+//
+// Arguments:
+// block - BasicBlock where the expression that produces this value occurs.
+// May be nullptr to force conservative "could be anywhere" interpretation.
+// typ - Type of the expression in the IR
+//
+// Return Value:
+// A new value number distinct from any previously generated, that compares as equal
+// to itself, but not any other value number, and is annotated with the given
+// type and block.
+
+ValueNum ValueNumStore::VNForExpr(BasicBlock* block, var_types typ)
+{
+ BasicBlock::loopNumber loopNum;
+ if (block == nullptr)
+ {
+ loopNum = MAX_LOOP_NUM;
+ }
+ else
+ {
+ loopNum = block->bbNatLoopNum;
+ }
+
+ // We always allocate a new, unique VN in this call.
+ // The 'typ' is used to partition the allocation of VNs into different chunks.
+ Chunk* c = GetAllocChunk(typ, CEA_None, loopNum);
+ unsigned offsetWithinChunk = c->AllocVN();
+ ValueNum result = c->m_baseVN + offsetWithinChunk;
+ return result;
+}
+
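+// VNApplySelectors: Starting from the value number "map", apply each field handle in
+// "fieldSeq" as a MapSelect selector (pseudo-fields are skipped), yielding the value number
+// read through that field path. For a non-empty sequence, "*wbFinalStructSize" (if non-null)
+// is set to the size of the final field's struct type, or to 0 if that field is not a struct.
+//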
+ValueNum ValueNumStore::VNApplySelectors(ValueNumKind vnk,
+ ValueNum map,
+ FieldSeqNode* fieldSeq,
+ size_t* wbFinalStructSize)
+{
+ if (fieldSeq == nullptr)
+ {
+ return map;
+ }
+ else
+ {
+ assert(fieldSeq != FieldSeqStore::NotAField());
+
+ // Skip any "FirstElem" pseudo-fields or any "ConstantIndex" pseudo-fields
+ if (fieldSeq->IsPseudoField())
+ {
+ return VNApplySelectors(vnk, map, fieldSeq->m_next, wbFinalStructSize);
+ }
+
+ // Otherwise, is a real field handle.
+ CORINFO_FIELD_HANDLE fldHnd = fieldSeq->m_fieldHnd;
+ CORINFO_CLASS_HANDLE structHnd = NO_CLASS_HANDLE;
+ ValueNum fldHndVN = VNForHandle(ssize_t(fldHnd), GTF_ICON_FIELD_HDL);
+ noway_assert(fldHnd != nullptr);
+ CorInfoType fieldCit = m_pComp->info.compCompHnd->getFieldType(fldHnd, &structHnd);
+ var_types fieldType = JITtype2varType(fieldCit);
+
+ size_t structSize = 0;
+ if (varTypeIsStruct(fieldType))
+ {
+ structSize = m_pComp->info.compCompHnd->getClassSize(structHnd);
+            // We do not normalize the types of field accesses during importation unless
+            // they are used in a call, return, or assignment.
+ if ((fieldType == TYP_STRUCT) && (structSize <= m_pComp->largestEnregisterableStructSize()))
+ {
+ fieldType = m_pComp->impNormStructType(structHnd);
+ }
+ }
+ if (wbFinalStructSize != nullptr)
+ {
+ *wbFinalStructSize = structSize;
+ }
+
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" VNApplySelectors:\n");
+ const char* modName;
+ const char* fldName = m_pComp->eeGetFieldName(fldHnd, &modName);
+ printf(" VNForHandle(Fseq[%s]) is " STR_VN "%x, fieldType is %s", fldName, fldHndVN,
+ varTypeName(fieldType));
+ if (varTypeIsStruct(fieldType))
+ {
+ printf(", size = %d", structSize);
+ }
+ printf("\n");
+ }
+#endif
+
+ if (fieldSeq->m_next != nullptr)
+ {
+ ValueNum newMap = VNForMapSelect(vnk, fieldType, map, fldHndVN);
+ return VNApplySelectors(vnk, newMap, fieldSeq->m_next, wbFinalStructSize);
+ }
+ else // end of fieldSeq
+ {
+ return VNForMapSelect(vnk, fieldType, map, fldHndVN);
+ }
+ }
+}
+
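+// VNApplySelectorsTypeCheck: Reconcile the type of the value read ("elem") with the type of
+// the indirection ("indType"). Matching or otherwise compatible reads (including a TYP_REF
+// read of a boxed struct) return "elem" unchanged; reads beyond the end of "elem" and
+// struct-typed reads get a new opaque value number; other mismatches insert a VNF_Cast to
+// "indType".
+//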
+ValueNum ValueNumStore::VNApplySelectorsTypeCheck(ValueNum elem, var_types indType, size_t elemStructSize)
+{
+ var_types elemTyp = TypeOfVN(elem);
+
+ // Check if the elemTyp is matching/compatible
+
+ if (indType != elemTyp)
+ {
+ bool isConstant = IsVNConstant(elem);
+ if (isConstant && (elemTyp == genActualType(indType)))
+ {
+ // (i.e. We recorded a constant of TYP_INT for a TYP_BYTE field)
+ }
+ else
+ {
+ // We are trying to read from an 'elem' of type 'elemType' using 'indType' read
+
+ size_t elemTypSize = (elemTyp == TYP_STRUCT) ? elemStructSize : genTypeSize(elemTyp);
+ size_t indTypeSize = genTypeSize(indType);
+
+ if ((indType == TYP_REF) && (varTypeIsStruct(elemTyp)))
+ {
+ // indType is TYP_REF and elemTyp is TYP_STRUCT
+ //
+ // We have a pointer to a static that is a Boxed Struct
+ //
+ return elem;
+ }
+ else if (indTypeSize > elemTypSize)
+ {
+                // Reading beyond the end of 'elem'
+
+ // return a new unique value number
+ elem = VNForExpr(nullptr, indType);
+ JITDUMP(" *** Mismatched types in VNApplySelectorsTypeCheck (reading beyond the end)\n");
+ }
+ else if (varTypeIsStruct(indType))
+ {
+ // indType is TYP_STRUCT
+
+ // return a new unique value number
+ elem = VNForExpr(nullptr, indType);
+ JITDUMP(" *** Mismatched types in VNApplySelectorsTypeCheck (indType is TYP_STRUCT)\n");
+ }
+ else
+ {
+ // We are trying to read an 'elem' of type 'elemType' using 'indType' read
+
+ // insert a cast of elem to 'indType'
+ elem = VNForCast(elem, indType, elemTyp);
+ }
+ }
+ }
+ return elem;
+}
+
+ValueNum ValueNumStore::VNApplySelectorsAssignTypeCoerce(ValueNum elem, var_types indType, BasicBlock* block)
+{
+ var_types elemTyp = TypeOfVN(elem);
+
+ // Check if the elemTyp is matching/compatible
+
+ if (indType != elemTyp)
+ {
+ bool isConstant = IsVNConstant(elem);
+ if (isConstant && (elemTyp == genActualType(indType)))
+ {
+ // (i.e. We recorded a constant of TYP_INT for a TYP_BYTE field)
+ }
+ else
+ {
+ // We are trying to write an 'elem' of type 'elemType' using 'indType' store
+
+ if (varTypeIsStruct(indType))
+ {
+ // return a new unique value number
+ elem = VNForExpr(block, indType);
+ JITDUMP(" *** Mismatched types in VNApplySelectorsAssignTypeCoerce (indType is TYP_STRUCT)\n");
+ }
+ else
+ {
+ // We are trying to write an 'elem' of type 'elemType' using 'indType' store
+
+ // insert a cast of elem to 'indType'
+ elem = VNForCast(elem, indType, elemTyp);
+ }
+ }
+ }
+ return elem;
+}
+
+//------------------------------------------------------------------------
+// VNApplySelectorsAssign: Compute the value number corresponding to "map" but with
+//    the element at "fieldSeq" updated to the value "elem"; this is the new heap
+// value for an assignment of value "elem" into the heap at location "fieldSeq"
+// that occurs in block "block" and has type "indType".
+//
+// Arguments:
+// vnk - Identifies whether to recurse to Conservative or Liberal value numbers
+// when recursing through phis
+// map - Value number for the field map before the assignment
+// elem - Value number for the value being stored (to the given field)
+// indType - Type of the indirection storing the value to the field
+// block - Block where the assignment occurs
+//
+// Return Value:
+//    The value number corresponding to the heap after the assignment.
+
+ValueNum ValueNumStore::VNApplySelectorsAssign(
+ ValueNumKind vnk, ValueNum map, FieldSeqNode* fieldSeq, ValueNum elem, var_types indType, BasicBlock* block)
+{
+ if (fieldSeq == nullptr)
+ {
+ return VNApplySelectorsAssignTypeCoerce(elem, indType, block);
+ }
+ else
+ {
+ assert(fieldSeq != FieldSeqStore::NotAField());
+
+ // Skip any "FirstElem" pseudo-fields or any "ConstantIndex" pseudo-fields
+ // These will occur, at least, in struct static expressions, for method table offsets.
+ if (fieldSeq->IsPseudoField())
+ {
+ return VNApplySelectorsAssign(vnk, map, fieldSeq->m_next, elem, indType, block);
+ }
+
+ // Otherwise, fldHnd is a real field handle.
+ CORINFO_FIELD_HANDLE fldHnd = fieldSeq->m_fieldHnd;
+ CORINFO_CLASS_HANDLE structType = nullptr;
+ noway_assert(fldHnd != nullptr);
+ CorInfoType fieldCit = m_pComp->info.compCompHnd->getFieldType(fldHnd, &structType);
+ var_types fieldType = JITtype2varType(fieldCit);
+
+ ValueNum fieldHndVN = VNForHandle(ssize_t(fldHnd), GTF_ICON_FIELD_HDL);
+
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" fieldHnd " STR_VN "%x is ", fieldHndVN);
+ vnDump(m_pComp, fieldHndVN);
+ printf("\n");
+
+ ValueNum seqNextVN = VNForFieldSeq(fieldSeq->m_next);
+ ValueNum fieldSeqVN = VNForFunc(TYP_REF, VNF_FieldSeq, fieldHndVN, seqNextVN);
+
+ printf(" fieldSeq " STR_VN "%x is ", fieldSeqVN);
+ vnDump(m_pComp, fieldSeqVN);
+ printf("\n");
+ }
+#endif
+
+ ValueNum elemAfter;
+ if (fieldSeq->m_next)
+ {
+ ValueNum fseqMap = VNForMapSelect(vnk, fieldType, map, fieldHndVN);
+ elemAfter = VNApplySelectorsAssign(vnk, fseqMap, fieldSeq->m_next, elem, indType, block);
+ }
+ else
+ {
+ elemAfter = VNApplySelectorsAssignTypeCoerce(elem, indType, block);
+ }
+
+ ValueNum newMap = VNForMapStore(fieldType, map, fieldHndVN, elemAfter);
+ return newMap;
+ }
+}
+
+ValueNumPair ValueNumStore::VNPairApplySelectors(ValueNumPair map, FieldSeqNode* fieldSeq, var_types indType)
+{
+ size_t structSize = 0;
+ ValueNum liberalVN = VNApplySelectors(VNK_Liberal, map.GetLiberal(), fieldSeq, &structSize);
+ liberalVN = VNApplySelectorsTypeCheck(liberalVN, indType, structSize);
+
+ structSize = 0;
+ ValueNum conservVN = VNApplySelectors(VNK_Conservative, map.GetConservative(), fieldSeq, &structSize);
+ conservVN = VNApplySelectorsTypeCheck(conservVN, indType, structSize);
+
+ return ValueNumPair(liberalVN, conservVN);
+}
+
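+// VNForFieldSeq: Map a field sequence to a value number: a null sequence maps to VNForNull(),
+// NotAField maps to VNForNotAField(), and anything else maps to the function application
+// VNF_FieldSeq(fieldHndVN, VNForFieldSeq(fieldSeq->m_next)). FieldSeqVNToFieldSeq below is
+// the inverse mapping.
+//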
+ValueNum ValueNumStore::VNForFieldSeq(FieldSeqNode* fieldSeq)
+{
+ if (fieldSeq == nullptr)
+ {
+ return VNForNull();
+ }
+ else if (fieldSeq == FieldSeqStore::NotAField())
+ {
+ return VNForNotAField();
+ }
+ else
+ {
+ ssize_t fieldHndVal = ssize_t(fieldSeq->m_fieldHnd);
+ ValueNum fieldHndVN = VNForHandle(fieldHndVal, GTF_ICON_FIELD_HDL);
+ ValueNum seqNextVN = VNForFieldSeq(fieldSeq->m_next);
+ ValueNum fieldSeqVN = VNForFunc(TYP_REF, VNF_FieldSeq, fieldHndVN, seqNextVN);
+
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" fieldHnd " STR_VN "%x is ", fieldHndVN);
+ vnDump(m_pComp, fieldHndVN);
+ printf("\n");
+
+ printf(" fieldSeq " STR_VN "%x is ", fieldSeqVN);
+ vnDump(m_pComp, fieldSeqVN);
+ printf("\n");
+ }
+#endif
+
+ return fieldSeqVN;
+ }
+}
+
+FieldSeqNode* ValueNumStore::FieldSeqVNToFieldSeq(ValueNum vn)
+{
+ if (vn == VNForNull())
+ {
+ return nullptr;
+ }
+ else if (vn == VNForNotAField())
+ {
+ return FieldSeqStore::NotAField();
+ }
+ else
+ {
+ assert(IsVNFunc(vn));
+ VNFuncApp funcApp;
+ GetVNFunc(vn, &funcApp);
+ assert(funcApp.m_func == VNF_FieldSeq);
+ ssize_t fieldHndVal = ConstantValue<ssize_t>(funcApp.m_args[0]);
+ FieldSeqNode* head =
+ m_pComp->GetFieldSeqStore()->CreateSingleton(reinterpret_cast<CORINFO_FIELD_HANDLE>(fieldHndVal));
+ FieldSeqNode* tail = FieldSeqVNToFieldSeq(funcApp.m_args[1]);
+ return m_pComp->GetFieldSeqStore()->Append(head, tail);
+ }
+}
+
+ValueNum ValueNumStore::FieldSeqVNAppend(ValueNum fsVN1, ValueNum fsVN2)
+{
+ if (fsVN1 == VNForNull())
+ {
+ return fsVN2;
+ }
+ else if (fsVN1 == VNForNotAField() || fsVN2 == VNForNotAField())
+ {
+ return VNForNotAField();
+ }
+ else
+ {
+ assert(IsVNFunc(fsVN1));
+ VNFuncApp funcApp1;
+ GetVNFunc(fsVN1, &funcApp1);
+ assert(funcApp1.m_func == VNF_FieldSeq);
+ ValueNum tailRes = FieldSeqVNAppend(funcApp1.m_args[1], fsVN2);
+ ValueNum fieldSeqVN = VNForFunc(TYP_REF, VNF_FieldSeq, funcApp1.m_args[0], tailRes);
+
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" fieldSeq " STR_VN "%x is ", fieldSeqVN);
+ vnDump(m_pComp, fieldSeqVN);
+ printf("\n");
+ }
+#endif
+
+ return fieldSeqVN;
+ }
+}
+
+ValueNum ValueNumStore::VNForPtrToLoc(var_types typ, ValueNum lclVarVN, ValueNum fieldSeqVN)
+{
+ if (fieldSeqVN == VNForNotAField())
+ {
+        // To distinguish two different NotAField accesses, append a unique value.
+ return VNForFunc(typ, VNF_PtrToLoc, lclVarVN, fieldSeqVN, VNForIntCon(++m_uPtrToLocNotAFieldCount));
+ }
+ return VNForFunc(typ, VNF_PtrToLoc, lclVarVN, fieldSeqVN, VNForIntCon(0));
+}
+
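+// ExtendPtrVN: If "opA" is value-numbered as a PtrToLoc, PtrToStatic, or PtrToArrElem
+// function, return a value number of the same kind whose field sequence is extended with the
+// field sequence carried by the constant offset "opB" (or by "fldSeq" in the overload below);
+// otherwise return NoVN. Any exception set attached to "opA" is preserved in the result.
+//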
+ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, GenTreePtr opB)
+{
+ if (opB->OperGet() == GT_CNS_INT)
+ {
+ FieldSeqNode* fldSeq = opB->gtIntCon.gtFieldSeq;
+ if ((fldSeq != nullptr) && (fldSeq != FieldSeqStore::NotAField()))
+ {
+ return ExtendPtrVN(opA, opB->gtIntCon.gtFieldSeq);
+ }
+ }
+ return NoVN;
+}
+
+ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, FieldSeqNode* fldSeq)
+{
+ ValueNum res = NoVN;
+ assert(fldSeq != FieldSeqStore::NotAField());
+
+ ValueNum opAvnWx = opA->gtVNPair.GetLiberal();
+ assert(VNIsValid(opAvnWx));
+ ValueNum opAvn;
+ ValueNum opAvnx = VNForEmptyExcSet();
+ VNUnpackExc(opAvnWx, &opAvn, &opAvnx);
+ assert(VNIsValid(opAvn) && VNIsValid(opAvnx));
+
+ VNFuncApp funcApp;
+ if (!GetVNFunc(opAvn, &funcApp))
+ {
+ return res;
+ }
+
+ if (funcApp.m_func == VNF_PtrToLoc)
+ {
+#ifdef DEBUG
+ // For PtrToLoc, lib == cons.
+ VNFuncApp consFuncApp;
+ assert(GetVNFunc(VNNormVal(opA->GetVN(VNK_Conservative)), &consFuncApp) && consFuncApp.Equals(funcApp));
+#endif
+ ValueNum fldSeqVN = VNForFieldSeq(fldSeq);
+ res = VNForPtrToLoc(TYP_BYREF, funcApp.m_args[0], FieldSeqVNAppend(funcApp.m_args[1], fldSeqVN));
+ }
+ else if (funcApp.m_func == VNF_PtrToStatic)
+ {
+ ValueNum fldSeqVN = VNForFieldSeq(fldSeq);
+ res = VNForFunc(TYP_BYREF, VNF_PtrToStatic, FieldSeqVNAppend(funcApp.m_args[0], fldSeqVN));
+ }
+ else if (funcApp.m_func == VNF_PtrToArrElem)
+ {
+ ValueNum fldSeqVN = VNForFieldSeq(fldSeq);
+ res = VNForFunc(TYP_BYREF, VNF_PtrToArrElem, funcApp.m_args[0], funcApp.m_args[1], funcApp.m_args[2],
+ FieldSeqVNAppend(funcApp.m_args[3], fldSeqVN));
+ }
+ if (res != NoVN)
+ {
+ res = VNWithExc(res, opAvnx);
+ }
+ return res;
+}
+
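+// fgValueNumberArrIndexAssign: Update the heap value number for a store to "arr[inx]"
+// (possibly through the field sequence "fldSeq"). The heap is modeled as a map from
+// array-element-type handles to maps from array references to maps from indices to element
+// values; the store rewrites that nested map chain, or invalidates the element-type map with
+// a fresh opaque value when the access cannot be reasoned about, and installs the result as
+// the new fgCurHeapVN.
+//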
+void Compiler::fgValueNumberArrIndexAssign(CORINFO_CLASS_HANDLE elemTypeEq,
+ ValueNum arrVN,
+ ValueNum inxVN,
+ FieldSeqNode* fldSeq,
+ ValueNum rhsVN,
+ var_types indType)
+{
+ bool invalidateArray = false;
+ ValueNum elemTypeEqVN = vnStore->VNForHandle(ssize_t(elemTypeEq), GTF_ICON_CLASS_HDL);
+ var_types arrElemType = DecodeElemType(elemTypeEq);
+ ValueNum hAtArrType = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, fgCurHeapVN, elemTypeEqVN);
+ ValueNum hAtArrTypeAtArr = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, hAtArrType, arrVN);
+ ValueNum hAtArrTypeAtArrAtInx = vnStore->VNForMapSelect(VNK_Liberal, arrElemType, hAtArrTypeAtArr, inxVN);
+
+ ValueNum newValAtInx = ValueNumStore::NoVN;
+ ValueNum newValAtArr = ValueNumStore::NoVN;
+ ValueNum newValAtArrType = ValueNumStore::NoVN;
+
+ if (fldSeq == FieldSeqStore::NotAField())
+ {
+ // This doesn't represent a proper array access
+ JITDUMP(" *** NotAField sequence encountered in fgValueNumberArrIndexAssign\n");
+
+ // Store a new unique value for newValAtArrType
+ newValAtArrType = vnStore->VNForExpr(compCurBB, TYP_REF);
+ invalidateArray = true;
+ }
+ else
+ {
+ // Note that this does the right thing if "fldSeq" is null -- returns last "rhs" argument.
+ // This is the value that should be stored at "arr[inx]".
+ newValAtInx =
+ vnStore->VNApplySelectorsAssign(VNK_Liberal, hAtArrTypeAtArrAtInx, fldSeq, rhsVN, indType, compCurBB);
+
+        var_types arrElemFldType = arrElemType; // Uses arrElemType unless we have a non-null fldSeq
+ if (vnStore->IsVNFunc(newValAtInx))
+ {
+ VNFuncApp funcApp;
+ vnStore->GetVNFunc(newValAtInx, &funcApp);
+ if (funcApp.m_func == VNF_MapStore)
+ {
+ arrElemFldType = vnStore->TypeOfVN(newValAtInx);
+ }
+ }
+
+ if (indType != arrElemFldType)
+ {
+ // Mismatched types: Store between different types (indType into array of arrElemFldType)
+ //
+
+ JITDUMP(" *** Mismatched types in fgValueNumberArrIndexAssign\n");
+
+ // Store a new unique value for newValAtArrType
+ newValAtArrType = vnStore->VNForExpr(compCurBB, TYP_REF);
+ invalidateArray = true;
+ }
+ }
+
+ if (!invalidateArray)
+ {
+ newValAtArr = vnStore->VNForMapStore(indType, hAtArrTypeAtArr, inxVN, newValAtInx);
+ newValAtArrType = vnStore->VNForMapStore(TYP_REF, hAtArrType, arrVN, newValAtArr);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" hAtArrType " STR_VN "%x is MapSelect(curHeap(" STR_VN "%x), ", hAtArrType, fgCurHeapVN);
+
+ if (arrElemType == TYP_STRUCT)
+ {
+ printf("%s[]).\n", eeGetClassName(elemTypeEq));
+ }
+ else
+ {
+ printf("%s[]).\n", varTypeName(arrElemType));
+ }
+ printf(" hAtArrTypeAtArr " STR_VN "%x is MapSelect(hAtArrType(" STR_VN "%x), arr=" STR_VN "%x)\n",
+ hAtArrTypeAtArr, hAtArrType, arrVN);
+ printf(" hAtArrTypeAtArrAtInx " STR_VN "%x is MapSelect(hAtArrTypeAtArr(" STR_VN "%x), inx=" STR_VN "%x):%s\n",
+ hAtArrTypeAtArrAtInx, hAtArrTypeAtArr, inxVN, varTypeName(arrElemType));
+
+ if (!invalidateArray)
+ {
+ printf(" newValAtInd " STR_VN "%x is ", newValAtInx);
+ vnStore->vnDump(this, newValAtInx);
+ printf("\n");
+
+ printf(" newValAtArr " STR_VN "%x is ", newValAtArr);
+ vnStore->vnDump(this, newValAtArr);
+ printf("\n");
+ }
+
+ printf(" newValAtArrType " STR_VN "%x is ", newValAtArrType);
+ vnStore->vnDump(this, newValAtArrType);
+ printf("\n");
+
+ printf(" fgCurHeapVN assigned:\n");
+ }
+#endif // DEBUG
+
+ // bbHeapDef must be set to true for any block that Mutates the global Heap
+ assert(compCurBB->bbHeapDef);
+
+ fgCurHeapVN = vnStore->VNForMapStore(TYP_REF, fgCurHeapVN, elemTypeEqVN, newValAtArrType);
+}
+
+ValueNum Compiler::fgValueNumberArrIndexVal(GenTreePtr tree, VNFuncApp* pFuncApp, ValueNum addrXvn)
+{
+ assert(vnStore->IsVNHandle(pFuncApp->m_args[0]));
+ CORINFO_CLASS_HANDLE arrElemTypeEQ = CORINFO_CLASS_HANDLE(vnStore->ConstantValue<ssize_t>(pFuncApp->m_args[0]));
+ ValueNum arrVN = pFuncApp->m_args[1];
+ ValueNum inxVN = pFuncApp->m_args[2];
+ FieldSeqNode* fldSeq = vnStore->FieldSeqVNToFieldSeq(pFuncApp->m_args[3]);
+ return fgValueNumberArrIndexVal(tree, arrElemTypeEQ, arrVN, inxVN, addrXvn, fldSeq);
+}
+
+ValueNum Compiler::fgValueNumberArrIndexVal(GenTreePtr tree,
+ CORINFO_CLASS_HANDLE elemTypeEq,
+ ValueNum arrVN,
+ ValueNum inxVN,
+ ValueNum excVN,
+ FieldSeqNode* fldSeq)
+{
+ assert(tree == nullptr || tree->OperIsIndir());
+
+ // The VN inputs are required to be non-exceptional values.
+ assert(arrVN == vnStore->VNNormVal(arrVN));
+ assert(inxVN == vnStore->VNNormVal(inxVN));
+
+ var_types elemTyp = DecodeElemType(elemTypeEq);
+ var_types indType = (tree == nullptr) ? elemTyp : tree->TypeGet();
+ ValueNum selectedElem;
+
+ if (fldSeq == FieldSeqStore::NotAField())
+ {
+ // This doesn't represent a proper array access
+ JITDUMP(" *** NotAField sequence encountered in fgValueNumberArrIndexVal\n");
+
+ // a new unique value number
+ selectedElem = vnStore->VNForExpr(compCurBB, elemTyp);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" IND of PtrToArrElem is unique VN " STR_VN "%x.\n", selectedElem);
+ }
+#endif // DEBUG
+
+ if (tree != nullptr)
+ {
+ tree->gtVNPair.SetBoth(selectedElem);
+ }
+ }
+ else
+ {
+ ValueNum elemTypeEqVN = vnStore->VNForHandle(ssize_t(elemTypeEq), GTF_ICON_CLASS_HDL);
+ ValueNum hAtArrType = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, fgCurHeapVN, elemTypeEqVN);
+ ValueNum hAtArrTypeAtArr = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, hAtArrType, arrVN);
+ ValueNum wholeElem = vnStore->VNForMapSelect(VNK_Liberal, elemTyp, hAtArrTypeAtArr, inxVN);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" hAtArrType " STR_VN "%x is MapSelect(curHeap(" STR_VN "%x), ", hAtArrType, fgCurHeapVN);
+ if (elemTyp == TYP_STRUCT)
+ {
+ printf("%s[]).\n", eeGetClassName(elemTypeEq));
+ }
+ else
+ {
+ printf("%s[]).\n", varTypeName(elemTyp));
+ }
+
+ printf(" hAtArrTypeAtArr " STR_VN "%x is MapSelect(hAtArrType(" STR_VN "%x), arr=" STR_VN "%x).\n",
+ hAtArrTypeAtArr, hAtArrType, arrVN);
+
+ printf(" wholeElem " STR_VN "%x is MapSelect(hAtArrTypeAtArr(" STR_VN "%x), ind=" STR_VN "%x).\n",
+ wholeElem, hAtArrTypeAtArr, inxVN);
+ }
+#endif // DEBUG
+
+ selectedElem = wholeElem;
+ size_t elemStructSize = 0;
+ if (fldSeq)
+ {
+ selectedElem = vnStore->VNApplySelectors(VNK_Liberal, wholeElem, fldSeq, &elemStructSize);
+ elemTyp = vnStore->TypeOfVN(selectedElem);
+ }
+ selectedElem = vnStore->VNApplySelectorsTypeCheck(selectedElem, indType, elemStructSize);
+ selectedElem = vnStore->VNWithExc(selectedElem, excVN);
+
+#ifdef DEBUG
+ if (verbose && (selectedElem != wholeElem))
+ {
+ printf(" selectedElem is " STR_VN "%x after applying selectors.\n", selectedElem);
+ }
+#endif // DEBUG
+
+ if (tree != nullptr)
+ {
+ tree->gtVNPair.SetLiberal(selectedElem);
+ // TODO-CQ: what to do here about exceptions? We don't have the array and ind conservative
+ // values, so we don't have their exceptions. Maybe we should.
+ tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+ }
+
+ return selectedElem;
+}
+
+var_types ValueNumStore::TypeOfVN(ValueNum vn)
+{
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ return c->m_typ;
+}
+
+//------------------------------------------------------------------------
+// LoopOfVN: If the given value number is an opaque one associated with a particular
+//    expression in the IR, return the loop number where the expression occurs; otherwise,
+//    return MAX_LOOP_NUM.
+//
+// Arguments:
+// vn - Value number to query
+//
+// Return Value:
+//    The corresponding block's bbNatLoopNum, which may be BasicBlock::NOT_IN_LOOP.
+// Returns MAX_LOOP_NUM if this VN is not an opaque value number associated with
+// a particular expression/location in the IR.
+
+BasicBlock::loopNumber ValueNumStore::LoopOfVN(ValueNum vn)
+{
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ return c->m_loopNum;
+}
+
+bool ValueNumStore::IsVNConstant(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ if (c->m_attribs == CEA_Const)
+ {
+ return vn != VNForVoid(); // Void is not a "real" constant -- in the sense that it represents no value.
+ }
+ else
+ {
+ return c->m_attribs == CEA_Handle;
+ }
+}
+
+bool ValueNumStore::IsVNInt32Constant(ValueNum vn)
+{
+ if (!IsVNConstant(vn))
+ {
+ return false;
+ }
+
+ return TypeOfVN(vn) == TYP_INT;
+}
+
+unsigned ValueNumStore::GetHandleFlags(ValueNum vn)
+{
+ assert(IsVNHandle(vn));
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ unsigned offset = ChunkOffset(vn);
+ VNHandle* handle = &reinterpret_cast<VNHandle*>(c->m_defs)[offset];
+ return handle->m_flags;
+}
+
+bool ValueNumStore::IsVNHandle(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ return c->m_attribs == CEA_Handle;
+}
+
+bool ValueNumStore::IsVNConstantBound(ValueNum vn)
+{
+ // Do we have "var < 100"?
+ if (vn == NoVN)
+ {
+ return false;
+ }
+
+ VNFuncApp funcAttr;
+ if (!GetVNFunc(vn, &funcAttr))
+ {
+ return false;
+ }
+ if (funcAttr.m_func != (VNFunc)GT_LE && funcAttr.m_func != (VNFunc)GT_GE && funcAttr.m_func != (VNFunc)GT_LT &&
+ funcAttr.m_func != (VNFunc)GT_GT)
+ {
+ return false;
+ }
+
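+    // A constant bound requires exactly one of the two operands to be an int32 constant
+    // (hence the inequality of the two checks below).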
+ return IsVNInt32Constant(funcAttr.m_args[0]) != IsVNInt32Constant(funcAttr.m_args[1]);
+}
+
+void ValueNumStore::GetConstantBoundInfo(ValueNum vn, ConstantBoundInfo* info)
+{
+ assert(IsVNConstantBound(vn));
+ assert(info);
+
+ // Do we have var < 100?
+ VNFuncApp funcAttr;
+ GetVNFunc(vn, &funcAttr);
+
+ bool isOp1Const = IsVNInt32Constant(funcAttr.m_args[1]);
+
+ if (isOp1Const)
+ {
+ info->cmpOper = funcAttr.m_func;
+ info->cmpOpVN = funcAttr.m_args[0];
+ info->constVal = GetConstantInt32(funcAttr.m_args[1]);
+ }
+ else
+ {
+ info->cmpOper = GenTree::SwapRelop((genTreeOps)funcAttr.m_func);
+ info->cmpOpVN = funcAttr.m_args[1];
+ info->constVal = GetConstantInt32(funcAttr.m_args[0]);
+ }
+}
+
+bool ValueNumStore::IsVNArrLenBound(ValueNum vn)
+{
+ // Do we have "var < a.len"?
+ if (vn == NoVN)
+ {
+ return false;
+ }
+
+ VNFuncApp funcAttr;
+ if (!GetVNFunc(vn, &funcAttr))
+ {
+ return false;
+ }
+ if (funcAttr.m_func != (VNFunc)GT_LE && funcAttr.m_func != (VNFunc)GT_GE && funcAttr.m_func != (VNFunc)GT_LT &&
+ funcAttr.m_func != (VNFunc)GT_GT)
+ {
+ return false;
+ }
+ if (!IsVNArrLen(funcAttr.m_args[0]) && !IsVNArrLen(funcAttr.m_args[1]))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+void ValueNumStore::GetArrLenBoundInfo(ValueNum vn, ArrLenArithBoundInfo* info)
+{
+ assert(IsVNArrLenBound(vn));
+
+ // Do we have var < a.len?
+ VNFuncApp funcAttr;
+ GetVNFunc(vn, &funcAttr);
+
+ bool isOp1ArrLen = IsVNArrLen(funcAttr.m_args[1]);
+ if (isOp1ArrLen)
+ {
+ info->cmpOper = funcAttr.m_func;
+ info->cmpOp = funcAttr.m_args[0];
+ info->vnArray = GetArrForLenVn(funcAttr.m_args[1]);
+ }
+ else
+ {
+ info->cmpOper = GenTree::SwapRelop((genTreeOps)funcAttr.m_func);
+ info->cmpOp = funcAttr.m_args[1];
+ info->vnArray = GetArrForLenVn(funcAttr.m_args[0]);
+ }
+}
+
+bool ValueNumStore::IsVNArrLenArith(ValueNum vn)
+{
+ // Do we have "a.len +or- var"
+ if (vn == NoVN)
+ {
+ return false;
+ }
+
+ VNFuncApp funcAttr;
+
+ return GetVNFunc(vn, &funcAttr) && // vn is a func.
+ (funcAttr.m_func == (VNFunc)GT_ADD || funcAttr.m_func == (VNFunc)GT_SUB) && // the func is +/-
+ (IsVNArrLen(funcAttr.m_args[0]) || IsVNArrLen(funcAttr.m_args[1])); // either op1 or op2 is a.len
+}
+
+void ValueNumStore::GetArrLenArithInfo(ValueNum vn, ArrLenArithBoundInfo* info)
+{
+ // Do we have a.len +/- var?
+ assert(IsVNArrLenArith(vn));
+ VNFuncApp funcArith;
+ GetVNFunc(vn, &funcArith);
+
+ bool isOp1ArrLen = IsVNArrLen(funcArith.m_args[1]);
+ if (isOp1ArrLen)
+ {
+ info->arrOper = funcArith.m_func;
+ info->arrOp = funcArith.m_args[0];
+ info->vnArray = GetArrForLenVn(funcArith.m_args[1]);
+ }
+ else
+ {
+ info->arrOper = funcArith.m_func;
+ info->arrOp = funcArith.m_args[1];
+ info->vnArray = GetArrForLenVn(funcArith.m_args[0]);
+ }
+}
+
+bool ValueNumStore::IsVNArrLenArithBound(ValueNum vn)
+{
+ // Do we have: "var < a.len - var"
+ if (vn == NoVN)
+ {
+ return false;
+ }
+
+ VNFuncApp funcAttr;
+ if (!GetVNFunc(vn, &funcAttr))
+ {
+ return false;
+ }
+
+ // Suitable comparator.
+ if (funcAttr.m_func != (VNFunc)GT_LE && funcAttr.m_func != (VNFunc)GT_GE && funcAttr.m_func != (VNFunc)GT_LT &&
+ funcAttr.m_func != (VNFunc)GT_GT)
+ {
+ return false;
+ }
+
+    // Either op0 or op1 is arr len arithmetic.
+ if (!IsVNArrLenArith(funcAttr.m_args[0]) && !IsVNArrLenArith(funcAttr.m_args[1]))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+void ValueNumStore::GetArrLenArithBoundInfo(ValueNum vn, ArrLenArithBoundInfo* info)
+{
+ assert(IsVNArrLenArithBound(vn));
+
+ VNFuncApp funcAttr;
+ GetVNFunc(vn, &funcAttr);
+
+    // Check whether op0 or op1 is arr len arithmetic.
+ bool isOp1ArrLenArith = IsVNArrLenArith(funcAttr.m_args[1]);
+ if (isOp1ArrLenArith)
+ {
+ info->cmpOper = funcAttr.m_func;
+ info->cmpOp = funcAttr.m_args[0];
+ GetArrLenArithInfo(funcAttr.m_args[1], info);
+ }
+ else
+ {
+ info->cmpOper = GenTree::SwapRelop((genTreeOps)funcAttr.m_func);
+ info->cmpOp = funcAttr.m_args[1];
+ GetArrLenArithInfo(funcAttr.m_args[0], info);
+ }
+}
+
+ValueNum ValueNumStore::GetArrForLenVn(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return NoVN;
+ }
+
+ VNFuncApp funcAttr;
+ if (GetVNFunc(vn, &funcAttr) && funcAttr.m_func == (VNFunc)GT_ARR_LENGTH)
+ {
+ return funcAttr.m_args[0];
+ }
+ return NoVN;
+}
+
+bool ValueNumStore::IsVNNewArr(ValueNum vn, VNFuncApp* funcApp)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+ bool result = false;
+ if (GetVNFunc(vn, funcApp))
+ {
+ result = (funcApp->m_func == VNF_JitNewArr) || (funcApp->m_func == VNF_JitReadyToRunNewArr);
+ }
+ return result;
+}
+
+int ValueNumStore::GetNewArrSize(ValueNum vn)
+{
+ VNFuncApp funcApp;
+ if (IsVNNewArr(vn, &funcApp))
+ {
+ ValueNum arg1VN = funcApp.m_args[1];
+ if (IsVNConstant(arg1VN) && TypeOfVN(arg1VN) == TYP_INT)
+ {
+ return ConstantValue<int>(arg1VN);
+ }
+ }
+ return 0;
+}
+
+bool ValueNumStore::IsVNArrLen(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+ VNFuncApp funcAttr;
+ return (GetVNFunc(vn, &funcAttr) && funcAttr.m_func == (VNFunc)GT_ARR_LENGTH);
+}
+
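+// EvalMathFuncUnary: Value-number a unary math intrinsic. A target-implemented intrinsic
+// applied to a constant argument is constant-folded here (the computation is done in double
+// precision and then narrowed to the result type); all other cases produce a VNF_* function
+// application over the argument.
+//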
+ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, CorInfoIntrinsics gtMathFN, ValueNum arg0VN)
+{
+ assert(arg0VN == VNNormVal(arg0VN));
+ if (IsVNConstant(arg0VN) && Compiler::IsTargetIntrinsic(gtMathFN))
+ {
+        // If the math intrinsic is not implemented by target-specific instructions (i.e., it
+        // is implemented via a user call), then don't do constant folding on it. This
+        // minimizes precision loss.
+ // I *may* need separate tracks for the double/float -- if the intrinsic funcs have overloads for these.
+ double arg0Val = GetConstantDouble(arg0VN);
+
+ double res = 0.0;
+ switch (gtMathFN)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ res = sin(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Cos:
+ res = cos(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Sqrt:
+ res = sqrt(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Abs:
+ res = fabs(arg0Val); // The result and params are doubles.
+ break;
+ case CORINFO_INTRINSIC_Round:
+ res = FloatingPointUtils::round(arg0Val);
+ break;
+ default:
+ unreached(); // the above are the only math intrinsics at the time of this writing.
+ }
+ if (typ == TYP_DOUBLE)
+ {
+ return VNForDoubleCon(res);
+ }
+ else if (typ == TYP_FLOAT)
+ {
+ return VNForFloatCon(float(res));
+ }
+ else
+ {
+ assert(typ == TYP_INT);
+ assert(gtMathFN == CORINFO_INTRINSIC_Round);
+
+ return VNForIntCon(int(res));
+ }
+ }
+ else
+ {
+ assert(typ == TYP_DOUBLE || typ == TYP_FLOAT || (typ == TYP_INT && gtMathFN == CORINFO_INTRINSIC_Round));
+
+ VNFunc vnf = VNF_Boundary;
+ switch (gtMathFN)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ vnf = VNF_Sin;
+ break;
+ case CORINFO_INTRINSIC_Cos:
+ vnf = VNF_Cos;
+ break;
+ case CORINFO_INTRINSIC_Sqrt:
+ vnf = VNF_Sqrt;
+ break;
+ case CORINFO_INTRINSIC_Abs:
+ vnf = VNF_Abs;
+ break;
+ case CORINFO_INTRINSIC_Round:
+ if (typ == TYP_DOUBLE)
+ {
+ vnf = VNF_RoundDouble;
+ }
+ else if (typ == TYP_FLOAT)
+ {
+ vnf = VNF_RoundFloat;
+ }
+ else if (typ == TYP_INT)
+ {
+ vnf = VNF_RoundInt;
+ }
+ else
+ {
+ noway_assert(!"Invalid INTRINSIC_Round");
+ }
+ break;
+ case CORINFO_INTRINSIC_Cosh:
+ vnf = VNF_Cosh;
+ break;
+ case CORINFO_INTRINSIC_Sinh:
+ vnf = VNF_Sinh;
+ break;
+ case CORINFO_INTRINSIC_Tan:
+ vnf = VNF_Tan;
+ break;
+ case CORINFO_INTRINSIC_Tanh:
+ vnf = VNF_Tanh;
+ break;
+ case CORINFO_INTRINSIC_Asin:
+ vnf = VNF_Asin;
+ break;
+ case CORINFO_INTRINSIC_Acos:
+ vnf = VNF_Acos;
+ break;
+ case CORINFO_INTRINSIC_Atan:
+ vnf = VNF_Atan;
+ break;
+ case CORINFO_INTRINSIC_Log10:
+ vnf = VNF_Log10;
+ break;
+ case CORINFO_INTRINSIC_Exp:
+ vnf = VNF_Exp;
+ break;
+ case CORINFO_INTRINSIC_Ceiling:
+ vnf = VNF_Ceiling;
+ break;
+ case CORINFO_INTRINSIC_Floor:
+ vnf = VNF_Floor;
+ break;
+ default:
+ unreached(); // the above are the only math intrinsics at the time of this writing.
+ }
+
+ return VNForFunc(typ, vnf, arg0VN);
+ }
+}
+
+ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, CorInfoIntrinsics gtMathFN, ValueNum arg0VN, ValueNum arg1VN)
+{
+ assert(varTypeIsFloating(typ));
+ assert(arg0VN == VNNormVal(arg0VN));
+ assert(arg1VN == VNNormVal(arg1VN));
+
+ VNFunc vnf = VNF_Boundary;
+
+ // Currently, none of the binary math intrinsics are implemented by target-specific instructions.
+ // To minimize precision loss, do not do constant folding on them.
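+ // For example, even Pow($c1, $c2) with two constant arguments is kept symbolically as
+ // VNForFunc(typ, VNF_Pow, arg0VN, arg1VN) rather than being evaluated at jit time.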
+
+ switch (gtMathFN)
+ {
+ case CORINFO_INTRINSIC_Atan2:
+ vnf = VNF_Atan2;
+ break;
+
+ case CORINFO_INTRINSIC_Pow:
+ vnf = VNF_Pow;
+ break;
+
+ default:
+ unreached(); // the above are the only binary math intrinsics at the time of this writing.
+ }
+
+ return VNForFunc(typ, vnf, arg0VN, arg1VN);
+}
+
+bool ValueNumStore::IsVNFunc(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ switch (c->m_attribs)
+ {
+ case CEA_Func0:
+ case CEA_Func1:
+ case CEA_Func2:
+ case CEA_Func3:
+ case CEA_Func4:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool ValueNumStore::GetVNFunc(ValueNum vn, VNFuncApp* funcApp)
+{
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ unsigned offset = ChunkOffset(vn);
+ assert(offset < c->m_numUsed);
+ switch (c->m_attribs)
+ {
+ case CEA_Func4:
+ {
+ VNDefFunc4Arg* farg4 = &reinterpret_cast<VNDefFunc4Arg*>(c->m_defs)[offset];
+ funcApp->m_func = farg4->m_func;
+ funcApp->m_arity = 4;
+ funcApp->m_args[0] = farg4->m_arg0;
+ funcApp->m_args[1] = farg4->m_arg1;
+ funcApp->m_args[2] = farg4->m_arg2;
+ funcApp->m_args[3] = farg4->m_arg3;
+ }
+ return true;
+ case CEA_Func3:
+ {
+ VNDefFunc3Arg* farg3 = &reinterpret_cast<VNDefFunc3Arg*>(c->m_defs)[offset];
+ funcApp->m_func = farg3->m_func;
+ funcApp->m_arity = 3;
+ funcApp->m_args[0] = farg3->m_arg0;
+ funcApp->m_args[1] = farg3->m_arg1;
+ funcApp->m_args[2] = farg3->m_arg2;
+ }
+ return true;
+ case CEA_Func2:
+ {
+ VNDefFunc2Arg* farg2 = &reinterpret_cast<VNDefFunc2Arg*>(c->m_defs)[offset];
+ funcApp->m_func = farg2->m_func;
+ funcApp->m_arity = 2;
+ funcApp->m_args[0] = farg2->m_arg0;
+ funcApp->m_args[1] = farg2->m_arg1;
+ }
+ return true;
+ case CEA_Func1:
+ {
+ VNDefFunc1Arg* farg1 = &reinterpret_cast<VNDefFunc1Arg*>(c->m_defs)[offset];
+ funcApp->m_func = farg1->m_func;
+ funcApp->m_arity = 1;
+ funcApp->m_args[0] = farg1->m_arg0;
+ }
+ return true;
+ case CEA_Func0:
+ {
+ VNDefFunc0Arg* farg0 = &reinterpret_cast<VNDefFunc0Arg*>(c->m_defs)[offset];
+ funcApp->m_func = farg0->m_func;
+ funcApp->m_arity = 0;
+ }
+ return true;
+ default:
+ return false;
+ }
+}
+
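+// Given the VN of an address (TYP_REF or TYP_BYREF), strip away any ADD/SUB offset arithmetic to
+// recover the VN of the underlying object reference (or constant byref); e.g., for an address
+// computed as "obj + 8 + index" this returns the VN of "obj".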
+ValueNum ValueNumStore::VNForRefInAddr(ValueNum vn)
+{
+ var_types vnType = TypeOfVN(vn);
+ if (vnType == TYP_REF)
+ {
+ return vn;
+ }
+ // Otherwise...
+ assert(vnType == TYP_BYREF);
+ VNFuncApp funcApp;
+ if (GetVNFunc(vn, &funcApp))
+ {
+ assert(funcApp.m_arity == 2 && (funcApp.m_func == VNFunc(GT_ADD) || funcApp.m_func == VNFunc(GT_SUB)));
+ var_types vnArg0Type = TypeOfVN(funcApp.m_args[0]);
+ if (vnArg0Type == TYP_REF || vnArg0Type == TYP_BYREF)
+ {
+ return VNForRefInAddr(funcApp.m_args[0]);
+ }
+ else
+ {
+ assert(funcApp.m_func == VNFunc(GT_ADD) &&
+ (TypeOfVN(funcApp.m_args[1]) == TYP_REF || TypeOfVN(funcApp.m_args[1]) == TYP_BYREF));
+ return VNForRefInAddr(funcApp.m_args[1]);
+ }
+ }
+ else
+ {
+ assert(IsVNConstant(vn));
+ return vn;
+ }
+}
+
+bool ValueNumStore::VNIsValid(ValueNum vn)
+{
+ ChunkNum cn = GetChunkNum(vn);
+ if (cn >= m_chunks.Size())
+ {
+ return false;
+ }
+ // Otherwise...
+ Chunk* c = m_chunks.GetNoExpand(cn);
+ return ChunkOffset(vn) < c->m_numUsed;
+}
+
+#ifdef DEBUG
+
+void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr)
+{
+ printf(" {");
+ if (vn == NoVN)
+ {
+ printf("NoVN");
+ }
+ else if (IsVNHandle(vn))
+ {
+ ssize_t val = ConstantValue<ssize_t>(vn);
+ printf("Hnd const: 0x%p", dspPtr(val));
+ }
+ else if (IsVNConstant(vn))
+ {
+ var_types vnt = TypeOfVN(vn);
+ switch (vnt)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ case TYP_USHORT:
+ case TYP_INT:
+ case TYP_UINT:
+ {
+ int val = ConstantValue<int>(vn);
+ if (isPtr)
+ {
+ printf("PtrCns[%p]", dspPtr(val));
+ }
+ else
+ {
+ printf("IntCns");
+ if ((val > -1000) && (val < 1000))
+ {
+ printf(" %ld", val);
+ }
+ else
+ {
+ printf(" 0x%X", val);
+ }
+ }
+ }
+ break;
+ case TYP_LONG:
+ case TYP_ULONG:
+ {
+ INT64 val = ConstantValue<INT64>(vn);
+ if (isPtr)
+ {
+ printf("LngPtrCns: 0x%p", dspPtr(val));
+ }
+ else
+ {
+ printf("LngCns: ");
+ if ((val > -1000) && (val < 1000))
+ {
+ printf(" %ld", val);
+ }
+ else if ((val & 0xFFFFFFFF00000000LL) == 0)
+ {
+ printf(" 0x%X", val);
+ }
+ else
+ {
+ printf(" 0x%llx", val);
+ }
+ }
+ }
+ break;
+ case TYP_FLOAT:
+ printf("FltCns[%f]", ConstantValue<float>(vn));
+ break;
+ case TYP_DOUBLE:
+ printf("DblCns[%f]", ConstantValue<double>(vn));
+ break;
+ case TYP_REF:
+ case TYP_ARRAY:
+ if (vn == VNForNull())
+ {
+ printf("null");
+ }
+ else if (vn == VNForVoid())
+ {
+ printf("void");
+ }
+ else
+ {
+ assert(vn == VNForZeroMap());
+ printf("zeroMap");
+ }
+ break;
+ case TYP_BYREF:
+ printf("byrefVal");
+ break;
+ case TYP_STRUCT:
+#ifdef FEATURE_SIMD
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+#endif // FEATURE_SIMD
+ printf("structVal");
+ break;
+
+ // These should be unreached.
+ default:
+ unreached();
+ }
+ }
+ else if (IsVNArrLenBound(vn))
+ {
+ ArrLenArithBoundInfo info;
+ GetArrLenBoundInfo(vn, &info);
+ info.dump(this);
+ }
+ else if (IsVNArrLenArithBound(vn))
+ {
+ ArrLenArithBoundInfo info;
+ GetArrLenArithBoundInfo(vn, &info);
+ info.dump(this);
+ }
+ else if (IsVNFunc(vn))
+ {
+ VNFuncApp funcApp;
+ GetVNFunc(vn, &funcApp);
+ // A few special cases...
+ switch (funcApp.m_func)
+ {
+ case VNF_FieldSeq:
+ vnDumpFieldSeq(comp, &funcApp, true);
+ break;
+ case VNF_MapSelect:
+ vnDumpMapSelect(comp, &funcApp);
+ break;
+ case VNF_MapStore:
+ vnDumpMapStore(comp, &funcApp);
+ break;
+ default:
+ printf("%s(", VNFuncName(funcApp.m_func));
+ for (unsigned i = 0; i < funcApp.m_arity; i++)
+ {
+ if (i > 0)
+ {
+ printf(", ");
+ }
+
+ printf(STR_VN "%x", funcApp.m_args[i]);
+
+#if FEATURE_VN_DUMP_FUNC_ARGS
+ printf("=");
+ vnDump(comp, funcApp.m_args[i]);
+#endif
+ }
+ printf(")");
+ }
+ }
+ else
+ {
+ // Otherwise, just a VN with no structure; print just the VN.
+ printf("%x", vn);
+ }
+ printf("}");
+}
+
+void ValueNumStore::vnDumpFieldSeq(Compiler* comp, VNFuncApp* fieldSeq, bool isHead)
+{
+ assert(fieldSeq->m_func == VNF_FieldSeq); // Precondition.
+ // First arg is the field handle VN.
+ assert(IsVNConstant(fieldSeq->m_args[0]) && TypeOfVN(fieldSeq->m_args[0]) == TYP_I_IMPL);
+ ssize_t fieldHndVal = ConstantValue<ssize_t>(fieldSeq->m_args[0]);
+ bool hasTail = (fieldSeq->m_args[1] != VNForNull());
+
+ if (isHead && hasTail)
+ {
+ printf("(");
+ }
+
+ CORINFO_FIELD_HANDLE fldHnd = CORINFO_FIELD_HANDLE(fieldHndVal);
+ if (fldHnd == FieldSeqStore::FirstElemPseudoField)
+ {
+ printf("#FirstElem");
+ }
+ else if (fldHnd == FieldSeqStore::ConstantIndexPseudoField)
+ {
+ printf("#ConstantIndex");
+ }
+ else
+ {
+ const char* modName;
+ const char* fldName = m_pComp->eeGetFieldName(fldHnd, &modName);
+ printf("%s", fldName);
+ }
+
+ if (hasTail)
+ {
+ printf(", ");
+ assert(IsVNFunc(fieldSeq->m_args[1]));
+ VNFuncApp tail;
+ GetVNFunc(fieldSeq->m_args[1], &tail);
+ vnDumpFieldSeq(comp, &tail, false);
+ }
+
+ if (isHead && hasTail)
+ {
+ printf(")");
+ }
+}
+
+void ValueNumStore::vnDumpMapSelect(Compiler* comp, VNFuncApp* mapSelect)
+{
+ assert(mapSelect->m_func == VNF_MapSelect); // Precondition.
+
+ ValueNum mapVN = mapSelect->m_args[0]; // First arg is the map id
+ ValueNum indexVN = mapSelect->m_args[1]; // Second arg is the index
+
+ comp->vnPrint(mapVN, 0);
+ printf("[");
+ comp->vnPrint(indexVN, 0);
+ printf("]");
+}
+
+void ValueNumStore::vnDumpMapStore(Compiler* comp, VNFuncApp* mapStore)
+{
+ assert(mapStore->m_func == VNF_MapStore); // Precondition.
+
+ ValueNum mapVN = mapStore->m_args[0]; // First arg is the map id
+ ValueNum indexVN = mapStore->m_args[1]; // Second arg is the index
+ ValueNum newValVN = mapStore->m_args[2]; // Third arg is the new value
+
+ comp->vnPrint(mapVN, 0);
+ printf("[");
+ comp->vnPrint(indexVN, 0);
+ printf(" := ");
+ comp->vnPrint(newValVN, 0);
+ printf("]");
+}
+#endif // DEBUG
+
+// Static fields, methods.
+static UINT8 vnfOpAttribs[VNF_COUNT];
+static genTreeOps genTreeOpsIllegalAsVNFunc[] = {GT_IND, // When we do heap memory.
+ GT_NULLCHECK, GT_QMARK, GT_COLON, GT_LOCKADD, GT_XADD, GT_XCHG,
+ GT_CMPXCHG, GT_LCLHEAP, GT_BOX,
+
+ // These need special semantics:
+ GT_COMMA, // == second argument (but with exception(s) from first).
+ GT_ADDR, GT_ARR_BOUNDS_CHECK,
+ GT_OBJ, // May reference heap memory.
+ GT_BLK, // May reference heap memory.
+
+ // These control-flow operations need no values.
+ GT_JTRUE, GT_RETURN, GT_SWITCH, GT_RETFILT, GT_CKFINITE};
+
+UINT8* ValueNumStore::s_vnfOpAttribs = nullptr;
+
+void ValueNumStore::InitValueNumStoreStatics()
+{
+ // Make sure we've gotten constants right...
+ assert(unsigned(VNFOA_Arity) == (1 << VNFOA_ArityShift));
+ assert(unsigned(VNFOA_AfterArity) == (unsigned(VNFOA_Arity) << VNFOA_ArityBits));
+
+ s_vnfOpAttribs = &vnfOpAttribs[0];
+ for (unsigned i = 0; i < GT_COUNT; i++)
+ {
+ genTreeOps gtOper = static_cast<genTreeOps>(i);
+ unsigned arity = 0;
+ if (GenTree::OperIsUnary(gtOper))
+ {
+ arity = 1;
+ }
+ else if (GenTree::OperIsBinary(gtOper))
+ {
+ arity = 2;
+ }
+ // Since GT_ARR_BOUNDS_CHECK is not currently GTK_BINOP
+ else if (gtOper == GT_ARR_BOUNDS_CHECK)
+ {
+ arity = 2;
+ }
+ vnfOpAttribs[i] |= (arity << VNFOA_ArityShift);
+
+ if (GenTree::OperIsCommutative(gtOper))
+ {
+ vnfOpAttribs[i] |= VNFOA_Commutative;
+ }
+ }
+
+ // I so wish this wasn't the best way to do this...
+
+ int vnfNum = VNF_Boundary + 1; // The macro definition below will update this after using it.
+
+#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic) \
+ if (commute) \
+ vnfOpAttribs[vnfNum] |= VNFOA_Commutative; \
+ if (knownNonNull) \
+ vnfOpAttribs[vnfNum] |= VNFOA_KnownNonNull; \
+ if (sharedStatic) \
+ vnfOpAttribs[vnfNum] |= VNFOA_SharedStatic; \
+ vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); \
+ vnfNum++;
+
+#include "valuenumfuncs.h"
+#undef ValueNumFuncDef
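+ // As an illustration of the macro expansion above (the real entries live in valuenumfuncs.h), a
+ // hypothetical entry ValueNumFuncDef(Sin, 1, false, false, false) would record an arity of 1 for
+ // VNF_Sin and leave the commutative/known-non-null/shared-static flags clear.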
+
+ unsigned n = sizeof(genTreeOpsIllegalAsVNFunc) / sizeof(genTreeOps);
+ for (unsigned i = 0; i < n; i++)
+ {
+ vnfOpAttribs[genTreeOpsIllegalAsVNFunc[i]] |= VNFOA_IllegalGenTreeOp;
+ }
+}
+
+#ifdef DEBUG
+// Define the name array.
+#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic) #vnf,
+
+const char* ValueNumStore::VNFuncNameArr[] = {
+#include "valuenumfuncs.h"
+#undef ValueNumFuncDef
+};
+
+// static
+const char* ValueNumStore::VNFuncName(VNFunc vnf)
+{
+ if (vnf < VNF_Boundary)
+ {
+ return GenTree::NodeName(genTreeOps(vnf));
+ }
+ else
+ {
+ return VNFuncNameArr[vnf - (VNF_Boundary + 1)];
+ }
+}
+
+static const char* s_reservedNameArr[] = {
+ "$VN.Recursive", // -2 RecursiveVN
+ "$VN.No", // -1 NoVN
+ "$VN.Null", // 0 VNForNull()
+ "$VN.ZeroMap", // 1 VNForZeroMap()
+ "$VN.NotAField", // 2 VNForNotAField()
+ "$VN.ReadOnlyHeap", // 3 VNForROH()
+ "$VN.Void", // 4 VNForVoid()
+ "$VN.EmptyExcSet" // 5 VNForEmptyExcSet()
+};
+
+// Returns the string name of "vn" when it is a reserved value number, nullptr otherwise
+// static
+const char* ValueNumStore::reservedName(ValueNum vn)
+{
+ int val = vn - ValueNumStore::RecursiveVN; // Add two, making 'RecursiveVN' equal to zero
+ int max = ValueNumStore::SRC_NumSpecialRefConsts - ValueNumStore::RecursiveVN;
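+ // E.g., vn == NoVN (-1) yields val == 1, selecting "$VN.No"; vn == VNForNull() (0) yields val == 2,
+ // selecting "$VN.Null".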
+
+ if ((val >= 0) && (val < max))
+ {
+ return s_reservedNameArr[val];
+ }
+ return nullptr;
+}
+
+#endif // DEBUG
+
+// Returns true if "vn" is a reserved value number
+
+// static
+bool ValueNumStore::isReservedVN(ValueNum vn)
+{
+ int val = vn - ValueNumStore::RecursiveVN; // Adding two, making 'RecursiveVN' equal to zero
+ int max = ValueNumStore::SRC_NumSpecialRefConsts - ValueNumStore::RecursiveVN;
+
+ if ((val >= 0) && (val < max))
+ {
+ return true;
+ }
+ return false;
+}
+
+#ifdef DEBUG
+void ValueNumStore::RunTests(Compiler* comp)
+{
+ VNFunc VNF_Add = GenTreeOpToVNFunc(GT_ADD);
+
+ ValueNumStore* vns = new (comp->getAllocatorDebugOnly()) ValueNumStore(comp, comp->getAllocatorDebugOnly());
+ ValueNum vnNull = VNForNull();
+ assert(vnNull == VNForNull());
+
+ ValueNum vnFor1 = vns->VNForIntCon(1);
+ assert(vnFor1 == vns->VNForIntCon(1));
+ assert(vns->TypeOfVN(vnFor1) == TYP_INT);
+ assert(vns->IsVNConstant(vnFor1));
+ assert(vns->ConstantValue<int>(vnFor1) == 1);
+
+ ValueNum vnFor100 = vns->VNForIntCon(100);
+ assert(vnFor100 == vns->VNForIntCon(100));
+ assert(vnFor100 != vnFor1);
+ assert(vns->TypeOfVN(vnFor100) == TYP_INT);
+ assert(vns->IsVNConstant(vnFor100));
+ assert(vns->ConstantValue<int>(vnFor100) == 100);
+
+ ValueNum vnFor1F = vns->VNForFloatCon(1.0f);
+ assert(vnFor1F == vns->VNForFloatCon(1.0f));
+ assert(vnFor1F != vnFor1 && vnFor1F != vnFor100);
+ assert(vns->TypeOfVN(vnFor1F) == TYP_FLOAT);
+ assert(vns->IsVNConstant(vnFor1F));
+ assert(vns->ConstantValue<float>(vnFor1F) == 1.0f);
+
+ ValueNum vnFor1D = vns->VNForDoubleCon(1.0);
+ assert(vnFor1D == vns->VNForDoubleCon(1.0));
+ assert(vnFor1D != vnFor1F && vnFor1D != vnFor1 && vnFor1D != vnFor100);
+ assert(vns->TypeOfVN(vnFor1D) == TYP_DOUBLE);
+ assert(vns->IsVNConstant(vnFor1D));
+ assert(vns->ConstantValue<double>(vnFor1D) == 1.0);
+
+ ValueNum vnRandom1 = vns->VNForExpr(nullptr, TYP_INT);
+ ValueNum vnForFunc2a = vns->VNForFunc(TYP_INT, VNF_Add, vnFor1, vnRandom1);
+ assert(vnForFunc2a == vns->VNForFunc(TYP_INT, VNF_Add, vnFor1, vnRandom1));
+ assert(vnForFunc2a != vnFor1D && vnForFunc2a != vnFor1F && vnForFunc2a != vnFor1 && vnForFunc2a != vnRandom1);
+ assert(vns->TypeOfVN(vnForFunc2a) == TYP_INT);
+ assert(!vns->IsVNConstant(vnForFunc2a));
+ assert(vns->IsVNFunc(vnForFunc2a));
+ VNFuncApp fa2a;
+ bool b = vns->GetVNFunc(vnForFunc2a, &fa2a);
+ assert(b);
+ assert(fa2a.m_func == VNF_Add && fa2a.m_arity == 2 && fa2a.m_args[0] == vnFor1 && fa2a.m_args[1] == vnRandom1);
+
+ ValueNum vnForFunc2b = vns->VNForFunc(TYP_INT, VNF_Add, vnFor1, vnFor100);
+ assert(vnForFunc2b == vns->VNForFunc(TYP_INT, VNF_Add, vnFor1, vnFor100));
+ assert(vnForFunc2b != vnFor1D && vnForFunc2b != vnFor1F && vnForFunc2b != vnFor1 && vnForFunc2b != vnFor100);
+ assert(vns->TypeOfVN(vnForFunc2b) == TYP_INT);
+ assert(vns->IsVNConstant(vnForFunc2b));
+ assert(vns->ConstantValue<int>(vnForFunc2b) == 101);
+
+ // printf("Did ValueNumStore::RunTests.\n");
+}
+#endif // DEBUG
+
+typedef ExpandArrayStack<BasicBlock*> BlockStack;
+
+// This represents the "to do" state of the value number computation.
+struct ValueNumberState
+{
+ // These two stacks collectively represent the set of blocks that are candidates for
+ // processing, because at least one predecessor has been processed. Blocks on "m_toDoAllPredsDone"
+ // have had *all* predecessors processed, and thus are candidates for some extra optimizations.
+ // Blocks on "m_toDoNotAllPredsDone" have at least one predecessor that has not been processed.
+ // A block initially on "m_toDoNotAllPredsDone" may be moved to "m_toDoAllPredsDone" when its last
+ // unprocessed predecessor is processed, thus maintaining the invariants.
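+ // As a minimal illustration: in a diamond A -> {B, C} -> D, finishing A puts B and C on
+ // "m_toDoAllPredsDone"; D first lands on "m_toDoNotAllPredsDone" when one of B/C finishes, and is
+ // added to "m_toDoAllPredsDone" once both have.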
+ BlockStack m_toDoAllPredsDone;
+ BlockStack m_toDoNotAllPredsDone;
+
+ Compiler* m_comp;
+
+ // TBD: This should really be a bitset...
+ // For now:
+ // first bit indicates completed,
+ // second bit indicates that it's been pushed on all-done stack,
+ // third bit indicates that it's been pushed on not-all-done stack.
+ BYTE* m_visited;
+
+ enum BlockVisitBits
+ {
+ BVB_complete = 0x1,
+ BVB_onAllDone = 0x2,
+ BVB_onNotAllDone = 0x4,
+ };
+
+ bool GetVisitBit(unsigned bbNum, BlockVisitBits bvb)
+ {
+ return (m_visited[bbNum] & bvb) != 0;
+ }
+ void SetVisitBit(unsigned bbNum, BlockVisitBits bvb)
+ {
+ m_visited[bbNum] |= bvb;
+ }
+
+ ValueNumberState(Compiler* comp)
+ : m_toDoAllPredsDone(comp->getAllocator(), /*minSize*/ 4)
+ , m_toDoNotAllPredsDone(comp->getAllocator(), /*minSize*/ 4)
+ , m_comp(comp)
+ , m_visited(new (comp, CMK_ValueNumber) BYTE[comp->fgBBNumMax + 1]())
+ {
+ }
+
+ BasicBlock* ChooseFromNotAllPredsDone()
+ {
+ assert(m_toDoAllPredsDone.Size() == 0);
+ // If we have no blocks with all preds done, then (ideally, if all cycles have been captured by loops)
+ // we must have at least one block within a loop. We want to do the loops first. Doing a loop entry block
+ // should break the cycle, making the rest of the body of the loop (unless there's a nested loop) doable by the
+ // all-preds-done rule. If several loop entry blocks are available, at least one should have all non-loop preds
+ // done -- we choose that.
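+ // For example, with a single natural loop whose entry E has one predecessor outside the loop and
+ // one back edge from inside it, E sits here with the back-edge pred unprocessed; once the outside
+ // pred is complete, E is returned, which breaks the cycle for the rest of the loop body.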
+ for (unsigned i = 0; i < m_toDoNotAllPredsDone.Size(); i++)
+ {
+ BasicBlock* cand = m_toDoNotAllPredsDone.Get(i);
+
+ // Skip any already-completed blocks (a block may have all its preds finished, get added to the
+ // all-preds-done todo set, and get processed there). Do this by moving the last one down, to
+ // keep the array compact.
+ while (GetVisitBit(cand->bbNum, BVB_complete))
+ {
+ if (i + 1 < m_toDoNotAllPredsDone.Size())
+ {
+ cand = m_toDoNotAllPredsDone.Pop();
+ m_toDoNotAllPredsDone.Set(i, cand);
+ }
+ else
+ {
+ // "cand" is the last element; delete it.
+ (void)m_toDoNotAllPredsDone.Pop();
+ break;
+ }
+ }
+ // We may have run out of non-complete candidates above. If so, we're done.
+ if (i == m_toDoNotAllPredsDone.Size())
+ {
+ break;
+ }
+
+ // See if "cand" is a loop entry.
+ unsigned lnum;
+ if (m_comp->optBlockIsLoopEntry(cand, &lnum))
+ {
+ // "lnum" is the innermost loop of which "cand" is the entry; find the outermost.
+ unsigned lnumPar = m_comp->optLoopTable[lnum].lpParent;
+ while (lnumPar != BasicBlock::NOT_IN_LOOP)
+ {
+ if (m_comp->optLoopTable[lnumPar].lpEntry == cand)
+ {
+ lnum = lnumPar;
+ }
+ else
+ {
+ break;
+ }
+ lnumPar = m_comp->optLoopTable[lnumPar].lpParent;
+ }
+
+ bool allNonLoopPredsDone = true;
+ for (flowList* pred = m_comp->BlockPredsWithEH(cand); pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ if (!m_comp->optLoopTable[lnum].lpContains(predBlock))
+ {
+ if (!GetVisitBit(predBlock->bbNum, BVB_complete))
+ {
+ allNonLoopPredsDone = false;
+ }
+ }
+ }
+ if (allNonLoopPredsDone)
+ {
+ return cand;
+ }
+ }
+ }
+
+ // If we didn't find a loop entry block with all non-loop preds done above, then return a random member (if
+ // there is one).
+ if (m_toDoNotAllPredsDone.Size() == 0)
+ {
+ return nullptr;
+ }
+ else
+ {
+ return m_toDoNotAllPredsDone.Pop();
+ }
+ }
+
+// Debugging output that is too detailed for a normal JIT dump...
+#define DEBUG_VN_VISIT 0
+
+ // Record that "blk" has been visited, and add any unvisited successors of "blk" to the appropriate todo set.
+ void FinishVisit(BasicBlock* blk)
+ {
+#ifdef DEBUG_VN_VISIT
+ JITDUMP("finish(BB%02u).\n", blk->bbNum);
+#endif // DEBUG_VN_VISIT
+
+ SetVisitBit(blk->bbNum, BVB_complete);
+
+ AllSuccessorIter succsEnd = blk->GetAllSuccs(m_comp).end();
+ for (AllSuccessorIter succs = blk->GetAllSuccs(m_comp).begin(); succs != succsEnd; ++succs)
+ {
+ BasicBlock* succ = (*succs);
+#ifdef DEBUG_VN_VISIT
+ JITDUMP(" Succ(BB%02u).\n", succ->bbNum);
+#endif // DEBUG_VN_VISIT
+
+ if (GetVisitBit(succ->bbNum, BVB_complete))
+ {
+ continue;
+ }
+#ifdef DEBUG_VN_VISIT
+ JITDUMP(" Not yet completed.\n");
+#endif // DEBUG_VN_VISIT
+
+ bool allPredsVisited = true;
+ for (flowList* pred = m_comp->BlockPredsWithEH(succ); pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ if (!GetVisitBit(predBlock->bbNum, BVB_complete))
+ {
+ allPredsVisited = false;
+ break;
+ }
+ }
+
+ if (allPredsVisited)
+ {
+#ifdef DEBUG_VN_VISIT
+ JITDUMP(" All preds complete, adding to allDone.\n");
+#endif // DEBUG_VN_VISIT
+
+ assert(!GetVisitBit(succ->bbNum, BVB_onAllDone)); // Only the completion of the last
+ // predecessor should add "succ" to this set.
+ m_toDoAllPredsDone.Push(succ);
+ SetVisitBit(succ->bbNum, BVB_onAllDone);
+ }
+ else
+ {
+#ifdef DEBUG_VN_VISIT
+ JITDUMP(" Not all preds complete Adding to notallDone, if necessary...\n");
+#endif // DEBUG_VN_VISIT
+
+ if (!GetVisitBit(succ->bbNum, BVB_onNotAllDone))
+ {
+#ifdef DEBUG_VN_VISIT
+ JITDUMP(" Was necessary.\n");
+#endif // DEBUG_VN_VISIT
+ m_toDoNotAllPredsDone.Push(succ);
+ SetVisitBit(succ->bbNum, BVB_onNotAllDone);
+ }
+ }
+ }
+ }
+
+ bool ToDoExists()
+ {
+ return m_toDoAllPredsDone.Size() > 0 || m_toDoNotAllPredsDone.Size() > 0;
+ }
+};
+
+void Compiler::fgValueNumber()
+{
+#ifdef DEBUG
+ // This could be a JITDUMP, but some people find it convenient to set a breakpoint on the printf.
+ if (verbose)
+ {
+ printf("\n*************** In fgValueNumber()\n");
+ }
+#endif
+
+ // If we skipped SSA, skip VN as well.
+ if (fgSsaPassesCompleted == 0)
+ {
+ return;
+ }
+
+ // Allocate the value number store.
+ assert(fgVNPassesCompleted > 0 || vnStore == nullptr);
+ if (fgVNPassesCompleted == 0)
+ {
+ CompAllocator* allocator = new (this, CMK_ValueNumber) CompAllocator(this, CMK_ValueNumber);
+ vnStore = new (this, CMK_ValueNumber) ValueNumStore(this, allocator);
+ }
+ else
+ {
+ ValueNumPair noVnp;
+ // Make sure the heap SSA names have no value numbers.
+ for (unsigned i = 0; i < lvHeapNumSsaNames; i++)
+ {
+ lvHeapPerSsaData.GetRef(i).m_vnPair = noVnp;
+ }
+ for (BasicBlock* blk = fgFirstBB; blk != nullptr; blk = blk->bbNext)
+ {
+ // Now iterate over the block's statements, and their trees.
+ for (GenTreePtr stmts = blk->FirstNonPhiDef(); stmts != nullptr; stmts = stmts->gtNext)
+ {
+ assert(stmts->IsStatement());
+ for (GenTreePtr tree = stmts->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ tree->gtVNPair.SetBoth(ValueNumStore::NoVN);
+ }
+ }
+ }
+ }
+
+ // Compute the side effects of loops.
+ optComputeLoopSideEffects();
+
+ // At the block level, we will use a modified worklist algorithm. We will have two
+ // "todo" sets of unvisited blocks. Blocks (other than the entry block) are put in a
+ // todo set only when some predecessor has been visited, so all blocks have at least one
+ // predecessor visited. The distinction between the two sets is whether *all* predecessors have
+ // already been visited. We visit such blocks preferentially if they exist, since phi definitions
+ // in such blocks will have all arguments defined, enabling a simplification in the case that all
+ // arguments to the phi have the same VN. If no such blocks exist, we pick a block with at least
+ // one unvisited predecessor. In this case, we assign a new VN for phi definitions.
+
+ // Start by giving incoming arguments value numbers.
+ // Also give must-init vars a zero of their type.
+ for (unsigned i = 0; i < lvaCount; i++)
+ {
+ LclVarDsc* varDsc = &lvaTable[i];
+ if (varDsc->lvIsParam)
+ {
+ // We assume that code equivalent to this variable initialization loop
+ // has been performed when doing SSA naming, so that all the variables we give
+ // initial VNs to here have been given initial SSA definitions there.
+ // SSA numbers always start from FIRST_SSA_NUM, and we give the value number to SSA name FIRST_SSA_NUM.
+ // We use the VNF_InitVal(i) from here so we know that this value is loop-invariant
+ // in all loops.
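+ // For example (a sketch): the parameter with lclNum 1 gets VNF_InitVal($c1), where $c1 is the VN of
+ // the constant 1, so all uses of its initial SSA definition share that same VN.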
+ ValueNum initVal = vnStore->VNForFunc(varDsc->TypeGet(), VNF_InitVal, vnStore->VNForIntCon(i));
+ LclSsaVarDsc* ssaDef = varDsc->GetPerSsaData(SsaConfig::FIRST_SSA_NUM);
+ ssaDef->m_vnPair.SetBoth(initVal);
+ ssaDef->m_defLoc.m_blk = fgFirstBB;
+ }
+ else if (info.compInitMem || varDsc->lvMustInit ||
+ (varDsc->lvTracked && VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, varDsc->lvVarIndex)))
+ {
+ // The last clause covers the use-before-def variables (the ones that are live-in to the first block):
+ // these are variables that are read before being initialized (at least on some control flow paths).
+ // If they are not must-init, then they get VNF_InitVal(i), as with the param case.
+
+ bool isZeroed = (info.compInitMem || varDsc->lvMustInit);
+ ValueNum initVal = ValueNumStore::NoVN; // We must assign a new value to initVal
+ var_types typ = varDsc->TypeGet();
+
+ switch (typ)
+ {
+ case TYP_LCLBLK: // The outgoing args area for arm and x64
+ case TYP_BLK: // A blob of memory
+ // TYP_BLK is used for the EHSlots LclVar on x86 (aka shadowSPslotsVar)
+ // and for the lvaInlinedPInvokeFrameVar on x64, arm and x86
+ // The stack memory associated with these LclVars is not zero-initialized,
+ // thus we set 'initVal' to a new, unique VN.
+ //
+ initVal = vnStore->VNForExpr(fgFirstBB);
+ break;
+
+ case TYP_BYREF:
+ if (isZeroed)
+ {
+ // LclVars of TYP_BYREF can be zero-inited.
+ initVal = vnStore->VNForByrefCon(0);
+ }
+ else
+ {
+ // Here we have uninitialized TYP_BYREF
+ initVal = vnStore->VNForFunc(typ, VNF_InitVal, vnStore->VNForIntCon(i));
+ }
+ break;
+
+ default:
+ if (isZeroed)
+ {
+ // By default we will zero init these LclVars
+ initVal = vnStore->VNZeroForType(typ);
+ }
+ else
+ {
+ initVal = vnStore->VNForFunc(typ, VNF_InitVal, vnStore->VNForIntCon(i));
+ }
+ break;
+ }
+#ifdef _TARGET_X86_
+ bool isVarargParam = (i == lvaVarargsBaseOfStkArgs || i == lvaVarargsHandleArg);
+ if (isVarargParam)
+ initVal = vnStore->VNForExpr(fgFirstBB); // a new, unique VN.
+#endif
+ assert(initVal != ValueNumStore::NoVN);
+
+ LclSsaVarDsc* ssaDef = varDsc->GetPerSsaData(SsaConfig::FIRST_SSA_NUM);
+ ssaDef->m_vnPair.SetBoth(initVal);
+ ssaDef->m_defLoc.m_blk = fgFirstBB;
+ }
+ }
+ // Give "Heap" an initial value number (about which we know nothing).
+ ValueNum heapInitVal = vnStore->VNForFunc(TYP_REF, VNF_InitVal, vnStore->VNForIntCon(-1)); // Use -1 for the heap.
+ GetHeapPerSsaData(SsaConfig::FIRST_SSA_NUM)->m_vnPair.SetBoth(heapInitVal);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Heap Initial Value in BB01 is: " STR_VN "%x\n", heapInitVal);
+ }
+#endif // DEBUG
+
+ ValueNumberState vs(this);
+
+ // Push the first block. This has no preds.
+ vs.m_toDoAllPredsDone.Push(fgFirstBB);
+
+ while (vs.ToDoExists())
+ {
+ while (vs.m_toDoAllPredsDone.Size() > 0)
+ {
+ BasicBlock* toDo = vs.m_toDoAllPredsDone.Pop();
+ fgValueNumberBlock(toDo, /*newVNsForPhis*/ false);
+ // Record that we've visited "toDo", and add successors to the right sets.
+ vs.FinishVisit(toDo);
+ }
+ // OK, we've run out of blocks whose predecessors are done. Pick one whose predecessors are not all done,
+ // process that. This may make more "all-done" blocks, so we'll go around the outer loop again --
+ // note that this is an "if", not a "while" loop.
+ if (vs.m_toDoNotAllPredsDone.Size() > 0)
+ {
+ BasicBlock* toDo = vs.ChooseFromNotAllPredsDone();
+ if (toDo == nullptr)
+ {
+ continue; // We may have run out, because of completed blocks on the not-all-preds done list.
+ }
+
+ fgValueNumberBlock(toDo, /*newVNsForPhis*/ true);
+ // Record that we've visited "toDo", and add successors to the right sets.
+ vs.FinishVisit(toDo);
+ }
+ }
+
+#ifdef DEBUG
+ JitTestCheckVN();
+#endif // DEBUG
+
+ fgVNPassesCompleted++;
+}
+
+void Compiler::fgValueNumberBlock(BasicBlock* blk, bool newVNsForPhis)
+{
+ compCurBB = blk;
+
+#ifdef DEBUG
+ compCurStmtNum = blk->bbStmtNum - 1; // Set compCurStmtNum
+#endif
+
+ unsigned outerLoopNum = BasicBlock::NOT_IN_LOOP;
+
+ // First: visit phis. If "newVNsForPhis", give them new VNs. If not,
+ // first check to see if all phi args have the same value.
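+ // E.g., if every phi arg carries $100 for both the liberal and conservative VNs, the phi def simply
+ // gets $100; otherwise it gets a VNF_PhiDef(lclNum, ssaNum, <phi args>) function VN (see below).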
+ GenTreePtr firstNonPhi = blk->FirstNonPhiDef();
+ for (GenTreePtr phiDefs = blk->bbTreeList; phiDefs != firstNonPhi; phiDefs = phiDefs->gtNext)
+ {
+ // TODO-Cleanup: It has been proposed that we should have an IsPhiDef predicate. We would use it
+ // in Block::FirstNonPhiDef as well.
+ GenTreePtr phiDef = phiDefs->gtStmt.gtStmtExpr;
+ assert(phiDef->OperGet() == GT_ASG);
+ GenTreeLclVarCommon* newSsaVar = phiDef->gtOp.gtOp1->AsLclVarCommon();
+
+ ValueNumPair phiAppVNP;
+ ValueNumPair sameVNPair;
+
+ GenTreePtr phiFunc = phiDef->gtOp.gtOp2;
+
+ // At this point a GT_PHI node should never have a nullptr for gtOp1
+ // and the gtOp1 should always be a GT_LIST node.
+ GenTreePtr phiOp1 = phiFunc->gtOp.gtOp1;
+ noway_assert(phiOp1 != nullptr);
+ noway_assert(phiOp1->OperGet() == GT_LIST);
+
+ GenTreeArgList* phiArgs = phiFunc->gtOp.gtOp1->AsArgList();
+
+ // A GT_PHI node should have more than one argument.
+ noway_assert(phiArgs->Rest() != nullptr);
+
+ GenTreeLclVarCommon* phiArg = phiArgs->Current()->AsLclVarCommon();
+ phiArgs = phiArgs->Rest();
+
+ phiAppVNP.SetBoth(vnStore->VNForIntCon(phiArg->gtSsaNum));
+ bool allSameLib = true;
+ bool allSameCons = true;
+ sameVNPair = lvaTable[phiArg->gtLclNum].GetPerSsaData(phiArg->gtSsaNum)->m_vnPair;
+ if (!sameVNPair.BothDefined())
+ {
+ allSameLib = false;
+ allSameCons = false;
+ }
+ while (phiArgs != nullptr)
+ {
+ phiArg = phiArgs->Current()->AsLclVarCommon();
+ // Set the VN of the phi arg.
+ phiArg->gtVNPair = lvaTable[phiArg->gtLclNum].GetPerSsaData(phiArg->gtSsaNum)->m_vnPair;
+ if (phiArg->gtVNPair.BothDefined())
+ {
+ if (phiArg->gtVNPair.GetLiberal() != sameVNPair.GetLiberal())
+ {
+ allSameLib = false;
+ }
+ if (phiArg->gtVNPair.GetConservative() != sameVNPair.GetConservative())
+ {
+ allSameCons = false;
+ }
+ }
+ else
+ {
+ allSameLib = false;
+ allSameCons = false;
+ }
+ ValueNumPair phiArgSsaVNP;
+ phiArgSsaVNP.SetBoth(vnStore->VNForIntCon(phiArg->gtSsaNum));
+ phiAppVNP = vnStore->VNPairForFunc(newSsaVar->TypeGet(), VNF_Phi, phiArgSsaVNP, phiAppVNP);
+ phiArgs = phiArgs->Rest();
+ }
+
+ ValueNumPair newVNPair;
+ if (allSameLib)
+ {
+ newVNPair.SetLiberal(sameVNPair.GetLiberal());
+ }
+ else
+ {
+ newVNPair.SetLiberal(phiAppVNP.GetLiberal());
+ }
+ if (allSameCons)
+ {
+ newVNPair.SetConservative(sameVNPair.GetConservative());
+ }
+ else
+ {
+ newVNPair.SetConservative(phiAppVNP.GetConservative());
+ }
+
+ LclSsaVarDsc* newSsaVarDsc = lvaTable[newSsaVar->gtLclNum].GetPerSsaData(newSsaVar->GetSsaNum());
+ // If all the args of the phi had the same values (liberal and conservative), then there wasn't really
+ // a reason to have the phi -- just pass on that value.
+ if (allSameLib && allSameCons)
+ {
+ newSsaVarDsc->m_vnPair = newVNPair;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("In SSA definition, incoming phi args all same, set VN of local %d/%d to ",
+ newSsaVar->GetLclNum(), newSsaVar->GetSsaNum());
+ vnpPrint(newVNPair, 1);
+ printf(".\n");
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ // They were not the same; we need to create a phi definition.
+ ValueNumPair lclNumVNP;
+ lclNumVNP.SetBoth(ValueNum(newSsaVar->GetLclNum()));
+ ValueNumPair ssaNumVNP;
+ ssaNumVNP.SetBoth(ValueNum(newSsaVar->GetSsaNum()));
+ ValueNumPair vnPhiDef =
+ vnStore->VNPairForFunc(newSsaVar->TypeGet(), VNF_PhiDef, lclNumVNP, ssaNumVNP, phiAppVNP);
+ newSsaVarDsc->m_vnPair = vnPhiDef;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("SSA definition: set VN of local %d/%d to ", newSsaVar->GetLclNum(), newSsaVar->GetSsaNum());
+ vnpPrint(vnPhiDef, 1);
+ printf(".\n");
+ }
+#endif // DEBUG
+ }
+ }
+
+ // Now do the same for "Heap".
+ // Is there a phi for this block?
+ if (blk->bbHeapSsaPhiFunc == nullptr)
+ {
+ fgCurHeapVN = GetHeapPerSsaData(blk->bbHeapSsaNumIn)->m_vnPair.GetLiberal();
+ assert(fgCurHeapVN != ValueNumStore::NoVN);
+ }
+ else
+ {
+ unsigned loopNum;
+ ValueNum newHeapVN;
+ if (optBlockIsLoopEntry(blk, &loopNum))
+ {
+ newHeapVN = fgHeapVNForLoopSideEffects(blk, loopNum);
+ }
+ else
+ {
+ // Are all the VN's the same?
+ BasicBlock::HeapPhiArg* phiArgs = blk->bbHeapSsaPhiFunc;
+ assert(phiArgs != BasicBlock::EmptyHeapPhiDef);
+ // There should be > 1 args to a phi.
+ assert(phiArgs->m_nextArg != nullptr);
+ ValueNum phiAppVN = vnStore->VNForIntCon(phiArgs->GetSsaNum());
+ JITDUMP(" Building phi application: $%x = SSA# %d.\n", phiAppVN, phiArgs->GetSsaNum());
+ bool allSame = true;
+ ValueNum sameVN = GetHeapPerSsaData(phiArgs->GetSsaNum())->m_vnPair.GetLiberal();
+ if (sameVN == ValueNumStore::NoVN)
+ {
+ allSame = false;
+ }
+ phiArgs = phiArgs->m_nextArg;
+ while (phiArgs != nullptr)
+ {
+ ValueNum phiArgVN = GetHeapPerSsaData(phiArgs->GetSsaNum())->m_vnPair.GetLiberal();
+ if (phiArgVN == ValueNumStore::NoVN || phiArgVN != sameVN)
+ {
+ allSame = false;
+ }
+#ifdef DEBUG
+ ValueNum oldPhiAppVN = phiAppVN;
+#endif
+ unsigned phiArgSSANum = phiArgs->GetSsaNum();
+ ValueNum phiArgSSANumVN = vnStore->VNForIntCon(phiArgSSANum);
+ JITDUMP(" Building phi application: $%x = SSA# %d.\n", phiArgSSANumVN, phiArgSSANum);
+ phiAppVN = vnStore->VNForFunc(TYP_REF, VNF_Phi, phiArgSSANumVN, phiAppVN);
+ JITDUMP(" Building phi application: $%x = phi($%x, $%x).\n", phiAppVN, phiArgSSANumVN, oldPhiAppVN);
+ phiArgs = phiArgs->m_nextArg;
+ }
+ if (allSame)
+ {
+ newHeapVN = sameVN;
+ }
+ else
+ {
+ newHeapVN =
+ vnStore->VNForFunc(TYP_REF, VNF_PhiHeapDef, vnStore->VNForHandle(ssize_t(blk), 0), phiAppVN);
+ }
+ }
+ GetHeapPerSsaData(blk->bbHeapSsaNumIn)->m_vnPair.SetLiberal(newHeapVN);
+ fgCurHeapVN = newHeapVN;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("The SSA definition for heap (#%d) at start of BB%02u is ", blk->bbHeapSsaNumIn, blk->bbNum);
+ vnPrint(fgCurHeapVN, 1);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Now iterate over the remaining statements, and their trees.
+ for (GenTreePtr stmt = firstNonPhi; stmt != nullptr; stmt = stmt->gtNext)
+ {
+ assert(stmt->IsStatement());
+
+#ifdef DEBUG
+ compCurStmtNum++;
+ if (verbose)
+ {
+ printf("\n***** BB%02u, stmt %d (before)\n", blk->bbNum, compCurStmtNum);
+ gtDispTree(stmt->gtStmt.gtStmtExpr);
+ printf("\n");
+ }
+#endif
+
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ fgValueNumberTree(tree);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n***** BB%02u, stmt %d (after)\n", blk->bbNum, compCurStmtNum);
+ gtDispTree(stmt->gtStmt.gtStmtExpr);
+ printf("\n");
+ if (stmt->gtNext)
+ {
+ printf("---------\n");
+ }
+ }
+#endif
+ }
+
+ if (blk->bbHeapSsaNumOut != blk->bbHeapSsaNumIn)
+ {
+ GetHeapPerSsaData(blk->bbHeapSsaNumOut)->m_vnPair.SetLiberal(fgCurHeapVN);
+ }
+
+ compCurBB = nullptr;
+}
+
+ValueNum Compiler::fgHeapVNForLoopSideEffects(BasicBlock* entryBlock, unsigned innermostLoopNum)
+{
+ // "loopNum" is the innermost loop for which "blk" is the entry; find the outermost one.
+ assert(innermostLoopNum != BasicBlock::NOT_IN_LOOP);
+ unsigned loopsInNest = innermostLoopNum;
+ unsigned loopNum = innermostLoopNum;
+ while (loopsInNest != BasicBlock::NOT_IN_LOOP)
+ {
+ if (optLoopTable[loopsInNest].lpEntry != entryBlock)
+ {
+ break;
+ }
+ loopNum = loopsInNest;
+ loopsInNest = optLoopTable[loopsInNest].lpParent;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Computing heap state for block BB%02u, entry block for loops %d to %d:\n", entryBlock->bbNum,
+ innermostLoopNum, loopNum);
+ }
+#endif // DEBUG
+
+ // If this loop has heap havoc effects, just use a new, unique VN.
+ if (optLoopTable[loopNum].lpLoopHasHeapHavoc)
+ {
+ ValueNum res = vnStore->VNForExpr(entryBlock, TYP_REF);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Loop %d has heap havoc effect; heap state is new fresh $%x.\n", loopNum, res);
+ }
+#endif // DEBUG
+ return res;
+ }
+
+ // Otherwise, find the predecessors of the entry block that are not in the loop.
+ // If there is only one such, use its heap value as the "base." If more than one,
+ // use a new unique heap VN.
+ BasicBlock* nonLoopPred = nullptr;
+ bool multipleNonLoopPreds = false;
+ for (flowList* pred = BlockPredsWithEH(entryBlock); pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ if (!optLoopTable[loopNum].lpContains(predBlock))
+ {
+ if (nonLoopPred == nullptr)
+ {
+ nonLoopPred = predBlock;
+ }
+ else
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Entry block has >1 non-loop preds: (at least) BB%02u and BB%02u.\n", nonLoopPred->bbNum,
+ predBlock->bbNum);
+ }
+#endif // DEBUG
+ multipleNonLoopPreds = true;
+ break;
+ }
+ }
+ }
+ if (multipleNonLoopPreds)
+ {
+ ValueNum res = vnStore->VNForExpr(entryBlock, TYP_REF);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Therefore, heap state is new, fresh $%x.\n", res);
+ }
+#endif // DEBUG
+ return res;
+ }
+ // Otherwise, there is a single non-loop pred.
+ assert(nonLoopPred != nullptr);
+ // What is its heap post-state?
+ ValueNum newHeapVN = GetHeapPerSsaData(nonLoopPred->bbHeapSsaNumOut)->m_vnPair.GetLiberal();
+ assert(newHeapVN !=
+ ValueNumStore::NoVN); // We must have processed the single non-loop pred before reaching the loop entry.
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Init heap state is $%x, with new, fresh VN at:\n", newHeapVN);
+ }
+#endif // DEBUG
+ // Modify "base" by setting all the modified fields/field maps/array maps to unknown values.
+ // First the fields/field maps.
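+ // E.g., if the loop writes static field F, the base heap VN H becomes MapStore(H, VN(F-handle), <fresh VN>),
+ // so later reads of F inside the loop select an unknown (fresh) value, while unmodified fields still
+ // select their pre-loop values out of H.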
+
+ Compiler::LoopDsc::FieldHandleSet* fieldsMod = optLoopTable[loopNum].lpFieldsModified;
+ if (fieldsMod != nullptr)
+ {
+ for (Compiler::LoopDsc::FieldHandleSet::KeyIterator ki = fieldsMod->Begin(); !ki.Equal(fieldsMod->End()); ++ki)
+ {
+ CORINFO_FIELD_HANDLE fldHnd = ki.Get();
+ ValueNum fldHndVN = vnStore->VNForHandle(ssize_t(fldHnd), GTF_ICON_FIELD_HDL);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ const char* modName;
+ const char* fldName = eeGetFieldName(fldHnd, &modName);
+ printf(" VNForHandle(Fseq[%s]) is " STR_VN "%x\n", fldName, fldHndVN);
+
+ printf(" fgCurHeapVN assigned:\n");
+ }
+#endif // DEBUG
+
+ newHeapVN = vnStore->VNForMapStore(TYP_REF, newHeapVN, fldHndVN, vnStore->VNForExpr(entryBlock, TYP_REF));
+ }
+ }
+ // Now do the array maps.
+ Compiler::LoopDsc::ClassHandleSet* elemTypesMod = optLoopTable[loopNum].lpArrayElemTypesModified;
+ if (elemTypesMod != nullptr)
+ {
+ for (Compiler::LoopDsc::ClassHandleSet::KeyIterator ki = elemTypesMod->Begin(); !ki.Equal(elemTypesMod->End());
+ ++ki)
+ {
+ CORINFO_CLASS_HANDLE elemClsHnd = ki.Get();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ var_types elemTyp = DecodeElemType(elemClsHnd);
+ if (varTypeIsStruct(elemTyp))
+ {
+ printf(" Array map %s[]\n", eeGetClassName(elemClsHnd));
+ }
+ else
+ {
+ printf(" Array map %s[]\n", varTypeName(elemTyp));
+ }
+ printf(" fgCurHeapVN assigned:\n");
+ }
+#endif // DEBUG
+
+ ValueNum elemTypeVN = vnStore->VNForHandle(ssize_t(elemClsHnd), GTF_ICON_CLASS_HDL);
+ ValueNum uniqueVN = vnStore->VNForExpr(entryBlock, TYP_REF);
+ newHeapVN = vnStore->VNForMapStore(TYP_REF, newHeapVN, elemTypeVN, uniqueVN);
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Final heap state is $%x.\n", newHeapVN);
+ }
+#endif // DEBUG
+ return newHeapVN;
+}
+
+void Compiler::fgMutateHeap(GenTreePtr tree DEBUGARG(const char* msg))
+{
+ // bbHeapDef must be set to true for any block that mutates the global heap.
+ assert(compCurBB->bbHeapDef);
+
+ fgCurHeapVN = vnStore->VNForExpr(compCurBB, TYP_REF);
+
+ // If we're tracking the heap SSA # caused by this node, record it.
+ fgValueNumberRecordHeapSsa(tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" fgCurHeapVN assigned by %s at ", msg);
+ Compiler::printTreeID(tree);
+ printf(" to new unique VN: " STR_VN "%x.\n", fgCurHeapVN);
+ }
+#endif // DEBUG
+}
+
+void Compiler::fgValueNumberRecordHeapSsa(GenTreePtr tree)
+{
+ unsigned ssaNum;
+ if (GetHeapSsaMap()->Lookup(tree, &ssaNum))
+ {
+ GetHeapPerSsaData(ssaNum)->m_vnPair.SetLiberal(fgCurHeapVN);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Node ");
+ Compiler::printTreeID(tree);
+ printf(" sets heap SSA # %d to VN $%x: ", ssaNum, fgCurHeapVN);
+ vnStore->vnDump(this, fgCurHeapVN);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+}
+
+// The input 'tree' is a leaf node that is a constant
+// Assign the proper value number to the tree
+void Compiler::fgValueNumberTreeConst(GenTreePtr tree)
+{
+ genTreeOps oper = tree->OperGet();
+ var_types typ = tree->TypeGet();
+ assert(GenTree::OperIsConst(oper));
+
+ switch (typ)
+ {
+ case TYP_LONG:
+ case TYP_ULONG:
+ case TYP_INT:
+ case TYP_UINT:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_BOOL:
+ if (tree->IsCnsIntOrI() && tree->IsIconHandle())
+ {
+ tree->gtVNPair.SetBoth(
+ vnStore->VNForHandle(ssize_t(tree->gtIntConCommon.IconValue()), tree->GetIconHandleFlag()));
+ }
+ else if ((typ == TYP_LONG) || (typ == TYP_ULONG))
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForLongCon(INT64(tree->gtIntConCommon.LngValue())));
+ }
+ else
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForIntCon(int(tree->gtIntConCommon.IconValue())));
+ }
+ break;
+
+ case TYP_FLOAT:
+ tree->gtVNPair.SetBoth(vnStore->VNForFloatCon((float)tree->gtDblCon.gtDconVal));
+ break;
+ case TYP_DOUBLE:
+ tree->gtVNPair.SetBoth(vnStore->VNForDoubleCon(tree->gtDblCon.gtDconVal));
+ break;
+ case TYP_REF:
+ // Null is the only constant. (Except maybe for String?)
+ tree->gtVNPair.SetBoth(ValueNumStore::VNForNull());
+ break;
+
+ case TYP_BYREF:
+ if (tree->gtIntConCommon.IconValue() == 0)
+ {
+ tree->gtVNPair.SetBoth(ValueNumStore::VNForNull());
+ }
+ else
+ {
+ assert(tree->IsCnsIntOrI());
+
+ if (tree->IsIconHandle())
+ {
+ tree->gtVNPair.SetBoth(
+ vnStore->VNForHandle(ssize_t(tree->gtIntConCommon.IconValue()), tree->GetIconHandleFlag()));
+ }
+ else
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForByrefCon(tree->gtIntConCommon.IconValue()));
+ }
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// fgValueNumberBlockAssignment: Perform value numbering for block assignments.
+//
+// Arguments:
+// tree - the block assignment to be value numbered.
+// evalAsgLhsInd - true iff we should value number the LHS of the assignment.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// 'tree' must be a block assignment (GT_INITBLK, GT_COPYBLK, GT_COPYOBJ).
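+//
+// Notes:
+// For example, an init block that stores zero over the whole of an SSA-tracked local gives the
+// local's new SSA def the zero VN for its type (the "initVal == 0" case below); other cases get a
+// new, unique VN.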
+
+void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
+{
+ GenTree* lhs = tree->gtGetOp1();
+ GenTree* rhs = tree->gtGetOp2();
+#ifdef DEBUG
+ // Sometimes we query the heap ssa map, and need a dummy location for the ignored result.
+ unsigned heapSsaNum;
+#endif
+
+ if (tree->OperIsInitBlkOp())
+ {
+ GenTreeLclVarCommon* lclVarTree;
+ bool isEntire;
+
+ if (tree->DefinesLocal(this, &lclVarTree, &isEntire))
+ {
+ assert(lclVarTree->gtFlags & GTF_VAR_DEF);
+ // Should not have been recorded as updating the heap.
+ assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum));
+
+ unsigned lclNum = lclVarTree->GetLclNum();
+
+ // Ignore vars that we excluded from SSA (for example, because they're address-exposed). They don't have
+ // SSA names in which to store VN's on defs. We'll yield unique VN's when we read from them.
+ if (!fgExcludeFromSsa(lclNum))
+ {
+ unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree);
+
+ ValueNum initBlkVN = ValueNumStore::NoVN;
+ GenTreePtr initConst = rhs;
+ if (isEntire && initConst->OperGet() == GT_CNS_INT)
+ {
+ unsigned initVal = 0xFF & (unsigned)initConst->AsIntConCommon()->IconValue();
+ if (initVal == 0)
+ {
+ initBlkVN = vnStore->VNZeroForType(lclVarTree->TypeGet());
+ }
+ }
+ ValueNum lclVarVN = (initBlkVN != ValueNumStore::NoVN)
+ ? initBlkVN
+ : vnStore->VNForExpr(compCurBB, var_types(lvaTable[lclNum].lvType));
+
+ lvaTable[lclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair.SetBoth(lclVarVN);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("N%03u ", tree->gtSeqNum);
+ Compiler::printTreeID(tree);
+ printf(" ");
+ gtDispNodeName(tree);
+ printf(" V%02u/%d => ", lclNum, lclDefSsaNum);
+ vnPrint(lclVarVN, 1);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+ // Initblocks are of type void. Give them the void "value" -- they may occur in argument lists, which we
+ // want to be able to give VNs to.
+ tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid());
+ }
+ else
+ {
+ // For now, arbitrary side effect on Heap.
+ // TODO-CQ: Why not be complete, and get this case right?
+ fgMutateHeap(tree DEBUGARG("INITBLK - non local"));
+ }
+ }
+ else
+ {
+ assert(tree->OperIsCopyBlkOp());
+ // TODO-Cleanup: We should factor things so that we uniformly rely on "PtrTo" VN's, and
+ // the heap cases can be shared with assignments.
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ bool isEntire = false;
+ // Note that we don't care about exceptions here, since we're only using the values
+ // to perform an assignment (which happens after any exceptions are raised...)
+
+ if (tree->DefinesLocal(this, &lclVarTree, &isEntire))
+ {
+ // Should not have been recorded as updating the heap.
+ assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum));
+
+ unsigned lhsLclNum = lclVarTree->GetLclNum();
+ FieldSeqNode* lhsFldSeq = nullptr;
+ // If it's excluded from SSA, don't need to do anything.
+ if (!fgExcludeFromSsa(lhsLclNum))
+ {
+ unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree);
+
+ if (lhs->IsLocalExpr(this, &lclVarTree, &lhsFldSeq) ||
+ (lhs->OperIsBlk() && (lhs->AsBlk()->gtBlkSize == lvaLclSize(lhsLclNum))))
+ {
+ noway_assert(lclVarTree->gtLclNum == lhsLclNum);
+ }
+ else
+ {
+ GenTree* lhsAddr;
+ if (lhs->OperIsBlk())
+ {
+ lhsAddr = lhs->AsBlk()->Addr();
+ }
+ else
+ {
+ assert(lhs->OperGet() == GT_IND);
+ lhsAddr = lhs->gtOp.gtOp1;
+ }
+ // For addr-of-local expressions, lib/cons shouldn't matter.
+ assert(lhsAddr->gtVNPair.BothEqual());
+ ValueNum lhsAddrVN = lhsAddr->GetVN(VNK_Liberal);
+
+ // Unpack the PtrToLoc value number of the address.
+ assert(vnStore->IsVNFunc(lhsAddrVN));
+ VNFuncApp lhsAddrFuncApp;
+ vnStore->GetVNFunc(lhsAddrVN, &lhsAddrFuncApp);
+ assert(lhsAddrFuncApp.m_func == VNF_PtrToLoc);
+ assert(vnStore->IsVNConstant(lhsAddrFuncApp.m_args[0]) &&
+ vnStore->ConstantValue<unsigned>(lhsAddrFuncApp.m_args[0]) == lhsLclNum);
+ lhsFldSeq = vnStore->FieldSeqVNToFieldSeq(lhsAddrFuncApp.m_args[1]);
+ }
+
+ // Now we need to get the proper RHS.
+ GenTreeLclVarCommon* rhsLclVarTree = nullptr;
+ LclVarDsc* rhsVarDsc = nullptr;
+ FieldSeqNode* rhsFldSeq = nullptr;
+ ValueNumPair rhsVNPair;
+ bool isNewUniq = false;
+ if (!rhs->OperIsIndir())
+ {
+ if (rhs->IsLocalExpr(this, &rhsLclVarTree, &rhsFldSeq))
+ {
+ unsigned rhsLclNum = rhsLclVarTree->GetLclNum();
+ rhsVarDsc = &lvaTable[rhsLclNum];
+ if (fgExcludeFromSsa(rhsLclNum) || rhsFldSeq == FieldSeqStore::NotAField())
+ {
+ rhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, rhsLclVarTree->TypeGet()));
+ isNewUniq = true;
+ }
+ else
+ {
+ rhsVNPair = lvaTable[rhsLclVarTree->GetLclNum()]
+ .GetPerSsaData(rhsLclVarTree->GetSsaNum())
+ ->m_vnPair;
+ var_types indType = rhsLclVarTree->TypeGet();
+
+ rhsVNPair = vnStore->VNPairApplySelectors(rhsVNPair, rhsFldSeq, indType);
+ }
+ }
+ else
+ {
+ rhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, rhs->TypeGet()));
+ isNewUniq = true;
+ }
+ }
+ else
+ {
+ GenTreePtr srcAddr = rhs->AsIndir()->Addr();
+ VNFuncApp srcAddrFuncApp;
+ if (srcAddr->IsLocalAddrExpr(this, &rhsLclVarTree, &rhsFldSeq))
+ {
+ unsigned rhsLclNum = rhsLclVarTree->GetLclNum();
+ rhsVarDsc = &lvaTable[rhsLclNum];
+ if (fgExcludeFromSsa(rhsLclNum) || rhsFldSeq == FieldSeqStore::NotAField())
+ {
+ isNewUniq = true;
+ }
+ else
+ {
+ rhsVNPair = lvaTable[rhsLclVarTree->GetLclNum()]
+ .GetPerSsaData(rhsLclVarTree->GetSsaNum())
+ ->m_vnPair;
+ var_types indType = rhsLclVarTree->TypeGet();
+
+ rhsVNPair = vnStore->VNPairApplySelectors(rhsVNPair, rhsFldSeq, indType);
+ }
+ }
+ else if (vnStore->GetVNFunc(vnStore->VNNormVal(srcAddr->gtVNPair.GetLiberal()), &srcAddrFuncApp))
+ {
+ if (srcAddrFuncApp.m_func == VNF_PtrToStatic)
+ {
+ var_types indType = lclVarTree->TypeGet();
+ ValueNum fieldSeqVN = srcAddrFuncApp.m_args[0];
+
+ FieldSeqNode* zeroOffsetFldSeq = nullptr;
+ if (GetZeroOffsetFieldMap()->Lookup(srcAddr, &zeroOffsetFldSeq))
+ {
+ fieldSeqVN =
+ vnStore->FieldSeqVNAppend(fieldSeqVN, vnStore->VNForFieldSeq(zeroOffsetFldSeq));
+ }
+
+ FieldSeqNode* fldSeqForStaticVar = vnStore->FieldSeqVNToFieldSeq(fieldSeqVN);
+
+ if (fldSeqForStaticVar != FieldSeqStore::NotAField())
+ {
+ // We model statics as indices into the heap variable.
+ ValueNum selectedStaticVar;
+ size_t structSize = 0;
+ selectedStaticVar = vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN,
+ fldSeqForStaticVar, &structSize);
+ selectedStaticVar =
+ vnStore->VNApplySelectorsTypeCheck(selectedStaticVar, indType, structSize);
+
+ rhsVNPair.SetLiberal(selectedStaticVar);
+ rhsVNPair.SetConservative(vnStore->VNForExpr(compCurBB, indType));
+ }
+ else
+ {
+ JITDUMP(" *** Missing field sequence info for Src/RHS of COPYBLK\n");
+ rhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, indType)); // a new unique value number
+ }
+ }
+ else if (srcAddrFuncApp.m_func == VNF_PtrToArrElem)
+ {
+ ValueNum elemLib =
+ fgValueNumberArrIndexVal(nullptr, &srcAddrFuncApp, vnStore->VNForEmptyExcSet());
+ rhsVNPair.SetLiberal(elemLib);
+ rhsVNPair.SetConservative(vnStore->VNForExpr(compCurBB, lclVarTree->TypeGet()));
+ }
+ else
+ {
+ isNewUniq = true;
+ }
+ }
+ else
+ {
+ isNewUniq = true;
+ }
+ }
+
+ if (lhsFldSeq == FieldSeqStore::NotAField())
+ {
+ // We don't have proper field sequence information for the lhs
+ //
+ JITDUMP(" *** Missing field sequence info for Dst/LHS of COPYBLK\n");
+ isNewUniq = true;
+ }
+ else if (lhsFldSeq != nullptr && isEntire)
+ {
+ // This can occur for structs with one field, itself of a struct type.
+ // We won't promote these.
+ // TODO-Cleanup: decide what exactly to do about this.
+ // Always treat them as maps, making them use/def, or reconstitute the
+ // map view here?
+ isNewUniq = true;
+ }
+ else if (!isNewUniq)
+ {
+ ValueNumPair oldLhsVNPair = lvaTable[lhsLclNum].GetPerSsaData(lclVarTree->GetSsaNum())->m_vnPair;
+ rhsVNPair = vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, lhsFldSeq, rhsVNPair,
+ lclVarTree->TypeGet(), compCurBB);
+ }
+
+ if (isNewUniq)
+ {
+ rhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lclVarTree->TypeGet()));
+ }
+
+ lvaTable[lhsLclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair = vnStore->VNPNormVal(rhsVNPair);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Tree ");
+ Compiler::printTreeID(tree);
+ printf(" assigned VN to local var V%02u/%d: ", lhsLclNum, lclDefSsaNum);
+ if (isNewUniq)
+ {
+ printf("new uniq ");
+ }
+ vnpPrint(rhsVNPair, 1);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+ }
+ else
+ {
+ // For now, arbitrary side effect on Heap.
+ // TODO-CQ: Why not be complete, and get this case right?
+ fgMutateHeap(tree DEBUGARG("COPYBLK - non local"));
+ }
+ // Copyblocks are of type void. Give them the void "value" -- they may occur in argument lists, which we want
+ // to be able to give VNs to.
+ tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid());
+ }
+}
+
+void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
+{
+ genTreeOps oper = tree->OperGet();
+
+#ifdef FEATURE_SIMD
+ // TODO-CQ: For now TYP_SIMD values are not handled by value numbering to be amenable for CSE'ing.
+ if (oper == GT_SIMD)
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN));
+ return;
+ }
+#endif
+
+ var_types typ = tree->TypeGet();
+ if (GenTree::OperIsConst(oper))
+ {
+ // If this is a struct assignment, with a constant rhs, it is an initBlk, and it is not
+ // really useful to value number the constant.
+ if (!varTypeIsStruct(tree))
+ {
+ fgValueNumberTreeConst(tree);
+ }
+ }
+ else if (GenTree::OperIsLeaf(oper))
+ {
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+ case GT_REG_VAR:
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ unsigned lclNum = lcl->gtLclNum;
+
+ if ((lcl->gtFlags & GTF_VAR_DEF) == 0 ||
+ (lcl->gtFlags & GTF_VAR_USEASG)) // If it is a "pure" def, it will be handled as part of the assignment.
+ {
+ LclVarDsc* varDsc = &lvaTable[lcl->gtLclNum];
+ if (varDsc->lvPromoted && varDsc->lvFieldCnt == 1)
+ {
+ // If the promoted var has only one field var, treat like a use of the field var.
+ lclNum = varDsc->lvFieldLclStart;
+ }
+
+ // Initialize to the undefined value, so we know whether we hit any of the cases here.
+ lcl->gtVNPair = ValueNumPair();
+
+ if (lcl->gtSsaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ // Not an SSA variable. Assign each occurrence a new, unique, VN.
+ lcl->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lcl->TypeGet()));
+ }
+ else
+ {
+ var_types varType = varDsc->TypeGet();
+ ValueNumPair wholeLclVarVNP = varDsc->GetPerSsaData(lcl->gtSsaNum)->m_vnPair;
+
+ // Check for mismatched LclVar size
+ //
+ unsigned typSize = genTypeSize(genActualType(typ));
+ unsigned varSize = genTypeSize(genActualType(varType));
+
+ if (typSize == varSize)
+ {
+ lcl->gtVNPair = wholeLclVarVNP;
+ }
+ else // mismatched LclVar definition and LclVar use size
+ {
+ if (typSize < varSize)
+ {
+ // the indirection is reading less than the whole LclVar
+ // create a new VN that represent the partial value
+ //
+ ValueNumPair partialLclVarVNP = vnStore->VNPairForCast(wholeLclVarVNP, typ, varType);
+ lcl->gtVNPair = partialLclVarVNP;
+ }
+ else
+ {
+ assert(typSize > varSize);
+ // the indirection is reading beyond the end of the LclVar
+ //
+ lcl->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, typ)); // return a new unique value
+ // number
+ }
+ }
+ }
+ // Temporary, to make progress.
+ // TODO-CQ: This should become an assert again...
+ if (lcl->gtVNPair.GetLiberal() == ValueNumStore::NoVN)
+ {
+ assert(lcl->gtVNPair.GetConservative() == ValueNumStore::NoVN);
+
+ // We don't want to fabricate arbitrary value numbers to things we can't reason about.
+ // So far, we know about two of these cases:
+ // Case 1) We have a local var who has never been defined but it's seen as a use.
+ // This is the case of storeIndir(addr(lclvar)) = expr. In this case since we only
+ // take the address of the variable, this doesn't mean it's a use nor we have to
+ // initialize it, so in this very rare case, we fabricate a value number.
+ // Case 2) Local variables that represent structs which are assigned using CpBlk.
+ GenTree* nextNode = lcl->gtNext;
+ assert((nextNode->gtOper == GT_ADDR && nextNode->gtOp.gtOp1 == lcl) ||
+ varTypeIsStruct(lcl->TypeGet()));
+ lcl->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lcl->TypeGet()));
+ }
+ assert(lcl->gtVNPair.BothDefined());
+ }
+
+ // TODO-Review: For the short term, we have a workaround for copyblk/initblk. Those that use
+ // addrSpillTemp will have a statement like "addrSpillTemp = addr(local)." If we previously decided
+ // that this block operation defines the local, we will have labeled the "local" node as a DEF
+ // (or USEDEF). This flag propagates to the "local" on the RHS. So we'll assume that this is correct,
+ // and treat it as a def (to a new, unique VN).
+ else if ((lcl->gtFlags & GTF_VAR_DEF) != 0)
+ {
+ LclVarDsc* varDsc = &lvaTable[lcl->gtLclNum];
+ if (lcl->gtSsaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+ lvaTable[lclNum]
+ .GetPerSsaData(lcl->gtSsaNum)
+ ->m_vnPair.SetBoth(vnStore->VNForExpr(compCurBB, lcl->TypeGet()));
+ }
+ lcl->gtVNPair = ValueNumPair(); // Avoid confusion -- we don't set the VN of a lcl being defined.
+ }
+ }
+ break;
+
+ case GT_FTN_ADDR:
+ // Use the value of the function pointer (actually, a method handle.)
+ tree->gtVNPair.SetBoth(
+ vnStore->VNForHandle(ssize_t(tree->gtFptrVal.gtFptrMethod), GTF_ICON_METHOD_HDL));
+ break;
+
+ // This group passes through a value from a child node.
+ case GT_RET_EXPR:
+ tree->SetVNsFromNode(tree->gtRetExpr.gtInlineCandidate);
+ break;
+
+ case GT_LCL_FLD:
+ {
+ GenTreeLclFld* lclFld = tree->AsLclFld();
+ assert(fgExcludeFromSsa(lclFld->GetLclNum()) || lclFld->gtFieldSeq != nullptr);
+ // If this is a (full) def, then the variable will be labeled with the new SSA number,
+ // which will not have a value. We skip; it will be handled by one of the assignment-like
+ // forms (assignment, or initBlk or copyBlk).
+ if (((lclFld->gtFlags & GTF_VAR_DEF) == 0) || (lclFld->gtFlags & GTF_VAR_USEASG))
+ {
+ unsigned lclNum = lclFld->GetLclNum();
+ unsigned ssaNum = lclFld->GetSsaNum();
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (ssaNum == SsaConfig::UNINIT_SSA_NUM)
+ {
+ if (varDsc->GetPerSsaData(ssaNum)->m_vnPair.GetLiberal() == ValueNumStore::NoVN)
+ {
+ ValueNum vnForLcl = vnStore->VNForExpr(compCurBB, lclFld->TypeGet());
+ varDsc->GetPerSsaData(ssaNum)->m_vnPair = ValueNumPair(vnForLcl, vnForLcl);
+ }
+ }
+
+ var_types indType = tree->TypeGet();
+ if (lclFld->gtFieldSeq == FieldSeqStore::NotAField() || fgExcludeFromSsa(lclFld->GetLclNum()))
+ {
+ // This doesn't represent a proper field access or it's a struct
+ // with overlapping fields that is hard to reason about; return a new unique VN.
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, indType));
+ }
+ else
+ {
+ ValueNumPair lclVNPair = varDsc->GetPerSsaData(ssaNum)->m_vnPair;
+ tree->gtVNPair = vnStore->VNPairApplySelectors(lclVNPair, lclFld->gtFieldSeq, indType);
+ }
+ }
+ }
+ break;
+
+ // The ones below here all get a new unique VN -- but for various reasons, explained after each.
+ case GT_CATCH_ARG:
+ // We know nothing about the value of a caught expression.
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ break;
+
+ case GT_CLS_VAR:
+ // Skip GT_CLS_VAR nodes that are the LHS of an assignment. (We labeled these earlier.)
+ // We will "evaluate" this as part of the assignment. (Unless we're explicitly told by
+ // the caller to evaluate anyway -- perhaps the assignment is an "op=" assignment.)
+ //
+ if (((tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0) || evalAsgLhsInd)
+ {
+ bool isVolatile = (tree->gtFlags & GTF_FLD_VOLATILE) != 0;
+
+ if (isVolatile)
+ {
+ // For Volatile indirection, first mutate the global heap
+ fgMutateHeap(tree DEBUGARG("GTF_FLD_VOLATILE - read"));
+ }
+
+ // We just mutate the heap if isVolatile is true, and then do the read as normal.
+ //
+ // This allows:
+ // 1: read s;
+ // 2: volatile read s;
+ // 3: read s;
+ //
+ // We should never assume that the values read by 1 and 2 are the same (because the heap was mutated
+ // in between them)... but we *should* be able to prove that the values read in 2 and 3 are the
+ // same.
+ //
+
+ ValueNumPair clsVarVNPair;
+
+ // If the static field handle is for a struct type field, then the value of the static
+ // is a "ref" to the boxed struct -- treat it as the address of the static (we assume that a
+ // first element offset will be added to get to the actual struct...)
+ GenTreeClsVar* clsVar = tree->AsClsVar();
+ FieldSeqNode* fldSeq = clsVar->gtFieldSeq;
+ assert(fldSeq != nullptr); // We need to have one.
+ ValueNum selectedStaticVar = ValueNumStore::NoVN;
+ if (gtIsStaticFieldPtrToBoxedStruct(clsVar->TypeGet(), fldSeq->m_fieldHnd))
+ {
+ clsVarVNPair.SetBoth(
+ vnStore->VNForFunc(TYP_BYREF, VNF_PtrToStatic, vnStore->VNForFieldSeq(fldSeq)));
+ }
+ else
+ {
+ // This is a reference to heap memory.
+ // We model statics as indices into the heap variable.
+
+ FieldSeqNode* fldSeqForStaticVar =
+ GetFieldSeqStore()->CreateSingleton(tree->gtClsVar.gtClsVarHnd);
+ size_t structSize = 0;
+ selectedStaticVar =
+ vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, fldSeqForStaticVar, &structSize);
+ selectedStaticVar =
+ vnStore->VNApplySelectorsTypeCheck(selectedStaticVar, tree->TypeGet(), structSize);
+
+ clsVarVNPair.SetLiberal(selectedStaticVar);
+ // The conservative interpretation always gets a new, unique VN.
+ clsVarVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+
+ // The ValueNum returned must represent the full-sized IL-Stack value.
+ // If we need to widen this value then we need to introduce a VNF_Cast here to represent
+ // the widened value. This is necessary since the CSE package can replace all occurrences
+ // of a given ValueNum with a LclVar that is a full-sized IL-Stack value.
+ //
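+ // For example (illustrative only): a read of a TYP_SHORT static is numbered as a cast of the stored
+ // value, so if CSE later substitutes a full-width (INT) local for this ValueNum, the substituted
+ // value still agrees with the full-sized IL-Stack value described above.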
+ if (varTypeIsSmall(tree->TypeGet()))
+ {
+ var_types castToType = tree->TypeGet();
+ clsVarVNPair = vnStore->VNPairForCast(clsVarVNPair, castToType, castToType);
+ }
+ tree->gtVNPair = clsVarVNPair;
+ }
+ break;
+
+ case GT_MEMORYBARRIER: // Leaf
+ // For MEMORYBARRIER add an arbitrary side effect on Heap.
+ fgMutateHeap(tree DEBUGARG("MEMORYBARRIER"));
+ break;
+
+ // These do not represent values.
+ case GT_NO_OP:
+ case GT_JMP: // Control flow
+ case GT_LABEL: // Control flow
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN: // Control flow
+#endif
+ case GT_ARGPLACE:
+ // This node is a stand-in for an argument whose value will be computed later. (Perhaps it's
+ // a register argument, and we don't want to preclude use of the register in arg evaluation yet.)
+ // We give this a "fake" value number now; if the call in which it occurs cares about the
+ // value (e.g., it's a helper call whose result is a function of argument values) we'll reset
+ // this later, when the later args have been assigned VNs.
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ break;
+
+ case GT_PHI_ARG:
+ // This one is special because we should never process it in this method: it should
+ // always be taken care of, when needed, during pre-processing of a block's phi definitions.
+ assert(false);
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ else if (GenTree::OperIsSimple(oper))
+ {
+#ifdef DEBUG
+ // Sometimes we query the heap ssa map, and need a dummy location for the ignored result.
+ unsigned heapSsaNum;
+#endif
+
+ if (GenTree::OperIsAssignment(oper) && !varTypeIsStruct(tree))
+ {
+
+ GenTreePtr lhs = tree->gtOp.gtOp1;
+ GenTreePtr rhs = tree->gtOp.gtOp2;
+
+ ValueNumPair rhsVNPair;
+ if (oper == GT_ASG)
+ {
+ rhsVNPair = rhs->gtVNPair;
+ }
+ else // Must be an "op="
+ {
+ // If the LHS is an IND, we didn't evaluate it when we visited it previously.
+ // But we didn't know that the parent was an op=. We do now, so go back and evaluate it.
+ // (We actually check if the effective val is the IND. We will have evaluated any non-last
+ // args of an LHS comma already -- including their heap effects.)
+ GenTreePtr lhsVal = lhs->gtEffectiveVal(/*commaOnly*/ true);
+ if (lhsVal->OperIsIndir() || (lhsVal->OperGet() == GT_CLS_VAR))
+ {
+ fgValueNumberTree(lhsVal, /*evalAsgLhsInd*/ true);
+ }
+ // Now we can make this assertion:
+ assert(lhsVal->gtVNPair.BothDefined());
+ genTreeOps op = GenTree::OpAsgToOper(oper);
+ if (GenTree::OperIsBinary(op))
+ {
+ ValueNumPair lhsNormVNP;
+ ValueNumPair lhsExcVNP;
+ lhsExcVNP.SetBoth(ValueNumStore::VNForEmptyExcSet());
+ vnStore->VNPUnpackExc(lhsVal->gtVNPair, &lhsNormVNP, &lhsExcVNP);
+ assert(rhs->gtVNPair.BothDefined());
+ ValueNumPair rhsNormVNP;
+ ValueNumPair rhsExcVNP;
+ rhsExcVNP.SetBoth(ValueNumStore::VNForEmptyExcSet());
+ vnStore->VNPUnpackExc(rhs->gtVNPair, &rhsNormVNP, &rhsExcVNP);
+ rhsVNPair = vnStore->VNPWithExc(vnStore->VNPairForFunc(tree->TypeGet(),
+ GetVNFuncForOper(op, (tree->gtFlags &
+ GTF_UNSIGNED) != 0),
+ lhsNormVNP, rhsNormVNP),
+ vnStore->VNPExcSetUnion(lhsExcVNP, rhsExcVNP));
+ }
+ else
+ {
+ // As of now, GT_CHS ==> GT_NEG is the only pattern fitting this.
+ assert(GenTree::OperIsUnary(op));
+ ValueNumPair lhsNormVNP;
+ ValueNumPair lhsExcVNP;
+ lhsExcVNP.SetBoth(ValueNumStore::VNForEmptyExcSet());
+ vnStore->VNPUnpackExc(lhsVal->gtVNPair, &lhsNormVNP, &lhsExcVNP);
+ rhsVNPair = vnStore->VNPWithExc(vnStore->VNPairForFunc(tree->TypeGet(),
+ GetVNFuncForOper(op, (tree->gtFlags &
+ GTF_UNSIGNED) != 0),
+ lhsNormVNP),
+ lhsExcVNP);
+ }
+ }
+ if (tree->TypeGet() != TYP_VOID)
+ {
+ // Assignment operators, as expressions, return the value of the RHS.
+ tree->gtVNPair = rhsVNPair;
+ }
+
+ // Now that we've labeled the assignment as a whole, we don't care about exceptions.
+ rhsVNPair = vnStore->VNPNormVal(rhsVNPair);
+
+ // If the types of the rhs and lhs are different then we
+ // may want to change the ValueNumber assigned to the lhs.
+ //
+ if (rhs->TypeGet() != lhs->TypeGet())
+ {
+ if (rhs->TypeGet() == TYP_REF)
+ {
+ // If we have an unsafe IL assignment of a TYP_REF to a non-ref (typically a TYP_BYREF)
+ // then don't propagate this ValueNumber to the lhs, instead create a new unique VN
+ //
+ rhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lhs->TypeGet()));
+ }
+ }
+
+ // We have to handle the case where the LHS is a comma. In that case, we don't evaluate the comma,
+ // so we give it VNForVoid, and we're really interested in the effective value.
+ GenTreePtr lhsCommaIter = lhs;
+ while (lhsCommaIter->OperGet() == GT_COMMA)
+ {
+ lhsCommaIter->gtVNPair.SetBoth(vnStore->VNForVoid());
+ lhsCommaIter = lhsCommaIter->gtOp.gtOp2;
+ }
+ lhs = lhs->gtEffectiveVal();
+
+ // Now, record the new VN for an assignment (performing the indicated "state update").
+ // It's safe to use gtEffectiveVal here, because the non-last elements of a comma list on the
+ // LHS will come before the assignment in evaluation order.
+ switch (lhs->OperGet())
+ {
+ case GT_LCL_VAR:
+ case GT_REG_VAR:
+ {
+ GenTreeLclVarCommon* lcl = lhs->AsLclVarCommon();
+ unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lcl);
+
+ // Should not have been recorded as updating the heap.
+ assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum));
+
+ if (lclDefSsaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+ assert(rhsVNPair.GetLiberal() != ValueNumStore::NoVN);
+
+ lhs->gtVNPair = rhsVNPair;
+ lvaTable[lcl->gtLclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair = rhsVNPair;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("N%03u ", lhs->gtSeqNum);
+ Compiler::printTreeID(lhs);
+ printf(" ");
+ gtDispNodeName(lhs);
+ gtDispLeaf(lhs, nullptr);
+ printf(" => ");
+ vnpPrint(lhs->gtVNPair, 1);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+#ifdef DEBUG
+ else
+ {
+ if (verbose)
+ {
+ JITDUMP("Tree ");
+ Compiler::printTreeID(tree);
+ printf(" assigns to local var V%02u; excluded from SSA, so value not tracked.\n",
+ lcl->GetLclNum());
+ }
+ }
+#endif // DEBUG
+ }
+ break;
+ case GT_LCL_FLD:
+ {
+ GenTreeLclFld* lclFld = lhs->AsLclFld();
+ unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclFld);
+
+ // Should not have been recorded as updating the heap.
+ assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum));
+
+ if (lclDefSsaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+ ValueNumPair newLhsVNPair;
+ // Is this a full definition?
+ if ((lclFld->gtFlags & GTF_VAR_USEASG) == 0)
+ {
+ assert(!lclFld->IsPartialLclFld(this));
+ assert(rhsVNPair.GetLiberal() != ValueNumStore::NoVN);
+ newLhsVNPair = rhsVNPair;
+ }
+ else
+ {
+ // We should never have a null field sequence here.
+ assert(lclFld->gtFieldSeq != nullptr);
+ if (lclFld->gtFieldSeq == FieldSeqStore::NotAField())
+ {
+ // We don't know what field this represents. Assign a new VN to the whole variable
+ // (since we may be writing to an unknown portion of it.)
+ newLhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lvaGetActualType(lclFld->gtLclNum)));
+ }
+ else
+ {
+ // We do know the field sequence.
+ // The "lclFld" node will be labeled with the SSA number of its "use" identity
+ // (we looked in a side table above for its "def" identity). Look up that value.
+ ValueNumPair oldLhsVNPair =
+ lvaTable[lclFld->GetLclNum()].GetPerSsaData(lclFld->GetSsaNum())->m_vnPair;
+ newLhsVNPair =
+ vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, lclFld->gtFieldSeq,
+ rhsVNPair, // Pre-value.
+ lvaGetActualType(lclFld->gtLclNum), compCurBB);
+ }
+ }
+ lvaTable[lclFld->GetLclNum()].GetPerSsaData(lclDefSsaNum)->m_vnPair = newLhsVNPair;
+ lhs->gtVNPair = newLhsVNPair;
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (lhs->gtVNPair.GetLiberal() != ValueNumStore::NoVN)
+ {
+ printf("N%03u ", lhs->gtSeqNum);
+ Compiler::printTreeID(lhs);
+ printf(" ");
+ gtDispNodeName(lhs);
+ gtDispLeaf(lhs, nullptr);
+ printf(" => ");
+ vnpPrint(lhs->gtVNPair, 1);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+ }
+ }
+ break;
+
+ case GT_PHI_ARG:
+ assert(false); // Phi arg cannot be LHS.
+
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_IND:
+ {
+ bool isVolatile = (lhs->gtFlags & GTF_IND_VOLATILE) != 0;
+
+ if (isVolatile)
+ {
+ // For Volatile store indirection, first mutate the global heap
+ fgMutateHeap(lhs DEBUGARG("GTF_IND_VOLATILE - store"));
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lhs->TypeGet()));
+ }
+
+ GenTreePtr arg = lhs->gtOp.gtOp1;
+
+ // Indicates whether the argument of the IND is the address of a local.
+ bool wasLocal = false;
+
+ lhs->gtVNPair = rhsVNPair;
+
+ VNFuncApp funcApp;
+ ValueNum argVN = arg->gtVNPair.GetLiberal();
+
+ bool argIsVNFunc = vnStore->GetVNFunc(vnStore->VNNormVal(argVN), &funcApp);
+
+ // Is this an assignment to a (field of, perhaps) a local?
+ // If it is a PtrToLoc, lib and cons VNs will be the same.
+ if (argIsVNFunc)
+ {
+ IndirectAssignmentAnnotation* pIndirAnnot =
+ nullptr; // This will be used if "tree" is an "indirect assignment",
+ // explained below.
+ if (funcApp.m_func == VNF_PtrToLoc)
+ {
+ assert(arg->gtVNPair.BothEqual()); // If it's a PtrToLoc, lib/cons shouldn't differ.
+ assert(vnStore->IsVNConstant(funcApp.m_args[0]));
+ unsigned lclNum = vnStore->ConstantValue<unsigned>(funcApp.m_args[0]);
+
+ wasLocal = true;
+
+ if (!fgExcludeFromSsa(lclNum))
+ {
+ FieldSeqNode* fieldSeq = vnStore->FieldSeqVNToFieldSeq(funcApp.m_args[1]);
+
+ // Either "arg" is the address of (part of) a local itself, or the assignment is an
+ // "indirect assignment", where an outer comma expression assigned the address of a
+ // local to a temp, and that temp is our lhs, and we recorded this in a table when we
+ // made the indirect assignment...or else we have a "rogue" PtrToLoc, one that should
+ // have made the local in question address-exposed. Assert on that.
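+ // Illustrative shape of the "indirect assignment" case (an assumption for exposition only):
+ // "tmp = addr(local.f), ..., ind(tmp) = rhs" -- the side table consulted below (GetIndirAssignMap)
+ // records which local and SSA def the store through "tmp" actually updates.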
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ bool isEntire = false;
+ unsigned lclDefSsaNum = SsaConfig::RESERVED_SSA_NUM;
+ ValueNumPair newLhsVNPair;
+
+ if (arg->DefinesLocalAddr(this, genTypeSize(lhs->TypeGet()), &lclVarTree, &isEntire))
+ {
+ // The local #'s should agree.
+ assert(lclNum == lclVarTree->GetLclNum());
+
+ if (fieldSeq == FieldSeqStore::NotAField())
+ {
+ // We don't know where we're storing, so give the local a new, unique VN.
+ // Do this by considering it an "entire" assignment, with an unknown RHS.
+ isEntire = true;
+ rhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lclVarTree->TypeGet()));
+ }
+
+ if (isEntire)
+ {
+ newLhsVNPair = rhsVNPair;
+ lclDefSsaNum = lclVarTree->GetSsaNum();
+ }
+ else
+ {
+ // Don't use the lclVarTree's VN: if it's a local field, it will
+ // already be dereferenced by its field sequence.
+ ValueNumPair oldLhsVNPair = lvaTable[lclVarTree->GetLclNum()]
+ .GetPerSsaData(lclVarTree->GetSsaNum())
+ ->m_vnPair;
+ lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree);
+ newLhsVNPair =
+ vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, fieldSeq, rhsVNPair,
+ lhs->TypeGet(), compCurBB);
+ }
+ lvaTable[lclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair = newLhsVNPair;
+ }
+ else if (m_indirAssignMap != nullptr && GetIndirAssignMap()->Lookup(tree, &pIndirAnnot))
+ {
+ // The local #'s should agree.
+ assert(lclNum == pIndirAnnot->m_lclNum);
+ assert(pIndirAnnot->m_defSsaNum != SsaConfig::RESERVED_SSA_NUM);
+ lclDefSsaNum = pIndirAnnot->m_defSsaNum;
+ // Does this assignment write the entire width of the local?
+ if (genTypeSize(lhs->TypeGet()) == genTypeSize(var_types(lvaTable[lclNum].lvType)))
+ {
+ assert(pIndirAnnot->m_useSsaNum == SsaConfig::RESERVED_SSA_NUM);
+ assert(pIndirAnnot->m_isEntire);
+ newLhsVNPair = rhsVNPair;
+ }
+ else
+ {
+ assert(pIndirAnnot->m_useSsaNum != SsaConfig::RESERVED_SSA_NUM);
+ assert(!pIndirAnnot->m_isEntire);
+ assert(pIndirAnnot->m_fieldSeq == fieldSeq);
+ ValueNumPair oldLhsVNPair =
+ lvaTable[lclNum].GetPerSsaData(pIndirAnnot->m_useSsaNum)->m_vnPair;
+ newLhsVNPair =
+ vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, fieldSeq, rhsVNPair,
+ lhs->TypeGet(), compCurBB);
+ }
+ lvaTable[lclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair = newLhsVNPair;
+ }
+ else
+ {
+ unreached(); // "Rogue" PtrToLoc, as discussed above.
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Tree ");
+ Compiler::printTreeID(tree);
+ printf(" assigned VN to local var V%02u/%d: VN ", lclNum, lclDefSsaNum);
+ vnpPrint(newLhsVNPair, 1);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+ }
+ }
+
+ // Was the argument of the GT_IND the address of a local, handled above?
+ if (!wasLocal)
+ {
+ GenTreePtr obj = nullptr;
+ GenTreePtr staticOffset = nullptr;
+ FieldSeqNode* fldSeq = nullptr;
+
+ // Is the LHS an array index expression?
+ if (argIsVNFunc && funcApp.m_func == VNF_PtrToArrElem)
+ {
+ CORINFO_CLASS_HANDLE elemTypeEq =
+ CORINFO_CLASS_HANDLE(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
+ ValueNum arrVN = funcApp.m_args[1];
+ ValueNum inxVN = funcApp.m_args[2];
+ FieldSeqNode* fldSeq = vnStore->FieldSeqVNToFieldSeq(funcApp.m_args[3]);
+
+ // Does the child of the GT_IND 'arg' have an associated zero-offset field sequence?
+ FieldSeqNode* addrFieldSeq = nullptr;
+ if (GetZeroOffsetFieldMap()->Lookup(arg, &addrFieldSeq))
+ {
+ fldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fldSeq);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Tree ");
+ Compiler::printTreeID(tree);
+ printf(" assigns to an array element:\n");
+ }
+#endif // DEBUG
+
+ fgValueNumberArrIndexAssign(elemTypeEq, arrVN, inxVN, fldSeq, rhsVNPair.GetLiberal(),
+ lhs->TypeGet());
+ fgValueNumberRecordHeapSsa(tree);
+ }
+ // It may be that we haven't parsed it yet. Try.
+ else if (lhs->gtFlags & GTF_IND_ARR_INDEX)
+ {
+ ArrayInfo arrInfo;
+ bool b = GetArrayInfoMap()->Lookup(lhs, &arrInfo);
+ assert(b);
+ ValueNum arrVN = ValueNumStore::NoVN;
+ ValueNum inxVN = ValueNumStore::NoVN;
+ FieldSeqNode* fldSeq = nullptr;
+
+ // Try to parse it.
+ GenTreePtr arr = nullptr;
+ arg->ParseArrayAddress(this, &arrInfo, &arr, &inxVN, &fldSeq);
+ if (arr == nullptr)
+ {
+ fgMutateHeap(tree DEBUGARG("assignment to unparseable array expression"));
+ return;
+ }
+ // Otherwise, parsing succeeded.
+
+ // Need to form H[arrType][arr][ind][fldSeq] = rhsVNPair.GetLiberal()
+
+ // Get the element type equivalence class representative.
+ CORINFO_CLASS_HANDLE elemTypeEq =
+ EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+ arrVN = arr->gtVNPair.GetLiberal();
+
+ FieldSeqNode* zeroOffsetFldSeq = nullptr;
+ if (GetZeroOffsetFieldMap()->Lookup(arg, &zeroOffsetFldSeq))
+ {
+ fldSeq = GetFieldSeqStore()->Append(fldSeq, zeroOffsetFldSeq);
+ }
+
+ fgValueNumberArrIndexAssign(elemTypeEq, arrVN, inxVN, fldSeq, rhsVNPair.GetLiberal(),
+ lhs->TypeGet());
+ fgValueNumberRecordHeapSsa(tree);
+ }
+ else if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq))
+ {
+ if (fldSeq == FieldSeqStore::NotAField())
+ {
+ fgMutateHeap(tree DEBUGARG("NotAField"));
+ }
+ else
+ {
+ assert(fldSeq != nullptr);
+#ifdef DEBUG
+ CORINFO_CLASS_HANDLE fldCls = info.compCompHnd->getFieldClass(fldSeq->m_fieldHnd);
+ if (obj != nullptr)
+ {
+ // Make sure that the class containing it is not a value class (as we are expecting
+ // an instance field)
+ assert((info.compCompHnd->getClassAttribs(fldCls) & CORINFO_FLG_VALUECLASS) == 0);
+ assert(staticOffset == nullptr);
+ }
+#endif // DEBUG
+ // Get the first (instance or static) field from field seq. Heap[field] will yield the
+ // "field map".
+ if (fldSeq->IsFirstElemFieldSeq())
+ {
+ fldSeq = fldSeq->m_next;
+ assert(fldSeq != nullptr);
+ }
+
+ // Get a field sequence for just the first field in the sequence
+ //
+ FieldSeqNode* firstFieldOnly = GetFieldSeqStore()->CreateSingleton(fldSeq->m_fieldHnd);
+
+ // The final field in the sequence will need to match the 'indType'
+ var_types indType = lhs->TypeGet();
+ ValueNum fldMapVN = vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, firstFieldOnly);
+
+ // The type of the field is "struct" if there are more fields in the sequence,
+ // otherwise it is the type returned from VNApplySelectors above.
+ var_types firstFieldType = vnStore->TypeOfVN(fldMapVN);
+
+ ValueNum storeVal =
+ rhsVNPair.GetLiberal(); // The value number from the rhs of the assignment
+ ValueNum newFldMapVN = ValueNumStore::NoVN;
+
+ // when (obj != nullptr) we have an instance field, otherwise a static field
+ // when (staticOffset != nullptr) it represents an offset into a static or the call to
+ // Shared Static Base
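+ // For example (illustrative only): "o.f = v" reaches here with obj != nullptr, while a store to a
+ // field of a shared static struct reaches here with staticOffset != nullptr.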
+ if ((obj != nullptr) || (staticOffset != nullptr))
+ {
+ ValueNum valAtAddr = fldMapVN;
+ ValueNum normVal = ValueNumStore::NoVN;
+
+ if (obj != nullptr)
+ {
+ // construct the ValueNumber for 'fldMap at obj'
+ normVal = vnStore->VNNormVal(obj->GetVN(VNK_Liberal));
+ valAtAddr =
+ vnStore->VNForMapSelect(VNK_Liberal, firstFieldType, fldMapVN, normVal);
+ }
+ else // (staticOffset != nullptr)
+ {
+ // construct the ValueNumber for 'fldMap at staticOffset'
+ normVal = vnStore->VNNormVal(staticOffset->GetVN(VNK_Liberal));
+ valAtAddr =
+ vnStore->VNForMapSelect(VNK_Liberal, firstFieldType, fldMapVN, normVal);
+ }
+ // Now get rid of any remaining struct field dereferences. (if they exist)
+ if (fldSeq->m_next)
+ {
+ storeVal =
+ vnStore->VNApplySelectorsAssign(VNK_Liberal, valAtAddr, fldSeq->m_next,
+ storeVal, indType, compCurBB);
+ }
+
+ // From which we can construct the new ValueNumber for 'fldMap at normVal'
+ newFldMapVN = vnStore->VNForMapStore(vnStore->TypeOfVN(fldMapVN), fldMapVN, normVal,
+ storeVal);
+ }
+ else
+ {
+ // plain static field
+
+ // Now get rid of any remaining struct field dereferences. (if they exist)
+ if (fldSeq->m_next)
+ {
+ storeVal =
+ vnStore->VNApplySelectorsAssign(VNK_Liberal, fldMapVN, fldSeq->m_next,
+ storeVal, indType, compCurBB);
+ }
+
+ newFldMapVN = vnStore->VNApplySelectorsAssign(VNK_Liberal, fgCurHeapVN, fldSeq,
+ storeVal, indType, compCurBB);
+ }
+
+ // It is not strictly necessary to set the lhs value number,
+ // but the dumps read better with it set to the 'storeVal' that we just computed
+ lhs->gtVNPair.SetBoth(storeVal);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" fgCurHeapVN assigned:\n");
+ }
+#endif // DEBUG
+ // bbHeapDef must be set to true for any block that Mutates the global Heap
+ assert(compCurBB->bbHeapDef);
+
+ // Update the field map for firstField in Heap to this new value.
+ fgCurHeapVN = vnStore->VNApplySelectorsAssign(VNK_Liberal, fgCurHeapVN, firstFieldOnly,
+ newFldMapVN, indType, compCurBB);
+
+ fgValueNumberRecordHeapSsa(tree);
+ }
+ }
+ else
+ {
+ GenTreeLclVarCommon* dummyLclVarTree = nullptr;
+ if (!tree->DefinesLocal(this, &dummyLclVarTree))
+ {
+ // If it doesn't define a local, then it might update the heap.
+ fgMutateHeap(tree DEBUGARG("assign-of-IND"));
+ }
+ }
+ }
+
+ // We don't actually evaluate an IND on the LHS, so give it the Void value.
+ tree->gtVNPair.SetBoth(vnStore->VNForVoid());
+ }
+ break;
+
+ case GT_CLS_VAR:
+ {
+ bool isVolatile = (lhs->gtFlags & GTF_FLD_VOLATILE) != 0;
+
+ if (isVolatile)
+ {
+ // For Volatile store indirection, first mutate the global heap
+ fgMutateHeap(lhs DEBUGARG("GTF_CLS_VAR - store")); // always change fgCurHeapVN
+ }
+
+ // We model statics as indices into the heap variable.
+ FieldSeqNode* fldSeqForStaticVar = GetFieldSeqStore()->CreateSingleton(lhs->gtClsVar.gtClsVarHnd);
+ assert(fldSeqForStaticVar != FieldSeqStore::NotAField());
+
+ ValueNum storeVal = rhsVNPair.GetLiberal(); // The value number from the rhs of the assignment
+ storeVal = vnStore->VNApplySelectorsAssign(VNK_Liberal, fgCurHeapVN, fldSeqForStaticVar, storeVal,
+ lhs->TypeGet(), compCurBB);
+
+ // It is not strictly necessary to set the lhs value number,
+ // but the dumps read better with it set to the 'storeVal' that we just computed
+ lhs->gtVNPair.SetBoth(storeVal);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" fgCurHeapVN assigned:\n");
+ }
+#endif // DEBUG
+ // bbHeapDef must be set to true for any block that Mutates the global Heap
+ assert(compCurBB->bbHeapDef);
+
+ // Update the field map for the fgCurHeapVN
+ fgCurHeapVN = storeVal;
+ fgValueNumberRecordHeapSsa(tree);
+ }
+ break;
+
+ default:
+ assert(!"Unknown node for lhs of assignment!");
+
+ // For Unknown stores, mutate the global heap
+ fgMutateHeap(lhs DEBUGARG("Unkwown Assignment - store")); // always change fgCurHeapVN
+ break;
+ }
+ }
+ // Other kinds of assignment: initblk and copyblk.
+ else if (oper == GT_ASG && varTypeIsStruct(tree))
+ {
+ fgValueNumberBlockAssignment(tree, evalAsgLhsInd);
+ }
+ else if (oper == GT_ADDR)
+ {
+ // We have special representations for byrefs to lvalues.
+ GenTreePtr arg = tree->gtOp.gtOp1;
+ if (arg->OperIsLocal())
+ {
+ FieldSeqNode* fieldSeq = nullptr;
+ ValueNum newVN = ValueNumStore::NoVN;
+ if (fgExcludeFromSsa(arg->gtLclVarCommon.GetLclNum()))
+ {
+ newVN = vnStore->VNForExpr(compCurBB, TYP_BYREF);
+ }
+ else if (arg->OperGet() == GT_LCL_FLD)
+ {
+ fieldSeq = arg->AsLclFld()->gtFieldSeq;
+ if (fieldSeq == nullptr)
+ {
+ // Local field with unknown field seq -- not a precise pointer.
+ newVN = vnStore->VNForExpr(compCurBB, TYP_BYREF);
+ }
+ }
+ if (newVN == ValueNumStore::NoVN)
+ {
+ assert(arg->gtLclVarCommon.GetSsaNum() != ValueNumStore::NoVN);
+ newVN = vnStore->VNForPtrToLoc(TYP_BYREF, vnStore->VNForIntCon(arg->gtLclVarCommon.GetLclNum()),
+ vnStore->VNForFieldSeq(fieldSeq));
+ }
+ tree->gtVNPair.SetBoth(newVN);
+ }
+ else if ((arg->gtOper == GT_IND) || arg->OperIsBlk())
+ {
+ // Usually the ADDR and IND just cancel out...
+ // except when this GT_ADDR has a valid zero-offset field sequence
+ //
+ FieldSeqNode* zeroOffsetFieldSeq = nullptr;
+ if (GetZeroOffsetFieldMap()->Lookup(tree, &zeroOffsetFieldSeq) &&
+ (zeroOffsetFieldSeq != FieldSeqStore::NotAField()))
+ {
+ ValueNum addrExtended = vnStore->ExtendPtrVN(arg->gtOp.gtOp1, zeroOffsetFieldSeq);
+ if (addrExtended != ValueNumStore::NoVN)
+ {
+ tree->gtVNPair.SetBoth(addrExtended); // We don't care about lib/cons differences for addresses.
+ }
+ else
+ {
+ // ExtendPtrVN returned a failure result
+ // So give this address a new unique value
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_BYREF));
+ }
+ }
+ else
+ {
+ // They just cancel, so fetch the ValueNumber from the op1 of the GT_IND node.
+ //
+ GenTree* addr = arg->AsIndir()->Addr();
+ tree->gtVNPair = addr->gtVNPair;
+
+ // For the CSE phase mark the address as GTF_DONT_CSE
+ // because it will end up with the same value number as tree (the GT_ADDR).
+ addr->gtFlags |= GTF_DONT_CSE;
+ }
+ }
+ else
+ {
+ // May be more cases to do here! But we'll punt for now.
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_BYREF));
+ }
+ }
+ else if ((oper == GT_IND) || GenTree::OperIsBlk(oper))
+ {
+ // So far, we handle cases in which the address is a ptr-to-local, or if it's
+ // a pointer to an object field.
+ GenTreePtr addr = tree->AsIndir()->Addr();
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ FieldSeqNode* fldSeq1 = nullptr;
+ FieldSeqNode* fldSeq2 = nullptr;
+ GenTreePtr obj = nullptr;
+ GenTreePtr staticOffset = nullptr;
+ bool isVolatile = (tree->gtFlags & GTF_IND_VOLATILE) != 0;
+
+ // See if the addr has any exceptional part.
+ ValueNumPair addrNvnp;
+ ValueNumPair addrXvnp = ValueNumPair(ValueNumStore::VNForEmptyExcSet(), ValueNumStore::VNForEmptyExcSet());
+ vnStore->VNPUnpackExc(addr->gtVNPair, &addrNvnp, &addrXvnp);
+
+ // Is the dereference immutable? If so, model it as referencing the read-only heap.
+ if (tree->gtFlags & GTF_IND_INVARIANT)
+ {
+ assert(!isVolatile); // We don't expect both volatile and invariant
+ tree->gtVNPair =
+ ValueNumPair(vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, ValueNumStore::VNForROH(),
+ addrNvnp.GetLiberal()),
+ vnStore->VNForMapSelect(VNK_Conservative, TYP_REF, ValueNumStore::VNForROH(),
+ addrNvnp.GetConservative()));
+ tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
+ }
+ else if (isVolatile)
+ {
+ // For Volatile indirection, mutate the global heap
+ fgMutateHeap(tree DEBUGARG("GTF_IND_VOLATILE - read"));
+
+ // The value read by the GT_IND can immediately change
+ ValueNum newUniq = vnStore->VNForExpr(compCurBB, tree->TypeGet());
+ tree->gtVNPair = vnStore->VNPWithExc(ValueNumPair(newUniq, newUniq), addrXvnp);
+ }
+ // We always want to evaluate the LHS when the GT_IND node is marked with GTF_IND_ARR_INDEX
+ // as this will relabel the GT_IND child correctly using the VNF_PtrToArrElem
+ else if ((tree->gtFlags & GTF_IND_ARR_INDEX) != 0)
+ {
+ ArrayInfo arrInfo;
+ bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
+ assert(b);
+
+ ValueNum inxVN = ValueNumStore::NoVN;
+ FieldSeqNode* fldSeq = nullptr;
+
+ // GenTreePtr addr = tree->gtOp.gtOp1;
+ ValueNum addrVN = addrNvnp.GetLiberal();
+
+ // Try to parse it.
+ GenTreePtr arr = nullptr;
+ addr->ParseArrayAddress(this, &arrInfo, &arr, &inxVN, &fldSeq);
+ if (arr == nullptr)
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ return;
+ }
+ assert(fldSeq != FieldSeqStore::NotAField());
+
+ // Otherwise...
+ // Need to form H[arrType][arr][ind][fldSeq]
+ // Get the array element type equivalence class rep.
+ CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+ ValueNum elemTypeEqVN = vnStore->VNForHandle(ssize_t(elemTypeEq), GTF_ICON_CLASS_HDL);
+
+ // We take the "VNNormVal"s here, because if either has exceptional outcomes, they will be captured
+ // as part of the value of the composite "addr" operation...
+ ValueNum arrVN = vnStore->VNNormVal(arr->gtVNPair.GetLiberal());
+ inxVN = vnStore->VNNormVal(inxVN);
+
+ // Additionally, relabel the address with a PtrToArrElem value number.
+ ValueNum fldSeqVN = vnStore->VNForFieldSeq(fldSeq);
+ ValueNum elemAddr =
+ vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem, elemTypeEqVN, arrVN, inxVN, fldSeqVN);
+
+ // The aggregate "addr" VN should have had all the exceptions bubble up...
+ elemAddr = vnStore->VNWithExc(elemAddr, addrXvnp.GetLiberal());
+ addr->gtVNPair.SetBoth(elemAddr);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Relabeled IND_ARR_INDEX address node ");
+ Compiler::printTreeID(addr);
+ printf(" with l:" STR_VN "%x: ", elemAddr);
+ vnStore->vnDump(this, elemAddr);
+ printf("\n");
+ if (vnStore->VNNormVal(elemAddr) != elemAddr)
+ {
+ printf(" [" STR_VN "%x is: ", vnStore->VNNormVal(elemAddr));
+ vnStore->vnDump(this, vnStore->VNNormVal(elemAddr));
+ printf("]\n");
+ }
+ }
+#endif // DEBUG
+ // We now need to retrieve the value number for the array element value
+ // and give this value number to the GT_IND node 'tree'
+ // We do this whenever we have an rvalue, or for the LHS when we have an "op=",
+ // but we don't do it for a normal LHS assignment into an array element.
+ //
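+ // For example (illustrative only): "x = a[i]" and "a[i] |= x" both read the element value here,
+ // while a plain "a[i] = x" leaves the element update to the enclosing assignment's handling.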
+ if (evalAsgLhsInd || ((tree->gtFlags & GTF_IND_ASG_LHS) == 0))
+ {
+ fgValueNumberArrIndexVal(tree, elemTypeEq, arrVN, inxVN, addrXvnp.GetLiberal(), fldSeq);
+ }
+ }
+
+ // In general we skip GT_IND nodes that are the LHS of an assignment. (We labeled these earlier.)
+ // We will "evaluate" this as part of the assignment. (Unless we're explicitly told by
+ // the caller to evaluate anyway -- perhaps the assignment is an "op=" assignment.)
+ else if (((tree->gtFlags & GTF_IND_ASG_LHS) == 0) || evalAsgLhsInd)
+ {
+ FieldSeqNode* localFldSeq = nullptr;
+ VNFuncApp funcApp;
+
+ // Is it a local or a heap address?
+ if (addr->IsLocalAddrExpr(this, &lclVarTree, &localFldSeq) &&
+ !fgExcludeFromSsa(lclVarTree->GetLclNum()))
+ {
+ unsigned lclNum = lclVarTree->GetLclNum();
+ unsigned ssaNum = lclVarTree->GetSsaNum();
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if ((localFldSeq == FieldSeqStore::NotAField()) || (localFldSeq == nullptr))
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+ else
+ {
+ var_types indType = tree->TypeGet();
+ ValueNumPair lclVNPair = varDsc->GetPerSsaData(ssaNum)->m_vnPair;
+ tree->gtVNPair = vnStore->VNPairApplySelectors(lclVNPair, localFldSeq, indType);
+ }
+ tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
+ }
+ else if (vnStore->GetVNFunc(addrNvnp.GetLiberal(), &funcApp) && funcApp.m_func == VNF_PtrToStatic)
+ {
+ var_types indType = tree->TypeGet();
+ ValueNum fieldSeqVN = funcApp.m_args[0];
+
+ FieldSeqNode* fldSeqForStaticVar = vnStore->FieldSeqVNToFieldSeq(fieldSeqVN);
+
+ if (fldSeqForStaticVar != FieldSeqStore::NotAField())
+ {
+ ValueNum selectedStaticVar;
+ // We model statics as indices into the heap variable.
+ size_t structSize = 0;
+ selectedStaticVar =
+ vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, fldSeqForStaticVar, &structSize);
+ selectedStaticVar = vnStore->VNApplySelectorsTypeCheck(selectedStaticVar, indType, structSize);
+
+ tree->gtVNPair.SetLiberal(selectedStaticVar);
+ tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, indType));
+ }
+ else
+ {
+ JITDUMP(" *** Missing field sequence info for VNF_PtrToStatic value GT_IND\n");
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, indType)); // a new unique value number
+ }
+ tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
+ }
+ else if (!varTypeIsStruct(tree) && vnStore->GetVNFunc(addrNvnp.GetLiberal(), &funcApp) &&
+ (funcApp.m_func == VNF_PtrToArrElem))
+ {
+ // TODO-1stClassStructs: The above condition need not exclude struct types, but it is
+ // excluded for now to minimize diffs.
+ fgValueNumberArrIndexVal(tree, &funcApp, addrXvnp.GetLiberal());
+ }
+ else if (!varTypeIsStruct(tree) && addr->IsFieldAddr(this, &obj, &staticOffset, &fldSeq2))
+ {
+ // TODO-1stClassStructs: The above condition need not exclude struct types, but it is
+ // excluded for now to minimize diffs.
+ if (fldSeq2 == FieldSeqStore::NotAField())
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+ else if (fldSeq2 != nullptr)
+ {
+ // Get the first (instance or static) field from field seq. Heap[field] will yield the "field
+ // map".
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ CORINFO_CLASS_HANDLE fldCls = info.compCompHnd->getFieldClass(fldSeq2->m_fieldHnd);
+ if (obj != nullptr)
+ {
+ // Make sure that the class containing it is not a value class (as we are expecting an
+ // instance field)
+ assert((info.compCompHnd->getClassAttribs(fldCls) & CORINFO_FLG_VALUECLASS) == 0);
+ assert(staticOffset == nullptr);
+ }
+#endif // DEBUG
+ // Get a field sequence for just the first field in the sequence
+ //
+ FieldSeqNode* firstFieldOnly = GetFieldSeqStore()->CreateSingleton(fldSeq2->m_fieldHnd);
+ size_t structSize = 0;
+ ValueNum fldMapVN =
+ vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, firstFieldOnly, &structSize);
+
+ // The final field in the sequence will need to match the 'indType'
+ var_types indType = tree->TypeGet();
+
+ // The type of the field is "struct" if there are more fields in the sequence,
+ // otherwise it is the type returned from VNApplySelectors above.
+ var_types firstFieldType = vnStore->TypeOfVN(fldMapVN);
+
+ ValueNum valAtAddr = fldMapVN;
+ if (obj != nullptr)
+ {
+ // construct the ValueNumber for 'fldMap at obj'
+ ValueNum objNormVal = vnStore->VNNormVal(obj->GetVN(VNK_Liberal));
+ valAtAddr = vnStore->VNForMapSelect(VNK_Liberal, firstFieldType, fldMapVN, objNormVal);
+ }
+ else if (staticOffset != nullptr)
+ {
+ // construct the ValueNumber for 'fldMap at staticOffset'
+ ValueNum offsetNormVal = vnStore->VNNormVal(staticOffset->GetVN(VNK_Liberal));
+ valAtAddr = vnStore->VNForMapSelect(VNK_Liberal, firstFieldType, fldMapVN, offsetNormVal);
+ }
+
+ // Now get rid of any remaining struct field dereferences.
+ if (fldSeq2->m_next)
+ {
+ valAtAddr = vnStore->VNApplySelectors(VNK_Liberal, valAtAddr, fldSeq2->m_next, &structSize);
+ }
+ valAtAddr = vnStore->VNApplySelectorsTypeCheck(valAtAddr, indType, structSize);
+
+ tree->gtVNPair.SetLiberal(valAtAddr);
+
+ // The conservative value is a new, unique VN.
+ tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
+ }
+ else
+ {
+ // Occasionally we do an explicit null test on a REF, so we just dereference it with no
+ // field sequence. The result is probably unused.
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
+ }
+ }
+ else // We don't know where the address points.
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
+ }
+ }
+ }
+ else if (tree->OperGet() == GT_CAST)
+ {
+ fgValueNumberCastTree(tree);
+ }
+ else if (tree->OperGet() == GT_INTRINSIC)
+ {
+ fgValueNumberIntrinsic(tree);
+ }
+ else if (ValueNumStore::VNFuncIsLegal(GetVNFuncForOper(oper, (tree->gtFlags & GTF_UNSIGNED) != 0)))
+ {
+ if (GenTree::OperIsUnary(oper))
+ {
+ if (tree->gtOp.gtOp1 != nullptr)
+ {
+ if (tree->OperGet() == GT_NOP)
+ {
+ // Pass through arg vn.
+ tree->gtVNPair = tree->gtOp.gtOp1->gtVNPair;
+ }
+ else
+ {
+ ValueNumPair op1VNP;
+ ValueNumPair op1VNPx = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(tree->gtOp.gtOp1->gtVNPair, &op1VNP, &op1VNPx);
+ tree->gtVNPair =
+ vnStore->VNPWithExc(vnStore->VNPairForFunc(tree->TypeGet(),
+ GetVNFuncForOper(oper, (tree->gtFlags &
+ GTF_UNSIGNED) != 0),
+ op1VNP),
+ op1VNPx);
+ }
+ }
+ else // Is actually nullary.
+ {
+ // Mostly we'll leave these without a value number, assuming we'll detect these as VN failures
+ // if they actually need to have values; the exception is NOPs, which can sometimes have
+ // meaning.
+ if (tree->OperGet() == GT_NOP)
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+ }
+ }
+ else
+ {
+ assert(!GenTree::OperIsAssignment(oper)); // We handled assignments earlier.
+ assert(GenTree::OperIsBinary(oper));
+ // Standard binary operator.
+ ValueNumPair op2VNPair;
+ if (tree->gtOp.gtOp2 == nullptr)
+ {
+ op2VNPair.SetBoth(ValueNumStore::VNForNull());
+ }
+ else
+ {
+ op2VNPair = tree->gtOp.gtOp2->gtVNPair;
+ }
+ // A special case: if we add a field offset constant to a PtrToXXX, we get back a new PtrToXXX.
+ ValueNum newVN = ValueNumStore::NoVN;
+
+ ValueNumPair op1vnp;
+ ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(tree->gtOp.gtOp1->gtVNPair, &op1vnp, &op1Xvnp);
+ ValueNumPair op2vnp;
+ ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(op2VNPair, &op2vnp, &op2Xvnp);
+ ValueNumPair excSet = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp);
+
+ if (oper == GT_ADD)
+ {
+ newVN = vnStore->ExtendPtrVN(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
+ if (newVN == ValueNumStore::NoVN)
+ {
+ newVN = vnStore->ExtendPtrVN(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
+ }
+ }
+ if (newVN != ValueNumStore::NoVN)
+ {
+ newVN = vnStore->VNWithExc(newVN, excSet.GetLiberal());
+ // We don't care about differences between liberal and conservative for pointer values.
+ tree->gtVNPair.SetBoth(newVN);
+ }
+ else
+ {
+
+ ValueNumPair normalRes =
+ vnStore->VNPairForFunc(tree->TypeGet(),
+ GetVNFuncForOper(oper, (tree->gtFlags & GTF_UNSIGNED) != 0), op1vnp,
+ op2vnp);
+ // Overflow-checking operations add an overflow exception
+ if (tree->gtOverflowEx())
+ {
+ ValueNum overflowExcSet =
+ vnStore->VNExcSetSingleton(vnStore->VNForFunc(TYP_REF, VNF_OverflowExc));
+ excSet = vnStore->VNPExcSetUnion(excSet, ValueNumPair(overflowExcSet, overflowExcSet));
+ }
+ tree->gtVNPair = vnStore->VNPWithExc(normalRes, excSet);
+ }
+ }
+ }
+ else // ValueNumStore::VNFuncIsLegal returns false
+ {
+ // Some genTreeOps aren't legal VNFuncs, so they get special handling.
+ switch (oper)
+ {
+ case GT_COMMA:
+ {
+ ValueNumPair op1vnp;
+ ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(tree->gtOp.gtOp1->gtVNPair, &op1vnp, &op1Xvnp);
+ ValueNumPair op2vnp;
+ ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
+
+ GenTree* op2 = tree->gtGetOp2();
+ if (op2->OperIsIndir() && ((op2->gtFlags & GTF_IND_ASG_LHS) != 0))
+ {
+ // If op2 represents the lhs of an assignment then we give a VNForVoid for the lhs
+ op2vnp = ValueNumPair(ValueNumStore::VNForVoid(), ValueNumStore::VNForVoid());
+ }
+ else if ((op2->OperGet() == GT_CLS_VAR) && (op2->gtFlags & GTF_CLS_VAR_ASG_LHS))
+ {
+ // If op2 represents the lhs of an assignment then we give a VNForVoid for the lhs
+ op2vnp = ValueNumPair(ValueNumStore::VNForVoid(), ValueNumStore::VNForVoid());
+ }
+ else
+ {
+ vnStore->VNPUnpackExc(op2->gtVNPair, &op2vnp, &op2Xvnp);
+ }
+
+ tree->gtVNPair = vnStore->VNPWithExc(op2vnp, vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp));
+ }
+ break;
+
+ case GT_NULLCHECK:
+ // Explicit null check.
+ tree->gtVNPair =
+ vnStore->VNPWithExc(ValueNumPair(ValueNumStore::VNForVoid(), ValueNumStore::VNForVoid()),
+ vnStore->VNPExcSetSingleton(
+ vnStore->VNPairForFunc(TYP_REF, VNF_NullPtrExc,
+ tree->gtOp.gtOp1->gtVNPair)));
+ break;
+
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_IND:
+ if (tree->gtFlags & GTF_IND_ARR_LEN)
+ {
+ // It's an array length. The argument is the sum of an array ref with some integer values...
+ ValueNum arrRefLib = vnStore->VNForRefInAddr(tree->gtOp.gtOp1->gtVNPair.GetLiberal());
+ ValueNum arrRefCons = vnStore->VNForRefInAddr(tree->gtOp.gtOp1->gtVNPair.GetConservative());
+
+ assert(vnStore->TypeOfVN(arrRefLib) == TYP_REF || vnStore->TypeOfVN(arrRefLib) == TYP_BYREF);
+ if (vnStore->IsVNConstant(arrRefLib))
+ {
+ // (or in weird cases, a REF or BYREF constant, in which case the result is an exception).
+ tree->gtVNPair.SetLiberal(
+ vnStore->VNWithExc(ValueNumStore::VNForVoid(),
+ vnStore->VNExcSetSingleton(
+ vnStore->VNForFunc(TYP_REF, VNF_NullPtrExc, arrRefLib))));
+ }
+ else
+ {
+ tree->gtVNPair.SetLiberal(vnStore->VNForFunc(TYP_INT, VNFunc(GT_ARR_LENGTH), arrRefLib));
+ }
+ assert(vnStore->TypeOfVN(arrRefCons) == TYP_REF || vnStore->TypeOfVN(arrRefCons) == TYP_BYREF);
+ if (vnStore->IsVNConstant(arrRefCons))
+ {
+ // (or in weird cases, a REF or BYREF constant, in which case the result is an exception).
+ tree->gtVNPair.SetConservative(
+ vnStore->VNWithExc(ValueNumStore::VNForVoid(),
+ vnStore->VNExcSetSingleton(
+ vnStore->VNForFunc(TYP_REF, VNF_NullPtrExc, arrRefCons))));
+ }
+ else
+ {
+ tree->gtVNPair.SetConservative(
+ vnStore->VNForFunc(TYP_INT, VNFunc(GT_ARR_LENGTH), arrRefCons));
+ }
+ }
+ else
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+ break;
+
+ case GT_LOCKADD: // Binop
+ case GT_XADD: // Binop
+ case GT_XCHG: // Binop
+ // For CMPXCHG and other intrinsics add an arbitrary side effect on Heap.
+ fgMutateHeap(tree DEBUGARG("Interlocked intrinsic"));
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ break;
+
+ case GT_JTRUE:
+ case GT_LIST:
+ // These nodes never need to have a ValueNumber
+ tree->gtVNPair.SetBoth(ValueNumStore::NoVN);
+ break;
+
+ default:
+ // The default action is to give the node a new, unique VN.
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ break;
+ }
+ }
+ }
+ else
+ {
+ assert(GenTree::OperIsSpecial(oper));
+
+ // TBD: We must handle these individually. For now:
+ switch (oper)
+ {
+ case GT_CALL:
+ fgValueNumberCall(tree->AsCall());
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ // A bounds check node has no value, but may throw exceptions.
+ ValueNumPair excSet = vnStore->VNPExcSetSingleton(
+ vnStore->VNPairForFunc(TYP_REF, VNF_IndexOutOfRangeExc,
+ vnStore->VNPNormVal(tree->AsBoundsChk()->gtArrLen->gtVNPair),
+ vnStore->VNPNormVal(tree->AsBoundsChk()->gtIndex->gtVNPair)));
+ excSet = vnStore->VNPExcSetUnion(excSet, vnStore->VNPExcVal(tree->AsBoundsChk()->gtArrLen->gtVNPair));
+ excSet = vnStore->VNPExcSetUnion(excSet, vnStore->VNPExcVal(tree->AsBoundsChk()->gtIndex->gtVNPair));
+
+ tree->gtVNPair = vnStore->VNPWithExc(vnStore->VNPForVoid(), excSet);
+ }
+ break;
+
+ case GT_CMPXCHG: // Specialop
+ // For CMPXCHG and other intrinsics add an arbitrary side effect on Heap.
+ fgMutateHeap(tree DEBUGARG("Interlocked intrinsic"));
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ break;
+
+ default:
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (tree->gtVNPair.GetLiberal() != ValueNumStore::NoVN)
+ {
+ printf("N%03u ", tree->gtSeqNum);
+ printTreeID(tree);
+ printf(" ");
+ gtDispNodeName(tree);
+ if (tree->OperIsLeaf() || tree->OperIsLocalStore()) // local stores used to be leaves
+ {
+ gtDispLeaf(tree, nullptr);
+ }
+ printf(" => ");
+ vnpPrint(tree->gtVNPair, 1);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+}
+
+void Compiler::fgValueNumberIntrinsic(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_INTRINSIC);
+ GenTreeIntrinsic* intrinsic = tree->AsIntrinsic();
+ ValueNumPair arg0VNP, arg1VNP;
+ ValueNumPair arg0VNPx = ValueNumStore::VNPForEmptyExcSet();
+ ValueNumPair arg1VNPx = ValueNumStore::VNPForEmptyExcSet();
+
+ vnStore->VNPUnpackExc(intrinsic->gtOp.gtOp1->gtVNPair, &arg0VNP, &arg0VNPx);
+
+ if (intrinsic->gtOp.gtOp2 != nullptr)
+ {
+ vnStore->VNPUnpackExc(intrinsic->gtOp.gtOp2->gtVNPair, &arg1VNP, &arg1VNPx);
+ }
+
+ switch (intrinsic->gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ case CORINFO_INTRINSIC_Cos:
+ case CORINFO_INTRINSIC_Round:
+ case CORINFO_INTRINSIC_Cosh:
+ case CORINFO_INTRINSIC_Sinh:
+ case CORINFO_INTRINSIC_Tan:
+ case CORINFO_INTRINSIC_Tanh:
+ case CORINFO_INTRINSIC_Asin:
+ case CORINFO_INTRINSIC_Acos:
+ case CORINFO_INTRINSIC_Atan:
+ case CORINFO_INTRINSIC_Atan2:
+ case CORINFO_INTRINSIC_Log10:
+ case CORINFO_INTRINSIC_Pow:
+ case CORINFO_INTRINSIC_Exp:
+ case CORINFO_INTRINSIC_Ceiling:
+ case CORINFO_INTRINSIC_Floor:
+
+ // GT_INTRINSIC is currently a subtype of binary operators, but most of
+ // the math intrinsics are actually unary operations.
+
+ if (intrinsic->gtOp.gtOp2 == nullptr)
+ {
+ intrinsic->gtVNPair =
+ vnStore->VNPWithExc(vnStore->EvalMathFuncUnary(tree->TypeGet(), intrinsic->gtIntrinsicId, arg0VNP),
+ arg0VNPx);
+ }
+ else
+ {
+ ValueNumPair newVNP =
+ vnStore->EvalMathFuncBinary(tree->TypeGet(), intrinsic->gtIntrinsicId, arg0VNP, arg1VNP);
+ ValueNumPair excSet = vnStore->VNPExcSetUnion(arg0VNPx, arg1VNPx);
+ intrinsic->gtVNPair = vnStore->VNPWithExc(newVNP, excSet);
+ }
+
+ break;
+
+ case CORINFO_INTRINSIC_Object_GetType:
+ intrinsic->gtVNPair =
+ vnStore->VNPWithExc(vnStore->VNPairForFunc(intrinsic->TypeGet(), VNF_ObjGetType, arg0VNP), arg0VNPx);
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+void Compiler::fgValueNumberCastTree(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_CAST);
+
+ ValueNumPair srcVNPair = tree->gtOp.gtOp1->gtVNPair;
+ var_types castToType = tree->CastToType();
+ var_types castFromType = tree->CastFromType();
+ bool srcIsUnsigned = ((tree->gtFlags & GTF_UNSIGNED) != 0);
+ bool hasOverflowCheck = tree->gtOverflowEx();
+
+ assert(genActualType(castToType) == tree->TypeGet()); // Ensure that the resultType is correct
+
+ tree->gtVNPair = vnStore->VNPairForCast(srcVNPair, castToType, castFromType, srcIsUnsigned, hasOverflowCheck);
+}
+
+// Compute the normal ValueNumber for a cast operation with no exceptions
+ValueNum ValueNumStore::VNForCast(ValueNum srcVN,
+ var_types castToType,
+ var_types castFromType,
+ bool srcIsUnsigned /* = false */)
+{
+ // The resulting type after performing the cast is always widened to a supported IL stack size
+ var_types resultType = genActualType(castToType);
+
+ // When we're considering the actual value returned by a non-checking cast, whether or not the source is
+ // unsigned does *not* matter for non-widening casts. That is, if we cast an int or a uint to short,
+ // we just extract the first two bytes from the source bit pattern, not worrying about the interpretation.
+ // The same is true in casting between signed/unsigned types of the same width. Only when we're doing
+ // a widening cast do we care about whether the source was unsigned, so we know whether to sign or zero extend it.
+ //
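+ // For example (illustrative only): casting either an int or a uint source down to short keeps just
+ // the low 16 bits, so both produce the same VN; only a widening cast (say, to long) must record
+ // whether the source was unsigned so the correct sign/zero extension is modeled.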
+ bool srcIsUnsignedNorm = srcIsUnsigned;
+ if (genTypeSize(castToType) <= genTypeSize(castFromType))
+ {
+ srcIsUnsignedNorm = false;
+ }
+
+ ValueNum castTypeVN = VNForCastOper(castToType, srcIsUnsigned);
+ ValueNum resultVN = VNForFunc(resultType, VNF_Cast, srcVN, castTypeVN);
+
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" VNForCast(" STR_VN "%x, " STR_VN "%x) returns ", srcVN, castTypeVN);
+ m_pComp->vnPrint(resultVN, 1);
+ printf("\n");
+ }
+#endif
+
+ return resultVN;
+}
+
+// Compute the ValueNumberPair for a cast operation
+ValueNumPair ValueNumStore::VNPairForCast(ValueNumPair srcVNPair,
+ var_types castToType,
+ var_types castFromType,
+ bool srcIsUnsigned, /* = false */
+ bool hasOverflowCheck) /* = false */
+{
+ // The resulting type after performing the cast is always widened to a supported IL stack size
+ var_types resultType = genActualType(castToType);
+
+ ValueNumPair castArgVNP;
+ ValueNumPair castArgxVNP = ValueNumStore::VNPForEmptyExcSet();
+ VNPUnpackExc(srcVNPair, &castArgVNP, &castArgxVNP);
+
+ // When we're considering the actual value returned by a non-checking cast (or a checking cast that succeeds),
+ // whether or not the source is unsigned does *not* matter for non-widening casts.
+ // That is, if we cast an int or a uint to short, we just extract the first two bytes from the source
+ // bit pattern, not worrying about the interpretation. The same is true in casting between signed/unsigned
+ // types of the same width. Only when we're doing a widening cast do we care about whether the source
+ // was unsigned, so we know whether to sign or zero extend it.
+ //
+ // Important: Casts to floating point cannot be optimized in this fashion. (bug 946768)
+ //
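+ // For example (illustrative only): for a source value with its sign bit set, (double)(int)x and
+ // (double)(unsigned int)x differ, so the unsigned flag must be preserved for casts to floating point.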
+ bool srcIsUnsignedNorm = srcIsUnsigned;
+ if (genTypeSize(castToType) <= genTypeSize(castFromType) && !varTypeIsFloating(castToType))
+ {
+ srcIsUnsignedNorm = false;
+ }
+
+ ValueNum castTypeVN = VNForCastOper(castToType, srcIsUnsignedNorm);
+ ValueNumPair castTypeVNPair(castTypeVN, castTypeVN);
+ ValueNumPair castNormRes = VNPairForFunc(resultType, VNF_Cast, castArgVNP, castTypeVNPair);
+
+ ValueNumPair resultVNP = VNPWithExc(castNormRes, castArgxVNP);
+
+ // If we have a check for overflow, add the exception information.
+ if (hasOverflowCheck)
+ {
+ // For overflow checking, we always need to know whether the source is unsigned.
+ castTypeVNPair.SetBoth(VNForCastOper(castToType, srcIsUnsigned));
+ ValueNumPair excSet =
+ VNPExcSetSingleton(VNPairForFunc(TYP_REF, VNF_ConvOverflowExc, castArgVNP, castTypeVNPair));
+ excSet = VNPExcSetUnion(excSet, castArgxVNP);
+ resultVNP = VNPWithExc(castNormRes, excSet);
+ }
+
+ return resultVNP;
+}
+
+void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueNumPair vnpExc)
+{
+ unsigned nArgs = ValueNumStore::VNFuncArity(vnf);
+ assert(vnf != VNF_Boundary);
+ GenTreeArgList* args = call->gtCallArgs;
+ bool generateUniqueVN = false;
+ bool useEntryPointAddrAsArg0 = false;
+
+ switch (vnf)
+ {
+ case VNF_JitNew:
+ {
+ generateUniqueVN = true;
+ vnpExc = ValueNumStore::VNPForEmptyExcSet();
+ }
+ break;
+
+ case VNF_JitNewArr:
+ {
+ generateUniqueVN = true;
+ ValueNumPair vnp1 = vnStore->VNPNormVal(args->Rest()->Current()->gtVNPair);
+
+ // The New Array helper may throw an overflow exception
+ vnpExc = vnStore->VNPExcSetSingleton(vnStore->VNPairForFunc(TYP_REF, VNF_NewArrOverflowExc, vnp1));
+ }
+ break;
+
+ case VNF_BoxNullable:
+ {
+ // Generate a unique VN so that VNForFunc generates a unique value number for box nullable.
+ // Alternatively instead of using vnpUniq below in VNPairForFunc(...),
+ // we could use the value number of what the byref arg0 points to.
+ //
+ // But retrieving the value number of what the byref arg0 points to is quite a bit more work
+ // and doing so only very rarely allows for an additional optimization.
+ generateUniqueVN = true;
+ }
+ break;
+
+ case VNF_JitReadyToRunNew:
+ {
+ generateUniqueVN = true;
+ vnpExc = ValueNumStore::VNPForEmptyExcSet();
+ useEntryPointAddrAsArg0 = true;
+ }
+ break;
+
+ case VNF_JitReadyToRunNewArr:
+ {
+ generateUniqueVN = true;
+ ValueNumPair vnp1 = vnStore->VNPNormVal(args->Current()->gtVNPair);
+
+ // The New Array helper may throw an overflow exception
+ vnpExc = vnStore->VNPExcSetSingleton(vnStore->VNPairForFunc(TYP_REF, VNF_NewArrOverflowExc, vnp1));
+ useEntryPointAddrAsArg0 = true;
+ }
+ break;
+
+ case VNF_ReadyToRunStaticBase:
+ case VNF_ReadyToRunIsInstanceOf:
+ case VNF_ReadyToRunCastClass:
+ {
+ useEntryPointAddrAsArg0 = true;
+ }
+ break;
+
+ default:
+ {
+ assert(s_helperCallProperties.IsPure(eeGetHelperNum(call->gtCallMethHnd)));
+ }
+ break;
+ }
+
+ if (generateUniqueVN)
+ {
+ nArgs--;
+ }
+
+ ValueNumPair vnpUniq;
+ if (generateUniqueVN)
+ {
+ // Generate a unique VN so that VNForFunc generates a unique value number.
+ vnpUniq.SetBoth(vnStore->VNForExpr(compCurBB, call->TypeGet()));
+ }
+
+ if (nArgs == 0)
+ {
+ if (generateUniqueVN)
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnpUniq);
+ }
+ else
+ {
+ call->gtVNPair.SetBoth(vnStore->VNForFunc(call->TypeGet(), vnf));
+ }
+ }
+ else
+ {
+ // Has at least one argument.
+ ValueNumPair vnp0;
+ ValueNumPair vnp0x = ValueNumStore::VNPForEmptyExcSet();
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (useEntryPointAddrAsArg0)
+ {
+ ValueNum callAddrVN = vnStore->VNForPtrSizeIntCon((ssize_t)call->gtCall.gtEntryPoint.addr);
+ vnp0 = ValueNumPair(callAddrVN, callAddrVN);
+ }
+ else
+#endif
+ {
+ assert(!useEntryPointAddrAsArg0);
+ ValueNumPair vnp0wx = args->Current()->gtVNPair;
+ vnStore->VNPUnpackExc(vnp0wx, &vnp0, &vnp0x);
+
+ // Also include the argument's exceptions in the exception set
+ vnpExc = vnStore->VNPExcSetUnion(vnpExc, vnp0x);
+
+ args = args->Rest();
+ }
+ if (nArgs == 1)
+ {
+ if (generateUniqueVN)
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnp0, vnpUniq);
+ }
+ else
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnp0);
+ }
+ }
+ else
+ {
+ // Has at least two arguments.
+ ValueNumPair vnp1wx = args->Current()->gtVNPair;
+ ValueNumPair vnp1;
+ ValueNumPair vnp1x = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(vnp1wx, &vnp1, &vnp1x);
+ vnpExc = vnStore->VNPExcSetUnion(vnpExc, vnp1x);
+
+ args = args->Rest();
+ if (nArgs == 2)
+ {
+ if (generateUniqueVN)
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnp0, vnp1, vnpUniq);
+ }
+ else
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnp0, vnp1);
+ }
+ }
+ else
+ {
+ ValueNumPair vnp2wx = args->Current()->gtVNPair;
+ ValueNumPair vnp2;
+ ValueNumPair vnp2x = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(vnp2wx, &vnp2, &vnp2x);
+ vnpExc = vnStore->VNPExcSetUnion(vnpExc, vnp2x);
+
+ args = args->Rest();
+ assert(nArgs == 3); // Our current maximum.
+ assert(args == nullptr);
+ if (generateUniqueVN)
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnp0, vnp1, vnp2, vnpUniq);
+ }
+ else
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnp0, vnp1, vnp2);
+ }
+ }
+ }
+ // Add the accumulated exceptions.
+ call->gtVNPair = vnStore->VNPWithExc(call->gtVNPair, vnpExc);
+ }
+}
+
+void Compiler::fgValueNumberCall(GenTreeCall* call)
+{
+ // First: do value numbering of any argument placeholder nodes in the argument list
+ // (by transferring from the VN of the late arg that they are standing in for...)
+ unsigned i = 0;
+ GenTreeArgList* args = call->gtCallArgs;
+ bool updatedArgPlace = false;
+ while (args != nullptr)
+ {
+ GenTreePtr arg = args->Current();
+ if (arg->OperGet() == GT_ARGPLACE)
+ {
+ // Find the corresponding late arg.
+ GenTreePtr lateArg = nullptr;
+ for (unsigned j = 0; j < call->fgArgInfo->ArgCount(); j++)
+ {
+ if (call->fgArgInfo->ArgTable()[j]->argNum == i)
+ {
+ lateArg = call->fgArgInfo->ArgTable()[j]->node;
+ break;
+ }
+ }
+ assert(lateArg != nullptr);
+ assert(lateArg->gtVNPair.BothDefined());
+ arg->gtVNPair = lateArg->gtVNPair;
+ updatedArgPlace = true;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("VN of ARGPLACE tree ");
+ Compiler::printTreeID(arg);
+ printf(" updated to ");
+ vnpPrint(arg->gtVNPair, 1);
+ printf("\n");
+ }
+#endif
+ }
+ i++;
+ args = args->Rest();
+ }
+ if (updatedArgPlace)
+ {
+ // Now we have to update the VN's of the argument list nodes, since that will be used in determining
+ // loop-invariance.
+ fgUpdateArgListVNs(call->gtCallArgs);
+ }
+
+ if (call->gtCallType == CT_HELPER)
+ {
+ bool modHeap = fgValueNumberHelperCall(call);
+
+ if (modHeap)
+ {
+ // For now, arbitrary side effect on Heap.
+ fgMutateHeap(call DEBUGARG("HELPER - modifies heap"));
+ }
+ }
+ else
+ {
+ if (call->TypeGet() == TYP_VOID)
+ {
+ call->gtVNPair.SetBoth(ValueNumStore::VNForVoid());
+ }
+ else
+ {
+ call->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, call->TypeGet()));
+ }
+
+ // For now, arbitrary side effect on Heap.
+ fgMutateHeap(call DEBUGARG("CALL"));
+ }
+}
+
+void Compiler::fgUpdateArgListVNs(GenTreeArgList* args)
+{
+ if (args == nullptr)
+ {
+ return;
+ }
+ // Otherwise...
+ fgUpdateArgListVNs(args->Rest());
+ fgValueNumberTree(args);
+}
+
+VNFunc Compiler::fgValueNumberHelperMethVNFunc(CorInfoHelpFunc helpFunc)
+{
+ assert(s_helperCallProperties.IsPure(helpFunc) || s_helperCallProperties.IsAllocator(helpFunc));
+
+ VNFunc vnf = VNF_Boundary; // An illegal value...
+ switch (helpFunc)
+ {
+ // These translate to other function symbols:
+ case CORINFO_HELP_DIV:
+ vnf = VNFunc(GT_DIV);
+ break;
+ case CORINFO_HELP_MOD:
+ vnf = VNFunc(GT_MOD);
+ break;
+ case CORINFO_HELP_UDIV:
+ vnf = VNFunc(GT_UDIV);
+ break;
+ case CORINFO_HELP_UMOD:
+ vnf = VNFunc(GT_UMOD);
+ break;
+ case CORINFO_HELP_LLSH:
+ vnf = VNFunc(GT_LSH);
+ break;
+ case CORINFO_HELP_LRSH:
+ vnf = VNFunc(GT_RSH);
+ break;
+ case CORINFO_HELP_LRSZ:
+ vnf = VNFunc(GT_RSZ);
+ break;
+ case CORINFO_HELP_LMUL:
+ case CORINFO_HELP_LMUL_OVF:
+ vnf = VNFunc(GT_MUL);
+ break;
+ case CORINFO_HELP_ULMUL_OVF:
+ vnf = VNFunc(GT_MUL);
+ break; // Is this the right thing?
+ case CORINFO_HELP_LDIV:
+ vnf = VNFunc(GT_DIV);
+ break;
+ case CORINFO_HELP_LMOD:
+ vnf = VNFunc(GT_MOD);
+ break;
+ case CORINFO_HELP_ULDIV:
+ vnf = VNFunc(GT_DIV);
+ break; // Is this the right thing?
+ case CORINFO_HELP_ULMOD:
+ vnf = VNFunc(GT_MOD);
+ break; // Is this the right thing?
+
+ case CORINFO_HELP_LNG2DBL:
+ vnf = VNF_Lng2Dbl;
+ break;
+ case CORINFO_HELP_ULNG2DBL:
+ vnf = VNF_ULng2Dbl;
+ break;
+ case CORINFO_HELP_DBL2INT:
+ vnf = VNF_Dbl2Int;
+ break;
+ case CORINFO_HELP_DBL2INT_OVF:
+ vnf = VNF_Dbl2Int;
+ break;
+ case CORINFO_HELP_DBL2LNG:
+ vnf = VNF_Dbl2Lng;
+ break;
+ case CORINFO_HELP_DBL2LNG_OVF:
+ vnf = VNF_Dbl2Lng;
+ break;
+ case CORINFO_HELP_DBL2UINT:
+ vnf = VNF_Dbl2UInt;
+ break;
+ case CORINFO_HELP_DBL2UINT_OVF:
+ vnf = VNF_Dbl2UInt;
+ break;
+ case CORINFO_HELP_DBL2ULNG:
+ vnf = VNF_Dbl2ULng;
+ break;
+ case CORINFO_HELP_DBL2ULNG_OVF:
+ vnf = VNF_Dbl2ULng;
+ break;
+ case CORINFO_HELP_FLTREM:
+ vnf = VNFunc(GT_MOD);
+ break;
+ case CORINFO_HELP_DBLREM:
+ vnf = VNFunc(GT_MOD);
+ break;
+ case CORINFO_HELP_FLTROUND:
+ vnf = VNF_FltRound;
+ break; // Is this the right thing?
+ case CORINFO_HELP_DBLROUND:
+ vnf = VNF_DblRound;
+ break; // Is this the right thing?
+
+ // These allocation operations probably require some augmentation -- perhaps allocSiteId,
+ // something about array length...
+ case CORINFO_HELP_NEW_CROSSCONTEXT:
+ case CORINFO_HELP_NEWFAST:
+ case CORINFO_HELP_NEWSFAST:
+ case CORINFO_HELP_NEWSFAST_ALIGN8:
+ vnf = VNF_JitNew;
+ break;
+
+ case CORINFO_HELP_READYTORUN_NEW:
+ vnf = VNF_JitReadyToRunNew;
+ break;
+
+ case CORINFO_HELP_NEWARR_1_DIRECT:
+ case CORINFO_HELP_NEWARR_1_OBJ:
+ case CORINFO_HELP_NEWARR_1_VC:
+ case CORINFO_HELP_NEWARR_1_ALIGN8:
+ vnf = VNF_JitNewArr;
+ break;
+
+ case CORINFO_HELP_READYTORUN_NEWARR_1:
+ vnf = VNF_JitReadyToRunNewArr;
+ break;
+
+ case CORINFO_HELP_GETGENERICS_GCSTATIC_BASE:
+ vnf = VNF_GetgenericsGcstaticBase;
+ break;
+ case CORINFO_HELP_GETGENERICS_NONGCSTATIC_BASE:
+ vnf = VNF_GetgenericsNongcstaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE:
+ vnf = VNF_GetsharedGcstaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE:
+ vnf = VNF_GetsharedNongcstaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR:
+ vnf = VNF_GetsharedGcstaticBaseNoctor;
+ break;
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR:
+ vnf = VNF_GetsharedNongcstaticBaseNoctor;
+ break;
+ case CORINFO_HELP_READYTORUN_STATIC_BASE:
+ vnf = VNF_ReadyToRunStaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_DYNAMICCLASS:
+ vnf = VNF_GetsharedGcstaticBaseDynamicclass;
+ break;
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_DYNAMICCLASS:
+ vnf = VNF_GetsharedNongcstaticBaseDynamicclass;
+ break;
+ case CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS:
+ vnf = VNF_ClassinitSharedDynamicclass;
+ break;
+ case CORINFO_HELP_GETGENERICS_GCTHREADSTATIC_BASE:
+ vnf = VNF_GetgenericsGcthreadstaticBase;
+ break;
+ case CORINFO_HELP_GETGENERICS_NONGCTHREADSTATIC_BASE:
+ vnf = VNF_GetgenericsNongcthreadstaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE:
+ vnf = VNF_GetsharedGcthreadstaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE:
+ vnf = VNF_GetsharedNongcthreadstaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR:
+ vnf = VNF_GetsharedGcthreadstaticBaseNoctor;
+ break;
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR:
+ vnf = VNF_GetsharedNongcthreadstaticBaseNoctor;
+ break;
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS:
+ vnf = VNF_GetsharedGcthreadstaticBaseDynamicclass;
+ break;
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS:
+ vnf = VNF_GetsharedNongcthreadstaticBaseDynamicclass;
+ break;
+ case CORINFO_HELP_GETSTATICFIELDADDR_CONTEXT:
+ vnf = VNF_GetStaticAddrContext;
+ break;
+ case CORINFO_HELP_GETSTATICFIELDADDR_TLS:
+ vnf = VNF_GetStaticAddrTLS;
+ break;
+
+ case CORINFO_HELP_RUNTIMEHANDLE_METHOD:
+ case CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG:
+ vnf = VNF_RuntimeHandleMethod;
+ break;
+
+ case CORINFO_HELP_RUNTIMEHANDLE_CLASS:
+ case CORINFO_HELP_RUNTIMEHANDLE_CLASS_LOG:
+ vnf = VNF_RuntimeHandleClass;
+ break;
+
+ case CORINFO_HELP_STRCNS:
+ vnf = VNF_StrCns;
+ break;
+
+ case CORINFO_HELP_CHKCASTCLASS:
+ case CORINFO_HELP_CHKCASTCLASS_SPECIAL:
+ case CORINFO_HELP_CHKCASTARRAY:
+ case CORINFO_HELP_CHKCASTINTERFACE:
+ case CORINFO_HELP_CHKCASTANY:
+ vnf = VNF_CastClass;
+ break;
+
+ case CORINFO_HELP_READYTORUN_CHKCAST:
+ vnf = VNF_ReadyToRunCastClass;
+ break;
+
+ case CORINFO_HELP_ISINSTANCEOFCLASS:
+ case CORINFO_HELP_ISINSTANCEOFINTERFACE:
+ case CORINFO_HELP_ISINSTANCEOFARRAY:
+ case CORINFO_HELP_ISINSTANCEOFANY:
+ vnf = VNF_IsInstanceOf;
+ break;
+
+ case CORINFO_HELP_READYTORUN_ISINSTANCEOF:
+ vnf = VNF_ReadyToRunIsInstanceOf;
+ break;
+
+ case CORINFO_HELP_LDELEMA_REF:
+ vnf = VNF_LdElemA;
+ break;
+
+ case CORINFO_HELP_UNBOX:
+ vnf = VNF_Unbox;
+ break;
+
+ // A constant within any method.
+ case CORINFO_HELP_GETCURRENTMANAGEDTHREADID:
+ vnf = VNF_ManagedThreadId;
+ break;
+
+ case CORINFO_HELP_GETREFANY:
+ // TODO-CQ: This should really be interpreted as just a struct field reference, in terms of values.
+ vnf = VNF_GetRefanyVal;
+ break;
+
+ case CORINFO_HELP_GETCLASSFROMMETHODPARAM:
+ vnf = VNF_GetClassFromMethodParam;
+ break;
+
+ case CORINFO_HELP_GETSYNCFROMCLASSHANDLE:
+ vnf = VNF_GetSyncFromClassHandle;
+ break;
+
+ case CORINFO_HELP_LOOP_CLONE_CHOICE_ADDR:
+ vnf = VNF_LoopCloneChoiceAddr;
+ break;
+
+ case CORINFO_HELP_BOX_NULLABLE:
+ vnf = VNF_BoxNullable;
+ break;
+
+ default:
+ unreached();
+ }
+
+ assert(vnf != VNF_Boundary);
+ return vnf;
+}
+
+bool Compiler::fgValueNumberHelperCall(GenTreeCall* call)
+{
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+ bool pure = s_helperCallProperties.IsPure(helpFunc);
+ bool isAlloc = s_helperCallProperties.IsAllocator(helpFunc);
+ bool modHeap = s_helperCallProperties.MutatesHeap(helpFunc);
+ bool mayRunCctor = s_helperCallProperties.MayRunCctor(helpFunc);
+ bool noThrow = s_helperCallProperties.NoThrow(helpFunc);
+
+ ValueNumPair vnpExc = ValueNumStore::VNPForEmptyExcSet();
+
+ // If the JIT helper can throw an exception, make sure that we fill in
+ // vnpExc with a value number that represents the exception(s) that can be thrown.
+ if (!noThrow)
+ {
+ // If the helper is known to throw only one particular exception,
+ // we can set vnpExc to that exception; otherwise we conservatively
+ // model the JIT helper as possibly throwing multiple different exceptions.
+ //
+ switch (helpFunc)
+ {
+ case CORINFO_HELP_OVERFLOW:
+ // This helper always throws the VNF_OverflowExc exception
+ vnpExc = vnStore->VNPExcSetSingleton(vnStore->VNPairForFunc(TYP_REF, VNF_OverflowExc));
+ break;
+
+ default:
+ // Set up vnpExc to indicate that multiple different exceptions
+ // could be generated by this helper.
+ vnpExc = vnStore->VNPExcSetSingleton(vnStore->VNPairForFunc(TYP_REF, VNF_HelperMultipleExc));
+ }
+ }
+
+ ValueNumPair vnpNorm;
+
+ if (call->TypeGet() == TYP_VOID)
+ {
+ vnpNorm = ValueNumStore::VNPForVoid();
+ }
+ else
+ {
+ // TODO-CQ: this is a list of helpers we're going to treat as non-pure,
+ // because they raise complications. Eventually, we need to handle those complications...
+ bool needsFurtherWork = false;
+ switch (helpFunc)
+ {
+ case CORINFO_HELP_NEW_MDARR:
+ // This is a varargs helper. We need to represent the array shape in the VN world somehow.
+ needsFurtherWork = true;
+ break;
+ default:
+ break;
+ }
+
+ if (!needsFurtherWork && (pure || isAlloc))
+ {
+ VNFunc vnf = fgValueNumberHelperMethVNFunc(helpFunc);
+
+ if (mayRunCctor)
+ {
+ if ((call->gtFlags & GTF_CALL_HOISTABLE) == 0)
+ {
+ modHeap = true;
+ }
+ }
+
+ fgValueNumberHelperCallFunc(call, vnf, vnpExc);
+ return modHeap;
+ }
+ else
+ {
+ vnpNorm.SetBoth(vnStore->VNForExpr(compCurBB, call->TypeGet()));
+ }
+ }
+
+ call->gtVNPair = vnStore->VNPWithExc(vnpNorm, vnpExc);
+ return modHeap;
+}
+
+#ifdef DEBUG
+// This method asserts that the specified SSA name constraints are satisfied.
+// Until we figure out otherwise, all VN's are assumed to be liberal.
+// TODO-Cleanup: new JitTestLabels for lib vs cons vs both VN classes?
+void Compiler::JitTestCheckVN()
+{
+ typedef SimplerHashTable<ssize_t, SmallPrimitiveKeyFuncs<ssize_t>, ValueNum, JitSimplerHashBehavior> LabelToVNMap;
+ typedef SimplerHashTable<ValueNum, SmallPrimitiveKeyFuncs<ValueNum>, ssize_t, JitSimplerHashBehavior> VNToLabelMap;
+
+ // If we have no test data, early out.
+ if (m_nodeTestData == nullptr)
+ {
+ return;
+ }
+
+ NodeToTestDataMap* testData = GetNodeTestData();
+
+ // First we have to know which nodes in the tree are reachable.
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, int, JitSimplerHashBehavior> NodeToIntMap;
+ NodeToIntMap* reachable = FindReachableNodesInNodeTestData();
+
+ LabelToVNMap* labelToVN = new (getAllocatorDebugOnly()) LabelToVNMap(getAllocatorDebugOnly());
+ VNToLabelMap* vnToLabel = new (getAllocatorDebugOnly()) VNToLabelMap(getAllocatorDebugOnly());
+
+ if (verbose)
+ {
+ printf("\nJit Testing: Value numbering.\n");
+ }
+ for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
+ {
+ TestLabelAndNum tlAndN;
+ GenTreePtr node = ki.Get();
+ ValueNum nodeVN = node->GetVN(VNK_Liberal);
+
+ bool b = testData->Lookup(node, &tlAndN);
+ assert(b);
+ if (tlAndN.m_tl == TL_VN || tlAndN.m_tl == TL_VNNorm)
+ {
+ int dummy;
+ if (!reachable->Lookup(node, &dummy))
+ {
+ printf("Node ");
+ Compiler::printTreeID(node);
+ printf(" had a test constraint declared, but has become unreachable at the time the constraint is "
+ "tested.\n"
+ "(This is probably as a result of some optimization -- \n"
+ "you may need to modify the test case to defeat this opt.)\n");
+ assert(false);
+ }
+
+ if (verbose)
+ {
+ printf(" Node ");
+ Compiler::printTreeID(node);
+ printf(" -- VN class %d.\n", tlAndN.m_num);
+ }
+
+ if (tlAndN.m_tl == TL_VNNorm)
+ {
+ nodeVN = vnStore->VNNormVal(nodeVN);
+ }
+
+ ValueNum vn;
+ if (labelToVN->Lookup(tlAndN.m_num, &vn))
+ {
+ if (verbose)
+ {
+ printf(" Already in hash tables.\n");
+ }
+ // The mappings must be one-to-one: if the label has a mapping, then the VN must, as well.
+ ssize_t num2;
+ bool b = vnToLabel->Lookup(vn, &num2);
+ // And the mappings must be the same.
+ if (tlAndN.m_num != num2)
+ {
+ printf("Node: ");
+ Compiler::printTreeID(node);
+ printf(", with value number " STR_VN "%x, was declared in VN class %d,\n", nodeVN, tlAndN.m_num);
+ printf("but this value number " STR_VN
+ "%x has already been associated with a different SSA name class: %d.\n",
+ vn, num2);
+ assert(false);
+ }
+ // And the current node must be of the specified SSA family.
+ if (nodeVN != vn)
+ {
+ printf("Node: ");
+ Compiler::printTreeID(node);
+ printf(", " STR_VN "%x was declared in SSA name class %d,\n", nodeVN, tlAndN.m_num);
+ printf("but that name class was previously bound to a different value number: " STR_VN "%x.\n", vn);
+ assert(false);
+ }
+ }
+ else
+ {
+ ssize_t num;
+ // The mappings must be one-to-one: if the label has no mapping, then the VN may not have one, either.
+ if (vnToLabel->Lookup(nodeVN, &num))
+ {
+ printf("Node: ");
+ Compiler::printTreeID(node);
+ printf(", " STR_VN "%x was declared in value number class %d,\n", nodeVN, tlAndN.m_num);
+ printf(
+ "but this value number has already been associated with a different value number class: %d.\n",
+ num);
+ assert(false);
+ }
+ // Add to both mappings.
+ labelToVN->Set(tlAndN.m_num, nodeVN);
+ vnToLabel->Set(nodeVN, tlAndN.m_num);
+ if (verbose)
+ {
+ printf(" added to hash tables.\n");
+ }
+ }
+ }
+ }
+}
+
+void Compiler::vnpPrint(ValueNumPair vnp, unsigned level)
+{
+ if (vnp.BothEqual())
+ {
+ vnPrint(vnp.GetLiberal(), level);
+ }
+ else
+ {
+ printf("<l:");
+ vnPrint(vnp.GetLiberal(), level);
+ printf(", c:");
+ vnPrint(vnp.GetConservative(), level);
+ printf(">");
+ }
+}
+
+void Compiler::vnPrint(ValueNum vn, unsigned level)
+{
+
+ if (ValueNumStore::isReservedVN(vn))
+ {
+ printf(ValueNumStore::reservedName(vn));
+ }
+ else
+ {
+ printf(STR_VN "%x", vn);
+ if (level > 0)
+ {
+ vnStore->vnDump(this, vn);
+ }
+ }
+}
+
+#endif // DEBUG
+
+// Methods of ValueNumPair.
+ValueNumPair::ValueNumPair() : m_liberal(ValueNumStore::NoVN), m_conservative(ValueNumStore::NoVN)
+{
+}
+
+bool ValueNumPair::BothDefined() const
+{
+ return (m_liberal != ValueNumStore::NoVN) && (m_conservative != ValueNumStore::NoVN);
+}
diff --git a/src/jit/valuenum.h b/src/jit/valuenum.h
new file mode 100644
index 0000000000..17dacfbb54
--- /dev/null
+++ b/src/jit/valuenum.h
@@ -0,0 +1,1378 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Defines the class "ValueNumStore", which maintains value numbers for a compilation.
+
+// Recall that "value numbering" assigns an integer value number to each expression. The "value
+// number property" is that two expressions with the same value number will evaluate to the same value
+// at runtime. Expressions with different value numbers may or may not be equivalent. This property
+// of value numbers has obvious applications in redundancy-elimination optimizations.
+//
+// Since value numbers give us a way of talking about the (immutable) values to which expressions
+// evaluate, they provide a good "handle" to use for attributing properties to values. For example,
+// we might note that some value number represents some particular integer constant -- which has obvious
+// application to constant propagation. Or that we know the exact type of some object reference,
+// which might be used in devirtualization.
+//
+// Finally, we will also use value numbers to express control-flow-dependent assertions. Some test may
+// imply that after the test, something new is known about a value: that an object reference is non-null
+// after a dereference (since control flow continued because no exception was thrown); that an integer value
+// is restricted to some subrange after a comparison test; etc.
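+//
+// As a small illustrative example (not part of the definitions above): in an expression such as
+// "(a + b) * (a + b)", both occurrences of "a + b" would typically receive the same value number,
+// which is what lets a redundancy-elimination phase compute the sum once; "a + b" and "a - b"
+// would in general receive different value numbers.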
+
+/*****************************************************************************/
+#ifndef _VALUENUM_H_
+#define _VALUENUM_H_
+/*****************************************************************************/
+
+#include "vartype.h"
+// For "GT_COUNT"
+#include "gentree.h"
+// Defines the type ValueNum.
+#include "valuenumtype.h"
+
+// A "ValueNumStore" represents the "universe" of value numbers used in a single
+// compilation.
+
+// All members of the enumeration genTreeOps are also members of VNFunc.
+// (Though some of these may be labeled "illegal").
+enum VNFunc
+{
+ // Implicitly, elements of genTreeOps here.
+ VNF_Boundary = GT_COUNT,
+#define ValueNumFuncDef(nm, arity, commute, knownNonNull, sharedStatic) VNF_##nm,
+#include "valuenumfuncs.h"
+ VNF_COUNT
+};
+
+// Given an "oper" and its associated flags, transform the oper into a
+// more accurate oper that can be used in evaluation. For example, (GT_ADD, unsigned)
+// transforms to GT_ADD_UN.
+VNFunc GetVNFuncForOper(genTreeOps oper, bool isUnsigned);
+
+// An instance of this struct represents an application of the function symbol
+// "m_func" to the first "m_arity" (<= 4) argument values in "m_args."
+struct VNFuncApp
+{
+ VNFunc m_func;
+ unsigned m_arity;
+ ValueNum m_args[4];
+
+ bool Equals(const VNFuncApp& funcApp)
+ {
+ if (m_func != funcApp.m_func)
+ {
+ return false;
+ }
+ if (m_arity != funcApp.m_arity)
+ {
+ return false;
+ }
+ for (unsigned i = 0; i < m_arity; i++)
+ {
+ if (m_args[i] != funcApp.m_args[i])
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+};
+
+// A unique prefix character to use when dumping a tree's gtVN in the tree dumps
+// We use this together with string concatenation to put this in printf format strings
+// static const char* const VN_DumpPrefix = "$";
+#define STR_VN "$"
+
+class ValueNumStore
+{
+
+public:
+ // We will reserve "max unsigned" to represent "not a value number", for maps that might start uninitialized.
+ static const ValueNum NoVN = UINT32_MAX;
+ // A second special value, used to indicate that a function evaluation would cause infinite recursion.
+ static const ValueNum RecursiveVN = UINT32_MAX - 1;
+
+ // ==================================================================================================
+ // VNMap - map from something to ValueNum, where something is typically a constant value or a VNFunc
+ // This class has two purposes - to abstract the implementation and to validate the ValueNums
+ // being stored or retrieved.
+ template <class fromType, class keyfuncs = LargePrimitiveKeyFuncs<fromType>>
+ class VNMap : public SimplerHashTable<fromType, keyfuncs, ValueNum, JitSimplerHashBehavior>
+ {
+ public:
+ VNMap(IAllocator* alloc) : SimplerHashTable<fromType, keyfuncs, ValueNum, JitSimplerHashBehavior>(alloc)
+ {
+ }
+ ~VNMap()
+ {
+ ~VNMap<fromType, keyfuncs>::SimplerHashTable();
+ }
+
+ bool Set(fromType k, ValueNum val)
+ {
+ assert(val != RecursiveVN);
+ return SimplerHashTable<fromType, keyfuncs, ValueNum, JitSimplerHashBehavior>::Set(k, val);
+ }
+ bool Lookup(fromType k, ValueNum* pVal = nullptr) const
+ {
+ bool result = SimplerHashTable<fromType, keyfuncs, ValueNum, JitSimplerHashBehavior>::Lookup(k, pVal);
+ assert(!result || *pVal != RecursiveVN);
+ return result;
+ }
+ };
+
+private:
+ Compiler* m_pComp;
+
+ // For allocations. (Other things?)
+ IAllocator* m_alloc;
+
+ // TODO-Cleanup: should transform "attribs" into a struct with bit fields. That would be simpler...
+
+ enum VNFOpAttrib
+ {
+ VNFOA_IllegalGenTreeOp = 0x1, // corresponds to a genTreeOps value that is not a legal VN func.
+ VNFOA_Commutative = 0x2, // 1 iff the function is commutative.
+ VNFOA_Arity = 0x4, // Bits 2..4 encode the arity.
+ VNFOA_AfterArity = 0x20, // Makes it clear what value the next flag(s) after Arity should have.
+ VNFOA_KnownNonNull = 0x20, // 1 iff the result is known to be non-null.
+ VNFOA_SharedStatic = 0x40, // 1 iff this VNF represents one of the shared static jit helpers
+ };
+
+ static const unsigned VNFOA_ArityShift = 2;
+ static const unsigned VNFOA_ArityBits = 3;
+ static const unsigned VNFOA_MaxArity = (1 << VNFOA_ArityBits) - 1; // Max arity we can represent.
+ static const unsigned VNFOA_ArityMask = VNFOA_AfterArity - VNFOA_Arity;
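+ // Illustrative sketch: with the constants above, the arity encoded in an attribute byte can be
+ // recovered as ((attribs & VNFOA_ArityMask) >> VNFOA_ArityShift); see VNFuncArity below.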
+
+ // These enum constants are used to encode the cast operation in the lowest bits by VNForCastOper
+ enum VNFCastAttrib
+ {
+ VCA_UnsignedSrc = 0x01,
+
+ VCA_BitCount = 1, // the number of reserved bits
+ VCA_ReservedBits = 0x01, // i.e. (VCA_UnsignedSrc)
+ };
+
+ // An array of length GT_COUNT, mapping genTreeOp values to their VNFOpAttrib.
+ static UINT8* s_vnfOpAttribs;
+
+ // Returns "true" iff gtOper is a legal value number function.
+ // (Requires InitValueNumStoreStatics to have been run.)
+ static bool GenTreeOpIsLegalVNFunc(genTreeOps gtOper);
+
+ // Returns "true" iff "vnf" is a commutative (and thus binary) operator.
+ // (Requires InitValueNumStoreStatics to have been run.)
+ static bool VNFuncIsCommutative(VNFunc vnf);
+
+ // Returns "true" iff "vnf" is a comparison (and thus binary) operator.
+ static bool VNFuncIsComparison(VNFunc vnf);
+
+ // Returns "true" iff "vnf" can be evaluated for constant arguments.
+ static bool CanEvalForConstantArgs(VNFunc vnf);
+
+ // return vnf(v0)
+ template <typename T>
+ static T EvalOp(VNFunc vnf, T v0);
+
+ // If vnf(v0, v1) would raise an exception, sets *pExcSet to the singleton set containing the exception, and
+ // returns (T)0. Otherwise, returns vnf(v0, v1).
+ template <typename T>
+ T EvalOp(VNFunc vnf, T v0, T v1, ValueNum* pExcSet);
+
+ template <typename T>
+ static int EvalComparison(VNFunc vnf, T v0, T v1);
+ template <typename T>
+ static int EvalOrderedComparisonFloat(VNFunc vnf, T v0, T v1);
+ // return vnf(v0) or vnf(v0, v1) (which must, of course, be unary/binary ops, respectively).
+ // Should only be instantiated for integral types.
+ template <typename T>
+ static T EvalOpIntegral(VNFunc vnf, T v0);
+ template <typename T>
+ T EvalOpIntegral(VNFunc vnf, T v0, T v1, ValueNum* pExcSet);
+
+ // Should only instantiate (in a non-trivial way) for "int" and "INT64". Returns true iff dividing "v0" by "v1"
+ // would produce integer overflow (an ArithmeticException -- *not* division by zero, which is separate.)
+ template <typename T>
+ static bool IsOverflowIntDiv(T v0, T v1);
+
+ // Should only instantiate (in a non-trivial way) for integral types (signed/unsigned int32/int64).
+ // Returns true iff v is the zero of the appropriate type.
+ template <typename T>
+ static bool IsIntZero(T v);
+
+ // Given a constant value number, return its value.
+ int GetConstantInt32(ValueNum argVN);
+ INT64 GetConstantInt64(ValueNum argVN);
+ double GetConstantDouble(ValueNum argVN);
+
+ // Assumes that all the ValueNum arguments of each of these functions have been shown to represent constants.
+ // Assumes that "vnf" is an operator of the appropriate arity (unary for the first, binary for the second).
+ // Assumes that "CanEvalForConstantArgs(vnf)" is true.
+ // Returns the result of evaluating the function with those constant arguments.
+ ValueNum EvalFuncForConstantArgs(var_types typ, VNFunc vnf, ValueNum vn0);
+ ValueNum EvalFuncForConstantArgs(var_types typ, VNFunc vnf, ValueNum vn0, ValueNum vn1);
+ ValueNum EvalFuncForConstantFPArgs(var_types typ, VNFunc vnf, ValueNum vn0, ValueNum vn1);
+ ValueNum EvalCastForConstantArgs(var_types typ, VNFunc vnf, ValueNum vn0, ValueNum vn1);
+
+#ifdef DEBUG
+ // This helps test some performance pathologies related to "evaluation" of VNF_MapSelect terms,
+ // especially relating to the heap. We count the number of applications of such terms we consider,
+ // and if this exceeds a limit, indicated by a COMPlus_ variable, we assert.
+ unsigned m_numMapSels;
+#endif
+
+ // This is the maximum number of MapSelect terms that can be "considered" as part of evaluation of a top-level
+ // MapSelect application.
+ unsigned m_mapSelectBudget;
+
+public:
+ // Initializes any static variables of ValueNumStore.
+ static void InitValueNumStoreStatics();
+
+ // Initialize an empty ValueNumStore.
+ ValueNumStore(Compiler* comp, IAllocator* allocator);
+
+ // Returns "true" iff "vnf" (which may have been created by a cast from an integral value) represents
+ // a legal value number function.
+ // (Requires InitValueNumStoreStatics to have been run.)
+ static bool VNFuncIsLegal(VNFunc vnf)
+ {
+ return unsigned(vnf) > VNF_Boundary || GenTreeOpIsLegalVNFunc(static_cast<genTreeOps>(vnf));
+ }
+
+ // Returns the arity of "vnf".
+ static unsigned VNFuncArity(VNFunc vnf);
+
+ // Requires "gtOper" to be a genTreeOps legally representing a VNFunc, and returns that
+ // VNFunc.
+ // (Requires InitValueNumStoreStatics to have been run.)
+ static VNFunc GenTreeOpToVNFunc(genTreeOps gtOper)
+ {
+ assert(GenTreeOpIsLegalVNFunc(gtOper));
+ return static_cast<VNFunc>(gtOper);
+ }
+
+#ifdef DEBUG
+ static void RunTests(Compiler* comp);
+#endif // DEBUG
+
+ // This block of methods gets value numbers for constants of primitive types.
+
+ ValueNum VNForIntCon(INT32 cnsVal);
+ ValueNum VNForLongCon(INT64 cnsVal);
+ ValueNum VNForFloatCon(float cnsVal);
+ ValueNum VNForDoubleCon(double cnsVal);
+ ValueNum VNForByrefCon(INT64 byrefVal);
+
+#ifdef _TARGET_64BIT_
+ ValueNum VNForPtrSizeIntCon(INT64 cnsVal)
+ {
+ return VNForLongCon(cnsVal);
+ }
+#else
+ ValueNum VNForPtrSizeIntCon(INT32 cnsVal)
+ {
+ return VNForIntCon(cnsVal);
+ }
+#endif
+
+ ValueNum VNForCastOper(var_types castToType, bool srcIsUnsigned = false);
+
+ // We keep handle values in a separate pool, so we don't confuse a handle with an int constant
+ // that happens to be the same...
+ ValueNum VNForHandle(ssize_t cnsVal, unsigned iconFlags);
+
+ // And the single constant for an object reference type.
+ static ValueNum VNForNull()
+ {
+ // We reserve Chunk 0 for "special" VNs. SRC_Null (== 0) is the VN of "null".
+ return ValueNum(SRC_Null);
+ }
+
+ // The zero map is the map that returns a zero "for the appropriate type" when indexed at any index.
+ static ValueNum VNForZeroMap()
+ {
+ // We reserve Chunk 0 for "special" VNs. Let SRC_ZeroMap (== 1) be the zero map.
+ return ValueNum(SRC_ZeroMap);
+ }
+
+ // The value number for the special "NotAField" field sequence.
+ static ValueNum VNForNotAField()
+ {
+ // We reserve Chunk 0 for "special" VNs. Let SRC_NotAField (== 2) be the "not a field seq".
+ return ValueNum(SRC_NotAField);
+ }
+
+ // The ROH map is the map for the "read-only heap". We assume that this is never mutated, and always
+ // has the same value number.
+ static ValueNum VNForROH()
+ {
+ // We reserve Chunk 0 for "special" VNs. Let SRC_ReadOnlyHeap (== 3) be the read-only heap.
+ return ValueNum(SRC_ReadOnlyHeap);
+ }
+
+ // A special value number for "void" -- sometimes a type-void thing is an argument to a
+ // GT_LIST, and we want the args to be non-NoVN.
+ static ValueNum VNForVoid()
+ {
+ // We reserve Chunk 0 for "special" VNs. Let SRC_Void (== 4) be the value for "void".
+ return ValueNum(SRC_Void);
+ }
+ static ValueNumPair VNPForVoid()
+ {
+ return ValueNumPair(VNForVoid(), VNForVoid());
+ }
+
+ // A special value number for the empty set of exceptions.
+ static ValueNum VNForEmptyExcSet()
+ {
+ // We reserve Chunk 0 for "special" VNs. Let SRC_EmptyExcSet (== 5) be the value for the empty set of
+ // exceptions.
+ return ValueNum(SRC_EmptyExcSet);
+ }
+ static ValueNumPair VNPForEmptyExcSet()
+ {
+ return ValueNumPair(VNForEmptyExcSet(), VNForEmptyExcSet());
+ }
+
+ // Returns the value number for zero of the given "typ".
+ // It has an unreached() for a "typ" that has no zero value, such as TYP_BYREF.
+ ValueNum VNZeroForType(var_types typ);
+
+ // Returns the value number for one of the given "typ".
+ // It returns NoVN for a "typ" that has no one value, such as TYP_REF.
+ ValueNum VNOneForType(var_types typ);
+
+ // Return the value number representing the singleton exception set containing the exception value "x".
+ ValueNum VNExcSetSingleton(ValueNum x);
+ ValueNumPair VNPExcSetSingleton(ValueNumPair x);
+
+ // Returns the VN representing the union of the two exception sets "xs0" and "xs1".
+ // These must be VNForEmptyExcSet() or applications of VNF_ExcSetCons, obeying
+ // the ascending order invariant (which is preserved in the result.)
+ ValueNum VNExcSetUnion(ValueNum xs0, ValueNum xs1 DEBUGARG(bool topLevel = true));
+
+ ValueNumPair VNPExcSetUnion(ValueNumPair xs0vnp, ValueNumPair xs1vnp);
+
+ // Returns "true" iff "vn" is an application of "VNF_ValWithExc".
+ bool VNHasExc(ValueNum vn)
+ {
+ VNFuncApp funcApp;
+ return GetVNFunc(vn, &funcApp) && funcApp.m_func == VNF_ValWithExc;
+ }
+
+ // Requires that "vn" is *not* a "VNF_ValWithExc" application.
+ // If "excSet" is not "VNForEmptyExcSet()", return "VNF_ValWithExc(vn, excSet)". Otherwise,
+ // just return "vn".
+ ValueNum VNWithExc(ValueNum vn, ValueNum excSet);
+
+ ValueNumPair VNPWithExc(ValueNumPair vnp, ValueNumPair excSetVNP);
+
+ // If "vnWx" is a "VNF_ValWithExc(normal, excSet)" application, sets "*pvn" to "normal", and
+ // "*pvnx" to "excSet". Otherwise, just sets "*pvn" to "normal".
+ void VNUnpackExc(ValueNum vnWx, ValueNum* pvn, ValueNum* pvnx);
+
+ void VNPUnpackExc(ValueNumPair vnWx, ValueNumPair* pvn, ValueNumPair* pvnx);
+
+ // If "vn" is a "VNF_ValWithExc(norm, excSet)" value, returns the "norm" argument; otherwise,
+ // just returns "vn".
+ ValueNum VNNormVal(ValueNum vn);
+ ValueNumPair VNPNormVal(ValueNumPair vn);
+
+ // If "vn" is a "VNF_ValWithExc(norm, excSet)" value, returns the "excSet" argument; otherwise,
+ // just returns "EmptyExcSet()".
+ ValueNum VNExcVal(ValueNum vn);
+ ValueNumPair VNPExcVal(ValueNumPair vn);
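+
+ // Informal sketch of how these compose (illustrative only): if "v" is "VNF_ValWithExc(n, e)",
+ // then VNNormVal(v) yields "n", VNExcVal(v) yields "e", and VNWithExc(n, e) should yield "v"
+ // again; for a VN that carries no exception set, VNNormVal is the identity and VNExcVal
+ // yields VNForEmptyExcSet().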
+
+ // True "iff" vn is a value known to be non-null. (For example, the result of an allocation...)
+ bool IsKnownNonNull(ValueNum vn);
+
+ // True "iff" vn is a value returned by a call to a shared static helper.
+ bool IsSharedStatic(ValueNum vn);
+
+ // VN's for functions of other values.
+ // Four overloads, for arities 0, 1, 2, and 3. If we need other arities, we'll consider it.
+ ValueNum VNForFunc(var_types typ, VNFunc func);
+ ValueNum VNForFunc(var_types typ, VNFunc func, ValueNum opVNwx);
+ // This must not be used for VNF_MapSelect applications; instead use VNForMapSelect, below.
+ ValueNum VNForFunc(var_types typ, VNFunc func, ValueNum op1VNwx, ValueNum op2VNwx);
+ ValueNum VNForFunc(var_types typ, VNFunc func, ValueNum op1VNwx, ValueNum op2VNwx, ValueNum op3VNwx);
+
+ // The following four-operand VNForFunc is used only for VNF_PtrToArrElem(elemTypeEqVN, arrVN, inxVN, fldSeqVN).
+ ValueNum VNForFunc(
+ var_types typ, VNFunc func, ValueNum op1VNwx, ValueNum op2VNwx, ValueNum op3VNwx, ValueNum op4VNwx);
+
+ // This requires a "ValueNumKind" because it will attempt, given "select(phi(m1, ..., mk), ind)", to evaluate
+ // "select(m1, ind)", ..., "select(mk, ind)" to see if they agree. It needs to know which kind of value number
+ // (liberal/conservative) to read from the SSA def referenced in the phi argument.
+ ValueNum VNForMapSelect(ValueNumKind vnk, var_types typ, ValueNum op1VN, ValueNum op2VN);
+
+ // A method that does the work for VNForMapSelect and may call itself recursively.
+ ValueNum VNForMapSelectWork(
+ ValueNumKind vnk, var_types typ, ValueNum op1VN, ValueNum op2VN, unsigned* pBudget, bool* pUsedRecursiveVN);
+
+ // A specialized version of VNForFunc that is used for VNF_MapStore and provides some logging when verbose is set
+ ValueNum VNForMapStore(var_types typ, ValueNum arg0VN, ValueNum arg1VN, ValueNum arg2VN);
+
+ // These functions parallel the ones above, except that they take liberal/conservative VN pairs
+ // as arguments, and return such a pair (the pair of the function applied to the liberal args, and
+ // the function applied to the conservative args).
+ ValueNumPair VNPairForFunc(var_types typ, VNFunc func)
+ {
+ ValueNumPair res;
+ res.SetBoth(VNForFunc(typ, func));
+ return res;
+ }
+ ValueNumPair VNPairForFunc(var_types typ, VNFunc func, ValueNumPair opVN)
+ {
+ return ValueNumPair(VNForFunc(typ, func, opVN.GetLiberal()), VNForFunc(typ, func, opVN.GetConservative()));
+ }
+ ValueNumPair VNPairForFunc(var_types typ, VNFunc func, ValueNumPair op1VN, ValueNumPair op2VN)
+ {
+ return ValueNumPair(VNForFunc(typ, func, op1VN.GetLiberal(), op2VN.GetLiberal()),
+ VNForFunc(typ, func, op1VN.GetConservative(), op2VN.GetConservative()));
+ }
+ ValueNumPair VNPairForFunc(var_types typ, VNFunc func, ValueNumPair op1VN, ValueNumPair op2VN, ValueNumPair op3VN)
+ {
+ return ValueNumPair(VNForFunc(typ, func, op1VN.GetLiberal(), op2VN.GetLiberal(), op3VN.GetLiberal()),
+ VNForFunc(typ, func, op1VN.GetConservative(), op2VN.GetConservative(),
+ op3VN.GetConservative()));
+ }
+ ValueNumPair VNPairForFunc(
+ var_types typ, VNFunc func, ValueNumPair op1VN, ValueNumPair op2VN, ValueNumPair op3VN, ValueNumPair op4VN)
+ {
+ return ValueNumPair(VNForFunc(typ, func, op1VN.GetLiberal(), op2VN.GetLiberal(), op3VN.GetLiberal(),
+ op4VN.GetLiberal()),
+ VNForFunc(typ, func, op1VN.GetConservative(), op2VN.GetConservative(),
+ op3VN.GetConservative(), op4VN.GetConservative()));
+ }
+
+ // Get a new, unique value number for an expression that we're not equating to some function,
+ // which is the value of a tree in the given block.
+ ValueNum VNForExpr(BasicBlock* block, var_types typ = TYP_UNKNOWN);
+
+// This controls extra tracing of the "evaluation" of "VNF_MapSelect" functions.
+#define FEATURE_VN_TRACE_APPLY_SELECTORS 1
+
+ // Return the value number corresponding to constructing "MapSelect(map, f0)", where "f0" is the
+ // (value number of) the first field in "fieldSeq". (The type of this application will be the type of "f0".)
+ // If there are no remaining fields in "fieldSeq", return that value number; otherwise, return VNApplySelectors
+ // applied to that value number and the remainder of "fieldSeq". When the 'fieldSeq' specifies a TYP_STRUCT
+ // then the size of the struct is returned by 'wbFinalStructSize' (when it is non-null)
+ ValueNum VNApplySelectors(ValueNumKind vnk,
+ ValueNum map,
+ FieldSeqNode* fieldSeq,
+ size_t* wbFinalStructSize = nullptr);
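+
+ // Informal sketch (illustrative, not the exact implementation): for a field sequence [F0, F1],
+ // the result is essentially VNApplySelectors(vnk, VNForMapSelect(vnk, typeOf(F0), map, VN(F0)), [F1]),
+ // i.e. one MapSelect application per field, applied left to right.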
+
+ // Used after VNApplySelectors has determined that "selectedVN" is contained in a Map using VNForMapSelect
+ // It determines whether the 'selectedVN' is of an appropriate type to be read using an indirection of 'indType'.
+ // If it is of an appropriate type, then 'selectedVN' is returned; otherwise it may insert a cast to indType
+ // or return a unique value number for an incompatible indType.
+ ValueNum VNApplySelectorsTypeCheck(ValueNum selectedVN, var_types indType, size_t structSize);
+
+ // Assumes that "map" represents a map that is addressable by the fields in "fieldSeq", to get
+ // to a value of the type of "rhs". Returns an expression for the RHS of an assignment, in the given "block",
+ // to a location containing value "map" that will change the field addressed by "fieldSeq" to "rhs", leaving
+ // all other indices in "map" the same.
+ ValueNum VNApplySelectorsAssign(
+ ValueNumKind vnk, ValueNum map, FieldSeqNode* fieldSeq, ValueNum rhs, var_types indType, BasicBlock* block);
+
+ // Used after VNApplySelectorsAssign has determined that "elem" is to be written into a Map using VNForMapStore.
+ // It determines whether the 'elem' is of an appropriate type to be written using an indirection of 'indType'.
+ // It may insert a cast to indType or return a unique value number for an incompatible indType.
+ ValueNum VNApplySelectorsAssignTypeCoerce(ValueNum elem, var_types indType, BasicBlock* block);
+
+ ValueNumPair VNPairApplySelectors(ValueNumPair map, FieldSeqNode* fieldSeq, var_types indType);
+
+ ValueNumPair VNPairApplySelectorsAssign(ValueNumPair map,
+ FieldSeqNode* fieldSeq,
+ ValueNumPair rhs,
+ var_types indType,
+ BasicBlock* block)
+ {
+ return ValueNumPair(VNApplySelectorsAssign(VNK_Liberal, map.GetLiberal(), fieldSeq, rhs.GetLiberal(), indType, block),
+ VNApplySelectorsAssign(VNK_Conservative, map.GetConservative(), fieldSeq,
+ rhs.GetConservative(), indType, block));
+ }
+
+ // Compute the normal ValueNumber for a cast with no exceptions
+ ValueNum VNForCast(ValueNum srcVN, var_types castToType, var_types castFromType, bool srcIsUnsigned = false);
+
+ // Compute the ValueNumberPair for a cast
+ ValueNumPair VNPairForCast(ValueNumPair srcVNPair,
+ var_types castToType,
+ var_types castFromType,
+ bool srcIsUnsigned = false,
+ bool hasOverflowCheck = false);
+
+ // PtrToLoc values need to express a field sequence as one of their arguments. VN for null represents
+ // empty sequence, otherwise, "FieldSeq(VN(FieldHandle), restOfSeq)".
+ ValueNum VNForFieldSeq(FieldSeqNode* fieldSeq);
+
+ // Requires that "vn" represents a field sequence, that is, is the result of a call to VNForFieldSeq.
+ // Returns the FieldSequence it represents.
+ FieldSeqNode* FieldSeqVNToFieldSeq(ValueNum vn);
+
+ // Both arguments must represent field sequences; returns the value number representing the
+ // concatenation "fsVN1 || fsVN2".
+ ValueNum FieldSeqVNAppend(ValueNum fsVN1, ValueNum fsVN2);
+
+ // Requires "lclVarVN" be a value number for a GT_LCL_VAR pointer tree.
+ // Requires "fieldSeqVN" be a field sequence value number.
+ // Requires "typ" to be a TYP_REF/TYP_BYREF used for VNF_PtrToLoc.
+ // When "fieldSeqVN" is VNForNotAField, a unique VN is generated using m_uPtrToLocNotAFieldCount.
+ ValueNum VNForPtrToLoc(var_types typ, ValueNum lclVarVN, ValueNum fieldSeqVN);
+
+ // If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, and "opB" is an integer
+ // with a "fieldSeq", returns the VN for the pointer form extended with the field sequence; or else NoVN.
+ ValueNum ExtendPtrVN(GenTreePtr opA, GenTreePtr opB);
+ // If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, returns the VN for the
+ // pointer form extended with "fieldSeq"; or else NoVN.
+ ValueNum ExtendPtrVN(GenTreePtr opA, FieldSeqNode* fieldSeq);
+
+ // Queries on value numbers.
+ // All queries taking value numbers require that those value numbers are valid, that is, that
+ // they have been returned by previous "VNFor..." operations. They can assert false if this is
+ // not true.
+
+ // Returns TYP_UNKNOWN if the given value number has not been given a type.
+ var_types TypeOfVN(ValueNum vn);
+
+ // Returns MAX_LOOP_NUM if the given value number's loop nest is unknown or ill-defined.
+ BasicBlock::loopNumber LoopOfVN(ValueNum vn);
+
+ // Returns true iff the VN represents a (non-handle) constant.
+ bool IsVNConstant(ValueNum vn);
+
+ // Returns true iff the VN represents an integral constant.
+ bool IsVNInt32Constant(ValueNum vn);
+
+ struct ArrLenArithBoundInfo
+ {
+ // (vnArr.len - 1) > vnOp
+ // (vnArr.len arrOper arrOp) cmpOper cmpOp
+ ValueNum vnArray;
+ unsigned arrOper;
+ ValueNum arrOp;
+ unsigned cmpOper;
+ ValueNum cmpOp;
+ ArrLenArithBoundInfo() : vnArray(NoVN), arrOper(GT_NONE), arrOp(NoVN), cmpOper(GT_NONE), cmpOp(NoVN)
+ {
+ }
+#ifdef DEBUG
+ void dump(ValueNumStore* vnStore)
+ {
+ vnStore->vnDump(vnStore->m_pComp, cmpOp);
+ printf(" ");
+ printf(vnStore->VNFuncName((VNFunc)cmpOper));
+ printf(" ");
+ vnStore->vnDump(vnStore->m_pComp, vnArray);
+ if (arrOper != GT_NONE)
+ {
+ printf(vnStore->VNFuncName((VNFunc)arrOper));
+ vnStore->vnDump(vnStore->m_pComp, arrOp);
+ }
+ }
+#endif
+ };
+
+ struct ConstantBoundInfo
+ {
+ // 100 > vnOp
+ int constVal;
+ unsigned cmpOper;
+ ValueNum cmpOpVN;
+
+ ConstantBoundInfo() : constVal(0), cmpOper(GT_NONE), cmpOpVN(NoVN)
+ {
+ }
+
+#ifdef DEBUG
+ void dump(ValueNumStore* vnStore)
+ {
+ vnStore->vnDump(vnStore->m_pComp, cmpOpVN);
+ printf(" ");
+ printf(vnStore->VNFuncName((VNFunc)cmpOper));
+ printf(" ");
+ printf("%d", constVal);
+ }
+#endif
+ };
+
+ // Check if "vn" is "new [] (type handle, size)"
+ bool IsVNNewArr(ValueNum vn, VNFuncApp* funcApp);
+
+ // If "vn" IsVNNewArr, return the array size, or a value <= 0 if the size cannot be determined.
+ int GetNewArrSize(ValueNum vn);
+
+ // Check if "vn" is "a.len"
+ bool IsVNArrLen(ValueNum vn);
+
+ // If "vn" is VN(a.len) then return VN(a); NoVN if VN(a) can't be determined.
+ ValueNum GetArrForLenVn(ValueNum vn);
+
+ // Returns true for any relop other than == and != where one operand is a 32-bit integer constant.
+ bool IsVNConstantBound(ValueNum vn);
+
+ // If "vn" is constant bound, then populate the "info" fields for constVal, cmpOp, cmpOper.
+ void GetConstantBoundInfo(ValueNum vn, ConstantBoundInfo* info);
+
+ // If "vn" is of the form "var < a.len" or "a.len <= var" return true.
+ bool IsVNArrLenBound(ValueNum vn);
+
+ // If "vn" is arr len bound, then populate the "info" fields for the arrVn, cmpOp, cmpOper.
+ void GetArrLenBoundInfo(ValueNum vn, ArrLenArithBoundInfo* info);
+
+ // If "vn" is of the form "a.len +/- var" return true.
+ bool IsVNArrLenArith(ValueNum vn);
+
+ // If "vn" is arr len arith, then populate the "info" fields for arrOper, arrVn, arrOp.
+ void GetArrLenArithInfo(ValueNum vn, ArrLenArithBoundInfo* info);
+
+ // If "vn" is of the form "var < a.len +/- k" return true.
+ bool IsVNArrLenArithBound(ValueNum vn);
+
+ // If "vn" is arr len arith bound, then populate the "info" fields for cmpOp, cmpOper.
+ void GetArrLenArithBoundInfo(ValueNum vn, ArrLenArithBoundInfo* info);
+
+ // Returns the flags on the current handle. GTF_ICON_SCOPE_HDL for example.
+ unsigned GetHandleFlags(ValueNum vn);
+
+ // Returns true iff the VN represents a handle constant.
+ bool IsVNHandle(ValueNum vn);
+
+ // Convert a vartype_t to the value number's storage type for that vartype_t.
+ // For example, ValueNums of type TYP_LONG are stored in a map of INT64 variables.
+ // Lang is the language (C++) type for the corresponding vartype_t.
+ template <int N>
+ struct VarTypConv
+ {
+ };
+
+private:
+ struct Chunk;
+
+ template <typename T>
+ static T CoerceTypRefToT(Chunk* c, unsigned offset);
+
+ // Get the actual value and coerce the actual type c->m_typ to the wanted type T.
+ template <typename T>
+ FORCEINLINE T SafeGetConstantValue(Chunk* c, unsigned offset);
+
+ template <typename T>
+ T ConstantValueInternal(ValueNum vn DEBUGARG(bool coerce))
+ {
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ assert(c->m_attribs == CEA_Const || c->m_attribs == CEA_Handle);
+
+ unsigned offset = ChunkOffset(vn);
+
+ switch (c->m_typ)
+ {
+ case TYP_REF:
+ assert(0 <= offset && offset <= 1); // Null or exception.
+ __fallthrough;
+
+ case TYP_BYREF:
+#ifndef PLATFORM_UNIX
+ assert(&typeid(T) == &typeid(size_t)); // We represent ref/byref constants as size_t's.
+#endif // PLATFORM_UNIX
+ __fallthrough;
+
+ case TYP_INT:
+ case TYP_LONG:
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ if (c->m_attribs == CEA_Handle)
+ {
+ C_ASSERT(offsetof(VNHandle, m_cnsVal) == 0);
+ return (T) reinterpret_cast<VNHandle*>(c->m_defs)[offset].m_cnsVal;
+ }
+#ifdef DEBUG
+ if (!coerce)
+ {
+ T val1 = reinterpret_cast<T*>(c->m_defs)[offset];
+ T val2 = SafeGetConstantValue<T>(c, offset);
+
+ // Detect if there is a mismatch between the VN storage type and explicitly
+ // passed-in type T.
+ bool mismatch = false;
+ if (varTypeIsFloating(c->m_typ))
+ {
+ mismatch = (memcmp(&val1, &val2, sizeof(val1)) != 0);
+ }
+ else
+ {
+ mismatch = (val1 != val2);
+ }
+
+ if (mismatch)
+ {
+ assert(
+ !"Called ConstantValue<T>(vn), but type(T) != type(vn); Use CoercedConstantValue instead.");
+ }
+ }
+#endif
+ return SafeGetConstantValue<T>(c, offset);
+
+ default:
+ assert(false); // We do not record constants of this typ.
+ return (T)0;
+ }
+ }
+
+public:
+ // Requires that "vn" is a constant, and that its type is compatible with the explicitly passed
+ // type "T". Also, note that "T" has to have an accurate storage size of the TypeOfVN(vn).
+ template <typename T>
+ T ConstantValue(ValueNum vn)
+ {
+ return ConstantValueInternal<T>(vn DEBUGARG(false));
+ }
+
+ // Requires that "vn" is a constant, and that its type can be coerced to the explicitly passed
+ // type "T".
+ template <typename T>
+ T CoercedConstantValue(ValueNum vn)
+ {
+ return ConstantValueInternal<T>(vn DEBUGARG(true));
+ }
+
+ // Given a value number "vn", go through the list of VNs that are handles
+ // to find whether it is present; if so, return "true", else "false."
+ bool IsHandle(ValueNum vn);
+
+ // Requires "mthFunc" to be an intrinsic math function (one of the allowable values for the "gtMath" field
+ // of a GenTreeMath node). For unary ops, return the value number for the application of this function to
+ // "arg0VN". For binary ops, return the value number for the application of this function to "arg0VN" and
+ // "arg1VN".
+
+ ValueNum EvalMathFuncUnary(var_types typ, CorInfoIntrinsics mthFunc, ValueNum arg0VN);
+
+ ValueNum EvalMathFuncBinary(var_types typ, CorInfoIntrinsics mthFunc, ValueNum arg0VN, ValueNum arg1VN);
+
+ ValueNumPair EvalMathFuncUnary(var_types typ, CorInfoIntrinsics mthFunc, ValueNumPair arg0VNP)
+ {
+ return ValueNumPair(EvalMathFuncUnary(typ, mthFunc, arg0VNP.GetLiberal()),
+ EvalMathFuncUnary(typ, mthFunc, arg0VNP.GetConservative()));
+ }
+
+ ValueNumPair EvalMathFuncBinary(var_types typ,
+ CorInfoIntrinsics mthFunc,
+ ValueNumPair arg0VNP,
+ ValueNumPair arg1VNP)
+ {
+ return ValueNumPair(EvalMathFuncBinary(typ, mthFunc, arg0VNP.GetLiberal(), arg1VNP.GetLiberal()),
+ EvalMathFuncBinary(typ, mthFunc, arg0VNP.GetConservative(), arg1VNP.GetConservative()));
+ }
+
+ // Returns "true" iff "vn" represents a function application.
+ bool IsVNFunc(ValueNum vn);
+
+ // If "vn" represents a function application, returns "true" and set "*funcApp" to
+ // the function application it represents; otherwise, return "false."
+ bool GetVNFunc(ValueNum vn, VNFuncApp* funcApp);
+
+ // Requires that "vn" represents a "heap address" -- the sum of a "TYP_REF" value and some integer
+ // value. Returns the TYP_REF value.
+ ValueNum VNForRefInAddr(ValueNum vn);
+
+ // Returns "true" iff "vn" is a valid value number -- one that has been previously returned.
+ bool VNIsValid(ValueNum vn);
+
+#ifdef DEBUG
+// This controls whether we recursively call vnDump on function arguments.
+#define FEATURE_VN_DUMP_FUNC_ARGS 0
+
+ // Prints, to standard out, a representation of "vn".
+ void vnDump(Compiler* comp, ValueNum vn, bool isPtr = false);
+
+ // Requires "fieldSeq" to be a field sequence VNFuncApp.
+ // Prints a representation (comma-separated list of field names) on standard out.
+ void vnDumpFieldSeq(Compiler* comp, VNFuncApp* fieldSeq, bool isHead);
+
+ // Requires "mapSelect" to be a map select VNFuncApp.
+ // Prints a representation of a MapSelect operation on standard out.
+ void vnDumpMapSelect(Compiler* comp, VNFuncApp* mapSelect);
+
+ // Requires "mapStore" to be a map store VNFuncApp.
+ // Prints a representation of a MapStore operation on standard out.
+ void vnDumpMapStore(Compiler* comp, VNFuncApp* mapStore);
+
+ // Returns the string name of "vnf".
+ static const char* VNFuncName(VNFunc vnf);
+ // Used in the implementation of the above.
+ static const char* VNFuncNameArr[];
+
+ // Returns the string name of "vn" when it is a reserved value number, nullptr otherwise
+ static const char* reservedName(ValueNum vn);
+
+#endif // DEBUG
+
+ // Returns true if "vn" is a reserved value number
+ static bool isReservedVN(ValueNum);
+
+#define VALUENUM_SUPPORT_MERGE 0
+#if VALUENUM_SUPPORT_MERGE
+ // If we're going to support the Merge operation, and do it right, we really need to use an entire
+ // egraph data structure, so that we can do congruence closure, and discover congruences implied
+ // by the eq-class merge.
+
+ // It may be that we provisionally give two expressions distinct value numbers, then later discover
+ // that the values of the expressions are provably equal. We allow the two value numbers to be
+ // "merged" -- after the merge, they represent the same abstract value.
+ void MergeVNs(ValueNum vn1, ValueNum vn2);
+#endif
+
+private:
+ // We will allocate value numbers in "chunks". Each chunk will have the same type and "constness".
+ static const unsigned LogChunkSize = 6;
+ static const unsigned ChunkSize = 1 << LogChunkSize;
+ static const unsigned ChunkOffsetMask = ChunkSize - 1;
+
+ // A "ChunkNum" is a zero-based index naming a chunk in the Store, or else the special "NoChunk" value.
+ typedef UINT32 ChunkNum;
+ static const ChunkNum NoChunk = UINT32_MAX;
+
+ // Returns the ChunkNum of the Chunk that holds "vn" (which is required to be a valid
+ // value number, i.e., one returned by some VN-producing method of this class).
+ static ChunkNum GetChunkNum(ValueNum vn)
+ {
+ return vn >> LogChunkSize;
+ }
+
+ // Returns the offset of the given "vn" within its chunk.
+ static unsigned ChunkOffset(ValueNum vn)
+ {
+ return vn & ChunkOffsetMask;
+ }
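+
+ // Worked example (illustrative only): with LogChunkSize == 6, ChunkSize == 64, so value number
+ // 138 (0x8A) lives in chunk 138 >> 6 == 2, at offset 138 & 63 == 10 within that chunk.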
+
+ // The base VN of the next chunk to be allocated. Should always be a multiple of ChunkSize.
+ ValueNum m_nextChunkBase;
+
+ DECLARE_TYPED_ENUM(ChunkExtraAttribs, BYTE)
+ {
+ CEA_None, // No extra attributes.
+ CEA_Const, // This chunk contains constant values.
+ CEA_Handle, // This chunk contains handle constants.
+ CEA_Func0, // Represents functions of arity 0.
+ CEA_Func1, // ...arity 1.
+ CEA_Func2, // ...arity 2.
+ CEA_Func3, // ...arity 3.
+ CEA_Func4, // ...arity 4.
+ CEA_Count
+ }
+ END_DECLARE_TYPED_ENUM(ChunkExtraAttribs, BYTE);
+
+ // A "Chunk" holds "ChunkSize" value numbers, starting at "m_baseVN". All of these share the same
+ // "m_typ" and "m_attribs". These properties determine the interpretation of "m_defs", as discussed below.
+ struct Chunk
+ {
+ // If "m_defs" is non-null, it is an array of size ChunkSize, whose element type is determined by the other
+ // members. The "m_numUsed" field indicates the number of elements of "m_defs" that are already consumed (the
+ // next one to allocate).
+ void* m_defs;
+ unsigned m_numUsed;
+
+ // The value number of the first VN in the chunk.
+ ValueNum m_baseVN;
+
+ // The common attributes of this chunk.
+ var_types m_typ;
+ ChunkExtraAttribs m_attribs;
+ BasicBlock::loopNumber m_loopNum;
+
+ // Initialize a chunk, starting at "*baseVN", for the given "typ", "attribs", and "loopNum" (using "alloc" for allocations).
+ // (Increments "*baseVN" by ChunkSize.)
+ Chunk(IAllocator* alloc, ValueNum* baseVN, var_types typ, ChunkExtraAttribs attribs, BasicBlock::loopNumber loopNum);
+
+ // Requires that "m_numUsed < ChunkSize." Returns the offset of the allocated VN within the chunk; the
+ // actual VN is this added to the "m_baseVN" of the chunk.
+ unsigned AllocVN()
+ {
+ assert(m_numUsed < ChunkSize);
+ return m_numUsed++;
+ }
+
+ template <int N>
+ struct Alloc
+ {
+ typedef typename ValueNumStore::VarTypConv<N>::Type Type;
+ };
+ };
+
+ struct VNHandle : public KeyFuncsDefEquals<VNHandle>
+ {
+ ssize_t m_cnsVal;
+ unsigned m_flags;
+ // Don't define a constructor, so that the default copy constructor can be used for hashtable rehash.
+ static void Initialize(VNHandle* handle, ssize_t m_cnsVal, unsigned m_flags)
+ {
+ handle->m_cnsVal = m_cnsVal;
+ handle->m_flags = m_flags;
+ }
+ bool operator==(const VNHandle& y) const
+ {
+ return m_cnsVal == y.m_cnsVal && m_flags == y.m_flags;
+ }
+ static unsigned GetHashCode(const VNHandle& val)
+ {
+ return static_cast<unsigned>(val.m_cnsVal);
+ }
+ };
+
+ struct VNDefFunc0Arg
+ {
+ VNFunc m_func;
+ VNDefFunc0Arg(VNFunc func) : m_func(func)
+ {
+ }
+
+ VNDefFunc0Arg() : m_func(VNF_COUNT)
+ {
+ }
+
+ bool operator==(const VNDefFunc0Arg& y) const
+ {
+ return m_func == y.m_func;
+ }
+ };
+
+ struct VNDefFunc1Arg : public VNDefFunc0Arg
+ {
+ ValueNum m_arg0;
+ VNDefFunc1Arg(VNFunc func, ValueNum arg0) : VNDefFunc0Arg(func), m_arg0(arg0)
+ {
+ }
+
+ VNDefFunc1Arg() : VNDefFunc0Arg(), m_arg0(ValueNumStore::NoVN)
+ {
+ }
+
+ bool operator==(const VNDefFunc1Arg& y) const
+ {
+ return VNDefFunc0Arg::operator==(y) && m_arg0 == y.m_arg0;
+ }
+ };
+
+ struct VNDefFunc2Arg : public VNDefFunc1Arg
+ {
+ ValueNum m_arg1;
+ VNDefFunc2Arg(VNFunc func, ValueNum arg0, ValueNum arg1) : VNDefFunc1Arg(func, arg0), m_arg1(arg1)
+ {
+ }
+
+ VNDefFunc2Arg() : m_arg1(ValueNumStore::NoVN)
+ {
+ }
+
+ bool operator==(const VNDefFunc2Arg& y) const
+ {
+ return VNDefFunc1Arg::operator==(y) && m_arg1 == y.m_arg1;
+ }
+ };
+
+ struct VNDefFunc3Arg : public VNDefFunc2Arg
+ {
+ ValueNum m_arg2;
+ VNDefFunc3Arg(VNFunc func, ValueNum arg0, ValueNum arg1, ValueNum arg2)
+ : VNDefFunc2Arg(func, arg0, arg1), m_arg2(arg2)
+ {
+ }
+ VNDefFunc3Arg() : m_arg2(ValueNumStore::NoVN)
+ {
+ }
+
+ bool operator==(const VNDefFunc3Arg& y) const
+ {
+ return VNDefFunc2Arg::operator==(y) && m_arg2 == y.m_arg2;
+ }
+ };
+
+ struct VNDefFunc4Arg : public VNDefFunc3Arg
+ {
+ ValueNum m_arg3;
+ VNDefFunc4Arg(VNFunc func, ValueNum arg0, ValueNum arg1, ValueNum arg2, ValueNum arg3)
+ : VNDefFunc3Arg(func, arg0, arg1, arg2), m_arg3(arg3)
+ {
+ }
+ VNDefFunc4Arg() : m_arg3(ValueNumStore::NoVN)
+ {
+ }
+
+ bool operator==(const VNDefFunc4Arg& y) const
+ {
+ return VNDefFunc3Arg::operator==(y) && m_arg3 == y.m_arg3;
+ }
+ };
+
+ // When we evaluate "select(m, i)", if "m" is the value of a phi definition, we look at
+ // all the values of the phi args, and see if doing the "select" on each of them yields identical
+ // results. If so, that is the result of the entire "select" form. We have to be careful, however,
+ // because phis may be recursive in the presence of loop structures -- the VN for the phi may be (or be
+ // part of the definition of) the VN's of some of the arguments. But there will be at least one
+ // argument that does *not* depend on the outer phi VN -- after all, we had to get into the loop somehow.
+ // So we have to be careful about breaking infinite recursion. We can ignore "recursive" results -- if all the
+ // non-recursive results are the same, the recursion indicates that the loop structure didn't alter the result.
+ // This stack represents the set of outer phis such that select(phi, ind) is being evaluated.
+ ExpandArrayStack<VNDefFunc2Arg> m_fixedPointMapSels;
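+
+ // Illustrative example of the situation described above: for a loop-carried heap phi,
+ // evaluating "select(phi(mInit, mLoop), ind)" may find that "select(mLoop, ind)" leads back to
+ // the phi itself; that branch is treated as recursive (see RecursiveVN) and ignored, and if
+ // "select(mInit, ind)" is the only non-recursive result, it becomes the value of the whole select.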
+
+#ifdef DEBUG
+ // Returns "true" iff "m_fixedPointMapSels" is non-empty, and its top element is
+ // "select(map, index)".
+ bool FixedPointMapSelsTopHasValue(ValueNum map, ValueNum index);
+#endif
+
+ // Returns true if "sel(map, ind)" is a member of "m_fixedPointMapSels".
+ bool SelectIsBeingEvaluatedRecursively(ValueNum map, ValueNum ind);
+
+ // This is a map from "chunk number" to the attributes of the chunk.
+ ExpandArrayStack<Chunk*> m_chunks;
+
+ // These entries indicate the current allocation chunk, if any, for each valid combination of <var_types,
+ // ChunkExtraAttribute, loopNumber>. Valid combinations require attribs==CEA_None or loopNum==MAX_LOOP_NUM.
+ // If the value is NoChunk, it indicates that there is no current allocation chunk for that pair, otherwise
+ // it is the index in "m_chunks" of a chunk with the given attributes, in which the next allocation should
+ // be attempted.
+ ChunkNum m_curAllocChunk[TYP_COUNT][CEA_Count + MAX_LOOP_NUM + 1];
+
+ // Returns a (pointer to a) chunk in which a new value number may be allocated.
+ Chunk* GetAllocChunk(var_types typ, ChunkExtraAttribs attribs, BasicBlock::loopNumber loopNum = MAX_LOOP_NUM);
+
+ // First, we need mechanisms for mapping from constants to value numbers.
+ // For small integers, we'll use an array.
+ static const int SmallIntConstMin = -1;
+ static const int SmallIntConstMax = 10;
+ static const unsigned SmallIntConstNum = SmallIntConstMax - SmallIntConstMin + 1;
+ static bool IsSmallIntConst(int i)
+ {
+ return SmallIntConstMin <= i && i <= SmallIntConstMax;
+ }
+ ValueNum m_VNsForSmallIntConsts[SmallIntConstNum];
+
+ struct ValueNumList
+ {
+ ValueNum vn;
+ ValueNumList* next;
+ ValueNumList(const ValueNum& v, ValueNumList* n = nullptr) : vn(v), next(n)
+ {
+ }
+ };
+
+ // Keeps track of value numbers that are integer constants and also handles (GTF_ICON_HDL_MASK).
+ ValueNumList* m_intConHandles;
+
+ typedef VNMap<INT32> IntToValueNumMap;
+ IntToValueNumMap* m_intCnsMap;
+ IntToValueNumMap* GetIntCnsMap()
+ {
+ if (m_intCnsMap == nullptr)
+ {
+ m_intCnsMap = new (m_alloc) IntToValueNumMap(m_alloc);
+ }
+ return m_intCnsMap;
+ }
+
+ ValueNum GetVNForIntCon(INT32 cnsVal)
+ {
+ ValueNum res;
+ if (GetIntCnsMap()->Lookup(cnsVal, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(TYP_INT, CEA_Const);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<INT32*>(c->m_defs)[offsetWithinChunk] = cnsVal;
+ GetIntCnsMap()->Set(cnsVal, res);
+ return res;
+ }
+ }
+
+ typedef VNMap<INT64> LongToValueNumMap;
+ LongToValueNumMap* m_longCnsMap;
+ LongToValueNumMap* GetLongCnsMap()
+ {
+ if (m_longCnsMap == nullptr)
+ {
+ m_longCnsMap = new (m_alloc) LongToValueNumMap(m_alloc);
+ }
+ return m_longCnsMap;
+ }
+
+ typedef VNMap<VNHandle, VNHandle> HandleToValueNumMap;
+ HandleToValueNumMap* m_handleMap;
+ HandleToValueNumMap* GetHandleMap()
+ {
+ if (m_handleMap == nullptr)
+ {
+ m_handleMap = new (m_alloc) HandleToValueNumMap(m_alloc);
+ }
+ return m_handleMap;
+ }
+
+ struct LargePrimitiveKeyFuncsFloat : public LargePrimitiveKeyFuncs<float>
+ {
+ static bool Equals(float x, float y)
+ {
+ return *(unsigned*)&x == *(unsigned*)&y;
+ }
+ };
+
+ typedef VNMap<float, LargePrimitiveKeyFuncsFloat> FloatToValueNumMap;
+ FloatToValueNumMap* m_floatCnsMap;
+ FloatToValueNumMap* GetFloatCnsMap()
+ {
+ if (m_floatCnsMap == nullptr)
+ {
+ m_floatCnsMap = new (m_alloc) FloatToValueNumMap(m_alloc);
+ }
+ return m_floatCnsMap;
+ }
+
+ // In the JIT we need to distinguish -0.0 and 0.0 for optimizations.
+ struct LargePrimitiveKeyFuncsDouble : public LargePrimitiveKeyFuncs<double>
+ {
+ static bool Equals(double x, double y)
+ {
+ return *(__int64*)&x == *(__int64*)&y;
+ }
+ };
+
+ typedef VNMap<double, LargePrimitiveKeyFuncsDouble> DoubleToValueNumMap;
+ DoubleToValueNumMap* m_doubleCnsMap;
+ DoubleToValueNumMap* GetDoubleCnsMap()
+ {
+ if (m_doubleCnsMap == nullptr)
+ {
+ m_doubleCnsMap = new (m_alloc) DoubleToValueNumMap(m_alloc);
+ }
+ return m_doubleCnsMap;
+ }
+
+ LongToValueNumMap* m_byrefCnsMap;
+ LongToValueNumMap* GetByrefCnsMap()
+ {
+ if (m_byrefCnsMap == nullptr)
+ {
+ m_byrefCnsMap = new (m_alloc) LongToValueNumMap(m_alloc);
+ }
+ return m_byrefCnsMap;
+ }
+
+ struct VNDefFunc0ArgKeyFuncs : public KeyFuncsDefEquals<VNDefFunc1Arg>
+ {
+ static unsigned GetHashCode(VNDefFunc1Arg val)
+ {
+ return (val.m_func << 24) + val.m_arg0;
+ }
+ };
+ typedef VNMap<VNFunc> VNFunc0ToValueNumMap;
+ VNFunc0ToValueNumMap* m_VNFunc0Map;
+ VNFunc0ToValueNumMap* GetVNFunc0Map()
+ {
+ if (m_VNFunc0Map == nullptr)
+ {
+ m_VNFunc0Map = new (m_alloc) VNFunc0ToValueNumMap(m_alloc);
+ }
+ return m_VNFunc0Map;
+ }
+
+ struct VNDefFunc1ArgKeyFuncs : public KeyFuncsDefEquals<VNDefFunc1Arg>
+ {
+ static unsigned GetHashCode(VNDefFunc1Arg val)
+ {
+ return (val.m_func << 24) + val.m_arg0;
+ }
+ };
+ typedef VNMap<VNDefFunc1Arg, VNDefFunc1ArgKeyFuncs> VNFunc1ToValueNumMap;
+ VNFunc1ToValueNumMap* m_VNFunc1Map;
+ VNFunc1ToValueNumMap* GetVNFunc1Map()
+ {
+ if (m_VNFunc1Map == nullptr)
+ {
+ m_VNFunc1Map = new (m_alloc) VNFunc1ToValueNumMap(m_alloc);
+ }
+ return m_VNFunc1Map;
+ }
+
+ struct VNDefFunc2ArgKeyFuncs : public KeyFuncsDefEquals<VNDefFunc2Arg>
+ {
+ static unsigned GetHashCode(VNDefFunc2Arg val)
+ {
+ return (val.m_func << 24) + (val.m_arg0 << 8) + val.m_arg1;
+ }
+ };
+ typedef VNMap<VNDefFunc2Arg, VNDefFunc2ArgKeyFuncs> VNFunc2ToValueNumMap;
+ VNFunc2ToValueNumMap* m_VNFunc2Map;
+ VNFunc2ToValueNumMap* GetVNFunc2Map()
+ {
+ if (m_VNFunc2Map == nullptr)
+ {
+ m_VNFunc2Map = new (m_alloc) VNFunc2ToValueNumMap(m_alloc);
+ }
+ return m_VNFunc2Map;
+ }
+
+ struct VNDefFunc3ArgKeyFuncs : public KeyFuncsDefEquals<VNDefFunc3Arg>
+ {
+ static unsigned GetHashCode(VNDefFunc3Arg val)
+ {
+ return (val.m_func << 24) + (val.m_arg0 << 16) + (val.m_arg1 << 8) + val.m_arg2;
+ }
+ };
+ typedef VNMap<VNDefFunc3Arg, VNDefFunc3ArgKeyFuncs> VNFunc3ToValueNumMap;
+ VNFunc3ToValueNumMap* m_VNFunc3Map;
+ VNFunc3ToValueNumMap* GetVNFunc3Map()
+ {
+ if (m_VNFunc3Map == nullptr)
+ {
+ m_VNFunc3Map = new (m_alloc) VNFunc3ToValueNumMap(m_alloc);
+ }
+ return m_VNFunc3Map;
+ }
+
+ struct VNDefFunc4ArgKeyFuncs : public KeyFuncsDefEquals<VNDefFunc4Arg>
+ {
+ static unsigned GetHashCode(VNDefFunc4Arg val)
+ {
+ return (val.m_func << 24) + (val.m_arg0 << 16) + (val.m_arg1 << 8) + val.m_arg2 + (val.m_arg3 << 12);
+ }
+ };
+ typedef VNMap<VNDefFunc4Arg, VNDefFunc4ArgKeyFuncs> VNFunc4ToValueNumMap;
+ VNFunc4ToValueNumMap* m_VNFunc4Map;
+ VNFunc4ToValueNumMap* GetVNFunc4Map()
+ {
+ if (m_VNFunc4Map == nullptr)
+ {
+ m_VNFunc4Map = new (m_alloc) VNFunc4ToValueNumMap(m_alloc);
+ }
+ return m_VNFunc4Map;
+ }
+
+ enum SpecialRefConsts
+ {
+ SRC_Null,
+ SRC_ZeroMap,
+ SRC_NotAField,
+ SRC_ReadOnlyHeap,
+ SRC_Void,
+ SRC_EmptyExcSet,
+
+ SRC_NumSpecialRefConsts
+ };
+
+ // Counter that tracks the unique "not a field" sequences that have been assigned to
+ // PtrToLoc value numbers because the pointer was added to an offset that was not a field.
+ unsigned m_uPtrToLocNotAFieldCount;
+
+ // The "values" of special ref consts will be all be "null" -- their differing meanings will
+ // be carried by the distinct value numbers.
+ static class Object* s_specialRefConsts[SRC_NumSpecialRefConsts];
+ static class Object* s_nullConst;
+};
+
+template <>
+struct ValueNumStore::VarTypConv<TYP_INT>
+{
+ typedef INT32 Type;
+ typedef int Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_FLOAT>
+{
+ typedef INT32 Type;
+ typedef float Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_LONG>
+{
+ typedef INT64 Type;
+ typedef INT64 Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_DOUBLE>
+{
+ typedef INT64 Type;
+ typedef double Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_BYREF>
+{
+ typedef INT64 Type;
+ typedef void* Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_REF>
+{
+ typedef class Object* Type;
+ typedef class Object* Lang;
+};
+
+// Get the constant value stored in chunk "c" at "offset", coercing from the chunk's actual type (c->m_typ) to the requested type T.
+template <typename T>
+FORCEINLINE T ValueNumStore::SafeGetConstantValue(Chunk* c, unsigned offset)
+{
+ switch (c->m_typ)
+ {
+ case TYP_REF:
+ return CoerceTypRefToT<T>(c, offset);
+ case TYP_BYREF:
+ return static_cast<T>(reinterpret_cast<VarTypConv<TYP_BYREF>::Type*>(c->m_defs)[offset]);
+ case TYP_INT:
+ return static_cast<T>(reinterpret_cast<VarTypConv<TYP_INT>::Type*>(c->m_defs)[offset]);
+ case TYP_LONG:
+ return static_cast<T>(reinterpret_cast<VarTypConv<TYP_LONG>::Type*>(c->m_defs)[offset]);
+ case TYP_FLOAT:
+ return static_cast<T>(reinterpret_cast<VarTypConv<TYP_FLOAT>::Lang*>(c->m_defs)[offset]);
+ case TYP_DOUBLE:
+ return static_cast<T>(reinterpret_cast<VarTypConv<TYP_DOUBLE>::Lang*>(c->m_defs)[offset]);
+ default:
+ assert(false);
+ return (T)0;
+ }
+}
+
+// Inline functions.
+
+// static
+inline bool ValueNumStore::GenTreeOpIsLegalVNFunc(genTreeOps gtOper)
+{
+ return (s_vnfOpAttribs[gtOper] & VNFOA_IllegalGenTreeOp) == 0;
+}
+
+// static
+inline bool ValueNumStore::VNFuncIsCommutative(VNFunc vnf)
+{
+ return (s_vnfOpAttribs[vnf] & VNFOA_Commutative) != 0;
+}
+
+inline bool ValueNumStore::VNFuncIsComparison(VNFunc vnf)
+{
+ if (vnf >= VNF_Boundary)
+ {
+ return false;
+ }
+ genTreeOps gtOp = genTreeOps(vnf);
+ return GenTree::OperIsCompare(gtOp) != 0;
+}
+
+template <>
+inline size_t ValueNumStore::CoerceTypRefToT(Chunk* c, unsigned offset)
+{
+ return reinterpret_cast<size_t>(reinterpret_cast<VarTypConv<TYP_REF>::Type*>(c->m_defs)[offset]);
+}
+
+template <typename T>
+inline T ValueNumStore::CoerceTypRefToT(Chunk* c, unsigned offset)
+{
+ noway_assert(sizeof(T) >= sizeof(VarTypConv<TYP_REF>::Type));
+ unreached();
+}
+
+/*****************************************************************************/
+#endif // _VALUENUM_H_
+/*****************************************************************************/
diff --git a/src/jit/valuenumfuncs.h b/src/jit/valuenumfuncs.h
new file mode 100644
index 0000000000..064a33707b
--- /dev/null
+++ b/src/jit/valuenumfuncs.h
@@ -0,0 +1,141 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Defines the functions understood by the value-numbering system.
+// ValueNumFuncDef(<name of function>, <arity (0-4)>, <is-commutative (for arity = 2)>, <non-null (for gc functions)>,
+// <is-shared-static>)
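
Each definition below is consumed through the usual X-macro pattern: a client defines
ValueNumFuncDef to expand to whatever it needs, includes this file, and the #undef at the bottom
resets the macro for the next client. A sketch of the enum-building client (the exact layout used
by valuenum.h is an assumption here, inferred from the VNF_ names it references, such as
VNF_Boundary and VNF_Cast; GT_COUNT comes from gentree.h):

    enum VNFunc
    {
        // genTreeOps values double as VNFuncs below VNF_Boundary; the entries in this file
        // extend the range past it.
        VNF_Boundary = GT_COUNT,
    #define ValueNumFuncDef(nm, arity, commute, knownNonNull, sharedStatic) VNF_##nm,
    #include "valuenumfuncs.h"
        VNF_COUNT
    };

A second inclusion with a different ValueNumFuncDef definition can build parallel tables, e.g. of
arities or commutativity flags.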
+
+// clang-format off
+ValueNumFuncDef(MapStore, 3, false, false, false)
+ValueNumFuncDef(MapSelect, 2, false, false, false)
+
+ValueNumFuncDef(FieldSeq, 2, false, false, false) // Sequence (VN of null == empty) of (VN's of) field handles.
+ValueNumFuncDef(ZeroMap, 0, false, false, false) // The "ZeroMap": indexing at any index yields "zero of the desired type".
+
+ValueNumFuncDef(PtrToLoc, 3, false, false, false) // Pointer (byref) to a local variable. Args: VN's of: 0: var num, 1: FieldSeq, 2: Unique value for this PtrToLoc.
+ValueNumFuncDef(PtrToArrElem, 4, false, false, false) // Pointer (byref) to an array element. Args: 0: array elem type eq class var_types value, VN's of: 1: array, 2: index, 3: FieldSeq.
+ValueNumFuncDef(PtrToStatic, 1, false, false, false) // Pointer (byref) to a static variable (or possibly a field thereof, if the static variable is a struct). Args: 0: FieldSeq, first element
+ // of which is the static var.
+ValueNumFuncDef(Phi, 2, false, false, false) // A phi function. Only occurs as arg of PhiDef or PhiHeapDef. Arguments are SSA numbers of var being defined.
+ValueNumFuncDef(PhiDef, 3, false, false, false) // Args: 0: local var # (or -1 for Heap), 1: SSA #, 2: VN of definition.
+// Wouldn't need this if I'd made Heap a regular local variable...
+ValueNumFuncDef(PhiHeapDef, 2, false, false, false) // Args: 0: VN for basic block pointer, 1: VN of definition
+ValueNumFuncDef(InitVal, 1, false, false, false) // An input arg, or init val of a local. Args: 0: a constant VN.
+
+
+ValueNumFuncDef(Cast, 2, false, false, false) // VNF_Cast: Cast operation changes the representation's size and unsigned-ness.
+ // Args: 0: Source for the cast operation.
+ // 1: Constant integer representing the cast operation.
+ // Use VNForCastOper() to construct.
+
+ValueNumFuncDef(CastClass, 2, false, false, false) // Args: 0: Handle of class being cast to, 1: object being cast.
+ValueNumFuncDef(IsInstanceOf, 2, false, false, false) // Args: 0: Handle of class being queried, 1: object being queried.
+ValueNumFuncDef(ReadyToRunCastClass, 2, false, false, false) // Args: 0: Helper stub address, 1: object being cast.
+ValueNumFuncDef(ReadyToRunIsInstanceOf, 2, false, false, false) // Args: 0: Helper stub address, 1: object being queried.
+
+ValueNumFuncDef(LdElemA, 3, false, false, false) // Args: 0: array value; 1: index value; 2: type handle of element.
+
+ValueNumFuncDef(GetRefanyVal, 2, false, false, false) // Args: 0: type handle; 1: typedref value. Returns the value (asserting that the type is right).
+
+ValueNumFuncDef(GetClassFromMethodParam, 1, false, true, false) // Args: 0: method generic argument.
+ValueNumFuncDef(GetSyncFromClassHandle, 1, false, true, false) // Args: 0: class handle.
+ValueNumFuncDef(LoopCloneChoiceAddr, 0, false, true, false)
+
+// How we represent values of expressions with exceptional side effects:
+ValueNumFuncDef(ValWithExc, 2, false, false, false) // Args: 0: value number from normal execution; 1: VN for set of possible exceptions.
+
+ValueNumFuncDef(ExcSetCons, 2, false, false, false) // Args: 0: exception; 1: exception set (including EmptyExcSet). Invariant: "car"s are always in ascending order.
+
+// Various exception values.
+ValueNumFuncDef(NullPtrExc, 1, false, false, false) // Null pointer exception.
+ValueNumFuncDef(ArithmeticExc, 0, false, false, false) // E.g., for signed ints, MinInt / -1.
+ValueNumFuncDef(OverflowExc, 0, false, false, false) // Integer overflow.
+ValueNumFuncDef(ConvOverflowExc, 2, false, false, false) // Integer overflow produced by conversion. Args: 0: input value; 1: var_types of target type
+ // (shifted left one bit; low bit encodes whether the source is unsigned.)
+ValueNumFuncDef(DivideByZeroExc, 0, false, false, false) // Division by zero.
+ValueNumFuncDef(IndexOutOfRangeExc, 2, false, false, false) // Args: 0: array length; 1: index. The exception raised if this bounds check fails.
+ValueNumFuncDef(InvalidCastExc, 2, false, false, false) // Args: 0: ref value being cast; 1: handle of type being cast to. Represents the exception thrown if the cast fails.
+ValueNumFuncDef(NewArrOverflowExc, 1, false, false, false) // Raises an integer overflow exception when Arg 0 is negative
+ValueNumFuncDef(HelperMultipleExc, 0, false, false, false) // Represents one or more different exceptions that may be thrown by a JitHelper
+
+ValueNumFuncDef(Lng2Dbl, 1, false, false, false)
+ValueNumFuncDef(ULng2Dbl, 1, false, false, false)
+ValueNumFuncDef(Dbl2Int, 1, false, false, false)
+ValueNumFuncDef(Dbl2UInt, 1, false, false, false)
+ValueNumFuncDef(Dbl2Lng, 1, false, false, false)
+ValueNumFuncDef(Dbl2ULng, 1, false, false, false)
+ValueNumFuncDef(FltRound, 1, false, false, false)
+ValueNumFuncDef(DblRound, 1, false, false, false)
+
+ValueNumFuncDef(Sin, 1, false, false, false)
+ValueNumFuncDef(Cos, 1, false, false, false)
+ValueNumFuncDef(Sqrt, 1, false, false, false)
+ValueNumFuncDef(Abs, 1, false, false, false)
+ValueNumFuncDef(RoundDouble, 1, false, false, false)
+ValueNumFuncDef(RoundFloat, 1, false, false, false)
+ValueNumFuncDef(RoundInt, 1, false, false, false)
+ValueNumFuncDef(Cosh, 1, false, false, false)
+ValueNumFuncDef(Sinh, 1, false, false, false)
+ValueNumFuncDef(Tan, 1, false, false, false)
+ValueNumFuncDef(Tanh, 1, false, false, false)
+ValueNumFuncDef(Asin, 1, false, false, false)
+ValueNumFuncDef(Acos, 1, false, false, false)
+ValueNumFuncDef(Atan, 1, false, false, false)
+ValueNumFuncDef(Atan2, 2, false, false, false)
+ValueNumFuncDef(Log10, 1, false, false, false)
+ValueNumFuncDef(Pow, 2, false, false, false)
+ValueNumFuncDef(Exp, 1, false, false, false)
+ValueNumFuncDef(Ceiling, 1, false, false, false)
+ValueNumFuncDef(Floor, 1, false, false, false)
+
+ValueNumFuncDef(ManagedThreadId, 0, false, false, false)
+
+ValueNumFuncDef(ObjGetType, 1, false, false, false)
+ValueNumFuncDef(GetgenericsGcstaticBase, 1, false, true, true)
+ValueNumFuncDef(GetgenericsNongcstaticBase, 1, false, true, true)
+ValueNumFuncDef(GetsharedGcstaticBase, 2, false, true, true)
+ValueNumFuncDef(GetsharedNongcstaticBase, 2, false, true, true)
+ValueNumFuncDef(GetsharedGcstaticBaseNoctor, 1, false, true, true)
+ValueNumFuncDef(GetsharedNongcstaticBaseNoctor, 1, false, true, true)
+ValueNumFuncDef(ReadyToRunStaticBase, 1, false, true, true)
+ValueNumFuncDef(GetsharedGcstaticBaseDynamicclass, 2, false, true, true)
+ValueNumFuncDef(GetsharedNongcstaticBaseDynamicclass, 2, false, true, true)
+ValueNumFuncDef(GetgenericsGcthreadstaticBase, 1, false, true, true)
+ValueNumFuncDef(GetgenericsNongcthreadstaticBase, 1, false, true, true)
+ValueNumFuncDef(GetsharedGcthreadstaticBase, 2, false, true, true)
+ValueNumFuncDef(GetsharedNongcthreadstaticBase, 2, false, true, true)
+ValueNumFuncDef(GetsharedGcthreadstaticBaseNoctor, 2, false, true, true)
+ValueNumFuncDef(GetsharedNongcthreadstaticBaseNoctor, 2, false, true, true)
+ValueNumFuncDef(GetsharedGcthreadstaticBaseDynamicclass, 2, false, true, true)
+ValueNumFuncDef(GetsharedNongcthreadstaticBaseDynamicclass, 2, false, true, true)
+
+ValueNumFuncDef(ClassinitSharedDynamicclass, 2, false, false, false)
+ValueNumFuncDef(RuntimeHandleMethod, 2, false, true, false)
+ValueNumFuncDef(RuntimeHandleClass, 2, false, true, false)
+
+ValueNumFuncDef(GetStaticAddrContext, 1, false, true, false)
+ValueNumFuncDef(GetStaticAddrTLS, 1, false, true, false)
+
+ValueNumFuncDef(JitNew, 2, false, true, false)
+ValueNumFuncDef(JitNewArr, 3, false, true, false)
+ValueNumFuncDef(JitReadyToRunNew, 2, false, true, false)
+ValueNumFuncDef(JitReadyToRunNewArr, 3, false, true, false)
+ValueNumFuncDef(BoxNullable, 3, false, false, false)
+
+ValueNumFuncDef(LT_UN, 2, false, false, false)
+ValueNumFuncDef(LE_UN, 2, false, false, false)
+ValueNumFuncDef(GE_UN, 2, false, false, false)
+ValueNumFuncDef(GT_UN, 2, false, false, false)
+ValueNumFuncDef(ADD_UN, 2, true, false, false)
+ValueNumFuncDef(SUB_UN, 2, false, false, false)
+ValueNumFuncDef(MUL_UN, 2, true, false, false)
+ValueNumFuncDef(DIV_UN, 2, false, false, false)
+ValueNumFuncDef(MOD_UN, 2, false, false, false)
+
+ValueNumFuncDef(StrCns, 2, false, true, false)
+
+ValueNumFuncDef(Unbox, 2, false, true, false)
+// clang-format on
+
+#undef ValueNumFuncDef
diff --git a/src/jit/valuenumtype.h b/src/jit/valuenumtype.h
new file mode 100644
index 0000000000..f898d87532
--- /dev/null
+++ b/src/jit/valuenumtype.h
@@ -0,0 +1,101 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Defines the type "ValueNum".
+
+// This file exists only to break an include-file cycle -- this had been in ValueNum.h, but that
+// file wanted to include gentree.h to get GT_COUNT, and gentree.h wanted to include ValueNum.h to
+// get the ValueNum type.
+
+/*****************************************************************************/
+#ifndef _VALUENUMTYPE_H_
+#define _VALUENUMTYPE_H_
+/*****************************************************************************/
+
+// We will represent ValueNum's as unsigned integers.
+typedef UINT32 ValueNum;
+
+// There are two "kinds" of value numbers, which differ in their modeling of the actions of other threads.
+// "Liberal" value numbers assume that the other threads change contents of heap locations only at
+// synchronization points. Liberal VNs are appropriate, for example, in identifying CSE opportunities.
+// "Conservative" value numbers assume that the contents of heap locations change arbitrarily between
+// every two accesses. Conservative VNs are appropriate, for example, in assertion prop, where an observation
+// of a property of the value in some storage location is used to perform an optimization downstream on
+// an operation involving the contents of that storage location. If other threads may modify the storage
+// location between the two accesses, the observed property may no longer hold -- and conservative VNs make
+// it clear that the values need not be the same.
+//
+enum ValueNumKind
+{
+ VNK_Liberal,
+ VNK_Conservative
+};
+
+struct ValueNumPair
+{
+private:
+ ValueNum m_liberal;
+ ValueNum m_conservative;
+
+public:
+ ValueNum GetLiberal() const
+ {
+ return m_liberal;
+ }
+ void SetLiberal(ValueNum vn)
+ {
+ m_liberal = vn;
+ }
+ ValueNum GetConservative() const
+ {
+ return m_conservative;
+ }
+ void SetConservative(ValueNum vn)
+ {
+ m_conservative = vn;
+ }
+
+ ValueNum* GetLiberalAddr()
+ {
+ return &m_liberal;
+ }
+ ValueNum* GetConservativeAddr()
+ {
+ return &m_conservative;
+ }
+
+ ValueNum Get(ValueNumKind vnk)
+ {
+ return vnk == VNK_Liberal ? m_liberal : m_conservative;
+ }
+
+ void SetBoth(ValueNum vn)
+ {
+ m_liberal = vn;
+ m_conservative = vn;
+ }
+
+ void operator=(const ValueNumPair& vn2)
+ {
+ m_liberal = vn2.m_liberal;
+ m_conservative = vn2.m_conservative;
+ }
+
+ // Initializes both elements to "NoVN". Defined in ValueNum.cpp.
+ ValueNumPair();
+
+ ValueNumPair(ValueNum lib, ValueNum cons) : m_liberal(lib), m_conservative(cons)
+ {
+ }
+
+ // True iff neither element is "NoVN". Defined in ValueNum.cpp.
+ bool BothDefined() const;
+
+ bool BothEqual() const
+ {
+ return m_liberal == m_conservative;
+ }
+};
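
Both numbers travel together through the compiler in a ValueNumPair, and each optimization reads
the kind whose assumptions it can tolerate. A hypothetical helper showing the choice (the function
name is illustrative, not part of this header):

    ValueNum VNForOptimization(const ValueNumPair& vnp, bool heapMayChangeBetweenUses)
    {
        // CSE-style reasoning can use the liberal number; reasoning that becomes unsound if
        // another thread writes the heap between the two uses must take the conservative one.
        return heapMayChangeBetweenUses ? vnp.GetConservative() : vnp.GetLiberal();
    }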
+
+#endif // _VALUENUMTYPE_H_
diff --git a/src/jit/varset.h b/src/jit/varset.h
new file mode 100644
index 0000000000..6a2c37ed40
--- /dev/null
+++ b/src/jit/varset.h
@@ -0,0 +1,211 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This include file determines how VARSET_TP is implemented.
+//
+#ifndef _VARSET_INCLUDED_
+#define _VARSET_INCLUDED_ 1
+
+// A VARSET_TP is a set of (small) integers representing local variables.
+// We implement varsets using the BitSet abstraction, which supports
+// several different implementations.
+//
+// The set of tracked variables may change during a compilation, and variables may be
+// re-sorted, so the tracked variable index of a variable is decidedly *not* stable. The
+// bitset abstraction supports labeling of bitsets with "epochs", and supports a
+// debugging mode in which live bitsets must have the current epoch. To use this feature,
+// divide a compilation up into epochs, during which tracked variable indices are
+// stable.
+
+// Some implementations of BitSet may use a level of indirection. Therefore, we
+// must be careful about assignment and initialization. We often want to
+// reason about VARSET_TP as immutable values, and just copying the contents would
+// introduce sharing in the indirect case, which is usually not what's desired. On
+// the other hand, there are many cases in which the RHS value has just been
+// created functionally, and the initialization/assignment is obviously its last
+// use. In these cases, allocating a new indirect representation for the lhs (if
+// it does not already have one) would be unnecessary and wasteful. Thus, for both
+// initialization and assignment, we have normal versions, which do make copies to
+// prevent sharing and definitely preserve value semantics, and "NOCOPY" versions,
+// which do not. Obviously, the latter should be used with care.
+
+#include "bitset.h"
+#include "compilerbitsettraits.h"
+
+const unsigned UInt64Bits = sizeof(UINT64) * 8;
+
+// This #define chooses the BitSet representation used for VARSET.
+// The choices are defined in "bitset.h"; they currently include
+// BSUInt64, BSShortLong, and BSUInt64Class.
+#define VARSET_REP BSShortLong
+
+#if VARSET_REP == BSUInt64
+
+#include "bitsetasuint64.h"
+
+typedef BitSetOps</*BitSetType*/ UINT64,
+ /*Brand*/ VARSET_REP,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ TrackedVarBitSetTraits>
+ VarSetOpsRaw;
+
+typedef UINT64 VARSET_TP;
+
+const unsigned lclMAX_TRACKED = UInt64Bits;
+
+#define VARSET_REP_IS_CLASS 0
+
+#elif VARSET_REP == BSShortLong
+
+#include "bitsetasshortlong.h"
+
+typedef BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ VARSET_REP,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ TrackedVarBitSetTraits>
+ VarSetOpsRaw;
+
+typedef BitSetShortLongRep VARSET_TP;
+
+// Tested various sizes for max tracked locals. The largest value for which no throughput regression
+// could be measured was 512. Going to 1024 showed the first throughput regressions.
+// We anticipate the larger size will be needed to support better inlining.
+// There were a number of failures when 512 was used for legacy, so we just retain the 128 value
+// for the legacy backend.
+
+#if !defined(LEGACY_BACKEND)
+const unsigned lclMAX_TRACKED = 512;
+#else
+const unsigned lclMAX_TRACKED = 128;
+#endif
+
+#define VARSET_REP_IS_CLASS 0
+
+#elif VARSET_REP == BSUInt64Class
+
+#include "bitsetasuint64inclass.h"
+
+typedef BitSetOps</*BitSetType*/ BitSetUint64<Compiler*, TrackedVarBitSetTraits>,
+ /*Brand*/ VARSET_REP,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ TrackedVarBitSetTraits>
+ VarSetOpsRaw;
+
+typedef BitSetUint64<Compiler*, TrackedVarBitSetTraits> VARSET_TP;
+
+const unsigned lclMAX_TRACKED = UInt64Bits;
+
+#define VARSET_REP_IS_CLASS 1
+
+#else
+
+#error "Unrecognized BitSet implemention for VarSet."
+
+#endif
+
+// These types should be used as the types for VARSET_TP arguments and return values, respectively.
+typedef VarSetOpsRaw::ValArgType VARSET_VALARG_TP;
+typedef VarSetOpsRaw::RetValType VARSET_VALRET_TP;
+
+#define VARSET_COUNTOPS 0
+#if VARSET_COUNTOPS
+typedef BitSetOpsWithCounter<VARSET_TP,
+ VARSET_REP,
+ Compiler*,
+ TrackedVarBitSetTraits,
+ VARSET_VALARG_TP,
+ VARSET_VALRET_TP,
+ VarSetOpsRaw::Iter>
+ VarSetOps;
+#else
+typedef VarSetOpsRaw VarSetOps;
+#endif
+
+#define ALLVARSET_REP BSUInt64
+
+#if ALLVARSET_REP == BSUInt64
+
+#include "bitsetasuint64.h"
+
+typedef BitSetOps</*BitSetType*/ UINT64,
+ /*Brand*/ ALLVARSET_REP,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ AllVarBitSetTraits>
+ AllVarSetOps;
+
+typedef UINT64 ALLVARSET_TP;
+
+const unsigned lclMAX_ALLSET_TRACKED = UInt64Bits;
+
+#define ALLVARSET_REP_IS_CLASS 0
+
+#elif ALLVARSET_REP == BSShortLong
+
+#include "bitsetasshortlong.h"
+
+typedef BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ ALLVARSET_REP,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ AllVarBitSetTraits>
+ AllVarSetOps;
+
+typedef BitSetShortLongRep ALLVARSET_TP;
+
+const unsigned lclMAX_ALLSET_TRACKED = lclMAX_TRACKED;
+
+#define ALLVARSET_REP_IS_CLASS 0
+
+#elif ALLVARSET_REP == BSUInt64Class
+
+#include "bitsetasuint64inclass.h"
+
+typedef BitSetOps</*BitSetType*/ BitSetUint64<Compiler*, AllVarBitSetTraits>,
+ /*Brand*/ ALLVARSET_REP,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ AllVarBitSetTraits>
+ AllVarSetOps;
+
+typedef BitSetUint64<Compiler*, AllVarBitSetTraits> ALLVARSET_TP;
+
+const unsigned lclMAX_ALLSET_TRACKED = UInt64Bits;
+
+#define ALLVARSET_REP_IS_CLASS 1
+
+#else
+#error "Unrecognized BitSet implemention for AllVarSet."
+#endif
+
+// These types should be used as the types for VARSET_TP arguments and return values, respectively.
+typedef AllVarSetOps::ValArgType ALLVARSET_VALARG_TP;
+typedef AllVarSetOps::RetValType ALLVARSET_VALRET_TP;
+
+// Initialize "varName" to "initVal." Copies contents, not references; if "varName" is uninitialized, allocates a var
+// set for it (using "comp" for any necessary allocation), and copies the contents of "initVal" into it.
+#define VARSET_INIT(comp, varName, initVal) varName(VarSetOps::MakeCopy(comp, initVal))
+#define ALLVARSET_INIT(comp, varName, initVal) varName(AllVarSetOps::MakeCopy(comp, initVal))
+
+// Initializes "varName" to "initVal", without copying: if "initVal" is an indirect representation, copies its
+// pointer into "varName".
+#if defined(DEBUG) && VARSET_REP_IS_CLASS
+#define VARSET_INIT_NOCOPY(varName, initVal) varName(initVal, 0)
+#else
+#define VARSET_INIT_NOCOPY(varName, initVal) varName(initVal)
+#endif
+
+#if defined(DEBUG) && ALLVARSET_REP_IS_CLASS
+#define ALLVARSET_INIT_NOCOPY(varName, initVal) varName(initVal, 0)
+#else
+#define ALLVARSET_INIT_NOCOPY(varName, initVal) varName(initVal)
+#endif
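
A hedged sketch of how the two forms might be used inside a Compiler member ("liveSet" and
"trackedIndex" are made-up locals; MakeCopy and AddElemD are the bitset.h operations the macros
and their callers rely on):

    // "this" is the Compiler*, which serves as the allocation environment.
    VARSET_TP VARSET_INIT(this, liveCopy, liveSet);    // deep copy: safe to mutate independently
    VARSET_TP VARSET_INIT_NOCOPY(liveView, liveSet);   // may share the representation: treat as read-only
    VarSetOps::AddElemD(this, liveCopy, trackedIndex); // destructive add touches only the copy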
+
+// The iterator pattern.
+
+// Use this to initialize an iterator "iterName" to iterate over a VARSET_TP "vs".
+// "varIndex" will be an unsigned variable to which we assign the elements of "vs".
+#define VARSET_ITER_INIT(comp, iterName, vs, varIndex) \
+ unsigned varIndex = 0; \
+ VarSetOps::Iter iterName(comp, vs)
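
A sketch of the iteration idiom this macro sets up ("liveSet" is a made-up local, and the NextElem
call shape is an assumption about the Iter type provided by bitset.h in this era):

    VARSET_ITER_INIT(this, iter, liveSet, varIndex);
    while (iter.NextElem(this, &varIndex))
    {
        // "varIndex" is a tracked-variable index (0 .. lclMAX_TRACKED-1), not a lvaTable index.
    }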
+
+#endif // _VARSET_INCLUDED_
diff --git a/src/jit/vartype.h b/src/jit/vartype.h
new file mode 100644
index 0000000000..550aeb9c5b
--- /dev/null
+++ b/src/jit/vartype.h
@@ -0,0 +1,285 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef _VARTYPE_H_
+#define _VARTYPE_H_
+/*****************************************************************************/
+#include "error.h"
+
+enum var_types_classification
+{
+ VTF_ANY = 0x0000,
+ VTF_INT = 0x0001,
+ VTF_UNS = 0x0002, // type is unsigned
+ VTF_FLT = 0x0004,
+ VTF_GCR = 0x0008, // type is GC ref
+ VTF_BYR = 0x0010, // type is Byref
+ VTF_I = 0x0020, // is machine sized
+ VTF_S = 0x0040, // is a struct type
+};
+
+DECLARE_TYPED_ENUM(var_types, BYTE)
+{
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) TYP_##tn,
+#include "typelist.h"
+#undef DEF_TP
+
+ TYP_COUNT,
+
+ TYP_lastIntrins = TYP_DOUBLE
+}
+END_DECLARE_TYPED_ENUM(var_types, BYTE)
+
+/*****************************************************************************
+ * C-style pointers are implemented as TYP_INT or TYP_LONG depending on the
+ * platform
+ */
+
+#ifdef _TARGET_64BIT_
+#define TYP_I_IMPL TYP_LONG
+#define TYP_U_IMPL TYP_ULONG
+#define TYPE_REF_IIM TYPE_REF_LNG
+#else
+#define TYP_I_IMPL TYP_INT
+#define TYP_U_IMPL TYP_UINT
+#define TYPE_REF_IIM TYPE_REF_INT
+#ifdef _PREFAST_
+// We silence this in the 32-bit build because for portability, we like to have asserts like this:
+// assert(op2->gtType == TYP_INT || op2->gtType == TYP_I_IMPL);
+// This is obviously redundant for 32-bit builds, but we don't want to have ifdefs and different
+// asserts just for 64-bit builds, so for now just silence the assert
+#pragma warning(disable : 6287) // warning 6287: the left and right sub-expressions are identical
+#endif //_PREFAST_
+#endif
+
+/*****************************************************************************/
+
+const extern BYTE varTypeClassification[TYP_COUNT];
+
+// make any class with a TypeGet member also have a function TypeGet() that does the same thing
+template <class T>
+inline var_types TypeGet(T* t)
+{
+ return t->TypeGet();
+}
+
+// make a TypeGet function which is the identity function for var_types
+// the point of this and the preceding template is that you can now write template functions
+// that work on var_types as well as on any object that exposes a TypeGet method,
+// such as all of the varTypeIs* functions below
+inline var_types TypeGet(var_types v)
+{
+ return v;
+}
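
Because of these two overloads, a helper can be written once and applied either to a plain
var_types value or to any node type that exposes TypeGet(). A hypothetical example (this helper is
not part of the header):

    template <class T>
    inline bool varTypeIsGcRef(T vt) // illustrative only
    {
        return TypeGet(vt) == TYP_REF;
    }

    // varTypeIsGcRef(TYP_REF)      -> resolves through the identity overload above
    // varTypeIsGcRef(treeNodePtr)  -> resolves through the pointer overload, calling treeNodePtr->TypeGet()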
+
+#ifdef FEATURE_SIMD
+template <class T>
+inline bool varTypeIsSIMD(T vt)
+{
+ switch (TypeGet(vt))
+ {
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+#ifdef FEATURE_AVX_SUPPORT
+ case TYP_SIMD32:
+#endif // FEATURE_AVX_SUPPORT
+ return true;
+ default:
+ return false;
+ }
+}
+#else // FEATURE_SIMD
+
+// Always return false if FEATURE_SIMD is not enabled
+template <class T>
+inline bool varTypeIsSIMD(T vt)
+{
+ return false;
+}
+#endif // !FEATURE_SIMD
+
+template <class T>
+inline bool varTypeIsIntegral(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & (VTF_INT)) != 0);
+}
+
+template <class T>
+inline bool varTypeIsIntegralOrI(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & (VTF_INT | VTF_I)) != 0);
+}
+
+template <class T>
+inline bool varTypeIsUnsigned(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & (VTF_UNS)) != 0);
+}
+
+// If "vt" is an unsigned integral type, returns the corresponding signed integral type, otherwise
+// return "vt".
+inline var_types varTypeUnsignedToSigned(var_types vt)
+{
+ if (varTypeIsUnsigned(vt))
+ {
+ switch (vt)
+ {
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ return TYP_BYTE;
+ case TYP_USHORT:
+ case TYP_CHAR:
+ return TYP_SHORT;
+ case TYP_UINT:
+ return TYP_INT;
+ case TYP_ULONG:
+ return TYP_LONG;
+ default:
+ unreached();
+ }
+ }
+ else
+ {
+ return vt;
+ }
+}
+
+template <class T>
+inline bool varTypeIsFloating(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & (VTF_FLT)) != 0);
+}
+
+template <class T>
+inline bool varTypeIsArithmetic(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & (VTF_INT | VTF_FLT)) != 0);
+}
+
+template <class T>
+inline unsigned varTypeGCtype(T vt)
+{
+ return (unsigned)(varTypeClassification[TypeGet(vt)] & (VTF_GCR | VTF_BYR));
+}
+
+template <class T>
+inline bool varTypeIsGC(T vt)
+{
+ return (varTypeGCtype(vt) != 0);
+}
+
+template <class T>
+inline bool varTypeIsI(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & VTF_I) != 0);
+}
+
+template <class T>
+inline bool varTypeCanReg(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & (VTF_INT | VTF_I | VTF_FLT)) != 0);
+}
+
+template <class T>
+inline bool varTypeIsByte(T vt)
+{
+ return (TypeGet(vt) >= TYP_BOOL) && (TypeGet(vt) <= TYP_UBYTE);
+}
+
+template <class T>
+inline bool varTypeIsShort(T vt)
+{
+ return (TypeGet(vt) >= TYP_CHAR) && (TypeGet(vt) <= TYP_USHORT);
+}
+
+template <class T>
+inline bool varTypeIsSmall(T vt)
+{
+ return (TypeGet(vt) >= TYP_BOOL) && (TypeGet(vt) <= TYP_USHORT);
+}
+
+template <class T>
+inline bool varTypeIsSmallInt(T vt)
+{
+ return (TypeGet(vt) >= TYP_BYTE) && (TypeGet(vt) <= TYP_USHORT);
+}
+
+template <class T>
+inline bool varTypeIsIntOrI(T vt)
+{
+ return ((TypeGet(vt) == TYP_INT)
+#ifdef _TARGET_64BIT_
+ || (TypeGet(vt) == TYP_I_IMPL)
+#endif // _TARGET_64BIT_
+ );
+}
+
+template <class T>
+inline bool genActualTypeIsIntOrI(T vt)
+{
+ return ((TypeGet(vt) >= TYP_BOOL) && (TypeGet(vt) <= TYP_U_IMPL));
+}
+
+template <class T>
+inline bool varTypeIsLong(T vt)
+{
+ return (TypeGet(vt) >= TYP_LONG) && (TypeGet(vt) <= TYP_ULONG);
+}
+
+template <class T>
+inline bool varTypeIsMultiReg(T vt)
+{
+#ifdef _TARGET_64BIT_
+ return false;
+#else
+ return (TypeGet(vt) == TYP_LONG);
+#endif
+}
+
+template <class T>
+inline bool varTypeIsSingleReg(T vt)
+{
+ return !varTypeIsMultiReg(vt);
+}
+
+template <class T>
+inline bool varTypeIsComposite(T vt)
+{
+ return (!varTypeIsArithmetic(TypeGet(vt)) && TypeGet(vt) != TYP_VOID);
+}
+
+// Is this type promotable?
+// In general only structs are promotable.
+// However, a SIMD type, e.g. TYP_SIMD8, may be handled as either a struct, OR a
+// fully-promoted register type.
+// On 32-bit systems longs are split into an upper and lower half, and they are
+// handled as if they are structs with two integer fields.
+
+template <class T>
+inline bool varTypeIsPromotable(T vt)
+{
+ return (varTypeIsStruct(vt) || (TypeGet(vt) == TYP_BLK)
+#if !defined(_TARGET_64BIT_)
+ || varTypeIsLong(vt)
+#endif // !defined(_TARGET_64BIT_)
+ );
+}
+
+template <class T>
+inline bool varTypeIsStruct(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & VTF_S) != 0);
+}
+
+template <class T>
+inline bool varTypeIsEnregisterableStruct(T vt)
+{
+ return (TypeGet(vt) != TYP_STRUCT);
+}
+
+/*****************************************************************************/
+#endif // _VARTYPE_H_
+/*****************************************************************************/
diff --git a/src/jit/x86_instrs.h b/src/jit/x86_instrs.h
new file mode 100644
index 0000000000..1c3489d3b4
--- /dev/null
+++ b/src/jit/x86_instrs.h
@@ -0,0 +1,10 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This is a temporary file which defines the x86 instructions that
+// are currently still referenced when building the Arm Jit compiler
+//
+
+INS_lea,